{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9741942170173075, "eval_steps": 500, "global_step": 9400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010363768266141569, "grad_norm": 1.0679150819778442, "learning_rate": 1.3793103448275863e-07, "loss": 0.9527, "step": 1 }, { "epoch": 0.00020727536532283138, "grad_norm": 1.5423212051391602, "learning_rate": 2.7586206896551726e-07, "loss": 1.2815, "step": 2 }, { "epoch": 0.00031091304798424707, "grad_norm": 1.3922085762023926, "learning_rate": 4.137931034482759e-07, "loss": 1.1755, "step": 3 }, { "epoch": 0.00041455073064566275, "grad_norm": 1.4147642850875854, "learning_rate": 5.517241379310345e-07, "loss": 1.111, "step": 4 }, { "epoch": 0.0005181884133070785, "grad_norm": 1.4427387714385986, "learning_rate": 6.896551724137931e-07, "loss": 1.2391, "step": 5 }, { "epoch": 0.0006218260959684941, "grad_norm": 1.404998540878296, "learning_rate": 8.275862068965518e-07, "loss": 1.1594, "step": 6 }, { "epoch": 0.0007254637786299099, "grad_norm": 1.4490731954574585, "learning_rate": 9.655172413793103e-07, "loss": 1.1807, "step": 7 }, { "epoch": 0.0008291014612913255, "grad_norm": 1.4405816793441772, "learning_rate": 1.103448275862069e-06, "loss": 1.209, "step": 8 }, { "epoch": 0.0009327391439527412, "grad_norm": 1.3140469789505005, "learning_rate": 1.2413793103448277e-06, "loss": 1.0561, "step": 9 }, { "epoch": 0.001036376826614157, "grad_norm": 1.3970351219177246, "learning_rate": 1.3793103448275862e-06, "loss": 1.1147, "step": 10 }, { "epoch": 0.0011400145092755726, "grad_norm": 1.3507342338562012, "learning_rate": 1.517241379310345e-06, "loss": 1.1222, "step": 11 }, { "epoch": 0.0012436521919369883, "grad_norm": 1.404917597770691, "learning_rate": 1.6551724137931037e-06, "loss": 1.1796, "step": 12 }, { "epoch": 0.001347289874598404, "grad_norm": 1.2982468605041504, "learning_rate": 1.7931034482758622e-06, "loss": 1.1586, "step": 13 }, { "epoch": 0.0014509275572598197, "grad_norm": 1.3628538846969604, "learning_rate": 1.9310344827586207e-06, "loss": 1.1517, "step": 14 }, { "epoch": 0.0015545652399212354, "grad_norm": 1.6343719959259033, "learning_rate": 2.0689655172413796e-06, "loss": 1.2808, "step": 15 }, { "epoch": 0.001658202922582651, "grad_norm": 1.227524757385254, "learning_rate": 2.206896551724138e-06, "loss": 1.0468, "step": 16 }, { "epoch": 0.0017618406052440666, "grad_norm": 1.4011520147323608, "learning_rate": 2.3448275862068966e-06, "loss": 1.183, "step": 17 }, { "epoch": 0.0018654782879054825, "grad_norm": 1.3206757307052612, "learning_rate": 2.4827586206896555e-06, "loss": 1.1323, "step": 18 }, { "epoch": 0.001969115970566898, "grad_norm": 1.2996641397476196, "learning_rate": 2.6206896551724144e-06, "loss": 1.1096, "step": 19 }, { "epoch": 0.002072753653228314, "grad_norm": 1.3232481479644775, "learning_rate": 2.7586206896551725e-06, "loss": 1.1212, "step": 20 }, { "epoch": 0.0021763913358897294, "grad_norm": 1.409572958946228, "learning_rate": 2.8965517241379314e-06, "loss": 1.1313, "step": 21 }, { "epoch": 0.0022800290185511453, "grad_norm": 1.2496095895767212, "learning_rate": 3.03448275862069e-06, "loss": 1.0481, "step": 22 }, { "epoch": 0.002383666701212561, "grad_norm": 1.353409767150879, "learning_rate": 3.172413793103449e-06, "loss": 1.1167, "step": 23 }, { "epoch": 0.0024873043838739765, "grad_norm": 1.3337156772613525, "learning_rate": 3.3103448275862073e-06, "loss": 1.1315, "step": 24 }, { "epoch": 0.0025909420665353924, "grad_norm": 1.301856517791748, "learning_rate": 3.448275862068966e-06, "loss": 1.0786, "step": 25 }, { "epoch": 0.002694579749196808, "grad_norm": 1.4697136878967285, "learning_rate": 3.5862068965517243e-06, "loss": 1.168, "step": 26 }, { "epoch": 0.0027982174318582236, "grad_norm": 1.3797833919525146, "learning_rate": 3.7241379310344832e-06, "loss": 1.1165, "step": 27 }, { "epoch": 0.0029018551145196395, "grad_norm": 1.247714638710022, "learning_rate": 3.862068965517241e-06, "loss": 1.0296, "step": 28 }, { "epoch": 0.003005492797181055, "grad_norm": 1.4584671258926392, "learning_rate": 4.000000000000001e-06, "loss": 1.1499, "step": 29 }, { "epoch": 0.0031091304798424708, "grad_norm": 1.4505141973495483, "learning_rate": 4.137931034482759e-06, "loss": 1.1686, "step": 30 }, { "epoch": 0.0032127681625038866, "grad_norm": 1.4163717031478882, "learning_rate": 4.275862068965518e-06, "loss": 1.0762, "step": 31 }, { "epoch": 0.003316405845165302, "grad_norm": 1.2217729091644287, "learning_rate": 4.413793103448276e-06, "loss": 0.9751, "step": 32 }, { "epoch": 0.003420043527826718, "grad_norm": 1.5361864566802979, "learning_rate": 4.551724137931035e-06, "loss": 1.1817, "step": 33 }, { "epoch": 0.0035236812104881333, "grad_norm": 1.3760747909545898, "learning_rate": 4.689655172413793e-06, "loss": 1.024, "step": 34 }, { "epoch": 0.003627318893149549, "grad_norm": 1.1844209432601929, "learning_rate": 4.8275862068965525e-06, "loss": 0.9327, "step": 35 }, { "epoch": 0.003730956575810965, "grad_norm": 1.568856120109558, "learning_rate": 4.965517241379311e-06, "loss": 1.2155, "step": 36 }, { "epoch": 0.0038345942584723804, "grad_norm": 1.200225591659546, "learning_rate": 5.1034482758620695e-06, "loss": 0.9578, "step": 37 }, { "epoch": 0.003938231941133796, "grad_norm": 1.2568145990371704, "learning_rate": 5.241379310344829e-06, "loss": 0.9857, "step": 38 }, { "epoch": 0.004041869623795212, "grad_norm": 1.3353314399719238, "learning_rate": 5.3793103448275865e-06, "loss": 1.1098, "step": 39 }, { "epoch": 0.004145507306456628, "grad_norm": 1.161146640777588, "learning_rate": 5.517241379310345e-06, "loss": 0.9297, "step": 40 }, { "epoch": 0.004249144989118043, "grad_norm": 1.262049913406372, "learning_rate": 5.655172413793104e-06, "loss": 1.0301, "step": 41 }, { "epoch": 0.004352782671779459, "grad_norm": 1.0658643245697021, "learning_rate": 5.793103448275863e-06, "loss": 1.0132, "step": 42 }, { "epoch": 0.004456420354440875, "grad_norm": 1.0373111963272095, "learning_rate": 5.9310344827586205e-06, "loss": 0.9424, "step": 43 }, { "epoch": 0.0045600580371022905, "grad_norm": 1.1395219564437866, "learning_rate": 6.06896551724138e-06, "loss": 1.036, "step": 44 }, { "epoch": 0.004663695719763706, "grad_norm": 0.9569352865219116, "learning_rate": 6.206896551724138e-06, "loss": 0.8821, "step": 45 }, { "epoch": 0.004767333402425122, "grad_norm": 0.9801313877105713, "learning_rate": 6.344827586206898e-06, "loss": 0.9391, "step": 46 }, { "epoch": 0.004870971085086538, "grad_norm": 1.0882436037063599, "learning_rate": 6.482758620689655e-06, "loss": 0.9922, "step": 47 }, { "epoch": 0.004974608767747953, "grad_norm": 0.9098449945449829, "learning_rate": 6.620689655172415e-06, "loss": 0.8526, "step": 48 }, { "epoch": 0.0050782464504093685, "grad_norm": 0.9299390912055969, "learning_rate": 6.758620689655173e-06, "loss": 0.8582, "step": 49 }, { "epoch": 0.005181884133070785, "grad_norm": 0.8806210160255432, "learning_rate": 6.896551724137932e-06, "loss": 0.8892, "step": 50 }, { "epoch": 0.0052855218157322, "grad_norm": 0.8702979683876038, "learning_rate": 7.03448275862069e-06, "loss": 0.8592, "step": 51 }, { "epoch": 0.005389159498393616, "grad_norm": 0.8257914781570435, "learning_rate": 7.172413793103449e-06, "loss": 0.9179, "step": 52 }, { "epoch": 0.005492797181055032, "grad_norm": 0.8620724081993103, "learning_rate": 7.310344827586208e-06, "loss": 0.8952, "step": 53 }, { "epoch": 0.005596434863716447, "grad_norm": 0.8150980472564697, "learning_rate": 7.4482758620689665e-06, "loss": 0.7818, "step": 54 }, { "epoch": 0.005700072546377863, "grad_norm": 0.8714101910591125, "learning_rate": 7.586206896551724e-06, "loss": 0.8628, "step": 55 }, { "epoch": 0.005803710229039279, "grad_norm": 0.9289737939834595, "learning_rate": 7.724137931034483e-06, "loss": 0.9275, "step": 56 }, { "epoch": 0.005907347911700694, "grad_norm": 0.7288852334022522, "learning_rate": 7.862068965517242e-06, "loss": 0.7724, "step": 57 }, { "epoch": 0.00601098559436211, "grad_norm": 0.7169085741043091, "learning_rate": 8.000000000000001e-06, "loss": 0.7608, "step": 58 }, { "epoch": 0.006114623277023526, "grad_norm": 0.6929631233215332, "learning_rate": 8.137931034482759e-06, "loss": 0.7667, "step": 59 }, { "epoch": 0.0062182609596849415, "grad_norm": 0.789334237575531, "learning_rate": 8.275862068965518e-06, "loss": 0.7887, "step": 60 }, { "epoch": 0.006321898642346357, "grad_norm": 0.7103134393692017, "learning_rate": 8.413793103448276e-06, "loss": 0.7329, "step": 61 }, { "epoch": 0.006425536325007773, "grad_norm": 0.7139261960983276, "learning_rate": 8.551724137931035e-06, "loss": 0.722, "step": 62 }, { "epoch": 0.006529174007669189, "grad_norm": 0.6719704866409302, "learning_rate": 8.689655172413793e-06, "loss": 0.6828, "step": 63 }, { "epoch": 0.006632811690330604, "grad_norm": 0.6433077454566956, "learning_rate": 8.827586206896552e-06, "loss": 0.6597, "step": 64 }, { "epoch": 0.0067364493729920195, "grad_norm": 0.5811107158660889, "learning_rate": 8.965517241379312e-06, "loss": 0.6593, "step": 65 }, { "epoch": 0.006840087055653436, "grad_norm": 0.5606787800788879, "learning_rate": 9.10344827586207e-06, "loss": 0.7172, "step": 66 }, { "epoch": 0.006943724738314851, "grad_norm": 0.5808541178703308, "learning_rate": 9.241379310344829e-06, "loss": 0.6282, "step": 67 }, { "epoch": 0.007047362420976267, "grad_norm": 0.5700259208679199, "learning_rate": 9.379310344827586e-06, "loss": 0.6215, "step": 68 }, { "epoch": 0.007151000103637683, "grad_norm": 0.6022498607635498, "learning_rate": 9.517241379310346e-06, "loss": 0.6481, "step": 69 }, { "epoch": 0.007254637786299098, "grad_norm": 0.63319993019104, "learning_rate": 9.655172413793105e-06, "loss": 0.6874, "step": 70 }, { "epoch": 0.007358275468960514, "grad_norm": 0.6779617071151733, "learning_rate": 9.793103448275863e-06, "loss": 0.6006, "step": 71 }, { "epoch": 0.00746191315162193, "grad_norm": 0.7366542816162109, "learning_rate": 9.931034482758622e-06, "loss": 0.7501, "step": 72 }, { "epoch": 0.007565550834283345, "grad_norm": 0.681352436542511, "learning_rate": 1.006896551724138e-05, "loss": 0.6076, "step": 73 }, { "epoch": 0.007669188516944761, "grad_norm": 0.5272005200386047, "learning_rate": 1.0206896551724139e-05, "loss": 0.5571, "step": 74 }, { "epoch": 0.007772826199606177, "grad_norm": 0.579204797744751, "learning_rate": 1.0344827586206898e-05, "loss": 0.6202, "step": 75 }, { "epoch": 0.007876463882267593, "grad_norm": 0.4939001202583313, "learning_rate": 1.0482758620689658e-05, "loss": 0.6472, "step": 76 }, { "epoch": 0.007980101564929008, "grad_norm": 0.4667978286743164, "learning_rate": 1.0620689655172414e-05, "loss": 0.6026, "step": 77 }, { "epoch": 0.008083739247590423, "grad_norm": 0.5069308280944824, "learning_rate": 1.0758620689655173e-05, "loss": 0.6633, "step": 78 }, { "epoch": 0.008187376930251839, "grad_norm": 0.39213359355926514, "learning_rate": 1.0896551724137932e-05, "loss": 0.534, "step": 79 }, { "epoch": 0.008291014612913256, "grad_norm": 0.48177847266197205, "learning_rate": 1.103448275862069e-05, "loss": 0.5833, "step": 80 }, { "epoch": 0.008394652295574671, "grad_norm": 0.4758414030075073, "learning_rate": 1.117241379310345e-05, "loss": 0.4952, "step": 81 }, { "epoch": 0.008498289978236087, "grad_norm": 0.40138718485832214, "learning_rate": 1.1310344827586209e-05, "loss": 0.4742, "step": 82 }, { "epoch": 0.008601927660897502, "grad_norm": 0.4233314096927643, "learning_rate": 1.1448275862068966e-05, "loss": 0.6, "step": 83 }, { "epoch": 0.008705565343558918, "grad_norm": 0.4068356156349182, "learning_rate": 1.1586206896551726e-05, "loss": 0.5541, "step": 84 }, { "epoch": 0.008809203026220333, "grad_norm": 0.4177202582359314, "learning_rate": 1.1724137931034483e-05, "loss": 0.4775, "step": 85 }, { "epoch": 0.00891284070888175, "grad_norm": 0.43890196084976196, "learning_rate": 1.1862068965517241e-05, "loss": 0.5701, "step": 86 }, { "epoch": 0.009016478391543166, "grad_norm": 0.4295465648174286, "learning_rate": 1.2e-05, "loss": 0.5267, "step": 87 }, { "epoch": 0.009120116074204581, "grad_norm": 0.4324893057346344, "learning_rate": 1.213793103448276e-05, "loss": 0.5316, "step": 88 }, { "epoch": 0.009223753756865996, "grad_norm": 0.4378964304924011, "learning_rate": 1.2275862068965519e-05, "loss": 0.5169, "step": 89 }, { "epoch": 0.009327391439527412, "grad_norm": 0.46402058005332947, "learning_rate": 1.2413793103448277e-05, "loss": 0.5574, "step": 90 }, { "epoch": 0.009431029122188827, "grad_norm": 0.41128864884376526, "learning_rate": 1.2551724137931036e-05, "loss": 0.4585, "step": 91 }, { "epoch": 0.009534666804850244, "grad_norm": 0.4755265414714813, "learning_rate": 1.2689655172413795e-05, "loss": 0.544, "step": 92 }, { "epoch": 0.00963830448751166, "grad_norm": 0.3633743226528168, "learning_rate": 1.2827586206896551e-05, "loss": 0.4365, "step": 93 }, { "epoch": 0.009741942170173075, "grad_norm": 0.4546061158180237, "learning_rate": 1.296551724137931e-05, "loss": 0.5609, "step": 94 }, { "epoch": 0.00984557985283449, "grad_norm": 0.43278414011001587, "learning_rate": 1.310344827586207e-05, "loss": 0.5665, "step": 95 }, { "epoch": 0.009949217535495906, "grad_norm": 0.4906076192855835, "learning_rate": 1.324137931034483e-05, "loss": 0.5032, "step": 96 }, { "epoch": 0.010052855218157321, "grad_norm": 0.42069289088249207, "learning_rate": 1.3379310344827587e-05, "loss": 0.4994, "step": 97 }, { "epoch": 0.010156492900818737, "grad_norm": 0.4489268660545349, "learning_rate": 1.3517241379310346e-05, "loss": 0.4499, "step": 98 }, { "epoch": 0.010260130583480154, "grad_norm": 0.4298715591430664, "learning_rate": 1.3655172413793106e-05, "loss": 0.4592, "step": 99 }, { "epoch": 0.01036376826614157, "grad_norm": 0.6667768359184265, "learning_rate": 1.3793103448275863e-05, "loss": 0.557, "step": 100 }, { "epoch": 0.010467405948802985, "grad_norm": 0.39222168922424316, "learning_rate": 1.3931034482758621e-05, "loss": 0.4509, "step": 101 }, { "epoch": 0.0105710436314644, "grad_norm": 0.45276468992233276, "learning_rate": 1.406896551724138e-05, "loss": 0.4495, "step": 102 }, { "epoch": 0.010674681314125816, "grad_norm": 0.531661331653595, "learning_rate": 1.4206896551724138e-05, "loss": 0.5465, "step": 103 }, { "epoch": 0.010778318996787231, "grad_norm": 0.5024605989456177, "learning_rate": 1.4344827586206897e-05, "loss": 0.518, "step": 104 }, { "epoch": 0.010881956679448648, "grad_norm": 0.6134429574012756, "learning_rate": 1.4482758620689657e-05, "loss": 0.4726, "step": 105 }, { "epoch": 0.010985594362110064, "grad_norm": 0.41454777121543884, "learning_rate": 1.4620689655172416e-05, "loss": 0.4367, "step": 106 }, { "epoch": 0.01108923204477148, "grad_norm": 0.4755084812641144, "learning_rate": 1.4758620689655174e-05, "loss": 0.4115, "step": 107 }, { "epoch": 0.011192869727432895, "grad_norm": 0.37892940640449524, "learning_rate": 1.4896551724137933e-05, "loss": 0.4145, "step": 108 }, { "epoch": 0.01129650741009431, "grad_norm": 0.43258216977119446, "learning_rate": 1.503448275862069e-05, "loss": 0.356, "step": 109 }, { "epoch": 0.011400145092755725, "grad_norm": 0.4437129497528076, "learning_rate": 1.5172413793103448e-05, "loss": 0.4951, "step": 110 }, { "epoch": 0.01150378277541714, "grad_norm": 0.3721010386943817, "learning_rate": 1.5310344827586208e-05, "loss": 0.4168, "step": 111 }, { "epoch": 0.011607420458078558, "grad_norm": 0.4480922818183899, "learning_rate": 1.5448275862068965e-05, "loss": 0.4556, "step": 112 }, { "epoch": 0.011711058140739973, "grad_norm": 0.3953317403793335, "learning_rate": 1.5586206896551726e-05, "loss": 0.3778, "step": 113 }, { "epoch": 0.011814695823401389, "grad_norm": 0.4656274914741516, "learning_rate": 1.5724137931034484e-05, "loss": 0.4854, "step": 114 }, { "epoch": 0.011918333506062804, "grad_norm": 0.4352475106716156, "learning_rate": 1.586206896551724e-05, "loss": 0.4269, "step": 115 }, { "epoch": 0.01202197118872422, "grad_norm": 0.44665348529815674, "learning_rate": 1.6000000000000003e-05, "loss": 0.3769, "step": 116 }, { "epoch": 0.012125608871385635, "grad_norm": 0.44178900122642517, "learning_rate": 1.613793103448276e-05, "loss": 0.4108, "step": 117 }, { "epoch": 0.012229246554047052, "grad_norm": 0.45032814145088196, "learning_rate": 1.6275862068965518e-05, "loss": 0.4648, "step": 118 }, { "epoch": 0.012332884236708468, "grad_norm": 0.3899349272251129, "learning_rate": 1.6413793103448276e-05, "loss": 0.42, "step": 119 }, { "epoch": 0.012436521919369883, "grad_norm": 0.4548027813434601, "learning_rate": 1.6551724137931037e-05, "loss": 0.4473, "step": 120 }, { "epoch": 0.012540159602031298, "grad_norm": 0.4112202227115631, "learning_rate": 1.6689655172413794e-05, "loss": 0.3918, "step": 121 }, { "epoch": 0.012643797284692714, "grad_norm": 0.5322526693344116, "learning_rate": 1.6827586206896552e-05, "loss": 0.5122, "step": 122 }, { "epoch": 0.01274743496735413, "grad_norm": 0.48069503903388977, "learning_rate": 1.6965517241379313e-05, "loss": 0.4585, "step": 123 }, { "epoch": 0.012851072650015546, "grad_norm": 0.48351654410362244, "learning_rate": 1.710344827586207e-05, "loss": 0.3876, "step": 124 }, { "epoch": 0.012954710332676962, "grad_norm": 0.4733540117740631, "learning_rate": 1.7241379310344828e-05, "loss": 0.4134, "step": 125 }, { "epoch": 0.013058348015338377, "grad_norm": 0.3814505934715271, "learning_rate": 1.7379310344827586e-05, "loss": 0.3921, "step": 126 }, { "epoch": 0.013161985697999793, "grad_norm": 0.45198261737823486, "learning_rate": 1.7517241379310347e-05, "loss": 0.4672, "step": 127 }, { "epoch": 0.013265623380661208, "grad_norm": 0.5899551510810852, "learning_rate": 1.7655172413793105e-05, "loss": 0.4775, "step": 128 }, { "epoch": 0.013369261063322624, "grad_norm": 0.4327309727668762, "learning_rate": 1.7793103448275862e-05, "loss": 0.3983, "step": 129 }, { "epoch": 0.013472898745984039, "grad_norm": 0.42272329330444336, "learning_rate": 1.7931034482758623e-05, "loss": 0.4247, "step": 130 }, { "epoch": 0.013576536428645456, "grad_norm": 0.4427931308746338, "learning_rate": 1.806896551724138e-05, "loss": 0.4422, "step": 131 }, { "epoch": 0.013680174111306872, "grad_norm": 0.4716092050075531, "learning_rate": 1.820689655172414e-05, "loss": 0.4261, "step": 132 }, { "epoch": 0.013783811793968287, "grad_norm": 0.40726834535598755, "learning_rate": 1.8344827586206896e-05, "loss": 0.4017, "step": 133 }, { "epoch": 0.013887449476629702, "grad_norm": 0.3557395040988922, "learning_rate": 1.8482758620689657e-05, "loss": 0.3759, "step": 134 }, { "epoch": 0.013991087159291118, "grad_norm": 0.4733648896217346, "learning_rate": 1.8620689655172415e-05, "loss": 0.4111, "step": 135 }, { "epoch": 0.014094724841952533, "grad_norm": 0.3284488320350647, "learning_rate": 1.8758620689655173e-05, "loss": 0.3095, "step": 136 }, { "epoch": 0.01419836252461395, "grad_norm": 0.3625854551792145, "learning_rate": 1.8896551724137934e-05, "loss": 0.3512, "step": 137 }, { "epoch": 0.014302000207275366, "grad_norm": 0.4706290662288666, "learning_rate": 1.903448275862069e-05, "loss": 0.4833, "step": 138 }, { "epoch": 0.014405637889936781, "grad_norm": 0.45985788106918335, "learning_rate": 1.917241379310345e-05, "loss": 0.3579, "step": 139 }, { "epoch": 0.014509275572598197, "grad_norm": 0.4740069806575775, "learning_rate": 1.931034482758621e-05, "loss": 0.4024, "step": 140 }, { "epoch": 0.014612913255259612, "grad_norm": 0.4517245590686798, "learning_rate": 1.9448275862068968e-05, "loss": 0.3534, "step": 141 }, { "epoch": 0.014716550937921027, "grad_norm": 0.4930357336997986, "learning_rate": 1.9586206896551725e-05, "loss": 0.4098, "step": 142 }, { "epoch": 0.014820188620582445, "grad_norm": 0.4142999053001404, "learning_rate": 1.9724137931034483e-05, "loss": 0.3796, "step": 143 }, { "epoch": 0.01492382630324386, "grad_norm": 0.44440484046936035, "learning_rate": 1.9862068965517244e-05, "loss": 0.3553, "step": 144 }, { "epoch": 0.015027463985905275, "grad_norm": 0.4276621639728546, "learning_rate": 2e-05, "loss": 0.374, "step": 145 }, { "epoch": 0.01513110166856669, "grad_norm": 0.4273627698421478, "learning_rate": 2.013793103448276e-05, "loss": 0.4201, "step": 146 }, { "epoch": 0.015234739351228106, "grad_norm": 0.5435613989830017, "learning_rate": 2.027586206896552e-05, "loss": 0.4238, "step": 147 }, { "epoch": 0.015338377033889522, "grad_norm": 0.47127071022987366, "learning_rate": 2.0413793103448278e-05, "loss": 0.3729, "step": 148 }, { "epoch": 0.015442014716550937, "grad_norm": 0.37225133180618286, "learning_rate": 2.0551724137931036e-05, "loss": 0.3273, "step": 149 }, { "epoch": 0.015545652399212354, "grad_norm": 0.6549889445304871, "learning_rate": 2.0689655172413797e-05, "loss": 0.4219, "step": 150 }, { "epoch": 0.015649290081873768, "grad_norm": 0.44427254796028137, "learning_rate": 2.0827586206896554e-05, "loss": 0.3056, "step": 151 }, { "epoch": 0.015752927764535185, "grad_norm": 0.4055584669113159, "learning_rate": 2.0965517241379315e-05, "loss": 0.3699, "step": 152 }, { "epoch": 0.015856565447196602, "grad_norm": 0.4897557497024536, "learning_rate": 2.1103448275862073e-05, "loss": 0.3725, "step": 153 }, { "epoch": 0.015960203129858016, "grad_norm": 0.49200958013534546, "learning_rate": 2.1241379310344827e-05, "loss": 0.3634, "step": 154 }, { "epoch": 0.016063840812519433, "grad_norm": 0.47332412004470825, "learning_rate": 2.1379310344827585e-05, "loss": 0.4008, "step": 155 }, { "epoch": 0.016167478495180847, "grad_norm": 0.4750024378299713, "learning_rate": 2.1517241379310346e-05, "loss": 0.4689, "step": 156 }, { "epoch": 0.016271116177842264, "grad_norm": 0.45490074157714844, "learning_rate": 2.1655172413793104e-05, "loss": 0.4351, "step": 157 }, { "epoch": 0.016374753860503678, "grad_norm": 0.4558456838130951, "learning_rate": 2.1793103448275865e-05, "loss": 0.3582, "step": 158 }, { "epoch": 0.016478391543165095, "grad_norm": 0.4957145154476166, "learning_rate": 2.1931034482758622e-05, "loss": 0.4335, "step": 159 }, { "epoch": 0.016582029225826512, "grad_norm": 0.4751178026199341, "learning_rate": 2.206896551724138e-05, "loss": 0.3405, "step": 160 }, { "epoch": 0.016685666908487926, "grad_norm": 0.4940536916255951, "learning_rate": 2.220689655172414e-05, "loss": 0.386, "step": 161 }, { "epoch": 0.016789304591149343, "grad_norm": 0.525262713432312, "learning_rate": 2.23448275862069e-05, "loss": 0.337, "step": 162 }, { "epoch": 0.016892942273810756, "grad_norm": 0.4709758758544922, "learning_rate": 2.2482758620689656e-05, "loss": 0.3818, "step": 163 }, { "epoch": 0.016996579956472174, "grad_norm": 0.51386559009552, "learning_rate": 2.2620689655172417e-05, "loss": 0.3694, "step": 164 }, { "epoch": 0.01710021763913359, "grad_norm": 0.48915234208106995, "learning_rate": 2.2758620689655175e-05, "loss": 0.4096, "step": 165 }, { "epoch": 0.017203855321795004, "grad_norm": 0.5260508060455322, "learning_rate": 2.2896551724137933e-05, "loss": 0.3378, "step": 166 }, { "epoch": 0.01730749300445642, "grad_norm": 0.48009803891181946, "learning_rate": 2.3034482758620694e-05, "loss": 0.3874, "step": 167 }, { "epoch": 0.017411130687117835, "grad_norm": 0.5436667799949646, "learning_rate": 2.317241379310345e-05, "loss": 0.3528, "step": 168 }, { "epoch": 0.017514768369779252, "grad_norm": 0.5391678810119629, "learning_rate": 2.3310344827586212e-05, "loss": 0.3975, "step": 169 }, { "epoch": 0.017618406052440666, "grad_norm": 0.38910624384880066, "learning_rate": 2.3448275862068967e-05, "loss": 0.3708, "step": 170 }, { "epoch": 0.017722043735102083, "grad_norm": 0.4939960241317749, "learning_rate": 2.3586206896551724e-05, "loss": 0.357, "step": 171 }, { "epoch": 0.0178256814177635, "grad_norm": 0.3927547037601471, "learning_rate": 2.3724137931034482e-05, "loss": 0.2959, "step": 172 }, { "epoch": 0.017929319100424914, "grad_norm": 0.45952022075653076, "learning_rate": 2.3862068965517243e-05, "loss": 0.3149, "step": 173 }, { "epoch": 0.01803295678308633, "grad_norm": 0.4640410840511322, "learning_rate": 2.4e-05, "loss": 0.3859, "step": 174 }, { "epoch": 0.018136594465747745, "grad_norm": 0.6159297823905945, "learning_rate": 2.413793103448276e-05, "loss": 0.4259, "step": 175 }, { "epoch": 0.018240232148409162, "grad_norm": 0.5587474703788757, "learning_rate": 2.427586206896552e-05, "loss": 0.3207, "step": 176 }, { "epoch": 0.018343869831070576, "grad_norm": 0.5045621395111084, "learning_rate": 2.4413793103448277e-05, "loss": 0.4125, "step": 177 }, { "epoch": 0.018447507513731993, "grad_norm": 0.42282718420028687, "learning_rate": 2.4551724137931038e-05, "loss": 0.3426, "step": 178 }, { "epoch": 0.01855114519639341, "grad_norm": 0.5033625960350037, "learning_rate": 2.4689655172413796e-05, "loss": 0.3988, "step": 179 }, { "epoch": 0.018654782879054824, "grad_norm": 0.5493314862251282, "learning_rate": 2.4827586206896553e-05, "loss": 0.3396, "step": 180 }, { "epoch": 0.01875842056171624, "grad_norm": 0.47352248430252075, "learning_rate": 2.4965517241379314e-05, "loss": 0.3763, "step": 181 }, { "epoch": 0.018862058244377655, "grad_norm": 0.45625126361846924, "learning_rate": 2.5103448275862072e-05, "loss": 0.3829, "step": 182 }, { "epoch": 0.01896569592703907, "grad_norm": 0.5125209093093872, "learning_rate": 2.524137931034483e-05, "loss": 0.3919, "step": 183 }, { "epoch": 0.01906933360970049, "grad_norm": 0.4657573103904724, "learning_rate": 2.537931034482759e-05, "loss": 0.3326, "step": 184 }, { "epoch": 0.019172971292361903, "grad_norm": 0.5020167231559753, "learning_rate": 2.551724137931035e-05, "loss": 0.3733, "step": 185 }, { "epoch": 0.01927660897502332, "grad_norm": 0.48332661390304565, "learning_rate": 2.5655172413793103e-05, "loss": 0.3545, "step": 186 }, { "epoch": 0.019380246657684733, "grad_norm": 0.4483455717563629, "learning_rate": 2.5793103448275864e-05, "loss": 0.3001, "step": 187 }, { "epoch": 0.01948388434034615, "grad_norm": 0.4317588806152344, "learning_rate": 2.593103448275862e-05, "loss": 0.3211, "step": 188 }, { "epoch": 0.019587522023007564, "grad_norm": 0.5770359635353088, "learning_rate": 2.606896551724138e-05, "loss": 0.3931, "step": 189 }, { "epoch": 0.01969115970566898, "grad_norm": 0.4711754322052002, "learning_rate": 2.620689655172414e-05, "loss": 0.3387, "step": 190 }, { "epoch": 0.0197947973883304, "grad_norm": 0.4772488474845886, "learning_rate": 2.6344827586206898e-05, "loss": 0.3786, "step": 191 }, { "epoch": 0.019898435070991812, "grad_norm": 0.5390376448631287, "learning_rate": 2.648275862068966e-05, "loss": 0.359, "step": 192 }, { "epoch": 0.02000207275365323, "grad_norm": 0.5037919878959656, "learning_rate": 2.6620689655172416e-05, "loss": 0.3906, "step": 193 }, { "epoch": 0.020105710436314643, "grad_norm": 0.47281694412231445, "learning_rate": 2.6758620689655174e-05, "loss": 0.3429, "step": 194 }, { "epoch": 0.02020934811897606, "grad_norm": 0.4878309369087219, "learning_rate": 2.6896551724137935e-05, "loss": 0.349, "step": 195 }, { "epoch": 0.020312985801637474, "grad_norm": 0.41772618889808655, "learning_rate": 2.7034482758620693e-05, "loss": 0.2966, "step": 196 }, { "epoch": 0.02041662348429889, "grad_norm": 0.5848966836929321, "learning_rate": 2.717241379310345e-05, "loss": 0.3791, "step": 197 }, { "epoch": 0.020520261166960308, "grad_norm": 0.5652657151222229, "learning_rate": 2.731034482758621e-05, "loss": 0.4379, "step": 198 }, { "epoch": 0.020623898849621722, "grad_norm": 0.4548835754394531, "learning_rate": 2.744827586206897e-05, "loss": 0.2742, "step": 199 }, { "epoch": 0.02072753653228314, "grad_norm": 0.41999682784080505, "learning_rate": 2.7586206896551727e-05, "loss": 0.2811, "step": 200 }, { "epoch": 0.020831174214944553, "grad_norm": 0.48233315348625183, "learning_rate": 2.7724137931034488e-05, "loss": 0.3374, "step": 201 }, { "epoch": 0.02093481189760597, "grad_norm": 0.5580697059631348, "learning_rate": 2.7862068965517242e-05, "loss": 0.3705, "step": 202 }, { "epoch": 0.021038449580267383, "grad_norm": 0.5358230471611023, "learning_rate": 2.8e-05, "loss": 0.3769, "step": 203 }, { "epoch": 0.0211420872629288, "grad_norm": 0.4528452157974243, "learning_rate": 2.813793103448276e-05, "loss": 0.3231, "step": 204 }, { "epoch": 0.021245724945590218, "grad_norm": 0.5074284076690674, "learning_rate": 2.8275862068965518e-05, "loss": 0.3532, "step": 205 }, { "epoch": 0.02134936262825163, "grad_norm": 0.46669670939445496, "learning_rate": 2.8413793103448276e-05, "loss": 0.2634, "step": 206 }, { "epoch": 0.02145300031091305, "grad_norm": 0.445211797952652, "learning_rate": 2.8551724137931037e-05, "loss": 0.2899, "step": 207 }, { "epoch": 0.021556637993574462, "grad_norm": 0.49502161145210266, "learning_rate": 2.8689655172413795e-05, "loss": 0.3434, "step": 208 }, { "epoch": 0.02166027567623588, "grad_norm": 0.5964809060096741, "learning_rate": 2.8827586206896556e-05, "loss": 0.3686, "step": 209 }, { "epoch": 0.021763913358897297, "grad_norm": 0.45797616243362427, "learning_rate": 2.8965517241379313e-05, "loss": 0.3317, "step": 210 }, { "epoch": 0.02186755104155871, "grad_norm": 0.5436007380485535, "learning_rate": 2.910344827586207e-05, "loss": 0.3771, "step": 211 }, { "epoch": 0.021971188724220127, "grad_norm": 0.49826550483703613, "learning_rate": 2.9241379310344832e-05, "loss": 0.3123, "step": 212 }, { "epoch": 0.02207482640688154, "grad_norm": 0.5190101861953735, "learning_rate": 2.937931034482759e-05, "loss": 0.3288, "step": 213 }, { "epoch": 0.02217846408954296, "grad_norm": 0.49877676367759705, "learning_rate": 2.9517241379310347e-05, "loss": 0.317, "step": 214 }, { "epoch": 0.022282101772204372, "grad_norm": 0.524618923664093, "learning_rate": 2.965517241379311e-05, "loss": 0.3448, "step": 215 }, { "epoch": 0.02238573945486579, "grad_norm": 0.5132225155830383, "learning_rate": 2.9793103448275866e-05, "loss": 0.3199, "step": 216 }, { "epoch": 0.022489377137527206, "grad_norm": 0.44829437136650085, "learning_rate": 2.9931034482758624e-05, "loss": 0.3222, "step": 217 }, { "epoch": 0.02259301482018862, "grad_norm": 0.5427300333976746, "learning_rate": 3.006896551724138e-05, "loss": 0.3107, "step": 218 }, { "epoch": 0.022696652502850037, "grad_norm": 0.49013856053352356, "learning_rate": 3.020689655172414e-05, "loss": 0.2805, "step": 219 }, { "epoch": 0.02280029018551145, "grad_norm": 0.4895915985107422, "learning_rate": 3.0344827586206897e-05, "loss": 0.2775, "step": 220 }, { "epoch": 0.022903927868172868, "grad_norm": 0.5539257526397705, "learning_rate": 3.0482758620689658e-05, "loss": 0.364, "step": 221 }, { "epoch": 0.02300756555083428, "grad_norm": 0.5412529110908508, "learning_rate": 3.0620689655172415e-05, "loss": 0.331, "step": 222 }, { "epoch": 0.0231112032334957, "grad_norm": 0.45927807688713074, "learning_rate": 3.0758620689655176e-05, "loss": 0.3188, "step": 223 }, { "epoch": 0.023214840916157116, "grad_norm": 0.5479596257209778, "learning_rate": 3.089655172413793e-05, "loss": 0.3522, "step": 224 }, { "epoch": 0.02331847859881853, "grad_norm": 0.5624309778213501, "learning_rate": 3.103448275862069e-05, "loss": 0.4032, "step": 225 }, { "epoch": 0.023422116281479947, "grad_norm": 0.5181564688682556, "learning_rate": 3.117241379310345e-05, "loss": 0.3215, "step": 226 }, { "epoch": 0.02352575396414136, "grad_norm": 0.4655615985393524, "learning_rate": 3.131034482758621e-05, "loss": 0.3679, "step": 227 }, { "epoch": 0.023629391646802778, "grad_norm": 0.4179629981517792, "learning_rate": 3.144827586206897e-05, "loss": 0.2558, "step": 228 }, { "epoch": 0.023733029329464195, "grad_norm": 0.4396083950996399, "learning_rate": 3.158620689655173e-05, "loss": 0.2876, "step": 229 }, { "epoch": 0.02383666701212561, "grad_norm": 0.49648308753967285, "learning_rate": 3.172413793103448e-05, "loss": 0.2919, "step": 230 }, { "epoch": 0.023940304694787026, "grad_norm": 0.5090090036392212, "learning_rate": 3.1862068965517244e-05, "loss": 0.3227, "step": 231 }, { "epoch": 0.02404394237744844, "grad_norm": 0.5114050507545471, "learning_rate": 3.2000000000000005e-05, "loss": 0.3374, "step": 232 }, { "epoch": 0.024147580060109856, "grad_norm": 0.48820868134498596, "learning_rate": 3.2137931034482766e-05, "loss": 0.3371, "step": 233 }, { "epoch": 0.02425121774277127, "grad_norm": 0.5176852941513062, "learning_rate": 3.227586206896552e-05, "loss": 0.3017, "step": 234 }, { "epoch": 0.024354855425432687, "grad_norm": 0.4962432086467743, "learning_rate": 3.2413793103448275e-05, "loss": 0.311, "step": 235 }, { "epoch": 0.024458493108094104, "grad_norm": 0.6376422643661499, "learning_rate": 3.2551724137931036e-05, "loss": 0.3636, "step": 236 }, { "epoch": 0.024562130790755518, "grad_norm": 0.5156605839729309, "learning_rate": 3.26896551724138e-05, "loss": 0.283, "step": 237 }, { "epoch": 0.024665768473416935, "grad_norm": 0.5535326600074768, "learning_rate": 3.282758620689655e-05, "loss": 0.2782, "step": 238 }, { "epoch": 0.02476940615607835, "grad_norm": 0.459387868642807, "learning_rate": 3.296551724137931e-05, "loss": 0.2749, "step": 239 }, { "epoch": 0.024873043838739766, "grad_norm": 0.47547951340675354, "learning_rate": 3.310344827586207e-05, "loss": 0.2842, "step": 240 }, { "epoch": 0.02497668152140118, "grad_norm": 0.5172399282455444, "learning_rate": 3.324137931034483e-05, "loss": 0.3367, "step": 241 }, { "epoch": 0.025080319204062597, "grad_norm": 0.5787510275840759, "learning_rate": 3.337931034482759e-05, "loss": 0.3104, "step": 242 }, { "epoch": 0.025183956886724014, "grad_norm": 0.5061002969741821, "learning_rate": 3.351724137931035e-05, "loss": 0.2925, "step": 243 }, { "epoch": 0.025287594569385428, "grad_norm": 0.5571149587631226, "learning_rate": 3.3655172413793104e-05, "loss": 0.3751, "step": 244 }, { "epoch": 0.025391232252046845, "grad_norm": 0.5382230281829834, "learning_rate": 3.3793103448275865e-05, "loss": 0.3197, "step": 245 }, { "epoch": 0.02549486993470826, "grad_norm": 0.5850135087966919, "learning_rate": 3.3931034482758626e-05, "loss": 0.3414, "step": 246 }, { "epoch": 0.025598507617369676, "grad_norm": 0.5176693797111511, "learning_rate": 3.406896551724138e-05, "loss": 0.3454, "step": 247 }, { "epoch": 0.025702145300031093, "grad_norm": 0.5404151678085327, "learning_rate": 3.420689655172414e-05, "loss": 0.3133, "step": 248 }, { "epoch": 0.025805782982692507, "grad_norm": 0.49067196249961853, "learning_rate": 3.43448275862069e-05, "loss": 0.2944, "step": 249 }, { "epoch": 0.025909420665353924, "grad_norm": 0.457242876291275, "learning_rate": 3.4482758620689657e-05, "loss": 0.274, "step": 250 }, { "epoch": 0.026013058348015337, "grad_norm": 0.5542387366294861, "learning_rate": 3.462068965517242e-05, "loss": 0.3578, "step": 251 }, { "epoch": 0.026116696030676755, "grad_norm": 0.46230652928352356, "learning_rate": 3.475862068965517e-05, "loss": 0.3237, "step": 252 }, { "epoch": 0.026220333713338168, "grad_norm": 0.5638226866722107, "learning_rate": 3.489655172413793e-05, "loss": 0.3196, "step": 253 }, { "epoch": 0.026323971395999585, "grad_norm": 0.5501084923744202, "learning_rate": 3.5034482758620694e-05, "loss": 0.3407, "step": 254 }, { "epoch": 0.026427609078661003, "grad_norm": 0.5448089838027954, "learning_rate": 3.517241379310345e-05, "loss": 0.2784, "step": 255 }, { "epoch": 0.026531246761322416, "grad_norm": 0.4925558865070343, "learning_rate": 3.531034482758621e-05, "loss": 0.3386, "step": 256 }, { "epoch": 0.026634884443983833, "grad_norm": 0.5627326369285583, "learning_rate": 3.544827586206897e-05, "loss": 0.3312, "step": 257 }, { "epoch": 0.026738522126645247, "grad_norm": 0.5060065388679504, "learning_rate": 3.5586206896551725e-05, "loss": 0.3281, "step": 258 }, { "epoch": 0.026842159809306664, "grad_norm": 0.6172648668289185, "learning_rate": 3.5724137931034486e-05, "loss": 0.3321, "step": 259 }, { "epoch": 0.026945797491968078, "grad_norm": 0.5663166046142578, "learning_rate": 3.586206896551725e-05, "loss": 0.3, "step": 260 }, { "epoch": 0.027049435174629495, "grad_norm": 0.47459420561790466, "learning_rate": 3.6e-05, "loss": 0.2953, "step": 261 }, { "epoch": 0.027153072857290912, "grad_norm": 0.5799350738525391, "learning_rate": 3.613793103448276e-05, "loss": 0.3353, "step": 262 }, { "epoch": 0.027256710539952326, "grad_norm": 0.5640973448753357, "learning_rate": 3.627586206896552e-05, "loss": 0.3178, "step": 263 }, { "epoch": 0.027360348222613743, "grad_norm": 0.5171974301338196, "learning_rate": 3.641379310344828e-05, "loss": 0.3243, "step": 264 }, { "epoch": 0.027463985905275157, "grad_norm": 0.569558322429657, "learning_rate": 3.655172413793104e-05, "loss": 0.3031, "step": 265 }, { "epoch": 0.027567623587936574, "grad_norm": 0.531513512134552, "learning_rate": 3.668965517241379e-05, "loss": 0.3143, "step": 266 }, { "epoch": 0.02767126127059799, "grad_norm": 0.5028888583183289, "learning_rate": 3.6827586206896554e-05, "loss": 0.3018, "step": 267 }, { "epoch": 0.027774898953259405, "grad_norm": 0.5275846719741821, "learning_rate": 3.6965517241379315e-05, "loss": 0.3304, "step": 268 }, { "epoch": 0.027878536635920822, "grad_norm": 0.48110881447792053, "learning_rate": 3.710344827586207e-05, "loss": 0.2937, "step": 269 }, { "epoch": 0.027982174318582236, "grad_norm": 0.5279209017753601, "learning_rate": 3.724137931034483e-05, "loss": 0.3709, "step": 270 }, { "epoch": 0.028085812001243653, "grad_norm": 0.5388814210891724, "learning_rate": 3.737931034482759e-05, "loss": 0.3539, "step": 271 }, { "epoch": 0.028189449683905066, "grad_norm": 0.5041470527648926, "learning_rate": 3.7517241379310345e-05, "loss": 0.3097, "step": 272 }, { "epoch": 0.028293087366566484, "grad_norm": 0.55646812915802, "learning_rate": 3.7655172413793106e-05, "loss": 0.2919, "step": 273 }, { "epoch": 0.0283967250492279, "grad_norm": 0.5717840194702148, "learning_rate": 3.779310344827587e-05, "loss": 0.3896, "step": 274 }, { "epoch": 0.028500362731889314, "grad_norm": 0.5246320366859436, "learning_rate": 3.793103448275862e-05, "loss": 0.374, "step": 275 }, { "epoch": 0.02860400041455073, "grad_norm": 0.4986425042152405, "learning_rate": 3.806896551724138e-05, "loss": 0.3252, "step": 276 }, { "epoch": 0.028707638097212145, "grad_norm": 0.5473276376724243, "learning_rate": 3.8206896551724144e-05, "loss": 0.2667, "step": 277 }, { "epoch": 0.028811275779873562, "grad_norm": 0.6111901998519897, "learning_rate": 3.83448275862069e-05, "loss": 0.3327, "step": 278 }, { "epoch": 0.028914913462534976, "grad_norm": 0.540371835231781, "learning_rate": 3.848275862068966e-05, "loss": 0.3404, "step": 279 }, { "epoch": 0.029018551145196393, "grad_norm": 0.4997597336769104, "learning_rate": 3.862068965517242e-05, "loss": 0.3128, "step": 280 }, { "epoch": 0.02912218882785781, "grad_norm": 0.45018187165260315, "learning_rate": 3.8758620689655174e-05, "loss": 0.2513, "step": 281 }, { "epoch": 0.029225826510519224, "grad_norm": 0.4394817650318146, "learning_rate": 3.8896551724137935e-05, "loss": 0.2758, "step": 282 }, { "epoch": 0.02932946419318064, "grad_norm": 0.5027971267700195, "learning_rate": 3.903448275862069e-05, "loss": 0.3064, "step": 283 }, { "epoch": 0.029433101875842055, "grad_norm": 0.47926047444343567, "learning_rate": 3.917241379310345e-05, "loss": 0.2235, "step": 284 }, { "epoch": 0.029536739558503472, "grad_norm": 0.4586944878101349, "learning_rate": 3.931034482758621e-05, "loss": 0.295, "step": 285 }, { "epoch": 0.02964037724116489, "grad_norm": 0.6728528738021851, "learning_rate": 3.9448275862068966e-05, "loss": 0.3476, "step": 286 }, { "epoch": 0.029744014923826303, "grad_norm": 0.5615842938423157, "learning_rate": 3.958620689655173e-05, "loss": 0.2827, "step": 287 }, { "epoch": 0.02984765260648772, "grad_norm": 0.46119019389152527, "learning_rate": 3.972413793103449e-05, "loss": 0.2363, "step": 288 }, { "epoch": 0.029951290289149134, "grad_norm": 0.4322184920310974, "learning_rate": 3.986206896551724e-05, "loss": 0.2375, "step": 289 }, { "epoch": 0.03005492797181055, "grad_norm": 0.43789973855018616, "learning_rate": 4e-05, "loss": 0.2555, "step": 290 }, { "epoch": 0.030158565654471964, "grad_norm": 0.47169503569602966, "learning_rate": 3.999999887321555e-05, "loss": 0.246, "step": 291 }, { "epoch": 0.03026220333713338, "grad_norm": 0.415968656539917, "learning_rate": 3.999999549286231e-05, "loss": 0.3025, "step": 292 }, { "epoch": 0.0303658410197948, "grad_norm": 0.4890746474266052, "learning_rate": 3.999998985894067e-05, "loss": 0.2951, "step": 293 }, { "epoch": 0.030469478702456212, "grad_norm": 0.5048105716705322, "learning_rate": 3.999998197145127e-05, "loss": 0.3317, "step": 294 }, { "epoch": 0.03057311638511763, "grad_norm": 0.4656011462211609, "learning_rate": 3.999997183039498e-05, "loss": 0.299, "step": 295 }, { "epoch": 0.030676754067779043, "grad_norm": 0.5024938583374023, "learning_rate": 3.999995943577297e-05, "loss": 0.2788, "step": 296 }, { "epoch": 0.03078039175044046, "grad_norm": 0.5081213116645813, "learning_rate": 3.9999944787586606e-05, "loss": 0.3068, "step": 297 }, { "epoch": 0.030884029433101874, "grad_norm": 0.529151976108551, "learning_rate": 3.999992788583756e-05, "loss": 0.3062, "step": 298 }, { "epoch": 0.03098766711576329, "grad_norm": 0.6183048486709595, "learning_rate": 3.999990873052774e-05, "loss": 0.3258, "step": 299 }, { "epoch": 0.03109130479842471, "grad_norm": 0.4893452227115631, "learning_rate": 3.999988732165928e-05, "loss": 0.3118, "step": 300 }, { "epoch": 0.031194942481086122, "grad_norm": 0.5209515690803528, "learning_rate": 3.999986365923461e-05, "loss": 0.282, "step": 301 }, { "epoch": 0.031298580163747536, "grad_norm": 0.6380146741867065, "learning_rate": 3.99998377432564e-05, "loss": 0.3404, "step": 302 }, { "epoch": 0.03140221784640895, "grad_norm": 0.607676088809967, "learning_rate": 3.9999809573727556e-05, "loss": 0.2871, "step": 303 }, { "epoch": 0.03150585552907037, "grad_norm": 0.5492339134216309, "learning_rate": 3.9999779150651266e-05, "loss": 0.333, "step": 304 }, { "epoch": 0.03160949321173179, "grad_norm": 0.4717906415462494, "learning_rate": 3.9999746474030945e-05, "loss": 0.2943, "step": 305 }, { "epoch": 0.031713130894393204, "grad_norm": 0.5014916062355042, "learning_rate": 3.999971154387028e-05, "loss": 0.2634, "step": 306 }, { "epoch": 0.031816768577054615, "grad_norm": 0.5935418605804443, "learning_rate": 3.999967436017322e-05, "loss": 0.2988, "step": 307 }, { "epoch": 0.03192040625971603, "grad_norm": 0.5674479007720947, "learning_rate": 3.9999634922943934e-05, "loss": 0.3497, "step": 308 }, { "epoch": 0.03202404394237745, "grad_norm": 0.4754267930984497, "learning_rate": 3.999959323218688e-05, "loss": 0.2656, "step": 309 }, { "epoch": 0.032127681625038866, "grad_norm": 0.5481701493263245, "learning_rate": 3.9999549287906746e-05, "loss": 0.3455, "step": 310 }, { "epoch": 0.03223131930770028, "grad_norm": 0.6132522821426392, "learning_rate": 3.9999503090108494e-05, "loss": 0.3413, "step": 311 }, { "epoch": 0.03233495699036169, "grad_norm": 0.5571139454841614, "learning_rate": 3.999945463879732e-05, "loss": 0.3069, "step": 312 }, { "epoch": 0.03243859467302311, "grad_norm": 0.5365187525749207, "learning_rate": 3.999940393397869e-05, "loss": 0.2773, "step": 313 }, { "epoch": 0.03254223235568453, "grad_norm": 0.5804490447044373, "learning_rate": 3.99993509756583e-05, "loss": 0.3161, "step": 314 }, { "epoch": 0.032645870038345945, "grad_norm": 0.5937289595603943, "learning_rate": 3.999929576384215e-05, "loss": 0.2953, "step": 315 }, { "epoch": 0.032749507721007355, "grad_norm": 0.5334322452545166, "learning_rate": 3.9999238298536436e-05, "loss": 0.2788, "step": 316 }, { "epoch": 0.03285314540366877, "grad_norm": 0.5063037276268005, "learning_rate": 3.9999178579747636e-05, "loss": 0.2907, "step": 317 }, { "epoch": 0.03295678308633019, "grad_norm": 0.6236560344696045, "learning_rate": 3.999911660748249e-05, "loss": 0.3119, "step": 318 }, { "epoch": 0.03306042076899161, "grad_norm": 0.47660860419273376, "learning_rate": 3.999905238174797e-05, "loss": 0.2608, "step": 319 }, { "epoch": 0.033164058451653024, "grad_norm": 0.5512443780899048, "learning_rate": 3.9998985902551315e-05, "loss": 0.307, "step": 320 }, { "epoch": 0.033267696134314434, "grad_norm": 0.6016091108322144, "learning_rate": 3.999891716990002e-05, "loss": 0.3115, "step": 321 }, { "epoch": 0.03337133381697585, "grad_norm": 0.5076780319213867, "learning_rate": 3.9998846183801826e-05, "loss": 0.2699, "step": 322 }, { "epoch": 0.03347497149963727, "grad_norm": 0.46604278683662415, "learning_rate": 3.999877294426474e-05, "loss": 0.3226, "step": 323 }, { "epoch": 0.033578609182298685, "grad_norm": 0.5372481942176819, "learning_rate": 3.9998697451297e-05, "loss": 0.3307, "step": 324 }, { "epoch": 0.0336822468649601, "grad_norm": 0.5999156832695007, "learning_rate": 3.999861970490711e-05, "loss": 0.2952, "step": 325 }, { "epoch": 0.03378588454762151, "grad_norm": 0.5492348074913025, "learning_rate": 3.999853970510386e-05, "loss": 0.3538, "step": 326 }, { "epoch": 0.03388952223028293, "grad_norm": 0.4700670838356018, "learning_rate": 3.9998457451896234e-05, "loss": 0.3182, "step": 327 }, { "epoch": 0.03399315991294435, "grad_norm": 0.4508884847164154, "learning_rate": 3.999837294529351e-05, "loss": 0.2587, "step": 328 }, { "epoch": 0.034096797595605764, "grad_norm": 0.38943636417388916, "learning_rate": 3.9998286185305216e-05, "loss": 0.22, "step": 329 }, { "epoch": 0.03420043527826718, "grad_norm": 0.6094530820846558, "learning_rate": 3.999819717194111e-05, "loss": 0.3358, "step": 330 }, { "epoch": 0.03430407296092859, "grad_norm": 0.5398091077804565, "learning_rate": 3.999810590521125e-05, "loss": 0.2796, "step": 331 }, { "epoch": 0.03440771064359001, "grad_norm": 0.5134654641151428, "learning_rate": 3.9998012385125896e-05, "loss": 0.2806, "step": 332 }, { "epoch": 0.034511348326251426, "grad_norm": 0.5822792649269104, "learning_rate": 3.999791661169559e-05, "loss": 0.312, "step": 333 }, { "epoch": 0.03461498600891284, "grad_norm": 0.5909684300422668, "learning_rate": 3.999781858493114e-05, "loss": 0.3188, "step": 334 }, { "epoch": 0.03471862369157425, "grad_norm": 0.4724874794483185, "learning_rate": 3.9997718304843574e-05, "loss": 0.2591, "step": 335 }, { "epoch": 0.03482226137423567, "grad_norm": 0.5870285630226135, "learning_rate": 3.9997615771444194e-05, "loss": 0.3509, "step": 336 }, { "epoch": 0.03492589905689709, "grad_norm": 0.5800949335098267, "learning_rate": 3.999751098474455e-05, "loss": 0.285, "step": 337 }, { "epoch": 0.035029536739558505, "grad_norm": 0.4950229525566101, "learning_rate": 3.9997403944756466e-05, "loss": 0.2865, "step": 338 }, { "epoch": 0.03513317442221992, "grad_norm": 0.5681800246238708, "learning_rate": 3.999729465149199e-05, "loss": 0.3624, "step": 339 }, { "epoch": 0.03523681210488133, "grad_norm": 0.49301090836524963, "learning_rate": 3.999718310496344e-05, "loss": 0.3046, "step": 340 }, { "epoch": 0.03534044978754275, "grad_norm": 0.4641712009906769, "learning_rate": 3.999706930518338e-05, "loss": 0.2779, "step": 341 }, { "epoch": 0.035444087470204166, "grad_norm": 0.5455201268196106, "learning_rate": 3.999695325216464e-05, "loss": 0.338, "step": 342 }, { "epoch": 0.035547725152865584, "grad_norm": 0.4920612573623657, "learning_rate": 3.9996834945920286e-05, "loss": 0.3227, "step": 343 }, { "epoch": 0.035651362835527, "grad_norm": 0.4165387749671936, "learning_rate": 3.999671438646366e-05, "loss": 0.2795, "step": 344 }, { "epoch": 0.03575500051818841, "grad_norm": 0.46413376927375793, "learning_rate": 3.9996591573808346e-05, "loss": 0.2857, "step": 345 }, { "epoch": 0.03585863820084983, "grad_norm": 0.4862508773803711, "learning_rate": 3.9996466507968175e-05, "loss": 0.3072, "step": 346 }, { "epoch": 0.035962275883511245, "grad_norm": 0.4505625069141388, "learning_rate": 3.9996339188957243e-05, "loss": 0.2708, "step": 347 }, { "epoch": 0.03606591356617266, "grad_norm": 0.3864147961139679, "learning_rate": 3.9996209616789897e-05, "loss": 0.2053, "step": 348 }, { "epoch": 0.03616955124883408, "grad_norm": 0.4509885311126709, "learning_rate": 3.999607779148074e-05, "loss": 0.2996, "step": 349 }, { "epoch": 0.03627318893149549, "grad_norm": 0.4463110864162445, "learning_rate": 3.999594371304461e-05, "loss": 0.2519, "step": 350 }, { "epoch": 0.03637682661415691, "grad_norm": 0.499304860830307, "learning_rate": 3.999580738149664e-05, "loss": 0.2424, "step": 351 }, { "epoch": 0.036480464296818324, "grad_norm": 0.4653189480304718, "learning_rate": 3.9995668796852174e-05, "loss": 0.3005, "step": 352 }, { "epoch": 0.03658410197947974, "grad_norm": 0.4830104112625122, "learning_rate": 3.9995527959126835e-05, "loss": 0.2373, "step": 353 }, { "epoch": 0.03668773966214115, "grad_norm": 0.5555719137191772, "learning_rate": 3.999538486833648e-05, "loss": 0.3509, "step": 354 }, { "epoch": 0.03679137734480257, "grad_norm": 0.5193206667900085, "learning_rate": 3.999523952449725e-05, "loss": 0.2891, "step": 355 }, { "epoch": 0.036895015027463986, "grad_norm": 0.5097503066062927, "learning_rate": 3.999509192762551e-05, "loss": 0.3265, "step": 356 }, { "epoch": 0.0369986527101254, "grad_norm": 0.4666855037212372, "learning_rate": 3.999494207773789e-05, "loss": 0.2682, "step": 357 }, { "epoch": 0.03710229039278682, "grad_norm": 0.5073977112770081, "learning_rate": 3.9994789974851285e-05, "loss": 0.3099, "step": 358 }, { "epoch": 0.03720592807544823, "grad_norm": 0.4599367380142212, "learning_rate": 3.999463561898283e-05, "loss": 0.2535, "step": 359 }, { "epoch": 0.03730956575810965, "grad_norm": 0.5521755814552307, "learning_rate": 3.999447901014991e-05, "loss": 0.2691, "step": 360 }, { "epoch": 0.037413203440771065, "grad_norm": 0.5754250288009644, "learning_rate": 3.999432014837018e-05, "loss": 0.2835, "step": 361 }, { "epoch": 0.03751684112343248, "grad_norm": 0.4755854904651642, "learning_rate": 3.9994159033661535e-05, "loss": 0.2636, "step": 362 }, { "epoch": 0.0376204788060939, "grad_norm": 0.49590805172920227, "learning_rate": 3.999399566604214e-05, "loss": 0.26, "step": 363 }, { "epoch": 0.03772411648875531, "grad_norm": 0.5175451636314392, "learning_rate": 3.999383004553039e-05, "loss": 0.2751, "step": 364 }, { "epoch": 0.037827754171416726, "grad_norm": 0.5882648825645447, "learning_rate": 3.999366217214495e-05, "loss": 0.3524, "step": 365 }, { "epoch": 0.03793139185407814, "grad_norm": 0.5125890970230103, "learning_rate": 3.9993492045904734e-05, "loss": 0.3061, "step": 366 }, { "epoch": 0.03803502953673956, "grad_norm": 0.5740824937820435, "learning_rate": 3.999331966682892e-05, "loss": 0.3479, "step": 367 }, { "epoch": 0.03813866721940098, "grad_norm": 0.5392363667488098, "learning_rate": 3.999314503493692e-05, "loss": 0.2503, "step": 368 }, { "epoch": 0.03824230490206239, "grad_norm": 0.45441433787345886, "learning_rate": 3.9992968150248426e-05, "loss": 0.2621, "step": 369 }, { "epoch": 0.038345942584723805, "grad_norm": 0.5334871411323547, "learning_rate": 3.999278901278336e-05, "loss": 0.2924, "step": 370 }, { "epoch": 0.03844958026738522, "grad_norm": 0.4892536401748657, "learning_rate": 3.99926076225619e-05, "loss": 0.2559, "step": 371 }, { "epoch": 0.03855321795004664, "grad_norm": 0.48785465955734253, "learning_rate": 3.9992423979604496e-05, "loss": 0.2914, "step": 372 }, { "epoch": 0.03865685563270805, "grad_norm": 0.443486750125885, "learning_rate": 3.9992238083931834e-05, "loss": 0.2435, "step": 373 }, { "epoch": 0.03876049331536947, "grad_norm": 0.48417192697525024, "learning_rate": 3.999204993556487e-05, "loss": 0.2702, "step": 374 }, { "epoch": 0.038864130998030884, "grad_norm": 0.5423489809036255, "learning_rate": 3.99918595345248e-05, "loss": 0.2818, "step": 375 }, { "epoch": 0.0389677686806923, "grad_norm": 0.5185508728027344, "learning_rate": 3.9991666880833064e-05, "loss": 0.3319, "step": 376 }, { "epoch": 0.03907140636335372, "grad_norm": 0.5657951235771179, "learning_rate": 3.9991471974511384e-05, "loss": 0.2834, "step": 377 }, { "epoch": 0.03917504404601513, "grad_norm": 0.5035321116447449, "learning_rate": 3.9991274815581726e-05, "loss": 0.2788, "step": 378 }, { "epoch": 0.039278681728676546, "grad_norm": 0.49256327748298645, "learning_rate": 3.9991075404066296e-05, "loss": 0.2841, "step": 379 }, { "epoch": 0.03938231941133796, "grad_norm": 0.48294106125831604, "learning_rate": 3.999087373998756e-05, "loss": 0.2793, "step": 380 }, { "epoch": 0.03948595709399938, "grad_norm": 0.4720240831375122, "learning_rate": 3.9990669823368255e-05, "loss": 0.2448, "step": 381 }, { "epoch": 0.0395895947766608, "grad_norm": 0.4897540509700775, "learning_rate": 3.999046365423134e-05, "loss": 0.2596, "step": 382 }, { "epoch": 0.03969323245932221, "grad_norm": 0.6128451824188232, "learning_rate": 3.999025523260007e-05, "loss": 0.3332, "step": 383 }, { "epoch": 0.039796870141983624, "grad_norm": 0.47030091285705566, "learning_rate": 3.999004455849791e-05, "loss": 0.2902, "step": 384 }, { "epoch": 0.03990050782464504, "grad_norm": 0.4823108911514282, "learning_rate": 3.998983163194861e-05, "loss": 0.3106, "step": 385 }, { "epoch": 0.04000414550730646, "grad_norm": 0.4495014548301697, "learning_rate": 3.998961645297614e-05, "loss": 0.2716, "step": 386 }, { "epoch": 0.040107783189967876, "grad_norm": 0.4260924160480499, "learning_rate": 3.998939902160478e-05, "loss": 0.2548, "step": 387 }, { "epoch": 0.040211420872629286, "grad_norm": 0.5300273299217224, "learning_rate": 3.9989179337859e-05, "loss": 0.297, "step": 388 }, { "epoch": 0.0403150585552907, "grad_norm": 0.4261798858642578, "learning_rate": 3.998895740176358e-05, "loss": 0.2258, "step": 389 }, { "epoch": 0.04041869623795212, "grad_norm": 0.5781340003013611, "learning_rate": 3.9988733213343506e-05, "loss": 0.3491, "step": 390 }, { "epoch": 0.04052233392061354, "grad_norm": 0.4649885296821594, "learning_rate": 3.998850677262404e-05, "loss": 0.2692, "step": 391 }, { "epoch": 0.04062597160327495, "grad_norm": 0.4471288025379181, "learning_rate": 3.998827807963071e-05, "loss": 0.2811, "step": 392 }, { "epoch": 0.040729609285936365, "grad_norm": 0.49814170598983765, "learning_rate": 3.998804713438928e-05, "loss": 0.3116, "step": 393 }, { "epoch": 0.04083324696859778, "grad_norm": 0.5495662689208984, "learning_rate": 3.998781393692577e-05, "loss": 0.3267, "step": 394 }, { "epoch": 0.0409368846512592, "grad_norm": 0.5408654808998108, "learning_rate": 3.998757848726645e-05, "loss": 0.3068, "step": 395 }, { "epoch": 0.041040522333920616, "grad_norm": 0.5414074063301086, "learning_rate": 3.998734078543787e-05, "loss": 0.3334, "step": 396 }, { "epoch": 0.041144160016582026, "grad_norm": 0.5118625164031982, "learning_rate": 3.9987100831466794e-05, "loss": 0.2899, "step": 397 }, { "epoch": 0.041247797699243444, "grad_norm": 0.43798866868019104, "learning_rate": 3.998685862538026e-05, "loss": 0.2438, "step": 398 }, { "epoch": 0.04135143538190486, "grad_norm": 0.4928605556488037, "learning_rate": 3.998661416720558e-05, "loss": 0.2591, "step": 399 }, { "epoch": 0.04145507306456628, "grad_norm": 0.46029067039489746, "learning_rate": 3.9986367456970274e-05, "loss": 0.234, "step": 400 }, { "epoch": 0.041558710747227695, "grad_norm": 0.45207706093788147, "learning_rate": 3.9986118494702155e-05, "loss": 0.2274, "step": 401 }, { "epoch": 0.041662348429889105, "grad_norm": 0.4618712067604065, "learning_rate": 3.998586728042928e-05, "loss": 0.2565, "step": 402 }, { "epoch": 0.04176598611255052, "grad_norm": 0.5343668460845947, "learning_rate": 3.998561381417994e-05, "loss": 0.2647, "step": 403 }, { "epoch": 0.04186962379521194, "grad_norm": 0.4870312809944153, "learning_rate": 3.998535809598271e-05, "loss": 0.329, "step": 404 }, { "epoch": 0.04197326147787336, "grad_norm": 0.5918148756027222, "learning_rate": 3.998510012586639e-05, "loss": 0.3432, "step": 405 }, { "epoch": 0.04207689916053477, "grad_norm": 0.5239502191543579, "learning_rate": 3.9984839903860066e-05, "loss": 0.2599, "step": 406 }, { "epoch": 0.042180536843196184, "grad_norm": 0.6107182502746582, "learning_rate": 3.9984577429993044e-05, "loss": 0.298, "step": 407 }, { "epoch": 0.0422841745258576, "grad_norm": 0.39335134625434875, "learning_rate": 3.99843127042949e-05, "loss": 0.2406, "step": 408 }, { "epoch": 0.04238781220851902, "grad_norm": 0.4815625846385956, "learning_rate": 3.9984045726795474e-05, "loss": 0.2469, "step": 409 }, { "epoch": 0.042491449891180436, "grad_norm": 0.4272301495075226, "learning_rate": 3.9983776497524835e-05, "loss": 0.2548, "step": 410 }, { "epoch": 0.042595087573841846, "grad_norm": 0.5317639112472534, "learning_rate": 3.998350501651333e-05, "loss": 0.3006, "step": 411 }, { "epoch": 0.04269872525650326, "grad_norm": 0.43263331055641174, "learning_rate": 3.9983231283791537e-05, "loss": 0.2391, "step": 412 }, { "epoch": 0.04280236293916468, "grad_norm": 0.4960077404975891, "learning_rate": 3.998295529939031e-05, "loss": 0.2975, "step": 413 }, { "epoch": 0.0429060006218261, "grad_norm": 0.5747363567352295, "learning_rate": 3.998267706334075e-05, "loss": 0.3214, "step": 414 }, { "epoch": 0.043009638304487514, "grad_norm": 0.4495050013065338, "learning_rate": 3.99823965756742e-05, "loss": 0.2738, "step": 415 }, { "epoch": 0.043113275987148925, "grad_norm": 0.46255549788475037, "learning_rate": 3.998211383642226e-05, "loss": 0.2564, "step": 416 }, { "epoch": 0.04321691366981034, "grad_norm": 0.539284348487854, "learning_rate": 3.9981828845616804e-05, "loss": 0.2949, "step": 417 }, { "epoch": 0.04332055135247176, "grad_norm": 0.4439292252063751, "learning_rate": 3.9981541603289935e-05, "loss": 0.2582, "step": 418 }, { "epoch": 0.043424189035133176, "grad_norm": 0.5235161781311035, "learning_rate": 3.998125210947402e-05, "loss": 0.3272, "step": 419 }, { "epoch": 0.04352782671779459, "grad_norm": 0.42719945311546326, "learning_rate": 3.9980960364201676e-05, "loss": 0.2415, "step": 420 }, { "epoch": 0.043631464400456, "grad_norm": 0.4765814542770386, "learning_rate": 3.998066636750578e-05, "loss": 0.271, "step": 421 }, { "epoch": 0.04373510208311742, "grad_norm": 0.47974205017089844, "learning_rate": 3.998037011941946e-05, "loss": 0.2669, "step": 422 }, { "epoch": 0.04383873976577884, "grad_norm": 0.44285377860069275, "learning_rate": 3.998007161997609e-05, "loss": 0.2327, "step": 423 }, { "epoch": 0.043942377448440255, "grad_norm": 0.4870794415473938, "learning_rate": 3.997977086920932e-05, "loss": 0.2458, "step": 424 }, { "epoch": 0.044046015131101665, "grad_norm": 0.47079092264175415, "learning_rate": 3.997946786715302e-05, "loss": 0.2139, "step": 425 }, { "epoch": 0.04414965281376308, "grad_norm": 0.516667366027832, "learning_rate": 3.9979162613841336e-05, "loss": 0.2378, "step": 426 }, { "epoch": 0.0442532904964245, "grad_norm": 0.5418141484260559, "learning_rate": 3.9978855109308675e-05, "loss": 0.272, "step": 427 }, { "epoch": 0.04435692817908592, "grad_norm": 0.5159924030303955, "learning_rate": 3.9978545353589675e-05, "loss": 0.2741, "step": 428 }, { "epoch": 0.044460565861747334, "grad_norm": 0.48079657554626465, "learning_rate": 3.9978233346719235e-05, "loss": 0.2834, "step": 429 }, { "epoch": 0.044564203544408744, "grad_norm": 0.4194307327270508, "learning_rate": 3.997791908873253e-05, "loss": 0.2139, "step": 430 }, { "epoch": 0.04466784122707016, "grad_norm": 0.5443658828735352, "learning_rate": 3.997760257966495e-05, "loss": 0.292, "step": 431 }, { "epoch": 0.04477147890973158, "grad_norm": 0.5482102036476135, "learning_rate": 3.997728381955217e-05, "loss": 0.2584, "step": 432 }, { "epoch": 0.044875116592392995, "grad_norm": 0.49964722990989685, "learning_rate": 3.99769628084301e-05, "loss": 0.2593, "step": 433 }, { "epoch": 0.04497875427505441, "grad_norm": 0.45780518651008606, "learning_rate": 3.997663954633492e-05, "loss": 0.2778, "step": 434 }, { "epoch": 0.04508239195771582, "grad_norm": 0.5288244485855103, "learning_rate": 3.9976314033303056e-05, "loss": 0.2929, "step": 435 }, { "epoch": 0.04518602964037724, "grad_norm": 0.48790210485458374, "learning_rate": 3.9975986269371175e-05, "loss": 0.2738, "step": 436 }, { "epoch": 0.04528966732303866, "grad_norm": 0.4951280355453491, "learning_rate": 3.997565625457621e-05, "loss": 0.3266, "step": 437 }, { "epoch": 0.045393305005700074, "grad_norm": 0.551414966583252, "learning_rate": 3.997532398895536e-05, "loss": 0.322, "step": 438 }, { "epoch": 0.04549694268836149, "grad_norm": 0.4794701039791107, "learning_rate": 3.997498947254605e-05, "loss": 0.2303, "step": 439 }, { "epoch": 0.0456005803710229, "grad_norm": 0.4955127239227295, "learning_rate": 3.997465270538597e-05, "loss": 0.3013, "step": 440 }, { "epoch": 0.04570421805368432, "grad_norm": 0.6322245001792908, "learning_rate": 3.9974313687513086e-05, "loss": 0.3439, "step": 441 }, { "epoch": 0.045807855736345736, "grad_norm": 0.4571327865123749, "learning_rate": 3.9973972418965586e-05, "loss": 0.2581, "step": 442 }, { "epoch": 0.04591149341900715, "grad_norm": 0.5030487775802612, "learning_rate": 3.997362889978192e-05, "loss": 0.268, "step": 443 }, { "epoch": 0.04601513110166856, "grad_norm": 0.42191100120544434, "learning_rate": 3.99732831300008e-05, "loss": 0.2507, "step": 444 }, { "epoch": 0.04611876878432998, "grad_norm": 0.5020284056663513, "learning_rate": 3.997293510966119e-05, "loss": 0.3243, "step": 445 }, { "epoch": 0.0462224064669914, "grad_norm": 0.5030457973480225, "learning_rate": 3.997258483880229e-05, "loss": 0.2361, "step": 446 }, { "epoch": 0.046326044149652815, "grad_norm": 0.508976936340332, "learning_rate": 3.997223231746358e-05, "loss": 0.2452, "step": 447 }, { "epoch": 0.04642968183231423, "grad_norm": 0.5234203934669495, "learning_rate": 3.997187754568479e-05, "loss": 0.2858, "step": 448 }, { "epoch": 0.04653331951497564, "grad_norm": 0.5409331917762756, "learning_rate": 3.997152052350588e-05, "loss": 0.2737, "step": 449 }, { "epoch": 0.04663695719763706, "grad_norm": 0.48061665892601013, "learning_rate": 3.997116125096709e-05, "loss": 0.2485, "step": 450 }, { "epoch": 0.046740594880298476, "grad_norm": 0.45012375712394714, "learning_rate": 3.997079972810888e-05, "loss": 0.2657, "step": 451 }, { "epoch": 0.046844232562959894, "grad_norm": 0.4897686839103699, "learning_rate": 3.997043595497201e-05, "loss": 0.3284, "step": 452 }, { "epoch": 0.04694787024562131, "grad_norm": 0.43526583909988403, "learning_rate": 3.9970069931597465e-05, "loss": 0.2704, "step": 453 }, { "epoch": 0.04705150792828272, "grad_norm": 0.4520479738712311, "learning_rate": 3.9969701658026484e-05, "loss": 0.2512, "step": 454 }, { "epoch": 0.04715514561094414, "grad_norm": 0.507448136806488, "learning_rate": 3.996933113430056e-05, "loss": 0.3099, "step": 455 }, { "epoch": 0.047258783293605555, "grad_norm": 0.5489389300346375, "learning_rate": 3.996895836046145e-05, "loss": 0.2653, "step": 456 }, { "epoch": 0.04736242097626697, "grad_norm": 0.5679994821548462, "learning_rate": 3.996858333655115e-05, "loss": 0.2939, "step": 457 }, { "epoch": 0.04746605865892839, "grad_norm": 0.4488953649997711, "learning_rate": 3.996820606261192e-05, "loss": 0.2719, "step": 458 }, { "epoch": 0.0475696963415898, "grad_norm": 0.4154788851737976, "learning_rate": 3.9967826538686274e-05, "loss": 0.2521, "step": 459 }, { "epoch": 0.04767333402425122, "grad_norm": 0.49899592995643616, "learning_rate": 3.996744476481698e-05, "loss": 0.2997, "step": 460 }, { "epoch": 0.047776971706912634, "grad_norm": 0.5255045890808105, "learning_rate": 3.9967060741047045e-05, "loss": 0.2912, "step": 461 }, { "epoch": 0.04788060938957405, "grad_norm": 0.3737160563468933, "learning_rate": 3.996667446741975e-05, "loss": 0.2099, "step": 462 }, { "epoch": 0.04798424707223546, "grad_norm": 0.45894649624824524, "learning_rate": 3.996628594397861e-05, "loss": 0.2822, "step": 463 }, { "epoch": 0.04808788475489688, "grad_norm": 0.5205749273300171, "learning_rate": 3.996589517076741e-05, "loss": 0.2846, "step": 464 }, { "epoch": 0.048191522437558296, "grad_norm": 0.5407760739326477, "learning_rate": 3.9965502147830174e-05, "loss": 0.2829, "step": 465 }, { "epoch": 0.04829516012021971, "grad_norm": 0.5501642823219299, "learning_rate": 3.9965106875211204e-05, "loss": 0.2911, "step": 466 }, { "epoch": 0.04839879780288113, "grad_norm": 0.4986078441143036, "learning_rate": 3.9964709352955016e-05, "loss": 0.2863, "step": 467 }, { "epoch": 0.04850243548554254, "grad_norm": 0.4460117518901825, "learning_rate": 3.996430958110642e-05, "loss": 0.26, "step": 468 }, { "epoch": 0.04860607316820396, "grad_norm": 0.4906882047653198, "learning_rate": 3.996390755971046e-05, "loss": 0.2729, "step": 469 }, { "epoch": 0.048709710850865374, "grad_norm": 0.4443777799606323, "learning_rate": 3.9963503288812424e-05, "loss": 0.2802, "step": 470 }, { "epoch": 0.04881334853352679, "grad_norm": 0.40381139516830444, "learning_rate": 3.996309676845787e-05, "loss": 0.2588, "step": 471 }, { "epoch": 0.04891698621618821, "grad_norm": 0.5093889832496643, "learning_rate": 3.9962687998692605e-05, "loss": 0.2781, "step": 472 }, { "epoch": 0.04902062389884962, "grad_norm": 0.489504873752594, "learning_rate": 3.996227697956269e-05, "loss": 0.2818, "step": 473 }, { "epoch": 0.049124261581511036, "grad_norm": 0.43166133761405945, "learning_rate": 3.996186371111444e-05, "loss": 0.2617, "step": 474 }, { "epoch": 0.04922789926417245, "grad_norm": 0.5207101106643677, "learning_rate": 3.996144819339442e-05, "loss": 0.2939, "step": 475 }, { "epoch": 0.04933153694683387, "grad_norm": 0.4953382611274719, "learning_rate": 3.9961030426449445e-05, "loss": 0.2864, "step": 476 }, { "epoch": 0.04943517462949529, "grad_norm": 0.4431532919406891, "learning_rate": 3.996061041032659e-05, "loss": 0.2596, "step": 477 }, { "epoch": 0.0495388123121567, "grad_norm": 0.4658108949661255, "learning_rate": 3.996018814507319e-05, "loss": 0.2675, "step": 478 }, { "epoch": 0.049642449994818115, "grad_norm": 0.5410202741622925, "learning_rate": 3.995976363073681e-05, "loss": 0.2785, "step": 479 }, { "epoch": 0.04974608767747953, "grad_norm": 0.5721487402915955, "learning_rate": 3.995933686736529e-05, "loss": 0.2572, "step": 480 }, { "epoch": 0.04984972536014095, "grad_norm": 0.48720431327819824, "learning_rate": 3.995890785500673e-05, "loss": 0.2581, "step": 481 }, { "epoch": 0.04995336304280236, "grad_norm": 0.49117180705070496, "learning_rate": 3.995847659370945e-05, "loss": 0.3231, "step": 482 }, { "epoch": 0.05005700072546378, "grad_norm": 0.4524441659450531, "learning_rate": 3.995804308352206e-05, "loss": 0.2857, "step": 483 }, { "epoch": 0.050160638408125194, "grad_norm": 0.4876324534416199, "learning_rate": 3.995760732449341e-05, "loss": 0.2839, "step": 484 }, { "epoch": 0.05026427609078661, "grad_norm": 0.463663786649704, "learning_rate": 3.995716931667257e-05, "loss": 0.2638, "step": 485 }, { "epoch": 0.05036791377344803, "grad_norm": 0.45413050055503845, "learning_rate": 3.995672906010893e-05, "loss": 0.2708, "step": 486 }, { "epoch": 0.05047155145610944, "grad_norm": 0.407673716545105, "learning_rate": 3.995628655485208e-05, "loss": 0.2334, "step": 487 }, { "epoch": 0.050575189138770855, "grad_norm": 0.5084668397903442, "learning_rate": 3.995584180095188e-05, "loss": 0.2808, "step": 488 }, { "epoch": 0.05067882682143227, "grad_norm": 0.49144288897514343, "learning_rate": 3.995539479845845e-05, "loss": 0.2631, "step": 489 }, { "epoch": 0.05078246450409369, "grad_norm": 0.40713638067245483, "learning_rate": 3.995494554742215e-05, "loss": 0.2282, "step": 490 }, { "epoch": 0.05088610218675511, "grad_norm": 0.5845853090286255, "learning_rate": 3.995449404789361e-05, "loss": 0.2976, "step": 491 }, { "epoch": 0.05098973986941652, "grad_norm": 0.5178698301315308, "learning_rate": 3.995404029992371e-05, "loss": 0.2887, "step": 492 }, { "epoch": 0.051093377552077934, "grad_norm": 0.5840103626251221, "learning_rate": 3.9953584303563557e-05, "loss": 0.2829, "step": 493 }, { "epoch": 0.05119701523473935, "grad_norm": 0.533604085445404, "learning_rate": 3.995312605886454e-05, "loss": 0.2684, "step": 494 }, { "epoch": 0.05130065291740077, "grad_norm": 0.43775051832199097, "learning_rate": 3.995266556587831e-05, "loss": 0.2498, "step": 495 }, { "epoch": 0.051404290600062186, "grad_norm": 0.5094471573829651, "learning_rate": 3.9952202824656734e-05, "loss": 0.3014, "step": 496 }, { "epoch": 0.051507928282723596, "grad_norm": 0.48946914076805115, "learning_rate": 3.995173783525196e-05, "loss": 0.2674, "step": 497 }, { "epoch": 0.05161156596538501, "grad_norm": 0.5266274213790894, "learning_rate": 3.995127059771638e-05, "loss": 0.263, "step": 498 }, { "epoch": 0.05171520364804643, "grad_norm": 0.5169990062713623, "learning_rate": 3.995080111210265e-05, "loss": 0.2346, "step": 499 }, { "epoch": 0.05181884133070785, "grad_norm": 0.48766180872917175, "learning_rate": 3.995032937846366e-05, "loss": 0.2589, "step": 500 }, { "epoch": 0.05192247901336926, "grad_norm": 0.5541372895240784, "learning_rate": 3.9949855396852566e-05, "loss": 0.2897, "step": 501 }, { "epoch": 0.052026116696030675, "grad_norm": 0.6318419575691223, "learning_rate": 3.994937916732278e-05, "loss": 0.3263, "step": 502 }, { "epoch": 0.05212975437869209, "grad_norm": 0.472393661737442, "learning_rate": 3.994890068992797e-05, "loss": 0.2942, "step": 503 }, { "epoch": 0.05223339206135351, "grad_norm": 0.49356287717819214, "learning_rate": 3.994841996472203e-05, "loss": 0.3301, "step": 504 }, { "epoch": 0.052337029744014926, "grad_norm": 0.44888001680374146, "learning_rate": 3.994793699175915e-05, "loss": 0.1962, "step": 505 }, { "epoch": 0.052440667426676336, "grad_norm": 0.46697762608528137, "learning_rate": 3.9947451771093736e-05, "loss": 0.2719, "step": 506 }, { "epoch": 0.052544305109337754, "grad_norm": 0.4748048782348633, "learning_rate": 3.9946964302780455e-05, "loss": 0.3056, "step": 507 }, { "epoch": 0.05264794279199917, "grad_norm": 0.4572794735431671, "learning_rate": 3.9946474586874255e-05, "loss": 0.2318, "step": 508 }, { "epoch": 0.05275158047466059, "grad_norm": 0.4336269199848175, "learning_rate": 3.99459826234303e-05, "loss": 0.2087, "step": 509 }, { "epoch": 0.052855218157322005, "grad_norm": 0.5002438426017761, "learning_rate": 3.994548841250404e-05, "loss": 0.2842, "step": 510 }, { "epoch": 0.052958855839983415, "grad_norm": 0.4813220500946045, "learning_rate": 3.994499195415114e-05, "loss": 0.285, "step": 511 }, { "epoch": 0.05306249352264483, "grad_norm": 0.46560052037239075, "learning_rate": 3.994449324842756e-05, "loss": 0.2432, "step": 512 }, { "epoch": 0.05316613120530625, "grad_norm": 0.43234068155288696, "learning_rate": 3.994399229538948e-05, "loss": 0.2148, "step": 513 }, { "epoch": 0.05326976888796767, "grad_norm": 0.4758170247077942, "learning_rate": 3.994348909509335e-05, "loss": 0.2559, "step": 514 }, { "epoch": 0.053373406570629084, "grad_norm": 0.48988309502601624, "learning_rate": 3.9942983647595876e-05, "loss": 0.2804, "step": 515 }, { "epoch": 0.053477044253290494, "grad_norm": 0.47322186827659607, "learning_rate": 3.994247595295401e-05, "loss": 0.2072, "step": 516 }, { "epoch": 0.05358068193595191, "grad_norm": 0.431432843208313, "learning_rate": 3.994196601122495e-05, "loss": 0.2448, "step": 517 }, { "epoch": 0.05368431961861333, "grad_norm": 0.49172186851501465, "learning_rate": 3.9941453822466154e-05, "loss": 0.2353, "step": 518 }, { "epoch": 0.053787957301274746, "grad_norm": 0.5228767395019531, "learning_rate": 3.994093938673535e-05, "loss": 0.2858, "step": 519 }, { "epoch": 0.053891594983936156, "grad_norm": 0.5165749788284302, "learning_rate": 3.994042270409049e-05, "loss": 0.2751, "step": 520 }, { "epoch": 0.05399523266659757, "grad_norm": 0.5928406715393066, "learning_rate": 3.99399037745898e-05, "loss": 0.2959, "step": 521 }, { "epoch": 0.05409887034925899, "grad_norm": 0.4643728733062744, "learning_rate": 3.9939382598291744e-05, "loss": 0.2623, "step": 522 }, { "epoch": 0.05420250803192041, "grad_norm": 0.45560747385025024, "learning_rate": 3.993885917525506e-05, "loss": 0.2463, "step": 523 }, { "epoch": 0.054306145714581824, "grad_norm": 0.4651755392551422, "learning_rate": 3.993833350553872e-05, "loss": 0.2634, "step": 524 }, { "epoch": 0.054409783397243235, "grad_norm": 0.5355138778686523, "learning_rate": 3.9937805589201955e-05, "loss": 0.2612, "step": 525 }, { "epoch": 0.05451342107990465, "grad_norm": 0.45348060131073, "learning_rate": 3.993727542630425e-05, "loss": 0.2655, "step": 526 }, { "epoch": 0.05461705876256607, "grad_norm": 0.46827277541160583, "learning_rate": 3.993674301690534e-05, "loss": 0.2503, "step": 527 }, { "epoch": 0.054720696445227486, "grad_norm": 0.584631085395813, "learning_rate": 3.993620836106522e-05, "loss": 0.2676, "step": 528 }, { "epoch": 0.0548243341278889, "grad_norm": 0.5135299563407898, "learning_rate": 3.9935671458844136e-05, "loss": 0.2998, "step": 529 }, { "epoch": 0.05492797181055031, "grad_norm": 0.5041263103485107, "learning_rate": 3.9935132310302576e-05, "loss": 0.2631, "step": 530 }, { "epoch": 0.05503160949321173, "grad_norm": 0.4767071604728699, "learning_rate": 3.99345909155013e-05, "loss": 0.2727, "step": 531 }, { "epoch": 0.05513524717587315, "grad_norm": 0.4792020916938782, "learning_rate": 3.993404727450132e-05, "loss": 0.2791, "step": 532 }, { "epoch": 0.055238884858534565, "grad_norm": 0.4517286419868469, "learning_rate": 3.993350138736387e-05, "loss": 0.2462, "step": 533 }, { "epoch": 0.05534252254119598, "grad_norm": 0.5668163299560547, "learning_rate": 3.993295325415047e-05, "loss": 0.2838, "step": 534 }, { "epoch": 0.05544616022385739, "grad_norm": 0.5690762996673584, "learning_rate": 3.993240287492288e-05, "loss": 0.2764, "step": 535 }, { "epoch": 0.05554979790651881, "grad_norm": 0.5362303256988525, "learning_rate": 3.993185024974313e-05, "loss": 0.2808, "step": 536 }, { "epoch": 0.05565343558918023, "grad_norm": 0.5116853713989258, "learning_rate": 3.993129537867347e-05, "loss": 0.2977, "step": 537 }, { "epoch": 0.055757073271841644, "grad_norm": 0.4673402011394501, "learning_rate": 3.993073826177644e-05, "loss": 0.2497, "step": 538 }, { "epoch": 0.055860710954503054, "grad_norm": 0.5054620504379272, "learning_rate": 3.99301788991148e-05, "loss": 0.2774, "step": 539 }, { "epoch": 0.05596434863716447, "grad_norm": 0.5134178400039673, "learning_rate": 3.992961729075158e-05, "loss": 0.2455, "step": 540 }, { "epoch": 0.05606798631982589, "grad_norm": 0.4898240268230438, "learning_rate": 3.992905343675007e-05, "loss": 0.2442, "step": 541 }, { "epoch": 0.056171624002487305, "grad_norm": 0.4968765377998352, "learning_rate": 3.99284873371738e-05, "loss": 0.2789, "step": 542 }, { "epoch": 0.05627526168514872, "grad_norm": 0.6002929210662842, "learning_rate": 3.992791899208656e-05, "loss": 0.3064, "step": 543 }, { "epoch": 0.05637889936781013, "grad_norm": 0.46553024649620056, "learning_rate": 3.992734840155238e-05, "loss": 0.2779, "step": 544 }, { "epoch": 0.05648253705047155, "grad_norm": 0.44942498207092285, "learning_rate": 3.9926775565635555e-05, "loss": 0.2545, "step": 545 }, { "epoch": 0.05658617473313297, "grad_norm": 0.529929518699646, "learning_rate": 3.9926200484400644e-05, "loss": 0.2918, "step": 546 }, { "epoch": 0.056689812415794384, "grad_norm": 0.5951007008552551, "learning_rate": 3.992562315791244e-05, "loss": 0.295, "step": 547 }, { "epoch": 0.0567934500984558, "grad_norm": 0.5104761719703674, "learning_rate": 3.992504358623598e-05, "loss": 0.2625, "step": 548 }, { "epoch": 0.05689708778111721, "grad_norm": 0.5036949515342712, "learning_rate": 3.992446176943659e-05, "loss": 0.3084, "step": 549 }, { "epoch": 0.05700072546377863, "grad_norm": 0.5363962054252625, "learning_rate": 3.992387770757983e-05, "loss": 0.2859, "step": 550 }, { "epoch": 0.057104363146440046, "grad_norm": 0.5645461082458496, "learning_rate": 3.99232914007315e-05, "loss": 0.2702, "step": 551 }, { "epoch": 0.05720800082910146, "grad_norm": 0.4483615756034851, "learning_rate": 3.992270284895765e-05, "loss": 0.2359, "step": 552 }, { "epoch": 0.05731163851176288, "grad_norm": 0.5485745668411255, "learning_rate": 3.992211205232463e-05, "loss": 0.3096, "step": 553 }, { "epoch": 0.05741527619442429, "grad_norm": 0.4500156342983246, "learning_rate": 3.992151901089899e-05, "loss": 0.2573, "step": 554 }, { "epoch": 0.05751891387708571, "grad_norm": 0.49787795543670654, "learning_rate": 3.9920923724747555e-05, "loss": 0.2604, "step": 555 }, { "epoch": 0.057622551559747125, "grad_norm": 0.4587489366531372, "learning_rate": 3.9920326193937405e-05, "loss": 0.2344, "step": 556 }, { "epoch": 0.05772618924240854, "grad_norm": 0.5138572454452515, "learning_rate": 3.991972641853586e-05, "loss": 0.3073, "step": 557 }, { "epoch": 0.05782982692506995, "grad_norm": 0.5564447045326233, "learning_rate": 3.9919124398610514e-05, "loss": 0.3053, "step": 558 }, { "epoch": 0.05793346460773137, "grad_norm": 0.48643192648887634, "learning_rate": 3.991852013422919e-05, "loss": 0.2658, "step": 559 }, { "epoch": 0.058037102290392786, "grad_norm": 0.5111423134803772, "learning_rate": 3.9917913625459986e-05, "loss": 0.3184, "step": 560 }, { "epoch": 0.058140739973054203, "grad_norm": 0.534591019153595, "learning_rate": 3.9917304872371236e-05, "loss": 0.255, "step": 561 }, { "epoch": 0.05824437765571562, "grad_norm": 0.45287781953811646, "learning_rate": 3.991669387503153e-05, "loss": 0.2544, "step": 562 }, { "epoch": 0.05834801533837703, "grad_norm": 0.49371427297592163, "learning_rate": 3.991608063350973e-05, "loss": 0.2866, "step": 563 }, { "epoch": 0.05845165302103845, "grad_norm": 0.43072739243507385, "learning_rate": 3.9915465147874916e-05, "loss": 0.2397, "step": 564 }, { "epoch": 0.058555290703699865, "grad_norm": 0.5488777756690979, "learning_rate": 3.991484741819645e-05, "loss": 0.2936, "step": 565 }, { "epoch": 0.05865892838636128, "grad_norm": 0.5285367965698242, "learning_rate": 3.9914227444543936e-05, "loss": 0.3119, "step": 566 }, { "epoch": 0.0587625660690227, "grad_norm": 0.48357492685317993, "learning_rate": 3.991360522698723e-05, "loss": 0.2333, "step": 567 }, { "epoch": 0.05886620375168411, "grad_norm": 0.4516059160232544, "learning_rate": 3.991298076559645e-05, "loss": 0.257, "step": 568 }, { "epoch": 0.05896984143434553, "grad_norm": 0.4679553508758545, "learning_rate": 3.991235406044195e-05, "loss": 0.2778, "step": 569 }, { "epoch": 0.059073479117006944, "grad_norm": 0.5020044445991516, "learning_rate": 3.991172511159434e-05, "loss": 0.2706, "step": 570 }, { "epoch": 0.05917711679966836, "grad_norm": 0.4471382796764374, "learning_rate": 3.99110939191245e-05, "loss": 0.2608, "step": 571 }, { "epoch": 0.05928075448232978, "grad_norm": 0.48660770058631897, "learning_rate": 3.991046048310356e-05, "loss": 0.2749, "step": 572 }, { "epoch": 0.05938439216499119, "grad_norm": 0.5402244329452515, "learning_rate": 3.990982480360288e-05, "loss": 0.3266, "step": 573 }, { "epoch": 0.059488029847652606, "grad_norm": 0.49644187092781067, "learning_rate": 3.9909186880694086e-05, "loss": 0.2948, "step": 574 }, { "epoch": 0.05959166753031402, "grad_norm": 0.47150805592536926, "learning_rate": 3.990854671444906e-05, "loss": 0.3078, "step": 575 }, { "epoch": 0.05969530521297544, "grad_norm": 0.39560598134994507, "learning_rate": 3.990790430493995e-05, "loss": 0.1867, "step": 576 }, { "epoch": 0.05979894289563685, "grad_norm": 0.521929919719696, "learning_rate": 3.9907259652239125e-05, "loss": 0.2943, "step": 577 }, { "epoch": 0.05990258057829827, "grad_norm": 0.45911693572998047, "learning_rate": 3.9906612756419234e-05, "loss": 0.2251, "step": 578 }, { "epoch": 0.060006218260959684, "grad_norm": 0.5514270067214966, "learning_rate": 3.9905963617553154e-05, "loss": 0.3134, "step": 579 }, { "epoch": 0.0601098559436211, "grad_norm": 0.4795793294906616, "learning_rate": 3.990531223571404e-05, "loss": 0.2496, "step": 580 }, { "epoch": 0.06021349362628252, "grad_norm": 0.48294639587402344, "learning_rate": 3.990465861097529e-05, "loss": 0.2785, "step": 581 }, { "epoch": 0.06031713130894393, "grad_norm": 0.4807794690132141, "learning_rate": 3.990400274341055e-05, "loss": 0.29, "step": 582 }, { "epoch": 0.060420768991605346, "grad_norm": 0.5464568138122559, "learning_rate": 3.9903344633093724e-05, "loss": 0.2699, "step": 583 }, { "epoch": 0.06052440667426676, "grad_norm": 0.48656803369522095, "learning_rate": 3.9902684280098965e-05, "loss": 0.3006, "step": 584 }, { "epoch": 0.06062804435692818, "grad_norm": 0.4978223443031311, "learning_rate": 3.9902021684500677e-05, "loss": 0.25, "step": 585 }, { "epoch": 0.0607316820395896, "grad_norm": 0.4835212826728821, "learning_rate": 3.990135684637352e-05, "loss": 0.2632, "step": 586 }, { "epoch": 0.06083531972225101, "grad_norm": 0.47485238313674927, "learning_rate": 3.990068976579242e-05, "loss": 0.3159, "step": 587 }, { "epoch": 0.060938957404912425, "grad_norm": 0.4746256470680237, "learning_rate": 3.990002044283253e-05, "loss": 0.2545, "step": 588 }, { "epoch": 0.06104259508757384, "grad_norm": 0.4291842579841614, "learning_rate": 3.989934887756927e-05, "loss": 0.2674, "step": 589 }, { "epoch": 0.06114623277023526, "grad_norm": 0.47232916951179504, "learning_rate": 3.989867507007831e-05, "loss": 0.2618, "step": 590 }, { "epoch": 0.061249870452896676, "grad_norm": 0.49870309233665466, "learning_rate": 3.989799902043558e-05, "loss": 0.2889, "step": 591 }, { "epoch": 0.06135350813555809, "grad_norm": 0.5332955718040466, "learning_rate": 3.9897320728717254e-05, "loss": 0.3145, "step": 592 }, { "epoch": 0.061457145818219504, "grad_norm": 0.45895615220069885, "learning_rate": 3.9896640194999754e-05, "loss": 0.2917, "step": 593 }, { "epoch": 0.06156078350088092, "grad_norm": 0.5079963803291321, "learning_rate": 3.989595741935977e-05, "loss": 0.2516, "step": 594 }, { "epoch": 0.06166442118354234, "grad_norm": 0.42996639013290405, "learning_rate": 3.989527240187424e-05, "loss": 0.2601, "step": 595 }, { "epoch": 0.06176805886620375, "grad_norm": 0.48144781589508057, "learning_rate": 3.989458514262034e-05, "loss": 0.2976, "step": 596 }, { "epoch": 0.061871696548865165, "grad_norm": 0.5348926782608032, "learning_rate": 3.98938956416755e-05, "loss": 0.3098, "step": 597 }, { "epoch": 0.06197533423152658, "grad_norm": 0.46923643350601196, "learning_rate": 3.9893203899117445e-05, "loss": 0.2442, "step": 598 }, { "epoch": 0.062078971914188, "grad_norm": 0.5009055733680725, "learning_rate": 3.989250991502408e-05, "loss": 0.2768, "step": 599 }, { "epoch": 0.06218260959684942, "grad_norm": 0.47320249676704407, "learning_rate": 3.989181368947363e-05, "loss": 0.224, "step": 600 }, { "epoch": 0.06228624727951083, "grad_norm": 0.49606916308403015, "learning_rate": 3.989111522254453e-05, "loss": 0.2572, "step": 601 }, { "epoch": 0.062389884962172244, "grad_norm": 0.4603443443775177, "learning_rate": 3.9890414514315504e-05, "loss": 0.2611, "step": 602 }, { "epoch": 0.06249352264483366, "grad_norm": 0.4671558141708374, "learning_rate": 3.988971156486548e-05, "loss": 0.2703, "step": 603 }, { "epoch": 0.06259716032749507, "grad_norm": 0.4635378420352936, "learning_rate": 3.988900637427367e-05, "loss": 0.2564, "step": 604 }, { "epoch": 0.06270079801015649, "grad_norm": 0.6459101438522339, "learning_rate": 3.9888298942619555e-05, "loss": 0.3151, "step": 605 }, { "epoch": 0.0628044356928179, "grad_norm": 0.48071354627609253, "learning_rate": 3.988758926998282e-05, "loss": 0.2886, "step": 606 }, { "epoch": 0.06290807337547932, "grad_norm": 0.4736165404319763, "learning_rate": 3.988687735644345e-05, "loss": 0.237, "step": 607 }, { "epoch": 0.06301171105814074, "grad_norm": 0.45571112632751465, "learning_rate": 3.988616320208165e-05, "loss": 0.2198, "step": 608 }, { "epoch": 0.06311534874080216, "grad_norm": 0.48835575580596924, "learning_rate": 3.988544680697789e-05, "loss": 0.3071, "step": 609 }, { "epoch": 0.06321898642346357, "grad_norm": 0.49265989661216736, "learning_rate": 3.98847281712129e-05, "loss": 0.251, "step": 610 }, { "epoch": 0.06332262410612499, "grad_norm": 0.5096911787986755, "learning_rate": 3.988400729486765e-05, "loss": 0.2774, "step": 611 }, { "epoch": 0.06342626178878641, "grad_norm": 0.3974534273147583, "learning_rate": 3.988328417802337e-05, "loss": 0.2419, "step": 612 }, { "epoch": 0.06352989947144781, "grad_norm": 0.46511921286582947, "learning_rate": 3.988255882076154e-05, "loss": 0.2559, "step": 613 }, { "epoch": 0.06363353715410923, "grad_norm": 0.46322038769721985, "learning_rate": 3.988183122316389e-05, "loss": 0.2517, "step": 614 }, { "epoch": 0.06373717483677065, "grad_norm": 0.4440378248691559, "learning_rate": 3.98811013853124e-05, "loss": 0.2646, "step": 615 }, { "epoch": 0.06384081251943206, "grad_norm": 0.5909380912780762, "learning_rate": 3.988036930728931e-05, "loss": 0.3207, "step": 616 }, { "epoch": 0.06394445020209348, "grad_norm": 0.45240291953086853, "learning_rate": 3.9879634989177114e-05, "loss": 0.2645, "step": 617 }, { "epoch": 0.0640480878847549, "grad_norm": 0.47876596450805664, "learning_rate": 3.987889843105856e-05, "loss": 0.2787, "step": 618 }, { "epoch": 0.06415172556741632, "grad_norm": 0.4310585558414459, "learning_rate": 3.9878159633016624e-05, "loss": 0.2348, "step": 619 }, { "epoch": 0.06425536325007773, "grad_norm": 0.44966933131217957, "learning_rate": 3.987741859513456e-05, "loss": 0.2453, "step": 620 }, { "epoch": 0.06435900093273915, "grad_norm": 0.45143213868141174, "learning_rate": 3.987667531749587e-05, "loss": 0.2788, "step": 621 }, { "epoch": 0.06446263861540057, "grad_norm": 0.47991883754730225, "learning_rate": 3.987592980018431e-05, "loss": 0.2786, "step": 622 }, { "epoch": 0.06456627629806197, "grad_norm": 0.4811300039291382, "learning_rate": 3.987518204328387e-05, "loss": 0.2806, "step": 623 }, { "epoch": 0.06466991398072339, "grad_norm": 0.6241214275360107, "learning_rate": 3.987443204687882e-05, "loss": 0.3336, "step": 624 }, { "epoch": 0.0647735516633848, "grad_norm": 0.6035299301147461, "learning_rate": 3.987367981105366e-05, "loss": 0.3272, "step": 625 }, { "epoch": 0.06487718934604622, "grad_norm": 0.4932394325733185, "learning_rate": 3.987292533589315e-05, "loss": 0.2865, "step": 626 }, { "epoch": 0.06498082702870764, "grad_norm": 0.49736925959587097, "learning_rate": 3.9872168621482304e-05, "loss": 0.2722, "step": 627 }, { "epoch": 0.06508446471136906, "grad_norm": 0.4849455654621124, "learning_rate": 3.98714096679064e-05, "loss": 0.2399, "step": 628 }, { "epoch": 0.06518810239403047, "grad_norm": 0.4412994980812073, "learning_rate": 3.9870648475250944e-05, "loss": 0.2704, "step": 629 }, { "epoch": 0.06529174007669189, "grad_norm": 0.4405463933944702, "learning_rate": 3.98698850436017e-05, "loss": 0.2294, "step": 630 }, { "epoch": 0.06539537775935331, "grad_norm": 0.40049633383750916, "learning_rate": 3.98691193730447e-05, "loss": 0.2128, "step": 631 }, { "epoch": 0.06549901544201471, "grad_norm": 0.45948949456214905, "learning_rate": 3.9868351463666213e-05, "loss": 0.2207, "step": 632 }, { "epoch": 0.06560265312467613, "grad_norm": 0.4937278628349304, "learning_rate": 3.986758131555278e-05, "loss": 0.275, "step": 633 }, { "epoch": 0.06570629080733754, "grad_norm": 0.477566123008728, "learning_rate": 3.9866808928791154e-05, "loss": 0.2946, "step": 634 }, { "epoch": 0.06580992848999896, "grad_norm": 0.514164388179779, "learning_rate": 3.986603430346839e-05, "loss": 0.2787, "step": 635 }, { "epoch": 0.06591356617266038, "grad_norm": 0.4273333251476288, "learning_rate": 3.9865257439671765e-05, "loss": 0.22, "step": 636 }, { "epoch": 0.0660172038553218, "grad_norm": 0.5105968713760376, "learning_rate": 3.9864478337488817e-05, "loss": 0.2503, "step": 637 }, { "epoch": 0.06612084153798321, "grad_norm": 0.4005119204521179, "learning_rate": 3.986369699700732e-05, "loss": 0.2327, "step": 638 }, { "epoch": 0.06622447922064463, "grad_norm": 0.5089367032051086, "learning_rate": 3.986291341831533e-05, "loss": 0.2827, "step": 639 }, { "epoch": 0.06632811690330605, "grad_norm": 0.4681312143802643, "learning_rate": 3.986212760150113e-05, "loss": 0.2527, "step": 640 }, { "epoch": 0.06643175458596746, "grad_norm": 0.5034751892089844, "learning_rate": 3.986133954665327e-05, "loss": 0.3003, "step": 641 }, { "epoch": 0.06653539226862887, "grad_norm": 0.4600752294063568, "learning_rate": 3.986054925386055e-05, "loss": 0.2519, "step": 642 }, { "epoch": 0.06663902995129029, "grad_norm": 0.48441654443740845, "learning_rate": 3.9859756723212e-05, "loss": 0.3041, "step": 643 }, { "epoch": 0.0667426676339517, "grad_norm": 0.4735645651817322, "learning_rate": 3.985896195479694e-05, "loss": 0.2661, "step": 644 }, { "epoch": 0.06684630531661312, "grad_norm": 0.3847654461860657, "learning_rate": 3.985816494870492e-05, "loss": 0.2302, "step": 645 }, { "epoch": 0.06694994299927454, "grad_norm": 0.45513978600502014, "learning_rate": 3.985736570502575e-05, "loss": 0.2386, "step": 646 }, { "epoch": 0.06705358068193595, "grad_norm": 0.4446656405925751, "learning_rate": 3.985656422384947e-05, "loss": 0.232, "step": 647 }, { "epoch": 0.06715721836459737, "grad_norm": 0.473247766494751, "learning_rate": 3.985576050526641e-05, "loss": 0.2098, "step": 648 }, { "epoch": 0.06726085604725879, "grad_norm": 0.5222258567810059, "learning_rate": 3.985495454936712e-05, "loss": 0.2534, "step": 649 }, { "epoch": 0.0673644937299202, "grad_norm": 0.44592931866645813, "learning_rate": 3.985414635624242e-05, "loss": 0.2113, "step": 650 }, { "epoch": 0.06746813141258161, "grad_norm": 0.5243046283721924, "learning_rate": 3.9853335925983366e-05, "loss": 0.2934, "step": 651 }, { "epoch": 0.06757176909524303, "grad_norm": 0.45387986302375793, "learning_rate": 3.985252325868129e-05, "loss": 0.2599, "step": 652 }, { "epoch": 0.06767540677790444, "grad_norm": 0.5690520405769348, "learning_rate": 3.985170835442775e-05, "loss": 0.2906, "step": 653 }, { "epoch": 0.06777904446056586, "grad_norm": 0.605476975440979, "learning_rate": 3.985089121331457e-05, "loss": 0.302, "step": 654 }, { "epoch": 0.06788268214322728, "grad_norm": 0.5086917877197266, "learning_rate": 3.985007183543383e-05, "loss": 0.3054, "step": 655 }, { "epoch": 0.0679863198258887, "grad_norm": 0.4781462252140045, "learning_rate": 3.9849250220877856e-05, "loss": 0.2927, "step": 656 }, { "epoch": 0.06808995750855011, "grad_norm": 0.44163042306900024, "learning_rate": 3.984842636973921e-05, "loss": 0.2179, "step": 657 }, { "epoch": 0.06819359519121153, "grad_norm": 0.49941644072532654, "learning_rate": 3.9847600282110755e-05, "loss": 0.2359, "step": 658 }, { "epoch": 0.06829723287387295, "grad_norm": 0.5164234042167664, "learning_rate": 3.984677195808554e-05, "loss": 0.2928, "step": 659 }, { "epoch": 0.06840087055653436, "grad_norm": 0.5161421298980713, "learning_rate": 3.9845941397756924e-05, "loss": 0.3114, "step": 660 }, { "epoch": 0.06850450823919577, "grad_norm": 0.48917636275291443, "learning_rate": 3.9845108601218474e-05, "loss": 0.2335, "step": 661 }, { "epoch": 0.06860814592185718, "grad_norm": 0.44179776310920715, "learning_rate": 3.9844273568564036e-05, "loss": 0.2595, "step": 662 }, { "epoch": 0.0687117836045186, "grad_norm": 0.5596027970314026, "learning_rate": 3.98434362998877e-05, "loss": 0.2839, "step": 663 }, { "epoch": 0.06881542128718002, "grad_norm": 0.5134223103523254, "learning_rate": 3.9842596795283814e-05, "loss": 0.297, "step": 664 }, { "epoch": 0.06891905896984143, "grad_norm": 0.4692731201648712, "learning_rate": 3.984175505484697e-05, "loss": 0.2729, "step": 665 }, { "epoch": 0.06902269665250285, "grad_norm": 0.37804538011550903, "learning_rate": 3.9840911078672003e-05, "loss": 0.2095, "step": 666 }, { "epoch": 0.06912633433516427, "grad_norm": 0.5090510845184326, "learning_rate": 3.9840064866854026e-05, "loss": 0.2432, "step": 667 }, { "epoch": 0.06922997201782569, "grad_norm": 0.38887301087379456, "learning_rate": 3.983921641948838e-05, "loss": 0.1764, "step": 668 }, { "epoch": 0.0693336097004871, "grad_norm": 0.5521963834762573, "learning_rate": 3.9838365736670665e-05, "loss": 0.2711, "step": 669 }, { "epoch": 0.0694372473831485, "grad_norm": 0.6766364574432373, "learning_rate": 3.983751281849674e-05, "loss": 0.3268, "step": 670 }, { "epoch": 0.06954088506580992, "grad_norm": 0.44805410504341125, "learning_rate": 3.9836657665062704e-05, "loss": 0.2156, "step": 671 }, { "epoch": 0.06964452274847134, "grad_norm": 0.4524245262145996, "learning_rate": 3.983580027646492e-05, "loss": 0.2797, "step": 672 }, { "epoch": 0.06974816043113276, "grad_norm": 0.4633697271347046, "learning_rate": 3.983494065280001e-05, "loss": 0.241, "step": 673 }, { "epoch": 0.06985179811379418, "grad_norm": 0.4903186559677124, "learning_rate": 3.983407879416481e-05, "loss": 0.2816, "step": 674 }, { "epoch": 0.06995543579645559, "grad_norm": 0.4883442223072052, "learning_rate": 3.983321470065644e-05, "loss": 0.238, "step": 675 }, { "epoch": 0.07005907347911701, "grad_norm": 0.4707334637641907, "learning_rate": 3.983234837237228e-05, "loss": 0.2567, "step": 676 }, { "epoch": 0.07016271116177843, "grad_norm": 0.5906460285186768, "learning_rate": 3.983147980940993e-05, "loss": 0.2579, "step": 677 }, { "epoch": 0.07026634884443984, "grad_norm": 0.5003019571304321, "learning_rate": 3.983060901186726e-05, "loss": 0.2549, "step": 678 }, { "epoch": 0.07036998652710126, "grad_norm": 0.5827634334564209, "learning_rate": 3.98297359798424e-05, "loss": 0.3284, "step": 679 }, { "epoch": 0.07047362420976266, "grad_norm": 0.45304641127586365, "learning_rate": 3.9828860713433705e-05, "loss": 0.2802, "step": 680 }, { "epoch": 0.07057726189242408, "grad_norm": 0.5432493686676025, "learning_rate": 3.982798321273982e-05, "loss": 0.2634, "step": 681 }, { "epoch": 0.0706808995750855, "grad_norm": 0.5457451343536377, "learning_rate": 3.9827103477859605e-05, "loss": 0.2908, "step": 682 }, { "epoch": 0.07078453725774692, "grad_norm": 0.44750288128852844, "learning_rate": 3.9826221508892196e-05, "loss": 0.2125, "step": 683 }, { "epoch": 0.07088817494040833, "grad_norm": 0.5408027172088623, "learning_rate": 3.9825337305936965e-05, "loss": 0.2741, "step": 684 }, { "epoch": 0.07099181262306975, "grad_norm": 0.5531283617019653, "learning_rate": 3.982445086909354e-05, "loss": 0.2839, "step": 685 }, { "epoch": 0.07109545030573117, "grad_norm": 0.5220621824264526, "learning_rate": 3.982356219846182e-05, "loss": 0.2682, "step": 686 }, { "epoch": 0.07119908798839258, "grad_norm": 0.5586988925933838, "learning_rate": 3.9822671294141916e-05, "loss": 0.2544, "step": 687 }, { "epoch": 0.071302725671054, "grad_norm": 0.4634181261062622, "learning_rate": 3.9821778156234236e-05, "loss": 0.2481, "step": 688 }, { "epoch": 0.0714063633537154, "grad_norm": 0.46268972754478455, "learning_rate": 3.9820882784839405e-05, "loss": 0.2382, "step": 689 }, { "epoch": 0.07151000103637682, "grad_norm": 0.5548827052116394, "learning_rate": 3.9819985180058314e-05, "loss": 0.3154, "step": 690 }, { "epoch": 0.07161363871903824, "grad_norm": 0.40516266226768494, "learning_rate": 3.9819085341992106e-05, "loss": 0.2322, "step": 691 }, { "epoch": 0.07171727640169966, "grad_norm": 0.49902236461639404, "learning_rate": 3.981818327074216e-05, "loss": 0.2632, "step": 692 }, { "epoch": 0.07182091408436107, "grad_norm": 0.5100747346878052, "learning_rate": 3.9817278966410134e-05, "loss": 0.245, "step": 693 }, { "epoch": 0.07192455176702249, "grad_norm": 0.5476238131523132, "learning_rate": 3.981637242909793e-05, "loss": 0.2564, "step": 694 }, { "epoch": 0.07202818944968391, "grad_norm": 0.5173237919807434, "learning_rate": 3.981546365890768e-05, "loss": 0.2933, "step": 695 }, { "epoch": 0.07213182713234532, "grad_norm": 0.4427679777145386, "learning_rate": 3.9814552655941784e-05, "loss": 0.2186, "step": 696 }, { "epoch": 0.07223546481500674, "grad_norm": 0.4230489432811737, "learning_rate": 3.9813639420302906e-05, "loss": 0.2543, "step": 697 }, { "epoch": 0.07233910249766816, "grad_norm": 0.42406851053237915, "learning_rate": 3.9812723952093936e-05, "loss": 0.2499, "step": 698 }, { "epoch": 0.07244274018032956, "grad_norm": 0.41877564787864685, "learning_rate": 3.981180625141803e-05, "loss": 0.2248, "step": 699 }, { "epoch": 0.07254637786299098, "grad_norm": 0.5359909534454346, "learning_rate": 3.981088631837859e-05, "loss": 0.2902, "step": 700 }, { "epoch": 0.0726500155456524, "grad_norm": 0.43423157930374146, "learning_rate": 3.980996415307928e-05, "loss": 0.2227, "step": 701 }, { "epoch": 0.07275365322831381, "grad_norm": 0.5584090352058411, "learning_rate": 3.980903975562401e-05, "loss": 0.2968, "step": 702 }, { "epoch": 0.07285729091097523, "grad_norm": 0.4519881010055542, "learning_rate": 3.980811312611692e-05, "loss": 0.2353, "step": 703 }, { "epoch": 0.07296092859363665, "grad_norm": 0.5242766737937927, "learning_rate": 3.980718426466244e-05, "loss": 0.3233, "step": 704 }, { "epoch": 0.07306456627629807, "grad_norm": 0.5491629838943481, "learning_rate": 3.980625317136523e-05, "loss": 0.3075, "step": 705 }, { "epoch": 0.07316820395895948, "grad_norm": 0.39431241154670715, "learning_rate": 3.980531984633021e-05, "loss": 0.2077, "step": 706 }, { "epoch": 0.0732718416416209, "grad_norm": 0.5462802052497864, "learning_rate": 3.980438428966253e-05, "loss": 0.3186, "step": 707 }, { "epoch": 0.0733754793242823, "grad_norm": 0.5003985166549683, "learning_rate": 3.980344650146761e-05, "loss": 0.251, "step": 708 }, { "epoch": 0.07347911700694372, "grad_norm": 0.4941883981227875, "learning_rate": 3.980250648185113e-05, "loss": 0.2518, "step": 709 }, { "epoch": 0.07358275468960514, "grad_norm": 0.4943901002407074, "learning_rate": 3.9801564230919006e-05, "loss": 0.2836, "step": 710 }, { "epoch": 0.07368639237226655, "grad_norm": 0.4397679567337036, "learning_rate": 3.98006197487774e-05, "loss": 0.2366, "step": 711 }, { "epoch": 0.07379003005492797, "grad_norm": 0.46421751379966736, "learning_rate": 3.9799673035532745e-05, "loss": 0.2499, "step": 712 }, { "epoch": 0.07389366773758939, "grad_norm": 0.4776816964149475, "learning_rate": 3.9798724091291715e-05, "loss": 0.3052, "step": 713 }, { "epoch": 0.0739973054202508, "grad_norm": 0.506424069404602, "learning_rate": 3.979777291616122e-05, "loss": 0.2826, "step": 714 }, { "epoch": 0.07410094310291222, "grad_norm": 0.47890761494636536, "learning_rate": 3.979681951024846e-05, "loss": 0.2883, "step": 715 }, { "epoch": 0.07420458078557364, "grad_norm": 0.5536207556724548, "learning_rate": 3.9795863873660846e-05, "loss": 0.3255, "step": 716 }, { "epoch": 0.07430821846823506, "grad_norm": 0.4948398470878601, "learning_rate": 3.979490600650607e-05, "loss": 0.2697, "step": 717 }, { "epoch": 0.07441185615089646, "grad_norm": 0.4891558885574341, "learning_rate": 3.9793945908892057e-05, "loss": 0.303, "step": 718 }, { "epoch": 0.07451549383355788, "grad_norm": 0.4618590474128723, "learning_rate": 3.979298358092698e-05, "loss": 0.2451, "step": 719 }, { "epoch": 0.0746191315162193, "grad_norm": 0.4500598907470703, "learning_rate": 3.9792019022719294e-05, "loss": 0.2535, "step": 720 }, { "epoch": 0.07472276919888071, "grad_norm": 0.5183244943618774, "learning_rate": 3.9791052234377663e-05, "loss": 0.2796, "step": 721 }, { "epoch": 0.07482640688154213, "grad_norm": 0.45024576783180237, "learning_rate": 3.979008321601104e-05, "loss": 0.2339, "step": 722 }, { "epoch": 0.07493004456420355, "grad_norm": 0.5016899704933167, "learning_rate": 3.9789111967728595e-05, "loss": 0.2972, "step": 723 }, { "epoch": 0.07503368224686496, "grad_norm": 0.4528055489063263, "learning_rate": 3.9788138489639786e-05, "loss": 0.2613, "step": 724 }, { "epoch": 0.07513731992952638, "grad_norm": 0.4231579899787903, "learning_rate": 3.9787162781854284e-05, "loss": 0.2259, "step": 725 }, { "epoch": 0.0752409576121878, "grad_norm": 0.47760093212127686, "learning_rate": 3.978618484448204e-05, "loss": 0.2637, "step": 726 }, { "epoch": 0.0753445952948492, "grad_norm": 0.4602471888065338, "learning_rate": 3.978520467763325e-05, "loss": 0.2547, "step": 727 }, { "epoch": 0.07544823297751062, "grad_norm": 0.5210710167884827, "learning_rate": 3.978422228141836e-05, "loss": 0.2485, "step": 728 }, { "epoch": 0.07555187066017204, "grad_norm": 0.47295230627059937, "learning_rate": 3.9783237655948044e-05, "loss": 0.2633, "step": 729 }, { "epoch": 0.07565550834283345, "grad_norm": 0.4705461859703064, "learning_rate": 3.9782250801333274e-05, "loss": 0.2642, "step": 730 }, { "epoch": 0.07575914602549487, "grad_norm": 0.4627399444580078, "learning_rate": 3.978126171768523e-05, "loss": 0.2571, "step": 731 }, { "epoch": 0.07586278370815629, "grad_norm": 0.476418137550354, "learning_rate": 3.978027040511537e-05, "loss": 0.2702, "step": 732 }, { "epoch": 0.0759664213908177, "grad_norm": 0.5001246333122253, "learning_rate": 3.977927686373539e-05, "loss": 0.2578, "step": 733 }, { "epoch": 0.07607005907347912, "grad_norm": 0.46971946954727173, "learning_rate": 3.977828109365724e-05, "loss": 0.2661, "step": 734 }, { "epoch": 0.07617369675614054, "grad_norm": 0.4575338065624237, "learning_rate": 3.9777283094993115e-05, "loss": 0.2778, "step": 735 }, { "epoch": 0.07627733443880196, "grad_norm": 0.4811464846134186, "learning_rate": 3.9776282867855475e-05, "loss": 0.247, "step": 736 }, { "epoch": 0.07638097212146336, "grad_norm": 0.5836530923843384, "learning_rate": 3.9775280412357035e-05, "loss": 0.2472, "step": 737 }, { "epoch": 0.07648460980412478, "grad_norm": 0.4331651031970978, "learning_rate": 3.977427572861073e-05, "loss": 0.2376, "step": 738 }, { "epoch": 0.07658824748678619, "grad_norm": 0.42944300174713135, "learning_rate": 3.977326881672978e-05, "loss": 0.2161, "step": 739 }, { "epoch": 0.07669188516944761, "grad_norm": 0.4123460352420807, "learning_rate": 3.977225967682764e-05, "loss": 0.2387, "step": 740 }, { "epoch": 0.07679552285210903, "grad_norm": 0.4910091757774353, "learning_rate": 3.977124830901802e-05, "loss": 0.3004, "step": 741 }, { "epoch": 0.07689916053477044, "grad_norm": 0.552148163318634, "learning_rate": 3.977023471341487e-05, "loss": 0.2798, "step": 742 }, { "epoch": 0.07700279821743186, "grad_norm": 0.4576529860496521, "learning_rate": 3.9769218890132404e-05, "loss": 0.2756, "step": 743 }, { "epoch": 0.07710643590009328, "grad_norm": 0.5145983695983887, "learning_rate": 3.9768200839285086e-05, "loss": 0.231, "step": 744 }, { "epoch": 0.0772100735827547, "grad_norm": 0.4392525851726532, "learning_rate": 3.976718056098763e-05, "loss": 0.2115, "step": 745 }, { "epoch": 0.0773137112654161, "grad_norm": 0.465648353099823, "learning_rate": 3.9766158055354996e-05, "loss": 0.2376, "step": 746 }, { "epoch": 0.07741734894807752, "grad_norm": 0.5436767935752869, "learning_rate": 3.97651333225024e-05, "loss": 0.2779, "step": 747 }, { "epoch": 0.07752098663073893, "grad_norm": 0.523582935333252, "learning_rate": 3.9764106362545305e-05, "loss": 0.2704, "step": 748 }, { "epoch": 0.07762462431340035, "grad_norm": 0.4481733441352844, "learning_rate": 3.9763077175599426e-05, "loss": 0.2684, "step": 749 }, { "epoch": 0.07772826199606177, "grad_norm": 0.4326241612434387, "learning_rate": 3.9762045761780734e-05, "loss": 0.2551, "step": 750 }, { "epoch": 0.07783189967872318, "grad_norm": 0.526773989200592, "learning_rate": 3.9761012121205455e-05, "loss": 0.297, "step": 751 }, { "epoch": 0.0779355373613846, "grad_norm": 0.46273472905158997, "learning_rate": 3.9759976253990046e-05, "loss": 0.2507, "step": 752 }, { "epoch": 0.07803917504404602, "grad_norm": 0.44620481133461, "learning_rate": 3.975893816025123e-05, "loss": 0.2264, "step": 753 }, { "epoch": 0.07814281272670744, "grad_norm": 0.5178984999656677, "learning_rate": 3.975789784010597e-05, "loss": 0.2131, "step": 754 }, { "epoch": 0.07824645040936885, "grad_norm": 0.4811955392360687, "learning_rate": 3.97568552936715e-05, "loss": 0.2773, "step": 755 }, { "epoch": 0.07835008809203026, "grad_norm": 0.4600077271461487, "learning_rate": 3.975581052106529e-05, "loss": 0.235, "step": 756 }, { "epoch": 0.07845372577469167, "grad_norm": 0.4501517713069916, "learning_rate": 3.975476352240506e-05, "loss": 0.2325, "step": 757 }, { "epoch": 0.07855736345735309, "grad_norm": 0.4092719852924347, "learning_rate": 3.9753714297808785e-05, "loss": 0.2239, "step": 758 }, { "epoch": 0.07866100114001451, "grad_norm": 0.5300100445747375, "learning_rate": 3.975266284739469e-05, "loss": 0.2852, "step": 759 }, { "epoch": 0.07876463882267593, "grad_norm": 0.4446985423564911, "learning_rate": 3.9751609171281255e-05, "loss": 0.2648, "step": 760 }, { "epoch": 0.07886827650533734, "grad_norm": 0.5151501297950745, "learning_rate": 3.97505532695872e-05, "loss": 0.257, "step": 761 }, { "epoch": 0.07897191418799876, "grad_norm": 0.4873696565628052, "learning_rate": 3.974949514243151e-05, "loss": 0.2493, "step": 762 }, { "epoch": 0.07907555187066018, "grad_norm": 0.3995228409767151, "learning_rate": 3.9748434789933406e-05, "loss": 0.2318, "step": 763 }, { "epoch": 0.0791791895533216, "grad_norm": 0.5084617137908936, "learning_rate": 3.974737221221238e-05, "loss": 0.2919, "step": 764 }, { "epoch": 0.079282827235983, "grad_norm": 0.539893388748169, "learning_rate": 3.974630740938813e-05, "loss": 0.2722, "step": 765 }, { "epoch": 0.07938646491864441, "grad_norm": 0.466237336397171, "learning_rate": 3.974524038158067e-05, "loss": 0.2258, "step": 766 }, { "epoch": 0.07949010260130583, "grad_norm": 0.5253474712371826, "learning_rate": 3.9744171128910214e-05, "loss": 0.3033, "step": 767 }, { "epoch": 0.07959374028396725, "grad_norm": 0.3932199478149414, "learning_rate": 3.974309965149725e-05, "loss": 0.1973, "step": 768 }, { "epoch": 0.07969737796662867, "grad_norm": 0.5772408246994019, "learning_rate": 3.974202594946251e-05, "loss": 0.3122, "step": 769 }, { "epoch": 0.07980101564929008, "grad_norm": 0.5734532475471497, "learning_rate": 3.9740950022926974e-05, "loss": 0.2943, "step": 770 }, { "epoch": 0.0799046533319515, "grad_norm": 0.46139439940452576, "learning_rate": 3.973987187201188e-05, "loss": 0.2527, "step": 771 }, { "epoch": 0.08000829101461292, "grad_norm": 0.4981940686702728, "learning_rate": 3.9738791496838703e-05, "loss": 0.2318, "step": 772 }, { "epoch": 0.08011192869727433, "grad_norm": 0.5667979121208191, "learning_rate": 3.973770889752919e-05, "loss": 0.2542, "step": 773 }, { "epoch": 0.08021556637993575, "grad_norm": 0.47905823588371277, "learning_rate": 3.973662407420532e-05, "loss": 0.2411, "step": 774 }, { "epoch": 0.08031920406259715, "grad_norm": 0.5183667540550232, "learning_rate": 3.973553702698933e-05, "loss": 0.2745, "step": 775 }, { "epoch": 0.08042284174525857, "grad_norm": 0.48042669892311096, "learning_rate": 3.9734447756003704e-05, "loss": 0.2382, "step": 776 }, { "epoch": 0.08052647942791999, "grad_norm": 0.48477572202682495, "learning_rate": 3.973335626137119e-05, "loss": 0.2375, "step": 777 }, { "epoch": 0.0806301171105814, "grad_norm": 0.4594244062900543, "learning_rate": 3.973226254321477e-05, "loss": 0.2366, "step": 778 }, { "epoch": 0.08073375479324282, "grad_norm": 0.5444444417953491, "learning_rate": 3.973116660165767e-05, "loss": 0.298, "step": 779 }, { "epoch": 0.08083739247590424, "grad_norm": 0.5429214239120483, "learning_rate": 3.9730068436823395e-05, "loss": 0.2718, "step": 780 }, { "epoch": 0.08094103015856566, "grad_norm": 0.4269948899745941, "learning_rate": 3.972896804883568e-05, "loss": 0.1915, "step": 781 }, { "epoch": 0.08104466784122707, "grad_norm": 0.511663556098938, "learning_rate": 3.972786543781852e-05, "loss": 0.2616, "step": 782 }, { "epoch": 0.08114830552388849, "grad_norm": 0.49156785011291504, "learning_rate": 3.972676060389614e-05, "loss": 0.2885, "step": 783 }, { "epoch": 0.0812519432065499, "grad_norm": 0.4817125201225281, "learning_rate": 3.972565354719305e-05, "loss": 0.2804, "step": 784 }, { "epoch": 0.08135558088921131, "grad_norm": 0.4289361238479614, "learning_rate": 3.9724544267833975e-05, "loss": 0.2031, "step": 785 }, { "epoch": 0.08145921857187273, "grad_norm": 0.44129499793052673, "learning_rate": 3.9723432765943916e-05, "loss": 0.2282, "step": 786 }, { "epoch": 0.08156285625453415, "grad_norm": 0.49618256092071533, "learning_rate": 3.972231904164812e-05, "loss": 0.283, "step": 787 }, { "epoch": 0.08166649393719556, "grad_norm": 0.44237473607063293, "learning_rate": 3.9721203095072066e-05, "loss": 0.2416, "step": 788 }, { "epoch": 0.08177013161985698, "grad_norm": 0.45676106214523315, "learning_rate": 3.972008492634151e-05, "loss": 0.2704, "step": 789 }, { "epoch": 0.0818737693025184, "grad_norm": 0.45083701610565186, "learning_rate": 3.971896453558244e-05, "loss": 0.2541, "step": 790 }, { "epoch": 0.08197740698517982, "grad_norm": 0.34744521975517273, "learning_rate": 3.971784192292109e-05, "loss": 0.166, "step": 791 }, { "epoch": 0.08208104466784123, "grad_norm": 0.41484206914901733, "learning_rate": 3.971671708848398e-05, "loss": 0.2457, "step": 792 }, { "epoch": 0.08218468235050265, "grad_norm": 0.4614809453487396, "learning_rate": 3.971559003239782e-05, "loss": 0.2399, "step": 793 }, { "epoch": 0.08228832003316405, "grad_norm": 0.502249002456665, "learning_rate": 3.971446075478964e-05, "loss": 0.2933, "step": 794 }, { "epoch": 0.08239195771582547, "grad_norm": 0.42531266808509827, "learning_rate": 3.971332925578666e-05, "loss": 0.2611, "step": 795 }, { "epoch": 0.08249559539848689, "grad_norm": 0.4552249610424042, "learning_rate": 3.971219553551639e-05, "loss": 0.2785, "step": 796 }, { "epoch": 0.0825992330811483, "grad_norm": 0.43548455834388733, "learning_rate": 3.9711059594106566e-05, "loss": 0.2453, "step": 797 }, { "epoch": 0.08270287076380972, "grad_norm": 0.5675352215766907, "learning_rate": 3.970992143168519e-05, "loss": 0.2832, "step": 798 }, { "epoch": 0.08280650844647114, "grad_norm": 0.48082834482192993, "learning_rate": 3.9708781048380506e-05, "loss": 0.2183, "step": 799 }, { "epoch": 0.08291014612913256, "grad_norm": 0.5138378143310547, "learning_rate": 3.9707638444321015e-05, "loss": 0.2467, "step": 800 }, { "epoch": 0.08301378381179397, "grad_norm": 0.5684926509857178, "learning_rate": 3.970649361963545e-05, "loss": 0.2779, "step": 801 }, { "epoch": 0.08311742149445539, "grad_norm": 0.4427591562271118, "learning_rate": 3.9705346574452825e-05, "loss": 0.2376, "step": 802 }, { "epoch": 0.0832210591771168, "grad_norm": 0.5298888087272644, "learning_rate": 3.970419730890238e-05, "loss": 0.2352, "step": 803 }, { "epoch": 0.08332469685977821, "grad_norm": 0.42755240201950073, "learning_rate": 3.970304582311362e-05, "loss": 0.2489, "step": 804 }, { "epoch": 0.08342833454243963, "grad_norm": 0.43197426199913025, "learning_rate": 3.970189211721627e-05, "loss": 0.2128, "step": 805 }, { "epoch": 0.08353197222510104, "grad_norm": 0.5031106472015381, "learning_rate": 3.9700736191340355e-05, "loss": 0.2749, "step": 806 }, { "epoch": 0.08363560990776246, "grad_norm": 0.4439834654331207, "learning_rate": 3.9699578045616114e-05, "loss": 0.2277, "step": 807 }, { "epoch": 0.08373924759042388, "grad_norm": 0.4895908534526825, "learning_rate": 3.9698417680174035e-05, "loss": 0.2738, "step": 808 }, { "epoch": 0.0838428852730853, "grad_norm": 0.47002407908439636, "learning_rate": 3.9697255095144874e-05, "loss": 0.2332, "step": 809 }, { "epoch": 0.08394652295574671, "grad_norm": 0.5205109715461731, "learning_rate": 3.9696090290659634e-05, "loss": 0.2589, "step": 810 }, { "epoch": 0.08405016063840813, "grad_norm": 0.5365964770317078, "learning_rate": 3.969492326684956e-05, "loss": 0.3035, "step": 811 }, { "epoch": 0.08415379832106953, "grad_norm": 0.45762428641319275, "learning_rate": 3.9693754023846136e-05, "loss": 0.2675, "step": 812 }, { "epoch": 0.08425743600373095, "grad_norm": 0.40482908487319946, "learning_rate": 3.9692582561781135e-05, "loss": 0.2117, "step": 813 }, { "epoch": 0.08436107368639237, "grad_norm": 0.4397818446159363, "learning_rate": 3.969140888078654e-05, "loss": 0.2109, "step": 814 }, { "epoch": 0.08446471136905379, "grad_norm": 0.4823589026927948, "learning_rate": 3.96902329809946e-05, "loss": 0.259, "step": 815 }, { "epoch": 0.0845683490517152, "grad_norm": 0.4680706262588501, "learning_rate": 3.968905486253782e-05, "loss": 0.2658, "step": 816 }, { "epoch": 0.08467198673437662, "grad_norm": 0.505746066570282, "learning_rate": 3.968787452554894e-05, "loss": 0.2455, "step": 817 }, { "epoch": 0.08477562441703804, "grad_norm": 0.4473956823348999, "learning_rate": 3.968669197016097e-05, "loss": 0.2645, "step": 818 }, { "epoch": 0.08487926209969945, "grad_norm": 0.47364342212677, "learning_rate": 3.9685507196507155e-05, "loss": 0.2785, "step": 819 }, { "epoch": 0.08498289978236087, "grad_norm": 0.4266853928565979, "learning_rate": 3.968432020472098e-05, "loss": 0.2244, "step": 820 }, { "epoch": 0.08508653746502229, "grad_norm": 0.48588618636131287, "learning_rate": 3.968313099493622e-05, "loss": 0.2967, "step": 821 }, { "epoch": 0.08519017514768369, "grad_norm": 0.43978413939476013, "learning_rate": 3.968193956728684e-05, "loss": 0.2031, "step": 822 }, { "epoch": 0.08529381283034511, "grad_norm": 0.39410391449928284, "learning_rate": 3.968074592190711e-05, "loss": 0.2318, "step": 823 }, { "epoch": 0.08539745051300653, "grad_norm": 0.47819966077804565, "learning_rate": 3.967955005893154e-05, "loss": 0.2304, "step": 824 }, { "epoch": 0.08550108819566794, "grad_norm": 0.4324469268321991, "learning_rate": 3.967835197849485e-05, "loss": 0.2174, "step": 825 }, { "epoch": 0.08560472587832936, "grad_norm": 0.5045969486236572, "learning_rate": 3.967715168073205e-05, "loss": 0.26, "step": 826 }, { "epoch": 0.08570836356099078, "grad_norm": 0.470830500125885, "learning_rate": 3.967594916577838e-05, "loss": 0.2837, "step": 827 }, { "epoch": 0.0858120012436522, "grad_norm": 0.4859100878238678, "learning_rate": 3.9674744433769355e-05, "loss": 0.2184, "step": 828 }, { "epoch": 0.08591563892631361, "grad_norm": 0.499252587556839, "learning_rate": 3.967353748484071e-05, "loss": 0.2311, "step": 829 }, { "epoch": 0.08601927660897503, "grad_norm": 0.45752185583114624, "learning_rate": 3.967232831912844e-05, "loss": 0.252, "step": 830 }, { "epoch": 0.08612291429163643, "grad_norm": 0.46715739369392395, "learning_rate": 3.96711169367688e-05, "loss": 0.2308, "step": 831 }, { "epoch": 0.08622655197429785, "grad_norm": 0.4784639775753021, "learning_rate": 3.966990333789828e-05, "loss": 0.2385, "step": 832 }, { "epoch": 0.08633018965695927, "grad_norm": 0.46856051683425903, "learning_rate": 3.9668687522653636e-05, "loss": 0.2428, "step": 833 }, { "epoch": 0.08643382733962068, "grad_norm": 0.47302600741386414, "learning_rate": 3.9667469491171856e-05, "loss": 0.2339, "step": 834 }, { "epoch": 0.0865374650222821, "grad_norm": 0.5023990869522095, "learning_rate": 3.966624924359018e-05, "loss": 0.2783, "step": 835 }, { "epoch": 0.08664110270494352, "grad_norm": 0.5078696608543396, "learning_rate": 3.966502678004612e-05, "loss": 0.2787, "step": 836 }, { "epoch": 0.08674474038760493, "grad_norm": 0.49874991178512573, "learning_rate": 3.9663802100677404e-05, "loss": 0.2497, "step": 837 }, { "epoch": 0.08684837807026635, "grad_norm": 0.5799754858016968, "learning_rate": 3.966257520562204e-05, "loss": 0.2834, "step": 838 }, { "epoch": 0.08695201575292777, "grad_norm": 0.5072081089019775, "learning_rate": 3.9661346095018264e-05, "loss": 0.3123, "step": 839 }, { "epoch": 0.08705565343558919, "grad_norm": 0.4480048716068268, "learning_rate": 3.966011476900458e-05, "loss": 0.2142, "step": 840 }, { "epoch": 0.08715929111825059, "grad_norm": 0.4509342312812805, "learning_rate": 3.965888122771972e-05, "loss": 0.2228, "step": 841 }, { "epoch": 0.087262928800912, "grad_norm": 0.43247190117836, "learning_rate": 3.965764547130269e-05, "loss": 0.2275, "step": 842 }, { "epoch": 0.08736656648357342, "grad_norm": 0.53459233045578, "learning_rate": 3.9656407499892724e-05, "loss": 0.2793, "step": 843 }, { "epoch": 0.08747020416623484, "grad_norm": 0.4321698248386383, "learning_rate": 3.965516731362931e-05, "loss": 0.2133, "step": 844 }, { "epoch": 0.08757384184889626, "grad_norm": 0.5410631895065308, "learning_rate": 3.965392491265221e-05, "loss": 0.2837, "step": 845 }, { "epoch": 0.08767747953155768, "grad_norm": 0.46599480509757996, "learning_rate": 3.965268029710139e-05, "loss": 0.243, "step": 846 }, { "epoch": 0.08778111721421909, "grad_norm": 0.5210885405540466, "learning_rate": 3.9651433467117123e-05, "loss": 0.3113, "step": 847 }, { "epoch": 0.08788475489688051, "grad_norm": 0.46380823850631714, "learning_rate": 3.9650184422839875e-05, "loss": 0.2821, "step": 848 }, { "epoch": 0.08798839257954193, "grad_norm": 0.48911863565444946, "learning_rate": 3.9648933164410385e-05, "loss": 0.2866, "step": 849 }, { "epoch": 0.08809203026220333, "grad_norm": 0.4629068970680237, "learning_rate": 3.964767969196966e-05, "loss": 0.2962, "step": 850 }, { "epoch": 0.08819566794486475, "grad_norm": 0.48530182242393494, "learning_rate": 3.9646424005658925e-05, "loss": 0.256, "step": 851 }, { "epoch": 0.08829930562752616, "grad_norm": 0.4971450865268707, "learning_rate": 3.9645166105619674e-05, "loss": 0.2523, "step": 852 }, { "epoch": 0.08840294331018758, "grad_norm": 0.41097646951675415, "learning_rate": 3.964390599199364e-05, "loss": 0.198, "step": 853 }, { "epoch": 0.088506580992849, "grad_norm": 0.40204328298568726, "learning_rate": 3.9642643664922825e-05, "loss": 0.2282, "step": 854 }, { "epoch": 0.08861021867551042, "grad_norm": 0.44748103618621826, "learning_rate": 3.964137912454945e-05, "loss": 0.2481, "step": 855 }, { "epoch": 0.08871385635817183, "grad_norm": 0.48509109020233154, "learning_rate": 3.9640112371016016e-05, "loss": 0.2571, "step": 856 }, { "epoch": 0.08881749404083325, "grad_norm": 0.42726966738700867, "learning_rate": 3.963884340446525e-05, "loss": 0.2374, "step": 857 }, { "epoch": 0.08892113172349467, "grad_norm": 0.47798478603363037, "learning_rate": 3.963757222504013e-05, "loss": 0.2458, "step": 858 }, { "epoch": 0.08902476940615608, "grad_norm": 0.5577030777931213, "learning_rate": 3.9636298832883905e-05, "loss": 0.2845, "step": 859 }, { "epoch": 0.08912840708881749, "grad_norm": 0.45337674021720886, "learning_rate": 3.9635023228140056e-05, "loss": 0.2509, "step": 860 }, { "epoch": 0.0892320447714789, "grad_norm": 0.5406741499900818, "learning_rate": 3.963374541095231e-05, "loss": 0.3109, "step": 861 }, { "epoch": 0.08933568245414032, "grad_norm": 0.46739137172698975, "learning_rate": 3.963246538146465e-05, "loss": 0.2387, "step": 862 }, { "epoch": 0.08943932013680174, "grad_norm": 0.4822086691856384, "learning_rate": 3.963118313982131e-05, "loss": 0.249, "step": 863 }, { "epoch": 0.08954295781946316, "grad_norm": 0.4558001756668091, "learning_rate": 3.962989868616677e-05, "loss": 0.2376, "step": 864 }, { "epoch": 0.08964659550212457, "grad_norm": 0.44393190741539, "learning_rate": 3.9628612020645766e-05, "loss": 0.2362, "step": 865 }, { "epoch": 0.08975023318478599, "grad_norm": 0.4516092836856842, "learning_rate": 3.9627323143403276e-05, "loss": 0.2039, "step": 866 }, { "epoch": 0.08985387086744741, "grad_norm": 0.49979168176651, "learning_rate": 3.9626032054584515e-05, "loss": 0.2927, "step": 867 }, { "epoch": 0.08995750855010883, "grad_norm": 0.474592387676239, "learning_rate": 3.962473875433498e-05, "loss": 0.2919, "step": 868 }, { "epoch": 0.09006114623277023, "grad_norm": 0.4473690688610077, "learning_rate": 3.962344324280038e-05, "loss": 0.2519, "step": 869 }, { "epoch": 0.09016478391543165, "grad_norm": 0.5843013525009155, "learning_rate": 3.962214552012671e-05, "loss": 0.2983, "step": 870 }, { "epoch": 0.09026842159809306, "grad_norm": 0.5144256949424744, "learning_rate": 3.962084558646018e-05, "loss": 0.2722, "step": 871 }, { "epoch": 0.09037205928075448, "grad_norm": 0.5222553610801697, "learning_rate": 3.9619543441947274e-05, "loss": 0.2652, "step": 872 }, { "epoch": 0.0904756969634159, "grad_norm": 0.430524080991745, "learning_rate": 3.9618239086734716e-05, "loss": 0.2197, "step": 873 }, { "epoch": 0.09057933464607731, "grad_norm": 0.506688117980957, "learning_rate": 3.961693252096947e-05, "loss": 0.2509, "step": 874 }, { "epoch": 0.09068297232873873, "grad_norm": 0.41722744703292847, "learning_rate": 3.961562374479876e-05, "loss": 0.2376, "step": 875 }, { "epoch": 0.09078661001140015, "grad_norm": 0.4740089774131775, "learning_rate": 3.961431275837006e-05, "loss": 0.2546, "step": 876 }, { "epoch": 0.09089024769406157, "grad_norm": 0.46386855840682983, "learning_rate": 3.961299956183109e-05, "loss": 0.2554, "step": 877 }, { "epoch": 0.09099388537672298, "grad_norm": 0.4838809669017792, "learning_rate": 3.9611684155329825e-05, "loss": 0.2585, "step": 878 }, { "epoch": 0.09109752305938439, "grad_norm": 0.4920610189437866, "learning_rate": 3.9610366539014474e-05, "loss": 0.2723, "step": 879 }, { "epoch": 0.0912011607420458, "grad_norm": 0.4134841561317444, "learning_rate": 3.96090467130335e-05, "loss": 0.2264, "step": 880 }, { "epoch": 0.09130479842470722, "grad_norm": 0.5652831196784973, "learning_rate": 3.9607724677535626e-05, "loss": 0.2446, "step": 881 }, { "epoch": 0.09140843610736864, "grad_norm": 0.4715527892112732, "learning_rate": 3.960640043266982e-05, "loss": 0.2656, "step": 882 }, { "epoch": 0.09151207379003005, "grad_norm": 0.4669806957244873, "learning_rate": 3.960507397858529e-05, "loss": 0.2143, "step": 883 }, { "epoch": 0.09161571147269147, "grad_norm": 0.4482795000076294, "learning_rate": 3.96037453154315e-05, "loss": 0.2185, "step": 884 }, { "epoch": 0.09171934915535289, "grad_norm": 0.4721313714981079, "learning_rate": 3.960241444335817e-05, "loss": 0.2793, "step": 885 }, { "epoch": 0.0918229868380143, "grad_norm": 0.5375263094902039, "learning_rate": 3.9601081362515245e-05, "loss": 0.2409, "step": 886 }, { "epoch": 0.09192662452067572, "grad_norm": 0.5323898792266846, "learning_rate": 3.9599746073052945e-05, "loss": 0.248, "step": 887 }, { "epoch": 0.09203026220333713, "grad_norm": 0.4709322154521942, "learning_rate": 3.959840857512172e-05, "loss": 0.25, "step": 888 }, { "epoch": 0.09213389988599854, "grad_norm": 0.4204918146133423, "learning_rate": 3.9597068868872296e-05, "loss": 0.2437, "step": 889 }, { "epoch": 0.09223753756865996, "grad_norm": 0.5008912086486816, "learning_rate": 3.9595726954455606e-05, "loss": 0.2705, "step": 890 }, { "epoch": 0.09234117525132138, "grad_norm": 0.4866129755973816, "learning_rate": 3.959438283202287e-05, "loss": 0.2598, "step": 891 }, { "epoch": 0.0924448129339828, "grad_norm": 0.5146870613098145, "learning_rate": 3.959303650172554e-05, "loss": 0.2436, "step": 892 }, { "epoch": 0.09254845061664421, "grad_norm": 0.4937049448490143, "learning_rate": 3.959168796371531e-05, "loss": 0.2376, "step": 893 }, { "epoch": 0.09265208829930563, "grad_norm": 0.5434243679046631, "learning_rate": 3.959033721814413e-05, "loss": 0.282, "step": 894 }, { "epoch": 0.09275572598196705, "grad_norm": 0.5378921627998352, "learning_rate": 3.958898426516421e-05, "loss": 0.2877, "step": 895 }, { "epoch": 0.09285936366462846, "grad_norm": 0.5169168710708618, "learning_rate": 3.9587629104927995e-05, "loss": 0.2575, "step": 896 }, { "epoch": 0.09296300134728988, "grad_norm": 0.511336624622345, "learning_rate": 3.9586271737588184e-05, "loss": 0.2763, "step": 897 }, { "epoch": 0.09306663902995128, "grad_norm": 0.4890974462032318, "learning_rate": 3.958491216329772e-05, "loss": 0.2712, "step": 898 }, { "epoch": 0.0931702767126127, "grad_norm": 0.47972339391708374, "learning_rate": 3.95835503822098e-05, "loss": 0.2379, "step": 899 }, { "epoch": 0.09327391439527412, "grad_norm": 0.5554192662239075, "learning_rate": 3.9582186394477864e-05, "loss": 0.284, "step": 900 }, { "epoch": 0.09337755207793554, "grad_norm": 0.5789952278137207, "learning_rate": 3.958082020025561e-05, "loss": 0.2869, "step": 901 }, { "epoch": 0.09348118976059695, "grad_norm": 0.5500551462173462, "learning_rate": 3.957945179969697e-05, "loss": 0.2719, "step": 902 }, { "epoch": 0.09358482744325837, "grad_norm": 0.5016873478889465, "learning_rate": 3.957808119295614e-05, "loss": 0.2527, "step": 903 }, { "epoch": 0.09368846512591979, "grad_norm": 0.4230045676231384, "learning_rate": 3.957670838018755e-05, "loss": 0.21, "step": 904 }, { "epoch": 0.0937921028085812, "grad_norm": 0.513180673122406, "learning_rate": 3.957533336154591e-05, "loss": 0.2474, "step": 905 }, { "epoch": 0.09389574049124262, "grad_norm": 0.5388766527175903, "learning_rate": 3.9573956137186124e-05, "loss": 0.2803, "step": 906 }, { "epoch": 0.09399937817390402, "grad_norm": 0.42283692955970764, "learning_rate": 3.957257670726339e-05, "loss": 0.2359, "step": 907 }, { "epoch": 0.09410301585656544, "grad_norm": 0.47266629338264465, "learning_rate": 3.957119507193314e-05, "loss": 0.2384, "step": 908 }, { "epoch": 0.09420665353922686, "grad_norm": 0.43167486786842346, "learning_rate": 3.956981123135105e-05, "loss": 0.2279, "step": 909 }, { "epoch": 0.09431029122188828, "grad_norm": 0.42065706849098206, "learning_rate": 3.956842518567305e-05, "loss": 0.1911, "step": 910 }, { "epoch": 0.0944139289045497, "grad_norm": 0.49886035919189453, "learning_rate": 3.956703693505533e-05, "loss": 0.2458, "step": 911 }, { "epoch": 0.09451756658721111, "grad_norm": 0.5268099308013916, "learning_rate": 3.95656464796543e-05, "loss": 0.3251, "step": 912 }, { "epoch": 0.09462120426987253, "grad_norm": 0.4417741596698761, "learning_rate": 3.956425381962664e-05, "loss": 0.2542, "step": 913 }, { "epoch": 0.09472484195253394, "grad_norm": 0.4796222150325775, "learning_rate": 3.956285895512928e-05, "loss": 0.233, "step": 914 }, { "epoch": 0.09482847963519536, "grad_norm": 0.46497902274131775, "learning_rate": 3.956146188631937e-05, "loss": 0.2712, "step": 915 }, { "epoch": 0.09493211731785678, "grad_norm": 0.4230740964412689, "learning_rate": 3.956006261335435e-05, "loss": 0.216, "step": 916 }, { "epoch": 0.09503575500051818, "grad_norm": 0.421990305185318, "learning_rate": 3.9558661136391886e-05, "loss": 0.225, "step": 917 }, { "epoch": 0.0951393926831796, "grad_norm": 0.4670604169368744, "learning_rate": 3.955725745558988e-05, "loss": 0.2331, "step": 918 }, { "epoch": 0.09524303036584102, "grad_norm": 0.5368228554725647, "learning_rate": 3.9555851571106514e-05, "loss": 0.2914, "step": 919 }, { "epoch": 0.09534666804850243, "grad_norm": 0.42806532979011536, "learning_rate": 3.955444348310019e-05, "loss": 0.236, "step": 920 }, { "epoch": 0.09545030573116385, "grad_norm": 0.49855703115463257, "learning_rate": 3.9553033191729576e-05, "loss": 0.2523, "step": 921 }, { "epoch": 0.09555394341382527, "grad_norm": 0.41719886660575867, "learning_rate": 3.9551620697153575e-05, "loss": 0.2338, "step": 922 }, { "epoch": 0.09565758109648669, "grad_norm": 0.4818481206893921, "learning_rate": 3.955020599953135e-05, "loss": 0.2773, "step": 923 }, { "epoch": 0.0957612187791481, "grad_norm": 0.4772093594074249, "learning_rate": 3.9548789099022305e-05, "loss": 0.2894, "step": 924 }, { "epoch": 0.09586485646180952, "grad_norm": 0.39468058943748474, "learning_rate": 3.9547369995786084e-05, "loss": 0.1933, "step": 925 }, { "epoch": 0.09596849414447092, "grad_norm": 0.4253804683685303, "learning_rate": 3.9545948689982605e-05, "loss": 0.2506, "step": 926 }, { "epoch": 0.09607213182713234, "grad_norm": 0.4445357620716095, "learning_rate": 3.954452518177201e-05, "loss": 0.2319, "step": 927 }, { "epoch": 0.09617576950979376, "grad_norm": 0.4653438329696655, "learning_rate": 3.954309947131471e-05, "loss": 0.2988, "step": 928 }, { "epoch": 0.09627940719245517, "grad_norm": 0.3784976899623871, "learning_rate": 3.9541671558771334e-05, "loss": 0.1875, "step": 929 }, { "epoch": 0.09638304487511659, "grad_norm": 0.47835689783096313, "learning_rate": 3.954024144430278e-05, "loss": 0.2569, "step": 930 }, { "epoch": 0.09648668255777801, "grad_norm": 0.4231659770011902, "learning_rate": 3.95388091280702e-05, "loss": 0.2265, "step": 931 }, { "epoch": 0.09659032024043943, "grad_norm": 0.46786612272262573, "learning_rate": 3.953737461023499e-05, "loss": 0.2365, "step": 932 }, { "epoch": 0.09669395792310084, "grad_norm": 0.4952070116996765, "learning_rate": 3.953593789095877e-05, "loss": 0.267, "step": 933 }, { "epoch": 0.09679759560576226, "grad_norm": 0.4483940601348877, "learning_rate": 3.953449897040344e-05, "loss": 0.2318, "step": 934 }, { "epoch": 0.09690123328842368, "grad_norm": 0.5074480772018433, "learning_rate": 3.953305784873114e-05, "loss": 0.2706, "step": 935 }, { "epoch": 0.09700487097108508, "grad_norm": 0.5509966015815735, "learning_rate": 3.9531614526104237e-05, "loss": 0.2932, "step": 936 }, { "epoch": 0.0971085086537465, "grad_norm": 0.3346215784549713, "learning_rate": 3.953016900268537e-05, "loss": 0.1715, "step": 937 }, { "epoch": 0.09721214633640791, "grad_norm": 0.37619563937187195, "learning_rate": 3.952872127863743e-05, "loss": 0.1943, "step": 938 }, { "epoch": 0.09731578401906933, "grad_norm": 0.508621096611023, "learning_rate": 3.952727135412353e-05, "loss": 0.2681, "step": 939 }, { "epoch": 0.09741942170173075, "grad_norm": 0.4613504409790039, "learning_rate": 3.9525819229307044e-05, "loss": 0.2572, "step": 940 }, { "epoch": 0.09752305938439217, "grad_norm": 0.554082453250885, "learning_rate": 3.952436490435161e-05, "loss": 0.3056, "step": 941 }, { "epoch": 0.09762669706705358, "grad_norm": 0.4735698997974396, "learning_rate": 3.952290837942108e-05, "loss": 0.1899, "step": 942 }, { "epoch": 0.097730334749715, "grad_norm": 0.4331062138080597, "learning_rate": 3.952144965467959e-05, "loss": 0.25, "step": 943 }, { "epoch": 0.09783397243237642, "grad_norm": 0.4538209140300751, "learning_rate": 3.9519988730291493e-05, "loss": 0.2491, "step": 944 }, { "epoch": 0.09793761011503782, "grad_norm": 0.4904068112373352, "learning_rate": 3.9518525606421414e-05, "loss": 0.2782, "step": 945 }, { "epoch": 0.09804124779769924, "grad_norm": 0.5205404162406921, "learning_rate": 3.9517060283234216e-05, "loss": 0.2507, "step": 946 }, { "epoch": 0.09814488548036066, "grad_norm": 0.48228219151496887, "learning_rate": 3.9515592760895005e-05, "loss": 0.2576, "step": 947 }, { "epoch": 0.09824852316302207, "grad_norm": 0.4108220636844635, "learning_rate": 3.9514123039569135e-05, "loss": 0.233, "step": 948 }, { "epoch": 0.09835216084568349, "grad_norm": 0.4790742099285126, "learning_rate": 3.951265111942221e-05, "loss": 0.2426, "step": 949 }, { "epoch": 0.0984557985283449, "grad_norm": 0.4508419632911682, "learning_rate": 3.95111770006201e-05, "loss": 0.252, "step": 950 }, { "epoch": 0.09855943621100632, "grad_norm": 0.41636496782302856, "learning_rate": 3.95097006833289e-05, "loss": 0.2434, "step": 951 }, { "epoch": 0.09866307389366774, "grad_norm": 0.5083922743797302, "learning_rate": 3.9508222167714945e-05, "loss": 0.2878, "step": 952 }, { "epoch": 0.09876671157632916, "grad_norm": 0.46241459250450134, "learning_rate": 3.950674145394484e-05, "loss": 0.2322, "step": 953 }, { "epoch": 0.09887034925899058, "grad_norm": 0.458252489566803, "learning_rate": 3.950525854218544e-05, "loss": 0.2587, "step": 954 }, { "epoch": 0.09897398694165198, "grad_norm": 0.4902607202529907, "learning_rate": 3.950377343260383e-05, "loss": 0.2161, "step": 955 }, { "epoch": 0.0990776246243134, "grad_norm": 0.40840044617652893, "learning_rate": 3.9502286125367345e-05, "loss": 0.1812, "step": 956 }, { "epoch": 0.09918126230697481, "grad_norm": 0.42781862616539, "learning_rate": 3.950079662064358e-05, "loss": 0.2268, "step": 957 }, { "epoch": 0.09928489998963623, "grad_norm": 0.48561370372772217, "learning_rate": 3.949930491860036e-05, "loss": 0.2736, "step": 958 }, { "epoch": 0.09938853767229765, "grad_norm": 0.5395079255104065, "learning_rate": 3.949781101940578e-05, "loss": 0.2579, "step": 959 }, { "epoch": 0.09949217535495906, "grad_norm": 0.5001274943351746, "learning_rate": 3.949631492322816e-05, "loss": 0.2844, "step": 960 }, { "epoch": 0.09959581303762048, "grad_norm": 0.5049706101417542, "learning_rate": 3.949481663023608e-05, "loss": 0.2715, "step": 961 }, { "epoch": 0.0996994507202819, "grad_norm": 0.5008152723312378, "learning_rate": 3.9493316140598376e-05, "loss": 0.2894, "step": 962 }, { "epoch": 0.09980308840294332, "grad_norm": 0.5027458667755127, "learning_rate": 3.94918134544841e-05, "loss": 0.2589, "step": 963 }, { "epoch": 0.09990672608560472, "grad_norm": 0.5868988633155823, "learning_rate": 3.94903085720626e-05, "loss": 0.2717, "step": 964 }, { "epoch": 0.10001036376826614, "grad_norm": 0.4468805193901062, "learning_rate": 3.9488801493503414e-05, "loss": 0.2377, "step": 965 }, { "epoch": 0.10011400145092755, "grad_norm": 0.5798137187957764, "learning_rate": 3.948729221897638e-05, "loss": 0.2686, "step": 966 }, { "epoch": 0.10021763913358897, "grad_norm": 0.5064831972122192, "learning_rate": 3.948578074865155e-05, "loss": 0.2471, "step": 967 }, { "epoch": 0.10032127681625039, "grad_norm": 0.4484928846359253, "learning_rate": 3.9484267082699236e-05, "loss": 0.2226, "step": 968 }, { "epoch": 0.1004249144989118, "grad_norm": 0.42684027552604675, "learning_rate": 3.948275122129e-05, "loss": 0.2371, "step": 969 }, { "epoch": 0.10052855218157322, "grad_norm": 0.500918447971344, "learning_rate": 3.948123316459464e-05, "loss": 0.2368, "step": 970 }, { "epoch": 0.10063218986423464, "grad_norm": 0.4330311417579651, "learning_rate": 3.947971291278421e-05, "loss": 0.227, "step": 971 }, { "epoch": 0.10073582754689606, "grad_norm": 0.3878055214881897, "learning_rate": 3.947819046603001e-05, "loss": 0.216, "step": 972 }, { "epoch": 0.10083946522955747, "grad_norm": 0.5194640159606934, "learning_rate": 3.947666582450359e-05, "loss": 0.3169, "step": 973 }, { "epoch": 0.10094310291221888, "grad_norm": 0.4383881390094757, "learning_rate": 3.947513898837674e-05, "loss": 0.2236, "step": 974 }, { "epoch": 0.1010467405948803, "grad_norm": 0.4771685004234314, "learning_rate": 3.94736099578215e-05, "loss": 0.2398, "step": 975 }, { "epoch": 0.10115037827754171, "grad_norm": 0.4392111003398895, "learning_rate": 3.9472078733010174e-05, "loss": 0.2375, "step": 976 }, { "epoch": 0.10125401596020313, "grad_norm": 0.502129077911377, "learning_rate": 3.9470545314115274e-05, "loss": 0.2515, "step": 977 }, { "epoch": 0.10135765364286455, "grad_norm": 0.5018049478530884, "learning_rate": 3.9469009701309605e-05, "loss": 0.2249, "step": 978 }, { "epoch": 0.10146129132552596, "grad_norm": 0.4413333535194397, "learning_rate": 3.946747189476618e-05, "loss": 0.2153, "step": 979 }, { "epoch": 0.10156492900818738, "grad_norm": 0.5340810418128967, "learning_rate": 3.946593189465829e-05, "loss": 0.2898, "step": 980 }, { "epoch": 0.1016685666908488, "grad_norm": 0.46548914909362793, "learning_rate": 3.946438970115945e-05, "loss": 0.2418, "step": 981 }, { "epoch": 0.10177220437351021, "grad_norm": 0.5104466676712036, "learning_rate": 3.9462845314443445e-05, "loss": 0.2741, "step": 982 }, { "epoch": 0.10187584205617162, "grad_norm": 0.4898616373538971, "learning_rate": 3.9461298734684275e-05, "loss": 0.2547, "step": 983 }, { "epoch": 0.10197947973883303, "grad_norm": 0.4741784334182739, "learning_rate": 3.9459749962056225e-05, "loss": 0.2502, "step": 984 }, { "epoch": 0.10208311742149445, "grad_norm": 0.5162531733512878, "learning_rate": 3.94581989967338e-05, "loss": 0.3015, "step": 985 }, { "epoch": 0.10218675510415587, "grad_norm": 0.5185749530792236, "learning_rate": 3.9456645838891755e-05, "loss": 0.2858, "step": 986 }, { "epoch": 0.10229039278681729, "grad_norm": 0.4627940356731415, "learning_rate": 3.94550904887051e-05, "loss": 0.2377, "step": 987 }, { "epoch": 0.1023940304694787, "grad_norm": 0.5982307195663452, "learning_rate": 3.94535329463491e-05, "loss": 0.2988, "step": 988 }, { "epoch": 0.10249766815214012, "grad_norm": 0.4750824570655823, "learning_rate": 3.945197321199925e-05, "loss": 0.2445, "step": 989 }, { "epoch": 0.10260130583480154, "grad_norm": 0.5667946934700012, "learning_rate": 3.945041128583129e-05, "loss": 0.2684, "step": 990 }, { "epoch": 0.10270494351746295, "grad_norm": 0.4947032928466797, "learning_rate": 3.9448847168021226e-05, "loss": 0.2445, "step": 991 }, { "epoch": 0.10280858120012437, "grad_norm": 0.4986145496368408, "learning_rate": 3.9447280858745295e-05, "loss": 0.3004, "step": 992 }, { "epoch": 0.10291221888278577, "grad_norm": 0.4144374132156372, "learning_rate": 3.944571235817999e-05, "loss": 0.2092, "step": 993 }, { "epoch": 0.10301585656544719, "grad_norm": 0.5639985799789429, "learning_rate": 3.944414166650204e-05, "loss": 0.3377, "step": 994 }, { "epoch": 0.10311949424810861, "grad_norm": 0.5175818204879761, "learning_rate": 3.9442568783888443e-05, "loss": 0.2365, "step": 995 }, { "epoch": 0.10322313193077003, "grad_norm": 0.4092256724834442, "learning_rate": 3.9440993710516415e-05, "loss": 0.2572, "step": 996 }, { "epoch": 0.10332676961343144, "grad_norm": 0.47614118456840515, "learning_rate": 3.943941644656344e-05, "loss": 0.2535, "step": 997 }, { "epoch": 0.10343040729609286, "grad_norm": 0.5114443898200989, "learning_rate": 3.9437836992207234e-05, "loss": 0.2779, "step": 998 }, { "epoch": 0.10353404497875428, "grad_norm": 0.4928836524486542, "learning_rate": 3.9436255347625775e-05, "loss": 0.2521, "step": 999 }, { "epoch": 0.1036376826614157, "grad_norm": 0.46215057373046875, "learning_rate": 3.9434671512997276e-05, "loss": 0.2436, "step": 1000 }, { "epoch": 0.10374132034407711, "grad_norm": 0.45834508538246155, "learning_rate": 3.943308548850021e-05, "loss": 0.249, "step": 1001 }, { "epoch": 0.10384495802673852, "grad_norm": 0.40858447551727295, "learning_rate": 3.943149727431327e-05, "loss": 0.1887, "step": 1002 }, { "epoch": 0.10394859570939993, "grad_norm": 0.5563355684280396, "learning_rate": 3.942990687061543e-05, "loss": 0.2852, "step": 1003 }, { "epoch": 0.10405223339206135, "grad_norm": 0.43062031269073486, "learning_rate": 3.942831427758589e-05, "loss": 0.208, "step": 1004 }, { "epoch": 0.10415587107472277, "grad_norm": 0.5552340149879456, "learning_rate": 3.942671949540409e-05, "loss": 0.2622, "step": 1005 }, { "epoch": 0.10425950875738418, "grad_norm": 0.4066930413246155, "learning_rate": 3.942512252424974e-05, "loss": 0.1943, "step": 1006 }, { "epoch": 0.1043631464400456, "grad_norm": 0.42987269163131714, "learning_rate": 3.9423523364302795e-05, "loss": 0.2548, "step": 1007 }, { "epoch": 0.10446678412270702, "grad_norm": 0.45666632056236267, "learning_rate": 3.942192201574341e-05, "loss": 0.2442, "step": 1008 }, { "epoch": 0.10457042180536844, "grad_norm": 0.5226937532424927, "learning_rate": 3.9420318478752056e-05, "loss": 0.2615, "step": 1009 }, { "epoch": 0.10467405948802985, "grad_norm": 0.4175627827644348, "learning_rate": 3.9418712753509406e-05, "loss": 0.1772, "step": 1010 }, { "epoch": 0.10477769717069127, "grad_norm": 0.4888598918914795, "learning_rate": 3.941710484019639e-05, "loss": 0.2718, "step": 1011 }, { "epoch": 0.10488133485335267, "grad_norm": 0.4504351019859314, "learning_rate": 3.941549473899418e-05, "loss": 0.2557, "step": 1012 }, { "epoch": 0.10498497253601409, "grad_norm": 0.5280157923698425, "learning_rate": 3.941388245008421e-05, "loss": 0.2456, "step": 1013 }, { "epoch": 0.10508861021867551, "grad_norm": 0.4623817801475525, "learning_rate": 3.941226797364814e-05, "loss": 0.2398, "step": 1014 }, { "epoch": 0.10519224790133692, "grad_norm": 0.42403942346572876, "learning_rate": 3.94106513098679e-05, "loss": 0.2141, "step": 1015 }, { "epoch": 0.10529588558399834, "grad_norm": 0.483390748500824, "learning_rate": 3.940903245892564e-05, "loss": 0.2364, "step": 1016 }, { "epoch": 0.10539952326665976, "grad_norm": 0.47593533992767334, "learning_rate": 3.940741142100377e-05, "loss": 0.2373, "step": 1017 }, { "epoch": 0.10550316094932118, "grad_norm": 0.39513906836509705, "learning_rate": 3.940578819628495e-05, "loss": 0.2268, "step": 1018 }, { "epoch": 0.10560679863198259, "grad_norm": 0.48310190439224243, "learning_rate": 3.940416278495209e-05, "loss": 0.257, "step": 1019 }, { "epoch": 0.10571043631464401, "grad_norm": 0.5143681764602661, "learning_rate": 3.940253518718833e-05, "loss": 0.2589, "step": 1020 }, { "epoch": 0.10581407399730541, "grad_norm": 0.5714600086212158, "learning_rate": 3.940090540317706e-05, "loss": 0.308, "step": 1021 }, { "epoch": 0.10591771167996683, "grad_norm": 0.5120255947113037, "learning_rate": 3.939927343310194e-05, "loss": 0.3122, "step": 1022 }, { "epoch": 0.10602134936262825, "grad_norm": 0.4649025797843933, "learning_rate": 3.939763927714684e-05, "loss": 0.2271, "step": 1023 }, { "epoch": 0.10612498704528966, "grad_norm": 0.5476794242858887, "learning_rate": 3.9396002935495895e-05, "loss": 0.2673, "step": 1024 }, { "epoch": 0.10622862472795108, "grad_norm": 0.5482632517814636, "learning_rate": 3.93943644083335e-05, "loss": 0.2655, "step": 1025 }, { "epoch": 0.1063322624106125, "grad_norm": 0.4433247148990631, "learning_rate": 3.939272369584427e-05, "loss": 0.2377, "step": 1026 }, { "epoch": 0.10643590009327392, "grad_norm": 0.4874042272567749, "learning_rate": 3.939108079821308e-05, "loss": 0.228, "step": 1027 }, { "epoch": 0.10653953777593533, "grad_norm": 0.4970521926879883, "learning_rate": 3.938943571562505e-05, "loss": 0.2692, "step": 1028 }, { "epoch": 0.10664317545859675, "grad_norm": 0.4594666063785553, "learning_rate": 3.9387788448265546e-05, "loss": 0.2255, "step": 1029 }, { "epoch": 0.10674681314125817, "grad_norm": 0.40364277362823486, "learning_rate": 3.9386138996320176e-05, "loss": 0.2188, "step": 1030 }, { "epoch": 0.10685045082391957, "grad_norm": 0.49549341201782227, "learning_rate": 3.9384487359974806e-05, "loss": 0.2475, "step": 1031 }, { "epoch": 0.10695408850658099, "grad_norm": 0.4368329644203186, "learning_rate": 3.9382833539415526e-05, "loss": 0.2564, "step": 1032 }, { "epoch": 0.1070577261892424, "grad_norm": 0.48317834734916687, "learning_rate": 3.93811775348287e-05, "loss": 0.2726, "step": 1033 }, { "epoch": 0.10716136387190382, "grad_norm": 0.4917926490306854, "learning_rate": 3.937951934640091e-05, "loss": 0.2571, "step": 1034 }, { "epoch": 0.10726500155456524, "grad_norm": 0.5338187217712402, "learning_rate": 3.9377858974319014e-05, "loss": 0.2776, "step": 1035 }, { "epoch": 0.10736863923722666, "grad_norm": 0.49937704205513, "learning_rate": 3.937619641877009e-05, "loss": 0.2573, "step": 1036 }, { "epoch": 0.10747227691988807, "grad_norm": 0.4757545292377472, "learning_rate": 3.9374531679941474e-05, "loss": 0.2601, "step": 1037 }, { "epoch": 0.10757591460254949, "grad_norm": 0.49643903970718384, "learning_rate": 3.937286475802075e-05, "loss": 0.2899, "step": 1038 }, { "epoch": 0.10767955228521091, "grad_norm": 0.4637889564037323, "learning_rate": 3.937119565319574e-05, "loss": 0.2456, "step": 1039 }, { "epoch": 0.10778318996787231, "grad_norm": 0.44887062907218933, "learning_rate": 3.936952436565451e-05, "loss": 0.2265, "step": 1040 }, { "epoch": 0.10788682765053373, "grad_norm": 0.44160398840904236, "learning_rate": 3.9367850895585394e-05, "loss": 0.2418, "step": 1041 }, { "epoch": 0.10799046533319515, "grad_norm": 0.5461088418960571, "learning_rate": 3.936617524317694e-05, "loss": 0.2811, "step": 1042 }, { "epoch": 0.10809410301585656, "grad_norm": 0.44432151317596436, "learning_rate": 3.936449740861797e-05, "loss": 0.233, "step": 1043 }, { "epoch": 0.10819774069851798, "grad_norm": 0.4568310081958771, "learning_rate": 3.936281739209752e-05, "loss": 0.2414, "step": 1044 }, { "epoch": 0.1083013783811794, "grad_norm": 0.47912347316741943, "learning_rate": 3.936113519380493e-05, "loss": 0.2377, "step": 1045 }, { "epoch": 0.10840501606384081, "grad_norm": 0.5290972590446472, "learning_rate": 3.9359450813929705e-05, "loss": 0.2794, "step": 1046 }, { "epoch": 0.10850865374650223, "grad_norm": 0.5089946389198303, "learning_rate": 3.935776425266166e-05, "loss": 0.2565, "step": 1047 }, { "epoch": 0.10861229142916365, "grad_norm": 0.4862841069698334, "learning_rate": 3.935607551019084e-05, "loss": 0.2442, "step": 1048 }, { "epoch": 0.10871592911182507, "grad_norm": 0.46862688660621643, "learning_rate": 3.935438458670752e-05, "loss": 0.2144, "step": 1049 }, { "epoch": 0.10881956679448647, "grad_norm": 0.4151781499385834, "learning_rate": 3.935269148240223e-05, "loss": 0.261, "step": 1050 }, { "epoch": 0.10892320447714789, "grad_norm": 0.5200570821762085, "learning_rate": 3.935099619746575e-05, "loss": 0.2695, "step": 1051 }, { "epoch": 0.1090268421598093, "grad_norm": 0.48534032702445984, "learning_rate": 3.934929873208909e-05, "loss": 0.2555, "step": 1052 }, { "epoch": 0.10913047984247072, "grad_norm": 0.4310669004917145, "learning_rate": 3.934759908646354e-05, "loss": 0.2691, "step": 1053 }, { "epoch": 0.10923411752513214, "grad_norm": 0.49501046538352966, "learning_rate": 3.934589726078059e-05, "loss": 0.2694, "step": 1054 }, { "epoch": 0.10933775520779355, "grad_norm": 0.537093997001648, "learning_rate": 3.9344193255232016e-05, "loss": 0.2443, "step": 1055 }, { "epoch": 0.10944139289045497, "grad_norm": 0.49397650361061096, "learning_rate": 3.934248707000982e-05, "loss": 0.2945, "step": 1056 }, { "epoch": 0.10954503057311639, "grad_norm": 0.5158434510231018, "learning_rate": 3.9340778705306244e-05, "loss": 0.2643, "step": 1057 }, { "epoch": 0.1096486682557778, "grad_norm": 0.4754325747489929, "learning_rate": 3.9339068161313796e-05, "loss": 0.2584, "step": 1058 }, { "epoch": 0.10975230593843921, "grad_norm": 0.5299050807952881, "learning_rate": 3.9337355438225205e-05, "loss": 0.2647, "step": 1059 }, { "epoch": 0.10985594362110063, "grad_norm": 0.5079212188720703, "learning_rate": 3.9335640536233465e-05, "loss": 0.2731, "step": 1060 }, { "epoch": 0.10995958130376204, "grad_norm": 0.4635586142539978, "learning_rate": 3.933392345553181e-05, "loss": 0.2567, "step": 1061 }, { "epoch": 0.11006321898642346, "grad_norm": 0.4382088780403137, "learning_rate": 3.933220419631371e-05, "loss": 0.25, "step": 1062 }, { "epoch": 0.11016685666908488, "grad_norm": 0.49184006452560425, "learning_rate": 3.93304827587729e-05, "loss": 0.2391, "step": 1063 }, { "epoch": 0.1102704943517463, "grad_norm": 0.4317704141139984, "learning_rate": 3.932875914310334e-05, "loss": 0.2253, "step": 1064 }, { "epoch": 0.11037413203440771, "grad_norm": 0.471828430891037, "learning_rate": 3.9327033349499247e-05, "loss": 0.2143, "step": 1065 }, { "epoch": 0.11047776971706913, "grad_norm": 0.4559275805950165, "learning_rate": 3.9325305378155076e-05, "loss": 0.2205, "step": 1066 }, { "epoch": 0.11058140739973055, "grad_norm": 0.45583653450012207, "learning_rate": 3.932357522926554e-05, "loss": 0.2311, "step": 1067 }, { "epoch": 0.11068504508239196, "grad_norm": 0.5379915237426758, "learning_rate": 3.932184290302559e-05, "loss": 0.2838, "step": 1068 }, { "epoch": 0.11078868276505337, "grad_norm": 0.4416240155696869, "learning_rate": 3.9320108399630414e-05, "loss": 0.2054, "step": 1069 }, { "epoch": 0.11089232044771478, "grad_norm": 0.4274689257144928, "learning_rate": 3.931837171927546e-05, "loss": 0.2233, "step": 1070 }, { "epoch": 0.1109959581303762, "grad_norm": 0.4448922872543335, "learning_rate": 3.931663286215641e-05, "loss": 0.263, "step": 1071 }, { "epoch": 0.11109959581303762, "grad_norm": 0.47742193937301636, "learning_rate": 3.93148918284692e-05, "loss": 0.2838, "step": 1072 }, { "epoch": 0.11120323349569904, "grad_norm": 0.454584002494812, "learning_rate": 3.931314861841e-05, "loss": 0.228, "step": 1073 }, { "epoch": 0.11130687117836045, "grad_norm": 0.4988914728164673, "learning_rate": 3.931140323217524e-05, "loss": 0.2943, "step": 1074 }, { "epoch": 0.11141050886102187, "grad_norm": 0.48816895484924316, "learning_rate": 3.930965566996158e-05, "loss": 0.3103, "step": 1075 }, { "epoch": 0.11151414654368329, "grad_norm": 0.41996386647224426, "learning_rate": 3.9307905931965934e-05, "loss": 0.2142, "step": 1076 }, { "epoch": 0.1116177842263447, "grad_norm": 0.4714778661727905, "learning_rate": 3.9306154018385474e-05, "loss": 0.2659, "step": 1077 }, { "epoch": 0.11172142190900611, "grad_norm": 0.48112183809280396, "learning_rate": 3.930439992941758e-05, "loss": 0.2355, "step": 1078 }, { "epoch": 0.11182505959166752, "grad_norm": 0.5149953961372375, "learning_rate": 3.930264366525992e-05, "loss": 0.2331, "step": 1079 }, { "epoch": 0.11192869727432894, "grad_norm": 0.5702659487724304, "learning_rate": 3.930088522611037e-05, "loss": 0.2842, "step": 1080 }, { "epoch": 0.11203233495699036, "grad_norm": 0.5287723541259766, "learning_rate": 3.929912461216708e-05, "loss": 0.2585, "step": 1081 }, { "epoch": 0.11213597263965178, "grad_norm": 0.47466516494750977, "learning_rate": 3.929736182362843e-05, "loss": 0.2428, "step": 1082 }, { "epoch": 0.1122396103223132, "grad_norm": 0.4626266062259674, "learning_rate": 3.9295596860693054e-05, "loss": 0.2083, "step": 1083 }, { "epoch": 0.11234324800497461, "grad_norm": 0.4878407120704651, "learning_rate": 3.929382972355981e-05, "loss": 0.2483, "step": 1084 }, { "epoch": 0.11244688568763603, "grad_norm": 0.4909697473049164, "learning_rate": 3.929206041242782e-05, "loss": 0.258, "step": 1085 }, { "epoch": 0.11255052337029745, "grad_norm": 0.5589547753334045, "learning_rate": 3.929028892749647e-05, "loss": 0.2531, "step": 1086 }, { "epoch": 0.11265416105295886, "grad_norm": 0.46808531880378723, "learning_rate": 3.928851526896535e-05, "loss": 0.269, "step": 1087 }, { "epoch": 0.11275779873562027, "grad_norm": 0.5738458633422852, "learning_rate": 3.9286739437034304e-05, "loss": 0.2583, "step": 1088 }, { "epoch": 0.11286143641828168, "grad_norm": 0.500787615776062, "learning_rate": 3.928496143190344e-05, "loss": 0.2518, "step": 1089 }, { "epoch": 0.1129650741009431, "grad_norm": 0.5720177292823792, "learning_rate": 3.92831812537731e-05, "loss": 0.267, "step": 1090 }, { "epoch": 0.11306871178360452, "grad_norm": 0.5120452046394348, "learning_rate": 3.9281398902843875e-05, "loss": 0.2359, "step": 1091 }, { "epoch": 0.11317234946626593, "grad_norm": 0.4737999439239502, "learning_rate": 3.92796143793166e-05, "loss": 0.2387, "step": 1092 }, { "epoch": 0.11327598714892735, "grad_norm": 0.47964850068092346, "learning_rate": 3.927782768339235e-05, "loss": 0.2534, "step": 1093 }, { "epoch": 0.11337962483158877, "grad_norm": 0.5042358040809631, "learning_rate": 3.9276038815272436e-05, "loss": 0.2673, "step": 1094 }, { "epoch": 0.11348326251425019, "grad_norm": 0.4781404137611389, "learning_rate": 3.9274247775158433e-05, "loss": 0.237, "step": 1095 }, { "epoch": 0.1135869001969116, "grad_norm": 0.4982626736164093, "learning_rate": 3.927245456325216e-05, "loss": 0.2354, "step": 1096 }, { "epoch": 0.113690537879573, "grad_norm": 0.4899817109107971, "learning_rate": 3.9270659179755656e-05, "loss": 0.3026, "step": 1097 }, { "epoch": 0.11379417556223442, "grad_norm": 0.4677768349647522, "learning_rate": 3.926886162487124e-05, "loss": 0.206, "step": 1098 }, { "epoch": 0.11389781324489584, "grad_norm": 0.5187677145004272, "learning_rate": 3.926706189880145e-05, "loss": 0.2868, "step": 1099 }, { "epoch": 0.11400145092755726, "grad_norm": 0.42227962613105774, "learning_rate": 3.926526000174908e-05, "loss": 0.2685, "step": 1100 }, { "epoch": 0.11410508861021867, "grad_norm": 0.5097355842590332, "learning_rate": 3.926345593391715e-05, "loss": 0.2664, "step": 1101 }, { "epoch": 0.11420872629288009, "grad_norm": 0.39787158370018005, "learning_rate": 3.926164969550896e-05, "loss": 0.2331, "step": 1102 }, { "epoch": 0.11431236397554151, "grad_norm": 0.504854142665863, "learning_rate": 3.9259841286728024e-05, "loss": 0.2386, "step": 1103 }, { "epoch": 0.11441600165820293, "grad_norm": 0.5150250196456909, "learning_rate": 3.925803070777812e-05, "loss": 0.2853, "step": 1104 }, { "epoch": 0.11451963934086434, "grad_norm": 0.4854704737663269, "learning_rate": 3.9256217958863236e-05, "loss": 0.2674, "step": 1105 }, { "epoch": 0.11462327702352576, "grad_norm": 0.4782135486602783, "learning_rate": 3.9254403040187664e-05, "loss": 0.2326, "step": 1106 }, { "epoch": 0.11472691470618716, "grad_norm": 0.5136960744857788, "learning_rate": 3.925258595195587e-05, "loss": 0.2808, "step": 1107 }, { "epoch": 0.11483055238884858, "grad_norm": 0.45660775899887085, "learning_rate": 3.9250766694372634e-05, "loss": 0.2145, "step": 1108 }, { "epoch": 0.11493419007151, "grad_norm": 0.4734261929988861, "learning_rate": 3.924894526764293e-05, "loss": 0.2327, "step": 1109 }, { "epoch": 0.11503782775417142, "grad_norm": 0.451330304145813, "learning_rate": 3.924712167197199e-05, "loss": 0.2568, "step": 1110 }, { "epoch": 0.11514146543683283, "grad_norm": 0.4574805200099945, "learning_rate": 3.924529590756531e-05, "loss": 0.233, "step": 1111 }, { "epoch": 0.11524510311949425, "grad_norm": 0.468637615442276, "learning_rate": 3.9243467974628596e-05, "loss": 0.2596, "step": 1112 }, { "epoch": 0.11534874080215567, "grad_norm": 0.4985135793685913, "learning_rate": 3.924163787336783e-05, "loss": 0.2166, "step": 1113 }, { "epoch": 0.11545237848481708, "grad_norm": 0.5171657204627991, "learning_rate": 3.9239805603989213e-05, "loss": 0.2658, "step": 1114 }, { "epoch": 0.1155560161674785, "grad_norm": 0.48922353982925415, "learning_rate": 3.923797116669922e-05, "loss": 0.2254, "step": 1115 }, { "epoch": 0.1156596538501399, "grad_norm": 0.4309007227420807, "learning_rate": 3.923613456170454e-05, "loss": 0.2001, "step": 1116 }, { "epoch": 0.11576329153280132, "grad_norm": 0.4351019263267517, "learning_rate": 3.923429578921211e-05, "loss": 0.2553, "step": 1117 }, { "epoch": 0.11586692921546274, "grad_norm": 0.49027588963508606, "learning_rate": 3.9232454849429144e-05, "loss": 0.2328, "step": 1118 }, { "epoch": 0.11597056689812416, "grad_norm": 0.42478036880493164, "learning_rate": 3.9230611742563055e-05, "loss": 0.2166, "step": 1119 }, { "epoch": 0.11607420458078557, "grad_norm": 0.5433241128921509, "learning_rate": 3.922876646882153e-05, "loss": 0.2613, "step": 1120 }, { "epoch": 0.11617784226344699, "grad_norm": 0.3327256739139557, "learning_rate": 3.9226919028412494e-05, "loss": 0.1829, "step": 1121 }, { "epoch": 0.11628147994610841, "grad_norm": 0.5044670701026917, "learning_rate": 3.9225069421544113e-05, "loss": 0.2609, "step": 1122 }, { "epoch": 0.11638511762876982, "grad_norm": 0.5583537817001343, "learning_rate": 3.922321764842479e-05, "loss": 0.283, "step": 1123 }, { "epoch": 0.11648875531143124, "grad_norm": 0.44634732604026794, "learning_rate": 3.922136370926319e-05, "loss": 0.2372, "step": 1124 }, { "epoch": 0.11659239299409266, "grad_norm": 0.43324288725852966, "learning_rate": 3.92195076042682e-05, "loss": 0.2096, "step": 1125 }, { "epoch": 0.11669603067675406, "grad_norm": 0.4762877821922302, "learning_rate": 3.9217649333648984e-05, "loss": 0.2601, "step": 1126 }, { "epoch": 0.11679966835941548, "grad_norm": 0.529367983341217, "learning_rate": 3.92157888976149e-05, "loss": 0.2616, "step": 1127 }, { "epoch": 0.1169033060420769, "grad_norm": 0.4866292178630829, "learning_rate": 3.92139262963756e-05, "loss": 0.2193, "step": 1128 }, { "epoch": 0.11700694372473831, "grad_norm": 0.4979463219642639, "learning_rate": 3.921206153014096e-05, "loss": 0.2631, "step": 1129 }, { "epoch": 0.11711058140739973, "grad_norm": 0.5391631722450256, "learning_rate": 3.921019459912109e-05, "loss": 0.2552, "step": 1130 }, { "epoch": 0.11721421909006115, "grad_norm": 0.5459678173065186, "learning_rate": 3.920832550352635e-05, "loss": 0.2565, "step": 1131 }, { "epoch": 0.11731785677272256, "grad_norm": 0.5035747289657593, "learning_rate": 3.920645424356735e-05, "loss": 0.2511, "step": 1132 }, { "epoch": 0.11742149445538398, "grad_norm": 0.4486197233200073, "learning_rate": 3.920458081945495e-05, "loss": 0.227, "step": 1133 }, { "epoch": 0.1175251321380454, "grad_norm": 0.4556668996810913, "learning_rate": 3.9202705231400237e-05, "loss": 0.2207, "step": 1134 }, { "epoch": 0.1176287698207068, "grad_norm": 0.4258916974067688, "learning_rate": 3.920082747961455e-05, "loss": 0.242, "step": 1135 }, { "epoch": 0.11773240750336822, "grad_norm": 0.48589828610420227, "learning_rate": 3.919894756430947e-05, "loss": 0.2494, "step": 1136 }, { "epoch": 0.11783604518602964, "grad_norm": 0.4887816309928894, "learning_rate": 3.919706548569682e-05, "loss": 0.2222, "step": 1137 }, { "epoch": 0.11793968286869105, "grad_norm": 0.48513126373291016, "learning_rate": 3.9195181243988676e-05, "loss": 0.2251, "step": 1138 }, { "epoch": 0.11804332055135247, "grad_norm": 0.43728193640708923, "learning_rate": 3.919329483939735e-05, "loss": 0.2303, "step": 1139 }, { "epoch": 0.11814695823401389, "grad_norm": 0.5254392623901367, "learning_rate": 3.91914062721354e-05, "loss": 0.3175, "step": 1140 }, { "epoch": 0.1182505959166753, "grad_norm": 0.40364351868629456, "learning_rate": 3.918951554241562e-05, "loss": 0.2119, "step": 1141 }, { "epoch": 0.11835423359933672, "grad_norm": 0.46782156825065613, "learning_rate": 3.9187622650451065e-05, "loss": 0.2109, "step": 1142 }, { "epoch": 0.11845787128199814, "grad_norm": 0.5325164198875427, "learning_rate": 3.9185727596455015e-05, "loss": 0.2793, "step": 1143 }, { "epoch": 0.11856150896465956, "grad_norm": 0.48460426926612854, "learning_rate": 3.9183830380641e-05, "loss": 0.248, "step": 1144 }, { "epoch": 0.11866514664732096, "grad_norm": 0.4963580369949341, "learning_rate": 3.91819310032228e-05, "loss": 0.2803, "step": 1145 }, { "epoch": 0.11876878432998238, "grad_norm": 0.4866742789745331, "learning_rate": 3.918002946441444e-05, "loss": 0.2398, "step": 1146 }, { "epoch": 0.1188724220126438, "grad_norm": 0.4291539490222931, "learning_rate": 3.917812576443017e-05, "loss": 0.2557, "step": 1147 }, { "epoch": 0.11897605969530521, "grad_norm": 0.4939398765563965, "learning_rate": 3.91762199034845e-05, "loss": 0.2478, "step": 1148 }, { "epoch": 0.11907969737796663, "grad_norm": 0.4453083574771881, "learning_rate": 3.917431188179219e-05, "loss": 0.2164, "step": 1149 }, { "epoch": 0.11918333506062805, "grad_norm": 0.5078995227813721, "learning_rate": 3.917240169956822e-05, "loss": 0.265, "step": 1150 }, { "epoch": 0.11928697274328946, "grad_norm": 0.4686465561389923, "learning_rate": 3.9170489357027827e-05, "loss": 0.25, "step": 1151 }, { "epoch": 0.11939061042595088, "grad_norm": 0.40397870540618896, "learning_rate": 3.91685748543865e-05, "loss": 0.2002, "step": 1152 }, { "epoch": 0.1194942481086123, "grad_norm": 0.5199308395385742, "learning_rate": 3.916665819185995e-05, "loss": 0.271, "step": 1153 }, { "epoch": 0.1195978857912737, "grad_norm": 0.5083966255187988, "learning_rate": 3.916473936966416e-05, "loss": 0.2282, "step": 1154 }, { "epoch": 0.11970152347393512, "grad_norm": 0.4933621287345886, "learning_rate": 3.9162818388015324e-05, "loss": 0.2453, "step": 1155 }, { "epoch": 0.11980516115659653, "grad_norm": 0.4529334008693695, "learning_rate": 3.9160895247129905e-05, "loss": 0.2728, "step": 1156 }, { "epoch": 0.11990879883925795, "grad_norm": 0.4741211533546448, "learning_rate": 3.915896994722458e-05, "loss": 0.2299, "step": 1157 }, { "epoch": 0.12001243652191937, "grad_norm": 0.48024997115135193, "learning_rate": 3.9157042488516325e-05, "loss": 0.2603, "step": 1158 }, { "epoch": 0.12011607420458079, "grad_norm": 0.501326322555542, "learning_rate": 3.915511287122229e-05, "loss": 0.2643, "step": 1159 }, { "epoch": 0.1202197118872422, "grad_norm": 0.48749133944511414, "learning_rate": 3.9153181095559924e-05, "loss": 0.2703, "step": 1160 }, { "epoch": 0.12032334956990362, "grad_norm": 0.505791425704956, "learning_rate": 3.915124716174688e-05, "loss": 0.2386, "step": 1161 }, { "epoch": 0.12042698725256504, "grad_norm": 0.5771316885948181, "learning_rate": 3.914931107000107e-05, "loss": 0.248, "step": 1162 }, { "epoch": 0.12053062493522645, "grad_norm": 0.5785459280014038, "learning_rate": 3.914737282054067e-05, "loss": 0.2648, "step": 1163 }, { "epoch": 0.12063426261788786, "grad_norm": 0.4337507486343384, "learning_rate": 3.914543241358406e-05, "loss": 0.24, "step": 1164 }, { "epoch": 0.12073790030054928, "grad_norm": 0.3794737756252289, "learning_rate": 3.9143489849349886e-05, "loss": 0.2227, "step": 1165 }, { "epoch": 0.12084153798321069, "grad_norm": 0.5467318892478943, "learning_rate": 3.914154512805704e-05, "loss": 0.3101, "step": 1166 }, { "epoch": 0.12094517566587211, "grad_norm": 0.4539571702480316, "learning_rate": 3.9139598249924635e-05, "loss": 0.2309, "step": 1167 }, { "epoch": 0.12104881334853353, "grad_norm": 0.5110743641853333, "learning_rate": 3.913764921517207e-05, "loss": 0.2546, "step": 1168 }, { "epoch": 0.12115245103119494, "grad_norm": 0.4412529170513153, "learning_rate": 3.913569802401892e-05, "loss": 0.2387, "step": 1169 }, { "epoch": 0.12125608871385636, "grad_norm": 0.4804104268550873, "learning_rate": 3.9133744676685075e-05, "loss": 0.2543, "step": 1170 }, { "epoch": 0.12135972639651778, "grad_norm": 0.4778258502483368, "learning_rate": 3.913178917339062e-05, "loss": 0.2648, "step": 1171 }, { "epoch": 0.1214633640791792, "grad_norm": 0.48556414246559143, "learning_rate": 3.912983151435591e-05, "loss": 0.239, "step": 1172 }, { "epoch": 0.1215670017618406, "grad_norm": 0.4241321086883545, "learning_rate": 3.912787169980152e-05, "loss": 0.2021, "step": 1173 }, { "epoch": 0.12167063944450202, "grad_norm": 0.4774876832962036, "learning_rate": 3.9125909729948276e-05, "loss": 0.24, "step": 1174 }, { "epoch": 0.12177427712716343, "grad_norm": 0.4637620449066162, "learning_rate": 3.912394560501726e-05, "loss": 0.2167, "step": 1175 }, { "epoch": 0.12187791480982485, "grad_norm": 0.48373448848724365, "learning_rate": 3.9121979325229784e-05, "loss": 0.2424, "step": 1176 }, { "epoch": 0.12198155249248627, "grad_norm": 0.49754947423934937, "learning_rate": 3.91200108908074e-05, "loss": 0.2698, "step": 1177 }, { "epoch": 0.12208519017514768, "grad_norm": 0.46409887075424194, "learning_rate": 3.911804030197191e-05, "loss": 0.2258, "step": 1178 }, { "epoch": 0.1221888278578091, "grad_norm": 0.56962651014328, "learning_rate": 3.911606755894536e-05, "loss": 0.2766, "step": 1179 }, { "epoch": 0.12229246554047052, "grad_norm": 0.48047998547554016, "learning_rate": 3.911409266195003e-05, "loss": 0.2596, "step": 1180 }, { "epoch": 0.12239610322313194, "grad_norm": 0.5867676734924316, "learning_rate": 3.911211561120846e-05, "loss": 0.3096, "step": 1181 }, { "epoch": 0.12249974090579335, "grad_norm": 0.49909263849258423, "learning_rate": 3.911013640694341e-05, "loss": 0.2556, "step": 1182 }, { "epoch": 0.12260337858845476, "grad_norm": 0.501254677772522, "learning_rate": 3.910815504937789e-05, "loss": 0.2347, "step": 1183 }, { "epoch": 0.12270701627111617, "grad_norm": 0.5377936959266663, "learning_rate": 3.910617153873517e-05, "loss": 0.2529, "step": 1184 }, { "epoch": 0.12281065395377759, "grad_norm": 0.5332674384117126, "learning_rate": 3.910418587523874e-05, "loss": 0.3185, "step": 1185 }, { "epoch": 0.12291429163643901, "grad_norm": 0.4071737825870514, "learning_rate": 3.910219805911234e-05, "loss": 0.2396, "step": 1186 }, { "epoch": 0.12301792931910042, "grad_norm": 0.5417530536651611, "learning_rate": 3.910020809057997e-05, "loss": 0.2836, "step": 1187 }, { "epoch": 0.12312156700176184, "grad_norm": 0.44844117760658264, "learning_rate": 3.909821596986584e-05, "loss": 0.2133, "step": 1188 }, { "epoch": 0.12322520468442326, "grad_norm": 0.41205480694770813, "learning_rate": 3.909622169719442e-05, "loss": 0.2244, "step": 1189 }, { "epoch": 0.12332884236708468, "grad_norm": 0.49824705719947815, "learning_rate": 3.909422527279042e-05, "loss": 0.2742, "step": 1190 }, { "epoch": 0.1234324800497461, "grad_norm": 0.4864721894264221, "learning_rate": 3.909222669687881e-05, "loss": 0.2334, "step": 1191 }, { "epoch": 0.1235361177324075, "grad_norm": 0.3949887454509735, "learning_rate": 3.909022596968477e-05, "loss": 0.1864, "step": 1192 }, { "epoch": 0.12363975541506891, "grad_norm": 0.6071784496307373, "learning_rate": 3.908822309143374e-05, "loss": 0.3133, "step": 1193 }, { "epoch": 0.12374339309773033, "grad_norm": 0.4521191120147705, "learning_rate": 3.908621806235141e-05, "loss": 0.2416, "step": 1194 }, { "epoch": 0.12384703078039175, "grad_norm": 0.5484431385993958, "learning_rate": 3.9084210882663695e-05, "loss": 0.2785, "step": 1195 }, { "epoch": 0.12395066846305317, "grad_norm": 0.5148352980613708, "learning_rate": 3.908220155259677e-05, "loss": 0.2188, "step": 1196 }, { "epoch": 0.12405430614571458, "grad_norm": 0.43146973848342896, "learning_rate": 3.908019007237703e-05, "loss": 0.1878, "step": 1197 }, { "epoch": 0.124157943828376, "grad_norm": 0.5318440794944763, "learning_rate": 3.907817644223114e-05, "loss": 0.2551, "step": 1198 }, { "epoch": 0.12426158151103742, "grad_norm": 0.4649069905281067, "learning_rate": 3.9076160662385986e-05, "loss": 0.2022, "step": 1199 }, { "epoch": 0.12436521919369883, "grad_norm": 0.4180242717266083, "learning_rate": 3.9074142733068704e-05, "loss": 0.2165, "step": 1200 }, { "epoch": 0.12446885687636025, "grad_norm": 0.5106714963912964, "learning_rate": 3.907212265450666e-05, "loss": 0.2603, "step": 1201 }, { "epoch": 0.12457249455902165, "grad_norm": 0.507351815700531, "learning_rate": 3.90701004269275e-05, "loss": 0.284, "step": 1202 }, { "epoch": 0.12467613224168307, "grad_norm": 0.5832152366638184, "learning_rate": 3.906807605055906e-05, "loss": 0.3114, "step": 1203 }, { "epoch": 0.12477976992434449, "grad_norm": 0.5098874568939209, "learning_rate": 3.906604952562945e-05, "loss": 0.2527, "step": 1204 }, { "epoch": 0.1248834076070059, "grad_norm": 0.47632232308387756, "learning_rate": 3.9064020852367024e-05, "loss": 0.2398, "step": 1205 }, { "epoch": 0.12498704528966732, "grad_norm": 0.515520453453064, "learning_rate": 3.906199003100036e-05, "loss": 0.2738, "step": 1206 }, { "epoch": 0.12509068297232873, "grad_norm": 0.5277115702629089, "learning_rate": 3.905995706175829e-05, "loss": 0.2652, "step": 1207 }, { "epoch": 0.12519432065499014, "grad_norm": 0.44119536876678467, "learning_rate": 3.9057921944869896e-05, "loss": 0.2407, "step": 1208 }, { "epoch": 0.12529795833765156, "grad_norm": 0.5089069604873657, "learning_rate": 3.9055884680564474e-05, "loss": 0.2819, "step": 1209 }, { "epoch": 0.12540159602031298, "grad_norm": 0.5063955783843994, "learning_rate": 3.9053845269071595e-05, "loss": 0.2752, "step": 1210 }, { "epoch": 0.1255052337029744, "grad_norm": 0.4868340790271759, "learning_rate": 3.905180371062105e-05, "loss": 0.272, "step": 1211 }, { "epoch": 0.1256088713856358, "grad_norm": 0.4226343035697937, "learning_rate": 3.9049760005442875e-05, "loss": 0.2112, "step": 1212 }, { "epoch": 0.12571250906829723, "grad_norm": 0.41173243522644043, "learning_rate": 3.904771415376736e-05, "loss": 0.2205, "step": 1213 }, { "epoch": 0.12581614675095865, "grad_norm": 0.48055100440979004, "learning_rate": 3.9045666155825024e-05, "loss": 0.2855, "step": 1214 }, { "epoch": 0.12591978443362006, "grad_norm": 0.45464372634887695, "learning_rate": 3.904361601184663e-05, "loss": 0.2378, "step": 1215 }, { "epoch": 0.12602342211628148, "grad_norm": 0.5273199081420898, "learning_rate": 3.904156372206319e-05, "loss": 0.3178, "step": 1216 }, { "epoch": 0.1261270597989429, "grad_norm": 0.4722169041633606, "learning_rate": 3.903950928670595e-05, "loss": 0.2473, "step": 1217 }, { "epoch": 0.12623069748160431, "grad_norm": 0.4227035343647003, "learning_rate": 3.90374527060064e-05, "loss": 0.2393, "step": 1218 }, { "epoch": 0.12633433516426573, "grad_norm": 0.45303425192832947, "learning_rate": 3.9035393980196274e-05, "loss": 0.2462, "step": 1219 }, { "epoch": 0.12643797284692715, "grad_norm": 0.3675241768360138, "learning_rate": 3.903333310950755e-05, "loss": 0.1741, "step": 1220 }, { "epoch": 0.12654161052958857, "grad_norm": 0.39168018102645874, "learning_rate": 3.903127009417244e-05, "loss": 0.2223, "step": 1221 }, { "epoch": 0.12664524821224998, "grad_norm": 0.4818548858165741, "learning_rate": 3.902920493442339e-05, "loss": 0.2515, "step": 1222 }, { "epoch": 0.1267488858949114, "grad_norm": 0.412105530500412, "learning_rate": 3.9027137630493114e-05, "loss": 0.1881, "step": 1223 }, { "epoch": 0.12685252357757282, "grad_norm": 0.49911603331565857, "learning_rate": 3.902506818261455e-05, "loss": 0.28, "step": 1224 }, { "epoch": 0.12695616126023423, "grad_norm": 0.4557046592235565, "learning_rate": 3.902299659102088e-05, "loss": 0.2285, "step": 1225 }, { "epoch": 0.12705979894289562, "grad_norm": 0.4454115331172943, "learning_rate": 3.902092285594552e-05, "loss": 0.2487, "step": 1226 }, { "epoch": 0.12716343662555704, "grad_norm": 0.4897434711456299, "learning_rate": 3.9018846977622143e-05, "loss": 0.2826, "step": 1227 }, { "epoch": 0.12726707430821846, "grad_norm": 0.5356862545013428, "learning_rate": 3.901676895628466e-05, "loss": 0.304, "step": 1228 }, { "epoch": 0.12737071199087988, "grad_norm": 0.4942310154438019, "learning_rate": 3.9014688792167206e-05, "loss": 0.2785, "step": 1229 }, { "epoch": 0.1274743496735413, "grad_norm": 0.45313510298728943, "learning_rate": 3.901260648550418e-05, "loss": 0.2448, "step": 1230 }, { "epoch": 0.1275779873562027, "grad_norm": 0.41935995221138, "learning_rate": 3.901052203653021e-05, "loss": 0.2032, "step": 1231 }, { "epoch": 0.12768162503886413, "grad_norm": 0.3813270032405853, "learning_rate": 3.900843544548017e-05, "loss": 0.194, "step": 1232 }, { "epoch": 0.12778526272152554, "grad_norm": 0.4859898090362549, "learning_rate": 3.900634671258917e-05, "loss": 0.2654, "step": 1233 }, { "epoch": 0.12788890040418696, "grad_norm": 0.49495765566825867, "learning_rate": 3.900425583809258e-05, "loss": 0.2822, "step": 1234 }, { "epoch": 0.12799253808684838, "grad_norm": 0.4281288683414459, "learning_rate": 3.9002162822225975e-05, "loss": 0.2209, "step": 1235 }, { "epoch": 0.1280961757695098, "grad_norm": 0.4575282037258148, "learning_rate": 3.900006766522521e-05, "loss": 0.2194, "step": 1236 }, { "epoch": 0.1281998134521712, "grad_norm": 0.551102340221405, "learning_rate": 3.899797036732635e-05, "loss": 0.2612, "step": 1237 }, { "epoch": 0.12830345113483263, "grad_norm": 0.44362667202949524, "learning_rate": 3.899587092876572e-05, "loss": 0.2357, "step": 1238 }, { "epoch": 0.12840708881749405, "grad_norm": 0.42648833990097046, "learning_rate": 3.899376934977989e-05, "loss": 0.2324, "step": 1239 }, { "epoch": 0.12851072650015546, "grad_norm": 0.48824894428253174, "learning_rate": 3.899166563060565e-05, "loss": 0.2273, "step": 1240 }, { "epoch": 0.12861436418281688, "grad_norm": 0.48277342319488525, "learning_rate": 3.898955977148005e-05, "loss": 0.2452, "step": 1241 }, { "epoch": 0.1287180018654783, "grad_norm": 0.5302169919013977, "learning_rate": 3.8987451772640386e-05, "loss": 0.2873, "step": 1242 }, { "epoch": 0.12882163954813972, "grad_norm": 0.45799848437309265, "learning_rate": 3.898534163432416e-05, "loss": 0.2306, "step": 1243 }, { "epoch": 0.12892527723080113, "grad_norm": 0.42073294520378113, "learning_rate": 3.8983229356769155e-05, "loss": 0.2101, "step": 1244 }, { "epoch": 0.12902891491346252, "grad_norm": 0.4514122009277344, "learning_rate": 3.898111494021338e-05, "loss": 0.2231, "step": 1245 }, { "epoch": 0.12913255259612394, "grad_norm": 0.4837682545185089, "learning_rate": 3.897899838489507e-05, "loss": 0.2748, "step": 1246 }, { "epoch": 0.12923619027878536, "grad_norm": 0.5559665560722351, "learning_rate": 3.8976879691052743e-05, "loss": 0.2748, "step": 1247 }, { "epoch": 0.12933982796144677, "grad_norm": 0.4597959816455841, "learning_rate": 3.89747588589251e-05, "loss": 0.2271, "step": 1248 }, { "epoch": 0.1294434656441082, "grad_norm": 0.45351025462150574, "learning_rate": 3.8972635888751125e-05, "loss": 0.2592, "step": 1249 }, { "epoch": 0.1295471033267696, "grad_norm": 0.42491164803504944, "learning_rate": 3.897051078077003e-05, "loss": 0.2205, "step": 1250 }, { "epoch": 0.12965074100943103, "grad_norm": 0.4073490798473358, "learning_rate": 3.896838353522128e-05, "loss": 0.2481, "step": 1251 }, { "epoch": 0.12975437869209244, "grad_norm": 0.43321773409843445, "learning_rate": 3.8966254152344555e-05, "loss": 0.2368, "step": 1252 }, { "epoch": 0.12985801637475386, "grad_norm": 0.4953174591064453, "learning_rate": 3.89641226323798e-05, "loss": 0.2316, "step": 1253 }, { "epoch": 0.12996165405741528, "grad_norm": 0.487069308757782, "learning_rate": 3.896198897556718e-05, "loss": 0.2518, "step": 1254 }, { "epoch": 0.1300652917400767, "grad_norm": 0.44341564178466797, "learning_rate": 3.895985318214712e-05, "loss": 0.2243, "step": 1255 }, { "epoch": 0.1301689294227381, "grad_norm": 0.49199700355529785, "learning_rate": 3.895771525236028e-05, "loss": 0.2572, "step": 1256 }, { "epoch": 0.13027256710539953, "grad_norm": 0.4807712733745575, "learning_rate": 3.895557518644756e-05, "loss": 0.2587, "step": 1257 }, { "epoch": 0.13037620478806095, "grad_norm": 0.48649948835372925, "learning_rate": 3.8953432984650085e-05, "loss": 0.2514, "step": 1258 }, { "epoch": 0.13047984247072236, "grad_norm": 0.46023765206336975, "learning_rate": 3.895128864720925e-05, "loss": 0.2664, "step": 1259 }, { "epoch": 0.13058348015338378, "grad_norm": 0.45767319202423096, "learning_rate": 3.894914217436667e-05, "loss": 0.2184, "step": 1260 }, { "epoch": 0.1306871178360452, "grad_norm": 0.4645031988620758, "learning_rate": 3.894699356636421e-05, "loss": 0.2444, "step": 1261 }, { "epoch": 0.13079075551870661, "grad_norm": 0.556087076663971, "learning_rate": 3.894484282344396e-05, "loss": 0.2425, "step": 1262 }, { "epoch": 0.13089439320136803, "grad_norm": 0.4621123969554901, "learning_rate": 3.8942689945848284e-05, "loss": 0.2066, "step": 1263 }, { "epoch": 0.13099803088402942, "grad_norm": 0.5394175052642822, "learning_rate": 3.8940534933819744e-05, "loss": 0.2577, "step": 1264 }, { "epoch": 0.13110166856669084, "grad_norm": 0.42368635535240173, "learning_rate": 3.893837778760117e-05, "loss": 0.2162, "step": 1265 }, { "epoch": 0.13120530624935225, "grad_norm": 0.4939993917942047, "learning_rate": 3.893621850743563e-05, "loss": 0.2421, "step": 1266 }, { "epoch": 0.13130894393201367, "grad_norm": 0.48513132333755493, "learning_rate": 3.893405709356642e-05, "loss": 0.2505, "step": 1267 }, { "epoch": 0.1314125816146751, "grad_norm": 0.4326397180557251, "learning_rate": 3.89318935462371e-05, "loss": 0.1924, "step": 1268 }, { "epoch": 0.1315162192973365, "grad_norm": 0.49334537982940674, "learning_rate": 3.8929727865691445e-05, "loss": 0.2633, "step": 1269 }, { "epoch": 0.13161985697999792, "grad_norm": 0.4784174859523773, "learning_rate": 3.892756005217347e-05, "loss": 0.2485, "step": 1270 }, { "epoch": 0.13172349466265934, "grad_norm": 0.5221570134162903, "learning_rate": 3.892539010592746e-05, "loss": 0.2777, "step": 1271 }, { "epoch": 0.13182713234532076, "grad_norm": 0.5145605802536011, "learning_rate": 3.892321802719791e-05, "loss": 0.2546, "step": 1272 }, { "epoch": 0.13193077002798217, "grad_norm": 0.5601503849029541, "learning_rate": 3.892104381622957e-05, "loss": 0.2766, "step": 1273 }, { "epoch": 0.1320344077106436, "grad_norm": 0.5030035972595215, "learning_rate": 3.891886747326743e-05, "loss": 0.2316, "step": 1274 }, { "epoch": 0.132138045393305, "grad_norm": 0.4209156930446625, "learning_rate": 3.891668899855671e-05, "loss": 0.2388, "step": 1275 }, { "epoch": 0.13224168307596643, "grad_norm": 0.4128512740135193, "learning_rate": 3.891450839234288e-05, "loss": 0.1973, "step": 1276 }, { "epoch": 0.13234532075862784, "grad_norm": 0.3734406530857086, "learning_rate": 3.891232565487164e-05, "loss": 0.1845, "step": 1277 }, { "epoch": 0.13244895844128926, "grad_norm": 0.4477710425853729, "learning_rate": 3.891014078638896e-05, "loss": 0.2195, "step": 1278 }, { "epoch": 0.13255259612395068, "grad_norm": 0.43623194098472595, "learning_rate": 3.890795378714101e-05, "loss": 0.2471, "step": 1279 }, { "epoch": 0.1326562338066121, "grad_norm": 0.571938693523407, "learning_rate": 3.890576465737421e-05, "loss": 0.2825, "step": 1280 }, { "epoch": 0.1327598714892735, "grad_norm": 0.4960414171218872, "learning_rate": 3.890357339733524e-05, "loss": 0.2314, "step": 1281 }, { "epoch": 0.13286350917193493, "grad_norm": 0.4360418915748596, "learning_rate": 3.890138000727101e-05, "loss": 0.1954, "step": 1282 }, { "epoch": 0.13296714685459632, "grad_norm": 0.49753016233444214, "learning_rate": 3.8899184487428665e-05, "loss": 0.2618, "step": 1283 }, { "epoch": 0.13307078453725774, "grad_norm": 0.492436021566391, "learning_rate": 3.889698683805559e-05, "loss": 0.2932, "step": 1284 }, { "epoch": 0.13317442221991915, "grad_norm": 0.5095851421356201, "learning_rate": 3.889478705939941e-05, "loss": 0.2694, "step": 1285 }, { "epoch": 0.13327805990258057, "grad_norm": 0.47561195492744446, "learning_rate": 3.8892585151708005e-05, "loss": 0.2354, "step": 1286 }, { "epoch": 0.133381697585242, "grad_norm": 0.5100952386856079, "learning_rate": 3.8890381115229465e-05, "loss": 0.286, "step": 1287 }, { "epoch": 0.1334853352679034, "grad_norm": 0.3915499448776245, "learning_rate": 3.888817495021215e-05, "loss": 0.1861, "step": 1288 }, { "epoch": 0.13358897295056482, "grad_norm": 0.4240892827510834, "learning_rate": 3.8885966656904646e-05, "loss": 0.2148, "step": 1289 }, { "epoch": 0.13369261063322624, "grad_norm": 0.492962121963501, "learning_rate": 3.888375623555578e-05, "loss": 0.286, "step": 1290 }, { "epoch": 0.13379624831588766, "grad_norm": 0.4343388080596924, "learning_rate": 3.888154368641461e-05, "loss": 0.2041, "step": 1291 }, { "epoch": 0.13389988599854907, "grad_norm": 0.46965283155441284, "learning_rate": 3.887932900973045e-05, "loss": 0.2112, "step": 1292 }, { "epoch": 0.1340035236812105, "grad_norm": 0.48123374581336975, "learning_rate": 3.887711220575285e-05, "loss": 0.2624, "step": 1293 }, { "epoch": 0.1341071613638719, "grad_norm": 0.5123097896575928, "learning_rate": 3.887489327473159e-05, "loss": 0.2465, "step": 1294 }, { "epoch": 0.13421079904653332, "grad_norm": 0.5587708353996277, "learning_rate": 3.8872672216916696e-05, "loss": 0.2606, "step": 1295 }, { "epoch": 0.13431443672919474, "grad_norm": 0.5073254108428955, "learning_rate": 3.8870449032558436e-05, "loss": 0.2617, "step": 1296 }, { "epoch": 0.13441807441185616, "grad_norm": 0.4759473204612732, "learning_rate": 3.8868223721907314e-05, "loss": 0.2218, "step": 1297 }, { "epoch": 0.13452171209451758, "grad_norm": 0.4606642425060272, "learning_rate": 3.886599628521407e-05, "loss": 0.2088, "step": 1298 }, { "epoch": 0.134625349777179, "grad_norm": 0.4292793869972229, "learning_rate": 3.8863766722729696e-05, "loss": 0.1981, "step": 1299 }, { "epoch": 0.1347289874598404, "grad_norm": 0.4475545585155487, "learning_rate": 3.8861535034705416e-05, "loss": 0.2544, "step": 1300 }, { "epoch": 0.13483262514250183, "grad_norm": 0.4809749722480774, "learning_rate": 3.8859301221392686e-05, "loss": 0.2726, "step": 1301 }, { "epoch": 0.13493626282516322, "grad_norm": 0.5430611371994019, "learning_rate": 3.885706528304321e-05, "loss": 0.2832, "step": 1302 }, { "epoch": 0.13503990050782463, "grad_norm": 0.5341582894325256, "learning_rate": 3.885482721990893e-05, "loss": 0.2874, "step": 1303 }, { "epoch": 0.13514353819048605, "grad_norm": 0.4888921082019806, "learning_rate": 3.885258703224204e-05, "loss": 0.2659, "step": 1304 }, { "epoch": 0.13524717587314747, "grad_norm": 0.4214054346084595, "learning_rate": 3.8850344720294934e-05, "loss": 0.1975, "step": 1305 }, { "epoch": 0.13535081355580889, "grad_norm": 0.5062821507453918, "learning_rate": 3.8848100284320295e-05, "loss": 0.2452, "step": 1306 }, { "epoch": 0.1354544512384703, "grad_norm": 0.4390522837638855, "learning_rate": 3.884585372457102e-05, "loss": 0.2504, "step": 1307 }, { "epoch": 0.13555808892113172, "grad_norm": 0.46084293723106384, "learning_rate": 3.8843605041300245e-05, "loss": 0.225, "step": 1308 }, { "epoch": 0.13566172660379314, "grad_norm": 0.40920358896255493, "learning_rate": 3.884135423476134e-05, "loss": 0.2164, "step": 1309 }, { "epoch": 0.13576536428645455, "grad_norm": 0.4455513656139374, "learning_rate": 3.8839101305207934e-05, "loss": 0.2211, "step": 1310 }, { "epoch": 0.13586900196911597, "grad_norm": 0.42182791233062744, "learning_rate": 3.883684625289388e-05, "loss": 0.2196, "step": 1311 }, { "epoch": 0.1359726396517774, "grad_norm": 0.47031956911087036, "learning_rate": 3.883458907807327e-05, "loss": 0.2434, "step": 1312 }, { "epoch": 0.1360762773344388, "grad_norm": 0.46593621373176575, "learning_rate": 3.883232978100044e-05, "loss": 0.253, "step": 1313 }, { "epoch": 0.13617991501710022, "grad_norm": 0.49692028760910034, "learning_rate": 3.883006836192997e-05, "loss": 0.2765, "step": 1314 }, { "epoch": 0.13628355269976164, "grad_norm": 0.5057439208030701, "learning_rate": 3.882780482111666e-05, "loss": 0.2238, "step": 1315 }, { "epoch": 0.13638719038242306, "grad_norm": 0.4612136781215668, "learning_rate": 3.882553915881558e-05, "loss": 0.2381, "step": 1316 }, { "epoch": 0.13649082806508447, "grad_norm": 0.5236420631408691, "learning_rate": 3.8823271375282005e-05, "loss": 0.2598, "step": 1317 }, { "epoch": 0.1365944657477459, "grad_norm": 0.5677441358566284, "learning_rate": 3.882100147077148e-05, "loss": 0.3423, "step": 1318 }, { "epoch": 0.1366981034304073, "grad_norm": 0.48475411534309387, "learning_rate": 3.8818729445539765e-05, "loss": 0.2229, "step": 1319 }, { "epoch": 0.13680174111306873, "grad_norm": 0.5205956697463989, "learning_rate": 3.881645529984287e-05, "loss": 0.2704, "step": 1320 }, { "epoch": 0.13690537879573011, "grad_norm": 0.43500372767448425, "learning_rate": 3.881417903393704e-05, "loss": 0.1902, "step": 1321 }, { "epoch": 0.13700901647839153, "grad_norm": 0.46301934123039246, "learning_rate": 3.8811900648078766e-05, "loss": 0.2265, "step": 1322 }, { "epoch": 0.13711265416105295, "grad_norm": 0.5650634169578552, "learning_rate": 3.880962014252477e-05, "loss": 0.287, "step": 1323 }, { "epoch": 0.13721629184371437, "grad_norm": 0.59044349193573, "learning_rate": 3.880733751753202e-05, "loss": 0.2648, "step": 1324 }, { "epoch": 0.13731992952637578, "grad_norm": 0.4861128628253937, "learning_rate": 3.880505277335771e-05, "loss": 0.278, "step": 1325 }, { "epoch": 0.1374235672090372, "grad_norm": 0.48721516132354736, "learning_rate": 3.880276591025929e-05, "loss": 0.2859, "step": 1326 }, { "epoch": 0.13752720489169862, "grad_norm": 0.4308353662490845, "learning_rate": 3.880047692849443e-05, "loss": 0.1972, "step": 1327 }, { "epoch": 0.13763084257436003, "grad_norm": 0.44840529561042786, "learning_rate": 3.8798185828321064e-05, "loss": 0.2119, "step": 1328 }, { "epoch": 0.13773448025702145, "grad_norm": 0.4419281780719757, "learning_rate": 3.879589260999734e-05, "loss": 0.2327, "step": 1329 }, { "epoch": 0.13783811793968287, "grad_norm": 0.41590288281440735, "learning_rate": 3.8793597273781654e-05, "loss": 0.2311, "step": 1330 }, { "epoch": 0.1379417556223443, "grad_norm": 0.51755690574646, "learning_rate": 3.879129981993265e-05, "loss": 0.2652, "step": 1331 }, { "epoch": 0.1380453933050057, "grad_norm": 0.5011487007141113, "learning_rate": 3.878900024870918e-05, "loss": 0.284, "step": 1332 }, { "epoch": 0.13814903098766712, "grad_norm": 0.4875727593898773, "learning_rate": 3.8786698560370374e-05, "loss": 0.266, "step": 1333 }, { "epoch": 0.13825266867032854, "grad_norm": 0.4918694496154785, "learning_rate": 3.878439475517558e-05, "loss": 0.2329, "step": 1334 }, { "epoch": 0.13835630635298996, "grad_norm": 0.49377235770225525, "learning_rate": 3.878208883338439e-05, "loss": 0.2486, "step": 1335 }, { "epoch": 0.13845994403565137, "grad_norm": 0.4716956913471222, "learning_rate": 3.8779780795256625e-05, "loss": 0.2515, "step": 1336 }, { "epoch": 0.1385635817183128, "grad_norm": 0.44217389822006226, "learning_rate": 3.877747064105235e-05, "loss": 0.2106, "step": 1337 }, { "epoch": 0.1386672194009742, "grad_norm": 0.464129239320755, "learning_rate": 3.877515837103188e-05, "loss": 0.2344, "step": 1338 }, { "epoch": 0.13877085708363562, "grad_norm": 0.5682270526885986, "learning_rate": 3.877284398545575e-05, "loss": 0.3267, "step": 1339 }, { "epoch": 0.138874494766297, "grad_norm": 0.48125720024108887, "learning_rate": 3.8770527484584735e-05, "loss": 0.251, "step": 1340 }, { "epoch": 0.13897813244895843, "grad_norm": 0.5513635873794556, "learning_rate": 3.876820886867987e-05, "loss": 0.2555, "step": 1341 }, { "epoch": 0.13908177013161985, "grad_norm": 0.4538300037384033, "learning_rate": 3.87658881380024e-05, "loss": 0.2604, "step": 1342 }, { "epoch": 0.13918540781428126, "grad_norm": 0.45465579628944397, "learning_rate": 3.876356529281383e-05, "loss": 0.2267, "step": 1343 }, { "epoch": 0.13928904549694268, "grad_norm": 0.4481503963470459, "learning_rate": 3.876124033337589e-05, "loss": 0.2248, "step": 1344 }, { "epoch": 0.1393926831796041, "grad_norm": 0.493288516998291, "learning_rate": 3.875891325995056e-05, "loss": 0.2517, "step": 1345 }, { "epoch": 0.13949632086226552, "grad_norm": 0.5286715626716614, "learning_rate": 3.875658407280004e-05, "loss": 0.257, "step": 1346 }, { "epoch": 0.13959995854492693, "grad_norm": 0.4413294196128845, "learning_rate": 3.875425277218678e-05, "loss": 0.2114, "step": 1347 }, { "epoch": 0.13970359622758835, "grad_norm": 0.4887286126613617, "learning_rate": 3.875191935837348e-05, "loss": 0.2617, "step": 1348 }, { "epoch": 0.13980723391024977, "grad_norm": 0.42799079418182373, "learning_rate": 3.874958383162305e-05, "loss": 0.1822, "step": 1349 }, { "epoch": 0.13991087159291118, "grad_norm": 0.5083425045013428, "learning_rate": 3.874724619219867e-05, "loss": 0.2603, "step": 1350 }, { "epoch": 0.1400145092755726, "grad_norm": 0.4632699489593506, "learning_rate": 3.874490644036373e-05, "loss": 0.2258, "step": 1351 }, { "epoch": 0.14011814695823402, "grad_norm": 0.4966972768306732, "learning_rate": 3.8742564576381864e-05, "loss": 0.248, "step": 1352 }, { "epoch": 0.14022178464089544, "grad_norm": 0.4451618194580078, "learning_rate": 3.8740220600516964e-05, "loss": 0.2373, "step": 1353 }, { "epoch": 0.14032542232355685, "grad_norm": 0.46346795558929443, "learning_rate": 3.8737874513033134e-05, "loss": 0.2441, "step": 1354 }, { "epoch": 0.14042906000621827, "grad_norm": 0.43191859126091003, "learning_rate": 3.8735526314194735e-05, "loss": 0.2372, "step": 1355 }, { "epoch": 0.1405326976888797, "grad_norm": 0.4335680902004242, "learning_rate": 3.873317600426636e-05, "loss": 0.2367, "step": 1356 }, { "epoch": 0.1406363353715411, "grad_norm": 0.49184444546699524, "learning_rate": 3.873082358351283e-05, "loss": 0.2548, "step": 1357 }, { "epoch": 0.14073997305420252, "grad_norm": 0.48698222637176514, "learning_rate": 3.8728469052199214e-05, "loss": 0.2618, "step": 1358 }, { "epoch": 0.1408436107368639, "grad_norm": 0.48168468475341797, "learning_rate": 3.872611241059083e-05, "loss": 0.2138, "step": 1359 }, { "epoch": 0.14094724841952533, "grad_norm": 0.4432443678379059, "learning_rate": 3.872375365895319e-05, "loss": 0.2345, "step": 1360 }, { "epoch": 0.14105088610218675, "grad_norm": 0.4656221866607666, "learning_rate": 3.872139279755211e-05, "loss": 0.2455, "step": 1361 }, { "epoch": 0.14115452378484816, "grad_norm": 0.5141227841377258, "learning_rate": 3.8719029826653584e-05, "loss": 0.2426, "step": 1362 }, { "epoch": 0.14125816146750958, "grad_norm": 0.4511367678642273, "learning_rate": 3.8716664746523885e-05, "loss": 0.232, "step": 1363 }, { "epoch": 0.141361799150171, "grad_norm": 0.44538968801498413, "learning_rate": 3.87142975574295e-05, "loss": 0.2215, "step": 1364 }, { "epoch": 0.14146543683283241, "grad_norm": 0.4484039843082428, "learning_rate": 3.871192825963714e-05, "loss": 0.2219, "step": 1365 }, { "epoch": 0.14156907451549383, "grad_norm": 0.4748300015926361, "learning_rate": 3.870955685341381e-05, "loss": 0.2509, "step": 1366 }, { "epoch": 0.14167271219815525, "grad_norm": 0.541286826133728, "learning_rate": 3.870718333902669e-05, "loss": 0.2251, "step": 1367 }, { "epoch": 0.14177634988081667, "grad_norm": 0.4920642375946045, "learning_rate": 3.8704807716743235e-05, "loss": 0.2539, "step": 1368 }, { "epoch": 0.14187998756347808, "grad_norm": 0.43952393531799316, "learning_rate": 3.8702429986831124e-05, "loss": 0.2329, "step": 1369 }, { "epoch": 0.1419836252461395, "grad_norm": 0.48505479097366333, "learning_rate": 3.870005014955827e-05, "loss": 0.263, "step": 1370 }, { "epoch": 0.14208726292880092, "grad_norm": 0.4841783940792084, "learning_rate": 3.8697668205192843e-05, "loss": 0.2223, "step": 1371 }, { "epoch": 0.14219090061146233, "grad_norm": 0.4626272916793823, "learning_rate": 3.8695284154003226e-05, "loss": 0.2635, "step": 1372 }, { "epoch": 0.14229453829412375, "grad_norm": 0.4150180518627167, "learning_rate": 3.869289799625805e-05, "loss": 0.1935, "step": 1373 }, { "epoch": 0.14239817597678517, "grad_norm": 0.44310134649276733, "learning_rate": 3.8690509732226186e-05, "loss": 0.2141, "step": 1374 }, { "epoch": 0.14250181365944659, "grad_norm": 0.4911743700504303, "learning_rate": 3.8688119362176745e-05, "loss": 0.212, "step": 1375 }, { "epoch": 0.142605451342108, "grad_norm": 0.5100070834159851, "learning_rate": 3.868572688637906e-05, "loss": 0.2713, "step": 1376 }, { "epoch": 0.14270908902476942, "grad_norm": 0.4721531271934509, "learning_rate": 3.868333230510273e-05, "loss": 0.2523, "step": 1377 }, { "epoch": 0.1428127267074308, "grad_norm": 0.550628125667572, "learning_rate": 3.868093561861755e-05, "loss": 0.2788, "step": 1378 }, { "epoch": 0.14291636439009223, "grad_norm": 0.5342934131622314, "learning_rate": 3.867853682719358e-05, "loss": 0.2644, "step": 1379 }, { "epoch": 0.14302000207275364, "grad_norm": 0.47953546047210693, "learning_rate": 3.8676135931101126e-05, "loss": 0.2527, "step": 1380 }, { "epoch": 0.14312363975541506, "grad_norm": 0.5149561762809753, "learning_rate": 3.86737329306107e-05, "loss": 0.2622, "step": 1381 }, { "epoch": 0.14322727743807648, "grad_norm": 0.4426271915435791, "learning_rate": 3.867132782599308e-05, "loss": 0.2703, "step": 1382 }, { "epoch": 0.1433309151207379, "grad_norm": 0.5571516156196594, "learning_rate": 3.8668920617519267e-05, "loss": 0.3005, "step": 1383 }, { "epoch": 0.1434345528033993, "grad_norm": 0.5235035419464111, "learning_rate": 3.86665113054605e-05, "loss": 0.2747, "step": 1384 }, { "epoch": 0.14353819048606073, "grad_norm": 0.4032425284385681, "learning_rate": 3.8664099890088254e-05, "loss": 0.2243, "step": 1385 }, { "epoch": 0.14364182816872215, "grad_norm": 0.47944414615631104, "learning_rate": 3.866168637167425e-05, "loss": 0.2625, "step": 1386 }, { "epoch": 0.14374546585138356, "grad_norm": 0.4225427210330963, "learning_rate": 3.865927075049043e-05, "loss": 0.2474, "step": 1387 }, { "epoch": 0.14384910353404498, "grad_norm": 0.5522814393043518, "learning_rate": 3.8656853026808997e-05, "loss": 0.2607, "step": 1388 }, { "epoch": 0.1439527412167064, "grad_norm": 0.4542481601238251, "learning_rate": 3.8654433200902355e-05, "loss": 0.2425, "step": 1389 }, { "epoch": 0.14405637889936782, "grad_norm": 0.4231477677822113, "learning_rate": 3.865201127304319e-05, "loss": 0.2069, "step": 1390 }, { "epoch": 0.14416001658202923, "grad_norm": 0.49372538924217224, "learning_rate": 3.864958724350438e-05, "loss": 0.2639, "step": 1391 }, { "epoch": 0.14426365426469065, "grad_norm": 0.5042855739593506, "learning_rate": 3.864716111255908e-05, "loss": 0.296, "step": 1392 }, { "epoch": 0.14436729194735207, "grad_norm": 0.47383350133895874, "learning_rate": 3.864473288048065e-05, "loss": 0.2672, "step": 1393 }, { "epoch": 0.14447092963001348, "grad_norm": 0.4303416907787323, "learning_rate": 3.8642302547542704e-05, "loss": 0.1998, "step": 1394 }, { "epoch": 0.1445745673126749, "grad_norm": 0.35755103826522827, "learning_rate": 3.863987011401909e-05, "loss": 0.1763, "step": 1395 }, { "epoch": 0.14467820499533632, "grad_norm": 0.4051773250102997, "learning_rate": 3.863743558018388e-05, "loss": 0.2044, "step": 1396 }, { "epoch": 0.1447818426779977, "grad_norm": 0.5011558532714844, "learning_rate": 3.86349989463114e-05, "loss": 0.2867, "step": 1397 }, { "epoch": 0.14488548036065912, "grad_norm": 0.4668574631214142, "learning_rate": 3.8632560212676215e-05, "loss": 0.2441, "step": 1398 }, { "epoch": 0.14498911804332054, "grad_norm": 0.43723446130752563, "learning_rate": 3.863011937955311e-05, "loss": 0.223, "step": 1399 }, { "epoch": 0.14509275572598196, "grad_norm": 0.4947415888309479, "learning_rate": 3.862767644721711e-05, "loss": 0.2607, "step": 1400 }, { "epoch": 0.14519639340864338, "grad_norm": 0.4283679723739624, "learning_rate": 3.8625231415943486e-05, "loss": 0.2271, "step": 1401 }, { "epoch": 0.1453000310913048, "grad_norm": 0.40165016055107117, "learning_rate": 3.8622784286007744e-05, "loss": 0.2107, "step": 1402 }, { "epoch": 0.1454036687739662, "grad_norm": 0.4284477233886719, "learning_rate": 3.8620335057685616e-05, "loss": 0.2341, "step": 1403 }, { "epoch": 0.14550730645662763, "grad_norm": 0.44606518745422363, "learning_rate": 3.861788373125308e-05, "loss": 0.2221, "step": 1404 }, { "epoch": 0.14561094413928904, "grad_norm": 0.5379354357719421, "learning_rate": 3.861543030698634e-05, "loss": 0.2498, "step": 1405 }, { "epoch": 0.14571458182195046, "grad_norm": 0.4723847210407257, "learning_rate": 3.861297478516186e-05, "loss": 0.2483, "step": 1406 }, { "epoch": 0.14581821950461188, "grad_norm": 0.4253641664981842, "learning_rate": 3.861051716605631e-05, "loss": 0.2081, "step": 1407 }, { "epoch": 0.1459218571872733, "grad_norm": 0.42284271121025085, "learning_rate": 3.860805744994662e-05, "loss": 0.2198, "step": 1408 }, { "epoch": 0.1460254948699347, "grad_norm": 0.49065491557121277, "learning_rate": 3.860559563710994e-05, "loss": 0.2668, "step": 1409 }, { "epoch": 0.14612913255259613, "grad_norm": 0.4915112555027008, "learning_rate": 3.860313172782367e-05, "loss": 0.2572, "step": 1410 }, { "epoch": 0.14623277023525755, "grad_norm": 0.5048714280128479, "learning_rate": 3.8600665722365434e-05, "loss": 0.2325, "step": 1411 }, { "epoch": 0.14633640791791896, "grad_norm": 0.45734068751335144, "learning_rate": 3.8598197621013106e-05, "loss": 0.2633, "step": 1412 }, { "epoch": 0.14644004560058038, "grad_norm": 0.44115594029426575, "learning_rate": 3.859572742404477e-05, "loss": 0.2272, "step": 1413 }, { "epoch": 0.1465436832832418, "grad_norm": 0.4850781559944153, "learning_rate": 3.859325513173878e-05, "loss": 0.2463, "step": 1414 }, { "epoch": 0.14664732096590322, "grad_norm": 0.4548441171646118, "learning_rate": 3.8590780744373715e-05, "loss": 0.2373, "step": 1415 }, { "epoch": 0.1467509586485646, "grad_norm": 0.4415612816810608, "learning_rate": 3.8588304262228365e-05, "loss": 0.2392, "step": 1416 }, { "epoch": 0.14685459633122602, "grad_norm": 0.5979070067405701, "learning_rate": 3.858582568558179e-05, "loss": 0.2893, "step": 1417 }, { "epoch": 0.14695823401388744, "grad_norm": 0.5068161487579346, "learning_rate": 3.8583345014713264e-05, "loss": 0.2626, "step": 1418 }, { "epoch": 0.14706187169654886, "grad_norm": 0.4130820333957672, "learning_rate": 3.8580862249902324e-05, "loss": 0.2448, "step": 1419 }, { "epoch": 0.14716550937921027, "grad_norm": 0.4339570999145508, "learning_rate": 3.85783773914287e-05, "loss": 0.2182, "step": 1420 }, { "epoch": 0.1472691470618717, "grad_norm": 0.4481070041656494, "learning_rate": 3.857589043957239e-05, "loss": 0.2051, "step": 1421 }, { "epoch": 0.1473727847445331, "grad_norm": 0.4021722972393036, "learning_rate": 3.8573401394613624e-05, "loss": 0.1988, "step": 1422 }, { "epoch": 0.14747642242719453, "grad_norm": 0.5416178107261658, "learning_rate": 3.8570910256832866e-05, "loss": 0.2911, "step": 1423 }, { "epoch": 0.14758006010985594, "grad_norm": 0.4623812139034271, "learning_rate": 3.85684170265108e-05, "loss": 0.2437, "step": 1424 }, { "epoch": 0.14768369779251736, "grad_norm": 0.5158494114875793, "learning_rate": 3.856592170392838e-05, "loss": 0.2771, "step": 1425 }, { "epoch": 0.14778733547517878, "grad_norm": 0.3807986080646515, "learning_rate": 3.856342428936675e-05, "loss": 0.2136, "step": 1426 }, { "epoch": 0.1478909731578402, "grad_norm": 0.4274327754974365, "learning_rate": 3.856092478310734e-05, "loss": 0.2196, "step": 1427 }, { "epoch": 0.1479946108405016, "grad_norm": 0.46692219376564026, "learning_rate": 3.855842318543178e-05, "loss": 0.2327, "step": 1428 }, { "epoch": 0.14809824852316303, "grad_norm": 0.504139244556427, "learning_rate": 3.855591949662194e-05, "loss": 0.2566, "step": 1429 }, { "epoch": 0.14820188620582445, "grad_norm": 0.49943894147872925, "learning_rate": 3.855341371695994e-05, "loss": 0.2472, "step": 1430 }, { "epoch": 0.14830552388848586, "grad_norm": 0.4539870321750641, "learning_rate": 3.855090584672812e-05, "loss": 0.2612, "step": 1431 }, { "epoch": 0.14840916157114728, "grad_norm": 0.4134329855442047, "learning_rate": 3.854839588620907e-05, "loss": 0.2064, "step": 1432 }, { "epoch": 0.1485127992538087, "grad_norm": 0.4111216068267822, "learning_rate": 3.8545883835685606e-05, "loss": 0.2015, "step": 1433 }, { "epoch": 0.14861643693647011, "grad_norm": 0.4707288444042206, "learning_rate": 3.854336969544078e-05, "loss": 0.243, "step": 1434 }, { "epoch": 0.1487200746191315, "grad_norm": 0.44714653491973877, "learning_rate": 3.8540853465757885e-05, "loss": 0.2217, "step": 1435 }, { "epoch": 0.14882371230179292, "grad_norm": 0.5175408124923706, "learning_rate": 3.853833514692044e-05, "loss": 0.2918, "step": 1436 }, { "epoch": 0.14892734998445434, "grad_norm": 0.4449577033519745, "learning_rate": 3.853581473921221e-05, "loss": 0.2179, "step": 1437 }, { "epoch": 0.14903098766711576, "grad_norm": 0.45618268847465515, "learning_rate": 3.8533292242917195e-05, "loss": 0.2558, "step": 1438 }, { "epoch": 0.14913462534977717, "grad_norm": 0.499709814786911, "learning_rate": 3.8530767658319614e-05, "loss": 0.2354, "step": 1439 }, { "epoch": 0.1492382630324386, "grad_norm": 0.5203845500946045, "learning_rate": 3.852824098570394e-05, "loss": 0.2389, "step": 1440 }, { "epoch": 0.1493419007151, "grad_norm": 0.4207056164741516, "learning_rate": 3.852571222535487e-05, "loss": 0.2087, "step": 1441 }, { "epoch": 0.14944553839776142, "grad_norm": 0.494806706905365, "learning_rate": 3.852318137755736e-05, "loss": 0.2368, "step": 1442 }, { "epoch": 0.14954917608042284, "grad_norm": 0.5283109545707703, "learning_rate": 3.852064844259656e-05, "loss": 0.2782, "step": 1443 }, { "epoch": 0.14965281376308426, "grad_norm": 0.5322892069816589, "learning_rate": 3.851811342075788e-05, "loss": 0.235, "step": 1444 }, { "epoch": 0.14975645144574568, "grad_norm": 0.48319143056869507, "learning_rate": 3.851557631232697e-05, "loss": 0.2362, "step": 1445 }, { "epoch": 0.1498600891284071, "grad_norm": 0.4722559452056885, "learning_rate": 3.851303711758971e-05, "loss": 0.2772, "step": 1446 }, { "epoch": 0.1499637268110685, "grad_norm": 0.447968989610672, "learning_rate": 3.85104958368322e-05, "loss": 0.2705, "step": 1447 }, { "epoch": 0.15006736449372993, "grad_norm": 0.49089664220809937, "learning_rate": 3.8507952470340794e-05, "loss": 0.2811, "step": 1448 }, { "epoch": 0.15017100217639134, "grad_norm": 0.45806270837783813, "learning_rate": 3.8505407018402073e-05, "loss": 0.2484, "step": 1449 }, { "epoch": 0.15027463985905276, "grad_norm": 0.5139738321304321, "learning_rate": 3.850285948130286e-05, "loss": 0.2482, "step": 1450 }, { "epoch": 0.15037827754171418, "grad_norm": 0.4105122685432434, "learning_rate": 3.8500309859330205e-05, "loss": 0.2106, "step": 1451 }, { "epoch": 0.1504819152243756, "grad_norm": 0.41013506054878235, "learning_rate": 3.849775815277139e-05, "loss": 0.1953, "step": 1452 }, { "epoch": 0.150585552907037, "grad_norm": 0.527901828289032, "learning_rate": 3.8495204361913944e-05, "loss": 0.2527, "step": 1453 }, { "epoch": 0.1506891905896984, "grad_norm": 0.5156935453414917, "learning_rate": 3.849264848704563e-05, "loss": 0.2418, "step": 1454 }, { "epoch": 0.15079282827235982, "grad_norm": 0.5491344928741455, "learning_rate": 3.8490090528454415e-05, "loss": 0.276, "step": 1455 }, { "epoch": 0.15089646595502124, "grad_norm": 0.4275567829608917, "learning_rate": 3.8487530486428554e-05, "loss": 0.2141, "step": 1456 }, { "epoch": 0.15100010363768265, "grad_norm": 0.4257248342037201, "learning_rate": 3.84849683612565e-05, "loss": 0.236, "step": 1457 }, { "epoch": 0.15110374132034407, "grad_norm": 0.43031546473503113, "learning_rate": 3.848240415322693e-05, "loss": 0.2132, "step": 1458 }, { "epoch": 0.1512073790030055, "grad_norm": 0.445788711309433, "learning_rate": 3.847983786262881e-05, "loss": 0.2503, "step": 1459 }, { "epoch": 0.1513110166856669, "grad_norm": 0.49891895055770874, "learning_rate": 3.847726948975128e-05, "loss": 0.2545, "step": 1460 }, { "epoch": 0.15141465436832832, "grad_norm": 0.4228783845901489, "learning_rate": 3.847469903488375e-05, "loss": 0.2335, "step": 1461 }, { "epoch": 0.15151829205098974, "grad_norm": 0.4258784055709839, "learning_rate": 3.847212649831585e-05, "loss": 0.2165, "step": 1462 }, { "epoch": 0.15162192973365116, "grad_norm": 0.5519275069236755, "learning_rate": 3.846955188033745e-05, "loss": 0.2555, "step": 1463 }, { "epoch": 0.15172556741631257, "grad_norm": 0.43952473998069763, "learning_rate": 3.846697518123866e-05, "loss": 0.2355, "step": 1464 }, { "epoch": 0.151829205098974, "grad_norm": 0.494721919298172, "learning_rate": 3.8464396401309804e-05, "loss": 0.2615, "step": 1465 }, { "epoch": 0.1519328427816354, "grad_norm": 0.5367245078086853, "learning_rate": 3.846181554084147e-05, "loss": 0.2833, "step": 1466 }, { "epoch": 0.15203648046429682, "grad_norm": 0.519638180732727, "learning_rate": 3.845923260012446e-05, "loss": 0.3263, "step": 1467 }, { "epoch": 0.15214011814695824, "grad_norm": 0.4717256724834442, "learning_rate": 3.845664757944983e-05, "loss": 0.2725, "step": 1468 }, { "epoch": 0.15224375582961966, "grad_norm": 0.47035694122314453, "learning_rate": 3.845406047910883e-05, "loss": 0.2707, "step": 1469 }, { "epoch": 0.15234739351228108, "grad_norm": 0.5218305587768555, "learning_rate": 3.845147129939298e-05, "loss": 0.2632, "step": 1470 }, { "epoch": 0.1524510311949425, "grad_norm": 0.5742754936218262, "learning_rate": 3.844888004059403e-05, "loss": 0.2487, "step": 1471 }, { "epoch": 0.1525546688776039, "grad_norm": 0.4790658950805664, "learning_rate": 3.844628670300396e-05, "loss": 0.2402, "step": 1472 }, { "epoch": 0.1526583065602653, "grad_norm": 0.4891185760498047, "learning_rate": 3.844369128691497e-05, "loss": 0.2472, "step": 1473 }, { "epoch": 0.15276194424292672, "grad_norm": 0.4714306890964508, "learning_rate": 3.844109379261953e-05, "loss": 0.2062, "step": 1474 }, { "epoch": 0.15286558192558813, "grad_norm": 0.46967020630836487, "learning_rate": 3.84384942204103e-05, "loss": 0.1977, "step": 1475 }, { "epoch": 0.15296921960824955, "grad_norm": 0.46074581146240234, "learning_rate": 3.8435892570580205e-05, "loss": 0.2474, "step": 1476 }, { "epoch": 0.15307285729091097, "grad_norm": 0.5243543386459351, "learning_rate": 3.8433288843422395e-05, "loss": 0.2718, "step": 1477 }, { "epoch": 0.15317649497357239, "grad_norm": 0.4539581835269928, "learning_rate": 3.8430683039230254e-05, "loss": 0.2315, "step": 1478 }, { "epoch": 0.1532801326562338, "grad_norm": 0.5196117162704468, "learning_rate": 3.8428075158297404e-05, "loss": 0.2527, "step": 1479 }, { "epoch": 0.15338377033889522, "grad_norm": 0.482964426279068, "learning_rate": 3.842546520091769e-05, "loss": 0.2847, "step": 1480 }, { "epoch": 0.15348740802155664, "grad_norm": 0.5170326828956604, "learning_rate": 3.8422853167385195e-05, "loss": 0.2953, "step": 1481 }, { "epoch": 0.15359104570421805, "grad_norm": 0.48660704493522644, "learning_rate": 3.842023905799425e-05, "loss": 0.2518, "step": 1482 }, { "epoch": 0.15369468338687947, "grad_norm": 0.46241337060928345, "learning_rate": 3.8417622873039396e-05, "loss": 0.2286, "step": 1483 }, { "epoch": 0.1537983210695409, "grad_norm": 0.44095274806022644, "learning_rate": 3.841500461281543e-05, "loss": 0.2328, "step": 1484 }, { "epoch": 0.1539019587522023, "grad_norm": 0.4544067978858948, "learning_rate": 3.8412384277617374e-05, "loss": 0.2414, "step": 1485 }, { "epoch": 0.15400559643486372, "grad_norm": 0.38432058691978455, "learning_rate": 3.8409761867740476e-05, "loss": 0.1946, "step": 1486 }, { "epoch": 0.15410923411752514, "grad_norm": 0.44219785928726196, "learning_rate": 3.840713738348023e-05, "loss": 0.232, "step": 1487 }, { "epoch": 0.15421287180018656, "grad_norm": 0.5125858783721924, "learning_rate": 3.840451082513236e-05, "loss": 0.2364, "step": 1488 }, { "epoch": 0.15431650948284797, "grad_norm": 0.5096924901008606, "learning_rate": 3.840188219299282e-05, "loss": 0.2639, "step": 1489 }, { "epoch": 0.1544201471655094, "grad_norm": 0.4817996919155121, "learning_rate": 3.83992514873578e-05, "loss": 0.2794, "step": 1490 }, { "epoch": 0.1545237848481708, "grad_norm": 0.46409645676612854, "learning_rate": 3.839661870852372e-05, "loss": 0.2414, "step": 1491 }, { "epoch": 0.1546274225308322, "grad_norm": 0.500841498374939, "learning_rate": 3.839398385678725e-05, "loss": 0.231, "step": 1492 }, { "epoch": 0.15473106021349362, "grad_norm": 0.5692920684814453, "learning_rate": 3.839134693244527e-05, "loss": 0.2904, "step": 1493 }, { "epoch": 0.15483469789615503, "grad_norm": 0.49656811356544495, "learning_rate": 3.8388707935794905e-05, "loss": 0.2348, "step": 1494 }, { "epoch": 0.15493833557881645, "grad_norm": 0.40897035598754883, "learning_rate": 3.8386066867133515e-05, "loss": 0.2214, "step": 1495 }, { "epoch": 0.15504197326147787, "grad_norm": 0.48259782791137695, "learning_rate": 3.83834237267587e-05, "loss": 0.2226, "step": 1496 }, { "epoch": 0.15514561094413928, "grad_norm": 0.532426655292511, "learning_rate": 3.838077851496827e-05, "loss": 0.2935, "step": 1497 }, { "epoch": 0.1552492486268007, "grad_norm": 0.4772799611091614, "learning_rate": 3.837813123206029e-05, "loss": 0.2373, "step": 1498 }, { "epoch": 0.15535288630946212, "grad_norm": 0.47948452830314636, "learning_rate": 3.837548187833306e-05, "loss": 0.2564, "step": 1499 }, { "epoch": 0.15545652399212354, "grad_norm": 0.4946514070034027, "learning_rate": 3.8372830454085095e-05, "loss": 0.2392, "step": 1500 }, { "epoch": 0.15556016167478495, "grad_norm": 0.4094104766845703, "learning_rate": 3.8370176959615154e-05, "loss": 0.1878, "step": 1501 }, { "epoch": 0.15566379935744637, "grad_norm": 0.4557799994945526, "learning_rate": 3.8367521395222225e-05, "loss": 0.2277, "step": 1502 }, { "epoch": 0.1557674370401078, "grad_norm": 0.3870164453983307, "learning_rate": 3.8364863761205546e-05, "loss": 0.1776, "step": 1503 }, { "epoch": 0.1558710747227692, "grad_norm": 0.5103594660758972, "learning_rate": 3.836220405786456e-05, "loss": 0.2714, "step": 1504 }, { "epoch": 0.15597471240543062, "grad_norm": 0.5301050543785095, "learning_rate": 3.8359542285498966e-05, "loss": 0.2546, "step": 1505 }, { "epoch": 0.15607835008809204, "grad_norm": 0.4772056043148041, "learning_rate": 3.8356878444408696e-05, "loss": 0.2327, "step": 1506 }, { "epoch": 0.15618198777075346, "grad_norm": 0.5653616786003113, "learning_rate": 3.835421253489389e-05, "loss": 0.2944, "step": 1507 }, { "epoch": 0.15628562545341487, "grad_norm": 0.5195934772491455, "learning_rate": 3.8351544557254954e-05, "loss": 0.2897, "step": 1508 }, { "epoch": 0.1563892631360763, "grad_norm": 0.4947530925273895, "learning_rate": 3.83488745117925e-05, "loss": 0.2477, "step": 1509 }, { "epoch": 0.1564929008187377, "grad_norm": 0.519658088684082, "learning_rate": 3.834620239880739e-05, "loss": 0.2648, "step": 1510 }, { "epoch": 0.1565965385013991, "grad_norm": 0.46499353647232056, "learning_rate": 3.834352821860072e-05, "loss": 0.2663, "step": 1511 }, { "epoch": 0.1567001761840605, "grad_norm": 0.5055245161056519, "learning_rate": 3.83408519714738e-05, "loss": 0.2683, "step": 1512 }, { "epoch": 0.15680381386672193, "grad_norm": 0.5055966377258301, "learning_rate": 3.833817365772819e-05, "loss": 0.2382, "step": 1513 }, { "epoch": 0.15690745154938335, "grad_norm": 0.43308383226394653, "learning_rate": 3.833549327766569e-05, "loss": 0.2109, "step": 1514 }, { "epoch": 0.15701108923204476, "grad_norm": 0.4320563077926636, "learning_rate": 3.83328108315883e-05, "loss": 0.201, "step": 1515 }, { "epoch": 0.15711472691470618, "grad_norm": 0.5178250670433044, "learning_rate": 3.833012631979829e-05, "loss": 0.2529, "step": 1516 }, { "epoch": 0.1572183645973676, "grad_norm": 0.4101965129375458, "learning_rate": 3.832743974259814e-05, "loss": 0.2078, "step": 1517 }, { "epoch": 0.15732200228002902, "grad_norm": 0.5203193426132202, "learning_rate": 3.832475110029056e-05, "loss": 0.2528, "step": 1518 }, { "epoch": 0.15742563996269043, "grad_norm": 0.4788103401660919, "learning_rate": 3.8322060393178526e-05, "loss": 0.2372, "step": 1519 }, { "epoch": 0.15752927764535185, "grad_norm": 0.49796929955482483, "learning_rate": 3.8319367621565205e-05, "loss": 0.2327, "step": 1520 }, { "epoch": 0.15763291532801327, "grad_norm": 0.5318668484687805, "learning_rate": 3.831667278575402e-05, "loss": 0.2643, "step": 1521 }, { "epoch": 0.15773655301067468, "grad_norm": 0.4808575510978699, "learning_rate": 3.831397588604861e-05, "loss": 0.2411, "step": 1522 }, { "epoch": 0.1578401906933361, "grad_norm": 0.5338442325592041, "learning_rate": 3.8311276922752876e-05, "loss": 0.2625, "step": 1523 }, { "epoch": 0.15794382837599752, "grad_norm": 0.5726016759872437, "learning_rate": 3.830857589617092e-05, "loss": 0.2925, "step": 1524 }, { "epoch": 0.15804746605865894, "grad_norm": 0.49443721771240234, "learning_rate": 3.83058728066071e-05, "loss": 0.2573, "step": 1525 }, { "epoch": 0.15815110374132035, "grad_norm": 0.5603029131889343, "learning_rate": 3.830316765436598e-05, "loss": 0.2656, "step": 1526 }, { "epoch": 0.15825474142398177, "grad_norm": 0.4532327950000763, "learning_rate": 3.830046043975239e-05, "loss": 0.2201, "step": 1527 }, { "epoch": 0.1583583791066432, "grad_norm": 0.45034271478652954, "learning_rate": 3.8297751163071356e-05, "loss": 0.2703, "step": 1528 }, { "epoch": 0.1584620167893046, "grad_norm": 0.49351736903190613, "learning_rate": 3.829503982462817e-05, "loss": 0.2187, "step": 1529 }, { "epoch": 0.158565654471966, "grad_norm": 0.46013131737709045, "learning_rate": 3.8292326424728344e-05, "loss": 0.252, "step": 1530 }, { "epoch": 0.1586692921546274, "grad_norm": 0.4857384264469147, "learning_rate": 3.8289610963677605e-05, "loss": 0.2406, "step": 1531 }, { "epoch": 0.15877292983728883, "grad_norm": 0.38991162180900574, "learning_rate": 3.8286893441781935e-05, "loss": 0.1895, "step": 1532 }, { "epoch": 0.15887656751995025, "grad_norm": 0.435508668422699, "learning_rate": 3.828417385934754e-05, "loss": 0.2135, "step": 1533 }, { "epoch": 0.15898020520261166, "grad_norm": 0.5303035378456116, "learning_rate": 3.828145221668086e-05, "loss": 0.2434, "step": 1534 }, { "epoch": 0.15908384288527308, "grad_norm": 0.46581482887268066, "learning_rate": 3.827872851408856e-05, "loss": 0.2462, "step": 1535 }, { "epoch": 0.1591874805679345, "grad_norm": 0.5437631607055664, "learning_rate": 3.827600275187755e-05, "loss": 0.2885, "step": 1536 }, { "epoch": 0.15929111825059591, "grad_norm": 0.45981255173683167, "learning_rate": 3.8273274930354955e-05, "loss": 0.2248, "step": 1537 }, { "epoch": 0.15939475593325733, "grad_norm": 0.4518927037715912, "learning_rate": 3.827054504982815e-05, "loss": 0.2211, "step": 1538 }, { "epoch": 0.15949839361591875, "grad_norm": 0.44678646326065063, "learning_rate": 3.826781311060473e-05, "loss": 0.2235, "step": 1539 }, { "epoch": 0.15960203129858017, "grad_norm": 0.45804503560066223, "learning_rate": 3.8265079112992525e-05, "loss": 0.2477, "step": 1540 }, { "epoch": 0.15970566898124158, "grad_norm": 0.46019431948661804, "learning_rate": 3.82623430572996e-05, "loss": 0.2451, "step": 1541 }, { "epoch": 0.159809306663903, "grad_norm": 0.5183833837509155, "learning_rate": 3.825960494383426e-05, "loss": 0.2567, "step": 1542 }, { "epoch": 0.15991294434656442, "grad_norm": 0.49382057785987854, "learning_rate": 3.8256864772905006e-05, "loss": 0.2484, "step": 1543 }, { "epoch": 0.16001658202922583, "grad_norm": 0.4504323899745941, "learning_rate": 3.8254122544820615e-05, "loss": 0.2157, "step": 1544 }, { "epoch": 0.16012021971188725, "grad_norm": 0.46211907267570496, "learning_rate": 3.825137825989007e-05, "loss": 0.244, "step": 1545 }, { "epoch": 0.16022385739454867, "grad_norm": 0.4525067210197449, "learning_rate": 3.8248631918422595e-05, "loss": 0.2706, "step": 1546 }, { "epoch": 0.16032749507721009, "grad_norm": 0.42060381174087524, "learning_rate": 3.8245883520727646e-05, "loss": 0.217, "step": 1547 }, { "epoch": 0.1604311327598715, "grad_norm": 0.46641871333122253, "learning_rate": 3.82431330671149e-05, "loss": 0.2388, "step": 1548 }, { "epoch": 0.1605347704425329, "grad_norm": 0.45688316226005554, "learning_rate": 3.824038055789429e-05, "loss": 0.2185, "step": 1549 }, { "epoch": 0.1606384081251943, "grad_norm": 0.5488545298576355, "learning_rate": 3.823762599337595e-05, "loss": 0.2564, "step": 1550 }, { "epoch": 0.16074204580785573, "grad_norm": 0.4734223186969757, "learning_rate": 3.823486937387026e-05, "loss": 0.2141, "step": 1551 }, { "epoch": 0.16084568349051714, "grad_norm": 0.4606945514678955, "learning_rate": 3.8232110699687836e-05, "loss": 0.2681, "step": 1552 }, { "epoch": 0.16094932117317856, "grad_norm": 0.49000391364097595, "learning_rate": 3.822934997113953e-05, "loss": 0.252, "step": 1553 }, { "epoch": 0.16105295885583998, "grad_norm": 0.4744507074356079, "learning_rate": 3.82265871885364e-05, "loss": 0.248, "step": 1554 }, { "epoch": 0.1611565965385014, "grad_norm": 0.5346882939338684, "learning_rate": 3.822382235218975e-05, "loss": 0.3036, "step": 1555 }, { "epoch": 0.1612602342211628, "grad_norm": 0.4564545154571533, "learning_rate": 3.822105546241114e-05, "loss": 0.2214, "step": 1556 }, { "epoch": 0.16136387190382423, "grad_norm": 0.4706862270832062, "learning_rate": 3.821828651951232e-05, "loss": 0.2465, "step": 1557 }, { "epoch": 0.16146750958648565, "grad_norm": 0.41739633679389954, "learning_rate": 3.82155155238053e-05, "loss": 0.2372, "step": 1558 }, { "epoch": 0.16157114726914706, "grad_norm": 0.4622024595737457, "learning_rate": 3.82127424756023e-05, "loss": 0.2483, "step": 1559 }, { "epoch": 0.16167478495180848, "grad_norm": 0.42721524834632874, "learning_rate": 3.82099673752158e-05, "loss": 0.2631, "step": 1560 }, { "epoch": 0.1617784226344699, "grad_norm": 0.5179228782653809, "learning_rate": 3.8207190222958474e-05, "loss": 0.2988, "step": 1561 }, { "epoch": 0.16188206031713132, "grad_norm": 0.5048723816871643, "learning_rate": 3.820441101914327e-05, "loss": 0.2608, "step": 1562 }, { "epoch": 0.16198569799979273, "grad_norm": 0.49215126037597656, "learning_rate": 3.820162976408332e-05, "loss": 0.2485, "step": 1563 }, { "epoch": 0.16208933568245415, "grad_norm": 0.5016050934791565, "learning_rate": 3.819884645809203e-05, "loss": 0.2456, "step": 1564 }, { "epoch": 0.16219297336511557, "grad_norm": 0.42429667711257935, "learning_rate": 3.819606110148301e-05, "loss": 0.1731, "step": 1565 }, { "epoch": 0.16229661104777698, "grad_norm": 0.4097731411457062, "learning_rate": 3.8193273694570105e-05, "loss": 0.2605, "step": 1566 }, { "epoch": 0.1624002487304384, "grad_norm": 0.4741736948490143, "learning_rate": 3.819048423766741e-05, "loss": 0.2363, "step": 1567 }, { "epoch": 0.1625038864130998, "grad_norm": 0.4571591913700104, "learning_rate": 3.818769273108923e-05, "loss": 0.231, "step": 1568 }, { "epoch": 0.1626075240957612, "grad_norm": 0.41287946701049805, "learning_rate": 3.81848991751501e-05, "loss": 0.1934, "step": 1569 }, { "epoch": 0.16271116177842262, "grad_norm": 0.600933313369751, "learning_rate": 3.81821035701648e-05, "loss": 0.2542, "step": 1570 }, { "epoch": 0.16281479946108404, "grad_norm": 0.4297773540019989, "learning_rate": 3.817930591644834e-05, "loss": 0.2204, "step": 1571 }, { "epoch": 0.16291843714374546, "grad_norm": 0.43877971172332764, "learning_rate": 3.817650621431595e-05, "loss": 0.1925, "step": 1572 }, { "epoch": 0.16302207482640688, "grad_norm": 0.4170320928096771, "learning_rate": 3.817370446408309e-05, "loss": 0.1872, "step": 1573 }, { "epoch": 0.1631257125090683, "grad_norm": 0.5067045092582703, "learning_rate": 3.817090066606547e-05, "loss": 0.2353, "step": 1574 }, { "epoch": 0.1632293501917297, "grad_norm": 0.5097575187683105, "learning_rate": 3.8168094820579e-05, "loss": 0.2528, "step": 1575 }, { "epoch": 0.16333298787439113, "grad_norm": 0.49412232637405396, "learning_rate": 3.816528692793985e-05, "loss": 0.2501, "step": 1576 }, { "epoch": 0.16343662555705255, "grad_norm": 0.4716198444366455, "learning_rate": 3.816247698846441e-05, "loss": 0.2394, "step": 1577 }, { "epoch": 0.16354026323971396, "grad_norm": 0.5026634931564331, "learning_rate": 3.81596650024693e-05, "loss": 0.2521, "step": 1578 }, { "epoch": 0.16364390092237538, "grad_norm": 0.4558325409889221, "learning_rate": 3.815685097027137e-05, "loss": 0.2268, "step": 1579 }, { "epoch": 0.1637475386050368, "grad_norm": 0.4844086766242981, "learning_rate": 3.8154034892187685e-05, "loss": 0.242, "step": 1580 }, { "epoch": 0.1638511762876982, "grad_norm": 0.43945661187171936, "learning_rate": 3.8151216768535584e-05, "loss": 0.2156, "step": 1581 }, { "epoch": 0.16395481397035963, "grad_norm": 0.5005728006362915, "learning_rate": 3.8148396599632585e-05, "loss": 0.253, "step": 1582 }, { "epoch": 0.16405845165302105, "grad_norm": 0.5019499063491821, "learning_rate": 3.8145574385796475e-05, "loss": 0.2484, "step": 1583 }, { "epoch": 0.16416208933568247, "grad_norm": 0.4787789285182953, "learning_rate": 3.8142750127345244e-05, "loss": 0.2561, "step": 1584 }, { "epoch": 0.16426572701834388, "grad_norm": 0.49771633744239807, "learning_rate": 3.813992382459714e-05, "loss": 0.218, "step": 1585 }, { "epoch": 0.1643693647010053, "grad_norm": 0.5096080303192139, "learning_rate": 3.8137095477870616e-05, "loss": 0.2274, "step": 1586 }, { "epoch": 0.1644730023836667, "grad_norm": 0.46017125248908997, "learning_rate": 3.8134265087484364e-05, "loss": 0.2196, "step": 1587 }, { "epoch": 0.1645766400663281, "grad_norm": 0.42982977628707886, "learning_rate": 3.8131432653757315e-05, "loss": 0.2139, "step": 1588 }, { "epoch": 0.16468027774898952, "grad_norm": 0.447560578584671, "learning_rate": 3.812859817700862e-05, "loss": 0.228, "step": 1589 }, { "epoch": 0.16478391543165094, "grad_norm": 0.5123468041419983, "learning_rate": 3.812576165755767e-05, "loss": 0.2573, "step": 1590 }, { "epoch": 0.16488755311431236, "grad_norm": 0.5309221148490906, "learning_rate": 3.8122923095724064e-05, "loss": 0.2648, "step": 1591 }, { "epoch": 0.16499119079697377, "grad_norm": 0.4955211281776428, "learning_rate": 3.812008249182766e-05, "loss": 0.2401, "step": 1592 }, { "epoch": 0.1650948284796352, "grad_norm": 0.47056087851524353, "learning_rate": 3.811723984618853e-05, "loss": 0.2236, "step": 1593 }, { "epoch": 0.1651984661622966, "grad_norm": 0.49324530363082886, "learning_rate": 3.811439515912698e-05, "loss": 0.2231, "step": 1594 }, { "epoch": 0.16530210384495803, "grad_norm": 0.4553638994693756, "learning_rate": 3.811154843096354e-05, "loss": 0.215, "step": 1595 }, { "epoch": 0.16540574152761944, "grad_norm": 0.4878029525279999, "learning_rate": 3.810869966201898e-05, "loss": 0.2443, "step": 1596 }, { "epoch": 0.16550937921028086, "grad_norm": 0.49212896823883057, "learning_rate": 3.8105848852614286e-05, "loss": 0.2045, "step": 1597 }, { "epoch": 0.16561301689294228, "grad_norm": 0.42695632576942444, "learning_rate": 3.810299600307069e-05, "loss": 0.2011, "step": 1598 }, { "epoch": 0.1657166545756037, "grad_norm": 0.5269631743431091, "learning_rate": 3.810014111370966e-05, "loss": 0.2627, "step": 1599 }, { "epoch": 0.1658202922582651, "grad_norm": 0.48581254482269287, "learning_rate": 3.8097284184852853e-05, "loss": 0.2273, "step": 1600 }, { "epoch": 0.16592392994092653, "grad_norm": 0.523431122303009, "learning_rate": 3.80944252168222e-05, "loss": 0.2213, "step": 1601 }, { "epoch": 0.16602756762358795, "grad_norm": 0.5090680718421936, "learning_rate": 3.8091564209939834e-05, "loss": 0.2753, "step": 1602 }, { "epoch": 0.16613120530624936, "grad_norm": 0.5586525797843933, "learning_rate": 3.808870116452815e-05, "loss": 0.2552, "step": 1603 }, { "epoch": 0.16623484298891078, "grad_norm": 0.5396630764007568, "learning_rate": 3.808583608090974e-05, "loss": 0.2336, "step": 1604 }, { "epoch": 0.1663384806715722, "grad_norm": 0.5063744783401489, "learning_rate": 3.808296895940742e-05, "loss": 0.2239, "step": 1605 }, { "epoch": 0.1664421183542336, "grad_norm": 0.45250818133354187, "learning_rate": 3.808009980034428e-05, "loss": 0.244, "step": 1606 }, { "epoch": 0.166545756036895, "grad_norm": 0.4877474904060364, "learning_rate": 3.8077228604043595e-05, "loss": 0.2478, "step": 1607 }, { "epoch": 0.16664939371955642, "grad_norm": 0.460329532623291, "learning_rate": 3.8074355370828896e-05, "loss": 0.2195, "step": 1608 }, { "epoch": 0.16675303140221784, "grad_norm": 0.5120121240615845, "learning_rate": 3.807148010102393e-05, "loss": 0.2765, "step": 1609 }, { "epoch": 0.16685666908487926, "grad_norm": 0.40834420919418335, "learning_rate": 3.8068602794952675e-05, "loss": 0.2175, "step": 1610 }, { "epoch": 0.16696030676754067, "grad_norm": 0.44246426224708557, "learning_rate": 3.806572345293935e-05, "loss": 0.2393, "step": 1611 }, { "epoch": 0.1670639444502021, "grad_norm": 0.49997183680534363, "learning_rate": 3.806284207530839e-05, "loss": 0.2735, "step": 1612 }, { "epoch": 0.1671675821328635, "grad_norm": 0.5130741596221924, "learning_rate": 3.805995866238446e-05, "loss": 0.2287, "step": 1613 }, { "epoch": 0.16727121981552492, "grad_norm": 0.4857131242752075, "learning_rate": 3.805707321449247e-05, "loss": 0.2527, "step": 1614 }, { "epoch": 0.16737485749818634, "grad_norm": 0.5433309674263, "learning_rate": 3.8054185731957536e-05, "loss": 0.2963, "step": 1615 }, { "epoch": 0.16747849518084776, "grad_norm": 0.47748440504074097, "learning_rate": 3.805129621510502e-05, "loss": 0.1869, "step": 1616 }, { "epoch": 0.16758213286350918, "grad_norm": 0.5478339195251465, "learning_rate": 3.804840466426051e-05, "loss": 0.2652, "step": 1617 }, { "epoch": 0.1676857705461706, "grad_norm": 0.509493350982666, "learning_rate": 3.8045511079749816e-05, "loss": 0.243, "step": 1618 }, { "epoch": 0.167789408228832, "grad_norm": 0.5129976868629456, "learning_rate": 3.804261546189899e-05, "loss": 0.2909, "step": 1619 }, { "epoch": 0.16789304591149343, "grad_norm": 0.5227918028831482, "learning_rate": 3.803971781103429e-05, "loss": 0.2589, "step": 1620 }, { "epoch": 0.16799668359415484, "grad_norm": 0.4164186418056488, "learning_rate": 3.803681812748224e-05, "loss": 0.2058, "step": 1621 }, { "epoch": 0.16810032127681626, "grad_norm": 0.3985108733177185, "learning_rate": 3.803391641156956e-05, "loss": 0.2003, "step": 1622 }, { "epoch": 0.16820395895947768, "grad_norm": 0.5249270796775818, "learning_rate": 3.803101266362321e-05, "loss": 0.2415, "step": 1623 }, { "epoch": 0.16830759664213907, "grad_norm": 0.4701800048351288, "learning_rate": 3.8028106883970386e-05, "loss": 0.2596, "step": 1624 }, { "epoch": 0.16841123432480049, "grad_norm": 0.5113517642021179, "learning_rate": 3.80251990729385e-05, "loss": 0.2563, "step": 1625 }, { "epoch": 0.1685148720074619, "grad_norm": 0.5005125999450684, "learning_rate": 3.802228923085522e-05, "loss": 0.2691, "step": 1626 }, { "epoch": 0.16861850969012332, "grad_norm": 0.4985792338848114, "learning_rate": 3.801937735804838e-05, "loss": 0.2418, "step": 1627 }, { "epoch": 0.16872214737278474, "grad_norm": 0.4471662938594818, "learning_rate": 3.8016463454846125e-05, "loss": 0.213, "step": 1628 }, { "epoch": 0.16882578505544615, "grad_norm": 0.5097355842590332, "learning_rate": 3.801354752157678e-05, "loss": 0.3001, "step": 1629 }, { "epoch": 0.16892942273810757, "grad_norm": 0.466145783662796, "learning_rate": 3.8010629558568895e-05, "loss": 0.2276, "step": 1630 }, { "epoch": 0.169033060420769, "grad_norm": 0.472263902425766, "learning_rate": 3.800770956615127e-05, "loss": 0.2542, "step": 1631 }, { "epoch": 0.1691366981034304, "grad_norm": 0.5002596974372864, "learning_rate": 3.800478754465292e-05, "loss": 0.233, "step": 1632 }, { "epoch": 0.16924033578609182, "grad_norm": 0.4746779501438141, "learning_rate": 3.800186349440311e-05, "loss": 0.2507, "step": 1633 }, { "epoch": 0.16934397346875324, "grad_norm": 0.47505295276641846, "learning_rate": 3.79989374157313e-05, "loss": 0.2394, "step": 1634 }, { "epoch": 0.16944761115141466, "grad_norm": 0.4893549680709839, "learning_rate": 3.79960093089672e-05, "loss": 0.2677, "step": 1635 }, { "epoch": 0.16955124883407607, "grad_norm": 0.41501083970069885, "learning_rate": 3.799307917444075e-05, "loss": 0.2226, "step": 1636 }, { "epoch": 0.1696548865167375, "grad_norm": 0.5570014119148254, "learning_rate": 3.7990147012482104e-05, "loss": 0.267, "step": 1637 }, { "epoch": 0.1697585241993989, "grad_norm": 0.41493335366249084, "learning_rate": 3.798721282342167e-05, "loss": 0.241, "step": 1638 }, { "epoch": 0.16986216188206033, "grad_norm": 0.5655845403671265, "learning_rate": 3.7984276607590044e-05, "loss": 0.2715, "step": 1639 }, { "epoch": 0.16996579956472174, "grad_norm": 0.40720781683921814, "learning_rate": 3.798133836531809e-05, "loss": 0.1749, "step": 1640 }, { "epoch": 0.17006943724738316, "grad_norm": 0.46933940052986145, "learning_rate": 3.7978398096936887e-05, "loss": 0.2353, "step": 1641 }, { "epoch": 0.17017307493004458, "grad_norm": 0.4930836856365204, "learning_rate": 3.797545580277773e-05, "loss": 0.2269, "step": 1642 }, { "epoch": 0.17027671261270597, "grad_norm": 0.4270491302013397, "learning_rate": 3.7972511483172157e-05, "loss": 0.2244, "step": 1643 }, { "epoch": 0.17038035029536738, "grad_norm": 0.49993693828582764, "learning_rate": 3.7969565138451934e-05, "loss": 0.262, "step": 1644 }, { "epoch": 0.1704839879780288, "grad_norm": 0.4708077609539032, "learning_rate": 3.796661676894903e-05, "loss": 0.221, "step": 1645 }, { "epoch": 0.17058762566069022, "grad_norm": 0.39685335755348206, "learning_rate": 3.79636663749957e-05, "loss": 0.1984, "step": 1646 }, { "epoch": 0.17069126334335163, "grad_norm": 0.5080126523971558, "learning_rate": 3.796071395692435e-05, "loss": 0.2375, "step": 1647 }, { "epoch": 0.17079490102601305, "grad_norm": 0.5168275237083435, "learning_rate": 3.7957759515067676e-05, "loss": 0.2201, "step": 1648 }, { "epoch": 0.17089853870867447, "grad_norm": 0.47243815660476685, "learning_rate": 3.7954803049758584e-05, "loss": 0.2194, "step": 1649 }, { "epoch": 0.17100217639133589, "grad_norm": 0.4236190617084503, "learning_rate": 3.795184456133019e-05, "loss": 0.2056, "step": 1650 }, { "epoch": 0.1711058140739973, "grad_norm": 0.47571811079978943, "learning_rate": 3.794888405011586e-05, "loss": 0.2113, "step": 1651 }, { "epoch": 0.17120945175665872, "grad_norm": 0.4334424138069153, "learning_rate": 3.794592151644917e-05, "loss": 0.2364, "step": 1652 }, { "epoch": 0.17131308943932014, "grad_norm": 0.522921085357666, "learning_rate": 3.794295696066395e-05, "loss": 0.2501, "step": 1653 }, { "epoch": 0.17141672712198155, "grad_norm": 0.4549519717693329, "learning_rate": 3.793999038309423e-05, "loss": 0.2209, "step": 1654 }, { "epoch": 0.17152036480464297, "grad_norm": 0.4927518963813782, "learning_rate": 3.793702178407427e-05, "loss": 0.2683, "step": 1655 }, { "epoch": 0.1716240024873044, "grad_norm": 0.42857837677001953, "learning_rate": 3.79340511639386e-05, "loss": 0.2232, "step": 1656 }, { "epoch": 0.1717276401699658, "grad_norm": 0.4798828661441803, "learning_rate": 3.7931078523021906e-05, "loss": 0.2271, "step": 1657 }, { "epoch": 0.17183127785262722, "grad_norm": 0.48226112127304077, "learning_rate": 3.792810386165917e-05, "loss": 0.2419, "step": 1658 }, { "epoch": 0.17193491553528864, "grad_norm": 0.4466378390789032, "learning_rate": 3.792512718018555e-05, "loss": 0.2011, "step": 1659 }, { "epoch": 0.17203855321795006, "grad_norm": 0.41471484303474426, "learning_rate": 3.7922148478936476e-05, "loss": 0.2134, "step": 1660 }, { "epoch": 0.17214219090061147, "grad_norm": 0.4517219364643097, "learning_rate": 3.791916775824757e-05, "loss": 0.2131, "step": 1661 }, { "epoch": 0.17224582858327286, "grad_norm": 0.44317197799682617, "learning_rate": 3.791618501845469e-05, "loss": 0.2009, "step": 1662 }, { "epoch": 0.17234946626593428, "grad_norm": 0.5028525590896606, "learning_rate": 3.791320025989394e-05, "loss": 0.2398, "step": 1663 }, { "epoch": 0.1724531039485957, "grad_norm": 0.46632954478263855, "learning_rate": 3.7910213482901625e-05, "loss": 0.1982, "step": 1664 }, { "epoch": 0.17255674163125712, "grad_norm": 0.5463936924934387, "learning_rate": 3.79072246878143e-05, "loss": 0.267, "step": 1665 }, { "epoch": 0.17266037931391853, "grad_norm": 0.5574196577072144, "learning_rate": 3.7904233874968737e-05, "loss": 0.3005, "step": 1666 }, { "epoch": 0.17276401699657995, "grad_norm": 0.5142245888710022, "learning_rate": 3.7901241044701934e-05, "loss": 0.2538, "step": 1667 }, { "epoch": 0.17286765467924137, "grad_norm": 0.3882453739643097, "learning_rate": 3.789824619735111e-05, "loss": 0.1806, "step": 1668 }, { "epoch": 0.17297129236190278, "grad_norm": 0.426973432302475, "learning_rate": 3.789524933325373e-05, "loss": 0.2028, "step": 1669 }, { "epoch": 0.1730749300445642, "grad_norm": 0.49242454767227173, "learning_rate": 3.789225045274748e-05, "loss": 0.2358, "step": 1670 }, { "epoch": 0.17317856772722562, "grad_norm": 0.43440622091293335, "learning_rate": 3.788924955617026e-05, "loss": 0.2029, "step": 1671 }, { "epoch": 0.17328220540988704, "grad_norm": 0.44206124544143677, "learning_rate": 3.7886246643860205e-05, "loss": 0.2188, "step": 1672 }, { "epoch": 0.17338584309254845, "grad_norm": 0.45106032490730286, "learning_rate": 3.788324171615569e-05, "loss": 0.2165, "step": 1673 }, { "epoch": 0.17348948077520987, "grad_norm": 0.4641522765159607, "learning_rate": 3.78802347733953e-05, "loss": 0.2457, "step": 1674 }, { "epoch": 0.1735931184578713, "grad_norm": 0.5247939229011536, "learning_rate": 3.787722581591784e-05, "loss": 0.2566, "step": 1675 }, { "epoch": 0.1736967561405327, "grad_norm": 0.4361657202243805, "learning_rate": 3.787421484406238e-05, "loss": 0.2386, "step": 1676 }, { "epoch": 0.17380039382319412, "grad_norm": 0.4735523760318756, "learning_rate": 3.7871201858168165e-05, "loss": 0.2391, "step": 1677 }, { "epoch": 0.17390403150585554, "grad_norm": 0.48238077759742737, "learning_rate": 3.786818685857471e-05, "loss": 0.2358, "step": 1678 }, { "epoch": 0.17400766918851696, "grad_norm": 0.47058871388435364, "learning_rate": 3.786516984562174e-05, "loss": 0.2377, "step": 1679 }, { "epoch": 0.17411130687117837, "grad_norm": 0.429928183555603, "learning_rate": 3.78621508196492e-05, "loss": 0.2096, "step": 1680 }, { "epoch": 0.17421494455383976, "grad_norm": 0.5752142667770386, "learning_rate": 3.7859129780997274e-05, "loss": 0.2683, "step": 1681 }, { "epoch": 0.17431858223650118, "grad_norm": 0.45631688833236694, "learning_rate": 3.785610673000637e-05, "loss": 0.2206, "step": 1682 }, { "epoch": 0.1744222199191626, "grad_norm": 0.43998152017593384, "learning_rate": 3.7853081667017114e-05, "loss": 0.2012, "step": 1683 }, { "epoch": 0.174525857601824, "grad_norm": 0.40259674191474915, "learning_rate": 3.7850054592370363e-05, "loss": 0.2024, "step": 1684 }, { "epoch": 0.17462949528448543, "grad_norm": 0.4920617341995239, "learning_rate": 3.784702550640722e-05, "loss": 0.2325, "step": 1685 }, { "epoch": 0.17473313296714685, "grad_norm": 0.46443817019462585, "learning_rate": 3.7843994409468984e-05, "loss": 0.2327, "step": 1686 }, { "epoch": 0.17483677064980827, "grad_norm": 0.48127278685569763, "learning_rate": 3.784096130189719e-05, "loss": 0.2373, "step": 1687 }, { "epoch": 0.17494040833246968, "grad_norm": 0.4313235282897949, "learning_rate": 3.783792618403362e-05, "loss": 0.2353, "step": 1688 }, { "epoch": 0.1750440460151311, "grad_norm": 0.4920996427536011, "learning_rate": 3.783488905622025e-05, "loss": 0.271, "step": 1689 }, { "epoch": 0.17514768369779252, "grad_norm": 0.40804946422576904, "learning_rate": 3.783184991879931e-05, "loss": 0.2022, "step": 1690 }, { "epoch": 0.17525132138045393, "grad_norm": 0.49815499782562256, "learning_rate": 3.782880877211324e-05, "loss": 0.2574, "step": 1691 }, { "epoch": 0.17535495906311535, "grad_norm": 0.4492497444152832, "learning_rate": 3.782576561650471e-05, "loss": 0.2049, "step": 1692 }, { "epoch": 0.17545859674577677, "grad_norm": 0.4622615873813629, "learning_rate": 3.7822720452316625e-05, "loss": 0.2345, "step": 1693 }, { "epoch": 0.17556223442843819, "grad_norm": 0.4740571081638336, "learning_rate": 3.781967327989211e-05, "loss": 0.2069, "step": 1694 }, { "epoch": 0.1756658721110996, "grad_norm": 0.5480529069900513, "learning_rate": 3.78166240995745e-05, "loss": 0.2657, "step": 1695 }, { "epoch": 0.17576950979376102, "grad_norm": 0.47912469506263733, "learning_rate": 3.781357291170739e-05, "loss": 0.2428, "step": 1696 }, { "epoch": 0.17587314747642244, "grad_norm": 0.4989534318447113, "learning_rate": 3.7810519716634575e-05, "loss": 0.2095, "step": 1697 }, { "epoch": 0.17597678515908385, "grad_norm": 0.5266689658164978, "learning_rate": 3.780746451470008e-05, "loss": 0.2175, "step": 1698 }, { "epoch": 0.17608042284174527, "grad_norm": 0.4845711886882782, "learning_rate": 3.7804407306248177e-05, "loss": 0.2, "step": 1699 }, { "epoch": 0.17618406052440666, "grad_norm": 0.45821481943130493, "learning_rate": 3.780134809162332e-05, "loss": 0.2165, "step": 1700 }, { "epoch": 0.17628769820706808, "grad_norm": 0.4305170476436615, "learning_rate": 3.779828687117025e-05, "loss": 0.202, "step": 1701 }, { "epoch": 0.1763913358897295, "grad_norm": 0.4545448422431946, "learning_rate": 3.7795223645233876e-05, "loss": 0.2185, "step": 1702 }, { "epoch": 0.1764949735723909, "grad_norm": 0.5075645446777344, "learning_rate": 3.779215841415936e-05, "loss": 0.2546, "step": 1703 }, { "epoch": 0.17659861125505233, "grad_norm": 0.573638916015625, "learning_rate": 3.77890911782921e-05, "loss": 0.2826, "step": 1704 }, { "epoch": 0.17670224893771375, "grad_norm": 0.5099818110466003, "learning_rate": 3.7786021937977694e-05, "loss": 0.2561, "step": 1705 }, { "epoch": 0.17680588662037516, "grad_norm": 0.5081855654716492, "learning_rate": 3.778295069356199e-05, "loss": 0.2702, "step": 1706 }, { "epoch": 0.17690952430303658, "grad_norm": 0.49809956550598145, "learning_rate": 3.777987744539104e-05, "loss": 0.2416, "step": 1707 }, { "epoch": 0.177013161985698, "grad_norm": 0.45424211025238037, "learning_rate": 3.7776802193811146e-05, "loss": 0.1965, "step": 1708 }, { "epoch": 0.17711679966835941, "grad_norm": 0.5301138162612915, "learning_rate": 3.777372493916881e-05, "loss": 0.2866, "step": 1709 }, { "epoch": 0.17722043735102083, "grad_norm": 0.5104231834411621, "learning_rate": 3.7770645681810786e-05, "loss": 0.2571, "step": 1710 }, { "epoch": 0.17732407503368225, "grad_norm": 0.5096403360366821, "learning_rate": 3.776756442208402e-05, "loss": 0.2279, "step": 1711 }, { "epoch": 0.17742771271634367, "grad_norm": 0.46347206830978394, "learning_rate": 3.776448116033572e-05, "loss": 0.1851, "step": 1712 }, { "epoch": 0.17753135039900508, "grad_norm": 0.5343012809753418, "learning_rate": 3.77613958969133e-05, "loss": 0.2606, "step": 1713 }, { "epoch": 0.1776349880816665, "grad_norm": 0.5330291390419006, "learning_rate": 3.77583086321644e-05, "loss": 0.2886, "step": 1714 }, { "epoch": 0.17773862576432792, "grad_norm": 0.619606614112854, "learning_rate": 3.775521936643689e-05, "loss": 0.2602, "step": 1715 }, { "epoch": 0.17784226344698933, "grad_norm": 0.38278481364250183, "learning_rate": 3.775212810007886e-05, "loss": 0.1895, "step": 1716 }, { "epoch": 0.17794590112965075, "grad_norm": 0.5417003035545349, "learning_rate": 3.774903483343863e-05, "loss": 0.2846, "step": 1717 }, { "epoch": 0.17804953881231217, "grad_norm": 0.5265429615974426, "learning_rate": 3.774593956686475e-05, "loss": 0.2595, "step": 1718 }, { "epoch": 0.17815317649497356, "grad_norm": 0.45582106709480286, "learning_rate": 3.774284230070599e-05, "loss": 0.2193, "step": 1719 }, { "epoch": 0.17825681417763498, "grad_norm": 0.4111036956310272, "learning_rate": 3.773974303531134e-05, "loss": 0.1962, "step": 1720 }, { "epoch": 0.1783604518602964, "grad_norm": 0.48741012811660767, "learning_rate": 3.7736641771030015e-05, "loss": 0.2775, "step": 1721 }, { "epoch": 0.1784640895429578, "grad_norm": 0.4571603238582611, "learning_rate": 3.773353850821147e-05, "loss": 0.2303, "step": 1722 }, { "epoch": 0.17856772722561923, "grad_norm": 0.4960167706012726, "learning_rate": 3.773043324720537e-05, "loss": 0.2443, "step": 1723 }, { "epoch": 0.17867136490828064, "grad_norm": 0.49686622619628906, "learning_rate": 3.772732598836163e-05, "loss": 0.2539, "step": 1724 }, { "epoch": 0.17877500259094206, "grad_norm": 0.5505495071411133, "learning_rate": 3.772421673203034e-05, "loss": 0.2692, "step": 1725 }, { "epoch": 0.17887864027360348, "grad_norm": 0.5004148483276367, "learning_rate": 3.7721105478561866e-05, "loss": 0.2403, "step": 1726 }, { "epoch": 0.1789822779562649, "grad_norm": 0.515392541885376, "learning_rate": 3.771799222830677e-05, "loss": 0.2311, "step": 1727 }, { "epoch": 0.1790859156389263, "grad_norm": 0.5341514348983765, "learning_rate": 3.7714876981615866e-05, "loss": 0.2478, "step": 1728 }, { "epoch": 0.17918955332158773, "grad_norm": 0.41764670610427856, "learning_rate": 3.771175973884014e-05, "loss": 0.1995, "step": 1729 }, { "epoch": 0.17929319100424915, "grad_norm": 0.4541582763195038, "learning_rate": 3.770864050033088e-05, "loss": 0.2117, "step": 1730 }, { "epoch": 0.17939682868691056, "grad_norm": 0.463843435049057, "learning_rate": 3.770551926643953e-05, "loss": 0.2273, "step": 1731 }, { "epoch": 0.17950046636957198, "grad_norm": 0.45743948221206665, "learning_rate": 3.7702396037517795e-05, "loss": 0.2495, "step": 1732 }, { "epoch": 0.1796041040522334, "grad_norm": 0.4244634509086609, "learning_rate": 3.769927081391759e-05, "loss": 0.1787, "step": 1733 }, { "epoch": 0.17970774173489482, "grad_norm": 0.4777889549732208, "learning_rate": 3.769614359599106e-05, "loss": 0.2215, "step": 1734 }, { "epoch": 0.17981137941755623, "grad_norm": 0.44900938868522644, "learning_rate": 3.769301438409059e-05, "loss": 0.2304, "step": 1735 }, { "epoch": 0.17991501710021765, "grad_norm": 0.5480301380157471, "learning_rate": 3.7689883178568755e-05, "loss": 0.2723, "step": 1736 }, { "epoch": 0.18001865478287907, "grad_norm": 0.47610998153686523, "learning_rate": 3.7686749979778386e-05, "loss": 0.2481, "step": 1737 }, { "epoch": 0.18012229246554046, "grad_norm": 0.5337226390838623, "learning_rate": 3.7683614788072527e-05, "loss": 0.236, "step": 1738 }, { "epoch": 0.18022593014820187, "grad_norm": 0.4076583981513977, "learning_rate": 3.768047760380444e-05, "loss": 0.2005, "step": 1739 }, { "epoch": 0.1803295678308633, "grad_norm": 0.46692726016044617, "learning_rate": 3.767733842732762e-05, "loss": 0.2354, "step": 1740 }, { "epoch": 0.1804332055135247, "grad_norm": 0.4673640727996826, "learning_rate": 3.7674197258995785e-05, "loss": 0.2642, "step": 1741 }, { "epoch": 0.18053684319618613, "grad_norm": 0.47768813371658325, "learning_rate": 3.767105409916288e-05, "loss": 0.2261, "step": 1742 }, { "epoch": 0.18064048087884754, "grad_norm": 0.4129585921764374, "learning_rate": 3.7667908948183075e-05, "loss": 0.1918, "step": 1743 }, { "epoch": 0.18074411856150896, "grad_norm": 0.4703997075557709, "learning_rate": 3.7664761806410744e-05, "loss": 0.2537, "step": 1744 }, { "epoch": 0.18084775624417038, "grad_norm": 0.5203202366828918, "learning_rate": 3.766161267420052e-05, "loss": 0.2438, "step": 1745 }, { "epoch": 0.1809513939268318, "grad_norm": 0.5779595375061035, "learning_rate": 3.765846155190723e-05, "loss": 0.2892, "step": 1746 }, { "epoch": 0.1810550316094932, "grad_norm": 0.6186638474464417, "learning_rate": 3.765530843988595e-05, "loss": 0.2868, "step": 1747 }, { "epoch": 0.18115866929215463, "grad_norm": 0.5214221477508545, "learning_rate": 3.765215333849196e-05, "loss": 0.2815, "step": 1748 }, { "epoch": 0.18126230697481605, "grad_norm": 0.4598151445388794, "learning_rate": 3.7648996248080765e-05, "loss": 0.2439, "step": 1749 }, { "epoch": 0.18136594465747746, "grad_norm": 0.5000353455543518, "learning_rate": 3.764583716900812e-05, "loss": 0.2499, "step": 1750 }, { "epoch": 0.18146958234013888, "grad_norm": 0.44633814692497253, "learning_rate": 3.764267610162996e-05, "loss": 0.2367, "step": 1751 }, { "epoch": 0.1815732200228003, "grad_norm": 0.5109118819236755, "learning_rate": 3.763951304630249e-05, "loss": 0.2787, "step": 1752 }, { "epoch": 0.18167685770546171, "grad_norm": 0.47466611862182617, "learning_rate": 3.763634800338211e-05, "loss": 0.218, "step": 1753 }, { "epoch": 0.18178049538812313, "grad_norm": 0.4710037112236023, "learning_rate": 3.763318097322546e-05, "loss": 0.2761, "step": 1754 }, { "epoch": 0.18188413307078455, "grad_norm": 0.4349900484085083, "learning_rate": 3.7630011956189386e-05, "loss": 0.1997, "step": 1755 }, { "epoch": 0.18198777075344597, "grad_norm": 0.5574545860290527, "learning_rate": 3.7626840952630966e-05, "loss": 0.2889, "step": 1756 }, { "epoch": 0.18209140843610735, "grad_norm": 0.405887246131897, "learning_rate": 3.762366796290751e-05, "loss": 0.1968, "step": 1757 }, { "epoch": 0.18219504611876877, "grad_norm": 0.44126248359680176, "learning_rate": 3.7620492987376544e-05, "loss": 0.2313, "step": 1758 }, { "epoch": 0.1822986838014302, "grad_norm": 0.448207288980484, "learning_rate": 3.7617316026395824e-05, "loss": 0.2524, "step": 1759 }, { "epoch": 0.1824023214840916, "grad_norm": 0.4973071217536926, "learning_rate": 3.761413708032332e-05, "loss": 0.244, "step": 1760 }, { "epoch": 0.18250595916675302, "grad_norm": 0.4090721607208252, "learning_rate": 3.7610956149517235e-05, "loss": 0.216, "step": 1761 }, { "epoch": 0.18260959684941444, "grad_norm": 0.44438639283180237, "learning_rate": 3.7607773234335984e-05, "loss": 0.2264, "step": 1762 }, { "epoch": 0.18271323453207586, "grad_norm": 0.5119732022285461, "learning_rate": 3.760458833513821e-05, "loss": 0.2179, "step": 1763 }, { "epoch": 0.18281687221473727, "grad_norm": 0.4416314661502838, "learning_rate": 3.7601401452282795e-05, "loss": 0.2009, "step": 1764 }, { "epoch": 0.1829205098973987, "grad_norm": 0.5023295283317566, "learning_rate": 3.759821258612883e-05, "loss": 0.272, "step": 1765 }, { "epoch": 0.1830241475800601, "grad_norm": 0.4766654074192047, "learning_rate": 3.759502173703562e-05, "loss": 0.2325, "step": 1766 }, { "epoch": 0.18312778526272153, "grad_norm": 0.49825185537338257, "learning_rate": 3.7591828905362724e-05, "loss": 0.2496, "step": 1767 }, { "epoch": 0.18323142294538294, "grad_norm": 0.4586202800273895, "learning_rate": 3.758863409146988e-05, "loss": 0.2415, "step": 1768 }, { "epoch": 0.18333506062804436, "grad_norm": 0.5108445286750793, "learning_rate": 3.75854372957171e-05, "loss": 0.2488, "step": 1769 }, { "epoch": 0.18343869831070578, "grad_norm": 0.5143339037895203, "learning_rate": 3.7582238518464576e-05, "loss": 0.238, "step": 1770 }, { "epoch": 0.1835423359933672, "grad_norm": 0.4804529845714569, "learning_rate": 3.757903776007275e-05, "loss": 0.2545, "step": 1771 }, { "epoch": 0.1836459736760286, "grad_norm": 0.4834483861923218, "learning_rate": 3.7575835020902275e-05, "loss": 0.2442, "step": 1772 }, { "epoch": 0.18374961135869003, "grad_norm": 0.462864488363266, "learning_rate": 3.7572630301314036e-05, "loss": 0.2142, "step": 1773 }, { "epoch": 0.18385324904135145, "grad_norm": 0.493757963180542, "learning_rate": 3.756942360166913e-05, "loss": 0.2236, "step": 1774 }, { "epoch": 0.18395688672401286, "grad_norm": 0.5044764280319214, "learning_rate": 3.756621492232888e-05, "loss": 0.2864, "step": 1775 }, { "epoch": 0.18406052440667425, "grad_norm": 0.4586033225059509, "learning_rate": 3.756300426365485e-05, "loss": 0.2048, "step": 1776 }, { "epoch": 0.18416416208933567, "grad_norm": 0.4786682426929474, "learning_rate": 3.7559791626008795e-05, "loss": 0.2639, "step": 1777 }, { "epoch": 0.1842677997719971, "grad_norm": 0.5083273649215698, "learning_rate": 3.755657700975272e-05, "loss": 0.2491, "step": 1778 }, { "epoch": 0.1843714374546585, "grad_norm": 0.46496713161468506, "learning_rate": 3.755336041524883e-05, "loss": 0.225, "step": 1779 }, { "epoch": 0.18447507513731992, "grad_norm": 0.45612651109695435, "learning_rate": 3.7550141842859586e-05, "loss": 0.215, "step": 1780 }, { "epoch": 0.18457871281998134, "grad_norm": 0.41664206981658936, "learning_rate": 3.754692129294764e-05, "loss": 0.2182, "step": 1781 }, { "epoch": 0.18468235050264276, "grad_norm": 0.5147910714149475, "learning_rate": 3.7543698765875873e-05, "loss": 0.2259, "step": 1782 }, { "epoch": 0.18478598818530417, "grad_norm": 0.4890103340148926, "learning_rate": 3.754047426200741e-05, "loss": 0.2464, "step": 1783 }, { "epoch": 0.1848896258679656, "grad_norm": 0.4901570975780487, "learning_rate": 3.753724778170557e-05, "loss": 0.2265, "step": 1784 }, { "epoch": 0.184993263550627, "grad_norm": 0.49182382225990295, "learning_rate": 3.753401932533391e-05, "loss": 0.2065, "step": 1785 }, { "epoch": 0.18509690123328842, "grad_norm": 0.47069478034973145, "learning_rate": 3.7530788893256217e-05, "loss": 0.2427, "step": 1786 }, { "epoch": 0.18520053891594984, "grad_norm": 0.4829593598842621, "learning_rate": 3.752755648583648e-05, "loss": 0.1938, "step": 1787 }, { "epoch": 0.18530417659861126, "grad_norm": 0.5271267294883728, "learning_rate": 3.752432210343893e-05, "loss": 0.2727, "step": 1788 }, { "epoch": 0.18540781428127268, "grad_norm": 0.49652940034866333, "learning_rate": 3.752108574642799e-05, "loss": 0.2396, "step": 1789 }, { "epoch": 0.1855114519639341, "grad_norm": 0.43199917674064636, "learning_rate": 3.7517847415168365e-05, "loss": 0.2286, "step": 1790 }, { "epoch": 0.1856150896465955, "grad_norm": 0.4996117055416107, "learning_rate": 3.751460711002492e-05, "loss": 0.2406, "step": 1791 }, { "epoch": 0.18571872732925693, "grad_norm": 0.5022050142288208, "learning_rate": 3.7511364831362766e-05, "loss": 0.2317, "step": 1792 }, { "epoch": 0.18582236501191834, "grad_norm": 0.44132325053215027, "learning_rate": 3.750812057954725e-05, "loss": 0.2145, "step": 1793 }, { "epoch": 0.18592600269457976, "grad_norm": 0.5774767994880676, "learning_rate": 3.750487435494392e-05, "loss": 0.3222, "step": 1794 }, { "epoch": 0.18602964037724115, "grad_norm": 0.49793311953544617, "learning_rate": 3.7501626157918564e-05, "loss": 0.2787, "step": 1795 }, { "epoch": 0.18613327805990257, "grad_norm": 0.469102680683136, "learning_rate": 3.749837598883718e-05, "loss": 0.2617, "step": 1796 }, { "epoch": 0.18623691574256399, "grad_norm": 0.5038089156150818, "learning_rate": 3.7495123848065984e-05, "loss": 0.2302, "step": 1797 }, { "epoch": 0.1863405534252254, "grad_norm": 0.5174588561058044, "learning_rate": 3.749186973597144e-05, "loss": 0.2199, "step": 1798 }, { "epoch": 0.18644419110788682, "grad_norm": 0.44706490635871887, "learning_rate": 3.74886136529202e-05, "loss": 0.247, "step": 1799 }, { "epoch": 0.18654782879054824, "grad_norm": 0.5332117080688477, "learning_rate": 3.748535559927916e-05, "loss": 0.2707, "step": 1800 }, { "epoch": 0.18665146647320965, "grad_norm": 0.45966261625289917, "learning_rate": 3.748209557541543e-05, "loss": 0.2536, "step": 1801 }, { "epoch": 0.18675510415587107, "grad_norm": 0.4928770959377289, "learning_rate": 3.7478833581696354e-05, "loss": 0.2439, "step": 1802 }, { "epoch": 0.1868587418385325, "grad_norm": 0.48167261481285095, "learning_rate": 3.747556961848948e-05, "loss": 0.2288, "step": 1803 }, { "epoch": 0.1869623795211939, "grad_norm": 0.5721211433410645, "learning_rate": 3.747230368616258e-05, "loss": 0.2477, "step": 1804 }, { "epoch": 0.18706601720385532, "grad_norm": 0.4811793565750122, "learning_rate": 3.746903578508367e-05, "loss": 0.1919, "step": 1805 }, { "epoch": 0.18716965488651674, "grad_norm": 0.4622481167316437, "learning_rate": 3.746576591562096e-05, "loss": 0.2346, "step": 1806 }, { "epoch": 0.18727329256917816, "grad_norm": 0.4700213372707367, "learning_rate": 3.74624940781429e-05, "loss": 0.2085, "step": 1807 }, { "epoch": 0.18737693025183957, "grad_norm": 0.4065195918083191, "learning_rate": 3.745922027301814e-05, "loss": 0.1784, "step": 1808 }, { "epoch": 0.187480567934501, "grad_norm": 0.4072580635547638, "learning_rate": 3.74559445006156e-05, "loss": 0.1962, "step": 1809 }, { "epoch": 0.1875842056171624, "grad_norm": 0.4518824815750122, "learning_rate": 3.7452666761304365e-05, "loss": 0.2514, "step": 1810 }, { "epoch": 0.18768784329982383, "grad_norm": 0.46154463291168213, "learning_rate": 3.744938705545377e-05, "loss": 0.266, "step": 1811 }, { "epoch": 0.18779148098248524, "grad_norm": 0.4089922308921814, "learning_rate": 3.7446105383433364e-05, "loss": 0.2019, "step": 1812 }, { "epoch": 0.18789511866514666, "grad_norm": 0.4270873963832855, "learning_rate": 3.744282174561292e-05, "loss": 0.2124, "step": 1813 }, { "epoch": 0.18799875634780805, "grad_norm": 0.46813175082206726, "learning_rate": 3.743953614236244e-05, "loss": 0.2316, "step": 1814 }, { "epoch": 0.18810239403046947, "grad_norm": 0.4075770378112793, "learning_rate": 3.743624857405214e-05, "loss": 0.2163, "step": 1815 }, { "epoch": 0.18820603171313088, "grad_norm": 0.5519664287567139, "learning_rate": 3.7432959041052455e-05, "loss": 0.2886, "step": 1816 }, { "epoch": 0.1883096693957923, "grad_norm": 0.4090479016304016, "learning_rate": 3.7429667543734045e-05, "loss": 0.1961, "step": 1817 }, { "epoch": 0.18841330707845372, "grad_norm": 0.4551500380039215, "learning_rate": 3.742637408246779e-05, "loss": 0.2134, "step": 1818 }, { "epoch": 0.18851694476111513, "grad_norm": 0.5673427581787109, "learning_rate": 3.742307865762479e-05, "loss": 0.2555, "step": 1819 }, { "epoch": 0.18862058244377655, "grad_norm": 0.42178842425346375, "learning_rate": 3.741978126957638e-05, "loss": 0.2362, "step": 1820 }, { "epoch": 0.18872422012643797, "grad_norm": 0.5114960074424744, "learning_rate": 3.7416481918694094e-05, "loss": 0.2224, "step": 1821 }, { "epoch": 0.1888278578090994, "grad_norm": 0.44898855686187744, "learning_rate": 3.74131806053497e-05, "loss": 0.2132, "step": 1822 }, { "epoch": 0.1889314954917608, "grad_norm": 0.5565721988677979, "learning_rate": 3.740987732991518e-05, "loss": 0.2961, "step": 1823 }, { "epoch": 0.18903513317442222, "grad_norm": 0.5278633236885071, "learning_rate": 3.7406572092762744e-05, "loss": 0.2566, "step": 1824 }, { "epoch": 0.18913877085708364, "grad_norm": 0.48794278502464294, "learning_rate": 3.7403264894264836e-05, "loss": 0.2649, "step": 1825 }, { "epoch": 0.18924240853974506, "grad_norm": 0.5211032032966614, "learning_rate": 3.739995573479408e-05, "loss": 0.2638, "step": 1826 }, { "epoch": 0.18934604622240647, "grad_norm": 0.5264724493026733, "learning_rate": 3.7396644614723374e-05, "loss": 0.2508, "step": 1827 }, { "epoch": 0.1894496839050679, "grad_norm": 0.4915875196456909, "learning_rate": 3.739333153442579e-05, "loss": 0.2422, "step": 1828 }, { "epoch": 0.1895533215877293, "grad_norm": 0.5053110718727112, "learning_rate": 3.739001649427464e-05, "loss": 0.2531, "step": 1829 }, { "epoch": 0.18965695927039072, "grad_norm": 0.43746939301490784, "learning_rate": 3.738669949464347e-05, "loss": 0.2166, "step": 1830 }, { "epoch": 0.18976059695305214, "grad_norm": 0.5301503539085388, "learning_rate": 3.7383380535906033e-05, "loss": 0.2461, "step": 1831 }, { "epoch": 0.18986423463571356, "grad_norm": 0.4544839560985565, "learning_rate": 3.7380059618436305e-05, "loss": 0.2497, "step": 1832 }, { "epoch": 0.18996787231837495, "grad_norm": 0.428805410861969, "learning_rate": 3.7376736742608465e-05, "loss": 0.1934, "step": 1833 }, { "epoch": 0.19007151000103636, "grad_norm": 0.44237765669822693, "learning_rate": 3.7373411908796944e-05, "loss": 0.2145, "step": 1834 }, { "epoch": 0.19017514768369778, "grad_norm": 0.5216934084892273, "learning_rate": 3.737008511737638e-05, "loss": 0.2676, "step": 1835 }, { "epoch": 0.1902787853663592, "grad_norm": 0.5544553399085999, "learning_rate": 3.736675636872162e-05, "loss": 0.3118, "step": 1836 }, { "epoch": 0.19038242304902062, "grad_norm": 0.4797340929508209, "learning_rate": 3.736342566320776e-05, "loss": 0.2353, "step": 1837 }, { "epoch": 0.19048606073168203, "grad_norm": 0.4621637463569641, "learning_rate": 3.736009300121009e-05, "loss": 0.2282, "step": 1838 }, { "epoch": 0.19058969841434345, "grad_norm": 0.45533865690231323, "learning_rate": 3.735675838310412e-05, "loss": 0.2282, "step": 1839 }, { "epoch": 0.19069333609700487, "grad_norm": 0.4414368271827698, "learning_rate": 3.7353421809265596e-05, "loss": 0.2026, "step": 1840 }, { "epoch": 0.19079697377966628, "grad_norm": 0.37621623277664185, "learning_rate": 3.735008328007048e-05, "loss": 0.1971, "step": 1841 }, { "epoch": 0.1909006114623277, "grad_norm": 0.36020252108573914, "learning_rate": 3.7346742795894954e-05, "loss": 0.1844, "step": 1842 }, { "epoch": 0.19100424914498912, "grad_norm": 0.5098097324371338, "learning_rate": 3.734340035711541e-05, "loss": 0.2449, "step": 1843 }, { "epoch": 0.19110788682765054, "grad_norm": 0.4699155390262604, "learning_rate": 3.734005596410848e-05, "loss": 0.2441, "step": 1844 }, { "epoch": 0.19121152451031195, "grad_norm": 0.3725084364414215, "learning_rate": 3.7336709617251e-05, "loss": 0.2027, "step": 1845 }, { "epoch": 0.19131516219297337, "grad_norm": 0.49921226501464844, "learning_rate": 3.733336131692003e-05, "loss": 0.2866, "step": 1846 }, { "epoch": 0.1914187998756348, "grad_norm": 0.4483483135700226, "learning_rate": 3.733001106349285e-05, "loss": 0.2498, "step": 1847 }, { "epoch": 0.1915224375582962, "grad_norm": 0.43417519330978394, "learning_rate": 3.7326658857346964e-05, "loss": 0.2234, "step": 1848 }, { "epoch": 0.19162607524095762, "grad_norm": 0.4553414583206177, "learning_rate": 3.73233046988601e-05, "loss": 0.2338, "step": 1849 }, { "epoch": 0.19172971292361904, "grad_norm": 0.4434491991996765, "learning_rate": 3.731994858841018e-05, "loss": 0.2092, "step": 1850 }, { "epoch": 0.19183335060628046, "grad_norm": 0.5110379457473755, "learning_rate": 3.7316590526375385e-05, "loss": 0.2763, "step": 1851 }, { "epoch": 0.19193698828894185, "grad_norm": 0.4639647603034973, "learning_rate": 3.731323051313409e-05, "loss": 0.249, "step": 1852 }, { "epoch": 0.19204062597160326, "grad_norm": 0.4629019498825073, "learning_rate": 3.730986854906489e-05, "loss": 0.2365, "step": 1853 }, { "epoch": 0.19214426365426468, "grad_norm": 0.510564923286438, "learning_rate": 3.7306504634546605e-05, "loss": 0.2578, "step": 1854 }, { "epoch": 0.1922479013369261, "grad_norm": 0.4567071497440338, "learning_rate": 3.730313876995829e-05, "loss": 0.2494, "step": 1855 }, { "epoch": 0.19235153901958751, "grad_norm": 0.4712611436843872, "learning_rate": 3.7299770955679196e-05, "loss": 0.2677, "step": 1856 }, { "epoch": 0.19245517670224893, "grad_norm": 0.49776506423950195, "learning_rate": 3.7296401192088804e-05, "loss": 0.2344, "step": 1857 }, { "epoch": 0.19255881438491035, "grad_norm": 0.5162355899810791, "learning_rate": 3.729302947956681e-05, "loss": 0.2403, "step": 1858 }, { "epoch": 0.19266245206757177, "grad_norm": 0.45831117033958435, "learning_rate": 3.728965581849314e-05, "loss": 0.2333, "step": 1859 }, { "epoch": 0.19276608975023318, "grad_norm": 0.39789503812789917, "learning_rate": 3.728628020924793e-05, "loss": 0.2093, "step": 1860 }, { "epoch": 0.1928697274328946, "grad_norm": 0.406093031167984, "learning_rate": 3.7282902652211535e-05, "loss": 0.1883, "step": 1861 }, { "epoch": 0.19297336511555602, "grad_norm": 0.5074322819709778, "learning_rate": 3.7279523147764536e-05, "loss": 0.2556, "step": 1862 }, { "epoch": 0.19307700279821743, "grad_norm": 0.4497910737991333, "learning_rate": 3.727614169628773e-05, "loss": 0.2734, "step": 1863 }, { "epoch": 0.19318064048087885, "grad_norm": 0.49084556102752686, "learning_rate": 3.727275829816214e-05, "loss": 0.2526, "step": 1864 }, { "epoch": 0.19328427816354027, "grad_norm": 0.4497647285461426, "learning_rate": 3.7269372953768995e-05, "loss": 0.2143, "step": 1865 }, { "epoch": 0.19338791584620169, "grad_norm": 0.5706869959831238, "learning_rate": 3.726598566348974e-05, "loss": 0.2836, "step": 1866 }, { "epoch": 0.1934915535288631, "grad_norm": 0.3616214990615845, "learning_rate": 3.7262596427706075e-05, "loss": 0.1878, "step": 1867 }, { "epoch": 0.19359519121152452, "grad_norm": 0.4331905245780945, "learning_rate": 3.725920524679987e-05, "loss": 0.2154, "step": 1868 }, { "epoch": 0.19369882889418594, "grad_norm": 0.3976379930973053, "learning_rate": 3.725581212115325e-05, "loss": 0.1836, "step": 1869 }, { "epoch": 0.19380246657684735, "grad_norm": 0.4745163023471832, "learning_rate": 3.725241705114855e-05, "loss": 0.2518, "step": 1870 }, { "epoch": 0.19390610425950874, "grad_norm": 0.498110830783844, "learning_rate": 3.724902003716831e-05, "loss": 0.2702, "step": 1871 }, { "epoch": 0.19400974194217016, "grad_norm": 0.5331942439079285, "learning_rate": 3.724562107959531e-05, "loss": 0.269, "step": 1872 }, { "epoch": 0.19411337962483158, "grad_norm": 0.47925272583961487, "learning_rate": 3.724222017881253e-05, "loss": 0.2345, "step": 1873 }, { "epoch": 0.194217017307493, "grad_norm": 0.47513824701309204, "learning_rate": 3.723881733520319e-05, "loss": 0.2699, "step": 1874 }, { "epoch": 0.1943206549901544, "grad_norm": 0.4839094579219818, "learning_rate": 3.723541254915071e-05, "loss": 0.251, "step": 1875 }, { "epoch": 0.19442429267281583, "grad_norm": 0.42007312178611755, "learning_rate": 3.723200582103874e-05, "loss": 0.2155, "step": 1876 }, { "epoch": 0.19452793035547725, "grad_norm": 0.5020776987075806, "learning_rate": 3.722859715125114e-05, "loss": 0.2374, "step": 1877 }, { "epoch": 0.19463156803813866, "grad_norm": 0.4552202820777893, "learning_rate": 3.722518654017199e-05, "loss": 0.218, "step": 1878 }, { "epoch": 0.19473520572080008, "grad_norm": 0.48936206102371216, "learning_rate": 3.7221773988185604e-05, "loss": 0.2722, "step": 1879 }, { "epoch": 0.1948388434034615, "grad_norm": 0.49154961109161377, "learning_rate": 3.721835949567649e-05, "loss": 0.2223, "step": 1880 }, { "epoch": 0.19494248108612292, "grad_norm": 0.5503633618354797, "learning_rate": 3.7214943063029395e-05, "loss": 0.2764, "step": 1881 }, { "epoch": 0.19504611876878433, "grad_norm": 0.46910014748573303, "learning_rate": 3.721152469062928e-05, "loss": 0.229, "step": 1882 }, { "epoch": 0.19514975645144575, "grad_norm": 0.4484991431236267, "learning_rate": 3.720810437886132e-05, "loss": 0.2392, "step": 1883 }, { "epoch": 0.19525339413410717, "grad_norm": 0.47034701704978943, "learning_rate": 3.7204682128110905e-05, "loss": 0.2275, "step": 1884 }, { "epoch": 0.19535703181676858, "grad_norm": 0.42318418622016907, "learning_rate": 3.7201257938763656e-05, "loss": 0.2151, "step": 1885 }, { "epoch": 0.19546066949943, "grad_norm": 0.5113075971603394, "learning_rate": 3.719783181120541e-05, "loss": 0.2369, "step": 1886 }, { "epoch": 0.19556430718209142, "grad_norm": 0.46260493993759155, "learning_rate": 3.719440374582219e-05, "loss": 0.2378, "step": 1887 }, { "epoch": 0.19566794486475284, "grad_norm": 0.427514910697937, "learning_rate": 3.71909737430003e-05, "loss": 0.2124, "step": 1888 }, { "epoch": 0.19577158254741425, "grad_norm": 0.5521390438079834, "learning_rate": 3.718754180312621e-05, "loss": 0.2562, "step": 1889 }, { "epoch": 0.19587522023007564, "grad_norm": 0.3623069226741791, "learning_rate": 3.718410792658663e-05, "loss": 0.1583, "step": 1890 }, { "epoch": 0.19597885791273706, "grad_norm": 0.4941210448741913, "learning_rate": 3.718067211376848e-05, "loss": 0.2501, "step": 1891 }, { "epoch": 0.19608249559539848, "grad_norm": 0.4543443024158478, "learning_rate": 3.717723436505891e-05, "loss": 0.2043, "step": 1892 }, { "epoch": 0.1961861332780599, "grad_norm": 0.42174211144447327, "learning_rate": 3.717379468084526e-05, "loss": 0.2133, "step": 1893 }, { "epoch": 0.1962897709607213, "grad_norm": 0.5058109164237976, "learning_rate": 3.7170353061515135e-05, "loss": 0.2495, "step": 1894 }, { "epoch": 0.19639340864338273, "grad_norm": 0.43948596715927124, "learning_rate": 3.716690950745632e-05, "loss": 0.2201, "step": 1895 }, { "epoch": 0.19649704632604414, "grad_norm": 0.48528286814689636, "learning_rate": 3.7163464019056824e-05, "loss": 0.2503, "step": 1896 }, { "epoch": 0.19660068400870556, "grad_norm": 0.4711983799934387, "learning_rate": 3.7160016596704876e-05, "loss": 0.2445, "step": 1897 }, { "epoch": 0.19670432169136698, "grad_norm": 0.5053095817565918, "learning_rate": 3.715656724078894e-05, "loss": 0.2604, "step": 1898 }, { "epoch": 0.1968079593740284, "grad_norm": 0.5295904278755188, "learning_rate": 3.715311595169768e-05, "loss": 0.2671, "step": 1899 }, { "epoch": 0.1969115970566898, "grad_norm": 0.494743287563324, "learning_rate": 3.7149662729819976e-05, "loss": 0.2782, "step": 1900 }, { "epoch": 0.19701523473935123, "grad_norm": 0.4124172627925873, "learning_rate": 3.7146207575544935e-05, "loss": 0.1894, "step": 1901 }, { "epoch": 0.19711887242201265, "grad_norm": 0.457582950592041, "learning_rate": 3.714275048926188e-05, "loss": 0.2048, "step": 1902 }, { "epoch": 0.19722251010467406, "grad_norm": 0.5982666611671448, "learning_rate": 3.713929147136035e-05, "loss": 0.2634, "step": 1903 }, { "epoch": 0.19732614778733548, "grad_norm": 0.49243828654289246, "learning_rate": 3.71358305222301e-05, "loss": 0.237, "step": 1904 }, { "epoch": 0.1974297854699969, "grad_norm": 0.5259522199630737, "learning_rate": 3.7132367642261106e-05, "loss": 0.2442, "step": 1905 }, { "epoch": 0.19753342315265832, "grad_norm": 0.4355629086494446, "learning_rate": 3.7128902831843554e-05, "loss": 0.2314, "step": 1906 }, { "epoch": 0.19763706083531973, "grad_norm": 0.4574223458766937, "learning_rate": 3.7125436091367866e-05, "loss": 0.2163, "step": 1907 }, { "epoch": 0.19774069851798115, "grad_norm": 0.480646014213562, "learning_rate": 3.712196742122466e-05, "loss": 0.2259, "step": 1908 }, { "epoch": 0.19784433620064254, "grad_norm": 0.5398069024085999, "learning_rate": 3.711849682180477e-05, "loss": 0.2917, "step": 1909 }, { "epoch": 0.19794797388330396, "grad_norm": 0.4341377019882202, "learning_rate": 3.711502429349928e-05, "loss": 0.2098, "step": 1910 }, { "epoch": 0.19805161156596537, "grad_norm": 0.47715920209884644, "learning_rate": 3.7111549836699456e-05, "loss": 0.2335, "step": 1911 }, { "epoch": 0.1981552492486268, "grad_norm": 0.4509512186050415, "learning_rate": 3.71080734517968e-05, "loss": 0.212, "step": 1912 }, { "epoch": 0.1982588869312882, "grad_norm": 0.438888818025589, "learning_rate": 3.7104595139183014e-05, "loss": 0.237, "step": 1913 }, { "epoch": 0.19836252461394963, "grad_norm": 0.4621768593788147, "learning_rate": 3.710111489925004e-05, "loss": 0.2551, "step": 1914 }, { "epoch": 0.19846616229661104, "grad_norm": 0.5049877762794495, "learning_rate": 3.709763273239003e-05, "loss": 0.2512, "step": 1915 }, { "epoch": 0.19856979997927246, "grad_norm": 0.4743228554725647, "learning_rate": 3.709414863899534e-05, "loss": 0.2509, "step": 1916 }, { "epoch": 0.19867343766193388, "grad_norm": 0.44488999247550964, "learning_rate": 3.7090662619458555e-05, "loss": 0.2225, "step": 1917 }, { "epoch": 0.1987770753445953, "grad_norm": 0.45012685656547546, "learning_rate": 3.708717467417248e-05, "loss": 0.2298, "step": 1918 }, { "epoch": 0.1988807130272567, "grad_norm": 0.5009463429450989, "learning_rate": 3.708368480353011e-05, "loss": 0.2378, "step": 1919 }, { "epoch": 0.19898435070991813, "grad_norm": 0.5277925133705139, "learning_rate": 3.70801930079247e-05, "loss": 0.2575, "step": 1920 }, { "epoch": 0.19908798839257955, "grad_norm": 0.4720187187194824, "learning_rate": 3.707669928774969e-05, "loss": 0.2611, "step": 1921 }, { "epoch": 0.19919162607524096, "grad_norm": 0.5206968188285828, "learning_rate": 3.7073203643398764e-05, "loss": 0.2704, "step": 1922 }, { "epoch": 0.19929526375790238, "grad_norm": 0.5649044513702393, "learning_rate": 3.706970607526578e-05, "loss": 0.2472, "step": 1923 }, { "epoch": 0.1993989014405638, "grad_norm": 0.4891642928123474, "learning_rate": 3.7066206583744855e-05, "loss": 0.2403, "step": 1924 }, { "epoch": 0.19950253912322521, "grad_norm": 0.46503427624702454, "learning_rate": 3.70627051692303e-05, "loss": 0.2065, "step": 1925 }, { "epoch": 0.19960617680588663, "grad_norm": 0.567818820476532, "learning_rate": 3.705920183211666e-05, "loss": 0.3012, "step": 1926 }, { "epoch": 0.19970981448854805, "grad_norm": 0.49717381596565247, "learning_rate": 3.705569657279866e-05, "loss": 0.2438, "step": 1927 }, { "epoch": 0.19981345217120944, "grad_norm": 0.5408019423484802, "learning_rate": 3.7052189391671295e-05, "loss": 0.2561, "step": 1928 }, { "epoch": 0.19991708985387086, "grad_norm": 0.4729301333427429, "learning_rate": 3.704868028912974e-05, "loss": 0.2296, "step": 1929 }, { "epoch": 0.20002072753653227, "grad_norm": 0.4479348361492157, "learning_rate": 3.7045169265569384e-05, "loss": 0.2332, "step": 1930 }, { "epoch": 0.2001243652191937, "grad_norm": 0.4986315071582794, "learning_rate": 3.7041656321385857e-05, "loss": 0.2766, "step": 1931 }, { "epoch": 0.2002280029018551, "grad_norm": 0.49219274520874023, "learning_rate": 3.7038141456974986e-05, "loss": 0.2417, "step": 1932 }, { "epoch": 0.20033164058451652, "grad_norm": 0.4674849212169647, "learning_rate": 3.703462467273282e-05, "loss": 0.2562, "step": 1933 }, { "epoch": 0.20043527826717794, "grad_norm": 0.4261247217655182, "learning_rate": 3.703110596905563e-05, "loss": 0.2184, "step": 1934 }, { "epoch": 0.20053891594983936, "grad_norm": 0.40725192427635193, "learning_rate": 3.70275853463399e-05, "loss": 0.1822, "step": 1935 }, { "epoch": 0.20064255363250078, "grad_norm": 0.5302891135215759, "learning_rate": 3.7024062804982315e-05, "loss": 0.2622, "step": 1936 }, { "epoch": 0.2007461913151622, "grad_norm": 0.4687095284461975, "learning_rate": 3.70205383453798e-05, "loss": 0.2528, "step": 1937 }, { "epoch": 0.2008498289978236, "grad_norm": 0.43511658906936646, "learning_rate": 3.7017011967929484e-05, "loss": 0.2163, "step": 1938 }, { "epoch": 0.20095346668048503, "grad_norm": 0.40954121947288513, "learning_rate": 3.701348367302871e-05, "loss": 0.2077, "step": 1939 }, { "epoch": 0.20105710436314644, "grad_norm": 0.504981279373169, "learning_rate": 3.7009953461075044e-05, "loss": 0.2458, "step": 1940 }, { "epoch": 0.20116074204580786, "grad_norm": 0.4793414771556854, "learning_rate": 3.700642133246627e-05, "loss": 0.231, "step": 1941 }, { "epoch": 0.20126437972846928, "grad_norm": 0.5226635336875916, "learning_rate": 3.700288728760037e-05, "loss": 0.242, "step": 1942 }, { "epoch": 0.2013680174111307, "grad_norm": 0.5597573518753052, "learning_rate": 3.699935132687556e-05, "loss": 0.3137, "step": 1943 }, { "epoch": 0.2014716550937921, "grad_norm": 0.4148508608341217, "learning_rate": 3.699581345069028e-05, "loss": 0.1813, "step": 1944 }, { "epoch": 0.20157529277645353, "grad_norm": 0.49836692214012146, "learning_rate": 3.699227365944316e-05, "loss": 0.2657, "step": 1945 }, { "epoch": 0.20167893045911495, "grad_norm": 0.4882723093032837, "learning_rate": 3.698873195353305e-05, "loss": 0.2314, "step": 1946 }, { "epoch": 0.20178256814177634, "grad_norm": 0.4234292507171631, "learning_rate": 3.698518833335904e-05, "loss": 0.2216, "step": 1947 }, { "epoch": 0.20188620582443775, "grad_norm": 0.48601916432380676, "learning_rate": 3.69816427993204e-05, "loss": 0.2464, "step": 1948 }, { "epoch": 0.20198984350709917, "grad_norm": 0.46592000126838684, "learning_rate": 3.6978095351816656e-05, "loss": 0.2274, "step": 1949 }, { "epoch": 0.2020934811897606, "grad_norm": 0.4673402011394501, "learning_rate": 3.697454599124753e-05, "loss": 0.2211, "step": 1950 }, { "epoch": 0.202197118872422, "grad_norm": 0.4336487948894501, "learning_rate": 3.697099471801294e-05, "loss": 0.2031, "step": 1951 }, { "epoch": 0.20230075655508342, "grad_norm": 0.5105946063995361, "learning_rate": 3.6967441532513046e-05, "loss": 0.2742, "step": 1952 }, { "epoch": 0.20240439423774484, "grad_norm": 0.46542996168136597, "learning_rate": 3.696388643514822e-05, "loss": 0.2272, "step": 1953 }, { "epoch": 0.20250803192040626, "grad_norm": 0.48160192370414734, "learning_rate": 3.696032942631904e-05, "loss": 0.2484, "step": 1954 }, { "epoch": 0.20261166960306767, "grad_norm": 0.5089412927627563, "learning_rate": 3.6956770506426304e-05, "loss": 0.2255, "step": 1955 }, { "epoch": 0.2027153072857291, "grad_norm": 0.5144649147987366, "learning_rate": 3.695320967587103e-05, "loss": 0.2596, "step": 1956 }, { "epoch": 0.2028189449683905, "grad_norm": 0.4798566997051239, "learning_rate": 3.6949646935054445e-05, "loss": 0.2526, "step": 1957 }, { "epoch": 0.20292258265105192, "grad_norm": 0.39153149724006653, "learning_rate": 3.694608228437798e-05, "loss": 0.1896, "step": 1958 }, { "epoch": 0.20302622033371334, "grad_norm": 0.47644275426864624, "learning_rate": 3.6942515724243326e-05, "loss": 0.2485, "step": 1959 }, { "epoch": 0.20312985801637476, "grad_norm": 0.39403602480888367, "learning_rate": 3.693894725505232e-05, "loss": 0.1817, "step": 1960 }, { "epoch": 0.20323349569903618, "grad_norm": 0.48983171582221985, "learning_rate": 3.6935376877207086e-05, "loss": 0.232, "step": 1961 }, { "epoch": 0.2033371333816976, "grad_norm": 0.5000522136688232, "learning_rate": 3.69318045911099e-05, "loss": 0.2101, "step": 1962 }, { "epoch": 0.203440771064359, "grad_norm": 0.47150513529777527, "learning_rate": 3.69282303971633e-05, "loss": 0.2131, "step": 1963 }, { "epoch": 0.20354440874702043, "grad_norm": 0.44279807806015015, "learning_rate": 3.692465429577001e-05, "loss": 0.2111, "step": 1964 }, { "epoch": 0.20364804642968184, "grad_norm": 0.49045294523239136, "learning_rate": 3.6921076287332985e-05, "loss": 0.2326, "step": 1965 }, { "epoch": 0.20375168411234323, "grad_norm": 0.4297841489315033, "learning_rate": 3.691749637225539e-05, "loss": 0.2008, "step": 1966 }, { "epoch": 0.20385532179500465, "grad_norm": 0.51743483543396, "learning_rate": 3.69139145509406e-05, "loss": 0.2289, "step": 1967 }, { "epoch": 0.20395895947766607, "grad_norm": 0.4800795316696167, "learning_rate": 3.691033082379221e-05, "loss": 0.2418, "step": 1968 }, { "epoch": 0.20406259716032749, "grad_norm": 0.4997004270553589, "learning_rate": 3.6906745191214035e-05, "loss": 0.2497, "step": 1969 }, { "epoch": 0.2041662348429889, "grad_norm": 0.5265177488327026, "learning_rate": 3.690315765361009e-05, "loss": 0.2331, "step": 1970 }, { "epoch": 0.20426987252565032, "grad_norm": 0.4550437033176422, "learning_rate": 3.689956821138462e-05, "loss": 0.2309, "step": 1971 }, { "epoch": 0.20437351020831174, "grad_norm": 0.4945697486400604, "learning_rate": 3.689597686494208e-05, "loss": 0.2477, "step": 1972 }, { "epoch": 0.20447714789097315, "grad_norm": 0.44872772693634033, "learning_rate": 3.689238361468712e-05, "loss": 0.2023, "step": 1973 }, { "epoch": 0.20458078557363457, "grad_norm": 0.4784891605377197, "learning_rate": 3.6888788461024636e-05, "loss": 0.2607, "step": 1974 }, { "epoch": 0.204684423256296, "grad_norm": 0.3880353271961212, "learning_rate": 3.6885191404359725e-05, "loss": 0.2065, "step": 1975 }, { "epoch": 0.2047880609389574, "grad_norm": 0.5710169672966003, "learning_rate": 3.68815924450977e-05, "loss": 0.2543, "step": 1976 }, { "epoch": 0.20489169862161882, "grad_norm": 0.505615234375, "learning_rate": 3.687799158364408e-05, "loss": 0.2596, "step": 1977 }, { "epoch": 0.20499533630428024, "grad_norm": 0.48438864946365356, "learning_rate": 3.6874388820404604e-05, "loss": 0.2379, "step": 1978 }, { "epoch": 0.20509897398694166, "grad_norm": 0.4900949001312256, "learning_rate": 3.6870784155785225e-05, "loss": 0.245, "step": 1979 }, { "epoch": 0.20520261166960307, "grad_norm": 0.5086212754249573, "learning_rate": 3.686717759019212e-05, "loss": 0.2613, "step": 1980 }, { "epoch": 0.2053062493522645, "grad_norm": 0.495339035987854, "learning_rate": 3.686356912403166e-05, "loss": 0.2395, "step": 1981 }, { "epoch": 0.2054098870349259, "grad_norm": 0.4228864908218384, "learning_rate": 3.6859958757710444e-05, "loss": 0.2175, "step": 1982 }, { "epoch": 0.20551352471758733, "grad_norm": 0.5192626714706421, "learning_rate": 3.685634649163529e-05, "loss": 0.2428, "step": 1983 }, { "epoch": 0.20561716240024874, "grad_norm": 0.5048412680625916, "learning_rate": 3.6852732326213206e-05, "loss": 0.2664, "step": 1984 }, { "epoch": 0.20572080008291013, "grad_norm": 0.45641231536865234, "learning_rate": 3.684911626185146e-05, "loss": 0.2276, "step": 1985 }, { "epoch": 0.20582443776557155, "grad_norm": 0.42920756340026855, "learning_rate": 3.6845498298957466e-05, "loss": 0.2067, "step": 1986 }, { "epoch": 0.20592807544823297, "grad_norm": 0.5071409344673157, "learning_rate": 3.684187843793892e-05, "loss": 0.2515, "step": 1987 }, { "epoch": 0.20603171313089438, "grad_norm": 0.4680119752883911, "learning_rate": 3.68382566792037e-05, "loss": 0.2415, "step": 1988 }, { "epoch": 0.2061353508135558, "grad_norm": 0.49129483103752136, "learning_rate": 3.6834633023159885e-05, "loss": 0.2643, "step": 1989 }, { "epoch": 0.20623898849621722, "grad_norm": 0.4970560669898987, "learning_rate": 3.6831007470215785e-05, "loss": 0.2455, "step": 1990 }, { "epoch": 0.20634262617887864, "grad_norm": 0.4532671868801117, "learning_rate": 3.682738002077994e-05, "loss": 0.2252, "step": 1991 }, { "epoch": 0.20644626386154005, "grad_norm": 0.47092416882514954, "learning_rate": 3.6823750675261064e-05, "loss": 0.2445, "step": 1992 }, { "epoch": 0.20654990154420147, "grad_norm": 0.5188236832618713, "learning_rate": 3.682011943406812e-05, "loss": 0.2674, "step": 1993 }, { "epoch": 0.2066535392268629, "grad_norm": 0.4372886121273041, "learning_rate": 3.681648629761026e-05, "loss": 0.1847, "step": 1994 }, { "epoch": 0.2067571769095243, "grad_norm": 0.44967779517173767, "learning_rate": 3.6812851266296866e-05, "loss": 0.2293, "step": 1995 }, { "epoch": 0.20686081459218572, "grad_norm": 0.515315592288971, "learning_rate": 3.680921434053753e-05, "loss": 0.246, "step": 1996 }, { "epoch": 0.20696445227484714, "grad_norm": 0.49991822242736816, "learning_rate": 3.6805575520742057e-05, "loss": 0.2767, "step": 1997 }, { "epoch": 0.20706808995750856, "grad_norm": 0.43403372168540955, "learning_rate": 3.6801934807320455e-05, "loss": 0.2123, "step": 1998 }, { "epoch": 0.20717172764016997, "grad_norm": 0.49474579095840454, "learning_rate": 3.679829220068296e-05, "loss": 0.2626, "step": 1999 }, { "epoch": 0.2072753653228314, "grad_norm": 0.45910385251045227, "learning_rate": 3.679464770124001e-05, "loss": 0.2441, "step": 2000 }, { "epoch": 0.2073790030054928, "grad_norm": 0.45843109488487244, "learning_rate": 3.679100130940227e-05, "loss": 0.2406, "step": 2001 }, { "epoch": 0.20748264068815422, "grad_norm": 0.478091299533844, "learning_rate": 3.6787353025580596e-05, "loss": 0.2225, "step": 2002 }, { "epoch": 0.20758627837081564, "grad_norm": 0.49887460470199585, "learning_rate": 3.678370285018608e-05, "loss": 0.2616, "step": 2003 }, { "epoch": 0.20768991605347703, "grad_norm": 0.4933151304721832, "learning_rate": 3.6780050783630024e-05, "loss": 0.2249, "step": 2004 }, { "epoch": 0.20779355373613845, "grad_norm": 0.4972792863845825, "learning_rate": 3.6776396826323925e-05, "loss": 0.2304, "step": 2005 }, { "epoch": 0.20789719141879986, "grad_norm": 0.4850703775882721, "learning_rate": 3.6772740978679517e-05, "loss": 0.2548, "step": 2006 }, { "epoch": 0.20800082910146128, "grad_norm": 0.5416536927223206, "learning_rate": 3.676908324110873e-05, "loss": 0.2829, "step": 2007 }, { "epoch": 0.2081044667841227, "grad_norm": 0.46809735894203186, "learning_rate": 3.676542361402371e-05, "loss": 0.2198, "step": 2008 }, { "epoch": 0.20820810446678412, "grad_norm": 0.44908079504966736, "learning_rate": 3.676176209783681e-05, "loss": 0.2197, "step": 2009 }, { "epoch": 0.20831174214944553, "grad_norm": 0.5226306319236755, "learning_rate": 3.675809869296063e-05, "loss": 0.2744, "step": 2010 }, { "epoch": 0.20841537983210695, "grad_norm": 0.5002926588058472, "learning_rate": 3.6754433399807925e-05, "loss": 0.2611, "step": 2011 }, { "epoch": 0.20851901751476837, "grad_norm": 0.5227557420730591, "learning_rate": 3.675076621879172e-05, "loss": 0.253, "step": 2012 }, { "epoch": 0.20862265519742978, "grad_norm": 0.5399764180183411, "learning_rate": 3.674709715032521e-05, "loss": 0.2428, "step": 2013 }, { "epoch": 0.2087262928800912, "grad_norm": 0.5577982664108276, "learning_rate": 3.6743426194821836e-05, "loss": 0.2532, "step": 2014 }, { "epoch": 0.20882993056275262, "grad_norm": 0.4555790424346924, "learning_rate": 3.6739753352695224e-05, "loss": 0.218, "step": 2015 }, { "epoch": 0.20893356824541404, "grad_norm": 0.4721558392047882, "learning_rate": 3.6736078624359216e-05, "loss": 0.206, "step": 2016 }, { "epoch": 0.20903720592807545, "grad_norm": 0.6262646317481995, "learning_rate": 3.6732402010227895e-05, "loss": 0.2736, "step": 2017 }, { "epoch": 0.20914084361073687, "grad_norm": 0.4947630763053894, "learning_rate": 3.672872351071552e-05, "loss": 0.229, "step": 2018 }, { "epoch": 0.2092444812933983, "grad_norm": 0.46532395482063293, "learning_rate": 3.6725043126236596e-05, "loss": 0.2402, "step": 2019 }, { "epoch": 0.2093481189760597, "grad_norm": 0.4798278212547302, "learning_rate": 3.67213608572058e-05, "loss": 0.2595, "step": 2020 }, { "epoch": 0.20945175665872112, "grad_norm": 0.4924963116645813, "learning_rate": 3.671767670403807e-05, "loss": 0.2451, "step": 2021 }, { "epoch": 0.20955539434138254, "grad_norm": 0.4918677806854248, "learning_rate": 3.6713990667148507e-05, "loss": 0.229, "step": 2022 }, { "epoch": 0.20965903202404393, "grad_norm": 0.4604406952857971, "learning_rate": 3.6710302746952466e-05, "loss": 0.2251, "step": 2023 }, { "epoch": 0.20976266970670535, "grad_norm": 0.4431096017360687, "learning_rate": 3.670661294386548e-05, "loss": 0.2266, "step": 2024 }, { "epoch": 0.20986630738936676, "grad_norm": 0.49358218908309937, "learning_rate": 3.670292125830332e-05, "loss": 0.2289, "step": 2025 }, { "epoch": 0.20996994507202818, "grad_norm": 0.47046852111816406, "learning_rate": 3.669922769068196e-05, "loss": 0.2291, "step": 2026 }, { "epoch": 0.2100735827546896, "grad_norm": 0.601206362247467, "learning_rate": 3.669553224141758e-05, "loss": 0.2383, "step": 2027 }, { "epoch": 0.21017722043735101, "grad_norm": 0.5137280225753784, "learning_rate": 3.669183491092658e-05, "loss": 0.2755, "step": 2028 }, { "epoch": 0.21028085812001243, "grad_norm": 0.5034176707267761, "learning_rate": 3.668813569962557e-05, "loss": 0.2303, "step": 2029 }, { "epoch": 0.21038449580267385, "grad_norm": 0.42328396439552307, "learning_rate": 3.668443460793138e-05, "loss": 0.1977, "step": 2030 }, { "epoch": 0.21048813348533527, "grad_norm": 0.5424888730049133, "learning_rate": 3.668073163626103e-05, "loss": 0.2446, "step": 2031 }, { "epoch": 0.21059177116799668, "grad_norm": 0.43510910868644714, "learning_rate": 3.667702678503177e-05, "loss": 0.2236, "step": 2032 }, { "epoch": 0.2106954088506581, "grad_norm": 0.4465221166610718, "learning_rate": 3.667332005466105e-05, "loss": 0.2022, "step": 2033 }, { "epoch": 0.21079904653331952, "grad_norm": 0.45926061272621155, "learning_rate": 3.666961144556655e-05, "loss": 0.2257, "step": 2034 }, { "epoch": 0.21090268421598093, "grad_norm": 0.5046967267990112, "learning_rate": 3.6665900958166154e-05, "loss": 0.2671, "step": 2035 }, { "epoch": 0.21100632189864235, "grad_norm": 0.4858223795890808, "learning_rate": 3.6662188592877936e-05, "loss": 0.2366, "step": 2036 }, { "epoch": 0.21110995958130377, "grad_norm": 0.4389292299747467, "learning_rate": 3.665847435012022e-05, "loss": 0.2114, "step": 2037 }, { "epoch": 0.21121359726396519, "grad_norm": 0.5166811943054199, "learning_rate": 3.66547582303115e-05, "loss": 0.2513, "step": 2038 }, { "epoch": 0.2113172349466266, "grad_norm": 0.43512311577796936, "learning_rate": 3.6651040233870514e-05, "loss": 0.2319, "step": 2039 }, { "epoch": 0.21142087262928802, "grad_norm": 0.5205447673797607, "learning_rate": 3.66473203612162e-05, "loss": 0.2404, "step": 2040 }, { "epoch": 0.21152451031194944, "grad_norm": 0.5103970170021057, "learning_rate": 3.66435986127677e-05, "loss": 0.2808, "step": 2041 }, { "epoch": 0.21162814799461083, "grad_norm": 0.47343921661376953, "learning_rate": 3.66398749889444e-05, "loss": 0.2526, "step": 2042 }, { "epoch": 0.21173178567727224, "grad_norm": 0.4443468451499939, "learning_rate": 3.663614949016584e-05, "loss": 0.2371, "step": 2043 }, { "epoch": 0.21183542335993366, "grad_norm": 0.4487053155899048, "learning_rate": 3.663242211685181e-05, "loss": 0.217, "step": 2044 }, { "epoch": 0.21193906104259508, "grad_norm": 0.41238850355148315, "learning_rate": 3.662869286942233e-05, "loss": 0.1768, "step": 2045 }, { "epoch": 0.2120426987252565, "grad_norm": 0.5283915996551514, "learning_rate": 3.662496174829757e-05, "loss": 0.2673, "step": 2046 }, { "epoch": 0.2121463364079179, "grad_norm": 0.3949659466743469, "learning_rate": 3.662122875389797e-05, "loss": 0.1958, "step": 2047 }, { "epoch": 0.21224997409057933, "grad_norm": 0.49384358525276184, "learning_rate": 3.661749388664416e-05, "loss": 0.2489, "step": 2048 }, { "epoch": 0.21235361177324075, "grad_norm": 0.43529611825942993, "learning_rate": 3.6613757146956964e-05, "loss": 0.2323, "step": 2049 }, { "epoch": 0.21245724945590216, "grad_norm": 0.46041977405548096, "learning_rate": 3.661001853525744e-05, "loss": 0.1857, "step": 2050 }, { "epoch": 0.21256088713856358, "grad_norm": 0.5310932397842407, "learning_rate": 3.660627805196685e-05, "loss": 0.2152, "step": 2051 }, { "epoch": 0.212664524821225, "grad_norm": 0.4441661238670349, "learning_rate": 3.660253569750666e-05, "loss": 0.2208, "step": 2052 }, { "epoch": 0.21276816250388642, "grad_norm": 0.5119684338569641, "learning_rate": 3.659879147229856e-05, "loss": 0.197, "step": 2053 }, { "epoch": 0.21287180018654783, "grad_norm": 0.5342808961868286, "learning_rate": 3.659504537676444e-05, "loss": 0.2501, "step": 2054 }, { "epoch": 0.21297543786920925, "grad_norm": 0.453165203332901, "learning_rate": 3.6591297411326404e-05, "loss": 0.2131, "step": 2055 }, { "epoch": 0.21307907555187067, "grad_norm": 0.5051000118255615, "learning_rate": 3.6587547576406764e-05, "loss": 0.229, "step": 2056 }, { "epoch": 0.21318271323453208, "grad_norm": 0.49012821912765503, "learning_rate": 3.658379587242805e-05, "loss": 0.2067, "step": 2057 }, { "epoch": 0.2132863509171935, "grad_norm": 0.4396739602088928, "learning_rate": 3.6580042299813004e-05, "loss": 0.203, "step": 2058 }, { "epoch": 0.21338998859985492, "grad_norm": 0.5717998743057251, "learning_rate": 3.657628685898456e-05, "loss": 0.2327, "step": 2059 }, { "epoch": 0.21349362628251634, "grad_norm": 0.4730100929737091, "learning_rate": 3.657252955036588e-05, "loss": 0.2186, "step": 2060 }, { "epoch": 0.21359726396517772, "grad_norm": 0.5230618119239807, "learning_rate": 3.6568770374380335e-05, "loss": 0.2373, "step": 2061 }, { "epoch": 0.21370090164783914, "grad_norm": 0.4315328299999237, "learning_rate": 3.6565009331451505e-05, "loss": 0.178, "step": 2062 }, { "epoch": 0.21380453933050056, "grad_norm": 0.5424378514289856, "learning_rate": 3.656124642200316e-05, "loss": 0.2195, "step": 2063 }, { "epoch": 0.21390817701316198, "grad_norm": 0.475759357213974, "learning_rate": 3.6557481646459325e-05, "loss": 0.2354, "step": 2064 }, { "epoch": 0.2140118146958234, "grad_norm": 0.44949331879615784, "learning_rate": 3.65537150052442e-05, "loss": 0.2196, "step": 2065 }, { "epoch": 0.2141154523784848, "grad_norm": 0.4517166316509247, "learning_rate": 3.6549946498782195e-05, "loss": 0.208, "step": 2066 }, { "epoch": 0.21421909006114623, "grad_norm": 0.5226971507072449, "learning_rate": 3.6546176127497954e-05, "loss": 0.255, "step": 2067 }, { "epoch": 0.21432272774380765, "grad_norm": 0.5534842014312744, "learning_rate": 3.6542403891816303e-05, "loss": 0.2933, "step": 2068 }, { "epoch": 0.21442636542646906, "grad_norm": 0.46718254685401917, "learning_rate": 3.65386297921623e-05, "loss": 0.2338, "step": 2069 }, { "epoch": 0.21453000310913048, "grad_norm": 0.5315462946891785, "learning_rate": 3.6534853828961194e-05, "loss": 0.248, "step": 2070 }, { "epoch": 0.2146336407917919, "grad_norm": 0.5207595825195312, "learning_rate": 3.653107600263846e-05, "loss": 0.2666, "step": 2071 }, { "epoch": 0.2147372784744533, "grad_norm": 0.4615035951137543, "learning_rate": 3.652729631361979e-05, "loss": 0.2273, "step": 2072 }, { "epoch": 0.21484091615711473, "grad_norm": 0.4617545008659363, "learning_rate": 3.652351476233106e-05, "loss": 0.2371, "step": 2073 }, { "epoch": 0.21494455383977615, "grad_norm": 0.5457558631896973, "learning_rate": 3.651973134919837e-05, "loss": 0.2653, "step": 2074 }, { "epoch": 0.21504819152243757, "grad_norm": 0.5435750484466553, "learning_rate": 3.651594607464804e-05, "loss": 0.2927, "step": 2075 }, { "epoch": 0.21515182920509898, "grad_norm": 0.4262548089027405, "learning_rate": 3.651215893910657e-05, "loss": 0.2037, "step": 2076 }, { "epoch": 0.2152554668877604, "grad_norm": 0.4520435631275177, "learning_rate": 3.650836994300071e-05, "loss": 0.2371, "step": 2077 }, { "epoch": 0.21535910457042182, "grad_norm": 0.4527454972267151, "learning_rate": 3.650457908675738e-05, "loss": 0.2498, "step": 2078 }, { "epoch": 0.21546274225308323, "grad_norm": 0.45087677240371704, "learning_rate": 3.650078637080374e-05, "loss": 0.2273, "step": 2079 }, { "epoch": 0.21556637993574462, "grad_norm": 0.49029573798179626, "learning_rate": 3.6496991795567146e-05, "loss": 0.2562, "step": 2080 }, { "epoch": 0.21567001761840604, "grad_norm": 0.46473249793052673, "learning_rate": 3.649319536147515e-05, "loss": 0.2418, "step": 2081 }, { "epoch": 0.21577365530106746, "grad_norm": 0.54538494348526, "learning_rate": 3.648939706895555e-05, "loss": 0.2899, "step": 2082 }, { "epoch": 0.21587729298372887, "grad_norm": 0.4631831645965576, "learning_rate": 3.648559691843632e-05, "loss": 0.2447, "step": 2083 }, { "epoch": 0.2159809306663903, "grad_norm": 0.43592122197151184, "learning_rate": 3.648179491034565e-05, "loss": 0.2054, "step": 2084 }, { "epoch": 0.2160845683490517, "grad_norm": 0.42652252316474915, "learning_rate": 3.647799104511195e-05, "loss": 0.1869, "step": 2085 }, { "epoch": 0.21618820603171313, "grad_norm": 0.46224814653396606, "learning_rate": 3.647418532316385e-05, "loss": 0.2235, "step": 2086 }, { "epoch": 0.21629184371437454, "grad_norm": 0.4874723553657532, "learning_rate": 3.6470377744930145e-05, "loss": 0.2386, "step": 2087 }, { "epoch": 0.21639548139703596, "grad_norm": 0.5245859622955322, "learning_rate": 3.646656831083988e-05, "loss": 0.2737, "step": 2088 }, { "epoch": 0.21649911907969738, "grad_norm": 0.462100088596344, "learning_rate": 3.64627570213223e-05, "loss": 0.2354, "step": 2089 }, { "epoch": 0.2166027567623588, "grad_norm": 0.5441925525665283, "learning_rate": 3.645894387680685e-05, "loss": 0.2697, "step": 2090 }, { "epoch": 0.2167063944450202, "grad_norm": 0.48411786556243896, "learning_rate": 3.6455128877723186e-05, "loss": 0.2207, "step": 2091 }, { "epoch": 0.21681003212768163, "grad_norm": 0.41479653120040894, "learning_rate": 3.645131202450119e-05, "loss": 0.1996, "step": 2092 }, { "epoch": 0.21691366981034305, "grad_norm": 0.474609911441803, "learning_rate": 3.6447493317570914e-05, "loss": 0.2263, "step": 2093 }, { "epoch": 0.21701730749300446, "grad_norm": 0.442793071269989, "learning_rate": 3.6443672757362666e-05, "loss": 0.2181, "step": 2094 }, { "epoch": 0.21712094517566588, "grad_norm": 0.5405283570289612, "learning_rate": 3.643985034430693e-05, "loss": 0.237, "step": 2095 }, { "epoch": 0.2172245828583273, "grad_norm": 0.5367290377616882, "learning_rate": 3.643602607883442e-05, "loss": 0.2752, "step": 2096 }, { "epoch": 0.21732822054098871, "grad_norm": 0.5797774195671082, "learning_rate": 3.643219996137604e-05, "loss": 0.2599, "step": 2097 }, { "epoch": 0.21743185822365013, "grad_norm": 0.49853330850601196, "learning_rate": 3.6428371992362916e-05, "loss": 0.2604, "step": 2098 }, { "epoch": 0.21753549590631152, "grad_norm": 0.4744069576263428, "learning_rate": 3.642454217222637e-05, "loss": 0.2321, "step": 2099 }, { "epoch": 0.21763913358897294, "grad_norm": 0.5347335338592529, "learning_rate": 3.6420710501397944e-05, "loss": 0.2782, "step": 2100 }, { "epoch": 0.21774277127163436, "grad_norm": 0.4640110433101654, "learning_rate": 3.641687698030938e-05, "loss": 0.2306, "step": 2101 }, { "epoch": 0.21784640895429577, "grad_norm": 0.48161885142326355, "learning_rate": 3.641304160939265e-05, "loss": 0.2285, "step": 2102 }, { "epoch": 0.2179500466369572, "grad_norm": 0.5034273862838745, "learning_rate": 3.6409204389079896e-05, "loss": 0.2126, "step": 2103 }, { "epoch": 0.2180536843196186, "grad_norm": 0.4994756877422333, "learning_rate": 3.640536531980351e-05, "loss": 0.27, "step": 2104 }, { "epoch": 0.21815732200228002, "grad_norm": 0.44723254442214966, "learning_rate": 3.6401524401996056e-05, "loss": 0.2185, "step": 2105 }, { "epoch": 0.21826095968494144, "grad_norm": 0.4936169385910034, "learning_rate": 3.639768163609033e-05, "loss": 0.2574, "step": 2106 }, { "epoch": 0.21836459736760286, "grad_norm": 0.48400330543518066, "learning_rate": 3.639383702251933e-05, "loss": 0.2183, "step": 2107 }, { "epoch": 0.21846823505026428, "grad_norm": 0.5214065313339233, "learning_rate": 3.638999056171626e-05, "loss": 0.2313, "step": 2108 }, { "epoch": 0.2185718727329257, "grad_norm": 0.45542773604393005, "learning_rate": 3.638614225411452e-05, "loss": 0.2153, "step": 2109 }, { "epoch": 0.2186755104155871, "grad_norm": 0.5218877196311951, "learning_rate": 3.638229210014776e-05, "loss": 0.2727, "step": 2110 }, { "epoch": 0.21877914809824853, "grad_norm": 0.4047102928161621, "learning_rate": 3.6378440100249785e-05, "loss": 0.1887, "step": 2111 }, { "epoch": 0.21888278578090994, "grad_norm": 0.4060623347759247, "learning_rate": 3.637458625485464e-05, "loss": 0.2128, "step": 2112 }, { "epoch": 0.21898642346357136, "grad_norm": 0.4566936194896698, "learning_rate": 3.637073056439657e-05, "loss": 0.2326, "step": 2113 }, { "epoch": 0.21909006114623278, "grad_norm": 0.39076003432273865, "learning_rate": 3.636687302931003e-05, "loss": 0.1933, "step": 2114 }, { "epoch": 0.2191936988288942, "grad_norm": 0.5355175733566284, "learning_rate": 3.636301365002968e-05, "loss": 0.2741, "step": 2115 }, { "epoch": 0.2192973365115556, "grad_norm": 0.5408210158348083, "learning_rate": 3.6359152426990384e-05, "loss": 0.2438, "step": 2116 }, { "epoch": 0.21940097419421703, "grad_norm": 0.535264253616333, "learning_rate": 3.6355289360627236e-05, "loss": 0.2508, "step": 2117 }, { "epoch": 0.21950461187687842, "grad_norm": 0.47466620802879333, "learning_rate": 3.6351424451375494e-05, "loss": 0.2343, "step": 2118 }, { "epoch": 0.21960824955953984, "grad_norm": 0.5331148505210876, "learning_rate": 3.6347557699670675e-05, "loss": 0.2753, "step": 2119 }, { "epoch": 0.21971188724220125, "grad_norm": 0.48955854773521423, "learning_rate": 3.634368910594846e-05, "loss": 0.2518, "step": 2120 }, { "epoch": 0.21981552492486267, "grad_norm": 0.5582018494606018, "learning_rate": 3.633981867064476e-05, "loss": 0.2871, "step": 2121 }, { "epoch": 0.2199191626075241, "grad_norm": 0.4639434218406677, "learning_rate": 3.633594639419571e-05, "loss": 0.2199, "step": 2122 }, { "epoch": 0.2200228002901855, "grad_norm": 0.550144612789154, "learning_rate": 3.6332072277037596e-05, "loss": 0.3005, "step": 2123 }, { "epoch": 0.22012643797284692, "grad_norm": 0.47822409868240356, "learning_rate": 3.632819631960697e-05, "loss": 0.2721, "step": 2124 }, { "epoch": 0.22023007565550834, "grad_norm": 0.43804317712783813, "learning_rate": 3.6324318522340576e-05, "loss": 0.2284, "step": 2125 }, { "epoch": 0.22033371333816976, "grad_norm": 0.4789344072341919, "learning_rate": 3.632043888567534e-05, "loss": 0.236, "step": 2126 }, { "epoch": 0.22043735102083117, "grad_norm": 0.42324569821357727, "learning_rate": 3.631655741004842e-05, "loss": 0.212, "step": 2127 }, { "epoch": 0.2205409887034926, "grad_norm": 0.4153405427932739, "learning_rate": 3.631267409589717e-05, "loss": 0.1964, "step": 2128 }, { "epoch": 0.220644626386154, "grad_norm": 0.44336336851119995, "learning_rate": 3.6308788943659174e-05, "loss": 0.2468, "step": 2129 }, { "epoch": 0.22074826406881543, "grad_norm": 0.4833187460899353, "learning_rate": 3.6304901953772185e-05, "loss": 0.2162, "step": 2130 }, { "epoch": 0.22085190175147684, "grad_norm": 0.5347158312797546, "learning_rate": 3.630101312667419e-05, "loss": 0.2594, "step": 2131 }, { "epoch": 0.22095553943413826, "grad_norm": 0.520388126373291, "learning_rate": 3.629712246280338e-05, "loss": 0.2475, "step": 2132 }, { "epoch": 0.22105917711679968, "grad_norm": 0.4484923779964447, "learning_rate": 3.6293229962598144e-05, "loss": 0.2394, "step": 2133 }, { "epoch": 0.2211628147994611, "grad_norm": 0.494242399930954, "learning_rate": 3.6289335626497085e-05, "loss": 0.2432, "step": 2134 }, { "epoch": 0.2212664524821225, "grad_norm": 0.49932751059532166, "learning_rate": 3.628543945493901e-05, "loss": 0.2355, "step": 2135 }, { "epoch": 0.22137009016478393, "grad_norm": 0.5083844065666199, "learning_rate": 3.628154144836293e-05, "loss": 0.2473, "step": 2136 }, { "epoch": 0.22147372784744532, "grad_norm": 0.4479765295982361, "learning_rate": 3.627764160720807e-05, "loss": 0.2316, "step": 2137 }, { "epoch": 0.22157736553010673, "grad_norm": 0.4882799983024597, "learning_rate": 3.627373993191386e-05, "loss": 0.2514, "step": 2138 }, { "epoch": 0.22168100321276815, "grad_norm": 0.5669364929199219, "learning_rate": 3.6269836422919933e-05, "loss": 0.2423, "step": 2139 }, { "epoch": 0.22178464089542957, "grad_norm": 0.5127422213554382, "learning_rate": 3.6265931080666125e-05, "loss": 0.2577, "step": 2140 }, { "epoch": 0.22188827857809099, "grad_norm": 0.5250160098075867, "learning_rate": 3.626202390559249e-05, "loss": 0.263, "step": 2141 }, { "epoch": 0.2219919162607524, "grad_norm": 0.4941604435443878, "learning_rate": 3.625811489813929e-05, "loss": 0.2477, "step": 2142 }, { "epoch": 0.22209555394341382, "grad_norm": 0.4347946345806122, "learning_rate": 3.6254204058746966e-05, "loss": 0.2361, "step": 2143 }, { "epoch": 0.22219919162607524, "grad_norm": 0.539672315120697, "learning_rate": 3.62502913878562e-05, "loss": 0.2687, "step": 2144 }, { "epoch": 0.22230282930873665, "grad_norm": 0.4801972508430481, "learning_rate": 3.624637688590787e-05, "loss": 0.2288, "step": 2145 }, { "epoch": 0.22240646699139807, "grad_norm": 0.4568727910518646, "learning_rate": 3.624246055334304e-05, "loss": 0.2154, "step": 2146 }, { "epoch": 0.2225101046740595, "grad_norm": 0.49095943570137024, "learning_rate": 3.6238542390603006e-05, "loss": 0.2024, "step": 2147 }, { "epoch": 0.2226137423567209, "grad_norm": 0.49304577708244324, "learning_rate": 3.623462239812925e-05, "loss": 0.2166, "step": 2148 }, { "epoch": 0.22271738003938232, "grad_norm": 0.5479660630226135, "learning_rate": 3.623070057636349e-05, "loss": 0.251, "step": 2149 }, { "epoch": 0.22282101772204374, "grad_norm": 0.4642309248447418, "learning_rate": 3.6226776925747615e-05, "loss": 0.24, "step": 2150 }, { "epoch": 0.22292465540470516, "grad_norm": 0.44070860743522644, "learning_rate": 3.622285144672375e-05, "loss": 0.2082, "step": 2151 }, { "epoch": 0.22302829308736657, "grad_norm": 0.519973635673523, "learning_rate": 3.62189241397342e-05, "loss": 0.268, "step": 2152 }, { "epoch": 0.223131930770028, "grad_norm": 0.5418823957443237, "learning_rate": 3.621499500522149e-05, "loss": 0.2498, "step": 2153 }, { "epoch": 0.2232355684526894, "grad_norm": 0.5058969259262085, "learning_rate": 3.621106404362834e-05, "loss": 0.2339, "step": 2154 }, { "epoch": 0.22333920613535083, "grad_norm": 0.4511276185512543, "learning_rate": 3.6207131255397705e-05, "loss": 0.1953, "step": 2155 }, { "epoch": 0.22344284381801222, "grad_norm": 0.5690330862998962, "learning_rate": 3.62031966409727e-05, "loss": 0.2619, "step": 2156 }, { "epoch": 0.22354648150067363, "grad_norm": 0.46670278906822205, "learning_rate": 3.6199260200796704e-05, "loss": 0.2208, "step": 2157 }, { "epoch": 0.22365011918333505, "grad_norm": 0.5800625085830688, "learning_rate": 3.619532193531324e-05, "loss": 0.2527, "step": 2158 }, { "epoch": 0.22375375686599647, "grad_norm": 0.5150881409645081, "learning_rate": 3.619138184496608e-05, "loss": 0.2255, "step": 2159 }, { "epoch": 0.22385739454865788, "grad_norm": 0.5035178661346436, "learning_rate": 3.618743993019919e-05, "loss": 0.2525, "step": 2160 }, { "epoch": 0.2239610322313193, "grad_norm": 0.5059001445770264, "learning_rate": 3.618349619145672e-05, "loss": 0.2804, "step": 2161 }, { "epoch": 0.22406466991398072, "grad_norm": 0.43283551931381226, "learning_rate": 3.6179550629183065e-05, "loss": 0.2177, "step": 2162 }, { "epoch": 0.22416830759664214, "grad_norm": 0.46817097067832947, "learning_rate": 3.617560324382279e-05, "loss": 0.233, "step": 2163 }, { "epoch": 0.22427194527930355, "grad_norm": 0.506182074546814, "learning_rate": 3.61716540358207e-05, "loss": 0.2184, "step": 2164 }, { "epoch": 0.22437558296196497, "grad_norm": 0.3902835249900818, "learning_rate": 3.6167703005621755e-05, "loss": 0.1976, "step": 2165 }, { "epoch": 0.2244792206446264, "grad_norm": 0.4224104881286621, "learning_rate": 3.6163750153671175e-05, "loss": 0.198, "step": 2166 }, { "epoch": 0.2245828583272878, "grad_norm": 0.5203019380569458, "learning_rate": 3.615979548041436e-05, "loss": 0.2684, "step": 2167 }, { "epoch": 0.22468649600994922, "grad_norm": 0.41954994201660156, "learning_rate": 3.615583898629691e-05, "loss": 0.1831, "step": 2168 }, { "epoch": 0.22479013369261064, "grad_norm": 0.4721447825431824, "learning_rate": 3.615188067176464e-05, "loss": 0.2176, "step": 2169 }, { "epoch": 0.22489377137527206, "grad_norm": 0.44744497537612915, "learning_rate": 3.6147920537263554e-05, "loss": 0.2144, "step": 2170 }, { "epoch": 0.22499740905793347, "grad_norm": 0.5009974241256714, "learning_rate": 3.6143958583239894e-05, "loss": 0.2345, "step": 2171 }, { "epoch": 0.2251010467405949, "grad_norm": 0.48787763714790344, "learning_rate": 3.6139994810140075e-05, "loss": 0.2087, "step": 2172 }, { "epoch": 0.2252046844232563, "grad_norm": 0.5042424201965332, "learning_rate": 3.6136029218410725e-05, "loss": 0.2421, "step": 2173 }, { "epoch": 0.22530832210591772, "grad_norm": 0.4651280641555786, "learning_rate": 3.6132061808498694e-05, "loss": 0.2095, "step": 2174 }, { "epoch": 0.2254119597885791, "grad_norm": 0.47553327679634094, "learning_rate": 3.6128092580851016e-05, "loss": 0.2376, "step": 2175 }, { "epoch": 0.22551559747124053, "grad_norm": 0.5473289489746094, "learning_rate": 3.612412153591493e-05, "loss": 0.2601, "step": 2176 }, { "epoch": 0.22561923515390195, "grad_norm": 0.5122581124305725, "learning_rate": 3.61201486741379e-05, "loss": 0.2436, "step": 2177 }, { "epoch": 0.22572287283656337, "grad_norm": 0.5065480470657349, "learning_rate": 3.6116173995967575e-05, "loss": 0.2909, "step": 2178 }, { "epoch": 0.22582651051922478, "grad_norm": 0.42552104592323303, "learning_rate": 3.611219750185182e-05, "loss": 0.2091, "step": 2179 }, { "epoch": 0.2259301482018862, "grad_norm": 0.49925532937049866, "learning_rate": 3.61082191922387e-05, "loss": 0.2441, "step": 2180 }, { "epoch": 0.22603378588454762, "grad_norm": 0.5123841166496277, "learning_rate": 3.610423906757648e-05, "loss": 0.2403, "step": 2181 }, { "epoch": 0.22613742356720903, "grad_norm": 0.5170781016349792, "learning_rate": 3.610025712831363e-05, "loss": 0.2609, "step": 2182 }, { "epoch": 0.22624106124987045, "grad_norm": 0.49091219902038574, "learning_rate": 3.609627337489884e-05, "loss": 0.2492, "step": 2183 }, { "epoch": 0.22634469893253187, "grad_norm": 0.5327954292297363, "learning_rate": 3.609228780778099e-05, "loss": 0.3047, "step": 2184 }, { "epoch": 0.22644833661519329, "grad_norm": 0.5308407545089722, "learning_rate": 3.6088300427409164e-05, "loss": 0.2458, "step": 2185 }, { "epoch": 0.2265519742978547, "grad_norm": 0.44684699177742004, "learning_rate": 3.608431123423265e-05, "loss": 0.1978, "step": 2186 }, { "epoch": 0.22665561198051612, "grad_norm": 0.4521043300628662, "learning_rate": 3.608032022870096e-05, "loss": 0.2171, "step": 2187 }, { "epoch": 0.22675924966317754, "grad_norm": 0.4902312755584717, "learning_rate": 3.607632741126378e-05, "loss": 0.224, "step": 2188 }, { "epoch": 0.22686288734583895, "grad_norm": 0.4611247479915619, "learning_rate": 3.607233278237102e-05, "loss": 0.2333, "step": 2189 }, { "epoch": 0.22696652502850037, "grad_norm": 0.5038001537322998, "learning_rate": 3.606833634247278e-05, "loss": 0.2534, "step": 2190 }, { "epoch": 0.2270701627111618, "grad_norm": 0.489808589220047, "learning_rate": 3.6064338092019386e-05, "loss": 0.246, "step": 2191 }, { "epoch": 0.2271738003938232, "grad_norm": 0.4640105962753296, "learning_rate": 3.6060338031461346e-05, "loss": 0.2347, "step": 2192 }, { "epoch": 0.22727743807648462, "grad_norm": 0.5169340372085571, "learning_rate": 3.605633616124938e-05, "loss": 0.2775, "step": 2193 }, { "epoch": 0.227381075759146, "grad_norm": 0.38409602642059326, "learning_rate": 3.605233248183442e-05, "loss": 0.1974, "step": 2194 }, { "epoch": 0.22748471344180743, "grad_norm": 0.48459893465042114, "learning_rate": 3.604832699366759e-05, "loss": 0.2115, "step": 2195 }, { "epoch": 0.22758835112446885, "grad_norm": 0.4470805525779724, "learning_rate": 3.604431969720022e-05, "loss": 0.2337, "step": 2196 }, { "epoch": 0.22769198880713026, "grad_norm": 0.4740588963031769, "learning_rate": 3.604031059288385e-05, "loss": 0.2527, "step": 2197 }, { "epoch": 0.22779562648979168, "grad_norm": 0.4325917363166809, "learning_rate": 3.603629968117021e-05, "loss": 0.2073, "step": 2198 }, { "epoch": 0.2278992641724531, "grad_norm": 0.49139603972435, "learning_rate": 3.603228696251126e-05, "loss": 0.2524, "step": 2199 }, { "epoch": 0.22800290185511451, "grad_norm": 0.39995265007019043, "learning_rate": 3.602827243735913e-05, "loss": 0.1868, "step": 2200 }, { "epoch": 0.22810653953777593, "grad_norm": 0.4172254204750061, "learning_rate": 3.6024256106166194e-05, "loss": 0.2258, "step": 2201 }, { "epoch": 0.22821017722043735, "grad_norm": 0.4221954643726349, "learning_rate": 3.602023796938497e-05, "loss": 0.2019, "step": 2202 }, { "epoch": 0.22831381490309877, "grad_norm": 0.4521850347518921, "learning_rate": 3.601621802746825e-05, "loss": 0.2284, "step": 2203 }, { "epoch": 0.22841745258576018, "grad_norm": 0.42566412687301636, "learning_rate": 3.601219628086897e-05, "loss": 0.1948, "step": 2204 }, { "epoch": 0.2285210902684216, "grad_norm": 0.4874782860279083, "learning_rate": 3.600817273004031e-05, "loss": 0.2305, "step": 2205 }, { "epoch": 0.22862472795108302, "grad_norm": 0.48723307251930237, "learning_rate": 3.600414737543563e-05, "loss": 0.2804, "step": 2206 }, { "epoch": 0.22872836563374443, "grad_norm": 0.5111587643623352, "learning_rate": 3.600012021750851e-05, "loss": 0.234, "step": 2207 }, { "epoch": 0.22883200331640585, "grad_norm": 0.42869290709495544, "learning_rate": 3.599609125671271e-05, "loss": 0.1748, "step": 2208 }, { "epoch": 0.22893564099906727, "grad_norm": 0.5120351314544678, "learning_rate": 3.599206049350222e-05, "loss": 0.2618, "step": 2209 }, { "epoch": 0.2290392786817287, "grad_norm": 0.4712057411670685, "learning_rate": 3.59880279283312e-05, "loss": 0.2117, "step": 2210 }, { "epoch": 0.2291429163643901, "grad_norm": 0.4707184433937073, "learning_rate": 3.5983993561654056e-05, "loss": 0.2593, "step": 2211 }, { "epoch": 0.22924655404705152, "grad_norm": 0.5554648041725159, "learning_rate": 3.597995739392536e-05, "loss": 0.2521, "step": 2212 }, { "epoch": 0.2293501917297129, "grad_norm": 0.5168809294700623, "learning_rate": 3.597591942559991e-05, "loss": 0.2678, "step": 2213 }, { "epoch": 0.22945382941237433, "grad_norm": 0.453225314617157, "learning_rate": 3.59718796571327e-05, "loss": 0.2176, "step": 2214 }, { "epoch": 0.22955746709503574, "grad_norm": 0.4564521610736847, "learning_rate": 3.596783808897891e-05, "loss": 0.1954, "step": 2215 }, { "epoch": 0.22966110477769716, "grad_norm": 0.4787885844707489, "learning_rate": 3.5963794721593954e-05, "loss": 0.2448, "step": 2216 }, { "epoch": 0.22976474246035858, "grad_norm": 0.461749404668808, "learning_rate": 3.595974955543341e-05, "loss": 0.2387, "step": 2217 }, { "epoch": 0.22986838014302, "grad_norm": 0.4586969017982483, "learning_rate": 3.595570259095311e-05, "loss": 0.24, "step": 2218 }, { "epoch": 0.2299720178256814, "grad_norm": 0.4212735891342163, "learning_rate": 3.595165382860905e-05, "loss": 0.2298, "step": 2219 }, { "epoch": 0.23007565550834283, "grad_norm": 0.4474669396877289, "learning_rate": 3.594760326885742e-05, "loss": 0.1942, "step": 2220 }, { "epoch": 0.23017929319100425, "grad_norm": 0.467050701379776, "learning_rate": 3.594355091215465e-05, "loss": 0.2101, "step": 2221 }, { "epoch": 0.23028293087366566, "grad_norm": 0.511583685874939, "learning_rate": 3.593949675895735e-05, "loss": 0.2294, "step": 2222 }, { "epoch": 0.23038656855632708, "grad_norm": 0.48996490240097046, "learning_rate": 3.5935440809722336e-05, "loss": 0.2568, "step": 2223 }, { "epoch": 0.2304902062389885, "grad_norm": 0.43239009380340576, "learning_rate": 3.5931383064906617e-05, "loss": 0.199, "step": 2224 }, { "epoch": 0.23059384392164992, "grad_norm": 0.565646767616272, "learning_rate": 3.5927323524967426e-05, "loss": 0.2698, "step": 2225 }, { "epoch": 0.23069748160431133, "grad_norm": 0.4224366843700409, "learning_rate": 3.5923262190362176e-05, "loss": 0.2195, "step": 2226 }, { "epoch": 0.23080111928697275, "grad_norm": 0.44575369358062744, "learning_rate": 3.5919199061548494e-05, "loss": 0.2244, "step": 2227 }, { "epoch": 0.23090475696963417, "grad_norm": 0.39572209119796753, "learning_rate": 3.591513413898421e-05, "loss": 0.2164, "step": 2228 }, { "epoch": 0.23100839465229558, "grad_norm": 0.5055858492851257, "learning_rate": 3.591106742312736e-05, "loss": 0.2646, "step": 2229 }, { "epoch": 0.231112032334957, "grad_norm": 0.4088776707649231, "learning_rate": 3.590699891443616e-05, "loss": 0.1873, "step": 2230 }, { "epoch": 0.23121567001761842, "grad_norm": 0.4450381100177765, "learning_rate": 3.590292861336905e-05, "loss": 0.2417, "step": 2231 }, { "epoch": 0.2313193077002798, "grad_norm": 0.4661766588687897, "learning_rate": 3.589885652038466e-05, "loss": 0.2358, "step": 2232 }, { "epoch": 0.23142294538294123, "grad_norm": 0.5066637992858887, "learning_rate": 3.5894782635941845e-05, "loss": 0.2599, "step": 2233 }, { "epoch": 0.23152658306560264, "grad_norm": 0.47971197962760925, "learning_rate": 3.5890706960499626e-05, "loss": 0.2382, "step": 2234 }, { "epoch": 0.23163022074826406, "grad_norm": 0.5027540326118469, "learning_rate": 3.588662949451724e-05, "loss": 0.2304, "step": 2235 }, { "epoch": 0.23173385843092548, "grad_norm": 0.5136678814888, "learning_rate": 3.588255023845415e-05, "loss": 0.2408, "step": 2236 }, { "epoch": 0.2318374961135869, "grad_norm": 0.44080325961112976, "learning_rate": 3.587846919276999e-05, "loss": 0.239, "step": 2237 }, { "epoch": 0.2319411337962483, "grad_norm": 0.4627954959869385, "learning_rate": 3.58743863579246e-05, "loss": 0.2559, "step": 2238 }, { "epoch": 0.23204477147890973, "grad_norm": 0.46266400814056396, "learning_rate": 3.587030173437803e-05, "loss": 0.2562, "step": 2239 }, { "epoch": 0.23214840916157115, "grad_norm": 0.4966365694999695, "learning_rate": 3.586621532259053e-05, "loss": 0.2602, "step": 2240 }, { "epoch": 0.23225204684423256, "grad_norm": 0.3625810146331787, "learning_rate": 3.586212712302256e-05, "loss": 0.1887, "step": 2241 }, { "epoch": 0.23235568452689398, "grad_norm": 0.47931361198425293, "learning_rate": 3.5858037136134765e-05, "loss": 0.1949, "step": 2242 }, { "epoch": 0.2324593222095554, "grad_norm": 0.4693504571914673, "learning_rate": 3.585394536238799e-05, "loss": 0.2414, "step": 2243 }, { "epoch": 0.23256295989221681, "grad_norm": 0.46216142177581787, "learning_rate": 3.58498518022433e-05, "loss": 0.219, "step": 2244 }, { "epoch": 0.23266659757487823, "grad_norm": 0.5020700693130493, "learning_rate": 3.5845756456161945e-05, "loss": 0.245, "step": 2245 }, { "epoch": 0.23277023525753965, "grad_norm": 0.4642801284790039, "learning_rate": 3.584165932460539e-05, "loss": 0.2375, "step": 2246 }, { "epoch": 0.23287387294020107, "grad_norm": 0.48018375039100647, "learning_rate": 3.583756040803529e-05, "loss": 0.2736, "step": 2247 }, { "epoch": 0.23297751062286248, "grad_norm": 0.5564702153205872, "learning_rate": 3.5833459706913494e-05, "loss": 0.246, "step": 2248 }, { "epoch": 0.2330811483055239, "grad_norm": 0.45636841654777527, "learning_rate": 3.582935722170208e-05, "loss": 0.2145, "step": 2249 }, { "epoch": 0.23318478598818532, "grad_norm": 0.4764099717140198, "learning_rate": 3.5825252952863296e-05, "loss": 0.2133, "step": 2250 }, { "epoch": 0.2332884236708467, "grad_norm": 0.44867387413978577, "learning_rate": 3.5821146900859615e-05, "loss": 0.2396, "step": 2251 }, { "epoch": 0.23339206135350812, "grad_norm": 0.43731316924095154, "learning_rate": 3.58170390661537e-05, "loss": 0.1834, "step": 2252 }, { "epoch": 0.23349569903616954, "grad_norm": 0.4410570561885834, "learning_rate": 3.58129294492084e-05, "loss": 0.2443, "step": 2253 }, { "epoch": 0.23359933671883096, "grad_norm": 0.4620094299316406, "learning_rate": 3.580881805048679e-05, "loss": 0.2463, "step": 2254 }, { "epoch": 0.23370297440149237, "grad_norm": 0.49641790986061096, "learning_rate": 3.580470487045215e-05, "loss": 0.209, "step": 2255 }, { "epoch": 0.2338066120841538, "grad_norm": 0.5519564151763916, "learning_rate": 3.580058990956793e-05, "loss": 0.2769, "step": 2256 }, { "epoch": 0.2339102497668152, "grad_norm": 0.46755626797676086, "learning_rate": 3.57964731682978e-05, "loss": 0.2131, "step": 2257 }, { "epoch": 0.23401388744947663, "grad_norm": 0.482364296913147, "learning_rate": 3.579235464710563e-05, "loss": 0.2219, "step": 2258 }, { "epoch": 0.23411752513213804, "grad_norm": 0.44284772872924805, "learning_rate": 3.578823434645548e-05, "loss": 0.1814, "step": 2259 }, { "epoch": 0.23422116281479946, "grad_norm": 0.5211065411567688, "learning_rate": 3.578411226681164e-05, "loss": 0.2518, "step": 2260 }, { "epoch": 0.23432480049746088, "grad_norm": 0.4506303369998932, "learning_rate": 3.577998840863856e-05, "loss": 0.2337, "step": 2261 }, { "epoch": 0.2344284381801223, "grad_norm": 0.4736618101596832, "learning_rate": 3.5775862772400915e-05, "loss": 0.2252, "step": 2262 }, { "epoch": 0.2345320758627837, "grad_norm": 0.5009260177612305, "learning_rate": 3.577173535856358e-05, "loss": 0.2371, "step": 2263 }, { "epoch": 0.23463571354544513, "grad_norm": 0.3928007185459137, "learning_rate": 3.576760616759162e-05, "loss": 0.1764, "step": 2264 }, { "epoch": 0.23473935122810655, "grad_norm": 0.39292821288108826, "learning_rate": 3.576347519995031e-05, "loss": 0.1968, "step": 2265 }, { "epoch": 0.23484298891076796, "grad_norm": 0.45694684982299805, "learning_rate": 3.575934245610512e-05, "loss": 0.2036, "step": 2266 }, { "epoch": 0.23494662659342938, "grad_norm": 0.5031452775001526, "learning_rate": 3.575520793652172e-05, "loss": 0.2422, "step": 2267 }, { "epoch": 0.2350502642760908, "grad_norm": 0.4028596878051758, "learning_rate": 3.575107164166598e-05, "loss": 0.1861, "step": 2268 }, { "epoch": 0.23515390195875222, "grad_norm": 0.4759109914302826, "learning_rate": 3.574693357200398e-05, "loss": 0.2187, "step": 2269 }, { "epoch": 0.2352575396414136, "grad_norm": 0.4903968274593353, "learning_rate": 3.574279372800197e-05, "loss": 0.229, "step": 2270 }, { "epoch": 0.23536117732407502, "grad_norm": 0.5485450625419617, "learning_rate": 3.5738652110126446e-05, "loss": 0.2908, "step": 2271 }, { "epoch": 0.23546481500673644, "grad_norm": 0.43799784779548645, "learning_rate": 3.573450871884407e-05, "loss": 0.2087, "step": 2272 }, { "epoch": 0.23556845268939786, "grad_norm": 0.43799030780792236, "learning_rate": 3.57303635546217e-05, "loss": 0.195, "step": 2273 }, { "epoch": 0.23567209037205927, "grad_norm": 0.4460390508174896, "learning_rate": 3.572621661792643e-05, "loss": 0.2226, "step": 2274 }, { "epoch": 0.2357757280547207, "grad_norm": 0.5236432552337646, "learning_rate": 3.572206790922551e-05, "loss": 0.2997, "step": 2275 }, { "epoch": 0.2358793657373821, "grad_norm": 0.42137596011161804, "learning_rate": 3.571791742898642e-05, "loss": 0.2122, "step": 2276 }, { "epoch": 0.23598300342004352, "grad_norm": 0.5710049867630005, "learning_rate": 3.571376517767683e-05, "loss": 0.2646, "step": 2277 }, { "epoch": 0.23608664110270494, "grad_norm": 0.49377962946891785, "learning_rate": 3.5709611155764613e-05, "loss": 0.2466, "step": 2278 }, { "epoch": 0.23619027878536636, "grad_norm": 0.4961196184158325, "learning_rate": 3.570545536371783e-05, "loss": 0.2743, "step": 2279 }, { "epoch": 0.23629391646802778, "grad_norm": 0.46566084027290344, "learning_rate": 3.570129780200474e-05, "loss": 0.194, "step": 2280 }, { "epoch": 0.2363975541506892, "grad_norm": 0.5308505892753601, "learning_rate": 3.569713847109383e-05, "loss": 0.2664, "step": 2281 }, { "epoch": 0.2365011918333506, "grad_norm": 0.4228411614894867, "learning_rate": 3.569297737145376e-05, "loss": 0.1951, "step": 2282 }, { "epoch": 0.23660482951601203, "grad_norm": 0.44380027055740356, "learning_rate": 3.568881450355339e-05, "loss": 0.2404, "step": 2283 }, { "epoch": 0.23670846719867344, "grad_norm": 0.4705795347690582, "learning_rate": 3.56846498678618e-05, "loss": 0.2116, "step": 2284 }, { "epoch": 0.23681210488133486, "grad_norm": 0.4383763372898102, "learning_rate": 3.568048346484824e-05, "loss": 0.2554, "step": 2285 }, { "epoch": 0.23691574256399628, "grad_norm": 0.47628912329673767, "learning_rate": 3.5676315294982175e-05, "loss": 0.2251, "step": 2286 }, { "epoch": 0.2370193802466577, "grad_norm": 0.4527871608734131, "learning_rate": 3.567214535873327e-05, "loss": 0.2059, "step": 2287 }, { "epoch": 0.2371230179293191, "grad_norm": 0.43125396966934204, "learning_rate": 3.56679736565714e-05, "loss": 0.1852, "step": 2288 }, { "epoch": 0.2372266556119805, "grad_norm": 0.5477954149246216, "learning_rate": 3.56638001889666e-05, "loss": 0.2607, "step": 2289 }, { "epoch": 0.23733029329464192, "grad_norm": 0.483190655708313, "learning_rate": 3.5659624956389155e-05, "loss": 0.2338, "step": 2290 }, { "epoch": 0.23743393097730334, "grad_norm": 0.4622649848461151, "learning_rate": 3.5655447959309515e-05, "loss": 0.2365, "step": 2291 }, { "epoch": 0.23753756865996475, "grad_norm": 0.506360650062561, "learning_rate": 3.5651269198198334e-05, "loss": 0.2201, "step": 2292 }, { "epoch": 0.23764120634262617, "grad_norm": 0.6054961681365967, "learning_rate": 3.564708867352646e-05, "loss": 0.2303, "step": 2293 }, { "epoch": 0.2377448440252876, "grad_norm": 0.4164169728755951, "learning_rate": 3.564290638576497e-05, "loss": 0.1778, "step": 2294 }, { "epoch": 0.237848481707949, "grad_norm": 0.4977959990501404, "learning_rate": 3.56387223353851e-05, "loss": 0.236, "step": 2295 }, { "epoch": 0.23795211939061042, "grad_norm": 0.3724239468574524, "learning_rate": 3.563453652285831e-05, "loss": 0.1979, "step": 2296 }, { "epoch": 0.23805575707327184, "grad_norm": 0.42812469601631165, "learning_rate": 3.563034894865625e-05, "loss": 0.203, "step": 2297 }, { "epoch": 0.23815939475593326, "grad_norm": 0.49317336082458496, "learning_rate": 3.5626159613250765e-05, "loss": 0.2338, "step": 2298 }, { "epoch": 0.23826303243859467, "grad_norm": 0.49684178829193115, "learning_rate": 3.562196851711391e-05, "loss": 0.236, "step": 2299 }, { "epoch": 0.2383666701212561, "grad_norm": 0.4792449176311493, "learning_rate": 3.561777566071793e-05, "loss": 0.2391, "step": 2300 }, { "epoch": 0.2384703078039175, "grad_norm": 0.47309496998786926, "learning_rate": 3.5613581044535266e-05, "loss": 0.2172, "step": 2301 }, { "epoch": 0.23857394548657893, "grad_norm": 0.618942141532898, "learning_rate": 3.5609384669038556e-05, "loss": 0.2557, "step": 2302 }, { "epoch": 0.23867758316924034, "grad_norm": 0.3872990310192108, "learning_rate": 3.5605186534700645e-05, "loss": 0.177, "step": 2303 }, { "epoch": 0.23878122085190176, "grad_norm": 0.4337783753871918, "learning_rate": 3.560098664199458e-05, "loss": 0.1974, "step": 2304 }, { "epoch": 0.23888485853456318, "grad_norm": 0.4320196807384491, "learning_rate": 3.5596784991393586e-05, "loss": 0.1936, "step": 2305 }, { "epoch": 0.2389884962172246, "grad_norm": 0.49404609203338623, "learning_rate": 3.559258158337112e-05, "loss": 0.201, "step": 2306 }, { "epoch": 0.239092133899886, "grad_norm": 0.5494398474693298, "learning_rate": 3.558837641840078e-05, "loss": 0.2736, "step": 2307 }, { "epoch": 0.2391957715825474, "grad_norm": 0.5490972399711609, "learning_rate": 3.558416949695643e-05, "loss": 0.2663, "step": 2308 }, { "epoch": 0.23929940926520882, "grad_norm": 0.45828548073768616, "learning_rate": 3.5579960819512087e-05, "loss": 0.2081, "step": 2309 }, { "epoch": 0.23940304694787023, "grad_norm": 0.402240127325058, "learning_rate": 3.557575038654198e-05, "loss": 0.1676, "step": 2310 }, { "epoch": 0.23950668463053165, "grad_norm": 0.5030764937400818, "learning_rate": 3.557153819852052e-05, "loss": 0.2264, "step": 2311 }, { "epoch": 0.23961032231319307, "grad_norm": 0.5498928427696228, "learning_rate": 3.556732425592235e-05, "loss": 0.2523, "step": 2312 }, { "epoch": 0.2397139599958545, "grad_norm": 0.45575132966041565, "learning_rate": 3.5563108559222285e-05, "loss": 0.2067, "step": 2313 }, { "epoch": 0.2398175976785159, "grad_norm": 0.5032340288162231, "learning_rate": 3.555889110889534e-05, "loss": 0.199, "step": 2314 }, { "epoch": 0.23992123536117732, "grad_norm": 0.4415561258792877, "learning_rate": 3.5554671905416734e-05, "loss": 0.2416, "step": 2315 }, { "epoch": 0.24002487304383874, "grad_norm": 0.5081425905227661, "learning_rate": 3.555045094926187e-05, "loss": 0.2588, "step": 2316 }, { "epoch": 0.24012851072650016, "grad_norm": 0.49607375264167786, "learning_rate": 3.5546228240906374e-05, "loss": 0.2602, "step": 2317 }, { "epoch": 0.24023214840916157, "grad_norm": 0.46513596177101135, "learning_rate": 3.554200378082604e-05, "loss": 0.2349, "step": 2318 }, { "epoch": 0.240335786091823, "grad_norm": 0.5502655506134033, "learning_rate": 3.553777756949689e-05, "loss": 0.2683, "step": 2319 }, { "epoch": 0.2404394237744844, "grad_norm": 0.4852595329284668, "learning_rate": 3.553354960739511e-05, "loss": 0.2198, "step": 2320 }, { "epoch": 0.24054306145714582, "grad_norm": 0.4226512908935547, "learning_rate": 3.552931989499711e-05, "loss": 0.2103, "step": 2321 }, { "epoch": 0.24064669913980724, "grad_norm": 0.435518354177475, "learning_rate": 3.552508843277949e-05, "loss": 0.2134, "step": 2322 }, { "epoch": 0.24075033682246866, "grad_norm": 0.4544883072376251, "learning_rate": 3.552085522121903e-05, "loss": 0.2135, "step": 2323 }, { "epoch": 0.24085397450513008, "grad_norm": 0.4653942883014679, "learning_rate": 3.5516620260792736e-05, "loss": 0.2045, "step": 2324 }, { "epoch": 0.2409576121877915, "grad_norm": 0.5308933854103088, "learning_rate": 3.551238355197779e-05, "loss": 0.2572, "step": 2325 }, { "epoch": 0.2410612498704529, "grad_norm": 0.4636653661727905, "learning_rate": 3.550814509525158e-05, "loss": 0.2315, "step": 2326 }, { "epoch": 0.2411648875531143, "grad_norm": 0.4664221704006195, "learning_rate": 3.550390489109169e-05, "loss": 0.242, "step": 2327 }, { "epoch": 0.24126852523577572, "grad_norm": 0.4433833360671997, "learning_rate": 3.54996629399759e-05, "loss": 0.2044, "step": 2328 }, { "epoch": 0.24137216291843713, "grad_norm": 0.47471415996551514, "learning_rate": 3.549541924238218e-05, "loss": 0.2377, "step": 2329 }, { "epoch": 0.24147580060109855, "grad_norm": 0.47291579842567444, "learning_rate": 3.549117379878872e-05, "loss": 0.2312, "step": 2330 }, { "epoch": 0.24157943828375997, "grad_norm": 0.5618043541908264, "learning_rate": 3.5486926609673856e-05, "loss": 0.2786, "step": 2331 }, { "epoch": 0.24168307596642138, "grad_norm": 0.5181592106819153, "learning_rate": 3.54826776755162e-05, "loss": 0.224, "step": 2332 }, { "epoch": 0.2417867136490828, "grad_norm": 0.4476979672908783, "learning_rate": 3.547842699679447e-05, "loss": 0.227, "step": 2333 }, { "epoch": 0.24189035133174422, "grad_norm": 0.49343031644821167, "learning_rate": 3.5474174573987664e-05, "loss": 0.236, "step": 2334 }, { "epoch": 0.24199398901440564, "grad_norm": 0.5434033274650574, "learning_rate": 3.546992040757492e-05, "loss": 0.2521, "step": 2335 }, { "epoch": 0.24209762669706705, "grad_norm": 0.5006096959114075, "learning_rate": 3.546566449803558e-05, "loss": 0.2538, "step": 2336 }, { "epoch": 0.24220126437972847, "grad_norm": 0.48902684450149536, "learning_rate": 3.5461406845849224e-05, "loss": 0.2282, "step": 2337 }, { "epoch": 0.2423049020623899, "grad_norm": 0.5164515972137451, "learning_rate": 3.545714745149557e-05, "loss": 0.2763, "step": 2338 }, { "epoch": 0.2424085397450513, "grad_norm": 0.5709071755409241, "learning_rate": 3.5452886315454574e-05, "loss": 0.2716, "step": 2339 }, { "epoch": 0.24251217742771272, "grad_norm": 0.5185917019844055, "learning_rate": 3.5448623438206364e-05, "loss": 0.2517, "step": 2340 }, { "epoch": 0.24261581511037414, "grad_norm": 0.48523247241973877, "learning_rate": 3.544435882023129e-05, "loss": 0.2657, "step": 2341 }, { "epoch": 0.24271945279303556, "grad_norm": 0.4242948889732361, "learning_rate": 3.544009246200986e-05, "loss": 0.2054, "step": 2342 }, { "epoch": 0.24282309047569697, "grad_norm": 0.47015151381492615, "learning_rate": 3.543582436402283e-05, "loss": 0.2423, "step": 2343 }, { "epoch": 0.2429267281583584, "grad_norm": 0.4143276810646057, "learning_rate": 3.543155452675109e-05, "loss": 0.1662, "step": 2344 }, { "epoch": 0.2430303658410198, "grad_norm": 0.4106583893299103, "learning_rate": 3.542728295067578e-05, "loss": 0.2155, "step": 2345 }, { "epoch": 0.2431340035236812, "grad_norm": 0.4531787037849426, "learning_rate": 3.542300963627821e-05, "loss": 0.2699, "step": 2346 }, { "epoch": 0.24323764120634261, "grad_norm": 0.39408352971076965, "learning_rate": 3.541873458403989e-05, "loss": 0.1848, "step": 2347 }, { "epoch": 0.24334127888900403, "grad_norm": 0.46209919452667236, "learning_rate": 3.541445779444252e-05, "loss": 0.2232, "step": 2348 }, { "epoch": 0.24344491657166545, "grad_norm": 0.46624454855918884, "learning_rate": 3.541017926796802e-05, "loss": 0.2159, "step": 2349 }, { "epoch": 0.24354855425432687, "grad_norm": 0.5353434681892395, "learning_rate": 3.540589900509847e-05, "loss": 0.2553, "step": 2350 }, { "epoch": 0.24365219193698828, "grad_norm": 0.4967138469219208, "learning_rate": 3.540161700631617e-05, "loss": 0.2271, "step": 2351 }, { "epoch": 0.2437558296196497, "grad_norm": 0.5033750534057617, "learning_rate": 3.5397333272103606e-05, "loss": 0.2315, "step": 2352 }, { "epoch": 0.24385946730231112, "grad_norm": 0.5195525884628296, "learning_rate": 3.5393047802943466e-05, "loss": 0.2609, "step": 2353 }, { "epoch": 0.24396310498497253, "grad_norm": 0.48657095432281494, "learning_rate": 3.538876059931862e-05, "loss": 0.2344, "step": 2354 }, { "epoch": 0.24406674266763395, "grad_norm": 0.45042067766189575, "learning_rate": 3.538447166171216e-05, "loss": 0.252, "step": 2355 }, { "epoch": 0.24417038035029537, "grad_norm": 0.4620642066001892, "learning_rate": 3.538018099060735e-05, "loss": 0.2239, "step": 2356 }, { "epoch": 0.24427401803295679, "grad_norm": 0.5294321179389954, "learning_rate": 3.5375888586487654e-05, "loss": 0.233, "step": 2357 }, { "epoch": 0.2443776557156182, "grad_norm": 0.4590419828891754, "learning_rate": 3.537159444983673e-05, "loss": 0.2632, "step": 2358 }, { "epoch": 0.24448129339827962, "grad_norm": 0.4740966260433197, "learning_rate": 3.536729858113845e-05, "loss": 0.2107, "step": 2359 }, { "epoch": 0.24458493108094104, "grad_norm": 0.39860785007476807, "learning_rate": 3.5363000980876845e-05, "loss": 0.2037, "step": 2360 }, { "epoch": 0.24468856876360245, "grad_norm": 0.5298658609390259, "learning_rate": 3.535870164953617e-05, "loss": 0.2321, "step": 2361 }, { "epoch": 0.24479220644626387, "grad_norm": 0.5323640704154968, "learning_rate": 3.535440058760088e-05, "loss": 0.2316, "step": 2362 }, { "epoch": 0.2448958441289253, "grad_norm": 0.459436297416687, "learning_rate": 3.5350097795555595e-05, "loss": 0.2277, "step": 2363 }, { "epoch": 0.2449994818115867, "grad_norm": 0.5758830308914185, "learning_rate": 3.5345793273885146e-05, "loss": 0.2836, "step": 2364 }, { "epoch": 0.2451031194942481, "grad_norm": 0.48922109603881836, "learning_rate": 3.534148702307457e-05, "loss": 0.2193, "step": 2365 }, { "epoch": 0.2452067571769095, "grad_norm": 0.4887892007827759, "learning_rate": 3.533717904360909e-05, "loss": 0.2279, "step": 2366 }, { "epoch": 0.24531039485957093, "grad_norm": 0.3865070939064026, "learning_rate": 3.533286933597412e-05, "loss": 0.1826, "step": 2367 }, { "epoch": 0.24541403254223235, "grad_norm": 0.4107268750667572, "learning_rate": 3.532855790065526e-05, "loss": 0.1901, "step": 2368 }, { "epoch": 0.24551767022489376, "grad_norm": 0.5012355446815491, "learning_rate": 3.532424473813833e-05, "loss": 0.2448, "step": 2369 }, { "epoch": 0.24562130790755518, "grad_norm": 0.49892574548721313, "learning_rate": 3.5319929848909315e-05, "loss": 0.2523, "step": 2370 }, { "epoch": 0.2457249455902166, "grad_norm": 0.5325135588645935, "learning_rate": 3.531561323345442e-05, "loss": 0.2776, "step": 2371 }, { "epoch": 0.24582858327287802, "grad_norm": 0.544353723526001, "learning_rate": 3.531129489226005e-05, "loss": 0.2531, "step": 2372 }, { "epoch": 0.24593222095553943, "grad_norm": 0.4833967089653015, "learning_rate": 3.530697482581277e-05, "loss": 0.2313, "step": 2373 }, { "epoch": 0.24603585863820085, "grad_norm": 0.45267507433891296, "learning_rate": 3.5302653034599346e-05, "loss": 0.2006, "step": 2374 }, { "epoch": 0.24613949632086227, "grad_norm": 0.47396013140678406, "learning_rate": 3.5298329519106784e-05, "loss": 0.2584, "step": 2375 }, { "epoch": 0.24624313400352368, "grad_norm": 0.5356199741363525, "learning_rate": 3.529400427982222e-05, "loss": 0.2736, "step": 2376 }, { "epoch": 0.2463467716861851, "grad_norm": 0.5099332332611084, "learning_rate": 3.528967731723304e-05, "loss": 0.2403, "step": 2377 }, { "epoch": 0.24645040936884652, "grad_norm": 0.48083239793777466, "learning_rate": 3.528534863182678e-05, "loss": 0.2074, "step": 2378 }, { "epoch": 0.24655404705150794, "grad_norm": 0.46469470858573914, "learning_rate": 3.52810182240912e-05, "loss": 0.2522, "step": 2379 }, { "epoch": 0.24665768473416935, "grad_norm": 0.39701616764068604, "learning_rate": 3.527668609451424e-05, "loss": 0.1751, "step": 2380 }, { "epoch": 0.24676132241683077, "grad_norm": 0.4343932271003723, "learning_rate": 3.527235224358405e-05, "loss": 0.1808, "step": 2381 }, { "epoch": 0.2468649600994922, "grad_norm": 0.46541762351989746, "learning_rate": 3.526801667178894e-05, "loss": 0.2317, "step": 2382 }, { "epoch": 0.2469685977821536, "grad_norm": 0.4486386775970459, "learning_rate": 3.526367937961745e-05, "loss": 0.2143, "step": 2383 }, { "epoch": 0.247072235464815, "grad_norm": 0.5101872682571411, "learning_rate": 3.525934036755829e-05, "loss": 0.1991, "step": 2384 }, { "epoch": 0.2471758731474764, "grad_norm": 0.4728240966796875, "learning_rate": 3.525499963610038e-05, "loss": 0.1982, "step": 2385 }, { "epoch": 0.24727951083013783, "grad_norm": 0.47777464985847473, "learning_rate": 3.525065718573283e-05, "loss": 0.2096, "step": 2386 }, { "epoch": 0.24738314851279924, "grad_norm": 0.512395977973938, "learning_rate": 3.524631301694493e-05, "loss": 0.2312, "step": 2387 }, { "epoch": 0.24748678619546066, "grad_norm": 0.4622325301170349, "learning_rate": 3.524196713022619e-05, "loss": 0.2352, "step": 2388 }, { "epoch": 0.24759042387812208, "grad_norm": 0.5057352781295776, "learning_rate": 3.523761952606628e-05, "loss": 0.2622, "step": 2389 }, { "epoch": 0.2476940615607835, "grad_norm": 0.5191410183906555, "learning_rate": 3.523327020495509e-05, "loss": 0.2335, "step": 2390 }, { "epoch": 0.2477976992434449, "grad_norm": 0.5610035061836243, "learning_rate": 3.522891916738269e-05, "loss": 0.2573, "step": 2391 }, { "epoch": 0.24790133692610633, "grad_norm": 0.5406128168106079, "learning_rate": 3.5224566413839354e-05, "loss": 0.2566, "step": 2392 }, { "epoch": 0.24800497460876775, "grad_norm": 0.45537373423576355, "learning_rate": 3.522021194481555e-05, "loss": 0.2189, "step": 2393 }, { "epoch": 0.24810861229142916, "grad_norm": 0.4934905469417572, "learning_rate": 3.5215855760801916e-05, "loss": 0.21, "step": 2394 }, { "epoch": 0.24821224997409058, "grad_norm": 0.4511372745037079, "learning_rate": 3.521149786228931e-05, "loss": 0.2128, "step": 2395 }, { "epoch": 0.248315887656752, "grad_norm": 0.6283062100410461, "learning_rate": 3.5207138249768774e-05, "loss": 0.2992, "step": 2396 }, { "epoch": 0.24841952533941342, "grad_norm": 0.5505823493003845, "learning_rate": 3.520277692373154e-05, "loss": 0.2483, "step": 2397 }, { "epoch": 0.24852316302207483, "grad_norm": 0.504241943359375, "learning_rate": 3.519841388466903e-05, "loss": 0.2516, "step": 2398 }, { "epoch": 0.24862680070473625, "grad_norm": 0.5520890355110168, "learning_rate": 3.519404913307288e-05, "loss": 0.2548, "step": 2399 }, { "epoch": 0.24873043838739767, "grad_norm": 0.48844993114471436, "learning_rate": 3.518968266943488e-05, "loss": 0.2167, "step": 2400 }, { "epoch": 0.24883407607005908, "grad_norm": 0.419709712266922, "learning_rate": 3.5185314494247054e-05, "loss": 0.2273, "step": 2401 }, { "epoch": 0.2489377137527205, "grad_norm": 0.5344715714454651, "learning_rate": 3.51809446080016e-05, "loss": 0.241, "step": 2402 }, { "epoch": 0.2490413514353819, "grad_norm": 0.46909359097480774, "learning_rate": 3.517657301119091e-05, "loss": 0.2167, "step": 2403 }, { "epoch": 0.2491449891180433, "grad_norm": 0.4646020531654358, "learning_rate": 3.5172199704307556e-05, "loss": 0.2554, "step": 2404 }, { "epoch": 0.24924862680070473, "grad_norm": 0.5733885169029236, "learning_rate": 3.516782468784433e-05, "loss": 0.2596, "step": 2405 }, { "epoch": 0.24935226448336614, "grad_norm": 0.41124191880226135, "learning_rate": 3.51634479622942e-05, "loss": 0.1986, "step": 2406 }, { "epoch": 0.24945590216602756, "grad_norm": 0.40536609292030334, "learning_rate": 3.515906952815032e-05, "loss": 0.1833, "step": 2407 }, { "epoch": 0.24955953984868898, "grad_norm": 0.46964260935783386, "learning_rate": 3.5154689385906057e-05, "loss": 0.2116, "step": 2408 }, { "epoch": 0.2496631775313504, "grad_norm": 0.4841628968715668, "learning_rate": 3.515030753605495e-05, "loss": 0.2472, "step": 2409 }, { "epoch": 0.2497668152140118, "grad_norm": 0.431428462266922, "learning_rate": 3.514592397909073e-05, "loss": 0.2226, "step": 2410 }, { "epoch": 0.24987045289667323, "grad_norm": 0.46539610624313354, "learning_rate": 3.5141538715507365e-05, "loss": 0.239, "step": 2411 }, { "epoch": 0.24997409057933465, "grad_norm": 0.4847576320171356, "learning_rate": 3.5137151745798936e-05, "loss": 0.2361, "step": 2412 }, { "epoch": 0.25007772826199604, "grad_norm": 0.46512630581855774, "learning_rate": 3.513276307045979e-05, "loss": 0.2222, "step": 2413 }, { "epoch": 0.25018136594465745, "grad_norm": 0.5072996616363525, "learning_rate": 3.512837268998442e-05, "loss": 0.2377, "step": 2414 }, { "epoch": 0.25028500362731887, "grad_norm": 0.4271259903907776, "learning_rate": 3.512398060486753e-05, "loss": 0.2092, "step": 2415 }, { "epoch": 0.2503886413099803, "grad_norm": 0.43090930581092834, "learning_rate": 3.5119586815604024e-05, "loss": 0.1862, "step": 2416 }, { "epoch": 0.2504922789926417, "grad_norm": 0.49372291564941406, "learning_rate": 3.511519132268897e-05, "loss": 0.2342, "step": 2417 }, { "epoch": 0.2505959166753031, "grad_norm": 0.47086745500564575, "learning_rate": 3.511079412661766e-05, "loss": 0.2385, "step": 2418 }, { "epoch": 0.25069955435796454, "grad_norm": 0.4185890853404999, "learning_rate": 3.510639522788556e-05, "loss": 0.1988, "step": 2419 }, { "epoch": 0.25080319204062596, "grad_norm": 0.40863776206970215, "learning_rate": 3.510199462698832e-05, "loss": 0.2124, "step": 2420 }, { "epoch": 0.2509068297232874, "grad_norm": 0.4368896186351776, "learning_rate": 3.509759232442182e-05, "loss": 0.2064, "step": 2421 }, { "epoch": 0.2510104674059488, "grad_norm": 0.5333195924758911, "learning_rate": 3.509318832068207e-05, "loss": 0.2248, "step": 2422 }, { "epoch": 0.2511141050886102, "grad_norm": 0.4488769769668579, "learning_rate": 3.508878261626533e-05, "loss": 0.1975, "step": 2423 }, { "epoch": 0.2512177427712716, "grad_norm": 0.40054166316986084, "learning_rate": 3.508437521166802e-05, "loss": 0.1882, "step": 2424 }, { "epoch": 0.25132138045393304, "grad_norm": 0.4375198185443878, "learning_rate": 3.507996610738676e-05, "loss": 0.2201, "step": 2425 }, { "epoch": 0.25142501813659446, "grad_norm": 0.45398038625717163, "learning_rate": 3.507555530391836e-05, "loss": 0.2449, "step": 2426 }, { "epoch": 0.2515286558192559, "grad_norm": 0.48056045174598694, "learning_rate": 3.507114280175983e-05, "loss": 0.196, "step": 2427 }, { "epoch": 0.2516322935019173, "grad_norm": 0.3780093193054199, "learning_rate": 3.5066728601408345e-05, "loss": 0.1725, "step": 2428 }, { "epoch": 0.2517359311845787, "grad_norm": 0.4994482696056366, "learning_rate": 3.506231270336131e-05, "loss": 0.2284, "step": 2429 }, { "epoch": 0.2518395688672401, "grad_norm": 0.4925689995288849, "learning_rate": 3.50578951081163e-05, "loss": 0.2282, "step": 2430 }, { "epoch": 0.25194320654990154, "grad_norm": 0.4943445026874542, "learning_rate": 3.505347581617107e-05, "loss": 0.2013, "step": 2431 }, { "epoch": 0.25204684423256296, "grad_norm": 0.4667070508003235, "learning_rate": 3.504905482802358e-05, "loss": 0.2371, "step": 2432 }, { "epoch": 0.2521504819152244, "grad_norm": 0.45808538794517517, "learning_rate": 3.5044632144172e-05, "loss": 0.2211, "step": 2433 }, { "epoch": 0.2522541195978858, "grad_norm": 0.5020188093185425, "learning_rate": 3.5040207765114646e-05, "loss": 0.2668, "step": 2434 }, { "epoch": 0.2523577572805472, "grad_norm": 0.5777788162231445, "learning_rate": 3.503578169135007e-05, "loss": 0.2702, "step": 2435 }, { "epoch": 0.25246139496320863, "grad_norm": 0.4506339728832245, "learning_rate": 3.5031353923376985e-05, "loss": 0.2052, "step": 2436 }, { "epoch": 0.25256503264587005, "grad_norm": 0.5711771845817566, "learning_rate": 3.50269244616943e-05, "loss": 0.2459, "step": 2437 }, { "epoch": 0.25266867032853146, "grad_norm": 0.4558737277984619, "learning_rate": 3.502249330680114e-05, "loss": 0.2507, "step": 2438 }, { "epoch": 0.2527723080111929, "grad_norm": 0.45028817653656006, "learning_rate": 3.5018060459196774e-05, "loss": 0.2199, "step": 2439 }, { "epoch": 0.2528759456938543, "grad_norm": 0.46420130133628845, "learning_rate": 3.501362591938071e-05, "loss": 0.2087, "step": 2440 }, { "epoch": 0.2529795833765157, "grad_norm": 0.4835816025733948, "learning_rate": 3.500918968785261e-05, "loss": 0.2513, "step": 2441 }, { "epoch": 0.25308322105917713, "grad_norm": 0.5205420851707458, "learning_rate": 3.500475176511235e-05, "loss": 0.2436, "step": 2442 }, { "epoch": 0.25318685874183855, "grad_norm": 0.4250602722167969, "learning_rate": 3.500031215165999e-05, "loss": 0.1797, "step": 2443 }, { "epoch": 0.25329049642449997, "grad_norm": 0.5139288306236267, "learning_rate": 3.499587084799577e-05, "loss": 0.2537, "step": 2444 }, { "epoch": 0.2533941341071614, "grad_norm": 0.4984401762485504, "learning_rate": 3.499142785462014e-05, "loss": 0.2866, "step": 2445 }, { "epoch": 0.2534977717898228, "grad_norm": 0.5047465562820435, "learning_rate": 3.498698317203372e-05, "loss": 0.2601, "step": 2446 }, { "epoch": 0.2536014094724842, "grad_norm": 0.47029536962509155, "learning_rate": 3.498253680073733e-05, "loss": 0.2057, "step": 2447 }, { "epoch": 0.25370504715514564, "grad_norm": 0.458085834980011, "learning_rate": 3.497808874123199e-05, "loss": 0.2185, "step": 2448 }, { "epoch": 0.25380868483780705, "grad_norm": 0.507517397403717, "learning_rate": 3.497363899401889e-05, "loss": 0.2218, "step": 2449 }, { "epoch": 0.25391232252046847, "grad_norm": 0.3929824233055115, "learning_rate": 3.496918755959943e-05, "loss": 0.2094, "step": 2450 }, { "epoch": 0.25401596020312983, "grad_norm": 0.4605729579925537, "learning_rate": 3.496473443847519e-05, "loss": 0.2345, "step": 2451 }, { "epoch": 0.25411959788579125, "grad_norm": 0.46283668279647827, "learning_rate": 3.4960279631147926e-05, "loss": 0.25, "step": 2452 }, { "epoch": 0.25422323556845267, "grad_norm": 0.5951175093650818, "learning_rate": 3.4955823138119616e-05, "loss": 0.2615, "step": 2453 }, { "epoch": 0.2543268732511141, "grad_norm": 0.5442880392074585, "learning_rate": 3.4951364959892404e-05, "loss": 0.2556, "step": 2454 }, { "epoch": 0.2544305109337755, "grad_norm": 0.48734351992607117, "learning_rate": 3.494690509696863e-05, "loss": 0.2347, "step": 2455 }, { "epoch": 0.2545341486164369, "grad_norm": 0.4687855541706085, "learning_rate": 3.4942443549850825e-05, "loss": 0.2264, "step": 2456 }, { "epoch": 0.25463778629909833, "grad_norm": 0.4448159337043762, "learning_rate": 3.4937980319041704e-05, "loss": 0.2161, "step": 2457 }, { "epoch": 0.25474142398175975, "grad_norm": 0.5229969024658203, "learning_rate": 3.493351540504419e-05, "loss": 0.2545, "step": 2458 }, { "epoch": 0.25484506166442117, "grad_norm": 0.48294875025749207, "learning_rate": 3.492904880836137e-05, "loss": 0.2283, "step": 2459 }, { "epoch": 0.2549486993470826, "grad_norm": 0.4588148295879364, "learning_rate": 3.492458052949654e-05, "loss": 0.227, "step": 2460 }, { "epoch": 0.255052337029744, "grad_norm": 0.4651046097278595, "learning_rate": 3.492011056895318e-05, "loss": 0.2307, "step": 2461 }, { "epoch": 0.2551559747124054, "grad_norm": 0.44987449049949646, "learning_rate": 3.4915638927234945e-05, "loss": 0.1988, "step": 2462 }, { "epoch": 0.25525961239506684, "grad_norm": 0.49422404170036316, "learning_rate": 3.491116560484571e-05, "loss": 0.2618, "step": 2463 }, { "epoch": 0.25536325007772825, "grad_norm": 0.43810033798217773, "learning_rate": 3.490669060228951e-05, "loss": 0.209, "step": 2464 }, { "epoch": 0.25546688776038967, "grad_norm": 0.46789655089378357, "learning_rate": 3.4902213920070594e-05, "loss": 0.2543, "step": 2465 }, { "epoch": 0.2555705254430511, "grad_norm": 0.49316537380218506, "learning_rate": 3.489773555869337e-05, "loss": 0.2182, "step": 2466 }, { "epoch": 0.2556741631257125, "grad_norm": 0.4785871207714081, "learning_rate": 3.489325551866246e-05, "loss": 0.2429, "step": 2467 }, { "epoch": 0.2557778008083739, "grad_norm": 0.4753807485103607, "learning_rate": 3.488877380048268e-05, "loss": 0.2197, "step": 2468 }, { "epoch": 0.25588143849103534, "grad_norm": 0.5222952365875244, "learning_rate": 3.488429040465901e-05, "loss": 0.2271, "step": 2469 }, { "epoch": 0.25598507617369676, "grad_norm": 0.4613376557826996, "learning_rate": 3.4879805331696636e-05, "loss": 0.1977, "step": 2470 }, { "epoch": 0.2560887138563582, "grad_norm": 0.47917234897613525, "learning_rate": 3.487531858210093e-05, "loss": 0.249, "step": 2471 }, { "epoch": 0.2561923515390196, "grad_norm": 0.48481738567352295, "learning_rate": 3.487083015637745e-05, "loss": 0.2302, "step": 2472 }, { "epoch": 0.256295989221681, "grad_norm": 0.4428340196609497, "learning_rate": 3.486634005503194e-05, "loss": 0.2583, "step": 2473 }, { "epoch": 0.2563996269043424, "grad_norm": 0.49516594409942627, "learning_rate": 3.486184827857034e-05, "loss": 0.2256, "step": 2474 }, { "epoch": 0.25650326458700384, "grad_norm": 0.442982017993927, "learning_rate": 3.4857354827498785e-05, "loss": 0.224, "step": 2475 }, { "epoch": 0.25660690226966526, "grad_norm": 0.5688411593437195, "learning_rate": 3.485285970232359e-05, "loss": 0.2637, "step": 2476 }, { "epoch": 0.2567105399523267, "grad_norm": 0.6057034134864807, "learning_rate": 3.484836290355124e-05, "loss": 0.2628, "step": 2477 }, { "epoch": 0.2568141776349881, "grad_norm": 0.5497249960899353, "learning_rate": 3.484386443168845e-05, "loss": 0.2511, "step": 2478 }, { "epoch": 0.2569178153176495, "grad_norm": 0.5606237053871155, "learning_rate": 3.483936428724209e-05, "loss": 0.2256, "step": 2479 }, { "epoch": 0.25702145300031093, "grad_norm": 0.37174859642982483, "learning_rate": 3.483486247071923e-05, "loss": 0.1698, "step": 2480 }, { "epoch": 0.25712509068297235, "grad_norm": 0.4627086818218231, "learning_rate": 3.483035898262712e-05, "loss": 0.2211, "step": 2481 }, { "epoch": 0.25722872836563376, "grad_norm": 0.5031776428222656, "learning_rate": 3.482585382347323e-05, "loss": 0.2481, "step": 2482 }, { "epoch": 0.2573323660482952, "grad_norm": 0.44983866810798645, "learning_rate": 3.482134699376517e-05, "loss": 0.2178, "step": 2483 }, { "epoch": 0.2574360037309566, "grad_norm": 0.4599207937717438, "learning_rate": 3.481683849401076e-05, "loss": 0.2105, "step": 2484 }, { "epoch": 0.257539641413618, "grad_norm": 0.503778338432312, "learning_rate": 3.481232832471803e-05, "loss": 0.2501, "step": 2485 }, { "epoch": 0.25764327909627943, "grad_norm": 0.4835233688354492, "learning_rate": 3.480781648639518e-05, "loss": 0.2587, "step": 2486 }, { "epoch": 0.25774691677894085, "grad_norm": 0.46209481358528137, "learning_rate": 3.480330297955058e-05, "loss": 0.2162, "step": 2487 }, { "epoch": 0.25785055446160227, "grad_norm": 0.47634264826774597, "learning_rate": 3.479878780469281e-05, "loss": 0.2128, "step": 2488 }, { "epoch": 0.2579541921442636, "grad_norm": 0.5128913521766663, "learning_rate": 3.4794270962330636e-05, "loss": 0.2432, "step": 2489 }, { "epoch": 0.25805782982692504, "grad_norm": 0.5134532451629639, "learning_rate": 3.478975245297301e-05, "loss": 0.2343, "step": 2490 }, { "epoch": 0.25816146750958646, "grad_norm": 0.47444865107536316, "learning_rate": 3.478523227712907e-05, "loss": 0.2099, "step": 2491 }, { "epoch": 0.2582651051922479, "grad_norm": 0.4637894928455353, "learning_rate": 3.478071043530814e-05, "loss": 0.1962, "step": 2492 }, { "epoch": 0.2583687428749093, "grad_norm": 0.4853185713291168, "learning_rate": 3.477618692801973e-05, "loss": 0.2556, "step": 2493 }, { "epoch": 0.2584723805575707, "grad_norm": 0.44991862773895264, "learning_rate": 3.4771661755773554e-05, "loss": 0.2264, "step": 2494 }, { "epoch": 0.25857601824023213, "grad_norm": 0.5146219730377197, "learning_rate": 3.47671349190795e-05, "loss": 0.2321, "step": 2495 }, { "epoch": 0.25867965592289355, "grad_norm": 0.46559420228004456, "learning_rate": 3.4762606418447626e-05, "loss": 0.236, "step": 2496 }, { "epoch": 0.25878329360555496, "grad_norm": 0.4216259717941284, "learning_rate": 3.4758076254388215e-05, "loss": 0.2275, "step": 2497 }, { "epoch": 0.2588869312882164, "grad_norm": 0.526623010635376, "learning_rate": 3.475354442741171e-05, "loss": 0.2449, "step": 2498 }, { "epoch": 0.2589905689708778, "grad_norm": 0.48265042901039124, "learning_rate": 3.474901093802876e-05, "loss": 0.2404, "step": 2499 }, { "epoch": 0.2590942066535392, "grad_norm": 0.5185845494270325, "learning_rate": 3.474447578675018e-05, "loss": 0.2179, "step": 2500 }, { "epoch": 0.25919784433620063, "grad_norm": 0.43969860672950745, "learning_rate": 3.4739938974086995e-05, "loss": 0.2103, "step": 2501 }, { "epoch": 0.25930148201886205, "grad_norm": 0.5436468720436096, "learning_rate": 3.473540050055039e-05, "loss": 0.2532, "step": 2502 }, { "epoch": 0.25940511970152347, "grad_norm": 0.4670475125312805, "learning_rate": 3.473086036665177e-05, "loss": 0.2131, "step": 2503 }, { "epoch": 0.2595087573841849, "grad_norm": 0.5052348971366882, "learning_rate": 3.472631857290271e-05, "loss": 0.2488, "step": 2504 }, { "epoch": 0.2596123950668463, "grad_norm": 0.43734100461006165, "learning_rate": 3.472177511981496e-05, "loss": 0.181, "step": 2505 }, { "epoch": 0.2597160327495077, "grad_norm": 0.46347135305404663, "learning_rate": 3.4717230007900475e-05, "loss": 0.2301, "step": 2506 }, { "epoch": 0.25981967043216914, "grad_norm": 0.46348029375076294, "learning_rate": 3.4712683237671384e-05, "loss": 0.2098, "step": 2507 }, { "epoch": 0.25992330811483055, "grad_norm": 0.4893947243690491, "learning_rate": 3.470813480964003e-05, "loss": 0.2338, "step": 2508 }, { "epoch": 0.26002694579749197, "grad_norm": 0.5009042024612427, "learning_rate": 3.47035847243189e-05, "loss": 0.2403, "step": 2509 }, { "epoch": 0.2601305834801534, "grad_norm": 0.4790656864643097, "learning_rate": 3.46990329822207e-05, "loss": 0.2428, "step": 2510 }, { "epoch": 0.2602342211628148, "grad_norm": 0.49752435088157654, "learning_rate": 3.469447958385832e-05, "loss": 0.243, "step": 2511 }, { "epoch": 0.2603378588454762, "grad_norm": 0.43102583289146423, "learning_rate": 3.468992452974482e-05, "loss": 0.2117, "step": 2512 }, { "epoch": 0.26044149652813764, "grad_norm": 0.4716110825538635, "learning_rate": 3.468536782039346e-05, "loss": 0.2481, "step": 2513 }, { "epoch": 0.26054513421079906, "grad_norm": 0.4976402521133423, "learning_rate": 3.468080945631768e-05, "loss": 0.2461, "step": 2514 }, { "epoch": 0.2606487718934605, "grad_norm": 0.5053207278251648, "learning_rate": 3.467624943803112e-05, "loss": 0.2132, "step": 2515 }, { "epoch": 0.2607524095761219, "grad_norm": 0.4024655818939209, "learning_rate": 3.4671687766047585e-05, "loss": 0.1757, "step": 2516 }, { "epoch": 0.2608560472587833, "grad_norm": 0.402815043926239, "learning_rate": 3.4667124440881074e-05, "loss": 0.1968, "step": 2517 }, { "epoch": 0.2609596849414447, "grad_norm": 0.4925979673862457, "learning_rate": 3.466255946304579e-05, "loss": 0.2339, "step": 2518 }, { "epoch": 0.26106332262410614, "grad_norm": 0.49440789222717285, "learning_rate": 3.4657992833056095e-05, "loss": 0.213, "step": 2519 }, { "epoch": 0.26116696030676756, "grad_norm": 0.5009583234786987, "learning_rate": 3.465342455142655e-05, "loss": 0.2516, "step": 2520 }, { "epoch": 0.261270597989429, "grad_norm": 0.4979705214500427, "learning_rate": 3.464885461867191e-05, "loss": 0.2497, "step": 2521 }, { "epoch": 0.2613742356720904, "grad_norm": 0.45276740193367004, "learning_rate": 3.464428303530711e-05, "loss": 0.1933, "step": 2522 }, { "epoch": 0.2614778733547518, "grad_norm": 0.4588010907173157, "learning_rate": 3.4639709801847254e-05, "loss": 0.2244, "step": 2523 }, { "epoch": 0.26158151103741323, "grad_norm": 0.4481523931026459, "learning_rate": 3.4635134918807656e-05, "loss": 0.2337, "step": 2524 }, { "epoch": 0.26168514872007465, "grad_norm": 0.4547864496707916, "learning_rate": 3.463055838670381e-05, "loss": 0.2262, "step": 2525 }, { "epoch": 0.26178878640273606, "grad_norm": 0.4842788279056549, "learning_rate": 3.4625980206051385e-05, "loss": 0.2523, "step": 2526 }, { "epoch": 0.2618924240853974, "grad_norm": 0.43599626421928406, "learning_rate": 3.462140037736624e-05, "loss": 0.1896, "step": 2527 }, { "epoch": 0.26199606176805884, "grad_norm": 0.46129560470581055, "learning_rate": 3.461681890116445e-05, "loss": 0.2364, "step": 2528 }, { "epoch": 0.26209969945072026, "grad_norm": 0.3715370297431946, "learning_rate": 3.461223577796221e-05, "loss": 0.1899, "step": 2529 }, { "epoch": 0.2622033371333817, "grad_norm": 0.48308712244033813, "learning_rate": 3.460765100827597e-05, "loss": 0.2143, "step": 2530 }, { "epoch": 0.2623069748160431, "grad_norm": 0.5160189270973206, "learning_rate": 3.4603064592622315e-05, "loss": 0.26, "step": 2531 }, { "epoch": 0.2624106124987045, "grad_norm": 0.4867519736289978, "learning_rate": 3.4598476531518045e-05, "loss": 0.2412, "step": 2532 }, { "epoch": 0.2625142501813659, "grad_norm": 0.465255469083786, "learning_rate": 3.459388682548013e-05, "loss": 0.2242, "step": 2533 }, { "epoch": 0.26261788786402734, "grad_norm": 0.4549020528793335, "learning_rate": 3.458929547502574e-05, "loss": 0.2067, "step": 2534 }, { "epoch": 0.26272152554668876, "grad_norm": 0.43989869952201843, "learning_rate": 3.458470248067221e-05, "loss": 0.2126, "step": 2535 }, { "epoch": 0.2628251632293502, "grad_norm": 0.45250603556632996, "learning_rate": 3.458010784293708e-05, "loss": 0.2006, "step": 2536 }, { "epoch": 0.2629288009120116, "grad_norm": 0.3904036581516266, "learning_rate": 3.457551156233806e-05, "loss": 0.1817, "step": 2537 }, { "epoch": 0.263032438594673, "grad_norm": 0.46451374888420105, "learning_rate": 3.457091363939306e-05, "loss": 0.2423, "step": 2538 }, { "epoch": 0.26313607627733443, "grad_norm": 0.4874878525733948, "learning_rate": 3.456631407462016e-05, "loss": 0.2326, "step": 2539 }, { "epoch": 0.26323971395999585, "grad_norm": 0.39999979734420776, "learning_rate": 3.4561712868537634e-05, "loss": 0.1798, "step": 2540 }, { "epoch": 0.26334335164265726, "grad_norm": 0.49433350563049316, "learning_rate": 3.4557110021663934e-05, "loss": 0.2475, "step": 2541 }, { "epoch": 0.2634469893253187, "grad_norm": 0.4696984589099884, "learning_rate": 3.4552505534517714e-05, "loss": 0.185, "step": 2542 }, { "epoch": 0.2635506270079801, "grad_norm": 0.5648089647293091, "learning_rate": 3.4547899407617786e-05, "loss": 0.2477, "step": 2543 }, { "epoch": 0.2636542646906415, "grad_norm": 0.5114067792892456, "learning_rate": 3.454329164148317e-05, "loss": 0.221, "step": 2544 }, { "epoch": 0.26375790237330293, "grad_norm": 0.43366900086402893, "learning_rate": 3.453868223663306e-05, "loss": 0.2543, "step": 2545 }, { "epoch": 0.26386154005596435, "grad_norm": 0.563288688659668, "learning_rate": 3.453407119358684e-05, "loss": 0.2662, "step": 2546 }, { "epoch": 0.26396517773862577, "grad_norm": 0.3807101845741272, "learning_rate": 3.4529458512864064e-05, "loss": 0.1638, "step": 2547 }, { "epoch": 0.2640688154212872, "grad_norm": 0.4380934238433838, "learning_rate": 3.4524844194984496e-05, "loss": 0.225, "step": 2548 }, { "epoch": 0.2641724531039486, "grad_norm": 0.4026784300804138, "learning_rate": 3.4520228240468065e-05, "loss": 0.2008, "step": 2549 }, { "epoch": 0.26427609078661, "grad_norm": 0.3506837487220764, "learning_rate": 3.451561064983488e-05, "loss": 0.177, "step": 2550 }, { "epoch": 0.26437972846927144, "grad_norm": 0.6199037432670593, "learning_rate": 3.4510991423605254e-05, "loss": 0.2554, "step": 2551 }, { "epoch": 0.26448336615193285, "grad_norm": 0.48114362359046936, "learning_rate": 3.450637056229968e-05, "loss": 0.2453, "step": 2552 }, { "epoch": 0.26458700383459427, "grad_norm": 0.5279095768928528, "learning_rate": 3.450174806643881e-05, "loss": 0.2228, "step": 2553 }, { "epoch": 0.2646906415172557, "grad_norm": 0.47686752676963806, "learning_rate": 3.449712393654352e-05, "loss": 0.2405, "step": 2554 }, { "epoch": 0.2647942791999171, "grad_norm": 0.5018643140792847, "learning_rate": 3.449249817313484e-05, "loss": 0.2228, "step": 2555 }, { "epoch": 0.2648979168825785, "grad_norm": 0.558510422706604, "learning_rate": 3.4487870776733993e-05, "loss": 0.2629, "step": 2556 }, { "epoch": 0.26500155456523994, "grad_norm": 0.48924878239631653, "learning_rate": 3.448324174786239e-05, "loss": 0.2092, "step": 2557 }, { "epoch": 0.26510519224790136, "grad_norm": 0.4139244556427002, "learning_rate": 3.447861108704162e-05, "loss": 0.2089, "step": 2558 }, { "epoch": 0.2652088299305628, "grad_norm": 0.5043182969093323, "learning_rate": 3.447397879479346e-05, "loss": 0.2725, "step": 2559 }, { "epoch": 0.2653124676132242, "grad_norm": 0.4431025981903076, "learning_rate": 3.446934487163988e-05, "loss": 0.208, "step": 2560 }, { "epoch": 0.2654161052958856, "grad_norm": 0.4326241910457611, "learning_rate": 3.446470931810299e-05, "loss": 0.2237, "step": 2561 }, { "epoch": 0.265519742978547, "grad_norm": 0.43413740396499634, "learning_rate": 3.4460072134705156e-05, "loss": 0.2078, "step": 2562 }, { "epoch": 0.26562338066120844, "grad_norm": 0.46051687002182007, "learning_rate": 3.445543332196887e-05, "loss": 0.1887, "step": 2563 }, { "epoch": 0.26572701834386986, "grad_norm": 0.4019477665424347, "learning_rate": 3.445079288041683e-05, "loss": 0.1932, "step": 2564 }, { "epoch": 0.2658306560265312, "grad_norm": 0.5146316289901733, "learning_rate": 3.444615081057191e-05, "loss": 0.2214, "step": 2565 }, { "epoch": 0.26593429370919264, "grad_norm": 0.47542494535446167, "learning_rate": 3.4441507112957174e-05, "loss": 0.2559, "step": 2566 }, { "epoch": 0.26603793139185405, "grad_norm": 0.46404582262039185, "learning_rate": 3.443686178809587e-05, "loss": 0.2318, "step": 2567 }, { "epoch": 0.26614156907451547, "grad_norm": 0.5001510381698608, "learning_rate": 3.443221483651141e-05, "loss": 0.2377, "step": 2568 }, { "epoch": 0.2662452067571769, "grad_norm": 0.5247422456741333, "learning_rate": 3.4427566258727436e-05, "loss": 0.2905, "step": 2569 }, { "epoch": 0.2663488444398383, "grad_norm": 0.47278907895088196, "learning_rate": 3.442291605526771e-05, "loss": 0.2313, "step": 2570 }, { "epoch": 0.2664524821224997, "grad_norm": 0.4855808913707733, "learning_rate": 3.441826422665623e-05, "loss": 0.2213, "step": 2571 }, { "epoch": 0.26655611980516114, "grad_norm": 0.4366244077682495, "learning_rate": 3.4413610773417144e-05, "loss": 0.2079, "step": 2572 }, { "epoch": 0.26665975748782256, "grad_norm": 0.4548654556274414, "learning_rate": 3.440895569607481e-05, "loss": 0.2035, "step": 2573 }, { "epoch": 0.266763395170484, "grad_norm": 0.4923447072505951, "learning_rate": 3.440429899515375e-05, "loss": 0.2339, "step": 2574 }, { "epoch": 0.2668670328531454, "grad_norm": 0.48065465688705444, "learning_rate": 3.439964067117866e-05, "loss": 0.2111, "step": 2575 }, { "epoch": 0.2669706705358068, "grad_norm": 0.4575445353984833, "learning_rate": 3.439498072467445e-05, "loss": 0.1983, "step": 2576 }, { "epoch": 0.2670743082184682, "grad_norm": 0.5345796942710876, "learning_rate": 3.439031915616619e-05, "loss": 0.2442, "step": 2577 }, { "epoch": 0.26717794590112964, "grad_norm": 0.6588506698608398, "learning_rate": 3.438565596617913e-05, "loss": 0.2577, "step": 2578 }, { "epoch": 0.26728158358379106, "grad_norm": 0.4015815258026123, "learning_rate": 3.438099115523873e-05, "loss": 0.1669, "step": 2579 }, { "epoch": 0.2673852212664525, "grad_norm": 0.5120643377304077, "learning_rate": 3.437632472387059e-05, "loss": 0.2513, "step": 2580 }, { "epoch": 0.2674888589491139, "grad_norm": 0.4354187548160553, "learning_rate": 3.437165667260054e-05, "loss": 0.2074, "step": 2581 }, { "epoch": 0.2675924966317753, "grad_norm": 0.45943692326545715, "learning_rate": 3.4366987001954555e-05, "loss": 0.2222, "step": 2582 }, { "epoch": 0.26769613431443673, "grad_norm": 0.49233517050743103, "learning_rate": 3.436231571245881e-05, "loss": 0.2487, "step": 2583 }, { "epoch": 0.26779977199709815, "grad_norm": 0.5188155174255371, "learning_rate": 3.435764280463965e-05, "loss": 0.256, "step": 2584 }, { "epoch": 0.26790340967975956, "grad_norm": 0.43278053402900696, "learning_rate": 3.4352968279023624e-05, "loss": 0.2005, "step": 2585 }, { "epoch": 0.268007047362421, "grad_norm": 0.3969469368457794, "learning_rate": 3.434829213613744e-05, "loss": 0.204, "step": 2586 }, { "epoch": 0.2681106850450824, "grad_norm": 0.5019869804382324, "learning_rate": 3.434361437650801e-05, "loss": 0.2308, "step": 2587 }, { "epoch": 0.2682143227277438, "grad_norm": 0.5247126817703247, "learning_rate": 3.43389350006624e-05, "loss": 0.2222, "step": 2588 }, { "epoch": 0.26831796041040523, "grad_norm": 0.46773356199264526, "learning_rate": 3.433425400912789e-05, "loss": 0.1998, "step": 2589 }, { "epoch": 0.26842159809306665, "grad_norm": 0.4832060933113098, "learning_rate": 3.4329571402431924e-05, "loss": 0.2589, "step": 2590 }, { "epoch": 0.26852523577572807, "grad_norm": 0.4738462567329407, "learning_rate": 3.432488718110213e-05, "loss": 0.2095, "step": 2591 }, { "epoch": 0.2686288734583895, "grad_norm": 0.497236967086792, "learning_rate": 3.4320201345666306e-05, "loss": 0.2728, "step": 2592 }, { "epoch": 0.2687325111410509, "grad_norm": 0.4942580461502075, "learning_rate": 3.431551389665246e-05, "loss": 0.2264, "step": 2593 }, { "epoch": 0.2688361488237123, "grad_norm": 0.5522456169128418, "learning_rate": 3.431082483458876e-05, "loss": 0.2528, "step": 2594 }, { "epoch": 0.26893978650637373, "grad_norm": 0.4271014630794525, "learning_rate": 3.4306134160003575e-05, "loss": 0.1912, "step": 2595 }, { "epoch": 0.26904342418903515, "grad_norm": 0.43170085549354553, "learning_rate": 3.430144187342542e-05, "loss": 0.2042, "step": 2596 }, { "epoch": 0.26914706187169657, "grad_norm": 0.48882734775543213, "learning_rate": 3.429674797538304e-05, "loss": 0.2515, "step": 2597 }, { "epoch": 0.269250699554358, "grad_norm": 0.44802772998809814, "learning_rate": 3.429205246640531e-05, "loss": 0.2281, "step": 2598 }, { "epoch": 0.2693543372370194, "grad_norm": 0.518919825553894, "learning_rate": 3.428735534702133e-05, "loss": 0.2292, "step": 2599 }, { "epoch": 0.2694579749196808, "grad_norm": 0.41916918754577637, "learning_rate": 3.4282656617760355e-05, "loss": 0.2027, "step": 2600 }, { "epoch": 0.26956161260234224, "grad_norm": 0.47483789920806885, "learning_rate": 3.427795627915184e-05, "loss": 0.2318, "step": 2601 }, { "epoch": 0.26966525028500365, "grad_norm": 0.4560285806655884, "learning_rate": 3.427325433172541e-05, "loss": 0.2335, "step": 2602 }, { "epoch": 0.269768887967665, "grad_norm": 0.5098150372505188, "learning_rate": 3.426855077601086e-05, "loss": 0.236, "step": 2603 }, { "epoch": 0.26987252565032643, "grad_norm": 0.47620120644569397, "learning_rate": 3.4263845612538203e-05, "loss": 0.2085, "step": 2604 }, { "epoch": 0.26997616333298785, "grad_norm": 0.4596398174762726, "learning_rate": 3.425913884183759e-05, "loss": 0.2159, "step": 2605 }, { "epoch": 0.27007980101564927, "grad_norm": 0.6123204231262207, "learning_rate": 3.425443046443938e-05, "loss": 0.2241, "step": 2606 }, { "epoch": 0.2701834386983107, "grad_norm": 0.5035721659660339, "learning_rate": 3.42497204808741e-05, "loss": 0.2487, "step": 2607 }, { "epoch": 0.2702870763809721, "grad_norm": 0.47740867733955383, "learning_rate": 3.424500889167247e-05, "loss": 0.2217, "step": 2608 }, { "epoch": 0.2703907140636335, "grad_norm": 0.47603389620780945, "learning_rate": 3.424029569736538e-05, "loss": 0.2013, "step": 2609 }, { "epoch": 0.27049435174629494, "grad_norm": 0.5369718670845032, "learning_rate": 3.423558089848391e-05, "loss": 0.2376, "step": 2610 }, { "epoch": 0.27059798942895635, "grad_norm": 0.5321739315986633, "learning_rate": 3.423086449555931e-05, "loss": 0.2722, "step": 2611 }, { "epoch": 0.27070162711161777, "grad_norm": 0.47373032569885254, "learning_rate": 3.422614648912303e-05, "loss": 0.2063, "step": 2612 }, { "epoch": 0.2708052647942792, "grad_norm": 0.44949063658714294, "learning_rate": 3.4221426879706676e-05, "loss": 0.221, "step": 2613 }, { "epoch": 0.2709089024769406, "grad_norm": 0.4665064215660095, "learning_rate": 3.4216705667842044e-05, "loss": 0.1946, "step": 2614 }, { "epoch": 0.271012540159602, "grad_norm": 0.5441843271255493, "learning_rate": 3.421198285406112e-05, "loss": 0.2502, "step": 2615 }, { "epoch": 0.27111617784226344, "grad_norm": 0.45392951369285583, "learning_rate": 3.4207258438896056e-05, "loss": 0.2215, "step": 2616 }, { "epoch": 0.27121981552492486, "grad_norm": 0.516017496585846, "learning_rate": 3.4202532422879204e-05, "loss": 0.2451, "step": 2617 }, { "epoch": 0.2713234532075863, "grad_norm": 0.5189616680145264, "learning_rate": 3.4197804806543076e-05, "loss": 0.2074, "step": 2618 }, { "epoch": 0.2714270908902477, "grad_norm": 0.5190929174423218, "learning_rate": 3.419307559042037e-05, "loss": 0.2542, "step": 2619 }, { "epoch": 0.2715307285729091, "grad_norm": 0.5525068044662476, "learning_rate": 3.4188344775043976e-05, "loss": 0.2576, "step": 2620 }, { "epoch": 0.2716343662555705, "grad_norm": 0.48754870891571045, "learning_rate": 3.4183612360946945e-05, "loss": 0.2111, "step": 2621 }, { "epoch": 0.27173800393823194, "grad_norm": 0.4261727035045624, "learning_rate": 3.4178878348662524e-05, "loss": 0.2137, "step": 2622 }, { "epoch": 0.27184164162089336, "grad_norm": 0.46469005942344666, "learning_rate": 3.417414273872413e-05, "loss": 0.2086, "step": 2623 }, { "epoch": 0.2719452793035548, "grad_norm": 0.43636736273765564, "learning_rate": 3.416940553166536e-05, "loss": 0.2079, "step": 2624 }, { "epoch": 0.2720489169862162, "grad_norm": 0.5391519665718079, "learning_rate": 3.416466672802001e-05, "loss": 0.259, "step": 2625 }, { "epoch": 0.2721525546688776, "grad_norm": 0.47564294934272766, "learning_rate": 3.415992632832203e-05, "loss": 0.2077, "step": 2626 }, { "epoch": 0.27225619235153903, "grad_norm": 0.5383174419403076, "learning_rate": 3.415518433310556e-05, "loss": 0.2078, "step": 2627 }, { "epoch": 0.27235983003420045, "grad_norm": 0.4875207543373108, "learning_rate": 3.415044074290493e-05, "loss": 0.2087, "step": 2628 }, { "epoch": 0.27246346771686186, "grad_norm": 0.587772011756897, "learning_rate": 3.4145695558254635e-05, "loss": 0.2498, "step": 2629 }, { "epoch": 0.2725671053995233, "grad_norm": 0.48780080676078796, "learning_rate": 3.4140948779689344e-05, "loss": 0.2204, "step": 2630 }, { "epoch": 0.2726707430821847, "grad_norm": 0.5223898887634277, "learning_rate": 3.413620040774394e-05, "loss": 0.2468, "step": 2631 }, { "epoch": 0.2727743807648461, "grad_norm": 0.46727365255355835, "learning_rate": 3.413145044295345e-05, "loss": 0.2159, "step": 2632 }, { "epoch": 0.27287801844750753, "grad_norm": 0.4932716190814972, "learning_rate": 3.412669888585308e-05, "loss": 0.2174, "step": 2633 }, { "epoch": 0.27298165613016895, "grad_norm": 0.5126780867576599, "learning_rate": 3.412194573697824e-05, "loss": 0.2561, "step": 2634 }, { "epoch": 0.27308529381283037, "grad_norm": 0.5749641060829163, "learning_rate": 3.411719099686452e-05, "loss": 0.2558, "step": 2635 }, { "epoch": 0.2731889314954918, "grad_norm": 0.5617765188217163, "learning_rate": 3.4112434666047656e-05, "loss": 0.2858, "step": 2636 }, { "epoch": 0.2732925691781532, "grad_norm": 0.5142896771430969, "learning_rate": 3.410767674506359e-05, "loss": 0.2607, "step": 2637 }, { "epoch": 0.2733962068608146, "grad_norm": 0.5220364928245544, "learning_rate": 3.410291723444845e-05, "loss": 0.2621, "step": 2638 }, { "epoch": 0.27349984454347603, "grad_norm": 0.5148720741271973, "learning_rate": 3.4098156134738505e-05, "loss": 0.2601, "step": 2639 }, { "epoch": 0.27360348222613745, "grad_norm": 0.42664462327957153, "learning_rate": 3.409339344647025e-05, "loss": 0.2046, "step": 2640 }, { "epoch": 0.2737071199087988, "grad_norm": 0.5582861304283142, "learning_rate": 3.408862917018033e-05, "loss": 0.2342, "step": 2641 }, { "epoch": 0.27381075759146023, "grad_norm": 0.4054379463195801, "learning_rate": 3.4083863306405576e-05, "loss": 0.1905, "step": 2642 }, { "epoch": 0.27391439527412165, "grad_norm": 0.41754868626594543, "learning_rate": 3.4079095855683e-05, "loss": 0.2054, "step": 2643 }, { "epoch": 0.27401803295678306, "grad_norm": 0.4834083318710327, "learning_rate": 3.407432681854978e-05, "loss": 0.2179, "step": 2644 }, { "epoch": 0.2741216706394445, "grad_norm": 0.46197500824928284, "learning_rate": 3.4069556195543305e-05, "loss": 0.213, "step": 2645 }, { "epoch": 0.2742253083221059, "grad_norm": 0.47068068385124207, "learning_rate": 3.4064783987201104e-05, "loss": 0.2639, "step": 2646 }, { "epoch": 0.2743289460047673, "grad_norm": 0.43616458773612976, "learning_rate": 3.406001019406091e-05, "loss": 0.1928, "step": 2647 }, { "epoch": 0.27443258368742873, "grad_norm": 0.4012099504470825, "learning_rate": 3.405523481666063e-05, "loss": 0.177, "step": 2648 }, { "epoch": 0.27453622137009015, "grad_norm": 0.6151448488235474, "learning_rate": 3.405045785553832e-05, "loss": 0.2866, "step": 2649 }, { "epoch": 0.27463985905275157, "grad_norm": 0.5154361128807068, "learning_rate": 3.4045679311232276e-05, "loss": 0.2403, "step": 2650 }, { "epoch": 0.274743496735413, "grad_norm": 0.41954943537712097, "learning_rate": 3.404089918428092e-05, "loss": 0.2054, "step": 2651 }, { "epoch": 0.2748471344180744, "grad_norm": 0.5191131234169006, "learning_rate": 3.4036117475222865e-05, "loss": 0.2279, "step": 2652 }, { "epoch": 0.2749507721007358, "grad_norm": 0.4807083010673523, "learning_rate": 3.4031334184596926e-05, "loss": 0.208, "step": 2653 }, { "epoch": 0.27505440978339724, "grad_norm": 0.4737519919872284, "learning_rate": 3.4026549312942046e-05, "loss": 0.2369, "step": 2654 }, { "epoch": 0.27515804746605865, "grad_norm": 0.4876102805137634, "learning_rate": 3.4021762860797405e-05, "loss": 0.2183, "step": 2655 }, { "epoch": 0.27526168514872007, "grad_norm": 0.44601619243621826, "learning_rate": 3.4016974828702326e-05, "loss": 0.2141, "step": 2656 }, { "epoch": 0.2753653228313815, "grad_norm": 0.5628261566162109, "learning_rate": 3.40121852171963e-05, "loss": 0.216, "step": 2657 }, { "epoch": 0.2754689605140429, "grad_norm": 0.48517346382141113, "learning_rate": 3.400739402681904e-05, "loss": 0.2285, "step": 2658 }, { "epoch": 0.2755725981967043, "grad_norm": 0.47906678915023804, "learning_rate": 3.400260125811039e-05, "loss": 0.2302, "step": 2659 }, { "epoch": 0.27567623587936574, "grad_norm": 0.5147571563720703, "learning_rate": 3.399780691161039e-05, "loss": 0.2663, "step": 2660 }, { "epoch": 0.27577987356202716, "grad_norm": 0.49551793932914734, "learning_rate": 3.3993010987859275e-05, "loss": 0.2448, "step": 2661 }, { "epoch": 0.2758835112446886, "grad_norm": 0.5411267876625061, "learning_rate": 3.398821348739743e-05, "loss": 0.2527, "step": 2662 }, { "epoch": 0.27598714892735, "grad_norm": 0.44436827301979065, "learning_rate": 3.3983414410765445e-05, "loss": 0.2313, "step": 2663 }, { "epoch": 0.2760907866100114, "grad_norm": 0.5024104714393616, "learning_rate": 3.397861375850405e-05, "loss": 0.2151, "step": 2664 }, { "epoch": 0.2761944242926728, "grad_norm": 0.46596401929855347, "learning_rate": 3.397381153115419e-05, "loss": 0.2007, "step": 2665 }, { "epoch": 0.27629806197533424, "grad_norm": 0.5202387571334839, "learning_rate": 3.396900772925697e-05, "loss": 0.2144, "step": 2666 }, { "epoch": 0.27640169965799566, "grad_norm": 0.4352811574935913, "learning_rate": 3.396420235335367e-05, "loss": 0.1939, "step": 2667 }, { "epoch": 0.2765053373406571, "grad_norm": 0.47732260823249817, "learning_rate": 3.395939540398577e-05, "loss": 0.2285, "step": 2668 }, { "epoch": 0.2766089750233185, "grad_norm": 0.43177780508995056, "learning_rate": 3.395458688169489e-05, "loss": 0.2192, "step": 2669 }, { "epoch": 0.2767126127059799, "grad_norm": 0.4750939905643463, "learning_rate": 3.394977678702285e-05, "loss": 0.2262, "step": 2670 }, { "epoch": 0.2768162503886413, "grad_norm": 0.49694186449050903, "learning_rate": 3.3944965120511645e-05, "loss": 0.2308, "step": 2671 }, { "epoch": 0.27691988807130274, "grad_norm": 0.4427125155925751, "learning_rate": 3.3940151882703446e-05, "loss": 0.2138, "step": 2672 }, { "epoch": 0.27702352575396416, "grad_norm": 0.4885327219963074, "learning_rate": 3.393533707414061e-05, "loss": 0.2419, "step": 2673 }, { "epoch": 0.2771271634366256, "grad_norm": 0.43068984150886536, "learning_rate": 3.393052069536566e-05, "loss": 0.2122, "step": 2674 }, { "epoch": 0.277230801119287, "grad_norm": 0.4368421137332916, "learning_rate": 3.392570274692128e-05, "loss": 0.2531, "step": 2675 }, { "epoch": 0.2773344388019484, "grad_norm": 0.5177136063575745, "learning_rate": 3.3920883229350375e-05, "loss": 0.2249, "step": 2676 }, { "epoch": 0.27743807648460983, "grad_norm": 0.5152319073677063, "learning_rate": 3.391606214319598e-05, "loss": 0.2245, "step": 2677 }, { "epoch": 0.27754171416727125, "grad_norm": 0.4497338831424713, "learning_rate": 3.3911239489001344e-05, "loss": 0.1975, "step": 2678 }, { "epoch": 0.2776453518499326, "grad_norm": 0.5171423554420471, "learning_rate": 3.3906415267309855e-05, "loss": 0.2159, "step": 2679 }, { "epoch": 0.277748989532594, "grad_norm": 0.4115270972251892, "learning_rate": 3.390158947866512e-05, "loss": 0.1753, "step": 2680 }, { "epoch": 0.27785262721525544, "grad_norm": 0.5156992077827454, "learning_rate": 3.389676212361089e-05, "loss": 0.2541, "step": 2681 }, { "epoch": 0.27795626489791686, "grad_norm": 0.44904083013534546, "learning_rate": 3.3891933202691105e-05, "loss": 0.2106, "step": 2682 }, { "epoch": 0.2780599025805783, "grad_norm": 0.3925611674785614, "learning_rate": 3.388710271644989e-05, "loss": 0.1621, "step": 2683 }, { "epoch": 0.2781635402632397, "grad_norm": 0.4597630500793457, "learning_rate": 3.388227066543152e-05, "loss": 0.2062, "step": 2684 }, { "epoch": 0.2782671779459011, "grad_norm": 0.5479700565338135, "learning_rate": 3.3877437050180485e-05, "loss": 0.2441, "step": 2685 }, { "epoch": 0.27837081562856253, "grad_norm": 0.4624266028404236, "learning_rate": 3.38726018712414e-05, "loss": 0.2156, "step": 2686 }, { "epoch": 0.27847445331122395, "grad_norm": 0.5356397032737732, "learning_rate": 3.386776512915911e-05, "loss": 0.2616, "step": 2687 }, { "epoch": 0.27857809099388536, "grad_norm": 0.5255545973777771, "learning_rate": 3.38629268244786e-05, "loss": 0.2488, "step": 2688 }, { "epoch": 0.2786817286765468, "grad_norm": 0.492130845785141, "learning_rate": 3.385808695774505e-05, "loss": 0.2256, "step": 2689 }, { "epoch": 0.2787853663592082, "grad_norm": 0.5462003350257874, "learning_rate": 3.38532455295038e-05, "loss": 0.2542, "step": 2690 }, { "epoch": 0.2788890040418696, "grad_norm": 0.4930932819843292, "learning_rate": 3.384840254030039e-05, "loss": 0.2697, "step": 2691 }, { "epoch": 0.27899264172453103, "grad_norm": 0.48198801279067993, "learning_rate": 3.38435579906805e-05, "loss": 0.2177, "step": 2692 }, { "epoch": 0.27909627940719245, "grad_norm": 0.5145705938339233, "learning_rate": 3.383871188119001e-05, "loss": 0.2553, "step": 2693 }, { "epoch": 0.27919991708985387, "grad_norm": 0.4967647194862366, "learning_rate": 3.383386421237498e-05, "loss": 0.2098, "step": 2694 }, { "epoch": 0.2793035547725153, "grad_norm": 0.4569028317928314, "learning_rate": 3.382901498478164e-05, "loss": 0.2186, "step": 2695 }, { "epoch": 0.2794071924551767, "grad_norm": 0.5114362239837646, "learning_rate": 3.382416419895639e-05, "loss": 0.2598, "step": 2696 }, { "epoch": 0.2795108301378381, "grad_norm": 0.4411246180534363, "learning_rate": 3.3819311855445814e-05, "loss": 0.1998, "step": 2697 }, { "epoch": 0.27961446782049953, "grad_norm": 0.4930228292942047, "learning_rate": 3.381445795479665e-05, "loss": 0.2473, "step": 2698 }, { "epoch": 0.27971810550316095, "grad_norm": 0.4813242554664612, "learning_rate": 3.380960249755584e-05, "loss": 0.2299, "step": 2699 }, { "epoch": 0.27982174318582237, "grad_norm": 0.5559103488922119, "learning_rate": 3.3804745484270496e-05, "loss": 0.2343, "step": 2700 }, { "epoch": 0.2799253808684838, "grad_norm": 0.4250171482563019, "learning_rate": 3.379988691548788e-05, "loss": 0.1892, "step": 2701 }, { "epoch": 0.2800290185511452, "grad_norm": 0.5068527460098267, "learning_rate": 3.379502679175547e-05, "loss": 0.2442, "step": 2702 }, { "epoch": 0.2801326562338066, "grad_norm": 0.48161518573760986, "learning_rate": 3.379016511362088e-05, "loss": 0.2018, "step": 2703 }, { "epoch": 0.28023629391646804, "grad_norm": 0.4886307418346405, "learning_rate": 3.378530188163192e-05, "loss": 0.2528, "step": 2704 }, { "epoch": 0.28033993159912945, "grad_norm": 0.43843701481819153, "learning_rate": 3.378043709633658e-05, "loss": 0.2159, "step": 2705 }, { "epoch": 0.28044356928179087, "grad_norm": 0.44648537039756775, "learning_rate": 3.3775570758283004e-05, "loss": 0.199, "step": 2706 }, { "epoch": 0.2805472069644523, "grad_norm": 0.3697032630443573, "learning_rate": 3.377070286801953e-05, "loss": 0.1692, "step": 2707 }, { "epoch": 0.2806508446471137, "grad_norm": 0.5212875008583069, "learning_rate": 3.3765833426094664e-05, "loss": 0.2153, "step": 2708 }, { "epoch": 0.2807544823297751, "grad_norm": 0.5370132327079773, "learning_rate": 3.376096243305709e-05, "loss": 0.2295, "step": 2709 }, { "epoch": 0.28085812001243654, "grad_norm": 0.4295409321784973, "learning_rate": 3.375608988945566e-05, "loss": 0.1938, "step": 2710 }, { "epoch": 0.28096175769509796, "grad_norm": 0.5155740976333618, "learning_rate": 3.3751215795839405e-05, "loss": 0.2662, "step": 2711 }, { "epoch": 0.2810653953777594, "grad_norm": 0.498410165309906, "learning_rate": 3.374634015275753e-05, "loss": 0.238, "step": 2712 }, { "epoch": 0.2811690330604208, "grad_norm": 0.4350877106189728, "learning_rate": 3.374146296075942e-05, "loss": 0.2165, "step": 2713 }, { "epoch": 0.2812726707430822, "grad_norm": 0.4817065894603729, "learning_rate": 3.373658422039461e-05, "loss": 0.2161, "step": 2714 }, { "epoch": 0.2813763084257436, "grad_norm": 0.4794848561286926, "learning_rate": 3.373170393221286e-05, "loss": 0.2226, "step": 2715 }, { "epoch": 0.28147994610840504, "grad_norm": 0.5036630034446716, "learning_rate": 3.372682209676406e-05, "loss": 0.2261, "step": 2716 }, { "epoch": 0.2815835837910664, "grad_norm": 0.49164390563964844, "learning_rate": 3.372193871459827e-05, "loss": 0.2334, "step": 2717 }, { "epoch": 0.2816872214737278, "grad_norm": 0.5513508319854736, "learning_rate": 3.371705378626577e-05, "loss": 0.2651, "step": 2718 }, { "epoch": 0.28179085915638924, "grad_norm": 0.4919818639755249, "learning_rate": 3.371216731231696e-05, "loss": 0.2358, "step": 2719 }, { "epoch": 0.28189449683905066, "grad_norm": 0.5139323472976685, "learning_rate": 3.370727929330246e-05, "loss": 0.2665, "step": 2720 }, { "epoch": 0.2819981345217121, "grad_norm": 0.45646989345550537, "learning_rate": 3.370238972977304e-05, "loss": 0.2199, "step": 2721 }, { "epoch": 0.2821017722043735, "grad_norm": 0.4081878364086151, "learning_rate": 3.369749862227965e-05, "loss": 0.1909, "step": 2722 }, { "epoch": 0.2822054098870349, "grad_norm": 0.41155028343200684, "learning_rate": 3.3692605971373396e-05, "loss": 0.1797, "step": 2723 }, { "epoch": 0.2823090475696963, "grad_norm": 0.5415491461753845, "learning_rate": 3.368771177760559e-05, "loss": 0.2148, "step": 2724 }, { "epoch": 0.28241268525235774, "grad_norm": 0.4850313365459442, "learning_rate": 3.368281604152771e-05, "loss": 0.2251, "step": 2725 }, { "epoch": 0.28251632293501916, "grad_norm": 0.5246897339820862, "learning_rate": 3.367791876369138e-05, "loss": 0.2501, "step": 2726 }, { "epoch": 0.2826199606176806, "grad_norm": 0.5343725681304932, "learning_rate": 3.3673019944648425e-05, "loss": 0.2495, "step": 2727 }, { "epoch": 0.282723598300342, "grad_norm": 0.501419186592102, "learning_rate": 3.366811958495084e-05, "loss": 0.2135, "step": 2728 }, { "epoch": 0.2828272359830034, "grad_norm": 0.5163164734840393, "learning_rate": 3.366321768515079e-05, "loss": 0.2465, "step": 2729 }, { "epoch": 0.28293087366566483, "grad_norm": 0.5618857145309448, "learning_rate": 3.36583142458006e-05, "loss": 0.2716, "step": 2730 }, { "epoch": 0.28303451134832625, "grad_norm": 0.4317639172077179, "learning_rate": 3.3653409267452805e-05, "loss": 0.2219, "step": 2731 }, { "epoch": 0.28313814903098766, "grad_norm": 0.4637451767921448, "learning_rate": 3.364850275066008e-05, "loss": 0.2314, "step": 2732 }, { "epoch": 0.2832417867136491, "grad_norm": 0.4839981198310852, "learning_rate": 3.3643594695975275e-05, "loss": 0.2227, "step": 2733 }, { "epoch": 0.2833454243963105, "grad_norm": 0.43426045775413513, "learning_rate": 3.3638685103951427e-05, "loss": 0.1999, "step": 2734 }, { "epoch": 0.2834490620789719, "grad_norm": 0.4142928421497345, "learning_rate": 3.363377397514176e-05, "loss": 0.1953, "step": 2735 }, { "epoch": 0.28355269976163333, "grad_norm": 0.5417696237564087, "learning_rate": 3.3628861310099615e-05, "loss": 0.2151, "step": 2736 }, { "epoch": 0.28365633744429475, "grad_norm": 0.49811097979545593, "learning_rate": 3.3623947109378574e-05, "loss": 0.2449, "step": 2737 }, { "epoch": 0.28375997512695617, "grad_norm": 0.48678550124168396, "learning_rate": 3.3619031373532344e-05, "loss": 0.2261, "step": 2738 }, { "epoch": 0.2838636128096176, "grad_norm": 0.4436796307563782, "learning_rate": 3.3614114103114835e-05, "loss": 0.2415, "step": 2739 }, { "epoch": 0.283967250492279, "grad_norm": 0.46993231773376465, "learning_rate": 3.360919529868012e-05, "loss": 0.2432, "step": 2740 }, { "epoch": 0.2840708881749404, "grad_norm": 0.4227633774280548, "learning_rate": 3.3604274960782426e-05, "loss": 0.2129, "step": 2741 }, { "epoch": 0.28417452585760183, "grad_norm": 0.4004562795162201, "learning_rate": 3.3599353089976184e-05, "loss": 0.1948, "step": 2742 }, { "epoch": 0.28427816354026325, "grad_norm": 0.4743999242782593, "learning_rate": 3.3594429686815965e-05, "loss": 0.2525, "step": 2743 }, { "epoch": 0.28438180122292467, "grad_norm": 0.46449390053749084, "learning_rate": 3.358950475185655e-05, "loss": 0.2396, "step": 2744 }, { "epoch": 0.2844854389055861, "grad_norm": 0.497114360332489, "learning_rate": 3.3584578285652866e-05, "loss": 0.2229, "step": 2745 }, { "epoch": 0.2845890765882475, "grad_norm": 0.519870936870575, "learning_rate": 3.357965028876001e-05, "loss": 0.305, "step": 2746 }, { "epoch": 0.2846927142709089, "grad_norm": 0.46930116415023804, "learning_rate": 3.357472076173328e-05, "loss": 0.2255, "step": 2747 }, { "epoch": 0.28479635195357034, "grad_norm": 0.48047900199890137, "learning_rate": 3.35697897051281e-05, "loss": 0.2303, "step": 2748 }, { "epoch": 0.28489998963623175, "grad_norm": 0.430600106716156, "learning_rate": 3.356485711950013e-05, "loss": 0.2214, "step": 2749 }, { "epoch": 0.28500362731889317, "grad_norm": 0.403071790933609, "learning_rate": 3.355992300540514e-05, "loss": 0.1785, "step": 2750 }, { "epoch": 0.2851072650015546, "grad_norm": 0.5231764316558838, "learning_rate": 3.355498736339911e-05, "loss": 0.2488, "step": 2751 }, { "epoch": 0.285210902684216, "grad_norm": 0.39844778180122375, "learning_rate": 3.355005019403817e-05, "loss": 0.1972, "step": 2752 }, { "epoch": 0.2853145403668774, "grad_norm": 0.4338219463825226, "learning_rate": 3.3545111497878636e-05, "loss": 0.2196, "step": 2753 }, { "epoch": 0.28541817804953884, "grad_norm": 0.4960634112358093, "learning_rate": 3.3540171275477e-05, "loss": 0.2427, "step": 2754 }, { "epoch": 0.2855218157322002, "grad_norm": 0.4221111238002777, "learning_rate": 3.353522952738991e-05, "loss": 0.197, "step": 2755 }, { "epoch": 0.2856254534148616, "grad_norm": 0.5171911120414734, "learning_rate": 3.353028625417419e-05, "loss": 0.2553, "step": 2756 }, { "epoch": 0.28572909109752304, "grad_norm": 0.4602873921394348, "learning_rate": 3.352534145638687e-05, "loss": 0.213, "step": 2757 }, { "epoch": 0.28583272878018445, "grad_norm": 0.5187010765075684, "learning_rate": 3.352039513458508e-05, "loss": 0.2479, "step": 2758 }, { "epoch": 0.28593636646284587, "grad_norm": 0.4925081431865692, "learning_rate": 3.351544728932619e-05, "loss": 0.2646, "step": 2759 }, { "epoch": 0.2860400041455073, "grad_norm": 0.44228821992874146, "learning_rate": 3.3510497921167706e-05, "loss": 0.1664, "step": 2760 }, { "epoch": 0.2861436418281687, "grad_norm": 0.4919978976249695, "learning_rate": 3.350554703066733e-05, "loss": 0.2295, "step": 2761 }, { "epoch": 0.2862472795108301, "grad_norm": 0.512550950050354, "learning_rate": 3.350059461838291e-05, "loss": 0.207, "step": 2762 }, { "epoch": 0.28635091719349154, "grad_norm": 0.5739670991897583, "learning_rate": 3.3495640684872455e-05, "loss": 0.2877, "step": 2763 }, { "epoch": 0.28645455487615296, "grad_norm": 0.5250087976455688, "learning_rate": 3.34906852306942e-05, "loss": 0.2482, "step": 2764 }, { "epoch": 0.2865581925588144, "grad_norm": 0.5244482159614563, "learning_rate": 3.348572825640651e-05, "loss": 0.2481, "step": 2765 }, { "epoch": 0.2866618302414758, "grad_norm": 0.46387696266174316, "learning_rate": 3.3480769762567914e-05, "loss": 0.2042, "step": 2766 }, { "epoch": 0.2867654679241372, "grad_norm": 0.5388164520263672, "learning_rate": 3.3475809749737146e-05, "loss": 0.2556, "step": 2767 }, { "epoch": 0.2868691056067986, "grad_norm": 0.3879207372665405, "learning_rate": 3.3470848218473076e-05, "loss": 0.1961, "step": 2768 }, { "epoch": 0.28697274328946004, "grad_norm": 0.5269706845283508, "learning_rate": 3.3465885169334774e-05, "loss": 0.2097, "step": 2769 }, { "epoch": 0.28707638097212146, "grad_norm": 0.5072423219680786, "learning_rate": 3.3460920602881465e-05, "loss": 0.2636, "step": 2770 }, { "epoch": 0.2871800186547829, "grad_norm": 0.44804468750953674, "learning_rate": 3.345595451967254e-05, "loss": 0.2273, "step": 2771 }, { "epoch": 0.2872836563374443, "grad_norm": 0.4465629458427429, "learning_rate": 3.345098692026759e-05, "loss": 0.2041, "step": 2772 }, { "epoch": 0.2873872940201057, "grad_norm": 0.3973003029823303, "learning_rate": 3.344601780522634e-05, "loss": 0.1755, "step": 2773 }, { "epoch": 0.2874909317027671, "grad_norm": 0.4714430272579193, "learning_rate": 3.34410471751087e-05, "loss": 0.246, "step": 2774 }, { "epoch": 0.28759456938542854, "grad_norm": 0.46732309460639954, "learning_rate": 3.343607503047476e-05, "loss": 0.1942, "step": 2775 }, { "epoch": 0.28769820706808996, "grad_norm": 0.5006055235862732, "learning_rate": 3.343110137188478e-05, "loss": 0.2429, "step": 2776 }, { "epoch": 0.2878018447507514, "grad_norm": 0.48675471544265747, "learning_rate": 3.342612619989917e-05, "loss": 0.2485, "step": 2777 }, { "epoch": 0.2879054824334128, "grad_norm": 0.44414421916007996, "learning_rate": 3.342114951507854e-05, "loss": 0.2069, "step": 2778 }, { "epoch": 0.2880091201160742, "grad_norm": 0.4877408742904663, "learning_rate": 3.341617131798364e-05, "loss": 0.2289, "step": 2779 }, { "epoch": 0.28811275779873563, "grad_norm": 0.5503265261650085, "learning_rate": 3.34111916091754e-05, "loss": 0.2699, "step": 2780 }, { "epoch": 0.28821639548139705, "grad_norm": 0.4621533453464508, "learning_rate": 3.340621038921495e-05, "loss": 0.2018, "step": 2781 }, { "epoch": 0.28832003316405846, "grad_norm": 0.5180187225341797, "learning_rate": 3.3401227658663555e-05, "loss": 0.2269, "step": 2782 }, { "epoch": 0.2884236708467199, "grad_norm": 0.4518931210041046, "learning_rate": 3.339624341808266e-05, "loss": 0.2024, "step": 2783 }, { "epoch": 0.2885273085293813, "grad_norm": 0.5057093501091003, "learning_rate": 3.3391257668033875e-05, "loss": 0.2671, "step": 2784 }, { "epoch": 0.2886309462120427, "grad_norm": 0.45429331064224243, "learning_rate": 3.3386270409078994e-05, "loss": 0.2417, "step": 2785 }, { "epoch": 0.28873458389470413, "grad_norm": 0.4282505512237549, "learning_rate": 3.338128164177998e-05, "loss": 0.1975, "step": 2786 }, { "epoch": 0.28883822157736555, "grad_norm": 0.4234720766544342, "learning_rate": 3.337629136669894e-05, "loss": 0.186, "step": 2787 }, { "epoch": 0.28894185926002697, "grad_norm": 0.5359960794448853, "learning_rate": 3.337129958439819e-05, "loss": 0.2675, "step": 2788 }, { "epoch": 0.2890454969426884, "grad_norm": 0.47228848934173584, "learning_rate": 3.3366306295440195e-05, "loss": 0.2325, "step": 2789 }, { "epoch": 0.2891491346253498, "grad_norm": 0.4502311944961548, "learning_rate": 3.336131150038758e-05, "loss": 0.2237, "step": 2790 }, { "epoch": 0.2892527723080112, "grad_norm": 0.4958683252334595, "learning_rate": 3.335631519980315e-05, "loss": 0.248, "step": 2791 }, { "epoch": 0.28935640999067264, "grad_norm": 0.47601819038391113, "learning_rate": 3.335131739424989e-05, "loss": 0.2319, "step": 2792 }, { "epoch": 0.289460047673334, "grad_norm": 0.4568394124507904, "learning_rate": 3.3346318084290944e-05, "loss": 0.2319, "step": 2793 }, { "epoch": 0.2895636853559954, "grad_norm": 0.4676230847835541, "learning_rate": 3.334131727048962e-05, "loss": 0.1982, "step": 2794 }, { "epoch": 0.28966732303865683, "grad_norm": 0.537865400314331, "learning_rate": 3.333631495340941e-05, "loss": 0.257, "step": 2795 }, { "epoch": 0.28977096072131825, "grad_norm": 0.45650193095207214, "learning_rate": 3.333131113361396e-05, "loss": 0.2095, "step": 2796 }, { "epoch": 0.28987459840397967, "grad_norm": 0.4981147348880768, "learning_rate": 3.332630581166709e-05, "loss": 0.2283, "step": 2797 }, { "epoch": 0.2899782360866411, "grad_norm": 0.4698350131511688, "learning_rate": 3.3321298988132804e-05, "loss": 0.2071, "step": 2798 }, { "epoch": 0.2900818737693025, "grad_norm": 0.40696632862091064, "learning_rate": 3.331629066357526e-05, "loss": 0.2053, "step": 2799 }, { "epoch": 0.2901855114519639, "grad_norm": 0.5765353441238403, "learning_rate": 3.3311280838558775e-05, "loss": 0.2077, "step": 2800 }, { "epoch": 0.29028914913462533, "grad_norm": 0.41738903522491455, "learning_rate": 3.3306269513647866e-05, "loss": 0.2314, "step": 2801 }, { "epoch": 0.29039278681728675, "grad_norm": 0.4574735164642334, "learning_rate": 3.330125668940718e-05, "loss": 0.2015, "step": 2802 }, { "epoch": 0.29049642449994817, "grad_norm": 0.5367337465286255, "learning_rate": 3.329624236640158e-05, "loss": 0.2695, "step": 2803 }, { "epoch": 0.2906000621826096, "grad_norm": 0.49216893315315247, "learning_rate": 3.329122654519606e-05, "loss": 0.2188, "step": 2804 }, { "epoch": 0.290703699865271, "grad_norm": 0.48801082372665405, "learning_rate": 3.328620922635579e-05, "loss": 0.216, "step": 2805 }, { "epoch": 0.2908073375479324, "grad_norm": 0.4400445222854614, "learning_rate": 3.328119041044611e-05, "loss": 0.205, "step": 2806 }, { "epoch": 0.29091097523059384, "grad_norm": 0.45788559317588806, "learning_rate": 3.3276170098032554e-05, "loss": 0.2079, "step": 2807 }, { "epoch": 0.29101461291325526, "grad_norm": 0.4800960421562195, "learning_rate": 3.327114828968079e-05, "loss": 0.208, "step": 2808 }, { "epoch": 0.29111825059591667, "grad_norm": 0.48112618923187256, "learning_rate": 3.326612498595666e-05, "loss": 0.2366, "step": 2809 }, { "epoch": 0.2912218882785781, "grad_norm": 0.514846920967102, "learning_rate": 3.326110018742619e-05, "loss": 0.2296, "step": 2810 }, { "epoch": 0.2913255259612395, "grad_norm": 0.534676194190979, "learning_rate": 3.325607389465557e-05, "loss": 0.2436, "step": 2811 }, { "epoch": 0.2914291636439009, "grad_norm": 0.5063111782073975, "learning_rate": 3.3251046108211146e-05, "loss": 0.2508, "step": 2812 }, { "epoch": 0.29153280132656234, "grad_norm": 0.4884932041168213, "learning_rate": 3.324601682865945e-05, "loss": 0.1963, "step": 2813 }, { "epoch": 0.29163643900922376, "grad_norm": 0.4822821617126465, "learning_rate": 3.324098605656716e-05, "loss": 0.2566, "step": 2814 }, { "epoch": 0.2917400766918852, "grad_norm": 0.5030156970024109, "learning_rate": 3.323595379250116e-05, "loss": 0.2319, "step": 2815 }, { "epoch": 0.2918437143745466, "grad_norm": 0.5082794427871704, "learning_rate": 3.3230920037028454e-05, "loss": 0.2419, "step": 2816 }, { "epoch": 0.291947352057208, "grad_norm": 0.5310083627700806, "learning_rate": 3.322588479071624e-05, "loss": 0.2438, "step": 2817 }, { "epoch": 0.2920509897398694, "grad_norm": 0.5204180479049683, "learning_rate": 3.3220848054131894e-05, "loss": 0.2373, "step": 2818 }, { "epoch": 0.29215462742253084, "grad_norm": 0.5026124119758606, "learning_rate": 3.321580982784294e-05, "loss": 0.2799, "step": 2819 }, { "epoch": 0.29225826510519226, "grad_norm": 0.48803022503852844, "learning_rate": 3.321077011241708e-05, "loss": 0.2551, "step": 2820 }, { "epoch": 0.2923619027878537, "grad_norm": 0.5668957829475403, "learning_rate": 3.3205728908422185e-05, "loss": 0.2575, "step": 2821 }, { "epoch": 0.2924655404705151, "grad_norm": 0.468801885843277, "learning_rate": 3.320068621642627e-05, "loss": 0.2291, "step": 2822 }, { "epoch": 0.2925691781531765, "grad_norm": 0.4148104190826416, "learning_rate": 3.3195642036997565e-05, "loss": 0.2065, "step": 2823 }, { "epoch": 0.29267281583583793, "grad_norm": 0.4796377420425415, "learning_rate": 3.319059637070443e-05, "loss": 0.2096, "step": 2824 }, { "epoch": 0.29277645351849935, "grad_norm": 0.46540844440460205, "learning_rate": 3.31855492181154e-05, "loss": 0.2421, "step": 2825 }, { "epoch": 0.29288009120116076, "grad_norm": 0.5077078938484192, "learning_rate": 3.3180500579799174e-05, "loss": 0.2703, "step": 2826 }, { "epoch": 0.2929837288838222, "grad_norm": 0.4614660441875458, "learning_rate": 3.317545045632464e-05, "loss": 0.1994, "step": 2827 }, { "epoch": 0.2930873665664836, "grad_norm": 0.46856820583343506, "learning_rate": 3.3170398848260824e-05, "loss": 0.2523, "step": 2828 }, { "epoch": 0.293191004249145, "grad_norm": 0.40745294094085693, "learning_rate": 3.316534575617694e-05, "loss": 0.1807, "step": 2829 }, { "epoch": 0.29329464193180643, "grad_norm": 0.47301098704338074, "learning_rate": 3.316029118064237e-05, "loss": 0.2208, "step": 2830 }, { "epoch": 0.2933982796144678, "grad_norm": 0.49375954270362854, "learning_rate": 3.3155235122226644e-05, "loss": 0.2289, "step": 2831 }, { "epoch": 0.2935019172971292, "grad_norm": 0.4718109667301178, "learning_rate": 3.315017758149947e-05, "loss": 0.2461, "step": 2832 }, { "epoch": 0.29360555497979063, "grad_norm": 0.45232123136520386, "learning_rate": 3.314511855903074e-05, "loss": 0.1911, "step": 2833 }, { "epoch": 0.29370919266245205, "grad_norm": 0.46630412340164185, "learning_rate": 3.314005805539047e-05, "loss": 0.2235, "step": 2834 }, { "epoch": 0.29381283034511346, "grad_norm": 0.4936429262161255, "learning_rate": 3.313499607114889e-05, "loss": 0.2478, "step": 2835 }, { "epoch": 0.2939164680277749, "grad_norm": 0.5034680366516113, "learning_rate": 3.312993260687638e-05, "loss": 0.2093, "step": 2836 }, { "epoch": 0.2940201057104363, "grad_norm": 0.5205070972442627, "learning_rate": 3.3124867663143465e-05, "loss": 0.2508, "step": 2837 }, { "epoch": 0.2941237433930977, "grad_norm": 0.47330421209335327, "learning_rate": 3.311980124052087e-05, "loss": 0.2285, "step": 2838 }, { "epoch": 0.29422738107575913, "grad_norm": 0.6019695997238159, "learning_rate": 3.3114733339579466e-05, "loss": 0.2709, "step": 2839 }, { "epoch": 0.29433101875842055, "grad_norm": 0.543933093547821, "learning_rate": 3.3109663960890294e-05, "loss": 0.2559, "step": 2840 }, { "epoch": 0.29443465644108197, "grad_norm": 0.5584891438484192, "learning_rate": 3.3104593105024566e-05, "loss": 0.2622, "step": 2841 }, { "epoch": 0.2945382941237434, "grad_norm": 0.48917511105537415, "learning_rate": 3.309952077255366e-05, "loss": 0.2082, "step": 2842 }, { "epoch": 0.2946419318064048, "grad_norm": 0.4935635030269623, "learning_rate": 3.309444696404912e-05, "loss": 0.2342, "step": 2843 }, { "epoch": 0.2947455694890662, "grad_norm": 0.4728579819202423, "learning_rate": 3.308937168008265e-05, "loss": 0.2032, "step": 2844 }, { "epoch": 0.29484920717172763, "grad_norm": 0.44754189252853394, "learning_rate": 3.308429492122612e-05, "loss": 0.1801, "step": 2845 }, { "epoch": 0.29495284485438905, "grad_norm": 0.508368730545044, "learning_rate": 3.307921668805158e-05, "loss": 0.1915, "step": 2846 }, { "epoch": 0.29505648253705047, "grad_norm": 0.5484712719917297, "learning_rate": 3.307413698113125e-05, "loss": 0.2658, "step": 2847 }, { "epoch": 0.2951601202197119, "grad_norm": 0.4943729043006897, "learning_rate": 3.306905580103747e-05, "loss": 0.2126, "step": 2848 }, { "epoch": 0.2952637579023733, "grad_norm": 0.4991382360458374, "learning_rate": 3.306397314834281e-05, "loss": 0.2014, "step": 2849 }, { "epoch": 0.2953673955850347, "grad_norm": 0.4928804636001587, "learning_rate": 3.305888902361996e-05, "loss": 0.2488, "step": 2850 }, { "epoch": 0.29547103326769614, "grad_norm": 0.5445026159286499, "learning_rate": 3.3053803427441794e-05, "loss": 0.2528, "step": 2851 }, { "epoch": 0.29557467095035755, "grad_norm": 0.3906521797180176, "learning_rate": 3.304871636038136e-05, "loss": 0.1731, "step": 2852 }, { "epoch": 0.29567830863301897, "grad_norm": 0.45666688680648804, "learning_rate": 3.3043627823011835e-05, "loss": 0.2324, "step": 2853 }, { "epoch": 0.2957819463156804, "grad_norm": 0.39771783351898193, "learning_rate": 3.3038537815906614e-05, "loss": 0.1834, "step": 2854 }, { "epoch": 0.2958855839983418, "grad_norm": 0.4375212788581848, "learning_rate": 3.303344633963922e-05, "loss": 0.1887, "step": 2855 }, { "epoch": 0.2959892216810032, "grad_norm": 0.5007089376449585, "learning_rate": 3.3028353394783356e-05, "loss": 0.2079, "step": 2856 }, { "epoch": 0.29609285936366464, "grad_norm": 0.4184623956680298, "learning_rate": 3.302325898191287e-05, "loss": 0.1885, "step": 2857 }, { "epoch": 0.29619649704632606, "grad_norm": 0.4855022430419922, "learning_rate": 3.3018163101601826e-05, "loss": 0.2266, "step": 2858 }, { "epoch": 0.2963001347289875, "grad_norm": 0.38218221068382263, "learning_rate": 3.301306575442439e-05, "loss": 0.1572, "step": 2859 }, { "epoch": 0.2964037724116489, "grad_norm": 0.5013300180435181, "learning_rate": 3.3007966940954935e-05, "loss": 0.2348, "step": 2860 }, { "epoch": 0.2965074100943103, "grad_norm": 0.4487498104572296, "learning_rate": 3.300286666176799e-05, "loss": 0.2037, "step": 2861 }, { "epoch": 0.2966110477769717, "grad_norm": 0.49756789207458496, "learning_rate": 3.2997764917438244e-05, "loss": 0.2194, "step": 2862 }, { "epoch": 0.29671468545963314, "grad_norm": 0.4958988428115845, "learning_rate": 3.299266170854055e-05, "loss": 0.1784, "step": 2863 }, { "epoch": 0.29681832314229456, "grad_norm": 0.5324316620826721, "learning_rate": 3.298755703564993e-05, "loss": 0.2172, "step": 2864 }, { "epoch": 0.296921960824956, "grad_norm": 0.4497959017753601, "learning_rate": 3.298245089934158e-05, "loss": 0.1974, "step": 2865 }, { "epoch": 0.2970255985076174, "grad_norm": 0.4527725577354431, "learning_rate": 3.297734330019083e-05, "loss": 0.1927, "step": 2866 }, { "epoch": 0.2971292361902788, "grad_norm": 0.49358245730400085, "learning_rate": 3.2972234238773216e-05, "loss": 0.2326, "step": 2867 }, { "epoch": 0.29723287387294023, "grad_norm": 0.4673818349838257, "learning_rate": 3.296712371566442e-05, "loss": 0.2101, "step": 2868 }, { "epoch": 0.2973365115556016, "grad_norm": 0.8467883467674255, "learning_rate": 3.296201173144028e-05, "loss": 0.2568, "step": 2869 }, { "epoch": 0.297440149238263, "grad_norm": 0.5352072715759277, "learning_rate": 3.295689828667681e-05, "loss": 0.2206, "step": 2870 }, { "epoch": 0.2975437869209244, "grad_norm": 0.5007523894309998, "learning_rate": 3.2951783381950174e-05, "loss": 0.2206, "step": 2871 }, { "epoch": 0.29764742460358584, "grad_norm": 0.515024721622467, "learning_rate": 3.294666701783673e-05, "loss": 0.2372, "step": 2872 }, { "epoch": 0.29775106228624726, "grad_norm": 0.5650159120559692, "learning_rate": 3.2941549194912964e-05, "loss": 0.2429, "step": 2873 }, { "epoch": 0.2978546999689087, "grad_norm": 0.33148810267448425, "learning_rate": 3.293642991375556e-05, "loss": 0.1549, "step": 2874 }, { "epoch": 0.2979583376515701, "grad_norm": 0.6188404560089111, "learning_rate": 3.293130917494134e-05, "loss": 0.2466, "step": 2875 }, { "epoch": 0.2980619753342315, "grad_norm": 0.451937735080719, "learning_rate": 3.29261869790473e-05, "loss": 0.184, "step": 2876 }, { "epoch": 0.2981656130168929, "grad_norm": 0.47771209478378296, "learning_rate": 3.292106332665061e-05, "loss": 0.2318, "step": 2877 }, { "epoch": 0.29826925069955434, "grad_norm": 0.4931339919567108, "learning_rate": 3.291593821832859e-05, "loss": 0.2409, "step": 2878 }, { "epoch": 0.29837288838221576, "grad_norm": 0.44261494278907776, "learning_rate": 3.2910811654658734e-05, "loss": 0.2072, "step": 2879 }, { "epoch": 0.2984765260648772, "grad_norm": 0.5541775226593018, "learning_rate": 3.2905683636218684e-05, "loss": 0.2444, "step": 2880 }, { "epoch": 0.2985801637475386, "grad_norm": 0.46059805154800415, "learning_rate": 3.290055416358627e-05, "loss": 0.2344, "step": 2881 }, { "epoch": 0.2986838014302, "grad_norm": 0.47794246673583984, "learning_rate": 3.2895423237339465e-05, "loss": 0.2564, "step": 2882 }, { "epoch": 0.29878743911286143, "grad_norm": 0.46349892020225525, "learning_rate": 3.2890290858056415e-05, "loss": 0.2233, "step": 2883 }, { "epoch": 0.29889107679552285, "grad_norm": 0.40743401646614075, "learning_rate": 3.288515702631543e-05, "loss": 0.2031, "step": 2884 }, { "epoch": 0.29899471447818426, "grad_norm": 0.5463640689849854, "learning_rate": 3.288002174269498e-05, "loss": 0.2295, "step": 2885 }, { "epoch": 0.2990983521608457, "grad_norm": 0.46196120977401733, "learning_rate": 3.28748850077737e-05, "loss": 0.2328, "step": 2886 }, { "epoch": 0.2992019898435071, "grad_norm": 0.48436087369918823, "learning_rate": 3.28697468221304e-05, "loss": 0.2364, "step": 2887 }, { "epoch": 0.2993056275261685, "grad_norm": 0.49515339732170105, "learning_rate": 3.2864607186344026e-05, "loss": 0.2263, "step": 2888 }, { "epoch": 0.29940926520882993, "grad_norm": 0.49403858184814453, "learning_rate": 3.2859466100993723e-05, "loss": 0.2266, "step": 2889 }, { "epoch": 0.29951290289149135, "grad_norm": 0.47844377160072327, "learning_rate": 3.2854323566658765e-05, "loss": 0.2178, "step": 2890 }, { "epoch": 0.29961654057415277, "grad_norm": 0.5542422533035278, "learning_rate": 3.28491795839186e-05, "loss": 0.24, "step": 2891 }, { "epoch": 0.2997201782568142, "grad_norm": 0.48089051246643066, "learning_rate": 3.284403415335287e-05, "loss": 0.2399, "step": 2892 }, { "epoch": 0.2998238159394756, "grad_norm": 0.44947952032089233, "learning_rate": 3.283888727554133e-05, "loss": 0.2078, "step": 2893 }, { "epoch": 0.299927453622137, "grad_norm": 0.47643810510635376, "learning_rate": 3.283373895106393e-05, "loss": 0.225, "step": 2894 }, { "epoch": 0.30003109130479844, "grad_norm": 0.4375708997249603, "learning_rate": 3.282858918050078e-05, "loss": 0.2326, "step": 2895 }, { "epoch": 0.30013472898745985, "grad_norm": 0.42022502422332764, "learning_rate": 3.282343796443214e-05, "loss": 0.2018, "step": 2896 }, { "epoch": 0.30023836667012127, "grad_norm": 0.47073331475257874, "learning_rate": 3.2818285303438436e-05, "loss": 0.2165, "step": 2897 }, { "epoch": 0.3003420043527827, "grad_norm": 0.41679564118385315, "learning_rate": 3.281313119810028e-05, "loss": 0.2027, "step": 2898 }, { "epoch": 0.3004456420354441, "grad_norm": 0.459945410490036, "learning_rate": 3.2807975648998426e-05, "loss": 0.2193, "step": 2899 }, { "epoch": 0.3005492797181055, "grad_norm": 0.5290501713752747, "learning_rate": 3.280281865671378e-05, "loss": 0.2539, "step": 2900 }, { "epoch": 0.30065291740076694, "grad_norm": 0.45214882493019104, "learning_rate": 3.279766022182742e-05, "loss": 0.1958, "step": 2901 }, { "epoch": 0.30075655508342836, "grad_norm": 0.48252326250076294, "learning_rate": 3.279250034492061e-05, "loss": 0.2137, "step": 2902 }, { "epoch": 0.3008601927660898, "grad_norm": 0.47179168462753296, "learning_rate": 3.278733902657475e-05, "loss": 0.2163, "step": 2903 }, { "epoch": 0.3009638304487512, "grad_norm": 0.49783962965011597, "learning_rate": 3.2782176267371405e-05, "loss": 0.2305, "step": 2904 }, { "epoch": 0.3010674681314126, "grad_norm": 0.5070213079452515, "learning_rate": 3.277701206789231e-05, "loss": 0.2238, "step": 2905 }, { "epoch": 0.301171105814074, "grad_norm": 0.534799337387085, "learning_rate": 3.2771846428719346e-05, "loss": 0.2196, "step": 2906 }, { "epoch": 0.3012747434967354, "grad_norm": 0.519961953163147, "learning_rate": 3.276667935043459e-05, "loss": 0.2191, "step": 2907 }, { "epoch": 0.3013783811793968, "grad_norm": 0.48025771975517273, "learning_rate": 3.276151083362025e-05, "loss": 0.2283, "step": 2908 }, { "epoch": 0.3014820188620582, "grad_norm": 0.4781363606452942, "learning_rate": 3.2756340878858705e-05, "loss": 0.2372, "step": 2909 }, { "epoch": 0.30158565654471964, "grad_norm": 0.5782521367073059, "learning_rate": 3.27511694867325e-05, "loss": 0.2735, "step": 2910 }, { "epoch": 0.30168929422738106, "grad_norm": 0.47211775183677673, "learning_rate": 3.2745996657824344e-05, "loss": 0.2132, "step": 2911 }, { "epoch": 0.3017929319100425, "grad_norm": 0.5187753438949585, "learning_rate": 3.27408223927171e-05, "loss": 0.2434, "step": 2912 }, { "epoch": 0.3018965695927039, "grad_norm": 0.4415639042854309, "learning_rate": 3.273564669199379e-05, "loss": 0.2159, "step": 2913 }, { "epoch": 0.3020002072753653, "grad_norm": 0.3430590033531189, "learning_rate": 3.273046955623761e-05, "loss": 0.1583, "step": 2914 }, { "epoch": 0.3021038449580267, "grad_norm": 0.5086635947227478, "learning_rate": 3.272529098603191e-05, "loss": 0.2336, "step": 2915 }, { "epoch": 0.30220748264068814, "grad_norm": 0.5063747763633728, "learning_rate": 3.272011098196019e-05, "loss": 0.2242, "step": 2916 }, { "epoch": 0.30231112032334956, "grad_norm": 0.4871509075164795, "learning_rate": 3.271492954460616e-05, "loss": 0.2192, "step": 2917 }, { "epoch": 0.302414758006011, "grad_norm": 0.49528932571411133, "learning_rate": 3.270974667455363e-05, "loss": 0.2385, "step": 2918 }, { "epoch": 0.3025183956886724, "grad_norm": 0.5143001675605774, "learning_rate": 3.27045623723866e-05, "loss": 0.2266, "step": 2919 }, { "epoch": 0.3026220333713338, "grad_norm": 0.4750981032848358, "learning_rate": 3.269937663868923e-05, "loss": 0.2625, "step": 2920 }, { "epoch": 0.3027256710539952, "grad_norm": 0.4782578945159912, "learning_rate": 3.269418947404584e-05, "loss": 0.2223, "step": 2921 }, { "epoch": 0.30282930873665664, "grad_norm": 0.5503315329551697, "learning_rate": 3.268900087904092e-05, "loss": 0.2176, "step": 2922 }, { "epoch": 0.30293294641931806, "grad_norm": 0.447047621011734, "learning_rate": 3.26838108542591e-05, "loss": 0.201, "step": 2923 }, { "epoch": 0.3030365841019795, "grad_norm": 0.46707892417907715, "learning_rate": 3.2678619400285194e-05, "loss": 0.2134, "step": 2924 }, { "epoch": 0.3031402217846409, "grad_norm": 0.42201143503189087, "learning_rate": 3.267342651770416e-05, "loss": 0.1949, "step": 2925 }, { "epoch": 0.3032438594673023, "grad_norm": 0.4469546675682068, "learning_rate": 3.266823220710113e-05, "loss": 0.2177, "step": 2926 }, { "epoch": 0.30334749714996373, "grad_norm": 0.5175836086273193, "learning_rate": 3.266303646906138e-05, "loss": 0.2585, "step": 2927 }, { "epoch": 0.30345113483262515, "grad_norm": 0.44420644640922546, "learning_rate": 3.2657839304170376e-05, "loss": 0.2004, "step": 2928 }, { "epoch": 0.30355477251528656, "grad_norm": 0.589979350566864, "learning_rate": 3.2652640713013716e-05, "loss": 0.2362, "step": 2929 }, { "epoch": 0.303658410197948, "grad_norm": 0.4705340266227722, "learning_rate": 3.264744069617716e-05, "loss": 0.2222, "step": 2930 }, { "epoch": 0.3037620478806094, "grad_norm": 0.47983983159065247, "learning_rate": 3.2642239254246654e-05, "loss": 0.2291, "step": 2931 }, { "epoch": 0.3038656855632708, "grad_norm": 0.3722575306892395, "learning_rate": 3.263703638780828e-05, "loss": 0.1571, "step": 2932 }, { "epoch": 0.30396932324593223, "grad_norm": 0.5379013419151306, "learning_rate": 3.26318320974483e-05, "loss": 0.2528, "step": 2933 }, { "epoch": 0.30407296092859365, "grad_norm": 0.5010048151016235, "learning_rate": 3.2626626383753096e-05, "loss": 0.234, "step": 2934 }, { "epoch": 0.30417659861125507, "grad_norm": 0.4466943144798279, "learning_rate": 3.262141924730928e-05, "loss": 0.2191, "step": 2935 }, { "epoch": 0.3042802362939165, "grad_norm": 0.45711278915405273, "learning_rate": 3.261621068870355e-05, "loss": 0.2232, "step": 2936 }, { "epoch": 0.3043838739765779, "grad_norm": 0.4415625333786011, "learning_rate": 3.2611000708522816e-05, "loss": 0.1871, "step": 2937 }, { "epoch": 0.3044875116592393, "grad_norm": 0.505708634853363, "learning_rate": 3.260578930735413e-05, "loss": 0.2555, "step": 2938 }, { "epoch": 0.30459114934190074, "grad_norm": 0.4457675516605377, "learning_rate": 3.26005764857847e-05, "loss": 0.2082, "step": 2939 }, { "epoch": 0.30469478702456215, "grad_norm": 0.4445909857749939, "learning_rate": 3.259536224440189e-05, "loss": 0.1989, "step": 2940 }, { "epoch": 0.30479842470722357, "grad_norm": 0.5568691492080688, "learning_rate": 3.259014658379325e-05, "loss": 0.2802, "step": 2941 }, { "epoch": 0.304902062389885, "grad_norm": 0.47887614369392395, "learning_rate": 3.258492950454647e-05, "loss": 0.2461, "step": 2942 }, { "epoch": 0.3050057000725464, "grad_norm": 0.4918515384197235, "learning_rate": 3.257971100724939e-05, "loss": 0.234, "step": 2943 }, { "epoch": 0.3051093377552078, "grad_norm": 0.5362921953201294, "learning_rate": 3.2574491092490035e-05, "loss": 0.2481, "step": 2944 }, { "epoch": 0.3052129754378692, "grad_norm": 0.6173157095909119, "learning_rate": 3.256926976085656e-05, "loss": 0.2721, "step": 2945 }, { "epoch": 0.3053166131205306, "grad_norm": 0.47653764486312866, "learning_rate": 3.2564047012937314e-05, "loss": 0.2253, "step": 2946 }, { "epoch": 0.305420250803192, "grad_norm": 0.4073546826839447, "learning_rate": 3.255882284932078e-05, "loss": 0.1797, "step": 2947 }, { "epoch": 0.30552388848585343, "grad_norm": 0.4936966598033905, "learning_rate": 3.2553597270595617e-05, "loss": 0.2383, "step": 2948 }, { "epoch": 0.30562752616851485, "grad_norm": 0.4709612727165222, "learning_rate": 3.2548370277350625e-05, "loss": 0.2253, "step": 2949 }, { "epoch": 0.30573116385117627, "grad_norm": 0.46805763244628906, "learning_rate": 3.254314187017477e-05, "loss": 0.1926, "step": 2950 }, { "epoch": 0.3058348015338377, "grad_norm": 0.4419136047363281, "learning_rate": 3.2537912049657197e-05, "loss": 0.1716, "step": 2951 }, { "epoch": 0.3059384392164991, "grad_norm": 0.4995832145214081, "learning_rate": 3.253268081638718e-05, "loss": 0.2273, "step": 2952 }, { "epoch": 0.3060420768991605, "grad_norm": 0.47150254249572754, "learning_rate": 3.2527448170954174e-05, "loss": 0.239, "step": 2953 }, { "epoch": 0.30614571458182194, "grad_norm": 0.48097914457321167, "learning_rate": 3.2522214113947775e-05, "loss": 0.2356, "step": 2954 }, { "epoch": 0.30624935226448335, "grad_norm": 0.48609909415245056, "learning_rate": 3.251697864595777e-05, "loss": 0.2218, "step": 2955 }, { "epoch": 0.30635298994714477, "grad_norm": 0.5264389514923096, "learning_rate": 3.2511741767574055e-05, "loss": 0.2461, "step": 2956 }, { "epoch": 0.3064566276298062, "grad_norm": 0.46607768535614014, "learning_rate": 3.250650347938673e-05, "loss": 0.1884, "step": 2957 }, { "epoch": 0.3065602653124676, "grad_norm": 0.45885488390922546, "learning_rate": 3.250126378198604e-05, "loss": 0.2234, "step": 2958 }, { "epoch": 0.306663902995129, "grad_norm": 0.5328109860420227, "learning_rate": 3.249602267596238e-05, "loss": 0.2408, "step": 2959 }, { "epoch": 0.30676754067779044, "grad_norm": 0.5008374452590942, "learning_rate": 3.249078016190631e-05, "loss": 0.2543, "step": 2960 }, { "epoch": 0.30687117836045186, "grad_norm": 0.5086766481399536, "learning_rate": 3.248553624040855e-05, "loss": 0.2036, "step": 2961 }, { "epoch": 0.3069748160431133, "grad_norm": 0.5206964015960693, "learning_rate": 3.248029091205997e-05, "loss": 0.1785, "step": 2962 }, { "epoch": 0.3070784537257747, "grad_norm": 0.4729783535003662, "learning_rate": 3.247504417745162e-05, "loss": 0.2269, "step": 2963 }, { "epoch": 0.3071820914084361, "grad_norm": 0.5227089524269104, "learning_rate": 3.246979603717467e-05, "loss": 0.2451, "step": 2964 }, { "epoch": 0.3072857290910975, "grad_norm": 0.4764210879802704, "learning_rate": 3.24645464918205e-05, "loss": 0.2012, "step": 2965 }, { "epoch": 0.30738936677375894, "grad_norm": 0.4624175429344177, "learning_rate": 3.245929554198061e-05, "loss": 0.228, "step": 2966 }, { "epoch": 0.30749300445642036, "grad_norm": 0.4398234784603119, "learning_rate": 3.245404318824665e-05, "loss": 0.2152, "step": 2967 }, { "epoch": 0.3075966421390818, "grad_norm": 0.6094075441360474, "learning_rate": 3.2448789431210484e-05, "loss": 0.2587, "step": 2968 }, { "epoch": 0.3077002798217432, "grad_norm": 0.5074710249900818, "learning_rate": 3.2443534271464066e-05, "loss": 0.2235, "step": 2969 }, { "epoch": 0.3078039175044046, "grad_norm": 0.5189184546470642, "learning_rate": 3.2438277709599556e-05, "loss": 0.225, "step": 2970 }, { "epoch": 0.30790755518706603, "grad_norm": 0.458265095949173, "learning_rate": 3.243301974620924e-05, "loss": 0.2228, "step": 2971 }, { "epoch": 0.30801119286972745, "grad_norm": 0.47438445687294006, "learning_rate": 3.242776038188559e-05, "loss": 0.2019, "step": 2972 }, { "epoch": 0.30811483055238886, "grad_norm": 0.4507431387901306, "learning_rate": 3.242249961722122e-05, "loss": 0.1969, "step": 2973 }, { "epoch": 0.3082184682350503, "grad_norm": 0.45547452569007874, "learning_rate": 3.2417237452808906e-05, "loss": 0.2172, "step": 2974 }, { "epoch": 0.3083221059177117, "grad_norm": 0.4992049038410187, "learning_rate": 3.2411973889241575e-05, "loss": 0.2085, "step": 2975 }, { "epoch": 0.3084257436003731, "grad_norm": 0.5008047223091125, "learning_rate": 3.240670892711233e-05, "loss": 0.2157, "step": 2976 }, { "epoch": 0.30852938128303453, "grad_norm": 0.5114787817001343, "learning_rate": 3.24014425670144e-05, "loss": 0.2332, "step": 2977 }, { "epoch": 0.30863301896569595, "grad_norm": 0.5216313600540161, "learning_rate": 3.2396174809541204e-05, "loss": 0.2363, "step": 2978 }, { "epoch": 0.30873665664835737, "grad_norm": 0.45231184363365173, "learning_rate": 3.23909056552863e-05, "loss": 0.1808, "step": 2979 }, { "epoch": 0.3088402943310188, "grad_norm": 0.4798497259616852, "learning_rate": 3.238563510484341e-05, "loss": 0.2297, "step": 2980 }, { "epoch": 0.3089439320136802, "grad_norm": 0.5364487767219543, "learning_rate": 3.2380363158806404e-05, "loss": 0.2599, "step": 2981 }, { "epoch": 0.3090475696963416, "grad_norm": 0.5060192346572876, "learning_rate": 3.237508981776933e-05, "loss": 0.2063, "step": 2982 }, { "epoch": 0.309151207379003, "grad_norm": 0.4609908163547516, "learning_rate": 3.2369815082326375e-05, "loss": 0.2181, "step": 2983 }, { "epoch": 0.3092548450616644, "grad_norm": 0.4750822186470032, "learning_rate": 3.236453895307188e-05, "loss": 0.224, "step": 2984 }, { "epoch": 0.3093584827443258, "grad_norm": 0.5447022914886475, "learning_rate": 3.235926143060036e-05, "loss": 0.2667, "step": 2985 }, { "epoch": 0.30946212042698723, "grad_norm": 0.4615114629268646, "learning_rate": 3.2353982515506474e-05, "loss": 0.2105, "step": 2986 }, { "epoch": 0.30956575810964865, "grad_norm": 0.5125647783279419, "learning_rate": 3.234870220838504e-05, "loss": 0.2268, "step": 2987 }, { "epoch": 0.30966939579231006, "grad_norm": 0.42592471837997437, "learning_rate": 3.234342050983104e-05, "loss": 0.1967, "step": 2988 }, { "epoch": 0.3097730334749715, "grad_norm": 0.4169655442237854, "learning_rate": 3.2338137420439605e-05, "loss": 0.1783, "step": 2989 }, { "epoch": 0.3098766711576329, "grad_norm": 0.4965342879295349, "learning_rate": 3.2332852940806026e-05, "loss": 0.2288, "step": 2990 }, { "epoch": 0.3099803088402943, "grad_norm": 0.5323944091796875, "learning_rate": 3.232756707152575e-05, "loss": 0.2666, "step": 2991 }, { "epoch": 0.31008394652295573, "grad_norm": 0.44411584734916687, "learning_rate": 3.232227981319438e-05, "loss": 0.1984, "step": 2992 }, { "epoch": 0.31018758420561715, "grad_norm": 0.4794510006904602, "learning_rate": 3.231699116640768e-05, "loss": 0.2346, "step": 2993 }, { "epoch": 0.31029122188827857, "grad_norm": 0.5558497309684753, "learning_rate": 3.2311701131761545e-05, "loss": 0.2594, "step": 2994 }, { "epoch": 0.31039485957094, "grad_norm": 0.5508059859275818, "learning_rate": 3.230640970985208e-05, "loss": 0.2549, "step": 2995 }, { "epoch": 0.3104984972536014, "grad_norm": 0.47948360443115234, "learning_rate": 3.23011169012755e-05, "loss": 0.2273, "step": 2996 }, { "epoch": 0.3106021349362628, "grad_norm": 0.4396754801273346, "learning_rate": 3.229582270662819e-05, "loss": 0.2065, "step": 2997 }, { "epoch": 0.31070577261892424, "grad_norm": 0.5111278891563416, "learning_rate": 3.229052712650669e-05, "loss": 0.2326, "step": 2998 }, { "epoch": 0.31080941030158565, "grad_norm": 0.536485493183136, "learning_rate": 3.228523016150769e-05, "loss": 0.2356, "step": 2999 }, { "epoch": 0.31091304798424707, "grad_norm": 0.4807487726211548, "learning_rate": 3.2279931812228066e-05, "loss": 0.1892, "step": 3000 }, { "epoch": 0.3110166856669085, "grad_norm": 0.476895272731781, "learning_rate": 3.2274632079264806e-05, "loss": 0.2202, "step": 3001 }, { "epoch": 0.3111203233495699, "grad_norm": 0.5374165773391724, "learning_rate": 3.226933096321509e-05, "loss": 0.2163, "step": 3002 }, { "epoch": 0.3112239610322313, "grad_norm": 0.4013517200946808, "learning_rate": 3.2264028464676235e-05, "loss": 0.1641, "step": 3003 }, { "epoch": 0.31132759871489274, "grad_norm": 0.45430639386177063, "learning_rate": 3.2258724584245714e-05, "loss": 0.2065, "step": 3004 }, { "epoch": 0.31143123639755416, "grad_norm": 0.5279176831245422, "learning_rate": 3.225341932252117e-05, "loss": 0.2276, "step": 3005 }, { "epoch": 0.3115348740802156, "grad_norm": 0.433173805475235, "learning_rate": 3.224811268010037e-05, "loss": 0.2105, "step": 3006 }, { "epoch": 0.311638511762877, "grad_norm": 0.4613426923751831, "learning_rate": 3.224280465758129e-05, "loss": 0.2328, "step": 3007 }, { "epoch": 0.3117421494455384, "grad_norm": 0.5462455153465271, "learning_rate": 3.2237495255562e-05, "loss": 0.2742, "step": 3008 }, { "epoch": 0.3118457871281998, "grad_norm": 0.45191264152526855, "learning_rate": 3.223218447464078e-05, "loss": 0.2205, "step": 3009 }, { "epoch": 0.31194942481086124, "grad_norm": 0.42294684052467346, "learning_rate": 3.222687231541602e-05, "loss": 0.1812, "step": 3010 }, { "epoch": 0.31205306249352266, "grad_norm": 0.4990582764148712, "learning_rate": 3.2221558778486306e-05, "loss": 0.2395, "step": 3011 }, { "epoch": 0.3121567001761841, "grad_norm": 0.4808881878852844, "learning_rate": 3.221624386445034e-05, "loss": 0.2356, "step": 3012 }, { "epoch": 0.3122603378588455, "grad_norm": 0.5097794532775879, "learning_rate": 3.2210927573907005e-05, "loss": 0.2568, "step": 3013 }, { "epoch": 0.3123639755415069, "grad_norm": 0.47635650634765625, "learning_rate": 3.220560990745533e-05, "loss": 0.2498, "step": 3014 }, { "epoch": 0.31246761322416833, "grad_norm": 0.5116894841194153, "learning_rate": 3.220029086569451e-05, "loss": 0.2193, "step": 3015 }, { "epoch": 0.31257125090682975, "grad_norm": 0.43708619475364685, "learning_rate": 3.219497044922387e-05, "loss": 0.2245, "step": 3016 }, { "epoch": 0.31267488858949116, "grad_norm": 0.4694969058036804, "learning_rate": 3.218964865864293e-05, "loss": 0.2264, "step": 3017 }, { "epoch": 0.3127785262721526, "grad_norm": 0.5043584108352661, "learning_rate": 3.2184325494551324e-05, "loss": 0.2223, "step": 3018 }, { "epoch": 0.312882163954814, "grad_norm": 0.5066149830818176, "learning_rate": 3.2179000957548864e-05, "loss": 0.2381, "step": 3019 }, { "epoch": 0.3129858016374754, "grad_norm": 0.46746236085891724, "learning_rate": 3.217367504823551e-05, "loss": 0.2025, "step": 3020 }, { "epoch": 0.3130894393201368, "grad_norm": 0.5089457631111145, "learning_rate": 3.216834776721137e-05, "loss": 0.2606, "step": 3021 }, { "epoch": 0.3131930770027982, "grad_norm": 0.4705091714859009, "learning_rate": 3.2163019115076726e-05, "loss": 0.2394, "step": 3022 }, { "epoch": 0.3132967146854596, "grad_norm": 0.46200108528137207, "learning_rate": 3.215768909243199e-05, "loss": 0.2027, "step": 3023 }, { "epoch": 0.313400352368121, "grad_norm": 0.5282918214797974, "learning_rate": 3.215235769987775e-05, "loss": 0.2104, "step": 3024 }, { "epoch": 0.31350399005078244, "grad_norm": 0.4366593360900879, "learning_rate": 3.2147024938014736e-05, "loss": 0.2224, "step": 3025 }, { "epoch": 0.31360762773344386, "grad_norm": 0.5153645277023315, "learning_rate": 3.214169080744383e-05, "loss": 0.2207, "step": 3026 }, { "epoch": 0.3137112654161053, "grad_norm": 0.5471642017364502, "learning_rate": 3.2136355308766084e-05, "loss": 0.2624, "step": 3027 }, { "epoch": 0.3138149030987667, "grad_norm": 0.5268349051475525, "learning_rate": 3.213101844258269e-05, "loss": 0.2451, "step": 3028 }, { "epoch": 0.3139185407814281, "grad_norm": 0.43142902851104736, "learning_rate": 3.2125680209494994e-05, "loss": 0.2066, "step": 3029 }, { "epoch": 0.31402217846408953, "grad_norm": 0.5392974019050598, "learning_rate": 3.21203406101045e-05, "loss": 0.2476, "step": 3030 }, { "epoch": 0.31412581614675095, "grad_norm": 0.46632081270217896, "learning_rate": 3.211499964501286e-05, "loss": 0.2199, "step": 3031 }, { "epoch": 0.31422945382941236, "grad_norm": 0.42282339930534363, "learning_rate": 3.21096573148219e-05, "loss": 0.1972, "step": 3032 }, { "epoch": 0.3143330915120738, "grad_norm": 0.4383908212184906, "learning_rate": 3.210431362013358e-05, "loss": 0.1942, "step": 3033 }, { "epoch": 0.3144367291947352, "grad_norm": 0.4615064263343811, "learning_rate": 3.2098968561550024e-05, "loss": 0.186, "step": 3034 }, { "epoch": 0.3145403668773966, "grad_norm": 0.4781564474105835, "learning_rate": 3.209362213967349e-05, "loss": 0.1935, "step": 3035 }, { "epoch": 0.31464400456005803, "grad_norm": 0.48859652876853943, "learning_rate": 3.208827435510642e-05, "loss": 0.2017, "step": 3036 }, { "epoch": 0.31474764224271945, "grad_norm": 0.5409640669822693, "learning_rate": 3.208292520845138e-05, "loss": 0.2363, "step": 3037 }, { "epoch": 0.31485127992538087, "grad_norm": 0.5108470320701599, "learning_rate": 3.2077574700311115e-05, "loss": 0.2529, "step": 3038 }, { "epoch": 0.3149549176080423, "grad_norm": 0.48213520646095276, "learning_rate": 3.20722228312885e-05, "loss": 0.1981, "step": 3039 }, { "epoch": 0.3150585552907037, "grad_norm": 0.45971915125846863, "learning_rate": 3.206686960198659e-05, "loss": 0.2123, "step": 3040 }, { "epoch": 0.3151621929733651, "grad_norm": 0.5029280185699463, "learning_rate": 3.206151501300857e-05, "loss": 0.2282, "step": 3041 }, { "epoch": 0.31526583065602654, "grad_norm": 0.4633747935295105, "learning_rate": 3.205615906495779e-05, "loss": 0.2132, "step": 3042 }, { "epoch": 0.31536946833868795, "grad_norm": 0.49723488092422485, "learning_rate": 3.2050801758437744e-05, "loss": 0.2267, "step": 3043 }, { "epoch": 0.31547310602134937, "grad_norm": 0.4490548372268677, "learning_rate": 3.204544309405209e-05, "loss": 0.2058, "step": 3044 }, { "epoch": 0.3155767437040108, "grad_norm": 0.4246513247489929, "learning_rate": 3.204008307240464e-05, "loss": 0.174, "step": 3045 }, { "epoch": 0.3156803813866722, "grad_norm": 0.5136212706565857, "learning_rate": 3.203472169409934e-05, "loss": 0.2301, "step": 3046 }, { "epoch": 0.3157840190693336, "grad_norm": 0.5009952783584595, "learning_rate": 3.202935895974031e-05, "loss": 0.2162, "step": 3047 }, { "epoch": 0.31588765675199504, "grad_norm": 0.49895063042640686, "learning_rate": 3.202399486993181e-05, "loss": 0.2342, "step": 3048 }, { "epoch": 0.31599129443465646, "grad_norm": 0.5760830044746399, "learning_rate": 3.2018629425278266e-05, "loss": 0.2478, "step": 3049 }, { "epoch": 0.3160949321173179, "grad_norm": 0.48224908113479614, "learning_rate": 3.201326262638423e-05, "loss": 0.2174, "step": 3050 }, { "epoch": 0.3161985697999793, "grad_norm": 0.5179287791252136, "learning_rate": 3.200789447385445e-05, "loss": 0.2583, "step": 3051 }, { "epoch": 0.3163022074826407, "grad_norm": 0.46274638175964355, "learning_rate": 3.200252496829378e-05, "loss": 0.2115, "step": 3052 }, { "epoch": 0.3164058451653021, "grad_norm": 0.4526350796222687, "learning_rate": 3.199715411030726e-05, "loss": 0.1978, "step": 3053 }, { "epoch": 0.31650948284796354, "grad_norm": 0.5232929587364197, "learning_rate": 3.1991781900500054e-05, "loss": 0.2694, "step": 3054 }, { "epoch": 0.31661312053062496, "grad_norm": 0.5103795528411865, "learning_rate": 3.1986408339477515e-05, "loss": 0.222, "step": 3055 }, { "epoch": 0.3167167582132864, "grad_norm": 0.4008198082447052, "learning_rate": 3.198103342784511e-05, "loss": 0.1697, "step": 3056 }, { "epoch": 0.3168203958959478, "grad_norm": 0.45790600776672363, "learning_rate": 3.1975657166208486e-05, "loss": 0.2185, "step": 3057 }, { "epoch": 0.3169240335786092, "grad_norm": 0.5262759923934937, "learning_rate": 3.197027955517343e-05, "loss": 0.2439, "step": 3058 }, { "epoch": 0.31702767126127057, "grad_norm": 0.47552964091300964, "learning_rate": 3.196490059534588e-05, "loss": 0.2011, "step": 3059 }, { "epoch": 0.317131308943932, "grad_norm": 0.4066140353679657, "learning_rate": 3.195952028733193e-05, "loss": 0.1874, "step": 3060 }, { "epoch": 0.3172349466265934, "grad_norm": 0.49806711077690125, "learning_rate": 3.1954138631737836e-05, "loss": 0.2417, "step": 3061 }, { "epoch": 0.3173385843092548, "grad_norm": 0.49791496992111206, "learning_rate": 3.194875562916997e-05, "loss": 0.2377, "step": 3062 }, { "epoch": 0.31744222199191624, "grad_norm": 0.5316081047058105, "learning_rate": 3.1943371280234905e-05, "loss": 0.2532, "step": 3063 }, { "epoch": 0.31754585967457766, "grad_norm": 0.48708289861679077, "learning_rate": 3.193798558553933e-05, "loss": 0.2109, "step": 3064 }, { "epoch": 0.3176494973572391, "grad_norm": 0.5030457377433777, "learning_rate": 3.193259854569009e-05, "loss": 0.2195, "step": 3065 }, { "epoch": 0.3177531350399005, "grad_norm": 0.4297417104244232, "learning_rate": 3.19272101612942e-05, "loss": 0.1726, "step": 3066 }, { "epoch": 0.3178567727225619, "grad_norm": 0.5806198120117188, "learning_rate": 3.192182043295881e-05, "loss": 0.2898, "step": 3067 }, { "epoch": 0.3179604104052233, "grad_norm": 0.49950316548347473, "learning_rate": 3.191642936129122e-05, "loss": 0.2381, "step": 3068 }, { "epoch": 0.31806404808788474, "grad_norm": 0.5107114315032959, "learning_rate": 3.1911036946898896e-05, "loss": 0.2421, "step": 3069 }, { "epoch": 0.31816768577054616, "grad_norm": 0.5184438228607178, "learning_rate": 3.190564319038945e-05, "loss": 0.2508, "step": 3070 }, { "epoch": 0.3182713234532076, "grad_norm": 0.504603385925293, "learning_rate": 3.190024809237064e-05, "loss": 0.223, "step": 3071 }, { "epoch": 0.318374961135869, "grad_norm": 0.43039676547050476, "learning_rate": 3.189485165345037e-05, "loss": 0.1966, "step": 3072 }, { "epoch": 0.3184785988185304, "grad_norm": 0.43356984853744507, "learning_rate": 3.188945387423671e-05, "loss": 0.1816, "step": 3073 }, { "epoch": 0.31858223650119183, "grad_norm": 0.439496248960495, "learning_rate": 3.188405475533786e-05, "loss": 0.1978, "step": 3074 }, { "epoch": 0.31868587418385325, "grad_norm": 0.5059715509414673, "learning_rate": 3.1878654297362196e-05, "loss": 0.2591, "step": 3075 }, { "epoch": 0.31878951186651466, "grad_norm": 0.4735623002052307, "learning_rate": 3.187325250091824e-05, "loss": 0.2131, "step": 3076 }, { "epoch": 0.3188931495491761, "grad_norm": 0.4958748519420624, "learning_rate": 3.1867849366614644e-05, "loss": 0.2159, "step": 3077 }, { "epoch": 0.3189967872318375, "grad_norm": 0.48477983474731445, "learning_rate": 3.186244489506024e-05, "loss": 0.2237, "step": 3078 }, { "epoch": 0.3191004249144989, "grad_norm": 0.4658282399177551, "learning_rate": 3.185703908686397e-05, "loss": 0.2029, "step": 3079 }, { "epoch": 0.31920406259716033, "grad_norm": 0.4622492492198944, "learning_rate": 3.185163194263497e-05, "loss": 0.2376, "step": 3080 }, { "epoch": 0.31930770027982175, "grad_norm": 0.4980875849723816, "learning_rate": 3.184622346298252e-05, "loss": 0.2247, "step": 3081 }, { "epoch": 0.31941133796248317, "grad_norm": 0.5297892689704895, "learning_rate": 3.1840813648516015e-05, "loss": 0.2165, "step": 3082 }, { "epoch": 0.3195149756451446, "grad_norm": 0.5445452332496643, "learning_rate": 3.183540249984504e-05, "loss": 0.2828, "step": 3083 }, { "epoch": 0.319618613327806, "grad_norm": 0.4954063892364502, "learning_rate": 3.1829990017579306e-05, "loss": 0.2117, "step": 3084 }, { "epoch": 0.3197222510104674, "grad_norm": 0.4327438473701477, "learning_rate": 3.182457620232868e-05, "loss": 0.2227, "step": 3085 }, { "epoch": 0.31982588869312883, "grad_norm": 0.4633021950721741, "learning_rate": 3.18191610547032e-05, "loss": 0.2006, "step": 3086 }, { "epoch": 0.31992952637579025, "grad_norm": 0.43436625599861145, "learning_rate": 3.181374457531303e-05, "loss": 0.195, "step": 3087 }, { "epoch": 0.32003316405845167, "grad_norm": 0.5254188179969788, "learning_rate": 3.180832676476848e-05, "loss": 0.238, "step": 3088 }, { "epoch": 0.3201368017411131, "grad_norm": 0.4394558370113373, "learning_rate": 3.180290762368002e-05, "loss": 0.2024, "step": 3089 }, { "epoch": 0.3202404394237745, "grad_norm": 0.4608902335166931, "learning_rate": 3.179748715265828e-05, "loss": 0.1795, "step": 3090 }, { "epoch": 0.3203440771064359, "grad_norm": 0.4589252173900604, "learning_rate": 3.179206535231403e-05, "loss": 0.1954, "step": 3091 }, { "epoch": 0.32044771478909734, "grad_norm": 0.5103592276573181, "learning_rate": 3.1786642223258186e-05, "loss": 0.2091, "step": 3092 }, { "epoch": 0.32055135247175875, "grad_norm": 0.45052146911621094, "learning_rate": 3.178121776610182e-05, "loss": 0.1921, "step": 3093 }, { "epoch": 0.32065499015442017, "grad_norm": 0.49646496772766113, "learning_rate": 3.177579198145615e-05, "loss": 0.2066, "step": 3094 }, { "epoch": 0.3207586278370816, "grad_norm": 0.4916592836380005, "learning_rate": 3.177036486993255e-05, "loss": 0.2183, "step": 3095 }, { "epoch": 0.320862265519743, "grad_norm": 0.5783033967018127, "learning_rate": 3.1764936432142525e-05, "loss": 0.2631, "step": 3096 }, { "epoch": 0.32096590320240437, "grad_norm": 0.4790973663330078, "learning_rate": 3.175950666869776e-05, "loss": 0.2235, "step": 3097 }, { "epoch": 0.3210695408850658, "grad_norm": 1.142399549484253, "learning_rate": 3.1754075580210054e-05, "loss": 0.2191, "step": 3098 }, { "epoch": 0.3211731785677272, "grad_norm": 0.5207923054695129, "learning_rate": 3.174864316729139e-05, "loss": 0.2476, "step": 3099 }, { "epoch": 0.3212768162503886, "grad_norm": 0.5381038784980774, "learning_rate": 3.1743209430553883e-05, "loss": 0.2596, "step": 3100 }, { "epoch": 0.32138045393305004, "grad_norm": 0.5408893823623657, "learning_rate": 3.173777437060978e-05, "loss": 0.2685, "step": 3101 }, { "epoch": 0.32148409161571145, "grad_norm": 0.521721601486206, "learning_rate": 3.173233798807152e-05, "loss": 0.2394, "step": 3102 }, { "epoch": 0.32158772929837287, "grad_norm": 0.43176308274269104, "learning_rate": 3.172690028355165e-05, "loss": 0.2122, "step": 3103 }, { "epoch": 0.3216913669810343, "grad_norm": 0.431378036737442, "learning_rate": 3.172146125766288e-05, "loss": 0.1887, "step": 3104 }, { "epoch": 0.3217950046636957, "grad_norm": 0.47518616914749146, "learning_rate": 3.171602091101808e-05, "loss": 0.257, "step": 3105 }, { "epoch": 0.3218986423463571, "grad_norm": 0.46573805809020996, "learning_rate": 3.171057924423026e-05, "loss": 0.2172, "step": 3106 }, { "epoch": 0.32200228002901854, "grad_norm": 0.4244065582752228, "learning_rate": 3.170513625791257e-05, "loss": 0.1887, "step": 3107 }, { "epoch": 0.32210591771167996, "grad_norm": 0.45914992690086365, "learning_rate": 3.169969195267833e-05, "loss": 0.2254, "step": 3108 }, { "epoch": 0.3222095553943414, "grad_norm": 0.4996054768562317, "learning_rate": 3.169424632914098e-05, "loss": 0.2468, "step": 3109 }, { "epoch": 0.3223131930770028, "grad_norm": 0.49699288606643677, "learning_rate": 3.168879938791413e-05, "loss": 0.2291, "step": 3110 }, { "epoch": 0.3224168307596642, "grad_norm": 0.4879097640514374, "learning_rate": 3.1683351129611547e-05, "loss": 0.2367, "step": 3111 }, { "epoch": 0.3225204684423256, "grad_norm": 0.46923696994781494, "learning_rate": 3.1677901554847116e-05, "loss": 0.2089, "step": 3112 }, { "epoch": 0.32262410612498704, "grad_norm": 0.46386706829071045, "learning_rate": 3.16724506642349e-05, "loss": 0.2316, "step": 3113 }, { "epoch": 0.32272774380764846, "grad_norm": 0.4610229432582855, "learning_rate": 3.166699845838907e-05, "loss": 0.2175, "step": 3114 }, { "epoch": 0.3228313814903099, "grad_norm": 0.5416106581687927, "learning_rate": 3.1661544937923996e-05, "loss": 0.2118, "step": 3115 }, { "epoch": 0.3229350191729713, "grad_norm": 0.5286328196525574, "learning_rate": 3.1656090103454174e-05, "loss": 0.2406, "step": 3116 }, { "epoch": 0.3230386568556327, "grad_norm": 0.5512104630470276, "learning_rate": 3.1650633955594235e-05, "loss": 0.2189, "step": 3117 }, { "epoch": 0.32314229453829413, "grad_norm": 0.4454720914363861, "learning_rate": 3.164517649495898e-05, "loss": 0.2077, "step": 3118 }, { "epoch": 0.32324593222095555, "grad_norm": 0.5066326260566711, "learning_rate": 3.163971772216333e-05, "loss": 0.2053, "step": 3119 }, { "epoch": 0.32334956990361696, "grad_norm": 0.4619614779949188, "learning_rate": 3.163425763782238e-05, "loss": 0.2108, "step": 3120 }, { "epoch": 0.3234532075862784, "grad_norm": 0.5279788374900818, "learning_rate": 3.1628796242551374e-05, "loss": 0.2304, "step": 3121 }, { "epoch": 0.3235568452689398, "grad_norm": 0.509197473526001, "learning_rate": 3.1623333536965684e-05, "loss": 0.2144, "step": 3122 }, { "epoch": 0.3236604829516012, "grad_norm": 0.5262678861618042, "learning_rate": 3.161786952168083e-05, "loss": 0.2838, "step": 3123 }, { "epoch": 0.32376412063426263, "grad_norm": 0.4825746715068817, "learning_rate": 3.161240419731251e-05, "loss": 0.2476, "step": 3124 }, { "epoch": 0.32386775831692405, "grad_norm": 0.4855850636959076, "learning_rate": 3.160693756447654e-05, "loss": 0.2653, "step": 3125 }, { "epoch": 0.32397139599958547, "grad_norm": 0.4569106698036194, "learning_rate": 3.160146962378887e-05, "loss": 0.2241, "step": 3126 }, { "epoch": 0.3240750336822469, "grad_norm": 0.47725579142570496, "learning_rate": 3.159600037586565e-05, "loss": 0.2526, "step": 3127 }, { "epoch": 0.3241786713649083, "grad_norm": 0.4275054633617401, "learning_rate": 3.1590529821323134e-05, "loss": 0.1906, "step": 3128 }, { "epoch": 0.3242823090475697, "grad_norm": 0.472245991230011, "learning_rate": 3.1585057960777735e-05, "loss": 0.2525, "step": 3129 }, { "epoch": 0.32438594673023113, "grad_norm": 0.4678281843662262, "learning_rate": 3.1579584794846015e-05, "loss": 0.2226, "step": 3130 }, { "epoch": 0.32448958441289255, "grad_norm": 0.4702921211719513, "learning_rate": 3.1574110324144676e-05, "loss": 0.1996, "step": 3131 }, { "epoch": 0.32459322209555397, "grad_norm": 0.4830780029296875, "learning_rate": 3.156863454929059e-05, "loss": 0.2251, "step": 3132 }, { "epoch": 0.3246968597782154, "grad_norm": 0.4437926411628723, "learning_rate": 3.156315747090073e-05, "loss": 0.1927, "step": 3133 }, { "epoch": 0.3248004974608768, "grad_norm": 0.5422351956367493, "learning_rate": 3.1557679089592274e-05, "loss": 0.222, "step": 3134 }, { "epoch": 0.32490413514353816, "grad_norm": 0.48913538455963135, "learning_rate": 3.15521994059825e-05, "loss": 0.2249, "step": 3135 }, { "epoch": 0.3250077728261996, "grad_norm": 0.43056854605674744, "learning_rate": 3.154671842068886e-05, "loss": 0.1881, "step": 3136 }, { "epoch": 0.325111410508861, "grad_norm": 0.41883689165115356, "learning_rate": 3.154123613432893e-05, "loss": 0.1765, "step": 3137 }, { "epoch": 0.3252150481915224, "grad_norm": 0.5446802973747253, "learning_rate": 3.1535752547520456e-05, "loss": 0.2483, "step": 3138 }, { "epoch": 0.32531868587418383, "grad_norm": 0.4917711615562439, "learning_rate": 3.153026766088132e-05, "loss": 0.2339, "step": 3139 }, { "epoch": 0.32542232355684525, "grad_norm": 0.5216474533081055, "learning_rate": 3.152478147502954e-05, "loss": 0.259, "step": 3140 }, { "epoch": 0.32552596123950667, "grad_norm": 0.4385351240634918, "learning_rate": 3.1519293990583316e-05, "loss": 0.1905, "step": 3141 }, { "epoch": 0.3256295989221681, "grad_norm": 0.527709424495697, "learning_rate": 3.151380520816094e-05, "loss": 0.2384, "step": 3142 }, { "epoch": 0.3257332366048295, "grad_norm": 0.5104259252548218, "learning_rate": 3.15083151283809e-05, "loss": 0.2303, "step": 3143 }, { "epoch": 0.3258368742874909, "grad_norm": 0.45804575085639954, "learning_rate": 3.150282375186179e-05, "loss": 0.2083, "step": 3144 }, { "epoch": 0.32594051197015234, "grad_norm": 0.5373900532722473, "learning_rate": 3.149733107922239e-05, "loss": 0.2249, "step": 3145 }, { "epoch": 0.32604414965281375, "grad_norm": 0.4487946927547455, "learning_rate": 3.149183711108159e-05, "loss": 0.1957, "step": 3146 }, { "epoch": 0.32614778733547517, "grad_norm": 0.48644623160362244, "learning_rate": 3.148634184805845e-05, "loss": 0.2174, "step": 3147 }, { "epoch": 0.3262514250181366, "grad_norm": 0.536426305770874, "learning_rate": 3.1480845290772176e-05, "loss": 0.2566, "step": 3148 }, { "epoch": 0.326355062700798, "grad_norm": 0.4516526758670807, "learning_rate": 3.147534743984209e-05, "loss": 0.2121, "step": 3149 }, { "epoch": 0.3264587003834594, "grad_norm": 0.5031577348709106, "learning_rate": 3.146984829588769e-05, "loss": 0.2263, "step": 3150 }, { "epoch": 0.32656233806612084, "grad_norm": 0.4382554590702057, "learning_rate": 3.146434785952863e-05, "loss": 0.2149, "step": 3151 }, { "epoch": 0.32666597574878226, "grad_norm": 0.5372516512870789, "learning_rate": 3.1458846131384666e-05, "loss": 0.2253, "step": 3152 }, { "epoch": 0.3267696134314437, "grad_norm": 0.530376672744751, "learning_rate": 3.145334311207574e-05, "loss": 0.2312, "step": 3153 }, { "epoch": 0.3268732511141051, "grad_norm": 0.4759053587913513, "learning_rate": 3.144783880222191e-05, "loss": 0.2315, "step": 3154 }, { "epoch": 0.3269768887967665, "grad_norm": 0.4735720753669739, "learning_rate": 3.1442333202443394e-05, "loss": 0.2465, "step": 3155 }, { "epoch": 0.3270805264794279, "grad_norm": 0.525758683681488, "learning_rate": 3.1436826313360565e-05, "loss": 0.2394, "step": 3156 }, { "epoch": 0.32718416416208934, "grad_norm": 0.5047840476036072, "learning_rate": 3.1431318135593936e-05, "loss": 0.2349, "step": 3157 }, { "epoch": 0.32728780184475076, "grad_norm": 0.4203908443450928, "learning_rate": 3.142580866976414e-05, "loss": 0.193, "step": 3158 }, { "epoch": 0.3273914395274122, "grad_norm": 0.442714124917984, "learning_rate": 3.142029791649198e-05, "loss": 0.1932, "step": 3159 }, { "epoch": 0.3274950772100736, "grad_norm": 0.5398364663124084, "learning_rate": 3.1414785876398416e-05, "loss": 0.2462, "step": 3160 }, { "epoch": 0.327598714892735, "grad_norm": 0.5328658819198608, "learning_rate": 3.140927255010452e-05, "loss": 0.2018, "step": 3161 }, { "epoch": 0.3277023525753964, "grad_norm": 0.5347991585731506, "learning_rate": 3.140375793823152e-05, "loss": 0.2357, "step": 3162 }, { "epoch": 0.32780599025805784, "grad_norm": 0.40971866250038147, "learning_rate": 3.139824204140082e-05, "loss": 0.1614, "step": 3163 }, { "epoch": 0.32790962794071926, "grad_norm": 0.4941691756248474, "learning_rate": 3.139272486023391e-05, "loss": 0.2036, "step": 3164 }, { "epoch": 0.3280132656233807, "grad_norm": 0.42997175455093384, "learning_rate": 3.1387206395352486e-05, "loss": 0.2139, "step": 3165 }, { "epoch": 0.3281169033060421, "grad_norm": 0.531755805015564, "learning_rate": 3.138168664737833e-05, "loss": 0.2309, "step": 3166 }, { "epoch": 0.3282205409887035, "grad_norm": 0.5963094830513, "learning_rate": 3.137616561693343e-05, "loss": 0.2677, "step": 3167 }, { "epoch": 0.32832417867136493, "grad_norm": 0.4682312309741974, "learning_rate": 3.137064330463987e-05, "loss": 0.2123, "step": 3168 }, { "epoch": 0.32842781635402635, "grad_norm": 0.5144062638282776, "learning_rate": 3.13651197111199e-05, "loss": 0.2315, "step": 3169 }, { "epoch": 0.32853145403668776, "grad_norm": 0.5011259317398071, "learning_rate": 3.1359594836995906e-05, "loss": 0.219, "step": 3170 }, { "epoch": 0.3286350917193492, "grad_norm": 0.508209228515625, "learning_rate": 3.135406868289042e-05, "loss": 0.258, "step": 3171 }, { "epoch": 0.3287387294020106, "grad_norm": 0.5223318338394165, "learning_rate": 3.134854124942613e-05, "loss": 0.2294, "step": 3172 }, { "epoch": 0.32884236708467196, "grad_norm": 0.3559339642524719, "learning_rate": 3.134301253722585e-05, "loss": 0.1636, "step": 3173 }, { "epoch": 0.3289460047673334, "grad_norm": 0.4399315118789673, "learning_rate": 3.133748254691256e-05, "loss": 0.2027, "step": 3174 }, { "epoch": 0.3290496424499948, "grad_norm": 0.48089268803596497, "learning_rate": 3.1331951279109354e-05, "loss": 0.2416, "step": 3175 }, { "epoch": 0.3291532801326562, "grad_norm": 0.5257017016410828, "learning_rate": 3.1326418734439495e-05, "loss": 0.243, "step": 3176 }, { "epoch": 0.32925691781531763, "grad_norm": 0.46509110927581787, "learning_rate": 3.132088491352638e-05, "loss": 0.2278, "step": 3177 }, { "epoch": 0.32936055549797905, "grad_norm": 0.5531212091445923, "learning_rate": 3.131534981699355e-05, "loss": 0.2315, "step": 3178 }, { "epoch": 0.32946419318064046, "grad_norm": 0.43978351354599, "learning_rate": 3.1309813445464695e-05, "loss": 0.2014, "step": 3179 }, { "epoch": 0.3295678308633019, "grad_norm": 0.5329275131225586, "learning_rate": 3.1304275799563645e-05, "loss": 0.2421, "step": 3180 }, { "epoch": 0.3296714685459633, "grad_norm": 0.47121936082839966, "learning_rate": 3.1298736879914364e-05, "loss": 0.2161, "step": 3181 }, { "epoch": 0.3297751062286247, "grad_norm": 0.4731251001358032, "learning_rate": 3.1293196687140973e-05, "loss": 0.2332, "step": 3182 }, { "epoch": 0.32987874391128613, "grad_norm": 0.4779028594493866, "learning_rate": 3.128765522186774e-05, "loss": 0.1887, "step": 3183 }, { "epoch": 0.32998238159394755, "grad_norm": 0.45643532276153564, "learning_rate": 3.1282112484719066e-05, "loss": 0.21, "step": 3184 }, { "epoch": 0.33008601927660897, "grad_norm": 0.4478624761104584, "learning_rate": 3.1276568476319495e-05, "loss": 0.2212, "step": 3185 }, { "epoch": 0.3301896569592704, "grad_norm": 0.5598623752593994, "learning_rate": 3.127102319729372e-05, "loss": 0.2548, "step": 3186 }, { "epoch": 0.3302932946419318, "grad_norm": 0.492170512676239, "learning_rate": 3.1265476648266565e-05, "loss": 0.2141, "step": 3187 }, { "epoch": 0.3303969323245932, "grad_norm": 0.475373238325119, "learning_rate": 3.125992882986302e-05, "loss": 0.2155, "step": 3188 }, { "epoch": 0.33050057000725463, "grad_norm": 0.4140654504299164, "learning_rate": 3.1254379742708195e-05, "loss": 0.2101, "step": 3189 }, { "epoch": 0.33060420768991605, "grad_norm": 0.4968665838241577, "learning_rate": 3.124882938742736e-05, "loss": 0.2455, "step": 3190 }, { "epoch": 0.33070784537257747, "grad_norm": 0.49836060404777527, "learning_rate": 3.1243277764645905e-05, "loss": 0.2396, "step": 3191 }, { "epoch": 0.3308114830552389, "grad_norm": 0.373451828956604, "learning_rate": 3.1237724874989405e-05, "loss": 0.1591, "step": 3192 }, { "epoch": 0.3309151207379003, "grad_norm": 0.5305958390235901, "learning_rate": 3.1232170719083525e-05, "loss": 0.2475, "step": 3193 }, { "epoch": 0.3310187584205617, "grad_norm": 0.40864086151123047, "learning_rate": 3.1226615297554114e-05, "loss": 0.187, "step": 3194 }, { "epoch": 0.33112239610322314, "grad_norm": 0.43606048822402954, "learning_rate": 3.122105861102714e-05, "loss": 0.2194, "step": 3195 }, { "epoch": 0.33122603378588455, "grad_norm": 0.4913009703159332, "learning_rate": 3.121550066012873e-05, "loss": 0.2425, "step": 3196 }, { "epoch": 0.33132967146854597, "grad_norm": 0.4447884261608124, "learning_rate": 3.120994144548513e-05, "loss": 0.2283, "step": 3197 }, { "epoch": 0.3314333091512074, "grad_norm": 0.4598451256752014, "learning_rate": 3.120438096772277e-05, "loss": 0.2035, "step": 3198 }, { "epoch": 0.3315369468338688, "grad_norm": 0.4184786379337311, "learning_rate": 3.1198819227468166e-05, "loss": 0.2052, "step": 3199 }, { "epoch": 0.3316405845165302, "grad_norm": 0.517906665802002, "learning_rate": 3.1193256225348025e-05, "loss": 0.2555, "step": 3200 }, { "epoch": 0.33174422219919164, "grad_norm": 0.5278466939926147, "learning_rate": 3.1187691961989184e-05, "loss": 0.2461, "step": 3201 }, { "epoch": 0.33184785988185306, "grad_norm": 0.3784679174423218, "learning_rate": 3.118212643801859e-05, "loss": 0.1559, "step": 3202 }, { "epoch": 0.3319514975645145, "grad_norm": 0.47298792004585266, "learning_rate": 3.1176559654063375e-05, "loss": 0.2083, "step": 3203 }, { "epoch": 0.3320551352471759, "grad_norm": 0.48570263385772705, "learning_rate": 3.1170991610750795e-05, "loss": 0.2348, "step": 3204 }, { "epoch": 0.3321587729298373, "grad_norm": 0.5153487920761108, "learning_rate": 3.116542230870824e-05, "loss": 0.2424, "step": 3205 }, { "epoch": 0.3322624106124987, "grad_norm": 0.45193734765052795, "learning_rate": 3.1159851748563265e-05, "loss": 0.2048, "step": 3206 }, { "epoch": 0.33236604829516014, "grad_norm": 0.5584096312522888, "learning_rate": 3.115427993094354e-05, "loss": 0.256, "step": 3207 }, { "epoch": 0.33246968597782156, "grad_norm": 0.4924525320529938, "learning_rate": 3.114870685647688e-05, "loss": 0.2356, "step": 3208 }, { "epoch": 0.332573323660483, "grad_norm": 0.4525347948074341, "learning_rate": 3.1143132525791275e-05, "loss": 0.1851, "step": 3209 }, { "epoch": 0.3326769613431444, "grad_norm": 0.4745256304740906, "learning_rate": 3.113755693951482e-05, "loss": 0.1991, "step": 3210 }, { "epoch": 0.33278059902580576, "grad_norm": 0.40173184871673584, "learning_rate": 3.113198009827576e-05, "loss": 0.1902, "step": 3211 }, { "epoch": 0.3328842367084672, "grad_norm": 0.5420322418212891, "learning_rate": 3.1126402002702495e-05, "loss": 0.2449, "step": 3212 }, { "epoch": 0.3329878743911286, "grad_norm": 0.5138987898826599, "learning_rate": 3.112082265342354e-05, "loss": 0.233, "step": 3213 }, { "epoch": 0.33309151207379, "grad_norm": 0.49910151958465576, "learning_rate": 3.1115242051067574e-05, "loss": 0.2059, "step": 3214 }, { "epoch": 0.3331951497564514, "grad_norm": 0.5475865602493286, "learning_rate": 3.110966019626342e-05, "loss": 0.2524, "step": 3215 }, { "epoch": 0.33329878743911284, "grad_norm": 0.4959619641304016, "learning_rate": 3.1104077089640016e-05, "loss": 0.2182, "step": 3216 }, { "epoch": 0.33340242512177426, "grad_norm": 0.5604843497276306, "learning_rate": 3.109849273182648e-05, "loss": 0.2404, "step": 3217 }, { "epoch": 0.3335060628044357, "grad_norm": 0.6018540859222412, "learning_rate": 3.1092907123452024e-05, "loss": 0.2359, "step": 3218 }, { "epoch": 0.3336097004870971, "grad_norm": 0.4417458772659302, "learning_rate": 3.108732026514604e-05, "loss": 0.1685, "step": 3219 }, { "epoch": 0.3337133381697585, "grad_norm": 0.4794652760028839, "learning_rate": 3.108173215753805e-05, "loss": 0.2495, "step": 3220 }, { "epoch": 0.33381697585241993, "grad_norm": 0.5345839858055115, "learning_rate": 3.10761428012577e-05, "loss": 0.2436, "step": 3221 }, { "epoch": 0.33392061353508135, "grad_norm": 0.5043509602546692, "learning_rate": 3.1070552196934803e-05, "loss": 0.2336, "step": 3222 }, { "epoch": 0.33402425121774276, "grad_norm": 0.5134027004241943, "learning_rate": 3.106496034519929e-05, "loss": 0.2336, "step": 3223 }, { "epoch": 0.3341278889004042, "grad_norm": 0.5828744769096375, "learning_rate": 3.105936724668125e-05, "loss": 0.2761, "step": 3224 }, { "epoch": 0.3342315265830656, "grad_norm": 0.5718686580657959, "learning_rate": 3.10537729020109e-05, "loss": 0.2309, "step": 3225 }, { "epoch": 0.334335164265727, "grad_norm": 0.45800167322158813, "learning_rate": 3.10481773118186e-05, "loss": 0.2076, "step": 3226 }, { "epoch": 0.33443880194838843, "grad_norm": 0.45891281962394714, "learning_rate": 3.104258047673486e-05, "loss": 0.1819, "step": 3227 }, { "epoch": 0.33454243963104985, "grad_norm": 0.4926428198814392, "learning_rate": 3.103698239739031e-05, "loss": 0.1926, "step": 3228 }, { "epoch": 0.33464607731371127, "grad_norm": 0.5176846385002136, "learning_rate": 3.103138307441575e-05, "loss": 0.2335, "step": 3229 }, { "epoch": 0.3347497149963727, "grad_norm": 0.509946882724762, "learning_rate": 3.102578250844209e-05, "loss": 0.2154, "step": 3230 }, { "epoch": 0.3348533526790341, "grad_norm": 0.5066658854484558, "learning_rate": 3.1020180700100395e-05, "loss": 0.2191, "step": 3231 }, { "epoch": 0.3349569903616955, "grad_norm": 0.4816665053367615, "learning_rate": 3.101457765002187e-05, "loss": 0.2324, "step": 3232 }, { "epoch": 0.33506062804435693, "grad_norm": 0.4046016335487366, "learning_rate": 3.100897335883786e-05, "loss": 0.177, "step": 3233 }, { "epoch": 0.33516426572701835, "grad_norm": 0.38172265887260437, "learning_rate": 3.100336782717984e-05, "loss": 0.1846, "step": 3234 }, { "epoch": 0.33526790340967977, "grad_norm": 0.45876002311706543, "learning_rate": 3.099776105567945e-05, "loss": 0.2034, "step": 3235 }, { "epoch": 0.3353715410923412, "grad_norm": 0.49584850668907166, "learning_rate": 3.099215304496843e-05, "loss": 0.2165, "step": 3236 }, { "epoch": 0.3354751787750026, "grad_norm": 0.5085937976837158, "learning_rate": 3.09865437956787e-05, "loss": 0.2458, "step": 3237 }, { "epoch": 0.335578816457664, "grad_norm": 0.49745145440101624, "learning_rate": 3.0980933308442295e-05, "loss": 0.2399, "step": 3238 }, { "epoch": 0.33568245414032544, "grad_norm": 0.48892679810523987, "learning_rate": 3.097532158389139e-05, "loss": 0.2472, "step": 3239 }, { "epoch": 0.33578609182298685, "grad_norm": 0.5717560648918152, "learning_rate": 3.09697086226583e-05, "loss": 0.2519, "step": 3240 }, { "epoch": 0.33588972950564827, "grad_norm": 0.42260032892227173, "learning_rate": 3.0964094425375515e-05, "loss": 0.1985, "step": 3241 }, { "epoch": 0.3359933671883097, "grad_norm": 0.38656240701675415, "learning_rate": 3.0958478992675606e-05, "loss": 0.1557, "step": 3242 }, { "epoch": 0.3360970048709711, "grad_norm": 0.49424460530281067, "learning_rate": 3.095286232519131e-05, "loss": 0.273, "step": 3243 }, { "epoch": 0.3362006425536325, "grad_norm": 0.49864912033081055, "learning_rate": 3.0947244423555526e-05, "loss": 0.2151, "step": 3244 }, { "epoch": 0.33630428023629394, "grad_norm": 0.4592277407646179, "learning_rate": 3.094162528840126e-05, "loss": 0.2061, "step": 3245 }, { "epoch": 0.33640791791895536, "grad_norm": 0.4824981987476349, "learning_rate": 3.0936004920361654e-05, "loss": 0.2311, "step": 3246 }, { "epoch": 0.3365115556016168, "grad_norm": 0.43453484773635864, "learning_rate": 3.0930383320070025e-05, "loss": 0.1828, "step": 3247 }, { "epoch": 0.33661519328427814, "grad_norm": 0.5099527835845947, "learning_rate": 3.092476048815979e-05, "loss": 0.2207, "step": 3248 }, { "epoch": 0.33671883096693955, "grad_norm": 0.4967183768749237, "learning_rate": 3.0919136425264525e-05, "loss": 0.2227, "step": 3249 }, { "epoch": 0.33682246864960097, "grad_norm": 0.46992024779319763, "learning_rate": 3.0913511132017943e-05, "loss": 0.2024, "step": 3250 }, { "epoch": 0.3369261063322624, "grad_norm": 0.47221264243125916, "learning_rate": 3.090788460905389e-05, "loss": 0.2158, "step": 3251 }, { "epoch": 0.3370297440149238, "grad_norm": 0.4187302887439728, "learning_rate": 3.090225685700636e-05, "loss": 0.1696, "step": 3252 }, { "epoch": 0.3371333816975852, "grad_norm": 0.4217827618122101, "learning_rate": 3.089662787650947e-05, "loss": 0.1975, "step": 3253 }, { "epoch": 0.33723701938024664, "grad_norm": 0.3924787640571594, "learning_rate": 3.089099766819749e-05, "loss": 0.1776, "step": 3254 }, { "epoch": 0.33734065706290806, "grad_norm": 0.5434726476669312, "learning_rate": 3.088536623270483e-05, "loss": 0.2242, "step": 3255 }, { "epoch": 0.3374442947455695, "grad_norm": 0.5202988982200623, "learning_rate": 3.0879733570666024e-05, "loss": 0.2304, "step": 3256 }, { "epoch": 0.3375479324282309, "grad_norm": 0.5428398251533508, "learning_rate": 3.0874099682715745e-05, "loss": 0.2248, "step": 3257 }, { "epoch": 0.3376515701108923, "grad_norm": 0.5170606970787048, "learning_rate": 3.086846456948882e-05, "loss": 0.2164, "step": 3258 }, { "epoch": 0.3377552077935537, "grad_norm": 0.4656817317008972, "learning_rate": 3.0862828231620206e-05, "loss": 0.2134, "step": 3259 }, { "epoch": 0.33785884547621514, "grad_norm": 0.5413587093353271, "learning_rate": 3.085719066974499e-05, "loss": 0.2403, "step": 3260 }, { "epoch": 0.33796248315887656, "grad_norm": 0.565420925617218, "learning_rate": 3.0851551884498414e-05, "loss": 0.2482, "step": 3261 }, { "epoch": 0.338066120841538, "grad_norm": 0.5669689178466797, "learning_rate": 3.084591187651583e-05, "loss": 0.2584, "step": 3262 }, { "epoch": 0.3381697585241994, "grad_norm": 0.4945541024208069, "learning_rate": 3.0840270646432765e-05, "loss": 0.2163, "step": 3263 }, { "epoch": 0.3382733962068608, "grad_norm": 0.5486595630645752, "learning_rate": 3.0834628194884854e-05, "loss": 0.2259, "step": 3264 }, { "epoch": 0.3383770338895222, "grad_norm": 0.4185464680194855, "learning_rate": 3.0828984522507875e-05, "loss": 0.1834, "step": 3265 }, { "epoch": 0.33848067157218364, "grad_norm": 0.49174776673316956, "learning_rate": 3.082333962993776e-05, "loss": 0.2183, "step": 3266 }, { "epoch": 0.33858430925484506, "grad_norm": 0.5619238615036011, "learning_rate": 3.0817693517810555e-05, "loss": 0.2336, "step": 3267 }, { "epoch": 0.3386879469375065, "grad_norm": 0.4658437669277191, "learning_rate": 3.081204618676246e-05, "loss": 0.2035, "step": 3268 }, { "epoch": 0.3387915846201679, "grad_norm": 0.4304681420326233, "learning_rate": 3.0806397637429815e-05, "loss": 0.1782, "step": 3269 }, { "epoch": 0.3388952223028293, "grad_norm": 0.49248623847961426, "learning_rate": 3.0800747870449085e-05, "loss": 0.1977, "step": 3270 }, { "epoch": 0.33899885998549073, "grad_norm": 0.44981497526168823, "learning_rate": 3.0795096886456864e-05, "loss": 0.2195, "step": 3271 }, { "epoch": 0.33910249766815215, "grad_norm": 0.5155792832374573, "learning_rate": 3.078944468608992e-05, "loss": 0.2303, "step": 3272 }, { "epoch": 0.33920613535081356, "grad_norm": 0.5373932123184204, "learning_rate": 3.078379126998511e-05, "loss": 0.235, "step": 3273 }, { "epoch": 0.339309773033475, "grad_norm": 0.5162752270698547, "learning_rate": 3.077813663877946e-05, "loss": 0.2134, "step": 3274 }, { "epoch": 0.3394134107161364, "grad_norm": 0.48236146569252014, "learning_rate": 3.077248079311015e-05, "loss": 0.2218, "step": 3275 }, { "epoch": 0.3395170483987978, "grad_norm": 0.5287761688232422, "learning_rate": 3.076682373361443e-05, "loss": 0.273, "step": 3276 }, { "epoch": 0.33962068608145923, "grad_norm": 0.4486253261566162, "learning_rate": 3.076116546092975e-05, "loss": 0.2095, "step": 3277 }, { "epoch": 0.33972432376412065, "grad_norm": 0.4924604296684265, "learning_rate": 3.075550597569369e-05, "loss": 0.2117, "step": 3278 }, { "epoch": 0.33982796144678207, "grad_norm": 0.45519301295280457, "learning_rate": 3.074984527854392e-05, "loss": 0.1761, "step": 3279 }, { "epoch": 0.3399315991294435, "grad_norm": 0.5044865608215332, "learning_rate": 3.074418337011831e-05, "loss": 0.1913, "step": 3280 }, { "epoch": 0.3400352368121049, "grad_norm": 0.5022374391555786, "learning_rate": 3.073852025105481e-05, "loss": 0.2132, "step": 3281 }, { "epoch": 0.3401388744947663, "grad_norm": 0.4834980070590973, "learning_rate": 3.073285592199154e-05, "loss": 0.2091, "step": 3282 }, { "epoch": 0.34024251217742774, "grad_norm": 0.5449108481407166, "learning_rate": 3.072719038356675e-05, "loss": 0.2529, "step": 3283 }, { "epoch": 0.34034614986008915, "grad_norm": 0.535485029220581, "learning_rate": 3.072152363641883e-05, "loss": 0.2137, "step": 3284 }, { "epoch": 0.34044978754275057, "grad_norm": 0.5348849296569824, "learning_rate": 3.0715855681186294e-05, "loss": 0.2221, "step": 3285 }, { "epoch": 0.34055342522541193, "grad_norm": 0.48995643854141235, "learning_rate": 3.0710186518507794e-05, "loss": 0.2252, "step": 3286 }, { "epoch": 0.34065706290807335, "grad_norm": 0.5089548230171204, "learning_rate": 3.0704516149022126e-05, "loss": 0.2416, "step": 3287 }, { "epoch": 0.34076070059073477, "grad_norm": 0.4678729176521301, "learning_rate": 3.069884457336822e-05, "loss": 0.208, "step": 3288 }, { "epoch": 0.3408643382733962, "grad_norm": 0.47132188081741333, "learning_rate": 3.069317179218513e-05, "loss": 0.1738, "step": 3289 }, { "epoch": 0.3409679759560576, "grad_norm": 0.46246013045310974, "learning_rate": 3.068749780611208e-05, "loss": 0.2065, "step": 3290 }, { "epoch": 0.341071613638719, "grad_norm": 0.5273382067680359, "learning_rate": 3.068182261578839e-05, "loss": 0.2094, "step": 3291 }, { "epoch": 0.34117525132138043, "grad_norm": 0.47908997535705566, "learning_rate": 3.067614622185352e-05, "loss": 0.2143, "step": 3292 }, { "epoch": 0.34127888900404185, "grad_norm": 0.45752736926078796, "learning_rate": 3.06704686249471e-05, "loss": 0.1816, "step": 3293 }, { "epoch": 0.34138252668670327, "grad_norm": 0.42608192563056946, "learning_rate": 3.066478982570886e-05, "loss": 0.1813, "step": 3294 }, { "epoch": 0.3414861643693647, "grad_norm": 0.5494682788848877, "learning_rate": 3.065910982477868e-05, "loss": 0.2782, "step": 3295 }, { "epoch": 0.3415898020520261, "grad_norm": 0.5171986818313599, "learning_rate": 3.065342862279658e-05, "loss": 0.2454, "step": 3296 }, { "epoch": 0.3416934397346875, "grad_norm": 0.4978666305541992, "learning_rate": 3.06477462204027e-05, "loss": 0.2121, "step": 3297 }, { "epoch": 0.34179707741734894, "grad_norm": 0.4874645471572876, "learning_rate": 3.0642062618237326e-05, "loss": 0.2193, "step": 3298 }, { "epoch": 0.34190071510001036, "grad_norm": 0.46120017766952515, "learning_rate": 3.063637781694088e-05, "loss": 0.2243, "step": 3299 }, { "epoch": 0.34200435278267177, "grad_norm": 0.5550026893615723, "learning_rate": 3.063069181715392e-05, "loss": 0.224, "step": 3300 }, { "epoch": 0.3421079904653332, "grad_norm": 0.47999265789985657, "learning_rate": 3.0625004619517136e-05, "loss": 0.2073, "step": 3301 }, { "epoch": 0.3422116281479946, "grad_norm": 0.5649874806404114, "learning_rate": 3.061931622467134e-05, "loss": 0.2269, "step": 3302 }, { "epoch": 0.342315265830656, "grad_norm": 0.4317651093006134, "learning_rate": 3.0613626633257504e-05, "loss": 0.1858, "step": 3303 }, { "epoch": 0.34241890351331744, "grad_norm": 0.4310738444328308, "learning_rate": 3.060793584591671e-05, "loss": 0.1749, "step": 3304 }, { "epoch": 0.34252254119597886, "grad_norm": 0.45287322998046875, "learning_rate": 3.060224386329021e-05, "loss": 0.1899, "step": 3305 }, { "epoch": 0.3426261788786403, "grad_norm": 0.577049970626831, "learning_rate": 3.059655068601934e-05, "loss": 0.2766, "step": 3306 }, { "epoch": 0.3427298165613017, "grad_norm": 0.47695785760879517, "learning_rate": 3.059085631474562e-05, "loss": 0.2051, "step": 3307 }, { "epoch": 0.3428334542439631, "grad_norm": 0.4532393515110016, "learning_rate": 3.0585160750110664e-05, "loss": 0.1965, "step": 3308 }, { "epoch": 0.3429370919266245, "grad_norm": 0.5475507974624634, "learning_rate": 3.057946399275626e-05, "loss": 0.2345, "step": 3309 }, { "epoch": 0.34304072960928594, "grad_norm": 0.4509216845035553, "learning_rate": 3.0573766043324294e-05, "loss": 0.1935, "step": 3310 }, { "epoch": 0.34314436729194736, "grad_norm": 0.5102453231811523, "learning_rate": 3.056806690245681e-05, "loss": 0.2255, "step": 3311 }, { "epoch": 0.3432480049746088, "grad_norm": 0.5190200805664062, "learning_rate": 3.056236657079597e-05, "loss": 0.2351, "step": 3312 }, { "epoch": 0.3433516426572702, "grad_norm": 0.4880451261997223, "learning_rate": 3.0556665048984094e-05, "loss": 0.2048, "step": 3313 }, { "epoch": 0.3434552803399316, "grad_norm": 0.48179447650909424, "learning_rate": 3.05509623376636e-05, "loss": 0.2381, "step": 3314 }, { "epoch": 0.34355891802259303, "grad_norm": 0.5229012966156006, "learning_rate": 3.054525843747708e-05, "loss": 0.2506, "step": 3315 }, { "epoch": 0.34366255570525445, "grad_norm": 0.5210829377174377, "learning_rate": 3.053955334906723e-05, "loss": 0.2234, "step": 3316 }, { "epoch": 0.34376619338791586, "grad_norm": 0.5315191745758057, "learning_rate": 3.053384707307689e-05, "loss": 0.246, "step": 3317 }, { "epoch": 0.3438698310705773, "grad_norm": 0.5583591461181641, "learning_rate": 3.052813961014904e-05, "loss": 0.2363, "step": 3318 }, { "epoch": 0.3439734687532387, "grad_norm": 0.4921298623085022, "learning_rate": 3.0522430960926786e-05, "loss": 0.2049, "step": 3319 }, { "epoch": 0.3440771064359001, "grad_norm": 0.5304951667785645, "learning_rate": 3.051672112605337e-05, "loss": 0.2364, "step": 3320 }, { "epoch": 0.34418074411856153, "grad_norm": 0.4428289234638214, "learning_rate": 3.051101010617216e-05, "loss": 0.2182, "step": 3321 }, { "epoch": 0.34428438180122295, "grad_norm": 0.5289511680603027, "learning_rate": 3.0505297901926672e-05, "loss": 0.265, "step": 3322 }, { "epoch": 0.34438801948388437, "grad_norm": 0.45246875286102295, "learning_rate": 3.0499584513960553e-05, "loss": 0.1823, "step": 3323 }, { "epoch": 0.34449165716654573, "grad_norm": 0.5081989169120789, "learning_rate": 3.0493869942917563e-05, "loss": 0.2345, "step": 3324 }, { "epoch": 0.34459529484920715, "grad_norm": 0.4332667589187622, "learning_rate": 3.0488154189441627e-05, "loss": 0.1824, "step": 3325 }, { "epoch": 0.34469893253186856, "grad_norm": 0.5207022428512573, "learning_rate": 3.0482437254176785e-05, "loss": 0.1947, "step": 3326 }, { "epoch": 0.34480257021453, "grad_norm": 0.5865523815155029, "learning_rate": 3.04767191377672e-05, "loss": 0.262, "step": 3327 }, { "epoch": 0.3449062078971914, "grad_norm": 0.5545551776885986, "learning_rate": 3.04709998408572e-05, "loss": 0.2542, "step": 3328 }, { "epoch": 0.3450098455798528, "grad_norm": 0.49805060029029846, "learning_rate": 3.0465279364091204e-05, "loss": 0.2334, "step": 3329 }, { "epoch": 0.34511348326251423, "grad_norm": 0.4513069689273834, "learning_rate": 3.0459557708113806e-05, "loss": 0.1891, "step": 3330 }, { "epoch": 0.34521712094517565, "grad_norm": 0.5274832248687744, "learning_rate": 3.0453834873569703e-05, "loss": 0.2066, "step": 3331 }, { "epoch": 0.34532075862783707, "grad_norm": 0.5223729610443115, "learning_rate": 3.0448110861103735e-05, "loss": 0.2185, "step": 3332 }, { "epoch": 0.3454243963104985, "grad_norm": 0.5173854827880859, "learning_rate": 3.0442385671360876e-05, "loss": 0.2362, "step": 3333 }, { "epoch": 0.3455280339931599, "grad_norm": 0.5483968257904053, "learning_rate": 3.043665930498623e-05, "loss": 0.2287, "step": 3334 }, { "epoch": 0.3456316716758213, "grad_norm": 0.549764096736908, "learning_rate": 3.0430931762625052e-05, "loss": 0.2352, "step": 3335 }, { "epoch": 0.34573530935848273, "grad_norm": 0.5403971672058105, "learning_rate": 3.0425203044922687e-05, "loss": 0.2391, "step": 3336 }, { "epoch": 0.34583894704114415, "grad_norm": 0.48038166761398315, "learning_rate": 3.041947315252465e-05, "loss": 0.2254, "step": 3337 }, { "epoch": 0.34594258472380557, "grad_norm": 0.5125417113304138, "learning_rate": 3.0413742086076577e-05, "loss": 0.2023, "step": 3338 }, { "epoch": 0.346046222406467, "grad_norm": 0.4226140081882477, "learning_rate": 3.040800984622423e-05, "loss": 0.1708, "step": 3339 }, { "epoch": 0.3461498600891284, "grad_norm": 0.4800489842891693, "learning_rate": 3.040227643361352e-05, "loss": 0.2031, "step": 3340 }, { "epoch": 0.3462534977717898, "grad_norm": 0.5459068417549133, "learning_rate": 3.0396541848890472e-05, "loss": 0.2332, "step": 3341 }, { "epoch": 0.34635713545445124, "grad_norm": 0.470639169216156, "learning_rate": 3.039080609270124e-05, "loss": 0.2205, "step": 3342 }, { "epoch": 0.34646077313711265, "grad_norm": 0.5183528065681458, "learning_rate": 3.0385069165692137e-05, "loss": 0.2205, "step": 3343 }, { "epoch": 0.34656441081977407, "grad_norm": 0.583718478679657, "learning_rate": 3.0379331068509587e-05, "loss": 0.2338, "step": 3344 }, { "epoch": 0.3466680485024355, "grad_norm": 0.4664159417152405, "learning_rate": 3.0373591801800147e-05, "loss": 0.2279, "step": 3345 }, { "epoch": 0.3467716861850969, "grad_norm": 0.4871425926685333, "learning_rate": 3.0367851366210507e-05, "loss": 0.22, "step": 3346 }, { "epoch": 0.3468753238677583, "grad_norm": 0.4640686511993408, "learning_rate": 3.0362109762387488e-05, "loss": 0.2021, "step": 3347 }, { "epoch": 0.34697896155041974, "grad_norm": 0.4763514995574951, "learning_rate": 3.0356366990978055e-05, "loss": 0.2174, "step": 3348 }, { "epoch": 0.34708259923308116, "grad_norm": 0.5456576347351074, "learning_rate": 3.0350623052629284e-05, "loss": 0.2332, "step": 3349 }, { "epoch": 0.3471862369157426, "grad_norm": 0.4503636658191681, "learning_rate": 3.0344877947988397e-05, "loss": 0.2002, "step": 3350 }, { "epoch": 0.347289874598404, "grad_norm": 0.47983184456825256, "learning_rate": 3.0339131677702754e-05, "loss": 0.22, "step": 3351 }, { "epoch": 0.3473935122810654, "grad_norm": 0.6034876108169556, "learning_rate": 3.033338424241982e-05, "loss": 0.259, "step": 3352 }, { "epoch": 0.3474971499637268, "grad_norm": 0.521963357925415, "learning_rate": 3.0327635642787208e-05, "loss": 0.2344, "step": 3353 }, { "epoch": 0.34760078764638824, "grad_norm": 0.5032604932785034, "learning_rate": 3.0321885879452673e-05, "loss": 0.1985, "step": 3354 }, { "epoch": 0.34770442532904966, "grad_norm": 0.5303429365158081, "learning_rate": 3.0316134953064083e-05, "loss": 0.2356, "step": 3355 }, { "epoch": 0.3478080630117111, "grad_norm": 0.46349650621414185, "learning_rate": 3.0310382864269442e-05, "loss": 0.1997, "step": 3356 }, { "epoch": 0.3479117006943725, "grad_norm": 0.5277966260910034, "learning_rate": 3.0304629613716882e-05, "loss": 0.2755, "step": 3357 }, { "epoch": 0.3480153383770339, "grad_norm": 0.5392859578132629, "learning_rate": 3.029887520205469e-05, "loss": 0.2278, "step": 3358 }, { "epoch": 0.34811897605969533, "grad_norm": 0.5537436008453369, "learning_rate": 3.0293119629931235e-05, "loss": 0.2596, "step": 3359 }, { "epoch": 0.34822261374235675, "grad_norm": 0.5064177513122559, "learning_rate": 3.0287362897995068e-05, "loss": 0.2116, "step": 3360 }, { "epoch": 0.34832625142501816, "grad_norm": 0.4268822968006134, "learning_rate": 3.0281605006894837e-05, "loss": 0.2007, "step": 3361 }, { "epoch": 0.3484298891076795, "grad_norm": 0.5377349257469177, "learning_rate": 3.027584595727934e-05, "loss": 0.2513, "step": 3362 }, { "epoch": 0.34853352679034094, "grad_norm": 0.5493626594543457, "learning_rate": 3.02700857497975e-05, "loss": 0.2513, "step": 3363 }, { "epoch": 0.34863716447300236, "grad_norm": 0.4258686900138855, "learning_rate": 3.0264324385098356e-05, "loss": 0.1931, "step": 3364 }, { "epoch": 0.3487408021556638, "grad_norm": 0.5182408094406128, "learning_rate": 3.0258561863831103e-05, "loss": 0.2278, "step": 3365 }, { "epoch": 0.3488444398383252, "grad_norm": 0.5387588143348694, "learning_rate": 3.025279818664504e-05, "loss": 0.2599, "step": 3366 }, { "epoch": 0.3489480775209866, "grad_norm": 0.4380408525466919, "learning_rate": 3.024703335418962e-05, "loss": 0.1948, "step": 3367 }, { "epoch": 0.349051715203648, "grad_norm": 0.42744994163513184, "learning_rate": 3.0241267367114404e-05, "loss": 0.2135, "step": 3368 }, { "epoch": 0.34915535288630944, "grad_norm": 0.5398333072662354, "learning_rate": 3.0235500226069105e-05, "loss": 0.2239, "step": 3369 }, { "epoch": 0.34925899056897086, "grad_norm": 0.40965038537979126, "learning_rate": 3.0229731931703558e-05, "loss": 0.172, "step": 3370 }, { "epoch": 0.3493626282516323, "grad_norm": 0.4280138611793518, "learning_rate": 3.022396248466771e-05, "loss": 0.1964, "step": 3371 }, { "epoch": 0.3494662659342937, "grad_norm": 0.5184552073478699, "learning_rate": 3.0218191885611666e-05, "loss": 0.2325, "step": 3372 }, { "epoch": 0.3495699036169551, "grad_norm": 0.4650012254714966, "learning_rate": 3.0212420135185652e-05, "loss": 0.2152, "step": 3373 }, { "epoch": 0.34967354129961653, "grad_norm": 0.4905893802642822, "learning_rate": 3.0206647234040006e-05, "loss": 0.2161, "step": 3374 }, { "epoch": 0.34977717898227795, "grad_norm": 0.4558361768722534, "learning_rate": 3.0200873182825218e-05, "loss": 0.1786, "step": 3375 }, { "epoch": 0.34988081666493936, "grad_norm": 0.5681077837944031, "learning_rate": 3.019509798219189e-05, "loss": 0.2663, "step": 3376 }, { "epoch": 0.3499844543476008, "grad_norm": 0.5157054662704468, "learning_rate": 3.018932163279078e-05, "loss": 0.2384, "step": 3377 }, { "epoch": 0.3500880920302622, "grad_norm": 0.49062496423721313, "learning_rate": 3.0183544135272744e-05, "loss": 0.2436, "step": 3378 }, { "epoch": 0.3501917297129236, "grad_norm": 0.4667421579360962, "learning_rate": 3.017776549028879e-05, "loss": 0.209, "step": 3379 }, { "epoch": 0.35029536739558503, "grad_norm": 0.5137354135513306, "learning_rate": 3.0171985698490034e-05, "loss": 0.226, "step": 3380 }, { "epoch": 0.35039900507824645, "grad_norm": 0.47683942317962646, "learning_rate": 3.016620476052774e-05, "loss": 0.1956, "step": 3381 }, { "epoch": 0.35050264276090787, "grad_norm": 0.5153138637542725, "learning_rate": 3.0160422677053306e-05, "loss": 0.22, "step": 3382 }, { "epoch": 0.3506062804435693, "grad_norm": 0.5153190493583679, "learning_rate": 3.0154639448718242e-05, "loss": 0.2453, "step": 3383 }, { "epoch": 0.3507099181262307, "grad_norm": 0.47236472368240356, "learning_rate": 3.014885507617418e-05, "loss": 0.2101, "step": 3384 }, { "epoch": 0.3508135558088921, "grad_norm": 0.4625147879123688, "learning_rate": 3.0143069560072914e-05, "loss": 0.2132, "step": 3385 }, { "epoch": 0.35091719349155354, "grad_norm": 0.46439385414123535, "learning_rate": 3.0137282901066332e-05, "loss": 0.2111, "step": 3386 }, { "epoch": 0.35102083117421495, "grad_norm": 0.4748516082763672, "learning_rate": 3.0131495099806472e-05, "loss": 0.2143, "step": 3387 }, { "epoch": 0.35112446885687637, "grad_norm": 0.5125780701637268, "learning_rate": 3.012570615694549e-05, "loss": 0.2532, "step": 3388 }, { "epoch": 0.3512281065395378, "grad_norm": 0.4637886583805084, "learning_rate": 3.011991607313569e-05, "loss": 0.2181, "step": 3389 }, { "epoch": 0.3513317442221992, "grad_norm": 0.45637691020965576, "learning_rate": 3.0114124849029474e-05, "loss": 0.2108, "step": 3390 }, { "epoch": 0.3514353819048606, "grad_norm": 0.4881954789161682, "learning_rate": 3.0108332485279387e-05, "loss": 0.2281, "step": 3391 }, { "epoch": 0.35153901958752204, "grad_norm": 0.4588933289051056, "learning_rate": 3.0102538982538116e-05, "loss": 0.219, "step": 3392 }, { "epoch": 0.35164265727018346, "grad_norm": 0.5305696725845337, "learning_rate": 3.0096744341458452e-05, "loss": 0.2512, "step": 3393 }, { "epoch": 0.3517462949528449, "grad_norm": 0.5035028457641602, "learning_rate": 3.0090948562693336e-05, "loss": 0.2561, "step": 3394 }, { "epoch": 0.3518499326355063, "grad_norm": 0.39127054810523987, "learning_rate": 3.0085151646895823e-05, "loss": 0.1747, "step": 3395 }, { "epoch": 0.3519535703181677, "grad_norm": 0.46729522943496704, "learning_rate": 3.007935359471909e-05, "loss": 0.2006, "step": 3396 }, { "epoch": 0.3520572080008291, "grad_norm": 0.419251024723053, "learning_rate": 3.0073554406816474e-05, "loss": 0.1925, "step": 3397 }, { "epoch": 0.35216084568349054, "grad_norm": 0.46076175570487976, "learning_rate": 3.0067754083841406e-05, "loss": 0.2069, "step": 3398 }, { "epoch": 0.35226448336615196, "grad_norm": 0.4998854696750641, "learning_rate": 3.0061952626447458e-05, "loss": 0.2189, "step": 3399 }, { "epoch": 0.3523681210488133, "grad_norm": 0.4538286328315735, "learning_rate": 3.0056150035288323e-05, "loss": 0.2224, "step": 3400 }, { "epoch": 0.35247175873147474, "grad_norm": 0.5140456557273865, "learning_rate": 3.0050346311017842e-05, "loss": 0.2425, "step": 3401 }, { "epoch": 0.35257539641413616, "grad_norm": 0.40988877415657043, "learning_rate": 3.004454145428996e-05, "loss": 0.1665, "step": 3402 }, { "epoch": 0.3526790340967976, "grad_norm": 0.43955564498901367, "learning_rate": 3.003873546575876e-05, "loss": 0.1954, "step": 3403 }, { "epoch": 0.352782671779459, "grad_norm": 0.44053974747657776, "learning_rate": 3.0032928346078453e-05, "loss": 0.183, "step": 3404 }, { "epoch": 0.3528863094621204, "grad_norm": 0.4559771418571472, "learning_rate": 3.0027120095903378e-05, "loss": 0.206, "step": 3405 }, { "epoch": 0.3529899471447818, "grad_norm": 0.4658268988132477, "learning_rate": 3.0021310715887996e-05, "loss": 0.2101, "step": 3406 }, { "epoch": 0.35309358482744324, "grad_norm": 0.44966891407966614, "learning_rate": 3.0015500206686906e-05, "loss": 0.2125, "step": 3407 }, { "epoch": 0.35319722251010466, "grad_norm": 0.48129624128341675, "learning_rate": 3.0009688568954818e-05, "loss": 0.2433, "step": 3408 }, { "epoch": 0.3533008601927661, "grad_norm": 0.4340304434299469, "learning_rate": 3.0003875803346577e-05, "loss": 0.2029, "step": 3409 }, { "epoch": 0.3534044978754275, "grad_norm": 0.4602864384651184, "learning_rate": 2.9998061910517172e-05, "loss": 0.219, "step": 3410 }, { "epoch": 0.3535081355580889, "grad_norm": 0.5180659890174866, "learning_rate": 2.999224689112169e-05, "loss": 0.1932, "step": 3411 }, { "epoch": 0.3536117732407503, "grad_norm": 0.42697539925575256, "learning_rate": 2.998643074581536e-05, "loss": 0.189, "step": 3412 }, { "epoch": 0.35371541092341174, "grad_norm": 0.46669483184814453, "learning_rate": 2.9980613475253535e-05, "loss": 0.2007, "step": 3413 }, { "epoch": 0.35381904860607316, "grad_norm": 0.6057338714599609, "learning_rate": 2.9974795080091708e-05, "loss": 0.2793, "step": 3414 }, { "epoch": 0.3539226862887346, "grad_norm": 0.5159379243850708, "learning_rate": 2.996897556098547e-05, "loss": 0.2276, "step": 3415 }, { "epoch": 0.354026323971396, "grad_norm": 0.5511611700057983, "learning_rate": 2.996315491859056e-05, "loss": 0.2427, "step": 3416 }, { "epoch": 0.3541299616540574, "grad_norm": 0.4959019124507904, "learning_rate": 2.9957333153562847e-05, "loss": 0.2244, "step": 3417 }, { "epoch": 0.35423359933671883, "grad_norm": 0.5330846905708313, "learning_rate": 2.9951510266558314e-05, "loss": 0.2392, "step": 3418 }, { "epoch": 0.35433723701938025, "grad_norm": 0.49244412779808044, "learning_rate": 2.9945686258233073e-05, "loss": 0.2299, "step": 3419 }, { "epoch": 0.35444087470204166, "grad_norm": 0.47341403365135193, "learning_rate": 2.9939861129243368e-05, "loss": 0.1775, "step": 3420 }, { "epoch": 0.3545445123847031, "grad_norm": 0.5534030199050903, "learning_rate": 2.9934034880245554e-05, "loss": 0.2475, "step": 3421 }, { "epoch": 0.3546481500673645, "grad_norm": 0.488478422164917, "learning_rate": 2.992820751189614e-05, "loss": 0.2421, "step": 3422 }, { "epoch": 0.3547517877500259, "grad_norm": 0.5435716509819031, "learning_rate": 2.992237902485174e-05, "loss": 0.2221, "step": 3423 }, { "epoch": 0.35485542543268733, "grad_norm": 0.4245526194572449, "learning_rate": 2.9916549419769086e-05, "loss": 0.1846, "step": 3424 }, { "epoch": 0.35495906311534875, "grad_norm": 0.5004781484603882, "learning_rate": 2.991071869730507e-05, "loss": 0.2062, "step": 3425 }, { "epoch": 0.35506270079801017, "grad_norm": 0.5690580010414124, "learning_rate": 2.990488685811667e-05, "loss": 0.2647, "step": 3426 }, { "epoch": 0.3551663384806716, "grad_norm": 0.5165721774101257, "learning_rate": 2.989905390286102e-05, "loss": 0.2444, "step": 3427 }, { "epoch": 0.355269976163333, "grad_norm": 0.5091362595558167, "learning_rate": 2.989321983219536e-05, "loss": 0.2333, "step": 3428 }, { "epoch": 0.3553736138459944, "grad_norm": 0.49818992614746094, "learning_rate": 2.988738464677707e-05, "loss": 0.2068, "step": 3429 }, { "epoch": 0.35547725152865584, "grad_norm": 0.4385213553905487, "learning_rate": 2.9881548347263654e-05, "loss": 0.2095, "step": 3430 }, { "epoch": 0.35558088921131725, "grad_norm": 0.5639091730117798, "learning_rate": 2.9875710934312723e-05, "loss": 0.2368, "step": 3431 }, { "epoch": 0.35568452689397867, "grad_norm": 0.5173773765563965, "learning_rate": 2.986987240858204e-05, "loss": 0.2211, "step": 3432 }, { "epoch": 0.3557881645766401, "grad_norm": 0.4789592921733856, "learning_rate": 2.986403277072948e-05, "loss": 0.2244, "step": 3433 }, { "epoch": 0.3558918022593015, "grad_norm": 0.5333912372589111, "learning_rate": 2.985819202141304e-05, "loss": 0.2436, "step": 3434 }, { "epoch": 0.3559954399419629, "grad_norm": 0.5361971259117126, "learning_rate": 2.9852350161290844e-05, "loss": 0.2124, "step": 3435 }, { "epoch": 0.35609907762462434, "grad_norm": 0.5212494134902954, "learning_rate": 2.984650719102115e-05, "loss": 0.2428, "step": 3436 }, { "epoch": 0.35620271530728576, "grad_norm": 0.5393432974815369, "learning_rate": 2.9840663111262334e-05, "loss": 0.2282, "step": 3437 }, { "epoch": 0.3563063529899471, "grad_norm": 0.506872296333313, "learning_rate": 2.9834817922672892e-05, "loss": 0.2369, "step": 3438 }, { "epoch": 0.35640999067260853, "grad_norm": 0.4239386022090912, "learning_rate": 2.9828971625911465e-05, "loss": 0.1886, "step": 3439 }, { "epoch": 0.35651362835526995, "grad_norm": 0.5943200588226318, "learning_rate": 2.9823124221636784e-05, "loss": 0.2464, "step": 3440 }, { "epoch": 0.35661726603793137, "grad_norm": 0.4862738847732544, "learning_rate": 2.9817275710507735e-05, "loss": 0.2484, "step": 3441 }, { "epoch": 0.3567209037205928, "grad_norm": 0.483430415391922, "learning_rate": 2.981142609318333e-05, "loss": 0.2233, "step": 3442 }, { "epoch": 0.3568245414032542, "grad_norm": 0.45114192366600037, "learning_rate": 2.980557537032268e-05, "loss": 0.1913, "step": 3443 }, { "epoch": 0.3569281790859156, "grad_norm": 0.510878324508667, "learning_rate": 2.979972354258504e-05, "loss": 0.2194, "step": 3444 }, { "epoch": 0.35703181676857704, "grad_norm": 0.4664299190044403, "learning_rate": 2.979387061062978e-05, "loss": 0.2104, "step": 3445 }, { "epoch": 0.35713545445123845, "grad_norm": 0.5411281585693359, "learning_rate": 2.9788016575116412e-05, "loss": 0.2701, "step": 3446 }, { "epoch": 0.35723909213389987, "grad_norm": 0.41982153058052063, "learning_rate": 2.978216143670455e-05, "loss": 0.1865, "step": 3447 }, { "epoch": 0.3573427298165613, "grad_norm": 0.45480507612228394, "learning_rate": 2.977630519605394e-05, "loss": 0.2287, "step": 3448 }, { "epoch": 0.3574463674992227, "grad_norm": 0.4503597319126129, "learning_rate": 2.9770447853824468e-05, "loss": 0.2002, "step": 3449 }, { "epoch": 0.3575500051818841, "grad_norm": 0.5262497067451477, "learning_rate": 2.976458941067611e-05, "loss": 0.2392, "step": 3450 }, { "epoch": 0.35765364286454554, "grad_norm": 0.42646539211273193, "learning_rate": 2.9758729867268998e-05, "loss": 0.2063, "step": 3451 }, { "epoch": 0.35775728054720696, "grad_norm": 0.5226529240608215, "learning_rate": 2.975286922426338e-05, "loss": 0.2364, "step": 3452 }, { "epoch": 0.3578609182298684, "grad_norm": 0.49958691000938416, "learning_rate": 2.9747007482319616e-05, "loss": 0.231, "step": 3453 }, { "epoch": 0.3579645559125298, "grad_norm": 0.44578859210014343, "learning_rate": 2.9741144642098204e-05, "loss": 0.2012, "step": 3454 }, { "epoch": 0.3580681935951912, "grad_norm": 0.49792999029159546, "learning_rate": 2.9735280704259755e-05, "loss": 0.2201, "step": 3455 }, { "epoch": 0.3581718312778526, "grad_norm": 0.4345203638076782, "learning_rate": 2.9729415669465004e-05, "loss": 0.1937, "step": 3456 }, { "epoch": 0.35827546896051404, "grad_norm": 0.48358073830604553, "learning_rate": 2.9723549538374824e-05, "loss": 0.2582, "step": 3457 }, { "epoch": 0.35837910664317546, "grad_norm": 0.488992303609848, "learning_rate": 2.9717682311650204e-05, "loss": 0.2291, "step": 3458 }, { "epoch": 0.3584827443258369, "grad_norm": 0.5303345918655396, "learning_rate": 2.9711813989952242e-05, "loss": 0.2248, "step": 3459 }, { "epoch": 0.3585863820084983, "grad_norm": 0.4908701181411743, "learning_rate": 2.9705944573942173e-05, "loss": 0.2136, "step": 3460 }, { "epoch": 0.3586900196911597, "grad_norm": 0.4816449284553528, "learning_rate": 2.9700074064281367e-05, "loss": 0.2352, "step": 3461 }, { "epoch": 0.35879365737382113, "grad_norm": 0.4632469713687897, "learning_rate": 2.9694202461631282e-05, "loss": 0.2046, "step": 3462 }, { "epoch": 0.35889729505648255, "grad_norm": 0.49483367800712585, "learning_rate": 2.968832976665354e-05, "loss": 0.233, "step": 3463 }, { "epoch": 0.35900093273914396, "grad_norm": 0.45997875928878784, "learning_rate": 2.9682455980009862e-05, "loss": 0.1865, "step": 3464 }, { "epoch": 0.3591045704218054, "grad_norm": 0.449470192193985, "learning_rate": 2.96765811023621e-05, "loss": 0.1967, "step": 3465 }, { "epoch": 0.3592082081044668, "grad_norm": 0.5391585230827332, "learning_rate": 2.9670705134372216e-05, "loss": 0.2325, "step": 3466 }, { "epoch": 0.3593118457871282, "grad_norm": 0.4918924570083618, "learning_rate": 2.9664828076702307e-05, "loss": 0.2164, "step": 3467 }, { "epoch": 0.35941548346978963, "grad_norm": 0.5079565048217773, "learning_rate": 2.9658949930014606e-05, "loss": 0.2176, "step": 3468 }, { "epoch": 0.35951912115245105, "grad_norm": 0.44536030292510986, "learning_rate": 2.9653070694971435e-05, "loss": 0.1739, "step": 3469 }, { "epoch": 0.35962275883511247, "grad_norm": 0.44883498549461365, "learning_rate": 2.9647190372235265e-05, "loss": 0.1945, "step": 3470 }, { "epoch": 0.3597263965177739, "grad_norm": 0.510392427444458, "learning_rate": 2.9641308962468676e-05, "loss": 0.2237, "step": 3471 }, { "epoch": 0.3598300342004353, "grad_norm": 0.47959601879119873, "learning_rate": 2.9635426466334384e-05, "loss": 0.1916, "step": 3472 }, { "epoch": 0.3599336718830967, "grad_norm": 0.5199118256568909, "learning_rate": 2.962954288449522e-05, "loss": 0.2417, "step": 3473 }, { "epoch": 0.36003730956575813, "grad_norm": 0.5270677208900452, "learning_rate": 2.9623658217614132e-05, "loss": 0.2059, "step": 3474 }, { "epoch": 0.36014094724841955, "grad_norm": 0.5287850499153137, "learning_rate": 2.9617772466354192e-05, "loss": 0.2272, "step": 3475 }, { "epoch": 0.3602445849310809, "grad_norm": 0.481442928314209, "learning_rate": 2.9611885631378602e-05, "loss": 0.2097, "step": 3476 }, { "epoch": 0.36034822261374233, "grad_norm": 0.4829895794391632, "learning_rate": 2.9605997713350686e-05, "loss": 0.2042, "step": 3477 }, { "epoch": 0.36045186029640375, "grad_norm": 0.49132823944091797, "learning_rate": 2.960010871293388e-05, "loss": 0.2404, "step": 3478 }, { "epoch": 0.36055549797906516, "grad_norm": 0.4517565369606018, "learning_rate": 2.9594218630791746e-05, "loss": 0.1784, "step": 3479 }, { "epoch": 0.3606591356617266, "grad_norm": 0.5756237506866455, "learning_rate": 2.958832746758797e-05, "loss": 0.2601, "step": 3480 }, { "epoch": 0.360762773344388, "grad_norm": 0.47717320919036865, "learning_rate": 2.9582435223986363e-05, "loss": 0.218, "step": 3481 }, { "epoch": 0.3608664110270494, "grad_norm": 0.515692412853241, "learning_rate": 2.9576541900650847e-05, "loss": 0.2357, "step": 3482 }, { "epoch": 0.36097004870971083, "grad_norm": 0.5456742644309998, "learning_rate": 2.9570647498245484e-05, "loss": 0.2441, "step": 3483 }, { "epoch": 0.36107368639237225, "grad_norm": 0.5447889566421509, "learning_rate": 2.9564752017434432e-05, "loss": 0.2426, "step": 3484 }, { "epoch": 0.36117732407503367, "grad_norm": 0.5068838596343994, "learning_rate": 2.955885545888199e-05, "loss": 0.2359, "step": 3485 }, { "epoch": 0.3612809617576951, "grad_norm": 0.5069774389266968, "learning_rate": 2.955295782325258e-05, "loss": 0.2234, "step": 3486 }, { "epoch": 0.3613845994403565, "grad_norm": 0.48465102910995483, "learning_rate": 2.954705911121073e-05, "loss": 0.2047, "step": 3487 }, { "epoch": 0.3614882371230179, "grad_norm": 0.44714057445526123, "learning_rate": 2.95411593234211e-05, "loss": 0.1955, "step": 3488 }, { "epoch": 0.36159187480567934, "grad_norm": 0.46095696091651917, "learning_rate": 2.9535258460548473e-05, "loss": 0.1973, "step": 3489 }, { "epoch": 0.36169551248834075, "grad_norm": 0.5460710525512695, "learning_rate": 2.9529356523257742e-05, "loss": 0.2626, "step": 3490 }, { "epoch": 0.36179915017100217, "grad_norm": 0.49723753333091736, "learning_rate": 2.952345351221393e-05, "loss": 0.2202, "step": 3491 }, { "epoch": 0.3619027878536636, "grad_norm": 0.476315975189209, "learning_rate": 2.9517549428082185e-05, "loss": 0.2361, "step": 3492 }, { "epoch": 0.362006425536325, "grad_norm": 0.4768473207950592, "learning_rate": 2.951164427152777e-05, "loss": 0.2027, "step": 3493 }, { "epoch": 0.3621100632189864, "grad_norm": 0.49800312519073486, "learning_rate": 2.9505738043216053e-05, "loss": 0.2222, "step": 3494 }, { "epoch": 0.36221370090164784, "grad_norm": 0.48882049322128296, "learning_rate": 2.9499830743812557e-05, "loss": 0.215, "step": 3495 }, { "epoch": 0.36231733858430926, "grad_norm": 0.5254577994346619, "learning_rate": 2.94939223739829e-05, "loss": 0.2355, "step": 3496 }, { "epoch": 0.3624209762669707, "grad_norm": 0.46470046043395996, "learning_rate": 2.9488012934392828e-05, "loss": 0.2513, "step": 3497 }, { "epoch": 0.3625246139496321, "grad_norm": 0.5480517148971558, "learning_rate": 2.948210242570821e-05, "loss": 0.2706, "step": 3498 }, { "epoch": 0.3626282516322935, "grad_norm": 0.4826633930206299, "learning_rate": 2.9476190848595032e-05, "loss": 0.2083, "step": 3499 }, { "epoch": 0.3627318893149549, "grad_norm": 0.45155906677246094, "learning_rate": 2.947027820371939e-05, "loss": 0.2236, "step": 3500 }, { "epoch": 0.36283552699761634, "grad_norm": 0.5023136138916016, "learning_rate": 2.946436449174753e-05, "loss": 0.2225, "step": 3501 }, { "epoch": 0.36293916468027776, "grad_norm": 0.44159698486328125, "learning_rate": 2.9458449713345795e-05, "loss": 0.1906, "step": 3502 }, { "epoch": 0.3630428023629392, "grad_norm": 0.5016050934791565, "learning_rate": 2.9452533869180643e-05, "loss": 0.2195, "step": 3503 }, { "epoch": 0.3631464400456006, "grad_norm": 0.4987817704677582, "learning_rate": 2.944661695991867e-05, "loss": 0.2113, "step": 3504 }, { "epoch": 0.363250077728262, "grad_norm": 0.4627358615398407, "learning_rate": 2.9440698986226585e-05, "loss": 0.2135, "step": 3505 }, { "epoch": 0.36335371541092343, "grad_norm": 0.45628219842910767, "learning_rate": 2.9434779948771214e-05, "loss": 0.1765, "step": 3506 }, { "epoch": 0.36345735309358485, "grad_norm": 0.43228745460510254, "learning_rate": 2.9428859848219505e-05, "loss": 0.1921, "step": 3507 }, { "epoch": 0.36356099077624626, "grad_norm": 0.47669291496276855, "learning_rate": 2.9422938685238524e-05, "loss": 0.2206, "step": 3508 }, { "epoch": 0.3636646284589077, "grad_norm": 0.5110993385314941, "learning_rate": 2.9417016460495466e-05, "loss": 0.2518, "step": 3509 }, { "epoch": 0.3637682661415691, "grad_norm": 0.5036219358444214, "learning_rate": 2.9411093174657622e-05, "loss": 0.2165, "step": 3510 }, { "epoch": 0.3638719038242305, "grad_norm": 0.4140499234199524, "learning_rate": 2.9405168828392436e-05, "loss": 0.1599, "step": 3511 }, { "epoch": 0.36397554150689193, "grad_norm": 0.5629992485046387, "learning_rate": 2.9399243422367445e-05, "loss": 0.2612, "step": 3512 }, { "epoch": 0.36407917918955335, "grad_norm": 0.46416953206062317, "learning_rate": 2.9393316957250317e-05, "loss": 0.1849, "step": 3513 }, { "epoch": 0.3641828168722147, "grad_norm": 0.5125628113746643, "learning_rate": 2.9387389433708837e-05, "loss": 0.2172, "step": 3514 }, { "epoch": 0.3642864545548761, "grad_norm": 0.5483174920082092, "learning_rate": 2.9381460852410906e-05, "loss": 0.2493, "step": 3515 }, { "epoch": 0.36439009223753754, "grad_norm": 0.5044114589691162, "learning_rate": 2.9375531214024553e-05, "loss": 0.2402, "step": 3516 }, { "epoch": 0.36449372992019896, "grad_norm": 0.4661901593208313, "learning_rate": 2.9369600519217916e-05, "loss": 0.1922, "step": 3517 }, { "epoch": 0.3645973676028604, "grad_norm": 0.5807094573974609, "learning_rate": 2.9363668768659263e-05, "loss": 0.2717, "step": 3518 }, { "epoch": 0.3647010052855218, "grad_norm": 0.5298463106155396, "learning_rate": 2.9357735963016963e-05, "loss": 0.1951, "step": 3519 }, { "epoch": 0.3648046429681832, "grad_norm": 0.5299417972564697, "learning_rate": 2.935180210295952e-05, "loss": 0.2032, "step": 3520 }, { "epoch": 0.36490828065084463, "grad_norm": 0.5053216814994812, "learning_rate": 2.9345867189155562e-05, "loss": 0.2035, "step": 3521 }, { "epoch": 0.36501191833350605, "grad_norm": 0.48312923312187195, "learning_rate": 2.933993122227381e-05, "loss": 0.206, "step": 3522 }, { "epoch": 0.36511555601616746, "grad_norm": 0.5141869783401489, "learning_rate": 2.933399420298313e-05, "loss": 0.222, "step": 3523 }, { "epoch": 0.3652191936988289, "grad_norm": 0.4666402339935303, "learning_rate": 2.932805613195249e-05, "loss": 0.1896, "step": 3524 }, { "epoch": 0.3653228313814903, "grad_norm": 0.4823445975780487, "learning_rate": 2.9322117009850988e-05, "loss": 0.1735, "step": 3525 }, { "epoch": 0.3654264690641517, "grad_norm": 0.5427972078323364, "learning_rate": 2.9316176837347834e-05, "loss": 0.2418, "step": 3526 }, { "epoch": 0.36553010674681313, "grad_norm": 0.4606572091579437, "learning_rate": 2.9310235615112357e-05, "loss": 0.1913, "step": 3527 }, { "epoch": 0.36563374442947455, "grad_norm": 0.3660353422164917, "learning_rate": 2.9304293343814005e-05, "loss": 0.1392, "step": 3528 }, { "epoch": 0.36573738211213597, "grad_norm": 0.5093293190002441, "learning_rate": 2.929835002412234e-05, "loss": 0.2118, "step": 3529 }, { "epoch": 0.3658410197947974, "grad_norm": 0.5066865682601929, "learning_rate": 2.9292405656707044e-05, "loss": 0.2008, "step": 3530 }, { "epoch": 0.3659446574774588, "grad_norm": 0.6015675663948059, "learning_rate": 2.928646024223793e-05, "loss": 0.2406, "step": 3531 }, { "epoch": 0.3660482951601202, "grad_norm": 0.6050959229469299, "learning_rate": 2.9280513781384913e-05, "loss": 0.2522, "step": 3532 }, { "epoch": 0.36615193284278164, "grad_norm": 0.4754674434661865, "learning_rate": 2.9274566274818027e-05, "loss": 0.2329, "step": 3533 }, { "epoch": 0.36625557052544305, "grad_norm": 0.527370274066925, "learning_rate": 2.9268617723207433e-05, "loss": 0.2376, "step": 3534 }, { "epoch": 0.36635920820810447, "grad_norm": 0.4930630624294281, "learning_rate": 2.92626681272234e-05, "loss": 0.1882, "step": 3535 }, { "epoch": 0.3664628458907659, "grad_norm": 0.5467690825462341, "learning_rate": 2.9256717487536322e-05, "loss": 0.2391, "step": 3536 }, { "epoch": 0.3665664835734273, "grad_norm": 0.5535871386528015, "learning_rate": 2.9250765804816712e-05, "loss": 0.264, "step": 3537 }, { "epoch": 0.3666701212560887, "grad_norm": 0.4704132676124573, "learning_rate": 2.9244813079735186e-05, "loss": 0.2198, "step": 3538 }, { "epoch": 0.36677375893875014, "grad_norm": 0.4724007248878479, "learning_rate": 2.9238859312962496e-05, "loss": 0.2065, "step": 3539 }, { "epoch": 0.36687739662141156, "grad_norm": 0.5545615553855896, "learning_rate": 2.9232904505169498e-05, "loss": 0.2557, "step": 3540 }, { "epoch": 0.366981034304073, "grad_norm": 0.4997697174549103, "learning_rate": 2.9226948657027178e-05, "loss": 0.2299, "step": 3541 }, { "epoch": 0.3670846719867344, "grad_norm": 0.4856959581375122, "learning_rate": 2.9220991769206617e-05, "loss": 0.1989, "step": 3542 }, { "epoch": 0.3671883096693958, "grad_norm": 0.5277553796768188, "learning_rate": 2.9215033842379048e-05, "loss": 0.2166, "step": 3543 }, { "epoch": 0.3672919473520572, "grad_norm": 0.5107161998748779, "learning_rate": 2.920907487721579e-05, "loss": 0.2279, "step": 3544 }, { "epoch": 0.36739558503471864, "grad_norm": 0.44258975982666016, "learning_rate": 2.920311487438828e-05, "loss": 0.1818, "step": 3545 }, { "epoch": 0.36749922271738006, "grad_norm": 0.4603618085384369, "learning_rate": 2.9197153834568106e-05, "loss": 0.1815, "step": 3546 }, { "epoch": 0.3676028604000415, "grad_norm": 0.47472718358039856, "learning_rate": 2.919119175842693e-05, "loss": 0.2362, "step": 3547 }, { "epoch": 0.3677064980827029, "grad_norm": 0.4437291920185089, "learning_rate": 2.9185228646636553e-05, "loss": 0.1845, "step": 3548 }, { "epoch": 0.3678101357653643, "grad_norm": 0.4785867929458618, "learning_rate": 2.9179264499868893e-05, "loss": 0.2044, "step": 3549 }, { "epoch": 0.3679137734480257, "grad_norm": 0.5831817388534546, "learning_rate": 2.917329931879598e-05, "loss": 0.261, "step": 3550 }, { "epoch": 0.36801741113068714, "grad_norm": 0.45431435108184814, "learning_rate": 2.9167333104089956e-05, "loss": 0.2025, "step": 3551 }, { "epoch": 0.3681210488133485, "grad_norm": 0.5137087106704712, "learning_rate": 2.916136585642309e-05, "loss": 0.208, "step": 3552 }, { "epoch": 0.3682246864960099, "grad_norm": 0.5459833741188049, "learning_rate": 2.9155397576467765e-05, "loss": 0.2807, "step": 3553 }, { "epoch": 0.36832832417867134, "grad_norm": 0.470268189907074, "learning_rate": 2.9149428264896465e-05, "loss": 0.2268, "step": 3554 }, { "epoch": 0.36843196186133276, "grad_norm": 0.48998451232910156, "learning_rate": 2.9143457922381816e-05, "loss": 0.2062, "step": 3555 }, { "epoch": 0.3685355995439942, "grad_norm": 0.5100763440132141, "learning_rate": 2.9137486549596544e-05, "loss": 0.2345, "step": 3556 }, { "epoch": 0.3686392372266556, "grad_norm": 0.5126616954803467, "learning_rate": 2.913151414721349e-05, "loss": 0.2055, "step": 3557 }, { "epoch": 0.368742874909317, "grad_norm": 0.43402886390686035, "learning_rate": 2.9125540715905613e-05, "loss": 0.1838, "step": 3558 }, { "epoch": 0.3688465125919784, "grad_norm": 0.4938167631626129, "learning_rate": 2.9119566256346e-05, "loss": 0.2162, "step": 3559 }, { "epoch": 0.36895015027463984, "grad_norm": 0.4344816207885742, "learning_rate": 2.911359076920784e-05, "loss": 0.1651, "step": 3560 }, { "epoch": 0.36905378795730126, "grad_norm": 0.4878370463848114, "learning_rate": 2.910761425516443e-05, "loss": 0.2244, "step": 3561 }, { "epoch": 0.3691574256399627, "grad_norm": 0.5052449107170105, "learning_rate": 2.9101636714889215e-05, "loss": 0.2288, "step": 3562 }, { "epoch": 0.3692610633226241, "grad_norm": 0.5431941151618958, "learning_rate": 2.9095658149055713e-05, "loss": 0.2603, "step": 3563 }, { "epoch": 0.3693647010052855, "grad_norm": 0.5375214219093323, "learning_rate": 2.9089678558337593e-05, "loss": 0.2502, "step": 3564 }, { "epoch": 0.36946833868794693, "grad_norm": 0.5288280844688416, "learning_rate": 2.908369794340863e-05, "loss": 0.2508, "step": 3565 }, { "epoch": 0.36957197637060835, "grad_norm": 0.5204939842224121, "learning_rate": 2.9077716304942698e-05, "loss": 0.2162, "step": 3566 }, { "epoch": 0.36967561405326976, "grad_norm": 0.5446228981018066, "learning_rate": 2.9071733643613806e-05, "loss": 0.2541, "step": 3567 }, { "epoch": 0.3697792517359312, "grad_norm": 0.5477862358093262, "learning_rate": 2.9065749960096066e-05, "loss": 0.2338, "step": 3568 }, { "epoch": 0.3698828894185926, "grad_norm": 0.5047724843025208, "learning_rate": 2.9059765255063718e-05, "loss": 0.2153, "step": 3569 }, { "epoch": 0.369986527101254, "grad_norm": 0.571573793888092, "learning_rate": 2.90537795291911e-05, "loss": 0.2785, "step": 3570 }, { "epoch": 0.37009016478391543, "grad_norm": 0.4648495018482208, "learning_rate": 2.9047792783152685e-05, "loss": 0.2054, "step": 3571 }, { "epoch": 0.37019380246657685, "grad_norm": 0.5595817565917969, "learning_rate": 2.9041805017623043e-05, "loss": 0.2432, "step": 3572 }, { "epoch": 0.37029744014923827, "grad_norm": 0.4277222752571106, "learning_rate": 2.9035816233276866e-05, "loss": 0.1885, "step": 3573 }, { "epoch": 0.3704010778318997, "grad_norm": 0.41399163007736206, "learning_rate": 2.902982643078896e-05, "loss": 0.1749, "step": 3574 }, { "epoch": 0.3705047155145611, "grad_norm": 0.49881187081336975, "learning_rate": 2.9023835610834253e-05, "loss": 0.2281, "step": 3575 }, { "epoch": 0.3706083531972225, "grad_norm": 0.5957393646240234, "learning_rate": 2.9017843774087774e-05, "loss": 0.2337, "step": 3576 }, { "epoch": 0.37071199087988393, "grad_norm": 0.5654463171958923, "learning_rate": 2.901185092122468e-05, "loss": 0.2728, "step": 3577 }, { "epoch": 0.37081562856254535, "grad_norm": 0.4537357985973358, "learning_rate": 2.9005857052920232e-05, "loss": 0.2025, "step": 3578 }, { "epoch": 0.37091926624520677, "grad_norm": 0.5320644378662109, "learning_rate": 2.8999862169849807e-05, "loss": 0.2283, "step": 3579 }, { "epoch": 0.3710229039278682, "grad_norm": 0.5118852257728577, "learning_rate": 2.8993866272688905e-05, "loss": 0.2241, "step": 3580 }, { "epoch": 0.3711265416105296, "grad_norm": 0.48598912358283997, "learning_rate": 2.898786936211314e-05, "loss": 0.2328, "step": 3581 }, { "epoch": 0.371230179293191, "grad_norm": 0.44688498973846436, "learning_rate": 2.898187143879822e-05, "loss": 0.1865, "step": 3582 }, { "epoch": 0.37133381697585244, "grad_norm": 0.5546681880950928, "learning_rate": 2.897587250341999e-05, "loss": 0.2309, "step": 3583 }, { "epoch": 0.37143745465851385, "grad_norm": 0.48288416862487793, "learning_rate": 2.89698725566544e-05, "loss": 0.2094, "step": 3584 }, { "epoch": 0.37154109234117527, "grad_norm": 0.47836169600486755, "learning_rate": 2.8963871599177517e-05, "loss": 0.2109, "step": 3585 }, { "epoch": 0.3716447300238367, "grad_norm": 0.4424706995487213, "learning_rate": 2.8957869631665514e-05, "loss": 0.1797, "step": 3586 }, { "epoch": 0.3717483677064981, "grad_norm": 0.48061931133270264, "learning_rate": 2.8951866654794683e-05, "loss": 0.2098, "step": 3587 }, { "epoch": 0.3718520053891595, "grad_norm": 0.4864051043987274, "learning_rate": 2.8945862669241443e-05, "loss": 0.1919, "step": 3588 }, { "epoch": 0.37195564307182094, "grad_norm": 0.4833623170852661, "learning_rate": 2.893985767568229e-05, "loss": 0.2074, "step": 3589 }, { "epoch": 0.3720592807544823, "grad_norm": 0.49619123339653015, "learning_rate": 2.8933851674793883e-05, "loss": 0.1877, "step": 3590 }, { "epoch": 0.3721629184371437, "grad_norm": 0.4423529803752899, "learning_rate": 2.8927844667252953e-05, "loss": 0.1977, "step": 3591 }, { "epoch": 0.37226655611980514, "grad_norm": 0.38620245456695557, "learning_rate": 2.8921836653736366e-05, "loss": 0.1848, "step": 3592 }, { "epoch": 0.37237019380246655, "grad_norm": 0.4467632472515106, "learning_rate": 2.8915827634921094e-05, "loss": 0.1845, "step": 3593 }, { "epoch": 0.37247383148512797, "grad_norm": 0.4751870930194855, "learning_rate": 2.890981761148422e-05, "loss": 0.2199, "step": 3594 }, { "epoch": 0.3725774691677894, "grad_norm": 0.466667115688324, "learning_rate": 2.890380658410295e-05, "loss": 0.2033, "step": 3595 }, { "epoch": 0.3726811068504508, "grad_norm": 0.48820653557777405, "learning_rate": 2.8897794553454597e-05, "loss": 0.2083, "step": 3596 }, { "epoch": 0.3727847445331122, "grad_norm": 0.5090116262435913, "learning_rate": 2.889178152021659e-05, "loss": 0.2137, "step": 3597 }, { "epoch": 0.37288838221577364, "grad_norm": 0.5207107663154602, "learning_rate": 2.888576748506646e-05, "loss": 0.1894, "step": 3598 }, { "epoch": 0.37299201989843506, "grad_norm": 0.5283442139625549, "learning_rate": 2.8879752448681856e-05, "loss": 0.2255, "step": 3599 }, { "epoch": 0.3730956575810965, "grad_norm": 0.49328505992889404, "learning_rate": 2.8873736411740557e-05, "loss": 0.2099, "step": 3600 }, { "epoch": 0.3731992952637579, "grad_norm": 0.4782813489437103, "learning_rate": 2.886771937492043e-05, "loss": 0.1914, "step": 3601 }, { "epoch": 0.3733029329464193, "grad_norm": 0.45772305130958557, "learning_rate": 2.8861701338899472e-05, "loss": 0.1888, "step": 3602 }, { "epoch": 0.3734065706290807, "grad_norm": 0.5250973105430603, "learning_rate": 2.8855682304355787e-05, "loss": 0.187, "step": 3603 }, { "epoch": 0.37351020831174214, "grad_norm": 0.48250705003738403, "learning_rate": 2.884966227196758e-05, "loss": 0.1926, "step": 3604 }, { "epoch": 0.37361384599440356, "grad_norm": 0.5074548125267029, "learning_rate": 2.8843641242413184e-05, "loss": 0.2024, "step": 3605 }, { "epoch": 0.373717483677065, "grad_norm": 0.516728937625885, "learning_rate": 2.8837619216371045e-05, "loss": 0.2338, "step": 3606 }, { "epoch": 0.3738211213597264, "grad_norm": 0.4710516333580017, "learning_rate": 2.8831596194519713e-05, "loss": 0.2226, "step": 3607 }, { "epoch": 0.3739247590423878, "grad_norm": 0.5935936570167542, "learning_rate": 2.8825572177537846e-05, "loss": 0.2288, "step": 3608 }, { "epoch": 0.37402839672504923, "grad_norm": 0.46532249450683594, "learning_rate": 2.8819547166104228e-05, "loss": 0.2031, "step": 3609 }, { "epoch": 0.37413203440771065, "grad_norm": 0.49024295806884766, "learning_rate": 2.8813521160897742e-05, "loss": 0.2221, "step": 3610 }, { "epoch": 0.37423567209037206, "grad_norm": 0.46530261635780334, "learning_rate": 2.8807494162597395e-05, "loss": 0.198, "step": 3611 }, { "epoch": 0.3743393097730335, "grad_norm": 0.5083104372024536, "learning_rate": 2.8801466171882296e-05, "loss": 0.2171, "step": 3612 }, { "epoch": 0.3744429474556949, "grad_norm": 0.534168541431427, "learning_rate": 2.8795437189431675e-05, "loss": 0.2295, "step": 3613 }, { "epoch": 0.3745465851383563, "grad_norm": 0.5137151479721069, "learning_rate": 2.8789407215924855e-05, "loss": 0.2035, "step": 3614 }, { "epoch": 0.37465022282101773, "grad_norm": 0.5426977872848511, "learning_rate": 2.87833762520413e-05, "loss": 0.2551, "step": 3615 }, { "epoch": 0.37475386050367915, "grad_norm": 0.5401124358177185, "learning_rate": 2.877734429846057e-05, "loss": 0.214, "step": 3616 }, { "epoch": 0.37485749818634057, "grad_norm": 0.46718594431877136, "learning_rate": 2.8771311355862323e-05, "loss": 0.2072, "step": 3617 }, { "epoch": 0.374961135869002, "grad_norm": 0.5239135026931763, "learning_rate": 2.876527742492634e-05, "loss": 0.2267, "step": 3618 }, { "epoch": 0.3750647735516634, "grad_norm": 0.4801141321659088, "learning_rate": 2.8759242506332534e-05, "loss": 0.2098, "step": 3619 }, { "epoch": 0.3751684112343248, "grad_norm": 0.5632691979408264, "learning_rate": 2.875320660076089e-05, "loss": 0.2429, "step": 3620 }, { "epoch": 0.37527204891698623, "grad_norm": 0.48936334252357483, "learning_rate": 2.8747169708891537e-05, "loss": 0.2224, "step": 3621 }, { "epoch": 0.37537568659964765, "grad_norm": 0.47907862067222595, "learning_rate": 2.87411318314047e-05, "loss": 0.191, "step": 3622 }, { "epoch": 0.37547932428230907, "grad_norm": 0.5278570055961609, "learning_rate": 2.873509296898072e-05, "loss": 0.2606, "step": 3623 }, { "epoch": 0.3755829619649705, "grad_norm": 0.5191770792007446, "learning_rate": 2.8729053122300032e-05, "loss": 0.2244, "step": 3624 }, { "epoch": 0.3756865996476319, "grad_norm": 0.4299468994140625, "learning_rate": 2.8723012292043223e-05, "loss": 0.2104, "step": 3625 }, { "epoch": 0.3757902373302933, "grad_norm": 0.5098922848701477, "learning_rate": 2.871697047889094e-05, "loss": 0.2499, "step": 3626 }, { "epoch": 0.37589387501295474, "grad_norm": 0.44818735122680664, "learning_rate": 2.871092768352397e-05, "loss": 0.1935, "step": 3627 }, { "epoch": 0.3759975126956161, "grad_norm": 0.5166599154472351, "learning_rate": 2.8704883906623216e-05, "loss": 0.2431, "step": 3628 }, { "epoch": 0.3761011503782775, "grad_norm": 0.43848398327827454, "learning_rate": 2.8698839148869676e-05, "loss": 0.1817, "step": 3629 }, { "epoch": 0.37620478806093893, "grad_norm": 0.48116207122802734, "learning_rate": 2.869279341094446e-05, "loss": 0.2319, "step": 3630 }, { "epoch": 0.37630842574360035, "grad_norm": 0.4673698842525482, "learning_rate": 2.86867466935288e-05, "loss": 0.1873, "step": 3631 }, { "epoch": 0.37641206342626177, "grad_norm": 0.4763447344303131, "learning_rate": 2.8680698997304025e-05, "loss": 0.2004, "step": 3632 }, { "epoch": 0.3765157011089232, "grad_norm": 0.42831769585609436, "learning_rate": 2.867465032295158e-05, "loss": 0.1795, "step": 3633 }, { "epoch": 0.3766193387915846, "grad_norm": 0.5147861838340759, "learning_rate": 2.866860067115302e-05, "loss": 0.2013, "step": 3634 }, { "epoch": 0.376722976474246, "grad_norm": 0.5274387001991272, "learning_rate": 2.8662550042590015e-05, "loss": 0.2056, "step": 3635 }, { "epoch": 0.37682661415690744, "grad_norm": 0.5126632452011108, "learning_rate": 2.8656498437944335e-05, "loss": 0.2297, "step": 3636 }, { "epoch": 0.37693025183956885, "grad_norm": 0.49158743023872375, "learning_rate": 2.865044585789787e-05, "loss": 0.2318, "step": 3637 }, { "epoch": 0.37703388952223027, "grad_norm": 0.5416737794876099, "learning_rate": 2.8644392303132612e-05, "loss": 0.2301, "step": 3638 }, { "epoch": 0.3771375272048917, "grad_norm": 0.44698041677474976, "learning_rate": 2.8638337774330667e-05, "loss": 0.2062, "step": 3639 }, { "epoch": 0.3772411648875531, "grad_norm": 0.4664641320705414, "learning_rate": 2.863228227217425e-05, "loss": 0.2039, "step": 3640 }, { "epoch": 0.3773448025702145, "grad_norm": 0.4857037365436554, "learning_rate": 2.8626225797345692e-05, "loss": 0.2384, "step": 3641 }, { "epoch": 0.37744844025287594, "grad_norm": 0.5228758454322815, "learning_rate": 2.8620168350527414e-05, "loss": 0.2612, "step": 3642 }, { "epoch": 0.37755207793553736, "grad_norm": 0.4399373531341553, "learning_rate": 2.8614109932401968e-05, "loss": 0.1923, "step": 3643 }, { "epoch": 0.3776557156181988, "grad_norm": 0.5993676781654358, "learning_rate": 2.8608050543652005e-05, "loss": 0.2368, "step": 3644 }, { "epoch": 0.3777593533008602, "grad_norm": 0.5373175740242004, "learning_rate": 2.8601990184960287e-05, "loss": 0.245, "step": 3645 }, { "epoch": 0.3778629909835216, "grad_norm": 0.5086439251899719, "learning_rate": 2.859592885700969e-05, "loss": 0.2329, "step": 3646 }, { "epoch": 0.377966628666183, "grad_norm": 0.4674811363220215, "learning_rate": 2.8589866560483188e-05, "loss": 0.1995, "step": 3647 }, { "epoch": 0.37807026634884444, "grad_norm": 0.5111047625541687, "learning_rate": 2.858380329606388e-05, "loss": 0.2598, "step": 3648 }, { "epoch": 0.37817390403150586, "grad_norm": 0.4779401421546936, "learning_rate": 2.8577739064434954e-05, "loss": 0.2097, "step": 3649 }, { "epoch": 0.3782775417141673, "grad_norm": 0.47433412075042725, "learning_rate": 2.857167386627973e-05, "loss": 0.2137, "step": 3650 }, { "epoch": 0.3783811793968287, "grad_norm": 0.4639725685119629, "learning_rate": 2.8565607702281622e-05, "loss": 0.1845, "step": 3651 }, { "epoch": 0.3784848170794901, "grad_norm": 0.5112288594245911, "learning_rate": 2.8559540573124152e-05, "loss": 0.2398, "step": 3652 }, { "epoch": 0.3785884547621515, "grad_norm": 0.5371864438056946, "learning_rate": 2.8553472479490952e-05, "loss": 0.2088, "step": 3653 }, { "epoch": 0.37869209244481294, "grad_norm": 0.5942397117614746, "learning_rate": 2.8547403422065776e-05, "loss": 0.2508, "step": 3654 }, { "epoch": 0.37879573012747436, "grad_norm": 0.5302644371986389, "learning_rate": 2.8541333401532463e-05, "loss": 0.2158, "step": 3655 }, { "epoch": 0.3788993678101358, "grad_norm": 0.489822655916214, "learning_rate": 2.8535262418574982e-05, "loss": 0.237, "step": 3656 }, { "epoch": 0.3790030054927972, "grad_norm": 0.47960638999938965, "learning_rate": 2.85291904738774e-05, "loss": 0.224, "step": 3657 }, { "epoch": 0.3791066431754586, "grad_norm": 0.41798311471939087, "learning_rate": 2.852311756812389e-05, "loss": 0.1702, "step": 3658 }, { "epoch": 0.37921028085812003, "grad_norm": 0.5734483599662781, "learning_rate": 2.851704370199875e-05, "loss": 0.2361, "step": 3659 }, { "epoch": 0.37931391854078145, "grad_norm": 0.48896273970603943, "learning_rate": 2.8510968876186365e-05, "loss": 0.2099, "step": 3660 }, { "epoch": 0.37941755622344286, "grad_norm": 0.5119735598564148, "learning_rate": 2.8504893091371236e-05, "loss": 0.2296, "step": 3661 }, { "epoch": 0.3795211939061043, "grad_norm": 0.44356822967529297, "learning_rate": 2.849881634823797e-05, "loss": 0.1887, "step": 3662 }, { "epoch": 0.3796248315887657, "grad_norm": 0.47616955637931824, "learning_rate": 2.84927386474713e-05, "loss": 0.2442, "step": 3663 }, { "epoch": 0.3797284692714271, "grad_norm": 0.5216185450553894, "learning_rate": 2.8486659989756034e-05, "loss": 0.2451, "step": 3664 }, { "epoch": 0.37983210695408853, "grad_norm": 0.5075971484184265, "learning_rate": 2.848058037577711e-05, "loss": 0.2533, "step": 3665 }, { "epoch": 0.3799357446367499, "grad_norm": 0.5356259346008301, "learning_rate": 2.8474499806219577e-05, "loss": 0.2233, "step": 3666 }, { "epoch": 0.3800393823194113, "grad_norm": 0.36403751373291016, "learning_rate": 2.8468418281768586e-05, "loss": 0.1487, "step": 3667 }, { "epoch": 0.38014302000207273, "grad_norm": 0.5536927580833435, "learning_rate": 2.8462335803109372e-05, "loss": 0.2205, "step": 3668 }, { "epoch": 0.38024665768473415, "grad_norm": 0.5602768063545227, "learning_rate": 2.8456252370927324e-05, "loss": 0.2484, "step": 3669 }, { "epoch": 0.38035029536739556, "grad_norm": 0.44447940587997437, "learning_rate": 2.845016798590791e-05, "loss": 0.2046, "step": 3670 }, { "epoch": 0.380453933050057, "grad_norm": 0.4588824212551117, "learning_rate": 2.8444082648736695e-05, "loss": 0.1836, "step": 3671 }, { "epoch": 0.3805575707327184, "grad_norm": 0.5337849259376526, "learning_rate": 2.843799636009937e-05, "loss": 0.2089, "step": 3672 }, { "epoch": 0.3806612084153798, "grad_norm": 0.5306573510169983, "learning_rate": 2.843190912068174e-05, "loss": 0.2089, "step": 3673 }, { "epoch": 0.38076484609804123, "grad_norm": 0.42665427923202515, "learning_rate": 2.8425820931169695e-05, "loss": 0.1583, "step": 3674 }, { "epoch": 0.38086848378070265, "grad_norm": 0.47262972593307495, "learning_rate": 2.8419731792249248e-05, "loss": 0.2146, "step": 3675 }, { "epoch": 0.38097212146336407, "grad_norm": 0.5057385563850403, "learning_rate": 2.8413641704606516e-05, "loss": 0.2437, "step": 3676 }, { "epoch": 0.3810757591460255, "grad_norm": 0.5412306785583496, "learning_rate": 2.8407550668927708e-05, "loss": 0.2116, "step": 3677 }, { "epoch": 0.3811793968286869, "grad_norm": 0.4526110887527466, "learning_rate": 2.8401458685899153e-05, "loss": 0.1855, "step": 3678 }, { "epoch": 0.3812830345113483, "grad_norm": 0.4231882393360138, "learning_rate": 2.8395365756207312e-05, "loss": 0.173, "step": 3679 }, { "epoch": 0.38138667219400973, "grad_norm": 0.4655570685863495, "learning_rate": 2.8389271880538694e-05, "loss": 0.1903, "step": 3680 }, { "epoch": 0.38149030987667115, "grad_norm": 0.5213018655776978, "learning_rate": 2.8383177059579972e-05, "loss": 0.2384, "step": 3681 }, { "epoch": 0.38159394755933257, "grad_norm": 0.5192651748657227, "learning_rate": 2.8377081294017883e-05, "loss": 0.2217, "step": 3682 }, { "epoch": 0.381697585241994, "grad_norm": 0.5380532145500183, "learning_rate": 2.83709845845393e-05, "loss": 0.2111, "step": 3683 }, { "epoch": 0.3818012229246554, "grad_norm": 0.5495359301567078, "learning_rate": 2.8364886931831183e-05, "loss": 0.2514, "step": 3684 }, { "epoch": 0.3819048606073168, "grad_norm": 0.5004047155380249, "learning_rate": 2.8358788336580618e-05, "loss": 0.2099, "step": 3685 }, { "epoch": 0.38200849828997824, "grad_norm": 0.4904569089412689, "learning_rate": 2.8352688799474776e-05, "loss": 0.1872, "step": 3686 }, { "epoch": 0.38211213597263965, "grad_norm": 0.44963860511779785, "learning_rate": 2.834658832120094e-05, "loss": 0.1746, "step": 3687 }, { "epoch": 0.38221577365530107, "grad_norm": 0.5347903966903687, "learning_rate": 2.8340486902446506e-05, "loss": 0.2204, "step": 3688 }, { "epoch": 0.3823194113379625, "grad_norm": 0.5351092219352722, "learning_rate": 2.8334384543898975e-05, "loss": 0.1985, "step": 3689 }, { "epoch": 0.3824230490206239, "grad_norm": 0.4588063359260559, "learning_rate": 2.832828124624595e-05, "loss": 0.1982, "step": 3690 }, { "epoch": 0.3825266867032853, "grad_norm": 0.501385509967804, "learning_rate": 2.832217701017514e-05, "loss": 0.1994, "step": 3691 }, { "epoch": 0.38263032438594674, "grad_norm": 0.6098371744155884, "learning_rate": 2.8316071836374365e-05, "loss": 0.2754, "step": 3692 }, { "epoch": 0.38273396206860816, "grad_norm": 0.5000957250595093, "learning_rate": 2.8309965725531535e-05, "loss": 0.2122, "step": 3693 }, { "epoch": 0.3828375997512696, "grad_norm": 0.5229092240333557, "learning_rate": 2.830385867833469e-05, "loss": 0.2474, "step": 3694 }, { "epoch": 0.382941237433931, "grad_norm": 0.5045500993728638, "learning_rate": 2.8297750695471965e-05, "loss": 0.2237, "step": 3695 }, { "epoch": 0.3830448751165924, "grad_norm": 0.46148043870925903, "learning_rate": 2.829164177763158e-05, "loss": 0.2063, "step": 3696 }, { "epoch": 0.3831485127992538, "grad_norm": 0.5285515785217285, "learning_rate": 2.8285531925501897e-05, "loss": 0.2431, "step": 3697 }, { "epoch": 0.38325215048191524, "grad_norm": 0.5902262926101685, "learning_rate": 2.827942113977135e-05, "loss": 0.2401, "step": 3698 }, { "epoch": 0.38335578816457666, "grad_norm": 0.489153653383255, "learning_rate": 2.8273309421128502e-05, "loss": 0.1986, "step": 3699 }, { "epoch": 0.3834594258472381, "grad_norm": 0.5225285887718201, "learning_rate": 2.826719677026201e-05, "loss": 0.2199, "step": 3700 }, { "epoch": 0.3835630635298995, "grad_norm": 0.4897618889808655, "learning_rate": 2.8261083187860635e-05, "loss": 0.1975, "step": 3701 }, { "epoch": 0.3836667012125609, "grad_norm": 0.5175366401672363, "learning_rate": 2.8254968674613254e-05, "loss": 0.2548, "step": 3702 }, { "epoch": 0.38377033889522233, "grad_norm": 0.4286321997642517, "learning_rate": 2.8248853231208832e-05, "loss": 0.1868, "step": 3703 }, { "epoch": 0.3838739765778837, "grad_norm": 0.49945926666259766, "learning_rate": 2.8242736858336455e-05, "loss": 0.2267, "step": 3704 }, { "epoch": 0.3839776142605451, "grad_norm": 0.5295497179031372, "learning_rate": 2.8236619556685298e-05, "loss": 0.2116, "step": 3705 }, { "epoch": 0.3840812519432065, "grad_norm": 0.4920368194580078, "learning_rate": 2.823050132694465e-05, "loss": 0.1964, "step": 3706 }, { "epoch": 0.38418488962586794, "grad_norm": 0.49020951986312866, "learning_rate": 2.8224382169803913e-05, "loss": 0.2093, "step": 3707 }, { "epoch": 0.38428852730852936, "grad_norm": 0.4164317846298218, "learning_rate": 2.8218262085952573e-05, "loss": 0.1682, "step": 3708 }, { "epoch": 0.3843921649911908, "grad_norm": 0.4984332323074341, "learning_rate": 2.8212141076080244e-05, "loss": 0.2156, "step": 3709 }, { "epoch": 0.3844958026738522, "grad_norm": 0.6256229281425476, "learning_rate": 2.820601914087662e-05, "loss": 0.2604, "step": 3710 }, { "epoch": 0.3845994403565136, "grad_norm": 0.478753924369812, "learning_rate": 2.8199896281031522e-05, "loss": 0.1884, "step": 3711 }, { "epoch": 0.38470307803917503, "grad_norm": 0.5280382633209229, "learning_rate": 2.819377249723485e-05, "loss": 0.2263, "step": 3712 }, { "epoch": 0.38480671572183645, "grad_norm": 0.42487865686416626, "learning_rate": 2.818764779017663e-05, "loss": 0.1919, "step": 3713 }, { "epoch": 0.38491035340449786, "grad_norm": 0.5221870541572571, "learning_rate": 2.818152216054699e-05, "loss": 0.203, "step": 3714 }, { "epoch": 0.3850139910871593, "grad_norm": 0.45968756079673767, "learning_rate": 2.8175395609036148e-05, "loss": 0.206, "step": 3715 }, { "epoch": 0.3851176287698207, "grad_norm": 0.4523147940635681, "learning_rate": 2.816926813633444e-05, "loss": 0.1907, "step": 3716 }, { "epoch": 0.3852212664524821, "grad_norm": 0.46949905157089233, "learning_rate": 2.81631397431323e-05, "loss": 0.2099, "step": 3717 }, { "epoch": 0.38532490413514353, "grad_norm": 0.4426126480102539, "learning_rate": 2.8157010430120257e-05, "loss": 0.177, "step": 3718 }, { "epoch": 0.38542854181780495, "grad_norm": 0.553290069103241, "learning_rate": 2.8150880197988958e-05, "loss": 0.2324, "step": 3719 }, { "epoch": 0.38553217950046637, "grad_norm": 0.4237673282623291, "learning_rate": 2.8144749047429155e-05, "loss": 0.1808, "step": 3720 }, { "epoch": 0.3856358171831278, "grad_norm": 0.4610205590724945, "learning_rate": 2.813861697913169e-05, "loss": 0.2064, "step": 3721 }, { "epoch": 0.3857394548657892, "grad_norm": 0.513278603553772, "learning_rate": 2.8132483993787513e-05, "loss": 0.2378, "step": 3722 }, { "epoch": 0.3858430925484506, "grad_norm": 0.5233225226402283, "learning_rate": 2.8126350092087683e-05, "loss": 0.2396, "step": 3723 }, { "epoch": 0.38594673023111203, "grad_norm": 0.4505366086959839, "learning_rate": 2.812021527472336e-05, "loss": 0.1931, "step": 3724 }, { "epoch": 0.38605036791377345, "grad_norm": 0.5511522889137268, "learning_rate": 2.81140795423858e-05, "loss": 0.2346, "step": 3725 }, { "epoch": 0.38615400559643487, "grad_norm": 0.4193154275417328, "learning_rate": 2.8107942895766372e-05, "loss": 0.2101, "step": 3726 }, { "epoch": 0.3862576432790963, "grad_norm": 0.42316773533821106, "learning_rate": 2.8101805335556543e-05, "loss": 0.1671, "step": 3727 }, { "epoch": 0.3863612809617577, "grad_norm": 0.48060035705566406, "learning_rate": 2.8095666862447876e-05, "loss": 0.209, "step": 3728 }, { "epoch": 0.3864649186444191, "grad_norm": 0.44975948333740234, "learning_rate": 2.808952747713206e-05, "loss": 0.2219, "step": 3729 }, { "epoch": 0.38656855632708054, "grad_norm": 0.4692697525024414, "learning_rate": 2.8083387180300864e-05, "loss": 0.2145, "step": 3730 }, { "epoch": 0.38667219400974195, "grad_norm": 0.5550007224082947, "learning_rate": 2.807724597264616e-05, "loss": 0.2177, "step": 3731 }, { "epoch": 0.38677583169240337, "grad_norm": 0.46553054451942444, "learning_rate": 2.8071103854859943e-05, "loss": 0.1989, "step": 3732 }, { "epoch": 0.3868794693750648, "grad_norm": 0.5102365016937256, "learning_rate": 2.8064960827634284e-05, "loss": 0.2313, "step": 3733 }, { "epoch": 0.3869831070577262, "grad_norm": 0.48375582695007324, "learning_rate": 2.805881689166138e-05, "loss": 0.2148, "step": 3734 }, { "epoch": 0.3870867447403876, "grad_norm": 0.50575190782547, "learning_rate": 2.8052672047633514e-05, "loss": 0.2292, "step": 3735 }, { "epoch": 0.38719038242304904, "grad_norm": 0.5282059907913208, "learning_rate": 2.804652629624309e-05, "loss": 0.2265, "step": 3736 }, { "epoch": 0.38729402010571046, "grad_norm": 0.5234432220458984, "learning_rate": 2.804037963818258e-05, "loss": 0.205, "step": 3737 }, { "epoch": 0.3873976577883719, "grad_norm": 0.5600778460502625, "learning_rate": 2.8034232074144586e-05, "loss": 0.2626, "step": 3738 }, { "epoch": 0.3875012954710333, "grad_norm": 0.41027340292930603, "learning_rate": 2.8028083604821827e-05, "loss": 0.1776, "step": 3739 }, { "epoch": 0.3876049331536947, "grad_norm": 0.5438401699066162, "learning_rate": 2.802193423090708e-05, "loss": 0.2424, "step": 3740 }, { "epoch": 0.3877085708363561, "grad_norm": 0.459451824426651, "learning_rate": 2.8015783953093253e-05, "loss": 0.1835, "step": 3741 }, { "epoch": 0.3878122085190175, "grad_norm": 0.46863898634910583, "learning_rate": 2.8009632772073348e-05, "loss": 0.2197, "step": 3742 }, { "epoch": 0.3879158462016789, "grad_norm": 0.4693146347999573, "learning_rate": 2.800348068854048e-05, "loss": 0.2177, "step": 3743 }, { "epoch": 0.3880194838843403, "grad_norm": 0.6115514636039734, "learning_rate": 2.7997327703187848e-05, "loss": 0.272, "step": 3744 }, { "epoch": 0.38812312156700174, "grad_norm": 0.5041795969009399, "learning_rate": 2.7991173816708765e-05, "loss": 0.2129, "step": 3745 }, { "epoch": 0.38822675924966316, "grad_norm": 0.5383294820785522, "learning_rate": 2.7985019029796636e-05, "loss": 0.2416, "step": 3746 }, { "epoch": 0.3883303969323246, "grad_norm": 0.6601837873458862, "learning_rate": 2.7978863343144973e-05, "loss": 0.2338, "step": 3747 }, { "epoch": 0.388434034614986, "grad_norm": 0.5024908781051636, "learning_rate": 2.7972706757447392e-05, "loss": 0.2098, "step": 3748 }, { "epoch": 0.3885376722976474, "grad_norm": 0.43663129210472107, "learning_rate": 2.796654927339761e-05, "loss": 0.1831, "step": 3749 }, { "epoch": 0.3886413099803088, "grad_norm": 0.4988679885864258, "learning_rate": 2.796039089168944e-05, "loss": 0.1949, "step": 3750 }, { "epoch": 0.38874494766297024, "grad_norm": 0.4759289026260376, "learning_rate": 2.79542316130168e-05, "loss": 0.2208, "step": 3751 }, { "epoch": 0.38884858534563166, "grad_norm": 0.558282196521759, "learning_rate": 2.7948071438073702e-05, "loss": 0.2515, "step": 3752 }, { "epoch": 0.3889522230282931, "grad_norm": 0.3603500425815582, "learning_rate": 2.7941910367554276e-05, "loss": 0.1413, "step": 3753 }, { "epoch": 0.3890558607109545, "grad_norm": 0.5307490229606628, "learning_rate": 2.7935748402152733e-05, "loss": 0.1958, "step": 3754 }, { "epoch": 0.3891594983936159, "grad_norm": 0.49905791878700256, "learning_rate": 2.7929585542563404e-05, "loss": 0.1986, "step": 3755 }, { "epoch": 0.3892631360762773, "grad_norm": 0.5282691717147827, "learning_rate": 2.7923421789480692e-05, "loss": 0.1965, "step": 3756 }, { "epoch": 0.38936677375893874, "grad_norm": 0.48384660482406616, "learning_rate": 2.791725714359913e-05, "loss": 0.1925, "step": 3757 }, { "epoch": 0.38947041144160016, "grad_norm": 0.49548470973968506, "learning_rate": 2.7911091605613348e-05, "loss": 0.2268, "step": 3758 }, { "epoch": 0.3895740491242616, "grad_norm": 0.48279035091400146, "learning_rate": 2.7904925176218055e-05, "loss": 0.2361, "step": 3759 }, { "epoch": 0.389677686806923, "grad_norm": 0.4718702733516693, "learning_rate": 2.7898757856108086e-05, "loss": 0.1889, "step": 3760 }, { "epoch": 0.3897813244895844, "grad_norm": 0.5636996030807495, "learning_rate": 2.789258964597836e-05, "loss": 0.2656, "step": 3761 }, { "epoch": 0.38988496217224583, "grad_norm": 0.41767802834510803, "learning_rate": 2.78864205465239e-05, "loss": 0.1772, "step": 3762 }, { "epoch": 0.38998859985490725, "grad_norm": 0.4735535681247711, "learning_rate": 2.788025055843983e-05, "loss": 0.2161, "step": 3763 }, { "epoch": 0.39009223753756866, "grad_norm": 0.5146605968475342, "learning_rate": 2.787407968242138e-05, "loss": 0.214, "step": 3764 }, { "epoch": 0.3901958752202301, "grad_norm": 0.4984552264213562, "learning_rate": 2.7867907919163878e-05, "loss": 0.2188, "step": 3765 }, { "epoch": 0.3902995129028915, "grad_norm": 0.5831129550933838, "learning_rate": 2.786173526936274e-05, "loss": 0.2528, "step": 3766 }, { "epoch": 0.3904031505855529, "grad_norm": 0.4989360272884369, "learning_rate": 2.7855561733713486e-05, "loss": 0.2426, "step": 3767 }, { "epoch": 0.39050678826821433, "grad_norm": 0.40938708186149597, "learning_rate": 2.7849387312911754e-05, "loss": 0.1791, "step": 3768 }, { "epoch": 0.39061042595087575, "grad_norm": 0.5959457159042358, "learning_rate": 2.784321200765326e-05, "loss": 0.2583, "step": 3769 }, { "epoch": 0.39071406363353717, "grad_norm": 0.4509630501270294, "learning_rate": 2.7837035818633827e-05, "loss": 0.2004, "step": 3770 }, { "epoch": 0.3908177013161986, "grad_norm": 0.47014522552490234, "learning_rate": 2.7830858746549388e-05, "loss": 0.2221, "step": 3771 }, { "epoch": 0.39092133899886, "grad_norm": 0.5484637022018433, "learning_rate": 2.7824680792095945e-05, "loss": 0.2175, "step": 3772 }, { "epoch": 0.3910249766815214, "grad_norm": 0.48253583908081055, "learning_rate": 2.7818501955969642e-05, "loss": 0.2088, "step": 3773 }, { "epoch": 0.39112861436418284, "grad_norm": 0.4862441122531891, "learning_rate": 2.78123222388667e-05, "loss": 0.2095, "step": 3774 }, { "epoch": 0.39123225204684425, "grad_norm": 0.45672932267189026, "learning_rate": 2.7806141641483425e-05, "loss": 0.1935, "step": 3775 }, { "epoch": 0.39133588972950567, "grad_norm": 0.5105577707290649, "learning_rate": 2.7799960164516243e-05, "loss": 0.2201, "step": 3776 }, { "epoch": 0.3914395274121671, "grad_norm": 0.47937294840812683, "learning_rate": 2.7793777808661676e-05, "loss": 0.1911, "step": 3777 }, { "epoch": 0.3915431650948285, "grad_norm": 0.566051185131073, "learning_rate": 2.7787594574616345e-05, "loss": 0.2338, "step": 3778 }, { "epoch": 0.3916468027774899, "grad_norm": 0.5488920211791992, "learning_rate": 2.7781410463076963e-05, "loss": 0.2653, "step": 3779 }, { "epoch": 0.3917504404601513, "grad_norm": 0.48996976017951965, "learning_rate": 2.7775225474740347e-05, "loss": 0.2128, "step": 3780 }, { "epoch": 0.3918540781428127, "grad_norm": 0.5063322186470032, "learning_rate": 2.7769039610303408e-05, "loss": 0.2321, "step": 3781 }, { "epoch": 0.3919577158254741, "grad_norm": 0.5853592157363892, "learning_rate": 2.776285287046316e-05, "loss": 0.2537, "step": 3782 }, { "epoch": 0.39206135350813553, "grad_norm": 0.5853617191314697, "learning_rate": 2.775666525591673e-05, "loss": 0.257, "step": 3783 }, { "epoch": 0.39216499119079695, "grad_norm": 0.5316409468650818, "learning_rate": 2.77504767673613e-05, "loss": 0.1986, "step": 3784 }, { "epoch": 0.39226862887345837, "grad_norm": 0.4623914062976837, "learning_rate": 2.774428740549421e-05, "loss": 0.2091, "step": 3785 }, { "epoch": 0.3923722665561198, "grad_norm": 0.5352954268455505, "learning_rate": 2.7738097171012848e-05, "loss": 0.2586, "step": 3786 }, { "epoch": 0.3924759042387812, "grad_norm": 0.4515346884727478, "learning_rate": 2.773190606461473e-05, "loss": 0.1974, "step": 3787 }, { "epoch": 0.3925795419214426, "grad_norm": 0.519649088382721, "learning_rate": 2.772571408699745e-05, "loss": 0.2108, "step": 3788 }, { "epoch": 0.39268317960410404, "grad_norm": 0.4203835427761078, "learning_rate": 2.771952123885872e-05, "loss": 0.1778, "step": 3789 }, { "epoch": 0.39278681728676546, "grad_norm": 0.47701048851013184, "learning_rate": 2.771332752089634e-05, "loss": 0.2384, "step": 3790 }, { "epoch": 0.39289045496942687, "grad_norm": 0.4731229543685913, "learning_rate": 2.7707132933808202e-05, "loss": 0.1945, "step": 3791 }, { "epoch": 0.3929940926520883, "grad_norm": 0.5297321677207947, "learning_rate": 2.77009374782923e-05, "loss": 0.226, "step": 3792 }, { "epoch": 0.3930977303347497, "grad_norm": 0.45654892921447754, "learning_rate": 2.769474115504674e-05, "loss": 0.2059, "step": 3793 }, { "epoch": 0.3932013680174111, "grad_norm": 0.45342886447906494, "learning_rate": 2.7688543964769716e-05, "loss": 0.2228, "step": 3794 }, { "epoch": 0.39330500570007254, "grad_norm": 0.49522092938423157, "learning_rate": 2.7682345908159497e-05, "loss": 0.231, "step": 3795 }, { "epoch": 0.39340864338273396, "grad_norm": 0.5499382019042969, "learning_rate": 2.76761469859145e-05, "loss": 0.2165, "step": 3796 }, { "epoch": 0.3935122810653954, "grad_norm": 0.48214516043663025, "learning_rate": 2.7669947198733177e-05, "loss": 0.1901, "step": 3797 }, { "epoch": 0.3936159187480568, "grad_norm": 0.48165568709373474, "learning_rate": 2.7663746547314134e-05, "loss": 0.2256, "step": 3798 }, { "epoch": 0.3937195564307182, "grad_norm": 0.5463171601295471, "learning_rate": 2.7657545032356042e-05, "loss": 0.2254, "step": 3799 }, { "epoch": 0.3938231941133796, "grad_norm": 0.5542256832122803, "learning_rate": 2.765134265455768e-05, "loss": 0.2026, "step": 3800 }, { "epoch": 0.39392683179604104, "grad_norm": 0.4160597026348114, "learning_rate": 2.7645139414617922e-05, "loss": 0.1652, "step": 3801 }, { "epoch": 0.39403046947870246, "grad_norm": 0.5020577907562256, "learning_rate": 2.7638935313235738e-05, "loss": 0.2038, "step": 3802 }, { "epoch": 0.3941341071613639, "grad_norm": 0.46063679456710815, "learning_rate": 2.76327303511102e-05, "loss": 0.2035, "step": 3803 }, { "epoch": 0.3942377448440253, "grad_norm": 0.6740379333496094, "learning_rate": 2.762652452894047e-05, "loss": 0.2884, "step": 3804 }, { "epoch": 0.3943413825266867, "grad_norm": 0.487909734249115, "learning_rate": 2.7620317847425808e-05, "loss": 0.2064, "step": 3805 }, { "epoch": 0.39444502020934813, "grad_norm": 0.49684950709342957, "learning_rate": 2.7614110307265587e-05, "loss": 0.2161, "step": 3806 }, { "epoch": 0.39454865789200955, "grad_norm": 0.5438411831855774, "learning_rate": 2.7607901909159243e-05, "loss": 0.2406, "step": 3807 }, { "epoch": 0.39465229557467096, "grad_norm": 0.49316468834877014, "learning_rate": 2.7601692653806343e-05, "loss": 0.2279, "step": 3808 }, { "epoch": 0.3947559332573324, "grad_norm": 0.49097537994384766, "learning_rate": 2.7595482541906534e-05, "loss": 0.2197, "step": 3809 }, { "epoch": 0.3948595709399938, "grad_norm": 0.5558735728263855, "learning_rate": 2.758927157415956e-05, "loss": 0.2393, "step": 3810 }, { "epoch": 0.3949632086226552, "grad_norm": 0.45270583033561707, "learning_rate": 2.7583059751265256e-05, "loss": 0.2114, "step": 3811 }, { "epoch": 0.39506684630531663, "grad_norm": 0.4534200429916382, "learning_rate": 2.7576847073923572e-05, "loss": 0.1899, "step": 3812 }, { "epoch": 0.39517048398797805, "grad_norm": 0.5005395412445068, "learning_rate": 2.757063354283454e-05, "loss": 0.2217, "step": 3813 }, { "epoch": 0.39527412167063947, "grad_norm": 0.47601640224456787, "learning_rate": 2.7564419158698282e-05, "loss": 0.1935, "step": 3814 }, { "epoch": 0.3953777593533009, "grad_norm": 0.4823366701602936, "learning_rate": 2.7558203922215044e-05, "loss": 0.2128, "step": 3815 }, { "epoch": 0.3954813970359623, "grad_norm": 0.4116901457309723, "learning_rate": 2.7551987834085125e-05, "loss": 0.1867, "step": 3816 }, { "epoch": 0.3955850347186237, "grad_norm": 0.45607835054397583, "learning_rate": 2.7545770895008962e-05, "loss": 0.1936, "step": 3817 }, { "epoch": 0.3956886724012851, "grad_norm": 0.46392399072647095, "learning_rate": 2.7539553105687063e-05, "loss": 0.2034, "step": 3818 }, { "epoch": 0.3957923100839465, "grad_norm": 0.5502959489822388, "learning_rate": 2.7533334466820046e-05, "loss": 0.2184, "step": 3819 }, { "epoch": 0.3958959477666079, "grad_norm": 0.49488842487335205, "learning_rate": 2.752711497910861e-05, "loss": 0.2121, "step": 3820 }, { "epoch": 0.39599958544926933, "grad_norm": 0.49059224128723145, "learning_rate": 2.7520894643253554e-05, "loss": 0.2081, "step": 3821 }, { "epoch": 0.39610322313193075, "grad_norm": 0.5765684843063354, "learning_rate": 2.7514673459955786e-05, "loss": 0.2582, "step": 3822 }, { "epoch": 0.39620686081459217, "grad_norm": 0.44499170780181885, "learning_rate": 2.750845142991629e-05, "loss": 0.1702, "step": 3823 }, { "epoch": 0.3963104984972536, "grad_norm": 0.49501338601112366, "learning_rate": 2.750222855383616e-05, "loss": 0.1933, "step": 3824 }, { "epoch": 0.396414136179915, "grad_norm": 0.5264593362808228, "learning_rate": 2.7496004832416584e-05, "loss": 0.2337, "step": 3825 }, { "epoch": 0.3965177738625764, "grad_norm": 0.5584926009178162, "learning_rate": 2.7489780266358835e-05, "loss": 0.2621, "step": 3826 }, { "epoch": 0.39662141154523783, "grad_norm": 0.47269389033317566, "learning_rate": 2.7483554856364282e-05, "loss": 0.2089, "step": 3827 }, { "epoch": 0.39672504922789925, "grad_norm": 0.5708098411560059, "learning_rate": 2.7477328603134413e-05, "loss": 0.235, "step": 3828 }, { "epoch": 0.39682868691056067, "grad_norm": 0.5021374821662903, "learning_rate": 2.7471101507370768e-05, "loss": 0.2116, "step": 3829 }, { "epoch": 0.3969323245932221, "grad_norm": 0.45472949743270874, "learning_rate": 2.746487356977503e-05, "loss": 0.1767, "step": 3830 }, { "epoch": 0.3970359622758835, "grad_norm": 0.4058346748352051, "learning_rate": 2.7458644791048937e-05, "loss": 0.172, "step": 3831 }, { "epoch": 0.3971395999585449, "grad_norm": 0.5224340558052063, "learning_rate": 2.745241517189434e-05, "loss": 0.2334, "step": 3832 }, { "epoch": 0.39724323764120634, "grad_norm": 0.550030529499054, "learning_rate": 2.744618471301319e-05, "loss": 0.2282, "step": 3833 }, { "epoch": 0.39734687532386775, "grad_norm": 0.4936904013156891, "learning_rate": 2.7439953415107527e-05, "loss": 0.2152, "step": 3834 }, { "epoch": 0.39745051300652917, "grad_norm": 0.5040317177772522, "learning_rate": 2.7433721278879474e-05, "loss": 0.2241, "step": 3835 }, { "epoch": 0.3975541506891906, "grad_norm": 0.47307634353637695, "learning_rate": 2.7427488305031264e-05, "loss": 0.219, "step": 3836 }, { "epoch": 0.397657788371852, "grad_norm": 0.46557918190956116, "learning_rate": 2.7421254494265218e-05, "loss": 0.2127, "step": 3837 }, { "epoch": 0.3977614260545134, "grad_norm": 0.5010596513748169, "learning_rate": 2.741501984728375e-05, "loss": 0.2423, "step": 3838 }, { "epoch": 0.39786506373717484, "grad_norm": 0.6242858171463013, "learning_rate": 2.7408784364789373e-05, "loss": 0.2613, "step": 3839 }, { "epoch": 0.39796870141983626, "grad_norm": 0.5589104294776917, "learning_rate": 2.7402548047484693e-05, "loss": 0.2325, "step": 3840 }, { "epoch": 0.3980723391024977, "grad_norm": 0.5010468363761902, "learning_rate": 2.7396310896072412e-05, "loss": 0.2008, "step": 3841 }, { "epoch": 0.3981759767851591, "grad_norm": 0.4926588833332062, "learning_rate": 2.73900729112553e-05, "loss": 0.1933, "step": 3842 }, { "epoch": 0.3982796144678205, "grad_norm": 0.5381539463996887, "learning_rate": 2.7383834093736278e-05, "loss": 0.2323, "step": 3843 }, { "epoch": 0.3983832521504819, "grad_norm": 0.5139055252075195, "learning_rate": 2.7377594444218298e-05, "loss": 0.202, "step": 3844 }, { "epoch": 0.39848688983314334, "grad_norm": 0.5416883826255798, "learning_rate": 2.737135396340445e-05, "loss": 0.2483, "step": 3845 }, { "epoch": 0.39859052751580476, "grad_norm": 0.5522505640983582, "learning_rate": 2.7365112651997895e-05, "loss": 0.2305, "step": 3846 }, { "epoch": 0.3986941651984662, "grad_norm": 0.5479319095611572, "learning_rate": 2.7358870510701895e-05, "loss": 0.2176, "step": 3847 }, { "epoch": 0.3987978028811276, "grad_norm": 0.4639412462711334, "learning_rate": 2.7352627540219806e-05, "loss": 0.184, "step": 3848 }, { "epoch": 0.398901440563789, "grad_norm": 0.45885124802589417, "learning_rate": 2.7346383741255076e-05, "loss": 0.1719, "step": 3849 }, { "epoch": 0.39900507824645043, "grad_norm": 0.5005857348442078, "learning_rate": 2.734013911451125e-05, "loss": 0.2172, "step": 3850 }, { "epoch": 0.39910871592911185, "grad_norm": 0.5310930609703064, "learning_rate": 2.733389366069195e-05, "loss": 0.2308, "step": 3851 }, { "epoch": 0.39921235361177326, "grad_norm": 0.5748088955879211, "learning_rate": 2.732764738050092e-05, "loss": 0.2393, "step": 3852 }, { "epoch": 0.3993159912944347, "grad_norm": 0.5303531885147095, "learning_rate": 2.7321400274641972e-05, "loss": 0.23, "step": 3853 }, { "epoch": 0.3994196289770961, "grad_norm": 0.5156660079956055, "learning_rate": 2.7315152343819026e-05, "loss": 0.2149, "step": 3854 }, { "epoch": 0.3995232666597575, "grad_norm": 0.5107412338256836, "learning_rate": 2.730890358873608e-05, "loss": 0.2374, "step": 3855 }, { "epoch": 0.3996269043424189, "grad_norm": 0.5499140620231628, "learning_rate": 2.730265401009724e-05, "loss": 0.2443, "step": 3856 }, { "epoch": 0.3997305420250803, "grad_norm": 0.5674372315406799, "learning_rate": 2.7296403608606698e-05, "loss": 0.2273, "step": 3857 }, { "epoch": 0.3998341797077417, "grad_norm": 0.5294408798217773, "learning_rate": 2.7290152384968743e-05, "loss": 0.2283, "step": 3858 }, { "epoch": 0.3999378173904031, "grad_norm": 0.4700827896595001, "learning_rate": 2.7283900339887753e-05, "loss": 0.1761, "step": 3859 }, { "epoch": 0.40004145507306454, "grad_norm": 0.489352822303772, "learning_rate": 2.727764747406819e-05, "loss": 0.1984, "step": 3860 }, { "epoch": 0.40014509275572596, "grad_norm": 0.47893935441970825, "learning_rate": 2.7271393788214622e-05, "loss": 0.1767, "step": 3861 }, { "epoch": 0.4002487304383874, "grad_norm": 0.5101784467697144, "learning_rate": 2.7265139283031713e-05, "loss": 0.2046, "step": 3862 }, { "epoch": 0.4003523681210488, "grad_norm": 0.5397927165031433, "learning_rate": 2.7258883959224197e-05, "loss": 0.235, "step": 3863 }, { "epoch": 0.4004560058037102, "grad_norm": 0.4748551845550537, "learning_rate": 2.7252627817496923e-05, "loss": 0.1911, "step": 3864 }, { "epoch": 0.40055964348637163, "grad_norm": 0.5310261845588684, "learning_rate": 2.7246370858554816e-05, "loss": 0.2191, "step": 3865 }, { "epoch": 0.40066328116903305, "grad_norm": 0.5089344382286072, "learning_rate": 2.7240113083102913e-05, "loss": 0.2, "step": 3866 }, { "epoch": 0.40076691885169446, "grad_norm": 0.6126011610031128, "learning_rate": 2.7233854491846314e-05, "loss": 0.2525, "step": 3867 }, { "epoch": 0.4008705565343559, "grad_norm": 0.5655479431152344, "learning_rate": 2.7227595085490242e-05, "loss": 0.2657, "step": 3868 }, { "epoch": 0.4009741942170173, "grad_norm": 0.5203585624694824, "learning_rate": 2.7221334864739994e-05, "loss": 0.2292, "step": 3869 }, { "epoch": 0.4010778318996787, "grad_norm": 0.5850130319595337, "learning_rate": 2.721507383030096e-05, "loss": 0.23, "step": 3870 }, { "epoch": 0.40118146958234013, "grad_norm": 0.5276358127593994, "learning_rate": 2.7208811982878614e-05, "loss": 0.2176, "step": 3871 }, { "epoch": 0.40128510726500155, "grad_norm": 0.5064187049865723, "learning_rate": 2.7202549323178543e-05, "loss": 0.2187, "step": 3872 }, { "epoch": 0.40138874494766297, "grad_norm": 0.5020982623100281, "learning_rate": 2.7196285851906417e-05, "loss": 0.1937, "step": 3873 }, { "epoch": 0.4014923826303244, "grad_norm": 0.5079458355903625, "learning_rate": 2.719002156976798e-05, "loss": 0.2184, "step": 3874 }, { "epoch": 0.4015960203129858, "grad_norm": 0.5517920851707458, "learning_rate": 2.7183756477469096e-05, "loss": 0.2227, "step": 3875 }, { "epoch": 0.4016996579956472, "grad_norm": 0.48392802476882935, "learning_rate": 2.7177490575715695e-05, "loss": 0.1766, "step": 3876 }, { "epoch": 0.40180329567830864, "grad_norm": 0.48515447974205017, "learning_rate": 2.7171223865213816e-05, "loss": 0.1998, "step": 3877 }, { "epoch": 0.40190693336097005, "grad_norm": 0.5205907821655273, "learning_rate": 2.716495634666958e-05, "loss": 0.2373, "step": 3878 }, { "epoch": 0.40201057104363147, "grad_norm": 0.4959084987640381, "learning_rate": 2.7158688020789202e-05, "loss": 0.2178, "step": 3879 }, { "epoch": 0.4021142087262929, "grad_norm": 0.47091081738471985, "learning_rate": 2.7152418888278983e-05, "loss": 0.1983, "step": 3880 }, { "epoch": 0.4022178464089543, "grad_norm": 0.5432823300361633, "learning_rate": 2.714614894984532e-05, "loss": 0.2555, "step": 3881 }, { "epoch": 0.4023214840916157, "grad_norm": 0.5162708759307861, "learning_rate": 2.7139878206194708e-05, "loss": 0.2116, "step": 3882 }, { "epoch": 0.40242512177427714, "grad_norm": 0.5271724462509155, "learning_rate": 2.7133606658033717e-05, "loss": 0.21, "step": 3883 }, { "epoch": 0.40252875945693856, "grad_norm": 0.4762440621852875, "learning_rate": 2.7127334306069016e-05, "loss": 0.2041, "step": 3884 }, { "epoch": 0.4026323971396, "grad_norm": 0.4821658134460449, "learning_rate": 2.712106115100737e-05, "loss": 0.2078, "step": 3885 }, { "epoch": 0.4027360348222614, "grad_norm": 0.46634823083877563, "learning_rate": 2.7114787193555615e-05, "loss": 0.1904, "step": 3886 }, { "epoch": 0.4028396725049228, "grad_norm": 0.519838273525238, "learning_rate": 2.71085124344207e-05, "loss": 0.2123, "step": 3887 }, { "epoch": 0.4029433101875842, "grad_norm": 0.531387209892273, "learning_rate": 2.7102236874309666e-05, "loss": 0.2175, "step": 3888 }, { "epoch": 0.40304694787024564, "grad_norm": 0.5151052474975586, "learning_rate": 2.709596051392961e-05, "loss": 0.2335, "step": 3889 }, { "epoch": 0.40315058555290706, "grad_norm": 0.5302076935768127, "learning_rate": 2.708968335398776e-05, "loss": 0.1885, "step": 3890 }, { "epoch": 0.4032542232355685, "grad_norm": 0.4122977554798126, "learning_rate": 2.708340539519141e-05, "loss": 0.1672, "step": 3891 }, { "epoch": 0.4033578609182299, "grad_norm": 0.5019770860671997, "learning_rate": 2.707712663824795e-05, "loss": 0.2152, "step": 3892 }, { "epoch": 0.4034614986008913, "grad_norm": 0.5264571309089661, "learning_rate": 2.7070847083864858e-05, "loss": 0.2132, "step": 3893 }, { "epoch": 0.4035651362835527, "grad_norm": 0.489394873380661, "learning_rate": 2.706456673274972e-05, "loss": 0.2037, "step": 3894 }, { "epoch": 0.4036687739662141, "grad_norm": 0.5119697451591492, "learning_rate": 2.7058285585610174e-05, "loss": 0.2347, "step": 3895 }, { "epoch": 0.4037724116488755, "grad_norm": 0.5221233367919922, "learning_rate": 2.7052003643153976e-05, "loss": 0.2272, "step": 3896 }, { "epoch": 0.4038760493315369, "grad_norm": 0.5005158185958862, "learning_rate": 2.704572090608898e-05, "loss": 0.2381, "step": 3897 }, { "epoch": 0.40397968701419834, "grad_norm": 0.4670834243297577, "learning_rate": 2.7039437375123108e-05, "loss": 0.2344, "step": 3898 }, { "epoch": 0.40408332469685976, "grad_norm": 0.5773009657859802, "learning_rate": 2.703315305096437e-05, "loss": 0.2493, "step": 3899 }, { "epoch": 0.4041869623795212, "grad_norm": 0.592467188835144, "learning_rate": 2.702686793432088e-05, "loss": 0.2573, "step": 3900 }, { "epoch": 0.4042906000621826, "grad_norm": 0.5141617059707642, "learning_rate": 2.7020582025900832e-05, "loss": 0.2319, "step": 3901 }, { "epoch": 0.404394237744844, "grad_norm": 0.4802696406841278, "learning_rate": 2.7014295326412514e-05, "loss": 0.2043, "step": 3902 }, { "epoch": 0.4044978754275054, "grad_norm": 0.5270540714263916, "learning_rate": 2.7008007836564307e-05, "loss": 0.2206, "step": 3903 }, { "epoch": 0.40460151311016684, "grad_norm": 0.4769273102283478, "learning_rate": 2.7001719557064673e-05, "loss": 0.2162, "step": 3904 }, { "epoch": 0.40470515079282826, "grad_norm": 0.5118731260299683, "learning_rate": 2.699543048862216e-05, "loss": 0.2191, "step": 3905 }, { "epoch": 0.4048087884754897, "grad_norm": 0.4943947196006775, "learning_rate": 2.6989140631945412e-05, "loss": 0.228, "step": 3906 }, { "epoch": 0.4049124261581511, "grad_norm": 0.40463706851005554, "learning_rate": 2.6982849987743168e-05, "loss": 0.1749, "step": 3907 }, { "epoch": 0.4050160638408125, "grad_norm": 0.47197356820106506, "learning_rate": 2.697655855672424e-05, "loss": 0.2002, "step": 3908 }, { "epoch": 0.40511970152347393, "grad_norm": 0.47983160614967346, "learning_rate": 2.697026633959754e-05, "loss": 0.1981, "step": 3909 }, { "epoch": 0.40522333920613535, "grad_norm": 0.5490193963050842, "learning_rate": 2.6963973337072066e-05, "loss": 0.2429, "step": 3910 }, { "epoch": 0.40532697688879676, "grad_norm": 0.46993234753608704, "learning_rate": 2.6957679549856893e-05, "loss": 0.1988, "step": 3911 }, { "epoch": 0.4054306145714582, "grad_norm": 0.5280964970588684, "learning_rate": 2.695138497866121e-05, "loss": 0.2176, "step": 3912 }, { "epoch": 0.4055342522541196, "grad_norm": 0.5112079381942749, "learning_rate": 2.694508962419428e-05, "loss": 0.2213, "step": 3913 }, { "epoch": 0.405637889936781, "grad_norm": 0.45652469992637634, "learning_rate": 2.693879348716544e-05, "loss": 0.221, "step": 3914 }, { "epoch": 0.40574152761944243, "grad_norm": 0.45350202918052673, "learning_rate": 2.6932496568284138e-05, "loss": 0.1938, "step": 3915 }, { "epoch": 0.40584516530210385, "grad_norm": 0.43645814061164856, "learning_rate": 2.69261988682599e-05, "loss": 0.1972, "step": 3916 }, { "epoch": 0.40594880298476527, "grad_norm": 0.40848255157470703, "learning_rate": 2.691990038780234e-05, "loss": 0.1787, "step": 3917 }, { "epoch": 0.4060524406674267, "grad_norm": 0.4718957245349884, "learning_rate": 2.691360112762116e-05, "loss": 0.1987, "step": 3918 }, { "epoch": 0.4061560783500881, "grad_norm": 0.5582277178764343, "learning_rate": 2.6907301088426155e-05, "loss": 0.2202, "step": 3919 }, { "epoch": 0.4062597160327495, "grad_norm": 0.46594712138175964, "learning_rate": 2.6901000270927204e-05, "loss": 0.2082, "step": 3920 }, { "epoch": 0.40636335371541094, "grad_norm": 0.48121878504753113, "learning_rate": 2.689469867583426e-05, "loss": 0.2238, "step": 3921 }, { "epoch": 0.40646699139807235, "grad_norm": 0.5775296092033386, "learning_rate": 2.68883963038574e-05, "loss": 0.263, "step": 3922 }, { "epoch": 0.40657062908073377, "grad_norm": 0.4708368182182312, "learning_rate": 2.6882093155706742e-05, "loss": 0.2085, "step": 3923 }, { "epoch": 0.4066742667633952, "grad_norm": 0.4810127317905426, "learning_rate": 2.687578923209253e-05, "loss": 0.2149, "step": 3924 }, { "epoch": 0.4067779044460566, "grad_norm": 0.45860347151756287, "learning_rate": 2.686948453372508e-05, "loss": 0.1914, "step": 3925 }, { "epoch": 0.406881542128718, "grad_norm": 0.5676863789558411, "learning_rate": 2.6863179061314784e-05, "loss": 0.218, "step": 3926 }, { "epoch": 0.40698517981137944, "grad_norm": 0.44273561239242554, "learning_rate": 2.6856872815572145e-05, "loss": 0.1738, "step": 3927 }, { "epoch": 0.40708881749404086, "grad_norm": 0.5740095376968384, "learning_rate": 2.6850565797207733e-05, "loss": 0.2254, "step": 3928 }, { "epoch": 0.4071924551767023, "grad_norm": 0.5443901419639587, "learning_rate": 2.684425800693222e-05, "loss": 0.239, "step": 3929 }, { "epoch": 0.4072960928593637, "grad_norm": 0.43100705742836, "learning_rate": 2.6837949445456355e-05, "loss": 0.1679, "step": 3930 }, { "epoch": 0.4073997305420251, "grad_norm": 0.4696880280971527, "learning_rate": 2.6831640113490965e-05, "loss": 0.1976, "step": 3931 }, { "epoch": 0.40750336822468647, "grad_norm": 0.5075644254684448, "learning_rate": 2.6825330011747003e-05, "loss": 0.1958, "step": 3932 }, { "epoch": 0.4076070059073479, "grad_norm": 0.4977028965950012, "learning_rate": 2.6819019140935458e-05, "loss": 0.1925, "step": 3933 }, { "epoch": 0.4077106435900093, "grad_norm": 0.5052193999290466, "learning_rate": 2.6812707501767438e-05, "loss": 0.2208, "step": 3934 }, { "epoch": 0.4078142812726707, "grad_norm": 0.4574637711048126, "learning_rate": 2.6806395094954126e-05, "loss": 0.1852, "step": 3935 }, { "epoch": 0.40791791895533214, "grad_norm": 0.550905704498291, "learning_rate": 2.6800081921206795e-05, "loss": 0.2398, "step": 3936 }, { "epoch": 0.40802155663799355, "grad_norm": 0.4931041896343231, "learning_rate": 2.6793767981236807e-05, "loss": 0.2163, "step": 3937 }, { "epoch": 0.40812519432065497, "grad_norm": 0.5394712686538696, "learning_rate": 2.6787453275755603e-05, "loss": 0.2329, "step": 3938 }, { "epoch": 0.4082288320033164, "grad_norm": 0.5719344019889832, "learning_rate": 2.6781137805474714e-05, "loss": 0.2535, "step": 3939 }, { "epoch": 0.4083324696859778, "grad_norm": 0.4687522053718567, "learning_rate": 2.6774821571105758e-05, "loss": 0.1757, "step": 3940 }, { "epoch": 0.4084361073686392, "grad_norm": 0.46701177954673767, "learning_rate": 2.6768504573360438e-05, "loss": 0.1978, "step": 3941 }, { "epoch": 0.40853974505130064, "grad_norm": 0.4797942340373993, "learning_rate": 2.6762186812950548e-05, "loss": 0.2064, "step": 3942 }, { "epoch": 0.40864338273396206, "grad_norm": 0.5513239502906799, "learning_rate": 2.6755868290587956e-05, "loss": 0.2065, "step": 3943 }, { "epoch": 0.4087470204166235, "grad_norm": 0.48958635330200195, "learning_rate": 2.6749549006984633e-05, "loss": 0.2184, "step": 3944 }, { "epoch": 0.4088506580992849, "grad_norm": 0.5532996654510498, "learning_rate": 2.674322896285262e-05, "loss": 0.2361, "step": 3945 }, { "epoch": 0.4089542957819463, "grad_norm": 0.5579491257667542, "learning_rate": 2.673690815890404e-05, "loss": 0.2286, "step": 3946 }, { "epoch": 0.4090579334646077, "grad_norm": 0.4894980192184448, "learning_rate": 2.6730586595851127e-05, "loss": 0.2012, "step": 3947 }, { "epoch": 0.40916157114726914, "grad_norm": 0.4395800232887268, "learning_rate": 2.6724264274406185e-05, "loss": 0.202, "step": 3948 }, { "epoch": 0.40926520882993056, "grad_norm": 0.4964560270309448, "learning_rate": 2.6717941195281595e-05, "loss": 0.1948, "step": 3949 }, { "epoch": 0.409368846512592, "grad_norm": 0.5479037165641785, "learning_rate": 2.6711617359189827e-05, "loss": 0.2029, "step": 3950 }, { "epoch": 0.4094724841952534, "grad_norm": 0.5029044151306152, "learning_rate": 2.6705292766843455e-05, "loss": 0.2172, "step": 3951 }, { "epoch": 0.4095761218779148, "grad_norm": 0.5871431231498718, "learning_rate": 2.6698967418955116e-05, "loss": 0.2156, "step": 3952 }, { "epoch": 0.40967975956057623, "grad_norm": 0.4270620048046112, "learning_rate": 2.669264131623754e-05, "loss": 0.1749, "step": 3953 }, { "epoch": 0.40978339724323765, "grad_norm": 0.4759542644023895, "learning_rate": 2.668631445940355e-05, "loss": 0.2114, "step": 3954 }, { "epoch": 0.40988703492589906, "grad_norm": 0.4727574288845062, "learning_rate": 2.6679986849166034e-05, "loss": 0.2146, "step": 3955 }, { "epoch": 0.4099906726085605, "grad_norm": 0.47743672132492065, "learning_rate": 2.6673658486237986e-05, "loss": 0.2077, "step": 3956 }, { "epoch": 0.4100943102912219, "grad_norm": 0.542067289352417, "learning_rate": 2.6667329371332482e-05, "loss": 0.2316, "step": 3957 }, { "epoch": 0.4101979479738833, "grad_norm": 0.4978281557559967, "learning_rate": 2.6660999505162662e-05, "loss": 0.2187, "step": 3958 }, { "epoch": 0.41030158565654473, "grad_norm": 0.5522135496139526, "learning_rate": 2.6654668888441776e-05, "loss": 0.2458, "step": 3959 }, { "epoch": 0.41040522333920615, "grad_norm": 0.5353384613990784, "learning_rate": 2.664833752188314e-05, "loss": 0.2263, "step": 3960 }, { "epoch": 0.41050886102186757, "grad_norm": 0.4794256389141083, "learning_rate": 2.6642005406200166e-05, "loss": 0.2042, "step": 3961 }, { "epoch": 0.410612498704529, "grad_norm": 0.515170693397522, "learning_rate": 2.663567254210635e-05, "loss": 0.2041, "step": 3962 }, { "epoch": 0.4107161363871904, "grad_norm": 0.6219731569290161, "learning_rate": 2.662933893031527e-05, "loss": 0.2692, "step": 3963 }, { "epoch": 0.4108197740698518, "grad_norm": 0.5235690474510193, "learning_rate": 2.6623004571540584e-05, "loss": 0.2223, "step": 3964 }, { "epoch": 0.41092341175251323, "grad_norm": 0.5011698007583618, "learning_rate": 2.6616669466496037e-05, "loss": 0.194, "step": 3965 }, { "epoch": 0.41102704943517465, "grad_norm": 0.5136545300483704, "learning_rate": 2.661033361589546e-05, "loss": 0.1901, "step": 3966 }, { "epoch": 0.41113068711783607, "grad_norm": 0.5623643398284912, "learning_rate": 2.6603997020452773e-05, "loss": 0.2327, "step": 3967 }, { "epoch": 0.4112343248004975, "grad_norm": 0.5664448142051697, "learning_rate": 2.659765968088196e-05, "loss": 0.2145, "step": 3968 }, { "epoch": 0.4113379624831589, "grad_norm": 0.4725498855113983, "learning_rate": 2.659132159789711e-05, "loss": 0.1775, "step": 3969 }, { "epoch": 0.41144160016582026, "grad_norm": 0.5574590563774109, "learning_rate": 2.6584982772212394e-05, "loss": 0.2156, "step": 3970 }, { "epoch": 0.4115452378484817, "grad_norm": 0.46027258038520813, "learning_rate": 2.6578643204542052e-05, "loss": 0.1852, "step": 3971 }, { "epoch": 0.4116488755311431, "grad_norm": 0.5373114943504333, "learning_rate": 2.6572302895600422e-05, "loss": 0.2292, "step": 3972 }, { "epoch": 0.4117525132138045, "grad_norm": 0.5287101864814758, "learning_rate": 2.6565961846101922e-05, "loss": 0.2131, "step": 3973 }, { "epoch": 0.41185615089646593, "grad_norm": 0.5218322277069092, "learning_rate": 2.6559620056761044e-05, "loss": 0.1849, "step": 3974 }, { "epoch": 0.41195978857912735, "grad_norm": 0.5134084820747375, "learning_rate": 2.655327752829237e-05, "loss": 0.2233, "step": 3975 }, { "epoch": 0.41206342626178877, "grad_norm": 0.47732385993003845, "learning_rate": 2.6546934261410588e-05, "loss": 0.1842, "step": 3976 }, { "epoch": 0.4121670639444502, "grad_norm": 0.499118447303772, "learning_rate": 2.654059025683042e-05, "loss": 0.2001, "step": 3977 }, { "epoch": 0.4122707016271116, "grad_norm": 0.5397217869758606, "learning_rate": 2.6534245515266707e-05, "loss": 0.2192, "step": 3978 }, { "epoch": 0.412374339309773, "grad_norm": 0.47455599904060364, "learning_rate": 2.6527900037434368e-05, "loss": 0.1985, "step": 3979 }, { "epoch": 0.41247797699243444, "grad_norm": 0.49492162466049194, "learning_rate": 2.65215538240484e-05, "loss": 0.1935, "step": 3980 }, { "epoch": 0.41258161467509585, "grad_norm": 0.446444571018219, "learning_rate": 2.651520687582389e-05, "loss": 0.1995, "step": 3981 }, { "epoch": 0.41268525235775727, "grad_norm": 0.4676581919193268, "learning_rate": 2.6508859193475994e-05, "loss": 0.1677, "step": 3982 }, { "epoch": 0.4127888900404187, "grad_norm": 0.47199270129203796, "learning_rate": 2.6502510777719967e-05, "loss": 0.2, "step": 3983 }, { "epoch": 0.4128925277230801, "grad_norm": 0.4684051275253296, "learning_rate": 2.649616162927113e-05, "loss": 0.1958, "step": 3984 }, { "epoch": 0.4129961654057415, "grad_norm": 0.5107256770133972, "learning_rate": 2.6489811748844897e-05, "loss": 0.2066, "step": 3985 }, { "epoch": 0.41309980308840294, "grad_norm": 0.5172587633132935, "learning_rate": 2.6483461137156767e-05, "loss": 0.1881, "step": 3986 }, { "epoch": 0.41320344077106436, "grad_norm": 0.5457533597946167, "learning_rate": 2.6477109794922317e-05, "loss": 0.2704, "step": 3987 }, { "epoch": 0.4133070784537258, "grad_norm": 0.4835330843925476, "learning_rate": 2.6470757722857195e-05, "loss": 0.1947, "step": 3988 }, { "epoch": 0.4134107161363872, "grad_norm": 0.5259234309196472, "learning_rate": 2.6464404921677168e-05, "loss": 0.2229, "step": 3989 }, { "epoch": 0.4135143538190486, "grad_norm": 0.5861340761184692, "learning_rate": 2.645805139209803e-05, "loss": 0.2424, "step": 3990 }, { "epoch": 0.41361799150171, "grad_norm": 0.48513346910476685, "learning_rate": 2.64516971348357e-05, "loss": 0.173, "step": 3991 }, { "epoch": 0.41372162918437144, "grad_norm": 0.4559204578399658, "learning_rate": 2.6445342150606175e-05, "loss": 0.1826, "step": 3992 }, { "epoch": 0.41382526686703286, "grad_norm": 0.5155350565910339, "learning_rate": 2.6438986440125513e-05, "loss": 0.2136, "step": 3993 }, { "epoch": 0.4139289045496943, "grad_norm": 0.6065822839736938, "learning_rate": 2.6432630004109862e-05, "loss": 0.2694, "step": 3994 }, { "epoch": 0.4140325422323557, "grad_norm": 0.4986894726753235, "learning_rate": 2.6426272843275467e-05, "loss": 0.24, "step": 3995 }, { "epoch": 0.4141361799150171, "grad_norm": 0.5501571297645569, "learning_rate": 2.641991495833864e-05, "loss": 0.2292, "step": 3996 }, { "epoch": 0.41423981759767853, "grad_norm": 0.5160378813743591, "learning_rate": 2.6413556350015773e-05, "loss": 0.191, "step": 3997 }, { "epoch": 0.41434345528033995, "grad_norm": 0.47425827383995056, "learning_rate": 2.6407197019023346e-05, "loss": 0.2144, "step": 3998 }, { "epoch": 0.41444709296300136, "grad_norm": 0.5496262311935425, "learning_rate": 2.6400836966077924e-05, "loss": 0.234, "step": 3999 }, { "epoch": 0.4145507306456628, "grad_norm": 0.5443497896194458, "learning_rate": 2.639447619189613e-05, "loss": 0.1911, "step": 4000 }, { "epoch": 0.4146543683283242, "grad_norm": 0.4492267668247223, "learning_rate": 2.6388114697194717e-05, "loss": 0.1755, "step": 4001 }, { "epoch": 0.4147580060109856, "grad_norm": 0.4667634963989258, "learning_rate": 2.638175248269046e-05, "loss": 0.1822, "step": 4002 }, { "epoch": 0.41486164369364703, "grad_norm": 0.48646092414855957, "learning_rate": 2.6375389549100253e-05, "loss": 0.2004, "step": 4003 }, { "epoch": 0.41496528137630845, "grad_norm": 0.46625807881355286, "learning_rate": 2.6369025897141065e-05, "loss": 0.2038, "step": 4004 }, { "epoch": 0.41506891905896987, "grad_norm": 0.46255430579185486, "learning_rate": 2.6362661527529935e-05, "loss": 0.2021, "step": 4005 }, { "epoch": 0.4151725567416313, "grad_norm": 0.5445932745933533, "learning_rate": 2.6356296440984003e-05, "loss": 0.2351, "step": 4006 }, { "epoch": 0.4152761944242927, "grad_norm": 0.5121250748634338, "learning_rate": 2.6349930638220463e-05, "loss": 0.193, "step": 4007 }, { "epoch": 0.41537983210695406, "grad_norm": 0.48741304874420166, "learning_rate": 2.6343564119956617e-05, "loss": 0.2042, "step": 4008 }, { "epoch": 0.4154834697896155, "grad_norm": 0.4352017641067505, "learning_rate": 2.6337196886909823e-05, "loss": 0.1824, "step": 4009 }, { "epoch": 0.4155871074722769, "grad_norm": 0.5594912171363831, "learning_rate": 2.633082893979753e-05, "loss": 0.242, "step": 4010 }, { "epoch": 0.4156907451549383, "grad_norm": 0.5285469889640808, "learning_rate": 2.6324460279337282e-05, "loss": 0.2176, "step": 4011 }, { "epoch": 0.41579438283759973, "grad_norm": 0.4648481607437134, "learning_rate": 2.6318090906246677e-05, "loss": 0.1913, "step": 4012 }, { "epoch": 0.41589802052026115, "grad_norm": 0.4458993673324585, "learning_rate": 2.631172082124341e-05, "loss": 0.2024, "step": 4013 }, { "epoch": 0.41600165820292256, "grad_norm": 0.5384868383407593, "learning_rate": 2.6305350025045257e-05, "loss": 0.235, "step": 4014 }, { "epoch": 0.416105295885584, "grad_norm": 0.45828738808631897, "learning_rate": 2.629897851837006e-05, "loss": 0.2207, "step": 4015 }, { "epoch": 0.4162089335682454, "grad_norm": 0.4959215223789215, "learning_rate": 2.6292606301935752e-05, "loss": 0.2176, "step": 4016 }, { "epoch": 0.4163125712509068, "grad_norm": 0.49704524874687195, "learning_rate": 2.628623337646036e-05, "loss": 0.2098, "step": 4017 }, { "epoch": 0.41641620893356823, "grad_norm": 0.4785382151603699, "learning_rate": 2.6279859742661954e-05, "loss": 0.2164, "step": 4018 }, { "epoch": 0.41651984661622965, "grad_norm": 0.5414184331893921, "learning_rate": 2.627348540125872e-05, "loss": 0.2271, "step": 4019 }, { "epoch": 0.41662348429889107, "grad_norm": 0.45238590240478516, "learning_rate": 2.6267110352968894e-05, "loss": 0.1869, "step": 4020 }, { "epoch": 0.4167271219815525, "grad_norm": 0.49890977144241333, "learning_rate": 2.626073459851082e-05, "loss": 0.2091, "step": 4021 }, { "epoch": 0.4168307596642139, "grad_norm": 0.5310577154159546, "learning_rate": 2.625435813860291e-05, "loss": 0.249, "step": 4022 }, { "epoch": 0.4169343973468753, "grad_norm": 0.5554194450378418, "learning_rate": 2.6247980973963642e-05, "loss": 0.2236, "step": 4023 }, { "epoch": 0.41703803502953674, "grad_norm": 0.5059272646903992, "learning_rate": 2.6241603105311594e-05, "loss": 0.2097, "step": 4024 }, { "epoch": 0.41714167271219815, "grad_norm": 0.522890567779541, "learning_rate": 2.6235224533365403e-05, "loss": 0.2178, "step": 4025 }, { "epoch": 0.41724531039485957, "grad_norm": 0.5513536930084229, "learning_rate": 2.622884525884381e-05, "loss": 0.2594, "step": 4026 }, { "epoch": 0.417348948077521, "grad_norm": 0.4362293481826782, "learning_rate": 2.622246528246562e-05, "loss": 0.2008, "step": 4027 }, { "epoch": 0.4174525857601824, "grad_norm": 0.4287848174571991, "learning_rate": 2.6216084604949715e-05, "loss": 0.1771, "step": 4028 }, { "epoch": 0.4175562234428438, "grad_norm": 0.4799305200576782, "learning_rate": 2.6209703227015054e-05, "loss": 0.1934, "step": 4029 }, { "epoch": 0.41765986112550524, "grad_norm": 0.5041770339012146, "learning_rate": 2.6203321149380685e-05, "loss": 0.2348, "step": 4030 }, { "epoch": 0.41776349880816666, "grad_norm": 0.4590016007423401, "learning_rate": 2.6196938372765736e-05, "loss": 0.1825, "step": 4031 }, { "epoch": 0.4178671364908281, "grad_norm": 0.4430880546569824, "learning_rate": 2.6190554897889406e-05, "loss": 0.1719, "step": 4032 }, { "epoch": 0.4179707741734895, "grad_norm": 0.5018654465675354, "learning_rate": 2.6184170725470974e-05, "loss": 0.2149, "step": 4033 }, { "epoch": 0.4180744118561509, "grad_norm": 0.4524014890193939, "learning_rate": 2.6177785856229795e-05, "loss": 0.2038, "step": 4034 }, { "epoch": 0.4181780495388123, "grad_norm": 0.4818570017814636, "learning_rate": 2.6171400290885305e-05, "loss": 0.1997, "step": 4035 }, { "epoch": 0.41828168722147374, "grad_norm": 0.5182315111160278, "learning_rate": 2.6165014030157032e-05, "loss": 0.199, "step": 4036 }, { "epoch": 0.41838532490413516, "grad_norm": 0.509839653968811, "learning_rate": 2.6158627074764564e-05, "loss": 0.199, "step": 4037 }, { "epoch": 0.4184889625867966, "grad_norm": 0.5443018078804016, "learning_rate": 2.6152239425427563e-05, "loss": 0.2108, "step": 4038 }, { "epoch": 0.418592600269458, "grad_norm": 0.5486571788787842, "learning_rate": 2.6145851082865788e-05, "loss": 0.2207, "step": 4039 }, { "epoch": 0.4186962379521194, "grad_norm": 0.48583099246025085, "learning_rate": 2.6139462047799067e-05, "loss": 0.22, "step": 4040 }, { "epoch": 0.4187998756347808, "grad_norm": 0.5152609348297119, "learning_rate": 2.613307232094731e-05, "loss": 0.226, "step": 4041 }, { "epoch": 0.41890351331744224, "grad_norm": 0.558648943901062, "learning_rate": 2.612668190303049e-05, "loss": 0.2448, "step": 4042 }, { "epoch": 0.41900715100010366, "grad_norm": 0.4209331274032593, "learning_rate": 2.6120290794768694e-05, "loss": 0.1843, "step": 4043 }, { "epoch": 0.4191107886827651, "grad_norm": 0.5400482416152954, "learning_rate": 2.611389899688203e-05, "loss": 0.2258, "step": 4044 }, { "epoch": 0.4192144263654265, "grad_norm": 0.4837553799152374, "learning_rate": 2.6107506510090735e-05, "loss": 0.2076, "step": 4045 }, { "epoch": 0.41931806404808786, "grad_norm": 0.4797718822956085, "learning_rate": 2.6101113335115106e-05, "loss": 0.2159, "step": 4046 }, { "epoch": 0.4194217017307493, "grad_norm": 0.5106173157691956, "learning_rate": 2.6094719472675506e-05, "loss": 0.2231, "step": 4047 }, { "epoch": 0.4195253394134107, "grad_norm": 0.4600318968296051, "learning_rate": 2.608832492349239e-05, "loss": 0.1843, "step": 4048 }, { "epoch": 0.4196289770960721, "grad_norm": 0.5094060897827148, "learning_rate": 2.608192968828629e-05, "loss": 0.1999, "step": 4049 }, { "epoch": 0.4197326147787335, "grad_norm": 0.425560861825943, "learning_rate": 2.60755337677778e-05, "loss": 0.1894, "step": 4050 }, { "epoch": 0.41983625246139494, "grad_norm": 0.516232430934906, "learning_rate": 2.6069137162687614e-05, "loss": 0.2042, "step": 4051 }, { "epoch": 0.41993989014405636, "grad_norm": 0.48487234115600586, "learning_rate": 2.606273987373649e-05, "loss": 0.2229, "step": 4052 }, { "epoch": 0.4200435278267178, "grad_norm": 0.5296849012374878, "learning_rate": 2.6056341901645263e-05, "loss": 0.2178, "step": 4053 }, { "epoch": 0.4201471655093792, "grad_norm": 0.5496042370796204, "learning_rate": 2.6049943247134836e-05, "loss": 0.2556, "step": 4054 }, { "epoch": 0.4202508031920406, "grad_norm": 0.5170057415962219, "learning_rate": 2.6043543910926214e-05, "loss": 0.2248, "step": 4055 }, { "epoch": 0.42035444087470203, "grad_norm": 0.5637809038162231, "learning_rate": 2.603714389374046e-05, "loss": 0.2415, "step": 4056 }, { "epoch": 0.42045807855736345, "grad_norm": 0.4687712788581848, "learning_rate": 2.6030743196298716e-05, "loss": 0.2121, "step": 4057 }, { "epoch": 0.42056171624002486, "grad_norm": 0.5689849257469177, "learning_rate": 2.6024341819322203e-05, "loss": 0.258, "step": 4058 }, { "epoch": 0.4206653539226863, "grad_norm": 0.5509251952171326, "learning_rate": 2.601793976353222e-05, "loss": 0.2148, "step": 4059 }, { "epoch": 0.4207689916053477, "grad_norm": 0.586294412612915, "learning_rate": 2.6011537029650135e-05, "loss": 0.2386, "step": 4060 }, { "epoch": 0.4208726292880091, "grad_norm": 0.49938663840293884, "learning_rate": 2.600513361839741e-05, "loss": 0.19, "step": 4061 }, { "epoch": 0.42097626697067053, "grad_norm": 0.3958568871021271, "learning_rate": 2.5998729530495564e-05, "loss": 0.1808, "step": 4062 }, { "epoch": 0.42107990465333195, "grad_norm": 0.5225630402565002, "learning_rate": 2.5992324766666194e-05, "loss": 0.2262, "step": 4063 }, { "epoch": 0.42118354233599337, "grad_norm": 0.5840221643447876, "learning_rate": 2.598591932763099e-05, "loss": 0.2594, "step": 4064 }, { "epoch": 0.4212871800186548, "grad_norm": 0.44575127959251404, "learning_rate": 2.5979513214111697e-05, "loss": 0.1738, "step": 4065 }, { "epoch": 0.4213908177013162, "grad_norm": 0.5635486841201782, "learning_rate": 2.5973106426830148e-05, "loss": 0.2284, "step": 4066 }, { "epoch": 0.4214944553839776, "grad_norm": 0.4605032205581665, "learning_rate": 2.5966698966508257e-05, "loss": 0.1794, "step": 4067 }, { "epoch": 0.42159809306663903, "grad_norm": 0.4816349446773529, "learning_rate": 2.5960290833868004e-05, "loss": 0.1917, "step": 4068 }, { "epoch": 0.42170173074930045, "grad_norm": 0.5689332485198975, "learning_rate": 2.595388202963144e-05, "loss": 0.2646, "step": 4069 }, { "epoch": 0.42180536843196187, "grad_norm": 0.3935214877128601, "learning_rate": 2.5947472554520702e-05, "loss": 0.1494, "step": 4070 }, { "epoch": 0.4219090061146233, "grad_norm": 0.536814272403717, "learning_rate": 2.5941062409258013e-05, "loss": 0.232, "step": 4071 }, { "epoch": 0.4220126437972847, "grad_norm": 0.43292945623397827, "learning_rate": 2.593465159456564e-05, "loss": 0.1705, "step": 4072 }, { "epoch": 0.4221162814799461, "grad_norm": 0.44623786211013794, "learning_rate": 2.5928240111165952e-05, "loss": 0.2061, "step": 4073 }, { "epoch": 0.42221991916260754, "grad_norm": 0.4869650900363922, "learning_rate": 2.592182795978138e-05, "loss": 0.2111, "step": 4074 }, { "epoch": 0.42232355684526895, "grad_norm": 0.5473789572715759, "learning_rate": 2.5915415141134443e-05, "loss": 0.2491, "step": 4075 }, { "epoch": 0.42242719452793037, "grad_norm": 0.5232890248298645, "learning_rate": 2.5909001655947723e-05, "loss": 0.2273, "step": 4076 }, { "epoch": 0.4225308322105918, "grad_norm": 0.47419649362564087, "learning_rate": 2.590258750494388e-05, "loss": 0.1944, "step": 4077 }, { "epoch": 0.4226344698932532, "grad_norm": 0.5294226408004761, "learning_rate": 2.589617268884566e-05, "loss": 0.2216, "step": 4078 }, { "epoch": 0.4227381075759146, "grad_norm": 0.5194813013076782, "learning_rate": 2.5889757208375858e-05, "loss": 0.1979, "step": 4079 }, { "epoch": 0.42284174525857604, "grad_norm": 0.4927207827568054, "learning_rate": 2.588334106425738e-05, "loss": 0.2037, "step": 4080 }, { "epoch": 0.42294538294123746, "grad_norm": 0.48588693141937256, "learning_rate": 2.587692425721317e-05, "loss": 0.2071, "step": 4081 }, { "epoch": 0.4230490206238989, "grad_norm": 0.5707266330718994, "learning_rate": 2.5870506787966272e-05, "loss": 0.2475, "step": 4082 }, { "epoch": 0.4231526583065603, "grad_norm": 0.43110227584838867, "learning_rate": 2.5864088657239795e-05, "loss": 0.1909, "step": 4083 }, { "epoch": 0.42325629598922165, "grad_norm": 0.453917533159256, "learning_rate": 2.585766986575692e-05, "loss": 0.1965, "step": 4084 }, { "epoch": 0.42335993367188307, "grad_norm": 0.4615586996078491, "learning_rate": 2.5851250414240915e-05, "loss": 0.176, "step": 4085 }, { "epoch": 0.4234635713545445, "grad_norm": 0.5152726173400879, "learning_rate": 2.5844830303415105e-05, "loss": 0.183, "step": 4086 }, { "epoch": 0.4235672090372059, "grad_norm": 0.5132525563240051, "learning_rate": 2.5838409534002907e-05, "loss": 0.2253, "step": 4087 }, { "epoch": 0.4236708467198673, "grad_norm": 0.5109318494796753, "learning_rate": 2.58319881067278e-05, "loss": 0.2057, "step": 4088 }, { "epoch": 0.42377448440252874, "grad_norm": 0.4559604227542877, "learning_rate": 2.5825566022313327e-05, "loss": 0.1812, "step": 4089 }, { "epoch": 0.42387812208519016, "grad_norm": 0.4541739523410797, "learning_rate": 2.581914328148315e-05, "loss": 0.1889, "step": 4090 }, { "epoch": 0.4239817597678516, "grad_norm": 0.6262094974517822, "learning_rate": 2.581271988496094e-05, "loss": 0.2568, "step": 4091 }, { "epoch": 0.424085397450513, "grad_norm": 0.5760964751243591, "learning_rate": 2.5806295833470493e-05, "loss": 0.2161, "step": 4092 }, { "epoch": 0.4241890351331744, "grad_norm": 0.5587107539176941, "learning_rate": 2.5799871127735663e-05, "loss": 0.2343, "step": 4093 }, { "epoch": 0.4242926728158358, "grad_norm": 0.4467000663280487, "learning_rate": 2.579344576848036e-05, "loss": 0.1805, "step": 4094 }, { "epoch": 0.42439631049849724, "grad_norm": 0.5288705825805664, "learning_rate": 2.57870197564286e-05, "loss": 0.2407, "step": 4095 }, { "epoch": 0.42449994818115866, "grad_norm": 0.5100021958351135, "learning_rate": 2.5780593092304452e-05, "loss": 0.2091, "step": 4096 }, { "epoch": 0.4246035858638201, "grad_norm": 0.4132768213748932, "learning_rate": 2.5774165776832058e-05, "loss": 0.1696, "step": 4097 }, { "epoch": 0.4247072235464815, "grad_norm": 0.5586245656013489, "learning_rate": 2.5767737810735636e-05, "loss": 0.2356, "step": 4098 }, { "epoch": 0.4248108612291429, "grad_norm": 0.5645946264266968, "learning_rate": 2.5761309194739486e-05, "loss": 0.2492, "step": 4099 }, { "epoch": 0.42491449891180433, "grad_norm": 0.4798961281776428, "learning_rate": 2.575487992956798e-05, "loss": 0.1909, "step": 4100 }, { "epoch": 0.42501813659446575, "grad_norm": 0.5436182618141174, "learning_rate": 2.574845001594554e-05, "loss": 0.2442, "step": 4101 }, { "epoch": 0.42512177427712716, "grad_norm": 0.5537167191505432, "learning_rate": 2.5742019454596688e-05, "loss": 0.2371, "step": 4102 }, { "epoch": 0.4252254119597886, "grad_norm": 0.4842146039009094, "learning_rate": 2.573558824624602e-05, "loss": 0.1927, "step": 4103 }, { "epoch": 0.42532904964245, "grad_norm": 0.48249712586402893, "learning_rate": 2.5729156391618172e-05, "loss": 0.1799, "step": 4104 }, { "epoch": 0.4254326873251114, "grad_norm": 0.5177933573722839, "learning_rate": 2.5722723891437894e-05, "loss": 0.2161, "step": 4105 }, { "epoch": 0.42553632500777283, "grad_norm": 0.5344215035438538, "learning_rate": 2.571629074642999e-05, "loss": 0.2228, "step": 4106 }, { "epoch": 0.42563996269043425, "grad_norm": 0.5359126925468445, "learning_rate": 2.5709856957319323e-05, "loss": 0.2086, "step": 4107 }, { "epoch": 0.42574360037309567, "grad_norm": 0.45025259256362915, "learning_rate": 2.570342252483085e-05, "loss": 0.1903, "step": 4108 }, { "epoch": 0.4258472380557571, "grad_norm": 0.4615389108657837, "learning_rate": 2.5696987449689594e-05, "loss": 0.1903, "step": 4109 }, { "epoch": 0.4259508757384185, "grad_norm": 0.5702045559883118, "learning_rate": 2.569055173262065e-05, "loss": 0.2017, "step": 4110 }, { "epoch": 0.4260545134210799, "grad_norm": 0.4639468193054199, "learning_rate": 2.5684115374349184e-05, "loss": 0.1703, "step": 4111 }, { "epoch": 0.42615815110374133, "grad_norm": 0.4748694598674774, "learning_rate": 2.5677678375600436e-05, "loss": 0.1906, "step": 4112 }, { "epoch": 0.42626178878640275, "grad_norm": 0.5352087616920471, "learning_rate": 2.567124073709971e-05, "loss": 0.2314, "step": 4113 }, { "epoch": 0.42636542646906417, "grad_norm": 0.5662137866020203, "learning_rate": 2.566480245957239e-05, "loss": 0.2385, "step": 4114 }, { "epoch": 0.4264690641517256, "grad_norm": 0.495313435792923, "learning_rate": 2.5658363543743944e-05, "loss": 0.2215, "step": 4115 }, { "epoch": 0.426572701834387, "grad_norm": 0.4843652844429016, "learning_rate": 2.5651923990339884e-05, "loss": 0.1973, "step": 4116 }, { "epoch": 0.4266763395170484, "grad_norm": 0.5149955153465271, "learning_rate": 2.5645483800085815e-05, "loss": 0.1962, "step": 4117 }, { "epoch": 0.42677997719970984, "grad_norm": 0.41277220845222473, "learning_rate": 2.563904297370741e-05, "loss": 0.1654, "step": 4118 }, { "epoch": 0.42688361488237125, "grad_norm": 0.47097402811050415, "learning_rate": 2.5632601511930405e-05, "loss": 0.1941, "step": 4119 }, { "epoch": 0.42698725256503267, "grad_norm": 0.390764981508255, "learning_rate": 2.562615941548062e-05, "loss": 0.1509, "step": 4120 }, { "epoch": 0.4270908902476941, "grad_norm": 0.5125734210014343, "learning_rate": 2.561971668508394e-05, "loss": 0.208, "step": 4121 }, { "epoch": 0.42719452793035545, "grad_norm": 0.5046721696853638, "learning_rate": 2.5613273321466324e-05, "loss": 0.1955, "step": 4122 }, { "epoch": 0.42729816561301687, "grad_norm": 0.5146350860595703, "learning_rate": 2.5606829325353788e-05, "loss": 0.202, "step": 4123 }, { "epoch": 0.4274018032956783, "grad_norm": 0.5878974795341492, "learning_rate": 2.560038469747244e-05, "loss": 0.2564, "step": 4124 }, { "epoch": 0.4275054409783397, "grad_norm": 0.5669657588005066, "learning_rate": 2.5593939438548455e-05, "loss": 0.2733, "step": 4125 }, { "epoch": 0.4276090786610011, "grad_norm": 0.5380009412765503, "learning_rate": 2.558749354930807e-05, "loss": 0.199, "step": 4126 }, { "epoch": 0.42771271634366254, "grad_norm": 0.4734484553337097, "learning_rate": 2.558104703047759e-05, "loss": 0.2015, "step": 4127 }, { "epoch": 0.42781635402632395, "grad_norm": 0.43804338574409485, "learning_rate": 2.5574599882783417e-05, "loss": 0.2008, "step": 4128 }, { "epoch": 0.42791999170898537, "grad_norm": 0.507434070110321, "learning_rate": 2.5568152106951986e-05, "loss": 0.1931, "step": 4129 }, { "epoch": 0.4280236293916468, "grad_norm": 0.5040130019187927, "learning_rate": 2.5561703703709837e-05, "loss": 0.2176, "step": 4130 }, { "epoch": 0.4281272670743082, "grad_norm": 0.5868910551071167, "learning_rate": 2.555525467378356e-05, "loss": 0.264, "step": 4131 }, { "epoch": 0.4282309047569696, "grad_norm": 0.5386139750480652, "learning_rate": 2.554880501789982e-05, "loss": 0.2438, "step": 4132 }, { "epoch": 0.42833454243963104, "grad_norm": 0.5160743594169617, "learning_rate": 2.554235473678536e-05, "loss": 0.2014, "step": 4133 }, { "epoch": 0.42843818012229246, "grad_norm": 0.4884987771511078, "learning_rate": 2.553590383116698e-05, "loss": 0.2044, "step": 4134 }, { "epoch": 0.4285418178049539, "grad_norm": 0.5191545486450195, "learning_rate": 2.5529452301771563e-05, "loss": 0.2101, "step": 4135 }, { "epoch": 0.4286454554876153, "grad_norm": 0.495727002620697, "learning_rate": 2.5523000149326053e-05, "loss": 0.1947, "step": 4136 }, { "epoch": 0.4287490931702767, "grad_norm": 0.4700315296649933, "learning_rate": 2.551654737455748e-05, "loss": 0.1862, "step": 4137 }, { "epoch": 0.4288527308529381, "grad_norm": 0.4800029397010803, "learning_rate": 2.5510093978192922e-05, "loss": 0.1945, "step": 4138 }, { "epoch": 0.42895636853559954, "grad_norm": 0.4926530718803406, "learning_rate": 2.5503639960959534e-05, "loss": 0.2111, "step": 4139 }, { "epoch": 0.42906000621826096, "grad_norm": 0.4847051203250885, "learning_rate": 2.5497185323584556e-05, "loss": 0.1836, "step": 4140 }, { "epoch": 0.4291636439009224, "grad_norm": 0.4954644739627838, "learning_rate": 2.5490730066795282e-05, "loss": 0.194, "step": 4141 }, { "epoch": 0.4292672815835838, "grad_norm": 0.5430189967155457, "learning_rate": 2.548427419131908e-05, "loss": 0.208, "step": 4142 }, { "epoch": 0.4293709192662452, "grad_norm": 0.5013940334320068, "learning_rate": 2.5477817697883383e-05, "loss": 0.2248, "step": 4143 }, { "epoch": 0.4294745569489066, "grad_norm": 0.4881177544593811, "learning_rate": 2.5471360587215706e-05, "loss": 0.2098, "step": 4144 }, { "epoch": 0.42957819463156804, "grad_norm": 0.439849317073822, "learning_rate": 2.546490286004362e-05, "loss": 0.1732, "step": 4145 }, { "epoch": 0.42968183231422946, "grad_norm": 0.5770775675773621, "learning_rate": 2.5458444517094777e-05, "loss": 0.2279, "step": 4146 }, { "epoch": 0.4297854699968909, "grad_norm": 0.4989546239376068, "learning_rate": 2.5451985559096903e-05, "loss": 0.2004, "step": 4147 }, { "epoch": 0.4298891076795523, "grad_norm": 0.5036958456039429, "learning_rate": 2.5445525986777755e-05, "loss": 0.2017, "step": 4148 }, { "epoch": 0.4299927453622137, "grad_norm": 0.562789797782898, "learning_rate": 2.5439065800865206e-05, "loss": 0.2267, "step": 4149 }, { "epoch": 0.43009638304487513, "grad_norm": 0.5571634769439697, "learning_rate": 2.543260500208719e-05, "loss": 0.2299, "step": 4150 }, { "epoch": 0.43020002072753655, "grad_norm": 0.6189036965370178, "learning_rate": 2.5426143591171678e-05, "loss": 0.2534, "step": 4151 }, { "epoch": 0.43030365841019796, "grad_norm": 0.5095949172973633, "learning_rate": 2.5419681568846742e-05, "loss": 0.2192, "step": 4152 }, { "epoch": 0.4304072960928594, "grad_norm": 0.4679073691368103, "learning_rate": 2.5413218935840508e-05, "loss": 0.2048, "step": 4153 }, { "epoch": 0.4305109337755208, "grad_norm": 0.5005534887313843, "learning_rate": 2.5406755692881183e-05, "loss": 0.2092, "step": 4154 }, { "epoch": 0.4306145714581822, "grad_norm": 0.5270726084709167, "learning_rate": 2.5400291840697032e-05, "loss": 0.2268, "step": 4155 }, { "epoch": 0.43071820914084363, "grad_norm": 0.49191877245903015, "learning_rate": 2.5393827380016397e-05, "loss": 0.241, "step": 4156 }, { "epoch": 0.43082184682350505, "grad_norm": 0.5938627123832703, "learning_rate": 2.538736231156767e-05, "loss": 0.247, "step": 4157 }, { "epoch": 0.43092548450616647, "grad_norm": 0.4768686592578888, "learning_rate": 2.538089663607933e-05, "loss": 0.1919, "step": 4158 }, { "epoch": 0.4310291221888279, "grad_norm": 0.4200623035430908, "learning_rate": 2.5374430354279934e-05, "loss": 0.1469, "step": 4159 }, { "epoch": 0.43113275987148925, "grad_norm": 0.4872795343399048, "learning_rate": 2.5367963466898073e-05, "loss": 0.218, "step": 4160 }, { "epoch": 0.43123639755415066, "grad_norm": 0.5187094211578369, "learning_rate": 2.536149597466243e-05, "loss": 0.2321, "step": 4161 }, { "epoch": 0.4313400352368121, "grad_norm": 0.5779088139533997, "learning_rate": 2.5355027878301756e-05, "loss": 0.2624, "step": 4162 }, { "epoch": 0.4314436729194735, "grad_norm": 0.48629242181777954, "learning_rate": 2.5348559178544866e-05, "loss": 0.1804, "step": 4163 }, { "epoch": 0.4315473106021349, "grad_norm": 0.3519076704978943, "learning_rate": 2.5342089876120647e-05, "loss": 0.1498, "step": 4164 }, { "epoch": 0.43165094828479633, "grad_norm": 0.40314781665802, "learning_rate": 2.533561997175804e-05, "loss": 0.1749, "step": 4165 }, { "epoch": 0.43175458596745775, "grad_norm": 0.5147646069526672, "learning_rate": 2.5329149466186075e-05, "loss": 0.2372, "step": 4166 }, { "epoch": 0.43185822365011917, "grad_norm": 0.4711988568305969, "learning_rate": 2.532267836013383e-05, "loss": 0.1937, "step": 4167 }, { "epoch": 0.4319618613327806, "grad_norm": 0.5647103786468506, "learning_rate": 2.531620665433045e-05, "loss": 0.2275, "step": 4168 }, { "epoch": 0.432065499015442, "grad_norm": 0.4511049687862396, "learning_rate": 2.5309734349505183e-05, "loss": 0.19, "step": 4169 }, { "epoch": 0.4321691366981034, "grad_norm": 0.5962859988212585, "learning_rate": 2.53032614463873e-05, "loss": 0.2334, "step": 4170 }, { "epoch": 0.43227277438076483, "grad_norm": 0.5469899773597717, "learning_rate": 2.5296787945706162e-05, "loss": 0.2063, "step": 4171 }, { "epoch": 0.43237641206342625, "grad_norm": 0.47863686084747314, "learning_rate": 2.5290313848191193e-05, "loss": 0.192, "step": 4172 }, { "epoch": 0.43248004974608767, "grad_norm": 0.6556797623634338, "learning_rate": 2.5283839154571878e-05, "loss": 0.2346, "step": 4173 }, { "epoch": 0.4325836874287491, "grad_norm": 0.5168253183364868, "learning_rate": 2.5277363865577783e-05, "loss": 0.2092, "step": 4174 }, { "epoch": 0.4326873251114105, "grad_norm": 0.5221461653709412, "learning_rate": 2.5270887981938533e-05, "loss": 0.2228, "step": 4175 }, { "epoch": 0.4327909627940719, "grad_norm": 0.5210689306259155, "learning_rate": 2.5264411504383822e-05, "loss": 0.2144, "step": 4176 }, { "epoch": 0.43289460047673334, "grad_norm": 0.5394599437713623, "learning_rate": 2.5257934433643404e-05, "loss": 0.2284, "step": 4177 }, { "epoch": 0.43299823815939475, "grad_norm": 0.5580217242240906, "learning_rate": 2.5251456770447105e-05, "loss": 0.2218, "step": 4178 }, { "epoch": 0.43310187584205617, "grad_norm": 0.5175135135650635, "learning_rate": 2.5244978515524824e-05, "loss": 0.222, "step": 4179 }, { "epoch": 0.4332055135247176, "grad_norm": 0.47659069299697876, "learning_rate": 2.523849966960651e-05, "loss": 0.2125, "step": 4180 }, { "epoch": 0.433309151207379, "grad_norm": 0.4961530864238739, "learning_rate": 2.5232020233422202e-05, "loss": 0.226, "step": 4181 }, { "epoch": 0.4334127888900404, "grad_norm": 0.5616767406463623, "learning_rate": 2.5225540207701996e-05, "loss": 0.2508, "step": 4182 }, { "epoch": 0.43351642657270184, "grad_norm": 0.5131274461746216, "learning_rate": 2.5219059593176026e-05, "loss": 0.1833, "step": 4183 }, { "epoch": 0.43362006425536326, "grad_norm": 0.5015151500701904, "learning_rate": 2.5212578390574542e-05, "loss": 0.2318, "step": 4184 }, { "epoch": 0.4337237019380247, "grad_norm": 0.5001304745674133, "learning_rate": 2.5206096600627832e-05, "loss": 0.2145, "step": 4185 }, { "epoch": 0.4338273396206861, "grad_norm": 0.44869324564933777, "learning_rate": 2.519961422406625e-05, "loss": 0.2037, "step": 4186 }, { "epoch": 0.4339309773033475, "grad_norm": 0.5340890884399414, "learning_rate": 2.5193131261620213e-05, "loss": 0.2216, "step": 4187 }, { "epoch": 0.4340346149860089, "grad_norm": 0.4965475797653198, "learning_rate": 2.518664771402022e-05, "loss": 0.2003, "step": 4188 }, { "epoch": 0.43413825266867034, "grad_norm": 0.45350730419158936, "learning_rate": 2.5180163581996828e-05, "loss": 0.1868, "step": 4189 }, { "epoch": 0.43424189035133176, "grad_norm": 0.4761691987514496, "learning_rate": 2.5173678866280655e-05, "loss": 0.231, "step": 4190 }, { "epoch": 0.4343455280339932, "grad_norm": 0.4665203094482422, "learning_rate": 2.5167193567602395e-05, "loss": 0.189, "step": 4191 }, { "epoch": 0.4344491657166546, "grad_norm": 0.5481677055358887, "learning_rate": 2.5160707686692796e-05, "loss": 0.2258, "step": 4192 }, { "epoch": 0.434552803399316, "grad_norm": 0.5124077796936035, "learning_rate": 2.5154221224282664e-05, "loss": 0.1925, "step": 4193 }, { "epoch": 0.43465644108197743, "grad_norm": 0.45199984312057495, "learning_rate": 2.5147734181102915e-05, "loss": 0.1949, "step": 4194 }, { "epoch": 0.43476007876463885, "grad_norm": 0.434619277715683, "learning_rate": 2.5141246557884466e-05, "loss": 0.1763, "step": 4195 }, { "epoch": 0.43486371644730026, "grad_norm": 0.4808880388736725, "learning_rate": 2.5134758355358355e-05, "loss": 0.1883, "step": 4196 }, { "epoch": 0.4349673541299617, "grad_norm": 0.5743508338928223, "learning_rate": 2.5128269574255653e-05, "loss": 0.2515, "step": 4197 }, { "epoch": 0.43507099181262304, "grad_norm": 0.5212010741233826, "learning_rate": 2.5121780215307507e-05, "loss": 0.1886, "step": 4198 }, { "epoch": 0.43517462949528446, "grad_norm": 0.5839496850967407, "learning_rate": 2.511529027924513e-05, "loss": 0.2768, "step": 4199 }, { "epoch": 0.4352782671779459, "grad_norm": 0.48613330721855164, "learning_rate": 2.5108799766799794e-05, "loss": 0.1711, "step": 4200 }, { "epoch": 0.4353819048606073, "grad_norm": 0.47015008330345154, "learning_rate": 2.5102308678702842e-05, "loss": 0.2118, "step": 4201 }, { "epoch": 0.4354855425432687, "grad_norm": 0.45157095789909363, "learning_rate": 2.5095817015685686e-05, "loss": 0.1939, "step": 4202 }, { "epoch": 0.43558918022593013, "grad_norm": 0.5309513807296753, "learning_rate": 2.508932477847978e-05, "loss": 0.2386, "step": 4203 }, { "epoch": 0.43569281790859155, "grad_norm": 0.5956526398658752, "learning_rate": 2.5082831967816676e-05, "loss": 0.2582, "step": 4204 }, { "epoch": 0.43579645559125296, "grad_norm": 0.47897085547447205, "learning_rate": 2.5076338584427963e-05, "loss": 0.1902, "step": 4205 }, { "epoch": 0.4359000932739144, "grad_norm": 0.4405311346054077, "learning_rate": 2.5069844629045314e-05, "loss": 0.1887, "step": 4206 }, { "epoch": 0.4360037309565758, "grad_norm": 0.5245810747146606, "learning_rate": 2.5063350102400454e-05, "loss": 0.2111, "step": 4207 }, { "epoch": 0.4361073686392372, "grad_norm": 0.4980141222476959, "learning_rate": 2.505685500522517e-05, "loss": 0.1955, "step": 4208 }, { "epoch": 0.43621100632189863, "grad_norm": 0.5519107580184937, "learning_rate": 2.505035933825133e-05, "loss": 0.2498, "step": 4209 }, { "epoch": 0.43631464400456005, "grad_norm": 0.5352892875671387, "learning_rate": 2.5043863102210854e-05, "loss": 0.2226, "step": 4210 }, { "epoch": 0.43641828168722147, "grad_norm": 0.4760989546775818, "learning_rate": 2.5037366297835716e-05, "loss": 0.2011, "step": 4211 }, { "epoch": 0.4365219193698829, "grad_norm": 0.48700448870658875, "learning_rate": 2.5030868925857976e-05, "loss": 0.1922, "step": 4212 }, { "epoch": 0.4366255570525443, "grad_norm": 0.5479152202606201, "learning_rate": 2.5024370987009748e-05, "loss": 0.2453, "step": 4213 }, { "epoch": 0.4367291947352057, "grad_norm": 0.4179893136024475, "learning_rate": 2.5017872482023208e-05, "loss": 0.1989, "step": 4214 }, { "epoch": 0.43683283241786713, "grad_norm": 0.5843135714530945, "learning_rate": 2.5011373411630598e-05, "loss": 0.2674, "step": 4215 }, { "epoch": 0.43693647010052855, "grad_norm": 0.5585675239562988, "learning_rate": 2.500487377656422e-05, "loss": 0.2292, "step": 4216 }, { "epoch": 0.43704010778318997, "grad_norm": 0.508930504322052, "learning_rate": 2.4998373577556446e-05, "loss": 0.2291, "step": 4217 }, { "epoch": 0.4371437454658514, "grad_norm": 0.5432911515235901, "learning_rate": 2.4991872815339706e-05, "loss": 0.2018, "step": 4218 }, { "epoch": 0.4372473831485128, "grad_norm": 0.5012953877449036, "learning_rate": 2.4985371490646505e-05, "loss": 0.1923, "step": 4219 }, { "epoch": 0.4373510208311742, "grad_norm": 0.4885612726211548, "learning_rate": 2.4978869604209385e-05, "loss": 0.1867, "step": 4220 }, { "epoch": 0.43745465851383564, "grad_norm": 0.5111250877380371, "learning_rate": 2.4972367156760982e-05, "loss": 0.2209, "step": 4221 }, { "epoch": 0.43755829619649705, "grad_norm": 0.49524879455566406, "learning_rate": 2.4965864149033972e-05, "loss": 0.2193, "step": 4222 }, { "epoch": 0.43766193387915847, "grad_norm": 0.5377824306488037, "learning_rate": 2.4959360581761118e-05, "loss": 0.2349, "step": 4223 }, { "epoch": 0.4377655715618199, "grad_norm": 0.44770196080207825, "learning_rate": 2.4952856455675214e-05, "loss": 0.1717, "step": 4224 }, { "epoch": 0.4378692092444813, "grad_norm": 0.6824713349342346, "learning_rate": 2.4946351771509153e-05, "loss": 0.2685, "step": 4225 }, { "epoch": 0.4379728469271427, "grad_norm": 0.533047080039978, "learning_rate": 2.4939846529995858e-05, "loss": 0.2252, "step": 4226 }, { "epoch": 0.43807648460980414, "grad_norm": 0.5649407505989075, "learning_rate": 2.4933340731868342e-05, "loss": 0.2244, "step": 4227 }, { "epoch": 0.43818012229246556, "grad_norm": 0.5743054151535034, "learning_rate": 2.4926834377859646e-05, "loss": 0.2235, "step": 4228 }, { "epoch": 0.438283759975127, "grad_norm": 0.5511937141418457, "learning_rate": 2.4920327468702927e-05, "loss": 0.2072, "step": 4229 }, { "epoch": 0.4383873976577884, "grad_norm": 0.5833871364593506, "learning_rate": 2.4913820005131353e-05, "loss": 0.2507, "step": 4230 }, { "epoch": 0.4384910353404498, "grad_norm": 0.49012407660484314, "learning_rate": 2.4907311987878177e-05, "loss": 0.2389, "step": 4231 }, { "epoch": 0.4385946730231112, "grad_norm": 0.5859277844429016, "learning_rate": 2.4900803417676715e-05, "loss": 0.258, "step": 4232 }, { "epoch": 0.43869831070577264, "grad_norm": 0.55899977684021, "learning_rate": 2.4894294295260344e-05, "loss": 0.2351, "step": 4233 }, { "epoch": 0.43880194838843406, "grad_norm": 0.4472266733646393, "learning_rate": 2.4887784621362498e-05, "loss": 0.1898, "step": 4234 }, { "epoch": 0.4389055860710955, "grad_norm": 0.4899350702762604, "learning_rate": 2.4881274396716687e-05, "loss": 0.2168, "step": 4235 }, { "epoch": 0.43900922375375684, "grad_norm": 0.4696301221847534, "learning_rate": 2.4874763622056453e-05, "loss": 0.1883, "step": 4236 }, { "epoch": 0.43911286143641826, "grad_norm": 0.45876142382621765, "learning_rate": 2.4868252298115437e-05, "loss": 0.1895, "step": 4237 }, { "epoch": 0.4392164991190797, "grad_norm": 0.6082130074501038, "learning_rate": 2.4861740425627323e-05, "loss": 0.2643, "step": 4238 }, { "epoch": 0.4393201368017411, "grad_norm": 0.5665858387947083, "learning_rate": 2.4855228005325854e-05, "loss": 0.2428, "step": 4239 }, { "epoch": 0.4394237744844025, "grad_norm": 0.45706334710121155, "learning_rate": 2.4848715037944836e-05, "loss": 0.1914, "step": 4240 }, { "epoch": 0.4395274121670639, "grad_norm": 0.4469106197357178, "learning_rate": 2.484220152421815e-05, "loss": 0.1838, "step": 4241 }, { "epoch": 0.43963104984972534, "grad_norm": 0.5052451491355896, "learning_rate": 2.483568746487972e-05, "loss": 0.2364, "step": 4242 }, { "epoch": 0.43973468753238676, "grad_norm": 0.5797638893127441, "learning_rate": 2.482917286066355e-05, "loss": 0.2248, "step": 4243 }, { "epoch": 0.4398383252150482, "grad_norm": 0.5379804968833923, "learning_rate": 2.482265771230368e-05, "loss": 0.192, "step": 4244 }, { "epoch": 0.4399419628977096, "grad_norm": 0.5726395845413208, "learning_rate": 2.481614202053425e-05, "loss": 0.2318, "step": 4245 }, { "epoch": 0.440045600580371, "grad_norm": 0.5437195301055908, "learning_rate": 2.4809625786089413e-05, "loss": 0.2094, "step": 4246 }, { "epoch": 0.4401492382630324, "grad_norm": 0.48460930585861206, "learning_rate": 2.4803109009703417e-05, "loss": 0.1963, "step": 4247 }, { "epoch": 0.44025287594569384, "grad_norm": 0.5928154587745667, "learning_rate": 2.479659169211057e-05, "loss": 0.2648, "step": 4248 }, { "epoch": 0.44035651362835526, "grad_norm": 0.5197213292121887, "learning_rate": 2.4790073834045226e-05, "loss": 0.2283, "step": 4249 }, { "epoch": 0.4404601513110167, "grad_norm": 0.5223854780197144, "learning_rate": 2.478355543624181e-05, "loss": 0.199, "step": 4250 }, { "epoch": 0.4405637889936781, "grad_norm": 0.5380105376243591, "learning_rate": 2.4777036499434805e-05, "loss": 0.2177, "step": 4251 }, { "epoch": 0.4406674266763395, "grad_norm": 0.44573792815208435, "learning_rate": 2.477051702435875e-05, "loss": 0.1869, "step": 4252 }, { "epoch": 0.44077106435900093, "grad_norm": 0.5541878938674927, "learning_rate": 2.4763997011748253e-05, "loss": 0.1807, "step": 4253 }, { "epoch": 0.44087470204166235, "grad_norm": 0.43598732352256775, "learning_rate": 2.4757476462337985e-05, "loss": 0.1723, "step": 4254 }, { "epoch": 0.44097833972432376, "grad_norm": 0.5411472320556641, "learning_rate": 2.4750955376862655e-05, "loss": 0.2145, "step": 4255 }, { "epoch": 0.4410819774069852, "grad_norm": 0.43212613463401794, "learning_rate": 2.4744433756057062e-05, "loss": 0.1869, "step": 4256 }, { "epoch": 0.4411856150896466, "grad_norm": 0.560360312461853, "learning_rate": 2.473791160065605e-05, "loss": 0.2129, "step": 4257 }, { "epoch": 0.441289252772308, "grad_norm": 0.5539674162864685, "learning_rate": 2.473138891139452e-05, "loss": 0.2323, "step": 4258 }, { "epoch": 0.44139289045496943, "grad_norm": 0.5044907927513123, "learning_rate": 2.472486568900745e-05, "loss": 0.205, "step": 4259 }, { "epoch": 0.44149652813763085, "grad_norm": 0.4743616282939911, "learning_rate": 2.4718341934229852e-05, "loss": 0.1676, "step": 4260 }, { "epoch": 0.44160016582029227, "grad_norm": 0.47938790917396545, "learning_rate": 2.4711817647796828e-05, "loss": 0.1853, "step": 4261 }, { "epoch": 0.4417038035029537, "grad_norm": 0.5426909327507019, "learning_rate": 2.470529283044351e-05, "loss": 0.2362, "step": 4262 }, { "epoch": 0.4418074411856151, "grad_norm": 0.5705090165138245, "learning_rate": 2.469876748290511e-05, "loss": 0.2277, "step": 4263 }, { "epoch": 0.4419110788682765, "grad_norm": 0.492149293422699, "learning_rate": 2.4692241605916897e-05, "loss": 0.2039, "step": 4264 }, { "epoch": 0.44201471655093794, "grad_norm": 0.5022493004798889, "learning_rate": 2.468571520021419e-05, "loss": 0.193, "step": 4265 }, { "epoch": 0.44211835423359935, "grad_norm": 0.5307921171188354, "learning_rate": 2.467918826653238e-05, "loss": 0.2117, "step": 4266 }, { "epoch": 0.44222199191626077, "grad_norm": 0.5519681572914124, "learning_rate": 2.4672660805606913e-05, "loss": 0.2201, "step": 4267 }, { "epoch": 0.4423256295989222, "grad_norm": 0.5887792706489563, "learning_rate": 2.466613281817329e-05, "loss": 0.2721, "step": 4268 }, { "epoch": 0.4424292672815836, "grad_norm": 0.596578061580658, "learning_rate": 2.4659604304967068e-05, "loss": 0.2425, "step": 4269 }, { "epoch": 0.442532904964245, "grad_norm": 0.5199716091156006, "learning_rate": 2.4653075266723886e-05, "loss": 0.1963, "step": 4270 }, { "epoch": 0.44263654264690644, "grad_norm": 0.46791988611221313, "learning_rate": 2.4646545704179413e-05, "loss": 0.1851, "step": 4271 }, { "epoch": 0.44274018032956786, "grad_norm": 0.4838850796222687, "learning_rate": 2.4640015618069386e-05, "loss": 0.2125, "step": 4272 }, { "epoch": 0.4428438180122293, "grad_norm": 0.5285201072692871, "learning_rate": 2.4633485009129622e-05, "loss": 0.2239, "step": 4273 }, { "epoch": 0.44294745569489063, "grad_norm": 0.5395921468734741, "learning_rate": 2.4626953878095968e-05, "loss": 0.2252, "step": 4274 }, { "epoch": 0.44305109337755205, "grad_norm": 0.5135499238967896, "learning_rate": 2.4620422225704342e-05, "loss": 0.2168, "step": 4275 }, { "epoch": 0.44315473106021347, "grad_norm": 0.5286493897438049, "learning_rate": 2.4613890052690722e-05, "loss": 0.2413, "step": 4276 }, { "epoch": 0.4432583687428749, "grad_norm": 0.5408833622932434, "learning_rate": 2.4607357359791146e-05, "loss": 0.2342, "step": 4277 }, { "epoch": 0.4433620064255363, "grad_norm": 0.620570719242096, "learning_rate": 2.4600824147741698e-05, "loss": 0.2632, "step": 4278 }, { "epoch": 0.4434656441081977, "grad_norm": 0.5322298407554626, "learning_rate": 2.4594290417278542e-05, "loss": 0.2383, "step": 4279 }, { "epoch": 0.44356928179085914, "grad_norm": 0.43045082688331604, "learning_rate": 2.458775616913789e-05, "loss": 0.1629, "step": 4280 }, { "epoch": 0.44367291947352056, "grad_norm": 0.6002359390258789, "learning_rate": 2.4581221404055992e-05, "loss": 0.2368, "step": 4281 }, { "epoch": 0.44377655715618197, "grad_norm": 0.44602683186531067, "learning_rate": 2.4574686122769195e-05, "loss": 0.1859, "step": 4282 }, { "epoch": 0.4438801948388434, "grad_norm": 0.5169301629066467, "learning_rate": 2.4568150326013877e-05, "loss": 0.1988, "step": 4283 }, { "epoch": 0.4439838325215048, "grad_norm": 0.5686281323432922, "learning_rate": 2.456161401452648e-05, "loss": 0.2601, "step": 4284 }, { "epoch": 0.4440874702041662, "grad_norm": 0.5369881391525269, "learning_rate": 2.455507718904351e-05, "loss": 0.227, "step": 4285 }, { "epoch": 0.44419110788682764, "grad_norm": 0.5094032883644104, "learning_rate": 2.4548539850301523e-05, "loss": 0.2082, "step": 4286 }, { "epoch": 0.44429474556948906, "grad_norm": 0.5631538033485413, "learning_rate": 2.4542001999037125e-05, "loss": 0.2334, "step": 4287 }, { "epoch": 0.4443983832521505, "grad_norm": 0.4589706361293793, "learning_rate": 2.4535463635987012e-05, "loss": 0.1805, "step": 4288 }, { "epoch": 0.4445020209348119, "grad_norm": 0.5049294233322144, "learning_rate": 2.4528924761887915e-05, "loss": 0.2093, "step": 4289 }, { "epoch": 0.4446056586174733, "grad_norm": 0.5298270583152771, "learning_rate": 2.4522385377476607e-05, "loss": 0.2439, "step": 4290 }, { "epoch": 0.4447092963001347, "grad_norm": 0.49855804443359375, "learning_rate": 2.4515845483489943e-05, "loss": 0.2057, "step": 4291 }, { "epoch": 0.44481293398279614, "grad_norm": 0.4899182617664337, "learning_rate": 2.4509305080664834e-05, "loss": 0.1818, "step": 4292 }, { "epoch": 0.44491657166545756, "grad_norm": 0.45728784799575806, "learning_rate": 2.4502764169738237e-05, "loss": 0.1793, "step": 4293 }, { "epoch": 0.445020209348119, "grad_norm": 0.534850001335144, "learning_rate": 2.449622275144717e-05, "loss": 0.2487, "step": 4294 }, { "epoch": 0.4451238470307804, "grad_norm": 0.5148128867149353, "learning_rate": 2.448968082652872e-05, "loss": 0.2385, "step": 4295 }, { "epoch": 0.4452274847134418, "grad_norm": 0.5814290642738342, "learning_rate": 2.4483138395720013e-05, "loss": 0.2136, "step": 4296 }, { "epoch": 0.44533112239610323, "grad_norm": 0.46989554166793823, "learning_rate": 2.4476595459758234e-05, "loss": 0.1806, "step": 4297 }, { "epoch": 0.44543476007876465, "grad_norm": 0.5008494853973389, "learning_rate": 2.4470052019380646e-05, "loss": 0.1966, "step": 4298 }, { "epoch": 0.44553839776142606, "grad_norm": 0.5500649809837341, "learning_rate": 2.446350807532454e-05, "loss": 0.259, "step": 4299 }, { "epoch": 0.4456420354440875, "grad_norm": 0.46479493379592896, "learning_rate": 2.4456963628327284e-05, "loss": 0.2102, "step": 4300 }, { "epoch": 0.4457456731267489, "grad_norm": 0.4430640935897827, "learning_rate": 2.445041867912629e-05, "loss": 0.1848, "step": 4301 }, { "epoch": 0.4458493108094103, "grad_norm": 0.48857590556144714, "learning_rate": 2.4443873228459044e-05, "loss": 0.2187, "step": 4302 }, { "epoch": 0.44595294849207173, "grad_norm": 0.5656534433364868, "learning_rate": 2.443732727706307e-05, "loss": 0.2404, "step": 4303 }, { "epoch": 0.44605658617473315, "grad_norm": 0.5321723818778992, "learning_rate": 2.4430780825675952e-05, "loss": 0.235, "step": 4304 }, { "epoch": 0.44616022385739457, "grad_norm": 0.5956742763519287, "learning_rate": 2.4424233875035344e-05, "loss": 0.2373, "step": 4305 }, { "epoch": 0.446263861540056, "grad_norm": 0.43731212615966797, "learning_rate": 2.441768642587894e-05, "loss": 0.1862, "step": 4306 }, { "epoch": 0.4463674992227174, "grad_norm": 0.45064225792884827, "learning_rate": 2.4411138478944488e-05, "loss": 0.2012, "step": 4307 }, { "epoch": 0.4464711369053788, "grad_norm": 0.5171825885772705, "learning_rate": 2.4404590034969822e-05, "loss": 0.2055, "step": 4308 }, { "epoch": 0.44657477458804024, "grad_norm": 0.5186307430267334, "learning_rate": 2.439804109469279e-05, "loss": 0.2207, "step": 4309 }, { "epoch": 0.44667841227070165, "grad_norm": 0.4361405074596405, "learning_rate": 2.4391491658851324e-05, "loss": 0.1691, "step": 4310 }, { "epoch": 0.44678204995336307, "grad_norm": 0.5209653973579407, "learning_rate": 2.4384941728183406e-05, "loss": 0.2176, "step": 4311 }, { "epoch": 0.44688568763602443, "grad_norm": 0.49713316559791565, "learning_rate": 2.4378391303427072e-05, "loss": 0.1889, "step": 4312 }, { "epoch": 0.44698932531868585, "grad_norm": 0.46346545219421387, "learning_rate": 2.4371840385320413e-05, "loss": 0.1594, "step": 4313 }, { "epoch": 0.44709296300134727, "grad_norm": 0.4363242983818054, "learning_rate": 2.436528897460158e-05, "loss": 0.181, "step": 4314 }, { "epoch": 0.4471966006840087, "grad_norm": 0.5040505528450012, "learning_rate": 2.4358737072008763e-05, "loss": 0.2105, "step": 4315 }, { "epoch": 0.4473002383666701, "grad_norm": 0.5211902856826782, "learning_rate": 2.435218467828023e-05, "loss": 0.1993, "step": 4316 }, { "epoch": 0.4474038760493315, "grad_norm": 0.5485498309135437, "learning_rate": 2.4345631794154297e-05, "loss": 0.2324, "step": 4317 }, { "epoch": 0.44750751373199293, "grad_norm": 0.4832819700241089, "learning_rate": 2.4339078420369325e-05, "loss": 0.1938, "step": 4318 }, { "epoch": 0.44761115141465435, "grad_norm": 0.4788733720779419, "learning_rate": 2.433252455766374e-05, "loss": 0.1766, "step": 4319 }, { "epoch": 0.44771478909731577, "grad_norm": 0.5498034358024597, "learning_rate": 2.4325970206776028e-05, "loss": 0.2261, "step": 4320 }, { "epoch": 0.4478184267799772, "grad_norm": 0.5819903016090393, "learning_rate": 2.431941536844472e-05, "loss": 0.2342, "step": 4321 }, { "epoch": 0.4479220644626386, "grad_norm": 0.41093122959136963, "learning_rate": 2.431286004340839e-05, "loss": 0.1647, "step": 4322 }, { "epoch": 0.4480257021453, "grad_norm": 0.5445446968078613, "learning_rate": 2.4306304232405707e-05, "loss": 0.2403, "step": 4323 }, { "epoch": 0.44812933982796144, "grad_norm": 0.4773034453392029, "learning_rate": 2.4299747936175354e-05, "loss": 0.1937, "step": 4324 }, { "epoch": 0.44823297751062285, "grad_norm": 0.6398991942405701, "learning_rate": 2.4293191155456087e-05, "loss": 0.2661, "step": 4325 }, { "epoch": 0.44833661519328427, "grad_norm": 0.46146780252456665, "learning_rate": 2.428663389098672e-05, "loss": 0.1862, "step": 4326 }, { "epoch": 0.4484402528759457, "grad_norm": 0.5381836891174316, "learning_rate": 2.4280076143506103e-05, "loss": 0.2242, "step": 4327 }, { "epoch": 0.4485438905586071, "grad_norm": 0.5368882417678833, "learning_rate": 2.427351791375316e-05, "loss": 0.2284, "step": 4328 }, { "epoch": 0.4486475282412685, "grad_norm": 0.5917689204216003, "learning_rate": 2.4266959202466862e-05, "loss": 0.2463, "step": 4329 }, { "epoch": 0.44875116592392994, "grad_norm": 0.5041471123695374, "learning_rate": 2.426040001038624e-05, "loss": 0.2225, "step": 4330 }, { "epoch": 0.44885480360659136, "grad_norm": 0.5714327096939087, "learning_rate": 2.4253840338250364e-05, "loss": 0.2419, "step": 4331 }, { "epoch": 0.4489584412892528, "grad_norm": 0.5183489918708801, "learning_rate": 2.4247280186798364e-05, "loss": 0.2156, "step": 4332 }, { "epoch": 0.4490620789719142, "grad_norm": 0.68091881275177, "learning_rate": 2.4240719556769446e-05, "loss": 0.2074, "step": 4333 }, { "epoch": 0.4491657166545756, "grad_norm": 0.4969347417354584, "learning_rate": 2.4234158448902835e-05, "loss": 0.2056, "step": 4334 }, { "epoch": 0.449269354337237, "grad_norm": 0.512744128704071, "learning_rate": 2.4227596863937835e-05, "loss": 0.1999, "step": 4335 }, { "epoch": 0.44937299201989844, "grad_norm": 0.49277830123901367, "learning_rate": 2.422103480261379e-05, "loss": 0.1888, "step": 4336 }, { "epoch": 0.44947662970255986, "grad_norm": 0.5266940593719482, "learning_rate": 2.4214472265670105e-05, "loss": 0.2017, "step": 4337 }, { "epoch": 0.4495802673852213, "grad_norm": 0.5924473404884338, "learning_rate": 2.420790925384624e-05, "loss": 0.2185, "step": 4338 }, { "epoch": 0.4496839050678827, "grad_norm": 0.505537748336792, "learning_rate": 2.4201345767881697e-05, "loss": 0.1918, "step": 4339 }, { "epoch": 0.4497875427505441, "grad_norm": 0.5462311506271362, "learning_rate": 2.4194781808516047e-05, "loss": 0.2503, "step": 4340 }, { "epoch": 0.44989118043320553, "grad_norm": 0.5938394665718079, "learning_rate": 2.41882173764889e-05, "loss": 0.2578, "step": 4341 }, { "epoch": 0.44999481811586695, "grad_norm": 0.5128920674324036, "learning_rate": 2.4181652472539937e-05, "loss": 0.2014, "step": 4342 }, { "epoch": 0.45009845579852836, "grad_norm": 0.5374717712402344, "learning_rate": 2.417508709740887e-05, "loss": 0.2188, "step": 4343 }, { "epoch": 0.4502020934811898, "grad_norm": 0.5328230261802673, "learning_rate": 2.4168521251835477e-05, "loss": 0.2324, "step": 4344 }, { "epoch": 0.4503057311638512, "grad_norm": 0.4726838171482086, "learning_rate": 2.416195493655959e-05, "loss": 0.1806, "step": 4345 }, { "epoch": 0.4504093688465126, "grad_norm": 0.4431437849998474, "learning_rate": 2.4155388152321094e-05, "loss": 0.1856, "step": 4346 }, { "epoch": 0.45051300652917403, "grad_norm": 0.4972296357154846, "learning_rate": 2.414882089985992e-05, "loss": 0.1903, "step": 4347 }, { "epoch": 0.45061664421183545, "grad_norm": 0.45771318674087524, "learning_rate": 2.414225317991605e-05, "loss": 0.1796, "step": 4348 }, { "epoch": 0.45072028189449687, "grad_norm": 0.4780783951282501, "learning_rate": 2.4135684993229546e-05, "loss": 0.1904, "step": 4349 }, { "epoch": 0.4508239195771582, "grad_norm": 0.4782392382621765, "learning_rate": 2.4129116340540472e-05, "loss": 0.2062, "step": 4350 }, { "epoch": 0.45092755725981964, "grad_norm": 0.4502820670604706, "learning_rate": 2.4122547222588986e-05, "loss": 0.192, "step": 4351 }, { "epoch": 0.45103119494248106, "grad_norm": 0.5763320922851562, "learning_rate": 2.41159776401153e-05, "loss": 0.2177, "step": 4352 }, { "epoch": 0.4511348326251425, "grad_norm": 0.5172169804573059, "learning_rate": 2.410940759385964e-05, "loss": 0.2476, "step": 4353 }, { "epoch": 0.4512384703078039, "grad_norm": 0.48860692977905273, "learning_rate": 2.410283708456233e-05, "loss": 0.1816, "step": 4354 }, { "epoch": 0.4513421079904653, "grad_norm": 0.4798571765422821, "learning_rate": 2.4096266112963707e-05, "loss": 0.2029, "step": 4355 }, { "epoch": 0.45144574567312673, "grad_norm": 0.5416128039360046, "learning_rate": 2.408969467980419e-05, "loss": 0.2245, "step": 4356 }, { "epoch": 0.45154938335578815, "grad_norm": 0.4884701371192932, "learning_rate": 2.4083122785824236e-05, "loss": 0.1841, "step": 4357 }, { "epoch": 0.45165302103844956, "grad_norm": 0.5009334683418274, "learning_rate": 2.407655043176435e-05, "loss": 0.2266, "step": 4358 }, { "epoch": 0.451756658721111, "grad_norm": 0.49989357590675354, "learning_rate": 2.4069977618365106e-05, "loss": 0.1884, "step": 4359 }, { "epoch": 0.4518602964037724, "grad_norm": 0.5729777216911316, "learning_rate": 2.4063404346367102e-05, "loss": 0.2186, "step": 4360 }, { "epoch": 0.4519639340864338, "grad_norm": 0.5348790287971497, "learning_rate": 2.4056830616511015e-05, "loss": 0.2289, "step": 4361 }, { "epoch": 0.45206757176909523, "grad_norm": 0.53462153673172, "learning_rate": 2.4050256429537565e-05, "loss": 0.2166, "step": 4362 }, { "epoch": 0.45217120945175665, "grad_norm": 0.5014505386352539, "learning_rate": 2.404368178618751e-05, "loss": 0.1993, "step": 4363 }, { "epoch": 0.45227484713441807, "grad_norm": 0.5104119181632996, "learning_rate": 2.4037106687201683e-05, "loss": 0.2259, "step": 4364 }, { "epoch": 0.4523784848170795, "grad_norm": 0.5958729386329651, "learning_rate": 2.4030531133320947e-05, "loss": 0.2364, "step": 4365 }, { "epoch": 0.4524821224997409, "grad_norm": 0.5871717929840088, "learning_rate": 2.4023955125286228e-05, "loss": 0.2327, "step": 4366 }, { "epoch": 0.4525857601824023, "grad_norm": 0.4634658396244049, "learning_rate": 2.40173786638385e-05, "loss": 0.1715, "step": 4367 }, { "epoch": 0.45268939786506374, "grad_norm": 0.5972458124160767, "learning_rate": 2.40108017497188e-05, "loss": 0.2327, "step": 4368 }, { "epoch": 0.45279303554772515, "grad_norm": 0.5323976874351501, "learning_rate": 2.4004224383668183e-05, "loss": 0.2112, "step": 4369 }, { "epoch": 0.45289667323038657, "grad_norm": 0.47007569670677185, "learning_rate": 2.399764656642779e-05, "loss": 0.2046, "step": 4370 }, { "epoch": 0.453000310913048, "grad_norm": 0.5574849843978882, "learning_rate": 2.3991068298738794e-05, "loss": 0.2419, "step": 4371 }, { "epoch": 0.4531039485957094, "grad_norm": 0.5175203680992126, "learning_rate": 2.398448958134243e-05, "loss": 0.2094, "step": 4372 }, { "epoch": 0.4532075862783708, "grad_norm": 0.476266473531723, "learning_rate": 2.397791041497997e-05, "loss": 0.2042, "step": 4373 }, { "epoch": 0.45331122396103224, "grad_norm": 0.41861820220947266, "learning_rate": 2.3971330800392753e-05, "loss": 0.1562, "step": 4374 }, { "epoch": 0.45341486164369366, "grad_norm": 0.5280885696411133, "learning_rate": 2.3964750738322155e-05, "loss": 0.1882, "step": 4375 }, { "epoch": 0.4535184993263551, "grad_norm": 0.6168330311775208, "learning_rate": 2.39581702295096e-05, "loss": 0.2421, "step": 4376 }, { "epoch": 0.4536221370090165, "grad_norm": 0.4856494963169098, "learning_rate": 2.3951589274696586e-05, "loss": 0.1916, "step": 4377 }, { "epoch": 0.4537257746916779, "grad_norm": 0.5814061164855957, "learning_rate": 2.394500787462463e-05, "loss": 0.2429, "step": 4378 }, { "epoch": 0.4538294123743393, "grad_norm": 0.4881944954395294, "learning_rate": 2.393842603003532e-05, "loss": 0.2185, "step": 4379 }, { "epoch": 0.45393305005700074, "grad_norm": 0.515495240688324, "learning_rate": 2.3931843741670283e-05, "loss": 0.1946, "step": 4380 }, { "epoch": 0.45403668773966216, "grad_norm": 0.5544822216033936, "learning_rate": 2.3925261010271212e-05, "loss": 0.2752, "step": 4381 }, { "epoch": 0.4541403254223236, "grad_norm": 0.44878914952278137, "learning_rate": 2.3918677836579828e-05, "loss": 0.1865, "step": 4382 }, { "epoch": 0.454243963104985, "grad_norm": 0.4595224857330322, "learning_rate": 2.391209422133792e-05, "loss": 0.1835, "step": 4383 }, { "epoch": 0.4543476007876464, "grad_norm": 0.5050191283226013, "learning_rate": 2.3905510165287317e-05, "loss": 0.2167, "step": 4384 }, { "epoch": 0.45445123847030783, "grad_norm": 0.497149258852005, "learning_rate": 2.38989256691699e-05, "loss": 0.1899, "step": 4385 }, { "epoch": 0.45455487615296924, "grad_norm": 0.5135137438774109, "learning_rate": 2.3892340733727594e-05, "loss": 0.2113, "step": 4386 }, { "epoch": 0.45465851383563066, "grad_norm": 0.546467661857605, "learning_rate": 2.3885755359702395e-05, "loss": 0.2358, "step": 4387 }, { "epoch": 0.454762151518292, "grad_norm": 0.46558383107185364, "learning_rate": 2.387916954783631e-05, "loss": 0.1788, "step": 4388 }, { "epoch": 0.45486578920095344, "grad_norm": 0.49247148633003235, "learning_rate": 2.387258329887144e-05, "loss": 0.1819, "step": 4389 }, { "epoch": 0.45496942688361486, "grad_norm": 0.5415985584259033, "learning_rate": 2.3865996613549905e-05, "loss": 0.2116, "step": 4390 }, { "epoch": 0.4550730645662763, "grad_norm": 0.5160948038101196, "learning_rate": 2.3859409492613873e-05, "loss": 0.2166, "step": 4391 }, { "epoch": 0.4551767022489377, "grad_norm": 0.5398045182228088, "learning_rate": 2.3852821936805582e-05, "loss": 0.2496, "step": 4392 }, { "epoch": 0.4552803399315991, "grad_norm": 0.46893683075904846, "learning_rate": 2.384623394686731e-05, "loss": 0.2064, "step": 4393 }, { "epoch": 0.4553839776142605, "grad_norm": 0.5869868397712708, "learning_rate": 2.3839645523541376e-05, "loss": 0.2697, "step": 4394 }, { "epoch": 0.45548761529692194, "grad_norm": 0.45383915305137634, "learning_rate": 2.3833056667570146e-05, "loss": 0.1766, "step": 4395 }, { "epoch": 0.45559125297958336, "grad_norm": 0.48213571310043335, "learning_rate": 2.382646737969605e-05, "loss": 0.1922, "step": 4396 }, { "epoch": 0.4556948906622448, "grad_norm": 0.5315578579902649, "learning_rate": 2.381987766066156e-05, "loss": 0.2304, "step": 4397 }, { "epoch": 0.4557985283449062, "grad_norm": 0.5228009223937988, "learning_rate": 2.3813287511209194e-05, "loss": 0.2411, "step": 4398 }, { "epoch": 0.4559021660275676, "grad_norm": 0.6200061440467834, "learning_rate": 2.3806696932081516e-05, "loss": 0.2588, "step": 4399 }, { "epoch": 0.45600580371022903, "grad_norm": 0.5222203731536865, "learning_rate": 2.3800105924021154e-05, "loss": 0.2328, "step": 4400 }, { "epoch": 0.45610944139289045, "grad_norm": 0.5595455765724182, "learning_rate": 2.3793514487770753e-05, "loss": 0.2555, "step": 4401 }, { "epoch": 0.45621307907555186, "grad_norm": 0.4543464779853821, "learning_rate": 2.378692262407304e-05, "loss": 0.1815, "step": 4402 }, { "epoch": 0.4563167167582133, "grad_norm": 0.522218644618988, "learning_rate": 2.378033033367078e-05, "loss": 0.2272, "step": 4403 }, { "epoch": 0.4564203544408747, "grad_norm": 0.5159570574760437, "learning_rate": 2.377373761730677e-05, "loss": 0.2134, "step": 4404 }, { "epoch": 0.4565239921235361, "grad_norm": 0.49490806460380554, "learning_rate": 2.376714447572387e-05, "loss": 0.2054, "step": 4405 }, { "epoch": 0.45662762980619753, "grad_norm": 0.4948660433292389, "learning_rate": 2.3760550909664987e-05, "loss": 0.2206, "step": 4406 }, { "epoch": 0.45673126748885895, "grad_norm": 0.4976104199886322, "learning_rate": 2.3753956919873074e-05, "loss": 0.1837, "step": 4407 }, { "epoch": 0.45683490517152037, "grad_norm": 0.510616660118103, "learning_rate": 2.3747362507091126e-05, "loss": 0.2338, "step": 4408 }, { "epoch": 0.4569385428541818, "grad_norm": 0.6493490934371948, "learning_rate": 2.3740767672062206e-05, "loss": 0.2459, "step": 4409 }, { "epoch": 0.4570421805368432, "grad_norm": 0.533977746963501, "learning_rate": 2.3734172415529394e-05, "loss": 0.2021, "step": 4410 }, { "epoch": 0.4571458182195046, "grad_norm": 0.5535910129547119, "learning_rate": 2.3727576738235838e-05, "loss": 0.2244, "step": 4411 }, { "epoch": 0.45724945590216604, "grad_norm": 0.5072131752967834, "learning_rate": 2.3720980640924733e-05, "loss": 0.2006, "step": 4412 }, { "epoch": 0.45735309358482745, "grad_norm": 0.5280940532684326, "learning_rate": 2.371438412433931e-05, "loss": 0.2347, "step": 4413 }, { "epoch": 0.45745673126748887, "grad_norm": 0.5479831099510193, "learning_rate": 2.370778718922286e-05, "loss": 0.2098, "step": 4414 }, { "epoch": 0.4575603689501503, "grad_norm": 0.47882604598999023, "learning_rate": 2.3701189836318715e-05, "loss": 0.184, "step": 4415 }, { "epoch": 0.4576640066328117, "grad_norm": 0.5942421555519104, "learning_rate": 2.369459206637025e-05, "loss": 0.2398, "step": 4416 }, { "epoch": 0.4577676443154731, "grad_norm": 0.483982652425766, "learning_rate": 2.3687993880120895e-05, "loss": 0.2383, "step": 4417 }, { "epoch": 0.45787128199813454, "grad_norm": 0.5605577826499939, "learning_rate": 2.3681395278314125e-05, "loss": 0.2347, "step": 4418 }, { "epoch": 0.45797491968079596, "grad_norm": 0.49620258808135986, "learning_rate": 2.3674796261693456e-05, "loss": 0.1894, "step": 4419 }, { "epoch": 0.4580785573634574, "grad_norm": 0.5180416107177734, "learning_rate": 2.366819683100246e-05, "loss": 0.213, "step": 4420 }, { "epoch": 0.4581821950461188, "grad_norm": 0.5405642986297607, "learning_rate": 2.366159698698474e-05, "loss": 0.1836, "step": 4421 }, { "epoch": 0.4582858327287802, "grad_norm": 0.5185651183128357, "learning_rate": 2.365499673038397e-05, "loss": 0.2256, "step": 4422 }, { "epoch": 0.4583894704114416, "grad_norm": 0.5593555569648743, "learning_rate": 2.364839606194385e-05, "loss": 0.2403, "step": 4423 }, { "epoch": 0.45849310809410304, "grad_norm": 0.49372783303260803, "learning_rate": 2.3641794982408133e-05, "loss": 0.2145, "step": 4424 }, { "epoch": 0.45859674577676446, "grad_norm": 0.4593977630138397, "learning_rate": 2.3635193492520617e-05, "loss": 0.1916, "step": 4425 }, { "epoch": 0.4587003834594258, "grad_norm": 0.5412962436676025, "learning_rate": 2.362859159302515e-05, "loss": 0.2149, "step": 4426 }, { "epoch": 0.45880402114208724, "grad_norm": 0.4642784595489502, "learning_rate": 2.3621989284665617e-05, "loss": 0.191, "step": 4427 }, { "epoch": 0.45890765882474865, "grad_norm": 0.4705636203289032, "learning_rate": 2.3615386568185973e-05, "loss": 0.1854, "step": 4428 }, { "epoch": 0.45901129650741007, "grad_norm": 0.5116401314735413, "learning_rate": 2.3608783444330184e-05, "loss": 0.2054, "step": 4429 }, { "epoch": 0.4591149341900715, "grad_norm": 0.4768635332584381, "learning_rate": 2.3602179913842286e-05, "loss": 0.1735, "step": 4430 }, { "epoch": 0.4592185718727329, "grad_norm": 0.524996280670166, "learning_rate": 2.3595575977466355e-05, "loss": 0.2116, "step": 4431 }, { "epoch": 0.4593222095553943, "grad_norm": 0.5416904091835022, "learning_rate": 2.3588971635946517e-05, "loss": 0.2347, "step": 4432 }, { "epoch": 0.45942584723805574, "grad_norm": 0.5637031197547913, "learning_rate": 2.358236689002693e-05, "loss": 0.2178, "step": 4433 }, { "epoch": 0.45952948492071716, "grad_norm": 0.5045896172523499, "learning_rate": 2.357576174045181e-05, "loss": 0.2282, "step": 4434 }, { "epoch": 0.4596331226033786, "grad_norm": 0.48037102818489075, "learning_rate": 2.3569156187965418e-05, "loss": 0.1931, "step": 4435 }, { "epoch": 0.45973676028604, "grad_norm": 0.4950512945652008, "learning_rate": 2.3562550233312054e-05, "loss": 0.1801, "step": 4436 }, { "epoch": 0.4598403979687014, "grad_norm": 0.5815441608428955, "learning_rate": 2.355594387723607e-05, "loss": 0.2448, "step": 4437 }, { "epoch": 0.4599440356513628, "grad_norm": 0.5947690010070801, "learning_rate": 2.3549337120481858e-05, "loss": 0.2311, "step": 4438 }, { "epoch": 0.46004767333402424, "grad_norm": 0.5021811723709106, "learning_rate": 2.3542729963793854e-05, "loss": 0.2134, "step": 4439 }, { "epoch": 0.46015131101668566, "grad_norm": 0.5300816893577576, "learning_rate": 2.353612240791655e-05, "loss": 0.2169, "step": 4440 }, { "epoch": 0.4602549486993471, "grad_norm": 0.5285887718200684, "learning_rate": 2.3529514453594465e-05, "loss": 0.2223, "step": 4441 }, { "epoch": 0.4603585863820085, "grad_norm": 0.4838586747646332, "learning_rate": 2.3522906101572174e-05, "loss": 0.2023, "step": 4442 }, { "epoch": 0.4604622240646699, "grad_norm": 0.4455113708972931, "learning_rate": 2.351629735259431e-05, "loss": 0.2087, "step": 4443 }, { "epoch": 0.46056586174733133, "grad_norm": 0.6215282082557678, "learning_rate": 2.350968820740552e-05, "loss": 0.2205, "step": 4444 }, { "epoch": 0.46066949942999275, "grad_norm": 0.5159878730773926, "learning_rate": 2.3503078666750518e-05, "loss": 0.2092, "step": 4445 }, { "epoch": 0.46077313711265416, "grad_norm": 0.5394431948661804, "learning_rate": 2.349646873137406e-05, "loss": 0.2548, "step": 4446 }, { "epoch": 0.4608767747953156, "grad_norm": 0.5611411333084106, "learning_rate": 2.348985840202094e-05, "loss": 0.2058, "step": 4447 }, { "epoch": 0.460980412477977, "grad_norm": 0.5420775413513184, "learning_rate": 2.3483247679436004e-05, "loss": 0.2025, "step": 4448 }, { "epoch": 0.4610840501606384, "grad_norm": 0.5555642247200012, "learning_rate": 2.3476636564364128e-05, "loss": 0.2105, "step": 4449 }, { "epoch": 0.46118768784329983, "grad_norm": 0.48131993412971497, "learning_rate": 2.3470025057550253e-05, "loss": 0.1676, "step": 4450 }, { "epoch": 0.46129132552596125, "grad_norm": 0.5748143196105957, "learning_rate": 2.346341315973935e-05, "loss": 0.2159, "step": 4451 }, { "epoch": 0.46139496320862267, "grad_norm": 0.4542897343635559, "learning_rate": 2.3456800871676428e-05, "loss": 0.1623, "step": 4452 }, { "epoch": 0.4614986008912841, "grad_norm": 0.532109260559082, "learning_rate": 2.345018819410657e-05, "loss": 0.228, "step": 4453 }, { "epoch": 0.4616022385739455, "grad_norm": 0.49552032351493835, "learning_rate": 2.344357512777486e-05, "loss": 0.1974, "step": 4454 }, { "epoch": 0.4617058762566069, "grad_norm": 0.5018864274024963, "learning_rate": 2.3436961673426456e-05, "loss": 0.2239, "step": 4455 }, { "epoch": 0.46180951393926833, "grad_norm": 0.6501008868217468, "learning_rate": 2.3430347831806565e-05, "loss": 0.2513, "step": 4456 }, { "epoch": 0.46191315162192975, "grad_norm": 0.5242273211479187, "learning_rate": 2.3423733603660406e-05, "loss": 0.196, "step": 4457 }, { "epoch": 0.46201678930459117, "grad_norm": 0.4985031485557556, "learning_rate": 2.3417118989733265e-05, "loss": 0.2189, "step": 4458 }, { "epoch": 0.4621204269872526, "grad_norm": 0.4776060879230499, "learning_rate": 2.3410503990770468e-05, "loss": 0.1906, "step": 4459 }, { "epoch": 0.462224064669914, "grad_norm": 0.5783148407936096, "learning_rate": 2.3403888607517385e-05, "loss": 0.235, "step": 4460 }, { "epoch": 0.4623277023525754, "grad_norm": 0.5404558777809143, "learning_rate": 2.3397272840719425e-05, "loss": 0.2144, "step": 4461 }, { "epoch": 0.46243134003523684, "grad_norm": 0.5489241480827332, "learning_rate": 2.339065669112204e-05, "loss": 0.2439, "step": 4462 }, { "epoch": 0.46253497771789825, "grad_norm": 0.5944549441337585, "learning_rate": 2.3384040159470738e-05, "loss": 0.2608, "step": 4463 }, { "epoch": 0.4626386154005596, "grad_norm": 0.5333137512207031, "learning_rate": 2.3377423246511047e-05, "loss": 0.2113, "step": 4464 }, { "epoch": 0.46274225308322103, "grad_norm": 0.5300441980361938, "learning_rate": 2.3370805952988546e-05, "loss": 0.2222, "step": 4465 }, { "epoch": 0.46284589076588245, "grad_norm": 0.4752691984176636, "learning_rate": 2.3364188279648886e-05, "loss": 0.1833, "step": 4466 }, { "epoch": 0.46294952844854387, "grad_norm": 0.5182095766067505, "learning_rate": 2.3357570227237712e-05, "loss": 0.2192, "step": 4467 }, { "epoch": 0.4630531661312053, "grad_norm": 0.49979159235954285, "learning_rate": 2.3350951796500744e-05, "loss": 0.2226, "step": 4468 }, { "epoch": 0.4631568038138667, "grad_norm": 0.5155590772628784, "learning_rate": 2.334433298818374e-05, "loss": 0.2073, "step": 4469 }, { "epoch": 0.4632604414965281, "grad_norm": 0.5436052680015564, "learning_rate": 2.3337713803032487e-05, "loss": 0.1981, "step": 4470 }, { "epoch": 0.46336407917918954, "grad_norm": 0.5005167126655579, "learning_rate": 2.3331094241792834e-05, "loss": 0.1916, "step": 4471 }, { "epoch": 0.46346771686185095, "grad_norm": 0.5140410661697388, "learning_rate": 2.3324474305210666e-05, "loss": 0.1918, "step": 4472 }, { "epoch": 0.46357135454451237, "grad_norm": 0.5105440616607666, "learning_rate": 2.3317853994031897e-05, "loss": 0.2205, "step": 4473 }, { "epoch": 0.4636749922271738, "grad_norm": 0.47799333930015564, "learning_rate": 2.3311233309002493e-05, "loss": 0.2034, "step": 4474 }, { "epoch": 0.4637786299098352, "grad_norm": 0.48624324798583984, "learning_rate": 2.3304612250868472e-05, "loss": 0.1824, "step": 4475 }, { "epoch": 0.4638822675924966, "grad_norm": 0.4912174344062805, "learning_rate": 2.329799082037588e-05, "loss": 0.2126, "step": 4476 }, { "epoch": 0.46398590527515804, "grad_norm": 0.5111244916915894, "learning_rate": 2.329136901827081e-05, "loss": 0.1873, "step": 4477 }, { "epoch": 0.46408954295781946, "grad_norm": 0.5195016264915466, "learning_rate": 2.3284746845299396e-05, "loss": 0.2154, "step": 4478 }, { "epoch": 0.4641931806404809, "grad_norm": 0.5187585949897766, "learning_rate": 2.3278124302207812e-05, "loss": 0.2148, "step": 4479 }, { "epoch": 0.4642968183231423, "grad_norm": 0.5739063024520874, "learning_rate": 2.3271501389742273e-05, "loss": 0.2576, "step": 4480 }, { "epoch": 0.4644004560058037, "grad_norm": 0.44460976123809814, "learning_rate": 2.3264878108649046e-05, "loss": 0.1872, "step": 4481 }, { "epoch": 0.4645040936884651, "grad_norm": 0.5364735126495361, "learning_rate": 2.3258254459674438e-05, "loss": 0.222, "step": 4482 }, { "epoch": 0.46460773137112654, "grad_norm": 0.5735208988189697, "learning_rate": 2.3251630443564773e-05, "loss": 0.2206, "step": 4483 }, { "epoch": 0.46471136905378796, "grad_norm": 0.5087382197380066, "learning_rate": 2.3245006061066446e-05, "loss": 0.2137, "step": 4484 }, { "epoch": 0.4648150067364494, "grad_norm": 0.5853786468505859, "learning_rate": 2.323838131292588e-05, "loss": 0.2365, "step": 4485 }, { "epoch": 0.4649186444191108, "grad_norm": 0.5093985199928284, "learning_rate": 2.323175619988954e-05, "loss": 0.1947, "step": 4486 }, { "epoch": 0.4650222821017722, "grad_norm": 0.5494992733001709, "learning_rate": 2.322513072270394e-05, "loss": 0.2159, "step": 4487 }, { "epoch": 0.46512591978443363, "grad_norm": 0.4877338111400604, "learning_rate": 2.3218504882115624e-05, "loss": 0.1989, "step": 4488 }, { "epoch": 0.46522955746709505, "grad_norm": 0.4691953659057617, "learning_rate": 2.321187867887118e-05, "loss": 0.167, "step": 4489 }, { "epoch": 0.46533319514975646, "grad_norm": 0.5337307453155518, "learning_rate": 2.3205252113717234e-05, "loss": 0.2049, "step": 4490 }, { "epoch": 0.4654368328324179, "grad_norm": 0.5148242712020874, "learning_rate": 2.3198625187400473e-05, "loss": 0.218, "step": 4491 }, { "epoch": 0.4655404705150793, "grad_norm": 0.5255232453346252, "learning_rate": 2.3191997900667588e-05, "loss": 0.1783, "step": 4492 }, { "epoch": 0.4656441081977407, "grad_norm": 0.6174560189247131, "learning_rate": 2.3185370254265343e-05, "loss": 0.2203, "step": 4493 }, { "epoch": 0.46574774588040213, "grad_norm": 0.39398008584976196, "learning_rate": 2.3178742248940534e-05, "loss": 0.1436, "step": 4494 }, { "epoch": 0.46585138356306355, "grad_norm": 0.5350342392921448, "learning_rate": 2.317211388543999e-05, "loss": 0.2203, "step": 4495 }, { "epoch": 0.46595502124572497, "grad_norm": 0.5608664751052856, "learning_rate": 2.3165485164510582e-05, "loss": 0.1973, "step": 4496 }, { "epoch": 0.4660586589283864, "grad_norm": 0.5720195770263672, "learning_rate": 2.3158856086899223e-05, "loss": 0.2351, "step": 4497 }, { "epoch": 0.4661622966110478, "grad_norm": 0.42739763855934143, "learning_rate": 2.315222665335288e-05, "loss": 0.183, "step": 4498 }, { "epoch": 0.4662659342937092, "grad_norm": 0.537451446056366, "learning_rate": 2.3145596864618534e-05, "loss": 0.2143, "step": 4499 }, { "epoch": 0.46636957197637063, "grad_norm": 0.554932713508606, "learning_rate": 2.3138966721443213e-05, "loss": 0.2185, "step": 4500 }, { "epoch": 0.46647320965903205, "grad_norm": 0.5975545048713684, "learning_rate": 2.3132336224574015e-05, "loss": 0.2352, "step": 4501 }, { "epoch": 0.4665768473416934, "grad_norm": 0.604669451713562, "learning_rate": 2.3125705374758037e-05, "loss": 0.2296, "step": 4502 }, { "epoch": 0.46668048502435483, "grad_norm": 0.493513286113739, "learning_rate": 2.3119074172742435e-05, "loss": 0.1878, "step": 4503 }, { "epoch": 0.46678412270701625, "grad_norm": 0.5445199608802795, "learning_rate": 2.3112442619274408e-05, "loss": 0.2184, "step": 4504 }, { "epoch": 0.46688776038967766, "grad_norm": 0.5227720737457275, "learning_rate": 2.3105810715101175e-05, "loss": 0.203, "step": 4505 }, { "epoch": 0.4669913980723391, "grad_norm": 0.5246191024780273, "learning_rate": 2.3099178460970025e-05, "loss": 0.1861, "step": 4506 }, { "epoch": 0.4670950357550005, "grad_norm": 0.5684474110603333, "learning_rate": 2.3092545857628265e-05, "loss": 0.2275, "step": 4507 }, { "epoch": 0.4671986734376619, "grad_norm": 0.49848929047584534, "learning_rate": 2.3085912905823246e-05, "loss": 0.18, "step": 4508 }, { "epoch": 0.46730231112032333, "grad_norm": 0.5276935696601868, "learning_rate": 2.3079279606302355e-05, "loss": 0.1834, "step": 4509 }, { "epoch": 0.46740594880298475, "grad_norm": 0.5134925842285156, "learning_rate": 2.3072645959813026e-05, "loss": 0.1973, "step": 4510 }, { "epoch": 0.46750958648564617, "grad_norm": 0.5260006785392761, "learning_rate": 2.3066011967102723e-05, "loss": 0.2145, "step": 4511 }, { "epoch": 0.4676132241683076, "grad_norm": 0.511784553527832, "learning_rate": 2.3059377628918963e-05, "loss": 0.2012, "step": 4512 }, { "epoch": 0.467716861850969, "grad_norm": 0.5256336331367493, "learning_rate": 2.3052742946009284e-05, "loss": 0.2149, "step": 4513 }, { "epoch": 0.4678204995336304, "grad_norm": 0.5943604111671448, "learning_rate": 2.3046107919121284e-05, "loss": 0.2157, "step": 4514 }, { "epoch": 0.46792413721629184, "grad_norm": 0.47138938307762146, "learning_rate": 2.3039472549002567e-05, "loss": 0.1846, "step": 4515 }, { "epoch": 0.46802777489895325, "grad_norm": 0.4793859124183655, "learning_rate": 2.3032836836400816e-05, "loss": 0.1732, "step": 4516 }, { "epoch": 0.46813141258161467, "grad_norm": 0.47348594665527344, "learning_rate": 2.3026200782063724e-05, "loss": 0.2121, "step": 4517 }, { "epoch": 0.4682350502642761, "grad_norm": 0.49051177501678467, "learning_rate": 2.301956438673903e-05, "loss": 0.2176, "step": 4518 }, { "epoch": 0.4683386879469375, "grad_norm": 0.49050796031951904, "learning_rate": 2.301292765117451e-05, "loss": 0.1984, "step": 4519 }, { "epoch": 0.4684423256295989, "grad_norm": 0.48881155252456665, "learning_rate": 2.3006290576117993e-05, "loss": 0.1835, "step": 4520 }, { "epoch": 0.46854596331226034, "grad_norm": 0.5071969628334045, "learning_rate": 2.2999653162317324e-05, "loss": 0.2173, "step": 4521 }, { "epoch": 0.46864960099492176, "grad_norm": 0.5467783808708191, "learning_rate": 2.29930154105204e-05, "loss": 0.2094, "step": 4522 }, { "epoch": 0.4687532386775832, "grad_norm": 0.4916337728500366, "learning_rate": 2.298637732147516e-05, "loss": 0.1885, "step": 4523 }, { "epoch": 0.4688568763602446, "grad_norm": 0.5077255964279175, "learning_rate": 2.2979738895929557e-05, "loss": 0.2174, "step": 4524 }, { "epoch": 0.468960514042906, "grad_norm": 0.5012960433959961, "learning_rate": 2.2973100134631606e-05, "loss": 0.2026, "step": 4525 }, { "epoch": 0.4690641517255674, "grad_norm": 0.4907589256763458, "learning_rate": 2.2966461038329363e-05, "loss": 0.1988, "step": 4526 }, { "epoch": 0.46916778940822884, "grad_norm": 0.4194316565990448, "learning_rate": 2.295982160777089e-05, "loss": 0.1829, "step": 4527 }, { "epoch": 0.46927142709089026, "grad_norm": 0.5632020235061646, "learning_rate": 2.295318184370433e-05, "loss": 0.2158, "step": 4528 }, { "epoch": 0.4693750647735517, "grad_norm": 0.6058095693588257, "learning_rate": 2.294654174687782e-05, "loss": 0.2683, "step": 4529 }, { "epoch": 0.4694787024562131, "grad_norm": 0.5311865210533142, "learning_rate": 2.2939901318039574e-05, "loss": 0.2035, "step": 4530 }, { "epoch": 0.4695823401388745, "grad_norm": 0.4692043960094452, "learning_rate": 2.2933260557937817e-05, "loss": 0.2069, "step": 4531 }, { "epoch": 0.4696859778215359, "grad_norm": 0.5152689218521118, "learning_rate": 2.292661946732082e-05, "loss": 0.234, "step": 4532 }, { "epoch": 0.46978961550419734, "grad_norm": 0.4731234312057495, "learning_rate": 2.291997804693689e-05, "loss": 0.1788, "step": 4533 }, { "epoch": 0.46989325318685876, "grad_norm": 0.4742664694786072, "learning_rate": 2.291333629753437e-05, "loss": 0.1968, "step": 4534 }, { "epoch": 0.4699968908695202, "grad_norm": 0.46158522367477417, "learning_rate": 2.290669421986165e-05, "loss": 0.1797, "step": 4535 }, { "epoch": 0.4701005285521816, "grad_norm": 0.5073385834693909, "learning_rate": 2.290005181466714e-05, "loss": 0.217, "step": 4536 }, { "epoch": 0.470204166234843, "grad_norm": 0.6219344735145569, "learning_rate": 2.2893409082699304e-05, "loss": 0.256, "step": 4537 }, { "epoch": 0.47030780391750443, "grad_norm": 0.5234584212303162, "learning_rate": 2.2886766024706626e-05, "loss": 0.1764, "step": 4538 }, { "epoch": 0.47041144160016585, "grad_norm": 0.4554639160633087, "learning_rate": 2.2880122641437642e-05, "loss": 0.1643, "step": 4539 }, { "epoch": 0.4705150792828272, "grad_norm": 0.4849519729614258, "learning_rate": 2.2873478933640918e-05, "loss": 0.2102, "step": 4540 }, { "epoch": 0.4706187169654886, "grad_norm": 0.5174660682678223, "learning_rate": 2.286683490206505e-05, "loss": 0.2159, "step": 4541 }, { "epoch": 0.47072235464815004, "grad_norm": 0.48189786076545715, "learning_rate": 2.286019054745869e-05, "loss": 0.1991, "step": 4542 }, { "epoch": 0.47082599233081146, "grad_norm": 0.5426444411277771, "learning_rate": 2.2853545870570496e-05, "loss": 0.2493, "step": 4543 }, { "epoch": 0.4709296300134729, "grad_norm": 0.5645356178283691, "learning_rate": 2.2846900872149188e-05, "loss": 0.2221, "step": 4544 }, { "epoch": 0.4710332676961343, "grad_norm": 0.45435675978660583, "learning_rate": 2.2840255552943527e-05, "loss": 0.1806, "step": 4545 }, { "epoch": 0.4711369053787957, "grad_norm": 0.5457807183265686, "learning_rate": 2.2833609913702276e-05, "loss": 0.1892, "step": 4546 }, { "epoch": 0.47124054306145713, "grad_norm": 0.551639199256897, "learning_rate": 2.2826963955174266e-05, "loss": 0.1931, "step": 4547 }, { "epoch": 0.47134418074411855, "grad_norm": 0.4076830744743347, "learning_rate": 2.282031767810836e-05, "loss": 0.1541, "step": 4548 }, { "epoch": 0.47144781842677996, "grad_norm": 0.4333003759384155, "learning_rate": 2.281367108325343e-05, "loss": 0.1656, "step": 4549 }, { "epoch": 0.4715514561094414, "grad_norm": 0.48861321806907654, "learning_rate": 2.2807024171358424e-05, "loss": 0.194, "step": 4550 }, { "epoch": 0.4716550937921028, "grad_norm": 0.4885448217391968, "learning_rate": 2.28003769431723e-05, "loss": 0.2106, "step": 4551 }, { "epoch": 0.4717587314747642, "grad_norm": 0.5283402800559998, "learning_rate": 2.2793729399444052e-05, "loss": 0.2272, "step": 4552 }, { "epoch": 0.47186236915742563, "grad_norm": 0.48492392897605896, "learning_rate": 2.2787081540922716e-05, "loss": 0.1947, "step": 4553 }, { "epoch": 0.47196600684008705, "grad_norm": 0.4583858847618103, "learning_rate": 2.2780433368357366e-05, "loss": 0.1907, "step": 4554 }, { "epoch": 0.47206964452274847, "grad_norm": 0.5057693123817444, "learning_rate": 2.2773784882497104e-05, "loss": 0.1971, "step": 4555 }, { "epoch": 0.4721732822054099, "grad_norm": 0.5850982666015625, "learning_rate": 2.2767136084091076e-05, "loss": 0.2276, "step": 4556 }, { "epoch": 0.4722769198880713, "grad_norm": 0.5325038433074951, "learning_rate": 2.2760486973888452e-05, "loss": 0.2018, "step": 4557 }, { "epoch": 0.4723805575707327, "grad_norm": 0.5394957065582275, "learning_rate": 2.275383755263846e-05, "loss": 0.223, "step": 4558 }, { "epoch": 0.47248419525339413, "grad_norm": 0.4816633462905884, "learning_rate": 2.274718782109032e-05, "loss": 0.2213, "step": 4559 }, { "epoch": 0.47258783293605555, "grad_norm": 0.48285600543022156, "learning_rate": 2.274053777999333e-05, "loss": 0.1915, "step": 4560 }, { "epoch": 0.47269147061871697, "grad_norm": 0.5504255890846252, "learning_rate": 2.273388743009681e-05, "loss": 0.2267, "step": 4561 }, { "epoch": 0.4727951083013784, "grad_norm": 0.5120184421539307, "learning_rate": 2.2727236772150095e-05, "loss": 0.2017, "step": 4562 }, { "epoch": 0.4728987459840398, "grad_norm": 0.5613784193992615, "learning_rate": 2.2720585806902582e-05, "loss": 0.2139, "step": 4563 }, { "epoch": 0.4730023836667012, "grad_norm": 0.6070305109024048, "learning_rate": 2.2713934535103692e-05, "loss": 0.2626, "step": 4564 }, { "epoch": 0.47310602134936264, "grad_norm": 0.5143283009529114, "learning_rate": 2.2707282957502875e-05, "loss": 0.1937, "step": 4565 }, { "epoch": 0.47320965903202405, "grad_norm": 0.545512318611145, "learning_rate": 2.2700631074849624e-05, "loss": 0.218, "step": 4566 }, { "epoch": 0.47331329671468547, "grad_norm": 0.5820977091789246, "learning_rate": 2.2693978887893467e-05, "loss": 0.2178, "step": 4567 }, { "epoch": 0.4734169343973469, "grad_norm": 0.6015923023223877, "learning_rate": 2.2687326397383952e-05, "loss": 0.2258, "step": 4568 }, { "epoch": 0.4735205720800083, "grad_norm": 0.5356124043464661, "learning_rate": 2.2680673604070675e-05, "loss": 0.2095, "step": 4569 }, { "epoch": 0.4736242097626697, "grad_norm": 0.43398767709732056, "learning_rate": 2.2674020508703266e-05, "loss": 0.1709, "step": 4570 }, { "epoch": 0.47372784744533114, "grad_norm": 0.5021622180938721, "learning_rate": 2.2667367112031382e-05, "loss": 0.1984, "step": 4571 }, { "epoch": 0.47383148512799256, "grad_norm": 0.4682827293872833, "learning_rate": 2.2660713414804713e-05, "loss": 0.1612, "step": 4572 }, { "epoch": 0.473935122810654, "grad_norm": 0.530903697013855, "learning_rate": 2.2654059417773e-05, "loss": 0.22, "step": 4573 }, { "epoch": 0.4740387604933154, "grad_norm": 0.5349231362342834, "learning_rate": 2.2647405121685996e-05, "loss": 0.2349, "step": 4574 }, { "epoch": 0.4741423981759768, "grad_norm": 0.5128585696220398, "learning_rate": 2.2640750527293495e-05, "loss": 0.2182, "step": 4575 }, { "epoch": 0.4742460358586382, "grad_norm": 0.5996513962745667, "learning_rate": 2.2634095635345333e-05, "loss": 0.2495, "step": 4576 }, { "epoch": 0.47434967354129964, "grad_norm": 0.5359048843383789, "learning_rate": 2.262744044659137e-05, "loss": 0.2267, "step": 4577 }, { "epoch": 0.474453311223961, "grad_norm": 0.5653430223464966, "learning_rate": 2.2620784961781502e-05, "loss": 0.2158, "step": 4578 }, { "epoch": 0.4745569489066224, "grad_norm": 0.5262527465820312, "learning_rate": 2.261412918166565e-05, "loss": 0.2212, "step": 4579 }, { "epoch": 0.47466058658928384, "grad_norm": 0.4911423623561859, "learning_rate": 2.2607473106993796e-05, "loss": 0.2193, "step": 4580 }, { "epoch": 0.47476422427194526, "grad_norm": 0.5154918432235718, "learning_rate": 2.2600816738515924e-05, "loss": 0.1967, "step": 4581 }, { "epoch": 0.4748678619546067, "grad_norm": 0.4786593019962311, "learning_rate": 2.2594160076982063e-05, "loss": 0.1907, "step": 4582 }, { "epoch": 0.4749714996372681, "grad_norm": 0.6081719398498535, "learning_rate": 2.2587503123142282e-05, "loss": 0.2068, "step": 4583 }, { "epoch": 0.4750751373199295, "grad_norm": 0.5504110455513, "learning_rate": 2.2580845877746662e-05, "loss": 0.2328, "step": 4584 }, { "epoch": 0.4751787750025909, "grad_norm": 0.5487823486328125, "learning_rate": 2.2574188341545343e-05, "loss": 0.2313, "step": 4585 }, { "epoch": 0.47528241268525234, "grad_norm": 0.49996358156204224, "learning_rate": 2.256753051528849e-05, "loss": 0.1872, "step": 4586 }, { "epoch": 0.47538605036791376, "grad_norm": 0.5519025325775146, "learning_rate": 2.2560872399726286e-05, "loss": 0.2212, "step": 4587 }, { "epoch": 0.4754896880505752, "grad_norm": 0.5117783546447754, "learning_rate": 2.255421399560896e-05, "loss": 0.2141, "step": 4588 }, { "epoch": 0.4755933257332366, "grad_norm": 0.5507332682609558, "learning_rate": 2.2547555303686774e-05, "loss": 0.2443, "step": 4589 }, { "epoch": 0.475696963415898, "grad_norm": 0.5101597309112549, "learning_rate": 2.2540896324710015e-05, "loss": 0.2159, "step": 4590 }, { "epoch": 0.47580060109855943, "grad_norm": 0.5492226481437683, "learning_rate": 2.2534237059429004e-05, "loss": 0.2243, "step": 4591 }, { "epoch": 0.47590423878122085, "grad_norm": 0.4963025450706482, "learning_rate": 2.2527577508594107e-05, "loss": 0.1767, "step": 4592 }, { "epoch": 0.47600787646388226, "grad_norm": 0.5667779445648193, "learning_rate": 2.2520917672955706e-05, "loss": 0.2069, "step": 4593 }, { "epoch": 0.4761115141465437, "grad_norm": 0.5749843120574951, "learning_rate": 2.2514257553264213e-05, "loss": 0.2216, "step": 4594 }, { "epoch": 0.4762151518292051, "grad_norm": 0.5432445406913757, "learning_rate": 2.25075971502701e-05, "loss": 0.2001, "step": 4595 }, { "epoch": 0.4763187895118665, "grad_norm": 0.43292057514190674, "learning_rate": 2.2500936464723825e-05, "loss": 0.1596, "step": 4596 }, { "epoch": 0.47642242719452793, "grad_norm": 0.4687357246875763, "learning_rate": 2.2494275497375925e-05, "loss": 0.1798, "step": 4597 }, { "epoch": 0.47652606487718935, "grad_norm": 0.41464748978614807, "learning_rate": 2.2487614248976932e-05, "loss": 0.1577, "step": 4598 }, { "epoch": 0.47662970255985077, "grad_norm": 0.5634739995002747, "learning_rate": 2.2480952720277437e-05, "loss": 0.2338, "step": 4599 }, { "epoch": 0.4767333402425122, "grad_norm": 0.6048978567123413, "learning_rate": 2.247429091202805e-05, "loss": 0.2468, "step": 4600 }, { "epoch": 0.4768369779251736, "grad_norm": 0.42859944701194763, "learning_rate": 2.2467628824979402e-05, "loss": 0.17, "step": 4601 }, { "epoch": 0.476940615607835, "grad_norm": 0.5351769328117371, "learning_rate": 2.2460966459882184e-05, "loss": 0.2032, "step": 4602 }, { "epoch": 0.47704425329049643, "grad_norm": 0.4877007305622101, "learning_rate": 2.245430381748708e-05, "loss": 0.1612, "step": 4603 }, { "epoch": 0.47714789097315785, "grad_norm": 0.5551847219467163, "learning_rate": 2.244764089854484e-05, "loss": 0.2231, "step": 4604 }, { "epoch": 0.47725152865581927, "grad_norm": 0.46438121795654297, "learning_rate": 2.2440977703806237e-05, "loss": 0.1837, "step": 4605 }, { "epoch": 0.4773551663384807, "grad_norm": 0.48726150393486023, "learning_rate": 2.2434314234022052e-05, "loss": 0.1802, "step": 4606 }, { "epoch": 0.4774588040211421, "grad_norm": 0.5019385814666748, "learning_rate": 2.2427650489943124e-05, "loss": 0.2204, "step": 4607 }, { "epoch": 0.4775624417038035, "grad_norm": 0.4917573034763336, "learning_rate": 2.2420986472320312e-05, "loss": 0.1845, "step": 4608 }, { "epoch": 0.47766607938646494, "grad_norm": 0.5490579605102539, "learning_rate": 2.241432218190451e-05, "loss": 0.2184, "step": 4609 }, { "epoch": 0.47776971706912635, "grad_norm": 0.5504197478294373, "learning_rate": 2.2407657619446637e-05, "loss": 0.2286, "step": 4610 }, { "epoch": 0.47787335475178777, "grad_norm": 0.5806339383125305, "learning_rate": 2.240099278569765e-05, "loss": 0.2198, "step": 4611 }, { "epoch": 0.4779769924344492, "grad_norm": 0.5220992565155029, "learning_rate": 2.2394327681408527e-05, "loss": 0.2153, "step": 4612 }, { "epoch": 0.4780806301171106, "grad_norm": 0.5559518337249756, "learning_rate": 2.238766230733028e-05, "loss": 0.2228, "step": 4613 }, { "epoch": 0.478184267799772, "grad_norm": 0.5041811466217041, "learning_rate": 2.2380996664213957e-05, "loss": 0.2144, "step": 4614 }, { "epoch": 0.47828790548243344, "grad_norm": 0.5417464375495911, "learning_rate": 2.237433075281063e-05, "loss": 0.2393, "step": 4615 }, { "epoch": 0.4783915431650948, "grad_norm": 0.4931691884994507, "learning_rate": 2.2367664573871406e-05, "loss": 0.2006, "step": 4616 }, { "epoch": 0.4784951808477562, "grad_norm": 0.523485541343689, "learning_rate": 2.2360998128147417e-05, "loss": 0.2208, "step": 4617 }, { "epoch": 0.47859881853041764, "grad_norm": 0.4347810447216034, "learning_rate": 2.2354331416389835e-05, "loss": 0.1571, "step": 4618 }, { "epoch": 0.47870245621307905, "grad_norm": 0.5515587329864502, "learning_rate": 2.2347664439349838e-05, "loss": 0.2146, "step": 4619 }, { "epoch": 0.47880609389574047, "grad_norm": 0.48896217346191406, "learning_rate": 2.234099719777867e-05, "loss": 0.2021, "step": 4620 }, { "epoch": 0.4789097315784019, "grad_norm": 0.5830408334732056, "learning_rate": 2.2334329692427577e-05, "loss": 0.2272, "step": 4621 }, { "epoch": 0.4790133692610633, "grad_norm": 0.5266159772872925, "learning_rate": 2.2327661924047842e-05, "loss": 0.2095, "step": 4622 }, { "epoch": 0.4791170069437247, "grad_norm": 0.5474720597267151, "learning_rate": 2.2320993893390775e-05, "loss": 0.2496, "step": 4623 }, { "epoch": 0.47922064462638614, "grad_norm": 0.5088273882865906, "learning_rate": 2.231432560120773e-05, "loss": 0.1993, "step": 4624 }, { "epoch": 0.47932428230904756, "grad_norm": 0.5491158366203308, "learning_rate": 2.230765704825007e-05, "loss": 0.2022, "step": 4625 }, { "epoch": 0.479427919991709, "grad_norm": 0.5160688757896423, "learning_rate": 2.23009882352692e-05, "loss": 0.2078, "step": 4626 }, { "epoch": 0.4795315576743704, "grad_norm": 0.4869052469730377, "learning_rate": 2.229431916301656e-05, "loss": 0.1796, "step": 4627 }, { "epoch": 0.4796351953570318, "grad_norm": 0.5505785346031189, "learning_rate": 2.22876498322436e-05, "loss": 0.2223, "step": 4628 }, { "epoch": 0.4797388330396932, "grad_norm": 0.45815008878707886, "learning_rate": 2.228098024370181e-05, "loss": 0.1812, "step": 4629 }, { "epoch": 0.47984247072235464, "grad_norm": 0.5766584277153015, "learning_rate": 2.2274310398142713e-05, "loss": 0.2521, "step": 4630 }, { "epoch": 0.47994610840501606, "grad_norm": 0.5113991498947144, "learning_rate": 2.2267640296317857e-05, "loss": 0.2069, "step": 4631 }, { "epoch": 0.4800497460876775, "grad_norm": 0.5927762985229492, "learning_rate": 2.2260969938978815e-05, "loss": 0.243, "step": 4632 }, { "epoch": 0.4801533837703389, "grad_norm": 0.5017601251602173, "learning_rate": 2.22542993268772e-05, "loss": 0.1858, "step": 4633 }, { "epoch": 0.4802570214530003, "grad_norm": 0.5273293256759644, "learning_rate": 2.224762846076464e-05, "loss": 0.2179, "step": 4634 }, { "epoch": 0.4803606591356617, "grad_norm": 0.4704946279525757, "learning_rate": 2.2240957341392796e-05, "loss": 0.1698, "step": 4635 }, { "epoch": 0.48046429681832314, "grad_norm": 0.47459760308265686, "learning_rate": 2.2234285969513363e-05, "loss": 0.1876, "step": 4636 }, { "epoch": 0.48056793450098456, "grad_norm": 0.5367644429206848, "learning_rate": 2.2227614345878066e-05, "loss": 0.1927, "step": 4637 }, { "epoch": 0.480671572183646, "grad_norm": 0.5465733408927917, "learning_rate": 2.2220942471238636e-05, "loss": 0.211, "step": 4638 }, { "epoch": 0.4807752098663074, "grad_norm": 0.5373432040214539, "learning_rate": 2.2214270346346866e-05, "loss": 0.225, "step": 4639 }, { "epoch": 0.4808788475489688, "grad_norm": 0.5162397027015686, "learning_rate": 2.220759797195456e-05, "loss": 0.1835, "step": 4640 }, { "epoch": 0.48098248523163023, "grad_norm": 0.4821995794773102, "learning_rate": 2.2200925348813542e-05, "loss": 0.1701, "step": 4641 }, { "epoch": 0.48108612291429165, "grad_norm": 0.57627272605896, "learning_rate": 2.2194252477675676e-05, "loss": 0.2357, "step": 4642 }, { "epoch": 0.48118976059695306, "grad_norm": 0.6950919032096863, "learning_rate": 2.218757935929286e-05, "loss": 0.2545, "step": 4643 }, { "epoch": 0.4812933982796145, "grad_norm": 0.4768347442150116, "learning_rate": 2.2180905994416992e-05, "loss": 0.2094, "step": 4644 }, { "epoch": 0.4813970359622759, "grad_norm": 0.5636467933654785, "learning_rate": 2.2174232383800033e-05, "loss": 0.2087, "step": 4645 }, { "epoch": 0.4815006736449373, "grad_norm": 0.5050401091575623, "learning_rate": 2.216755852819395e-05, "loss": 0.1834, "step": 4646 }, { "epoch": 0.48160431132759873, "grad_norm": 0.6181920170783997, "learning_rate": 2.2160884428350737e-05, "loss": 0.2612, "step": 4647 }, { "epoch": 0.48170794901026015, "grad_norm": 0.5703398585319519, "learning_rate": 2.2154210085022426e-05, "loss": 0.2377, "step": 4648 }, { "epoch": 0.48181158669292157, "grad_norm": 0.5547765493392944, "learning_rate": 2.2147535498961075e-05, "loss": 0.2058, "step": 4649 }, { "epoch": 0.481915224375583, "grad_norm": 0.5765746831893921, "learning_rate": 2.2140860670918762e-05, "loss": 0.2426, "step": 4650 }, { "epoch": 0.4820188620582444, "grad_norm": 0.5647233724594116, "learning_rate": 2.2134185601647595e-05, "loss": 0.1993, "step": 4651 }, { "epoch": 0.4821224997409058, "grad_norm": 0.5586739778518677, "learning_rate": 2.212751029189971e-05, "loss": 0.2334, "step": 4652 }, { "epoch": 0.48222613742356724, "grad_norm": 0.5199017524719238, "learning_rate": 2.212083474242727e-05, "loss": 0.1745, "step": 4653 }, { "epoch": 0.4823297751062286, "grad_norm": 0.5173331499099731, "learning_rate": 2.211415895398248e-05, "loss": 0.2134, "step": 4654 }, { "epoch": 0.48243341278889, "grad_norm": 0.6455654501914978, "learning_rate": 2.2107482927317534e-05, "loss": 0.203, "step": 4655 }, { "epoch": 0.48253705047155143, "grad_norm": 0.4340267479419708, "learning_rate": 2.21008066631847e-05, "loss": 0.1797, "step": 4656 }, { "epoch": 0.48264068815421285, "grad_norm": 0.5833914279937744, "learning_rate": 2.2094130162336227e-05, "loss": 0.2338, "step": 4657 }, { "epoch": 0.48274432583687427, "grad_norm": 0.5767900347709656, "learning_rate": 2.2087453425524426e-05, "loss": 0.2228, "step": 4658 }, { "epoch": 0.4828479635195357, "grad_norm": 0.5963987708091736, "learning_rate": 2.2080776453501614e-05, "loss": 0.2438, "step": 4659 }, { "epoch": 0.4829516012021971, "grad_norm": 0.46724367141723633, "learning_rate": 2.2074099247020147e-05, "loss": 0.1832, "step": 4660 }, { "epoch": 0.4830552388848585, "grad_norm": 0.5257183909416199, "learning_rate": 2.2067421806832403e-05, "loss": 0.2144, "step": 4661 }, { "epoch": 0.48315887656751993, "grad_norm": 0.6028289198875427, "learning_rate": 2.206074413369079e-05, "loss": 0.2536, "step": 4662 }, { "epoch": 0.48326251425018135, "grad_norm": 0.5243949890136719, "learning_rate": 2.205406622834772e-05, "loss": 0.1765, "step": 4663 }, { "epoch": 0.48336615193284277, "grad_norm": 0.4694502353668213, "learning_rate": 2.2047388091555665e-05, "loss": 0.1839, "step": 4664 }, { "epoch": 0.4834697896155042, "grad_norm": 0.5306639671325684, "learning_rate": 2.2040709724067106e-05, "loss": 0.2186, "step": 4665 }, { "epoch": 0.4835734272981656, "grad_norm": 0.4971131384372711, "learning_rate": 2.2034031126634544e-05, "loss": 0.1806, "step": 4666 }, { "epoch": 0.483677064980827, "grad_norm": 0.6058434844017029, "learning_rate": 2.202735230001052e-05, "loss": 0.2814, "step": 4667 }, { "epoch": 0.48378070266348844, "grad_norm": 0.4897499978542328, "learning_rate": 2.2020673244947587e-05, "loss": 0.1726, "step": 4668 }, { "epoch": 0.48388434034614985, "grad_norm": 0.43752723932266235, "learning_rate": 2.2013993962198336e-05, "loss": 0.1812, "step": 4669 }, { "epoch": 0.48398797802881127, "grad_norm": 0.4837397038936615, "learning_rate": 2.2007314452515375e-05, "loss": 0.2176, "step": 4670 }, { "epoch": 0.4840916157114727, "grad_norm": 0.5185476541519165, "learning_rate": 2.2000634716651338e-05, "loss": 0.203, "step": 4671 }, { "epoch": 0.4841952533941341, "grad_norm": 0.46402788162231445, "learning_rate": 2.19939547553589e-05, "loss": 0.177, "step": 4672 }, { "epoch": 0.4842988910767955, "grad_norm": 0.5397825241088867, "learning_rate": 2.1987274569390727e-05, "loss": 0.237, "step": 4673 }, { "epoch": 0.48440252875945694, "grad_norm": 0.5343857407569885, "learning_rate": 2.1980594159499558e-05, "loss": 0.2077, "step": 4674 }, { "epoch": 0.48450616644211836, "grad_norm": 0.500300943851471, "learning_rate": 2.1973913526438114e-05, "loss": 0.219, "step": 4675 }, { "epoch": 0.4846098041247798, "grad_norm": 0.5813903212547302, "learning_rate": 2.1967232670959158e-05, "loss": 0.2583, "step": 4676 }, { "epoch": 0.4847134418074412, "grad_norm": 0.5086579918861389, "learning_rate": 2.196055159381549e-05, "loss": 0.2018, "step": 4677 }, { "epoch": 0.4848170794901026, "grad_norm": 0.5221996307373047, "learning_rate": 2.195387029575991e-05, "loss": 0.2156, "step": 4678 }, { "epoch": 0.484920717172764, "grad_norm": 0.5296904444694519, "learning_rate": 2.194718877754526e-05, "loss": 0.2459, "step": 4679 }, { "epoch": 0.48502435485542544, "grad_norm": 0.5555968880653381, "learning_rate": 2.1940507039924414e-05, "loss": 0.2386, "step": 4680 }, { "epoch": 0.48512799253808686, "grad_norm": 0.4701452851295471, "learning_rate": 2.193382508365025e-05, "loss": 0.1761, "step": 4681 }, { "epoch": 0.4852316302207483, "grad_norm": 0.5224292874336243, "learning_rate": 2.192714290947568e-05, "loss": 0.2258, "step": 4682 }, { "epoch": 0.4853352679034097, "grad_norm": 0.5685223340988159, "learning_rate": 2.1920460518153637e-05, "loss": 0.2386, "step": 4683 }, { "epoch": 0.4854389055860711, "grad_norm": 0.6110420227050781, "learning_rate": 2.1913777910437094e-05, "loss": 0.2472, "step": 4684 }, { "epoch": 0.48554254326873253, "grad_norm": 0.523505687713623, "learning_rate": 2.190709508707903e-05, "loss": 0.2076, "step": 4685 }, { "epoch": 0.48564618095139395, "grad_norm": 0.4906814694404602, "learning_rate": 2.1900412048832456e-05, "loss": 0.1953, "step": 4686 }, { "epoch": 0.48574981863405536, "grad_norm": 0.49548250436782837, "learning_rate": 2.189372879645041e-05, "loss": 0.2112, "step": 4687 }, { "epoch": 0.4858534563167168, "grad_norm": 0.5095117688179016, "learning_rate": 2.1887045330685937e-05, "loss": 0.1941, "step": 4688 }, { "epoch": 0.4859570939993782, "grad_norm": 0.521227240562439, "learning_rate": 2.188036165229214e-05, "loss": 0.2176, "step": 4689 }, { "epoch": 0.4860607316820396, "grad_norm": 0.5012630224227905, "learning_rate": 2.1873677762022116e-05, "loss": 0.2102, "step": 4690 }, { "epoch": 0.48616436936470103, "grad_norm": 0.5226165056228638, "learning_rate": 2.186699366062899e-05, "loss": 0.2343, "step": 4691 }, { "epoch": 0.4862680070473624, "grad_norm": 0.5168052315711975, "learning_rate": 2.186030934886592e-05, "loss": 0.1981, "step": 4692 }, { "epoch": 0.4863716447300238, "grad_norm": 0.5914785265922546, "learning_rate": 2.1853624827486082e-05, "loss": 0.2423, "step": 4693 }, { "epoch": 0.48647528241268523, "grad_norm": 0.508635938167572, "learning_rate": 2.1846940097242684e-05, "loss": 0.1999, "step": 4694 }, { "epoch": 0.48657892009534665, "grad_norm": 0.5382908582687378, "learning_rate": 2.1840255158888946e-05, "loss": 0.2426, "step": 4695 }, { "epoch": 0.48668255777800806, "grad_norm": 0.5902379751205444, "learning_rate": 2.1833570013178117e-05, "loss": 0.223, "step": 4696 }, { "epoch": 0.4867861954606695, "grad_norm": 0.4909026622772217, "learning_rate": 2.1826884660863473e-05, "loss": 0.1949, "step": 4697 }, { "epoch": 0.4868898331433309, "grad_norm": 0.5593119859695435, "learning_rate": 2.18201991026983e-05, "loss": 0.2712, "step": 4698 }, { "epoch": 0.4869934708259923, "grad_norm": 0.5627800822257996, "learning_rate": 2.1813513339435924e-05, "loss": 0.2411, "step": 4699 }, { "epoch": 0.48709710850865373, "grad_norm": 0.5743441581726074, "learning_rate": 2.1806827371829686e-05, "loss": 0.2438, "step": 4700 }, { "epoch": 0.48720074619131515, "grad_norm": 0.554901659488678, "learning_rate": 2.1800141200632944e-05, "loss": 0.2311, "step": 4701 }, { "epoch": 0.48730438387397657, "grad_norm": 0.5914807319641113, "learning_rate": 2.1793454826599092e-05, "loss": 0.2139, "step": 4702 }, { "epoch": 0.487408021556638, "grad_norm": 0.5732871294021606, "learning_rate": 2.178676825048154e-05, "loss": 0.207, "step": 4703 }, { "epoch": 0.4875116592392994, "grad_norm": 0.5167139768600464, "learning_rate": 2.1780081473033715e-05, "loss": 0.2267, "step": 4704 }, { "epoch": 0.4876152969219608, "grad_norm": 0.5026474595069885, "learning_rate": 2.177339449500908e-05, "loss": 0.1783, "step": 4705 }, { "epoch": 0.48771893460462223, "grad_norm": 0.5291749835014343, "learning_rate": 2.1766707317161115e-05, "loss": 0.2097, "step": 4706 }, { "epoch": 0.48782257228728365, "grad_norm": 0.5648260116577148, "learning_rate": 2.176001994024331e-05, "loss": 0.2037, "step": 4707 }, { "epoch": 0.48792620996994507, "grad_norm": 0.6325567960739136, "learning_rate": 2.175333236500919e-05, "loss": 0.2531, "step": 4708 }, { "epoch": 0.4880298476526065, "grad_norm": 0.46111971139907837, "learning_rate": 2.1746644592212314e-05, "loss": 0.179, "step": 4709 }, { "epoch": 0.4881334853352679, "grad_norm": 0.4900364279747009, "learning_rate": 2.173995662260623e-05, "loss": 0.2037, "step": 4710 }, { "epoch": 0.4882371230179293, "grad_norm": 0.5328616499900818, "learning_rate": 2.1733268456944544e-05, "loss": 0.2163, "step": 4711 }, { "epoch": 0.48834076070059074, "grad_norm": 0.5089332461357117, "learning_rate": 2.1726580095980866e-05, "loss": 0.1893, "step": 4712 }, { "epoch": 0.48844439838325215, "grad_norm": 0.5162228345870972, "learning_rate": 2.1719891540468824e-05, "loss": 0.1971, "step": 4713 }, { "epoch": 0.48854803606591357, "grad_norm": 0.4371989667415619, "learning_rate": 2.1713202791162074e-05, "loss": 0.1675, "step": 4714 }, { "epoch": 0.488651673748575, "grad_norm": 0.528220534324646, "learning_rate": 2.17065138488143e-05, "loss": 0.2311, "step": 4715 }, { "epoch": 0.4887553114312364, "grad_norm": 0.5761193633079529, "learning_rate": 2.16998247141792e-05, "loss": 0.2424, "step": 4716 }, { "epoch": 0.4888589491138978, "grad_norm": 0.5048497319221497, "learning_rate": 2.1693135388010492e-05, "loss": 0.2086, "step": 4717 }, { "epoch": 0.48896258679655924, "grad_norm": 0.4758215546607971, "learning_rate": 2.1686445871061913e-05, "loss": 0.1793, "step": 4718 }, { "epoch": 0.48906622447922066, "grad_norm": 0.5638699531555176, "learning_rate": 2.1679756164087248e-05, "loss": 0.2315, "step": 4719 }, { "epoch": 0.4891698621618821, "grad_norm": 0.4771273136138916, "learning_rate": 2.1673066267840266e-05, "loss": 0.1924, "step": 4720 }, { "epoch": 0.4892734998445435, "grad_norm": 0.5214678645133972, "learning_rate": 2.166637618307477e-05, "loss": 0.2128, "step": 4721 }, { "epoch": 0.4893771375272049, "grad_norm": 0.4893636703491211, "learning_rate": 2.165968591054461e-05, "loss": 0.1814, "step": 4722 }, { "epoch": 0.4894807752098663, "grad_norm": 0.46817266941070557, "learning_rate": 2.1652995451003608e-05, "loss": 0.1749, "step": 4723 }, { "epoch": 0.48958441289252774, "grad_norm": 0.5227934122085571, "learning_rate": 2.164630480520565e-05, "loss": 0.2133, "step": 4724 }, { "epoch": 0.48968805057518916, "grad_norm": 0.5478593111038208, "learning_rate": 2.1639613973904633e-05, "loss": 0.2459, "step": 4725 }, { "epoch": 0.4897916882578506, "grad_norm": 0.5256202220916748, "learning_rate": 2.163292295785446e-05, "loss": 0.1603, "step": 4726 }, { "epoch": 0.489895325940512, "grad_norm": 0.6194120645523071, "learning_rate": 2.162623175780906e-05, "loss": 0.2218, "step": 4727 }, { "epoch": 0.4899989636231734, "grad_norm": 0.5352566242218018, "learning_rate": 2.16195403745224e-05, "loss": 0.1922, "step": 4728 }, { "epoch": 0.49010260130583483, "grad_norm": 0.5198310613632202, "learning_rate": 2.1612848808748446e-05, "loss": 0.2033, "step": 4729 }, { "epoch": 0.4902062389884962, "grad_norm": 0.5337640643119812, "learning_rate": 2.1606157061241196e-05, "loss": 0.218, "step": 4730 }, { "epoch": 0.4903098766711576, "grad_norm": 0.42674437165260315, "learning_rate": 2.1599465132754664e-05, "loss": 0.1643, "step": 4731 }, { "epoch": 0.490413514353819, "grad_norm": 0.4258442521095276, "learning_rate": 2.159277302404289e-05, "loss": 0.1576, "step": 4732 }, { "epoch": 0.49051715203648044, "grad_norm": 0.5152009129524231, "learning_rate": 2.1586080735859926e-05, "loss": 0.2069, "step": 4733 }, { "epoch": 0.49062078971914186, "grad_norm": 0.5394965410232544, "learning_rate": 2.1579388268959848e-05, "loss": 0.2046, "step": 4734 }, { "epoch": 0.4907244274018033, "grad_norm": 0.5877050757408142, "learning_rate": 2.1572695624096763e-05, "loss": 0.2356, "step": 4735 }, { "epoch": 0.4908280650844647, "grad_norm": 0.5340604782104492, "learning_rate": 2.1566002802024776e-05, "loss": 0.2014, "step": 4736 }, { "epoch": 0.4909317027671261, "grad_norm": 0.4920124113559723, "learning_rate": 2.1559309803498022e-05, "loss": 0.1995, "step": 4737 }, { "epoch": 0.4910353404497875, "grad_norm": 0.4382156729698181, "learning_rate": 2.1552616629270668e-05, "loss": 0.1734, "step": 4738 }, { "epoch": 0.49113897813244894, "grad_norm": 0.5280501246452332, "learning_rate": 2.1545923280096885e-05, "loss": 0.1988, "step": 4739 }, { "epoch": 0.49124261581511036, "grad_norm": 0.5084026455879211, "learning_rate": 2.1539229756730868e-05, "loss": 0.185, "step": 4740 }, { "epoch": 0.4913462534977718, "grad_norm": 0.457512229681015, "learning_rate": 2.1532536059926842e-05, "loss": 0.1759, "step": 4741 }, { "epoch": 0.4914498911804332, "grad_norm": 0.5790635943412781, "learning_rate": 2.1525842190439022e-05, "loss": 0.2617, "step": 4742 }, { "epoch": 0.4915535288630946, "grad_norm": 0.5027420520782471, "learning_rate": 2.1519148149021688e-05, "loss": 0.1973, "step": 4743 }, { "epoch": 0.49165716654575603, "grad_norm": 0.6793406009674072, "learning_rate": 2.15124539364291e-05, "loss": 0.2529, "step": 4744 }, { "epoch": 0.49176080422841745, "grad_norm": 0.5920283198356628, "learning_rate": 2.1505759553415554e-05, "loss": 0.2527, "step": 4745 }, { "epoch": 0.49186444191107886, "grad_norm": 0.4970380365848541, "learning_rate": 2.1499065000735357e-05, "loss": 0.1747, "step": 4746 }, { "epoch": 0.4919680795937403, "grad_norm": 0.543004035949707, "learning_rate": 2.1492370279142848e-05, "loss": 0.2162, "step": 4747 }, { "epoch": 0.4920717172764017, "grad_norm": 0.5673389434814453, "learning_rate": 2.1485675389392377e-05, "loss": 0.25, "step": 4748 }, { "epoch": 0.4921753549590631, "grad_norm": 0.5720213055610657, "learning_rate": 2.147898033223831e-05, "loss": 0.208, "step": 4749 }, { "epoch": 0.49227899264172453, "grad_norm": 0.6086804866790771, "learning_rate": 2.1472285108435044e-05, "loss": 0.2467, "step": 4750 }, { "epoch": 0.49238263032438595, "grad_norm": 0.5073210597038269, "learning_rate": 2.146558971873698e-05, "loss": 0.2411, "step": 4751 }, { "epoch": 0.49248626800704737, "grad_norm": 0.4711146652698517, "learning_rate": 2.1458894163898537e-05, "loss": 0.1911, "step": 4752 }, { "epoch": 0.4925899056897088, "grad_norm": 0.5544717907905579, "learning_rate": 2.1452198444674176e-05, "loss": 0.206, "step": 4753 }, { "epoch": 0.4926935433723702, "grad_norm": 0.5063883662223816, "learning_rate": 2.144550256181835e-05, "loss": 0.2161, "step": 4754 }, { "epoch": 0.4927971810550316, "grad_norm": 0.5890721082687378, "learning_rate": 2.143880651608554e-05, "loss": 0.2086, "step": 4755 }, { "epoch": 0.49290081873769304, "grad_norm": 0.5515021681785583, "learning_rate": 2.1432110308230246e-05, "loss": 0.2401, "step": 4756 }, { "epoch": 0.49300445642035445, "grad_norm": 0.390842080116272, "learning_rate": 2.1425413939006995e-05, "loss": 0.1599, "step": 4757 }, { "epoch": 0.49310809410301587, "grad_norm": 0.5897576808929443, "learning_rate": 2.1418717409170312e-05, "loss": 0.2546, "step": 4758 }, { "epoch": 0.4932117317856773, "grad_norm": 0.5300201177597046, "learning_rate": 2.1412020719474758e-05, "loss": 0.2265, "step": 4759 }, { "epoch": 0.4933153694683387, "grad_norm": 0.4570264220237732, "learning_rate": 2.140532387067491e-05, "loss": 0.1888, "step": 4760 }, { "epoch": 0.4934190071510001, "grad_norm": 0.5168429017066956, "learning_rate": 2.139862686352535e-05, "loss": 0.2056, "step": 4761 }, { "epoch": 0.49352264483366154, "grad_norm": 0.49334126710891724, "learning_rate": 2.139192969878068e-05, "loss": 0.1755, "step": 4762 }, { "epoch": 0.49362628251632296, "grad_norm": 0.5192726850509644, "learning_rate": 2.138523237719555e-05, "loss": 0.1771, "step": 4763 }, { "epoch": 0.4937299201989844, "grad_norm": 0.4422583281993866, "learning_rate": 2.137853489952458e-05, "loss": 0.1702, "step": 4764 }, { "epoch": 0.4938335578816458, "grad_norm": 0.47275489568710327, "learning_rate": 2.1371837266522443e-05, "loss": 0.1853, "step": 4765 }, { "epoch": 0.4939371955643072, "grad_norm": 0.4642319083213806, "learning_rate": 2.1365139478943816e-05, "loss": 0.1811, "step": 4766 }, { "epoch": 0.4940408332469686, "grad_norm": 0.5717727541923523, "learning_rate": 2.1358441537543393e-05, "loss": 0.2215, "step": 4767 }, { "epoch": 0.49414447092963, "grad_norm": 0.48989662528038025, "learning_rate": 2.135174344307589e-05, "loss": 0.1697, "step": 4768 }, { "epoch": 0.4942481086122914, "grad_norm": 0.5562463998794556, "learning_rate": 2.1345045196296036e-05, "loss": 0.2133, "step": 4769 }, { "epoch": 0.4943517462949528, "grad_norm": 0.5160841941833496, "learning_rate": 2.1338346797958584e-05, "loss": 0.2158, "step": 4770 }, { "epoch": 0.49445538397761424, "grad_norm": 0.5394311547279358, "learning_rate": 2.1331648248818292e-05, "loss": 0.2265, "step": 4771 }, { "epoch": 0.49455902166027566, "grad_norm": 0.5124887824058533, "learning_rate": 2.1324949549629946e-05, "loss": 0.2185, "step": 4772 }, { "epoch": 0.49466265934293707, "grad_norm": 0.42647621035575867, "learning_rate": 2.1318250701148343e-05, "loss": 0.1415, "step": 4773 }, { "epoch": 0.4947662970255985, "grad_norm": 0.5553656816482544, "learning_rate": 2.1311551704128298e-05, "loss": 0.2234, "step": 4774 }, { "epoch": 0.4948699347082599, "grad_norm": 0.5774763226509094, "learning_rate": 2.1304852559324653e-05, "loss": 0.245, "step": 4775 }, { "epoch": 0.4949735723909213, "grad_norm": 0.5323425531387329, "learning_rate": 2.129815326749225e-05, "loss": 0.2211, "step": 4776 }, { "epoch": 0.49507721007358274, "grad_norm": 0.5304404497146606, "learning_rate": 2.1291453829385947e-05, "loss": 0.214, "step": 4777 }, { "epoch": 0.49518084775624416, "grad_norm": 0.5394851565361023, "learning_rate": 2.1284754245760637e-05, "loss": 0.1973, "step": 4778 }, { "epoch": 0.4952844854389056, "grad_norm": 0.5241143703460693, "learning_rate": 2.127805451737122e-05, "loss": 0.1916, "step": 4779 }, { "epoch": 0.495388123121567, "grad_norm": 0.48268353939056396, "learning_rate": 2.1271354644972603e-05, "loss": 0.1924, "step": 4780 }, { "epoch": 0.4954917608042284, "grad_norm": 0.5393835306167603, "learning_rate": 2.126465462931972e-05, "loss": 0.2122, "step": 4781 }, { "epoch": 0.4955953984868898, "grad_norm": 0.4567290246486664, "learning_rate": 2.1257954471167525e-05, "loss": 0.1814, "step": 4782 }, { "epoch": 0.49569903616955124, "grad_norm": 0.5594385862350464, "learning_rate": 2.1251254171270972e-05, "loss": 0.2462, "step": 4783 }, { "epoch": 0.49580267385221266, "grad_norm": 0.604749858379364, "learning_rate": 2.124455373038504e-05, "loss": 0.2055, "step": 4784 }, { "epoch": 0.4959063115348741, "grad_norm": 0.5307607650756836, "learning_rate": 2.123785314926474e-05, "loss": 0.2186, "step": 4785 }, { "epoch": 0.4960099492175355, "grad_norm": 0.5244830846786499, "learning_rate": 2.123115242866506e-05, "loss": 0.2084, "step": 4786 }, { "epoch": 0.4961135869001969, "grad_norm": 0.5358648300170898, "learning_rate": 2.122445156934104e-05, "loss": 0.217, "step": 4787 }, { "epoch": 0.49621722458285833, "grad_norm": 0.6167570948600769, "learning_rate": 2.1217750572047725e-05, "loss": 0.2607, "step": 4788 }, { "epoch": 0.49632086226551975, "grad_norm": 0.5059612393379211, "learning_rate": 2.1211049437540166e-05, "loss": 0.1914, "step": 4789 }, { "epoch": 0.49642449994818116, "grad_norm": 0.5213268995285034, "learning_rate": 2.120434816657344e-05, "loss": 0.1881, "step": 4790 }, { "epoch": 0.4965281376308426, "grad_norm": 0.4669734239578247, "learning_rate": 2.119764675990263e-05, "loss": 0.1719, "step": 4791 }, { "epoch": 0.496631775313504, "grad_norm": 0.5827397108078003, "learning_rate": 2.119094521828285e-05, "loss": 0.2356, "step": 4792 }, { "epoch": 0.4967354129961654, "grad_norm": 0.5616671442985535, "learning_rate": 2.1184243542469214e-05, "loss": 0.218, "step": 4793 }, { "epoch": 0.49683905067882683, "grad_norm": 0.5867148637771606, "learning_rate": 2.1177541733216853e-05, "loss": 0.2114, "step": 4794 }, { "epoch": 0.49694268836148825, "grad_norm": 0.4665326774120331, "learning_rate": 2.1170839791280928e-05, "loss": 0.1659, "step": 4795 }, { "epoch": 0.49704632604414967, "grad_norm": 0.5649195313453674, "learning_rate": 2.116413771741659e-05, "loss": 0.2069, "step": 4796 }, { "epoch": 0.4971499637268111, "grad_norm": 0.5897424817085266, "learning_rate": 2.115743551237902e-05, "loss": 0.1988, "step": 4797 }, { "epoch": 0.4972536014094725, "grad_norm": 0.6335086822509766, "learning_rate": 2.115073317692342e-05, "loss": 0.2381, "step": 4798 }, { "epoch": 0.4973572390921339, "grad_norm": 0.5553082227706909, "learning_rate": 2.1144030711804996e-05, "loss": 0.2303, "step": 4799 }, { "epoch": 0.49746087677479534, "grad_norm": 0.4504654109477997, "learning_rate": 2.1137328117778967e-05, "loss": 0.1665, "step": 4800 }, { "epoch": 0.49756451445745675, "grad_norm": 0.5283129215240479, "learning_rate": 2.113062539560058e-05, "loss": 0.215, "step": 4801 }, { "epoch": 0.49766815214011817, "grad_norm": 0.5294463038444519, "learning_rate": 2.112392254602507e-05, "loss": 0.2278, "step": 4802 }, { "epoch": 0.4977717898227796, "grad_norm": 0.579262912273407, "learning_rate": 2.1117219569807717e-05, "loss": 0.2302, "step": 4803 }, { "epoch": 0.497875427505441, "grad_norm": 0.5535164475440979, "learning_rate": 2.111051646770381e-05, "loss": 0.2272, "step": 4804 }, { "epoch": 0.4979790651881024, "grad_norm": 0.5065291523933411, "learning_rate": 2.1103813240468624e-05, "loss": 0.199, "step": 4805 }, { "epoch": 0.4980827028707638, "grad_norm": 0.4622606635093689, "learning_rate": 2.109710988885748e-05, "loss": 0.1854, "step": 4806 }, { "epoch": 0.4981863405534252, "grad_norm": 0.5690516829490662, "learning_rate": 2.10904064136257e-05, "loss": 0.2311, "step": 4807 }, { "epoch": 0.4982899782360866, "grad_norm": 0.5897866487503052, "learning_rate": 2.108370281552862e-05, "loss": 0.2058, "step": 4808 }, { "epoch": 0.49839361591874803, "grad_norm": 0.5769507884979248, "learning_rate": 2.107699909532159e-05, "loss": 0.2183, "step": 4809 }, { "epoch": 0.49849725360140945, "grad_norm": 0.5400382876396179, "learning_rate": 2.107029525375998e-05, "loss": 0.1919, "step": 4810 }, { "epoch": 0.49860089128407087, "grad_norm": 0.48948460817337036, "learning_rate": 2.1063591291599167e-05, "loss": 0.2164, "step": 4811 }, { "epoch": 0.4987045289667323, "grad_norm": 0.49134010076522827, "learning_rate": 2.105688720959453e-05, "loss": 0.218, "step": 4812 }, { "epoch": 0.4988081666493937, "grad_norm": 0.5676142573356628, "learning_rate": 2.1050183008501487e-05, "loss": 0.2142, "step": 4813 }, { "epoch": 0.4989118043320551, "grad_norm": 0.46498799324035645, "learning_rate": 2.1043478689075464e-05, "loss": 0.1828, "step": 4814 }, { "epoch": 0.49901544201471654, "grad_norm": 0.5404950380325317, "learning_rate": 2.103677425207188e-05, "loss": 0.2091, "step": 4815 }, { "epoch": 0.49911907969737795, "grad_norm": 0.5214618444442749, "learning_rate": 2.103006969824618e-05, "loss": 0.2306, "step": 4816 }, { "epoch": 0.49922271738003937, "grad_norm": 0.6194533109664917, "learning_rate": 2.1023365028353835e-05, "loss": 0.235, "step": 4817 }, { "epoch": 0.4993263550627008, "grad_norm": 0.649368941783905, "learning_rate": 2.101666024315031e-05, "loss": 0.2216, "step": 4818 }, { "epoch": 0.4994299927453622, "grad_norm": 0.49496763944625854, "learning_rate": 2.1009955343391084e-05, "loss": 0.1852, "step": 4819 }, { "epoch": 0.4995336304280236, "grad_norm": 0.5390393137931824, "learning_rate": 2.1003250329831664e-05, "loss": 0.2087, "step": 4820 }, { "epoch": 0.49963726811068504, "grad_norm": 0.5321680307388306, "learning_rate": 2.0996545203227556e-05, "loss": 0.2018, "step": 4821 }, { "epoch": 0.49974090579334646, "grad_norm": 0.6124078035354614, "learning_rate": 2.0989839964334275e-05, "loss": 0.2178, "step": 4822 }, { "epoch": 0.4998445434760079, "grad_norm": 0.5372506380081177, "learning_rate": 2.0983134613907378e-05, "loss": 0.2017, "step": 4823 }, { "epoch": 0.4999481811586693, "grad_norm": 0.586346447467804, "learning_rate": 2.0976429152702392e-05, "loss": 0.2346, "step": 4824 }, { "epoch": 0.5000518188413307, "grad_norm": 0.5241424441337585, "learning_rate": 2.096972358147489e-05, "loss": 0.1981, "step": 4825 }, { "epoch": 0.5001554565239921, "grad_norm": 0.5134982466697693, "learning_rate": 2.0963017900980444e-05, "loss": 0.2092, "step": 4826 }, { "epoch": 0.5002590942066535, "grad_norm": 0.5294827818870544, "learning_rate": 2.0956312111974636e-05, "loss": 0.2074, "step": 4827 }, { "epoch": 0.5003627318893149, "grad_norm": 0.5249479413032532, "learning_rate": 2.094960621521307e-05, "loss": 0.2131, "step": 4828 }, { "epoch": 0.5004663695719763, "grad_norm": 0.5820158123970032, "learning_rate": 2.0942900211451352e-05, "loss": 0.2111, "step": 4829 }, { "epoch": 0.5005700072546377, "grad_norm": 0.5495398044586182, "learning_rate": 2.09361941014451e-05, "loss": 0.2015, "step": 4830 }, { "epoch": 0.5006736449372992, "grad_norm": 0.5136843323707581, "learning_rate": 2.0929487885949945e-05, "loss": 0.2091, "step": 4831 }, { "epoch": 0.5007772826199606, "grad_norm": 0.5137192606925964, "learning_rate": 2.0922781565721552e-05, "loss": 0.172, "step": 4832 }, { "epoch": 0.500880920302622, "grad_norm": 0.5403234958648682, "learning_rate": 2.0916075141515563e-05, "loss": 0.2079, "step": 4833 }, { "epoch": 0.5009845579852834, "grad_norm": 0.5703449845314026, "learning_rate": 2.090936861408765e-05, "loss": 0.2085, "step": 4834 }, { "epoch": 0.5010881956679448, "grad_norm": 0.4938044548034668, "learning_rate": 2.0902661984193493e-05, "loss": 0.1959, "step": 4835 }, { "epoch": 0.5011918333506062, "grad_norm": 0.4589805603027344, "learning_rate": 2.0895955252588787e-05, "loss": 0.1679, "step": 4836 }, { "epoch": 0.5012954710332677, "grad_norm": 0.5060716867446899, "learning_rate": 2.088924842002924e-05, "loss": 0.1942, "step": 4837 }, { "epoch": 0.5013991087159291, "grad_norm": 0.5256392359733582, "learning_rate": 2.088254148727056e-05, "loss": 0.1835, "step": 4838 }, { "epoch": 0.5015027463985905, "grad_norm": 0.5400627255439758, "learning_rate": 2.0875834455068485e-05, "loss": 0.1998, "step": 4839 }, { "epoch": 0.5016063840812519, "grad_norm": 0.5017799139022827, "learning_rate": 2.0869127324178734e-05, "loss": 0.1944, "step": 4840 }, { "epoch": 0.5017100217639133, "grad_norm": 0.5077838897705078, "learning_rate": 2.086242009535707e-05, "loss": 0.189, "step": 4841 }, { "epoch": 0.5018136594465747, "grad_norm": 0.5228158831596375, "learning_rate": 2.0855712769359257e-05, "loss": 0.2039, "step": 4842 }, { "epoch": 0.5019172971292362, "grad_norm": 0.5608428716659546, "learning_rate": 2.0849005346941053e-05, "loss": 0.2434, "step": 4843 }, { "epoch": 0.5020209348118976, "grad_norm": 0.4849930703639984, "learning_rate": 2.084229782885825e-05, "loss": 0.1997, "step": 4844 }, { "epoch": 0.502124572494559, "grad_norm": 0.7088670134544373, "learning_rate": 2.0835590215866638e-05, "loss": 0.2212, "step": 4845 }, { "epoch": 0.5022282101772204, "grad_norm": 0.4910408854484558, "learning_rate": 2.0828882508722012e-05, "loss": 0.2057, "step": 4846 }, { "epoch": 0.5023318478598818, "grad_norm": 0.4826197028160095, "learning_rate": 2.08221747081802e-05, "loss": 0.2006, "step": 4847 }, { "epoch": 0.5024354855425432, "grad_norm": 0.5076550841331482, "learning_rate": 2.0815466814997017e-05, "loss": 0.2186, "step": 4848 }, { "epoch": 0.5025391232252047, "grad_norm": 0.473873496055603, "learning_rate": 2.0808758829928304e-05, "loss": 0.1932, "step": 4849 }, { "epoch": 0.5026427609078661, "grad_norm": 0.5110844969749451, "learning_rate": 2.08020507537299e-05, "loss": 0.2079, "step": 4850 }, { "epoch": 0.5027463985905275, "grad_norm": 0.4520585536956787, "learning_rate": 2.0795342587157664e-05, "loss": 0.1579, "step": 4851 }, { "epoch": 0.5028500362731889, "grad_norm": 0.5274686217308044, "learning_rate": 2.0788634330967464e-05, "loss": 0.1953, "step": 4852 }, { "epoch": 0.5029536739558503, "grad_norm": 0.637174129486084, "learning_rate": 2.078192598591517e-05, "loss": 0.2671, "step": 4853 }, { "epoch": 0.5030573116385118, "grad_norm": 0.5289623141288757, "learning_rate": 2.0775217552756673e-05, "loss": 0.1988, "step": 4854 }, { "epoch": 0.5031609493211732, "grad_norm": 0.5649394392967224, "learning_rate": 2.076850903224787e-05, "loss": 0.2036, "step": 4855 }, { "epoch": 0.5032645870038346, "grad_norm": 0.5184451937675476, "learning_rate": 2.076180042514466e-05, "loss": 0.2003, "step": 4856 }, { "epoch": 0.503368224686496, "grad_norm": 0.6143597364425659, "learning_rate": 2.0755091732202963e-05, "loss": 0.2641, "step": 4857 }, { "epoch": 0.5034718623691574, "grad_norm": 0.5865727066993713, "learning_rate": 2.074838295417871e-05, "loss": 0.2279, "step": 4858 }, { "epoch": 0.5035755000518188, "grad_norm": 0.5569704174995422, "learning_rate": 2.074167409182782e-05, "loss": 0.2323, "step": 4859 }, { "epoch": 0.5036791377344803, "grad_norm": 0.5546584129333496, "learning_rate": 2.0734965145906248e-05, "loss": 0.2227, "step": 4860 }, { "epoch": 0.5037827754171417, "grad_norm": 0.5088528990745544, "learning_rate": 2.0728256117169948e-05, "loss": 0.2144, "step": 4861 }, { "epoch": 0.5038864130998031, "grad_norm": 0.4553952217102051, "learning_rate": 2.0721547006374882e-05, "loss": 0.1935, "step": 4862 }, { "epoch": 0.5039900507824645, "grad_norm": 0.5757675766944885, "learning_rate": 2.071483781427702e-05, "loss": 0.2233, "step": 4863 }, { "epoch": 0.5040936884651259, "grad_norm": 0.4998389482498169, "learning_rate": 2.0708128541632347e-05, "loss": 0.1866, "step": 4864 }, { "epoch": 0.5041973261477873, "grad_norm": 0.5182422995567322, "learning_rate": 2.070141918919685e-05, "loss": 0.1961, "step": 4865 }, { "epoch": 0.5043009638304488, "grad_norm": 0.5532435774803162, "learning_rate": 2.069470975772652e-05, "loss": 0.2189, "step": 4866 }, { "epoch": 0.5044046015131102, "grad_norm": 0.5550752282142639, "learning_rate": 2.0688000247977385e-05, "loss": 0.2251, "step": 4867 }, { "epoch": 0.5045082391957716, "grad_norm": 0.6300921440124512, "learning_rate": 2.0681290660705446e-05, "loss": 0.2397, "step": 4868 }, { "epoch": 0.504611876878433, "grad_norm": 0.535825788974762, "learning_rate": 2.0674580996666734e-05, "loss": 0.2173, "step": 4869 }, { "epoch": 0.5047155145610944, "grad_norm": 0.561592698097229, "learning_rate": 2.0667871256617288e-05, "loss": 0.2028, "step": 4870 }, { "epoch": 0.5048191522437558, "grad_norm": 0.4686198830604553, "learning_rate": 2.0661161441313145e-05, "loss": 0.1866, "step": 4871 }, { "epoch": 0.5049227899264173, "grad_norm": 0.6049419045448303, "learning_rate": 2.0654451551510358e-05, "loss": 0.2358, "step": 4872 }, { "epoch": 0.5050264276090787, "grad_norm": 0.5065613389015198, "learning_rate": 2.064774158796499e-05, "loss": 0.2327, "step": 4873 }, { "epoch": 0.5051300652917401, "grad_norm": 0.5832587480545044, "learning_rate": 2.064103155143311e-05, "loss": 0.198, "step": 4874 }, { "epoch": 0.5052337029744015, "grad_norm": 0.6004157662391663, "learning_rate": 2.0634321442670786e-05, "loss": 0.2501, "step": 4875 }, { "epoch": 0.5053373406570629, "grad_norm": 0.5657427310943604, "learning_rate": 2.0627611262434103e-05, "loss": 0.1991, "step": 4876 }, { "epoch": 0.5054409783397243, "grad_norm": 0.45743632316589355, "learning_rate": 2.0620901011479167e-05, "loss": 0.1834, "step": 4877 }, { "epoch": 0.5055446160223858, "grad_norm": 0.58927983045578, "learning_rate": 2.0614190690562065e-05, "loss": 0.2683, "step": 4878 }, { "epoch": 0.5056482537050472, "grad_norm": 0.5818038582801819, "learning_rate": 2.0607480300438914e-05, "loss": 0.2057, "step": 4879 }, { "epoch": 0.5057518913877086, "grad_norm": 0.5848224759101868, "learning_rate": 2.0600769841865832e-05, "loss": 0.2057, "step": 4880 }, { "epoch": 0.50585552907037, "grad_norm": 0.5434708595275879, "learning_rate": 2.0594059315598924e-05, "loss": 0.2042, "step": 4881 }, { "epoch": 0.5059591667530314, "grad_norm": 0.6042613387107849, "learning_rate": 2.0587348722394346e-05, "loss": 0.2285, "step": 4882 }, { "epoch": 0.5060628044356928, "grad_norm": 0.5077903866767883, "learning_rate": 2.0580638063008228e-05, "loss": 0.1994, "step": 4883 }, { "epoch": 0.5061664421183543, "grad_norm": 0.5937612652778625, "learning_rate": 2.0573927338196712e-05, "loss": 0.2173, "step": 4884 }, { "epoch": 0.5062700798010157, "grad_norm": 0.6044044494628906, "learning_rate": 2.056721654871596e-05, "loss": 0.2195, "step": 4885 }, { "epoch": 0.5063737174836771, "grad_norm": 0.5159188508987427, "learning_rate": 2.056050569532212e-05, "loss": 0.2136, "step": 4886 }, { "epoch": 0.5064773551663385, "grad_norm": 0.5349753499031067, "learning_rate": 2.0553794778771375e-05, "loss": 0.206, "step": 4887 }, { "epoch": 0.5065809928489999, "grad_norm": 0.650302529335022, "learning_rate": 2.05470837998199e-05, "loss": 0.2485, "step": 4888 }, { "epoch": 0.5066846305316614, "grad_norm": 0.5740201473236084, "learning_rate": 2.0540372759223865e-05, "loss": 0.2163, "step": 4889 }, { "epoch": 0.5067882682143228, "grad_norm": 0.49569568037986755, "learning_rate": 2.053366165773947e-05, "loss": 0.1884, "step": 4890 }, { "epoch": 0.5068919058969842, "grad_norm": 0.602543830871582, "learning_rate": 2.052695049612291e-05, "loss": 0.2305, "step": 4891 }, { "epoch": 0.5069955435796456, "grad_norm": 0.6395046710968018, "learning_rate": 2.0520239275130386e-05, "loss": 0.236, "step": 4892 }, { "epoch": 0.507099181262307, "grad_norm": 0.5917165875434875, "learning_rate": 2.051352799551811e-05, "loss": 0.2195, "step": 4893 }, { "epoch": 0.5072028189449684, "grad_norm": 0.5440151691436768, "learning_rate": 2.05068166580423e-05, "loss": 0.2175, "step": 4894 }, { "epoch": 0.5073064566276299, "grad_norm": 0.5186762809753418, "learning_rate": 2.0500105263459177e-05, "loss": 0.2083, "step": 4895 }, { "epoch": 0.5074100943102913, "grad_norm": 0.47229626774787903, "learning_rate": 2.0493393812524967e-05, "loss": 0.1562, "step": 4896 }, { "epoch": 0.5075137319929527, "grad_norm": 0.5724924802780151, "learning_rate": 2.048668230599591e-05, "loss": 0.2029, "step": 4897 }, { "epoch": 0.5076173696756141, "grad_norm": 0.48794323205947876, "learning_rate": 2.0479970744628245e-05, "loss": 0.2231, "step": 4898 }, { "epoch": 0.5077210073582755, "grad_norm": 0.5196564793586731, "learning_rate": 2.047325912917823e-05, "loss": 0.1771, "step": 4899 }, { "epoch": 0.5078246450409369, "grad_norm": 0.6032145023345947, "learning_rate": 2.0466547460402105e-05, "loss": 0.2086, "step": 4900 }, { "epoch": 0.5079282827235982, "grad_norm": 0.5414867401123047, "learning_rate": 2.0459835739056134e-05, "loss": 0.2299, "step": 4901 }, { "epoch": 0.5080319204062597, "grad_norm": 0.602116584777832, "learning_rate": 2.0453123965896598e-05, "loss": 0.2339, "step": 4902 }, { "epoch": 0.5081355580889211, "grad_norm": 0.5289559364318848, "learning_rate": 2.0446412141679756e-05, "loss": 0.2001, "step": 4903 }, { "epoch": 0.5082391957715825, "grad_norm": 0.506097137928009, "learning_rate": 2.043970026716188e-05, "loss": 0.1866, "step": 4904 }, { "epoch": 0.5083428334542439, "grad_norm": 0.4521408677101135, "learning_rate": 2.0432988343099267e-05, "loss": 0.1532, "step": 4905 }, { "epoch": 0.5084464711369053, "grad_norm": 0.5555333495140076, "learning_rate": 2.0426276370248197e-05, "loss": 0.2099, "step": 4906 }, { "epoch": 0.5085501088195667, "grad_norm": 0.5497279763221741, "learning_rate": 2.041956434936497e-05, "loss": 0.2062, "step": 4907 }, { "epoch": 0.5086537465022282, "grad_norm": 0.5221918225288391, "learning_rate": 2.041285228120589e-05, "loss": 0.1963, "step": 4908 }, { "epoch": 0.5087573841848896, "grad_norm": 0.5457268953323364, "learning_rate": 2.0406140166527247e-05, "loss": 0.189, "step": 4909 }, { "epoch": 0.508861021867551, "grad_norm": 0.5438562631607056, "learning_rate": 2.039942800608537e-05, "loss": 0.196, "step": 4910 }, { "epoch": 0.5089646595502124, "grad_norm": 0.5521365404129028, "learning_rate": 2.039271580063656e-05, "loss": 0.2085, "step": 4911 }, { "epoch": 0.5090682972328738, "grad_norm": 0.6124582290649414, "learning_rate": 2.0386003550937147e-05, "loss": 0.2398, "step": 4912 }, { "epoch": 0.5091719349155353, "grad_norm": 0.49621763825416565, "learning_rate": 2.037929125774345e-05, "loss": 0.1739, "step": 4913 }, { "epoch": 0.5092755725981967, "grad_norm": 0.48987457156181335, "learning_rate": 2.037257892181181e-05, "loss": 0.1832, "step": 4914 }, { "epoch": 0.5093792102808581, "grad_norm": 0.5281900763511658, "learning_rate": 2.0365866543898556e-05, "loss": 0.1738, "step": 4915 }, { "epoch": 0.5094828479635195, "grad_norm": 0.4487219452857971, "learning_rate": 2.0359154124760022e-05, "loss": 0.1562, "step": 4916 }, { "epoch": 0.5095864856461809, "grad_norm": 0.5459572076797485, "learning_rate": 2.035244166515256e-05, "loss": 0.221, "step": 4917 }, { "epoch": 0.5096901233288423, "grad_norm": 0.590134859085083, "learning_rate": 2.0345729165832527e-05, "loss": 0.2269, "step": 4918 }, { "epoch": 0.5097937610115038, "grad_norm": 0.5557520389556885, "learning_rate": 2.033901662755626e-05, "loss": 0.1803, "step": 4919 }, { "epoch": 0.5098973986941652, "grad_norm": 0.5433508157730103, "learning_rate": 2.0332304051080135e-05, "loss": 0.2154, "step": 4920 }, { "epoch": 0.5100010363768266, "grad_norm": 0.6076089143753052, "learning_rate": 2.03255914371605e-05, "loss": 0.2399, "step": 4921 }, { "epoch": 0.510104674059488, "grad_norm": 0.6198269128799438, "learning_rate": 2.0318878786553727e-05, "loss": 0.2298, "step": 4922 }, { "epoch": 0.5102083117421494, "grad_norm": 0.5292469263076782, "learning_rate": 2.0312166100016192e-05, "loss": 0.1892, "step": 4923 }, { "epoch": 0.5103119494248108, "grad_norm": 0.6244973540306091, "learning_rate": 2.030545337830427e-05, "loss": 0.2101, "step": 4924 }, { "epoch": 0.5104155871074723, "grad_norm": 0.5299911499023438, "learning_rate": 2.0298740622174328e-05, "loss": 0.2013, "step": 4925 }, { "epoch": 0.5105192247901337, "grad_norm": 0.4421478509902954, "learning_rate": 2.029202783238276e-05, "loss": 0.1414, "step": 4926 }, { "epoch": 0.5106228624727951, "grad_norm": 0.43288445472717285, "learning_rate": 2.0285315009685952e-05, "loss": 0.1628, "step": 4927 }, { "epoch": 0.5107265001554565, "grad_norm": 0.6298075914382935, "learning_rate": 2.027860215484029e-05, "loss": 0.2313, "step": 4928 }, { "epoch": 0.5108301378381179, "grad_norm": 0.4587681293487549, "learning_rate": 2.027188926860217e-05, "loss": 0.1725, "step": 4929 }, { "epoch": 0.5109337755207793, "grad_norm": 0.4807582497596741, "learning_rate": 2.026517635172799e-05, "loss": 0.1722, "step": 4930 }, { "epoch": 0.5110374132034408, "grad_norm": 0.591253399848938, "learning_rate": 2.0258463404974155e-05, "loss": 0.2413, "step": 4931 }, { "epoch": 0.5111410508861022, "grad_norm": 0.557607889175415, "learning_rate": 2.0251750429097057e-05, "loss": 0.1867, "step": 4932 }, { "epoch": 0.5112446885687636, "grad_norm": 0.520668089389801, "learning_rate": 2.0245037424853117e-05, "loss": 0.2009, "step": 4933 }, { "epoch": 0.511348326251425, "grad_norm": 0.5009155869483948, "learning_rate": 2.0238324392998745e-05, "loss": 0.2092, "step": 4934 }, { "epoch": 0.5114519639340864, "grad_norm": 0.5666335225105286, "learning_rate": 2.0231611334290343e-05, "loss": 0.2012, "step": 4935 }, { "epoch": 0.5115556016167478, "grad_norm": 0.519422173500061, "learning_rate": 2.022489824948434e-05, "loss": 0.1971, "step": 4936 }, { "epoch": 0.5116592392994093, "grad_norm": 0.5896724462509155, "learning_rate": 2.0218185139337155e-05, "loss": 0.2373, "step": 4937 }, { "epoch": 0.5117628769820707, "grad_norm": 0.5998659133911133, "learning_rate": 2.02114720046052e-05, "loss": 0.2382, "step": 4938 }, { "epoch": 0.5118665146647321, "grad_norm": 0.49710676074028015, "learning_rate": 2.0204758846044912e-05, "loss": 0.1944, "step": 4939 }, { "epoch": 0.5119701523473935, "grad_norm": 0.5756406784057617, "learning_rate": 2.0198045664412717e-05, "loss": 0.2207, "step": 4940 }, { "epoch": 0.5120737900300549, "grad_norm": 0.4823175072669983, "learning_rate": 2.0191332460465042e-05, "loss": 0.1776, "step": 4941 }, { "epoch": 0.5121774277127163, "grad_norm": 0.5221631526947021, "learning_rate": 2.0184619234958322e-05, "loss": 0.2037, "step": 4942 }, { "epoch": 0.5122810653953778, "grad_norm": 0.5067155361175537, "learning_rate": 2.0177905988649e-05, "loss": 0.1981, "step": 4943 }, { "epoch": 0.5123847030780392, "grad_norm": 0.6370543837547302, "learning_rate": 2.0171192722293504e-05, "loss": 0.2487, "step": 4944 }, { "epoch": 0.5124883407607006, "grad_norm": 0.5774009823799133, "learning_rate": 2.0164479436648272e-05, "loss": 0.2187, "step": 4945 }, { "epoch": 0.512591978443362, "grad_norm": 0.5051264762878418, "learning_rate": 2.0157766132469762e-05, "loss": 0.1969, "step": 4946 }, { "epoch": 0.5126956161260234, "grad_norm": 0.5043640732765198, "learning_rate": 2.0151052810514405e-05, "loss": 0.176, "step": 4947 }, { "epoch": 0.5127992538086849, "grad_norm": 0.581299901008606, "learning_rate": 2.014433947153865e-05, "loss": 0.2074, "step": 4948 }, { "epoch": 0.5129028914913463, "grad_norm": 0.5512892603874207, "learning_rate": 2.013762611629895e-05, "loss": 0.2097, "step": 4949 }, { "epoch": 0.5130065291740077, "grad_norm": 0.5397634506225586, "learning_rate": 2.0130912745551753e-05, "loss": 0.21, "step": 4950 }, { "epoch": 0.5131101668566691, "grad_norm": 0.5209311842918396, "learning_rate": 2.0124199360053516e-05, "loss": 0.2005, "step": 4951 }, { "epoch": 0.5132138045393305, "grad_norm": 0.5762450098991394, "learning_rate": 2.0117485960560682e-05, "loss": 0.2271, "step": 4952 }, { "epoch": 0.5133174422219919, "grad_norm": 0.503059446811676, "learning_rate": 2.011077254782972e-05, "loss": 0.1871, "step": 4953 }, { "epoch": 0.5134210799046534, "grad_norm": 0.42786842584609985, "learning_rate": 2.0104059122617073e-05, "loss": 0.1904, "step": 4954 }, { "epoch": 0.5135247175873148, "grad_norm": 0.4788164496421814, "learning_rate": 2.009734568567921e-05, "loss": 0.192, "step": 4955 }, { "epoch": 0.5136283552699762, "grad_norm": 0.4950418174266815, "learning_rate": 2.009063223777258e-05, "loss": 0.2189, "step": 4956 }, { "epoch": 0.5137319929526376, "grad_norm": 0.5276430249214172, "learning_rate": 2.0083918779653653e-05, "loss": 0.2255, "step": 4957 }, { "epoch": 0.513835630635299, "grad_norm": 0.4317634403705597, "learning_rate": 2.007720531207889e-05, "loss": 0.1522, "step": 4958 }, { "epoch": 0.5139392683179604, "grad_norm": 0.5495805740356445, "learning_rate": 2.0070491835804752e-05, "loss": 0.2207, "step": 4959 }, { "epoch": 0.5140429060006219, "grad_norm": 0.5706672668457031, "learning_rate": 2.00637783515877e-05, "loss": 0.2179, "step": 4960 }, { "epoch": 0.5141465436832833, "grad_norm": 0.5287724137306213, "learning_rate": 2.0057064860184207e-05, "loss": 0.1862, "step": 4961 }, { "epoch": 0.5142501813659447, "grad_norm": 0.5140873193740845, "learning_rate": 2.0050351362350737e-05, "loss": 0.2104, "step": 4962 }, { "epoch": 0.5143538190486061, "grad_norm": 0.5113810896873474, "learning_rate": 2.0043637858843748e-05, "loss": 0.208, "step": 4963 }, { "epoch": 0.5144574567312675, "grad_norm": 0.5623809099197388, "learning_rate": 2.0036924350419716e-05, "loss": 0.2047, "step": 4964 }, { "epoch": 0.5145610944139289, "grad_norm": 0.5720166563987732, "learning_rate": 2.0030210837835105e-05, "loss": 0.2262, "step": 4965 }, { "epoch": 0.5146647320965904, "grad_norm": 0.5529698133468628, "learning_rate": 2.002349732184638e-05, "loss": 0.1992, "step": 4966 }, { "epoch": 0.5147683697792518, "grad_norm": 0.5903406143188477, "learning_rate": 2.0016783803210015e-05, "loss": 0.2305, "step": 4967 }, { "epoch": 0.5148720074619132, "grad_norm": 0.5520504117012024, "learning_rate": 2.001007028268248e-05, "loss": 0.2056, "step": 4968 }, { "epoch": 0.5149756451445746, "grad_norm": 0.5906986594200134, "learning_rate": 2.000335676102024e-05, "loss": 0.2187, "step": 4969 }, { "epoch": 0.515079282827236, "grad_norm": 0.4421522319316864, "learning_rate": 1.9996643238979763e-05, "loss": 0.1622, "step": 4970 }, { "epoch": 0.5151829205098974, "grad_norm": 0.5289708375930786, "learning_rate": 1.9989929717317528e-05, "loss": 0.2081, "step": 4971 }, { "epoch": 0.5152865581925589, "grad_norm": 0.5530785322189331, "learning_rate": 1.9983216196789988e-05, "loss": 0.2322, "step": 4972 }, { "epoch": 0.5153901958752203, "grad_norm": 0.5963031649589539, "learning_rate": 1.9976502678153622e-05, "loss": 0.2172, "step": 4973 }, { "epoch": 0.5154938335578817, "grad_norm": 0.5122589468955994, "learning_rate": 1.9969789162164905e-05, "loss": 0.2029, "step": 4974 }, { "epoch": 0.5155974712405431, "grad_norm": 0.5265605449676514, "learning_rate": 1.996307564958029e-05, "loss": 0.1863, "step": 4975 }, { "epoch": 0.5157011089232045, "grad_norm": 0.5558157563209534, "learning_rate": 1.9956362141156262e-05, "loss": 0.1968, "step": 4976 }, { "epoch": 0.5158047466058658, "grad_norm": 0.43986696004867554, "learning_rate": 1.994964863764927e-05, "loss": 0.1801, "step": 4977 }, { "epoch": 0.5159083842885273, "grad_norm": 0.45596033334732056, "learning_rate": 1.9942935139815796e-05, "loss": 0.1739, "step": 4978 }, { "epoch": 0.5160120219711887, "grad_norm": 0.621364951133728, "learning_rate": 1.9936221648412305e-05, "loss": 0.2441, "step": 4979 }, { "epoch": 0.5161156596538501, "grad_norm": 0.49341389536857605, "learning_rate": 1.992950816419525e-05, "loss": 0.1733, "step": 4980 }, { "epoch": 0.5162192973365115, "grad_norm": 0.4995219111442566, "learning_rate": 1.9922794687921117e-05, "loss": 0.1743, "step": 4981 }, { "epoch": 0.5163229350191729, "grad_norm": 0.5646112561225891, "learning_rate": 1.9916081220346353e-05, "loss": 0.2044, "step": 4982 }, { "epoch": 0.5164265727018343, "grad_norm": 0.5320212841033936, "learning_rate": 1.990936776222742e-05, "loss": 0.2062, "step": 4983 }, { "epoch": 0.5165302103844958, "grad_norm": 0.5387780666351318, "learning_rate": 1.99026543143208e-05, "loss": 0.2139, "step": 4984 }, { "epoch": 0.5166338480671572, "grad_norm": 0.5501453280448914, "learning_rate": 1.9895940877382934e-05, "loss": 0.215, "step": 4985 }, { "epoch": 0.5167374857498186, "grad_norm": 0.5094042420387268, "learning_rate": 1.9889227452170294e-05, "loss": 0.2023, "step": 4986 }, { "epoch": 0.51684112343248, "grad_norm": 0.5256874561309814, "learning_rate": 1.9882514039439324e-05, "loss": 0.2131, "step": 4987 }, { "epoch": 0.5169447611151414, "grad_norm": 0.5167512893676758, "learning_rate": 1.9875800639946487e-05, "loss": 0.2212, "step": 4988 }, { "epoch": 0.5170483987978028, "grad_norm": 0.4585399925708771, "learning_rate": 1.986908725444825e-05, "loss": 0.1731, "step": 4989 }, { "epoch": 0.5171520364804643, "grad_norm": 0.5621468424797058, "learning_rate": 1.9862373883701055e-05, "loss": 0.218, "step": 4990 }, { "epoch": 0.5172556741631257, "grad_norm": 0.5321661233901978, "learning_rate": 1.9855660528461356e-05, "loss": 0.1796, "step": 4991 }, { "epoch": 0.5173593118457871, "grad_norm": 0.4230225682258606, "learning_rate": 1.98489471894856e-05, "loss": 0.1497, "step": 4992 }, { "epoch": 0.5174629495284485, "grad_norm": 0.4817984402179718, "learning_rate": 1.984223386753024e-05, "loss": 0.1789, "step": 4993 }, { "epoch": 0.5175665872111099, "grad_norm": 0.47356268763542175, "learning_rate": 1.9835520563351735e-05, "loss": 0.1957, "step": 4994 }, { "epoch": 0.5176702248937713, "grad_norm": 0.5884582996368408, "learning_rate": 1.9828807277706502e-05, "loss": 0.2165, "step": 4995 }, { "epoch": 0.5177738625764328, "grad_norm": 0.545630156993866, "learning_rate": 1.982209401135101e-05, "loss": 0.1977, "step": 4996 }, { "epoch": 0.5178775002590942, "grad_norm": 0.5935811996459961, "learning_rate": 1.981538076504168e-05, "loss": 0.2542, "step": 4997 }, { "epoch": 0.5179811379417556, "grad_norm": 0.49267682433128357, "learning_rate": 1.980866753953496e-05, "loss": 0.2011, "step": 4998 }, { "epoch": 0.518084775624417, "grad_norm": 0.5562503337860107, "learning_rate": 1.980195433558729e-05, "loss": 0.2326, "step": 4999 }, { "epoch": 0.5181884133070784, "grad_norm": 0.5674566626548767, "learning_rate": 1.9795241153955094e-05, "loss": 0.215, "step": 5000 }, { "epoch": 0.5182920509897398, "grad_norm": 0.5795115232467651, "learning_rate": 1.978852799539481e-05, "loss": 0.226, "step": 5001 }, { "epoch": 0.5183956886724013, "grad_norm": 0.44766589999198914, "learning_rate": 1.9781814860662855e-05, "loss": 0.1581, "step": 5002 }, { "epoch": 0.5184993263550627, "grad_norm": 0.4725109338760376, "learning_rate": 1.9775101750515663e-05, "loss": 0.1696, "step": 5003 }, { "epoch": 0.5186029640377241, "grad_norm": 0.5291849374771118, "learning_rate": 1.9768388665709664e-05, "loss": 0.1988, "step": 5004 }, { "epoch": 0.5187066017203855, "grad_norm": 0.5688635110855103, "learning_rate": 1.976167560700126e-05, "loss": 0.2089, "step": 5005 }, { "epoch": 0.5188102394030469, "grad_norm": 0.4801862835884094, "learning_rate": 1.9754962575146887e-05, "loss": 0.1639, "step": 5006 }, { "epoch": 0.5189138770857084, "grad_norm": 0.45741963386535645, "learning_rate": 1.974824957090295e-05, "loss": 0.1668, "step": 5007 }, { "epoch": 0.5190175147683698, "grad_norm": 0.5641513466835022, "learning_rate": 1.974153659502585e-05, "loss": 0.2191, "step": 5008 }, { "epoch": 0.5191211524510312, "grad_norm": 0.5678064823150635, "learning_rate": 1.9734823648272013e-05, "loss": 0.2029, "step": 5009 }, { "epoch": 0.5192247901336926, "grad_norm": 0.5019567608833313, "learning_rate": 1.9728110731397835e-05, "loss": 0.2027, "step": 5010 }, { "epoch": 0.519328427816354, "grad_norm": 0.536777675151825, "learning_rate": 1.972139784515972e-05, "loss": 0.209, "step": 5011 }, { "epoch": 0.5194320654990154, "grad_norm": 0.5042787194252014, "learning_rate": 1.971468499031405e-05, "loss": 0.1956, "step": 5012 }, { "epoch": 0.5195357031816769, "grad_norm": 0.5100334286689758, "learning_rate": 1.970797216761724e-05, "loss": 0.2024, "step": 5013 }, { "epoch": 0.5196393408643383, "grad_norm": 0.5792742967605591, "learning_rate": 1.970125937782568e-05, "loss": 0.2186, "step": 5014 }, { "epoch": 0.5197429785469997, "grad_norm": 0.5526664853096008, "learning_rate": 1.9694546621695737e-05, "loss": 0.2045, "step": 5015 }, { "epoch": 0.5198466162296611, "grad_norm": 0.48117056488990784, "learning_rate": 1.9687833899983818e-05, "loss": 0.1734, "step": 5016 }, { "epoch": 0.5199502539123225, "grad_norm": 0.5357035994529724, "learning_rate": 1.9681121213446276e-05, "loss": 0.1905, "step": 5017 }, { "epoch": 0.5200538915949839, "grad_norm": 0.636917769908905, "learning_rate": 1.96744085628395e-05, "loss": 0.2659, "step": 5018 }, { "epoch": 0.5201575292776454, "grad_norm": 0.5372515916824341, "learning_rate": 1.9667695948919875e-05, "loss": 0.2142, "step": 5019 }, { "epoch": 0.5202611669603068, "grad_norm": 0.5247703194618225, "learning_rate": 1.966098337244374e-05, "loss": 0.1983, "step": 5020 }, { "epoch": 0.5203648046429682, "grad_norm": 0.663495659828186, "learning_rate": 1.9654270834167483e-05, "loss": 0.2582, "step": 5021 }, { "epoch": 0.5204684423256296, "grad_norm": 0.5289007425308228, "learning_rate": 1.9647558334847445e-05, "loss": 0.192, "step": 5022 }, { "epoch": 0.520572080008291, "grad_norm": 0.4809255003929138, "learning_rate": 1.964084587523998e-05, "loss": 0.1857, "step": 5023 }, { "epoch": 0.5206757176909524, "grad_norm": 0.5267800688743591, "learning_rate": 1.9634133456101458e-05, "loss": 0.2173, "step": 5024 }, { "epoch": 0.5207793553736139, "grad_norm": 0.5737069249153137, "learning_rate": 1.9627421078188197e-05, "loss": 0.2565, "step": 5025 }, { "epoch": 0.5208829930562753, "grad_norm": 0.6062821745872498, "learning_rate": 1.962070874225656e-05, "loss": 0.2093, "step": 5026 }, { "epoch": 0.5209866307389367, "grad_norm": 0.5270495414733887, "learning_rate": 1.961399644906286e-05, "loss": 0.2017, "step": 5027 }, { "epoch": 0.5210902684215981, "grad_norm": 0.5432182550430298, "learning_rate": 1.9607284199363445e-05, "loss": 0.2218, "step": 5028 }, { "epoch": 0.5211939061042595, "grad_norm": 0.7014081478118896, "learning_rate": 1.960057199391464e-05, "loss": 0.2633, "step": 5029 }, { "epoch": 0.521297543786921, "grad_norm": 0.5988870859146118, "learning_rate": 1.9593859833472756e-05, "loss": 0.1928, "step": 5030 }, { "epoch": 0.5214011814695824, "grad_norm": 0.5661015510559082, "learning_rate": 1.958714771879412e-05, "loss": 0.2194, "step": 5031 }, { "epoch": 0.5215048191522438, "grad_norm": 0.5360023975372314, "learning_rate": 1.958043565063504e-05, "loss": 0.2002, "step": 5032 }, { "epoch": 0.5216084568349052, "grad_norm": 0.5409901738166809, "learning_rate": 1.9573723629751803e-05, "loss": 0.2131, "step": 5033 }, { "epoch": 0.5217120945175666, "grad_norm": 0.5607601404190063, "learning_rate": 1.956701165690074e-05, "loss": 0.2015, "step": 5034 }, { "epoch": 0.521815732200228, "grad_norm": 0.529923677444458, "learning_rate": 1.9560299732838127e-05, "loss": 0.1902, "step": 5035 }, { "epoch": 0.5219193698828895, "grad_norm": 0.6019048094749451, "learning_rate": 1.9553587858320257e-05, "loss": 0.2162, "step": 5036 }, { "epoch": 0.5220230075655509, "grad_norm": 0.5528945922851562, "learning_rate": 1.954687603410341e-05, "loss": 0.243, "step": 5037 }, { "epoch": 0.5221266452482123, "grad_norm": 0.5756701827049255, "learning_rate": 1.9540164260943866e-05, "loss": 0.1943, "step": 5038 }, { "epoch": 0.5222302829308737, "grad_norm": 0.5087042450904846, "learning_rate": 1.9533452539597905e-05, "loss": 0.2166, "step": 5039 }, { "epoch": 0.5223339206135351, "grad_norm": 0.5744715929031372, "learning_rate": 1.9526740870821776e-05, "loss": 0.2086, "step": 5040 }, { "epoch": 0.5224375582961965, "grad_norm": 0.5722135305404663, "learning_rate": 1.952002925537176e-05, "loss": 0.2113, "step": 5041 }, { "epoch": 0.522541195978858, "grad_norm": 0.5168009400367737, "learning_rate": 1.9513317694004097e-05, "loss": 0.1892, "step": 5042 }, { "epoch": 0.5226448336615194, "grad_norm": 0.5232700109481812, "learning_rate": 1.9506606187475036e-05, "loss": 0.1743, "step": 5043 }, { "epoch": 0.5227484713441808, "grad_norm": 0.6535675525665283, "learning_rate": 1.9499894736540833e-05, "loss": 0.2438, "step": 5044 }, { "epoch": 0.5228521090268422, "grad_norm": 0.6077508926391602, "learning_rate": 1.9493183341957706e-05, "loss": 0.1878, "step": 5045 }, { "epoch": 0.5229557467095036, "grad_norm": 0.5761801600456238, "learning_rate": 1.94864720044819e-05, "loss": 0.1892, "step": 5046 }, { "epoch": 0.523059384392165, "grad_norm": 0.5046095848083496, "learning_rate": 1.9479760724869617e-05, "loss": 0.1862, "step": 5047 }, { "epoch": 0.5231630220748265, "grad_norm": 0.514988362789154, "learning_rate": 1.9473049503877094e-05, "loss": 0.1951, "step": 5048 }, { "epoch": 0.5232666597574879, "grad_norm": 0.5308780074119568, "learning_rate": 1.946633834226054e-05, "loss": 0.2073, "step": 5049 }, { "epoch": 0.5233702974401493, "grad_norm": 0.5171186327934265, "learning_rate": 1.9459627240776142e-05, "loss": 0.1954, "step": 5050 }, { "epoch": 0.5234739351228107, "grad_norm": 0.5829634070396423, "learning_rate": 1.9452916200180115e-05, "loss": 0.209, "step": 5051 }, { "epoch": 0.5235775728054721, "grad_norm": 0.585152804851532, "learning_rate": 1.9446205221228628e-05, "loss": 0.2127, "step": 5052 }, { "epoch": 0.5236812104881334, "grad_norm": 0.499165415763855, "learning_rate": 1.9439494304677883e-05, "loss": 0.1915, "step": 5053 }, { "epoch": 0.5237848481707948, "grad_norm": 0.5298867225646973, "learning_rate": 1.9432783451284055e-05, "loss": 0.1859, "step": 5054 }, { "epoch": 0.5238884858534563, "grad_norm": 0.4951799213886261, "learning_rate": 1.942607266180329e-05, "loss": 0.1751, "step": 5055 }, { "epoch": 0.5239921235361177, "grad_norm": 0.6185844540596008, "learning_rate": 1.9419361936991782e-05, "loss": 0.2251, "step": 5056 }, { "epoch": 0.5240957612187791, "grad_norm": 0.5453723669052124, "learning_rate": 1.941265127760566e-05, "loss": 0.2042, "step": 5057 }, { "epoch": 0.5241993989014405, "grad_norm": 0.5065323710441589, "learning_rate": 1.9405940684401076e-05, "loss": 0.1834, "step": 5058 }, { "epoch": 0.5243030365841019, "grad_norm": 0.6081438660621643, "learning_rate": 1.9399230158134178e-05, "loss": 0.2117, "step": 5059 }, { "epoch": 0.5244066742667634, "grad_norm": 0.547488272190094, "learning_rate": 1.9392519699561092e-05, "loss": 0.2205, "step": 5060 }, { "epoch": 0.5245103119494248, "grad_norm": 0.4661538600921631, "learning_rate": 1.9385809309437945e-05, "loss": 0.1676, "step": 5061 }, { "epoch": 0.5246139496320862, "grad_norm": 0.5201432108879089, "learning_rate": 1.937909898852084e-05, "loss": 0.1842, "step": 5062 }, { "epoch": 0.5247175873147476, "grad_norm": 0.5604529976844788, "learning_rate": 1.93723887375659e-05, "loss": 0.1932, "step": 5063 }, { "epoch": 0.524821224997409, "grad_norm": 0.5652872920036316, "learning_rate": 1.9365678557329227e-05, "loss": 0.2091, "step": 5064 }, { "epoch": 0.5249248626800704, "grad_norm": 0.5462526082992554, "learning_rate": 1.93589684485669e-05, "loss": 0.193, "step": 5065 }, { "epoch": 0.5250285003627319, "grad_norm": 0.576276421546936, "learning_rate": 1.9352258412035016e-05, "loss": 0.2424, "step": 5066 }, { "epoch": 0.5251321380453933, "grad_norm": 0.5367346405982971, "learning_rate": 1.9345548448489645e-05, "loss": 0.1786, "step": 5067 }, { "epoch": 0.5252357757280547, "grad_norm": 0.6185262203216553, "learning_rate": 1.9338838558686858e-05, "loss": 0.2227, "step": 5068 }, { "epoch": 0.5253394134107161, "grad_norm": 0.5650673508644104, "learning_rate": 1.9332128743382715e-05, "loss": 0.2209, "step": 5069 }, { "epoch": 0.5254430510933775, "grad_norm": 0.5670083165168762, "learning_rate": 1.932541900333327e-05, "loss": 0.2333, "step": 5070 }, { "epoch": 0.5255466887760389, "grad_norm": 0.5780977010726929, "learning_rate": 1.9318709339294554e-05, "loss": 0.2025, "step": 5071 }, { "epoch": 0.5256503264587004, "grad_norm": 0.6275104284286499, "learning_rate": 1.931199975202262e-05, "loss": 0.2034, "step": 5072 }, { "epoch": 0.5257539641413618, "grad_norm": 0.5302552580833435, "learning_rate": 1.9305290242273482e-05, "loss": 0.1911, "step": 5073 }, { "epoch": 0.5258576018240232, "grad_norm": 0.6476036310195923, "learning_rate": 1.9298580810803163e-05, "loss": 0.2388, "step": 5074 }, { "epoch": 0.5259612395066846, "grad_norm": 0.551716685295105, "learning_rate": 1.929187145836766e-05, "loss": 0.2417, "step": 5075 }, { "epoch": 0.526064877189346, "grad_norm": 0.5433430671691895, "learning_rate": 1.928516218572298e-05, "loss": 0.2182, "step": 5076 }, { "epoch": 0.5261685148720074, "grad_norm": 0.5775318145751953, "learning_rate": 1.927845299362512e-05, "loss": 0.2168, "step": 5077 }, { "epoch": 0.5262721525546689, "grad_norm": 0.48974794149398804, "learning_rate": 1.9271743882830052e-05, "loss": 0.1999, "step": 5078 }, { "epoch": 0.5263757902373303, "grad_norm": 0.6543878316879272, "learning_rate": 1.926503485409376e-05, "loss": 0.2193, "step": 5079 }, { "epoch": 0.5264794279199917, "grad_norm": 0.5222949385643005, "learning_rate": 1.9258325908172185e-05, "loss": 0.2047, "step": 5080 }, { "epoch": 0.5265830656026531, "grad_norm": 0.5059442520141602, "learning_rate": 1.9251617045821295e-05, "loss": 0.2218, "step": 5081 }, { "epoch": 0.5266867032853145, "grad_norm": 0.5379745364189148, "learning_rate": 1.9244908267797043e-05, "loss": 0.2186, "step": 5082 }, { "epoch": 0.526790340967976, "grad_norm": 0.5693245530128479, "learning_rate": 1.9238199574855344e-05, "loss": 0.2163, "step": 5083 }, { "epoch": 0.5268939786506374, "grad_norm": 0.44062313437461853, "learning_rate": 1.9231490967752137e-05, "loss": 0.1705, "step": 5084 }, { "epoch": 0.5269976163332988, "grad_norm": 0.5416108965873718, "learning_rate": 1.9224782447243334e-05, "loss": 0.2132, "step": 5085 }, { "epoch": 0.5271012540159602, "grad_norm": 0.5123291015625, "learning_rate": 1.921807401408483e-05, "loss": 0.2008, "step": 5086 }, { "epoch": 0.5272048916986216, "grad_norm": 0.5766613483428955, "learning_rate": 1.9211365669032546e-05, "loss": 0.2038, "step": 5087 }, { "epoch": 0.527308529381283, "grad_norm": 0.5707951188087463, "learning_rate": 1.9204657412842342e-05, "loss": 0.2128, "step": 5088 }, { "epoch": 0.5274121670639444, "grad_norm": 0.5377147197723389, "learning_rate": 1.9197949246270112e-05, "loss": 0.1951, "step": 5089 }, { "epoch": 0.5275158047466059, "grad_norm": 0.5024455189704895, "learning_rate": 1.9191241170071702e-05, "loss": 0.1794, "step": 5090 }, { "epoch": 0.5276194424292673, "grad_norm": 0.5326787233352661, "learning_rate": 1.9184533185002986e-05, "loss": 0.1962, "step": 5091 }, { "epoch": 0.5277230801119287, "grad_norm": 0.5894343852996826, "learning_rate": 1.917782529181981e-05, "loss": 0.2277, "step": 5092 }, { "epoch": 0.5278267177945901, "grad_norm": 0.5485954284667969, "learning_rate": 1.917111749127799e-05, "loss": 0.1965, "step": 5093 }, { "epoch": 0.5279303554772515, "grad_norm": 0.534939706325531, "learning_rate": 1.9164409784133372e-05, "loss": 0.211, "step": 5094 }, { "epoch": 0.528033993159913, "grad_norm": 0.5456961989402771, "learning_rate": 1.9157702171141757e-05, "loss": 0.1976, "step": 5095 }, { "epoch": 0.5281376308425744, "grad_norm": 0.5366075038909912, "learning_rate": 1.9150994653058947e-05, "loss": 0.2125, "step": 5096 }, { "epoch": 0.5282412685252358, "grad_norm": 0.5909968018531799, "learning_rate": 1.914428723064075e-05, "loss": 0.2123, "step": 5097 }, { "epoch": 0.5283449062078972, "grad_norm": 0.64056795835495, "learning_rate": 1.913757990464293e-05, "loss": 0.2349, "step": 5098 }, { "epoch": 0.5284485438905586, "grad_norm": 0.5149229168891907, "learning_rate": 1.9130872675821273e-05, "loss": 0.1872, "step": 5099 }, { "epoch": 0.52855218157322, "grad_norm": 0.5525230765342712, "learning_rate": 1.912416554493152e-05, "loss": 0.2375, "step": 5100 }, { "epoch": 0.5286558192558815, "grad_norm": 0.5269282460212708, "learning_rate": 1.9117458512729443e-05, "loss": 0.1978, "step": 5101 }, { "epoch": 0.5287594569385429, "grad_norm": 0.5171878337860107, "learning_rate": 1.9110751579970767e-05, "loss": 0.1845, "step": 5102 }, { "epoch": 0.5288630946212043, "grad_norm": 0.5259308815002441, "learning_rate": 1.9104044747411213e-05, "loss": 0.1994, "step": 5103 }, { "epoch": 0.5289667323038657, "grad_norm": 0.49962759017944336, "learning_rate": 1.9097338015806514e-05, "loss": 0.1928, "step": 5104 }, { "epoch": 0.5290703699865271, "grad_norm": 0.5970175862312317, "learning_rate": 1.9090631385912356e-05, "loss": 0.2264, "step": 5105 }, { "epoch": 0.5291740076691885, "grad_norm": 0.5184756517410278, "learning_rate": 1.908392485848444e-05, "loss": 0.181, "step": 5106 }, { "epoch": 0.52927764535185, "grad_norm": 0.5002952814102173, "learning_rate": 1.9077218434278454e-05, "loss": 0.2004, "step": 5107 }, { "epoch": 0.5293812830345114, "grad_norm": 0.5318555235862732, "learning_rate": 1.9070512114050055e-05, "loss": 0.2001, "step": 5108 }, { "epoch": 0.5294849207171728, "grad_norm": 0.6008946299552917, "learning_rate": 1.906380589855491e-05, "loss": 0.2246, "step": 5109 }, { "epoch": 0.5295885583998342, "grad_norm": 0.6363059282302856, "learning_rate": 1.9057099788548658e-05, "loss": 0.2458, "step": 5110 }, { "epoch": 0.5296921960824956, "grad_norm": 0.5909748673439026, "learning_rate": 1.9050393784786933e-05, "loss": 0.2314, "step": 5111 }, { "epoch": 0.529795833765157, "grad_norm": 0.6305270791053772, "learning_rate": 1.9043687888025367e-05, "loss": 0.2594, "step": 5112 }, { "epoch": 0.5298994714478185, "grad_norm": 0.6209995746612549, "learning_rate": 1.903698209901956e-05, "loss": 0.2149, "step": 5113 }, { "epoch": 0.5300031091304799, "grad_norm": 0.6202288269996643, "learning_rate": 1.903027641852512e-05, "loss": 0.2201, "step": 5114 }, { "epoch": 0.5301067468131413, "grad_norm": 0.6097816824913025, "learning_rate": 1.902357084729761e-05, "loss": 0.209, "step": 5115 }, { "epoch": 0.5302103844958027, "grad_norm": 0.6084482073783875, "learning_rate": 1.901686538609263e-05, "loss": 0.2025, "step": 5116 }, { "epoch": 0.5303140221784641, "grad_norm": 0.6160656809806824, "learning_rate": 1.901016003566573e-05, "loss": 0.2042, "step": 5117 }, { "epoch": 0.5304176598611255, "grad_norm": 0.5739774107933044, "learning_rate": 1.900345479677245e-05, "loss": 0.203, "step": 5118 }, { "epoch": 0.530521297543787, "grad_norm": 0.5758394598960876, "learning_rate": 1.8996749670168342e-05, "loss": 0.221, "step": 5119 }, { "epoch": 0.5306249352264484, "grad_norm": 0.5090250372886658, "learning_rate": 1.8990044656608922e-05, "loss": 0.2106, "step": 5120 }, { "epoch": 0.5307285729091098, "grad_norm": 0.5946542024612427, "learning_rate": 1.8983339756849693e-05, "loss": 0.2208, "step": 5121 }, { "epoch": 0.5308322105917712, "grad_norm": 0.5539801120758057, "learning_rate": 1.8976634971646168e-05, "loss": 0.2032, "step": 5122 }, { "epoch": 0.5309358482744326, "grad_norm": 0.6142858266830444, "learning_rate": 1.8969930301753822e-05, "loss": 0.2313, "step": 5123 }, { "epoch": 0.531039485957094, "grad_norm": 0.5650430917739868, "learning_rate": 1.896322574792813e-05, "loss": 0.2256, "step": 5124 }, { "epoch": 0.5311431236397555, "grad_norm": 0.4985640347003937, "learning_rate": 1.895652131092454e-05, "loss": 0.1825, "step": 5125 }, { "epoch": 0.5312467613224169, "grad_norm": 0.5422653555870056, "learning_rate": 1.8949816991498512e-05, "loss": 0.201, "step": 5126 }, { "epoch": 0.5313503990050783, "grad_norm": 0.4722491204738617, "learning_rate": 1.894311279040548e-05, "loss": 0.18, "step": 5127 }, { "epoch": 0.5314540366877397, "grad_norm": 0.5422795414924622, "learning_rate": 1.8936408708400843e-05, "loss": 0.1689, "step": 5128 }, { "epoch": 0.531557674370401, "grad_norm": 0.5806071162223816, "learning_rate": 1.8929704746240028e-05, "loss": 0.2322, "step": 5129 }, { "epoch": 0.5316613120530624, "grad_norm": 0.5612425208091736, "learning_rate": 1.8923000904678413e-05, "loss": 0.2202, "step": 5130 }, { "epoch": 0.5317649497357239, "grad_norm": 0.6409311294555664, "learning_rate": 1.891629718447138e-05, "loss": 0.2545, "step": 5131 }, { "epoch": 0.5318685874183853, "grad_norm": 0.507507860660553, "learning_rate": 1.8909593586374306e-05, "loss": 0.1902, "step": 5132 }, { "epoch": 0.5319722251010467, "grad_norm": 0.5384594202041626, "learning_rate": 1.8902890111142524e-05, "loss": 0.2057, "step": 5133 }, { "epoch": 0.5320758627837081, "grad_norm": 0.5002520680427551, "learning_rate": 1.8896186759531386e-05, "loss": 0.1977, "step": 5134 }, { "epoch": 0.5321795004663695, "grad_norm": 0.5171283483505249, "learning_rate": 1.8889483532296198e-05, "loss": 0.1795, "step": 5135 }, { "epoch": 0.5322831381490309, "grad_norm": 0.4970911145210266, "learning_rate": 1.8882780430192283e-05, "loss": 0.1786, "step": 5136 }, { "epoch": 0.5323867758316924, "grad_norm": 0.5327664613723755, "learning_rate": 1.8876077453974936e-05, "loss": 0.2345, "step": 5137 }, { "epoch": 0.5324904135143538, "grad_norm": 0.5078004002571106, "learning_rate": 1.886937460439943e-05, "loss": 0.1826, "step": 5138 }, { "epoch": 0.5325940511970152, "grad_norm": 0.5058934092521667, "learning_rate": 1.886267188222104e-05, "loss": 0.2089, "step": 5139 }, { "epoch": 0.5326976888796766, "grad_norm": 0.5866641998291016, "learning_rate": 1.885596928819501e-05, "loss": 0.2104, "step": 5140 }, { "epoch": 0.532801326562338, "grad_norm": 0.5487980842590332, "learning_rate": 1.8849266823076578e-05, "loss": 0.2334, "step": 5141 }, { "epoch": 0.5329049642449994, "grad_norm": 0.6039403080940247, "learning_rate": 1.8842564487620987e-05, "loss": 0.248, "step": 5142 }, { "epoch": 0.5330086019276609, "grad_norm": 0.6887938380241394, "learning_rate": 1.8835862282583418e-05, "loss": 0.2647, "step": 5143 }, { "epoch": 0.5331122396103223, "grad_norm": 0.542961835861206, "learning_rate": 1.8829160208719082e-05, "loss": 0.2161, "step": 5144 }, { "epoch": 0.5332158772929837, "grad_norm": 0.5670207738876343, "learning_rate": 1.8822458266783154e-05, "loss": 0.1922, "step": 5145 }, { "epoch": 0.5333195149756451, "grad_norm": 0.5435192584991455, "learning_rate": 1.8815756457530786e-05, "loss": 0.194, "step": 5146 }, { "epoch": 0.5334231526583065, "grad_norm": 0.4792133569717407, "learning_rate": 1.8809054781717156e-05, "loss": 0.199, "step": 5147 }, { "epoch": 0.533526790340968, "grad_norm": 0.5294674634933472, "learning_rate": 1.8802353240097374e-05, "loss": 0.2228, "step": 5148 }, { "epoch": 0.5336304280236294, "grad_norm": 0.5679932832717896, "learning_rate": 1.8795651833426572e-05, "loss": 0.2322, "step": 5149 }, { "epoch": 0.5337340657062908, "grad_norm": 0.5157346725463867, "learning_rate": 1.878895056245984e-05, "loss": 0.1722, "step": 5150 }, { "epoch": 0.5338377033889522, "grad_norm": 0.5690714120864868, "learning_rate": 1.8782249427952282e-05, "loss": 0.2167, "step": 5151 }, { "epoch": 0.5339413410716136, "grad_norm": 0.5080891847610474, "learning_rate": 1.877554843065897e-05, "loss": 0.1814, "step": 5152 }, { "epoch": 0.534044978754275, "grad_norm": 0.5569474101066589, "learning_rate": 1.8768847571334945e-05, "loss": 0.2148, "step": 5153 }, { "epoch": 0.5341486164369365, "grad_norm": 0.5498877763748169, "learning_rate": 1.876214685073527e-05, "loss": 0.2354, "step": 5154 }, { "epoch": 0.5342522541195979, "grad_norm": 0.6328274011611938, "learning_rate": 1.8755446269614964e-05, "loss": 0.2635, "step": 5155 }, { "epoch": 0.5343558918022593, "grad_norm": 0.5649670362472534, "learning_rate": 1.874874582872903e-05, "loss": 0.2419, "step": 5156 }, { "epoch": 0.5344595294849207, "grad_norm": 0.6114292144775391, "learning_rate": 1.8742045528832482e-05, "loss": 0.2163, "step": 5157 }, { "epoch": 0.5345631671675821, "grad_norm": 0.47934606671333313, "learning_rate": 1.8735345370680283e-05, "loss": 0.183, "step": 5158 }, { "epoch": 0.5346668048502435, "grad_norm": 0.5587397217750549, "learning_rate": 1.8728645355027407e-05, "loss": 0.2233, "step": 5159 }, { "epoch": 0.534770442532905, "grad_norm": 0.556676983833313, "learning_rate": 1.8721945482628786e-05, "loss": 0.2177, "step": 5160 }, { "epoch": 0.5348740802155664, "grad_norm": 0.4846968352794647, "learning_rate": 1.8715245754239363e-05, "loss": 0.2065, "step": 5161 }, { "epoch": 0.5349777178982278, "grad_norm": 0.6382477283477783, "learning_rate": 1.870854617061406e-05, "loss": 0.2509, "step": 5162 }, { "epoch": 0.5350813555808892, "grad_norm": 0.5473085045814514, "learning_rate": 1.8701846732507757e-05, "loss": 0.2239, "step": 5163 }, { "epoch": 0.5351849932635506, "grad_norm": 0.5322921276092529, "learning_rate": 1.8695147440675354e-05, "loss": 0.1842, "step": 5164 }, { "epoch": 0.535288630946212, "grad_norm": 0.5844441652297974, "learning_rate": 1.8688448295871705e-05, "loss": 0.2158, "step": 5165 }, { "epoch": 0.5353922686288735, "grad_norm": 0.49183139204978943, "learning_rate": 1.868174929885166e-05, "loss": 0.1852, "step": 5166 }, { "epoch": 0.5354959063115349, "grad_norm": 0.659501850605011, "learning_rate": 1.867505045037006e-05, "loss": 0.2578, "step": 5167 }, { "epoch": 0.5355995439941963, "grad_norm": 0.5893154740333557, "learning_rate": 1.8668351751181715e-05, "loss": 0.2354, "step": 5168 }, { "epoch": 0.5357031816768577, "grad_norm": 0.5923011302947998, "learning_rate": 1.8661653202041427e-05, "loss": 0.2168, "step": 5169 }, { "epoch": 0.5358068193595191, "grad_norm": 0.5813918709754944, "learning_rate": 1.8654954803703967e-05, "loss": 0.2229, "step": 5170 }, { "epoch": 0.5359104570421805, "grad_norm": 0.5587177276611328, "learning_rate": 1.8648256556924114e-05, "loss": 0.2057, "step": 5171 }, { "epoch": 0.536014094724842, "grad_norm": 0.5316544771194458, "learning_rate": 1.8641558462456614e-05, "loss": 0.219, "step": 5172 }, { "epoch": 0.5361177324075034, "grad_norm": 0.47134703397750854, "learning_rate": 1.8634860521056187e-05, "loss": 0.1603, "step": 5173 }, { "epoch": 0.5362213700901648, "grad_norm": 0.5119736194610596, "learning_rate": 1.8628162733477567e-05, "loss": 0.2112, "step": 5174 }, { "epoch": 0.5363250077728262, "grad_norm": 0.5992642641067505, "learning_rate": 1.8621465100475426e-05, "loss": 0.2282, "step": 5175 }, { "epoch": 0.5364286454554876, "grad_norm": 0.5826268792152405, "learning_rate": 1.8614767622804457e-05, "loss": 0.2159, "step": 5176 }, { "epoch": 0.536532283138149, "grad_norm": 0.4886619448661804, "learning_rate": 1.8608070301219323e-05, "loss": 0.1942, "step": 5177 }, { "epoch": 0.5366359208208105, "grad_norm": 0.5100725293159485, "learning_rate": 1.8601373136474657e-05, "loss": 0.1664, "step": 5178 }, { "epoch": 0.5367395585034719, "grad_norm": 0.5713315606117249, "learning_rate": 1.85946761293251e-05, "loss": 0.2196, "step": 5179 }, { "epoch": 0.5368431961861333, "grad_norm": 0.5622671246528625, "learning_rate": 1.8587979280525245e-05, "loss": 0.1861, "step": 5180 }, { "epoch": 0.5369468338687947, "grad_norm": 0.5729225873947144, "learning_rate": 1.8581282590829687e-05, "loss": 0.178, "step": 5181 }, { "epoch": 0.5370504715514561, "grad_norm": 0.5069655179977417, "learning_rate": 1.857458606099301e-05, "loss": 0.1662, "step": 5182 }, { "epoch": 0.5371541092341175, "grad_norm": 0.5307016968727112, "learning_rate": 1.8567889691769757e-05, "loss": 0.1914, "step": 5183 }, { "epoch": 0.537257746916779, "grad_norm": 0.5231932997703552, "learning_rate": 1.856119348391447e-05, "loss": 0.2088, "step": 5184 }, { "epoch": 0.5373613845994404, "grad_norm": 0.49470946192741394, "learning_rate": 1.8554497438181655e-05, "loss": 0.1688, "step": 5185 }, { "epoch": 0.5374650222821018, "grad_norm": 0.5611957311630249, "learning_rate": 1.8547801555325827e-05, "loss": 0.2162, "step": 5186 }, { "epoch": 0.5375686599647632, "grad_norm": 0.5838019251823425, "learning_rate": 1.854110583610147e-05, "loss": 0.188, "step": 5187 }, { "epoch": 0.5376722976474246, "grad_norm": 0.5590904355049133, "learning_rate": 1.8534410281263027e-05, "loss": 0.2025, "step": 5188 }, { "epoch": 0.537775935330086, "grad_norm": 0.6141221523284912, "learning_rate": 1.8527714891564963e-05, "loss": 0.2027, "step": 5189 }, { "epoch": 0.5378795730127475, "grad_norm": 0.6489647030830383, "learning_rate": 1.8521019667761697e-05, "loss": 0.241, "step": 5190 }, { "epoch": 0.5379832106954089, "grad_norm": 0.5584114193916321, "learning_rate": 1.8514324610607626e-05, "loss": 0.2239, "step": 5191 }, { "epoch": 0.5380868483780703, "grad_norm": 0.5955229997634888, "learning_rate": 1.8507629720857156e-05, "loss": 0.2114, "step": 5192 }, { "epoch": 0.5381904860607317, "grad_norm": 0.5925705432891846, "learning_rate": 1.850093499926465e-05, "loss": 0.1965, "step": 5193 }, { "epoch": 0.5382941237433931, "grad_norm": 0.556430995464325, "learning_rate": 1.849424044658446e-05, "loss": 0.1813, "step": 5194 }, { "epoch": 0.5383977614260546, "grad_norm": 0.5698986053466797, "learning_rate": 1.8487546063570905e-05, "loss": 0.1943, "step": 5195 }, { "epoch": 0.538501399108716, "grad_norm": 0.5680100917816162, "learning_rate": 1.8480851850978315e-05, "loss": 0.201, "step": 5196 }, { "epoch": 0.5386050367913774, "grad_norm": 0.5762329697608948, "learning_rate": 1.847415780956098e-05, "loss": 0.2255, "step": 5197 }, { "epoch": 0.5387086744740388, "grad_norm": 0.6530503630638123, "learning_rate": 1.8467463940073165e-05, "loss": 0.2322, "step": 5198 }, { "epoch": 0.5388123121567002, "grad_norm": 0.5293810367584229, "learning_rate": 1.8460770243269135e-05, "loss": 0.1851, "step": 5199 }, { "epoch": 0.5389159498393616, "grad_norm": 0.6074128746986389, "learning_rate": 1.8454076719903122e-05, "loss": 0.2146, "step": 5200 }, { "epoch": 0.5390195875220231, "grad_norm": 0.602114737033844, "learning_rate": 1.8447383370729335e-05, "loss": 0.2105, "step": 5201 }, { "epoch": 0.5391232252046845, "grad_norm": 0.6234201788902283, "learning_rate": 1.844069019650198e-05, "loss": 0.2281, "step": 5202 }, { "epoch": 0.5392268628873459, "grad_norm": 0.6111294627189636, "learning_rate": 1.8433997197975234e-05, "loss": 0.2109, "step": 5203 }, { "epoch": 0.5393305005700073, "grad_norm": 0.5196408629417419, "learning_rate": 1.8427304375903247e-05, "loss": 0.2038, "step": 5204 }, { "epoch": 0.5394341382526686, "grad_norm": 0.4035626947879791, "learning_rate": 1.8420611731040155e-05, "loss": 0.1288, "step": 5205 }, { "epoch": 0.53953777593533, "grad_norm": 0.6713810563087463, "learning_rate": 1.8413919264140078e-05, "loss": 0.2453, "step": 5206 }, { "epoch": 0.5396414136179915, "grad_norm": 0.5893852114677429, "learning_rate": 1.8407226975957118e-05, "loss": 0.2025, "step": 5207 }, { "epoch": 0.5397450513006529, "grad_norm": 0.53298020362854, "learning_rate": 1.840053486724534e-05, "loss": 0.1801, "step": 5208 }, { "epoch": 0.5398486889833143, "grad_norm": 0.5477540493011475, "learning_rate": 1.8393842938758814e-05, "loss": 0.1831, "step": 5209 }, { "epoch": 0.5399523266659757, "grad_norm": 0.5426579117774963, "learning_rate": 1.838715119125156e-05, "loss": 0.2084, "step": 5210 }, { "epoch": 0.5400559643486371, "grad_norm": 0.47737252712249756, "learning_rate": 1.8380459625477605e-05, "loss": 0.1755, "step": 5211 }, { "epoch": 0.5401596020312985, "grad_norm": 0.6123090982437134, "learning_rate": 1.8373768242190947e-05, "loss": 0.2052, "step": 5212 }, { "epoch": 0.54026323971396, "grad_norm": 0.5149741172790527, "learning_rate": 1.8367077042145547e-05, "loss": 0.1853, "step": 5213 }, { "epoch": 0.5403668773966214, "grad_norm": 0.580742597579956, "learning_rate": 1.8360386026095377e-05, "loss": 0.2322, "step": 5214 }, { "epoch": 0.5404705150792828, "grad_norm": 0.5247928500175476, "learning_rate": 1.8353695194794355e-05, "loss": 0.1819, "step": 5215 }, { "epoch": 0.5405741527619442, "grad_norm": 0.5545816421508789, "learning_rate": 1.8347004548996395e-05, "loss": 0.1981, "step": 5216 }, { "epoch": 0.5406777904446056, "grad_norm": 0.5632359385490417, "learning_rate": 1.8340314089455403e-05, "loss": 0.1988, "step": 5217 }, { "epoch": 0.540781428127267, "grad_norm": 0.5110858082771301, "learning_rate": 1.8333623816925232e-05, "loss": 0.1861, "step": 5218 }, { "epoch": 0.5408850658099285, "grad_norm": 0.563378095626831, "learning_rate": 1.8326933732159748e-05, "loss": 0.2071, "step": 5219 }, { "epoch": 0.5409887034925899, "grad_norm": 0.6319681406021118, "learning_rate": 1.8320243835912755e-05, "loss": 0.2227, "step": 5220 }, { "epoch": 0.5410923411752513, "grad_norm": 0.7164770364761353, "learning_rate": 1.8313554128938084e-05, "loss": 0.2571, "step": 5221 }, { "epoch": 0.5411959788579127, "grad_norm": 0.5762877464294434, "learning_rate": 1.8306864611989518e-05, "loss": 0.2073, "step": 5222 }, { "epoch": 0.5412996165405741, "grad_norm": 0.570988118648529, "learning_rate": 1.8300175285820802e-05, "loss": 0.2105, "step": 5223 }, { "epoch": 0.5414032542232355, "grad_norm": 0.54582279920578, "learning_rate": 1.8293486151185703e-05, "loss": 0.2258, "step": 5224 }, { "epoch": 0.541506891905897, "grad_norm": 0.598730206489563, "learning_rate": 1.828679720883793e-05, "loss": 0.2315, "step": 5225 }, { "epoch": 0.5416105295885584, "grad_norm": 0.5679086446762085, "learning_rate": 1.828010845953118e-05, "loss": 0.1977, "step": 5226 }, { "epoch": 0.5417141672712198, "grad_norm": 0.5190294981002808, "learning_rate": 1.827341990401914e-05, "loss": 0.1964, "step": 5227 }, { "epoch": 0.5418178049538812, "grad_norm": 0.5730674266815186, "learning_rate": 1.826673154305546e-05, "loss": 0.2332, "step": 5228 }, { "epoch": 0.5419214426365426, "grad_norm": 0.5738920569419861, "learning_rate": 1.8260043377393777e-05, "loss": 0.1991, "step": 5229 }, { "epoch": 0.542025080319204, "grad_norm": 0.648719847202301, "learning_rate": 1.8253355407787693e-05, "loss": 0.2623, "step": 5230 }, { "epoch": 0.5421287180018655, "grad_norm": 0.5778685808181763, "learning_rate": 1.8246667634990815e-05, "loss": 0.2092, "step": 5231 }, { "epoch": 0.5422323556845269, "grad_norm": 0.47101718187332153, "learning_rate": 1.82399800597567e-05, "loss": 0.1834, "step": 5232 }, { "epoch": 0.5423359933671883, "grad_norm": 0.587877631187439, "learning_rate": 1.8233292682838892e-05, "loss": 0.226, "step": 5233 }, { "epoch": 0.5424396310498497, "grad_norm": 0.571196436882019, "learning_rate": 1.822660550499093e-05, "loss": 0.1996, "step": 5234 }, { "epoch": 0.5425432687325111, "grad_norm": 0.5070984363555908, "learning_rate": 1.821991852696629e-05, "loss": 0.1936, "step": 5235 }, { "epoch": 0.5426469064151725, "grad_norm": 0.49533402919769287, "learning_rate": 1.821323174951846e-05, "loss": 0.1849, "step": 5236 }, { "epoch": 0.542750544097834, "grad_norm": 0.44407081604003906, "learning_rate": 1.8206545173400915e-05, "loss": 0.1468, "step": 5237 }, { "epoch": 0.5428541817804954, "grad_norm": 0.5772473812103271, "learning_rate": 1.819985879936706e-05, "loss": 0.1975, "step": 5238 }, { "epoch": 0.5429578194631568, "grad_norm": 0.5305479168891907, "learning_rate": 1.8193172628170324e-05, "loss": 0.1822, "step": 5239 }, { "epoch": 0.5430614571458182, "grad_norm": 0.576831579208374, "learning_rate": 1.8186486660564083e-05, "loss": 0.2108, "step": 5240 }, { "epoch": 0.5431650948284796, "grad_norm": 0.5997173190116882, "learning_rate": 1.81798008973017e-05, "loss": 0.2474, "step": 5241 }, { "epoch": 0.543268732511141, "grad_norm": 0.5615498423576355, "learning_rate": 1.8173115339136537e-05, "loss": 0.2051, "step": 5242 }, { "epoch": 0.5433723701938025, "grad_norm": 0.6059595942497253, "learning_rate": 1.8166429986821887e-05, "loss": 0.2629, "step": 5243 }, { "epoch": 0.5434760078764639, "grad_norm": 0.5712944865226746, "learning_rate": 1.8159744841111064e-05, "loss": 0.2219, "step": 5244 }, { "epoch": 0.5435796455591253, "grad_norm": 0.5239284634590149, "learning_rate": 1.8153059902757322e-05, "loss": 0.1881, "step": 5245 }, { "epoch": 0.5436832832417867, "grad_norm": 0.5493417978286743, "learning_rate": 1.814637517251392e-05, "loss": 0.1873, "step": 5246 }, { "epoch": 0.5437869209244481, "grad_norm": 0.47680315375328064, "learning_rate": 1.8139690651134092e-05, "loss": 0.1967, "step": 5247 }, { "epoch": 0.5438905586071096, "grad_norm": 0.6329566836357117, "learning_rate": 1.813300633937102e-05, "loss": 0.201, "step": 5248 }, { "epoch": 0.543994196289771, "grad_norm": 0.5699524879455566, "learning_rate": 1.8126322237977894e-05, "loss": 0.2109, "step": 5249 }, { "epoch": 0.5440978339724324, "grad_norm": 0.6076616644859314, "learning_rate": 1.8119638347707868e-05, "loss": 0.2299, "step": 5250 }, { "epoch": 0.5442014716550938, "grad_norm": 0.5323348045349121, "learning_rate": 1.811295466931406e-05, "loss": 0.1677, "step": 5251 }, { "epoch": 0.5443051093377552, "grad_norm": 0.5809974670410156, "learning_rate": 1.81062712035496e-05, "loss": 0.2257, "step": 5252 }, { "epoch": 0.5444087470204166, "grad_norm": 0.5623114109039307, "learning_rate": 1.809958795116755e-05, "loss": 0.2039, "step": 5253 }, { "epoch": 0.5445123847030781, "grad_norm": 0.6232031583786011, "learning_rate": 1.809290491292098e-05, "loss": 0.2525, "step": 5254 }, { "epoch": 0.5446160223857395, "grad_norm": 0.5744406580924988, "learning_rate": 1.808622208956291e-05, "loss": 0.2111, "step": 5255 }, { "epoch": 0.5447196600684009, "grad_norm": 0.5415107011795044, "learning_rate": 1.8079539481846366e-05, "loss": 0.2234, "step": 5256 }, { "epoch": 0.5448232977510623, "grad_norm": 0.673896312713623, "learning_rate": 1.8072857090524332e-05, "loss": 0.2452, "step": 5257 }, { "epoch": 0.5449269354337237, "grad_norm": 0.5377049446105957, "learning_rate": 1.806617491634976e-05, "loss": 0.2092, "step": 5258 }, { "epoch": 0.5450305731163851, "grad_norm": 0.5719529986381531, "learning_rate": 1.8059492960075593e-05, "loss": 0.1924, "step": 5259 }, { "epoch": 0.5451342107990466, "grad_norm": 0.5697689652442932, "learning_rate": 1.8052811222454743e-05, "loss": 0.2074, "step": 5260 }, { "epoch": 0.545237848481708, "grad_norm": 0.5704839825630188, "learning_rate": 1.804612970424009e-05, "loss": 0.1868, "step": 5261 }, { "epoch": 0.5453414861643694, "grad_norm": 0.5120481848716736, "learning_rate": 1.803944840618452e-05, "loss": 0.1946, "step": 5262 }, { "epoch": 0.5454451238470308, "grad_norm": 0.561516523361206, "learning_rate": 1.8032767329040846e-05, "loss": 0.2016, "step": 5263 }, { "epoch": 0.5455487615296922, "grad_norm": 0.5484476089477539, "learning_rate": 1.802608647356189e-05, "loss": 0.1962, "step": 5264 }, { "epoch": 0.5456523992123536, "grad_norm": 0.5292932987213135, "learning_rate": 1.8019405840500446e-05, "loss": 0.1673, "step": 5265 }, { "epoch": 0.5457560368950151, "grad_norm": 0.539897620677948, "learning_rate": 1.8012725430609273e-05, "loss": 0.1865, "step": 5266 }, { "epoch": 0.5458596745776765, "grad_norm": 0.5219693183898926, "learning_rate": 1.800604524464111e-05, "loss": 0.2064, "step": 5267 }, { "epoch": 0.5459633122603379, "grad_norm": 0.5294369459152222, "learning_rate": 1.7999365283348665e-05, "loss": 0.198, "step": 5268 }, { "epoch": 0.5460669499429993, "grad_norm": 0.49916645884513855, "learning_rate": 1.7992685547484628e-05, "loss": 0.1724, "step": 5269 }, { "epoch": 0.5461705876256607, "grad_norm": 0.5305742621421814, "learning_rate": 1.7986006037801674e-05, "loss": 0.1846, "step": 5270 }, { "epoch": 0.5462742253083221, "grad_norm": 0.6302502751350403, "learning_rate": 1.797932675505242e-05, "loss": 0.2534, "step": 5271 }, { "epoch": 0.5463778629909836, "grad_norm": 0.496896892786026, "learning_rate": 1.797264769998949e-05, "loss": 0.2217, "step": 5272 }, { "epoch": 0.546481500673645, "grad_norm": 0.5490161776542664, "learning_rate": 1.796596887336546e-05, "loss": 0.1957, "step": 5273 }, { "epoch": 0.5465851383563064, "grad_norm": 0.517518937587738, "learning_rate": 1.79592902759329e-05, "loss": 0.197, "step": 5274 }, { "epoch": 0.5466887760389678, "grad_norm": 0.5890717506408691, "learning_rate": 1.7952611908444342e-05, "loss": 0.1834, "step": 5275 }, { "epoch": 0.5467924137216292, "grad_norm": 0.528845489025116, "learning_rate": 1.794593377165228e-05, "loss": 0.2173, "step": 5276 }, { "epoch": 0.5468960514042907, "grad_norm": 0.6080582141876221, "learning_rate": 1.793925586630922e-05, "loss": 0.2091, "step": 5277 }, { "epoch": 0.5469996890869521, "grad_norm": 0.5256290435791016, "learning_rate": 1.79325781931676e-05, "loss": 0.173, "step": 5278 }, { "epoch": 0.5471033267696135, "grad_norm": 0.5938299894332886, "learning_rate": 1.7925900752979853e-05, "loss": 0.2164, "step": 5279 }, { "epoch": 0.5472069644522749, "grad_norm": 0.6256065368652344, "learning_rate": 1.791922354649839e-05, "loss": 0.2082, "step": 5280 }, { "epoch": 0.5473106021349362, "grad_norm": 0.562027633190155, "learning_rate": 1.791254657447558e-05, "loss": 0.186, "step": 5281 }, { "epoch": 0.5474142398175976, "grad_norm": 0.6371752023696899, "learning_rate": 1.7905869837663783e-05, "loss": 0.2397, "step": 5282 }, { "epoch": 0.547517877500259, "grad_norm": 0.5378360152244568, "learning_rate": 1.7899193336815307e-05, "loss": 0.1959, "step": 5283 }, { "epoch": 0.5476215151829205, "grad_norm": 0.5654333233833313, "learning_rate": 1.7892517072682466e-05, "loss": 0.2047, "step": 5284 }, { "epoch": 0.5477251528655819, "grad_norm": 0.49719518423080444, "learning_rate": 1.7885841046017528e-05, "loss": 0.1789, "step": 5285 }, { "epoch": 0.5478287905482433, "grad_norm": 0.5758084058761597, "learning_rate": 1.7879165257572725e-05, "loss": 0.2246, "step": 5286 }, { "epoch": 0.5479324282309047, "grad_norm": 0.5711327791213989, "learning_rate": 1.7872489708100295e-05, "loss": 0.2078, "step": 5287 }, { "epoch": 0.5480360659135661, "grad_norm": 0.6247648000717163, "learning_rate": 1.786581439835241e-05, "loss": 0.2448, "step": 5288 }, { "epoch": 0.5481397035962275, "grad_norm": 0.6398104429244995, "learning_rate": 1.785913932908124e-05, "loss": 0.2248, "step": 5289 }, { "epoch": 0.548243341278889, "grad_norm": 0.602163553237915, "learning_rate": 1.785246450103893e-05, "loss": 0.2323, "step": 5290 }, { "epoch": 0.5483469789615504, "grad_norm": 0.6218096017837524, "learning_rate": 1.7845789914977578e-05, "loss": 0.2218, "step": 5291 }, { "epoch": 0.5484506166442118, "grad_norm": 0.5533711314201355, "learning_rate": 1.783911557164927e-05, "loss": 0.1929, "step": 5292 }, { "epoch": 0.5485542543268732, "grad_norm": 0.5798667669296265, "learning_rate": 1.7832441471806053e-05, "loss": 0.1912, "step": 5293 }, { "epoch": 0.5486578920095346, "grad_norm": 0.6059601306915283, "learning_rate": 1.782576761619997e-05, "loss": 0.2237, "step": 5294 }, { "epoch": 0.548761529692196, "grad_norm": 0.553433895111084, "learning_rate": 1.781909400558301e-05, "loss": 0.1725, "step": 5295 }, { "epoch": 0.5488651673748575, "grad_norm": 0.5430698990821838, "learning_rate": 1.7812420640707143e-05, "loss": 0.2093, "step": 5296 }, { "epoch": 0.5489688050575189, "grad_norm": 0.5995080471038818, "learning_rate": 1.780574752232433e-05, "loss": 0.221, "step": 5297 }, { "epoch": 0.5490724427401803, "grad_norm": 0.46209654211997986, "learning_rate": 1.779907465118646e-05, "loss": 0.1817, "step": 5298 }, { "epoch": 0.5491760804228417, "grad_norm": 0.5681026577949524, "learning_rate": 1.7792402028045442e-05, "loss": 0.2409, "step": 5299 }, { "epoch": 0.5492797181055031, "grad_norm": 0.5082581043243408, "learning_rate": 1.7785729653653137e-05, "loss": 0.1704, "step": 5300 }, { "epoch": 0.5493833557881646, "grad_norm": 0.559217631816864, "learning_rate": 1.7779057528761364e-05, "loss": 0.2277, "step": 5301 }, { "epoch": 0.549486993470826, "grad_norm": 0.5087469220161438, "learning_rate": 1.7772385654121947e-05, "loss": 0.1993, "step": 5302 }, { "epoch": 0.5495906311534874, "grad_norm": 0.5892218947410583, "learning_rate": 1.7765714030486644e-05, "loss": 0.2039, "step": 5303 }, { "epoch": 0.5496942688361488, "grad_norm": 0.5468608736991882, "learning_rate": 1.7759042658607208e-05, "loss": 0.1872, "step": 5304 }, { "epoch": 0.5497979065188102, "grad_norm": 0.4923431873321533, "learning_rate": 1.7752371539235367e-05, "loss": 0.1761, "step": 5305 }, { "epoch": 0.5499015442014716, "grad_norm": 0.5435743927955627, "learning_rate": 1.7745700673122804e-05, "loss": 0.1834, "step": 5306 }, { "epoch": 0.550005181884133, "grad_norm": 0.5914965271949768, "learning_rate": 1.773903006102119e-05, "loss": 0.2169, "step": 5307 }, { "epoch": 0.5501088195667945, "grad_norm": 0.6041461825370789, "learning_rate": 1.7732359703682146e-05, "loss": 0.2351, "step": 5308 }, { "epoch": 0.5502124572494559, "grad_norm": 0.5261842012405396, "learning_rate": 1.772568960185729e-05, "loss": 0.186, "step": 5309 }, { "epoch": 0.5503160949321173, "grad_norm": 0.5709972977638245, "learning_rate": 1.7719019756298198e-05, "loss": 0.1804, "step": 5310 }, { "epoch": 0.5504197326147787, "grad_norm": 0.5214843153953552, "learning_rate": 1.7712350167756407e-05, "loss": 0.2023, "step": 5311 }, { "epoch": 0.5505233702974401, "grad_norm": 0.6243929266929626, "learning_rate": 1.7705680836983448e-05, "loss": 0.2634, "step": 5312 }, { "epoch": 0.5506270079801016, "grad_norm": 0.5082367062568665, "learning_rate": 1.7699011764730806e-05, "loss": 0.1727, "step": 5313 }, { "epoch": 0.550730645662763, "grad_norm": 0.604770302772522, "learning_rate": 1.769234295174993e-05, "loss": 0.2468, "step": 5314 }, { "epoch": 0.5508342833454244, "grad_norm": 0.5542752146720886, "learning_rate": 1.7685674398792278e-05, "loss": 0.22, "step": 5315 }, { "epoch": 0.5509379210280858, "grad_norm": 0.6486345529556274, "learning_rate": 1.7679006106609228e-05, "loss": 0.2442, "step": 5316 }, { "epoch": 0.5510415587107472, "grad_norm": 0.6120084524154663, "learning_rate": 1.767233807595217e-05, "loss": 0.2233, "step": 5317 }, { "epoch": 0.5511451963934086, "grad_norm": 0.5620784163475037, "learning_rate": 1.766567030757243e-05, "loss": 0.2261, "step": 5318 }, { "epoch": 0.5512488340760701, "grad_norm": 0.6042924523353577, "learning_rate": 1.7659002802221334e-05, "loss": 0.2238, "step": 5319 }, { "epoch": 0.5513524717587315, "grad_norm": 0.59671950340271, "learning_rate": 1.7652335560650165e-05, "loss": 0.221, "step": 5320 }, { "epoch": 0.5514561094413929, "grad_norm": 0.6434497237205505, "learning_rate": 1.764566858361017e-05, "loss": 0.2577, "step": 5321 }, { "epoch": 0.5515597471240543, "grad_norm": 0.5868349671363831, "learning_rate": 1.763900187185259e-05, "loss": 0.2109, "step": 5322 }, { "epoch": 0.5516633848067157, "grad_norm": 0.6528464555740356, "learning_rate": 1.76323354261286e-05, "loss": 0.2481, "step": 5323 }, { "epoch": 0.5517670224893771, "grad_norm": 0.5790517926216125, "learning_rate": 1.7625669247189372e-05, "loss": 0.2097, "step": 5324 }, { "epoch": 0.5518706601720386, "grad_norm": 0.5709297060966492, "learning_rate": 1.7619003335786053e-05, "loss": 0.2235, "step": 5325 }, { "epoch": 0.5519742978547, "grad_norm": 0.5696895122528076, "learning_rate": 1.7612337692669726e-05, "loss": 0.2164, "step": 5326 }, { "epoch": 0.5520779355373614, "grad_norm": 0.5986068248748779, "learning_rate": 1.7605672318591484e-05, "loss": 0.2088, "step": 5327 }, { "epoch": 0.5521815732200228, "grad_norm": 0.6396403908729553, "learning_rate": 1.7599007214302356e-05, "loss": 0.2216, "step": 5328 }, { "epoch": 0.5522852109026842, "grad_norm": 0.49083516001701355, "learning_rate": 1.7592342380553366e-05, "loss": 0.1829, "step": 5329 }, { "epoch": 0.5523888485853456, "grad_norm": 0.5620934367179871, "learning_rate": 1.7585677818095493e-05, "loss": 0.2067, "step": 5330 }, { "epoch": 0.5524924862680071, "grad_norm": 0.5595473051071167, "learning_rate": 1.7579013527679694e-05, "loss": 0.195, "step": 5331 }, { "epoch": 0.5525961239506685, "grad_norm": 0.48822200298309326, "learning_rate": 1.7572349510056886e-05, "loss": 0.1652, "step": 5332 }, { "epoch": 0.5526997616333299, "grad_norm": 0.48362472653388977, "learning_rate": 1.7565685765977955e-05, "loss": 0.1778, "step": 5333 }, { "epoch": 0.5528033993159913, "grad_norm": 0.5342475771903992, "learning_rate": 1.7559022296193774e-05, "loss": 0.2133, "step": 5334 }, { "epoch": 0.5529070369986527, "grad_norm": 0.5649202466011047, "learning_rate": 1.7552359101455166e-05, "loss": 0.1966, "step": 5335 }, { "epoch": 0.5530106746813142, "grad_norm": 0.5567264556884766, "learning_rate": 1.7545696182512923e-05, "loss": 0.1973, "step": 5336 }, { "epoch": 0.5531143123639756, "grad_norm": 0.5125313401222229, "learning_rate": 1.753903354011783e-05, "loss": 0.1939, "step": 5337 }, { "epoch": 0.553217950046637, "grad_norm": 0.5587508082389832, "learning_rate": 1.7532371175020604e-05, "loss": 0.2031, "step": 5338 }, { "epoch": 0.5533215877292984, "grad_norm": 0.4835485816001892, "learning_rate": 1.7525709087971953e-05, "loss": 0.1942, "step": 5339 }, { "epoch": 0.5534252254119598, "grad_norm": 0.4491787552833557, "learning_rate": 1.7519047279722566e-05, "loss": 0.156, "step": 5340 }, { "epoch": 0.5535288630946212, "grad_norm": 0.555193305015564, "learning_rate": 1.751238575102307e-05, "loss": 0.1826, "step": 5341 }, { "epoch": 0.5536325007772827, "grad_norm": 0.48197105526924133, "learning_rate": 1.750572450262409e-05, "loss": 0.1616, "step": 5342 }, { "epoch": 0.5537361384599441, "grad_norm": 0.5574571490287781, "learning_rate": 1.7499063535276178e-05, "loss": 0.244, "step": 5343 }, { "epoch": 0.5538397761426055, "grad_norm": 0.5440523624420166, "learning_rate": 1.749240284972991e-05, "loss": 0.203, "step": 5344 }, { "epoch": 0.5539434138252669, "grad_norm": 0.6209940314292908, "learning_rate": 1.748574244673579e-05, "loss": 0.2292, "step": 5345 }, { "epoch": 0.5540470515079283, "grad_norm": 0.6357861161231995, "learning_rate": 1.7479082327044297e-05, "loss": 0.2167, "step": 5346 }, { "epoch": 0.5541506891905897, "grad_norm": 0.6537289023399353, "learning_rate": 1.74724224914059e-05, "loss": 0.2441, "step": 5347 }, { "epoch": 0.5542543268732512, "grad_norm": 0.5783520936965942, "learning_rate": 1.7465762940571e-05, "loss": 0.2096, "step": 5348 }, { "epoch": 0.5543579645559126, "grad_norm": 0.548708975315094, "learning_rate": 1.745910367528999e-05, "loss": 0.1975, "step": 5349 }, { "epoch": 0.554461602238574, "grad_norm": 0.5915510654449463, "learning_rate": 1.7452444696313232e-05, "loss": 0.2071, "step": 5350 }, { "epoch": 0.5545652399212354, "grad_norm": 0.3948794901371002, "learning_rate": 1.7445786004391046e-05, "loss": 0.1414, "step": 5351 }, { "epoch": 0.5546688776038968, "grad_norm": 0.5463305711746216, "learning_rate": 1.743912760027372e-05, "loss": 0.2018, "step": 5352 }, { "epoch": 0.5547725152865582, "grad_norm": 0.5292659401893616, "learning_rate": 1.7432469484711516e-05, "loss": 0.1838, "step": 5353 }, { "epoch": 0.5548761529692197, "grad_norm": 0.6309893131256104, "learning_rate": 1.7425811658454657e-05, "loss": 0.2153, "step": 5354 }, { "epoch": 0.5549797906518811, "grad_norm": 0.49494248628616333, "learning_rate": 1.7419154122253344e-05, "loss": 0.1536, "step": 5355 }, { "epoch": 0.5550834283345425, "grad_norm": 0.5081691145896912, "learning_rate": 1.741249687685772e-05, "loss": 0.2007, "step": 5356 }, { "epoch": 0.5551870660172038, "grad_norm": 0.5499346852302551, "learning_rate": 1.740583992301794e-05, "loss": 0.2021, "step": 5357 }, { "epoch": 0.5552907036998652, "grad_norm": 0.5672243237495422, "learning_rate": 1.7399183261484083e-05, "loss": 0.2332, "step": 5358 }, { "epoch": 0.5553943413825266, "grad_norm": 0.5117417573928833, "learning_rate": 1.7392526893006204e-05, "loss": 0.1965, "step": 5359 }, { "epoch": 0.555497979065188, "grad_norm": 0.5445107221603394, "learning_rate": 1.7385870818334353e-05, "loss": 0.1574, "step": 5360 }, { "epoch": 0.5556016167478495, "grad_norm": 0.5121918320655823, "learning_rate": 1.7379215038218505e-05, "loss": 0.1841, "step": 5361 }, { "epoch": 0.5557052544305109, "grad_norm": 0.5723550915718079, "learning_rate": 1.737255955340864e-05, "loss": 0.2184, "step": 5362 }, { "epoch": 0.5558088921131723, "grad_norm": 0.5024679899215698, "learning_rate": 1.7365904364654677e-05, "loss": 0.1718, "step": 5363 }, { "epoch": 0.5559125297958337, "grad_norm": 0.5417926907539368, "learning_rate": 1.7359249472706508e-05, "loss": 0.1767, "step": 5364 }, { "epoch": 0.5560161674784951, "grad_norm": 0.5689263343811035, "learning_rate": 1.7352594878314014e-05, "loss": 0.1896, "step": 5365 }, { "epoch": 0.5561198051611566, "grad_norm": 0.580989420413971, "learning_rate": 1.7345940582227004e-05, "loss": 0.2157, "step": 5366 }, { "epoch": 0.556223442843818, "grad_norm": 0.5768171548843384, "learning_rate": 1.7339286585195294e-05, "loss": 0.1891, "step": 5367 }, { "epoch": 0.5563270805264794, "grad_norm": 0.5534247159957886, "learning_rate": 1.7332632887968625e-05, "loss": 0.2009, "step": 5368 }, { "epoch": 0.5564307182091408, "grad_norm": 0.5858224630355835, "learning_rate": 1.732597949129674e-05, "loss": 0.2313, "step": 5369 }, { "epoch": 0.5565343558918022, "grad_norm": 0.6130474209785461, "learning_rate": 1.7319326395929335e-05, "loss": 0.2484, "step": 5370 }, { "epoch": 0.5566379935744636, "grad_norm": 0.5742282867431641, "learning_rate": 1.7312673602616055e-05, "loss": 0.2073, "step": 5371 }, { "epoch": 0.5567416312571251, "grad_norm": 0.5607818961143494, "learning_rate": 1.7306021112106543e-05, "loss": 0.2023, "step": 5372 }, { "epoch": 0.5568452689397865, "grad_norm": 0.5325602889060974, "learning_rate": 1.729936892515038e-05, "loss": 0.1775, "step": 5373 }, { "epoch": 0.5569489066224479, "grad_norm": 0.6108415722846985, "learning_rate": 1.7292717042497125e-05, "loss": 0.2389, "step": 5374 }, { "epoch": 0.5570525443051093, "grad_norm": 0.4758548438549042, "learning_rate": 1.7286065464896315e-05, "loss": 0.1564, "step": 5375 }, { "epoch": 0.5571561819877707, "grad_norm": 0.5790165662765503, "learning_rate": 1.7279414193097424e-05, "loss": 0.218, "step": 5376 }, { "epoch": 0.5572598196704321, "grad_norm": 0.603882908821106, "learning_rate": 1.7272763227849915e-05, "loss": 0.2127, "step": 5377 }, { "epoch": 0.5573634573530936, "grad_norm": 0.5412938594818115, "learning_rate": 1.7266112569903198e-05, "loss": 0.1934, "step": 5378 }, { "epoch": 0.557467095035755, "grad_norm": 0.5319254398345947, "learning_rate": 1.7259462220006673e-05, "loss": 0.2102, "step": 5379 }, { "epoch": 0.5575707327184164, "grad_norm": 0.534984827041626, "learning_rate": 1.7252812178909687e-05, "loss": 0.2199, "step": 5380 }, { "epoch": 0.5576743704010778, "grad_norm": 0.5318877100944519, "learning_rate": 1.7246162447361546e-05, "loss": 0.1978, "step": 5381 }, { "epoch": 0.5577780080837392, "grad_norm": 0.6203436255455017, "learning_rate": 1.723951302611155e-05, "loss": 0.2205, "step": 5382 }, { "epoch": 0.5578816457664006, "grad_norm": 0.5534346699714661, "learning_rate": 1.723286391590893e-05, "loss": 0.2086, "step": 5383 }, { "epoch": 0.5579852834490621, "grad_norm": 0.5063457489013672, "learning_rate": 1.7226215117502896e-05, "loss": 0.1778, "step": 5384 }, { "epoch": 0.5580889211317235, "grad_norm": 0.6248680949211121, "learning_rate": 1.721956663164264e-05, "loss": 0.2315, "step": 5385 }, { "epoch": 0.5581925588143849, "grad_norm": 0.5708314776420593, "learning_rate": 1.721291845907729e-05, "loss": 0.2203, "step": 5386 }, { "epoch": 0.5582961964970463, "grad_norm": 0.6492812633514404, "learning_rate": 1.720627060055596e-05, "loss": 0.2406, "step": 5387 }, { "epoch": 0.5583998341797077, "grad_norm": 0.507900059223175, "learning_rate": 1.7199623056827708e-05, "loss": 0.1866, "step": 5388 }, { "epoch": 0.5585034718623691, "grad_norm": 0.6005551815032959, "learning_rate": 1.719297582864158e-05, "loss": 0.2196, "step": 5389 }, { "epoch": 0.5586071095450306, "grad_norm": 0.649582028388977, "learning_rate": 1.7186328916746576e-05, "loss": 0.2067, "step": 5390 }, { "epoch": 0.558710747227692, "grad_norm": 0.5855095982551575, "learning_rate": 1.7179682321891648e-05, "loss": 0.2202, "step": 5391 }, { "epoch": 0.5588143849103534, "grad_norm": 0.5517435073852539, "learning_rate": 1.717303604482574e-05, "loss": 0.194, "step": 5392 }, { "epoch": 0.5589180225930148, "grad_norm": 0.5487895607948303, "learning_rate": 1.7166390086297727e-05, "loss": 0.1985, "step": 5393 }, { "epoch": 0.5590216602756762, "grad_norm": 0.5209961533546448, "learning_rate": 1.715974444705648e-05, "loss": 0.1841, "step": 5394 }, { "epoch": 0.5591252979583377, "grad_norm": 0.5808537602424622, "learning_rate": 1.7153099127850815e-05, "loss": 0.229, "step": 5395 }, { "epoch": 0.5592289356409991, "grad_norm": 0.5422407984733582, "learning_rate": 1.7146454129429508e-05, "loss": 0.1929, "step": 5396 }, { "epoch": 0.5593325733236605, "grad_norm": 0.672896146774292, "learning_rate": 1.7139809452541324e-05, "loss": 0.2492, "step": 5397 }, { "epoch": 0.5594362110063219, "grad_norm": 0.5069525837898254, "learning_rate": 1.7133165097934957e-05, "loss": 0.1646, "step": 5398 }, { "epoch": 0.5595398486889833, "grad_norm": 0.7111231088638306, "learning_rate": 1.7126521066359085e-05, "loss": 0.1945, "step": 5399 }, { "epoch": 0.5596434863716447, "grad_norm": 0.5727432370185852, "learning_rate": 1.711987735856236e-05, "loss": 0.2158, "step": 5400 }, { "epoch": 0.5597471240543062, "grad_norm": 0.5655161738395691, "learning_rate": 1.7113233975293377e-05, "loss": 0.2227, "step": 5401 }, { "epoch": 0.5598507617369676, "grad_norm": 0.4536275267601013, "learning_rate": 1.7106590917300706e-05, "loss": 0.1658, "step": 5402 }, { "epoch": 0.559954399419629, "grad_norm": 0.5846953392028809, "learning_rate": 1.7099948185332862e-05, "loss": 0.1957, "step": 5403 }, { "epoch": 0.5600580371022904, "grad_norm": 0.49920523166656494, "learning_rate": 1.7093305780138352e-05, "loss": 0.1929, "step": 5404 }, { "epoch": 0.5601616747849518, "grad_norm": 0.5249096155166626, "learning_rate": 1.7086663702465635e-05, "loss": 0.1501, "step": 5405 }, { "epoch": 0.5602653124676132, "grad_norm": 0.5586709380149841, "learning_rate": 1.7080021953063112e-05, "loss": 0.1983, "step": 5406 }, { "epoch": 0.5603689501502747, "grad_norm": 0.569900393486023, "learning_rate": 1.7073380532679187e-05, "loss": 0.2374, "step": 5407 }, { "epoch": 0.5604725878329361, "grad_norm": 0.6065160036087036, "learning_rate": 1.706673944206219e-05, "loss": 0.2185, "step": 5408 }, { "epoch": 0.5605762255155975, "grad_norm": 0.5849648118019104, "learning_rate": 1.7060098681960426e-05, "loss": 0.1944, "step": 5409 }, { "epoch": 0.5606798631982589, "grad_norm": 0.4929162263870239, "learning_rate": 1.7053458253122183e-05, "loss": 0.1848, "step": 5410 }, { "epoch": 0.5607835008809203, "grad_norm": 0.503531277179718, "learning_rate": 1.7046818156295678e-05, "loss": 0.1887, "step": 5411 }, { "epoch": 0.5608871385635817, "grad_norm": 0.6490597128868103, "learning_rate": 1.7040178392229116e-05, "loss": 0.2554, "step": 5412 }, { "epoch": 0.5609907762462432, "grad_norm": 0.5584279894828796, "learning_rate": 1.7033538961670647e-05, "loss": 0.2067, "step": 5413 }, { "epoch": 0.5610944139289046, "grad_norm": 0.478782057762146, "learning_rate": 1.7026899865368397e-05, "loss": 0.1619, "step": 5414 }, { "epoch": 0.561198051611566, "grad_norm": 0.6137062907218933, "learning_rate": 1.7020261104070453e-05, "loss": 0.2265, "step": 5415 }, { "epoch": 0.5613016892942274, "grad_norm": 0.49538955092430115, "learning_rate": 1.701362267852485e-05, "loss": 0.1764, "step": 5416 }, { "epoch": 0.5614053269768888, "grad_norm": 0.5120887756347656, "learning_rate": 1.7006984589479604e-05, "loss": 0.2008, "step": 5417 }, { "epoch": 0.5615089646595502, "grad_norm": 0.6444122791290283, "learning_rate": 1.7000346837682682e-05, "loss": 0.2251, "step": 5418 }, { "epoch": 0.5616126023422117, "grad_norm": 0.5767812728881836, "learning_rate": 1.699370942388201e-05, "loss": 0.2123, "step": 5419 }, { "epoch": 0.5617162400248731, "grad_norm": 0.4728251099586487, "learning_rate": 1.6987072348825493e-05, "loss": 0.1823, "step": 5420 }, { "epoch": 0.5618198777075345, "grad_norm": 0.7038987874984741, "learning_rate": 1.6980435613260978e-05, "loss": 0.2367, "step": 5421 }, { "epoch": 0.5619235153901959, "grad_norm": 0.6106839776039124, "learning_rate": 1.697379921793629e-05, "loss": 0.2225, "step": 5422 }, { "epoch": 0.5620271530728573, "grad_norm": 0.5902169346809387, "learning_rate": 1.696716316359919e-05, "loss": 0.2348, "step": 5423 }, { "epoch": 0.5621307907555187, "grad_norm": 0.5676903128623962, "learning_rate": 1.6960527450997436e-05, "loss": 0.1769, "step": 5424 }, { "epoch": 0.5622344284381802, "grad_norm": 0.5985881090164185, "learning_rate": 1.695389208087873e-05, "loss": 0.2165, "step": 5425 }, { "epoch": 0.5623380661208416, "grad_norm": 0.5767608880996704, "learning_rate": 1.6947257053990722e-05, "loss": 0.1941, "step": 5426 }, { "epoch": 0.562441703803503, "grad_norm": 0.49438780546188354, "learning_rate": 1.6940622371081047e-05, "loss": 0.2138, "step": 5427 }, { "epoch": 0.5625453414861644, "grad_norm": 0.5235041975975037, "learning_rate": 1.693398803289728e-05, "loss": 0.1915, "step": 5428 }, { "epoch": 0.5626489791688258, "grad_norm": 0.43641653656959534, "learning_rate": 1.692735404018698e-05, "loss": 0.1612, "step": 5429 }, { "epoch": 0.5627526168514873, "grad_norm": 0.4511115550994873, "learning_rate": 1.6920720393697655e-05, "loss": 0.163, "step": 5430 }, { "epoch": 0.5628562545341487, "grad_norm": 0.5550640821456909, "learning_rate": 1.6914087094176758e-05, "loss": 0.204, "step": 5431 }, { "epoch": 0.5629598922168101, "grad_norm": 0.5841968655586243, "learning_rate": 1.6907454142371742e-05, "loss": 0.2113, "step": 5432 }, { "epoch": 0.5630635298994714, "grad_norm": 0.6049975156784058, "learning_rate": 1.6900821539029982e-05, "loss": 0.2142, "step": 5433 }, { "epoch": 0.5631671675821328, "grad_norm": 0.5049476623535156, "learning_rate": 1.6894189284898825e-05, "loss": 0.1679, "step": 5434 }, { "epoch": 0.5632708052647942, "grad_norm": 0.5981543064117432, "learning_rate": 1.6887557380725602e-05, "loss": 0.2469, "step": 5435 }, { "epoch": 0.5633744429474556, "grad_norm": 0.663126528263092, "learning_rate": 1.6880925827257572e-05, "loss": 0.2443, "step": 5436 }, { "epoch": 0.5634780806301171, "grad_norm": 0.5187920928001404, "learning_rate": 1.6874294625241973e-05, "loss": 0.195, "step": 5437 }, { "epoch": 0.5635817183127785, "grad_norm": 0.5009486675262451, "learning_rate": 1.686766377542599e-05, "loss": 0.1967, "step": 5438 }, { "epoch": 0.5636853559954399, "grad_norm": 0.5059811472892761, "learning_rate": 1.6861033278556787e-05, "loss": 0.2093, "step": 5439 }, { "epoch": 0.5637889936781013, "grad_norm": 0.6117614507675171, "learning_rate": 1.685440313538148e-05, "loss": 0.2318, "step": 5440 }, { "epoch": 0.5638926313607627, "grad_norm": 0.5326939225196838, "learning_rate": 1.6847773346647123e-05, "loss": 0.1856, "step": 5441 }, { "epoch": 0.5639962690434241, "grad_norm": 0.5146560072898865, "learning_rate": 1.684114391310078e-05, "loss": 0.2088, "step": 5442 }, { "epoch": 0.5640999067260856, "grad_norm": 0.45405811071395874, "learning_rate": 1.6834514835489428e-05, "loss": 0.1723, "step": 5443 }, { "epoch": 0.564203544408747, "grad_norm": 0.5213394165039062, "learning_rate": 1.6827886114560014e-05, "loss": 0.1969, "step": 5444 }, { "epoch": 0.5643071820914084, "grad_norm": 0.5760708451271057, "learning_rate": 1.6821257751059473e-05, "loss": 0.2074, "step": 5445 }, { "epoch": 0.5644108197740698, "grad_norm": 0.5615007281303406, "learning_rate": 1.681462974573466e-05, "loss": 0.187, "step": 5446 }, { "epoch": 0.5645144574567312, "grad_norm": 0.5178715586662292, "learning_rate": 1.6808002099332422e-05, "loss": 0.1816, "step": 5447 }, { "epoch": 0.5646180951393927, "grad_norm": 0.5387221574783325, "learning_rate": 1.6801374812599537e-05, "loss": 0.1927, "step": 5448 }, { "epoch": 0.5647217328220541, "grad_norm": 0.5163358449935913, "learning_rate": 1.679474788628277e-05, "loss": 0.1766, "step": 5449 }, { "epoch": 0.5648253705047155, "grad_norm": 0.5913562178611755, "learning_rate": 1.6788121321128832e-05, "loss": 0.2392, "step": 5450 }, { "epoch": 0.5649290081873769, "grad_norm": 0.49062827229499817, "learning_rate": 1.6781495117884382e-05, "loss": 0.1978, "step": 5451 }, { "epoch": 0.5650326458700383, "grad_norm": 0.6228064894676208, "learning_rate": 1.6774869277296065e-05, "loss": 0.247, "step": 5452 }, { "epoch": 0.5651362835526997, "grad_norm": 0.645623505115509, "learning_rate": 1.6768243800110464e-05, "loss": 0.2396, "step": 5453 }, { "epoch": 0.5652399212353612, "grad_norm": 0.5212389230728149, "learning_rate": 1.676161868707412e-05, "loss": 0.2024, "step": 5454 }, { "epoch": 0.5653435589180226, "grad_norm": 0.6496005654335022, "learning_rate": 1.6754993938933564e-05, "loss": 0.2437, "step": 5455 }, { "epoch": 0.565447196600684, "grad_norm": 0.5416711568832397, "learning_rate": 1.6748369556435234e-05, "loss": 0.1886, "step": 5456 }, { "epoch": 0.5655508342833454, "grad_norm": 0.5800312161445618, "learning_rate": 1.6741745540325572e-05, "loss": 0.2158, "step": 5457 }, { "epoch": 0.5656544719660068, "grad_norm": 0.5040571093559265, "learning_rate": 1.6735121891350957e-05, "loss": 0.1883, "step": 5458 }, { "epoch": 0.5657581096486682, "grad_norm": 0.5947726368904114, "learning_rate": 1.672849861025773e-05, "loss": 0.2403, "step": 5459 }, { "epoch": 0.5658617473313297, "grad_norm": 0.5437408685684204, "learning_rate": 1.6721875697792198e-05, "loss": 0.211, "step": 5460 }, { "epoch": 0.5659653850139911, "grad_norm": 0.6372669339179993, "learning_rate": 1.6715253154700614e-05, "loss": 0.2235, "step": 5461 }, { "epoch": 0.5660690226966525, "grad_norm": 0.5936444401741028, "learning_rate": 1.6708630981729194e-05, "loss": 0.1839, "step": 5462 }, { "epoch": 0.5661726603793139, "grad_norm": 0.5205757021903992, "learning_rate": 1.6702009179624123e-05, "loss": 0.1911, "step": 5463 }, { "epoch": 0.5662762980619753, "grad_norm": 0.5825114846229553, "learning_rate": 1.669538774913153e-05, "loss": 0.2063, "step": 5464 }, { "epoch": 0.5663799357446367, "grad_norm": 0.580242931842804, "learning_rate": 1.6688766690997514e-05, "loss": 0.2016, "step": 5465 }, { "epoch": 0.5664835734272982, "grad_norm": 0.508276104927063, "learning_rate": 1.668214600596811e-05, "loss": 0.1754, "step": 5466 }, { "epoch": 0.5665872111099596, "grad_norm": 0.566452145576477, "learning_rate": 1.667552569478934e-05, "loss": 0.2086, "step": 5467 }, { "epoch": 0.566690848792621, "grad_norm": 0.5992406606674194, "learning_rate": 1.6668905758207173e-05, "loss": 0.2269, "step": 5468 }, { "epoch": 0.5667944864752824, "grad_norm": 0.5830585360527039, "learning_rate": 1.6662286196967517e-05, "loss": 0.207, "step": 5469 }, { "epoch": 0.5668981241579438, "grad_norm": 0.5889588594436646, "learning_rate": 1.665566701181627e-05, "loss": 0.2168, "step": 5470 }, { "epoch": 0.5670017618406052, "grad_norm": 0.5479435920715332, "learning_rate": 1.6649048203499263e-05, "loss": 0.2006, "step": 5471 }, { "epoch": 0.5671053995232667, "grad_norm": 0.5586081147193909, "learning_rate": 1.664242977276229e-05, "loss": 0.2087, "step": 5472 }, { "epoch": 0.5672090372059281, "grad_norm": 0.5428041219711304, "learning_rate": 1.6635811720351124e-05, "loss": 0.2113, "step": 5473 }, { "epoch": 0.5673126748885895, "grad_norm": 0.523199737071991, "learning_rate": 1.6629194047011454e-05, "loss": 0.184, "step": 5474 }, { "epoch": 0.5674163125712509, "grad_norm": 0.5387806296348572, "learning_rate": 1.6622576753488963e-05, "loss": 0.2036, "step": 5475 }, { "epoch": 0.5675199502539123, "grad_norm": 0.5509522557258606, "learning_rate": 1.661595984052927e-05, "loss": 0.2163, "step": 5476 }, { "epoch": 0.5676235879365737, "grad_norm": 0.4755285382270813, "learning_rate": 1.660934330887796e-05, "loss": 0.1716, "step": 5477 }, { "epoch": 0.5677272256192352, "grad_norm": 0.5920076370239258, "learning_rate": 1.660272715928058e-05, "loss": 0.2038, "step": 5478 }, { "epoch": 0.5678308633018966, "grad_norm": 0.5493197441101074, "learning_rate": 1.6596111392482618e-05, "loss": 0.2052, "step": 5479 }, { "epoch": 0.567934500984558, "grad_norm": 0.6421695947647095, "learning_rate": 1.658949600922954e-05, "loss": 0.2539, "step": 5480 }, { "epoch": 0.5680381386672194, "grad_norm": 0.6136369109153748, "learning_rate": 1.658288101026674e-05, "loss": 0.2309, "step": 5481 }, { "epoch": 0.5681417763498808, "grad_norm": 0.563614547252655, "learning_rate": 1.6576266396339597e-05, "loss": 0.1861, "step": 5482 }, { "epoch": 0.5682454140325423, "grad_norm": 0.5318019390106201, "learning_rate": 1.6569652168193442e-05, "loss": 0.1826, "step": 5483 }, { "epoch": 0.5683490517152037, "grad_norm": 0.5460665225982666, "learning_rate": 1.6563038326573544e-05, "loss": 0.1832, "step": 5484 }, { "epoch": 0.5684526893978651, "grad_norm": 0.6247243285179138, "learning_rate": 1.655642487222515e-05, "loss": 0.2252, "step": 5485 }, { "epoch": 0.5685563270805265, "grad_norm": 0.671765148639679, "learning_rate": 1.6549811805893437e-05, "loss": 0.235, "step": 5486 }, { "epoch": 0.5686599647631879, "grad_norm": 0.5407508015632629, "learning_rate": 1.654319912832357e-05, "loss": 0.2001, "step": 5487 }, { "epoch": 0.5687636024458493, "grad_norm": 0.5194950103759766, "learning_rate": 1.6536586840260657e-05, "loss": 0.1991, "step": 5488 }, { "epoch": 0.5688672401285108, "grad_norm": 0.581840991973877, "learning_rate": 1.652997494244975e-05, "loss": 0.1809, "step": 5489 }, { "epoch": 0.5689708778111722, "grad_norm": 0.5414415597915649, "learning_rate": 1.652336343563588e-05, "loss": 0.2264, "step": 5490 }, { "epoch": 0.5690745154938336, "grad_norm": 0.6323450207710266, "learning_rate": 1.6516752320564003e-05, "loss": 0.2452, "step": 5491 }, { "epoch": 0.569178153176495, "grad_norm": 0.6471281051635742, "learning_rate": 1.6510141597979062e-05, "loss": 0.2658, "step": 5492 }, { "epoch": 0.5692817908591564, "grad_norm": 0.5367169976234436, "learning_rate": 1.650353126862595e-05, "loss": 0.2028, "step": 5493 }, { "epoch": 0.5693854285418178, "grad_norm": 0.47116124629974365, "learning_rate": 1.6496921333249486e-05, "loss": 0.158, "step": 5494 }, { "epoch": 0.5694890662244793, "grad_norm": 0.6376896500587463, "learning_rate": 1.6490311792594486e-05, "loss": 0.2416, "step": 5495 }, { "epoch": 0.5695927039071407, "grad_norm": 0.5647479295730591, "learning_rate": 1.64837026474057e-05, "loss": 0.1938, "step": 5496 }, { "epoch": 0.5696963415898021, "grad_norm": 0.5366935729980469, "learning_rate": 1.6477093898427826e-05, "loss": 0.2007, "step": 5497 }, { "epoch": 0.5697999792724635, "grad_norm": 0.5869206786155701, "learning_rate": 1.6470485546405545e-05, "loss": 0.1958, "step": 5498 }, { "epoch": 0.5699036169551249, "grad_norm": 0.6000187993049622, "learning_rate": 1.646387759208346e-05, "loss": 0.2337, "step": 5499 }, { "epoch": 0.5700072546377863, "grad_norm": 0.6028838753700256, "learning_rate": 1.6457270036206153e-05, "loss": 0.2222, "step": 5500 }, { "epoch": 0.5701108923204478, "grad_norm": 0.5079191327095032, "learning_rate": 1.6450662879518146e-05, "loss": 0.1862, "step": 5501 }, { "epoch": 0.5702145300031092, "grad_norm": 0.5666100978851318, "learning_rate": 1.644405612276393e-05, "loss": 0.2222, "step": 5502 }, { "epoch": 0.5703181676857706, "grad_norm": 0.5046568512916565, "learning_rate": 1.6437449766687952e-05, "loss": 0.1666, "step": 5503 }, { "epoch": 0.570421805368432, "grad_norm": 0.5350242853164673, "learning_rate": 1.6430843812034582e-05, "loss": 0.1935, "step": 5504 }, { "epoch": 0.5705254430510934, "grad_norm": 0.5835912823677063, "learning_rate": 1.6424238259548197e-05, "loss": 0.2177, "step": 5505 }, { "epoch": 0.5706290807337548, "grad_norm": 0.6207568049430847, "learning_rate": 1.6417633109973076e-05, "loss": 0.2086, "step": 5506 }, { "epoch": 0.5707327184164163, "grad_norm": 0.4970071017742157, "learning_rate": 1.6411028364053486e-05, "loss": 0.1772, "step": 5507 }, { "epoch": 0.5708363560990777, "grad_norm": 0.484801709651947, "learning_rate": 1.640442402253365e-05, "loss": 0.1792, "step": 5508 }, { "epoch": 0.570939993781739, "grad_norm": 0.497266948223114, "learning_rate": 1.639782008615772e-05, "loss": 0.1754, "step": 5509 }, { "epoch": 0.5710436314644004, "grad_norm": 0.696564257144928, "learning_rate": 1.6391216555669826e-05, "loss": 0.2451, "step": 5510 }, { "epoch": 0.5711472691470618, "grad_norm": 0.5477309823036194, "learning_rate": 1.6384613431814033e-05, "loss": 0.1897, "step": 5511 }, { "epoch": 0.5712509068297232, "grad_norm": 0.4967729151248932, "learning_rate": 1.6378010715334383e-05, "loss": 0.1624, "step": 5512 }, { "epoch": 0.5713545445123847, "grad_norm": 0.6100444197654724, "learning_rate": 1.637140840697486e-05, "loss": 0.2225, "step": 5513 }, { "epoch": 0.5714581821950461, "grad_norm": 0.5622698664665222, "learning_rate": 1.6364806507479386e-05, "loss": 0.1817, "step": 5514 }, { "epoch": 0.5715618198777075, "grad_norm": 0.5402734875679016, "learning_rate": 1.6358205017591874e-05, "loss": 0.1982, "step": 5515 }, { "epoch": 0.5716654575603689, "grad_norm": 0.6594802737236023, "learning_rate": 1.6351603938056157e-05, "loss": 0.2517, "step": 5516 }, { "epoch": 0.5717690952430303, "grad_norm": 0.5295108556747437, "learning_rate": 1.634500326961603e-05, "loss": 0.2083, "step": 5517 }, { "epoch": 0.5718727329256917, "grad_norm": 0.5516922473907471, "learning_rate": 1.633840301301527e-05, "loss": 0.1915, "step": 5518 }, { "epoch": 0.5719763706083532, "grad_norm": 0.6079210638999939, "learning_rate": 1.6331803168997547e-05, "loss": 0.2179, "step": 5519 }, { "epoch": 0.5720800082910146, "grad_norm": 0.5783078670501709, "learning_rate": 1.632520373830655e-05, "loss": 0.2124, "step": 5520 }, { "epoch": 0.572183645973676, "grad_norm": 0.6089596748352051, "learning_rate": 1.6318604721685882e-05, "loss": 0.1942, "step": 5521 }, { "epoch": 0.5722872836563374, "grad_norm": 0.6430178880691528, "learning_rate": 1.6312006119879105e-05, "loss": 0.2204, "step": 5522 }, { "epoch": 0.5723909213389988, "grad_norm": 0.5460180044174194, "learning_rate": 1.6305407933629754e-05, "loss": 0.1975, "step": 5523 }, { "epoch": 0.5724945590216602, "grad_norm": 0.571719229221344, "learning_rate": 1.6298810163681292e-05, "loss": 0.1972, "step": 5524 }, { "epoch": 0.5725981967043217, "grad_norm": 0.5407332181930542, "learning_rate": 1.6292212810777148e-05, "loss": 0.1786, "step": 5525 }, { "epoch": 0.5727018343869831, "grad_norm": 0.5866890549659729, "learning_rate": 1.6285615875660696e-05, "loss": 0.2088, "step": 5526 }, { "epoch": 0.5728054720696445, "grad_norm": 0.6215521693229675, "learning_rate": 1.627901935907527e-05, "loss": 0.1896, "step": 5527 }, { "epoch": 0.5729091097523059, "grad_norm": 0.4777623414993286, "learning_rate": 1.627242326176417e-05, "loss": 0.1722, "step": 5528 }, { "epoch": 0.5730127474349673, "grad_norm": 0.4880693852901459, "learning_rate": 1.626582758447061e-05, "loss": 0.1898, "step": 5529 }, { "epoch": 0.5731163851176287, "grad_norm": 0.6543561220169067, "learning_rate": 1.62592323279378e-05, "loss": 0.2323, "step": 5530 }, { "epoch": 0.5732200228002902, "grad_norm": 0.6428809762001038, "learning_rate": 1.6252637492908877e-05, "loss": 0.2266, "step": 5531 }, { "epoch": 0.5733236604829516, "grad_norm": 0.5870211124420166, "learning_rate": 1.624604308012693e-05, "loss": 0.1892, "step": 5532 }, { "epoch": 0.573427298165613, "grad_norm": 0.5597742795944214, "learning_rate": 1.623944909033502e-05, "loss": 0.1971, "step": 5533 }, { "epoch": 0.5735309358482744, "grad_norm": 0.6064586639404297, "learning_rate": 1.6232855524276137e-05, "loss": 0.2119, "step": 5534 }, { "epoch": 0.5736345735309358, "grad_norm": 0.6230360865592957, "learning_rate": 1.622626238269324e-05, "loss": 0.236, "step": 5535 }, { "epoch": 0.5737382112135972, "grad_norm": 0.6390331983566284, "learning_rate": 1.6219669666329224e-05, "loss": 0.2296, "step": 5536 }, { "epoch": 0.5738418488962587, "grad_norm": 0.551810622215271, "learning_rate": 1.6213077375926957e-05, "loss": 0.1991, "step": 5537 }, { "epoch": 0.5739454865789201, "grad_norm": 0.5981395244598389, "learning_rate": 1.620648551222925e-05, "loss": 0.2252, "step": 5538 }, { "epoch": 0.5740491242615815, "grad_norm": 0.5947065353393555, "learning_rate": 1.619989407597885e-05, "loss": 0.2526, "step": 5539 }, { "epoch": 0.5741527619442429, "grad_norm": 0.4955461323261261, "learning_rate": 1.6193303067918488e-05, "loss": 0.1816, "step": 5540 }, { "epoch": 0.5742563996269043, "grad_norm": 0.579858124256134, "learning_rate": 1.618671248879081e-05, "loss": 0.1989, "step": 5541 }, { "epoch": 0.5743600373095658, "grad_norm": 0.49859556555747986, "learning_rate": 1.618012233933844e-05, "loss": 0.1659, "step": 5542 }, { "epoch": 0.5744636749922272, "grad_norm": 0.5783235430717468, "learning_rate": 1.6173532620303953e-05, "loss": 0.2121, "step": 5543 }, { "epoch": 0.5745673126748886, "grad_norm": 0.6743443608283997, "learning_rate": 1.616694333242986e-05, "loss": 0.2295, "step": 5544 }, { "epoch": 0.57467095035755, "grad_norm": 0.5629667639732361, "learning_rate": 1.6160354476458638e-05, "loss": 0.2156, "step": 5545 }, { "epoch": 0.5747745880402114, "grad_norm": 0.5217868089675903, "learning_rate": 1.6153766053132694e-05, "loss": 0.2063, "step": 5546 }, { "epoch": 0.5748782257228728, "grad_norm": 0.5237048864364624, "learning_rate": 1.6147178063194418e-05, "loss": 0.1993, "step": 5547 }, { "epoch": 0.5749818634055343, "grad_norm": 0.48419585824012756, "learning_rate": 1.614059050738613e-05, "loss": 0.1416, "step": 5548 }, { "epoch": 0.5750855010881957, "grad_norm": 0.6031510829925537, "learning_rate": 1.61340033864501e-05, "loss": 0.221, "step": 5549 }, { "epoch": 0.5751891387708571, "grad_norm": 0.5714973211288452, "learning_rate": 1.6127416701128572e-05, "loss": 0.1976, "step": 5550 }, { "epoch": 0.5752927764535185, "grad_norm": 0.7110316157341003, "learning_rate": 1.6120830452163692e-05, "loss": 0.2463, "step": 5551 }, { "epoch": 0.5753964141361799, "grad_norm": 0.6350976824760437, "learning_rate": 1.6114244640297615e-05, "loss": 0.2138, "step": 5552 }, { "epoch": 0.5755000518188413, "grad_norm": 0.6358919739723206, "learning_rate": 1.6107659266272413e-05, "loss": 0.197, "step": 5553 }, { "epoch": 0.5756036895015028, "grad_norm": 0.5224093794822693, "learning_rate": 1.6101074330830106e-05, "loss": 0.1934, "step": 5554 }, { "epoch": 0.5757073271841642, "grad_norm": 0.5277766585350037, "learning_rate": 1.609448983471269e-05, "loss": 0.1736, "step": 5555 }, { "epoch": 0.5758109648668256, "grad_norm": 0.5051241517066956, "learning_rate": 1.6087905778662087e-05, "loss": 0.1814, "step": 5556 }, { "epoch": 0.575914602549487, "grad_norm": 0.5881791114807129, "learning_rate": 1.6081322163420172e-05, "loss": 0.1983, "step": 5557 }, { "epoch": 0.5760182402321484, "grad_norm": 0.613420844078064, "learning_rate": 1.6074738989728794e-05, "loss": 0.2434, "step": 5558 }, { "epoch": 0.5761218779148098, "grad_norm": 0.6119860410690308, "learning_rate": 1.606815625832972e-05, "loss": 0.2256, "step": 5559 }, { "epoch": 0.5762255155974713, "grad_norm": 0.6397984623908997, "learning_rate": 1.6061573969964694e-05, "loss": 0.2369, "step": 5560 }, { "epoch": 0.5763291532801327, "grad_norm": 0.6807885766029358, "learning_rate": 1.6054992125375377e-05, "loss": 0.2651, "step": 5561 }, { "epoch": 0.5764327909627941, "grad_norm": 0.5684066414833069, "learning_rate": 1.6048410725303424e-05, "loss": 0.1837, "step": 5562 }, { "epoch": 0.5765364286454555, "grad_norm": 0.5673686861991882, "learning_rate": 1.6041829770490407e-05, "loss": 0.1845, "step": 5563 }, { "epoch": 0.5766400663281169, "grad_norm": 0.6042220592498779, "learning_rate": 1.6035249261677852e-05, "loss": 0.2123, "step": 5564 }, { "epoch": 0.5767437040107783, "grad_norm": 0.6854713559150696, "learning_rate": 1.6028669199607258e-05, "loss": 0.211, "step": 5565 }, { "epoch": 0.5768473416934398, "grad_norm": 0.48943090438842773, "learning_rate": 1.6022089585020036e-05, "loss": 0.16, "step": 5566 }, { "epoch": 0.5769509793761012, "grad_norm": 0.5919265151023865, "learning_rate": 1.6015510418657574e-05, "loss": 0.2261, "step": 5567 }, { "epoch": 0.5770546170587626, "grad_norm": 0.5791772603988647, "learning_rate": 1.600893170126121e-05, "loss": 0.2395, "step": 5568 }, { "epoch": 0.577158254741424, "grad_norm": 0.6177813410758972, "learning_rate": 1.6002353433572214e-05, "loss": 0.2158, "step": 5569 }, { "epoch": 0.5772618924240854, "grad_norm": 0.5023506283760071, "learning_rate": 1.5995775616331827e-05, "loss": 0.1814, "step": 5570 }, { "epoch": 0.5773655301067468, "grad_norm": 0.594092071056366, "learning_rate": 1.5989198250281208e-05, "loss": 0.2052, "step": 5571 }, { "epoch": 0.5774691677894083, "grad_norm": 0.5519578456878662, "learning_rate": 1.5982621336161497e-05, "loss": 0.1876, "step": 5572 }, { "epoch": 0.5775728054720697, "grad_norm": 0.6383631229400635, "learning_rate": 1.5976044874713775e-05, "loss": 0.2333, "step": 5573 }, { "epoch": 0.5776764431547311, "grad_norm": 0.5924476385116577, "learning_rate": 1.5969468866679056e-05, "loss": 0.2161, "step": 5574 }, { "epoch": 0.5777800808373925, "grad_norm": 0.5061485767364502, "learning_rate": 1.5962893312798324e-05, "loss": 0.1701, "step": 5575 }, { "epoch": 0.5778837185200539, "grad_norm": 0.5206407308578491, "learning_rate": 1.5956318213812493e-05, "loss": 0.1783, "step": 5576 }, { "epoch": 0.5779873562027154, "grad_norm": 0.5351812839508057, "learning_rate": 1.5949743570462438e-05, "loss": 0.183, "step": 5577 }, { "epoch": 0.5780909938853768, "grad_norm": 0.6140007972717285, "learning_rate": 1.594316938348899e-05, "loss": 0.2215, "step": 5578 }, { "epoch": 0.5781946315680382, "grad_norm": 0.6508222222328186, "learning_rate": 1.59365956536329e-05, "loss": 0.2327, "step": 5579 }, { "epoch": 0.5782982692506996, "grad_norm": 0.46037814021110535, "learning_rate": 1.5930022381634908e-05, "loss": 0.1498, "step": 5580 }, { "epoch": 0.578401906933361, "grad_norm": 0.5092030763626099, "learning_rate": 1.5923449568235656e-05, "loss": 0.1948, "step": 5581 }, { "epoch": 0.5785055446160224, "grad_norm": 0.5261053442955017, "learning_rate": 1.5916877214175768e-05, "loss": 0.1641, "step": 5582 }, { "epoch": 0.5786091822986839, "grad_norm": 0.5427508354187012, "learning_rate": 1.5910305320195814e-05, "loss": 0.209, "step": 5583 }, { "epoch": 0.5787128199813453, "grad_norm": 0.5705498456954956, "learning_rate": 1.59037338870363e-05, "loss": 0.2181, "step": 5584 }, { "epoch": 0.5788164576640066, "grad_norm": 0.5972639918327332, "learning_rate": 1.589716291543768e-05, "loss": 0.2035, "step": 5585 }, { "epoch": 0.578920095346668, "grad_norm": 0.6710713505744934, "learning_rate": 1.5890592406140363e-05, "loss": 0.261, "step": 5586 }, { "epoch": 0.5790237330293294, "grad_norm": 0.45358237624168396, "learning_rate": 1.588402235988471e-05, "loss": 0.1604, "step": 5587 }, { "epoch": 0.5791273707119908, "grad_norm": 0.5760126113891602, "learning_rate": 1.5877452777411017e-05, "loss": 0.1777, "step": 5588 }, { "epoch": 0.5792310083946522, "grad_norm": 0.6019428968429565, "learning_rate": 1.587088365945953e-05, "loss": 0.2076, "step": 5589 }, { "epoch": 0.5793346460773137, "grad_norm": 0.6227850317955017, "learning_rate": 1.5864315006770467e-05, "loss": 0.221, "step": 5590 }, { "epoch": 0.5794382837599751, "grad_norm": 0.6790367960929871, "learning_rate": 1.5857746820083952e-05, "loss": 0.2219, "step": 5591 }, { "epoch": 0.5795419214426365, "grad_norm": 0.603782594203949, "learning_rate": 1.5851179100140085e-05, "loss": 0.2325, "step": 5592 }, { "epoch": 0.5796455591252979, "grad_norm": 0.5080618858337402, "learning_rate": 1.5844611847678912e-05, "loss": 0.1768, "step": 5593 }, { "epoch": 0.5797491968079593, "grad_norm": 0.6221045851707458, "learning_rate": 1.5838045063440413e-05, "loss": 0.2104, "step": 5594 }, { "epoch": 0.5798528344906207, "grad_norm": 0.45891180634498596, "learning_rate": 1.583147874816453e-05, "loss": 0.1379, "step": 5595 }, { "epoch": 0.5799564721732822, "grad_norm": 0.4983937740325928, "learning_rate": 1.5824912902591134e-05, "loss": 0.1699, "step": 5596 }, { "epoch": 0.5800601098559436, "grad_norm": 0.6115486025810242, "learning_rate": 1.5818347527460067e-05, "loss": 0.2125, "step": 5597 }, { "epoch": 0.580163747538605, "grad_norm": 0.6062911152839661, "learning_rate": 1.5811782623511104e-05, "loss": 0.2391, "step": 5598 }, { "epoch": 0.5802673852212664, "grad_norm": 0.5856295228004456, "learning_rate": 1.5805218191483956e-05, "loss": 0.1943, "step": 5599 }, { "epoch": 0.5803710229039278, "grad_norm": 0.6322320699691772, "learning_rate": 1.579865423211831e-05, "loss": 0.2269, "step": 5600 }, { "epoch": 0.5804746605865893, "grad_norm": 0.5917003154754639, "learning_rate": 1.5792090746153766e-05, "loss": 0.2041, "step": 5601 }, { "epoch": 0.5805782982692507, "grad_norm": 0.6539716124534607, "learning_rate": 1.5785527734329895e-05, "loss": 0.2067, "step": 5602 }, { "epoch": 0.5806819359519121, "grad_norm": 0.584199845790863, "learning_rate": 1.5778965197386216e-05, "loss": 0.232, "step": 5603 }, { "epoch": 0.5807855736345735, "grad_norm": 0.6193050146102905, "learning_rate": 1.5772403136062172e-05, "loss": 0.2086, "step": 5604 }, { "epoch": 0.5808892113172349, "grad_norm": 0.6499961018562317, "learning_rate": 1.5765841551097172e-05, "loss": 0.2251, "step": 5605 }, { "epoch": 0.5809928489998963, "grad_norm": 0.5342284440994263, "learning_rate": 1.5759280443230558e-05, "loss": 0.1961, "step": 5606 }, { "epoch": 0.5810964866825578, "grad_norm": 0.5079427361488342, "learning_rate": 1.5752719813201636e-05, "loss": 0.1861, "step": 5607 }, { "epoch": 0.5812001243652192, "grad_norm": 0.6445949077606201, "learning_rate": 1.5746159661749646e-05, "loss": 0.2146, "step": 5608 }, { "epoch": 0.5813037620478806, "grad_norm": 0.574851930141449, "learning_rate": 1.5739599989613764e-05, "loss": 0.2117, "step": 5609 }, { "epoch": 0.581407399730542, "grad_norm": 0.5927166938781738, "learning_rate": 1.5733040797533148e-05, "loss": 0.2057, "step": 5610 }, { "epoch": 0.5815110374132034, "grad_norm": 0.5667980313301086, "learning_rate": 1.5726482086246846e-05, "loss": 0.2046, "step": 5611 }, { "epoch": 0.5816146750958648, "grad_norm": 0.5506219863891602, "learning_rate": 1.57199238564939e-05, "loss": 0.2038, "step": 5612 }, { "epoch": 0.5817183127785263, "grad_norm": 0.5867335200309753, "learning_rate": 1.5713366109013294e-05, "loss": 0.178, "step": 5613 }, { "epoch": 0.5818219504611877, "grad_norm": 0.5505114793777466, "learning_rate": 1.5706808844543916e-05, "loss": 0.1969, "step": 5614 }, { "epoch": 0.5819255881438491, "grad_norm": 0.6018428206443787, "learning_rate": 1.5700252063824652e-05, "loss": 0.2279, "step": 5615 }, { "epoch": 0.5820292258265105, "grad_norm": 0.5699183344841003, "learning_rate": 1.5693695767594303e-05, "loss": 0.1997, "step": 5616 }, { "epoch": 0.5821328635091719, "grad_norm": 0.5802676677703857, "learning_rate": 1.5687139956591608e-05, "loss": 0.2091, "step": 5617 }, { "epoch": 0.5822365011918333, "grad_norm": 0.5912351012229919, "learning_rate": 1.568058463155529e-05, "loss": 0.1978, "step": 5618 }, { "epoch": 0.5823401388744948, "grad_norm": 0.5182409882545471, "learning_rate": 1.567402979322398e-05, "loss": 0.1968, "step": 5619 }, { "epoch": 0.5824437765571562, "grad_norm": 0.553266704082489, "learning_rate": 1.566747544233627e-05, "loss": 0.1964, "step": 5620 }, { "epoch": 0.5825474142398176, "grad_norm": 0.6378732323646545, "learning_rate": 1.5660921579630682e-05, "loss": 0.2161, "step": 5621 }, { "epoch": 0.582651051922479, "grad_norm": 0.6760129928588867, "learning_rate": 1.5654368205845713e-05, "loss": 0.2399, "step": 5622 }, { "epoch": 0.5827546896051404, "grad_norm": 0.6148195862770081, "learning_rate": 1.564781532171978e-05, "loss": 0.2071, "step": 5623 }, { "epoch": 0.5828583272878018, "grad_norm": 0.6314781904220581, "learning_rate": 1.5641262927991243e-05, "loss": 0.212, "step": 5624 }, { "epoch": 0.5829619649704633, "grad_norm": 0.5955851674079895, "learning_rate": 1.5634711025398433e-05, "loss": 0.1934, "step": 5625 }, { "epoch": 0.5830656026531247, "grad_norm": 0.6427017450332642, "learning_rate": 1.562815961467959e-05, "loss": 0.2414, "step": 5626 }, { "epoch": 0.5831692403357861, "grad_norm": 0.5245332717895508, "learning_rate": 1.562160869657293e-05, "loss": 0.1862, "step": 5627 }, { "epoch": 0.5832728780184475, "grad_norm": 0.570083737373352, "learning_rate": 1.5615058271816597e-05, "loss": 0.2063, "step": 5628 }, { "epoch": 0.5833765157011089, "grad_norm": 0.5223228335380554, "learning_rate": 1.560850834114868e-05, "loss": 0.196, "step": 5629 }, { "epoch": 0.5834801533837704, "grad_norm": 0.5994619131088257, "learning_rate": 1.560195890530722e-05, "loss": 0.2046, "step": 5630 }, { "epoch": 0.5835837910664318, "grad_norm": 0.5086055397987366, "learning_rate": 1.5595409965030188e-05, "loss": 0.174, "step": 5631 }, { "epoch": 0.5836874287490932, "grad_norm": 0.5901955366134644, "learning_rate": 1.5588861521055515e-05, "loss": 0.2171, "step": 5632 }, { "epoch": 0.5837910664317546, "grad_norm": 0.5532594323158264, "learning_rate": 1.5582313574121073e-05, "loss": 0.1983, "step": 5633 }, { "epoch": 0.583894704114416, "grad_norm": 0.5771147012710571, "learning_rate": 1.5575766124964662e-05, "loss": 0.216, "step": 5634 }, { "epoch": 0.5839983417970774, "grad_norm": 0.5754862427711487, "learning_rate": 1.5569219174324055e-05, "loss": 0.2245, "step": 5635 }, { "epoch": 0.5841019794797389, "grad_norm": 0.5546948313713074, "learning_rate": 1.556267272293694e-05, "loss": 0.1975, "step": 5636 }, { "epoch": 0.5842056171624003, "grad_norm": 0.5417519211769104, "learning_rate": 1.555612677154096e-05, "loss": 0.2128, "step": 5637 }, { "epoch": 0.5843092548450617, "grad_norm": 0.4762413799762726, "learning_rate": 1.5549581320873715e-05, "loss": 0.1572, "step": 5638 }, { "epoch": 0.5844128925277231, "grad_norm": 0.5316885113716125, "learning_rate": 1.5543036371672723e-05, "loss": 0.1968, "step": 5639 }, { "epoch": 0.5845165302103845, "grad_norm": 0.5961049199104309, "learning_rate": 1.553649192467547e-05, "loss": 0.2024, "step": 5640 }, { "epoch": 0.5846201678930459, "grad_norm": 0.5178672671318054, "learning_rate": 1.552994798061936e-05, "loss": 0.1809, "step": 5641 }, { "epoch": 0.5847238055757074, "grad_norm": 0.5495697259902954, "learning_rate": 1.552340454024177e-05, "loss": 0.191, "step": 5642 }, { "epoch": 0.5848274432583688, "grad_norm": 0.5312830209732056, "learning_rate": 1.5516861604279997e-05, "loss": 0.1858, "step": 5643 }, { "epoch": 0.5849310809410302, "grad_norm": 0.5942406058311462, "learning_rate": 1.5510319173471285e-05, "loss": 0.2073, "step": 5644 }, { "epoch": 0.5850347186236916, "grad_norm": 0.5381253957748413, "learning_rate": 1.5503777248552836e-05, "loss": 0.1863, "step": 5645 }, { "epoch": 0.585138356306353, "grad_norm": 0.6405827403068542, "learning_rate": 1.5497235830261766e-05, "loss": 0.1973, "step": 5646 }, { "epoch": 0.5852419939890144, "grad_norm": 0.5816875696182251, "learning_rate": 1.5490694919335172e-05, "loss": 0.2108, "step": 5647 }, { "epoch": 0.5853456316716759, "grad_norm": 0.43517184257507324, "learning_rate": 1.5484154516510063e-05, "loss": 0.1403, "step": 5648 }, { "epoch": 0.5854492693543373, "grad_norm": 0.620516836643219, "learning_rate": 1.54776146225234e-05, "loss": 0.1912, "step": 5649 }, { "epoch": 0.5855529070369987, "grad_norm": 0.5462132096290588, "learning_rate": 1.5471075238112098e-05, "loss": 0.1708, "step": 5650 }, { "epoch": 0.5856565447196601, "grad_norm": 0.603030800819397, "learning_rate": 1.546453636401299e-05, "loss": 0.2087, "step": 5651 }, { "epoch": 0.5857601824023215, "grad_norm": 0.5474585294723511, "learning_rate": 1.545799800096287e-05, "loss": 0.2027, "step": 5652 }, { "epoch": 0.585863820084983, "grad_norm": 0.6245854496955872, "learning_rate": 1.545146014969849e-05, "loss": 0.2468, "step": 5653 }, { "epoch": 0.5859674577676444, "grad_norm": 0.5252482295036316, "learning_rate": 1.5444922810956498e-05, "loss": 0.1801, "step": 5654 }, { "epoch": 0.5860710954503058, "grad_norm": 0.6183892488479614, "learning_rate": 1.5438385985473523e-05, "loss": 0.2425, "step": 5655 }, { "epoch": 0.5861747331329672, "grad_norm": 0.6712120175361633, "learning_rate": 1.543184967398613e-05, "loss": 0.2037, "step": 5656 }, { "epoch": 0.5862783708156286, "grad_norm": 0.5625413060188293, "learning_rate": 1.542531387723081e-05, "loss": 0.1956, "step": 5657 }, { "epoch": 0.58638200849829, "grad_norm": 0.5676254630088806, "learning_rate": 1.5418778595944015e-05, "loss": 0.1947, "step": 5658 }, { "epoch": 0.5864856461809514, "grad_norm": 0.533459484577179, "learning_rate": 1.5412243830862118e-05, "loss": 0.2088, "step": 5659 }, { "epoch": 0.5865892838636129, "grad_norm": 0.7574283480644226, "learning_rate": 1.540570958272146e-05, "loss": 0.247, "step": 5660 }, { "epoch": 0.5866929215462742, "grad_norm": 0.6113412976264954, "learning_rate": 1.539917585225831e-05, "loss": 0.2037, "step": 5661 }, { "epoch": 0.5867965592289356, "grad_norm": 0.5184718370437622, "learning_rate": 1.5392642640208857e-05, "loss": 0.2112, "step": 5662 }, { "epoch": 0.586900196911597, "grad_norm": 0.5778669118881226, "learning_rate": 1.5386109947309284e-05, "loss": 0.2067, "step": 5663 }, { "epoch": 0.5870038345942584, "grad_norm": 0.5942280888557434, "learning_rate": 1.5379577774295665e-05, "loss": 0.2127, "step": 5664 }, { "epoch": 0.5871074722769198, "grad_norm": 0.539800226688385, "learning_rate": 1.5373046121904032e-05, "loss": 0.1769, "step": 5665 }, { "epoch": 0.5872111099595813, "grad_norm": 0.6362435817718506, "learning_rate": 1.536651499087038e-05, "loss": 0.2244, "step": 5666 }, { "epoch": 0.5873147476422427, "grad_norm": 0.5899614095687866, "learning_rate": 1.5359984381930613e-05, "loss": 0.2061, "step": 5667 }, { "epoch": 0.5874183853249041, "grad_norm": 0.588805079460144, "learning_rate": 1.5353454295820594e-05, "loss": 0.1829, "step": 5668 }, { "epoch": 0.5875220230075655, "grad_norm": 0.6606040000915527, "learning_rate": 1.5346924733276117e-05, "loss": 0.2479, "step": 5669 }, { "epoch": 0.5876256606902269, "grad_norm": 0.5283950567245483, "learning_rate": 1.534039569503293e-05, "loss": 0.188, "step": 5670 }, { "epoch": 0.5877292983728883, "grad_norm": 0.5973883271217346, "learning_rate": 1.5333867181826717e-05, "loss": 0.1903, "step": 5671 }, { "epoch": 0.5878329360555498, "grad_norm": 0.5801593065261841, "learning_rate": 1.5327339194393087e-05, "loss": 0.1868, "step": 5672 }, { "epoch": 0.5879365737382112, "grad_norm": 0.5748353600502014, "learning_rate": 1.5320811733467626e-05, "loss": 0.2009, "step": 5673 }, { "epoch": 0.5880402114208726, "grad_norm": 0.5203397274017334, "learning_rate": 1.5314284799785815e-05, "loss": 0.1895, "step": 5674 }, { "epoch": 0.588143849103534, "grad_norm": 0.5716513991355896, "learning_rate": 1.5307758394083103e-05, "loss": 0.2138, "step": 5675 }, { "epoch": 0.5882474867861954, "grad_norm": 0.5933395028114319, "learning_rate": 1.53012325170949e-05, "loss": 0.2089, "step": 5676 }, { "epoch": 0.5883511244688568, "grad_norm": 0.7028188109397888, "learning_rate": 1.5294707169556494e-05, "loss": 0.2341, "step": 5677 }, { "epoch": 0.5884547621515183, "grad_norm": 0.8074510097503662, "learning_rate": 1.5288182352203182e-05, "loss": 0.1966, "step": 5678 }, { "epoch": 0.5885583998341797, "grad_norm": 0.4657239019870758, "learning_rate": 1.528165806577015e-05, "loss": 0.1423, "step": 5679 }, { "epoch": 0.5886620375168411, "grad_norm": 0.5782691240310669, "learning_rate": 1.5275134310992553e-05, "loss": 0.216, "step": 5680 }, { "epoch": 0.5887656751995025, "grad_norm": 0.641411304473877, "learning_rate": 1.5268611088605482e-05, "loss": 0.2132, "step": 5681 }, { "epoch": 0.5888693128821639, "grad_norm": 0.5617420077323914, "learning_rate": 1.5262088399343954e-05, "loss": 0.2034, "step": 5682 }, { "epoch": 0.5889729505648253, "grad_norm": 0.6933878660202026, "learning_rate": 1.5255566243942945e-05, "loss": 0.2358, "step": 5683 }, { "epoch": 0.5890765882474868, "grad_norm": 0.5039753317832947, "learning_rate": 1.524904462313735e-05, "loss": 0.1609, "step": 5684 }, { "epoch": 0.5891802259301482, "grad_norm": 0.6344572305679321, "learning_rate": 1.5242523537662023e-05, "loss": 0.216, "step": 5685 }, { "epoch": 0.5892838636128096, "grad_norm": 0.5435722470283508, "learning_rate": 1.5236002988251752e-05, "loss": 0.2057, "step": 5686 }, { "epoch": 0.589387501295471, "grad_norm": 0.6607723236083984, "learning_rate": 1.5229482975641252e-05, "loss": 0.2446, "step": 5687 }, { "epoch": 0.5894911389781324, "grad_norm": 0.5871069431304932, "learning_rate": 1.5222963500565201e-05, "loss": 0.2199, "step": 5688 }, { "epoch": 0.5895947766607939, "grad_norm": 0.642362117767334, "learning_rate": 1.5216444563758195e-05, "loss": 0.2033, "step": 5689 }, { "epoch": 0.5896984143434553, "grad_norm": 0.547693133354187, "learning_rate": 1.5209926165954772e-05, "loss": 0.183, "step": 5690 }, { "epoch": 0.5898020520261167, "grad_norm": 0.5935719609260559, "learning_rate": 1.5203408307889433e-05, "loss": 0.1924, "step": 5691 }, { "epoch": 0.5899056897087781, "grad_norm": 0.5445742011070251, "learning_rate": 1.5196890990296584e-05, "loss": 0.1783, "step": 5692 }, { "epoch": 0.5900093273914395, "grad_norm": 0.6331069469451904, "learning_rate": 1.5190374213910597e-05, "loss": 0.2173, "step": 5693 }, { "epoch": 0.5901129650741009, "grad_norm": 0.5657593011856079, "learning_rate": 1.5183857979465757e-05, "loss": 0.1942, "step": 5694 }, { "epoch": 0.5902166027567624, "grad_norm": 0.5342247486114502, "learning_rate": 1.5177342287696317e-05, "loss": 0.1977, "step": 5695 }, { "epoch": 0.5903202404394238, "grad_norm": 0.5558289885520935, "learning_rate": 1.5170827139336457e-05, "loss": 0.1896, "step": 5696 }, { "epoch": 0.5904238781220852, "grad_norm": 0.6776431202888489, "learning_rate": 1.5164312535120279e-05, "loss": 0.2219, "step": 5697 }, { "epoch": 0.5905275158047466, "grad_norm": 0.5458036065101624, "learning_rate": 1.5157798475781856e-05, "loss": 0.2065, "step": 5698 }, { "epoch": 0.590631153487408, "grad_norm": 0.5801975131034851, "learning_rate": 1.5151284962055168e-05, "loss": 0.2235, "step": 5699 }, { "epoch": 0.5907347911700694, "grad_norm": 0.5337779521942139, "learning_rate": 1.514477199467415e-05, "loss": 0.198, "step": 5700 }, { "epoch": 0.5908384288527309, "grad_norm": 0.6479519009590149, "learning_rate": 1.5138259574372684e-05, "loss": 0.2172, "step": 5701 }, { "epoch": 0.5909420665353923, "grad_norm": 0.5114519000053406, "learning_rate": 1.5131747701884566e-05, "loss": 0.1868, "step": 5702 }, { "epoch": 0.5910457042180537, "grad_norm": 0.5659390091896057, "learning_rate": 1.5125236377943555e-05, "loss": 0.1897, "step": 5703 }, { "epoch": 0.5911493419007151, "grad_norm": 0.6139878630638123, "learning_rate": 1.5118725603283321e-05, "loss": 0.2211, "step": 5704 }, { "epoch": 0.5912529795833765, "grad_norm": 0.5344733595848083, "learning_rate": 1.5112215378637505e-05, "loss": 0.1686, "step": 5705 }, { "epoch": 0.5913566172660379, "grad_norm": 0.5037564039230347, "learning_rate": 1.5105705704739664e-05, "loss": 0.165, "step": 5706 }, { "epoch": 0.5914602549486994, "grad_norm": 0.5492294430732727, "learning_rate": 1.5099196582323293e-05, "loss": 0.205, "step": 5707 }, { "epoch": 0.5915638926313608, "grad_norm": 0.5443542003631592, "learning_rate": 1.5092688012121835e-05, "loss": 0.1677, "step": 5708 }, { "epoch": 0.5916675303140222, "grad_norm": 0.5195833444595337, "learning_rate": 1.5086179994868656e-05, "loss": 0.1945, "step": 5709 }, { "epoch": 0.5917711679966836, "grad_norm": 0.6017307639122009, "learning_rate": 1.5079672531297078e-05, "loss": 0.2087, "step": 5710 }, { "epoch": 0.591874805679345, "grad_norm": 0.5943590998649597, "learning_rate": 1.5073165622140358e-05, "loss": 0.2028, "step": 5711 }, { "epoch": 0.5919784433620064, "grad_norm": 0.5091972351074219, "learning_rate": 1.5066659268131666e-05, "loss": 0.1927, "step": 5712 }, { "epoch": 0.5920820810446679, "grad_norm": 0.5621009469032288, "learning_rate": 1.5060153470004149e-05, "loss": 0.1909, "step": 5713 }, { "epoch": 0.5921857187273293, "grad_norm": 0.5365742444992065, "learning_rate": 1.5053648228490857e-05, "loss": 0.2044, "step": 5714 }, { "epoch": 0.5922893564099907, "grad_norm": 0.6904767751693726, "learning_rate": 1.5047143544324784e-05, "loss": 0.2657, "step": 5715 }, { "epoch": 0.5923929940926521, "grad_norm": 0.6271598935127258, "learning_rate": 1.5040639418238889e-05, "loss": 0.2233, "step": 5716 }, { "epoch": 0.5924966317753135, "grad_norm": 0.624140739440918, "learning_rate": 1.5034135850966033e-05, "loss": 0.2019, "step": 5717 }, { "epoch": 0.592600269457975, "grad_norm": 0.6414351463317871, "learning_rate": 1.502763284323903e-05, "loss": 0.2285, "step": 5718 }, { "epoch": 0.5927039071406364, "grad_norm": 0.6503438949584961, "learning_rate": 1.502113039579062e-05, "loss": 0.216, "step": 5719 }, { "epoch": 0.5928075448232978, "grad_norm": 0.6282237768173218, "learning_rate": 1.5014628509353503e-05, "loss": 0.2206, "step": 5720 }, { "epoch": 0.5929111825059592, "grad_norm": 0.5512993931770325, "learning_rate": 1.50081271846603e-05, "loss": 0.1927, "step": 5721 }, { "epoch": 0.5930148201886206, "grad_norm": 0.6197196841239929, "learning_rate": 1.5001626422443556e-05, "loss": 0.2021, "step": 5722 }, { "epoch": 0.593118457871282, "grad_norm": 0.5854577422142029, "learning_rate": 1.4995126223435788e-05, "loss": 0.2068, "step": 5723 }, { "epoch": 0.5932220955539435, "grad_norm": 0.5938782095909119, "learning_rate": 1.498862658836941e-05, "loss": 0.1997, "step": 5724 }, { "epoch": 0.5933257332366049, "grad_norm": 0.5628288984298706, "learning_rate": 1.4982127517976794e-05, "loss": 0.1953, "step": 5725 }, { "epoch": 0.5934293709192663, "grad_norm": 0.5366216897964478, "learning_rate": 1.4975629012990255e-05, "loss": 0.1715, "step": 5726 }, { "epoch": 0.5935330086019277, "grad_norm": 0.6630962491035461, "learning_rate": 1.4969131074142027e-05, "loss": 0.2667, "step": 5727 }, { "epoch": 0.5936366462845891, "grad_norm": 0.5116687417030334, "learning_rate": 1.4962633702164296e-05, "loss": 0.1781, "step": 5728 }, { "epoch": 0.5937402839672505, "grad_norm": 0.5852224230766296, "learning_rate": 1.4956136897789155e-05, "loss": 0.1788, "step": 5729 }, { "epoch": 0.593843921649912, "grad_norm": 0.6093877553939819, "learning_rate": 1.4949640661748674e-05, "loss": 0.2172, "step": 5730 }, { "epoch": 0.5939475593325734, "grad_norm": 0.518648087978363, "learning_rate": 1.4943144994774836e-05, "loss": 0.1722, "step": 5731 }, { "epoch": 0.5940511970152348, "grad_norm": 0.5264543890953064, "learning_rate": 1.4936649897599548e-05, "loss": 0.182, "step": 5732 }, { "epoch": 0.5941548346978962, "grad_norm": 0.6355027556419373, "learning_rate": 1.4930155370954693e-05, "loss": 0.2237, "step": 5733 }, { "epoch": 0.5942584723805576, "grad_norm": 0.6642647981643677, "learning_rate": 1.4923661415572039e-05, "loss": 0.2471, "step": 5734 }, { "epoch": 0.594362110063219, "grad_norm": 0.5539326667785645, "learning_rate": 1.4917168032183326e-05, "loss": 0.173, "step": 5735 }, { "epoch": 0.5944657477458805, "grad_norm": 0.600437581539154, "learning_rate": 1.4910675221520228e-05, "loss": 0.2121, "step": 5736 }, { "epoch": 0.5945693854285418, "grad_norm": 0.6293091177940369, "learning_rate": 1.4904182984314321e-05, "loss": 0.2145, "step": 5737 }, { "epoch": 0.5946730231112032, "grad_norm": 0.5350037813186646, "learning_rate": 1.4897691321297164e-05, "loss": 0.1654, "step": 5738 }, { "epoch": 0.5947766607938646, "grad_norm": 0.516567051410675, "learning_rate": 1.4891200233200214e-05, "loss": 0.1768, "step": 5739 }, { "epoch": 0.594880298476526, "grad_norm": 0.512410581111908, "learning_rate": 1.4884709720754873e-05, "loss": 0.1932, "step": 5740 }, { "epoch": 0.5949839361591874, "grad_norm": 0.5908318161964417, "learning_rate": 1.4878219784692501e-05, "loss": 0.2061, "step": 5741 }, { "epoch": 0.5950875738418488, "grad_norm": 0.590922474861145, "learning_rate": 1.4871730425744352e-05, "loss": 0.2305, "step": 5742 }, { "epoch": 0.5951912115245103, "grad_norm": 0.6016734838485718, "learning_rate": 1.4865241644641655e-05, "loss": 0.1954, "step": 5743 }, { "epoch": 0.5952948492071717, "grad_norm": 0.5654884576797485, "learning_rate": 1.4858753442115537e-05, "loss": 0.2019, "step": 5744 }, { "epoch": 0.5953984868898331, "grad_norm": 0.6041907668113708, "learning_rate": 1.4852265818897094e-05, "loss": 0.2211, "step": 5745 }, { "epoch": 0.5955021245724945, "grad_norm": 0.5785591006278992, "learning_rate": 1.484577877571734e-05, "loss": 0.1885, "step": 5746 }, { "epoch": 0.5956057622551559, "grad_norm": 0.610321044921875, "learning_rate": 1.4839292313307211e-05, "loss": 0.2192, "step": 5747 }, { "epoch": 0.5957093999378174, "grad_norm": 0.5311514735221863, "learning_rate": 1.4832806432397613e-05, "loss": 0.1803, "step": 5748 }, { "epoch": 0.5958130376204788, "grad_norm": 0.6609798669815063, "learning_rate": 1.4826321133719348e-05, "loss": 0.2282, "step": 5749 }, { "epoch": 0.5959166753031402, "grad_norm": 0.5810403823852539, "learning_rate": 1.481983641800317e-05, "loss": 0.174, "step": 5750 }, { "epoch": 0.5960203129858016, "grad_norm": 0.5086999535560608, "learning_rate": 1.4813352285979783e-05, "loss": 0.1838, "step": 5751 }, { "epoch": 0.596123950668463, "grad_norm": 0.507858157157898, "learning_rate": 1.4806868738379792e-05, "loss": 0.1733, "step": 5752 }, { "epoch": 0.5962275883511244, "grad_norm": 0.4975156784057617, "learning_rate": 1.4800385775933763e-05, "loss": 0.1727, "step": 5753 }, { "epoch": 0.5963312260337859, "grad_norm": 0.6003329753875732, "learning_rate": 1.4793903399372171e-05, "loss": 0.2041, "step": 5754 }, { "epoch": 0.5964348637164473, "grad_norm": 0.6466993689537048, "learning_rate": 1.4787421609425458e-05, "loss": 0.2228, "step": 5755 }, { "epoch": 0.5965385013991087, "grad_norm": 0.5515438318252563, "learning_rate": 1.478094040682398e-05, "loss": 0.1908, "step": 5756 }, { "epoch": 0.5966421390817701, "grad_norm": 0.5963743329048157, "learning_rate": 1.4774459792298012e-05, "loss": 0.2199, "step": 5757 }, { "epoch": 0.5967457767644315, "grad_norm": 0.6550231575965881, "learning_rate": 1.4767979766577803e-05, "loss": 0.2265, "step": 5758 }, { "epoch": 0.5968494144470929, "grad_norm": 0.6037734150886536, "learning_rate": 1.4761500330393493e-05, "loss": 0.2229, "step": 5759 }, { "epoch": 0.5969530521297544, "grad_norm": 0.6222856044769287, "learning_rate": 1.4755021484475181e-05, "loss": 0.2153, "step": 5760 }, { "epoch": 0.5970566898124158, "grad_norm": 0.5862886905670166, "learning_rate": 1.4748543229552904e-05, "loss": 0.1949, "step": 5761 }, { "epoch": 0.5971603274950772, "grad_norm": 0.5452234148979187, "learning_rate": 1.4742065566356605e-05, "loss": 0.1907, "step": 5762 }, { "epoch": 0.5972639651777386, "grad_norm": 0.5806859135627747, "learning_rate": 1.4735588495616188e-05, "loss": 0.2052, "step": 5763 }, { "epoch": 0.5973676028604, "grad_norm": 0.5556889772415161, "learning_rate": 1.4729112018061469e-05, "loss": 0.1936, "step": 5764 }, { "epoch": 0.5974712405430614, "grad_norm": 0.48853084444999695, "learning_rate": 1.4722636134422218e-05, "loss": 0.1507, "step": 5765 }, { "epoch": 0.5975748782257229, "grad_norm": 0.47970500588417053, "learning_rate": 1.4716160845428129e-05, "loss": 0.1691, "step": 5766 }, { "epoch": 0.5976785159083843, "grad_norm": 0.6103032231330872, "learning_rate": 1.470968615180881e-05, "loss": 0.2244, "step": 5767 }, { "epoch": 0.5977821535910457, "grad_norm": 0.6266652941703796, "learning_rate": 1.470321205429385e-05, "loss": 0.1797, "step": 5768 }, { "epoch": 0.5978857912737071, "grad_norm": 0.6148141026496887, "learning_rate": 1.4696738553612706e-05, "loss": 0.2012, "step": 5769 }, { "epoch": 0.5979894289563685, "grad_norm": 0.5959713459014893, "learning_rate": 1.469026565049482e-05, "loss": 0.1835, "step": 5770 }, { "epoch": 0.59809306663903, "grad_norm": 0.6304055452346802, "learning_rate": 1.4683793345669552e-05, "loss": 0.2218, "step": 5771 }, { "epoch": 0.5981967043216914, "grad_norm": 0.6706244945526123, "learning_rate": 1.4677321639866178e-05, "loss": 0.2184, "step": 5772 }, { "epoch": 0.5983003420043528, "grad_norm": 0.5483915209770203, "learning_rate": 1.4670850533813936e-05, "loss": 0.185, "step": 5773 }, { "epoch": 0.5984039796870142, "grad_norm": 0.626857340335846, "learning_rate": 1.4664380028241967e-05, "loss": 0.213, "step": 5774 }, { "epoch": 0.5985076173696756, "grad_norm": 0.5656337738037109, "learning_rate": 1.4657910123879356e-05, "loss": 0.1883, "step": 5775 }, { "epoch": 0.598611255052337, "grad_norm": 0.46662232279777527, "learning_rate": 1.4651440821455137e-05, "loss": 0.1711, "step": 5776 }, { "epoch": 0.5987148927349984, "grad_norm": 0.5397834777832031, "learning_rate": 1.464497212169825e-05, "loss": 0.1711, "step": 5777 }, { "epoch": 0.5988185304176599, "grad_norm": 0.4762875735759735, "learning_rate": 1.463850402533758e-05, "loss": 0.1724, "step": 5778 }, { "epoch": 0.5989221681003213, "grad_norm": 0.5345758199691772, "learning_rate": 1.4632036533101937e-05, "loss": 0.1922, "step": 5779 }, { "epoch": 0.5990258057829827, "grad_norm": 0.6376749873161316, "learning_rate": 1.4625569645720075e-05, "loss": 0.2209, "step": 5780 }, { "epoch": 0.5991294434656441, "grad_norm": 0.6170158386230469, "learning_rate": 1.4619103363920674e-05, "loss": 0.2231, "step": 5781 }, { "epoch": 0.5992330811483055, "grad_norm": 0.6733803749084473, "learning_rate": 1.4612637688432334e-05, "loss": 0.2256, "step": 5782 }, { "epoch": 0.599336718830967, "grad_norm": 0.5524088144302368, "learning_rate": 1.4606172619983614e-05, "loss": 0.1688, "step": 5783 }, { "epoch": 0.5994403565136284, "grad_norm": 0.5725765228271484, "learning_rate": 1.4599708159302972e-05, "loss": 0.1957, "step": 5784 }, { "epoch": 0.5995439941962898, "grad_norm": 0.5375611186027527, "learning_rate": 1.4593244307118817e-05, "loss": 0.1875, "step": 5785 }, { "epoch": 0.5996476318789512, "grad_norm": 0.6030101180076599, "learning_rate": 1.4586781064159497e-05, "loss": 0.2153, "step": 5786 }, { "epoch": 0.5997512695616126, "grad_norm": 0.5640246868133545, "learning_rate": 1.4580318431153266e-05, "loss": 0.1789, "step": 5787 }, { "epoch": 0.599854907244274, "grad_norm": 0.6239410042762756, "learning_rate": 1.4573856408828335e-05, "loss": 0.2141, "step": 5788 }, { "epoch": 0.5999585449269355, "grad_norm": 0.6547892093658447, "learning_rate": 1.4567394997912821e-05, "loss": 0.2006, "step": 5789 }, { "epoch": 0.6000621826095969, "grad_norm": 0.6406012773513794, "learning_rate": 1.4560934199134792e-05, "loss": 0.2158, "step": 5790 }, { "epoch": 0.6001658202922583, "grad_norm": 0.49104514718055725, "learning_rate": 1.4554474013222254e-05, "loss": 0.1527, "step": 5791 }, { "epoch": 0.6002694579749197, "grad_norm": 0.5826954245567322, "learning_rate": 1.4548014440903106e-05, "loss": 0.1849, "step": 5792 }, { "epoch": 0.6003730956575811, "grad_norm": 0.6551445126533508, "learning_rate": 1.4541555482905225e-05, "loss": 0.219, "step": 5793 }, { "epoch": 0.6004767333402425, "grad_norm": 0.5907532572746277, "learning_rate": 1.4535097139956383e-05, "loss": 0.2236, "step": 5794 }, { "epoch": 0.600580371022904, "grad_norm": 0.5432443022727966, "learning_rate": 1.4528639412784295e-05, "loss": 0.1802, "step": 5795 }, { "epoch": 0.6006840087055654, "grad_norm": 0.6686699390411377, "learning_rate": 1.452218230211662e-05, "loss": 0.2265, "step": 5796 }, { "epoch": 0.6007876463882268, "grad_norm": 0.536210834980011, "learning_rate": 1.4515725808680927e-05, "loss": 0.1792, "step": 5797 }, { "epoch": 0.6008912840708882, "grad_norm": 0.6930983662605286, "learning_rate": 1.4509269933204726e-05, "loss": 0.2277, "step": 5798 }, { "epoch": 0.6009949217535496, "grad_norm": 0.5104745626449585, "learning_rate": 1.450281467641545e-05, "loss": 0.1933, "step": 5799 }, { "epoch": 0.601098559436211, "grad_norm": 0.6219746470451355, "learning_rate": 1.4496360039040466e-05, "loss": 0.2294, "step": 5800 }, { "epoch": 0.6012021971188725, "grad_norm": 0.6207789182662964, "learning_rate": 1.4489906021807088e-05, "loss": 0.2242, "step": 5801 }, { "epoch": 0.6013058348015339, "grad_norm": 0.5910596251487732, "learning_rate": 1.4483452625442526e-05, "loss": 0.2022, "step": 5802 }, { "epoch": 0.6014094724841953, "grad_norm": 0.5521644353866577, "learning_rate": 1.4476999850673953e-05, "loss": 0.2039, "step": 5803 }, { "epoch": 0.6015131101668567, "grad_norm": 0.6113677024841309, "learning_rate": 1.4470547698228444e-05, "loss": 0.2237, "step": 5804 }, { "epoch": 0.6016167478495181, "grad_norm": 0.8434403538703918, "learning_rate": 1.4464096168833026e-05, "loss": 0.2313, "step": 5805 }, { "epoch": 0.6017203855321795, "grad_norm": 0.7482733130455017, "learning_rate": 1.4457645263214653e-05, "loss": 0.2591, "step": 5806 }, { "epoch": 0.601824023214841, "grad_norm": 0.6481677293777466, "learning_rate": 1.4451194982100182e-05, "loss": 0.2041, "step": 5807 }, { "epoch": 0.6019276608975024, "grad_norm": 0.5924575328826904, "learning_rate": 1.4444745326216448e-05, "loss": 0.2071, "step": 5808 }, { "epoch": 0.6020312985801638, "grad_norm": 0.6279072165489197, "learning_rate": 1.4438296296290171e-05, "loss": 0.2094, "step": 5809 }, { "epoch": 0.6021349362628252, "grad_norm": 0.5018296241760254, "learning_rate": 1.4431847893048014e-05, "loss": 0.1717, "step": 5810 }, { "epoch": 0.6022385739454866, "grad_norm": 0.5699949264526367, "learning_rate": 1.4425400117216591e-05, "loss": 0.1707, "step": 5811 }, { "epoch": 0.602342211628148, "grad_norm": 0.743048906326294, "learning_rate": 1.4418952969522413e-05, "loss": 0.2423, "step": 5812 }, { "epoch": 0.6024458493108094, "grad_norm": 0.5955899357795715, "learning_rate": 1.4412506450691943e-05, "loss": 0.2163, "step": 5813 }, { "epoch": 0.6025494869934708, "grad_norm": 0.572127103805542, "learning_rate": 1.4406060561451552e-05, "loss": 0.1935, "step": 5814 }, { "epoch": 0.6026531246761322, "grad_norm": 0.562829315662384, "learning_rate": 1.4399615302527563e-05, "loss": 0.1898, "step": 5815 }, { "epoch": 0.6027567623587936, "grad_norm": 0.5324281454086304, "learning_rate": 1.4393170674646219e-05, "loss": 0.1956, "step": 5816 }, { "epoch": 0.602860400041455, "grad_norm": 0.5725294947624207, "learning_rate": 1.4386726678533683e-05, "loss": 0.2052, "step": 5817 }, { "epoch": 0.6029640377241164, "grad_norm": 0.6350352168083191, "learning_rate": 1.4380283314916064e-05, "loss": 0.2063, "step": 5818 }, { "epoch": 0.6030676754067779, "grad_norm": 0.6516824960708618, "learning_rate": 1.4373840584519382e-05, "loss": 0.2174, "step": 5819 }, { "epoch": 0.6031713130894393, "grad_norm": 0.6768869161605835, "learning_rate": 1.4367398488069593e-05, "loss": 0.2007, "step": 5820 }, { "epoch": 0.6032749507721007, "grad_norm": 0.5802131295204163, "learning_rate": 1.4360957026292597e-05, "loss": 0.1979, "step": 5821 }, { "epoch": 0.6033785884547621, "grad_norm": 0.5170549154281616, "learning_rate": 1.4354516199914189e-05, "loss": 0.1807, "step": 5822 }, { "epoch": 0.6034822261374235, "grad_norm": 0.6196373105049133, "learning_rate": 1.4348076009660126e-05, "loss": 0.1965, "step": 5823 }, { "epoch": 0.603585863820085, "grad_norm": 0.5476102828979492, "learning_rate": 1.4341636456256062e-05, "loss": 0.168, "step": 5824 }, { "epoch": 0.6036895015027464, "grad_norm": 0.4828929603099823, "learning_rate": 1.4335197540427611e-05, "loss": 0.1573, "step": 5825 }, { "epoch": 0.6037931391854078, "grad_norm": 0.5340254902839661, "learning_rate": 1.4328759262900301e-05, "loss": 0.1683, "step": 5826 }, { "epoch": 0.6038967768680692, "grad_norm": 0.5734612941741943, "learning_rate": 1.432232162439957e-05, "loss": 0.1818, "step": 5827 }, { "epoch": 0.6040004145507306, "grad_norm": 0.5522999167442322, "learning_rate": 1.4315884625650823e-05, "loss": 0.2115, "step": 5828 }, { "epoch": 0.604104052233392, "grad_norm": 0.6274425387382507, "learning_rate": 1.4309448267379353e-05, "loss": 0.2546, "step": 5829 }, { "epoch": 0.6042076899160534, "grad_norm": 0.5536311268806458, "learning_rate": 1.4303012550310404e-05, "loss": 0.1809, "step": 5830 }, { "epoch": 0.6043113275987149, "grad_norm": 0.6251480579376221, "learning_rate": 1.4296577475169158e-05, "loss": 0.2058, "step": 5831 }, { "epoch": 0.6044149652813763, "grad_norm": 0.653142511844635, "learning_rate": 1.4290143042680682e-05, "loss": 0.21, "step": 5832 }, { "epoch": 0.6045186029640377, "grad_norm": 0.5902639031410217, "learning_rate": 1.4283709253570022e-05, "loss": 0.1963, "step": 5833 }, { "epoch": 0.6046222406466991, "grad_norm": 0.534339427947998, "learning_rate": 1.427727610856211e-05, "loss": 0.1939, "step": 5834 }, { "epoch": 0.6047258783293605, "grad_norm": 0.6114777326583862, "learning_rate": 1.4270843608381828e-05, "loss": 0.207, "step": 5835 }, { "epoch": 0.604829516012022, "grad_norm": 0.6344853639602661, "learning_rate": 1.4264411753753991e-05, "loss": 0.196, "step": 5836 }, { "epoch": 0.6049331536946834, "grad_norm": 0.6889345049858093, "learning_rate": 1.4257980545403315e-05, "loss": 0.2376, "step": 5837 }, { "epoch": 0.6050367913773448, "grad_norm": 0.5750885009765625, "learning_rate": 1.4251549984054472e-05, "loss": 0.1835, "step": 5838 }, { "epoch": 0.6051404290600062, "grad_norm": 0.6389616131782532, "learning_rate": 1.424512007043203e-05, "loss": 0.2091, "step": 5839 }, { "epoch": 0.6052440667426676, "grad_norm": 0.5323380827903748, "learning_rate": 1.4238690805260515e-05, "loss": 0.1784, "step": 5840 }, { "epoch": 0.605347704425329, "grad_norm": 0.5227212905883789, "learning_rate": 1.4232262189264373e-05, "loss": 0.1749, "step": 5841 }, { "epoch": 0.6054513421079905, "grad_norm": 0.5386372208595276, "learning_rate": 1.4225834223167948e-05, "loss": 0.1762, "step": 5842 }, { "epoch": 0.6055549797906519, "grad_norm": 0.509102463722229, "learning_rate": 1.421940690769556e-05, "loss": 0.1793, "step": 5843 }, { "epoch": 0.6056586174733133, "grad_norm": 0.5998203158378601, "learning_rate": 1.4212980243571406e-05, "loss": 0.2048, "step": 5844 }, { "epoch": 0.6057622551559747, "grad_norm": 0.6335312128067017, "learning_rate": 1.4206554231519642e-05, "loss": 0.2481, "step": 5845 }, { "epoch": 0.6058658928386361, "grad_norm": 0.5277844071388245, "learning_rate": 1.4200128872264347e-05, "loss": 0.1876, "step": 5846 }, { "epoch": 0.6059695305212975, "grad_norm": 0.5580236911773682, "learning_rate": 1.4193704166529512e-05, "loss": 0.2033, "step": 5847 }, { "epoch": 0.606073168203959, "grad_norm": 0.621285617351532, "learning_rate": 1.4187280115039062e-05, "loss": 0.1903, "step": 5848 }, { "epoch": 0.6061768058866204, "grad_norm": 0.5097457766532898, "learning_rate": 1.4180856718516858e-05, "loss": 0.1686, "step": 5849 }, { "epoch": 0.6062804435692818, "grad_norm": 0.6255810856819153, "learning_rate": 1.417443397768667e-05, "loss": 0.2329, "step": 5850 }, { "epoch": 0.6063840812519432, "grad_norm": 0.5889554619789124, "learning_rate": 1.416801189327221e-05, "loss": 0.2081, "step": 5851 }, { "epoch": 0.6064877189346046, "grad_norm": 0.6353864669799805, "learning_rate": 1.4161590465997095e-05, "loss": 0.2118, "step": 5852 }, { "epoch": 0.606591356617266, "grad_norm": 0.5470232367515564, "learning_rate": 1.4155169696584895e-05, "loss": 0.1968, "step": 5853 }, { "epoch": 0.6066949942999275, "grad_norm": 0.6519717574119568, "learning_rate": 1.4148749585759092e-05, "loss": 0.2176, "step": 5854 }, { "epoch": 0.6067986319825889, "grad_norm": 0.6173345446586609, "learning_rate": 1.4142330134243081e-05, "loss": 0.2298, "step": 5855 }, { "epoch": 0.6069022696652503, "grad_norm": 0.5106127262115479, "learning_rate": 1.4135911342760213e-05, "loss": 0.1628, "step": 5856 }, { "epoch": 0.6070059073479117, "grad_norm": 0.5532249212265015, "learning_rate": 1.4129493212033736e-05, "loss": 0.1945, "step": 5857 }, { "epoch": 0.6071095450305731, "grad_norm": 0.5892948508262634, "learning_rate": 1.412307574278683e-05, "loss": 0.1949, "step": 5858 }, { "epoch": 0.6072131827132345, "grad_norm": 0.4962332844734192, "learning_rate": 1.4116658935742625e-05, "loss": 0.1676, "step": 5859 }, { "epoch": 0.607316820395896, "grad_norm": 0.5226655602455139, "learning_rate": 1.411024279162414e-05, "loss": 0.1594, "step": 5860 }, { "epoch": 0.6074204580785574, "grad_norm": 0.6553589701652527, "learning_rate": 1.4103827311154347e-05, "loss": 0.2408, "step": 5861 }, { "epoch": 0.6075240957612188, "grad_norm": 0.480268269777298, "learning_rate": 1.4097412495056122e-05, "loss": 0.1564, "step": 5862 }, { "epoch": 0.6076277334438802, "grad_norm": 0.6084377765655518, "learning_rate": 1.4090998344052277e-05, "loss": 0.2365, "step": 5863 }, { "epoch": 0.6077313711265416, "grad_norm": 0.5667449831962585, "learning_rate": 1.408458485886556e-05, "loss": 0.21, "step": 5864 }, { "epoch": 0.607835008809203, "grad_norm": 0.5660369396209717, "learning_rate": 1.4078172040218622e-05, "loss": 0.1853, "step": 5865 }, { "epoch": 0.6079386464918645, "grad_norm": 0.5079784393310547, "learning_rate": 1.407175988883406e-05, "loss": 0.1748, "step": 5866 }, { "epoch": 0.6080422841745259, "grad_norm": 0.6271408200263977, "learning_rate": 1.4065348405434365e-05, "loss": 0.2038, "step": 5867 }, { "epoch": 0.6081459218571873, "grad_norm": 0.5546168684959412, "learning_rate": 1.4058937590741995e-05, "loss": 0.1919, "step": 5868 }, { "epoch": 0.6082495595398487, "grad_norm": 0.628492534160614, "learning_rate": 1.40525274454793e-05, "loss": 0.2464, "step": 5869 }, { "epoch": 0.6083531972225101, "grad_norm": 0.5692856907844543, "learning_rate": 1.4046117970368562e-05, "loss": 0.2058, "step": 5870 }, { "epoch": 0.6084568349051716, "grad_norm": 0.5562016367912292, "learning_rate": 1.4039709166132006e-05, "loss": 0.173, "step": 5871 }, { "epoch": 0.608560472587833, "grad_norm": 0.5617178678512573, "learning_rate": 1.4033301033491748e-05, "loss": 0.1998, "step": 5872 }, { "epoch": 0.6086641102704944, "grad_norm": 0.6305001974105835, "learning_rate": 1.402689357316985e-05, "loss": 0.208, "step": 5873 }, { "epoch": 0.6087677479531558, "grad_norm": 0.6569817662239075, "learning_rate": 1.402048678588831e-05, "loss": 0.2387, "step": 5874 }, { "epoch": 0.6088713856358172, "grad_norm": 0.6421927809715271, "learning_rate": 1.401408067236902e-05, "loss": 0.2147, "step": 5875 }, { "epoch": 0.6089750233184786, "grad_norm": 0.5628452301025391, "learning_rate": 1.4007675233333812e-05, "loss": 0.1741, "step": 5876 }, { "epoch": 0.60907866100114, "grad_norm": 0.5609790086746216, "learning_rate": 1.4001270469504442e-05, "loss": 0.2206, "step": 5877 }, { "epoch": 0.6091822986838015, "grad_norm": 0.5732092261314392, "learning_rate": 1.3994866381602592e-05, "loss": 0.197, "step": 5878 }, { "epoch": 0.6092859363664629, "grad_norm": 0.5437310338020325, "learning_rate": 1.3988462970349868e-05, "loss": 0.1677, "step": 5879 }, { "epoch": 0.6093895740491243, "grad_norm": 0.528835654258728, "learning_rate": 1.398206023646778e-05, "loss": 0.1728, "step": 5880 }, { "epoch": 0.6094932117317857, "grad_norm": 0.6198895573616028, "learning_rate": 1.39756581806778e-05, "loss": 0.1956, "step": 5881 }, { "epoch": 0.6095968494144471, "grad_norm": 0.6650860905647278, "learning_rate": 1.3969256803701288e-05, "loss": 0.2185, "step": 5882 }, { "epoch": 0.6097004870971086, "grad_norm": 0.6388286352157593, "learning_rate": 1.396285610625954e-05, "loss": 0.2208, "step": 5883 }, { "epoch": 0.60980412477977, "grad_norm": 0.5759437680244446, "learning_rate": 1.3956456089073789e-05, "loss": 0.1941, "step": 5884 }, { "epoch": 0.6099077624624314, "grad_norm": 0.6277048587799072, "learning_rate": 1.3950056752865166e-05, "loss": 0.2193, "step": 5885 }, { "epoch": 0.6100114001450928, "grad_norm": 0.6908484697341919, "learning_rate": 1.3943658098354749e-05, "loss": 0.2166, "step": 5886 }, { "epoch": 0.6101150378277542, "grad_norm": 0.6081604957580566, "learning_rate": 1.3937260126263512e-05, "loss": 0.2151, "step": 5887 }, { "epoch": 0.6102186755104156, "grad_norm": 0.6466516256332397, "learning_rate": 1.3930862837312384e-05, "loss": 0.2133, "step": 5888 }, { "epoch": 0.610322313193077, "grad_norm": 0.6081497669219971, "learning_rate": 1.3924466232222203e-05, "loss": 0.2237, "step": 5889 }, { "epoch": 0.6104259508757384, "grad_norm": 0.5361385345458984, "learning_rate": 1.3918070311713714e-05, "loss": 0.1824, "step": 5890 }, { "epoch": 0.6105295885583998, "grad_norm": 0.6183144450187683, "learning_rate": 1.3911675076507613e-05, "loss": 0.203, "step": 5891 }, { "epoch": 0.6106332262410612, "grad_norm": 0.5001223087310791, "learning_rate": 1.3905280527324499e-05, "loss": 0.1663, "step": 5892 }, { "epoch": 0.6107368639237226, "grad_norm": 0.6348176598548889, "learning_rate": 1.3898886664884894e-05, "loss": 0.2415, "step": 5893 }, { "epoch": 0.610840501606384, "grad_norm": 0.5159345865249634, "learning_rate": 1.389249348990927e-05, "loss": 0.1831, "step": 5894 }, { "epoch": 0.6109441392890455, "grad_norm": 0.6143372654914856, "learning_rate": 1.388610100311797e-05, "loss": 0.1734, "step": 5895 }, { "epoch": 0.6110477769717069, "grad_norm": 0.6014468669891357, "learning_rate": 1.3879709205231318e-05, "loss": 0.2024, "step": 5896 }, { "epoch": 0.6111514146543683, "grad_norm": 0.6340698003768921, "learning_rate": 1.387331809696951e-05, "loss": 0.2011, "step": 5897 }, { "epoch": 0.6112550523370297, "grad_norm": 0.5582529902458191, "learning_rate": 1.3866927679052693e-05, "loss": 0.1802, "step": 5898 }, { "epoch": 0.6113586900196911, "grad_norm": 0.601702868938446, "learning_rate": 1.386053795220094e-05, "loss": 0.1888, "step": 5899 }, { "epoch": 0.6114623277023525, "grad_norm": 0.5560715198516846, "learning_rate": 1.3854148917134219e-05, "loss": 0.1572, "step": 5900 }, { "epoch": 0.611565965385014, "grad_norm": 0.5951459407806396, "learning_rate": 1.3847760574572449e-05, "loss": 0.1912, "step": 5901 }, { "epoch": 0.6116696030676754, "grad_norm": 0.5832486152648926, "learning_rate": 1.3841372925235446e-05, "loss": 0.1851, "step": 5902 }, { "epoch": 0.6117732407503368, "grad_norm": 0.5868365168571472, "learning_rate": 1.3834985969842971e-05, "loss": 0.2079, "step": 5903 }, { "epoch": 0.6118768784329982, "grad_norm": 0.6246257424354553, "learning_rate": 1.3828599709114698e-05, "loss": 0.1944, "step": 5904 }, { "epoch": 0.6119805161156596, "grad_norm": 0.6336971521377563, "learning_rate": 1.382221414377021e-05, "loss": 0.2636, "step": 5905 }, { "epoch": 0.612084153798321, "grad_norm": 0.5235512256622314, "learning_rate": 1.3815829274529036e-05, "loss": 0.1764, "step": 5906 }, { "epoch": 0.6121877914809825, "grad_norm": 0.5604624152183533, "learning_rate": 1.3809445102110601e-05, "loss": 0.1885, "step": 5907 }, { "epoch": 0.6122914291636439, "grad_norm": 0.6259495615959167, "learning_rate": 1.3803061627234264e-05, "loss": 0.1987, "step": 5908 }, { "epoch": 0.6123950668463053, "grad_norm": 0.567039430141449, "learning_rate": 1.3796678850619318e-05, "loss": 0.1948, "step": 5909 }, { "epoch": 0.6124987045289667, "grad_norm": 0.510911762714386, "learning_rate": 1.3790296772984952e-05, "loss": 0.1761, "step": 5910 }, { "epoch": 0.6126023422116281, "grad_norm": 0.5724498629570007, "learning_rate": 1.3783915395050298e-05, "loss": 0.186, "step": 5911 }, { "epoch": 0.6127059798942895, "grad_norm": 0.7346729040145874, "learning_rate": 1.3777534717534384e-05, "loss": 0.2581, "step": 5912 }, { "epoch": 0.612809617576951, "grad_norm": 0.7468560338020325, "learning_rate": 1.3771154741156189e-05, "loss": 0.2128, "step": 5913 }, { "epoch": 0.6129132552596124, "grad_norm": 0.6244396567344666, "learning_rate": 1.3764775466634602e-05, "loss": 0.2212, "step": 5914 }, { "epoch": 0.6130168929422738, "grad_norm": 0.6232682466506958, "learning_rate": 1.3758396894688411e-05, "loss": 0.229, "step": 5915 }, { "epoch": 0.6131205306249352, "grad_norm": 0.7059705257415771, "learning_rate": 1.3752019026036365e-05, "loss": 0.2301, "step": 5916 }, { "epoch": 0.6132241683075966, "grad_norm": 0.5770846605300903, "learning_rate": 1.37456418613971e-05, "loss": 0.2041, "step": 5917 }, { "epoch": 0.613327805990258, "grad_norm": 0.5559613704681396, "learning_rate": 1.3739265401489177e-05, "loss": 0.1936, "step": 5918 }, { "epoch": 0.6134314436729195, "grad_norm": 0.6115998029708862, "learning_rate": 1.373288964703111e-05, "loss": 0.2183, "step": 5919 }, { "epoch": 0.6135350813555809, "grad_norm": 0.5869366526603699, "learning_rate": 1.372651459874129e-05, "loss": 0.1974, "step": 5920 }, { "epoch": 0.6136387190382423, "grad_norm": 0.6263245940208435, "learning_rate": 1.3720140257338054e-05, "loss": 0.211, "step": 5921 }, { "epoch": 0.6137423567209037, "grad_norm": 0.6086539626121521, "learning_rate": 1.3713766623539648e-05, "loss": 0.2089, "step": 5922 }, { "epoch": 0.6138459944035651, "grad_norm": 0.5775662660598755, "learning_rate": 1.3707393698064246e-05, "loss": 0.179, "step": 5923 }, { "epoch": 0.6139496320862265, "grad_norm": 0.4994238615036011, "learning_rate": 1.370102148162995e-05, "loss": 0.1706, "step": 5924 }, { "epoch": 0.614053269768888, "grad_norm": 0.5579550266265869, "learning_rate": 1.369464997495475e-05, "loss": 0.1878, "step": 5925 }, { "epoch": 0.6141569074515494, "grad_norm": 0.6892876029014587, "learning_rate": 1.36882791787566e-05, "loss": 0.2255, "step": 5926 }, { "epoch": 0.6142605451342108, "grad_norm": 0.5811253786087036, "learning_rate": 1.368190909375333e-05, "loss": 0.1869, "step": 5927 }, { "epoch": 0.6143641828168722, "grad_norm": 0.5586794018745422, "learning_rate": 1.3675539720662724e-05, "loss": 0.1835, "step": 5928 }, { "epoch": 0.6144678204995336, "grad_norm": 0.5717200636863708, "learning_rate": 1.3669171060202477e-05, "loss": 0.2098, "step": 5929 }, { "epoch": 0.614571458182195, "grad_norm": 0.5924586057662964, "learning_rate": 1.3662803113090183e-05, "loss": 0.2235, "step": 5930 }, { "epoch": 0.6146750958648565, "grad_norm": 0.6300281882286072, "learning_rate": 1.3656435880043393e-05, "loss": 0.191, "step": 5931 }, { "epoch": 0.6147787335475179, "grad_norm": 0.4872299134731293, "learning_rate": 1.365006936177954e-05, "loss": 0.1658, "step": 5932 }, { "epoch": 0.6148823712301793, "grad_norm": 0.6179653406143188, "learning_rate": 1.3643703559016e-05, "loss": 0.2167, "step": 5933 }, { "epoch": 0.6149860089128407, "grad_norm": 0.6873209476470947, "learning_rate": 1.3637338472470068e-05, "loss": 0.2234, "step": 5934 }, { "epoch": 0.6150896465955021, "grad_norm": 0.562300980091095, "learning_rate": 1.363097410285894e-05, "loss": 0.1928, "step": 5935 }, { "epoch": 0.6151932842781636, "grad_norm": 0.5228613615036011, "learning_rate": 1.3624610450899755e-05, "loss": 0.1785, "step": 5936 }, { "epoch": 0.615296921960825, "grad_norm": 0.6604247689247131, "learning_rate": 1.3618247517309548e-05, "loss": 0.2466, "step": 5937 }, { "epoch": 0.6154005596434864, "grad_norm": 0.6046388149261475, "learning_rate": 1.3611885302805291e-05, "loss": 0.2204, "step": 5938 }, { "epoch": 0.6155041973261478, "grad_norm": 0.5771061182022095, "learning_rate": 1.3605523808103874e-05, "loss": 0.1655, "step": 5939 }, { "epoch": 0.6156078350088092, "grad_norm": 0.5569134950637817, "learning_rate": 1.3599163033922083e-05, "loss": 0.1799, "step": 5940 }, { "epoch": 0.6157114726914706, "grad_norm": 0.5640320777893066, "learning_rate": 1.3592802980976661e-05, "loss": 0.1726, "step": 5941 }, { "epoch": 0.6158151103741321, "grad_norm": 0.640559732913971, "learning_rate": 1.3586443649984234e-05, "loss": 0.2414, "step": 5942 }, { "epoch": 0.6159187480567935, "grad_norm": 0.5472736954689026, "learning_rate": 1.3580085041661364e-05, "loss": 0.1822, "step": 5943 }, { "epoch": 0.6160223857394549, "grad_norm": 0.6231314539909363, "learning_rate": 1.3573727156724537e-05, "loss": 0.2318, "step": 5944 }, { "epoch": 0.6161260234221163, "grad_norm": 0.4999285936355591, "learning_rate": 1.3567369995890141e-05, "loss": 0.1651, "step": 5945 }, { "epoch": 0.6162296611047777, "grad_norm": 0.6263250112533569, "learning_rate": 1.35610135598745e-05, "loss": 0.2065, "step": 5946 }, { "epoch": 0.6163332987874391, "grad_norm": 0.6203282475471497, "learning_rate": 1.3554657849393831e-05, "loss": 0.1995, "step": 5947 }, { "epoch": 0.6164369364701006, "grad_norm": 0.5617152452468872, "learning_rate": 1.35483028651643e-05, "loss": 0.1854, "step": 5948 }, { "epoch": 0.616540574152762, "grad_norm": 0.7309468388557434, "learning_rate": 1.354194860790198e-05, "loss": 0.251, "step": 5949 }, { "epoch": 0.6166442118354234, "grad_norm": 0.5707843899726868, "learning_rate": 1.353559507832284e-05, "loss": 0.211, "step": 5950 }, { "epoch": 0.6167478495180848, "grad_norm": 0.6699648499488831, "learning_rate": 1.3529242277142806e-05, "loss": 0.2132, "step": 5951 }, { "epoch": 0.6168514872007462, "grad_norm": 0.631347119808197, "learning_rate": 1.3522890205077693e-05, "loss": 0.2241, "step": 5952 }, { "epoch": 0.6169551248834076, "grad_norm": 0.5974149703979492, "learning_rate": 1.3516538862843236e-05, "loss": 0.1831, "step": 5953 }, { "epoch": 0.6170587625660691, "grad_norm": 0.6694416403770447, "learning_rate": 1.351018825115511e-05, "loss": 0.2164, "step": 5954 }, { "epoch": 0.6171624002487305, "grad_norm": 0.5892156958580017, "learning_rate": 1.3503838370728879e-05, "loss": 0.2071, "step": 5955 }, { "epoch": 0.6172660379313919, "grad_norm": 0.6549639701843262, "learning_rate": 1.3497489222280045e-05, "loss": 0.2303, "step": 5956 }, { "epoch": 0.6173696756140533, "grad_norm": 0.6274462938308716, "learning_rate": 1.3491140806524013e-05, "loss": 0.2158, "step": 5957 }, { "epoch": 0.6174733132967147, "grad_norm": 0.6224632263183594, "learning_rate": 1.3484793124176112e-05, "loss": 0.2081, "step": 5958 }, { "epoch": 0.6175769509793761, "grad_norm": 0.5738040804862976, "learning_rate": 1.3478446175951603e-05, "loss": 0.1823, "step": 5959 }, { "epoch": 0.6176805886620376, "grad_norm": 0.5787317156791687, "learning_rate": 1.3472099962565639e-05, "loss": 0.2163, "step": 5960 }, { "epoch": 0.617784226344699, "grad_norm": 0.6176537275314331, "learning_rate": 1.3465754484733305e-05, "loss": 0.2005, "step": 5961 }, { "epoch": 0.6178878640273604, "grad_norm": 0.5757036805152893, "learning_rate": 1.345940974316959e-05, "loss": 0.1916, "step": 5962 }, { "epoch": 0.6179915017100218, "grad_norm": 0.4411177635192871, "learning_rate": 1.3453065738589422e-05, "loss": 0.1488, "step": 5963 }, { "epoch": 0.6180951393926832, "grad_norm": 0.6579338908195496, "learning_rate": 1.3446722471707632e-05, "loss": 0.2305, "step": 5964 }, { "epoch": 0.6181987770753445, "grad_norm": 0.6656500101089478, "learning_rate": 1.344037994323896e-05, "loss": 0.2265, "step": 5965 }, { "epoch": 0.618302414758006, "grad_norm": 0.6222073435783386, "learning_rate": 1.3434038153898086e-05, "loss": 0.2064, "step": 5966 }, { "epoch": 0.6184060524406674, "grad_norm": 0.6472442746162415, "learning_rate": 1.3427697104399583e-05, "loss": 0.2306, "step": 5967 }, { "epoch": 0.6185096901233288, "grad_norm": 0.617219865322113, "learning_rate": 1.3421356795457948e-05, "loss": 0.2312, "step": 5968 }, { "epoch": 0.6186133278059902, "grad_norm": 0.5383247137069702, "learning_rate": 1.3415017227787613e-05, "loss": 0.179, "step": 5969 }, { "epoch": 0.6187169654886516, "grad_norm": 0.5395634770393372, "learning_rate": 1.3408678402102892e-05, "loss": 0.1796, "step": 5970 }, { "epoch": 0.618820603171313, "grad_norm": 0.5641382932662964, "learning_rate": 1.340234031911805e-05, "loss": 0.1987, "step": 5971 }, { "epoch": 0.6189242408539745, "grad_norm": 0.49503156542778015, "learning_rate": 1.3396002979547236e-05, "loss": 0.1857, "step": 5972 }, { "epoch": 0.6190278785366359, "grad_norm": 0.5869657397270203, "learning_rate": 1.3389666384104544e-05, "loss": 0.2176, "step": 5973 }, { "epoch": 0.6191315162192973, "grad_norm": 0.582115113735199, "learning_rate": 1.3383330533503971e-05, "loss": 0.2017, "step": 5974 }, { "epoch": 0.6192351539019587, "grad_norm": 0.5133143663406372, "learning_rate": 1.3376995428459421e-05, "loss": 0.1893, "step": 5975 }, { "epoch": 0.6193387915846201, "grad_norm": 0.5100739598274231, "learning_rate": 1.3370661069684739e-05, "loss": 0.1676, "step": 5976 }, { "epoch": 0.6194424292672815, "grad_norm": 0.5148884057998657, "learning_rate": 1.3364327457893654e-05, "loss": 0.1661, "step": 5977 }, { "epoch": 0.619546066949943, "grad_norm": 0.6616286635398865, "learning_rate": 1.3357994593799834e-05, "loss": 0.2332, "step": 5978 }, { "epoch": 0.6196497046326044, "grad_norm": 0.6580483913421631, "learning_rate": 1.3351662478116867e-05, "loss": 0.2324, "step": 5979 }, { "epoch": 0.6197533423152658, "grad_norm": 0.5478088855743408, "learning_rate": 1.3345331111558233e-05, "loss": 0.1849, "step": 5980 }, { "epoch": 0.6198569799979272, "grad_norm": 0.5173262357711792, "learning_rate": 1.3339000494837348e-05, "loss": 0.1809, "step": 5981 }, { "epoch": 0.6199606176805886, "grad_norm": 0.6629555225372314, "learning_rate": 1.3332670628667523e-05, "loss": 0.2339, "step": 5982 }, { "epoch": 0.62006425536325, "grad_norm": 0.5579218864440918, "learning_rate": 1.3326341513762014e-05, "loss": 0.1951, "step": 5983 }, { "epoch": 0.6201678930459115, "grad_norm": 0.608124315738678, "learning_rate": 1.3320013150833971e-05, "loss": 0.1826, "step": 5984 }, { "epoch": 0.6202715307285729, "grad_norm": 0.6063987016677856, "learning_rate": 1.3313685540596452e-05, "loss": 0.181, "step": 5985 }, { "epoch": 0.6203751684112343, "grad_norm": 0.5893003344535828, "learning_rate": 1.3307358683762469e-05, "loss": 0.2025, "step": 5986 }, { "epoch": 0.6204788060938957, "grad_norm": 0.6283261775970459, "learning_rate": 1.3301032581044887e-05, "loss": 0.1977, "step": 5987 }, { "epoch": 0.6205824437765571, "grad_norm": 0.6476390361785889, "learning_rate": 1.3294707233156548e-05, "loss": 0.2452, "step": 5988 }, { "epoch": 0.6206860814592186, "grad_norm": 0.6753994226455688, "learning_rate": 1.3288382640810183e-05, "loss": 0.2337, "step": 5989 }, { "epoch": 0.62078971914188, "grad_norm": 0.5716283321380615, "learning_rate": 1.3282058804718415e-05, "loss": 0.1922, "step": 5990 }, { "epoch": 0.6208933568245414, "grad_norm": 0.5066370964050293, "learning_rate": 1.3275735725593827e-05, "loss": 0.179, "step": 5991 }, { "epoch": 0.6209969945072028, "grad_norm": 0.598387598991394, "learning_rate": 1.3269413404148878e-05, "loss": 0.2116, "step": 5992 }, { "epoch": 0.6211006321898642, "grad_norm": 0.5515682101249695, "learning_rate": 1.3263091841095963e-05, "loss": 0.186, "step": 5993 }, { "epoch": 0.6212042698725256, "grad_norm": 0.578323483467102, "learning_rate": 1.3256771037147393e-05, "loss": 0.185, "step": 5994 }, { "epoch": 0.6213079075551871, "grad_norm": 0.6118210554122925, "learning_rate": 1.3250450993015375e-05, "loss": 0.2163, "step": 5995 }, { "epoch": 0.6214115452378485, "grad_norm": 0.6113288998603821, "learning_rate": 1.324413170941205e-05, "loss": 0.2106, "step": 5996 }, { "epoch": 0.6215151829205099, "grad_norm": 0.4464210569858551, "learning_rate": 1.323781318704946e-05, "loss": 0.1524, "step": 5997 }, { "epoch": 0.6216188206031713, "grad_norm": 0.5403082966804504, "learning_rate": 1.3231495426639565e-05, "loss": 0.1801, "step": 5998 }, { "epoch": 0.6217224582858327, "grad_norm": 0.5802052617073059, "learning_rate": 1.3225178428894252e-05, "loss": 0.2067, "step": 5999 }, { "epoch": 0.6218260959684941, "grad_norm": 0.6188898086547852, "learning_rate": 1.3218862194525293e-05, "loss": 0.2286, "step": 6000 }, { "epoch": 0.6219297336511556, "grad_norm": 0.6729816794395447, "learning_rate": 1.3212546724244407e-05, "loss": 0.2219, "step": 6001 }, { "epoch": 0.622033371333817, "grad_norm": 0.5771058797836304, "learning_rate": 1.3206232018763201e-05, "loss": 0.2049, "step": 6002 }, { "epoch": 0.6221370090164784, "grad_norm": 0.5784742832183838, "learning_rate": 1.3199918078793205e-05, "loss": 0.1952, "step": 6003 }, { "epoch": 0.6222406466991398, "grad_norm": 0.5525607466697693, "learning_rate": 1.3193604905045881e-05, "loss": 0.1839, "step": 6004 }, { "epoch": 0.6223442843818012, "grad_norm": 0.587228000164032, "learning_rate": 1.3187292498232567e-05, "loss": 0.2023, "step": 6005 }, { "epoch": 0.6224479220644626, "grad_norm": 0.6367173790931702, "learning_rate": 1.3180980859064549e-05, "loss": 0.1923, "step": 6006 }, { "epoch": 0.6225515597471241, "grad_norm": 0.5229629874229431, "learning_rate": 1.3174669988253002e-05, "loss": 0.1719, "step": 6007 }, { "epoch": 0.6226551974297855, "grad_norm": 0.572173535823822, "learning_rate": 1.3168359886509032e-05, "loss": 0.1868, "step": 6008 }, { "epoch": 0.6227588351124469, "grad_norm": 0.5453316569328308, "learning_rate": 1.3162050554543655e-05, "loss": 0.2008, "step": 6009 }, { "epoch": 0.6228624727951083, "grad_norm": 0.5589512586593628, "learning_rate": 1.3155741993067782e-05, "loss": 0.1805, "step": 6010 }, { "epoch": 0.6229661104777697, "grad_norm": 0.582014799118042, "learning_rate": 1.3149434202792272e-05, "loss": 0.199, "step": 6011 }, { "epoch": 0.6230697481604311, "grad_norm": 0.5078872442245483, "learning_rate": 1.3143127184427863e-05, "loss": 0.1789, "step": 6012 }, { "epoch": 0.6231733858430926, "grad_norm": 0.5509869456291199, "learning_rate": 1.3136820938685218e-05, "loss": 0.1822, "step": 6013 }, { "epoch": 0.623277023525754, "grad_norm": 0.619751513004303, "learning_rate": 1.3130515466274929e-05, "loss": 0.1909, "step": 6014 }, { "epoch": 0.6233806612084154, "grad_norm": 0.5621147751808167, "learning_rate": 1.3124210767907472e-05, "loss": 0.1961, "step": 6015 }, { "epoch": 0.6234842988910768, "grad_norm": 0.686894953250885, "learning_rate": 1.3117906844293265e-05, "loss": 0.2095, "step": 6016 }, { "epoch": 0.6235879365737382, "grad_norm": 0.6219806671142578, "learning_rate": 1.3111603696142608e-05, "loss": 0.2168, "step": 6017 }, { "epoch": 0.6236915742563997, "grad_norm": 0.5394052267074585, "learning_rate": 1.310530132416574e-05, "loss": 0.1844, "step": 6018 }, { "epoch": 0.6237952119390611, "grad_norm": 0.5847477316856384, "learning_rate": 1.3098999729072808e-05, "loss": 0.2056, "step": 6019 }, { "epoch": 0.6238988496217225, "grad_norm": 0.572024405002594, "learning_rate": 1.3092698911573851e-05, "loss": 0.2052, "step": 6020 }, { "epoch": 0.6240024873043839, "grad_norm": 0.6376693844795227, "learning_rate": 1.308639887237885e-05, "loss": 0.2321, "step": 6021 }, { "epoch": 0.6241061249870453, "grad_norm": 0.6629868745803833, "learning_rate": 1.3080099612197668e-05, "loss": 0.2018, "step": 6022 }, { "epoch": 0.6242097626697067, "grad_norm": 0.577773928642273, "learning_rate": 1.3073801131740104e-05, "loss": 0.1968, "step": 6023 }, { "epoch": 0.6243134003523682, "grad_norm": 0.6478099226951599, "learning_rate": 1.306750343171587e-05, "loss": 0.2023, "step": 6024 }, { "epoch": 0.6244170380350296, "grad_norm": 0.6318678259849548, "learning_rate": 1.3061206512834566e-05, "loss": 0.2024, "step": 6025 }, { "epoch": 0.624520675717691, "grad_norm": 0.5699331760406494, "learning_rate": 1.305491037580573e-05, "loss": 0.1704, "step": 6026 }, { "epoch": 0.6246243134003524, "grad_norm": 0.4767056405544281, "learning_rate": 1.3048615021338793e-05, "loss": 0.1338, "step": 6027 }, { "epoch": 0.6247279510830138, "grad_norm": 0.5985897779464722, "learning_rate": 1.3042320450143107e-05, "loss": 0.186, "step": 6028 }, { "epoch": 0.6248315887656752, "grad_norm": 0.5831640958786011, "learning_rate": 1.3036026662927945e-05, "loss": 0.1772, "step": 6029 }, { "epoch": 0.6249352264483367, "grad_norm": 0.5923576951026917, "learning_rate": 1.3029733660402468e-05, "loss": 0.2073, "step": 6030 }, { "epoch": 0.6250388641309981, "grad_norm": 0.5854648351669312, "learning_rate": 1.3023441443275769e-05, "loss": 0.1938, "step": 6031 }, { "epoch": 0.6251425018136595, "grad_norm": 0.6780679821968079, "learning_rate": 1.3017150012256839e-05, "loss": 0.2207, "step": 6032 }, { "epoch": 0.6252461394963209, "grad_norm": 0.6159749031066895, "learning_rate": 1.301085936805459e-05, "loss": 0.2198, "step": 6033 }, { "epoch": 0.6253497771789823, "grad_norm": 0.5998734831809998, "learning_rate": 1.3004569511377847e-05, "loss": 0.1759, "step": 6034 }, { "epoch": 0.6254534148616437, "grad_norm": 0.5629125237464905, "learning_rate": 1.2998280442935332e-05, "loss": 0.1934, "step": 6035 }, { "epoch": 0.6255570525443052, "grad_norm": 0.6259785890579224, "learning_rate": 1.2991992163435698e-05, "loss": 0.2059, "step": 6036 }, { "epoch": 0.6256606902269666, "grad_norm": 0.6138043403625488, "learning_rate": 1.2985704673587489e-05, "loss": 0.1864, "step": 6037 }, { "epoch": 0.625764327909628, "grad_norm": 0.5702097415924072, "learning_rate": 1.2979417974099171e-05, "loss": 0.2021, "step": 6038 }, { "epoch": 0.6258679655922894, "grad_norm": 0.5695834755897522, "learning_rate": 1.297313206567913e-05, "loss": 0.2073, "step": 6039 }, { "epoch": 0.6259716032749508, "grad_norm": 0.5235719680786133, "learning_rate": 1.2966846949035638e-05, "loss": 0.1831, "step": 6040 }, { "epoch": 0.6260752409576121, "grad_norm": 0.7065122723579407, "learning_rate": 1.2960562624876905e-05, "loss": 0.2351, "step": 6041 }, { "epoch": 0.6261788786402736, "grad_norm": 0.5279736518859863, "learning_rate": 1.2954279093911022e-05, "loss": 0.1734, "step": 6042 }, { "epoch": 0.626282516322935, "grad_norm": 0.6119756698608398, "learning_rate": 1.2947996356846022e-05, "loss": 0.1967, "step": 6043 }, { "epoch": 0.6263861540055964, "grad_norm": 0.5212472081184387, "learning_rate": 1.2941714414389836e-05, "loss": 0.1729, "step": 6044 }, { "epoch": 0.6264897916882578, "grad_norm": 0.5788500308990479, "learning_rate": 1.2935433267250291e-05, "loss": 0.2174, "step": 6045 }, { "epoch": 0.6265934293709192, "grad_norm": 0.6433359384536743, "learning_rate": 1.2929152916135142e-05, "loss": 0.2228, "step": 6046 }, { "epoch": 0.6266970670535806, "grad_norm": 0.5505836606025696, "learning_rate": 1.292287336175206e-05, "loss": 0.1746, "step": 6047 }, { "epoch": 0.626800704736242, "grad_norm": 0.6313563585281372, "learning_rate": 1.2916594604808595e-05, "loss": 0.205, "step": 6048 }, { "epoch": 0.6269043424189035, "grad_norm": 0.6382592916488647, "learning_rate": 1.291031664601225e-05, "loss": 0.2262, "step": 6049 }, { "epoch": 0.6270079801015649, "grad_norm": 0.5864508748054504, "learning_rate": 1.2904039486070393e-05, "loss": 0.2127, "step": 6050 }, { "epoch": 0.6271116177842263, "grad_norm": 0.6004242897033691, "learning_rate": 1.2897763125690337e-05, "loss": 0.2149, "step": 6051 }, { "epoch": 0.6272152554668877, "grad_norm": 0.6075720191001892, "learning_rate": 1.2891487565579301e-05, "loss": 0.2105, "step": 6052 }, { "epoch": 0.6273188931495491, "grad_norm": 0.6497997641563416, "learning_rate": 1.2885212806444386e-05, "loss": 0.2003, "step": 6053 }, { "epoch": 0.6274225308322106, "grad_norm": 0.6276025176048279, "learning_rate": 1.287893884899264e-05, "loss": 0.2091, "step": 6054 }, { "epoch": 0.627526168514872, "grad_norm": 0.6251934766769409, "learning_rate": 1.2872665693930989e-05, "loss": 0.2007, "step": 6055 }, { "epoch": 0.6276298061975334, "grad_norm": 0.5782637596130371, "learning_rate": 1.2866393341966283e-05, "loss": 0.2139, "step": 6056 }, { "epoch": 0.6277334438801948, "grad_norm": 0.5191376209259033, "learning_rate": 1.2860121793805295e-05, "loss": 0.1795, "step": 6057 }, { "epoch": 0.6278370815628562, "grad_norm": 0.6319491267204285, "learning_rate": 1.2853851050154683e-05, "loss": 0.2185, "step": 6058 }, { "epoch": 0.6279407192455176, "grad_norm": 0.5615255236625671, "learning_rate": 1.2847581111721029e-05, "loss": 0.1724, "step": 6059 }, { "epoch": 0.6280443569281791, "grad_norm": 0.6250758171081543, "learning_rate": 1.2841311979210804e-05, "loss": 0.2187, "step": 6060 }, { "epoch": 0.6281479946108405, "grad_norm": 0.5791369676589966, "learning_rate": 1.2835043653330423e-05, "loss": 0.1838, "step": 6061 }, { "epoch": 0.6282516322935019, "grad_norm": 0.6014963984489441, "learning_rate": 1.2828776134786192e-05, "loss": 0.2082, "step": 6062 }, { "epoch": 0.6283552699761633, "grad_norm": 0.6349720358848572, "learning_rate": 1.2822509424284308e-05, "loss": 0.1994, "step": 6063 }, { "epoch": 0.6284589076588247, "grad_norm": 0.5179983377456665, "learning_rate": 1.281624352253091e-05, "loss": 0.1738, "step": 6064 }, { "epoch": 0.6285625453414861, "grad_norm": 0.5795208811759949, "learning_rate": 1.2809978430232024e-05, "loss": 0.1772, "step": 6065 }, { "epoch": 0.6286661830241476, "grad_norm": 0.5753578543663025, "learning_rate": 1.2803714148093586e-05, "loss": 0.186, "step": 6066 }, { "epoch": 0.628769820706809, "grad_norm": 0.5059354305267334, "learning_rate": 1.2797450676821459e-05, "loss": 0.1857, "step": 6067 }, { "epoch": 0.6288734583894704, "grad_norm": 0.5674526691436768, "learning_rate": 1.279118801712139e-05, "loss": 0.2047, "step": 6068 }, { "epoch": 0.6289770960721318, "grad_norm": 0.5886660814285278, "learning_rate": 1.2784926169699053e-05, "loss": 0.189, "step": 6069 }, { "epoch": 0.6290807337547932, "grad_norm": 0.5672776103019714, "learning_rate": 1.2778665135260009e-05, "loss": 0.2002, "step": 6070 }, { "epoch": 0.6291843714374546, "grad_norm": 0.6054661273956299, "learning_rate": 1.277240491450976e-05, "loss": 0.2121, "step": 6071 }, { "epoch": 0.6292880091201161, "grad_norm": 0.6700482964515686, "learning_rate": 1.2766145508153689e-05, "loss": 0.2335, "step": 6072 }, { "epoch": 0.6293916468027775, "grad_norm": 0.5708560943603516, "learning_rate": 1.275988691689709e-05, "loss": 0.1988, "step": 6073 }, { "epoch": 0.6294952844854389, "grad_norm": 0.5475946068763733, "learning_rate": 1.2753629141445187e-05, "loss": 0.1719, "step": 6074 }, { "epoch": 0.6295989221681003, "grad_norm": 0.6204192638397217, "learning_rate": 1.2747372182503085e-05, "loss": 0.1957, "step": 6075 }, { "epoch": 0.6297025598507617, "grad_norm": 0.4806893765926361, "learning_rate": 1.2741116040775805e-05, "loss": 0.1546, "step": 6076 }, { "epoch": 0.6298061975334232, "grad_norm": 0.562664270401001, "learning_rate": 1.2734860716968295e-05, "loss": 0.1696, "step": 6077 }, { "epoch": 0.6299098352160846, "grad_norm": 0.6378372311592102, "learning_rate": 1.2728606211785381e-05, "loss": 0.2234, "step": 6078 }, { "epoch": 0.630013472898746, "grad_norm": 0.6317660212516785, "learning_rate": 1.2722352525931818e-05, "loss": 0.1851, "step": 6079 }, { "epoch": 0.6301171105814074, "grad_norm": 0.6436264514923096, "learning_rate": 1.2716099660112253e-05, "loss": 0.2365, "step": 6080 }, { "epoch": 0.6302207482640688, "grad_norm": 0.529187798500061, "learning_rate": 1.2709847615031258e-05, "loss": 0.1745, "step": 6081 }, { "epoch": 0.6303243859467302, "grad_norm": 0.587397575378418, "learning_rate": 1.2703596391393305e-05, "loss": 0.1912, "step": 6082 }, { "epoch": 0.6304280236293917, "grad_norm": 0.597055971622467, "learning_rate": 1.2697345989902765e-05, "loss": 0.2151, "step": 6083 }, { "epoch": 0.6305316613120531, "grad_norm": 0.6372289657592773, "learning_rate": 1.2691096411263932e-05, "loss": 0.1959, "step": 6084 }, { "epoch": 0.6306352989947145, "grad_norm": 0.6131249666213989, "learning_rate": 1.2684847656180982e-05, "loss": 0.1865, "step": 6085 }, { "epoch": 0.6307389366773759, "grad_norm": 0.5528261065483093, "learning_rate": 1.2678599725358031e-05, "loss": 0.1794, "step": 6086 }, { "epoch": 0.6308425743600373, "grad_norm": 0.5730780959129333, "learning_rate": 1.2672352619499089e-05, "loss": 0.1803, "step": 6087 }, { "epoch": 0.6309462120426987, "grad_norm": 0.6822916865348816, "learning_rate": 1.2666106339308053e-05, "loss": 0.2319, "step": 6088 }, { "epoch": 0.6310498497253602, "grad_norm": 0.6828444600105286, "learning_rate": 1.2659860885488761e-05, "loss": 0.2448, "step": 6089 }, { "epoch": 0.6311534874080216, "grad_norm": 0.6281768083572388, "learning_rate": 1.2653616258744929e-05, "loss": 0.1941, "step": 6090 }, { "epoch": 0.631257125090683, "grad_norm": 0.5856252312660217, "learning_rate": 1.2647372459780196e-05, "loss": 0.1912, "step": 6091 }, { "epoch": 0.6313607627733444, "grad_norm": 0.5877519249916077, "learning_rate": 1.264112948929811e-05, "loss": 0.2097, "step": 6092 }, { "epoch": 0.6314644004560058, "grad_norm": 0.6103312969207764, "learning_rate": 1.263488734800211e-05, "loss": 0.2068, "step": 6093 }, { "epoch": 0.6315680381386672, "grad_norm": 0.5803897380828857, "learning_rate": 1.2628646036595558e-05, "loss": 0.2086, "step": 6094 }, { "epoch": 0.6316716758213287, "grad_norm": 0.5573654770851135, "learning_rate": 1.2622405555781705e-05, "loss": 0.1879, "step": 6095 }, { "epoch": 0.6317753135039901, "grad_norm": 0.4737562835216522, "learning_rate": 1.2616165906263729e-05, "loss": 0.1428, "step": 6096 }, { "epoch": 0.6318789511866515, "grad_norm": 0.6127933263778687, "learning_rate": 1.2609927088744702e-05, "loss": 0.2075, "step": 6097 }, { "epoch": 0.6319825888693129, "grad_norm": 0.5848706364631653, "learning_rate": 1.2603689103927596e-05, "loss": 0.225, "step": 6098 }, { "epoch": 0.6320862265519743, "grad_norm": 0.6001607775688171, "learning_rate": 1.2597451952515312e-05, "loss": 0.2076, "step": 6099 }, { "epoch": 0.6321898642346357, "grad_norm": 0.5365419983863831, "learning_rate": 1.2591215635210632e-05, "loss": 0.1666, "step": 6100 }, { "epoch": 0.6322935019172972, "grad_norm": 0.5268027186393738, "learning_rate": 1.2584980152716253e-05, "loss": 0.1595, "step": 6101 }, { "epoch": 0.6323971395999586, "grad_norm": 0.4245770573616028, "learning_rate": 1.2578745505734789e-05, "loss": 0.1364, "step": 6102 }, { "epoch": 0.63250077728262, "grad_norm": 0.6496620774269104, "learning_rate": 1.2572511694968741e-05, "loss": 0.2125, "step": 6103 }, { "epoch": 0.6326044149652814, "grad_norm": 0.5363298654556274, "learning_rate": 1.2566278721120536e-05, "loss": 0.1786, "step": 6104 }, { "epoch": 0.6327080526479428, "grad_norm": 0.6401138305664062, "learning_rate": 1.2560046584892478e-05, "loss": 0.2154, "step": 6105 }, { "epoch": 0.6328116903306042, "grad_norm": 0.5801215767860413, "learning_rate": 1.255381528698681e-05, "loss": 0.1994, "step": 6106 }, { "epoch": 0.6329153280132657, "grad_norm": 0.5988919734954834, "learning_rate": 1.2547584828105665e-05, "loss": 0.2116, "step": 6107 }, { "epoch": 0.6330189656959271, "grad_norm": 0.6742331981658936, "learning_rate": 1.2541355208951066e-05, "loss": 0.2405, "step": 6108 }, { "epoch": 0.6331226033785885, "grad_norm": 0.5302967429161072, "learning_rate": 1.2535126430224977e-05, "loss": 0.1704, "step": 6109 }, { "epoch": 0.6332262410612499, "grad_norm": 0.530825138092041, "learning_rate": 1.2528898492629233e-05, "loss": 0.1832, "step": 6110 }, { "epoch": 0.6333298787439113, "grad_norm": 0.6048321723937988, "learning_rate": 1.2522671396865592e-05, "loss": 0.199, "step": 6111 }, { "epoch": 0.6334335164265728, "grad_norm": 0.6834918260574341, "learning_rate": 1.2516445143635723e-05, "loss": 0.244, "step": 6112 }, { "epoch": 0.6335371541092342, "grad_norm": 0.49825942516326904, "learning_rate": 1.2510219733641171e-05, "loss": 0.168, "step": 6113 }, { "epoch": 0.6336407917918956, "grad_norm": 0.5733488202095032, "learning_rate": 1.2503995167583423e-05, "loss": 0.1786, "step": 6114 }, { "epoch": 0.633744429474557, "grad_norm": 0.5585787296295166, "learning_rate": 1.2497771446163842e-05, "loss": 0.1995, "step": 6115 }, { "epoch": 0.6338480671572184, "grad_norm": 0.5882747769355774, "learning_rate": 1.249154857008371e-05, "loss": 0.188, "step": 6116 }, { "epoch": 0.6339517048398797, "grad_norm": 0.5983887314796448, "learning_rate": 1.2485326540044223e-05, "loss": 0.1747, "step": 6117 }, { "epoch": 0.6340553425225411, "grad_norm": 0.6462018489837646, "learning_rate": 1.2479105356746453e-05, "loss": 0.2252, "step": 6118 }, { "epoch": 0.6341589802052026, "grad_norm": 0.6256545186042786, "learning_rate": 1.2472885020891403e-05, "loss": 0.2436, "step": 6119 }, { "epoch": 0.634262617887864, "grad_norm": 0.5571950078010559, "learning_rate": 1.246666553317996e-05, "loss": 0.2127, "step": 6120 }, { "epoch": 0.6343662555705254, "grad_norm": 0.6304576396942139, "learning_rate": 1.2460446894312938e-05, "loss": 0.196, "step": 6121 }, { "epoch": 0.6344698932531868, "grad_norm": 0.5518038272857666, "learning_rate": 1.2454229104991046e-05, "loss": 0.1952, "step": 6122 }, { "epoch": 0.6345735309358482, "grad_norm": 0.5594738125801086, "learning_rate": 1.2448012165914879e-05, "loss": 0.1723, "step": 6123 }, { "epoch": 0.6346771686185096, "grad_norm": 0.5120114684104919, "learning_rate": 1.244179607778497e-05, "loss": 0.1828, "step": 6124 }, { "epoch": 0.6347808063011711, "grad_norm": 0.615667462348938, "learning_rate": 1.2435580841301723e-05, "loss": 0.2202, "step": 6125 }, { "epoch": 0.6348844439838325, "grad_norm": 0.6665404438972473, "learning_rate": 1.2429366457165464e-05, "loss": 0.2206, "step": 6126 }, { "epoch": 0.6349880816664939, "grad_norm": 0.6501171588897705, "learning_rate": 1.2423152926076434e-05, "loss": 0.191, "step": 6127 }, { "epoch": 0.6350917193491553, "grad_norm": 0.5853009819984436, "learning_rate": 1.2416940248734748e-05, "loss": 0.2092, "step": 6128 }, { "epoch": 0.6351953570318167, "grad_norm": 0.6276318430900574, "learning_rate": 1.2410728425840452e-05, "loss": 0.2164, "step": 6129 }, { "epoch": 0.6352989947144781, "grad_norm": 0.6532617211341858, "learning_rate": 1.2404517458093471e-05, "loss": 0.2066, "step": 6130 }, { "epoch": 0.6354026323971396, "grad_norm": 0.6746910810470581, "learning_rate": 1.2398307346193659e-05, "loss": 0.2314, "step": 6131 }, { "epoch": 0.635506270079801, "grad_norm": 0.6130415797233582, "learning_rate": 1.239209809084076e-05, "loss": 0.2115, "step": 6132 }, { "epoch": 0.6356099077624624, "grad_norm": 0.6588493585586548, "learning_rate": 1.2385889692734416e-05, "loss": 0.2204, "step": 6133 }, { "epoch": 0.6357135454451238, "grad_norm": 0.5292779207229614, "learning_rate": 1.2379682152574195e-05, "loss": 0.2022, "step": 6134 }, { "epoch": 0.6358171831277852, "grad_norm": 0.4803861975669861, "learning_rate": 1.2373475471059538e-05, "loss": 0.1608, "step": 6135 }, { "epoch": 0.6359208208104467, "grad_norm": 0.5866460204124451, "learning_rate": 1.2367269648889804e-05, "loss": 0.199, "step": 6136 }, { "epoch": 0.6360244584931081, "grad_norm": 0.6466445326805115, "learning_rate": 1.2361064686764265e-05, "loss": 0.2038, "step": 6137 }, { "epoch": 0.6361280961757695, "grad_norm": 0.5443708300590515, "learning_rate": 1.2354860585382084e-05, "loss": 0.1699, "step": 6138 }, { "epoch": 0.6362317338584309, "grad_norm": 0.638749361038208, "learning_rate": 1.234865734544233e-05, "loss": 0.1992, "step": 6139 }, { "epoch": 0.6363353715410923, "grad_norm": 0.5929224491119385, "learning_rate": 1.2342454967643964e-05, "loss": 0.1935, "step": 6140 }, { "epoch": 0.6364390092237537, "grad_norm": 0.6019617915153503, "learning_rate": 1.2336253452685871e-05, "loss": 0.2156, "step": 6141 }, { "epoch": 0.6365426469064152, "grad_norm": 0.6148455142974854, "learning_rate": 1.2330052801266832e-05, "loss": 0.1975, "step": 6142 }, { "epoch": 0.6366462845890766, "grad_norm": 0.598908543586731, "learning_rate": 1.232385301408551e-05, "loss": 0.2119, "step": 6143 }, { "epoch": 0.636749922271738, "grad_norm": 0.6222452521324158, "learning_rate": 1.231765409184051e-05, "loss": 0.1918, "step": 6144 }, { "epoch": 0.6368535599543994, "grad_norm": 0.6636112928390503, "learning_rate": 1.2311456035230291e-05, "loss": 0.194, "step": 6145 }, { "epoch": 0.6369571976370608, "grad_norm": 0.5806705951690674, "learning_rate": 1.230525884495326e-05, "loss": 0.2072, "step": 6146 }, { "epoch": 0.6370608353197222, "grad_norm": 0.6016364097595215, "learning_rate": 1.2299062521707702e-05, "loss": 0.2144, "step": 6147 }, { "epoch": 0.6371644730023837, "grad_norm": 0.7070096135139465, "learning_rate": 1.2292867066191803e-05, "loss": 0.2558, "step": 6148 }, { "epoch": 0.6372681106850451, "grad_norm": 0.6243141889572144, "learning_rate": 1.2286672479103672e-05, "loss": 0.1832, "step": 6149 }, { "epoch": 0.6373717483677065, "grad_norm": 0.5644826292991638, "learning_rate": 1.2280478761141286e-05, "loss": 0.2061, "step": 6150 }, { "epoch": 0.6374753860503679, "grad_norm": 0.5745707154273987, "learning_rate": 1.2274285913002553e-05, "loss": 0.2043, "step": 6151 }, { "epoch": 0.6375790237330293, "grad_norm": 0.5755109190940857, "learning_rate": 1.226809393538528e-05, "loss": 0.1898, "step": 6152 }, { "epoch": 0.6376826614156907, "grad_norm": 0.5627861618995667, "learning_rate": 1.2261902828987155e-05, "loss": 0.1917, "step": 6153 }, { "epoch": 0.6377862990983522, "grad_norm": 0.5213609933853149, "learning_rate": 1.22557125945058e-05, "loss": 0.1626, "step": 6154 }, { "epoch": 0.6378899367810136, "grad_norm": 0.5159831047058105, "learning_rate": 1.22495232326387e-05, "loss": 0.173, "step": 6155 }, { "epoch": 0.637993574463675, "grad_norm": 0.5197933316230774, "learning_rate": 1.224333474408328e-05, "loss": 0.1644, "step": 6156 }, { "epoch": 0.6380972121463364, "grad_norm": 0.6517233848571777, "learning_rate": 1.2237147129536844e-05, "loss": 0.2025, "step": 6157 }, { "epoch": 0.6382008498289978, "grad_norm": 0.5894943475723267, "learning_rate": 1.2230960389696595e-05, "loss": 0.1898, "step": 6158 }, { "epoch": 0.6383044875116592, "grad_norm": 0.6199367046356201, "learning_rate": 1.2224774525259661e-05, "loss": 0.2153, "step": 6159 }, { "epoch": 0.6384081251943207, "grad_norm": 0.552657425403595, "learning_rate": 1.2218589536923045e-05, "loss": 0.1642, "step": 6160 }, { "epoch": 0.6385117628769821, "grad_norm": 0.5651284456253052, "learning_rate": 1.2212405425383655e-05, "loss": 0.2049, "step": 6161 }, { "epoch": 0.6386154005596435, "grad_norm": 0.6005743741989136, "learning_rate": 1.2206222191338326e-05, "loss": 0.1984, "step": 6162 }, { "epoch": 0.6387190382423049, "grad_norm": 0.5854420065879822, "learning_rate": 1.220003983548376e-05, "loss": 0.2186, "step": 6163 }, { "epoch": 0.6388226759249663, "grad_norm": 0.6057052612304688, "learning_rate": 1.2193858358516585e-05, "loss": 0.1816, "step": 6164 }, { "epoch": 0.6389263136076277, "grad_norm": 0.5955823659896851, "learning_rate": 1.2187677761133307e-05, "loss": 0.2141, "step": 6165 }, { "epoch": 0.6390299512902892, "grad_norm": 0.5140160322189331, "learning_rate": 1.2181498044030358e-05, "loss": 0.1802, "step": 6166 }, { "epoch": 0.6391335889729506, "grad_norm": 0.4712127447128296, "learning_rate": 1.217531920790406e-05, "loss": 0.1374, "step": 6167 }, { "epoch": 0.639237226655612, "grad_norm": 0.5993489027023315, "learning_rate": 1.216914125345062e-05, "loss": 0.202, "step": 6168 }, { "epoch": 0.6393408643382734, "grad_norm": 0.6108299493789673, "learning_rate": 1.2162964181366181e-05, "loss": 0.1944, "step": 6169 }, { "epoch": 0.6394445020209348, "grad_norm": 0.49651244282722473, "learning_rate": 1.215678799234675e-05, "loss": 0.1705, "step": 6170 }, { "epoch": 0.6395481397035963, "grad_norm": 0.5781722068786621, "learning_rate": 1.215061268708825e-05, "loss": 0.1894, "step": 6171 }, { "epoch": 0.6396517773862577, "grad_norm": 0.5969220995903015, "learning_rate": 1.214443826628652e-05, "loss": 0.2042, "step": 6172 }, { "epoch": 0.6397554150689191, "grad_norm": 0.5631805658340454, "learning_rate": 1.213826473063727e-05, "loss": 0.2022, "step": 6173 }, { "epoch": 0.6398590527515805, "grad_norm": 0.5786588191986084, "learning_rate": 1.213209208083613e-05, "loss": 0.2157, "step": 6174 }, { "epoch": 0.6399626904342419, "grad_norm": 0.5629664659500122, "learning_rate": 1.2125920317578625e-05, "loss": 0.2047, "step": 6175 }, { "epoch": 0.6400663281169033, "grad_norm": 0.6054370403289795, "learning_rate": 1.2119749441560172e-05, "loss": 0.203, "step": 6176 }, { "epoch": 0.6401699657995648, "grad_norm": 0.6195040941238403, "learning_rate": 1.211357945347611e-05, "loss": 0.191, "step": 6177 }, { "epoch": 0.6402736034822262, "grad_norm": 0.6448860168457031, "learning_rate": 1.210741035402165e-05, "loss": 0.2079, "step": 6178 }, { "epoch": 0.6403772411648876, "grad_norm": 0.535200297832489, "learning_rate": 1.2101242143891928e-05, "loss": 0.1747, "step": 6179 }, { "epoch": 0.640480878847549, "grad_norm": 0.5435940027236938, "learning_rate": 1.2095074823781951e-05, "loss": 0.1863, "step": 6180 }, { "epoch": 0.6405845165302104, "grad_norm": 0.5571568608283997, "learning_rate": 1.208890839438666e-05, "loss": 0.1791, "step": 6181 }, { "epoch": 0.6406881542128718, "grad_norm": 0.568340003490448, "learning_rate": 1.2082742856400878e-05, "loss": 0.206, "step": 6182 }, { "epoch": 0.6407917918955333, "grad_norm": 0.598192572593689, "learning_rate": 1.2076578210519316e-05, "loss": 0.1983, "step": 6183 }, { "epoch": 0.6408954295781947, "grad_norm": 0.6520498394966125, "learning_rate": 1.207041445743661e-05, "loss": 0.2388, "step": 6184 }, { "epoch": 0.6409990672608561, "grad_norm": 0.6946018934249878, "learning_rate": 1.2064251597847272e-05, "loss": 0.2306, "step": 6185 }, { "epoch": 0.6411027049435175, "grad_norm": 0.6726908683776855, "learning_rate": 1.2058089632445724e-05, "loss": 0.2149, "step": 6186 }, { "epoch": 0.6412063426261789, "grad_norm": 0.5651692748069763, "learning_rate": 1.2051928561926301e-05, "loss": 0.1764, "step": 6187 }, { "epoch": 0.6413099803088403, "grad_norm": 0.5629836916923523, "learning_rate": 1.2045768386983206e-05, "loss": 0.1805, "step": 6188 }, { "epoch": 0.6414136179915018, "grad_norm": 0.5897392630577087, "learning_rate": 1.2039609108310567e-05, "loss": 0.1846, "step": 6189 }, { "epoch": 0.6415172556741632, "grad_norm": 0.5797823071479797, "learning_rate": 1.2033450726602393e-05, "loss": 0.1657, "step": 6190 }, { "epoch": 0.6416208933568246, "grad_norm": 0.5978760123252869, "learning_rate": 1.2027293242552611e-05, "loss": 0.1973, "step": 6191 }, { "epoch": 0.641724531039486, "grad_norm": 0.5614368915557861, "learning_rate": 1.2021136656855034e-05, "loss": 0.1924, "step": 6192 }, { "epoch": 0.6418281687221473, "grad_norm": 0.548904299736023, "learning_rate": 1.2014980970203372e-05, "loss": 0.1604, "step": 6193 }, { "epoch": 0.6419318064048087, "grad_norm": 0.5700724720954895, "learning_rate": 1.2008826183291244e-05, "loss": 0.1799, "step": 6194 }, { "epoch": 0.6420354440874702, "grad_norm": 0.5496739745140076, "learning_rate": 1.2002672296812157e-05, "loss": 0.2177, "step": 6195 }, { "epoch": 0.6421390817701316, "grad_norm": 0.5980311632156372, "learning_rate": 1.199651931145952e-05, "loss": 0.1943, "step": 6196 }, { "epoch": 0.642242719452793, "grad_norm": 0.587797999382019, "learning_rate": 1.1990367227926653e-05, "loss": 0.1994, "step": 6197 }, { "epoch": 0.6423463571354544, "grad_norm": 0.6709038615226746, "learning_rate": 1.1984216046906752e-05, "loss": 0.2178, "step": 6198 }, { "epoch": 0.6424499948181158, "grad_norm": 0.741703987121582, "learning_rate": 1.197806576909293e-05, "loss": 0.2407, "step": 6199 }, { "epoch": 0.6425536325007772, "grad_norm": 0.6239121556282043, "learning_rate": 1.1971916395178178e-05, "loss": 0.1947, "step": 6200 }, { "epoch": 0.6426572701834387, "grad_norm": 0.4833756685256958, "learning_rate": 1.1965767925855412e-05, "loss": 0.1475, "step": 6201 }, { "epoch": 0.6427609078661001, "grad_norm": 0.5679334402084351, "learning_rate": 1.195962036181743e-05, "loss": 0.1836, "step": 6202 }, { "epoch": 0.6428645455487615, "grad_norm": 0.7786445021629333, "learning_rate": 1.1953473703756919e-05, "loss": 0.2027, "step": 6203 }, { "epoch": 0.6429681832314229, "grad_norm": 0.6195889115333557, "learning_rate": 1.1947327952366492e-05, "loss": 0.1896, "step": 6204 }, { "epoch": 0.6430718209140843, "grad_norm": 0.6255584359169006, "learning_rate": 1.1941183108338623e-05, "loss": 0.2115, "step": 6205 }, { "epoch": 0.6431754585967457, "grad_norm": 0.5589773654937744, "learning_rate": 1.1935039172365714e-05, "loss": 0.1666, "step": 6206 }, { "epoch": 0.6432790962794072, "grad_norm": 0.6697091460227966, "learning_rate": 1.1928896145140066e-05, "loss": 0.2285, "step": 6207 }, { "epoch": 0.6433827339620686, "grad_norm": 0.5722152590751648, "learning_rate": 1.1922754027353843e-05, "loss": 0.1814, "step": 6208 }, { "epoch": 0.64348637164473, "grad_norm": 0.6211981177330017, "learning_rate": 1.1916612819699145e-05, "loss": 0.1856, "step": 6209 }, { "epoch": 0.6435900093273914, "grad_norm": 0.6125348210334778, "learning_rate": 1.1910472522867947e-05, "loss": 0.1893, "step": 6210 }, { "epoch": 0.6436936470100528, "grad_norm": 0.5103516578674316, "learning_rate": 1.1904333137552124e-05, "loss": 0.1666, "step": 6211 }, { "epoch": 0.6437972846927142, "grad_norm": 0.5360734462738037, "learning_rate": 1.1898194664443468e-05, "loss": 0.1575, "step": 6212 }, { "epoch": 0.6439009223753757, "grad_norm": 0.6586069464683533, "learning_rate": 1.1892057104233637e-05, "loss": 0.1877, "step": 6213 }, { "epoch": 0.6440045600580371, "grad_norm": 0.6260287165641785, "learning_rate": 1.188592045761421e-05, "loss": 0.166, "step": 6214 }, { "epoch": 0.6441081977406985, "grad_norm": 0.5675740838050842, "learning_rate": 1.1879784725276646e-05, "loss": 0.1914, "step": 6215 }, { "epoch": 0.6442118354233599, "grad_norm": 0.6457982659339905, "learning_rate": 1.1873649907912319e-05, "loss": 0.2159, "step": 6216 }, { "epoch": 0.6443154731060213, "grad_norm": 0.6307981610298157, "learning_rate": 1.1867516006212494e-05, "loss": 0.2285, "step": 6217 }, { "epoch": 0.6444191107886827, "grad_norm": 0.5788914561271667, "learning_rate": 1.1861383020868313e-05, "loss": 0.1898, "step": 6218 }, { "epoch": 0.6445227484713442, "grad_norm": 0.607032835483551, "learning_rate": 1.1855250952570852e-05, "loss": 0.1993, "step": 6219 }, { "epoch": 0.6446263861540056, "grad_norm": 0.7031238079071045, "learning_rate": 1.1849119802011047e-05, "loss": 0.2415, "step": 6220 }, { "epoch": 0.644730023836667, "grad_norm": 0.5608206987380981, "learning_rate": 1.1842989569879748e-05, "loss": 0.197, "step": 6221 }, { "epoch": 0.6448336615193284, "grad_norm": 0.6588431000709534, "learning_rate": 1.1836860256867712e-05, "loss": 0.203, "step": 6222 }, { "epoch": 0.6449372992019898, "grad_norm": 0.5253265500068665, "learning_rate": 1.1830731863665567e-05, "loss": 0.1723, "step": 6223 }, { "epoch": 0.6450409368846513, "grad_norm": 0.6054903864860535, "learning_rate": 1.1824604390963864e-05, "loss": 0.1933, "step": 6224 }, { "epoch": 0.6451445745673127, "grad_norm": 0.6375058889389038, "learning_rate": 1.1818477839453015e-05, "loss": 0.203, "step": 6225 }, { "epoch": 0.6452482122499741, "grad_norm": 0.6265483498573303, "learning_rate": 1.1812352209823374e-05, "loss": 0.2128, "step": 6226 }, { "epoch": 0.6453518499326355, "grad_norm": 0.583483874797821, "learning_rate": 1.1806227502765162e-05, "loss": 0.1917, "step": 6227 }, { "epoch": 0.6454554876152969, "grad_norm": 0.6172074675559998, "learning_rate": 1.1800103718968488e-05, "loss": 0.2075, "step": 6228 }, { "epoch": 0.6455591252979583, "grad_norm": 0.6154376864433289, "learning_rate": 1.1793980859123387e-05, "loss": 0.1963, "step": 6229 }, { "epoch": 0.6456627629806198, "grad_norm": 0.6489928960800171, "learning_rate": 1.1787858923919764e-05, "loss": 0.1812, "step": 6230 }, { "epoch": 0.6457664006632812, "grad_norm": 0.6327508091926575, "learning_rate": 1.1781737914047425e-05, "loss": 0.2166, "step": 6231 }, { "epoch": 0.6458700383459426, "grad_norm": 0.6692586541175842, "learning_rate": 1.1775617830196092e-05, "loss": 0.2319, "step": 6232 }, { "epoch": 0.645973676028604, "grad_norm": 0.5764483213424683, "learning_rate": 1.1769498673055352e-05, "loss": 0.1785, "step": 6233 }, { "epoch": 0.6460773137112654, "grad_norm": 0.6731470227241516, "learning_rate": 1.1763380443314714e-05, "loss": 0.237, "step": 6234 }, { "epoch": 0.6461809513939268, "grad_norm": 0.6515440940856934, "learning_rate": 1.1757263141663552e-05, "loss": 0.2232, "step": 6235 }, { "epoch": 0.6462845890765883, "grad_norm": 0.5058755278587341, "learning_rate": 1.1751146768791171e-05, "loss": 0.151, "step": 6236 }, { "epoch": 0.6463882267592497, "grad_norm": 0.6155193448066711, "learning_rate": 1.1745031325386753e-05, "loss": 0.1943, "step": 6237 }, { "epoch": 0.6464918644419111, "grad_norm": 0.6133295893669128, "learning_rate": 1.1738916812139367e-05, "loss": 0.2002, "step": 6238 }, { "epoch": 0.6465955021245725, "grad_norm": 0.7166955471038818, "learning_rate": 1.173280322973799e-05, "loss": 0.2125, "step": 6239 }, { "epoch": 0.6466991398072339, "grad_norm": 0.6078068017959595, "learning_rate": 1.1726690578871503e-05, "loss": 0.2031, "step": 6240 }, { "epoch": 0.6468027774898953, "grad_norm": 0.6322109699249268, "learning_rate": 1.1720578860228656e-05, "loss": 0.2118, "step": 6241 }, { "epoch": 0.6469064151725568, "grad_norm": 0.6122873425483704, "learning_rate": 1.1714468074498115e-05, "loss": 0.2086, "step": 6242 }, { "epoch": 0.6470100528552182, "grad_norm": 0.6217548251152039, "learning_rate": 1.1708358222368424e-05, "loss": 0.2045, "step": 6243 }, { "epoch": 0.6471136905378796, "grad_norm": 0.6710754632949829, "learning_rate": 1.1702249304528042e-05, "loss": 0.2115, "step": 6244 }, { "epoch": 0.647217328220541, "grad_norm": 0.6420841813087463, "learning_rate": 1.1696141321665312e-05, "loss": 0.2321, "step": 6245 }, { "epoch": 0.6473209659032024, "grad_norm": 0.6284909248352051, "learning_rate": 1.1690034274468465e-05, "loss": 0.2028, "step": 6246 }, { "epoch": 0.6474246035858638, "grad_norm": 0.6745210289955139, "learning_rate": 1.1683928163625644e-05, "loss": 0.2271, "step": 6247 }, { "epoch": 0.6475282412685253, "grad_norm": 0.5823943018913269, "learning_rate": 1.1677822989824867e-05, "loss": 0.1933, "step": 6248 }, { "epoch": 0.6476318789511867, "grad_norm": 0.5115127563476562, "learning_rate": 1.1671718753754053e-05, "loss": 0.1693, "step": 6249 }, { "epoch": 0.6477355166338481, "grad_norm": 0.5595300197601318, "learning_rate": 1.1665615456101031e-05, "loss": 0.1855, "step": 6250 }, { "epoch": 0.6478391543165095, "grad_norm": 0.6594432592391968, "learning_rate": 1.1659513097553496e-05, "loss": 0.2192, "step": 6251 }, { "epoch": 0.6479427919991709, "grad_norm": 0.5352497696876526, "learning_rate": 1.1653411678799067e-05, "loss": 0.1705, "step": 6252 }, { "epoch": 0.6480464296818323, "grad_norm": 0.6237494349479675, "learning_rate": 1.1647311200525238e-05, "loss": 0.215, "step": 6253 }, { "epoch": 0.6481500673644938, "grad_norm": 0.5473231077194214, "learning_rate": 1.1641211663419387e-05, "loss": 0.1566, "step": 6254 }, { "epoch": 0.6482537050471552, "grad_norm": 0.5947620868682861, "learning_rate": 1.1635113068168819e-05, "loss": 0.2016, "step": 6255 }, { "epoch": 0.6483573427298166, "grad_norm": 0.6945819854736328, "learning_rate": 1.16290154154607e-05, "loss": 0.2172, "step": 6256 }, { "epoch": 0.648460980412478, "grad_norm": 0.5550529360771179, "learning_rate": 1.162291870598212e-05, "loss": 0.199, "step": 6257 }, { "epoch": 0.6485646180951394, "grad_norm": 0.5681254863739014, "learning_rate": 1.1616822940420031e-05, "loss": 0.1788, "step": 6258 }, { "epoch": 0.6486682557778009, "grad_norm": 0.6118218898773193, "learning_rate": 1.1610728119461307e-05, "loss": 0.1859, "step": 6259 }, { "epoch": 0.6487718934604623, "grad_norm": 0.6234940886497498, "learning_rate": 1.1604634243792698e-05, "loss": 0.1804, "step": 6260 }, { "epoch": 0.6488755311431237, "grad_norm": 0.4990558624267578, "learning_rate": 1.1598541314100843e-05, "loss": 0.2122, "step": 6261 }, { "epoch": 0.6489791688257851, "grad_norm": 0.6863871216773987, "learning_rate": 1.1592449331072302e-05, "loss": 0.2144, "step": 6262 }, { "epoch": 0.6490828065084465, "grad_norm": 0.5716148614883423, "learning_rate": 1.1586358295393492e-05, "loss": 0.1828, "step": 6263 }, { "epoch": 0.6491864441911079, "grad_norm": 0.6025523543357849, "learning_rate": 1.1580268207750748e-05, "loss": 0.2148, "step": 6264 }, { "epoch": 0.6492900818737694, "grad_norm": 0.6130708456039429, "learning_rate": 1.1574179068830305e-05, "loss": 0.1948, "step": 6265 }, { "epoch": 0.6493937195564308, "grad_norm": 0.5691248178482056, "learning_rate": 1.1568090879318263e-05, "loss": 0.1868, "step": 6266 }, { "epoch": 0.6494973572390922, "grad_norm": 0.6407581567764282, "learning_rate": 1.1562003639900637e-05, "loss": 0.2172, "step": 6267 }, { "epoch": 0.6496009949217536, "grad_norm": 0.6883420348167419, "learning_rate": 1.1555917351263313e-05, "loss": 0.2487, "step": 6268 }, { "epoch": 0.6497046326044149, "grad_norm": 0.5731263160705566, "learning_rate": 1.1549832014092096e-05, "loss": 0.1926, "step": 6269 }, { "epoch": 0.6498082702870763, "grad_norm": 0.5525518655776978, "learning_rate": 1.1543747629072677e-05, "loss": 0.1879, "step": 6270 }, { "epoch": 0.6499119079697377, "grad_norm": 0.591450572013855, "learning_rate": 1.1537664196890633e-05, "loss": 0.2096, "step": 6271 }, { "epoch": 0.6500155456523992, "grad_norm": 0.5698865056037903, "learning_rate": 1.1531581718231431e-05, "loss": 0.1797, "step": 6272 }, { "epoch": 0.6501191833350606, "grad_norm": 0.4984050393104553, "learning_rate": 1.1525500193780428e-05, "loss": 0.1639, "step": 6273 }, { "epoch": 0.650222821017722, "grad_norm": 0.6695106029510498, "learning_rate": 1.151941962422289e-05, "loss": 0.2214, "step": 6274 }, { "epoch": 0.6503264587003834, "grad_norm": 0.5672677755355835, "learning_rate": 1.1513340010243974e-05, "loss": 0.1743, "step": 6275 }, { "epoch": 0.6504300963830448, "grad_norm": 0.6967812180519104, "learning_rate": 1.1507261352528705e-05, "loss": 0.2428, "step": 6276 }, { "epoch": 0.6505337340657062, "grad_norm": 0.6740899682044983, "learning_rate": 1.1501183651762032e-05, "loss": 0.2171, "step": 6277 }, { "epoch": 0.6506373717483677, "grad_norm": 0.6313149333000183, "learning_rate": 1.1495106908628772e-05, "loss": 0.2304, "step": 6278 }, { "epoch": 0.6507410094310291, "grad_norm": 0.625226616859436, "learning_rate": 1.1489031123813636e-05, "loss": 0.22, "step": 6279 }, { "epoch": 0.6508446471136905, "grad_norm": 0.5863350629806519, "learning_rate": 1.1482956298001256e-05, "loss": 0.1794, "step": 6280 }, { "epoch": 0.6509482847963519, "grad_norm": 0.5141674876213074, "learning_rate": 1.1476882431876107e-05, "loss": 0.211, "step": 6281 }, { "epoch": 0.6510519224790133, "grad_norm": 0.595971941947937, "learning_rate": 1.1470809526122606e-05, "loss": 0.1766, "step": 6282 }, { "epoch": 0.6511555601616748, "grad_norm": 0.5330041646957397, "learning_rate": 1.1464737581425021e-05, "loss": 0.1635, "step": 6283 }, { "epoch": 0.6512591978443362, "grad_norm": 0.5961182713508606, "learning_rate": 1.1458666598467542e-05, "loss": 0.2467, "step": 6284 }, { "epoch": 0.6513628355269976, "grad_norm": 0.6333070993423462, "learning_rate": 1.1452596577934236e-05, "loss": 0.2263, "step": 6285 }, { "epoch": 0.651466473209659, "grad_norm": 0.5714941024780273, "learning_rate": 1.1446527520509053e-05, "loss": 0.1853, "step": 6286 }, { "epoch": 0.6515701108923204, "grad_norm": 0.5562633872032166, "learning_rate": 1.1440459426875858e-05, "loss": 0.1808, "step": 6287 }, { "epoch": 0.6516737485749818, "grad_norm": 0.5774336457252502, "learning_rate": 1.1434392297718383e-05, "loss": 0.1897, "step": 6288 }, { "epoch": 0.6517773862576433, "grad_norm": 0.5281375646591187, "learning_rate": 1.1428326133720264e-05, "loss": 0.1773, "step": 6289 }, { "epoch": 0.6518810239403047, "grad_norm": 0.7557573914527893, "learning_rate": 1.1422260935565044e-05, "loss": 0.2354, "step": 6290 }, { "epoch": 0.6519846616229661, "grad_norm": 0.6803430318832397, "learning_rate": 1.1416196703936126e-05, "loss": 0.2208, "step": 6291 }, { "epoch": 0.6520882993056275, "grad_norm": 0.6010298132896423, "learning_rate": 1.1410133439516819e-05, "loss": 0.214, "step": 6292 }, { "epoch": 0.6521919369882889, "grad_norm": 0.5949862003326416, "learning_rate": 1.1404071142990316e-05, "loss": 0.21, "step": 6293 }, { "epoch": 0.6522955746709503, "grad_norm": 0.5815996527671814, "learning_rate": 1.1398009815039713e-05, "loss": 0.1684, "step": 6294 }, { "epoch": 0.6523992123536118, "grad_norm": 0.559094250202179, "learning_rate": 1.1391949456348002e-05, "loss": 0.1721, "step": 6295 }, { "epoch": 0.6525028500362732, "grad_norm": 0.6896238923072815, "learning_rate": 1.1385890067598035e-05, "loss": 0.2419, "step": 6296 }, { "epoch": 0.6526064877189346, "grad_norm": 0.6087605357170105, "learning_rate": 1.1379831649472593e-05, "loss": 0.203, "step": 6297 }, { "epoch": 0.652710125401596, "grad_norm": 0.5834180116653442, "learning_rate": 1.1373774202654318e-05, "loss": 0.2032, "step": 6298 }, { "epoch": 0.6528137630842574, "grad_norm": 0.5780649781227112, "learning_rate": 1.136771772782575e-05, "loss": 0.1786, "step": 6299 }, { "epoch": 0.6529174007669188, "grad_norm": 0.6673455238342285, "learning_rate": 1.1361662225669336e-05, "loss": 0.2009, "step": 6300 }, { "epoch": 0.6530210384495803, "grad_norm": 0.6683764457702637, "learning_rate": 1.1355607696867388e-05, "loss": 0.2237, "step": 6301 }, { "epoch": 0.6531246761322417, "grad_norm": 0.6092632412910461, "learning_rate": 1.1349554142102135e-05, "loss": 0.2161, "step": 6302 }, { "epoch": 0.6532283138149031, "grad_norm": 0.6827420592308044, "learning_rate": 1.1343501562055672e-05, "loss": 0.2094, "step": 6303 }, { "epoch": 0.6533319514975645, "grad_norm": 0.7059791088104248, "learning_rate": 1.133744995740999e-05, "loss": 0.2332, "step": 6304 }, { "epoch": 0.6534355891802259, "grad_norm": 0.6687110662460327, "learning_rate": 1.1331399328846988e-05, "loss": 0.2179, "step": 6305 }, { "epoch": 0.6535392268628873, "grad_norm": 0.5980477333068848, "learning_rate": 1.1325349677048423e-05, "loss": 0.1849, "step": 6306 }, { "epoch": 0.6536428645455488, "grad_norm": 0.5202463269233704, "learning_rate": 1.1319301002695983e-05, "loss": 0.1744, "step": 6307 }, { "epoch": 0.6537465022282102, "grad_norm": 0.611396849155426, "learning_rate": 1.1313253306471203e-05, "loss": 0.2171, "step": 6308 }, { "epoch": 0.6538501399108716, "grad_norm": 0.5297112464904785, "learning_rate": 1.1307206589055543e-05, "loss": 0.185, "step": 6309 }, { "epoch": 0.653953777593533, "grad_norm": 0.5845366716384888, "learning_rate": 1.1301160851130332e-05, "loss": 0.2043, "step": 6310 }, { "epoch": 0.6540574152761944, "grad_norm": 0.6273592114448547, "learning_rate": 1.1295116093376789e-05, "loss": 0.1966, "step": 6311 }, { "epoch": 0.6541610529588558, "grad_norm": 0.5722415447235107, "learning_rate": 1.1289072316476038e-05, "loss": 0.1606, "step": 6312 }, { "epoch": 0.6542646906415173, "grad_norm": 0.6014687418937683, "learning_rate": 1.1283029521109068e-05, "loss": 0.2056, "step": 6313 }, { "epoch": 0.6543683283241787, "grad_norm": 0.5814545750617981, "learning_rate": 1.1276987707956781e-05, "loss": 0.1998, "step": 6314 }, { "epoch": 0.6544719660068401, "grad_norm": 0.5836402773857117, "learning_rate": 1.1270946877699966e-05, "loss": 0.2036, "step": 6315 }, { "epoch": 0.6545756036895015, "grad_norm": 0.4817843735218048, "learning_rate": 1.126490703101929e-05, "loss": 0.1646, "step": 6316 }, { "epoch": 0.6546792413721629, "grad_norm": 0.6447229385375977, "learning_rate": 1.1258868168595309e-05, "loss": 0.2061, "step": 6317 }, { "epoch": 0.6547828790548244, "grad_norm": 0.626266360282898, "learning_rate": 1.1252830291108467e-05, "loss": 0.1843, "step": 6318 }, { "epoch": 0.6548865167374858, "grad_norm": 0.6417117714881897, "learning_rate": 1.1246793399239108e-05, "loss": 0.2096, "step": 6319 }, { "epoch": 0.6549901544201472, "grad_norm": 0.6152032017707825, "learning_rate": 1.1240757493667473e-05, "loss": 0.1958, "step": 6320 }, { "epoch": 0.6550937921028086, "grad_norm": 0.5442467927932739, "learning_rate": 1.1234722575073658e-05, "loss": 0.1809, "step": 6321 }, { "epoch": 0.65519742978547, "grad_norm": 0.5608288645744324, "learning_rate": 1.1228688644137686e-05, "loss": 0.1761, "step": 6322 }, { "epoch": 0.6553010674681314, "grad_norm": 0.6397298574447632, "learning_rate": 1.1222655701539442e-05, "loss": 0.225, "step": 6323 }, { "epoch": 0.6554047051507929, "grad_norm": 0.5038039684295654, "learning_rate": 1.12166237479587e-05, "loss": 0.1614, "step": 6324 }, { "epoch": 0.6555083428334543, "grad_norm": 0.7007007598876953, "learning_rate": 1.1210592784075147e-05, "loss": 0.2412, "step": 6325 }, { "epoch": 0.6556119805161157, "grad_norm": 0.6230477094650269, "learning_rate": 1.1204562810568328e-05, "loss": 0.2036, "step": 6326 }, { "epoch": 0.6557156181987771, "grad_norm": 0.5754637718200684, "learning_rate": 1.1198533828117709e-05, "loss": 0.1917, "step": 6327 }, { "epoch": 0.6558192558814385, "grad_norm": 0.6380010843276978, "learning_rate": 1.1192505837402608e-05, "loss": 0.2174, "step": 6328 }, { "epoch": 0.6559228935640999, "grad_norm": 0.6515641808509827, "learning_rate": 1.1186478839102264e-05, "loss": 0.1706, "step": 6329 }, { "epoch": 0.6560265312467614, "grad_norm": 0.7134867310523987, "learning_rate": 1.1180452833895786e-05, "loss": 0.2298, "step": 6330 }, { "epoch": 0.6561301689294228, "grad_norm": 0.5794904828071594, "learning_rate": 1.117442782246216e-05, "loss": 0.1696, "step": 6331 }, { "epoch": 0.6562338066120842, "grad_norm": 0.5649944543838501, "learning_rate": 1.1168403805480299e-05, "loss": 0.1748, "step": 6332 }, { "epoch": 0.6563374442947456, "grad_norm": 0.6089136600494385, "learning_rate": 1.1162380783628959e-05, "loss": 0.1737, "step": 6333 }, { "epoch": 0.656441081977407, "grad_norm": 0.6334869861602783, "learning_rate": 1.1156358757586823e-05, "loss": 0.2071, "step": 6334 }, { "epoch": 0.6565447196600684, "grad_norm": 0.5529614090919495, "learning_rate": 1.1150337728032431e-05, "loss": 0.1934, "step": 6335 }, { "epoch": 0.6566483573427299, "grad_norm": 0.6286069750785828, "learning_rate": 1.1144317695644222e-05, "loss": 0.2047, "step": 6336 }, { "epoch": 0.6567519950253913, "grad_norm": 0.605652391910553, "learning_rate": 1.1138298661100536e-05, "loss": 0.2188, "step": 6337 }, { "epoch": 0.6568556327080527, "grad_norm": 0.6238192915916443, "learning_rate": 1.1132280625079572e-05, "loss": 0.2033, "step": 6338 }, { "epoch": 0.6569592703907141, "grad_norm": 0.4853861629962921, "learning_rate": 1.1126263588259443e-05, "loss": 0.1712, "step": 6339 }, { "epoch": 0.6570629080733755, "grad_norm": 0.6667852401733398, "learning_rate": 1.1120247551318149e-05, "loss": 0.2145, "step": 6340 }, { "epoch": 0.657166545756037, "grad_norm": 0.6394658088684082, "learning_rate": 1.1114232514933553e-05, "loss": 0.2086, "step": 6341 }, { "epoch": 0.6572701834386984, "grad_norm": 0.6247240900993347, "learning_rate": 1.1108218479783423e-05, "loss": 0.2268, "step": 6342 }, { "epoch": 0.6573738211213598, "grad_norm": 0.5785343050956726, "learning_rate": 1.110220544654541e-05, "loss": 0.17, "step": 6343 }, { "epoch": 0.6574774588040212, "grad_norm": 0.5689795017242432, "learning_rate": 1.109619341589705e-05, "loss": 0.1695, "step": 6344 }, { "epoch": 0.6575810964866825, "grad_norm": 0.5147643089294434, "learning_rate": 1.1090182388515785e-05, "loss": 0.1636, "step": 6345 }, { "epoch": 0.6576847341693439, "grad_norm": 0.6086384654045105, "learning_rate": 1.108417236507891e-05, "loss": 0.2108, "step": 6346 }, { "epoch": 0.6577883718520053, "grad_norm": 0.6293398141860962, "learning_rate": 1.1078163346263642e-05, "loss": 0.201, "step": 6347 }, { "epoch": 0.6578920095346668, "grad_norm": 0.642492949962616, "learning_rate": 1.1072155332747055e-05, "loss": 0.2068, "step": 6348 }, { "epoch": 0.6579956472173282, "grad_norm": 0.5426843166351318, "learning_rate": 1.106614832520612e-05, "loss": 0.1698, "step": 6349 }, { "epoch": 0.6580992848999896, "grad_norm": 0.5720564723014832, "learning_rate": 1.1060142324317714e-05, "loss": 0.1949, "step": 6350 }, { "epoch": 0.658202922582651, "grad_norm": 0.5899260640144348, "learning_rate": 1.1054137330758566e-05, "loss": 0.1784, "step": 6351 }, { "epoch": 0.6583065602653124, "grad_norm": 0.5442241430282593, "learning_rate": 1.1048133345205324e-05, "loss": 0.2007, "step": 6352 }, { "epoch": 0.6584101979479738, "grad_norm": 0.6020263433456421, "learning_rate": 1.1042130368334489e-05, "loss": 0.199, "step": 6353 }, { "epoch": 0.6585138356306353, "grad_norm": 0.620783805847168, "learning_rate": 1.1036128400822492e-05, "loss": 0.1957, "step": 6354 }, { "epoch": 0.6586174733132967, "grad_norm": 0.5608299970626831, "learning_rate": 1.1030127443345608e-05, "loss": 0.1779, "step": 6355 }, { "epoch": 0.6587211109959581, "grad_norm": 0.6411195993423462, "learning_rate": 1.1024127496580015e-05, "loss": 0.2047, "step": 6356 }, { "epoch": 0.6588247486786195, "grad_norm": 0.6850147247314453, "learning_rate": 1.1018128561201788e-05, "loss": 0.2488, "step": 6357 }, { "epoch": 0.6589283863612809, "grad_norm": 0.6264161467552185, "learning_rate": 1.1012130637886866e-05, "loss": 0.1941, "step": 6358 }, { "epoch": 0.6590320240439423, "grad_norm": 0.5913993716239929, "learning_rate": 1.1006133727311093e-05, "loss": 0.1888, "step": 6359 }, { "epoch": 0.6591356617266038, "grad_norm": 0.603714108467102, "learning_rate": 1.1000137830150194e-05, "loss": 0.1881, "step": 6360 }, { "epoch": 0.6592392994092652, "grad_norm": 0.5576220154762268, "learning_rate": 1.0994142947079775e-05, "loss": 0.1861, "step": 6361 }, { "epoch": 0.6593429370919266, "grad_norm": 0.7011269330978394, "learning_rate": 1.0988149078775333e-05, "loss": 0.2525, "step": 6362 }, { "epoch": 0.659446574774588, "grad_norm": 0.670717179775238, "learning_rate": 1.0982156225912233e-05, "loss": 0.1932, "step": 6363 }, { "epoch": 0.6595502124572494, "grad_norm": 0.6681691408157349, "learning_rate": 1.0976164389165749e-05, "loss": 0.2259, "step": 6364 }, { "epoch": 0.6596538501399108, "grad_norm": 0.5890450477600098, "learning_rate": 1.0970173569211045e-05, "loss": 0.1853, "step": 6365 }, { "epoch": 0.6597574878225723, "grad_norm": 0.6007232666015625, "learning_rate": 1.0964183766723142e-05, "loss": 0.2006, "step": 6366 }, { "epoch": 0.6598611255052337, "grad_norm": 0.5991565585136414, "learning_rate": 1.095819498237697e-05, "loss": 0.1848, "step": 6367 }, { "epoch": 0.6599647631878951, "grad_norm": 0.5873733758926392, "learning_rate": 1.0952207216847322e-05, "loss": 0.194, "step": 6368 }, { "epoch": 0.6600684008705565, "grad_norm": 0.6535905599594116, "learning_rate": 1.09462204708089e-05, "loss": 0.2136, "step": 6369 }, { "epoch": 0.6601720385532179, "grad_norm": 0.585079550743103, "learning_rate": 1.0940234744936289e-05, "loss": 0.1893, "step": 6370 }, { "epoch": 0.6602756762358793, "grad_norm": 0.6181477308273315, "learning_rate": 1.0934250039903933e-05, "loss": 0.1949, "step": 6371 }, { "epoch": 0.6603793139185408, "grad_norm": 0.5763558745384216, "learning_rate": 1.0928266356386199e-05, "loss": 0.1837, "step": 6372 }, { "epoch": 0.6604829516012022, "grad_norm": 0.49055740237236023, "learning_rate": 1.092228369505731e-05, "loss": 0.145, "step": 6373 }, { "epoch": 0.6605865892838636, "grad_norm": 0.6213757991790771, "learning_rate": 1.0916302056591377e-05, "loss": 0.2113, "step": 6374 }, { "epoch": 0.660690226966525, "grad_norm": 0.6518123149871826, "learning_rate": 1.091032144166241e-05, "loss": 0.2039, "step": 6375 }, { "epoch": 0.6607938646491864, "grad_norm": 0.5271971821784973, "learning_rate": 1.0904341850944288e-05, "loss": 0.1671, "step": 6376 }, { "epoch": 0.6608975023318479, "grad_norm": 0.6460587382316589, "learning_rate": 1.0898363285110796e-05, "loss": 0.2075, "step": 6377 }, { "epoch": 0.6610011400145093, "grad_norm": 0.5846865773200989, "learning_rate": 1.0892385744835573e-05, "loss": 0.2127, "step": 6378 }, { "epoch": 0.6611047776971707, "grad_norm": 0.5889509916305542, "learning_rate": 1.088640923079217e-05, "loss": 0.1931, "step": 6379 }, { "epoch": 0.6612084153798321, "grad_norm": 0.6355632543563843, "learning_rate": 1.0880433743654008e-05, "loss": 0.1969, "step": 6380 }, { "epoch": 0.6613120530624935, "grad_norm": 0.6751269102096558, "learning_rate": 1.087445928409439e-05, "loss": 0.1973, "step": 6381 }, { "epoch": 0.6614156907451549, "grad_norm": 0.6042639017105103, "learning_rate": 1.0868485852786522e-05, "loss": 0.1817, "step": 6382 }, { "epoch": 0.6615193284278164, "grad_norm": 0.5439656972885132, "learning_rate": 1.0862513450403463e-05, "loss": 0.1766, "step": 6383 }, { "epoch": 0.6616229661104778, "grad_norm": 0.554614245891571, "learning_rate": 1.0856542077618184e-05, "loss": 0.1997, "step": 6384 }, { "epoch": 0.6617266037931392, "grad_norm": 0.5925115942955017, "learning_rate": 1.0850571735103537e-05, "loss": 0.1999, "step": 6385 }, { "epoch": 0.6618302414758006, "grad_norm": 0.643631100654602, "learning_rate": 1.0844602423532247e-05, "loss": 0.2104, "step": 6386 }, { "epoch": 0.661933879158462, "grad_norm": 0.6977851986885071, "learning_rate": 1.0838634143576922e-05, "loss": 0.2173, "step": 6387 }, { "epoch": 0.6620375168411234, "grad_norm": 0.6471225023269653, "learning_rate": 1.0832666895910051e-05, "loss": 0.2138, "step": 6388 }, { "epoch": 0.6621411545237849, "grad_norm": 0.6244288682937622, "learning_rate": 1.0826700681204026e-05, "loss": 0.1965, "step": 6389 }, { "epoch": 0.6622447922064463, "grad_norm": 0.533596396446228, "learning_rate": 1.0820735500131112e-05, "loss": 0.1782, "step": 6390 }, { "epoch": 0.6623484298891077, "grad_norm": 0.6266582012176514, "learning_rate": 1.0814771353363447e-05, "loss": 0.2079, "step": 6391 }, { "epoch": 0.6624520675717691, "grad_norm": 0.6816765666007996, "learning_rate": 1.0808808241573082e-05, "loss": 0.2015, "step": 6392 }, { "epoch": 0.6625557052544305, "grad_norm": 0.668770968914032, "learning_rate": 1.0802846165431901e-05, "loss": 0.2086, "step": 6393 }, { "epoch": 0.6626593429370919, "grad_norm": 0.5263325572013855, "learning_rate": 1.0796885125611719e-05, "loss": 0.1932, "step": 6394 }, { "epoch": 0.6627629806197534, "grad_norm": 0.6744509339332581, "learning_rate": 1.079092512278422e-05, "loss": 0.1999, "step": 6395 }, { "epoch": 0.6628666183024148, "grad_norm": 0.6415967345237732, "learning_rate": 1.0784966157620956e-05, "loss": 0.204, "step": 6396 }, { "epoch": 0.6629702559850762, "grad_norm": 0.6224689483642578, "learning_rate": 1.0779008230793386e-05, "loss": 0.2143, "step": 6397 }, { "epoch": 0.6630738936677376, "grad_norm": 0.6195698380470276, "learning_rate": 1.0773051342972835e-05, "loss": 0.2197, "step": 6398 }, { "epoch": 0.663177531350399, "grad_norm": 0.6115604043006897, "learning_rate": 1.0767095494830509e-05, "loss": 0.1848, "step": 6399 }, { "epoch": 0.6632811690330604, "grad_norm": 0.5525495409965515, "learning_rate": 1.0761140687037516e-05, "loss": 0.1694, "step": 6400 }, { "epoch": 0.6633848067157219, "grad_norm": 0.6176031827926636, "learning_rate": 1.0755186920264819e-05, "loss": 0.22, "step": 6401 }, { "epoch": 0.6634884443983833, "grad_norm": 0.5892438292503357, "learning_rate": 1.07492341951833e-05, "loss": 0.1728, "step": 6402 }, { "epoch": 0.6635920820810447, "grad_norm": 0.6732398867607117, "learning_rate": 1.0743282512463681e-05, "loss": 0.2228, "step": 6403 }, { "epoch": 0.6636957197637061, "grad_norm": 0.618508517742157, "learning_rate": 1.0737331872776606e-05, "loss": 0.1928, "step": 6404 }, { "epoch": 0.6637993574463675, "grad_norm": 0.6269073486328125, "learning_rate": 1.0731382276792579e-05, "loss": 0.227, "step": 6405 }, { "epoch": 0.663902995129029, "grad_norm": 0.6141325235366821, "learning_rate": 1.0725433725181977e-05, "loss": 0.1951, "step": 6406 }, { "epoch": 0.6640066328116904, "grad_norm": 0.6289651989936829, "learning_rate": 1.0719486218615099e-05, "loss": 0.1731, "step": 6407 }, { "epoch": 0.6641102704943518, "grad_norm": 0.57537442445755, "learning_rate": 1.0713539757762073e-05, "loss": 0.181, "step": 6408 }, { "epoch": 0.6642139081770132, "grad_norm": 0.5934721827507019, "learning_rate": 1.0707594343292956e-05, "loss": 0.1923, "step": 6409 }, { "epoch": 0.6643175458596746, "grad_norm": 0.5678470730781555, "learning_rate": 1.0701649975877668e-05, "loss": 0.1672, "step": 6410 }, { "epoch": 0.664421183542336, "grad_norm": 0.5607133507728577, "learning_rate": 1.0695706656186005e-05, "loss": 0.1695, "step": 6411 }, { "epoch": 0.6645248212249975, "grad_norm": 0.47020161151885986, "learning_rate": 1.0689764384887655e-05, "loss": 0.1408, "step": 6412 }, { "epoch": 0.6646284589076589, "grad_norm": 0.6394020318984985, "learning_rate": 1.0683823162652171e-05, "loss": 0.2396, "step": 6413 }, { "epoch": 0.6647320965903203, "grad_norm": 0.5689187049865723, "learning_rate": 1.067788299014901e-05, "loss": 0.1671, "step": 6414 }, { "epoch": 0.6648357342729817, "grad_norm": 0.5286722779273987, "learning_rate": 1.0671943868047514e-05, "loss": 0.1927, "step": 6415 }, { "epoch": 0.6649393719556431, "grad_norm": 0.7043396830558777, "learning_rate": 1.0666005797016874e-05, "loss": 0.2081, "step": 6416 }, { "epoch": 0.6650430096383045, "grad_norm": 0.5633207559585571, "learning_rate": 1.0660068777726196e-05, "loss": 0.174, "step": 6417 }, { "epoch": 0.665146647320966, "grad_norm": 0.5071108937263489, "learning_rate": 1.0654132810844452e-05, "loss": 0.1632, "step": 6418 }, { "epoch": 0.6652502850036274, "grad_norm": 0.626396119594574, "learning_rate": 1.0648197897040483e-05, "loss": 0.2045, "step": 6419 }, { "epoch": 0.6653539226862888, "grad_norm": 0.6252375245094299, "learning_rate": 1.0642264036983045e-05, "loss": 0.1788, "step": 6420 }, { "epoch": 0.6654575603689501, "grad_norm": 0.6734023094177246, "learning_rate": 1.0636331231340744e-05, "loss": 0.2031, "step": 6421 }, { "epoch": 0.6655611980516115, "grad_norm": 0.565576434135437, "learning_rate": 1.0630399480782087e-05, "loss": 0.1719, "step": 6422 }, { "epoch": 0.6656648357342729, "grad_norm": 0.5246537327766418, "learning_rate": 1.0624468785975448e-05, "loss": 0.1728, "step": 6423 }, { "epoch": 0.6657684734169343, "grad_norm": 0.5331704616546631, "learning_rate": 1.0618539147589098e-05, "loss": 0.1855, "step": 6424 }, { "epoch": 0.6658721110995958, "grad_norm": 0.6282480955123901, "learning_rate": 1.0612610566291171e-05, "loss": 0.1903, "step": 6425 }, { "epoch": 0.6659757487822572, "grad_norm": 0.5429293513298035, "learning_rate": 1.0606683042749687e-05, "loss": 0.1755, "step": 6426 }, { "epoch": 0.6660793864649186, "grad_norm": 0.6085336208343506, "learning_rate": 1.0600756577632563e-05, "loss": 0.1785, "step": 6427 }, { "epoch": 0.66618302414758, "grad_norm": 0.6111792922019958, "learning_rate": 1.059483117160757e-05, "loss": 0.2024, "step": 6428 }, { "epoch": 0.6662866618302414, "grad_norm": 0.590161919593811, "learning_rate": 1.0588906825342384e-05, "loss": 0.192, "step": 6429 }, { "epoch": 0.6663902995129029, "grad_norm": 0.6143799424171448, "learning_rate": 1.058298353950455e-05, "loss": 0.1819, "step": 6430 }, { "epoch": 0.6664939371955643, "grad_norm": 0.644903302192688, "learning_rate": 1.0577061314761483e-05, "loss": 0.186, "step": 6431 }, { "epoch": 0.6665975748782257, "grad_norm": 0.5767917037010193, "learning_rate": 1.0571140151780498e-05, "loss": 0.1574, "step": 6432 }, { "epoch": 0.6667012125608871, "grad_norm": 0.5911566615104675, "learning_rate": 1.0565220051228793e-05, "loss": 0.2091, "step": 6433 }, { "epoch": 0.6668048502435485, "grad_norm": 0.5595060586929321, "learning_rate": 1.0559301013773418e-05, "loss": 0.1742, "step": 6434 }, { "epoch": 0.6669084879262099, "grad_norm": 0.6926925778388977, "learning_rate": 1.0553383040081333e-05, "loss": 0.2148, "step": 6435 }, { "epoch": 0.6670121256088714, "grad_norm": 0.607693076133728, "learning_rate": 1.0547466130819365e-05, "loss": 0.2077, "step": 6436 }, { "epoch": 0.6671157632915328, "grad_norm": 0.6666292548179626, "learning_rate": 1.0541550286654212e-05, "loss": 0.2025, "step": 6437 }, { "epoch": 0.6672194009741942, "grad_norm": 0.6411061882972717, "learning_rate": 1.0535635508252478e-05, "loss": 0.2155, "step": 6438 }, { "epoch": 0.6673230386568556, "grad_norm": 0.5984596610069275, "learning_rate": 1.052972179628061e-05, "loss": 0.1794, "step": 6439 }, { "epoch": 0.667426676339517, "grad_norm": 0.5402892231941223, "learning_rate": 1.052380915140498e-05, "loss": 0.1639, "step": 6440 }, { "epoch": 0.6675303140221784, "grad_norm": 0.6519803404808044, "learning_rate": 1.0517897574291794e-05, "loss": 0.2258, "step": 6441 }, { "epoch": 0.6676339517048399, "grad_norm": 0.6968314051628113, "learning_rate": 1.0511987065607179e-05, "loss": 0.2165, "step": 6442 }, { "epoch": 0.6677375893875013, "grad_norm": 0.6417881846427917, "learning_rate": 1.0506077626017111e-05, "loss": 0.2114, "step": 6443 }, { "epoch": 0.6678412270701627, "grad_norm": 0.660434365272522, "learning_rate": 1.0500169256187448e-05, "loss": 0.22, "step": 6444 }, { "epoch": 0.6679448647528241, "grad_norm": 0.6588543653488159, "learning_rate": 1.0494261956783954e-05, "loss": 0.2094, "step": 6445 }, { "epoch": 0.6680485024354855, "grad_norm": 0.5505183339118958, "learning_rate": 1.048835572847224e-05, "loss": 0.1897, "step": 6446 }, { "epoch": 0.6681521401181469, "grad_norm": 0.6822441220283508, "learning_rate": 1.0482450571917813e-05, "loss": 0.2243, "step": 6447 }, { "epoch": 0.6682557778008084, "grad_norm": 0.5411374568939209, "learning_rate": 1.047654648778607e-05, "loss": 0.1799, "step": 6448 }, { "epoch": 0.6683594154834698, "grad_norm": 0.6397514343261719, "learning_rate": 1.0470643476742266e-05, "loss": 0.2116, "step": 6449 }, { "epoch": 0.6684630531661312, "grad_norm": 0.6044232845306396, "learning_rate": 1.0464741539451539e-05, "loss": 0.185, "step": 6450 }, { "epoch": 0.6685666908487926, "grad_norm": 0.6306686401367188, "learning_rate": 1.0458840676578905e-05, "loss": 0.2067, "step": 6451 }, { "epoch": 0.668670328531454, "grad_norm": 0.6507785320281982, "learning_rate": 1.0452940888789272e-05, "loss": 0.2045, "step": 6452 }, { "epoch": 0.6687739662141154, "grad_norm": 0.7334423661231995, "learning_rate": 1.0447042176747426e-05, "loss": 0.2334, "step": 6453 }, { "epoch": 0.6688776038967769, "grad_norm": 0.517721951007843, "learning_rate": 1.0441144541118007e-05, "loss": 0.1535, "step": 6454 }, { "epoch": 0.6689812415794383, "grad_norm": 0.7203530669212341, "learning_rate": 1.043524798256558e-05, "loss": 0.2481, "step": 6455 }, { "epoch": 0.6690848792620997, "grad_norm": 0.6525432467460632, "learning_rate": 1.0429352501754526e-05, "loss": 0.1722, "step": 6456 }, { "epoch": 0.6691885169447611, "grad_norm": 0.6704126596450806, "learning_rate": 1.0423458099349153e-05, "loss": 0.22, "step": 6457 }, { "epoch": 0.6692921546274225, "grad_norm": 0.6624952554702759, "learning_rate": 1.0417564776013644e-05, "loss": 0.2111, "step": 6458 }, { "epoch": 0.669395792310084, "grad_norm": 0.7517199516296387, "learning_rate": 1.0411672532412031e-05, "loss": 0.2311, "step": 6459 }, { "epoch": 0.6694994299927454, "grad_norm": 0.5905138254165649, "learning_rate": 1.040578136920826e-05, "loss": 0.1839, "step": 6460 }, { "epoch": 0.6696030676754068, "grad_norm": 0.6565616726875305, "learning_rate": 1.0399891287066129e-05, "loss": 0.2347, "step": 6461 }, { "epoch": 0.6697067053580682, "grad_norm": 0.6401223540306091, "learning_rate": 1.0394002286649317e-05, "loss": 0.2026, "step": 6462 }, { "epoch": 0.6698103430407296, "grad_norm": 0.6125038266181946, "learning_rate": 1.0388114368621403e-05, "loss": 0.1979, "step": 6463 }, { "epoch": 0.669913980723391, "grad_norm": 0.6319140195846558, "learning_rate": 1.038222753364581e-05, "loss": 0.2092, "step": 6464 }, { "epoch": 0.6700176184060525, "grad_norm": 0.5588834881782532, "learning_rate": 1.0376341782385876e-05, "loss": 0.1929, "step": 6465 }, { "epoch": 0.6701212560887139, "grad_norm": 0.5903143286705017, "learning_rate": 1.0370457115504781e-05, "loss": 0.202, "step": 6466 }, { "epoch": 0.6702248937713753, "grad_norm": 0.5594356060028076, "learning_rate": 1.0364573533665619e-05, "loss": 0.1906, "step": 6467 }, { "epoch": 0.6703285314540367, "grad_norm": 0.6221143007278442, "learning_rate": 1.0358691037531332e-05, "loss": 0.1818, "step": 6468 }, { "epoch": 0.6704321691366981, "grad_norm": 0.5643758773803711, "learning_rate": 1.0352809627764743e-05, "loss": 0.1787, "step": 6469 }, { "epoch": 0.6705358068193595, "grad_norm": 0.5540712475776672, "learning_rate": 1.0346929305028577e-05, "loss": 0.1868, "step": 6470 }, { "epoch": 0.670639444502021, "grad_norm": 0.6116195917129517, "learning_rate": 1.0341050069985401e-05, "loss": 0.1716, "step": 6471 }, { "epoch": 0.6707430821846824, "grad_norm": 0.594440758228302, "learning_rate": 1.033517192329769e-05, "loss": 0.1982, "step": 6472 }, { "epoch": 0.6708467198673438, "grad_norm": 0.6060428023338318, "learning_rate": 1.0329294865627788e-05, "loss": 0.201, "step": 6473 }, { "epoch": 0.6709503575500052, "grad_norm": 0.6356008648872375, "learning_rate": 1.0323418897637909e-05, "loss": 0.2222, "step": 6474 }, { "epoch": 0.6710539952326666, "grad_norm": 0.6097822189331055, "learning_rate": 1.0317544019990145e-05, "loss": 0.2058, "step": 6475 }, { "epoch": 0.671157632915328, "grad_norm": 0.6528740525245667, "learning_rate": 1.0311670233346462e-05, "loss": 0.1942, "step": 6476 }, { "epoch": 0.6712612705979895, "grad_norm": 0.7413235902786255, "learning_rate": 1.0305797538368716e-05, "loss": 0.1769, "step": 6477 }, { "epoch": 0.6713649082806509, "grad_norm": 0.5531677007675171, "learning_rate": 1.0299925935718641e-05, "loss": 0.1661, "step": 6478 }, { "epoch": 0.6714685459633123, "grad_norm": 0.5698585510253906, "learning_rate": 1.0294055426057827e-05, "loss": 0.1855, "step": 6479 }, { "epoch": 0.6715721836459737, "grad_norm": 0.6898641586303711, "learning_rate": 1.0288186010047765e-05, "loss": 0.226, "step": 6480 }, { "epoch": 0.6716758213286351, "grad_norm": 0.6154083013534546, "learning_rate": 1.0282317688349805e-05, "loss": 0.1725, "step": 6481 }, { "epoch": 0.6717794590112965, "grad_norm": 0.5920049548149109, "learning_rate": 1.0276450461625176e-05, "loss": 0.1629, "step": 6482 }, { "epoch": 0.671883096693958, "grad_norm": 0.5286540985107422, "learning_rate": 1.0270584330535001e-05, "loss": 0.1745, "step": 6483 }, { "epoch": 0.6719867343766194, "grad_norm": 0.6335064768791199, "learning_rate": 1.0264719295740251e-05, "loss": 0.2205, "step": 6484 }, { "epoch": 0.6720903720592808, "grad_norm": 0.6364689469337463, "learning_rate": 1.0258855357901805e-05, "loss": 0.1982, "step": 6485 }, { "epoch": 0.6721940097419422, "grad_norm": 0.5329858660697937, "learning_rate": 1.0252992517680384e-05, "loss": 0.1636, "step": 6486 }, { "epoch": 0.6722976474246036, "grad_norm": 0.6173647046089172, "learning_rate": 1.0247130775736625e-05, "loss": 0.2033, "step": 6487 }, { "epoch": 0.672401285107265, "grad_norm": 0.6465008854866028, "learning_rate": 1.0241270132731007e-05, "loss": 0.2266, "step": 6488 }, { "epoch": 0.6725049227899265, "grad_norm": 0.6485345363616943, "learning_rate": 1.0235410589323892e-05, "loss": 0.1974, "step": 6489 }, { "epoch": 0.6726085604725879, "grad_norm": 0.6480139493942261, "learning_rate": 1.0229552146175543e-05, "loss": 0.2248, "step": 6490 }, { "epoch": 0.6727121981552493, "grad_norm": 0.5608558654785156, "learning_rate": 1.022369480394606e-05, "loss": 0.1878, "step": 6491 }, { "epoch": 0.6728158358379107, "grad_norm": 0.6115498542785645, "learning_rate": 1.0217838563295456e-05, "loss": 0.1926, "step": 6492 }, { "epoch": 0.6729194735205721, "grad_norm": 0.6429728269577026, "learning_rate": 1.0211983424883598e-05, "loss": 0.2051, "step": 6493 }, { "epoch": 0.6730231112032335, "grad_norm": 0.6617724895477295, "learning_rate": 1.0206129389370222e-05, "loss": 0.2178, "step": 6494 }, { "epoch": 0.673126748885895, "grad_norm": 0.6400306820869446, "learning_rate": 1.020027645741497e-05, "loss": 0.2006, "step": 6495 }, { "epoch": 0.6732303865685563, "grad_norm": 0.5858351588249207, "learning_rate": 1.0194424629677328e-05, "loss": 0.1879, "step": 6496 }, { "epoch": 0.6733340242512177, "grad_norm": 0.6285557150840759, "learning_rate": 1.0188573906816672e-05, "loss": 0.186, "step": 6497 }, { "epoch": 0.6734376619338791, "grad_norm": 0.5560044050216675, "learning_rate": 1.0182724289492265e-05, "loss": 0.1782, "step": 6498 }, { "epoch": 0.6735412996165405, "grad_norm": 0.5952905416488647, "learning_rate": 1.0176875778363225e-05, "loss": 0.2016, "step": 6499 }, { "epoch": 0.6736449372992019, "grad_norm": 0.5803484320640564, "learning_rate": 1.0171028374088552e-05, "loss": 0.1996, "step": 6500 }, { "epoch": 0.6737485749818634, "grad_norm": 0.7529601454734802, "learning_rate": 1.0165182077327111e-05, "loss": 0.228, "step": 6501 }, { "epoch": 0.6738522126645248, "grad_norm": 0.6083478331565857, "learning_rate": 1.015933688873767e-05, "loss": 0.203, "step": 6502 }, { "epoch": 0.6739558503471862, "grad_norm": 0.6588034629821777, "learning_rate": 1.0153492808978855e-05, "loss": 0.1929, "step": 6503 }, { "epoch": 0.6740594880298476, "grad_norm": 0.7018873691558838, "learning_rate": 1.0147649838709154e-05, "loss": 0.2466, "step": 6504 }, { "epoch": 0.674163125712509, "grad_norm": 0.614115297794342, "learning_rate": 1.0141807978586967e-05, "loss": 0.1956, "step": 6505 }, { "epoch": 0.6742667633951704, "grad_norm": 0.5698854923248291, "learning_rate": 1.0135967229270527e-05, "loss": 0.173, "step": 6506 }, { "epoch": 0.6743704010778319, "grad_norm": 0.5453628897666931, "learning_rate": 1.013012759141796e-05, "loss": 0.1696, "step": 6507 }, { "epoch": 0.6744740387604933, "grad_norm": 0.6856065988540649, "learning_rate": 1.012428906568728e-05, "loss": 0.2373, "step": 6508 }, { "epoch": 0.6745776764431547, "grad_norm": 0.6497329473495483, "learning_rate": 1.011845165273635e-05, "loss": 0.2016, "step": 6509 }, { "epoch": 0.6746813141258161, "grad_norm": 0.6395732164382935, "learning_rate": 1.0112615353222934e-05, "loss": 0.2267, "step": 6510 }, { "epoch": 0.6747849518084775, "grad_norm": 0.6444329619407654, "learning_rate": 1.0106780167804642e-05, "loss": 0.1958, "step": 6511 }, { "epoch": 0.674888589491139, "grad_norm": 0.5601248145103455, "learning_rate": 1.0100946097138988e-05, "loss": 0.1757, "step": 6512 }, { "epoch": 0.6749922271738004, "grad_norm": 0.7075749635696411, "learning_rate": 1.009511314188334e-05, "loss": 0.2391, "step": 6513 }, { "epoch": 0.6750958648564618, "grad_norm": 0.8617693781852722, "learning_rate": 1.0089281302694938e-05, "loss": 0.2913, "step": 6514 }, { "epoch": 0.6751995025391232, "grad_norm": 0.6507748961448669, "learning_rate": 1.008345058023092e-05, "loss": 0.178, "step": 6515 }, { "epoch": 0.6753031402217846, "grad_norm": 0.6890247464179993, "learning_rate": 1.0077620975148266e-05, "loss": 0.2301, "step": 6516 }, { "epoch": 0.675406777904446, "grad_norm": 0.6728772521018982, "learning_rate": 1.0071792488103858e-05, "loss": 0.2165, "step": 6517 }, { "epoch": 0.6755104155871074, "grad_norm": 0.5938656330108643, "learning_rate": 1.0065965119754444e-05, "loss": 0.2102, "step": 6518 }, { "epoch": 0.6756140532697689, "grad_norm": 0.6155257821083069, "learning_rate": 1.0060138870756639e-05, "loss": 0.1723, "step": 6519 }, { "epoch": 0.6757176909524303, "grad_norm": 0.6233095526695251, "learning_rate": 1.0054313741766935e-05, "loss": 0.1971, "step": 6520 }, { "epoch": 0.6758213286350917, "grad_norm": 0.63090980052948, "learning_rate": 1.0048489733441689e-05, "loss": 0.1919, "step": 6521 }, { "epoch": 0.6759249663177531, "grad_norm": 0.6411224603652954, "learning_rate": 1.0042666846437151e-05, "loss": 0.2079, "step": 6522 }, { "epoch": 0.6760286040004145, "grad_norm": 0.5687317252159119, "learning_rate": 1.0036845081409441e-05, "loss": 0.1825, "step": 6523 }, { "epoch": 0.676132241683076, "grad_norm": 0.625381350517273, "learning_rate": 1.003102443901454e-05, "loss": 0.1768, "step": 6524 }, { "epoch": 0.6762358793657374, "grad_norm": 0.6624096632003784, "learning_rate": 1.0025204919908304e-05, "loss": 0.2164, "step": 6525 }, { "epoch": 0.6763395170483988, "grad_norm": 0.7490399479866028, "learning_rate": 1.0019386524746468e-05, "loss": 0.2266, "step": 6526 }, { "epoch": 0.6764431547310602, "grad_norm": 0.608364462852478, "learning_rate": 1.0013569254184644e-05, "loss": 0.2014, "step": 6527 }, { "epoch": 0.6765467924137216, "grad_norm": 0.4916069209575653, "learning_rate": 1.0007753108878315e-05, "loss": 0.156, "step": 6528 }, { "epoch": 0.676650430096383, "grad_norm": 0.7098358273506165, "learning_rate": 1.000193808948283e-05, "loss": 0.2104, "step": 6529 }, { "epoch": 0.6767540677790445, "grad_norm": 0.642331600189209, "learning_rate": 9.996124196653425e-06, "loss": 0.2237, "step": 6530 }, { "epoch": 0.6768577054617059, "grad_norm": 0.6876441240310669, "learning_rate": 9.990311431045192e-06, "loss": 0.2116, "step": 6531 }, { "epoch": 0.6769613431443673, "grad_norm": 0.6571114659309387, "learning_rate": 9.984499793313101e-06, "loss": 0.2079, "step": 6532 }, { "epoch": 0.6770649808270287, "grad_norm": 0.588650107383728, "learning_rate": 9.978689284112011e-06, "loss": 0.2035, "step": 6533 }, { "epoch": 0.6771686185096901, "grad_norm": 0.5409191250801086, "learning_rate": 9.972879904096627e-06, "loss": 0.1662, "step": 6534 }, { "epoch": 0.6772722561923515, "grad_norm": 0.6306576728820801, "learning_rate": 9.967071653921553e-06, "loss": 0.2059, "step": 6535 }, { "epoch": 0.677375893875013, "grad_norm": 0.520565927028656, "learning_rate": 9.961264534241244e-06, "loss": 0.1732, "step": 6536 }, { "epoch": 0.6774795315576744, "grad_norm": 0.6069191098213196, "learning_rate": 9.955458545710048e-06, "loss": 0.1992, "step": 6537 }, { "epoch": 0.6775831692403358, "grad_norm": 0.4916059076786041, "learning_rate": 9.949653688982168e-06, "loss": 0.1414, "step": 6538 }, { "epoch": 0.6776868069229972, "grad_norm": 0.7133325338363647, "learning_rate": 9.94384996471168e-06, "loss": 0.2446, "step": 6539 }, { "epoch": 0.6777904446056586, "grad_norm": 0.6343425512313843, "learning_rate": 9.938047373552554e-06, "loss": 0.1779, "step": 6540 }, { "epoch": 0.67789408228832, "grad_norm": 0.6749465465545654, "learning_rate": 9.932245916158599e-06, "loss": 0.2319, "step": 6541 }, { "epoch": 0.6779977199709815, "grad_norm": 0.6080639958381653, "learning_rate": 9.926445593183524e-06, "loss": 0.2151, "step": 6542 }, { "epoch": 0.6781013576536429, "grad_norm": 0.7060633301734924, "learning_rate": 9.920646405280907e-06, "loss": 0.2051, "step": 6543 }, { "epoch": 0.6782049953363043, "grad_norm": 0.5446678996086121, "learning_rate": 9.914848353104185e-06, "loss": 0.1879, "step": 6544 }, { "epoch": 0.6783086330189657, "grad_norm": 0.5900073051452637, "learning_rate": 9.909051437306674e-06, "loss": 0.2155, "step": 6545 }, { "epoch": 0.6784122707016271, "grad_norm": 0.5862210392951965, "learning_rate": 9.903255658541551e-06, "loss": 0.1696, "step": 6546 }, { "epoch": 0.6785159083842885, "grad_norm": 0.6753283143043518, "learning_rate": 9.897461017461885e-06, "loss": 0.216, "step": 6547 }, { "epoch": 0.67861954606695, "grad_norm": 0.613251268863678, "learning_rate": 9.891667514720616e-06, "loss": 0.1948, "step": 6548 }, { "epoch": 0.6787231837496114, "grad_norm": 0.5268702507019043, "learning_rate": 9.885875150970527e-06, "loss": 0.15, "step": 6549 }, { "epoch": 0.6788268214322728, "grad_norm": 0.6268615126609802, "learning_rate": 9.880083926864321e-06, "loss": 0.2068, "step": 6550 }, { "epoch": 0.6789304591149342, "grad_norm": 0.6250087022781372, "learning_rate": 9.874293843054512e-06, "loss": 0.2036, "step": 6551 }, { "epoch": 0.6790340967975956, "grad_norm": 0.4842534363269806, "learning_rate": 9.86850490019353e-06, "loss": 0.1598, "step": 6552 }, { "epoch": 0.679137734480257, "grad_norm": 0.9720544219017029, "learning_rate": 9.862717098933675e-06, "loss": 0.1766, "step": 6553 }, { "epoch": 0.6792413721629185, "grad_norm": 0.6002216935157776, "learning_rate": 9.85693043992709e-06, "loss": 0.1951, "step": 6554 }, { "epoch": 0.6793450098455799, "grad_norm": 0.5289440751075745, "learning_rate": 9.851144923825823e-06, "loss": 0.1799, "step": 6555 }, { "epoch": 0.6794486475282413, "grad_norm": 0.6132556200027466, "learning_rate": 9.845360551281771e-06, "loss": 0.201, "step": 6556 }, { "epoch": 0.6795522852109027, "grad_norm": 0.659072756767273, "learning_rate": 9.839577322946697e-06, "loss": 0.2324, "step": 6557 }, { "epoch": 0.6796559228935641, "grad_norm": 0.5505070090293884, "learning_rate": 9.833795239472264e-06, "loss": 0.1905, "step": 6558 }, { "epoch": 0.6797595605762256, "grad_norm": 0.5536336898803711, "learning_rate": 9.82801430150997e-06, "loss": 0.1771, "step": 6559 }, { "epoch": 0.679863198258887, "grad_norm": 0.6265289187431335, "learning_rate": 9.82223450971122e-06, "loss": 0.2106, "step": 6560 }, { "epoch": 0.6799668359415484, "grad_norm": 0.5902706980705261, "learning_rate": 9.816455864727259e-06, "loss": 0.205, "step": 6561 }, { "epoch": 0.6800704736242098, "grad_norm": 0.5661330223083496, "learning_rate": 9.810678367209227e-06, "loss": 0.1659, "step": 6562 }, { "epoch": 0.6801741113068712, "grad_norm": 0.6367207169532776, "learning_rate": 9.804902017808116e-06, "loss": 0.2077, "step": 6563 }, { "epoch": 0.6802777489895326, "grad_norm": 0.7023079991340637, "learning_rate": 9.799126817174789e-06, "loss": 0.217, "step": 6564 }, { "epoch": 0.680381386672194, "grad_norm": 0.6935189962387085, "learning_rate": 9.793352765960004e-06, "loss": 0.2127, "step": 6565 }, { "epoch": 0.6804850243548555, "grad_norm": 0.5774267315864563, "learning_rate": 9.787579864814354e-06, "loss": 0.185, "step": 6566 }, { "epoch": 0.6805886620375169, "grad_norm": 0.5946171879768372, "learning_rate": 9.781808114388329e-06, "loss": 0.1977, "step": 6567 }, { "epoch": 0.6806922997201783, "grad_norm": 0.6323895454406738, "learning_rate": 9.776037515332291e-06, "loss": 0.2236, "step": 6568 }, { "epoch": 0.6807959374028397, "grad_norm": 0.5324143767356873, "learning_rate": 9.77026806829645e-06, "loss": 0.174, "step": 6569 }, { "epoch": 0.6808995750855011, "grad_norm": 0.6232479810714722, "learning_rate": 9.764499773930902e-06, "loss": 0.1951, "step": 6570 }, { "epoch": 0.6810032127681626, "grad_norm": 0.6016594767570496, "learning_rate": 9.7587326328856e-06, "loss": 0.1881, "step": 6571 }, { "epoch": 0.6811068504508239, "grad_norm": 0.6569531559944153, "learning_rate": 9.752966645810384e-06, "loss": 0.204, "step": 6572 }, { "epoch": 0.6812104881334853, "grad_norm": 0.6725072264671326, "learning_rate": 9.747201813354965e-06, "loss": 0.2137, "step": 6573 }, { "epoch": 0.6813141258161467, "grad_norm": 0.5521656274795532, "learning_rate": 9.741438136168902e-06, "loss": 0.1794, "step": 6574 }, { "epoch": 0.6814177634988081, "grad_norm": 0.5544648766517639, "learning_rate": 9.735675614901648e-06, "loss": 0.1838, "step": 6575 }, { "epoch": 0.6815214011814695, "grad_norm": 0.650189995765686, "learning_rate": 9.729914250202507e-06, "loss": 0.2, "step": 6576 }, { "epoch": 0.681625038864131, "grad_norm": 0.4788687229156494, "learning_rate": 9.724154042720659e-06, "loss": 0.145, "step": 6577 }, { "epoch": 0.6817286765467924, "grad_norm": 0.5694932341575623, "learning_rate": 9.718394993105167e-06, "loss": 0.193, "step": 6578 }, { "epoch": 0.6818323142294538, "grad_norm": 0.6239579319953918, "learning_rate": 9.712637102004936e-06, "loss": 0.1809, "step": 6579 }, { "epoch": 0.6819359519121152, "grad_norm": 0.6551296710968018, "learning_rate": 9.70688037006877e-06, "loss": 0.2346, "step": 6580 }, { "epoch": 0.6820395895947766, "grad_norm": 0.6136123538017273, "learning_rate": 9.701124797945318e-06, "loss": 0.199, "step": 6581 }, { "epoch": 0.682143227277438, "grad_norm": 0.5647212266921997, "learning_rate": 9.695370386283121e-06, "loss": 0.1813, "step": 6582 }, { "epoch": 0.6822468649600995, "grad_norm": 0.6047530174255371, "learning_rate": 9.689617135730566e-06, "loss": 0.1863, "step": 6583 }, { "epoch": 0.6823505026427609, "grad_norm": 0.5722807049751282, "learning_rate": 9.683865046935923e-06, "loss": 0.1696, "step": 6584 }, { "epoch": 0.6824541403254223, "grad_norm": 0.597629189491272, "learning_rate": 9.678114120547333e-06, "loss": 0.2195, "step": 6585 }, { "epoch": 0.6825577780080837, "grad_norm": 0.6136654019355774, "learning_rate": 9.672364357212793e-06, "loss": 0.2049, "step": 6586 }, { "epoch": 0.6826614156907451, "grad_norm": 0.7166646718978882, "learning_rate": 9.666615757580189e-06, "loss": 0.2406, "step": 6587 }, { "epoch": 0.6827650533734065, "grad_norm": 0.6847596168518066, "learning_rate": 9.66086832229726e-06, "loss": 0.2443, "step": 6588 }, { "epoch": 0.682868691056068, "grad_norm": 0.7182942628860474, "learning_rate": 9.655122052011604e-06, "loss": 0.2095, "step": 6589 }, { "epoch": 0.6829723287387294, "grad_norm": 0.6858102083206177, "learning_rate": 9.649376947370724e-06, "loss": 0.2029, "step": 6590 }, { "epoch": 0.6830759664213908, "grad_norm": 0.6318065524101257, "learning_rate": 9.643633009021952e-06, "loss": 0.217, "step": 6591 }, { "epoch": 0.6831796041040522, "grad_norm": 0.5490069389343262, "learning_rate": 9.637890237612512e-06, "loss": 0.1983, "step": 6592 }, { "epoch": 0.6832832417867136, "grad_norm": 0.6092766523361206, "learning_rate": 9.6321486337895e-06, "loss": 0.2206, "step": 6593 }, { "epoch": 0.683386879469375, "grad_norm": 0.6065627336502075, "learning_rate": 9.626408198199864e-06, "loss": 0.1804, "step": 6594 }, { "epoch": 0.6834905171520365, "grad_norm": 0.5605596303939819, "learning_rate": 9.620668931490425e-06, "loss": 0.1733, "step": 6595 }, { "epoch": 0.6835941548346979, "grad_norm": 0.6768938899040222, "learning_rate": 9.614930834307867e-06, "loss": 0.2123, "step": 6596 }, { "epoch": 0.6836977925173593, "grad_norm": 0.6282684803009033, "learning_rate": 9.609193907298762e-06, "loss": 0.1992, "step": 6597 }, { "epoch": 0.6838014302000207, "grad_norm": 0.512245774269104, "learning_rate": 9.60345815110954e-06, "loss": 0.1934, "step": 6598 }, { "epoch": 0.6839050678826821, "grad_norm": 0.6321278214454651, "learning_rate": 9.597723566386484e-06, "loss": 0.1997, "step": 6599 }, { "epoch": 0.6840087055653435, "grad_norm": 0.56236732006073, "learning_rate": 9.591990153775774e-06, "loss": 0.1812, "step": 6600 }, { "epoch": 0.684112343248005, "grad_norm": 0.5921213626861572, "learning_rate": 9.586257913923433e-06, "loss": 0.1733, "step": 6601 }, { "epoch": 0.6842159809306664, "grad_norm": 0.6034637093544006, "learning_rate": 9.580526847475356e-06, "loss": 0.2052, "step": 6602 }, { "epoch": 0.6843196186133278, "grad_norm": 0.6185294389724731, "learning_rate": 9.574796955077323e-06, "loss": 0.2122, "step": 6603 }, { "epoch": 0.6844232562959892, "grad_norm": 0.6712292432785034, "learning_rate": 9.569068237374955e-06, "loss": 0.1815, "step": 6604 }, { "epoch": 0.6845268939786506, "grad_norm": 0.6129352450370789, "learning_rate": 9.563340695013772e-06, "loss": 0.211, "step": 6605 }, { "epoch": 0.684630531661312, "grad_norm": 0.6321895718574524, "learning_rate": 9.557614328639127e-06, "loss": 0.1953, "step": 6606 }, { "epoch": 0.6847341693439735, "grad_norm": 0.48475220799446106, "learning_rate": 9.551889138896274e-06, "loss": 0.1606, "step": 6607 }, { "epoch": 0.6848378070266349, "grad_norm": 0.5537219643592834, "learning_rate": 9.546165126430309e-06, "loss": 0.1823, "step": 6608 }, { "epoch": 0.6849414447092963, "grad_norm": 0.8098316192626953, "learning_rate": 9.540442291886201e-06, "loss": 0.2273, "step": 6609 }, { "epoch": 0.6850450823919577, "grad_norm": 0.6700147390365601, "learning_rate": 9.534720635908803e-06, "loss": 0.2132, "step": 6610 }, { "epoch": 0.6851487200746191, "grad_norm": 0.6605697274208069, "learning_rate": 9.529000159142806e-06, "loss": 0.2098, "step": 6611 }, { "epoch": 0.6852523577572806, "grad_norm": 0.5277429223060608, "learning_rate": 9.523280862232795e-06, "loss": 0.1619, "step": 6612 }, { "epoch": 0.685355995439942, "grad_norm": 0.6423800587654114, "learning_rate": 9.517562745823228e-06, "loss": 0.1979, "step": 6613 }, { "epoch": 0.6854596331226034, "grad_norm": 0.5679594874382019, "learning_rate": 9.511845810558376e-06, "loss": 0.1915, "step": 6614 }, { "epoch": 0.6855632708052648, "grad_norm": 0.6598084568977356, "learning_rate": 9.506130057082444e-06, "loss": 0.2111, "step": 6615 }, { "epoch": 0.6856669084879262, "grad_norm": 0.6002801656723022, "learning_rate": 9.500415486039456e-06, "loss": 0.1962, "step": 6616 }, { "epoch": 0.6857705461705876, "grad_norm": 0.6224452257156372, "learning_rate": 9.49470209807333e-06, "loss": 0.2008, "step": 6617 }, { "epoch": 0.685874183853249, "grad_norm": 0.5566638708114624, "learning_rate": 9.488989893827847e-06, "loss": 0.1935, "step": 6618 }, { "epoch": 0.6859778215359105, "grad_norm": 0.6481515169143677, "learning_rate": 9.483278873946643e-06, "loss": 0.2004, "step": 6619 }, { "epoch": 0.6860814592185719, "grad_norm": 0.5335357785224915, "learning_rate": 9.477569039073227e-06, "loss": 0.1573, "step": 6620 }, { "epoch": 0.6861850969012333, "grad_norm": 0.6524526476860046, "learning_rate": 9.471860389850967e-06, "loss": 0.2323, "step": 6621 }, { "epoch": 0.6862887345838947, "grad_norm": 0.6136525273323059, "learning_rate": 9.46615292692311e-06, "loss": 0.1873, "step": 6622 }, { "epoch": 0.6863923722665561, "grad_norm": 0.6165292859077454, "learning_rate": 9.460446650932778e-06, "loss": 0.1827, "step": 6623 }, { "epoch": 0.6864960099492176, "grad_norm": 0.6506378054618835, "learning_rate": 9.454741562522922e-06, "loss": 0.1998, "step": 6624 }, { "epoch": 0.686599647631879, "grad_norm": 0.6071855425834656, "learning_rate": 9.449037662336405e-06, "loss": 0.2088, "step": 6625 }, { "epoch": 0.6867032853145404, "grad_norm": 0.6445494294166565, "learning_rate": 9.44333495101592e-06, "loss": 0.186, "step": 6626 }, { "epoch": 0.6868069229972018, "grad_norm": 0.6092303991317749, "learning_rate": 9.437633429204033e-06, "loss": 0.1988, "step": 6627 }, { "epoch": 0.6869105606798632, "grad_norm": 0.5655484795570374, "learning_rate": 9.431933097543202e-06, "loss": 0.1717, "step": 6628 }, { "epoch": 0.6870141983625246, "grad_norm": 0.6163622736930847, "learning_rate": 9.426233956675712e-06, "loss": 0.179, "step": 6629 }, { "epoch": 0.6871178360451861, "grad_norm": 0.5814242959022522, "learning_rate": 9.420536007243744e-06, "loss": 0.1849, "step": 6630 }, { "epoch": 0.6872214737278475, "grad_norm": 0.5335648059844971, "learning_rate": 9.414839249889338e-06, "loss": 0.1771, "step": 6631 }, { "epoch": 0.6873251114105089, "grad_norm": 0.6043835282325745, "learning_rate": 9.409143685254391e-06, "loss": 0.1998, "step": 6632 }, { "epoch": 0.6874287490931703, "grad_norm": 0.6903834939002991, "learning_rate": 9.403449313980671e-06, "loss": 0.2307, "step": 6633 }, { "epoch": 0.6875323867758317, "grad_norm": 0.6415154933929443, "learning_rate": 9.397756136709801e-06, "loss": 0.1853, "step": 6634 }, { "epoch": 0.6876360244584931, "grad_norm": 0.6361954808235168, "learning_rate": 9.392064154083288e-06, "loss": 0.2021, "step": 6635 }, { "epoch": 0.6877396621411546, "grad_norm": 0.6217038631439209, "learning_rate": 9.386373366742505e-06, "loss": 0.2018, "step": 6636 }, { "epoch": 0.687843299823816, "grad_norm": 0.5349450707435608, "learning_rate": 9.380683775328662e-06, "loss": 0.1548, "step": 6637 }, { "epoch": 0.6879469375064774, "grad_norm": 0.657341718673706, "learning_rate": 9.374995380482872e-06, "loss": 0.2109, "step": 6638 }, { "epoch": 0.6880505751891388, "grad_norm": 0.6744164824485779, "learning_rate": 9.369308182846086e-06, "loss": 0.179, "step": 6639 }, { "epoch": 0.6881542128718002, "grad_norm": 0.5822855234146118, "learning_rate": 9.36362218305912e-06, "loss": 0.1793, "step": 6640 }, { "epoch": 0.6882578505544616, "grad_norm": 0.6837253570556641, "learning_rate": 9.35793738176268e-06, "loss": 0.2267, "step": 6641 }, { "epoch": 0.6883614882371231, "grad_norm": 0.7134829163551331, "learning_rate": 9.352253779597305e-06, "loss": 0.2496, "step": 6642 }, { "epoch": 0.6884651259197845, "grad_norm": 0.542172908782959, "learning_rate": 9.346571377203428e-06, "loss": 0.1805, "step": 6643 }, { "epoch": 0.6885687636024459, "grad_norm": 0.634016752243042, "learning_rate": 9.34089017522132e-06, "loss": 0.1763, "step": 6644 }, { "epoch": 0.6886724012851073, "grad_norm": 0.6878634095191956, "learning_rate": 9.335210174291145e-06, "loss": 0.2227, "step": 6645 }, { "epoch": 0.6887760389677687, "grad_norm": 0.6338194608688354, "learning_rate": 9.32953137505291e-06, "loss": 0.2074, "step": 6646 }, { "epoch": 0.6888796766504302, "grad_norm": 0.588844895362854, "learning_rate": 9.32385377814648e-06, "loss": 0.1972, "step": 6647 }, { "epoch": 0.6889833143330915, "grad_norm": 0.6829007863998413, "learning_rate": 9.318177384211621e-06, "loss": 0.2387, "step": 6648 }, { "epoch": 0.6890869520157529, "grad_norm": 0.601935625076294, "learning_rate": 9.312502193887922e-06, "loss": 0.1743, "step": 6649 }, { "epoch": 0.6891905896984143, "grad_norm": 0.6067055463790894, "learning_rate": 9.30682820781487e-06, "loss": 0.1696, "step": 6650 }, { "epoch": 0.6892942273810757, "grad_norm": 0.518624484539032, "learning_rate": 9.301155426631792e-06, "loss": 0.1541, "step": 6651 }, { "epoch": 0.6893978650637371, "grad_norm": 0.6755282282829285, "learning_rate": 9.29548385097788e-06, "loss": 0.2102, "step": 6652 }, { "epoch": 0.6895015027463985, "grad_norm": 0.6593364477157593, "learning_rate": 9.289813481492216e-06, "loss": 0.1932, "step": 6653 }, { "epoch": 0.68960514042906, "grad_norm": 0.6965281963348389, "learning_rate": 9.28414431881371e-06, "loss": 0.2365, "step": 6654 }, { "epoch": 0.6897087781117214, "grad_norm": 0.6095872521400452, "learning_rate": 9.278476363581166e-06, "loss": 0.2224, "step": 6655 }, { "epoch": 0.6898124157943828, "grad_norm": 0.6280148029327393, "learning_rate": 9.272809616433245e-06, "loss": 0.1917, "step": 6656 }, { "epoch": 0.6899160534770442, "grad_norm": 0.6921609044075012, "learning_rate": 9.267144078008462e-06, "loss": 0.1953, "step": 6657 }, { "epoch": 0.6900196911597056, "grad_norm": 0.7111772894859314, "learning_rate": 9.261479748945199e-06, "loss": 0.2258, "step": 6658 }, { "epoch": 0.690123328842367, "grad_norm": 0.6491702198982239, "learning_rate": 9.255816629881698e-06, "loss": 0.187, "step": 6659 }, { "epoch": 0.6902269665250285, "grad_norm": 0.6895001530647278, "learning_rate": 9.250154721456075e-06, "loss": 0.2074, "step": 6660 }, { "epoch": 0.6903306042076899, "grad_norm": 0.6911149621009827, "learning_rate": 9.244494024306315e-06, "loss": 0.1962, "step": 6661 }, { "epoch": 0.6904342418903513, "grad_norm": 0.5192302465438843, "learning_rate": 9.23883453907024e-06, "loss": 0.1676, "step": 6662 }, { "epoch": 0.6905378795730127, "grad_norm": 0.6603487133979797, "learning_rate": 9.233176266385571e-06, "loss": 0.2041, "step": 6663 }, { "epoch": 0.6906415172556741, "grad_norm": 0.5469902157783508, "learning_rate": 9.22751920688986e-06, "loss": 0.1585, "step": 6664 }, { "epoch": 0.6907451549383355, "grad_norm": 0.6095395088195801, "learning_rate": 9.221863361220534e-06, "loss": 0.2082, "step": 6665 }, { "epoch": 0.690848792620997, "grad_norm": 0.7023966312408447, "learning_rate": 9.216208730014895e-06, "loss": 0.2116, "step": 6666 }, { "epoch": 0.6909524303036584, "grad_norm": 0.5786901116371155, "learning_rate": 9.210555313910086e-06, "loss": 0.1756, "step": 6667 }, { "epoch": 0.6910560679863198, "grad_norm": 0.6502581238746643, "learning_rate": 9.20490311354314e-06, "loss": 0.1929, "step": 6668 }, { "epoch": 0.6911597056689812, "grad_norm": 0.6279124617576599, "learning_rate": 9.199252129550922e-06, "loss": 0.1784, "step": 6669 }, { "epoch": 0.6912633433516426, "grad_norm": 0.6859745383262634, "learning_rate": 9.193602362570188e-06, "loss": 0.2056, "step": 6670 }, { "epoch": 0.691366981034304, "grad_norm": 0.6891130208969116, "learning_rate": 9.187953813237544e-06, "loss": 0.2161, "step": 6671 }, { "epoch": 0.6914706187169655, "grad_norm": 0.685141921043396, "learning_rate": 9.18230648218945e-06, "loss": 0.2127, "step": 6672 }, { "epoch": 0.6915742563996269, "grad_norm": 0.5688890814781189, "learning_rate": 9.17666037006225e-06, "loss": 0.1718, "step": 6673 }, { "epoch": 0.6916778940822883, "grad_norm": 0.6026402115821838, "learning_rate": 9.171015477492129e-06, "loss": 0.1706, "step": 6674 }, { "epoch": 0.6917815317649497, "grad_norm": 0.6500517725944519, "learning_rate": 9.165371805115151e-06, "loss": 0.1848, "step": 6675 }, { "epoch": 0.6918851694476111, "grad_norm": 0.5920248627662659, "learning_rate": 9.159729353567248e-06, "loss": 0.2026, "step": 6676 }, { "epoch": 0.6919888071302726, "grad_norm": 0.48986637592315674, "learning_rate": 9.154088123484175e-06, "loss": 0.1551, "step": 6677 }, { "epoch": 0.692092444812934, "grad_norm": 0.6007040739059448, "learning_rate": 9.1484481155016e-06, "loss": 0.1839, "step": 6678 }, { "epoch": 0.6921960824955954, "grad_norm": 0.4834362268447876, "learning_rate": 9.142809330255015e-06, "loss": 0.1471, "step": 6679 }, { "epoch": 0.6922997201782568, "grad_norm": 0.5714542269706726, "learning_rate": 9.137171768379796e-06, "loss": 0.1876, "step": 6680 }, { "epoch": 0.6924033578609182, "grad_norm": 0.5275331139564514, "learning_rate": 9.131535430511185e-06, "loss": 0.1654, "step": 6681 }, { "epoch": 0.6925069955435796, "grad_norm": 0.6573442816734314, "learning_rate": 9.125900317284265e-06, "loss": 0.211, "step": 6682 }, { "epoch": 0.6926106332262411, "grad_norm": 0.722928524017334, "learning_rate": 9.120266429333993e-06, "loss": 0.2449, "step": 6683 }, { "epoch": 0.6927142709089025, "grad_norm": 0.6090424656867981, "learning_rate": 9.114633767295178e-06, "loss": 0.2074, "step": 6684 }, { "epoch": 0.6928179085915639, "grad_norm": 0.6295527219772339, "learning_rate": 9.10900233180251e-06, "loss": 0.1865, "step": 6685 }, { "epoch": 0.6929215462742253, "grad_norm": 0.664747953414917, "learning_rate": 9.103372123490538e-06, "loss": 0.2115, "step": 6686 }, { "epoch": 0.6930251839568867, "grad_norm": 0.6330031156539917, "learning_rate": 9.097743142993644e-06, "loss": 0.2085, "step": 6687 }, { "epoch": 0.6931288216395481, "grad_norm": 0.6022166013717651, "learning_rate": 9.092115390946117e-06, "loss": 0.2151, "step": 6688 }, { "epoch": 0.6932324593222096, "grad_norm": 0.6728755831718445, "learning_rate": 9.086488867982068e-06, "loss": 0.2074, "step": 6689 }, { "epoch": 0.693336097004871, "grad_norm": 0.5417996644973755, "learning_rate": 9.08086357473548e-06, "loss": 0.1755, "step": 6690 }, { "epoch": 0.6934397346875324, "grad_norm": 0.547081708908081, "learning_rate": 9.075239511840222e-06, "loss": 0.1851, "step": 6691 }, { "epoch": 0.6935433723701938, "grad_norm": 0.5020447373390198, "learning_rate": 9.069616679929982e-06, "loss": 0.1557, "step": 6692 }, { "epoch": 0.6936470100528552, "grad_norm": 0.6151479482650757, "learning_rate": 9.063995079638352e-06, "loss": 0.1753, "step": 6693 }, { "epoch": 0.6937506477355166, "grad_norm": 0.637016773223877, "learning_rate": 9.058374711598747e-06, "loss": 0.2052, "step": 6694 }, { "epoch": 0.6938542854181781, "grad_norm": 0.5873505473136902, "learning_rate": 9.052755576444479e-06, "loss": 0.1735, "step": 6695 }, { "epoch": 0.6939579231008395, "grad_norm": 0.5946987271308899, "learning_rate": 9.047137674808694e-06, "loss": 0.2131, "step": 6696 }, { "epoch": 0.6940615607835009, "grad_norm": 0.5622970461845398, "learning_rate": 9.041521007324403e-06, "loss": 0.1803, "step": 6697 }, { "epoch": 0.6941651984661623, "grad_norm": 0.5434853434562683, "learning_rate": 9.035905574624496e-06, "loss": 0.1699, "step": 6698 }, { "epoch": 0.6942688361488237, "grad_norm": 0.599409818649292, "learning_rate": 9.030291377341698e-06, "loss": 0.1842, "step": 6699 }, { "epoch": 0.6943724738314851, "grad_norm": 0.589805006980896, "learning_rate": 9.024678416108615e-06, "loss": 0.1921, "step": 6700 }, { "epoch": 0.6944761115141466, "grad_norm": 0.6243669390678406, "learning_rate": 9.019066691557714e-06, "loss": 0.2022, "step": 6701 }, { "epoch": 0.694579749196808, "grad_norm": 0.5878159403800964, "learning_rate": 9.013456204321307e-06, "loss": 0.1865, "step": 6702 }, { "epoch": 0.6946833868794694, "grad_norm": 0.565056562423706, "learning_rate": 9.007846955031575e-06, "loss": 0.1897, "step": 6703 }, { "epoch": 0.6947870245621308, "grad_norm": 0.48694732785224915, "learning_rate": 9.002238944320555e-06, "loss": 0.1539, "step": 6704 }, { "epoch": 0.6948906622447922, "grad_norm": 0.5932483077049255, "learning_rate": 8.996632172820155e-06, "loss": 0.1718, "step": 6705 }, { "epoch": 0.6949942999274537, "grad_norm": 0.6488050818443298, "learning_rate": 8.991026641162144e-06, "loss": 0.1962, "step": 6706 }, { "epoch": 0.6950979376101151, "grad_norm": 0.6209344863891602, "learning_rate": 8.985422349978127e-06, "loss": 0.1948, "step": 6707 }, { "epoch": 0.6952015752927765, "grad_norm": 0.5865502953529358, "learning_rate": 8.979819299899615e-06, "loss": 0.1798, "step": 6708 }, { "epoch": 0.6953052129754379, "grad_norm": 0.6794852018356323, "learning_rate": 8.974217491557916e-06, "loss": 0.2036, "step": 6709 }, { "epoch": 0.6954088506580993, "grad_norm": 0.6519509553909302, "learning_rate": 8.968616925584253e-06, "loss": 0.2042, "step": 6710 }, { "epoch": 0.6955124883407607, "grad_norm": 0.6843174695968628, "learning_rate": 8.963017602609691e-06, "loss": 0.2087, "step": 6711 }, { "epoch": 0.6956161260234222, "grad_norm": 0.8070350289344788, "learning_rate": 8.957419523265142e-06, "loss": 0.2181, "step": 6712 }, { "epoch": 0.6957197637060836, "grad_norm": 0.5602341890335083, "learning_rate": 8.951822688181405e-06, "loss": 0.1616, "step": 6713 }, { "epoch": 0.695823401388745, "grad_norm": 0.6749635934829712, "learning_rate": 8.946227097989108e-06, "loss": 0.1948, "step": 6714 }, { "epoch": 0.6959270390714064, "grad_norm": 0.680906355381012, "learning_rate": 8.940632753318755e-06, "loss": 0.2078, "step": 6715 }, { "epoch": 0.6960306767540678, "grad_norm": 0.6567285656929016, "learning_rate": 8.935039654800714e-06, "loss": 0.224, "step": 6716 }, { "epoch": 0.6961343144367292, "grad_norm": 0.5188596844673157, "learning_rate": 8.929447803065202e-06, "loss": 0.1859, "step": 6717 }, { "epoch": 0.6962379521193907, "grad_norm": 0.6843985319137573, "learning_rate": 8.923857198742305e-06, "loss": 0.2187, "step": 6718 }, { "epoch": 0.6963415898020521, "grad_norm": 0.6555632948875427, "learning_rate": 8.918267842461955e-06, "loss": 0.2246, "step": 6719 }, { "epoch": 0.6964452274847135, "grad_norm": 0.563077986240387, "learning_rate": 8.912679734853963e-06, "loss": 0.1897, "step": 6720 }, { "epoch": 0.6965488651673749, "grad_norm": 0.6430466175079346, "learning_rate": 8.907092876547984e-06, "loss": 0.1791, "step": 6721 }, { "epoch": 0.6966525028500363, "grad_norm": 0.6822545528411865, "learning_rate": 8.90150726817353e-06, "loss": 0.2179, "step": 6722 }, { "epoch": 0.6967561405326977, "grad_norm": 0.6235038638114929, "learning_rate": 8.89592291035999e-06, "loss": 0.208, "step": 6723 }, { "epoch": 0.696859778215359, "grad_norm": 0.6741113662719727, "learning_rate": 8.890339803736587e-06, "loss": 0.2042, "step": 6724 }, { "epoch": 0.6969634158980205, "grad_norm": 0.6065617799758911, "learning_rate": 8.884757948932426e-06, "loss": 0.1959, "step": 6725 }, { "epoch": 0.6970670535806819, "grad_norm": 0.6889532208442688, "learning_rate": 8.879177346576466e-06, "loss": 0.2158, "step": 6726 }, { "epoch": 0.6971706912633433, "grad_norm": 0.5668037533760071, "learning_rate": 8.873597997297516e-06, "loss": 0.1659, "step": 6727 }, { "epoch": 0.6972743289460047, "grad_norm": 0.5899565815925598, "learning_rate": 8.868019901724248e-06, "loss": 0.1864, "step": 6728 }, { "epoch": 0.6973779666286661, "grad_norm": 0.6486213803291321, "learning_rate": 8.862443060485184e-06, "loss": 0.2049, "step": 6729 }, { "epoch": 0.6974816043113276, "grad_norm": 0.597908079624176, "learning_rate": 8.856867474208724e-06, "loss": 0.1649, "step": 6730 }, { "epoch": 0.697585241993989, "grad_norm": 0.6301078796386719, "learning_rate": 8.851293143523118e-06, "loss": 0.1797, "step": 6731 }, { "epoch": 0.6976888796766504, "grad_norm": 0.5568992495536804, "learning_rate": 8.845720069056465e-06, "loss": 0.1658, "step": 6732 }, { "epoch": 0.6977925173593118, "grad_norm": 0.6819639801979065, "learning_rate": 8.840148251436741e-06, "loss": 0.2158, "step": 6733 }, { "epoch": 0.6978961550419732, "grad_norm": 0.5686694979667664, "learning_rate": 8.834577691291768e-06, "loss": 0.1897, "step": 6734 }, { "epoch": 0.6979997927246346, "grad_norm": 0.5983203053474426, "learning_rate": 8.829008389249212e-06, "loss": 0.1684, "step": 6735 }, { "epoch": 0.698103430407296, "grad_norm": 0.5979479551315308, "learning_rate": 8.823440345936633e-06, "loss": 0.1926, "step": 6736 }, { "epoch": 0.6982070680899575, "grad_norm": 0.6044715046882629, "learning_rate": 8.817873561981416e-06, "loss": 0.1793, "step": 6737 }, { "epoch": 0.6983107057726189, "grad_norm": 0.5939434170722961, "learning_rate": 8.812308038010828e-06, "loss": 0.1777, "step": 6738 }, { "epoch": 0.6984143434552803, "grad_norm": 0.6867238879203796, "learning_rate": 8.80674377465198e-06, "loss": 0.2102, "step": 6739 }, { "epoch": 0.6985179811379417, "grad_norm": 0.6145426630973816, "learning_rate": 8.801180772531836e-06, "loss": 0.2017, "step": 6740 }, { "epoch": 0.6986216188206031, "grad_norm": 0.6226168274879456, "learning_rate": 8.79561903227724e-06, "loss": 0.2076, "step": 6741 }, { "epoch": 0.6987252565032646, "grad_norm": 0.567448079586029, "learning_rate": 8.790058554514868e-06, "loss": 0.1658, "step": 6742 }, { "epoch": 0.698828894185926, "grad_norm": 0.7417593598365784, "learning_rate": 8.78449933987128e-06, "loss": 0.2276, "step": 6743 }, { "epoch": 0.6989325318685874, "grad_norm": 0.6055402755737305, "learning_rate": 8.778941388972861e-06, "loss": 0.2017, "step": 6744 }, { "epoch": 0.6990361695512488, "grad_norm": 0.56300288438797, "learning_rate": 8.773384702445893e-06, "loss": 0.1638, "step": 6745 }, { "epoch": 0.6991398072339102, "grad_norm": 0.6405076384544373, "learning_rate": 8.767829280916485e-06, "loss": 0.2077, "step": 6746 }, { "epoch": 0.6992434449165716, "grad_norm": 0.6768667697906494, "learning_rate": 8.762275125010602e-06, "loss": 0.2175, "step": 6747 }, { "epoch": 0.6993470825992331, "grad_norm": 0.6095232367515564, "learning_rate": 8.756722235354099e-06, "loss": 0.2166, "step": 6748 }, { "epoch": 0.6994507202818945, "grad_norm": 0.6138764023780823, "learning_rate": 8.751170612572648e-06, "loss": 0.1859, "step": 6749 }, { "epoch": 0.6995543579645559, "grad_norm": 0.7089101076126099, "learning_rate": 8.745620257291805e-06, "loss": 0.2385, "step": 6750 }, { "epoch": 0.6996579956472173, "grad_norm": 0.599244236946106, "learning_rate": 8.740071170136986e-06, "loss": 0.1867, "step": 6751 }, { "epoch": 0.6997616333298787, "grad_norm": 0.7416609525680542, "learning_rate": 8.734523351733442e-06, "loss": 0.2266, "step": 6752 }, { "epoch": 0.6998652710125401, "grad_norm": 0.5874851942062378, "learning_rate": 8.728976802706293e-06, "loss": 0.1634, "step": 6753 }, { "epoch": 0.6999689086952016, "grad_norm": 0.6908164024353027, "learning_rate": 8.72343152368051e-06, "loss": 0.2111, "step": 6754 }, { "epoch": 0.700072546377863, "grad_norm": 0.5676965713500977, "learning_rate": 8.717887515280934e-06, "loss": 0.1673, "step": 6755 }, { "epoch": 0.7001761840605244, "grad_norm": 0.745006263256073, "learning_rate": 8.712344778132262e-06, "loss": 0.2086, "step": 6756 }, { "epoch": 0.7002798217431858, "grad_norm": 0.6484946608543396, "learning_rate": 8.706803312859025e-06, "loss": 0.2198, "step": 6757 }, { "epoch": 0.7003834594258472, "grad_norm": 0.6044569611549377, "learning_rate": 8.701263120085643e-06, "loss": 0.1877, "step": 6758 }, { "epoch": 0.7004870971085086, "grad_norm": 0.6353676319122314, "learning_rate": 8.695724200436369e-06, "loss": 0.2428, "step": 6759 }, { "epoch": 0.7005907347911701, "grad_norm": 0.6398935317993164, "learning_rate": 8.690186554535312e-06, "loss": 0.1975, "step": 6760 }, { "epoch": 0.7006943724738315, "grad_norm": 0.5691301822662354, "learning_rate": 8.684650183006457e-06, "loss": 0.1759, "step": 6761 }, { "epoch": 0.7007980101564929, "grad_norm": 0.6124591827392578, "learning_rate": 8.679115086473625e-06, "loss": 0.1891, "step": 6762 }, { "epoch": 0.7009016478391543, "grad_norm": 0.6126314401626587, "learning_rate": 8.673581265560513e-06, "loss": 0.1912, "step": 6763 }, { "epoch": 0.7010052855218157, "grad_norm": 0.6804085373878479, "learning_rate": 8.66804872089065e-06, "loss": 0.1997, "step": 6764 }, { "epoch": 0.7011089232044772, "grad_norm": 0.5831896066665649, "learning_rate": 8.662517453087446e-06, "loss": 0.2051, "step": 6765 }, { "epoch": 0.7012125608871386, "grad_norm": 0.6971094608306885, "learning_rate": 8.656987462774153e-06, "loss": 0.2093, "step": 6766 }, { "epoch": 0.7013161985698, "grad_norm": 0.593682050704956, "learning_rate": 8.651458750573874e-06, "loss": 0.172, "step": 6767 }, { "epoch": 0.7014198362524614, "grad_norm": 0.6307997107505798, "learning_rate": 8.645931317109585e-06, "loss": 0.1916, "step": 6768 }, { "epoch": 0.7015234739351228, "grad_norm": 0.7226161956787109, "learning_rate": 8.6404051630041e-06, "loss": 0.215, "step": 6769 }, { "epoch": 0.7016271116177842, "grad_norm": 0.583858072757721, "learning_rate": 8.634880288880102e-06, "loss": 0.1554, "step": 6770 }, { "epoch": 0.7017307493004457, "grad_norm": 0.6536171436309814, "learning_rate": 8.62935669536014e-06, "loss": 0.22, "step": 6771 }, { "epoch": 0.7018343869831071, "grad_norm": 0.6236019730567932, "learning_rate": 8.623834383066576e-06, "loss": 0.1963, "step": 6772 }, { "epoch": 0.7019380246657685, "grad_norm": 0.6093345880508423, "learning_rate": 8.618313352621675e-06, "loss": 0.1875, "step": 6773 }, { "epoch": 0.7020416623484299, "grad_norm": 0.6570085287094116, "learning_rate": 8.612793604647525e-06, "loss": 0.1845, "step": 6774 }, { "epoch": 0.7021453000310913, "grad_norm": 0.5571804642677307, "learning_rate": 8.607275139766089e-06, "loss": 0.1607, "step": 6775 }, { "epoch": 0.7022489377137527, "grad_norm": 0.5573575496673584, "learning_rate": 8.60175795859919e-06, "loss": 0.1622, "step": 6776 }, { "epoch": 0.7023525753964142, "grad_norm": 0.5488677024841309, "learning_rate": 8.596242061768482e-06, "loss": 0.1491, "step": 6777 }, { "epoch": 0.7024562130790756, "grad_norm": 0.6735754013061523, "learning_rate": 8.590727449895495e-06, "loss": 0.1831, "step": 6778 }, { "epoch": 0.702559850761737, "grad_norm": 0.5725656151771545, "learning_rate": 8.585214123601593e-06, "loss": 0.1875, "step": 6779 }, { "epoch": 0.7026634884443984, "grad_norm": 0.6627886295318604, "learning_rate": 8.579702083508018e-06, "loss": 0.2066, "step": 6780 }, { "epoch": 0.7027671261270598, "grad_norm": 0.5476593375205994, "learning_rate": 8.574191330235868e-06, "loss": 0.1565, "step": 6781 }, { "epoch": 0.7028707638097212, "grad_norm": 0.6509799957275391, "learning_rate": 8.56868186440607e-06, "loss": 0.1828, "step": 6782 }, { "epoch": 0.7029744014923827, "grad_norm": 0.6372617483139038, "learning_rate": 8.563173686639436e-06, "loss": 0.2039, "step": 6783 }, { "epoch": 0.7030780391750441, "grad_norm": 0.6228369474411011, "learning_rate": 8.557666797556612e-06, "loss": 0.2162, "step": 6784 }, { "epoch": 0.7031816768577055, "grad_norm": 0.5669928789138794, "learning_rate": 8.5521611977781e-06, "loss": 0.1807, "step": 6785 }, { "epoch": 0.7032853145403669, "grad_norm": 1.0194318294525146, "learning_rate": 8.546656887924275e-06, "loss": 0.1848, "step": 6786 }, { "epoch": 0.7033889522230283, "grad_norm": 0.675552487373352, "learning_rate": 8.541153868615337e-06, "loss": 0.198, "step": 6787 }, { "epoch": 0.7034925899056897, "grad_norm": 0.6050292253494263, "learning_rate": 8.535652140471377e-06, "loss": 0.1967, "step": 6788 }, { "epoch": 0.7035962275883512, "grad_norm": 0.6931806206703186, "learning_rate": 8.530151704112307e-06, "loss": 0.2266, "step": 6789 }, { "epoch": 0.7036998652710126, "grad_norm": 0.6126047372817993, "learning_rate": 8.524652560157918e-06, "loss": 0.1797, "step": 6790 }, { "epoch": 0.703803502953674, "grad_norm": 0.6635711193084717, "learning_rate": 8.51915470922784e-06, "loss": 0.2182, "step": 6791 }, { "epoch": 0.7039071406363354, "grad_norm": 0.5589820742607117, "learning_rate": 8.513658151941552e-06, "loss": 0.1763, "step": 6792 }, { "epoch": 0.7040107783189968, "grad_norm": 0.6231314539909363, "learning_rate": 8.508162888918419e-06, "loss": 0.1962, "step": 6793 }, { "epoch": 0.7041144160016582, "grad_norm": 0.6989869475364685, "learning_rate": 8.50266892077762e-06, "loss": 0.2078, "step": 6794 }, { "epoch": 0.7042180536843197, "grad_norm": 0.6447907090187073, "learning_rate": 8.497176248138212e-06, "loss": 0.1918, "step": 6795 }, { "epoch": 0.7043216913669811, "grad_norm": 0.6743923425674438, "learning_rate": 8.49168487161911e-06, "loss": 0.2248, "step": 6796 }, { "epoch": 0.7044253290496425, "grad_norm": 0.7061793208122253, "learning_rate": 8.486194791839068e-06, "loss": 0.2346, "step": 6797 }, { "epoch": 0.7045289667323039, "grad_norm": 0.5488268136978149, "learning_rate": 8.480706009416697e-06, "loss": 0.182, "step": 6798 }, { "epoch": 0.7046326044149653, "grad_norm": 0.5685331225395203, "learning_rate": 8.475218524970459e-06, "loss": 0.2149, "step": 6799 }, { "epoch": 0.7047362420976266, "grad_norm": 0.8025184273719788, "learning_rate": 8.469732339118684e-06, "loss": 0.2606, "step": 6800 }, { "epoch": 0.7048398797802881, "grad_norm": 0.6012006998062134, "learning_rate": 8.46424745247955e-06, "loss": 0.1907, "step": 6801 }, { "epoch": 0.7049435174629495, "grad_norm": 0.6444013714790344, "learning_rate": 8.45876386567108e-06, "loss": 0.1938, "step": 6802 }, { "epoch": 0.7050471551456109, "grad_norm": 0.6376757025718689, "learning_rate": 8.453281579311156e-06, "loss": 0.203, "step": 6803 }, { "epoch": 0.7051507928282723, "grad_norm": 0.7025614976882935, "learning_rate": 8.44780059401751e-06, "loss": 0.2266, "step": 6804 }, { "epoch": 0.7052544305109337, "grad_norm": 0.6612352728843689, "learning_rate": 8.44232091040773e-06, "loss": 0.2203, "step": 6805 }, { "epoch": 0.7053580681935951, "grad_norm": 0.5750066637992859, "learning_rate": 8.436842529099275e-06, "loss": 0.178, "step": 6806 }, { "epoch": 0.7054617058762566, "grad_norm": 0.4683450162410736, "learning_rate": 8.431365450709419e-06, "loss": 0.1454, "step": 6807 }, { "epoch": 0.705565343558918, "grad_norm": 0.5653798580169678, "learning_rate": 8.425889675855327e-06, "loss": 0.1553, "step": 6808 }, { "epoch": 0.7056689812415794, "grad_norm": 0.5899336338043213, "learning_rate": 8.420415205153996e-06, "loss": 0.1706, "step": 6809 }, { "epoch": 0.7057726189242408, "grad_norm": 0.6194468140602112, "learning_rate": 8.414942039222268e-06, "loss": 0.1942, "step": 6810 }, { "epoch": 0.7058762566069022, "grad_norm": 0.6131260395050049, "learning_rate": 8.409470178676873e-06, "loss": 0.1838, "step": 6811 }, { "epoch": 0.7059798942895636, "grad_norm": 0.6971158385276794, "learning_rate": 8.403999624134352e-06, "loss": 0.2318, "step": 6812 }, { "epoch": 0.7060835319722251, "grad_norm": 0.5297203063964844, "learning_rate": 8.398530376211133e-06, "loss": 0.1457, "step": 6813 }, { "epoch": 0.7061871696548865, "grad_norm": 0.6779531836509705, "learning_rate": 8.393062435523471e-06, "loss": 0.2145, "step": 6814 }, { "epoch": 0.7062908073375479, "grad_norm": 0.6087656021118164, "learning_rate": 8.387595802687497e-06, "loss": 0.1806, "step": 6815 }, { "epoch": 0.7063944450202093, "grad_norm": 0.5969190001487732, "learning_rate": 8.382130478319174e-06, "loss": 0.1882, "step": 6816 }, { "epoch": 0.7064980827028707, "grad_norm": 0.6744171977043152, "learning_rate": 8.376666463034324e-06, "loss": 0.2035, "step": 6817 }, { "epoch": 0.7066017203855322, "grad_norm": 0.6547932028770447, "learning_rate": 8.371203757448634e-06, "loss": 0.1902, "step": 6818 }, { "epoch": 0.7067053580681936, "grad_norm": 0.49832016229629517, "learning_rate": 8.36574236217762e-06, "loss": 0.1507, "step": 6819 }, { "epoch": 0.706808995750855, "grad_norm": 0.5831357836723328, "learning_rate": 8.36028227783667e-06, "loss": 0.1786, "step": 6820 }, { "epoch": 0.7069126334335164, "grad_norm": 0.6416530013084412, "learning_rate": 8.354823505041028e-06, "loss": 0.2137, "step": 6821 }, { "epoch": 0.7070162711161778, "grad_norm": 0.6294683218002319, "learning_rate": 8.349366044405769e-06, "loss": 0.2052, "step": 6822 }, { "epoch": 0.7071199087988392, "grad_norm": 0.6354984045028687, "learning_rate": 8.343909896545827e-06, "loss": 0.1817, "step": 6823 }, { "epoch": 0.7072235464815007, "grad_norm": 0.5182710289955139, "learning_rate": 8.338455062076006e-06, "loss": 0.1478, "step": 6824 }, { "epoch": 0.7073271841641621, "grad_norm": 0.6225818991661072, "learning_rate": 8.33300154161093e-06, "loss": 0.2003, "step": 6825 }, { "epoch": 0.7074308218468235, "grad_norm": 0.4340483844280243, "learning_rate": 8.327549335765112e-06, "loss": 0.131, "step": 6826 }, { "epoch": 0.7075344595294849, "grad_norm": 0.7447500228881836, "learning_rate": 8.322098445152884e-06, "loss": 0.2315, "step": 6827 }, { "epoch": 0.7076380972121463, "grad_norm": 0.6974065899848938, "learning_rate": 8.316648870388455e-06, "loss": 0.1915, "step": 6828 }, { "epoch": 0.7077417348948077, "grad_norm": 0.5116464495658875, "learning_rate": 8.31120061208587e-06, "loss": 0.143, "step": 6829 }, { "epoch": 0.7078453725774692, "grad_norm": 0.5923205614089966, "learning_rate": 8.305753670859023e-06, "loss": 0.2009, "step": 6830 }, { "epoch": 0.7079490102601306, "grad_norm": 0.6276652216911316, "learning_rate": 8.300308047321679e-06, "loss": 0.1875, "step": 6831 }, { "epoch": 0.708052647942792, "grad_norm": 0.6970288753509521, "learning_rate": 8.294863742087432e-06, "loss": 0.2212, "step": 6832 }, { "epoch": 0.7081562856254534, "grad_norm": 0.5120554566383362, "learning_rate": 8.289420755769738e-06, "loss": 0.148, "step": 6833 }, { "epoch": 0.7082599233081148, "grad_norm": 0.6152527332305908, "learning_rate": 8.283979088981929e-06, "loss": 0.1847, "step": 6834 }, { "epoch": 0.7083635609907762, "grad_norm": 0.662187933921814, "learning_rate": 8.278538742337125e-06, "loss": 0.2004, "step": 6835 }, { "epoch": 0.7084671986734377, "grad_norm": 0.6494444012641907, "learning_rate": 8.273099716448362e-06, "loss": 0.2184, "step": 6836 }, { "epoch": 0.7085708363560991, "grad_norm": 0.6606298089027405, "learning_rate": 8.267662011928485e-06, "loss": 0.195, "step": 6837 }, { "epoch": 0.7086744740387605, "grad_norm": 0.6092416048049927, "learning_rate": 8.262225629390217e-06, "loss": 0.2103, "step": 6838 }, { "epoch": 0.7087781117214219, "grad_norm": 0.5877014398574829, "learning_rate": 8.256790569446123e-06, "loss": 0.1852, "step": 6839 }, { "epoch": 0.7088817494040833, "grad_norm": 0.6736811995506287, "learning_rate": 8.251356832708615e-06, "loss": 0.2091, "step": 6840 }, { "epoch": 0.7089853870867447, "grad_norm": 0.6421195268630981, "learning_rate": 8.245924419789953e-06, "loss": 0.1842, "step": 6841 }, { "epoch": 0.7090890247694062, "grad_norm": 0.6364426016807556, "learning_rate": 8.240493331302249e-06, "loss": 0.1877, "step": 6842 }, { "epoch": 0.7091926624520676, "grad_norm": 0.6009476184844971, "learning_rate": 8.235063567857475e-06, "loss": 0.1594, "step": 6843 }, { "epoch": 0.709296300134729, "grad_norm": 0.6266801953315735, "learning_rate": 8.22963513006746e-06, "loss": 0.2032, "step": 6844 }, { "epoch": 0.7093999378173904, "grad_norm": 0.6310063004493713, "learning_rate": 8.224208018543849e-06, "loss": 0.2144, "step": 6845 }, { "epoch": 0.7095035755000518, "grad_norm": 0.5849006175994873, "learning_rate": 8.218782233898183e-06, "loss": 0.2095, "step": 6846 }, { "epoch": 0.7096072131827132, "grad_norm": 0.6484225988388062, "learning_rate": 8.213357776741819e-06, "loss": 0.1889, "step": 6847 }, { "epoch": 0.7097108508653747, "grad_norm": 0.7030849456787109, "learning_rate": 8.207934647685972e-06, "loss": 0.2273, "step": 6848 }, { "epoch": 0.7098144885480361, "grad_norm": 0.645413339138031, "learning_rate": 8.202512847341724e-06, "loss": 0.2285, "step": 6849 }, { "epoch": 0.7099181262306975, "grad_norm": 0.5695993900299072, "learning_rate": 8.197092376319984e-06, "loss": 0.1934, "step": 6850 }, { "epoch": 0.7100217639133589, "grad_norm": 0.6163286566734314, "learning_rate": 8.191673235231532e-06, "loss": 0.1681, "step": 6851 }, { "epoch": 0.7101254015960203, "grad_norm": 0.5936559438705444, "learning_rate": 8.186255424686975e-06, "loss": 0.1889, "step": 6852 }, { "epoch": 0.7102290392786818, "grad_norm": 0.6233243942260742, "learning_rate": 8.180838945296803e-06, "loss": 0.1745, "step": 6853 }, { "epoch": 0.7103326769613432, "grad_norm": 0.6084573268890381, "learning_rate": 8.175423797671322e-06, "loss": 0.2057, "step": 6854 }, { "epoch": 0.7104363146440046, "grad_norm": 0.5781868100166321, "learning_rate": 8.170009982420699e-06, "loss": 0.1724, "step": 6855 }, { "epoch": 0.710539952326666, "grad_norm": 0.6366261839866638, "learning_rate": 8.16459750015497e-06, "loss": 0.1877, "step": 6856 }, { "epoch": 0.7106435900093274, "grad_norm": 0.6529333591461182, "learning_rate": 8.159186351483987e-06, "loss": 0.2034, "step": 6857 }, { "epoch": 0.7107472276919888, "grad_norm": 0.5903447866439819, "learning_rate": 8.153776537017482e-06, "loss": 0.2054, "step": 6858 }, { "epoch": 0.7108508653746503, "grad_norm": 0.6202003359794617, "learning_rate": 8.148368057365026e-06, "loss": 0.1741, "step": 6859 }, { "epoch": 0.7109545030573117, "grad_norm": 0.662723183631897, "learning_rate": 8.142960913136036e-06, "loss": 0.2178, "step": 6860 }, { "epoch": 0.7110581407399731, "grad_norm": 0.6324710845947266, "learning_rate": 8.137555104939776e-06, "loss": 0.2129, "step": 6861 }, { "epoch": 0.7111617784226345, "grad_norm": 0.5313311219215393, "learning_rate": 8.132150633385359e-06, "loss": 0.1504, "step": 6862 }, { "epoch": 0.7112654161052959, "grad_norm": 0.6284851431846619, "learning_rate": 8.12674749908176e-06, "loss": 0.1995, "step": 6863 }, { "epoch": 0.7113690537879573, "grad_norm": 0.8839542865753174, "learning_rate": 8.121345702637804e-06, "loss": 0.2332, "step": 6864 }, { "epoch": 0.7114726914706188, "grad_norm": 0.6175948977470398, "learning_rate": 8.115945244662148e-06, "loss": 0.2128, "step": 6865 }, { "epoch": 0.7115763291532802, "grad_norm": 0.6142639517784119, "learning_rate": 8.110546125763305e-06, "loss": 0.2062, "step": 6866 }, { "epoch": 0.7116799668359416, "grad_norm": 0.7615811824798584, "learning_rate": 8.105148346549638e-06, "loss": 0.2388, "step": 6867 }, { "epoch": 0.711783604518603, "grad_norm": 0.678072452545166, "learning_rate": 8.099751907629363e-06, "loss": 0.2006, "step": 6868 }, { "epoch": 0.7118872422012644, "grad_norm": 0.7236624360084534, "learning_rate": 8.094356809610554e-06, "loss": 0.2101, "step": 6869 }, { "epoch": 0.7119908798839258, "grad_norm": 0.5722697377204895, "learning_rate": 8.0889630531011e-06, "loss": 0.1766, "step": 6870 }, { "epoch": 0.7120945175665873, "grad_norm": 0.6453374624252319, "learning_rate": 8.083570638708785e-06, "loss": 0.2015, "step": 6871 }, { "epoch": 0.7121981552492487, "grad_norm": 0.717676043510437, "learning_rate": 8.078179567041201e-06, "loss": 0.2209, "step": 6872 }, { "epoch": 0.7123017929319101, "grad_norm": 0.6053109169006348, "learning_rate": 8.072789838705805e-06, "loss": 0.1759, "step": 6873 }, { "epoch": 0.7124054306145715, "grad_norm": 0.6611320376396179, "learning_rate": 8.067401454309917e-06, "loss": 0.21, "step": 6874 }, { "epoch": 0.7125090682972329, "grad_norm": 0.6562949419021606, "learning_rate": 8.062014414460677e-06, "loss": 0.2028, "step": 6875 }, { "epoch": 0.7126127059798942, "grad_norm": 0.6381715536117554, "learning_rate": 8.056628719765103e-06, "loss": 0.2144, "step": 6876 }, { "epoch": 0.7127163436625557, "grad_norm": 0.5687152147293091, "learning_rate": 8.051244370830029e-06, "loss": 0.1689, "step": 6877 }, { "epoch": 0.7128199813452171, "grad_norm": 0.6299018263816833, "learning_rate": 8.045861368262172e-06, "loss": 0.1954, "step": 6878 }, { "epoch": 0.7129236190278785, "grad_norm": 0.6874618530273438, "learning_rate": 8.040479712668071e-06, "loss": 0.2333, "step": 6879 }, { "epoch": 0.7130272567105399, "grad_norm": 0.5801635384559631, "learning_rate": 8.035099404654122e-06, "loss": 0.1564, "step": 6880 }, { "epoch": 0.7131308943932013, "grad_norm": 0.6684380173683167, "learning_rate": 8.029720444826576e-06, "loss": 0.2082, "step": 6881 }, { "epoch": 0.7132345320758627, "grad_norm": 0.6474950909614563, "learning_rate": 8.024342833791517e-06, "loss": 0.2106, "step": 6882 }, { "epoch": 0.7133381697585242, "grad_norm": 0.6856906414031982, "learning_rate": 8.018966572154889e-06, "loss": 0.199, "step": 6883 }, { "epoch": 0.7134418074411856, "grad_norm": 0.6510111689567566, "learning_rate": 8.013591660522494e-06, "loss": 0.2001, "step": 6884 }, { "epoch": 0.713545445123847, "grad_norm": 0.5542777180671692, "learning_rate": 8.008218099499952e-06, "loss": 0.1561, "step": 6885 }, { "epoch": 0.7136490828065084, "grad_norm": 0.6461411714553833, "learning_rate": 8.002845889692756e-06, "loss": 0.1928, "step": 6886 }, { "epoch": 0.7137527204891698, "grad_norm": 0.6065050959587097, "learning_rate": 7.997475031706228e-06, "loss": 0.1815, "step": 6887 }, { "epoch": 0.7138563581718312, "grad_norm": 0.5979995131492615, "learning_rate": 7.992105526145555e-06, "loss": 0.1972, "step": 6888 }, { "epoch": 0.7139599958544927, "grad_norm": 0.6340582966804504, "learning_rate": 7.98673737361577e-06, "loss": 0.1953, "step": 6889 }, { "epoch": 0.7140636335371541, "grad_norm": 0.614409327507019, "learning_rate": 7.981370574721739e-06, "loss": 0.1865, "step": 6890 }, { "epoch": 0.7141672712198155, "grad_norm": 0.7286944389343262, "learning_rate": 7.976005130068192e-06, "loss": 0.2428, "step": 6891 }, { "epoch": 0.7142709089024769, "grad_norm": 0.6045624017715454, "learning_rate": 7.970641040259696e-06, "loss": 0.2026, "step": 6892 }, { "epoch": 0.7143745465851383, "grad_norm": 0.7129000425338745, "learning_rate": 7.965278305900661e-06, "loss": 0.2338, "step": 6893 }, { "epoch": 0.7144781842677997, "grad_norm": 0.6371878981590271, "learning_rate": 7.959916927595366e-06, "loss": 0.1773, "step": 6894 }, { "epoch": 0.7145818219504612, "grad_norm": 0.5102376341819763, "learning_rate": 7.954556905947909e-06, "loss": 0.1615, "step": 6895 }, { "epoch": 0.7146854596331226, "grad_norm": 1.1365267038345337, "learning_rate": 7.94919824156226e-06, "loss": 0.2256, "step": 6896 }, { "epoch": 0.714789097315784, "grad_norm": 0.6125771403312683, "learning_rate": 7.94384093504222e-06, "loss": 0.1686, "step": 6897 }, { "epoch": 0.7148927349984454, "grad_norm": 0.7069416046142578, "learning_rate": 7.938484986991435e-06, "loss": 0.2137, "step": 6898 }, { "epoch": 0.7149963726811068, "grad_norm": 0.7099876403808594, "learning_rate": 7.933130398013419e-06, "loss": 0.1804, "step": 6899 }, { "epoch": 0.7151000103637682, "grad_norm": 0.5490723252296448, "learning_rate": 7.927777168711503e-06, "loss": 0.1791, "step": 6900 }, { "epoch": 0.7152036480464297, "grad_norm": 0.6424699425697327, "learning_rate": 7.922425299688895e-06, "loss": 0.1828, "step": 6901 }, { "epoch": 0.7153072857290911, "grad_norm": 0.6973537802696228, "learning_rate": 7.917074791548625e-06, "loss": 0.2145, "step": 6902 }, { "epoch": 0.7154109234117525, "grad_norm": 0.6093841791152954, "learning_rate": 7.91172564489359e-06, "loss": 0.1884, "step": 6903 }, { "epoch": 0.7155145610944139, "grad_norm": 0.6583380699157715, "learning_rate": 7.90637786032652e-06, "loss": 0.2057, "step": 6904 }, { "epoch": 0.7156181987770753, "grad_norm": 0.6568527817726135, "learning_rate": 7.901031438449982e-06, "loss": 0.1879, "step": 6905 }, { "epoch": 0.7157218364597367, "grad_norm": 0.6096395254135132, "learning_rate": 7.895686379866423e-06, "loss": 0.2051, "step": 6906 }, { "epoch": 0.7158254741423982, "grad_norm": 0.5662840604782104, "learning_rate": 7.890342685178098e-06, "loss": 0.1826, "step": 6907 }, { "epoch": 0.7159291118250596, "grad_norm": 0.6252114772796631, "learning_rate": 7.885000354987136e-06, "loss": 0.1981, "step": 6908 }, { "epoch": 0.716032749507721, "grad_norm": 0.662355899810791, "learning_rate": 7.879659389895506e-06, "loss": 0.2038, "step": 6909 }, { "epoch": 0.7161363871903824, "grad_norm": 0.6738420724868774, "learning_rate": 7.874319790505016e-06, "loss": 0.2336, "step": 6910 }, { "epoch": 0.7162400248730438, "grad_norm": 0.5899892449378967, "learning_rate": 7.86898155741732e-06, "loss": 0.1656, "step": 6911 }, { "epoch": 0.7163436625557053, "grad_norm": 0.6204176545143127, "learning_rate": 7.863644691233921e-06, "loss": 0.2036, "step": 6912 }, { "epoch": 0.7164473002383667, "grad_norm": 0.5711419582366943, "learning_rate": 7.858309192556168e-06, "loss": 0.1938, "step": 6913 }, { "epoch": 0.7165509379210281, "grad_norm": 0.7031733393669128, "learning_rate": 7.852975061985269e-06, "loss": 0.1909, "step": 6914 }, { "epoch": 0.7166545756036895, "grad_norm": 0.669396162033081, "learning_rate": 7.847642300122251e-06, "loss": 0.1892, "step": 6915 }, { "epoch": 0.7167582132863509, "grad_norm": 0.6639227271080017, "learning_rate": 7.842310907568014e-06, "loss": 0.1982, "step": 6916 }, { "epoch": 0.7168618509690123, "grad_norm": 0.6344635486602783, "learning_rate": 7.836980884923282e-06, "loss": 0.1984, "step": 6917 }, { "epoch": 0.7169654886516738, "grad_norm": 0.5545706152915955, "learning_rate": 7.831652232788632e-06, "loss": 0.1731, "step": 6918 }, { "epoch": 0.7170691263343352, "grad_norm": 0.6619438529014587, "learning_rate": 7.8263249517645e-06, "loss": 0.1947, "step": 6919 }, { "epoch": 0.7171727640169966, "grad_norm": 0.7299826145172119, "learning_rate": 7.820999042451139e-06, "loss": 0.2295, "step": 6920 }, { "epoch": 0.717276401699658, "grad_norm": 0.5815308690071106, "learning_rate": 7.81567450544868e-06, "loss": 0.1882, "step": 6921 }, { "epoch": 0.7173800393823194, "grad_norm": 0.6410979628562927, "learning_rate": 7.81035134135707e-06, "loss": 0.1999, "step": 6922 }, { "epoch": 0.7174836770649808, "grad_norm": 0.7028884291648865, "learning_rate": 7.805029550776128e-06, "loss": 0.2004, "step": 6923 }, { "epoch": 0.7175873147476423, "grad_norm": 0.6730342507362366, "learning_rate": 7.799709134305502e-06, "loss": 0.2102, "step": 6924 }, { "epoch": 0.7176909524303037, "grad_norm": 0.6692425012588501, "learning_rate": 7.794390092544674e-06, "loss": 0.2261, "step": 6925 }, { "epoch": 0.7177945901129651, "grad_norm": 0.6351549625396729, "learning_rate": 7.789072426093007e-06, "loss": 0.1822, "step": 6926 }, { "epoch": 0.7178982277956265, "grad_norm": 0.6907915472984314, "learning_rate": 7.783756135549669e-06, "loss": 0.2574, "step": 6927 }, { "epoch": 0.7180018654782879, "grad_norm": 0.5755670666694641, "learning_rate": 7.778441221513704e-06, "loss": 0.1686, "step": 6928 }, { "epoch": 0.7181055031609493, "grad_norm": 0.6563580632209778, "learning_rate": 7.773127684583985e-06, "loss": 0.1904, "step": 6929 }, { "epoch": 0.7182091408436108, "grad_norm": 0.6980963349342346, "learning_rate": 7.767815525359224e-06, "loss": 0.2022, "step": 6930 }, { "epoch": 0.7183127785262722, "grad_norm": 0.6036818027496338, "learning_rate": 7.762504744438002e-06, "loss": 0.1904, "step": 6931 }, { "epoch": 0.7184164162089336, "grad_norm": 0.6479178071022034, "learning_rate": 7.757195342418716e-06, "loss": 0.1899, "step": 6932 }, { "epoch": 0.718520053891595, "grad_norm": 0.6769992113113403, "learning_rate": 7.751887319899625e-06, "loss": 0.2049, "step": 6933 }, { "epoch": 0.7186236915742564, "grad_norm": 0.6470862030982971, "learning_rate": 7.746580677478837e-06, "loss": 0.206, "step": 6934 }, { "epoch": 0.7187273292569178, "grad_norm": 0.6667788624763489, "learning_rate": 7.74127541575429e-06, "loss": 0.2166, "step": 6935 }, { "epoch": 0.7188309669395793, "grad_norm": 0.7218932509422302, "learning_rate": 7.735971535323775e-06, "loss": 0.2256, "step": 6936 }, { "epoch": 0.7189346046222407, "grad_norm": 0.6005328893661499, "learning_rate": 7.730669036784915e-06, "loss": 0.185, "step": 6937 }, { "epoch": 0.7190382423049021, "grad_norm": 0.6119803786277771, "learning_rate": 7.725367920735194e-06, "loss": 0.1941, "step": 6938 }, { "epoch": 0.7191418799875635, "grad_norm": 0.6969441771507263, "learning_rate": 7.72006818777194e-06, "loss": 0.2169, "step": 6939 }, { "epoch": 0.7192455176702249, "grad_norm": 0.5583266615867615, "learning_rate": 7.714769838492309e-06, "loss": 0.1492, "step": 6940 }, { "epoch": 0.7193491553528863, "grad_norm": 0.6057022213935852, "learning_rate": 7.70947287349332e-06, "loss": 0.1932, "step": 6941 }, { "epoch": 0.7194527930355478, "grad_norm": 0.580872118473053, "learning_rate": 7.704177293371822e-06, "loss": 0.185, "step": 6942 }, { "epoch": 0.7195564307182092, "grad_norm": 0.6409164071083069, "learning_rate": 7.698883098724506e-06, "loss": 0.1777, "step": 6943 }, { "epoch": 0.7196600684008706, "grad_norm": 0.7127074599266052, "learning_rate": 7.693590290147925e-06, "loss": 0.2104, "step": 6944 }, { "epoch": 0.719763706083532, "grad_norm": 0.5302404165267944, "learning_rate": 7.688298868238454e-06, "loss": 0.1679, "step": 6945 }, { "epoch": 0.7198673437661934, "grad_norm": 0.5907058715820312, "learning_rate": 7.683008833592336e-06, "loss": 0.1893, "step": 6946 }, { "epoch": 0.7199709814488549, "grad_norm": 0.6386611461639404, "learning_rate": 7.677720186805626e-06, "loss": 0.2356, "step": 6947 }, { "epoch": 0.7200746191315163, "grad_norm": 0.7018064856529236, "learning_rate": 7.672432928474258e-06, "loss": 0.2337, "step": 6948 }, { "epoch": 0.7201782568141777, "grad_norm": 0.6732597947120667, "learning_rate": 7.667147059193984e-06, "loss": 0.221, "step": 6949 }, { "epoch": 0.7202818944968391, "grad_norm": 0.48719078302383423, "learning_rate": 7.6618625795604e-06, "loss": 0.1384, "step": 6950 }, { "epoch": 0.7203855321795005, "grad_norm": 0.669743537902832, "learning_rate": 7.656579490168967e-06, "loss": 0.2023, "step": 6951 }, { "epoch": 0.7204891698621618, "grad_norm": 0.7527252435684204, "learning_rate": 7.651297791614964e-06, "loss": 0.206, "step": 6952 }, { "epoch": 0.7205928075448232, "grad_norm": 0.6796308159828186, "learning_rate": 7.64601748449353e-06, "loss": 0.195, "step": 6953 }, { "epoch": 0.7206964452274847, "grad_norm": 0.6304457783699036, "learning_rate": 7.640738569399645e-06, "loss": 0.1813, "step": 6954 }, { "epoch": 0.7208000829101461, "grad_norm": 0.5981799960136414, "learning_rate": 7.635461046928127e-06, "loss": 0.1854, "step": 6955 }, { "epoch": 0.7209037205928075, "grad_norm": 0.6973567605018616, "learning_rate": 7.630184917673638e-06, "loss": 0.2322, "step": 6956 }, { "epoch": 0.7210073582754689, "grad_norm": 0.6168457269668579, "learning_rate": 7.624910182230674e-06, "loss": 0.2044, "step": 6957 }, { "epoch": 0.7211109959581303, "grad_norm": 0.7069376707077026, "learning_rate": 7.619636841193594e-06, "loss": 0.2197, "step": 6958 }, { "epoch": 0.7212146336407917, "grad_norm": 0.6199985146522522, "learning_rate": 7.614364895156597e-06, "loss": 0.1872, "step": 6959 }, { "epoch": 0.7213182713234532, "grad_norm": 0.633570671081543, "learning_rate": 7.609094344713708e-06, "loss": 0.1903, "step": 6960 }, { "epoch": 0.7214219090061146, "grad_norm": 0.5866010189056396, "learning_rate": 7.603825190458809e-06, "loss": 0.1844, "step": 6961 }, { "epoch": 0.721525546688776, "grad_norm": 0.6975796222686768, "learning_rate": 7.598557432985607e-06, "loss": 0.2257, "step": 6962 }, { "epoch": 0.7216291843714374, "grad_norm": 0.575537383556366, "learning_rate": 7.5932910728876766e-06, "loss": 0.1636, "step": 6963 }, { "epoch": 0.7217328220540988, "grad_norm": 0.5930981040000916, "learning_rate": 7.588026110758428e-06, "loss": 0.2089, "step": 6964 }, { "epoch": 0.7218364597367602, "grad_norm": 0.5862255096435547, "learning_rate": 7.5827625471910985e-06, "loss": 0.2131, "step": 6965 }, { "epoch": 0.7219400974194217, "grad_norm": 0.5902138948440552, "learning_rate": 7.5775003827787864e-06, "loss": 0.1991, "step": 6966 }, { "epoch": 0.7220437351020831, "grad_norm": 0.6987681984901428, "learning_rate": 7.5722396181144185e-06, "loss": 0.2177, "step": 6967 }, { "epoch": 0.7221473727847445, "grad_norm": 0.6114003658294678, "learning_rate": 7.566980253790768e-06, "loss": 0.1965, "step": 6968 }, { "epoch": 0.7222510104674059, "grad_norm": 0.5686897039413452, "learning_rate": 7.5617222904004595e-06, "loss": 0.1715, "step": 6969 }, { "epoch": 0.7223546481500673, "grad_norm": 0.6705672740936279, "learning_rate": 7.55646572853594e-06, "loss": 0.199, "step": 6970 }, { "epoch": 0.7224582858327288, "grad_norm": 0.5653578042984009, "learning_rate": 7.551210568789526e-06, "loss": 0.1635, "step": 6971 }, { "epoch": 0.7225619235153902, "grad_norm": 0.6964936256408691, "learning_rate": 7.545956811753348e-06, "loss": 0.191, "step": 6972 }, { "epoch": 0.7226655611980516, "grad_norm": 0.7278429865837097, "learning_rate": 7.540704458019401e-06, "loss": 0.2348, "step": 6973 }, { "epoch": 0.722769198880713, "grad_norm": 0.5915662050247192, "learning_rate": 7.535453508179509e-06, "loss": 0.2003, "step": 6974 }, { "epoch": 0.7228728365633744, "grad_norm": 0.6096627712249756, "learning_rate": 7.530203962825331e-06, "loss": 0.179, "step": 6975 }, { "epoch": 0.7229764742460358, "grad_norm": 0.5854135155677795, "learning_rate": 7.5249558225483945e-06, "loss": 0.1851, "step": 6976 }, { "epoch": 0.7230801119286973, "grad_norm": 0.5830105543136597, "learning_rate": 7.519709087940034e-06, "loss": 0.1764, "step": 6977 }, { "epoch": 0.7231837496113587, "grad_norm": 0.6320511102676392, "learning_rate": 7.514463759591453e-06, "loss": 0.1989, "step": 6978 }, { "epoch": 0.7232873872940201, "grad_norm": 0.5508729219436646, "learning_rate": 7.509219838093693e-06, "loss": 0.1655, "step": 6979 }, { "epoch": 0.7233910249766815, "grad_norm": 0.5589364767074585, "learning_rate": 7.503977324037626e-06, "loss": 0.1898, "step": 6980 }, { "epoch": 0.7234946626593429, "grad_norm": 0.731992781162262, "learning_rate": 7.4987362180139665e-06, "loss": 0.2017, "step": 6981 }, { "epoch": 0.7235983003420043, "grad_norm": 0.6651973724365234, "learning_rate": 7.49349652061327e-06, "loss": 0.1773, "step": 6982 }, { "epoch": 0.7237019380246658, "grad_norm": 0.6765840649604797, "learning_rate": 7.488258232425947e-06, "loss": 0.2017, "step": 6983 }, { "epoch": 0.7238055757073272, "grad_norm": 0.6248261332511902, "learning_rate": 7.483021354042239e-06, "loss": 0.1862, "step": 6984 }, { "epoch": 0.7239092133899886, "grad_norm": 0.6525418162345886, "learning_rate": 7.477785886052223e-06, "loss": 0.1857, "step": 6985 }, { "epoch": 0.72401285107265, "grad_norm": 0.6958816647529602, "learning_rate": 7.472551829045833e-06, "loss": 0.1915, "step": 6986 }, { "epoch": 0.7241164887553114, "grad_norm": 0.624822735786438, "learning_rate": 7.467319183612827e-06, "loss": 0.1936, "step": 6987 }, { "epoch": 0.7242201264379728, "grad_norm": 0.7277296781539917, "learning_rate": 7.462087950342809e-06, "loss": 0.2439, "step": 6988 }, { "epoch": 0.7243237641206343, "grad_norm": 0.6706949472427368, "learning_rate": 7.456858129825235e-06, "loss": 0.224, "step": 6989 }, { "epoch": 0.7244274018032957, "grad_norm": 0.5695304274559021, "learning_rate": 7.45162972264938e-06, "loss": 0.1608, "step": 6990 }, { "epoch": 0.7245310394859571, "grad_norm": 0.7456833720207214, "learning_rate": 7.446402729404392e-06, "loss": 0.2379, "step": 6991 }, { "epoch": 0.7246346771686185, "grad_norm": 0.6324352025985718, "learning_rate": 7.441177150679226e-06, "loss": 0.2016, "step": 6992 }, { "epoch": 0.7247383148512799, "grad_norm": 0.6055063009262085, "learning_rate": 7.435952987062691e-06, "loss": 0.1911, "step": 6993 }, { "epoch": 0.7248419525339413, "grad_norm": 0.6443060040473938, "learning_rate": 7.430730239143449e-06, "loss": 0.1936, "step": 6994 }, { "epoch": 0.7249455902166028, "grad_norm": 0.6803500056266785, "learning_rate": 7.425508907509975e-06, "loss": 0.2094, "step": 6995 }, { "epoch": 0.7250492278992642, "grad_norm": 0.6133655309677124, "learning_rate": 7.420288992750619e-06, "loss": 0.1851, "step": 6996 }, { "epoch": 0.7251528655819256, "grad_norm": 0.6188987493515015, "learning_rate": 7.415070495453536e-06, "loss": 0.2265, "step": 6997 }, { "epoch": 0.725256503264587, "grad_norm": 0.5657846927642822, "learning_rate": 7.4098534162067535e-06, "loss": 0.1798, "step": 6998 }, { "epoch": 0.7253601409472484, "grad_norm": 0.6191845536231995, "learning_rate": 7.404637755598116e-06, "loss": 0.1956, "step": 6999 }, { "epoch": 0.7254637786299099, "grad_norm": 0.5805138349533081, "learning_rate": 7.399423514215309e-06, "loss": 0.17, "step": 7000 }, { "epoch": 0.7255674163125713, "grad_norm": 0.6225676536560059, "learning_rate": 7.39421069264588e-06, "loss": 0.2, "step": 7001 }, { "epoch": 0.7256710539952327, "grad_norm": 0.6446588635444641, "learning_rate": 7.388999291477186e-06, "loss": 0.1967, "step": 7002 }, { "epoch": 0.7257746916778941, "grad_norm": 0.67559814453125, "learning_rate": 7.38378931129645e-06, "loss": 0.1686, "step": 7003 }, { "epoch": 0.7258783293605555, "grad_norm": 0.6291429400444031, "learning_rate": 7.378580752690727e-06, "loss": 0.1794, "step": 7004 }, { "epoch": 0.7259819670432169, "grad_norm": 0.5955010056495667, "learning_rate": 7.373373616246904e-06, "loss": 0.2062, "step": 7005 }, { "epoch": 0.7260856047258784, "grad_norm": 0.6775494813919067, "learning_rate": 7.368167902551715e-06, "loss": 0.2031, "step": 7006 }, { "epoch": 0.7261892424085398, "grad_norm": 0.6713017821311951, "learning_rate": 7.362963612191723e-06, "loss": 0.1774, "step": 7007 }, { "epoch": 0.7262928800912012, "grad_norm": 0.6503675580024719, "learning_rate": 7.357760745753346e-06, "loss": 0.2129, "step": 7008 }, { "epoch": 0.7263965177738626, "grad_norm": 0.6224893927574158, "learning_rate": 7.352559303822842e-06, "loss": 0.1953, "step": 7009 }, { "epoch": 0.726500155456524, "grad_norm": 0.5616812109947205, "learning_rate": 7.347359286986289e-06, "loss": 0.1639, "step": 7010 }, { "epoch": 0.7266037931391854, "grad_norm": 0.6985262632369995, "learning_rate": 7.342160695829627e-06, "loss": 0.1931, "step": 7011 }, { "epoch": 0.7267074308218469, "grad_norm": 0.6394574046134949, "learning_rate": 7.336963530938623e-06, "loss": 0.1868, "step": 7012 }, { "epoch": 0.7268110685045083, "grad_norm": 0.6743451952934265, "learning_rate": 7.331767792898878e-06, "loss": 0.2028, "step": 7013 }, { "epoch": 0.7269147061871697, "grad_norm": 0.614748477935791, "learning_rate": 7.326573482295849e-06, "loss": 0.167, "step": 7014 }, { "epoch": 0.7270183438698311, "grad_norm": 0.6770737171173096, "learning_rate": 7.321380599714813e-06, "loss": 0.2017, "step": 7015 }, { "epoch": 0.7271219815524925, "grad_norm": 0.5676266551017761, "learning_rate": 7.3161891457409085e-06, "loss": 0.1755, "step": 7016 }, { "epoch": 0.7272256192351539, "grad_norm": 0.6676115393638611, "learning_rate": 7.310999120959085e-06, "loss": 0.1915, "step": 7017 }, { "epoch": 0.7273292569178154, "grad_norm": 0.6348340511322021, "learning_rate": 7.305810525954167e-06, "loss": 0.1791, "step": 7018 }, { "epoch": 0.7274328946004768, "grad_norm": 0.5726019144058228, "learning_rate": 7.300623361310781e-06, "loss": 0.1801, "step": 7019 }, { "epoch": 0.7275365322831382, "grad_norm": 0.506493330001831, "learning_rate": 7.295437627613407e-06, "loss": 0.151, "step": 7020 }, { "epoch": 0.7276401699657996, "grad_norm": 0.5711618661880493, "learning_rate": 7.290253325446372e-06, "loss": 0.169, "step": 7021 }, { "epoch": 0.727743807648461, "grad_norm": 0.620705783367157, "learning_rate": 7.2850704553938415e-06, "loss": 0.1716, "step": 7022 }, { "epoch": 0.7278474453311224, "grad_norm": 0.5504769086837769, "learning_rate": 7.279889018039806e-06, "loss": 0.156, "step": 7023 }, { "epoch": 0.7279510830137839, "grad_norm": 0.6168975234031677, "learning_rate": 7.274709013968102e-06, "loss": 0.219, "step": 7024 }, { "epoch": 0.7280547206964453, "grad_norm": 0.6001594066619873, "learning_rate": 7.269530443762398e-06, "loss": 0.1716, "step": 7025 }, { "epoch": 0.7281583583791067, "grad_norm": 0.7288575172424316, "learning_rate": 7.264353308006214e-06, "loss": 0.2085, "step": 7026 }, { "epoch": 0.7282619960617681, "grad_norm": 0.5633076429367065, "learning_rate": 7.259177607282908e-06, "loss": 0.1604, "step": 7027 }, { "epoch": 0.7283656337444294, "grad_norm": 0.642590343952179, "learning_rate": 7.254003342175658e-06, "loss": 0.2107, "step": 7028 }, { "epoch": 0.7284692714270908, "grad_norm": 0.6819926500320435, "learning_rate": 7.2488305132675e-06, "loss": 0.2183, "step": 7029 }, { "epoch": 0.7285729091097523, "grad_norm": 0.7595263719558716, "learning_rate": 7.2436591211412995e-06, "loss": 0.2345, "step": 7030 }, { "epoch": 0.7286765467924137, "grad_norm": 0.6355629563331604, "learning_rate": 7.238489166379754e-06, "loss": 0.186, "step": 7031 }, { "epoch": 0.7287801844750751, "grad_norm": 0.6492575407028198, "learning_rate": 7.233320649565416e-06, "loss": 0.1813, "step": 7032 }, { "epoch": 0.7288838221577365, "grad_norm": 0.6751068830490112, "learning_rate": 7.228153571280656e-06, "loss": 0.2214, "step": 7033 }, { "epoch": 0.7289874598403979, "grad_norm": 0.6983469724655151, "learning_rate": 7.222987932107704e-06, "loss": 0.2329, "step": 7034 }, { "epoch": 0.7290910975230593, "grad_norm": 0.6801983118057251, "learning_rate": 7.217823732628602e-06, "loss": 0.1896, "step": 7035 }, { "epoch": 0.7291947352057208, "grad_norm": 0.7078520059585571, "learning_rate": 7.212660973425258e-06, "loss": 0.2178, "step": 7036 }, { "epoch": 0.7292983728883822, "grad_norm": 0.7132784724235535, "learning_rate": 7.207499655079398e-06, "loss": 0.2036, "step": 7037 }, { "epoch": 0.7294020105710436, "grad_norm": 0.718744158744812, "learning_rate": 7.202339778172583e-06, "loss": 0.201, "step": 7038 }, { "epoch": 0.729505648253705, "grad_norm": 0.7349317669868469, "learning_rate": 7.197181343286233e-06, "loss": 0.2098, "step": 7039 }, { "epoch": 0.7296092859363664, "grad_norm": 0.6733152270317078, "learning_rate": 7.192024351001583e-06, "loss": 0.1987, "step": 7040 }, { "epoch": 0.7297129236190278, "grad_norm": 0.6271103024482727, "learning_rate": 7.186868801899715e-06, "loss": 0.1924, "step": 7041 }, { "epoch": 0.7298165613016893, "grad_norm": 0.6496624946594238, "learning_rate": 7.181714696561561e-06, "loss": 0.2, "step": 7042 }, { "epoch": 0.7299201989843507, "grad_norm": 0.5096017718315125, "learning_rate": 7.176562035567869e-06, "loss": 0.1585, "step": 7043 }, { "epoch": 0.7300238366670121, "grad_norm": 0.5929197072982788, "learning_rate": 7.171410819499234e-06, "loss": 0.1789, "step": 7044 }, { "epoch": 0.7301274743496735, "grad_norm": 0.6660709977149963, "learning_rate": 7.166261048936076e-06, "loss": 0.2199, "step": 7045 }, { "epoch": 0.7302311120323349, "grad_norm": 0.6655041575431824, "learning_rate": 7.161112724458672e-06, "loss": 0.1909, "step": 7046 }, { "epoch": 0.7303347497149963, "grad_norm": 0.6861178278923035, "learning_rate": 7.155965846647137e-06, "loss": 0.1877, "step": 7047 }, { "epoch": 0.7304383873976578, "grad_norm": 0.7006655931472778, "learning_rate": 7.150820416081396e-06, "loss": 0.2136, "step": 7048 }, { "epoch": 0.7305420250803192, "grad_norm": 0.6472157835960388, "learning_rate": 7.145676433341242e-06, "loss": 0.1865, "step": 7049 }, { "epoch": 0.7306456627629806, "grad_norm": 0.5800744295120239, "learning_rate": 7.140533899006286e-06, "loss": 0.1847, "step": 7050 }, { "epoch": 0.730749300445642, "grad_norm": 0.6264989972114563, "learning_rate": 7.135392813655972e-06, "loss": 0.1887, "step": 7051 }, { "epoch": 0.7308529381283034, "grad_norm": 0.7023571729660034, "learning_rate": 7.130253177869606e-06, "loss": 0.2213, "step": 7052 }, { "epoch": 0.7309565758109648, "grad_norm": 0.5341815948486328, "learning_rate": 7.125114992226298e-06, "loss": 0.1723, "step": 7053 }, { "epoch": 0.7310602134936263, "grad_norm": 0.5917410850524902, "learning_rate": 7.119978257305025e-06, "loss": 0.1953, "step": 7054 }, { "epoch": 0.7311638511762877, "grad_norm": 0.6590427756309509, "learning_rate": 7.11484297368458e-06, "loss": 0.1937, "step": 7055 }, { "epoch": 0.7312674888589491, "grad_norm": 0.6602505445480347, "learning_rate": 7.10970914194359e-06, "loss": 0.1754, "step": 7056 }, { "epoch": 0.7313711265416105, "grad_norm": 0.6671847701072693, "learning_rate": 7.104576762660544e-06, "loss": 0.1826, "step": 7057 }, { "epoch": 0.7314747642242719, "grad_norm": 0.6499585509300232, "learning_rate": 7.099445836413734e-06, "loss": 0.1776, "step": 7058 }, { "epoch": 0.7315784019069334, "grad_norm": 0.5618710517883301, "learning_rate": 7.094316363781322e-06, "loss": 0.1652, "step": 7059 }, { "epoch": 0.7316820395895948, "grad_norm": 0.7040053606033325, "learning_rate": 7.0891883453412715e-06, "loss": 0.1812, "step": 7060 }, { "epoch": 0.7317856772722562, "grad_norm": 0.7040082812309265, "learning_rate": 7.084061781671414e-06, "loss": 0.2095, "step": 7061 }, { "epoch": 0.7318893149549176, "grad_norm": 0.5922701358795166, "learning_rate": 7.078936673349397e-06, "loss": 0.1669, "step": 7062 }, { "epoch": 0.731992952637579, "grad_norm": 0.603987991809845, "learning_rate": 7.073813020952702e-06, "loss": 0.196, "step": 7063 }, { "epoch": 0.7320965903202404, "grad_norm": 0.5776903033256531, "learning_rate": 7.06869082505867e-06, "loss": 0.1604, "step": 7064 }, { "epoch": 0.7322002280029019, "grad_norm": 0.6143998503684998, "learning_rate": 7.063570086244447e-06, "loss": 0.1959, "step": 7065 }, { "epoch": 0.7323038656855633, "grad_norm": 0.6320601105690002, "learning_rate": 7.058450805087036e-06, "loss": 0.2035, "step": 7066 }, { "epoch": 0.7324075033682247, "grad_norm": 0.6088545918464661, "learning_rate": 7.053332982163277e-06, "loss": 0.1554, "step": 7067 }, { "epoch": 0.7325111410508861, "grad_norm": 0.7791801691055298, "learning_rate": 7.048216618049832e-06, "loss": 0.2042, "step": 7068 }, { "epoch": 0.7326147787335475, "grad_norm": 0.5860157608985901, "learning_rate": 7.043101713323204e-06, "loss": 0.174, "step": 7069 }, { "epoch": 0.7327184164162089, "grad_norm": 0.5433716177940369, "learning_rate": 7.037988268559726e-06, "loss": 0.169, "step": 7070 }, { "epoch": 0.7328220540988704, "grad_norm": 0.6011751294136047, "learning_rate": 7.032876284335582e-06, "loss": 0.1832, "step": 7071 }, { "epoch": 0.7329256917815318, "grad_norm": 0.6410551071166992, "learning_rate": 7.027765761226783e-06, "loss": 0.1924, "step": 7072 }, { "epoch": 0.7330293294641932, "grad_norm": 0.6368006467819214, "learning_rate": 7.022656699809169e-06, "loss": 0.2275, "step": 7073 }, { "epoch": 0.7331329671468546, "grad_norm": 0.66558837890625, "learning_rate": 7.017549100658432e-06, "loss": 0.1969, "step": 7074 }, { "epoch": 0.733236604829516, "grad_norm": 0.6814399361610413, "learning_rate": 7.012442964350079e-06, "loss": 0.1954, "step": 7075 }, { "epoch": 0.7333402425121774, "grad_norm": 0.6535932421684265, "learning_rate": 7.007338291459456e-06, "loss": 0.2077, "step": 7076 }, { "epoch": 0.7334438801948389, "grad_norm": 0.5728932619094849, "learning_rate": 7.002235082561764e-06, "loss": 0.1578, "step": 7077 }, { "epoch": 0.7335475178775003, "grad_norm": 0.7541806101799011, "learning_rate": 6.9971333382320115e-06, "loss": 0.2565, "step": 7078 }, { "epoch": 0.7336511555601617, "grad_norm": 0.6706312894821167, "learning_rate": 6.992033059045067e-06, "loss": 0.1976, "step": 7079 }, { "epoch": 0.7337547932428231, "grad_norm": 0.6851133704185486, "learning_rate": 6.986934245575609e-06, "loss": 0.2066, "step": 7080 }, { "epoch": 0.7338584309254845, "grad_norm": 0.6354007124900818, "learning_rate": 6.98183689839818e-06, "loss": 0.1962, "step": 7081 }, { "epoch": 0.733962068608146, "grad_norm": 0.662876546382904, "learning_rate": 6.97674101808713e-06, "loss": 0.192, "step": 7082 }, { "epoch": 0.7340657062908074, "grad_norm": 0.6014705300331116, "learning_rate": 6.9716466052166505e-06, "loss": 0.1826, "step": 7083 }, { "epoch": 0.7341693439734688, "grad_norm": 0.7361433506011963, "learning_rate": 6.9665536603607864e-06, "loss": 0.2282, "step": 7084 }, { "epoch": 0.7342729816561302, "grad_norm": 0.631420373916626, "learning_rate": 6.961462184093388e-06, "loss": 0.1959, "step": 7085 }, { "epoch": 0.7343766193387916, "grad_norm": 0.6514199376106262, "learning_rate": 6.956372176988169e-06, "loss": 0.1865, "step": 7086 }, { "epoch": 0.734480257021453, "grad_norm": 0.5663927793502808, "learning_rate": 6.951283639618654e-06, "loss": 0.1761, "step": 7087 }, { "epoch": 0.7345838947041144, "grad_norm": 0.789225697517395, "learning_rate": 6.946196572558208e-06, "loss": 0.2245, "step": 7088 }, { "epoch": 0.7346875323867759, "grad_norm": 0.716204047203064, "learning_rate": 6.941110976380048e-06, "loss": 0.2109, "step": 7089 }, { "epoch": 0.7347911700694373, "grad_norm": 0.626390814781189, "learning_rate": 6.936026851657196e-06, "loss": 0.2, "step": 7090 }, { "epoch": 0.7348948077520987, "grad_norm": 0.558161199092865, "learning_rate": 6.930944198962528e-06, "loss": 0.1667, "step": 7091 }, { "epoch": 0.7349984454347601, "grad_norm": 0.7186371088027954, "learning_rate": 6.925863018868759e-06, "loss": 0.1815, "step": 7092 }, { "epoch": 0.7351020831174215, "grad_norm": 0.6502755880355835, "learning_rate": 6.920783311948423e-06, "loss": 0.2085, "step": 7093 }, { "epoch": 0.735205720800083, "grad_norm": 0.6628555059432983, "learning_rate": 6.915705078773889e-06, "loss": 0.199, "step": 7094 }, { "epoch": 0.7353093584827444, "grad_norm": 0.6222200989723206, "learning_rate": 6.910628319917361e-06, "loss": 0.1827, "step": 7095 }, { "epoch": 0.7354129961654058, "grad_norm": 0.6655079126358032, "learning_rate": 6.905553035950885e-06, "loss": 0.2189, "step": 7096 }, { "epoch": 0.7355166338480672, "grad_norm": 0.7366286516189575, "learning_rate": 6.9004792274463436e-06, "loss": 0.2256, "step": 7097 }, { "epoch": 0.7356202715307286, "grad_norm": 0.6794492602348328, "learning_rate": 6.895406894975434e-06, "loss": 0.2237, "step": 7098 }, { "epoch": 0.73572390921339, "grad_norm": 0.6098574995994568, "learning_rate": 6.890336039109711e-06, "loss": 0.1884, "step": 7099 }, { "epoch": 0.7358275468960515, "grad_norm": 0.6938572525978088, "learning_rate": 6.885266660420542e-06, "loss": 0.2077, "step": 7100 }, { "epoch": 0.7359311845787129, "grad_norm": 0.6158689856529236, "learning_rate": 6.880198759479133e-06, "loss": 0.1525, "step": 7101 }, { "epoch": 0.7360348222613743, "grad_norm": 0.721074104309082, "learning_rate": 6.87513233685654e-06, "loss": 0.2077, "step": 7102 }, { "epoch": 0.7361384599440357, "grad_norm": 0.6900755167007446, "learning_rate": 6.870067393123625e-06, "loss": 0.19, "step": 7103 }, { "epoch": 0.736242097626697, "grad_norm": 0.6101871728897095, "learning_rate": 6.865003928851111e-06, "loss": 0.1671, "step": 7104 }, { "epoch": 0.7363457353093584, "grad_norm": 0.8352978229522705, "learning_rate": 6.85994194460953e-06, "loss": 0.2456, "step": 7105 }, { "epoch": 0.7364493729920198, "grad_norm": 0.6034935712814331, "learning_rate": 6.85488144096927e-06, "loss": 0.2044, "step": 7106 }, { "epoch": 0.7365530106746813, "grad_norm": 0.698615550994873, "learning_rate": 6.849822418500534e-06, "loss": 0.2079, "step": 7107 }, { "epoch": 0.7366566483573427, "grad_norm": 0.6203604340553284, "learning_rate": 6.84476487777336e-06, "loss": 0.1676, "step": 7108 }, { "epoch": 0.7367602860400041, "grad_norm": 0.7435559034347534, "learning_rate": 6.839708819357636e-06, "loss": 0.2041, "step": 7109 }, { "epoch": 0.7368639237226655, "grad_norm": 0.6135737895965576, "learning_rate": 6.834654243823058e-06, "loss": 0.1708, "step": 7110 }, { "epoch": 0.7369675614053269, "grad_norm": 0.6980074644088745, "learning_rate": 6.829601151739174e-06, "loss": 0.2352, "step": 7111 }, { "epoch": 0.7370711990879883, "grad_norm": 0.6273593902587891, "learning_rate": 6.8245495436753625e-06, "loss": 0.1904, "step": 7112 }, { "epoch": 0.7371748367706498, "grad_norm": 0.6526858806610107, "learning_rate": 6.81949942020083e-06, "loss": 0.1946, "step": 7113 }, { "epoch": 0.7372784744533112, "grad_norm": 0.6693414449691772, "learning_rate": 6.814450781884611e-06, "loss": 0.1951, "step": 7114 }, { "epoch": 0.7373821121359726, "grad_norm": 0.8420910239219666, "learning_rate": 6.809403629295575e-06, "loss": 0.2187, "step": 7115 }, { "epoch": 0.737485749818634, "grad_norm": 0.8213813304901123, "learning_rate": 6.804357963002432e-06, "loss": 0.2387, "step": 7116 }, { "epoch": 0.7375893875012954, "grad_norm": 0.7364427447319031, "learning_rate": 6.7993137835737284e-06, "loss": 0.2172, "step": 7117 }, { "epoch": 0.7376930251839569, "grad_norm": 0.6254062652587891, "learning_rate": 6.794271091577826e-06, "loss": 0.1975, "step": 7118 }, { "epoch": 0.7377966628666183, "grad_norm": 0.5966928005218506, "learning_rate": 6.789229887582931e-06, "loss": 0.171, "step": 7119 }, { "epoch": 0.7379003005492797, "grad_norm": 0.6676751971244812, "learning_rate": 6.784190172157066e-06, "loss": 0.1861, "step": 7120 }, { "epoch": 0.7380039382319411, "grad_norm": 0.7175436615943909, "learning_rate": 6.7791519458681096e-06, "loss": 0.2538, "step": 7121 }, { "epoch": 0.7381075759146025, "grad_norm": 0.6358978748321533, "learning_rate": 6.774115209283765e-06, "loss": 0.2022, "step": 7122 }, { "epoch": 0.7382112135972639, "grad_norm": 0.5605407357215881, "learning_rate": 6.769079962971552e-06, "loss": 0.1718, "step": 7123 }, { "epoch": 0.7383148512799254, "grad_norm": 0.568594753742218, "learning_rate": 6.764046207498849e-06, "loss": 0.1717, "step": 7124 }, { "epoch": 0.7384184889625868, "grad_norm": 0.5866584181785583, "learning_rate": 6.759013943432844e-06, "loss": 0.1749, "step": 7125 }, { "epoch": 0.7385221266452482, "grad_norm": 0.6093940734863281, "learning_rate": 6.7539831713405565e-06, "loss": 0.1746, "step": 7126 }, { "epoch": 0.7386257643279096, "grad_norm": 0.6412978768348694, "learning_rate": 6.748953891788861e-06, "loss": 0.1921, "step": 7127 }, { "epoch": 0.738729402010571, "grad_norm": 0.6676449179649353, "learning_rate": 6.743926105344434e-06, "loss": 0.1936, "step": 7128 }, { "epoch": 0.7388330396932324, "grad_norm": 0.7243309617042542, "learning_rate": 6.738899812573814e-06, "loss": 0.2387, "step": 7129 }, { "epoch": 0.7389366773758939, "grad_norm": 0.6230798959732056, "learning_rate": 6.73387501404334e-06, "loss": 0.1818, "step": 7130 }, { "epoch": 0.7390403150585553, "grad_norm": 0.6703595519065857, "learning_rate": 6.7288517103192175e-06, "loss": 0.1998, "step": 7131 }, { "epoch": 0.7391439527412167, "grad_norm": 0.6641132235527039, "learning_rate": 6.723829901967451e-06, "loss": 0.1881, "step": 7132 }, { "epoch": 0.7392475904238781, "grad_norm": 0.65935218334198, "learning_rate": 6.718809589553885e-06, "loss": 0.2021, "step": 7133 }, { "epoch": 0.7393512281065395, "grad_norm": 0.6337850093841553, "learning_rate": 6.713790773644218e-06, "loss": 0.2061, "step": 7134 }, { "epoch": 0.7394548657892009, "grad_norm": 0.8226786255836487, "learning_rate": 6.708773454803945e-06, "loss": 0.2009, "step": 7135 }, { "epoch": 0.7395585034718624, "grad_norm": 0.7300819158554077, "learning_rate": 6.703757633598418e-06, "loss": 0.2217, "step": 7136 }, { "epoch": 0.7396621411545238, "grad_norm": 0.586715817451477, "learning_rate": 6.698743310592817e-06, "loss": 0.1743, "step": 7137 }, { "epoch": 0.7397657788371852, "grad_norm": 0.6823568940162659, "learning_rate": 6.693730486352144e-06, "loss": 0.1799, "step": 7138 }, { "epoch": 0.7398694165198466, "grad_norm": 0.598688542842865, "learning_rate": 6.6887191614412325e-06, "loss": 0.1852, "step": 7139 }, { "epoch": 0.739973054202508, "grad_norm": 0.6224052906036377, "learning_rate": 6.683709336424748e-06, "loss": 0.1757, "step": 7140 }, { "epoch": 0.7400766918851694, "grad_norm": 0.7828407287597656, "learning_rate": 6.6787010118671945e-06, "loss": 0.2367, "step": 7141 }, { "epoch": 0.7401803295678309, "grad_norm": 0.595071017742157, "learning_rate": 6.673694188332911e-06, "loss": 0.1754, "step": 7142 }, { "epoch": 0.7402839672504923, "grad_norm": 0.6053313612937927, "learning_rate": 6.6686888663860415e-06, "loss": 0.1788, "step": 7143 }, { "epoch": 0.7403876049331537, "grad_norm": 0.6300621032714844, "learning_rate": 6.6636850465906e-06, "loss": 0.2062, "step": 7144 }, { "epoch": 0.7404912426158151, "grad_norm": 0.552588939666748, "learning_rate": 6.658682729510384e-06, "loss": 0.1553, "step": 7145 }, { "epoch": 0.7405948802984765, "grad_norm": 0.5638970136642456, "learning_rate": 6.653681915709058e-06, "loss": 0.163, "step": 7146 }, { "epoch": 0.740698517981138, "grad_norm": 0.7059263586997986, "learning_rate": 6.648682605750112e-06, "loss": 0.208, "step": 7147 }, { "epoch": 0.7408021556637994, "grad_norm": 0.6765500903129578, "learning_rate": 6.64368480019685e-06, "loss": 0.226, "step": 7148 }, { "epoch": 0.7409057933464608, "grad_norm": 0.6322110295295715, "learning_rate": 6.638688499612426e-06, "loss": 0.1794, "step": 7149 }, { "epoch": 0.7410094310291222, "grad_norm": 0.6688316464424133, "learning_rate": 6.633693704559814e-06, "loss": 0.1942, "step": 7150 }, { "epoch": 0.7411130687117836, "grad_norm": 0.7907984852790833, "learning_rate": 6.628700415601809e-06, "loss": 0.2294, "step": 7151 }, { "epoch": 0.741216706394445, "grad_norm": 0.5939874053001404, "learning_rate": 6.623708633301063e-06, "loss": 0.1794, "step": 7152 }, { "epoch": 0.7413203440771065, "grad_norm": 0.5933582782745361, "learning_rate": 6.618718358220027e-06, "loss": 0.1885, "step": 7153 }, { "epoch": 0.7414239817597679, "grad_norm": 0.5784906148910522, "learning_rate": 6.61372959092101e-06, "loss": 0.1943, "step": 7154 }, { "epoch": 0.7415276194424293, "grad_norm": 0.628635048866272, "learning_rate": 6.608742331966127e-06, "loss": 0.1953, "step": 7155 }, { "epoch": 0.7416312571250907, "grad_norm": 0.6248700618743896, "learning_rate": 6.603756581917349e-06, "loss": 0.1832, "step": 7156 }, { "epoch": 0.7417348948077521, "grad_norm": 0.572599470615387, "learning_rate": 6.598772341336455e-06, "loss": 0.176, "step": 7157 }, { "epoch": 0.7418385324904135, "grad_norm": 0.735245406627655, "learning_rate": 6.593789610785053e-06, "loss": 0.2286, "step": 7158 }, { "epoch": 0.741942170173075, "grad_norm": 0.6545553803443909, "learning_rate": 6.588808390824604e-06, "loss": 0.1877, "step": 7159 }, { "epoch": 0.7420458078557364, "grad_norm": 0.6569068431854248, "learning_rate": 6.583828682016371e-06, "loss": 0.1726, "step": 7160 }, { "epoch": 0.7421494455383978, "grad_norm": 0.6712062954902649, "learning_rate": 6.578850484921466e-06, "loss": 0.2028, "step": 7161 }, { "epoch": 0.7422530832210592, "grad_norm": 0.6517971754074097, "learning_rate": 6.573873800100832e-06, "loss": 0.1797, "step": 7162 }, { "epoch": 0.7423567209037206, "grad_norm": 0.5719771981239319, "learning_rate": 6.568898628115226e-06, "loss": 0.194, "step": 7163 }, { "epoch": 0.742460358586382, "grad_norm": 0.6975439786911011, "learning_rate": 6.563924969525244e-06, "loss": 0.1985, "step": 7164 }, { "epoch": 0.7425639962690435, "grad_norm": 0.6007943153381348, "learning_rate": 6.5589528248913025e-06, "loss": 0.1948, "step": 7165 }, { "epoch": 0.7426676339517049, "grad_norm": 0.6068305969238281, "learning_rate": 6.553982194773663e-06, "loss": 0.1751, "step": 7166 }, { "epoch": 0.7427712716343663, "grad_norm": 0.5859012603759766, "learning_rate": 6.549013079732413e-06, "loss": 0.173, "step": 7167 }, { "epoch": 0.7428749093170277, "grad_norm": 0.7530663013458252, "learning_rate": 6.544045480327455e-06, "loss": 0.2106, "step": 7168 }, { "epoch": 0.7429785469996891, "grad_norm": 0.6807491779327393, "learning_rate": 6.539079397118539e-06, "loss": 0.1839, "step": 7169 }, { "epoch": 0.7430821846823505, "grad_norm": 0.7634007334709167, "learning_rate": 6.534114830665232e-06, "loss": 0.2349, "step": 7170 }, { "epoch": 0.743185822365012, "grad_norm": 0.7185533046722412, "learning_rate": 6.529151781526926e-06, "loss": 0.2298, "step": 7171 }, { "epoch": 0.7432894600476734, "grad_norm": 0.6481119990348816, "learning_rate": 6.524190250262863e-06, "loss": 0.1949, "step": 7172 }, { "epoch": 0.7433930977303348, "grad_norm": 0.6291430592536926, "learning_rate": 6.519230237432088e-06, "loss": 0.2016, "step": 7173 }, { "epoch": 0.7434967354129962, "grad_norm": 0.874639630317688, "learning_rate": 6.514271743593499e-06, "loss": 0.2434, "step": 7174 }, { "epoch": 0.7436003730956576, "grad_norm": 0.5795329213142395, "learning_rate": 6.509314769305801e-06, "loss": 0.1647, "step": 7175 }, { "epoch": 0.743704010778319, "grad_norm": 0.6141259074211121, "learning_rate": 6.504359315127549e-06, "loss": 0.22, "step": 7176 }, { "epoch": 0.7438076484609805, "grad_norm": 0.5499668121337891, "learning_rate": 6.49940538161711e-06, "loss": 0.1762, "step": 7177 }, { "epoch": 0.7439112861436419, "grad_norm": 0.5772035717964172, "learning_rate": 6.494452969332678e-06, "loss": 0.1637, "step": 7178 }, { "epoch": 0.7440149238263033, "grad_norm": 0.678733766078949, "learning_rate": 6.489502078832297e-06, "loss": 0.2116, "step": 7179 }, { "epoch": 0.7441185615089646, "grad_norm": 0.6421101093292236, "learning_rate": 6.484552710673815e-06, "loss": 0.2189, "step": 7180 }, { "epoch": 0.744222199191626, "grad_norm": 0.6648598313331604, "learning_rate": 6.479604865414928e-06, "loss": 0.1842, "step": 7181 }, { "epoch": 0.7443258368742874, "grad_norm": 0.6394937634468079, "learning_rate": 6.474658543613146e-06, "loss": 0.2011, "step": 7182 }, { "epoch": 0.7444294745569489, "grad_norm": 0.5273407697677612, "learning_rate": 6.46971374582581e-06, "loss": 0.1534, "step": 7183 }, { "epoch": 0.7445331122396103, "grad_norm": 0.6797968745231628, "learning_rate": 6.4647704726101e-06, "loss": 0.2042, "step": 7184 }, { "epoch": 0.7446367499222717, "grad_norm": 0.6618654131889343, "learning_rate": 6.459828724523007e-06, "loss": 0.1849, "step": 7185 }, { "epoch": 0.7447403876049331, "grad_norm": 0.6694583892822266, "learning_rate": 6.4548885021213635e-06, "loss": 0.2022, "step": 7186 }, { "epoch": 0.7448440252875945, "grad_norm": 0.6789039969444275, "learning_rate": 6.449949805961835e-06, "loss": 0.1922, "step": 7187 }, { "epoch": 0.7449476629702559, "grad_norm": 0.714964747428894, "learning_rate": 6.445012636600898e-06, "loss": 0.1984, "step": 7188 }, { "epoch": 0.7450513006529174, "grad_norm": 0.6054500937461853, "learning_rate": 6.440076994594866e-06, "loss": 0.1852, "step": 7189 }, { "epoch": 0.7451549383355788, "grad_norm": 0.669039785861969, "learning_rate": 6.435142880499874e-06, "loss": 0.1992, "step": 7190 }, { "epoch": 0.7452585760182402, "grad_norm": 0.6293997168540955, "learning_rate": 6.430210294871893e-06, "loss": 0.21, "step": 7191 }, { "epoch": 0.7453622137009016, "grad_norm": 0.6848145723342896, "learning_rate": 6.4252792382667285e-06, "loss": 0.1994, "step": 7192 }, { "epoch": 0.745465851383563, "grad_norm": 0.6127070188522339, "learning_rate": 6.420349711239988e-06, "loss": 0.1897, "step": 7193 }, { "epoch": 0.7455694890662244, "grad_norm": 0.6485124826431274, "learning_rate": 6.415421714347141e-06, "loss": 0.1981, "step": 7194 }, { "epoch": 0.7456731267488859, "grad_norm": 0.6168133616447449, "learning_rate": 6.410495248143458e-06, "loss": 0.1741, "step": 7195 }, { "epoch": 0.7457767644315473, "grad_norm": 0.659416913986206, "learning_rate": 6.405570313184038e-06, "loss": 0.239, "step": 7196 }, { "epoch": 0.7458804021142087, "grad_norm": 0.6403185129165649, "learning_rate": 6.400646910023829e-06, "loss": 0.2017, "step": 7197 }, { "epoch": 0.7459840397968701, "grad_norm": 0.7637109160423279, "learning_rate": 6.395725039217579e-06, "loss": 0.2671, "step": 7198 }, { "epoch": 0.7460876774795315, "grad_norm": 0.5837101936340332, "learning_rate": 6.39080470131989e-06, "loss": 0.1724, "step": 7199 }, { "epoch": 0.746191315162193, "grad_norm": 0.5817883014678955, "learning_rate": 6.3858858968851645e-06, "loss": 0.1936, "step": 7200 }, { "epoch": 0.7462949528448544, "grad_norm": 0.6717552542686462, "learning_rate": 6.380968626467659e-06, "loss": 0.2043, "step": 7201 }, { "epoch": 0.7463985905275158, "grad_norm": 0.5266337394714355, "learning_rate": 6.376052890621438e-06, "loss": 0.1498, "step": 7202 }, { "epoch": 0.7465022282101772, "grad_norm": 0.5839206576347351, "learning_rate": 6.371138689900392e-06, "loss": 0.1862, "step": 7203 }, { "epoch": 0.7466058658928386, "grad_norm": 0.7016838192939758, "learning_rate": 6.3662260248582575e-06, "loss": 0.1997, "step": 7204 }, { "epoch": 0.7467095035755, "grad_norm": 0.8019984364509583, "learning_rate": 6.361314896048574e-06, "loss": 0.2547, "step": 7205 }, { "epoch": 0.7468131412581615, "grad_norm": 0.6328192353248596, "learning_rate": 6.356405304024726e-06, "loss": 0.1839, "step": 7206 }, { "epoch": 0.7469167789408229, "grad_norm": 0.5738548636436462, "learning_rate": 6.351497249339933e-06, "loss": 0.139, "step": 7207 }, { "epoch": 0.7470204166234843, "grad_norm": 0.6084631085395813, "learning_rate": 6.3465907325471996e-06, "loss": 0.1768, "step": 7208 }, { "epoch": 0.7471240543061457, "grad_norm": 0.6383935213088989, "learning_rate": 6.341685754199405e-06, "loss": 0.1846, "step": 7209 }, { "epoch": 0.7472276919888071, "grad_norm": 0.6913857460021973, "learning_rate": 6.336782314849219e-06, "loss": 0.2225, "step": 7210 }, { "epoch": 0.7473313296714685, "grad_norm": 0.7214654684066772, "learning_rate": 6.331880415049163e-06, "loss": 0.1921, "step": 7211 }, { "epoch": 0.74743496735413, "grad_norm": 0.6223048567771912, "learning_rate": 6.326980055351581e-06, "loss": 0.1955, "step": 7212 }, { "epoch": 0.7475386050367914, "grad_norm": 0.5999380350112915, "learning_rate": 6.32208123630863e-06, "loss": 0.1961, "step": 7213 }, { "epoch": 0.7476422427194528, "grad_norm": 0.7001415491104126, "learning_rate": 6.317183958472297e-06, "loss": 0.2018, "step": 7214 }, { "epoch": 0.7477458804021142, "grad_norm": 0.5766058564186096, "learning_rate": 6.312288222394414e-06, "loss": 0.1626, "step": 7215 }, { "epoch": 0.7478495180847756, "grad_norm": 0.5628561973571777, "learning_rate": 6.307394028626605e-06, "loss": 0.1717, "step": 7216 }, { "epoch": 0.747953155767437, "grad_norm": 0.7716298699378967, "learning_rate": 6.3025013777203605e-06, "loss": 0.2195, "step": 7217 }, { "epoch": 0.7480567934500985, "grad_norm": 0.7538269758224487, "learning_rate": 6.2976102702269615e-06, "loss": 0.1996, "step": 7218 }, { "epoch": 0.7481604311327599, "grad_norm": 0.5600654482841492, "learning_rate": 6.292720706697541e-06, "loss": 0.1617, "step": 7219 }, { "epoch": 0.7482640688154213, "grad_norm": 0.6885689496994019, "learning_rate": 6.287832687683046e-06, "loss": 0.2133, "step": 7220 }, { "epoch": 0.7483677064980827, "grad_norm": 0.5769020318984985, "learning_rate": 6.28294621373424e-06, "loss": 0.1793, "step": 7221 }, { "epoch": 0.7484713441807441, "grad_norm": 0.689033567905426, "learning_rate": 6.278061285401735e-06, "loss": 0.1862, "step": 7222 }, { "epoch": 0.7485749818634055, "grad_norm": 0.6020885109901428, "learning_rate": 6.27317790323595e-06, "loss": 0.1927, "step": 7223 }, { "epoch": 0.748678619546067, "grad_norm": 0.6004186868667603, "learning_rate": 6.268296067787138e-06, "loss": 0.1753, "step": 7224 }, { "epoch": 0.7487822572287284, "grad_norm": 0.615755021572113, "learning_rate": 6.263415779605386e-06, "loss": 0.1849, "step": 7225 }, { "epoch": 0.7488858949113898, "grad_norm": 0.562086284160614, "learning_rate": 6.258537039240591e-06, "loss": 0.167, "step": 7226 }, { "epoch": 0.7489895325940512, "grad_norm": 0.6605566143989563, "learning_rate": 6.253659847242479e-06, "loss": 0.1977, "step": 7227 }, { "epoch": 0.7490931702767126, "grad_norm": 0.623592734336853, "learning_rate": 6.248784204160601e-06, "loss": 0.1716, "step": 7228 }, { "epoch": 0.749196807959374, "grad_norm": 0.545217752456665, "learning_rate": 6.2439101105443424e-06, "loss": 0.1843, "step": 7229 }, { "epoch": 0.7493004456420355, "grad_norm": 0.6777985692024231, "learning_rate": 6.239037566942914e-06, "loss": 0.2135, "step": 7230 }, { "epoch": 0.7494040833246969, "grad_norm": 0.5926411747932434, "learning_rate": 6.234166573905336e-06, "loss": 0.1834, "step": 7231 }, { "epoch": 0.7495077210073583, "grad_norm": 0.5609111189842224, "learning_rate": 6.229297131980474e-06, "loss": 0.1676, "step": 7232 }, { "epoch": 0.7496113586900197, "grad_norm": 0.6948944330215454, "learning_rate": 6.224429241717003e-06, "loss": 0.2067, "step": 7233 }, { "epoch": 0.7497149963726811, "grad_norm": 0.5710058808326721, "learning_rate": 6.219562903663425e-06, "loss": 0.18, "step": 7234 }, { "epoch": 0.7498186340553425, "grad_norm": 0.6364341974258423, "learning_rate": 6.214698118368085e-06, "loss": 0.1769, "step": 7235 }, { "epoch": 0.749922271738004, "grad_norm": 0.6758280992507935, "learning_rate": 6.209834886379123e-06, "loss": 0.1917, "step": 7236 }, { "epoch": 0.7500259094206654, "grad_norm": 0.7250998616218567, "learning_rate": 6.204973208244536e-06, "loss": 0.2122, "step": 7237 }, { "epoch": 0.7501295471033268, "grad_norm": 0.5818528532981873, "learning_rate": 6.200113084512116e-06, "loss": 0.1641, "step": 7238 }, { "epoch": 0.7502331847859882, "grad_norm": 0.7421915531158447, "learning_rate": 6.19525451572951e-06, "loss": 0.2021, "step": 7239 }, { "epoch": 0.7503368224686496, "grad_norm": 0.6421279907226562, "learning_rate": 6.1903975024441634e-06, "loss": 0.198, "step": 7240 }, { "epoch": 0.750440460151311, "grad_norm": 0.6554982662200928, "learning_rate": 6.185542045203352e-06, "loss": 0.2247, "step": 7241 }, { "epoch": 0.7505440978339725, "grad_norm": 0.5850462317466736, "learning_rate": 6.180688144554194e-06, "loss": 0.1705, "step": 7242 }, { "epoch": 0.7506477355166339, "grad_norm": 0.6490334868431091, "learning_rate": 6.175835801043608e-06, "loss": 0.1991, "step": 7243 }, { "epoch": 0.7507513731992953, "grad_norm": 0.7006539106369019, "learning_rate": 6.17098501521836e-06, "loss": 0.2035, "step": 7244 }, { "epoch": 0.7508550108819567, "grad_norm": 0.6657992601394653, "learning_rate": 6.166135787625023e-06, "loss": 0.1977, "step": 7245 }, { "epoch": 0.7509586485646181, "grad_norm": 0.619107723236084, "learning_rate": 6.161288118809994e-06, "loss": 0.1919, "step": 7246 }, { "epoch": 0.7510622862472796, "grad_norm": 0.810283899307251, "learning_rate": 6.156442009319512e-06, "loss": 0.2356, "step": 7247 }, { "epoch": 0.751165923929941, "grad_norm": 0.5877438187599182, "learning_rate": 6.151597459699621e-06, "loss": 0.1847, "step": 7248 }, { "epoch": 0.7512695616126024, "grad_norm": 0.6159684658050537, "learning_rate": 6.1467544704961965e-06, "loss": 0.1944, "step": 7249 }, { "epoch": 0.7513731992952638, "grad_norm": 0.8051716685295105, "learning_rate": 6.141913042254952e-06, "loss": 0.2004, "step": 7250 }, { "epoch": 0.7514768369779252, "grad_norm": 0.7064564228057861, "learning_rate": 6.137073175521402e-06, "loss": 0.2126, "step": 7251 }, { "epoch": 0.7515804746605866, "grad_norm": 0.6129339933395386, "learning_rate": 6.132234870840899e-06, "loss": 0.1987, "step": 7252 }, { "epoch": 0.7516841123432481, "grad_norm": 0.5935209393501282, "learning_rate": 6.1273981287586035e-06, "loss": 0.1574, "step": 7253 }, { "epoch": 0.7517877500259095, "grad_norm": 0.7329012751579285, "learning_rate": 6.122562949819522e-06, "loss": 0.2455, "step": 7254 }, { "epoch": 0.7518913877085709, "grad_norm": 0.6233592629432678, "learning_rate": 6.117729334568481e-06, "loss": 0.205, "step": 7255 }, { "epoch": 0.7519950253912322, "grad_norm": 0.5638620257377625, "learning_rate": 6.112897283550112e-06, "loss": 0.1628, "step": 7256 }, { "epoch": 0.7520986630738936, "grad_norm": 0.6088778376579285, "learning_rate": 6.108066797308896e-06, "loss": 0.1974, "step": 7257 }, { "epoch": 0.752202300756555, "grad_norm": 0.7371406555175781, "learning_rate": 6.103237876389117e-06, "loss": 0.1761, "step": 7258 }, { "epoch": 0.7523059384392164, "grad_norm": 0.5529123544692993, "learning_rate": 6.098410521334883e-06, "loss": 0.163, "step": 7259 }, { "epoch": 0.7524095761218779, "grad_norm": 0.6525443196296692, "learning_rate": 6.09358473269015e-06, "loss": 0.2138, "step": 7260 }, { "epoch": 0.7525132138045393, "grad_norm": 0.669493556022644, "learning_rate": 6.088760510998664e-06, "loss": 0.2048, "step": 7261 }, { "epoch": 0.7526168514872007, "grad_norm": 0.6533212661743164, "learning_rate": 6.083937856804025e-06, "loss": 0.1936, "step": 7262 }, { "epoch": 0.7527204891698621, "grad_norm": 0.6517473459243774, "learning_rate": 6.079116770649629e-06, "loss": 0.198, "step": 7263 }, { "epoch": 0.7528241268525235, "grad_norm": 0.724342942237854, "learning_rate": 6.074297253078723e-06, "loss": 0.2223, "step": 7264 }, { "epoch": 0.752927764535185, "grad_norm": 0.6567179560661316, "learning_rate": 6.069479304634352e-06, "loss": 0.1875, "step": 7265 }, { "epoch": 0.7530314022178464, "grad_norm": 0.7255767583847046, "learning_rate": 6.06466292585939e-06, "loss": 0.2116, "step": 7266 }, { "epoch": 0.7531350399005078, "grad_norm": 0.6283934712409973, "learning_rate": 6.059848117296556e-06, "loss": 0.1815, "step": 7267 }, { "epoch": 0.7532386775831692, "grad_norm": 0.7235457897186279, "learning_rate": 6.05503487948836e-06, "loss": 0.1972, "step": 7268 }, { "epoch": 0.7533423152658306, "grad_norm": 0.5651537179946899, "learning_rate": 6.050223212977153e-06, "loss": 0.161, "step": 7269 }, { "epoch": 0.753445952948492, "grad_norm": 0.7023213505744934, "learning_rate": 6.045413118305123e-06, "loss": 0.2197, "step": 7270 }, { "epoch": 0.7535495906311535, "grad_norm": 0.7070270776748657, "learning_rate": 6.0406045960142365e-06, "loss": 0.2125, "step": 7271 }, { "epoch": 0.7536532283138149, "grad_norm": 0.6896175146102905, "learning_rate": 6.035797646646331e-06, "loss": 0.2296, "step": 7272 }, { "epoch": 0.7537568659964763, "grad_norm": 0.705349326133728, "learning_rate": 6.030992270743032e-06, "loss": 0.2146, "step": 7273 }, { "epoch": 0.7538605036791377, "grad_norm": 0.49893879890441895, "learning_rate": 6.026188468845811e-06, "loss": 0.1506, "step": 7274 }, { "epoch": 0.7539641413617991, "grad_norm": 0.5738864541053772, "learning_rate": 6.021386241495955e-06, "loss": 0.1677, "step": 7275 }, { "epoch": 0.7540677790444605, "grad_norm": 0.6488167643547058, "learning_rate": 6.016585589234567e-06, "loss": 0.2096, "step": 7276 }, { "epoch": 0.754171416727122, "grad_norm": 0.6203997731208801, "learning_rate": 6.011786512602576e-06, "loss": 0.1919, "step": 7277 }, { "epoch": 0.7542750544097834, "grad_norm": 0.578915536403656, "learning_rate": 6.006989012140732e-06, "loss": 0.1701, "step": 7278 }, { "epoch": 0.7543786920924448, "grad_norm": 0.5318428874015808, "learning_rate": 6.002193088389612e-06, "loss": 0.1654, "step": 7279 }, { "epoch": 0.7544823297751062, "grad_norm": 0.5900943875312805, "learning_rate": 5.997398741889619e-06, "loss": 0.1857, "step": 7280 }, { "epoch": 0.7545859674577676, "grad_norm": 0.6281424760818481, "learning_rate": 5.992605973180965e-06, "loss": 0.192, "step": 7281 }, { "epoch": 0.754689605140429, "grad_norm": 0.6875169277191162, "learning_rate": 5.987814782803702e-06, "loss": 0.2051, "step": 7282 }, { "epoch": 0.7547932428230905, "grad_norm": 0.7132881879806519, "learning_rate": 5.983025171297685e-06, "loss": 0.2304, "step": 7283 }, { "epoch": 0.7548968805057519, "grad_norm": 0.7771185636520386, "learning_rate": 5.978237139202596e-06, "loss": 0.2364, "step": 7284 }, { "epoch": 0.7550005181884133, "grad_norm": 0.637414276599884, "learning_rate": 5.973450687057956e-06, "loss": 0.2289, "step": 7285 }, { "epoch": 0.7551041558710747, "grad_norm": 0.6778030395507812, "learning_rate": 5.9686658154030804e-06, "loss": 0.1916, "step": 7286 }, { "epoch": 0.7552077935537361, "grad_norm": 0.5655515789985657, "learning_rate": 5.963882524777136e-06, "loss": 0.1663, "step": 7287 }, { "epoch": 0.7553114312363975, "grad_norm": 0.6484770774841309, "learning_rate": 5.959100815719083e-06, "loss": 0.1996, "step": 7288 }, { "epoch": 0.755415068919059, "grad_norm": 0.6523261070251465, "learning_rate": 5.954320688767727e-06, "loss": 0.1783, "step": 7289 }, { "epoch": 0.7555187066017204, "grad_norm": 0.706913948059082, "learning_rate": 5.949542144461684e-06, "loss": 0.2012, "step": 7290 }, { "epoch": 0.7556223442843818, "grad_norm": 0.7174582481384277, "learning_rate": 5.944765183339383e-06, "loss": 0.1925, "step": 7291 }, { "epoch": 0.7557259819670432, "grad_norm": 0.6907774209976196, "learning_rate": 5.9399898059390996e-06, "loss": 0.2149, "step": 7292 }, { "epoch": 0.7558296196497046, "grad_norm": 0.7346753478050232, "learning_rate": 5.935216012798899e-06, "loss": 0.2104, "step": 7293 }, { "epoch": 0.755933257332366, "grad_norm": 0.6349897980690002, "learning_rate": 5.930443804456696e-06, "loss": 0.1915, "step": 7294 }, { "epoch": 0.7560368950150275, "grad_norm": 0.5483682155609131, "learning_rate": 5.925673181450217e-06, "loss": 0.1488, "step": 7295 }, { "epoch": 0.7561405326976889, "grad_norm": 0.6724481582641602, "learning_rate": 5.920904144317008e-06, "loss": 0.2061, "step": 7296 }, { "epoch": 0.7562441703803503, "grad_norm": 0.6132513284683228, "learning_rate": 5.916136693594434e-06, "loss": 0.1948, "step": 7297 }, { "epoch": 0.7563478080630117, "grad_norm": 0.5399127006530762, "learning_rate": 5.911370829819676e-06, "loss": 0.1638, "step": 7298 }, { "epoch": 0.7564514457456731, "grad_norm": 0.6391360759735107, "learning_rate": 5.906606553529752e-06, "loss": 0.1918, "step": 7299 }, { "epoch": 0.7565550834283346, "grad_norm": 0.6599064469337463, "learning_rate": 5.901843865261499e-06, "loss": 0.1923, "step": 7300 }, { "epoch": 0.756658721110996, "grad_norm": 0.7922466993331909, "learning_rate": 5.897082765551556e-06, "loss": 0.2055, "step": 7301 }, { "epoch": 0.7567623587936574, "grad_norm": 0.6952490210533142, "learning_rate": 5.892323254936419e-06, "loss": 0.2004, "step": 7302 }, { "epoch": 0.7568659964763188, "grad_norm": 0.6422750949859619, "learning_rate": 5.887565333952352e-06, "loss": 0.1778, "step": 7303 }, { "epoch": 0.7569696341589802, "grad_norm": 0.6436856985092163, "learning_rate": 5.882809003135486e-06, "loss": 0.1764, "step": 7304 }, { "epoch": 0.7570732718416416, "grad_norm": 0.6292835474014282, "learning_rate": 5.87805426302176e-06, "loss": 0.1754, "step": 7305 }, { "epoch": 0.757176909524303, "grad_norm": 0.7755428552627563, "learning_rate": 5.8733011141469236e-06, "loss": 0.2428, "step": 7306 }, { "epoch": 0.7572805472069645, "grad_norm": 0.6775556802749634, "learning_rate": 5.868549557046561e-06, "loss": 0.1965, "step": 7307 }, { "epoch": 0.7573841848896259, "grad_norm": 0.7172718048095703, "learning_rate": 5.863799592256067e-06, "loss": 0.1977, "step": 7308 }, { "epoch": 0.7574878225722873, "grad_norm": 0.5608749985694885, "learning_rate": 5.8590512203106544e-06, "loss": 0.1709, "step": 7309 }, { "epoch": 0.7575914602549487, "grad_norm": 0.6475331783294678, "learning_rate": 5.854304441745373e-06, "loss": 0.2118, "step": 7310 }, { "epoch": 0.7576950979376101, "grad_norm": 0.6698943376541138, "learning_rate": 5.8495592570950724e-06, "loss": 0.204, "step": 7311 }, { "epoch": 0.7577987356202716, "grad_norm": 0.6197548508644104, "learning_rate": 5.844815666894443e-06, "loss": 0.1903, "step": 7312 }, { "epoch": 0.757902373302933, "grad_norm": 0.6404499411582947, "learning_rate": 5.840073671677973e-06, "loss": 0.1973, "step": 7313 }, { "epoch": 0.7580060109855944, "grad_norm": 0.6370285749435425, "learning_rate": 5.835333271979995e-06, "loss": 0.1978, "step": 7314 }, { "epoch": 0.7581096486682558, "grad_norm": 0.6697351336479187, "learning_rate": 5.830594468334647e-06, "loss": 0.2058, "step": 7315 }, { "epoch": 0.7582132863509172, "grad_norm": 0.6191688179969788, "learning_rate": 5.82585726127588e-06, "loss": 0.1983, "step": 7316 }, { "epoch": 0.7583169240335786, "grad_norm": 0.6752298474311829, "learning_rate": 5.821121651337489e-06, "loss": 0.1862, "step": 7317 }, { "epoch": 0.7584205617162401, "grad_norm": 0.6014376878738403, "learning_rate": 5.81638763905306e-06, "loss": 0.1798, "step": 7318 }, { "epoch": 0.7585241993989015, "grad_norm": 0.7816725373268127, "learning_rate": 5.8116552249560274e-06, "loss": 0.24, "step": 7319 }, { "epoch": 0.7586278370815629, "grad_norm": 0.6555365324020386, "learning_rate": 5.806924409579631e-06, "loss": 0.207, "step": 7320 }, { "epoch": 0.7587314747642243, "grad_norm": 0.6532207727432251, "learning_rate": 5.80219519345693e-06, "loss": 0.1918, "step": 7321 }, { "epoch": 0.7588351124468857, "grad_norm": 0.7684751152992249, "learning_rate": 5.797467577120803e-06, "loss": 0.2297, "step": 7322 }, { "epoch": 0.7589387501295471, "grad_norm": 0.6303255558013916, "learning_rate": 5.792741561103945e-06, "loss": 0.1956, "step": 7323 }, { "epoch": 0.7590423878122086, "grad_norm": 0.5961942076683044, "learning_rate": 5.788017145938882e-06, "loss": 0.1808, "step": 7324 }, { "epoch": 0.75914602549487, "grad_norm": 0.6447713375091553, "learning_rate": 5.783294332157963e-06, "loss": 0.2017, "step": 7325 }, { "epoch": 0.7592496631775314, "grad_norm": 0.725047767162323, "learning_rate": 5.778573120293329e-06, "loss": 0.2063, "step": 7326 }, { "epoch": 0.7593533008601928, "grad_norm": 0.5291570425033569, "learning_rate": 5.773853510876975e-06, "loss": 0.1796, "step": 7327 }, { "epoch": 0.7594569385428542, "grad_norm": 0.5901037454605103, "learning_rate": 5.769135504440693e-06, "loss": 0.1886, "step": 7328 }, { "epoch": 0.7595605762255156, "grad_norm": 0.6665926575660706, "learning_rate": 5.764419101516095e-06, "loss": 0.2019, "step": 7329 }, { "epoch": 0.7596642139081771, "grad_norm": 0.6632572412490845, "learning_rate": 5.759704302634626e-06, "loss": 0.2079, "step": 7330 }, { "epoch": 0.7597678515908385, "grad_norm": 0.5482375025749207, "learning_rate": 5.754991108327534e-06, "loss": 0.1617, "step": 7331 }, { "epoch": 0.7598714892734998, "grad_norm": 0.6653674840927124, "learning_rate": 5.750279519125908e-06, "loss": 0.187, "step": 7332 }, { "epoch": 0.7599751269561612, "grad_norm": 0.562900185585022, "learning_rate": 5.7455695355606314e-06, "loss": 0.1761, "step": 7333 }, { "epoch": 0.7600787646388226, "grad_norm": 0.6961675882339478, "learning_rate": 5.740861158162416e-06, "loss": 0.2146, "step": 7334 }, { "epoch": 0.760182402321484, "grad_norm": 0.5686554312705994, "learning_rate": 5.736154387461805e-06, "loss": 0.1669, "step": 7335 }, { "epoch": 0.7602860400041455, "grad_norm": 0.6474700570106506, "learning_rate": 5.731449223989138e-06, "loss": 0.2088, "step": 7336 }, { "epoch": 0.7603896776868069, "grad_norm": 0.6459776759147644, "learning_rate": 5.726745668274598e-06, "loss": 0.2023, "step": 7337 }, { "epoch": 0.7604933153694683, "grad_norm": 0.7052670121192932, "learning_rate": 5.72204372084816e-06, "loss": 0.2519, "step": 7338 }, { "epoch": 0.7605969530521297, "grad_norm": 0.5499739646911621, "learning_rate": 5.717343382239649e-06, "loss": 0.1594, "step": 7339 }, { "epoch": 0.7607005907347911, "grad_norm": 0.6012383103370667, "learning_rate": 5.7126446529786804e-06, "loss": 0.1812, "step": 7340 }, { "epoch": 0.7608042284174525, "grad_norm": 0.7203385829925537, "learning_rate": 5.707947533594698e-06, "loss": 0.2274, "step": 7341 }, { "epoch": 0.760907866100114, "grad_norm": 0.6299017667770386, "learning_rate": 5.703252024616974e-06, "loss": 0.1962, "step": 7342 }, { "epoch": 0.7610115037827754, "grad_norm": 0.6651351451873779, "learning_rate": 5.698558126574583e-06, "loss": 0.2004, "step": 7343 }, { "epoch": 0.7611151414654368, "grad_norm": 0.6428758502006531, "learning_rate": 5.6938658399964285e-06, "loss": 0.1912, "step": 7344 }, { "epoch": 0.7612187791480982, "grad_norm": 0.5440841913223267, "learning_rate": 5.6891751654112405e-06, "loss": 0.1646, "step": 7345 }, { "epoch": 0.7613224168307596, "grad_norm": 0.5122158527374268, "learning_rate": 5.6844861033475466e-06, "loss": 0.1455, "step": 7346 }, { "epoch": 0.761426054513421, "grad_norm": 0.5525471568107605, "learning_rate": 5.679798654333704e-06, "loss": 0.1597, "step": 7347 }, { "epoch": 0.7615296921960825, "grad_norm": 0.7224408388137817, "learning_rate": 5.6751128188978835e-06, "loss": 0.2429, "step": 7348 }, { "epoch": 0.7616333298787439, "grad_norm": 0.6339262127876282, "learning_rate": 5.670428597568081e-06, "loss": 0.2082, "step": 7349 }, { "epoch": 0.7617369675614053, "grad_norm": 0.6057512164115906, "learning_rate": 5.665745990872114e-06, "loss": 0.1794, "step": 7350 }, { "epoch": 0.7618406052440667, "grad_norm": 0.7479916214942932, "learning_rate": 5.6610649993376e-06, "loss": 0.2254, "step": 7351 }, { "epoch": 0.7619442429267281, "grad_norm": 0.5429776310920715, "learning_rate": 5.656385623491998e-06, "loss": 0.1486, "step": 7352 }, { "epoch": 0.7620478806093895, "grad_norm": 0.5628483891487122, "learning_rate": 5.651707863862566e-06, "loss": 0.1827, "step": 7353 }, { "epoch": 0.762151518292051, "grad_norm": 0.6502082943916321, "learning_rate": 5.647031720976382e-06, "loss": 0.1867, "step": 7354 }, { "epoch": 0.7622551559747124, "grad_norm": 0.6297394633293152, "learning_rate": 5.642357195360355e-06, "loss": 0.2034, "step": 7355 }, { "epoch": 0.7623587936573738, "grad_norm": 0.6706293821334839, "learning_rate": 5.637684287541196e-06, "loss": 0.195, "step": 7356 }, { "epoch": 0.7624624313400352, "grad_norm": 0.6522697806358337, "learning_rate": 5.633012998045451e-06, "loss": 0.2099, "step": 7357 }, { "epoch": 0.7625660690226966, "grad_norm": 0.6541911959648132, "learning_rate": 5.628343327399462e-06, "loss": 0.1754, "step": 7358 }, { "epoch": 0.762669706705358, "grad_norm": 0.6433303952217102, "learning_rate": 5.62367527612941e-06, "loss": 0.1875, "step": 7359 }, { "epoch": 0.7627733443880195, "grad_norm": 0.6429468393325806, "learning_rate": 5.61900884476128e-06, "loss": 0.1947, "step": 7360 }, { "epoch": 0.7628769820706809, "grad_norm": 0.6449114084243774, "learning_rate": 5.614344033820871e-06, "loss": 0.1945, "step": 7361 }, { "epoch": 0.7629806197533423, "grad_norm": 0.6627293825149536, "learning_rate": 5.60968084383382e-06, "loss": 0.2207, "step": 7362 }, { "epoch": 0.7630842574360037, "grad_norm": 0.7052158713340759, "learning_rate": 5.6050192753255565e-06, "loss": 0.2095, "step": 7363 }, { "epoch": 0.7631878951186651, "grad_norm": 0.6348506212234497, "learning_rate": 5.600359328821341e-06, "loss": 0.19, "step": 7364 }, { "epoch": 0.7632915328013266, "grad_norm": 0.5358766317367554, "learning_rate": 5.595701004846266e-06, "loss": 0.1686, "step": 7365 }, { "epoch": 0.763395170483988, "grad_norm": 0.7228176593780518, "learning_rate": 5.591044303925197e-06, "loss": 0.2052, "step": 7366 }, { "epoch": 0.7634988081666494, "grad_norm": 0.5297076106071472, "learning_rate": 5.586389226582862e-06, "loss": 0.1641, "step": 7367 }, { "epoch": 0.7636024458493108, "grad_norm": 0.6142310500144958, "learning_rate": 5.581735773343777e-06, "loss": 0.172, "step": 7368 }, { "epoch": 0.7637060835319722, "grad_norm": 0.5850446224212646, "learning_rate": 5.5770839447322936e-06, "loss": 0.1708, "step": 7369 }, { "epoch": 0.7638097212146336, "grad_norm": 0.6489421725273132, "learning_rate": 5.572433741272574e-06, "loss": 0.1959, "step": 7370 }, { "epoch": 0.7639133588972951, "grad_norm": 0.7610423564910889, "learning_rate": 5.567785163488592e-06, "loss": 0.2226, "step": 7371 }, { "epoch": 0.7640169965799565, "grad_norm": 0.5759432911872864, "learning_rate": 5.563138211904144e-06, "loss": 0.1486, "step": 7372 }, { "epoch": 0.7641206342626179, "grad_norm": 0.6531688570976257, "learning_rate": 5.558492887042832e-06, "loss": 0.1952, "step": 7373 }, { "epoch": 0.7642242719452793, "grad_norm": 0.6584306955337524, "learning_rate": 5.5538491894280935e-06, "loss": 0.1974, "step": 7374 }, { "epoch": 0.7643279096279407, "grad_norm": 0.6911411881446838, "learning_rate": 5.549207119583177e-06, "loss": 0.209, "step": 7375 }, { "epoch": 0.7644315473106021, "grad_norm": 0.711522102355957, "learning_rate": 5.544566678031132e-06, "loss": 0.2089, "step": 7376 }, { "epoch": 0.7645351849932636, "grad_norm": 0.6881641745567322, "learning_rate": 5.539927865294848e-06, "loss": 0.2114, "step": 7377 }, { "epoch": 0.764638822675925, "grad_norm": 0.5821783542633057, "learning_rate": 5.535290681897014e-06, "loss": 0.1677, "step": 7378 }, { "epoch": 0.7647424603585864, "grad_norm": 0.5881667733192444, "learning_rate": 5.530655128360134e-06, "loss": 0.1903, "step": 7379 }, { "epoch": 0.7648460980412478, "grad_norm": 0.6320826411247253, "learning_rate": 5.526021205206546e-06, "loss": 0.209, "step": 7380 }, { "epoch": 0.7649497357239092, "grad_norm": 0.619613528251648, "learning_rate": 5.521388912958383e-06, "loss": 0.1741, "step": 7381 }, { "epoch": 0.7650533734065706, "grad_norm": 0.7030311226844788, "learning_rate": 5.5167582521376175e-06, "loss": 0.1982, "step": 7382 }, { "epoch": 0.7651570110892321, "grad_norm": 0.6498759388923645, "learning_rate": 5.51212922326601e-06, "loss": 0.1697, "step": 7383 }, { "epoch": 0.7652606487718935, "grad_norm": 0.5602517127990723, "learning_rate": 5.507501826865164e-06, "loss": 0.1753, "step": 7384 }, { "epoch": 0.7653642864545549, "grad_norm": 0.6065822839736938, "learning_rate": 5.502876063456486e-06, "loss": 0.1891, "step": 7385 }, { "epoch": 0.7654679241372163, "grad_norm": 0.5793746113777161, "learning_rate": 5.498251933561189e-06, "loss": 0.1856, "step": 7386 }, { "epoch": 0.7655715618198777, "grad_norm": 0.6203474998474121, "learning_rate": 5.4936294377003296e-06, "loss": 0.1801, "step": 7387 }, { "epoch": 0.7656751995025391, "grad_norm": 0.6482757925987244, "learning_rate": 5.489008576394745e-06, "loss": 0.1831, "step": 7388 }, { "epoch": 0.7657788371852006, "grad_norm": 0.7696741819381714, "learning_rate": 5.484389350165118e-06, "loss": 0.2658, "step": 7389 }, { "epoch": 0.765882474867862, "grad_norm": 0.6601531505584717, "learning_rate": 5.47977175953194e-06, "loss": 0.1854, "step": 7390 }, { "epoch": 0.7659861125505234, "grad_norm": 0.5935264229774475, "learning_rate": 5.4751558050155085e-06, "loss": 0.1804, "step": 7391 }, { "epoch": 0.7660897502331848, "grad_norm": 0.5694248676300049, "learning_rate": 5.470541487135941e-06, "loss": 0.1706, "step": 7392 }, { "epoch": 0.7661933879158462, "grad_norm": 0.70427405834198, "learning_rate": 5.465928806413166e-06, "loss": 0.1906, "step": 7393 }, { "epoch": 0.7662970255985077, "grad_norm": 0.6865828633308411, "learning_rate": 5.4613177633669405e-06, "loss": 0.2231, "step": 7394 }, { "epoch": 0.7664006632811691, "grad_norm": 0.6764995455741882, "learning_rate": 5.456708358516833e-06, "loss": 0.2044, "step": 7395 }, { "epoch": 0.7665043009638305, "grad_norm": 0.6275007724761963, "learning_rate": 5.452100592382221e-06, "loss": 0.1497, "step": 7396 }, { "epoch": 0.7666079386464919, "grad_norm": 0.662211537361145, "learning_rate": 5.447494465482299e-06, "loss": 0.1907, "step": 7397 }, { "epoch": 0.7667115763291533, "grad_norm": 0.7616379261016846, "learning_rate": 5.442889978336072e-06, "loss": 0.1956, "step": 7398 }, { "epoch": 0.7668152140118147, "grad_norm": 0.7183577418327332, "learning_rate": 5.438287131462372e-06, "loss": 0.2269, "step": 7399 }, { "epoch": 0.7669188516944762, "grad_norm": 0.597819983959198, "learning_rate": 5.433685925379848e-06, "loss": 0.154, "step": 7400 }, { "epoch": 0.7670224893771376, "grad_norm": 0.6873559951782227, "learning_rate": 5.429086360606946e-06, "loss": 0.1902, "step": 7401 }, { "epoch": 0.767126127059799, "grad_norm": 0.6474601626396179, "learning_rate": 5.424488437661946e-06, "loss": 0.2061, "step": 7402 }, { "epoch": 0.7672297647424604, "grad_norm": 0.6201249361038208, "learning_rate": 5.419892157062929e-06, "loss": 0.2112, "step": 7403 }, { "epoch": 0.7673334024251218, "grad_norm": 0.7287440896034241, "learning_rate": 5.4152975193277955e-06, "loss": 0.2156, "step": 7404 }, { "epoch": 0.7674370401077832, "grad_norm": 0.6518657803535461, "learning_rate": 5.4107045249742705e-06, "loss": 0.1786, "step": 7405 }, { "epoch": 0.7675406777904447, "grad_norm": 1.0298590660095215, "learning_rate": 5.406113174519874e-06, "loss": 0.2331, "step": 7406 }, { "epoch": 0.7676443154731061, "grad_norm": 0.6306723952293396, "learning_rate": 5.401523468481957e-06, "loss": 0.1963, "step": 7407 }, { "epoch": 0.7677479531557674, "grad_norm": 0.662980854511261, "learning_rate": 5.3969354073776905e-06, "loss": 0.213, "step": 7408 }, { "epoch": 0.7678515908384288, "grad_norm": 0.5138009190559387, "learning_rate": 5.392348991724039e-06, "loss": 0.1588, "step": 7409 }, { "epoch": 0.7679552285210902, "grad_norm": 0.6134780645370483, "learning_rate": 5.387764222037797e-06, "loss": 0.162, "step": 7410 }, { "epoch": 0.7680588662037516, "grad_norm": 0.6860098838806152, "learning_rate": 5.383181098835559e-06, "loss": 0.1942, "step": 7411 }, { "epoch": 0.768162503886413, "grad_norm": 0.7752522230148315, "learning_rate": 5.378599622633754e-06, "loss": 0.27, "step": 7412 }, { "epoch": 0.7682661415690745, "grad_norm": 0.7609741687774658, "learning_rate": 5.374019793948619e-06, "loss": 0.2123, "step": 7413 }, { "epoch": 0.7683697792517359, "grad_norm": 0.567396879196167, "learning_rate": 5.369441613296191e-06, "loss": 0.1608, "step": 7414 }, { "epoch": 0.7684734169343973, "grad_norm": 0.6172008514404297, "learning_rate": 5.3648650811923476e-06, "loss": 0.1783, "step": 7415 }, { "epoch": 0.7685770546170587, "grad_norm": 0.6998493075370789, "learning_rate": 5.3602901981527535e-06, "loss": 0.2033, "step": 7416 }, { "epoch": 0.7686806922997201, "grad_norm": 0.6385000348091125, "learning_rate": 5.355716964692896e-06, "loss": 0.1916, "step": 7417 }, { "epoch": 0.7687843299823816, "grad_norm": 0.6977570652961731, "learning_rate": 5.351145381328091e-06, "loss": 0.1962, "step": 7418 }, { "epoch": 0.768887967665043, "grad_norm": 0.8362807631492615, "learning_rate": 5.346575448573448e-06, "loss": 0.2188, "step": 7419 }, { "epoch": 0.7689916053477044, "grad_norm": 0.6531651020050049, "learning_rate": 5.34200716694391e-06, "loss": 0.2103, "step": 7420 }, { "epoch": 0.7690952430303658, "grad_norm": 0.7263716459274292, "learning_rate": 5.337440536954213e-06, "loss": 0.224, "step": 7421 }, { "epoch": 0.7691988807130272, "grad_norm": 0.6424431204795837, "learning_rate": 5.332875559118926e-06, "loss": 0.1877, "step": 7422 }, { "epoch": 0.7693025183956886, "grad_norm": 0.5936616063117981, "learning_rate": 5.328312233952424e-06, "loss": 0.1721, "step": 7423 }, { "epoch": 0.7694061560783501, "grad_norm": 0.6746377348899841, "learning_rate": 5.323750561968883e-06, "loss": 0.1984, "step": 7424 }, { "epoch": 0.7695097937610115, "grad_norm": 0.6120424866676331, "learning_rate": 5.319190543682322e-06, "loss": 0.1611, "step": 7425 }, { "epoch": 0.7696134314436729, "grad_norm": 0.6708462834358215, "learning_rate": 5.314632179606542e-06, "loss": 0.1872, "step": 7426 }, { "epoch": 0.7697170691263343, "grad_norm": 0.5981742739677429, "learning_rate": 5.31007547025518e-06, "loss": 0.2012, "step": 7427 }, { "epoch": 0.7698207068089957, "grad_norm": 0.6955262422561646, "learning_rate": 5.30552041614169e-06, "loss": 0.2084, "step": 7428 }, { "epoch": 0.7699243444916571, "grad_norm": 0.6701587438583374, "learning_rate": 5.300967017779304e-06, "loss": 0.2103, "step": 7429 }, { "epoch": 0.7700279821743186, "grad_norm": 0.620948851108551, "learning_rate": 5.296415275681108e-06, "loss": 0.188, "step": 7430 }, { "epoch": 0.77013161985698, "grad_norm": 0.578031599521637, "learning_rate": 5.291865190359979e-06, "loss": 0.1507, "step": 7431 }, { "epoch": 0.7702352575396414, "grad_norm": 0.6619542837142944, "learning_rate": 5.287316762328614e-06, "loss": 0.1747, "step": 7432 }, { "epoch": 0.7703388952223028, "grad_norm": 0.6650355458259583, "learning_rate": 5.282769992099532e-06, "loss": 0.1927, "step": 7433 }, { "epoch": 0.7704425329049642, "grad_norm": 0.6092379689216614, "learning_rate": 5.27822488018505e-06, "loss": 0.1978, "step": 7434 }, { "epoch": 0.7705461705876256, "grad_norm": 0.6268170475959778, "learning_rate": 5.273681427097302e-06, "loss": 0.1838, "step": 7435 }, { "epoch": 0.7706498082702871, "grad_norm": 0.6930217146873474, "learning_rate": 5.269139633348231e-06, "loss": 0.232, "step": 7436 }, { "epoch": 0.7707534459529485, "grad_norm": 0.6758466958999634, "learning_rate": 5.264599499449607e-06, "loss": 0.1875, "step": 7437 }, { "epoch": 0.7708570836356099, "grad_norm": 0.5889658331871033, "learning_rate": 5.260061025913013e-06, "loss": 0.1626, "step": 7438 }, { "epoch": 0.7709607213182713, "grad_norm": 0.6119863390922546, "learning_rate": 5.255524213249821e-06, "loss": 0.1937, "step": 7439 }, { "epoch": 0.7710643590009327, "grad_norm": 0.6818948984146118, "learning_rate": 5.2509890619712455e-06, "loss": 0.198, "step": 7440 }, { "epoch": 0.7711679966835941, "grad_norm": 0.597781777381897, "learning_rate": 5.246455572588296e-06, "loss": 0.1981, "step": 7441 }, { "epoch": 0.7712716343662556, "grad_norm": 0.6981128454208374, "learning_rate": 5.24192374561179e-06, "loss": 0.2089, "step": 7442 }, { "epoch": 0.771375272048917, "grad_norm": 0.6379117965698242, "learning_rate": 5.237393581552381e-06, "loss": 0.171, "step": 7443 }, { "epoch": 0.7714789097315784, "grad_norm": 0.6575020551681519, "learning_rate": 5.23286508092051e-06, "loss": 0.1762, "step": 7444 }, { "epoch": 0.7715825474142398, "grad_norm": 0.7529862523078918, "learning_rate": 5.22833824422645e-06, "loss": 0.2192, "step": 7445 }, { "epoch": 0.7716861850969012, "grad_norm": 0.7869521975517273, "learning_rate": 5.223813071980268e-06, "loss": 0.2304, "step": 7446 }, { "epoch": 0.7717898227795627, "grad_norm": 0.6265051960945129, "learning_rate": 5.219289564691865e-06, "loss": 0.1852, "step": 7447 }, { "epoch": 0.7718934604622241, "grad_norm": 0.6529081463813782, "learning_rate": 5.21476772287094e-06, "loss": 0.1881, "step": 7448 }, { "epoch": 0.7719970981448855, "grad_norm": 0.5538001656532288, "learning_rate": 5.210247547026994e-06, "loss": 0.1643, "step": 7449 }, { "epoch": 0.7721007358275469, "grad_norm": 0.6321317553520203, "learning_rate": 5.205729037669369e-06, "loss": 0.1948, "step": 7450 }, { "epoch": 0.7722043735102083, "grad_norm": 0.7393016815185547, "learning_rate": 5.201212195307195e-06, "loss": 0.2235, "step": 7451 }, { "epoch": 0.7723080111928697, "grad_norm": 0.7127220034599304, "learning_rate": 5.196697020449422e-06, "loss": 0.1997, "step": 7452 }, { "epoch": 0.7724116488755312, "grad_norm": 0.6728219389915466, "learning_rate": 5.1921835136048246e-06, "loss": 0.1959, "step": 7453 }, { "epoch": 0.7725152865581926, "grad_norm": 0.6634606719017029, "learning_rate": 5.187671675281969e-06, "loss": 0.1965, "step": 7454 }, { "epoch": 0.772618924240854, "grad_norm": 0.6769964694976807, "learning_rate": 5.183161505989245e-06, "loss": 0.2009, "step": 7455 }, { "epoch": 0.7727225619235154, "grad_norm": 0.7333029508590698, "learning_rate": 5.178653006234842e-06, "loss": 0.2321, "step": 7456 }, { "epoch": 0.7728261996061768, "grad_norm": 0.6659340858459473, "learning_rate": 5.174146176526777e-06, "loss": 0.2039, "step": 7457 }, { "epoch": 0.7729298372888382, "grad_norm": 0.5503813028335571, "learning_rate": 5.16964101737288e-06, "loss": 0.1678, "step": 7458 }, { "epoch": 0.7730334749714997, "grad_norm": 0.7269548177719116, "learning_rate": 5.165137529280773e-06, "loss": 0.2382, "step": 7459 }, { "epoch": 0.7731371126541611, "grad_norm": 0.5385704040527344, "learning_rate": 5.16063571275792e-06, "loss": 0.1917, "step": 7460 }, { "epoch": 0.7732407503368225, "grad_norm": 0.5735783576965332, "learning_rate": 5.1561355683115556e-06, "loss": 0.1565, "step": 7461 }, { "epoch": 0.7733443880194839, "grad_norm": 0.5853539705276489, "learning_rate": 5.151637096448759e-06, "loss": 0.1584, "step": 7462 }, { "epoch": 0.7734480257021453, "grad_norm": 0.7648028135299683, "learning_rate": 5.147140297676419e-06, "loss": 0.2353, "step": 7463 }, { "epoch": 0.7735516633848067, "grad_norm": 0.6179225444793701, "learning_rate": 5.142645172501213e-06, "loss": 0.1658, "step": 7464 }, { "epoch": 0.7736553010674682, "grad_norm": 0.6266987323760986, "learning_rate": 5.138151721429661e-06, "loss": 0.1843, "step": 7465 }, { "epoch": 0.7737589387501296, "grad_norm": 0.6973028779029846, "learning_rate": 5.133659944968068e-06, "loss": 0.1981, "step": 7466 }, { "epoch": 0.773862576432791, "grad_norm": 0.6442803740501404, "learning_rate": 5.129169843622559e-06, "loss": 0.1957, "step": 7467 }, { "epoch": 0.7739662141154524, "grad_norm": 0.6411221027374268, "learning_rate": 5.124681417899078e-06, "loss": 0.1768, "step": 7468 }, { "epoch": 0.7740698517981138, "grad_norm": 0.5441047549247742, "learning_rate": 5.120194668303367e-06, "loss": 0.149, "step": 7469 }, { "epoch": 0.7741734894807752, "grad_norm": 0.6542971730232239, "learning_rate": 5.1157095953409945e-06, "loss": 0.1912, "step": 7470 }, { "epoch": 0.7742771271634367, "grad_norm": 0.6243970394134521, "learning_rate": 5.1112261995173204e-06, "loss": 0.1683, "step": 7471 }, { "epoch": 0.7743807648460981, "grad_norm": 0.6158422231674194, "learning_rate": 5.106744481337538e-06, "loss": 0.1964, "step": 7472 }, { "epoch": 0.7744844025287595, "grad_norm": 0.6128589510917664, "learning_rate": 5.102264441306637e-06, "loss": 0.1793, "step": 7473 }, { "epoch": 0.7745880402114209, "grad_norm": 0.7475075721740723, "learning_rate": 5.097786079929414e-06, "loss": 0.2269, "step": 7474 }, { "epoch": 0.7746916778940823, "grad_norm": 0.639706015586853, "learning_rate": 5.093309397710496e-06, "loss": 0.1853, "step": 7475 }, { "epoch": 0.7747953155767437, "grad_norm": 0.656455397605896, "learning_rate": 5.088834395154294e-06, "loss": 0.2121, "step": 7476 }, { "epoch": 0.7748989532594052, "grad_norm": 0.6708592772483826, "learning_rate": 5.084361072765054e-06, "loss": 0.2067, "step": 7477 }, { "epoch": 0.7750025909420666, "grad_norm": 0.7351187467575073, "learning_rate": 5.079889431046827e-06, "loss": 0.2239, "step": 7478 }, { "epoch": 0.775106228624728, "grad_norm": 0.6682601571083069, "learning_rate": 5.0754194705034665e-06, "loss": 0.1995, "step": 7479 }, { "epoch": 0.7752098663073894, "grad_norm": 0.5769458413124084, "learning_rate": 5.070951191638638e-06, "loss": 0.1914, "step": 7480 }, { "epoch": 0.7753135039900508, "grad_norm": 0.549268901348114, "learning_rate": 5.066484594955816e-06, "loss": 0.155, "step": 7481 }, { "epoch": 0.7754171416727123, "grad_norm": 0.7133857011795044, "learning_rate": 5.062019680958297e-06, "loss": 0.2038, "step": 7482 }, { "epoch": 0.7755207793553737, "grad_norm": 0.6121452450752258, "learning_rate": 5.057556450149181e-06, "loss": 0.1537, "step": 7483 }, { "epoch": 0.775624417038035, "grad_norm": 0.762610137462616, "learning_rate": 5.053094903031372e-06, "loss": 0.2088, "step": 7484 }, { "epoch": 0.7757280547206964, "grad_norm": 0.7283558249473572, "learning_rate": 5.0486350401076014e-06, "loss": 0.2135, "step": 7485 }, { "epoch": 0.7758316924033578, "grad_norm": 0.7834716439247131, "learning_rate": 5.04417686188039e-06, "loss": 0.2105, "step": 7486 }, { "epoch": 0.7759353300860192, "grad_norm": 0.7361102104187012, "learning_rate": 5.039720368852075e-06, "loss": 0.1951, "step": 7487 }, { "epoch": 0.7760389677686806, "grad_norm": 0.5629470348358154, "learning_rate": 5.035265561524818e-06, "loss": 0.1637, "step": 7488 }, { "epoch": 0.7761426054513421, "grad_norm": 0.6459644436836243, "learning_rate": 5.030812440400567e-06, "loss": 0.1788, "step": 7489 }, { "epoch": 0.7762462431340035, "grad_norm": 0.5973713994026184, "learning_rate": 5.026361005981109e-06, "loss": 0.161, "step": 7490 }, { "epoch": 0.7763498808166649, "grad_norm": 0.6738793849945068, "learning_rate": 5.021911258768015e-06, "loss": 0.1773, "step": 7491 }, { "epoch": 0.7764535184993263, "grad_norm": 0.6055402159690857, "learning_rate": 5.017463199262671e-06, "loss": 0.1879, "step": 7492 }, { "epoch": 0.7765571561819877, "grad_norm": 0.6321949362754822, "learning_rate": 5.013016827966289e-06, "loss": 0.1795, "step": 7493 }, { "epoch": 0.7766607938646491, "grad_norm": 0.6103180646896362, "learning_rate": 5.008572145379866e-06, "loss": 0.1861, "step": 7494 }, { "epoch": 0.7767644315473106, "grad_norm": 0.6383140087127686, "learning_rate": 5.004129152004236e-06, "loss": 0.1591, "step": 7495 }, { "epoch": 0.776868069229972, "grad_norm": 0.6704176068305969, "learning_rate": 4.999687848340013e-06, "loss": 0.1914, "step": 7496 }, { "epoch": 0.7769717069126334, "grad_norm": 0.7036976218223572, "learning_rate": 4.995248234887655e-06, "loss": 0.1977, "step": 7497 }, { "epoch": 0.7770753445952948, "grad_norm": 0.6711276173591614, "learning_rate": 4.990810312147398e-06, "loss": 0.1765, "step": 7498 }, { "epoch": 0.7771789822779562, "grad_norm": 0.6839596629142761, "learning_rate": 4.9863740806192965e-06, "loss": 0.2104, "step": 7499 }, { "epoch": 0.7772826199606176, "grad_norm": 0.6389532685279846, "learning_rate": 4.9819395408032316e-06, "loss": 0.198, "step": 7500 }, { "epoch": 0.7773862576432791, "grad_norm": 0.6310150027275085, "learning_rate": 4.977506693198868e-06, "loss": 0.1597, "step": 7501 }, { "epoch": 0.7774898953259405, "grad_norm": 0.6443967223167419, "learning_rate": 4.973075538305696e-06, "loss": 0.1648, "step": 7502 }, { "epoch": 0.7775935330086019, "grad_norm": 0.7315575480461121, "learning_rate": 4.968646076623018e-06, "loss": 0.2289, "step": 7503 }, { "epoch": 0.7776971706912633, "grad_norm": 0.6594371795654297, "learning_rate": 4.964218308649933e-06, "loss": 0.189, "step": 7504 }, { "epoch": 0.7778008083739247, "grad_norm": 0.6741247177124023, "learning_rate": 4.959792234885357e-06, "loss": 0.1891, "step": 7505 }, { "epoch": 0.7779044460565862, "grad_norm": 0.5423347353935242, "learning_rate": 4.955367855828006e-06, "loss": 0.174, "step": 7506 }, { "epoch": 0.7780080837392476, "grad_norm": 0.6483452916145325, "learning_rate": 4.9509451719764155e-06, "loss": 0.1826, "step": 7507 }, { "epoch": 0.778111721421909, "grad_norm": 0.6669498085975647, "learning_rate": 4.946524183828935e-06, "loss": 0.1766, "step": 7508 }, { "epoch": 0.7782153591045704, "grad_norm": 0.7897505164146423, "learning_rate": 4.942104891883706e-06, "loss": 0.1816, "step": 7509 }, { "epoch": 0.7783189967872318, "grad_norm": 0.6134946346282959, "learning_rate": 4.9376872966386915e-06, "loss": 0.1867, "step": 7510 }, { "epoch": 0.7784226344698932, "grad_norm": 0.6003133654594421, "learning_rate": 4.933271398591659e-06, "loss": 0.1704, "step": 7511 }, { "epoch": 0.7785262721525547, "grad_norm": 0.6122907400131226, "learning_rate": 4.928857198240178e-06, "loss": 0.1665, "step": 7512 }, { "epoch": 0.7786299098352161, "grad_norm": 0.6975659132003784, "learning_rate": 4.924444696081645e-06, "loss": 0.1831, "step": 7513 }, { "epoch": 0.7787335475178775, "grad_norm": 0.5816099643707275, "learning_rate": 4.9200338926132426e-06, "loss": 0.1692, "step": 7514 }, { "epoch": 0.7788371852005389, "grad_norm": 0.7342694401741028, "learning_rate": 4.915624788331985e-06, "loss": 0.1865, "step": 7515 }, { "epoch": 0.7789408228832003, "grad_norm": 0.6069790720939636, "learning_rate": 4.911217383734672e-06, "loss": 0.1633, "step": 7516 }, { "epoch": 0.7790444605658617, "grad_norm": 0.728117048740387, "learning_rate": 4.906811679317933e-06, "loss": 0.2204, "step": 7517 }, { "epoch": 0.7791480982485232, "grad_norm": 0.6618308424949646, "learning_rate": 4.902407675578191e-06, "loss": 0.1931, "step": 7518 }, { "epoch": 0.7792517359311846, "grad_norm": 0.6637287139892578, "learning_rate": 4.898005373011676e-06, "loss": 0.1927, "step": 7519 }, { "epoch": 0.779355373613846, "grad_norm": 0.6428270936012268, "learning_rate": 4.893604772114446e-06, "loss": 0.183, "step": 7520 }, { "epoch": 0.7794590112965074, "grad_norm": 0.5997044444084167, "learning_rate": 4.8892058733823415e-06, "loss": 0.1783, "step": 7521 }, { "epoch": 0.7795626489791688, "grad_norm": 0.703879177570343, "learning_rate": 4.884808677311028e-06, "loss": 0.199, "step": 7522 }, { "epoch": 0.7796662866618302, "grad_norm": 0.7539674043655396, "learning_rate": 4.880413184395989e-06, "loss": 0.1822, "step": 7523 }, { "epoch": 0.7797699243444917, "grad_norm": 0.7416384816169739, "learning_rate": 4.876019395132474e-06, "loss": 0.23, "step": 7524 }, { "epoch": 0.7798735620271531, "grad_norm": 0.6457820534706116, "learning_rate": 4.871627310015592e-06, "loss": 0.1892, "step": 7525 }, { "epoch": 0.7799771997098145, "grad_norm": 0.6704725027084351, "learning_rate": 4.8672369295402175e-06, "loss": 0.1884, "step": 7526 }, { "epoch": 0.7800808373924759, "grad_norm": 0.643944501876831, "learning_rate": 4.862848254201065e-06, "loss": 0.1914, "step": 7527 }, { "epoch": 0.7801844750751373, "grad_norm": 0.6778848767280579, "learning_rate": 4.8584612844926436e-06, "loss": 0.2133, "step": 7528 }, { "epoch": 0.7802881127577987, "grad_norm": 0.6219654083251953, "learning_rate": 4.854076020909268e-06, "loss": 0.1786, "step": 7529 }, { "epoch": 0.7803917504404602, "grad_norm": 0.6912334561347961, "learning_rate": 4.8496924639450616e-06, "loss": 0.201, "step": 7530 }, { "epoch": 0.7804953881231216, "grad_norm": 0.596788227558136, "learning_rate": 4.84531061409395e-06, "loss": 0.1809, "step": 7531 }, { "epoch": 0.780599025805783, "grad_norm": 0.6440489888191223, "learning_rate": 4.8409304718496806e-06, "loss": 0.1877, "step": 7532 }, { "epoch": 0.7807026634884444, "grad_norm": 0.6101676821708679, "learning_rate": 4.836552037705806e-06, "loss": 0.1767, "step": 7533 }, { "epoch": 0.7808063011711058, "grad_norm": 0.5853814482688904, "learning_rate": 4.832175312155671e-06, "loss": 0.1783, "step": 7534 }, { "epoch": 0.7809099388537672, "grad_norm": 0.7949321866035461, "learning_rate": 4.8278002956924466e-06, "loss": 0.2089, "step": 7535 }, { "epoch": 0.7810135765364287, "grad_norm": 0.7004521489143372, "learning_rate": 4.8234269888091015e-06, "loss": 0.199, "step": 7536 }, { "epoch": 0.7811172142190901, "grad_norm": 0.8060917258262634, "learning_rate": 4.819055391998404e-06, "loss": 0.2149, "step": 7537 }, { "epoch": 0.7812208519017515, "grad_norm": 0.6604312062263489, "learning_rate": 4.814685505752951e-06, "loss": 0.194, "step": 7538 }, { "epoch": 0.7813244895844129, "grad_norm": 0.6711355447769165, "learning_rate": 4.810317330565124e-06, "loss": 0.1831, "step": 7539 }, { "epoch": 0.7814281272670743, "grad_norm": 0.624002993106842, "learning_rate": 4.805950866927134e-06, "loss": 0.1685, "step": 7540 }, { "epoch": 0.7815317649497358, "grad_norm": 0.694841206073761, "learning_rate": 4.801586115330974e-06, "loss": 0.1975, "step": 7541 }, { "epoch": 0.7816354026323972, "grad_norm": 0.6798537969589233, "learning_rate": 4.7972230762684695e-06, "loss": 0.2092, "step": 7542 }, { "epoch": 0.7817390403150586, "grad_norm": 0.7163605093955994, "learning_rate": 4.792861750231235e-06, "loss": 0.2637, "step": 7543 }, { "epoch": 0.78184267799772, "grad_norm": 0.6866286396980286, "learning_rate": 4.788502137710696e-06, "loss": 0.2164, "step": 7544 }, { "epoch": 0.7819463156803814, "grad_norm": 0.6416062116622925, "learning_rate": 4.784144239198092e-06, "loss": 0.1825, "step": 7545 }, { "epoch": 0.7820499533630428, "grad_norm": 0.6997154355049133, "learning_rate": 4.779788055184456e-06, "loss": 0.2044, "step": 7546 }, { "epoch": 0.7821535910457043, "grad_norm": 0.7446556091308594, "learning_rate": 4.775433586160643e-06, "loss": 0.2441, "step": 7547 }, { "epoch": 0.7822572287283657, "grad_norm": 0.5584850907325745, "learning_rate": 4.7710808326173115e-06, "loss": 0.1662, "step": 7548 }, { "epoch": 0.7823608664110271, "grad_norm": 0.6249669194221497, "learning_rate": 4.7667297950449175e-06, "loss": 0.1936, "step": 7549 }, { "epoch": 0.7824645040936885, "grad_norm": 0.7454187273979187, "learning_rate": 4.7623804739337294e-06, "loss": 0.2051, "step": 7550 }, { "epoch": 0.7825681417763499, "grad_norm": 0.735968828201294, "learning_rate": 4.7580328697738185e-06, "loss": 0.2207, "step": 7551 }, { "epoch": 0.7826717794590113, "grad_norm": 0.6438806653022766, "learning_rate": 4.753686983055068e-06, "loss": 0.2113, "step": 7552 }, { "epoch": 0.7827754171416728, "grad_norm": 0.7040858268737793, "learning_rate": 4.749342814267175e-06, "loss": 0.1905, "step": 7553 }, { "epoch": 0.7828790548243342, "grad_norm": 0.7186905145645142, "learning_rate": 4.7450003638996236e-06, "loss": 0.2102, "step": 7554 }, { "epoch": 0.7829826925069956, "grad_norm": 0.6388292908668518, "learning_rate": 4.740659632441718e-06, "loss": 0.1748, "step": 7555 }, { "epoch": 0.783086330189657, "grad_norm": 0.7200673222541809, "learning_rate": 4.736320620382557e-06, "loss": 0.194, "step": 7556 }, { "epoch": 0.7831899678723184, "grad_norm": 0.7050800323486328, "learning_rate": 4.731983328211063e-06, "loss": 0.2493, "step": 7557 }, { "epoch": 0.7832936055549798, "grad_norm": 0.6410065293312073, "learning_rate": 4.727647756415959e-06, "loss": 0.1901, "step": 7558 }, { "epoch": 0.7833972432376413, "grad_norm": 0.5777173638343811, "learning_rate": 4.723313905485756e-06, "loss": 0.145, "step": 7559 }, { "epoch": 0.7835008809203026, "grad_norm": 0.6110885143280029, "learning_rate": 4.718981775908802e-06, "loss": 0.1884, "step": 7560 }, { "epoch": 0.783604518602964, "grad_norm": 0.6367966532707214, "learning_rate": 4.714651368173224e-06, "loss": 0.1869, "step": 7561 }, { "epoch": 0.7837081562856254, "grad_norm": 0.6776068210601807, "learning_rate": 4.710322682766966e-06, "loss": 0.1777, "step": 7562 }, { "epoch": 0.7838117939682868, "grad_norm": 0.7195240259170532, "learning_rate": 4.705995720177783e-06, "loss": 0.2436, "step": 7563 }, { "epoch": 0.7839154316509482, "grad_norm": 0.6103599071502686, "learning_rate": 4.7016704808932215e-06, "loss": 0.1693, "step": 7564 }, { "epoch": 0.7840190693336097, "grad_norm": 0.5551150441169739, "learning_rate": 4.697346965400655e-06, "loss": 0.1532, "step": 7565 }, { "epoch": 0.7841227070162711, "grad_norm": 0.7115580439567566, "learning_rate": 4.69302517418724e-06, "loss": 0.1867, "step": 7566 }, { "epoch": 0.7842263446989325, "grad_norm": 0.6921216249465942, "learning_rate": 4.688705107739957e-06, "loss": 0.2031, "step": 7567 }, { "epoch": 0.7843299823815939, "grad_norm": 0.667380690574646, "learning_rate": 4.684386766545581e-06, "loss": 0.1955, "step": 7568 }, { "epoch": 0.7844336200642553, "grad_norm": 0.603564441204071, "learning_rate": 4.68007015109069e-06, "loss": 0.1552, "step": 7569 }, { "epoch": 0.7845372577469167, "grad_norm": 0.6818835139274597, "learning_rate": 4.675755261861683e-06, "loss": 0.1894, "step": 7570 }, { "epoch": 0.7846408954295782, "grad_norm": 0.6953232288360596, "learning_rate": 4.671442099344748e-06, "loss": 0.2172, "step": 7571 }, { "epoch": 0.7847445331122396, "grad_norm": 0.5935460329055786, "learning_rate": 4.667130664025887e-06, "loss": 0.1555, "step": 7572 }, { "epoch": 0.784848170794901, "grad_norm": 0.541191577911377, "learning_rate": 4.662820956390914e-06, "loss": 0.1599, "step": 7573 }, { "epoch": 0.7849518084775624, "grad_norm": 0.6485776305198669, "learning_rate": 4.6585129769254335e-06, "loss": 0.1955, "step": 7574 }, { "epoch": 0.7850554461602238, "grad_norm": 0.7501851320266724, "learning_rate": 4.6542067261148605e-06, "loss": 0.1685, "step": 7575 }, { "epoch": 0.7851590838428852, "grad_norm": 0.7224445939064026, "learning_rate": 4.6499022044444146e-06, "loss": 0.1905, "step": 7576 }, { "epoch": 0.7852627215255467, "grad_norm": 0.7130233645439148, "learning_rate": 4.6455994123991245e-06, "loss": 0.1917, "step": 7577 }, { "epoch": 0.7853663592082081, "grad_norm": 0.6433622241020203, "learning_rate": 4.641298350463829e-06, "loss": 0.195, "step": 7578 }, { "epoch": 0.7854699968908695, "grad_norm": 0.5959939956665039, "learning_rate": 4.636999019123156e-06, "loss": 0.1559, "step": 7579 }, { "epoch": 0.7855736345735309, "grad_norm": 0.597389280796051, "learning_rate": 4.632701418861556e-06, "loss": 0.1695, "step": 7580 }, { "epoch": 0.7856772722561923, "grad_norm": 0.669829249382019, "learning_rate": 4.628405550163271e-06, "loss": 0.2049, "step": 7581 }, { "epoch": 0.7857809099388537, "grad_norm": 0.7399320602416992, "learning_rate": 4.624111413512347e-06, "loss": 0.2242, "step": 7582 }, { "epoch": 0.7858845476215152, "grad_norm": 0.6723091006278992, "learning_rate": 4.619819009392652e-06, "loss": 0.1893, "step": 7583 }, { "epoch": 0.7859881853041766, "grad_norm": 0.757306694984436, "learning_rate": 4.615528338287838e-06, "loss": 0.2211, "step": 7584 }, { "epoch": 0.786091822986838, "grad_norm": 0.5871422290802002, "learning_rate": 4.61123940068138e-06, "loss": 0.1739, "step": 7585 }, { "epoch": 0.7861954606694994, "grad_norm": 0.7001006603240967, "learning_rate": 4.606952197056545e-06, "loss": 0.1978, "step": 7586 }, { "epoch": 0.7862990983521608, "grad_norm": 0.6934093832969666, "learning_rate": 4.602666727896401e-06, "loss": 0.2132, "step": 7587 }, { "epoch": 0.7864027360348222, "grad_norm": 0.5585488677024841, "learning_rate": 4.598382993683839e-06, "loss": 0.1583, "step": 7588 }, { "epoch": 0.7865063737174837, "grad_norm": 0.6785788536071777, "learning_rate": 4.594100994901536e-06, "loss": 0.1813, "step": 7589 }, { "epoch": 0.7866100114001451, "grad_norm": 0.6068393588066101, "learning_rate": 4.589820732031986e-06, "loss": 0.1642, "step": 7590 }, { "epoch": 0.7867136490828065, "grad_norm": 0.6990689039230347, "learning_rate": 4.585542205557478e-06, "loss": 0.1972, "step": 7591 }, { "epoch": 0.7868172867654679, "grad_norm": 0.6085687279701233, "learning_rate": 4.581265415960117e-06, "loss": 0.1692, "step": 7592 }, { "epoch": 0.7869209244481293, "grad_norm": 0.7180781960487366, "learning_rate": 4.5769903637217985e-06, "loss": 0.2019, "step": 7593 }, { "epoch": 0.7870245621307908, "grad_norm": 0.6923770308494568, "learning_rate": 4.5727170493242245e-06, "loss": 0.1699, "step": 7594 }, { "epoch": 0.7871281998134522, "grad_norm": 0.5988040566444397, "learning_rate": 4.5684454732489195e-06, "loss": 0.1571, "step": 7595 }, { "epoch": 0.7872318374961136, "grad_norm": 0.6364946365356445, "learning_rate": 4.564175635977181e-06, "loss": 0.1648, "step": 7596 }, { "epoch": 0.787335475178775, "grad_norm": 0.7644514441490173, "learning_rate": 4.559907537990138e-06, "loss": 0.2072, "step": 7597 }, { "epoch": 0.7874391128614364, "grad_norm": 0.7390966415405273, "learning_rate": 4.555641179768718e-06, "loss": 0.2181, "step": 7598 }, { "epoch": 0.7875427505440978, "grad_norm": 0.670900285243988, "learning_rate": 4.551376561793641e-06, "loss": 0.2063, "step": 7599 }, { "epoch": 0.7876463882267593, "grad_norm": 0.6904001832008362, "learning_rate": 4.547113684545437e-06, "loss": 0.1957, "step": 7600 }, { "epoch": 0.7877500259094207, "grad_norm": 0.6743488311767578, "learning_rate": 4.542852548504435e-06, "loss": 0.1843, "step": 7601 }, { "epoch": 0.7878536635920821, "grad_norm": 0.7440134882926941, "learning_rate": 4.538593154150779e-06, "loss": 0.2007, "step": 7602 }, { "epoch": 0.7879573012747435, "grad_norm": 0.6265433430671692, "learning_rate": 4.534335501964417e-06, "loss": 0.1843, "step": 7603 }, { "epoch": 0.7880609389574049, "grad_norm": 0.6931333541870117, "learning_rate": 4.530079592425083e-06, "loss": 0.223, "step": 7604 }, { "epoch": 0.7881645766400663, "grad_norm": 0.5931916236877441, "learning_rate": 4.52582542601234e-06, "loss": 0.1664, "step": 7605 }, { "epoch": 0.7882682143227278, "grad_norm": 0.7090798020362854, "learning_rate": 4.5215730032055305e-06, "loss": 0.2137, "step": 7606 }, { "epoch": 0.7883718520053892, "grad_norm": 0.6874864101409912, "learning_rate": 4.517322324483808e-06, "loss": 0.196, "step": 7607 }, { "epoch": 0.7884754896880506, "grad_norm": 0.5806185603141785, "learning_rate": 4.5130733903261435e-06, "loss": 0.1846, "step": 7608 }, { "epoch": 0.788579127370712, "grad_norm": 0.5684027671813965, "learning_rate": 4.508826201211289e-06, "loss": 0.143, "step": 7609 }, { "epoch": 0.7886827650533734, "grad_norm": 0.6393711566925049, "learning_rate": 4.504580757617818e-06, "loss": 0.1733, "step": 7610 }, { "epoch": 0.7887864027360348, "grad_norm": 0.617420494556427, "learning_rate": 4.5003370600241024e-06, "loss": 0.1706, "step": 7611 }, { "epoch": 0.7888900404186963, "grad_norm": 0.6285762786865234, "learning_rate": 4.496095108908314e-06, "loss": 0.1777, "step": 7612 }, { "epoch": 0.7889936781013577, "grad_norm": 0.6379365921020508, "learning_rate": 4.491854904748425e-06, "loss": 0.1945, "step": 7613 }, { "epoch": 0.7890973157840191, "grad_norm": 0.7149636745452881, "learning_rate": 4.487616448022214e-06, "loss": 0.2048, "step": 7614 }, { "epoch": 0.7892009534666805, "grad_norm": 0.7371107339859009, "learning_rate": 4.483379739207268e-06, "loss": 0.2181, "step": 7615 }, { "epoch": 0.7893045911493419, "grad_norm": 0.81303870677948, "learning_rate": 4.479144778780975e-06, "loss": 0.2209, "step": 7616 }, { "epoch": 0.7894082288320033, "grad_norm": 0.6847606897354126, "learning_rate": 4.474911567220521e-06, "loss": 0.1824, "step": 7617 }, { "epoch": 0.7895118665146648, "grad_norm": 0.7516778707504272, "learning_rate": 4.470680105002898e-06, "loss": 0.2122, "step": 7618 }, { "epoch": 0.7896155041973262, "grad_norm": 0.6628775596618652, "learning_rate": 4.466450392604895e-06, "loss": 0.1991, "step": 7619 }, { "epoch": 0.7897191418799876, "grad_norm": 0.7115910053253174, "learning_rate": 4.462222430503116e-06, "loss": 0.1812, "step": 7620 }, { "epoch": 0.789822779562649, "grad_norm": 0.6615688800811768, "learning_rate": 4.457996219173961e-06, "loss": 0.2134, "step": 7621 }, { "epoch": 0.7899264172453104, "grad_norm": 0.7453076839447021, "learning_rate": 4.453771759093628e-06, "loss": 0.2128, "step": 7622 }, { "epoch": 0.7900300549279718, "grad_norm": 0.6784555912017822, "learning_rate": 4.4495490507381335e-06, "loss": 0.1785, "step": 7623 }, { "epoch": 0.7901336926106333, "grad_norm": 0.700274646282196, "learning_rate": 4.445328094583276e-06, "loss": 0.2061, "step": 7624 }, { "epoch": 0.7902373302932947, "grad_norm": 0.5862249135971069, "learning_rate": 4.441108891104664e-06, "loss": 0.1685, "step": 7625 }, { "epoch": 0.7903409679759561, "grad_norm": 0.7702421545982361, "learning_rate": 4.4368914407777195e-06, "loss": 0.2497, "step": 7626 }, { "epoch": 0.7904446056586175, "grad_norm": 0.6641459465026855, "learning_rate": 4.43267574407765e-06, "loss": 0.1881, "step": 7627 }, { "epoch": 0.7905482433412789, "grad_norm": 0.6433275938034058, "learning_rate": 4.428461801479485e-06, "loss": 0.1791, "step": 7628 }, { "epoch": 0.7906518810239404, "grad_norm": 0.5558257699012756, "learning_rate": 4.424249613458029e-06, "loss": 0.1568, "step": 7629 }, { "epoch": 0.7907555187066018, "grad_norm": 0.5986382961273193, "learning_rate": 4.420039180487921e-06, "loss": 0.1797, "step": 7630 }, { "epoch": 0.7908591563892632, "grad_norm": 0.8127989768981934, "learning_rate": 4.415830503043577e-06, "loss": 0.2045, "step": 7631 }, { "epoch": 0.7909627940719246, "grad_norm": 0.6428434252738953, "learning_rate": 4.41162358159922e-06, "loss": 0.186, "step": 7632 }, { "epoch": 0.791066431754586, "grad_norm": 0.6171351075172424, "learning_rate": 4.4074184166288926e-06, "loss": 0.1645, "step": 7633 }, { "epoch": 0.7911700694372474, "grad_norm": 0.5491045713424683, "learning_rate": 4.4032150086064145e-06, "loss": 0.1743, "step": 7634 }, { "epoch": 0.7912737071199089, "grad_norm": 0.7326172590255737, "learning_rate": 4.399013358005422e-06, "loss": 0.2124, "step": 7635 }, { "epoch": 0.7913773448025702, "grad_norm": 0.5941773653030396, "learning_rate": 4.3948134652993566e-06, "loss": 0.1853, "step": 7636 }, { "epoch": 0.7914809824852316, "grad_norm": 0.7196014523506165, "learning_rate": 4.390615330961452e-06, "loss": 0.2059, "step": 7637 }, { "epoch": 0.791584620167893, "grad_norm": 0.5386014580726624, "learning_rate": 4.386418955464746e-06, "loss": 0.1561, "step": 7638 }, { "epoch": 0.7916882578505544, "grad_norm": 0.6675041913986206, "learning_rate": 4.382224339282078e-06, "loss": 0.196, "step": 7639 }, { "epoch": 0.7917918955332158, "grad_norm": 0.6638970971107483, "learning_rate": 4.3780314828860895e-06, "loss": 0.1982, "step": 7640 }, { "epoch": 0.7918955332158772, "grad_norm": 0.8089138269424438, "learning_rate": 4.3738403867492355e-06, "loss": 0.2275, "step": 7641 }, { "epoch": 0.7919991708985387, "grad_norm": 0.7028430700302124, "learning_rate": 4.369651051343748e-06, "loss": 0.1993, "step": 7642 }, { "epoch": 0.7921028085812001, "grad_norm": 0.6406500339508057, "learning_rate": 4.365463477141691e-06, "loss": 0.1906, "step": 7643 }, { "epoch": 0.7922064462638615, "grad_norm": 0.6021590828895569, "learning_rate": 4.361277664614902e-06, "loss": 0.1731, "step": 7644 }, { "epoch": 0.7923100839465229, "grad_norm": 0.6423798203468323, "learning_rate": 4.357093614235033e-06, "loss": 0.1857, "step": 7645 }, { "epoch": 0.7924137216291843, "grad_norm": 0.6370554566383362, "learning_rate": 4.3529113264735415e-06, "loss": 0.2192, "step": 7646 }, { "epoch": 0.7925173593118457, "grad_norm": 0.7619375586509705, "learning_rate": 4.3487308018016724e-06, "loss": 0.2125, "step": 7647 }, { "epoch": 0.7926209969945072, "grad_norm": 0.776597797870636, "learning_rate": 4.344552040690491e-06, "loss": 0.2391, "step": 7648 }, { "epoch": 0.7927246346771686, "grad_norm": 0.6406593918800354, "learning_rate": 4.340375043610849e-06, "loss": 0.2062, "step": 7649 }, { "epoch": 0.79282827235983, "grad_norm": 0.555220365524292, "learning_rate": 4.336199811033399e-06, "loss": 0.1435, "step": 7650 }, { "epoch": 0.7929319100424914, "grad_norm": 0.6545402407646179, "learning_rate": 4.33202634342861e-06, "loss": 0.1921, "step": 7651 }, { "epoch": 0.7930355477251528, "grad_norm": 0.6345674395561218, "learning_rate": 4.327854641266731e-06, "loss": 0.1772, "step": 7652 }, { "epoch": 0.7931391854078143, "grad_norm": 0.7767759561538696, "learning_rate": 4.323684705017832e-06, "loss": 0.2258, "step": 7653 }, { "epoch": 0.7932428230904757, "grad_norm": 0.671420156955719, "learning_rate": 4.3195165351517665e-06, "loss": 0.1911, "step": 7654 }, { "epoch": 0.7933464607731371, "grad_norm": 0.6630171537399292, "learning_rate": 4.31535013213821e-06, "loss": 0.1895, "step": 7655 }, { "epoch": 0.7934500984557985, "grad_norm": 0.6785514950752258, "learning_rate": 4.311185496446615e-06, "loss": 0.197, "step": 7656 }, { "epoch": 0.7935537361384599, "grad_norm": 0.6731833219528198, "learning_rate": 4.307022628546245e-06, "loss": 0.1895, "step": 7657 }, { "epoch": 0.7936573738211213, "grad_norm": 0.6627957820892334, "learning_rate": 4.302861528906175e-06, "loss": 0.1866, "step": 7658 }, { "epoch": 0.7937610115037828, "grad_norm": 0.6604629158973694, "learning_rate": 4.2987021979952614e-06, "loss": 0.1938, "step": 7659 }, { "epoch": 0.7938646491864442, "grad_norm": 0.64192795753479, "learning_rate": 4.294544636282176e-06, "loss": 0.1723, "step": 7660 }, { "epoch": 0.7939682868691056, "grad_norm": 0.7034128904342651, "learning_rate": 4.290388844235393e-06, "loss": 0.2055, "step": 7661 }, { "epoch": 0.794071924551767, "grad_norm": 0.653624951839447, "learning_rate": 4.286234822323172e-06, "loss": 0.198, "step": 7662 }, { "epoch": 0.7941755622344284, "grad_norm": 0.5965669751167297, "learning_rate": 4.282082571013586e-06, "loss": 0.1825, "step": 7663 }, { "epoch": 0.7942791999170898, "grad_norm": 0.5858683586120605, "learning_rate": 4.277932090774495e-06, "loss": 0.1663, "step": 7664 }, { "epoch": 0.7943828375997513, "grad_norm": 0.6321005821228027, "learning_rate": 4.2737833820735755e-06, "loss": 0.1959, "step": 7665 }, { "epoch": 0.7944864752824127, "grad_norm": 0.7521500587463379, "learning_rate": 4.269636445378302e-06, "loss": 0.2037, "step": 7666 }, { "epoch": 0.7945901129650741, "grad_norm": 0.6142362356185913, "learning_rate": 4.265491281155938e-06, "loss": 0.1817, "step": 7667 }, { "epoch": 0.7946937506477355, "grad_norm": 0.6033626198768616, "learning_rate": 4.261347889873559e-06, "loss": 0.1917, "step": 7668 }, { "epoch": 0.7947973883303969, "grad_norm": 0.6412879228591919, "learning_rate": 4.257206271998035e-06, "loss": 0.2057, "step": 7669 }, { "epoch": 0.7949010260130583, "grad_norm": 0.6569292545318604, "learning_rate": 4.2530664279960306e-06, "loss": 0.174, "step": 7670 }, { "epoch": 0.7950046636957198, "grad_norm": 0.6312701106071472, "learning_rate": 4.248928358334028e-06, "loss": 0.1919, "step": 7671 }, { "epoch": 0.7951083013783812, "grad_norm": 0.538118302822113, "learning_rate": 4.244792063478285e-06, "loss": 0.1808, "step": 7672 }, { "epoch": 0.7952119390610426, "grad_norm": 0.6475589275360107, "learning_rate": 4.240657543894886e-06, "loss": 0.1569, "step": 7673 }, { "epoch": 0.795315576743704, "grad_norm": 0.5423773527145386, "learning_rate": 4.236524800049693e-06, "loss": 0.1594, "step": 7674 }, { "epoch": 0.7954192144263654, "grad_norm": 0.6256198883056641, "learning_rate": 4.232393832408386e-06, "loss": 0.1882, "step": 7675 }, { "epoch": 0.7955228521090268, "grad_norm": 0.6854049563407898, "learning_rate": 4.228264641436428e-06, "loss": 0.1953, "step": 7676 }, { "epoch": 0.7956264897916883, "grad_norm": 0.5907077193260193, "learning_rate": 4.22413722759909e-06, "loss": 0.1858, "step": 7677 }, { "epoch": 0.7957301274743497, "grad_norm": 0.8190000653266907, "learning_rate": 4.220011591361451e-06, "loss": 0.2183, "step": 7678 }, { "epoch": 0.7958337651570111, "grad_norm": 0.8084255456924438, "learning_rate": 4.215887733188367e-06, "loss": 0.2142, "step": 7679 }, { "epoch": 0.7959374028396725, "grad_norm": 0.6585685610771179, "learning_rate": 4.211765653544524e-06, "loss": 0.1927, "step": 7680 }, { "epoch": 0.7960410405223339, "grad_norm": 0.6803631782531738, "learning_rate": 4.2076453528943824e-06, "loss": 0.1931, "step": 7681 }, { "epoch": 0.7961446782049953, "grad_norm": 0.616976261138916, "learning_rate": 4.203526831702207e-06, "loss": 0.1928, "step": 7682 }, { "epoch": 0.7962483158876568, "grad_norm": 0.6163725256919861, "learning_rate": 4.199410090432079e-06, "loss": 0.1834, "step": 7683 }, { "epoch": 0.7963519535703182, "grad_norm": 0.6457942724227905, "learning_rate": 4.195295129547854e-06, "loss": 0.2003, "step": 7684 }, { "epoch": 0.7964555912529796, "grad_norm": 0.5381678342819214, "learning_rate": 4.191181949513206e-06, "loss": 0.1674, "step": 7685 }, { "epoch": 0.796559228935641, "grad_norm": 0.7502049207687378, "learning_rate": 4.187070550791603e-06, "loss": 0.2316, "step": 7686 }, { "epoch": 0.7966628666183024, "grad_norm": 0.6284844875335693, "learning_rate": 4.182960933846311e-06, "loss": 0.1994, "step": 7687 }, { "epoch": 0.7967665043009639, "grad_norm": 0.6811497807502747, "learning_rate": 4.178853099140392e-06, "loss": 0.2126, "step": 7688 }, { "epoch": 0.7968701419836253, "grad_norm": 0.6516885757446289, "learning_rate": 4.174747047136707e-06, "loss": 0.1746, "step": 7689 }, { "epoch": 0.7969737796662867, "grad_norm": 0.6377501487731934, "learning_rate": 4.170642778297922e-06, "loss": 0.2046, "step": 7690 }, { "epoch": 0.7970774173489481, "grad_norm": 0.5635381937026978, "learning_rate": 4.166540293086509e-06, "loss": 0.151, "step": 7691 }, { "epoch": 0.7971810550316095, "grad_norm": 0.5966200828552246, "learning_rate": 4.162439591964716e-06, "loss": 0.16, "step": 7692 }, { "epoch": 0.7972846927142709, "grad_norm": 0.6098965406417847, "learning_rate": 4.158340675394614e-06, "loss": 0.1992, "step": 7693 }, { "epoch": 0.7973883303969324, "grad_norm": 0.6058703660964966, "learning_rate": 4.154243543838059e-06, "loss": 0.1873, "step": 7694 }, { "epoch": 0.7974919680795938, "grad_norm": 0.7015929222106934, "learning_rate": 4.150148197756705e-06, "loss": 0.2142, "step": 7695 }, { "epoch": 0.7975956057622552, "grad_norm": 0.7512447834014893, "learning_rate": 4.146054637612016e-06, "loss": 0.2191, "step": 7696 }, { "epoch": 0.7976992434449166, "grad_norm": 0.7451758980751038, "learning_rate": 4.141962863865241e-06, "loss": 0.1941, "step": 7697 }, { "epoch": 0.797802881127578, "grad_norm": 0.6696723103523254, "learning_rate": 4.137872876977445e-06, "loss": 0.1771, "step": 7698 }, { "epoch": 0.7979065188102394, "grad_norm": 0.6065865755081177, "learning_rate": 4.13378467740947e-06, "loss": 0.2062, "step": 7699 }, { "epoch": 0.7980101564929009, "grad_norm": 0.7292668223381042, "learning_rate": 4.129698265621975e-06, "loss": 0.2209, "step": 7700 }, { "epoch": 0.7981137941755623, "grad_norm": 0.6175323128700256, "learning_rate": 4.12561364207541e-06, "loss": 0.1771, "step": 7701 }, { "epoch": 0.7982174318582237, "grad_norm": 0.6026126146316528, "learning_rate": 4.1215308072300185e-06, "loss": 0.167, "step": 7702 }, { "epoch": 0.7983210695408851, "grad_norm": 0.6252270340919495, "learning_rate": 4.117449761545858e-06, "loss": 0.1731, "step": 7703 }, { "epoch": 0.7984247072235465, "grad_norm": 0.6011770963668823, "learning_rate": 4.113370505482761e-06, "loss": 0.1856, "step": 7704 }, { "epoch": 0.7985283449062079, "grad_norm": 0.7582604885101318, "learning_rate": 4.109293039500379e-06, "loss": 0.2264, "step": 7705 }, { "epoch": 0.7986319825888694, "grad_norm": 0.6858969926834106, "learning_rate": 4.105217364058161e-06, "loss": 0.2149, "step": 7706 }, { "epoch": 0.7987356202715308, "grad_norm": 0.5900029540061951, "learning_rate": 4.101143479615342e-06, "loss": 0.1788, "step": 7707 }, { "epoch": 0.7988392579541922, "grad_norm": 0.6448624730110168, "learning_rate": 4.097071386630959e-06, "loss": 0.1975, "step": 7708 }, { "epoch": 0.7989428956368536, "grad_norm": 0.6842593550682068, "learning_rate": 4.093001085563848e-06, "loss": 0.2097, "step": 7709 }, { "epoch": 0.799046533319515, "grad_norm": 0.6258800625801086, "learning_rate": 4.088932576872644e-06, "loss": 0.1821, "step": 7710 }, { "epoch": 0.7991501710021764, "grad_norm": 0.7252703309059143, "learning_rate": 4.08486586101579e-06, "loss": 0.2068, "step": 7711 }, { "epoch": 0.7992538086848378, "grad_norm": 0.6990664601325989, "learning_rate": 4.08080093845151e-06, "loss": 0.1987, "step": 7712 }, { "epoch": 0.7993574463674992, "grad_norm": 0.5749051570892334, "learning_rate": 4.076737809637832e-06, "loss": 0.1504, "step": 7713 }, { "epoch": 0.7994610840501606, "grad_norm": 0.7165976762771606, "learning_rate": 4.07267647503258e-06, "loss": 0.1921, "step": 7714 }, { "epoch": 0.799564721732822, "grad_norm": 0.6567685008049011, "learning_rate": 4.068616935093383e-06, "loss": 0.1897, "step": 7715 }, { "epoch": 0.7996683594154834, "grad_norm": 0.601642906665802, "learning_rate": 4.0645591902776705e-06, "loss": 0.15, "step": 7716 }, { "epoch": 0.7997719970981448, "grad_norm": 0.6340222954750061, "learning_rate": 4.06050324104265e-06, "loss": 0.1769, "step": 7717 }, { "epoch": 0.7998756347808063, "grad_norm": 0.6264651417732239, "learning_rate": 4.056449087845351e-06, "loss": 0.1734, "step": 7718 }, { "epoch": 0.7999792724634677, "grad_norm": 0.6766581535339355, "learning_rate": 4.052396731142585e-06, "loss": 0.1894, "step": 7719 }, { "epoch": 0.8000829101461291, "grad_norm": 0.5773683786392212, "learning_rate": 4.048346171390958e-06, "loss": 0.1882, "step": 7720 }, { "epoch": 0.8001865478287905, "grad_norm": 0.5931955575942993, "learning_rate": 4.044297409046893e-06, "loss": 0.1559, "step": 7721 }, { "epoch": 0.8002901855114519, "grad_norm": 0.654396116733551, "learning_rate": 4.040250444566587e-06, "loss": 0.2006, "step": 7722 }, { "epoch": 0.8003938231941133, "grad_norm": 0.6663067936897278, "learning_rate": 4.036205278406056e-06, "loss": 0.1625, "step": 7723 }, { "epoch": 0.8004974608767748, "grad_norm": 0.6567176580429077, "learning_rate": 4.032161911021093e-06, "loss": 0.1865, "step": 7724 }, { "epoch": 0.8006010985594362, "grad_norm": 0.7086308002471924, "learning_rate": 4.0281203428673074e-06, "loss": 0.1984, "step": 7725 }, { "epoch": 0.8007047362420976, "grad_norm": 0.7191071510314941, "learning_rate": 4.024080574400095e-06, "loss": 0.201, "step": 7726 }, { "epoch": 0.800808373924759, "grad_norm": 0.6508558392524719, "learning_rate": 4.020042606074641e-06, "loss": 0.2075, "step": 7727 }, { "epoch": 0.8009120116074204, "grad_norm": 0.5657883286476135, "learning_rate": 4.016006438345952e-06, "loss": 0.1825, "step": 7728 }, { "epoch": 0.8010156492900818, "grad_norm": 0.6412180066108704, "learning_rate": 4.0119720716688036e-06, "loss": 0.1856, "step": 7729 }, { "epoch": 0.8011192869727433, "grad_norm": 0.5706683993339539, "learning_rate": 4.007939506497789e-06, "loss": 0.1576, "step": 7730 }, { "epoch": 0.8012229246554047, "grad_norm": 0.6363986730575562, "learning_rate": 4.003908743287295e-06, "loss": 0.1971, "step": 7731 }, { "epoch": 0.8013265623380661, "grad_norm": 0.7020382285118103, "learning_rate": 3.999879782491498e-06, "loss": 0.2137, "step": 7732 }, { "epoch": 0.8014302000207275, "grad_norm": 0.6399512887001038, "learning_rate": 3.995852624564373e-06, "loss": 0.1558, "step": 7733 }, { "epoch": 0.8015338377033889, "grad_norm": 0.5676230192184448, "learning_rate": 3.991827269959692e-06, "loss": 0.1692, "step": 7734 }, { "epoch": 0.8016374753860503, "grad_norm": 0.6501866579055786, "learning_rate": 3.987803719131029e-06, "loss": 0.2023, "step": 7735 }, { "epoch": 0.8017411130687118, "grad_norm": 0.7282348871231079, "learning_rate": 3.983781972531755e-06, "loss": 0.2117, "step": 7736 }, { "epoch": 0.8018447507513732, "grad_norm": 0.6642242670059204, "learning_rate": 3.9797620306150265e-06, "loss": 0.1705, "step": 7737 }, { "epoch": 0.8019483884340346, "grad_norm": 0.714993953704834, "learning_rate": 3.975743893833821e-06, "loss": 0.1853, "step": 7738 }, { "epoch": 0.802052026116696, "grad_norm": 0.7452964186668396, "learning_rate": 3.9717275626408705e-06, "loss": 0.1921, "step": 7739 }, { "epoch": 0.8021556637993574, "grad_norm": 0.6581090092658997, "learning_rate": 3.9677130374887404e-06, "loss": 0.1727, "step": 7740 }, { "epoch": 0.8022593014820188, "grad_norm": 0.7347173690795898, "learning_rate": 3.96370031882979e-06, "loss": 0.2425, "step": 7741 }, { "epoch": 0.8023629391646803, "grad_norm": 0.6474437117576599, "learning_rate": 3.959689407116154e-06, "loss": 0.1671, "step": 7742 }, { "epoch": 0.8024665768473417, "grad_norm": 0.7023115158081055, "learning_rate": 3.955680302799785e-06, "loss": 0.2162, "step": 7743 }, { "epoch": 0.8025702145300031, "grad_norm": 0.661474347114563, "learning_rate": 3.951673006332417e-06, "loss": 0.1961, "step": 7744 }, { "epoch": 0.8026738522126645, "grad_norm": 0.77756667137146, "learning_rate": 3.9476675181655835e-06, "loss": 0.2247, "step": 7745 }, { "epoch": 0.8027774898953259, "grad_norm": 0.7199652791023254, "learning_rate": 3.943663838750624e-06, "loss": 0.2, "step": 7746 }, { "epoch": 0.8028811275779874, "grad_norm": 0.6598642468452454, "learning_rate": 3.939661968538657e-06, "loss": 0.181, "step": 7747 }, { "epoch": 0.8029847652606488, "grad_norm": 0.6574103832244873, "learning_rate": 3.935661907980621e-06, "loss": 0.2025, "step": 7748 }, { "epoch": 0.8030884029433102, "grad_norm": 0.5850088596343994, "learning_rate": 3.931663657527223e-06, "loss": 0.1783, "step": 7749 }, { "epoch": 0.8031920406259716, "grad_norm": 0.6657868027687073, "learning_rate": 3.92766721762899e-06, "loss": 0.1982, "step": 7750 }, { "epoch": 0.803295678308633, "grad_norm": 0.6957859396934509, "learning_rate": 3.9236725887362295e-06, "loss": 0.2032, "step": 7751 }, { "epoch": 0.8033993159912944, "grad_norm": 0.7359391450881958, "learning_rate": 3.919679771299045e-06, "loss": 0.1726, "step": 7752 }, { "epoch": 0.8035029536739559, "grad_norm": 0.740073025226593, "learning_rate": 3.915688765767354e-06, "loss": 0.1758, "step": 7753 }, { "epoch": 0.8036065913566173, "grad_norm": 0.7247598767280579, "learning_rate": 3.911699572590841e-06, "loss": 0.1948, "step": 7754 }, { "epoch": 0.8037102290392787, "grad_norm": 0.6437425017356873, "learning_rate": 3.907712192219013e-06, "loss": 0.1864, "step": 7755 }, { "epoch": 0.8038138667219401, "grad_norm": 0.742752194404602, "learning_rate": 3.903726625101163e-06, "loss": 0.185, "step": 7756 }, { "epoch": 0.8039175044046015, "grad_norm": 0.7389211058616638, "learning_rate": 3.899742871686374e-06, "loss": 0.2026, "step": 7757 }, { "epoch": 0.8040211420872629, "grad_norm": 0.6474356651306152, "learning_rate": 3.895760932423531e-06, "loss": 0.1708, "step": 7758 }, { "epoch": 0.8041247797699244, "grad_norm": 0.6608010530471802, "learning_rate": 3.891780807761307e-06, "loss": 0.1722, "step": 7759 }, { "epoch": 0.8042284174525858, "grad_norm": 0.7372478246688843, "learning_rate": 3.887802498148181e-06, "loss": 0.2083, "step": 7760 }, { "epoch": 0.8043320551352472, "grad_norm": 0.7452009916305542, "learning_rate": 3.883826004032427e-06, "loss": 0.2486, "step": 7761 }, { "epoch": 0.8044356928179086, "grad_norm": 0.5467132329940796, "learning_rate": 3.879851325862101e-06, "loss": 0.1624, "step": 7762 }, { "epoch": 0.80453933050057, "grad_norm": 0.6250606775283813, "learning_rate": 3.875878464085072e-06, "loss": 0.1832, "step": 7763 }, { "epoch": 0.8046429681832314, "grad_norm": 0.7398848533630371, "learning_rate": 3.871907419148995e-06, "loss": 0.2039, "step": 7764 }, { "epoch": 0.8047466058658929, "grad_norm": 0.6963582634925842, "learning_rate": 3.8679381915013105e-06, "loss": 0.1991, "step": 7765 }, { "epoch": 0.8048502435485543, "grad_norm": 0.7243926525115967, "learning_rate": 3.863970781589279e-06, "loss": 0.2111, "step": 7766 }, { "epoch": 0.8049538812312157, "grad_norm": 0.646825909614563, "learning_rate": 3.860005189859932e-06, "loss": 0.1942, "step": 7767 }, { "epoch": 0.8050575189138771, "grad_norm": 0.6852625012397766, "learning_rate": 3.856041416760115e-06, "loss": 0.1953, "step": 7768 }, { "epoch": 0.8051611565965385, "grad_norm": 0.5806519985198975, "learning_rate": 3.852079462736446e-06, "loss": 0.1767, "step": 7769 }, { "epoch": 0.8052647942792, "grad_norm": 0.6389683485031128, "learning_rate": 3.848119328235369e-06, "loss": 0.1805, "step": 7770 }, { "epoch": 0.8053684319618614, "grad_norm": 0.7592096328735352, "learning_rate": 3.844161013703099e-06, "loss": 0.1841, "step": 7771 }, { "epoch": 0.8054720696445228, "grad_norm": 0.7471230626106262, "learning_rate": 3.840204519585644e-06, "loss": 0.2049, "step": 7772 }, { "epoch": 0.8055757073271842, "grad_norm": 0.664825439453125, "learning_rate": 3.836249846328828e-06, "loss": 0.2012, "step": 7773 }, { "epoch": 0.8056793450098456, "grad_norm": 0.7081588506698608, "learning_rate": 3.832296994378248e-06, "loss": 0.1837, "step": 7774 }, { "epoch": 0.805782982692507, "grad_norm": 0.6164419054985046, "learning_rate": 3.828345964179314e-06, "loss": 0.1563, "step": 7775 }, { "epoch": 0.8058866203751684, "grad_norm": 0.6512451767921448, "learning_rate": 3.824396756177218e-06, "loss": 0.1932, "step": 7776 }, { "epoch": 0.8059902580578299, "grad_norm": 0.6552871465682983, "learning_rate": 3.820449370816943e-06, "loss": 0.1938, "step": 7777 }, { "epoch": 0.8060938957404913, "grad_norm": 0.5573453307151794, "learning_rate": 3.816503808543288e-06, "loss": 0.1378, "step": 7778 }, { "epoch": 0.8061975334231527, "grad_norm": 0.7078878283500671, "learning_rate": 3.8125600698008214e-06, "loss": 0.2211, "step": 7779 }, { "epoch": 0.8063011711058141, "grad_norm": 0.7048439383506775, "learning_rate": 3.808618155033921e-06, "loss": 0.1942, "step": 7780 }, { "epoch": 0.8064048087884755, "grad_norm": 0.6537505388259888, "learning_rate": 3.8046780646867644e-06, "loss": 0.1889, "step": 7781 }, { "epoch": 0.806508446471137, "grad_norm": 0.6082873344421387, "learning_rate": 3.8007397992033056e-06, "loss": 0.1664, "step": 7782 }, { "epoch": 0.8066120841537984, "grad_norm": 0.7674204111099243, "learning_rate": 3.7968033590273035e-06, "loss": 0.2166, "step": 7783 }, { "epoch": 0.8067157218364598, "grad_norm": 0.6718268394470215, "learning_rate": 3.792868744602305e-06, "loss": 0.1989, "step": 7784 }, { "epoch": 0.8068193595191212, "grad_norm": 0.7679089903831482, "learning_rate": 3.7889359563716643e-06, "loss": 0.2455, "step": 7785 }, { "epoch": 0.8069229972017826, "grad_norm": 0.5761622786521912, "learning_rate": 3.7850049947785227e-06, "loss": 0.1587, "step": 7786 }, { "epoch": 0.807026634884444, "grad_norm": 0.7585948705673218, "learning_rate": 3.781075860265806e-06, "loss": 0.176, "step": 7787 }, { "epoch": 0.8071302725671053, "grad_norm": 0.784820020198822, "learning_rate": 3.777148553276255e-06, "loss": 0.2499, "step": 7788 }, { "epoch": 0.8072339102497668, "grad_norm": 0.6705400347709656, "learning_rate": 3.7732230742523855e-06, "loss": 0.201, "step": 7789 }, { "epoch": 0.8073375479324282, "grad_norm": 0.6821495294570923, "learning_rate": 3.76929942363651e-06, "loss": 0.198, "step": 7790 }, { "epoch": 0.8074411856150896, "grad_norm": 0.6199593544006348, "learning_rate": 3.76537760187075e-06, "loss": 0.1606, "step": 7791 }, { "epoch": 0.807544823297751, "grad_norm": 0.6515820622444153, "learning_rate": 3.761457609396999e-06, "loss": 0.1781, "step": 7792 }, { "epoch": 0.8076484609804124, "grad_norm": 0.5457937121391296, "learning_rate": 3.7575394466569657e-06, "loss": 0.1743, "step": 7793 }, { "epoch": 0.8077520986630738, "grad_norm": 0.6231228709220886, "learning_rate": 3.7536231140921353e-06, "loss": 0.1745, "step": 7794 }, { "epoch": 0.8078557363457353, "grad_norm": 0.7281748056411743, "learning_rate": 3.749708612143801e-06, "loss": 0.2469, "step": 7795 }, { "epoch": 0.8079593740283967, "grad_norm": 0.5639762878417969, "learning_rate": 3.7457959412530365e-06, "loss": 0.1465, "step": 7796 }, { "epoch": 0.8080630117110581, "grad_norm": 0.64995276927948, "learning_rate": 3.741885101860716e-06, "loss": 0.1749, "step": 7797 }, { "epoch": 0.8081666493937195, "grad_norm": 0.9467796087265015, "learning_rate": 3.737976094407505e-06, "loss": 0.2079, "step": 7798 }, { "epoch": 0.8082702870763809, "grad_norm": 0.6556515693664551, "learning_rate": 3.7340689193338754e-06, "loss": 0.19, "step": 7799 }, { "epoch": 0.8083739247590424, "grad_norm": 0.6055012941360474, "learning_rate": 3.730163577080068e-06, "loss": 0.1661, "step": 7800 }, { "epoch": 0.8084775624417038, "grad_norm": 0.7720165848731995, "learning_rate": 3.7262600680861494e-06, "loss": 0.2375, "step": 7801 }, { "epoch": 0.8085812001243652, "grad_norm": 0.7421008944511414, "learning_rate": 3.722358392791936e-06, "loss": 0.2197, "step": 7802 }, { "epoch": 0.8086848378070266, "grad_norm": 0.7144314646720886, "learning_rate": 3.718458551637074e-06, "loss": 0.1834, "step": 7803 }, { "epoch": 0.808788475489688, "grad_norm": 0.6831815242767334, "learning_rate": 3.714560545060999e-06, "loss": 0.1838, "step": 7804 }, { "epoch": 0.8088921131723494, "grad_norm": 0.646991491317749, "learning_rate": 3.710664373502919e-06, "loss": 0.1667, "step": 7805 }, { "epoch": 0.8089957508550109, "grad_norm": 0.6269602179527283, "learning_rate": 3.706770037401861e-06, "loss": 0.1731, "step": 7806 }, { "epoch": 0.8090993885376723, "grad_norm": 0.6706764101982117, "learning_rate": 3.7028775371966276e-06, "loss": 0.1996, "step": 7807 }, { "epoch": 0.8092030262203337, "grad_norm": 0.667586088180542, "learning_rate": 3.6989868733258114e-06, "loss": 0.1834, "step": 7808 }, { "epoch": 0.8093066639029951, "grad_norm": 0.7090936899185181, "learning_rate": 3.695098046227821e-06, "loss": 0.2133, "step": 7809 }, { "epoch": 0.8094103015856565, "grad_norm": 0.7052953243255615, "learning_rate": 3.691211056340831e-06, "loss": 0.1951, "step": 7810 }, { "epoch": 0.8095139392683179, "grad_norm": 0.7060131430625916, "learning_rate": 3.687325904102832e-06, "loss": 0.2182, "step": 7811 }, { "epoch": 0.8096175769509794, "grad_norm": 0.5730564594268799, "learning_rate": 3.6834425899515847e-06, "loss": 0.1709, "step": 7812 }, { "epoch": 0.8097212146336408, "grad_norm": 0.7492680549621582, "learning_rate": 3.67956111432467e-06, "loss": 0.2341, "step": 7813 }, { "epoch": 0.8098248523163022, "grad_norm": 0.6759665012359619, "learning_rate": 3.675681477659436e-06, "loss": 0.1985, "step": 7814 }, { "epoch": 0.8099284899989636, "grad_norm": 0.5997726321220398, "learning_rate": 3.6718036803930313e-06, "loss": 0.1793, "step": 7815 }, { "epoch": 0.810032127681625, "grad_norm": 0.6804363131523132, "learning_rate": 3.667927722962412e-06, "loss": 0.2198, "step": 7816 }, { "epoch": 0.8101357653642864, "grad_norm": 0.727741539478302, "learning_rate": 3.664053605804301e-06, "loss": 0.1963, "step": 7817 }, { "epoch": 0.8102394030469479, "grad_norm": 0.5082893371582031, "learning_rate": 3.6601813293552368e-06, "loss": 0.1407, "step": 7818 }, { "epoch": 0.8103430407296093, "grad_norm": 0.5630528330802917, "learning_rate": 3.6563108940515445e-06, "loss": 0.1542, "step": 7819 }, { "epoch": 0.8104466784122707, "grad_norm": 0.623420000076294, "learning_rate": 3.652442300329333e-06, "loss": 0.1824, "step": 7820 }, { "epoch": 0.8105503160949321, "grad_norm": 0.7584496736526489, "learning_rate": 3.648575548624511e-06, "loss": 0.2128, "step": 7821 }, { "epoch": 0.8106539537775935, "grad_norm": 0.5935616493225098, "learning_rate": 3.6447106393727705e-06, "loss": 0.1523, "step": 7822 }, { "epoch": 0.810757591460255, "grad_norm": 0.603014349937439, "learning_rate": 3.6408475730096117e-06, "loss": 0.1584, "step": 7823 }, { "epoch": 0.8108612291429164, "grad_norm": 0.7835105657577515, "learning_rate": 3.636986349970324e-06, "loss": 0.2411, "step": 7824 }, { "epoch": 0.8109648668255778, "grad_norm": 0.6582568883895874, "learning_rate": 3.633126970689971e-06, "loss": 0.1735, "step": 7825 }, { "epoch": 0.8110685045082392, "grad_norm": 0.8031266927719116, "learning_rate": 3.629269435603433e-06, "loss": 0.2094, "step": 7826 }, { "epoch": 0.8111721421909006, "grad_norm": 0.5696879029273987, "learning_rate": 3.6254137451453676e-06, "loss": 0.1548, "step": 7827 }, { "epoch": 0.811275779873562, "grad_norm": 0.629696249961853, "learning_rate": 3.62155989975022e-06, "loss": 0.2036, "step": 7828 }, { "epoch": 0.8113794175562234, "grad_norm": 0.6651034355163574, "learning_rate": 3.61770789985225e-06, "loss": 0.1996, "step": 7829 }, { "epoch": 0.8114830552388849, "grad_norm": 0.6993162631988525, "learning_rate": 3.6138577458854783e-06, "loss": 0.2038, "step": 7830 }, { "epoch": 0.8115866929215463, "grad_norm": 0.7502244114875793, "learning_rate": 3.6100094382837504e-06, "loss": 0.2078, "step": 7831 }, { "epoch": 0.8116903306042077, "grad_norm": 0.7379708290100098, "learning_rate": 3.606162977480674e-06, "loss": 0.2197, "step": 7832 }, { "epoch": 0.8117939682868691, "grad_norm": 0.725213348865509, "learning_rate": 3.6023183639096736e-06, "loss": 0.2164, "step": 7833 }, { "epoch": 0.8118976059695305, "grad_norm": 0.4685988128185272, "learning_rate": 3.59847559800395e-06, "loss": 0.1492, "step": 7834 }, { "epoch": 0.812001243652192, "grad_norm": 0.7826396226882935, "learning_rate": 3.5946346801964937e-06, "loss": 0.2228, "step": 7835 }, { "epoch": 0.8121048813348534, "grad_norm": 0.659755527973175, "learning_rate": 3.590795610920106e-06, "loss": 0.181, "step": 7836 }, { "epoch": 0.8122085190175148, "grad_norm": 0.7572306990623474, "learning_rate": 3.5869583906073537e-06, "loss": 0.1664, "step": 7837 }, { "epoch": 0.8123121567001762, "grad_norm": 0.70071941614151, "learning_rate": 3.5831230196906196e-06, "loss": 0.2061, "step": 7838 }, { "epoch": 0.8124157943828376, "grad_norm": 0.7053723335266113, "learning_rate": 3.5792894986020634e-06, "loss": 0.2201, "step": 7839 }, { "epoch": 0.812519432065499, "grad_norm": 0.5322561264038086, "learning_rate": 3.575457827773636e-06, "loss": 0.1807, "step": 7840 }, { "epoch": 0.8126230697481605, "grad_norm": 0.6401653289794922, "learning_rate": 3.5716280076370936e-06, "loss": 0.1678, "step": 7841 }, { "epoch": 0.8127267074308219, "grad_norm": 0.7975711226463318, "learning_rate": 3.567800038623963e-06, "loss": 0.2015, "step": 7842 }, { "epoch": 0.8128303451134833, "grad_norm": 0.6188352108001709, "learning_rate": 3.563973921165578e-06, "loss": 0.1668, "step": 7843 }, { "epoch": 0.8129339827961447, "grad_norm": 0.686998724937439, "learning_rate": 3.560149655693068e-06, "loss": 0.2124, "step": 7844 }, { "epoch": 0.8130376204788061, "grad_norm": 0.5393928289413452, "learning_rate": 3.5563272426373386e-06, "loss": 0.1506, "step": 7845 }, { "epoch": 0.8131412581614675, "grad_norm": 0.7458584904670715, "learning_rate": 3.552506682429093e-06, "loss": 0.2071, "step": 7846 }, { "epoch": 0.813244895844129, "grad_norm": 0.6960498690605164, "learning_rate": 3.548687975498821e-06, "loss": 0.194, "step": 7847 }, { "epoch": 0.8133485335267904, "grad_norm": 0.6655008792877197, "learning_rate": 3.544871122276816e-06, "loss": 0.182, "step": 7848 }, { "epoch": 0.8134521712094518, "grad_norm": 0.601629912853241, "learning_rate": 3.541056123193156e-06, "loss": 0.1485, "step": 7849 }, { "epoch": 0.8135558088921132, "grad_norm": 0.6167109608650208, "learning_rate": 3.5372429786777018e-06, "loss": 0.1899, "step": 7850 }, { "epoch": 0.8136594465747746, "grad_norm": 0.6821781992912292, "learning_rate": 3.5334316891601206e-06, "loss": 0.2029, "step": 7851 }, { "epoch": 0.813763084257436, "grad_norm": 0.6972765326499939, "learning_rate": 3.52962225506986e-06, "loss": 0.1923, "step": 7852 }, { "epoch": 0.8138667219400975, "grad_norm": 0.5665042400360107, "learning_rate": 3.5258146768361546e-06, "loss": 0.1436, "step": 7853 }, { "epoch": 0.8139703596227589, "grad_norm": 0.6780118942260742, "learning_rate": 3.5220089548880475e-06, "loss": 0.182, "step": 7854 }, { "epoch": 0.8140739973054203, "grad_norm": 0.7073417901992798, "learning_rate": 3.5182050896543497e-06, "loss": 0.1855, "step": 7855 }, { "epoch": 0.8141776349880817, "grad_norm": 0.7430309653282166, "learning_rate": 3.5144030815636867e-06, "loss": 0.1915, "step": 7856 }, { "epoch": 0.8142812726707431, "grad_norm": 0.624646782875061, "learning_rate": 3.510602931044451e-06, "loss": 0.1819, "step": 7857 }, { "epoch": 0.8143849103534045, "grad_norm": 0.6942505240440369, "learning_rate": 3.50680463852485e-06, "loss": 0.1867, "step": 7858 }, { "epoch": 0.814488548036066, "grad_norm": 0.6526444554328918, "learning_rate": 3.503008204432863e-06, "loss": 0.1955, "step": 7859 }, { "epoch": 0.8145921857187274, "grad_norm": 0.6996036171913147, "learning_rate": 3.499213629196263e-06, "loss": 0.2007, "step": 7860 }, { "epoch": 0.8146958234013888, "grad_norm": 0.725845992565155, "learning_rate": 3.495420913242622e-06, "loss": 0.2015, "step": 7861 }, { "epoch": 0.8147994610840502, "grad_norm": 0.7196703553199768, "learning_rate": 3.4916300569992934e-06, "loss": 0.223, "step": 7862 }, { "epoch": 0.8149030987667116, "grad_norm": 0.6106156706809998, "learning_rate": 3.4878410608934264e-06, "loss": 0.174, "step": 7863 }, { "epoch": 0.8150067364493729, "grad_norm": 0.5675254464149475, "learning_rate": 3.4840539253519645e-06, "loss": 0.1835, "step": 7864 }, { "epoch": 0.8151103741320344, "grad_norm": 0.707380473613739, "learning_rate": 3.4802686508016302e-06, "loss": 0.1977, "step": 7865 }, { "epoch": 0.8152140118146958, "grad_norm": 0.6617516279220581, "learning_rate": 3.4764852376689475e-06, "loss": 0.182, "step": 7866 }, { "epoch": 0.8153176494973572, "grad_norm": 0.7335023283958435, "learning_rate": 3.4727036863802143e-06, "loss": 0.2243, "step": 7867 }, { "epoch": 0.8154212871800186, "grad_norm": 0.5932573080062866, "learning_rate": 3.4689239973615374e-06, "loss": 0.1748, "step": 7868 }, { "epoch": 0.81552492486268, "grad_norm": 0.6992368698120117, "learning_rate": 3.4651461710388135e-06, "loss": 0.1935, "step": 7869 }, { "epoch": 0.8156285625453414, "grad_norm": 0.6213841438293457, "learning_rate": 3.461370207837713e-06, "loss": 0.181, "step": 7870 }, { "epoch": 0.8157322002280029, "grad_norm": 0.6884294152259827, "learning_rate": 3.4575961081837096e-06, "loss": 0.1792, "step": 7871 }, { "epoch": 0.8158358379106643, "grad_norm": 0.6872044801712036, "learning_rate": 3.4538238725020555e-06, "loss": 0.2066, "step": 7872 }, { "epoch": 0.8159394755933257, "grad_norm": 0.6185559630393982, "learning_rate": 3.4500535012178048e-06, "loss": 0.1691, "step": 7873 }, { "epoch": 0.8160431132759871, "grad_norm": 0.6163897514343262, "learning_rate": 3.446284994755804e-06, "loss": 0.1921, "step": 7874 }, { "epoch": 0.8161467509586485, "grad_norm": 0.8531190156936646, "learning_rate": 3.4425183535406713e-06, "loss": 0.2188, "step": 7875 }, { "epoch": 0.8162503886413099, "grad_norm": 0.7438344955444336, "learning_rate": 3.4387535779968363e-06, "loss": 0.1949, "step": 7876 }, { "epoch": 0.8163540263239714, "grad_norm": 0.7845487594604492, "learning_rate": 3.4349906685485036e-06, "loss": 0.1977, "step": 7877 }, { "epoch": 0.8164576640066328, "grad_norm": 0.7111387252807617, "learning_rate": 3.4312296256196674e-06, "loss": 0.1862, "step": 7878 }, { "epoch": 0.8165613016892942, "grad_norm": 0.6945541501045227, "learning_rate": 3.427470449634125e-06, "loss": 0.2017, "step": 7879 }, { "epoch": 0.8166649393719556, "grad_norm": 0.654155433177948, "learning_rate": 3.4237131410154436e-06, "loss": 0.1817, "step": 7880 }, { "epoch": 0.816768577054617, "grad_norm": 0.6121152639389038, "learning_rate": 3.4199577001870043e-06, "loss": 0.2015, "step": 7881 }, { "epoch": 0.8168722147372784, "grad_norm": 0.7844396829605103, "learning_rate": 3.416204127571949e-06, "loss": 0.2014, "step": 7882 }, { "epoch": 0.8169758524199399, "grad_norm": 0.7182947993278503, "learning_rate": 3.41245242359324e-06, "loss": 0.2143, "step": 7883 }, { "epoch": 0.8170794901026013, "grad_norm": 0.7850946187973022, "learning_rate": 3.408702588673605e-06, "loss": 0.2084, "step": 7884 }, { "epoch": 0.8171831277852627, "grad_norm": 0.604456901550293, "learning_rate": 3.4049546232355677e-06, "loss": 0.1536, "step": 7885 }, { "epoch": 0.8172867654679241, "grad_norm": 0.6011174321174622, "learning_rate": 3.4012085277014494e-06, "loss": 0.152, "step": 7886 }, { "epoch": 0.8173904031505855, "grad_norm": 0.6505752801895142, "learning_rate": 3.397464302493345e-06, "loss": 0.1617, "step": 7887 }, { "epoch": 0.817494040833247, "grad_norm": 0.7308200597763062, "learning_rate": 3.393721948033155e-06, "loss": 0.2055, "step": 7888 }, { "epoch": 0.8175976785159084, "grad_norm": 0.6321290135383606, "learning_rate": 3.3899814647425644e-06, "loss": 0.1746, "step": 7889 }, { "epoch": 0.8177013161985698, "grad_norm": 0.5009555220603943, "learning_rate": 3.3862428530430426e-06, "loss": 0.1329, "step": 7890 }, { "epoch": 0.8178049538812312, "grad_norm": 0.6724569797515869, "learning_rate": 3.3825061133558502e-06, "loss": 0.1645, "step": 7891 }, { "epoch": 0.8179085915638926, "grad_norm": 0.7115445733070374, "learning_rate": 3.3787712461020305e-06, "loss": 0.2052, "step": 7892 }, { "epoch": 0.818012229246554, "grad_norm": 0.6832154393196106, "learning_rate": 3.3750382517024294e-06, "loss": 0.1735, "step": 7893 }, { "epoch": 0.8181158669292155, "grad_norm": 0.6225373148918152, "learning_rate": 3.371307130577679e-06, "loss": 0.1836, "step": 7894 }, { "epoch": 0.8182195046118769, "grad_norm": 0.7177269458770752, "learning_rate": 3.3675778831481877e-06, "loss": 0.1943, "step": 7895 }, { "epoch": 0.8183231422945383, "grad_norm": 0.7160996198654175, "learning_rate": 3.3638505098341725e-06, "loss": 0.2182, "step": 7896 }, { "epoch": 0.8184267799771997, "grad_norm": 0.5422918200492859, "learning_rate": 3.3601250110556107e-06, "loss": 0.145, "step": 7897 }, { "epoch": 0.8185304176598611, "grad_norm": 0.6439123153686523, "learning_rate": 3.3564013872322977e-06, "loss": 0.1561, "step": 7898 }, { "epoch": 0.8186340553425225, "grad_norm": 0.7110884189605713, "learning_rate": 3.3526796387838067e-06, "loss": 0.1909, "step": 7899 }, { "epoch": 0.818737693025184, "grad_norm": 0.6405838131904602, "learning_rate": 3.3489597661294893e-06, "loss": 0.1769, "step": 7900 }, { "epoch": 0.8188413307078454, "grad_norm": 0.6757362484931946, "learning_rate": 3.3452417696885077e-06, "loss": 0.1868, "step": 7901 }, { "epoch": 0.8189449683905068, "grad_norm": 0.6399852633476257, "learning_rate": 3.341525649879791e-06, "loss": 0.2003, "step": 7902 }, { "epoch": 0.8190486060731682, "grad_norm": 0.7883408665657043, "learning_rate": 3.3378114071220647e-06, "loss": 0.1885, "step": 7903 }, { "epoch": 0.8191522437558296, "grad_norm": 0.764289379119873, "learning_rate": 3.3340990418338516e-06, "loss": 0.2051, "step": 7904 }, { "epoch": 0.819255881438491, "grad_norm": 0.5843377709388733, "learning_rate": 3.330388554433448e-06, "loss": 0.1742, "step": 7905 }, { "epoch": 0.8193595191211525, "grad_norm": 0.6539674997329712, "learning_rate": 3.326679945338951e-06, "loss": 0.1888, "step": 7906 }, { "epoch": 0.8194631568038139, "grad_norm": 0.6555024981498718, "learning_rate": 3.3229732149682347e-06, "loss": 0.1789, "step": 7907 }, { "epoch": 0.8195667944864753, "grad_norm": 0.7149132490158081, "learning_rate": 3.3192683637389768e-06, "loss": 0.2211, "step": 7908 }, { "epoch": 0.8196704321691367, "grad_norm": 0.6635371446609497, "learning_rate": 3.315565392068627e-06, "loss": 0.1692, "step": 7909 }, { "epoch": 0.8197740698517981, "grad_norm": 0.6320151090621948, "learning_rate": 3.31186430037443e-06, "loss": 0.1634, "step": 7910 }, { "epoch": 0.8198777075344595, "grad_norm": 0.7512708306312561, "learning_rate": 3.3081650890734253e-06, "loss": 0.2088, "step": 7911 }, { "epoch": 0.819981345217121, "grad_norm": 0.6643633246421814, "learning_rate": 3.3044677585824237e-06, "loss": 0.1926, "step": 7912 }, { "epoch": 0.8200849828997824, "grad_norm": 0.6350557208061218, "learning_rate": 3.300772309318043e-06, "loss": 0.1898, "step": 7913 }, { "epoch": 0.8201886205824438, "grad_norm": 0.6989490389823914, "learning_rate": 3.297078741696684e-06, "loss": 0.2359, "step": 7914 }, { "epoch": 0.8202922582651052, "grad_norm": 0.6762931942939758, "learning_rate": 3.293387056134527e-06, "loss": 0.2028, "step": 7915 }, { "epoch": 0.8203958959477666, "grad_norm": 0.8357157111167908, "learning_rate": 3.2896972530475458e-06, "loss": 0.2159, "step": 7916 }, { "epoch": 0.820499533630428, "grad_norm": 0.5877515077590942, "learning_rate": 3.2860093328514963e-06, "loss": 0.1595, "step": 7917 }, { "epoch": 0.8206031713130895, "grad_norm": 0.6448560953140259, "learning_rate": 3.282323295961933e-06, "loss": 0.1867, "step": 7918 }, { "epoch": 0.8207068089957509, "grad_norm": 0.7989858984947205, "learning_rate": 3.2786391427941977e-06, "loss": 0.2033, "step": 7919 }, { "epoch": 0.8208104466784123, "grad_norm": 0.6952617764472961, "learning_rate": 3.2749568737634064e-06, "loss": 0.1862, "step": 7920 }, { "epoch": 0.8209140843610737, "grad_norm": 0.7309961915016174, "learning_rate": 3.271276489284478e-06, "loss": 0.2101, "step": 7921 }, { "epoch": 0.8210177220437351, "grad_norm": 0.7257064580917358, "learning_rate": 3.2675979897721087e-06, "loss": 0.1971, "step": 7922 }, { "epoch": 0.8211213597263965, "grad_norm": 0.6392828226089478, "learning_rate": 3.2639213756407837e-06, "loss": 0.2122, "step": 7923 }, { "epoch": 0.821224997409058, "grad_norm": 0.6919106245040894, "learning_rate": 3.2602466473047854e-06, "loss": 0.2003, "step": 7924 }, { "epoch": 0.8213286350917194, "grad_norm": 0.6594109535217285, "learning_rate": 3.256573805178167e-06, "loss": 0.179, "step": 7925 }, { "epoch": 0.8214322727743808, "grad_norm": 0.5921263694763184, "learning_rate": 3.2529028496747904e-06, "loss": 0.1603, "step": 7926 }, { "epoch": 0.8215359104570422, "grad_norm": 0.6132636070251465, "learning_rate": 3.249233781208281e-06, "loss": 0.1844, "step": 7927 }, { "epoch": 0.8216395481397036, "grad_norm": 0.7435948848724365, "learning_rate": 3.245566600192074e-06, "loss": 0.2056, "step": 7928 }, { "epoch": 0.821743185822365, "grad_norm": 0.6977766156196594, "learning_rate": 3.241901307039379e-06, "loss": 0.2141, "step": 7929 }, { "epoch": 0.8218468235050265, "grad_norm": 0.7409849762916565, "learning_rate": 3.2382379021631883e-06, "loss": 0.2091, "step": 7930 }, { "epoch": 0.8219504611876879, "grad_norm": 0.5388767719268799, "learning_rate": 3.234576385976298e-06, "loss": 0.1455, "step": 7931 }, { "epoch": 0.8220540988703493, "grad_norm": 0.6420896053314209, "learning_rate": 3.2309167588912738e-06, "loss": 0.1823, "step": 7932 }, { "epoch": 0.8221577365530107, "grad_norm": 0.6220948696136475, "learning_rate": 3.227259021320486e-06, "loss": 0.1838, "step": 7933 }, { "epoch": 0.8222613742356721, "grad_norm": 0.608130693435669, "learning_rate": 3.2236031736760775e-06, "loss": 0.1688, "step": 7934 }, { "epoch": 0.8223650119183336, "grad_norm": 0.6391450762748718, "learning_rate": 3.21994921636998e-06, "loss": 0.1903, "step": 7935 }, { "epoch": 0.822468649600995, "grad_norm": 0.652980387210846, "learning_rate": 3.216297149813923e-06, "loss": 0.1884, "step": 7936 }, { "epoch": 0.8225722872836564, "grad_norm": 0.711746335029602, "learning_rate": 3.2126469744194087e-06, "loss": 0.2326, "step": 7937 }, { "epoch": 0.8226759249663178, "grad_norm": 0.6753913164138794, "learning_rate": 3.208998690597738e-06, "loss": 0.1629, "step": 7938 }, { "epoch": 0.8227795626489792, "grad_norm": 0.6291230916976929, "learning_rate": 3.2053522987599963e-06, "loss": 0.1576, "step": 7939 }, { "epoch": 0.8228832003316405, "grad_norm": 0.6426824927330017, "learning_rate": 3.2017077993170485e-06, "loss": 0.1681, "step": 7940 }, { "epoch": 0.822986838014302, "grad_norm": 0.6569961905479431, "learning_rate": 3.1980651926795558e-06, "loss": 0.1742, "step": 7941 }, { "epoch": 0.8230904756969634, "grad_norm": 0.7171782851219177, "learning_rate": 3.1944244792579493e-06, "loss": 0.1868, "step": 7942 }, { "epoch": 0.8231941133796248, "grad_norm": 0.7539594769477844, "learning_rate": 3.1907856594624696e-06, "loss": 0.2004, "step": 7943 }, { "epoch": 0.8232977510622862, "grad_norm": 0.684523344039917, "learning_rate": 3.187148733703138e-06, "loss": 0.2053, "step": 7944 }, { "epoch": 0.8234013887449476, "grad_norm": 0.6421189308166504, "learning_rate": 3.1835137023897445e-06, "loss": 0.1817, "step": 7945 }, { "epoch": 0.823505026427609, "grad_norm": 0.6755841970443726, "learning_rate": 3.17988056593189e-06, "loss": 0.1837, "step": 7946 }, { "epoch": 0.8236086641102704, "grad_norm": 0.7096763849258423, "learning_rate": 3.1762493247389447e-06, "loss": 0.1852, "step": 7947 }, { "epoch": 0.8237123017929319, "grad_norm": 0.7008394598960876, "learning_rate": 3.1726199792200685e-06, "loss": 0.1991, "step": 7948 }, { "epoch": 0.8238159394755933, "grad_norm": 0.6629871129989624, "learning_rate": 3.1689925297842208e-06, "loss": 0.1864, "step": 7949 }, { "epoch": 0.8239195771582547, "grad_norm": 0.5846853852272034, "learning_rate": 3.1653669768401253e-06, "loss": 0.1494, "step": 7950 }, { "epoch": 0.8240232148409161, "grad_norm": 0.7168512344360352, "learning_rate": 3.161743320796311e-06, "loss": 0.2545, "step": 7951 }, { "epoch": 0.8241268525235775, "grad_norm": 0.6414791345596313, "learning_rate": 3.1581215620610804e-06, "loss": 0.1835, "step": 7952 }, { "epoch": 0.824230490206239, "grad_norm": 0.7068555355072021, "learning_rate": 3.154501701042538e-06, "loss": 0.1911, "step": 7953 }, { "epoch": 0.8243341278889004, "grad_norm": 0.5438865423202515, "learning_rate": 3.150883738148556e-06, "loss": 0.155, "step": 7954 }, { "epoch": 0.8244377655715618, "grad_norm": 0.6644114255905151, "learning_rate": 3.1472676737867956e-06, "loss": 0.1718, "step": 7955 }, { "epoch": 0.8245414032542232, "grad_norm": 0.5944012999534607, "learning_rate": 3.1436535083647214e-06, "loss": 0.1748, "step": 7956 }, { "epoch": 0.8246450409368846, "grad_norm": 0.6920223832130432, "learning_rate": 3.14004124228956e-06, "loss": 0.2383, "step": 7957 }, { "epoch": 0.824748678619546, "grad_norm": 0.6960580945014954, "learning_rate": 3.1364308759683438e-06, "loss": 0.2038, "step": 7958 }, { "epoch": 0.8248523163022075, "grad_norm": 0.5951129794120789, "learning_rate": 3.1328224098078917e-06, "loss": 0.1752, "step": 7959 }, { "epoch": 0.8249559539848689, "grad_norm": 0.635200023651123, "learning_rate": 3.129215844214779e-06, "loss": 0.176, "step": 7960 }, { "epoch": 0.8250595916675303, "grad_norm": 0.7616642713546753, "learning_rate": 3.1256111795954046e-06, "loss": 0.2122, "step": 7961 }, { "epoch": 0.8251632293501917, "grad_norm": 0.705881655216217, "learning_rate": 3.122008416355924e-06, "loss": 0.2105, "step": 7962 }, { "epoch": 0.8252668670328531, "grad_norm": 0.7588737607002258, "learning_rate": 3.1184075549023007e-06, "loss": 0.2225, "step": 7963 }, { "epoch": 0.8253705047155145, "grad_norm": 0.7803189754486084, "learning_rate": 3.114808595640273e-06, "loss": 0.2452, "step": 7964 }, { "epoch": 0.825474142398176, "grad_norm": 0.6398329138755798, "learning_rate": 3.111211538975365e-06, "loss": 0.1879, "step": 7965 }, { "epoch": 0.8255777800808374, "grad_norm": 0.6574432849884033, "learning_rate": 3.107616385312888e-06, "loss": 0.1746, "step": 7966 }, { "epoch": 0.8256814177634988, "grad_norm": 0.7401537299156189, "learning_rate": 3.104023135057932e-06, "loss": 0.2021, "step": 7967 }, { "epoch": 0.8257850554461602, "grad_norm": 0.7985172867774963, "learning_rate": 3.1004317886153835e-06, "loss": 0.2037, "step": 7968 }, { "epoch": 0.8258886931288216, "grad_norm": 0.6925119757652283, "learning_rate": 3.0968423463899145e-06, "loss": 0.1943, "step": 7969 }, { "epoch": 0.825992330811483, "grad_norm": 0.7429485321044922, "learning_rate": 3.0932548087859683e-06, "loss": 0.1971, "step": 7970 }, { "epoch": 0.8260959684941445, "grad_norm": 0.733308732509613, "learning_rate": 3.0896691762077923e-06, "loss": 0.2351, "step": 7971 }, { "epoch": 0.8261996061768059, "grad_norm": 0.7723768353462219, "learning_rate": 3.0860854490594084e-06, "loss": 0.1963, "step": 7972 }, { "epoch": 0.8263032438594673, "grad_norm": 0.6854905486106873, "learning_rate": 3.0825036277446176e-06, "loss": 0.2001, "step": 7973 }, { "epoch": 0.8264068815421287, "grad_norm": 0.7836577296257019, "learning_rate": 3.0789237126670214e-06, "loss": 0.2203, "step": 7974 }, { "epoch": 0.8265105192247901, "grad_norm": 0.6477953195571899, "learning_rate": 3.075345704229995e-06, "loss": 0.1941, "step": 7975 }, { "epoch": 0.8266141569074515, "grad_norm": 0.6857738494873047, "learning_rate": 3.0717696028367093e-06, "loss": 0.2041, "step": 7976 }, { "epoch": 0.826717794590113, "grad_norm": 0.6048509478569031, "learning_rate": 3.0681954088901024e-06, "loss": 0.1582, "step": 7977 }, { "epoch": 0.8268214322727744, "grad_norm": 0.8324301242828369, "learning_rate": 3.0646231227929224e-06, "loss": 0.2352, "step": 7978 }, { "epoch": 0.8269250699554358, "grad_norm": 0.7217226624488831, "learning_rate": 3.061052744947681e-06, "loss": 0.2169, "step": 7979 }, { "epoch": 0.8270287076380972, "grad_norm": 0.7014948725700378, "learning_rate": 3.0574842757566814e-06, "loss": 0.1883, "step": 7980 }, { "epoch": 0.8271323453207586, "grad_norm": 0.7494576573371887, "learning_rate": 3.053917715622019e-06, "loss": 0.2175, "step": 7981 }, { "epoch": 0.82723598300342, "grad_norm": 0.6612431406974792, "learning_rate": 3.0503530649455616e-06, "loss": 0.1752, "step": 7982 }, { "epoch": 0.8273396206860815, "grad_norm": 0.710117518901825, "learning_rate": 3.046790324128972e-06, "loss": 0.2034, "step": 7983 }, { "epoch": 0.8274432583687429, "grad_norm": 0.5640646815299988, "learning_rate": 3.0432294935736985e-06, "loss": 0.1702, "step": 7984 }, { "epoch": 0.8275468960514043, "grad_norm": 0.7199845910072327, "learning_rate": 3.0396705736809664e-06, "loss": 0.2295, "step": 7985 }, { "epoch": 0.8276505337340657, "grad_norm": 0.658679187297821, "learning_rate": 3.0361135648517883e-06, "loss": 0.1973, "step": 7986 }, { "epoch": 0.8277541714167271, "grad_norm": 0.7033202648162842, "learning_rate": 3.032558467486959e-06, "loss": 0.2307, "step": 7987 }, { "epoch": 0.8278578090993886, "grad_norm": 0.7593150734901428, "learning_rate": 3.0290052819870654e-06, "loss": 0.2021, "step": 7988 }, { "epoch": 0.82796144678205, "grad_norm": 0.6131226420402527, "learning_rate": 3.0254540087524775e-06, "loss": 0.2034, "step": 7989 }, { "epoch": 0.8280650844647114, "grad_norm": 0.6799296140670776, "learning_rate": 3.0219046481833404e-06, "loss": 0.1865, "step": 7990 }, { "epoch": 0.8281687221473728, "grad_norm": 0.7688307166099548, "learning_rate": 3.0183572006796045e-06, "loss": 0.223, "step": 7991 }, { "epoch": 0.8282723598300342, "grad_norm": 0.6909458041191101, "learning_rate": 3.014811666640971e-06, "loss": 0.1901, "step": 7992 }, { "epoch": 0.8283759975126956, "grad_norm": 0.8143532276153564, "learning_rate": 3.011268046466955e-06, "loss": 0.217, "step": 7993 }, { "epoch": 0.8284796351953571, "grad_norm": 0.773020327091217, "learning_rate": 3.007726340556851e-06, "loss": 0.2167, "step": 7994 }, { "epoch": 0.8285832728780185, "grad_norm": 0.6222478151321411, "learning_rate": 3.004186549309722e-06, "loss": 0.1789, "step": 7995 }, { "epoch": 0.8286869105606799, "grad_norm": 0.7233225703239441, "learning_rate": 3.00064867312444e-06, "loss": 0.2182, "step": 7996 }, { "epoch": 0.8287905482433413, "grad_norm": 0.7089139819145203, "learning_rate": 2.997112712399637e-06, "loss": 0.2036, "step": 7997 }, { "epoch": 0.8288941859260027, "grad_norm": 0.5846203565597534, "learning_rate": 2.9935786675337365e-06, "loss": 0.1738, "step": 7998 }, { "epoch": 0.8289978236086641, "grad_norm": 0.7119741439819336, "learning_rate": 2.9900465389249623e-06, "loss": 0.2005, "step": 7999 }, { "epoch": 0.8291014612913256, "grad_norm": 0.6737663149833679, "learning_rate": 2.986516326971294e-06, "loss": 0.183, "step": 8000 }, { "epoch": 0.829205098973987, "grad_norm": 0.6113407015800476, "learning_rate": 2.9829880320705196e-06, "loss": 0.1536, "step": 8001 }, { "epoch": 0.8293087366566484, "grad_norm": 0.7092680931091309, "learning_rate": 2.979461654620206e-06, "loss": 0.1975, "step": 8002 }, { "epoch": 0.8294123743393098, "grad_norm": 0.6716724634170532, "learning_rate": 2.975937195017693e-06, "loss": 0.1731, "step": 8003 }, { "epoch": 0.8295160120219712, "grad_norm": 0.6372599601745605, "learning_rate": 2.9724146536601116e-06, "loss": 0.1856, "step": 8004 }, { "epoch": 0.8296196497046326, "grad_norm": 0.5934385061264038, "learning_rate": 2.9688940309443738e-06, "loss": 0.168, "step": 8005 }, { "epoch": 0.8297232873872941, "grad_norm": 0.48493868112564087, "learning_rate": 2.965375327267179e-06, "loss": 0.1335, "step": 8006 }, { "epoch": 0.8298269250699555, "grad_norm": 0.722053587436676, "learning_rate": 2.961858543025018e-06, "loss": 0.2245, "step": 8007 }, { "epoch": 0.8299305627526169, "grad_norm": 0.7954843640327454, "learning_rate": 2.9583436786141463e-06, "loss": 0.2197, "step": 8008 }, { "epoch": 0.8300342004352783, "grad_norm": 0.607681393623352, "learning_rate": 2.9548307344306205e-06, "loss": 0.1742, "step": 8009 }, { "epoch": 0.8301378381179397, "grad_norm": 0.7497290372848511, "learning_rate": 2.9513197108702706e-06, "loss": 0.1911, "step": 8010 }, { "epoch": 0.8302414758006011, "grad_norm": 0.7454660534858704, "learning_rate": 2.947810608328707e-06, "loss": 0.2072, "step": 8011 }, { "epoch": 0.8303451134832626, "grad_norm": 0.634694516658783, "learning_rate": 2.944303427201343e-06, "loss": 0.1618, "step": 8012 }, { "epoch": 0.830448751165924, "grad_norm": 0.7154065370559692, "learning_rate": 2.9407981678833496e-06, "loss": 0.2029, "step": 8013 }, { "epoch": 0.8305523888485854, "grad_norm": 0.6388426423072815, "learning_rate": 2.9372948307697034e-06, "loss": 0.2012, "step": 8014 }, { "epoch": 0.8306560265312468, "grad_norm": 0.69815593957901, "learning_rate": 2.9337934162551462e-06, "loss": 0.2191, "step": 8015 }, { "epoch": 0.8307596642139081, "grad_norm": 0.6910235285758972, "learning_rate": 2.9302939247342244e-06, "loss": 0.2123, "step": 8016 }, { "epoch": 0.8308633018965695, "grad_norm": 0.5625707507133484, "learning_rate": 2.9267963566012447e-06, "loss": 0.146, "step": 8017 }, { "epoch": 0.830966939579231, "grad_norm": 0.66923588514328, "learning_rate": 2.9233007122503076e-06, "loss": 0.1868, "step": 8018 }, { "epoch": 0.8310705772618924, "grad_norm": 0.6398361325263977, "learning_rate": 2.9198069920753045e-06, "loss": 0.2055, "step": 8019 }, { "epoch": 0.8311742149445538, "grad_norm": 0.6225539445877075, "learning_rate": 2.916315196469892e-06, "loss": 0.1683, "step": 8020 }, { "epoch": 0.8312778526272152, "grad_norm": 0.7289894223213196, "learning_rate": 2.9128253258275285e-06, "loss": 0.2036, "step": 8021 }, { "epoch": 0.8313814903098766, "grad_norm": 0.8791557550430298, "learning_rate": 2.9093373805414526e-06, "loss": 0.2305, "step": 8022 }, { "epoch": 0.831485127992538, "grad_norm": 0.695125162601471, "learning_rate": 2.9058513610046634e-06, "loss": 0.1801, "step": 8023 }, { "epoch": 0.8315887656751995, "grad_norm": 0.6579564809799194, "learning_rate": 2.9023672676099733e-06, "loss": 0.1957, "step": 8024 }, { "epoch": 0.8316924033578609, "grad_norm": 0.5032559633255005, "learning_rate": 2.8988851007499575e-06, "loss": 0.1511, "step": 8025 }, { "epoch": 0.8317960410405223, "grad_norm": 0.7749326229095459, "learning_rate": 2.8954048608169837e-06, "loss": 0.22, "step": 8026 }, { "epoch": 0.8318996787231837, "grad_norm": 0.6643040776252747, "learning_rate": 2.8919265482032057e-06, "loss": 0.1976, "step": 8027 }, { "epoch": 0.8320033164058451, "grad_norm": 0.6258031129837036, "learning_rate": 2.888450163300549e-06, "loss": 0.1713, "step": 8028 }, { "epoch": 0.8321069540885065, "grad_norm": 0.6977447271347046, "learning_rate": 2.884975706500728e-06, "loss": 0.2078, "step": 8029 }, { "epoch": 0.832210591771168, "grad_norm": 0.6385695338249207, "learning_rate": 2.8815031781952328e-06, "loss": 0.1869, "step": 8030 }, { "epoch": 0.8323142294538294, "grad_norm": 0.7600287199020386, "learning_rate": 2.8780325787753494e-06, "loss": 0.2049, "step": 8031 }, { "epoch": 0.8324178671364908, "grad_norm": 0.6887364983558655, "learning_rate": 2.874563908632142e-06, "loss": 0.2005, "step": 8032 }, { "epoch": 0.8325215048191522, "grad_norm": 0.7222094535827637, "learning_rate": 2.8710971681564472e-06, "loss": 0.2162, "step": 8033 }, { "epoch": 0.8326251425018136, "grad_norm": 0.5581437945365906, "learning_rate": 2.867632357738901e-06, "loss": 0.1525, "step": 8034 }, { "epoch": 0.832728780184475, "grad_norm": 0.6178992986679077, "learning_rate": 2.864169477769907e-06, "loss": 0.1748, "step": 8035 }, { "epoch": 0.8328324178671365, "grad_norm": 0.674506664276123, "learning_rate": 2.860708528639653e-06, "loss": 0.1973, "step": 8036 }, { "epoch": 0.8329360555497979, "grad_norm": 0.640842854976654, "learning_rate": 2.857249510738125e-06, "loss": 0.1906, "step": 8037 }, { "epoch": 0.8330396932324593, "grad_norm": 0.5568889379501343, "learning_rate": 2.8537924244550686e-06, "loss": 0.1765, "step": 8038 }, { "epoch": 0.8331433309151207, "grad_norm": 0.703639566898346, "learning_rate": 2.8503372701800304e-06, "loss": 0.2035, "step": 8039 }, { "epoch": 0.8332469685977821, "grad_norm": 0.7148422598838806, "learning_rate": 2.846884048302325e-06, "loss": 0.1781, "step": 8040 }, { "epoch": 0.8333506062804436, "grad_norm": 0.5351859331130981, "learning_rate": 2.8434327592110646e-06, "loss": 0.1772, "step": 8041 }, { "epoch": 0.833454243963105, "grad_norm": 0.6493597030639648, "learning_rate": 2.83998340329513e-06, "loss": 0.1941, "step": 8042 }, { "epoch": 0.8335578816457664, "grad_norm": 0.8238438963890076, "learning_rate": 2.836535980943187e-06, "loss": 0.2018, "step": 8043 }, { "epoch": 0.8336615193284278, "grad_norm": 0.7464231252670288, "learning_rate": 2.833090492543691e-06, "loss": 0.2039, "step": 8044 }, { "epoch": 0.8337651570110892, "grad_norm": 0.7440434098243713, "learning_rate": 2.829646938484869e-06, "loss": 0.1981, "step": 8045 }, { "epoch": 0.8338687946937506, "grad_norm": 0.6757184863090515, "learning_rate": 2.8262053191547377e-06, "loss": 0.177, "step": 8046 }, { "epoch": 0.833972432376412, "grad_norm": 0.5503365397453308, "learning_rate": 2.822765634941098e-06, "loss": 0.134, "step": 8047 }, { "epoch": 0.8340760700590735, "grad_norm": 0.7327800393104553, "learning_rate": 2.819327886231524e-06, "loss": 0.1837, "step": 8048 }, { "epoch": 0.8341797077417349, "grad_norm": 0.7348129153251648, "learning_rate": 2.8158920734133753e-06, "loss": 0.2082, "step": 8049 }, { "epoch": 0.8342833454243963, "grad_norm": 0.7318658232688904, "learning_rate": 2.812458196873791e-06, "loss": 0.1945, "step": 8050 }, { "epoch": 0.8343869831070577, "grad_norm": 0.6062182784080505, "learning_rate": 2.8090262569996984e-06, "loss": 0.1756, "step": 8051 }, { "epoch": 0.8344906207897191, "grad_norm": 0.6037781238555908, "learning_rate": 2.805596254177807e-06, "loss": 0.1927, "step": 8052 }, { "epoch": 0.8345942584723806, "grad_norm": 0.631915271282196, "learning_rate": 2.8021681887945964e-06, "loss": 0.1818, "step": 8053 }, { "epoch": 0.834697896155042, "grad_norm": 0.8001081943511963, "learning_rate": 2.79874206123635e-06, "loss": 0.2223, "step": 8054 }, { "epoch": 0.8348015338377034, "grad_norm": 0.7133634686470032, "learning_rate": 2.795317871889098e-06, "loss": 0.1844, "step": 8055 }, { "epoch": 0.8349051715203648, "grad_norm": 0.8070812821388245, "learning_rate": 2.7918956211386826e-06, "loss": 0.211, "step": 8056 }, { "epoch": 0.8350088092030262, "grad_norm": 0.6299300789833069, "learning_rate": 2.788475309370724e-06, "loss": 0.1743, "step": 8057 }, { "epoch": 0.8351124468856876, "grad_norm": 0.7091789841651917, "learning_rate": 2.7850569369706048e-06, "loss": 0.2124, "step": 8058 }, { "epoch": 0.8352160845683491, "grad_norm": 0.5357885956764221, "learning_rate": 2.781640504323515e-06, "loss": 0.1677, "step": 8059 }, { "epoch": 0.8353197222510105, "grad_norm": 0.6380164623260498, "learning_rate": 2.7782260118144065e-06, "loss": 0.2014, "step": 8060 }, { "epoch": 0.8354233599336719, "grad_norm": 0.7222257256507874, "learning_rate": 2.774813459828016e-06, "loss": 0.194, "step": 8061 }, { "epoch": 0.8355269976163333, "grad_norm": 0.689274787902832, "learning_rate": 2.771402848748872e-06, "loss": 0.1961, "step": 8062 }, { "epoch": 0.8356306352989947, "grad_norm": 0.6324693560600281, "learning_rate": 2.767994178961266e-06, "loss": 0.18, "step": 8063 }, { "epoch": 0.8357342729816561, "grad_norm": 0.671479344367981, "learning_rate": 2.7645874508492943e-06, "loss": 0.1977, "step": 8064 }, { "epoch": 0.8358379106643176, "grad_norm": 0.7867868542671204, "learning_rate": 2.761182664796811e-06, "loss": 0.2011, "step": 8065 }, { "epoch": 0.835941548346979, "grad_norm": 0.7570082545280457, "learning_rate": 2.7577798211874717e-06, "loss": 0.2166, "step": 8066 }, { "epoch": 0.8360451860296404, "grad_norm": 0.6764487624168396, "learning_rate": 2.7543789204046967e-06, "loss": 0.1951, "step": 8067 }, { "epoch": 0.8361488237123018, "grad_norm": 0.5519558191299438, "learning_rate": 2.7509799628316923e-06, "loss": 0.153, "step": 8068 }, { "epoch": 0.8362524613949632, "grad_norm": 0.680759072303772, "learning_rate": 2.747582948851457e-06, "loss": 0.2039, "step": 8069 }, { "epoch": 0.8363560990776246, "grad_norm": 0.658055305480957, "learning_rate": 2.7441878788467515e-06, "loss": 0.1988, "step": 8070 }, { "epoch": 0.8364597367602861, "grad_norm": 0.7835912108421326, "learning_rate": 2.740794753200131e-06, "loss": 0.1886, "step": 8071 }, { "epoch": 0.8365633744429475, "grad_norm": 0.7117691040039062, "learning_rate": 2.7374035722939307e-06, "loss": 0.2061, "step": 8072 }, { "epoch": 0.8366670121256089, "grad_norm": 0.8212706446647644, "learning_rate": 2.7340143365102623e-06, "loss": 0.2299, "step": 8073 }, { "epoch": 0.8367706498082703, "grad_norm": 0.6776049733161926, "learning_rate": 2.7306270462310158e-06, "loss": 0.2003, "step": 8074 }, { "epoch": 0.8368742874909317, "grad_norm": 0.62154221534729, "learning_rate": 2.7272417018378662e-06, "loss": 0.17, "step": 8075 }, { "epoch": 0.8369779251735932, "grad_norm": 0.5986471772193909, "learning_rate": 2.72385830371227e-06, "loss": 0.1653, "step": 8076 }, { "epoch": 0.8370815628562546, "grad_norm": 0.7299423813819885, "learning_rate": 2.7204768522354675e-06, "loss": 0.2454, "step": 8077 }, { "epoch": 0.837185200538916, "grad_norm": 0.6664876937866211, "learning_rate": 2.7170973477884666e-06, "loss": 0.1904, "step": 8078 }, { "epoch": 0.8372888382215774, "grad_norm": 0.6690018773078918, "learning_rate": 2.7137197907520763e-06, "loss": 0.1751, "step": 8079 }, { "epoch": 0.8373924759042388, "grad_norm": 0.5193243622779846, "learning_rate": 2.7103441815068656e-06, "loss": 0.1532, "step": 8080 }, { "epoch": 0.8374961135869002, "grad_norm": 0.760511577129364, "learning_rate": 2.706970520433192e-06, "loss": 0.2175, "step": 8081 }, { "epoch": 0.8375997512695617, "grad_norm": 0.6827983856201172, "learning_rate": 2.703598807911203e-06, "loss": 0.1964, "step": 8082 }, { "epoch": 0.8377033889522231, "grad_norm": 0.566498875617981, "learning_rate": 2.7002290443208056e-06, "loss": 0.1586, "step": 8083 }, { "epoch": 0.8378070266348845, "grad_norm": 0.619435727596283, "learning_rate": 2.696861230041714e-06, "loss": 0.1744, "step": 8084 }, { "epoch": 0.8379106643175459, "grad_norm": 0.6570318937301636, "learning_rate": 2.693495365453398e-06, "loss": 0.2029, "step": 8085 }, { "epoch": 0.8380143020002073, "grad_norm": 0.7254251837730408, "learning_rate": 2.6901314509351183e-06, "loss": 0.203, "step": 8086 }, { "epoch": 0.8381179396828687, "grad_norm": 0.7361481785774231, "learning_rate": 2.6867694868659213e-06, "loss": 0.2305, "step": 8087 }, { "epoch": 0.8382215773655302, "grad_norm": 0.5852012038230896, "learning_rate": 2.6834094736246207e-06, "loss": 0.1895, "step": 8088 }, { "epoch": 0.8383252150481916, "grad_norm": 0.5840774178504944, "learning_rate": 2.680051411589826e-06, "loss": 0.1664, "step": 8089 }, { "epoch": 0.838428852730853, "grad_norm": 0.6098830699920654, "learning_rate": 2.676695301139909e-06, "loss": 0.1635, "step": 8090 }, { "epoch": 0.8385324904135144, "grad_norm": 0.6075127720832825, "learning_rate": 2.6733411426530385e-06, "loss": 0.1635, "step": 8091 }, { "epoch": 0.8386361280961757, "grad_norm": 0.6864191293716431, "learning_rate": 2.669988936507155e-06, "loss": 0.1931, "step": 8092 }, { "epoch": 0.8387397657788371, "grad_norm": 0.6276586055755615, "learning_rate": 2.666638683079974e-06, "loss": 0.1773, "step": 8093 }, { "epoch": 0.8388434034614985, "grad_norm": 0.7612707018852234, "learning_rate": 2.6632903827490063e-06, "loss": 0.2042, "step": 8094 }, { "epoch": 0.83894704114416, "grad_norm": 0.8222978711128235, "learning_rate": 2.659944035891522e-06, "loss": 0.2243, "step": 8095 }, { "epoch": 0.8390506788268214, "grad_norm": 0.5825135707855225, "learning_rate": 2.6565996428845873e-06, "loss": 0.1806, "step": 8096 }, { "epoch": 0.8391543165094828, "grad_norm": 0.6076071858406067, "learning_rate": 2.653257204105051e-06, "loss": 0.1851, "step": 8097 }, { "epoch": 0.8392579541921442, "grad_norm": 0.6137374639511108, "learning_rate": 2.6499167199295263e-06, "loss": 0.159, "step": 8098 }, { "epoch": 0.8393615918748056, "grad_norm": 0.5743726491928101, "learning_rate": 2.6465781907344124e-06, "loss": 0.1445, "step": 8099 }, { "epoch": 0.839465229557467, "grad_norm": 0.722928524017334, "learning_rate": 2.6432416168958887e-06, "loss": 0.1987, "step": 8100 }, { "epoch": 0.8395688672401285, "grad_norm": 0.6331915259361267, "learning_rate": 2.6399069987899163e-06, "loss": 0.1782, "step": 8101 }, { "epoch": 0.8396725049227899, "grad_norm": 0.5965705513954163, "learning_rate": 2.6365743367922434e-06, "loss": 0.1649, "step": 8102 }, { "epoch": 0.8397761426054513, "grad_norm": 0.7161065340042114, "learning_rate": 2.633243631278375e-06, "loss": 0.1848, "step": 8103 }, { "epoch": 0.8398797802881127, "grad_norm": 0.7084481120109558, "learning_rate": 2.6299148826236233e-06, "loss": 0.216, "step": 8104 }, { "epoch": 0.8399834179707741, "grad_norm": 0.6854572892189026, "learning_rate": 2.626588091203062e-06, "loss": 0.2289, "step": 8105 }, { "epoch": 0.8400870556534356, "grad_norm": 0.7259882092475891, "learning_rate": 2.6232632573915397e-06, "loss": 0.2186, "step": 8106 }, { "epoch": 0.840190693336097, "grad_norm": 0.7211769819259644, "learning_rate": 2.619940381563706e-06, "loss": 0.1835, "step": 8107 }, { "epoch": 0.8402943310187584, "grad_norm": 0.6280680298805237, "learning_rate": 2.616619464093968e-06, "loss": 0.1744, "step": 8108 }, { "epoch": 0.8403979687014198, "grad_norm": 0.708250105381012, "learning_rate": 2.6133005053565306e-06, "loss": 0.1952, "step": 8109 }, { "epoch": 0.8405016063840812, "grad_norm": 0.7434312701225281, "learning_rate": 2.60998350572536e-06, "loss": 0.1949, "step": 8110 }, { "epoch": 0.8406052440667426, "grad_norm": 0.7382619380950928, "learning_rate": 2.606668465574218e-06, "loss": 0.1763, "step": 8111 }, { "epoch": 0.8407088817494041, "grad_norm": 0.607403576374054, "learning_rate": 2.6033553852766356e-06, "loss": 0.1779, "step": 8112 }, { "epoch": 0.8408125194320655, "grad_norm": 0.6638832092285156, "learning_rate": 2.600044265205921e-06, "loss": 0.1804, "step": 8113 }, { "epoch": 0.8409161571147269, "grad_norm": 0.702063262462616, "learning_rate": 2.596735105735173e-06, "loss": 0.1919, "step": 8114 }, { "epoch": 0.8410197947973883, "grad_norm": 0.6962888836860657, "learning_rate": 2.5934279072372558e-06, "loss": 0.1829, "step": 8115 }, { "epoch": 0.8411234324800497, "grad_norm": 0.6257210969924927, "learning_rate": 2.5901226700848226e-06, "loss": 0.1742, "step": 8116 }, { "epoch": 0.8412270701627111, "grad_norm": 0.7082609534263611, "learning_rate": 2.5868193946503106e-06, "loss": 0.188, "step": 8117 }, { "epoch": 0.8413307078453726, "grad_norm": 0.5426973104476929, "learning_rate": 2.583518081305911e-06, "loss": 0.1474, "step": 8118 }, { "epoch": 0.841434345528034, "grad_norm": 0.6663376688957214, "learning_rate": 2.580218730423627e-06, "loss": 0.1948, "step": 8119 }, { "epoch": 0.8415379832106954, "grad_norm": 0.6552466154098511, "learning_rate": 2.5769213423752093e-06, "loss": 0.1686, "step": 8120 }, { "epoch": 0.8416416208933568, "grad_norm": 0.7427363991737366, "learning_rate": 2.573625917532212e-06, "loss": 0.2328, "step": 8121 }, { "epoch": 0.8417452585760182, "grad_norm": 0.7532801032066345, "learning_rate": 2.5703324562659605e-06, "loss": 0.2163, "step": 8122 }, { "epoch": 0.8418488962586796, "grad_norm": 0.6454771757125854, "learning_rate": 2.567040958947551e-06, "loss": 0.1798, "step": 8123 }, { "epoch": 0.8419525339413411, "grad_norm": 0.6346676349639893, "learning_rate": 2.563751425947869e-06, "loss": 0.1686, "step": 8124 }, { "epoch": 0.8420561716240025, "grad_norm": 0.679057776927948, "learning_rate": 2.560463857637565e-06, "loss": 0.2044, "step": 8125 }, { "epoch": 0.8421598093066639, "grad_norm": 0.8455599546432495, "learning_rate": 2.5571782543870826e-06, "loss": 0.2225, "step": 8126 }, { "epoch": 0.8422634469893253, "grad_norm": 0.6194790005683899, "learning_rate": 2.5538946165666457e-06, "loss": 0.158, "step": 8127 }, { "epoch": 0.8423670846719867, "grad_norm": 0.6403823494911194, "learning_rate": 2.550612944546238e-06, "loss": 0.1891, "step": 8128 }, { "epoch": 0.8424707223546481, "grad_norm": 0.6203027367591858, "learning_rate": 2.5473332386956417e-06, "loss": 0.1667, "step": 8129 }, { "epoch": 0.8425743600373096, "grad_norm": 0.6994303464889526, "learning_rate": 2.544055499384406e-06, "loss": 0.1789, "step": 8130 }, { "epoch": 0.842677997719971, "grad_norm": 0.6344105005264282, "learning_rate": 2.5407797269818546e-06, "loss": 0.1967, "step": 8131 }, { "epoch": 0.8427816354026324, "grad_norm": 0.7821642756462097, "learning_rate": 2.5375059218571084e-06, "loss": 0.217, "step": 8132 }, { "epoch": 0.8428852730852938, "grad_norm": 0.6653828024864197, "learning_rate": 2.5342340843790458e-06, "loss": 0.1609, "step": 8133 }, { "epoch": 0.8429889107679552, "grad_norm": 0.6741253733634949, "learning_rate": 2.5309642149163384e-06, "loss": 0.1892, "step": 8134 }, { "epoch": 0.8430925484506167, "grad_norm": 0.7713050246238708, "learning_rate": 2.527696313837422e-06, "loss": 0.205, "step": 8135 }, { "epoch": 0.8431961861332781, "grad_norm": 0.7960719466209412, "learning_rate": 2.52443038151053e-06, "loss": 0.2105, "step": 8136 }, { "epoch": 0.8432998238159395, "grad_norm": 0.7390683889389038, "learning_rate": 2.5211664183036554e-06, "loss": 0.1933, "step": 8137 }, { "epoch": 0.8434034614986009, "grad_norm": 0.6969931721687317, "learning_rate": 2.517904424584574e-06, "loss": 0.2129, "step": 8138 }, { "epoch": 0.8435070991812623, "grad_norm": 0.626531720161438, "learning_rate": 2.5146444007208493e-06, "loss": 0.1689, "step": 8139 }, { "epoch": 0.8436107368639237, "grad_norm": 0.760380744934082, "learning_rate": 2.5113863470798074e-06, "loss": 0.2185, "step": 8140 }, { "epoch": 0.8437143745465852, "grad_norm": 0.8247089385986328, "learning_rate": 2.5081302640285656e-06, "loss": 0.2483, "step": 8141 }, { "epoch": 0.8438180122292466, "grad_norm": 0.5384553074836731, "learning_rate": 2.504876151934017e-06, "loss": 0.1601, "step": 8142 }, { "epoch": 0.843921649911908, "grad_norm": 0.6652536392211914, "learning_rate": 2.501624011162829e-06, "loss": 0.2186, "step": 8143 }, { "epoch": 0.8440252875945694, "grad_norm": 0.7298545241355896, "learning_rate": 2.498373842081443e-06, "loss": 0.2247, "step": 8144 }, { "epoch": 0.8441289252772308, "grad_norm": 0.726304829120636, "learning_rate": 2.4951256450560844e-06, "loss": 0.2118, "step": 8145 }, { "epoch": 0.8442325629598922, "grad_norm": 0.6961712837219238, "learning_rate": 2.491879420452754e-06, "loss": 0.1772, "step": 8146 }, { "epoch": 0.8443362006425537, "grad_norm": 0.7808390259742737, "learning_rate": 2.488635168637239e-06, "loss": 0.2232, "step": 8147 }, { "epoch": 0.8444398383252151, "grad_norm": 0.705258309841156, "learning_rate": 2.485392889975091e-06, "loss": 0.1916, "step": 8148 }, { "epoch": 0.8445434760078765, "grad_norm": 0.7543384432792664, "learning_rate": 2.4821525848316454e-06, "loss": 0.2208, "step": 8149 }, { "epoch": 0.8446471136905379, "grad_norm": 0.7452192306518555, "learning_rate": 2.478914253572011e-06, "loss": 0.2098, "step": 8150 }, { "epoch": 0.8447507513731993, "grad_norm": 0.8277132511138916, "learning_rate": 2.4756778965610794e-06, "loss": 0.2144, "step": 8151 }, { "epoch": 0.8448543890558607, "grad_norm": 0.553561270236969, "learning_rate": 2.472443514163525e-06, "loss": 0.1629, "step": 8152 }, { "epoch": 0.8449580267385222, "grad_norm": 0.694131076335907, "learning_rate": 2.469211106743785e-06, "loss": 0.1864, "step": 8153 }, { "epoch": 0.8450616644211836, "grad_norm": 0.6504413485527039, "learning_rate": 2.465980674666091e-06, "loss": 0.1743, "step": 8154 }, { "epoch": 0.845165302103845, "grad_norm": 0.6953386664390564, "learning_rate": 2.462752218294435e-06, "loss": 0.1974, "step": 8155 }, { "epoch": 0.8452689397865064, "grad_norm": 0.7989981770515442, "learning_rate": 2.4595257379925943e-06, "loss": 0.2132, "step": 8156 }, { "epoch": 0.8453725774691678, "grad_norm": 0.6267284750938416, "learning_rate": 2.45630123412413e-06, "loss": 0.1682, "step": 8157 }, { "epoch": 0.8454762151518292, "grad_norm": 0.6220939755439758, "learning_rate": 2.4530787070523655e-06, "loss": 0.1936, "step": 8158 }, { "epoch": 0.8455798528344907, "grad_norm": 0.6248716711997986, "learning_rate": 2.4498581571404188e-06, "loss": 0.1933, "step": 8159 }, { "epoch": 0.8456834905171521, "grad_norm": 0.6560169458389282, "learning_rate": 2.446639584751169e-06, "loss": 0.1932, "step": 8160 }, { "epoch": 0.8457871281998135, "grad_norm": 0.723879873752594, "learning_rate": 2.4434229902472882e-06, "loss": 0.2124, "step": 8161 }, { "epoch": 0.8458907658824749, "grad_norm": 0.671961784362793, "learning_rate": 2.440208373991213e-06, "loss": 0.2075, "step": 8162 }, { "epoch": 0.8459944035651363, "grad_norm": 0.7564287185668945, "learning_rate": 2.4369957363451557e-06, "loss": 0.2031, "step": 8163 }, { "epoch": 0.8460980412477977, "grad_norm": 0.7248294353485107, "learning_rate": 2.4337850776711223e-06, "loss": 0.209, "step": 8164 }, { "epoch": 0.8462016789304592, "grad_norm": 0.663909375667572, "learning_rate": 2.430576398330873e-06, "loss": 0.2041, "step": 8165 }, { "epoch": 0.8463053166131206, "grad_norm": 0.647461473941803, "learning_rate": 2.427369698685964e-06, "loss": 0.1846, "step": 8166 }, { "epoch": 0.846408954295782, "grad_norm": 0.5828126072883606, "learning_rate": 2.424164979097725e-06, "loss": 0.1643, "step": 8167 }, { "epoch": 0.8465125919784433, "grad_norm": 0.690223753452301, "learning_rate": 2.420962239927254e-06, "loss": 0.19, "step": 8168 }, { "epoch": 0.8466162296611047, "grad_norm": 0.6529852151870728, "learning_rate": 2.41776148153543e-06, "loss": 0.197, "step": 8169 }, { "epoch": 0.8467198673437661, "grad_norm": 0.6087262630462646, "learning_rate": 2.4145627042829056e-06, "loss": 0.1779, "step": 8170 }, { "epoch": 0.8468235050264276, "grad_norm": 0.6241489052772522, "learning_rate": 2.411365908530119e-06, "loss": 0.1634, "step": 8171 }, { "epoch": 0.846927142709089, "grad_norm": 0.6478610634803772, "learning_rate": 2.408171094637284e-06, "loss": 0.1907, "step": 8172 }, { "epoch": 0.8470307803917504, "grad_norm": 0.7067448496818542, "learning_rate": 2.404978262964379e-06, "loss": 0.1986, "step": 8173 }, { "epoch": 0.8471344180744118, "grad_norm": 0.6290725469589233, "learning_rate": 2.401787413871175e-06, "loss": 0.167, "step": 8174 }, { "epoch": 0.8472380557570732, "grad_norm": 0.6874917149543762, "learning_rate": 2.398598547717208e-06, "loss": 0.216, "step": 8175 }, { "epoch": 0.8473416934397346, "grad_norm": 0.6628933548927307, "learning_rate": 2.3954116648617907e-06, "loss": 0.1737, "step": 8176 }, { "epoch": 0.8474453311223961, "grad_norm": 0.7646678686141968, "learning_rate": 2.3922267656640253e-06, "loss": 0.2003, "step": 8177 }, { "epoch": 0.8475489688050575, "grad_norm": 0.7109581232070923, "learning_rate": 2.3890438504827706e-06, "loss": 0.2019, "step": 8178 }, { "epoch": 0.8476526064877189, "grad_norm": 0.628286600112915, "learning_rate": 2.3858629196766846e-06, "loss": 0.1878, "step": 8179 }, { "epoch": 0.8477562441703803, "grad_norm": 0.5861483812332153, "learning_rate": 2.382683973604181e-06, "loss": 0.1746, "step": 8180 }, { "epoch": 0.8478598818530417, "grad_norm": 0.631664514541626, "learning_rate": 2.3795070126234563e-06, "loss": 0.1521, "step": 8181 }, { "epoch": 0.8479635195357031, "grad_norm": 0.7326377630233765, "learning_rate": 2.376332037092495e-06, "loss": 0.2062, "step": 8182 }, { "epoch": 0.8480671572183646, "grad_norm": 0.7119858860969543, "learning_rate": 2.373159047369038e-06, "loss": 0.1766, "step": 8183 }, { "epoch": 0.848170794901026, "grad_norm": 0.6483974456787109, "learning_rate": 2.3699880438106225e-06, "loss": 0.1695, "step": 8184 }, { "epoch": 0.8482744325836874, "grad_norm": 0.5860996842384338, "learning_rate": 2.366819026774545e-06, "loss": 0.1443, "step": 8185 }, { "epoch": 0.8483780702663488, "grad_norm": 0.6049185991287231, "learning_rate": 2.3636519966178905e-06, "loss": 0.1847, "step": 8186 }, { "epoch": 0.8484817079490102, "grad_norm": 0.7595677375793457, "learning_rate": 2.360486953697516e-06, "loss": 0.2154, "step": 8187 }, { "epoch": 0.8485853456316717, "grad_norm": 0.6683211326599121, "learning_rate": 2.3573238983700432e-06, "loss": 0.206, "step": 8188 }, { "epoch": 0.8486889833143331, "grad_norm": 0.7454718351364136, "learning_rate": 2.3541628309918886e-06, "loss": 0.2157, "step": 8189 }, { "epoch": 0.8487926209969945, "grad_norm": 0.7679634094238281, "learning_rate": 2.3510037519192385e-06, "loss": 0.2123, "step": 8190 }, { "epoch": 0.8488962586796559, "grad_norm": 0.6262506246566772, "learning_rate": 2.3478466615080465e-06, "loss": 0.1947, "step": 8191 }, { "epoch": 0.8489998963623173, "grad_norm": 0.7192820310592651, "learning_rate": 2.3446915601140564e-06, "loss": 0.2036, "step": 8192 }, { "epoch": 0.8491035340449787, "grad_norm": 0.7609272599220276, "learning_rate": 2.341538448092775e-06, "loss": 0.1991, "step": 8193 }, { "epoch": 0.8492071717276402, "grad_norm": 0.6904714107513428, "learning_rate": 2.3383873257994848e-06, "loss": 0.1854, "step": 8194 }, { "epoch": 0.8493108094103016, "grad_norm": 0.741290807723999, "learning_rate": 2.3352381935892622e-06, "loss": 0.2169, "step": 8195 }, { "epoch": 0.849414447092963, "grad_norm": 0.5596386790275574, "learning_rate": 2.3320910518169335e-06, "loss": 0.1678, "step": 8196 }, { "epoch": 0.8495180847756244, "grad_norm": 0.7157734036445618, "learning_rate": 2.328945900837125e-06, "loss": 0.2256, "step": 8197 }, { "epoch": 0.8496217224582858, "grad_norm": 0.6879149675369263, "learning_rate": 2.3258027410042173e-06, "loss": 0.2025, "step": 8198 }, { "epoch": 0.8497253601409472, "grad_norm": 0.7603225708007812, "learning_rate": 2.3226615726723868e-06, "loss": 0.212, "step": 8199 }, { "epoch": 0.8498289978236087, "grad_norm": 0.6839963793754578, "learning_rate": 2.3195223961955705e-06, "loss": 0.2131, "step": 8200 }, { "epoch": 0.8499326355062701, "grad_norm": 0.5889342427253723, "learning_rate": 2.316385211927479e-06, "loss": 0.163, "step": 8201 }, { "epoch": 0.8500362731889315, "grad_norm": 0.6352614164352417, "learning_rate": 2.313250020221618e-06, "loss": 0.1916, "step": 8202 }, { "epoch": 0.8501399108715929, "grad_norm": 0.6570771336555481, "learning_rate": 2.3101168214312474e-06, "loss": 0.155, "step": 8203 }, { "epoch": 0.8502435485542543, "grad_norm": 0.79152911901474, "learning_rate": 2.3069856159094115e-06, "loss": 0.222, "step": 8204 }, { "epoch": 0.8503471862369157, "grad_norm": 0.7477522492408752, "learning_rate": 2.3038564040089374e-06, "loss": 0.2124, "step": 8205 }, { "epoch": 0.8504508239195772, "grad_norm": 0.7140838503837585, "learning_rate": 2.3007291860824155e-06, "loss": 0.1901, "step": 8206 }, { "epoch": 0.8505544616022386, "grad_norm": 0.5891298651695251, "learning_rate": 2.2976039624822133e-06, "loss": 0.1695, "step": 8207 }, { "epoch": 0.8506580992849, "grad_norm": 0.6833799481391907, "learning_rate": 2.2944807335604733e-06, "loss": 0.191, "step": 8208 }, { "epoch": 0.8507617369675614, "grad_norm": 0.6844556331634521, "learning_rate": 2.2913594996691212e-06, "loss": 0.1878, "step": 8209 }, { "epoch": 0.8508653746502228, "grad_norm": 0.6452999711036682, "learning_rate": 2.2882402611598574e-06, "loss": 0.1819, "step": 8210 }, { "epoch": 0.8509690123328842, "grad_norm": 0.6083611845970154, "learning_rate": 2.2851230183841453e-06, "loss": 0.1653, "step": 8211 }, { "epoch": 0.8510726500155457, "grad_norm": 0.6736093759536743, "learning_rate": 2.2820077716932353e-06, "loss": 0.1728, "step": 8212 }, { "epoch": 0.8511762876982071, "grad_norm": 0.6951460838317871, "learning_rate": 2.27889452143814e-06, "loss": 0.1975, "step": 8213 }, { "epoch": 0.8512799253808685, "grad_norm": 1.0119082927703857, "learning_rate": 2.2757832679696645e-06, "loss": 0.2236, "step": 8214 }, { "epoch": 0.8513835630635299, "grad_norm": 0.7682976126670837, "learning_rate": 2.2726740116383805e-06, "loss": 0.2365, "step": 8215 }, { "epoch": 0.8514872007461913, "grad_norm": 0.7260084748268127, "learning_rate": 2.2695667527946253e-06, "loss": 0.2021, "step": 8216 }, { "epoch": 0.8515908384288527, "grad_norm": 0.663433313369751, "learning_rate": 2.2664614917885318e-06, "loss": 0.1833, "step": 8217 }, { "epoch": 0.8516944761115142, "grad_norm": 0.6920781135559082, "learning_rate": 2.263358228969992e-06, "loss": 0.1774, "step": 8218 }, { "epoch": 0.8517981137941756, "grad_norm": 0.6206676959991455, "learning_rate": 2.2602569646886674e-06, "loss": 0.1613, "step": 8219 }, { "epoch": 0.851901751476837, "grad_norm": 0.7389729619026184, "learning_rate": 2.257157699294017e-06, "loss": 0.2059, "step": 8220 }, { "epoch": 0.8520053891594984, "grad_norm": 0.6792731285095215, "learning_rate": 2.25406043313525e-06, "loss": 0.1891, "step": 8221 }, { "epoch": 0.8521090268421598, "grad_norm": 0.6999104022979736, "learning_rate": 2.2509651665613717e-06, "loss": 0.2218, "step": 8222 }, { "epoch": 0.8522126645248213, "grad_norm": 0.714362621307373, "learning_rate": 2.2478718999211436e-06, "loss": 0.1854, "step": 8223 }, { "epoch": 0.8523163022074827, "grad_norm": 0.8368086218833923, "learning_rate": 2.244780633563115e-06, "loss": 0.2328, "step": 8224 }, { "epoch": 0.8524199398901441, "grad_norm": 0.6363280415534973, "learning_rate": 2.2416913678356054e-06, "loss": 0.1777, "step": 8225 }, { "epoch": 0.8525235775728055, "grad_norm": 0.6111525893211365, "learning_rate": 2.2386041030867034e-06, "loss": 0.1913, "step": 8226 }, { "epoch": 0.8526272152554669, "grad_norm": 0.760571300983429, "learning_rate": 2.2355188396642833e-06, "loss": 0.2093, "step": 8227 }, { "epoch": 0.8527308529381283, "grad_norm": 0.7413966059684753, "learning_rate": 2.232435577915981e-06, "loss": 0.2058, "step": 8228 }, { "epoch": 0.8528344906207898, "grad_norm": 0.6896591782569885, "learning_rate": 2.2293543181892186e-06, "loss": 0.1985, "step": 8229 }, { "epoch": 0.8529381283034512, "grad_norm": 0.6558910608291626, "learning_rate": 2.226275060831189e-06, "loss": 0.1944, "step": 8230 }, { "epoch": 0.8530417659861126, "grad_norm": 0.6779370903968811, "learning_rate": 2.223197806188857e-06, "loss": 0.1768, "step": 8231 }, { "epoch": 0.853145403668774, "grad_norm": 0.5688185095787048, "learning_rate": 2.2201225546089612e-06, "loss": 0.1503, "step": 8232 }, { "epoch": 0.8532490413514354, "grad_norm": 0.7514565587043762, "learning_rate": 2.2170493064380126e-06, "loss": 0.2315, "step": 8233 }, { "epoch": 0.8533526790340968, "grad_norm": 0.7149367332458496, "learning_rate": 2.213978062022304e-06, "loss": 0.1942, "step": 8234 }, { "epoch": 0.8534563167167583, "grad_norm": 0.6362113356590271, "learning_rate": 2.2109088217079023e-06, "loss": 0.1959, "step": 8235 }, { "epoch": 0.8535599543994197, "grad_norm": 0.6998996138572693, "learning_rate": 2.2078415858406377e-06, "loss": 0.196, "step": 8236 }, { "epoch": 0.8536635920820811, "grad_norm": 0.6976892352104187, "learning_rate": 2.2047763547661295e-06, "loss": 0.1936, "step": 8237 }, { "epoch": 0.8537672297647425, "grad_norm": 0.6329091787338257, "learning_rate": 2.2017131288297567e-06, "loss": 0.1782, "step": 8238 }, { "epoch": 0.8538708674474039, "grad_norm": 0.6380835771560669, "learning_rate": 2.1986519083766767e-06, "loss": 0.1838, "step": 8239 }, { "epoch": 0.8539745051300653, "grad_norm": 0.702772855758667, "learning_rate": 2.1955926937518314e-06, "loss": 0.1531, "step": 8240 }, { "epoch": 0.8540781428127268, "grad_norm": 0.5946405529975891, "learning_rate": 2.1925354852999204e-06, "loss": 0.1662, "step": 8241 }, { "epoch": 0.8541817804953882, "grad_norm": 0.7317295670509338, "learning_rate": 2.1894802833654305e-06, "loss": 0.2033, "step": 8242 }, { "epoch": 0.8542854181780496, "grad_norm": 0.6746308207511902, "learning_rate": 2.1864270882926176e-06, "loss": 0.1898, "step": 8243 }, { "epoch": 0.8543890558607109, "grad_norm": 0.6947521567344666, "learning_rate": 2.183375900425504e-06, "loss": 0.1915, "step": 8244 }, { "epoch": 0.8544926935433723, "grad_norm": 0.6422945857048035, "learning_rate": 2.1803267201079015e-06, "loss": 0.1886, "step": 8245 }, { "epoch": 0.8545963312260337, "grad_norm": 0.6722944378852844, "learning_rate": 2.1772795476833776e-06, "loss": 0.217, "step": 8246 }, { "epoch": 0.8546999689086952, "grad_norm": 0.731451690196991, "learning_rate": 2.174234383495293e-06, "loss": 0.1954, "step": 8247 }, { "epoch": 0.8548036065913566, "grad_norm": 0.5368337035179138, "learning_rate": 2.171191227886764e-06, "loss": 0.1504, "step": 8248 }, { "epoch": 0.854907244274018, "grad_norm": 0.7373712062835693, "learning_rate": 2.168150081200697e-06, "loss": 0.1878, "step": 8249 }, { "epoch": 0.8550108819566794, "grad_norm": 0.6935915350914001, "learning_rate": 2.165110943779756e-06, "loss": 0.1821, "step": 8250 }, { "epoch": 0.8551145196393408, "grad_norm": 0.7318151593208313, "learning_rate": 2.162073815966388e-06, "loss": 0.2027, "step": 8251 }, { "epoch": 0.8552181573220022, "grad_norm": 0.7722030878067017, "learning_rate": 2.1590386981028154e-06, "loss": 0.1945, "step": 8252 }, { "epoch": 0.8553217950046637, "grad_norm": 0.640789270401001, "learning_rate": 2.1560055905310227e-06, "loss": 0.1789, "step": 8253 }, { "epoch": 0.8554254326873251, "grad_norm": 0.6491336226463318, "learning_rate": 2.1529744935927834e-06, "loss": 0.1898, "step": 8254 }, { "epoch": 0.8555290703699865, "grad_norm": 0.7431808710098267, "learning_rate": 2.1499454076296365e-06, "loss": 0.2145, "step": 8255 }, { "epoch": 0.8556327080526479, "grad_norm": 0.747519850730896, "learning_rate": 2.1469183329828925e-06, "loss": 0.2015, "step": 8256 }, { "epoch": 0.8557363457353093, "grad_norm": 0.7901133894920349, "learning_rate": 2.14389326999364e-06, "loss": 0.2204, "step": 8257 }, { "epoch": 0.8558399834179707, "grad_norm": 0.6684821844100952, "learning_rate": 2.140870219002731e-06, "loss": 0.197, "step": 8258 }, { "epoch": 0.8559436211006322, "grad_norm": 0.6593709588050842, "learning_rate": 2.137849180350802e-06, "loss": 0.1911, "step": 8259 }, { "epoch": 0.8560472587832936, "grad_norm": 0.7188991904258728, "learning_rate": 2.1348301543782648e-06, "loss": 0.2288, "step": 8260 }, { "epoch": 0.856150896465955, "grad_norm": 0.683214008808136, "learning_rate": 2.1318131414252895e-06, "loss": 0.185, "step": 8261 }, { "epoch": 0.8562545341486164, "grad_norm": 0.6181463599205017, "learning_rate": 2.128798141831836e-06, "loss": 0.1876, "step": 8262 }, { "epoch": 0.8563581718312778, "grad_norm": 0.6907486319541931, "learning_rate": 2.1257851559376296e-06, "loss": 0.1847, "step": 8263 }, { "epoch": 0.8564618095139392, "grad_norm": 0.6637782454490662, "learning_rate": 2.122774184082159e-06, "loss": 0.1711, "step": 8264 }, { "epoch": 0.8565654471966007, "grad_norm": 0.6911178827285767, "learning_rate": 2.1197652266047064e-06, "loss": 0.2138, "step": 8265 }, { "epoch": 0.8566690848792621, "grad_norm": 0.5903525948524475, "learning_rate": 2.116758283844311e-06, "loss": 0.1618, "step": 8266 }, { "epoch": 0.8567727225619235, "grad_norm": 0.6123262047767639, "learning_rate": 2.1137533561397937e-06, "loss": 0.1849, "step": 8267 }, { "epoch": 0.8568763602445849, "grad_norm": 0.5878732800483704, "learning_rate": 2.110750443829741e-06, "loss": 0.1721, "step": 8268 }, { "epoch": 0.8569799979272463, "grad_norm": 0.7839241623878479, "learning_rate": 2.1077495472525242e-06, "loss": 0.2296, "step": 8269 }, { "epoch": 0.8570836356099077, "grad_norm": 0.6753618717193604, "learning_rate": 2.1047506667462713e-06, "loss": 0.1854, "step": 8270 }, { "epoch": 0.8571872732925692, "grad_norm": 0.751511812210083, "learning_rate": 2.1017538026488936e-06, "loss": 0.1914, "step": 8271 }, { "epoch": 0.8572909109752306, "grad_norm": 0.639025866985321, "learning_rate": 2.098758955298077e-06, "loss": 0.1722, "step": 8272 }, { "epoch": 0.857394548657892, "grad_norm": 0.7590596079826355, "learning_rate": 2.095766125031269e-06, "loss": 0.2329, "step": 8273 }, { "epoch": 0.8574981863405534, "grad_norm": 0.7018405795097351, "learning_rate": 2.0927753121857043e-06, "loss": 0.2226, "step": 8274 }, { "epoch": 0.8576018240232148, "grad_norm": 0.6705461144447327, "learning_rate": 2.08978651709838e-06, "loss": 0.2002, "step": 8275 }, { "epoch": 0.8577054617058762, "grad_norm": 0.681593120098114, "learning_rate": 2.0867997401060667e-06, "loss": 0.2266, "step": 8276 }, { "epoch": 0.8578090993885377, "grad_norm": 0.590961217880249, "learning_rate": 2.083814981545316e-06, "loss": 0.2027, "step": 8277 }, { "epoch": 0.8579127370711991, "grad_norm": 0.6383353471755981, "learning_rate": 2.080832241752437e-06, "loss": 0.1774, "step": 8278 }, { "epoch": 0.8580163747538605, "grad_norm": 0.6680395007133484, "learning_rate": 2.077851521063525e-06, "loss": 0.1708, "step": 8279 }, { "epoch": 0.8581200124365219, "grad_norm": 0.8208906650543213, "learning_rate": 2.0748728198144484e-06, "loss": 0.1869, "step": 8280 }, { "epoch": 0.8582236501191833, "grad_norm": 0.6549916863441467, "learning_rate": 2.0718961383408365e-06, "loss": 0.1796, "step": 8281 }, { "epoch": 0.8583272878018448, "grad_norm": 0.6959506273269653, "learning_rate": 2.0689214769780962e-06, "loss": 0.1727, "step": 8282 }, { "epoch": 0.8584309254845062, "grad_norm": 0.8060963153839111, "learning_rate": 2.0659488360614087e-06, "loss": 0.2413, "step": 8283 }, { "epoch": 0.8585345631671676, "grad_norm": 0.7632854580879211, "learning_rate": 2.062978215925726e-06, "loss": 0.1939, "step": 8284 }, { "epoch": 0.858638200849829, "grad_norm": 0.661284327507019, "learning_rate": 2.060009616905778e-06, "loss": 0.1853, "step": 8285 }, { "epoch": 0.8587418385324904, "grad_norm": 0.608495831489563, "learning_rate": 2.057043039336053e-06, "loss": 0.1729, "step": 8286 }, { "epoch": 0.8588454762151518, "grad_norm": 0.7137344479560852, "learning_rate": 2.0540784835508322e-06, "loss": 0.1857, "step": 8287 }, { "epoch": 0.8589491138978133, "grad_norm": 0.6977378726005554, "learning_rate": 2.051115949884148e-06, "loss": 0.1964, "step": 8288 }, { "epoch": 0.8590527515804747, "grad_norm": 0.7782437205314636, "learning_rate": 2.0481554386698142e-06, "loss": 0.2284, "step": 8289 }, { "epoch": 0.8591563892631361, "grad_norm": 0.6119537353515625, "learning_rate": 2.0451969502414214e-06, "loss": 0.1852, "step": 8290 }, { "epoch": 0.8592600269457975, "grad_norm": 0.7304680347442627, "learning_rate": 2.0422404849323207e-06, "loss": 0.2084, "step": 8291 }, { "epoch": 0.8593636646284589, "grad_norm": 0.6950478553771973, "learning_rate": 2.0392860430756523e-06, "loss": 0.1838, "step": 8292 }, { "epoch": 0.8594673023111203, "grad_norm": 0.663688600063324, "learning_rate": 2.0363336250043074e-06, "loss": 0.1667, "step": 8293 }, { "epoch": 0.8595709399937818, "grad_norm": 0.7153915166854858, "learning_rate": 2.033383231050967e-06, "loss": 0.1977, "step": 8294 }, { "epoch": 0.8596745776764432, "grad_norm": 0.7251477837562561, "learning_rate": 2.0304348615480763e-06, "loss": 0.2058, "step": 8295 }, { "epoch": 0.8597782153591046, "grad_norm": 0.6031265258789062, "learning_rate": 2.0274885168278467e-06, "loss": 0.1914, "step": 8296 }, { "epoch": 0.859881853041766, "grad_norm": 0.735448956489563, "learning_rate": 2.024544197222276e-06, "loss": 0.2079, "step": 8297 }, { "epoch": 0.8599854907244274, "grad_norm": 0.7274251580238342, "learning_rate": 2.021601903063117e-06, "loss": 0.189, "step": 8298 }, { "epoch": 0.8600891284070888, "grad_norm": 0.7757635712623596, "learning_rate": 2.0186616346819087e-06, "loss": 0.2079, "step": 8299 }, { "epoch": 0.8601927660897503, "grad_norm": 0.5823425650596619, "learning_rate": 2.015723392409958e-06, "loss": 0.1876, "step": 8300 }, { "epoch": 0.8602964037724117, "grad_norm": 0.704903244972229, "learning_rate": 2.012787176578339e-06, "loss": 0.2228, "step": 8301 }, { "epoch": 0.8604000414550731, "grad_norm": 0.7526519894599915, "learning_rate": 2.0098529875178995e-06, "loss": 0.2367, "step": 8302 }, { "epoch": 0.8605036791377345, "grad_norm": 0.6222978234291077, "learning_rate": 2.0069208255592555e-06, "loss": 0.1892, "step": 8303 }, { "epoch": 0.8606073168203959, "grad_norm": 0.7311579585075378, "learning_rate": 2.0039906910328e-06, "loss": 0.2006, "step": 8304 }, { "epoch": 0.8607109545030573, "grad_norm": 0.6790103316307068, "learning_rate": 2.0010625842687047e-06, "loss": 0.1814, "step": 8305 }, { "epoch": 0.8608145921857188, "grad_norm": 0.6187705397605896, "learning_rate": 1.998136505596895e-06, "loss": 0.1786, "step": 8306 }, { "epoch": 0.8609182298683802, "grad_norm": 0.6673712134361267, "learning_rate": 1.995212455347082e-06, "loss": 0.1717, "step": 8307 }, { "epoch": 0.8610218675510416, "grad_norm": 0.8776508569717407, "learning_rate": 1.992290433848736e-06, "loss": 0.2063, "step": 8308 }, { "epoch": 0.861125505233703, "grad_norm": 0.7525877952575684, "learning_rate": 1.9893704414311086e-06, "loss": 0.2112, "step": 8309 }, { "epoch": 0.8612291429163644, "grad_norm": 0.6398442983627319, "learning_rate": 1.9864524784232265e-06, "loss": 0.1998, "step": 8310 }, { "epoch": 0.8613327805990258, "grad_norm": 0.6990947723388672, "learning_rate": 1.9835365451538725e-06, "loss": 0.2018, "step": 8311 }, { "epoch": 0.8614364182816873, "grad_norm": 0.6815956830978394, "learning_rate": 1.9806226419516195e-06, "loss": 0.2087, "step": 8312 }, { "epoch": 0.8615400559643487, "grad_norm": 0.7535561323165894, "learning_rate": 1.977710769144794e-06, "loss": 0.2075, "step": 8313 }, { "epoch": 0.8616436936470101, "grad_norm": 0.6954166293144226, "learning_rate": 1.9748009270614978e-06, "loss": 0.1772, "step": 8314 }, { "epoch": 0.8617473313296715, "grad_norm": 0.7022972702980042, "learning_rate": 1.9718931160296175e-06, "loss": 0.2064, "step": 8315 }, { "epoch": 0.8618509690123329, "grad_norm": 0.6478790044784546, "learning_rate": 1.9689873363767907e-06, "loss": 0.1998, "step": 8316 }, { "epoch": 0.8619546066949944, "grad_norm": 0.6146255135536194, "learning_rate": 1.966083588430445e-06, "loss": 0.1938, "step": 8317 }, { "epoch": 0.8620582443776558, "grad_norm": 0.7748193144798279, "learning_rate": 1.9631818725177654e-06, "loss": 0.1849, "step": 8318 }, { "epoch": 0.8621618820603172, "grad_norm": 0.5730516314506531, "learning_rate": 1.9602821889657144e-06, "loss": 0.1722, "step": 8319 }, { "epoch": 0.8622655197429785, "grad_norm": 0.643845796585083, "learning_rate": 1.9573845381010236e-06, "loss": 0.1853, "step": 8320 }, { "epoch": 0.8623691574256399, "grad_norm": 0.7794548869132996, "learning_rate": 1.9544889202501925e-06, "loss": 0.1981, "step": 8321 }, { "epoch": 0.8624727951083013, "grad_norm": 0.6954991221427917, "learning_rate": 1.9515953357395e-06, "loss": 0.2042, "step": 8322 }, { "epoch": 0.8625764327909627, "grad_norm": 0.7107481360435486, "learning_rate": 1.9487037848949854e-06, "loss": 0.2207, "step": 8323 }, { "epoch": 0.8626800704736242, "grad_norm": 0.7179362177848816, "learning_rate": 1.9458142680424674e-06, "loss": 0.2147, "step": 8324 }, { "epoch": 0.8627837081562856, "grad_norm": 0.7268587350845337, "learning_rate": 1.942926785507535e-06, "loss": 0.2107, "step": 8325 }, { "epoch": 0.862887345838947, "grad_norm": 0.678972601890564, "learning_rate": 1.9400413376155414e-06, "loss": 0.166, "step": 8326 }, { "epoch": 0.8629909835216084, "grad_norm": 0.6394493579864502, "learning_rate": 1.9371579246916173e-06, "loss": 0.1717, "step": 8327 }, { "epoch": 0.8630946212042698, "grad_norm": 0.6306682825088501, "learning_rate": 1.934276547060654e-06, "loss": 0.1982, "step": 8328 }, { "epoch": 0.8631982588869312, "grad_norm": 0.6881734728813171, "learning_rate": 1.931397205047325e-06, "loss": 0.1803, "step": 8329 }, { "epoch": 0.8633018965695927, "grad_norm": 0.6395617723464966, "learning_rate": 1.9285198989760757e-06, "loss": 0.1674, "step": 8330 }, { "epoch": 0.8634055342522541, "grad_norm": 0.7378185987472534, "learning_rate": 1.925644629171106e-06, "loss": 0.2131, "step": 8331 }, { "epoch": 0.8635091719349155, "grad_norm": 0.6664524674415588, "learning_rate": 1.9227713959564066e-06, "loss": 0.1983, "step": 8332 }, { "epoch": 0.8636128096175769, "grad_norm": 0.5630848407745361, "learning_rate": 1.9199001996557263e-06, "loss": 0.1648, "step": 8333 }, { "epoch": 0.8637164473002383, "grad_norm": 0.7485938668251038, "learning_rate": 1.917031040592581e-06, "loss": 0.1823, "step": 8334 }, { "epoch": 0.8638200849828997, "grad_norm": 0.6663389801979065, "learning_rate": 1.9141639190902707e-06, "loss": 0.1965, "step": 8335 }, { "epoch": 0.8639237226655612, "grad_norm": 0.6991190314292908, "learning_rate": 1.911298835471851e-06, "loss": 0.2125, "step": 8336 }, { "epoch": 0.8640273603482226, "grad_norm": 0.6290115714073181, "learning_rate": 1.9084357900601637e-06, "loss": 0.1882, "step": 8337 }, { "epoch": 0.864130998030884, "grad_norm": 0.5512939691543579, "learning_rate": 1.9055747831778082e-06, "loss": 0.1605, "step": 8338 }, { "epoch": 0.8642346357135454, "grad_norm": 0.7532790303230286, "learning_rate": 1.9027158151471537e-06, "loss": 0.2065, "step": 8339 }, { "epoch": 0.8643382733962068, "grad_norm": 0.7798289656639099, "learning_rate": 1.899858886290351e-06, "loss": 0.1994, "step": 8340 }, { "epoch": 0.8644419110788683, "grad_norm": 0.660067081451416, "learning_rate": 1.8970039969293096e-06, "loss": 0.1888, "step": 8341 }, { "epoch": 0.8645455487615297, "grad_norm": 0.7333917021751404, "learning_rate": 1.8941511473857188e-06, "loss": 0.212, "step": 8342 }, { "epoch": 0.8646491864441911, "grad_norm": 0.7680668830871582, "learning_rate": 1.8913003379810258e-06, "loss": 0.1958, "step": 8343 }, { "epoch": 0.8647528241268525, "grad_norm": 0.688458263874054, "learning_rate": 1.8884515690364669e-06, "loss": 0.1967, "step": 8344 }, { "epoch": 0.8648564618095139, "grad_norm": 0.6076627969741821, "learning_rate": 1.8856048408730277e-06, "loss": 0.1723, "step": 8345 }, { "epoch": 0.8649600994921753, "grad_norm": 0.6366537809371948, "learning_rate": 1.8827601538114736e-06, "loss": 0.1806, "step": 8346 }, { "epoch": 0.8650637371748368, "grad_norm": 0.6349666118621826, "learning_rate": 1.8799175081723443e-06, "loss": 0.1895, "step": 8347 }, { "epoch": 0.8651673748574982, "grad_norm": 0.6724454760551453, "learning_rate": 1.8770769042759384e-06, "loss": 0.1907, "step": 8348 }, { "epoch": 0.8652710125401596, "grad_norm": 0.7153465747833252, "learning_rate": 1.8742383424423339e-06, "loss": 0.1836, "step": 8349 }, { "epoch": 0.865374650222821, "grad_norm": 0.6909909844398499, "learning_rate": 1.8714018229913812e-06, "loss": 0.2226, "step": 8350 }, { "epoch": 0.8654782879054824, "grad_norm": 0.6756448745727539, "learning_rate": 1.8685673462426867e-06, "loss": 0.1931, "step": 8351 }, { "epoch": 0.8655819255881438, "grad_norm": 0.6229551434516907, "learning_rate": 1.8657349125156398e-06, "loss": 0.1787, "step": 8352 }, { "epoch": 0.8656855632708053, "grad_norm": 0.8107147812843323, "learning_rate": 1.8629045221293894e-06, "loss": 0.2089, "step": 8353 }, { "epoch": 0.8657892009534667, "grad_norm": 0.730259358882904, "learning_rate": 1.860076175402863e-06, "loss": 0.1814, "step": 8354 }, { "epoch": 0.8658928386361281, "grad_norm": 0.7431645393371582, "learning_rate": 1.857249872654756e-06, "loss": 0.2158, "step": 8355 }, { "epoch": 0.8659964763187895, "grad_norm": 0.6153815388679504, "learning_rate": 1.8544256142035278e-06, "loss": 0.1675, "step": 8356 }, { "epoch": 0.8661001140014509, "grad_norm": 0.6440610289573669, "learning_rate": 1.8516034003674167e-06, "loss": 0.165, "step": 8357 }, { "epoch": 0.8662037516841123, "grad_norm": 0.5951587557792664, "learning_rate": 1.8487832314644216e-06, "loss": 0.1589, "step": 8358 }, { "epoch": 0.8663073893667738, "grad_norm": 0.6662946939468384, "learning_rate": 1.845965107812313e-06, "loss": 0.1968, "step": 8359 }, { "epoch": 0.8664110270494352, "grad_norm": 0.6845201253890991, "learning_rate": 1.8431490297286369e-06, "loss": 0.1838, "step": 8360 }, { "epoch": 0.8665146647320966, "grad_norm": 0.7018457651138306, "learning_rate": 1.8403349975307017e-06, "loss": 0.1808, "step": 8361 }, { "epoch": 0.866618302414758, "grad_norm": 0.6310449838638306, "learning_rate": 1.8375230115355913e-06, "loss": 0.159, "step": 8362 }, { "epoch": 0.8667219400974194, "grad_norm": 0.7740344405174255, "learning_rate": 1.8347130720601503e-06, "loss": 0.2258, "step": 8363 }, { "epoch": 0.8668255777800808, "grad_norm": 0.6985799074172974, "learning_rate": 1.8319051794210053e-06, "loss": 0.1968, "step": 8364 }, { "epoch": 0.8669292154627423, "grad_norm": 0.7374464273452759, "learning_rate": 1.8290993339345408e-06, "loss": 0.2033, "step": 8365 }, { "epoch": 0.8670328531454037, "grad_norm": 0.6944618225097656, "learning_rate": 1.8262955359169155e-06, "loss": 0.1908, "step": 8366 }, { "epoch": 0.8671364908280651, "grad_norm": 0.8125670552253723, "learning_rate": 1.8234937856840585e-06, "loss": 0.2245, "step": 8367 }, { "epoch": 0.8672401285107265, "grad_norm": 0.7239158153533936, "learning_rate": 1.820694083551664e-06, "loss": 0.1842, "step": 8368 }, { "epoch": 0.8673437661933879, "grad_norm": 0.6269294023513794, "learning_rate": 1.817896429835202e-06, "loss": 0.213, "step": 8369 }, { "epoch": 0.8674474038760493, "grad_norm": 0.6639854907989502, "learning_rate": 1.815100824849907e-06, "loss": 0.1927, "step": 8370 }, { "epoch": 0.8675510415587108, "grad_norm": 0.6192017793655396, "learning_rate": 1.812307268910778e-06, "loss": 0.1647, "step": 8371 }, { "epoch": 0.8676546792413722, "grad_norm": 0.6484594941139221, "learning_rate": 1.8095157623325943e-06, "loss": 0.1913, "step": 8372 }, { "epoch": 0.8677583169240336, "grad_norm": 0.7138566970825195, "learning_rate": 1.8067263054298955e-06, "loss": 0.1758, "step": 8373 }, { "epoch": 0.867861954606695, "grad_norm": 0.6029613018035889, "learning_rate": 1.8039388985169947e-06, "loss": 0.1631, "step": 8374 }, { "epoch": 0.8679655922893564, "grad_norm": 0.651297390460968, "learning_rate": 1.801153541907974e-06, "loss": 0.1831, "step": 8375 }, { "epoch": 0.8680692299720179, "grad_norm": 0.6598191261291504, "learning_rate": 1.7983702359166844e-06, "loss": 0.1793, "step": 8376 }, { "epoch": 0.8681728676546793, "grad_norm": 0.6889581680297852, "learning_rate": 1.7955889808567396e-06, "loss": 0.1924, "step": 8377 }, { "epoch": 0.8682765053373407, "grad_norm": 0.6279385089874268, "learning_rate": 1.7928097770415264e-06, "loss": 0.1672, "step": 8378 }, { "epoch": 0.8683801430200021, "grad_norm": 0.5347639918327332, "learning_rate": 1.7900326247842036e-06, "loss": 0.1342, "step": 8379 }, { "epoch": 0.8684837807026635, "grad_norm": 0.6175400018692017, "learning_rate": 1.787257524397701e-06, "loss": 0.1555, "step": 8380 }, { "epoch": 0.8685874183853249, "grad_norm": 0.6695175170898438, "learning_rate": 1.7844844761947033e-06, "loss": 0.1776, "step": 8381 }, { "epoch": 0.8686910560679864, "grad_norm": 0.7033392786979675, "learning_rate": 1.7817134804876835e-06, "loss": 0.2009, "step": 8382 }, { "epoch": 0.8687946937506478, "grad_norm": 0.691206693649292, "learning_rate": 1.778944537588867e-06, "loss": 0.1606, "step": 8383 }, { "epoch": 0.8688983314333092, "grad_norm": 0.7595584988594055, "learning_rate": 1.7761776478102511e-06, "loss": 0.2042, "step": 8384 }, { "epoch": 0.8690019691159706, "grad_norm": 0.6974121928215027, "learning_rate": 1.7734128114636106e-06, "loss": 0.1898, "step": 8385 }, { "epoch": 0.869105606798632, "grad_norm": 0.6632170677185059, "learning_rate": 1.7706500288604788e-06, "loss": 0.1832, "step": 8386 }, { "epoch": 0.8692092444812934, "grad_norm": 0.6973819136619568, "learning_rate": 1.7678893003121644e-06, "loss": 0.1884, "step": 8387 }, { "epoch": 0.8693128821639549, "grad_norm": 0.6757559776306152, "learning_rate": 1.765130626129743e-06, "loss": 0.191, "step": 8388 }, { "epoch": 0.8694165198466163, "grad_norm": 0.5859283208847046, "learning_rate": 1.7623740066240568e-06, "loss": 0.1687, "step": 8389 }, { "epoch": 0.8695201575292777, "grad_norm": 0.65776127576828, "learning_rate": 1.7596194421057178e-06, "loss": 0.1918, "step": 8390 }, { "epoch": 0.8696237952119391, "grad_norm": 0.689621090888977, "learning_rate": 1.7568669328850996e-06, "loss": 0.1998, "step": 8391 }, { "epoch": 0.8697274328946005, "grad_norm": 0.88386470079422, "learning_rate": 1.7541164792723565e-06, "loss": 0.2358, "step": 8392 }, { "epoch": 0.8698310705772619, "grad_norm": 0.7360150218009949, "learning_rate": 1.751368081577407e-06, "loss": 0.214, "step": 8393 }, { "epoch": 0.8699347082599234, "grad_norm": 0.7591801285743713, "learning_rate": 1.7486217401099326e-06, "loss": 0.1993, "step": 8394 }, { "epoch": 0.8700383459425848, "grad_norm": 0.5601509809494019, "learning_rate": 1.74587745517939e-06, "loss": 0.1616, "step": 8395 }, { "epoch": 0.8701419836252461, "grad_norm": 0.6487360596656799, "learning_rate": 1.7431352270950008e-06, "loss": 0.173, "step": 8396 }, { "epoch": 0.8702456213079075, "grad_norm": 0.7577373385429382, "learning_rate": 1.740395056165749e-06, "loss": 0.1838, "step": 8397 }, { "epoch": 0.8703492589905689, "grad_norm": 0.7360081672668457, "learning_rate": 1.737656942700401e-06, "loss": 0.2132, "step": 8398 }, { "epoch": 0.8704528966732303, "grad_norm": 0.6201830506324768, "learning_rate": 1.7349208870074763e-06, "loss": 0.1699, "step": 8399 }, { "epoch": 0.8705565343558918, "grad_norm": 0.6153380274772644, "learning_rate": 1.7321868893952754e-06, "loss": 0.1726, "step": 8400 }, { "epoch": 0.8706601720385532, "grad_norm": 0.5483555793762207, "learning_rate": 1.729454950171856e-06, "loss": 0.1651, "step": 8401 }, { "epoch": 0.8707638097212146, "grad_norm": 0.6861063241958618, "learning_rate": 1.72672506964505e-06, "loss": 0.1513, "step": 8402 }, { "epoch": 0.870867447403876, "grad_norm": 0.6212663054466248, "learning_rate": 1.72399724812246e-06, "loss": 0.1682, "step": 8403 }, { "epoch": 0.8709710850865374, "grad_norm": 0.612116813659668, "learning_rate": 1.7212714859114442e-06, "loss": 0.1855, "step": 8404 }, { "epoch": 0.8710747227691988, "grad_norm": 0.6737285256385803, "learning_rate": 1.7185477833191467e-06, "loss": 0.1908, "step": 8405 }, { "epoch": 0.8711783604518603, "grad_norm": 0.7784683108329773, "learning_rate": 1.7158261406524635e-06, "loss": 0.2532, "step": 8406 }, { "epoch": 0.8712819981345217, "grad_norm": 0.7392608523368835, "learning_rate": 1.7131065582180695e-06, "loss": 0.1981, "step": 8407 }, { "epoch": 0.8713856358171831, "grad_norm": 0.6000805497169495, "learning_rate": 1.710389036322402e-06, "loss": 0.1531, "step": 8408 }, { "epoch": 0.8714892734998445, "grad_norm": 0.7079887986183167, "learning_rate": 1.7076735752716622e-06, "loss": 0.1919, "step": 8409 }, { "epoch": 0.8715929111825059, "grad_norm": 0.6999341249465942, "learning_rate": 1.7049601753718325e-06, "loss": 0.1987, "step": 8410 }, { "epoch": 0.8716965488651673, "grad_norm": 0.6581019163131714, "learning_rate": 1.7022488369286462e-06, "loss": 0.1993, "step": 8411 }, { "epoch": 0.8718001865478288, "grad_norm": 0.7524918913841248, "learning_rate": 1.699539560247616e-06, "loss": 0.2153, "step": 8412 }, { "epoch": 0.8719038242304902, "grad_norm": 0.6387086510658264, "learning_rate": 1.696832345634023e-06, "loss": 0.1677, "step": 8413 }, { "epoch": 0.8720074619131516, "grad_norm": 0.8131616711616516, "learning_rate": 1.6941271933929071e-06, "loss": 0.199, "step": 8414 }, { "epoch": 0.872111099595813, "grad_norm": 0.7414273619651794, "learning_rate": 1.6914241038290846e-06, "loss": 0.2105, "step": 8415 }, { "epoch": 0.8722147372784744, "grad_norm": 0.6521408557891846, "learning_rate": 1.6887230772471276e-06, "loss": 0.1887, "step": 8416 }, { "epoch": 0.8723183749611358, "grad_norm": 0.7166591286659241, "learning_rate": 1.6860241139513899e-06, "loss": 0.2254, "step": 8417 }, { "epoch": 0.8724220126437973, "grad_norm": 0.6408938765525818, "learning_rate": 1.6833272142459888e-06, "loss": 0.1757, "step": 8418 }, { "epoch": 0.8725256503264587, "grad_norm": 0.6115769743919373, "learning_rate": 1.680632378434799e-06, "loss": 0.1787, "step": 8419 }, { "epoch": 0.8726292880091201, "grad_norm": 0.7922074794769287, "learning_rate": 1.6779396068214792e-06, "loss": 0.2271, "step": 8420 }, { "epoch": 0.8727329256917815, "grad_norm": 0.8125144243240356, "learning_rate": 1.6752488997094408e-06, "loss": 0.228, "step": 8421 }, { "epoch": 0.8728365633744429, "grad_norm": 0.6652587056159973, "learning_rate": 1.6725602574018695e-06, "loss": 0.1765, "step": 8422 }, { "epoch": 0.8729402010571043, "grad_norm": 0.7473057508468628, "learning_rate": 1.6698736802017191e-06, "loss": 0.1948, "step": 8423 }, { "epoch": 0.8730438387397658, "grad_norm": 0.6176879405975342, "learning_rate": 1.6671891684117048e-06, "loss": 0.1811, "step": 8424 }, { "epoch": 0.8731474764224272, "grad_norm": 0.6850529909133911, "learning_rate": 1.6645067223343181e-06, "loss": 0.189, "step": 8425 }, { "epoch": 0.8732511141050886, "grad_norm": 0.6224367618560791, "learning_rate": 1.6618263422718084e-06, "loss": 0.1704, "step": 8426 }, { "epoch": 0.87335475178775, "grad_norm": 0.6955450177192688, "learning_rate": 1.659148028526203e-06, "loss": 0.1871, "step": 8427 }, { "epoch": 0.8734583894704114, "grad_norm": 0.7510151863098145, "learning_rate": 1.656471781399287e-06, "loss": 0.2142, "step": 8428 }, { "epoch": 0.8735620271530729, "grad_norm": 0.6863204836845398, "learning_rate": 1.6537976011926105e-06, "loss": 0.1914, "step": 8429 }, { "epoch": 0.8736656648357343, "grad_norm": 0.7114805579185486, "learning_rate": 1.6511254882075056e-06, "loss": 0.1905, "step": 8430 }, { "epoch": 0.8737693025183957, "grad_norm": 0.6145648956298828, "learning_rate": 1.6484554427450516e-06, "loss": 0.1829, "step": 8431 }, { "epoch": 0.8738729402010571, "grad_norm": 0.6042888164520264, "learning_rate": 1.6457874651061145e-06, "loss": 0.19, "step": 8432 }, { "epoch": 0.8739765778837185, "grad_norm": 0.7755526304244995, "learning_rate": 1.6431215555913138e-06, "loss": 0.2108, "step": 8433 }, { "epoch": 0.8740802155663799, "grad_norm": 0.7109092473983765, "learning_rate": 1.6404577145010358e-06, "loss": 0.1888, "step": 8434 }, { "epoch": 0.8741838532490414, "grad_norm": 0.7340528964996338, "learning_rate": 1.637795942135445e-06, "loss": 0.1662, "step": 8435 }, { "epoch": 0.8742874909317028, "grad_norm": 0.7330647110939026, "learning_rate": 1.635136238794459e-06, "loss": 0.2035, "step": 8436 }, { "epoch": 0.8743911286143642, "grad_norm": 0.6903119087219238, "learning_rate": 1.6324786047777763e-06, "loss": 0.1828, "step": 8437 }, { "epoch": 0.8744947662970256, "grad_norm": 0.669479489326477, "learning_rate": 1.6298230403848526e-06, "loss": 0.2263, "step": 8438 }, { "epoch": 0.874598403979687, "grad_norm": 0.683139979839325, "learning_rate": 1.6271695459149106e-06, "loss": 0.2082, "step": 8439 }, { "epoch": 0.8747020416623484, "grad_norm": 0.6867363452911377, "learning_rate": 1.6245181216669447e-06, "loss": 0.2003, "step": 8440 }, { "epoch": 0.8748056793450099, "grad_norm": 0.6809399724006653, "learning_rate": 1.6218687679397072e-06, "loss": 0.1851, "step": 8441 }, { "epoch": 0.8749093170276713, "grad_norm": 0.6889615058898926, "learning_rate": 1.6192214850317277e-06, "loss": 0.1993, "step": 8442 }, { "epoch": 0.8750129547103327, "grad_norm": 0.6928166151046753, "learning_rate": 1.6165762732413037e-06, "loss": 0.1885, "step": 8443 }, { "epoch": 0.8751165923929941, "grad_norm": 0.520487904548645, "learning_rate": 1.6139331328664809e-06, "loss": 0.1578, "step": 8444 }, { "epoch": 0.8752202300756555, "grad_norm": 0.6969999074935913, "learning_rate": 1.6112920642050967e-06, "loss": 0.1966, "step": 8445 }, { "epoch": 0.8753238677583169, "grad_norm": 0.6650992631912231, "learning_rate": 1.608653067554735e-06, "loss": 0.1963, "step": 8446 }, { "epoch": 0.8754275054409784, "grad_norm": 0.6840571761131287, "learning_rate": 1.606016143212754e-06, "loss": 0.1662, "step": 8447 }, { "epoch": 0.8755311431236398, "grad_norm": 0.6494219303131104, "learning_rate": 1.6033812914762826e-06, "loss": 0.1947, "step": 8448 }, { "epoch": 0.8756347808063012, "grad_norm": 0.7240989804267883, "learning_rate": 1.6007485126422051e-06, "loss": 0.2117, "step": 8449 }, { "epoch": 0.8757384184889626, "grad_norm": 0.6862815022468567, "learning_rate": 1.5981178070071868e-06, "loss": 0.1972, "step": 8450 }, { "epoch": 0.875842056171624, "grad_norm": 0.7563923597335815, "learning_rate": 1.595489174867646e-06, "loss": 0.2292, "step": 8451 }, { "epoch": 0.8759456938542854, "grad_norm": 0.7190267443656921, "learning_rate": 1.5928626165197768e-06, "loss": 0.1881, "step": 8452 }, { "epoch": 0.8760493315369469, "grad_norm": 0.6157662272453308, "learning_rate": 1.590238132259534e-06, "loss": 0.1751, "step": 8453 }, { "epoch": 0.8761529692196083, "grad_norm": 0.6653298139572144, "learning_rate": 1.587615722382634e-06, "loss": 0.1936, "step": 8454 }, { "epoch": 0.8762566069022697, "grad_norm": 0.6411898136138916, "learning_rate": 1.584995387184578e-06, "loss": 0.1855, "step": 8455 }, { "epoch": 0.8763602445849311, "grad_norm": 0.6861348748207092, "learning_rate": 1.5823771269606102e-06, "loss": 0.1738, "step": 8456 }, { "epoch": 0.8764638822675925, "grad_norm": 0.6964961290359497, "learning_rate": 1.579760942005759e-06, "loss": 0.2199, "step": 8457 }, { "epoch": 0.876567519950254, "grad_norm": 0.6873329281806946, "learning_rate": 1.577146832614811e-06, "loss": 0.1788, "step": 8458 }, { "epoch": 0.8766711576329154, "grad_norm": 0.6251301765441895, "learning_rate": 1.5745347990823213e-06, "loss": 0.1615, "step": 8459 }, { "epoch": 0.8767747953155768, "grad_norm": 0.8414469957351685, "learning_rate": 1.5719248417026057e-06, "loss": 0.2304, "step": 8460 }, { "epoch": 0.8768784329982382, "grad_norm": 0.6537505388259888, "learning_rate": 1.5693169607697489e-06, "loss": 0.1712, "step": 8461 }, { "epoch": 0.8769820706808996, "grad_norm": 0.797130823135376, "learning_rate": 1.5667111565776049e-06, "loss": 0.2095, "step": 8462 }, { "epoch": 0.877085708363561, "grad_norm": 0.718692421913147, "learning_rate": 1.5641074294197988e-06, "loss": 0.19, "step": 8463 }, { "epoch": 0.8771893460462225, "grad_norm": 0.6466525793075562, "learning_rate": 1.561505779589707e-06, "loss": 0.1863, "step": 8464 }, { "epoch": 0.8772929837288839, "grad_norm": 0.6893818974494934, "learning_rate": 1.5589062073804796e-06, "loss": 0.1938, "step": 8465 }, { "epoch": 0.8773966214115453, "grad_norm": 0.7317748665809631, "learning_rate": 1.5563087130850307e-06, "loss": 0.2374, "step": 8466 }, { "epoch": 0.8775002590942067, "grad_norm": 0.7286064624786377, "learning_rate": 1.5537132969960466e-06, "loss": 0.2192, "step": 8467 }, { "epoch": 0.8776038967768681, "grad_norm": 0.655089259147644, "learning_rate": 1.5511199594059733e-06, "loss": 0.1532, "step": 8468 }, { "epoch": 0.8777075344595295, "grad_norm": 0.7922578454017639, "learning_rate": 1.5485287006070238e-06, "loss": 0.2014, "step": 8469 }, { "epoch": 0.877811172142191, "grad_norm": 0.6886960864067078, "learning_rate": 1.5459395208911776e-06, "loss": 0.1911, "step": 8470 }, { "epoch": 0.8779148098248524, "grad_norm": 0.6597391963005066, "learning_rate": 1.5433524205501793e-06, "loss": 0.1979, "step": 8471 }, { "epoch": 0.8780184475075137, "grad_norm": 0.7308657169342041, "learning_rate": 1.5407673998755358e-06, "loss": 0.1626, "step": 8472 }, { "epoch": 0.8781220851901751, "grad_norm": 0.5964559316635132, "learning_rate": 1.5381844591585294e-06, "loss": 0.1551, "step": 8473 }, { "epoch": 0.8782257228728365, "grad_norm": 0.5970776081085205, "learning_rate": 1.5356035986901962e-06, "loss": 0.1964, "step": 8474 }, { "epoch": 0.8783293605554979, "grad_norm": 0.8105737566947937, "learning_rate": 1.5330248187613484e-06, "loss": 0.2208, "step": 8475 }, { "epoch": 0.8784329982381593, "grad_norm": 0.6745266914367676, "learning_rate": 1.5304481196625531e-06, "loss": 0.1824, "step": 8476 }, { "epoch": 0.8785366359208208, "grad_norm": 0.8474743962287903, "learning_rate": 1.527873501684156e-06, "loss": 0.2178, "step": 8477 }, { "epoch": 0.8786402736034822, "grad_norm": 0.6158428192138672, "learning_rate": 1.5253009651162564e-06, "loss": 0.1684, "step": 8478 }, { "epoch": 0.8787439112861436, "grad_norm": 0.6813104748725891, "learning_rate": 1.5227305102487223e-06, "loss": 0.1848, "step": 8479 }, { "epoch": 0.878847548968805, "grad_norm": 0.5705843567848206, "learning_rate": 1.520162137371195e-06, "loss": 0.1832, "step": 8480 }, { "epoch": 0.8789511866514664, "grad_norm": 0.6192941069602966, "learning_rate": 1.5175958467730656e-06, "loss": 0.1557, "step": 8481 }, { "epoch": 0.8790548243341278, "grad_norm": 0.6419438719749451, "learning_rate": 1.5150316387435049e-06, "loss": 0.1714, "step": 8482 }, { "epoch": 0.8791584620167893, "grad_norm": 0.5894890427589417, "learning_rate": 1.5124695135714463e-06, "loss": 0.1507, "step": 8483 }, { "epoch": 0.8792620996994507, "grad_norm": 0.7742616534233093, "learning_rate": 1.5099094715455852e-06, "loss": 0.1856, "step": 8484 }, { "epoch": 0.8793657373821121, "grad_norm": 0.6059996485710144, "learning_rate": 1.5073515129543802e-06, "loss": 0.1659, "step": 8485 }, { "epoch": 0.8794693750647735, "grad_norm": 0.7219932079315186, "learning_rate": 1.5047956380860584e-06, "loss": 0.2266, "step": 8486 }, { "epoch": 0.8795730127474349, "grad_norm": 0.6520408987998962, "learning_rate": 1.5022418472286094e-06, "loss": 0.1831, "step": 8487 }, { "epoch": 0.8796766504300964, "grad_norm": 0.5570911765098572, "learning_rate": 1.4996901406697983e-06, "loss": 0.1414, "step": 8488 }, { "epoch": 0.8797802881127578, "grad_norm": 0.6972008943557739, "learning_rate": 1.49714051869714e-06, "loss": 0.1907, "step": 8489 }, { "epoch": 0.8798839257954192, "grad_norm": 0.6869020462036133, "learning_rate": 1.4945929815979332e-06, "loss": 0.1736, "step": 8490 }, { "epoch": 0.8799875634780806, "grad_norm": 0.7016257643699646, "learning_rate": 1.492047529659213e-06, "loss": 0.1684, "step": 8491 }, { "epoch": 0.880091201160742, "grad_norm": 0.7112381458282471, "learning_rate": 1.4895041631678053e-06, "loss": 0.1909, "step": 8492 }, { "epoch": 0.8801948388434034, "grad_norm": 0.7013789415359497, "learning_rate": 1.4869628824102989e-06, "loss": 0.2006, "step": 8493 }, { "epoch": 0.8802984765260649, "grad_norm": 0.6262712478637695, "learning_rate": 1.4844236876730312e-06, "loss": 0.1677, "step": 8494 }, { "epoch": 0.8804021142087263, "grad_norm": 0.6590498685836792, "learning_rate": 1.4818865792421221e-06, "loss": 0.1858, "step": 8495 }, { "epoch": 0.8805057518913877, "grad_norm": 0.7231387495994568, "learning_rate": 1.4793515574034478e-06, "loss": 0.2059, "step": 8496 }, { "epoch": 0.8806093895740491, "grad_norm": 0.6526983380317688, "learning_rate": 1.4768186224426463e-06, "loss": 0.1733, "step": 8497 }, { "epoch": 0.8807130272567105, "grad_norm": 0.7078770399093628, "learning_rate": 1.474287774645129e-06, "loss": 0.1852, "step": 8498 }, { "epoch": 0.8808166649393719, "grad_norm": 0.7307820320129395, "learning_rate": 1.4717590142960637e-06, "loss": 0.2009, "step": 8499 }, { "epoch": 0.8809203026220334, "grad_norm": 0.7521584630012512, "learning_rate": 1.4692323416803934e-06, "loss": 0.1977, "step": 8500 }, { "epoch": 0.8810239403046948, "grad_norm": 0.6136340498924255, "learning_rate": 1.4667077570828125e-06, "loss": 0.165, "step": 8501 }, { "epoch": 0.8811275779873562, "grad_norm": 0.6525123119354248, "learning_rate": 1.464185260787796e-06, "loss": 0.2126, "step": 8502 }, { "epoch": 0.8812312156700176, "grad_norm": 0.666297435760498, "learning_rate": 1.4616648530795673e-06, "loss": 0.1823, "step": 8503 }, { "epoch": 0.881334853352679, "grad_norm": 0.68965744972229, "learning_rate": 1.4591465342421218e-06, "loss": 0.1814, "step": 8504 }, { "epoch": 0.8814384910353404, "grad_norm": 0.7001585960388184, "learning_rate": 1.4566303045592279e-06, "loss": 0.1772, "step": 8505 }, { "epoch": 0.8815421287180019, "grad_norm": 0.7614684104919434, "learning_rate": 1.4541161643144008e-06, "loss": 0.2056, "step": 8506 }, { "epoch": 0.8816457664006633, "grad_norm": 0.6306325793266296, "learning_rate": 1.451604113790932e-06, "loss": 0.1705, "step": 8507 }, { "epoch": 0.8817494040833247, "grad_norm": 0.7202686071395874, "learning_rate": 1.449094153271884e-06, "loss": 0.2256, "step": 8508 }, { "epoch": 0.8818530417659861, "grad_norm": 0.6471310257911682, "learning_rate": 1.4465862830400678e-06, "loss": 0.1681, "step": 8509 }, { "epoch": 0.8819566794486475, "grad_norm": 0.7523260712623596, "learning_rate": 1.444080503378067e-06, "loss": 0.199, "step": 8510 }, { "epoch": 0.882060317131309, "grad_norm": 0.7514868378639221, "learning_rate": 1.4415768145682264e-06, "loss": 0.1917, "step": 8511 }, { "epoch": 0.8821639548139704, "grad_norm": 0.5885258316993713, "learning_rate": 1.4390752168926603e-06, "loss": 0.1863, "step": 8512 }, { "epoch": 0.8822675924966318, "grad_norm": 0.6411988139152527, "learning_rate": 1.4365757106332479e-06, "loss": 0.1728, "step": 8513 }, { "epoch": 0.8823712301792932, "grad_norm": 0.6003291606903076, "learning_rate": 1.4340782960716238e-06, "loss": 0.159, "step": 8514 }, { "epoch": 0.8824748678619546, "grad_norm": 0.647544801235199, "learning_rate": 1.4315829734892006e-06, "loss": 0.1769, "step": 8515 }, { "epoch": 0.882578505544616, "grad_norm": 0.6379469633102417, "learning_rate": 1.4290897431671424e-06, "loss": 0.1869, "step": 8516 }, { "epoch": 0.8826821432272774, "grad_norm": 0.8017239570617676, "learning_rate": 1.4265986053863802e-06, "loss": 0.2074, "step": 8517 }, { "epoch": 0.8827857809099389, "grad_norm": 0.6173701882362366, "learning_rate": 1.4241095604276157e-06, "loss": 0.1819, "step": 8518 }, { "epoch": 0.8828894185926003, "grad_norm": 0.7021307349205017, "learning_rate": 1.421622608571307e-06, "loss": 0.1928, "step": 8519 }, { "epoch": 0.8829930562752617, "grad_norm": 0.7378525733947754, "learning_rate": 1.4191377500976856e-06, "loss": 0.2155, "step": 8520 }, { "epoch": 0.8830966939579231, "grad_norm": 0.5751429796218872, "learning_rate": 1.416654985286734e-06, "loss": 0.1536, "step": 8521 }, { "epoch": 0.8832003316405845, "grad_norm": 0.6245465874671936, "learning_rate": 1.4141743144182153e-06, "loss": 0.1699, "step": 8522 }, { "epoch": 0.883303969323246, "grad_norm": 0.6487674713134766, "learning_rate": 1.4116957377716412e-06, "loss": 0.1718, "step": 8523 }, { "epoch": 0.8834076070059074, "grad_norm": 0.5731277465820312, "learning_rate": 1.4092192556262907e-06, "loss": 0.1552, "step": 8524 }, { "epoch": 0.8835112446885688, "grad_norm": 0.7612659335136414, "learning_rate": 1.4067448682612207e-06, "loss": 0.2067, "step": 8525 }, { "epoch": 0.8836148823712302, "grad_norm": 0.5277737975120544, "learning_rate": 1.40427257595523e-06, "loss": 0.1608, "step": 8526 }, { "epoch": 0.8837185200538916, "grad_norm": 0.6964701414108276, "learning_rate": 1.4018023789869005e-06, "loss": 0.1774, "step": 8527 }, { "epoch": 0.883822157736553, "grad_norm": 0.6501127481460571, "learning_rate": 1.3993342776345698e-06, "loss": 0.1771, "step": 8528 }, { "epoch": 0.8839257954192145, "grad_norm": 0.6661208868026733, "learning_rate": 1.3968682721763328e-06, "loss": 0.1711, "step": 8529 }, { "epoch": 0.8840294331018759, "grad_norm": 0.6030734181404114, "learning_rate": 1.3944043628900627e-06, "loss": 0.1518, "step": 8530 }, { "epoch": 0.8841330707845373, "grad_norm": 0.726662278175354, "learning_rate": 1.3919425500533823e-06, "loss": 0.189, "step": 8531 }, { "epoch": 0.8842367084671987, "grad_norm": 0.7446136474609375, "learning_rate": 1.3894828339436894e-06, "loss": 0.1774, "step": 8532 }, { "epoch": 0.8843403461498601, "grad_norm": 0.6524977684020996, "learning_rate": 1.3870252148381446e-06, "loss": 0.188, "step": 8533 }, { "epoch": 0.8844439838325215, "grad_norm": 0.7023906707763672, "learning_rate": 1.3845696930136621e-06, "loss": 0.1999, "step": 8534 }, { "epoch": 0.884547621515183, "grad_norm": 0.5903967618942261, "learning_rate": 1.3821162687469291e-06, "loss": 0.1639, "step": 8535 }, { "epoch": 0.8846512591978444, "grad_norm": 0.6685062646865845, "learning_rate": 1.3796649423143915e-06, "loss": 0.2118, "step": 8536 }, { "epoch": 0.8847548968805058, "grad_norm": 0.7017149329185486, "learning_rate": 1.3772157139922593e-06, "loss": 0.2078, "step": 8537 }, { "epoch": 0.8848585345631672, "grad_norm": 0.6549966335296631, "learning_rate": 1.374768584056516e-06, "loss": 0.1869, "step": 8538 }, { "epoch": 0.8849621722458286, "grad_norm": 0.6312375068664551, "learning_rate": 1.3723235527828904e-06, "loss": 0.1654, "step": 8539 }, { "epoch": 0.88506580992849, "grad_norm": 0.5591091513633728, "learning_rate": 1.369880620446895e-06, "loss": 0.1516, "step": 8540 }, { "epoch": 0.8851694476111515, "grad_norm": 0.6782501935958862, "learning_rate": 1.3674397873237877e-06, "loss": 0.1706, "step": 8541 }, { "epoch": 0.8852730852938129, "grad_norm": 0.7043976187705994, "learning_rate": 1.3650010536885994e-06, "loss": 0.1916, "step": 8542 }, { "epoch": 0.8853767229764743, "grad_norm": 0.650366485118866, "learning_rate": 1.3625644198161259e-06, "loss": 0.1798, "step": 8543 }, { "epoch": 0.8854803606591357, "grad_norm": 0.6807155013084412, "learning_rate": 1.3601298859809165e-06, "loss": 0.2027, "step": 8544 }, { "epoch": 0.8855839983417971, "grad_norm": 0.7555018067359924, "learning_rate": 1.3576974524573006e-06, "loss": 0.211, "step": 8545 }, { "epoch": 0.8856876360244585, "grad_norm": 0.6486994028091431, "learning_rate": 1.3552671195193523e-06, "loss": 0.1975, "step": 8546 }, { "epoch": 0.88579127370712, "grad_norm": 0.7745081782341003, "learning_rate": 1.3528388874409238e-06, "loss": 0.2216, "step": 8547 }, { "epoch": 0.8858949113897813, "grad_norm": 0.7181711792945862, "learning_rate": 1.3504127564956205e-06, "loss": 0.1998, "step": 8548 }, { "epoch": 0.8859985490724427, "grad_norm": 0.7709293961524963, "learning_rate": 1.3479887269568148e-06, "loss": 0.2232, "step": 8549 }, { "epoch": 0.8861021867551041, "grad_norm": 0.8563987016677856, "learning_rate": 1.3455667990976483e-06, "loss": 0.222, "step": 8550 }, { "epoch": 0.8862058244377655, "grad_norm": 0.7017079591751099, "learning_rate": 1.3431469731910118e-06, "loss": 0.1925, "step": 8551 }, { "epoch": 0.8863094621204269, "grad_norm": 0.6531928777694702, "learning_rate": 1.3407292495095715e-06, "loss": 0.1829, "step": 8552 }, { "epoch": 0.8864130998030884, "grad_norm": 0.6812769174575806, "learning_rate": 1.3383136283257581e-06, "loss": 0.1918, "step": 8553 }, { "epoch": 0.8865167374857498, "grad_norm": 0.560120701789856, "learning_rate": 1.3359001099117518e-06, "loss": 0.1385, "step": 8554 }, { "epoch": 0.8866203751684112, "grad_norm": 0.6926707625389099, "learning_rate": 1.3334886945395086e-06, "loss": 0.2024, "step": 8555 }, { "epoch": 0.8867240128510726, "grad_norm": 0.6750642657279968, "learning_rate": 1.3310793824807378e-06, "loss": 0.1806, "step": 8556 }, { "epoch": 0.886827650533734, "grad_norm": 0.6305122971534729, "learning_rate": 1.328672174006922e-06, "loss": 0.1703, "step": 8557 }, { "epoch": 0.8869312882163954, "grad_norm": 0.5967231392860413, "learning_rate": 1.326267069389302e-06, "loss": 0.1607, "step": 8558 }, { "epoch": 0.8870349258990569, "grad_norm": 0.7030181288719177, "learning_rate": 1.3238640688988814e-06, "loss": 0.2134, "step": 8559 }, { "epoch": 0.8871385635817183, "grad_norm": 0.7760520577430725, "learning_rate": 1.321463172806423e-06, "loss": 0.2166, "step": 8560 }, { "epoch": 0.8872422012643797, "grad_norm": 0.7813576459884644, "learning_rate": 1.3190643813824577e-06, "loss": 0.1998, "step": 8561 }, { "epoch": 0.8873458389470411, "grad_norm": 0.7017430067062378, "learning_rate": 1.3166676948972757e-06, "loss": 0.1894, "step": 8562 }, { "epoch": 0.8874494766297025, "grad_norm": 0.720665454864502, "learning_rate": 1.3142731136209364e-06, "loss": 0.1956, "step": 8563 }, { "epoch": 0.8875531143123639, "grad_norm": 0.7103095054626465, "learning_rate": 1.311880637823255e-06, "loss": 0.1927, "step": 8564 }, { "epoch": 0.8876567519950254, "grad_norm": 0.7657372355461121, "learning_rate": 1.309490267773812e-06, "loss": 0.1948, "step": 8565 }, { "epoch": 0.8877603896776868, "grad_norm": 0.6733664274215698, "learning_rate": 1.3071020037419535e-06, "loss": 0.1827, "step": 8566 }, { "epoch": 0.8878640273603482, "grad_norm": 0.7801433205604553, "learning_rate": 1.304715845996778e-06, "loss": 0.2261, "step": 8567 }, { "epoch": 0.8879676650430096, "grad_norm": 0.6368528604507446, "learning_rate": 1.3023317948071611e-06, "loss": 0.1702, "step": 8568 }, { "epoch": 0.888071302725671, "grad_norm": 0.5935059785842896, "learning_rate": 1.2999498504417286e-06, "loss": 0.158, "step": 8569 }, { "epoch": 0.8881749404083324, "grad_norm": 0.6271074414253235, "learning_rate": 1.2975700131688806e-06, "loss": 0.1565, "step": 8570 }, { "epoch": 0.8882785780909939, "grad_norm": 0.7890324592590332, "learning_rate": 1.2951922832567676e-06, "loss": 0.2071, "step": 8571 }, { "epoch": 0.8883822157736553, "grad_norm": 0.7705354690551758, "learning_rate": 1.292816660973315e-06, "loss": 0.2464, "step": 8572 }, { "epoch": 0.8884858534563167, "grad_norm": 0.6207772493362427, "learning_rate": 1.2904431465861978e-06, "loss": 0.1653, "step": 8573 }, { "epoch": 0.8885894911389781, "grad_norm": 0.7003123760223389, "learning_rate": 1.2880717403628596e-06, "loss": 0.1639, "step": 8574 }, { "epoch": 0.8886931288216395, "grad_norm": 0.5598231554031372, "learning_rate": 1.2857024425705133e-06, "loss": 0.1635, "step": 8575 }, { "epoch": 0.888796766504301, "grad_norm": 0.5815957188606262, "learning_rate": 1.2833352534761212e-06, "loss": 0.168, "step": 8576 }, { "epoch": 0.8889004041869624, "grad_norm": 0.6298869848251343, "learning_rate": 1.2809701733464164e-06, "loss": 0.1857, "step": 8577 }, { "epoch": 0.8890040418696238, "grad_norm": 0.6543485522270203, "learning_rate": 1.2786072024478945e-06, "loss": 0.1884, "step": 8578 }, { "epoch": 0.8891076795522852, "grad_norm": 0.5738658308982849, "learning_rate": 1.2762463410468117e-06, "loss": 0.163, "step": 8579 }, { "epoch": 0.8892113172349466, "grad_norm": 0.7424209117889404, "learning_rate": 1.2738875894091817e-06, "loss": 0.2165, "step": 8580 }, { "epoch": 0.889314954917608, "grad_norm": 0.6845530867576599, "learning_rate": 1.271530947800792e-06, "loss": 0.1887, "step": 8581 }, { "epoch": 0.8894185926002695, "grad_norm": 0.7086684703826904, "learning_rate": 1.2691764164871768e-06, "loss": 0.2111, "step": 8582 }, { "epoch": 0.8895222302829309, "grad_norm": 0.6318870186805725, "learning_rate": 1.266823995733648e-06, "loss": 0.1869, "step": 8583 }, { "epoch": 0.8896258679655923, "grad_norm": 0.7254533171653748, "learning_rate": 1.2644736858052675e-06, "loss": 0.208, "step": 8584 }, { "epoch": 0.8897295056482537, "grad_norm": 0.7029841542243958, "learning_rate": 1.2621254869668698e-06, "loss": 0.1879, "step": 8585 }, { "epoch": 0.8898331433309151, "grad_norm": 0.7651259303092957, "learning_rate": 1.259779399483043e-06, "loss": 0.1963, "step": 8586 }, { "epoch": 0.8899367810135765, "grad_norm": 0.6780407428741455, "learning_rate": 1.2574354236181408e-06, "loss": 0.2059, "step": 8587 }, { "epoch": 0.890040418696238, "grad_norm": 0.6573833227157593, "learning_rate": 1.25509355963628e-06, "loss": 0.2028, "step": 8588 }, { "epoch": 0.8901440563788994, "grad_norm": 0.5772253274917603, "learning_rate": 1.2527538078013346e-06, "loss": 0.1696, "step": 8589 }, { "epoch": 0.8902476940615608, "grad_norm": 0.6660610437393188, "learning_rate": 1.2504161683769512e-06, "loss": 0.1676, "step": 8590 }, { "epoch": 0.8903513317442222, "grad_norm": 0.708368718624115, "learning_rate": 1.2480806416265256e-06, "loss": 0.197, "step": 8591 }, { "epoch": 0.8904549694268836, "grad_norm": 0.6154781579971313, "learning_rate": 1.245747227813221e-06, "loss": 0.1903, "step": 8592 }, { "epoch": 0.890558607109545, "grad_norm": 0.778201699256897, "learning_rate": 1.2434159271999668e-06, "loss": 0.212, "step": 8593 }, { "epoch": 0.8906622447922065, "grad_norm": 0.7690391540527344, "learning_rate": 1.2410867400494464e-06, "loss": 0.205, "step": 8594 }, { "epoch": 0.8907658824748679, "grad_norm": 0.7358372211456299, "learning_rate": 1.2387596666241097e-06, "loss": 0.199, "step": 8595 }, { "epoch": 0.8908695201575293, "grad_norm": 0.6339231133460999, "learning_rate": 1.2364347071861716e-06, "loss": 0.1822, "step": 8596 }, { "epoch": 0.8909731578401907, "grad_norm": 0.624960720539093, "learning_rate": 1.2341118619976023e-06, "loss": 0.1765, "step": 8597 }, { "epoch": 0.8910767955228521, "grad_norm": 0.6983635425567627, "learning_rate": 1.2317911313201369e-06, "loss": 0.1782, "step": 8598 }, { "epoch": 0.8911804332055135, "grad_norm": 0.607162594795227, "learning_rate": 1.2294725154152687e-06, "loss": 0.1706, "step": 8599 }, { "epoch": 0.891284070888175, "grad_norm": 0.6569267511367798, "learning_rate": 1.2271560145442574e-06, "loss": 0.1998, "step": 8600 }, { "epoch": 0.8913877085708364, "grad_norm": 0.5935954451560974, "learning_rate": 1.2248416289681253e-06, "loss": 0.1625, "step": 8601 }, { "epoch": 0.8914913462534978, "grad_norm": 0.5493476390838623, "learning_rate": 1.2225293589476506e-06, "loss": 0.1439, "step": 8602 }, { "epoch": 0.8915949839361592, "grad_norm": 0.6254029273986816, "learning_rate": 1.2202192047433802e-06, "loss": 0.172, "step": 8603 }, { "epoch": 0.8916986216188206, "grad_norm": 0.5982598066329956, "learning_rate": 1.2179111666156152e-06, "loss": 0.1627, "step": 8604 }, { "epoch": 0.891802259301482, "grad_norm": 0.6403316259384155, "learning_rate": 1.2156052448244204e-06, "loss": 0.1734, "step": 8605 }, { "epoch": 0.8919058969841435, "grad_norm": 0.6913343071937561, "learning_rate": 1.2133014396296283e-06, "loss": 0.1981, "step": 8606 }, { "epoch": 0.8920095346668049, "grad_norm": 0.7176704406738281, "learning_rate": 1.2109997512908245e-06, "loss": 0.2002, "step": 8607 }, { "epoch": 0.8921131723494663, "grad_norm": 0.703881561756134, "learning_rate": 1.2087001800673615e-06, "loss": 0.1836, "step": 8608 }, { "epoch": 0.8922168100321277, "grad_norm": 0.738377034664154, "learning_rate": 1.2064027262183475e-06, "loss": 0.2089, "step": 8609 }, { "epoch": 0.8923204477147891, "grad_norm": 0.6741861701011658, "learning_rate": 1.2041073900026624e-06, "loss": 0.1878, "step": 8610 }, { "epoch": 0.8924240853974506, "grad_norm": 0.9033978581428528, "learning_rate": 1.201814171678939e-06, "loss": 0.2101, "step": 8611 }, { "epoch": 0.892527723080112, "grad_norm": 0.7610499262809753, "learning_rate": 1.1995230715055684e-06, "loss": 0.2188, "step": 8612 }, { "epoch": 0.8926313607627734, "grad_norm": 0.6552374362945557, "learning_rate": 1.1972340897407153e-06, "loss": 0.1904, "step": 8613 }, { "epoch": 0.8927349984454348, "grad_norm": 0.6682619452476501, "learning_rate": 1.1949472266422913e-06, "loss": 0.1852, "step": 8614 }, { "epoch": 0.8928386361280962, "grad_norm": 0.6607064604759216, "learning_rate": 1.1926624824679833e-06, "loss": 0.1904, "step": 8615 }, { "epoch": 0.8929422738107576, "grad_norm": 0.7705369591712952, "learning_rate": 1.1903798574752346e-06, "loss": 0.1992, "step": 8616 }, { "epoch": 0.893045911493419, "grad_norm": 0.6280906796455383, "learning_rate": 1.188099351921237e-06, "loss": 0.1838, "step": 8617 }, { "epoch": 0.8931495491760805, "grad_norm": 0.6903097033500671, "learning_rate": 1.185820966062965e-06, "loss": 0.1842, "step": 8618 }, { "epoch": 0.8932531868587419, "grad_norm": 0.8103142976760864, "learning_rate": 1.1835447001571353e-06, "loss": 0.2249, "step": 8619 }, { "epoch": 0.8933568245414033, "grad_norm": 0.7311362624168396, "learning_rate": 1.1812705544602387e-06, "loss": 0.1945, "step": 8620 }, { "epoch": 0.8934604622240647, "grad_norm": 0.6058357954025269, "learning_rate": 1.1789985292285233e-06, "loss": 0.153, "step": 8621 }, { "epoch": 0.8935640999067261, "grad_norm": 0.5987060070037842, "learning_rate": 1.1767286247179976e-06, "loss": 0.1663, "step": 8622 }, { "epoch": 0.8936677375893876, "grad_norm": 0.6946430206298828, "learning_rate": 1.1744608411844282e-06, "loss": 0.1893, "step": 8623 }, { "epoch": 0.8937713752720489, "grad_norm": 0.7491544485092163, "learning_rate": 1.172195178883344e-06, "loss": 0.2054, "step": 8624 }, { "epoch": 0.8938750129547103, "grad_norm": 0.6955428123474121, "learning_rate": 1.1699316380700388e-06, "loss": 0.1676, "step": 8625 }, { "epoch": 0.8939786506373717, "grad_norm": 0.6969360709190369, "learning_rate": 1.1676702189995681e-06, "loss": 0.1758, "step": 8626 }, { "epoch": 0.8940822883200331, "grad_norm": 0.6750574111938477, "learning_rate": 1.1654109219267373e-06, "loss": 0.1787, "step": 8627 }, { "epoch": 0.8941859260026945, "grad_norm": 0.6682292819023132, "learning_rate": 1.163153747106127e-06, "loss": 0.1799, "step": 8628 }, { "epoch": 0.894289563685356, "grad_norm": 0.7346889972686768, "learning_rate": 1.160898694792072e-06, "loss": 0.2096, "step": 8629 }, { "epoch": 0.8943932013680174, "grad_norm": 0.8192048072814941, "learning_rate": 1.158645765238664e-06, "loss": 0.213, "step": 8630 }, { "epoch": 0.8944968390506788, "grad_norm": 0.669141411781311, "learning_rate": 1.1563949586997625e-06, "loss": 0.1865, "step": 8631 }, { "epoch": 0.8946004767333402, "grad_norm": 0.7453798055648804, "learning_rate": 1.1541462754289823e-06, "loss": 0.1989, "step": 8632 }, { "epoch": 0.8947041144160016, "grad_norm": 0.6614513397216797, "learning_rate": 1.1518997156797053e-06, "loss": 0.1701, "step": 8633 }, { "epoch": 0.894807752098663, "grad_norm": 0.6962074637413025, "learning_rate": 1.1496552797050665e-06, "loss": 0.1856, "step": 8634 }, { "epoch": 0.8949113897813245, "grad_norm": 0.7701820135116577, "learning_rate": 1.1474129677579703e-06, "loss": 0.2305, "step": 8635 }, { "epoch": 0.8950150274639859, "grad_norm": 0.601924479007721, "learning_rate": 1.1451727800910728e-06, "loss": 0.18, "step": 8636 }, { "epoch": 0.8951186651466473, "grad_norm": 0.7435898184776306, "learning_rate": 1.1429347169567938e-06, "loss": 0.1789, "step": 8637 }, { "epoch": 0.8952223028293087, "grad_norm": 0.7584136724472046, "learning_rate": 1.1406987786073209e-06, "loss": 0.2202, "step": 8638 }, { "epoch": 0.8953259405119701, "grad_norm": 0.6906249523162842, "learning_rate": 1.1384649652945877e-06, "loss": 0.1893, "step": 8639 }, { "epoch": 0.8954295781946315, "grad_norm": 0.6184383630752563, "learning_rate": 1.1362332772703021e-06, "loss": 0.1861, "step": 8640 }, { "epoch": 0.895533215877293, "grad_norm": 0.6365854740142822, "learning_rate": 1.1340037147859295e-06, "loss": 0.1636, "step": 8641 }, { "epoch": 0.8956368535599544, "grad_norm": 0.7137528657913208, "learning_rate": 1.131776278092691e-06, "loss": 0.1808, "step": 8642 }, { "epoch": 0.8957404912426158, "grad_norm": 0.7200307250022888, "learning_rate": 1.1295509674415683e-06, "loss": 0.2088, "step": 8643 }, { "epoch": 0.8958441289252772, "grad_norm": 0.7620137333869934, "learning_rate": 1.1273277830833075e-06, "loss": 0.2023, "step": 8644 }, { "epoch": 0.8959477666079386, "grad_norm": 0.5705604553222656, "learning_rate": 1.1251067252684122e-06, "loss": 0.1473, "step": 8645 }, { "epoch": 0.8960514042906, "grad_norm": 0.8270102739334106, "learning_rate": 1.122887794247154e-06, "loss": 0.2148, "step": 8646 }, { "epoch": 0.8961550419732615, "grad_norm": 0.7535790205001831, "learning_rate": 1.1206709902695502e-06, "loss": 0.1974, "step": 8647 }, { "epoch": 0.8962586796559229, "grad_norm": 0.6767138838768005, "learning_rate": 1.1184563135853965e-06, "loss": 0.1893, "step": 8648 }, { "epoch": 0.8963623173385843, "grad_norm": 0.860120952129364, "learning_rate": 1.1162437644442291e-06, "loss": 0.2046, "step": 8649 }, { "epoch": 0.8964659550212457, "grad_norm": 0.5672703385353088, "learning_rate": 1.1140333430953577e-06, "loss": 0.1682, "step": 8650 }, { "epoch": 0.8965695927039071, "grad_norm": 0.7149715423583984, "learning_rate": 1.1118250497878535e-06, "loss": 0.1976, "step": 8651 }, { "epoch": 0.8966732303865685, "grad_norm": 0.7060601115226746, "learning_rate": 1.1096188847705357e-06, "loss": 0.2031, "step": 8652 }, { "epoch": 0.89677686806923, "grad_norm": 0.7362112402915955, "learning_rate": 1.1074148482920011e-06, "loss": 0.2101, "step": 8653 }, { "epoch": 0.8968805057518914, "grad_norm": 0.9833632707595825, "learning_rate": 1.105212940600593e-06, "loss": 0.1849, "step": 8654 }, { "epoch": 0.8969841434345528, "grad_norm": 0.7174100279808044, "learning_rate": 1.1030131619444128e-06, "loss": 0.1928, "step": 8655 }, { "epoch": 0.8970877811172142, "grad_norm": 0.559037446975708, "learning_rate": 1.1008155125713382e-06, "loss": 0.165, "step": 8656 }, { "epoch": 0.8971914187998756, "grad_norm": 0.7384814023971558, "learning_rate": 1.098619992728991e-06, "loss": 0.2041, "step": 8657 }, { "epoch": 0.897295056482537, "grad_norm": 0.7134201526641846, "learning_rate": 1.0964266026647596e-06, "loss": 0.2359, "step": 8658 }, { "epoch": 0.8973986941651985, "grad_norm": 0.8288852572441101, "learning_rate": 1.0942353426257934e-06, "loss": 0.2553, "step": 8659 }, { "epoch": 0.8975023318478599, "grad_norm": 0.6036843657493591, "learning_rate": 1.0920462128589992e-06, "loss": 0.168, "step": 8660 }, { "epoch": 0.8976059695305213, "grad_norm": 0.7506285309791565, "learning_rate": 1.0898592136110465e-06, "loss": 0.1689, "step": 8661 }, { "epoch": 0.8977096072131827, "grad_norm": 0.6949458122253418, "learning_rate": 1.0876743451283555e-06, "loss": 0.2195, "step": 8662 }, { "epoch": 0.8978132448958441, "grad_norm": 0.5990825295448303, "learning_rate": 1.0854916076571254e-06, "loss": 0.1691, "step": 8663 }, { "epoch": 0.8979168825785055, "grad_norm": 0.7362884879112244, "learning_rate": 1.0833110014432945e-06, "loss": 0.1726, "step": 8664 }, { "epoch": 0.898020520261167, "grad_norm": 0.5977020263671875, "learning_rate": 1.0811325267325712e-06, "loss": 0.1707, "step": 8665 }, { "epoch": 0.8981241579438284, "grad_norm": 0.6358360052108765, "learning_rate": 1.07895618377043e-06, "loss": 0.1798, "step": 8666 }, { "epoch": 0.8982277956264898, "grad_norm": 0.7800115346908569, "learning_rate": 1.0767819728020924e-06, "loss": 0.2185, "step": 8667 }, { "epoch": 0.8983314333091512, "grad_norm": 0.6631283760070801, "learning_rate": 1.0746098940725447e-06, "loss": 0.1629, "step": 8668 }, { "epoch": 0.8984350709918126, "grad_norm": 0.6898950934410095, "learning_rate": 1.0724399478265312e-06, "loss": 0.1972, "step": 8669 }, { "epoch": 0.898538708674474, "grad_norm": 0.7569713592529297, "learning_rate": 1.0702721343085608e-06, "loss": 0.2137, "step": 8670 }, { "epoch": 0.8986423463571355, "grad_norm": 0.6086417436599731, "learning_rate": 1.0681064537629027e-06, "loss": 0.1691, "step": 8671 }, { "epoch": 0.8987459840397969, "grad_norm": 0.6909456253051758, "learning_rate": 1.065942906433577e-06, "loss": 0.1838, "step": 8672 }, { "epoch": 0.8988496217224583, "grad_norm": 0.6393236517906189, "learning_rate": 1.063781492564373e-06, "loss": 0.1817, "step": 8673 }, { "epoch": 0.8989532594051197, "grad_norm": 0.8174936175346375, "learning_rate": 1.061622212398834e-06, "loss": 0.23, "step": 8674 }, { "epoch": 0.8990568970877811, "grad_norm": 0.6729822754859924, "learning_rate": 1.0594650661802608e-06, "loss": 0.1935, "step": 8675 }, { "epoch": 0.8991605347704426, "grad_norm": 0.6885797381401062, "learning_rate": 1.0573100541517234e-06, "loss": 0.1822, "step": 8676 }, { "epoch": 0.899264172453104, "grad_norm": 0.5672880411148071, "learning_rate": 1.0551571765560388e-06, "loss": 0.1614, "step": 8677 }, { "epoch": 0.8993678101357654, "grad_norm": 0.6624981164932251, "learning_rate": 1.0530064336357948e-06, "loss": 0.1921, "step": 8678 }, { "epoch": 0.8994714478184268, "grad_norm": 0.6525665521621704, "learning_rate": 1.0508578256333335e-06, "loss": 0.1975, "step": 8679 }, { "epoch": 0.8995750855010882, "grad_norm": 0.6125645637512207, "learning_rate": 1.0487113527907522e-06, "loss": 0.1559, "step": 8680 }, { "epoch": 0.8996787231837496, "grad_norm": 0.6657199859619141, "learning_rate": 1.0465670153499196e-06, "loss": 0.1928, "step": 8681 }, { "epoch": 0.8997823608664111, "grad_norm": 0.7960999608039856, "learning_rate": 1.044424813552447e-06, "loss": 0.2421, "step": 8682 }, { "epoch": 0.8998859985490725, "grad_norm": 0.6669391989707947, "learning_rate": 1.0422847476397235e-06, "loss": 0.1811, "step": 8683 }, { "epoch": 0.8999896362317339, "grad_norm": 0.7245315909385681, "learning_rate": 1.0401468178528829e-06, "loss": 0.1971, "step": 8684 }, { "epoch": 0.9000932739143953, "grad_norm": 0.6361146569252014, "learning_rate": 1.0380110244328256e-06, "loss": 0.1836, "step": 8685 }, { "epoch": 0.9001969115970567, "grad_norm": 0.806462287902832, "learning_rate": 1.0358773676202105e-06, "loss": 0.2434, "step": 8686 }, { "epoch": 0.9003005492797181, "grad_norm": 0.6648523807525635, "learning_rate": 1.0337458476554497e-06, "loss": 0.1886, "step": 8687 }, { "epoch": 0.9004041869623796, "grad_norm": 0.6641026735305786, "learning_rate": 1.0316164647787263e-06, "loss": 0.1828, "step": 8688 }, { "epoch": 0.900507824645041, "grad_norm": 0.7108498215675354, "learning_rate": 1.0294892192299688e-06, "loss": 0.19, "step": 8689 }, { "epoch": 0.9006114623277024, "grad_norm": 0.6778939962387085, "learning_rate": 1.0273641112488787e-06, "loss": 0.1905, "step": 8690 }, { "epoch": 0.9007151000103638, "grad_norm": 0.5897559523582458, "learning_rate": 1.0252411410749063e-06, "loss": 0.156, "step": 8691 }, { "epoch": 0.9008187376930252, "grad_norm": 0.8290709257125854, "learning_rate": 1.0231203089472674e-06, "loss": 0.2357, "step": 8692 }, { "epoch": 0.9009223753756866, "grad_norm": 0.7354229092597961, "learning_rate": 1.0210016151049329e-06, "loss": 0.1803, "step": 8693 }, { "epoch": 0.9010260130583481, "grad_norm": 0.8105208873748779, "learning_rate": 1.0188850597866272e-06, "loss": 0.2522, "step": 8694 }, { "epoch": 0.9011296507410095, "grad_norm": 0.5526991486549377, "learning_rate": 1.0167706432308487e-06, "loss": 0.1541, "step": 8695 }, { "epoch": 0.9012332884236709, "grad_norm": 0.7528881430625916, "learning_rate": 1.0146583656758468e-06, "loss": 0.2092, "step": 8696 }, { "epoch": 0.9013369261063323, "grad_norm": 0.7546420693397522, "learning_rate": 1.0125482273596222e-06, "loss": 0.2322, "step": 8697 }, { "epoch": 0.9014405637889937, "grad_norm": 0.685276985168457, "learning_rate": 1.010440228519951e-06, "loss": 0.1759, "step": 8698 }, { "epoch": 0.9015442014716551, "grad_norm": 0.6832001209259033, "learning_rate": 1.0083343693943548e-06, "loss": 0.1734, "step": 8699 }, { "epoch": 0.9016478391543165, "grad_norm": 0.6144412755966187, "learning_rate": 1.006230650220117e-06, "loss": 0.1697, "step": 8700 }, { "epoch": 0.9017514768369779, "grad_norm": 0.8056115508079529, "learning_rate": 1.0041290712342833e-06, "loss": 0.2224, "step": 8701 }, { "epoch": 0.9018551145196393, "grad_norm": 0.832954466342926, "learning_rate": 1.0020296326736557e-06, "loss": 0.2105, "step": 8702 }, { "epoch": 0.9019587522023007, "grad_norm": 0.6306507587432861, "learning_rate": 9.999323347747981e-07, "loss": 0.1892, "step": 8703 }, { "epoch": 0.9020623898849621, "grad_norm": 0.7022785544395447, "learning_rate": 9.97837177774026e-07, "loss": 0.1928, "step": 8704 }, { "epoch": 0.9021660275676235, "grad_norm": 0.6971010565757751, "learning_rate": 9.95744161907426e-07, "loss": 0.1981, "step": 8705 }, { "epoch": 0.902269665250285, "grad_norm": 0.7028632164001465, "learning_rate": 9.936532874108296e-07, "loss": 0.1925, "step": 8706 }, { "epoch": 0.9023733029329464, "grad_norm": 0.7240568995475769, "learning_rate": 9.915645545198304e-07, "loss": 0.2021, "step": 8707 }, { "epoch": 0.9024769406156078, "grad_norm": 0.747568666934967, "learning_rate": 9.894779634697937e-07, "loss": 0.1929, "step": 8708 }, { "epoch": 0.9025805782982692, "grad_norm": 0.6814967393875122, "learning_rate": 9.873935144958224e-07, "loss": 0.1784, "step": 8709 }, { "epoch": 0.9026842159809306, "grad_norm": 0.614301860332489, "learning_rate": 9.853112078327954e-07, "loss": 0.1796, "step": 8710 }, { "epoch": 0.902787853663592, "grad_norm": 0.7195767164230347, "learning_rate": 9.832310437153469e-07, "loss": 0.2036, "step": 8711 }, { "epoch": 0.9028914913462535, "grad_norm": 0.6737602949142456, "learning_rate": 9.811530223778587e-07, "loss": 0.1664, "step": 8712 }, { "epoch": 0.9029951290289149, "grad_norm": 0.6514472961425781, "learning_rate": 9.790771440544856e-07, "loss": 0.1883, "step": 8713 }, { "epoch": 0.9030987667115763, "grad_norm": 0.7326091527938843, "learning_rate": 9.770034089791269e-07, "loss": 0.191, "step": 8714 }, { "epoch": 0.9032024043942377, "grad_norm": 0.7871467471122742, "learning_rate": 9.749318173854515e-07, "loss": 0.2391, "step": 8715 }, { "epoch": 0.9033060420768991, "grad_norm": 0.6911875605583191, "learning_rate": 9.728623695068885e-07, "loss": 0.2012, "step": 8716 }, { "epoch": 0.9034096797595605, "grad_norm": 0.7176652550697327, "learning_rate": 9.707950655766152e-07, "loss": 0.192, "step": 8717 }, { "epoch": 0.903513317442222, "grad_norm": 0.5769132971763611, "learning_rate": 9.687299058275723e-07, "loss": 0.1668, "step": 8718 }, { "epoch": 0.9036169551248834, "grad_norm": 0.7209014296531677, "learning_rate": 9.666668904924558e-07, "loss": 0.209, "step": 8719 }, { "epoch": 0.9037205928075448, "grad_norm": 0.7249887585639954, "learning_rate": 9.646060198037267e-07, "loss": 0.2065, "step": 8720 }, { "epoch": 0.9038242304902062, "grad_norm": 0.7001287937164307, "learning_rate": 9.625472939936031e-07, "loss": 0.208, "step": 8721 }, { "epoch": 0.9039278681728676, "grad_norm": 0.7644567489624023, "learning_rate": 9.604907132940511e-07, "loss": 0.2086, "step": 8722 }, { "epoch": 0.904031505855529, "grad_norm": 0.7459152340888977, "learning_rate": 9.58436277936814e-07, "loss": 0.2362, "step": 8723 }, { "epoch": 0.9041351435381905, "grad_norm": 0.590069055557251, "learning_rate": 9.563839881533754e-07, "loss": 0.167, "step": 8724 }, { "epoch": 0.9042387812208519, "grad_norm": 0.7091349959373474, "learning_rate": 9.543338441749816e-07, "loss": 0.1824, "step": 8725 }, { "epoch": 0.9043424189035133, "grad_norm": 0.7616497874259949, "learning_rate": 9.522858462326456e-07, "loss": 0.2033, "step": 8726 }, { "epoch": 0.9044460565861747, "grad_norm": 0.7694166302680969, "learning_rate": 9.502399945571272e-07, "loss": 0.199, "step": 8727 }, { "epoch": 0.9045496942688361, "grad_norm": 0.6686285138130188, "learning_rate": 9.481962893789575e-07, "loss": 0.1821, "step": 8728 }, { "epoch": 0.9046533319514976, "grad_norm": 0.5675410032272339, "learning_rate": 9.461547309284103e-07, "loss": 0.1629, "step": 8729 }, { "epoch": 0.904756969634159, "grad_norm": 0.7331852912902832, "learning_rate": 9.441153194355301e-07, "loss": 0.2246, "step": 8730 }, { "epoch": 0.9048606073168204, "grad_norm": 0.6197007298469543, "learning_rate": 9.420780551301134e-07, "loss": 0.1641, "step": 8731 }, { "epoch": 0.9049642449994818, "grad_norm": 0.6497044563293457, "learning_rate": 9.40042938241712e-07, "loss": 0.1727, "step": 8732 }, { "epoch": 0.9050678826821432, "grad_norm": 0.7364713549613953, "learning_rate": 9.380099689996447e-07, "loss": 0.2017, "step": 8733 }, { "epoch": 0.9051715203648046, "grad_norm": 0.6770288348197937, "learning_rate": 9.359791476329793e-07, "loss": 0.1878, "step": 8734 }, { "epoch": 0.905275158047466, "grad_norm": 0.679532527923584, "learning_rate": 9.339504743705508e-07, "loss": 0.2033, "step": 8735 }, { "epoch": 0.9053787957301275, "grad_norm": 0.6726140975952148, "learning_rate": 9.319239494409427e-07, "loss": 0.1659, "step": 8736 }, { "epoch": 0.9054824334127889, "grad_norm": 0.6465378403663635, "learning_rate": 9.298995730725058e-07, "loss": 0.1983, "step": 8737 }, { "epoch": 0.9055860710954503, "grad_norm": 0.7215073108673096, "learning_rate": 9.278773454933376e-07, "loss": 0.2055, "step": 8738 }, { "epoch": 0.9056897087781117, "grad_norm": 0.7202661037445068, "learning_rate": 9.258572669313004e-07, "loss": 0.1934, "step": 8739 }, { "epoch": 0.9057933464607731, "grad_norm": 0.7599125504493713, "learning_rate": 9.238393376140142e-07, "loss": 0.1996, "step": 8740 }, { "epoch": 0.9058969841434346, "grad_norm": 0.813451886177063, "learning_rate": 9.218235577688594e-07, "loss": 0.2303, "step": 8741 }, { "epoch": 0.906000621826096, "grad_norm": 0.858730137348175, "learning_rate": 9.198099276229699e-07, "loss": 0.2127, "step": 8742 }, { "epoch": 0.9061042595087574, "grad_norm": 0.6611201167106628, "learning_rate": 9.177984474032353e-07, "loss": 0.1684, "step": 8743 }, { "epoch": 0.9062078971914188, "grad_norm": 0.614651620388031, "learning_rate": 9.157891173363075e-07, "loss": 0.158, "step": 8744 }, { "epoch": 0.9063115348740802, "grad_norm": 0.7337183952331543, "learning_rate": 9.137819376485924e-07, "loss": 0.1859, "step": 8745 }, { "epoch": 0.9064151725567416, "grad_norm": 0.6367558240890503, "learning_rate": 9.11776908566262e-07, "loss": 0.1739, "step": 8746 }, { "epoch": 0.9065188102394031, "grad_norm": 0.7510764598846436, "learning_rate": 9.097740303152336e-07, "loss": 0.2334, "step": 8747 }, { "epoch": 0.9066224479220645, "grad_norm": 0.7323877215385437, "learning_rate": 9.077733031211955e-07, "loss": 0.213, "step": 8748 }, { "epoch": 0.9067260856047259, "grad_norm": 0.6575753092765808, "learning_rate": 9.057747272095807e-07, "loss": 0.1793, "step": 8749 }, { "epoch": 0.9068297232873873, "grad_norm": 0.6251068711280823, "learning_rate": 9.037783028055847e-07, "loss": 0.1631, "step": 8750 }, { "epoch": 0.9069333609700487, "grad_norm": 0.6162066459655762, "learning_rate": 9.017840301341651e-07, "loss": 0.1867, "step": 8751 }, { "epoch": 0.9070369986527101, "grad_norm": 0.6379433274269104, "learning_rate": 8.997919094200314e-07, "loss": 0.1753, "step": 8752 }, { "epoch": 0.9071406363353716, "grad_norm": 0.6354124546051025, "learning_rate": 8.978019408876548e-07, "loss": 0.1817, "step": 8753 }, { "epoch": 0.907244274018033, "grad_norm": 0.6392737627029419, "learning_rate": 8.958141247612606e-07, "loss": 0.1569, "step": 8754 }, { "epoch": 0.9073479117006944, "grad_norm": 0.5502351522445679, "learning_rate": 8.938284612648318e-07, "loss": 0.1339, "step": 8755 }, { "epoch": 0.9074515493833558, "grad_norm": 0.7764959931373596, "learning_rate": 8.918449506221138e-07, "loss": 0.2219, "step": 8756 }, { "epoch": 0.9075551870660172, "grad_norm": 0.7255973815917969, "learning_rate": 8.898635930565991e-07, "loss": 0.1985, "step": 8757 }, { "epoch": 0.9076588247486786, "grad_norm": 0.6193172931671143, "learning_rate": 8.878843887915489e-07, "loss": 0.1817, "step": 8758 }, { "epoch": 0.9077624624313401, "grad_norm": 0.7121764421463013, "learning_rate": 8.859073380499739e-07, "loss": 0.1974, "step": 8759 }, { "epoch": 0.9078661001140015, "grad_norm": 0.7299541234970093, "learning_rate": 8.839324410546468e-07, "loss": 0.2213, "step": 8760 }, { "epoch": 0.9079697377966629, "grad_norm": 0.7306295037269592, "learning_rate": 8.819596980280964e-07, "loss": 0.2208, "step": 8761 }, { "epoch": 0.9080733754793243, "grad_norm": 0.6126157641410828, "learning_rate": 8.799891091926094e-07, "loss": 0.1538, "step": 8762 }, { "epoch": 0.9081770131619857, "grad_norm": 0.769790768623352, "learning_rate": 8.780206747702258e-07, "loss": 0.2081, "step": 8763 }, { "epoch": 0.9082806508446472, "grad_norm": 0.7183113098144531, "learning_rate": 8.760543949827461e-07, "loss": 0.1936, "step": 8764 }, { "epoch": 0.9083842885273086, "grad_norm": 0.6745706796646118, "learning_rate": 8.740902700517284e-07, "loss": 0.1883, "step": 8765 }, { "epoch": 0.90848792620997, "grad_norm": 0.79713374376297, "learning_rate": 8.721283001984871e-07, "loss": 0.2036, "step": 8766 }, { "epoch": 0.9085915638926314, "grad_norm": 0.8247671723365784, "learning_rate": 8.70168485644094e-07, "loss": 0.207, "step": 8767 }, { "epoch": 0.9086952015752928, "grad_norm": 0.6058986783027649, "learning_rate": 8.682108266093792e-07, "loss": 0.1597, "step": 8768 }, { "epoch": 0.9087988392579542, "grad_norm": 0.6964036226272583, "learning_rate": 8.662553233149284e-07, "loss": 0.1965, "step": 8769 }, { "epoch": 0.9089024769406157, "grad_norm": 0.6855599880218506, "learning_rate": 8.643019759810811e-07, "loss": 0.2013, "step": 8770 }, { "epoch": 0.9090061146232771, "grad_norm": 0.7175893187522888, "learning_rate": 8.623507848279433e-07, "loss": 0.18, "step": 8771 }, { "epoch": 0.9091097523059385, "grad_norm": 0.7140921354293823, "learning_rate": 8.604017500753659e-07, "loss": 0.2082, "step": 8772 }, { "epoch": 0.9092133899885999, "grad_norm": 0.7414721250534058, "learning_rate": 8.584548719429664e-07, "loss": 0.2212, "step": 8773 }, { "epoch": 0.9093170276712613, "grad_norm": 0.7165692448616028, "learning_rate": 8.565101506501206e-07, "loss": 0.2057, "step": 8774 }, { "epoch": 0.9094206653539227, "grad_norm": 0.5552352070808411, "learning_rate": 8.545675864159464e-07, "loss": 0.1537, "step": 8775 }, { "epoch": 0.909524303036584, "grad_norm": 0.746362030506134, "learning_rate": 8.526271794593377e-07, "loss": 0.21, "step": 8776 }, { "epoch": 0.9096279407192455, "grad_norm": 0.7297185659408569, "learning_rate": 8.506889299989307e-07, "loss": 0.2053, "step": 8777 }, { "epoch": 0.9097315784019069, "grad_norm": 0.7221811413764954, "learning_rate": 8.487528382531263e-07, "loss": 0.1842, "step": 8778 }, { "epoch": 0.9098352160845683, "grad_norm": 0.7709521651268005, "learning_rate": 8.468189044400832e-07, "loss": 0.2198, "step": 8779 }, { "epoch": 0.9099388537672297, "grad_norm": 0.6700122356414795, "learning_rate": 8.448871287777116e-07, "loss": 0.1561, "step": 8780 }, { "epoch": 0.9100424914498911, "grad_norm": 0.7160411477088928, "learning_rate": 8.429575114836819e-07, "loss": 0.1951, "step": 8781 }, { "epoch": 0.9101461291325526, "grad_norm": 0.6808386445045471, "learning_rate": 8.410300527754178e-07, "loss": 0.171, "step": 8782 }, { "epoch": 0.910249766815214, "grad_norm": 0.730686604976654, "learning_rate": 8.391047528701035e-07, "loss": 0.1973, "step": 8783 }, { "epoch": 0.9103534044978754, "grad_norm": 0.6939843893051147, "learning_rate": 8.37181611984681e-07, "loss": 0.1772, "step": 8784 }, { "epoch": 0.9104570421805368, "grad_norm": 0.6927918195724487, "learning_rate": 8.352606303358435e-07, "loss": 0.2027, "step": 8785 }, { "epoch": 0.9105606798631982, "grad_norm": 0.7357034087181091, "learning_rate": 8.33341808140049e-07, "loss": 0.232, "step": 8786 }, { "epoch": 0.9106643175458596, "grad_norm": 0.592159628868103, "learning_rate": 8.314251456135047e-07, "loss": 0.1668, "step": 8787 }, { "epoch": 0.910767955228521, "grad_norm": 0.6434322595596313, "learning_rate": 8.295106429721733e-07, "loss": 0.1891, "step": 8788 }, { "epoch": 0.9108715929111825, "grad_norm": 0.7842497229576111, "learning_rate": 8.275983004317845e-07, "loss": 0.2292, "step": 8789 }, { "epoch": 0.9109752305938439, "grad_norm": 0.682867169380188, "learning_rate": 8.256881182078125e-07, "loss": 0.2043, "step": 8790 }, { "epoch": 0.9110788682765053, "grad_norm": 0.7165136933326721, "learning_rate": 8.237800965154985e-07, "loss": 0.2061, "step": 8791 }, { "epoch": 0.9111825059591667, "grad_norm": 0.7450674176216125, "learning_rate": 8.218742355698306e-07, "loss": 0.2129, "step": 8792 }, { "epoch": 0.9112861436418281, "grad_norm": 0.7600066065788269, "learning_rate": 8.199705355855637e-07, "loss": 0.2152, "step": 8793 }, { "epoch": 0.9113897813244896, "grad_norm": 0.6899908185005188, "learning_rate": 8.180689967772016e-07, "loss": 0.1928, "step": 8794 }, { "epoch": 0.911493419007151, "grad_norm": 0.7446398138999939, "learning_rate": 8.16169619359004e-07, "loss": 0.1994, "step": 8795 }, { "epoch": 0.9115970566898124, "grad_norm": 0.6524834632873535, "learning_rate": 8.142724035449934e-07, "loss": 0.1942, "step": 8796 }, { "epoch": 0.9117006943724738, "grad_norm": 0.804182231426239, "learning_rate": 8.123773495489406e-07, "loss": 0.1911, "step": 8797 }, { "epoch": 0.9118043320551352, "grad_norm": 0.677555501461029, "learning_rate": 8.104844575843795e-07, "loss": 0.1789, "step": 8798 }, { "epoch": 0.9119079697377966, "grad_norm": 0.6828414797782898, "learning_rate": 8.085937278646039e-07, "loss": 0.1907, "step": 8799 }, { "epoch": 0.9120116074204581, "grad_norm": 0.5415399074554443, "learning_rate": 8.067051606026521e-07, "loss": 0.1591, "step": 8800 }, { "epoch": 0.9121152451031195, "grad_norm": 0.7139943838119507, "learning_rate": 8.048187560113274e-07, "loss": 0.1868, "step": 8801 }, { "epoch": 0.9122188827857809, "grad_norm": 0.8067030906677246, "learning_rate": 8.029345143031819e-07, "loss": 0.2089, "step": 8802 }, { "epoch": 0.9123225204684423, "grad_norm": 0.6702319979667664, "learning_rate": 8.010524356905325e-07, "loss": 0.1661, "step": 8803 }, { "epoch": 0.9124261581511037, "grad_norm": 0.786279559135437, "learning_rate": 7.99172520385454e-07, "loss": 0.2033, "step": 8804 }, { "epoch": 0.9125297958337651, "grad_norm": 0.6434553861618042, "learning_rate": 7.972947685997634e-07, "loss": 0.1687, "step": 8805 }, { "epoch": 0.9126334335164266, "grad_norm": 0.729469358921051, "learning_rate": 7.954191805450518e-07, "loss": 0.1963, "step": 8806 }, { "epoch": 0.912737071199088, "grad_norm": 0.8612606525421143, "learning_rate": 7.935457564326477e-07, "loss": 0.2483, "step": 8807 }, { "epoch": 0.9128407088817494, "grad_norm": 0.6381658911705017, "learning_rate": 7.916744964736511e-07, "loss": 0.1866, "step": 8808 }, { "epoch": 0.9129443465644108, "grad_norm": 0.7000229954719543, "learning_rate": 7.898054008789157e-07, "loss": 0.1798, "step": 8809 }, { "epoch": 0.9130479842470722, "grad_norm": 0.6891587972640991, "learning_rate": 7.87938469859042e-07, "loss": 0.1968, "step": 8810 }, { "epoch": 0.9131516219297336, "grad_norm": 0.7172576785087585, "learning_rate": 7.860737036243993e-07, "loss": 0.1819, "step": 8811 }, { "epoch": 0.9132552596123951, "grad_norm": 0.5555979013442993, "learning_rate": 7.842111023851018e-07, "loss": 0.1501, "step": 8812 }, { "epoch": 0.9133588972950565, "grad_norm": 0.7300063967704773, "learning_rate": 7.823506663510239e-07, "loss": 0.2363, "step": 8813 }, { "epoch": 0.9134625349777179, "grad_norm": 0.645727276802063, "learning_rate": 7.804923957318001e-07, "loss": 0.1643, "step": 8814 }, { "epoch": 0.9135661726603793, "grad_norm": 0.6470401883125305, "learning_rate": 7.786362907368139e-07, "loss": 0.1829, "step": 8815 }, { "epoch": 0.9136698103430407, "grad_norm": 0.6089012622833252, "learning_rate": 7.767823515752116e-07, "loss": 0.1669, "step": 8816 }, { "epoch": 0.9137734480257022, "grad_norm": 0.6825041770935059, "learning_rate": 7.749305784558902e-07, "loss": 0.1641, "step": 8817 }, { "epoch": 0.9138770857083636, "grad_norm": 0.7475860714912415, "learning_rate": 7.730809715875076e-07, "loss": 0.2253, "step": 8818 }, { "epoch": 0.913980723391025, "grad_norm": 0.6520041227340698, "learning_rate": 7.712335311784703e-07, "loss": 0.1733, "step": 8819 }, { "epoch": 0.9140843610736864, "grad_norm": 0.6163454055786133, "learning_rate": 7.693882574369471e-07, "loss": 0.1588, "step": 8820 }, { "epoch": 0.9141879987563478, "grad_norm": 0.7530646324157715, "learning_rate": 7.675451505708609e-07, "loss": 0.2205, "step": 8821 }, { "epoch": 0.9142916364390092, "grad_norm": 0.6425605416297913, "learning_rate": 7.657042107878898e-07, "loss": 0.1807, "step": 8822 }, { "epoch": 0.9143952741216707, "grad_norm": 0.831058919429779, "learning_rate": 7.638654382954657e-07, "loss": 0.2212, "step": 8823 }, { "epoch": 0.9144989118043321, "grad_norm": 0.7559433579444885, "learning_rate": 7.62028833300783e-07, "loss": 0.2217, "step": 8824 }, { "epoch": 0.9146025494869935, "grad_norm": 0.736668050289154, "learning_rate": 7.601943960107871e-07, "loss": 0.226, "step": 8825 }, { "epoch": 0.9147061871696549, "grad_norm": 0.7217003107070923, "learning_rate": 7.583621266321773e-07, "loss": 0.1864, "step": 8826 }, { "epoch": 0.9148098248523163, "grad_norm": 0.6183484196662903, "learning_rate": 7.565320253714082e-07, "loss": 0.1827, "step": 8827 }, { "epoch": 0.9149134625349777, "grad_norm": 0.7267236709594727, "learning_rate": 7.547040924346948e-07, "loss": 0.2064, "step": 8828 }, { "epoch": 0.9150171002176392, "grad_norm": 0.7210378050804138, "learning_rate": 7.528783280280127e-07, "loss": 0.214, "step": 8829 }, { "epoch": 0.9151207379003006, "grad_norm": 0.6646870970726013, "learning_rate": 7.510547323570749e-07, "loss": 0.1879, "step": 8830 }, { "epoch": 0.915224375582962, "grad_norm": 0.7031145691871643, "learning_rate": 7.492333056273704e-07, "loss": 0.1817, "step": 8831 }, { "epoch": 0.9153280132656234, "grad_norm": 0.6843001842498779, "learning_rate": 7.474140480441305e-07, "loss": 0.2295, "step": 8832 }, { "epoch": 0.9154316509482848, "grad_norm": 0.6840508580207825, "learning_rate": 7.455969598123447e-07, "loss": 0.2066, "step": 8833 }, { "epoch": 0.9155352886309462, "grad_norm": 0.7510146498680115, "learning_rate": 7.437820411367669e-07, "loss": 0.2307, "step": 8834 }, { "epoch": 0.9156389263136077, "grad_norm": 0.6502298712730408, "learning_rate": 7.419692922218891e-07, "loss": 0.2034, "step": 8835 }, { "epoch": 0.9157425639962691, "grad_norm": 0.6547967195510864, "learning_rate": 7.401587132719767e-07, "loss": 0.1581, "step": 8836 }, { "epoch": 0.9158462016789305, "grad_norm": 0.6777070164680481, "learning_rate": 7.383503044910423e-07, "loss": 0.1929, "step": 8837 }, { "epoch": 0.9159498393615919, "grad_norm": 0.6916144490242004, "learning_rate": 7.365440660828472e-07, "loss": 0.1757, "step": 8838 }, { "epoch": 0.9160534770442533, "grad_norm": 0.6567953824996948, "learning_rate": 7.347399982509263e-07, "loss": 0.2006, "step": 8839 }, { "epoch": 0.9161571147269147, "grad_norm": 0.6022465825080872, "learning_rate": 7.329381011985504e-07, "loss": 0.1526, "step": 8840 }, { "epoch": 0.9162607524095762, "grad_norm": 0.7050966024398804, "learning_rate": 7.311383751287616e-07, "loss": 0.1838, "step": 8841 }, { "epoch": 0.9163643900922376, "grad_norm": 0.7759564518928528, "learning_rate": 7.293408202443441e-07, "loss": 0.2183, "step": 8842 }, { "epoch": 0.916468027774899, "grad_norm": 0.6298695206642151, "learning_rate": 7.275454367478474e-07, "loss": 0.1592, "step": 8843 }, { "epoch": 0.9165716654575604, "grad_norm": 0.6032199263572693, "learning_rate": 7.257522248415716e-07, "loss": 0.1811, "step": 8844 }, { "epoch": 0.9166753031402218, "grad_norm": 0.6469445824623108, "learning_rate": 7.239611847275707e-07, "loss": 0.1816, "step": 8845 }, { "epoch": 0.9167789408228832, "grad_norm": 0.6629596948623657, "learning_rate": 7.221723166076611e-07, "loss": 0.1962, "step": 8846 }, { "epoch": 0.9168825785055447, "grad_norm": 0.6826691627502441, "learning_rate": 7.203856206834037e-07, "loss": 0.2113, "step": 8847 }, { "epoch": 0.9169862161882061, "grad_norm": 0.80489182472229, "learning_rate": 7.186010971561241e-07, "loss": 0.1662, "step": 8848 }, { "epoch": 0.9170898538708675, "grad_norm": 0.5962115526199341, "learning_rate": 7.168187462269016e-07, "loss": 0.1684, "step": 8849 }, { "epoch": 0.9171934915535289, "grad_norm": 0.6177490949630737, "learning_rate": 7.150385680965666e-07, "loss": 0.1645, "step": 8850 }, { "epoch": 0.9172971292361903, "grad_norm": 0.5791274309158325, "learning_rate": 7.132605629657052e-07, "loss": 0.1671, "step": 8851 }, { "epoch": 0.9174007669188516, "grad_norm": 0.9155428409576416, "learning_rate": 7.114847310346617e-07, "loss": 0.2238, "step": 8852 }, { "epoch": 0.9175044046015131, "grad_norm": 0.694946825504303, "learning_rate": 7.097110725035339e-07, "loss": 0.201, "step": 8853 }, { "epoch": 0.9176080422841745, "grad_norm": 0.5938006639480591, "learning_rate": 7.079395875721751e-07, "loss": 0.1707, "step": 8854 }, { "epoch": 0.9177116799668359, "grad_norm": 0.6835440993309021, "learning_rate": 7.061702764401945e-07, "loss": 0.1877, "step": 8855 }, { "epoch": 0.9178153176494973, "grad_norm": 0.6363372206687927, "learning_rate": 7.04403139306955e-07, "loss": 0.1741, "step": 8856 }, { "epoch": 0.9179189553321587, "grad_norm": 0.634978711605072, "learning_rate": 7.026381763715729e-07, "loss": 0.1802, "step": 8857 }, { "epoch": 0.9180225930148201, "grad_norm": 0.5975295901298523, "learning_rate": 7.008753878329222e-07, "loss": 0.1777, "step": 8858 }, { "epoch": 0.9181262306974816, "grad_norm": 0.7016348242759705, "learning_rate": 6.991147738896331e-07, "loss": 0.2119, "step": 8859 }, { "epoch": 0.918229868380143, "grad_norm": 0.6460996866226196, "learning_rate": 6.973563347400869e-07, "loss": 0.1849, "step": 8860 }, { "epoch": 0.9183335060628044, "grad_norm": 0.6963163614273071, "learning_rate": 6.956000705824228e-07, "loss": 0.1901, "step": 8861 }, { "epoch": 0.9184371437454658, "grad_norm": 0.7260379195213318, "learning_rate": 6.938459816145316e-07, "loss": 0.201, "step": 8862 }, { "epoch": 0.9185407814281272, "grad_norm": 0.5716794729232788, "learning_rate": 6.92094068034066e-07, "loss": 0.1861, "step": 8863 }, { "epoch": 0.9186444191107886, "grad_norm": 0.7157220244407654, "learning_rate": 6.90344330038426e-07, "loss": 0.1921, "step": 8864 }, { "epoch": 0.9187480567934501, "grad_norm": 0.6244716048240662, "learning_rate": 6.885967678247652e-07, "loss": 0.1564, "step": 8865 }, { "epoch": 0.9188516944761115, "grad_norm": 0.6170865893363953, "learning_rate": 6.868513815900057e-07, "loss": 0.1619, "step": 8866 }, { "epoch": 0.9189553321587729, "grad_norm": 0.7579802870750427, "learning_rate": 6.851081715308061e-07, "loss": 0.1691, "step": 8867 }, { "epoch": 0.9190589698414343, "grad_norm": 0.6972897052764893, "learning_rate": 6.833671378435913e-07, "loss": 0.1976, "step": 8868 }, { "epoch": 0.9191626075240957, "grad_norm": 0.8163078427314758, "learning_rate": 6.816282807245444e-07, "loss": 0.2038, "step": 8869 }, { "epoch": 0.9192662452067571, "grad_norm": 0.7421157956123352, "learning_rate": 6.798916003695888e-07, "loss": 0.1971, "step": 8870 }, { "epoch": 0.9193698828894186, "grad_norm": 0.6653786301612854, "learning_rate": 6.781570969744145e-07, "loss": 0.1626, "step": 8871 }, { "epoch": 0.91947352057208, "grad_norm": 0.7527048587799072, "learning_rate": 6.764247707344606e-07, "loss": 0.1949, "step": 8872 }, { "epoch": 0.9195771582547414, "grad_norm": 0.6733422875404358, "learning_rate": 6.746946218449224e-07, "loss": 0.1754, "step": 8873 }, { "epoch": 0.9196807959374028, "grad_norm": 0.7400389909744263, "learning_rate": 6.729666505007571e-07, "loss": 0.202, "step": 8874 }, { "epoch": 0.9197844336200642, "grad_norm": 0.791100263595581, "learning_rate": 6.712408568966644e-07, "loss": 0.2167, "step": 8875 }, { "epoch": 0.9198880713027257, "grad_norm": 0.6577385663986206, "learning_rate": 6.695172412271044e-07, "loss": 0.1858, "step": 8876 }, { "epoch": 0.9199917089853871, "grad_norm": 0.720878005027771, "learning_rate": 6.677958036862908e-07, "loss": 0.2019, "step": 8877 }, { "epoch": 0.9200953466680485, "grad_norm": 0.7390361428260803, "learning_rate": 6.660765444681927e-07, "loss": 0.2165, "step": 8878 }, { "epoch": 0.9201989843507099, "grad_norm": 0.7230072617530823, "learning_rate": 6.643594637665374e-07, "loss": 0.1955, "step": 8879 }, { "epoch": 0.9203026220333713, "grad_norm": 0.6402953863143921, "learning_rate": 6.626445617747968e-07, "loss": 0.1666, "step": 8880 }, { "epoch": 0.9204062597160327, "grad_norm": 0.5999537706375122, "learning_rate": 6.609318386862096e-07, "loss": 0.1816, "step": 8881 }, { "epoch": 0.9205098973986942, "grad_norm": 0.7926232814788818, "learning_rate": 6.592212946937571e-07, "loss": 0.1697, "step": 8882 }, { "epoch": 0.9206135350813556, "grad_norm": 0.6867687702178955, "learning_rate": 6.575129299901828e-07, "loss": 0.1815, "step": 8883 }, { "epoch": 0.920717172764017, "grad_norm": 0.5919355750083923, "learning_rate": 6.558067447679861e-07, "loss": 0.1497, "step": 8884 }, { "epoch": 0.9208208104466784, "grad_norm": 0.7224149703979492, "learning_rate": 6.541027392194111e-07, "loss": 0.1787, "step": 8885 }, { "epoch": 0.9209244481293398, "grad_norm": 0.6357755064964294, "learning_rate": 6.524009135364684e-07, "loss": 0.1604, "step": 8886 }, { "epoch": 0.9210280858120012, "grad_norm": 0.6600928902626038, "learning_rate": 6.507012679109115e-07, "loss": 0.1786, "step": 8887 }, { "epoch": 0.9211317234946627, "grad_norm": 0.6421642303466797, "learning_rate": 6.490038025342604e-07, "loss": 0.1625, "step": 8888 }, { "epoch": 0.9212353611773241, "grad_norm": 0.6637979745864868, "learning_rate": 6.473085175977778e-07, "loss": 0.1932, "step": 8889 }, { "epoch": 0.9213389988599855, "grad_norm": 0.7803764343261719, "learning_rate": 6.456154132924841e-07, "loss": 0.2317, "step": 8890 }, { "epoch": 0.9214426365426469, "grad_norm": 0.5398008227348328, "learning_rate": 6.439244898091623e-07, "loss": 0.1584, "step": 8891 }, { "epoch": 0.9215462742253083, "grad_norm": 0.6695163249969482, "learning_rate": 6.422357473383378e-07, "loss": 0.1805, "step": 8892 }, { "epoch": 0.9216499119079697, "grad_norm": 0.7654611468315125, "learning_rate": 6.40549186070294e-07, "loss": 0.2207, "step": 8893 }, { "epoch": 0.9217535495906312, "grad_norm": 0.5898266434669495, "learning_rate": 6.388648061950786e-07, "loss": 0.1428, "step": 8894 }, { "epoch": 0.9218571872732926, "grad_norm": 0.7077470421791077, "learning_rate": 6.371826079024778e-07, "loss": 0.1897, "step": 8895 }, { "epoch": 0.921960824955954, "grad_norm": 0.7756674885749817, "learning_rate": 6.355025913820401e-07, "loss": 0.2364, "step": 8896 }, { "epoch": 0.9220644626386154, "grad_norm": 0.7032382488250732, "learning_rate": 6.338247568230671e-07, "loss": 0.2081, "step": 8897 }, { "epoch": 0.9221681003212768, "grad_norm": 0.5878440141677856, "learning_rate": 6.321491044146145e-07, "loss": 0.1781, "step": 8898 }, { "epoch": 0.9222717380039382, "grad_norm": 0.6283981800079346, "learning_rate": 6.304756343454954e-07, "loss": 0.1764, "step": 8899 }, { "epoch": 0.9223753756865997, "grad_norm": 0.6028159856796265, "learning_rate": 6.288043468042704e-07, "loss": 0.1623, "step": 8900 }, { "epoch": 0.9224790133692611, "grad_norm": 0.7289518117904663, "learning_rate": 6.271352419792576e-07, "loss": 0.1888, "step": 8901 }, { "epoch": 0.9225826510519225, "grad_norm": 0.7628673315048218, "learning_rate": 6.25468320058531e-07, "loss": 0.2095, "step": 8902 }, { "epoch": 0.9226862887345839, "grad_norm": 0.6608951091766357, "learning_rate": 6.238035812299137e-07, "loss": 0.1904, "step": 8903 }, { "epoch": 0.9227899264172453, "grad_norm": 0.7164463996887207, "learning_rate": 6.22141025680989e-07, "loss": 0.1777, "step": 8904 }, { "epoch": 0.9228935640999067, "grad_norm": 0.7240995168685913, "learning_rate": 6.204806535990893e-07, "loss": 0.1771, "step": 8905 }, { "epoch": 0.9229972017825682, "grad_norm": 0.5761838555335999, "learning_rate": 6.188224651713071e-07, "loss": 0.1562, "step": 8906 }, { "epoch": 0.9231008394652296, "grad_norm": 0.7067749500274658, "learning_rate": 6.171664605844796e-07, "loss": 0.1994, "step": 8907 }, { "epoch": 0.923204477147891, "grad_norm": 0.7407668828964233, "learning_rate": 6.155126400252021e-07, "loss": 0.1908, "step": 8908 }, { "epoch": 0.9233081148305524, "grad_norm": 0.8542502522468567, "learning_rate": 6.138610036798276e-07, "loss": 0.2135, "step": 8909 }, { "epoch": 0.9234117525132138, "grad_norm": 0.5649498105049133, "learning_rate": 6.122115517344585e-07, "loss": 0.1675, "step": 8910 }, { "epoch": 0.9235153901958753, "grad_norm": 0.729465663433075, "learning_rate": 6.105642843749526e-07, "loss": 0.209, "step": 8911 }, { "epoch": 0.9236190278785367, "grad_norm": 0.7549067139625549, "learning_rate": 6.089192017869217e-07, "loss": 0.2098, "step": 8912 }, { "epoch": 0.9237226655611981, "grad_norm": 0.6583209037780762, "learning_rate": 6.072763041557328e-07, "loss": 0.188, "step": 8913 }, { "epoch": 0.9238263032438595, "grad_norm": 0.7007696628570557, "learning_rate": 6.056355916665024e-07, "loss": 0.1745, "step": 8914 }, { "epoch": 0.9239299409265209, "grad_norm": 0.7478169798851013, "learning_rate": 6.039970645041027e-07, "loss": 0.2256, "step": 8915 }, { "epoch": 0.9240335786091823, "grad_norm": 0.6510449051856995, "learning_rate": 6.023607228531659e-07, "loss": 0.1608, "step": 8916 }, { "epoch": 0.9241372162918438, "grad_norm": 0.7374513745307922, "learning_rate": 6.007265668980644e-07, "loss": 0.2035, "step": 8917 }, { "epoch": 0.9242408539745052, "grad_norm": 0.8026915192604065, "learning_rate": 5.990945968229378e-07, "loss": 0.2051, "step": 8918 }, { "epoch": 0.9243444916571666, "grad_norm": 0.6376965045928955, "learning_rate": 5.974648128116744e-07, "loss": 0.162, "step": 8919 }, { "epoch": 0.924448129339828, "grad_norm": 0.7291052341461182, "learning_rate": 5.958372150479141e-07, "loss": 0.1945, "step": 8920 }, { "epoch": 0.9245517670224894, "grad_norm": 0.7327719330787659, "learning_rate": 5.942118037150524e-07, "loss": 0.1804, "step": 8921 }, { "epoch": 0.9246554047051508, "grad_norm": 0.650361180305481, "learning_rate": 5.92588578996236e-07, "loss": 0.1703, "step": 8922 }, { "epoch": 0.9247590423878123, "grad_norm": 0.572733998298645, "learning_rate": 5.909675410743676e-07, "loss": 0.1423, "step": 8923 }, { "epoch": 0.9248626800704737, "grad_norm": 0.7012404799461365, "learning_rate": 5.893486901321077e-07, "loss": 0.1902, "step": 8924 }, { "epoch": 0.9249663177531351, "grad_norm": 0.7306863069534302, "learning_rate": 5.877320263518615e-07, "loss": 0.1976, "step": 8925 }, { "epoch": 0.9250699554357965, "grad_norm": 0.7554032206535339, "learning_rate": 5.861175499157945e-07, "loss": 0.2146, "step": 8926 }, { "epoch": 0.9251735931184579, "grad_norm": 0.5923937559127808, "learning_rate": 5.845052610058232e-07, "loss": 0.1641, "step": 8927 }, { "epoch": 0.9252772308011192, "grad_norm": 0.6130524277687073, "learning_rate": 5.828951598036137e-07, "loss": 0.1876, "step": 8928 }, { "epoch": 0.9253808684837806, "grad_norm": 0.6826179027557373, "learning_rate": 5.812872464905984e-07, "loss": 0.1976, "step": 8929 }, { "epoch": 0.9254845061664421, "grad_norm": 0.7306415438652039, "learning_rate": 5.796815212479434e-07, "loss": 0.2122, "step": 8930 }, { "epoch": 0.9255881438491035, "grad_norm": 0.7335067391395569, "learning_rate": 5.780779842565887e-07, "loss": 0.1904, "step": 8931 }, { "epoch": 0.9256917815317649, "grad_norm": 0.7055259943008423, "learning_rate": 5.764766356972163e-07, "loss": 0.2108, "step": 8932 }, { "epoch": 0.9257954192144263, "grad_norm": 0.812872052192688, "learning_rate": 5.748774757502573e-07, "loss": 0.2261, "step": 8933 }, { "epoch": 0.9258990568970877, "grad_norm": 0.5800080299377441, "learning_rate": 5.732805045959122e-07, "loss": 0.1649, "step": 8934 }, { "epoch": 0.9260026945797492, "grad_norm": 0.7482984066009521, "learning_rate": 5.71685722414117e-07, "loss": 0.2178, "step": 8935 }, { "epoch": 0.9261063322624106, "grad_norm": 0.6158731579780579, "learning_rate": 5.700931293845746e-07, "loss": 0.1653, "step": 8936 }, { "epoch": 0.926209969945072, "grad_norm": 0.6006136536598206, "learning_rate": 5.685027256867326e-07, "loss": 0.158, "step": 8937 }, { "epoch": 0.9263136076277334, "grad_norm": 0.6906245946884155, "learning_rate": 5.669145114997987e-07, "loss": 0.1747, "step": 8938 }, { "epoch": 0.9264172453103948, "grad_norm": 0.7390557527542114, "learning_rate": 5.653284870027276e-07, "loss": 0.2019, "step": 8939 }, { "epoch": 0.9265208829930562, "grad_norm": 0.7188860774040222, "learning_rate": 5.637446523742274e-07, "loss": 0.2142, "step": 8940 }, { "epoch": 0.9266245206757177, "grad_norm": 0.44092702865600586, "learning_rate": 5.621630077927709e-07, "loss": 0.1086, "step": 8941 }, { "epoch": 0.9267281583583791, "grad_norm": 0.5864990949630737, "learning_rate": 5.605835534365644e-07, "loss": 0.1586, "step": 8942 }, { "epoch": 0.9268317960410405, "grad_norm": 0.6084216237068176, "learning_rate": 5.590062894835857e-07, "loss": 0.1601, "step": 8943 }, { "epoch": 0.9269354337237019, "grad_norm": 0.7542744278907776, "learning_rate": 5.574312161115591e-07, "loss": 0.1936, "step": 8944 }, { "epoch": 0.9270390714063633, "grad_norm": 0.6386050581932068, "learning_rate": 5.558583334979584e-07, "loss": 0.1715, "step": 8945 }, { "epoch": 0.9271427090890247, "grad_norm": 0.7549616694450378, "learning_rate": 5.542876418200149e-07, "loss": 0.2269, "step": 8946 }, { "epoch": 0.9272463467716862, "grad_norm": 0.6269014477729797, "learning_rate": 5.527191412547095e-07, "loss": 0.1779, "step": 8947 }, { "epoch": 0.9273499844543476, "grad_norm": 0.6783638000488281, "learning_rate": 5.511528319787784e-07, "loss": 0.1994, "step": 8948 }, { "epoch": 0.927453622137009, "grad_norm": 0.6666401028633118, "learning_rate": 5.49588714168714e-07, "loss": 0.1861, "step": 8949 }, { "epoch": 0.9275572598196704, "grad_norm": 0.6111328601837158, "learning_rate": 5.480267880007572e-07, "loss": 0.1851, "step": 8950 }, { "epoch": 0.9276608975023318, "grad_norm": 0.6061242818832397, "learning_rate": 5.464670536509031e-07, "loss": 0.1717, "step": 8951 }, { "epoch": 0.9277645351849932, "grad_norm": 0.6167566180229187, "learning_rate": 5.449095112949021e-07, "loss": 0.1655, "step": 8952 }, { "epoch": 0.9278681728676547, "grad_norm": 0.6780339479446411, "learning_rate": 5.433541611082493e-07, "loss": 0.2067, "step": 8953 }, { "epoch": 0.9279718105503161, "grad_norm": 0.6359187960624695, "learning_rate": 5.418010032662091e-07, "loss": 0.1675, "step": 8954 }, { "epoch": 0.9280754482329775, "grad_norm": 0.7773694396018982, "learning_rate": 5.402500379437792e-07, "loss": 0.2221, "step": 8955 }, { "epoch": 0.9281790859156389, "grad_norm": 0.5864004492759705, "learning_rate": 5.387012653157264e-07, "loss": 0.163, "step": 8956 }, { "epoch": 0.9282827235983003, "grad_norm": 0.7232480645179749, "learning_rate": 5.371546855565601e-07, "loss": 0.1983, "step": 8957 }, { "epoch": 0.9283863612809617, "grad_norm": 0.8004658818244934, "learning_rate": 5.356102988405498e-07, "loss": 0.2239, "step": 8958 }, { "epoch": 0.9284899989636232, "grad_norm": 0.7216724753379822, "learning_rate": 5.340681053417141e-07, "loss": 0.1964, "step": 8959 }, { "epoch": 0.9285936366462846, "grad_norm": 0.7647455334663391, "learning_rate": 5.325281052338227e-07, "loss": 0.2242, "step": 8960 }, { "epoch": 0.928697274328946, "grad_norm": 0.5940206050872803, "learning_rate": 5.309902986904015e-07, "loss": 0.1849, "step": 8961 }, { "epoch": 0.9288009120116074, "grad_norm": 0.6256440281867981, "learning_rate": 5.294546858847271e-07, "loss": 0.17, "step": 8962 }, { "epoch": 0.9289045496942688, "grad_norm": 0.7074966430664062, "learning_rate": 5.279212669898326e-07, "loss": 0.197, "step": 8963 }, { "epoch": 0.9290081873769303, "grad_norm": 0.6957035660743713, "learning_rate": 5.263900421785017e-07, "loss": 0.2053, "step": 8964 }, { "epoch": 0.9291118250595917, "grad_norm": 0.5913291573524475, "learning_rate": 5.248610116232633e-07, "loss": 0.1513, "step": 8965 }, { "epoch": 0.9292154627422531, "grad_norm": 0.6100219488143921, "learning_rate": 5.233341754964172e-07, "loss": 0.1573, "step": 8966 }, { "epoch": 0.9293191004249145, "grad_norm": 0.7508609294891357, "learning_rate": 5.218095339699947e-07, "loss": 0.2029, "step": 8967 }, { "epoch": 0.9294227381075759, "grad_norm": 0.57771235704422, "learning_rate": 5.202870872157939e-07, "loss": 0.1745, "step": 8968 }, { "epoch": 0.9295263757902373, "grad_norm": 0.829882800579071, "learning_rate": 5.187668354053666e-07, "loss": 0.2003, "step": 8969 }, { "epoch": 0.9296300134728988, "grad_norm": 0.6365578770637512, "learning_rate": 5.172487787100066e-07, "loss": 0.1669, "step": 8970 }, { "epoch": 0.9297336511555602, "grad_norm": 0.6566683053970337, "learning_rate": 5.157329173007663e-07, "loss": 0.1639, "step": 8971 }, { "epoch": 0.9298372888382216, "grad_norm": 0.7249619364738464, "learning_rate": 5.142192513484534e-07, "loss": 0.1855, "step": 8972 }, { "epoch": 0.929940926520883, "grad_norm": 0.7078553438186646, "learning_rate": 5.127077810236225e-07, "loss": 0.1881, "step": 8973 }, { "epoch": 0.9300445642035444, "grad_norm": 0.5799292325973511, "learning_rate": 5.111985064965864e-07, "loss": 0.1479, "step": 8974 }, { "epoch": 0.9301482018862058, "grad_norm": 0.6730943918228149, "learning_rate": 5.096914279374066e-07, "loss": 0.1947, "step": 8975 }, { "epoch": 0.9302518395688673, "grad_norm": 0.5914413928985596, "learning_rate": 5.081865455158985e-07, "loss": 0.1675, "step": 8976 }, { "epoch": 0.9303554772515287, "grad_norm": 0.6787246465682983, "learning_rate": 5.06683859401631e-07, "loss": 0.204, "step": 8977 }, { "epoch": 0.9304591149341901, "grad_norm": 0.6746557950973511, "learning_rate": 5.051833697639197e-07, "loss": 0.1957, "step": 8978 }, { "epoch": 0.9305627526168515, "grad_norm": 0.6631671786308289, "learning_rate": 5.036850767718448e-07, "loss": 0.1639, "step": 8979 }, { "epoch": 0.9306663902995129, "grad_norm": 0.6939098238945007, "learning_rate": 5.021889805942248e-07, "loss": 0.185, "step": 8980 }, { "epoch": 0.9307700279821743, "grad_norm": 0.80235755443573, "learning_rate": 5.006950813996403e-07, "loss": 0.219, "step": 8981 }, { "epoch": 0.9308736656648358, "grad_norm": 0.6713320016860962, "learning_rate": 4.992033793564255e-07, "loss": 0.1947, "step": 8982 }, { "epoch": 0.9309773033474972, "grad_norm": 0.6114919781684875, "learning_rate": 4.977138746326571e-07, "loss": 0.1707, "step": 8983 }, { "epoch": 0.9310809410301586, "grad_norm": 0.6712514162063599, "learning_rate": 4.962265673961742e-07, "loss": 0.1758, "step": 8984 }, { "epoch": 0.93118457871282, "grad_norm": 0.6477010250091553, "learning_rate": 4.947414578145604e-07, "loss": 0.1921, "step": 8985 }, { "epoch": 0.9312882163954814, "grad_norm": 0.6463591456413269, "learning_rate": 4.932585460551576e-07, "loss": 0.1665, "step": 8986 }, { "epoch": 0.9313918540781428, "grad_norm": 0.6120611429214478, "learning_rate": 4.917778322850586e-07, "loss": 0.1445, "step": 8987 }, { "epoch": 0.9314954917608043, "grad_norm": 0.6259697675704956, "learning_rate": 4.902993166711056e-07, "loss": 0.1939, "step": 8988 }, { "epoch": 0.9315991294434657, "grad_norm": 0.6382227540016174, "learning_rate": 4.888229993799009e-07, "loss": 0.1686, "step": 8989 }, { "epoch": 0.9317027671261271, "grad_norm": 0.688135027885437, "learning_rate": 4.873488805777893e-07, "loss": 0.1923, "step": 8990 }, { "epoch": 0.9318064048087885, "grad_norm": 0.7672633528709412, "learning_rate": 4.858769604308689e-07, "loss": 0.2028, "step": 8991 }, { "epoch": 0.9319100424914499, "grad_norm": 0.735131561756134, "learning_rate": 4.844072391050003e-07, "loss": 0.2298, "step": 8992 }, { "epoch": 0.9320136801741113, "grad_norm": 0.6174731254577637, "learning_rate": 4.829397167657867e-07, "loss": 0.1749, "step": 8993 }, { "epoch": 0.9321173178567728, "grad_norm": 0.6736615300178528, "learning_rate": 4.814743935785848e-07, "loss": 0.1787, "step": 8994 }, { "epoch": 0.9322209555394342, "grad_norm": 0.6681990623474121, "learning_rate": 4.800112697085068e-07, "loss": 0.1808, "step": 8995 }, { "epoch": 0.9323245932220956, "grad_norm": 0.7692642211914062, "learning_rate": 4.785503453204143e-07, "loss": 0.1991, "step": 8996 }, { "epoch": 0.932428230904757, "grad_norm": 0.5919527411460876, "learning_rate": 4.770916205789222e-07, "loss": 0.1672, "step": 8997 }, { "epoch": 0.9325318685874184, "grad_norm": 0.621511697769165, "learning_rate": 4.756350956483968e-07, "loss": 0.1641, "step": 8998 }, { "epoch": 0.9326355062700799, "grad_norm": 0.6952032446861267, "learning_rate": 4.74180770692958e-07, "loss": 0.1866, "step": 8999 }, { "epoch": 0.9327391439527413, "grad_norm": 0.576474130153656, "learning_rate": 4.727286458764768e-07, "loss": 0.1555, "step": 9000 }, { "epoch": 0.9328427816354027, "grad_norm": 0.8230246901512146, "learning_rate": 4.712787213625758e-07, "loss": 0.2268, "step": 9001 }, { "epoch": 0.9329464193180641, "grad_norm": 0.630755603313446, "learning_rate": 4.698309973146309e-07, "loss": 0.158, "step": 9002 }, { "epoch": 0.9330500570007255, "grad_norm": 0.7077135443687439, "learning_rate": 4.683854738957694e-07, "loss": 0.2077, "step": 9003 }, { "epoch": 0.9331536946833868, "grad_norm": 0.7478125095367432, "learning_rate": 4.669421512688699e-07, "loss": 0.2247, "step": 9004 }, { "epoch": 0.9332573323660482, "grad_norm": 0.6382695436477661, "learning_rate": 4.655010295965623e-07, "loss": 0.1805, "step": 9005 }, { "epoch": 0.9333609700487097, "grad_norm": 0.6977059245109558, "learning_rate": 4.6406210904123226e-07, "loss": 0.1851, "step": 9006 }, { "epoch": 0.9334646077313711, "grad_norm": 0.7372855544090271, "learning_rate": 4.626253897650146e-07, "loss": 0.1846, "step": 9007 }, { "epoch": 0.9335682454140325, "grad_norm": 0.5672875046730042, "learning_rate": 4.611908719297997e-07, "loss": 0.1489, "step": 9008 }, { "epoch": 0.9336718830966939, "grad_norm": 0.7301895022392273, "learning_rate": 4.597585556972206e-07, "loss": 0.206, "step": 9009 }, { "epoch": 0.9337755207793553, "grad_norm": 0.7008638978004456, "learning_rate": 4.583284412286726e-07, "loss": 0.2102, "step": 9010 }, { "epoch": 0.9338791584620167, "grad_norm": 0.5648622512817383, "learning_rate": 4.5690052868529564e-07, "loss": 0.1679, "step": 9011 }, { "epoch": 0.9339827961446782, "grad_norm": 0.6398313045501709, "learning_rate": 4.5547481822799e-07, "loss": 0.1742, "step": 9012 }, { "epoch": 0.9340864338273396, "grad_norm": 0.6302163600921631, "learning_rate": 4.54051310017396e-07, "loss": 0.1678, "step": 9013 }, { "epoch": 0.934190071510001, "grad_norm": 0.7005049586296082, "learning_rate": 4.5263000421391866e-07, "loss": 0.2095, "step": 9014 }, { "epoch": 0.9342937091926624, "grad_norm": 0.7499439716339111, "learning_rate": 4.5121090097770547e-07, "loss": 0.2092, "step": 9015 }, { "epoch": 0.9343973468753238, "grad_norm": 0.7579978108406067, "learning_rate": 4.497940004686552e-07, "loss": 0.2138, "step": 9016 }, { "epoch": 0.9345009845579852, "grad_norm": 0.6810645461082458, "learning_rate": 4.4837930284642896e-07, "loss": 0.1777, "step": 9017 }, { "epoch": 0.9346046222406467, "grad_norm": 0.6928239464759827, "learning_rate": 4.4696680827042813e-07, "loss": 0.1789, "step": 9018 }, { "epoch": 0.9347082599233081, "grad_norm": 0.7871753573417664, "learning_rate": 4.4555651689981214e-07, "loss": 0.2459, "step": 9019 }, { "epoch": 0.9348118976059695, "grad_norm": 0.6925694942474365, "learning_rate": 4.441484288934872e-07, "loss": 0.1903, "step": 9020 }, { "epoch": 0.9349155352886309, "grad_norm": 0.6814939975738525, "learning_rate": 4.427425444101219e-07, "loss": 0.1729, "step": 9021 }, { "epoch": 0.9350191729712923, "grad_norm": 0.7239201068878174, "learning_rate": 4.413388636081206e-07, "loss": 0.1968, "step": 9022 }, { "epoch": 0.9351228106539538, "grad_norm": 0.6216186285018921, "learning_rate": 4.3993738664565245e-07, "loss": 0.1728, "step": 9023 }, { "epoch": 0.9352264483366152, "grad_norm": 0.7584130764007568, "learning_rate": 4.3853811368063326e-07, "loss": 0.2498, "step": 9024 }, { "epoch": 0.9353300860192766, "grad_norm": 0.671943187713623, "learning_rate": 4.3714104487073027e-07, "loss": 0.1835, "step": 9025 }, { "epoch": 0.935433723701938, "grad_norm": 0.7655714154243469, "learning_rate": 4.3574618037336427e-07, "loss": 0.2028, "step": 9026 }, { "epoch": 0.9355373613845994, "grad_norm": 0.7326068878173828, "learning_rate": 4.34353520345705e-07, "loss": 0.2027, "step": 9027 }, { "epoch": 0.9356409990672608, "grad_norm": 0.6733595132827759, "learning_rate": 4.3296306494467587e-07, "loss": 0.1714, "step": 9028 }, { "epoch": 0.9357446367499223, "grad_norm": 0.654135525226593, "learning_rate": 4.3157481432694936e-07, "loss": 0.1662, "step": 9029 }, { "epoch": 0.9358482744325837, "grad_norm": 0.7005829811096191, "learning_rate": 4.3018876864895365e-07, "loss": 0.2, "step": 9030 }, { "epoch": 0.9359519121152451, "grad_norm": 0.7137699127197266, "learning_rate": 4.28804928066866e-07, "loss": 0.2008, "step": 9031 }, { "epoch": 0.9360555497979065, "grad_norm": 0.6908015012741089, "learning_rate": 4.274232927366151e-07, "loss": 0.2008, "step": 9032 }, { "epoch": 0.9361591874805679, "grad_norm": 0.7445901036262512, "learning_rate": 4.26043862813883e-07, "loss": 0.2115, "step": 9033 }, { "epoch": 0.9362628251632293, "grad_norm": 0.6784663796424866, "learning_rate": 4.2466663845409874e-07, "loss": 0.2025, "step": 9034 }, { "epoch": 0.9363664628458908, "grad_norm": 0.8212520480155945, "learning_rate": 4.2329161981244703e-07, "loss": 0.2295, "step": 9035 }, { "epoch": 0.9364701005285522, "grad_norm": 0.665834903717041, "learning_rate": 4.2191880704386177e-07, "loss": 0.1906, "step": 9036 }, { "epoch": 0.9365737382112136, "grad_norm": 0.5903333425521851, "learning_rate": 4.205482003030326e-07, "loss": 0.1417, "step": 9037 }, { "epoch": 0.936677375893875, "grad_norm": 0.7341218590736389, "learning_rate": 4.191797997443936e-07, "loss": 0.1764, "step": 9038 }, { "epoch": 0.9367810135765364, "grad_norm": 0.6420240998268127, "learning_rate": 4.178136055221371e-07, "loss": 0.1736, "step": 9039 }, { "epoch": 0.9368846512591978, "grad_norm": 0.6921041011810303, "learning_rate": 4.1644961779020444e-07, "loss": 0.1772, "step": 9040 }, { "epoch": 0.9369882889418593, "grad_norm": 0.7865322232246399, "learning_rate": 4.1508783670228145e-07, "loss": 0.2269, "step": 9041 }, { "epoch": 0.9370919266245207, "grad_norm": 0.6674950122833252, "learning_rate": 4.137282624118188e-07, "loss": 0.2126, "step": 9042 }, { "epoch": 0.9371955643071821, "grad_norm": 0.6780579686164856, "learning_rate": 4.123708950720073e-07, "loss": 0.1804, "step": 9043 }, { "epoch": 0.9372992019898435, "grad_norm": 0.7245308756828308, "learning_rate": 4.110157348357935e-07, "loss": 0.1784, "step": 9044 }, { "epoch": 0.9374028396725049, "grad_norm": 0.8370404839515686, "learning_rate": 4.0966278185587296e-07, "loss": 0.2782, "step": 9045 }, { "epoch": 0.9375064773551663, "grad_norm": 0.5746482014656067, "learning_rate": 4.0831203628469927e-07, "loss": 0.1499, "step": 9046 }, { "epoch": 0.9376101150378278, "grad_norm": 0.5932947397232056, "learning_rate": 4.069634982744708e-07, "loss": 0.1518, "step": 9047 }, { "epoch": 0.9377137527204892, "grad_norm": 0.6348161697387695, "learning_rate": 4.056171679771326e-07, "loss": 0.161, "step": 9048 }, { "epoch": 0.9378173904031506, "grad_norm": 0.6963269114494324, "learning_rate": 4.0427304554439664e-07, "loss": 0.1708, "step": 9049 }, { "epoch": 0.937921028085812, "grad_norm": 0.7186452746391296, "learning_rate": 4.029311311277084e-07, "loss": 0.1884, "step": 9050 }, { "epoch": 0.9380246657684734, "grad_norm": 0.7195757627487183, "learning_rate": 4.01591424878276e-07, "loss": 0.1871, "step": 9051 }, { "epoch": 0.9381283034511348, "grad_norm": 0.7170066237449646, "learning_rate": 4.0025392694705843e-07, "loss": 0.1975, "step": 9052 }, { "epoch": 0.9382319411337963, "grad_norm": 0.7198824286460876, "learning_rate": 3.9891863748475754e-07, "loss": 0.188, "step": 9053 }, { "epoch": 0.9383355788164577, "grad_norm": 0.7737149000167847, "learning_rate": 3.9758555664183517e-07, "loss": 0.2208, "step": 9054 }, { "epoch": 0.9384392164991191, "grad_norm": 0.7000073790550232, "learning_rate": 3.9625468456850005e-07, "loss": 0.1748, "step": 9055 }, { "epoch": 0.9385428541817805, "grad_norm": 0.678845226764679, "learning_rate": 3.9492602141470995e-07, "loss": 0.1802, "step": 9056 }, { "epoch": 0.9386464918644419, "grad_norm": 0.6686621904373169, "learning_rate": 3.935995673301829e-07, "loss": 0.1957, "step": 9057 }, { "epoch": 0.9387501295471034, "grad_norm": 0.6434568166732788, "learning_rate": 3.9227532246437495e-07, "loss": 0.1681, "step": 9058 }, { "epoch": 0.9388537672297648, "grad_norm": 0.8343382477760315, "learning_rate": 3.9095328696650446e-07, "loss": 0.2164, "step": 9059 }, { "epoch": 0.9389574049124262, "grad_norm": 0.7999582886695862, "learning_rate": 3.8963346098553457e-07, "loss": 0.2098, "step": 9060 }, { "epoch": 0.9390610425950876, "grad_norm": 0.7141200304031372, "learning_rate": 3.883158446701796e-07, "loss": 0.1948, "step": 9061 }, { "epoch": 0.939164680277749, "grad_norm": 0.7116166353225708, "learning_rate": 3.8700043816890966e-07, "loss": 0.1903, "step": 9062 }, { "epoch": 0.9392683179604104, "grad_norm": 0.6133732199668884, "learning_rate": 3.8568724162994174e-07, "loss": 0.1553, "step": 9063 }, { "epoch": 0.9393719556430719, "grad_norm": 0.6451056003570557, "learning_rate": 3.843762552012442e-07, "loss": 0.1747, "step": 9064 }, { "epoch": 0.9394755933257333, "grad_norm": 0.5624836087226868, "learning_rate": 3.8306747903053666e-07, "loss": 0.1739, "step": 9065 }, { "epoch": 0.9395792310083947, "grad_norm": 0.6717318296432495, "learning_rate": 3.8176091326528995e-07, "loss": 0.1789, "step": 9066 }, { "epoch": 0.9396828686910561, "grad_norm": 0.5935096740722656, "learning_rate": 3.804565580527286e-07, "loss": 0.1773, "step": 9067 }, { "epoch": 0.9397865063737175, "grad_norm": 0.6754716038703918, "learning_rate": 3.7915441353982174e-07, "loss": 0.1854, "step": 9068 }, { "epoch": 0.9398901440563789, "grad_norm": 0.6990920305252075, "learning_rate": 3.7785447987329415e-07, "loss": 0.1883, "step": 9069 }, { "epoch": 0.9399937817390404, "grad_norm": 0.7798400521278381, "learning_rate": 3.765567571996198e-07, "loss": 0.2196, "step": 9070 }, { "epoch": 0.9400974194217018, "grad_norm": 0.7619076371192932, "learning_rate": 3.752612456650262e-07, "loss": 0.2451, "step": 9071 }, { "epoch": 0.9402010571043632, "grad_norm": 0.6839298009872437, "learning_rate": 3.7396794541548985e-07, "loss": 0.1684, "step": 9072 }, { "epoch": 0.9403046947870246, "grad_norm": 0.7317507863044739, "learning_rate": 3.72676856596732e-07, "loss": 0.188, "step": 9073 }, { "epoch": 0.940408332469686, "grad_norm": 0.7622932195663452, "learning_rate": 3.713879793542385e-07, "loss": 0.2338, "step": 9074 }, { "epoch": 0.9405119701523474, "grad_norm": 0.5636216998100281, "learning_rate": 3.70101313833231e-07, "loss": 0.1524, "step": 9075 }, { "epoch": 0.9406156078350089, "grad_norm": 0.6244856715202332, "learning_rate": 3.688168601786912e-07, "loss": 0.1589, "step": 9076 }, { "epoch": 0.9407192455176703, "grad_norm": 0.7944729328155518, "learning_rate": 3.6753461853535455e-07, "loss": 0.1947, "step": 9077 }, { "epoch": 0.9408228832003317, "grad_norm": 0.7152156829833984, "learning_rate": 3.662545890476965e-07, "loss": 0.205, "step": 9078 }, { "epoch": 0.9409265208829931, "grad_norm": 0.7420468330383301, "learning_rate": 3.6497677185995064e-07, "loss": 0.1886, "step": 9079 }, { "epoch": 0.9410301585656544, "grad_norm": 0.6091653108596802, "learning_rate": 3.6370116711609725e-07, "loss": 0.1668, "step": 9080 }, { "epoch": 0.9411337962483158, "grad_norm": 0.7298439741134644, "learning_rate": 3.6242777495987036e-07, "loss": 0.1966, "step": 9081 }, { "epoch": 0.9412374339309773, "grad_norm": 0.6698196530342102, "learning_rate": 3.6115659553475733e-07, "loss": 0.1659, "step": 9082 }, { "epoch": 0.9413410716136387, "grad_norm": 0.7139626741409302, "learning_rate": 3.598876289839881e-07, "loss": 0.1842, "step": 9083 }, { "epoch": 0.9414447092963001, "grad_norm": 0.6268137097358704, "learning_rate": 3.5862087545055267e-07, "loss": 0.1794, "step": 9084 }, { "epoch": 0.9415483469789615, "grad_norm": 0.7238689064979553, "learning_rate": 3.5735633507717917e-07, "loss": 0.1939, "step": 9085 }, { "epoch": 0.9416519846616229, "grad_norm": 0.6313364505767822, "learning_rate": 3.560940080063602e-07, "loss": 0.1668, "step": 9086 }, { "epoch": 0.9417556223442843, "grad_norm": 0.7056628465652466, "learning_rate": 3.548338943803331e-07, "loss": 0.1791, "step": 9087 }, { "epoch": 0.9418592600269458, "grad_norm": 0.6397473216056824, "learning_rate": 3.5357599434108216e-07, "loss": 0.1679, "step": 9088 }, { "epoch": 0.9419628977096072, "grad_norm": 0.6657278537750244, "learning_rate": 3.523203080303472e-07, "loss": 0.1768, "step": 9089 }, { "epoch": 0.9420665353922686, "grad_norm": 0.7117383480072021, "learning_rate": 3.510668355896196e-07, "loss": 0.1996, "step": 9090 }, { "epoch": 0.94217017307493, "grad_norm": 0.6991865038871765, "learning_rate": 3.4981557716013305e-07, "loss": 0.1769, "step": 9091 }, { "epoch": 0.9422738107575914, "grad_norm": 0.658190131187439, "learning_rate": 3.4856653288288353e-07, "loss": 0.1916, "step": 9092 }, { "epoch": 0.9423774484402528, "grad_norm": 0.7647689580917358, "learning_rate": 3.473197028986053e-07, "loss": 0.1954, "step": 9093 }, { "epoch": 0.9424810861229143, "grad_norm": 0.8443564176559448, "learning_rate": 3.460750873477925e-07, "loss": 0.1755, "step": 9094 }, { "epoch": 0.9425847238055757, "grad_norm": 0.6932904720306396, "learning_rate": 3.4483268637068634e-07, "loss": 0.1827, "step": 9095 }, { "epoch": 0.9426883614882371, "grad_norm": 0.7962846755981445, "learning_rate": 3.435925001072815e-07, "loss": 0.2141, "step": 9096 }, { "epoch": 0.9427919991708985, "grad_norm": 0.6723485589027405, "learning_rate": 3.423545286973151e-07, "loss": 0.1673, "step": 9097 }, { "epoch": 0.9428956368535599, "grad_norm": 0.6464245319366455, "learning_rate": 3.4111877228028e-07, "loss": 0.1705, "step": 9098 }, { "epoch": 0.9429992745362213, "grad_norm": 0.6785967350006104, "learning_rate": 3.398852309954248e-07, "loss": 0.1905, "step": 9099 }, { "epoch": 0.9431029122188828, "grad_norm": 0.680761456489563, "learning_rate": 3.3865390498173835e-07, "loss": 0.1936, "step": 9100 }, { "epoch": 0.9432065499015442, "grad_norm": 0.7931050658226013, "learning_rate": 3.374247943779629e-07, "loss": 0.1745, "step": 9101 }, { "epoch": 0.9433101875842056, "grad_norm": 0.722449004650116, "learning_rate": 3.3619789932259896e-07, "loss": 0.1806, "step": 9102 }, { "epoch": 0.943413825266867, "grad_norm": 0.7283592820167542, "learning_rate": 3.349732199538891e-07, "loss": 0.1982, "step": 9103 }, { "epoch": 0.9435174629495284, "grad_norm": 0.5951731204986572, "learning_rate": 3.337507564098252e-07, "loss": 0.1577, "step": 9104 }, { "epoch": 0.9436211006321898, "grad_norm": 0.7373857498168945, "learning_rate": 3.3253050882815276e-07, "loss": 0.1905, "step": 9105 }, { "epoch": 0.9437247383148513, "grad_norm": 0.665081262588501, "learning_rate": 3.313124773463683e-07, "loss": 0.202, "step": 9106 }, { "epoch": 0.9438283759975127, "grad_norm": 0.6457472443580627, "learning_rate": 3.3009666210171985e-07, "loss": 0.1814, "step": 9107 }, { "epoch": 0.9439320136801741, "grad_norm": 0.740166425704956, "learning_rate": 3.288830632312023e-07, "loss": 0.2053, "step": 9108 }, { "epoch": 0.9440356513628355, "grad_norm": 0.6799501776695251, "learning_rate": 3.276716808715619e-07, "loss": 0.223, "step": 9109 }, { "epoch": 0.9441392890454969, "grad_norm": 0.7502972483634949, "learning_rate": 3.2646251515929597e-07, "loss": 0.205, "step": 9110 }, { "epoch": 0.9442429267281583, "grad_norm": 0.7048971652984619, "learning_rate": 3.252555662306489e-07, "loss": 0.1915, "step": 9111 }, { "epoch": 0.9443465644108198, "grad_norm": 0.6879097819328308, "learning_rate": 3.240508342216209e-07, "loss": 0.194, "step": 9112 }, { "epoch": 0.9444502020934812, "grad_norm": 0.6178298592567444, "learning_rate": 3.2284831926795877e-07, "loss": 0.1754, "step": 9113 }, { "epoch": 0.9445538397761426, "grad_norm": 0.7785412669181824, "learning_rate": 3.2164802150515874e-07, "loss": 0.2021, "step": 9114 }, { "epoch": 0.944657477458804, "grad_norm": 0.7239789366722107, "learning_rate": 3.204499410684658e-07, "loss": 0.1903, "step": 9115 }, { "epoch": 0.9447611151414654, "grad_norm": 0.7297715544700623, "learning_rate": 3.1925407809288545e-07, "loss": 0.2184, "step": 9116 }, { "epoch": 0.9448647528241269, "grad_norm": 0.8004382848739624, "learning_rate": 3.180604327131609e-07, "loss": 0.2201, "step": 9117 }, { "epoch": 0.9449683905067883, "grad_norm": 0.6330424547195435, "learning_rate": 3.1686900506378904e-07, "loss": 0.1734, "step": 9118 }, { "epoch": 0.9450720281894497, "grad_norm": 0.7204705476760864, "learning_rate": 3.1567979527902025e-07, "loss": 0.2118, "step": 9119 }, { "epoch": 0.9451756658721111, "grad_norm": 0.6637647747993469, "learning_rate": 3.144928034928496e-07, "loss": 0.1751, "step": 9120 }, { "epoch": 0.9452793035547725, "grad_norm": 0.6790656447410583, "learning_rate": 3.133080298390323e-07, "loss": 0.1743, "step": 9121 }, { "epoch": 0.9453829412374339, "grad_norm": 0.6014872193336487, "learning_rate": 3.121254744510616e-07, "loss": 0.172, "step": 9122 }, { "epoch": 0.9454865789200954, "grad_norm": 0.8160472512245178, "learning_rate": 3.1094513746218634e-07, "loss": 0.2306, "step": 9123 }, { "epoch": 0.9455902166027568, "grad_norm": 0.7487515807151794, "learning_rate": 3.0976701900540474e-07, "loss": 0.216, "step": 9124 }, { "epoch": 0.9456938542854182, "grad_norm": 0.7319645285606384, "learning_rate": 3.085911192134683e-07, "loss": 0.1976, "step": 9125 }, { "epoch": 0.9457974919680796, "grad_norm": 0.6307010054588318, "learning_rate": 3.0741743821887104e-07, "loss": 0.1757, "step": 9126 }, { "epoch": 0.945901129650741, "grad_norm": 0.5927242636680603, "learning_rate": 3.062459761538672e-07, "loss": 0.168, "step": 9127 }, { "epoch": 0.9460047673334024, "grad_norm": 0.9647350311279297, "learning_rate": 3.0507673315045114e-07, "loss": 0.2184, "step": 9128 }, { "epoch": 0.9461084050160639, "grad_norm": 0.8228685855865479, "learning_rate": 3.039097093403731e-07, "loss": 0.2137, "step": 9129 }, { "epoch": 0.9462120426987253, "grad_norm": 0.7404673099517822, "learning_rate": 3.0274490485512785e-07, "loss": 0.2161, "step": 9130 }, { "epoch": 0.9463156803813867, "grad_norm": 0.6658329963684082, "learning_rate": 3.015823198259682e-07, "loss": 0.1994, "step": 9131 }, { "epoch": 0.9464193180640481, "grad_norm": 0.6319071054458618, "learning_rate": 3.0042195438389156e-07, "loss": 0.1669, "step": 9132 }, { "epoch": 0.9465229557467095, "grad_norm": 0.8151889443397522, "learning_rate": 2.9926380865964664e-07, "loss": 0.2278, "step": 9133 }, { "epoch": 0.9466265934293709, "grad_norm": 0.652004063129425, "learning_rate": 2.9810788278372915e-07, "loss": 0.1911, "step": 9134 }, { "epoch": 0.9467302311120324, "grad_norm": 0.6238911747932434, "learning_rate": 2.969541768863882e-07, "loss": 0.2012, "step": 9135 }, { "epoch": 0.9468338687946938, "grad_norm": 0.6931841969490051, "learning_rate": 2.9580269109762193e-07, "loss": 0.1979, "step": 9136 }, { "epoch": 0.9469375064773552, "grad_norm": 0.7002904415130615, "learning_rate": 2.9465342554717777e-07, "loss": 0.1809, "step": 9137 }, { "epoch": 0.9470411441600166, "grad_norm": 0.706741988658905, "learning_rate": 2.9350638036455216e-07, "loss": 0.1801, "step": 9138 }, { "epoch": 0.947144781842678, "grad_norm": 0.7300030589103699, "learning_rate": 2.9236155567899493e-07, "loss": 0.2023, "step": 9139 }, { "epoch": 0.9472484195253394, "grad_norm": 0.8258287906646729, "learning_rate": 2.9121895161949855e-07, "loss": 0.1999, "step": 9140 }, { "epoch": 0.9473520572080009, "grad_norm": 0.6586878299713135, "learning_rate": 2.900785683148155e-07, "loss": 0.2066, "step": 9141 }, { "epoch": 0.9474556948906623, "grad_norm": 0.5997200012207031, "learning_rate": 2.8894040589344086e-07, "loss": 0.17, "step": 9142 }, { "epoch": 0.9475593325733237, "grad_norm": 0.6866727471351624, "learning_rate": 2.8780446448361644e-07, "loss": 0.226, "step": 9143 }, { "epoch": 0.9476629702559851, "grad_norm": 0.6648144721984863, "learning_rate": 2.8667074421334426e-07, "loss": 0.1901, "step": 9144 }, { "epoch": 0.9477666079386465, "grad_norm": 0.6004384756088257, "learning_rate": 2.8553924521036446e-07, "loss": 0.1911, "step": 9145 }, { "epoch": 0.947870245621308, "grad_norm": 0.6397583484649658, "learning_rate": 2.844099676021772e-07, "loss": 0.2107, "step": 9146 }, { "epoch": 0.9479738833039694, "grad_norm": 0.7320306301116943, "learning_rate": 2.832829115160296e-07, "loss": 0.1859, "step": 9147 }, { "epoch": 0.9480775209866308, "grad_norm": 0.717109203338623, "learning_rate": 2.8215807707891117e-07, "loss": 0.2056, "step": 9148 }, { "epoch": 0.9481811586692922, "grad_norm": 0.6429559588432312, "learning_rate": 2.810354644175672e-07, "loss": 0.1857, "step": 9149 }, { "epoch": 0.9482847963519536, "grad_norm": 0.6143239140510559, "learning_rate": 2.799150736584944e-07, "loss": 0.1653, "step": 9150 }, { "epoch": 0.948388434034615, "grad_norm": 0.6657137870788574, "learning_rate": 2.787969049279338e-07, "loss": 0.1894, "step": 9151 }, { "epoch": 0.9484920717172765, "grad_norm": 0.7038910984992981, "learning_rate": 2.776809583518847e-07, "loss": 0.1777, "step": 9152 }, { "epoch": 0.9485957093999379, "grad_norm": 0.7006789445877075, "learning_rate": 2.7656723405608435e-07, "loss": 0.1983, "step": 9153 }, { "epoch": 0.9486993470825993, "grad_norm": 0.7506330609321594, "learning_rate": 2.754557321660278e-07, "loss": 0.2015, "step": 9154 }, { "epoch": 0.9488029847652607, "grad_norm": 0.73332279920578, "learning_rate": 2.7434645280695507e-07, "loss": 0.1957, "step": 9155 }, { "epoch": 0.948906622447922, "grad_norm": 0.6071427464485168, "learning_rate": 2.732393961038615e-07, "loss": 0.1673, "step": 9156 }, { "epoch": 0.9490102601305834, "grad_norm": 0.5476712584495544, "learning_rate": 2.7213456218148747e-07, "loss": 0.1428, "step": 9157 }, { "epoch": 0.9491138978132448, "grad_norm": 0.7443254590034485, "learning_rate": 2.7103195116432e-07, "loss": 0.1875, "step": 9158 }, { "epoch": 0.9492175354959063, "grad_norm": 0.723513126373291, "learning_rate": 2.699315631766064e-07, "loss": 0.2015, "step": 9159 }, { "epoch": 0.9493211731785677, "grad_norm": 0.6531522274017334, "learning_rate": 2.6883339834233413e-07, "loss": 0.1771, "step": 9160 }, { "epoch": 0.9494248108612291, "grad_norm": 0.790747880935669, "learning_rate": 2.6773745678523975e-07, "loss": 0.2109, "step": 9161 }, { "epoch": 0.9495284485438905, "grad_norm": 0.6858984231948853, "learning_rate": 2.666437386288156e-07, "loss": 0.1845, "step": 9162 }, { "epoch": 0.9496320862265519, "grad_norm": 0.7284145951271057, "learning_rate": 2.6555224399629654e-07, "loss": 0.2084, "step": 9163 }, { "epoch": 0.9497357239092133, "grad_norm": 0.6425133943557739, "learning_rate": 2.6446297301067294e-07, "loss": 0.1689, "step": 9164 }, { "epoch": 0.9498393615918748, "grad_norm": 0.6820522546768188, "learning_rate": 2.633759257946844e-07, "loss": 0.1882, "step": 9165 }, { "epoch": 0.9499429992745362, "grad_norm": 0.68821120262146, "learning_rate": 2.6229110247081525e-07, "loss": 0.1756, "step": 9166 }, { "epoch": 0.9500466369571976, "grad_norm": 0.7660084962844849, "learning_rate": 2.61208503161301e-07, "loss": 0.2169, "step": 9167 }, { "epoch": 0.950150274639859, "grad_norm": 0.7064154148101807, "learning_rate": 2.601281279881285e-07, "loss": 0.1985, "step": 9168 }, { "epoch": 0.9502539123225204, "grad_norm": 0.6654224395751953, "learning_rate": 2.590499770730293e-07, "loss": 0.1806, "step": 9169 }, { "epoch": 0.9503575500051819, "grad_norm": 0.6362894177436829, "learning_rate": 2.5797405053749503e-07, "loss": 0.1939, "step": 9170 }, { "epoch": 0.9504611876878433, "grad_norm": 0.5990746021270752, "learning_rate": 2.5690034850275325e-07, "loss": 0.1821, "step": 9171 }, { "epoch": 0.9505648253705047, "grad_norm": 0.6387087106704712, "learning_rate": 2.5582887108978937e-07, "loss": 0.1815, "step": 9172 }, { "epoch": 0.9506684630531661, "grad_norm": 0.7194986343383789, "learning_rate": 2.547596184193357e-07, "loss": 0.1952, "step": 9173 }, { "epoch": 0.9507721007358275, "grad_norm": 0.7512621283531189, "learning_rate": 2.5369259061187147e-07, "loss": 0.1851, "step": 9174 }, { "epoch": 0.9508757384184889, "grad_norm": 0.6478919386863708, "learning_rate": 2.5262778778763373e-07, "loss": 0.2025, "step": 9175 }, { "epoch": 0.9509793761011504, "grad_norm": 0.7147660851478577, "learning_rate": 2.515652100665955e-07, "loss": 0.188, "step": 9176 }, { "epoch": 0.9510830137838118, "grad_norm": 0.6657465696334839, "learning_rate": 2.5050485756849205e-07, "loss": 0.2053, "step": 9177 }, { "epoch": 0.9511866514664732, "grad_norm": 0.7419307231903076, "learning_rate": 2.4944673041279896e-07, "loss": 0.1912, "step": 9178 }, { "epoch": 0.9512902891491346, "grad_norm": 0.7255851030349731, "learning_rate": 2.483908287187453e-07, "loss": 0.2018, "step": 9179 }, { "epoch": 0.951393926831796, "grad_norm": 0.6799176931381226, "learning_rate": 2.473371526053092e-07, "loss": 0.2009, "step": 9180 }, { "epoch": 0.9514975645144574, "grad_norm": 0.6410163640975952, "learning_rate": 2.462857021912157e-07, "loss": 0.1848, "step": 9181 }, { "epoch": 0.9516012021971189, "grad_norm": 0.605385422706604, "learning_rate": 2.452364775949434e-07, "loss": 0.1717, "step": 9182 }, { "epoch": 0.9517048398797803, "grad_norm": 0.6804389357566833, "learning_rate": 2.441894789347132e-07, "loss": 0.1711, "step": 9183 }, { "epoch": 0.9518084775624417, "grad_norm": 0.6403427720069885, "learning_rate": 2.4314470632850417e-07, "loss": 0.1905, "step": 9184 }, { "epoch": 0.9519121152451031, "grad_norm": 0.5662232637405396, "learning_rate": 2.421021598940354e-07, "loss": 0.1414, "step": 9185 }, { "epoch": 0.9520157529277645, "grad_norm": 0.5852274298667908, "learning_rate": 2.4106183974877963e-07, "loss": 0.1705, "step": 9186 }, { "epoch": 0.9521193906104259, "grad_norm": 0.7288892269134521, "learning_rate": 2.4002374600996305e-07, "loss": 0.2011, "step": 9187 }, { "epoch": 0.9522230282930874, "grad_norm": 0.6094224452972412, "learning_rate": 2.3898787879454986e-07, "loss": 0.1471, "step": 9188 }, { "epoch": 0.9523266659757488, "grad_norm": 0.6364356875419617, "learning_rate": 2.3795423821926457e-07, "loss": 0.1697, "step": 9189 }, { "epoch": 0.9524303036584102, "grad_norm": 0.787897527217865, "learning_rate": 2.3692282440057613e-07, "loss": 0.2307, "step": 9190 }, { "epoch": 0.9525339413410716, "grad_norm": 0.8060945868492126, "learning_rate": 2.3589363745470273e-07, "loss": 0.2175, "step": 9191 }, { "epoch": 0.952637579023733, "grad_norm": 0.6362557411193848, "learning_rate": 2.3486667749760716e-07, "loss": 0.1504, "step": 9192 }, { "epoch": 0.9527412167063944, "grad_norm": 0.6807636618614197, "learning_rate": 2.338419446450102e-07, "loss": 0.194, "step": 9193 }, { "epoch": 0.9528448543890559, "grad_norm": 0.8208065629005432, "learning_rate": 2.3281943901237504e-07, "loss": 0.2368, "step": 9194 }, { "epoch": 0.9529484920717173, "grad_norm": 0.5947349667549133, "learning_rate": 2.3179916071491838e-07, "loss": 0.1703, "step": 9195 }, { "epoch": 0.9530521297543787, "grad_norm": 0.7142931222915649, "learning_rate": 2.307811098676016e-07, "loss": 0.2065, "step": 9196 }, { "epoch": 0.9531557674370401, "grad_norm": 0.7377387881278992, "learning_rate": 2.2976528658513743e-07, "loss": 0.2239, "step": 9197 }, { "epoch": 0.9532594051197015, "grad_norm": 0.6919092535972595, "learning_rate": 2.2875169098198758e-07, "loss": 0.1961, "step": 9198 }, { "epoch": 0.953363042802363, "grad_norm": 0.638052761554718, "learning_rate": 2.2774032317236073e-07, "loss": 0.1716, "step": 9199 }, { "epoch": 0.9534666804850244, "grad_norm": 0.7168495655059814, "learning_rate": 2.2673118327021904e-07, "loss": 0.1944, "step": 9200 }, { "epoch": 0.9535703181676858, "grad_norm": 0.7718733549118042, "learning_rate": 2.2572427138926934e-07, "loss": 0.2063, "step": 9201 }, { "epoch": 0.9536739558503472, "grad_norm": 0.5611803531646729, "learning_rate": 2.2471958764296974e-07, "loss": 0.1399, "step": 9202 }, { "epoch": 0.9537775935330086, "grad_norm": 0.7074396014213562, "learning_rate": 2.2371713214452306e-07, "loss": 0.2244, "step": 9203 }, { "epoch": 0.95388123121567, "grad_norm": 0.730533242225647, "learning_rate": 2.2271690500689003e-07, "loss": 0.2261, "step": 9204 }, { "epoch": 0.9539848688983315, "grad_norm": 0.6675938963890076, "learning_rate": 2.217189063427716e-07, "loss": 0.2112, "step": 9205 }, { "epoch": 0.9540885065809929, "grad_norm": 0.7420927286148071, "learning_rate": 2.2072313626461783e-07, "loss": 0.204, "step": 9206 }, { "epoch": 0.9541921442636543, "grad_norm": 0.609803318977356, "learning_rate": 2.1972959488463674e-07, "loss": 0.181, "step": 9207 }, { "epoch": 0.9542957819463157, "grad_norm": 0.6916093230247498, "learning_rate": 2.1873828231477433e-07, "loss": 0.1855, "step": 9208 }, { "epoch": 0.9543994196289771, "grad_norm": 0.5898705720901489, "learning_rate": 2.1774919866673016e-07, "loss": 0.1625, "step": 9209 }, { "epoch": 0.9545030573116385, "grad_norm": 0.6615305542945862, "learning_rate": 2.167623440519573e-07, "loss": 0.1782, "step": 9210 }, { "epoch": 0.9546066949943, "grad_norm": 0.7574900984764099, "learning_rate": 2.1577771858164897e-07, "loss": 0.2053, "step": 9211 }, { "epoch": 0.9547103326769614, "grad_norm": 0.7126626968383789, "learning_rate": 2.1479532236675427e-07, "loss": 0.1804, "step": 9212 }, { "epoch": 0.9548139703596228, "grad_norm": 0.6488138437271118, "learning_rate": 2.1381515551796239e-07, "loss": 0.16, "step": 9213 }, { "epoch": 0.9549176080422842, "grad_norm": 0.7436590194702148, "learning_rate": 2.1283721814572057e-07, "loss": 0.1717, "step": 9214 }, { "epoch": 0.9550212457249456, "grad_norm": 0.7559380531311035, "learning_rate": 2.1186151036022285e-07, "loss": 0.1938, "step": 9215 }, { "epoch": 0.955124883407607, "grad_norm": 0.6089216470718384, "learning_rate": 2.108880322714102e-07, "loss": 0.2057, "step": 9216 }, { "epoch": 0.9552285210902685, "grad_norm": 0.721402645111084, "learning_rate": 2.099167839889682e-07, "loss": 0.2071, "step": 9217 }, { "epoch": 0.9553321587729299, "grad_norm": 1.3151590824127197, "learning_rate": 2.0894776562234043e-07, "loss": 0.1693, "step": 9218 }, { "epoch": 0.9554357964555913, "grad_norm": 0.6875127553939819, "learning_rate": 2.0798097728071065e-07, "loss": 0.19, "step": 9219 }, { "epoch": 0.9555394341382527, "grad_norm": 0.6574918031692505, "learning_rate": 2.070164190730206e-07, "loss": 0.1833, "step": 9220 }, { "epoch": 0.9556430718209141, "grad_norm": 0.6254417300224304, "learning_rate": 2.0605409110794782e-07, "loss": 0.1849, "step": 9221 }, { "epoch": 0.9557467095035755, "grad_norm": 0.850127100944519, "learning_rate": 2.0509399349393223e-07, "loss": 0.2251, "step": 9222 }, { "epoch": 0.955850347186237, "grad_norm": 0.7196301221847534, "learning_rate": 2.0413612633915393e-07, "loss": 0.1976, "step": 9223 }, { "epoch": 0.9559539848688984, "grad_norm": 0.824651837348938, "learning_rate": 2.0318048975154213e-07, "loss": 0.2216, "step": 9224 }, { "epoch": 0.9560576225515598, "grad_norm": 0.8376520276069641, "learning_rate": 2.0222708383877965e-07, "loss": 0.2255, "step": 9225 }, { "epoch": 0.9561612602342212, "grad_norm": 0.663469672203064, "learning_rate": 2.0127590870829162e-07, "loss": 0.1942, "step": 9226 }, { "epoch": 0.9562648979168826, "grad_norm": 0.6651266813278198, "learning_rate": 2.0032696446725897e-07, "loss": 0.1909, "step": 9227 }, { "epoch": 0.956368535599544, "grad_norm": 0.746058464050293, "learning_rate": 1.9938025122260064e-07, "loss": 0.2136, "step": 9228 }, { "epoch": 0.9564721732822055, "grad_norm": 0.6924718022346497, "learning_rate": 1.9843576908099792e-07, "loss": 0.2021, "step": 9229 }, { "epoch": 0.9565758109648669, "grad_norm": 0.5991740822792053, "learning_rate": 1.9749351814887018e-07, "loss": 0.1677, "step": 9230 }, { "epoch": 0.9566794486475283, "grad_norm": 0.6867091655731201, "learning_rate": 1.9655349853238803e-07, "loss": 0.1728, "step": 9231 }, { "epoch": 0.9567830863301896, "grad_norm": 0.6148860454559326, "learning_rate": 1.956157103374756e-07, "loss": 0.1728, "step": 9232 }, { "epoch": 0.956886724012851, "grad_norm": 0.5756577253341675, "learning_rate": 1.9468015366979508e-07, "loss": 0.1648, "step": 9233 }, { "epoch": 0.9569903616955124, "grad_norm": 0.7107852101325989, "learning_rate": 1.9374682863476658e-07, "loss": 0.1957, "step": 9234 }, { "epoch": 0.9570939993781739, "grad_norm": 0.6376127004623413, "learning_rate": 1.9281573533755927e-07, "loss": 0.1805, "step": 9235 }, { "epoch": 0.9571976370608353, "grad_norm": 0.680597722530365, "learning_rate": 1.9188687388308258e-07, "loss": 0.2029, "step": 9236 }, { "epoch": 0.9573012747434967, "grad_norm": 0.6080154180526733, "learning_rate": 1.9096024437600168e-07, "loss": 0.1682, "step": 9237 }, { "epoch": 0.9574049124261581, "grad_norm": 0.583561897277832, "learning_rate": 1.900358469207242e-07, "loss": 0.1859, "step": 9238 }, { "epoch": 0.9575085501088195, "grad_norm": 0.6905496120452881, "learning_rate": 1.891136816214134e-07, "loss": 0.1795, "step": 9239 }, { "epoch": 0.9576121877914809, "grad_norm": 0.7289076447486877, "learning_rate": 1.8819374858197515e-07, "loss": 0.2005, "step": 9240 }, { "epoch": 0.9577158254741424, "grad_norm": 0.6302348971366882, "learning_rate": 1.8727604790606868e-07, "loss": 0.1881, "step": 9241 }, { "epoch": 0.9578194631568038, "grad_norm": 0.6139100193977356, "learning_rate": 1.863605796971002e-07, "loss": 0.1731, "step": 9242 }, { "epoch": 0.9579231008394652, "grad_norm": 0.8011465072631836, "learning_rate": 1.8544734405821608e-07, "loss": 0.2258, "step": 9243 }, { "epoch": 0.9580267385221266, "grad_norm": 0.7786160111427307, "learning_rate": 1.845363410923251e-07, "loss": 0.2141, "step": 9244 }, { "epoch": 0.958130376204788, "grad_norm": 0.6988627314567566, "learning_rate": 1.8362757090207628e-07, "loss": 0.1819, "step": 9245 }, { "epoch": 0.9582340138874494, "grad_norm": 0.7249823808670044, "learning_rate": 1.8272103358986549e-07, "loss": 0.2156, "step": 9246 }, { "epoch": 0.9583376515701109, "grad_norm": 0.7648645043373108, "learning_rate": 1.818167292578421e-07, "loss": 0.2203, "step": 9247 }, { "epoch": 0.9584412892527723, "grad_norm": 0.692196786403656, "learning_rate": 1.8091465800790465e-07, "loss": 0.1896, "step": 9248 }, { "epoch": 0.9585449269354337, "grad_norm": 0.7493067383766174, "learning_rate": 1.8001481994169178e-07, "loss": 0.2059, "step": 9249 }, { "epoch": 0.9586485646180951, "grad_norm": 0.8199605941772461, "learning_rate": 1.79117215160598e-07, "loss": 0.2293, "step": 9250 }, { "epoch": 0.9587522023007565, "grad_norm": 0.7317061424255371, "learning_rate": 1.7822184376576456e-07, "loss": 0.2183, "step": 9251 }, { "epoch": 0.958855839983418, "grad_norm": 0.6175105571746826, "learning_rate": 1.7732870585808194e-07, "loss": 0.1434, "step": 9252 }, { "epoch": 0.9589594776660794, "grad_norm": 0.6819245219230652, "learning_rate": 1.764378015381829e-07, "loss": 0.2049, "step": 9253 }, { "epoch": 0.9590631153487408, "grad_norm": 0.6690202355384827, "learning_rate": 1.7554913090645832e-07, "loss": 0.1815, "step": 9254 }, { "epoch": 0.9591667530314022, "grad_norm": 0.7080608010292053, "learning_rate": 1.7466269406304136e-07, "loss": 0.1971, "step": 9255 }, { "epoch": 0.9592703907140636, "grad_norm": 0.6241846680641174, "learning_rate": 1.7377849110780997e-07, "loss": 0.1851, "step": 9256 }, { "epoch": 0.959374028396725, "grad_norm": 0.723426878452301, "learning_rate": 1.7289652214039775e-07, "loss": 0.183, "step": 9257 }, { "epoch": 0.9594776660793864, "grad_norm": 0.7585938572883606, "learning_rate": 1.7201678726018522e-07, "loss": 0.2243, "step": 9258 }, { "epoch": 0.9595813037620479, "grad_norm": 0.6271798610687256, "learning_rate": 1.711392865662953e-07, "loss": 0.1819, "step": 9259 }, { "epoch": 0.9596849414447093, "grad_norm": 0.699207067489624, "learning_rate": 1.702640201576089e-07, "loss": 0.197, "step": 9260 }, { "epoch": 0.9597885791273707, "grad_norm": 0.6738049983978271, "learning_rate": 1.6939098813274713e-07, "loss": 0.2064, "step": 9261 }, { "epoch": 0.9598922168100321, "grad_norm": 0.5655495524406433, "learning_rate": 1.6852019059008019e-07, "loss": 0.1767, "step": 9262 }, { "epoch": 0.9599958544926935, "grad_norm": 0.6773292422294617, "learning_rate": 1.6765162762772957e-07, "loss": 0.2361, "step": 9263 }, { "epoch": 0.960099492175355, "grad_norm": 0.7021222114562988, "learning_rate": 1.6678529934356148e-07, "loss": 0.1996, "step": 9264 }, { "epoch": 0.9602031298580164, "grad_norm": 0.7449448704719543, "learning_rate": 1.6592120583519778e-07, "loss": 0.2057, "step": 9265 }, { "epoch": 0.9603067675406778, "grad_norm": 0.6998816728591919, "learning_rate": 1.6505934719999839e-07, "loss": 0.1839, "step": 9266 }, { "epoch": 0.9604104052233392, "grad_norm": 0.6950123310089111, "learning_rate": 1.6419972353507895e-07, "loss": 0.177, "step": 9267 }, { "epoch": 0.9605140429060006, "grad_norm": 0.7720685005187988, "learning_rate": 1.6334233493729757e-07, "loss": 0.2048, "step": 9268 }, { "epoch": 0.960617680588662, "grad_norm": 0.6883362531661987, "learning_rate": 1.624871815032658e-07, "loss": 0.2034, "step": 9269 }, { "epoch": 0.9607213182713235, "grad_norm": 0.6283169388771057, "learning_rate": 1.616342633293422e-07, "loss": 0.1621, "step": 9270 }, { "epoch": 0.9608249559539849, "grad_norm": 0.6474786400794983, "learning_rate": 1.6078358051162757e-07, "loss": 0.1752, "step": 9271 }, { "epoch": 0.9609285936366463, "grad_norm": 0.7147000432014465, "learning_rate": 1.5993513314598085e-07, "loss": 0.2124, "step": 9272 }, { "epoch": 0.9610322313193077, "grad_norm": 0.8779515624046326, "learning_rate": 1.590889213279989e-07, "loss": 0.2072, "step": 9273 }, { "epoch": 0.9611358690019691, "grad_norm": 0.5219985246658325, "learning_rate": 1.5824494515303658e-07, "loss": 0.1388, "step": 9274 }, { "epoch": 0.9612395066846305, "grad_norm": 0.6944804191589355, "learning_rate": 1.5740320471618885e-07, "loss": 0.1865, "step": 9275 }, { "epoch": 0.961343144367292, "grad_norm": 0.5461245775222778, "learning_rate": 1.5656370011229994e-07, "loss": 0.158, "step": 9276 }, { "epoch": 0.9614467820499534, "grad_norm": 0.7172948122024536, "learning_rate": 1.5572643143596744e-07, "loss": 0.2155, "step": 9277 }, { "epoch": 0.9615504197326148, "grad_norm": 0.6956398487091064, "learning_rate": 1.548913987815315e-07, "loss": 0.167, "step": 9278 }, { "epoch": 0.9616540574152762, "grad_norm": 0.739754855632782, "learning_rate": 1.5405860224308345e-07, "loss": 0.1697, "step": 9279 }, { "epoch": 0.9617576950979376, "grad_norm": 0.5874171257019043, "learning_rate": 1.532280419144616e-07, "loss": 0.1471, "step": 9280 }, { "epoch": 0.961861332780599, "grad_norm": 0.6367971301078796, "learning_rate": 1.5239971788925113e-07, "loss": 0.1815, "step": 9281 }, { "epoch": 0.9619649704632605, "grad_norm": 0.7605910897254944, "learning_rate": 1.515736302607884e-07, "loss": 0.2044, "step": 9282 }, { "epoch": 0.9620686081459219, "grad_norm": 0.6360923647880554, "learning_rate": 1.5074977912215016e-07, "loss": 0.1852, "step": 9283 }, { "epoch": 0.9621722458285833, "grad_norm": 0.6246673464775085, "learning_rate": 1.499281645661732e-07, "loss": 0.1708, "step": 9284 }, { "epoch": 0.9622758835112447, "grad_norm": 0.7202011346817017, "learning_rate": 1.4910878668543238e-07, "loss": 0.1855, "step": 9285 }, { "epoch": 0.9623795211939061, "grad_norm": 0.5647118091583252, "learning_rate": 1.4829164557225607e-07, "loss": 0.1536, "step": 9286 }, { "epoch": 0.9624831588765675, "grad_norm": 0.6627938151359558, "learning_rate": 1.4747674131871502e-07, "loss": 0.1647, "step": 9287 }, { "epoch": 0.962586796559229, "grad_norm": 0.6464048624038696, "learning_rate": 1.4666407401663586e-07, "loss": 0.1842, "step": 9288 }, { "epoch": 0.9626904342418904, "grad_norm": 0.6384124755859375, "learning_rate": 1.4585364375758304e-07, "loss": 0.1662, "step": 9289 }, { "epoch": 0.9627940719245518, "grad_norm": 0.5953946709632874, "learning_rate": 1.4504545063287912e-07, "loss": 0.164, "step": 9290 }, { "epoch": 0.9628977096072132, "grad_norm": 0.9141512513160706, "learning_rate": 1.44239494733589e-07, "loss": 0.1747, "step": 9291 }, { "epoch": 0.9630013472898746, "grad_norm": 0.7162027955055237, "learning_rate": 1.4343577615052672e-07, "loss": 0.2083, "step": 9292 }, { "epoch": 0.963104984972536, "grad_norm": 0.7092261910438538, "learning_rate": 1.4263429497425318e-07, "loss": 0.1882, "step": 9293 }, { "epoch": 0.9632086226551975, "grad_norm": 0.7536466717720032, "learning_rate": 1.4183505129507835e-07, "loss": 0.1833, "step": 9294 }, { "epoch": 0.9633122603378589, "grad_norm": 0.5375593304634094, "learning_rate": 1.4103804520305908e-07, "loss": 0.1514, "step": 9295 }, { "epoch": 0.9634158980205203, "grad_norm": 0.7242726683616638, "learning_rate": 1.4024327678800353e-07, "loss": 0.1717, "step": 9296 }, { "epoch": 0.9635195357031817, "grad_norm": 0.7065474987030029, "learning_rate": 1.394507461394623e-07, "loss": 0.1905, "step": 9297 }, { "epoch": 0.9636231733858431, "grad_norm": 0.7065313458442688, "learning_rate": 1.3866045334673506e-07, "loss": 0.1847, "step": 9298 }, { "epoch": 0.9637268110685046, "grad_norm": 0.597445547580719, "learning_rate": 1.37872398498875e-07, "loss": 0.1622, "step": 9299 }, { "epoch": 0.963830448751166, "grad_norm": 0.7109730243682861, "learning_rate": 1.3708658168467558e-07, "loss": 0.2074, "step": 9300 }, { "epoch": 0.9639340864338274, "grad_norm": 0.7618610858917236, "learning_rate": 1.3630300299268374e-07, "loss": 0.1903, "step": 9301 }, { "epoch": 0.9640377241164888, "grad_norm": 0.6909206509590149, "learning_rate": 1.3552166251119103e-07, "loss": 0.2081, "step": 9302 }, { "epoch": 0.9641413617991502, "grad_norm": 0.6721199750900269, "learning_rate": 1.3474256032823596e-07, "loss": 0.1874, "step": 9303 }, { "epoch": 0.9642449994818116, "grad_norm": 0.5766302347183228, "learning_rate": 1.3396569653160829e-07, "loss": 0.1534, "step": 9304 }, { "epoch": 0.964348637164473, "grad_norm": 0.605042576789856, "learning_rate": 1.3319107120884467e-07, "loss": 0.1516, "step": 9305 }, { "epoch": 0.9644522748471345, "grad_norm": 0.6952525973320007, "learning_rate": 1.324186844472264e-07, "loss": 0.1817, "step": 9306 }, { "epoch": 0.9645559125297959, "grad_norm": 0.7488659620285034, "learning_rate": 1.3164853633378826e-07, "loss": 0.2072, "step": 9307 }, { "epoch": 0.9646595502124572, "grad_norm": 0.7539721131324768, "learning_rate": 1.3088062695530312e-07, "loss": 0.2186, "step": 9308 }, { "epoch": 0.9647631878951186, "grad_norm": 0.8046135902404785, "learning_rate": 1.3011495639830395e-07, "loss": 0.2327, "step": 9309 }, { "epoch": 0.96486682557778, "grad_norm": 0.7128415107727051, "learning_rate": 1.2935152474906398e-07, "loss": 0.193, "step": 9310 }, { "epoch": 0.9649704632604414, "grad_norm": 0.6780071258544922, "learning_rate": 1.285903320936055e-07, "loss": 0.2047, "step": 9311 }, { "epoch": 0.9650741009431029, "grad_norm": 0.8416036367416382, "learning_rate": 1.2783137851769544e-07, "loss": 0.2005, "step": 9312 }, { "epoch": 0.9651777386257643, "grad_norm": 0.6061697602272034, "learning_rate": 1.270746641068543e-07, "loss": 0.15, "step": 9313 }, { "epoch": 0.9652813763084257, "grad_norm": 0.7234962582588196, "learning_rate": 1.2632018894634725e-07, "loss": 0.1989, "step": 9314 }, { "epoch": 0.9653850139910871, "grad_norm": 0.7064085006713867, "learning_rate": 1.2556795312118619e-07, "loss": 0.1927, "step": 9315 }, { "epoch": 0.9654886516737485, "grad_norm": 0.7468860149383545, "learning_rate": 1.248179567161345e-07, "loss": 0.1868, "step": 9316 }, { "epoch": 0.96559228935641, "grad_norm": 0.5981663465499878, "learning_rate": 1.240701998156979e-07, "loss": 0.1849, "step": 9317 }, { "epoch": 0.9656959270390714, "grad_norm": 0.6494339108467102, "learning_rate": 1.2332468250413343e-07, "loss": 0.1836, "step": 9318 }, { "epoch": 0.9657995647217328, "grad_norm": 0.6810412406921387, "learning_rate": 1.22581404865445e-07, "loss": 0.1801, "step": 9319 }, { "epoch": 0.9659032024043942, "grad_norm": 0.74522465467453, "learning_rate": 1.2184036698338343e-07, "loss": 0.2059, "step": 9320 }, { "epoch": 0.9660068400870556, "grad_norm": 0.7757186889648438, "learning_rate": 1.2110156894144852e-07, "loss": 0.2175, "step": 9321 }, { "epoch": 0.966110477769717, "grad_norm": 0.675933837890625, "learning_rate": 1.203650108228871e-07, "loss": 0.1979, "step": 9322 }, { "epoch": 0.9662141154523785, "grad_norm": 0.57355135679245, "learning_rate": 1.196306927106905e-07, "loss": 0.1434, "step": 9323 }, { "epoch": 0.9663177531350399, "grad_norm": 0.6624523401260376, "learning_rate": 1.1889861468760589e-07, "loss": 0.169, "step": 9324 }, { "epoch": 0.9664213908177013, "grad_norm": 0.698999285697937, "learning_rate": 1.1816877683611838e-07, "loss": 0.2011, "step": 9325 }, { "epoch": 0.9665250285003627, "grad_norm": 0.6993268728256226, "learning_rate": 1.1744117923846443e-07, "loss": 0.2027, "step": 9326 }, { "epoch": 0.9666286661830241, "grad_norm": 0.69150710105896, "learning_rate": 1.16715821976634e-07, "loss": 0.1873, "step": 9327 }, { "epoch": 0.9667323038656855, "grad_norm": 0.6487718820571899, "learning_rate": 1.1599270513235283e-07, "loss": 0.1739, "step": 9328 }, { "epoch": 0.966835941548347, "grad_norm": 0.6418184041976929, "learning_rate": 1.1527182878710241e-07, "loss": 0.1768, "step": 9329 }, { "epoch": 0.9669395792310084, "grad_norm": 0.7726601958274841, "learning_rate": 1.1455319302211332e-07, "loss": 0.2201, "step": 9330 }, { "epoch": 0.9670432169136698, "grad_norm": 0.6095311641693115, "learning_rate": 1.1383679791835633e-07, "loss": 0.1785, "step": 9331 }, { "epoch": 0.9671468545963312, "grad_norm": 0.6452474594116211, "learning_rate": 1.13122643556558e-07, "loss": 0.1712, "step": 9332 }, { "epoch": 0.9672504922789926, "grad_norm": 0.728452205657959, "learning_rate": 1.1241073001718283e-07, "loss": 0.1975, "step": 9333 }, { "epoch": 0.967354129961654, "grad_norm": 0.679295539855957, "learning_rate": 1.1170105738045112e-07, "loss": 0.1911, "step": 9334 }, { "epoch": 0.9674577676443155, "grad_norm": 0.716543436050415, "learning_rate": 1.1099362572632777e-07, "loss": 0.2033, "step": 9335 }, { "epoch": 0.9675614053269769, "grad_norm": 0.5981671214103699, "learning_rate": 1.1028843513452236e-07, "loss": 0.1659, "step": 9336 }, { "epoch": 0.9676650430096383, "grad_norm": 0.6800544857978821, "learning_rate": 1.0958548568450245e-07, "loss": 0.1971, "step": 9337 }, { "epoch": 0.9677686806922997, "grad_norm": 0.6196473836898804, "learning_rate": 1.0888477745546466e-07, "loss": 0.1876, "step": 9338 }, { "epoch": 0.9678723183749611, "grad_norm": 0.6917783617973328, "learning_rate": 1.081863105263703e-07, "loss": 0.1862, "step": 9339 }, { "epoch": 0.9679759560576225, "grad_norm": 0.622767448425293, "learning_rate": 1.0749008497592083e-07, "loss": 0.1601, "step": 9340 }, { "epoch": 0.968079593740284, "grad_norm": 0.6830424666404724, "learning_rate": 1.0679610088256242e-07, "loss": 0.1952, "step": 9341 }, { "epoch": 0.9681832314229454, "grad_norm": 0.706273078918457, "learning_rate": 1.0610435832449917e-07, "loss": 0.1878, "step": 9342 }, { "epoch": 0.9682868691056068, "grad_norm": 0.7518766522407532, "learning_rate": 1.0541485737966872e-07, "loss": 0.1948, "step": 9343 }, { "epoch": 0.9683905067882682, "grad_norm": 0.7455720901489258, "learning_rate": 1.047275981257645e-07, "loss": 0.2139, "step": 9344 }, { "epoch": 0.9684941444709296, "grad_norm": 0.746716320514679, "learning_rate": 1.0404258064022898e-07, "loss": 0.1952, "step": 9345 }, { "epoch": 0.968597782153591, "grad_norm": 0.6383845210075378, "learning_rate": 1.033598050002449e-07, "loss": 0.1509, "step": 9346 }, { "epoch": 0.9687014198362525, "grad_norm": 0.7649304270744324, "learning_rate": 1.0267927128274846e-07, "loss": 0.2389, "step": 9347 }, { "epoch": 0.9688050575189139, "grad_norm": 0.5000459551811218, "learning_rate": 1.0200097956442057e-07, "loss": 0.1464, "step": 9348 }, { "epoch": 0.9689086952015753, "grad_norm": 0.7772543430328369, "learning_rate": 1.0132492992168896e-07, "loss": 0.2005, "step": 9349 }, { "epoch": 0.9690123328842367, "grad_norm": 0.5948247313499451, "learning_rate": 1.0065112243073494e-07, "loss": 0.1756, "step": 9350 }, { "epoch": 0.9691159705668981, "grad_norm": 0.7647456526756287, "learning_rate": 9.997955716747553e-08, "loss": 0.1956, "step": 9351 }, { "epoch": 0.9692196082495595, "grad_norm": 0.5967316031455994, "learning_rate": 9.931023420758356e-08, "loss": 0.1497, "step": 9352 }, { "epoch": 0.969323245932221, "grad_norm": 0.7856627702713013, "learning_rate": 9.864315362648091e-08, "loss": 0.2095, "step": 9353 }, { "epoch": 0.9694268836148824, "grad_norm": 0.6720486283302307, "learning_rate": 9.797831549932745e-08, "loss": 0.1724, "step": 9354 }, { "epoch": 0.9695305212975438, "grad_norm": 0.68314129114151, "learning_rate": 9.731571990104105e-08, "loss": 0.2019, "step": 9355 }, { "epoch": 0.9696341589802052, "grad_norm": 0.7092345952987671, "learning_rate": 9.665536690627974e-08, "loss": 0.1744, "step": 9356 }, { "epoch": 0.9697377966628666, "grad_norm": 0.6779714822769165, "learning_rate": 9.599725658945069e-08, "loss": 0.1733, "step": 9357 }, { "epoch": 0.969841434345528, "grad_norm": 0.690662145614624, "learning_rate": 9.534138902471235e-08, "loss": 0.1796, "step": 9358 }, { "epoch": 0.9699450720281895, "grad_norm": 0.9036988019943237, "learning_rate": 9.468776428596115e-08, "loss": 0.1887, "step": 9359 }, { "epoch": 0.9700487097108509, "grad_norm": 0.6716302633285522, "learning_rate": 9.403638244684931e-08, "loss": 0.1672, "step": 9360 }, { "epoch": 0.9701523473935123, "grad_norm": 0.7448993921279907, "learning_rate": 9.338724358077367e-08, "loss": 0.2256, "step": 9361 }, { "epoch": 0.9702559850761737, "grad_norm": 0.7266402840614319, "learning_rate": 9.274034776087793e-08, "loss": 0.1953, "step": 9362 }, { "epoch": 0.9703596227588351, "grad_norm": 0.6478952169418335, "learning_rate": 9.209569506005489e-08, "loss": 0.1765, "step": 9363 }, { "epoch": 0.9704632604414966, "grad_norm": 0.7265430688858032, "learning_rate": 9.14532855509398e-08, "loss": 0.2049, "step": 9364 }, { "epoch": 0.970566898124158, "grad_norm": 0.6949917674064636, "learning_rate": 9.081311930591919e-08, "loss": 0.215, "step": 9365 }, { "epoch": 0.9706705358068194, "grad_norm": 0.8174939155578613, "learning_rate": 9.017519639712868e-08, "loss": 0.2426, "step": 9366 }, { "epoch": 0.9707741734894808, "grad_norm": 0.75638347864151, "learning_rate": 8.953951689644636e-08, "loss": 0.2089, "step": 9367 }, { "epoch": 0.9708778111721422, "grad_norm": 0.745216429233551, "learning_rate": 8.890608087549934e-08, "loss": 0.2288, "step": 9368 }, { "epoch": 0.9709814488548036, "grad_norm": 0.6507091522216797, "learning_rate": 8.82748884056639e-08, "loss": 0.179, "step": 9369 }, { "epoch": 0.9710850865374651, "grad_norm": 0.659831166267395, "learning_rate": 8.764593955805866e-08, "loss": 0.198, "step": 9370 }, { "epoch": 0.9711887242201265, "grad_norm": 0.6593323349952698, "learning_rate": 8.701923440355586e-08, "loss": 0.1899, "step": 9371 }, { "epoch": 0.9712923619027879, "grad_norm": 0.791563093662262, "learning_rate": 8.63947730127701e-08, "loss": 0.1864, "step": 9372 }, { "epoch": 0.9713959995854493, "grad_norm": 0.6524644494056702, "learning_rate": 8.577255545606511e-08, "loss": 0.1968, "step": 9373 }, { "epoch": 0.9714996372681107, "grad_norm": 0.6397282481193542, "learning_rate": 8.515258180355146e-08, "loss": 0.1862, "step": 9374 }, { "epoch": 0.9716032749507721, "grad_norm": 0.6900851726531982, "learning_rate": 8.453485212508661e-08, "loss": 0.1816, "step": 9375 }, { "epoch": 0.9717069126334336, "grad_norm": 0.7994019985198975, "learning_rate": 8.391936649027488e-08, "loss": 0.2319, "step": 9376 }, { "epoch": 0.971810550316095, "grad_norm": 0.7205654382705688, "learning_rate": 8.330612496846968e-08, "loss": 0.1788, "step": 9377 }, { "epoch": 0.9719141879987564, "grad_norm": 0.6512675881385803, "learning_rate": 8.269512762876908e-08, "loss": 0.1773, "step": 9378 }, { "epoch": 0.9720178256814178, "grad_norm": 0.6505001783370972, "learning_rate": 8.2086374540018e-08, "loss": 0.1936, "step": 9379 }, { "epoch": 0.9721214633640792, "grad_norm": 0.6861594319343567, "learning_rate": 8.147986577081269e-08, "loss": 0.1831, "step": 9380 }, { "epoch": 0.9722251010467406, "grad_norm": 0.6237919926643372, "learning_rate": 8.087560138949179e-08, "loss": 0.1648, "step": 9381 }, { "epoch": 0.9723287387294021, "grad_norm": 0.7278616428375244, "learning_rate": 8.027358146414311e-08, "loss": 0.1742, "step": 9382 }, { "epoch": 0.9724323764120635, "grad_norm": 0.7543025016784668, "learning_rate": 7.967380606260344e-08, "loss": 0.2103, "step": 9383 }, { "epoch": 0.9725360140947248, "grad_norm": 0.5872170925140381, "learning_rate": 7.907627525244988e-08, "loss": 0.1625, "step": 9384 }, { "epoch": 0.9726396517773862, "grad_norm": 0.6460816264152527, "learning_rate": 7.84809891010152e-08, "loss": 0.2087, "step": 9385 }, { "epoch": 0.9727432894600476, "grad_norm": 0.6365247368812561, "learning_rate": 7.788794767537244e-08, "loss": 0.1757, "step": 9386 }, { "epoch": 0.972846927142709, "grad_norm": 0.6571633815765381, "learning_rate": 7.729715104234814e-08, "loss": 0.195, "step": 9387 }, { "epoch": 0.9729505648253705, "grad_norm": 0.6919662356376648, "learning_rate": 7.670859926851126e-08, "loss": 0.2203, "step": 9388 }, { "epoch": 0.9730542025080319, "grad_norm": 0.7071418762207031, "learning_rate": 7.612229242017543e-08, "loss": 0.1816, "step": 9389 }, { "epoch": 0.9731578401906933, "grad_norm": 0.8264790773391724, "learning_rate": 7.553823056340781e-08, "loss": 0.1992, "step": 9390 }, { "epoch": 0.9732614778733547, "grad_norm": 0.6832471489906311, "learning_rate": 7.495641376402019e-08, "loss": 0.2038, "step": 9391 }, { "epoch": 0.9733651155560161, "grad_norm": 0.7595140933990479, "learning_rate": 7.4376842087569e-08, "loss": 0.2317, "step": 9392 }, { "epoch": 0.9734687532386775, "grad_norm": 0.7731542587280273, "learning_rate": 7.379951559936205e-08, "loss": 0.2163, "step": 9393 }, { "epoch": 0.973572390921339, "grad_norm": 0.8025881052017212, "learning_rate": 7.322443436444949e-08, "loss": 0.1935, "step": 9394 }, { "epoch": 0.9736760286040004, "grad_norm": 0.7015080451965332, "learning_rate": 7.265159844762837e-08, "loss": 0.2051, "step": 9395 }, { "epoch": 0.9737796662866618, "grad_norm": 0.6422966718673706, "learning_rate": 7.20810079134493e-08, "loss": 0.1644, "step": 9396 }, { "epoch": 0.9738833039693232, "grad_norm": 0.6956278681755066, "learning_rate": 7.151266282620306e-08, "loss": 0.2061, "step": 9397 }, { "epoch": 0.9739869416519846, "grad_norm": 0.7768704891204834, "learning_rate": 7.094656324993177e-08, "loss": 0.2021, "step": 9398 }, { "epoch": 0.974090579334646, "grad_norm": 0.670602560043335, "learning_rate": 7.038270924841995e-08, "loss": 0.2034, "step": 9399 }, { "epoch": 0.9741942170173075, "grad_norm": 0.6462625861167908, "learning_rate": 6.982110088520566e-08, "loss": 0.1871, "step": 9400 } ], "logging_steps": 1.0, "max_steps": 9649, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.184630033364665e+22, "train_batch_size": 2, "trial_name": null, "trial_params": null }