| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 200.0, |
| "global_step": 10410, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00019212295869356388, |
| "grad_norm": 0.08390819281339645, |
| "learning_rate": 5.758157389635316e-07, |
| "loss": 3.274601936340332, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009606147934678194, |
| "grad_norm": 0.08173709362745285, |
| "learning_rate": 2.879078694817658e-06, |
| "loss": 3.2788190841674805, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0019212295869356388, |
| "grad_norm": 0.09356535971164703, |
| "learning_rate": 5.758157389635316e-06, |
| "loss": 3.2803062438964843, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.002881844380403458, |
| "grad_norm": 0.11097322404384613, |
| "learning_rate": 8.637236084452974e-06, |
| "loss": 3.277487564086914, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0038424591738712775, |
| "grad_norm": 0.15006589889526367, |
| "learning_rate": 1.1516314779270632e-05, |
| "loss": 3.281480407714844, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.004803073967339097, |
| "grad_norm": 0.14452151954174042, |
| "learning_rate": 1.439539347408829e-05, |
| "loss": 3.2803359985351563, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.005763688760806916, |
| "grad_norm": 0.16946451365947723, |
| "learning_rate": 1.7274472168905948e-05, |
| "loss": 3.287946319580078, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0067243035542747355, |
| "grad_norm": 0.14511597156524658, |
| "learning_rate": 2.015355086372361e-05, |
| "loss": 3.278676986694336, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.007684918347742555, |
| "grad_norm": 0.21068508923053741, |
| "learning_rate": 2.3032629558541264e-05, |
| "loss": 3.2816986083984374, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.008645533141210375, |
| "grad_norm": 0.2152123898267746, |
| "learning_rate": 2.591170825335892e-05, |
| "loss": 3.2830833435058593, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.009606147934678195, |
| "grad_norm": 0.2621569037437439, |
| "learning_rate": 2.879078694817658e-05, |
| "loss": 3.279366302490234, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.010566762728146013, |
| "grad_norm": 0.29736942052841187, |
| "learning_rate": 3.166986564299424e-05, |
| "loss": 3.2836788177490233, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.011527377521613832, |
| "grad_norm": 0.31358596682548523, |
| "learning_rate": 3.4548944337811895e-05, |
| "loss": 3.2797149658203124, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.012487992315081652, |
| "grad_norm": 0.42882370948791504, |
| "learning_rate": 3.742802303262955e-05, |
| "loss": 3.2872390747070312, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.013448607108549471, |
| "grad_norm": 0.3247455060482025, |
| "learning_rate": 4.030710172744722e-05, |
| "loss": 3.2825721740722655, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.01440922190201729, |
| "grad_norm": 0.40256041288375854, |
| "learning_rate": 4.318618042226487e-05, |
| "loss": 3.279859924316406, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.01536983669548511, |
| "grad_norm": 0.5199999809265137, |
| "learning_rate": 4.606525911708253e-05, |
| "loss": 3.284808349609375, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.01633045148895293, |
| "grad_norm": 0.6197025179862976, |
| "learning_rate": 4.894433781190019e-05, |
| "loss": 3.2813262939453125, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.01729106628242075, |
| "grad_norm": 0.4358411133289337, |
| "learning_rate": 5.182341650671784e-05, |
| "loss": 3.2836116790771483, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.01825168107588857, |
| "grad_norm": 0.5619639158248901, |
| "learning_rate": 5.4702495201535504e-05, |
| "loss": 3.281964874267578, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.01921229586935639, |
| "grad_norm": 0.42559099197387695, |
| "learning_rate": 5.758157389635316e-05, |
| "loss": 3.284071350097656, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.020172910662824207, |
| "grad_norm": 0.5353518128395081, |
| "learning_rate": 6.0460652591170814e-05, |
| "loss": 3.286463165283203, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.021133525456292025, |
| "grad_norm": 0.8242971897125244, |
| "learning_rate": 6.333973128598848e-05, |
| "loss": 3.28314094543457, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.022094140249759846, |
| "grad_norm": 0.5128299593925476, |
| "learning_rate": 6.621880998080614e-05, |
| "loss": 3.2824722290039063, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.023054755043227664, |
| "grad_norm": 0.7521133422851562, |
| "learning_rate": 6.909788867562379e-05, |
| "loss": 3.2825164794921875, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.024015369836695485, |
| "grad_norm": 0.7099725008010864, |
| "learning_rate": 7.197696737044145e-05, |
| "loss": 3.2856216430664062, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.024975984630163303, |
| "grad_norm": 0.673387885093689, |
| "learning_rate": 7.48560460652591e-05, |
| "loss": 3.2845367431640624, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.025936599423631124, |
| "grad_norm": 0.47837865352630615, |
| "learning_rate": 7.773512476007677e-05, |
| "loss": 3.2803184509277346, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.026897214217098942, |
| "grad_norm": 0.5746335983276367, |
| "learning_rate": 8.061420345489444e-05, |
| "loss": 3.2853370666503907, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.027857829010566763, |
| "grad_norm": 0.9516947269439697, |
| "learning_rate": 8.349328214971209e-05, |
| "loss": 3.2811763763427733, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.02881844380403458, |
| "grad_norm": 0.7206563949584961, |
| "learning_rate": 8.637236084452975e-05, |
| "loss": 3.2858177185058595, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.029779058597502402, |
| "grad_norm": 0.9468950033187866, |
| "learning_rate": 8.92514395393474e-05, |
| "loss": 3.2817359924316407, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.03073967339097022, |
| "grad_norm": 0.9247293472290039, |
| "learning_rate": 9.213051823416505e-05, |
| "loss": 3.283827209472656, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.03170028818443804, |
| "grad_norm": 0.7639815807342529, |
| "learning_rate": 9.500959692898272e-05, |
| "loss": 3.2805789947509765, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.03266090297790586, |
| "grad_norm": 1.0864217281341553, |
| "learning_rate": 9.788867562380038e-05, |
| "loss": 3.282984161376953, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.03362151777137368, |
| "grad_norm": 1.0888683795928955, |
| "learning_rate": 0.00010076775431861803, |
| "loss": 3.2868572235107423, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0345821325648415, |
| "grad_norm": 0.7901690602302551, |
| "learning_rate": 0.00010364683301343569, |
| "loss": 3.2854034423828127, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.03554274735830932, |
| "grad_norm": 1.132866621017456, |
| "learning_rate": 0.00010652591170825334, |
| "loss": 3.283061218261719, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.03650336215177714, |
| "grad_norm": 1.1301084756851196, |
| "learning_rate": 0.00010940499040307101, |
| "loss": 3.2865623474121093, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.037463976945244955, |
| "grad_norm": 0.9956747889518738, |
| "learning_rate": 0.00011228406909788866, |
| "loss": 3.2860641479492188, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.03842459173871278, |
| "grad_norm": 1.1139202117919922, |
| "learning_rate": 0.00011516314779270632, |
| "loss": 3.2860763549804686, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0393852065321806, |
| "grad_norm": 1.1494112014770508, |
| "learning_rate": 0.00011804222648752397, |
| "loss": 3.2874046325683595, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.040345821325648415, |
| "grad_norm": 1.1791850328445435, |
| "learning_rate": 0.00012092130518234163, |
| "loss": 3.28511962890625, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.04130643611911623, |
| "grad_norm": 0.8412664532661438, |
| "learning_rate": 0.0001238003838771593, |
| "loss": 3.2826396942138674, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.04226705091258405, |
| "grad_norm": 1.0537320375442505, |
| "learning_rate": 0.00012667946257197696, |
| "loss": 3.2859474182128907, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.043227665706051875, |
| "grad_norm": 0.9535348415374756, |
| "learning_rate": 0.00012955854126679462, |
| "loss": 3.2872699737548827, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.04418828049951969, |
| "grad_norm": 1.2287579774856567, |
| "learning_rate": 0.00013243761996161227, |
| "loss": 3.2854686737060548, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.04514889529298751, |
| "grad_norm": 0.8720296621322632, |
| "learning_rate": 0.00013531669865642993, |
| "loss": 3.2873931884765626, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.04610951008645533, |
| "grad_norm": 0.9984288811683655, |
| "learning_rate": 0.00013819577735124758, |
| "loss": 3.28588752746582, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.04707012487992315, |
| "grad_norm": 1.3232122659683228, |
| "learning_rate": 0.00014107485604606524, |
| "loss": 3.2899200439453127, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.04803073967339097, |
| "grad_norm": 1.0983723402023315, |
| "learning_rate": 0.0001439539347408829, |
| "loss": 3.287739562988281, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.04899135446685879, |
| "grad_norm": 0.9880014657974243, |
| "learning_rate": 0.00014683301343570055, |
| "loss": 3.2914413452148437, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.049951969260326606, |
| "grad_norm": 1.0988938808441162, |
| "learning_rate": 0.0001497120921305182, |
| "loss": 3.2892181396484377, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.05091258405379443, |
| "grad_norm": 0.9497994780540466, |
| "learning_rate": 0.00015259117082533588, |
| "loss": 3.289907455444336, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.05187319884726225, |
| "grad_norm": 1.4786131381988525, |
| "learning_rate": 0.00015547024952015354, |
| "loss": 3.2885669708251952, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.052833813640730067, |
| "grad_norm": 1.1913130283355713, |
| "learning_rate": 0.0001583493282149712, |
| "loss": 3.2930023193359377, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.053794428434197884, |
| "grad_norm": 1.0576707124710083, |
| "learning_rate": 0.00016122840690978887, |
| "loss": 3.285129165649414, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.05475504322766571, |
| "grad_norm": 1.3840175867080688, |
| "learning_rate": 0.0001641074856046065, |
| "loss": 3.2881790161132813, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.05571565802113353, |
| "grad_norm": 1.1912461519241333, |
| "learning_rate": 0.00016698656429942418, |
| "loss": 3.2907310485839845, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.056676272814601344, |
| "grad_norm": 0.8021464347839355, |
| "learning_rate": 0.0001698656429942418, |
| "loss": 3.2897079467773436, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.05763688760806916, |
| "grad_norm": 1.2219209671020508, |
| "learning_rate": 0.0001727447216890595, |
| "loss": 3.288962554931641, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.05859750240153699, |
| "grad_norm": 1.13152015209198, |
| "learning_rate": 0.00017562380038387714, |
| "loss": 3.2893699645996093, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.059558117195004805, |
| "grad_norm": 1.5439852476119995, |
| "learning_rate": 0.0001785028790786948, |
| "loss": 3.293457794189453, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.06051873198847262, |
| "grad_norm": 1.4060869216918945, |
| "learning_rate": 0.00018138195777351245, |
| "loss": 3.2903827667236327, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.06147934678194044, |
| "grad_norm": 1.0909712314605713, |
| "learning_rate": 0.0001842610364683301, |
| "loss": 3.293254852294922, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.06243996157540826, |
| "grad_norm": 1.4758312702178955, |
| "learning_rate": 0.00018714011516314776, |
| "loss": 3.291452407836914, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.06340057636887608, |
| "grad_norm": 1.3612134456634521, |
| "learning_rate": 0.00019001919385796544, |
| "loss": 3.2947055816650392, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0643611911623439, |
| "grad_norm": 1.017069935798645, |
| "learning_rate": 0.00019289827255278307, |
| "loss": 3.2914665222167967, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.06532180595581172, |
| "grad_norm": 1.114537239074707, |
| "learning_rate": 0.00019577735124760075, |
| "loss": 3.2918880462646483, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.06628242074927954, |
| "grad_norm": 1.359885334968567, |
| "learning_rate": 0.00019865642994241838, |
| "loss": 3.291158676147461, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.06724303554274735, |
| "grad_norm": 1.5216479301452637, |
| "learning_rate": 0.00020153550863723606, |
| "loss": 3.2951950073242187, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.06820365033621517, |
| "grad_norm": 1.0338709354400635, |
| "learning_rate": 0.00020441458733205374, |
| "loss": 3.2910877227783204, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.069164265129683, |
| "grad_norm": 1.3676224946975708, |
| "learning_rate": 0.00020729366602687137, |
| "loss": 3.2979248046875, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.07012487992315082, |
| "grad_norm": 1.508606195449829, |
| "learning_rate": 0.00021017274472168905, |
| "loss": 3.2947929382324217, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.07108549471661864, |
| "grad_norm": 1.4694204330444336, |
| "learning_rate": 0.00021305182341650668, |
| "loss": 3.290261077880859, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.07204610951008646, |
| "grad_norm": 1.629552960395813, |
| "learning_rate": 0.00021593090211132436, |
| "loss": 3.292864990234375, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.07300672430355427, |
| "grad_norm": 1.3059035539627075, |
| "learning_rate": 0.00021880998080614202, |
| "loss": 3.297530746459961, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.07396733909702209, |
| "grad_norm": 1.6734189987182617, |
| "learning_rate": 0.00022168905950095967, |
| "loss": 3.2937080383300783, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.07492795389048991, |
| "grad_norm": 1.9466580152511597, |
| "learning_rate": 0.00022456813819577733, |
| "loss": 3.2989707946777345, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.07588856868395773, |
| "grad_norm": 1.4589389562606812, |
| "learning_rate": 0.00022744721689059498, |
| "loss": 3.2969600677490236, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.07684918347742556, |
| "grad_norm": 1.273814082145691, |
| "learning_rate": 0.00023032629558541264, |
| "loss": 3.2937450408935547, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.07780979827089338, |
| "grad_norm": 1.6920170783996582, |
| "learning_rate": 0.00023320537428023032, |
| "loss": 3.2936920166015624, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.0787704130643612, |
| "grad_norm": 1.7424761056900024, |
| "learning_rate": 0.00023608445297504794, |
| "loss": 3.29752197265625, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.07973102785782901, |
| "grad_norm": 1.4278359413146973, |
| "learning_rate": 0.00023896353166986563, |
| "loss": 3.2931358337402346, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.08069164265129683, |
| "grad_norm": 1.3596338033676147, |
| "learning_rate": 0.00024184261036468325, |
| "loss": 3.2951515197753904, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.08165225744476465, |
| "grad_norm": 1.4509109258651733, |
| "learning_rate": 0.00024472168905950096, |
| "loss": 3.2955284118652344, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.08261287223823247, |
| "grad_norm": 2.0564208030700684, |
| "learning_rate": 0.0002476007677543186, |
| "loss": 3.2958652496337892, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.08357348703170028, |
| "grad_norm": 1.80084228515625, |
| "learning_rate": 0.00025047984644913627, |
| "loss": 3.299582672119141, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.0845341018251681, |
| "grad_norm": 1.2458295822143555, |
| "learning_rate": 0.0002533589251439539, |
| "loss": 3.2957221984863283, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.08549471661863593, |
| "grad_norm": 1.5943655967712402, |
| "learning_rate": 0.0002562380038387716, |
| "loss": 3.296315002441406, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.08645533141210375, |
| "grad_norm": 1.4959216117858887, |
| "learning_rate": 0.00025911708253358924, |
| "loss": 3.2936065673828123, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.08741594620557157, |
| "grad_norm": 0.9593575596809387, |
| "learning_rate": 0.0002619961612284069, |
| "loss": 3.2923255920410157, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.08837656099903939, |
| "grad_norm": 1.7525019645690918, |
| "learning_rate": 0.00026487523992322454, |
| "loss": 3.297338104248047, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.0893371757925072, |
| "grad_norm": 1.5253691673278809, |
| "learning_rate": 0.0002677543186180422, |
| "loss": 3.29698486328125, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.09029779058597502, |
| "grad_norm": 1.7394542694091797, |
| "learning_rate": 0.00027063339731285985, |
| "loss": 3.2963401794433596, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.09125840537944284, |
| "grad_norm": 1.3029968738555908, |
| "learning_rate": 0.0002735124760076775, |
| "loss": 3.2992328643798827, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.09221902017291066, |
| "grad_norm": 1.3961071968078613, |
| "learning_rate": 0.00027639155470249516, |
| "loss": 3.3040702819824217, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.09317963496637849, |
| "grad_norm": 0.9128854274749756, |
| "learning_rate": 0.0002792706333973128, |
| "loss": 3.2994720458984377, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.0941402497598463, |
| "grad_norm": 1.39821195602417, |
| "learning_rate": 0.00028214971209213047, |
| "loss": 3.3009670257568358, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.09510086455331412, |
| "grad_norm": 1.3767975568771362, |
| "learning_rate": 0.0002850287907869481, |
| "loss": 3.298174285888672, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.09606147934678194, |
| "grad_norm": 1.5919218063354492, |
| "learning_rate": 0.0002879078694817658, |
| "loss": 3.2930580139160157, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.09702209414024976, |
| "grad_norm": 2.060764789581299, |
| "learning_rate": 0.0002907869481765835, |
| "loss": 3.3007606506347655, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.09798270893371758, |
| "grad_norm": 1.864414095878601, |
| "learning_rate": 0.0002936660268714011, |
| "loss": 3.302916717529297, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.0989433237271854, |
| "grad_norm": 1.410025954246521, |
| "learning_rate": 0.0002965451055662188, |
| "loss": 3.297088623046875, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.09990393852065321, |
| "grad_norm": 1.4724565744400024, |
| "learning_rate": 0.0002994241842610364, |
| "loss": 3.2969940185546873, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.10086455331412104, |
| "grad_norm": 1.1308706998825073, |
| "learning_rate": 0.0002999998788910666, |
| "loss": 3.295259475708008, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.10182516810758886, |
| "grad_norm": 2.0092132091522217, |
| "learning_rate": 0.00029999938688636, |
| "loss": 3.3064605712890627, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.10278578290105668, |
| "grad_norm": 1.5756242275238037, |
| "learning_rate": 0.0002999985164178122, |
| "loss": 3.302138900756836, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.1037463976945245, |
| "grad_norm": 1.6854816675186157, |
| "learning_rate": 0.00029999726748761955, |
| "loss": 3.29990234375, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.10470701248799232, |
| "grad_norm": 1.5925079584121704, |
| "learning_rate": 0.0002999956400989331, |
| "loss": 3.3017444610595703, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.10566762728146013, |
| "grad_norm": 0.9805465340614319, |
| "learning_rate": 0.00029999363425585907, |
| "loss": 3.2947647094726564, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.10662824207492795, |
| "grad_norm": 1.143500804901123, |
| "learning_rate": 0.0002999912499634584, |
| "loss": 3.3032691955566404, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.10758885686839577, |
| "grad_norm": 1.57884681224823, |
| "learning_rate": 0.00029998848722774676, |
| "loss": 3.3033897399902346, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.10854947166186359, |
| "grad_norm": 1.9447771310806274, |
| "learning_rate": 0.00029998534605569496, |
| "loss": 3.2998809814453125, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.10951008645533142, |
| "grad_norm": 1.4967366456985474, |
| "learning_rate": 0.0002999818264552284, |
| "loss": 3.2998199462890625, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.11047070124879924, |
| "grad_norm": 1.5965176820755005, |
| "learning_rate": 0.0002999779284352275, |
| "loss": 3.2984855651855467, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.11143131604226705, |
| "grad_norm": 1.3109067678451538, |
| "learning_rate": 0.0002999736520055273, |
| "loss": 3.293305206298828, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.11239193083573487, |
| "grad_norm": 1.374250054359436, |
| "learning_rate": 0.00029996899717691766, |
| "loss": 3.2990135192871093, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.11335254562920269, |
| "grad_norm": 1.8241665363311768, |
| "learning_rate": 0.0002999639639611432, |
| "loss": 3.2949737548828124, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.1143131604226705, |
| "grad_norm": 1.1245280504226685, |
| "learning_rate": 0.00029995855237090327, |
| "loss": 3.3007225036621093, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.11527377521613832, |
| "grad_norm": 1.290839433670044, |
| "learning_rate": 0.0002999527624198518, |
| "loss": 3.2996570587158205, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.11623439000960614, |
| "grad_norm": 1.100143551826477, |
| "learning_rate": 0.00029994659412259746, |
| "loss": 3.2947521209716797, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.11719500480307397, |
| "grad_norm": 1.6502522230148315, |
| "learning_rate": 0.0002999400474947036, |
| "loss": 3.295918273925781, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.11815561959654179, |
| "grad_norm": 1.4078381061553955, |
| "learning_rate": 0.0002999331225526879, |
| "loss": 3.299440383911133, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.11911623439000961, |
| "grad_norm": 1.4280340671539307, |
| "learning_rate": 0.0002999258193140227, |
| "loss": 3.296113967895508, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.12007684918347743, |
| "grad_norm": 1.210864543914795, |
| "learning_rate": 0.00029991813779713485, |
| "loss": 3.292288970947266, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.12103746397694524, |
| "grad_norm": 1.7756609916687012, |
| "learning_rate": 0.0002999100780214057, |
| "loss": 3.291936492919922, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.12199807877041306, |
| "grad_norm": 2.3280141353607178, |
| "learning_rate": 0.0002999016400071708, |
| "loss": 3.2992881774902343, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.12295869356388088, |
| "grad_norm": 1.2652477025985718, |
| "learning_rate": 0.00029989282377572006, |
| "loss": 3.302785110473633, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.1239193083573487, |
| "grad_norm": 1.1368358135223389, |
| "learning_rate": 0.00029988362934929786, |
| "loss": 3.298853302001953, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.12487992315081652, |
| "grad_norm": 1.5556303262710571, |
| "learning_rate": 0.0002998740567511026, |
| "loss": 3.2962833404541017, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.12584053794428435, |
| "grad_norm": 1.2470626831054688, |
| "learning_rate": 0.000299864106005287, |
| "loss": 3.296977996826172, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.12680115273775217, |
| "grad_norm": 1.8531115055084229, |
| "learning_rate": 0.00029985377713695775, |
| "loss": 3.294302749633789, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.12776176753121998, |
| "grad_norm": 1.524565577507019, |
| "learning_rate": 0.0002998430701721757, |
| "loss": 3.2964263916015626, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.1287223823246878, |
| "grad_norm": 1.3688217401504517, |
| "learning_rate": 0.00029983198513795556, |
| "loss": 3.2987823486328125, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.12968299711815562, |
| "grad_norm": 1.3249455690383911, |
| "learning_rate": 0.000299820522062266, |
| "loss": 3.293718719482422, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.13064361191162344, |
| "grad_norm": 1.0875604152679443, |
| "learning_rate": 0.0002998086809740296, |
| "loss": 3.294192886352539, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.13160422670509125, |
| "grad_norm": 1.4566972255706787, |
| "learning_rate": 0.00029979646190312264, |
| "loss": 3.2981857299804687, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.13256484149855907, |
| "grad_norm": 1.506569743156433, |
| "learning_rate": 0.0002997838648803751, |
| "loss": 3.2970245361328123, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.1335254562920269, |
| "grad_norm": 1.2062523365020752, |
| "learning_rate": 0.00029977088993757045, |
| "loss": 3.294635772705078, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.1344860710854947, |
| "grad_norm": 1.4316554069519043, |
| "learning_rate": 0.0002997575371074458, |
| "loss": 3.294428253173828, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.13544668587896252, |
| "grad_norm": 1.0954806804656982, |
| "learning_rate": 0.0002997438064236919, |
| "loss": 3.2926597595214844, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.13640730067243034, |
| "grad_norm": 1.2398245334625244, |
| "learning_rate": 0.0002997296979209526, |
| "loss": 3.293048858642578, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.1373679154658982, |
| "grad_norm": 1.7894244194030762, |
| "learning_rate": 0.00029971521163482494, |
| "loss": 3.2915740966796876, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.138328530259366, |
| "grad_norm": 1.1798651218414307, |
| "learning_rate": 0.0002997003476018595, |
| "loss": 3.2928890228271483, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.13928914505283382, |
| "grad_norm": 1.4055883884429932, |
| "learning_rate": 0.0002996851058595596, |
| "loss": 3.295462417602539, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.14024975984630164, |
| "grad_norm": 1.3189594745635986, |
| "learning_rate": 0.00029966948644638184, |
| "loss": 3.2915382385253906, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.14121037463976946, |
| "grad_norm": 2.0678322315216064, |
| "learning_rate": 0.0002996534894017356, |
| "loss": 3.2929306030273438, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.14217098943323728, |
| "grad_norm": 1.0185853242874146, |
| "learning_rate": 0.000299637114765983, |
| "loss": 3.2973453521728517, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.1431316042267051, |
| "grad_norm": 1.1556357145309448, |
| "learning_rate": 0.00029962036258043893, |
| "loss": 3.2935462951660157, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.1440922190201729, |
| "grad_norm": 1.392240285873413, |
| "learning_rate": 0.00029960323288737097, |
| "loss": 3.2977447509765625, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.14505283381364073, |
| "grad_norm": 1.6778427362442017, |
| "learning_rate": 0.000299585725729999, |
| "loss": 3.2953746795654295, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.14601344860710855, |
| "grad_norm": 0.8692209720611572, |
| "learning_rate": 0.00029956784115249535, |
| "loss": 3.2932708740234373, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.14697406340057637, |
| "grad_norm": 1.0066865682601929, |
| "learning_rate": 0.0002995495791999847, |
| "loss": 3.291333770751953, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.14793467819404418, |
| "grad_norm": 1.1920307874679565, |
| "learning_rate": 0.0002995309399185439, |
| "loss": 3.2948654174804686, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.148895292987512, |
| "grad_norm": 0.9732314348220825, |
| "learning_rate": 0.0002995119233552016, |
| "loss": 3.2924430847167967, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.14985590778097982, |
| "grad_norm": 1.306164264678955, |
| "learning_rate": 0.0002994925295579386, |
| "loss": 3.2954177856445312, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.15081652257444764, |
| "grad_norm": 1.2705974578857422, |
| "learning_rate": 0.0002994727585756875, |
| "loss": 3.291736602783203, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.15177713736791545, |
| "grad_norm": 1.4319415092468262, |
| "learning_rate": 0.00029945261045833256, |
| "loss": 3.2872940063476563, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.15273775216138327, |
| "grad_norm": 1.5740435123443604, |
| "learning_rate": 0.0002994320852567094, |
| "loss": 3.2921607971191404, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.15369836695485112, |
| "grad_norm": 1.2682417631149292, |
| "learning_rate": 0.0002994111830226053, |
| "loss": 3.2847610473632813, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.15465898174831894, |
| "grad_norm": 1.3516907691955566, |
| "learning_rate": 0.0002993899038087587, |
| "loss": 3.2877117156982423, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.15561959654178675, |
| "grad_norm": 1.1541163921356201, |
| "learning_rate": 0.00029936824766885927, |
| "loss": 3.28739013671875, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.15658021133525457, |
| "grad_norm": 1.4818735122680664, |
| "learning_rate": 0.0002993462146575477, |
| "loss": 3.2846580505371095, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.1575408261287224, |
| "grad_norm": 1.0636875629425049, |
| "learning_rate": 0.00029932380483041547, |
| "loss": 3.2877876281738283, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.1585014409221902, |
| "grad_norm": 1.1833419799804688, |
| "learning_rate": 0.00029930101824400486, |
| "loss": 3.289693069458008, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.15946205571565802, |
| "grad_norm": 1.19170343875885, |
| "learning_rate": 0.00029927785495580887, |
| "loss": 3.283536911010742, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.16042267050912584, |
| "grad_norm": 1.074803352355957, |
| "learning_rate": 0.00029925431502427077, |
| "loss": 3.2888442993164064, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.16138328530259366, |
| "grad_norm": 1.205644130706787, |
| "learning_rate": 0.00029923039850878423, |
| "loss": 3.2893348693847657, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.16234390009606148, |
| "grad_norm": 1.6655492782592773, |
| "learning_rate": 0.00029920610546969314, |
| "loss": 3.2857383728027343, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.1633045148895293, |
| "grad_norm": 1.3908863067626953, |
| "learning_rate": 0.0002991814359682912, |
| "loss": 3.2931854248046877, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.1642651296829971, |
| "grad_norm": 1.3854031562805176, |
| "learning_rate": 0.00029915639006682223, |
| "loss": 3.291329193115234, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.16522574447646493, |
| "grad_norm": 1.3702648878097534, |
| "learning_rate": 0.0002991309678284795, |
| "loss": 3.283129119873047, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.16618635926993275, |
| "grad_norm": 1.3857735395431519, |
| "learning_rate": 0.000299105169317406, |
| "loss": 3.285200500488281, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.16714697406340057, |
| "grad_norm": 1.0670974254608154, |
| "learning_rate": 0.000299078994598694, |
| "loss": 3.287001037597656, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.16810758885686838, |
| "grad_norm": 1.2670496702194214, |
| "learning_rate": 0.000299052443738385, |
| "loss": 3.28797607421875, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.1690682036503362, |
| "grad_norm": 1.4439146518707275, |
| "learning_rate": 0.00029902551680346956, |
| "loss": 3.2858448028564453, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.17002881844380405, |
| "grad_norm": 1.4315037727355957, |
| "learning_rate": 0.000298998213861887, |
| "loss": 3.28741340637207, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.17098943323727187, |
| "grad_norm": 1.227054238319397, |
| "learning_rate": 0.0002989705349825256, |
| "loss": 3.285395050048828, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.17195004803073968, |
| "grad_norm": 1.9081028699874878, |
| "learning_rate": 0.0002989424802352218, |
| "loss": 3.2881591796875, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.1729106628242075, |
| "grad_norm": 1.5766661167144775, |
| "learning_rate": 0.0002989140496907609, |
| "loss": 3.282776641845703, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.17387127761767532, |
| "grad_norm": 1.3651994466781616, |
| "learning_rate": 0.00029888524342087583, |
| "loss": 3.288460540771484, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.17483189241114314, |
| "grad_norm": 1.1733394861221313, |
| "learning_rate": 0.00029885606149824786, |
| "loss": 3.2853572845458983, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.17579250720461095, |
| "grad_norm": 1.798338770866394, |
| "learning_rate": 0.00029882650399650596, |
| "loss": 3.2855796813964844, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.17675312199807877, |
| "grad_norm": 1.5101557970046997, |
| "learning_rate": 0.00029879657099022677, |
| "loss": 3.2913845062255858, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.1777137367915466, |
| "grad_norm": 1.1141709089279175, |
| "learning_rate": 0.0002987662625549343, |
| "loss": 3.2880714416503904, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.1786743515850144, |
| "grad_norm": 1.4672577381134033, |
| "learning_rate": 0.00029873557876709984, |
| "loss": 3.2870162963867187, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.17963496637848222, |
| "grad_norm": 1.2008938789367676, |
| "learning_rate": 0.00029870451970414176, |
| "loss": 3.281757354736328, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.18059558117195004, |
| "grad_norm": 1.4659417867660522, |
| "learning_rate": 0.00029867308544442525, |
| "loss": 3.2835357666015623, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.18155619596541786, |
| "grad_norm": 1.200221300125122, |
| "learning_rate": 0.0002986412760672622, |
| "loss": 3.2824310302734374, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.18251681075888568, |
| "grad_norm": 1.1019620895385742, |
| "learning_rate": 0.0002986090916529109, |
| "loss": 3.2842033386230467, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.1834774255523535, |
| "grad_norm": 1.3469750881195068, |
| "learning_rate": 0.00029857653228257587, |
| "loss": 3.285472106933594, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.1844380403458213, |
| "grad_norm": 1.3108643293380737, |
| "learning_rate": 0.0002985435980384078, |
| "loss": 3.285649871826172, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.18539865513928913, |
| "grad_norm": 1.452244758605957, |
| "learning_rate": 0.0002985102890035032, |
| "loss": 3.2803878784179688, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.18635926993275698, |
| "grad_norm": 1.229927897453308, |
| "learning_rate": 0.0002984766052619041, |
| "loss": 3.283854675292969, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.1873198847262248, |
| "grad_norm": 1.1009323596954346, |
| "learning_rate": 0.0002984425468985982, |
| "loss": 3.2806129455566406, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.1882804995196926, |
| "grad_norm": 1.5917284488677979, |
| "learning_rate": 0.00029840811399951806, |
| "loss": 3.282550048828125, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.18924111431316043, |
| "grad_norm": 1.1452780961990356, |
| "learning_rate": 0.00029837330665154156, |
| "loss": 3.2815521240234373, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.19020172910662825, |
| "grad_norm": 0.9764254093170166, |
| "learning_rate": 0.000298338124942491, |
| "loss": 3.284914016723633, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.19116234390009607, |
| "grad_norm": 1.045482873916626, |
| "learning_rate": 0.0002983025689611337, |
| "loss": 3.2865413665771483, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.19212295869356388, |
| "grad_norm": 1.3126083612442017, |
| "learning_rate": 0.0002982666387971809, |
| "loss": 3.281140899658203, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1930835734870317, |
| "grad_norm": 1.3957022428512573, |
| "learning_rate": 0.0002982303345412881, |
| "loss": 3.2785770416259767, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.19404418828049952, |
| "grad_norm": 1.2035447359085083, |
| "learning_rate": 0.0002981936562850547, |
| "loss": 3.279229736328125, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.19500480307396734, |
| "grad_norm": 0.8585781455039978, |
| "learning_rate": 0.00029815660412102366, |
| "loss": 3.2802391052246094, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.19596541786743515, |
| "grad_norm": 1.203058123588562, |
| "learning_rate": 0.00029811917814268144, |
| "loss": 3.2790596008300783, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.19692603266090297, |
| "grad_norm": 1.4647698402404785, |
| "learning_rate": 0.0002980813784444576, |
| "loss": 3.2819541931152343, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.1978866474543708, |
| "grad_norm": 1.4942032098770142, |
| "learning_rate": 0.0002980432051217246, |
| "loss": 3.2802181243896484, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.1988472622478386, |
| "grad_norm": 1.1856545209884644, |
| "learning_rate": 0.0002980046582707978, |
| "loss": 3.290009307861328, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.19980787704130643, |
| "grad_norm": 1.166818380355835, |
| "learning_rate": 0.00029796573798893464, |
| "loss": 3.28170166015625, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.20076849183477424, |
| "grad_norm": 1.310017466545105, |
| "learning_rate": 0.0002979264443743352, |
| "loss": 3.282215118408203, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.2017291066282421, |
| "grad_norm": 1.7260593175888062, |
| "learning_rate": 0.00029788677752614116, |
| "loss": 3.2847213745117188, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.2026897214217099, |
| "grad_norm": 1.083449125289917, |
| "learning_rate": 0.0002978467375444361, |
| "loss": 3.278899002075195, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.20365033621517772, |
| "grad_norm": 1.2556031942367554, |
| "learning_rate": 0.000297806324530245, |
| "loss": 3.280004119873047, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.20461095100864554, |
| "grad_norm": 0.8801208138465881, |
| "learning_rate": 0.00029776553858553405, |
| "loss": 3.2757003784179686, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.20557156580211336, |
| "grad_norm": 1.419739007949829, |
| "learning_rate": 0.0002977243798132103, |
| "loss": 3.2807285308837892, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.20653218059558118, |
| "grad_norm": 1.3385144472122192, |
| "learning_rate": 0.00029768284831712175, |
| "loss": 3.280821990966797, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.207492795389049, |
| "grad_norm": 1.0410194396972656, |
| "learning_rate": 0.0002976409442020564, |
| "loss": 3.279698944091797, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.2084534101825168, |
| "grad_norm": 1.2109322547912598, |
| "learning_rate": 0.0002975986675737427, |
| "loss": 3.2776737213134766, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.20941402497598463, |
| "grad_norm": 1.598090648651123, |
| "learning_rate": 0.00029755601853884893, |
| "loss": 3.277222442626953, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.21037463976945245, |
| "grad_norm": 1.0197858810424805, |
| "learning_rate": 0.000297512997204983, |
| "loss": 3.2803329467773437, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.21133525456292027, |
| "grad_norm": 0.9741032123565674, |
| "learning_rate": 0.0002974696036806922, |
| "loss": 3.2853607177734374, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.21229586935638808, |
| "grad_norm": 1.3451876640319824, |
| "learning_rate": 0.00029742583807546274, |
| "loss": 3.276333236694336, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.2132564841498559, |
| "grad_norm": 1.2260080575942993, |
| "learning_rate": 0.00029738170049971974, |
| "loss": 3.2778556823730467, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.21421709894332372, |
| "grad_norm": 1.4238629341125488, |
| "learning_rate": 0.00029733719106482694, |
| "loss": 3.27607421875, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.21517771373679154, |
| "grad_norm": 1.0617904663085938, |
| "learning_rate": 0.0002972923098830861, |
| "loss": 3.278319549560547, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.21613832853025935, |
| "grad_norm": 1.3632889986038208, |
| "learning_rate": 0.0002972470570677371, |
| "loss": 3.2795860290527346, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.21709894332372717, |
| "grad_norm": 1.2691421508789062, |
| "learning_rate": 0.00029720143273295735, |
| "loss": 3.277021026611328, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.21805955811719502, |
| "grad_norm": 1.2249642610549927, |
| "learning_rate": 0.0002971554369938619, |
| "loss": 3.274774169921875, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.21902017291066284, |
| "grad_norm": 1.6009410619735718, |
| "learning_rate": 0.0002971090699665025, |
| "loss": 3.2758060455322267, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.21998078770413065, |
| "grad_norm": 1.1862974166870117, |
| "learning_rate": 0.0002970623317678681, |
| "loss": 3.278969955444336, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.22094140249759847, |
| "grad_norm": 1.1896966695785522, |
| "learning_rate": 0.00029701522251588395, |
| "loss": 3.277644729614258, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.2219020172910663, |
| "grad_norm": 1.2047041654586792, |
| "learning_rate": 0.00029696774232941135, |
| "loss": 3.276856231689453, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.2228626320845341, |
| "grad_norm": 1.4511812925338745, |
| "learning_rate": 0.0002969198913282479, |
| "loss": 3.2762985229492188, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.22382324687800192, |
| "grad_norm": 1.0419248342514038, |
| "learning_rate": 0.0002968716696331265, |
| "loss": 3.2781463623046876, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.22478386167146974, |
| "grad_norm": 1.0537165403366089, |
| "learning_rate": 0.00029682307736571533, |
| "loss": 3.280584716796875, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.22574447646493756, |
| "grad_norm": 0.9234645366668701, |
| "learning_rate": 0.00029677411464861783, |
| "loss": 3.2776565551757812, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.22670509125840538, |
| "grad_norm": 1.0644463300704956, |
| "learning_rate": 0.00029672478160537184, |
| "loss": 3.2758762359619142, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.2276657060518732, |
| "grad_norm": 0.9237310290336609, |
| "learning_rate": 0.00029667507836044984, |
| "loss": 3.2722846984863283, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.228626320845341, |
| "grad_norm": 1.5153244733810425, |
| "learning_rate": 0.00029662500503925803, |
| "loss": 3.2741416931152343, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.22958693563880883, |
| "grad_norm": 1.3842270374298096, |
| "learning_rate": 0.00029657456176813664, |
| "loss": 3.2764862060546873, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.23054755043227665, |
| "grad_norm": 1.1134364604949951, |
| "learning_rate": 0.0002965237486743592, |
| "loss": 3.275590515136719, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.23150816522574447, |
| "grad_norm": 1.4811969995498657, |
| "learning_rate": 0.0002964725658861323, |
| "loss": 3.2718353271484375, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.23246878001921228, |
| "grad_norm": 1.2888314723968506, |
| "learning_rate": 0.0002964210135325955, |
| "loss": 3.2731605529785157, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.2334293948126801, |
| "grad_norm": 1.530621886253357, |
| "learning_rate": 0.0002963690917438206, |
| "loss": 3.2751121520996094, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.23439000960614795, |
| "grad_norm": 1.0951229333877563, |
| "learning_rate": 0.0002963168006508116, |
| "loss": 3.2797927856445312, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.23535062439961577, |
| "grad_norm": 1.0309784412384033, |
| "learning_rate": 0.00029626414038550424, |
| "loss": 3.279781723022461, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.23631123919308358, |
| "grad_norm": 1.211702823638916, |
| "learning_rate": 0.00029621111108076584, |
| "loss": 3.276560592651367, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.2372718539865514, |
| "grad_norm": 1.0704128742218018, |
| "learning_rate": 0.0002961577128703948, |
| "loss": 3.2736587524414062, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.23823246878001922, |
| "grad_norm": 1.4519094228744507, |
| "learning_rate": 0.0002961039458891202, |
| "loss": 3.274006652832031, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.23919308357348704, |
| "grad_norm": 1.2808977365493774, |
| "learning_rate": 0.00029604981027260167, |
| "loss": 3.27347412109375, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.24015369836695485, |
| "grad_norm": 0.8584306836128235, |
| "learning_rate": 0.00029599530615742896, |
| "loss": 3.2714912414550783, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.24111431316042267, |
| "grad_norm": 1.1275231838226318, |
| "learning_rate": 0.0002959404336811215, |
| "loss": 3.2730926513671874, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.2420749279538905, |
| "grad_norm": 1.422745943069458, |
| "learning_rate": 0.0002958851929821283, |
| "loss": 3.27635498046875, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.2430355427473583, |
| "grad_norm": 1.0575655698776245, |
| "learning_rate": 0.00029582958419982713, |
| "loss": 3.272149658203125, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.24399615754082613, |
| "grad_norm": 1.2576707601547241, |
| "learning_rate": 0.0002957736074745248, |
| "loss": 3.275560760498047, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.24495677233429394, |
| "grad_norm": 1.328376054763794, |
| "learning_rate": 0.0002957172629474562, |
| "loss": 3.272699737548828, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.24591738712776176, |
| "grad_norm": 1.0527936220169067, |
| "learning_rate": 0.00029566055076078447, |
| "loss": 3.269004058837891, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.24687800192122958, |
| "grad_norm": 1.1557375192642212, |
| "learning_rate": 0.00029560347105760023, |
| "loss": 3.2709724426269533, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.2478386167146974, |
| "grad_norm": 1.1958216428756714, |
| "learning_rate": 0.00029554602398192137, |
| "loss": 3.276508331298828, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.24879923150816521, |
| "grad_norm": 1.0567305088043213, |
| "learning_rate": 0.00029548820967869287, |
| "loss": 3.270316314697266, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.24975984630163303, |
| "grad_norm": 1.1860175132751465, |
| "learning_rate": 0.00029543002829378605, |
| "loss": 3.274994659423828, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.2507204610951009, |
| "grad_norm": 1.2902470827102661, |
| "learning_rate": 0.0002953714799739986, |
| "loss": 3.2740650177001953, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.2516810758885687, |
| "grad_norm": 1.0249444246292114, |
| "learning_rate": 0.0002953125648670538, |
| "loss": 3.273143768310547, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.2526416906820365, |
| "grad_norm": 1.051010251045227, |
| "learning_rate": 0.00029525328312160064, |
| "loss": 3.270255279541016, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.25360230547550433, |
| "grad_norm": 1.5545085668563843, |
| "learning_rate": 0.00029519363488721297, |
| "loss": 3.270277404785156, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.25456292026897215, |
| "grad_norm": 1.096474289894104, |
| "learning_rate": 0.0002951336203143895, |
| "loss": 3.2695423126220704, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.25552353506243997, |
| "grad_norm": 1.3266576528549194, |
| "learning_rate": 0.0002950732395545531, |
| "loss": 3.268444061279297, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.2564841498559078, |
| "grad_norm": 1.3308000564575195, |
| "learning_rate": 0.0002950124927600505, |
| "loss": 3.270121765136719, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.2574447646493756, |
| "grad_norm": 0.9661091566085815, |
| "learning_rate": 0.0002949513800841523, |
| "loss": 3.2689208984375, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.2584053794428434, |
| "grad_norm": 1.0783677101135254, |
| "learning_rate": 0.0002948899016810519, |
| "loss": 3.2726470947265627, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.25936599423631124, |
| "grad_norm": 1.44166898727417, |
| "learning_rate": 0.00029482805770586566, |
| "loss": 3.2676158905029298, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.26032660902977905, |
| "grad_norm": 0.9048498868942261, |
| "learning_rate": 0.0002947658483146323, |
| "loss": 3.268976593017578, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.2612872238232469, |
| "grad_norm": 0.9976827502250671, |
| "learning_rate": 0.00029470327366431257, |
| "loss": 3.2651641845703123, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.2622478386167147, |
| "grad_norm": 1.4668549299240112, |
| "learning_rate": 0.00029464033391278857, |
| "loss": 3.267919158935547, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.2632084534101825, |
| "grad_norm": 1.3420720100402832, |
| "learning_rate": 0.0002945770292188638, |
| "loss": 3.2697959899902345, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.2641690682036503, |
| "grad_norm": 1.3224592208862305, |
| "learning_rate": 0.00029451335974226255, |
| "loss": 3.2723613739013673, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.26512968299711814, |
| "grad_norm": 1.1924461126327515, |
| "learning_rate": 0.0002944493256436294, |
| "loss": 3.270499038696289, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.26609029779058596, |
| "grad_norm": 1.2803032398223877, |
| "learning_rate": 0.000294384927084529, |
| "loss": 3.269664001464844, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.2670509125840538, |
| "grad_norm": 1.0187225341796875, |
| "learning_rate": 0.0002943201642274455, |
| "loss": 3.2647491455078126, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.2680115273775216, |
| "grad_norm": 1.3384113311767578, |
| "learning_rate": 0.00029425503723578216, |
| "loss": 3.2721004486083984, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.2689721421709894, |
| "grad_norm": 1.2812883853912354, |
| "learning_rate": 0.00029418954627386115, |
| "loss": 3.269911193847656, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.26993275696445723, |
| "grad_norm": 1.1734299659729004, |
| "learning_rate": 0.0002941236915069228, |
| "loss": 3.2672080993652344, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.27089337175792505, |
| "grad_norm": 1.069024682044983, |
| "learning_rate": 0.00029405747310112557, |
| "loss": 3.2702381134033205, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.27185398655139287, |
| "grad_norm": 1.286638617515564, |
| "learning_rate": 0.0002939908912235452, |
| "loss": 3.2697906494140625, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.2728146013448607, |
| "grad_norm": 1.1734787225723267, |
| "learning_rate": 0.0002939239460421746, |
| "loss": 3.2664981842041017, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.2737752161383285, |
| "grad_norm": 1.150191307067871, |
| "learning_rate": 0.00029385663772592334, |
| "loss": 3.2677589416503907, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.2747358309317964, |
| "grad_norm": 1.3920680284500122, |
| "learning_rate": 0.00029378896644461716, |
| "loss": 3.2696159362792967, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.2756964457252642, |
| "grad_norm": 1.1501007080078125, |
| "learning_rate": 0.0002937209323689978, |
| "loss": 3.268462371826172, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.276657060518732, |
| "grad_norm": 1.2256152629852295, |
| "learning_rate": 0.00029365253567072207, |
| "loss": 3.269298553466797, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.27761767531219983, |
| "grad_norm": 1.2467095851898193, |
| "learning_rate": 0.00029358377652236186, |
| "loss": 3.2696155548095702, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.27857829010566765, |
| "grad_norm": 0.7786490321159363, |
| "learning_rate": 0.00029351465509740366, |
| "loss": 3.265594482421875, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.27953890489913547, |
| "grad_norm": 0.9412602186203003, |
| "learning_rate": 0.00029344517157024773, |
| "loss": 3.2679397583007814, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.2804995196926033, |
| "grad_norm": 1.1962471008300781, |
| "learning_rate": 0.0002933753261162084, |
| "loss": 3.276123046875, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.2814601344860711, |
| "grad_norm": 1.1592360734939575, |
| "learning_rate": 0.00029330511891151263, |
| "loss": 3.2704566955566405, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.2824207492795389, |
| "grad_norm": 0.8931114673614502, |
| "learning_rate": 0.00029323455013330064, |
| "loss": 3.2649574279785156, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.28338136407300674, |
| "grad_norm": 0.8980585932731628, |
| "learning_rate": 0.00029316361995962453, |
| "loss": 3.2683528900146483, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.28434197886647455, |
| "grad_norm": 1.330986499786377, |
| "learning_rate": 0.0002930923285694485, |
| "loss": 3.2716033935546873, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.28530259365994237, |
| "grad_norm": 1.0797970294952393, |
| "learning_rate": 0.000293020676142648, |
| "loss": 3.268834686279297, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.2862632084534102, |
| "grad_norm": 1.2156567573547363, |
| "learning_rate": 0.00029294866286000946, |
| "loss": 3.2620380401611326, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.287223823246878, |
| "grad_norm": 1.3845075368881226, |
| "learning_rate": 0.00029287628890322986, |
| "loss": 3.2627891540527343, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.2881844380403458, |
| "grad_norm": 1.2039449214935303, |
| "learning_rate": 0.00029280355445491596, |
| "loss": 3.2676162719726562, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.28914505283381364, |
| "grad_norm": 1.2778565883636475, |
| "learning_rate": 0.00029273045969858437, |
| "loss": 3.267996978759766, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.29010566762728146, |
| "grad_norm": 0.9892801642417908, |
| "learning_rate": 0.0002926570048186606, |
| "loss": 3.2660484313964844, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.2910662824207493, |
| "grad_norm": 1.0218427181243896, |
| "learning_rate": 0.00029258319000047885, |
| "loss": 3.269457244873047, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.2920268972142171, |
| "grad_norm": 1.0072425603866577, |
| "learning_rate": 0.00029250901543028144, |
| "loss": 3.267464828491211, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.2929875120076849, |
| "grad_norm": 1.4437980651855469, |
| "learning_rate": 0.00029243448129521847, |
| "loss": 3.2681037902832033, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.29394812680115273, |
| "grad_norm": 0.8555430769920349, |
| "learning_rate": 0.0002923595877833472, |
| "loss": 3.2668888092041017, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.29490874159462055, |
| "grad_norm": 1.2450693845748901, |
| "learning_rate": 0.0002922843350836317, |
| "loss": 3.2664356231689453, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.29586935638808837, |
| "grad_norm": 0.9553253650665283, |
| "learning_rate": 0.00029220872338594215, |
| "loss": 3.263041687011719, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.2968299711815562, |
| "grad_norm": 1.2076423168182373, |
| "learning_rate": 0.0002921327528810547, |
| "loss": 3.266950988769531, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.297790585975024, |
| "grad_norm": 2.1191205978393555, |
| "learning_rate": 0.00029205642376065066, |
| "loss": 3.2656558990478515, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.2987512007684918, |
| "grad_norm": 1.4229607582092285, |
| "learning_rate": 0.0002919797362173163, |
| "loss": 3.2715381622314452, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.29971181556195964, |
| "grad_norm": 1.2368606328964233, |
| "learning_rate": 0.00029190269044454206, |
| "loss": 3.2648460388183596, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.30067243035542746, |
| "grad_norm": 1.266961693763733, |
| "learning_rate": 0.00029182528663672245, |
| "loss": 3.2667465209960938, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.3016330451488953, |
| "grad_norm": 0.9238030314445496, |
| "learning_rate": 0.00029174752498915515, |
| "loss": 3.262348175048828, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.3025936599423631, |
| "grad_norm": 1.0845513343811035, |
| "learning_rate": 0.0002916694056980408, |
| "loss": 3.266324996948242, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.3035542747358309, |
| "grad_norm": 1.050512433052063, |
| "learning_rate": 0.0002915909289604823, |
| "loss": 3.2667007446289062, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.3045148895292987, |
| "grad_norm": 1.25586998462677, |
| "learning_rate": 0.00029151209497448467, |
| "loss": 3.262495422363281, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.30547550432276654, |
| "grad_norm": 1.1228755712509155, |
| "learning_rate": 0.000291432903938954, |
| "loss": 3.26649169921875, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.30643611911623436, |
| "grad_norm": 1.2967936992645264, |
| "learning_rate": 0.0002913533560536975, |
| "loss": 3.264292526245117, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.30739673390970224, |
| "grad_norm": 1.1702841520309448, |
| "learning_rate": 0.00029127345151942253, |
| "loss": 3.2614276885986326, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.30835734870317005, |
| "grad_norm": 1.438833236694336, |
| "learning_rate": 0.00029119319053773655, |
| "loss": 3.2649909973144533, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.30931796349663787, |
| "grad_norm": 1.1444823741912842, |
| "learning_rate": 0.00029111257331114617, |
| "loss": 3.2618698120117187, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.3102785782901057, |
| "grad_norm": 1.2421047687530518, |
| "learning_rate": 0.00029103160004305693, |
| "loss": 3.260860061645508, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.3112391930835735, |
| "grad_norm": 1.3885422945022583, |
| "learning_rate": 0.00029095027093777276, |
| "loss": 3.2636131286621093, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.3121998078770413, |
| "grad_norm": 1.403899073600769, |
| "learning_rate": 0.00029086858620049527, |
| "loss": 3.261060333251953, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.31316042267050914, |
| "grad_norm": 1.4193172454833984, |
| "learning_rate": 0.0002907865460373234, |
| "loss": 3.2587600708007813, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.31412103746397696, |
| "grad_norm": 0.9989815354347229, |
| "learning_rate": 0.00029070415065525295, |
| "loss": 3.2618736267089843, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.3150816522574448, |
| "grad_norm": 1.0307117700576782, |
| "learning_rate": 0.00029062140026217596, |
| "loss": 3.25679931640625, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.3160422670509126, |
| "grad_norm": 1.3937115669250488, |
| "learning_rate": 0.0002905382950668801, |
| "loss": 3.2610740661621094, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.3170028818443804, |
| "grad_norm": 1.0371273756027222, |
| "learning_rate": 0.0002904548352790483, |
| "loss": 3.264708709716797, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.31796349663784823, |
| "grad_norm": 1.202774167060852, |
| "learning_rate": 0.0002903710211092582, |
| "loss": 3.261525344848633, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.31892411143131605, |
| "grad_norm": 1.2215559482574463, |
| "learning_rate": 0.0002902868527689816, |
| "loss": 3.264019012451172, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.31988472622478387, |
| "grad_norm": 0.8910490274429321, |
| "learning_rate": 0.0002902023304705837, |
| "loss": 3.262919616699219, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.3208453410182517, |
| "grad_norm": 1.35845148563385, |
| "learning_rate": 0.00029011745442732303, |
| "loss": 3.2655372619628906, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.3218059558117195, |
| "grad_norm": 1.354686975479126, |
| "learning_rate": 0.0002900322248533506, |
| "loss": 3.26285400390625, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.3227665706051873, |
| "grad_norm": 1.0969462394714355, |
| "learning_rate": 0.0002899466419637093, |
| "loss": 3.257940673828125, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.32372718539865514, |
| "grad_norm": 1.291715145111084, |
| "learning_rate": 0.00028986070597433354, |
| "loss": 3.2633174896240233, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.32468780019212296, |
| "grad_norm": 1.4123157262802124, |
| "learning_rate": 0.00028977441710204867, |
| "loss": 3.2593170166015626, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.3256484149855908, |
| "grad_norm": 1.1881605386734009, |
| "learning_rate": 0.0002896877755645703, |
| "loss": 3.2590873718261717, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.3266090297790586, |
| "grad_norm": 1.043522596359253, |
| "learning_rate": 0.00028960078158050403, |
| "loss": 3.258655548095703, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.3275696445725264, |
| "grad_norm": 1.0990368127822876, |
| "learning_rate": 0.0002895134353693445, |
| "loss": 3.258600616455078, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.3285302593659942, |
| "grad_norm": 1.4628812074661255, |
| "learning_rate": 0.0002894257371514752, |
| "loss": 3.262346649169922, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.32949087415946204, |
| "grad_norm": 0.8175418376922607, |
| "learning_rate": 0.0002893376871481678, |
| "loss": 3.255454254150391, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.33045148895292986, |
| "grad_norm": 1.0393476486206055, |
| "learning_rate": 0.0002892492855815814, |
| "loss": 3.2613197326660157, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.3314121037463977, |
| "grad_norm": 1.250258445739746, |
| "learning_rate": 0.0002891605326747622, |
| "loss": 3.260877227783203, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.3323727185398655, |
| "grad_norm": 0.8695818781852722, |
| "learning_rate": 0.00028907142865164305, |
| "loss": 3.259082794189453, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 1.0892332792282104, |
| "learning_rate": 0.00028898197373704234, |
| "loss": 3.261730194091797, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.33429394812680113, |
| "grad_norm": 1.2874714136123657, |
| "learning_rate": 0.00028889216815666406, |
| "loss": 3.2563186645507813, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.33525456292026895, |
| "grad_norm": 1.2988804578781128, |
| "learning_rate": 0.000288802012137097, |
| "loss": 3.2569385528564454, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.33621517771373677, |
| "grad_norm": 1.2552379369735718, |
| "learning_rate": 0.0002887115059058139, |
| "loss": 3.2592254638671876, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.3371757925072046, |
| "grad_norm": 0.9785555601119995, |
| "learning_rate": 0.0002886206496911714, |
| "loss": 3.257631301879883, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.3381364073006724, |
| "grad_norm": 1.3783308267593384, |
| "learning_rate": 0.00028852944372240896, |
| "loss": 3.263458251953125, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.3390970220941403, |
| "grad_norm": 1.2814546823501587, |
| "learning_rate": 0.00028843788822964864, |
| "loss": 3.2578887939453125, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.3400576368876081, |
| "grad_norm": 1.2692376375198364, |
| "learning_rate": 0.00028834598344389437, |
| "loss": 3.2599555969238283, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.3410182516810759, |
| "grad_norm": 1.3274823427200317, |
| "learning_rate": 0.0002882537295970312, |
| "loss": 3.261073684692383, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.34197886647454373, |
| "grad_norm": 1.1028993129730225, |
| "learning_rate": 0.00028816112692182526, |
| "loss": 3.2600685119628907, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.34293948126801155, |
| "grad_norm": 1.0940430164337158, |
| "learning_rate": 0.0002880681756519224, |
| "loss": 3.2631156921386717, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.34390009606147937, |
| "grad_norm": 1.1741334199905396, |
| "learning_rate": 0.0002879748760218484, |
| "loss": 3.2562606811523436, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.3448607108549472, |
| "grad_norm": 1.4189056158065796, |
| "learning_rate": 0.0002878812282670077, |
| "loss": 3.2629486083984376, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.345821325648415, |
| "grad_norm": 0.9202475547790527, |
| "learning_rate": 0.00028778723262368313, |
| "loss": 3.258461761474609, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.3467819404418828, |
| "grad_norm": 1.0866518020629883, |
| "learning_rate": 0.00028769288932903547, |
| "loss": 3.2561798095703125, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.34774255523535064, |
| "grad_norm": 1.1435341835021973, |
| "learning_rate": 0.00028759819862110244, |
| "loss": 3.253205108642578, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.34870317002881845, |
| "grad_norm": 0.9997341632843018, |
| "learning_rate": 0.00028750316073879845, |
| "loss": 3.256957244873047, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.34966378482228627, |
| "grad_norm": 1.5304245948791504, |
| "learning_rate": 0.0002874077759219138, |
| "loss": 3.253832244873047, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.3506243996157541, |
| "grad_norm": 1.3517911434173584, |
| "learning_rate": 0.0002873120444111142, |
| "loss": 3.2599609375, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.3515850144092219, |
| "grad_norm": 1.1517263650894165, |
| "learning_rate": 0.0002872159664479401, |
| "loss": 3.253156280517578, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.3525456292026897, |
| "grad_norm": 1.2207057476043701, |
| "learning_rate": 0.00028711954227480596, |
| "loss": 3.255885696411133, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.35350624399615754, |
| "grad_norm": 1.4356719255447388, |
| "learning_rate": 0.00028702277213499993, |
| "loss": 3.2568824768066404, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.35446685878962536, |
| "grad_norm": 0.9663187265396118, |
| "learning_rate": 0.000286925656272683, |
| "loss": 3.2607173919677734, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.3554274735830932, |
| "grad_norm": 1.0548568964004517, |
| "learning_rate": 0.0002868281949328884, |
| "loss": 3.254198455810547, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.356388088376561, |
| "grad_norm": 1.216261386871338, |
| "learning_rate": 0.0002867303883615212, |
| "loss": 3.25797119140625, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.3573487031700288, |
| "grad_norm": 0.9959939122200012, |
| "learning_rate": 0.00028663223680535727, |
| "loss": 3.2549095153808594, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.35830931796349663, |
| "grad_norm": 1.0812616348266602, |
| "learning_rate": 0.0002865337405120432, |
| "loss": 3.2560733795166015, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.35926993275696445, |
| "grad_norm": 1.2685303688049316, |
| "learning_rate": 0.0002864348997300951, |
| "loss": 3.2555168151855467, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.36023054755043227, |
| "grad_norm": 1.0879360437393188, |
| "learning_rate": 0.0002863357147088985, |
| "loss": 3.255245590209961, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.3611911623439001, |
| "grad_norm": 1.0502004623413086, |
| "learning_rate": 0.0002862361856987073, |
| "loss": 3.260142517089844, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.3621517771373679, |
| "grad_norm": 1.2763992547988892, |
| "learning_rate": 0.0002861363129506435, |
| "loss": 3.2576175689697267, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.3631123919308357, |
| "grad_norm": 1.2673193216323853, |
| "learning_rate": 0.0002860360967166963, |
| "loss": 3.259668731689453, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.36407300672430354, |
| "grad_norm": 1.0005592107772827, |
| "learning_rate": 0.00028593553724972146, |
| "loss": 3.2603923797607424, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.36503362151777136, |
| "grad_norm": 1.2387863397598267, |
| "learning_rate": 0.0002858346348034408, |
| "loss": 3.2571121215820313, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.3659942363112392, |
| "grad_norm": 1.0775574445724487, |
| "learning_rate": 0.0002857333896324417, |
| "loss": 3.2519359588623047, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.366954851104707, |
| "grad_norm": 1.1767851114273071, |
| "learning_rate": 0.00028563180199217596, |
| "loss": 3.256037139892578, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.3679154658981748, |
| "grad_norm": 1.2488412857055664, |
| "learning_rate": 0.00028552987213895965, |
| "loss": 3.2543380737304686, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.3688760806916426, |
| "grad_norm": 1.2731399536132812, |
| "learning_rate": 0.0002854276003299723, |
| "loss": 3.251921844482422, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.36983669548511044, |
| "grad_norm": 1.2652751207351685, |
| "learning_rate": 0.00028532498682325613, |
| "loss": 3.254206085205078, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.37079731027857826, |
| "grad_norm": 1.3380461931228638, |
| "learning_rate": 0.0002852220318777156, |
| "loss": 3.2542865753173826, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.37175792507204614, |
| "grad_norm": 1.0234968662261963, |
| "learning_rate": 0.0002851187357531165, |
| "loss": 3.2551605224609377, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.37271853986551395, |
| "grad_norm": 0.7750507593154907, |
| "learning_rate": 0.00028501509871008563, |
| "loss": 3.258992004394531, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.37367915465898177, |
| "grad_norm": 0.9654103517532349, |
| "learning_rate": 0.00028491112101010986, |
| "loss": 3.2557037353515623, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.3746397694524496, |
| "grad_norm": 1.1453462839126587, |
| "learning_rate": 0.0002848068029155356, |
| "loss": 3.249644470214844, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.3756003842459174, |
| "grad_norm": 1.0247918367385864, |
| "learning_rate": 0.0002847021446895681, |
| "loss": 3.253766632080078, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.3765609990393852, |
| "grad_norm": 1.4280420541763306, |
| "learning_rate": 0.0002845971465962708, |
| "loss": 3.2554515838623046, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.37752161383285304, |
| "grad_norm": 1.534204125404358, |
| "learning_rate": 0.00028449180890056464, |
| "loss": 3.2532962799072265, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.37848222862632086, |
| "grad_norm": 1.2856396436691284, |
| "learning_rate": 0.00028438613186822746, |
| "loss": 3.251531219482422, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.3794428434197887, |
| "grad_norm": 1.0911295413970947, |
| "learning_rate": 0.00028428011576589327, |
| "loss": 3.250580978393555, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.3804034582132565, |
| "grad_norm": 1.142147421836853, |
| "learning_rate": 0.00028417376086105155, |
| "loss": 3.254068374633789, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.3813640730067243, |
| "grad_norm": 1.335033655166626, |
| "learning_rate": 0.0002840670674220466, |
| "loss": 3.2494667053222654, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.38232468780019213, |
| "grad_norm": 1.410131812095642, |
| "learning_rate": 0.00028396003571807697, |
| "loss": 3.2515232086181642, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.38328530259365995, |
| "grad_norm": 1.3424606323242188, |
| "learning_rate": 0.0002838526660191946, |
| "loss": 3.250862884521484, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.38424591738712777, |
| "grad_norm": 0.8202113509178162, |
| "learning_rate": 0.0002837449585963043, |
| "loss": 3.249342346191406, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3852065321805956, |
| "grad_norm": 0.9713074564933777, |
| "learning_rate": 0.00028363691372116284, |
| "loss": 3.2535247802734375, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.3861671469740634, |
| "grad_norm": 0.7178729176521301, |
| "learning_rate": 0.0002835285316663786, |
| "loss": 3.2526882171630858, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.3871277617675312, |
| "grad_norm": 0.9730674624443054, |
| "learning_rate": 0.0002834198127054106, |
| "loss": 3.2526809692382814, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.38808837656099904, |
| "grad_norm": 1.3725463151931763, |
| "learning_rate": 0.000283310757112568, |
| "loss": 3.2512813568115235, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.38904899135446686, |
| "grad_norm": 1.5489643812179565, |
| "learning_rate": 0.0002832013651630091, |
| "loss": 3.2502918243408203, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.3900096061479347, |
| "grad_norm": 0.8844899535179138, |
| "learning_rate": 0.0002830916371327412, |
| "loss": 3.250738525390625, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.3909702209414025, |
| "grad_norm": 1.0500115156173706, |
| "learning_rate": 0.0002829815732986192, |
| "loss": 3.2540382385253905, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.3919308357348703, |
| "grad_norm": 1.0166023969650269, |
| "learning_rate": 0.00028287117393834563, |
| "loss": 3.2548370361328125, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.3928914505283381, |
| "grad_norm": 1.216377854347229, |
| "learning_rate": 0.00028276043933046926, |
| "loss": 3.248933792114258, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.39385206532180594, |
| "grad_norm": 0.9477090239524841, |
| "learning_rate": 0.0002826493697543849, |
| "loss": 3.2496387481689455, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.39481268011527376, |
| "grad_norm": 1.4546550512313843, |
| "learning_rate": 0.00028253796549033245, |
| "loss": 3.257366943359375, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.3957732949087416, |
| "grad_norm": 1.3477628231048584, |
| "learning_rate": 0.0002824262268193964, |
| "loss": 3.2546245574951174, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.3967339097022094, |
| "grad_norm": 1.1829005479812622, |
| "learning_rate": 0.00028231415402350476, |
| "loss": 3.2527755737304687, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.3976945244956772, |
| "grad_norm": 1.00674569606781, |
| "learning_rate": 0.00028220174738542876, |
| "loss": 3.249166488647461, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.39865513928914503, |
| "grad_norm": 1.143075942993164, |
| "learning_rate": 0.00028208900718878187, |
| "loss": 3.2508075714111326, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.39961575408261285, |
| "grad_norm": 0.9893442392349243, |
| "learning_rate": 0.00028197593371801915, |
| "loss": 3.256524658203125, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.40057636887608067, |
| "grad_norm": 0.9596649408340454, |
| "learning_rate": 0.00028186252725843664, |
| "loss": 3.2511486053466796, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.4015369836695485, |
| "grad_norm": 1.4209188222885132, |
| "learning_rate": 0.00028174878809617037, |
| "loss": 3.2548614501953126, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.4024975984630163, |
| "grad_norm": 1.0415889024734497, |
| "learning_rate": 0.000281634716518196, |
| "loss": 3.2491207122802734, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.4034582132564842, |
| "grad_norm": 1.0843944549560547, |
| "learning_rate": 0.00028152031281232775, |
| "loss": 3.2516613006591797, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.404418828049952, |
| "grad_norm": 1.5569794178009033, |
| "learning_rate": 0.00028140557726721795, |
| "loss": 3.2495628356933595, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.4053794428434198, |
| "grad_norm": 0.8837640881538391, |
| "learning_rate": 0.00028129051017235614, |
| "loss": 3.249878692626953, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.40634005763688763, |
| "grad_norm": 1.1749989986419678, |
| "learning_rate": 0.0002811751118180684, |
| "loss": 3.2475624084472656, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.40730067243035545, |
| "grad_norm": 1.2945916652679443, |
| "learning_rate": 0.00028105938249551666, |
| "loss": 3.2487873077392577, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.40826128722382327, |
| "grad_norm": 0.9443672299385071, |
| "learning_rate": 0.0002809433224966978, |
| "loss": 3.247914123535156, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.4092219020172911, |
| "grad_norm": 1.1064391136169434, |
| "learning_rate": 0.0002808269321144431, |
| "loss": 3.250864028930664, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.4101825168107589, |
| "grad_norm": 1.2150980234146118, |
| "learning_rate": 0.00028071021164241755, |
| "loss": 3.248881530761719, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.4111431316042267, |
| "grad_norm": 0.8171827793121338, |
| "learning_rate": 0.00028059316137511877, |
| "loss": 3.246689224243164, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.41210374639769454, |
| "grad_norm": 1.093009114265442, |
| "learning_rate": 0.0002804757816078766, |
| "loss": 3.2493213653564452, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.41306436119116235, |
| "grad_norm": 1.021937370300293, |
| "learning_rate": 0.00028035807263685224, |
| "loss": 3.2454933166503905, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.4140249759846302, |
| "grad_norm": 1.1384049654006958, |
| "learning_rate": 0.0002802400347590376, |
| "loss": 3.2516769409179687, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.414985590778098, |
| "grad_norm": 1.0551131963729858, |
| "learning_rate": 0.00028012166827225425, |
| "loss": 3.2476879119873048, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.4159462055715658, |
| "grad_norm": 0.9579499363899231, |
| "learning_rate": 0.0002800029734751529, |
| "loss": 3.247665023803711, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.4169068203650336, |
| "grad_norm": 1.290895938873291, |
| "learning_rate": 0.0002798839506672129, |
| "loss": 3.248125457763672, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.41786743515850144, |
| "grad_norm": 1.1702419519424438, |
| "learning_rate": 0.00027976460014874087, |
| "loss": 3.2500679016113283, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.41882804995196926, |
| "grad_norm": 1.2696362733840942, |
| "learning_rate": 0.0002796449222208704, |
| "loss": 3.2472507476806642, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.4197886647454371, |
| "grad_norm": 1.1748182773590088, |
| "learning_rate": 0.0002795249171855613, |
| "loss": 3.245960998535156, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.4207492795389049, |
| "grad_norm": 1.2495172023773193, |
| "learning_rate": 0.00027940458534559846, |
| "loss": 3.2419815063476562, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.4217098943323727, |
| "grad_norm": 1.2086557149887085, |
| "learning_rate": 0.0002792839270045916, |
| "loss": 3.2496551513671874, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.42267050912584053, |
| "grad_norm": 0.9928983449935913, |
| "learning_rate": 0.0002791629424669739, |
| "loss": 3.2451755523681642, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.42363112391930835, |
| "grad_norm": 1.264974594116211, |
| "learning_rate": 0.00027904163203800185, |
| "loss": 3.2482452392578125, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.42459173871277617, |
| "grad_norm": 1.2751965522766113, |
| "learning_rate": 0.0002789199960237542, |
| "loss": 3.251111602783203, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.425552353506244, |
| "grad_norm": 1.115212082862854, |
| "learning_rate": 0.0002787980347311309, |
| "loss": 3.2442550659179688, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.4265129682997118, |
| "grad_norm": 1.3036905527114868, |
| "learning_rate": 0.00027867574846785295, |
| "loss": 3.250843048095703, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.4274735830931796, |
| "grad_norm": 1.3169866800308228, |
| "learning_rate": 0.0002785531375424611, |
| "loss": 3.2423397064208985, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.42843419788664744, |
| "grad_norm": 1.1663439273834229, |
| "learning_rate": 0.00027843020226431535, |
| "loss": 3.2481651306152344, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.42939481268011526, |
| "grad_norm": 1.014664649963379, |
| "learning_rate": 0.0002783069429435939, |
| "loss": 3.245722198486328, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.4303554274735831, |
| "grad_norm": 1.1078412532806396, |
| "learning_rate": 0.00027818335989129275, |
| "loss": 3.2450366973876954, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.4313160422670509, |
| "grad_norm": 1.0631850957870483, |
| "learning_rate": 0.00027805945341922457, |
| "loss": 3.2482345581054686, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.4322766570605187, |
| "grad_norm": 1.182057499885559, |
| "learning_rate": 0.0002779352238400181, |
| "loss": 3.2490921020507812, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.4332372718539865, |
| "grad_norm": 1.021649718284607, |
| "learning_rate": 0.0002778106714671174, |
| "loss": 3.2460681915283205, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.43419788664745435, |
| "grad_norm": 1.0771024227142334, |
| "learning_rate": 0.00027768579661478077, |
| "loss": 3.2438682556152343, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.43515850144092216, |
| "grad_norm": 1.1866998672485352, |
| "learning_rate": 0.0002775605995980803, |
| "loss": 3.2406051635742186, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.43611911623439004, |
| "grad_norm": 1.2904285192489624, |
| "learning_rate": 0.00027743508073290097, |
| "loss": 3.247574234008789, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.43707973102785785, |
| "grad_norm": 0.990243136882782, |
| "learning_rate": 0.0002773092403359397, |
| "loss": 3.2439697265625, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.43804034582132567, |
| "grad_norm": 0.976154625415802, |
| "learning_rate": 0.0002771830787247047, |
| "loss": 3.251276397705078, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.4390009606147935, |
| "grad_norm": 1.2034424543380737, |
| "learning_rate": 0.0002770565962175148, |
| "loss": 3.2444232940673827, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.4399615754082613, |
| "grad_norm": 1.4228894710540771, |
| "learning_rate": 0.00027692979313349815, |
| "loss": 3.2472274780273436, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.4409221902017291, |
| "grad_norm": 1.3287744522094727, |
| "learning_rate": 0.000276802669792592, |
| "loss": 3.247750091552734, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.44188280499519694, |
| "grad_norm": 1.1008442640304565, |
| "learning_rate": 0.0002766752265155416, |
| "loss": 3.2413604736328123, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.44284341978866476, |
| "grad_norm": 1.3397494554519653, |
| "learning_rate": 0.0002765474636238994, |
| "loss": 3.243372344970703, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.4438040345821326, |
| "grad_norm": 1.2448689937591553, |
| "learning_rate": 0.00027641938144002434, |
| "loss": 3.24469108581543, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.4447646493756004, |
| "grad_norm": 1.2328872680664062, |
| "learning_rate": 0.0002762909802870809, |
| "loss": 3.2434837341308596, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.4457252641690682, |
| "grad_norm": 1.3019057512283325, |
| "learning_rate": 0.0002761622604890382, |
| "loss": 3.2452354431152344, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.44668587896253603, |
| "grad_norm": 1.0617128610610962, |
| "learning_rate": 0.0002760332223706697, |
| "loss": 3.243347930908203, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.44764649375600385, |
| "grad_norm": 1.2202798128128052, |
| "learning_rate": 0.0002759038662575518, |
| "loss": 3.2428775787353517, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.44860710854947167, |
| "grad_norm": 0.8098715543746948, |
| "learning_rate": 0.0002757741924760631, |
| "loss": 3.2433307647705076, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.4495677233429395, |
| "grad_norm": 1.2258954048156738, |
| "learning_rate": 0.0002756442013533839, |
| "loss": 3.2418006896972655, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.4505283381364073, |
| "grad_norm": 1.155310869216919, |
| "learning_rate": 0.0002755138932174952, |
| "loss": 3.2446517944335938, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.4514889529298751, |
| "grad_norm": 0.9408679604530334, |
| "learning_rate": 0.0002753832683971778, |
| "loss": 3.2486572265625, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.45244956772334294, |
| "grad_norm": 1.1427699327468872, |
| "learning_rate": 0.0002752523272220114, |
| "loss": 3.2479103088378904, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.45341018251681076, |
| "grad_norm": 0.9570598602294922, |
| "learning_rate": 0.00027512107002237415, |
| "loss": 3.247629165649414, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.4543707973102786, |
| "grad_norm": 0.8978891968727112, |
| "learning_rate": 0.0002749894971294414, |
| "loss": 3.242329406738281, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.4553314121037464, |
| "grad_norm": 1.0361249446868896, |
| "learning_rate": 0.000274857608875185, |
| "loss": 3.243325042724609, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.4562920268972142, |
| "grad_norm": 0.9221097230911255, |
| "learning_rate": 0.0002747254055923726, |
| "loss": 3.2444053649902345, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.457252641690682, |
| "grad_norm": 1.2986423969268799, |
| "learning_rate": 0.0002745928876145667, |
| "loss": 3.2442516326904296, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.45821325648414984, |
| "grad_norm": 0.9883876442909241, |
| "learning_rate": 0.0002744600552761236, |
| "loss": 3.2429885864257812, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.45917387127761766, |
| "grad_norm": 1.4296432733535767, |
| "learning_rate": 0.000274326908912193, |
| "loss": 3.2417251586914064, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.4601344860710855, |
| "grad_norm": 1.216537594795227, |
| "learning_rate": 0.00027419344885871685, |
| "loss": 3.2435916900634765, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.4610951008645533, |
| "grad_norm": 1.0442991256713867, |
| "learning_rate": 0.0002740596754524285, |
| "loss": 3.2400821685791015, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.4620557156580211, |
| "grad_norm": 1.4031963348388672, |
| "learning_rate": 0.0002739255890308521, |
| "loss": 3.2466384887695314, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.46301633045148893, |
| "grad_norm": 1.301132082939148, |
| "learning_rate": 0.0002737911899323013, |
| "loss": 3.241625213623047, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.46397694524495675, |
| "grad_norm": 1.3836039304733276, |
| "learning_rate": 0.00027365647849587893, |
| "loss": 3.2370471954345703, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.46493756003842457, |
| "grad_norm": 0.9351392984390259, |
| "learning_rate": 0.0002735214550614757, |
| "loss": 3.2448040008544923, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.4658981748318924, |
| "grad_norm": 0.9278304576873779, |
| "learning_rate": 0.00027338611996976955, |
| "loss": 3.239888000488281, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.4668587896253602, |
| "grad_norm": 0.8525119423866272, |
| "learning_rate": 0.0002732504735622248, |
| "loss": 3.2448410034179687, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.4678194044188281, |
| "grad_norm": 1.2899324893951416, |
| "learning_rate": 0.00027311451618109144, |
| "loss": 3.2423675537109373, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.4687800192122959, |
| "grad_norm": 0.8825019598007202, |
| "learning_rate": 0.00027297824816940365, |
| "loss": 3.240159606933594, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.4697406340057637, |
| "grad_norm": 1.301020860671997, |
| "learning_rate": 0.00027284166987097977, |
| "loss": 3.24395751953125, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.47070124879923153, |
| "grad_norm": 0.9350459575653076, |
| "learning_rate": 0.0002727047816304208, |
| "loss": 3.2402999877929686, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.47166186359269935, |
| "grad_norm": 1.1744569540023804, |
| "learning_rate": 0.00027256758379310975, |
| "loss": 3.239767837524414, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.47262247838616717, |
| "grad_norm": 0.9879583716392517, |
| "learning_rate": 0.00027243007670521086, |
| "loss": 3.242339324951172, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.473583093179635, |
| "grad_norm": 0.8559085726737976, |
| "learning_rate": 0.00027229226071366874, |
| "loss": 3.241751861572266, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.4745437079731028, |
| "grad_norm": 1.3251603841781616, |
| "learning_rate": 0.00027215413616620714, |
| "loss": 3.2432788848876952, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.4755043227665706, |
| "grad_norm": 1.1395015716552734, |
| "learning_rate": 0.00027201570341132855, |
| "loss": 3.239895248413086, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.47646493756003844, |
| "grad_norm": 1.1372860670089722, |
| "learning_rate": 0.0002718769627983129, |
| "loss": 3.2404502868652343, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.47742555235350626, |
| "grad_norm": 1.014133334159851, |
| "learning_rate": 0.00027173791467721714, |
| "loss": 3.238873291015625, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.4783861671469741, |
| "grad_norm": 1.2768558263778687, |
| "learning_rate": 0.00027159855939887386, |
| "loss": 3.2407760620117188, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.4793467819404419, |
| "grad_norm": 1.0559134483337402, |
| "learning_rate": 0.0002714588973148907, |
| "loss": 3.2391632080078123, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.4803073967339097, |
| "grad_norm": 1.212760090827942, |
| "learning_rate": 0.00027131892877764945, |
| "loss": 3.242381286621094, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.4812680115273775, |
| "grad_norm": 1.3705852031707764, |
| "learning_rate": 0.0002711786541403051, |
| "loss": 3.2453689575195312, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.48222862632084534, |
| "grad_norm": 1.1016364097595215, |
| "learning_rate": 0.00027103807375678494, |
| "loss": 3.240717315673828, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.48318924111431316, |
| "grad_norm": 1.0440564155578613, |
| "learning_rate": 0.0002708971879817877, |
| "loss": 3.2409019470214844, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.484149855907781, |
| "grad_norm": 1.175846815109253, |
| "learning_rate": 0.0002707559971707827, |
| "loss": 3.2366703033447264, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.4851104707012488, |
| "grad_norm": 1.2363582849502563, |
| "learning_rate": 0.00027061450168000875, |
| "loss": 3.235219192504883, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.4860710854947166, |
| "grad_norm": 1.2251267433166504, |
| "learning_rate": 0.00027047270186647353, |
| "loss": 3.2362335205078123, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.48703170028818443, |
| "grad_norm": 0.8210157752037048, |
| "learning_rate": 0.00027033059808795253, |
| "loss": 3.2397132873535157, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.48799231508165225, |
| "grad_norm": 1.229904055595398, |
| "learning_rate": 0.0002701881907029881, |
| "loss": 3.2385082244873047, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.48895292987512007, |
| "grad_norm": 1.1676335334777832, |
| "learning_rate": 0.00027004548007088876, |
| "loss": 3.2385066986083983, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.4899135446685879, |
| "grad_norm": 1.271478533744812, |
| "learning_rate": 0.000269902466551728, |
| "loss": 3.2415950775146483, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.4908741594620557, |
| "grad_norm": 1.395169973373413, |
| "learning_rate": 0.00026975915050634367, |
| "loss": 3.2399360656738283, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.4918347742555235, |
| "grad_norm": 1.3647792339324951, |
| "learning_rate": 0.00026961553229633685, |
| "loss": 3.243121337890625, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.49279538904899134, |
| "grad_norm": 1.0769857168197632, |
| "learning_rate": 0.000269471612284071, |
| "loss": 3.2393962860107424, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.49375600384245916, |
| "grad_norm": 0.9694387316703796, |
| "learning_rate": 0.00026932739083267105, |
| "loss": 3.2386848449707033, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.494716618635927, |
| "grad_norm": 0.9222965240478516, |
| "learning_rate": 0.00026918286830602254, |
| "loss": 3.238786315917969, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.4956772334293948, |
| "grad_norm": 1.1549302339553833, |
| "learning_rate": 0.00026903804506877064, |
| "loss": 3.239440155029297, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.4966378482228626, |
| "grad_norm": 1.0019875764846802, |
| "learning_rate": 0.0002688929214863192, |
| "loss": 3.237133026123047, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.49759846301633043, |
| "grad_norm": 1.1600396633148193, |
| "learning_rate": 0.0002687474979248299, |
| "loss": 3.2444324493408203, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.49855907780979825, |
| "grad_norm": 1.2407069206237793, |
| "learning_rate": 0.00026860177475122133, |
| "loss": 3.2405441284179686, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.49951969260326606, |
| "grad_norm": 1.313706874847412, |
| "learning_rate": 0.00026845575233316806, |
| "loss": 3.2400081634521483, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.5004803073967339, |
| "grad_norm": 1.102402925491333, |
| "learning_rate": 0.00026830943103909954, |
| "loss": 3.236441421508789, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.5014409221902018, |
| "grad_norm": 1.4062278270721436, |
| "learning_rate": 0.00026816281123819946, |
| "loss": 3.24122314453125, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.5024015369836695, |
| "grad_norm": 1.1859440803527832, |
| "learning_rate": 0.00026801589330040465, |
| "loss": 3.238700103759766, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.5033621517771374, |
| "grad_norm": 0.8828426599502563, |
| "learning_rate": 0.000267868677596404, |
| "loss": 3.2366287231445314, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.5043227665706052, |
| "grad_norm": 1.1908822059631348, |
| "learning_rate": 0.000267721164497638, |
| "loss": 3.2335018157958983, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.505283381364073, |
| "grad_norm": 1.3185487985610962, |
| "learning_rate": 0.00026757335437629725, |
| "loss": 3.2354408264160157, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.5062439961575408, |
| "grad_norm": 1.1835713386535645, |
| "learning_rate": 0.00026742524760532183, |
| "loss": 3.235929489135742, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.5072046109510087, |
| "grad_norm": 1.6619583368301392, |
| "learning_rate": 0.00026727684455840037, |
| "loss": 3.236317443847656, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.5081652257444764, |
| "grad_norm": 1.0703696012496948, |
| "learning_rate": 0.00026712814560996896, |
| "loss": 3.2386764526367187, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.5091258405379443, |
| "grad_norm": 1.149133324623108, |
| "learning_rate": 0.0002669791511352103, |
| "loss": 3.23681526184082, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.5100864553314121, |
| "grad_norm": 1.1290984153747559, |
| "learning_rate": 0.0002668298615100527, |
| "loss": 3.2398101806640627, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.5110470701248799, |
| "grad_norm": 1.0490314960479736, |
| "learning_rate": 0.0002666802771111693, |
| "loss": 3.238353729248047, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.5120076849183477, |
| "grad_norm": 0.849686861038208, |
| "learning_rate": 0.0002665303983159767, |
| "loss": 3.2377159118652346, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.5129682997118156, |
| "grad_norm": 1.13809072971344, |
| "learning_rate": 0.00026638022550263467, |
| "loss": 3.2373073577880858, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.5139289145052833, |
| "grad_norm": 1.1344239711761475, |
| "learning_rate": 0.0002662297590500445, |
| "loss": 3.233965301513672, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.5148895292987512, |
| "grad_norm": 1.3232028484344482, |
| "learning_rate": 0.0002660789993378486, |
| "loss": 3.236408233642578, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.515850144092219, |
| "grad_norm": 1.2224704027175903, |
| "learning_rate": 0.0002659279467464291, |
| "loss": 3.238209915161133, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.5168107588856868, |
| "grad_norm": 1.106103777885437, |
| "learning_rate": 0.0002657766016569072, |
| "loss": 3.2342296600341798, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.5177713736791547, |
| "grad_norm": 0.8532865643501282, |
| "learning_rate": 0.00026562496445114205, |
| "loss": 3.238364410400391, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.5187319884726225, |
| "grad_norm": 0.8444566130638123, |
| "learning_rate": 0.00026547303551172996, |
| "loss": 3.236472320556641, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.5196926032660903, |
| "grad_norm": 0.8221146464347839, |
| "learning_rate": 0.0002653208152220032, |
| "loss": 3.2367286682128906, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.5206532180595581, |
| "grad_norm": 1.1430490016937256, |
| "learning_rate": 0.00026516830396602914, |
| "loss": 3.2309627532958984, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.521613832853026, |
| "grad_norm": 1.3973969221115112, |
| "learning_rate": 0.00026501550212860937, |
| "loss": 3.2348331451416015, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.5225744476464937, |
| "grad_norm": 1.2749481201171875, |
| "learning_rate": 0.0002648624100952786, |
| "loss": 3.2363750457763674, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.5235350624399616, |
| "grad_norm": 1.2370373010635376, |
| "learning_rate": 0.00026470902825230367, |
| "loss": 3.2376075744628907, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.5244956772334294, |
| "grad_norm": 1.0846977233886719, |
| "learning_rate": 0.0002645553569866828, |
| "loss": 3.2337451934814454, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.5254562920268973, |
| "grad_norm": 1.026764154434204, |
| "learning_rate": 0.00026440139668614427, |
| "loss": 3.237920379638672, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.526416906820365, |
| "grad_norm": 1.045480489730835, |
| "learning_rate": 0.00026424714773914574, |
| "loss": 3.231882858276367, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.5273775216138329, |
| "grad_norm": 0.9559823274612427, |
| "learning_rate": 0.0002640926105348732, |
| "loss": 3.23189697265625, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.5283381364073007, |
| "grad_norm": 1.3470317125320435, |
| "learning_rate": 0.00026393778546323976, |
| "loss": 3.2387470245361327, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.5292987512007685, |
| "grad_norm": 1.1389282941818237, |
| "learning_rate": 0.00026378267291488506, |
| "loss": 3.2349002838134764, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.5302593659942363, |
| "grad_norm": 1.0445148944854736, |
| "learning_rate": 0.00026362727328117384, |
| "loss": 3.2324363708496096, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.5312199807877042, |
| "grad_norm": 1.0291074514389038, |
| "learning_rate": 0.00026347158695419546, |
| "loss": 3.2345924377441406, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.5321805955811719, |
| "grad_norm": 1.23056161403656, |
| "learning_rate": 0.00026331561432676244, |
| "loss": 3.2309837341308594, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.5331412103746398, |
| "grad_norm": 0.658456563949585, |
| "learning_rate": 0.0002631593557924097, |
| "loss": 3.232289123535156, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.5341018251681076, |
| "grad_norm": 1.1292603015899658, |
| "learning_rate": 0.0002630028117453936, |
| "loss": 3.2309852600097657, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.5350624399615754, |
| "grad_norm": 0.8553731441497803, |
| "learning_rate": 0.00026284598258069074, |
| "loss": 3.235504150390625, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.5360230547550432, |
| "grad_norm": 0.8991851210594177, |
| "learning_rate": 0.00026268886869399723, |
| "loss": 3.233042907714844, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.5369836695485111, |
| "grad_norm": 0.916896402835846, |
| "learning_rate": 0.0002625314704817276, |
| "loss": 3.2310935974121096, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.5379442843419788, |
| "grad_norm": 1.1668161153793335, |
| "learning_rate": 0.0002623737883410136, |
| "loss": 3.232282257080078, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.5389048991354467, |
| "grad_norm": 1.223141074180603, |
| "learning_rate": 0.00026221582266970346, |
| "loss": 3.2328109741210938, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.5398655139289145, |
| "grad_norm": 1.0517135858535767, |
| "learning_rate": 0.00026205757386636085, |
| "loss": 3.2323383331298827, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.5408261287223823, |
| "grad_norm": 0.7666857242584229, |
| "learning_rate": 0.0002618990423302636, |
| "loss": 3.234081268310547, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.5417867435158501, |
| "grad_norm": 1.253327488899231, |
| "learning_rate": 0.0002617402284614031, |
| "loss": 3.233143997192383, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.542747358309318, |
| "grad_norm": 0.7495427131652832, |
| "learning_rate": 0.00026158113266048317, |
| "loss": 3.236143112182617, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.5437079731027857, |
| "grad_norm": 1.1047379970550537, |
| "learning_rate": 0.0002614217553289186, |
| "loss": 3.234874725341797, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.5446685878962536, |
| "grad_norm": 1.2154449224472046, |
| "learning_rate": 0.0002612620968688349, |
| "loss": 3.2348342895507813, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.5456292026897214, |
| "grad_norm": 1.1259676218032837, |
| "learning_rate": 0.0002611021576830667, |
| "loss": 3.232221221923828, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.5465898174831892, |
| "grad_norm": 1.2192593812942505, |
| "learning_rate": 0.00026094193817515697, |
| "loss": 3.2312171936035154, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.547550432276657, |
| "grad_norm": 1.3673197031021118, |
| "learning_rate": 0.00026078143874935604, |
| "loss": 3.229291534423828, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.5485110470701249, |
| "grad_norm": 1.3008506298065186, |
| "learning_rate": 0.00026062065981062024, |
| "loss": 3.2330032348632813, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.5494716618635928, |
| "grad_norm": 1.1764352321624756, |
| "learning_rate": 0.00026045960176461155, |
| "loss": 3.232608413696289, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.5504322766570605, |
| "grad_norm": 1.0877325534820557, |
| "learning_rate": 0.0002602982650176958, |
| "loss": 3.232305908203125, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.5513928914505284, |
| "grad_norm": 1.1681047677993774, |
| "learning_rate": 0.0002601366499769422, |
| "loss": 3.233287811279297, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.5523535062439962, |
| "grad_norm": 1.0753793716430664, |
| "learning_rate": 0.00025997475705012203, |
| "loss": 3.2348102569580077, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.553314121037464, |
| "grad_norm": 0.9505957365036011, |
| "learning_rate": 0.0002598125866457078, |
| "loss": 3.2324668884277346, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.5542747358309318, |
| "grad_norm": 1.2333307266235352, |
| "learning_rate": 0.0002596501391728721, |
| "loss": 3.235234832763672, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.5552353506243997, |
| "grad_norm": 1.0360201597213745, |
| "learning_rate": 0.00025948741504148656, |
| "loss": 3.234451675415039, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.5561959654178674, |
| "grad_norm": 0.9275029301643372, |
| "learning_rate": 0.0002593244146621208, |
| "loss": 3.2318180084228514, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.5571565802113353, |
| "grad_norm": 0.8183401823043823, |
| "learning_rate": 0.00025916113844604157, |
| "loss": 3.2336944580078124, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.5581171950048031, |
| "grad_norm": 0.7255450487136841, |
| "learning_rate": 0.0002589975868052114, |
| "loss": 3.2311965942382814, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.5590778097982709, |
| "grad_norm": 1.1232422590255737, |
| "learning_rate": 0.000258833760152288, |
| "loss": 3.2349414825439453, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.5600384245917387, |
| "grad_norm": 1.311683177947998, |
| "learning_rate": 0.00025866965890062276, |
| "loss": 3.233409881591797, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.5609990393852066, |
| "grad_norm": 1.104836106300354, |
| "learning_rate": 0.0002585052834642599, |
| "loss": 3.228401947021484, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.5619596541786743, |
| "grad_norm": 1.170188069343567, |
| "learning_rate": 0.0002583406342579357, |
| "loss": 3.2292003631591797, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.5629202689721422, |
| "grad_norm": 1.1829124689102173, |
| "learning_rate": 0.0002581757116970768, |
| "loss": 3.2359298706054687, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.56388088376561, |
| "grad_norm": 0.885960042476654, |
| "learning_rate": 0.00025801051619779987, |
| "loss": 3.2320457458496095, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.5648414985590778, |
| "grad_norm": 1.0792263746261597, |
| "learning_rate": 0.0002578450481769099, |
| "loss": 3.2303844451904298, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.5658021133525456, |
| "grad_norm": 1.2442518472671509, |
| "learning_rate": 0.0002576793080519, |
| "loss": 3.2286468505859376, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.5667627281460135, |
| "grad_norm": 0.976241409778595, |
| "learning_rate": 0.00025751329624094926, |
| "loss": 3.226253128051758, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.5677233429394812, |
| "grad_norm": 1.2932735681533813, |
| "learning_rate": 0.00025734701316292263, |
| "loss": 3.2291339874267577, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.5686839577329491, |
| "grad_norm": 1.17381751537323, |
| "learning_rate": 0.0002571804592373693, |
| "loss": 3.2318046569824217, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.5696445725264169, |
| "grad_norm": 1.1502513885498047, |
| "learning_rate": 0.00025701363488452196, |
| "loss": 3.230632781982422, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.5706051873198847, |
| "grad_norm": 1.2376641035079956, |
| "learning_rate": 0.00025684654052529556, |
| "loss": 3.2285648345947267, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.5715658021133525, |
| "grad_norm": 1.2463771104812622, |
| "learning_rate": 0.0002566791765812862, |
| "loss": 3.232989501953125, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.5725264169068204, |
| "grad_norm": 1.148836612701416, |
| "learning_rate": 0.0002565115434747705, |
| "loss": 3.2307807922363283, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.5734870317002881, |
| "grad_norm": 1.313881516456604, |
| "learning_rate": 0.00025634364162870386, |
| "loss": 3.2307106018066407, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.574447646493756, |
| "grad_norm": 1.0571279525756836, |
| "learning_rate": 0.0002561754714667198, |
| "loss": 3.230280303955078, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.5754082612872238, |
| "grad_norm": 0.9680122137069702, |
| "learning_rate": 0.000256007033413129, |
| "loss": 3.2283321380615235, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.5763688760806917, |
| "grad_norm": 1.11472749710083, |
| "learning_rate": 0.00025583832789291794, |
| "loss": 3.229805755615234, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.5773294908741594, |
| "grad_norm": 1.0987354516983032, |
| "learning_rate": 0.0002556693553317479, |
| "loss": 3.2307418823242187, |
| "step": 3005 |
| }, |
| { |
| "epoch": 0.5782901056676273, |
| "grad_norm": 0.8845497369766235, |
| "learning_rate": 0.0002555001161559539, |
| "loss": 3.226715850830078, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.579250720461095, |
| "grad_norm": 1.37449049949646, |
| "learning_rate": 0.000255330610792544, |
| "loss": 3.230916976928711, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.5802113352545629, |
| "grad_norm": 1.2311089038848877, |
| "learning_rate": 0.00025516083966919744, |
| "loss": 3.228615570068359, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.5811719500480308, |
| "grad_norm": 1.0169094800949097, |
| "learning_rate": 0.00025499080321426417, |
| "loss": 3.2289390563964844, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.5821325648414986, |
| "grad_norm": 1.280392050743103, |
| "learning_rate": 0.00025482050185676367, |
| "loss": 3.227009963989258, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.5830931796349664, |
| "grad_norm": 0.9485029578208923, |
| "learning_rate": 0.0002546499360263837, |
| "loss": 3.231037139892578, |
| "step": 3035 |
| }, |
| { |
| "epoch": 0.5840537944284342, |
| "grad_norm": 1.0025532245635986, |
| "learning_rate": 0.0002544791061534794, |
| "loss": 3.231890869140625, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.5850144092219021, |
| "grad_norm": 0.9485952854156494, |
| "learning_rate": 0.000254308012669072, |
| "loss": 3.2329727172851563, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.5859750240153698, |
| "grad_norm": 0.9976146817207336, |
| "learning_rate": 0.00025413665600484796, |
| "loss": 3.228594207763672, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.5869356388088377, |
| "grad_norm": 0.9106408953666687, |
| "learning_rate": 0.0002539650365931577, |
| "loss": 3.225025177001953, |
| "step": 3055 |
| }, |
| { |
| "epoch": 0.5878962536023055, |
| "grad_norm": 0.823363721370697, |
| "learning_rate": 0.00025379315486701454, |
| "loss": 3.227878189086914, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.5888568683957733, |
| "grad_norm": 0.9073953032493591, |
| "learning_rate": 0.00025362101126009387, |
| "loss": 3.2288444519042967, |
| "step": 3065 |
| }, |
| { |
| "epoch": 0.5898174831892411, |
| "grad_norm": 1.0130420923233032, |
| "learning_rate": 0.0002534486062067316, |
| "loss": 3.228134536743164, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.590778097982709, |
| "grad_norm": 0.8417114615440369, |
| "learning_rate": 0.00025327594014192326, |
| "loss": 3.2314247131347655, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.5917387127761767, |
| "grad_norm": 0.8221418261528015, |
| "learning_rate": 0.0002531030135013232, |
| "loss": 3.226838302612305, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.5926993275696446, |
| "grad_norm": 1.0958610773086548, |
| "learning_rate": 0.0002529298267212429, |
| "loss": 3.2264358520507814, |
| "step": 3085 |
| }, |
| { |
| "epoch": 0.5936599423631124, |
| "grad_norm": 1.3347340822219849, |
| "learning_rate": 0.00025275638023865055, |
| "loss": 3.2262367248535155, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.5946205571565802, |
| "grad_norm": 0.8695631623268127, |
| "learning_rate": 0.0002525826744911693, |
| "loss": 3.228030776977539, |
| "step": 3095 |
| }, |
| { |
| "epoch": 0.595581171950048, |
| "grad_norm": 0.8885836601257324, |
| "learning_rate": 0.00025240870991707665, |
| "loss": 3.229400634765625, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.5965417867435159, |
| "grad_norm": 0.8682327270507812, |
| "learning_rate": 0.000252234486955303, |
| "loss": 3.2293159484863283, |
| "step": 3105 |
| }, |
| { |
| "epoch": 0.5975024015369836, |
| "grad_norm": 1.130051851272583, |
| "learning_rate": 0.00025206000604543083, |
| "loss": 3.2304660797119142, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.5984630163304515, |
| "grad_norm": 1.0737762451171875, |
| "learning_rate": 0.00025188526762769326, |
| "loss": 3.2278984069824217, |
| "step": 3115 |
| }, |
| { |
| "epoch": 0.5994236311239193, |
| "grad_norm": 1.096584677696228, |
| "learning_rate": 0.0002517102721429734, |
| "loss": 3.2275615692138673, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.6003842459173871, |
| "grad_norm": 1.1471421718597412, |
| "learning_rate": 0.00025153502003280267, |
| "loss": 3.225168991088867, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.6013448607108549, |
| "grad_norm": 0.8774202466011047, |
| "learning_rate": 0.0002513595117393602, |
| "loss": 3.225673294067383, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.6023054755043228, |
| "grad_norm": 1.1272785663604736, |
| "learning_rate": 0.00025118374770547136, |
| "loss": 3.228855514526367, |
| "step": 3135 |
| }, |
| { |
| "epoch": 0.6032660902977905, |
| "grad_norm": 1.0108392238616943, |
| "learning_rate": 0.0002510077283746069, |
| "loss": 3.226605224609375, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.6042267050912584, |
| "grad_norm": 0.9391249418258667, |
| "learning_rate": 0.00025083145419088165, |
| "loss": 3.2272396087646484, |
| "step": 3145 |
| }, |
| { |
| "epoch": 0.6051873198847262, |
| "grad_norm": 1.0647284984588623, |
| "learning_rate": 0.0002506549255990534, |
| "loss": 3.2265804290771483, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.6061479346781941, |
| "grad_norm": 1.143602967262268, |
| "learning_rate": 0.00025047814304452197, |
| "loss": 3.2242530822753905, |
| "step": 3155 |
| }, |
| { |
| "epoch": 0.6071085494716618, |
| "grad_norm": 1.098870873451233, |
| "learning_rate": 0.0002503011069733278, |
| "loss": 3.225702667236328, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.6080691642651297, |
| "grad_norm": 1.1901029348373413, |
| "learning_rate": 0.00025012381783215117, |
| "loss": 3.2249305725097654, |
| "step": 3165 |
| }, |
| { |
| "epoch": 0.6090297790585975, |
| "grad_norm": 1.254163384437561, |
| "learning_rate": 0.0002499462760683106, |
| "loss": 3.2239353179931642, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.6099903938520653, |
| "grad_norm": 1.3494505882263184, |
| "learning_rate": 0.00024976848212976237, |
| "loss": 3.225982666015625, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.6109510086455331, |
| "grad_norm": 0.9600538015365601, |
| "learning_rate": 0.00024959043646509866, |
| "loss": 3.2219696044921875, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.611911623439001, |
| "grad_norm": 1.1406611204147339, |
| "learning_rate": 0.00024941213952354703, |
| "loss": 3.225663757324219, |
| "step": 3185 |
| }, |
| { |
| "epoch": 0.6128722382324687, |
| "grad_norm": 1.276875376701355, |
| "learning_rate": 0.000249233591754969, |
| "loss": 3.226406478881836, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.6138328530259366, |
| "grad_norm": 1.0921087265014648, |
| "learning_rate": 0.00024905479360985884, |
| "loss": 3.227063751220703, |
| "step": 3195 |
| }, |
| { |
| "epoch": 0.6147934678194045, |
| "grad_norm": 1.0305696725845337, |
| "learning_rate": 0.0002488757455393426, |
| "loss": 3.2252223968505858, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.6157540826128722, |
| "grad_norm": 1.0612218379974365, |
| "learning_rate": 0.0002486964479951769, |
| "loss": 3.2239383697509765, |
| "step": 3205 |
| }, |
| { |
| "epoch": 0.6167146974063401, |
| "grad_norm": 1.011340856552124, |
| "learning_rate": 0.00024851690142974796, |
| "loss": 3.2294452667236326, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.6176753121998079, |
| "grad_norm": 1.0465643405914307, |
| "learning_rate": 0.0002483371062960701, |
| "loss": 3.2301746368408204, |
| "step": 3215 |
| }, |
| { |
| "epoch": 0.6186359269932757, |
| "grad_norm": 0.8956519961357117, |
| "learning_rate": 0.0002481570630477849, |
| "loss": 3.226789093017578, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.6195965417867435, |
| "grad_norm": 0.8143766522407532, |
| "learning_rate": 0.00024797677213915997, |
| "loss": 3.2227046966552733, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.6205571565802114, |
| "grad_norm": 1.0714408159255981, |
| "learning_rate": 0.00024779623402508766, |
| "loss": 3.225188064575195, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.6215177713736791, |
| "grad_norm": 1.0253653526306152, |
| "learning_rate": 0.0002476154491610843, |
| "loss": 3.225335693359375, |
| "step": 3235 |
| }, |
| { |
| "epoch": 0.622478386167147, |
| "grad_norm": 1.0260114669799805, |
| "learning_rate": 0.00024743441800328866, |
| "loss": 3.2250225067138674, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.6234390009606148, |
| "grad_norm": 1.0167043209075928, |
| "learning_rate": 0.0002472531410084607, |
| "loss": 3.222640609741211, |
| "step": 3245 |
| }, |
| { |
| "epoch": 0.6243996157540826, |
| "grad_norm": 1.0281862020492554, |
| "learning_rate": 0.00024707161863398114, |
| "loss": 3.223117446899414, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.6253602305475504, |
| "grad_norm": 1.1766680479049683, |
| "learning_rate": 0.0002468898513378494, |
| "loss": 3.2250850677490233, |
| "step": 3255 |
| }, |
| { |
| "epoch": 0.6263208453410183, |
| "grad_norm": 0.9479981064796448, |
| "learning_rate": 0.0002467078395786831, |
| "loss": 3.2229190826416017, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.627281460134486, |
| "grad_norm": 0.9690991640090942, |
| "learning_rate": 0.00024652558381571646, |
| "loss": 3.2238792419433593, |
| "step": 3265 |
| }, |
| { |
| "epoch": 0.6282420749279539, |
| "grad_norm": 1.127388596534729, |
| "learning_rate": 0.00024634308450879953, |
| "loss": 3.22578125, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.6292026897214217, |
| "grad_norm": 1.0700501203536987, |
| "learning_rate": 0.0002461603421183968, |
| "loss": 3.222941589355469, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.6301633045148896, |
| "grad_norm": 1.1317108869552612, |
| "learning_rate": 0.00024597735710558613, |
| "loss": 3.228685760498047, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.6311239193083573, |
| "grad_norm": 1.0256825685501099, |
| "learning_rate": 0.0002457941299320574, |
| "loss": 3.2236000061035157, |
| "step": 3285 |
| }, |
| { |
| "epoch": 0.6320845341018252, |
| "grad_norm": 1.0949205160140991, |
| "learning_rate": 0.0002456106610601115, |
| "loss": 3.223644256591797, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.633045148895293, |
| "grad_norm": 1.147100567817688, |
| "learning_rate": 0.0002454269509526593, |
| "loss": 3.222336196899414, |
| "step": 3295 |
| }, |
| { |
| "epoch": 0.6340057636887608, |
| "grad_norm": 1.0118300914764404, |
| "learning_rate": 0.0002452430000732203, |
| "loss": 3.2236583709716795, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.6349663784822286, |
| "grad_norm": 1.4460432529449463, |
| "learning_rate": 0.00024505880888592134, |
| "loss": 3.225933074951172, |
| "step": 3305 |
| }, |
| { |
| "epoch": 0.6359269932756965, |
| "grad_norm": 1.0727514028549194, |
| "learning_rate": 0.0002448743778554957, |
| "loss": 3.222777557373047, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.6368876080691642, |
| "grad_norm": 0.9514535069465637, |
| "learning_rate": 0.0002446897074472819, |
| "loss": 3.2218833923339845, |
| "step": 3315 |
| }, |
| { |
| "epoch": 0.6378482228626321, |
| "grad_norm": 0.8115454912185669, |
| "learning_rate": 0.0002445047981272221, |
| "loss": 3.2228424072265627, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.6388088376560999, |
| "grad_norm": 1.2015719413757324, |
| "learning_rate": 0.00024431965036186175, |
| "loss": 3.2233604431152343, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.6397694524495677, |
| "grad_norm": 1.0239508152008057, |
| "learning_rate": 0.00024413426461834746, |
| "loss": 3.223210906982422, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.6407300672430355, |
| "grad_norm": 0.8938039541244507, |
| "learning_rate": 0.0002439486413644265, |
| "loss": 3.2242591857910154, |
| "step": 3335 |
| }, |
| { |
| "epoch": 0.6416906820365034, |
| "grad_norm": 1.0734831094741821, |
| "learning_rate": 0.00024376278106844548, |
| "loss": 3.221482849121094, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.6426512968299711, |
| "grad_norm": 0.9349012970924377, |
| "learning_rate": 0.00024357668419934894, |
| "loss": 3.2233955383300783, |
| "step": 3345 |
| }, |
| { |
| "epoch": 0.643611911623439, |
| "grad_norm": 1.0495328903198242, |
| "learning_rate": 0.00024339035122667837, |
| "loss": 3.2246463775634764, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.6445725264169068, |
| "grad_norm": 1.152726173400879, |
| "learning_rate": 0.00024320378262057102, |
| "loss": 3.221533203125, |
| "step": 3355 |
| }, |
| { |
| "epoch": 0.6455331412103746, |
| "grad_norm": 1.1071127653121948, |
| "learning_rate": 0.00024301697885175847, |
| "loss": 3.2202079772949217, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.6464937560038425, |
| "grad_norm": 1.143314003944397, |
| "learning_rate": 0.00024282994039156598, |
| "loss": 3.228096771240234, |
| "step": 3365 |
| }, |
| { |
| "epoch": 0.6474543707973103, |
| "grad_norm": 1.0042517185211182, |
| "learning_rate": 0.00024264266771191066, |
| "loss": 3.2260887145996096, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.6484149855907781, |
| "grad_norm": 0.9083545804023743, |
| "learning_rate": 0.00024245516128530077, |
| "loss": 3.2202564239501954, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.6493756003842459, |
| "grad_norm": 0.8550533056259155, |
| "learning_rate": 0.0002422674215848342, |
| "loss": 3.22034912109375, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.6503362151777138, |
| "grad_norm": 0.9592113494873047, |
| "learning_rate": 0.0002420794490841975, |
| "loss": 3.22401123046875, |
| "step": 3385 |
| }, |
| { |
| "epoch": 0.6512968299711815, |
| "grad_norm": 0.8267397880554199, |
| "learning_rate": 0.00024189124425766454, |
| "loss": 3.2178253173828124, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.6522574447646494, |
| "grad_norm": 0.9734513759613037, |
| "learning_rate": 0.00024170280758009546, |
| "loss": 3.2190723419189453, |
| "step": 3395 |
| }, |
| { |
| "epoch": 0.6532180595581172, |
| "grad_norm": 1.0415617227554321, |
| "learning_rate": 0.00024151413952693524, |
| "loss": 3.2226608276367186, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.654178674351585, |
| "grad_norm": 0.9090771675109863, |
| "learning_rate": 0.00024132524057421287, |
| "loss": 3.221820831298828, |
| "step": 3405 |
| }, |
| { |
| "epoch": 0.6551392891450528, |
| "grad_norm": 0.8799076676368713, |
| "learning_rate": 0.0002411361111985396, |
| "loss": 3.2193443298339846, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.6560999039385207, |
| "grad_norm": 1.0817033052444458, |
| "learning_rate": 0.00024094675187710847, |
| "loss": 3.2185131072998048, |
| "step": 3415 |
| }, |
| { |
| "epoch": 0.6570605187319885, |
| "grad_norm": 0.9831625819206238, |
| "learning_rate": 0.0002407571630876923, |
| "loss": 3.2214286804199217, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.6580211335254563, |
| "grad_norm": 0.987235963344574, |
| "learning_rate": 0.00024056734530864313, |
| "loss": 3.219521713256836, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.6589817483189241, |
| "grad_norm": 1.047991394996643, |
| "learning_rate": 0.00024037729901889068, |
| "loss": 3.2219139099121095, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.659942363112392, |
| "grad_norm": 0.8672451376914978, |
| "learning_rate": 0.00024018702469794125, |
| "loss": 3.2201393127441404, |
| "step": 3435 |
| }, |
| { |
| "epoch": 0.6609029779058597, |
| "grad_norm": 1.1878256797790527, |
| "learning_rate": 0.0002399965228258764, |
| "loss": 3.2203353881835937, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.6618635926993276, |
| "grad_norm": 1.1251111030578613, |
| "learning_rate": 0.00023980579388335193, |
| "loss": 3.21944580078125, |
| "step": 3445 |
| }, |
| { |
| "epoch": 0.6628242074927954, |
| "grad_norm": 0.9487177729606628, |
| "learning_rate": 0.0002396148383515966, |
| "loss": 3.2208953857421876, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.6637848222862632, |
| "grad_norm": 0.9404230713844299, |
| "learning_rate": 0.00023942365671241072, |
| "loss": 3.22216796875, |
| "step": 3455 |
| }, |
| { |
| "epoch": 0.664745437079731, |
| "grad_norm": 1.1221790313720703, |
| "learning_rate": 0.00023923224944816522, |
| "loss": 3.220103454589844, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.6657060518731989, |
| "grad_norm": 1.1422487497329712, |
| "learning_rate": 0.00023904061704180027, |
| "loss": 3.2190887451171877, |
| "step": 3465 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.9574413299560547, |
| "learning_rate": 0.00023884875997682404, |
| "loss": 3.215955352783203, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.6676272814601345, |
| "grad_norm": 1.1348673105239868, |
| "learning_rate": 0.00023865667873731168, |
| "loss": 3.2160018920898437, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.6685878962536023, |
| "grad_norm": 1.0261040925979614, |
| "learning_rate": 0.00023846437380790368, |
| "loss": 3.221507263183594, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.6695485110470701, |
| "grad_norm": 1.009157657623291, |
| "learning_rate": 0.0002382718456738053, |
| "loss": 3.2182361602783205, |
| "step": 3485 |
| }, |
| { |
| "epoch": 0.6705091258405379, |
| "grad_norm": 0.9334181547164917, |
| "learning_rate": 0.00023807909482078475, |
| "loss": 3.221875762939453, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.6714697406340058, |
| "grad_norm": 1.0333514213562012, |
| "learning_rate": 0.0002378861217351721, |
| "loss": 3.2202327728271483, |
| "step": 3495 |
| }, |
| { |
| "epoch": 0.6724303554274735, |
| "grad_norm": 0.8510705828666687, |
| "learning_rate": 0.00023769292690385831, |
| "loss": 3.2205406188964845, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.6733909702209414, |
| "grad_norm": 1.0097713470458984, |
| "learning_rate": 0.0002374995108142938, |
| "loss": 3.2197288513183593, |
| "step": 3505 |
| }, |
| { |
| "epoch": 0.6743515850144092, |
| "grad_norm": 1.3777557611465454, |
| "learning_rate": 0.0002373058739544871, |
| "loss": 3.2197029113769533, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.675312199807877, |
| "grad_norm": 1.1440997123718262, |
| "learning_rate": 0.0002371120168130039, |
| "loss": 3.2237571716308593, |
| "step": 3515 |
| }, |
| { |
| "epoch": 0.6762728146013448, |
| "grad_norm": 1.1652058362960815, |
| "learning_rate": 0.0002369179398789657, |
| "loss": 3.216419982910156, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.6772334293948127, |
| "grad_norm": 0.7069166302680969, |
| "learning_rate": 0.00023672364364204853, |
| "loss": 3.2144538879394533, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.6781940441882806, |
| "grad_norm": 1.2215242385864258, |
| "learning_rate": 0.00023652912859248166, |
| "loss": 3.221971130371094, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.6791546589817483, |
| "grad_norm": 0.978817880153656, |
| "learning_rate": 0.00023633439522104658, |
| "loss": 3.21834716796875, |
| "step": 3535 |
| }, |
| { |
| "epoch": 0.6801152737752162, |
| "grad_norm": 1.0850077867507935, |
| "learning_rate": 0.0002361394440190755, |
| "loss": 3.216111755371094, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.681075888568684, |
| "grad_norm": 1.0549561977386475, |
| "learning_rate": 0.00023594427547845033, |
| "loss": 3.2226284027099608, |
| "step": 3545 |
| }, |
| { |
| "epoch": 0.6820365033621518, |
| "grad_norm": 1.0915031433105469, |
| "learning_rate": 0.00023574889009160134, |
| "loss": 3.220775604248047, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.6829971181556196, |
| "grad_norm": 1.2648931741714478, |
| "learning_rate": 0.00023555328835150587, |
| "loss": 3.219307708740234, |
| "step": 3555 |
| }, |
| { |
| "epoch": 0.6839577329490875, |
| "grad_norm": 0.8500558733940125, |
| "learning_rate": 0.00023535747075168726, |
| "loss": 3.2166213989257812, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.6849183477425552, |
| "grad_norm": 0.9885119795799255, |
| "learning_rate": 0.00023516143778621327, |
| "loss": 3.219788360595703, |
| "step": 3565 |
| }, |
| { |
| "epoch": 0.6858789625360231, |
| "grad_norm": 1.1124404668807983, |
| "learning_rate": 0.00023496518994969528, |
| "loss": 3.218476104736328, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.6868395773294909, |
| "grad_norm": 1.0537521839141846, |
| "learning_rate": 0.00023476872773728666, |
| "loss": 3.217354583740234, |
| "step": 3575 |
| }, |
| { |
| "epoch": 0.6878001921229587, |
| "grad_norm": 1.0711948871612549, |
| "learning_rate": 0.00023457205164468173, |
| "loss": 3.2162689208984374, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.6887608069164265, |
| "grad_norm": 1.0931755304336548, |
| "learning_rate": 0.0002343751621681145, |
| "loss": 3.217786407470703, |
| "step": 3585 |
| }, |
| { |
| "epoch": 0.6897214217098944, |
| "grad_norm": 0.9976537227630615, |
| "learning_rate": 0.00023417805980435736, |
| "loss": 3.216781997680664, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.6906820365033621, |
| "grad_norm": 0.8797841668128967, |
| "learning_rate": 0.00023398074505071964, |
| "loss": 3.21632080078125, |
| "step": 3595 |
| }, |
| { |
| "epoch": 0.69164265129683, |
| "grad_norm": 1.0468313694000244, |
| "learning_rate": 0.0002337832184050468, |
| "loss": 3.2173301696777346, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.6926032660902978, |
| "grad_norm": 1.2071698904037476, |
| "learning_rate": 0.0002335854803657188, |
| "loss": 3.2191944122314453, |
| "step": 3605 |
| }, |
| { |
| "epoch": 0.6935638808837656, |
| "grad_norm": 1.0584746599197388, |
| "learning_rate": 0.00023338753143164906, |
| "loss": 3.2196414947509764, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.6945244956772334, |
| "grad_norm": 0.9132346510887146, |
| "learning_rate": 0.00023318937210228295, |
| "loss": 3.215652847290039, |
| "step": 3615 |
| }, |
| { |
| "epoch": 0.6954851104707013, |
| "grad_norm": 0.9715785384178162, |
| "learning_rate": 0.00023299100287759686, |
| "loss": 3.215423583984375, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.696445725264169, |
| "grad_norm": 1.1859434843063354, |
| "learning_rate": 0.00023279242425809667, |
| "loss": 3.211594009399414, |
| "step": 3625 |
| }, |
| { |
| "epoch": 0.6974063400576369, |
| "grad_norm": 1.0725688934326172, |
| "learning_rate": 0.00023259363674481666, |
| "loss": 3.2200057983398436, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.6983669548511047, |
| "grad_norm": 1.0033189058303833, |
| "learning_rate": 0.00023239464083931802, |
| "loss": 3.217108154296875, |
| "step": 3635 |
| }, |
| { |
| "epoch": 0.6993275696445725, |
| "grad_norm": 1.0625147819519043, |
| "learning_rate": 0.00023219543704368792, |
| "loss": 3.2169322967529297, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.7002881844380403, |
| "grad_norm": 0.9788894653320312, |
| "learning_rate": 0.00023199602586053793, |
| "loss": 3.2101593017578125, |
| "step": 3645 |
| }, |
| { |
| "epoch": 0.7012487992315082, |
| "grad_norm": 0.9568902254104614, |
| "learning_rate": 0.0002317964077930029, |
| "loss": 3.215932846069336, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.7022094140249759, |
| "grad_norm": 1.3549705743789673, |
| "learning_rate": 0.00023159658334473974, |
| "loss": 3.2176162719726564, |
| "step": 3655 |
| }, |
| { |
| "epoch": 0.7031700288184438, |
| "grad_norm": 0.9975321292877197, |
| "learning_rate": 0.00023139655301992603, |
| "loss": 3.218878173828125, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.7041306436119116, |
| "grad_norm": 0.8793084621429443, |
| "learning_rate": 0.00023119631732325882, |
| "loss": 3.2155433654785157, |
| "step": 3665 |
| }, |
| { |
| "epoch": 0.7050912584053795, |
| "grad_norm": 1.2263166904449463, |
| "learning_rate": 0.0002309958767599532, |
| "loss": 3.2179286956787108, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.7060518731988472, |
| "grad_norm": 1.0385061502456665, |
| "learning_rate": 0.0002307952318357414, |
| "loss": 3.2134330749511717, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.7070124879923151, |
| "grad_norm": 1.019902229309082, |
| "learning_rate": 0.0002305943830568711, |
| "loss": 3.2176845550537108, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.7079731027857828, |
| "grad_norm": 0.9984716773033142, |
| "learning_rate": 0.00023039333093010433, |
| "loss": 3.218429946899414, |
| "step": 3685 |
| }, |
| { |
| "epoch": 0.7089337175792507, |
| "grad_norm": 1.0424875020980835, |
| "learning_rate": 0.00023019207596271634, |
| "loss": 3.2165111541748046, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.7098943323727186, |
| "grad_norm": 1.0895493030548096, |
| "learning_rate": 0.00022999061866249397, |
| "loss": 3.2162498474121093, |
| "step": 3695 |
| }, |
| { |
| "epoch": 0.7108549471661864, |
| "grad_norm": 1.0710501670837402, |
| "learning_rate": 0.00022978895953773464, |
| "loss": 3.2143184661865236, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.7118155619596542, |
| "grad_norm": 1.1927725076675415, |
| "learning_rate": 0.00022958709909724512, |
| "loss": 3.2180755615234373, |
| "step": 3705 |
| }, |
| { |
| "epoch": 0.712776176753122, |
| "grad_norm": 0.9517913460731506, |
| "learning_rate": 0.00022938503785033991, |
| "loss": 3.2159660339355467, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.7137367915465899, |
| "grad_norm": 1.0126185417175293, |
| "learning_rate": 0.00022918277630684032, |
| "loss": 3.2123146057128906, |
| "step": 3715 |
| }, |
| { |
| "epoch": 0.7146974063400576, |
| "grad_norm": 1.0625087022781372, |
| "learning_rate": 0.0002289803149770729, |
| "loss": 3.215711212158203, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.7156580211335255, |
| "grad_norm": 1.2499691247940063, |
| "learning_rate": 0.00022877765437186843, |
| "loss": 3.215605545043945, |
| "step": 3725 |
| }, |
| { |
| "epoch": 0.7166186359269933, |
| "grad_norm": 1.0240933895111084, |
| "learning_rate": 0.0002285747950025604, |
| "loss": 3.2169639587402346, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.7175792507204611, |
| "grad_norm": 1.0153264999389648, |
| "learning_rate": 0.00022837173738098375, |
| "loss": 3.2149864196777345, |
| "step": 3735 |
| }, |
| { |
| "epoch": 0.7185398655139289, |
| "grad_norm": 0.7960460782051086, |
| "learning_rate": 0.00022816848201947376, |
| "loss": 3.217276382446289, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.7195004803073968, |
| "grad_norm": 0.886752724647522, |
| "learning_rate": 0.0002279650294308645, |
| "loss": 3.2126708984375, |
| "step": 3745 |
| }, |
| { |
| "epoch": 0.7204610951008645, |
| "grad_norm": 0.9281694889068604, |
| "learning_rate": 0.00022776138012848776, |
| "loss": 3.2142318725585937, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.7214217098943324, |
| "grad_norm": 0.9548509120941162, |
| "learning_rate": 0.00022755753462617162, |
| "loss": 3.2146896362304687, |
| "step": 3755 |
| }, |
| { |
| "epoch": 0.7223823246878002, |
| "grad_norm": 1.099318265914917, |
| "learning_rate": 0.0002273534934382392, |
| "loss": 3.2121913909912108, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.723342939481268, |
| "grad_norm": 1.0480355024337769, |
| "learning_rate": 0.00022714925707950734, |
| "loss": 3.2157440185546875, |
| "step": 3765 |
| }, |
| { |
| "epoch": 0.7243035542747358, |
| "grad_norm": 0.8996202349662781, |
| "learning_rate": 0.0002269448260652853, |
| "loss": 3.2162132263183594, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.7252641690682037, |
| "grad_norm": 1.0007187128067017, |
| "learning_rate": 0.00022674020091137357, |
| "loss": 3.216075897216797, |
| "step": 3775 |
| }, |
| { |
| "epoch": 0.7262247838616714, |
| "grad_norm": 1.0066853761672974, |
| "learning_rate": 0.0002265353821340623, |
| "loss": 3.2114944458007812, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.7271853986551393, |
| "grad_norm": 0.8113794922828674, |
| "learning_rate": 0.00022633037025013034, |
| "loss": 3.212854766845703, |
| "step": 3785 |
| }, |
| { |
| "epoch": 0.7281460134486071, |
| "grad_norm": 0.8382619619369507, |
| "learning_rate": 0.00022612516577684373, |
| "loss": 3.2154186248779295, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.729106628242075, |
| "grad_norm": 0.7889550924301147, |
| "learning_rate": 0.0002259197692319544, |
| "loss": 3.212788391113281, |
| "step": 3795 |
| }, |
| { |
| "epoch": 0.7300672430355427, |
| "grad_norm": 0.988488495349884, |
| "learning_rate": 0.00022571418113369885, |
| "loss": 3.2164848327636717, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.7310278578290106, |
| "grad_norm": 1.2173198461532593, |
| "learning_rate": 0.00022550840200079696, |
| "loss": 3.216341018676758, |
| "step": 3805 |
| }, |
| { |
| "epoch": 0.7319884726224783, |
| "grad_norm": 0.9247362017631531, |
| "learning_rate": 0.00022530243235245067, |
| "loss": 3.214260482788086, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.7329490874159462, |
| "grad_norm": 1.0379600524902344, |
| "learning_rate": 0.0002250962727083424, |
| "loss": 3.211573028564453, |
| "step": 3815 |
| }, |
| { |
| "epoch": 0.733909702209414, |
| "grad_norm": 1.0794739723205566, |
| "learning_rate": 0.00022488992358863416, |
| "loss": 3.2143383026123047, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.7348703170028819, |
| "grad_norm": 0.7588670253753662, |
| "learning_rate": 0.00022468338551396598, |
| "loss": 3.2139129638671875, |
| "step": 3825 |
| }, |
| { |
| "epoch": 0.7358309317963496, |
| "grad_norm": 0.9358298182487488, |
| "learning_rate": 0.00022447665900545452, |
| "loss": 3.2157051086425783, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.7367915465898175, |
| "grad_norm": 1.0112359523773193, |
| "learning_rate": 0.00022426974458469206, |
| "loss": 3.2114688873291017, |
| "step": 3835 |
| }, |
| { |
| "epoch": 0.7377521613832853, |
| "grad_norm": 1.0056138038635254, |
| "learning_rate": 0.00022406264277374487, |
| "loss": 3.211741638183594, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.7387127761767531, |
| "grad_norm": 1.0002721548080444, |
| "learning_rate": 0.00022385535409515203, |
| "loss": 3.214643096923828, |
| "step": 3845 |
| }, |
| { |
| "epoch": 0.7396733909702209, |
| "grad_norm": 1.0511516332626343, |
| "learning_rate": 0.0002236478790719242, |
| "loss": 3.2131919860839844, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.7406340057636888, |
| "grad_norm": 1.0013073682785034, |
| "learning_rate": 0.00022344021822754212, |
| "loss": 3.2114883422851563, |
| "step": 3855 |
| }, |
| { |
| "epoch": 0.7415946205571565, |
| "grad_norm": 0.9666048288345337, |
| "learning_rate": 0.0002232323720859554, |
| "loss": 3.2139522552490236, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.7425552353506244, |
| "grad_norm": 0.9765871167182922, |
| "learning_rate": 0.0002230243411715812, |
| "loss": 3.216571044921875, |
| "step": 3865 |
| }, |
| { |
| "epoch": 0.7435158501440923, |
| "grad_norm": 0.8558294773101807, |
| "learning_rate": 0.00022281612600930282, |
| "loss": 3.211492156982422, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.74447646493756, |
| "grad_norm": 0.9790803790092468, |
| "learning_rate": 0.00022260772712446848, |
| "loss": 3.212940979003906, |
| "step": 3875 |
| }, |
| { |
| "epoch": 0.7454370797310279, |
| "grad_norm": 1.115885853767395, |
| "learning_rate": 0.00022239914504289002, |
| "loss": 3.215241241455078, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.7463976945244957, |
| "grad_norm": 0.8012208342552185, |
| "learning_rate": 0.00022219038029084134, |
| "loss": 3.2129531860351563, |
| "step": 3885 |
| }, |
| { |
| "epoch": 0.7473583093179635, |
| "grad_norm": 0.9198108315467834, |
| "learning_rate": 0.00022198143339505736, |
| "loss": 3.2131282806396486, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.7483189241114313, |
| "grad_norm": 0.9384759068489075, |
| "learning_rate": 0.00022177230488273255, |
| "loss": 3.2148651123046874, |
| "step": 3895 |
| }, |
| { |
| "epoch": 0.7492795389048992, |
| "grad_norm": 0.9330629706382751, |
| "learning_rate": 0.00022156299528151957, |
| "loss": 3.2130111694335937, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.7502401536983669, |
| "grad_norm": 1.0961406230926514, |
| "learning_rate": 0.00022135350511952805, |
| "loss": 3.209431457519531, |
| "step": 3905 |
| }, |
| { |
| "epoch": 0.7512007684918348, |
| "grad_norm": 1.0598336458206177, |
| "learning_rate": 0.0002211438349253231, |
| "loss": 3.2150421142578125, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.7521613832853026, |
| "grad_norm": 0.8614126443862915, |
| "learning_rate": 0.00022093398522792418, |
| "loss": 3.2113304138183594, |
| "step": 3915 |
| }, |
| { |
| "epoch": 0.7531219980787704, |
| "grad_norm": 1.2163376808166504, |
| "learning_rate": 0.0002207239565568036, |
| "loss": 3.213220977783203, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.7540826128722382, |
| "grad_norm": 1.068865180015564, |
| "learning_rate": 0.00022051374944188524, |
| "loss": 3.2152915954589845, |
| "step": 3925 |
| }, |
| { |
| "epoch": 0.7550432276657061, |
| "grad_norm": 0.8628993034362793, |
| "learning_rate": 0.0002203033644135432, |
| "loss": 3.2100101470947267, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.7560038424591738, |
| "grad_norm": 1.0070104598999023, |
| "learning_rate": 0.00022009280200260053, |
| "loss": 3.214314651489258, |
| "step": 3935 |
| }, |
| { |
| "epoch": 0.7569644572526417, |
| "grad_norm": 1.1911405324935913, |
| "learning_rate": 0.00021988206274032776, |
| "loss": 3.2116607666015624, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.7579250720461095, |
| "grad_norm": 1.0605741739273071, |
| "learning_rate": 0.00021967114715844163, |
| "loss": 3.210451126098633, |
| "step": 3945 |
| }, |
| { |
| "epoch": 0.7588856868395774, |
| "grad_norm": 1.010385274887085, |
| "learning_rate": 0.0002194600557891039, |
| "loss": 3.2117488861083983, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.7598463016330451, |
| "grad_norm": 0.9285851716995239, |
| "learning_rate": 0.00021924878916491962, |
| "loss": 3.2095130920410155, |
| "step": 3955 |
| }, |
| { |
| "epoch": 0.760806916426513, |
| "grad_norm": 0.9447981715202332, |
| "learning_rate": 0.00021903734781893625, |
| "loss": 3.214548873901367, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.7617675312199808, |
| "grad_norm": 1.0844841003417969, |
| "learning_rate": 0.000218825732284642, |
| "loss": 3.2142662048339843, |
| "step": 3965 |
| }, |
| { |
| "epoch": 0.7627281460134486, |
| "grad_norm": 0.9654716849327087, |
| "learning_rate": 0.00021861394309596446, |
| "loss": 3.214899444580078, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.7636887608069164, |
| "grad_norm": 0.8219507336616516, |
| "learning_rate": 0.00021840198078726964, |
| "loss": 3.2075408935546874, |
| "step": 3975 |
| }, |
| { |
| "epoch": 0.7646493756003843, |
| "grad_norm": 1.0514135360717773, |
| "learning_rate": 0.00021818984589336006, |
| "loss": 3.2124794006347654, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.765609990393852, |
| "grad_norm": 0.764786958694458, |
| "learning_rate": 0.0002179775389494739, |
| "loss": 3.210370635986328, |
| "step": 3985 |
| }, |
| { |
| "epoch": 0.7665706051873199, |
| "grad_norm": 0.9463275671005249, |
| "learning_rate": 0.0002177650604912833, |
| "loss": 3.214555358886719, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.7675312199807877, |
| "grad_norm": 0.9087357521057129, |
| "learning_rate": 0.0002175524110548932, |
| "loss": 3.211328125, |
| "step": 3995 |
| }, |
| { |
| "epoch": 0.7684918347742555, |
| "grad_norm": 0.8839055299758911, |
| "learning_rate": 0.00021733959117684008, |
| "loss": 3.208209991455078, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.7694524495677233, |
| "grad_norm": 1.0021069049835205, |
| "learning_rate": 0.00021712660139409015, |
| "loss": 3.213267517089844, |
| "step": 4005 |
| }, |
| { |
| "epoch": 0.7704130643611912, |
| "grad_norm": 1.089827060699463, |
| "learning_rate": 0.0002169134422440386, |
| "loss": 3.209787368774414, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.7713736791546589, |
| "grad_norm": 0.9058536887168884, |
| "learning_rate": 0.00021670011426450772, |
| "loss": 3.2052375793457033, |
| "step": 4015 |
| }, |
| { |
| "epoch": 0.7723342939481268, |
| "grad_norm": 0.8882672190666199, |
| "learning_rate": 0.000216486617993746, |
| "loss": 3.2103240966796873, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.7732949087415946, |
| "grad_norm": 0.7981977462768555, |
| "learning_rate": 0.00021627295397042635, |
| "loss": 3.2096282958984377, |
| "step": 4025 |
| }, |
| { |
| "epoch": 0.7742555235350624, |
| "grad_norm": 0.9157530665397644, |
| "learning_rate": 0.00021605912273364513, |
| "loss": 3.213287353515625, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.7752161383285303, |
| "grad_norm": 0.7963248491287231, |
| "learning_rate": 0.00021584512482292038, |
| "loss": 3.2098575592041017, |
| "step": 4035 |
| }, |
| { |
| "epoch": 0.7761767531219981, |
| "grad_norm": 1.0376338958740234, |
| "learning_rate": 0.00021563096077819083, |
| "loss": 3.2072002410888674, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.777137367915466, |
| "grad_norm": 1.159191608428955, |
| "learning_rate": 0.00021541663113981433, |
| "loss": 3.2127113342285156, |
| "step": 4045 |
| }, |
| { |
| "epoch": 0.7780979827089337, |
| "grad_norm": 1.213563323020935, |
| "learning_rate": 0.0002152021364485665, |
| "loss": 3.2094375610351564, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.7790585975024016, |
| "grad_norm": 1.1174225807189941, |
| "learning_rate": 0.00021498747724563953, |
| "loss": 3.2087074279785157, |
| "step": 4055 |
| }, |
| { |
| "epoch": 0.7800192122958693, |
| "grad_norm": 1.0365922451019287, |
| "learning_rate": 0.00021477265407264051, |
| "loss": 3.2079147338867187, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.7809798270893372, |
| "grad_norm": 1.0354453325271606, |
| "learning_rate": 0.00021455766747159044, |
| "loss": 3.2089813232421873, |
| "step": 4065 |
| }, |
| { |
| "epoch": 0.781940441882805, |
| "grad_norm": 1.0720642805099487, |
| "learning_rate": 0.0002143425179849226, |
| "loss": 3.207681655883789, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.7829010566762729, |
| "grad_norm": 0.9447735548019409, |
| "learning_rate": 0.00021412720615548105, |
| "loss": 3.2113746643066405, |
| "step": 4075 |
| }, |
| { |
| "epoch": 0.7838616714697406, |
| "grad_norm": 0.9376322627067566, |
| "learning_rate": 0.00021391173252651978, |
| "loss": 3.2086830139160156, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.7848222862632085, |
| "grad_norm": 1.0265626907348633, |
| "learning_rate": 0.00021369609764170075, |
| "loss": 3.2098388671875, |
| "step": 4085 |
| }, |
| { |
| "epoch": 0.7857829010566763, |
| "grad_norm": 1.1230889558792114, |
| "learning_rate": 0.00021348030204509303, |
| "loss": 3.208446502685547, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.7867435158501441, |
| "grad_norm": 0.8800603747367859, |
| "learning_rate": 0.00021326434628117088, |
| "loss": 3.208513641357422, |
| "step": 4095 |
| }, |
| { |
| "epoch": 0.7877041306436119, |
| "grad_norm": 1.0674163103103638, |
| "learning_rate": 0.00021304823089481298, |
| "loss": 3.2110557556152344, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.7886647454370798, |
| "grad_norm": 0.8815765976905823, |
| "learning_rate": 0.00021283195643130058, |
| "loss": 3.210008239746094, |
| "step": 4105 |
| }, |
| { |
| "epoch": 0.7896253602305475, |
| "grad_norm": 1.0902719497680664, |
| "learning_rate": 0.00021261552343631633, |
| "loss": 3.2107887268066406, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.7905859750240154, |
| "grad_norm": 0.859643816947937, |
| "learning_rate": 0.00021239893245594287, |
| "loss": 3.210626220703125, |
| "step": 4115 |
| }, |
| { |
| "epoch": 0.7915465898174832, |
| "grad_norm": 0.9734321236610413, |
| "learning_rate": 0.00021218218403666148, |
| "loss": 3.207859420776367, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.792507204610951, |
| "grad_norm": 0.8096666932106018, |
| "learning_rate": 0.00021196527872535068, |
| "loss": 3.2104820251464843, |
| "step": 4125 |
| }, |
| { |
| "epoch": 0.7934678194044188, |
| "grad_norm": 1.0135759115219116, |
| "learning_rate": 0.0002117482170692847, |
| "loss": 3.2095088958740234, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.7944284341978867, |
| "grad_norm": 0.9503042101860046, |
| "learning_rate": 0.00021153099961613257, |
| "loss": 3.2078937530517577, |
| "step": 4135 |
| }, |
| { |
| "epoch": 0.7953890489913544, |
| "grad_norm": 0.8183991312980652, |
| "learning_rate": 0.00021131362691395608, |
| "loss": 3.2082897186279298, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.7963496637848223, |
| "grad_norm": 0.9838517308235168, |
| "learning_rate": 0.00021109609951120887, |
| "loss": 3.2070068359375, |
| "step": 4145 |
| }, |
| { |
| "epoch": 0.7973102785782901, |
| "grad_norm": 1.0721310377120972, |
| "learning_rate": 0.00021087841795673488, |
| "loss": 3.206983947753906, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.7982708933717579, |
| "grad_norm": 1.0762015581130981, |
| "learning_rate": 0.00021066058279976704, |
| "loss": 3.2114967346191405, |
| "step": 4155 |
| }, |
| { |
| "epoch": 0.7992315081652257, |
| "grad_norm": 0.7675787806510925, |
| "learning_rate": 0.0002104425945899258, |
| "loss": 3.2034461975097654, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.8001921229586936, |
| "grad_norm": 1.0134726762771606, |
| "learning_rate": 0.00021022445387721767, |
| "loss": 3.207468032836914, |
| "step": 4165 |
| }, |
| { |
| "epoch": 0.8011527377521613, |
| "grad_norm": 0.9967617988586426, |
| "learning_rate": 0.00021000616121203422, |
| "loss": 3.2145057678222657, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.8021133525456292, |
| "grad_norm": 1.0330275297164917, |
| "learning_rate": 0.00020978771714515016, |
| "loss": 3.205029296875, |
| "step": 4175 |
| }, |
| { |
| "epoch": 0.803073967339097, |
| "grad_norm": 0.8626118898391724, |
| "learning_rate": 0.00020956912222772222, |
| "loss": 3.2084095001220705, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.8040345821325648, |
| "grad_norm": 1.186023235321045, |
| "learning_rate": 0.0002093503770112879, |
| "loss": 3.209725189208984, |
| "step": 4185 |
| }, |
| { |
| "epoch": 0.8049951969260326, |
| "grad_norm": 0.943437397480011, |
| "learning_rate": 0.00020913148204776378, |
| "loss": 3.209857177734375, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.8059558117195005, |
| "grad_norm": 1.167366623878479, |
| "learning_rate": 0.00020891243788944428, |
| "loss": 3.2103233337402344, |
| "step": 4195 |
| }, |
| { |
| "epoch": 0.8069164265129684, |
| "grad_norm": 0.9998597502708435, |
| "learning_rate": 0.00020869324508900026, |
| "loss": 3.211322784423828, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.8078770413064361, |
| "grad_norm": 0.9104480147361755, |
| "learning_rate": 0.00020847390419947778, |
| "loss": 3.2085220336914064, |
| "step": 4205 |
| }, |
| { |
| "epoch": 0.808837656099904, |
| "grad_norm": 1.0278260707855225, |
| "learning_rate": 0.0002082544157742963, |
| "loss": 3.2045223236083986, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.8097982708933718, |
| "grad_norm": 0.9279727339744568, |
| "learning_rate": 0.0002080347803672476, |
| "loss": 3.2089134216308595, |
| "step": 4215 |
| }, |
| { |
| "epoch": 0.8107588856868396, |
| "grad_norm": 0.9136155247688293, |
| "learning_rate": 0.00020781499853249441, |
| "loss": 3.2083240509033204, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.8117195004803074, |
| "grad_norm": 0.8539382219314575, |
| "learning_rate": 0.00020759507082456885, |
| "loss": 3.204279327392578, |
| "step": 4225 |
| }, |
| { |
| "epoch": 0.8126801152737753, |
| "grad_norm": 1.1511064767837524, |
| "learning_rate": 0.00020737499779837105, |
| "loss": 3.2057140350341795, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.813640730067243, |
| "grad_norm": 0.8153745532035828, |
| "learning_rate": 0.00020715478000916783, |
| "loss": 3.2046875, |
| "step": 4235 |
| }, |
| { |
| "epoch": 0.8146013448607109, |
| "grad_norm": 0.8916031122207642, |
| "learning_rate": 0.00020693441801259135, |
| "loss": 3.210984802246094, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.8155619596541787, |
| "grad_norm": 0.9601288437843323, |
| "learning_rate": 0.00020671391236463742, |
| "loss": 3.2044132232666014, |
| "step": 4245 |
| }, |
| { |
| "epoch": 0.8165225744476465, |
| "grad_norm": 0.9480450749397278, |
| "learning_rate": 0.00020649326362166449, |
| "loss": 3.2047096252441407, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.8174831892411143, |
| "grad_norm": 1.3009711503982544, |
| "learning_rate": 0.0002062724723403919, |
| "loss": 3.210066223144531, |
| "step": 4255 |
| }, |
| { |
| "epoch": 0.8184438040345822, |
| "grad_norm": 0.8482257127761841, |
| "learning_rate": 0.00020605153907789876, |
| "loss": 3.2040420532226563, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.8194044188280499, |
| "grad_norm": 1.1215349435806274, |
| "learning_rate": 0.00020583046439162232, |
| "loss": 3.209978485107422, |
| "step": 4265 |
| }, |
| { |
| "epoch": 0.8203650336215178, |
| "grad_norm": 1.0416927337646484, |
| "learning_rate": 0.00020560924883935674, |
| "loss": 3.20634765625, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.8213256484149856, |
| "grad_norm": 0.9002500176429749, |
| "learning_rate": 0.00020538789297925154, |
| "loss": 3.210291290283203, |
| "step": 4275 |
| }, |
| { |
| "epoch": 0.8222862632084534, |
| "grad_norm": 0.7959718108177185, |
| "learning_rate": 0.00020516639736981027, |
| "loss": 3.205914306640625, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.8232468780019212, |
| "grad_norm": 0.9458216428756714, |
| "learning_rate": 0.000204944762569889, |
| "loss": 3.2048385620117186, |
| "step": 4285 |
| }, |
| { |
| "epoch": 0.8242074927953891, |
| "grad_norm": 1.0240002870559692, |
| "learning_rate": 0.00020472298913869514, |
| "loss": 3.2106887817382814, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.8251681075888568, |
| "grad_norm": 0.9878635406494141, |
| "learning_rate": 0.00020450107763578582, |
| "loss": 3.206460952758789, |
| "step": 4295 |
| }, |
| { |
| "epoch": 0.8261287223823247, |
| "grad_norm": 0.961821973323822, |
| "learning_rate": 0.00020427902862106645, |
| "loss": 3.208537292480469, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.8270893371757925, |
| "grad_norm": 1.0129098892211914, |
| "learning_rate": 0.00020405684265478955, |
| "loss": 3.2066085815429686, |
| "step": 4305 |
| }, |
| { |
| "epoch": 0.8280499519692603, |
| "grad_norm": 1.2535499334335327, |
| "learning_rate": 0.00020383452029755308, |
| "loss": 3.206789016723633, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.8290105667627281, |
| "grad_norm": 1.0553781986236572, |
| "learning_rate": 0.00020361206211029915, |
| "loss": 3.208069610595703, |
| "step": 4315 |
| }, |
| { |
| "epoch": 0.829971181556196, |
| "grad_norm": 1.033327341079712, |
| "learning_rate": 0.00020338946865431257, |
| "loss": 3.208452606201172, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.8309317963496637, |
| "grad_norm": 1.1007755994796753, |
| "learning_rate": 0.00020316674049121954, |
| "loss": 3.205517578125, |
| "step": 4325 |
| }, |
| { |
| "epoch": 0.8318924111431316, |
| "grad_norm": 1.0913561582565308, |
| "learning_rate": 0.00020294387818298596, |
| "loss": 3.205488586425781, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.8328530259365994, |
| "grad_norm": 1.073475956916809, |
| "learning_rate": 0.00020272088229191638, |
| "loss": 3.2070037841796877, |
| "step": 4335 |
| }, |
| { |
| "epoch": 0.8338136407300673, |
| "grad_norm": 0.8359492421150208, |
| "learning_rate": 0.00020249775338065224, |
| "loss": 3.2025604248046875, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.834774255523535, |
| "grad_norm": 1.1163265705108643, |
| "learning_rate": 0.0002022744920121707, |
| "loss": 3.205812454223633, |
| "step": 4345 |
| }, |
| { |
| "epoch": 0.8357348703170029, |
| "grad_norm": 1.0195002555847168, |
| "learning_rate": 0.0002020510987497832, |
| "loss": 3.2072071075439452, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.8366954851104706, |
| "grad_norm": 1.06333327293396, |
| "learning_rate": 0.0002018275741571337, |
| "loss": 3.2045135498046875, |
| "step": 4355 |
| }, |
| { |
| "epoch": 0.8376560999039385, |
| "grad_norm": 1.07416570186615, |
| "learning_rate": 0.00020160391879819775, |
| "loss": 3.2041862487792967, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.8386167146974063, |
| "grad_norm": 0.8247685432434082, |
| "learning_rate": 0.00020138013323728072, |
| "loss": 3.204395294189453, |
| "step": 4365 |
| }, |
| { |
| "epoch": 0.8395773294908742, |
| "grad_norm": 0.7512625455856323, |
| "learning_rate": 0.00020115621803901658, |
| "loss": 3.2026763916015626, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.840537944284342, |
| "grad_norm": 0.6788026094436646, |
| "learning_rate": 0.0002009321737683664, |
| "loss": 3.202665328979492, |
| "step": 4375 |
| }, |
| { |
| "epoch": 0.8414985590778098, |
| "grad_norm": 1.178909182548523, |
| "learning_rate": 0.00020070800099061676, |
| "loss": 3.203816604614258, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.8424591738712777, |
| "grad_norm": 1.056028127670288, |
| "learning_rate": 0.00020048370027137864, |
| "loss": 3.203491973876953, |
| "step": 4385 |
| }, |
| { |
| "epoch": 0.8434197886647454, |
| "grad_norm": 0.9850196838378906, |
| "learning_rate": 0.0002002592721765857, |
| "loss": 3.203214645385742, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.8443804034582133, |
| "grad_norm": 0.812002956867218, |
| "learning_rate": 0.00020003471727249317, |
| "loss": 3.2064640045166017, |
| "step": 4395 |
| }, |
| { |
| "epoch": 0.8453410182516811, |
| "grad_norm": 0.7780818343162537, |
| "learning_rate": 0.00019981003612567594, |
| "loss": 3.2039905548095704, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.8463016330451489, |
| "grad_norm": 0.7282761931419373, |
| "learning_rate": 0.0001995852293030277, |
| "loss": 3.2027183532714845, |
| "step": 4405 |
| }, |
| { |
| "epoch": 0.8472622478386167, |
| "grad_norm": 1.597205400466919, |
| "learning_rate": 0.0001993602973717591, |
| "loss": 3.2074440002441404, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.8482228626320846, |
| "grad_norm": 0.7743385434150696, |
| "learning_rate": 0.0001991352408993965, |
| "loss": 3.20322265625, |
| "step": 4415 |
| }, |
| { |
| "epoch": 0.8491834774255523, |
| "grad_norm": 0.8802571296691895, |
| "learning_rate": 0.00019891006045378047, |
| "loss": 3.2041389465332033, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.8501440922190202, |
| "grad_norm": 0.9674323797225952, |
| "learning_rate": 0.00019868475660306435, |
| "loss": 3.2038337707519533, |
| "step": 4425 |
| }, |
| { |
| "epoch": 0.851104707012488, |
| "grad_norm": 0.8163370490074158, |
| "learning_rate": 0.00019845932991571284, |
| "loss": 3.204595947265625, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.8520653218059558, |
| "grad_norm": 0.8586170077323914, |
| "learning_rate": 0.00019823378096050067, |
| "loss": 3.204967498779297, |
| "step": 4435 |
| }, |
| { |
| "epoch": 0.8530259365994236, |
| "grad_norm": 0.9709478616714478, |
| "learning_rate": 0.00019800811030651095, |
| "loss": 3.200875091552734, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.8539865513928915, |
| "grad_norm": 0.8611642718315125, |
| "learning_rate": 0.00019778231852313386, |
| "loss": 3.2068603515625, |
| "step": 4445 |
| }, |
| { |
| "epoch": 0.8549471661863592, |
| "grad_norm": 0.8717654347419739, |
| "learning_rate": 0.00019755640618006532, |
| "loss": 3.2012969970703127, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.8559077809798271, |
| "grad_norm": 0.9735947251319885, |
| "learning_rate": 0.0001973303738473053, |
| "loss": 3.2008651733398437, |
| "step": 4455 |
| }, |
| { |
| "epoch": 0.8568683957732949, |
| "grad_norm": 0.9653201699256897, |
| "learning_rate": 0.0001971042220951565, |
| "loss": 3.2013267517089843, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.8578290105667628, |
| "grad_norm": 0.8183594346046448, |
| "learning_rate": 0.00019687795149422315, |
| "loss": 3.204022979736328, |
| "step": 4465 |
| }, |
| { |
| "epoch": 0.8587896253602305, |
| "grad_norm": 0.8697395920753479, |
| "learning_rate": 0.00019665156261540898, |
| "loss": 3.202830505371094, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.8597502401536984, |
| "grad_norm": 0.829225480556488, |
| "learning_rate": 0.00019642505602991654, |
| "loss": 3.2021392822265624, |
| "step": 4475 |
| }, |
| { |
| "epoch": 0.8607108549471661, |
| "grad_norm": 1.1149920225143433, |
| "learning_rate": 0.0001961984323092451, |
| "loss": 3.205643081665039, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.861671469740634, |
| "grad_norm": 0.8502190113067627, |
| "learning_rate": 0.00019597169202518954, |
| "loss": 3.2061065673828124, |
| "step": 4485 |
| }, |
| { |
| "epoch": 0.8626320845341018, |
| "grad_norm": 0.7430519461631775, |
| "learning_rate": 0.0001957448357498389, |
| "loss": 3.2033714294433593, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.8635926993275697, |
| "grad_norm": 0.8692427277565002, |
| "learning_rate": 0.0001955178640555748, |
| "loss": 3.2003097534179688, |
| "step": 4495 |
| }, |
| { |
| "epoch": 0.8645533141210374, |
| "grad_norm": 0.809934139251709, |
| "learning_rate": 0.00019529077751507016, |
| "loss": 3.2053035736083983, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.8655139289145053, |
| "grad_norm": 0.6470674276351929, |
| "learning_rate": 0.00019506357670128749, |
| "loss": 3.2052642822265627, |
| "step": 4505 |
| }, |
| { |
| "epoch": 0.866474543707973, |
| "grad_norm": 0.9643762111663818, |
| "learning_rate": 0.00019483626218747794, |
| "loss": 3.203343963623047, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.8674351585014409, |
| "grad_norm": 1.0639313459396362, |
| "learning_rate": 0.00019460883454717922, |
| "loss": 3.2024681091308596, |
| "step": 4515 |
| }, |
| { |
| "epoch": 0.8683957732949087, |
| "grad_norm": 0.8861836194992065, |
| "learning_rate": 0.00019438129435421462, |
| "loss": 3.201047897338867, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.8693563880883766, |
| "grad_norm": 1.0320836305618286, |
| "learning_rate": 0.00019415364218269146, |
| "loss": 3.2003639221191404, |
| "step": 4525 |
| }, |
| { |
| "epoch": 0.8703170028818443, |
| "grad_norm": 0.9835689663887024, |
| "learning_rate": 0.00019392587860699942, |
| "loss": 3.2048595428466795, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.8712776176753122, |
| "grad_norm": 0.7003117799758911, |
| "learning_rate": 0.00019369800420180943, |
| "loss": 3.204084014892578, |
| "step": 4535 |
| }, |
| { |
| "epoch": 0.8722382324687801, |
| "grad_norm": 0.9389500021934509, |
| "learning_rate": 0.00019347001954207193, |
| "loss": 3.2038955688476562, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.8731988472622478, |
| "grad_norm": 1.1108181476593018, |
| "learning_rate": 0.00019324192520301566, |
| "loss": 3.200461196899414, |
| "step": 4545 |
| }, |
| { |
| "epoch": 0.8741594620557157, |
| "grad_norm": 0.84727942943573, |
| "learning_rate": 0.00019301372176014605, |
| "loss": 3.2037403106689455, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.8751200768491835, |
| "grad_norm": 0.776715874671936, |
| "learning_rate": 0.00019278540978924378, |
| "loss": 3.1999557495117186, |
| "step": 4555 |
| }, |
| { |
| "epoch": 0.8760806916426513, |
| "grad_norm": 0.873356282711029, |
| "learning_rate": 0.0001925569898663633, |
| "loss": 3.201426696777344, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.8770413064361191, |
| "grad_norm": 1.0719802379608154, |
| "learning_rate": 0.00019232846256783163, |
| "loss": 3.2020469665527345, |
| "step": 4565 |
| }, |
| { |
| "epoch": 0.878001921229587, |
| "grad_norm": 0.8912343978881836, |
| "learning_rate": 0.00019209982847024655, |
| "loss": 3.200605010986328, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.8789625360230547, |
| "grad_norm": 0.8289459347724915, |
| "learning_rate": 0.00019187108815047523, |
| "loss": 3.2027835845947266, |
| "step": 4575 |
| }, |
| { |
| "epoch": 0.8799231508165226, |
| "grad_norm": 0.7044758200645447, |
| "learning_rate": 0.00019164224218565313, |
| "loss": 3.2026336669921873, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.8808837656099904, |
| "grad_norm": 0.8316428661346436, |
| "learning_rate": 0.00019141329115318203, |
| "loss": 3.204214096069336, |
| "step": 4585 |
| }, |
| { |
| "epoch": 0.8818443804034583, |
| "grad_norm": 0.9814732074737549, |
| "learning_rate": 0.00019118423563072885, |
| "loss": 3.2025177001953127, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.882804995196926, |
| "grad_norm": 0.6341441869735718, |
| "learning_rate": 0.0001909550761962242, |
| "loss": 3.2031497955322266, |
| "step": 4595 |
| }, |
| { |
| "epoch": 0.8837656099903939, |
| "grad_norm": 0.977326512336731, |
| "learning_rate": 0.00019072581342786084, |
| "loss": 3.199177551269531, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.8847262247838616, |
| "grad_norm": 0.7784291505813599, |
| "learning_rate": 0.00019049644790409225, |
| "loss": 3.198267936706543, |
| "step": 4605 |
| }, |
| { |
| "epoch": 0.8856868395773295, |
| "grad_norm": 1.1206945180892944, |
| "learning_rate": 0.00019026698020363107, |
| "loss": 3.205250549316406, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.8866474543707973, |
| "grad_norm": 0.8689852356910706, |
| "learning_rate": 0.00019003741090544804, |
| "loss": 3.2022071838378907, |
| "step": 4615 |
| }, |
| { |
| "epoch": 0.8876080691642652, |
| "grad_norm": 1.1133830547332764, |
| "learning_rate": 0.00018980774058876995, |
| "loss": 3.2035804748535157, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.8885686839577329, |
| "grad_norm": 0.8632625937461853, |
| "learning_rate": 0.00018957796983307858, |
| "loss": 3.2034885406494142, |
| "step": 4625 |
| }, |
| { |
| "epoch": 0.8895292987512008, |
| "grad_norm": 0.8747223615646362, |
| "learning_rate": 0.0001893480992181091, |
| "loss": 3.2007545471191405, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.8904899135446686, |
| "grad_norm": 1.075469732284546, |
| "learning_rate": 0.00018911812932384872, |
| "loss": 3.2042964935302733, |
| "step": 4635 |
| }, |
| { |
| "epoch": 0.8914505283381364, |
| "grad_norm": 1.0559624433517456, |
| "learning_rate": 0.000188888060730535, |
| "loss": 3.199691963195801, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.8924111431316042, |
| "grad_norm": 1.0888924598693848, |
| "learning_rate": 0.00018865789401865458, |
| "loss": 3.202299118041992, |
| "step": 4645 |
| }, |
| { |
| "epoch": 0.8933717579250721, |
| "grad_norm": 0.7396308779716492, |
| "learning_rate": 0.0001884276297689418, |
| "loss": 3.19757080078125, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.8943323727185398, |
| "grad_norm": 0.8118330240249634, |
| "learning_rate": 0.00018819726856237685, |
| "loss": 3.2041015625, |
| "step": 4655 |
| }, |
| { |
| "epoch": 0.8952929875120077, |
| "grad_norm": 0.9625673890113831, |
| "learning_rate": 0.00018796681098018486, |
| "loss": 3.2014694213867188, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.8962536023054755, |
| "grad_norm": 1.0025596618652344, |
| "learning_rate": 0.00018773625760383375, |
| "loss": 3.2010726928710938, |
| "step": 4665 |
| }, |
| { |
| "epoch": 0.8972142170989433, |
| "grad_norm": 1.0931344032287598, |
| "learning_rate": 0.00018750560901503353, |
| "loss": 3.2006298065185548, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.8981748318924111, |
| "grad_norm": 1.0470645427703857, |
| "learning_rate": 0.00018727486579573407, |
| "loss": 3.1973636627197264, |
| "step": 4675 |
| }, |
| { |
| "epoch": 0.899135446685879, |
| "grad_norm": 0.9046833515167236, |
| "learning_rate": 0.00018704402852812431, |
| "loss": 3.2017478942871094, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.9000960614793467, |
| "grad_norm": 0.9769369959831238, |
| "learning_rate": 0.00018681309779463033, |
| "loss": 3.2017196655273437, |
| "step": 4685 |
| }, |
| { |
| "epoch": 0.9010566762728146, |
| "grad_norm": 0.8879526853561401, |
| "learning_rate": 0.00018658207417791405, |
| "loss": 3.2009010314941406, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.9020172910662824, |
| "grad_norm": 0.7592387795448303, |
| "learning_rate": 0.00018635095826087175, |
| "loss": 3.2003753662109373, |
| "step": 4695 |
| }, |
| { |
| "epoch": 0.9029779058597502, |
| "grad_norm": 0.9972590804100037, |
| "learning_rate": 0.00018611975062663263, |
| "loss": 3.1992095947265624, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.9039385206532181, |
| "grad_norm": 0.9059445261955261, |
| "learning_rate": 0.0001858884518585572, |
| "loss": 3.195656585693359, |
| "step": 4705 |
| }, |
| { |
| "epoch": 0.9048991354466859, |
| "grad_norm": 0.8296246528625488, |
| "learning_rate": 0.00018565706254023605, |
| "loss": 3.196135711669922, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.9058597502401537, |
| "grad_norm": 0.9912506937980652, |
| "learning_rate": 0.00018542558325548814, |
| "loss": 3.1977779388427736, |
| "step": 4715 |
| }, |
| { |
| "epoch": 0.9068203650336215, |
| "grad_norm": 0.7257289290428162, |
| "learning_rate": 0.00018519401458835948, |
| "loss": 3.197937774658203, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.9077809798270894, |
| "grad_norm": 1.0594534873962402, |
| "learning_rate": 0.00018496235712312154, |
| "loss": 3.193804168701172, |
| "step": 4725 |
| }, |
| { |
| "epoch": 0.9087415946205571, |
| "grad_norm": 1.0351169109344482, |
| "learning_rate": 0.00018473061144426986, |
| "loss": 3.196752166748047, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.909702209414025, |
| "grad_norm": 0.8392277359962463, |
| "learning_rate": 0.0001844987781365226, |
| "loss": 3.2042442321777345, |
| "step": 4735 |
| }, |
| { |
| "epoch": 0.9106628242074928, |
| "grad_norm": 0.6720691323280334, |
| "learning_rate": 0.00018426685778481897, |
| "loss": 3.2033058166503907, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.9116234390009607, |
| "grad_norm": 1.009544014930725, |
| "learning_rate": 0.00018403485097431778, |
| "loss": 3.200358200073242, |
| "step": 4745 |
| }, |
| { |
| "epoch": 0.9125840537944284, |
| "grad_norm": 0.8520932793617249, |
| "learning_rate": 0.00018380275829039602, |
| "loss": 3.201191711425781, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.9135446685878963, |
| "grad_norm": 0.9514408111572266, |
| "learning_rate": 0.0001835705803186474, |
| "loss": 3.194804000854492, |
| "step": 4755 |
| }, |
| { |
| "epoch": 0.914505283381364, |
| "grad_norm": 0.8069214224815369, |
| "learning_rate": 0.00018333831764488065, |
| "loss": 3.1965736389160155, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.9154658981748319, |
| "grad_norm": 0.8542043566703796, |
| "learning_rate": 0.00018310597085511844, |
| "loss": 3.1960010528564453, |
| "step": 4765 |
| }, |
| { |
| "epoch": 0.9164265129682997, |
| "grad_norm": 0.7166752815246582, |
| "learning_rate": 0.00018287354053559546, |
| "loss": 3.1996959686279296, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.9173871277617676, |
| "grad_norm": 1.2815790176391602, |
| "learning_rate": 0.0001826410272727574, |
| "loss": 3.200403594970703, |
| "step": 4775 |
| }, |
| { |
| "epoch": 0.9183477425552353, |
| "grad_norm": 0.9036345481872559, |
| "learning_rate": 0.00018240843165325882, |
| "loss": 3.203938674926758, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.9193083573487032, |
| "grad_norm": 0.9565702080726624, |
| "learning_rate": 0.00018217575426396256, |
| "loss": 3.197901153564453, |
| "step": 4785 |
| }, |
| { |
| "epoch": 0.920268972142171, |
| "grad_norm": 0.8386558890342712, |
| "learning_rate": 0.00018194299569193747, |
| "loss": 3.194584274291992, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.9212295869356388, |
| "grad_norm": 1.0965979099273682, |
| "learning_rate": 0.0001817101565244573, |
| "loss": 3.20220947265625, |
| "step": 4795 |
| }, |
| { |
| "epoch": 0.9221902017291066, |
| "grad_norm": 1.1644456386566162, |
| "learning_rate": 0.00018147723734899916, |
| "loss": 3.1982275009155274, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.9231508165225745, |
| "grad_norm": 1.0380982160568237, |
| "learning_rate": 0.00018124423875324198, |
| "loss": 3.1980897903442385, |
| "step": 4805 |
| }, |
| { |
| "epoch": 0.9241114313160422, |
| "grad_norm": 0.823890209197998, |
| "learning_rate": 0.00018101116132506522, |
| "loss": 3.1994667053222656, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.9250720461095101, |
| "grad_norm": 0.5867325663566589, |
| "learning_rate": 0.00018077800565254702, |
| "loss": 3.1975902557373046, |
| "step": 4815 |
| }, |
| { |
| "epoch": 0.9260326609029779, |
| "grad_norm": 1.1908833980560303, |
| "learning_rate": 0.00018054477232396312, |
| "loss": 3.198208808898926, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.9269932756964457, |
| "grad_norm": 1.0711902379989624, |
| "learning_rate": 0.00018031146192778517, |
| "loss": 3.1953447341918944, |
| "step": 4825 |
| }, |
| { |
| "epoch": 0.9279538904899135, |
| "grad_norm": 0.9716858863830566, |
| "learning_rate": 0.0001800780750526792, |
| "loss": 3.1982431411743164, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.9289145052833814, |
| "grad_norm": 0.9620060920715332, |
| "learning_rate": 0.00017984461228750422, |
| "loss": 3.196470260620117, |
| "step": 4835 |
| }, |
| { |
| "epoch": 0.9298751200768491, |
| "grad_norm": 1.015932321548462, |
| "learning_rate": 0.00017961107422131075, |
| "loss": 3.199555206298828, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.930835734870317, |
| "grad_norm": 0.9855188727378845, |
| "learning_rate": 0.00017937746144333934, |
| "loss": 3.2012577056884766, |
| "step": 4845 |
| }, |
| { |
| "epoch": 0.9317963496637848, |
| "grad_norm": 0.8330841660499573, |
| "learning_rate": 0.0001791437745430189, |
| "loss": 3.1955251693725586, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.9327569644572526, |
| "grad_norm": 0.8580245971679688, |
| "learning_rate": 0.00017891001410996554, |
| "loss": 3.1982753753662108, |
| "step": 4855 |
| }, |
| { |
| "epoch": 0.9337175792507204, |
| "grad_norm": 1.1361298561096191, |
| "learning_rate": 0.00017867618073398074, |
| "loss": 3.202219009399414, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.9346781940441883, |
| "grad_norm": 0.9495155215263367, |
| "learning_rate": 0.00017844227500505016, |
| "loss": 3.1957366943359373, |
| "step": 4865 |
| }, |
| { |
| "epoch": 0.9356388088376562, |
| "grad_norm": 0.8877089619636536, |
| "learning_rate": 0.00017820829751334194, |
| "loss": 3.198602867126465, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.9365994236311239, |
| "grad_norm": 0.7768687605857849, |
| "learning_rate": 0.0001779742488492052, |
| "loss": 3.1972635269165037, |
| "step": 4875 |
| }, |
| { |
| "epoch": 0.9375600384245918, |
| "grad_norm": 1.0583457946777344, |
| "learning_rate": 0.00017774012960316887, |
| "loss": 3.1948143005371095, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.9385206532180596, |
| "grad_norm": 0.9406375288963318, |
| "learning_rate": 0.00017750594036593974, |
| "loss": 3.1969860076904295, |
| "step": 4885 |
| }, |
| { |
| "epoch": 0.9394812680115274, |
| "grad_norm": 1.0013177394866943, |
| "learning_rate": 0.00017727168172840133, |
| "loss": 3.1973243713378907, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.9404418828049952, |
| "grad_norm": 0.7509773373603821, |
| "learning_rate": 0.00017703735428161216, |
| "loss": 3.198769760131836, |
| "step": 4895 |
| }, |
| { |
| "epoch": 0.9414024975984631, |
| "grad_norm": 0.8109461069107056, |
| "learning_rate": 0.00017680295861680445, |
| "loss": 3.1947399139404298, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.9423631123919308, |
| "grad_norm": 0.8327275514602661, |
| "learning_rate": 0.0001765684953253825, |
| "loss": 3.196604919433594, |
| "step": 4905 |
| }, |
| { |
| "epoch": 0.9433237271853987, |
| "grad_norm": 0.8777264356613159, |
| "learning_rate": 0.00017633396499892125, |
| "loss": 3.197701644897461, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.9442843419788665, |
| "grad_norm": 1.045627474784851, |
| "learning_rate": 0.00017609936822916475, |
| "loss": 3.204631805419922, |
| "step": 4915 |
| }, |
| { |
| "epoch": 0.9452449567723343, |
| "grad_norm": 1.0442372560501099, |
| "learning_rate": 0.0001758647056080248, |
| "loss": 3.2004959106445314, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.9462055715658021, |
| "grad_norm": 0.7651126384735107, |
| "learning_rate": 0.00017562997772757916, |
| "loss": 3.1948537826538086, |
| "step": 4925 |
| }, |
| { |
| "epoch": 0.94716618635927, |
| "grad_norm": 0.790465235710144, |
| "learning_rate": 0.00017539518518007043, |
| "loss": 3.196913719177246, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.9481268011527377, |
| "grad_norm": 0.9765505194664001, |
| "learning_rate": 0.00017516032855790424, |
| "loss": 3.1958955764770507, |
| "step": 4935 |
| }, |
| { |
| "epoch": 0.9490874159462056, |
| "grad_norm": 1.0204505920410156, |
| "learning_rate": 0.00017492540845364798, |
| "loss": 3.197770690917969, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.9500480307396734, |
| "grad_norm": 0.7875639796257019, |
| "learning_rate": 0.00017469042546002913, |
| "loss": 3.1972192764282226, |
| "step": 4945 |
| }, |
| { |
| "epoch": 0.9510086455331412, |
| "grad_norm": 0.8441899418830872, |
| "learning_rate": 0.00017445538016993393, |
| "loss": 3.1970443725585938, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.951969260326609, |
| "grad_norm": 0.9154574275016785, |
| "learning_rate": 0.00017422027317640575, |
| "loss": 3.1967355728149416, |
| "step": 4955 |
| }, |
| { |
| "epoch": 0.9529298751200769, |
| "grad_norm": 0.6774746775627136, |
| "learning_rate": 0.00017398510507264363, |
| "loss": 3.1978105545043944, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.9538904899135446, |
| "grad_norm": 0.9463170170783997, |
| "learning_rate": 0.0001737498764520009, |
| "loss": 3.195131301879883, |
| "step": 4965 |
| }, |
| { |
| "epoch": 0.9548511047070125, |
| "grad_norm": 1.0156409740447998, |
| "learning_rate": 0.00017351458790798338, |
| "loss": 3.197483253479004, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.9558117195004803, |
| "grad_norm": 0.8678054213523865, |
| "learning_rate": 0.0001732792400342483, |
| "loss": 3.1987661361694335, |
| "step": 4975 |
| }, |
| { |
| "epoch": 0.9567723342939481, |
| "grad_norm": 0.8004742860794067, |
| "learning_rate": 0.00017304383342460242, |
| "loss": 3.1965717315673827, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.9577329490874159, |
| "grad_norm": 1.0178972482681274, |
| "learning_rate": 0.0001728083686730008, |
| "loss": 3.197255325317383, |
| "step": 4985 |
| }, |
| { |
| "epoch": 0.9586935638808838, |
| "grad_norm": 0.7633374333381653, |
| "learning_rate": 0.00017257284637354524, |
| "loss": 3.1934268951416014, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.9596541786743515, |
| "grad_norm": 0.735908031463623, |
| "learning_rate": 0.00017233726712048253, |
| "loss": 3.194230079650879, |
| "step": 4995 |
| }, |
| { |
| "epoch": 0.9606147934678194, |
| "grad_norm": 0.936837911605835, |
| "learning_rate": 0.0001721016315082034, |
| "loss": 3.1968971252441407, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.9615754082612872, |
| "grad_norm": 0.8966473937034607, |
| "learning_rate": 0.00017186594013124057, |
| "loss": 3.2022254943847654, |
| "step": 5005 |
| }, |
| { |
| "epoch": 0.962536023054755, |
| "grad_norm": 0.8859227299690247, |
| "learning_rate": 0.00017163019358426763, |
| "loss": 3.19702205657959, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.9634966378482228, |
| "grad_norm": 0.6885091662406921, |
| "learning_rate": 0.00017139439246209728, |
| "loss": 3.1955480575561523, |
| "step": 5015 |
| }, |
| { |
| "epoch": 0.9644572526416907, |
| "grad_norm": 0.6677486896514893, |
| "learning_rate": 0.00017115853735967995, |
| "loss": 3.1977615356445312, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.9654178674351584, |
| "grad_norm": 1.0436347723007202, |
| "learning_rate": 0.00017092262887210232, |
| "loss": 3.195005416870117, |
| "step": 5025 |
| }, |
| { |
| "epoch": 0.9663784822286263, |
| "grad_norm": 0.9044774174690247, |
| "learning_rate": 0.0001706866675945856, |
| "loss": 3.1969493865966796, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.9673390970220941, |
| "grad_norm": 0.9574539661407471, |
| "learning_rate": 0.00017045065412248434, |
| "loss": 3.194741058349609, |
| "step": 5035 |
| }, |
| { |
| "epoch": 0.968299711815562, |
| "grad_norm": 0.9477970004081726, |
| "learning_rate": 0.00017021458905128477, |
| "loss": 3.1963451385498045, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.9692603266090298, |
| "grad_norm": 0.8571361899375916, |
| "learning_rate": 0.00016997847297660324, |
| "loss": 3.194466972351074, |
| "step": 5045 |
| }, |
| { |
| "epoch": 0.9702209414024976, |
| "grad_norm": 0.8901039361953735, |
| "learning_rate": 0.00016974230649418487, |
| "loss": 3.1929143905639648, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.9711815561959655, |
| "grad_norm": 1.027086615562439, |
| "learning_rate": 0.00016950609019990187, |
| "loss": 3.191559982299805, |
| "step": 5055 |
| }, |
| { |
| "epoch": 0.9721421709894332, |
| "grad_norm": 0.8591023683547974, |
| "learning_rate": 0.00016926982468975225, |
| "loss": 3.199761962890625, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.9731027857829011, |
| "grad_norm": 1.0826267004013062, |
| "learning_rate": 0.00016903351055985806, |
| "loss": 3.1961299896240236, |
| "step": 5065 |
| }, |
| { |
| "epoch": 0.9740634005763689, |
| "grad_norm": 1.0031397342681885, |
| "learning_rate": 0.0001687971484064642, |
| "loss": 3.1959823608398437, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.9750240153698367, |
| "grad_norm": 0.7752434015274048, |
| "learning_rate": 0.00016856073882593646, |
| "loss": 3.1921688079833985, |
| "step": 5075 |
| }, |
| { |
| "epoch": 0.9759846301633045, |
| "grad_norm": 0.911412239074707, |
| "learning_rate": 0.00016832428241476063, |
| "loss": 3.191498947143555, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.9769452449567724, |
| "grad_norm": 0.9647287726402283, |
| "learning_rate": 0.00016808777976954042, |
| "loss": 3.1929500579833983, |
| "step": 5085 |
| }, |
| { |
| "epoch": 0.9779058597502401, |
| "grad_norm": 0.7081168293952942, |
| "learning_rate": 0.00016785123148699624, |
| "loss": 3.1945589065551756, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.978866474543708, |
| "grad_norm": 0.8361444473266602, |
| "learning_rate": 0.00016761463816396374, |
| "loss": 3.1967111587524415, |
| "step": 5095 |
| }, |
| { |
| "epoch": 0.9798270893371758, |
| "grad_norm": 0.8908597230911255, |
| "learning_rate": 0.00016737800039739212, |
| "loss": 3.1946334838867188, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.9807877041306436, |
| "grad_norm": 0.8121721744537354, |
| "learning_rate": 0.0001671413187843427, |
| "loss": 3.195412826538086, |
| "step": 5105 |
| }, |
| { |
| "epoch": 0.9817483189241114, |
| "grad_norm": 0.6848476529121399, |
| "learning_rate": 0.00016690459392198752, |
| "loss": 3.191677284240723, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.9827089337175793, |
| "grad_norm": 0.6763195991516113, |
| "learning_rate": 0.00016666782640760766, |
| "loss": 3.1958782196044924, |
| "step": 5115 |
| }, |
| { |
| "epoch": 0.983669548511047, |
| "grad_norm": 0.8343232870101929, |
| "learning_rate": 0.00016643101683859186, |
| "loss": 3.197767639160156, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.9846301633045149, |
| "grad_norm": 0.9125416874885559, |
| "learning_rate": 0.00016619416581243498, |
| "loss": 3.1921886444091796, |
| "step": 5125 |
| }, |
| { |
| "epoch": 0.9855907780979827, |
| "grad_norm": 0.8966071009635925, |
| "learning_rate": 0.00016595727392673643, |
| "loss": 3.1952947616577148, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.9865513928914506, |
| "grad_norm": 0.6765633225440979, |
| "learning_rate": 0.00016572034177919875, |
| "loss": 3.1933788299560546, |
| "step": 5135 |
| }, |
| { |
| "epoch": 0.9875120076849183, |
| "grad_norm": 0.814222514629364, |
| "learning_rate": 0.00016548336996762608, |
| "loss": 3.1904666900634764, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.9884726224783862, |
| "grad_norm": 0.9140084981918335, |
| "learning_rate": 0.00016524635908992253, |
| "loss": 3.1964300155639647, |
| "step": 5145 |
| }, |
| { |
| "epoch": 0.989433237271854, |
| "grad_norm": 0.9220796227455139, |
| "learning_rate": 0.00016500930974409092, |
| "loss": 3.191013526916504, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.9903938520653218, |
| "grad_norm": 0.7357509732246399, |
| "learning_rate": 0.00016477222252823107, |
| "loss": 3.192600059509277, |
| "step": 5155 |
| }, |
| { |
| "epoch": 0.9913544668587896, |
| "grad_norm": 0.9331154227256775, |
| "learning_rate": 0.00016453509804053833, |
| "loss": 3.195221519470215, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.9923150816522575, |
| "grad_norm": 0.8373104929924011, |
| "learning_rate": 0.00016429793687930215, |
| "loss": 3.195628356933594, |
| "step": 5165 |
| }, |
| { |
| "epoch": 0.9932756964457252, |
| "grad_norm": 0.8984804153442383, |
| "learning_rate": 0.0001640607396429044, |
| "loss": 3.192228889465332, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.9942363112391931, |
| "grad_norm": 0.8978838324546814, |
| "learning_rate": 0.00016382350692981812, |
| "loss": 3.1939905166625975, |
| "step": 5175 |
| }, |
| { |
| "epoch": 0.9951969260326609, |
| "grad_norm": 0.6365435719490051, |
| "learning_rate": 0.00016358623933860567, |
| "loss": 3.1925621032714844, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.9961575408261287, |
| "grad_norm": 0.7753834128379822, |
| "learning_rate": 0.00016334893746791762, |
| "loss": 3.192973327636719, |
| "step": 5185 |
| }, |
| { |
| "epoch": 0.9971181556195965, |
| "grad_norm": 0.9735800623893738, |
| "learning_rate": 0.0001631116019164909, |
| "loss": 3.1959808349609373, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.9980787704130644, |
| "grad_norm": 0.6787250638008118, |
| "learning_rate": 0.00016287423328314746, |
| "loss": 3.193808746337891, |
| "step": 5195 |
| }, |
| { |
| "epoch": 0.9990393852065321, |
| "grad_norm": 0.7312700152397156, |
| "learning_rate": 0.00016263683216679274, |
| "loss": 3.1927206039428713, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.7704550623893738, |
| "learning_rate": 0.00016239939916641404, |
| "loss": 3.1963579177856447, |
| "step": 5205 |
| }, |
| { |
| "epoch": 1.0009606147934678, |
| "grad_norm": 0.7807424664497375, |
| "learning_rate": 0.00016216193488107926, |
| "loss": 3.1920791625976563, |
| "step": 5210 |
| }, |
| { |
| "epoch": 1.0019212295869357, |
| "grad_norm": 0.9543339014053345, |
| "learning_rate": 0.0001619244399099351, |
| "loss": 3.1910972595214844, |
| "step": 5215 |
| }, |
| { |
| "epoch": 1.0028818443804035, |
| "grad_norm": 0.7766204476356506, |
| "learning_rate": 0.00016168691485220573, |
| "loss": 3.194061851501465, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.0038424591738713, |
| "grad_norm": 0.8086463809013367, |
| "learning_rate": 0.00016144936030719126, |
| "loss": 3.1898380279541017, |
| "step": 5225 |
| }, |
| { |
| "epoch": 1.004803073967339, |
| "grad_norm": 1.1027806997299194, |
| "learning_rate": 0.00016121177687426617, |
| "loss": 3.193951416015625, |
| "step": 5230 |
| }, |
| { |
| "epoch": 1.005763688760807, |
| "grad_norm": 0.996457576751709, |
| "learning_rate": 0.00016097416515287787, |
| "loss": 3.188690757751465, |
| "step": 5235 |
| }, |
| { |
| "epoch": 1.0067243035542748, |
| "grad_norm": 0.7975702881813049, |
| "learning_rate": 0.00016073652574254504, |
| "loss": 3.1924358367919923, |
| "step": 5240 |
| }, |
| { |
| "epoch": 1.0076849183477425, |
| "grad_norm": 0.7981067299842834, |
| "learning_rate": 0.00016049885924285638, |
| "loss": 3.1881412506103515, |
| "step": 5245 |
| }, |
| { |
| "epoch": 1.0086455331412103, |
| "grad_norm": 0.8105031251907349, |
| "learning_rate": 0.00016026116625346876, |
| "loss": 3.192718505859375, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.0096061479346783, |
| "grad_norm": 0.7112568616867065, |
| "learning_rate": 0.00016002344737410602, |
| "loss": 3.187815856933594, |
| "step": 5255 |
| }, |
| { |
| "epoch": 1.010566762728146, |
| "grad_norm": 0.8126310706138611, |
| "learning_rate": 0.0001597857032045573, |
| "loss": 3.191788673400879, |
| "step": 5260 |
| }, |
| { |
| "epoch": 1.0115273775216138, |
| "grad_norm": 0.733974814414978, |
| "learning_rate": 0.00015954793434467545, |
| "loss": 3.1942237854003905, |
| "step": 5265 |
| }, |
| { |
| "epoch": 1.0124879923150816, |
| "grad_norm": 0.6596633791923523, |
| "learning_rate": 0.0001593101413943758, |
| "loss": 3.189275932312012, |
| "step": 5270 |
| }, |
| { |
| "epoch": 1.0134486071085496, |
| "grad_norm": 0.8669559955596924, |
| "learning_rate": 0.0001590723249536343, |
| "loss": 3.1921119689941406, |
| "step": 5275 |
| }, |
| { |
| "epoch": 1.0144092219020173, |
| "grad_norm": 1.2266993522644043, |
| "learning_rate": 0.00015883448562248624, |
| "loss": 3.191496467590332, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.015369836695485, |
| "grad_norm": 0.8078704476356506, |
| "learning_rate": 0.00015859662400102462, |
| "loss": 3.1911800384521483, |
| "step": 5285 |
| }, |
| { |
| "epoch": 1.0163304514889528, |
| "grad_norm": 0.6551411151885986, |
| "learning_rate": 0.00015835874068939878, |
| "loss": 3.188446044921875, |
| "step": 5290 |
| }, |
| { |
| "epoch": 1.0172910662824208, |
| "grad_norm": 0.8124401569366455, |
| "learning_rate": 0.0001581208362878126, |
| "loss": 3.1922882080078123, |
| "step": 5295 |
| }, |
| { |
| "epoch": 1.0182516810758886, |
| "grad_norm": 0.8051562309265137, |
| "learning_rate": 0.00015788291139652344, |
| "loss": 3.190486717224121, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.0192122958693564, |
| "grad_norm": 0.9743736386299133, |
| "learning_rate": 0.00015764496661584013, |
| "loss": 3.1894975662231446, |
| "step": 5305 |
| }, |
| { |
| "epoch": 1.0201729106628241, |
| "grad_norm": 0.7459861040115356, |
| "learning_rate": 0.00015740700254612177, |
| "loss": 3.193043518066406, |
| "step": 5310 |
| }, |
| { |
| "epoch": 1.021133525456292, |
| "grad_norm": 1.2092807292938232, |
| "learning_rate": 0.0001571690197877761, |
| "loss": 3.1933567047119142, |
| "step": 5315 |
| }, |
| { |
| "epoch": 1.0220941402497599, |
| "grad_norm": 0.7785642147064209, |
| "learning_rate": 0.00015693101894125806, |
| "loss": 3.1891590118408204, |
| "step": 5320 |
| }, |
| { |
| "epoch": 1.0230547550432276, |
| "grad_norm": 0.7812630534172058, |
| "learning_rate": 0.00015669300060706823, |
| "loss": 3.190948486328125, |
| "step": 5325 |
| }, |
| { |
| "epoch": 1.0240153698366954, |
| "grad_norm": 0.9370684027671814, |
| "learning_rate": 0.0001564549653857512, |
| "loss": 3.1883705139160154, |
| "step": 5330 |
| }, |
| { |
| "epoch": 1.0249759846301634, |
| "grad_norm": 0.8281055092811584, |
| "learning_rate": 0.00015621691387789432, |
| "loss": 3.1863418579101563, |
| "step": 5335 |
| }, |
| { |
| "epoch": 1.0259365994236311, |
| "grad_norm": 1.0316886901855469, |
| "learning_rate": 0.000155978846684126, |
| "loss": 3.19388370513916, |
| "step": 5340 |
| }, |
| { |
| "epoch": 1.026897214217099, |
| "grad_norm": 0.758702278137207, |
| "learning_rate": 0.00015574076440511407, |
| "loss": 3.1906862258911133, |
| "step": 5345 |
| }, |
| { |
| "epoch": 1.0278578290105667, |
| "grad_norm": 0.5779961347579956, |
| "learning_rate": 0.00015550266764156466, |
| "loss": 3.190965461730957, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.0288184438040346, |
| "grad_norm": 0.9982998371124268, |
| "learning_rate": 0.00015526455699422023, |
| "loss": 3.190749740600586, |
| "step": 5355 |
| }, |
| { |
| "epoch": 1.0297790585975024, |
| "grad_norm": 1.105873465538025, |
| "learning_rate": 0.00015502643306385846, |
| "loss": 3.1928503036499025, |
| "step": 5360 |
| }, |
| { |
| "epoch": 1.0307396733909702, |
| "grad_norm": 0.9287274479866028, |
| "learning_rate": 0.00015478829645129046, |
| "loss": 3.1860063552856444, |
| "step": 5365 |
| }, |
| { |
| "epoch": 1.031700288184438, |
| "grad_norm": 0.7572025656700134, |
| "learning_rate": 0.00015455014775735923, |
| "loss": 3.1904781341552733, |
| "step": 5370 |
| }, |
| { |
| "epoch": 1.032660902977906, |
| "grad_norm": 0.8866919279098511, |
| "learning_rate": 0.0001543119875829385, |
| "loss": 3.1870819091796876, |
| "step": 5375 |
| }, |
| { |
| "epoch": 1.0336215177713737, |
| "grad_norm": 0.7839595675468445, |
| "learning_rate": 0.00015407381652893066, |
| "loss": 3.1915761947631838, |
| "step": 5380 |
| }, |
| { |
| "epoch": 1.0345821325648414, |
| "grad_norm": 0.7243456244468689, |
| "learning_rate": 0.00015383563519626582, |
| "loss": 3.1893665313720705, |
| "step": 5385 |
| }, |
| { |
| "epoch": 1.0355427473583094, |
| "grad_norm": 1.0272892713546753, |
| "learning_rate": 0.0001535974441858999, |
| "loss": 3.187778854370117, |
| "step": 5390 |
| }, |
| { |
| "epoch": 1.0365033621517772, |
| "grad_norm": 1.069861650466919, |
| "learning_rate": 0.00015335924409881323, |
| "loss": 3.1904586791992187, |
| "step": 5395 |
| }, |
| { |
| "epoch": 1.037463976945245, |
| "grad_norm": 0.8298363089561462, |
| "learning_rate": 0.00015312103553600913, |
| "loss": 3.1916831970214843, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.0384245917387127, |
| "grad_norm": 0.7426701784133911, |
| "learning_rate": 0.00015288281909851213, |
| "loss": 3.1897172927856445, |
| "step": 5405 |
| }, |
| { |
| "epoch": 1.0393852065321807, |
| "grad_norm": 0.8290877938270569, |
| "learning_rate": 0.00015264459538736686, |
| "loss": 3.1861778259277345, |
| "step": 5410 |
| }, |
| { |
| "epoch": 1.0403458213256485, |
| "grad_norm": 0.5183548927307129, |
| "learning_rate": 0.00015240636500363607, |
| "loss": 3.185122489929199, |
| "step": 5415 |
| }, |
| { |
| "epoch": 1.0413064361191162, |
| "grad_norm": 0.9525085687637329, |
| "learning_rate": 0.00015216812854839957, |
| "loss": 3.187310791015625, |
| "step": 5420 |
| }, |
| { |
| "epoch": 1.042267050912584, |
| "grad_norm": 0.9072858691215515, |
| "learning_rate": 0.0001519298866227523, |
| "loss": 3.1853969573974608, |
| "step": 5425 |
| }, |
| { |
| "epoch": 1.043227665706052, |
| "grad_norm": 0.8549609780311584, |
| "learning_rate": 0.00015169163982780304, |
| "loss": 3.1908397674560547, |
| "step": 5430 |
| }, |
| { |
| "epoch": 1.0441882804995197, |
| "grad_norm": 0.5929837822914124, |
| "learning_rate": 0.00015145338876467303, |
| "loss": 3.1879791259765624, |
| "step": 5435 |
| }, |
| { |
| "epoch": 1.0451488952929875, |
| "grad_norm": 0.8749310970306396, |
| "learning_rate": 0.00015121513403449402, |
| "loss": 3.1909906387329103, |
| "step": 5440 |
| }, |
| { |
| "epoch": 1.0461095100864553, |
| "grad_norm": 0.6831730604171753, |
| "learning_rate": 0.00015097687623840726, |
| "loss": 3.188145637512207, |
| "step": 5445 |
| }, |
| { |
| "epoch": 1.0470701248799232, |
| "grad_norm": 0.7838398814201355, |
| "learning_rate": 0.00015073861597756144, |
| "loss": 3.1877973556518553, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.048030739673391, |
| "grad_norm": 0.6628211140632629, |
| "learning_rate": 0.00015050035385311182, |
| "loss": 3.1896636962890623, |
| "step": 5455 |
| }, |
| { |
| "epoch": 1.0489913544668588, |
| "grad_norm": 0.737322211265564, |
| "learning_rate": 0.00015026209046621803, |
| "loss": 3.1869300842285155, |
| "step": 5460 |
| }, |
| { |
| "epoch": 1.0499519692603265, |
| "grad_norm": 1.0209139585494995, |
| "learning_rate": 0.0001500238264180431, |
| "loss": 3.187608528137207, |
| "step": 5465 |
| }, |
| { |
| "epoch": 1.0509125840537945, |
| "grad_norm": 0.7242445945739746, |
| "learning_rate": 0.00014978556230975173, |
| "loss": 3.188776969909668, |
| "step": 5470 |
| }, |
| { |
| "epoch": 1.0518731988472623, |
| "grad_norm": 0.968614399433136, |
| "learning_rate": 0.0001495472987425086, |
| "loss": 3.1897449493408203, |
| "step": 5475 |
| }, |
| { |
| "epoch": 1.05283381364073, |
| "grad_norm": 1.0760611295700073, |
| "learning_rate": 0.00014930903631747724, |
| "loss": 3.1843488693237303, |
| "step": 5480 |
| }, |
| { |
| "epoch": 1.0537944284341978, |
| "grad_norm": 0.8381465673446655, |
| "learning_rate": 0.00014907077563581809, |
| "loss": 3.189168930053711, |
| "step": 5485 |
| }, |
| { |
| "epoch": 1.0547550432276658, |
| "grad_norm": 0.5522369742393494, |
| "learning_rate": 0.00014883251729868737, |
| "loss": 3.1856075286865235, |
| "step": 5490 |
| }, |
| { |
| "epoch": 1.0557156580211335, |
| "grad_norm": 0.8688974380493164, |
| "learning_rate": 0.00014859426190723523, |
| "loss": 3.1877534866333006, |
| "step": 5495 |
| }, |
| { |
| "epoch": 1.0566762728146013, |
| "grad_norm": 1.0312248468399048, |
| "learning_rate": 0.00014835601006260455, |
| "loss": 3.187721824645996, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.057636887608069, |
| "grad_norm": 1.0165605545043945, |
| "learning_rate": 0.00014811776236592914, |
| "loss": 3.1891069412231445, |
| "step": 5505 |
| }, |
| { |
| "epoch": 1.058597502401537, |
| "grad_norm": 0.6776289939880371, |
| "learning_rate": 0.00014787951941833236, |
| "loss": 3.188289451599121, |
| "step": 5510 |
| }, |
| { |
| "epoch": 1.0595581171950048, |
| "grad_norm": 0.6294692158699036, |
| "learning_rate": 0.00014764128182092557, |
| "loss": 3.191664123535156, |
| "step": 5515 |
| }, |
| { |
| "epoch": 1.0605187319884726, |
| "grad_norm": 0.8317521810531616, |
| "learning_rate": 0.00014740305017480674, |
| "loss": 3.1893966674804686, |
| "step": 5520 |
| }, |
| { |
| "epoch": 1.0614793467819403, |
| "grad_norm": 0.9894767999649048, |
| "learning_rate": 0.0001471648250810588, |
| "loss": 3.1885601043701173, |
| "step": 5525 |
| }, |
| { |
| "epoch": 1.0624399615754083, |
| "grad_norm": 0.7484564185142517, |
| "learning_rate": 0.00014692660714074796, |
| "loss": 3.18869571685791, |
| "step": 5530 |
| }, |
| { |
| "epoch": 1.063400576368876, |
| "grad_norm": 0.8479475975036621, |
| "learning_rate": 0.00014668839695492264, |
| "loss": 3.184678649902344, |
| "step": 5535 |
| }, |
| { |
| "epoch": 1.0643611911623438, |
| "grad_norm": 0.6609562039375305, |
| "learning_rate": 0.00014645019512461157, |
| "loss": 3.1846710205078126, |
| "step": 5540 |
| }, |
| { |
| "epoch": 1.0653218059558118, |
| "grad_norm": 0.955204963684082, |
| "learning_rate": 0.0001462120022508223, |
| "loss": 3.19210205078125, |
| "step": 5545 |
| }, |
| { |
| "epoch": 1.0662824207492796, |
| "grad_norm": 0.7516992092132568, |
| "learning_rate": 0.00014597381893453998, |
| "loss": 3.188296318054199, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.0672430355427474, |
| "grad_norm": 0.7409364581108093, |
| "learning_rate": 0.0001457356457767255, |
| "loss": 3.1862789154052735, |
| "step": 5555 |
| }, |
| { |
| "epoch": 1.0682036503362151, |
| "grad_norm": 0.6613131165504456, |
| "learning_rate": 0.00014549748337831423, |
| "loss": 3.185582733154297, |
| "step": 5560 |
| }, |
| { |
| "epoch": 1.069164265129683, |
| "grad_norm": 0.6710831522941589, |
| "learning_rate": 0.00014525933234021428, |
| "loss": 3.1856903076171874, |
| "step": 5565 |
| }, |
| { |
| "epoch": 1.0701248799231509, |
| "grad_norm": 0.6263184547424316, |
| "learning_rate": 0.00014502119326330516, |
| "loss": 3.186991310119629, |
| "step": 5570 |
| }, |
| { |
| "epoch": 1.0710854947166186, |
| "grad_norm": 0.838955283164978, |
| "learning_rate": 0.0001447830667484362, |
| "loss": 3.1853305816650392, |
| "step": 5575 |
| }, |
| { |
| "epoch": 1.0720461095100864, |
| "grad_norm": 0.6884387135505676, |
| "learning_rate": 0.00014454495339642504, |
| "loss": 3.189616584777832, |
| "step": 5580 |
| }, |
| { |
| "epoch": 1.0730067243035544, |
| "grad_norm": 0.8635831475257874, |
| "learning_rate": 0.00014430685380805605, |
| "loss": 3.185613822937012, |
| "step": 5585 |
| }, |
| { |
| "epoch": 1.0739673390970221, |
| "grad_norm": 0.842369794845581, |
| "learning_rate": 0.00014406876858407894, |
| "loss": 3.1873950958251953, |
| "step": 5590 |
| }, |
| { |
| "epoch": 1.07492795389049, |
| "grad_norm": 0.6380950212478638, |
| "learning_rate": 0.00014383069832520721, |
| "loss": 3.1872686386108398, |
| "step": 5595 |
| }, |
| { |
| "epoch": 1.0758885686839577, |
| "grad_norm": 0.6167466044425964, |
| "learning_rate": 0.00014359264363211647, |
| "loss": 3.1889183044433596, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.0768491834774256, |
| "grad_norm": 0.8871692419052124, |
| "learning_rate": 0.00014335460510544318, |
| "loss": 3.190326118469238, |
| "step": 5605 |
| }, |
| { |
| "epoch": 1.0778097982708934, |
| "grad_norm": 0.8428598642349243, |
| "learning_rate": 0.00014311658334578296, |
| "loss": 3.190188407897949, |
| "step": 5610 |
| }, |
| { |
| "epoch": 1.0787704130643612, |
| "grad_norm": 0.7600328922271729, |
| "learning_rate": 0.00014287857895368906, |
| "loss": 3.1856243133544924, |
| "step": 5615 |
| }, |
| { |
| "epoch": 1.079731027857829, |
| "grad_norm": 0.7061536908149719, |
| "learning_rate": 0.00014264059252967108, |
| "loss": 3.1858592987060548, |
| "step": 5620 |
| }, |
| { |
| "epoch": 1.080691642651297, |
| "grad_norm": 0.6776458024978638, |
| "learning_rate": 0.0001424026246741931, |
| "loss": 3.188393402099609, |
| "step": 5625 |
| }, |
| { |
| "epoch": 1.0816522574447647, |
| "grad_norm": 0.9490513205528259, |
| "learning_rate": 0.00014216467598767248, |
| "loss": 3.1876445770263673, |
| "step": 5630 |
| }, |
| { |
| "epoch": 1.0826128722382324, |
| "grad_norm": 0.8668431043624878, |
| "learning_rate": 0.0001419267470704781, |
| "loss": 3.1881153106689455, |
| "step": 5635 |
| }, |
| { |
| "epoch": 1.0835734870317002, |
| "grad_norm": 0.7393624782562256, |
| "learning_rate": 0.00014168883852292915, |
| "loss": 3.186983108520508, |
| "step": 5640 |
| }, |
| { |
| "epoch": 1.0845341018251682, |
| "grad_norm": 0.7788834571838379, |
| "learning_rate": 0.00014145095094529318, |
| "loss": 3.187802314758301, |
| "step": 5645 |
| }, |
| { |
| "epoch": 1.085494716618636, |
| "grad_norm": 0.6699324250221252, |
| "learning_rate": 0.00014121308493778503, |
| "loss": 3.1878355026245115, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.0864553314121037, |
| "grad_norm": 0.7733721733093262, |
| "learning_rate": 0.00014097524110056494, |
| "loss": 3.1843936920166014, |
| "step": 5655 |
| }, |
| { |
| "epoch": 1.0874159462055715, |
| "grad_norm": 0.711729884147644, |
| "learning_rate": 0.0001407374200337374, |
| "loss": 3.1883363723754883, |
| "step": 5660 |
| }, |
| { |
| "epoch": 1.0883765609990395, |
| "grad_norm": 0.6013932824134827, |
| "learning_rate": 0.0001404996223373494, |
| "loss": 3.1905609130859376, |
| "step": 5665 |
| }, |
| { |
| "epoch": 1.0893371757925072, |
| "grad_norm": 0.7252037525177002, |
| "learning_rate": 0.0001402618486113888, |
| "loss": 3.1879077911376954, |
| "step": 5670 |
| }, |
| { |
| "epoch": 1.090297790585975, |
| "grad_norm": 1.1090692281723022, |
| "learning_rate": 0.0001400240994557832, |
| "loss": 3.1889415740966798, |
| "step": 5675 |
| }, |
| { |
| "epoch": 1.0912584053794427, |
| "grad_norm": 1.0110430717468262, |
| "learning_rate": 0.0001397863754703981, |
| "loss": 3.185939407348633, |
| "step": 5680 |
| }, |
| { |
| "epoch": 1.0922190201729107, |
| "grad_norm": 0.8803393840789795, |
| "learning_rate": 0.00013954867725503542, |
| "loss": 3.1873733520507814, |
| "step": 5685 |
| }, |
| { |
| "epoch": 1.0931796349663785, |
| "grad_norm": 0.6381970047950745, |
| "learning_rate": 0.00013931100540943227, |
| "loss": 3.1853010177612306, |
| "step": 5690 |
| }, |
| { |
| "epoch": 1.0941402497598463, |
| "grad_norm": 0.5555366277694702, |
| "learning_rate": 0.00013907336053325896, |
| "loss": 3.184972381591797, |
| "step": 5695 |
| }, |
| { |
| "epoch": 1.0951008645533142, |
| "grad_norm": 1.0896075963974, |
| "learning_rate": 0.00013883574322611801, |
| "loss": 3.186721992492676, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.096061479346782, |
| "grad_norm": 0.8639760613441467, |
| "learning_rate": 0.00013859815408754218, |
| "loss": 3.1882835388183595, |
| "step": 5705 |
| }, |
| { |
| "epoch": 1.0970220941402498, |
| "grad_norm": 0.7950151562690735, |
| "learning_rate": 0.00013836059371699332, |
| "loss": 3.187421417236328, |
| "step": 5710 |
| }, |
| { |
| "epoch": 1.0979827089337175, |
| "grad_norm": 0.6552711725234985, |
| "learning_rate": 0.00013812306271386048, |
| "loss": 3.184224319458008, |
| "step": 5715 |
| }, |
| { |
| "epoch": 1.0989433237271853, |
| "grad_norm": 0.7803718447685242, |
| "learning_rate": 0.00013788556167745886, |
| "loss": 3.1838443756103514, |
| "step": 5720 |
| }, |
| { |
| "epoch": 1.0999039385206533, |
| "grad_norm": 0.8713034987449646, |
| "learning_rate": 0.00013764809120702797, |
| "loss": 3.188005828857422, |
| "step": 5725 |
| }, |
| { |
| "epoch": 1.100864553314121, |
| "grad_norm": 0.638888955116272, |
| "learning_rate": 0.00013741065190173008, |
| "loss": 3.183907890319824, |
| "step": 5730 |
| }, |
| { |
| "epoch": 1.1018251681075888, |
| "grad_norm": 0.8170698285102844, |
| "learning_rate": 0.00013717324436064897, |
| "loss": 3.184910011291504, |
| "step": 5735 |
| }, |
| { |
| "epoch": 1.1027857829010568, |
| "grad_norm": 0.8812254667282104, |
| "learning_rate": 0.0001369358691827882, |
| "loss": 3.1871992111206056, |
| "step": 5740 |
| }, |
| { |
| "epoch": 1.1037463976945245, |
| "grad_norm": 0.6346907615661621, |
| "learning_rate": 0.0001366985269670697, |
| "loss": 3.1861663818359376, |
| "step": 5745 |
| }, |
| { |
| "epoch": 1.1047070124879923, |
| "grad_norm": 0.930957019329071, |
| "learning_rate": 0.00013646121831233226, |
| "loss": 3.1821567535400392, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.10566762728146, |
| "grad_norm": 0.7975543141365051, |
| "learning_rate": 0.00013622394381732986, |
| "loss": 3.1874359130859373, |
| "step": 5755 |
| }, |
| { |
| "epoch": 1.106628242074928, |
| "grad_norm": 0.6229592561721802, |
| "learning_rate": 0.0001359867040807305, |
| "loss": 3.1849109649658205, |
| "step": 5760 |
| }, |
| { |
| "epoch": 1.1075888568683958, |
| "grad_norm": 0.7393524050712585, |
| "learning_rate": 0.0001357494997011143, |
| "loss": 3.1865949630737305, |
| "step": 5765 |
| }, |
| { |
| "epoch": 1.1085494716618636, |
| "grad_norm": 0.7825970649719238, |
| "learning_rate": 0.0001355123312769723, |
| "loss": 3.179779815673828, |
| "step": 5770 |
| }, |
| { |
| "epoch": 1.1095100864553313, |
| "grad_norm": 1.018021821975708, |
| "learning_rate": 0.00013527519940670474, |
| "loss": 3.1880813598632813, |
| "step": 5775 |
| }, |
| { |
| "epoch": 1.1104707012487993, |
| "grad_norm": 0.7721784114837646, |
| "learning_rate": 0.00013503810468861967, |
| "loss": 3.1860368728637694, |
| "step": 5780 |
| }, |
| { |
| "epoch": 1.111431316042267, |
| "grad_norm": 0.7420386075973511, |
| "learning_rate": 0.00013480104772093136, |
| "loss": 3.181845855712891, |
| "step": 5785 |
| }, |
| { |
| "epoch": 1.1123919308357348, |
| "grad_norm": 0.7086904644966125, |
| "learning_rate": 0.0001345640291017588, |
| "loss": 3.181869125366211, |
| "step": 5790 |
| }, |
| { |
| "epoch": 1.1133525456292026, |
| "grad_norm": 0.5291205048561096, |
| "learning_rate": 0.00013432704942912445, |
| "loss": 3.1842918395996094, |
| "step": 5795 |
| }, |
| { |
| "epoch": 1.1143131604226706, |
| "grad_norm": 0.6153315901756287, |
| "learning_rate": 0.0001340901093009522, |
| "loss": 3.180573654174805, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.1152737752161384, |
| "grad_norm": 0.8189062476158142, |
| "learning_rate": 0.00013385320931506636, |
| "loss": 3.181578826904297, |
| "step": 5805 |
| }, |
| { |
| "epoch": 1.1162343900096061, |
| "grad_norm": 0.8425748348236084, |
| "learning_rate": 0.00013361635006918987, |
| "loss": 3.1840728759765624, |
| "step": 5810 |
| }, |
| { |
| "epoch": 1.1171950048030739, |
| "grad_norm": 0.6964288353919983, |
| "learning_rate": 0.00013337953216094293, |
| "loss": 3.183258056640625, |
| "step": 5815 |
| }, |
| { |
| "epoch": 1.1181556195965419, |
| "grad_norm": 0.8112155795097351, |
| "learning_rate": 0.0001331427561878414, |
| "loss": 3.1822595596313477, |
| "step": 5820 |
| }, |
| { |
| "epoch": 1.1191162343900096, |
| "grad_norm": 0.5789711475372314, |
| "learning_rate": 0.0001329060227472953, |
| "loss": 3.1870792388916014, |
| "step": 5825 |
| }, |
| { |
| "epoch": 1.1200768491834774, |
| "grad_norm": 0.7857502102851868, |
| "learning_rate": 0.00013266933243660748, |
| "loss": 3.1816539764404297, |
| "step": 5830 |
| }, |
| { |
| "epoch": 1.1210374639769451, |
| "grad_norm": 0.8584936857223511, |
| "learning_rate": 0.0001324326858529718, |
| "loss": 3.1840972900390625, |
| "step": 5835 |
| }, |
| { |
| "epoch": 1.1219980787704131, |
| "grad_norm": 0.7861515879631042, |
| "learning_rate": 0.00013219608359347194, |
| "loss": 3.1865585327148436, |
| "step": 5840 |
| }, |
| { |
| "epoch": 1.122958693563881, |
| "grad_norm": 0.6675422787666321, |
| "learning_rate": 0.0001319595262550796, |
| "loss": 3.1822277069091798, |
| "step": 5845 |
| }, |
| { |
| "epoch": 1.1239193083573487, |
| "grad_norm": 0.7415298223495483, |
| "learning_rate": 0.00013172301443465327, |
| "loss": 3.18204345703125, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.1248799231508164, |
| "grad_norm": 0.7668789029121399, |
| "learning_rate": 0.00013148654872893647, |
| "loss": 3.1823986053466795, |
| "step": 5855 |
| }, |
| { |
| "epoch": 1.1258405379442844, |
| "grad_norm": 1.0284775495529175, |
| "learning_rate": 0.00013125012973455645, |
| "loss": 3.186284637451172, |
| "step": 5860 |
| }, |
| { |
| "epoch": 1.1268011527377522, |
| "grad_norm": 0.9501076936721802, |
| "learning_rate": 0.00013101375804802268, |
| "loss": 3.180257034301758, |
| "step": 5865 |
| }, |
| { |
| "epoch": 1.12776176753122, |
| "grad_norm": 0.6620021462440491, |
| "learning_rate": 0.00013077743426572508, |
| "loss": 3.1850921630859377, |
| "step": 5870 |
| }, |
| { |
| "epoch": 1.1287223823246877, |
| "grad_norm": 0.6377175450325012, |
| "learning_rate": 0.0001305411589839328, |
| "loss": 3.1847787857055665, |
| "step": 5875 |
| }, |
| { |
| "epoch": 1.1296829971181557, |
| "grad_norm": 0.8956018686294556, |
| "learning_rate": 0.00013030493279879267, |
| "loss": 3.1820547103881838, |
| "step": 5880 |
| }, |
| { |
| "epoch": 1.1306436119116234, |
| "grad_norm": 0.9648638367652893, |
| "learning_rate": 0.00013006875630632757, |
| "loss": 3.1855083465576173, |
| "step": 5885 |
| }, |
| { |
| "epoch": 1.1316042267050912, |
| "grad_norm": 0.7749320864677429, |
| "learning_rate": 0.000129832630102435, |
| "loss": 3.18371639251709, |
| "step": 5890 |
| }, |
| { |
| "epoch": 1.1325648414985592, |
| "grad_norm": 0.7162895202636719, |
| "learning_rate": 0.00012959655478288556, |
| "loss": 3.1814332962036134, |
| "step": 5895 |
| }, |
| { |
| "epoch": 1.133525456292027, |
| "grad_norm": 0.7946479320526123, |
| "learning_rate": 0.00012936053094332158, |
| "loss": 3.183310699462891, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.1344860710854947, |
| "grad_norm": 0.9230923652648926, |
| "learning_rate": 0.00012912455917925535, |
| "loss": 3.1882291793823243, |
| "step": 5905 |
| }, |
| { |
| "epoch": 1.1354466858789625, |
| "grad_norm": 0.6934447288513184, |
| "learning_rate": 0.0001288886400860679, |
| "loss": 3.1866472244262694, |
| "step": 5910 |
| }, |
| { |
| "epoch": 1.1364073006724302, |
| "grad_norm": 0.5946722626686096, |
| "learning_rate": 0.00012865277425900724, |
| "loss": 3.1836269378662108, |
| "step": 5915 |
| }, |
| { |
| "epoch": 1.1373679154658982, |
| "grad_norm": 0.6142674088478088, |
| "learning_rate": 0.00012841696229318712, |
| "loss": 3.1850711822509767, |
| "step": 5920 |
| }, |
| { |
| "epoch": 1.138328530259366, |
| "grad_norm": 0.5882613062858582, |
| "learning_rate": 0.00012818120478358522, |
| "loss": 3.1808708190917967, |
| "step": 5925 |
| }, |
| { |
| "epoch": 1.1392891450528337, |
| "grad_norm": 1.1265987157821655, |
| "learning_rate": 0.000127945502325042, |
| "loss": 3.1862953186035154, |
| "step": 5930 |
| }, |
| { |
| "epoch": 1.1402497598463017, |
| "grad_norm": 0.8293631076812744, |
| "learning_rate": 0.00012770985551225898, |
| "loss": 3.1817962646484377, |
| "step": 5935 |
| }, |
| { |
| "epoch": 1.1412103746397695, |
| "grad_norm": 0.6100813150405884, |
| "learning_rate": 0.00012747426493979713, |
| "loss": 3.182262420654297, |
| "step": 5940 |
| }, |
| { |
| "epoch": 1.1421709894332372, |
| "grad_norm": 0.6388895511627197, |
| "learning_rate": 0.00012723873120207575, |
| "loss": 3.183791732788086, |
| "step": 5945 |
| }, |
| { |
| "epoch": 1.143131604226705, |
| "grad_norm": 1.1153887510299683, |
| "learning_rate": 0.00012700325489337056, |
| "loss": 3.1867645263671873, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.144092219020173, |
| "grad_norm": 0.6275951862335205, |
| "learning_rate": 0.00012676783660781245, |
| "loss": 3.183899688720703, |
| "step": 5955 |
| }, |
| { |
| "epoch": 1.1450528338136408, |
| "grad_norm": 0.6649777293205261, |
| "learning_rate": 0.00012653247693938588, |
| "loss": 3.1856327056884766, |
| "step": 5960 |
| }, |
| { |
| "epoch": 1.1460134486071085, |
| "grad_norm": 0.7300867438316345, |
| "learning_rate": 0.00012629717648192747, |
| "loss": 3.1813177108764648, |
| "step": 5965 |
| }, |
| { |
| "epoch": 1.1469740634005763, |
| "grad_norm": 0.8946056365966797, |
| "learning_rate": 0.00012606193582912446, |
| "loss": 3.182071304321289, |
| "step": 5970 |
| }, |
| { |
| "epoch": 1.1479346781940443, |
| "grad_norm": 0.8307191729545593, |
| "learning_rate": 0.00012582675557451306, |
| "loss": 3.1808849334716798, |
| "step": 5975 |
| }, |
| { |
| "epoch": 1.148895292987512, |
| "grad_norm": 0.8783102631568909, |
| "learning_rate": 0.00012559163631147723, |
| "loss": 3.184329032897949, |
| "step": 5980 |
| }, |
| { |
| "epoch": 1.1498559077809798, |
| "grad_norm": 0.7081605792045593, |
| "learning_rate": 0.00012535657863324695, |
| "loss": 3.1825115203857424, |
| "step": 5985 |
| }, |
| { |
| "epoch": 1.1508165225744476, |
| "grad_norm": 0.5934545993804932, |
| "learning_rate": 0.0001251215831328969, |
| "loss": 3.1839290618896485, |
| "step": 5990 |
| }, |
| { |
| "epoch": 1.1517771373679155, |
| "grad_norm": 0.8041390180587769, |
| "learning_rate": 0.00012488665040334479, |
| "loss": 3.183102607727051, |
| "step": 5995 |
| }, |
| { |
| "epoch": 1.1527377521613833, |
| "grad_norm": 0.6526947617530823, |
| "learning_rate": 0.00012465178103735, |
| "loss": 3.1817670822143556, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.153698366954851, |
| "grad_norm": 0.6892653703689575, |
| "learning_rate": 0.00012441697562751215, |
| "loss": 3.180023193359375, |
| "step": 6005 |
| }, |
| { |
| "epoch": 1.154658981748319, |
| "grad_norm": 0.6279175877571106, |
| "learning_rate": 0.00012418223476626925, |
| "loss": 3.180769157409668, |
| "step": 6010 |
| }, |
| { |
| "epoch": 1.1556195965417868, |
| "grad_norm": 0.8991310596466064, |
| "learning_rate": 0.00012394755904589664, |
| "loss": 3.185033416748047, |
| "step": 6015 |
| }, |
| { |
| "epoch": 1.1565802113352546, |
| "grad_norm": 0.6720035672187805, |
| "learning_rate": 0.00012371294905850518, |
| "loss": 3.1769649505615236, |
| "step": 6020 |
| }, |
| { |
| "epoch": 1.1575408261287223, |
| "grad_norm": 0.8395649790763855, |
| "learning_rate": 0.00012347840539603998, |
| "loss": 3.1827545166015625, |
| "step": 6025 |
| }, |
| { |
| "epoch": 1.15850144092219, |
| "grad_norm": 0.8189507722854614, |
| "learning_rate": 0.00012324392865027873, |
| "loss": 3.1804759979248045, |
| "step": 6030 |
| }, |
| { |
| "epoch": 1.159462055715658, |
| "grad_norm": 0.9116944074630737, |
| "learning_rate": 0.00012300951941283036, |
| "loss": 3.185091018676758, |
| "step": 6035 |
| }, |
| { |
| "epoch": 1.1604226705091258, |
| "grad_norm": 0.6200742125511169, |
| "learning_rate": 0.00012277517827513341, |
| "loss": 3.1775371551513674, |
| "step": 6040 |
| }, |
| { |
| "epoch": 1.1613832853025936, |
| "grad_norm": 0.6865299344062805, |
| "learning_rate": 0.00012254090582845457, |
| "loss": 3.1813201904296875, |
| "step": 6045 |
| }, |
| { |
| "epoch": 1.1623439000960616, |
| "grad_norm": 0.7450284957885742, |
| "learning_rate": 0.00012230670266388728, |
| "loss": 3.18134765625, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.1633045148895294, |
| "grad_norm": 0.814132571220398, |
| "learning_rate": 0.00012207256937235014, |
| "loss": 3.182879638671875, |
| "step": 6055 |
| }, |
| { |
| "epoch": 1.1642651296829971, |
| "grad_norm": 0.8822999000549316, |
| "learning_rate": 0.00012183850654458553, |
| "loss": 3.1848114013671873, |
| "step": 6060 |
| }, |
| { |
| "epoch": 1.1652257444764649, |
| "grad_norm": 0.5908654928207397, |
| "learning_rate": 0.00012160451477115783, |
| "loss": 3.1844045639038088, |
| "step": 6065 |
| }, |
| { |
| "epoch": 1.1661863592699326, |
| "grad_norm": 0.5196489095687866, |
| "learning_rate": 0.00012137059464245242, |
| "loss": 3.181051254272461, |
| "step": 6070 |
| }, |
| { |
| "epoch": 1.1671469740634006, |
| "grad_norm": 0.7493326663970947, |
| "learning_rate": 0.00012113674674867379, |
| "loss": 3.180977630615234, |
| "step": 6075 |
| }, |
| { |
| "epoch": 1.1681075888568684, |
| "grad_norm": 0.8588430881500244, |
| "learning_rate": 0.00012090297167984409, |
| "loss": 3.183562088012695, |
| "step": 6080 |
| }, |
| { |
| "epoch": 1.1690682036503361, |
| "grad_norm": 0.707949697971344, |
| "learning_rate": 0.00012066927002580185, |
| "loss": 3.186185836791992, |
| "step": 6085 |
| }, |
| { |
| "epoch": 1.1700288184438041, |
| "grad_norm": 0.7008398771286011, |
| "learning_rate": 0.00012043564237620028, |
| "loss": 3.183705520629883, |
| "step": 6090 |
| }, |
| { |
| "epoch": 1.170989433237272, |
| "grad_norm": 0.5148990750312805, |
| "learning_rate": 0.00012020208932050595, |
| "loss": 3.180450439453125, |
| "step": 6095 |
| }, |
| { |
| "epoch": 1.1719500480307397, |
| "grad_norm": 1.0000126361846924, |
| "learning_rate": 0.00011996861144799712, |
| "loss": 3.1828609466552735, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.1729106628242074, |
| "grad_norm": 0.9941655993461609, |
| "learning_rate": 0.0001197352093477625, |
| "loss": 3.1851844787597656, |
| "step": 6105 |
| }, |
| { |
| "epoch": 1.1738712776176754, |
| "grad_norm": 0.6262122988700867, |
| "learning_rate": 0.00011950188360869947, |
| "loss": 3.186477279663086, |
| "step": 6110 |
| }, |
| { |
| "epoch": 1.1748318924111432, |
| "grad_norm": 0.7170460224151611, |
| "learning_rate": 0.00011926863481951279, |
| "loss": 3.181361770629883, |
| "step": 6115 |
| }, |
| { |
| "epoch": 1.175792507204611, |
| "grad_norm": 0.5651366114616394, |
| "learning_rate": 0.00011903546356871315, |
| "loss": 3.180009460449219, |
| "step": 6120 |
| }, |
| { |
| "epoch": 1.1767531219980787, |
| "grad_norm": 0.7348089814186096, |
| "learning_rate": 0.00011880237044461546, |
| "loss": 3.1843181610107423, |
| "step": 6125 |
| }, |
| { |
| "epoch": 1.1777137367915467, |
| "grad_norm": 0.8490554690361023, |
| "learning_rate": 0.00011856935603533759, |
| "loss": 3.1855024337768554, |
| "step": 6130 |
| }, |
| { |
| "epoch": 1.1786743515850144, |
| "grad_norm": 0.688126802444458, |
| "learning_rate": 0.0001183364209287989, |
| "loss": 3.1825450897216796, |
| "step": 6135 |
| }, |
| { |
| "epoch": 1.1796349663784822, |
| "grad_norm": 0.6043757796287537, |
| "learning_rate": 0.00011810356571271847, |
| "loss": 3.1808750152587892, |
| "step": 6140 |
| }, |
| { |
| "epoch": 1.18059558117195, |
| "grad_norm": 0.6653470396995544, |
| "learning_rate": 0.00011787079097461394, |
| "loss": 3.185806655883789, |
| "step": 6145 |
| }, |
| { |
| "epoch": 1.181556195965418, |
| "grad_norm": 0.5727336406707764, |
| "learning_rate": 0.0001176380973017998, |
| "loss": 3.182196617126465, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.1825168107588857, |
| "grad_norm": 0.4174487292766571, |
| "learning_rate": 0.00011740548528138613, |
| "loss": 3.1803747177124024, |
| "step": 6155 |
| }, |
| { |
| "epoch": 1.1834774255523535, |
| "grad_norm": 0.8878217339515686, |
| "learning_rate": 0.00011717295550027685, |
| "loss": 3.1840145111083986, |
| "step": 6160 |
| }, |
| { |
| "epoch": 1.1844380403458212, |
| "grad_norm": 0.8910025954246521, |
| "learning_rate": 0.00011694050854516853, |
| "loss": 3.181249237060547, |
| "step": 6165 |
| }, |
| { |
| "epoch": 1.1853986551392892, |
| "grad_norm": 0.7205166220664978, |
| "learning_rate": 0.00011670814500254858, |
| "loss": 3.1778148651123046, |
| "step": 6170 |
| }, |
| { |
| "epoch": 1.186359269932757, |
| "grad_norm": 0.6521825790405273, |
| "learning_rate": 0.00011647586545869418, |
| "loss": 3.17816276550293, |
| "step": 6175 |
| }, |
| { |
| "epoch": 1.1873198847262247, |
| "grad_norm": 0.6897614002227783, |
| "learning_rate": 0.00011624367049967037, |
| "loss": 3.1780323028564452, |
| "step": 6180 |
| }, |
| { |
| "epoch": 1.1882804995196925, |
| "grad_norm": 0.5899947881698608, |
| "learning_rate": 0.00011601156071132883, |
| "loss": 3.181020164489746, |
| "step": 6185 |
| }, |
| { |
| "epoch": 1.1892411143131605, |
| "grad_norm": 0.6569263935089111, |
| "learning_rate": 0.00011577953667930641, |
| "loss": 3.1823570251464846, |
| "step": 6190 |
| }, |
| { |
| "epoch": 1.1902017291066282, |
| "grad_norm": 0.6824354529380798, |
| "learning_rate": 0.00011554759898902355, |
| "loss": 3.1782264709472656, |
| "step": 6195 |
| }, |
| { |
| "epoch": 1.191162343900096, |
| "grad_norm": 0.5225342512130737, |
| "learning_rate": 0.00011531574822568278, |
| "loss": 3.1797101974487303, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.192122958693564, |
| "grad_norm": 0.5605508685112, |
| "learning_rate": 0.00011508398497426745, |
| "loss": 3.1792762756347654, |
| "step": 6205 |
| }, |
| { |
| "epoch": 1.1930835734870318, |
| "grad_norm": 0.8098820447921753, |
| "learning_rate": 0.00011485230981953997, |
| "loss": 3.183140182495117, |
| "step": 6210 |
| }, |
| { |
| "epoch": 1.1940441882804995, |
| "grad_norm": 0.6241822838783264, |
| "learning_rate": 0.00011462072334604053, |
| "loss": 3.1846290588378907, |
| "step": 6215 |
| }, |
| { |
| "epoch": 1.1950048030739673, |
| "grad_norm": 0.6163104772567749, |
| "learning_rate": 0.00011438922613808554, |
| "loss": 3.184892272949219, |
| "step": 6220 |
| }, |
| { |
| "epoch": 1.195965417867435, |
| "grad_norm": 0.6423901915550232, |
| "learning_rate": 0.00011415781877976628, |
| "loss": 3.180209922790527, |
| "step": 6225 |
| }, |
| { |
| "epoch": 1.196926032660903, |
| "grad_norm": 0.743615448474884, |
| "learning_rate": 0.00011392650185494712, |
| "loss": 3.181998634338379, |
| "step": 6230 |
| }, |
| { |
| "epoch": 1.1978866474543708, |
| "grad_norm": 0.7482926845550537, |
| "learning_rate": 0.00011369527594726452, |
| "loss": 3.178305244445801, |
| "step": 6235 |
| }, |
| { |
| "epoch": 1.1988472622478386, |
| "grad_norm": 0.6290075778961182, |
| "learning_rate": 0.0001134641416401251, |
| "loss": 3.182933807373047, |
| "step": 6240 |
| }, |
| { |
| "epoch": 1.1998078770413065, |
| "grad_norm": 0.5498515963554382, |
| "learning_rate": 0.00011323309951670444, |
| "loss": 3.1798477172851562, |
| "step": 6245 |
| }, |
| { |
| "epoch": 1.2007684918347743, |
| "grad_norm": 0.6985566020011902, |
| "learning_rate": 0.00011300215015994554, |
| "loss": 3.1814619064331056, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.201729106628242, |
| "grad_norm": 0.8303685784339905, |
| "learning_rate": 0.00011277129415255727, |
| "loss": 3.1828535079956053, |
| "step": 6255 |
| }, |
| { |
| "epoch": 1.2026897214217098, |
| "grad_norm": 0.7280902862548828, |
| "learning_rate": 0.00011254053207701308, |
| "loss": 3.17972354888916, |
| "step": 6260 |
| }, |
| { |
| "epoch": 1.2036503362151778, |
| "grad_norm": 0.6531400084495544, |
| "learning_rate": 0.00011230986451554932, |
| "loss": 3.1785831451416016, |
| "step": 6265 |
| }, |
| { |
| "epoch": 1.2046109510086456, |
| "grad_norm": 0.6152121424674988, |
| "learning_rate": 0.00011207929205016386, |
| "loss": 3.177712631225586, |
| "step": 6270 |
| }, |
| { |
| "epoch": 1.2055715658021133, |
| "grad_norm": 0.5569039583206177, |
| "learning_rate": 0.00011184881526261483, |
| "loss": 3.1826234817504884, |
| "step": 6275 |
| }, |
| { |
| "epoch": 1.206532180595581, |
| "grad_norm": 1.1272635459899902, |
| "learning_rate": 0.00011161843473441867, |
| "loss": 3.179414749145508, |
| "step": 6280 |
| }, |
| { |
| "epoch": 1.207492795389049, |
| "grad_norm": 0.6206833720207214, |
| "learning_rate": 0.0001113881510468492, |
| "loss": 3.184890365600586, |
| "step": 6285 |
| }, |
| { |
| "epoch": 1.2084534101825168, |
| "grad_norm": 0.6447250247001648, |
| "learning_rate": 0.00011115796478093569, |
| "loss": 3.1766143798828126, |
| "step": 6290 |
| }, |
| { |
| "epoch": 1.2094140249759846, |
| "grad_norm": 0.7120223641395569, |
| "learning_rate": 0.00011092787651746178, |
| "loss": 3.1796751022338867, |
| "step": 6295 |
| }, |
| { |
| "epoch": 1.2103746397694524, |
| "grad_norm": 0.620508074760437, |
| "learning_rate": 0.00011069788683696366, |
| "loss": 3.1746284484863283, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.2113352545629203, |
| "grad_norm": 0.6917296051979065, |
| "learning_rate": 0.00011046799631972903, |
| "loss": 3.1808353424072267, |
| "step": 6305 |
| }, |
| { |
| "epoch": 1.2122958693563881, |
| "grad_norm": 0.7654911875724792, |
| "learning_rate": 0.00011023820554579512, |
| "loss": 3.1830776214599608, |
| "step": 6310 |
| }, |
| { |
| "epoch": 1.2132564841498559, |
| "grad_norm": 0.7507694959640503, |
| "learning_rate": 0.00011000851509494771, |
| "loss": 3.181525230407715, |
| "step": 6315 |
| }, |
| { |
| "epoch": 1.2142170989433236, |
| "grad_norm": 0.6086501479148865, |
| "learning_rate": 0.00010977892554671935, |
| "loss": 3.1778955459594727, |
| "step": 6320 |
| }, |
| { |
| "epoch": 1.2151777137367916, |
| "grad_norm": 0.5555998682975769, |
| "learning_rate": 0.00010954943748038798, |
| "loss": 3.1771930694580077, |
| "step": 6325 |
| }, |
| { |
| "epoch": 1.2161383285302594, |
| "grad_norm": 0.6753772497177124, |
| "learning_rate": 0.00010932005147497559, |
| "loss": 3.178631591796875, |
| "step": 6330 |
| }, |
| { |
| "epoch": 1.2170989433237271, |
| "grad_norm": 0.5674150586128235, |
| "learning_rate": 0.00010909076810924654, |
| "loss": 3.183444786071777, |
| "step": 6335 |
| }, |
| { |
| "epoch": 1.218059558117195, |
| "grad_norm": 0.8342090249061584, |
| "learning_rate": 0.00010886158796170633, |
| "loss": 3.181511116027832, |
| "step": 6340 |
| }, |
| { |
| "epoch": 1.219020172910663, |
| "grad_norm": 0.9122212529182434, |
| "learning_rate": 0.00010863251161060002, |
| "loss": 3.1852695465087892, |
| "step": 6345 |
| }, |
| { |
| "epoch": 1.2199807877041307, |
| "grad_norm": 0.44202694296836853, |
| "learning_rate": 0.00010840353963391065, |
| "loss": 3.1827775955200197, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.2209414024975984, |
| "grad_norm": 0.5556421279907227, |
| "learning_rate": 0.0001081746726093581, |
| "loss": 3.176057815551758, |
| "step": 6355 |
| }, |
| { |
| "epoch": 1.2219020172910664, |
| "grad_norm": 0.5319374203681946, |
| "learning_rate": 0.00010794591111439729, |
| "loss": 3.177410125732422, |
| "step": 6360 |
| }, |
| { |
| "epoch": 1.2228626320845342, |
| "grad_norm": 0.7626725435256958, |
| "learning_rate": 0.000107717255726217, |
| "loss": 3.182592582702637, |
| "step": 6365 |
| }, |
| { |
| "epoch": 1.223823246878002, |
| "grad_norm": 0.7391705513000488, |
| "learning_rate": 0.00010748870702173815, |
| "loss": 3.178932952880859, |
| "step": 6370 |
| }, |
| { |
| "epoch": 1.2247838616714697, |
| "grad_norm": 0.4964456260204315, |
| "learning_rate": 0.00010726026557761264, |
| "loss": 3.1790546417236327, |
| "step": 6375 |
| }, |
| { |
| "epoch": 1.2257444764649374, |
| "grad_norm": 0.5090710520744324, |
| "learning_rate": 0.00010703193197022159, |
| "loss": 3.1741233825683595, |
| "step": 6380 |
| }, |
| { |
| "epoch": 1.2267050912584054, |
| "grad_norm": 0.8102352023124695, |
| "learning_rate": 0.00010680370677567423, |
| "loss": 3.184102249145508, |
| "step": 6385 |
| }, |
| { |
| "epoch": 1.2276657060518732, |
| "grad_norm": 0.7811996340751648, |
| "learning_rate": 0.00010657559056980603, |
| "loss": 3.1772319793701174, |
| "step": 6390 |
| }, |
| { |
| "epoch": 1.228626320845341, |
| "grad_norm": 0.7701186537742615, |
| "learning_rate": 0.00010634758392817762, |
| "loss": 3.1799732208251954, |
| "step": 6395 |
| }, |
| { |
| "epoch": 1.229586935638809, |
| "grad_norm": 0.685007631778717, |
| "learning_rate": 0.00010611968742607318, |
| "loss": 3.176517105102539, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.2305475504322767, |
| "grad_norm": 0.5316605567932129, |
| "learning_rate": 0.00010589190163849885, |
| "loss": 3.1800348281860353, |
| "step": 6405 |
| }, |
| { |
| "epoch": 1.2315081652257445, |
| "grad_norm": 0.6539739370346069, |
| "learning_rate": 0.00010566422714018167, |
| "loss": 3.1817600250244142, |
| "step": 6410 |
| }, |
| { |
| "epoch": 1.2324687800192122, |
| "grad_norm": 0.5445916056632996, |
| "learning_rate": 0.00010543666450556774, |
| "loss": 3.1751848220825196, |
| "step": 6415 |
| }, |
| { |
| "epoch": 1.23342939481268, |
| "grad_norm": 0.6994644403457642, |
| "learning_rate": 0.00010520921430882088, |
| "loss": 3.182989311218262, |
| "step": 6420 |
| }, |
| { |
| "epoch": 1.234390009606148, |
| "grad_norm": 0.6963391304016113, |
| "learning_rate": 0.00010498187712382132, |
| "loss": 3.1813974380493164, |
| "step": 6425 |
| }, |
| { |
| "epoch": 1.2353506243996157, |
| "grad_norm": 0.8707190752029419, |
| "learning_rate": 0.00010475465352416402, |
| "loss": 3.1847021102905275, |
| "step": 6430 |
| }, |
| { |
| "epoch": 1.2363112391930835, |
| "grad_norm": 0.628430962562561, |
| "learning_rate": 0.00010452754408315754, |
| "loss": 3.180718994140625, |
| "step": 6435 |
| }, |
| { |
| "epoch": 1.2372718539865515, |
| "grad_norm": 0.4727374017238617, |
| "learning_rate": 0.00010430054937382219, |
| "loss": 3.179873466491699, |
| "step": 6440 |
| }, |
| { |
| "epoch": 1.2382324687800192, |
| "grad_norm": 0.6456632614135742, |
| "learning_rate": 0.000104073669968889, |
| "loss": 3.1764720916748046, |
| "step": 6445 |
| }, |
| { |
| "epoch": 1.239193083573487, |
| "grad_norm": 0.9211506843566895, |
| "learning_rate": 0.0001038469064407979, |
| "loss": 3.175563430786133, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.2401536983669548, |
| "grad_norm": 0.7014700770378113, |
| "learning_rate": 0.00010362025936169664, |
| "loss": 3.1777694702148436, |
| "step": 6455 |
| }, |
| { |
| "epoch": 1.2411143131604228, |
| "grad_norm": 0.612933874130249, |
| "learning_rate": 0.00010339372930343896, |
| "loss": 3.177769088745117, |
| "step": 6460 |
| }, |
| { |
| "epoch": 1.2420749279538905, |
| "grad_norm": 0.47281619906425476, |
| "learning_rate": 0.00010316731683758346, |
| "loss": 3.177565574645996, |
| "step": 6465 |
| }, |
| { |
| "epoch": 1.2430355427473583, |
| "grad_norm": 0.6775256395339966, |
| "learning_rate": 0.00010294102253539204, |
| "loss": 3.1776294708251953, |
| "step": 6470 |
| }, |
| { |
| "epoch": 1.243996157540826, |
| "grad_norm": 0.7892163395881653, |
| "learning_rate": 0.00010271484696782837, |
| "loss": 3.175709342956543, |
| "step": 6475 |
| }, |
| { |
| "epoch": 1.244956772334294, |
| "grad_norm": 0.6570128202438354, |
| "learning_rate": 0.00010248879070555668, |
| "loss": 3.1776430130004885, |
| "step": 6480 |
| }, |
| { |
| "epoch": 1.2459173871277618, |
| "grad_norm": 0.5200539231300354, |
| "learning_rate": 0.00010226285431894013, |
| "loss": 3.176724433898926, |
| "step": 6485 |
| }, |
| { |
| "epoch": 1.2468780019212296, |
| "grad_norm": 0.7223069667816162, |
| "learning_rate": 0.00010203703837803934, |
| "loss": 3.176532745361328, |
| "step": 6490 |
| }, |
| { |
| "epoch": 1.2478386167146973, |
| "grad_norm": 0.8052407503128052, |
| "learning_rate": 0.00010181134345261115, |
| "loss": 3.176080322265625, |
| "step": 6495 |
| }, |
| { |
| "epoch": 1.2487992315081653, |
| "grad_norm": 0.48731186985969543, |
| "learning_rate": 0.00010158577011210695, |
| "loss": 3.173239517211914, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.249759846301633, |
| "grad_norm": 0.48055845499038696, |
| "learning_rate": 0.00010136031892567145, |
| "loss": 3.1757953643798826, |
| "step": 6505 |
| }, |
| { |
| "epoch": 1.2507204610951008, |
| "grad_norm": 0.9334460496902466, |
| "learning_rate": 0.00010113499046214113, |
| "loss": 3.173488998413086, |
| "step": 6510 |
| }, |
| { |
| "epoch": 1.2516810758885688, |
| "grad_norm": 0.747382402420044, |
| "learning_rate": 0.00010090978529004284, |
| "loss": 3.1791568756103517, |
| "step": 6515 |
| }, |
| { |
| "epoch": 1.2526416906820366, |
| "grad_norm": 0.5684623718261719, |
| "learning_rate": 0.00010068470397759226, |
| "loss": 3.1786075592041017, |
| "step": 6520 |
| }, |
| { |
| "epoch": 1.2536023054755043, |
| "grad_norm": 0.7101691365242004, |
| "learning_rate": 0.0001004597470926927, |
| "loss": 3.17834587097168, |
| "step": 6525 |
| }, |
| { |
| "epoch": 1.254562920268972, |
| "grad_norm": 0.6113804578781128, |
| "learning_rate": 0.0001002349152029334, |
| "loss": 3.1760730743408203, |
| "step": 6530 |
| }, |
| { |
| "epoch": 1.2555235350624399, |
| "grad_norm": 0.634519100189209, |
| "learning_rate": 0.00010001020887558837, |
| "loss": 3.1759071350097656, |
| "step": 6535 |
| }, |
| { |
| "epoch": 1.2564841498559078, |
| "grad_norm": 0.6819270253181458, |
| "learning_rate": 9.978562867761465e-05, |
| "loss": 3.1800254821777343, |
| "step": 6540 |
| }, |
| { |
| "epoch": 1.2574447646493756, |
| "grad_norm": 0.6920965313911438, |
| "learning_rate": 9.95611751756511e-05, |
| "loss": 3.179019546508789, |
| "step": 6545 |
| }, |
| { |
| "epoch": 1.2584053794428434, |
| "grad_norm": 0.7164304256439209, |
| "learning_rate": 9.9336848936017e-05, |
| "loss": 3.174900436401367, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.2593659942363113, |
| "grad_norm": 0.5404880046844482, |
| "learning_rate": 9.911265052471046e-05, |
| "loss": 3.1754596710205076, |
| "step": 6555 |
| }, |
| { |
| "epoch": 1.260326609029779, |
| "grad_norm": 0.6277545690536499, |
| "learning_rate": 9.888858050740703e-05, |
| "loss": 3.1767545700073243, |
| "step": 6560 |
| }, |
| { |
| "epoch": 1.2612872238232469, |
| "grad_norm": 0.4282405972480774, |
| "learning_rate": 9.866463944945839e-05, |
| "loss": 3.175259590148926, |
| "step": 6565 |
| }, |
| { |
| "epoch": 1.2622478386167146, |
| "grad_norm": 0.740606963634491, |
| "learning_rate": 9.844082791589075e-05, |
| "loss": 3.1768466949462892, |
| "step": 6570 |
| }, |
| { |
| "epoch": 1.2632084534101824, |
| "grad_norm": 0.6880332231521606, |
| "learning_rate": 9.821714647140363e-05, |
| "loss": 3.1770793914794924, |
| "step": 6575 |
| }, |
| { |
| "epoch": 1.2641690682036504, |
| "grad_norm": 0.5514328479766846, |
| "learning_rate": 9.799359568036821e-05, |
| "loss": 3.180289649963379, |
| "step": 6580 |
| }, |
| { |
| "epoch": 1.2651296829971181, |
| "grad_norm": 0.569648027420044, |
| "learning_rate": 9.777017610682612e-05, |
| "loss": 3.1760627746582033, |
| "step": 6585 |
| }, |
| { |
| "epoch": 1.266090297790586, |
| "grad_norm": 0.6740429401397705, |
| "learning_rate": 9.754688831448781e-05, |
| "loss": 3.173795700073242, |
| "step": 6590 |
| }, |
| { |
| "epoch": 1.267050912584054, |
| "grad_norm": 0.6410177946090698, |
| "learning_rate": 9.732373286673135e-05, |
| "loss": 3.178542709350586, |
| "step": 6595 |
| }, |
| { |
| "epoch": 1.2680115273775217, |
| "grad_norm": 0.5730255246162415, |
| "learning_rate": 9.710071032660076e-05, |
| "loss": 3.1776718139648437, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.2689721421709894, |
| "grad_norm": 0.6753981113433838, |
| "learning_rate": 9.687782125680484e-05, |
| "loss": 3.1791826248168946, |
| "step": 6605 |
| }, |
| { |
| "epoch": 1.2699327569644572, |
| "grad_norm": 0.5752228498458862, |
| "learning_rate": 9.66550662197155e-05, |
| "loss": 3.173518753051758, |
| "step": 6610 |
| }, |
| { |
| "epoch": 1.270893371757925, |
| "grad_norm": 0.5868836045265198, |
| "learning_rate": 9.643244577736667e-05, |
| "loss": 3.173557090759277, |
| "step": 6615 |
| }, |
| { |
| "epoch": 1.271853986551393, |
| "grad_norm": 0.5383107662200928, |
| "learning_rate": 9.620996049145247e-05, |
| "loss": 3.1773754119873048, |
| "step": 6620 |
| }, |
| { |
| "epoch": 1.2728146013448607, |
| "grad_norm": 0.5838056802749634, |
| "learning_rate": 9.598761092332616e-05, |
| "loss": 3.1778125762939453, |
| "step": 6625 |
| }, |
| { |
| "epoch": 1.2737752161383284, |
| "grad_norm": 0.5867598056793213, |
| "learning_rate": 9.576539763399847e-05, |
| "loss": 3.183711814880371, |
| "step": 6630 |
| }, |
| { |
| "epoch": 1.2747358309317964, |
| "grad_norm": 0.6111621856689453, |
| "learning_rate": 9.554332118413635e-05, |
| "loss": 3.1793869018554686, |
| "step": 6635 |
| }, |
| { |
| "epoch": 1.2756964457252642, |
| "grad_norm": 0.8043511509895325, |
| "learning_rate": 9.532138213406143e-05, |
| "loss": 3.175552749633789, |
| "step": 6640 |
| }, |
| { |
| "epoch": 1.276657060518732, |
| "grad_norm": 0.8225224018096924, |
| "learning_rate": 9.509958104374877e-05, |
| "loss": 3.1786468505859373, |
| "step": 6645 |
| }, |
| { |
| "epoch": 1.2776176753121997, |
| "grad_norm": 0.6611747741699219, |
| "learning_rate": 9.487791847282517e-05, |
| "loss": 3.1788875579833986, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.2785782901056677, |
| "grad_norm": 0.49061068892478943, |
| "learning_rate": 9.465639498056815e-05, |
| "loss": 3.1779201507568358, |
| "step": 6655 |
| }, |
| { |
| "epoch": 1.2795389048991355, |
| "grad_norm": 0.6303700804710388, |
| "learning_rate": 9.443501112590413e-05, |
| "loss": 3.176443862915039, |
| "step": 6660 |
| }, |
| { |
| "epoch": 1.2804995196926032, |
| "grad_norm": 0.7213342189788818, |
| "learning_rate": 9.421376746740733e-05, |
| "loss": 3.1703159332275392, |
| "step": 6665 |
| }, |
| { |
| "epoch": 1.2814601344860712, |
| "grad_norm": 0.6647571921348572, |
| "learning_rate": 9.399266456329815e-05, |
| "loss": 3.1765552520751954, |
| "step": 6670 |
| }, |
| { |
| "epoch": 1.282420749279539, |
| "grad_norm": 0.6062517762184143, |
| "learning_rate": 9.377170297144196e-05, |
| "loss": 3.1765026092529296, |
| "step": 6675 |
| }, |
| { |
| "epoch": 1.2833813640730067, |
| "grad_norm": 0.4922938048839569, |
| "learning_rate": 9.355088324934741e-05, |
| "loss": 3.176412582397461, |
| "step": 6680 |
| }, |
| { |
| "epoch": 1.2843419788664745, |
| "grad_norm": 0.5771006941795349, |
| "learning_rate": 9.333020595416548e-05, |
| "loss": 3.1751476287841798, |
| "step": 6685 |
| }, |
| { |
| "epoch": 1.2853025936599423, |
| "grad_norm": 0.6197171211242676, |
| "learning_rate": 9.310967164268749e-05, |
| "loss": 3.174680709838867, |
| "step": 6690 |
| }, |
| { |
| "epoch": 1.2862632084534102, |
| "grad_norm": 0.5435570478439331, |
| "learning_rate": 9.28892808713442e-05, |
| "loss": 3.1765819549560548, |
| "step": 6695 |
| }, |
| { |
| "epoch": 1.287223823246878, |
| "grad_norm": 0.5521527528762817, |
| "learning_rate": 9.266903419620411e-05, |
| "loss": 3.173064041137695, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.2881844380403458, |
| "grad_norm": 0.5107563138008118, |
| "learning_rate": 9.24489321729722e-05, |
| "loss": 3.1766895294189452, |
| "step": 6705 |
| }, |
| { |
| "epoch": 1.2891450528338138, |
| "grad_norm": 0.66566401720047, |
| "learning_rate": 9.222897535698841e-05, |
| "loss": 3.175777053833008, |
| "step": 6710 |
| }, |
| { |
| "epoch": 1.2901056676272815, |
| "grad_norm": 0.501441478729248, |
| "learning_rate": 9.20091643032264e-05, |
| "loss": 3.1773338317871094, |
| "step": 6715 |
| }, |
| { |
| "epoch": 1.2910662824207493, |
| "grad_norm": 0.7562515735626221, |
| "learning_rate": 9.178949956629195e-05, |
| "loss": 3.1776708602905273, |
| "step": 6720 |
| }, |
| { |
| "epoch": 1.292026897214217, |
| "grad_norm": 0.7048882246017456, |
| "learning_rate": 9.156998170042182e-05, |
| "loss": 3.1779850006103514, |
| "step": 6725 |
| }, |
| { |
| "epoch": 1.2929875120076848, |
| "grad_norm": 0.5082115530967712, |
| "learning_rate": 9.1350611259482e-05, |
| "loss": 3.174315071105957, |
| "step": 6730 |
| }, |
| { |
| "epoch": 1.2939481268011528, |
| "grad_norm": 0.5481780171394348, |
| "learning_rate": 9.113138879696667e-05, |
| "loss": 3.174916648864746, |
| "step": 6735 |
| }, |
| { |
| "epoch": 1.2949087415946205, |
| "grad_norm": 0.5947979688644409, |
| "learning_rate": 9.091231486599656e-05, |
| "loss": 3.177842903137207, |
| "step": 6740 |
| }, |
| { |
| "epoch": 1.2958693563880883, |
| "grad_norm": 0.5829041600227356, |
| "learning_rate": 9.069339001931775e-05, |
| "loss": 3.1755516052246096, |
| "step": 6745 |
| }, |
| { |
| "epoch": 1.2968299711815563, |
| "grad_norm": 0.7779812812805176, |
| "learning_rate": 9.047461480929996e-05, |
| "loss": 3.1758398056030273, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.297790585975024, |
| "grad_norm": 0.5685781836509705, |
| "learning_rate": 9.025598978793564e-05, |
| "loss": 3.1764198303222657, |
| "step": 6755 |
| }, |
| { |
| "epoch": 1.2987512007684918, |
| "grad_norm": 0.5285525918006897, |
| "learning_rate": 9.003751550683803e-05, |
| "loss": 3.1752038955688477, |
| "step": 6760 |
| }, |
| { |
| "epoch": 1.2997118155619596, |
| "grad_norm": 0.6162580251693726, |
| "learning_rate": 8.981919251724023e-05, |
| "loss": 3.172860336303711, |
| "step": 6765 |
| }, |
| { |
| "epoch": 1.3006724303554273, |
| "grad_norm": 0.6464502215385437, |
| "learning_rate": 8.960102136999346e-05, |
| "loss": 3.174723815917969, |
| "step": 6770 |
| }, |
| { |
| "epoch": 1.3016330451488953, |
| "grad_norm": 0.472971111536026, |
| "learning_rate": 8.938300261556599e-05, |
| "loss": 3.176887321472168, |
| "step": 6775 |
| }, |
| { |
| "epoch": 1.302593659942363, |
| "grad_norm": 0.46003440022468567, |
| "learning_rate": 8.916513680404145e-05, |
| "loss": 3.176510047912598, |
| "step": 6780 |
| }, |
| { |
| "epoch": 1.3035542747358309, |
| "grad_norm": 0.7004385590553284, |
| "learning_rate": 8.894742448511766e-05, |
| "loss": 3.1788423538208006, |
| "step": 6785 |
| }, |
| { |
| "epoch": 1.3045148895292988, |
| "grad_norm": 0.6769760847091675, |
| "learning_rate": 8.872986620810508e-05, |
| "loss": 3.175991439819336, |
| "step": 6790 |
| }, |
| { |
| "epoch": 1.3054755043227666, |
| "grad_norm": 0.593636691570282, |
| "learning_rate": 8.851246252192566e-05, |
| "loss": 3.1727279663085937, |
| "step": 6795 |
| }, |
| { |
| "epoch": 1.3064361191162344, |
| "grad_norm": 0.580614447593689, |
| "learning_rate": 8.82952139751111e-05, |
| "loss": 3.178467559814453, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.3073967339097021, |
| "grad_norm": 0.519120991230011, |
| "learning_rate": 8.807812111580186e-05, |
| "loss": 3.1754281997680662, |
| "step": 6805 |
| }, |
| { |
| "epoch": 1.30835734870317, |
| "grad_norm": 0.5653340816497803, |
| "learning_rate": 8.786118449174541e-05, |
| "loss": 3.1773509979248047, |
| "step": 6810 |
| }, |
| { |
| "epoch": 1.3093179634966379, |
| "grad_norm": 0.7600631713867188, |
| "learning_rate": 8.764440465029513e-05, |
| "loss": 3.1753978729248047, |
| "step": 6815 |
| }, |
| { |
| "epoch": 1.3102785782901056, |
| "grad_norm": 0.5709202885627747, |
| "learning_rate": 8.742778213840888e-05, |
| "loss": 3.176059341430664, |
| "step": 6820 |
| }, |
| { |
| "epoch": 1.3112391930835736, |
| "grad_norm": 0.5290617346763611, |
| "learning_rate": 8.721131750264737e-05, |
| "loss": 3.176310348510742, |
| "step": 6825 |
| }, |
| { |
| "epoch": 1.3121998078770414, |
| "grad_norm": 0.5382722020149231, |
| "learning_rate": 8.699501128917321e-05, |
| "loss": 3.1754171371459963, |
| "step": 6830 |
| }, |
| { |
| "epoch": 1.3131604226705091, |
| "grad_norm": 0.5834631323814392, |
| "learning_rate": 8.677886404374903e-05, |
| "loss": 3.176200103759766, |
| "step": 6835 |
| }, |
| { |
| "epoch": 1.314121037463977, |
| "grad_norm": 0.44905054569244385, |
| "learning_rate": 8.656287631173656e-05, |
| "loss": 3.178561210632324, |
| "step": 6840 |
| }, |
| { |
| "epoch": 1.3150816522574447, |
| "grad_norm": 0.4696488380432129, |
| "learning_rate": 8.634704863809501e-05, |
| "loss": 3.1775485992431642, |
| "step": 6845 |
| }, |
| { |
| "epoch": 1.3160422670509127, |
| "grad_norm": 0.5930847525596619, |
| "learning_rate": 8.61313815673798e-05, |
| "loss": 3.176204299926758, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.3170028818443804, |
| "grad_norm": 0.6078386902809143, |
| "learning_rate": 8.591587564374094e-05, |
| "loss": 3.175630569458008, |
| "step": 6855 |
| }, |
| { |
| "epoch": 1.3179634966378482, |
| "grad_norm": 0.6279536485671997, |
| "learning_rate": 8.570053141092209e-05, |
| "loss": 3.1791570663452147, |
| "step": 6860 |
| }, |
| { |
| "epoch": 1.3189241114313162, |
| "grad_norm": 0.4797205328941345, |
| "learning_rate": 8.548534941225877e-05, |
| "loss": 3.1787494659423827, |
| "step": 6865 |
| }, |
| { |
| "epoch": 1.319884726224784, |
| "grad_norm": 0.692002534866333, |
| "learning_rate": 8.527033019067736e-05, |
| "loss": 3.1755470275878905, |
| "step": 6870 |
| }, |
| { |
| "epoch": 1.3208453410182517, |
| "grad_norm": 0.609710693359375, |
| "learning_rate": 8.505547428869326e-05, |
| "loss": 3.1747051239013673, |
| "step": 6875 |
| }, |
| { |
| "epoch": 1.3218059558117194, |
| "grad_norm": 0.511199414730072, |
| "learning_rate": 8.484078224841002e-05, |
| "loss": 3.176365280151367, |
| "step": 6880 |
| }, |
| { |
| "epoch": 1.3227665706051872, |
| "grad_norm": 0.4836551547050476, |
| "learning_rate": 8.462625461151769e-05, |
| "loss": 3.1734235763549803, |
| "step": 6885 |
| }, |
| { |
| "epoch": 1.3237271853986552, |
| "grad_norm": 0.6896669268608093, |
| "learning_rate": 8.441189191929152e-05, |
| "loss": 3.1753162384033202, |
| "step": 6890 |
| }, |
| { |
| "epoch": 1.324687800192123, |
| "grad_norm": 0.5728012323379517, |
| "learning_rate": 8.419769471259053e-05, |
| "loss": 3.1740385055541993, |
| "step": 6895 |
| }, |
| { |
| "epoch": 1.3256484149855907, |
| "grad_norm": 0.6955984234809875, |
| "learning_rate": 8.398366353185633e-05, |
| "loss": 3.175432586669922, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.3266090297790587, |
| "grad_norm": 0.48423975706100464, |
| "learning_rate": 8.376979891711145e-05, |
| "loss": 3.1750982284545897, |
| "step": 6905 |
| }, |
| { |
| "epoch": 1.3275696445725265, |
| "grad_norm": 0.4743121862411499, |
| "learning_rate": 8.355610140795827e-05, |
| "loss": 3.1710472106933594, |
| "step": 6910 |
| }, |
| { |
| "epoch": 1.3285302593659942, |
| "grad_norm": 0.6560218334197998, |
| "learning_rate": 8.334257154357755e-05, |
| "loss": 3.1815349578857424, |
| "step": 6915 |
| }, |
| { |
| "epoch": 1.329490874159462, |
| "grad_norm": 0.536353588104248, |
| "learning_rate": 8.31292098627271e-05, |
| "loss": 3.1730472564697267, |
| "step": 6920 |
| }, |
| { |
| "epoch": 1.3304514889529298, |
| "grad_norm": 0.49340781569480896, |
| "learning_rate": 8.291601690374021e-05, |
| "loss": 3.1722347259521486, |
| "step": 6925 |
| }, |
| { |
| "epoch": 1.3314121037463977, |
| "grad_norm": 0.5234084129333496, |
| "learning_rate": 8.270299320452467e-05, |
| "loss": 3.172749710083008, |
| "step": 6930 |
| }, |
| { |
| "epoch": 1.3323727185398655, |
| "grad_norm": 0.6601616740226746, |
| "learning_rate": 8.249013930256108e-05, |
| "loss": 3.1766475677490233, |
| "step": 6935 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.7026968002319336, |
| "learning_rate": 8.227745573490176e-05, |
| "loss": 3.173655319213867, |
| "step": 6940 |
| }, |
| { |
| "epoch": 1.3342939481268012, |
| "grad_norm": 0.5302594304084778, |
| "learning_rate": 8.206494303816904e-05, |
| "loss": 3.178236389160156, |
| "step": 6945 |
| }, |
| { |
| "epoch": 1.335254562920269, |
| "grad_norm": 0.46280986070632935, |
| "learning_rate": 8.185260174855433e-05, |
| "loss": 3.173746871948242, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.3362151777137368, |
| "grad_norm": 0.5142591595649719, |
| "learning_rate": 8.164043240181646e-05, |
| "loss": 3.173639106750488, |
| "step": 6955 |
| }, |
| { |
| "epoch": 1.3371757925072045, |
| "grad_norm": 0.682403028011322, |
| "learning_rate": 8.142843553328048e-05, |
| "loss": 3.175322151184082, |
| "step": 6960 |
| }, |
| { |
| "epoch": 1.3381364073006723, |
| "grad_norm": 0.6334633231163025, |
| "learning_rate": 8.121661167783625e-05, |
| "loss": 3.17366943359375, |
| "step": 6965 |
| }, |
| { |
| "epoch": 1.3390970220941403, |
| "grad_norm": 0.606118381023407, |
| "learning_rate": 8.100496136993712e-05, |
| "loss": 3.1736032485961916, |
| "step": 6970 |
| }, |
| { |
| "epoch": 1.340057636887608, |
| "grad_norm": 0.5890917778015137, |
| "learning_rate": 8.07934851435984e-05, |
| "loss": 3.172843360900879, |
| "step": 6975 |
| }, |
| { |
| "epoch": 1.341018251681076, |
| "grad_norm": 0.47088661789894104, |
| "learning_rate": 8.05821835323964e-05, |
| "loss": 3.1722528457641603, |
| "step": 6980 |
| }, |
| { |
| "epoch": 1.3419788664745438, |
| "grad_norm": 0.48279380798339844, |
| "learning_rate": 8.037105706946673e-05, |
| "loss": 3.177801513671875, |
| "step": 6985 |
| }, |
| { |
| "epoch": 1.3429394812680115, |
| "grad_norm": 0.5353664755821228, |
| "learning_rate": 8.01601062875032e-05, |
| "loss": 3.1740434646606444, |
| "step": 6990 |
| }, |
| { |
| "epoch": 1.3439000960614793, |
| "grad_norm": 0.5050175189971924, |
| "learning_rate": 7.994933171875613e-05, |
| "loss": 3.1727882385253907, |
| "step": 6995 |
| }, |
| { |
| "epoch": 1.344860710854947, |
| "grad_norm": 0.44954636693000793, |
| "learning_rate": 7.973873389503149e-05, |
| "loss": 3.1749853134155273, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.345821325648415, |
| "grad_norm": 0.6471356749534607, |
| "learning_rate": 7.952831334768913e-05, |
| "loss": 3.1730751037597655, |
| "step": 7005 |
| }, |
| { |
| "epoch": 1.3467819404418828, |
| "grad_norm": 0.5839700102806091, |
| "learning_rate": 7.931807060764176e-05, |
| "loss": 3.172525405883789, |
| "step": 7010 |
| }, |
| { |
| "epoch": 1.3477425552353506, |
| "grad_norm": 0.5643643140792847, |
| "learning_rate": 7.910800620535329e-05, |
| "loss": 3.172414016723633, |
| "step": 7015 |
| }, |
| { |
| "epoch": 1.3487031700288186, |
| "grad_norm": 0.5194916129112244, |
| "learning_rate": 7.889812067083777e-05, |
| "loss": 3.1767629623413085, |
| "step": 7020 |
| }, |
| { |
| "epoch": 1.3496637848222863, |
| "grad_norm": 0.4932091236114502, |
| "learning_rate": 7.868841453365797e-05, |
| "loss": 3.1722953796386717, |
| "step": 7025 |
| }, |
| { |
| "epoch": 1.350624399615754, |
| "grad_norm": 0.5307772159576416, |
| "learning_rate": 7.847888832292397e-05, |
| "loss": 3.1748939514160157, |
| "step": 7030 |
| }, |
| { |
| "epoch": 1.3515850144092219, |
| "grad_norm": 0.5100224614143372, |
| "learning_rate": 7.826954256729188e-05, |
| "loss": 3.1737871170043945, |
| "step": 7035 |
| }, |
| { |
| "epoch": 1.3525456292026896, |
| "grad_norm": 0.5833878517150879, |
| "learning_rate": 7.806037779496264e-05, |
| "loss": 3.171864128112793, |
| "step": 7040 |
| }, |
| { |
| "epoch": 1.3535062439961576, |
| "grad_norm": 0.4274962842464447, |
| "learning_rate": 7.785139453368025e-05, |
| "loss": 3.174275207519531, |
| "step": 7045 |
| }, |
| { |
| "epoch": 1.3544668587896254, |
| "grad_norm": 0.43694546818733215, |
| "learning_rate": 7.764259331073096e-05, |
| "loss": 3.1718505859375, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.3554274735830931, |
| "grad_norm": 0.5787099003791809, |
| "learning_rate": 7.743397465294174e-05, |
| "loss": 3.171575164794922, |
| "step": 7055 |
| }, |
| { |
| "epoch": 1.356388088376561, |
| "grad_norm": 0.45274344086647034, |
| "learning_rate": 7.72255390866789e-05, |
| "loss": 3.169318199157715, |
| "step": 7060 |
| }, |
| { |
| "epoch": 1.3573487031700289, |
| "grad_norm": 0.490500807762146, |
| "learning_rate": 7.701728713784662e-05, |
| "loss": 3.172596740722656, |
| "step": 7065 |
| }, |
| { |
| "epoch": 1.3583093179634966, |
| "grad_norm": 0.8986951112747192, |
| "learning_rate": 7.6809219331886e-05, |
| "loss": 3.177069664001465, |
| "step": 7070 |
| }, |
| { |
| "epoch": 1.3592699327569644, |
| "grad_norm": 0.6794254183769226, |
| "learning_rate": 7.660133619377346e-05, |
| "loss": 3.1766523361206054, |
| "step": 7075 |
| }, |
| { |
| "epoch": 1.3602305475504322, |
| "grad_norm": 0.4294492304325104, |
| "learning_rate": 7.639363824801957e-05, |
| "loss": 3.171103668212891, |
| "step": 7080 |
| }, |
| { |
| "epoch": 1.3611911623439001, |
| "grad_norm": 0.4653557240962982, |
| "learning_rate": 7.618612601866738e-05, |
| "loss": 3.1739612579345704, |
| "step": 7085 |
| }, |
| { |
| "epoch": 1.362151777137368, |
| "grad_norm": 0.48339158296585083, |
| "learning_rate": 7.597880002929155e-05, |
| "loss": 3.174298095703125, |
| "step": 7090 |
| }, |
| { |
| "epoch": 1.3631123919308357, |
| "grad_norm": 0.4108154773712158, |
| "learning_rate": 7.577166080299704e-05, |
| "loss": 3.1711090087890623, |
| "step": 7095 |
| }, |
| { |
| "epoch": 1.3640730067243036, |
| "grad_norm": 0.5386460423469543, |
| "learning_rate": 7.556470886241715e-05, |
| "loss": 3.172881317138672, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.3650336215177714, |
| "grad_norm": 0.521826982498169, |
| "learning_rate": 7.535794472971292e-05, |
| "loss": 3.1787548065185547, |
| "step": 7105 |
| }, |
| { |
| "epoch": 1.3659942363112392, |
| "grad_norm": 0.5726050138473511, |
| "learning_rate": 7.515136892657152e-05, |
| "loss": 3.1746589660644533, |
| "step": 7110 |
| }, |
| { |
| "epoch": 1.366954851104707, |
| "grad_norm": 0.49367332458496094, |
| "learning_rate": 7.494498197420486e-05, |
| "loss": 3.171868896484375, |
| "step": 7115 |
| }, |
| { |
| "epoch": 1.3679154658981747, |
| "grad_norm": 0.6521613597869873, |
| "learning_rate": 7.473878439334833e-05, |
| "loss": 3.1738697052001954, |
| "step": 7120 |
| }, |
| { |
| "epoch": 1.3688760806916427, |
| "grad_norm": 0.4904996454715729, |
| "learning_rate": 7.453277670425964e-05, |
| "loss": 3.1751945495605467, |
| "step": 7125 |
| }, |
| { |
| "epoch": 1.3698366954851104, |
| "grad_norm": 0.5470160841941833, |
| "learning_rate": 7.432695942671736e-05, |
| "loss": 3.1728076934814453, |
| "step": 7130 |
| }, |
| { |
| "epoch": 1.3707973102785782, |
| "grad_norm": 0.6032967567443848, |
| "learning_rate": 7.412133308001952e-05, |
| "loss": 3.166953468322754, |
| "step": 7135 |
| }, |
| { |
| "epoch": 1.3717579250720462, |
| "grad_norm": 0.4871714413166046, |
| "learning_rate": 7.39158981829825e-05, |
| "loss": 3.1728256225585936, |
| "step": 7140 |
| }, |
| { |
| "epoch": 1.372718539865514, |
| "grad_norm": 0.42759451270103455, |
| "learning_rate": 7.371065525393965e-05, |
| "loss": 3.1744380950927735, |
| "step": 7145 |
| }, |
| { |
| "epoch": 1.3736791546589817, |
| "grad_norm": 0.5663604736328125, |
| "learning_rate": 7.350560481073996e-05, |
| "loss": 3.1777429580688477, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.3746397694524495, |
| "grad_norm": 0.5459421873092651, |
| "learning_rate": 7.330074737074665e-05, |
| "loss": 3.170821762084961, |
| "step": 7155 |
| }, |
| { |
| "epoch": 1.3756003842459175, |
| "grad_norm": 0.46091675758361816, |
| "learning_rate": 7.309608345083605e-05, |
| "loss": 3.1742374420166017, |
| "step": 7160 |
| }, |
| { |
| "epoch": 1.3765609990393852, |
| "grad_norm": 0.38046908378601074, |
| "learning_rate": 7.289161356739638e-05, |
| "loss": 3.1713001251220705, |
| "step": 7165 |
| }, |
| { |
| "epoch": 1.377521613832853, |
| "grad_norm": 0.468205064535141, |
| "learning_rate": 7.268733823632601e-05, |
| "loss": 3.1741426467895506, |
| "step": 7170 |
| }, |
| { |
| "epoch": 1.378482228626321, |
| "grad_norm": 0.6682205200195312, |
| "learning_rate": 7.248325797303256e-05, |
| "loss": 3.1712989807128906, |
| "step": 7175 |
| }, |
| { |
| "epoch": 1.3794428434197887, |
| "grad_norm": 0.5630512833595276, |
| "learning_rate": 7.227937329243149e-05, |
| "loss": 3.171531867980957, |
| "step": 7180 |
| }, |
| { |
| "epoch": 1.3804034582132565, |
| "grad_norm": 0.54999178647995, |
| "learning_rate": 7.20756847089448e-05, |
| "loss": 3.173867416381836, |
| "step": 7185 |
| }, |
| { |
| "epoch": 1.3813640730067243, |
| "grad_norm": 0.43921148777008057, |
| "learning_rate": 7.187219273649962e-05, |
| "loss": 3.170378494262695, |
| "step": 7190 |
| }, |
| { |
| "epoch": 1.382324687800192, |
| "grad_norm": 0.4150092601776123, |
| "learning_rate": 7.166889788852707e-05, |
| "loss": 3.170474624633789, |
| "step": 7195 |
| }, |
| { |
| "epoch": 1.38328530259366, |
| "grad_norm": 0.38194531202316284, |
| "learning_rate": 7.146580067796102e-05, |
| "loss": 3.1690601348876952, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.3842459173871278, |
| "grad_norm": 0.7853124141693115, |
| "learning_rate": 7.126290161723642e-05, |
| "loss": 3.1698192596435546, |
| "step": 7205 |
| }, |
| { |
| "epoch": 1.3852065321805955, |
| "grad_norm": 0.4423576593399048, |
| "learning_rate": 7.106020121828848e-05, |
| "loss": 3.1732460021972657, |
| "step": 7210 |
| }, |
| { |
| "epoch": 1.3861671469740635, |
| "grad_norm": 0.4560396671295166, |
| "learning_rate": 7.085769999255108e-05, |
| "loss": 3.1727630615234377, |
| "step": 7215 |
| }, |
| { |
| "epoch": 1.3871277617675313, |
| "grad_norm": 0.41349244117736816, |
| "learning_rate": 7.065539845095567e-05, |
| "loss": 3.1731325149536134, |
| "step": 7220 |
| }, |
| { |
| "epoch": 1.388088376560999, |
| "grad_norm": 0.47393569350242615, |
| "learning_rate": 7.045329710392967e-05, |
| "loss": 3.1727859497070314, |
| "step": 7225 |
| }, |
| { |
| "epoch": 1.3890489913544668, |
| "grad_norm": 0.7396698594093323, |
| "learning_rate": 7.025139646139553e-05, |
| "loss": 3.1715877532958983, |
| "step": 7230 |
| }, |
| { |
| "epoch": 1.3900096061479346, |
| "grad_norm": 0.7079951763153076, |
| "learning_rate": 7.004969703276941e-05, |
| "loss": 3.169532585144043, |
| "step": 7235 |
| }, |
| { |
| "epoch": 1.3909702209414025, |
| "grad_norm": 0.46009525656700134, |
| "learning_rate": 6.984819932695956e-05, |
| "loss": 3.169749450683594, |
| "step": 7240 |
| }, |
| { |
| "epoch": 1.3919308357348703, |
| "grad_norm": 0.4487673044204712, |
| "learning_rate": 6.964690385236534e-05, |
| "loss": 3.1695636749267577, |
| "step": 7245 |
| }, |
| { |
| "epoch": 1.392891450528338, |
| "grad_norm": 0.4718167781829834, |
| "learning_rate": 6.944581111687593e-05, |
| "loss": 3.168662452697754, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.393852065321806, |
| "grad_norm": 0.46017783880233765, |
| "learning_rate": 6.924492162786898e-05, |
| "loss": 3.169230842590332, |
| "step": 7255 |
| }, |
| { |
| "epoch": 1.3948126801152738, |
| "grad_norm": 0.45555198192596436, |
| "learning_rate": 6.904423589220917e-05, |
| "loss": 3.169890594482422, |
| "step": 7260 |
| }, |
| { |
| "epoch": 1.3957732949087416, |
| "grad_norm": 0.4948507845401764, |
| "learning_rate": 6.884375441624724e-05, |
| "loss": 3.173093795776367, |
| "step": 7265 |
| }, |
| { |
| "epoch": 1.3967339097022093, |
| "grad_norm": 0.43780985474586487, |
| "learning_rate": 6.864347770581859e-05, |
| "loss": 3.168751335144043, |
| "step": 7270 |
| }, |
| { |
| "epoch": 1.397694524495677, |
| "grad_norm": 0.4438318908214569, |
| "learning_rate": 6.844340626624178e-05, |
| "loss": 3.1701900482177736, |
| "step": 7275 |
| }, |
| { |
| "epoch": 1.398655139289145, |
| "grad_norm": 0.5062385201454163, |
| "learning_rate": 6.824354060231765e-05, |
| "loss": 3.1665103912353514, |
| "step": 7280 |
| }, |
| { |
| "epoch": 1.3996157540826129, |
| "grad_norm": 0.4086364507675171, |
| "learning_rate": 6.804388121832777e-05, |
| "loss": 3.1716842651367188, |
| "step": 7285 |
| }, |
| { |
| "epoch": 1.4005763688760806, |
| "grad_norm": 0.5124282836914062, |
| "learning_rate": 6.784442861803331e-05, |
| "loss": 3.1696632385253904, |
| "step": 7290 |
| }, |
| { |
| "epoch": 1.4015369836695486, |
| "grad_norm": 0.5204159617424011, |
| "learning_rate": 6.764518330467346e-05, |
| "loss": 3.1707353591918945, |
| "step": 7295 |
| }, |
| { |
| "epoch": 1.4024975984630164, |
| "grad_norm": 0.6575141549110413, |
| "learning_rate": 6.744614578096475e-05, |
| "loss": 3.171962547302246, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.4034582132564841, |
| "grad_norm": 0.5063356757164001, |
| "learning_rate": 6.72473165490993e-05, |
| "loss": 3.1709272384643556, |
| "step": 7305 |
| }, |
| { |
| "epoch": 1.4044188280499519, |
| "grad_norm": 0.4604819416999817, |
| "learning_rate": 6.704869611074351e-05, |
| "loss": 3.173125076293945, |
| "step": 7310 |
| }, |
| { |
| "epoch": 1.4053794428434199, |
| "grad_norm": 0.4766218960285187, |
| "learning_rate": 6.685028496703719e-05, |
| "loss": 3.1713457107543945, |
| "step": 7315 |
| }, |
| { |
| "epoch": 1.4063400576368876, |
| "grad_norm": 0.48007673025131226, |
| "learning_rate": 6.665208361859203e-05, |
| "loss": 3.1723331451416015, |
| "step": 7320 |
| }, |
| { |
| "epoch": 1.4073006724303554, |
| "grad_norm": 0.3352242410182953, |
| "learning_rate": 6.64540925654904e-05, |
| "loss": 3.171708679199219, |
| "step": 7325 |
| }, |
| { |
| "epoch": 1.4082612872238234, |
| "grad_norm": 0.6097206473350525, |
| "learning_rate": 6.625631230728393e-05, |
| "loss": 3.169724464416504, |
| "step": 7330 |
| }, |
| { |
| "epoch": 1.4092219020172911, |
| "grad_norm": 0.45327913761138916, |
| "learning_rate": 6.605874334299257e-05, |
| "loss": 3.172129821777344, |
| "step": 7335 |
| }, |
| { |
| "epoch": 1.410182516810759, |
| "grad_norm": 0.3918154537677765, |
| "learning_rate": 6.586138617110312e-05, |
| "loss": 3.1696929931640625, |
| "step": 7340 |
| }, |
| { |
| "epoch": 1.4111431316042267, |
| "grad_norm": 0.45734044909477234, |
| "learning_rate": 6.566424128956788e-05, |
| "loss": 3.169087600708008, |
| "step": 7345 |
| }, |
| { |
| "epoch": 1.4121037463976944, |
| "grad_norm": 0.41583287715911865, |
| "learning_rate": 6.546730919580365e-05, |
| "loss": 3.169902801513672, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.4130643611911624, |
| "grad_norm": 0.3783586919307709, |
| "learning_rate": 6.527059038669032e-05, |
| "loss": 3.173455810546875, |
| "step": 7355 |
| }, |
| { |
| "epoch": 1.4140249759846302, |
| "grad_norm": 0.5969278216362, |
| "learning_rate": 6.507408535856968e-05, |
| "loss": 3.171731185913086, |
| "step": 7360 |
| }, |
| { |
| "epoch": 1.414985590778098, |
| "grad_norm": 0.5529734492301941, |
| "learning_rate": 6.487779460724388e-05, |
| "loss": 3.169993591308594, |
| "step": 7365 |
| }, |
| { |
| "epoch": 1.415946205571566, |
| "grad_norm": 0.460746705532074, |
| "learning_rate": 6.468171862797484e-05, |
| "loss": 3.1684539794921873, |
| "step": 7370 |
| }, |
| { |
| "epoch": 1.4169068203650337, |
| "grad_norm": 0.40012434124946594, |
| "learning_rate": 6.448585791548231e-05, |
| "loss": 3.170420837402344, |
| "step": 7375 |
| }, |
| { |
| "epoch": 1.4178674351585014, |
| "grad_norm": 0.4528000056743622, |
| "learning_rate": 6.42902129639429e-05, |
| "loss": 3.1724185943603516, |
| "step": 7380 |
| }, |
| { |
| "epoch": 1.4188280499519692, |
| "grad_norm": 0.5503178834915161, |
| "learning_rate": 6.409478426698893e-05, |
| "loss": 3.170566749572754, |
| "step": 7385 |
| }, |
| { |
| "epoch": 1.419788664745437, |
| "grad_norm": 0.4782489240169525, |
| "learning_rate": 6.389957231770705e-05, |
| "loss": 3.1753549575805664, |
| "step": 7390 |
| }, |
| { |
| "epoch": 1.420749279538905, |
| "grad_norm": 0.5017015933990479, |
| "learning_rate": 6.370457760863708e-05, |
| "loss": 3.1712177276611326, |
| "step": 7395 |
| }, |
| { |
| "epoch": 1.4217098943323727, |
| "grad_norm": 0.4609530568122864, |
| "learning_rate": 6.35098006317706e-05, |
| "loss": 3.1715621948242188, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.4226705091258405, |
| "grad_norm": 0.43000540137290955, |
| "learning_rate": 6.331524187854992e-05, |
| "loss": 3.1696197509765627, |
| "step": 7405 |
| }, |
| { |
| "epoch": 1.4236311239193085, |
| "grad_norm": 0.6466848850250244, |
| "learning_rate": 6.31209018398668e-05, |
| "loss": 3.171028900146484, |
| "step": 7410 |
| }, |
| { |
| "epoch": 1.4245917387127762, |
| "grad_norm": 0.4881599247455597, |
| "learning_rate": 6.292678100606101e-05, |
| "loss": 3.1710824966430664, |
| "step": 7415 |
| }, |
| { |
| "epoch": 1.425552353506244, |
| "grad_norm": 0.42242470383644104, |
| "learning_rate": 6.273287986691934e-05, |
| "loss": 3.1717771530151366, |
| "step": 7420 |
| }, |
| { |
| "epoch": 1.4265129682997117, |
| "grad_norm": 0.40897586941719055, |
| "learning_rate": 6.253919891167427e-05, |
| "loss": 3.168376159667969, |
| "step": 7425 |
| }, |
| { |
| "epoch": 1.4274735830931795, |
| "grad_norm": 0.31631234288215637, |
| "learning_rate": 6.23457386290028e-05, |
| "loss": 3.167464828491211, |
| "step": 7430 |
| }, |
| { |
| "epoch": 1.4284341978866475, |
| "grad_norm": 0.423149973154068, |
| "learning_rate": 6.215249950702488e-05, |
| "loss": 3.1704282760620117, |
| "step": 7435 |
| }, |
| { |
| "epoch": 1.4293948126801153, |
| "grad_norm": 0.663411557674408, |
| "learning_rate": 6.195948203330282e-05, |
| "loss": 3.170808219909668, |
| "step": 7440 |
| }, |
| { |
| "epoch": 1.430355427473583, |
| "grad_norm": 0.35881131887435913, |
| "learning_rate": 6.176668669483948e-05, |
| "loss": 3.1693355560302736, |
| "step": 7445 |
| }, |
| { |
| "epoch": 1.431316042267051, |
| "grad_norm": 0.3775772452354431, |
| "learning_rate": 6.15741139780772e-05, |
| "loss": 3.1732666015625, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.4322766570605188, |
| "grad_norm": 0.40818941593170166, |
| "learning_rate": 6.138176436889672e-05, |
| "loss": 3.1699262619018556, |
| "step": 7455 |
| }, |
| { |
| "epoch": 1.4332372718539865, |
| "grad_norm": 0.31840816140174866, |
| "learning_rate": 6.118963835261588e-05, |
| "loss": 3.171256256103516, |
| "step": 7460 |
| }, |
| { |
| "epoch": 1.4341978866474543, |
| "grad_norm": 0.45461708307266235, |
| "learning_rate": 6.099773641398834e-05, |
| "loss": 3.1705402374267577, |
| "step": 7465 |
| }, |
| { |
| "epoch": 1.435158501440922, |
| "grad_norm": 0.49488258361816406, |
| "learning_rate": 6.080605903720229e-05, |
| "loss": 3.1715473175048827, |
| "step": 7470 |
| }, |
| { |
| "epoch": 1.43611911623439, |
| "grad_norm": 0.4354032278060913, |
| "learning_rate": 6.061460670587948e-05, |
| "loss": 3.170640563964844, |
| "step": 7475 |
| }, |
| { |
| "epoch": 1.4370797310278578, |
| "grad_norm": 0.39679399132728577, |
| "learning_rate": 6.0423379903073824e-05, |
| "loss": 3.170828437805176, |
| "step": 7480 |
| }, |
| { |
| "epoch": 1.4380403458213258, |
| "grad_norm": 0.37695419788360596, |
| "learning_rate": 6.023237911127005e-05, |
| "loss": 3.167501449584961, |
| "step": 7485 |
| }, |
| { |
| "epoch": 1.4390009606147935, |
| "grad_norm": 0.37574145197868347, |
| "learning_rate": 6.004160481238281e-05, |
| "loss": 3.172044372558594, |
| "step": 7490 |
| }, |
| { |
| "epoch": 1.4399615754082613, |
| "grad_norm": 0.5872917771339417, |
| "learning_rate": 5.9851057487755225e-05, |
| "loss": 3.167539596557617, |
| "step": 7495 |
| }, |
| { |
| "epoch": 1.440922190201729, |
| "grad_norm": 0.43774786591529846, |
| "learning_rate": 5.966073761815776e-05, |
| "loss": 3.170111656188965, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.4418828049951968, |
| "grad_norm": 0.319992333650589, |
| "learning_rate": 5.947064568378696e-05, |
| "loss": 3.1693355560302736, |
| "step": 7505 |
| }, |
| { |
| "epoch": 1.4428434197886648, |
| "grad_norm": 0.4077332019805908, |
| "learning_rate": 5.928078216426427e-05, |
| "loss": 3.1672155380249025, |
| "step": 7510 |
| }, |
| { |
| "epoch": 1.4438040345821326, |
| "grad_norm": 0.355669766664505, |
| "learning_rate": 5.909114753863488e-05, |
| "loss": 3.1689855575561525, |
| "step": 7515 |
| }, |
| { |
| "epoch": 1.4447646493756003, |
| "grad_norm": 0.5554761290550232, |
| "learning_rate": 5.8901742285366315e-05, |
| "loss": 3.171320343017578, |
| "step": 7520 |
| }, |
| { |
| "epoch": 1.4457252641690683, |
| "grad_norm": 0.4790358543395996, |
| "learning_rate": 5.8712566882347504e-05, |
| "loss": 3.1696239471435548, |
| "step": 7525 |
| }, |
| { |
| "epoch": 1.446685878962536, |
| "grad_norm": 0.4286898076534271, |
| "learning_rate": 5.8523621806887374e-05, |
| "loss": 3.169162368774414, |
| "step": 7530 |
| }, |
| { |
| "epoch": 1.4476464937560038, |
| "grad_norm": 0.42978528141975403, |
| "learning_rate": 5.8334907535713836e-05, |
| "loss": 3.172647476196289, |
| "step": 7535 |
| }, |
| { |
| "epoch": 1.4486071085494716, |
| "grad_norm": 0.4055570363998413, |
| "learning_rate": 5.8146424544972205e-05, |
| "loss": 3.1680522918701173, |
| "step": 7540 |
| }, |
| { |
| "epoch": 1.4495677233429394, |
| "grad_norm": 0.3212641775608063, |
| "learning_rate": 5.7958173310224465e-05, |
| "loss": 3.168552017211914, |
| "step": 7545 |
| }, |
| { |
| "epoch": 1.4505283381364074, |
| "grad_norm": 0.40570881962776184, |
| "learning_rate": 5.7770154306447866e-05, |
| "loss": 3.172994041442871, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.4514889529298751, |
| "grad_norm": 0.4512649476528168, |
| "learning_rate": 5.758236800803351e-05, |
| "loss": 3.171183395385742, |
| "step": 7555 |
| }, |
| { |
| "epoch": 1.4524495677233429, |
| "grad_norm": 0.5307871103286743, |
| "learning_rate": 5.739481488878558e-05, |
| "loss": 3.172830581665039, |
| "step": 7560 |
| }, |
| { |
| "epoch": 1.4534101825168109, |
| "grad_norm": 0.38034340739250183, |
| "learning_rate": 5.720749542191979e-05, |
| "loss": 3.1710861206054686, |
| "step": 7565 |
| }, |
| { |
| "epoch": 1.4543707973102786, |
| "grad_norm": 0.46515336632728577, |
| "learning_rate": 5.7020410080062416e-05, |
| "loss": 3.1718137741088865, |
| "step": 7570 |
| }, |
| { |
| "epoch": 1.4553314121037464, |
| "grad_norm": 0.4164998531341553, |
| "learning_rate": 5.683355933524894e-05, |
| "loss": 3.1689435958862306, |
| "step": 7575 |
| }, |
| { |
| "epoch": 1.4562920268972142, |
| "grad_norm": 0.48741379380226135, |
| "learning_rate": 5.6646943658922975e-05, |
| "loss": 3.167072296142578, |
| "step": 7580 |
| }, |
| { |
| "epoch": 1.457252641690682, |
| "grad_norm": 0.39116013050079346, |
| "learning_rate": 5.646056352193505e-05, |
| "loss": 3.171469497680664, |
| "step": 7585 |
| }, |
| { |
| "epoch": 1.45821325648415, |
| "grad_norm": 0.4745250642299652, |
| "learning_rate": 5.6274419394541296e-05, |
| "loss": 3.168760871887207, |
| "step": 7590 |
| }, |
| { |
| "epoch": 1.4591738712776177, |
| "grad_norm": 0.5160530805587769, |
| "learning_rate": 5.608851174640247e-05, |
| "loss": 3.168941307067871, |
| "step": 7595 |
| }, |
| { |
| "epoch": 1.4601344860710854, |
| "grad_norm": 0.3649737238883972, |
| "learning_rate": 5.590284104658264e-05, |
| "loss": 3.1685501098632813, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.4610951008645534, |
| "grad_norm": 0.37525928020477295, |
| "learning_rate": 5.571740776354811e-05, |
| "loss": 3.169935607910156, |
| "step": 7605 |
| }, |
| { |
| "epoch": 1.4620557156580212, |
| "grad_norm": 0.35180380940437317, |
| "learning_rate": 5.553221236516594e-05, |
| "loss": 3.1680299758911135, |
| "step": 7610 |
| }, |
| { |
| "epoch": 1.463016330451489, |
| "grad_norm": 0.5641520023345947, |
| "learning_rate": 5.534725531870317e-05, |
| "loss": 3.171617126464844, |
| "step": 7615 |
| }, |
| { |
| "epoch": 1.4639769452449567, |
| "grad_norm": 0.5281386375427246, |
| "learning_rate": 5.516253709082547e-05, |
| "loss": 3.1680675506591798, |
| "step": 7620 |
| }, |
| { |
| "epoch": 1.4649375600384245, |
| "grad_norm": 0.37240076065063477, |
| "learning_rate": 5.4978058147595796e-05, |
| "loss": 3.164999008178711, |
| "step": 7625 |
| }, |
| { |
| "epoch": 1.4658981748318924, |
| "grad_norm": 0.37205061316490173, |
| "learning_rate": 5.479381895447346e-05, |
| "loss": 3.1697301864624023, |
| "step": 7630 |
| }, |
| { |
| "epoch": 1.4668587896253602, |
| "grad_norm": 0.35913726687431335, |
| "learning_rate": 5.4609819976312854e-05, |
| "loss": 3.1634567260742186, |
| "step": 7635 |
| }, |
| { |
| "epoch": 1.4678194044188282, |
| "grad_norm": 0.42885205149650574, |
| "learning_rate": 5.4426061677362284e-05, |
| "loss": 3.173727798461914, |
| "step": 7640 |
| }, |
| { |
| "epoch": 1.468780019212296, |
| "grad_norm": 0.3557131588459015, |
| "learning_rate": 5.424254452126279e-05, |
| "loss": 3.1679935455322266, |
| "step": 7645 |
| }, |
| { |
| "epoch": 1.4697406340057637, |
| "grad_norm": 0.4141709804534912, |
| "learning_rate": 5.4059268971047e-05, |
| "loss": 3.1674577713012697, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.4707012487992315, |
| "grad_norm": 0.40807923674583435, |
| "learning_rate": 5.387623548913795e-05, |
| "loss": 3.171774673461914, |
| "step": 7655 |
| }, |
| { |
| "epoch": 1.4716618635926992, |
| "grad_norm": 0.3850151598453522, |
| "learning_rate": 5.36934445373478e-05, |
| "loss": 3.164442443847656, |
| "step": 7660 |
| }, |
| { |
| "epoch": 1.4726224783861672, |
| "grad_norm": 0.43482470512390137, |
| "learning_rate": 5.3510896576876924e-05, |
| "loss": 3.1658775329589846, |
| "step": 7665 |
| }, |
| { |
| "epoch": 1.473583093179635, |
| "grad_norm": 0.40651193261146545, |
| "learning_rate": 5.3328592068312565e-05, |
| "loss": 3.164666748046875, |
| "step": 7670 |
| }, |
| { |
| "epoch": 1.4745437079731027, |
| "grad_norm": 0.33272117376327515, |
| "learning_rate": 5.3146531471627737e-05, |
| "loss": 3.169040298461914, |
| "step": 7675 |
| }, |
| { |
| "epoch": 1.4755043227665707, |
| "grad_norm": 0.3396502435207367, |
| "learning_rate": 5.29647152461799e-05, |
| "loss": 3.1710128784179688, |
| "step": 7680 |
| }, |
| { |
| "epoch": 1.4764649375600385, |
| "grad_norm": 0.27867111563682556, |
| "learning_rate": 5.278314385071011e-05, |
| "loss": 3.169612693786621, |
| "step": 7685 |
| }, |
| { |
| "epoch": 1.4774255523535063, |
| "grad_norm": 0.5024138689041138, |
| "learning_rate": 5.260181774334165e-05, |
| "loss": 3.1680776596069338, |
| "step": 7690 |
| }, |
| { |
| "epoch": 1.478386167146974, |
| "grad_norm": 0.5927129983901978, |
| "learning_rate": 5.2420737381578814e-05, |
| "loss": 3.1725555419921876, |
| "step": 7695 |
| }, |
| { |
| "epoch": 1.4793467819404418, |
| "grad_norm": 0.2990652322769165, |
| "learning_rate": 5.223990322230596e-05, |
| "loss": 3.1685482025146485, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.4803073967339098, |
| "grad_norm": 0.41576090455055237, |
| "learning_rate": 5.205931572178625e-05, |
| "loss": 3.1704280853271483, |
| "step": 7705 |
| }, |
| { |
| "epoch": 1.4812680115273775, |
| "grad_norm": 0.33696505427360535, |
| "learning_rate": 5.187897533566047e-05, |
| "loss": 3.1678043365478517, |
| "step": 7710 |
| }, |
| { |
| "epoch": 1.4822286263208453, |
| "grad_norm": 0.33144330978393555, |
| "learning_rate": 5.169888251894587e-05, |
| "loss": 3.1680809020996095, |
| "step": 7715 |
| }, |
| { |
| "epoch": 1.4831892411143133, |
| "grad_norm": 0.35232898592948914, |
| "learning_rate": 5.151903772603517e-05, |
| "loss": 3.1694143295288084, |
| "step": 7720 |
| }, |
| { |
| "epoch": 1.484149855907781, |
| "grad_norm": 0.482440710067749, |
| "learning_rate": 5.1339441410695225e-05, |
| "loss": 3.167070007324219, |
| "step": 7725 |
| }, |
| { |
| "epoch": 1.4851104707012488, |
| "grad_norm": 0.44148990511894226, |
| "learning_rate": 5.116009402606591e-05, |
| "loss": 3.1692365646362304, |
| "step": 7730 |
| }, |
| { |
| "epoch": 1.4860710854947166, |
| "grad_norm": 0.4089405834674835, |
| "learning_rate": 5.0980996024659075e-05, |
| "loss": 3.165630912780762, |
| "step": 7735 |
| }, |
| { |
| "epoch": 1.4870317002881843, |
| "grad_norm": 0.3344769775867462, |
| "learning_rate": 5.0802147858357386e-05, |
| "loss": 3.1695476531982423, |
| "step": 7740 |
| }, |
| { |
| "epoch": 1.4879923150816523, |
| "grad_norm": 0.389249324798584, |
| "learning_rate": 5.0623549978413166e-05, |
| "loss": 3.171805000305176, |
| "step": 7745 |
| }, |
| { |
| "epoch": 1.48895292987512, |
| "grad_norm": 0.3440057635307312, |
| "learning_rate": 5.0445202835447056e-05, |
| "loss": 3.1670265197753906, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.4899135446685878, |
| "grad_norm": 0.3163948059082031, |
| "learning_rate": 5.026710687944728e-05, |
| "loss": 3.1670303344726562, |
| "step": 7755 |
| }, |
| { |
| "epoch": 1.4908741594620558, |
| "grad_norm": 0.3952416777610779, |
| "learning_rate": 5.0089262559768246e-05, |
| "loss": 3.171236038208008, |
| "step": 7760 |
| }, |
| { |
| "epoch": 1.4918347742555236, |
| "grad_norm": 0.3361209034919739, |
| "learning_rate": 4.9911670325129304e-05, |
| "loss": 3.1686822891235353, |
| "step": 7765 |
| }, |
| { |
| "epoch": 1.4927953890489913, |
| "grad_norm": 0.35905590653419495, |
| "learning_rate": 4.9734330623613924e-05, |
| "loss": 3.1694427490234376, |
| "step": 7770 |
| }, |
| { |
| "epoch": 1.493756003842459, |
| "grad_norm": 0.4923272132873535, |
| "learning_rate": 4.955724390266841e-05, |
| "loss": 3.171158218383789, |
| "step": 7775 |
| }, |
| { |
| "epoch": 1.4947166186359269, |
| "grad_norm": 0.3285467326641083, |
| "learning_rate": 4.9380410609100674e-05, |
| "loss": 3.1719661712646485, |
| "step": 7780 |
| }, |
| { |
| "epoch": 1.4956772334293948, |
| "grad_norm": 0.35682541131973267, |
| "learning_rate": 4.920383118907929e-05, |
| "loss": 3.1671146392822265, |
| "step": 7785 |
| }, |
| { |
| "epoch": 1.4966378482228626, |
| "grad_norm": 0.5638254880905151, |
| "learning_rate": 4.902750608813222e-05, |
| "loss": 3.1699798583984373, |
| "step": 7790 |
| }, |
| { |
| "epoch": 1.4975984630163304, |
| "grad_norm": 0.2942904531955719, |
| "learning_rate": 4.885143575114587e-05, |
| "loss": 3.1689422607421873, |
| "step": 7795 |
| }, |
| { |
| "epoch": 1.4985590778097984, |
| "grad_norm": 0.3493422269821167, |
| "learning_rate": 4.8675620622363645e-05, |
| "loss": 3.168619918823242, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.4995196926032661, |
| "grad_norm": 0.40712451934814453, |
| "learning_rate": 4.850006114538519e-05, |
| "loss": 3.169388771057129, |
| "step": 7805 |
| }, |
| { |
| "epoch": 1.5004803073967339, |
| "grad_norm": 0.31108608841896057, |
| "learning_rate": 4.8324757763165075e-05, |
| "loss": 3.1686771392822264, |
| "step": 7810 |
| }, |
| { |
| "epoch": 1.5014409221902016, |
| "grad_norm": 0.5020172595977783, |
| "learning_rate": 4.814971091801179e-05, |
| "loss": 3.1644512176513673, |
| "step": 7815 |
| }, |
| { |
| "epoch": 1.5024015369836694, |
| "grad_norm": 0.4801461696624756, |
| "learning_rate": 4.7974921051586385e-05, |
| "loss": 3.1699331283569334, |
| "step": 7820 |
| }, |
| { |
| "epoch": 1.5033621517771374, |
| "grad_norm": 0.31796136498451233, |
| "learning_rate": 4.780038860490164e-05, |
| "loss": 3.1699861526489257, |
| "step": 7825 |
| }, |
| { |
| "epoch": 1.5043227665706052, |
| "grad_norm": 0.3169126510620117, |
| "learning_rate": 4.762611401832089e-05, |
| "loss": 3.1705883026123045, |
| "step": 7830 |
| }, |
| { |
| "epoch": 1.5052833813640731, |
| "grad_norm": 0.3533933758735657, |
| "learning_rate": 4.745209773155671e-05, |
| "loss": 3.165675926208496, |
| "step": 7835 |
| }, |
| { |
| "epoch": 1.506243996157541, |
| "grad_norm": 0.4423210024833679, |
| "learning_rate": 4.727834018367007e-05, |
| "loss": 3.1672183990478517, |
| "step": 7840 |
| }, |
| { |
| "epoch": 1.5072046109510087, |
| "grad_norm": 0.33951136469841003, |
| "learning_rate": 4.710484181306912e-05, |
| "loss": 3.169887733459473, |
| "step": 7845 |
| }, |
| { |
| "epoch": 1.5081652257444764, |
| "grad_norm": 0.298172265291214, |
| "learning_rate": 4.693160305750801e-05, |
| "loss": 3.1656867980957033, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.5091258405379442, |
| "grad_norm": 0.3188192546367645, |
| "learning_rate": 4.675862435408591e-05, |
| "loss": 3.1662307739257813, |
| "step": 7855 |
| }, |
| { |
| "epoch": 1.510086455331412, |
| "grad_norm": 0.3231668174266815, |
| "learning_rate": 4.6585906139245834e-05, |
| "loss": 3.1684810638427736, |
| "step": 7860 |
| }, |
| { |
| "epoch": 1.51104707012488, |
| "grad_norm": 0.46347707509994507, |
| "learning_rate": 4.641344884877362e-05, |
| "loss": 3.1662145614624024, |
| "step": 7865 |
| }, |
| { |
| "epoch": 1.5120076849183477, |
| "grad_norm": 0.4303688704967499, |
| "learning_rate": 4.6241252917796576e-05, |
| "loss": 3.166950798034668, |
| "step": 7870 |
| }, |
| { |
| "epoch": 1.5129682997118157, |
| "grad_norm": 0.3407163918018341, |
| "learning_rate": 4.6069318780782765e-05, |
| "loss": 3.1677284240722656, |
| "step": 7875 |
| }, |
| { |
| "epoch": 1.5139289145052834, |
| "grad_norm": 0.2888704240322113, |
| "learning_rate": 4.589764687153967e-05, |
| "loss": 3.162090301513672, |
| "step": 7880 |
| }, |
| { |
| "epoch": 1.5148895292987512, |
| "grad_norm": 0.3005317747592926, |
| "learning_rate": 4.5726237623213155e-05, |
| "loss": 3.1687442779541017, |
| "step": 7885 |
| }, |
| { |
| "epoch": 1.515850144092219, |
| "grad_norm": 0.2978266477584839, |
| "learning_rate": 4.555509146828624e-05, |
| "loss": 3.1686517715454103, |
| "step": 7890 |
| }, |
| { |
| "epoch": 1.5168107588856867, |
| "grad_norm": 0.2926294505596161, |
| "learning_rate": 4.53842088385783e-05, |
| "loss": 3.170303726196289, |
| "step": 7895 |
| }, |
| { |
| "epoch": 1.5177713736791547, |
| "grad_norm": 0.47925063967704773, |
| "learning_rate": 4.521359016524376e-05, |
| "loss": 3.169039726257324, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.5187319884726225, |
| "grad_norm": 0.4029441475868225, |
| "learning_rate": 4.5043235878770965e-05, |
| "loss": 3.166218376159668, |
| "step": 7905 |
| }, |
| { |
| "epoch": 1.5196926032660905, |
| "grad_norm": 0.31125885248184204, |
| "learning_rate": 4.4873146408981295e-05, |
| "loss": 3.1654186248779297, |
| "step": 7910 |
| }, |
| { |
| "epoch": 1.5206532180595582, |
| "grad_norm": 0.3251868784427643, |
| "learning_rate": 4.4703322185027926e-05, |
| "loss": 3.1693817138671876, |
| "step": 7915 |
| }, |
| { |
| "epoch": 1.521613832853026, |
| "grad_norm": 0.31480100750923157, |
| "learning_rate": 4.453376363539481e-05, |
| "loss": 3.1616186141967773, |
| "step": 7920 |
| }, |
| { |
| "epoch": 1.5225744476464937, |
| "grad_norm": 0.3208870589733124, |
| "learning_rate": 4.436447118789555e-05, |
| "loss": 3.168798637390137, |
| "step": 7925 |
| }, |
| { |
| "epoch": 1.5235350624399615, |
| "grad_norm": 0.3242574632167816, |
| "learning_rate": 4.419544526967238e-05, |
| "loss": 3.171648406982422, |
| "step": 7930 |
| }, |
| { |
| "epoch": 1.5244956772334293, |
| "grad_norm": 0.3922894597053528, |
| "learning_rate": 4.402668630719504e-05, |
| "loss": 3.1695817947387694, |
| "step": 7935 |
| }, |
| { |
| "epoch": 1.5254562920268973, |
| "grad_norm": 0.3969297409057617, |
| "learning_rate": 4.385819472625963e-05, |
| "loss": 3.1659820556640623, |
| "step": 7940 |
| }, |
| { |
| "epoch": 1.526416906820365, |
| "grad_norm": 0.3534747362136841, |
| "learning_rate": 4.368997095198775e-05, |
| "loss": 3.164035415649414, |
| "step": 7945 |
| }, |
| { |
| "epoch": 1.527377521613833, |
| "grad_norm": 0.387599915266037, |
| "learning_rate": 4.352201540882523e-05, |
| "loss": 3.166557502746582, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.5283381364073008, |
| "grad_norm": 0.3567873537540436, |
| "learning_rate": 4.33543285205412e-05, |
| "loss": 3.1643226623535154, |
| "step": 7955 |
| }, |
| { |
| "epoch": 1.5292987512007685, |
| "grad_norm": 0.3381181061267853, |
| "learning_rate": 4.318691071022676e-05, |
| "loss": 3.1638862609863283, |
| "step": 7960 |
| }, |
| { |
| "epoch": 1.5302593659942363, |
| "grad_norm": 0.2984130084514618, |
| "learning_rate": 4.301976240029428e-05, |
| "loss": 3.1657569885253904, |
| "step": 7965 |
| }, |
| { |
| "epoch": 1.531219980787704, |
| "grad_norm": 0.2934906780719757, |
| "learning_rate": 4.285288401247614e-05, |
| "loss": 3.165040969848633, |
| "step": 7970 |
| }, |
| { |
| "epoch": 1.5321805955811718, |
| "grad_norm": 0.2868797779083252, |
| "learning_rate": 4.268627596782354e-05, |
| "loss": 3.166617202758789, |
| "step": 7975 |
| }, |
| { |
| "epoch": 1.5331412103746398, |
| "grad_norm": 0.40537068247795105, |
| "learning_rate": 4.251993868670569e-05, |
| "loss": 3.1657033920288087, |
| "step": 7980 |
| }, |
| { |
| "epoch": 1.5341018251681076, |
| "grad_norm": 0.3606870770454407, |
| "learning_rate": 4.235387258880871e-05, |
| "loss": 3.1681283950805663, |
| "step": 7985 |
| }, |
| { |
| "epoch": 1.5350624399615755, |
| "grad_norm": 0.3049268126487732, |
| "learning_rate": 4.218807809313428e-05, |
| "loss": 3.165813446044922, |
| "step": 7990 |
| }, |
| { |
| "epoch": 1.5360230547550433, |
| "grad_norm": 0.3047685921192169, |
| "learning_rate": 4.202255561799897e-05, |
| "loss": 3.1700771331787108, |
| "step": 7995 |
| }, |
| { |
| "epoch": 1.536983669548511, |
| "grad_norm": 0.32123667001724243, |
| "learning_rate": 4.1857305581032974e-05, |
| "loss": 3.16713924407959, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.5379442843419788, |
| "grad_norm": 0.31456029415130615, |
| "learning_rate": 4.1692328399179134e-05, |
| "loss": 3.1665233612060546, |
| "step": 8005 |
| }, |
| { |
| "epoch": 1.5389048991354466, |
| "grad_norm": 0.2823289632797241, |
| "learning_rate": 4.1527624488691706e-05, |
| "loss": 3.165792465209961, |
| "step": 8010 |
| }, |
| { |
| "epoch": 1.5398655139289144, |
| "grad_norm": 0.41505372524261475, |
| "learning_rate": 4.1363194265135584e-05, |
| "loss": 3.165696907043457, |
| "step": 8015 |
| }, |
| { |
| "epoch": 1.5408261287223823, |
| "grad_norm": 0.3873119056224823, |
| "learning_rate": 4.1199038143385114e-05, |
| "loss": 3.1616024017333983, |
| "step": 8020 |
| }, |
| { |
| "epoch": 1.54178674351585, |
| "grad_norm": 0.29045772552490234, |
| "learning_rate": 4.1035156537623056e-05, |
| "loss": 3.163679504394531, |
| "step": 8025 |
| }, |
| { |
| "epoch": 1.542747358309318, |
| "grad_norm": 0.31360870599746704, |
| "learning_rate": 4.087154986133944e-05, |
| "loss": 3.1689876556396483, |
| "step": 8030 |
| }, |
| { |
| "epoch": 1.5437079731027858, |
| "grad_norm": 0.34343641996383667, |
| "learning_rate": 4.070821852733074e-05, |
| "loss": 3.1644660949707033, |
| "step": 8035 |
| }, |
| { |
| "epoch": 1.5446685878962536, |
| "grad_norm": 0.3275468647480011, |
| "learning_rate": 4.054516294769871e-05, |
| "loss": 3.161859321594238, |
| "step": 8040 |
| }, |
| { |
| "epoch": 1.5456292026897214, |
| "grad_norm": 0.3492441773414612, |
| "learning_rate": 4.038238353384919e-05, |
| "loss": 3.166157531738281, |
| "step": 8045 |
| }, |
| { |
| "epoch": 1.5465898174831891, |
| "grad_norm": 0.3080005645751953, |
| "learning_rate": 4.021988069649138e-05, |
| "loss": 3.169676399230957, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.547550432276657, |
| "grad_norm": 0.2594451904296875, |
| "learning_rate": 4.0057654845636714e-05, |
| "loss": 3.170206642150879, |
| "step": 8055 |
| }, |
| { |
| "epoch": 1.5485110470701249, |
| "grad_norm": 0.2814268469810486, |
| "learning_rate": 3.989570639059753e-05, |
| "loss": 3.164537811279297, |
| "step": 8060 |
| }, |
| { |
| "epoch": 1.5494716618635929, |
| "grad_norm": 0.3419535756111145, |
| "learning_rate": 3.973403573998647e-05, |
| "loss": 3.169506645202637, |
| "step": 8065 |
| }, |
| { |
| "epoch": 1.5504322766570606, |
| "grad_norm": 0.3430168032646179, |
| "learning_rate": 3.957264330171511e-05, |
| "loss": 3.1708099365234377, |
| "step": 8070 |
| }, |
| { |
| "epoch": 1.5513928914505284, |
| "grad_norm": 0.3074200749397278, |
| "learning_rate": 3.941152948299321e-05, |
| "loss": 3.1663097381591796, |
| "step": 8075 |
| }, |
| { |
| "epoch": 1.5523535062439962, |
| "grad_norm": 0.29986321926116943, |
| "learning_rate": 3.9250694690327383e-05, |
| "loss": 3.1678709030151366, |
| "step": 8080 |
| }, |
| { |
| "epoch": 1.553314121037464, |
| "grad_norm": 0.305226594209671, |
| "learning_rate": 3.9090139329520327e-05, |
| "loss": 3.163295364379883, |
| "step": 8085 |
| }, |
| { |
| "epoch": 1.5542747358309317, |
| "grad_norm": 0.27455848455429077, |
| "learning_rate": 3.8929863805669706e-05, |
| "loss": 3.163608741760254, |
| "step": 8090 |
| }, |
| { |
| "epoch": 1.5552353506243997, |
| "grad_norm": 0.30126896500587463, |
| "learning_rate": 3.876986852316715e-05, |
| "loss": 3.1656238555908205, |
| "step": 8095 |
| }, |
| { |
| "epoch": 1.5561959654178674, |
| "grad_norm": 0.3914654552936554, |
| "learning_rate": 3.861015388569709e-05, |
| "loss": 3.1641334533691405, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.5571565802113354, |
| "grad_norm": 0.3612956404685974, |
| "learning_rate": 3.845072029623598e-05, |
| "loss": 3.1632358551025392, |
| "step": 8105 |
| }, |
| { |
| "epoch": 1.5581171950048032, |
| "grad_norm": 0.2885013222694397, |
| "learning_rate": 3.8291568157051154e-05, |
| "loss": 3.1646907806396483, |
| "step": 8110 |
| }, |
| { |
| "epoch": 1.559077809798271, |
| "grad_norm": 0.29719772934913635, |
| "learning_rate": 3.8132697869699705e-05, |
| "loss": 3.1659112930297852, |
| "step": 8115 |
| }, |
| { |
| "epoch": 1.5600384245917387, |
| "grad_norm": 0.308212012052536, |
| "learning_rate": 3.797410983502766e-05, |
| "loss": 3.1631364822387695, |
| "step": 8120 |
| }, |
| { |
| "epoch": 1.5609990393852065, |
| "grad_norm": 0.30908966064453125, |
| "learning_rate": 3.781580445316906e-05, |
| "loss": 3.1642078399658202, |
| "step": 8125 |
| }, |
| { |
| "epoch": 1.5619596541786742, |
| "grad_norm": 0.3072703182697296, |
| "learning_rate": 3.765778212354445e-05, |
| "loss": 3.1633129119873047, |
| "step": 8130 |
| }, |
| { |
| "epoch": 1.5629202689721422, |
| "grad_norm": 0.3363180458545685, |
| "learning_rate": 3.7500043244860475e-05, |
| "loss": 3.16600341796875, |
| "step": 8135 |
| }, |
| { |
| "epoch": 1.56388088376561, |
| "grad_norm": 0.41240906715393066, |
| "learning_rate": 3.7342588215108446e-05, |
| "loss": 3.168244743347168, |
| "step": 8140 |
| }, |
| { |
| "epoch": 1.564841498559078, |
| "grad_norm": 0.3254190683364868, |
| "learning_rate": 3.7185417431563644e-05, |
| "loss": 3.1647628784179687, |
| "step": 8145 |
| }, |
| { |
| "epoch": 1.5658021133525457, |
| "grad_norm": 0.3488558232784271, |
| "learning_rate": 3.702853129078398e-05, |
| "loss": 3.1683582305908202, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.5667627281460135, |
| "grad_norm": 0.34358084201812744, |
| "learning_rate": 3.6871930188609325e-05, |
| "loss": 3.1676990509033205, |
| "step": 8155 |
| }, |
| { |
| "epoch": 1.5677233429394812, |
| "grad_norm": 0.3370071053504944, |
| "learning_rate": 3.671561452016033e-05, |
| "loss": 3.1652057647705076, |
| "step": 8160 |
| }, |
| { |
| "epoch": 1.568683957732949, |
| "grad_norm": 0.23271813988685608, |
| "learning_rate": 3.655958467983749e-05, |
| "loss": 3.164310073852539, |
| "step": 8165 |
| }, |
| { |
| "epoch": 1.5696445725264168, |
| "grad_norm": 0.33882594108581543, |
| "learning_rate": 3.6403841061320026e-05, |
| "loss": 3.1641658782958983, |
| "step": 8170 |
| }, |
| { |
| "epoch": 1.5706051873198847, |
| "grad_norm": 0.25975945591926575, |
| "learning_rate": 3.6248384057565104e-05, |
| "loss": 3.1639142990112306, |
| "step": 8175 |
| }, |
| { |
| "epoch": 1.5715658021133525, |
| "grad_norm": 0.3010103404521942, |
| "learning_rate": 3.6093214060806686e-05, |
| "loss": 3.166419792175293, |
| "step": 8180 |
| }, |
| { |
| "epoch": 1.5725264169068205, |
| "grad_norm": 0.26971593499183655, |
| "learning_rate": 3.593833146255461e-05, |
| "loss": 3.1657012939453124, |
| "step": 8185 |
| }, |
| { |
| "epoch": 1.5734870317002883, |
| "grad_norm": 0.2859204411506653, |
| "learning_rate": 3.5783736653593546e-05, |
| "loss": 3.163465118408203, |
| "step": 8190 |
| }, |
| { |
| "epoch": 1.574447646493756, |
| "grad_norm": 0.2561207115650177, |
| "learning_rate": 3.56294300239821e-05, |
| "loss": 3.164503288269043, |
| "step": 8195 |
| }, |
| { |
| "epoch": 1.5754082612872238, |
| "grad_norm": 0.22760829329490662, |
| "learning_rate": 3.547541196305166e-05, |
| "loss": 3.1677932739257812, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.5763688760806915, |
| "grad_norm": 0.3261941969394684, |
| "learning_rate": 3.53216828594056e-05, |
| "loss": 3.164171600341797, |
| "step": 8205 |
| }, |
| { |
| "epoch": 1.5773294908741593, |
| "grad_norm": 0.3237496018409729, |
| "learning_rate": 3.5168243100918254e-05, |
| "loss": 3.1644121170043946, |
| "step": 8210 |
| }, |
| { |
| "epoch": 1.5782901056676273, |
| "grad_norm": 0.308450311422348, |
| "learning_rate": 3.501509307473391e-05, |
| "loss": 3.167286682128906, |
| "step": 8215 |
| }, |
| { |
| "epoch": 1.579250720461095, |
| "grad_norm": 0.2737598717212677, |
| "learning_rate": 3.486223316726569e-05, |
| "loss": 3.159839630126953, |
| "step": 8220 |
| }, |
| { |
| "epoch": 1.580211335254563, |
| "grad_norm": 0.2531875967979431, |
| "learning_rate": 3.470966376419489e-05, |
| "loss": 3.165401268005371, |
| "step": 8225 |
| }, |
| { |
| "epoch": 1.5811719500480308, |
| "grad_norm": 0.3886658847332001, |
| "learning_rate": 3.455738525046976e-05, |
| "loss": 3.164730453491211, |
| "step": 8230 |
| }, |
| { |
| "epoch": 1.5821325648414986, |
| "grad_norm": 0.32052338123321533, |
| "learning_rate": 3.440539801030463e-05, |
| "loss": 3.1636079788208007, |
| "step": 8235 |
| }, |
| { |
| "epoch": 1.5830931796349663, |
| "grad_norm": 0.3415239751338959, |
| "learning_rate": 3.425370242717887e-05, |
| "loss": 3.1651962280273436, |
| "step": 8240 |
| }, |
| { |
| "epoch": 1.584053794428434, |
| "grad_norm": 0.27457112073898315, |
| "learning_rate": 3.410229888383597e-05, |
| "loss": 3.169073486328125, |
| "step": 8245 |
| }, |
| { |
| "epoch": 1.585014409221902, |
| "grad_norm": 0.3607783615589142, |
| "learning_rate": 3.395118776228265e-05, |
| "loss": 3.1643707275390627, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.5859750240153698, |
| "grad_norm": 0.44193530082702637, |
| "learning_rate": 3.380036944378775e-05, |
| "loss": 3.165296936035156, |
| "step": 8255 |
| }, |
| { |
| "epoch": 1.5869356388088378, |
| "grad_norm": 0.27032527327537537, |
| "learning_rate": 3.364984430888138e-05, |
| "loss": 3.1653570175170898, |
| "step": 8260 |
| }, |
| { |
| "epoch": 1.5878962536023056, |
| "grad_norm": 0.3761611580848694, |
| "learning_rate": 3.34996127373539e-05, |
| "loss": 3.1637210845947266, |
| "step": 8265 |
| }, |
| { |
| "epoch": 1.5888568683957733, |
| "grad_norm": 0.2229749858379364, |
| "learning_rate": 3.3349675108254946e-05, |
| "loss": 3.165303421020508, |
| "step": 8270 |
| }, |
| { |
| "epoch": 1.589817483189241, |
| "grad_norm": 0.23744769394397736, |
| "learning_rate": 3.320003179989254e-05, |
| "loss": 3.16616268157959, |
| "step": 8275 |
| }, |
| { |
| "epoch": 1.5907780979827089, |
| "grad_norm": 0.28989988565444946, |
| "learning_rate": 3.305068318983211e-05, |
| "loss": 3.1658939361572265, |
| "step": 8280 |
| }, |
| { |
| "epoch": 1.5917387127761766, |
| "grad_norm": 0.2983841300010681, |
| "learning_rate": 3.290162965489558e-05, |
| "loss": 3.1631855010986327, |
| "step": 8285 |
| }, |
| { |
| "epoch": 1.5926993275696446, |
| "grad_norm": 0.2570306658744812, |
| "learning_rate": 3.275287157116021e-05, |
| "loss": 3.1675746917724608, |
| "step": 8290 |
| }, |
| { |
| "epoch": 1.5936599423631124, |
| "grad_norm": 0.3952568471431732, |
| "learning_rate": 3.260440931395794e-05, |
| "loss": 3.1651771545410154, |
| "step": 8295 |
| }, |
| { |
| "epoch": 1.5946205571565804, |
| "grad_norm": 0.3825446367263794, |
| "learning_rate": 3.24562432578743e-05, |
| "loss": 3.1654695510864257, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.5955811719500481, |
| "grad_norm": 0.2898649275302887, |
| "learning_rate": 3.230837377674746e-05, |
| "loss": 3.1621742248535156, |
| "step": 8305 |
| }, |
| { |
| "epoch": 1.5965417867435159, |
| "grad_norm": 0.24470216035842896, |
| "learning_rate": 3.216080124366724e-05, |
| "loss": 3.1656517028808593, |
| "step": 8310 |
| }, |
| { |
| "epoch": 1.5975024015369836, |
| "grad_norm": 0.23829184472560883, |
| "learning_rate": 3.20135260309743e-05, |
| "loss": 3.166254425048828, |
| "step": 8315 |
| }, |
| { |
| "epoch": 1.5984630163304514, |
| "grad_norm": 0.2703918218612671, |
| "learning_rate": 3.186654851025911e-05, |
| "loss": 3.1674873352050783, |
| "step": 8320 |
| }, |
| { |
| "epoch": 1.5994236311239192, |
| "grad_norm": 0.2798570394515991, |
| "learning_rate": 3.171986905236104e-05, |
| "loss": 3.16519718170166, |
| "step": 8325 |
| }, |
| { |
| "epoch": 1.6003842459173871, |
| "grad_norm": 0.29663723707199097, |
| "learning_rate": 3.15734880273674e-05, |
| "loss": 3.1651836395263673, |
| "step": 8330 |
| }, |
| { |
| "epoch": 1.601344860710855, |
| "grad_norm": 0.2850891351699829, |
| "learning_rate": 3.142740580461261e-05, |
| "loss": 3.1634387969970703, |
| "step": 8335 |
| }, |
| { |
| "epoch": 1.602305475504323, |
| "grad_norm": 0.3124440610408783, |
| "learning_rate": 3.128162275267697e-05, |
| "loss": 3.1671958923339845, |
| "step": 8340 |
| }, |
| { |
| "epoch": 1.6032660902977907, |
| "grad_norm": 0.3228466808795929, |
| "learning_rate": 3.113613923938614e-05, |
| "loss": 3.1625482559204103, |
| "step": 8345 |
| }, |
| { |
| "epoch": 1.6042267050912584, |
| "grad_norm": 0.23934929072856903, |
| "learning_rate": 3.0990955631809965e-05, |
| "loss": 3.1659454345703124, |
| "step": 8350 |
| }, |
| { |
| "epoch": 1.6051873198847262, |
| "grad_norm": 0.2973260283470154, |
| "learning_rate": 3.08460722962616e-05, |
| "loss": 3.167291259765625, |
| "step": 8355 |
| }, |
| { |
| "epoch": 1.606147934678194, |
| "grad_norm": 0.26656073331832886, |
| "learning_rate": 3.070148959829649e-05, |
| "loss": 3.165090560913086, |
| "step": 8360 |
| }, |
| { |
| "epoch": 1.6071085494716617, |
| "grad_norm": 0.25156885385513306, |
| "learning_rate": 3.055720790271164e-05, |
| "loss": 3.1673526763916016, |
| "step": 8365 |
| }, |
| { |
| "epoch": 1.6080691642651297, |
| "grad_norm": 0.2427086979150772, |
| "learning_rate": 3.0413227573544592e-05, |
| "loss": 3.167298698425293, |
| "step": 8370 |
| }, |
| { |
| "epoch": 1.6090297790585975, |
| "grad_norm": 0.33012548089027405, |
| "learning_rate": 3.026954897407252e-05, |
| "loss": 3.16772403717041, |
| "step": 8375 |
| }, |
| { |
| "epoch": 1.6099903938520654, |
| "grad_norm": 0.19765010476112366, |
| "learning_rate": 3.01261724668112e-05, |
| "loss": 3.1641408920288088, |
| "step": 8380 |
| }, |
| { |
| "epoch": 1.6109510086455332, |
| "grad_norm": 0.25813835859298706, |
| "learning_rate": 2.9983098413514284e-05, |
| "loss": 3.168695068359375, |
| "step": 8385 |
| }, |
| { |
| "epoch": 1.611911623439001, |
| "grad_norm": 0.2662206292152405, |
| "learning_rate": 2.9840327175172295e-05, |
| "loss": 3.1604537963867188, |
| "step": 8390 |
| }, |
| { |
| "epoch": 1.6128722382324687, |
| "grad_norm": 0.2579902410507202, |
| "learning_rate": 2.969785911201172e-05, |
| "loss": 3.1639808654785155, |
| "step": 8395 |
| }, |
| { |
| "epoch": 1.6138328530259365, |
| "grad_norm": 0.27471011877059937, |
| "learning_rate": 2.9555694583494095e-05, |
| "loss": 3.1644439697265625, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.6147934678194045, |
| "grad_norm": 0.2601883113384247, |
| "learning_rate": 2.9413833948315163e-05, |
| "loss": 3.1667598724365233, |
| "step": 8405 |
| }, |
| { |
| "epoch": 1.6157540826128722, |
| "grad_norm": 0.27512237429618835, |
| "learning_rate": 2.9272277564403746e-05, |
| "loss": 3.1641109466552733, |
| "step": 8410 |
| }, |
| { |
| "epoch": 1.6167146974063402, |
| "grad_norm": 0.25258708000183105, |
| "learning_rate": 2.9131025788921193e-05, |
| "loss": 3.1663232803344727, |
| "step": 8415 |
| }, |
| { |
| "epoch": 1.617675312199808, |
| "grad_norm": 0.22575309872627258, |
| "learning_rate": 2.8990078978260216e-05, |
| "loss": 3.166961669921875, |
| "step": 8420 |
| }, |
| { |
| "epoch": 1.6186359269932757, |
| "grad_norm": 0.24490588903427124, |
| "learning_rate": 2.8849437488044118e-05, |
| "loss": 3.1663583755493163, |
| "step": 8425 |
| }, |
| { |
| "epoch": 1.6195965417867435, |
| "grad_norm": 0.2155570238828659, |
| "learning_rate": 2.8709101673125728e-05, |
| "loss": 3.1656875610351562, |
| "step": 8430 |
| }, |
| { |
| "epoch": 1.6205571565802113, |
| "grad_norm": 0.2795376479625702, |
| "learning_rate": 2.856907188758674e-05, |
| "loss": 3.162942314147949, |
| "step": 8435 |
| }, |
| { |
| "epoch": 1.621517771373679, |
| "grad_norm": 0.3358766734600067, |
| "learning_rate": 2.8429348484736658e-05, |
| "loss": 3.1658599853515623, |
| "step": 8440 |
| }, |
| { |
| "epoch": 1.622478386167147, |
| "grad_norm": 0.2666330933570862, |
| "learning_rate": 2.8289931817112027e-05, |
| "loss": 3.1691190719604494, |
| "step": 8445 |
| }, |
| { |
| "epoch": 1.6234390009606148, |
| "grad_norm": 0.2384926676750183, |
| "learning_rate": 2.8150822236475283e-05, |
| "loss": 3.1659183502197266, |
| "step": 8450 |
| }, |
| { |
| "epoch": 1.6243996157540828, |
| "grad_norm": 0.25563499331474304, |
| "learning_rate": 2.8012020093814252e-05, |
| "loss": 3.1584651947021483, |
| "step": 8455 |
| }, |
| { |
| "epoch": 1.6253602305475505, |
| "grad_norm": 0.2299119532108307, |
| "learning_rate": 2.7873525739340973e-05, |
| "loss": 3.1645713806152345, |
| "step": 8460 |
| }, |
| { |
| "epoch": 1.6263208453410183, |
| "grad_norm": 0.26834285259246826, |
| "learning_rate": 2.773533952249088e-05, |
| "loss": 3.166657257080078, |
| "step": 8465 |
| }, |
| { |
| "epoch": 1.627281460134486, |
| "grad_norm": 0.2765657901763916, |
| "learning_rate": 2.7597461791921987e-05, |
| "loss": 3.165719223022461, |
| "step": 8470 |
| }, |
| { |
| "epoch": 1.6282420749279538, |
| "grad_norm": 0.26235702633857727, |
| "learning_rate": 2.745989289551401e-05, |
| "loss": 3.163796615600586, |
| "step": 8475 |
| }, |
| { |
| "epoch": 1.6292026897214216, |
| "grad_norm": 0.2585254907608032, |
| "learning_rate": 2.7322633180367286e-05, |
| "loss": 3.1652366638183596, |
| "step": 8480 |
| }, |
| { |
| "epoch": 1.6301633045148896, |
| "grad_norm": 0.24528291821479797, |
| "learning_rate": 2.7185682992802215e-05, |
| "loss": 3.165533447265625, |
| "step": 8485 |
| }, |
| { |
| "epoch": 1.6311239193083573, |
| "grad_norm": 0.24676673114299774, |
| "learning_rate": 2.7049042678358157e-05, |
| "loss": 3.162363624572754, |
| "step": 8490 |
| }, |
| { |
| "epoch": 1.6320845341018253, |
| "grad_norm": 0.28905969858169556, |
| "learning_rate": 2.6912712581792684e-05, |
| "loss": 3.165178680419922, |
| "step": 8495 |
| }, |
| { |
| "epoch": 1.633045148895293, |
| "grad_norm": 0.25018957257270813, |
| "learning_rate": 2.6776693047080546e-05, |
| "loss": 3.163674736022949, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.6340057636887608, |
| "grad_norm": 0.24032709002494812, |
| "learning_rate": 2.6640984417412996e-05, |
| "loss": 3.1674957275390625, |
| "step": 8505 |
| }, |
| { |
| "epoch": 1.6349663784822286, |
| "grad_norm": 0.2825995981693268, |
| "learning_rate": 2.6505587035196862e-05, |
| "loss": 3.164845085144043, |
| "step": 8510 |
| }, |
| { |
| "epoch": 1.6359269932756964, |
| "grad_norm": 0.3082216680049896, |
| "learning_rate": 2.6370501242053655e-05, |
| "loss": 3.164422607421875, |
| "step": 8515 |
| }, |
| { |
| "epoch": 1.6368876080691641, |
| "grad_norm": 0.2581336796283722, |
| "learning_rate": 2.6235727378818617e-05, |
| "loss": 3.1643039703369142, |
| "step": 8520 |
| }, |
| { |
| "epoch": 1.637848222862632, |
| "grad_norm": 0.2364456057548523, |
| "learning_rate": 2.6101265785540054e-05, |
| "loss": 3.1647056579589843, |
| "step": 8525 |
| }, |
| { |
| "epoch": 1.6388088376560999, |
| "grad_norm": 0.2648424804210663, |
| "learning_rate": 2.596711680147837e-05, |
| "loss": 3.1666595458984377, |
| "step": 8530 |
| }, |
| { |
| "epoch": 1.6397694524495678, |
| "grad_norm": 0.21114224195480347, |
| "learning_rate": 2.5833280765105218e-05, |
| "loss": 3.1652767181396486, |
| "step": 8535 |
| }, |
| { |
| "epoch": 1.6407300672430356, |
| "grad_norm": 0.29811277985572815, |
| "learning_rate": 2.5699758014102627e-05, |
| "loss": 3.1638210296630858, |
| "step": 8540 |
| }, |
| { |
| "epoch": 1.6416906820365034, |
| "grad_norm": 0.22697743773460388, |
| "learning_rate": 2.5566548885362233e-05, |
| "loss": 3.1660308837890625, |
| "step": 8545 |
| }, |
| { |
| "epoch": 1.6426512968299711, |
| "grad_norm": 0.2113056480884552, |
| "learning_rate": 2.543365371498434e-05, |
| "loss": 3.1632022857666016, |
| "step": 8550 |
| }, |
| { |
| "epoch": 1.643611911623439, |
| "grad_norm": 0.24464410543441772, |
| "learning_rate": 2.5301072838277026e-05, |
| "loss": 3.1658231735229494, |
| "step": 8555 |
| }, |
| { |
| "epoch": 1.6445725264169067, |
| "grad_norm": 0.23001989722251892, |
| "learning_rate": 2.5168806589755497e-05, |
| "loss": 3.163945960998535, |
| "step": 8560 |
| }, |
| { |
| "epoch": 1.6455331412103746, |
| "grad_norm": 0.20726712048053741, |
| "learning_rate": 2.503685530314109e-05, |
| "loss": 3.162770080566406, |
| "step": 8565 |
| }, |
| { |
| "epoch": 1.6464937560038426, |
| "grad_norm": 0.23915457725524902, |
| "learning_rate": 2.490521931136036e-05, |
| "loss": 3.1642734527587892, |
| "step": 8570 |
| }, |
| { |
| "epoch": 1.6474543707973104, |
| "grad_norm": 0.2551625370979309, |
| "learning_rate": 2.4773898946544473e-05, |
| "loss": 3.1642024993896483, |
| "step": 8575 |
| }, |
| { |
| "epoch": 1.6484149855907781, |
| "grad_norm": 0.2408856302499771, |
| "learning_rate": 2.4642894540028164e-05, |
| "loss": 3.1599807739257812, |
| "step": 8580 |
| }, |
| { |
| "epoch": 1.649375600384246, |
| "grad_norm": 0.2513860762119293, |
| "learning_rate": 2.4512206422349024e-05, |
| "loss": 3.163848876953125, |
| "step": 8585 |
| }, |
| { |
| "epoch": 1.6503362151777137, |
| "grad_norm": 0.21447288990020752, |
| "learning_rate": 2.438183492324654e-05, |
| "loss": 3.1619583129882813, |
| "step": 8590 |
| }, |
| { |
| "epoch": 1.6512968299711814, |
| "grad_norm": 0.1881914734840393, |
| "learning_rate": 2.4251780371661373e-05, |
| "loss": 3.1636112213134764, |
| "step": 8595 |
| }, |
| { |
| "epoch": 1.6522574447646494, |
| "grad_norm": 0.19982366263866425, |
| "learning_rate": 2.4122043095734518e-05, |
| "loss": 3.1659828186035157, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.6532180595581172, |
| "grad_norm": 0.2119777649641037, |
| "learning_rate": 2.3992623422806444e-05, |
| "loss": 3.161235809326172, |
| "step": 8605 |
| }, |
| { |
| "epoch": 1.6541786743515852, |
| "grad_norm": 0.2393619865179062, |
| "learning_rate": 2.3863521679416237e-05, |
| "loss": 3.1633071899414062, |
| "step": 8610 |
| }, |
| { |
| "epoch": 1.655139289145053, |
| "grad_norm": 0.24367739260196686, |
| "learning_rate": 2.3734738191300862e-05, |
| "loss": 3.16229133605957, |
| "step": 8615 |
| }, |
| { |
| "epoch": 1.6560999039385207, |
| "grad_norm": 0.29672330617904663, |
| "learning_rate": 2.36062732833943e-05, |
| "loss": 3.1618356704711914, |
| "step": 8620 |
| }, |
| { |
| "epoch": 1.6570605187319885, |
| "grad_norm": 0.24469807744026184, |
| "learning_rate": 2.347812727982661e-05, |
| "loss": 3.161136245727539, |
| "step": 8625 |
| }, |
| { |
| "epoch": 1.6580211335254562, |
| "grad_norm": 0.21228572726249695, |
| "learning_rate": 2.3350300503923352e-05, |
| "loss": 3.1614845275878904, |
| "step": 8630 |
| }, |
| { |
| "epoch": 1.658981748318924, |
| "grad_norm": 0.20836694538593292, |
| "learning_rate": 2.3222793278204626e-05, |
| "loss": 3.1641204833984373, |
| "step": 8635 |
| }, |
| { |
| "epoch": 1.659942363112392, |
| "grad_norm": 0.2582319974899292, |
| "learning_rate": 2.309560592438417e-05, |
| "loss": 3.1627979278564453, |
| "step": 8640 |
| }, |
| { |
| "epoch": 1.6609029779058597, |
| "grad_norm": 0.23718759417533875, |
| "learning_rate": 2.2968738763368765e-05, |
| "loss": 3.1604846954345702, |
| "step": 8645 |
| }, |
| { |
| "epoch": 1.6618635926993277, |
| "grad_norm": 0.19198426604270935, |
| "learning_rate": 2.2842192115257295e-05, |
| "loss": 3.1641172409057616, |
| "step": 8650 |
| }, |
| { |
| "epoch": 1.6628242074927955, |
| "grad_norm": 0.258806049823761, |
| "learning_rate": 2.271596629933992e-05, |
| "loss": 3.16113224029541, |
| "step": 8655 |
| }, |
| { |
| "epoch": 1.6637848222862632, |
| "grad_norm": 0.21029417216777802, |
| "learning_rate": 2.25900616340973e-05, |
| "loss": 3.1632869720458983, |
| "step": 8660 |
| }, |
| { |
| "epoch": 1.664745437079731, |
| "grad_norm": 0.21884505450725555, |
| "learning_rate": 2.2464478437199808e-05, |
| "loss": 3.163255310058594, |
| "step": 8665 |
| }, |
| { |
| "epoch": 1.6657060518731988, |
| "grad_norm": 0.22930586338043213, |
| "learning_rate": 2.2339217025506813e-05, |
| "loss": 3.163426399230957, |
| "step": 8670 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.2358139008283615, |
| "learning_rate": 2.2214277715065636e-05, |
| "loss": 3.1608362197875977, |
| "step": 8675 |
| }, |
| { |
| "epoch": 1.6676272814601345, |
| "grad_norm": 0.22611092031002045, |
| "learning_rate": 2.2089660821110988e-05, |
| "loss": 3.1634220123291015, |
| "step": 8680 |
| }, |
| { |
| "epoch": 1.6685878962536023, |
| "grad_norm": 0.19509707391262054, |
| "learning_rate": 2.1965366658064086e-05, |
| "loss": 3.158903884887695, |
| "step": 8685 |
| }, |
| { |
| "epoch": 1.6695485110470702, |
| "grad_norm": 0.19261038303375244, |
| "learning_rate": 2.1841395539531893e-05, |
| "loss": 3.161344528198242, |
| "step": 8690 |
| }, |
| { |
| "epoch": 1.670509125840538, |
| "grad_norm": 0.1918260157108307, |
| "learning_rate": 2.1717747778306177e-05, |
| "loss": 3.1661489486694334, |
| "step": 8695 |
| }, |
| { |
| "epoch": 1.6714697406340058, |
| "grad_norm": 0.20543381571769714, |
| "learning_rate": 2.159442368636297e-05, |
| "loss": 3.1635005950927733, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.6724303554274735, |
| "grad_norm": 0.2433311641216278, |
| "learning_rate": 2.147142357486164e-05, |
| "loss": 3.163587951660156, |
| "step": 8705 |
| }, |
| { |
| "epoch": 1.6733909702209413, |
| "grad_norm": 0.21309548616409302, |
| "learning_rate": 2.1348747754144004e-05, |
| "loss": 3.1650224685668946, |
| "step": 8710 |
| }, |
| { |
| "epoch": 1.674351585014409, |
| "grad_norm": 0.2069951891899109, |
| "learning_rate": 2.1226396533733796e-05, |
| "loss": 3.164658546447754, |
| "step": 8715 |
| }, |
| { |
| "epoch": 1.675312199807877, |
| "grad_norm": 0.24692699313163757, |
| "learning_rate": 2.1104370222335688e-05, |
| "loss": 3.1658843994140624, |
| "step": 8720 |
| }, |
| { |
| "epoch": 1.6762728146013448, |
| "grad_norm": 0.23039954900741577, |
| "learning_rate": 2.0982669127834622e-05, |
| "loss": 3.1638277053833006, |
| "step": 8725 |
| }, |
| { |
| "epoch": 1.6772334293948128, |
| "grad_norm": 0.2396543025970459, |
| "learning_rate": 2.0861293557294862e-05, |
| "loss": 3.162588119506836, |
| "step": 8730 |
| }, |
| { |
| "epoch": 1.6781940441882806, |
| "grad_norm": 0.2332344651222229, |
| "learning_rate": 2.0740243816959452e-05, |
| "loss": 3.163272476196289, |
| "step": 8735 |
| }, |
| { |
| "epoch": 1.6791546589817483, |
| "grad_norm": 0.21478639543056488, |
| "learning_rate": 2.061952021224938e-05, |
| "loss": 3.162495803833008, |
| "step": 8740 |
| }, |
| { |
| "epoch": 1.680115273775216, |
| "grad_norm": 0.19731956720352173, |
| "learning_rate": 2.0499123047762576e-05, |
| "loss": 3.162389945983887, |
| "step": 8745 |
| }, |
| { |
| "epoch": 1.6810758885686838, |
| "grad_norm": 0.19572487473487854, |
| "learning_rate": 2.03790526272735e-05, |
| "loss": 3.1616092681884767, |
| "step": 8750 |
| }, |
| { |
| "epoch": 1.6820365033621518, |
| "grad_norm": 0.184941828250885, |
| "learning_rate": 2.025930925373213e-05, |
| "loss": 3.1640493392944338, |
| "step": 8755 |
| }, |
| { |
| "epoch": 1.6829971181556196, |
| "grad_norm": 0.1868741363286972, |
| "learning_rate": 2.013989322926331e-05, |
| "loss": 3.164847183227539, |
| "step": 8760 |
| }, |
| { |
| "epoch": 1.6839577329490876, |
| "grad_norm": 0.19907650351524353, |
| "learning_rate": 2.0020804855165857e-05, |
| "loss": 3.164986801147461, |
| "step": 8765 |
| }, |
| { |
| "epoch": 1.6849183477425553, |
| "grad_norm": 0.19891948997974396, |
| "learning_rate": 1.9902044431912e-05, |
| "loss": 3.16265869140625, |
| "step": 8770 |
| }, |
| { |
| "epoch": 1.685878962536023, |
| "grad_norm": 0.23641318082809448, |
| "learning_rate": 1.9783612259146485e-05, |
| "loss": 3.164572525024414, |
| "step": 8775 |
| }, |
| { |
| "epoch": 1.6868395773294909, |
| "grad_norm": 0.2543870210647583, |
| "learning_rate": 1.9665508635685767e-05, |
| "loss": 3.1619945526123048, |
| "step": 8780 |
| }, |
| { |
| "epoch": 1.6878001921229586, |
| "grad_norm": 0.1898890733718872, |
| "learning_rate": 1.9547733859517443e-05, |
| "loss": 3.161852264404297, |
| "step": 8785 |
| }, |
| { |
| "epoch": 1.6887608069164264, |
| "grad_norm": 0.18471160531044006, |
| "learning_rate": 1.9430288227799367e-05, |
| "loss": 3.167014312744141, |
| "step": 8790 |
| }, |
| { |
| "epoch": 1.6897214217098944, |
| "grad_norm": 0.1837644726037979, |
| "learning_rate": 1.931317203685893e-05, |
| "loss": 3.1675960540771486, |
| "step": 8795 |
| }, |
| { |
| "epoch": 1.6906820365033621, |
| "grad_norm": 0.16458193957805634, |
| "learning_rate": 1.9196385582192247e-05, |
| "loss": 3.161328125, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.6916426512968301, |
| "grad_norm": 0.18657121062278748, |
| "learning_rate": 1.9079929158463526e-05, |
| "loss": 3.163207244873047, |
| "step": 8805 |
| }, |
| { |
| "epoch": 1.6926032660902979, |
| "grad_norm": 0.2095474749803543, |
| "learning_rate": 1.896380305950434e-05, |
| "loss": 3.1646053314208986, |
| "step": 8810 |
| }, |
| { |
| "epoch": 1.6935638808837656, |
| "grad_norm": 0.1963958740234375, |
| "learning_rate": 1.8848007578312686e-05, |
| "loss": 3.164917755126953, |
| "step": 8815 |
| }, |
| { |
| "epoch": 1.6945244956772334, |
| "grad_norm": 0.18170610070228577, |
| "learning_rate": 1.8732543007052452e-05, |
| "loss": 3.1576461791992188, |
| "step": 8820 |
| }, |
| { |
| "epoch": 1.6954851104707012, |
| "grad_norm": 0.2065751552581787, |
| "learning_rate": 1.8617409637052606e-05, |
| "loss": 3.166869354248047, |
| "step": 8825 |
| }, |
| { |
| "epoch": 1.696445725264169, |
| "grad_norm": 0.18570922315120697, |
| "learning_rate": 1.8502607758806487e-05, |
| "loss": 3.162985992431641, |
| "step": 8830 |
| }, |
| { |
| "epoch": 1.697406340057637, |
| "grad_norm": 0.22870124876499176, |
| "learning_rate": 1.8388137661970948e-05, |
| "loss": 3.165153694152832, |
| "step": 8835 |
| }, |
| { |
| "epoch": 1.6983669548511047, |
| "grad_norm": 0.18405385315418243, |
| "learning_rate": 1.8273999635365806e-05, |
| "loss": 3.162175750732422, |
| "step": 8840 |
| }, |
| { |
| "epoch": 1.6993275696445727, |
| "grad_norm": 0.1959793120622635, |
| "learning_rate": 1.8160193966973047e-05, |
| "loss": 3.161460113525391, |
| "step": 8845 |
| }, |
| { |
| "epoch": 1.7002881844380404, |
| "grad_norm": 0.20207500457763672, |
| "learning_rate": 1.8046720943936e-05, |
| "loss": 3.1632549285888674, |
| "step": 8850 |
| }, |
| { |
| "epoch": 1.7012487992315082, |
| "grad_norm": 0.20162688195705414, |
| "learning_rate": 1.7933580852558742e-05, |
| "loss": 3.1618804931640625, |
| "step": 8855 |
| }, |
| { |
| "epoch": 1.702209414024976, |
| "grad_norm": 0.18540622293949127, |
| "learning_rate": 1.7820773978305365e-05, |
| "loss": 3.161595916748047, |
| "step": 8860 |
| }, |
| { |
| "epoch": 1.7031700288184437, |
| "grad_norm": 0.19820909202098846, |
| "learning_rate": 1.7708300605799202e-05, |
| "loss": 3.1606048583984374, |
| "step": 8865 |
| }, |
| { |
| "epoch": 1.7041306436119115, |
| "grad_norm": 0.2218032330274582, |
| "learning_rate": 1.7596161018822007e-05, |
| "loss": 3.1620101928710938, |
| "step": 8870 |
| }, |
| { |
| "epoch": 1.7050912584053795, |
| "grad_norm": 0.1894785612821579, |
| "learning_rate": 1.7484355500313568e-05, |
| "loss": 3.161252975463867, |
| "step": 8875 |
| }, |
| { |
| "epoch": 1.7060518731988472, |
| "grad_norm": 0.20882797241210938, |
| "learning_rate": 1.7372884332370677e-05, |
| "loss": 3.162582015991211, |
| "step": 8880 |
| }, |
| { |
| "epoch": 1.7070124879923152, |
| "grad_norm": 0.2097301483154297, |
| "learning_rate": 1.7261747796246478e-05, |
| "loss": 3.162227249145508, |
| "step": 8885 |
| }, |
| { |
| "epoch": 1.707973102785783, |
| "grad_norm": 0.18620242178440094, |
| "learning_rate": 1.7150946172349844e-05, |
| "loss": 3.163176345825195, |
| "step": 8890 |
| }, |
| { |
| "epoch": 1.7089337175792507, |
| "grad_norm": 0.1744890809059143, |
| "learning_rate": 1.7040479740244684e-05, |
| "loss": 3.1600067138671877, |
| "step": 8895 |
| }, |
| { |
| "epoch": 1.7098943323727185, |
| "grad_norm": 0.20441320538520813, |
| "learning_rate": 1.693034877864913e-05, |
| "loss": 3.161859130859375, |
| "step": 8900 |
| }, |
| { |
| "epoch": 1.7108549471661862, |
| "grad_norm": 0.18480157852172852, |
| "learning_rate": 1.682055356543487e-05, |
| "loss": 3.1619277954101563, |
| "step": 8905 |
| }, |
| { |
| "epoch": 1.7118155619596542, |
| "grad_norm": 0.19677984714508057, |
| "learning_rate": 1.6711094377626495e-05, |
| "loss": 3.1647516250610352, |
| "step": 8910 |
| }, |
| { |
| "epoch": 1.712776176753122, |
| "grad_norm": 0.19380095601081848, |
| "learning_rate": 1.66019714914008e-05, |
| "loss": 3.1602848052978514, |
| "step": 8915 |
| }, |
| { |
| "epoch": 1.71373679154659, |
| "grad_norm": 0.17474035918712616, |
| "learning_rate": 1.6493185182085967e-05, |
| "loss": 3.164753532409668, |
| "step": 8920 |
| }, |
| { |
| "epoch": 1.7146974063400577, |
| "grad_norm": 0.18277983367443085, |
| "learning_rate": 1.6384735724161045e-05, |
| "loss": 3.162521743774414, |
| "step": 8925 |
| }, |
| { |
| "epoch": 1.7156580211335255, |
| "grad_norm": 0.17841650545597076, |
| "learning_rate": 1.6276623391255146e-05, |
| "loss": 3.1669170379638674, |
| "step": 8930 |
| }, |
| { |
| "epoch": 1.7166186359269933, |
| "grad_norm": 0.18692415952682495, |
| "learning_rate": 1.6168848456146793e-05, |
| "loss": 3.1644006729125977, |
| "step": 8935 |
| }, |
| { |
| "epoch": 1.717579250720461, |
| "grad_norm": 0.22790934145450592, |
| "learning_rate": 1.606141119076314e-05, |
| "loss": 3.164959716796875, |
| "step": 8940 |
| }, |
| { |
| "epoch": 1.7185398655139288, |
| "grad_norm": 0.19892485439777374, |
| "learning_rate": 1.595431186617948e-05, |
| "loss": 3.1620708465576173, |
| "step": 8945 |
| }, |
| { |
| "epoch": 1.7195004803073968, |
| "grad_norm": 0.19590741395950317, |
| "learning_rate": 1.5847550752618427e-05, |
| "loss": 3.158066177368164, |
| "step": 8950 |
| }, |
| { |
| "epoch": 1.7204610951008645, |
| "grad_norm": 0.2052772343158722, |
| "learning_rate": 1.5741128119449153e-05, |
| "loss": 3.161151885986328, |
| "step": 8955 |
| }, |
| { |
| "epoch": 1.7214217098943325, |
| "grad_norm": 0.18683621287345886, |
| "learning_rate": 1.5635044235186906e-05, |
| "loss": 3.1654678344726563, |
| "step": 8960 |
| }, |
| { |
| "epoch": 1.7223823246878003, |
| "grad_norm": 0.19518622756004333, |
| "learning_rate": 1.5529299367492208e-05, |
| "loss": 3.160481262207031, |
| "step": 8965 |
| }, |
| { |
| "epoch": 1.723342939481268, |
| "grad_norm": 0.1885916143655777, |
| "learning_rate": 1.5423893783170262e-05, |
| "loss": 3.162572479248047, |
| "step": 8970 |
| }, |
| { |
| "epoch": 1.7243035542747358, |
| "grad_norm": 0.18152864277362823, |
| "learning_rate": 1.531882774817007e-05, |
| "loss": 3.1627574920654298, |
| "step": 8975 |
| }, |
| { |
| "epoch": 1.7252641690682036, |
| "grad_norm": 0.17760129272937775, |
| "learning_rate": 1.5214101527584071e-05, |
| "loss": 3.1614215850830076, |
| "step": 8980 |
| }, |
| { |
| "epoch": 1.7262247838616713, |
| "grad_norm": 0.20811888575553894, |
| "learning_rate": 1.5109715385647297e-05, |
| "loss": 3.163728141784668, |
| "step": 8985 |
| }, |
| { |
| "epoch": 1.7271853986551393, |
| "grad_norm": 0.18515148758888245, |
| "learning_rate": 1.5005669585736618e-05, |
| "loss": 3.161965179443359, |
| "step": 8990 |
| }, |
| { |
| "epoch": 1.728146013448607, |
| "grad_norm": 0.1933458298444748, |
| "learning_rate": 1.4901964390370308e-05, |
| "loss": 3.163407325744629, |
| "step": 8995 |
| }, |
| { |
| "epoch": 1.729106628242075, |
| "grad_norm": 0.20849090814590454, |
| "learning_rate": 1.4798600061207195e-05, |
| "loss": 3.163631057739258, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.7300672430355428, |
| "grad_norm": 0.18622250854969025, |
| "learning_rate": 1.4695576859046127e-05, |
| "loss": 3.159561538696289, |
| "step": 9005 |
| }, |
| { |
| "epoch": 1.7310278578290106, |
| "grad_norm": 0.1835029423236847, |
| "learning_rate": 1.4592895043825126e-05, |
| "loss": 3.163999557495117, |
| "step": 9010 |
| }, |
| { |
| "epoch": 1.7319884726224783, |
| "grad_norm": 0.1808691918849945, |
| "learning_rate": 1.449055487462102e-05, |
| "loss": 3.1594869613647463, |
| "step": 9015 |
| }, |
| { |
| "epoch": 1.732949087415946, |
| "grad_norm": 0.16524961590766907, |
| "learning_rate": 1.4388556609648572e-05, |
| "loss": 3.163185691833496, |
| "step": 9020 |
| }, |
| { |
| "epoch": 1.7339097022094139, |
| "grad_norm": 0.16896623373031616, |
| "learning_rate": 1.428690050625979e-05, |
| "loss": 3.1632381439208985, |
| "step": 9025 |
| }, |
| { |
| "epoch": 1.7348703170028819, |
| "grad_norm": 0.1609111875295639, |
| "learning_rate": 1.4185586820943506e-05, |
| "loss": 3.1643299102783202, |
| "step": 9030 |
| }, |
| { |
| "epoch": 1.7358309317963496, |
| "grad_norm": 0.16877403855323792, |
| "learning_rate": 1.4084615809324523e-05, |
| "loss": 3.164197540283203, |
| "step": 9035 |
| }, |
| { |
| "epoch": 1.7367915465898176, |
| "grad_norm": 0.17374147474765778, |
| "learning_rate": 1.3983987726163087e-05, |
| "loss": 3.1597652435302734, |
| "step": 9040 |
| }, |
| { |
| "epoch": 1.7377521613832854, |
| "grad_norm": 0.1869489699602127, |
| "learning_rate": 1.3883702825354138e-05, |
| "loss": 3.160333251953125, |
| "step": 9045 |
| }, |
| { |
| "epoch": 1.7387127761767531, |
| "grad_norm": 0.19941848516464233, |
| "learning_rate": 1.3783761359926771e-05, |
| "loss": 3.1596229553222654, |
| "step": 9050 |
| }, |
| { |
| "epoch": 1.739673390970221, |
| "grad_norm": 0.16694855690002441, |
| "learning_rate": 1.3684163582043595e-05, |
| "loss": 3.159475898742676, |
| "step": 9055 |
| }, |
| { |
| "epoch": 1.7406340057636887, |
| "grad_norm": 0.1921820491552353, |
| "learning_rate": 1.3584909742999978e-05, |
| "loss": 3.162578010559082, |
| "step": 9060 |
| }, |
| { |
| "epoch": 1.7415946205571564, |
| "grad_norm": 0.17421427369117737, |
| "learning_rate": 1.3486000093223565e-05, |
| "loss": 3.1633106231689454, |
| "step": 9065 |
| }, |
| { |
| "epoch": 1.7425552353506244, |
| "grad_norm": 0.16822272539138794, |
| "learning_rate": 1.3387434882273529e-05, |
| "loss": 3.1628774642944335, |
| "step": 9070 |
| }, |
| { |
| "epoch": 1.7435158501440924, |
| "grad_norm": 0.2147689312696457, |
| "learning_rate": 1.3289214358840022e-05, |
| "loss": 3.164061737060547, |
| "step": 9075 |
| }, |
| { |
| "epoch": 1.7444764649375601, |
| "grad_norm": 0.19877059757709503, |
| "learning_rate": 1.3191338770743493e-05, |
| "loss": 3.164179039001465, |
| "step": 9080 |
| }, |
| { |
| "epoch": 1.745437079731028, |
| "grad_norm": 0.17470191419124603, |
| "learning_rate": 1.30938083649341e-05, |
| "loss": 3.162880706787109, |
| "step": 9085 |
| }, |
| { |
| "epoch": 1.7463976945244957, |
| "grad_norm": 0.17637498676776886, |
| "learning_rate": 1.2996623387491085e-05, |
| "loss": 3.163307952880859, |
| "step": 9090 |
| }, |
| { |
| "epoch": 1.7473583093179634, |
| "grad_norm": 0.17012238502502441, |
| "learning_rate": 1.2899784083622067e-05, |
| "loss": 3.1613311767578125, |
| "step": 9095 |
| }, |
| { |
| "epoch": 1.7483189241114312, |
| "grad_norm": 0.15726859867572784, |
| "learning_rate": 1.2803290697662566e-05, |
| "loss": 3.161721038818359, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.7492795389048992, |
| "grad_norm": 0.1671910136938095, |
| "learning_rate": 1.2707143473075299e-05, |
| "loss": 3.1644134521484375, |
| "step": 9105 |
| }, |
| { |
| "epoch": 1.750240153698367, |
| "grad_norm": 0.17212195694446564, |
| "learning_rate": 1.2611342652449597e-05, |
| "loss": 3.1598962783813476, |
| "step": 9110 |
| }, |
| { |
| "epoch": 1.751200768491835, |
| "grad_norm": 0.17955084145069122, |
| "learning_rate": 1.2515888477500708e-05, |
| "loss": 3.161183166503906, |
| "step": 9115 |
| }, |
| { |
| "epoch": 1.7521613832853027, |
| "grad_norm": 0.15037564933300018, |
| "learning_rate": 1.2420781189069346e-05, |
| "loss": 3.164091873168945, |
| "step": 9120 |
| }, |
| { |
| "epoch": 1.7531219980787704, |
| "grad_norm": 0.16244594752788544, |
| "learning_rate": 1.2326021027120958e-05, |
| "loss": 3.162504196166992, |
| "step": 9125 |
| }, |
| { |
| "epoch": 1.7540826128722382, |
| "grad_norm": 0.1728479117155075, |
| "learning_rate": 1.2231608230745128e-05, |
| "loss": 3.159666633605957, |
| "step": 9130 |
| }, |
| { |
| "epoch": 1.755043227665706, |
| "grad_norm": 0.17677444219589233, |
| "learning_rate": 1.2137543038155034e-05, |
| "loss": 3.16094970703125, |
| "step": 9135 |
| }, |
| { |
| "epoch": 1.7560038424591737, |
| "grad_norm": 0.16001811623573303, |
| "learning_rate": 1.2043825686686798e-05, |
| "loss": 3.1602264404296876, |
| "step": 9140 |
| }, |
| { |
| "epoch": 1.7569644572526417, |
| "grad_norm": 0.1979171186685562, |
| "learning_rate": 1.1950456412798954e-05, |
| "loss": 3.160991096496582, |
| "step": 9145 |
| }, |
| { |
| "epoch": 1.7579250720461095, |
| "grad_norm": 0.18909943103790283, |
| "learning_rate": 1.1857435452071706e-05, |
| "loss": 3.164293098449707, |
| "step": 9150 |
| }, |
| { |
| "epoch": 1.7588856868395775, |
| "grad_norm": 0.1578613966703415, |
| "learning_rate": 1.1764763039206516e-05, |
| "loss": 3.1651378631591798, |
| "step": 9155 |
| }, |
| { |
| "epoch": 1.7598463016330452, |
| "grad_norm": 0.17254026234149933, |
| "learning_rate": 1.1672439408025409e-05, |
| "loss": 3.1591054916381838, |
| "step": 9160 |
| }, |
| { |
| "epoch": 1.760806916426513, |
| "grad_norm": 0.1548478901386261, |
| "learning_rate": 1.1580464791470317e-05, |
| "loss": 3.162530517578125, |
| "step": 9165 |
| }, |
| { |
| "epoch": 1.7617675312199808, |
| "grad_norm": 0.16993309557437897, |
| "learning_rate": 1.1488839421602663e-05, |
| "loss": 3.1617578506469726, |
| "step": 9170 |
| }, |
| { |
| "epoch": 1.7627281460134485, |
| "grad_norm": 0.18223033845424652, |
| "learning_rate": 1.1397563529602654e-05, |
| "loss": 3.1608226776123045, |
| "step": 9175 |
| }, |
| { |
| "epoch": 1.7636887608069163, |
| "grad_norm": 0.15401305258274078, |
| "learning_rate": 1.130663734576877e-05, |
| "loss": 3.1587234497070313, |
| "step": 9180 |
| }, |
| { |
| "epoch": 1.7646493756003843, |
| "grad_norm": 0.19235272705554962, |
| "learning_rate": 1.1216061099517016e-05, |
| "loss": 3.158829116821289, |
| "step": 9185 |
| }, |
| { |
| "epoch": 1.765609990393852, |
| "grad_norm": 0.1655486822128296, |
| "learning_rate": 1.1125835019380614e-05, |
| "loss": 3.160569190979004, |
| "step": 9190 |
| }, |
| { |
| "epoch": 1.76657060518732, |
| "grad_norm": 0.17558416724205017, |
| "learning_rate": 1.1035959333009231e-05, |
| "loss": 3.1613176345825194, |
| "step": 9195 |
| }, |
| { |
| "epoch": 1.7675312199807878, |
| "grad_norm": 0.16277319192886353, |
| "learning_rate": 1.0946434267168375e-05, |
| "loss": 3.1619541168212892, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.7684918347742555, |
| "grad_norm": 0.15174464881420135, |
| "learning_rate": 1.0857260047739025e-05, |
| "loss": 3.1618457794189454, |
| "step": 9205 |
| }, |
| { |
| "epoch": 1.7694524495677233, |
| "grad_norm": 0.14863047003746033, |
| "learning_rate": 1.076843689971687e-05, |
| "loss": 3.1616443634033202, |
| "step": 9210 |
| }, |
| { |
| "epoch": 1.770413064361191, |
| "grad_norm": 0.16424240171909332, |
| "learning_rate": 1.0679965047211841e-05, |
| "loss": 3.1608753204345703, |
| "step": 9215 |
| }, |
| { |
| "epoch": 1.7713736791546588, |
| "grad_norm": 0.15550029277801514, |
| "learning_rate": 1.0591844713447479e-05, |
| "loss": 3.15771427154541, |
| "step": 9220 |
| }, |
| { |
| "epoch": 1.7723342939481268, |
| "grad_norm": 0.15241944789886475, |
| "learning_rate": 1.0504076120760413e-05, |
| "loss": 3.1600357055664063, |
| "step": 9225 |
| }, |
| { |
| "epoch": 1.7732949087415946, |
| "grad_norm": 0.15490137040615082, |
| "learning_rate": 1.0416659490599871e-05, |
| "loss": 3.163216400146484, |
| "step": 9230 |
| }, |
| { |
| "epoch": 1.7742555235350626, |
| "grad_norm": 0.17097872495651245, |
| "learning_rate": 1.0329595043526905e-05, |
| "loss": 3.164168930053711, |
| "step": 9235 |
| }, |
| { |
| "epoch": 1.7752161383285303, |
| "grad_norm": 0.19601082801818848, |
| "learning_rate": 1.024288299921408e-05, |
| "loss": 3.1567230224609375, |
| "step": 9240 |
| }, |
| { |
| "epoch": 1.776176753121998, |
| "grad_norm": 0.1483290195465088, |
| "learning_rate": 1.015652357644477e-05, |
| "loss": 3.1643510818481446, |
| "step": 9245 |
| }, |
| { |
| "epoch": 1.7771373679154658, |
| "grad_norm": 0.1415608525276184, |
| "learning_rate": 1.0070516993112676e-05, |
| "loss": 3.157514953613281, |
| "step": 9250 |
| }, |
| { |
| "epoch": 1.7780979827089336, |
| "grad_norm": 0.14610423147678375, |
| "learning_rate": 9.984863466221199e-06, |
| "loss": 3.1627222061157227, |
| "step": 9255 |
| }, |
| { |
| "epoch": 1.7790585975024016, |
| "grad_norm": 0.17516079545021057, |
| "learning_rate": 9.899563211883e-06, |
| "loss": 3.1590530395507814, |
| "step": 9260 |
| }, |
| { |
| "epoch": 1.7800192122958693, |
| "grad_norm": 0.1540340632200241, |
| "learning_rate": 9.814616445319384e-06, |
| "loss": 3.1593671798706056, |
| "step": 9265 |
| }, |
| { |
| "epoch": 1.7809798270893373, |
| "grad_norm": 0.18057873845100403, |
| "learning_rate": 9.730023380859725e-06, |
| "loss": 3.162105751037598, |
| "step": 9270 |
| }, |
| { |
| "epoch": 1.781940441882805, |
| "grad_norm": 0.17139260470867157, |
| "learning_rate": 9.645784231941005e-06, |
| "loss": 3.1630107879638674, |
| "step": 9275 |
| }, |
| { |
| "epoch": 1.7829010566762729, |
| "grad_norm": 0.16848276555538177, |
| "learning_rate": 9.561899211107244e-06, |
| "loss": 3.1598880767822264, |
| "step": 9280 |
| }, |
| { |
| "epoch": 1.7838616714697406, |
| "grad_norm": 0.16549763083457947, |
| "learning_rate": 9.478368530008967e-06, |
| "loss": 3.160906219482422, |
| "step": 9285 |
| }, |
| { |
| "epoch": 1.7848222862632084, |
| "grad_norm": 0.15802277624607086, |
| "learning_rate": 9.39519239940264e-06, |
| "loss": 3.161147689819336, |
| "step": 9290 |
| }, |
| { |
| "epoch": 1.7857829010566761, |
| "grad_norm": 0.1761743128299713, |
| "learning_rate": 9.31237102915019e-06, |
| "loss": 3.158751678466797, |
| "step": 9295 |
| }, |
| { |
| "epoch": 1.7867435158501441, |
| "grad_norm": 0.1671990603208542, |
| "learning_rate": 9.229904628218427e-06, |
| "loss": 3.1624494552612306, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.7877041306436119, |
| "grad_norm": 0.19958704710006714, |
| "learning_rate": 9.14779340467851e-06, |
| "loss": 3.1618110656738283, |
| "step": 9305 |
| }, |
| { |
| "epoch": 1.7886647454370799, |
| "grad_norm": 0.16844980418682098, |
| "learning_rate": 9.06603756570552e-06, |
| "loss": 3.156951141357422, |
| "step": 9310 |
| }, |
| { |
| "epoch": 1.7896253602305476, |
| "grad_norm": 0.14970090985298157, |
| "learning_rate": 8.984637317577782e-06, |
| "loss": 3.1624423980712892, |
| "step": 9315 |
| }, |
| { |
| "epoch": 1.7905859750240154, |
| "grad_norm": 0.15226224064826965, |
| "learning_rate": 8.90359286567654e-06, |
| "loss": 3.1624835968017577, |
| "step": 9320 |
| }, |
| { |
| "epoch": 1.7915465898174832, |
| "grad_norm": 0.1699807345867157, |
| "learning_rate": 8.822904414485194e-06, |
| "loss": 3.1621606826782225, |
| "step": 9325 |
| }, |
| { |
| "epoch": 1.792507204610951, |
| "grad_norm": 0.15113097429275513, |
| "learning_rate": 8.742572167589008e-06, |
| "loss": 3.16312255859375, |
| "step": 9330 |
| }, |
| { |
| "epoch": 1.7934678194044187, |
| "grad_norm": 0.16513291001319885, |
| "learning_rate": 8.662596327674499e-06, |
| "loss": 3.1624629974365233, |
| "step": 9335 |
| }, |
| { |
| "epoch": 1.7944284341978867, |
| "grad_norm": 0.154737189412117, |
| "learning_rate": 8.582977096528887e-06, |
| "loss": 3.1638771057128907, |
| "step": 9340 |
| }, |
| { |
| "epoch": 1.7953890489913544, |
| "grad_norm": 0.18483127653598785, |
| "learning_rate": 8.503714675039663e-06, |
| "loss": 3.16489315032959, |
| "step": 9345 |
| }, |
| { |
| "epoch": 1.7963496637848224, |
| "grad_norm": 0.16609624028205872, |
| "learning_rate": 8.424809263194054e-06, |
| "loss": 3.1592342376708986, |
| "step": 9350 |
| }, |
| { |
| "epoch": 1.7973102785782902, |
| "grad_norm": 0.13648296892642975, |
| "learning_rate": 8.346261060078524e-06, |
| "loss": 3.163716506958008, |
| "step": 9355 |
| }, |
| { |
| "epoch": 1.798270893371758, |
| "grad_norm": 0.16231390833854675, |
| "learning_rate": 8.268070263878223e-06, |
| "loss": 3.1625925064086915, |
| "step": 9360 |
| }, |
| { |
| "epoch": 1.7992315081652257, |
| "grad_norm": 0.15581952035427094, |
| "learning_rate": 8.190237071876576e-06, |
| "loss": 3.161655235290527, |
| "step": 9365 |
| }, |
| { |
| "epoch": 1.8001921229586935, |
| "grad_norm": 0.15568803250789642, |
| "learning_rate": 8.11276168045471e-06, |
| "loss": 3.1620370864868166, |
| "step": 9370 |
| }, |
| { |
| "epoch": 1.8011527377521612, |
| "grad_norm": 0.1662697196006775, |
| "learning_rate": 8.035644285090958e-06, |
| "loss": 3.1623306274414062, |
| "step": 9375 |
| }, |
| { |
| "epoch": 1.8021133525456292, |
| "grad_norm": 0.16496489942073822, |
| "learning_rate": 7.958885080360445e-06, |
| "loss": 3.1613933563232424, |
| "step": 9380 |
| }, |
| { |
| "epoch": 1.803073967339097, |
| "grad_norm": 0.15053115785121918, |
| "learning_rate": 7.882484259934497e-06, |
| "loss": 3.1627952575683596, |
| "step": 9385 |
| }, |
| { |
| "epoch": 1.804034582132565, |
| "grad_norm": 0.14695799350738525, |
| "learning_rate": 7.806442016580267e-06, |
| "loss": 3.160305404663086, |
| "step": 9390 |
| }, |
| { |
| "epoch": 1.8049951969260327, |
| "grad_norm": 0.15005071461200714, |
| "learning_rate": 7.730758542160064e-06, |
| "loss": 3.1611255645751952, |
| "step": 9395 |
| }, |
| { |
| "epoch": 1.8059558117195005, |
| "grad_norm": 0.15226683020591736, |
| "learning_rate": 7.655434027631124e-06, |
| "loss": 3.1629497528076174, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.8069164265129682, |
| "grad_norm": 0.15546758472919464, |
| "learning_rate": 7.580468663044903e-06, |
| "loss": 3.1586400985717775, |
| "step": 9405 |
| }, |
| { |
| "epoch": 1.807877041306436, |
| "grad_norm": 0.1452452838420868, |
| "learning_rate": 7.505862637546684e-06, |
| "loss": 3.160877227783203, |
| "step": 9410 |
| }, |
| { |
| "epoch": 1.808837656099904, |
| "grad_norm": 0.13006171584129333, |
| "learning_rate": 7.43161613937514e-06, |
| "loss": 3.1579397201538084, |
| "step": 9415 |
| }, |
| { |
| "epoch": 1.8097982708933718, |
| "grad_norm": 0.15625065565109253, |
| "learning_rate": 7.3577293558618075e-06, |
| "loss": 3.1598045349121096, |
| "step": 9420 |
| }, |
| { |
| "epoch": 1.8107588856868397, |
| "grad_norm": 0.16364313662052155, |
| "learning_rate": 7.284202473430612e-06, |
| "loss": 3.1616336822509767, |
| "step": 9425 |
| }, |
| { |
| "epoch": 1.8117195004803075, |
| "grad_norm": 0.15080556273460388, |
| "learning_rate": 7.211035677597421e-06, |
| "loss": 3.1628395080566407, |
| "step": 9430 |
| }, |
| { |
| "epoch": 1.8126801152737753, |
| "grad_norm": 0.1453072428703308, |
| "learning_rate": 7.138229152969582e-06, |
| "loss": 3.161026382446289, |
| "step": 9435 |
| }, |
| { |
| "epoch": 1.813640730067243, |
| "grad_norm": 0.1453263759613037, |
| "learning_rate": 7.065783083245435e-06, |
| "loss": 3.16085262298584, |
| "step": 9440 |
| }, |
| { |
| "epoch": 1.8146013448607108, |
| "grad_norm": 0.13254213333129883, |
| "learning_rate": 6.993697651213798e-06, |
| "loss": 3.1604400634765626, |
| "step": 9445 |
| }, |
| { |
| "epoch": 1.8155619596541785, |
| "grad_norm": 0.1310938000679016, |
| "learning_rate": 6.921973038753647e-06, |
| "loss": 3.158857727050781, |
| "step": 9450 |
| }, |
| { |
| "epoch": 1.8165225744476465, |
| "grad_norm": 0.13702644407749176, |
| "learning_rate": 6.8506094268335255e-06, |
| "loss": 3.16072998046875, |
| "step": 9455 |
| }, |
| { |
| "epoch": 1.8174831892411143, |
| "grad_norm": 0.15652552247047424, |
| "learning_rate": 6.779606995511183e-06, |
| "loss": 3.161703109741211, |
| "step": 9460 |
| }, |
| { |
| "epoch": 1.8184438040345823, |
| "grad_norm": 0.1748208999633789, |
| "learning_rate": 6.7089659239329685e-06, |
| "loss": 3.162480926513672, |
| "step": 9465 |
| }, |
| { |
| "epoch": 1.81940441882805, |
| "grad_norm": 0.15371812880039215, |
| "learning_rate": 6.638686390333592e-06, |
| "loss": 3.1619606018066406, |
| "step": 9470 |
| }, |
| { |
| "epoch": 1.8203650336215178, |
| "grad_norm": 0.15862195193767548, |
| "learning_rate": 6.56876857203556e-06, |
| "loss": 3.1612228393554687, |
| "step": 9475 |
| }, |
| { |
| "epoch": 1.8213256484149856, |
| "grad_norm": 0.14958879351615906, |
| "learning_rate": 6.499212645448626e-06, |
| "loss": 3.161460113525391, |
| "step": 9480 |
| }, |
| { |
| "epoch": 1.8222862632084533, |
| "grad_norm": 0.14320014417171478, |
| "learning_rate": 6.430018786069574e-06, |
| "loss": 3.1611602783203123, |
| "step": 9485 |
| }, |
| { |
| "epoch": 1.823246878001921, |
| "grad_norm": 0.14477957785129547, |
| "learning_rate": 6.361187168481618e-06, |
| "loss": 3.162501907348633, |
| "step": 9490 |
| }, |
| { |
| "epoch": 1.824207492795389, |
| "grad_norm": 0.14906539022922516, |
| "learning_rate": 6.292717966353955e-06, |
| "loss": 3.1564708709716798, |
| "step": 9495 |
| }, |
| { |
| "epoch": 1.8251681075888568, |
| "grad_norm": 0.1561676263809204, |
| "learning_rate": 6.224611352441444e-06, |
| "loss": 3.164084053039551, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.8261287223823248, |
| "grad_norm": 0.15524768829345703, |
| "learning_rate": 6.156867498584028e-06, |
| "loss": 3.164693832397461, |
| "step": 9505 |
| }, |
| { |
| "epoch": 1.8270893371757926, |
| "grad_norm": 0.13482025265693665, |
| "learning_rate": 6.089486575706431e-06, |
| "loss": 3.161751556396484, |
| "step": 9510 |
| }, |
| { |
| "epoch": 1.8280499519692603, |
| "grad_norm": 0.1538052260875702, |
| "learning_rate": 6.022468753817611e-06, |
| "loss": 3.162434196472168, |
| "step": 9515 |
| }, |
| { |
| "epoch": 1.829010566762728, |
| "grad_norm": 0.13271762430667877, |
| "learning_rate": 5.955814202010406e-06, |
| "loss": 3.1618423461914062, |
| "step": 9520 |
| }, |
| { |
| "epoch": 1.8299711815561959, |
| "grad_norm": 0.14370056986808777, |
| "learning_rate": 5.88952308846109e-06, |
| "loss": 3.1616491317749023, |
| "step": 9525 |
| }, |
| { |
| "epoch": 1.8309317963496636, |
| "grad_norm": 0.13553740084171295, |
| "learning_rate": 5.823595580428969e-06, |
| "loss": 3.1615522384643553, |
| "step": 9530 |
| }, |
| { |
| "epoch": 1.8318924111431316, |
| "grad_norm": 0.14558495581150055, |
| "learning_rate": 5.758031844255884e-06, |
| "loss": 3.163438415527344, |
| "step": 9535 |
| }, |
| { |
| "epoch": 1.8328530259365994, |
| "grad_norm": 0.1292106956243515, |
| "learning_rate": 5.692832045365858e-06, |
| "loss": 3.1632017135620116, |
| "step": 9540 |
| }, |
| { |
| "epoch": 1.8338136407300674, |
| "grad_norm": 0.13839663565158844, |
| "learning_rate": 5.627996348264685e-06, |
| "loss": 3.1595331192016602, |
| "step": 9545 |
| }, |
| { |
| "epoch": 1.8347742555235351, |
| "grad_norm": 0.13135622441768646, |
| "learning_rate": 5.563524916539508e-06, |
| "loss": 3.161990165710449, |
| "step": 9550 |
| }, |
| { |
| "epoch": 1.8357348703170029, |
| "grad_norm": 0.13140968978405, |
| "learning_rate": 5.499417912858289e-06, |
| "loss": 3.159449005126953, |
| "step": 9555 |
| }, |
| { |
| "epoch": 1.8366954851104706, |
| "grad_norm": 0.14151158928871155, |
| "learning_rate": 5.435675498969661e-06, |
| "loss": 3.1604162216186524, |
| "step": 9560 |
| }, |
| { |
| "epoch": 1.8376560999039384, |
| "grad_norm": 0.1417761594057083, |
| "learning_rate": 5.372297835702222e-06, |
| "loss": 3.1592178344726562, |
| "step": 9565 |
| }, |
| { |
| "epoch": 1.8386167146974062, |
| "grad_norm": 0.13075558841228485, |
| "learning_rate": 5.309285082964343e-06, |
| "loss": 3.159617233276367, |
| "step": 9570 |
| }, |
| { |
| "epoch": 1.8395773294908742, |
| "grad_norm": 0.13188965618610382, |
| "learning_rate": 5.24663739974363e-06, |
| "loss": 3.1601367950439454, |
| "step": 9575 |
| }, |
| { |
| "epoch": 1.8405379442843421, |
| "grad_norm": 0.13191638886928558, |
| "learning_rate": 5.184354944106661e-06, |
| "loss": 3.16494197845459, |
| "step": 9580 |
| }, |
| { |
| "epoch": 1.84149855907781, |
| "grad_norm": 0.16160184144973755, |
| "learning_rate": 5.122437873198415e-06, |
| "loss": 3.159535217285156, |
| "step": 9585 |
| }, |
| { |
| "epoch": 1.8424591738712777, |
| "grad_norm": 0.13549911975860596, |
| "learning_rate": 5.060886343242043e-06, |
| "loss": 3.1587257385253906, |
| "step": 9590 |
| }, |
| { |
| "epoch": 1.8434197886647454, |
| "grad_norm": 0.13970741629600525, |
| "learning_rate": 4.999700509538368e-06, |
| "loss": 3.1649681091308595, |
| "step": 9595 |
| }, |
| { |
| "epoch": 1.8443804034582132, |
| "grad_norm": 0.134404718875885, |
| "learning_rate": 4.938880526465516e-06, |
| "loss": 3.1567209243774412, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.845341018251681, |
| "grad_norm": 0.14060495793819427, |
| "learning_rate": 4.878426547478537e-06, |
| "loss": 3.161220741271973, |
| "step": 9605 |
| }, |
| { |
| "epoch": 1.846301633045149, |
| "grad_norm": 0.14048533141613007, |
| "learning_rate": 4.818338725109033e-06, |
| "loss": 3.1618812561035154, |
| "step": 9610 |
| }, |
| { |
| "epoch": 1.8472622478386167, |
| "grad_norm": 0.13674895465373993, |
| "learning_rate": 4.758617210964749e-06, |
| "loss": 3.1565229415893556, |
| "step": 9615 |
| }, |
| { |
| "epoch": 1.8482228626320847, |
| "grad_norm": 0.13577382266521454, |
| "learning_rate": 4.6992621557292e-06, |
| "loss": 3.158913230895996, |
| "step": 9620 |
| }, |
| { |
| "epoch": 1.8491834774255524, |
| "grad_norm": 0.13337448239326477, |
| "learning_rate": 4.640273709161224e-06, |
| "loss": 3.161669158935547, |
| "step": 9625 |
| }, |
| { |
| "epoch": 1.8501440922190202, |
| "grad_norm": 0.134219229221344, |
| "learning_rate": 4.5816520200948005e-06, |
| "loss": 3.160819435119629, |
| "step": 9630 |
| }, |
| { |
| "epoch": 1.851104707012488, |
| "grad_norm": 0.13151714205741882, |
| "learning_rate": 4.5233972364383975e-06, |
| "loss": 3.1615375518798827, |
| "step": 9635 |
| }, |
| { |
| "epoch": 1.8520653218059557, |
| "grad_norm": 0.11864073574542999, |
| "learning_rate": 4.465509505174858e-06, |
| "loss": 3.1609018325805662, |
| "step": 9640 |
| }, |
| { |
| "epoch": 1.8530259365994235, |
| "grad_norm": 0.14346528053283691, |
| "learning_rate": 4.407988972360849e-06, |
| "loss": 3.161561393737793, |
| "step": 9645 |
| }, |
| { |
| "epoch": 1.8539865513928915, |
| "grad_norm": 0.13390202820301056, |
| "learning_rate": 4.350835783126594e-06, |
| "loss": 3.1598655700683596, |
| "step": 9650 |
| }, |
| { |
| "epoch": 1.8549471661863592, |
| "grad_norm": 0.12858052551746368, |
| "learning_rate": 4.2940500816754094e-06, |
| "loss": 3.160798263549805, |
| "step": 9655 |
| }, |
| { |
| "epoch": 1.8559077809798272, |
| "grad_norm": 0.13176590204238892, |
| "learning_rate": 4.237632011283471e-06, |
| "loss": 3.158690643310547, |
| "step": 9660 |
| }, |
| { |
| "epoch": 1.856868395773295, |
| "grad_norm": 0.12532593309879303, |
| "learning_rate": 4.181581714299359e-06, |
| "loss": 3.159891128540039, |
| "step": 9665 |
| }, |
| { |
| "epoch": 1.8578290105667628, |
| "grad_norm": 0.11781672388315201, |
| "learning_rate": 4.12589933214375e-06, |
| "loss": 3.161539077758789, |
| "step": 9670 |
| }, |
| { |
| "epoch": 1.8587896253602305, |
| "grad_norm": 0.1212988868355751, |
| "learning_rate": 4.070585005308946e-06, |
| "loss": 3.16253662109375, |
| "step": 9675 |
| }, |
| { |
| "epoch": 1.8597502401536983, |
| "grad_norm": 0.11885207146406174, |
| "learning_rate": 4.015638873358707e-06, |
| "loss": 3.162197303771973, |
| "step": 9680 |
| }, |
| { |
| "epoch": 1.860710854947166, |
| "grad_norm": 0.12261178344488144, |
| "learning_rate": 3.961061074927752e-06, |
| "loss": 3.1611923217773437, |
| "step": 9685 |
| }, |
| { |
| "epoch": 1.861671469740634, |
| "grad_norm": 0.13136336207389832, |
| "learning_rate": 3.906851747721495e-06, |
| "loss": 3.160177993774414, |
| "step": 9690 |
| }, |
| { |
| "epoch": 1.8626320845341018, |
| "grad_norm": 0.13114266097545624, |
| "learning_rate": 3.853011028515563e-06, |
| "loss": 3.15836124420166, |
| "step": 9695 |
| }, |
| { |
| "epoch": 1.8635926993275698, |
| "grad_norm": 0.13357418775558472, |
| "learning_rate": 3.79953905315567e-06, |
| "loss": 3.161663627624512, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.8645533141210375, |
| "grad_norm": 0.11809264123439789, |
| "learning_rate": 3.746435956557081e-06, |
| "loss": 3.1624309539794924, |
| "step": 9705 |
| }, |
| { |
| "epoch": 1.8655139289145053, |
| "grad_norm": 0.1218031793832779, |
| "learning_rate": 3.6937018727043523e-06, |
| "loss": 3.1634284973144533, |
| "step": 9710 |
| }, |
| { |
| "epoch": 1.866474543707973, |
| "grad_norm": 0.12914207577705383, |
| "learning_rate": 3.6413369346509848e-06, |
| "loss": 3.1623727798461916, |
| "step": 9715 |
| }, |
| { |
| "epoch": 1.8674351585014408, |
| "grad_norm": 0.12263166159391403, |
| "learning_rate": 3.5893412745191085e-06, |
| "loss": 3.1578353881835937, |
| "step": 9720 |
| }, |
| { |
| "epoch": 1.8683957732949086, |
| "grad_norm": 0.11959100514650345, |
| "learning_rate": 3.5377150234990824e-06, |
| "loss": 3.1602123260498045, |
| "step": 9725 |
| }, |
| { |
| "epoch": 1.8693563880883766, |
| "grad_norm": 0.132080078125, |
| "learning_rate": 3.4864583118492438e-06, |
| "loss": 3.1630222320556642, |
| "step": 9730 |
| }, |
| { |
| "epoch": 1.8703170028818443, |
| "grad_norm": 0.13228577375411987, |
| "learning_rate": 3.435571268895526e-06, |
| "loss": 3.155730438232422, |
| "step": 9735 |
| }, |
| { |
| "epoch": 1.8712776176753123, |
| "grad_norm": 0.12898947298526764, |
| "learning_rate": 3.3850540230311918e-06, |
| "loss": 3.1614667892456056, |
| "step": 9740 |
| }, |
| { |
| "epoch": 1.87223823246878, |
| "grad_norm": 0.1316165179014206, |
| "learning_rate": 3.3349067017163833e-06, |
| "loss": 3.162592315673828, |
| "step": 9745 |
| }, |
| { |
| "epoch": 1.8731988472622478, |
| "grad_norm": 0.12914599478244781, |
| "learning_rate": 3.28512943147799e-06, |
| "loss": 3.160664749145508, |
| "step": 9750 |
| }, |
| { |
| "epoch": 1.8741594620557156, |
| "grad_norm": 0.11813253164291382, |
| "learning_rate": 3.23572233790913e-06, |
| "loss": 3.1575859069824217, |
| "step": 9755 |
| }, |
| { |
| "epoch": 1.8751200768491834, |
| "grad_norm": 0.1261204481124878, |
| "learning_rate": 3.1866855456689878e-06, |
| "loss": 3.1581016540527345, |
| "step": 9760 |
| }, |
| { |
| "epoch": 1.8760806916426513, |
| "grad_norm": 0.120733842253685, |
| "learning_rate": 3.138019178482409e-06, |
| "loss": 3.1588390350341795, |
| "step": 9765 |
| }, |
| { |
| "epoch": 1.877041306436119, |
| "grad_norm": 0.13560213148593903, |
| "learning_rate": 3.0897233591396562e-06, |
| "loss": 3.1578319549560545, |
| "step": 9770 |
| }, |
| { |
| "epoch": 1.878001921229587, |
| "grad_norm": 0.11367765814065933, |
| "learning_rate": 3.0417982094960212e-06, |
| "loss": 3.1566787719726563, |
| "step": 9775 |
| }, |
| { |
| "epoch": 1.8789625360230549, |
| "grad_norm": 0.11928807944059372, |
| "learning_rate": 2.994243850471545e-06, |
| "loss": 3.162850570678711, |
| "step": 9780 |
| }, |
| { |
| "epoch": 1.8799231508165226, |
| "grad_norm": 0.11847756057977676, |
| "learning_rate": 2.9470604020507994e-06, |
| "loss": 3.1612308502197264, |
| "step": 9785 |
| }, |
| { |
| "epoch": 1.8808837656099904, |
| "grad_norm": 0.1250849962234497, |
| "learning_rate": 2.9002479832824553e-06, |
| "loss": 3.1637001037597656, |
| "step": 9790 |
| }, |
| { |
| "epoch": 1.8818443804034581, |
| "grad_norm": 0.12311087548732758, |
| "learning_rate": 2.8538067122790164e-06, |
| "loss": 3.1589778900146483, |
| "step": 9795 |
| }, |
| { |
| "epoch": 1.882804995196926, |
| "grad_norm": 0.11985550820827484, |
| "learning_rate": 2.8077367062166013e-06, |
| "loss": 3.1609506607055664, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.8837656099903939, |
| "grad_norm": 0.12335570156574249, |
| "learning_rate": 2.762038081334561e-06, |
| "loss": 3.1584909439086912, |
| "step": 9805 |
| }, |
| { |
| "epoch": 1.8847262247838616, |
| "grad_norm": 0.12690205872058868, |
| "learning_rate": 2.716710952935214e-06, |
| "loss": 3.160411071777344, |
| "step": 9810 |
| }, |
| { |
| "epoch": 1.8856868395773296, |
| "grad_norm": 0.12958942353725433, |
| "learning_rate": 2.6717554353835435e-06, |
| "loss": 3.16156005859375, |
| "step": 9815 |
| }, |
| { |
| "epoch": 1.8866474543707974, |
| "grad_norm": 0.1530323475599289, |
| "learning_rate": 2.6271716421069344e-06, |
| "loss": 3.1630611419677734, |
| "step": 9820 |
| }, |
| { |
| "epoch": 1.8876080691642652, |
| "grad_norm": 0.11445850133895874, |
| "learning_rate": 2.5829596855948876e-06, |
| "loss": 3.160757064819336, |
| "step": 9825 |
| }, |
| { |
| "epoch": 1.888568683957733, |
| "grad_norm": 0.12335135042667389, |
| "learning_rate": 2.539119677398671e-06, |
| "loss": 3.159043884277344, |
| "step": 9830 |
| }, |
| { |
| "epoch": 1.8895292987512007, |
| "grad_norm": 0.12193870544433594, |
| "learning_rate": 2.495651728131154e-06, |
| "loss": 3.1591602325439454, |
| "step": 9835 |
| }, |
| { |
| "epoch": 1.8904899135446684, |
| "grad_norm": 0.11826537549495697, |
| "learning_rate": 2.452555947466439e-06, |
| "loss": 3.161988067626953, |
| "step": 9840 |
| }, |
| { |
| "epoch": 1.8914505283381364, |
| "grad_norm": 0.12225425243377686, |
| "learning_rate": 2.4098324441395645e-06, |
| "loss": 3.164390754699707, |
| "step": 9845 |
| }, |
| { |
| "epoch": 1.8924111431316042, |
| "grad_norm": 0.11919926851987839, |
| "learning_rate": 2.367481325946352e-06, |
| "loss": 3.160402297973633, |
| "step": 9850 |
| }, |
| { |
| "epoch": 1.8933717579250722, |
| "grad_norm": 0.11494944989681244, |
| "learning_rate": 2.325502699742976e-06, |
| "loss": 3.159711456298828, |
| "step": 9855 |
| }, |
| { |
| "epoch": 1.89433237271854, |
| "grad_norm": 0.11843105405569077, |
| "learning_rate": 2.283896671445862e-06, |
| "loss": 3.159711456298828, |
| "step": 9860 |
| }, |
| { |
| "epoch": 1.8952929875120077, |
| "grad_norm": 0.12161135673522949, |
| "learning_rate": 2.2426633460312373e-06, |
| "loss": 3.1607955932617187, |
| "step": 9865 |
| }, |
| { |
| "epoch": 1.8962536023054755, |
| "grad_norm": 0.114869125187397, |
| "learning_rate": 2.2018028275350652e-06, |
| "loss": 3.161262321472168, |
| "step": 9870 |
| }, |
| { |
| "epoch": 1.8972142170989432, |
| "grad_norm": 0.129729226231575, |
| "learning_rate": 2.1613152190525785e-06, |
| "loss": 3.161954879760742, |
| "step": 9875 |
| }, |
| { |
| "epoch": 1.898174831892411, |
| "grad_norm": 0.12527115643024445, |
| "learning_rate": 2.1212006227382117e-06, |
| "loss": 3.159313201904297, |
| "step": 9880 |
| }, |
| { |
| "epoch": 1.899135446685879, |
| "grad_norm": 0.1106216311454773, |
| "learning_rate": 2.0814591398051527e-06, |
| "loss": 3.160965347290039, |
| "step": 9885 |
| }, |
| { |
| "epoch": 1.9000960614793467, |
| "grad_norm": 0.11833595484495163, |
| "learning_rate": 2.0420908705252926e-06, |
| "loss": 3.160472106933594, |
| "step": 9890 |
| }, |
| { |
| "epoch": 1.9010566762728147, |
| "grad_norm": 0.10949314385652542, |
| "learning_rate": 2.003095914228775e-06, |
| "loss": 3.163528060913086, |
| "step": 9895 |
| }, |
| { |
| "epoch": 1.9020172910662825, |
| "grad_norm": 0.1126236766576767, |
| "learning_rate": 1.9644743693038977e-06, |
| "loss": 3.1650867462158203, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.9029779058597502, |
| "grad_norm": 0.1123654693365097, |
| "learning_rate": 1.9262263331967785e-06, |
| "loss": 3.1654218673706054, |
| "step": 9905 |
| }, |
| { |
| "epoch": 1.903938520653218, |
| "grad_norm": 0.12411278486251831, |
| "learning_rate": 1.8883519024111227e-06, |
| "loss": 3.156511688232422, |
| "step": 9910 |
| }, |
| { |
| "epoch": 1.9048991354466858, |
| "grad_norm": 0.1198534443974495, |
| "learning_rate": 1.8508511725080388e-06, |
| "loss": 3.160737991333008, |
| "step": 9915 |
| }, |
| { |
| "epoch": 1.9058597502401537, |
| "grad_norm": 0.11347641795873642, |
| "learning_rate": 1.8137242381056571e-06, |
| "loss": 3.1631191253662108, |
| "step": 9920 |
| }, |
| { |
| "epoch": 1.9068203650336215, |
| "grad_norm": 0.11204273998737335, |
| "learning_rate": 1.7769711928790953e-06, |
| "loss": 3.158831787109375, |
| "step": 9925 |
| }, |
| { |
| "epoch": 1.9077809798270895, |
| "grad_norm": 0.12282492965459824, |
| "learning_rate": 1.7405921295600421e-06, |
| "loss": 3.1593994140625, |
| "step": 9930 |
| }, |
| { |
| "epoch": 1.9087415946205573, |
| "grad_norm": 0.11055224388837814, |
| "learning_rate": 1.7045871399365918e-06, |
| "loss": 3.159510040283203, |
| "step": 9935 |
| }, |
| { |
| "epoch": 1.909702209414025, |
| "grad_norm": 0.11742359399795532, |
| "learning_rate": 1.668956314853026e-06, |
| "loss": 3.161764144897461, |
| "step": 9940 |
| }, |
| { |
| "epoch": 1.9106628242074928, |
| "grad_norm": 0.10971947759389877, |
| "learning_rate": 1.6336997442095823e-06, |
| "loss": 3.1589191436767576, |
| "step": 9945 |
| }, |
| { |
| "epoch": 1.9116234390009605, |
| "grad_norm": 0.11144320666790009, |
| "learning_rate": 1.5988175169622197e-06, |
| "loss": 3.1609643936157226, |
| "step": 9950 |
| }, |
| { |
| "epoch": 1.9125840537944283, |
| "grad_norm": 0.11729393154382706, |
| "learning_rate": 1.5643097211223533e-06, |
| "loss": 3.1646373748779295, |
| "step": 9955 |
| }, |
| { |
| "epoch": 1.9135446685878963, |
| "grad_norm": 0.11545123904943466, |
| "learning_rate": 1.5301764437567032e-06, |
| "loss": 3.1612579345703127, |
| "step": 9960 |
| }, |
| { |
| "epoch": 1.914505283381364, |
| "grad_norm": 0.1158137395977974, |
| "learning_rate": 1.4964177709870128e-06, |
| "loss": 3.1609870910644533, |
| "step": 9965 |
| }, |
| { |
| "epoch": 1.915465898174832, |
| "grad_norm": 0.1098775789141655, |
| "learning_rate": 1.4630337879899146e-06, |
| "loss": 3.1606449127197265, |
| "step": 9970 |
| }, |
| { |
| "epoch": 1.9164265129682998, |
| "grad_norm": 0.11874253302812576, |
| "learning_rate": 1.4300245789965803e-06, |
| "loss": 3.1594825744628907, |
| "step": 9975 |
| }, |
| { |
| "epoch": 1.9173871277617676, |
| "grad_norm": 0.11549612134695053, |
| "learning_rate": 1.3973902272926384e-06, |
| "loss": 3.160371780395508, |
| "step": 9980 |
| }, |
| { |
| "epoch": 1.9183477425552353, |
| "grad_norm": 0.10717111080884933, |
| "learning_rate": 1.3651308152179407e-06, |
| "loss": 3.1619895935058593, |
| "step": 9985 |
| }, |
| { |
| "epoch": 1.919308357348703, |
| "grad_norm": 0.11006828397512436, |
| "learning_rate": 1.3332464241662622e-06, |
| "loss": 3.1590812683105467, |
| "step": 9990 |
| }, |
| { |
| "epoch": 1.9202689721421708, |
| "grad_norm": 0.11138733476400375, |
| "learning_rate": 1.3017371345851846e-06, |
| "loss": 3.158272933959961, |
| "step": 9995 |
| }, |
| { |
| "epoch": 1.9212295869356388, |
| "grad_norm": 0.1066170483827591, |
| "learning_rate": 1.2706030259759137e-06, |
| "loss": 3.158770942687988, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.9221902017291066, |
| "grad_norm": 0.11191676557064056, |
| "learning_rate": 1.2398441768929624e-06, |
| "loss": 3.1578929901123045, |
| "step": 10005 |
| }, |
| { |
| "epoch": 1.9231508165225746, |
| "grad_norm": 0.11436916887760162, |
| "learning_rate": 1.2094606649440841e-06, |
| "loss": 3.1585205078125, |
| "step": 10010 |
| }, |
| { |
| "epoch": 1.9241114313160423, |
| "grad_norm": 0.11085876077413559, |
| "learning_rate": 1.1794525667899901e-06, |
| "loss": 3.1594661712646483, |
| "step": 10015 |
| }, |
| { |
| "epoch": 1.92507204610951, |
| "grad_norm": 0.11293391138315201, |
| "learning_rate": 1.149819958144199e-06, |
| "loss": 3.1591583251953126, |
| "step": 10020 |
| }, |
| { |
| "epoch": 1.9260326609029779, |
| "grad_norm": 0.10681528598070145, |
| "learning_rate": 1.1205629137728044e-06, |
| "loss": 3.160277557373047, |
| "step": 10025 |
| }, |
| { |
| "epoch": 1.9269932756964456, |
| "grad_norm": 0.1138753667473793, |
| "learning_rate": 1.0916815074943242e-06, |
| "loss": 3.15789909362793, |
| "step": 10030 |
| }, |
| { |
| "epoch": 1.9279538904899134, |
| "grad_norm": 0.10544762760400772, |
| "learning_rate": 1.0631758121795508e-06, |
| "loss": 3.1582752227783204, |
| "step": 10035 |
| }, |
| { |
| "epoch": 1.9289145052833814, |
| "grad_norm": 0.11651872098445892, |
| "learning_rate": 1.0350458997512356e-06, |
| "loss": 3.1609378814697267, |
| "step": 10040 |
| }, |
| { |
| "epoch": 1.9298751200768491, |
| "grad_norm": 0.11160679161548615, |
| "learning_rate": 1.0072918411840548e-06, |
| "loss": 3.161101722717285, |
| "step": 10045 |
| }, |
| { |
| "epoch": 1.9308357348703171, |
| "grad_norm": 0.11564800143241882, |
| "learning_rate": 9.799137065043428e-07, |
| "loss": 3.1580835342407227, |
| "step": 10050 |
| }, |
| { |
| "epoch": 1.9317963496637849, |
| "grad_norm": 0.12115105986595154, |
| "learning_rate": 9.529115647899599e-07, |
| "loss": 3.160675811767578, |
| "step": 10055 |
| }, |
| { |
| "epoch": 1.9327569644572526, |
| "grad_norm": 0.11305255442857742, |
| "learning_rate": 9.262854841700418e-07, |
| "loss": 3.1640155792236326, |
| "step": 10060 |
| }, |
| { |
| "epoch": 1.9337175792507204, |
| "grad_norm": 0.11431337147951126, |
| "learning_rate": 9.00035531824933e-07, |
| "loss": 3.162336730957031, |
| "step": 10065 |
| }, |
| { |
| "epoch": 1.9346781940441882, |
| "grad_norm": 0.10593244433403015, |
| "learning_rate": 8.741617739859708e-07, |
| "loss": 3.1616371154785154, |
| "step": 10070 |
| }, |
| { |
| "epoch": 1.9356388088376562, |
| "grad_norm": 0.10147248208522797, |
| "learning_rate": 8.486642759353013e-07, |
| "loss": 3.1602996826171874, |
| "step": 10075 |
| }, |
| { |
| "epoch": 1.936599423631124, |
| "grad_norm": 0.11662466824054718, |
| "learning_rate": 8.23543102005697e-07, |
| "loss": 3.1570274353027346, |
| "step": 10080 |
| }, |
| { |
| "epoch": 1.937560038424592, |
| "grad_norm": 0.10574644058942795, |
| "learning_rate": 7.9879831558049e-07, |
| "loss": 3.156863975524902, |
| "step": 10085 |
| }, |
| { |
| "epoch": 1.9385206532180597, |
| "grad_norm": 0.10977693647146225, |
| "learning_rate": 7.744299790933217e-07, |
| "loss": 3.1590835571289064, |
| "step": 10090 |
| }, |
| { |
| "epoch": 1.9394812680115274, |
| "grad_norm": 0.10733279585838318, |
| "learning_rate": 7.504381540279603e-07, |
| "loss": 3.1599714279174806, |
| "step": 10095 |
| }, |
| { |
| "epoch": 1.9404418828049952, |
| "grad_norm": 0.1001046821475029, |
| "learning_rate": 7.268229009182836e-07, |
| "loss": 3.158583068847656, |
| "step": 10100 |
| }, |
| { |
| "epoch": 1.941402497598463, |
| "grad_norm": 0.10647624731063843, |
| "learning_rate": 7.035842793479795e-07, |
| "loss": 3.158983612060547, |
| "step": 10105 |
| }, |
| { |
| "epoch": 1.9423631123919307, |
| "grad_norm": 0.10598830133676529, |
| "learning_rate": 6.807223479504631e-07, |
| "loss": 3.1631649017333983, |
| "step": 10110 |
| }, |
| { |
| "epoch": 1.9433237271853987, |
| "grad_norm": 0.10469937324523926, |
| "learning_rate": 6.582371644087592e-07, |
| "loss": 3.1588485717773436, |
| "step": 10115 |
| }, |
| { |
| "epoch": 1.9442843419788665, |
| "grad_norm": 0.10287413746118546, |
| "learning_rate": 6.361287854552865e-07, |
| "loss": 3.161878967285156, |
| "step": 10120 |
| }, |
| { |
| "epoch": 1.9452449567723344, |
| "grad_norm": 0.10505318641662598, |
| "learning_rate": 6.143972668717411e-07, |
| "loss": 3.1627128601074217, |
| "step": 10125 |
| }, |
| { |
| "epoch": 1.9462055715658022, |
| "grad_norm": 0.1040089949965477, |
| "learning_rate": 5.930426634889796e-07, |
| "loss": 3.1591156005859373, |
| "step": 10130 |
| }, |
| { |
| "epoch": 1.94716618635927, |
| "grad_norm": 0.10172892361879349, |
| "learning_rate": 5.720650291868523e-07, |
| "loss": 3.1622541427612303, |
| "step": 10135 |
| }, |
| { |
| "epoch": 1.9481268011527377, |
| "grad_norm": 0.10863189399242401, |
| "learning_rate": 5.514644168941041e-07, |
| "loss": 3.161451721191406, |
| "step": 10140 |
| }, |
| { |
| "epoch": 1.9490874159462055, |
| "grad_norm": 0.10447549819946289, |
| "learning_rate": 5.312408785881573e-07, |
| "loss": 3.1583093643188476, |
| "step": 10145 |
| }, |
| { |
| "epoch": 1.9500480307396733, |
| "grad_norm": 0.10685736685991287, |
| "learning_rate": 5.113944652951118e-07, |
| "loss": 3.1573591232299805, |
| "step": 10150 |
| }, |
| { |
| "epoch": 1.9510086455331412, |
| "grad_norm": 0.10184746235609055, |
| "learning_rate": 4.919252270894625e-07, |
| "loss": 3.155690383911133, |
| "step": 10155 |
| }, |
| { |
| "epoch": 1.951969260326609, |
| "grad_norm": 0.10571934282779694, |
| "learning_rate": 4.7283321309413145e-07, |
| "loss": 3.159967613220215, |
| "step": 10160 |
| }, |
| { |
| "epoch": 1.952929875120077, |
| "grad_norm": 0.1015496551990509, |
| "learning_rate": 4.541184714802193e-07, |
| "loss": 3.1597368240356447, |
| "step": 10165 |
| }, |
| { |
| "epoch": 1.9538904899135447, |
| "grad_norm": 0.1008259505033493, |
| "learning_rate": 4.3578104946692114e-07, |
| "loss": 3.156237030029297, |
| "step": 10170 |
| }, |
| { |
| "epoch": 1.9548511047070125, |
| "grad_norm": 0.10270283371210098, |
| "learning_rate": 4.1782099332146046e-07, |
| "loss": 3.162929916381836, |
| "step": 10175 |
| }, |
| { |
| "epoch": 1.9558117195004803, |
| "grad_norm": 0.10830524563789368, |
| "learning_rate": 4.002383483588723e-07, |
| "loss": 3.157078170776367, |
| "step": 10180 |
| }, |
| { |
| "epoch": 1.956772334293948, |
| "grad_norm": 0.09829416126012802, |
| "learning_rate": 3.8303315894200326e-07, |
| "loss": 3.1587419509887695, |
| "step": 10185 |
| }, |
| { |
| "epoch": 1.9577329490874158, |
| "grad_norm": 0.10378840565681458, |
| "learning_rate": 3.662054684812954e-07, |
| "loss": 3.1602014541625976, |
| "step": 10190 |
| }, |
| { |
| "epoch": 1.9586935638808838, |
| "grad_norm": 0.10607574880123138, |
| "learning_rate": 3.497553194347358e-07, |
| "loss": 3.1587203979492187, |
| "step": 10195 |
| }, |
| { |
| "epoch": 1.9596541786743515, |
| "grad_norm": 0.10446880012750626, |
| "learning_rate": 3.3368275330775686e-07, |
| "loss": 3.160129928588867, |
| "step": 10200 |
| }, |
| { |
| "epoch": 1.9606147934678195, |
| "grad_norm": 0.10566939413547516, |
| "learning_rate": 3.1798781065305314e-07, |
| "loss": 3.162166213989258, |
| "step": 10205 |
| }, |
| { |
| "epoch": 1.9615754082612873, |
| "grad_norm": 0.10455495119094849, |
| "learning_rate": 3.0267053107061456e-07, |
| "loss": 3.1622539520263673, |
| "step": 10210 |
| }, |
| { |
| "epoch": 1.962536023054755, |
| "grad_norm": 0.10329185426235199, |
| "learning_rate": 2.877309532074934e-07, |
| "loss": 3.155780029296875, |
| "step": 10215 |
| }, |
| { |
| "epoch": 1.9634966378482228, |
| "grad_norm": 0.09786950796842575, |
| "learning_rate": 2.7316911475777083e-07, |
| "loss": 3.160215377807617, |
| "step": 10220 |
| }, |
| { |
| "epoch": 1.9644572526416906, |
| "grad_norm": 0.10430426150560379, |
| "learning_rate": 2.589850524624737e-07, |
| "loss": 3.158669662475586, |
| "step": 10225 |
| }, |
| { |
| "epoch": 1.9654178674351583, |
| "grad_norm": 0.10500769317150116, |
| "learning_rate": 2.451788021094414e-07, |
| "loss": 3.1637641906738283, |
| "step": 10230 |
| }, |
| { |
| "epoch": 1.9663784822286263, |
| "grad_norm": 0.09861162304878235, |
| "learning_rate": 2.317503985332425e-07, |
| "loss": 3.1611236572265624, |
| "step": 10235 |
| }, |
| { |
| "epoch": 1.967339097022094, |
| "grad_norm": 0.09918565303087234, |
| "learning_rate": 2.1869987561514147e-07, |
| "loss": 3.15871639251709, |
| "step": 10240 |
| }, |
| { |
| "epoch": 1.968299711815562, |
| "grad_norm": 0.1011766865849495, |
| "learning_rate": 2.060272662829321e-07, |
| "loss": 3.16119499206543, |
| "step": 10245 |
| }, |
| { |
| "epoch": 1.9692603266090298, |
| "grad_norm": 0.11001937091350555, |
| "learning_rate": 1.9373260251092092e-07, |
| "loss": 3.1602630615234375, |
| "step": 10250 |
| }, |
| { |
| "epoch": 1.9702209414024976, |
| "grad_norm": 0.10449342429637909, |
| "learning_rate": 1.8181591531977736e-07, |
| "loss": 3.160749816894531, |
| "step": 10255 |
| }, |
| { |
| "epoch": 1.9711815561959654, |
| "grad_norm": 0.09981942176818848, |
| "learning_rate": 1.7027723477656686e-07, |
| "loss": 3.159575653076172, |
| "step": 10260 |
| }, |
| { |
| "epoch": 1.9721421709894331, |
| "grad_norm": 0.10080744326114655, |
| "learning_rate": 1.5911658999453458e-07, |
| "loss": 3.16530704498291, |
| "step": 10265 |
| }, |
| { |
| "epoch": 1.973102785782901, |
| "grad_norm": 0.09806544333696365, |
| "learning_rate": 1.4833400913313864e-07, |
| "loss": 3.1608566284179687, |
| "step": 10270 |
| }, |
| { |
| "epoch": 1.9740634005763689, |
| "grad_norm": 0.10161542147397995, |
| "learning_rate": 1.379295193979335e-07, |
| "loss": 3.1610599517822267, |
| "step": 10275 |
| }, |
| { |
| "epoch": 1.9750240153698368, |
| "grad_norm": 0.10026436299085617, |
| "learning_rate": 1.2790314704052008e-07, |
| "loss": 3.1599550247192383, |
| "step": 10280 |
| }, |
| { |
| "epoch": 1.9759846301633046, |
| "grad_norm": 0.09924504905939102, |
| "learning_rate": 1.1825491735846237e-07, |
| "loss": 3.1612056732177733, |
| "step": 10285 |
| }, |
| { |
| "epoch": 1.9769452449567724, |
| "grad_norm": 0.10245411098003387, |
| "learning_rate": 1.0898485469523766e-07, |
| "loss": 3.1604587554931642, |
| "step": 10290 |
| }, |
| { |
| "epoch": 1.9779058597502401, |
| "grad_norm": 0.09821955114603043, |
| "learning_rate": 1.0009298244016972e-07, |
| "loss": 3.1620269775390626, |
| "step": 10295 |
| }, |
| { |
| "epoch": 1.978866474543708, |
| "grad_norm": 0.09943889081478119, |
| "learning_rate": 9.15793230283457e-08, |
| "loss": 3.160098648071289, |
| "step": 10300 |
| }, |
| { |
| "epoch": 1.9798270893371757, |
| "grad_norm": 0.09800975024700165, |
| "learning_rate": 8.3443897940616e-08, |
| "loss": 3.1579994201660155, |
| "step": 10305 |
| }, |
| { |
| "epoch": 1.9807877041306436, |
| "grad_norm": 0.0975160002708435, |
| "learning_rate": 7.568672770349449e-08, |
| "loss": 3.160677909851074, |
| "step": 10310 |
| }, |
| { |
| "epoch": 1.9817483189241114, |
| "grad_norm": 0.09866613149642944, |
| "learning_rate": 6.830783188910838e-08, |
| "loss": 3.15645809173584, |
| "step": 10315 |
| }, |
| { |
| "epoch": 1.9827089337175794, |
| "grad_norm": 0.09947178512811661, |
| "learning_rate": 6.130722911516505e-08, |
| "loss": 3.1583110809326174, |
| "step": 10320 |
| }, |
| { |
| "epoch": 1.9836695485110472, |
| "grad_norm": 0.09944622218608856, |
| "learning_rate": 5.468493704491872e-08, |
| "loss": 3.1639808654785155, |
| "step": 10325 |
| }, |
| { |
| "epoch": 1.984630163304515, |
| "grad_norm": 0.09704981744289398, |
| "learning_rate": 4.844097238708711e-08, |
| "loss": 3.1581130981445313, |
| "step": 10330 |
| }, |
| { |
| "epoch": 1.9855907780979827, |
| "grad_norm": 0.09650828689336777, |
| "learning_rate": 4.257535089581821e-08, |
| "loss": 3.156038284301758, |
| "step": 10335 |
| }, |
| { |
| "epoch": 1.9865513928914504, |
| "grad_norm": 0.09726572781801224, |
| "learning_rate": 3.70880873707069e-08, |
| "loss": 3.1570310592651367, |
| "step": 10340 |
| }, |
| { |
| "epoch": 1.9875120076849182, |
| "grad_norm": 0.09816308319568634, |
| "learning_rate": 3.1979195656645084e-08, |
| "loss": 3.160786819458008, |
| "step": 10345 |
| }, |
| { |
| "epoch": 1.9884726224783862, |
| "grad_norm": 0.09846552461385727, |
| "learning_rate": 2.72486886439216e-08, |
| "loss": 3.1611724853515626, |
| "step": 10350 |
| }, |
| { |
| "epoch": 1.989433237271854, |
| "grad_norm": 0.09757398068904877, |
| "learning_rate": 2.289657826807234e-08, |
| "loss": 3.1581047058105467, |
| "step": 10355 |
| }, |
| { |
| "epoch": 1.990393852065322, |
| "grad_norm": 0.10126172751188278, |
| "learning_rate": 1.8922875509930212e-08, |
| "loss": 3.160317611694336, |
| "step": 10360 |
| }, |
| { |
| "epoch": 1.9913544668587897, |
| "grad_norm": 0.09841560572385788, |
| "learning_rate": 1.532759039554188e-08, |
| "loss": 3.158650207519531, |
| "step": 10365 |
| }, |
| { |
| "epoch": 1.9923150816522575, |
| "grad_norm": 0.09537240862846375, |
| "learning_rate": 1.2110731996201051e-08, |
| "loss": 3.1590465545654296, |
| "step": 10370 |
| }, |
| { |
| "epoch": 1.9932756964457252, |
| "grad_norm": 0.09531023353338242, |
| "learning_rate": 9.272308428348586e-09, |
| "loss": 3.159567642211914, |
| "step": 10375 |
| }, |
| { |
| "epoch": 1.994236311239193, |
| "grad_norm": 0.10250640660524368, |
| "learning_rate": 6.812326853639083e-09, |
| "loss": 3.1602352142333983, |
| "step": 10380 |
| }, |
| { |
| "epoch": 1.9951969260326607, |
| "grad_norm": 0.09563779830932617, |
| "learning_rate": 4.730793478840977e-09, |
| "loss": 3.157382583618164, |
| "step": 10385 |
| }, |
| { |
| "epoch": 1.9961575408261287, |
| "grad_norm": 0.09357914328575134, |
| "learning_rate": 3.0277135558864906e-09, |
| "loss": 3.1614322662353516, |
| "step": 10390 |
| }, |
| { |
| "epoch": 1.9971181556195965, |
| "grad_norm": 0.09247033298015594, |
| "learning_rate": 1.7030913818050217e-09, |
| "loss": 3.161212921142578, |
| "step": 10395 |
| }, |
| { |
| "epoch": 1.9980787704130645, |
| "grad_norm": 0.09770449995994568, |
| "learning_rate": 7.569302987897597e-10, |
| "loss": 3.1566755294799806, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.9990393852065322, |
| "grad_norm": 0.0940788984298706, |
| "learning_rate": 1.8923269406445618e-10, |
| "loss": 3.157200050354004, |
| "step": 10405 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.10163447260856628, |
| "learning_rate": 0.0, |
| "loss": 3.162412643432617, |
| "step": 10410 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 10410, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.9541356371946714e+20, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
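
The state above ends when `global_step` reaches `max_steps` (10410), with the learning rate annealed to 0.0 and `should_training_stop` set to `true`. Below is a minimal sketch of how such a file could be inspected offline, assuming it is saved under the name `trainer_state.json` (the filename the Hugging Face `Trainer` writes inside each checkpoint directory) and that matplotlib is available; the output filename is illustrative.

```python
import json

import matplotlib.pyplot as plt

# Load the serialized trainer state (filename assumed; the Hugging Face
# Trainer writes it as trainer_state.json inside each checkpoint folder).
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that logged a training loss; eval or summary entries
# in log_history can carry a different set of keys.
train_logs = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
lrs = [e["learning_rate"] for e in train_logs]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")
fig.tight_layout()
fig.savefig("training_curves.png")
```

For this run the plot would show the loss hovering near 3.16 across the final epoch while the schedule decays smoothly to zero, consistent with the log entries above.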