| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 6250, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0008, | |
| "grad_norm": 0.7526811361312866, | |
| "learning_rate": 4.999992104320636e-05, | |
| "loss": 1.0428, | |
| "num_input_tokens_seen": 26624, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0016, | |
| "grad_norm": 0.5957724452018738, | |
| "learning_rate": 4.999968417332415e-05, | |
| "loss": 1.1061, | |
| "num_input_tokens_seen": 51408, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0024, | |
| "grad_norm": 0.8826403021812439, | |
| "learning_rate": 4.999928939184958e-05, | |
| "loss": 1.0426, | |
| "num_input_tokens_seen": 75040, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0032, | |
| "grad_norm": 0.6897421479225159, | |
| "learning_rate": 4.9998736701276295e-05, | |
| "loss": 1.1472, | |
| "num_input_tokens_seen": 98848, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.004, | |
| "grad_norm": 0.7432862520217896, | |
| "learning_rate": 4.9998026105095405e-05, | |
| "loss": 0.9781, | |
| "num_input_tokens_seen": 123104, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0048, | |
| "grad_norm": 0.9830231666564941, | |
| "learning_rate": 4.999715760779541e-05, | |
| "loss": 0.9889, | |
| "num_input_tokens_seen": 152144, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0056, | |
| "grad_norm": 0.9177331924438477, | |
| "learning_rate": 4.999613121486222e-05, | |
| "loss": 0.9345, | |
| "num_input_tokens_seen": 177216, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0064, | |
| "grad_norm": 0.6646199822425842, | |
| "learning_rate": 4.999494693277907e-05, | |
| "loss": 0.8539, | |
| "num_input_tokens_seen": 203152, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0072, | |
| "grad_norm": 0.5822590589523315, | |
| "learning_rate": 4.999360476902656e-05, | |
| "loss": 0.9183, | |
| "num_input_tokens_seen": 233568, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 0.7686595916748047, | |
| "learning_rate": 4.99921047320825e-05, | |
| "loss": 0.873, | |
| "num_input_tokens_seen": 264752, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0088, | |
| "grad_norm": 0.729837954044342, | |
| "learning_rate": 4.9990446831421955e-05, | |
| "loss": 0.8676, | |
| "num_input_tokens_seen": 291040, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.0096, | |
| "grad_norm": 0.9523835778236389, | |
| "learning_rate": 4.998863107751711e-05, | |
| "loss": 0.9004, | |
| "num_input_tokens_seen": 321760, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0104, | |
| "grad_norm": 0.6720367670059204, | |
| "learning_rate": 4.9986657481837277e-05, | |
| "loss": 0.8536, | |
| "num_input_tokens_seen": 347168, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.0112, | |
| "grad_norm": 0.4336840808391571, | |
| "learning_rate": 4.998452605684874e-05, | |
| "loss": 0.8027, | |
| "num_input_tokens_seen": 373888, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.012, | |
| "grad_norm": 0.808559238910675, | |
| "learning_rate": 4.998223681601473e-05, | |
| "loss": 0.8075, | |
| "num_input_tokens_seen": 398752, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0128, | |
| "grad_norm": 0.5663979053497314, | |
| "learning_rate": 4.997978977379536e-05, | |
| "loss": 0.7919, | |
| "num_input_tokens_seen": 421344, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0136, | |
| "grad_norm": 0.5677878260612488, | |
| "learning_rate": 4.9977184945647473e-05, | |
| "loss": 0.7512, | |
| "num_input_tokens_seen": 451296, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.0144, | |
| "grad_norm": 0.674132227897644, | |
| "learning_rate": 4.997442234802456e-05, | |
| "loss": 0.7713, | |
| "num_input_tokens_seen": 482416, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0152, | |
| "grad_norm": 0.5088427662849426, | |
| "learning_rate": 4.997150199837671e-05, | |
| "loss": 0.7965, | |
| "num_input_tokens_seen": 513008, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 0.6657032370567322, | |
| "learning_rate": 4.996842391515044e-05, | |
| "loss": 0.8623, | |
| "num_input_tokens_seen": 537984, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0168, | |
| "grad_norm": 0.6862130761146545, | |
| "learning_rate": 4.996518811778858e-05, | |
| "loss": 0.7797, | |
| "num_input_tokens_seen": 564528, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.0176, | |
| "grad_norm": 0.6449868083000183, | |
| "learning_rate": 4.99617946267302e-05, | |
| "loss": 0.7732, | |
| "num_input_tokens_seen": 588608, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0184, | |
| "grad_norm": 0.5512914657592773, | |
| "learning_rate": 4.9958243463410414e-05, | |
| "loss": 0.7478, | |
| "num_input_tokens_seen": 620752, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.0192, | |
| "grad_norm": 0.7411808371543884, | |
| "learning_rate": 4.995453465026032e-05, | |
| "loss": 0.7194, | |
| "num_input_tokens_seen": 649200, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.9926447868347168, | |
| "learning_rate": 4.995066821070679e-05, | |
| "loss": 0.7506, | |
| "num_input_tokens_seen": 679200, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.0208, | |
| "grad_norm": 0.7455246448516846, | |
| "learning_rate": 4.9946644169172355e-05, | |
| "loss": 0.6886, | |
| "num_input_tokens_seen": 702144, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0216, | |
| "grad_norm": 0.5429201126098633, | |
| "learning_rate": 4.9942462551075056e-05, | |
| "loss": 0.8481, | |
| "num_input_tokens_seen": 730128, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.0224, | |
| "grad_norm": 0.49708107113838196, | |
| "learning_rate": 4.993812338282826e-05, | |
| "loss": 0.7999, | |
| "num_input_tokens_seen": 757248, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.0232, | |
| "grad_norm": 0.6150819063186646, | |
| "learning_rate": 4.993362669184051e-05, | |
| "loss": 0.7877, | |
| "num_input_tokens_seen": 786096, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 0.6751016974449158, | |
| "learning_rate": 4.992897250651535e-05, | |
| "loss": 0.9312, | |
| "num_input_tokens_seen": 814192, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0248, | |
| "grad_norm": 0.6245042085647583, | |
| "learning_rate": 4.992416085625115e-05, | |
| "loss": 0.8767, | |
| "num_input_tokens_seen": 840144, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.0256, | |
| "grad_norm": 0.5385093688964844, | |
| "learning_rate": 4.9919191771440905e-05, | |
| "loss": 0.8646, | |
| "num_input_tokens_seen": 870368, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0264, | |
| "grad_norm": 0.5674800276756287, | |
| "learning_rate": 4.991406528347206e-05, | |
| "loss": 0.7159, | |
| "num_input_tokens_seen": 897296, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.0272, | |
| "grad_norm": 0.3683065176010132, | |
| "learning_rate": 4.990878142472628e-05, | |
| "loss": 0.7573, | |
| "num_input_tokens_seen": 924576, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.028, | |
| "grad_norm": 0.5186157822608948, | |
| "learning_rate": 4.990334022857932e-05, | |
| "loss": 0.8083, | |
| "num_input_tokens_seen": 945856, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.0288, | |
| "grad_norm": 0.5749205350875854, | |
| "learning_rate": 4.9897741729400705e-05, | |
| "loss": 0.7074, | |
| "num_input_tokens_seen": 970416, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0296, | |
| "grad_norm": 0.5518808960914612, | |
| "learning_rate": 4.9891985962553606e-05, | |
| "loss": 0.7709, | |
| "num_input_tokens_seen": 994288, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.0304, | |
| "grad_norm": 0.5436009764671326, | |
| "learning_rate": 4.988607296439458e-05, | |
| "loss": 0.7, | |
| "num_input_tokens_seen": 1029872, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.0312, | |
| "grad_norm": 0.4599573612213135, | |
| "learning_rate": 4.988000277227334e-05, | |
| "loss": 0.8251, | |
| "num_input_tokens_seen": 1061072, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 0.4540678858757019, | |
| "learning_rate": 4.987377542453251e-05, | |
| "loss": 0.6707, | |
| "num_input_tokens_seen": 1089312, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0328, | |
| "grad_norm": 0.5874660611152649, | |
| "learning_rate": 4.98673909605074e-05, | |
| "loss": 0.7264, | |
| "num_input_tokens_seen": 1114272, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.0336, | |
| "grad_norm": 0.6792047619819641, | |
| "learning_rate": 4.9860849420525766e-05, | |
| "loss": 0.7906, | |
| "num_input_tokens_seen": 1139808, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.0344, | |
| "grad_norm": 0.5740874409675598, | |
| "learning_rate": 4.985415084590752e-05, | |
| "loss": 0.8062, | |
| "num_input_tokens_seen": 1163072, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.0352, | |
| "grad_norm": 0.5089894533157349, | |
| "learning_rate": 4.9847295278964514e-05, | |
| "loss": 0.7432, | |
| "num_input_tokens_seen": 1193936, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.036, | |
| "grad_norm": 0.7231270670890808, | |
| "learning_rate": 4.984028276300021e-05, | |
| "loss": 0.7586, | |
| "num_input_tokens_seen": 1219696, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.0368, | |
| "grad_norm": 0.6494696140289307, | |
| "learning_rate": 4.98331133423095e-05, | |
| "loss": 0.7532, | |
| "num_input_tokens_seen": 1248096, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.0376, | |
| "grad_norm": 0.6063010692596436, | |
| "learning_rate": 4.9825787062178315e-05, | |
| "loss": 0.786, | |
| "num_input_tokens_seen": 1276624, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.0384, | |
| "grad_norm": 0.8775933384895325, | |
| "learning_rate": 4.981830396888344e-05, | |
| "loss": 0.7947, | |
| "num_input_tokens_seen": 1303472, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.0392, | |
| "grad_norm": 0.7558068633079529, | |
| "learning_rate": 4.981066410969215e-05, | |
| "loss": 0.6988, | |
| "num_input_tokens_seen": 1326816, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.8880596160888672, | |
| "learning_rate": 4.980286753286195e-05, | |
| "loss": 0.7078, | |
| "num_input_tokens_seen": 1351008, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0408, | |
| "grad_norm": 0.661428689956665, | |
| "learning_rate": 4.979491428764026e-05, | |
| "loss": 0.7491, | |
| "num_input_tokens_seen": 1374656, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.0416, | |
| "grad_norm": 0.7624719738960266, | |
| "learning_rate": 4.9786804424264085e-05, | |
| "loss": 0.75, | |
| "num_input_tokens_seen": 1399264, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.0424, | |
| "grad_norm": 0.6995192170143127, | |
| "learning_rate": 4.977853799395976e-05, | |
| "loss": 0.798, | |
| "num_input_tokens_seen": 1422304, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.0432, | |
| "grad_norm": 0.5227561593055725, | |
| "learning_rate": 4.977011504894252e-05, | |
| "loss": 0.8814, | |
| "num_input_tokens_seen": 1447184, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.044, | |
| "grad_norm": 0.7046292424201965, | |
| "learning_rate": 4.976153564241628e-05, | |
| "loss": 0.7203, | |
| "num_input_tokens_seen": 1474304, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.0448, | |
| "grad_norm": 0.7567644119262695, | |
| "learning_rate": 4.975279982857324e-05, | |
| "loss": 0.6936, | |
| "num_input_tokens_seen": 1500896, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.0456, | |
| "grad_norm": 0.6787880063056946, | |
| "learning_rate": 4.9743907662593524e-05, | |
| "loss": 0.7872, | |
| "num_input_tokens_seen": 1528688, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.0464, | |
| "grad_norm": 0.5113949775695801, | |
| "learning_rate": 4.9734859200644905e-05, | |
| "loss": 0.7517, | |
| "num_input_tokens_seen": 1561328, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.0472, | |
| "grad_norm": 0.7206217050552368, | |
| "learning_rate": 4.972565449988239e-05, | |
| "loss": 0.6726, | |
| "num_input_tokens_seen": 1589088, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 0.602922797203064, | |
| "learning_rate": 4.971629361844785e-05, | |
| "loss": 0.7259, | |
| "num_input_tokens_seen": 1615712, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0488, | |
| "grad_norm": 0.7673738598823547, | |
| "learning_rate": 4.9706776615469716e-05, | |
| "loss": 0.8337, | |
| "num_input_tokens_seen": 1638640, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.0496, | |
| "grad_norm": 0.7302682995796204, | |
| "learning_rate": 4.9697103551062556e-05, | |
| "loss": 0.731, | |
| "num_input_tokens_seen": 1664304, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.0504, | |
| "grad_norm": 0.45416679978370667, | |
| "learning_rate": 4.968727448632669e-05, | |
| "loss": 0.7285, | |
| "num_input_tokens_seen": 1697648, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.0512, | |
| "grad_norm": 0.5968911051750183, | |
| "learning_rate": 4.967728948334784e-05, | |
| "loss": 0.723, | |
| "num_input_tokens_seen": 1726608, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.052, | |
| "grad_norm": 0.6134063601493835, | |
| "learning_rate": 4.96671486051967e-05, | |
| "loss": 0.7918, | |
| "num_input_tokens_seen": 1750912, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.0528, | |
| "grad_norm": 0.5388225317001343, | |
| "learning_rate": 4.965685191592859e-05, | |
| "loss": 0.6448, | |
| "num_input_tokens_seen": 1782912, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.0536, | |
| "grad_norm": 0.6615162491798401, | |
| "learning_rate": 4.964639948058297e-05, | |
| "loss": 0.7874, | |
| "num_input_tokens_seen": 1804704, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.0544, | |
| "grad_norm": 0.8656606078147888, | |
| "learning_rate": 4.963579136518312e-05, | |
| "loss": 0.7025, | |
| "num_input_tokens_seen": 1827248, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.0552, | |
| "grad_norm": 0.7784980535507202, | |
| "learning_rate": 4.962502763673565e-05, | |
| "loss": 0.6676, | |
| "num_input_tokens_seen": 1854304, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 0.847607433795929, | |
| "learning_rate": 4.9614108363230135e-05, | |
| "loss": 0.7774, | |
| "num_input_tokens_seen": 1878768, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.0568, | |
| "grad_norm": 0.5412896275520325, | |
| "learning_rate": 4.9603033613638626e-05, | |
| "loss": 0.7641, | |
| "num_input_tokens_seen": 1905744, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.0576, | |
| "grad_norm": 0.5192331671714783, | |
| "learning_rate": 4.959180345791528e-05, | |
| "loss": 0.7169, | |
| "num_input_tokens_seen": 1931392, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.0584, | |
| "grad_norm": 0.7992143630981445, | |
| "learning_rate": 4.958041796699583e-05, | |
| "loss": 0.7033, | |
| "num_input_tokens_seen": 1954304, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.0592, | |
| "grad_norm": 0.49692437052726746, | |
| "learning_rate": 4.956887721279726e-05, | |
| "loss": 0.6569, | |
| "num_input_tokens_seen": 1987264, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.7032391428947449, | |
| "learning_rate": 4.9557181268217227e-05, | |
| "loss": 0.7809, | |
| "num_input_tokens_seen": 2010160, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.0608, | |
| "grad_norm": 0.780989944934845, | |
| "learning_rate": 4.9545330207133664e-05, | |
| "loss": 0.811, | |
| "num_input_tokens_seen": 2038880, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.0616, | |
| "grad_norm": 0.819433867931366, | |
| "learning_rate": 4.953332410440435e-05, | |
| "loss": 0.825, | |
| "num_input_tokens_seen": 2065344, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.0624, | |
| "grad_norm": 0.7076752781867981, | |
| "learning_rate": 4.952116303586631e-05, | |
| "loss": 0.7479, | |
| "num_input_tokens_seen": 2092064, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.0632, | |
| "grad_norm": 0.6264218688011169, | |
| "learning_rate": 4.9508847078335495e-05, | |
| "loss": 0.7246, | |
| "num_input_tokens_seen": 2119360, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 0.5829480290412903, | |
| "learning_rate": 4.949637630960617e-05, | |
| "loss": 0.6956, | |
| "num_input_tokens_seen": 2146560, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0648, | |
| "grad_norm": 0.5653419494628906, | |
| "learning_rate": 4.94837508084505e-05, | |
| "loss": 0.7315, | |
| "num_input_tokens_seen": 2169232, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.0656, | |
| "grad_norm": 1.0192047357559204, | |
| "learning_rate": 4.947097065461801e-05, | |
| "loss": 0.7075, | |
| "num_input_tokens_seen": 2192224, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.0664, | |
| "grad_norm": 0.7392141819000244, | |
| "learning_rate": 4.945803592883509e-05, | |
| "loss": 0.811, | |
| "num_input_tokens_seen": 2216784, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.0672, | |
| "grad_norm": 0.6470807194709778, | |
| "learning_rate": 4.9444946712804494e-05, | |
| "loss": 0.7835, | |
| "num_input_tokens_seen": 2243120, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.068, | |
| "grad_norm": 0.5305742025375366, | |
| "learning_rate": 4.943170308920484e-05, | |
| "loss": 0.7211, | |
| "num_input_tokens_seen": 2270896, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.0688, | |
| "grad_norm": 0.8647666573524475, | |
| "learning_rate": 4.941830514169004e-05, | |
| "loss": 0.72, | |
| "num_input_tokens_seen": 2298528, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.0696, | |
| "grad_norm": 0.6244668364524841, | |
| "learning_rate": 4.9404752954888824e-05, | |
| "loss": 0.7206, | |
| "num_input_tokens_seen": 2328080, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.0704, | |
| "grad_norm": 0.6552883386611938, | |
| "learning_rate": 4.939104661440415e-05, | |
| "loss": 0.8018, | |
| "num_input_tokens_seen": 2355776, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.0712, | |
| "grad_norm": 0.8276055455207825, | |
| "learning_rate": 4.937718620681273e-05, | |
| "loss": 0.8267, | |
| "num_input_tokens_seen": 2379056, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 0.6930189728736877, | |
| "learning_rate": 4.9363171819664434e-05, | |
| "loss": 0.8961, | |
| "num_input_tokens_seen": 2401664, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.0728, | |
| "grad_norm": 0.7441433668136597, | |
| "learning_rate": 4.934900354148173e-05, | |
| "loss": 0.6942, | |
| "num_input_tokens_seen": 2427456, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.0736, | |
| "grad_norm": 0.5929616093635559, | |
| "learning_rate": 4.933468146175918e-05, | |
| "loss": 0.7874, | |
| "num_input_tokens_seen": 2450752, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.0744, | |
| "grad_norm": 0.5789006948471069, | |
| "learning_rate": 4.9320205670962814e-05, | |
| "loss": 0.7162, | |
| "num_input_tokens_seen": 2473856, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.0752, | |
| "grad_norm": 0.6359069347381592, | |
| "learning_rate": 4.9305576260529607e-05, | |
| "loss": 0.7434, | |
| "num_input_tokens_seen": 2502928, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.076, | |
| "grad_norm": 0.6155191659927368, | |
| "learning_rate": 4.929079332286685e-05, | |
| "loss": 0.6932, | |
| "num_input_tokens_seen": 2536144, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.0768, | |
| "grad_norm": 0.6511387228965759, | |
| "learning_rate": 4.927585695135162e-05, | |
| "loss": 0.8053, | |
| "num_input_tokens_seen": 2562688, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.0776, | |
| "grad_norm": 0.5791414976119995, | |
| "learning_rate": 4.926076724033016e-05, | |
| "loss": 0.7482, | |
| "num_input_tokens_seen": 2594480, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.0784, | |
| "grad_norm": 0.5258495807647705, | |
| "learning_rate": 4.9245524285117274e-05, | |
| "loss": 0.7075, | |
| "num_input_tokens_seen": 2624736, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.0792, | |
| "grad_norm": 0.5191717743873596, | |
| "learning_rate": 4.923012818199576e-05, | |
| "loss": 0.6132, | |
| "num_input_tokens_seen": 2648880, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.8281647562980652, | |
| "learning_rate": 4.9214579028215776e-05, | |
| "loss": 0.6679, | |
| "num_input_tokens_seen": 2675888, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0808, | |
| "grad_norm": 0.588010847568512, | |
| "learning_rate": 4.919887692199423e-05, | |
| "loss": 0.7016, | |
| "num_input_tokens_seen": 2699392, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.0816, | |
| "grad_norm": 0.8409311771392822, | |
| "learning_rate": 4.918302196251415e-05, | |
| "loss": 0.7216, | |
| "num_input_tokens_seen": 2726432, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.0824, | |
| "grad_norm": 0.6029579639434814, | |
| "learning_rate": 4.9167014249924075e-05, | |
| "loss": 0.6602, | |
| "num_input_tokens_seen": 2756336, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.0832, | |
| "grad_norm": 0.7269614934921265, | |
| "learning_rate": 4.9150853885337426e-05, | |
| "loss": 0.6956, | |
| "num_input_tokens_seen": 2781648, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.084, | |
| "grad_norm": 0.5419861674308777, | |
| "learning_rate": 4.913454097083185e-05, | |
| "loss": 0.6427, | |
| "num_input_tokens_seen": 2810336, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.0848, | |
| "grad_norm": 0.9006750583648682, | |
| "learning_rate": 4.911807560944858e-05, | |
| "loss": 0.8328, | |
| "num_input_tokens_seen": 2836432, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.0856, | |
| "grad_norm": 0.7180121541023254, | |
| "learning_rate": 4.9101457905191774e-05, | |
| "loss": 0.8104, | |
| "num_input_tokens_seen": 2863616, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.0864, | |
| "grad_norm": 0.6724479794502258, | |
| "learning_rate": 4.9084687963027894e-05, | |
| "loss": 0.6858, | |
| "num_input_tokens_seen": 2891264, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.0872, | |
| "grad_norm": 0.7073305249214172, | |
| "learning_rate": 4.906776588888502e-05, | |
| "loss": 0.7271, | |
| "num_input_tokens_seen": 2916256, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 0.7945154309272766, | |
| "learning_rate": 4.905069178965215e-05, | |
| "loss": 0.7527, | |
| "num_input_tokens_seen": 2944112, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.0888, | |
| "grad_norm": 0.5791934728622437, | |
| "learning_rate": 4.903346577317859e-05, | |
| "loss": 0.7341, | |
| "num_input_tokens_seen": 2972512, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.0896, | |
| "grad_norm": 0.8222031593322754, | |
| "learning_rate": 4.90160879482732e-05, | |
| "loss": 0.7339, | |
| "num_input_tokens_seen": 2997168, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.0904, | |
| "grad_norm": 0.6719418168067932, | |
| "learning_rate": 4.89985584247038e-05, | |
| "loss": 0.768, | |
| "num_input_tokens_seen": 3020880, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.0912, | |
| "grad_norm": 0.7740746140480042, | |
| "learning_rate": 4.898087731319636e-05, | |
| "loss": 0.7014, | |
| "num_input_tokens_seen": 3044224, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.092, | |
| "grad_norm": 0.5642164945602417, | |
| "learning_rate": 4.89630447254344e-05, | |
| "loss": 0.6203, | |
| "num_input_tokens_seen": 3071680, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.0928, | |
| "grad_norm": 1.4719825983047485, | |
| "learning_rate": 4.894506077405824e-05, | |
| "loss": 0.7461, | |
| "num_input_tokens_seen": 3099088, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.0936, | |
| "grad_norm": 0.6961272954940796, | |
| "learning_rate": 4.892692557266429e-05, | |
| "loss": 0.7357, | |
| "num_input_tokens_seen": 3127728, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.0944, | |
| "grad_norm": 0.686820924282074, | |
| "learning_rate": 4.8908639235804324e-05, | |
| "loss": 0.7819, | |
| "num_input_tokens_seen": 3154336, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.0952, | |
| "grad_norm": 0.7145109176635742, | |
| "learning_rate": 4.8890201878984796e-05, | |
| "loss": 0.7121, | |
| "num_input_tokens_seen": 3178768, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 0.6159213781356812, | |
| "learning_rate": 4.887161361866608e-05, | |
| "loss": 0.6698, | |
| "num_input_tokens_seen": 3211968, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.0968, | |
| "grad_norm": 0.8054212927818298, | |
| "learning_rate": 4.885287457226172e-05, | |
| "loss": 0.7606, | |
| "num_input_tokens_seen": 3238272, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.0976, | |
| "grad_norm": 1.1526386737823486, | |
| "learning_rate": 4.8833984858137715e-05, | |
| "loss": 0.7694, | |
| "num_input_tokens_seen": 3270208, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.0984, | |
| "grad_norm": 0.5728780031204224, | |
| "learning_rate": 4.8814944595611776e-05, | |
| "loss": 0.7227, | |
| "num_input_tokens_seen": 3296192, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.0992, | |
| "grad_norm": 0.6360820531845093, | |
| "learning_rate": 4.8795753904952534e-05, | |
| "loss": 0.7275, | |
| "num_input_tokens_seen": 3321232, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.6169213056564331, | |
| "learning_rate": 4.877641290737884e-05, | |
| "loss": 0.6746, | |
| "num_input_tokens_seen": 3343984, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.1008, | |
| "grad_norm": 0.8000876307487488, | |
| "learning_rate": 4.8756921725058934e-05, | |
| "loss": 0.8223, | |
| "num_input_tokens_seen": 3367824, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.1016, | |
| "grad_norm": 0.5983218550682068, | |
| "learning_rate": 4.8737280481109724e-05, | |
| "loss": 0.8487, | |
| "num_input_tokens_seen": 3394800, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.1024, | |
| "grad_norm": 0.9402346014976501, | |
| "learning_rate": 4.871748929959598e-05, | |
| "loss": 0.7441, | |
| "num_input_tokens_seen": 3421360, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.1032, | |
| "grad_norm": 0.6266387104988098, | |
| "learning_rate": 4.869754830552956e-05, | |
| "loss": 0.7631, | |
| "num_input_tokens_seen": 3449584, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 0.7829232215881348, | |
| "learning_rate": 4.867745762486861e-05, | |
| "loss": 0.7793, | |
| "num_input_tokens_seen": 3477168, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.1048, | |
| "grad_norm": 0.7125943303108215, | |
| "learning_rate": 4.86572173845168e-05, | |
| "loss": 0.7415, | |
| "num_input_tokens_seen": 3505056, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.1056, | |
| "grad_norm": 0.6520003080368042, | |
| "learning_rate": 4.863682771232248e-05, | |
| "loss": 0.7157, | |
| "num_input_tokens_seen": 3534576, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.1064, | |
| "grad_norm": 0.5907071828842163, | |
| "learning_rate": 4.861628873707792e-05, | |
| "loss": 0.7287, | |
| "num_input_tokens_seen": 3560688, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.1072, | |
| "grad_norm": 0.8829016089439392, | |
| "learning_rate": 4.859560058851844e-05, | |
| "loss": 0.7351, | |
| "num_input_tokens_seen": 3586176, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.108, | |
| "grad_norm": 0.917322039604187, | |
| "learning_rate": 4.8574763397321614e-05, | |
| "loss": 0.6213, | |
| "num_input_tokens_seen": 3615472, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.1088, | |
| "grad_norm": 0.6344768404960632, | |
| "learning_rate": 4.855377729510648e-05, | |
| "loss": 0.729, | |
| "num_input_tokens_seen": 3638256, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.1096, | |
| "grad_norm": 0.7305799722671509, | |
| "learning_rate": 4.8532642414432674e-05, | |
| "loss": 0.7242, | |
| "num_input_tokens_seen": 3667824, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.1104, | |
| "grad_norm": 0.7569555044174194, | |
| "learning_rate": 4.851135888879958e-05, | |
| "loss": 0.7831, | |
| "num_input_tokens_seen": 3695408, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.1112, | |
| "grad_norm": 0.7566932439804077, | |
| "learning_rate": 4.8489926852645505e-05, | |
| "loss": 0.7181, | |
| "num_input_tokens_seen": 3719888, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.7932357788085938, | |
| "learning_rate": 4.846834644134686e-05, | |
| "loss": 0.7961, | |
| "num_input_tokens_seen": 3744512, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.1128, | |
| "grad_norm": 0.708210825920105, | |
| "learning_rate": 4.844661779121722e-05, | |
| "loss": 0.8362, | |
| "num_input_tokens_seen": 3771968, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.1136, | |
| "grad_norm": 0.7361094951629639, | |
| "learning_rate": 4.8424741039506575e-05, | |
| "loss": 0.7645, | |
| "num_input_tokens_seen": 3801680, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.1144, | |
| "grad_norm": 0.48908814787864685, | |
| "learning_rate": 4.840271632440038e-05, | |
| "loss": 0.7042, | |
| "num_input_tokens_seen": 3833952, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.1152, | |
| "grad_norm": 0.6167788505554199, | |
| "learning_rate": 4.8380543785018677e-05, | |
| "loss": 0.7476, | |
| "num_input_tokens_seen": 3860144, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.116, | |
| "grad_norm": 0.68650883436203, | |
| "learning_rate": 4.8358223561415304e-05, | |
| "loss": 0.7415, | |
| "num_input_tokens_seen": 3890304, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.1168, | |
| "grad_norm": 0.7059746384620667, | |
| "learning_rate": 4.833575579457691e-05, | |
| "loss": 0.6717, | |
| "num_input_tokens_seen": 3914560, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.1176, | |
| "grad_norm": 0.8362336158752441, | |
| "learning_rate": 4.8313140626422125e-05, | |
| "loss": 0.7545, | |
| "num_input_tokens_seen": 3940128, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.1184, | |
| "grad_norm": 0.5400592684745789, | |
| "learning_rate": 4.829037819980065e-05, | |
| "loss": 0.7809, | |
| "num_input_tokens_seen": 3970608, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.1192, | |
| "grad_norm": 1.0431326627731323, | |
| "learning_rate": 4.8267468658492335e-05, | |
| "loss": 0.7904, | |
| "num_input_tokens_seen": 3996960, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.5358662605285645, | |
| "learning_rate": 4.8244412147206284e-05, | |
| "loss": 0.7688, | |
| "num_input_tokens_seen": 4021488, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.1208, | |
| "grad_norm": 0.8147661685943604, | |
| "learning_rate": 4.822120881157998e-05, | |
| "loss": 0.7819, | |
| "num_input_tokens_seen": 4047136, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.1216, | |
| "grad_norm": 0.6247139573097229, | |
| "learning_rate": 4.819785879817827e-05, | |
| "loss": 0.6757, | |
| "num_input_tokens_seen": 4072256, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.1224, | |
| "grad_norm": 0.8849884271621704, | |
| "learning_rate": 4.817436225449255e-05, | |
| "loss": 0.8952, | |
| "num_input_tokens_seen": 4095328, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.1232, | |
| "grad_norm": 0.8693557977676392, | |
| "learning_rate": 4.8150719328939755e-05, | |
| "loss": 0.6998, | |
| "num_input_tokens_seen": 4118896, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.124, | |
| "grad_norm": 0.9492819905281067, | |
| "learning_rate": 4.812693017086145e-05, | |
| "loss": 0.7675, | |
| "num_input_tokens_seen": 4144576, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.1248, | |
| "grad_norm": 0.8479375243186951, | |
| "learning_rate": 4.810299493052289e-05, | |
| "loss": 0.7332, | |
| "num_input_tokens_seen": 4172448, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.1256, | |
| "grad_norm": 0.7956748008728027, | |
| "learning_rate": 4.8078913759112066e-05, | |
| "loss": 0.6942, | |
| "num_input_tokens_seen": 4196032, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.1264, | |
| "grad_norm": 0.6426162123680115, | |
| "learning_rate": 4.805468680873874e-05, | |
| "loss": 0.7536, | |
| "num_input_tokens_seen": 4224320, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.1272, | |
| "grad_norm": 0.6501713991165161, | |
| "learning_rate": 4.803031423243349e-05, | |
| "loss": 0.6722, | |
| "num_input_tokens_seen": 4252752, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.773551881313324, | |
| "learning_rate": 4.800579618414676e-05, | |
| "loss": 0.7651, | |
| "num_input_tokens_seen": 4278480, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.1288, | |
| "grad_norm": 0.6473078727722168, | |
| "learning_rate": 4.7981132818747876e-05, | |
| "loss": 0.6626, | |
| "num_input_tokens_seen": 4305920, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.1296, | |
| "grad_norm": 0.5944277048110962, | |
| "learning_rate": 4.795632429202405e-05, | |
| "loss": 0.8511, | |
| "num_input_tokens_seen": 4330448, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.1304, | |
| "grad_norm": 0.6878964900970459, | |
| "learning_rate": 4.793137076067942e-05, | |
| "loss": 0.7524, | |
| "num_input_tokens_seen": 4356880, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.1312, | |
| "grad_norm": 0.9247101545333862, | |
| "learning_rate": 4.790627238233405e-05, | |
| "loss": 0.8498, | |
| "num_input_tokens_seen": 4383744, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.132, | |
| "grad_norm": 0.9401747584342957, | |
| "learning_rate": 4.788102931552294e-05, | |
| "loss": 0.647, | |
| "num_input_tokens_seen": 4411120, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.1328, | |
| "grad_norm": 0.765521764755249, | |
| "learning_rate": 4.7855641719695023e-05, | |
| "loss": 0.7766, | |
| "num_input_tokens_seen": 4435920, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.1336, | |
| "grad_norm": 0.8985924124717712, | |
| "learning_rate": 4.783010975521216e-05, | |
| "loss": 0.7426, | |
| "num_input_tokens_seen": 4462768, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.1344, | |
| "grad_norm": 0.8223104476928711, | |
| "learning_rate": 4.78044335833481e-05, | |
| "loss": 0.6919, | |
| "num_input_tokens_seen": 4493232, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.1352, | |
| "grad_norm": 0.6721159219741821, | |
| "learning_rate": 4.7778613366287505e-05, | |
| "loss": 0.7221, | |
| "num_input_tokens_seen": 4520048, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 0.6136463284492493, | |
| "learning_rate": 4.775264926712489e-05, | |
| "loss": 0.7344, | |
| "num_input_tokens_seen": 4545984, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.1368, | |
| "grad_norm": 0.7662776708602905, | |
| "learning_rate": 4.772654144986364e-05, | |
| "loss": 0.6648, | |
| "num_input_tokens_seen": 4577296, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.1376, | |
| "grad_norm": 0.8455452919006348, | |
| "learning_rate": 4.7700290079414896e-05, | |
| "loss": 0.7513, | |
| "num_input_tokens_seen": 4602272, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.1384, | |
| "grad_norm": 0.8613116145133972, | |
| "learning_rate": 4.767389532159659e-05, | |
| "loss": 0.7792, | |
| "num_input_tokens_seen": 4631008, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.1392, | |
| "grad_norm": 0.5791791677474976, | |
| "learning_rate": 4.764735734313236e-05, | |
| "loss": 0.7529, | |
| "num_input_tokens_seen": 4660112, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 0.8197824954986572, | |
| "learning_rate": 4.762067631165049e-05, | |
| "loss": 0.6728, | |
| "num_input_tokens_seen": 4689504, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.1408, | |
| "grad_norm": 0.9617213606834412, | |
| "learning_rate": 4.759385239568289e-05, | |
| "loss": 0.6935, | |
| "num_input_tokens_seen": 4715312, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.1416, | |
| "grad_norm": 0.7933773398399353, | |
| "learning_rate": 4.756688576466398e-05, | |
| "loss": 0.8062, | |
| "num_input_tokens_seen": 4735936, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.1424, | |
| "grad_norm": 0.975724458694458, | |
| "learning_rate": 4.753977658892967e-05, | |
| "loss": 0.7149, | |
| "num_input_tokens_seen": 4760256, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.1432, | |
| "grad_norm": 0.755167543888092, | |
| "learning_rate": 4.751252503971624e-05, | |
| "loss": 0.7062, | |
| "num_input_tokens_seen": 4789264, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.84686279296875, | |
| "learning_rate": 4.7485131289159276e-05, | |
| "loss": 0.837, | |
| "num_input_tokens_seen": 4815344, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.1448, | |
| "grad_norm": 0.9440627098083496, | |
| "learning_rate": 4.745759551029261e-05, | |
| "loss": 0.6907, | |
| "num_input_tokens_seen": 4840528, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.1456, | |
| "grad_norm": 0.7293935418128967, | |
| "learning_rate": 4.742991787704719e-05, | |
| "loss": 0.7192, | |
| "num_input_tokens_seen": 4868032, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.1464, | |
| "grad_norm": 0.6401370763778687, | |
| "learning_rate": 4.7402098564249974e-05, | |
| "loss": 0.7223, | |
| "num_input_tokens_seen": 4893376, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.1472, | |
| "grad_norm": 0.8882667422294617, | |
| "learning_rate": 4.737413774762287e-05, | |
| "loss": 0.6847, | |
| "num_input_tokens_seen": 4918288, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.148, | |
| "grad_norm": 0.7663973569869995, | |
| "learning_rate": 4.73460356037816e-05, | |
| "loss": 0.7273, | |
| "num_input_tokens_seen": 4944688, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.1488, | |
| "grad_norm": 0.7966660857200623, | |
| "learning_rate": 4.731779231023456e-05, | |
| "loss": 0.7087, | |
| "num_input_tokens_seen": 4969744, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.1496, | |
| "grad_norm": 1.3590271472930908, | |
| "learning_rate": 4.728940804538176e-05, | |
| "loss": 0.7771, | |
| "num_input_tokens_seen": 4997072, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.1504, | |
| "grad_norm": 0.8245935440063477, | |
| "learning_rate": 4.7260882988513624e-05, | |
| "loss": 0.7598, | |
| "num_input_tokens_seen": 5024672, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.1512, | |
| "grad_norm": 0.6972777247428894, | |
| "learning_rate": 4.723221731980993e-05, | |
| "loss": 0.7961, | |
| "num_input_tokens_seen": 5051952, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 0.7691385746002197, | |
| "learning_rate": 4.720341122033862e-05, | |
| "loss": 0.773, | |
| "num_input_tokens_seen": 5074528, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.1528, | |
| "grad_norm": 1.4361584186553955, | |
| "learning_rate": 4.717446487205466e-05, | |
| "loss": 0.7216, | |
| "num_input_tokens_seen": 5099840, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.1536, | |
| "grad_norm": 0.9640448689460754, | |
| "learning_rate": 4.714537845779894e-05, | |
| "loss": 0.6569, | |
| "num_input_tokens_seen": 5122848, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.1544, | |
| "grad_norm": 0.8036720156669617, | |
| "learning_rate": 4.7116152161297045e-05, | |
| "loss": 0.7994, | |
| "num_input_tokens_seen": 5152320, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.1552, | |
| "grad_norm": 0.7904760241508484, | |
| "learning_rate": 4.708678616715815e-05, | |
| "loss": 0.7259, | |
| "num_input_tokens_seen": 5178816, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.156, | |
| "grad_norm": 0.7007213830947876, | |
| "learning_rate": 4.7057280660873835e-05, | |
| "loss": 0.747, | |
| "num_input_tokens_seen": 5208112, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.1568, | |
| "grad_norm": 0.8959905505180359, | |
| "learning_rate": 4.702763582881692e-05, | |
| "loss": 0.8487, | |
| "num_input_tokens_seen": 5231200, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.1576, | |
| "grad_norm": 0.8828222155570984, | |
| "learning_rate": 4.699785185824026e-05, | |
| "loss": 0.7654, | |
| "num_input_tokens_seen": 5257312, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.1584, | |
| "grad_norm": 1.0120501518249512, | |
| "learning_rate": 4.696792893727562e-05, | |
| "loss": 0.7748, | |
| "num_input_tokens_seen": 5280288, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.1592, | |
| "grad_norm": 0.8728295564651489, | |
| "learning_rate": 4.693786725493242e-05, | |
| "loss": 0.6957, | |
| "num_input_tokens_seen": 5308272, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.252682089805603, | |
| "learning_rate": 4.690766700109659e-05, | |
| "loss": 0.7418, | |
| "num_input_tokens_seen": 5335568, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1608, | |
| "grad_norm": 1.0933836698532104, | |
| "learning_rate": 4.6877328366529346e-05, | |
| "loss": 0.8225, | |
| "num_input_tokens_seen": 5361872, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.1616, | |
| "grad_norm": 0.8167702555656433, | |
| "learning_rate": 4.684685154286599e-05, | |
| "loss": 0.8552, | |
| "num_input_tokens_seen": 5387456, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.1624, | |
| "grad_norm": 0.6640987396240234, | |
| "learning_rate": 4.681623672261469e-05, | |
| "loss": 0.6654, | |
| "num_input_tokens_seen": 5411472, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.1632, | |
| "grad_norm": 0.9311304688453674, | |
| "learning_rate": 4.678548409915532e-05, | |
| "loss": 0.7339, | |
| "num_input_tokens_seen": 5439648, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.164, | |
| "grad_norm": 0.9733570218086243, | |
| "learning_rate": 4.675459386673815e-05, | |
| "loss": 0.7324, | |
| "num_input_tokens_seen": 5468416, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.1648, | |
| "grad_norm": 0.6521669030189514, | |
| "learning_rate": 4.6723566220482664e-05, | |
| "loss": 0.7065, | |
| "num_input_tokens_seen": 5498800, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.1656, | |
| "grad_norm": 0.6702024340629578, | |
| "learning_rate": 4.669240135637635e-05, | |
| "loss": 0.6822, | |
| "num_input_tokens_seen": 5527856, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.1664, | |
| "grad_norm": 1.3757870197296143, | |
| "learning_rate": 4.666109947127343e-05, | |
| "loss": 0.7554, | |
| "num_input_tokens_seen": 5550848, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.1672, | |
| "grad_norm": 0.7441242933273315, | |
| "learning_rate": 4.662966076289362e-05, | |
| "loss": 0.6784, | |
| "num_input_tokens_seen": 5581552, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 0.7709234356880188, | |
| "learning_rate": 4.659808542982088e-05, | |
| "loss": 0.8294, | |
| "num_input_tokens_seen": 5604288, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.1688, | |
| "grad_norm": 0.5358073115348816, | |
| "learning_rate": 4.6566373671502196e-05, | |
| "loss": 0.6633, | |
| "num_input_tokens_seen": 5630336, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.1696, | |
| "grad_norm": 0.5856006741523743, | |
| "learning_rate": 4.653452568824625e-05, | |
| "loss": 0.6684, | |
| "num_input_tokens_seen": 5662480, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.1704, | |
| "grad_norm": 0.7003797292709351, | |
| "learning_rate": 4.650254168122222e-05, | |
| "loss": 0.7109, | |
| "num_input_tokens_seen": 5687376, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.1712, | |
| "grad_norm": 0.7874431014060974, | |
| "learning_rate": 4.647042185245847e-05, | |
| "loss": 0.8036, | |
| "num_input_tokens_seen": 5714896, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.172, | |
| "grad_norm": 0.6988087296485901, | |
| "learning_rate": 4.643816640484131e-05, | |
| "loss": 0.6575, | |
| "num_input_tokens_seen": 5740192, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.1728, | |
| "grad_norm": 0.982477068901062, | |
| "learning_rate": 4.640577554211366e-05, | |
| "loss": 0.7477, | |
| "num_input_tokens_seen": 5768656, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.1736, | |
| "grad_norm": 1.1265698671340942, | |
| "learning_rate": 4.6373249468873833e-05, | |
| "loss": 0.7555, | |
| "num_input_tokens_seen": 5794576, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.1744, | |
| "grad_norm": 0.6747913360595703, | |
| "learning_rate": 4.634058839057417e-05, | |
| "loss": 0.6695, | |
| "num_input_tokens_seen": 5823296, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.1752, | |
| "grad_norm": 0.8027223348617554, | |
| "learning_rate": 4.63077925135198e-05, | |
| "loss": 0.6948, | |
| "num_input_tokens_seen": 5846928, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.7862293720245361, | |
| "learning_rate": 4.6274862044867304e-05, | |
| "loss": 0.7728, | |
| "num_input_tokens_seen": 5871968, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.1768, | |
| "grad_norm": 0.7790197134017944, | |
| "learning_rate": 4.624179719262342e-05, | |
| "loss": 0.765, | |
| "num_input_tokens_seen": 5900304, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.1776, | |
| "grad_norm": 0.8996221423149109, | |
| "learning_rate": 4.6208598165643715e-05, | |
| "loss": 0.6515, | |
| "num_input_tokens_seen": 5925792, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.1784, | |
| "grad_norm": 0.7972677946090698, | |
| "learning_rate": 4.61752651736313e-05, | |
| "loss": 0.75, | |
| "num_input_tokens_seen": 5950672, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.1792, | |
| "grad_norm": 0.6896753907203674, | |
| "learning_rate": 4.614179842713547e-05, | |
| "loss": 0.6592, | |
| "num_input_tokens_seen": 5985552, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.823128342628479, | |
| "learning_rate": 4.610819813755038e-05, | |
| "loss": 0.8463, | |
| "num_input_tokens_seen": 6009904, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.1808, | |
| "grad_norm": 0.8550837635993958, | |
| "learning_rate": 4.607446451711372e-05, | |
| "loss": 0.7349, | |
| "num_input_tokens_seen": 6034160, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.1816, | |
| "grad_norm": 0.8120406270027161, | |
| "learning_rate": 4.604059777890537e-05, | |
| "loss": 0.6396, | |
| "num_input_tokens_seen": 6056544, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.1824, | |
| "grad_norm": 0.6196752786636353, | |
| "learning_rate": 4.6006598136846056e-05, | |
| "loss": 0.6164, | |
| "num_input_tokens_seen": 6083920, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.1832, | |
| "grad_norm": 0.6641353368759155, | |
| "learning_rate": 4.5972465805695996e-05, | |
| "loss": 0.6775, | |
| "num_input_tokens_seen": 6111520, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 0.7323867082595825, | |
| "learning_rate": 4.593820100105355e-05, | |
| "loss": 0.6295, | |
| "num_input_tokens_seen": 6141056, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.1848, | |
| "grad_norm": 0.6919586658477783, | |
| "learning_rate": 4.590380393935383e-05, | |
| "loss": 0.7429, | |
| "num_input_tokens_seen": 6163408, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.1856, | |
| "grad_norm": 0.9530206322669983, | |
| "learning_rate": 4.5869274837867394e-05, | |
| "loss": 0.7516, | |
| "num_input_tokens_seen": 6188816, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.1864, | |
| "grad_norm": 0.9966915845870972, | |
| "learning_rate": 4.583461391469879e-05, | |
| "loss": 0.7524, | |
| "num_input_tokens_seen": 6216800, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.1872, | |
| "grad_norm": 1.096708059310913, | |
| "learning_rate": 4.579982138878527e-05, | |
| "loss": 0.7337, | |
| "num_input_tokens_seen": 6245888, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.188, | |
| "grad_norm": 0.8707526326179504, | |
| "learning_rate": 4.5764897479895317e-05, | |
| "loss": 0.7891, | |
| "num_input_tokens_seen": 6275120, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.1888, | |
| "grad_norm": 0.7489879727363586, | |
| "learning_rate": 4.5729842408627334e-05, | |
| "loss": 0.79, | |
| "num_input_tokens_seen": 6299760, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.1896, | |
| "grad_norm": 0.7835171222686768, | |
| "learning_rate": 4.5694656396408195e-05, | |
| "loss": 0.7506, | |
| "num_input_tokens_seen": 6326720, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.1904, | |
| "grad_norm": 0.7588552832603455, | |
| "learning_rate": 4.565933966549189e-05, | |
| "loss": 0.6294, | |
| "num_input_tokens_seen": 6353728, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.1912, | |
| "grad_norm": 0.6706573367118835, | |
| "learning_rate": 4.5623892438958074e-05, | |
| "loss": 0.7564, | |
| "num_input_tokens_seen": 6379536, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.7340586185455322, | |
| "learning_rate": 4.558831494071069e-05, | |
| "loss": 0.7683, | |
| "num_input_tokens_seen": 6407152, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.1928, | |
| "grad_norm": 0.735789954662323, | |
| "learning_rate": 4.555260739547657e-05, | |
| "loss": 0.7701, | |
| "num_input_tokens_seen": 6434480, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.1936, | |
| "grad_norm": 0.8325262069702148, | |
| "learning_rate": 4.5516770028803954e-05, | |
| "loss": 0.694, | |
| "num_input_tokens_seen": 6463424, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.1944, | |
| "grad_norm": 0.7930346727371216, | |
| "learning_rate": 4.548080306706114e-05, | |
| "loss": 0.7322, | |
| "num_input_tokens_seen": 6487136, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.1952, | |
| "grad_norm": 0.7683930397033691, | |
| "learning_rate": 4.5444706737435014e-05, | |
| "loss": 0.7616, | |
| "num_input_tokens_seen": 6513120, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.196, | |
| "grad_norm": 0.600136399269104, | |
| "learning_rate": 4.5408481267929605e-05, | |
| "loss": 0.6743, | |
| "num_input_tokens_seen": 6543040, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.1968, | |
| "grad_norm": 0.9069085121154785, | |
| "learning_rate": 4.5372126887364655e-05, | |
| "loss": 0.7377, | |
| "num_input_tokens_seen": 6572432, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.1976, | |
| "grad_norm": 0.9226580262184143, | |
| "learning_rate": 4.533564382537421e-05, | |
| "loss": 0.7766, | |
| "num_input_tokens_seen": 6593136, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.1984, | |
| "grad_norm": 0.7376300096511841, | |
| "learning_rate": 4.529903231240511e-05, | |
| "loss": 0.7873, | |
| "num_input_tokens_seen": 6621024, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.1992, | |
| "grad_norm": 0.6371731162071228, | |
| "learning_rate": 4.5262292579715556e-05, | |
| "loss": 0.7096, | |
| "num_input_tokens_seen": 6646480, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.8643271327018738, | |
| "learning_rate": 4.522542485937369e-05, | |
| "loss": 0.8187, | |
| "num_input_tokens_seen": 6674032, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.2008, | |
| "grad_norm": 0.8012109398841858, | |
| "learning_rate": 4.518842938425605e-05, | |
| "loss": 0.772, | |
| "num_input_tokens_seen": 6700112, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.2016, | |
| "grad_norm": 0.7719143033027649, | |
| "learning_rate": 4.5151306388046175e-05, | |
| "loss": 0.6796, | |
| "num_input_tokens_seen": 6727008, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.2024, | |
| "grad_norm": 0.8668113946914673, | |
| "learning_rate": 4.511405610523309e-05, | |
| "loss": 0.7177, | |
| "num_input_tokens_seen": 6752768, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.2032, | |
| "grad_norm": 0.8964220285415649, | |
| "learning_rate": 4.5076678771109815e-05, | |
| "loss": 0.7078, | |
| "num_input_tokens_seen": 6778112, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.204, | |
| "grad_norm": 0.7097613215446472, | |
| "learning_rate": 4.503917462177192e-05, | |
| "loss": 0.6496, | |
| "num_input_tokens_seen": 6804432, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.2048, | |
| "grad_norm": 0.842675507068634, | |
| "learning_rate": 4.5001543894115975e-05, | |
| "loss": 0.6802, | |
| "num_input_tokens_seen": 6829824, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.2056, | |
| "grad_norm": 0.7390193343162537, | |
| "learning_rate": 4.496378682583813e-05, | |
| "loss": 0.7187, | |
| "num_input_tokens_seen": 6858480, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.2064, | |
| "grad_norm": 0.5758505463600159, | |
| "learning_rate": 4.492590365543253e-05, | |
| "loss": 0.6198, | |
| "num_input_tokens_seen": 6886960, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.2072, | |
| "grad_norm": 0.9554662108421326, | |
| "learning_rate": 4.488789462218987e-05, | |
| "loss": 0.6105, | |
| "num_input_tokens_seen": 6912560, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.9423254728317261, | |
| "learning_rate": 4.484975996619589e-05, | |
| "loss": 0.7671, | |
| "num_input_tokens_seen": 6938912, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.2088, | |
| "grad_norm": 0.7120509743690491, | |
| "learning_rate": 4.481149992832977e-05, | |
| "loss": 0.6833, | |
| "num_input_tokens_seen": 6967616, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.2096, | |
| "grad_norm": 0.9409400224685669, | |
| "learning_rate": 4.477311475026271e-05, | |
| "loss": 0.7547, | |
| "num_input_tokens_seen": 6993872, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.2104, | |
| "grad_norm": 0.8102442026138306, | |
| "learning_rate": 4.473460467445637e-05, | |
| "loss": 0.7479, | |
| "num_input_tokens_seen": 7020784, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.2112, | |
| "grad_norm": 0.787486732006073, | |
| "learning_rate": 4.46959699441613e-05, | |
| "loss": 0.761, | |
| "num_input_tokens_seen": 7045024, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.212, | |
| "grad_norm": 0.8877683877944946, | |
| "learning_rate": 4.465721080341547e-05, | |
| "loss": 0.7612, | |
| "num_input_tokens_seen": 7072448, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.2128, | |
| "grad_norm": 0.7483372688293457, | |
| "learning_rate": 4.461832749704268e-05, | |
| "loss": 0.6792, | |
| "num_input_tokens_seen": 7097776, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.2136, | |
| "grad_norm": 0.7852973341941833, | |
| "learning_rate": 4.457932027065102e-05, | |
| "loss": 0.7357, | |
| "num_input_tokens_seen": 7123568, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.2144, | |
| "grad_norm": 0.7306565642356873, | |
| "learning_rate": 4.4540189370631315e-05, | |
| "loss": 0.6676, | |
| "num_input_tokens_seen": 7151728, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.2152, | |
| "grad_norm": 0.7990534901618958, | |
| "learning_rate": 4.4500935044155626e-05, | |
| "loss": 0.7394, | |
| "num_input_tokens_seen": 7181664, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 1.287644863128662, | |
| "learning_rate": 4.4461557539175594e-05, | |
| "loss": 0.8017, | |
| "num_input_tokens_seen": 7210336, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.2168, | |
| "grad_norm": 0.7476962208747864, | |
| "learning_rate": 4.4422057104420946e-05, | |
| "loss": 0.6533, | |
| "num_input_tokens_seen": 7240992, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.2176, | |
| "grad_norm": 0.8233410120010376, | |
| "learning_rate": 4.4382433989397895e-05, | |
| "loss": 0.7029, | |
| "num_input_tokens_seen": 7268048, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.2184, | |
| "grad_norm": 0.609846293926239, | |
| "learning_rate": 4.434268844438758e-05, | |
| "loss": 0.7096, | |
| "num_input_tokens_seen": 7297616, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.2192, | |
| "grad_norm": 1.010886549949646, | |
| "learning_rate": 4.4302820720444456e-05, | |
| "loss": 0.8103, | |
| "num_input_tokens_seen": 7326912, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.7681688070297241, | |
| "learning_rate": 4.426283106939474e-05, | |
| "loss": 0.6238, | |
| "num_input_tokens_seen": 7355136, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.2208, | |
| "grad_norm": 0.7759270071983337, | |
| "learning_rate": 4.422271974383479e-05, | |
| "loss": 0.6625, | |
| "num_input_tokens_seen": 7377584, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.2216, | |
| "grad_norm": 0.831362783908844, | |
| "learning_rate": 4.418248699712955e-05, | |
| "loss": 0.6831, | |
| "num_input_tokens_seen": 7405552, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.2224, | |
| "grad_norm": 0.7530121207237244, | |
| "learning_rate": 4.414213308341092e-05, | |
| "loss": 0.7664, | |
| "num_input_tokens_seen": 7430960, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.2232, | |
| "grad_norm": 0.8572810292243958, | |
| "learning_rate": 4.410165825757613e-05, | |
| "loss": 0.7273, | |
| "num_input_tokens_seen": 7457136, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.7553160190582275, | |
| "learning_rate": 4.40610627752862e-05, | |
| "loss": 0.6607, | |
| "num_input_tokens_seen": 7482208, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.2248, | |
| "grad_norm": 0.6897515058517456, | |
| "learning_rate": 4.4020346892964246e-05, | |
| "loss": 0.731, | |
| "num_input_tokens_seen": 7515760, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.2256, | |
| "grad_norm": 0.7974178791046143, | |
| "learning_rate": 4.3979510867793917e-05, | |
| "loss": 0.7258, | |
| "num_input_tokens_seen": 7542944, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.2264, | |
| "grad_norm": 0.8745766282081604, | |
| "learning_rate": 4.393855495771774e-05, | |
| "loss": 0.6566, | |
| "num_input_tokens_seen": 7573760, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.2272, | |
| "grad_norm": 0.749857485294342, | |
| "learning_rate": 4.38974794214355e-05, | |
| "loss": 0.7433, | |
| "num_input_tokens_seen": 7606592, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.228, | |
| "grad_norm": 0.7722298502922058, | |
| "learning_rate": 4.3856284518402594e-05, | |
| "loss": 0.7452, | |
| "num_input_tokens_seen": 7628672, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.2288, | |
| "grad_norm": 0.8768362998962402, | |
| "learning_rate": 4.381497050882845e-05, | |
| "loss": 0.7077, | |
| "num_input_tokens_seen": 7658528, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.2296, | |
| "grad_norm": 0.7979273796081543, | |
| "learning_rate": 4.377353765367479e-05, | |
| "loss": 0.6274, | |
| "num_input_tokens_seen": 7685248, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.2304, | |
| "grad_norm": 0.988314151763916, | |
| "learning_rate": 4.3731986214654035e-05, | |
| "loss": 0.6845, | |
| "num_input_tokens_seen": 7713616, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.2312, | |
| "grad_norm": 0.7991346120834351, | |
| "learning_rate": 4.3690316454227674e-05, | |
| "loss": 0.7115, | |
| "num_input_tokens_seen": 7740304, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 1.072383999824524, | |
| "learning_rate": 4.3648528635604556e-05, | |
| "loss": 0.7209, | |
| "num_input_tokens_seen": 7766848, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.2328, | |
| "grad_norm": 1.357325792312622, | |
| "learning_rate": 4.360662302273924e-05, | |
| "loss": 0.8239, | |
| "num_input_tokens_seen": 7791888, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.2336, | |
| "grad_norm": 0.6083495020866394, | |
| "learning_rate": 4.3564599880330385e-05, | |
| "loss": 0.6199, | |
| "num_input_tokens_seen": 7822448, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.2344, | |
| "grad_norm": 0.7359746098518372, | |
| "learning_rate": 4.352245947381898e-05, | |
| "loss": 0.7481, | |
| "num_input_tokens_seen": 7848464, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.2352, | |
| "grad_norm": 0.9160847067832947, | |
| "learning_rate": 4.348020206938672e-05, | |
| "loss": 0.7235, | |
| "num_input_tokens_seen": 7877216, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.236, | |
| "grad_norm": 0.7445215582847595, | |
| "learning_rate": 4.343782793395435e-05, | |
| "loss": 0.7345, | |
| "num_input_tokens_seen": 7904368, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.2368, | |
| "grad_norm": 0.8324536681175232, | |
| "learning_rate": 4.3395337335179945e-05, | |
| "loss": 0.7532, | |
| "num_input_tokens_seen": 7931520, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.2376, | |
| "grad_norm": 1.0249683856964111, | |
| "learning_rate": 4.335273054145722e-05, | |
| "loss": 0.6902, | |
| "num_input_tokens_seen": 7953296, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.2384, | |
| "grad_norm": 0.6565669775009155, | |
| "learning_rate": 4.3310007821913836e-05, | |
| "loss": 0.7329, | |
| "num_input_tokens_seen": 7978832, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.2392, | |
| "grad_norm": 0.8256237506866455, | |
| "learning_rate": 4.32671694464097e-05, | |
| "loss": 0.6693, | |
| "num_input_tokens_seen": 8004992, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.9722650051116943, | |
| "learning_rate": 4.3224215685535294e-05, | |
| "loss": 0.7418, | |
| "num_input_tokens_seen": 8027824, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.2408, | |
| "grad_norm": 0.599818229675293, | |
| "learning_rate": 4.31811468106099e-05, | |
| "loss": 0.6157, | |
| "num_input_tokens_seen": 8058528, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.2416, | |
| "grad_norm": 1.0976861715316772, | |
| "learning_rate": 4.3137963093679945e-05, | |
| "loss": 0.6302, | |
| "num_input_tokens_seen": 8081984, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.2424, | |
| "grad_norm": 0.5699600577354431, | |
| "learning_rate": 4.309466480751726e-05, | |
| "loss": 0.628, | |
| "num_input_tokens_seen": 8113216, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.2432, | |
| "grad_norm": 0.8899049758911133, | |
| "learning_rate": 4.305125222561736e-05, | |
| "loss": 0.635, | |
| "num_input_tokens_seen": 8142080, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.244, | |
| "grad_norm": 0.9494242072105408, | |
| "learning_rate": 4.3007725622197674e-05, | |
| "loss": 0.8114, | |
| "num_input_tokens_seen": 8171008, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.2448, | |
| "grad_norm": 0.9237959384918213, | |
| "learning_rate": 4.296408527219592e-05, | |
| "loss": 0.6678, | |
| "num_input_tokens_seen": 8197696, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.2456, | |
| "grad_norm": 0.8756378889083862, | |
| "learning_rate": 4.292033145126825e-05, | |
| "loss": 0.8364, | |
| "num_input_tokens_seen": 8225552, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.2464, | |
| "grad_norm": 0.9631836414337158, | |
| "learning_rate": 4.287646443578758e-05, | |
| "loss": 0.7312, | |
| "num_input_tokens_seen": 8257120, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.2472, | |
| "grad_norm": 0.920713484287262, | |
| "learning_rate": 4.283248450284182e-05, | |
| "loss": 0.8067, | |
| "num_input_tokens_seen": 8282400, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 1.0773414373397827, | |
| "learning_rate": 4.2788391930232136e-05, | |
| "loss": 0.7109, | |
| "num_input_tokens_seen": 8309568, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.2488, | |
| "grad_norm": 0.5621623396873474, | |
| "learning_rate": 4.2744186996471174e-05, | |
| "loss": 0.6543, | |
| "num_input_tokens_seen": 8338864, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.2496, | |
| "grad_norm": 0.8737258315086365, | |
| "learning_rate": 4.269986998078132e-05, | |
| "loss": 0.7401, | |
| "num_input_tokens_seen": 8364592, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.2504, | |
| "grad_norm": 0.8454060554504395, | |
| "learning_rate": 4.265544116309294e-05, | |
| "loss": 0.7538, | |
| "num_input_tokens_seen": 8391120, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.2512, | |
| "grad_norm": 0.8107228875160217, | |
| "learning_rate": 4.261090082404258e-05, | |
| "loss": 0.7705, | |
| "num_input_tokens_seen": 8418320, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.252, | |
| "grad_norm": 0.7339603304862976, | |
| "learning_rate": 4.256624924497123e-05, | |
| "loss": 0.6846, | |
| "num_input_tokens_seen": 8446640, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.2528, | |
| "grad_norm": 1.0036543607711792, | |
| "learning_rate": 4.252148670792254e-05, | |
| "loss": 0.8502, | |
| "num_input_tokens_seen": 8470416, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.2536, | |
| "grad_norm": 0.8186982870101929, | |
| "learning_rate": 4.2476613495641026e-05, | |
| "loss": 0.6987, | |
| "num_input_tokens_seen": 8498160, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.2544, | |
| "grad_norm": 0.9724487066268921, | |
| "learning_rate": 4.2431629891570266e-05, | |
| "loss": 0.6461, | |
| "num_input_tokens_seen": 8525904, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.2552, | |
| "grad_norm": 0.5958553552627563, | |
| "learning_rate": 4.238653617985118e-05, | |
| "loss": 0.7143, | |
| "num_input_tokens_seen": 8551872, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 1.0192784070968628, | |
| "learning_rate": 4.234133264532012e-05, | |
| "loss": 0.7215, | |
| "num_input_tokens_seen": 8583440, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.2568, | |
| "grad_norm": 0.7806874513626099, | |
| "learning_rate": 4.229601957350722e-05, | |
| "loss": 0.8008, | |
| "num_input_tokens_seen": 8609632, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.2576, | |
| "grad_norm": 1.086475133895874, | |
| "learning_rate": 4.225059725063444e-05, | |
| "loss": 0.6612, | |
| "num_input_tokens_seen": 8633888, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.2584, | |
| "grad_norm": 0.6213988065719604, | |
| "learning_rate": 4.2205065963613864e-05, | |
| "loss": 0.7544, | |
| "num_input_tokens_seen": 8660288, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.2592, | |
| "grad_norm": 1.0608100891113281, | |
| "learning_rate": 4.2159426000045854e-05, | |
| "loss": 0.7569, | |
| "num_input_tokens_seen": 8689184, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.7601464986801147, | |
| "learning_rate": 4.211367764821722e-05, | |
| "loss": 0.8161, | |
| "num_input_tokens_seen": 8713504, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.2608, | |
| "grad_norm": 0.9310168623924255, | |
| "learning_rate": 4.206782119709942e-05, | |
| "loss": 0.8283, | |
| "num_input_tokens_seen": 8741088, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.2616, | |
| "grad_norm": 0.6408126354217529, | |
| "learning_rate": 4.20218569363467e-05, | |
| "loss": 0.5745, | |
| "num_input_tokens_seen": 8767456, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.2624, | |
| "grad_norm": 1.1697090864181519, | |
| "learning_rate": 4.197578515629435e-05, | |
| "loss": 0.7525, | |
| "num_input_tokens_seen": 8791952, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.2632, | |
| "grad_norm": 0.9160236716270447, | |
| "learning_rate": 4.192960614795675e-05, | |
| "loss": 0.7991, | |
| "num_input_tokens_seen": 8816080, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "grad_norm": 1.0530091524124146, | |
| "learning_rate": 4.188332020302561e-05, | |
| "loss": 0.7297, | |
| "num_input_tokens_seen": 8841536, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.2648, | |
| "grad_norm": 0.8888834118843079, | |
| "learning_rate": 4.183692761386813e-05, | |
| "loss": 0.6276, | |
| "num_input_tokens_seen": 8869872, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.2656, | |
| "grad_norm": 0.6144154667854309, | |
| "learning_rate": 4.179042867352511e-05, | |
| "loss": 0.7127, | |
| "num_input_tokens_seen": 8893152, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.2664, | |
| "grad_norm": 0.814166247844696, | |
| "learning_rate": 4.174382367570912e-05, | |
| "loss": 0.7712, | |
| "num_input_tokens_seen": 8923040, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.2672, | |
| "grad_norm": 0.8960988521575928, | |
| "learning_rate": 4.169711291480266e-05, | |
| "loss": 0.8388, | |
| "num_input_tokens_seen": 8945856, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.268, | |
| "grad_norm": 0.8164514303207397, | |
| "learning_rate": 4.165029668585629e-05, | |
| "loss": 0.7538, | |
| "num_input_tokens_seen": 8971664, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.2688, | |
| "grad_norm": 0.8044324517250061, | |
| "learning_rate": 4.160337528458676e-05, | |
| "loss": 0.708, | |
| "num_input_tokens_seen": 8996064, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.2696, | |
| "grad_norm": 0.7704948782920837, | |
| "learning_rate": 4.155634900737513e-05, | |
| "loss": 0.668, | |
| "num_input_tokens_seen": 9022416, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.2704, | |
| "grad_norm": 0.8315603137016296, | |
| "learning_rate": 4.150921815126493e-05, | |
| "loss": 0.752, | |
| "num_input_tokens_seen": 9052480, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.2712, | |
| "grad_norm": 0.7516652345657349, | |
| "learning_rate": 4.1461983013960245e-05, | |
| "loss": 0.6534, | |
| "num_input_tokens_seen": 9079760, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.7449467182159424, | |
| "learning_rate": 4.1414643893823914e-05, | |
| "loss": 0.6808, | |
| "num_input_tokens_seen": 9109424, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.2728, | |
| "grad_norm": 0.6889111995697021, | |
| "learning_rate": 4.136720108987552e-05, | |
| "loss": 0.7627, | |
| "num_input_tokens_seen": 9132128, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.2736, | |
| "grad_norm": 0.9195050597190857, | |
| "learning_rate": 4.131965490178959e-05, | |
| "loss": 0.6527, | |
| "num_input_tokens_seen": 9160960, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.2744, | |
| "grad_norm": 0.9934877157211304, | |
| "learning_rate": 4.1272005629893714e-05, | |
| "loss": 0.7102, | |
| "num_input_tokens_seen": 9190992, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.2752, | |
| "grad_norm": 0.8816946148872375, | |
| "learning_rate": 4.122425357516658e-05, | |
| "loss": 0.67, | |
| "num_input_tokens_seen": 9218320, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.276, | |
| "grad_norm": 0.7904371619224548, | |
| "learning_rate": 4.1176399039236116e-05, | |
| "loss": 0.7159, | |
| "num_input_tokens_seen": 9246304, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.2768, | |
| "grad_norm": 0.795921266078949, | |
| "learning_rate": 4.112844232437757e-05, | |
| "loss": 0.8248, | |
| "num_input_tokens_seen": 9271856, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.2776, | |
| "grad_norm": 0.8109453320503235, | |
| "learning_rate": 4.108038373351163e-05, | |
| "loss": 0.7264, | |
| "num_input_tokens_seen": 9297152, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.2784, | |
| "grad_norm": 0.8012672066688538, | |
| "learning_rate": 4.1032223570202474e-05, | |
| "loss": 0.7368, | |
| "num_input_tokens_seen": 9326896, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.2792, | |
| "grad_norm": 0.8711723685264587, | |
| "learning_rate": 4.0983962138655873e-05, | |
| "loss": 0.6245, | |
| "num_input_tokens_seen": 9351680, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.034636378288269, | |
| "learning_rate": 4.093559974371725e-05, | |
| "loss": 0.8033, | |
| "num_input_tokens_seen": 9374896, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.2808, | |
| "grad_norm": 0.8999419808387756, | |
| "learning_rate": 4.088713669086977e-05, | |
| "loss": 0.6803, | |
| "num_input_tokens_seen": 9400592, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.2816, | |
| "grad_norm": 0.5961094498634338, | |
| "learning_rate": 4.083857328623243e-05, | |
| "loss": 0.7384, | |
| "num_input_tokens_seen": 9429280, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.2824, | |
| "grad_norm": 1.194028377532959, | |
| "learning_rate": 4.078990983655807e-05, | |
| "loss": 0.8149, | |
| "num_input_tokens_seen": 9454736, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.2832, | |
| "grad_norm": 0.904292643070221, | |
| "learning_rate": 4.0741146649231504e-05, | |
| "loss": 0.7243, | |
| "num_input_tokens_seen": 9479648, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.284, | |
| "grad_norm": 0.8501243591308594, | |
| "learning_rate": 4.0692284032267516e-05, | |
| "loss": 0.7639, | |
| "num_input_tokens_seen": 9504432, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.2848, | |
| "grad_norm": 1.0718458890914917, | |
| "learning_rate": 4.064332229430895e-05, | |
| "loss": 0.6857, | |
| "num_input_tokens_seen": 9528880, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.2856, | |
| "grad_norm": 0.7065584063529968, | |
| "learning_rate": 4.059426174462476e-05, | |
| "loss": 0.69, | |
| "num_input_tokens_seen": 9557360, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.2864, | |
| "grad_norm": 1.0800750255584717, | |
| "learning_rate": 4.054510269310803e-05, | |
| "loss": 0.704, | |
| "num_input_tokens_seen": 9580608, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.2872, | |
| "grad_norm": 0.5907096862792969, | |
| "learning_rate": 4.0495845450274064e-05, | |
| "loss": 0.8015, | |
| "num_input_tokens_seen": 9611376, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.9455146789550781, | |
| "learning_rate": 4.044649032725836e-05, | |
| "loss": 0.7382, | |
| "num_input_tokens_seen": 9640784, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.2888, | |
| "grad_norm": 1.0408939123153687, | |
| "learning_rate": 4.039703763581472e-05, | |
| "loss": 0.7299, | |
| "num_input_tokens_seen": 9667120, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.2896, | |
| "grad_norm": 0.8098856806755066, | |
| "learning_rate": 4.0347487688313194e-05, | |
| "loss": 0.6402, | |
| "num_input_tokens_seen": 9696832, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.2904, | |
| "grad_norm": 0.695599377155304, | |
| "learning_rate": 4.02978407977382e-05, | |
| "loss": 0.711, | |
| "num_input_tokens_seen": 9722080, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.2912, | |
| "grad_norm": 0.6605217456817627, | |
| "learning_rate": 4.024809727768648e-05, | |
| "loss": 0.6587, | |
| "num_input_tokens_seen": 9748096, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.292, | |
| "grad_norm": 0.9249849915504456, | |
| "learning_rate": 4.019825744236514e-05, | |
| "loss": 0.6656, | |
| "num_input_tokens_seen": 9774128, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.2928, | |
| "grad_norm": 0.8226694464683533, | |
| "learning_rate": 4.0148321606589656e-05, | |
| "loss": 0.7143, | |
| "num_input_tokens_seen": 9805488, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.2936, | |
| "grad_norm": 1.0425550937652588, | |
| "learning_rate": 4.009829008578192e-05, | |
| "loss": 0.6735, | |
| "num_input_tokens_seen": 9828480, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.2944, | |
| "grad_norm": 0.6911535263061523, | |
| "learning_rate": 4.0048163195968214e-05, | |
| "loss": 0.7395, | |
| "num_input_tokens_seen": 9863648, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.2952, | |
| "grad_norm": 0.8600900769233704, | |
| "learning_rate": 3.999794125377721e-05, | |
| "loss": 0.729, | |
| "num_input_tokens_seen": 9893184, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "grad_norm": 1.009696364402771, | |
| "learning_rate": 3.9947624576437975e-05, | |
| "loss": 0.6565, | |
| "num_input_tokens_seen": 9922464, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.2968, | |
| "grad_norm": 0.916327178478241, | |
| "learning_rate": 3.9897213481778006e-05, | |
| "loss": 0.691, | |
| "num_input_tokens_seen": 9948384, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.2976, | |
| "grad_norm": 0.9392701387405396, | |
| "learning_rate": 3.984670828822118e-05, | |
| "loss": 0.7408, | |
| "num_input_tokens_seen": 9973760, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.2984, | |
| "grad_norm": 0.9044517278671265, | |
| "learning_rate": 3.979610931478574e-05, | |
| "loss": 0.761, | |
| "num_input_tokens_seen": 10001648, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.2992, | |
| "grad_norm": 0.9471223950386047, | |
| "learning_rate": 3.97454168810823e-05, | |
| "loss": 0.8524, | |
| "num_input_tokens_seen": 10024912, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.0985262393951416, | |
| "learning_rate": 3.969463130731183e-05, | |
| "loss": 0.7221, | |
| "num_input_tokens_seen": 10049872, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.3008, | |
| "grad_norm": 0.8284273147583008, | |
| "learning_rate": 3.964375291426361e-05, | |
| "loss": 0.7708, | |
| "num_input_tokens_seen": 10073568, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.3016, | |
| "grad_norm": 0.7012784481048584, | |
| "learning_rate": 3.959278202331322e-05, | |
| "loss": 0.6842, | |
| "num_input_tokens_seen": 10098448, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.3024, | |
| "grad_norm": 1.1056398153305054, | |
| "learning_rate": 3.954171895642052e-05, | |
| "loss": 0.772, | |
| "num_input_tokens_seen": 10123168, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.3032, | |
| "grad_norm": 1.0128076076507568, | |
| "learning_rate": 3.949056403612758e-05, | |
| "loss": 0.6993, | |
| "num_input_tokens_seen": 10149440, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 0.7793564796447754, | |
| "learning_rate": 3.943931758555669e-05, | |
| "loss": 0.7672, | |
| "num_input_tokens_seen": 10174496, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.3048, | |
| "grad_norm": 0.909677267074585, | |
| "learning_rate": 3.938797992840828e-05, | |
| "loss": 0.6716, | |
| "num_input_tokens_seen": 10199648, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.3056, | |
| "grad_norm": 0.8851680159568787, | |
| "learning_rate": 3.933655138895889e-05, | |
| "loss": 0.7062, | |
| "num_input_tokens_seen": 10221840, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.3064, | |
| "grad_norm": 0.9452556371688843, | |
| "learning_rate": 3.928503229205913e-05, | |
| "loss": 0.6748, | |
| "num_input_tokens_seen": 10247504, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.3072, | |
| "grad_norm": 0.8891339302062988, | |
| "learning_rate": 3.9233422963131616e-05, | |
| "loss": 0.6331, | |
| "num_input_tokens_seen": 10277984, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.308, | |
| "grad_norm": 0.9662081599235535, | |
| "learning_rate": 3.9181723728168916e-05, | |
| "loss": 0.779, | |
| "num_input_tokens_seen": 10300400, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.3088, | |
| "grad_norm": 0.9517924785614014, | |
| "learning_rate": 3.91299349137315e-05, | |
| "loss": 0.722, | |
| "num_input_tokens_seen": 10326672, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.3096, | |
| "grad_norm": 0.755901575088501, | |
| "learning_rate": 3.907805684694566e-05, | |
| "loss": 0.6321, | |
| "num_input_tokens_seen": 10356864, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.3104, | |
| "grad_norm": 0.8272456526756287, | |
| "learning_rate": 3.902608985550147e-05, | |
| "loss": 0.6077, | |
| "num_input_tokens_seen": 10388032, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.3112, | |
| "grad_norm": 1.138036847114563, | |
| "learning_rate": 3.897403426765069e-05, | |
| "loss": 0.6726, | |
| "num_input_tokens_seen": 10417152, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.312, | |
| "grad_norm": 0.8155280351638794, | |
| "learning_rate": 3.8921890412204705e-05, | |
| "loss": 0.741, | |
| "num_input_tokens_seen": 10448128, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.3128, | |
| "grad_norm": 0.7004032135009766, | |
| "learning_rate": 3.886965861853244e-05, | |
| "loss": 0.6555, | |
| "num_input_tokens_seen": 10474960, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.3136, | |
| "grad_norm": 0.9554680585861206, | |
| "learning_rate": 3.881733921655829e-05, | |
| "loss": 0.75, | |
| "num_input_tokens_seen": 10502848, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.3144, | |
| "grad_norm": 0.8525771498680115, | |
| "learning_rate": 3.876493253676004e-05, | |
| "loss": 0.7042, | |
| "num_input_tokens_seen": 10532640, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.3152, | |
| "grad_norm": 0.8739621043205261, | |
| "learning_rate": 3.871243891016676e-05, | |
| "loss": 0.6188, | |
| "num_input_tokens_seen": 10560096, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.316, | |
| "grad_norm": 0.9146223068237305, | |
| "learning_rate": 3.865985866835673e-05, | |
| "loss": 0.8165, | |
| "num_input_tokens_seen": 10585520, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.3168, | |
| "grad_norm": 1.1149648427963257, | |
| "learning_rate": 3.8607192143455326e-05, | |
| "loss": 0.7437, | |
| "num_input_tokens_seen": 10614560, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.3176, | |
| "grad_norm": 0.9382626414299011, | |
| "learning_rate": 3.8554439668132946e-05, | |
| "loss": 0.7758, | |
| "num_input_tokens_seen": 10637344, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.3184, | |
| "grad_norm": 0.9469596743583679, | |
| "learning_rate": 3.85016015756029e-05, | |
| "loss": 0.7593, | |
| "num_input_tokens_seen": 10663440, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.3192, | |
| "grad_norm": 0.8701977133750916, | |
| "learning_rate": 3.844867819961928e-05, | |
| "loss": 0.6535, | |
| "num_input_tokens_seen": 10693392, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.7110251188278198, | |
| "learning_rate": 3.8395669874474915e-05, | |
| "loss": 0.8263, | |
| "num_input_tokens_seen": 10719232, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.3208, | |
| "grad_norm": 0.8518005609512329, | |
| "learning_rate": 3.8342576934999184e-05, | |
| "loss": 0.7992, | |
| "num_input_tokens_seen": 10746560, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.3216, | |
| "grad_norm": 0.9604689478874207, | |
| "learning_rate": 3.828939971655595e-05, | |
| "loss": 0.7513, | |
| "num_input_tokens_seen": 10768512, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.3224, | |
| "grad_norm": 0.8639784455299377, | |
| "learning_rate": 3.8236138555041434e-05, | |
| "loss": 0.6775, | |
| "num_input_tokens_seen": 10803648, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.3232, | |
| "grad_norm": 0.8527281880378723, | |
| "learning_rate": 3.8182793786882065e-05, | |
| "loss": 0.7856, | |
| "num_input_tokens_seen": 10830640, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.324, | |
| "grad_norm": 0.7717742919921875, | |
| "learning_rate": 3.81293657490324e-05, | |
| "loss": 0.6793, | |
| "num_input_tokens_seen": 10860272, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.3248, | |
| "grad_norm": 0.6685821413993835, | |
| "learning_rate": 3.8075854778972955e-05, | |
| "loss": 0.6546, | |
| "num_input_tokens_seen": 10887664, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.3256, | |
| "grad_norm": 0.9813340306282043, | |
| "learning_rate": 3.802226121470811e-05, | |
| "loss": 0.6673, | |
| "num_input_tokens_seen": 10912000, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.3264, | |
| "grad_norm": 1.0419212579727173, | |
| "learning_rate": 3.796858539476394e-05, | |
| "loss": 0.6933, | |
| "num_input_tokens_seen": 10936704, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.3272, | |
| "grad_norm": 0.851434588432312, | |
| "learning_rate": 3.7914827658186103e-05, | |
| "loss": 0.6593, | |
| "num_input_tokens_seen": 10960464, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.328, | |
| "grad_norm": 0.7272098660469055, | |
| "learning_rate": 3.786098834453766e-05, | |
| "loss": 0.6246, | |
| "num_input_tokens_seen": 10989680, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.3288, | |
| "grad_norm": 0.7740225791931152, | |
| "learning_rate": 3.780706779389701e-05, | |
| "loss": 0.7029, | |
| "num_input_tokens_seen": 11014928, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.3296, | |
| "grad_norm": 0.963455080986023, | |
| "learning_rate": 3.775306634685562e-05, | |
| "loss": 0.7331, | |
| "num_input_tokens_seen": 11041920, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.3304, | |
| "grad_norm": 0.7765479683876038, | |
| "learning_rate": 3.7698984344515997e-05, | |
| "loss": 0.6624, | |
| "num_input_tokens_seen": 11070304, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.3312, | |
| "grad_norm": 0.8283601999282837, | |
| "learning_rate": 3.764482212848948e-05, | |
| "loss": 0.7505, | |
| "num_input_tokens_seen": 11099520, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.332, | |
| "grad_norm": 0.5610854029655457, | |
| "learning_rate": 3.759058004089402e-05, | |
| "loss": 0.6908, | |
| "num_input_tokens_seen": 11129008, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.3328, | |
| "grad_norm": 0.8462053537368774, | |
| "learning_rate": 3.753625842435216e-05, | |
| "loss": 0.7062, | |
| "num_input_tokens_seen": 11151600, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.3336, | |
| "grad_norm": 0.8926122188568115, | |
| "learning_rate": 3.748185762198873e-05, | |
| "loss": 0.7177, | |
| "num_input_tokens_seen": 11176784, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.3344, | |
| "grad_norm": 0.6711943745613098, | |
| "learning_rate": 3.742737797742878e-05, | |
| "loss": 0.7504, | |
| "num_input_tokens_seen": 11205008, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.3352, | |
| "grad_norm": 1.014253854751587, | |
| "learning_rate": 3.7372819834795335e-05, | |
| "loss": 0.7144, | |
| "num_input_tokens_seen": 11229872, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 0.7249706983566284, | |
| "learning_rate": 3.731818353870729e-05, | |
| "loss": 0.6876, | |
| "num_input_tokens_seen": 11253296, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.3368, | |
| "grad_norm": 0.8249915838241577, | |
| "learning_rate": 3.726346943427719e-05, | |
| "loss": 0.7102, | |
| "num_input_tokens_seen": 11279408, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.3376, | |
| "grad_norm": 1.027541995048523, | |
| "learning_rate": 3.720867786710904e-05, | |
| "loss": 0.7708, | |
| "num_input_tokens_seen": 11304176, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.3384, | |
| "grad_norm": 0.7004812955856323, | |
| "learning_rate": 3.7153809183296176e-05, | |
| "loss": 0.5882, | |
| "num_input_tokens_seen": 11330944, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.3392, | |
| "grad_norm": 1.1122636795043945, | |
| "learning_rate": 3.7098863729419e-05, | |
| "loss": 0.6127, | |
| "num_input_tokens_seen": 11354064, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 0.925553560256958, | |
| "learning_rate": 3.704384185254288e-05, | |
| "loss": 0.7732, | |
| "num_input_tokens_seen": 11376288, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.3408, | |
| "grad_norm": 0.6940233707427979, | |
| "learning_rate": 3.6988743900215894e-05, | |
| "loss": 0.7334, | |
| "num_input_tokens_seen": 11405472, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.3416, | |
| "grad_norm": 0.7634669542312622, | |
| "learning_rate": 3.693357022046665e-05, | |
| "loss": 0.8137, | |
| "num_input_tokens_seen": 11431552, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.3424, | |
| "grad_norm": 0.804530680179596, | |
| "learning_rate": 3.68783211618021e-05, | |
| "loss": 0.6987, | |
| "num_input_tokens_seen": 11459152, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.3432, | |
| "grad_norm": 1.1058536767959595, | |
| "learning_rate": 3.682299707320532e-05, | |
| "loss": 0.6614, | |
| "num_input_tokens_seen": 11487552, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.344, | |
| "grad_norm": 0.6808910369873047, | |
| "learning_rate": 3.6767598304133324e-05, | |
| "loss": 0.688, | |
| "num_input_tokens_seen": 11515792, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.3448, | |
| "grad_norm": 1.0619826316833496, | |
| "learning_rate": 3.671212520451484e-05, | |
| "loss": 0.7897, | |
| "num_input_tokens_seen": 11541280, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.3456, | |
| "grad_norm": 0.8404290080070496, | |
| "learning_rate": 3.665657812474812e-05, | |
| "loss": 0.7086, | |
| "num_input_tokens_seen": 11569440, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.3464, | |
| "grad_norm": 1.316372036933899, | |
| "learning_rate": 3.660095741569871e-05, | |
| "loss": 0.7421, | |
| "num_input_tokens_seen": 11597792, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.3472, | |
| "grad_norm": 0.7798539400100708, | |
| "learning_rate": 3.654526342869724e-05, | |
| "loss": 0.6954, | |
| "num_input_tokens_seen": 11622864, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.348, | |
| "grad_norm": 0.7101672887802124, | |
| "learning_rate": 3.6489496515537204e-05, | |
| "loss": 0.6764, | |
| "num_input_tokens_seen": 11651280, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.3488, | |
| "grad_norm": 0.8456715941429138, | |
| "learning_rate": 3.643365702847272e-05, | |
| "loss": 0.705, | |
| "num_input_tokens_seen": 11680048, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.3496, | |
| "grad_norm": 0.9790185689926147, | |
| "learning_rate": 3.6377745320216346e-05, | |
| "loss": 0.7433, | |
| "num_input_tokens_seen": 11702144, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.3504, | |
| "grad_norm": 0.9205552935600281, | |
| "learning_rate": 3.632176174393682e-05, | |
| "loss": 0.653, | |
| "num_input_tokens_seen": 11728816, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.3512, | |
| "grad_norm": 0.8499376177787781, | |
| "learning_rate": 3.626570665325684e-05, | |
| "loss": 0.6381, | |
| "num_input_tokens_seen": 11756688, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.7778225541114807, | |
| "learning_rate": 3.6209580402250815e-05, | |
| "loss": 0.7347, | |
| "num_input_tokens_seen": 11781664, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.3528, | |
| "grad_norm": 0.8913766145706177, | |
| "learning_rate": 3.615338334544265e-05, | |
| "loss": 0.8036, | |
| "num_input_tokens_seen": 11808352, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.3536, | |
| "grad_norm": 1.0191758871078491, | |
| "learning_rate": 3.6097115837803505e-05, | |
| "loss": 0.7486, | |
| "num_input_tokens_seen": 11836400, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.3544, | |
| "grad_norm": 0.7858436703681946, | |
| "learning_rate": 3.604077823474954e-05, | |
| "loss": 0.7885, | |
| "num_input_tokens_seen": 11862608, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.3552, | |
| "grad_norm": 0.6349871158599854, | |
| "learning_rate": 3.5984370892139666e-05, | |
| "loss": 0.7005, | |
| "num_input_tokens_seen": 11886528, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.356, | |
| "grad_norm": 0.8877844214439392, | |
| "learning_rate": 3.592789416627332e-05, | |
| "loss": 0.607, | |
| "num_input_tokens_seen": 11915040, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.3568, | |
| "grad_norm": 1.1504970788955688, | |
| "learning_rate": 3.5871348413888204e-05, | |
| "loss": 0.6723, | |
| "num_input_tokens_seen": 11942768, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.3576, | |
| "grad_norm": 0.7394466400146484, | |
| "learning_rate": 3.581473399215802e-05, | |
| "loss": 0.7302, | |
| "num_input_tokens_seen": 11978464, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.3584, | |
| "grad_norm": 1.1570250988006592, | |
| "learning_rate": 3.575805125869022e-05, | |
| "loss": 0.6867, | |
| "num_input_tokens_seen": 12001392, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.3592, | |
| "grad_norm": 0.8141620755195618, | |
| "learning_rate": 3.5701300571523755e-05, | |
| "loss": 0.7346, | |
| "num_input_tokens_seen": 12030352, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.8653257489204407, | |
| "learning_rate": 3.564448228912682e-05, | |
| "loss": 0.6381, | |
| "num_input_tokens_seen": 12062384, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.3608, | |
| "grad_norm": 0.8065868020057678, | |
| "learning_rate": 3.558759677039455e-05, | |
| "loss": 0.7679, | |
| "num_input_tokens_seen": 12089408, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.3616, | |
| "grad_norm": 0.7610428929328918, | |
| "learning_rate": 3.5530644374646815e-05, | |
| "loss": 0.668, | |
| "num_input_tokens_seen": 12114656, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.3624, | |
| "grad_norm": 0.8063391447067261, | |
| "learning_rate": 3.547362546162588e-05, | |
| "loss": 0.7454, | |
| "num_input_tokens_seen": 12144832, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.3632, | |
| "grad_norm": 1.0300970077514648, | |
| "learning_rate": 3.54165403914942e-05, | |
| "loss": 0.7513, | |
| "num_input_tokens_seen": 12170096, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.364, | |
| "grad_norm": 1.1293412446975708, | |
| "learning_rate": 3.535938952483211e-05, | |
| "loss": 0.7881, | |
| "num_input_tokens_seen": 12191104, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.3648, | |
| "grad_norm": 0.8911874294281006, | |
| "learning_rate": 3.5302173222635524e-05, | |
| "loss": 0.7253, | |
| "num_input_tokens_seen": 12214416, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.3656, | |
| "grad_norm": 1.0665303468704224, | |
| "learning_rate": 3.5244891846313736e-05, | |
| "loss": 0.8122, | |
| "num_input_tokens_seen": 12241344, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.3664, | |
| "grad_norm": 0.6204916834831238, | |
| "learning_rate": 3.5187545757687015e-05, | |
| "loss": 0.6188, | |
| "num_input_tokens_seen": 12269376, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.3672, | |
| "grad_norm": 0.7871102094650269, | |
| "learning_rate": 3.5130135318984456e-05, | |
| "loss": 0.7138, | |
| "num_input_tokens_seen": 12294960, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 0.7584692239761353, | |
| "learning_rate": 3.507266089284157e-05, | |
| "loss": 0.7425, | |
| "num_input_tokens_seen": 12318864, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.3688, | |
| "grad_norm": 0.6678550839424133, | |
| "learning_rate": 3.501512284229807e-05, | |
| "loss": 0.7238, | |
| "num_input_tokens_seen": 12345520, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.3696, | |
| "grad_norm": 0.9825206398963928, | |
| "learning_rate": 3.495752153079557e-05, | |
| "loss": 0.684, | |
| "num_input_tokens_seen": 12369600, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.3704, | |
| "grad_norm": 0.8038123250007629, | |
| "learning_rate": 3.489985732217525e-05, | |
| "loss": 0.707, | |
| "num_input_tokens_seen": 12394400, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.3712, | |
| "grad_norm": 1.158873438835144, | |
| "learning_rate": 3.484213058067559e-05, | |
| "loss": 0.5843, | |
| "num_input_tokens_seen": 12420848, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.372, | |
| "grad_norm": 0.8114385604858398, | |
| "learning_rate": 3.4784341670930065e-05, | |
| "loss": 0.7014, | |
| "num_input_tokens_seen": 12446192, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.3728, | |
| "grad_norm": 0.8132364749908447, | |
| "learning_rate": 3.4726490957964834e-05, | |
| "loss": 0.777, | |
| "num_input_tokens_seen": 12472960, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.3736, | |
| "grad_norm": 0.7918152213096619, | |
| "learning_rate": 3.466857880719645e-05, | |
| "loss": 0.6856, | |
| "num_input_tokens_seen": 12504256, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.3744, | |
| "grad_norm": 0.8399984240531921, | |
| "learning_rate": 3.461060558442952e-05, | |
| "loss": 0.7742, | |
| "num_input_tokens_seen": 12529872, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.3752, | |
| "grad_norm": 1.0398231744766235, | |
| "learning_rate": 3.455257165585444e-05, | |
| "loss": 0.6815, | |
| "num_input_tokens_seen": 12552368, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.376, | |
| "grad_norm": 0.9708042144775391, | |
| "learning_rate": 3.4494477388045035e-05, | |
| "loss": 0.677, | |
| "num_input_tokens_seen": 12576720, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.3768, | |
| "grad_norm": 0.8928380012512207, | |
| "learning_rate": 3.443632314795627e-05, | |
| "loss": 0.6239, | |
| "num_input_tokens_seen": 12606096, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.3776, | |
| "grad_norm": 1.3437156677246094, | |
| "learning_rate": 3.437810930292195e-05, | |
| "loss": 0.7379, | |
| "num_input_tokens_seen": 12631376, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.3784, | |
| "grad_norm": 0.9309334754943848, | |
| "learning_rate": 3.4319836220652335e-05, | |
| "loss": 0.7315, | |
| "num_input_tokens_seen": 12662096, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.3792, | |
| "grad_norm": 1.4636520147323608, | |
| "learning_rate": 3.4261504269231904e-05, | |
| "loss": 0.7738, | |
| "num_input_tokens_seen": 12691696, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 0.8436228632926941, | |
| "learning_rate": 3.4203113817116957e-05, | |
| "loss": 0.7307, | |
| "num_input_tokens_seen": 12718368, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.3808, | |
| "grad_norm": 0.877709150314331, | |
| "learning_rate": 3.414466523313332e-05, | |
| "loss": 0.7119, | |
| "num_input_tokens_seen": 12743664, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.3816, | |
| "grad_norm": 1.2288016080856323, | |
| "learning_rate": 3.408615888647402e-05, | |
| "loss": 0.781, | |
| "num_input_tokens_seen": 12775088, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.3824, | |
| "grad_norm": 0.8335594534873962, | |
| "learning_rate": 3.402759514669694e-05, | |
| "loss": 0.6256, | |
| "num_input_tokens_seen": 12802576, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.3832, | |
| "grad_norm": 1.0417710542678833, | |
| "learning_rate": 3.3968974383722495e-05, | |
| "loss": 0.7672, | |
| "num_input_tokens_seen": 12831280, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 1.1079373359680176, | |
| "learning_rate": 3.3910296967831266e-05, | |
| "loss": 0.7665, | |
| "num_input_tokens_seen": 12853744, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.3848, | |
| "grad_norm": 0.870614230632782, | |
| "learning_rate": 3.3851563269661726e-05, | |
| "loss": 0.6321, | |
| "num_input_tokens_seen": 12883408, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.3856, | |
| "grad_norm": 1.090280294418335, | |
| "learning_rate": 3.379277366020782e-05, | |
| "loss": 0.7086, | |
| "num_input_tokens_seen": 12914592, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.3864, | |
| "grad_norm": 0.8816367983818054, | |
| "learning_rate": 3.373392851081668e-05, | |
| "loss": 0.7712, | |
| "num_input_tokens_seen": 12936832, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.3872, | |
| "grad_norm": 0.8722823858261108, | |
| "learning_rate": 3.367502819318624e-05, | |
| "loss": 0.6844, | |
| "num_input_tokens_seen": 12962864, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.388, | |
| "grad_norm": 0.9704541563987732, | |
| "learning_rate": 3.3616073079362926e-05, | |
| "loss": 0.6877, | |
| "num_input_tokens_seen": 12992560, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.3888, | |
| "grad_norm": 0.8094004988670349, | |
| "learning_rate": 3.355706354173928e-05, | |
| "loss": 0.8139, | |
| "num_input_tokens_seen": 13015440, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.3896, | |
| "grad_norm": 0.8286037445068359, | |
| "learning_rate": 3.349799995305162e-05, | |
| "loss": 0.6696, | |
| "num_input_tokens_seen": 13039008, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.3904, | |
| "grad_norm": 0.985637366771698, | |
| "learning_rate": 3.343888268637765e-05, | |
| "loss": 0.7001, | |
| "num_input_tokens_seen": 13067648, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.3912, | |
| "grad_norm": 0.8938013911247253, | |
| "learning_rate": 3.337971211513417e-05, | |
| "loss": 0.8036, | |
| "num_input_tokens_seen": 13090064, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.392, | |
| "grad_norm": 0.7293727397918701, | |
| "learning_rate": 3.332048861307467e-05, | |
| "loss": 0.7405, | |
| "num_input_tokens_seen": 13119856, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.3928, | |
| "grad_norm": 0.5999038219451904, | |
| "learning_rate": 3.3261212554286975e-05, | |
| "loss": 0.6975, | |
| "num_input_tokens_seen": 13148288, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.3936, | |
| "grad_norm": 0.8091318607330322, | |
| "learning_rate": 3.320188431319088e-05, | |
| "loss": 0.6809, | |
| "num_input_tokens_seen": 13175616, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.3944, | |
| "grad_norm": 1.0293824672698975, | |
| "learning_rate": 3.3142504264535804e-05, | |
| "loss": 0.7749, | |
| "num_input_tokens_seen": 13199280, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.3952, | |
| "grad_norm": 0.793485701084137, | |
| "learning_rate": 3.3083072783398416e-05, | |
| "loss": 0.6873, | |
| "num_input_tokens_seen": 13224640, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.396, | |
| "grad_norm": 0.8636240363121033, | |
| "learning_rate": 3.302359024518024e-05, | |
| "loss": 0.7554, | |
| "num_input_tokens_seen": 13250448, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.3968, | |
| "grad_norm": 0.9471914172172546, | |
| "learning_rate": 3.296405702560532e-05, | |
| "loss": 0.7112, | |
| "num_input_tokens_seen": 13273472, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.3976, | |
| "grad_norm": 1.1579172611236572, | |
| "learning_rate": 3.2904473500717824e-05, | |
| "loss": 0.8207, | |
| "num_input_tokens_seen": 13300608, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 0.3984, | |
| "grad_norm": 1.022197961807251, | |
| "learning_rate": 3.2844840046879686e-05, | |
| "loss": 0.693, | |
| "num_input_tokens_seen": 13326976, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.3992, | |
| "grad_norm": 0.7574387788772583, | |
| "learning_rate": 3.278515704076821e-05, | |
| "loss": 0.6826, | |
| "num_input_tokens_seen": 13358528, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.7097072005271912, | |
| "learning_rate": 3.272542485937369e-05, | |
| "loss": 0.6714, | |
| "num_input_tokens_seen": 13384096, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.4008, | |
| "grad_norm": 0.8780053853988647, | |
| "learning_rate": 3.2665643879997056e-05, | |
| "loss": 0.7387, | |
| "num_input_tokens_seen": 13417120, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.4016, | |
| "grad_norm": 0.8968010544776917, | |
| "learning_rate": 3.260581448024745e-05, | |
| "loss": 0.6875, | |
| "num_input_tokens_seen": 13444832, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.4024, | |
| "grad_norm": 0.9647771716117859, | |
| "learning_rate": 3.25459370380399e-05, | |
| "loss": 0.834, | |
| "num_input_tokens_seen": 13472304, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 0.4032, | |
| "grad_norm": 0.9738301038742065, | |
| "learning_rate": 3.248601193159287e-05, | |
| "loss": 0.7144, | |
| "num_input_tokens_seen": 13495984, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.404, | |
| "grad_norm": 1.03775155544281, | |
| "learning_rate": 3.2426039539425876e-05, | |
| "loss": 0.7171, | |
| "num_input_tokens_seen": 13523360, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.4048, | |
| "grad_norm": 1.3964909315109253, | |
| "learning_rate": 3.236602024035716e-05, | |
| "loss": 0.7197, | |
| "num_input_tokens_seen": 13550016, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.4056, | |
| "grad_norm": 1.0805152654647827, | |
| "learning_rate": 3.230595441350125e-05, | |
| "loss": 0.7997, | |
| "num_input_tokens_seen": 13575088, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.4064, | |
| "grad_norm": 0.9613687992095947, | |
| "learning_rate": 3.2245842438266526e-05, | |
| "loss": 0.7847, | |
| "num_input_tokens_seen": 13600832, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.4072, | |
| "grad_norm": 0.9843304753303528, | |
| "learning_rate": 3.2185684694352916e-05, | |
| "loss": 0.7213, | |
| "num_input_tokens_seen": 13627328, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 0.408, | |
| "grad_norm": 0.7906083464622498, | |
| "learning_rate": 3.21254815617494e-05, | |
| "loss": 0.633, | |
| "num_input_tokens_seen": 13651664, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.4088, | |
| "grad_norm": 0.788149893283844, | |
| "learning_rate": 3.206523342073172e-05, | |
| "loss": 0.7512, | |
| "num_input_tokens_seen": 13677248, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 0.4096, | |
| "grad_norm": 0.7680060863494873, | |
| "learning_rate": 3.2004940651859844e-05, | |
| "loss": 0.703, | |
| "num_input_tokens_seen": 13705904, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.4104, | |
| "grad_norm": 0.8078610301017761, | |
| "learning_rate": 3.194460363597569e-05, | |
| "loss": 0.7212, | |
| "num_input_tokens_seen": 13731520, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.4112, | |
| "grad_norm": 1.2152231931686401, | |
| "learning_rate": 3.1884222754200625e-05, | |
| "loss": 0.7009, | |
| "num_input_tokens_seen": 13753840, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.412, | |
| "grad_norm": 0.8687548637390137, | |
| "learning_rate": 3.1823798387933134e-05, | |
| "loss": 0.718, | |
| "num_input_tokens_seen": 13777504, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.4128, | |
| "grad_norm": 1.1128169298171997, | |
| "learning_rate": 3.176333091884635e-05, | |
| "loss": 0.6796, | |
| "num_input_tokens_seen": 13805392, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.4136, | |
| "grad_norm": 0.6620244383811951, | |
| "learning_rate": 3.170282072888566e-05, | |
| "loss": 0.6632, | |
| "num_input_tokens_seen": 13835600, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 0.4144, | |
| "grad_norm": 1.0803226232528687, | |
| "learning_rate": 3.1642268200266317e-05, | |
| "loss": 0.743, | |
| "num_input_tokens_seen": 13862528, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.4152, | |
| "grad_norm": 0.8314620852470398, | |
| "learning_rate": 3.1581673715471006e-05, | |
| "loss": 0.7091, | |
| "num_input_tokens_seen": 13890272, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 1.0047166347503662, | |
| "learning_rate": 3.152103765724743e-05, | |
| "loss": 0.8011, | |
| "num_input_tokens_seen": 13913328, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.4168, | |
| "grad_norm": 0.9856431484222412, | |
| "learning_rate": 3.1460360408605866e-05, | |
| "loss": 0.7569, | |
| "num_input_tokens_seen": 13943040, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 0.4176, | |
| "grad_norm": 0.8467027544975281, | |
| "learning_rate": 3.139964235281682e-05, | |
| "loss": 0.6976, | |
| "num_input_tokens_seen": 13971872, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.4184, | |
| "grad_norm": 1.2195795774459839, | |
| "learning_rate": 3.1338883873408516e-05, | |
| "loss": 0.7039, | |
| "num_input_tokens_seen": 13997456, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 0.4192, | |
| "grad_norm": 0.832929253578186, | |
| "learning_rate": 3.127808535416454e-05, | |
| "loss": 0.7153, | |
| "num_input_tokens_seen": 14024656, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 0.8261767625808716, | |
| "learning_rate": 3.121724717912138e-05, | |
| "loss": 0.7317, | |
| "num_input_tokens_seen": 14053680, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.4208, | |
| "grad_norm": 0.8690986633300781, | |
| "learning_rate": 3.1156369732566006e-05, | |
| "loss": 0.6991, | |
| "num_input_tokens_seen": 14080096, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.4216, | |
| "grad_norm": 1.041561484336853, | |
| "learning_rate": 3.1095453399033466e-05, | |
| "loss": 0.7442, | |
| "num_input_tokens_seen": 14108080, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 0.4224, | |
| "grad_norm": 1.1139183044433594, | |
| "learning_rate": 3.103449856330443e-05, | |
| "loss": 0.7026, | |
| "num_input_tokens_seen": 14132448, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.4232, | |
| "grad_norm": 0.9388411045074463, | |
| "learning_rate": 3.0973505610402765e-05, | |
| "loss": 0.6425, | |
| "num_input_tokens_seen": 14157312, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 0.424, | |
| "grad_norm": 0.8923696279525757, | |
| "learning_rate": 3.091247492559312e-05, | |
| "loss": 0.7421, | |
| "num_input_tokens_seen": 14184288, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.4248, | |
| "grad_norm": 0.9683478474617004, | |
| "learning_rate": 3.085140689437846e-05, | |
| "loss": 0.7044, | |
| "num_input_tokens_seen": 14207920, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.4256, | |
| "grad_norm": 0.7942652106285095, | |
| "learning_rate": 3.0790301902497666e-05, | |
| "loss": 0.6892, | |
| "num_input_tokens_seen": 14235504, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.4264, | |
| "grad_norm": 0.9955897331237793, | |
| "learning_rate": 3.072916033592307e-05, | |
| "loss": 0.6595, | |
| "num_input_tokens_seen": 14259280, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 0.4272, | |
| "grad_norm": 0.9912785291671753, | |
| "learning_rate": 3.0667982580858044e-05, | |
| "loss": 0.6948, | |
| "num_input_tokens_seen": 14286592, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.428, | |
| "grad_norm": 1.352742314338684, | |
| "learning_rate": 3.0606769023734536e-05, | |
| "loss": 0.7009, | |
| "num_input_tokens_seen": 14309280, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.4288, | |
| "grad_norm": 1.183185338973999, | |
| "learning_rate": 3.054552005121064e-05, | |
| "loss": 0.6814, | |
| "num_input_tokens_seen": 14335984, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.4296, | |
| "grad_norm": 1.2679824829101562, | |
| "learning_rate": 3.0484236050168153e-05, | |
| "loss": 0.7468, | |
| "num_input_tokens_seen": 14361024, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.4304, | |
| "grad_norm": 1.1353107690811157, | |
| "learning_rate": 3.0422917407710137e-05, | |
| "loss": 0.629, | |
| "num_input_tokens_seen": 14391440, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.4312, | |
| "grad_norm": 1.1603094339370728, | |
| "learning_rate": 3.0361564511158457e-05, | |
| "loss": 0.7106, | |
| "num_input_tokens_seen": 14417952, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 0.9477285146713257, | |
| "learning_rate": 3.0300177748051373e-05, | |
| "loss": 0.7136, | |
| "num_input_tokens_seen": 14446752, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.4328, | |
| "grad_norm": 0.9295204281806946, | |
| "learning_rate": 3.0238757506141012e-05, | |
| "loss": 0.6269, | |
| "num_input_tokens_seen": 14475280, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 0.4336, | |
| "grad_norm": 0.8617603182792664, | |
| "learning_rate": 3.0177304173391037e-05, | |
| "loss": 0.6517, | |
| "num_input_tokens_seen": 14498112, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.4344, | |
| "grad_norm": 0.962295413017273, | |
| "learning_rate": 3.0115818137974067e-05, | |
| "loss": 0.6903, | |
| "num_input_tokens_seen": 14525664, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.4352, | |
| "grad_norm": 0.7317754030227661, | |
| "learning_rate": 3.005429978826934e-05, | |
| "loss": 0.7302, | |
| "num_input_tokens_seen": 14551536, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.436, | |
| "grad_norm": 0.9604383111000061, | |
| "learning_rate": 2.9992749512860173e-05, | |
| "loss": 0.7126, | |
| "num_input_tokens_seen": 14574560, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.4368, | |
| "grad_norm": 0.9363977313041687, | |
| "learning_rate": 2.9931167700531578e-05, | |
| "loss": 0.6701, | |
| "num_input_tokens_seen": 14602384, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.4376, | |
| "grad_norm": 1.0513427257537842, | |
| "learning_rate": 2.9869554740267724e-05, | |
| "loss": 0.5816, | |
| "num_input_tokens_seen": 14633728, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 0.4384, | |
| "grad_norm": 1.0142287015914917, | |
| "learning_rate": 2.9807911021249573e-05, | |
| "loss": 0.7965, | |
| "num_input_tokens_seen": 14662752, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.4392, | |
| "grad_norm": 0.8593106269836426, | |
| "learning_rate": 2.9746236932852355e-05, | |
| "loss": 0.6396, | |
| "num_input_tokens_seen": 14690896, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.912413477897644, | |
| "learning_rate": 2.9684532864643122e-05, | |
| "loss": 0.6914, | |
| "num_input_tokens_seen": 14717680, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.4408, | |
| "grad_norm": 1.1753630638122559, | |
| "learning_rate": 2.9622799206378305e-05, | |
| "loss": 0.7188, | |
| "num_input_tokens_seen": 14744176, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 0.4416, | |
| "grad_norm": 1.0383411645889282, | |
| "learning_rate": 2.956103634800126e-05, | |
| "loss": 0.6936, | |
| "num_input_tokens_seen": 14772464, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.4424, | |
| "grad_norm": 0.8875827789306641, | |
| "learning_rate": 2.949924467963975e-05, | |
| "loss": 0.709, | |
| "num_input_tokens_seen": 14800896, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 0.4432, | |
| "grad_norm": 1.0359493494033813, | |
| "learning_rate": 2.943742459160354e-05, | |
| "loss": 0.6361, | |
| "num_input_tokens_seen": 14826624, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.444, | |
| "grad_norm": 0.7070389986038208, | |
| "learning_rate": 2.9375576474381905e-05, | |
| "loss": 0.6062, | |
| "num_input_tokens_seen": 14859392, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.4448, | |
| "grad_norm": 1.0716419219970703, | |
| "learning_rate": 2.9313700718641167e-05, | |
| "loss": 0.7882, | |
| "num_input_tokens_seen": 14882336, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.4456, | |
| "grad_norm": 0.8054667115211487, | |
| "learning_rate": 2.925179771522223e-05, | |
| "loss": 0.7978, | |
| "num_input_tokens_seen": 14911312, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 0.4464, | |
| "grad_norm": 0.9502078294754028, | |
| "learning_rate": 2.9189867855138103e-05, | |
| "loss": 0.6835, | |
| "num_input_tokens_seen": 14938400, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.4472, | |
| "grad_norm": 0.8377355933189392, | |
| "learning_rate": 2.912791152957145e-05, | |
| "loss": 0.6566, | |
| "num_input_tokens_seen": 14965424, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.8674115538597107, | |
| "learning_rate": 2.9065929129872094e-05, | |
| "loss": 0.6616, | |
| "num_input_tokens_seen": 14994368, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.4488, | |
| "grad_norm": 0.9967759251594543, | |
| "learning_rate": 2.900392104755455e-05, | |
| "loss": 0.8051, | |
| "num_input_tokens_seen": 15018480, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 0.4496, | |
| "grad_norm": 1.04585862159729, | |
| "learning_rate": 2.894188767429557e-05, | |
| "loss": 0.6961, | |
| "num_input_tokens_seen": 15045840, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.4504, | |
| "grad_norm": 0.8065064549446106, | |
| "learning_rate": 2.8879829401931652e-05, | |
| "loss": 0.6898, | |
| "num_input_tokens_seen": 15070832, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 0.4512, | |
| "grad_norm": 0.7077392935752869, | |
| "learning_rate": 2.881774662245658e-05, | |
| "loss": 0.6789, | |
| "num_input_tokens_seen": 15097008, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.452, | |
| "grad_norm": 1.068467378616333, | |
| "learning_rate": 2.875563972801893e-05, | |
| "loss": 0.6684, | |
| "num_input_tokens_seen": 15120080, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.4528, | |
| "grad_norm": 0.7860395312309265, | |
| "learning_rate": 2.8693509110919598e-05, | |
| "loss": 0.6577, | |
| "num_input_tokens_seen": 15144976, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.4536, | |
| "grad_norm": 0.86238032579422, | |
| "learning_rate": 2.863135516360932e-05, | |
| "loss": 0.7893, | |
| "num_input_tokens_seen": 15174640, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.4544, | |
| "grad_norm": 0.8910583257675171, | |
| "learning_rate": 2.856917827868622e-05, | |
| "loss": 0.7377, | |
| "num_input_tokens_seen": 15198128, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.4552, | |
| "grad_norm": 0.9576541781425476, | |
| "learning_rate": 2.8506978848893302e-05, | |
| "loss": 0.821, | |
| "num_input_tokens_seen": 15222224, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 0.456, | |
| "grad_norm": 1.114388108253479, | |
| "learning_rate": 2.844475726711595e-05, | |
| "loss": 0.695, | |
| "num_input_tokens_seen": 15246640, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.4568, | |
| "grad_norm": 0.8997412323951721, | |
| "learning_rate": 2.8382513926379504e-05, | |
| "loss": 0.7175, | |
| "num_input_tokens_seen": 15277728, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 0.4576, | |
| "grad_norm": 1.1595414876937866, | |
| "learning_rate": 2.832024921984674e-05, | |
| "loss": 0.6505, | |
| "num_input_tokens_seen": 15307040, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.4584, | |
| "grad_norm": 0.7592776417732239, | |
| "learning_rate": 2.825796354081537e-05, | |
| "loss": 0.6686, | |
| "num_input_tokens_seen": 15334176, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 0.4592, | |
| "grad_norm": 1.0087366104125977, | |
| "learning_rate": 2.8195657282715594e-05, | |
| "loss": 0.6365, | |
| "num_input_tokens_seen": 15360496, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 0.9191427826881409, | |
| "learning_rate": 2.8133330839107608e-05, | |
| "loss": 0.6518, | |
| "num_input_tokens_seen": 15381328, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.4608, | |
| "grad_norm": 1.0468344688415527, | |
| "learning_rate": 2.8070984603679107e-05, | |
| "loss": 0.6262, | |
| "num_input_tokens_seen": 15409936, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.4616, | |
| "grad_norm": 1.1070493459701538, | |
| "learning_rate": 2.800861897024279e-05, | |
| "loss": 0.6684, | |
| "num_input_tokens_seen": 15436848, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 0.4624, | |
| "grad_norm": 1.3349978923797607, | |
| "learning_rate": 2.79462343327339e-05, | |
| "loss": 0.6978, | |
| "num_input_tokens_seen": 15463328, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.4632, | |
| "grad_norm": 0.7566163539886475, | |
| "learning_rate": 2.7883831085207707e-05, | |
| "loss": 0.7062, | |
| "num_input_tokens_seen": 15489232, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 0.7610609531402588, | |
| "learning_rate": 2.782140962183704e-05, | |
| "loss": 0.6642, | |
| "num_input_tokens_seen": 15516224, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.4648, | |
| "grad_norm": 0.7749585509300232, | |
| "learning_rate": 2.7758970336909795e-05, | |
| "loss": 0.6287, | |
| "num_input_tokens_seen": 15545584, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 0.4656, | |
| "grad_norm": 1.0202007293701172, | |
| "learning_rate": 2.769651362482642e-05, | |
| "loss": 0.6672, | |
| "num_input_tokens_seen": 15571216, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.4664, | |
| "grad_norm": 0.7980359792709351, | |
| "learning_rate": 2.763403988009746e-05, | |
| "loss": 0.737, | |
| "num_input_tokens_seen": 15597744, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 0.4672, | |
| "grad_norm": 0.942456841468811, | |
| "learning_rate": 2.7571549497341042e-05, | |
| "loss": 0.7715, | |
| "num_input_tokens_seen": 15622496, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.468, | |
| "grad_norm": 0.7782229781150818, | |
| "learning_rate": 2.7509042871280372e-05, | |
| "loss": 0.7435, | |
| "num_input_tokens_seen": 15647344, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.4688, | |
| "grad_norm": 1.0889509916305542, | |
| "learning_rate": 2.744652039674129e-05, | |
| "loss": 0.6946, | |
| "num_input_tokens_seen": 15672672, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.4696, | |
| "grad_norm": 1.0606461763381958, | |
| "learning_rate": 2.7383982468649714e-05, | |
| "loss": 0.7523, | |
| "num_input_tokens_seen": 15696144, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 0.4704, | |
| "grad_norm": 0.942613959312439, | |
| "learning_rate": 2.73214294820292e-05, | |
| "loss": 0.7218, | |
| "num_input_tokens_seen": 15723600, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.4712, | |
| "grad_norm": 0.8650354743003845, | |
| "learning_rate": 2.7258861831998388e-05, | |
| "loss": 0.6736, | |
| "num_input_tokens_seen": 15749680, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 0.472, | |
| "grad_norm": 0.8573226928710938, | |
| "learning_rate": 2.7196279913768584e-05, | |
| "loss": 0.6314, | |
| "num_input_tokens_seen": 15776768, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.4728, | |
| "grad_norm": 0.9692303538322449, | |
| "learning_rate": 2.713368412264118e-05, | |
| "loss": 0.7035, | |
| "num_input_tokens_seen": 15801376, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 0.4736, | |
| "grad_norm": 1.2111790180206299, | |
| "learning_rate": 2.707107485400521e-05, | |
| "loss": 0.6785, | |
| "num_input_tokens_seen": 15828416, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.4744, | |
| "grad_norm": 1.0816082954406738, | |
| "learning_rate": 2.7008452503334858e-05, | |
| "loss": 0.7672, | |
| "num_input_tokens_seen": 15852720, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 0.4752, | |
| "grad_norm": 0.980903148651123, | |
| "learning_rate": 2.6945817466186912e-05, | |
| "loss": 0.7723, | |
| "num_input_tokens_seen": 15880624, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.476, | |
| "grad_norm": 1.012623906135559, | |
| "learning_rate": 2.6883170138198323e-05, | |
| "loss": 0.6105, | |
| "num_input_tokens_seen": 15912176, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.4768, | |
| "grad_norm": 1.069486141204834, | |
| "learning_rate": 2.6820510915083648e-05, | |
| "loss": 0.6941, | |
| "num_input_tokens_seen": 15944384, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.4776, | |
| "grad_norm": 0.8411433100700378, | |
| "learning_rate": 2.6757840192632598e-05, | |
| "loss": 0.6669, | |
| "num_input_tokens_seen": 15969680, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 0.4784, | |
| "grad_norm": 0.6902319192886353, | |
| "learning_rate": 2.6695158366707522e-05, | |
| "loss": 0.6814, | |
| "num_input_tokens_seen": 15997264, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.4792, | |
| "grad_norm": 0.7844128012657166, | |
| "learning_rate": 2.6632465833240893e-05, | |
| "loss": 0.5641, | |
| "num_input_tokens_seen": 16029664, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.1021162271499634, | |
| "learning_rate": 2.656976298823284e-05, | |
| "loss": 0.7797, | |
| "num_input_tokens_seen": 16054864, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.4808, | |
| "grad_norm": 1.2080223560333252, | |
| "learning_rate": 2.650705022774859e-05, | |
| "loss": 0.6778, | |
| "num_input_tokens_seen": 16079552, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 0.4816, | |
| "grad_norm": 1.1015046834945679, | |
| "learning_rate": 2.6444327947916036e-05, | |
| "loss": 0.6806, | |
| "num_input_tokens_seen": 16105632, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.4824, | |
| "grad_norm": 1.0269590616226196, | |
| "learning_rate": 2.638159654492318e-05, | |
| "loss": 0.7589, | |
| "num_input_tokens_seen": 16134688, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 0.4832, | |
| "grad_norm": 0.8565163612365723, | |
| "learning_rate": 2.6318856415015664e-05, | |
| "loss": 0.677, | |
| "num_input_tokens_seen": 16163152, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.484, | |
| "grad_norm": 0.8519279956817627, | |
| "learning_rate": 2.6256107954494242e-05, | |
| "loss": 0.6136, | |
| "num_input_tokens_seen": 16189248, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.4848, | |
| "grad_norm": 0.9466794729232788, | |
| "learning_rate": 2.6193351559712292e-05, | |
| "loss": 0.6369, | |
| "num_input_tokens_seen": 16214832, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.4856, | |
| "grad_norm": 0.9867402911186218, | |
| "learning_rate": 2.6130587627073315e-05, | |
| "loss": 0.7202, | |
| "num_input_tokens_seen": 16244736, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 0.4864, | |
| "grad_norm": 0.8893384337425232, | |
| "learning_rate": 2.606781655302843e-05, | |
| "loss": 0.7057, | |
| "num_input_tokens_seen": 16272064, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.4872, | |
| "grad_norm": 1.2341115474700928, | |
| "learning_rate": 2.6005038734073833e-05, | |
| "loss": 0.6715, | |
| "num_input_tokens_seen": 16301344, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 0.488, | |
| "grad_norm": 1.0158292055130005, | |
| "learning_rate": 2.594225456674837e-05, | |
| "loss": 0.7479, | |
| "num_input_tokens_seen": 16325872, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.4888, | |
| "grad_norm": 0.9316710233688354, | |
| "learning_rate": 2.5879464447630946e-05, | |
| "loss": 0.6581, | |
| "num_input_tokens_seen": 16352272, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 0.4896, | |
| "grad_norm": 0.9104299545288086, | |
| "learning_rate": 2.5816668773338098e-05, | |
| "loss": 0.691, | |
| "num_input_tokens_seen": 16380464, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.4904, | |
| "grad_norm": 0.8835129737854004, | |
| "learning_rate": 2.575386794052142e-05, | |
| "loss": 0.6606, | |
| "num_input_tokens_seen": 16408736, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 0.4912, | |
| "grad_norm": 0.869504451751709, | |
| "learning_rate": 2.569106234586511e-05, | |
| "loss": 0.729, | |
| "num_input_tokens_seen": 16436352, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.492, | |
| "grad_norm": 1.0879722833633423, | |
| "learning_rate": 2.562825238608344e-05, | |
| "loss": 0.7137, | |
| "num_input_tokens_seen": 16464624, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.4928, | |
| "grad_norm": 0.9328833818435669, | |
| "learning_rate": 2.5565438457918244e-05, | |
| "loss": 0.7238, | |
| "num_input_tokens_seen": 16496720, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.4936, | |
| "grad_norm": 0.7433749437332153, | |
| "learning_rate": 2.5502620958136443e-05, | |
| "loss": 0.7019, | |
| "num_input_tokens_seen": 16524208, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 0.4944, | |
| "grad_norm": 0.7768478989601135, | |
| "learning_rate": 2.5439800283527494e-05, | |
| "loss": 0.5851, | |
| "num_input_tokens_seen": 16552192, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.4952, | |
| "grad_norm": 1.139993667602539, | |
| "learning_rate": 2.537697683090093e-05, | |
| "loss": 0.7357, | |
| "num_input_tokens_seen": 16578144, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 0.496, | |
| "grad_norm": 0.9104892611503601, | |
| "learning_rate": 2.531415099708382e-05, | |
| "loss": 0.6254, | |
| "num_input_tokens_seen": 16608288, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.4968, | |
| "grad_norm": 0.6912931799888611, | |
| "learning_rate": 2.5251323178918268e-05, | |
| "loss": 0.7284, | |
| "num_input_tokens_seen": 16636176, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 0.4976, | |
| "grad_norm": 0.8370018601417542, | |
| "learning_rate": 2.518849377325893e-05, | |
| "loss": 0.8136, | |
| "num_input_tokens_seen": 16659168, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.4984, | |
| "grad_norm": 1.049914836883545, | |
| "learning_rate": 2.5125663176970476e-05, | |
| "loss": 0.7334, | |
| "num_input_tokens_seen": 16687344, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 0.4992, | |
| "grad_norm": 1.0298138856887817, | |
| "learning_rate": 2.5062831786925102e-05, | |
| "loss": 0.7599, | |
| "num_input_tokens_seen": 16714496, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.7912611961364746, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.6517, | |
| "num_input_tokens_seen": 16742528, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.5008, | |
| "grad_norm": 1.0854965448379517, | |
| "learning_rate": 2.4937168213074907e-05, | |
| "loss": 0.6778, | |
| "num_input_tokens_seen": 16771248, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.5016, | |
| "grad_norm": 0.9674849510192871, | |
| "learning_rate": 2.4874336823029526e-05, | |
| "loss": 0.6847, | |
| "num_input_tokens_seen": 16799136, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 0.5024, | |
| "grad_norm": 0.8434900641441345, | |
| "learning_rate": 2.481150622674108e-05, | |
| "loss": 0.6638, | |
| "num_input_tokens_seen": 16825648, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.5032, | |
| "grad_norm": 0.8714620471000671, | |
| "learning_rate": 2.4748676821081738e-05, | |
| "loss": 0.7139, | |
| "num_input_tokens_seen": 16852240, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 0.504, | |
| "grad_norm": 0.8312164545059204, | |
| "learning_rate": 2.4685849002916183e-05, | |
| "loss": 0.7507, | |
| "num_input_tokens_seen": 16878624, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.5048, | |
| "grad_norm": 1.1353472471237183, | |
| "learning_rate": 2.4623023169099073e-05, | |
| "loss": 0.6951, | |
| "num_input_tokens_seen": 16906864, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 0.5056, | |
| "grad_norm": 1.1486365795135498, | |
| "learning_rate": 2.4560199716472508e-05, | |
| "loss": 0.733, | |
| "num_input_tokens_seen": 16930080, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.5064, | |
| "grad_norm": 0.9651095867156982, | |
| "learning_rate": 2.449737904186357e-05, | |
| "loss": 0.7517, | |
| "num_input_tokens_seen": 16952240, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 0.5072, | |
| "grad_norm": 0.8921483755111694, | |
| "learning_rate": 2.4434561542081762e-05, | |
| "loss": 0.7472, | |
| "num_input_tokens_seen": 16985408, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.508, | |
| "grad_norm": 1.0625066757202148, | |
| "learning_rate": 2.4371747613916566e-05, | |
| "loss": 0.7514, | |
| "num_input_tokens_seen": 17013776, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.5088, | |
| "grad_norm": 1.10313081741333, | |
| "learning_rate": 2.4308937654134893e-05, | |
| "loss": 0.7633, | |
| "num_input_tokens_seen": 17039120, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.5096, | |
| "grad_norm": 1.115670084953308, | |
| "learning_rate": 2.4246132059478578e-05, | |
| "loss": 0.6606, | |
| "num_input_tokens_seen": 17065296, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 0.5104, | |
| "grad_norm": 1.0417555570602417, | |
| "learning_rate": 2.418333122666191e-05, | |
| "loss": 0.764, | |
| "num_input_tokens_seen": 17089264, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.5112, | |
| "grad_norm": 0.9926926493644714, | |
| "learning_rate": 2.412053555236906e-05, | |
| "loss": 0.751, | |
| "num_input_tokens_seen": 17117488, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 1.1716359853744507, | |
| "learning_rate": 2.4057745433251635e-05, | |
| "loss": 0.7067, | |
| "num_input_tokens_seen": 17141232, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.5128, | |
| "grad_norm": 1.0248827934265137, | |
| "learning_rate": 2.3994961265926166e-05, | |
| "loss": 0.6432, | |
| "num_input_tokens_seen": 17171632, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 0.5136, | |
| "grad_norm": 0.8832619190216064, | |
| "learning_rate": 2.3932183446971583e-05, | |
| "loss": 0.6373, | |
| "num_input_tokens_seen": 17198640, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.5144, | |
| "grad_norm": 0.8581608533859253, | |
| "learning_rate": 2.3869412372926687e-05, | |
| "loss": 0.7347, | |
| "num_input_tokens_seen": 17228240, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 0.5152, | |
| "grad_norm": 1.0683339834213257, | |
| "learning_rate": 2.3806648440287714e-05, | |
| "loss": 0.6789, | |
| "num_input_tokens_seen": 17259392, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.516, | |
| "grad_norm": 0.9491643309593201, | |
| "learning_rate": 2.3743892045505764e-05, | |
| "loss": 0.7548, | |
| "num_input_tokens_seen": 17287808, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.5168, | |
| "grad_norm": 0.8620545864105225, | |
| "learning_rate": 2.368114358498434e-05, | |
| "loss": 0.7792, | |
| "num_input_tokens_seen": 17311520, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.5176, | |
| "grad_norm": 1.0956013202667236, | |
| "learning_rate": 2.361840345507683e-05, | |
| "loss": 0.6575, | |
| "num_input_tokens_seen": 17340816, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 0.5184, | |
| "grad_norm": 1.0029551982879639, | |
| "learning_rate": 2.355567205208397e-05, | |
| "loss": 0.6414, | |
| "num_input_tokens_seen": 17363408, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.5192, | |
| "grad_norm": 1.0472480058670044, | |
| "learning_rate": 2.3492949772251414e-05, | |
| "loss": 0.7161, | |
| "num_input_tokens_seen": 17393248, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.8757247924804688, | |
| "learning_rate": 2.3430237011767167e-05, | |
| "loss": 0.6957, | |
| "num_input_tokens_seen": 17425232, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.5208, | |
| "grad_norm": 1.0374081134796143, | |
| "learning_rate": 2.3367534166759102e-05, | |
| "loss": 0.7615, | |
| "num_input_tokens_seen": 17446864, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 0.5216, | |
| "grad_norm": 1.0572500228881836, | |
| "learning_rate": 2.3304841633292487e-05, | |
| "loss": 0.6994, | |
| "num_input_tokens_seen": 17470896, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.5224, | |
| "grad_norm": 1.0209540128707886, | |
| "learning_rate": 2.3242159807367408e-05, | |
| "loss": 0.7116, | |
| "num_input_tokens_seen": 17501488, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 0.5232, | |
| "grad_norm": 1.0854222774505615, | |
| "learning_rate": 2.3179489084916358e-05, | |
| "loss": 0.7583, | |
| "num_input_tokens_seen": 17526032, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.524, | |
| "grad_norm": 1.0327874422073364, | |
| "learning_rate": 2.3116829861801686e-05, | |
| "loss": 0.7302, | |
| "num_input_tokens_seen": 17550144, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 0.5248, | |
| "grad_norm": 1.186990737915039, | |
| "learning_rate": 2.3054182533813087e-05, | |
| "loss": 0.6794, | |
| "num_input_tokens_seen": 17575600, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.5256, | |
| "grad_norm": 1.000475287437439, | |
| "learning_rate": 2.2991547496665148e-05, | |
| "loss": 0.7294, | |
| "num_input_tokens_seen": 17601408, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 0.5264, | |
| "grad_norm": 1.1120193004608154, | |
| "learning_rate": 2.2928925145994794e-05, | |
| "loss": 0.6333, | |
| "num_input_tokens_seen": 17624752, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.5272, | |
| "grad_norm": 0.7765217423439026, | |
| "learning_rate": 2.286631587735883e-05, | |
| "loss": 0.7779, | |
| "num_input_tokens_seen": 17651040, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 0.528, | |
| "grad_norm": 0.9403998255729675, | |
| "learning_rate": 2.280372008623142e-05, | |
| "loss": 0.7035, | |
| "num_input_tokens_seen": 17678288, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.5288, | |
| "grad_norm": 1.019305944442749, | |
| "learning_rate": 2.2741138168001608e-05, | |
| "loss": 0.719, | |
| "num_input_tokens_seen": 17702816, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 0.5296, | |
| "grad_norm": 1.0804177522659302, | |
| "learning_rate": 2.267857051797081e-05, | |
| "loss": 0.7134, | |
| "num_input_tokens_seen": 17728848, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.5304, | |
| "grad_norm": 0.7340876460075378, | |
| "learning_rate": 2.2616017531350288e-05, | |
| "loss": 0.6916, | |
| "num_input_tokens_seen": 17756240, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 0.5312, | |
| "grad_norm": 0.9618902802467346, | |
| "learning_rate": 2.255347960325871e-05, | |
| "loss": 0.6389, | |
| "num_input_tokens_seen": 17781104, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.532, | |
| "grad_norm": 0.9528229832649231, | |
| "learning_rate": 2.2490957128719624e-05, | |
| "loss": 0.6648, | |
| "num_input_tokens_seen": 17808816, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.5328, | |
| "grad_norm": 1.043328881263733, | |
| "learning_rate": 2.2428450502658967e-05, | |
| "loss": 0.6683, | |
| "num_input_tokens_seen": 17834496, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.5336, | |
| "grad_norm": 0.8162310719490051, | |
| "learning_rate": 2.2365960119902545e-05, | |
| "loss": 0.7686, | |
| "num_input_tokens_seen": 17862880, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 0.5344, | |
| "grad_norm": 0.8925397396087646, | |
| "learning_rate": 2.2303486375173585e-05, | |
| "loss": 0.7073, | |
| "num_input_tokens_seen": 17890064, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.5352, | |
| "grad_norm": 1.0610705614089966, | |
| "learning_rate": 2.224102966309021e-05, | |
| "loss": 0.642, | |
| "num_input_tokens_seen": 17918144, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 0.536, | |
| "grad_norm": 1.1452162265777588, | |
| "learning_rate": 2.217859037816296e-05, | |
| "loss": 0.7078, | |
| "num_input_tokens_seen": 17945344, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.5368, | |
| "grad_norm": 0.8698946833610535, | |
| "learning_rate": 2.2116168914792292e-05, | |
| "loss": 0.7437, | |
| "num_input_tokens_seen": 17970096, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 0.5376, | |
| "grad_norm": 1.1551156044006348, | |
| "learning_rate": 2.205376566726611e-05, | |
| "loss": 0.7606, | |
| "num_input_tokens_seen": 17997328, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.5384, | |
| "grad_norm": 1.3479046821594238, | |
| "learning_rate": 2.1991381029757215e-05, | |
| "loss": 0.6824, | |
| "num_input_tokens_seen": 18022464, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 0.5392, | |
| "grad_norm": 0.9218052625656128, | |
| "learning_rate": 2.19290153963209e-05, | |
| "loss": 0.7262, | |
| "num_input_tokens_seen": 18052176, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.302252173423767, | |
| "learning_rate": 2.186666916089239e-05, | |
| "loss": 0.7491, | |
| "num_input_tokens_seen": 18079008, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.5408, | |
| "grad_norm": 1.4532941579818726, | |
| "learning_rate": 2.1804342717284415e-05, | |
| "loss": 0.6246, | |
| "num_input_tokens_seen": 18102784, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.5416, | |
| "grad_norm": 0.7572783827781677, | |
| "learning_rate": 2.174203645918464e-05, | |
| "loss": 0.6712, | |
| "num_input_tokens_seen": 18130688, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 0.5424, | |
| "grad_norm": 1.0954492092132568, | |
| "learning_rate": 2.1679750780153267e-05, | |
| "loss": 0.7238, | |
| "num_input_tokens_seen": 18159200, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.5432, | |
| "grad_norm": 1.1352787017822266, | |
| "learning_rate": 2.1617486073620498e-05, | |
| "loss": 0.663, | |
| "num_input_tokens_seen": 18188736, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 1.012987732887268, | |
| "learning_rate": 2.155524273288405e-05, | |
| "loss": 0.6928, | |
| "num_input_tokens_seen": 18217856, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.5448, | |
| "grad_norm": 0.8638446927070618, | |
| "learning_rate": 2.1493021151106703e-05, | |
| "loss": 0.7373, | |
| "num_input_tokens_seen": 18247616, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 0.5456, | |
| "grad_norm": 1.2647075653076172, | |
| "learning_rate": 2.1430821721313782e-05, | |
| "loss": 0.7593, | |
| "num_input_tokens_seen": 18274416, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.5464, | |
| "grad_norm": 0.8533580899238586, | |
| "learning_rate": 2.1368644836390684e-05, | |
| "loss": 0.6718, | |
| "num_input_tokens_seen": 18298720, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 0.5472, | |
| "grad_norm": 0.8091197609901428, | |
| "learning_rate": 2.130649088908041e-05, | |
| "loss": 0.7303, | |
| "num_input_tokens_seen": 18326160, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.548, | |
| "grad_norm": 0.886374294757843, | |
| "learning_rate": 2.1244360271981073e-05, | |
| "loss": 0.74, | |
| "num_input_tokens_seen": 18351344, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 0.5488, | |
| "grad_norm": 0.8284346461296082, | |
| "learning_rate": 2.1182253377543425e-05, | |
| "loss": 0.6448, | |
| "num_input_tokens_seen": 18374752, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.5496, | |
| "grad_norm": 0.9252715706825256, | |
| "learning_rate": 2.112017059806835e-05, | |
| "loss": 0.6759, | |
| "num_input_tokens_seen": 18402432, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 0.5504, | |
| "grad_norm": 1.0463306903839111, | |
| "learning_rate": 2.1058112325704436e-05, | |
| "loss": 0.7327, | |
| "num_input_tokens_seen": 18428656, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.5512, | |
| "grad_norm": 0.999754011631012, | |
| "learning_rate": 2.0996078952445452e-05, | |
| "loss": 0.7214, | |
| "num_input_tokens_seen": 18451744, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 0.552, | |
| "grad_norm": 0.759445071220398, | |
| "learning_rate": 2.0934070870127912e-05, | |
| "loss": 0.7192, | |
| "num_input_tokens_seen": 18476960, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.5528, | |
| "grad_norm": 0.757986843585968, | |
| "learning_rate": 2.0872088470428553e-05, | |
| "loss": 0.6481, | |
| "num_input_tokens_seen": 18507280, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 0.5536, | |
| "grad_norm": 1.555863857269287, | |
| "learning_rate": 2.08101321448619e-05, | |
| "loss": 0.6906, | |
| "num_input_tokens_seen": 18531264, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.5544, | |
| "grad_norm": 0.9648059606552124, | |
| "learning_rate": 2.0748202284777777e-05, | |
| "loss": 0.6691, | |
| "num_input_tokens_seen": 18559552, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 0.5552, | |
| "grad_norm": 0.7556829452514648, | |
| "learning_rate": 2.0686299281358835e-05, | |
| "loss": 0.743, | |
| "num_input_tokens_seen": 18587408, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.556, | |
| "grad_norm": 1.2063502073287964, | |
| "learning_rate": 2.0624423525618098e-05, | |
| "loss": 0.6896, | |
| "num_input_tokens_seen": 18616384, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 0.5568, | |
| "grad_norm": 1.0923407077789307, | |
| "learning_rate": 2.056257540839647e-05, | |
| "loss": 0.7799, | |
| "num_input_tokens_seen": 18640432, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.5576, | |
| "grad_norm": 0.9768779873847961, | |
| "learning_rate": 2.050075532036026e-05, | |
| "loss": 0.6796, | |
| "num_input_tokens_seen": 18661696, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 0.5584, | |
| "grad_norm": 1.0903396606445312, | |
| "learning_rate": 2.0438963651998747e-05, | |
| "loss": 0.6601, | |
| "num_input_tokens_seen": 18689280, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.5592, | |
| "grad_norm": 1.0859960317611694, | |
| "learning_rate": 2.037720079362169e-05, | |
| "loss": 0.7247, | |
| "num_input_tokens_seen": 18713776, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.979040801525116, | |
| "learning_rate": 2.031546713535688e-05, | |
| "loss": 0.7113, | |
| "num_input_tokens_seen": 18739632, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.5608, | |
| "grad_norm": 0.9411745667457581, | |
| "learning_rate": 2.0253763067147657e-05, | |
| "loss": 0.604, | |
| "num_input_tokens_seen": 18767504, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 0.5616, | |
| "grad_norm": 1.0369850397109985, | |
| "learning_rate": 2.0192088978750433e-05, | |
| "loss": 0.6292, | |
| "num_input_tokens_seen": 18794976, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.5624, | |
| "grad_norm": 1.033288598060608, | |
| "learning_rate": 2.0130445259732285e-05, | |
| "loss": 0.7227, | |
| "num_input_tokens_seen": 18823456, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 0.5632, | |
| "grad_norm": 1.0590053796768188, | |
| "learning_rate": 2.0068832299468428e-05, | |
| "loss": 0.6536, | |
| "num_input_tokens_seen": 18851104, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.564, | |
| "grad_norm": 0.8934491276741028, | |
| "learning_rate": 2.000725048713983e-05, | |
| "loss": 0.6729, | |
| "num_input_tokens_seen": 18882096, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 0.5648, | |
| "grad_norm": 1.3166091442108154, | |
| "learning_rate": 1.994570021173067e-05, | |
| "loss": 0.8178, | |
| "num_input_tokens_seen": 18903520, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.5656, | |
| "grad_norm": 0.7813395857810974, | |
| "learning_rate": 1.988418186202594e-05, | |
| "loss": 0.6575, | |
| "num_input_tokens_seen": 18937200, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 0.5664, | |
| "grad_norm": 1.1987985372543335, | |
| "learning_rate": 1.9822695826608972e-05, | |
| "loss": 0.7789, | |
| "num_input_tokens_seen": 18965424, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.5672, | |
| "grad_norm": 0.9326161742210388, | |
| "learning_rate": 1.9761242493858987e-05, | |
| "loss": 0.6699, | |
| "num_input_tokens_seen": 18989456, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 0.568, | |
| "grad_norm": 0.736609160900116, | |
| "learning_rate": 1.969982225194864e-05, | |
| "loss": 0.7095, | |
| "num_input_tokens_seen": 19017808, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.5688, | |
| "grad_norm": 1.0842061042785645, | |
| "learning_rate": 1.9638435488841546e-05, | |
| "loss": 0.7191, | |
| "num_input_tokens_seen": 19046496, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 0.5696, | |
| "grad_norm": 0.9748028516769409, | |
| "learning_rate": 1.957708259228987e-05, | |
| "loss": 0.7016, | |
| "num_input_tokens_seen": 19072128, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.5704, | |
| "grad_norm": 1.0534452199935913, | |
| "learning_rate": 1.951576394983185e-05, | |
| "loss": 0.6903, | |
| "num_input_tokens_seen": 19096528, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 0.5712, | |
| "grad_norm": 0.860016405582428, | |
| "learning_rate": 1.945447994878937e-05, | |
| "loss": 0.6144, | |
| "num_input_tokens_seen": 19126240, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.572, | |
| "grad_norm": 0.9540638327598572, | |
| "learning_rate": 1.9393230976265473e-05, | |
| "loss": 0.6755, | |
| "num_input_tokens_seen": 19152752, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 0.5728, | |
| "grad_norm": 0.8391373157501221, | |
| "learning_rate": 1.9332017419141962e-05, | |
| "loss": 0.748, | |
| "num_input_tokens_seen": 19179296, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.5736, | |
| "grad_norm": 1.1639857292175293, | |
| "learning_rate": 1.9270839664076936e-05, | |
| "loss": 0.7011, | |
| "num_input_tokens_seen": 19205616, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 0.5744, | |
| "grad_norm": 0.8739202618598938, | |
| "learning_rate": 1.920969809750234e-05, | |
| "loss": 0.6672, | |
| "num_input_tokens_seen": 19231440, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.5752, | |
| "grad_norm": 0.8280954360961914, | |
| "learning_rate": 1.914859310562154e-05, | |
| "loss": 0.7261, | |
| "num_input_tokens_seen": 19258288, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.671859622001648, | |
| "learning_rate": 1.908752507440689e-05, | |
| "loss": 0.7838, | |
| "num_input_tokens_seen": 19284464, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.5768, | |
| "grad_norm": 0.7985883951187134, | |
| "learning_rate": 1.9026494389597238e-05, | |
| "loss": 0.6574, | |
| "num_input_tokens_seen": 19312272, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 0.5776, | |
| "grad_norm": 1.1776115894317627, | |
| "learning_rate": 1.8965501436695577e-05, | |
| "loss": 0.7692, | |
| "num_input_tokens_seen": 19335408, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.5784, | |
| "grad_norm": 0.7614251971244812, | |
| "learning_rate": 1.890454660096654e-05, | |
| "loss": 0.7165, | |
| "num_input_tokens_seen": 19360768, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 0.5792, | |
| "grad_norm": 1.0146969556808472, | |
| "learning_rate": 1.8843630267434e-05, | |
| "loss": 0.8187, | |
| "num_input_tokens_seen": 19386016, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 0.8127625584602356, | |
| "learning_rate": 1.8782752820878634e-05, | |
| "loss": 0.6587, | |
| "num_input_tokens_seen": 19408560, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 0.5808, | |
| "grad_norm": 1.102415680885315, | |
| "learning_rate": 1.872191464583547e-05, | |
| "loss": 0.6268, | |
| "num_input_tokens_seen": 19436592, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.5816, | |
| "grad_norm": 0.8009477853775024, | |
| "learning_rate": 1.866111612659149e-05, | |
| "loss": 0.6977, | |
| "num_input_tokens_seen": 19463440, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 0.5824, | |
| "grad_norm": 0.9613442420959473, | |
| "learning_rate": 1.8600357647183185e-05, | |
| "loss": 0.6292, | |
| "num_input_tokens_seen": 19493360, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.5832, | |
| "grad_norm": 1.1276973485946655, | |
| "learning_rate": 1.8539639591394133e-05, | |
| "loss": 0.6547, | |
| "num_input_tokens_seen": 19521392, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 0.584, | |
| "grad_norm": 1.2128732204437256, | |
| "learning_rate": 1.8478962342752583e-05, | |
| "loss": 0.6717, | |
| "num_input_tokens_seen": 19550336, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.5848, | |
| "grad_norm": 1.1931806802749634, | |
| "learning_rate": 1.8418326284528996e-05, | |
| "loss": 0.7065, | |
| "num_input_tokens_seen": 19575776, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 0.5856, | |
| "grad_norm": 0.921335756778717, | |
| "learning_rate": 1.8357731799733686e-05, | |
| "loss": 0.7029, | |
| "num_input_tokens_seen": 19598128, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.5864, | |
| "grad_norm": 0.8000009655952454, | |
| "learning_rate": 1.8297179271114346e-05, | |
| "loss": 0.7311, | |
| "num_input_tokens_seen": 19625648, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 0.5872, | |
| "grad_norm": 1.0933367013931274, | |
| "learning_rate": 1.8236669081153657e-05, | |
| "loss": 0.7296, | |
| "num_input_tokens_seen": 19649952, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.588, | |
| "grad_norm": 0.8331469297409058, | |
| "learning_rate": 1.817620161206687e-05, | |
| "loss": 0.7534, | |
| "num_input_tokens_seen": 19677680, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 0.5888, | |
| "grad_norm": 1.3450491428375244, | |
| "learning_rate": 1.811577724579938e-05, | |
| "loss": 0.6995, | |
| "num_input_tokens_seen": 19711904, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.5896, | |
| "grad_norm": 1.0697826147079468, | |
| "learning_rate": 1.8055396364024317e-05, | |
| "loss": 0.7517, | |
| "num_input_tokens_seen": 19734272, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 0.5904, | |
| "grad_norm": 0.9218750596046448, | |
| "learning_rate": 1.7995059348140165e-05, | |
| "loss": 0.7048, | |
| "num_input_tokens_seen": 19761136, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.5912, | |
| "grad_norm": 0.7037175297737122, | |
| "learning_rate": 1.7934766579268292e-05, | |
| "loss": 0.6385, | |
| "num_input_tokens_seen": 19784880, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 0.592, | |
| "grad_norm": 0.9812880754470825, | |
| "learning_rate": 1.7874518438250597e-05, | |
| "loss": 0.8177, | |
| "num_input_tokens_seen": 19811456, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.5928, | |
| "grad_norm": 1.0128806829452515, | |
| "learning_rate": 1.7814315305647093e-05, | |
| "loss": 0.7373, | |
| "num_input_tokens_seen": 19839168, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 0.5936, | |
| "grad_norm": 0.8506542444229126, | |
| "learning_rate": 1.7754157561733476e-05, | |
| "loss": 0.723, | |
| "num_input_tokens_seen": 19865584, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.5944, | |
| "grad_norm": 0.8591898679733276, | |
| "learning_rate": 1.7694045586498752e-05, | |
| "loss": 0.6315, | |
| "num_input_tokens_seen": 19893232, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 0.5952, | |
| "grad_norm": 0.9761216640472412, | |
| "learning_rate": 1.7633979759642844e-05, | |
| "loss": 0.6184, | |
| "num_input_tokens_seen": 19918512, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.596, | |
| "grad_norm": 0.9515823721885681, | |
| "learning_rate": 1.7573960460574133e-05, | |
| "loss": 0.682, | |
| "num_input_tokens_seen": 19944544, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 0.5968, | |
| "grad_norm": 1.2393804788589478, | |
| "learning_rate": 1.7513988068407146e-05, | |
| "loss": 0.6738, | |
| "num_input_tokens_seen": 19971104, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.5976, | |
| "grad_norm": 1.2483285665512085, | |
| "learning_rate": 1.74540629619601e-05, | |
| "loss": 0.6895, | |
| "num_input_tokens_seen": 19996352, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 0.5984, | |
| "grad_norm": 1.162599802017212, | |
| "learning_rate": 1.7394185519752545e-05, | |
| "loss": 0.7436, | |
| "num_input_tokens_seen": 20021744, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.5992, | |
| "grad_norm": 0.8526731133460999, | |
| "learning_rate": 1.7334356120002957e-05, | |
| "loss": 0.7612, | |
| "num_input_tokens_seen": 20046560, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.1033904552459717, | |
| "learning_rate": 1.7274575140626318e-05, | |
| "loss": 0.7139, | |
| "num_input_tokens_seen": 20072560, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.6008, | |
| "grad_norm": 0.9515017867088318, | |
| "learning_rate": 1.7214842959231794e-05, | |
| "loss": 0.6556, | |
| "num_input_tokens_seen": 20103488, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 0.6016, | |
| "grad_norm": 1.1830626726150513, | |
| "learning_rate": 1.7155159953120313e-05, | |
| "loss": 0.6884, | |
| "num_input_tokens_seen": 20130784, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.6024, | |
| "grad_norm": 1.1456624269485474, | |
| "learning_rate": 1.7095526499282172e-05, | |
| "loss": 0.7729, | |
| "num_input_tokens_seen": 20158720, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 0.6032, | |
| "grad_norm": 0.8993046879768372, | |
| "learning_rate": 1.703594297439469e-05, | |
| "loss": 0.7427, | |
| "num_input_tokens_seen": 20180736, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.604, | |
| "grad_norm": 1.0378142595291138, | |
| "learning_rate": 1.6976409754819767e-05, | |
| "loss": 0.6831, | |
| "num_input_tokens_seen": 20203744, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 0.6048, | |
| "grad_norm": 1.0006003379821777, | |
| "learning_rate": 1.6916927216601593e-05, | |
| "loss": 0.6098, | |
| "num_input_tokens_seen": 20232784, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.6056, | |
| "grad_norm": 0.9714456796646118, | |
| "learning_rate": 1.6857495735464195e-05, | |
| "loss": 0.812, | |
| "num_input_tokens_seen": 20262256, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 0.6064, | |
| "grad_norm": 1.226090431213379, | |
| "learning_rate": 1.6798115686809125e-05, | |
| "loss": 0.6337, | |
| "num_input_tokens_seen": 20290720, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.6072, | |
| "grad_norm": 1.2579602003097534, | |
| "learning_rate": 1.6738787445713037e-05, | |
| "loss": 0.7105, | |
| "num_input_tokens_seen": 20314368, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.9636203050613403, | |
| "learning_rate": 1.6679511386925337e-05, | |
| "loss": 0.7776, | |
| "num_input_tokens_seen": 20337648, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.6088, | |
| "grad_norm": 0.8365712761878967, | |
| "learning_rate": 1.662028788486583e-05, | |
| "loss": 0.6626, | |
| "num_input_tokens_seen": 20367344, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 0.6096, | |
| "grad_norm": 1.340468168258667, | |
| "learning_rate": 1.656111731362236e-05, | |
| "loss": 0.6983, | |
| "num_input_tokens_seen": 20391616, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.6104, | |
| "grad_norm": 0.878955066204071, | |
| "learning_rate": 1.650200004694839e-05, | |
| "loss": 0.6948, | |
| "num_input_tokens_seen": 20419520, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 0.6112, | |
| "grad_norm": 0.9543726444244385, | |
| "learning_rate": 1.644293645826072e-05, | |
| "loss": 0.8154, | |
| "num_input_tokens_seen": 20446048, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.612, | |
| "grad_norm": 1.2340530157089233, | |
| "learning_rate": 1.6383926920637077e-05, | |
| "loss": 0.7234, | |
| "num_input_tokens_seen": 20472960, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 0.6128, | |
| "grad_norm": 0.8794097900390625, | |
| "learning_rate": 1.6324971806813767e-05, | |
| "loss": 0.668, | |
| "num_input_tokens_seen": 20502816, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.6136, | |
| "grad_norm": 1.0433683395385742, | |
| "learning_rate": 1.6266071489183327e-05, | |
| "loss": 0.6936, | |
| "num_input_tokens_seen": 20529056, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 0.6144, | |
| "grad_norm": 1.3372730016708374, | |
| "learning_rate": 1.620722633979219e-05, | |
| "loss": 0.7988, | |
| "num_input_tokens_seen": 20555392, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.6152, | |
| "grad_norm": 1.0201383829116821, | |
| "learning_rate": 1.614843673033828e-05, | |
| "loss": 0.7752, | |
| "num_input_tokens_seen": 20583888, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 0.616, | |
| "grad_norm": 0.7360509634017944, | |
| "learning_rate": 1.6089703032168733e-05, | |
| "loss": 0.6784, | |
| "num_input_tokens_seen": 20612112, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.6168, | |
| "grad_norm": 0.8650053143501282, | |
| "learning_rate": 1.603102561627751e-05, | |
| "loss": 0.6483, | |
| "num_input_tokens_seen": 20639296, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 0.6176, | |
| "grad_norm": 0.9596717953681946, | |
| "learning_rate": 1.5972404853303062e-05, | |
| "loss": 0.6876, | |
| "num_input_tokens_seen": 20663680, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.6184, | |
| "grad_norm": 0.9903700947761536, | |
| "learning_rate": 1.5913841113525992e-05, | |
| "loss": 0.7651, | |
| "num_input_tokens_seen": 20690592, | |
| "step": 3865 | |
| }, | |
| { | |
| "epoch": 0.6192, | |
| "grad_norm": 1.0361056327819824, | |
| "learning_rate": 1.585533476686669e-05, | |
| "loss": 0.6692, | |
| "num_input_tokens_seen": 20716944, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 1.3285175561904907, | |
| "learning_rate": 1.5796886182883053e-05, | |
| "loss": 0.708, | |
| "num_input_tokens_seen": 20742128, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 0.6208, | |
| "grad_norm": 0.9742456078529358, | |
| "learning_rate": 1.5738495730768105e-05, | |
| "loss": 0.6734, | |
| "num_input_tokens_seen": 20769344, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.6216, | |
| "grad_norm": 0.9866467118263245, | |
| "learning_rate": 1.5680163779347667e-05, | |
| "loss": 0.7442, | |
| "num_input_tokens_seen": 20793920, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 0.6224, | |
| "grad_norm": 1.2530503273010254, | |
| "learning_rate": 1.562189069707807e-05, | |
| "loss": 0.786, | |
| "num_input_tokens_seen": 20819616, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.6232, | |
| "grad_norm": 1.0180388689041138, | |
| "learning_rate": 1.556367685204374e-05, | |
| "loss": 0.6565, | |
| "num_input_tokens_seen": 20843056, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 0.624, | |
| "grad_norm": 1.1570924520492554, | |
| "learning_rate": 1.5505522611954975e-05, | |
| "loss": 0.8403, | |
| "num_input_tokens_seen": 20870320, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.6248, | |
| "grad_norm": 0.9555189609527588, | |
| "learning_rate": 1.5447428344145563e-05, | |
| "loss": 0.717, | |
| "num_input_tokens_seen": 20894448, | |
| "step": 3905 | |
| }, | |
| { | |
| "epoch": 0.6256, | |
| "grad_norm": 0.7047298550605774, | |
| "learning_rate": 1.538939441557048e-05, | |
| "loss": 0.6563, | |
| "num_input_tokens_seen": 20926800, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.6264, | |
| "grad_norm": 1.1212091445922852, | |
| "learning_rate": 1.5331421192803565e-05, | |
| "loss": 0.7742, | |
| "num_input_tokens_seen": 20954016, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 0.6272, | |
| "grad_norm": 0.9030645489692688, | |
| "learning_rate": 1.5273509042035172e-05, | |
| "loss": 0.6654, | |
| "num_input_tokens_seen": 20982512, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.628, | |
| "grad_norm": 0.9414677619934082, | |
| "learning_rate": 1.521565832906994e-05, | |
| "loss": 0.6737, | |
| "num_input_tokens_seen": 21008768, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 0.6288, | |
| "grad_norm": 1.1415228843688965, | |
| "learning_rate": 1.515786941932441e-05, | |
| "loss": 0.7259, | |
| "num_input_tokens_seen": 21038144, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.6296, | |
| "grad_norm": 1.0087826251983643, | |
| "learning_rate": 1.5100142677824753e-05, | |
| "loss": 0.6793, | |
| "num_input_tokens_seen": 21065120, | |
| "step": 3935 | |
| }, | |
| { | |
| "epoch": 0.6304, | |
| "grad_norm": 1.3329883813858032, | |
| "learning_rate": 1.5042478469204435e-05, | |
| "loss": 0.6934, | |
| "num_input_tokens_seen": 21091296, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.6312, | |
| "grad_norm": 0.9850925803184509, | |
| "learning_rate": 1.4984877157701932e-05, | |
| "loss": 0.7746, | |
| "num_input_tokens_seen": 21117568, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 0.632, | |
| "grad_norm": 0.8925058245658875, | |
| "learning_rate": 1.4927339107158437e-05, | |
| "loss": 0.6311, | |
| "num_input_tokens_seen": 21146640, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.6328, | |
| "grad_norm": 1.2707431316375732, | |
| "learning_rate": 1.486986468101555e-05, | |
| "loss": 0.7614, | |
| "num_input_tokens_seen": 21169680, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 0.6336, | |
| "grad_norm": 1.0344791412353516, | |
| "learning_rate": 1.4812454242312979e-05, | |
| "loss": 0.7291, | |
| "num_input_tokens_seen": 21195360, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.6344, | |
| "grad_norm": 0.8999541997909546, | |
| "learning_rate": 1.4755108153686275e-05, | |
| "loss": 0.7421, | |
| "num_input_tokens_seen": 21218896, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 0.6352, | |
| "grad_norm": 1.3539083003997803, | |
| "learning_rate": 1.4697826777364477e-05, | |
| "loss": 0.7831, | |
| "num_input_tokens_seen": 21244080, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.636, | |
| "grad_norm": 0.9629884362220764, | |
| "learning_rate": 1.4640610475167898e-05, | |
| "loss": 0.6907, | |
| "num_input_tokens_seen": 21271024, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 0.6368, | |
| "grad_norm": 0.9040243625640869, | |
| "learning_rate": 1.4583459608505801e-05, | |
| "loss": 0.7001, | |
| "num_input_tokens_seen": 21298992, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.6376, | |
| "grad_norm": 1.122290849685669, | |
| "learning_rate": 1.4526374538374132e-05, | |
| "loss": 0.6729, | |
| "num_input_tokens_seen": 21324032, | |
| "step": 3985 | |
| }, | |
| { | |
| "epoch": 0.6384, | |
| "grad_norm": 0.8082273602485657, | |
| "learning_rate": 1.4469355625353198e-05, | |
| "loss": 0.6636, | |
| "num_input_tokens_seen": 21354256, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.6392, | |
| "grad_norm": 0.7639278173446655, | |
| "learning_rate": 1.4412403229605454e-05, | |
| "loss": 0.6349, | |
| "num_input_tokens_seen": 21382144, | |
| "step": 3995 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.0013383626937866, | |
| "learning_rate": 1.4355517710873184e-05, | |
| "loss": 0.6892, | |
| "num_input_tokens_seen": 21408112, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.6408, | |
| "grad_norm": 0.918889582157135, | |
| "learning_rate": 1.4298699428476236e-05, | |
| "loss": 0.6602, | |
| "num_input_tokens_seen": 21438800, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 0.6416, | |
| "grad_norm": 0.9448719620704651, | |
| "learning_rate": 1.4241948741309782e-05, | |
| "loss": 0.6613, | |
| "num_input_tokens_seen": 21466464, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.6424, | |
| "grad_norm": 1.1950000524520874, | |
| "learning_rate": 1.418526600784198e-05, | |
| "loss": 0.6821, | |
| "num_input_tokens_seen": 21496864, | |
| "step": 4015 | |
| }, | |
| { | |
| "epoch": 0.6432, | |
| "grad_norm": 1.0359631776809692, | |
| "learning_rate": 1.412865158611179e-05, | |
| "loss": 0.6698, | |
| "num_input_tokens_seen": 21523456, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.644, | |
| "grad_norm": 0.9636697769165039, | |
| "learning_rate": 1.4072105833726684e-05, | |
| "loss": 0.5917, | |
| "num_input_tokens_seen": 21554320, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 0.6448, | |
| "grad_norm": 0.8568287491798401, | |
| "learning_rate": 1.401562910786034e-05, | |
| "loss": 0.7332, | |
| "num_input_tokens_seen": 21584496, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.6456, | |
| "grad_norm": 0.7950714230537415, | |
| "learning_rate": 1.3959221765250469e-05, | |
| "loss": 0.6826, | |
| "num_input_tokens_seen": 21615104, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 0.6464, | |
| "grad_norm": 0.9343571662902832, | |
| "learning_rate": 1.3902884162196508e-05, | |
| "loss": 0.7349, | |
| "num_input_tokens_seen": 21642144, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.6472, | |
| "grad_norm": 0.8434100151062012, | |
| "learning_rate": 1.3846616654557362e-05, | |
| "loss": 0.6341, | |
| "num_input_tokens_seen": 21671408, | |
| "step": 4045 | |
| }, | |
| { | |
| "epoch": 0.648, | |
| "grad_norm": 1.1461455821990967, | |
| "learning_rate": 1.3790419597749199e-05, | |
| "loss": 0.7531, | |
| "num_input_tokens_seen": 21698880, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.6488, | |
| "grad_norm": 1.261234164237976, | |
| "learning_rate": 1.3734293346743168e-05, | |
| "loss": 0.6284, | |
| "num_input_tokens_seen": 21727280, | |
| "step": 4055 | |
| }, | |
| { | |
| "epoch": 0.6496, | |
| "grad_norm": 1.0802944898605347, | |
| "learning_rate": 1.367823825606319e-05, | |
| "loss": 0.7148, | |
| "num_input_tokens_seen": 21751824, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.6504, | |
| "grad_norm": 1.1353379487991333, | |
| "learning_rate": 1.3622254679783663e-05, | |
| "loss": 0.7022, | |
| "num_input_tokens_seen": 21782080, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 0.6512, | |
| "grad_norm": 1.0912383794784546, | |
| "learning_rate": 1.3566342971527291e-05, | |
| "loss": 0.7308, | |
| "num_input_tokens_seen": 21809376, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.652, | |
| "grad_norm": 1.0142539739608765, | |
| "learning_rate": 1.3510503484462805e-05, | |
| "loss": 0.7338, | |
| "num_input_tokens_seen": 21836240, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 0.6528, | |
| "grad_norm": 1.0957950353622437, | |
| "learning_rate": 1.3454736571302763e-05, | |
| "loss": 0.6941, | |
| "num_input_tokens_seen": 21862768, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.6536, | |
| "grad_norm": 0.9035006761550903, | |
| "learning_rate": 1.3399042584301298e-05, | |
| "loss": 0.7218, | |
| "num_input_tokens_seen": 21890304, | |
| "step": 4085 | |
| }, | |
| { | |
| "epoch": 0.6544, | |
| "grad_norm": 1.0284723043441772, | |
| "learning_rate": 1.3343421875251888e-05, | |
| "loss": 0.8144, | |
| "num_input_tokens_seen": 21912192, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.6552, | |
| "grad_norm": 1.0489941835403442, | |
| "learning_rate": 1.3287874795485167e-05, | |
| "loss": 0.8236, | |
| "num_input_tokens_seen": 21939984, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 0.656, | |
| "grad_norm": 0.9491598606109619, | |
| "learning_rate": 1.3232401695866687e-05, | |
| "loss": 0.6512, | |
| "num_input_tokens_seen": 21967168, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.6568, | |
| "grad_norm": 1.0019705295562744, | |
| "learning_rate": 1.3177002926794685e-05, | |
| "loss": 0.7271, | |
| "num_input_tokens_seen": 21999904, | |
| "step": 4105 | |
| }, | |
| { | |
| "epoch": 0.6576, | |
| "grad_norm": 1.0153288841247559, | |
| "learning_rate": 1.3121678838197909e-05, | |
| "loss": 0.635, | |
| "num_input_tokens_seen": 22028272, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.6584, | |
| "grad_norm": 0.8823714852333069, | |
| "learning_rate": 1.3066429779533351e-05, | |
| "loss": 0.6708, | |
| "num_input_tokens_seen": 22052224, | |
| "step": 4115 | |
| }, | |
| { | |
| "epoch": 0.6592, | |
| "grad_norm": 0.8675245642662048, | |
| "learning_rate": 1.3011256099784103e-05, | |
| "loss": 0.5916, | |
| "num_input_tokens_seen": 22081360, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.1484395265579224, | |
| "learning_rate": 1.2956158147457115e-05, | |
| "loss": 0.7399, | |
| "num_input_tokens_seen": 22112080, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 0.6608, | |
| "grad_norm": 1.2655909061431885, | |
| "learning_rate": 1.2901136270580993e-05, | |
| "loss": 0.6543, | |
| "num_input_tokens_seen": 22139792, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.6616, | |
| "grad_norm": 1.2049787044525146, | |
| "learning_rate": 1.2846190816703835e-05, | |
| "loss": 0.6808, | |
| "num_input_tokens_seen": 22163136, | |
| "step": 4135 | |
| }, | |
| { | |
| "epoch": 0.6624, | |
| "grad_norm": 0.7781268358230591, | |
| "learning_rate": 1.279132213289096e-05, | |
| "loss": 0.6939, | |
| "num_input_tokens_seen": 22192464, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.6632, | |
| "grad_norm": 1.1952673196792603, | |
| "learning_rate": 1.273653056572282e-05, | |
| "loss": 0.6628, | |
| "num_input_tokens_seen": 22219424, | |
| "step": 4145 | |
| }, | |
| { | |
| "epoch": 0.664, | |
| "grad_norm": 1.0534050464630127, | |
| "learning_rate": 1.2681816461292715e-05, | |
| "loss": 0.686, | |
| "num_input_tokens_seen": 22244496, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.6648, | |
| "grad_norm": 1.18624746799469, | |
| "learning_rate": 1.2627180165204671e-05, | |
| "loss": 0.7278, | |
| "num_input_tokens_seen": 22271600, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 0.6656, | |
| "grad_norm": 0.8680349588394165, | |
| "learning_rate": 1.257262202257124e-05, | |
| "loss": 0.6531, | |
| "num_input_tokens_seen": 22298080, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.6664, | |
| "grad_norm": 0.90425705909729, | |
| "learning_rate": 1.251814237801128e-05, | |
| "loss": 0.5756, | |
| "num_input_tokens_seen": 22324832, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 0.6672, | |
| "grad_norm": 1.0510259866714478, | |
| "learning_rate": 1.246374157564785e-05, | |
| "loss": 0.647, | |
| "num_input_tokens_seen": 22353728, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.668, | |
| "grad_norm": 1.1430630683898926, | |
| "learning_rate": 1.2409419959105981e-05, | |
| "loss": 0.7024, | |
| "num_input_tokens_seen": 22374880, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 0.6688, | |
| "grad_norm": 0.8265404105186462, | |
| "learning_rate": 1.2355177871510538e-05, | |
| "loss": 0.7661, | |
| "num_input_tokens_seen": 22402288, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.6696, | |
| "grad_norm": 0.8584622144699097, | |
| "learning_rate": 1.2301015655484006e-05, | |
| "loss": 0.6462, | |
| "num_input_tokens_seen": 22430240, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 0.6704, | |
| "grad_norm": 1.0526131391525269, | |
| "learning_rate": 1.2246933653144385e-05, | |
| "loss": 0.6487, | |
| "num_input_tokens_seen": 22454800, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.6712, | |
| "grad_norm": 1.1912094354629517, | |
| "learning_rate": 1.2192932206103e-05, | |
| "loss": 0.7369, | |
| "num_input_tokens_seen": 22482528, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.8804354071617126, | |
| "learning_rate": 1.2139011655462337e-05, | |
| "loss": 0.6942, | |
| "num_input_tokens_seen": 22508976, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.6728, | |
| "grad_norm": 0.9333446025848389, | |
| "learning_rate": 1.2085172341813911e-05, | |
| "loss": 0.7691, | |
| "num_input_tokens_seen": 22538976, | |
| "step": 4205 | |
| }, | |
| { | |
| "epoch": 0.6736, | |
| "grad_norm": 0.8501102328300476, | |
| "learning_rate": 1.2031414605236066e-05, | |
| "loss": 0.5865, | |
| "num_input_tokens_seen": 22566368, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.6744, | |
| "grad_norm": 0.89410001039505, | |
| "learning_rate": 1.1977738785291895e-05, | |
| "loss": 0.6916, | |
| "num_input_tokens_seen": 22592656, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 0.6752, | |
| "grad_norm": 0.9672756195068359, | |
| "learning_rate": 1.1924145221027047e-05, | |
| "loss": 0.7436, | |
| "num_input_tokens_seen": 22619872, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.676, | |
| "grad_norm": 0.8680210113525391, | |
| "learning_rate": 1.1870634250967605e-05, | |
| "loss": 0.6728, | |
| "num_input_tokens_seen": 22650320, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 0.6768, | |
| "grad_norm": 1.0170356035232544, | |
| "learning_rate": 1.1817206213117946e-05, | |
| "loss": 0.728, | |
| "num_input_tokens_seen": 22676896, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.6776, | |
| "grad_norm": 1.0950289964675903, | |
| "learning_rate": 1.1763861444958573e-05, | |
| "loss": 0.6696, | |
| "num_input_tokens_seen": 22702352, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 0.6784, | |
| "grad_norm": 1.2183908224105835, | |
| "learning_rate": 1.1710600283444047e-05, | |
| "loss": 0.7827, | |
| "num_input_tokens_seen": 22728288, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.6792, | |
| "grad_norm": 0.9134287238121033, | |
| "learning_rate": 1.1657423065000811e-05, | |
| "loss": 0.7166, | |
| "num_input_tokens_seen": 22757632, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.0772439241409302, | |
| "learning_rate": 1.1604330125525079e-05, | |
| "loss": 0.7143, | |
| "num_input_tokens_seen": 22783440, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.6808, | |
| "grad_norm": 1.003915786743164, | |
| "learning_rate": 1.155132180038072e-05, | |
| "loss": 0.82, | |
| "num_input_tokens_seen": 22809616, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 0.6816, | |
| "grad_norm": 0.9822829961776733, | |
| "learning_rate": 1.1498398424397106e-05, | |
| "loss": 0.7297, | |
| "num_input_tokens_seen": 22835792, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.6824, | |
| "grad_norm": 0.855888307094574, | |
| "learning_rate": 1.1445560331867053e-05, | |
| "loss": 0.6956, | |
| "num_input_tokens_seen": 22864560, | |
| "step": 4265 | |
| }, | |
| { | |
| "epoch": 0.6832, | |
| "grad_norm": 0.850237250328064, | |
| "learning_rate": 1.1392807856544683e-05, | |
| "loss": 0.7157, | |
| "num_input_tokens_seen": 22892912, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.684, | |
| "grad_norm": 1.4631861448287964, | |
| "learning_rate": 1.1340141331643276e-05, | |
| "loss": 0.753, | |
| "num_input_tokens_seen": 22912640, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 0.6848, | |
| "grad_norm": 1.1579556465148926, | |
| "learning_rate": 1.1287561089833248e-05, | |
| "loss": 0.7247, | |
| "num_input_tokens_seen": 22937072, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.6856, | |
| "grad_norm": 1.3090944290161133, | |
| "learning_rate": 1.1235067463239967e-05, | |
| "loss": 0.7671, | |
| "num_input_tokens_seen": 22961104, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 0.6864, | |
| "grad_norm": 0.9593985676765442, | |
| "learning_rate": 1.1182660783441718e-05, | |
| "loss": 0.6771, | |
| "num_input_tokens_seen": 22983744, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.6872, | |
| "grad_norm": 0.6641414165496826, | |
| "learning_rate": 1.1130341381467569e-05, | |
| "loss": 0.7179, | |
| "num_input_tokens_seen": 23010208, | |
| "step": 4295 | |
| }, | |
| { | |
| "epoch": 0.688, | |
| "grad_norm": 0.6973831057548523, | |
| "learning_rate": 1.107810958779531e-05, | |
| "loss": 0.5792, | |
| "num_input_tokens_seen": 23043392, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.6888, | |
| "grad_norm": 0.9109097123146057, | |
| "learning_rate": 1.1025965732349316e-05, | |
| "loss": 0.6574, | |
| "num_input_tokens_seen": 23074928, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 0.6896, | |
| "grad_norm": 0.8845970034599304, | |
| "learning_rate": 1.0973910144498534e-05, | |
| "loss": 0.6544, | |
| "num_input_tokens_seen": 23105728, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.6904, | |
| "grad_norm": 1.181096076965332, | |
| "learning_rate": 1.0921943153054343e-05, | |
| "loss": 0.6503, | |
| "num_input_tokens_seen": 23132768, | |
| "step": 4315 | |
| }, | |
| { | |
| "epoch": 0.6912, | |
| "grad_norm": 0.785658061504364, | |
| "learning_rate": 1.0870065086268505e-05, | |
| "loss": 0.6502, | |
| "num_input_tokens_seen": 23160080, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.692, | |
| "grad_norm": 1.1847856044769287, | |
| "learning_rate": 1.0818276271831093e-05, | |
| "loss": 0.7114, | |
| "num_input_tokens_seen": 23187696, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 0.6928, | |
| "grad_norm": 1.1500554084777832, | |
| "learning_rate": 1.0766577036868395e-05, | |
| "loss": 0.6546, | |
| "num_input_tokens_seen": 23211904, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.6936, | |
| "grad_norm": 0.9399601221084595, | |
| "learning_rate": 1.0714967707940875e-05, | |
| "loss": 0.6965, | |
| "num_input_tokens_seen": 23238144, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 0.6944, | |
| "grad_norm": 0.928415060043335, | |
| "learning_rate": 1.0663448611041113e-05, | |
| "loss": 0.6232, | |
| "num_input_tokens_seen": 23267104, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.6952, | |
| "grad_norm": 1.2702407836914062, | |
| "learning_rate": 1.0612020071591722e-05, | |
| "loss": 0.6686, | |
| "num_input_tokens_seen": 23298976, | |
| "step": 4345 | |
| }, | |
| { | |
| "epoch": 0.696, | |
| "grad_norm": 1.1251682043075562, | |
| "learning_rate": 1.0560682414443315e-05, | |
| "loss": 0.6975, | |
| "num_input_tokens_seen": 23329552, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.6968, | |
| "grad_norm": 0.8569183945655823, | |
| "learning_rate": 1.0509435963872422e-05, | |
| "loss": 0.7017, | |
| "num_input_tokens_seen": 23359664, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 0.6976, | |
| "grad_norm": 0.9474520683288574, | |
| "learning_rate": 1.0458281043579482e-05, | |
| "loss": 0.6856, | |
| "num_input_tokens_seen": 23386320, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.6984, | |
| "grad_norm": 1.3578598499298096, | |
| "learning_rate": 1.0407217976686775e-05, | |
| "loss": 0.6739, | |
| "num_input_tokens_seen": 23416512, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 0.6992, | |
| "grad_norm": 1.1748965978622437, | |
| "learning_rate": 1.0356247085736386e-05, | |
| "loss": 0.6803, | |
| "num_input_tokens_seen": 23439904, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 0.7888918519020081, | |
| "learning_rate": 1.0305368692688174e-05, | |
| "loss": 0.7111, | |
| "num_input_tokens_seen": 23466256, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 0.7008, | |
| "grad_norm": 1.137215256690979, | |
| "learning_rate": 1.0254583118917698e-05, | |
| "loss": 0.762, | |
| "num_input_tokens_seen": 23493536, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.7016, | |
| "grad_norm": 1.337811827659607, | |
| "learning_rate": 1.020389068521426e-05, | |
| "loss": 0.6206, | |
| "num_input_tokens_seen": 23515632, | |
| "step": 4385 | |
| }, | |
| { | |
| "epoch": 0.7024, | |
| "grad_norm": 1.0022634267807007, | |
| "learning_rate": 1.0153291711778826e-05, | |
| "loss": 0.6711, | |
| "num_input_tokens_seen": 23541152, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.7032, | |
| "grad_norm": 1.339572548866272, | |
| "learning_rate": 1.0102786518221997e-05, | |
| "loss": 0.7606, | |
| "num_input_tokens_seen": 23565424, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.9340786337852478, | |
| "learning_rate": 1.0052375423562038e-05, | |
| "loss": 0.7306, | |
| "num_input_tokens_seen": 23592256, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.7048, | |
| "grad_norm": 1.1283642053604126, | |
| "learning_rate": 1.0002058746222806e-05, | |
| "loss": 0.6121, | |
| "num_input_tokens_seen": 23622240, | |
| "step": 4405 | |
| }, | |
| { | |
| "epoch": 0.7056, | |
| "grad_norm": 0.7319700717926025, | |
| "learning_rate": 9.951836804031794e-06, | |
| "loss": 0.6273, | |
| "num_input_tokens_seen": 23650544, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.7064, | |
| "grad_norm": 1.1631896495819092, | |
| "learning_rate": 9.90170991421808e-06, | |
| "loss": 0.7261, | |
| "num_input_tokens_seen": 23676016, | |
| "step": 4415 | |
| }, | |
| { | |
| "epoch": 0.7072, | |
| "grad_norm": 0.7371265292167664, | |
| "learning_rate": 9.851678393410343e-06, | |
| "loss": 0.8013, | |
| "num_input_tokens_seen": 23701232, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.708, | |
| "grad_norm": 0.8485360741615295, | |
| "learning_rate": 9.801742557634872e-06, | |
| "loss": 0.7189, | |
| "num_input_tokens_seen": 23731984, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 0.7088, | |
| "grad_norm": 0.958996057510376, | |
| "learning_rate": 9.751902722313527e-06, | |
| "loss": 0.7397, | |
| "num_input_tokens_seen": 23756560, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.7096, | |
| "grad_norm": 0.9431763887405396, | |
| "learning_rate": 9.702159202261801e-06, | |
| "loss": 0.625, | |
| "num_input_tokens_seen": 23785504, | |
| "step": 4435 | |
| }, | |
| { | |
| "epoch": 0.7104, | |
| "grad_norm": 1.2089407444000244, | |
| "learning_rate": 9.652512311686809e-06, | |
| "loss": 0.7958, | |
| "num_input_tokens_seen": 23811840, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.7112, | |
| "grad_norm": 1.212649941444397, | |
| "learning_rate": 9.602962364185286e-06, | |
| "loss": 0.7092, | |
| "num_input_tokens_seen": 23834688, | |
| "step": 4445 | |
| }, | |
| { | |
| "epoch": 0.712, | |
| "grad_norm": 0.8656122088432312, | |
| "learning_rate": 9.553509672741645e-06, | |
| "loss": 0.6516, | |
| "num_input_tokens_seen": 23858736, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.7128, | |
| "grad_norm": 0.8871904611587524, | |
| "learning_rate": 9.504154549725943e-06, | |
| "loss": 0.6276, | |
| "num_input_tokens_seen": 23883696, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 0.7136, | |
| "grad_norm": 0.8539274334907532, | |
| "learning_rate": 9.454897306891972e-06, | |
| "loss": 0.6741, | |
| "num_input_tokens_seen": 23909904, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.7144, | |
| "grad_norm": 1.0730023384094238, | |
| "learning_rate": 9.405738255375244e-06, | |
| "loss": 0.7054, | |
| "num_input_tokens_seen": 23933056, | |
| "step": 4465 | |
| }, | |
| { | |
| "epoch": 0.7152, | |
| "grad_norm": 1.2047233581542969, | |
| "learning_rate": 9.356677705691058e-06, | |
| "loss": 0.7593, | |
| "num_input_tokens_seen": 23957440, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.716, | |
| "grad_norm": 0.8580662608146667, | |
| "learning_rate": 9.307715967732491e-06, | |
| "loss": 0.6264, | |
| "num_input_tokens_seen": 23985088, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 0.7168, | |
| "grad_norm": 0.884903073310852, | |
| "learning_rate": 9.258853350768499e-06, | |
| "loss": 0.6596, | |
| "num_input_tokens_seen": 24010448, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.7176, | |
| "grad_norm": 0.9185977578163147, | |
| "learning_rate": 9.210090163441929e-06, | |
| "loss": 0.7053, | |
| "num_input_tokens_seen": 24035040, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 0.7184, | |
| "grad_norm": 1.3897747993469238, | |
| "learning_rate": 9.161426713767574e-06, | |
| "loss": 0.6769, | |
| "num_input_tokens_seen": 24057872, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.7192, | |
| "grad_norm": 1.220688819885254, | |
| "learning_rate": 9.112863309130235e-06, | |
| "loss": 0.7486, | |
| "num_input_tokens_seen": 24077920, | |
| "step": 4495 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.9036649465560913, | |
| "learning_rate": 9.064400256282757e-06, | |
| "loss": 0.765, | |
| "num_input_tokens_seen": 24104320, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.7208, | |
| "grad_norm": 0.7980133891105652, | |
| "learning_rate": 9.016037861344129e-06, | |
| "loss": 0.653, | |
| "num_input_tokens_seen": 24134144, | |
| "step": 4505 | |
| }, | |
| { | |
| "epoch": 0.7216, | |
| "grad_norm": 0.7849147915840149, | |
| "learning_rate": 8.967776429797528e-06, | |
| "loss": 0.6412, | |
| "num_input_tokens_seen": 24164576, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.7224, | |
| "grad_norm": 0.8543937802314758, | |
| "learning_rate": 8.919616266488373e-06, | |
| "loss": 0.7113, | |
| "num_input_tokens_seen": 24192736, | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 0.7232, | |
| "grad_norm": 0.9191213250160217, | |
| "learning_rate": 8.871557675622441e-06, | |
| "loss": 0.8171, | |
| "num_input_tokens_seen": 24218064, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.724, | |
| "grad_norm": 1.1177440881729126, | |
| "learning_rate": 8.8236009607639e-06, | |
| "loss": 0.7845, | |
| "num_input_tokens_seen": 24244832, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 0.7248, | |
| "grad_norm": 0.899111807346344, | |
| "learning_rate": 8.775746424833427e-06, | |
| "loss": 0.7025, | |
| "num_input_tokens_seen": 24272848, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.7256, | |
| "grad_norm": 1.1424217224121094, | |
| "learning_rate": 8.727994370106288e-06, | |
| "loss": 0.868, | |
| "num_input_tokens_seen": 24298240, | |
| "step": 4535 | |
| }, | |
| { | |
| "epoch": 0.7264, | |
| "grad_norm": 0.9559049010276794, | |
| "learning_rate": 8.680345098210408e-06, | |
| "loss": 0.6285, | |
| "num_input_tokens_seen": 24327776, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.7272, | |
| "grad_norm": 0.9032924771308899, | |
| "learning_rate": 8.632798910124492e-06, | |
| "loss": 0.6583, | |
| "num_input_tokens_seen": 24355424, | |
| "step": 4545 | |
| }, | |
| { | |
| "epoch": 0.728, | |
| "grad_norm": 1.056780457496643, | |
| "learning_rate": 8.585356106176094e-06, | |
| "loss": 0.756, | |
| "num_input_tokens_seen": 24381104, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.7288, | |
| "grad_norm": 1.2001997232437134, | |
| "learning_rate": 8.538016986039754e-06, | |
| "loss": 0.7739, | |
| "num_input_tokens_seen": 24403760, | |
| "step": 4555 | |
| }, | |
| { | |
| "epoch": 0.7296, | |
| "grad_norm": 1.1103582382202148, | |
| "learning_rate": 8.49078184873508e-06, | |
| "loss": 0.6998, | |
| "num_input_tokens_seen": 24431280, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.7304, | |
| "grad_norm": 1.0271393060684204, | |
| "learning_rate": 8.443650992624877e-06, | |
| "loss": 0.723, | |
| "num_input_tokens_seen": 24459120, | |
| "step": 4565 | |
| }, | |
| { | |
| "epoch": 0.7312, | |
| "grad_norm": 0.7871257066726685, | |
| "learning_rate": 8.39662471541325e-06, | |
| "loss": 0.7225, | |
| "num_input_tokens_seen": 24485152, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.732, | |
| "grad_norm": 0.8319628238677979, | |
| "learning_rate": 8.34970331414371e-06, | |
| "loss": 0.5801, | |
| "num_input_tokens_seen": 24512416, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 0.7328, | |
| "grad_norm": 0.7009981274604797, | |
| "learning_rate": 8.302887085197341e-06, | |
| "loss": 0.6043, | |
| "num_input_tokens_seen": 24543328, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.7336, | |
| "grad_norm": 1.0223398208618164, | |
| "learning_rate": 8.256176324290885e-06, | |
| "loss": 0.6533, | |
| "num_input_tokens_seen": 24566000, | |
| "step": 4585 | |
| }, | |
| { | |
| "epoch": 0.7344, | |
| "grad_norm": 1.127424955368042, | |
| "learning_rate": 8.209571326474896e-06, | |
| "loss": 0.6906, | |
| "num_input_tokens_seen": 24594080, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.7352, | |
| "grad_norm": 0.9771124124526978, | |
| "learning_rate": 8.163072386131876e-06, | |
| "loss": 0.6661, | |
| "num_input_tokens_seen": 24621424, | |
| "step": 4595 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.859312117099762, | |
| "learning_rate": 8.116679796974392e-06, | |
| "loss": 0.6663, | |
| "num_input_tokens_seen": 24644288, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.7368, | |
| "grad_norm": 1.3819899559020996, | |
| "learning_rate": 8.070393852043251e-06, | |
| "loss": 0.7064, | |
| "num_input_tokens_seen": 24674048, | |
| "step": 4605 | |
| }, | |
| { | |
| "epoch": 0.7376, | |
| "grad_norm": 1.034734845161438, | |
| "learning_rate": 8.024214843705646e-06, | |
| "loss": 0.6837, | |
| "num_input_tokens_seen": 24696320, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.7384, | |
| "grad_norm": 0.9610295295715332, | |
| "learning_rate": 7.978143063653298e-06, | |
| "loss": 0.5342, | |
| "num_input_tokens_seen": 24729280, | |
| "step": 4615 | |
| }, | |
| { | |
| "epoch": 0.7392, | |
| "grad_norm": 1.166585922241211, | |
| "learning_rate": 7.93217880290059e-06, | |
| "loss": 0.6907, | |
| "num_input_tokens_seen": 24758080, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.1341148614883423, | |
| "learning_rate": 7.886322351782783e-06, | |
| "loss": 0.6856, | |
| "num_input_tokens_seen": 24787472, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 0.7408, | |
| "grad_norm": 0.9481520056724548, | |
| "learning_rate": 7.840573999954153e-06, | |
| "loss": 0.713, | |
| "num_input_tokens_seen": 24815936, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.7416, | |
| "grad_norm": 1.2403899431228638, | |
| "learning_rate": 7.79493403638614e-06, | |
| "loss": 0.7692, | |
| "num_input_tokens_seen": 24840096, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 0.7424, | |
| "grad_norm": 0.9576728343963623, | |
| "learning_rate": 7.749402749365572e-06, | |
| "loss": 0.7177, | |
| "num_input_tokens_seen": 24866480, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.7432, | |
| "grad_norm": 1.0239994525909424, | |
| "learning_rate": 7.703980426492791e-06, | |
| "loss": 0.7124, | |
| "num_input_tokens_seen": 24889456, | |
| "step": 4645 | |
| }, | |
| { | |
| "epoch": 0.744, | |
| "grad_norm": 1.0492584705352783, | |
| "learning_rate": 7.658667354679883e-06, | |
| "loss": 0.7038, | |
| "num_input_tokens_seen": 24913824, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.7448, | |
| "grad_norm": 1.1247596740722656, | |
| "learning_rate": 7.613463820148831e-06, | |
| "loss": 0.6662, | |
| "num_input_tokens_seen": 24940880, | |
| "step": 4655 | |
| }, | |
| { | |
| "epoch": 0.7456, | |
| "grad_norm": 1.2390748262405396, | |
| "learning_rate": 7.568370108429732e-06, | |
| "loss": 0.7949, | |
| "num_input_tokens_seen": 24965696, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.7464, | |
| "grad_norm": 0.7792567610740662, | |
| "learning_rate": 7.523386504358984e-06, | |
| "loss": 0.7146, | |
| "num_input_tokens_seen": 24992096, | |
| "step": 4665 | |
| }, | |
| { | |
| "epoch": 0.7472, | |
| "grad_norm": 0.9417341351509094, | |
| "learning_rate": 7.478513292077463e-06, | |
| "loss": 0.669, | |
| "num_input_tokens_seen": 25024320, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.748, | |
| "grad_norm": 1.226563572883606, | |
| "learning_rate": 7.433750755028773e-06, | |
| "loss": 0.7789, | |
| "num_input_tokens_seen": 25049152, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 0.7488, | |
| "grad_norm": 0.8685075640678406, | |
| "learning_rate": 7.389099175957429e-06, | |
| "loss": 0.6992, | |
| "num_input_tokens_seen": 25077328, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.7496, | |
| "grad_norm": 0.7221574187278748, | |
| "learning_rate": 7.344558836907067e-06, | |
| "loss": 0.6421, | |
| "num_input_tokens_seen": 25105008, | |
| "step": 4685 | |
| }, | |
| { | |
| "epoch": 0.7504, | |
| "grad_norm": 0.8624604940414429, | |
| "learning_rate": 7.300130019218687e-06, | |
| "loss": 0.7656, | |
| "num_input_tokens_seen": 25131392, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.7512, | |
| "grad_norm": 1.0061527490615845, | |
| "learning_rate": 7.255813003528833e-06, | |
| "loss": 0.6506, | |
| "num_input_tokens_seen": 25159984, | |
| "step": 4695 | |
| }, | |
| { | |
| "epoch": 0.752, | |
| "grad_norm": 1.0879433155059814, | |
| "learning_rate": 7.211608069767867e-06, | |
| "loss": 0.6253, | |
| "num_input_tokens_seen": 25188192, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.7528, | |
| "grad_norm": 1.2521827220916748, | |
| "learning_rate": 7.1675154971581785e-06, | |
| "loss": 0.6776, | |
| "num_input_tokens_seen": 25215360, | |
| "step": 4705 | |
| }, | |
| { | |
| "epoch": 0.7536, | |
| "grad_norm": 1.4772545099258423, | |
| "learning_rate": 7.123535564212422e-06, | |
| "loss": 0.8286, | |
| "num_input_tokens_seen": 25240384, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.7544, | |
| "grad_norm": 1.0587224960327148, | |
| "learning_rate": 7.079668548731758e-06, | |
| "loss": 0.6152, | |
| "num_input_tokens_seen": 25263104, | |
| "step": 4715 | |
| }, | |
| { | |
| "epoch": 0.7552, | |
| "grad_norm": 1.562467336654663, | |
| "learning_rate": 7.035914727804085e-06, | |
| "loss": 0.7227, | |
| "num_input_tokens_seen": 25288176, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.756, | |
| "grad_norm": 1.3081474304199219, | |
| "learning_rate": 6.992274377802327e-06, | |
| "loss": 0.6808, | |
| "num_input_tokens_seen": 25313536, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 0.7568, | |
| "grad_norm": 1.1339465379714966, | |
| "learning_rate": 6.94874777438265e-06, | |
| "loss": 0.7039, | |
| "num_input_tokens_seen": 25339744, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.7576, | |
| "grad_norm": 0.8575751185417175, | |
| "learning_rate": 6.905335192482737e-06, | |
| "loss": 0.7081, | |
| "num_input_tokens_seen": 25367440, | |
| "step": 4735 | |
| }, | |
| { | |
| "epoch": 0.7584, | |
| "grad_norm": 0.9493206143379211, | |
| "learning_rate": 6.862036906320058e-06, | |
| "loss": 0.6139, | |
| "num_input_tokens_seen": 25395952, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.7592, | |
| "grad_norm": 1.0198074579238892, | |
| "learning_rate": 6.818853189390104e-06, | |
| "loss": 0.8142, | |
| "num_input_tokens_seen": 25421744, | |
| "step": 4745 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.8722714185714722, | |
| "learning_rate": 6.775784314464717e-06, | |
| "loss": 0.6538, | |
| "num_input_tokens_seen": 25448944, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.7608, | |
| "grad_norm": 0.750995397567749, | |
| "learning_rate": 6.732830553590305e-06, | |
| "loss": 0.6409, | |
| "num_input_tokens_seen": 25476640, | |
| "step": 4755 | |
| }, | |
| { | |
| "epoch": 0.7616, | |
| "grad_norm": 1.152645230293274, | |
| "learning_rate": 6.689992178086174e-06, | |
| "loss": 0.6814, | |
| "num_input_tokens_seen": 25503328, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.7624, | |
| "grad_norm": 0.9856323599815369, | |
| "learning_rate": 6.647269458542793e-06, | |
| "loss": 0.739, | |
| "num_input_tokens_seen": 25530384, | |
| "step": 4765 | |
| }, | |
| { | |
| "epoch": 0.7632, | |
| "grad_norm": 1.0248849391937256, | |
| "learning_rate": 6.604662664820063e-06, | |
| "loss": 0.6775, | |
| "num_input_tokens_seen": 25558880, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.764, | |
| "grad_norm": 1.1603997945785522, | |
| "learning_rate": 6.562172066045655e-06, | |
| "loss": 0.8087, | |
| "num_input_tokens_seen": 25584016, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 0.7648, | |
| "grad_norm": 1.0015392303466797, | |
| "learning_rate": 6.519797930613289e-06, | |
| "loss": 0.6836, | |
| "num_input_tokens_seen": 25611712, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.7656, | |
| "grad_norm": 0.929892897605896, | |
| "learning_rate": 6.4775405261810364e-06, | |
| "loss": 0.7174, | |
| "num_input_tokens_seen": 25640928, | |
| "step": 4785 | |
| }, | |
| { | |
| "epoch": 0.7664, | |
| "grad_norm": 1.0972721576690674, | |
| "learning_rate": 6.435400119669618e-06, | |
| "loss": 0.6151, | |
| "num_input_tokens_seen": 25667376, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.7672, | |
| "grad_norm": 1.1308437585830688, | |
| "learning_rate": 6.3933769772607535e-06, | |
| "loss": 0.7291, | |
| "num_input_tokens_seen": 25697136, | |
| "step": 4795 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.9035334587097168, | |
| "learning_rate": 6.3514713643954475e-06, | |
| "loss": 0.7215, | |
| "num_input_tokens_seen": 25718912, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.7688, | |
| "grad_norm": 0.7563897371292114, | |
| "learning_rate": 6.309683545772327e-06, | |
| "loss": 0.7092, | |
| "num_input_tokens_seen": 25746400, | |
| "step": 4805 | |
| }, | |
| { | |
| "epoch": 0.7696, | |
| "grad_norm": 0.8460260629653931, | |
| "learning_rate": 6.268013785345969e-06, | |
| "loss": 0.6675, | |
| "num_input_tokens_seen": 25772192, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.7704, | |
| "grad_norm": 0.8550633788108826, | |
| "learning_rate": 6.226462346325221e-06, | |
| "loss": 0.7428, | |
| "num_input_tokens_seen": 25802256, | |
| "step": 4815 | |
| }, | |
| { | |
| "epoch": 0.7712, | |
| "grad_norm": 1.0745741128921509, | |
| "learning_rate": 6.185029491171554e-06, | |
| "loss": 0.7039, | |
| "num_input_tokens_seen": 25829952, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.772, | |
| "grad_norm": 0.9149506688117981, | |
| "learning_rate": 6.143715481597404e-06, | |
| "loss": 0.6733, | |
| "num_input_tokens_seen": 25854752, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 0.7728, | |
| "grad_norm": 1.1725239753723145, | |
| "learning_rate": 6.102520578564508e-06, | |
| "loss": 0.7872, | |
| "num_input_tokens_seen": 25881264, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.7736, | |
| "grad_norm": 1.1597212553024292, | |
| "learning_rate": 6.061445042282271e-06, | |
| "loss": 0.7586, | |
| "num_input_tokens_seen": 25906064, | |
| "step": 4835 | |
| }, | |
| { | |
| "epoch": 0.7744, | |
| "grad_norm": 0.9395809173583984, | |
| "learning_rate": 6.020489132206089e-06, | |
| "loss": 0.7269, | |
| "num_input_tokens_seen": 25931280, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.7752, | |
| "grad_norm": 0.8174002170562744, | |
| "learning_rate": 5.979653107035757e-06, | |
| "loss": 0.6304, | |
| "num_input_tokens_seen": 25958880, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 0.776, | |
| "grad_norm": 0.9226968884468079, | |
| "learning_rate": 5.9389372247138e-06, | |
| "loss": 0.6855, | |
| "num_input_tokens_seen": 25984528, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.7768, | |
| "grad_norm": 1.0765284299850464, | |
| "learning_rate": 5.898341742423865e-06, | |
| "loss": 0.7141, | |
| "num_input_tokens_seen": 26014272, | |
| "step": 4855 | |
| }, | |
| { | |
| "epoch": 0.7776, | |
| "grad_norm": 0.7940208315849304, | |
| "learning_rate": 5.857866916589089e-06, | |
| "loss": 0.668, | |
| "num_input_tokens_seen": 26045888, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.7784, | |
| "grad_norm": 0.9069024920463562, | |
| "learning_rate": 5.81751300287045e-06, | |
| "loss": 0.7404, | |
| "num_input_tokens_seen": 26069232, | |
| "step": 4865 | |
| }, | |
| { | |
| "epoch": 0.7792, | |
| "grad_norm": 1.2687326669692993, | |
| "learning_rate": 5.777280256165218e-06, | |
| "loss": 0.633, | |
| "num_input_tokens_seen": 26095936, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.0579140186309814, | |
| "learning_rate": 5.737168930605272e-06, | |
| "loss": 0.6365, | |
| "num_input_tokens_seen": 26121184, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 0.7808, | |
| "grad_norm": 0.8767179846763611, | |
| "learning_rate": 5.6971792795555505e-06, | |
| "loss": 0.6427, | |
| "num_input_tokens_seen": 26147504, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.7816, | |
| "grad_norm": 0.9713358283042908, | |
| "learning_rate": 5.6573115556124325e-06, | |
| "loss": 0.6509, | |
| "num_input_tokens_seen": 26174208, | |
| "step": 4885 | |
| }, | |
| { | |
| "epoch": 0.7824, | |
| "grad_norm": 0.7532449960708618, | |
| "learning_rate": 5.617566010602113e-06, | |
| "loss": 0.7382, | |
| "num_input_tokens_seen": 26200112, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.7832, | |
| "grad_norm": 1.191658854484558, | |
| "learning_rate": 5.577942895579064e-06, | |
| "loss": 0.7537, | |
| "num_input_tokens_seen": 26227952, | |
| "step": 4895 | |
| }, | |
| { | |
| "epoch": 0.784, | |
| "grad_norm": 0.9605312943458557, | |
| "learning_rate": 5.538442460824417e-06, | |
| "loss": 0.673, | |
| "num_input_tokens_seen": 26259392, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.7848, | |
| "grad_norm": 1.0074589252471924, | |
| "learning_rate": 5.499064955844382e-06, | |
| "loss": 0.6684, | |
| "num_input_tokens_seen": 26285456, | |
| "step": 4905 | |
| }, | |
| { | |
| "epoch": 0.7856, | |
| "grad_norm": 0.8559053540229797, | |
| "learning_rate": 5.4598106293686916e-06, | |
| "loss": 0.7051, | |
| "num_input_tokens_seen": 26316544, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.7864, | |
| "grad_norm": 1.0885223150253296, | |
| "learning_rate": 5.420679729348993e-06, | |
| "loss": 0.6481, | |
| "num_input_tokens_seen": 26341840, | |
| "step": 4915 | |
| }, | |
| { | |
| "epoch": 0.7872, | |
| "grad_norm": 1.0094472169876099, | |
| "learning_rate": 5.381672502957324e-06, | |
| "loss": 0.7953, | |
| "num_input_tokens_seen": 26371008, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.788, | |
| "grad_norm": 1.1841135025024414, | |
| "learning_rate": 5.342789196584527e-06, | |
| "loss": 0.6966, | |
| "num_input_tokens_seen": 26400048, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 0.7888, | |
| "grad_norm": 0.9742115139961243, | |
| "learning_rate": 5.304030055838705e-06, | |
| "loss": 0.6804, | |
| "num_input_tokens_seen": 26425408, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.7896, | |
| "grad_norm": 1.327427625656128, | |
| "learning_rate": 5.26539532554364e-06, | |
| "loss": 0.6282, | |
| "num_input_tokens_seen": 26452352, | |
| "step": 4935 | |
| }, | |
| { | |
| "epoch": 0.7904, | |
| "grad_norm": 0.9497706890106201, | |
| "learning_rate": 5.226885249737293e-06, | |
| "loss": 0.588, | |
| "num_input_tokens_seen": 26479456, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.7912, | |
| "grad_norm": 1.0179611444473267, | |
| "learning_rate": 5.1885000716702355e-06, | |
| "loss": 0.738, | |
| "num_input_tokens_seen": 26504912, | |
| "step": 4945 | |
| }, | |
| { | |
| "epoch": 0.792, | |
| "grad_norm": 1.0633511543273926, | |
| "learning_rate": 5.150240033804116e-06, | |
| "loss": 0.661, | |
| "num_input_tokens_seen": 26528320, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.7928, | |
| "grad_norm": 0.887589693069458, | |
| "learning_rate": 5.112105377810128e-06, | |
| "loss": 0.8033, | |
| "num_input_tokens_seen": 26553984, | |
| "step": 4955 | |
| }, | |
| { | |
| "epoch": 0.7936, | |
| "grad_norm": 1.0450173616409302, | |
| "learning_rate": 5.074096344567475e-06, | |
| "loss": 0.6161, | |
| "num_input_tokens_seen": 26582768, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.7944, | |
| "grad_norm": 1.2726836204528809, | |
| "learning_rate": 5.036213174161877e-06, | |
| "loss": 0.7286, | |
| "num_input_tokens_seen": 26610272, | |
| "step": 4965 | |
| }, | |
| { | |
| "epoch": 0.7952, | |
| "grad_norm": 1.0096362829208374, | |
| "learning_rate": 4.998456105884025e-06, | |
| "loss": 0.7065, | |
| "num_input_tokens_seen": 26636352, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.796, | |
| "grad_norm": 1.1659733057022095, | |
| "learning_rate": 4.960825378228082e-06, | |
| "loss": 0.6842, | |
| "num_input_tokens_seen": 26667824, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 0.7968, | |
| "grad_norm": 0.9078534245491028, | |
| "learning_rate": 4.9233212288901845e-06, | |
| "loss": 0.7069, | |
| "num_input_tokens_seen": 26698272, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.7976, | |
| "grad_norm": 0.8440881967544556, | |
| "learning_rate": 4.885943894766909e-06, | |
| "loss": 0.5942, | |
| "num_input_tokens_seen": 26725984, | |
| "step": 4985 | |
| }, | |
| { | |
| "epoch": 0.7984, | |
| "grad_norm": 1.0457020998001099, | |
| "learning_rate": 4.848693611953825e-06, | |
| "loss": 0.8419, | |
| "num_input_tokens_seen": 26751360, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.7992, | |
| "grad_norm": 0.947726309299469, | |
| "learning_rate": 4.811570615743952e-06, | |
| "loss": 0.5888, | |
| "num_input_tokens_seen": 26782672, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.1138640642166138, | |
| "learning_rate": 4.7745751406263165e-06, | |
| "loss": 0.6504, | |
| "num_input_tokens_seen": 26809120, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.8008, | |
| "grad_norm": 0.9211150407791138, | |
| "learning_rate": 4.737707420284451e-06, | |
| "loss": 0.6603, | |
| "num_input_tokens_seen": 26839552, | |
| "step": 5005 | |
| }, | |
| { | |
| "epoch": 0.8016, | |
| "grad_norm": 1.2926892042160034, | |
| "learning_rate": 4.700967687594901e-06, | |
| "loss": 0.627, | |
| "num_input_tokens_seen": 26864416, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.8024, | |
| "grad_norm": 0.9436898827552795, | |
| "learning_rate": 4.664356174625795e-06, | |
| "loss": 0.6509, | |
| "num_input_tokens_seen": 26890368, | |
| "step": 5015 | |
| }, | |
| { | |
| "epoch": 0.8032, | |
| "grad_norm": 0.8215711712837219, | |
| "learning_rate": 4.627873112635345e-06, | |
| "loss": 0.6673, | |
| "num_input_tokens_seen": 26916064, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.804, | |
| "grad_norm": 0.9311307072639465, | |
| "learning_rate": 4.591518732070402e-06, | |
| "loss": 0.7972, | |
| "num_input_tokens_seen": 26940528, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 0.8048, | |
| "grad_norm": 1.1058831214904785, | |
| "learning_rate": 4.5552932625649944e-06, | |
| "loss": 0.6977, | |
| "num_input_tokens_seen": 26965296, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.8056, | |
| "grad_norm": 1.2519973516464233, | |
| "learning_rate": 4.5191969329388625e-06, | |
| "loss": 0.8094, | |
| "num_input_tokens_seen": 26988240, | |
| "step": 5035 | |
| }, | |
| { | |
| "epoch": 0.8064, | |
| "grad_norm": 0.9225587248802185, | |
| "learning_rate": 4.483229971196054e-06, | |
| "loss": 0.7441, | |
| "num_input_tokens_seen": 27015632, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.8072, | |
| "grad_norm": 0.8636001944541931, | |
| "learning_rate": 4.44739260452344e-06, | |
| "loss": 0.6618, | |
| "num_input_tokens_seen": 27040528, | |
| "step": 5045 | |
| }, | |
| { | |
| "epoch": 0.808, | |
| "grad_norm": 1.094529151916504, | |
| "learning_rate": 4.411685059289314e-06, | |
| "loss": 0.7527, | |
| "num_input_tokens_seen": 27066560, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.8088, | |
| "grad_norm": 0.9794814586639404, | |
| "learning_rate": 4.376107561041937e-06, | |
| "loss": 0.7844, | |
| "num_input_tokens_seen": 27089408, | |
| "step": 5055 | |
| }, | |
| { | |
| "epoch": 0.8096, | |
| "grad_norm": 0.8608947396278381, | |
| "learning_rate": 4.340660334508115e-06, | |
| "loss": 0.7598, | |
| "num_input_tokens_seen": 27114832, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.8104, | |
| "grad_norm": 0.8481171727180481, | |
| "learning_rate": 4.305343603591802e-06, | |
| "loss": 0.6645, | |
| "num_input_tokens_seen": 27140320, | |
| "step": 5065 | |
| }, | |
| { | |
| "epoch": 0.8112, | |
| "grad_norm": 0.9115588068962097, | |
| "learning_rate": 4.270157591372667e-06, | |
| "loss": 0.7065, | |
| "num_input_tokens_seen": 27171200, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.812, | |
| "grad_norm": 1.4465726613998413, | |
| "learning_rate": 4.235102520104681e-06, | |
| "loss": 0.7481, | |
| "num_input_tokens_seen": 27195056, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 0.8128, | |
| "grad_norm": 1.1063685417175293, | |
| "learning_rate": 4.200178611214736e-06, | |
| "loss": 0.6154, | |
| "num_input_tokens_seen": 27220816, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.8136, | |
| "grad_norm": 1.3894023895263672, | |
| "learning_rate": 4.165386085301212e-06, | |
| "loss": 0.6661, | |
| "num_input_tokens_seen": 27246400, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 0.8144, | |
| "grad_norm": 1.0136696100234985, | |
| "learning_rate": 4.130725162132612e-06, | |
| "loss": 0.7043, | |
| "num_input_tokens_seen": 27269936, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.8152, | |
| "grad_norm": 1.36388099193573, | |
| "learning_rate": 4.096196060646168e-06, | |
| "loss": 0.8173, | |
| "num_input_tokens_seen": 27293488, | |
| "step": 5095 | |
| }, | |
| { | |
| "epoch": 0.816, | |
| "grad_norm": 0.9864152669906616, | |
| "learning_rate": 4.061798998946459e-06, | |
| "loss": 0.7154, | |
| "num_input_tokens_seen": 27318592, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.8168, | |
| "grad_norm": 0.9855313301086426, | |
| "learning_rate": 4.027534194304005e-06, | |
| "loss": 0.6336, | |
| "num_input_tokens_seen": 27343616, | |
| "step": 5105 | |
| }, | |
| { | |
| "epoch": 0.8176, | |
| "grad_norm": 0.9487363696098328, | |
| "learning_rate": 3.99340186315395e-06, | |
| "loss": 0.7355, | |
| "num_input_tokens_seen": 27369216, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.8184, | |
| "grad_norm": 0.8174062371253967, | |
| "learning_rate": 3.959402221094635e-06, | |
| "loss": 0.6034, | |
| "num_input_tokens_seen": 27398704, | |
| "step": 5115 | |
| }, | |
| { | |
| "epoch": 0.8192, | |
| "grad_norm": 1.17109215259552, | |
| "learning_rate": 3.925535482886286e-06, | |
| "loss": 0.7962, | |
| "num_input_tokens_seen": 27424176, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 0.9322016835212708, | |
| "learning_rate": 3.891801862449629e-06, | |
| "loss": 0.7289, | |
| "num_input_tokens_seen": 27452656, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 0.8208, | |
| "grad_norm": 0.8374980688095093, | |
| "learning_rate": 3.858201572864537e-06, | |
| "loss": 0.6644, | |
| "num_input_tokens_seen": 27478656, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.8216, | |
| "grad_norm": 0.8750137686729431, | |
| "learning_rate": 3.824734826368703e-06, | |
| "loss": 0.7519, | |
| "num_input_tokens_seen": 27507184, | |
| "step": 5135 | |
| }, | |
| { | |
| "epoch": 0.8224, | |
| "grad_norm": 0.935739278793335, | |
| "learning_rate": 3.7914018343562895e-06, | |
| "loss": 0.7611, | |
| "num_input_tokens_seen": 27536112, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.8232, | |
| "grad_norm": 1.0115796327590942, | |
| "learning_rate": 3.75820280737659e-06, | |
| "loss": 0.6713, | |
| "num_input_tokens_seen": 27563728, | |
| "step": 5145 | |
| }, | |
| { | |
| "epoch": 0.824, | |
| "grad_norm": 1.1365923881530762, | |
| "learning_rate": 3.725137955132707e-06, | |
| "loss": 0.6522, | |
| "num_input_tokens_seen": 27587120, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.8248, | |
| "grad_norm": 1.2187998294830322, | |
| "learning_rate": 3.692207486480209e-06, | |
| "loss": 0.7707, | |
| "num_input_tokens_seen": 27608240, | |
| "step": 5155 | |
| }, | |
| { | |
| "epoch": 0.8256, | |
| "grad_norm": 1.2192776203155518, | |
| "learning_rate": 3.6594116094258337e-06, | |
| "loss": 0.6148, | |
| "num_input_tokens_seen": 27637840, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.8264, | |
| "grad_norm": 0.8527853488922119, | |
| "learning_rate": 3.626750531126169e-06, | |
| "loss": 0.6884, | |
| "num_input_tokens_seen": 27662144, | |
| "step": 5165 | |
| }, | |
| { | |
| "epoch": 0.8272, | |
| "grad_norm": 1.293915033340454, | |
| "learning_rate": 3.594224457886336e-06, | |
| "loss": 0.6954, | |
| "num_input_tokens_seen": 27691184, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.828, | |
| "grad_norm": 0.9655611515045166, | |
| "learning_rate": 3.561833595158698e-06, | |
| "loss": 0.6736, | |
| "num_input_tokens_seen": 27719376, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 0.8288, | |
| "grad_norm": 0.8014153838157654, | |
| "learning_rate": 3.529578147541532e-06, | |
| "loss": 0.7575, | |
| "num_input_tokens_seen": 27749248, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.8296, | |
| "grad_norm": 0.8693012595176697, | |
| "learning_rate": 3.4974583187777852e-06, | |
| "loss": 0.6607, | |
| "num_input_tokens_seen": 27779136, | |
| "step": 5185 | |
| }, | |
| { | |
| "epoch": 0.8304, | |
| "grad_norm": 1.2928762435913086, | |
| "learning_rate": 3.4654743117537524e-06, | |
| "loss": 0.844, | |
| "num_input_tokens_seen": 27802592, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.8312, | |
| "grad_norm": 0.9536007046699524, | |
| "learning_rate": 3.433626328497805e-06, | |
| "loss": 0.6596, | |
| "num_input_tokens_seen": 27826864, | |
| "step": 5195 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 1.01909601688385, | |
| "learning_rate": 3.4019145701791184e-06, | |
| "loss": 0.7834, | |
| "num_input_tokens_seen": 27851680, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.8328, | |
| "grad_norm": 0.8944137096405029, | |
| "learning_rate": 3.3703392371063845e-06, | |
| "loss": 0.6874, | |
| "num_input_tokens_seen": 27880208, | |
| "step": 5205 | |
| }, | |
| { | |
| "epoch": 0.8336, | |
| "grad_norm": 1.2721880674362183, | |
| "learning_rate": 3.338900528726571e-06, | |
| "loss": 0.6468, | |
| "num_input_tokens_seen": 27907392, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.8344, | |
| "grad_norm": 1.3406318426132202, | |
| "learning_rate": 3.3075986436236493e-06, | |
| "loss": 0.6675, | |
| "num_input_tokens_seen": 27934560, | |
| "step": 5215 | |
| }, | |
| { | |
| "epoch": 0.8352, | |
| "grad_norm": 0.9489244818687439, | |
| "learning_rate": 3.2764337795173435e-06, | |
| "loss": 0.6704, | |
| "num_input_tokens_seen": 27963248, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.836, | |
| "grad_norm": 0.9695754647254944, | |
| "learning_rate": 3.245406133261858e-06, | |
| "loss": 0.6903, | |
| "num_input_tokens_seen": 27989872, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 0.8368, | |
| "grad_norm": 0.7802848219871521, | |
| "learning_rate": 3.2145159008446807e-06, | |
| "loss": 0.7441, | |
| "num_input_tokens_seen": 28012208, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.8376, | |
| "grad_norm": 1.2693451642990112, | |
| "learning_rate": 3.1837632773853098e-06, | |
| "loss": 0.6636, | |
| "num_input_tokens_seen": 28041200, | |
| "step": 5235 | |
| }, | |
| { | |
| "epoch": 0.8384, | |
| "grad_norm": 1.1437602043151855, | |
| "learning_rate": 3.15314845713402e-06, | |
| "loss": 0.7246, | |
| "num_input_tokens_seen": 28068272, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.8392, | |
| "grad_norm": 0.8950793743133545, | |
| "learning_rate": 3.122671633470664e-06, | |
| "loss": 0.6583, | |
| "num_input_tokens_seen": 28092768, | |
| "step": 5245 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.010606288909912, | |
| "learning_rate": 3.0923329989034132e-06, | |
| "loss": 0.6823, | |
| "num_input_tokens_seen": 28122944, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.8408, | |
| "grad_norm": 1.0807864665985107, | |
| "learning_rate": 3.062132745067581e-06, | |
| "loss": 0.7467, | |
| "num_input_tokens_seen": 28151424, | |
| "step": 5255 | |
| }, | |
| { | |
| "epoch": 0.8416, | |
| "grad_norm": 1.1845099925994873, | |
| "learning_rate": 3.0320710627243813e-06, | |
| "loss": 0.7541, | |
| "num_input_tokens_seen": 28176752, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.8424, | |
| "grad_norm": 0.7790080904960632, | |
| "learning_rate": 3.002148141759739e-06, | |
| "loss": 0.6829, | |
| "num_input_tokens_seen": 28205456, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 0.8432, | |
| "grad_norm": 1.364443302154541, | |
| "learning_rate": 2.97236417118309e-06, | |
| "loss": 0.692, | |
| "num_input_tokens_seen": 28230304, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.844, | |
| "grad_norm": 1.1289632320404053, | |
| "learning_rate": 2.942719339126171e-06, | |
| "loss": 0.7628, | |
| "num_input_tokens_seen": 28255536, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 0.8448, | |
| "grad_norm": 1.3984529972076416, | |
| "learning_rate": 2.9132138328418573e-06, | |
| "loss": 0.6972, | |
| "num_input_tokens_seen": 28279600, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.8456, | |
| "grad_norm": 1.0336651802062988, | |
| "learning_rate": 2.8838478387029606e-06, | |
| "loss": 0.6801, | |
| "num_input_tokens_seen": 28304688, | |
| "step": 5285 | |
| }, | |
| { | |
| "epoch": 0.8464, | |
| "grad_norm": 0.8985416293144226, | |
| "learning_rate": 2.8546215422010638e-06, | |
| "loss": 0.6697, | |
| "num_input_tokens_seen": 28331584, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.8472, | |
| "grad_norm": 1.3255959749221802, | |
| "learning_rate": 2.8255351279453446e-06, | |
| "loss": 0.6816, | |
| "num_input_tokens_seen": 28360256, | |
| "step": 5295 | |
| }, | |
| { | |
| "epoch": 0.848, | |
| "grad_norm": 0.9332570433616638, | |
| "learning_rate": 2.7965887796613884e-06, | |
| "loss": 0.7763, | |
| "num_input_tokens_seen": 28385168, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.8488, | |
| "grad_norm": 1.041506290435791, | |
| "learning_rate": 2.767782680190073e-06, | |
| "loss": 0.7837, | |
| "num_input_tokens_seen": 28407248, | |
| "step": 5305 | |
| }, | |
| { | |
| "epoch": 0.8496, | |
| "grad_norm": 1.075434923171997, | |
| "learning_rate": 2.739117011486378e-06, | |
| "loss": 0.6975, | |
| "num_input_tokens_seen": 28434304, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.8504, | |
| "grad_norm": 1.0231233835220337, | |
| "learning_rate": 2.710591954618247e-06, | |
| "loss": 0.7119, | |
| "num_input_tokens_seen": 28465424, | |
| "step": 5315 | |
| }, | |
| { | |
| "epoch": 0.8512, | |
| "grad_norm": 1.0655337572097778, | |
| "learning_rate": 2.6822076897654452e-06, | |
| "loss": 0.7644, | |
| "num_input_tokens_seen": 28494416, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.852, | |
| "grad_norm": 1.177964448928833, | |
| "learning_rate": 2.6539643962184057e-06, | |
| "loss": 0.644, | |
| "num_input_tokens_seen": 28519552, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 0.8528, | |
| "grad_norm": 1.0055785179138184, | |
| "learning_rate": 2.6258622523771287e-06, | |
| "loss": 0.7378, | |
| "num_input_tokens_seen": 28545632, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.8536, | |
| "grad_norm": 1.2302112579345703, | |
| "learning_rate": 2.5979014357500248e-06, | |
| "loss": 0.7267, | |
| "num_input_tokens_seen": 28571440, | |
| "step": 5335 | |
| }, | |
| { | |
| "epoch": 0.8544, | |
| "grad_norm": 1.2346506118774414, | |
| "learning_rate": 2.570082122952816e-06, | |
| "loss": 0.6015, | |
| "num_input_tokens_seen": 28599472, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.8552, | |
| "grad_norm": 0.8938889503479004, | |
| "learning_rate": 2.5424044897073895e-06, | |
| "loss": 0.6327, | |
| "num_input_tokens_seen": 28629136, | |
| "step": 5345 | |
| }, | |
| { | |
| "epoch": 0.856, | |
| "grad_norm": 0.8958183526992798, | |
| "learning_rate": 2.514868710840723e-06, | |
| "loss": 0.739, | |
| "num_input_tokens_seen": 28648928, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.8568, | |
| "grad_norm": 1.0484833717346191, | |
| "learning_rate": 2.4874749602837697e-06, | |
| "loss": 0.7279, | |
| "num_input_tokens_seen": 28675056, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 0.8576, | |
| "grad_norm": 0.887750506401062, | |
| "learning_rate": 2.4602234110703364e-06, | |
| "loss": 0.726, | |
| "num_input_tokens_seen": 28698416, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.8584, | |
| "grad_norm": 0.7482147216796875, | |
| "learning_rate": 2.43311423533602e-06, | |
| "loss": 0.7046, | |
| "num_input_tokens_seen": 28729856, | |
| "step": 5365 | |
| }, | |
| { | |
| "epoch": 0.8592, | |
| "grad_norm": 1.003188967704773, | |
| "learning_rate": 2.406147604317119e-06, | |
| "loss": 0.6922, | |
| "num_input_tokens_seen": 28757360, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.5129293203353882, | |
| "learning_rate": 2.379323688349516e-06, | |
| "loss": 0.6664, | |
| "num_input_tokens_seen": 28780624, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 0.8608, | |
| "grad_norm": 0.9118067622184753, | |
| "learning_rate": 2.3526426568676483e-06, | |
| "loss": 0.6532, | |
| "num_input_tokens_seen": 28805616, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.8616, | |
| "grad_norm": 1.007717251777649, | |
| "learning_rate": 2.326104678403415e-06, | |
| "loss": 0.6678, | |
| "num_input_tokens_seen": 28833504, | |
| "step": 5385 | |
| }, | |
| { | |
| "epoch": 0.8624, | |
| "grad_norm": 0.9576060771942139, | |
| "learning_rate": 2.299709920585108e-06, | |
| "loss": 0.6152, | |
| "num_input_tokens_seen": 28862704, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.8632, | |
| "grad_norm": 1.1482651233673096, | |
| "learning_rate": 2.2734585501363673e-06, | |
| "loss": 0.7131, | |
| "num_input_tokens_seen": 28886224, | |
| "step": 5395 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 1.0614038705825806, | |
| "learning_rate": 2.2473507328751086e-06, | |
| "loss": 0.735, | |
| "num_input_tokens_seen": 28911760, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.8648, | |
| "grad_norm": 1.0017966032028198, | |
| "learning_rate": 2.2213866337125022e-06, | |
| "loss": 0.6706, | |
| "num_input_tokens_seen": 28941360, | |
| "step": 5405 | |
| }, | |
| { | |
| "epoch": 0.8656, | |
| "grad_norm": 0.963431179523468, | |
| "learning_rate": 2.1955664166519036e-06, | |
| "loss": 0.7683, | |
| "num_input_tokens_seen": 28965568, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.8664, | |
| "grad_norm": 1.1716382503509521, | |
| "learning_rate": 2.1698902447878477e-06, | |
| "loss": 0.623, | |
| "num_input_tokens_seen": 28994432, | |
| "step": 5415 | |
| }, | |
| { | |
| "epoch": 0.8672, | |
| "grad_norm": 1.2640058994293213, | |
| "learning_rate": 2.1443582803049755e-06, | |
| "loss": 0.7774, | |
| "num_input_tokens_seen": 29016560, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.868, | |
| "grad_norm": 0.9828547239303589, | |
| "learning_rate": 2.118970684477062e-06, | |
| "loss": 0.6332, | |
| "num_input_tokens_seen": 29043920, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 0.8688, | |
| "grad_norm": 0.9180524349212646, | |
| "learning_rate": 2.093727617665955e-06, | |
| "loss": 0.6658, | |
| "num_input_tokens_seen": 29073840, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.8696, | |
| "grad_norm": 1.1137315034866333, | |
| "learning_rate": 2.068629239320588e-06, | |
| "loss": 0.7078, | |
| "num_input_tokens_seen": 29102752, | |
| "step": 5435 | |
| }, | |
| { | |
| "epoch": 0.8704, | |
| "grad_norm": 1.1765251159667969, | |
| "learning_rate": 2.043675707975959e-06, | |
| "loss": 0.7049, | |
| "num_input_tokens_seen": 29126576, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.8712, | |
| "grad_norm": 0.9444310665130615, | |
| "learning_rate": 2.0188671812521292e-06, | |
| "loss": 0.7931, | |
| "num_input_tokens_seen": 29153120, | |
| "step": 5445 | |
| }, | |
| { | |
| "epoch": 0.872, | |
| "grad_norm": 0.914959192276001, | |
| "learning_rate": 1.9942038158532407e-06, | |
| "loss": 0.8394, | |
| "num_input_tokens_seen": 29182192, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.8728, | |
| "grad_norm": 1.21523916721344, | |
| "learning_rate": 1.969685767566512e-06, | |
| "loss": 0.6915, | |
| "num_input_tokens_seen": 29206368, | |
| "step": 5455 | |
| }, | |
| { | |
| "epoch": 0.8736, | |
| "grad_norm": 0.8198549151420593, | |
| "learning_rate": 1.9453131912612694e-06, | |
| "loss": 0.6627, | |
| "num_input_tokens_seen": 29235984, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.8744, | |
| "grad_norm": 0.9284049868583679, | |
| "learning_rate": 1.921086240887937e-06, | |
| "loss": 0.6671, | |
| "num_input_tokens_seen": 29260672, | |
| "step": 5465 | |
| }, | |
| { | |
| "epoch": 0.8752, | |
| "grad_norm": 0.9254517555236816, | |
| "learning_rate": 1.8970050694771064e-06, | |
| "loss": 0.665, | |
| "num_input_tokens_seen": 29287792, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.876, | |
| "grad_norm": 0.8524356484413147, | |
| "learning_rate": 1.8730698291385518e-06, | |
| "loss": 0.663, | |
| "num_input_tokens_seen": 29314656, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 0.8768, | |
| "grad_norm": 0.8038460612297058, | |
| "learning_rate": 1.8492806710602496e-06, | |
| "loss": 0.7054, | |
| "num_input_tokens_seen": 29338976, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.8776, | |
| "grad_norm": 0.8914519548416138, | |
| "learning_rate": 1.8256377455074525e-06, | |
| "loss": 0.6905, | |
| "num_input_tokens_seen": 29364912, | |
| "step": 5485 | |
| }, | |
| { | |
| "epoch": 0.8784, | |
| "grad_norm": 0.8928874135017395, | |
| "learning_rate": 1.802141201821736e-06, | |
| "loss": 0.7641, | |
| "num_input_tokens_seen": 29392960, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.8792, | |
| "grad_norm": 0.7842042446136475, | |
| "learning_rate": 1.7787911884200314e-06, | |
| "loss": 0.6918, | |
| "num_input_tokens_seen": 29416848, | |
| "step": 5495 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.335999846458435, | |
| "learning_rate": 1.7555878527937164e-06, | |
| "loss": 0.6281, | |
| "num_input_tokens_seen": 29445856, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.8808, | |
| "grad_norm": 0.981039822101593, | |
| "learning_rate": 1.7325313415076705e-06, | |
| "loss": 0.7199, | |
| "num_input_tokens_seen": 29474400, | |
| "step": 5505 | |
| }, | |
| { | |
| "epoch": 0.8816, | |
| "grad_norm": 1.003205418586731, | |
| "learning_rate": 1.7096218001993513e-06, | |
| "loss": 0.7022, | |
| "num_input_tokens_seen": 29501312, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.8824, | |
| "grad_norm": 0.804409921169281, | |
| "learning_rate": 1.686859373577876e-06, | |
| "loss": 0.635, | |
| "num_input_tokens_seen": 29530160, | |
| "step": 5515 | |
| }, | |
| { | |
| "epoch": 0.8832, | |
| "grad_norm": 0.6613283157348633, | |
| "learning_rate": 1.6642442054230934e-06, | |
| "loss": 0.6752, | |
| "num_input_tokens_seen": 29557168, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.884, | |
| "grad_norm": 1.0624265670776367, | |
| "learning_rate": 1.6417764385846996e-06, | |
| "loss": 0.7824, | |
| "num_input_tokens_seen": 29584832, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 0.8848, | |
| "grad_norm": 1.0792144536972046, | |
| "learning_rate": 1.6194562149813242e-06, | |
| "loss": 0.6823, | |
| "num_input_tokens_seen": 29609504, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.8856, | |
| "grad_norm": 1.0641052722930908, | |
| "learning_rate": 1.5972836755996285e-06, | |
| "loss": 0.6777, | |
| "num_input_tokens_seen": 29636768, | |
| "step": 5535 | |
| }, | |
| { | |
| "epoch": 0.8864, | |
| "grad_norm": 1.2235264778137207, | |
| "learning_rate": 1.5752589604934255e-06, | |
| "loss": 0.7372, | |
| "num_input_tokens_seen": 29660496, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.8872, | |
| "grad_norm": 0.9904717206954956, | |
| "learning_rate": 1.5533822087827805e-06, | |
| "loss": 0.7126, | |
| "num_input_tokens_seen": 29686928, | |
| "step": 5545 | |
| }, | |
| { | |
| "epoch": 0.888, | |
| "grad_norm": 1.2209672927856445, | |
| "learning_rate": 1.5316535586531483e-06, | |
| "loss": 0.6564, | |
| "num_input_tokens_seen": 29714800, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.8888, | |
| "grad_norm": 1.2465245723724365, | |
| "learning_rate": 1.5100731473544933e-06, | |
| "loss": 0.8006, | |
| "num_input_tokens_seen": 29741808, | |
| "step": 5555 | |
| }, | |
| { | |
| "epoch": 0.8896, | |
| "grad_norm": 1.0243083238601685, | |
| "learning_rate": 1.4886411112004255e-06, | |
| "loss": 0.7322, | |
| "num_input_tokens_seen": 29763088, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.8904, | |
| "grad_norm": 0.9879063367843628, | |
| "learning_rate": 1.4673575855673277e-06, | |
| "loss": 0.7243, | |
| "num_input_tokens_seen": 29791520, | |
| "step": 5565 | |
| }, | |
| { | |
| "epoch": 0.8912, | |
| "grad_norm": 0.8593109250068665, | |
| "learning_rate": 1.4462227048935183e-06, | |
| "loss": 0.6955, | |
| "num_input_tokens_seen": 29817600, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.892, | |
| "grad_norm": 0.9585959911346436, | |
| "learning_rate": 1.425236602678387e-06, | |
| "loss": 0.6658, | |
| "num_input_tokens_seen": 29843136, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 0.8928, | |
| "grad_norm": 1.2681745290756226, | |
| "learning_rate": 1.4043994114815661e-06, | |
| "loss": 0.7943, | |
| "num_input_tokens_seen": 29864864, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.8936, | |
| "grad_norm": 0.8207817673683167, | |
| "learning_rate": 1.38371126292208e-06, | |
| "loss": 0.7734, | |
| "num_input_tokens_seen": 29890416, | |
| "step": 5585 | |
| }, | |
| { | |
| "epoch": 0.8944, | |
| "grad_norm": 1.0749483108520508, | |
| "learning_rate": 1.3631722876775138e-06, | |
| "loss": 0.7008, | |
| "num_input_tokens_seen": 29916400, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.8952, | |
| "grad_norm": 1.1091291904449463, | |
| "learning_rate": 1.3427826154832042e-06, | |
| "loss": 0.6434, | |
| "num_input_tokens_seen": 29944304, | |
| "step": 5595 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 1.3164221048355103, | |
| "learning_rate": 1.3225423751313942e-06, | |
| "loss": 0.6952, | |
| "num_input_tokens_seen": 29967648, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.8968, | |
| "grad_norm": 1.2402032613754272, | |
| "learning_rate": 1.3024516944704496e-06, | |
| "loss": 0.6331, | |
| "num_input_tokens_seen": 29989312, | |
| "step": 5605 | |
| }, | |
| { | |
| "epoch": 0.8976, | |
| "grad_norm": 1.1296372413635254, | |
| "learning_rate": 1.2825107004040272e-06, | |
| "loss": 0.7894, | |
| "num_input_tokens_seen": 30012384, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.8984, | |
| "grad_norm": 1.109983205795288, | |
| "learning_rate": 1.2627195188902791e-06, | |
| "loss": 0.6819, | |
| "num_input_tokens_seen": 30042656, | |
| "step": 5615 | |
| }, | |
| { | |
| "epoch": 0.8992, | |
| "grad_norm": 1.0760643482208252, | |
| "learning_rate": 1.2430782749410673e-06, | |
| "loss": 0.8208, | |
| "num_input_tokens_seen": 30068464, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.1865812540054321, | |
| "learning_rate": 1.2235870926211619e-06, | |
| "loss": 0.7203, | |
| "num_input_tokens_seen": 30093216, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 0.9008, | |
| "grad_norm": 1.0090526342391968, | |
| "learning_rate": 1.2042460950474648e-06, | |
| "loss": 0.7368, | |
| "num_input_tokens_seen": 30117488, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.9016, | |
| "grad_norm": 0.8406433463096619, | |
| "learning_rate": 1.1850554043882328e-06, | |
| "loss": 0.7681, | |
| "num_input_tokens_seen": 30144016, | |
| "step": 5635 | |
| }, | |
| { | |
| "epoch": 0.9024, | |
| "grad_norm": 1.057853102684021, | |
| "learning_rate": 1.1660151418622922e-06, | |
| "loss": 0.6962, | |
| "num_input_tokens_seen": 30177184, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.9032, | |
| "grad_norm": 0.9097471237182617, | |
| "learning_rate": 1.1471254277382881e-06, | |
| "loss": 0.7056, | |
| "num_input_tokens_seen": 30206048, | |
| "step": 5645 | |
| }, | |
| { | |
| "epoch": 0.904, | |
| "grad_norm": 1.0608327388763428, | |
| "learning_rate": 1.1283863813339263e-06, | |
| "loss": 0.7089, | |
| "num_input_tokens_seen": 30229936, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.9048, | |
| "grad_norm": 1.1075376272201538, | |
| "learning_rate": 1.1097981210152043e-06, | |
| "loss": 0.7794, | |
| "num_input_tokens_seen": 30257760, | |
| "step": 5655 | |
| }, | |
| { | |
| "epoch": 0.9056, | |
| "grad_norm": 0.9509792327880859, | |
| "learning_rate": 1.0913607641956841e-06, | |
| "loss": 0.77, | |
| "num_input_tokens_seen": 30286464, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.9064, | |
| "grad_norm": 0.981621265411377, | |
| "learning_rate": 1.0730744273357213e-06, | |
| "loss": 0.708, | |
| "num_input_tokens_seen": 30317040, | |
| "step": 5665 | |
| }, | |
| { | |
| "epoch": 0.9072, | |
| "grad_norm": 0.8428750038146973, | |
| "learning_rate": 1.0549392259417646e-06, | |
| "loss": 0.6423, | |
| "num_input_tokens_seen": 30342672, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.908, | |
| "grad_norm": 0.9921499490737915, | |
| "learning_rate": 1.0369552745656013e-06, | |
| "loss": 0.8176, | |
| "num_input_tokens_seen": 30369952, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 0.9088, | |
| "grad_norm": 0.9219129681587219, | |
| "learning_rate": 1.0191226868036418e-06, | |
| "loss": 0.6924, | |
| "num_input_tokens_seen": 30400800, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.9096, | |
| "grad_norm": 1.451660394668579, | |
| "learning_rate": 1.001441575296208e-06, | |
| "loss": 0.576, | |
| "num_input_tokens_seen": 30436240, | |
| "step": 5685 | |
| }, | |
| { | |
| "epoch": 0.9104, | |
| "grad_norm": 0.9631555676460266, | |
| "learning_rate": 9.839120517267985e-07, | |
| "loss": 0.6206, | |
| "num_input_tokens_seen": 30465232, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.9112, | |
| "grad_norm": 1.125351071357727, | |
| "learning_rate": 9.665342268214166e-07, | |
| "loss": 0.7424, | |
| "num_input_tokens_seen": 30489776, | |
| "step": 5695 | |
| }, | |
| { | |
| "epoch": 0.912, | |
| "grad_norm": 1.0404316186904907, | |
| "learning_rate": 9.493082103478517e-07, | |
| "loss": 0.6868, | |
| "num_input_tokens_seen": 30514592, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.9128, | |
| "grad_norm": 0.9020105004310608, | |
| "learning_rate": 9.322341111149852e-07, | |
| "loss": 0.7017, | |
| "num_input_tokens_seen": 30544112, | |
| "step": 5705 | |
| }, | |
| { | |
| "epoch": 0.9136, | |
| "grad_norm": 1.0047924518585205, | |
| "learning_rate": 9.153120369721046e-07, | |
| "loss": 0.6429, | |
| "num_input_tokens_seen": 30577440, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.9144, | |
| "grad_norm": 1.087547779083252, | |
| "learning_rate": 8.985420948082329e-07, | |
| "loss": 0.6507, | |
| "num_input_tokens_seen": 30602704, | |
| "step": 5715 | |
| }, | |
| { | |
| "epoch": 0.9152, | |
| "grad_norm": 0.9647343754768372, | |
| "learning_rate": 8.819243905514308e-07, | |
| "loss": 0.7508, | |
| "num_input_tokens_seen": 30627360, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.916, | |
| "grad_norm": 0.9557966589927673, | |
| "learning_rate": 8.65459029168153e-07, | |
| "loss": 0.6506, | |
| "num_input_tokens_seen": 30657216, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 0.9168, | |
| "grad_norm": 0.991233229637146, | |
| "learning_rate": 8.491461146625773e-07, | |
| "loss": 0.6426, | |
| "num_input_tokens_seen": 30683472, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.9176, | |
| "grad_norm": 0.8405401706695557, | |
| "learning_rate": 8.329857500759292e-07, | |
| "loss": 0.6227, | |
| "num_input_tokens_seen": 30707392, | |
| "step": 5735 | |
| }, | |
| { | |
| "epoch": 0.9184, | |
| "grad_norm": 1.2772481441497803, | |
| "learning_rate": 8.169780374858577e-07, | |
| "loss": 0.839, | |
| "num_input_tokens_seen": 30732160, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.9192, | |
| "grad_norm": 0.8571876883506775, | |
| "learning_rate": 8.011230780057749e-07, | |
| "loss": 0.6817, | |
| "num_input_tokens_seen": 30760336, | |
| "step": 5745 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.000186562538147, | |
| "learning_rate": 7.854209717842231e-07, | |
| "loss": 0.7016, | |
| "num_input_tokens_seen": 30788800, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.9208, | |
| "grad_norm": 1.0842921733856201, | |
| "learning_rate": 7.698718180042392e-07, | |
| "loss": 0.702, | |
| "num_input_tokens_seen": 30813632, | |
| "step": 5755 | |
| }, | |
| { | |
| "epoch": 0.9216, | |
| "grad_norm": 1.0399980545043945, | |
| "learning_rate": 7.544757148827297e-07, | |
| "loss": 0.7203, | |
| "num_input_tokens_seen": 30840816, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.9224, | |
| "grad_norm": 1.1896955966949463, | |
| "learning_rate": 7.392327596698473e-07, | |
| "loss": 0.6873, | |
| "num_input_tokens_seen": 30861664, | |
| "step": 5765 | |
| }, | |
| { | |
| "epoch": 0.9232, | |
| "grad_norm": 1.3494455814361572, | |
| "learning_rate": 7.241430486483819e-07, | |
| "loss": 0.6975, | |
| "num_input_tokens_seen": 30886064, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.924, | |
| "grad_norm": 1.1318798065185547, | |
| "learning_rate": 7.092066771331507e-07, | |
| "loss": 0.6058, | |
| "num_input_tokens_seen": 30910608, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 0.9248, | |
| "grad_norm": 1.230055332183838, | |
| "learning_rate": 6.944237394703984e-07, | |
| "loss": 0.8128, | |
| "num_input_tokens_seen": 30935008, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.9256, | |
| "grad_norm": 1.0150400400161743, | |
| "learning_rate": 6.797943290371839e-07, | |
| "loss": 0.7329, | |
| "num_input_tokens_seen": 30959792, | |
| "step": 5785 | |
| }, | |
| { | |
| "epoch": 0.9264, | |
| "grad_norm": 0.9498345255851746, | |
| "learning_rate": 6.653185382408194e-07, | |
| "loss": 0.673, | |
| "num_input_tokens_seen": 30985856, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.9272, | |
| "grad_norm": 1.047587275505066, | |
| "learning_rate": 6.509964585182687e-07, | |
| "loss": 0.7395, | |
| "num_input_tokens_seen": 31013888, | |
| "step": 5795 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.9112536907196045, | |
| "learning_rate": 6.368281803355691e-07, | |
| "loss": 0.753, | |
| "num_input_tokens_seen": 31038352, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.9288, | |
| "grad_norm": 0.9714504480361938, | |
| "learning_rate": 6.228137931872713e-07, | |
| "loss": 0.7573, | |
| "num_input_tokens_seen": 31066624, | |
| "step": 5805 | |
| }, | |
| { | |
| "epoch": 0.9296, | |
| "grad_norm": 1.1300855875015259, | |
| "learning_rate": 6.089533855958507e-07, | |
| "loss": 0.759, | |
| "num_input_tokens_seen": 31093184, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.9304, | |
| "grad_norm": 1.0004905462265015, | |
| "learning_rate": 5.95247045111183e-07, | |
| "loss": 0.7482, | |
| "num_input_tokens_seen": 31118352, | |
| "step": 5815 | |
| }, | |
| { | |
| "epoch": 0.9312, | |
| "grad_norm": 0.8432052731513977, | |
| "learning_rate": 5.816948583099613e-07, | |
| "loss": 0.6295, | |
| "num_input_tokens_seen": 31145616, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.932, | |
| "grad_norm": 0.923195481300354, | |
| "learning_rate": 5.68296910795163e-07, | |
| "loss": 0.7596, | |
| "num_input_tokens_seen": 31167088, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 0.9328, | |
| "grad_norm": 1.0346819162368774, | |
| "learning_rate": 5.550532871955061e-07, | |
| "loss": 0.689, | |
| "num_input_tokens_seen": 31192672, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.9336, | |
| "grad_norm": 0.8770239949226379, | |
| "learning_rate": 5.419640711649188e-07, | |
| "loss": 0.6387, | |
| "num_input_tokens_seen": 31224016, | |
| "step": 5835 | |
| }, | |
| { | |
| "epoch": 0.9344, | |
| "grad_norm": 1.2829992771148682, | |
| "learning_rate": 5.290293453819955e-07, | |
| "loss": 0.7316, | |
| "num_input_tokens_seen": 31247008, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.9352, | |
| "grad_norm": 0.9933931231498718, | |
| "learning_rate": 5.162491915495005e-07, | |
| "loss": 0.7255, | |
| "num_input_tokens_seen": 31273232, | |
| "step": 5845 | |
| }, | |
| { | |
| "epoch": 0.936, | |
| "grad_norm": 0.8756529092788696, | |
| "learning_rate": 5.036236903938285e-07, | |
| "loss": 0.7188, | |
| "num_input_tokens_seen": 31299504, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.9368, | |
| "grad_norm": 0.85035240650177, | |
| "learning_rate": 4.911529216645088e-07, | |
| "loss": 0.6763, | |
| "num_input_tokens_seen": 31325792, | |
| "step": 5855 | |
| }, | |
| { | |
| "epoch": 0.9376, | |
| "grad_norm": 0.9068401455879211, | |
| "learning_rate": 4.788369641336943e-07, | |
| "loss": 0.6109, | |
| "num_input_tokens_seen": 31351216, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.9384, | |
| "grad_norm": 1.079689860343933, | |
| "learning_rate": 4.666758955956613e-07, | |
| "loss": 0.7778, | |
| "num_input_tokens_seen": 31376464, | |
| "step": 5865 | |
| }, | |
| { | |
| "epoch": 0.9392, | |
| "grad_norm": 0.964074969291687, | |
| "learning_rate": 4.546697928663357e-07, | |
| "loss": 0.6315, | |
| "num_input_tokens_seen": 31408832, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.1026054620742798, | |
| "learning_rate": 4.4281873178278475e-07, | |
| "loss": 0.7918, | |
| "num_input_tokens_seen": 31432352, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 0.9408, | |
| "grad_norm": 0.914069652557373, | |
| "learning_rate": 4.311227872027479e-07, | |
| "loss": 0.6983, | |
| "num_input_tokens_seen": 31457392, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.9416, | |
| "grad_norm": 0.989115834236145, | |
| "learning_rate": 4.1958203300417054e-07, | |
| "loss": 0.7233, | |
| "num_input_tokens_seen": 31482704, | |
| "step": 5885 | |
| }, | |
| { | |
| "epoch": 0.9424, | |
| "grad_norm": 1.034597396850586, | |
| "learning_rate": 4.0819654208472947e-07, | |
| "loss": 0.6402, | |
| "num_input_tokens_seen": 31512368, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.9432, | |
| "grad_norm": 1.3309321403503418, | |
| "learning_rate": 3.9696638636137206e-07, | |
| "loss": 0.6942, | |
| "num_input_tokens_seen": 31539040, | |
| "step": 5895 | |
| }, | |
| { | |
| "epoch": 0.944, | |
| "grad_norm": 1.2237857580184937, | |
| "learning_rate": 3.8589163676986674e-07, | |
| "loss": 0.7119, | |
| "num_input_tokens_seen": 31563712, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.9448, | |
| "grad_norm": 1.0268304347991943, | |
| "learning_rate": 3.7497236326434757e-07, | |
| "loss": 0.6575, | |
| "num_input_tokens_seen": 31587760, | |
| "step": 5905 | |
| }, | |
| { | |
| "epoch": 0.9456, | |
| "grad_norm": 1.0060738325119019, | |
| "learning_rate": 3.6420863481688437e-07, | |
| "loss": 0.704, | |
| "num_input_tokens_seen": 31612976, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.9464, | |
| "grad_norm": 0.9219969511032104, | |
| "learning_rate": 3.536005194170328e-07, | |
| "loss": 0.7876, | |
| "num_input_tokens_seen": 31639472, | |
| "step": 5915 | |
| }, | |
| { | |
| "epoch": 0.9472, | |
| "grad_norm": 0.8863883018493652, | |
| "learning_rate": 3.431480840714152e-07, | |
| "loss": 0.7033, | |
| "num_input_tokens_seen": 31670768, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.948, | |
| "grad_norm": 0.9394556879997253, | |
| "learning_rate": 3.328513948032991e-07, | |
| "loss": 0.7095, | |
| "num_input_tokens_seen": 31696624, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 0.9488, | |
| "grad_norm": 0.8008967638015747, | |
| "learning_rate": 3.227105166521638e-07, | |
| "loss": 0.6629, | |
| "num_input_tokens_seen": 31723840, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.9496, | |
| "grad_norm": 0.9910029172897339, | |
| "learning_rate": 3.127255136733093e-07, | |
| "loss": 0.591, | |
| "num_input_tokens_seen": 31752736, | |
| "step": 5935 | |
| }, | |
| { | |
| "epoch": 0.9504, | |
| "grad_norm": 0.9355325698852539, | |
| "learning_rate": 3.0289644893744527e-07, | |
| "loss": 0.6641, | |
| "num_input_tokens_seen": 31777760, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.9512, | |
| "grad_norm": 0.9911002516746521, | |
| "learning_rate": 2.9322338453028066e-07, | |
| "loss": 0.6156, | |
| "num_input_tokens_seen": 31805264, | |
| "step": 5945 | |
| }, | |
| { | |
| "epoch": 0.952, | |
| "grad_norm": 1.4127229452133179, | |
| "learning_rate": 2.8370638155215123e-07, | |
| "loss": 0.7834, | |
| "num_input_tokens_seen": 31828656, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.9528, | |
| "grad_norm": 1.0222047567367554, | |
| "learning_rate": 2.743455001176176e-07, | |
| "loss": 0.6998, | |
| "num_input_tokens_seen": 31855424, | |
| "step": 5955 | |
| }, | |
| { | |
| "epoch": 0.9536, | |
| "grad_norm": 0.9893736839294434, | |
| "learning_rate": 2.6514079935509584e-07, | |
| "loss": 0.7458, | |
| "num_input_tokens_seen": 31879168, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.9544, | |
| "grad_norm": 0.8317204713821411, | |
| "learning_rate": 2.560923374064772e-07, | |
| "loss": 0.7061, | |
| "num_input_tokens_seen": 31903824, | |
| "step": 5965 | |
| }, | |
| { | |
| "epoch": 0.9552, | |
| "grad_norm": 1.3220785856246948, | |
| "learning_rate": 2.472001714267674e-07, | |
| "loss": 0.8603, | |
| "num_input_tokens_seen": 31927184, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.956, | |
| "grad_norm": 0.8110103607177734, | |
| "learning_rate": 2.384643575837203e-07, | |
| "loss": 0.6273, | |
| "num_input_tokens_seen": 31955104, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 0.9568, | |
| "grad_norm": 0.6332679390907288, | |
| "learning_rate": 2.298849510574824e-07, | |
| "loss": 0.714, | |
| "num_input_tokens_seen": 31985888, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.9576, | |
| "grad_norm": 0.9290034174919128, | |
| "learning_rate": 2.2146200604024613e-07, | |
| "loss": 0.6899, | |
| "num_input_tokens_seen": 32013520, | |
| "step": 5985 | |
| }, | |
| { | |
| "epoch": 0.9584, | |
| "grad_norm": 1.0509424209594727, | |
| "learning_rate": 2.1319557573591108e-07, | |
| "loss": 0.677, | |
| "num_input_tokens_seen": 32038880, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.9592, | |
| "grad_norm": 1.0169018507003784, | |
| "learning_rate": 2.050857123597455e-07, | |
| "loss": 0.7033, | |
| "num_input_tokens_seen": 32062160, | |
| "step": 5995 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.053408145904541, | |
| "learning_rate": 1.9713246713805588e-07, | |
| "loss": 0.6431, | |
| "num_input_tokens_seen": 32085712, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.9608, | |
| "grad_norm": 1.1077343225479126, | |
| "learning_rate": 1.8933589030785682e-07, | |
| "loss": 0.683, | |
| "num_input_tokens_seen": 32115232, | |
| "step": 6005 | |
| }, | |
| { | |
| "epoch": 0.9616, | |
| "grad_norm": 1.2601428031921387, | |
| "learning_rate": 1.8169603111656552e-07, | |
| "loss": 0.751, | |
| "num_input_tokens_seen": 32142992, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.9624, | |
| "grad_norm": 0.7372344136238098, | |
| "learning_rate": 1.7421293782168835e-07, | |
| "loss": 0.5808, | |
| "num_input_tokens_seen": 32176176, | |
| "step": 6015 | |
| }, | |
| { | |
| "epoch": 0.9632, | |
| "grad_norm": 0.856760561466217, | |
| "learning_rate": 1.6688665769050703e-07, | |
| "loss": 0.6852, | |
| "num_input_tokens_seen": 32204992, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.964, | |
| "grad_norm": 1.110574722290039, | |
| "learning_rate": 1.5971723699979013e-07, | |
| "loss": 0.6778, | |
| "num_input_tokens_seen": 32232960, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 0.9648, | |
| "grad_norm": 1.084190845489502, | |
| "learning_rate": 1.5270472103549315e-07, | |
| "loss": 0.7036, | |
| "num_input_tokens_seen": 32262672, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.9656, | |
| "grad_norm": 0.9454313516616821, | |
| "learning_rate": 1.4584915409248112e-07, | |
| "loss": 0.655, | |
| "num_input_tokens_seen": 32285504, | |
| "step": 6035 | |
| }, | |
| { | |
| "epoch": 0.9664, | |
| "grad_norm": 0.9206419587135315, | |
| "learning_rate": 1.3915057947423705e-07, | |
| "loss": 0.7324, | |
| "num_input_tokens_seen": 32312288, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.9672, | |
| "grad_norm": 0.9567137956619263, | |
| "learning_rate": 1.3260903949260107e-07, | |
| "loss": 0.7166, | |
| "num_input_tokens_seen": 32339424, | |
| "step": 6045 | |
| }, | |
| { | |
| "epoch": 0.968, | |
| "grad_norm": 1.0180697441101074, | |
| "learning_rate": 1.2622457546749567e-07, | |
| "loss": 0.7, | |
| "num_input_tokens_seen": 32362848, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.9688, | |
| "grad_norm": 1.2073848247528076, | |
| "learning_rate": 1.1999722772666476e-07, | |
| "loss": 0.7519, | |
| "num_input_tokens_seen": 32393264, | |
| "step": 6055 | |
| }, | |
| { | |
| "epoch": 0.9696, | |
| "grad_norm": 1.020180583000183, | |
| "learning_rate": 1.1392703560542117e-07, | |
| "loss": 0.7524, | |
| "num_input_tokens_seen": 32418464, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.9704, | |
| "grad_norm": 1.0939137935638428, | |
| "learning_rate": 1.080140374463967e-07, | |
| "loss": 0.5829, | |
| "num_input_tokens_seen": 32449248, | |
| "step": 6065 | |
| }, | |
| { | |
| "epoch": 0.9712, | |
| "grad_norm": 1.2307384014129639, | |
| "learning_rate": 1.0225827059930083e-07, | |
| "loss": 0.7017, | |
| "num_input_tokens_seen": 32477312, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.972, | |
| "grad_norm": 1.0141756534576416, | |
| "learning_rate": 9.665977142068738e-08, | |
| "loss": 0.6852, | |
| "num_input_tokens_seen": 32505024, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 0.9728, | |
| "grad_norm": 1.0366077423095703, | |
| "learning_rate": 9.121857527372158e-08, | |
| "loss": 0.733, | |
| "num_input_tokens_seen": 32530080, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.9736, | |
| "grad_norm": 0.9831274151802063, | |
| "learning_rate": 8.593471652794949e-08, | |
| "loss": 0.6721, | |
| "num_input_tokens_seen": 32557488, | |
| "step": 6085 | |
| }, | |
| { | |
| "epoch": 0.9744, | |
| "grad_norm": 1.0170478820800781, | |
| "learning_rate": 8.080822855909831e-08, | |
| "loss": 0.6572, | |
| "num_input_tokens_seen": 32589072, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.9752, | |
| "grad_norm": 1.0840100049972534, | |
| "learning_rate": 7.583914374885426e-08, | |
| "loss": 0.7535, | |
| "num_input_tokens_seen": 32613296, | |
| "step": 6095 | |
| }, | |
| { | |
| "epoch": 0.976, | |
| "grad_norm": 1.1899126768112183, | |
| "learning_rate": 7.102749348465165e-08, | |
| "loss": 0.639, | |
| "num_input_tokens_seen": 32642512, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.9768, | |
| "grad_norm": 1.0756986141204834, | |
| "learning_rate": 6.637330815949527e-08, | |
| "loss": 0.7558, | |
| "num_input_tokens_seen": 32666064, | |
| "step": 6105 | |
| }, | |
| { | |
| "epoch": 0.9776, | |
| "grad_norm": 0.9403240084648132, | |
| "learning_rate": 6.187661717174386e-08, | |
| "loss": 0.7228, | |
| "num_input_tokens_seen": 32690016, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.9784, | |
| "grad_norm": 0.9194949269294739, | |
| "learning_rate": 5.753744892494639e-08, | |
| "loss": 0.7079, | |
| "num_input_tokens_seen": 32716240, | |
| "step": 6115 | |
| }, | |
| { | |
| "epoch": 0.9792, | |
| "grad_norm": 0.9947624206542969, | |
| "learning_rate": 5.335583082764495e-08, | |
| "loss": 0.7692, | |
| "num_input_tokens_seen": 32741648, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.2828369140625, | |
| "learning_rate": 4.9331789293211026e-08, | |
| "loss": 0.6285, | |
| "num_input_tokens_seen": 32770224, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 0.9808, | |
| "grad_norm": 1.0066205263137817, | |
| "learning_rate": 4.546534973968175e-08, | |
| "loss": 0.7464, | |
| "num_input_tokens_seen": 32798864, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.9816, | |
| "grad_norm": 1.3146965503692627, | |
| "learning_rate": 4.1756536589585004e-08, | |
| "loss": 0.6632, | |
| "num_input_tokens_seen": 32829136, | |
| "step": 6135 | |
| }, | |
| { | |
| "epoch": 0.9824, | |
| "grad_norm": 0.8514100909233093, | |
| "learning_rate": 3.820537326980622e-08, | |
| "loss": 0.7378, | |
| "num_input_tokens_seen": 32858976, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.9832, | |
| "grad_norm": 0.9065835475921631, | |
| "learning_rate": 3.481188221142184e-08, | |
| "loss": 0.7125, | |
| "num_input_tokens_seen": 32886208, | |
| "step": 6145 | |
| }, | |
| { | |
| "epoch": 0.984, | |
| "grad_norm": 1.2251099348068237, | |
| "learning_rate": 3.157608484956332e-08, | |
| "loss": 0.7723, | |
| "num_input_tokens_seen": 32912960, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.9848, | |
| "grad_norm": 1.0463021993637085, | |
| "learning_rate": 2.8498001623286642e-08, | |
| "loss": 0.6472, | |
| "num_input_tokens_seen": 32941072, | |
| "step": 6155 | |
| }, | |
| { | |
| "epoch": 0.9856, | |
| "grad_norm": 1.001555323600769, | |
| "learning_rate": 2.557765197543638e-08, | |
| "loss": 0.7462, | |
| "num_input_tokens_seen": 32968304, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.9864, | |
| "grad_norm": 1.0437195301055908, | |
| "learning_rate": 2.281505435253184e-08, | |
| "loss": 0.7079, | |
| "num_input_tokens_seen": 32993008, | |
| "step": 6165 | |
| }, | |
| { | |
| "epoch": 0.9872, | |
| "grad_norm": 1.1048009395599365, | |
| "learning_rate": 2.0210226204639414e-08, | |
| "loss": 0.7058, | |
| "num_input_tokens_seen": 33018128, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.988, | |
| "grad_norm": 1.1593177318572998, | |
| "learning_rate": 1.7763183985269883e-08, | |
| "loss": 0.5942, | |
| "num_input_tokens_seen": 33047968, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 0.9888, | |
| "grad_norm": 1.3036433458328247, | |
| "learning_rate": 1.5473943151270153e-08, | |
| "loss": 0.7738, | |
| "num_input_tokens_seen": 33072560, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.9896, | |
| "grad_norm": 1.0337815284729004, | |
| "learning_rate": 1.3342518162728912e-08, | |
| "loss": 0.781, | |
| "num_input_tokens_seen": 33096928, | |
| "step": 6185 | |
| }, | |
| { | |
| "epoch": 0.9904, | |
| "grad_norm": 1.0598255395889282, | |
| "learning_rate": 1.136892248288779e-08, | |
| "loss": 0.6607, | |
| "num_input_tokens_seen": 33123808, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.9912, | |
| "grad_norm": 1.658722162246704, | |
| "learning_rate": 9.553168578049775e-09, | |
| "loss": 0.7506, | |
| "num_input_tokens_seen": 33148688, | |
| "step": 6195 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.9460881352424622, | |
| "learning_rate": 7.895267917501504e-09, | |
| "loss": 0.6521, | |
| "num_input_tokens_seen": 33176624, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.9928, | |
| "grad_norm": 0.9642547369003296, | |
| "learning_rate": 6.395230973443856e-09, | |
| "loss": 0.709, | |
| "num_input_tokens_seen": 33198160, | |
| "step": 6205 | |
| }, | |
| { | |
| "epoch": 0.9936, | |
| "grad_norm": 1.1588774919509888, | |
| "learning_rate": 5.053067220925356e-09, | |
| "loss": 0.6685, | |
| "num_input_tokens_seen": 33226336, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.9944, | |
| "grad_norm": 0.7818155288696289, | |
| "learning_rate": 3.868785137786657e-09, | |
| "loss": 0.6672, | |
| "num_input_tokens_seen": 33245824, | |
| "step": 6215 | |
| }, | |
| { | |
| "epoch": 0.9952, | |
| "grad_norm": 0.8517420887947083, | |
| "learning_rate": 2.842392204591149e-09, | |
| "loss": 0.7053, | |
| "num_input_tokens_seen": 33274176, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.996, | |
| "grad_norm": 1.1486226320266724, | |
| "learning_rate": 1.973894904597207e-09, | |
| "loss": 0.7184, | |
| "num_input_tokens_seen": 33302528, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 0.9968, | |
| "grad_norm": 0.9121309518814087, | |
| "learning_rate": 1.2632987237054528e-09, | |
| "loss": 0.7092, | |
| "num_input_tokens_seen": 33330384, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.9976, | |
| "grad_norm": 1.1165632009506226, | |
| "learning_rate": 7.106081504254514e-10, | |
| "loss": 0.6142, | |
| "num_input_tokens_seen": 33357968, | |
| "step": 6235 | |
| }, | |
| { | |
| "epoch": 0.9984, | |
| "grad_norm": 1.0336644649505615, | |
| "learning_rate": 3.158266758562789e-10, | |
| "loss": 0.7147, | |
| "num_input_tokens_seen": 33381536, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.9992, | |
| "grad_norm": 1.1494331359863281, | |
| "learning_rate": 7.89567936476665e-11, | |
| "loss": 0.7005, | |
| "num_input_tokens_seen": 33409984, | |
| "step": 6245 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.0414918661117554, | |
| "learning_rate": 0.0, | |
| "loss": 0.7516, | |
| "num_input_tokens_seen": 33437856, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "num_input_tokens_seen": 33437856, | |
| "step": 6250, | |
| "total_flos": 7.1914395644928e+16, | |
| "train_loss": 0.7151971128082275, | |
| "train_runtime": 36754.4929, | |
| "train_samples_per_second": 2.721, | |
| "train_steps_per_second": 0.17 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 6250, | |
| "num_input_tokens_seen": 33437856, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.1914395644928e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |