diff --git "a/checkpoint-16421/trainer_state.json" "b/checkpoint-16421/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-16421/trainer_state.json" @@ -0,0 +1,114980 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.999969552111561, + "eval_steps": 500, + "global_step": 16421, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 6.089577687787352e-05, + "grad_norm": 1.2783087128363426, + "learning_rate": 5.0000000000000004e-08, + "loss": 1.2081, + "step": 1 + }, + { + "epoch": 0.00012179155375574704, + "grad_norm": 1.3613236856754432, + "learning_rate": 1.0000000000000001e-07, + "loss": 1.2835, + "step": 2 + }, + { + "epoch": 0.00018268733063362056, + "grad_norm": 1.30298213226854, + "learning_rate": 1.5000000000000002e-07, + "loss": 1.2077, + "step": 3 + }, + { + "epoch": 0.00024358310751149408, + "grad_norm": 1.368037692261487, + "learning_rate": 2.0000000000000002e-07, + "loss": 1.2432, + "step": 4 + }, + { + "epoch": 0.0003044788843893676, + "grad_norm": 1.4688587783314677, + "learning_rate": 2.5000000000000004e-07, + "loss": 1.2565, + "step": 5 + }, + { + "epoch": 0.0003653746612672411, + "grad_norm": 1.3521010226655206, + "learning_rate": 3.0000000000000004e-07, + "loss": 1.2392, + "step": 6 + }, + { + "epoch": 0.0004262704381451146, + "grad_norm": 1.29609633985871, + "learning_rate": 3.5000000000000004e-07, + "loss": 1.2221, + "step": 7 + }, + { + "epoch": 0.00048716621502298816, + "grad_norm": 1.3769312847454855, + "learning_rate": 4.0000000000000003e-07, + "loss": 1.2871, + "step": 8 + }, + { + "epoch": 0.0005480619919008617, + "grad_norm": 1.4064798068116708, + "learning_rate": 4.5000000000000003e-07, + "loss": 1.314, + "step": 9 + }, + { + "epoch": 0.0006089577687787351, + "grad_norm": 1.364989462955391, + "learning_rate": 5.000000000000001e-07, + "loss": 1.2562, + "step": 10 + }, + { + "epoch": 0.0006698535456566087, + "grad_norm": 1.2610318355920762, + "learning_rate": 5.5e-07, + "loss": 1.221, + "step": 11 + }, + { + "epoch": 0.0007307493225344822, + "grad_norm": 1.3980912017066547, + "learning_rate": 6.000000000000001e-07, + "loss": 1.2528, + "step": 12 + }, + { + "epoch": 0.0007916450994123557, + "grad_norm": 1.2606235299921411, + "learning_rate": 6.5e-07, + "loss": 1.1981, + "step": 13 + }, + { + "epoch": 0.0008525408762902292, + "grad_norm": 1.2985310084373154, + "learning_rate": 7.000000000000001e-07, + "loss": 1.2424, + "step": 14 + }, + { + "epoch": 0.0009134366531681028, + "grad_norm": 1.3163754114281772, + "learning_rate": 7.5e-07, + "loss": 1.2285, + "step": 15 + }, + { + "epoch": 0.0009743324300459763, + "grad_norm": 1.2821693232198674, + "learning_rate": 8.000000000000001e-07, + "loss": 1.23, + "step": 16 + }, + { + "epoch": 0.0010352282069238498, + "grad_norm": 1.3210071176961222, + "learning_rate": 8.500000000000001e-07, + "loss": 1.2896, + "step": 17 + }, + { + "epoch": 0.0010961239838017233, + "grad_norm": 1.4278164117207706, + "learning_rate": 9.000000000000001e-07, + "loss": 1.2955, + "step": 18 + }, + { + "epoch": 0.0011570197606795968, + "grad_norm": 1.449650040941209, + "learning_rate": 9.500000000000001e-07, + "loss": 1.304, + "step": 19 + }, + { + "epoch": 0.0012179155375574703, + "grad_norm": 1.2024611163883157, + "learning_rate": 1.0000000000000002e-06, + "loss": 1.2692, + "step": 20 + }, + { + "epoch": 0.001278811314435344, + "grad_norm": 1.190892382796453, + "learning_rate": 1.0500000000000001e-06, + "loss": 1.2073, + "step": 21 + }, + { + "epoch": 0.0013397070913132175, + "grad_norm": 1.1450327134036158, + "learning_rate": 1.1e-06, + "loss": 1.1972, + "step": 22 + }, + { + "epoch": 0.001400602868191091, + "grad_norm": 1.0909544550721726, + "learning_rate": 1.1500000000000002e-06, + "loss": 1.1695, + "step": 23 + }, + { + "epoch": 0.0014614986450689645, + "grad_norm": 1.088507811747228, + "learning_rate": 1.2000000000000002e-06, + "loss": 1.2186, + "step": 24 + }, + { + "epoch": 0.001522394421946838, + "grad_norm": 1.1452750682112331, + "learning_rate": 1.25e-06, + "loss": 1.2484, + "step": 25 + }, + { + "epoch": 0.0015832901988247115, + "grad_norm": 1.085326027122175, + "learning_rate": 1.3e-06, + "loss": 1.228, + "step": 26 + }, + { + "epoch": 0.001644185975702585, + "grad_norm": 0.9838162373848378, + "learning_rate": 1.3500000000000002e-06, + "loss": 1.1831, + "step": 27 + }, + { + "epoch": 0.0017050817525804585, + "grad_norm": 0.9875806111015505, + "learning_rate": 1.4000000000000001e-06, + "loss": 1.1558, + "step": 28 + }, + { + "epoch": 0.0017659775294583322, + "grad_norm": 0.9236168177688888, + "learning_rate": 1.45e-06, + "loss": 1.1292, + "step": 29 + }, + { + "epoch": 0.0018268733063362057, + "grad_norm": 0.9042913599389717, + "learning_rate": 1.5e-06, + "loss": 1.1916, + "step": 30 + }, + { + "epoch": 0.0018877690832140792, + "grad_norm": 0.9688611403020976, + "learning_rate": 1.5500000000000002e-06, + "loss": 1.1633, + "step": 31 + }, + { + "epoch": 0.0019486648600919526, + "grad_norm": 0.9127073730197273, + "learning_rate": 1.6000000000000001e-06, + "loss": 1.2167, + "step": 32 + }, + { + "epoch": 0.0020095606369698264, + "grad_norm": 0.8422078353450044, + "learning_rate": 1.6500000000000003e-06, + "loss": 1.1119, + "step": 33 + }, + { + "epoch": 0.0020704564138476996, + "grad_norm": 0.8507073955625997, + "learning_rate": 1.7000000000000002e-06, + "loss": 1.1717, + "step": 34 + }, + { + "epoch": 0.0021313521907255733, + "grad_norm": 0.7688402284704656, + "learning_rate": 1.75e-06, + "loss": 1.0982, + "step": 35 + }, + { + "epoch": 0.0021922479676034466, + "grad_norm": 0.8209157124780203, + "learning_rate": 1.8000000000000001e-06, + "loss": 1.1588, + "step": 36 + }, + { + "epoch": 0.0022531437444813203, + "grad_norm": 0.7732505451688806, + "learning_rate": 1.85e-06, + "loss": 1.1405, + "step": 37 + }, + { + "epoch": 0.0023140395213591936, + "grad_norm": 0.7400784055294407, + "learning_rate": 1.9000000000000002e-06, + "loss": 1.1173, + "step": 38 + }, + { + "epoch": 0.0023749352982370673, + "grad_norm": 0.74552272964745, + "learning_rate": 1.9500000000000004e-06, + "loss": 1.126, + "step": 39 + }, + { + "epoch": 0.0024358310751149406, + "grad_norm": 0.7661829692157198, + "learning_rate": 2.0000000000000003e-06, + "loss": 1.1094, + "step": 40 + }, + { + "epoch": 0.0024967268519928143, + "grad_norm": 0.6986791166318455, + "learning_rate": 2.05e-06, + "loss": 1.0649, + "step": 41 + }, + { + "epoch": 0.002557622628870688, + "grad_norm": 0.7239054229001102, + "learning_rate": 2.1000000000000002e-06, + "loss": 1.0664, + "step": 42 + }, + { + "epoch": 0.0026185184057485613, + "grad_norm": 0.6796740833720761, + "learning_rate": 2.15e-06, + "loss": 1.0484, + "step": 43 + }, + { + "epoch": 0.002679414182626435, + "grad_norm": 0.7236233259643801, + "learning_rate": 2.2e-06, + "loss": 1.0719, + "step": 44 + }, + { + "epoch": 0.0027403099595043083, + "grad_norm": 0.7131928979660329, + "learning_rate": 2.25e-06, + "loss": 1.0954, + "step": 45 + }, + { + "epoch": 0.002801205736382182, + "grad_norm": 0.6916052186956844, + "learning_rate": 2.3000000000000004e-06, + "loss": 1.068, + "step": 46 + }, + { + "epoch": 0.0028621015132600553, + "grad_norm": 0.7338346898557735, + "learning_rate": 2.35e-06, + "loss": 1.1337, + "step": 47 + }, + { + "epoch": 0.002922997290137929, + "grad_norm": 0.7127052386654825, + "learning_rate": 2.4000000000000003e-06, + "loss": 1.0364, + "step": 48 + }, + { + "epoch": 0.0029838930670158022, + "grad_norm": 0.7109363185975538, + "learning_rate": 2.4500000000000003e-06, + "loss": 1.0356, + "step": 49 + }, + { + "epoch": 0.003044788843893676, + "grad_norm": 0.6404186765114072, + "learning_rate": 2.5e-06, + "loss": 1.0379, + "step": 50 + }, + { + "epoch": 0.0031056846207715497, + "grad_norm": 0.6998851088838369, + "learning_rate": 2.55e-06, + "loss": 1.0652, + "step": 51 + }, + { + "epoch": 0.003166580397649423, + "grad_norm": 0.6466384854106114, + "learning_rate": 2.6e-06, + "loss": 1.0206, + "step": 52 + }, + { + "epoch": 0.0032274761745272967, + "grad_norm": 0.6652449670577572, + "learning_rate": 2.6500000000000005e-06, + "loss": 1.0451, + "step": 53 + }, + { + "epoch": 0.00328837195140517, + "grad_norm": 0.6794460180057439, + "learning_rate": 2.7000000000000004e-06, + "loss": 1.0077, + "step": 54 + }, + { + "epoch": 0.0033492677282830436, + "grad_norm": 0.6222955922127837, + "learning_rate": 2.7500000000000004e-06, + "loss": 1.0051, + "step": 55 + }, + { + "epoch": 0.003410163505160917, + "grad_norm": 0.6895274401041883, + "learning_rate": 2.8000000000000003e-06, + "loss": 1.0272, + "step": 56 + }, + { + "epoch": 0.0034710592820387906, + "grad_norm": 0.6847377406567425, + "learning_rate": 2.85e-06, + "loss": 0.9933, + "step": 57 + }, + { + "epoch": 0.0035319550589166643, + "grad_norm": 0.6141086214919128, + "learning_rate": 2.9e-06, + "loss": 0.9998, + "step": 58 + }, + { + "epoch": 0.0035928508357945376, + "grad_norm": 0.6767516370650634, + "learning_rate": 2.95e-06, + "loss": 0.9733, + "step": 59 + }, + { + "epoch": 0.0036537466126724113, + "grad_norm": 0.654701145061518, + "learning_rate": 3e-06, + "loss": 1.0394, + "step": 60 + }, + { + "epoch": 0.0037146423895502846, + "grad_norm": 0.6497783455180624, + "learning_rate": 3.05e-06, + "loss": 0.9817, + "step": 61 + }, + { + "epoch": 0.0037755381664281583, + "grad_norm": 0.6272373703795496, + "learning_rate": 3.1000000000000004e-06, + "loss": 1.0388, + "step": 62 + }, + { + "epoch": 0.0038364339433060316, + "grad_norm": 0.6666492103700693, + "learning_rate": 3.1500000000000003e-06, + "loss": 1.0004, + "step": 63 + }, + { + "epoch": 0.0038973297201839053, + "grad_norm": 0.6210329756811196, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.9683, + "step": 64 + }, + { + "epoch": 0.003958225497061779, + "grad_norm": 0.6284933174969854, + "learning_rate": 3.2500000000000002e-06, + "loss": 0.9259, + "step": 65 + }, + { + "epoch": 0.004019121273939653, + "grad_norm": 0.6181144182280022, + "learning_rate": 3.3000000000000006e-06, + "loss": 0.9478, + "step": 66 + }, + { + "epoch": 0.0040800170508175256, + "grad_norm": 0.6638012384977436, + "learning_rate": 3.3500000000000005e-06, + "loss": 0.9454, + "step": 67 + }, + { + "epoch": 0.004140912827695399, + "grad_norm": 0.6126923389742809, + "learning_rate": 3.4000000000000005e-06, + "loss": 0.8998, + "step": 68 + }, + { + "epoch": 0.004201808604573273, + "grad_norm": 0.6498517762588266, + "learning_rate": 3.45e-06, + "loss": 0.9558, + "step": 69 + }, + { + "epoch": 0.004262704381451147, + "grad_norm": 0.6299906610400277, + "learning_rate": 3.5e-06, + "loss": 0.9364, + "step": 70 + }, + { + "epoch": 0.0043236001583290195, + "grad_norm": 0.6767449694631275, + "learning_rate": 3.5500000000000003e-06, + "loss": 0.9062, + "step": 71 + }, + { + "epoch": 0.004384495935206893, + "grad_norm": 0.741078767464641, + "learning_rate": 3.6000000000000003e-06, + "loss": 0.9693, + "step": 72 + }, + { + "epoch": 0.004445391712084767, + "grad_norm": 0.6679504048480375, + "learning_rate": 3.65e-06, + "loss": 0.9224, + "step": 73 + }, + { + "epoch": 0.004506287488962641, + "grad_norm": 0.719246215898473, + "learning_rate": 3.7e-06, + "loss": 0.9685, + "step": 74 + }, + { + "epoch": 0.004567183265840514, + "grad_norm": 0.637886583107297, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.9074, + "step": 75 + }, + { + "epoch": 0.004628079042718387, + "grad_norm": 0.6331989492396305, + "learning_rate": 3.8000000000000005e-06, + "loss": 0.9085, + "step": 76 + }, + { + "epoch": 0.004688974819596261, + "grad_norm": 0.6494531462077495, + "learning_rate": 3.85e-06, + "loss": 0.8724, + "step": 77 + }, + { + "epoch": 0.004749870596474135, + "grad_norm": 0.6768427312532551, + "learning_rate": 3.900000000000001e-06, + "loss": 0.9074, + "step": 78 + }, + { + "epoch": 0.004810766373352008, + "grad_norm": 0.6838136341557196, + "learning_rate": 3.95e-06, + "loss": 0.9191, + "step": 79 + }, + { + "epoch": 0.004871662150229881, + "grad_norm": 0.6802361612355575, + "learning_rate": 4.000000000000001e-06, + "loss": 0.8616, + "step": 80 + }, + { + "epoch": 0.004932557927107755, + "grad_norm": 0.7121742369187243, + "learning_rate": 4.05e-06, + "loss": 0.8727, + "step": 81 + }, + { + "epoch": 0.004993453703985629, + "grad_norm": 0.720021332039233, + "learning_rate": 4.1e-06, + "loss": 0.924, + "step": 82 + }, + { + "epoch": 0.005054349480863502, + "grad_norm": 0.6173661542330908, + "learning_rate": 4.15e-06, + "loss": 0.8718, + "step": 83 + }, + { + "epoch": 0.005115245257741376, + "grad_norm": 0.7136441168396123, + "learning_rate": 4.2000000000000004e-06, + "loss": 0.8726, + "step": 84 + }, + { + "epoch": 0.005176141034619249, + "grad_norm": 0.6466816889421091, + "learning_rate": 4.25e-06, + "loss": 0.8748, + "step": 85 + }, + { + "epoch": 0.005237036811497123, + "grad_norm": 0.6782624476040339, + "learning_rate": 4.3e-06, + "loss": 0.8709, + "step": 86 + }, + { + "epoch": 0.005297932588374996, + "grad_norm": 0.604268387158685, + "learning_rate": 4.350000000000001e-06, + "loss": 0.8512, + "step": 87 + }, + { + "epoch": 0.00535882836525287, + "grad_norm": 0.6591563240624356, + "learning_rate": 4.4e-06, + "loss": 0.9027, + "step": 88 + }, + { + "epoch": 0.005419724142130743, + "grad_norm": 0.6367661068281147, + "learning_rate": 4.450000000000001e-06, + "loss": 0.862, + "step": 89 + }, + { + "epoch": 0.0054806199190086165, + "grad_norm": 0.652591644341844, + "learning_rate": 4.5e-06, + "loss": 0.8405, + "step": 90 + }, + { + "epoch": 0.00554151569588649, + "grad_norm": 0.6460001302844072, + "learning_rate": 4.5500000000000005e-06, + "loss": 0.8678, + "step": 91 + }, + { + "epoch": 0.005602411472764364, + "grad_norm": 0.6528770416590572, + "learning_rate": 4.600000000000001e-06, + "loss": 0.9136, + "step": 92 + }, + { + "epoch": 0.005663307249642238, + "grad_norm": 0.6154680768220303, + "learning_rate": 4.65e-06, + "loss": 0.811, + "step": 93 + }, + { + "epoch": 0.0057242030265201105, + "grad_norm": 0.6414346841538292, + "learning_rate": 4.7e-06, + "loss": 0.8646, + "step": 94 + }, + { + "epoch": 0.005785098803397984, + "grad_norm": 0.6476425301670001, + "learning_rate": 4.75e-06, + "loss": 0.8607, + "step": 95 + }, + { + "epoch": 0.005845994580275858, + "grad_norm": 0.665120292815135, + "learning_rate": 4.800000000000001e-06, + "loss": 0.8155, + "step": 96 + }, + { + "epoch": 0.005906890357153732, + "grad_norm": 0.6543842343163409, + "learning_rate": 4.85e-06, + "loss": 0.839, + "step": 97 + }, + { + "epoch": 0.0059677861340316045, + "grad_norm": 0.6493833764230088, + "learning_rate": 4.9000000000000005e-06, + "loss": 0.7764, + "step": 98 + }, + { + "epoch": 0.006028681910909478, + "grad_norm": 0.6910350684709983, + "learning_rate": 4.95e-06, + "loss": 0.835, + "step": 99 + }, + { + "epoch": 0.006089577687787352, + "grad_norm": 0.6939532471647948, + "learning_rate": 5e-06, + "loss": 0.8889, + "step": 100 + }, + { + "epoch": 0.006150473464665226, + "grad_norm": 0.6801399484740052, + "learning_rate": 4.999999998726526e-06, + "loss": 0.8525, + "step": 101 + }, + { + "epoch": 0.006211369241543099, + "grad_norm": 0.6057438194423901, + "learning_rate": 4.999999994906104e-06, + "loss": 0.804, + "step": 102 + }, + { + "epoch": 0.006272265018420972, + "grad_norm": 0.6381262259706383, + "learning_rate": 4.999999988538734e-06, + "loss": 0.8352, + "step": 103 + }, + { + "epoch": 0.006333160795298846, + "grad_norm": 0.6503324179586444, + "learning_rate": 4.999999979624416e-06, + "loss": 0.8122, + "step": 104 + }, + { + "epoch": 0.00639405657217672, + "grad_norm": 0.6508882951803143, + "learning_rate": 4.999999968163151e-06, + "loss": 0.8378, + "step": 105 + }, + { + "epoch": 0.006454952349054593, + "grad_norm": 0.6761069223715721, + "learning_rate": 4.999999954154936e-06, + "loss": 0.8092, + "step": 106 + }, + { + "epoch": 0.006515848125932467, + "grad_norm": 0.6242061897581022, + "learning_rate": 4.999999937599774e-06, + "loss": 0.7822, + "step": 107 + }, + { + "epoch": 0.00657674390281034, + "grad_norm": 0.6737989504180351, + "learning_rate": 4.999999918497664e-06, + "loss": 0.7877, + "step": 108 + }, + { + "epoch": 0.0066376396796882136, + "grad_norm": 0.6557666927699501, + "learning_rate": 4.999999896848606e-06, + "loss": 0.7953, + "step": 109 + }, + { + "epoch": 0.006698535456566087, + "grad_norm": 0.6590897897144997, + "learning_rate": 4.9999998726526e-06, + "loss": 0.8083, + "step": 110 + }, + { + "epoch": 0.006759431233443961, + "grad_norm": 0.6633869714276917, + "learning_rate": 4.999999845909646e-06, + "loss": 0.7788, + "step": 111 + }, + { + "epoch": 0.006820327010321834, + "grad_norm": 0.6795631537974902, + "learning_rate": 4.999999816619745e-06, + "loss": 0.7729, + "step": 112 + }, + { + "epoch": 0.0068812227871997075, + "grad_norm": 0.6601932877676258, + "learning_rate": 4.999999784782895e-06, + "loss": 0.7891, + "step": 113 + }, + { + "epoch": 0.006942118564077581, + "grad_norm": 0.6622531999132142, + "learning_rate": 4.999999750399098e-06, + "loss": 0.7941, + "step": 114 + }, + { + "epoch": 0.007003014340955455, + "grad_norm": 0.6686235251282103, + "learning_rate": 4.999999713468353e-06, + "loss": 0.8606, + "step": 115 + }, + { + "epoch": 0.007063910117833329, + "grad_norm": 0.6479924171702877, + "learning_rate": 4.9999996739906605e-06, + "loss": 0.7609, + "step": 116 + }, + { + "epoch": 0.0071248058947112015, + "grad_norm": 0.685268150071863, + "learning_rate": 4.9999996319660195e-06, + "loss": 0.759, + "step": 117 + }, + { + "epoch": 0.007185701671589075, + "grad_norm": 0.6512632561280425, + "learning_rate": 4.999999587394432e-06, + "loss": 0.7775, + "step": 118 + }, + { + "epoch": 0.007246597448466949, + "grad_norm": 0.6437624902005535, + "learning_rate": 4.9999995402758964e-06, + "loss": 0.8006, + "step": 119 + }, + { + "epoch": 0.007307493225344823, + "grad_norm": 0.6737099934566816, + "learning_rate": 4.9999994906104135e-06, + "loss": 0.7856, + "step": 120 + }, + { + "epoch": 0.0073683890022226955, + "grad_norm": 0.655236375407972, + "learning_rate": 4.999999438397982e-06, + "loss": 0.743, + "step": 121 + }, + { + "epoch": 0.007429284779100569, + "grad_norm": 0.6829460867692894, + "learning_rate": 4.999999383638604e-06, + "loss": 0.7955, + "step": 122 + }, + { + "epoch": 0.007490180555978443, + "grad_norm": 0.6872450008851713, + "learning_rate": 4.999999326332278e-06, + "loss": 0.8011, + "step": 123 + }, + { + "epoch": 0.007551076332856317, + "grad_norm": 0.6323451453704254, + "learning_rate": 4.999999266479006e-06, + "loss": 0.7418, + "step": 124 + }, + { + "epoch": 0.00761197210973419, + "grad_norm": 0.6290339271054501, + "learning_rate": 4.999999204078785e-06, + "loss": 0.8091, + "step": 125 + }, + { + "epoch": 0.007672867886612063, + "grad_norm": 0.6495150715269111, + "learning_rate": 4.999999139131618e-06, + "loss": 0.8052, + "step": 126 + }, + { + "epoch": 0.007733763663489937, + "grad_norm": 0.6542039835413054, + "learning_rate": 4.999999071637503e-06, + "loss": 0.7447, + "step": 127 + }, + { + "epoch": 0.007794659440367811, + "grad_norm": 0.661968201697012, + "learning_rate": 4.999999001596441e-06, + "loss": 0.7957, + "step": 128 + }, + { + "epoch": 0.007855555217245683, + "grad_norm": 0.7160738512666721, + "learning_rate": 4.9999989290084324e-06, + "loss": 0.7541, + "step": 129 + }, + { + "epoch": 0.007916450994123558, + "grad_norm": 0.7122792990292478, + "learning_rate": 4.999998853873478e-06, + "loss": 0.7819, + "step": 130 + }, + { + "epoch": 0.00797734677100143, + "grad_norm": 0.6281901645388035, + "learning_rate": 4.9999987761915745e-06, + "loss": 0.7744, + "step": 131 + }, + { + "epoch": 0.008038242547879305, + "grad_norm": 0.6733062772158463, + "learning_rate": 4.999998695962725e-06, + "loss": 0.7735, + "step": 132 + }, + { + "epoch": 0.008099138324757178, + "grad_norm": 0.6632923519770085, + "learning_rate": 4.9999986131869295e-06, + "loss": 0.7773, + "step": 133 + }, + { + "epoch": 0.008160034101635051, + "grad_norm": 0.6814438859738214, + "learning_rate": 4.999998527864187e-06, + "loss": 0.7964, + "step": 134 + }, + { + "epoch": 0.008220929878512926, + "grad_norm": 0.7397127769806833, + "learning_rate": 4.9999984399944975e-06, + "loss": 0.7545, + "step": 135 + }, + { + "epoch": 0.008281825655390799, + "grad_norm": 0.6371514271193969, + "learning_rate": 4.999998349577862e-06, + "loss": 0.77, + "step": 136 + }, + { + "epoch": 0.008342721432268671, + "grad_norm": 0.7438929630053858, + "learning_rate": 4.999998256614281e-06, + "loss": 0.7888, + "step": 137 + }, + { + "epoch": 0.008403617209146546, + "grad_norm": 0.7719529834546661, + "learning_rate": 4.999998161103753e-06, + "loss": 0.7503, + "step": 138 + }, + { + "epoch": 0.008464512986024419, + "grad_norm": 0.658634004291322, + "learning_rate": 4.999998063046278e-06, + "loss": 0.7677, + "step": 139 + }, + { + "epoch": 0.008525408762902293, + "grad_norm": 0.6992507081175925, + "learning_rate": 4.9999979624418586e-06, + "loss": 0.72, + "step": 140 + }, + { + "epoch": 0.008586304539780166, + "grad_norm": 0.6605488740604673, + "learning_rate": 4.999997859290492e-06, + "loss": 0.749, + "step": 141 + }, + { + "epoch": 0.008647200316658039, + "grad_norm": 0.7171750160790671, + "learning_rate": 4.9999977535921796e-06, + "loss": 0.7752, + "step": 142 + }, + { + "epoch": 0.008708096093535914, + "grad_norm": 0.6530496500614424, + "learning_rate": 4.999997645346922e-06, + "loss": 0.7705, + "step": 143 + }, + { + "epoch": 0.008768991870413786, + "grad_norm": 0.7130315279628207, + "learning_rate": 4.999997534554719e-06, + "loss": 0.8622, + "step": 144 + }, + { + "epoch": 0.008829887647291661, + "grad_norm": 0.7544125161508873, + "learning_rate": 4.99999742121557e-06, + "loss": 0.7688, + "step": 145 + }, + { + "epoch": 0.008890783424169534, + "grad_norm": 0.6802518901542594, + "learning_rate": 4.999997305329476e-06, + "loss": 0.7766, + "step": 146 + }, + { + "epoch": 0.008951679201047407, + "grad_norm": 0.6921700626668295, + "learning_rate": 4.9999971868964355e-06, + "loss": 0.7423, + "step": 147 + }, + { + "epoch": 0.009012574977925281, + "grad_norm": 0.6901437006788196, + "learning_rate": 4.999997065916451e-06, + "loss": 0.8055, + "step": 148 + }, + { + "epoch": 0.009073470754803154, + "grad_norm": 0.6933242049538111, + "learning_rate": 4.999996942389521e-06, + "loss": 0.8123, + "step": 149 + }, + { + "epoch": 0.009134366531681029, + "grad_norm": 0.6841548888369027, + "learning_rate": 4.999996816315647e-06, + "loss": 0.765, + "step": 150 + }, + { + "epoch": 0.009195262308558902, + "grad_norm": 0.738456866140826, + "learning_rate": 4.999996687694827e-06, + "loss": 0.7103, + "step": 151 + }, + { + "epoch": 0.009256158085436774, + "grad_norm": 0.6700083821557311, + "learning_rate": 4.999996556527062e-06, + "loss": 0.7403, + "step": 152 + }, + { + "epoch": 0.009317053862314649, + "grad_norm": 0.7300100644303995, + "learning_rate": 4.999996422812354e-06, + "loss": 0.7786, + "step": 153 + }, + { + "epoch": 0.009377949639192522, + "grad_norm": 0.7012301576521878, + "learning_rate": 4.999996286550701e-06, + "loss": 0.7361, + "step": 154 + }, + { + "epoch": 0.009438845416070395, + "grad_norm": 0.6559976995671444, + "learning_rate": 4.9999961477421034e-06, + "loss": 0.7412, + "step": 155 + }, + { + "epoch": 0.00949974119294827, + "grad_norm": 0.6703323494691581, + "learning_rate": 4.999996006386563e-06, + "loss": 0.7617, + "step": 156 + }, + { + "epoch": 0.009560636969826142, + "grad_norm": 0.7587015195055649, + "learning_rate": 4.999995862484077e-06, + "loss": 0.7329, + "step": 157 + }, + { + "epoch": 0.009621532746704017, + "grad_norm": 0.7411009769887599, + "learning_rate": 4.9999957160346476e-06, + "loss": 0.7656, + "step": 158 + }, + { + "epoch": 0.00968242852358189, + "grad_norm": 0.7452934246409302, + "learning_rate": 4.999995567038275e-06, + "loss": 0.756, + "step": 159 + }, + { + "epoch": 0.009743324300459762, + "grad_norm": 0.71031818418992, + "learning_rate": 4.999995415494959e-06, + "loss": 0.7395, + "step": 160 + }, + { + "epoch": 0.009804220077337637, + "grad_norm": 0.7235057867877875, + "learning_rate": 4.999995261404699e-06, + "loss": 0.7627, + "step": 161 + }, + { + "epoch": 0.00986511585421551, + "grad_norm": 0.6731178777700476, + "learning_rate": 4.999995104767497e-06, + "loss": 0.7601, + "step": 162 + }, + { + "epoch": 0.009926011631093384, + "grad_norm": 0.7028183845454333, + "learning_rate": 4.99999494558335e-06, + "loss": 0.7239, + "step": 163 + }, + { + "epoch": 0.009986907407971257, + "grad_norm": 0.6656216184227579, + "learning_rate": 4.9999947838522615e-06, + "loss": 0.7534, + "step": 164 + }, + { + "epoch": 0.01004780318484913, + "grad_norm": 0.7186416210405268, + "learning_rate": 4.9999946195742295e-06, + "loss": 0.7387, + "step": 165 + }, + { + "epoch": 0.010108698961727005, + "grad_norm": 0.6638945421808334, + "learning_rate": 4.999994452749256e-06, + "loss": 0.7372, + "step": 166 + }, + { + "epoch": 0.010169594738604877, + "grad_norm": 0.6927389901084501, + "learning_rate": 4.99999428337734e-06, + "loss": 0.7426, + "step": 167 + }, + { + "epoch": 0.010230490515482752, + "grad_norm": 0.6661595960886136, + "learning_rate": 4.999994111458481e-06, + "loss": 0.6729, + "step": 168 + }, + { + "epoch": 0.010291386292360625, + "grad_norm": 0.6830685353785078, + "learning_rate": 4.999993936992681e-06, + "loss": 0.746, + "step": 169 + }, + { + "epoch": 0.010352282069238498, + "grad_norm": 0.7088937701487941, + "learning_rate": 4.999993759979938e-06, + "loss": 0.7377, + "step": 170 + }, + { + "epoch": 0.010413177846116372, + "grad_norm": 0.7365563605281285, + "learning_rate": 4.999993580420254e-06, + "loss": 0.6641, + "step": 171 + }, + { + "epoch": 0.010474073622994245, + "grad_norm": 0.7002862888633864, + "learning_rate": 4.999993398313629e-06, + "loss": 0.7457, + "step": 172 + }, + { + "epoch": 0.01053496939987212, + "grad_norm": 0.7055463561808132, + "learning_rate": 4.999993213660062e-06, + "loss": 0.7409, + "step": 173 + }, + { + "epoch": 0.010595865176749993, + "grad_norm": 0.7268351427171649, + "learning_rate": 4.999993026459553e-06, + "loss": 0.7227, + "step": 174 + }, + { + "epoch": 0.010656760953627865, + "grad_norm": 0.6972055186960928, + "learning_rate": 4.999992836712104e-06, + "loss": 0.7249, + "step": 175 + }, + { + "epoch": 0.01071765673050574, + "grad_norm": 0.7284296199886579, + "learning_rate": 4.999992644417715e-06, + "loss": 0.7425, + "step": 176 + }, + { + "epoch": 0.010778552507383613, + "grad_norm": 0.7450000144631348, + "learning_rate": 4.999992449576384e-06, + "loss": 0.6667, + "step": 177 + }, + { + "epoch": 0.010839448284261486, + "grad_norm": 0.6702586477860759, + "learning_rate": 4.999992252188114e-06, + "loss": 0.737, + "step": 178 + }, + { + "epoch": 0.01090034406113936, + "grad_norm": 0.7276886097678802, + "learning_rate": 4.999992052252904e-06, + "loss": 0.6886, + "step": 179 + }, + { + "epoch": 0.010961239838017233, + "grad_norm": 0.7225606522968886, + "learning_rate": 4.9999918497707525e-06, + "loss": 0.7279, + "step": 180 + }, + { + "epoch": 0.011022135614895108, + "grad_norm": 0.6800258954571242, + "learning_rate": 4.999991644741662e-06, + "loss": 0.6573, + "step": 181 + }, + { + "epoch": 0.01108303139177298, + "grad_norm": 0.7479304269240068, + "learning_rate": 4.999991437165632e-06, + "loss": 0.7354, + "step": 182 + }, + { + "epoch": 0.011143927168650853, + "grad_norm": 0.7468248698179674, + "learning_rate": 4.999991227042663e-06, + "loss": 0.7741, + "step": 183 + }, + { + "epoch": 0.011204822945528728, + "grad_norm": 0.7381839443912191, + "learning_rate": 4.999991014372755e-06, + "loss": 0.7812, + "step": 184 + }, + { + "epoch": 0.0112657187224066, + "grad_norm": 0.7351804188410662, + "learning_rate": 4.999990799155908e-06, + "loss": 0.7668, + "step": 185 + }, + { + "epoch": 0.011326614499284475, + "grad_norm": 0.7128303002347449, + "learning_rate": 4.999990581392122e-06, + "loss": 0.7439, + "step": 186 + }, + { + "epoch": 0.011387510276162348, + "grad_norm": 0.7192879692104173, + "learning_rate": 4.999990361081398e-06, + "loss": 0.7388, + "step": 187 + }, + { + "epoch": 0.011448406053040221, + "grad_norm": 0.7368820906153425, + "learning_rate": 4.999990138223736e-06, + "loss": 0.7213, + "step": 188 + }, + { + "epoch": 0.011509301829918096, + "grad_norm": 0.735237081482363, + "learning_rate": 4.999989912819135e-06, + "loss": 0.7792, + "step": 189 + }, + { + "epoch": 0.011570197606795968, + "grad_norm": 0.7626984494359809, + "learning_rate": 4.999989684867598e-06, + "loss": 0.7494, + "step": 190 + }, + { + "epoch": 0.011631093383673843, + "grad_norm": 0.7559286289168233, + "learning_rate": 4.999989454369122e-06, + "loss": 0.7788, + "step": 191 + }, + { + "epoch": 0.011691989160551716, + "grad_norm": 0.774095413744315, + "learning_rate": 4.999989221323709e-06, + "loss": 0.7185, + "step": 192 + }, + { + "epoch": 0.011752884937429589, + "grad_norm": 0.6982704051138752, + "learning_rate": 4.9999889857313585e-06, + "loss": 0.7331, + "step": 193 + }, + { + "epoch": 0.011813780714307463, + "grad_norm": 0.6979326595216816, + "learning_rate": 4.999988747592073e-06, + "loss": 0.7229, + "step": 194 + }, + { + "epoch": 0.011874676491185336, + "grad_norm": 0.7532191780884344, + "learning_rate": 4.999988506905849e-06, + "loss": 0.7282, + "step": 195 + }, + { + "epoch": 0.011935572268063209, + "grad_norm": 0.6907249300335847, + "learning_rate": 4.99998826367269e-06, + "loss": 0.7211, + "step": 196 + }, + { + "epoch": 0.011996468044941084, + "grad_norm": 0.7517778917690918, + "learning_rate": 4.9999880178925945e-06, + "loss": 0.7515, + "step": 197 + }, + { + "epoch": 0.012057363821818956, + "grad_norm": 0.7176994339981974, + "learning_rate": 4.999987769565563e-06, + "loss": 0.7661, + "step": 198 + }, + { + "epoch": 0.012118259598696831, + "grad_norm": 0.7295939588365645, + "learning_rate": 4.999987518691595e-06, + "loss": 0.7175, + "step": 199 + }, + { + "epoch": 0.012179155375574704, + "grad_norm": 0.7999127501197879, + "learning_rate": 4.9999872652706925e-06, + "loss": 0.7044, + "step": 200 + }, + { + "epoch": 0.012240051152452577, + "grad_norm": 0.6922281983934921, + "learning_rate": 4.999987009302856e-06, + "loss": 0.7247, + "step": 201 + }, + { + "epoch": 0.012300946929330451, + "grad_norm": 0.7079161306807291, + "learning_rate": 4.999986750788084e-06, + "loss": 0.6779, + "step": 202 + }, + { + "epoch": 0.012361842706208324, + "grad_norm": 0.6801442855984485, + "learning_rate": 4.9999864897263765e-06, + "loss": 0.6863, + "step": 203 + }, + { + "epoch": 0.012422738483086199, + "grad_norm": 0.7283869699903739, + "learning_rate": 4.999986226117735e-06, + "loss": 0.7491, + "step": 204 + }, + { + "epoch": 0.012483634259964072, + "grad_norm": 0.7362100167076587, + "learning_rate": 4.999985959962161e-06, + "loss": 0.6756, + "step": 205 + }, + { + "epoch": 0.012544530036841944, + "grad_norm": 0.7206450618922078, + "learning_rate": 4.999985691259652e-06, + "loss": 0.7533, + "step": 206 + }, + { + "epoch": 0.012605425813719819, + "grad_norm": 0.7939605905912132, + "learning_rate": 4.99998542001021e-06, + "loss": 0.7536, + "step": 207 + }, + { + "epoch": 0.012666321590597692, + "grad_norm": 0.7820908083479423, + "learning_rate": 4.999985146213835e-06, + "loss": 0.7388, + "step": 208 + }, + { + "epoch": 0.012727217367475566, + "grad_norm": 0.7637260642072888, + "learning_rate": 4.999984869870527e-06, + "loss": 0.7277, + "step": 209 + }, + { + "epoch": 0.01278811314435344, + "grad_norm": 0.6837686847407585, + "learning_rate": 4.999984590980285e-06, + "loss": 0.7043, + "step": 210 + }, + { + "epoch": 0.012849008921231312, + "grad_norm": 0.7541234576238729, + "learning_rate": 4.999984309543113e-06, + "loss": 0.7302, + "step": 211 + }, + { + "epoch": 0.012909904698109187, + "grad_norm": 0.7293000900276323, + "learning_rate": 4.999984025559007e-06, + "loss": 0.7178, + "step": 212 + }, + { + "epoch": 0.01297080047498706, + "grad_norm": 0.7724389324980305, + "learning_rate": 4.999983739027971e-06, + "loss": 0.6974, + "step": 213 + }, + { + "epoch": 0.013031696251864934, + "grad_norm": 0.7679095595828379, + "learning_rate": 4.9999834499500025e-06, + "loss": 0.7225, + "step": 214 + }, + { + "epoch": 0.013092592028742807, + "grad_norm": 0.763849029802229, + "learning_rate": 4.999983158325103e-06, + "loss": 0.7417, + "step": 215 + }, + { + "epoch": 0.01315348780562068, + "grad_norm": 0.7896744330689136, + "learning_rate": 4.999982864153272e-06, + "loss": 0.6555, + "step": 216 + }, + { + "epoch": 0.013214383582498554, + "grad_norm": 0.7518091043051811, + "learning_rate": 4.999982567434511e-06, + "loss": 0.6608, + "step": 217 + }, + { + "epoch": 0.013275279359376427, + "grad_norm": 0.7217134085677327, + "learning_rate": 4.99998226816882e-06, + "loss": 0.6772, + "step": 218 + }, + { + "epoch": 0.0133361751362543, + "grad_norm": 0.7660022936244933, + "learning_rate": 4.999981966356199e-06, + "loss": 0.7223, + "step": 219 + }, + { + "epoch": 0.013397070913132175, + "grad_norm": 0.8043673906066173, + "learning_rate": 4.999981661996648e-06, + "loss": 0.6696, + "step": 220 + }, + { + "epoch": 0.013457966690010047, + "grad_norm": 0.7726735235289754, + "learning_rate": 4.999981355090167e-06, + "loss": 0.7308, + "step": 221 + }, + { + "epoch": 0.013518862466887922, + "grad_norm": 0.751614647030011, + "learning_rate": 4.999981045636758e-06, + "loss": 0.6988, + "step": 222 + }, + { + "epoch": 0.013579758243765795, + "grad_norm": 0.7813709504981456, + "learning_rate": 4.999980733636421e-06, + "loss": 0.7198, + "step": 223 + }, + { + "epoch": 0.013640654020643668, + "grad_norm": 0.778072375539278, + "learning_rate": 4.999980419089154e-06, + "loss": 0.7183, + "step": 224 + }, + { + "epoch": 0.013701549797521542, + "grad_norm": 0.707481014697059, + "learning_rate": 4.99998010199496e-06, + "loss": 0.7296, + "step": 225 + }, + { + "epoch": 0.013762445574399415, + "grad_norm": 0.6722953148537729, + "learning_rate": 4.999979782353838e-06, + "loss": 0.7127, + "step": 226 + }, + { + "epoch": 0.01382334135127729, + "grad_norm": 0.7562454337840977, + "learning_rate": 4.999979460165788e-06, + "loss": 0.7345, + "step": 227 + }, + { + "epoch": 0.013884237128155162, + "grad_norm": 0.8015498144333107, + "learning_rate": 4.999979135430812e-06, + "loss": 0.6884, + "step": 228 + }, + { + "epoch": 0.013945132905033035, + "grad_norm": 0.7689328295369451, + "learning_rate": 4.999978808148908e-06, + "loss": 0.7084, + "step": 229 + }, + { + "epoch": 0.01400602868191091, + "grad_norm": 0.7330568283569818, + "learning_rate": 4.999978478320078e-06, + "loss": 0.6386, + "step": 230 + }, + { + "epoch": 0.014066924458788783, + "grad_norm": 0.7853218550645573, + "learning_rate": 4.999978145944322e-06, + "loss": 0.711, + "step": 231 + }, + { + "epoch": 0.014127820235666657, + "grad_norm": 0.7928018634524029, + "learning_rate": 4.9999778110216405e-06, + "loss": 0.6643, + "step": 232 + }, + { + "epoch": 0.01418871601254453, + "grad_norm": 0.8175395654488663, + "learning_rate": 4.999977473552034e-06, + "loss": 0.6837, + "step": 233 + }, + { + "epoch": 0.014249611789422403, + "grad_norm": 0.7349920139248245, + "learning_rate": 4.9999771335355005e-06, + "loss": 0.7012, + "step": 234 + }, + { + "epoch": 0.014310507566300278, + "grad_norm": 0.7013419658758762, + "learning_rate": 4.999976790972044e-06, + "loss": 0.6687, + "step": 235 + }, + { + "epoch": 0.01437140334317815, + "grad_norm": 0.7258687937921421, + "learning_rate": 4.9999764458616625e-06, + "loss": 0.6783, + "step": 236 + }, + { + "epoch": 0.014432299120056023, + "grad_norm": 0.7941293289605419, + "learning_rate": 4.999976098204358e-06, + "loss": 0.7409, + "step": 237 + }, + { + "epoch": 0.014493194896933898, + "grad_norm": 0.7322461764813147, + "learning_rate": 4.999975748000129e-06, + "loss": 0.6797, + "step": 238 + }, + { + "epoch": 0.01455409067381177, + "grad_norm": 0.6994558154611148, + "learning_rate": 4.999975395248976e-06, + "loss": 0.7129, + "step": 239 + }, + { + "epoch": 0.014614986450689645, + "grad_norm": 0.7468726168659512, + "learning_rate": 4.9999750399509005e-06, + "loss": 0.6933, + "step": 240 + }, + { + "epoch": 0.014675882227567518, + "grad_norm": 0.7246430714836691, + "learning_rate": 4.999974682105903e-06, + "loss": 0.7282, + "step": 241 + }, + { + "epoch": 0.014736778004445391, + "grad_norm": 0.778010169433594, + "learning_rate": 4.999974321713983e-06, + "loss": 0.6623, + "step": 242 + }, + { + "epoch": 0.014797673781323266, + "grad_norm": 0.7264859630625451, + "learning_rate": 4.99997395877514e-06, + "loss": 0.6392, + "step": 243 + }, + { + "epoch": 0.014858569558201138, + "grad_norm": 0.7953761232979301, + "learning_rate": 4.999973593289378e-06, + "loss": 0.6854, + "step": 244 + }, + { + "epoch": 0.014919465335079013, + "grad_norm": 0.8169468837231547, + "learning_rate": 4.999973225256693e-06, + "loss": 0.7259, + "step": 245 + }, + { + "epoch": 0.014980361111956886, + "grad_norm": 0.792524461048797, + "learning_rate": 4.999972854677087e-06, + "loss": 0.7046, + "step": 246 + }, + { + "epoch": 0.015041256888834759, + "grad_norm": 0.7940635197160933, + "learning_rate": 4.999972481550562e-06, + "loss": 0.7874, + "step": 247 + }, + { + "epoch": 0.015102152665712633, + "grad_norm": 0.7311284478399898, + "learning_rate": 4.9999721058771165e-06, + "loss": 0.6631, + "step": 248 + }, + { + "epoch": 0.015163048442590506, + "grad_norm": 0.7400526588256793, + "learning_rate": 4.99997172765675e-06, + "loss": 0.6494, + "step": 249 + }, + { + "epoch": 0.01522394421946838, + "grad_norm": 0.796559565322799, + "learning_rate": 4.999971346889466e-06, + "loss": 0.6853, + "step": 250 + }, + { + "epoch": 0.015284839996346253, + "grad_norm": 0.7719225920603545, + "learning_rate": 4.999970963575263e-06, + "loss": 0.6911, + "step": 251 + }, + { + "epoch": 0.015345735773224126, + "grad_norm": 0.8260605781988979, + "learning_rate": 4.999970577714141e-06, + "loss": 0.7203, + "step": 252 + }, + { + "epoch": 0.015406631550102001, + "grad_norm": 0.7720637369342904, + "learning_rate": 4.999970189306101e-06, + "loss": 0.654, + "step": 253 + }, + { + "epoch": 0.015467527326979874, + "grad_norm": 0.7839169050574772, + "learning_rate": 4.999969798351144e-06, + "loss": 0.6919, + "step": 254 + }, + { + "epoch": 0.015528423103857748, + "grad_norm": 0.8148309106221141, + "learning_rate": 4.999969404849268e-06, + "loss": 0.6542, + "step": 255 + }, + { + "epoch": 0.015589318880735621, + "grad_norm": 0.7853665700861517, + "learning_rate": 4.999969008800477e-06, + "loss": 0.7079, + "step": 256 + }, + { + "epoch": 0.015650214657613494, + "grad_norm": 0.744231971596786, + "learning_rate": 4.999968610204769e-06, + "loss": 0.6509, + "step": 257 + }, + { + "epoch": 0.015711110434491367, + "grad_norm": 0.855620516744312, + "learning_rate": 4.999968209062145e-06, + "loss": 0.6607, + "step": 258 + }, + { + "epoch": 0.015772006211369243, + "grad_norm": 0.7339662937019528, + "learning_rate": 4.999967805372605e-06, + "loss": 0.6427, + "step": 259 + }, + { + "epoch": 0.015832901988247116, + "grad_norm": 0.738609131906306, + "learning_rate": 4.999967399136151e-06, + "loss": 0.6819, + "step": 260 + }, + { + "epoch": 0.01589379776512499, + "grad_norm": 0.7867287424343871, + "learning_rate": 4.99996699035278e-06, + "loss": 0.6494, + "step": 261 + }, + { + "epoch": 0.01595469354200286, + "grad_norm": 0.7354194659393885, + "learning_rate": 4.999966579022497e-06, + "loss": 0.6913, + "step": 262 + }, + { + "epoch": 0.016015589318880735, + "grad_norm": 0.7819926810876368, + "learning_rate": 4.999966165145298e-06, + "loss": 0.7125, + "step": 263 + }, + { + "epoch": 0.01607648509575861, + "grad_norm": 0.7054957363647959, + "learning_rate": 4.999965748721187e-06, + "loss": 0.6564, + "step": 264 + }, + { + "epoch": 0.016137380872636484, + "grad_norm": 0.7217517075926269, + "learning_rate": 4.9999653297501615e-06, + "loss": 0.66, + "step": 265 + }, + { + "epoch": 0.016198276649514357, + "grad_norm": 0.7094708912526655, + "learning_rate": 4.999964908232224e-06, + "loss": 0.6605, + "step": 266 + }, + { + "epoch": 0.01625917242639223, + "grad_norm": 0.7163070332356367, + "learning_rate": 4.9999644841673745e-06, + "loss": 0.6842, + "step": 267 + }, + { + "epoch": 0.016320068203270102, + "grad_norm": 0.7790964287377811, + "learning_rate": 4.999964057555613e-06, + "loss": 0.7121, + "step": 268 + }, + { + "epoch": 0.016380963980147975, + "grad_norm": 0.8366744934298782, + "learning_rate": 4.99996362839694e-06, + "loss": 0.749, + "step": 269 + }, + { + "epoch": 0.01644185975702585, + "grad_norm": 0.7626826126815852, + "learning_rate": 4.999963196691355e-06, + "loss": 0.6751, + "step": 270 + }, + { + "epoch": 0.016502755533903724, + "grad_norm": 0.7951658830117461, + "learning_rate": 4.999962762438861e-06, + "loss": 0.6711, + "step": 271 + }, + { + "epoch": 0.016563651310781597, + "grad_norm": 0.7309595516242022, + "learning_rate": 4.9999623256394565e-06, + "loss": 0.7304, + "step": 272 + }, + { + "epoch": 0.01662454708765947, + "grad_norm": 0.7608699663306677, + "learning_rate": 4.9999618862931424e-06, + "loss": 0.7223, + "step": 273 + }, + { + "epoch": 0.016685442864537343, + "grad_norm": 0.804576193407754, + "learning_rate": 4.999961444399919e-06, + "loss": 0.6703, + "step": 274 + }, + { + "epoch": 0.01674633864141522, + "grad_norm": 0.8268714208076846, + "learning_rate": 4.999960999959787e-06, + "loss": 0.6865, + "step": 275 + }, + { + "epoch": 0.016807234418293092, + "grad_norm": 0.7632156726774376, + "learning_rate": 4.999960552972746e-06, + "loss": 0.6993, + "step": 276 + }, + { + "epoch": 0.016868130195170965, + "grad_norm": 0.7542677251057993, + "learning_rate": 4.999960103438798e-06, + "loss": 0.6792, + "step": 277 + }, + { + "epoch": 0.016929025972048838, + "grad_norm": 0.8387648177951366, + "learning_rate": 4.9999596513579416e-06, + "loss": 0.6851, + "step": 278 + }, + { + "epoch": 0.01698992174892671, + "grad_norm": 0.694673133486192, + "learning_rate": 4.99995919673018e-06, + "loss": 0.6624, + "step": 279 + }, + { + "epoch": 0.017050817525804587, + "grad_norm": 0.8133784896972817, + "learning_rate": 4.99995873955551e-06, + "loss": 0.6576, + "step": 280 + }, + { + "epoch": 0.01711171330268246, + "grad_norm": 0.8273860908783668, + "learning_rate": 4.999958279833936e-06, + "loss": 0.6924, + "step": 281 + }, + { + "epoch": 0.017172609079560332, + "grad_norm": 0.8277678773537923, + "learning_rate": 4.999957817565455e-06, + "loss": 0.7333, + "step": 282 + }, + { + "epoch": 0.017233504856438205, + "grad_norm": 0.8328156500701911, + "learning_rate": 4.999957352750069e-06, + "loss": 0.6616, + "step": 283 + }, + { + "epoch": 0.017294400633316078, + "grad_norm": 0.7669797247375859, + "learning_rate": 4.999956885387779e-06, + "loss": 0.691, + "step": 284 + }, + { + "epoch": 0.017355296410193954, + "grad_norm": 0.7933377384491528, + "learning_rate": 4.999956415478584e-06, + "loss": 0.6987, + "step": 285 + }, + { + "epoch": 0.017416192187071827, + "grad_norm": 0.8116884979172979, + "learning_rate": 4.9999559430224865e-06, + "loss": 0.6351, + "step": 286 + }, + { + "epoch": 0.0174770879639497, + "grad_norm": 0.7843909400630843, + "learning_rate": 4.999955468019485e-06, + "loss": 0.7254, + "step": 287 + }, + { + "epoch": 0.017537983740827573, + "grad_norm": 0.755313476991527, + "learning_rate": 4.9999549904695815e-06, + "loss": 0.7352, + "step": 288 + }, + { + "epoch": 0.017598879517705446, + "grad_norm": 0.7083760414508964, + "learning_rate": 4.999954510372776e-06, + "loss": 0.702, + "step": 289 + }, + { + "epoch": 0.017659775294583322, + "grad_norm": 0.7830708445309442, + "learning_rate": 4.999954027729068e-06, + "loss": 0.637, + "step": 290 + }, + { + "epoch": 0.017720671071461195, + "grad_norm": 0.8026591907283346, + "learning_rate": 4.9999535425384595e-06, + "loss": 0.6624, + "step": 291 + }, + { + "epoch": 0.017781566848339068, + "grad_norm": 0.7398162509596033, + "learning_rate": 4.99995305480095e-06, + "loss": 0.6971, + "step": 292 + }, + { + "epoch": 0.01784246262521694, + "grad_norm": 0.849608682059187, + "learning_rate": 4.99995256451654e-06, + "loss": 0.7151, + "step": 293 + }, + { + "epoch": 0.017903358402094813, + "grad_norm": 0.8078020917319928, + "learning_rate": 4.999952071685231e-06, + "loss": 0.7479, + "step": 294 + }, + { + "epoch": 0.01796425417897269, + "grad_norm": 0.7747287091256054, + "learning_rate": 4.9999515763070224e-06, + "loss": 0.705, + "step": 295 + }, + { + "epoch": 0.018025149955850563, + "grad_norm": 0.7507041980159189, + "learning_rate": 4.999951078381915e-06, + "loss": 0.6359, + "step": 296 + }, + { + "epoch": 0.018086045732728435, + "grad_norm": 0.8154290929181294, + "learning_rate": 4.99995057790991e-06, + "loss": 0.7149, + "step": 297 + }, + { + "epoch": 0.01814694150960631, + "grad_norm": 0.7616529413229899, + "learning_rate": 4.999950074891007e-06, + "loss": 0.6635, + "step": 298 + }, + { + "epoch": 0.01820783728648418, + "grad_norm": 0.7770854590693149, + "learning_rate": 4.999949569325206e-06, + "loss": 0.6445, + "step": 299 + }, + { + "epoch": 0.018268733063362057, + "grad_norm": 0.7794103786448381, + "learning_rate": 4.999949061212509e-06, + "loss": 0.7052, + "step": 300 + }, + { + "epoch": 0.01832962884023993, + "grad_norm": 0.7067674189412413, + "learning_rate": 4.999948550552916e-06, + "loss": 0.6475, + "step": 301 + }, + { + "epoch": 0.018390524617117803, + "grad_norm": 0.7642864830013847, + "learning_rate": 4.999948037346428e-06, + "loss": 0.6796, + "step": 302 + }, + { + "epoch": 0.018451420393995676, + "grad_norm": 0.7693880119536499, + "learning_rate": 4.9999475215930434e-06, + "loss": 0.6664, + "step": 303 + }, + { + "epoch": 0.01851231617087355, + "grad_norm": 0.7728419105665237, + "learning_rate": 4.999947003292766e-06, + "loss": 0.6919, + "step": 304 + }, + { + "epoch": 0.018573211947751425, + "grad_norm": 0.7535372175710863, + "learning_rate": 4.9999464824455936e-06, + "loss": 0.6505, + "step": 305 + }, + { + "epoch": 0.018634107724629298, + "grad_norm": 0.8302332353833313, + "learning_rate": 4.999945959051527e-06, + "loss": 0.6686, + "step": 306 + }, + { + "epoch": 0.01869500350150717, + "grad_norm": 0.8065572425218912, + "learning_rate": 4.999945433110569e-06, + "loss": 0.6692, + "step": 307 + }, + { + "epoch": 0.018755899278385044, + "grad_norm": 0.7452052857864022, + "learning_rate": 4.999944904622718e-06, + "loss": 0.6713, + "step": 308 + }, + { + "epoch": 0.018816795055262917, + "grad_norm": 0.8100509913545736, + "learning_rate": 4.999944373587974e-06, + "loss": 0.6211, + "step": 309 + }, + { + "epoch": 0.01887769083214079, + "grad_norm": 0.8381308714422074, + "learning_rate": 4.99994384000634e-06, + "loss": 0.6827, + "step": 310 + }, + { + "epoch": 0.018938586609018666, + "grad_norm": 0.7948504658516677, + "learning_rate": 4.999943303877814e-06, + "loss": 0.7021, + "step": 311 + }, + { + "epoch": 0.01899948238589654, + "grad_norm": 0.7992771295231328, + "learning_rate": 4.999942765202399e-06, + "loss": 0.693, + "step": 312 + }, + { + "epoch": 0.01906037816277441, + "grad_norm": 0.7737377698601231, + "learning_rate": 4.999942223980094e-06, + "loss": 0.708, + "step": 313 + }, + { + "epoch": 0.019121273939652284, + "grad_norm": 0.7643948245435148, + "learning_rate": 4.999941680210899e-06, + "loss": 0.701, + "step": 314 + }, + { + "epoch": 0.019182169716530157, + "grad_norm": 0.8202366713481364, + "learning_rate": 4.999941133894816e-06, + "loss": 0.6673, + "step": 315 + }, + { + "epoch": 0.019243065493408033, + "grad_norm": 0.7493324617677929, + "learning_rate": 4.999940585031845e-06, + "loss": 0.6509, + "step": 316 + }, + { + "epoch": 0.019303961270285906, + "grad_norm": 0.7397035566441604, + "learning_rate": 4.9999400336219865e-06, + "loss": 0.6529, + "step": 317 + }, + { + "epoch": 0.01936485704716378, + "grad_norm": 0.8067959666796467, + "learning_rate": 4.999939479665241e-06, + "loss": 0.7073, + "step": 318 + }, + { + "epoch": 0.019425752824041652, + "grad_norm": 0.8446904156112273, + "learning_rate": 4.999938923161609e-06, + "loss": 0.6674, + "step": 319 + }, + { + "epoch": 0.019486648600919525, + "grad_norm": 0.8248495149429563, + "learning_rate": 4.999938364111092e-06, + "loss": 0.6915, + "step": 320 + }, + { + "epoch": 0.0195475443777974, + "grad_norm": 0.8457527035021967, + "learning_rate": 4.999937802513689e-06, + "loss": 0.6336, + "step": 321 + }, + { + "epoch": 0.019608440154675274, + "grad_norm": 0.9102832931052375, + "learning_rate": 4.999937238369401e-06, + "loss": 0.6504, + "step": 322 + }, + { + "epoch": 0.019669335931553147, + "grad_norm": 0.8723419850799531, + "learning_rate": 4.999936671678229e-06, + "loss": 0.6275, + "step": 323 + }, + { + "epoch": 0.01973023170843102, + "grad_norm": 0.8620196142082291, + "learning_rate": 4.999936102440174e-06, + "loss": 0.6495, + "step": 324 + }, + { + "epoch": 0.019791127485308892, + "grad_norm": 0.7989741024823176, + "learning_rate": 4.999935530655237e-06, + "loss": 0.6399, + "step": 325 + }, + { + "epoch": 0.01985202326218677, + "grad_norm": 0.7950548359449368, + "learning_rate": 4.999934956323415e-06, + "loss": 0.6364, + "step": 326 + }, + { + "epoch": 0.01991291903906464, + "grad_norm": 0.8332394553546462, + "learning_rate": 4.999934379444714e-06, + "loss": 0.6533, + "step": 327 + }, + { + "epoch": 0.019973814815942514, + "grad_norm": 0.8578002368884328, + "learning_rate": 4.99993380001913e-06, + "loss": 0.7001, + "step": 328 + }, + { + "epoch": 0.020034710592820387, + "grad_norm": 0.8110497984296026, + "learning_rate": 4.999933218046667e-06, + "loss": 0.6798, + "step": 329 + }, + { + "epoch": 0.02009560636969826, + "grad_norm": 0.9201520261127892, + "learning_rate": 4.999932633527322e-06, + "loss": 0.6805, + "step": 330 + }, + { + "epoch": 0.020156502146576136, + "grad_norm": 0.8504251932566226, + "learning_rate": 4.999932046461099e-06, + "loss": 0.6866, + "step": 331 + }, + { + "epoch": 0.02021739792345401, + "grad_norm": 0.7991206341289229, + "learning_rate": 4.999931456847998e-06, + "loss": 0.6775, + "step": 332 + }, + { + "epoch": 0.020278293700331882, + "grad_norm": 0.8247108538023933, + "learning_rate": 4.999930864688016e-06, + "loss": 0.7175, + "step": 333 + }, + { + "epoch": 0.020339189477209755, + "grad_norm": 0.7772316142576166, + "learning_rate": 4.9999302699811595e-06, + "loss": 0.6883, + "step": 334 + }, + { + "epoch": 0.020400085254087628, + "grad_norm": 0.8188218511497163, + "learning_rate": 4.999929672727424e-06, + "loss": 0.6635, + "step": 335 + }, + { + "epoch": 0.020460981030965504, + "grad_norm": 0.8352994588630622, + "learning_rate": 4.999929072926812e-06, + "loss": 0.6332, + "step": 336 + }, + { + "epoch": 0.020521876807843377, + "grad_norm": 0.7869442911377629, + "learning_rate": 4.999928470579326e-06, + "loss": 0.689, + "step": 337 + }, + { + "epoch": 0.02058277258472125, + "grad_norm": 0.8153634551720391, + "learning_rate": 4.999927865684963e-06, + "loss": 0.6781, + "step": 338 + }, + { + "epoch": 0.020643668361599123, + "grad_norm": 0.7593974657912167, + "learning_rate": 4.999927258243727e-06, + "loss": 0.6473, + "step": 339 + }, + { + "epoch": 0.020704564138476995, + "grad_norm": 0.8222330936101717, + "learning_rate": 4.999926648255616e-06, + "loss": 0.6833, + "step": 340 + }, + { + "epoch": 0.020765459915354872, + "grad_norm": 0.7880456730025663, + "learning_rate": 4.999926035720633e-06, + "loss": 0.6331, + "step": 341 + }, + { + "epoch": 0.020826355692232745, + "grad_norm": 0.8423418522027794, + "learning_rate": 4.999925420638776e-06, + "loss": 0.6695, + "step": 342 + }, + { + "epoch": 0.020887251469110617, + "grad_norm": 0.8273999818990211, + "learning_rate": 4.999924803010047e-06, + "loss": 0.6733, + "step": 343 + }, + { + "epoch": 0.02094814724598849, + "grad_norm": 0.8068359260397066, + "learning_rate": 4.999924182834447e-06, + "loss": 0.6131, + "step": 344 + }, + { + "epoch": 0.021009043022866363, + "grad_norm": 0.8444209022615488, + "learning_rate": 4.999923560111977e-06, + "loss": 0.6655, + "step": 345 + }, + { + "epoch": 0.02106993879974424, + "grad_norm": 0.7678009798881188, + "learning_rate": 4.999922934842636e-06, + "loss": 0.6314, + "step": 346 + }, + { + "epoch": 0.021130834576622112, + "grad_norm": 0.7838240410282088, + "learning_rate": 4.999922307026425e-06, + "loss": 0.6676, + "step": 347 + }, + { + "epoch": 0.021191730353499985, + "grad_norm": 0.8078629063928293, + "learning_rate": 4.999921676663345e-06, + "loss": 0.7461, + "step": 348 + }, + { + "epoch": 0.021252626130377858, + "grad_norm": 0.7668198901833109, + "learning_rate": 4.999921043753398e-06, + "loss": 0.6222, + "step": 349 + }, + { + "epoch": 0.02131352190725573, + "grad_norm": 0.9078761397001247, + "learning_rate": 4.999920408296582e-06, + "loss": 0.6603, + "step": 350 + }, + { + "epoch": 0.021374417684133604, + "grad_norm": 0.7818911046746919, + "learning_rate": 4.9999197702929e-06, + "loss": 0.668, + "step": 351 + }, + { + "epoch": 0.02143531346101148, + "grad_norm": 0.8191784430566436, + "learning_rate": 4.999919129742352e-06, + "loss": 0.6599, + "step": 352 + }, + { + "epoch": 0.021496209237889353, + "grad_norm": 0.783071107294683, + "learning_rate": 4.999918486644938e-06, + "loss": 0.6581, + "step": 353 + }, + { + "epoch": 0.021557105014767226, + "grad_norm": 0.7517410234914055, + "learning_rate": 4.999917841000659e-06, + "loss": 0.6571, + "step": 354 + }, + { + "epoch": 0.0216180007916451, + "grad_norm": 0.7625094896728111, + "learning_rate": 4.999917192809516e-06, + "loss": 0.6256, + "step": 355 + }, + { + "epoch": 0.02167889656852297, + "grad_norm": 0.8096196240480884, + "learning_rate": 4.999916542071509e-06, + "loss": 0.64, + "step": 356 + }, + { + "epoch": 0.021739792345400848, + "grad_norm": 0.8479246369556783, + "learning_rate": 4.999915888786639e-06, + "loss": 0.706, + "step": 357 + }, + { + "epoch": 0.02180068812227872, + "grad_norm": 0.8228950423793091, + "learning_rate": 4.999915232954906e-06, + "loss": 0.6632, + "step": 358 + }, + { + "epoch": 0.021861583899156593, + "grad_norm": 0.7975163297341146, + "learning_rate": 4.999914574576313e-06, + "loss": 0.5989, + "step": 359 + }, + { + "epoch": 0.021922479676034466, + "grad_norm": 0.8884259076325945, + "learning_rate": 4.999913913650858e-06, + "loss": 0.674, + "step": 360 + }, + { + "epoch": 0.02198337545291234, + "grad_norm": 0.8486122524768349, + "learning_rate": 4.9999132501785424e-06, + "loss": 0.7057, + "step": 361 + }, + { + "epoch": 0.022044271229790215, + "grad_norm": 0.9128520789758711, + "learning_rate": 4.999912584159368e-06, + "loss": 0.6989, + "step": 362 + }, + { + "epoch": 0.022105167006668088, + "grad_norm": 0.7805790238367319, + "learning_rate": 4.999911915593334e-06, + "loss": 0.5996, + "step": 363 + }, + { + "epoch": 0.02216606278354596, + "grad_norm": 0.7661732478003527, + "learning_rate": 4.999911244480441e-06, + "loss": 0.6946, + "step": 364 + }, + { + "epoch": 0.022226958560423834, + "grad_norm": 0.7783461548955787, + "learning_rate": 4.999910570820692e-06, + "loss": 0.6234, + "step": 365 + }, + { + "epoch": 0.022287854337301707, + "grad_norm": 0.8039747208398055, + "learning_rate": 4.999909894614085e-06, + "loss": 0.6623, + "step": 366 + }, + { + "epoch": 0.022348750114179583, + "grad_norm": 0.8185305763492312, + "learning_rate": 4.999909215860622e-06, + "loss": 0.6372, + "step": 367 + }, + { + "epoch": 0.022409645891057456, + "grad_norm": 0.8271119318617564, + "learning_rate": 4.999908534560304e-06, + "loss": 0.6636, + "step": 368 + }, + { + "epoch": 0.02247054166793533, + "grad_norm": 0.809494333885654, + "learning_rate": 4.99990785071313e-06, + "loss": 0.6382, + "step": 369 + }, + { + "epoch": 0.0225314374448132, + "grad_norm": 0.7639997683021247, + "learning_rate": 4.999907164319103e-06, + "loss": 0.6393, + "step": 370 + }, + { + "epoch": 0.022592333221691074, + "grad_norm": 0.965544178297169, + "learning_rate": 4.999906475378222e-06, + "loss": 0.6704, + "step": 371 + }, + { + "epoch": 0.02265322899856895, + "grad_norm": 0.8368325157524191, + "learning_rate": 4.999905783890489e-06, + "loss": 0.6653, + "step": 372 + }, + { + "epoch": 0.022714124775446824, + "grad_norm": 0.8580585134740449, + "learning_rate": 4.9999050898559026e-06, + "loss": 0.6906, + "step": 373 + }, + { + "epoch": 0.022775020552324696, + "grad_norm": 0.8864382843941513, + "learning_rate": 4.999904393274465e-06, + "loss": 0.6924, + "step": 374 + }, + { + "epoch": 0.02283591632920257, + "grad_norm": 0.8214585001478643, + "learning_rate": 4.999903694146178e-06, + "loss": 0.6324, + "step": 375 + }, + { + "epoch": 0.022896812106080442, + "grad_norm": 0.8003680388857501, + "learning_rate": 4.999902992471041e-06, + "loss": 0.6034, + "step": 376 + }, + { + "epoch": 0.02295770788295832, + "grad_norm": 0.8965912704506847, + "learning_rate": 4.999902288249053e-06, + "loss": 0.6284, + "step": 377 + }, + { + "epoch": 0.02301860365983619, + "grad_norm": 0.8166609665623451, + "learning_rate": 4.9999015814802184e-06, + "loss": 0.6312, + "step": 378 + }, + { + "epoch": 0.023079499436714064, + "grad_norm": 0.8542409224508395, + "learning_rate": 4.999900872164536e-06, + "loss": 0.6432, + "step": 379 + }, + { + "epoch": 0.023140395213591937, + "grad_norm": 0.77172810509425, + "learning_rate": 4.999900160302006e-06, + "loss": 0.6586, + "step": 380 + }, + { + "epoch": 0.02320129099046981, + "grad_norm": 0.8023244526896741, + "learning_rate": 4.999899445892629e-06, + "loss": 0.6139, + "step": 381 + }, + { + "epoch": 0.023262186767347686, + "grad_norm": 0.8375708187024465, + "learning_rate": 4.9998987289364074e-06, + "loss": 0.674, + "step": 382 + }, + { + "epoch": 0.02332308254422556, + "grad_norm": 0.9194454460619872, + "learning_rate": 4.99989800943334e-06, + "loss": 0.7069, + "step": 383 + }, + { + "epoch": 0.023383978321103432, + "grad_norm": 0.8709395477168814, + "learning_rate": 4.999897287383429e-06, + "loss": 0.6863, + "step": 384 + }, + { + "epoch": 0.023444874097981305, + "grad_norm": 0.8223666780585378, + "learning_rate": 4.999896562786676e-06, + "loss": 0.6239, + "step": 385 + }, + { + "epoch": 0.023505769874859177, + "grad_norm": 0.8496100396312322, + "learning_rate": 4.999895835643078e-06, + "loss": 0.6622, + "step": 386 + }, + { + "epoch": 0.023566665651737054, + "grad_norm": 0.8867287690792346, + "learning_rate": 4.99989510595264e-06, + "loss": 0.6172, + "step": 387 + }, + { + "epoch": 0.023627561428614927, + "grad_norm": 0.8100565682035575, + "learning_rate": 4.99989437371536e-06, + "loss": 0.6875, + "step": 388 + }, + { + "epoch": 0.0236884572054928, + "grad_norm": 0.8140553860900619, + "learning_rate": 4.999893638931239e-06, + "loss": 0.6329, + "step": 389 + }, + { + "epoch": 0.023749352982370672, + "grad_norm": 0.7897498227490352, + "learning_rate": 4.999892901600279e-06, + "loss": 0.6315, + "step": 390 + }, + { + "epoch": 0.023810248759248545, + "grad_norm": 0.8595494455330078, + "learning_rate": 4.99989216172248e-06, + "loss": 0.5807, + "step": 391 + }, + { + "epoch": 0.023871144536126418, + "grad_norm": 0.7760899615846001, + "learning_rate": 4.9998914192978436e-06, + "loss": 0.5885, + "step": 392 + }, + { + "epoch": 0.023932040313004294, + "grad_norm": 0.7979232600213241, + "learning_rate": 4.999890674326369e-06, + "loss": 0.6807, + "step": 393 + }, + { + "epoch": 0.023992936089882167, + "grad_norm": 0.8135092723190958, + "learning_rate": 4.999889926808058e-06, + "loss": 0.6347, + "step": 394 + }, + { + "epoch": 0.02405383186676004, + "grad_norm": 0.8040599904284218, + "learning_rate": 4.9998891767429105e-06, + "loss": 0.651, + "step": 395 + }, + { + "epoch": 0.024114727643637913, + "grad_norm": 0.8146045467916457, + "learning_rate": 4.999888424130929e-06, + "loss": 0.6331, + "step": 396 + }, + { + "epoch": 0.024175623420515786, + "grad_norm": 0.8138900703214225, + "learning_rate": 4.999887668972112e-06, + "loss": 0.6434, + "step": 397 + }, + { + "epoch": 0.024236519197393662, + "grad_norm": 0.8443801016204281, + "learning_rate": 4.999886911266462e-06, + "loss": 0.6236, + "step": 398 + }, + { + "epoch": 0.024297414974271535, + "grad_norm": 0.796103696298503, + "learning_rate": 4.999886151013979e-06, + "loss": 0.5971, + "step": 399 + }, + { + "epoch": 0.024358310751149408, + "grad_norm": 0.8408801222708111, + "learning_rate": 4.9998853882146645e-06, + "loss": 0.6476, + "step": 400 + }, + { + "epoch": 0.02441920652802728, + "grad_norm": 0.8185994200479856, + "learning_rate": 4.9998846228685185e-06, + "loss": 0.613, + "step": 401 + }, + { + "epoch": 0.024480102304905153, + "grad_norm": 0.8533862506865407, + "learning_rate": 4.999883854975542e-06, + "loss": 0.654, + "step": 402 + }, + { + "epoch": 0.02454099808178303, + "grad_norm": 0.8278738369638949, + "learning_rate": 4.999883084535735e-06, + "loss": 0.6384, + "step": 403 + }, + { + "epoch": 0.024601893858660902, + "grad_norm": 0.8493634595896167, + "learning_rate": 4.999882311549099e-06, + "loss": 0.636, + "step": 404 + }, + { + "epoch": 0.024662789635538775, + "grad_norm": 0.7979396448016045, + "learning_rate": 4.999881536015636e-06, + "loss": 0.6746, + "step": 405 + }, + { + "epoch": 0.024723685412416648, + "grad_norm": 0.8768286232589058, + "learning_rate": 4.999880757935345e-06, + "loss": 0.7483, + "step": 406 + }, + { + "epoch": 0.02478458118929452, + "grad_norm": 0.8304333564660699, + "learning_rate": 4.999879977308228e-06, + "loss": 0.6407, + "step": 407 + }, + { + "epoch": 0.024845476966172397, + "grad_norm": 0.874930872113359, + "learning_rate": 4.999879194134285e-06, + "loss": 0.6642, + "step": 408 + }, + { + "epoch": 0.02490637274305027, + "grad_norm": 0.8517903392881333, + "learning_rate": 4.999878408413517e-06, + "loss": 0.6373, + "step": 409 + }, + { + "epoch": 0.024967268519928143, + "grad_norm": 0.8160019892686183, + "learning_rate": 4.999877620145925e-06, + "loss": 0.6872, + "step": 410 + }, + { + "epoch": 0.025028164296806016, + "grad_norm": 0.8174442246705625, + "learning_rate": 4.999876829331509e-06, + "loss": 0.5921, + "step": 411 + }, + { + "epoch": 0.02508906007368389, + "grad_norm": 0.8201156842848333, + "learning_rate": 4.999876035970272e-06, + "loss": 0.7016, + "step": 412 + }, + { + "epoch": 0.025149955850561765, + "grad_norm": 0.8449310704720301, + "learning_rate": 4.999875240062212e-06, + "loss": 0.6772, + "step": 413 + }, + { + "epoch": 0.025210851627439638, + "grad_norm": 0.8231243081860802, + "learning_rate": 4.999874441607331e-06, + "loss": 0.7075, + "step": 414 + }, + { + "epoch": 0.02527174740431751, + "grad_norm": 0.8324086827787579, + "learning_rate": 4.9998736406056305e-06, + "loss": 0.6074, + "step": 415 + }, + { + "epoch": 0.025332643181195384, + "grad_norm": 0.8483743731054529, + "learning_rate": 4.99987283705711e-06, + "loss": 0.6476, + "step": 416 + }, + { + "epoch": 0.025393538958073256, + "grad_norm": 0.8748152498162979, + "learning_rate": 4.999872030961772e-06, + "loss": 0.6642, + "step": 417 + }, + { + "epoch": 0.025454434734951133, + "grad_norm": 0.8378792265286606, + "learning_rate": 4.999871222319615e-06, + "loss": 0.6274, + "step": 418 + }, + { + "epoch": 0.025515330511829006, + "grad_norm": 0.8804880604185217, + "learning_rate": 4.999870411130643e-06, + "loss": 0.6104, + "step": 419 + }, + { + "epoch": 0.02557622628870688, + "grad_norm": 0.8179281965456936, + "learning_rate": 4.999869597394854e-06, + "loss": 0.6362, + "step": 420 + }, + { + "epoch": 0.02563712206558475, + "grad_norm": 0.9790355006042828, + "learning_rate": 4.999868781112249e-06, + "loss": 0.6399, + "step": 421 + }, + { + "epoch": 0.025698017842462624, + "grad_norm": 0.8138044658823335, + "learning_rate": 4.999867962282831e-06, + "loss": 0.6422, + "step": 422 + }, + { + "epoch": 0.0257589136193405, + "grad_norm": 0.9501232443853702, + "learning_rate": 4.999867140906599e-06, + "loss": 0.6503, + "step": 423 + }, + { + "epoch": 0.025819809396218373, + "grad_norm": 0.8567002829777202, + "learning_rate": 4.999866316983554e-06, + "loss": 0.589, + "step": 424 + }, + { + "epoch": 0.025880705173096246, + "grad_norm": 0.8524126162250004, + "learning_rate": 4.999865490513698e-06, + "loss": 0.6508, + "step": 425 + }, + { + "epoch": 0.02594160094997412, + "grad_norm": 0.8158516658240909, + "learning_rate": 4.99986466149703e-06, + "loss": 0.6074, + "step": 426 + }, + { + "epoch": 0.02600249672685199, + "grad_norm": 0.8554304852404819, + "learning_rate": 4.9998638299335524e-06, + "loss": 0.588, + "step": 427 + }, + { + "epoch": 0.026063392503729868, + "grad_norm": 0.8732335864923195, + "learning_rate": 4.999862995823265e-06, + "loss": 0.6121, + "step": 428 + }, + { + "epoch": 0.02612428828060774, + "grad_norm": 0.8625349924583123, + "learning_rate": 4.99986215916617e-06, + "loss": 0.6891, + "step": 429 + }, + { + "epoch": 0.026185184057485614, + "grad_norm": 0.9043225624217842, + "learning_rate": 4.999861319962267e-06, + "loss": 0.6728, + "step": 430 + }, + { + "epoch": 0.026246079834363487, + "grad_norm": 0.9233222681170974, + "learning_rate": 4.999860478211558e-06, + "loss": 0.6098, + "step": 431 + }, + { + "epoch": 0.02630697561124136, + "grad_norm": 0.8679685840916898, + "learning_rate": 4.9998596339140415e-06, + "loss": 0.6997, + "step": 432 + }, + { + "epoch": 0.026367871388119232, + "grad_norm": 0.8797328107786474, + "learning_rate": 4.999858787069722e-06, + "loss": 0.6575, + "step": 433 + }, + { + "epoch": 0.02642876716499711, + "grad_norm": 0.8773337916187751, + "learning_rate": 4.999857937678596e-06, + "loss": 0.6885, + "step": 434 + }, + { + "epoch": 0.02648966294187498, + "grad_norm": 0.8463141106416795, + "learning_rate": 4.999857085740668e-06, + "loss": 0.6153, + "step": 435 + }, + { + "epoch": 0.026550558718752854, + "grad_norm": 0.8695931277823966, + "learning_rate": 4.999856231255937e-06, + "loss": 0.6126, + "step": 436 + }, + { + "epoch": 0.026611454495630727, + "grad_norm": 0.8130096227868048, + "learning_rate": 4.999855374224406e-06, + "loss": 0.6157, + "step": 437 + }, + { + "epoch": 0.0266723502725086, + "grad_norm": 0.8637057669130215, + "learning_rate": 4.999854514646073e-06, + "loss": 0.6509, + "step": 438 + }, + { + "epoch": 0.026733246049386476, + "grad_norm": 0.8635227944655051, + "learning_rate": 4.99985365252094e-06, + "loss": 0.6862, + "step": 439 + }, + { + "epoch": 0.02679414182626435, + "grad_norm": 0.8098887086410573, + "learning_rate": 4.999852787849009e-06, + "loss": 0.6531, + "step": 440 + }, + { + "epoch": 0.026855037603142222, + "grad_norm": 0.8896421369930613, + "learning_rate": 4.999851920630278e-06, + "loss": 0.6069, + "step": 441 + }, + { + "epoch": 0.026915933380020095, + "grad_norm": 0.8885332035235441, + "learning_rate": 4.999851050864752e-06, + "loss": 0.6384, + "step": 442 + }, + { + "epoch": 0.026976829156897968, + "grad_norm": 0.819210187872491, + "learning_rate": 4.999850178552429e-06, + "loss": 0.5742, + "step": 443 + }, + { + "epoch": 0.027037724933775844, + "grad_norm": 0.8252928496315517, + "learning_rate": 4.999849303693311e-06, + "loss": 0.5884, + "step": 444 + }, + { + "epoch": 0.027098620710653717, + "grad_norm": 0.9372406522190535, + "learning_rate": 4.999848426287398e-06, + "loss": 0.6173, + "step": 445 + }, + { + "epoch": 0.02715951648753159, + "grad_norm": 0.8747337376163049, + "learning_rate": 4.999847546334692e-06, + "loss": 0.6444, + "step": 446 + }, + { + "epoch": 0.027220412264409462, + "grad_norm": 0.8660984985124811, + "learning_rate": 4.999846663835193e-06, + "loss": 0.592, + "step": 447 + }, + { + "epoch": 0.027281308041287335, + "grad_norm": 0.8130650490781918, + "learning_rate": 4.999845778788902e-06, + "loss": 0.6932, + "step": 448 + }, + { + "epoch": 0.02734220381816521, + "grad_norm": 0.8876907139009043, + "learning_rate": 4.99984489119582e-06, + "loss": 0.703, + "step": 449 + }, + { + "epoch": 0.027403099595043084, + "grad_norm": 0.8579019826340917, + "learning_rate": 4.999844001055948e-06, + "loss": 0.5935, + "step": 450 + }, + { + "epoch": 0.027463995371920957, + "grad_norm": 0.8024649278022873, + "learning_rate": 4.999843108369287e-06, + "loss": 0.6546, + "step": 451 + }, + { + "epoch": 0.02752489114879883, + "grad_norm": 0.9103199335431142, + "learning_rate": 4.999842213135837e-06, + "loss": 0.6371, + "step": 452 + }, + { + "epoch": 0.027585786925676703, + "grad_norm": 0.8983894021899964, + "learning_rate": 4.999841315355601e-06, + "loss": 0.6252, + "step": 453 + }, + { + "epoch": 0.02764668270255458, + "grad_norm": 0.8104551986647945, + "learning_rate": 4.999840415028579e-06, + "loss": 0.6141, + "step": 454 + }, + { + "epoch": 0.027707578479432452, + "grad_norm": 0.8522042675253364, + "learning_rate": 4.99983951215477e-06, + "loss": 0.6532, + "step": 455 + }, + { + "epoch": 0.027768474256310325, + "grad_norm": 0.8392137291140584, + "learning_rate": 4.999838606734177e-06, + "loss": 0.6075, + "step": 456 + }, + { + "epoch": 0.027829370033188198, + "grad_norm": 0.9015535377891991, + "learning_rate": 4.9998376987668004e-06, + "loss": 0.6276, + "step": 457 + }, + { + "epoch": 0.02789026581006607, + "grad_norm": 0.8574097920804514, + "learning_rate": 4.999836788252642e-06, + "loss": 0.5974, + "step": 458 + }, + { + "epoch": 0.027951161586943947, + "grad_norm": 0.8825154827224055, + "learning_rate": 4.999835875191701e-06, + "loss": 0.6317, + "step": 459 + }, + { + "epoch": 0.02801205736382182, + "grad_norm": 0.8421333580415384, + "learning_rate": 4.999834959583979e-06, + "loss": 0.5858, + "step": 460 + }, + { + "epoch": 0.028072953140699693, + "grad_norm": 0.7911673451621303, + "learning_rate": 4.999834041429478e-06, + "loss": 0.6435, + "step": 461 + }, + { + "epoch": 0.028133848917577566, + "grad_norm": 0.8803682659414177, + "learning_rate": 4.9998331207281974e-06, + "loss": 0.6328, + "step": 462 + }, + { + "epoch": 0.02819474469445544, + "grad_norm": 0.7932756415951809, + "learning_rate": 4.999832197480139e-06, + "loss": 0.6487, + "step": 463 + }, + { + "epoch": 0.028255640471333315, + "grad_norm": 0.8603290962794222, + "learning_rate": 4.9998312716853035e-06, + "loss": 0.6257, + "step": 464 + }, + { + "epoch": 0.028316536248211188, + "grad_norm": 0.8804126469049063, + "learning_rate": 4.999830343343692e-06, + "loss": 0.632, + "step": 465 + }, + { + "epoch": 0.02837743202508906, + "grad_norm": 0.8996107808081768, + "learning_rate": 4.999829412455305e-06, + "loss": 0.5827, + "step": 466 + }, + { + "epoch": 0.028438327801966933, + "grad_norm": 0.8758628922903379, + "learning_rate": 4.999828479020144e-06, + "loss": 0.672, + "step": 467 + }, + { + "epoch": 0.028499223578844806, + "grad_norm": 0.8572420595536744, + "learning_rate": 4.999827543038209e-06, + "loss": 0.5981, + "step": 468 + }, + { + "epoch": 0.028560119355722682, + "grad_norm": 0.7776560270734729, + "learning_rate": 4.9998266045095025e-06, + "loss": 0.6186, + "step": 469 + }, + { + "epoch": 0.028621015132600555, + "grad_norm": 0.9649287795955926, + "learning_rate": 4.999825663434024e-06, + "loss": 0.6683, + "step": 470 + }, + { + "epoch": 0.028681910909478428, + "grad_norm": 0.8512084010554957, + "learning_rate": 4.9998247198117764e-06, + "loss": 0.6113, + "step": 471 + }, + { + "epoch": 0.0287428066863563, + "grad_norm": 0.9290489888783665, + "learning_rate": 4.999823773642759e-06, + "loss": 0.7006, + "step": 472 + }, + { + "epoch": 0.028803702463234174, + "grad_norm": 0.8798085039346486, + "learning_rate": 4.999822824926972e-06, + "loss": 0.6754, + "step": 473 + }, + { + "epoch": 0.028864598240112047, + "grad_norm": 0.8976873255604702, + "learning_rate": 4.999821873664418e-06, + "loss": 0.6282, + "step": 474 + }, + { + "epoch": 0.028925494016989923, + "grad_norm": 0.9058676247338799, + "learning_rate": 4.999820919855098e-06, + "loss": 0.6124, + "step": 475 + }, + { + "epoch": 0.028986389793867796, + "grad_norm": 0.8586046721654451, + "learning_rate": 4.999819963499012e-06, + "loss": 0.6293, + "step": 476 + }, + { + "epoch": 0.02904728557074567, + "grad_norm": 0.8128610641268553, + "learning_rate": 4.999819004596161e-06, + "loss": 0.5654, + "step": 477 + }, + { + "epoch": 0.02910818134762354, + "grad_norm": 0.8368807810282304, + "learning_rate": 4.999818043146546e-06, + "loss": 0.6426, + "step": 478 + }, + { + "epoch": 0.029169077124501414, + "grad_norm": 0.8839469693107543, + "learning_rate": 4.99981707915017e-06, + "loss": 0.6236, + "step": 479 + }, + { + "epoch": 0.02922997290137929, + "grad_norm": 0.9439892964129745, + "learning_rate": 4.999816112607031e-06, + "loss": 0.6928, + "step": 480 + }, + { + "epoch": 0.029290868678257163, + "grad_norm": 0.9791338644215659, + "learning_rate": 4.999815143517132e-06, + "loss": 0.6777, + "step": 481 + }, + { + "epoch": 0.029351764455135036, + "grad_norm": 0.8978624281608163, + "learning_rate": 4.999814171880473e-06, + "loss": 0.6304, + "step": 482 + }, + { + "epoch": 0.02941266023201291, + "grad_norm": 0.8811033571403838, + "learning_rate": 4.999813197697055e-06, + "loss": 0.645, + "step": 483 + }, + { + "epoch": 0.029473556008890782, + "grad_norm": 0.8445482476809665, + "learning_rate": 4.9998122209668795e-06, + "loss": 0.6622, + "step": 484 + }, + { + "epoch": 0.029534451785768658, + "grad_norm": 0.8514340570722598, + "learning_rate": 4.999811241689948e-06, + "loss": 0.6286, + "step": 485 + }, + { + "epoch": 0.02959534756264653, + "grad_norm": 0.8256422870893059, + "learning_rate": 4.99981025986626e-06, + "loss": 0.661, + "step": 486 + }, + { + "epoch": 0.029656243339524404, + "grad_norm": 0.842447794611616, + "learning_rate": 4.999809275495817e-06, + "loss": 0.5762, + "step": 487 + }, + { + "epoch": 0.029717139116402277, + "grad_norm": 0.8902189507645389, + "learning_rate": 4.999808288578621e-06, + "loss": 0.6152, + "step": 488 + }, + { + "epoch": 0.02977803489328015, + "grad_norm": 0.9032549698145211, + "learning_rate": 4.999807299114672e-06, + "loss": 0.5915, + "step": 489 + }, + { + "epoch": 0.029838930670158026, + "grad_norm": 0.8628736875286452, + "learning_rate": 4.999806307103972e-06, + "loss": 0.5834, + "step": 490 + }, + { + "epoch": 0.0298998264470359, + "grad_norm": 0.8574011370442478, + "learning_rate": 4.99980531254652e-06, + "loss": 0.656, + "step": 491 + }, + { + "epoch": 0.02996072222391377, + "grad_norm": 0.8204905039182027, + "learning_rate": 4.9998043154423185e-06, + "loss": 0.6307, + "step": 492 + }, + { + "epoch": 0.030021618000791644, + "grad_norm": 0.9003122139678936, + "learning_rate": 4.999803315791369e-06, + "loss": 0.645, + "step": 493 + }, + { + "epoch": 0.030082513777669517, + "grad_norm": 0.8472178063785583, + "learning_rate": 4.999802313593671e-06, + "loss": 0.6423, + "step": 494 + }, + { + "epoch": 0.030143409554547394, + "grad_norm": 0.947024560501822, + "learning_rate": 4.999801308849228e-06, + "loss": 0.6295, + "step": 495 + }, + { + "epoch": 0.030204305331425266, + "grad_norm": 0.8582464915081885, + "learning_rate": 4.999800301558037e-06, + "loss": 0.6062, + "step": 496 + }, + { + "epoch": 0.03026520110830314, + "grad_norm": 0.9226052488796491, + "learning_rate": 4.999799291720103e-06, + "loss": 0.5993, + "step": 497 + }, + { + "epoch": 0.030326096885181012, + "grad_norm": 0.7994435196941898, + "learning_rate": 4.999798279335426e-06, + "loss": 0.6676, + "step": 498 + }, + { + "epoch": 0.030386992662058885, + "grad_norm": 0.8641846892253697, + "learning_rate": 4.999797264404006e-06, + "loss": 0.6413, + "step": 499 + }, + { + "epoch": 0.03044788843893676, + "grad_norm": 0.9212093973444776, + "learning_rate": 4.999796246925844e-06, + "loss": 0.6241, + "step": 500 + }, + { + "epoch": 0.030508784215814634, + "grad_norm": 0.8953382523685107, + "learning_rate": 4.999795226900942e-06, + "loss": 0.6708, + "step": 501 + }, + { + "epoch": 0.030569679992692507, + "grad_norm": 0.8633566355474558, + "learning_rate": 4.9997942043293e-06, + "loss": 0.6247, + "step": 502 + }, + { + "epoch": 0.03063057576957038, + "grad_norm": 0.9213562353535294, + "learning_rate": 4.99979317921092e-06, + "loss": 0.6294, + "step": 503 + }, + { + "epoch": 0.030691471546448253, + "grad_norm": 0.9319135391682981, + "learning_rate": 4.9997921515458034e-06, + "loss": 0.6796, + "step": 504 + }, + { + "epoch": 0.03075236732332613, + "grad_norm": 0.8314602517634049, + "learning_rate": 4.99979112133395e-06, + "loss": 0.6616, + "step": 505 + }, + { + "epoch": 0.030813263100204002, + "grad_norm": 0.8529202620576182, + "learning_rate": 4.999790088575361e-06, + "loss": 0.6847, + "step": 506 + }, + { + "epoch": 0.030874158877081875, + "grad_norm": 0.9324200193624258, + "learning_rate": 4.9997890532700375e-06, + "loss": 0.6372, + "step": 507 + }, + { + "epoch": 0.030935054653959747, + "grad_norm": 0.8947452907370006, + "learning_rate": 4.999788015417982e-06, + "loss": 0.6792, + "step": 508 + }, + { + "epoch": 0.03099595043083762, + "grad_norm": 0.8675204188242819, + "learning_rate": 4.9997869750191945e-06, + "loss": 0.6739, + "step": 509 + }, + { + "epoch": 0.031056846207715497, + "grad_norm": 0.916743570141975, + "learning_rate": 4.999785932073674e-06, + "loss": 0.6995, + "step": 510 + }, + { + "epoch": 0.03111774198459337, + "grad_norm": 0.8589994995363067, + "learning_rate": 4.999784886581426e-06, + "loss": 0.665, + "step": 511 + }, + { + "epoch": 0.031178637761471242, + "grad_norm": 0.892772193362744, + "learning_rate": 4.999783838542448e-06, + "loss": 0.5941, + "step": 512 + }, + { + "epoch": 0.031239533538349115, + "grad_norm": 0.9116854607245185, + "learning_rate": 4.999782787956742e-06, + "loss": 0.6227, + "step": 513 + }, + { + "epoch": 0.03130042931522699, + "grad_norm": 0.9205514070820439, + "learning_rate": 4.99978173482431e-06, + "loss": 0.6396, + "step": 514 + }, + { + "epoch": 0.03136132509210486, + "grad_norm": 0.8921555871366313, + "learning_rate": 4.999780679145152e-06, + "loss": 0.6008, + "step": 515 + }, + { + "epoch": 0.031422220868982734, + "grad_norm": 0.8640570532389226, + "learning_rate": 4.999779620919269e-06, + "loss": 0.5905, + "step": 516 + }, + { + "epoch": 0.03148311664586061, + "grad_norm": 0.8659278493603734, + "learning_rate": 4.999778560146663e-06, + "loss": 0.6742, + "step": 517 + }, + { + "epoch": 0.031544012422738486, + "grad_norm": 0.8797486433033918, + "learning_rate": 4.999777496827334e-06, + "loss": 0.684, + "step": 518 + }, + { + "epoch": 0.03160490819961636, + "grad_norm": 0.9023872523215322, + "learning_rate": 4.999776430961284e-06, + "loss": 0.6229, + "step": 519 + }, + { + "epoch": 0.03166580397649423, + "grad_norm": 0.9529985199337727, + "learning_rate": 4.999775362548514e-06, + "loss": 0.6656, + "step": 520 + }, + { + "epoch": 0.031726699753372105, + "grad_norm": 0.8798293295109569, + "learning_rate": 4.999774291589025e-06, + "loss": 0.626, + "step": 521 + }, + { + "epoch": 0.03178759553024998, + "grad_norm": 0.8742176709815166, + "learning_rate": 4.999773218082817e-06, + "loss": 0.584, + "step": 522 + }, + { + "epoch": 0.03184849130712785, + "grad_norm": 0.8449710716525559, + "learning_rate": 4.9997721420298926e-06, + "loss": 0.6074, + "step": 523 + }, + { + "epoch": 0.03190938708400572, + "grad_norm": 0.8766425922899205, + "learning_rate": 4.999771063430252e-06, + "loss": 0.6151, + "step": 524 + }, + { + "epoch": 0.031970282860883596, + "grad_norm": 0.8673171725822507, + "learning_rate": 4.999769982283897e-06, + "loss": 0.6324, + "step": 525 + }, + { + "epoch": 0.03203117863776147, + "grad_norm": 0.8753341372596111, + "learning_rate": 4.999768898590829e-06, + "loss": 0.7082, + "step": 526 + }, + { + "epoch": 0.03209207441463934, + "grad_norm": 0.8658220014086978, + "learning_rate": 4.999767812351047e-06, + "loss": 0.5884, + "step": 527 + }, + { + "epoch": 0.03215297019151722, + "grad_norm": 0.9201531370795614, + "learning_rate": 4.999766723564554e-06, + "loss": 0.6084, + "step": 528 + }, + { + "epoch": 0.032213865968395095, + "grad_norm": 0.8889450976246752, + "learning_rate": 4.999765632231352e-06, + "loss": 0.6634, + "step": 529 + }, + { + "epoch": 0.03227476174527297, + "grad_norm": 0.857675306959387, + "learning_rate": 4.999764538351439e-06, + "loss": 0.6411, + "step": 530 + }, + { + "epoch": 0.03233565752215084, + "grad_norm": 0.9048851142462304, + "learning_rate": 4.999763441924818e-06, + "loss": 0.5977, + "step": 531 + }, + { + "epoch": 0.03239655329902871, + "grad_norm": 0.8919630006035432, + "learning_rate": 4.999762342951491e-06, + "loss": 0.6414, + "step": 532 + }, + { + "epoch": 0.032457449075906586, + "grad_norm": 0.901297362341284, + "learning_rate": 4.999761241431457e-06, + "loss": 0.6402, + "step": 533 + }, + { + "epoch": 0.03251834485278446, + "grad_norm": 0.8681590587464822, + "learning_rate": 4.9997601373647185e-06, + "loss": 0.6583, + "step": 534 + }, + { + "epoch": 0.03257924062966233, + "grad_norm": 0.8539040316466944, + "learning_rate": 4.9997590307512765e-06, + "loss": 0.5911, + "step": 535 + }, + { + "epoch": 0.032640136406540204, + "grad_norm": 0.9441666570205184, + "learning_rate": 4.999757921591132e-06, + "loss": 0.6235, + "step": 536 + }, + { + "epoch": 0.03270103218341808, + "grad_norm": 0.9414750838533829, + "learning_rate": 4.999756809884287e-06, + "loss": 0.619, + "step": 537 + }, + { + "epoch": 0.03276192796029595, + "grad_norm": 0.9175708951908074, + "learning_rate": 4.99975569563074e-06, + "loss": 0.6257, + "step": 538 + }, + { + "epoch": 0.03282282373717383, + "grad_norm": 0.9214948183903635, + "learning_rate": 4.999754578830495e-06, + "loss": 0.5955, + "step": 539 + }, + { + "epoch": 0.0328837195140517, + "grad_norm": 0.9424129726282491, + "learning_rate": 4.9997534594835514e-06, + "loss": 0.6202, + "step": 540 + }, + { + "epoch": 0.032944615290929576, + "grad_norm": 0.9389387099284442, + "learning_rate": 4.999752337589911e-06, + "loss": 0.5515, + "step": 541 + }, + { + "epoch": 0.03300551106780745, + "grad_norm": 0.88234522356561, + "learning_rate": 4.999751213149575e-06, + "loss": 0.6645, + "step": 542 + }, + { + "epoch": 0.03306640684468532, + "grad_norm": 0.9028932671934276, + "learning_rate": 4.999750086162545e-06, + "loss": 0.558, + "step": 543 + }, + { + "epoch": 0.033127302621563194, + "grad_norm": 0.8040073518496615, + "learning_rate": 4.999748956628822e-06, + "loss": 0.6015, + "step": 544 + }, + { + "epoch": 0.03318819839844107, + "grad_norm": 0.9442654574210785, + "learning_rate": 4.999747824548405e-06, + "loss": 0.6639, + "step": 545 + }, + { + "epoch": 0.03324909417531894, + "grad_norm": 0.9015275678577019, + "learning_rate": 4.9997466899212974e-06, + "loss": 0.6872, + "step": 546 + }, + { + "epoch": 0.03330998995219681, + "grad_norm": 0.8163607014405337, + "learning_rate": 4.9997455527475005e-06, + "loss": 0.5867, + "step": 547 + }, + { + "epoch": 0.033370885729074685, + "grad_norm": 0.8935310340996083, + "learning_rate": 4.999744413027014e-06, + "loss": 0.6019, + "step": 548 + }, + { + "epoch": 0.033431781505952565, + "grad_norm": 0.8901360528923237, + "learning_rate": 4.999743270759841e-06, + "loss": 0.622, + "step": 549 + }, + { + "epoch": 0.03349267728283044, + "grad_norm": 0.8586180481140363, + "learning_rate": 4.999742125945981e-06, + "loss": 0.616, + "step": 550 + }, + { + "epoch": 0.03355357305970831, + "grad_norm": 0.8230395599384366, + "learning_rate": 4.999740978585435e-06, + "loss": 0.5988, + "step": 551 + }, + { + "epoch": 0.033614468836586184, + "grad_norm": 0.9145119778704469, + "learning_rate": 4.999739828678206e-06, + "loss": 0.5673, + "step": 552 + }, + { + "epoch": 0.03367536461346406, + "grad_norm": 1.0541586394347071, + "learning_rate": 4.999738676224294e-06, + "loss": 0.6603, + "step": 553 + }, + { + "epoch": 0.03373626039034193, + "grad_norm": 0.873099280451214, + "learning_rate": 4.9997375212236995e-06, + "loss": 0.708, + "step": 554 + }, + { + "epoch": 0.0337971561672198, + "grad_norm": 0.9562625677917975, + "learning_rate": 4.999736363676424e-06, + "loss": 0.5916, + "step": 555 + }, + { + "epoch": 0.033858051944097675, + "grad_norm": 0.8317525720319099, + "learning_rate": 4.9997352035824695e-06, + "loss": 0.6216, + "step": 556 + }, + { + "epoch": 0.03391894772097555, + "grad_norm": 0.9171177659800822, + "learning_rate": 4.9997340409418375e-06, + "loss": 0.63, + "step": 557 + }, + { + "epoch": 0.03397984349785342, + "grad_norm": 0.8989458141767893, + "learning_rate": 4.999732875754527e-06, + "loss": 0.5978, + "step": 558 + }, + { + "epoch": 0.0340407392747313, + "grad_norm": 0.9019813252373554, + "learning_rate": 4.999731708020542e-06, + "loss": 0.5635, + "step": 559 + }, + { + "epoch": 0.03410163505160917, + "grad_norm": 0.8727992627575653, + "learning_rate": 4.999730537739882e-06, + "loss": 0.6766, + "step": 560 + }, + { + "epoch": 0.034162530828487046, + "grad_norm": 1.047977154290726, + "learning_rate": 4.999729364912548e-06, + "loss": 0.602, + "step": 561 + }, + { + "epoch": 0.03422342660536492, + "grad_norm": 0.8666786451313352, + "learning_rate": 4.999728189538542e-06, + "loss": 0.5989, + "step": 562 + }, + { + "epoch": 0.03428432238224279, + "grad_norm": 0.9645031060810192, + "learning_rate": 4.999727011617865e-06, + "loss": 0.658, + "step": 563 + }, + { + "epoch": 0.034345218159120665, + "grad_norm": 0.8940944226840101, + "learning_rate": 4.999725831150518e-06, + "loss": 0.6444, + "step": 564 + }, + { + "epoch": 0.03440611393599854, + "grad_norm": 0.9346654203953113, + "learning_rate": 4.999724648136502e-06, + "loss": 0.6207, + "step": 565 + }, + { + "epoch": 0.03446700971287641, + "grad_norm": 0.9113792083007628, + "learning_rate": 4.999723462575819e-06, + "loss": 0.5924, + "step": 566 + }, + { + "epoch": 0.03452790548975428, + "grad_norm": 0.8895091665305688, + "learning_rate": 4.999722274468469e-06, + "loss": 0.6159, + "step": 567 + }, + { + "epoch": 0.034588801266632156, + "grad_norm": 0.8292053902675454, + "learning_rate": 4.999721083814455e-06, + "loss": 0.6013, + "step": 568 + }, + { + "epoch": 0.034649697043510036, + "grad_norm": 0.8664356414310306, + "learning_rate": 4.999719890613776e-06, + "loss": 0.5749, + "step": 569 + }, + { + "epoch": 0.03471059282038791, + "grad_norm": 0.8999877858699046, + "learning_rate": 4.999718694866435e-06, + "loss": 0.6479, + "step": 570 + }, + { + "epoch": 0.03477148859726578, + "grad_norm": 0.8468004589859273, + "learning_rate": 4.999717496572433e-06, + "loss": 0.6039, + "step": 571 + }, + { + "epoch": 0.034832384374143655, + "grad_norm": 0.872879419765749, + "learning_rate": 4.99971629573177e-06, + "loss": 0.6106, + "step": 572 + }, + { + "epoch": 0.03489328015102153, + "grad_norm": 0.9334862102425937, + "learning_rate": 4.999715092344448e-06, + "loss": 0.6107, + "step": 573 + }, + { + "epoch": 0.0349541759278994, + "grad_norm": 0.9291657398918164, + "learning_rate": 4.999713886410468e-06, + "loss": 0.592, + "step": 574 + }, + { + "epoch": 0.03501507170477727, + "grad_norm": 0.9563999889459246, + "learning_rate": 4.999712677929832e-06, + "loss": 0.5983, + "step": 575 + }, + { + "epoch": 0.035075967481655146, + "grad_norm": 0.9319219175370166, + "learning_rate": 4.99971146690254e-06, + "loss": 0.5864, + "step": 576 + }, + { + "epoch": 0.03513686325853302, + "grad_norm": 0.899887376892984, + "learning_rate": 4.999710253328595e-06, + "loss": 0.6194, + "step": 577 + }, + { + "epoch": 0.03519775903541089, + "grad_norm": 0.8715018890128784, + "learning_rate": 4.999709037207997e-06, + "loss": 0.6476, + "step": 578 + }, + { + "epoch": 0.035258654812288764, + "grad_norm": 0.944585602653932, + "learning_rate": 4.999707818540747e-06, + "loss": 0.602, + "step": 579 + }, + { + "epoch": 0.035319550589166644, + "grad_norm": 0.8561889637672415, + "learning_rate": 4.999706597326847e-06, + "loss": 0.6168, + "step": 580 + }, + { + "epoch": 0.03538044636604452, + "grad_norm": 0.8056900907454947, + "learning_rate": 4.999705373566297e-06, + "loss": 0.6431, + "step": 581 + }, + { + "epoch": 0.03544134214292239, + "grad_norm": 0.8910734324274614, + "learning_rate": 4.9997041472591e-06, + "loss": 0.6251, + "step": 582 + }, + { + "epoch": 0.03550223791980026, + "grad_norm": 0.9032061941764189, + "learning_rate": 4.999702918405256e-06, + "loss": 0.5924, + "step": 583 + }, + { + "epoch": 0.035563133696678136, + "grad_norm": 0.9841111970684098, + "learning_rate": 4.999701687004767e-06, + "loss": 0.549, + "step": 584 + }, + { + "epoch": 0.03562402947355601, + "grad_norm": 0.9286790743514601, + "learning_rate": 4.999700453057633e-06, + "loss": 0.5954, + "step": 585 + }, + { + "epoch": 0.03568492525043388, + "grad_norm": 0.8751425080153574, + "learning_rate": 4.999699216563857e-06, + "loss": 0.639, + "step": 586 + }, + { + "epoch": 0.035745821027311754, + "grad_norm": 0.993809385074006, + "learning_rate": 4.99969797752344e-06, + "loss": 0.6132, + "step": 587 + }, + { + "epoch": 0.03580671680418963, + "grad_norm": 0.912702769692447, + "learning_rate": 4.999696735936382e-06, + "loss": 0.583, + "step": 588 + }, + { + "epoch": 0.0358676125810675, + "grad_norm": 1.0278836228844093, + "learning_rate": 4.9996954918026844e-06, + "loss": 0.6257, + "step": 589 + }, + { + "epoch": 0.03592850835794538, + "grad_norm": 0.9310595252437611, + "learning_rate": 4.99969424512235e-06, + "loss": 0.6049, + "step": 590 + }, + { + "epoch": 0.03598940413482325, + "grad_norm": 1.0095404541613693, + "learning_rate": 4.999692995895378e-06, + "loss": 0.6355, + "step": 591 + }, + { + "epoch": 0.036050299911701125, + "grad_norm": 0.8645506411402605, + "learning_rate": 4.999691744121771e-06, + "loss": 0.6055, + "step": 592 + }, + { + "epoch": 0.036111195688579, + "grad_norm": 0.8764079291608898, + "learning_rate": 4.999690489801531e-06, + "loss": 0.5862, + "step": 593 + }, + { + "epoch": 0.03617209146545687, + "grad_norm": 0.9658373514336904, + "learning_rate": 4.999689232934657e-06, + "loss": 0.5565, + "step": 594 + }, + { + "epoch": 0.036232987242334744, + "grad_norm": 0.9244628996065855, + "learning_rate": 4.999687973521152e-06, + "loss": 0.6371, + "step": 595 + }, + { + "epoch": 0.03629388301921262, + "grad_norm": 0.8841726696823312, + "learning_rate": 4.999686711561017e-06, + "loss": 0.6546, + "step": 596 + }, + { + "epoch": 0.03635477879609049, + "grad_norm": 0.9090601064042594, + "learning_rate": 4.999685447054253e-06, + "loss": 0.6076, + "step": 597 + }, + { + "epoch": 0.03641567457296836, + "grad_norm": 0.9095515469713252, + "learning_rate": 4.999684180000862e-06, + "loss": 0.5946, + "step": 598 + }, + { + "epoch": 0.036476570349846235, + "grad_norm": 0.9787665184280723, + "learning_rate": 4.999682910400844e-06, + "loss": 0.6599, + "step": 599 + }, + { + "epoch": 0.036537466126724115, + "grad_norm": 0.9284584729010522, + "learning_rate": 4.999681638254202e-06, + "loss": 0.6131, + "step": 600 + }, + { + "epoch": 0.03659836190360199, + "grad_norm": 0.9454009442104677, + "learning_rate": 4.999680363560935e-06, + "loss": 0.675, + "step": 601 + }, + { + "epoch": 0.03665925768047986, + "grad_norm": 0.9077061559228002, + "learning_rate": 4.999679086321046e-06, + "loss": 0.6455, + "step": 602 + }, + { + "epoch": 0.03672015345735773, + "grad_norm": 0.8582415633151084, + "learning_rate": 4.999677806534536e-06, + "loss": 0.664, + "step": 603 + }, + { + "epoch": 0.036781049234235606, + "grad_norm": 0.8781249091544013, + "learning_rate": 4.999676524201406e-06, + "loss": 0.6031, + "step": 604 + }, + { + "epoch": 0.03684194501111348, + "grad_norm": 0.9170951091774144, + "learning_rate": 4.9996752393216585e-06, + "loss": 0.6073, + "step": 605 + }, + { + "epoch": 0.03690284078799135, + "grad_norm": 0.9041797233698343, + "learning_rate": 4.999673951895293e-06, + "loss": 0.6117, + "step": 606 + }, + { + "epoch": 0.036963736564869225, + "grad_norm": 0.882594077824798, + "learning_rate": 4.999672661922313e-06, + "loss": 0.5807, + "step": 607 + }, + { + "epoch": 0.0370246323417471, + "grad_norm": 0.9469328310521461, + "learning_rate": 4.999671369402716e-06, + "loss": 0.6157, + "step": 608 + }, + { + "epoch": 0.03708552811862497, + "grad_norm": 0.9452051310432236, + "learning_rate": 4.999670074336508e-06, + "loss": 0.6018, + "step": 609 + }, + { + "epoch": 0.03714642389550285, + "grad_norm": 0.9039953983026277, + "learning_rate": 4.999668776723687e-06, + "loss": 0.6013, + "step": 610 + }, + { + "epoch": 0.03720731967238072, + "grad_norm": 0.9252127217156216, + "learning_rate": 4.999667476564256e-06, + "loss": 0.6308, + "step": 611 + }, + { + "epoch": 0.037268215449258596, + "grad_norm": 0.864873936782976, + "learning_rate": 4.999666173858215e-06, + "loss": 0.6641, + "step": 612 + }, + { + "epoch": 0.03732911122613647, + "grad_norm": 0.9580950388042523, + "learning_rate": 4.999664868605567e-06, + "loss": 0.5854, + "step": 613 + }, + { + "epoch": 0.03739000700301434, + "grad_norm": 0.9256683906637035, + "learning_rate": 4.9996635608063115e-06, + "loss": 0.6399, + "step": 614 + }, + { + "epoch": 0.037450902779892215, + "grad_norm": 0.9016899595850172, + "learning_rate": 4.999662250460451e-06, + "loss": 0.6404, + "step": 615 + }, + { + "epoch": 0.03751179855677009, + "grad_norm": 0.9047804944091163, + "learning_rate": 4.999660937567987e-06, + "loss": 0.6561, + "step": 616 + }, + { + "epoch": 0.03757269433364796, + "grad_norm": 0.9325088912193812, + "learning_rate": 4.999659622128921e-06, + "loss": 0.6247, + "step": 617 + }, + { + "epoch": 0.03763359011052583, + "grad_norm": 0.9038121092027975, + "learning_rate": 4.999658304143252e-06, + "loss": 0.5715, + "step": 618 + }, + { + "epoch": 0.037694485887403706, + "grad_norm": 0.921784187090497, + "learning_rate": 4.9996569836109844e-06, + "loss": 0.5563, + "step": 619 + }, + { + "epoch": 0.03775538166428158, + "grad_norm": 0.8897618584779313, + "learning_rate": 4.999655660532118e-06, + "loss": 0.5698, + "step": 620 + }, + { + "epoch": 0.03781627744115946, + "grad_norm": 0.9198661021216509, + "learning_rate": 4.999654334906654e-06, + "loss": 0.6161, + "step": 621 + }, + { + "epoch": 0.03787717321803733, + "grad_norm": 0.9152990649347437, + "learning_rate": 4.999653006734594e-06, + "loss": 0.5984, + "step": 622 + }, + { + "epoch": 0.037938068994915204, + "grad_norm": 0.953253278403368, + "learning_rate": 4.99965167601594e-06, + "loss": 0.5599, + "step": 623 + }, + { + "epoch": 0.03799896477179308, + "grad_norm": 0.9791020021243315, + "learning_rate": 4.9996503427506925e-06, + "loss": 0.5426, + "step": 624 + }, + { + "epoch": 0.03805986054867095, + "grad_norm": 0.9883139154101797, + "learning_rate": 4.9996490069388535e-06, + "loss": 0.5856, + "step": 625 + }, + { + "epoch": 0.03812075632554882, + "grad_norm": 0.9910716964620262, + "learning_rate": 4.9996476685804235e-06, + "loss": 0.5973, + "step": 626 + }, + { + "epoch": 0.038181652102426696, + "grad_norm": 0.8817347575755441, + "learning_rate": 4.999646327675405e-06, + "loss": 0.5756, + "step": 627 + }, + { + "epoch": 0.03824254787930457, + "grad_norm": 0.9007983196151219, + "learning_rate": 4.999644984223798e-06, + "loss": 0.5948, + "step": 628 + }, + { + "epoch": 0.03830344365618244, + "grad_norm": 0.9433073365518962, + "learning_rate": 4.999643638225605e-06, + "loss": 0.644, + "step": 629 + }, + { + "epoch": 0.038364339433060314, + "grad_norm": 0.8448417977554687, + "learning_rate": 4.999642289680828e-06, + "loss": 0.5983, + "step": 630 + }, + { + "epoch": 0.038425235209938194, + "grad_norm": 1.0049028553947297, + "learning_rate": 4.999640938589466e-06, + "loss": 0.6161, + "step": 631 + }, + { + "epoch": 0.03848613098681607, + "grad_norm": 0.8934088358919284, + "learning_rate": 4.999639584951524e-06, + "loss": 0.6236, + "step": 632 + }, + { + "epoch": 0.03854702676369394, + "grad_norm": 0.9209963584503512, + "learning_rate": 4.999638228766999e-06, + "loss": 0.6077, + "step": 633 + }, + { + "epoch": 0.03860792254057181, + "grad_norm": 0.8744335984171467, + "learning_rate": 4.999636870035894e-06, + "loss": 0.5463, + "step": 634 + }, + { + "epoch": 0.038668818317449685, + "grad_norm": 0.9006860787013771, + "learning_rate": 4.999635508758213e-06, + "loss": 0.6572, + "step": 635 + }, + { + "epoch": 0.03872971409432756, + "grad_norm": 1.0048829027310382, + "learning_rate": 4.999634144933954e-06, + "loss": 0.6219, + "step": 636 + }, + { + "epoch": 0.03879060987120543, + "grad_norm": 0.960703868038752, + "learning_rate": 4.999632778563121e-06, + "loss": 0.6192, + "step": 637 + }, + { + "epoch": 0.038851505648083304, + "grad_norm": 0.8858878315830038, + "learning_rate": 4.999631409645712e-06, + "loss": 0.6386, + "step": 638 + }, + { + "epoch": 0.03891240142496118, + "grad_norm": 0.9100843045170944, + "learning_rate": 4.999630038181732e-06, + "loss": 0.6065, + "step": 639 + }, + { + "epoch": 0.03897329720183905, + "grad_norm": 0.9013908681215274, + "learning_rate": 4.999628664171181e-06, + "loss": 0.6028, + "step": 640 + }, + { + "epoch": 0.03903419297871693, + "grad_norm": 0.9344584537333636, + "learning_rate": 4.99962728761406e-06, + "loss": 0.553, + "step": 641 + }, + { + "epoch": 0.0390950887555948, + "grad_norm": 0.9112795402529298, + "learning_rate": 4.99962590851037e-06, + "loss": 0.5771, + "step": 642 + }, + { + "epoch": 0.039155984532472675, + "grad_norm": 0.9213674452736826, + "learning_rate": 4.999624526860114e-06, + "loss": 0.6231, + "step": 643 + }, + { + "epoch": 0.03921688030935055, + "grad_norm": 0.9540262167687221, + "learning_rate": 4.999623142663293e-06, + "loss": 0.5647, + "step": 644 + }, + { + "epoch": 0.03927777608622842, + "grad_norm": 0.9364337374882771, + "learning_rate": 4.999621755919906e-06, + "loss": 0.6524, + "step": 645 + }, + { + "epoch": 0.03933867186310629, + "grad_norm": 1.0274540312449252, + "learning_rate": 4.999620366629958e-06, + "loss": 0.6092, + "step": 646 + }, + { + "epoch": 0.039399567639984166, + "grad_norm": 0.9142972916088651, + "learning_rate": 4.999618974793448e-06, + "loss": 0.6066, + "step": 647 + }, + { + "epoch": 0.03946046341686204, + "grad_norm": 0.830492781702811, + "learning_rate": 4.9996175804103775e-06, + "loss": 0.6173, + "step": 648 + }, + { + "epoch": 0.03952135919373991, + "grad_norm": 0.9094778739119247, + "learning_rate": 4.99961618348075e-06, + "loss": 0.6474, + "step": 649 + }, + { + "epoch": 0.039582254970617785, + "grad_norm": 0.9674054701354592, + "learning_rate": 4.9996147840045645e-06, + "loss": 0.6655, + "step": 650 + }, + { + "epoch": 0.039643150747495665, + "grad_norm": 0.8876152331177954, + "learning_rate": 4.9996133819818235e-06, + "loss": 0.6642, + "step": 651 + }, + { + "epoch": 0.03970404652437354, + "grad_norm": 0.9971611987194013, + "learning_rate": 4.999611977412529e-06, + "loss": 0.5769, + "step": 652 + }, + { + "epoch": 0.03976494230125141, + "grad_norm": 0.9597730520114094, + "learning_rate": 4.999610570296681e-06, + "loss": 0.5856, + "step": 653 + }, + { + "epoch": 0.03982583807812928, + "grad_norm": 0.8636268067314244, + "learning_rate": 4.9996091606342826e-06, + "loss": 0.6241, + "step": 654 + }, + { + "epoch": 0.039886733855007156, + "grad_norm": 0.9005671874770992, + "learning_rate": 4.9996077484253335e-06, + "loss": 0.6216, + "step": 655 + }, + { + "epoch": 0.03994762963188503, + "grad_norm": 0.9077103925356858, + "learning_rate": 4.9996063336698366e-06, + "loss": 0.6289, + "step": 656 + }, + { + "epoch": 0.0400085254087629, + "grad_norm": 0.9113273615026914, + "learning_rate": 4.999604916367793e-06, + "loss": 0.5919, + "step": 657 + }, + { + "epoch": 0.040069421185640774, + "grad_norm": 0.8262017117520554, + "learning_rate": 4.999603496519203e-06, + "loss": 0.6064, + "step": 658 + }, + { + "epoch": 0.04013031696251865, + "grad_norm": 0.9694948951627942, + "learning_rate": 4.999602074124069e-06, + "loss": 0.6463, + "step": 659 + }, + { + "epoch": 0.04019121273939652, + "grad_norm": 0.8392799413068273, + "learning_rate": 4.999600649182393e-06, + "loss": 0.668, + "step": 660 + }, + { + "epoch": 0.04025210851627439, + "grad_norm": 0.957416939579636, + "learning_rate": 4.999599221694176e-06, + "loss": 0.5946, + "step": 661 + }, + { + "epoch": 0.04031300429315227, + "grad_norm": 1.0548134373064368, + "learning_rate": 4.999597791659418e-06, + "loss": 0.6535, + "step": 662 + }, + { + "epoch": 0.040373900070030146, + "grad_norm": 0.9436762516635531, + "learning_rate": 4.999596359078123e-06, + "loss": 0.5918, + "step": 663 + }, + { + "epoch": 0.04043479584690802, + "grad_norm": 0.9425440991316258, + "learning_rate": 4.99959492395029e-06, + "loss": 0.5987, + "step": 664 + }, + { + "epoch": 0.04049569162378589, + "grad_norm": 0.9491197095871141, + "learning_rate": 4.999593486275923e-06, + "loss": 0.6337, + "step": 665 + }, + { + "epoch": 0.040556587400663764, + "grad_norm": 0.9038244821459279, + "learning_rate": 4.999592046055022e-06, + "loss": 0.6228, + "step": 666 + }, + { + "epoch": 0.04061748317754164, + "grad_norm": 0.8932053840474375, + "learning_rate": 4.9995906032875874e-06, + "loss": 0.6197, + "step": 667 + }, + { + "epoch": 0.04067837895441951, + "grad_norm": 0.914825936881544, + "learning_rate": 4.999589157973623e-06, + "loss": 0.5954, + "step": 668 + }, + { + "epoch": 0.04073927473129738, + "grad_norm": 0.9046013277943018, + "learning_rate": 4.999587710113128e-06, + "loss": 0.6258, + "step": 669 + }, + { + "epoch": 0.040800170508175256, + "grad_norm": 0.8894805148565166, + "learning_rate": 4.999586259706106e-06, + "loss": 0.5538, + "step": 670 + }, + { + "epoch": 0.04086106628505313, + "grad_norm": 0.986441106154532, + "learning_rate": 4.999584806752558e-06, + "loss": 0.5559, + "step": 671 + }, + { + "epoch": 0.04092196206193101, + "grad_norm": 0.9229374952204834, + "learning_rate": 4.999583351252484e-06, + "loss": 0.6485, + "step": 672 + }, + { + "epoch": 0.04098285783880888, + "grad_norm": 0.9453155717116566, + "learning_rate": 4.9995818932058856e-06, + "loss": 0.6161, + "step": 673 + }, + { + "epoch": 0.041043753615686754, + "grad_norm": 0.9346829885044637, + "learning_rate": 4.999580432612767e-06, + "loss": 0.5639, + "step": 674 + }, + { + "epoch": 0.04110464939256463, + "grad_norm": 0.9591041997176056, + "learning_rate": 4.999578969473127e-06, + "loss": 0.5394, + "step": 675 + }, + { + "epoch": 0.0411655451694425, + "grad_norm": 1.0664029652476756, + "learning_rate": 4.999577503786968e-06, + "loss": 0.6045, + "step": 676 + }, + { + "epoch": 0.04122644094632037, + "grad_norm": 0.9523609107075215, + "learning_rate": 4.999576035554291e-06, + "loss": 0.6153, + "step": 677 + }, + { + "epoch": 0.041287336723198245, + "grad_norm": 0.9905026149765738, + "learning_rate": 4.999574564775098e-06, + "loss": 0.6167, + "step": 678 + }, + { + "epoch": 0.04134823250007612, + "grad_norm": 0.9037248990876888, + "learning_rate": 4.999573091449391e-06, + "loss": 0.5963, + "step": 679 + }, + { + "epoch": 0.04140912827695399, + "grad_norm": 1.07494335887253, + "learning_rate": 4.99957161557717e-06, + "loss": 0.5675, + "step": 680 + }, + { + "epoch": 0.041470024053831864, + "grad_norm": 0.9441389547214672, + "learning_rate": 4.999570137158438e-06, + "loss": 0.6014, + "step": 681 + }, + { + "epoch": 0.041530919830709744, + "grad_norm": 0.9694120046656267, + "learning_rate": 4.999568656193196e-06, + "loss": 0.647, + "step": 682 + }, + { + "epoch": 0.041591815607587616, + "grad_norm": 0.9803819813472991, + "learning_rate": 4.999567172681445e-06, + "loss": 0.583, + "step": 683 + }, + { + "epoch": 0.04165271138446549, + "grad_norm": 0.886694519787291, + "learning_rate": 4.999565686623188e-06, + "loss": 0.5928, + "step": 684 + }, + { + "epoch": 0.04171360716134336, + "grad_norm": 0.9447995519440603, + "learning_rate": 4.999564198018424e-06, + "loss": 0.5411, + "step": 685 + }, + { + "epoch": 0.041774502938221235, + "grad_norm": 0.930279251792346, + "learning_rate": 4.999562706867157e-06, + "loss": 0.5998, + "step": 686 + }, + { + "epoch": 0.04183539871509911, + "grad_norm": 0.9479938777674947, + "learning_rate": 4.999561213169387e-06, + "loss": 0.5381, + "step": 687 + }, + { + "epoch": 0.04189629449197698, + "grad_norm": 0.9561622308464508, + "learning_rate": 4.999559716925116e-06, + "loss": 0.6587, + "step": 688 + }, + { + "epoch": 0.04195719026885485, + "grad_norm": 0.9669579508253766, + "learning_rate": 4.999558218134346e-06, + "loss": 0.6025, + "step": 689 + }, + { + "epoch": 0.042018086045732726, + "grad_norm": 0.8637025172586335, + "learning_rate": 4.999556716797077e-06, + "loss": 0.633, + "step": 690 + }, + { + "epoch": 0.0420789818226106, + "grad_norm": 0.9384106325659761, + "learning_rate": 4.9995552129133125e-06, + "loss": 0.6035, + "step": 691 + }, + { + "epoch": 0.04213987759948848, + "grad_norm": 0.9128893538889681, + "learning_rate": 4.999553706483053e-06, + "loss": 0.5676, + "step": 692 + }, + { + "epoch": 0.04220077337636635, + "grad_norm": 0.9462418677363831, + "learning_rate": 4.9995521975063e-06, + "loss": 0.6357, + "step": 693 + }, + { + "epoch": 0.042261669153244225, + "grad_norm": 0.8920234034234613, + "learning_rate": 4.999550685983055e-06, + "loss": 0.6374, + "step": 694 + }, + { + "epoch": 0.0423225649301221, + "grad_norm": 0.9978034806481134, + "learning_rate": 4.99954917191332e-06, + "loss": 0.555, + "step": 695 + }, + { + "epoch": 0.04238346070699997, + "grad_norm": 0.8876790738434981, + "learning_rate": 4.999547655297096e-06, + "loss": 0.631, + "step": 696 + }, + { + "epoch": 0.04244435648387784, + "grad_norm": 0.921682239980844, + "learning_rate": 4.999546136134385e-06, + "loss": 0.6452, + "step": 697 + }, + { + "epoch": 0.042505252260755716, + "grad_norm": 0.9255251442065211, + "learning_rate": 4.999544614425188e-06, + "loss": 0.6183, + "step": 698 + }, + { + "epoch": 0.04256614803763359, + "grad_norm": 0.9059807718047895, + "learning_rate": 4.999543090169508e-06, + "loss": 0.6693, + "step": 699 + }, + { + "epoch": 0.04262704381451146, + "grad_norm": 0.8948268457624917, + "learning_rate": 4.999541563367344e-06, + "loss": 0.5963, + "step": 700 + }, + { + "epoch": 0.042687939591389334, + "grad_norm": 0.8659892660880926, + "learning_rate": 4.9995400340187e-06, + "loss": 0.6506, + "step": 701 + }, + { + "epoch": 0.04274883536826721, + "grad_norm": 1.0102530844779543, + "learning_rate": 4.9995385021235765e-06, + "loss": 0.6023, + "step": 702 + }, + { + "epoch": 0.04280973114514509, + "grad_norm": 0.9196330495222329, + "learning_rate": 4.999536967681975e-06, + "loss": 0.6814, + "step": 703 + }, + { + "epoch": 0.04287062692202296, + "grad_norm": 1.003902450643636, + "learning_rate": 4.999535430693897e-06, + "loss": 0.5123, + "step": 704 + }, + { + "epoch": 0.04293152269890083, + "grad_norm": 0.9996990087861035, + "learning_rate": 4.999533891159345e-06, + "loss": 0.6605, + "step": 705 + }, + { + "epoch": 0.042992418475778706, + "grad_norm": 0.9013058004912472, + "learning_rate": 4.999532349078319e-06, + "loss": 0.6083, + "step": 706 + }, + { + "epoch": 0.04305331425265658, + "grad_norm": 0.9187164104203875, + "learning_rate": 4.999530804450822e-06, + "loss": 0.5792, + "step": 707 + }, + { + "epoch": 0.04311421002953445, + "grad_norm": 0.869619877511593, + "learning_rate": 4.999529257276854e-06, + "loss": 0.5762, + "step": 708 + }, + { + "epoch": 0.043175105806412324, + "grad_norm": 1.0165200779387324, + "learning_rate": 4.9995277075564185e-06, + "loss": 0.6096, + "step": 709 + }, + { + "epoch": 0.0432360015832902, + "grad_norm": 0.9419292870964436, + "learning_rate": 4.999526155289516e-06, + "loss": 0.6503, + "step": 710 + }, + { + "epoch": 0.04329689736016807, + "grad_norm": 0.8973690833937918, + "learning_rate": 4.999524600476148e-06, + "loss": 0.5583, + "step": 711 + }, + { + "epoch": 0.04335779313704594, + "grad_norm": 0.9644238717075332, + "learning_rate": 4.999523043116316e-06, + "loss": 0.6225, + "step": 712 + }, + { + "epoch": 0.04341868891392382, + "grad_norm": 1.0161142029877794, + "learning_rate": 4.999521483210023e-06, + "loss": 0.6323, + "step": 713 + }, + { + "epoch": 0.043479584690801695, + "grad_norm": 0.923608328470222, + "learning_rate": 4.999519920757269e-06, + "loss": 0.6008, + "step": 714 + }, + { + "epoch": 0.04354048046767957, + "grad_norm": 0.9724553920360316, + "learning_rate": 4.9995183557580555e-06, + "loss": 0.7392, + "step": 715 + }, + { + "epoch": 0.04360137624455744, + "grad_norm": 0.9154211378171839, + "learning_rate": 4.9995167882123855e-06, + "loss": 0.5985, + "step": 716 + }, + { + "epoch": 0.043662272021435314, + "grad_norm": 0.9324359341923898, + "learning_rate": 4.9995152181202584e-06, + "loss": 0.6001, + "step": 717 + }, + { + "epoch": 0.04372316779831319, + "grad_norm": 1.0140034940689702, + "learning_rate": 4.999513645481678e-06, + "loss": 0.5939, + "step": 718 + }, + { + "epoch": 0.04378406357519106, + "grad_norm": 0.9265048799590763, + "learning_rate": 4.999512070296646e-06, + "loss": 0.6576, + "step": 719 + }, + { + "epoch": 0.04384495935206893, + "grad_norm": 0.9640522833241182, + "learning_rate": 4.999510492565161e-06, + "loss": 0.6182, + "step": 720 + }, + { + "epoch": 0.043905855128946805, + "grad_norm": 0.9486803523455828, + "learning_rate": 4.999508912287228e-06, + "loss": 0.5919, + "step": 721 + }, + { + "epoch": 0.04396675090582468, + "grad_norm": 0.938953694254195, + "learning_rate": 4.999507329462848e-06, + "loss": 0.6497, + "step": 722 + }, + { + "epoch": 0.04402764668270256, + "grad_norm": 1.0242143796077097, + "learning_rate": 4.999505744092021e-06, + "loss": 0.6008, + "step": 723 + }, + { + "epoch": 0.04408854245958043, + "grad_norm": 0.9256790563970995, + "learning_rate": 4.999504156174749e-06, + "loss": 0.6146, + "step": 724 + }, + { + "epoch": 0.044149438236458304, + "grad_norm": 1.0143896284552028, + "learning_rate": 4.999502565711035e-06, + "loss": 0.6521, + "step": 725 + }, + { + "epoch": 0.044210334013336176, + "grad_norm": 0.8929719792909309, + "learning_rate": 4.999500972700879e-06, + "loss": 0.617, + "step": 726 + }, + { + "epoch": 0.04427122979021405, + "grad_norm": 1.0005012415238297, + "learning_rate": 4.999499377144284e-06, + "loss": 0.6577, + "step": 727 + }, + { + "epoch": 0.04433212556709192, + "grad_norm": 1.0311959592900908, + "learning_rate": 4.999497779041251e-06, + "loss": 0.5765, + "step": 728 + }, + { + "epoch": 0.044393021343969795, + "grad_norm": 0.8731169576004627, + "learning_rate": 4.999496178391782e-06, + "loss": 0.5969, + "step": 729 + }, + { + "epoch": 0.04445391712084767, + "grad_norm": 0.8646869955184578, + "learning_rate": 4.999494575195878e-06, + "loss": 0.62, + "step": 730 + }, + { + "epoch": 0.04451481289772554, + "grad_norm": 0.9426373451556163, + "learning_rate": 4.99949296945354e-06, + "loss": 0.6255, + "step": 731 + }, + { + "epoch": 0.04457570867460341, + "grad_norm": 0.959868185241791, + "learning_rate": 4.999491361164771e-06, + "loss": 0.5724, + "step": 732 + }, + { + "epoch": 0.04463660445148129, + "grad_norm": 0.8924901219220744, + "learning_rate": 4.9994897503295725e-06, + "loss": 0.6726, + "step": 733 + }, + { + "epoch": 0.044697500228359166, + "grad_norm": 0.9677539445708461, + "learning_rate": 4.999488136947945e-06, + "loss": 0.5682, + "step": 734 + }, + { + "epoch": 0.04475839600523704, + "grad_norm": 0.9940509207431923, + "learning_rate": 4.999486521019892e-06, + "loss": 0.5459, + "step": 735 + }, + { + "epoch": 0.04481929178211491, + "grad_norm": 0.9577280424090192, + "learning_rate": 4.999484902545414e-06, + "loss": 0.6042, + "step": 736 + }, + { + "epoch": 0.044880187558992785, + "grad_norm": 0.9384139213964082, + "learning_rate": 4.999483281524512e-06, + "loss": 0.548, + "step": 737 + }, + { + "epoch": 0.04494108333587066, + "grad_norm": 0.9193230592459444, + "learning_rate": 4.99948165795719e-06, + "loss": 0.5731, + "step": 738 + }, + { + "epoch": 0.04500197911274853, + "grad_norm": 0.9887821157606658, + "learning_rate": 4.999480031843445e-06, + "loss": 0.5933, + "step": 739 + }, + { + "epoch": 0.0450628748896264, + "grad_norm": 0.9403209417411679, + "learning_rate": 4.999478403183284e-06, + "loss": 0.6465, + "step": 740 + }, + { + "epoch": 0.045123770666504276, + "grad_norm": 0.9196633532170205, + "learning_rate": 4.999476771976706e-06, + "loss": 0.5771, + "step": 741 + }, + { + "epoch": 0.04518466644338215, + "grad_norm": 0.8865552161989972, + "learning_rate": 4.999475138223714e-06, + "loss": 0.5807, + "step": 742 + }, + { + "epoch": 0.04524556222026002, + "grad_norm": 0.9165595759481814, + "learning_rate": 4.9994735019243066e-06, + "loss": 0.6349, + "step": 743 + }, + { + "epoch": 0.0453064579971379, + "grad_norm": 0.9631010238514078, + "learning_rate": 4.999471863078489e-06, + "loss": 0.5652, + "step": 744 + }, + { + "epoch": 0.045367353774015774, + "grad_norm": 0.9887085061737799, + "learning_rate": 4.9994702216862615e-06, + "loss": 0.647, + "step": 745 + }, + { + "epoch": 0.04542824955089365, + "grad_norm": 0.9173855291252766, + "learning_rate": 4.999468577747625e-06, + "loss": 0.634, + "step": 746 + }, + { + "epoch": 0.04548914532777152, + "grad_norm": 0.9349271936047793, + "learning_rate": 4.9994669312625825e-06, + "loss": 0.6375, + "step": 747 + }, + { + "epoch": 0.04555004110464939, + "grad_norm": 0.9976711644210832, + "learning_rate": 4.999465282231134e-06, + "loss": 0.5569, + "step": 748 + }, + { + "epoch": 0.045610936881527266, + "grad_norm": 0.9516578277755166, + "learning_rate": 4.999463630653283e-06, + "loss": 0.5967, + "step": 749 + }, + { + "epoch": 0.04567183265840514, + "grad_norm": 0.9562888046100467, + "learning_rate": 4.99946197652903e-06, + "loss": 0.6369, + "step": 750 + }, + { + "epoch": 0.04573272843528301, + "grad_norm": 0.8918414433334476, + "learning_rate": 4.999460319858378e-06, + "loss": 0.5717, + "step": 751 + }, + { + "epoch": 0.045793624212160884, + "grad_norm": 0.8705508655665568, + "learning_rate": 4.999458660641327e-06, + "loss": 0.6079, + "step": 752 + }, + { + "epoch": 0.04585451998903876, + "grad_norm": 0.8929097034883898, + "learning_rate": 4.99945699887788e-06, + "loss": 0.5993, + "step": 753 + }, + { + "epoch": 0.04591541576591664, + "grad_norm": 0.9270321038640618, + "learning_rate": 4.999455334568037e-06, + "loss": 0.554, + "step": 754 + }, + { + "epoch": 0.04597631154279451, + "grad_norm": 0.9538659674683392, + "learning_rate": 4.999453667711802e-06, + "loss": 0.5975, + "step": 755 + }, + { + "epoch": 0.04603720731967238, + "grad_norm": 1.0527984843916622, + "learning_rate": 4.999451998309176e-06, + "loss": 0.6631, + "step": 756 + }, + { + "epoch": 0.046098103096550255, + "grad_norm": 0.8932276761523216, + "learning_rate": 4.999450326360159e-06, + "loss": 0.6405, + "step": 757 + }, + { + "epoch": 0.04615899887342813, + "grad_norm": 0.929582613448443, + "learning_rate": 4.999448651864754e-06, + "loss": 0.6442, + "step": 758 + }, + { + "epoch": 0.046219894650306, + "grad_norm": 0.98020531604507, + "learning_rate": 4.999446974822964e-06, + "loss": 0.582, + "step": 759 + }, + { + "epoch": 0.046280790427183874, + "grad_norm": 0.9329649880851422, + "learning_rate": 4.999445295234789e-06, + "loss": 0.5977, + "step": 760 + }, + { + "epoch": 0.04634168620406175, + "grad_norm": 0.9443838505098391, + "learning_rate": 4.999443613100231e-06, + "loss": 0.6293, + "step": 761 + }, + { + "epoch": 0.04640258198093962, + "grad_norm": 1.0175858888368574, + "learning_rate": 4.999441928419291e-06, + "loss": 0.5832, + "step": 762 + }, + { + "epoch": 0.04646347775781749, + "grad_norm": 1.0137322426102544, + "learning_rate": 4.999440241191972e-06, + "loss": 0.5846, + "step": 763 + }, + { + "epoch": 0.04652437353469537, + "grad_norm": 0.86840977946459, + "learning_rate": 4.999438551418276e-06, + "loss": 0.6258, + "step": 764 + }, + { + "epoch": 0.046585269311573245, + "grad_norm": 0.9206770049578523, + "learning_rate": 4.999436859098202e-06, + "loss": 0.5963, + "step": 765 + }, + { + "epoch": 0.04664616508845112, + "grad_norm": 1.0060434258546966, + "learning_rate": 4.999435164231756e-06, + "loss": 0.5695, + "step": 766 + }, + { + "epoch": 0.04670706086532899, + "grad_norm": 0.9719706880019677, + "learning_rate": 4.999433466818936e-06, + "loss": 0.5677, + "step": 767 + }, + { + "epoch": 0.046767956642206863, + "grad_norm": 1.025244253285763, + "learning_rate": 4.999431766859746e-06, + "loss": 0.5873, + "step": 768 + }, + { + "epoch": 0.046828852419084736, + "grad_norm": 0.917547173173034, + "learning_rate": 4.999430064354186e-06, + "loss": 0.5823, + "step": 769 + }, + { + "epoch": 0.04688974819596261, + "grad_norm": 0.986122664562976, + "learning_rate": 4.9994283593022595e-06, + "loss": 0.5831, + "step": 770 + }, + { + "epoch": 0.04695064397284048, + "grad_norm": 0.9848048477602993, + "learning_rate": 4.999426651703967e-06, + "loss": 0.5854, + "step": 771 + }, + { + "epoch": 0.047011539749718355, + "grad_norm": 0.9432499839603535, + "learning_rate": 4.99942494155931e-06, + "loss": 0.6311, + "step": 772 + }, + { + "epoch": 0.04707243552659623, + "grad_norm": 0.9778250298213461, + "learning_rate": 4.999423228868292e-06, + "loss": 0.5673, + "step": 773 + }, + { + "epoch": 0.04713333130347411, + "grad_norm": 1.0060280814458449, + "learning_rate": 4.999421513630912e-06, + "loss": 0.6342, + "step": 774 + }, + { + "epoch": 0.04719422708035198, + "grad_norm": 0.9785373952591474, + "learning_rate": 4.999419795847174e-06, + "loss": 0.5378, + "step": 775 + }, + { + "epoch": 0.04725512285722985, + "grad_norm": 0.9845759993206283, + "learning_rate": 4.999418075517079e-06, + "loss": 0.5751, + "step": 776 + }, + { + "epoch": 0.047316018634107726, + "grad_norm": 0.9959300781454401, + "learning_rate": 4.9994163526406305e-06, + "loss": 0.6859, + "step": 777 + }, + { + "epoch": 0.0473769144109856, + "grad_norm": 0.9582242353296838, + "learning_rate": 4.999414627217827e-06, + "loss": 0.6106, + "step": 778 + }, + { + "epoch": 0.04743781018786347, + "grad_norm": 0.9365561149969012, + "learning_rate": 4.999412899248672e-06, + "loss": 0.5972, + "step": 779 + }, + { + "epoch": 0.047498705964741345, + "grad_norm": 1.0014248409600786, + "learning_rate": 4.999411168733167e-06, + "loss": 0.6276, + "step": 780 + }, + { + "epoch": 0.04755960174161922, + "grad_norm": 1.0160758579230542, + "learning_rate": 4.999409435671314e-06, + "loss": 0.5586, + "step": 781 + }, + { + "epoch": 0.04762049751849709, + "grad_norm": 0.9753210214773294, + "learning_rate": 4.9994077000631145e-06, + "loss": 0.6149, + "step": 782 + }, + { + "epoch": 0.04768139329537496, + "grad_norm": 0.9070771520787325, + "learning_rate": 4.999405961908571e-06, + "loss": 0.5732, + "step": 783 + }, + { + "epoch": 0.047742289072252836, + "grad_norm": 0.931001782111828, + "learning_rate": 4.999404221207684e-06, + "loss": 0.6448, + "step": 784 + }, + { + "epoch": 0.047803184849130716, + "grad_norm": 0.9226829783856478, + "learning_rate": 4.999402477960456e-06, + "loss": 0.5966, + "step": 785 + }, + { + "epoch": 0.04786408062600859, + "grad_norm": 0.9485489763058337, + "learning_rate": 4.999400732166889e-06, + "loss": 0.5325, + "step": 786 + }, + { + "epoch": 0.04792497640288646, + "grad_norm": 0.910428303127011, + "learning_rate": 4.999398983826984e-06, + "loss": 0.5413, + "step": 787 + }, + { + "epoch": 0.047985872179764334, + "grad_norm": 0.9378468717312077, + "learning_rate": 4.999397232940744e-06, + "loss": 0.5968, + "step": 788 + }, + { + "epoch": 0.04804676795664221, + "grad_norm": 0.8886185196144795, + "learning_rate": 4.99939547950817e-06, + "loss": 0.549, + "step": 789 + }, + { + "epoch": 0.04810766373352008, + "grad_norm": 1.024857859512105, + "learning_rate": 4.999393723529264e-06, + "loss": 0.6216, + "step": 790 + }, + { + "epoch": 0.04816855951039795, + "grad_norm": 0.9320595287923821, + "learning_rate": 4.999391965004027e-06, + "loss": 0.5877, + "step": 791 + }, + { + "epoch": 0.048229455287275826, + "grad_norm": 0.9985372307979925, + "learning_rate": 4.999390203932461e-06, + "loss": 0.6102, + "step": 792 + }, + { + "epoch": 0.0482903510641537, + "grad_norm": 1.0372495403321893, + "learning_rate": 4.999388440314569e-06, + "loss": 0.6414, + "step": 793 + }, + { + "epoch": 0.04835124684103157, + "grad_norm": 0.960207516496005, + "learning_rate": 4.999386674150351e-06, + "loss": 0.5857, + "step": 794 + }, + { + "epoch": 0.04841214261790945, + "grad_norm": 0.9615645096753326, + "learning_rate": 4.999384905439811e-06, + "loss": 0.6236, + "step": 795 + }, + { + "epoch": 0.048473038394787324, + "grad_norm": 0.9940042365119532, + "learning_rate": 4.999383134182951e-06, + "loss": 0.5982, + "step": 796 + }, + { + "epoch": 0.0485339341716652, + "grad_norm": 1.0251465671341073, + "learning_rate": 4.999381360379769e-06, + "loss": 0.603, + "step": 797 + }, + { + "epoch": 0.04859482994854307, + "grad_norm": 0.901480048262466, + "learning_rate": 4.999379584030269e-06, + "loss": 0.6122, + "step": 798 + }, + { + "epoch": 0.04865572572542094, + "grad_norm": 0.9519083293258097, + "learning_rate": 4.999377805134454e-06, + "loss": 0.6595, + "step": 799 + }, + { + "epoch": 0.048716621502298815, + "grad_norm": 0.9872030055075024, + "learning_rate": 4.999376023692326e-06, + "loss": 0.6186, + "step": 800 + }, + { + "epoch": 0.04877751727917669, + "grad_norm": 0.964092841760155, + "learning_rate": 4.999374239703884e-06, + "loss": 0.555, + "step": 801 + }, + { + "epoch": 0.04883841305605456, + "grad_norm": 0.9538961824793578, + "learning_rate": 4.999372453169132e-06, + "loss": 0.5997, + "step": 802 + }, + { + "epoch": 0.048899308832932434, + "grad_norm": 0.8858989606100357, + "learning_rate": 4.999370664088071e-06, + "loss": 0.6076, + "step": 803 + }, + { + "epoch": 0.04896020460981031, + "grad_norm": 0.9728672673987911, + "learning_rate": 4.999368872460704e-06, + "loss": 0.6043, + "step": 804 + }, + { + "epoch": 0.049021100386688186, + "grad_norm": 0.9598993376637882, + "learning_rate": 4.999367078287032e-06, + "loss": 0.5297, + "step": 805 + }, + { + "epoch": 0.04908199616356606, + "grad_norm": 0.9580739268832047, + "learning_rate": 4.999365281567056e-06, + "loss": 0.5061, + "step": 806 + }, + { + "epoch": 0.04914289194044393, + "grad_norm": 1.010286124092416, + "learning_rate": 4.999363482300778e-06, + "loss": 0.5775, + "step": 807 + }, + { + "epoch": 0.049203787717321805, + "grad_norm": 1.0066947030690239, + "learning_rate": 4.999361680488202e-06, + "loss": 0.5813, + "step": 808 + }, + { + "epoch": 0.04926468349419968, + "grad_norm": 1.104535419206599, + "learning_rate": 4.999359876129327e-06, + "loss": 0.505, + "step": 809 + }, + { + "epoch": 0.04932557927107755, + "grad_norm": 0.9292068801621749, + "learning_rate": 4.999358069224157e-06, + "loss": 0.5509, + "step": 810 + }, + { + "epoch": 0.049386475047955423, + "grad_norm": 0.913664262036396, + "learning_rate": 4.999356259772693e-06, + "loss": 0.5474, + "step": 811 + }, + { + "epoch": 0.049447370824833296, + "grad_norm": 0.9686742961110151, + "learning_rate": 4.9993544477749364e-06, + "loss": 0.5781, + "step": 812 + }, + { + "epoch": 0.04950826660171117, + "grad_norm": 0.9852725392481547, + "learning_rate": 4.99935263323089e-06, + "loss": 0.6359, + "step": 813 + }, + { + "epoch": 0.04956916237858904, + "grad_norm": 1.0066939369739636, + "learning_rate": 4.999350816140554e-06, + "loss": 0.53, + "step": 814 + }, + { + "epoch": 0.04963005815546692, + "grad_norm": 1.0096493534567426, + "learning_rate": 4.999348996503932e-06, + "loss": 0.5883, + "step": 815 + }, + { + "epoch": 0.049690953932344795, + "grad_norm": 0.8903071596326964, + "learning_rate": 4.9993471743210254e-06, + "loss": 0.6134, + "step": 816 + }, + { + "epoch": 0.04975184970922267, + "grad_norm": 0.9692841576790168, + "learning_rate": 4.999345349591835e-06, + "loss": 0.5866, + "step": 817 + }, + { + "epoch": 0.04981274548610054, + "grad_norm": 0.9785250226048439, + "learning_rate": 4.999343522316364e-06, + "loss": 0.5418, + "step": 818 + }, + { + "epoch": 0.04987364126297841, + "grad_norm": 0.9462277946054182, + "learning_rate": 4.9993416924946145e-06, + "loss": 0.6104, + "step": 819 + }, + { + "epoch": 0.049934537039856286, + "grad_norm": 0.9377600211916157, + "learning_rate": 4.999339860126587e-06, + "loss": 0.6118, + "step": 820 + }, + { + "epoch": 0.04999543281673416, + "grad_norm": 0.9250218537733235, + "learning_rate": 4.999338025212284e-06, + "loss": 0.6629, + "step": 821 + }, + { + "epoch": 0.05005632859361203, + "grad_norm": 0.9292312470670782, + "learning_rate": 4.999336187751708e-06, + "loss": 0.6509, + "step": 822 + }, + { + "epoch": 0.050117224370489905, + "grad_norm": 0.9341040472351617, + "learning_rate": 4.9993343477448595e-06, + "loss": 0.5608, + "step": 823 + }, + { + "epoch": 0.05017812014736778, + "grad_norm": 0.9735742904458778, + "learning_rate": 4.999332505191741e-06, + "loss": 0.5559, + "step": 824 + }, + { + "epoch": 0.05023901592424565, + "grad_norm": 1.0373772947261266, + "learning_rate": 4.999330660092355e-06, + "loss": 0.6371, + "step": 825 + }, + { + "epoch": 0.05029991170112353, + "grad_norm": 1.018542355597574, + "learning_rate": 4.999328812446704e-06, + "loss": 0.6169, + "step": 826 + }, + { + "epoch": 0.0503608074780014, + "grad_norm": 1.014960941387071, + "learning_rate": 4.9993269622547866e-06, + "loss": 0.5525, + "step": 827 + }, + { + "epoch": 0.050421703254879276, + "grad_norm": 1.04122706799669, + "learning_rate": 4.999325109516607e-06, + "loss": 0.5553, + "step": 828 + }, + { + "epoch": 0.05048259903175715, + "grad_norm": 0.9918292544711612, + "learning_rate": 4.999323254232169e-06, + "loss": 0.5774, + "step": 829 + }, + { + "epoch": 0.05054349480863502, + "grad_norm": 0.9107401710702069, + "learning_rate": 4.999321396401471e-06, + "loss": 0.5996, + "step": 830 + }, + { + "epoch": 0.050604390585512894, + "grad_norm": 0.933737560960647, + "learning_rate": 4.999319536024516e-06, + "loss": 0.5894, + "step": 831 + }, + { + "epoch": 0.05066528636239077, + "grad_norm": 0.9521402646326996, + "learning_rate": 4.9993176731013075e-06, + "loss": 0.54, + "step": 832 + }, + { + "epoch": 0.05072618213926864, + "grad_norm": 0.9278641037840688, + "learning_rate": 4.999315807631845e-06, + "loss": 0.5682, + "step": 833 + }, + { + "epoch": 0.05078707791614651, + "grad_norm": 0.8731156499825803, + "learning_rate": 4.999313939616131e-06, + "loss": 0.6241, + "step": 834 + }, + { + "epoch": 0.050847973693024386, + "grad_norm": 0.8581551839165459, + "learning_rate": 4.999312069054169e-06, + "loss": 0.6343, + "step": 835 + }, + { + "epoch": 0.050908869469902265, + "grad_norm": 0.9763777978879581, + "learning_rate": 4.99931019594596e-06, + "loss": 0.5899, + "step": 836 + }, + { + "epoch": 0.05096976524678014, + "grad_norm": 1.0378729561443174, + "learning_rate": 4.999308320291505e-06, + "loss": 0.6062, + "step": 837 + }, + { + "epoch": 0.05103066102365801, + "grad_norm": 0.9900485277881569, + "learning_rate": 4.999306442090807e-06, + "loss": 0.6356, + "step": 838 + }, + { + "epoch": 0.051091556800535884, + "grad_norm": 0.9531887085745027, + "learning_rate": 4.999304561343867e-06, + "loss": 0.6107, + "step": 839 + }, + { + "epoch": 0.05115245257741376, + "grad_norm": 0.9258363215493612, + "learning_rate": 4.9993026780506885e-06, + "loss": 0.5974, + "step": 840 + }, + { + "epoch": 0.05121334835429163, + "grad_norm": 0.9540190883099346, + "learning_rate": 4.999300792211272e-06, + "loss": 0.6013, + "step": 841 + }, + { + "epoch": 0.0512742441311695, + "grad_norm": 0.902786019858358, + "learning_rate": 4.99929890382562e-06, + "loss": 0.5832, + "step": 842 + }, + { + "epoch": 0.051335139908047375, + "grad_norm": 0.9826552125824675, + "learning_rate": 4.999297012893734e-06, + "loss": 0.5643, + "step": 843 + }, + { + "epoch": 0.05139603568492525, + "grad_norm": 0.9767142580348566, + "learning_rate": 4.999295119415616e-06, + "loss": 0.5667, + "step": 844 + }, + { + "epoch": 0.05145693146180312, + "grad_norm": 0.914766017685997, + "learning_rate": 4.999293223391268e-06, + "loss": 0.5958, + "step": 845 + }, + { + "epoch": 0.051517827238681, + "grad_norm": 1.0138141246731234, + "learning_rate": 4.999291324820692e-06, + "loss": 0.6552, + "step": 846 + }, + { + "epoch": 0.051578723015558874, + "grad_norm": 0.9395028794347936, + "learning_rate": 4.99928942370389e-06, + "loss": 0.6078, + "step": 847 + }, + { + "epoch": 0.051639618792436746, + "grad_norm": 0.9645746957456716, + "learning_rate": 4.9992875200408644e-06, + "loss": 0.5825, + "step": 848 + }, + { + "epoch": 0.05170051456931462, + "grad_norm": 0.9459173703794295, + "learning_rate": 4.999285613831616e-06, + "loss": 0.5547, + "step": 849 + }, + { + "epoch": 0.05176141034619249, + "grad_norm": 0.988055949633709, + "learning_rate": 4.999283705076148e-06, + "loss": 0.554, + "step": 850 + }, + { + "epoch": 0.051822306123070365, + "grad_norm": 0.9166868143248652, + "learning_rate": 4.999281793774461e-06, + "loss": 0.5712, + "step": 851 + }, + { + "epoch": 0.05188320189994824, + "grad_norm": 0.8833575473460488, + "learning_rate": 4.9992798799265584e-06, + "loss": 0.5833, + "step": 852 + }, + { + "epoch": 0.05194409767682611, + "grad_norm": 0.995540197880221, + "learning_rate": 4.999277963532441e-06, + "loss": 0.5498, + "step": 853 + }, + { + "epoch": 0.05200499345370398, + "grad_norm": 0.9189699146347367, + "learning_rate": 4.999276044592111e-06, + "loss": 0.613, + "step": 854 + }, + { + "epoch": 0.052065889230581856, + "grad_norm": 0.944068537992771, + "learning_rate": 4.999274123105571e-06, + "loss": 0.5806, + "step": 855 + }, + { + "epoch": 0.052126785007459736, + "grad_norm": 0.9907420617496323, + "learning_rate": 4.999272199072822e-06, + "loss": 0.6301, + "step": 856 + }, + { + "epoch": 0.05218768078433761, + "grad_norm": 1.0487930927001048, + "learning_rate": 4.999270272493867e-06, + "loss": 0.6161, + "step": 857 + }, + { + "epoch": 0.05224857656121548, + "grad_norm": 0.9307390265704412, + "learning_rate": 4.999268343368707e-06, + "loss": 0.6193, + "step": 858 + }, + { + "epoch": 0.052309472338093355, + "grad_norm": 0.9736041482820871, + "learning_rate": 4.999266411697344e-06, + "loss": 0.5209, + "step": 859 + }, + { + "epoch": 0.05237036811497123, + "grad_norm": 0.9726051870611648, + "learning_rate": 4.999264477479782e-06, + "loss": 0.574, + "step": 860 + }, + { + "epoch": 0.0524312638918491, + "grad_norm": 0.9380955889526035, + "learning_rate": 4.99926254071602e-06, + "loss": 0.5906, + "step": 861 + }, + { + "epoch": 0.05249215966872697, + "grad_norm": 1.0000164637462818, + "learning_rate": 4.999260601406061e-06, + "loss": 0.5512, + "step": 862 + }, + { + "epoch": 0.052553055445604846, + "grad_norm": 1.0668235990302743, + "learning_rate": 4.999258659549908e-06, + "loss": 0.576, + "step": 863 + }, + { + "epoch": 0.05261395122248272, + "grad_norm": 0.9206942741447739, + "learning_rate": 4.999256715147562e-06, + "loss": 0.5928, + "step": 864 + }, + { + "epoch": 0.05267484699936059, + "grad_norm": 0.9059627335453616, + "learning_rate": 4.999254768199025e-06, + "loss": 0.6216, + "step": 865 + }, + { + "epoch": 0.052735742776238465, + "grad_norm": 0.9888613568503292, + "learning_rate": 4.9992528187043e-06, + "loss": 0.5571, + "step": 866 + }, + { + "epoch": 0.052796638553116344, + "grad_norm": 1.0487183069217008, + "learning_rate": 4.999250866663387e-06, + "loss": 0.5197, + "step": 867 + }, + { + "epoch": 0.05285753432999422, + "grad_norm": 0.9861891119523672, + "learning_rate": 4.99924891207629e-06, + "loss": 0.5482, + "step": 868 + }, + { + "epoch": 0.05291843010687209, + "grad_norm": 0.9265120773899509, + "learning_rate": 4.9992469549430105e-06, + "loss": 0.583, + "step": 869 + }, + { + "epoch": 0.05297932588374996, + "grad_norm": 0.9620551590646347, + "learning_rate": 4.999244995263549e-06, + "loss": 0.5933, + "step": 870 + }, + { + "epoch": 0.053040221660627836, + "grad_norm": 0.8991086303466952, + "learning_rate": 4.999243033037909e-06, + "loss": 0.5566, + "step": 871 + }, + { + "epoch": 0.05310111743750571, + "grad_norm": 0.9439204094319452, + "learning_rate": 4.999241068266093e-06, + "loss": 0.6026, + "step": 872 + }, + { + "epoch": 0.05316201321438358, + "grad_norm": 0.9693802821420067, + "learning_rate": 4.999239100948101e-06, + "loss": 0.6113, + "step": 873 + }, + { + "epoch": 0.053222908991261454, + "grad_norm": 1.0157958705334358, + "learning_rate": 4.999237131083936e-06, + "loss": 0.5164, + "step": 874 + }, + { + "epoch": 0.05328380476813933, + "grad_norm": 0.916966239117875, + "learning_rate": 4.9992351586736015e-06, + "loss": 0.6001, + "step": 875 + }, + { + "epoch": 0.0533447005450172, + "grad_norm": 0.9062026804756468, + "learning_rate": 4.999233183717097e-06, + "loss": 0.564, + "step": 876 + }, + { + "epoch": 0.05340559632189508, + "grad_norm": 0.9692945174482966, + "learning_rate": 4.999231206214427e-06, + "loss": 0.6266, + "step": 877 + }, + { + "epoch": 0.05346649209877295, + "grad_norm": 0.9645985893640531, + "learning_rate": 4.999229226165591e-06, + "loss": 0.6047, + "step": 878 + }, + { + "epoch": 0.053527387875650825, + "grad_norm": 0.9755561642202087, + "learning_rate": 4.999227243570593e-06, + "loss": 0.6407, + "step": 879 + }, + { + "epoch": 0.0535882836525287, + "grad_norm": 0.9607968153497156, + "learning_rate": 4.999225258429434e-06, + "loss": 0.572, + "step": 880 + }, + { + "epoch": 0.05364917942940657, + "grad_norm": 1.0366656920694501, + "learning_rate": 4.999223270742116e-06, + "loss": 0.5733, + "step": 881 + }, + { + "epoch": 0.053710075206284444, + "grad_norm": 0.9977432683631673, + "learning_rate": 4.999221280508641e-06, + "loss": 0.601, + "step": 882 + }, + { + "epoch": 0.05377097098316232, + "grad_norm": 0.9395376531736732, + "learning_rate": 4.999219287729012e-06, + "loss": 0.5564, + "step": 883 + }, + { + "epoch": 0.05383186676004019, + "grad_norm": 0.9623150967604917, + "learning_rate": 4.999217292403231e-06, + "loss": 0.5794, + "step": 884 + }, + { + "epoch": 0.05389276253691806, + "grad_norm": 0.919783859292383, + "learning_rate": 4.999215294531297e-06, + "loss": 0.6179, + "step": 885 + }, + { + "epoch": 0.053953658313795935, + "grad_norm": 1.0199002750942012, + "learning_rate": 4.9992132941132175e-06, + "loss": 0.622, + "step": 886 + }, + { + "epoch": 0.054014554090673815, + "grad_norm": 1.0158937419712673, + "learning_rate": 4.99921129114899e-06, + "loss": 0.6724, + "step": 887 + }, + { + "epoch": 0.05407544986755169, + "grad_norm": 0.983942346074935, + "learning_rate": 4.999209285638618e-06, + "loss": 0.5676, + "step": 888 + }, + { + "epoch": 0.05413634564442956, + "grad_norm": 1.02515205635902, + "learning_rate": 4.9992072775821034e-06, + "loss": 0.5917, + "step": 889 + }, + { + "epoch": 0.054197241421307434, + "grad_norm": 0.9629787411552249, + "learning_rate": 4.999205266979448e-06, + "loss": 0.6072, + "step": 890 + }, + { + "epoch": 0.054258137198185306, + "grad_norm": 0.9423121251248786, + "learning_rate": 4.999203253830655e-06, + "loss": 0.5951, + "step": 891 + }, + { + "epoch": 0.05431903297506318, + "grad_norm": 0.9819222992913832, + "learning_rate": 4.999201238135724e-06, + "loss": 0.582, + "step": 892 + }, + { + "epoch": 0.05437992875194105, + "grad_norm": 0.9120249201468291, + "learning_rate": 4.999199219894661e-06, + "loss": 0.5398, + "step": 893 + }, + { + "epoch": 0.054440824528818925, + "grad_norm": 1.0033820288334894, + "learning_rate": 4.999197199107465e-06, + "loss": 0.5592, + "step": 894 + }, + { + "epoch": 0.0545017203056968, + "grad_norm": 0.9648336948732708, + "learning_rate": 4.9991951757741385e-06, + "loss": 0.5572, + "step": 895 + }, + { + "epoch": 0.05456261608257467, + "grad_norm": 0.9958211413284794, + "learning_rate": 4.9991931498946844e-06, + "loss": 0.5534, + "step": 896 + }, + { + "epoch": 0.05462351185945255, + "grad_norm": 1.1091107416092967, + "learning_rate": 4.9991911214691044e-06, + "loss": 0.5244, + "step": 897 + }, + { + "epoch": 0.05468440763633042, + "grad_norm": 1.052389843086847, + "learning_rate": 4.999189090497399e-06, + "loss": 0.5887, + "step": 898 + }, + { + "epoch": 0.054745303413208296, + "grad_norm": 1.048474714029665, + "learning_rate": 4.999187056979573e-06, + "loss": 0.5791, + "step": 899 + }, + { + "epoch": 0.05480619919008617, + "grad_norm": 0.9542346999397776, + "learning_rate": 4.999185020915628e-06, + "loss": 0.6018, + "step": 900 + }, + { + "epoch": 0.05486709496696404, + "grad_norm": 0.9336475324504342, + "learning_rate": 4.9991829823055636e-06, + "loss": 0.6023, + "step": 901 + }, + { + "epoch": 0.054927990743841915, + "grad_norm": 1.0040457828259335, + "learning_rate": 4.999180941149384e-06, + "loss": 0.6155, + "step": 902 + }, + { + "epoch": 0.05498888652071979, + "grad_norm": 0.9858314904658784, + "learning_rate": 4.9991788974470914e-06, + "loss": 0.6287, + "step": 903 + }, + { + "epoch": 0.05504978229759766, + "grad_norm": 0.9953340490288817, + "learning_rate": 4.999176851198687e-06, + "loss": 0.5054, + "step": 904 + }, + { + "epoch": 0.05511067807447553, + "grad_norm": 0.9679008809017587, + "learning_rate": 4.999174802404173e-06, + "loss": 0.6151, + "step": 905 + }, + { + "epoch": 0.055171573851353406, + "grad_norm": 0.9669218403930584, + "learning_rate": 4.9991727510635515e-06, + "loss": 0.5587, + "step": 906 + }, + { + "epoch": 0.05523246962823128, + "grad_norm": 0.958223416813371, + "learning_rate": 4.999170697176825e-06, + "loss": 0.5733, + "step": 907 + }, + { + "epoch": 0.05529336540510916, + "grad_norm": 0.9915322495021359, + "learning_rate": 4.999168640743996e-06, + "loss": 0.6855, + "step": 908 + }, + { + "epoch": 0.05535426118198703, + "grad_norm": 0.9869147209236713, + "learning_rate": 4.999166581765065e-06, + "loss": 0.6708, + "step": 909 + }, + { + "epoch": 0.055415156958864904, + "grad_norm": 1.001419197418264, + "learning_rate": 4.999164520240035e-06, + "loss": 0.5856, + "step": 910 + }, + { + "epoch": 0.05547605273574278, + "grad_norm": 1.0238416578916918, + "learning_rate": 4.999162456168909e-06, + "loss": 0.5988, + "step": 911 + }, + { + "epoch": 0.05553694851262065, + "grad_norm": 0.9880094228373512, + "learning_rate": 4.9991603895516875e-06, + "loss": 0.5768, + "step": 912 + }, + { + "epoch": 0.05559784428949852, + "grad_norm": 1.0049574948458533, + "learning_rate": 4.999158320388374e-06, + "loss": 0.5654, + "step": 913 + }, + { + "epoch": 0.055658740066376396, + "grad_norm": 0.9817767903549594, + "learning_rate": 4.999156248678969e-06, + "loss": 0.6162, + "step": 914 + }, + { + "epoch": 0.05571963584325427, + "grad_norm": 1.0330008871990495, + "learning_rate": 4.999154174423476e-06, + "loss": 0.6172, + "step": 915 + }, + { + "epoch": 0.05578053162013214, + "grad_norm": 0.9873130877929537, + "learning_rate": 4.999152097621897e-06, + "loss": 0.5678, + "step": 916 + }, + { + "epoch": 0.055841427397010014, + "grad_norm": 1.0944351183975671, + "learning_rate": 4.999150018274234e-06, + "loss": 0.5687, + "step": 917 + }, + { + "epoch": 0.055902323173887894, + "grad_norm": 0.9938616582301094, + "learning_rate": 4.999147936380488e-06, + "loss": 0.6131, + "step": 918 + }, + { + "epoch": 0.05596321895076577, + "grad_norm": 0.9798743780188272, + "learning_rate": 4.999145851940664e-06, + "loss": 0.5556, + "step": 919 + }, + { + "epoch": 0.05602411472764364, + "grad_norm": 0.9300771167017353, + "learning_rate": 4.9991437649547595e-06, + "loss": 0.5776, + "step": 920 + }, + { + "epoch": 0.05608501050452151, + "grad_norm": 1.0847118771233735, + "learning_rate": 4.99914167542278e-06, + "loss": 0.548, + "step": 921 + }, + { + "epoch": 0.056145906281399385, + "grad_norm": 0.9282673970660102, + "learning_rate": 4.999139583344728e-06, + "loss": 0.5492, + "step": 922 + }, + { + "epoch": 0.05620680205827726, + "grad_norm": 0.9706297051493294, + "learning_rate": 4.9991374887206046e-06, + "loss": 0.5056, + "step": 923 + }, + { + "epoch": 0.05626769783515513, + "grad_norm": 0.9066904517268061, + "learning_rate": 4.999135391550411e-06, + "loss": 0.5912, + "step": 924 + }, + { + "epoch": 0.056328593612033004, + "grad_norm": 1.0242793581786516, + "learning_rate": 4.99913329183415e-06, + "loss": 0.5405, + "step": 925 + }, + { + "epoch": 0.05638948938891088, + "grad_norm": 0.9012103028359335, + "learning_rate": 4.999131189571825e-06, + "loss": 0.571, + "step": 926 + }, + { + "epoch": 0.05645038516578875, + "grad_norm": 0.9875609443521314, + "learning_rate": 4.999129084763437e-06, + "loss": 0.6046, + "step": 927 + }, + { + "epoch": 0.05651128094266663, + "grad_norm": 1.1107652948365199, + "learning_rate": 4.999126977408987e-06, + "loss": 0.5697, + "step": 928 + }, + { + "epoch": 0.0565721767195445, + "grad_norm": 0.9817597955960062, + "learning_rate": 4.999124867508479e-06, + "loss": 0.6138, + "step": 929 + }, + { + "epoch": 0.056633072496422375, + "grad_norm": 0.958178297368871, + "learning_rate": 4.999122755061915e-06, + "loss": 0.5918, + "step": 930 + }, + { + "epoch": 0.05669396827330025, + "grad_norm": 0.8915922827243015, + "learning_rate": 4.999120640069297e-06, + "loss": 0.5916, + "step": 931 + }, + { + "epoch": 0.05675486405017812, + "grad_norm": 1.0819682663381285, + "learning_rate": 4.999118522530626e-06, + "loss": 0.6275, + "step": 932 + }, + { + "epoch": 0.056815759827055994, + "grad_norm": 0.9612767679043457, + "learning_rate": 4.999116402445905e-06, + "loss": 0.6339, + "step": 933 + }, + { + "epoch": 0.056876655603933866, + "grad_norm": 1.012949804153997, + "learning_rate": 4.999114279815137e-06, + "loss": 0.6007, + "step": 934 + }, + { + "epoch": 0.05693755138081174, + "grad_norm": 0.9924375420962076, + "learning_rate": 4.999112154638322e-06, + "loss": 0.5566, + "step": 935 + }, + { + "epoch": 0.05699844715768961, + "grad_norm": 0.9743777338841779, + "learning_rate": 4.999110026915465e-06, + "loss": 0.5552, + "step": 936 + }, + { + "epoch": 0.057059342934567485, + "grad_norm": 1.04262078070277, + "learning_rate": 4.9991078966465665e-06, + "loss": 0.5471, + "step": 937 + }, + { + "epoch": 0.057120238711445365, + "grad_norm": 1.1653106656095904, + "learning_rate": 4.999105763831628e-06, + "loss": 0.575, + "step": 938 + }, + { + "epoch": 0.05718113448832324, + "grad_norm": 0.9610113750728128, + "learning_rate": 4.999103628470653e-06, + "loss": 0.5534, + "step": 939 + }, + { + "epoch": 0.05724203026520111, + "grad_norm": 1.0845247767025128, + "learning_rate": 4.999101490563642e-06, + "loss": 0.6186, + "step": 940 + }, + { + "epoch": 0.05730292604207898, + "grad_norm": 0.9450490153760482, + "learning_rate": 4.9990993501106e-06, + "loss": 0.5765, + "step": 941 + }, + { + "epoch": 0.057363821818956856, + "grad_norm": 0.9389444131240168, + "learning_rate": 4.999097207111527e-06, + "loss": 0.5432, + "step": 942 + }, + { + "epoch": 0.05742471759583473, + "grad_norm": 1.0247710972217723, + "learning_rate": 4.999095061566426e-06, + "loss": 0.5902, + "step": 943 + }, + { + "epoch": 0.0574856133727126, + "grad_norm": 1.0023695737922138, + "learning_rate": 4.999092913475298e-06, + "loss": 0.5639, + "step": 944 + }, + { + "epoch": 0.057546509149590475, + "grad_norm": 0.9459005809385052, + "learning_rate": 4.999090762838147e-06, + "loss": 0.6156, + "step": 945 + }, + { + "epoch": 0.05760740492646835, + "grad_norm": 0.9201949013742049, + "learning_rate": 4.999088609654973e-06, + "loss": 0.5655, + "step": 946 + }, + { + "epoch": 0.05766830070334622, + "grad_norm": 0.9783418905613359, + "learning_rate": 4.999086453925781e-06, + "loss": 0.5255, + "step": 947 + }, + { + "epoch": 0.05772919648022409, + "grad_norm": 1.0413227822670452, + "learning_rate": 4.999084295650572e-06, + "loss": 0.6082, + "step": 948 + }, + { + "epoch": 0.05779009225710197, + "grad_norm": 1.0100024236177638, + "learning_rate": 4.999082134829346e-06, + "loss": 0.5836, + "step": 949 + }, + { + "epoch": 0.057850988033979846, + "grad_norm": 0.9883102240636047, + "learning_rate": 4.999079971462108e-06, + "loss": 0.5491, + "step": 950 + }, + { + "epoch": 0.05791188381085772, + "grad_norm": 0.9952166904643946, + "learning_rate": 4.9990778055488595e-06, + "loss": 0.6341, + "step": 951 + }, + { + "epoch": 0.05797277958773559, + "grad_norm": 0.9630369906433324, + "learning_rate": 4.999075637089602e-06, + "loss": 0.6206, + "step": 952 + }, + { + "epoch": 0.058033675364613464, + "grad_norm": 0.9232431552310361, + "learning_rate": 4.9990734660843385e-06, + "loss": 0.5584, + "step": 953 + }, + { + "epoch": 0.05809457114149134, + "grad_norm": 0.9656190323655853, + "learning_rate": 4.999071292533071e-06, + "loss": 0.5972, + "step": 954 + }, + { + "epoch": 0.05815546691836921, + "grad_norm": 1.1141077254197984, + "learning_rate": 4.999069116435802e-06, + "loss": 0.5993, + "step": 955 + }, + { + "epoch": 0.05821636269524708, + "grad_norm": 0.9578627542401189, + "learning_rate": 4.999066937792533e-06, + "loss": 0.597, + "step": 956 + }, + { + "epoch": 0.058277258472124956, + "grad_norm": 1.0118483158015108, + "learning_rate": 4.999064756603266e-06, + "loss": 0.5723, + "step": 957 + }, + { + "epoch": 0.05833815424900283, + "grad_norm": 0.9347829753802769, + "learning_rate": 4.999062572868004e-06, + "loss": 0.5311, + "step": 958 + }, + { + "epoch": 0.05839905002588071, + "grad_norm": 0.9821849686024163, + "learning_rate": 4.999060386586749e-06, + "loss": 0.6159, + "step": 959 + }, + { + "epoch": 0.05845994580275858, + "grad_norm": 0.9625982163690116, + "learning_rate": 4.999058197759504e-06, + "loss": 0.6315, + "step": 960 + }, + { + "epoch": 0.058520841579636454, + "grad_norm": 0.985970204798722, + "learning_rate": 4.99905600638627e-06, + "loss": 0.5879, + "step": 961 + }, + { + "epoch": 0.05858173735651433, + "grad_norm": 0.9040630502875485, + "learning_rate": 4.99905381246705e-06, + "loss": 0.5834, + "step": 962 + }, + { + "epoch": 0.0586426331333922, + "grad_norm": 0.9529992540525056, + "learning_rate": 4.999051616001845e-06, + "loss": 0.5297, + "step": 963 + }, + { + "epoch": 0.05870352891027007, + "grad_norm": 1.0177628538361756, + "learning_rate": 4.999049416990659e-06, + "loss": 0.5658, + "step": 964 + }, + { + "epoch": 0.058764424687147945, + "grad_norm": 1.025677254273749, + "learning_rate": 4.999047215433493e-06, + "loss": 0.5683, + "step": 965 + }, + { + "epoch": 0.05882532046402582, + "grad_norm": 0.9262938445100659, + "learning_rate": 4.9990450113303494e-06, + "loss": 0.6469, + "step": 966 + }, + { + "epoch": 0.05888621624090369, + "grad_norm": 1.0101484030065253, + "learning_rate": 4.999042804681231e-06, + "loss": 0.5898, + "step": 967 + }, + { + "epoch": 0.058947112017781564, + "grad_norm": 0.9656606109132535, + "learning_rate": 4.99904059548614e-06, + "loss": 0.6228, + "step": 968 + }, + { + "epoch": 0.059008007794659444, + "grad_norm": 0.9758478490957241, + "learning_rate": 4.999038383745078e-06, + "loss": 0.6303, + "step": 969 + }, + { + "epoch": 0.059068903571537316, + "grad_norm": 0.9809284115056828, + "learning_rate": 4.9990361694580485e-06, + "loss": 0.5532, + "step": 970 + }, + { + "epoch": 0.05912979934841519, + "grad_norm": 0.9660620196980217, + "learning_rate": 4.999033952625052e-06, + "loss": 0.5676, + "step": 971 + }, + { + "epoch": 0.05919069512529306, + "grad_norm": 0.982818115263906, + "learning_rate": 4.999031733246092e-06, + "loss": 0.5945, + "step": 972 + }, + { + "epoch": 0.059251590902170935, + "grad_norm": 1.0238410769111408, + "learning_rate": 4.99902951132117e-06, + "loss": 0.6221, + "step": 973 + }, + { + "epoch": 0.05931248667904881, + "grad_norm": 0.9452475731268106, + "learning_rate": 4.99902728685029e-06, + "loss": 0.5963, + "step": 974 + }, + { + "epoch": 0.05937338245592668, + "grad_norm": 0.9353972099363475, + "learning_rate": 4.999025059833451e-06, + "loss": 0.5465, + "step": 975 + }, + { + "epoch": 0.059434278232804554, + "grad_norm": 0.9613843024143783, + "learning_rate": 4.999022830270659e-06, + "loss": 0.6286, + "step": 976 + }, + { + "epoch": 0.059495174009682426, + "grad_norm": 0.9850293263276708, + "learning_rate": 4.999020598161913e-06, + "loss": 0.6424, + "step": 977 + }, + { + "epoch": 0.0595560697865603, + "grad_norm": 0.9243184469812022, + "learning_rate": 4.9990183635072174e-06, + "loss": 0.5962, + "step": 978 + }, + { + "epoch": 0.05961696556343818, + "grad_norm": 1.016453642250704, + "learning_rate": 4.9990161263065744e-06, + "loss": 0.5646, + "step": 979 + }, + { + "epoch": 0.05967786134031605, + "grad_norm": 1.035162138445804, + "learning_rate": 4.999013886559986e-06, + "loss": 0.6231, + "step": 980 + }, + { + "epoch": 0.059738757117193925, + "grad_norm": 0.9630200059730052, + "learning_rate": 4.999011644267453e-06, + "loss": 0.6196, + "step": 981 + }, + { + "epoch": 0.0597996528940718, + "grad_norm": 1.0448474251584412, + "learning_rate": 4.999009399428979e-06, + "loss": 0.5697, + "step": 982 + }, + { + "epoch": 0.05986054867094967, + "grad_norm": 0.999981067244038, + "learning_rate": 4.999007152044567e-06, + "loss": 0.5868, + "step": 983 + }, + { + "epoch": 0.05992144444782754, + "grad_norm": 0.9744796315018791, + "learning_rate": 4.9990049021142174e-06, + "loss": 0.5211, + "step": 984 + }, + { + "epoch": 0.059982340224705416, + "grad_norm": 0.9276972091988175, + "learning_rate": 4.999002649637935e-06, + "loss": 0.6277, + "step": 985 + }, + { + "epoch": 0.06004323600158329, + "grad_norm": 0.9113780601713034, + "learning_rate": 4.9990003946157195e-06, + "loss": 0.5607, + "step": 986 + }, + { + "epoch": 0.06010413177846116, + "grad_norm": 0.9739931912062036, + "learning_rate": 4.998998137047575e-06, + "loss": 0.599, + "step": 987 + }, + { + "epoch": 0.060165027555339035, + "grad_norm": 0.9668588210184933, + "learning_rate": 4.998995876933503e-06, + "loss": 0.6105, + "step": 988 + }, + { + "epoch": 0.06022592333221691, + "grad_norm": 0.9332709377713672, + "learning_rate": 4.998993614273505e-06, + "loss": 0.4966, + "step": 989 + }, + { + "epoch": 0.06028681910909479, + "grad_norm": 1.0368196759374475, + "learning_rate": 4.998991349067585e-06, + "loss": 0.5705, + "step": 990 + }, + { + "epoch": 0.06034771488597266, + "grad_norm": 0.9974037908910551, + "learning_rate": 4.998989081315745e-06, + "loss": 0.5861, + "step": 991 + }, + { + "epoch": 0.06040861066285053, + "grad_norm": 0.9573758346726627, + "learning_rate": 4.998986811017986e-06, + "loss": 0.5793, + "step": 992 + }, + { + "epoch": 0.060469506439728406, + "grad_norm": 0.9953272158318682, + "learning_rate": 4.998984538174313e-06, + "loss": 0.6221, + "step": 993 + }, + { + "epoch": 0.06053040221660628, + "grad_norm": 1.1149007331849212, + "learning_rate": 4.998982262784725e-06, + "loss": 0.6075, + "step": 994 + }, + { + "epoch": 0.06059129799348415, + "grad_norm": 1.0312247660065077, + "learning_rate": 4.998979984849226e-06, + "loss": 0.58, + "step": 995 + }, + { + "epoch": 0.060652193770362024, + "grad_norm": 1.0180439298478936, + "learning_rate": 4.998977704367818e-06, + "loss": 0.5572, + "step": 996 + }, + { + "epoch": 0.0607130895472399, + "grad_norm": 1.1309483453352378, + "learning_rate": 4.998975421340504e-06, + "loss": 0.5389, + "step": 997 + }, + { + "epoch": 0.06077398532411777, + "grad_norm": 0.9311858795363546, + "learning_rate": 4.998973135767285e-06, + "loss": 0.6189, + "step": 998 + }, + { + "epoch": 0.06083488110099564, + "grad_norm": 1.0640609902010916, + "learning_rate": 4.998970847648165e-06, + "loss": 0.5642, + "step": 999 + }, + { + "epoch": 0.06089577687787352, + "grad_norm": 0.988276046726626, + "learning_rate": 4.998968556983145e-06, + "loss": 0.5866, + "step": 1000 + }, + { + "epoch": 0.060956672654751395, + "grad_norm": 0.9615023882318459, + "learning_rate": 4.998966263772228e-06, + "loss": 0.5252, + "step": 1001 + }, + { + "epoch": 0.06101756843162927, + "grad_norm": 0.8900036389478199, + "learning_rate": 4.998963968015416e-06, + "loss": 0.5887, + "step": 1002 + }, + { + "epoch": 0.06107846420850714, + "grad_norm": 0.8879068157206109, + "learning_rate": 4.998961669712711e-06, + "loss": 0.6085, + "step": 1003 + }, + { + "epoch": 0.061139359985385014, + "grad_norm": 1.0825580494935745, + "learning_rate": 4.998959368864117e-06, + "loss": 0.5212, + "step": 1004 + }, + { + "epoch": 0.06120025576226289, + "grad_norm": 1.0409385027245937, + "learning_rate": 4.998957065469634e-06, + "loss": 0.5686, + "step": 1005 + }, + { + "epoch": 0.06126115153914076, + "grad_norm": 0.9492060764973137, + "learning_rate": 4.998954759529265e-06, + "loss": 0.5927, + "step": 1006 + }, + { + "epoch": 0.06132204731601863, + "grad_norm": 1.048044810796853, + "learning_rate": 4.998952451043014e-06, + "loss": 0.5682, + "step": 1007 + }, + { + "epoch": 0.061382943092896505, + "grad_norm": 0.995580997141499, + "learning_rate": 4.998950140010882e-06, + "loss": 0.5913, + "step": 1008 + }, + { + "epoch": 0.06144383886977438, + "grad_norm": 0.9932319534639721, + "learning_rate": 4.998947826432871e-06, + "loss": 0.6104, + "step": 1009 + }, + { + "epoch": 0.06150473464665226, + "grad_norm": 0.9556961596428207, + "learning_rate": 4.998945510308985e-06, + "loss": 0.6173, + "step": 1010 + }, + { + "epoch": 0.06156563042353013, + "grad_norm": 1.0119047229177949, + "learning_rate": 4.998943191639225e-06, + "loss": 0.5565, + "step": 1011 + }, + { + "epoch": 0.061626526200408004, + "grad_norm": 1.0398035401782373, + "learning_rate": 4.9989408704235935e-06, + "loss": 0.6354, + "step": 1012 + }, + { + "epoch": 0.061687421977285876, + "grad_norm": 0.969674417666448, + "learning_rate": 4.998938546662092e-06, + "loss": 0.5306, + "step": 1013 + }, + { + "epoch": 0.06174831775416375, + "grad_norm": 1.0063879523780435, + "learning_rate": 4.998936220354726e-06, + "loss": 0.5818, + "step": 1014 + }, + { + "epoch": 0.06180921353104162, + "grad_norm": 0.8904749085346283, + "learning_rate": 4.998933891501493e-06, + "loss": 0.601, + "step": 1015 + }, + { + "epoch": 0.061870109307919495, + "grad_norm": 1.013446326427147, + "learning_rate": 4.9989315601024e-06, + "loss": 0.5847, + "step": 1016 + }, + { + "epoch": 0.06193100508479737, + "grad_norm": 0.9571432349215787, + "learning_rate": 4.998929226157447e-06, + "loss": 0.5635, + "step": 1017 + }, + { + "epoch": 0.06199190086167524, + "grad_norm": 1.050420964096867, + "learning_rate": 4.998926889666636e-06, + "loss": 0.5378, + "step": 1018 + }, + { + "epoch": 0.062052796638553114, + "grad_norm": 1.0986099977093682, + "learning_rate": 4.998924550629972e-06, + "loss": 0.5794, + "step": 1019 + }, + { + "epoch": 0.06211369241543099, + "grad_norm": 0.9694436907463465, + "learning_rate": 4.998922209047454e-06, + "loss": 0.5515, + "step": 1020 + }, + { + "epoch": 0.062174588192308866, + "grad_norm": 0.9709300574970176, + "learning_rate": 4.998919864919087e-06, + "loss": 0.594, + "step": 1021 + }, + { + "epoch": 0.06223548396918674, + "grad_norm": 0.9760997300259705, + "learning_rate": 4.998917518244872e-06, + "loss": 0.5374, + "step": 1022 + }, + { + "epoch": 0.06229637974606461, + "grad_norm": 1.0520038812715131, + "learning_rate": 4.998915169024812e-06, + "loss": 0.571, + "step": 1023 + }, + { + "epoch": 0.062357275522942485, + "grad_norm": 0.9633809633416157, + "learning_rate": 4.998912817258909e-06, + "loss": 0.5713, + "step": 1024 + }, + { + "epoch": 0.06241817129982036, + "grad_norm": 1.0732327876368675, + "learning_rate": 4.9989104629471655e-06, + "loss": 0.6511, + "step": 1025 + }, + { + "epoch": 0.06247906707669823, + "grad_norm": 1.0379288026350502, + "learning_rate": 4.998908106089585e-06, + "loss": 0.5848, + "step": 1026 + }, + { + "epoch": 0.0625399628535761, + "grad_norm": 0.9588500683068157, + "learning_rate": 4.998905746686167e-06, + "loss": 0.5315, + "step": 1027 + }, + { + "epoch": 0.06260085863045398, + "grad_norm": 0.9222342926626905, + "learning_rate": 4.998903384736917e-06, + "loss": 0.6112, + "step": 1028 + }, + { + "epoch": 0.06266175440733185, + "grad_norm": 1.0003480541846217, + "learning_rate": 4.998901020241837e-06, + "loss": 0.5928, + "step": 1029 + }, + { + "epoch": 0.06272265018420972, + "grad_norm": 1.0040783299477993, + "learning_rate": 4.998898653200926e-06, + "loss": 0.6068, + "step": 1030 + }, + { + "epoch": 0.0627835459610876, + "grad_norm": 0.9873909827023611, + "learning_rate": 4.998896283614191e-06, + "loss": 0.595, + "step": 1031 + }, + { + "epoch": 0.06284444173796547, + "grad_norm": 0.9913778116649314, + "learning_rate": 4.998893911481632e-06, + "loss": 0.6035, + "step": 1032 + }, + { + "epoch": 0.06290533751484334, + "grad_norm": 1.0191250464875845, + "learning_rate": 4.998891536803252e-06, + "loss": 0.5622, + "step": 1033 + }, + { + "epoch": 0.06296623329172121, + "grad_norm": 1.0339865024020396, + "learning_rate": 4.998889159579054e-06, + "loss": 0.5802, + "step": 1034 + }, + { + "epoch": 0.06302712906859909, + "grad_norm": 0.9624897366021908, + "learning_rate": 4.998886779809038e-06, + "loss": 0.5951, + "step": 1035 + }, + { + "epoch": 0.06308802484547697, + "grad_norm": 1.0082980554949585, + "learning_rate": 4.998884397493209e-06, + "loss": 0.5695, + "step": 1036 + }, + { + "epoch": 0.06314892062235485, + "grad_norm": 0.9835347177182159, + "learning_rate": 4.998882012631568e-06, + "loss": 0.5524, + "step": 1037 + }, + { + "epoch": 0.06320981639923272, + "grad_norm": 0.9228106346969007, + "learning_rate": 4.998879625224119e-06, + "loss": 0.5672, + "step": 1038 + }, + { + "epoch": 0.06327071217611059, + "grad_norm": 0.971412673785772, + "learning_rate": 4.998877235270862e-06, + "loss": 0.5783, + "step": 1039 + }, + { + "epoch": 0.06333160795298846, + "grad_norm": 0.9759853950004008, + "learning_rate": 4.998874842771802e-06, + "loss": 0.5585, + "step": 1040 + }, + { + "epoch": 0.06339250372986634, + "grad_norm": 0.9295364897139666, + "learning_rate": 4.998872447726939e-06, + "loss": 0.5504, + "step": 1041 + }, + { + "epoch": 0.06345339950674421, + "grad_norm": 1.0651623088783224, + "learning_rate": 4.9988700501362775e-06, + "loss": 0.5283, + "step": 1042 + }, + { + "epoch": 0.06351429528362208, + "grad_norm": 0.985476450472393, + "learning_rate": 4.9988676499998194e-06, + "loss": 0.5524, + "step": 1043 + }, + { + "epoch": 0.06357519106049996, + "grad_norm": 1.026868734688713, + "learning_rate": 4.9988652473175666e-06, + "loss": 0.5381, + "step": 1044 + }, + { + "epoch": 0.06363608683737783, + "grad_norm": 1.0022146676368695, + "learning_rate": 4.998862842089522e-06, + "loss": 0.5647, + "step": 1045 + }, + { + "epoch": 0.0636969826142557, + "grad_norm": 1.0025426596597533, + "learning_rate": 4.9988604343156874e-06, + "loss": 0.5777, + "step": 1046 + }, + { + "epoch": 0.06375787839113357, + "grad_norm": 0.9672946445464287, + "learning_rate": 4.998858023996066e-06, + "loss": 0.5916, + "step": 1047 + }, + { + "epoch": 0.06381877416801145, + "grad_norm": 0.997312534248901, + "learning_rate": 4.9988556111306605e-06, + "loss": 0.5679, + "step": 1048 + }, + { + "epoch": 0.06387966994488932, + "grad_norm": 1.0401262671804936, + "learning_rate": 4.998853195719473e-06, + "loss": 0.5844, + "step": 1049 + }, + { + "epoch": 0.06394056572176719, + "grad_norm": 0.9723278895475567, + "learning_rate": 4.998850777762505e-06, + "loss": 0.6037, + "step": 1050 + }, + { + "epoch": 0.06400146149864507, + "grad_norm": 0.9756063273709292, + "learning_rate": 4.99884835725976e-06, + "loss": 0.5945, + "step": 1051 + }, + { + "epoch": 0.06406235727552294, + "grad_norm": 0.888221745643605, + "learning_rate": 4.99884593421124e-06, + "loss": 0.571, + "step": 1052 + }, + { + "epoch": 0.06412325305240081, + "grad_norm": 1.0378789144161147, + "learning_rate": 4.9988435086169485e-06, + "loss": 0.642, + "step": 1053 + }, + { + "epoch": 0.06418414882927868, + "grad_norm": 0.9559129206021948, + "learning_rate": 4.998841080476886e-06, + "loss": 0.6319, + "step": 1054 + }, + { + "epoch": 0.06424504460615656, + "grad_norm": 0.993884959586545, + "learning_rate": 4.998838649791057e-06, + "loss": 0.5407, + "step": 1055 + }, + { + "epoch": 0.06430594038303444, + "grad_norm": 0.9773463166639096, + "learning_rate": 4.998836216559463e-06, + "loss": 0.5693, + "step": 1056 + }, + { + "epoch": 0.06436683615991232, + "grad_norm": 0.9700417314258541, + "learning_rate": 4.998833780782107e-06, + "loss": 0.5381, + "step": 1057 + }, + { + "epoch": 0.06442773193679019, + "grad_norm": 0.9854544687656484, + "learning_rate": 4.998831342458991e-06, + "loss": 0.5076, + "step": 1058 + }, + { + "epoch": 0.06448862771366806, + "grad_norm": 0.9601528857106746, + "learning_rate": 4.998828901590117e-06, + "loss": 0.489, + "step": 1059 + }, + { + "epoch": 0.06454952349054593, + "grad_norm": 1.01240951155444, + "learning_rate": 4.998826458175489e-06, + "loss": 0.5273, + "step": 1060 + }, + { + "epoch": 0.06461041926742381, + "grad_norm": 0.9719382259073027, + "learning_rate": 4.998824012215108e-06, + "loss": 0.6162, + "step": 1061 + }, + { + "epoch": 0.06467131504430168, + "grad_norm": 0.9859552643660942, + "learning_rate": 4.998821563708977e-06, + "loss": 0.5111, + "step": 1062 + }, + { + "epoch": 0.06473221082117955, + "grad_norm": 0.9889614016627707, + "learning_rate": 4.998819112657098e-06, + "loss": 0.5596, + "step": 1063 + }, + { + "epoch": 0.06479310659805743, + "grad_norm": 0.9131786556684204, + "learning_rate": 4.998816659059474e-06, + "loss": 0.5755, + "step": 1064 + }, + { + "epoch": 0.0648540023749353, + "grad_norm": 1.0471731208773147, + "learning_rate": 4.9988142029161084e-06, + "loss": 0.6015, + "step": 1065 + }, + { + "epoch": 0.06491489815181317, + "grad_norm": 1.0916177345609486, + "learning_rate": 4.998811744227002e-06, + "loss": 0.5618, + "step": 1066 + }, + { + "epoch": 0.06497579392869104, + "grad_norm": 1.030900803175794, + "learning_rate": 4.998809282992159e-06, + "loss": 0.5473, + "step": 1067 + }, + { + "epoch": 0.06503668970556892, + "grad_norm": 1.012988665683088, + "learning_rate": 4.998806819211581e-06, + "loss": 0.5556, + "step": 1068 + }, + { + "epoch": 0.06509758548244679, + "grad_norm": 1.0308606890686238, + "learning_rate": 4.9988043528852706e-06, + "loss": 0.6032, + "step": 1069 + }, + { + "epoch": 0.06515848125932466, + "grad_norm": 1.0003979070817486, + "learning_rate": 4.99880188401323e-06, + "loss": 0.5508, + "step": 1070 + }, + { + "epoch": 0.06521937703620254, + "grad_norm": 1.005335430255451, + "learning_rate": 4.998799412595462e-06, + "loss": 0.5265, + "step": 1071 + }, + { + "epoch": 0.06528027281308041, + "grad_norm": 1.0130087494584408, + "learning_rate": 4.998796938631969e-06, + "loss": 0.5857, + "step": 1072 + }, + { + "epoch": 0.06534116858995828, + "grad_norm": 1.0489067398765395, + "learning_rate": 4.998794462122754e-06, + "loss": 0.5952, + "step": 1073 + }, + { + "epoch": 0.06540206436683615, + "grad_norm": 0.9366542245563985, + "learning_rate": 4.998791983067818e-06, + "loss": 0.6004, + "step": 1074 + }, + { + "epoch": 0.06546296014371403, + "grad_norm": 0.9970823873553314, + "learning_rate": 4.998789501467166e-06, + "loss": 0.6117, + "step": 1075 + }, + { + "epoch": 0.0655238559205919, + "grad_norm": 0.9471243141965684, + "learning_rate": 4.998787017320799e-06, + "loss": 0.5644, + "step": 1076 + }, + { + "epoch": 0.06558475169746979, + "grad_norm": 1.0224714250757294, + "learning_rate": 4.998784530628719e-06, + "loss": 0.5302, + "step": 1077 + }, + { + "epoch": 0.06564564747434766, + "grad_norm": 1.0089448369346707, + "learning_rate": 4.99878204139093e-06, + "loss": 0.595, + "step": 1078 + }, + { + "epoch": 0.06570654325122553, + "grad_norm": 0.9755328186549233, + "learning_rate": 4.998779549607433e-06, + "loss": 0.5275, + "step": 1079 + }, + { + "epoch": 0.0657674390281034, + "grad_norm": 1.0926095520808843, + "learning_rate": 4.998777055278232e-06, + "loss": 0.4772, + "step": 1080 + }, + { + "epoch": 0.06582833480498128, + "grad_norm": 1.0285112897402637, + "learning_rate": 4.998774558403329e-06, + "loss": 0.5178, + "step": 1081 + }, + { + "epoch": 0.06588923058185915, + "grad_norm": 1.0475680607819775, + "learning_rate": 4.998772058982726e-06, + "loss": 0.5743, + "step": 1082 + }, + { + "epoch": 0.06595012635873702, + "grad_norm": 1.0076704965987107, + "learning_rate": 4.998769557016426e-06, + "loss": 0.5392, + "step": 1083 + }, + { + "epoch": 0.0660110221356149, + "grad_norm": 0.9213828584482909, + "learning_rate": 4.998767052504432e-06, + "loss": 0.552, + "step": 1084 + }, + { + "epoch": 0.06607191791249277, + "grad_norm": 0.9507968291714667, + "learning_rate": 4.998764545446746e-06, + "loss": 0.5786, + "step": 1085 + }, + { + "epoch": 0.06613281368937064, + "grad_norm": 1.046112233826315, + "learning_rate": 4.998762035843371e-06, + "loss": 0.515, + "step": 1086 + }, + { + "epoch": 0.06619370946624852, + "grad_norm": 0.9558113761476924, + "learning_rate": 4.998759523694308e-06, + "loss": 0.5409, + "step": 1087 + }, + { + "epoch": 0.06625460524312639, + "grad_norm": 1.0625338775837616, + "learning_rate": 4.998757008999562e-06, + "loss": 0.6052, + "step": 1088 + }, + { + "epoch": 0.06631550102000426, + "grad_norm": 1.0885209724630815, + "learning_rate": 4.9987544917591335e-06, + "loss": 0.5394, + "step": 1089 + }, + { + "epoch": 0.06637639679688213, + "grad_norm": 1.1523947457763393, + "learning_rate": 4.9987519719730256e-06, + "loss": 0.5526, + "step": 1090 + }, + { + "epoch": 0.06643729257376, + "grad_norm": 1.0455296794212945, + "learning_rate": 4.9987494496412414e-06, + "loss": 0.493, + "step": 1091 + }, + { + "epoch": 0.06649818835063788, + "grad_norm": 0.9206695246454, + "learning_rate": 4.998746924763784e-06, + "loss": 0.5513, + "step": 1092 + }, + { + "epoch": 0.06655908412751575, + "grad_norm": 1.0688938348338488, + "learning_rate": 4.998744397340655e-06, + "loss": 0.6174, + "step": 1093 + }, + { + "epoch": 0.06661997990439363, + "grad_norm": 0.9455674405267943, + "learning_rate": 4.9987418673718555e-06, + "loss": 0.5863, + "step": 1094 + }, + { + "epoch": 0.0666808756812715, + "grad_norm": 0.9582849901383308, + "learning_rate": 4.998739334857391e-06, + "loss": 0.5919, + "step": 1095 + }, + { + "epoch": 0.06674177145814937, + "grad_norm": 0.9712541828912208, + "learning_rate": 4.998736799797263e-06, + "loss": 0.5764, + "step": 1096 + }, + { + "epoch": 0.06680266723502726, + "grad_norm": 1.177735915768093, + "learning_rate": 4.998734262191474e-06, + "loss": 0.6355, + "step": 1097 + }, + { + "epoch": 0.06686356301190513, + "grad_norm": 0.9639192610879905, + "learning_rate": 4.998731722040026e-06, + "loss": 0.5663, + "step": 1098 + }, + { + "epoch": 0.066924458788783, + "grad_norm": 1.0868618625818645, + "learning_rate": 4.998729179342922e-06, + "loss": 0.563, + "step": 1099 + }, + { + "epoch": 0.06698535456566088, + "grad_norm": 0.9668223644606615, + "learning_rate": 4.998726634100166e-06, + "loss": 0.5946, + "step": 1100 + }, + { + "epoch": 0.06704625034253875, + "grad_norm": 1.0361331978325228, + "learning_rate": 4.998724086311758e-06, + "loss": 0.5771, + "step": 1101 + }, + { + "epoch": 0.06710714611941662, + "grad_norm": 1.0136516205811252, + "learning_rate": 4.998721535977702e-06, + "loss": 0.5439, + "step": 1102 + }, + { + "epoch": 0.0671680418962945, + "grad_norm": 0.9693332166873624, + "learning_rate": 4.998718983098e-06, + "loss": 0.5792, + "step": 1103 + }, + { + "epoch": 0.06722893767317237, + "grad_norm": 0.966619422324193, + "learning_rate": 4.998716427672656e-06, + "loss": 0.5242, + "step": 1104 + }, + { + "epoch": 0.06728983345005024, + "grad_norm": 0.981217783878956, + "learning_rate": 4.998713869701671e-06, + "loss": 0.5923, + "step": 1105 + }, + { + "epoch": 0.06735072922692811, + "grad_norm": 1.0037972478721944, + "learning_rate": 4.998711309185048e-06, + "loss": 0.5851, + "step": 1106 + }, + { + "epoch": 0.06741162500380599, + "grad_norm": 1.0188697580014867, + "learning_rate": 4.99870874612279e-06, + "loss": 0.6279, + "step": 1107 + }, + { + "epoch": 0.06747252078068386, + "grad_norm": 1.006247925677007, + "learning_rate": 4.9987061805149e-06, + "loss": 0.6215, + "step": 1108 + }, + { + "epoch": 0.06753341655756173, + "grad_norm": 1.100633696684614, + "learning_rate": 4.99870361236138e-06, + "loss": 0.5952, + "step": 1109 + }, + { + "epoch": 0.0675943123344396, + "grad_norm": 0.914982456389676, + "learning_rate": 4.998701041662233e-06, + "loss": 0.5904, + "step": 1110 + }, + { + "epoch": 0.06765520811131748, + "grad_norm": 0.9866660367589831, + "learning_rate": 4.998698468417461e-06, + "loss": 0.6209, + "step": 1111 + }, + { + "epoch": 0.06771610388819535, + "grad_norm": 0.9287597465851741, + "learning_rate": 4.998695892627067e-06, + "loss": 0.5828, + "step": 1112 + }, + { + "epoch": 0.06777699966507322, + "grad_norm": 1.1003454841382385, + "learning_rate": 4.9986933142910534e-06, + "loss": 0.6235, + "step": 1113 + }, + { + "epoch": 0.0678378954419511, + "grad_norm": 0.9922321887365004, + "learning_rate": 4.998690733409423e-06, + "loss": 0.583, + "step": 1114 + }, + { + "epoch": 0.06789879121882897, + "grad_norm": 0.9635948284733309, + "learning_rate": 4.998688149982178e-06, + "loss": 0.6688, + "step": 1115 + }, + { + "epoch": 0.06795968699570684, + "grad_norm": 1.0795566658264677, + "learning_rate": 4.998685564009322e-06, + "loss": 0.5719, + "step": 1116 + }, + { + "epoch": 0.06802058277258471, + "grad_norm": 1.0650639306623544, + "learning_rate": 4.998682975490857e-06, + "loss": 0.5265, + "step": 1117 + }, + { + "epoch": 0.0680814785494626, + "grad_norm": 0.9957836849040824, + "learning_rate": 4.998680384426786e-06, + "loss": 0.5882, + "step": 1118 + }, + { + "epoch": 0.06814237432634047, + "grad_norm": 1.0266044766736049, + "learning_rate": 4.998677790817112e-06, + "loss": 0.5541, + "step": 1119 + }, + { + "epoch": 0.06820327010321835, + "grad_norm": 0.9802192274852616, + "learning_rate": 4.998675194661835e-06, + "loss": 0.6428, + "step": 1120 + }, + { + "epoch": 0.06826416588009622, + "grad_norm": 1.005006294103443, + "learning_rate": 4.998672595960961e-06, + "loss": 0.5455, + "step": 1121 + }, + { + "epoch": 0.06832506165697409, + "grad_norm": 1.0183542431614239, + "learning_rate": 4.998669994714491e-06, + "loss": 0.5353, + "step": 1122 + }, + { + "epoch": 0.06838595743385197, + "grad_norm": 0.9770477351209569, + "learning_rate": 4.998667390922428e-06, + "loss": 0.5631, + "step": 1123 + }, + { + "epoch": 0.06844685321072984, + "grad_norm": 1.112643019779469, + "learning_rate": 4.998664784584775e-06, + "loss": 0.5962, + "step": 1124 + }, + { + "epoch": 0.06850774898760771, + "grad_norm": 0.9973371092242096, + "learning_rate": 4.9986621757015345e-06, + "loss": 0.5548, + "step": 1125 + }, + { + "epoch": 0.06856864476448558, + "grad_norm": 0.9877069245089684, + "learning_rate": 4.998659564272708e-06, + "loss": 0.5718, + "step": 1126 + }, + { + "epoch": 0.06862954054136346, + "grad_norm": 1.0047249413504298, + "learning_rate": 4.9986569502983e-06, + "loss": 0.5764, + "step": 1127 + }, + { + "epoch": 0.06869043631824133, + "grad_norm": 0.9854046285369198, + "learning_rate": 4.998654333778311e-06, + "loss": 0.5669, + "step": 1128 + }, + { + "epoch": 0.0687513320951192, + "grad_norm": 1.0365847617905175, + "learning_rate": 4.998651714712745e-06, + "loss": 0.6734, + "step": 1129 + }, + { + "epoch": 0.06881222787199708, + "grad_norm": 0.9608209747177409, + "learning_rate": 4.998649093101606e-06, + "loss": 0.6245, + "step": 1130 + }, + { + "epoch": 0.06887312364887495, + "grad_norm": 1.1286156592036898, + "learning_rate": 4.998646468944894e-06, + "loss": 0.4927, + "step": 1131 + }, + { + "epoch": 0.06893401942575282, + "grad_norm": 0.9993360210773147, + "learning_rate": 4.998643842242613e-06, + "loss": 0.4652, + "step": 1132 + }, + { + "epoch": 0.0689949152026307, + "grad_norm": 1.0340139170152896, + "learning_rate": 4.998641212994767e-06, + "loss": 0.5908, + "step": 1133 + }, + { + "epoch": 0.06905581097950857, + "grad_norm": 1.027267392846702, + "learning_rate": 4.998638581201356e-06, + "loss": 0.531, + "step": 1134 + }, + { + "epoch": 0.06911670675638644, + "grad_norm": 1.0371900217491317, + "learning_rate": 4.998635946862384e-06, + "loss": 0.4941, + "step": 1135 + }, + { + "epoch": 0.06917760253326431, + "grad_norm": 1.100154467361626, + "learning_rate": 4.998633309977854e-06, + "loss": 0.616, + "step": 1136 + }, + { + "epoch": 0.06923849831014219, + "grad_norm": 0.9698016221045916, + "learning_rate": 4.998630670547768e-06, + "loss": 0.5311, + "step": 1137 + }, + { + "epoch": 0.06929939408702007, + "grad_norm": 1.0121844959032837, + "learning_rate": 4.99862802857213e-06, + "loss": 0.5307, + "step": 1138 + }, + { + "epoch": 0.06936028986389794, + "grad_norm": 1.0171566608231233, + "learning_rate": 4.99862538405094e-06, + "loss": 0.5988, + "step": 1139 + }, + { + "epoch": 0.06942118564077582, + "grad_norm": 1.0969127201630895, + "learning_rate": 4.998622736984204e-06, + "loss": 0.5519, + "step": 1140 + }, + { + "epoch": 0.06948208141765369, + "grad_norm": 0.973346524939584, + "learning_rate": 4.9986200873719224e-06, + "loss": 0.5962, + "step": 1141 + }, + { + "epoch": 0.06954297719453156, + "grad_norm": 0.9374202928308082, + "learning_rate": 4.998617435214098e-06, + "loss": 0.6045, + "step": 1142 + }, + { + "epoch": 0.06960387297140944, + "grad_norm": 0.9877600934536377, + "learning_rate": 4.998614780510735e-06, + "loss": 0.5501, + "step": 1143 + }, + { + "epoch": 0.06966476874828731, + "grad_norm": 0.9281263549960233, + "learning_rate": 4.998612123261835e-06, + "loss": 0.6014, + "step": 1144 + }, + { + "epoch": 0.06972566452516518, + "grad_norm": 1.0244116103899692, + "learning_rate": 4.998609463467401e-06, + "loss": 0.5912, + "step": 1145 + }, + { + "epoch": 0.06978656030204305, + "grad_norm": 0.9783845150248924, + "learning_rate": 4.9986068011274356e-06, + "loss": 0.6021, + "step": 1146 + }, + { + "epoch": 0.06984745607892093, + "grad_norm": 1.0314553182330384, + "learning_rate": 4.998604136241941e-06, + "loss": 0.5593, + "step": 1147 + }, + { + "epoch": 0.0699083518557988, + "grad_norm": 1.0617005258732215, + "learning_rate": 4.998601468810921e-06, + "loss": 0.6167, + "step": 1148 + }, + { + "epoch": 0.06996924763267667, + "grad_norm": 0.9787644319417639, + "learning_rate": 4.998598798834377e-06, + "loss": 0.5635, + "step": 1149 + }, + { + "epoch": 0.07003014340955455, + "grad_norm": 1.0315274561255217, + "learning_rate": 4.998596126312314e-06, + "loss": 0.576, + "step": 1150 + }, + { + "epoch": 0.07009103918643242, + "grad_norm": 0.9973259441290757, + "learning_rate": 4.998593451244732e-06, + "loss": 0.4807, + "step": 1151 + }, + { + "epoch": 0.07015193496331029, + "grad_norm": 0.9447770460077095, + "learning_rate": 4.998590773631636e-06, + "loss": 0.5238, + "step": 1152 + }, + { + "epoch": 0.07021283074018816, + "grad_norm": 1.051885070012112, + "learning_rate": 4.998588093473027e-06, + "loss": 0.5496, + "step": 1153 + }, + { + "epoch": 0.07027372651706604, + "grad_norm": 1.0492274723426336, + "learning_rate": 4.998585410768908e-06, + "loss": 0.5803, + "step": 1154 + }, + { + "epoch": 0.07033462229394391, + "grad_norm": 1.022272407552834, + "learning_rate": 4.998582725519283e-06, + "loss": 0.6862, + "step": 1155 + }, + { + "epoch": 0.07039551807082178, + "grad_norm": 1.0197645810543696, + "learning_rate": 4.998580037724153e-06, + "loss": 0.5733, + "step": 1156 + }, + { + "epoch": 0.07045641384769966, + "grad_norm": 1.044850155500971, + "learning_rate": 4.998577347383522e-06, + "loss": 0.5856, + "step": 1157 + }, + { + "epoch": 0.07051730962457753, + "grad_norm": 1.019892171857956, + "learning_rate": 4.998574654497393e-06, + "loss": 0.5448, + "step": 1158 + }, + { + "epoch": 0.07057820540145542, + "grad_norm": 0.9660994332985852, + "learning_rate": 4.9985719590657665e-06, + "loss": 0.5697, + "step": 1159 + }, + { + "epoch": 0.07063910117833329, + "grad_norm": 1.024350506751212, + "learning_rate": 4.998569261088648e-06, + "loss": 0.5383, + "step": 1160 + }, + { + "epoch": 0.07069999695521116, + "grad_norm": 1.0020588073495769, + "learning_rate": 4.998566560566039e-06, + "loss": 0.5544, + "step": 1161 + }, + { + "epoch": 0.07076089273208903, + "grad_norm": 1.0209602207347863, + "learning_rate": 4.998563857497942e-06, + "loss": 0.5386, + "step": 1162 + }, + { + "epoch": 0.0708217885089669, + "grad_norm": 1.0574792912342732, + "learning_rate": 4.99856115188436e-06, + "loss": 0.5111, + "step": 1163 + }, + { + "epoch": 0.07088268428584478, + "grad_norm": 1.0086509144332119, + "learning_rate": 4.9985584437252965e-06, + "loss": 0.5755, + "step": 1164 + }, + { + "epoch": 0.07094358006272265, + "grad_norm": 0.9884474184798417, + "learning_rate": 4.998555733020753e-06, + "loss": 0.5677, + "step": 1165 + }, + { + "epoch": 0.07100447583960053, + "grad_norm": 1.0071668429949763, + "learning_rate": 4.998553019770733e-06, + "loss": 0.5585, + "step": 1166 + }, + { + "epoch": 0.0710653716164784, + "grad_norm": 0.9048342841336526, + "learning_rate": 4.9985503039752396e-06, + "loss": 0.6033, + "step": 1167 + }, + { + "epoch": 0.07112626739335627, + "grad_norm": 1.060020860396882, + "learning_rate": 4.998547585634275e-06, + "loss": 0.5771, + "step": 1168 + }, + { + "epoch": 0.07118716317023414, + "grad_norm": 0.9150263863933339, + "learning_rate": 4.998544864747841e-06, + "loss": 0.5861, + "step": 1169 + }, + { + "epoch": 0.07124805894711202, + "grad_norm": 0.9986598364460237, + "learning_rate": 4.998542141315942e-06, + "loss": 0.5622, + "step": 1170 + }, + { + "epoch": 0.07130895472398989, + "grad_norm": 1.0845580988350936, + "learning_rate": 4.998539415338581e-06, + "loss": 0.566, + "step": 1171 + }, + { + "epoch": 0.07136985050086776, + "grad_norm": 1.0985118493485801, + "learning_rate": 4.99853668681576e-06, + "loss": 0.525, + "step": 1172 + }, + { + "epoch": 0.07143074627774564, + "grad_norm": 1.0170281646705763, + "learning_rate": 4.9985339557474804e-06, + "loss": 0.5485, + "step": 1173 + }, + { + "epoch": 0.07149164205462351, + "grad_norm": 0.9479186025390727, + "learning_rate": 4.998531222133747e-06, + "loss": 0.5503, + "step": 1174 + }, + { + "epoch": 0.07155253783150138, + "grad_norm": 0.9477571767316635, + "learning_rate": 4.998528485974562e-06, + "loss": 0.5829, + "step": 1175 + }, + { + "epoch": 0.07161343360837925, + "grad_norm": 1.0029526340235162, + "learning_rate": 4.998525747269928e-06, + "loss": 0.5529, + "step": 1176 + }, + { + "epoch": 0.07167432938525713, + "grad_norm": 1.003240813093453, + "learning_rate": 4.998523006019849e-06, + "loss": 0.5326, + "step": 1177 + }, + { + "epoch": 0.071735225162135, + "grad_norm": 1.0420448572655625, + "learning_rate": 4.998520262224325e-06, + "loss": 0.6129, + "step": 1178 + }, + { + "epoch": 0.07179612093901289, + "grad_norm": 0.951074061795172, + "learning_rate": 4.998517515883361e-06, + "loss": 0.5948, + "step": 1179 + }, + { + "epoch": 0.07185701671589076, + "grad_norm": 1.0552976507732788, + "learning_rate": 4.99851476699696e-06, + "loss": 0.549, + "step": 1180 + }, + { + "epoch": 0.07191791249276863, + "grad_norm": 0.9669334924908903, + "learning_rate": 4.998512015565124e-06, + "loss": 0.6119, + "step": 1181 + }, + { + "epoch": 0.0719788082696465, + "grad_norm": 1.0720507105725434, + "learning_rate": 4.998509261587855e-06, + "loss": 0.569, + "step": 1182 + }, + { + "epoch": 0.07203970404652438, + "grad_norm": 1.0331390123016844, + "learning_rate": 4.998506505065158e-06, + "loss": 0.5765, + "step": 1183 + }, + { + "epoch": 0.07210059982340225, + "grad_norm": 1.0390285014564644, + "learning_rate": 4.998503745997034e-06, + "loss": 0.576, + "step": 1184 + }, + { + "epoch": 0.07216149560028012, + "grad_norm": 1.0016893206957649, + "learning_rate": 4.9985009843834855e-06, + "loss": 0.5732, + "step": 1185 + }, + { + "epoch": 0.072222391377158, + "grad_norm": 1.0301703449090118, + "learning_rate": 4.998498220224517e-06, + "loss": 0.5929, + "step": 1186 + }, + { + "epoch": 0.07228328715403587, + "grad_norm": 1.0428638959350198, + "learning_rate": 4.99849545352013e-06, + "loss": 0.5876, + "step": 1187 + }, + { + "epoch": 0.07234418293091374, + "grad_norm": 0.9563407693078828, + "learning_rate": 4.998492684270329e-06, + "loss": 0.546, + "step": 1188 + }, + { + "epoch": 0.07240507870779161, + "grad_norm": 0.9543710589373997, + "learning_rate": 4.998489912475114e-06, + "loss": 0.5097, + "step": 1189 + }, + { + "epoch": 0.07246597448466949, + "grad_norm": 1.0140143672391557, + "learning_rate": 4.9984871381344904e-06, + "loss": 0.6019, + "step": 1190 + }, + { + "epoch": 0.07252687026154736, + "grad_norm": 0.9893222583812554, + "learning_rate": 4.998484361248459e-06, + "loss": 0.5785, + "step": 1191 + }, + { + "epoch": 0.07258776603842523, + "grad_norm": 0.9634497394159248, + "learning_rate": 4.998481581817025e-06, + "loss": 0.602, + "step": 1192 + }, + { + "epoch": 0.0726486618153031, + "grad_norm": 1.0277842359613598, + "learning_rate": 4.998478799840189e-06, + "loss": 0.6183, + "step": 1193 + }, + { + "epoch": 0.07270955759218098, + "grad_norm": 1.0561216853444582, + "learning_rate": 4.998476015317955e-06, + "loss": 0.5511, + "step": 1194 + }, + { + "epoch": 0.07277045336905885, + "grad_norm": 1.0315341492319363, + "learning_rate": 4.998473228250326e-06, + "loss": 0.6078, + "step": 1195 + }, + { + "epoch": 0.07283134914593672, + "grad_norm": 0.9859338612643538, + "learning_rate": 4.9984704386373036e-06, + "loss": 0.5651, + "step": 1196 + }, + { + "epoch": 0.0728922449228146, + "grad_norm": 0.941229271235631, + "learning_rate": 4.998467646478892e-06, + "loss": 0.586, + "step": 1197 + }, + { + "epoch": 0.07295314069969247, + "grad_norm": 1.0112568353857272, + "learning_rate": 4.998464851775094e-06, + "loss": 0.549, + "step": 1198 + }, + { + "epoch": 0.07301403647657034, + "grad_norm": 0.9482523307783411, + "learning_rate": 4.998462054525911e-06, + "loss": 0.5913, + "step": 1199 + }, + { + "epoch": 0.07307493225344823, + "grad_norm": 0.997554836572336, + "learning_rate": 4.998459254731347e-06, + "loss": 0.5619, + "step": 1200 + }, + { + "epoch": 0.0731358280303261, + "grad_norm": 0.934055213659878, + "learning_rate": 4.998456452391405e-06, + "loss": 0.5249, + "step": 1201 + }, + { + "epoch": 0.07319672380720398, + "grad_norm": 1.1067300588073856, + "learning_rate": 4.9984536475060875e-06, + "loss": 0.5643, + "step": 1202 + }, + { + "epoch": 0.07325761958408185, + "grad_norm": 0.9987728682007665, + "learning_rate": 4.998450840075397e-06, + "loss": 0.5538, + "step": 1203 + }, + { + "epoch": 0.07331851536095972, + "grad_norm": 1.0118252132019658, + "learning_rate": 4.998448030099337e-06, + "loss": 0.6228, + "step": 1204 + }, + { + "epoch": 0.0733794111378376, + "grad_norm": 1.109523566495688, + "learning_rate": 4.9984452175779106e-06, + "loss": 0.5644, + "step": 1205 + }, + { + "epoch": 0.07344030691471547, + "grad_norm": 1.0541191693553016, + "learning_rate": 4.998442402511119e-06, + "loss": 0.5732, + "step": 1206 + }, + { + "epoch": 0.07350120269159334, + "grad_norm": 0.9607233597704039, + "learning_rate": 4.998439584898967e-06, + "loss": 0.5159, + "step": 1207 + }, + { + "epoch": 0.07356209846847121, + "grad_norm": 1.0373898907294488, + "learning_rate": 4.9984367647414565e-06, + "loss": 0.5299, + "step": 1208 + }, + { + "epoch": 0.07362299424534909, + "grad_norm": 1.0366548524284496, + "learning_rate": 4.998433942038591e-06, + "loss": 0.5203, + "step": 1209 + }, + { + "epoch": 0.07368389002222696, + "grad_norm": 0.8999612007801143, + "learning_rate": 4.9984311167903725e-06, + "loss": 0.6097, + "step": 1210 + }, + { + "epoch": 0.07374478579910483, + "grad_norm": 1.104227513084385, + "learning_rate": 4.998428288996804e-06, + "loss": 0.5787, + "step": 1211 + }, + { + "epoch": 0.0738056815759827, + "grad_norm": 1.0730472465942713, + "learning_rate": 4.998425458657889e-06, + "loss": 0.5192, + "step": 1212 + }, + { + "epoch": 0.07386657735286058, + "grad_norm": 0.9601544706070597, + "learning_rate": 4.9984226257736304e-06, + "loss": 0.5838, + "step": 1213 + }, + { + "epoch": 0.07392747312973845, + "grad_norm": 1.0139278066905026, + "learning_rate": 4.99841979034403e-06, + "loss": 0.6546, + "step": 1214 + }, + { + "epoch": 0.07398836890661632, + "grad_norm": 1.0700496891402338, + "learning_rate": 4.998416952369092e-06, + "loss": 0.5738, + "step": 1215 + }, + { + "epoch": 0.0740492646834942, + "grad_norm": 0.9571049463574032, + "learning_rate": 4.998414111848819e-06, + "loss": 0.6831, + "step": 1216 + }, + { + "epoch": 0.07411016046037207, + "grad_norm": 1.0267449074727277, + "learning_rate": 4.998411268783213e-06, + "loss": 0.5628, + "step": 1217 + }, + { + "epoch": 0.07417105623724994, + "grad_norm": 1.0229663870317407, + "learning_rate": 4.998408423172278e-06, + "loss": 0.5498, + "step": 1218 + }, + { + "epoch": 0.07423195201412781, + "grad_norm": 0.9819748199025332, + "learning_rate": 4.998405575016016e-06, + "loss": 0.6028, + "step": 1219 + }, + { + "epoch": 0.0742928477910057, + "grad_norm": 1.0494742656540514, + "learning_rate": 4.998402724314431e-06, + "loss": 0.5299, + "step": 1220 + }, + { + "epoch": 0.07435374356788357, + "grad_norm": 1.0391025597176269, + "learning_rate": 4.998399871067525e-06, + "loss": 0.558, + "step": 1221 + }, + { + "epoch": 0.07441463934476145, + "grad_norm": 0.9484574719713161, + "learning_rate": 4.998397015275301e-06, + "loss": 0.553, + "step": 1222 + }, + { + "epoch": 0.07447553512163932, + "grad_norm": 0.9515794370497722, + "learning_rate": 4.998394156937763e-06, + "loss": 0.5945, + "step": 1223 + }, + { + "epoch": 0.07453643089851719, + "grad_norm": 0.9699349276432713, + "learning_rate": 4.9983912960549115e-06, + "loss": 0.5485, + "step": 1224 + }, + { + "epoch": 0.07459732667539506, + "grad_norm": 0.992800701605478, + "learning_rate": 4.998388432626752e-06, + "loss": 0.5072, + "step": 1225 + }, + { + "epoch": 0.07465822245227294, + "grad_norm": 1.005243258821148, + "learning_rate": 4.998385566653286e-06, + "loss": 0.5644, + "step": 1226 + }, + { + "epoch": 0.07471911822915081, + "grad_norm": 0.9983940758986716, + "learning_rate": 4.998382698134516e-06, + "loss": 0.5637, + "step": 1227 + }, + { + "epoch": 0.07478001400602868, + "grad_norm": 1.0394280317065345, + "learning_rate": 4.998379827070446e-06, + "loss": 0.6091, + "step": 1228 + }, + { + "epoch": 0.07484090978290656, + "grad_norm": 1.0068486056574648, + "learning_rate": 4.998376953461079e-06, + "loss": 0.5785, + "step": 1229 + }, + { + "epoch": 0.07490180555978443, + "grad_norm": 0.9913329400498256, + "learning_rate": 4.998374077306417e-06, + "loss": 0.5695, + "step": 1230 + }, + { + "epoch": 0.0749627013366623, + "grad_norm": 0.9884312902890091, + "learning_rate": 4.998371198606464e-06, + "loss": 0.6269, + "step": 1231 + }, + { + "epoch": 0.07502359711354017, + "grad_norm": 1.0408900888368038, + "learning_rate": 4.998368317361222e-06, + "loss": 0.5638, + "step": 1232 + }, + { + "epoch": 0.07508449289041805, + "grad_norm": 0.9948596435918458, + "learning_rate": 4.998365433570694e-06, + "loss": 0.5534, + "step": 1233 + }, + { + "epoch": 0.07514538866729592, + "grad_norm": 1.0851637036315944, + "learning_rate": 4.998362547234884e-06, + "loss": 0.5832, + "step": 1234 + }, + { + "epoch": 0.0752062844441738, + "grad_norm": 1.0213182732479609, + "learning_rate": 4.998359658353793e-06, + "loss": 0.5554, + "step": 1235 + }, + { + "epoch": 0.07526718022105167, + "grad_norm": 0.9779438420717514, + "learning_rate": 4.998356766927427e-06, + "loss": 0.6067, + "step": 1236 + }, + { + "epoch": 0.07532807599792954, + "grad_norm": 1.0139869228221998, + "learning_rate": 4.998353872955786e-06, + "loss": 0.5363, + "step": 1237 + }, + { + "epoch": 0.07538897177480741, + "grad_norm": 1.012986764406692, + "learning_rate": 4.9983509764388735e-06, + "loss": 0.582, + "step": 1238 + }, + { + "epoch": 0.07544986755168528, + "grad_norm": 1.0373892902529627, + "learning_rate": 4.998348077376693e-06, + "loss": 0.5724, + "step": 1239 + }, + { + "epoch": 0.07551076332856316, + "grad_norm": 1.0469319702890991, + "learning_rate": 4.998345175769248e-06, + "loss": 0.5311, + "step": 1240 + }, + { + "epoch": 0.07557165910544104, + "grad_norm": 0.9574229427004137, + "learning_rate": 4.998342271616541e-06, + "loss": 0.5194, + "step": 1241 + }, + { + "epoch": 0.07563255488231892, + "grad_norm": 1.0338218095865146, + "learning_rate": 4.998339364918575e-06, + "loss": 0.5524, + "step": 1242 + }, + { + "epoch": 0.07569345065919679, + "grad_norm": 1.1461504542175425, + "learning_rate": 4.998336455675352e-06, + "loss": 0.6365, + "step": 1243 + }, + { + "epoch": 0.07575434643607466, + "grad_norm": 1.0303062335632427, + "learning_rate": 4.998333543886876e-06, + "loss": 0.559, + "step": 1244 + }, + { + "epoch": 0.07581524221295254, + "grad_norm": 0.975906325313318, + "learning_rate": 4.99833062955315e-06, + "loss": 0.5879, + "step": 1245 + }, + { + "epoch": 0.07587613798983041, + "grad_norm": 1.0638795579929976, + "learning_rate": 4.998327712674177e-06, + "loss": 0.5103, + "step": 1246 + }, + { + "epoch": 0.07593703376670828, + "grad_norm": 1.0809723193355294, + "learning_rate": 4.998324793249959e-06, + "loss": 0.5958, + "step": 1247 + }, + { + "epoch": 0.07599792954358615, + "grad_norm": 0.9731515984509215, + "learning_rate": 4.9983218712805e-06, + "loss": 0.5258, + "step": 1248 + }, + { + "epoch": 0.07605882532046403, + "grad_norm": 1.0842232663095668, + "learning_rate": 4.9983189467658025e-06, + "loss": 0.5613, + "step": 1249 + }, + { + "epoch": 0.0761197210973419, + "grad_norm": 1.0170256031807774, + "learning_rate": 4.99831601970587e-06, + "loss": 0.5534, + "step": 1250 + }, + { + "epoch": 0.07618061687421977, + "grad_norm": 1.022588582352, + "learning_rate": 4.9983130901007045e-06, + "loss": 0.5964, + "step": 1251 + }, + { + "epoch": 0.07624151265109765, + "grad_norm": 1.0409056310986078, + "learning_rate": 4.9983101579503095e-06, + "loss": 0.5914, + "step": 1252 + }, + { + "epoch": 0.07630240842797552, + "grad_norm": 1.0200216127776982, + "learning_rate": 4.998307223254688e-06, + "loss": 0.5931, + "step": 1253 + }, + { + "epoch": 0.07636330420485339, + "grad_norm": 1.0251525579161205, + "learning_rate": 4.998304286013844e-06, + "loss": 0.5828, + "step": 1254 + }, + { + "epoch": 0.07642419998173126, + "grad_norm": 0.9711082369298831, + "learning_rate": 4.998301346227779e-06, + "loss": 0.5903, + "step": 1255 + }, + { + "epoch": 0.07648509575860914, + "grad_norm": 1.0636988404104293, + "learning_rate": 4.998298403896496e-06, + "loss": 0.5878, + "step": 1256 + }, + { + "epoch": 0.07654599153548701, + "grad_norm": 0.9396555179806282, + "learning_rate": 4.998295459019999e-06, + "loss": 0.5582, + "step": 1257 + }, + { + "epoch": 0.07660688731236488, + "grad_norm": 1.0647335456954574, + "learning_rate": 4.99829251159829e-06, + "loss": 0.5461, + "step": 1258 + }, + { + "epoch": 0.07666778308924276, + "grad_norm": 1.0419858730922644, + "learning_rate": 4.998289561631373e-06, + "loss": 0.6439, + "step": 1259 + }, + { + "epoch": 0.07672867886612063, + "grad_norm": 1.0121396819271393, + "learning_rate": 4.998286609119251e-06, + "loss": 0.5178, + "step": 1260 + }, + { + "epoch": 0.07678957464299851, + "grad_norm": 1.0068704251218528, + "learning_rate": 4.998283654061926e-06, + "loss": 0.5969, + "step": 1261 + }, + { + "epoch": 0.07685047041987639, + "grad_norm": 1.0209458330189722, + "learning_rate": 4.998280696459402e-06, + "loss": 0.5736, + "step": 1262 + }, + { + "epoch": 0.07691136619675426, + "grad_norm": 1.046161346108272, + "learning_rate": 4.998277736311681e-06, + "loss": 0.5297, + "step": 1263 + }, + { + "epoch": 0.07697226197363213, + "grad_norm": 0.93245485406517, + "learning_rate": 4.998274773618767e-06, + "loss": 0.5319, + "step": 1264 + }, + { + "epoch": 0.07703315775051, + "grad_norm": 0.9561970159788988, + "learning_rate": 4.998271808380663e-06, + "loss": 0.5685, + "step": 1265 + }, + { + "epoch": 0.07709405352738788, + "grad_norm": 0.9647048668169314, + "learning_rate": 4.99826884059737e-06, + "loss": 0.57, + "step": 1266 + }, + { + "epoch": 0.07715494930426575, + "grad_norm": 1.0436805820011876, + "learning_rate": 4.998265870268893e-06, + "loss": 0.5971, + "step": 1267 + }, + { + "epoch": 0.07721584508114362, + "grad_norm": 1.0826067033081626, + "learning_rate": 4.998262897395236e-06, + "loss": 0.5457, + "step": 1268 + }, + { + "epoch": 0.0772767408580215, + "grad_norm": 1.017794935338194, + "learning_rate": 4.9982599219764e-06, + "loss": 0.5925, + "step": 1269 + }, + { + "epoch": 0.07733763663489937, + "grad_norm": 1.0797630074663152, + "learning_rate": 4.998256944012389e-06, + "loss": 0.5389, + "step": 1270 + }, + { + "epoch": 0.07739853241177724, + "grad_norm": 1.208518304985967, + "learning_rate": 4.998253963503205e-06, + "loss": 0.5111, + "step": 1271 + }, + { + "epoch": 0.07745942818865512, + "grad_norm": 1.036703327789083, + "learning_rate": 4.998250980448853e-06, + "loss": 0.6252, + "step": 1272 + }, + { + "epoch": 0.07752032396553299, + "grad_norm": 1.0030469628563228, + "learning_rate": 4.998247994849334e-06, + "loss": 0.5954, + "step": 1273 + }, + { + "epoch": 0.07758121974241086, + "grad_norm": 1.052497297799712, + "learning_rate": 4.998245006704652e-06, + "loss": 0.5878, + "step": 1274 + }, + { + "epoch": 0.07764211551928873, + "grad_norm": 1.0070183953992327, + "learning_rate": 4.99824201601481e-06, + "loss": 0.5375, + "step": 1275 + }, + { + "epoch": 0.07770301129616661, + "grad_norm": 1.0852154755564956, + "learning_rate": 4.998239022779811e-06, + "loss": 0.5581, + "step": 1276 + }, + { + "epoch": 0.07776390707304448, + "grad_norm": 1.0097498992603215, + "learning_rate": 4.998236026999658e-06, + "loss": 0.5597, + "step": 1277 + }, + { + "epoch": 0.07782480284992235, + "grad_norm": 1.0203817264335495, + "learning_rate": 4.998233028674354e-06, + "loss": 0.5418, + "step": 1278 + }, + { + "epoch": 0.07788569862680023, + "grad_norm": 1.056525625803713, + "learning_rate": 4.998230027803902e-06, + "loss": 0.6001, + "step": 1279 + }, + { + "epoch": 0.0779465944036781, + "grad_norm": 0.9607154832911161, + "learning_rate": 4.998227024388306e-06, + "loss": 0.5643, + "step": 1280 + }, + { + "epoch": 0.07800749018055597, + "grad_norm": 0.9243228350506005, + "learning_rate": 4.998224018427567e-06, + "loss": 0.5477, + "step": 1281 + }, + { + "epoch": 0.07806838595743386, + "grad_norm": 1.0595571770603964, + "learning_rate": 4.99822100992169e-06, + "loss": 0.6179, + "step": 1282 + }, + { + "epoch": 0.07812928173431173, + "grad_norm": 0.9780378960537958, + "learning_rate": 4.998217998870677e-06, + "loss": 0.5524, + "step": 1283 + }, + { + "epoch": 0.0781901775111896, + "grad_norm": 1.082051429111794, + "learning_rate": 4.998214985274532e-06, + "loss": 0.5297, + "step": 1284 + }, + { + "epoch": 0.07825107328806748, + "grad_norm": 1.1384153381768176, + "learning_rate": 4.998211969133257e-06, + "loss": 0.5432, + "step": 1285 + }, + { + "epoch": 0.07831196906494535, + "grad_norm": 0.9382629256334661, + "learning_rate": 4.998208950446856e-06, + "loss": 0.5469, + "step": 1286 + }, + { + "epoch": 0.07837286484182322, + "grad_norm": 1.1136320403592888, + "learning_rate": 4.9982059292153315e-06, + "loss": 0.5571, + "step": 1287 + }, + { + "epoch": 0.0784337606187011, + "grad_norm": 0.9823247776870848, + "learning_rate": 4.998202905438687e-06, + "loss": 0.5283, + "step": 1288 + }, + { + "epoch": 0.07849465639557897, + "grad_norm": 1.0259633760239792, + "learning_rate": 4.998199879116925e-06, + "loss": 0.5144, + "step": 1289 + }, + { + "epoch": 0.07855555217245684, + "grad_norm": 1.0250984650854649, + "learning_rate": 4.998196850250049e-06, + "loss": 0.5043, + "step": 1290 + }, + { + "epoch": 0.07861644794933471, + "grad_norm": 1.0423025765621967, + "learning_rate": 4.998193818838062e-06, + "loss": 0.5746, + "step": 1291 + }, + { + "epoch": 0.07867734372621259, + "grad_norm": 0.9863438356379844, + "learning_rate": 4.998190784880967e-06, + "loss": 0.5665, + "step": 1292 + }, + { + "epoch": 0.07873823950309046, + "grad_norm": 0.9627636842680778, + "learning_rate": 4.998187748378768e-06, + "loss": 0.5572, + "step": 1293 + }, + { + "epoch": 0.07879913527996833, + "grad_norm": 1.0093451282027273, + "learning_rate": 4.998184709331465e-06, + "loss": 0.577, + "step": 1294 + }, + { + "epoch": 0.0788600310568462, + "grad_norm": 0.9906904264511571, + "learning_rate": 4.998181667739065e-06, + "loss": 0.5109, + "step": 1295 + }, + { + "epoch": 0.07892092683372408, + "grad_norm": 0.9817016183400088, + "learning_rate": 4.9981786236015695e-06, + "loss": 0.4849, + "step": 1296 + }, + { + "epoch": 0.07898182261060195, + "grad_norm": 1.168490182660131, + "learning_rate": 4.998175576918982e-06, + "loss": 0.5378, + "step": 1297 + }, + { + "epoch": 0.07904271838747982, + "grad_norm": 0.9429280818596947, + "learning_rate": 4.998172527691304e-06, + "loss": 0.5899, + "step": 1298 + }, + { + "epoch": 0.0791036141643577, + "grad_norm": 0.9652422171579774, + "learning_rate": 4.9981694759185405e-06, + "loss": 0.478, + "step": 1299 + }, + { + "epoch": 0.07916450994123557, + "grad_norm": 1.029828377241511, + "learning_rate": 4.998166421600693e-06, + "loss": 0.5388, + "step": 1300 + }, + { + "epoch": 0.07922540571811344, + "grad_norm": 1.0623845315391092, + "learning_rate": 4.998163364737766e-06, + "loss": 0.5625, + "step": 1301 + }, + { + "epoch": 0.07928630149499133, + "grad_norm": 1.0577995817051065, + "learning_rate": 4.998160305329762e-06, + "loss": 0.5428, + "step": 1302 + }, + { + "epoch": 0.0793471972718692, + "grad_norm": 1.031556345840726, + "learning_rate": 4.998157243376685e-06, + "loss": 0.5685, + "step": 1303 + }, + { + "epoch": 0.07940809304874707, + "grad_norm": 1.045842882193498, + "learning_rate": 4.998154178878537e-06, + "loss": 0.5797, + "step": 1304 + }, + { + "epoch": 0.07946898882562495, + "grad_norm": 0.9845149024298658, + "learning_rate": 4.998151111835321e-06, + "loss": 0.5118, + "step": 1305 + }, + { + "epoch": 0.07952988460250282, + "grad_norm": 1.0266381375383078, + "learning_rate": 4.998148042247041e-06, + "loss": 0.5516, + "step": 1306 + }, + { + "epoch": 0.0795907803793807, + "grad_norm": 1.0263059760076163, + "learning_rate": 4.9981449701137e-06, + "loss": 0.6006, + "step": 1307 + }, + { + "epoch": 0.07965167615625857, + "grad_norm": 1.069353258442347, + "learning_rate": 4.9981418954353e-06, + "loss": 0.5769, + "step": 1308 + }, + { + "epoch": 0.07971257193313644, + "grad_norm": 1.120264238897858, + "learning_rate": 4.998138818211845e-06, + "loss": 0.5735, + "step": 1309 + }, + { + "epoch": 0.07977346771001431, + "grad_norm": 1.140150989569195, + "learning_rate": 4.998135738443339e-06, + "loss": 0.5236, + "step": 1310 + }, + { + "epoch": 0.07983436348689218, + "grad_norm": 1.0292591864220622, + "learning_rate": 4.998132656129784e-06, + "loss": 0.5665, + "step": 1311 + }, + { + "epoch": 0.07989525926377006, + "grad_norm": 0.99614503474071, + "learning_rate": 4.998129571271182e-06, + "loss": 0.5492, + "step": 1312 + }, + { + "epoch": 0.07995615504064793, + "grad_norm": 1.0074257139534901, + "learning_rate": 4.998126483867539e-06, + "loss": 0.6106, + "step": 1313 + }, + { + "epoch": 0.0800170508175258, + "grad_norm": 1.053817586112949, + "learning_rate": 4.998123393918856e-06, + "loss": 0.5735, + "step": 1314 + }, + { + "epoch": 0.08007794659440368, + "grad_norm": 1.0828431556273712, + "learning_rate": 4.998120301425138e-06, + "loss": 0.5898, + "step": 1315 + }, + { + "epoch": 0.08013884237128155, + "grad_norm": 1.0014159759171686, + "learning_rate": 4.9981172063863855e-06, + "loss": 0.6491, + "step": 1316 + }, + { + "epoch": 0.08019973814815942, + "grad_norm": 1.0631516295467878, + "learning_rate": 4.998114108802604e-06, + "loss": 0.578, + "step": 1317 + }, + { + "epoch": 0.0802606339250373, + "grad_norm": 1.1469917517408457, + "learning_rate": 4.998111008673795e-06, + "loss": 0.635, + "step": 1318 + }, + { + "epoch": 0.08032152970191517, + "grad_norm": 1.0369777268127425, + "learning_rate": 4.998107905999963e-06, + "loss": 0.6274, + "step": 1319 + }, + { + "epoch": 0.08038242547879304, + "grad_norm": 1.0933505840864048, + "learning_rate": 4.998104800781111e-06, + "loss": 0.5302, + "step": 1320 + }, + { + "epoch": 0.08044332125567091, + "grad_norm": 1.0723938297123092, + "learning_rate": 4.998101693017241e-06, + "loss": 0.505, + "step": 1321 + }, + { + "epoch": 0.08050421703254879, + "grad_norm": 0.9861843715344029, + "learning_rate": 4.998098582708357e-06, + "loss": 0.6196, + "step": 1322 + }, + { + "epoch": 0.08056511280942667, + "grad_norm": 0.8845598074602188, + "learning_rate": 4.998095469854462e-06, + "loss": 0.6342, + "step": 1323 + }, + { + "epoch": 0.08062600858630455, + "grad_norm": 0.9661747277608366, + "learning_rate": 4.99809235445556e-06, + "loss": 0.6338, + "step": 1324 + }, + { + "epoch": 0.08068690436318242, + "grad_norm": 1.0668242612977026, + "learning_rate": 4.998089236511654e-06, + "loss": 0.5329, + "step": 1325 + }, + { + "epoch": 0.08074780014006029, + "grad_norm": 1.030231501243152, + "learning_rate": 4.998086116022745e-06, + "loss": 0.552, + "step": 1326 + }, + { + "epoch": 0.08080869591693816, + "grad_norm": 1.0077843827560926, + "learning_rate": 4.998082992988839e-06, + "loss": 0.5089, + "step": 1327 + }, + { + "epoch": 0.08086959169381604, + "grad_norm": 1.0118280558806028, + "learning_rate": 4.998079867409937e-06, + "loss": 0.5629, + "step": 1328 + }, + { + "epoch": 0.08093048747069391, + "grad_norm": 0.9771515407307051, + "learning_rate": 4.998076739286044e-06, + "loss": 0.5347, + "step": 1329 + }, + { + "epoch": 0.08099138324757178, + "grad_norm": 0.966935908836208, + "learning_rate": 4.998073608617161e-06, + "loss": 0.5847, + "step": 1330 + }, + { + "epoch": 0.08105227902444966, + "grad_norm": 1.137715972427165, + "learning_rate": 4.998070475403295e-06, + "loss": 0.5107, + "step": 1331 + }, + { + "epoch": 0.08111317480132753, + "grad_norm": 0.98716720835164, + "learning_rate": 4.9980673396444445e-06, + "loss": 0.563, + "step": 1332 + }, + { + "epoch": 0.0811740705782054, + "grad_norm": 1.0726813888408366, + "learning_rate": 4.998064201340615e-06, + "loss": 0.5942, + "step": 1333 + }, + { + "epoch": 0.08123496635508327, + "grad_norm": 0.9898635080362551, + "learning_rate": 4.998061060491811e-06, + "loss": 0.5995, + "step": 1334 + }, + { + "epoch": 0.08129586213196115, + "grad_norm": 0.9652649294810579, + "learning_rate": 4.998057917098034e-06, + "loss": 0.5686, + "step": 1335 + }, + { + "epoch": 0.08135675790883902, + "grad_norm": 1.046962334502556, + "learning_rate": 4.998054771159287e-06, + "loss": 0.5107, + "step": 1336 + }, + { + "epoch": 0.08141765368571689, + "grad_norm": 0.9832744212465208, + "learning_rate": 4.998051622675574e-06, + "loss": 0.5733, + "step": 1337 + }, + { + "epoch": 0.08147854946259477, + "grad_norm": 0.9368689927558395, + "learning_rate": 4.998048471646898e-06, + "loss": 0.5599, + "step": 1338 + }, + { + "epoch": 0.08153944523947264, + "grad_norm": 1.055196289303668, + "learning_rate": 4.998045318073262e-06, + "loss": 0.5511, + "step": 1339 + }, + { + "epoch": 0.08160034101635051, + "grad_norm": 1.0749040684455782, + "learning_rate": 4.998042161954669e-06, + "loss": 0.5701, + "step": 1340 + }, + { + "epoch": 0.08166123679322838, + "grad_norm": 1.0145056995553623, + "learning_rate": 4.998039003291123e-06, + "loss": 0.6229, + "step": 1341 + }, + { + "epoch": 0.08172213257010626, + "grad_norm": 0.9796650532587855, + "learning_rate": 4.998035842082627e-06, + "loss": 0.6331, + "step": 1342 + }, + { + "epoch": 0.08178302834698414, + "grad_norm": 1.0435448447172657, + "learning_rate": 4.998032678329184e-06, + "loss": 0.6048, + "step": 1343 + }, + { + "epoch": 0.08184392412386202, + "grad_norm": 1.0301046873282795, + "learning_rate": 4.998029512030796e-06, + "loss": 0.5525, + "step": 1344 + }, + { + "epoch": 0.08190481990073989, + "grad_norm": 1.0471543501445146, + "learning_rate": 4.998026343187469e-06, + "loss": 0.6515, + "step": 1345 + }, + { + "epoch": 0.08196571567761776, + "grad_norm": 1.0492853745177222, + "learning_rate": 4.998023171799204e-06, + "loss": 0.5018, + "step": 1346 + }, + { + "epoch": 0.08202661145449563, + "grad_norm": 1.0570140887830555, + "learning_rate": 4.9980199978660055e-06, + "loss": 0.5638, + "step": 1347 + }, + { + "epoch": 0.08208750723137351, + "grad_norm": 1.0254615641151106, + "learning_rate": 4.998016821387875e-06, + "loss": 0.6149, + "step": 1348 + }, + { + "epoch": 0.08214840300825138, + "grad_norm": 1.1051196221212887, + "learning_rate": 4.998013642364818e-06, + "loss": 0.538, + "step": 1349 + }, + { + "epoch": 0.08220929878512925, + "grad_norm": 1.0573606207749888, + "learning_rate": 4.9980104607968374e-06, + "loss": 0.5252, + "step": 1350 + }, + { + "epoch": 0.08227019456200713, + "grad_norm": 0.9446013413856239, + "learning_rate": 4.998007276683934e-06, + "loss": 0.6062, + "step": 1351 + }, + { + "epoch": 0.082331090338885, + "grad_norm": 0.995532122699198, + "learning_rate": 4.998004090026114e-06, + "loss": 0.5375, + "step": 1352 + }, + { + "epoch": 0.08239198611576287, + "grad_norm": 1.0804735908514178, + "learning_rate": 4.998000900823378e-06, + "loss": 0.5289, + "step": 1353 + }, + { + "epoch": 0.08245288189264074, + "grad_norm": 1.037330228042543, + "learning_rate": 4.997997709075732e-06, + "loss": 0.592, + "step": 1354 + }, + { + "epoch": 0.08251377766951862, + "grad_norm": 0.9974115773532269, + "learning_rate": 4.9979945147831765e-06, + "loss": 0.6366, + "step": 1355 + }, + { + "epoch": 0.08257467344639649, + "grad_norm": 1.0222800624282542, + "learning_rate": 4.9979913179457165e-06, + "loss": 0.5905, + "step": 1356 + }, + { + "epoch": 0.08263556922327436, + "grad_norm": 0.9780487459100352, + "learning_rate": 4.997988118563355e-06, + "loss": 0.5722, + "step": 1357 + }, + { + "epoch": 0.08269646500015224, + "grad_norm": 1.0219318885594033, + "learning_rate": 4.9979849166360965e-06, + "loss": 0.6011, + "step": 1358 + }, + { + "epoch": 0.08275736077703011, + "grad_norm": 1.0758291638979902, + "learning_rate": 4.997981712163941e-06, + "loss": 0.5162, + "step": 1359 + }, + { + "epoch": 0.08281825655390798, + "grad_norm": 0.999832132315375, + "learning_rate": 4.997978505146895e-06, + "loss": 0.5622, + "step": 1360 + }, + { + "epoch": 0.08287915233078585, + "grad_norm": 0.9808118654023391, + "learning_rate": 4.997975295584959e-06, + "loss": 0.6046, + "step": 1361 + }, + { + "epoch": 0.08294004810766373, + "grad_norm": 1.1092381489336622, + "learning_rate": 4.997972083478139e-06, + "loss": 0.5619, + "step": 1362 + }, + { + "epoch": 0.0830009438845416, + "grad_norm": 1.0424230626462863, + "learning_rate": 4.997968868826436e-06, + "loss": 0.544, + "step": 1363 + }, + { + "epoch": 0.08306183966141949, + "grad_norm": 1.0018157474870995, + "learning_rate": 4.997965651629855e-06, + "loss": 0.5661, + "step": 1364 + }, + { + "epoch": 0.08312273543829736, + "grad_norm": 1.0616809141868, + "learning_rate": 4.997962431888398e-06, + "loss": 0.5381, + "step": 1365 + }, + { + "epoch": 0.08318363121517523, + "grad_norm": 1.1332564891024013, + "learning_rate": 4.997959209602069e-06, + "loss": 0.6207, + "step": 1366 + }, + { + "epoch": 0.0832445269920531, + "grad_norm": 0.9827494918664205, + "learning_rate": 4.997955984770872e-06, + "loss": 0.5591, + "step": 1367 + }, + { + "epoch": 0.08330542276893098, + "grad_norm": 1.0168679967341927, + "learning_rate": 4.997952757394809e-06, + "loss": 0.5713, + "step": 1368 + }, + { + "epoch": 0.08336631854580885, + "grad_norm": 1.0451460056429356, + "learning_rate": 4.997949527473882e-06, + "loss": 0.5258, + "step": 1369 + }, + { + "epoch": 0.08342721432268672, + "grad_norm": 0.9608058777648145, + "learning_rate": 4.997946295008098e-06, + "loss": 0.5385, + "step": 1370 + }, + { + "epoch": 0.0834881100995646, + "grad_norm": 1.0052888464370753, + "learning_rate": 4.997943059997456e-06, + "loss": 0.5611, + "step": 1371 + }, + { + "epoch": 0.08354900587644247, + "grad_norm": 0.9253910354367356, + "learning_rate": 4.997939822441964e-06, + "loss": 0.6202, + "step": 1372 + }, + { + "epoch": 0.08360990165332034, + "grad_norm": 1.12127032484878, + "learning_rate": 4.997936582341622e-06, + "loss": 0.4604, + "step": 1373 + }, + { + "epoch": 0.08367079743019822, + "grad_norm": 1.1295133506301014, + "learning_rate": 4.997933339696434e-06, + "loss": 0.5926, + "step": 1374 + }, + { + "epoch": 0.08373169320707609, + "grad_norm": 1.0038694219269453, + "learning_rate": 4.997930094506403e-06, + "loss": 0.5032, + "step": 1375 + }, + { + "epoch": 0.08379258898395396, + "grad_norm": 1.0525057907776012, + "learning_rate": 4.997926846771534e-06, + "loss": 0.5264, + "step": 1376 + }, + { + "epoch": 0.08385348476083183, + "grad_norm": 0.9752283523748062, + "learning_rate": 4.997923596491827e-06, + "loss": 0.5388, + "step": 1377 + }, + { + "epoch": 0.0839143805377097, + "grad_norm": 1.0056225533012288, + "learning_rate": 4.997920343667289e-06, + "loss": 0.6161, + "step": 1378 + }, + { + "epoch": 0.08397527631458758, + "grad_norm": 0.9517339861709416, + "learning_rate": 4.997917088297921e-06, + "loss": 0.5825, + "step": 1379 + }, + { + "epoch": 0.08403617209146545, + "grad_norm": 1.0483468745579916, + "learning_rate": 4.9979138303837275e-06, + "loss": 0.6243, + "step": 1380 + }, + { + "epoch": 0.08409706786834333, + "grad_norm": 1.0526937855025145, + "learning_rate": 4.997910569924711e-06, + "loss": 0.5865, + "step": 1381 + }, + { + "epoch": 0.0841579636452212, + "grad_norm": 1.0519133941561867, + "learning_rate": 4.997907306920875e-06, + "loss": 0.5529, + "step": 1382 + }, + { + "epoch": 0.08421885942209907, + "grad_norm": 0.9936847049611551, + "learning_rate": 4.997904041372223e-06, + "loss": 0.5124, + "step": 1383 + }, + { + "epoch": 0.08427975519897696, + "grad_norm": 1.0426497269219892, + "learning_rate": 4.997900773278759e-06, + "loss": 0.528, + "step": 1384 + }, + { + "epoch": 0.08434065097585483, + "grad_norm": 1.0588432654556976, + "learning_rate": 4.997897502640485e-06, + "loss": 0.5863, + "step": 1385 + }, + { + "epoch": 0.0844015467527327, + "grad_norm": 0.9125416209454333, + "learning_rate": 4.997894229457405e-06, + "loss": 0.5919, + "step": 1386 + }, + { + "epoch": 0.08446244252961058, + "grad_norm": 1.1610164023350729, + "learning_rate": 4.997890953729523e-06, + "loss": 0.5343, + "step": 1387 + }, + { + "epoch": 0.08452333830648845, + "grad_norm": 1.046387991786299, + "learning_rate": 4.997887675456841e-06, + "loss": 0.552, + "step": 1388 + }, + { + "epoch": 0.08458423408336632, + "grad_norm": 0.9806410909948885, + "learning_rate": 4.997884394639363e-06, + "loss": 0.6115, + "step": 1389 + }, + { + "epoch": 0.0846451298602442, + "grad_norm": 1.0685078013425313, + "learning_rate": 4.997881111277092e-06, + "loss": 0.5033, + "step": 1390 + }, + { + "epoch": 0.08470602563712207, + "grad_norm": 1.0285100216045793, + "learning_rate": 4.997877825370032e-06, + "loss": 0.5427, + "step": 1391 + }, + { + "epoch": 0.08476692141399994, + "grad_norm": 0.9890166207437792, + "learning_rate": 4.997874536918185e-06, + "loss": 0.534, + "step": 1392 + }, + { + "epoch": 0.08482781719087781, + "grad_norm": 0.978209647363005, + "learning_rate": 4.997871245921557e-06, + "loss": 0.6265, + "step": 1393 + }, + { + "epoch": 0.08488871296775569, + "grad_norm": 1.1507850374043587, + "learning_rate": 4.997867952380149e-06, + "loss": 0.5361, + "step": 1394 + }, + { + "epoch": 0.08494960874463356, + "grad_norm": 0.9458799981211218, + "learning_rate": 4.997864656293965e-06, + "loss": 0.5654, + "step": 1395 + }, + { + "epoch": 0.08501050452151143, + "grad_norm": 1.0653908195035158, + "learning_rate": 4.997861357663009e-06, + "loss": 0.5708, + "step": 1396 + }, + { + "epoch": 0.0850714002983893, + "grad_norm": 0.9832083459749902, + "learning_rate": 4.997858056487283e-06, + "loss": 0.6144, + "step": 1397 + }, + { + "epoch": 0.08513229607526718, + "grad_norm": 1.0249520820930933, + "learning_rate": 4.997854752766791e-06, + "loss": 0.5323, + "step": 1398 + }, + { + "epoch": 0.08519319185214505, + "grad_norm": 1.0110827225296726, + "learning_rate": 4.997851446501537e-06, + "loss": 0.5512, + "step": 1399 + }, + { + "epoch": 0.08525408762902292, + "grad_norm": 0.9883251659717444, + "learning_rate": 4.997848137691525e-06, + "loss": 0.5644, + "step": 1400 + }, + { + "epoch": 0.0853149834059008, + "grad_norm": 0.9623680422273015, + "learning_rate": 4.997844826336755e-06, + "loss": 0.5772, + "step": 1401 + }, + { + "epoch": 0.08537587918277867, + "grad_norm": 1.1064886346974587, + "learning_rate": 4.997841512437234e-06, + "loss": 0.5224, + "step": 1402 + }, + { + "epoch": 0.08543677495965654, + "grad_norm": 1.0253791751720778, + "learning_rate": 4.997838195992964e-06, + "loss": 0.5314, + "step": 1403 + }, + { + "epoch": 0.08549767073653441, + "grad_norm": 1.0898475718300318, + "learning_rate": 4.997834877003947e-06, + "loss": 0.5406, + "step": 1404 + }, + { + "epoch": 0.0855585665134123, + "grad_norm": 1.0135577231400803, + "learning_rate": 4.99783155547019e-06, + "loss": 0.5673, + "step": 1405 + }, + { + "epoch": 0.08561946229029017, + "grad_norm": 1.0572636733680414, + "learning_rate": 4.997828231391693e-06, + "loss": 0.5269, + "step": 1406 + }, + { + "epoch": 0.08568035806716805, + "grad_norm": 1.044291244956292, + "learning_rate": 4.99782490476846e-06, + "loss": 0.5125, + "step": 1407 + }, + { + "epoch": 0.08574125384404592, + "grad_norm": 1.1385597566459287, + "learning_rate": 4.997821575600495e-06, + "loss": 0.6108, + "step": 1408 + }, + { + "epoch": 0.08580214962092379, + "grad_norm": 1.0149380631100289, + "learning_rate": 4.997818243887802e-06, + "loss": 0.536, + "step": 1409 + }, + { + "epoch": 0.08586304539780167, + "grad_norm": 0.9591231348521068, + "learning_rate": 4.9978149096303835e-06, + "loss": 0.6501, + "step": 1410 + }, + { + "epoch": 0.08592394117467954, + "grad_norm": 1.07694937637205, + "learning_rate": 4.997811572828243e-06, + "loss": 0.5801, + "step": 1411 + }, + { + "epoch": 0.08598483695155741, + "grad_norm": 0.9814813676378317, + "learning_rate": 4.997808233481384e-06, + "loss": 0.5415, + "step": 1412 + }, + { + "epoch": 0.08604573272843528, + "grad_norm": 1.1194211270791776, + "learning_rate": 4.99780489158981e-06, + "loss": 0.5346, + "step": 1413 + }, + { + "epoch": 0.08610662850531316, + "grad_norm": 1.0877240173262892, + "learning_rate": 4.997801547153524e-06, + "loss": 0.5917, + "step": 1414 + }, + { + "epoch": 0.08616752428219103, + "grad_norm": 1.0508841654139938, + "learning_rate": 4.99779820017253e-06, + "loss": 0.5515, + "step": 1415 + }, + { + "epoch": 0.0862284200590689, + "grad_norm": 1.0368533898527248, + "learning_rate": 4.997794850646831e-06, + "loss": 0.5449, + "step": 1416 + }, + { + "epoch": 0.08628931583594678, + "grad_norm": 0.9749447423036011, + "learning_rate": 4.997791498576431e-06, + "loss": 0.5707, + "step": 1417 + }, + { + "epoch": 0.08635021161282465, + "grad_norm": 1.0896998362410615, + "learning_rate": 4.997788143961332e-06, + "loss": 0.5697, + "step": 1418 + }, + { + "epoch": 0.08641110738970252, + "grad_norm": 1.0298344892350537, + "learning_rate": 4.997784786801539e-06, + "loss": 0.4894, + "step": 1419 + }, + { + "epoch": 0.0864720031665804, + "grad_norm": 1.0513141846293907, + "learning_rate": 4.997781427097055e-06, + "loss": 0.557, + "step": 1420 + }, + { + "epoch": 0.08653289894345827, + "grad_norm": 1.0595630971554015, + "learning_rate": 4.997778064847882e-06, + "loss": 0.5555, + "step": 1421 + }, + { + "epoch": 0.08659379472033614, + "grad_norm": 0.9386100046397133, + "learning_rate": 4.997774700054026e-06, + "loss": 0.5809, + "step": 1422 + }, + { + "epoch": 0.08665469049721401, + "grad_norm": 1.0831023387306655, + "learning_rate": 4.997771332715488e-06, + "loss": 0.5047, + "step": 1423 + }, + { + "epoch": 0.08671558627409189, + "grad_norm": 1.0386275500494708, + "learning_rate": 4.997767962832274e-06, + "loss": 0.5113, + "step": 1424 + }, + { + "epoch": 0.08677648205096977, + "grad_norm": 1.0196578641150749, + "learning_rate": 4.997764590404386e-06, + "loss": 0.5531, + "step": 1425 + }, + { + "epoch": 0.08683737782784764, + "grad_norm": 1.0054996361558854, + "learning_rate": 4.997761215431825e-06, + "loss": 0.541, + "step": 1426 + }, + { + "epoch": 0.08689827360472552, + "grad_norm": 1.0262025888770043, + "learning_rate": 4.997757837914598e-06, + "loss": 0.5513, + "step": 1427 + }, + { + "epoch": 0.08695916938160339, + "grad_norm": 1.1113985121907357, + "learning_rate": 4.997754457852708e-06, + "loss": 0.5794, + "step": 1428 + }, + { + "epoch": 0.08702006515848126, + "grad_norm": 0.9938661966260649, + "learning_rate": 4.997751075246157e-06, + "loss": 0.5774, + "step": 1429 + }, + { + "epoch": 0.08708096093535914, + "grad_norm": 1.0329260867319197, + "learning_rate": 4.9977476900949494e-06, + "loss": 0.5607, + "step": 1430 + }, + { + "epoch": 0.08714185671223701, + "grad_norm": 1.0338019149930968, + "learning_rate": 4.997744302399089e-06, + "loss": 0.5459, + "step": 1431 + }, + { + "epoch": 0.08720275248911488, + "grad_norm": 1.0345679020411003, + "learning_rate": 4.997740912158577e-06, + "loss": 0.5639, + "step": 1432 + }, + { + "epoch": 0.08726364826599275, + "grad_norm": 1.0284295666658978, + "learning_rate": 4.99773751937342e-06, + "loss": 0.5406, + "step": 1433 + }, + { + "epoch": 0.08732454404287063, + "grad_norm": 1.0410918485948286, + "learning_rate": 4.99773412404362e-06, + "loss": 0.5621, + "step": 1434 + }, + { + "epoch": 0.0873854398197485, + "grad_norm": 1.0691144751914856, + "learning_rate": 4.997730726169179e-06, + "loss": 0.4641, + "step": 1435 + }, + { + "epoch": 0.08744633559662637, + "grad_norm": 1.0350929910829778, + "learning_rate": 4.997727325750102e-06, + "loss": 0.508, + "step": 1436 + }, + { + "epoch": 0.08750723137350425, + "grad_norm": 1.0736502719106746, + "learning_rate": 4.997723922786394e-06, + "loss": 0.5569, + "step": 1437 + }, + { + "epoch": 0.08756812715038212, + "grad_norm": 1.0439770582496712, + "learning_rate": 4.997720517278055e-06, + "loss": 0.6457, + "step": 1438 + }, + { + "epoch": 0.08762902292725999, + "grad_norm": 1.0746961933877615, + "learning_rate": 4.997717109225091e-06, + "loss": 0.52, + "step": 1439 + }, + { + "epoch": 0.08768991870413786, + "grad_norm": 1.1218209856827606, + "learning_rate": 4.997713698627506e-06, + "loss": 0.5238, + "step": 1440 + }, + { + "epoch": 0.08775081448101574, + "grad_norm": 1.083361370382905, + "learning_rate": 4.9977102854853e-06, + "loss": 0.5434, + "step": 1441 + }, + { + "epoch": 0.08781171025789361, + "grad_norm": 1.0201669579796162, + "learning_rate": 4.99770686979848e-06, + "loss": 0.547, + "step": 1442 + }, + { + "epoch": 0.08787260603477148, + "grad_norm": 1.1388569690790045, + "learning_rate": 4.997703451567047e-06, + "loss": 0.5379, + "step": 1443 + }, + { + "epoch": 0.08793350181164936, + "grad_norm": 1.023607137618843, + "learning_rate": 4.9977000307910076e-06, + "loss": 0.6016, + "step": 1444 + }, + { + "epoch": 0.08799439758852723, + "grad_norm": 1.0288567234464243, + "learning_rate": 4.997696607470361e-06, + "loss": 0.5656, + "step": 1445 + }, + { + "epoch": 0.08805529336540512, + "grad_norm": 1.053174616110197, + "learning_rate": 4.997693181605115e-06, + "loss": 0.5053, + "step": 1446 + }, + { + "epoch": 0.08811618914228299, + "grad_norm": 1.147217374379264, + "learning_rate": 4.99768975319527e-06, + "loss": 0.5617, + "step": 1447 + }, + { + "epoch": 0.08817708491916086, + "grad_norm": 0.9808081873354311, + "learning_rate": 4.9976863222408315e-06, + "loss": 0.5677, + "step": 1448 + }, + { + "epoch": 0.08823798069603873, + "grad_norm": 0.9976017514534825, + "learning_rate": 4.997682888741801e-06, + "loss": 0.5208, + "step": 1449 + }, + { + "epoch": 0.08829887647291661, + "grad_norm": 1.0358935190333154, + "learning_rate": 4.997679452698184e-06, + "loss": 0.6268, + "step": 1450 + }, + { + "epoch": 0.08835977224979448, + "grad_norm": 1.0230771127122553, + "learning_rate": 4.997676014109982e-06, + "loss": 0.5686, + "step": 1451 + }, + { + "epoch": 0.08842066802667235, + "grad_norm": 0.9942033346516335, + "learning_rate": 4.9976725729772e-06, + "loss": 0.5691, + "step": 1452 + }, + { + "epoch": 0.08848156380355023, + "grad_norm": 1.0592168587071582, + "learning_rate": 4.997669129299843e-06, + "loss": 0.5414, + "step": 1453 + }, + { + "epoch": 0.0885424595804281, + "grad_norm": 1.0225186110225175, + "learning_rate": 4.99766568307791e-06, + "loss": 0.5433, + "step": 1454 + }, + { + "epoch": 0.08860335535730597, + "grad_norm": 1.110918785586578, + "learning_rate": 4.997662234311409e-06, + "loss": 0.4987, + "step": 1455 + }, + { + "epoch": 0.08866425113418384, + "grad_norm": 1.0408031009115777, + "learning_rate": 4.997658783000341e-06, + "loss": 0.5705, + "step": 1456 + }, + { + "epoch": 0.08872514691106172, + "grad_norm": 1.0688611402532135, + "learning_rate": 4.997655329144709e-06, + "loss": 0.4916, + "step": 1457 + }, + { + "epoch": 0.08878604268793959, + "grad_norm": 1.095416605117354, + "learning_rate": 4.997651872744519e-06, + "loss": 0.522, + "step": 1458 + }, + { + "epoch": 0.08884693846481746, + "grad_norm": 1.1839311023953907, + "learning_rate": 4.997648413799772e-06, + "loss": 0.5558, + "step": 1459 + }, + { + "epoch": 0.08890783424169534, + "grad_norm": 1.0396988902746995, + "learning_rate": 4.997644952310475e-06, + "loss": 0.5842, + "step": 1460 + }, + { + "epoch": 0.08896873001857321, + "grad_norm": 0.9545960636033236, + "learning_rate": 4.997641488276627e-06, + "loss": 0.564, + "step": 1461 + }, + { + "epoch": 0.08902962579545108, + "grad_norm": 1.039427804600207, + "learning_rate": 4.9976380216982355e-06, + "loss": 0.547, + "step": 1462 + }, + { + "epoch": 0.08909052157232895, + "grad_norm": 1.0183226728438297, + "learning_rate": 4.997634552575301e-06, + "loss": 0.5686, + "step": 1463 + }, + { + "epoch": 0.08915141734920683, + "grad_norm": 1.1406363836260522, + "learning_rate": 4.997631080907829e-06, + "loss": 0.5689, + "step": 1464 + }, + { + "epoch": 0.0892123131260847, + "grad_norm": 0.954764264489982, + "learning_rate": 4.997627606695822e-06, + "loss": 0.5657, + "step": 1465 + }, + { + "epoch": 0.08927320890296259, + "grad_norm": 1.0514434387313412, + "learning_rate": 4.997624129939285e-06, + "loss": 0.5594, + "step": 1466 + }, + { + "epoch": 0.08933410467984046, + "grad_norm": 1.1355427286401618, + "learning_rate": 4.9976206506382185e-06, + "loss": 0.5396, + "step": 1467 + }, + { + "epoch": 0.08939500045671833, + "grad_norm": 1.025275028018011, + "learning_rate": 4.9976171687926295e-06, + "loss": 0.5283, + "step": 1468 + }, + { + "epoch": 0.0894558962335962, + "grad_norm": 1.052539185221459, + "learning_rate": 4.99761368440252e-06, + "loss": 0.5892, + "step": 1469 + }, + { + "epoch": 0.08951679201047408, + "grad_norm": 1.0441638021676458, + "learning_rate": 4.997610197467892e-06, + "loss": 0.5084, + "step": 1470 + }, + { + "epoch": 0.08957768778735195, + "grad_norm": 0.9828948772711906, + "learning_rate": 4.997606707988753e-06, + "loss": 0.5196, + "step": 1471 + }, + { + "epoch": 0.08963858356422982, + "grad_norm": 1.044570197782546, + "learning_rate": 4.997603215965103e-06, + "loss": 0.6172, + "step": 1472 + }, + { + "epoch": 0.0896994793411077, + "grad_norm": 1.0019109570095557, + "learning_rate": 4.997599721396947e-06, + "loss": 0.563, + "step": 1473 + }, + { + "epoch": 0.08976037511798557, + "grad_norm": 1.0104474715745193, + "learning_rate": 4.997596224284288e-06, + "loss": 0.5874, + "step": 1474 + }, + { + "epoch": 0.08982127089486344, + "grad_norm": 1.0229569792691542, + "learning_rate": 4.99759272462713e-06, + "loss": 0.5832, + "step": 1475 + }, + { + "epoch": 0.08988216667174131, + "grad_norm": 1.0113815704523144, + "learning_rate": 4.997589222425477e-06, + "loss": 0.5361, + "step": 1476 + }, + { + "epoch": 0.08994306244861919, + "grad_norm": 1.069070565971854, + "learning_rate": 4.997585717679331e-06, + "loss": 0.5972, + "step": 1477 + }, + { + "epoch": 0.09000395822549706, + "grad_norm": 1.031671044534895, + "learning_rate": 4.997582210388697e-06, + "loss": 0.5571, + "step": 1478 + }, + { + "epoch": 0.09006485400237493, + "grad_norm": 0.9302830592559393, + "learning_rate": 4.997578700553579e-06, + "loss": 0.6045, + "step": 1479 + }, + { + "epoch": 0.0901257497792528, + "grad_norm": 0.9583526507188402, + "learning_rate": 4.9975751881739785e-06, + "loss": 0.5069, + "step": 1480 + }, + { + "epoch": 0.09018664555613068, + "grad_norm": 1.0717051620533726, + "learning_rate": 4.997571673249901e-06, + "loss": 0.5348, + "step": 1481 + }, + { + "epoch": 0.09024754133300855, + "grad_norm": 1.056127623223244, + "learning_rate": 4.997568155781349e-06, + "loss": 0.6048, + "step": 1482 + }, + { + "epoch": 0.09030843710988642, + "grad_norm": 1.0307393941051817, + "learning_rate": 4.9975646357683274e-06, + "loss": 0.5791, + "step": 1483 + }, + { + "epoch": 0.0903693328867643, + "grad_norm": 1.0530716484852736, + "learning_rate": 4.9975611132108385e-06, + "loss": 0.5443, + "step": 1484 + }, + { + "epoch": 0.09043022866364217, + "grad_norm": 0.9259547168392037, + "learning_rate": 4.997557588108886e-06, + "loss": 0.5326, + "step": 1485 + }, + { + "epoch": 0.09049112444052004, + "grad_norm": 1.0423985984834203, + "learning_rate": 4.997554060462474e-06, + "loss": 0.5341, + "step": 1486 + }, + { + "epoch": 0.09055202021739793, + "grad_norm": 1.0435297512326267, + "learning_rate": 4.9975505302716055e-06, + "loss": 0.5399, + "step": 1487 + }, + { + "epoch": 0.0906129159942758, + "grad_norm": 1.028291802933058, + "learning_rate": 4.997546997536285e-06, + "loss": 0.477, + "step": 1488 + }, + { + "epoch": 0.09067381177115368, + "grad_norm": 1.0268939094049014, + "learning_rate": 4.997543462256514e-06, + "loss": 0.5423, + "step": 1489 + }, + { + "epoch": 0.09073470754803155, + "grad_norm": 0.9831200943773456, + "learning_rate": 4.9975399244323e-06, + "loss": 0.5742, + "step": 1490 + }, + { + "epoch": 0.09079560332490942, + "grad_norm": 1.104751051325835, + "learning_rate": 4.9975363840636425e-06, + "loss": 0.5929, + "step": 1491 + }, + { + "epoch": 0.0908564991017873, + "grad_norm": 1.0753710109943513, + "learning_rate": 4.9975328411505474e-06, + "loss": 0.5444, + "step": 1492 + }, + { + "epoch": 0.09091739487866517, + "grad_norm": 1.0327806918957327, + "learning_rate": 4.997529295693018e-06, + "loss": 0.5441, + "step": 1493 + }, + { + "epoch": 0.09097829065554304, + "grad_norm": 1.0529295416883744, + "learning_rate": 4.997525747691058e-06, + "loss": 0.5696, + "step": 1494 + }, + { + "epoch": 0.09103918643242091, + "grad_norm": 0.9992504249387475, + "learning_rate": 4.99752219714467e-06, + "loss": 0.591, + "step": 1495 + }, + { + "epoch": 0.09110008220929879, + "grad_norm": 1.037861396286008, + "learning_rate": 4.997518644053858e-06, + "loss": 0.5544, + "step": 1496 + }, + { + "epoch": 0.09116097798617666, + "grad_norm": 1.0394573515446932, + "learning_rate": 4.997515088418626e-06, + "loss": 0.5394, + "step": 1497 + }, + { + "epoch": 0.09122187376305453, + "grad_norm": 0.9996856964863439, + "learning_rate": 4.997511530238979e-06, + "loss": 0.5899, + "step": 1498 + }, + { + "epoch": 0.0912827695399324, + "grad_norm": 1.0668797451346084, + "learning_rate": 4.997507969514918e-06, + "loss": 0.5156, + "step": 1499 + }, + { + "epoch": 0.09134366531681028, + "grad_norm": 1.0769169270260723, + "learning_rate": 4.997504406246447e-06, + "loss": 0.4925, + "step": 1500 + }, + { + "epoch": 0.09140456109368815, + "grad_norm": 1.0939306350403828, + "learning_rate": 4.997500840433572e-06, + "loss": 0.5604, + "step": 1501 + }, + { + "epoch": 0.09146545687056602, + "grad_norm": 1.0312137302268414, + "learning_rate": 4.997497272076293e-06, + "loss": 0.535, + "step": 1502 + }, + { + "epoch": 0.0915263526474439, + "grad_norm": 1.0185687828685626, + "learning_rate": 4.997493701174618e-06, + "loss": 0.6177, + "step": 1503 + }, + { + "epoch": 0.09158724842432177, + "grad_norm": 0.9652339289400337, + "learning_rate": 4.997490127728548e-06, + "loss": 0.5492, + "step": 1504 + }, + { + "epoch": 0.09164814420119964, + "grad_norm": 0.9924036299537408, + "learning_rate": 4.997486551738087e-06, + "loss": 0.5799, + "step": 1505 + }, + { + "epoch": 0.09170903997807751, + "grad_norm": 1.0402205987782847, + "learning_rate": 4.997482973203237e-06, + "loss": 0.5606, + "step": 1506 + }, + { + "epoch": 0.0917699357549554, + "grad_norm": 0.9354647843503042, + "learning_rate": 4.997479392124005e-06, + "loss": 0.5865, + "step": 1507 + }, + { + "epoch": 0.09183083153183327, + "grad_norm": 1.1290563967528362, + "learning_rate": 4.997475808500392e-06, + "loss": 0.5671, + "step": 1508 + }, + { + "epoch": 0.09189172730871115, + "grad_norm": 1.0125627414094271, + "learning_rate": 4.997472222332402e-06, + "loss": 0.5597, + "step": 1509 + }, + { + "epoch": 0.09195262308558902, + "grad_norm": 1.0433103850491439, + "learning_rate": 4.997468633620041e-06, + "loss": 0.5367, + "step": 1510 + }, + { + "epoch": 0.09201351886246689, + "grad_norm": 1.0859122148466283, + "learning_rate": 4.997465042363309e-06, + "loss": 0.5285, + "step": 1511 + }, + { + "epoch": 0.09207441463934476, + "grad_norm": 1.059179631550993, + "learning_rate": 4.997461448562213e-06, + "loss": 0.4644, + "step": 1512 + }, + { + "epoch": 0.09213531041622264, + "grad_norm": 1.0762717486743663, + "learning_rate": 4.997457852216755e-06, + "loss": 0.5254, + "step": 1513 + }, + { + "epoch": 0.09219620619310051, + "grad_norm": 1.07458906415593, + "learning_rate": 4.997454253326939e-06, + "loss": 0.502, + "step": 1514 + }, + { + "epoch": 0.09225710196997838, + "grad_norm": 1.0173029709389938, + "learning_rate": 4.997450651892768e-06, + "loss": 0.6298, + "step": 1515 + }, + { + "epoch": 0.09231799774685626, + "grad_norm": 0.9976254784758077, + "learning_rate": 4.997447047914246e-06, + "loss": 0.6038, + "step": 1516 + }, + { + "epoch": 0.09237889352373413, + "grad_norm": 1.018991013837208, + "learning_rate": 4.997443441391377e-06, + "loss": 0.5671, + "step": 1517 + }, + { + "epoch": 0.092439789300612, + "grad_norm": 1.0607170570831757, + "learning_rate": 4.997439832324165e-06, + "loss": 0.5327, + "step": 1518 + }, + { + "epoch": 0.09250068507748987, + "grad_norm": 1.0298225846643443, + "learning_rate": 4.9974362207126125e-06, + "loss": 0.6091, + "step": 1519 + }, + { + "epoch": 0.09256158085436775, + "grad_norm": 1.1216921910167503, + "learning_rate": 4.997432606556725e-06, + "loss": 0.4841, + "step": 1520 + }, + { + "epoch": 0.09262247663124562, + "grad_norm": 1.0143590249594023, + "learning_rate": 4.9974289898565034e-06, + "loss": 0.5583, + "step": 1521 + }, + { + "epoch": 0.0926833724081235, + "grad_norm": 0.9823227316362182, + "learning_rate": 4.997425370611954e-06, + "loss": 0.5136, + "step": 1522 + }, + { + "epoch": 0.09274426818500137, + "grad_norm": 1.0322133440895667, + "learning_rate": 4.99742174882308e-06, + "loss": 0.5393, + "step": 1523 + }, + { + "epoch": 0.09280516396187924, + "grad_norm": 1.0091754046593295, + "learning_rate": 4.997418124489885e-06, + "loss": 0.5102, + "step": 1524 + }, + { + "epoch": 0.09286605973875711, + "grad_norm": 1.0173147824999038, + "learning_rate": 4.99741449761237e-06, + "loss": 0.5238, + "step": 1525 + }, + { + "epoch": 0.09292695551563498, + "grad_norm": 1.0286818400162883, + "learning_rate": 4.997410868190543e-06, + "loss": 0.5426, + "step": 1526 + }, + { + "epoch": 0.09298785129251286, + "grad_norm": 1.0552073370210773, + "learning_rate": 4.997407236224406e-06, + "loss": 0.5348, + "step": 1527 + }, + { + "epoch": 0.09304874706939074, + "grad_norm": 0.9775566873325453, + "learning_rate": 4.997403601713961e-06, + "loss": 0.5283, + "step": 1528 + }, + { + "epoch": 0.09310964284626862, + "grad_norm": 1.0947866092436982, + "learning_rate": 4.9973999646592145e-06, + "loss": 0.519, + "step": 1529 + }, + { + "epoch": 0.09317053862314649, + "grad_norm": 1.1222237975267093, + "learning_rate": 4.997396325060169e-06, + "loss": 0.4601, + "step": 1530 + }, + { + "epoch": 0.09323143440002436, + "grad_norm": 1.1075191908964093, + "learning_rate": 4.997392682916827e-06, + "loss": 0.5338, + "step": 1531 + }, + { + "epoch": 0.09329233017690224, + "grad_norm": 1.0374044469803558, + "learning_rate": 4.997389038229194e-06, + "loss": 0.5435, + "step": 1532 + }, + { + "epoch": 0.09335322595378011, + "grad_norm": 1.0238648306797096, + "learning_rate": 4.9973853909972715e-06, + "loss": 0.617, + "step": 1533 + }, + { + "epoch": 0.09341412173065798, + "grad_norm": 1.0110120262003894, + "learning_rate": 4.997381741221067e-06, + "loss": 0.537, + "step": 1534 + }, + { + "epoch": 0.09347501750753585, + "grad_norm": 1.1116045466283238, + "learning_rate": 4.99737808890058e-06, + "loss": 0.5812, + "step": 1535 + }, + { + "epoch": 0.09353591328441373, + "grad_norm": 1.138186247347138, + "learning_rate": 4.997374434035817e-06, + "loss": 0.5693, + "step": 1536 + }, + { + "epoch": 0.0935968090612916, + "grad_norm": 1.1078288071429587, + "learning_rate": 4.9973707766267795e-06, + "loss": 0.5356, + "step": 1537 + }, + { + "epoch": 0.09365770483816947, + "grad_norm": 1.004650256773113, + "learning_rate": 4.9973671166734746e-06, + "loss": 0.5227, + "step": 1538 + }, + { + "epoch": 0.09371860061504735, + "grad_norm": 1.0458720868304787, + "learning_rate": 4.997363454175903e-06, + "loss": 0.5935, + "step": 1539 + }, + { + "epoch": 0.09377949639192522, + "grad_norm": 0.9747879925101511, + "learning_rate": 4.99735978913407e-06, + "loss": 0.5739, + "step": 1540 + }, + { + "epoch": 0.09384039216880309, + "grad_norm": 1.0478052868457708, + "learning_rate": 4.997356121547978e-06, + "loss": 0.5427, + "step": 1541 + }, + { + "epoch": 0.09390128794568096, + "grad_norm": 0.9402820622815071, + "learning_rate": 4.9973524514176315e-06, + "loss": 0.5858, + "step": 1542 + }, + { + "epoch": 0.09396218372255884, + "grad_norm": 1.0038702873038106, + "learning_rate": 4.997348778743034e-06, + "loss": 0.5239, + "step": 1543 + }, + { + "epoch": 0.09402307949943671, + "grad_norm": 1.0151349313094575, + "learning_rate": 4.997345103524191e-06, + "loss": 0.5163, + "step": 1544 + }, + { + "epoch": 0.09408397527631458, + "grad_norm": 1.1156061530707904, + "learning_rate": 4.997341425761103e-06, + "loss": 0.5184, + "step": 1545 + }, + { + "epoch": 0.09414487105319246, + "grad_norm": 1.0582680102508362, + "learning_rate": 4.997337745453776e-06, + "loss": 0.5521, + "step": 1546 + }, + { + "epoch": 0.09420576683007033, + "grad_norm": 1.0490234027638012, + "learning_rate": 4.997334062602214e-06, + "loss": 0.4862, + "step": 1547 + }, + { + "epoch": 0.09426666260694821, + "grad_norm": 0.9842453263800433, + "learning_rate": 4.997330377206419e-06, + "loss": 0.6266, + "step": 1548 + }, + { + "epoch": 0.09432755838382609, + "grad_norm": 1.0241601609899917, + "learning_rate": 4.997326689266396e-06, + "loss": 0.48, + "step": 1549 + }, + { + "epoch": 0.09438845416070396, + "grad_norm": 1.041415023022409, + "learning_rate": 4.9973229987821496e-06, + "loss": 0.5573, + "step": 1550 + }, + { + "epoch": 0.09444934993758183, + "grad_norm": 0.9987046454912827, + "learning_rate": 4.997319305753681e-06, + "loss": 0.5782, + "step": 1551 + }, + { + "epoch": 0.0945102457144597, + "grad_norm": 0.9831255558441936, + "learning_rate": 4.997315610180996e-06, + "loss": 0.5812, + "step": 1552 + }, + { + "epoch": 0.09457114149133758, + "grad_norm": 1.0150558065891764, + "learning_rate": 4.997311912064098e-06, + "loss": 0.5358, + "step": 1553 + }, + { + "epoch": 0.09463203726821545, + "grad_norm": 1.0066199112388885, + "learning_rate": 4.99730821140299e-06, + "loss": 0.5794, + "step": 1554 + }, + { + "epoch": 0.09469293304509332, + "grad_norm": 1.0793831341996227, + "learning_rate": 4.9973045081976766e-06, + "loss": 0.5476, + "step": 1555 + }, + { + "epoch": 0.0947538288219712, + "grad_norm": 1.0426746813722718, + "learning_rate": 4.9973008024481615e-06, + "loss": 0.5689, + "step": 1556 + }, + { + "epoch": 0.09481472459884907, + "grad_norm": 1.0580146053699575, + "learning_rate": 4.997297094154447e-06, + "loss": 0.5272, + "step": 1557 + }, + { + "epoch": 0.09487562037572694, + "grad_norm": 0.9281574672614727, + "learning_rate": 4.997293383316539e-06, + "loss": 0.5342, + "step": 1558 + }, + { + "epoch": 0.09493651615260482, + "grad_norm": 1.0260567417074147, + "learning_rate": 4.997289669934442e-06, + "loss": 0.5665, + "step": 1559 + }, + { + "epoch": 0.09499741192948269, + "grad_norm": 1.080653521711041, + "learning_rate": 4.997285954008156e-06, + "loss": 0.5541, + "step": 1560 + }, + { + "epoch": 0.09505830770636056, + "grad_norm": 1.0542801858590571, + "learning_rate": 4.9972822355376885e-06, + "loss": 0.4749, + "step": 1561 + }, + { + "epoch": 0.09511920348323843, + "grad_norm": 1.0169727377991524, + "learning_rate": 4.9972785145230405e-06, + "loss": 0.6022, + "step": 1562 + }, + { + "epoch": 0.09518009926011631, + "grad_norm": 1.146382297884247, + "learning_rate": 4.997274790964217e-06, + "loss": 0.584, + "step": 1563 + }, + { + "epoch": 0.09524099503699418, + "grad_norm": 1.0925623592528895, + "learning_rate": 4.997271064861223e-06, + "loss": 0.5592, + "step": 1564 + }, + { + "epoch": 0.09530189081387205, + "grad_norm": 1.050967815688295, + "learning_rate": 4.997267336214061e-06, + "loss": 0.5131, + "step": 1565 + }, + { + "epoch": 0.09536278659074993, + "grad_norm": 0.95096346804481, + "learning_rate": 4.997263605022734e-06, + "loss": 0.5599, + "step": 1566 + }, + { + "epoch": 0.0954236823676278, + "grad_norm": 0.9592202214737985, + "learning_rate": 4.997259871287248e-06, + "loss": 0.5557, + "step": 1567 + }, + { + "epoch": 0.09548457814450567, + "grad_norm": 0.9891832824946665, + "learning_rate": 4.997256135007604e-06, + "loss": 0.5809, + "step": 1568 + }, + { + "epoch": 0.09554547392138356, + "grad_norm": 0.988082912695688, + "learning_rate": 4.997252396183809e-06, + "loss": 0.548, + "step": 1569 + }, + { + "epoch": 0.09560636969826143, + "grad_norm": 1.0495837025772932, + "learning_rate": 4.997248654815864e-06, + "loss": 0.5424, + "step": 1570 + }, + { + "epoch": 0.0956672654751393, + "grad_norm": 1.0506824849627017, + "learning_rate": 4.997244910903775e-06, + "loss": 0.5351, + "step": 1571 + }, + { + "epoch": 0.09572816125201718, + "grad_norm": 1.0076493019882604, + "learning_rate": 4.9972411644475434e-06, + "loss": 0.5409, + "step": 1572 + }, + { + "epoch": 0.09578905702889505, + "grad_norm": 0.9574091855158229, + "learning_rate": 4.997237415447176e-06, + "loss": 0.5306, + "step": 1573 + }, + { + "epoch": 0.09584995280577292, + "grad_norm": 1.0688514449495776, + "learning_rate": 4.9972336639026746e-06, + "loss": 0.6222, + "step": 1574 + }, + { + "epoch": 0.0959108485826508, + "grad_norm": 1.014330762404642, + "learning_rate": 4.997229909814043e-06, + "loss": 0.5506, + "step": 1575 + }, + { + "epoch": 0.09597174435952867, + "grad_norm": 1.0548737965095851, + "learning_rate": 4.997226153181285e-06, + "loss": 0.4952, + "step": 1576 + }, + { + "epoch": 0.09603264013640654, + "grad_norm": 1.1088414210380102, + "learning_rate": 4.997222394004405e-06, + "loss": 0.5097, + "step": 1577 + }, + { + "epoch": 0.09609353591328441, + "grad_norm": 1.0274473028913385, + "learning_rate": 4.997218632283408e-06, + "loss": 0.4958, + "step": 1578 + }, + { + "epoch": 0.09615443169016229, + "grad_norm": 1.044897866548084, + "learning_rate": 4.997214868018296e-06, + "loss": 0.5665, + "step": 1579 + }, + { + "epoch": 0.09621532746704016, + "grad_norm": 1.0122183704238317, + "learning_rate": 4.997211101209073e-06, + "loss": 0.5603, + "step": 1580 + }, + { + "epoch": 0.09627622324391803, + "grad_norm": 0.8771526626743141, + "learning_rate": 4.997207331855744e-06, + "loss": 0.5417, + "step": 1581 + }, + { + "epoch": 0.0963371190207959, + "grad_norm": 1.0327186318233272, + "learning_rate": 4.997203559958311e-06, + "loss": 0.4961, + "step": 1582 + }, + { + "epoch": 0.09639801479767378, + "grad_norm": 1.0653502157672567, + "learning_rate": 4.99719978551678e-06, + "loss": 0.5163, + "step": 1583 + }, + { + "epoch": 0.09645891057455165, + "grad_norm": 1.094303821977199, + "learning_rate": 4.997196008531153e-06, + "loss": 0.5635, + "step": 1584 + }, + { + "epoch": 0.09651980635142952, + "grad_norm": 1.0898070521865237, + "learning_rate": 4.9971922290014356e-06, + "loss": 0.5281, + "step": 1585 + }, + { + "epoch": 0.0965807021283074, + "grad_norm": 1.0508055164904817, + "learning_rate": 4.99718844692763e-06, + "loss": 0.5349, + "step": 1586 + }, + { + "epoch": 0.09664159790518527, + "grad_norm": 1.0449785320384775, + "learning_rate": 4.997184662309741e-06, + "loss": 0.4678, + "step": 1587 + }, + { + "epoch": 0.09670249368206314, + "grad_norm": 0.9608799629650544, + "learning_rate": 4.997180875147771e-06, + "loss": 0.5726, + "step": 1588 + }, + { + "epoch": 0.09676338945894103, + "grad_norm": 1.0589726621498352, + "learning_rate": 4.997177085441727e-06, + "loss": 0.5764, + "step": 1589 + }, + { + "epoch": 0.0968242852358189, + "grad_norm": 1.0402023153469924, + "learning_rate": 4.99717329319161e-06, + "loss": 0.5333, + "step": 1590 + }, + { + "epoch": 0.09688518101269677, + "grad_norm": 1.1480961437663826, + "learning_rate": 4.997169498397424e-06, + "loss": 0.5458, + "step": 1591 + }, + { + "epoch": 0.09694607678957465, + "grad_norm": 1.0492152162174317, + "learning_rate": 4.997165701059175e-06, + "loss": 0.4963, + "step": 1592 + }, + { + "epoch": 0.09700697256645252, + "grad_norm": 1.0494049982685263, + "learning_rate": 4.997161901176865e-06, + "loss": 0.6331, + "step": 1593 + }, + { + "epoch": 0.0970678683433304, + "grad_norm": 1.0530816811503116, + "learning_rate": 4.997158098750498e-06, + "loss": 0.5482, + "step": 1594 + }, + { + "epoch": 0.09712876412020827, + "grad_norm": 1.056144132256503, + "learning_rate": 4.997154293780078e-06, + "loss": 0.518, + "step": 1595 + }, + { + "epoch": 0.09718965989708614, + "grad_norm": 1.083980197418238, + "learning_rate": 4.9971504862656096e-06, + "loss": 0.5467, + "step": 1596 + }, + { + "epoch": 0.09725055567396401, + "grad_norm": 0.9895856942452385, + "learning_rate": 4.997146676207096e-06, + "loss": 0.5809, + "step": 1597 + }, + { + "epoch": 0.09731145145084188, + "grad_norm": 1.006164071378832, + "learning_rate": 4.997142863604542e-06, + "loss": 0.5575, + "step": 1598 + }, + { + "epoch": 0.09737234722771976, + "grad_norm": 1.0060217843085906, + "learning_rate": 4.997139048457949e-06, + "loss": 0.5524, + "step": 1599 + }, + { + "epoch": 0.09743324300459763, + "grad_norm": 1.0499065109490533, + "learning_rate": 4.997135230767325e-06, + "loss": 0.6315, + "step": 1600 + }, + { + "epoch": 0.0974941387814755, + "grad_norm": 0.9459363408265279, + "learning_rate": 4.99713141053267e-06, + "loss": 0.6146, + "step": 1601 + }, + { + "epoch": 0.09755503455835338, + "grad_norm": 0.9677273042392831, + "learning_rate": 4.997127587753989e-06, + "loss": 0.6101, + "step": 1602 + }, + { + "epoch": 0.09761593033523125, + "grad_norm": 1.1040265602940726, + "learning_rate": 4.9971237624312876e-06, + "loss": 0.5918, + "step": 1603 + }, + { + "epoch": 0.09767682611210912, + "grad_norm": 0.9673238105062145, + "learning_rate": 4.997119934564568e-06, + "loss": 0.5945, + "step": 1604 + }, + { + "epoch": 0.097737721888987, + "grad_norm": 1.0385811926560016, + "learning_rate": 4.997116104153835e-06, + "loss": 0.5017, + "step": 1605 + }, + { + "epoch": 0.09779861766586487, + "grad_norm": 1.0924994877208196, + "learning_rate": 4.997112271199092e-06, + "loss": 0.54, + "step": 1606 + }, + { + "epoch": 0.09785951344274274, + "grad_norm": 1.0984714330536742, + "learning_rate": 4.997108435700342e-06, + "loss": 0.5217, + "step": 1607 + }, + { + "epoch": 0.09792040921962061, + "grad_norm": 1.1076211461452965, + "learning_rate": 4.99710459765759e-06, + "loss": 0.5101, + "step": 1608 + }, + { + "epoch": 0.09798130499649849, + "grad_norm": 1.0309572964691338, + "learning_rate": 4.99710075707084e-06, + "loss": 0.6072, + "step": 1609 + }, + { + "epoch": 0.09804220077337637, + "grad_norm": 1.1598314623467008, + "learning_rate": 4.997096913940096e-06, + "loss": 0.4946, + "step": 1610 + }, + { + "epoch": 0.09810309655025425, + "grad_norm": 0.9649607105716999, + "learning_rate": 4.997093068265361e-06, + "loss": 0.5898, + "step": 1611 + }, + { + "epoch": 0.09816399232713212, + "grad_norm": 1.0740658280050155, + "learning_rate": 4.9970892200466404e-06, + "loss": 0.4929, + "step": 1612 + }, + { + "epoch": 0.09822488810400999, + "grad_norm": 1.0548457210877893, + "learning_rate": 4.997085369283936e-06, + "loss": 0.599, + "step": 1613 + }, + { + "epoch": 0.09828578388088786, + "grad_norm": 1.0463515671982837, + "learning_rate": 4.997081515977253e-06, + "loss": 0.5766, + "step": 1614 + }, + { + "epoch": 0.09834667965776574, + "grad_norm": 1.184934601623124, + "learning_rate": 4.9970776601265965e-06, + "loss": 0.5412, + "step": 1615 + }, + { + "epoch": 0.09840757543464361, + "grad_norm": 0.9569547232273173, + "learning_rate": 4.997073801731969e-06, + "loss": 0.5652, + "step": 1616 + }, + { + "epoch": 0.09846847121152148, + "grad_norm": 1.0677910138729887, + "learning_rate": 4.997069940793374e-06, + "loss": 0.5457, + "step": 1617 + }, + { + "epoch": 0.09852936698839936, + "grad_norm": 1.045797203809808, + "learning_rate": 4.997066077310816e-06, + "loss": 0.4915, + "step": 1618 + }, + { + "epoch": 0.09859026276527723, + "grad_norm": 1.009444067826701, + "learning_rate": 4.997062211284299e-06, + "loss": 0.5466, + "step": 1619 + }, + { + "epoch": 0.0986511585421551, + "grad_norm": 1.0360269955297459, + "learning_rate": 4.9970583427138275e-06, + "loss": 0.533, + "step": 1620 + }, + { + "epoch": 0.09871205431903297, + "grad_norm": 1.0005987213485463, + "learning_rate": 4.997054471599404e-06, + "loss": 0.5893, + "step": 1621 + }, + { + "epoch": 0.09877295009591085, + "grad_norm": 1.19080785133743, + "learning_rate": 4.997050597941034e-06, + "loss": 0.526, + "step": 1622 + }, + { + "epoch": 0.09883384587278872, + "grad_norm": 1.0701734217052064, + "learning_rate": 4.9970467217387205e-06, + "loss": 0.5537, + "step": 1623 + }, + { + "epoch": 0.09889474164966659, + "grad_norm": 1.0262236021560172, + "learning_rate": 4.9970428429924685e-06, + "loss": 0.5831, + "step": 1624 + }, + { + "epoch": 0.09895563742654447, + "grad_norm": 1.0592401269019265, + "learning_rate": 4.99703896170228e-06, + "loss": 0.5082, + "step": 1625 + }, + { + "epoch": 0.09901653320342234, + "grad_norm": 1.0294396481232975, + "learning_rate": 4.997035077868161e-06, + "loss": 0.6048, + "step": 1626 + }, + { + "epoch": 0.09907742898030021, + "grad_norm": 1.0712444035132218, + "learning_rate": 4.9970311914901135e-06, + "loss": 0.5282, + "step": 1627 + }, + { + "epoch": 0.09913832475717808, + "grad_norm": 1.0201893384144007, + "learning_rate": 4.997027302568144e-06, + "loss": 0.4903, + "step": 1628 + }, + { + "epoch": 0.09919922053405596, + "grad_norm": 1.0500106808726113, + "learning_rate": 4.997023411102254e-06, + "loss": 0.5567, + "step": 1629 + }, + { + "epoch": 0.09926011631093384, + "grad_norm": 1.0324008578928712, + "learning_rate": 4.997019517092449e-06, + "loss": 0.5689, + "step": 1630 + }, + { + "epoch": 0.09932101208781172, + "grad_norm": 0.9984614804117524, + "learning_rate": 4.9970156205387325e-06, + "loss": 0.5665, + "step": 1631 + }, + { + "epoch": 0.09938190786468959, + "grad_norm": 1.0412374187952131, + "learning_rate": 4.997011721441107e-06, + "loss": 0.6086, + "step": 1632 + }, + { + "epoch": 0.09944280364156746, + "grad_norm": 1.0253725531842854, + "learning_rate": 4.99700781979958e-06, + "loss": 0.5605, + "step": 1633 + }, + { + "epoch": 0.09950369941844533, + "grad_norm": 1.1065156383615307, + "learning_rate": 4.997003915614151e-06, + "loss": 0.5372, + "step": 1634 + }, + { + "epoch": 0.09956459519532321, + "grad_norm": 1.123751778752941, + "learning_rate": 4.997000008884828e-06, + "loss": 0.526, + "step": 1635 + }, + { + "epoch": 0.09962549097220108, + "grad_norm": 0.9448964530511076, + "learning_rate": 4.996996099611613e-06, + "loss": 0.6052, + "step": 1636 + }, + { + "epoch": 0.09968638674907895, + "grad_norm": 1.0279268124592427, + "learning_rate": 4.9969921877945105e-06, + "loss": 0.5397, + "step": 1637 + }, + { + "epoch": 0.09974728252595683, + "grad_norm": 1.135041739719436, + "learning_rate": 4.996988273433524e-06, + "loss": 0.5171, + "step": 1638 + }, + { + "epoch": 0.0998081783028347, + "grad_norm": 1.1853031709820716, + "learning_rate": 4.996984356528657e-06, + "loss": 0.5193, + "step": 1639 + }, + { + "epoch": 0.09986907407971257, + "grad_norm": 1.0360247508771494, + "learning_rate": 4.996980437079915e-06, + "loss": 0.5163, + "step": 1640 + }, + { + "epoch": 0.09992996985659044, + "grad_norm": 1.1127928193727894, + "learning_rate": 4.996976515087301e-06, + "loss": 0.579, + "step": 1641 + }, + { + "epoch": 0.09999086563346832, + "grad_norm": 1.215921847440474, + "learning_rate": 4.99697259055082e-06, + "loss": 0.4907, + "step": 1642 + }, + { + "epoch": 0.10005176141034619, + "grad_norm": 0.9960483170141813, + "learning_rate": 4.996968663470474e-06, + "loss": 0.5443, + "step": 1643 + }, + { + "epoch": 0.10011265718722406, + "grad_norm": 1.0199276845261946, + "learning_rate": 4.996964733846269e-06, + "loss": 0.5514, + "step": 1644 + }, + { + "epoch": 0.10017355296410194, + "grad_norm": 1.0181854755768216, + "learning_rate": 4.996960801678209e-06, + "loss": 0.6025, + "step": 1645 + }, + { + "epoch": 0.10023444874097981, + "grad_norm": 0.9397853048015088, + "learning_rate": 4.996956866966296e-06, + "loss": 0.5562, + "step": 1646 + }, + { + "epoch": 0.10029534451785768, + "grad_norm": 1.1453137165321257, + "learning_rate": 4.996952929710536e-06, + "loss": 0.5501, + "step": 1647 + }, + { + "epoch": 0.10035624029473555, + "grad_norm": 1.0745003297576192, + "learning_rate": 4.9969489899109315e-06, + "loss": 0.6093, + "step": 1648 + }, + { + "epoch": 0.10041713607161343, + "grad_norm": 1.007211203289983, + "learning_rate": 4.9969450475674875e-06, + "loss": 0.5005, + "step": 1649 + }, + { + "epoch": 0.1004780318484913, + "grad_norm": 1.0774116484464575, + "learning_rate": 4.996941102680209e-06, + "loss": 0.5598, + "step": 1650 + }, + { + "epoch": 0.10053892762536919, + "grad_norm": 0.9956222125322651, + "learning_rate": 4.996937155249098e-06, + "loss": 0.6007, + "step": 1651 + }, + { + "epoch": 0.10059982340224706, + "grad_norm": 1.042800612644546, + "learning_rate": 4.996933205274158e-06, + "loss": 0.5185, + "step": 1652 + }, + { + "epoch": 0.10066071917912493, + "grad_norm": 1.0427137072019084, + "learning_rate": 4.996929252755396e-06, + "loss": 0.5505, + "step": 1653 + }, + { + "epoch": 0.1007216149560028, + "grad_norm": 1.0032361463838362, + "learning_rate": 4.996925297692814e-06, + "loss": 0.5824, + "step": 1654 + }, + { + "epoch": 0.10078251073288068, + "grad_norm": 1.0218722381505958, + "learning_rate": 4.9969213400864154e-06, + "loss": 0.6844, + "step": 1655 + }, + { + "epoch": 0.10084340650975855, + "grad_norm": 1.0658528430895233, + "learning_rate": 4.996917379936207e-06, + "loss": 0.5447, + "step": 1656 + }, + { + "epoch": 0.10090430228663642, + "grad_norm": 1.0674475411601154, + "learning_rate": 4.99691341724219e-06, + "loss": 0.5317, + "step": 1657 + }, + { + "epoch": 0.1009651980635143, + "grad_norm": 1.0466589516755371, + "learning_rate": 4.99690945200437e-06, + "loss": 0.4784, + "step": 1658 + }, + { + "epoch": 0.10102609384039217, + "grad_norm": 1.1203625785754157, + "learning_rate": 4.9969054842227506e-06, + "loss": 0.5169, + "step": 1659 + }, + { + "epoch": 0.10108698961727004, + "grad_norm": 1.0292750644858562, + "learning_rate": 4.996901513897335e-06, + "loss": 0.5441, + "step": 1660 + }, + { + "epoch": 0.10114788539414792, + "grad_norm": 0.9810794552396621, + "learning_rate": 4.996897541028129e-06, + "loss": 0.5937, + "step": 1661 + }, + { + "epoch": 0.10120878117102579, + "grad_norm": 1.032579125241424, + "learning_rate": 4.9968935656151355e-06, + "loss": 0.535, + "step": 1662 + }, + { + "epoch": 0.10126967694790366, + "grad_norm": 1.1395235261557664, + "learning_rate": 4.996889587658358e-06, + "loss": 0.491, + "step": 1663 + }, + { + "epoch": 0.10133057272478153, + "grad_norm": 1.0848052334082923, + "learning_rate": 4.996885607157802e-06, + "loss": 0.5802, + "step": 1664 + }, + { + "epoch": 0.10139146850165941, + "grad_norm": 1.0299315273618883, + "learning_rate": 4.996881624113471e-06, + "loss": 0.5417, + "step": 1665 + }, + { + "epoch": 0.10145236427853728, + "grad_norm": 1.0361999772949415, + "learning_rate": 4.996877638525368e-06, + "loss": 0.5156, + "step": 1666 + }, + { + "epoch": 0.10151326005541515, + "grad_norm": 0.9814212612719733, + "learning_rate": 4.996873650393499e-06, + "loss": 0.5272, + "step": 1667 + }, + { + "epoch": 0.10157415583229303, + "grad_norm": 1.0893565505108886, + "learning_rate": 4.996869659717867e-06, + "loss": 0.5897, + "step": 1668 + }, + { + "epoch": 0.1016350516091709, + "grad_norm": 0.9740236209885118, + "learning_rate": 4.996865666498476e-06, + "loss": 0.5473, + "step": 1669 + }, + { + "epoch": 0.10169594738604877, + "grad_norm": 1.0796919151194282, + "learning_rate": 4.996861670735329e-06, + "loss": 0.5862, + "step": 1670 + }, + { + "epoch": 0.10175684316292666, + "grad_norm": 1.0677942335693211, + "learning_rate": 4.996857672428432e-06, + "loss": 0.5099, + "step": 1671 + }, + { + "epoch": 0.10181773893980453, + "grad_norm": 1.1036227162720051, + "learning_rate": 4.99685367157779e-06, + "loss": 0.5413, + "step": 1672 + }, + { + "epoch": 0.1018786347166824, + "grad_norm": 0.9745475816729223, + "learning_rate": 4.9968496681834025e-06, + "loss": 0.5668, + "step": 1673 + }, + { + "epoch": 0.10193953049356028, + "grad_norm": 1.0851722582213068, + "learning_rate": 4.996845662245278e-06, + "loss": 0.5008, + "step": 1674 + }, + { + "epoch": 0.10200042627043815, + "grad_norm": 1.1454709699567331, + "learning_rate": 4.996841653763419e-06, + "loss": 0.5232, + "step": 1675 + }, + { + "epoch": 0.10206132204731602, + "grad_norm": 1.1398200122731326, + "learning_rate": 4.996837642737829e-06, + "loss": 0.5151, + "step": 1676 + }, + { + "epoch": 0.1021222178241939, + "grad_norm": 1.0798872597905045, + "learning_rate": 4.996833629168514e-06, + "loss": 0.4811, + "step": 1677 + }, + { + "epoch": 0.10218311360107177, + "grad_norm": 1.051458899656529, + "learning_rate": 4.996829613055476e-06, + "loss": 0.5604, + "step": 1678 + }, + { + "epoch": 0.10224400937794964, + "grad_norm": 1.0954820393907971, + "learning_rate": 4.99682559439872e-06, + "loss": 0.5545, + "step": 1679 + }, + { + "epoch": 0.10230490515482751, + "grad_norm": 1.15296699434543, + "learning_rate": 4.996821573198249e-06, + "loss": 0.5303, + "step": 1680 + }, + { + "epoch": 0.10236580093170539, + "grad_norm": 1.087817299548718, + "learning_rate": 4.996817549454069e-06, + "loss": 0.4881, + "step": 1681 + }, + { + "epoch": 0.10242669670858326, + "grad_norm": 1.0398611135368738, + "learning_rate": 4.996813523166184e-06, + "loss": 0.5493, + "step": 1682 + }, + { + "epoch": 0.10248759248546113, + "grad_norm": 1.0424751651573563, + "learning_rate": 4.996809494334596e-06, + "loss": 0.561, + "step": 1683 + }, + { + "epoch": 0.102548488262339, + "grad_norm": 1.0893207748337808, + "learning_rate": 4.996805462959311e-06, + "loss": 0.4902, + "step": 1684 + }, + { + "epoch": 0.10260938403921688, + "grad_norm": 1.0761885931252027, + "learning_rate": 4.9968014290403325e-06, + "loss": 0.4669, + "step": 1685 + }, + { + "epoch": 0.10267027981609475, + "grad_norm": 1.0114898442194173, + "learning_rate": 4.996797392577665e-06, + "loss": 0.4942, + "step": 1686 + }, + { + "epoch": 0.10273117559297262, + "grad_norm": 0.9418573139046353, + "learning_rate": 4.996793353571311e-06, + "loss": 0.5991, + "step": 1687 + }, + { + "epoch": 0.1027920713698505, + "grad_norm": 1.0481297164331296, + "learning_rate": 4.996789312021277e-06, + "loss": 0.4797, + "step": 1688 + }, + { + "epoch": 0.10285296714672837, + "grad_norm": 1.0718218539241076, + "learning_rate": 4.996785267927566e-06, + "loss": 0.5619, + "step": 1689 + }, + { + "epoch": 0.10291386292360624, + "grad_norm": 0.9478142839990505, + "learning_rate": 4.996781221290181e-06, + "loss": 0.6306, + "step": 1690 + }, + { + "epoch": 0.10297475870048411, + "grad_norm": 0.9925755233042549, + "learning_rate": 4.996777172109128e-06, + "loss": 0.5716, + "step": 1691 + }, + { + "epoch": 0.103035654477362, + "grad_norm": 1.0251064865818886, + "learning_rate": 4.996773120384411e-06, + "loss": 0.4714, + "step": 1692 + }, + { + "epoch": 0.10309655025423987, + "grad_norm": 0.9927887457818231, + "learning_rate": 4.996769066116032e-06, + "loss": 0.5567, + "step": 1693 + }, + { + "epoch": 0.10315744603111775, + "grad_norm": 1.0647482212678125, + "learning_rate": 4.996765009303997e-06, + "loss": 0.5784, + "step": 1694 + }, + { + "epoch": 0.10321834180799562, + "grad_norm": 1.018995301849709, + "learning_rate": 4.99676094994831e-06, + "loss": 0.5321, + "step": 1695 + }, + { + "epoch": 0.10327923758487349, + "grad_norm": 1.0766568590577419, + "learning_rate": 4.996756888048975e-06, + "loss": 0.487, + "step": 1696 + }, + { + "epoch": 0.10334013336175137, + "grad_norm": 1.0562313610797487, + "learning_rate": 4.996752823605995e-06, + "loss": 0.5259, + "step": 1697 + }, + { + "epoch": 0.10340102913862924, + "grad_norm": 0.9457696049069677, + "learning_rate": 4.996748756619376e-06, + "loss": 0.6163, + "step": 1698 + }, + { + "epoch": 0.10346192491550711, + "grad_norm": 1.2239394878611567, + "learning_rate": 4.996744687089121e-06, + "loss": 0.5078, + "step": 1699 + }, + { + "epoch": 0.10352282069238498, + "grad_norm": 0.9926485554793067, + "learning_rate": 4.996740615015235e-06, + "loss": 0.5697, + "step": 1700 + }, + { + "epoch": 0.10358371646926286, + "grad_norm": 1.0372014052457623, + "learning_rate": 4.996736540397722e-06, + "loss": 0.5488, + "step": 1701 + }, + { + "epoch": 0.10364461224614073, + "grad_norm": 0.996839520953574, + "learning_rate": 4.9967324632365844e-06, + "loss": 0.5605, + "step": 1702 + }, + { + "epoch": 0.1037055080230186, + "grad_norm": 1.0608019965169122, + "learning_rate": 4.996728383531828e-06, + "loss": 0.5099, + "step": 1703 + }, + { + "epoch": 0.10376640379989648, + "grad_norm": 1.082333285503111, + "learning_rate": 4.996724301283457e-06, + "loss": 0.5503, + "step": 1704 + }, + { + "epoch": 0.10382729957677435, + "grad_norm": 1.0232805506503986, + "learning_rate": 4.996720216491474e-06, + "loss": 0.567, + "step": 1705 + }, + { + "epoch": 0.10388819535365222, + "grad_norm": 1.0630315860084953, + "learning_rate": 4.996716129155887e-06, + "loss": 0.5892, + "step": 1706 + }, + { + "epoch": 0.1039490911305301, + "grad_norm": 1.0741858171281857, + "learning_rate": 4.996712039276695e-06, + "loss": 0.545, + "step": 1707 + }, + { + "epoch": 0.10400998690740797, + "grad_norm": 1.0374000397979912, + "learning_rate": 4.9967079468539055e-06, + "loss": 0.5274, + "step": 1708 + }, + { + "epoch": 0.10407088268428584, + "grad_norm": 1.024356088006433, + "learning_rate": 4.996703851887522e-06, + "loss": 0.5972, + "step": 1709 + }, + { + "epoch": 0.10413177846116371, + "grad_norm": 1.0530121044577043, + "learning_rate": 4.996699754377548e-06, + "loss": 0.5307, + "step": 1710 + }, + { + "epoch": 0.10419267423804159, + "grad_norm": 0.9535226943596117, + "learning_rate": 4.996695654323989e-06, + "loss": 0.602, + "step": 1711 + }, + { + "epoch": 0.10425357001491947, + "grad_norm": 1.030329399496865, + "learning_rate": 4.996691551726848e-06, + "loss": 0.5097, + "step": 1712 + }, + { + "epoch": 0.10431446579179735, + "grad_norm": 1.0973671657825153, + "learning_rate": 4.996687446586129e-06, + "loss": 0.6642, + "step": 1713 + }, + { + "epoch": 0.10437536156867522, + "grad_norm": 1.010332372289805, + "learning_rate": 4.996683338901838e-06, + "loss": 0.562, + "step": 1714 + }, + { + "epoch": 0.10443625734555309, + "grad_norm": 1.0603870335199188, + "learning_rate": 4.996679228673976e-06, + "loss": 0.5444, + "step": 1715 + }, + { + "epoch": 0.10449715312243096, + "grad_norm": 1.1844143511667597, + "learning_rate": 4.9966751159025504e-06, + "loss": 0.5026, + "step": 1716 + }, + { + "epoch": 0.10455804889930884, + "grad_norm": 1.1476081513254086, + "learning_rate": 4.9966710005875645e-06, + "loss": 0.5364, + "step": 1717 + }, + { + "epoch": 0.10461894467618671, + "grad_norm": 1.068864930697842, + "learning_rate": 4.996666882729022e-06, + "loss": 0.5592, + "step": 1718 + }, + { + "epoch": 0.10467984045306458, + "grad_norm": 0.9731913457900554, + "learning_rate": 4.996662762326926e-06, + "loss": 0.5483, + "step": 1719 + }, + { + "epoch": 0.10474073622994245, + "grad_norm": 1.0450935734097802, + "learning_rate": 4.996658639381283e-06, + "loss": 0.5464, + "step": 1720 + }, + { + "epoch": 0.10480163200682033, + "grad_norm": 1.021978111408529, + "learning_rate": 4.9966545138920955e-06, + "loss": 0.5479, + "step": 1721 + }, + { + "epoch": 0.1048625277836982, + "grad_norm": 1.1360691099884468, + "learning_rate": 4.996650385859368e-06, + "loss": 0.5313, + "step": 1722 + }, + { + "epoch": 0.10492342356057607, + "grad_norm": 1.0310857846721262, + "learning_rate": 4.996646255283107e-06, + "loss": 0.5559, + "step": 1723 + }, + { + "epoch": 0.10498431933745395, + "grad_norm": 1.1331813300466749, + "learning_rate": 4.996642122163313e-06, + "loss": 0.4649, + "step": 1724 + }, + { + "epoch": 0.10504521511433182, + "grad_norm": 1.0472808933750806, + "learning_rate": 4.996637986499992e-06, + "loss": 0.529, + "step": 1725 + }, + { + "epoch": 0.10510611089120969, + "grad_norm": 1.0633602374194229, + "learning_rate": 4.996633848293148e-06, + "loss": 0.521, + "step": 1726 + }, + { + "epoch": 0.10516700666808756, + "grad_norm": 1.0900157285448655, + "learning_rate": 4.9966297075427855e-06, + "loss": 0.5269, + "step": 1727 + }, + { + "epoch": 0.10522790244496544, + "grad_norm": 1.0955320259768848, + "learning_rate": 4.996625564248909e-06, + "loss": 0.5518, + "step": 1728 + }, + { + "epoch": 0.10528879822184331, + "grad_norm": 1.126634945603069, + "learning_rate": 4.996621418411522e-06, + "loss": 0.4902, + "step": 1729 + }, + { + "epoch": 0.10534969399872118, + "grad_norm": 0.9812783723164764, + "learning_rate": 4.996617270030629e-06, + "loss": 0.5676, + "step": 1730 + }, + { + "epoch": 0.10541058977559906, + "grad_norm": 1.0891004917879188, + "learning_rate": 4.996613119106234e-06, + "loss": 0.5152, + "step": 1731 + }, + { + "epoch": 0.10547148555247693, + "grad_norm": 1.0279339391387845, + "learning_rate": 4.996608965638342e-06, + "loss": 0.5819, + "step": 1732 + }, + { + "epoch": 0.10553238132935482, + "grad_norm": 1.142646834850904, + "learning_rate": 4.996604809626956e-06, + "loss": 0.482, + "step": 1733 + }, + { + "epoch": 0.10559327710623269, + "grad_norm": 1.0770997602969976, + "learning_rate": 4.996600651072081e-06, + "loss": 0.5012, + "step": 1734 + }, + { + "epoch": 0.10565417288311056, + "grad_norm": 1.035994752358036, + "learning_rate": 4.996596489973722e-06, + "loss": 0.5154, + "step": 1735 + }, + { + "epoch": 0.10571506865998843, + "grad_norm": 1.1180477400450488, + "learning_rate": 4.996592326331882e-06, + "loss": 0.5234, + "step": 1736 + }, + { + "epoch": 0.10577596443686631, + "grad_norm": 1.0920299013743027, + "learning_rate": 4.996588160146565e-06, + "loss": 0.5837, + "step": 1737 + }, + { + "epoch": 0.10583686021374418, + "grad_norm": 1.1154964583492333, + "learning_rate": 4.996583991417776e-06, + "loss": 0.5622, + "step": 1738 + }, + { + "epoch": 0.10589775599062205, + "grad_norm": 1.0353724029183167, + "learning_rate": 4.996579820145519e-06, + "loss": 0.51, + "step": 1739 + }, + { + "epoch": 0.10595865176749993, + "grad_norm": 1.0390768721639931, + "learning_rate": 4.9965756463298e-06, + "loss": 0.4857, + "step": 1740 + }, + { + "epoch": 0.1060195475443778, + "grad_norm": 1.081458570543921, + "learning_rate": 4.9965714699706204e-06, + "loss": 0.467, + "step": 1741 + }, + { + "epoch": 0.10608044332125567, + "grad_norm": 0.9686078039442046, + "learning_rate": 4.996567291067985e-06, + "loss": 0.5696, + "step": 1742 + }, + { + "epoch": 0.10614133909813354, + "grad_norm": 0.9684680843497067, + "learning_rate": 4.9965631096219005e-06, + "loss": 0.5434, + "step": 1743 + }, + { + "epoch": 0.10620223487501142, + "grad_norm": 1.0325585691872274, + "learning_rate": 4.996558925632367e-06, + "loss": 0.5392, + "step": 1744 + }, + { + "epoch": 0.10626313065188929, + "grad_norm": 0.9852175390252056, + "learning_rate": 4.996554739099393e-06, + "loss": 0.5011, + "step": 1745 + }, + { + "epoch": 0.10632402642876716, + "grad_norm": 1.0503192432698503, + "learning_rate": 4.996550550022981e-06, + "loss": 0.491, + "step": 1746 + }, + { + "epoch": 0.10638492220564504, + "grad_norm": 1.0087160194758449, + "learning_rate": 4.9965463584031345e-06, + "loss": 0.5199, + "step": 1747 + }, + { + "epoch": 0.10644581798252291, + "grad_norm": 0.9726056096404159, + "learning_rate": 4.996542164239859e-06, + "loss": 0.6042, + "step": 1748 + }, + { + "epoch": 0.10650671375940078, + "grad_norm": 1.019430748383045, + "learning_rate": 4.996537967533158e-06, + "loss": 0.542, + "step": 1749 + }, + { + "epoch": 0.10656760953627865, + "grad_norm": 1.1224842739559349, + "learning_rate": 4.996533768283036e-06, + "loss": 0.5826, + "step": 1750 + }, + { + "epoch": 0.10662850531315653, + "grad_norm": 1.0313450963727968, + "learning_rate": 4.996529566489497e-06, + "loss": 0.5363, + "step": 1751 + }, + { + "epoch": 0.1066894010900344, + "grad_norm": 0.9755428810872762, + "learning_rate": 4.996525362152547e-06, + "loss": 0.5551, + "step": 1752 + }, + { + "epoch": 0.10675029686691229, + "grad_norm": 0.9708752144626873, + "learning_rate": 4.996521155272187e-06, + "loss": 0.5395, + "step": 1753 + }, + { + "epoch": 0.10681119264379016, + "grad_norm": 1.0140790796114103, + "learning_rate": 4.996516945848424e-06, + "loss": 0.5406, + "step": 1754 + }, + { + "epoch": 0.10687208842066803, + "grad_norm": 1.0003815597020813, + "learning_rate": 4.996512733881261e-06, + "loss": 0.5301, + "step": 1755 + }, + { + "epoch": 0.1069329841975459, + "grad_norm": 1.205361460837066, + "learning_rate": 4.9965085193707036e-06, + "loss": 0.5282, + "step": 1756 + }, + { + "epoch": 0.10699387997442378, + "grad_norm": 1.059891491963777, + "learning_rate": 4.996504302316755e-06, + "loss": 0.5134, + "step": 1757 + }, + { + "epoch": 0.10705477575130165, + "grad_norm": 0.990021941027551, + "learning_rate": 4.9965000827194186e-06, + "loss": 0.5933, + "step": 1758 + }, + { + "epoch": 0.10711567152817952, + "grad_norm": 1.1365303407705232, + "learning_rate": 4.9964958605787015e-06, + "loss": 0.5628, + "step": 1759 + }, + { + "epoch": 0.1071765673050574, + "grad_norm": 1.0066990029815868, + "learning_rate": 4.996491635894605e-06, + "loss": 0.4683, + "step": 1760 + }, + { + "epoch": 0.10723746308193527, + "grad_norm": 0.9572334659261204, + "learning_rate": 4.9964874086671354e-06, + "loss": 0.5595, + "step": 1761 + }, + { + "epoch": 0.10729835885881314, + "grad_norm": 0.9605404713782302, + "learning_rate": 4.9964831788962965e-06, + "loss": 0.5255, + "step": 1762 + }, + { + "epoch": 0.10735925463569101, + "grad_norm": 1.0605351394237161, + "learning_rate": 4.996478946582092e-06, + "loss": 0.5515, + "step": 1763 + }, + { + "epoch": 0.10742015041256889, + "grad_norm": 1.03645794770236, + "learning_rate": 4.996474711724526e-06, + "loss": 0.5517, + "step": 1764 + }, + { + "epoch": 0.10748104618944676, + "grad_norm": 1.0309384894148184, + "learning_rate": 4.9964704743236045e-06, + "loss": 0.5609, + "step": 1765 + }, + { + "epoch": 0.10754194196632463, + "grad_norm": 1.0659224777021032, + "learning_rate": 4.99646623437933e-06, + "loss": 0.5387, + "step": 1766 + }, + { + "epoch": 0.1076028377432025, + "grad_norm": 1.0171544526092064, + "learning_rate": 4.996461991891709e-06, + "loss": 0.5889, + "step": 1767 + }, + { + "epoch": 0.10766373352008038, + "grad_norm": 1.1463871584070118, + "learning_rate": 4.996457746860743e-06, + "loss": 0.5937, + "step": 1768 + }, + { + "epoch": 0.10772462929695825, + "grad_norm": 1.0423133686720698, + "learning_rate": 4.996453499286438e-06, + "loss": 0.5429, + "step": 1769 + }, + { + "epoch": 0.10778552507383612, + "grad_norm": 0.9955857308836678, + "learning_rate": 4.996449249168799e-06, + "loss": 0.4901, + "step": 1770 + }, + { + "epoch": 0.107846420850714, + "grad_norm": 1.111897604361036, + "learning_rate": 4.996444996507829e-06, + "loss": 0.5178, + "step": 1771 + }, + { + "epoch": 0.10790731662759187, + "grad_norm": 1.0114652270911213, + "learning_rate": 4.996440741303532e-06, + "loss": 0.5439, + "step": 1772 + }, + { + "epoch": 0.10796821240446974, + "grad_norm": 0.9525725976344975, + "learning_rate": 4.996436483555913e-06, + "loss": 0.6092, + "step": 1773 + }, + { + "epoch": 0.10802910818134763, + "grad_norm": 1.0487106849901062, + "learning_rate": 4.996432223264978e-06, + "loss": 0.5274, + "step": 1774 + }, + { + "epoch": 0.1080900039582255, + "grad_norm": 1.0536202206714569, + "learning_rate": 4.996427960430728e-06, + "loss": 0.5844, + "step": 1775 + }, + { + "epoch": 0.10815089973510338, + "grad_norm": 1.0488106397686263, + "learning_rate": 4.996423695053169e-06, + "loss": 0.5633, + "step": 1776 + }, + { + "epoch": 0.10821179551198125, + "grad_norm": 1.0667570628009386, + "learning_rate": 4.996419427132308e-06, + "loss": 0.4549, + "step": 1777 + }, + { + "epoch": 0.10827269128885912, + "grad_norm": 1.0720685308403228, + "learning_rate": 4.996415156668144e-06, + "loss": 0.4962, + "step": 1778 + }, + { + "epoch": 0.108333587065737, + "grad_norm": 0.9842558923310549, + "learning_rate": 4.996410883660685e-06, + "loss": 0.5814, + "step": 1779 + }, + { + "epoch": 0.10839448284261487, + "grad_norm": 0.9803446947903159, + "learning_rate": 4.996406608109935e-06, + "loss": 0.5792, + "step": 1780 + }, + { + "epoch": 0.10845537861949274, + "grad_norm": 1.071972898134292, + "learning_rate": 4.996402330015898e-06, + "loss": 0.5174, + "step": 1781 + }, + { + "epoch": 0.10851627439637061, + "grad_norm": 1.0923192948715454, + "learning_rate": 4.996398049378577e-06, + "loss": 0.5402, + "step": 1782 + }, + { + "epoch": 0.10857717017324849, + "grad_norm": 1.0258265982932828, + "learning_rate": 4.996393766197979e-06, + "loss": 0.5095, + "step": 1783 + }, + { + "epoch": 0.10863806595012636, + "grad_norm": 0.9931125541610801, + "learning_rate": 4.996389480474106e-06, + "loss": 0.5744, + "step": 1784 + }, + { + "epoch": 0.10869896172700423, + "grad_norm": 1.0761452523274695, + "learning_rate": 4.996385192206963e-06, + "loss": 0.5003, + "step": 1785 + }, + { + "epoch": 0.1087598575038821, + "grad_norm": 1.0635300434815718, + "learning_rate": 4.996380901396556e-06, + "loss": 0.576, + "step": 1786 + }, + { + "epoch": 0.10882075328075998, + "grad_norm": 1.0612246719912464, + "learning_rate": 4.996376608042887e-06, + "loss": 0.5592, + "step": 1787 + }, + { + "epoch": 0.10888164905763785, + "grad_norm": 1.0539681806164969, + "learning_rate": 4.996372312145962e-06, + "loss": 0.5877, + "step": 1788 + }, + { + "epoch": 0.10894254483451572, + "grad_norm": 0.9858774002220871, + "learning_rate": 4.996368013705784e-06, + "loss": 0.5698, + "step": 1789 + }, + { + "epoch": 0.1090034406113936, + "grad_norm": 1.1010012972689507, + "learning_rate": 4.996363712722359e-06, + "loss": 0.5111, + "step": 1790 + }, + { + "epoch": 0.10906433638827147, + "grad_norm": 1.0427382834655152, + "learning_rate": 4.99635940919569e-06, + "loss": 0.5418, + "step": 1791 + }, + { + "epoch": 0.10912523216514934, + "grad_norm": 0.9433634053320837, + "learning_rate": 4.9963551031257814e-06, + "loss": 0.5504, + "step": 1792 + }, + { + "epoch": 0.10918612794202721, + "grad_norm": 1.0744198283887572, + "learning_rate": 4.99635079451264e-06, + "loss": 0.5369, + "step": 1793 + }, + { + "epoch": 0.1092470237189051, + "grad_norm": 1.0367218823229851, + "learning_rate": 4.996346483356266e-06, + "loss": 0.571, + "step": 1794 + }, + { + "epoch": 0.10930791949578297, + "grad_norm": 0.9518974197887267, + "learning_rate": 4.996342169656668e-06, + "loss": 0.5414, + "step": 1795 + }, + { + "epoch": 0.10936881527266085, + "grad_norm": 1.1593089956257472, + "learning_rate": 4.9963378534138475e-06, + "loss": 0.5705, + "step": 1796 + }, + { + "epoch": 0.10942971104953872, + "grad_norm": 1.14986509881599, + "learning_rate": 4.99633353462781e-06, + "loss": 0.5518, + "step": 1797 + }, + { + "epoch": 0.10949060682641659, + "grad_norm": 1.1851779836877863, + "learning_rate": 4.9963292132985595e-06, + "loss": 0.4474, + "step": 1798 + }, + { + "epoch": 0.10955150260329446, + "grad_norm": 1.032241175731747, + "learning_rate": 4.9963248894261015e-06, + "loss": 0.5036, + "step": 1799 + }, + { + "epoch": 0.10961239838017234, + "grad_norm": 1.0811070096601771, + "learning_rate": 4.99632056301044e-06, + "loss": 0.5717, + "step": 1800 + }, + { + "epoch": 0.10967329415705021, + "grad_norm": 1.0750661674677393, + "learning_rate": 4.996316234051578e-06, + "loss": 0.4967, + "step": 1801 + }, + { + "epoch": 0.10973418993392808, + "grad_norm": 1.0279018068484733, + "learning_rate": 4.996311902549521e-06, + "loss": 0.5043, + "step": 1802 + }, + { + "epoch": 0.10979508571080596, + "grad_norm": 1.0707950972251334, + "learning_rate": 4.996307568504274e-06, + "loss": 0.5954, + "step": 1803 + }, + { + "epoch": 0.10985598148768383, + "grad_norm": 1.0562882317328932, + "learning_rate": 4.9963032319158394e-06, + "loss": 0.5287, + "step": 1804 + }, + { + "epoch": 0.1099168772645617, + "grad_norm": 0.9704391410957339, + "learning_rate": 4.9962988927842235e-06, + "loss": 0.6099, + "step": 1805 + }, + { + "epoch": 0.10997777304143957, + "grad_norm": 0.9995488648943995, + "learning_rate": 4.996294551109431e-06, + "loss": 0.6417, + "step": 1806 + }, + { + "epoch": 0.11003866881831745, + "grad_norm": 1.1008214043950628, + "learning_rate": 4.996290206891465e-06, + "loss": 0.5941, + "step": 1807 + }, + { + "epoch": 0.11009956459519532, + "grad_norm": 1.0950009130043912, + "learning_rate": 4.99628586013033e-06, + "loss": 0.5616, + "step": 1808 + }, + { + "epoch": 0.1101604603720732, + "grad_norm": 1.0422525528596744, + "learning_rate": 4.996281510826032e-06, + "loss": 0.602, + "step": 1809 + }, + { + "epoch": 0.11022135614895107, + "grad_norm": 1.1165355335711402, + "learning_rate": 4.996277158978573e-06, + "loss": 0.5495, + "step": 1810 + }, + { + "epoch": 0.11028225192582894, + "grad_norm": 1.0030540239203927, + "learning_rate": 4.996272804587959e-06, + "loss": 0.5263, + "step": 1811 + }, + { + "epoch": 0.11034314770270681, + "grad_norm": 0.9603609617960293, + "learning_rate": 4.996268447654195e-06, + "loss": 0.5094, + "step": 1812 + }, + { + "epoch": 0.11040404347958468, + "grad_norm": 1.0493635627560793, + "learning_rate": 4.996264088177284e-06, + "loss": 0.5648, + "step": 1813 + }, + { + "epoch": 0.11046493925646256, + "grad_norm": 1.1127218676257313, + "learning_rate": 4.996259726157231e-06, + "loss": 0.5317, + "step": 1814 + }, + { + "epoch": 0.11052583503334044, + "grad_norm": 1.0741650242211582, + "learning_rate": 4.996255361594041e-06, + "loss": 0.5625, + "step": 1815 + }, + { + "epoch": 0.11058673081021832, + "grad_norm": 0.9884485673073425, + "learning_rate": 4.996250994487717e-06, + "loss": 0.609, + "step": 1816 + }, + { + "epoch": 0.11064762658709619, + "grad_norm": 1.0100426627064114, + "learning_rate": 4.996246624838266e-06, + "loss": 0.5142, + "step": 1817 + }, + { + "epoch": 0.11070852236397406, + "grad_norm": 1.103025018886602, + "learning_rate": 4.996242252645689e-06, + "loss": 0.5105, + "step": 1818 + }, + { + "epoch": 0.11076941814085194, + "grad_norm": 1.0430189776507874, + "learning_rate": 4.996237877909993e-06, + "loss": 0.5996, + "step": 1819 + }, + { + "epoch": 0.11083031391772981, + "grad_norm": 0.9794500593454651, + "learning_rate": 4.996233500631182e-06, + "loss": 0.5231, + "step": 1820 + }, + { + "epoch": 0.11089120969460768, + "grad_norm": 1.0436955841094127, + "learning_rate": 4.996229120809261e-06, + "loss": 0.5272, + "step": 1821 + }, + { + "epoch": 0.11095210547148555, + "grad_norm": 1.069598222713175, + "learning_rate": 4.996224738444232e-06, + "loss": 0.4819, + "step": 1822 + }, + { + "epoch": 0.11101300124836343, + "grad_norm": 1.0282867641200237, + "learning_rate": 4.996220353536102e-06, + "loss": 0.5095, + "step": 1823 + }, + { + "epoch": 0.1110738970252413, + "grad_norm": 1.0659701580403138, + "learning_rate": 4.996215966084874e-06, + "loss": 0.6526, + "step": 1824 + }, + { + "epoch": 0.11113479280211917, + "grad_norm": 1.0711508540695838, + "learning_rate": 4.996211576090554e-06, + "loss": 0.4904, + "step": 1825 + }, + { + "epoch": 0.11119568857899705, + "grad_norm": 1.0445105218286674, + "learning_rate": 4.996207183553145e-06, + "loss": 0.5517, + "step": 1826 + }, + { + "epoch": 0.11125658435587492, + "grad_norm": 1.0237588705014908, + "learning_rate": 4.996202788472651e-06, + "loss": 0.5055, + "step": 1827 + }, + { + "epoch": 0.11131748013275279, + "grad_norm": 1.033499118291495, + "learning_rate": 4.996198390849079e-06, + "loss": 0.5629, + "step": 1828 + }, + { + "epoch": 0.11137837590963066, + "grad_norm": 1.0438215686742498, + "learning_rate": 4.996193990682432e-06, + "loss": 0.5365, + "step": 1829 + }, + { + "epoch": 0.11143927168650854, + "grad_norm": 1.0567640192296262, + "learning_rate": 4.996189587972714e-06, + "loss": 0.5199, + "step": 1830 + }, + { + "epoch": 0.11150016746338641, + "grad_norm": 1.0533377049824402, + "learning_rate": 4.99618518271993e-06, + "loss": 0.5326, + "step": 1831 + }, + { + "epoch": 0.11156106324026428, + "grad_norm": 1.0460182872536743, + "learning_rate": 4.996180774924085e-06, + "loss": 0.5202, + "step": 1832 + }, + { + "epoch": 0.11162195901714216, + "grad_norm": 1.1382306928826302, + "learning_rate": 4.996176364585181e-06, + "loss": 0.528, + "step": 1833 + }, + { + "epoch": 0.11168285479402003, + "grad_norm": 1.0937324913961817, + "learning_rate": 4.996171951703226e-06, + "loss": 0.5189, + "step": 1834 + }, + { + "epoch": 0.11174375057089792, + "grad_norm": 1.013475787092547, + "learning_rate": 4.996167536278223e-06, + "loss": 0.5258, + "step": 1835 + }, + { + "epoch": 0.11180464634777579, + "grad_norm": 1.1436433899686214, + "learning_rate": 4.996163118310176e-06, + "loss": 0.5454, + "step": 1836 + }, + { + "epoch": 0.11186554212465366, + "grad_norm": 1.0423266086850071, + "learning_rate": 4.99615869779909e-06, + "loss": 0.5335, + "step": 1837 + }, + { + "epoch": 0.11192643790153153, + "grad_norm": 1.0983775645375236, + "learning_rate": 4.99615427474497e-06, + "loss": 0.5567, + "step": 1838 + }, + { + "epoch": 0.1119873336784094, + "grad_norm": 1.0458947064609418, + "learning_rate": 4.9961498491478185e-06, + "loss": 0.5438, + "step": 1839 + }, + { + "epoch": 0.11204822945528728, + "grad_norm": 1.011425451928268, + "learning_rate": 4.996145421007642e-06, + "loss": 0.615, + "step": 1840 + }, + { + "epoch": 0.11210912523216515, + "grad_norm": 1.0851727398397542, + "learning_rate": 4.996140990324445e-06, + "loss": 0.5486, + "step": 1841 + }, + { + "epoch": 0.11217002100904302, + "grad_norm": 0.9829779534254999, + "learning_rate": 4.996136557098231e-06, + "loss": 0.5711, + "step": 1842 + }, + { + "epoch": 0.1122309167859209, + "grad_norm": 0.979284765441551, + "learning_rate": 4.996132121329006e-06, + "loss": 0.5594, + "step": 1843 + }, + { + "epoch": 0.11229181256279877, + "grad_norm": 1.048430715587884, + "learning_rate": 4.996127683016772e-06, + "loss": 0.5691, + "step": 1844 + }, + { + "epoch": 0.11235270833967664, + "grad_norm": 0.9800123444796744, + "learning_rate": 4.996123242161536e-06, + "loss": 0.5603, + "step": 1845 + }, + { + "epoch": 0.11241360411655452, + "grad_norm": 0.995489108983238, + "learning_rate": 4.9961187987633005e-06, + "loss": 0.529, + "step": 1846 + }, + { + "epoch": 0.11247449989343239, + "grad_norm": 1.034250329582233, + "learning_rate": 4.996114352822072e-06, + "loss": 0.5156, + "step": 1847 + }, + { + "epoch": 0.11253539567031026, + "grad_norm": 0.9820420548326423, + "learning_rate": 4.996109904337853e-06, + "loss": 0.5219, + "step": 1848 + }, + { + "epoch": 0.11259629144718813, + "grad_norm": 1.0446390408759836, + "learning_rate": 4.996105453310651e-06, + "loss": 0.5717, + "step": 1849 + }, + { + "epoch": 0.11265718722406601, + "grad_norm": 1.151153721548018, + "learning_rate": 4.996100999740467e-06, + "loss": 0.5118, + "step": 1850 + }, + { + "epoch": 0.11271808300094388, + "grad_norm": 1.1244929215161281, + "learning_rate": 4.996096543627308e-06, + "loss": 0.5324, + "step": 1851 + }, + { + "epoch": 0.11277897877782175, + "grad_norm": 1.1255276444253044, + "learning_rate": 4.9960920849711775e-06, + "loss": 0.5055, + "step": 1852 + }, + { + "epoch": 0.11283987455469963, + "grad_norm": 1.1014014166193398, + "learning_rate": 4.99608762377208e-06, + "loss": 0.5571, + "step": 1853 + }, + { + "epoch": 0.1129007703315775, + "grad_norm": 1.105023153058197, + "learning_rate": 4.996083160030021e-06, + "loss": 0.5439, + "step": 1854 + }, + { + "epoch": 0.11296166610845537, + "grad_norm": 1.100500484249664, + "learning_rate": 4.996078693745004e-06, + "loss": 0.4756, + "step": 1855 + }, + { + "epoch": 0.11302256188533326, + "grad_norm": 0.9825598577157797, + "learning_rate": 4.9960742249170334e-06, + "loss": 0.5446, + "step": 1856 + }, + { + "epoch": 0.11308345766221113, + "grad_norm": 1.0565833211356086, + "learning_rate": 4.996069753546115e-06, + "loss": 0.4918, + "step": 1857 + }, + { + "epoch": 0.113144353439089, + "grad_norm": 1.1175763639064378, + "learning_rate": 4.996065279632253e-06, + "loss": 0.5124, + "step": 1858 + }, + { + "epoch": 0.11320524921596688, + "grad_norm": 0.92513368537073, + "learning_rate": 4.996060803175451e-06, + "loss": 0.5812, + "step": 1859 + }, + { + "epoch": 0.11326614499284475, + "grad_norm": 1.0814544031358755, + "learning_rate": 4.9960563241757135e-06, + "loss": 0.5251, + "step": 1860 + }, + { + "epoch": 0.11332704076972262, + "grad_norm": 1.108513606541972, + "learning_rate": 4.996051842633047e-06, + "loss": 0.5947, + "step": 1861 + }, + { + "epoch": 0.1133879365466005, + "grad_norm": 0.9906223842611532, + "learning_rate": 4.996047358547454e-06, + "loss": 0.5216, + "step": 1862 + }, + { + "epoch": 0.11344883232347837, + "grad_norm": 1.077180566489411, + "learning_rate": 4.9960428719189396e-06, + "loss": 0.5289, + "step": 1863 + }, + { + "epoch": 0.11350972810035624, + "grad_norm": 1.078030958106031, + "learning_rate": 4.996038382747509e-06, + "loss": 0.5529, + "step": 1864 + }, + { + "epoch": 0.11357062387723411, + "grad_norm": 1.0809782141970734, + "learning_rate": 4.996033891033166e-06, + "loss": 0.6242, + "step": 1865 + }, + { + "epoch": 0.11363151965411199, + "grad_norm": 1.0113270598272546, + "learning_rate": 4.9960293967759165e-06, + "loss": 0.5558, + "step": 1866 + }, + { + "epoch": 0.11369241543098986, + "grad_norm": 0.9704247532473209, + "learning_rate": 4.996024899975763e-06, + "loss": 0.5823, + "step": 1867 + }, + { + "epoch": 0.11375331120786773, + "grad_norm": 1.0229987878276663, + "learning_rate": 4.996020400632713e-06, + "loss": 0.5697, + "step": 1868 + }, + { + "epoch": 0.1138142069847456, + "grad_norm": 1.0395421578917021, + "learning_rate": 4.996015898746768e-06, + "loss": 0.5498, + "step": 1869 + }, + { + "epoch": 0.11387510276162348, + "grad_norm": 0.91815413224145, + "learning_rate": 4.9960113943179335e-06, + "loss": 0.5702, + "step": 1870 + }, + { + "epoch": 0.11393599853850135, + "grad_norm": 0.9685249543190139, + "learning_rate": 4.996006887346216e-06, + "loss": 0.605, + "step": 1871 + }, + { + "epoch": 0.11399689431537922, + "grad_norm": 1.022024593145439, + "learning_rate": 4.996002377831617e-06, + "loss": 0.5474, + "step": 1872 + }, + { + "epoch": 0.1140577900922571, + "grad_norm": 1.071138782779551, + "learning_rate": 4.9959978657741435e-06, + "loss": 0.4922, + "step": 1873 + }, + { + "epoch": 0.11411868586913497, + "grad_norm": 1.0892621396608342, + "learning_rate": 4.995993351173799e-06, + "loss": 0.4889, + "step": 1874 + }, + { + "epoch": 0.11417958164601284, + "grad_norm": 1.0253571013033382, + "learning_rate": 4.995988834030588e-06, + "loss": 0.5353, + "step": 1875 + }, + { + "epoch": 0.11424047742289073, + "grad_norm": 1.0912510631032613, + "learning_rate": 4.995984314344516e-06, + "loss": 0.5361, + "step": 1876 + }, + { + "epoch": 0.1143013731997686, + "grad_norm": 1.034904429577967, + "learning_rate": 4.995979792115587e-06, + "loss": 0.5383, + "step": 1877 + }, + { + "epoch": 0.11436226897664648, + "grad_norm": 1.0060274869531647, + "learning_rate": 4.995975267343806e-06, + "loss": 0.5967, + "step": 1878 + }, + { + "epoch": 0.11442316475352435, + "grad_norm": 1.0678052362251962, + "learning_rate": 4.995970740029176e-06, + "loss": 0.5029, + "step": 1879 + }, + { + "epoch": 0.11448406053040222, + "grad_norm": 1.0635933164730074, + "learning_rate": 4.995966210171705e-06, + "loss": 0.555, + "step": 1880 + }, + { + "epoch": 0.1145449563072801, + "grad_norm": 1.0025655478932722, + "learning_rate": 4.995961677771394e-06, + "loss": 0.5722, + "step": 1881 + }, + { + "epoch": 0.11460585208415797, + "grad_norm": 1.0883171488602406, + "learning_rate": 4.995957142828249e-06, + "loss": 0.5467, + "step": 1882 + }, + { + "epoch": 0.11466674786103584, + "grad_norm": 1.0109722201650468, + "learning_rate": 4.995952605342275e-06, + "loss": 0.5934, + "step": 1883 + }, + { + "epoch": 0.11472764363791371, + "grad_norm": 1.0586923980343774, + "learning_rate": 4.995948065313477e-06, + "loss": 0.4828, + "step": 1884 + }, + { + "epoch": 0.11478853941479158, + "grad_norm": 1.0351467829340377, + "learning_rate": 4.9959435227418586e-06, + "loss": 0.5107, + "step": 1885 + }, + { + "epoch": 0.11484943519166946, + "grad_norm": 1.103032871190561, + "learning_rate": 4.9959389776274246e-06, + "loss": 0.4934, + "step": 1886 + }, + { + "epoch": 0.11491033096854733, + "grad_norm": 0.9758242431943243, + "learning_rate": 4.99593442997018e-06, + "loss": 0.6228, + "step": 1887 + }, + { + "epoch": 0.1149712267454252, + "grad_norm": 0.9715081763813744, + "learning_rate": 4.9959298797701295e-06, + "loss": 0.5243, + "step": 1888 + }, + { + "epoch": 0.11503212252230308, + "grad_norm": 1.0343850983481966, + "learning_rate": 4.995925327027277e-06, + "loss": 0.4794, + "step": 1889 + }, + { + "epoch": 0.11509301829918095, + "grad_norm": 1.0388166155437695, + "learning_rate": 4.995920771741629e-06, + "loss": 0.5454, + "step": 1890 + }, + { + "epoch": 0.11515391407605882, + "grad_norm": 1.1601879343365291, + "learning_rate": 4.995916213913188e-06, + "loss": 0.4877, + "step": 1891 + }, + { + "epoch": 0.1152148098529367, + "grad_norm": 1.0272331799408367, + "learning_rate": 4.9959116535419585e-06, + "loss": 0.5661, + "step": 1892 + }, + { + "epoch": 0.11527570562981457, + "grad_norm": 1.0046979581090119, + "learning_rate": 4.995907090627947e-06, + "loss": 0.6506, + "step": 1893 + }, + { + "epoch": 0.11533660140669244, + "grad_norm": 1.0102467417297205, + "learning_rate": 4.995902525171157e-06, + "loss": 0.6099, + "step": 1894 + }, + { + "epoch": 0.11539749718357031, + "grad_norm": 1.0185014432408512, + "learning_rate": 4.995897957171594e-06, + "loss": 0.5397, + "step": 1895 + }, + { + "epoch": 0.11545839296044819, + "grad_norm": 0.9993698043807705, + "learning_rate": 4.995893386629261e-06, + "loss": 0.5308, + "step": 1896 + }, + { + "epoch": 0.11551928873732607, + "grad_norm": 0.9834660635561201, + "learning_rate": 4.995888813544165e-06, + "loss": 0.5618, + "step": 1897 + }, + { + "epoch": 0.11558018451420395, + "grad_norm": 1.066182513830402, + "learning_rate": 4.995884237916309e-06, + "loss": 0.5085, + "step": 1898 + }, + { + "epoch": 0.11564108029108182, + "grad_norm": 1.1244524020940883, + "learning_rate": 4.995879659745697e-06, + "loss": 0.5277, + "step": 1899 + }, + { + "epoch": 0.11570197606795969, + "grad_norm": 0.9513665529333153, + "learning_rate": 4.995875079032336e-06, + "loss": 0.5413, + "step": 1900 + }, + { + "epoch": 0.11576287184483756, + "grad_norm": 1.0293090458423284, + "learning_rate": 4.995870495776229e-06, + "loss": 0.5249, + "step": 1901 + }, + { + "epoch": 0.11582376762171544, + "grad_norm": 1.015510388696106, + "learning_rate": 4.995865909977381e-06, + "loss": 0.4946, + "step": 1902 + }, + { + "epoch": 0.11588466339859331, + "grad_norm": 1.0541311679620646, + "learning_rate": 4.995861321635796e-06, + "loss": 0.4987, + "step": 1903 + }, + { + "epoch": 0.11594555917547118, + "grad_norm": 1.001902511335164, + "learning_rate": 4.99585673075148e-06, + "loss": 0.5651, + "step": 1904 + }, + { + "epoch": 0.11600645495234906, + "grad_norm": 1.056619314679146, + "learning_rate": 4.9958521373244376e-06, + "loss": 0.5521, + "step": 1905 + }, + { + "epoch": 0.11606735072922693, + "grad_norm": 0.9447301031127794, + "learning_rate": 4.995847541354671e-06, + "loss": 0.6111, + "step": 1906 + }, + { + "epoch": 0.1161282465061048, + "grad_norm": 1.0792769654808843, + "learning_rate": 4.9958429428421886e-06, + "loss": 0.5, + "step": 1907 + }, + { + "epoch": 0.11618914228298267, + "grad_norm": 1.0460459144981529, + "learning_rate": 4.9958383417869924e-06, + "loss": 0.4717, + "step": 1908 + }, + { + "epoch": 0.11625003805986055, + "grad_norm": 1.116409272232721, + "learning_rate": 4.995833738189089e-06, + "loss": 0.5722, + "step": 1909 + }, + { + "epoch": 0.11631093383673842, + "grad_norm": 1.0345837615648423, + "learning_rate": 4.995829132048482e-06, + "loss": 0.517, + "step": 1910 + }, + { + "epoch": 0.11637182961361629, + "grad_norm": 1.077401512563418, + "learning_rate": 4.995824523365175e-06, + "loss": 0.5173, + "step": 1911 + }, + { + "epoch": 0.11643272539049417, + "grad_norm": 1.0174699407961476, + "learning_rate": 4.995819912139175e-06, + "loss": 0.5642, + "step": 1912 + }, + { + "epoch": 0.11649362116737204, + "grad_norm": 0.9714143613388142, + "learning_rate": 4.9958152983704845e-06, + "loss": 0.5782, + "step": 1913 + }, + { + "epoch": 0.11655451694424991, + "grad_norm": 1.0368678453733646, + "learning_rate": 4.99581068205911e-06, + "loss": 0.5151, + "step": 1914 + }, + { + "epoch": 0.11661541272112778, + "grad_norm": 1.0908516973280826, + "learning_rate": 4.995806063205055e-06, + "loss": 0.5354, + "step": 1915 + }, + { + "epoch": 0.11667630849800566, + "grad_norm": 1.080809830578159, + "learning_rate": 4.995801441808325e-06, + "loss": 0.5232, + "step": 1916 + }, + { + "epoch": 0.11673720427488354, + "grad_norm": 0.9954027396850231, + "learning_rate": 4.995796817868925e-06, + "loss": 0.5168, + "step": 1917 + }, + { + "epoch": 0.11679810005176142, + "grad_norm": 0.9708959304960411, + "learning_rate": 4.9957921913868576e-06, + "loss": 0.5705, + "step": 1918 + }, + { + "epoch": 0.11685899582863929, + "grad_norm": 0.9810341328532971, + "learning_rate": 4.99578756236213e-06, + "loss": 0.5523, + "step": 1919 + }, + { + "epoch": 0.11691989160551716, + "grad_norm": 1.0423294511381207, + "learning_rate": 4.995782930794747e-06, + "loss": 0.5794, + "step": 1920 + }, + { + "epoch": 0.11698078738239504, + "grad_norm": 1.0205850558962655, + "learning_rate": 4.99577829668471e-06, + "loss": 0.5975, + "step": 1921 + }, + { + "epoch": 0.11704168315927291, + "grad_norm": 1.0738975431452507, + "learning_rate": 4.995773660032027e-06, + "loss": 0.4934, + "step": 1922 + }, + { + "epoch": 0.11710257893615078, + "grad_norm": 1.073441190077022, + "learning_rate": 4.995769020836701e-06, + "loss": 0.5226, + "step": 1923 + }, + { + "epoch": 0.11716347471302865, + "grad_norm": 1.0236512645922649, + "learning_rate": 4.995764379098739e-06, + "loss": 0.5541, + "step": 1924 + }, + { + "epoch": 0.11722437048990653, + "grad_norm": 1.1024769095441236, + "learning_rate": 4.995759734818143e-06, + "loss": 0.5549, + "step": 1925 + }, + { + "epoch": 0.1172852662667844, + "grad_norm": 1.1924501433461847, + "learning_rate": 4.995755087994919e-06, + "loss": 0.5411, + "step": 1926 + }, + { + "epoch": 0.11734616204366227, + "grad_norm": 1.0830983759609487, + "learning_rate": 4.995750438629072e-06, + "loss": 0.4725, + "step": 1927 + }, + { + "epoch": 0.11740705782054014, + "grad_norm": 1.092741023637041, + "learning_rate": 4.995745786720606e-06, + "loss": 0.5369, + "step": 1928 + }, + { + "epoch": 0.11746795359741802, + "grad_norm": 0.9996182543090801, + "learning_rate": 4.995741132269526e-06, + "loss": 0.5163, + "step": 1929 + }, + { + "epoch": 0.11752884937429589, + "grad_norm": 1.0647870013387795, + "learning_rate": 4.995736475275837e-06, + "loss": 0.5246, + "step": 1930 + }, + { + "epoch": 0.11758974515117376, + "grad_norm": 1.0507602597221612, + "learning_rate": 4.995731815739544e-06, + "loss": 0.4832, + "step": 1931 + }, + { + "epoch": 0.11765064092805164, + "grad_norm": 1.0596958094601, + "learning_rate": 4.99572715366065e-06, + "loss": 0.5724, + "step": 1932 + }, + { + "epoch": 0.11771153670492951, + "grad_norm": 0.9916734059221358, + "learning_rate": 4.995722489039162e-06, + "loss": 0.5158, + "step": 1933 + }, + { + "epoch": 0.11777243248180738, + "grad_norm": 0.9949742962370867, + "learning_rate": 4.995717821875084e-06, + "loss": 0.54, + "step": 1934 + }, + { + "epoch": 0.11783332825868525, + "grad_norm": 1.0276207250623461, + "learning_rate": 4.9957131521684195e-06, + "loss": 0.5583, + "step": 1935 + }, + { + "epoch": 0.11789422403556313, + "grad_norm": 0.9871967412511898, + "learning_rate": 4.995708479919176e-06, + "loss": 0.5437, + "step": 1936 + }, + { + "epoch": 0.117955119812441, + "grad_norm": 1.1097554249276247, + "learning_rate": 4.995703805127355e-06, + "loss": 0.516, + "step": 1937 + }, + { + "epoch": 0.11801601558931889, + "grad_norm": 1.092615866679437, + "learning_rate": 4.995699127792964e-06, + "loss": 0.5667, + "step": 1938 + }, + { + "epoch": 0.11807691136619676, + "grad_norm": 1.0977543003260117, + "learning_rate": 4.995694447916006e-06, + "loss": 0.4924, + "step": 1939 + }, + { + "epoch": 0.11813780714307463, + "grad_norm": 1.1599234394138889, + "learning_rate": 4.995689765496486e-06, + "loss": 0.5618, + "step": 1940 + }, + { + "epoch": 0.1181987029199525, + "grad_norm": 0.9838490056422822, + "learning_rate": 4.99568508053441e-06, + "loss": 0.5617, + "step": 1941 + }, + { + "epoch": 0.11825959869683038, + "grad_norm": 1.004090286180282, + "learning_rate": 4.995680393029782e-06, + "loss": 0.5245, + "step": 1942 + }, + { + "epoch": 0.11832049447370825, + "grad_norm": 1.0649536154756185, + "learning_rate": 4.995675702982606e-06, + "loss": 0.5138, + "step": 1943 + }, + { + "epoch": 0.11838139025058612, + "grad_norm": 1.0421768004454548, + "learning_rate": 4.995671010392888e-06, + "loss": 0.4622, + "step": 1944 + }, + { + "epoch": 0.118442286027464, + "grad_norm": 1.0455099486163477, + "learning_rate": 4.995666315260632e-06, + "loss": 0.598, + "step": 1945 + }, + { + "epoch": 0.11850318180434187, + "grad_norm": 1.03868894314013, + "learning_rate": 4.995661617585843e-06, + "loss": 0.5415, + "step": 1946 + }, + { + "epoch": 0.11856407758121974, + "grad_norm": 1.0491117944715598, + "learning_rate": 4.995656917368526e-06, + "loss": 0.5208, + "step": 1947 + }, + { + "epoch": 0.11862497335809762, + "grad_norm": 1.0975261775674285, + "learning_rate": 4.9956522146086855e-06, + "loss": 0.5576, + "step": 1948 + }, + { + "epoch": 0.11868586913497549, + "grad_norm": 0.9419234576248314, + "learning_rate": 4.9956475093063264e-06, + "loss": 0.5589, + "step": 1949 + }, + { + "epoch": 0.11874676491185336, + "grad_norm": 1.0515278361033769, + "learning_rate": 4.995642801461453e-06, + "loss": 0.5029, + "step": 1950 + }, + { + "epoch": 0.11880766068873123, + "grad_norm": 0.9997058079507244, + "learning_rate": 4.995638091074072e-06, + "loss": 0.5771, + "step": 1951 + }, + { + "epoch": 0.11886855646560911, + "grad_norm": 1.0824517137021405, + "learning_rate": 4.995633378144186e-06, + "loss": 0.5646, + "step": 1952 + }, + { + "epoch": 0.11892945224248698, + "grad_norm": 1.1240290021136325, + "learning_rate": 4.9956286626718005e-06, + "loss": 0.5159, + "step": 1953 + }, + { + "epoch": 0.11899034801936485, + "grad_norm": 1.0889001027432559, + "learning_rate": 4.99562394465692e-06, + "loss": 0.5338, + "step": 1954 + }, + { + "epoch": 0.11905124379624273, + "grad_norm": 1.2129590556672774, + "learning_rate": 4.9956192240995504e-06, + "loss": 0.5016, + "step": 1955 + }, + { + "epoch": 0.1191121395731206, + "grad_norm": 1.1123059630631424, + "learning_rate": 4.995614500999696e-06, + "loss": 0.5328, + "step": 1956 + }, + { + "epoch": 0.11917303534999847, + "grad_norm": 1.0842902680449693, + "learning_rate": 4.99560977535736e-06, + "loss": 0.4666, + "step": 1957 + }, + { + "epoch": 0.11923393112687636, + "grad_norm": 1.0799714491028776, + "learning_rate": 4.99560504717255e-06, + "loss": 0.5462, + "step": 1958 + }, + { + "epoch": 0.11929482690375423, + "grad_norm": 1.0165541062846075, + "learning_rate": 4.995600316445269e-06, + "loss": 0.5864, + "step": 1959 + }, + { + "epoch": 0.1193557226806321, + "grad_norm": 1.1461409380169354, + "learning_rate": 4.995595583175523e-06, + "loss": 0.5027, + "step": 1960 + }, + { + "epoch": 0.11941661845750998, + "grad_norm": 0.9702731968171178, + "learning_rate": 4.995590847363315e-06, + "loss": 0.4762, + "step": 1961 + }, + { + "epoch": 0.11947751423438785, + "grad_norm": 1.1226021949970193, + "learning_rate": 4.995586109008652e-06, + "loss": 0.5375, + "step": 1962 + }, + { + "epoch": 0.11953841001126572, + "grad_norm": 1.048545789552324, + "learning_rate": 4.9955813681115376e-06, + "loss": 0.5348, + "step": 1963 + }, + { + "epoch": 0.1195993057881436, + "grad_norm": 1.066349990443488, + "learning_rate": 4.995576624671976e-06, + "loss": 0.5688, + "step": 1964 + }, + { + "epoch": 0.11966020156502147, + "grad_norm": 0.9992058205842604, + "learning_rate": 4.9955718786899735e-06, + "loss": 0.5601, + "step": 1965 + }, + { + "epoch": 0.11972109734189934, + "grad_norm": 1.1031046961780469, + "learning_rate": 4.995567130165533e-06, + "loss": 0.5229, + "step": 1966 + }, + { + "epoch": 0.11978199311877721, + "grad_norm": 1.0129517188527721, + "learning_rate": 4.995562379098662e-06, + "loss": 0.4997, + "step": 1967 + }, + { + "epoch": 0.11984288889565509, + "grad_norm": 1.0076094945328282, + "learning_rate": 4.995557625489363e-06, + "loss": 0.6612, + "step": 1968 + }, + { + "epoch": 0.11990378467253296, + "grad_norm": 0.993390962457168, + "learning_rate": 4.9955528693376435e-06, + "loss": 0.5025, + "step": 1969 + }, + { + "epoch": 0.11996468044941083, + "grad_norm": 1.1864369524320824, + "learning_rate": 4.995548110643505e-06, + "loss": 0.6458, + "step": 1970 + }, + { + "epoch": 0.1200255762262887, + "grad_norm": 1.0301843235088823, + "learning_rate": 4.995543349406954e-06, + "loss": 0.531, + "step": 1971 + }, + { + "epoch": 0.12008647200316658, + "grad_norm": 0.9978725433516997, + "learning_rate": 4.995538585627996e-06, + "loss": 0.5254, + "step": 1972 + }, + { + "epoch": 0.12014736778004445, + "grad_norm": 1.037433180301015, + "learning_rate": 4.995533819306635e-06, + "loss": 0.5074, + "step": 1973 + }, + { + "epoch": 0.12020826355692232, + "grad_norm": 0.9921867128082734, + "learning_rate": 4.995529050442875e-06, + "loss": 0.5017, + "step": 1974 + }, + { + "epoch": 0.1202691593338002, + "grad_norm": 1.0647106841543776, + "learning_rate": 4.9955242790367235e-06, + "loss": 0.5394, + "step": 1975 + }, + { + "epoch": 0.12033005511067807, + "grad_norm": 0.9734910881219094, + "learning_rate": 4.995519505088183e-06, + "loss": 0.488, + "step": 1976 + }, + { + "epoch": 0.12039095088755594, + "grad_norm": 0.9791276769941627, + "learning_rate": 4.995514728597259e-06, + "loss": 0.4785, + "step": 1977 + }, + { + "epoch": 0.12045184666443381, + "grad_norm": 1.08161725969866, + "learning_rate": 4.9955099495639565e-06, + "loss": 0.5376, + "step": 1978 + }, + { + "epoch": 0.1205127424413117, + "grad_norm": 0.9630073454478473, + "learning_rate": 4.99550516798828e-06, + "loss": 0.5811, + "step": 1979 + }, + { + "epoch": 0.12057363821818957, + "grad_norm": 1.1103048750103652, + "learning_rate": 4.995500383870236e-06, + "loss": 0.567, + "step": 1980 + }, + { + "epoch": 0.12063453399506745, + "grad_norm": 0.9614542296199277, + "learning_rate": 4.995495597209827e-06, + "loss": 0.6339, + "step": 1981 + }, + { + "epoch": 0.12069542977194532, + "grad_norm": 1.0223082964390624, + "learning_rate": 4.995490808007059e-06, + "loss": 0.5642, + "step": 1982 + }, + { + "epoch": 0.12075632554882319, + "grad_norm": 1.0404579542913066, + "learning_rate": 4.995486016261936e-06, + "loss": 0.5046, + "step": 1983 + }, + { + "epoch": 0.12081722132570107, + "grad_norm": 1.1055947967916173, + "learning_rate": 4.995481221974466e-06, + "loss": 0.524, + "step": 1984 + }, + { + "epoch": 0.12087811710257894, + "grad_norm": 0.9847340718257267, + "learning_rate": 4.9954764251446506e-06, + "loss": 0.5371, + "step": 1985 + }, + { + "epoch": 0.12093901287945681, + "grad_norm": 0.9674077698086528, + "learning_rate": 4.995471625772495e-06, + "loss": 0.5545, + "step": 1986 + }, + { + "epoch": 0.12099990865633468, + "grad_norm": 1.0692407805781163, + "learning_rate": 4.995466823858005e-06, + "loss": 0.5243, + "step": 1987 + }, + { + "epoch": 0.12106080443321256, + "grad_norm": 1.0443238178141467, + "learning_rate": 4.995462019401186e-06, + "loss": 0.5571, + "step": 1988 + }, + { + "epoch": 0.12112170021009043, + "grad_norm": 1.0764019740226995, + "learning_rate": 4.995457212402042e-06, + "loss": 0.5323, + "step": 1989 + }, + { + "epoch": 0.1211825959869683, + "grad_norm": 1.0848190581571255, + "learning_rate": 4.995452402860578e-06, + "loss": 0.5627, + "step": 1990 + }, + { + "epoch": 0.12124349176384618, + "grad_norm": 1.1568917675070665, + "learning_rate": 4.995447590776798e-06, + "loss": 0.5318, + "step": 1991 + }, + { + "epoch": 0.12130438754072405, + "grad_norm": 1.0971523176549374, + "learning_rate": 4.995442776150709e-06, + "loss": 0.4935, + "step": 1992 + }, + { + "epoch": 0.12136528331760192, + "grad_norm": 1.028894348738708, + "learning_rate": 4.995437958982315e-06, + "loss": 0.5171, + "step": 1993 + }, + { + "epoch": 0.1214261790944798, + "grad_norm": 1.047026429290539, + "learning_rate": 4.9954331392716194e-06, + "loss": 0.5456, + "step": 1994 + }, + { + "epoch": 0.12148707487135767, + "grad_norm": 1.0075725293607591, + "learning_rate": 4.995428317018629e-06, + "loss": 0.5547, + "step": 1995 + }, + { + "epoch": 0.12154797064823554, + "grad_norm": 1.1067168970140713, + "learning_rate": 4.995423492223349e-06, + "loss": 0.4832, + "step": 1996 + }, + { + "epoch": 0.12160886642511341, + "grad_norm": 1.1307697847268017, + "learning_rate": 4.995418664885783e-06, + "loss": 0.5011, + "step": 1997 + }, + { + "epoch": 0.12166976220199129, + "grad_norm": 1.0929435385037687, + "learning_rate": 4.995413835005936e-06, + "loss": 0.5218, + "step": 1998 + }, + { + "epoch": 0.12173065797886917, + "grad_norm": 1.0210987285380264, + "learning_rate": 4.995409002583813e-06, + "loss": 0.5147, + "step": 1999 + }, + { + "epoch": 0.12179155375574705, + "grad_norm": 1.200457822240292, + "learning_rate": 4.995404167619419e-06, + "loss": 0.5473, + "step": 2000 + }, + { + "epoch": 0.12185244953262492, + "grad_norm": 1.0493162020909226, + "learning_rate": 4.99539933011276e-06, + "loss": 0.5021, + "step": 2001 + }, + { + "epoch": 0.12191334530950279, + "grad_norm": 1.1335389505359488, + "learning_rate": 4.99539449006384e-06, + "loss": 0.5054, + "step": 2002 + }, + { + "epoch": 0.12197424108638066, + "grad_norm": 1.0727906131273073, + "learning_rate": 4.995389647472663e-06, + "loss": 0.5651, + "step": 2003 + }, + { + "epoch": 0.12203513686325854, + "grad_norm": 1.0071366632759449, + "learning_rate": 4.995384802339236e-06, + "loss": 0.6416, + "step": 2004 + }, + { + "epoch": 0.12209603264013641, + "grad_norm": 1.0026881466148814, + "learning_rate": 4.995379954663562e-06, + "loss": 0.5293, + "step": 2005 + }, + { + "epoch": 0.12215692841701428, + "grad_norm": 1.0197090897537968, + "learning_rate": 4.995375104445647e-06, + "loss": 0.5113, + "step": 2006 + }, + { + "epoch": 0.12221782419389216, + "grad_norm": 1.0508338533222017, + "learning_rate": 4.995370251685496e-06, + "loss": 0.5208, + "step": 2007 + }, + { + "epoch": 0.12227871997077003, + "grad_norm": 0.9966202208481657, + "learning_rate": 4.995365396383114e-06, + "loss": 0.5885, + "step": 2008 + }, + { + "epoch": 0.1223396157476479, + "grad_norm": 1.0842119043001366, + "learning_rate": 4.995360538538505e-06, + "loss": 0.6105, + "step": 2009 + }, + { + "epoch": 0.12240051152452577, + "grad_norm": 1.0456835070364647, + "learning_rate": 4.995355678151674e-06, + "loss": 0.5413, + "step": 2010 + }, + { + "epoch": 0.12246140730140365, + "grad_norm": 1.16369991618452, + "learning_rate": 4.995350815222628e-06, + "loss": 0.5474, + "step": 2011 + }, + { + "epoch": 0.12252230307828152, + "grad_norm": 1.0814797963073004, + "learning_rate": 4.99534594975137e-06, + "loss": 0.4955, + "step": 2012 + }, + { + "epoch": 0.12258319885515939, + "grad_norm": 1.0566379637742223, + "learning_rate": 4.995341081737904e-06, + "loss": 0.526, + "step": 2013 + }, + { + "epoch": 0.12264409463203726, + "grad_norm": 1.015155218742299, + "learning_rate": 4.995336211182238e-06, + "loss": 0.4967, + "step": 2014 + }, + { + "epoch": 0.12270499040891514, + "grad_norm": 0.9286729817910063, + "learning_rate": 4.995331338084375e-06, + "loss": 0.5176, + "step": 2015 + }, + { + "epoch": 0.12276588618579301, + "grad_norm": 1.0252174156648046, + "learning_rate": 4.9953264624443195e-06, + "loss": 0.5064, + "step": 2016 + }, + { + "epoch": 0.12282678196267088, + "grad_norm": 1.0004179337787553, + "learning_rate": 4.9953215842620786e-06, + "loss": 0.5332, + "step": 2017 + }, + { + "epoch": 0.12288767773954876, + "grad_norm": 0.9695097454635692, + "learning_rate": 4.995316703537655e-06, + "loss": 0.5381, + "step": 2018 + }, + { + "epoch": 0.12294857351642663, + "grad_norm": 1.1178332761877396, + "learning_rate": 4.995311820271055e-06, + "loss": 0.4583, + "step": 2019 + }, + { + "epoch": 0.12300946929330452, + "grad_norm": 1.0861533791548732, + "learning_rate": 4.995306934462284e-06, + "loss": 0.4979, + "step": 2020 + }, + { + "epoch": 0.12307036507018239, + "grad_norm": 1.04720205733096, + "learning_rate": 4.9953020461113445e-06, + "loss": 0.5479, + "step": 2021 + }, + { + "epoch": 0.12313126084706026, + "grad_norm": 1.016000854997744, + "learning_rate": 4.995297155218244e-06, + "loss": 0.4887, + "step": 2022 + }, + { + "epoch": 0.12319215662393813, + "grad_norm": 1.0806149115702994, + "learning_rate": 4.995292261782987e-06, + "loss": 0.6052, + "step": 2023 + }, + { + "epoch": 0.12325305240081601, + "grad_norm": 1.161845413034692, + "learning_rate": 4.9952873658055775e-06, + "loss": 0.5357, + "step": 2024 + }, + { + "epoch": 0.12331394817769388, + "grad_norm": 1.0317790208659148, + "learning_rate": 4.995282467286021e-06, + "loss": 0.4855, + "step": 2025 + }, + { + "epoch": 0.12337484395457175, + "grad_norm": 1.0281896919336817, + "learning_rate": 4.9952775662243235e-06, + "loss": 0.5204, + "step": 2026 + }, + { + "epoch": 0.12343573973144963, + "grad_norm": 1.0278154970388031, + "learning_rate": 4.9952726626204885e-06, + "loss": 0.5278, + "step": 2027 + }, + { + "epoch": 0.1234966355083275, + "grad_norm": 1.0577458452374164, + "learning_rate": 4.995267756474521e-06, + "loss": 0.4863, + "step": 2028 + }, + { + "epoch": 0.12355753128520537, + "grad_norm": 1.0329017244707641, + "learning_rate": 4.995262847786428e-06, + "loss": 0.5365, + "step": 2029 + }, + { + "epoch": 0.12361842706208324, + "grad_norm": 1.0167571601531422, + "learning_rate": 4.995257936556211e-06, + "loss": 0.5321, + "step": 2030 + }, + { + "epoch": 0.12367932283896112, + "grad_norm": 1.0767443487332633, + "learning_rate": 4.995253022783879e-06, + "loss": 0.5453, + "step": 2031 + }, + { + "epoch": 0.12374021861583899, + "grad_norm": 1.0678598592771935, + "learning_rate": 4.995248106469435e-06, + "loss": 0.5261, + "step": 2032 + }, + { + "epoch": 0.12380111439271686, + "grad_norm": 1.0046908399416754, + "learning_rate": 4.9952431876128835e-06, + "loss": 0.5387, + "step": 2033 + }, + { + "epoch": 0.12386201016959474, + "grad_norm": 1.1590744909764925, + "learning_rate": 4.99523826621423e-06, + "loss": 0.5438, + "step": 2034 + }, + { + "epoch": 0.12392290594647261, + "grad_norm": 1.0521677200528226, + "learning_rate": 4.99523334227348e-06, + "loss": 0.466, + "step": 2035 + }, + { + "epoch": 0.12398380172335048, + "grad_norm": 1.1234063056236858, + "learning_rate": 4.995228415790638e-06, + "loss": 0.5352, + "step": 2036 + }, + { + "epoch": 0.12404469750022835, + "grad_norm": 1.0964363474028576, + "learning_rate": 4.995223486765709e-06, + "loss": 0.4385, + "step": 2037 + }, + { + "epoch": 0.12410559327710623, + "grad_norm": 1.0432815136368376, + "learning_rate": 4.995218555198698e-06, + "loss": 0.5394, + "step": 2038 + }, + { + "epoch": 0.1241664890539841, + "grad_norm": 1.014364634750578, + "learning_rate": 4.99521362108961e-06, + "loss": 0.5158, + "step": 2039 + }, + { + "epoch": 0.12422738483086199, + "grad_norm": 1.0702322605879642, + "learning_rate": 4.995208684438452e-06, + "loss": 0.5146, + "step": 2040 + }, + { + "epoch": 0.12428828060773986, + "grad_norm": 1.0479965595022942, + "learning_rate": 4.995203745245226e-06, + "loss": 0.5098, + "step": 2041 + }, + { + "epoch": 0.12434917638461773, + "grad_norm": 1.0994282458669773, + "learning_rate": 4.995198803509938e-06, + "loss": 0.506, + "step": 2042 + }, + { + "epoch": 0.1244100721614956, + "grad_norm": 0.9762517586678886, + "learning_rate": 4.9951938592325935e-06, + "loss": 0.5481, + "step": 2043 + }, + { + "epoch": 0.12447096793837348, + "grad_norm": 1.2031219469306018, + "learning_rate": 4.995188912413198e-06, + "loss": 0.5363, + "step": 2044 + }, + { + "epoch": 0.12453186371525135, + "grad_norm": 0.9541048445506638, + "learning_rate": 4.995183963051755e-06, + "loss": 0.5223, + "step": 2045 + }, + { + "epoch": 0.12459275949212922, + "grad_norm": 1.0364684024427435, + "learning_rate": 4.995179011148271e-06, + "loss": 0.528, + "step": 2046 + }, + { + "epoch": 0.1246536552690071, + "grad_norm": 1.0709185374449652, + "learning_rate": 4.995174056702751e-06, + "loss": 0.513, + "step": 2047 + }, + { + "epoch": 0.12471455104588497, + "grad_norm": 1.1859789962294502, + "learning_rate": 4.995169099715199e-06, + "loss": 0.5126, + "step": 2048 + }, + { + "epoch": 0.12477544682276284, + "grad_norm": 1.036628774132535, + "learning_rate": 4.99516414018562e-06, + "loss": 0.5434, + "step": 2049 + }, + { + "epoch": 0.12483634259964072, + "grad_norm": 1.0320386428430355, + "learning_rate": 4.995159178114021e-06, + "loss": 0.505, + "step": 2050 + }, + { + "epoch": 0.12489723837651859, + "grad_norm": 1.0492256234286317, + "learning_rate": 4.995154213500404e-06, + "loss": 0.5473, + "step": 2051 + }, + { + "epoch": 0.12495813415339646, + "grad_norm": 1.035454847549353, + "learning_rate": 4.995149246344777e-06, + "loss": 0.533, + "step": 2052 + }, + { + "epoch": 0.12501902993027433, + "grad_norm": 1.0951992785861488, + "learning_rate": 4.995144276647143e-06, + "loss": 0.5632, + "step": 2053 + }, + { + "epoch": 0.1250799257071522, + "grad_norm": 1.19594511257362, + "learning_rate": 4.995139304407509e-06, + "loss": 0.4943, + "step": 2054 + }, + { + "epoch": 0.12514082148403008, + "grad_norm": 1.03895620776062, + "learning_rate": 4.995134329625878e-06, + "loss": 0.4706, + "step": 2055 + }, + { + "epoch": 0.12520171726090795, + "grad_norm": 0.9892966182628714, + "learning_rate": 4.995129352302257e-06, + "loss": 0.5679, + "step": 2056 + }, + { + "epoch": 0.12526261303778582, + "grad_norm": 1.017093146714465, + "learning_rate": 4.9951243724366494e-06, + "loss": 0.5806, + "step": 2057 + }, + { + "epoch": 0.1253235088146637, + "grad_norm": 1.0992272552794902, + "learning_rate": 4.995119390029061e-06, + "loss": 0.5191, + "step": 2058 + }, + { + "epoch": 0.12538440459154157, + "grad_norm": 1.0942748213407978, + "learning_rate": 4.995114405079496e-06, + "loss": 0.5381, + "step": 2059 + }, + { + "epoch": 0.12544530036841944, + "grad_norm": 0.9804334998534947, + "learning_rate": 4.995109417587962e-06, + "loss": 0.5687, + "step": 2060 + }, + { + "epoch": 0.12550619614529732, + "grad_norm": 1.0332930828523208, + "learning_rate": 4.995104427554462e-06, + "loss": 0.5534, + "step": 2061 + }, + { + "epoch": 0.1255670919221752, + "grad_norm": 1.1682760417462412, + "learning_rate": 4.995099434979001e-06, + "loss": 0.5806, + "step": 2062 + }, + { + "epoch": 0.12562798769905306, + "grad_norm": 1.0774259737281284, + "learning_rate": 4.995094439861584e-06, + "loss": 0.5636, + "step": 2063 + }, + { + "epoch": 0.12568888347593093, + "grad_norm": 0.9743188819981542, + "learning_rate": 4.9950894422022175e-06, + "loss": 0.6071, + "step": 2064 + }, + { + "epoch": 0.1257497792528088, + "grad_norm": 1.1205253637066266, + "learning_rate": 4.995084442000906e-06, + "loss": 0.5567, + "step": 2065 + }, + { + "epoch": 0.12581067502968668, + "grad_norm": 1.0257101252099232, + "learning_rate": 4.995079439257653e-06, + "loss": 0.5224, + "step": 2066 + }, + { + "epoch": 0.12587157080656455, + "grad_norm": 1.0586937377677772, + "learning_rate": 4.995074433972466e-06, + "loss": 0.605, + "step": 2067 + }, + { + "epoch": 0.12593246658344243, + "grad_norm": 0.9980367111876604, + "learning_rate": 4.995069426145349e-06, + "loss": 0.5279, + "step": 2068 + }, + { + "epoch": 0.1259933623603203, + "grad_norm": 1.0343094344958008, + "learning_rate": 4.995064415776307e-06, + "loss": 0.5215, + "step": 2069 + }, + { + "epoch": 0.12605425813719817, + "grad_norm": 1.098162270648388, + "learning_rate": 4.9950594028653455e-06, + "loss": 0.5, + "step": 2070 + }, + { + "epoch": 0.12611515391407607, + "grad_norm": 1.1469663463198698, + "learning_rate": 4.995054387412469e-06, + "loss": 0.488, + "step": 2071 + }, + { + "epoch": 0.12617604969095395, + "grad_norm": 0.9096808615797569, + "learning_rate": 4.995049369417683e-06, + "loss": 0.5487, + "step": 2072 + }, + { + "epoch": 0.12623694546783182, + "grad_norm": 1.00912613238366, + "learning_rate": 4.995044348880993e-06, + "loss": 0.5285, + "step": 2073 + }, + { + "epoch": 0.1262978412447097, + "grad_norm": 1.0752077545935592, + "learning_rate": 4.9950393258024035e-06, + "loss": 0.5431, + "step": 2074 + }, + { + "epoch": 0.12635873702158756, + "grad_norm": 1.0330405801717757, + "learning_rate": 4.995034300181919e-06, + "loss": 0.5229, + "step": 2075 + }, + { + "epoch": 0.12641963279846544, + "grad_norm": 1.0462082450009091, + "learning_rate": 4.995029272019546e-06, + "loss": 0.5516, + "step": 2076 + }, + { + "epoch": 0.1264805285753433, + "grad_norm": 1.0981599844047356, + "learning_rate": 4.995024241315289e-06, + "loss": 0.526, + "step": 2077 + }, + { + "epoch": 0.12654142435222118, + "grad_norm": 1.125989214571041, + "learning_rate": 4.995019208069154e-06, + "loss": 0.4331, + "step": 2078 + }, + { + "epoch": 0.12660232012909906, + "grad_norm": 1.0783489957379422, + "learning_rate": 4.995014172281144e-06, + "loss": 0.5316, + "step": 2079 + }, + { + "epoch": 0.12666321590597693, + "grad_norm": 0.9665770873623739, + "learning_rate": 4.995009133951266e-06, + "loss": 0.5527, + "step": 2080 + }, + { + "epoch": 0.1267241116828548, + "grad_norm": 1.1331057719306377, + "learning_rate": 4.995004093079525e-06, + "loss": 0.4549, + "step": 2081 + }, + { + "epoch": 0.12678500745973267, + "grad_norm": 1.0826393923607567, + "learning_rate": 4.994999049665925e-06, + "loss": 0.5304, + "step": 2082 + }, + { + "epoch": 0.12684590323661055, + "grad_norm": 1.0097103898553572, + "learning_rate": 4.994994003710473e-06, + "loss": 0.5126, + "step": 2083 + }, + { + "epoch": 0.12690679901348842, + "grad_norm": 1.0786440626233307, + "learning_rate": 4.994988955213172e-06, + "loss": 0.566, + "step": 2084 + }, + { + "epoch": 0.1269676947903663, + "grad_norm": 1.1098472544527516, + "learning_rate": 4.9949839041740285e-06, + "loss": 0.5109, + "step": 2085 + }, + { + "epoch": 0.12702859056724417, + "grad_norm": 1.010333019079247, + "learning_rate": 4.9949788505930465e-06, + "loss": 0.6141, + "step": 2086 + }, + { + "epoch": 0.12708948634412204, + "grad_norm": 1.026432127752044, + "learning_rate": 4.994973794470233e-06, + "loss": 0.504, + "step": 2087 + }, + { + "epoch": 0.1271503821209999, + "grad_norm": 1.1022799959878615, + "learning_rate": 4.994968735805591e-06, + "loss": 0.5136, + "step": 2088 + }, + { + "epoch": 0.12721127789787778, + "grad_norm": 0.9681360928105647, + "learning_rate": 4.994963674599127e-06, + "loss": 0.5619, + "step": 2089 + }, + { + "epoch": 0.12727217367475566, + "grad_norm": 1.0677144621860617, + "learning_rate": 4.994958610850846e-06, + "loss": 0.553, + "step": 2090 + }, + { + "epoch": 0.12733306945163353, + "grad_norm": 1.1452069076498284, + "learning_rate": 4.994953544560754e-06, + "loss": 0.5142, + "step": 2091 + }, + { + "epoch": 0.1273939652285114, + "grad_norm": 1.057778358490308, + "learning_rate": 4.994948475728855e-06, + "loss": 0.6105, + "step": 2092 + }, + { + "epoch": 0.12745486100538927, + "grad_norm": 1.0770532823467702, + "learning_rate": 4.994943404355153e-06, + "loss": 0.5613, + "step": 2093 + }, + { + "epoch": 0.12751575678226715, + "grad_norm": 1.131152615293246, + "learning_rate": 4.994938330439655e-06, + "loss": 0.5003, + "step": 2094 + }, + { + "epoch": 0.12757665255914502, + "grad_norm": 1.0054247602421167, + "learning_rate": 4.9949332539823656e-06, + "loss": 0.5576, + "step": 2095 + }, + { + "epoch": 0.1276375483360229, + "grad_norm": 1.0523817227288437, + "learning_rate": 4.994928174983291e-06, + "loss": 0.5402, + "step": 2096 + }, + { + "epoch": 0.12769844411290077, + "grad_norm": 1.1049477804612677, + "learning_rate": 4.994923093442435e-06, + "loss": 0.5619, + "step": 2097 + }, + { + "epoch": 0.12775933988977864, + "grad_norm": 1.0786580393901084, + "learning_rate": 4.994918009359803e-06, + "loss": 0.5235, + "step": 2098 + }, + { + "epoch": 0.1278202356666565, + "grad_norm": 1.2079630272731356, + "learning_rate": 4.994912922735401e-06, + "loss": 0.4876, + "step": 2099 + }, + { + "epoch": 0.12788113144353438, + "grad_norm": 1.0696693991384705, + "learning_rate": 4.9949078335692326e-06, + "loss": 0.5457, + "step": 2100 + }, + { + "epoch": 0.12794202722041226, + "grad_norm": 0.994699098236643, + "learning_rate": 4.994902741861305e-06, + "loss": 0.5279, + "step": 2101 + }, + { + "epoch": 0.12800292299729013, + "grad_norm": 1.0534738255269789, + "learning_rate": 4.994897647611622e-06, + "loss": 0.5139, + "step": 2102 + }, + { + "epoch": 0.128063818774168, + "grad_norm": 1.0233691840369288, + "learning_rate": 4.9948925508201885e-06, + "loss": 0.4625, + "step": 2103 + }, + { + "epoch": 0.12812471455104588, + "grad_norm": 1.1252656777822323, + "learning_rate": 4.994887451487011e-06, + "loss": 0.5465, + "step": 2104 + }, + { + "epoch": 0.12818561032792375, + "grad_norm": 1.0149817264655387, + "learning_rate": 4.994882349612094e-06, + "loss": 0.5508, + "step": 2105 + }, + { + "epoch": 0.12824650610480162, + "grad_norm": 0.9908440772761741, + "learning_rate": 4.994877245195442e-06, + "loss": 0.5419, + "step": 2106 + }, + { + "epoch": 0.1283074018816795, + "grad_norm": 1.0675992902152254, + "learning_rate": 4.994872138237061e-06, + "loss": 0.5186, + "step": 2107 + }, + { + "epoch": 0.12836829765855737, + "grad_norm": 0.9711684918027259, + "learning_rate": 4.994867028736957e-06, + "loss": 0.5446, + "step": 2108 + }, + { + "epoch": 0.12842919343543524, + "grad_norm": 1.0379386486539437, + "learning_rate": 4.994861916695134e-06, + "loss": 0.5406, + "step": 2109 + }, + { + "epoch": 0.1284900892123131, + "grad_norm": 1.0516907139683418, + "learning_rate": 4.9948568021115975e-06, + "loss": 0.5812, + "step": 2110 + }, + { + "epoch": 0.128550984989191, + "grad_norm": 1.062219311446186, + "learning_rate": 4.994851684986353e-06, + "loss": 0.4605, + "step": 2111 + }, + { + "epoch": 0.1286118807660689, + "grad_norm": 1.026211047966583, + "learning_rate": 4.994846565319405e-06, + "loss": 0.5207, + "step": 2112 + }, + { + "epoch": 0.12867277654294676, + "grad_norm": 1.1047697645625891, + "learning_rate": 4.994841443110759e-06, + "loss": 0.4659, + "step": 2113 + }, + { + "epoch": 0.12873367231982463, + "grad_norm": 1.077676164298384, + "learning_rate": 4.994836318360422e-06, + "loss": 0.5225, + "step": 2114 + }, + { + "epoch": 0.1287945680967025, + "grad_norm": 1.2039275943872958, + "learning_rate": 4.994831191068396e-06, + "loss": 0.512, + "step": 2115 + }, + { + "epoch": 0.12885546387358038, + "grad_norm": 1.214471813143012, + "learning_rate": 4.994826061234689e-06, + "loss": 0.5164, + "step": 2116 + }, + { + "epoch": 0.12891635965045825, + "grad_norm": 1.030998327351342, + "learning_rate": 4.9948209288593045e-06, + "loss": 0.4792, + "step": 2117 + }, + { + "epoch": 0.12897725542733612, + "grad_norm": 0.9974627742156775, + "learning_rate": 4.994815793942248e-06, + "loss": 0.5518, + "step": 2118 + }, + { + "epoch": 0.129038151204214, + "grad_norm": 1.0362358446879465, + "learning_rate": 4.994810656483525e-06, + "loss": 0.5516, + "step": 2119 + }, + { + "epoch": 0.12909904698109187, + "grad_norm": 1.1031983105068985, + "learning_rate": 4.994805516483141e-06, + "loss": 0.4654, + "step": 2120 + }, + { + "epoch": 0.12915994275796974, + "grad_norm": 1.1421437391900036, + "learning_rate": 4.9948003739411015e-06, + "loss": 0.5281, + "step": 2121 + }, + { + "epoch": 0.12922083853484762, + "grad_norm": 0.9988019336393804, + "learning_rate": 4.99479522885741e-06, + "loss": 0.5398, + "step": 2122 + }, + { + "epoch": 0.1292817343117255, + "grad_norm": 1.0994612055967892, + "learning_rate": 4.994790081232074e-06, + "loss": 0.4297, + "step": 2123 + }, + { + "epoch": 0.12934263008860336, + "grad_norm": 1.0066235043906644, + "learning_rate": 4.9947849310650975e-06, + "loss": 0.5193, + "step": 2124 + }, + { + "epoch": 0.12940352586548123, + "grad_norm": 1.0953870013259372, + "learning_rate": 4.994779778356486e-06, + "loss": 0.4866, + "step": 2125 + }, + { + "epoch": 0.1294644216423591, + "grad_norm": 1.1040560567074897, + "learning_rate": 4.994774623106245e-06, + "loss": 0.516, + "step": 2126 + }, + { + "epoch": 0.12952531741923698, + "grad_norm": 1.162782414033461, + "learning_rate": 4.994769465314379e-06, + "loss": 0.5015, + "step": 2127 + }, + { + "epoch": 0.12958621319611485, + "grad_norm": 1.037247236436997, + "learning_rate": 4.994764304980894e-06, + "loss": 0.4828, + "step": 2128 + }, + { + "epoch": 0.12964710897299273, + "grad_norm": 1.1293708470296309, + "learning_rate": 4.994759142105795e-06, + "loss": 0.5328, + "step": 2129 + }, + { + "epoch": 0.1297080047498706, + "grad_norm": 0.9210270259901354, + "learning_rate": 4.9947539766890875e-06, + "loss": 0.5783, + "step": 2130 + }, + { + "epoch": 0.12976890052674847, + "grad_norm": 1.1298668318657212, + "learning_rate": 4.994748808730776e-06, + "loss": 0.5647, + "step": 2131 + }, + { + "epoch": 0.12982979630362634, + "grad_norm": 1.0403239939985685, + "learning_rate": 4.994743638230866e-06, + "loss": 0.4939, + "step": 2132 + }, + { + "epoch": 0.12989069208050422, + "grad_norm": 1.086727077614537, + "learning_rate": 4.994738465189363e-06, + "loss": 0.5634, + "step": 2133 + }, + { + "epoch": 0.1299515878573821, + "grad_norm": 1.0645860694094058, + "learning_rate": 4.994733289606274e-06, + "loss": 0.585, + "step": 2134 + }, + { + "epoch": 0.13001248363425996, + "grad_norm": 1.132019405806143, + "learning_rate": 4.9947281114816016e-06, + "loss": 0.4922, + "step": 2135 + }, + { + "epoch": 0.13007337941113783, + "grad_norm": 1.1236123361525, + "learning_rate": 4.994722930815352e-06, + "loss": 0.5891, + "step": 2136 + }, + { + "epoch": 0.1301342751880157, + "grad_norm": 1.0576044736064194, + "learning_rate": 4.99471774760753e-06, + "loss": 0.5302, + "step": 2137 + }, + { + "epoch": 0.13019517096489358, + "grad_norm": 1.1154170188663723, + "learning_rate": 4.994712561858142e-06, + "loss": 0.5544, + "step": 2138 + }, + { + "epoch": 0.13025606674177145, + "grad_norm": 1.0489756702751556, + "learning_rate": 4.994707373567192e-06, + "loss": 0.5672, + "step": 2139 + }, + { + "epoch": 0.13031696251864933, + "grad_norm": 0.9726022207000563, + "learning_rate": 4.994702182734687e-06, + "loss": 0.4961, + "step": 2140 + }, + { + "epoch": 0.1303778582955272, + "grad_norm": 1.061771650105231, + "learning_rate": 4.994696989360631e-06, + "loss": 0.5095, + "step": 2141 + }, + { + "epoch": 0.13043875407240507, + "grad_norm": 1.1506935157059341, + "learning_rate": 4.994691793445029e-06, + "loss": 0.4551, + "step": 2142 + }, + { + "epoch": 0.13049964984928294, + "grad_norm": 0.9873620093010786, + "learning_rate": 4.994686594987888e-06, + "loss": 0.5802, + "step": 2143 + }, + { + "epoch": 0.13056054562616082, + "grad_norm": 0.9958164418136389, + "learning_rate": 4.994681393989211e-06, + "loss": 0.5251, + "step": 2144 + }, + { + "epoch": 0.1306214414030387, + "grad_norm": 1.039863281188883, + "learning_rate": 4.994676190449005e-06, + "loss": 0.5075, + "step": 2145 + }, + { + "epoch": 0.13068233717991656, + "grad_norm": 1.1228889197255056, + "learning_rate": 4.994670984367275e-06, + "loss": 0.6113, + "step": 2146 + }, + { + "epoch": 0.13074323295679444, + "grad_norm": 1.000367234543684, + "learning_rate": 4.994665775744025e-06, + "loss": 0.6545, + "step": 2147 + }, + { + "epoch": 0.1308041287336723, + "grad_norm": 1.1025086016935004, + "learning_rate": 4.9946605645792624e-06, + "loss": 0.5125, + "step": 2148 + }, + { + "epoch": 0.13086502451055018, + "grad_norm": 1.1425130568105528, + "learning_rate": 4.994655350872992e-06, + "loss": 0.5169, + "step": 2149 + }, + { + "epoch": 0.13092592028742805, + "grad_norm": 0.9631525610986171, + "learning_rate": 4.9946501346252174e-06, + "loss": 0.4881, + "step": 2150 + }, + { + "epoch": 0.13098681606430593, + "grad_norm": 0.9823090406905182, + "learning_rate": 4.994644915835945e-06, + "loss": 0.6038, + "step": 2151 + }, + { + "epoch": 0.1310477118411838, + "grad_norm": 0.9623894870994768, + "learning_rate": 4.994639694505181e-06, + "loss": 0.5219, + "step": 2152 + }, + { + "epoch": 0.1311086076180617, + "grad_norm": 1.047988690183531, + "learning_rate": 4.99463447063293e-06, + "loss": 0.4745, + "step": 2153 + }, + { + "epoch": 0.13116950339493957, + "grad_norm": 1.0234904122678912, + "learning_rate": 4.994629244219197e-06, + "loss": 0.5851, + "step": 2154 + }, + { + "epoch": 0.13123039917181745, + "grad_norm": 1.1465394412638228, + "learning_rate": 4.994624015263987e-06, + "loss": 0.5006, + "step": 2155 + }, + { + "epoch": 0.13129129494869532, + "grad_norm": 1.1968249576626895, + "learning_rate": 4.9946187837673065e-06, + "loss": 0.4969, + "step": 2156 + }, + { + "epoch": 0.1313521907255732, + "grad_norm": 1.1410437536876108, + "learning_rate": 4.99461354972916e-06, + "loss": 0.6011, + "step": 2157 + }, + { + "epoch": 0.13141308650245107, + "grad_norm": 1.0026190218724238, + "learning_rate": 4.994608313149553e-06, + "loss": 0.5372, + "step": 2158 + }, + { + "epoch": 0.13147398227932894, + "grad_norm": 1.0472673900799059, + "learning_rate": 4.994603074028492e-06, + "loss": 0.513, + "step": 2159 + }, + { + "epoch": 0.1315348780562068, + "grad_norm": 1.0764145650470298, + "learning_rate": 4.99459783236598e-06, + "loss": 0.5387, + "step": 2160 + }, + { + "epoch": 0.13159577383308468, + "grad_norm": 1.0416459505878484, + "learning_rate": 4.994592588162025e-06, + "loss": 0.5506, + "step": 2161 + }, + { + "epoch": 0.13165666960996256, + "grad_norm": 1.0907591765178848, + "learning_rate": 4.994587341416629e-06, + "loss": 0.5391, + "step": 2162 + }, + { + "epoch": 0.13171756538684043, + "grad_norm": 1.0257348284420074, + "learning_rate": 4.994582092129801e-06, + "loss": 0.5359, + "step": 2163 + }, + { + "epoch": 0.1317784611637183, + "grad_norm": 1.0555250843035398, + "learning_rate": 4.994576840301544e-06, + "loss": 0.5179, + "step": 2164 + }, + { + "epoch": 0.13183935694059618, + "grad_norm": 1.0257961315151316, + "learning_rate": 4.9945715859318635e-06, + "loss": 0.4884, + "step": 2165 + }, + { + "epoch": 0.13190025271747405, + "grad_norm": 1.1016593364886011, + "learning_rate": 4.994566329020766e-06, + "loss": 0.5297, + "step": 2166 + }, + { + "epoch": 0.13196114849435192, + "grad_norm": 1.098545748594072, + "learning_rate": 4.9945610695682555e-06, + "loss": 0.5428, + "step": 2167 + }, + { + "epoch": 0.1320220442712298, + "grad_norm": 1.035022694361597, + "learning_rate": 4.994555807574339e-06, + "loss": 0.5304, + "step": 2168 + }, + { + "epoch": 0.13208294004810767, + "grad_norm": 1.0210639405064736, + "learning_rate": 4.99455054303902e-06, + "loss": 0.5221, + "step": 2169 + }, + { + "epoch": 0.13214383582498554, + "grad_norm": 1.0441234118084222, + "learning_rate": 4.994545275962305e-06, + "loss": 0.5008, + "step": 2170 + }, + { + "epoch": 0.1322047316018634, + "grad_norm": 1.0693563921774638, + "learning_rate": 4.994540006344199e-06, + "loss": 0.5272, + "step": 2171 + }, + { + "epoch": 0.13226562737874129, + "grad_norm": 1.0480817020296183, + "learning_rate": 4.9945347341847084e-06, + "loss": 0.5497, + "step": 2172 + }, + { + "epoch": 0.13232652315561916, + "grad_norm": 1.0489502253926293, + "learning_rate": 4.994529459483837e-06, + "loss": 0.5425, + "step": 2173 + }, + { + "epoch": 0.13238741893249703, + "grad_norm": 1.0842041870565118, + "learning_rate": 4.994524182241591e-06, + "loss": 0.4875, + "step": 2174 + }, + { + "epoch": 0.1324483147093749, + "grad_norm": 1.0102807829314358, + "learning_rate": 4.994518902457976e-06, + "loss": 0.4877, + "step": 2175 + }, + { + "epoch": 0.13250921048625278, + "grad_norm": 1.0463568167427029, + "learning_rate": 4.994513620132996e-06, + "loss": 0.5279, + "step": 2176 + }, + { + "epoch": 0.13257010626313065, + "grad_norm": 0.9964972929946765, + "learning_rate": 4.994508335266658e-06, + "loss": 0.6494, + "step": 2177 + }, + { + "epoch": 0.13263100204000852, + "grad_norm": 1.045981580605222, + "learning_rate": 4.994503047858966e-06, + "loss": 0.5673, + "step": 2178 + }, + { + "epoch": 0.1326918978168864, + "grad_norm": 0.9765117311210217, + "learning_rate": 4.994497757909927e-06, + "loss": 0.6022, + "step": 2179 + }, + { + "epoch": 0.13275279359376427, + "grad_norm": 1.1232474344820293, + "learning_rate": 4.994492465419546e-06, + "loss": 0.4825, + "step": 2180 + }, + { + "epoch": 0.13281368937064214, + "grad_norm": 1.0203588160744739, + "learning_rate": 4.994487170387828e-06, + "loss": 0.5344, + "step": 2181 + }, + { + "epoch": 0.13287458514752, + "grad_norm": 1.0328680071452823, + "learning_rate": 4.994481872814777e-06, + "loss": 0.4819, + "step": 2182 + }, + { + "epoch": 0.1329354809243979, + "grad_norm": 1.0732170504812888, + "learning_rate": 4.9944765727004e-06, + "loss": 0.5204, + "step": 2183 + }, + { + "epoch": 0.13299637670127576, + "grad_norm": 0.9914696904093478, + "learning_rate": 4.9944712700447026e-06, + "loss": 0.5175, + "step": 2184 + }, + { + "epoch": 0.13305727247815363, + "grad_norm": 1.0297885079729163, + "learning_rate": 4.994465964847689e-06, + "loss": 0.5261, + "step": 2185 + }, + { + "epoch": 0.1331181682550315, + "grad_norm": 1.0944115804937151, + "learning_rate": 4.9944606571093665e-06, + "loss": 0.5338, + "step": 2186 + }, + { + "epoch": 0.13317906403190938, + "grad_norm": 1.034858824556912, + "learning_rate": 4.994455346829739e-06, + "loss": 0.6, + "step": 2187 + }, + { + "epoch": 0.13323995980878725, + "grad_norm": 0.9990610787813946, + "learning_rate": 4.994450034008812e-06, + "loss": 0.6149, + "step": 2188 + }, + { + "epoch": 0.13330085558566512, + "grad_norm": 1.0409606054051632, + "learning_rate": 4.994444718646591e-06, + "loss": 0.5611, + "step": 2189 + }, + { + "epoch": 0.133361751362543, + "grad_norm": 1.0549846052216914, + "learning_rate": 4.994439400743082e-06, + "loss": 0.5117, + "step": 2190 + }, + { + "epoch": 0.13342264713942087, + "grad_norm": 1.0803786111263587, + "learning_rate": 4.994434080298289e-06, + "loss": 0.5943, + "step": 2191 + }, + { + "epoch": 0.13348354291629874, + "grad_norm": 1.0146070680096433, + "learning_rate": 4.99442875731222e-06, + "loss": 0.5265, + "step": 2192 + }, + { + "epoch": 0.13354443869317661, + "grad_norm": 1.2012269906830635, + "learning_rate": 4.9944234317848785e-06, + "loss": 0.4969, + "step": 2193 + }, + { + "epoch": 0.13360533447005452, + "grad_norm": 1.173809557778946, + "learning_rate": 4.994418103716269e-06, + "loss": 0.486, + "step": 2194 + }, + { + "epoch": 0.1336662302469324, + "grad_norm": 1.0340839143368372, + "learning_rate": 4.9944127731064e-06, + "loss": 0.5049, + "step": 2195 + }, + { + "epoch": 0.13372712602381026, + "grad_norm": 1.0447329265192569, + "learning_rate": 4.994407439955273e-06, + "loss": 0.6131, + "step": 2196 + }, + { + "epoch": 0.13378802180068813, + "grad_norm": 1.0075495146298106, + "learning_rate": 4.994402104262898e-06, + "loss": 0.5111, + "step": 2197 + }, + { + "epoch": 0.133848917577566, + "grad_norm": 1.0026502844043368, + "learning_rate": 4.9943967660292755e-06, + "loss": 0.5147, + "step": 2198 + }, + { + "epoch": 0.13390981335444388, + "grad_norm": 1.089590093573451, + "learning_rate": 4.994391425254415e-06, + "loss": 0.5019, + "step": 2199 + }, + { + "epoch": 0.13397070913132175, + "grad_norm": 1.1218243172733202, + "learning_rate": 4.994386081938321e-06, + "loss": 0.5228, + "step": 2200 + }, + { + "epoch": 0.13403160490819963, + "grad_norm": 0.9788847366831586, + "learning_rate": 4.994380736080996e-06, + "loss": 0.5339, + "step": 2201 + }, + { + "epoch": 0.1340925006850775, + "grad_norm": 1.0142537623532422, + "learning_rate": 4.99437538768245e-06, + "loss": 0.5326, + "step": 2202 + }, + { + "epoch": 0.13415339646195537, + "grad_norm": 1.189462193924427, + "learning_rate": 4.994370036742685e-06, + "loss": 0.4832, + "step": 2203 + }, + { + "epoch": 0.13421429223883324, + "grad_norm": 1.0315017567041538, + "learning_rate": 4.994364683261708e-06, + "loss": 0.5711, + "step": 2204 + }, + { + "epoch": 0.13427518801571112, + "grad_norm": 0.9666682960441391, + "learning_rate": 4.994359327239524e-06, + "loss": 0.4943, + "step": 2205 + }, + { + "epoch": 0.134336083792589, + "grad_norm": 1.0657991914411427, + "learning_rate": 4.994353968676139e-06, + "loss": 0.5806, + "step": 2206 + }, + { + "epoch": 0.13439697956946686, + "grad_norm": 1.155628133539745, + "learning_rate": 4.994348607571557e-06, + "loss": 0.497, + "step": 2207 + }, + { + "epoch": 0.13445787534634474, + "grad_norm": 1.0066441738398848, + "learning_rate": 4.994343243925786e-06, + "loss": 0.5863, + "step": 2208 + }, + { + "epoch": 0.1345187711232226, + "grad_norm": 1.0761850073292927, + "learning_rate": 4.994337877738829e-06, + "loss": 0.5462, + "step": 2209 + }, + { + "epoch": 0.13457966690010048, + "grad_norm": 1.0310406257761606, + "learning_rate": 4.994332509010693e-06, + "loss": 0.5629, + "step": 2210 + }, + { + "epoch": 0.13464056267697835, + "grad_norm": 0.9476337212130281, + "learning_rate": 4.994327137741382e-06, + "loss": 0.577, + "step": 2211 + }, + { + "epoch": 0.13470145845385623, + "grad_norm": 1.0525975506786425, + "learning_rate": 4.994321763930903e-06, + "loss": 0.5717, + "step": 2212 + }, + { + "epoch": 0.1347623542307341, + "grad_norm": 0.9932526497820812, + "learning_rate": 4.994316387579261e-06, + "loss": 0.4707, + "step": 2213 + }, + { + "epoch": 0.13482325000761197, + "grad_norm": 1.0625134150698459, + "learning_rate": 4.994311008686461e-06, + "loss": 0.5082, + "step": 2214 + }, + { + "epoch": 0.13488414578448985, + "grad_norm": 1.0248144822083403, + "learning_rate": 4.994305627252509e-06, + "loss": 0.5229, + "step": 2215 + }, + { + "epoch": 0.13494504156136772, + "grad_norm": 0.9909994948248603, + "learning_rate": 4.99430024327741e-06, + "loss": 0.5743, + "step": 2216 + }, + { + "epoch": 0.1350059373382456, + "grad_norm": 1.1017403649611663, + "learning_rate": 4.99429485676117e-06, + "loss": 0.4702, + "step": 2217 + }, + { + "epoch": 0.13506683311512346, + "grad_norm": 1.0280491495268493, + "learning_rate": 4.994289467703794e-06, + "loss": 0.5778, + "step": 2218 + }, + { + "epoch": 0.13512772889200134, + "grad_norm": 0.9937801838217093, + "learning_rate": 4.994284076105288e-06, + "loss": 0.5541, + "step": 2219 + }, + { + "epoch": 0.1351886246688792, + "grad_norm": 1.064929708487776, + "learning_rate": 4.994278681965657e-06, + "loss": 0.5247, + "step": 2220 + }, + { + "epoch": 0.13524952044575708, + "grad_norm": 1.0655088432643074, + "learning_rate": 4.9942732852849065e-06, + "loss": 0.4978, + "step": 2221 + }, + { + "epoch": 0.13531041622263495, + "grad_norm": 1.0328536837134805, + "learning_rate": 4.994267886063043e-06, + "loss": 0.5992, + "step": 2222 + }, + { + "epoch": 0.13537131199951283, + "grad_norm": 1.0309121767219573, + "learning_rate": 4.994262484300071e-06, + "loss": 0.6166, + "step": 2223 + }, + { + "epoch": 0.1354322077763907, + "grad_norm": 1.0726863967977567, + "learning_rate": 4.994257079995996e-06, + "loss": 0.5062, + "step": 2224 + }, + { + "epoch": 0.13549310355326857, + "grad_norm": 0.9755300289531466, + "learning_rate": 4.994251673150824e-06, + "loss": 0.5357, + "step": 2225 + }, + { + "epoch": 0.13555399933014645, + "grad_norm": 0.9993599839044814, + "learning_rate": 4.99424626376456e-06, + "loss": 0.5573, + "step": 2226 + }, + { + "epoch": 0.13561489510702432, + "grad_norm": 1.0079842750987456, + "learning_rate": 4.9942408518372095e-06, + "loss": 0.5198, + "step": 2227 + }, + { + "epoch": 0.1356757908839022, + "grad_norm": 1.0290311451434777, + "learning_rate": 4.994235437368779e-06, + "loss": 0.5301, + "step": 2228 + }, + { + "epoch": 0.13573668666078006, + "grad_norm": 1.088767031340088, + "learning_rate": 4.994230020359273e-06, + "loss": 0.4911, + "step": 2229 + }, + { + "epoch": 0.13579758243765794, + "grad_norm": 1.011088184320105, + "learning_rate": 4.994224600808697e-06, + "loss": 0.5694, + "step": 2230 + }, + { + "epoch": 0.1358584782145358, + "grad_norm": 1.0147733010191529, + "learning_rate": 4.994219178717057e-06, + "loss": 0.4979, + "step": 2231 + }, + { + "epoch": 0.13591937399141368, + "grad_norm": 1.0474129952005546, + "learning_rate": 4.994213754084359e-06, + "loss": 0.5315, + "step": 2232 + }, + { + "epoch": 0.13598026976829156, + "grad_norm": 1.0131990601789405, + "learning_rate": 4.9942083269106065e-06, + "loss": 0.5623, + "step": 2233 + }, + { + "epoch": 0.13604116554516943, + "grad_norm": 1.0564291898868943, + "learning_rate": 4.994202897195808e-06, + "loss": 0.5141, + "step": 2234 + }, + { + "epoch": 0.13610206132204733, + "grad_norm": 1.086843985444809, + "learning_rate": 4.994197464939966e-06, + "loss": 0.5197, + "step": 2235 + }, + { + "epoch": 0.1361629570989252, + "grad_norm": 1.031619063290875, + "learning_rate": 4.994192030143089e-06, + "loss": 0.5513, + "step": 2236 + }, + { + "epoch": 0.13622385287580308, + "grad_norm": 1.0256935041200723, + "learning_rate": 4.994186592805179e-06, + "loss": 0.5098, + "step": 2237 + }, + { + "epoch": 0.13628474865268095, + "grad_norm": 1.119891615479487, + "learning_rate": 4.9941811529262456e-06, + "loss": 0.5637, + "step": 2238 + }, + { + "epoch": 0.13634564442955882, + "grad_norm": 1.0080131983185718, + "learning_rate": 4.994175710506292e-06, + "loss": 0.5709, + "step": 2239 + }, + { + "epoch": 0.1364065402064367, + "grad_norm": 1.0823582426163862, + "learning_rate": 4.994170265545323e-06, + "loss": 0.6132, + "step": 2240 + }, + { + "epoch": 0.13646743598331457, + "grad_norm": 0.9542920769802781, + "learning_rate": 4.994164818043346e-06, + "loss": 0.5415, + "step": 2241 + }, + { + "epoch": 0.13652833176019244, + "grad_norm": 1.075457386522021, + "learning_rate": 4.994159368000365e-06, + "loss": 0.606, + "step": 2242 + }, + { + "epoch": 0.1365892275370703, + "grad_norm": 1.051610874185148, + "learning_rate": 4.994153915416386e-06, + "loss": 0.4609, + "step": 2243 + }, + { + "epoch": 0.13665012331394819, + "grad_norm": 1.0617266716084033, + "learning_rate": 4.994148460291416e-06, + "loss": 0.547, + "step": 2244 + }, + { + "epoch": 0.13671101909082606, + "grad_norm": 1.0146586996273375, + "learning_rate": 4.9941430026254585e-06, + "loss": 0.5041, + "step": 2245 + }, + { + "epoch": 0.13677191486770393, + "grad_norm": 1.0100663112746917, + "learning_rate": 4.994137542418521e-06, + "loss": 0.5225, + "step": 2246 + }, + { + "epoch": 0.1368328106445818, + "grad_norm": 1.1876149188299077, + "learning_rate": 4.994132079670607e-06, + "loss": 0.5021, + "step": 2247 + }, + { + "epoch": 0.13689370642145968, + "grad_norm": 1.0562565403724427, + "learning_rate": 4.994126614381723e-06, + "loss": 0.545, + "step": 2248 + }, + { + "epoch": 0.13695460219833755, + "grad_norm": 1.0623890465027757, + "learning_rate": 4.9941211465518754e-06, + "loss": 0.5253, + "step": 2249 + }, + { + "epoch": 0.13701549797521542, + "grad_norm": 1.017492586115053, + "learning_rate": 4.994115676181069e-06, + "loss": 0.5305, + "step": 2250 + }, + { + "epoch": 0.1370763937520933, + "grad_norm": 0.9472259278980238, + "learning_rate": 4.99411020326931e-06, + "loss": 0.6202, + "step": 2251 + }, + { + "epoch": 0.13713728952897117, + "grad_norm": 0.9999080099141797, + "learning_rate": 4.9941047278166025e-06, + "loss": 0.5326, + "step": 2252 + }, + { + "epoch": 0.13719818530584904, + "grad_norm": 0.9847565323111603, + "learning_rate": 4.994099249822952e-06, + "loss": 0.5731, + "step": 2253 + }, + { + "epoch": 0.1372590810827269, + "grad_norm": 1.0610354897514576, + "learning_rate": 4.994093769288366e-06, + "loss": 0.5276, + "step": 2254 + }, + { + "epoch": 0.1373199768596048, + "grad_norm": 1.1161575627276425, + "learning_rate": 4.99408828621285e-06, + "loss": 0.5224, + "step": 2255 + }, + { + "epoch": 0.13738087263648266, + "grad_norm": 1.0918422770751668, + "learning_rate": 4.994082800596408e-06, + "loss": 0.4956, + "step": 2256 + }, + { + "epoch": 0.13744176841336053, + "grad_norm": 1.0113706036552086, + "learning_rate": 4.994077312439046e-06, + "loss": 0.5795, + "step": 2257 + }, + { + "epoch": 0.1375026641902384, + "grad_norm": 1.01772815883812, + "learning_rate": 4.9940718217407705e-06, + "loss": 0.5176, + "step": 2258 + }, + { + "epoch": 0.13756355996711628, + "grad_norm": 1.0599902602452183, + "learning_rate": 4.994066328501586e-06, + "loss": 0.5169, + "step": 2259 + }, + { + "epoch": 0.13762445574399415, + "grad_norm": 1.09173259436316, + "learning_rate": 4.994060832721499e-06, + "loss": 0.6029, + "step": 2260 + }, + { + "epoch": 0.13768535152087202, + "grad_norm": 1.0403032629146516, + "learning_rate": 4.994055334400514e-06, + "loss": 0.6055, + "step": 2261 + }, + { + "epoch": 0.1377462472977499, + "grad_norm": 1.1292667968449759, + "learning_rate": 4.994049833538637e-06, + "loss": 0.5154, + "step": 2262 + }, + { + "epoch": 0.13780714307462777, + "grad_norm": 1.0493504355886094, + "learning_rate": 4.994044330135875e-06, + "loss": 0.4635, + "step": 2263 + }, + { + "epoch": 0.13786803885150564, + "grad_norm": 1.0312565051564129, + "learning_rate": 4.994038824192232e-06, + "loss": 0.517, + "step": 2264 + }, + { + "epoch": 0.13792893462838351, + "grad_norm": 0.9874243959705971, + "learning_rate": 4.994033315707714e-06, + "loss": 0.5361, + "step": 2265 + }, + { + "epoch": 0.1379898304052614, + "grad_norm": 1.0413851343114897, + "learning_rate": 4.994027804682326e-06, + "loss": 0.5625, + "step": 2266 + }, + { + "epoch": 0.13805072618213926, + "grad_norm": 1.064039704992051, + "learning_rate": 4.994022291116075e-06, + "loss": 0.5452, + "step": 2267 + }, + { + "epoch": 0.13811162195901713, + "grad_norm": 1.154923625444499, + "learning_rate": 4.994016775008967e-06, + "loss": 0.5174, + "step": 2268 + }, + { + "epoch": 0.138172517735895, + "grad_norm": 1.0839570012816246, + "learning_rate": 4.9940112563610056e-06, + "loss": 0.5085, + "step": 2269 + }, + { + "epoch": 0.13823341351277288, + "grad_norm": 1.0303587274338182, + "learning_rate": 4.994005735172197e-06, + "loss": 0.5295, + "step": 2270 + }, + { + "epoch": 0.13829430928965075, + "grad_norm": 0.9583953097832033, + "learning_rate": 4.994000211442548e-06, + "loss": 0.5409, + "step": 2271 + }, + { + "epoch": 0.13835520506652862, + "grad_norm": 1.0382044350792317, + "learning_rate": 4.993994685172063e-06, + "loss": 0.4986, + "step": 2272 + }, + { + "epoch": 0.1384161008434065, + "grad_norm": 1.033836777099319, + "learning_rate": 4.993989156360748e-06, + "loss": 0.5166, + "step": 2273 + }, + { + "epoch": 0.13847699662028437, + "grad_norm": 1.0651384479861548, + "learning_rate": 4.9939836250086086e-06, + "loss": 0.5607, + "step": 2274 + }, + { + "epoch": 0.13853789239716224, + "grad_norm": 1.0844507247402462, + "learning_rate": 4.993978091115651e-06, + "loss": 0.521, + "step": 2275 + }, + { + "epoch": 0.13859878817404014, + "grad_norm": 1.0656933670439712, + "learning_rate": 4.99397255468188e-06, + "loss": 0.5025, + "step": 2276 + }, + { + "epoch": 0.13865968395091802, + "grad_norm": 1.1083592768825914, + "learning_rate": 4.9939670157073016e-06, + "loss": 0.4841, + "step": 2277 + }, + { + "epoch": 0.1387205797277959, + "grad_norm": 1.0239992443636334, + "learning_rate": 4.993961474191922e-06, + "loss": 0.5748, + "step": 2278 + }, + { + "epoch": 0.13878147550467376, + "grad_norm": 1.1030559628367933, + "learning_rate": 4.9939559301357455e-06, + "loss": 0.5055, + "step": 2279 + }, + { + "epoch": 0.13884237128155164, + "grad_norm": 1.0229539786661996, + "learning_rate": 4.993950383538779e-06, + "loss": 0.5648, + "step": 2280 + }, + { + "epoch": 0.1389032670584295, + "grad_norm": 1.0855798152452205, + "learning_rate": 4.993944834401028e-06, + "loss": 0.4864, + "step": 2281 + }, + { + "epoch": 0.13896416283530738, + "grad_norm": 1.0025732805404302, + "learning_rate": 4.993939282722498e-06, + "loss": 0.506, + "step": 2282 + }, + { + "epoch": 0.13902505861218525, + "grad_norm": 1.0949423169903296, + "learning_rate": 4.993933728503193e-06, + "loss": 0.5114, + "step": 2283 + }, + { + "epoch": 0.13908595438906313, + "grad_norm": 0.9693286900914392, + "learning_rate": 4.993928171743122e-06, + "loss": 0.5309, + "step": 2284 + }, + { + "epoch": 0.139146850165941, + "grad_norm": 1.0318645373833886, + "learning_rate": 4.993922612442288e-06, + "loss": 0.5094, + "step": 2285 + }, + { + "epoch": 0.13920774594281887, + "grad_norm": 1.0278990814826585, + "learning_rate": 4.9939170506006975e-06, + "loss": 0.5616, + "step": 2286 + }, + { + "epoch": 0.13926864171969675, + "grad_norm": 1.0552312577729879, + "learning_rate": 4.993911486218356e-06, + "loss": 0.5433, + "step": 2287 + }, + { + "epoch": 0.13932953749657462, + "grad_norm": 1.0542364191026994, + "learning_rate": 4.993905919295269e-06, + "loss": 0.5075, + "step": 2288 + }, + { + "epoch": 0.1393904332734525, + "grad_norm": 1.0545686250425834, + "learning_rate": 4.993900349831443e-06, + "loss": 0.4844, + "step": 2289 + }, + { + "epoch": 0.13945132905033036, + "grad_norm": 1.1903849028458484, + "learning_rate": 4.9938947778268835e-06, + "loss": 0.5259, + "step": 2290 + }, + { + "epoch": 0.13951222482720824, + "grad_norm": 1.105615336419567, + "learning_rate": 4.993889203281596e-06, + "loss": 0.53, + "step": 2291 + }, + { + "epoch": 0.1395731206040861, + "grad_norm": 1.122581152173588, + "learning_rate": 4.993883626195584e-06, + "loss": 0.5029, + "step": 2292 + }, + { + "epoch": 0.13963401638096398, + "grad_norm": 1.0498841141519926, + "learning_rate": 4.993878046568857e-06, + "loss": 0.5014, + "step": 2293 + }, + { + "epoch": 0.13969491215784186, + "grad_norm": 1.0196095015560538, + "learning_rate": 4.993872464401419e-06, + "loss": 0.5213, + "step": 2294 + }, + { + "epoch": 0.13975580793471973, + "grad_norm": 1.0384291670557024, + "learning_rate": 4.993866879693276e-06, + "loss": 0.5128, + "step": 2295 + }, + { + "epoch": 0.1398167037115976, + "grad_norm": 0.9772498219036625, + "learning_rate": 4.993861292444432e-06, + "loss": 0.6011, + "step": 2296 + }, + { + "epoch": 0.13987759948847547, + "grad_norm": 1.0734107355103577, + "learning_rate": 4.993855702654894e-06, + "loss": 0.4743, + "step": 2297 + }, + { + "epoch": 0.13993849526535335, + "grad_norm": 1.0945613267787857, + "learning_rate": 4.993850110324668e-06, + "loss": 0.4655, + "step": 2298 + }, + { + "epoch": 0.13999939104223122, + "grad_norm": 1.0786566356936487, + "learning_rate": 4.99384451545376e-06, + "loss": 0.4857, + "step": 2299 + }, + { + "epoch": 0.1400602868191091, + "grad_norm": 1.0298422744648748, + "learning_rate": 4.993838918042174e-06, + "loss": 0.5021, + "step": 2300 + }, + { + "epoch": 0.14012118259598697, + "grad_norm": 1.0041628473768203, + "learning_rate": 4.993833318089918e-06, + "loss": 0.5139, + "step": 2301 + }, + { + "epoch": 0.14018207837286484, + "grad_norm": 1.050042532786145, + "learning_rate": 4.993827715596995e-06, + "loss": 0.581, + "step": 2302 + }, + { + "epoch": 0.1402429741497427, + "grad_norm": 1.0306857848394897, + "learning_rate": 4.993822110563414e-06, + "loss": 0.5675, + "step": 2303 + }, + { + "epoch": 0.14030386992662058, + "grad_norm": 1.1397063772350262, + "learning_rate": 4.993816502989177e-06, + "loss": 0.5553, + "step": 2304 + }, + { + "epoch": 0.14036476570349846, + "grad_norm": 1.1072694264279346, + "learning_rate": 4.993810892874293e-06, + "loss": 0.5024, + "step": 2305 + }, + { + "epoch": 0.14042566148037633, + "grad_norm": 1.088525503600628, + "learning_rate": 4.993805280218765e-06, + "loss": 0.4899, + "step": 2306 + }, + { + "epoch": 0.1404865572572542, + "grad_norm": 1.021165274800886, + "learning_rate": 4.993799665022601e-06, + "loss": 0.4895, + "step": 2307 + }, + { + "epoch": 0.14054745303413207, + "grad_norm": 1.024001492262837, + "learning_rate": 4.993794047285805e-06, + "loss": 0.5253, + "step": 2308 + }, + { + "epoch": 0.14060834881100995, + "grad_norm": 0.9239827072772366, + "learning_rate": 4.993788427008384e-06, + "loss": 0.5519, + "step": 2309 + }, + { + "epoch": 0.14066924458788782, + "grad_norm": 1.0085361471209127, + "learning_rate": 4.993782804190344e-06, + "loss": 0.5398, + "step": 2310 + }, + { + "epoch": 0.1407301403647657, + "grad_norm": 1.097176390019581, + "learning_rate": 4.9937771788316885e-06, + "loss": 0.4632, + "step": 2311 + }, + { + "epoch": 0.14079103614164357, + "grad_norm": 1.063124552236338, + "learning_rate": 4.993771550932426e-06, + "loss": 0.5004, + "step": 2312 + }, + { + "epoch": 0.14085193191852144, + "grad_norm": 0.9947474198384793, + "learning_rate": 4.993765920492559e-06, + "loss": 0.5602, + "step": 2313 + }, + { + "epoch": 0.1409128276953993, + "grad_norm": 1.1274516826916041, + "learning_rate": 4.993760287512097e-06, + "loss": 0.4232, + "step": 2314 + }, + { + "epoch": 0.14097372347227718, + "grad_norm": 1.031686007794025, + "learning_rate": 4.993754651991043e-06, + "loss": 0.4493, + "step": 2315 + }, + { + "epoch": 0.14103461924915506, + "grad_norm": 1.110107065483981, + "learning_rate": 4.9937490139294035e-06, + "loss": 0.5211, + "step": 2316 + }, + { + "epoch": 0.14109551502603296, + "grad_norm": 1.0697359658626968, + "learning_rate": 4.993743373327184e-06, + "loss": 0.581, + "step": 2317 + }, + { + "epoch": 0.14115641080291083, + "grad_norm": 1.0067639528028638, + "learning_rate": 4.993737730184391e-06, + "loss": 0.5573, + "step": 2318 + }, + { + "epoch": 0.1412173065797887, + "grad_norm": 1.008988180514407, + "learning_rate": 4.993732084501031e-06, + "loss": 0.5061, + "step": 2319 + }, + { + "epoch": 0.14127820235666658, + "grad_norm": 0.9950785556539449, + "learning_rate": 4.993726436277108e-06, + "loss": 0.5282, + "step": 2320 + }, + { + "epoch": 0.14133909813354445, + "grad_norm": 1.0552020401442412, + "learning_rate": 4.9937207855126265e-06, + "loss": 0.5209, + "step": 2321 + }, + { + "epoch": 0.14139999391042232, + "grad_norm": 1.022062730453616, + "learning_rate": 4.993715132207596e-06, + "loss": 0.5403, + "step": 2322 + }, + { + "epoch": 0.1414608896873002, + "grad_norm": 1.066826470246887, + "learning_rate": 4.99370947636202e-06, + "loss": 0.5252, + "step": 2323 + }, + { + "epoch": 0.14152178546417807, + "grad_norm": 1.0146206722887003, + "learning_rate": 4.993703817975905e-06, + "loss": 0.5434, + "step": 2324 + }, + { + "epoch": 0.14158268124105594, + "grad_norm": 1.0450933820373014, + "learning_rate": 4.993698157049256e-06, + "loss": 0.4834, + "step": 2325 + }, + { + "epoch": 0.1416435770179338, + "grad_norm": 1.2217347181648328, + "learning_rate": 4.993692493582078e-06, + "loss": 0.5068, + "step": 2326 + }, + { + "epoch": 0.1417044727948117, + "grad_norm": 1.0059795015103712, + "learning_rate": 4.99368682757438e-06, + "loss": 0.5491, + "step": 2327 + }, + { + "epoch": 0.14176536857168956, + "grad_norm": 1.1191015875721244, + "learning_rate": 4.993681159026165e-06, + "loss": 0.4863, + "step": 2328 + }, + { + "epoch": 0.14182626434856743, + "grad_norm": 1.0751260977376962, + "learning_rate": 4.993675487937439e-06, + "loss": 0.4555, + "step": 2329 + }, + { + "epoch": 0.1418871601254453, + "grad_norm": 1.0857842916268585, + "learning_rate": 4.993669814308208e-06, + "loss": 0.4655, + "step": 2330 + }, + { + "epoch": 0.14194805590232318, + "grad_norm": 1.0070496815028818, + "learning_rate": 4.993664138138478e-06, + "loss": 0.5478, + "step": 2331 + }, + { + "epoch": 0.14200895167920105, + "grad_norm": 1.0592320662670147, + "learning_rate": 4.993658459428257e-06, + "loss": 0.4991, + "step": 2332 + }, + { + "epoch": 0.14206984745607892, + "grad_norm": 1.028067058643168, + "learning_rate": 4.993652778177547e-06, + "loss": 0.5585, + "step": 2333 + }, + { + "epoch": 0.1421307432329568, + "grad_norm": 1.1208924447465654, + "learning_rate": 4.9936470943863545e-06, + "loss": 0.4553, + "step": 2334 + }, + { + "epoch": 0.14219163900983467, + "grad_norm": 1.007358838095519, + "learning_rate": 4.993641408054687e-06, + "loss": 0.5389, + "step": 2335 + }, + { + "epoch": 0.14225253478671254, + "grad_norm": 1.0863951501580171, + "learning_rate": 4.9936357191825504e-06, + "loss": 0.5372, + "step": 2336 + }, + { + "epoch": 0.14231343056359042, + "grad_norm": 1.1295852001184516, + "learning_rate": 4.9936300277699476e-06, + "loss": 0.4325, + "step": 2337 + }, + { + "epoch": 0.1423743263404683, + "grad_norm": 1.0675879583475667, + "learning_rate": 4.993624333816888e-06, + "loss": 0.5481, + "step": 2338 + }, + { + "epoch": 0.14243522211734616, + "grad_norm": 1.1247182271520402, + "learning_rate": 4.993618637323376e-06, + "loss": 0.5002, + "step": 2339 + }, + { + "epoch": 0.14249611789422403, + "grad_norm": 1.0348663390057122, + "learning_rate": 4.993612938289416e-06, + "loss": 0.5201, + "step": 2340 + }, + { + "epoch": 0.1425570136711019, + "grad_norm": 1.0914146322235103, + "learning_rate": 4.993607236715015e-06, + "loss": 0.4786, + "step": 2341 + }, + { + "epoch": 0.14261790944797978, + "grad_norm": 1.1365407053438576, + "learning_rate": 4.9936015326001796e-06, + "loss": 0.4972, + "step": 2342 + }, + { + "epoch": 0.14267880522485765, + "grad_norm": 0.9692945510241882, + "learning_rate": 4.993595825944914e-06, + "loss": 0.5385, + "step": 2343 + }, + { + "epoch": 0.14273970100173552, + "grad_norm": 0.9882400463515096, + "learning_rate": 4.993590116749225e-06, + "loss": 0.5356, + "step": 2344 + }, + { + "epoch": 0.1428005967786134, + "grad_norm": 0.9745822120780115, + "learning_rate": 4.993584405013119e-06, + "loss": 0.5518, + "step": 2345 + }, + { + "epoch": 0.14286149255549127, + "grad_norm": 1.1553747147371367, + "learning_rate": 4.9935786907366005e-06, + "loss": 0.5026, + "step": 2346 + }, + { + "epoch": 0.14292238833236914, + "grad_norm": 1.0940020912188806, + "learning_rate": 4.993572973919676e-06, + "loss": 0.5248, + "step": 2347 + }, + { + "epoch": 0.14298328410924702, + "grad_norm": 1.1382709505041846, + "learning_rate": 4.99356725456235e-06, + "loss": 0.4523, + "step": 2348 + }, + { + "epoch": 0.1430441798861249, + "grad_norm": 1.0614213452364047, + "learning_rate": 4.993561532664632e-06, + "loss": 0.5593, + "step": 2349 + }, + { + "epoch": 0.14310507566300276, + "grad_norm": 1.0630503477177273, + "learning_rate": 4.9935558082265234e-06, + "loss": 0.4686, + "step": 2350 + }, + { + "epoch": 0.14316597143988063, + "grad_norm": 1.076930553106851, + "learning_rate": 4.993550081248033e-06, + "loss": 0.5467, + "step": 2351 + }, + { + "epoch": 0.1432268672167585, + "grad_norm": 1.0300668587645017, + "learning_rate": 4.993544351729165e-06, + "loss": 0.5751, + "step": 2352 + }, + { + "epoch": 0.14328776299363638, + "grad_norm": 1.1521838063839844, + "learning_rate": 4.993538619669925e-06, + "loss": 0.5059, + "step": 2353 + }, + { + "epoch": 0.14334865877051425, + "grad_norm": 1.1104289974611699, + "learning_rate": 4.993532885070321e-06, + "loss": 0.5628, + "step": 2354 + }, + { + "epoch": 0.14340955454739213, + "grad_norm": 1.095199626544282, + "learning_rate": 4.993527147930357e-06, + "loss": 0.478, + "step": 2355 + }, + { + "epoch": 0.14347045032427, + "grad_norm": 1.0364574574690615, + "learning_rate": 4.99352140825004e-06, + "loss": 0.5247, + "step": 2356 + }, + { + "epoch": 0.14353134610114787, + "grad_norm": 1.0625363201711866, + "learning_rate": 4.993515666029375e-06, + "loss": 0.5736, + "step": 2357 + }, + { + "epoch": 0.14359224187802577, + "grad_norm": 0.974620708351207, + "learning_rate": 4.993509921268368e-06, + "loss": 0.5377, + "step": 2358 + }, + { + "epoch": 0.14365313765490365, + "grad_norm": 1.0872242034588018, + "learning_rate": 4.993504173967025e-06, + "loss": 0.4344, + "step": 2359 + }, + { + "epoch": 0.14371403343178152, + "grad_norm": 1.0752491242165478, + "learning_rate": 4.9934984241253515e-06, + "loss": 0.5186, + "step": 2360 + }, + { + "epoch": 0.1437749292086594, + "grad_norm": 1.1481579442154832, + "learning_rate": 4.993492671743354e-06, + "loss": 0.499, + "step": 2361 + }, + { + "epoch": 0.14383582498553726, + "grad_norm": 0.9972270246080913, + "learning_rate": 4.993486916821038e-06, + "loss": 0.5172, + "step": 2362 + }, + { + "epoch": 0.14389672076241514, + "grad_norm": 1.1188785802242305, + "learning_rate": 4.993481159358409e-06, + "loss": 0.4912, + "step": 2363 + }, + { + "epoch": 0.143957616539293, + "grad_norm": 1.0512862103897036, + "learning_rate": 4.993475399355473e-06, + "loss": 0.5306, + "step": 2364 + }, + { + "epoch": 0.14401851231617088, + "grad_norm": 0.9972637184289976, + "learning_rate": 4.993469636812237e-06, + "loss": 0.5084, + "step": 2365 + }, + { + "epoch": 0.14407940809304876, + "grad_norm": 1.0853543178101301, + "learning_rate": 4.9934638717287055e-06, + "loss": 0.5451, + "step": 2366 + }, + { + "epoch": 0.14414030386992663, + "grad_norm": 1.0948603859011456, + "learning_rate": 4.993458104104885e-06, + "loss": 0.5418, + "step": 2367 + }, + { + "epoch": 0.1442011996468045, + "grad_norm": 1.0929121270048583, + "learning_rate": 4.99345233394078e-06, + "loss": 0.5004, + "step": 2368 + }, + { + "epoch": 0.14426209542368237, + "grad_norm": 1.0930967605587678, + "learning_rate": 4.993446561236399e-06, + "loss": 0.6018, + "step": 2369 + }, + { + "epoch": 0.14432299120056025, + "grad_norm": 1.0922590804208276, + "learning_rate": 4.993440785991746e-06, + "loss": 0.4809, + "step": 2370 + }, + { + "epoch": 0.14438388697743812, + "grad_norm": 1.1252625457498189, + "learning_rate": 4.993435008206829e-06, + "loss": 0.5289, + "step": 2371 + }, + { + "epoch": 0.144444782754316, + "grad_norm": 1.12674063917501, + "learning_rate": 4.99342922788165e-06, + "loss": 0.4796, + "step": 2372 + }, + { + "epoch": 0.14450567853119387, + "grad_norm": 1.1555489980330933, + "learning_rate": 4.993423445016218e-06, + "loss": 0.5251, + "step": 2373 + }, + { + "epoch": 0.14456657430807174, + "grad_norm": 1.0220256174341484, + "learning_rate": 4.993417659610538e-06, + "loss": 0.4845, + "step": 2374 + }, + { + "epoch": 0.1446274700849496, + "grad_norm": 1.1466744679088448, + "learning_rate": 4.993411871664615e-06, + "loss": 0.5126, + "step": 2375 + }, + { + "epoch": 0.14468836586182748, + "grad_norm": 1.065848561812752, + "learning_rate": 4.993406081178457e-06, + "loss": 0.4939, + "step": 2376 + }, + { + "epoch": 0.14474926163870536, + "grad_norm": 1.0788440453774666, + "learning_rate": 4.993400288152069e-06, + "loss": 0.5488, + "step": 2377 + }, + { + "epoch": 0.14481015741558323, + "grad_norm": 1.076695481119285, + "learning_rate": 4.993394492585456e-06, + "loss": 0.4914, + "step": 2378 + }, + { + "epoch": 0.1448710531924611, + "grad_norm": 1.060220931966688, + "learning_rate": 4.993388694478624e-06, + "loss": 0.5748, + "step": 2379 + }, + { + "epoch": 0.14493194896933898, + "grad_norm": 0.9816014871226806, + "learning_rate": 4.9933828938315805e-06, + "loss": 0.5379, + "step": 2380 + }, + { + "epoch": 0.14499284474621685, + "grad_norm": 0.965153066733068, + "learning_rate": 4.9933770906443295e-06, + "loss": 0.5601, + "step": 2381 + }, + { + "epoch": 0.14505374052309472, + "grad_norm": 1.0029131845115766, + "learning_rate": 4.993371284916878e-06, + "loss": 0.551, + "step": 2382 + }, + { + "epoch": 0.1451146362999726, + "grad_norm": 1.1688783911050467, + "learning_rate": 4.993365476649232e-06, + "loss": 0.4796, + "step": 2383 + }, + { + "epoch": 0.14517553207685047, + "grad_norm": 1.0807386249572402, + "learning_rate": 4.993359665841398e-06, + "loss": 0.4784, + "step": 2384 + }, + { + "epoch": 0.14523642785372834, + "grad_norm": 0.9498667898345313, + "learning_rate": 4.993353852493379e-06, + "loss": 0.5675, + "step": 2385 + }, + { + "epoch": 0.1452973236306062, + "grad_norm": 1.0938592044627706, + "learning_rate": 4.993348036605183e-06, + "loss": 0.4748, + "step": 2386 + }, + { + "epoch": 0.14535821940748408, + "grad_norm": 0.9500347280712079, + "learning_rate": 4.993342218176818e-06, + "loss": 0.5606, + "step": 2387 + }, + { + "epoch": 0.14541911518436196, + "grad_norm": 1.0485830315338138, + "learning_rate": 4.993336397208286e-06, + "loss": 0.556, + "step": 2388 + }, + { + "epoch": 0.14548001096123983, + "grad_norm": 1.2524629065982562, + "learning_rate": 4.993330573699595e-06, + "loss": 0.4422, + "step": 2389 + }, + { + "epoch": 0.1455409067381177, + "grad_norm": 0.946924728933562, + "learning_rate": 4.993324747650751e-06, + "loss": 0.5646, + "step": 2390 + }, + { + "epoch": 0.14560180251499558, + "grad_norm": 1.0359521592992935, + "learning_rate": 4.9933189190617595e-06, + "loss": 0.5045, + "step": 2391 + }, + { + "epoch": 0.14566269829187345, + "grad_norm": 1.0053404541657602, + "learning_rate": 4.993313087932626e-06, + "loss": 0.5589, + "step": 2392 + }, + { + "epoch": 0.14572359406875132, + "grad_norm": 1.0856856816627114, + "learning_rate": 4.993307254263357e-06, + "loss": 0.4947, + "step": 2393 + }, + { + "epoch": 0.1457844898456292, + "grad_norm": 1.0863461269274963, + "learning_rate": 4.993301418053959e-06, + "loss": 0.5482, + "step": 2394 + }, + { + "epoch": 0.14584538562250707, + "grad_norm": 1.0476786463422072, + "learning_rate": 4.993295579304438e-06, + "loss": 0.56, + "step": 2395 + }, + { + "epoch": 0.14590628139938494, + "grad_norm": 1.0880962393390015, + "learning_rate": 4.9932897380147985e-06, + "loss": 0.4675, + "step": 2396 + }, + { + "epoch": 0.1459671771762628, + "grad_norm": 1.0584923937312651, + "learning_rate": 4.993283894185047e-06, + "loss": 0.5128, + "step": 2397 + }, + { + "epoch": 0.1460280729531407, + "grad_norm": 1.120969329324478, + "learning_rate": 4.99327804781519e-06, + "loss": 0.5451, + "step": 2398 + }, + { + "epoch": 0.1460889687300186, + "grad_norm": 1.093557523170927, + "learning_rate": 4.993272198905233e-06, + "loss": 0.5303, + "step": 2399 + }, + { + "epoch": 0.14614986450689646, + "grad_norm": 1.0502425948044853, + "learning_rate": 4.993266347455182e-06, + "loss": 0.4937, + "step": 2400 + }, + { + "epoch": 0.14621076028377433, + "grad_norm": 0.9702904871939518, + "learning_rate": 4.993260493465044e-06, + "loss": 0.5907, + "step": 2401 + }, + { + "epoch": 0.1462716560606522, + "grad_norm": 1.0686083918491232, + "learning_rate": 4.993254636934823e-06, + "loss": 0.5165, + "step": 2402 + }, + { + "epoch": 0.14633255183753008, + "grad_norm": 0.9680413528702653, + "learning_rate": 4.993248777864526e-06, + "loss": 0.5622, + "step": 2403 + }, + { + "epoch": 0.14639344761440795, + "grad_norm": 1.0304545227048931, + "learning_rate": 4.993242916254159e-06, + "loss": 0.5713, + "step": 2404 + }, + { + "epoch": 0.14645434339128582, + "grad_norm": 1.043803153308817, + "learning_rate": 4.9932370521037286e-06, + "loss": 0.4982, + "step": 2405 + }, + { + "epoch": 0.1465152391681637, + "grad_norm": 1.069432707114138, + "learning_rate": 4.99323118541324e-06, + "loss": 0.5528, + "step": 2406 + }, + { + "epoch": 0.14657613494504157, + "grad_norm": 0.982113011366172, + "learning_rate": 4.9932253161827e-06, + "loss": 0.5744, + "step": 2407 + }, + { + "epoch": 0.14663703072191944, + "grad_norm": 0.948015472300026, + "learning_rate": 4.993219444412112e-06, + "loss": 0.5459, + "step": 2408 + }, + { + "epoch": 0.14669792649879732, + "grad_norm": 0.9959103962280902, + "learning_rate": 4.993213570101485e-06, + "loss": 0.5335, + "step": 2409 + }, + { + "epoch": 0.1467588222756752, + "grad_norm": 1.0231955862413369, + "learning_rate": 4.993207693250824e-06, + "loss": 0.5685, + "step": 2410 + }, + { + "epoch": 0.14681971805255306, + "grad_norm": 1.066536718206446, + "learning_rate": 4.9932018138601344e-06, + "loss": 0.4669, + "step": 2411 + }, + { + "epoch": 0.14688061382943093, + "grad_norm": 1.053354254607229, + "learning_rate": 4.993195931929423e-06, + "loss": 0.5587, + "step": 2412 + }, + { + "epoch": 0.1469415096063088, + "grad_norm": 1.0390917809993145, + "learning_rate": 4.9931900474586945e-06, + "loss": 0.4731, + "step": 2413 + }, + { + "epoch": 0.14700240538318668, + "grad_norm": 1.1223803730347472, + "learning_rate": 4.993184160447957e-06, + "loss": 0.5536, + "step": 2414 + }, + { + "epoch": 0.14706330116006455, + "grad_norm": 1.0879923005157721, + "learning_rate": 4.993178270897215e-06, + "loss": 0.5348, + "step": 2415 + }, + { + "epoch": 0.14712419693694243, + "grad_norm": 1.0793932599669671, + "learning_rate": 4.993172378806475e-06, + "loss": 0.5612, + "step": 2416 + }, + { + "epoch": 0.1471850927138203, + "grad_norm": 1.233875916016436, + "learning_rate": 4.993166484175742e-06, + "loss": 0.4978, + "step": 2417 + }, + { + "epoch": 0.14724598849069817, + "grad_norm": 1.0625996387109087, + "learning_rate": 4.993160587005023e-06, + "loss": 0.5088, + "step": 2418 + }, + { + "epoch": 0.14730688426757604, + "grad_norm": 1.0559561954545595, + "learning_rate": 4.993154687294324e-06, + "loss": 0.5775, + "step": 2419 + }, + { + "epoch": 0.14736778004445392, + "grad_norm": 1.037332131624886, + "learning_rate": 4.993148785043651e-06, + "loss": 0.4916, + "step": 2420 + }, + { + "epoch": 0.1474286758213318, + "grad_norm": 1.0388196415843405, + "learning_rate": 4.99314288025301e-06, + "loss": 0.5671, + "step": 2421 + }, + { + "epoch": 0.14748957159820966, + "grad_norm": 1.0463771197268767, + "learning_rate": 4.9931369729224066e-06, + "loss": 0.518, + "step": 2422 + }, + { + "epoch": 0.14755046737508754, + "grad_norm": 1.1414548943852438, + "learning_rate": 4.993131063051847e-06, + "loss": 0.5487, + "step": 2423 + }, + { + "epoch": 0.1476113631519654, + "grad_norm": 1.0671214966360865, + "learning_rate": 4.993125150641338e-06, + "loss": 0.5166, + "step": 2424 + }, + { + "epoch": 0.14767225892884328, + "grad_norm": 1.0897371810895793, + "learning_rate": 4.993119235690884e-06, + "loss": 0.4933, + "step": 2425 + }, + { + "epoch": 0.14773315470572115, + "grad_norm": 0.9775606709526539, + "learning_rate": 4.993113318200492e-06, + "loss": 0.5936, + "step": 2426 + }, + { + "epoch": 0.14779405048259903, + "grad_norm": 1.069349619975326, + "learning_rate": 4.9931073981701685e-06, + "loss": 0.5661, + "step": 2427 + }, + { + "epoch": 0.1478549462594769, + "grad_norm": 0.976201266616463, + "learning_rate": 4.993101475599919e-06, + "loss": 0.5139, + "step": 2428 + }, + { + "epoch": 0.14791584203635477, + "grad_norm": 0.9527347441128732, + "learning_rate": 4.993095550489749e-06, + "loss": 0.5806, + "step": 2429 + }, + { + "epoch": 0.14797673781323264, + "grad_norm": 1.0900130031159043, + "learning_rate": 4.993089622839667e-06, + "loss": 0.5095, + "step": 2430 + }, + { + "epoch": 0.14803763359011052, + "grad_norm": 1.134999491442151, + "learning_rate": 4.9930836926496745e-06, + "loss": 0.5313, + "step": 2431 + }, + { + "epoch": 0.1480985293669884, + "grad_norm": 1.1213965300225313, + "learning_rate": 4.993077759919781e-06, + "loss": 0.5014, + "step": 2432 + }, + { + "epoch": 0.14815942514386626, + "grad_norm": 1.0267060867109226, + "learning_rate": 4.993071824649992e-06, + "loss": 0.4983, + "step": 2433 + }, + { + "epoch": 0.14822032092074414, + "grad_norm": 1.035208642743681, + "learning_rate": 4.993065886840314e-06, + "loss": 0.5816, + "step": 2434 + }, + { + "epoch": 0.148281216697622, + "grad_norm": 1.1352572574375, + "learning_rate": 4.993059946490751e-06, + "loss": 0.5051, + "step": 2435 + }, + { + "epoch": 0.14834211247449988, + "grad_norm": 1.0674972479211104, + "learning_rate": 4.993054003601311e-06, + "loss": 0.4849, + "step": 2436 + }, + { + "epoch": 0.14840300825137775, + "grad_norm": 1.01487254661745, + "learning_rate": 4.993048058171999e-06, + "loss": 0.5528, + "step": 2437 + }, + { + "epoch": 0.14846390402825563, + "grad_norm": 1.0361541003061985, + "learning_rate": 4.993042110202822e-06, + "loss": 0.5568, + "step": 2438 + }, + { + "epoch": 0.1485247998051335, + "grad_norm": 1.0328325003157501, + "learning_rate": 4.993036159693785e-06, + "loss": 0.6495, + "step": 2439 + }, + { + "epoch": 0.1485856955820114, + "grad_norm": 1.0453640868543714, + "learning_rate": 4.993030206644895e-06, + "loss": 0.5215, + "step": 2440 + }, + { + "epoch": 0.14864659135888927, + "grad_norm": 1.0563451216546569, + "learning_rate": 4.993024251056158e-06, + "loss": 0.5155, + "step": 2441 + }, + { + "epoch": 0.14870748713576715, + "grad_norm": 1.0237300662612119, + "learning_rate": 4.993018292927579e-06, + "loss": 0.5449, + "step": 2442 + }, + { + "epoch": 0.14876838291264502, + "grad_norm": 1.0714902216455926, + "learning_rate": 4.993012332259165e-06, + "loss": 0.5511, + "step": 2443 + }, + { + "epoch": 0.1488292786895229, + "grad_norm": 1.025165719635034, + "learning_rate": 4.993006369050922e-06, + "loss": 0.497, + "step": 2444 + }, + { + "epoch": 0.14889017446640077, + "grad_norm": 1.032877692600213, + "learning_rate": 4.993000403302856e-06, + "loss": 0.544, + "step": 2445 + }, + { + "epoch": 0.14895107024327864, + "grad_norm": 0.9990060427900036, + "learning_rate": 4.992994435014973e-06, + "loss": 0.488, + "step": 2446 + }, + { + "epoch": 0.1490119660201565, + "grad_norm": 1.1166430786955657, + "learning_rate": 4.99298846418728e-06, + "loss": 0.5158, + "step": 2447 + }, + { + "epoch": 0.14907286179703438, + "grad_norm": 1.048825958699336, + "learning_rate": 4.99298249081978e-06, + "loss": 0.4955, + "step": 2448 + }, + { + "epoch": 0.14913375757391226, + "grad_norm": 0.9883260834324052, + "learning_rate": 4.992976514912483e-06, + "loss": 0.578, + "step": 2449 + }, + { + "epoch": 0.14919465335079013, + "grad_norm": 1.1434749054841808, + "learning_rate": 4.992970536465392e-06, + "loss": 0.4986, + "step": 2450 + }, + { + "epoch": 0.149255549127668, + "grad_norm": 1.1316201072352132, + "learning_rate": 4.992964555478516e-06, + "loss": 0.6303, + "step": 2451 + }, + { + "epoch": 0.14931644490454588, + "grad_norm": 1.0920252308786944, + "learning_rate": 4.992958571951858e-06, + "loss": 0.5403, + "step": 2452 + }, + { + "epoch": 0.14937734068142375, + "grad_norm": 1.0756526535815185, + "learning_rate": 4.992952585885427e-06, + "loss": 0.5435, + "step": 2453 + }, + { + "epoch": 0.14943823645830162, + "grad_norm": 1.0269014010008646, + "learning_rate": 4.992946597279227e-06, + "loss": 0.5172, + "step": 2454 + }, + { + "epoch": 0.1494991322351795, + "grad_norm": 1.0660844157313074, + "learning_rate": 4.992940606133266e-06, + "loss": 0.5746, + "step": 2455 + }, + { + "epoch": 0.14956002801205737, + "grad_norm": 0.9649131449951128, + "learning_rate": 4.9929346124475475e-06, + "loss": 0.5877, + "step": 2456 + }, + { + "epoch": 0.14962092378893524, + "grad_norm": 0.9901239902888693, + "learning_rate": 4.992928616222079e-06, + "loss": 0.4975, + "step": 2457 + }, + { + "epoch": 0.1496818195658131, + "grad_norm": 1.1352618744043448, + "learning_rate": 4.992922617456867e-06, + "loss": 0.528, + "step": 2458 + }, + { + "epoch": 0.14974271534269099, + "grad_norm": 1.1262653441503034, + "learning_rate": 4.992916616151918e-06, + "loss": 0.5321, + "step": 2459 + }, + { + "epoch": 0.14980361111956886, + "grad_norm": 0.9878255365605337, + "learning_rate": 4.992910612307237e-06, + "loss": 0.5467, + "step": 2460 + }, + { + "epoch": 0.14986450689644673, + "grad_norm": 1.00583727113917, + "learning_rate": 4.99290460592283e-06, + "loss": 0.5541, + "step": 2461 + }, + { + "epoch": 0.1499254026733246, + "grad_norm": 1.0672102719106524, + "learning_rate": 4.992898596998704e-06, + "loss": 0.5191, + "step": 2462 + }, + { + "epoch": 0.14998629845020248, + "grad_norm": 1.1333136712313876, + "learning_rate": 4.992892585534864e-06, + "loss": 0.4772, + "step": 2463 + }, + { + "epoch": 0.15004719422708035, + "grad_norm": 1.0320423999642852, + "learning_rate": 4.992886571531318e-06, + "loss": 0.5101, + "step": 2464 + }, + { + "epoch": 0.15010809000395822, + "grad_norm": 1.032037320200048, + "learning_rate": 4.992880554988071e-06, + "loss": 0.4269, + "step": 2465 + }, + { + "epoch": 0.1501689857808361, + "grad_norm": 1.0667553558262541, + "learning_rate": 4.992874535905128e-06, + "loss": 0.577, + "step": 2466 + }, + { + "epoch": 0.15022988155771397, + "grad_norm": 1.092012637997363, + "learning_rate": 4.992868514282497e-06, + "loss": 0.5242, + "step": 2467 + }, + { + "epoch": 0.15029077733459184, + "grad_norm": 1.0172208349242822, + "learning_rate": 4.992862490120183e-06, + "loss": 0.5025, + "step": 2468 + }, + { + "epoch": 0.1503516731114697, + "grad_norm": 1.093021729441943, + "learning_rate": 4.992856463418193e-06, + "loss": 0.502, + "step": 2469 + }, + { + "epoch": 0.1504125688883476, + "grad_norm": 0.9618819281751945, + "learning_rate": 4.992850434176532e-06, + "loss": 0.5306, + "step": 2470 + }, + { + "epoch": 0.15047346466522546, + "grad_norm": 1.106887959426957, + "learning_rate": 4.992844402395208e-06, + "loss": 0.5163, + "step": 2471 + }, + { + "epoch": 0.15053436044210333, + "grad_norm": 0.982383622057061, + "learning_rate": 4.992838368074226e-06, + "loss": 0.5624, + "step": 2472 + }, + { + "epoch": 0.1505952562189812, + "grad_norm": 1.0403102931261816, + "learning_rate": 4.992832331213591e-06, + "loss": 0.5651, + "step": 2473 + }, + { + "epoch": 0.15065615199585908, + "grad_norm": 1.0402060574357985, + "learning_rate": 4.99282629181331e-06, + "loss": 0.5435, + "step": 2474 + }, + { + "epoch": 0.15071704777273695, + "grad_norm": 1.110388612043396, + "learning_rate": 4.99282024987339e-06, + "loss": 0.5307, + "step": 2475 + }, + { + "epoch": 0.15077794354961482, + "grad_norm": 1.121151907849297, + "learning_rate": 4.992814205393837e-06, + "loss": 0.508, + "step": 2476 + }, + { + "epoch": 0.1508388393264927, + "grad_norm": 1.0728818738651096, + "learning_rate": 4.992808158374655e-06, + "loss": 0.4763, + "step": 2477 + }, + { + "epoch": 0.15089973510337057, + "grad_norm": 1.0938761867664655, + "learning_rate": 4.992802108815854e-06, + "loss": 0.6062, + "step": 2478 + }, + { + "epoch": 0.15096063088024844, + "grad_norm": 1.0590041496222227, + "learning_rate": 4.992796056717437e-06, + "loss": 0.5764, + "step": 2479 + }, + { + "epoch": 0.15102152665712631, + "grad_norm": 1.044602030475801, + "learning_rate": 4.992790002079411e-06, + "loss": 0.5007, + "step": 2480 + }, + { + "epoch": 0.15108242243400422, + "grad_norm": 1.0178898155374587, + "learning_rate": 4.9927839449017835e-06, + "loss": 0.5993, + "step": 2481 + }, + { + "epoch": 0.1511433182108821, + "grad_norm": 0.9751973489666135, + "learning_rate": 4.992777885184558e-06, + "loss": 0.4827, + "step": 2482 + }, + { + "epoch": 0.15120421398775996, + "grad_norm": 1.1577146032361698, + "learning_rate": 4.992771822927744e-06, + "loss": 0.4997, + "step": 2483 + }, + { + "epoch": 0.15126510976463783, + "grad_norm": 1.0432023249853397, + "learning_rate": 4.992765758131344e-06, + "loss": 0.5197, + "step": 2484 + }, + { + "epoch": 0.1513260055415157, + "grad_norm": 0.9735048674997951, + "learning_rate": 4.9927596907953664e-06, + "loss": 0.5356, + "step": 2485 + }, + { + "epoch": 0.15138690131839358, + "grad_norm": 1.0681464365765767, + "learning_rate": 4.992753620919818e-06, + "loss": 0.546, + "step": 2486 + }, + { + "epoch": 0.15144779709527145, + "grad_norm": 1.098385213390006, + "learning_rate": 4.9927475485047035e-06, + "loss": 0.526, + "step": 2487 + }, + { + "epoch": 0.15150869287214933, + "grad_norm": 1.1280583360659087, + "learning_rate": 4.99274147355003e-06, + "loss": 0.4457, + "step": 2488 + }, + { + "epoch": 0.1515695886490272, + "grad_norm": 1.1694876693934355, + "learning_rate": 4.992735396055803e-06, + "loss": 0.492, + "step": 2489 + }, + { + "epoch": 0.15163048442590507, + "grad_norm": 1.1004920767149702, + "learning_rate": 4.992729316022029e-06, + "loss": 0.4974, + "step": 2490 + }, + { + "epoch": 0.15169138020278294, + "grad_norm": 1.064046406627593, + "learning_rate": 4.992723233448714e-06, + "loss": 0.5268, + "step": 2491 + }, + { + "epoch": 0.15175227597966082, + "grad_norm": 1.112763666876491, + "learning_rate": 4.992717148335865e-06, + "loss": 0.5091, + "step": 2492 + }, + { + "epoch": 0.1518131717565387, + "grad_norm": 0.9864092858245589, + "learning_rate": 4.992711060683487e-06, + "loss": 0.5555, + "step": 2493 + }, + { + "epoch": 0.15187406753341656, + "grad_norm": 1.0287161857962703, + "learning_rate": 4.992704970491588e-06, + "loss": 0.4775, + "step": 2494 + }, + { + "epoch": 0.15193496331029444, + "grad_norm": 1.0480102106223532, + "learning_rate": 4.992698877760171e-06, + "loss": 0.5175, + "step": 2495 + }, + { + "epoch": 0.1519958590871723, + "grad_norm": 1.1190219083113706, + "learning_rate": 4.992692782489245e-06, + "loss": 0.5222, + "step": 2496 + }, + { + "epoch": 0.15205675486405018, + "grad_norm": 1.2428700734218403, + "learning_rate": 4.9926866846788165e-06, + "loss": 0.5114, + "step": 2497 + }, + { + "epoch": 0.15211765064092805, + "grad_norm": 1.0423202615012537, + "learning_rate": 4.99268058432889e-06, + "loss": 0.5581, + "step": 2498 + }, + { + "epoch": 0.15217854641780593, + "grad_norm": 1.0721449482053398, + "learning_rate": 4.992674481439473e-06, + "loss": 0.5532, + "step": 2499 + }, + { + "epoch": 0.1522394421946838, + "grad_norm": 1.1262636290107442, + "learning_rate": 4.992668376010569e-06, + "loss": 0.5315, + "step": 2500 + }, + { + "epoch": 0.15230033797156167, + "grad_norm": 1.0187856319459214, + "learning_rate": 4.992662268042188e-06, + "loss": 0.5223, + "step": 2501 + }, + { + "epoch": 0.15236123374843955, + "grad_norm": 1.0719507550614693, + "learning_rate": 4.9926561575343335e-06, + "loss": 0.5792, + "step": 2502 + }, + { + "epoch": 0.15242212952531742, + "grad_norm": 0.9881962079177027, + "learning_rate": 4.9926500444870136e-06, + "loss": 0.5378, + "step": 2503 + }, + { + "epoch": 0.1524830253021953, + "grad_norm": 1.0625258216472449, + "learning_rate": 4.992643928900233e-06, + "loss": 0.544, + "step": 2504 + }, + { + "epoch": 0.15254392107907316, + "grad_norm": 1.0548949572569595, + "learning_rate": 4.992637810773999e-06, + "loss": 0.5318, + "step": 2505 + }, + { + "epoch": 0.15260481685595104, + "grad_norm": 1.0811896301842863, + "learning_rate": 4.9926316901083175e-06, + "loss": 0.5338, + "step": 2506 + }, + { + "epoch": 0.1526657126328289, + "grad_norm": 0.9561110191823566, + "learning_rate": 4.992625566903194e-06, + "loss": 0.5471, + "step": 2507 + }, + { + "epoch": 0.15272660840970678, + "grad_norm": 1.0155954245438423, + "learning_rate": 4.992619441158636e-06, + "loss": 0.4894, + "step": 2508 + }, + { + "epoch": 0.15278750418658466, + "grad_norm": 1.0659160276622512, + "learning_rate": 4.992613312874649e-06, + "loss": 0.4838, + "step": 2509 + }, + { + "epoch": 0.15284839996346253, + "grad_norm": 1.0779526819167673, + "learning_rate": 4.99260718205124e-06, + "loss": 0.5091, + "step": 2510 + }, + { + "epoch": 0.1529092957403404, + "grad_norm": 1.082354512442352, + "learning_rate": 4.992601048688413e-06, + "loss": 0.4948, + "step": 2511 + }, + { + "epoch": 0.15297019151721827, + "grad_norm": 0.9896906575972525, + "learning_rate": 4.9925949127861775e-06, + "loss": 0.4989, + "step": 2512 + }, + { + "epoch": 0.15303108729409615, + "grad_norm": 1.1777369848685637, + "learning_rate": 4.992588774344538e-06, + "loss": 0.518, + "step": 2513 + }, + { + "epoch": 0.15309198307097402, + "grad_norm": 1.0860356325719565, + "learning_rate": 4.992582633363499e-06, + "loss": 0.549, + "step": 2514 + }, + { + "epoch": 0.1531528788478519, + "grad_norm": 0.9436880425522627, + "learning_rate": 4.9925764898430705e-06, + "loss": 0.523, + "step": 2515 + }, + { + "epoch": 0.15321377462472976, + "grad_norm": 1.0766070975103197, + "learning_rate": 4.992570343783256e-06, + "loss": 0.5447, + "step": 2516 + }, + { + "epoch": 0.15327467040160764, + "grad_norm": 1.1094380687022705, + "learning_rate": 4.992564195184065e-06, + "loss": 0.5535, + "step": 2517 + }, + { + "epoch": 0.1533355661784855, + "grad_norm": 1.0195847645821856, + "learning_rate": 4.9925580440454985e-06, + "loss": 0.6323, + "step": 2518 + }, + { + "epoch": 0.15339646195536338, + "grad_norm": 1.0541380315256943, + "learning_rate": 4.992551890367567e-06, + "loss": 0.49, + "step": 2519 + }, + { + "epoch": 0.15345735773224126, + "grad_norm": 1.0225681251169028, + "learning_rate": 4.992545734150275e-06, + "loss": 0.4918, + "step": 2520 + }, + { + "epoch": 0.15351825350911913, + "grad_norm": 1.0734081397454152, + "learning_rate": 4.992539575393629e-06, + "loss": 0.5726, + "step": 2521 + }, + { + "epoch": 0.15357914928599703, + "grad_norm": 1.0044330366259513, + "learning_rate": 4.992533414097635e-06, + "loss": 0.4881, + "step": 2522 + }, + { + "epoch": 0.1536400450628749, + "grad_norm": 1.1630243081646932, + "learning_rate": 4.992527250262301e-06, + "loss": 0.4608, + "step": 2523 + }, + { + "epoch": 0.15370094083975278, + "grad_norm": 0.9261010707108193, + "learning_rate": 4.9925210838876325e-06, + "loss": 0.5648, + "step": 2524 + }, + { + "epoch": 0.15376183661663065, + "grad_norm": 0.9627797408671431, + "learning_rate": 4.9925149149736345e-06, + "loss": 0.5485, + "step": 2525 + }, + { + "epoch": 0.15382273239350852, + "grad_norm": 0.9793334308891601, + "learning_rate": 4.992508743520314e-06, + "loss": 0.5171, + "step": 2526 + }, + { + "epoch": 0.1538836281703864, + "grad_norm": 1.0852135160491876, + "learning_rate": 4.992502569527677e-06, + "loss": 0.4797, + "step": 2527 + }, + { + "epoch": 0.15394452394726427, + "grad_norm": 1.019968434033996, + "learning_rate": 4.992496392995732e-06, + "loss": 0.5475, + "step": 2528 + }, + { + "epoch": 0.15400541972414214, + "grad_norm": 1.0800289435556996, + "learning_rate": 4.9924902139244814e-06, + "loss": 0.5104, + "step": 2529 + }, + { + "epoch": 0.15406631550102, + "grad_norm": 1.0117880243081294, + "learning_rate": 4.992484032313935e-06, + "loss": 0.5786, + "step": 2530 + }, + { + "epoch": 0.15412721127789789, + "grad_norm": 1.035741081241186, + "learning_rate": 4.992477848164097e-06, + "loss": 0.4681, + "step": 2531 + }, + { + "epoch": 0.15418810705477576, + "grad_norm": 1.0088924610778338, + "learning_rate": 4.992471661474975e-06, + "loss": 0.5138, + "step": 2532 + }, + { + "epoch": 0.15424900283165363, + "grad_norm": 1.075970013128953, + "learning_rate": 4.992465472246574e-06, + "loss": 0.5028, + "step": 2533 + }, + { + "epoch": 0.1543098986085315, + "grad_norm": 1.003491021549956, + "learning_rate": 4.9924592804789e-06, + "loss": 0.529, + "step": 2534 + }, + { + "epoch": 0.15437079438540938, + "grad_norm": 1.0370871050083033, + "learning_rate": 4.9924530861719624e-06, + "loss": 0.5118, + "step": 2535 + }, + { + "epoch": 0.15443169016228725, + "grad_norm": 1.07777999667459, + "learning_rate": 4.992446889325765e-06, + "loss": 0.5183, + "step": 2536 + }, + { + "epoch": 0.15449258593916512, + "grad_norm": 1.1193204467280498, + "learning_rate": 4.992440689940314e-06, + "loss": 0.5878, + "step": 2537 + }, + { + "epoch": 0.154553481716043, + "grad_norm": 1.1060728951722227, + "learning_rate": 4.992434488015616e-06, + "loss": 0.4957, + "step": 2538 + }, + { + "epoch": 0.15461437749292087, + "grad_norm": 1.0758463460792207, + "learning_rate": 4.992428283551678e-06, + "loss": 0.5201, + "step": 2539 + }, + { + "epoch": 0.15467527326979874, + "grad_norm": 1.1081847140291947, + "learning_rate": 4.992422076548505e-06, + "loss": 0.5101, + "step": 2540 + }, + { + "epoch": 0.1547361690466766, + "grad_norm": 1.1760266407722622, + "learning_rate": 4.992415867006105e-06, + "loss": 0.4355, + "step": 2541 + }, + { + "epoch": 0.1547970648235545, + "grad_norm": 1.0871465585571198, + "learning_rate": 4.9924096549244834e-06, + "loss": 0.472, + "step": 2542 + }, + { + "epoch": 0.15485796060043236, + "grad_norm": 1.0648975519323127, + "learning_rate": 4.992403440303647e-06, + "loss": 0.5403, + "step": 2543 + }, + { + "epoch": 0.15491885637731023, + "grad_norm": 1.1122335985660812, + "learning_rate": 4.992397223143602e-06, + "loss": 0.592, + "step": 2544 + }, + { + "epoch": 0.1549797521541881, + "grad_norm": 1.0237533690228025, + "learning_rate": 4.992391003444354e-06, + "loss": 0.5106, + "step": 2545 + }, + { + "epoch": 0.15504064793106598, + "grad_norm": 0.9556372737748862, + "learning_rate": 4.99238478120591e-06, + "loss": 0.5168, + "step": 2546 + }, + { + "epoch": 0.15510154370794385, + "grad_norm": 1.0952897003466284, + "learning_rate": 4.992378556428276e-06, + "loss": 0.4525, + "step": 2547 + }, + { + "epoch": 0.15516243948482172, + "grad_norm": 0.9950723457331299, + "learning_rate": 4.992372329111459e-06, + "loss": 0.5269, + "step": 2548 + }, + { + "epoch": 0.1552233352616996, + "grad_norm": 0.9811003921136067, + "learning_rate": 4.992366099255465e-06, + "loss": 0.5419, + "step": 2549 + }, + { + "epoch": 0.15528423103857747, + "grad_norm": 1.0121596152535999, + "learning_rate": 4.992359866860299e-06, + "loss": 0.5193, + "step": 2550 + }, + { + "epoch": 0.15534512681545534, + "grad_norm": 1.0577777293560058, + "learning_rate": 4.9923536319259695e-06, + "loss": 0.5061, + "step": 2551 + }, + { + "epoch": 0.15540602259233322, + "grad_norm": 1.0455974920039641, + "learning_rate": 4.992347394452482e-06, + "loss": 0.5552, + "step": 2552 + }, + { + "epoch": 0.1554669183692111, + "grad_norm": 1.0854764014922278, + "learning_rate": 4.992341154439843e-06, + "loss": 0.5231, + "step": 2553 + }, + { + "epoch": 0.15552781414608896, + "grad_norm": 1.0276154251905032, + "learning_rate": 4.992334911888058e-06, + "loss": 0.571, + "step": 2554 + }, + { + "epoch": 0.15558870992296683, + "grad_norm": 0.9769381279151127, + "learning_rate": 4.9923286667971334e-06, + "loss": 0.5758, + "step": 2555 + }, + { + "epoch": 0.1556496056998447, + "grad_norm": 1.156751611992498, + "learning_rate": 4.992322419167079e-06, + "loss": 0.53, + "step": 2556 + }, + { + "epoch": 0.15571050147672258, + "grad_norm": 1.092555771697481, + "learning_rate": 4.992316168997895e-06, + "loss": 0.5034, + "step": 2557 + }, + { + "epoch": 0.15577139725360045, + "grad_norm": 1.143547368836882, + "learning_rate": 4.9923099162895925e-06, + "loss": 0.4808, + "step": 2558 + }, + { + "epoch": 0.15583229303047832, + "grad_norm": 1.0597575892014566, + "learning_rate": 4.992303661042177e-06, + "loss": 0.5999, + "step": 2559 + }, + { + "epoch": 0.1558931888073562, + "grad_norm": 1.0378157083822288, + "learning_rate": 4.992297403255654e-06, + "loss": 0.5859, + "step": 2560 + }, + { + "epoch": 0.15595408458423407, + "grad_norm": 1.0175998278745078, + "learning_rate": 4.992291142930029e-06, + "loss": 0.4497, + "step": 2561 + }, + { + "epoch": 0.15601498036111194, + "grad_norm": 1.0020800974279973, + "learning_rate": 4.992284880065311e-06, + "loss": 0.5417, + "step": 2562 + }, + { + "epoch": 0.15607587613798984, + "grad_norm": 1.194784201679778, + "learning_rate": 4.992278614661505e-06, + "loss": 0.4644, + "step": 2563 + }, + { + "epoch": 0.15613677191486772, + "grad_norm": 1.044937883008089, + "learning_rate": 4.992272346718617e-06, + "loss": 0.4942, + "step": 2564 + }, + { + "epoch": 0.1561976676917456, + "grad_norm": 0.9794253186809694, + "learning_rate": 4.992266076236654e-06, + "loss": 0.512, + "step": 2565 + }, + { + "epoch": 0.15625856346862346, + "grad_norm": 1.1301048881048834, + "learning_rate": 4.992259803215622e-06, + "loss": 0.446, + "step": 2566 + }, + { + "epoch": 0.15631945924550134, + "grad_norm": 1.0321581279944596, + "learning_rate": 4.992253527655527e-06, + "loss": 0.5349, + "step": 2567 + }, + { + "epoch": 0.1563803550223792, + "grad_norm": 1.0179270310552901, + "learning_rate": 4.992247249556376e-06, + "loss": 0.5195, + "step": 2568 + }, + { + "epoch": 0.15644125079925708, + "grad_norm": 1.037448782230392, + "learning_rate": 4.992240968918176e-06, + "loss": 0.5319, + "step": 2569 + }, + { + "epoch": 0.15650214657613495, + "grad_norm": 1.0175463316019242, + "learning_rate": 4.992234685740932e-06, + "loss": 0.4827, + "step": 2570 + }, + { + "epoch": 0.15656304235301283, + "grad_norm": 1.0843764158575289, + "learning_rate": 4.992228400024652e-06, + "loss": 0.5462, + "step": 2571 + }, + { + "epoch": 0.1566239381298907, + "grad_norm": 1.086288297873897, + "learning_rate": 4.992222111769341e-06, + "loss": 0.5191, + "step": 2572 + }, + { + "epoch": 0.15668483390676857, + "grad_norm": 1.0608852966894236, + "learning_rate": 4.992215820975006e-06, + "loss": 0.5303, + "step": 2573 + }, + { + "epoch": 0.15674572968364645, + "grad_norm": 1.0698965139131487, + "learning_rate": 4.992209527641653e-06, + "loss": 0.5155, + "step": 2574 + }, + { + "epoch": 0.15680662546052432, + "grad_norm": 1.1183478110279494, + "learning_rate": 4.9922032317692895e-06, + "loss": 0.4934, + "step": 2575 + }, + { + "epoch": 0.1568675212374022, + "grad_norm": 1.2092922347684538, + "learning_rate": 4.99219693335792e-06, + "loss": 0.509, + "step": 2576 + }, + { + "epoch": 0.15692841701428006, + "grad_norm": 0.9800039830089251, + "learning_rate": 4.9921906324075534e-06, + "loss": 0.5545, + "step": 2577 + }, + { + "epoch": 0.15698931279115794, + "grad_norm": 1.0242641952557892, + "learning_rate": 4.992184328918194e-06, + "loss": 0.5466, + "step": 2578 + }, + { + "epoch": 0.1570502085680358, + "grad_norm": 1.055300739074551, + "learning_rate": 4.992178022889848e-06, + "loss": 0.5713, + "step": 2579 + }, + { + "epoch": 0.15711110434491368, + "grad_norm": 1.0613056623557402, + "learning_rate": 4.992171714322525e-06, + "loss": 0.5003, + "step": 2580 + }, + { + "epoch": 0.15717200012179156, + "grad_norm": 1.0257897312510744, + "learning_rate": 4.992165403216228e-06, + "loss": 0.5228, + "step": 2581 + }, + { + "epoch": 0.15723289589866943, + "grad_norm": 1.0001915131256194, + "learning_rate": 4.992159089570965e-06, + "loss": 0.577, + "step": 2582 + }, + { + "epoch": 0.1572937916755473, + "grad_norm": 1.096205794355187, + "learning_rate": 4.9921527733867425e-06, + "loss": 0.4924, + "step": 2583 + }, + { + "epoch": 0.15735468745242517, + "grad_norm": 1.080066664054663, + "learning_rate": 4.992146454663566e-06, + "loss": 0.4794, + "step": 2584 + }, + { + "epoch": 0.15741558322930305, + "grad_norm": 1.1292952672317547, + "learning_rate": 4.992140133401443e-06, + "loss": 0.4863, + "step": 2585 + }, + { + "epoch": 0.15747647900618092, + "grad_norm": 1.0986695029289961, + "learning_rate": 4.992133809600379e-06, + "loss": 0.5738, + "step": 2586 + }, + { + "epoch": 0.1575373747830588, + "grad_norm": 1.0944942941022282, + "learning_rate": 4.99212748326038e-06, + "loss": 0.5787, + "step": 2587 + }, + { + "epoch": 0.15759827055993667, + "grad_norm": 1.0641372579759265, + "learning_rate": 4.992121154381455e-06, + "loss": 0.5263, + "step": 2588 + }, + { + "epoch": 0.15765916633681454, + "grad_norm": 0.9550840406261758, + "learning_rate": 4.992114822963608e-06, + "loss": 0.516, + "step": 2589 + }, + { + "epoch": 0.1577200621136924, + "grad_norm": 1.0659288923517538, + "learning_rate": 4.992108489006846e-06, + "loss": 0.5546, + "step": 2590 + }, + { + "epoch": 0.15778095789057028, + "grad_norm": 1.1501160140122448, + "learning_rate": 4.992102152511177e-06, + "loss": 0.489, + "step": 2591 + }, + { + "epoch": 0.15784185366744816, + "grad_norm": 1.0688635884390105, + "learning_rate": 4.992095813476605e-06, + "loss": 0.4921, + "step": 2592 + }, + { + "epoch": 0.15790274944432603, + "grad_norm": 1.0406540397436232, + "learning_rate": 4.9920894719031375e-06, + "loss": 0.5124, + "step": 2593 + }, + { + "epoch": 0.1579636452212039, + "grad_norm": 1.0486013945885582, + "learning_rate": 4.992083127790782e-06, + "loss": 0.4943, + "step": 2594 + }, + { + "epoch": 0.15802454099808178, + "grad_norm": 1.0642460409282146, + "learning_rate": 4.992076781139543e-06, + "loss": 0.478, + "step": 2595 + }, + { + "epoch": 0.15808543677495965, + "grad_norm": 1.1038833547985802, + "learning_rate": 4.992070431949428e-06, + "loss": 0.5992, + "step": 2596 + }, + { + "epoch": 0.15814633255183752, + "grad_norm": 1.115393599211892, + "learning_rate": 4.992064080220444e-06, + "loss": 0.5647, + "step": 2597 + }, + { + "epoch": 0.1582072283287154, + "grad_norm": 1.0729702609368583, + "learning_rate": 4.992057725952597e-06, + "loss": 0.5108, + "step": 2598 + }, + { + "epoch": 0.15826812410559327, + "grad_norm": 1.1564801116076202, + "learning_rate": 4.992051369145893e-06, + "loss": 0.4607, + "step": 2599 + }, + { + "epoch": 0.15832901988247114, + "grad_norm": 0.9797085498389075, + "learning_rate": 4.992045009800339e-06, + "loss": 0.4746, + "step": 2600 + }, + { + "epoch": 0.158389915659349, + "grad_norm": 1.0142529565680332, + "learning_rate": 4.992038647915941e-06, + "loss": 0.5507, + "step": 2601 + }, + { + "epoch": 0.15845081143622688, + "grad_norm": 1.0435908544157484, + "learning_rate": 4.9920322834927065e-06, + "loss": 0.626, + "step": 2602 + }, + { + "epoch": 0.15851170721310476, + "grad_norm": 1.0609425144520421, + "learning_rate": 4.992025916530641e-06, + "loss": 0.5403, + "step": 2603 + }, + { + "epoch": 0.15857260298998266, + "grad_norm": 1.0179584115772757, + "learning_rate": 4.992019547029752e-06, + "loss": 0.4869, + "step": 2604 + }, + { + "epoch": 0.15863349876686053, + "grad_norm": 1.07376537272316, + "learning_rate": 4.992013174990044e-06, + "loss": 0.4329, + "step": 2605 + }, + { + "epoch": 0.1586943945437384, + "grad_norm": 1.1580166731546921, + "learning_rate": 4.992006800411525e-06, + "loss": 0.5151, + "step": 2606 + }, + { + "epoch": 0.15875529032061628, + "grad_norm": 0.9580182490961687, + "learning_rate": 4.992000423294202e-06, + "loss": 0.488, + "step": 2607 + }, + { + "epoch": 0.15881618609749415, + "grad_norm": 1.1388389370108145, + "learning_rate": 4.991994043638081e-06, + "loss": 0.5532, + "step": 2608 + }, + { + "epoch": 0.15887708187437202, + "grad_norm": 1.0832401021536153, + "learning_rate": 4.9919876614431675e-06, + "loss": 0.5237, + "step": 2609 + }, + { + "epoch": 0.1589379776512499, + "grad_norm": 1.1417662669536117, + "learning_rate": 4.991981276709469e-06, + "loss": 0.5537, + "step": 2610 + }, + { + "epoch": 0.15899887342812777, + "grad_norm": 1.0143735305832462, + "learning_rate": 4.991974889436992e-06, + "loss": 0.5781, + "step": 2611 + }, + { + "epoch": 0.15905976920500564, + "grad_norm": 0.9806430812630272, + "learning_rate": 4.991968499625742e-06, + "loss": 0.559, + "step": 2612 + }, + { + "epoch": 0.15912066498188351, + "grad_norm": 1.0129678341655717, + "learning_rate": 4.9919621072757275e-06, + "loss": 0.5771, + "step": 2613 + }, + { + "epoch": 0.1591815607587614, + "grad_norm": 1.0989977976917944, + "learning_rate": 4.9919557123869535e-06, + "loss": 0.4921, + "step": 2614 + }, + { + "epoch": 0.15924245653563926, + "grad_norm": 1.0561139392386654, + "learning_rate": 4.991949314959426e-06, + "loss": 0.5279, + "step": 2615 + }, + { + "epoch": 0.15930335231251713, + "grad_norm": 0.9937027481659902, + "learning_rate": 4.991942914993153e-06, + "loss": 0.5357, + "step": 2616 + }, + { + "epoch": 0.159364248089395, + "grad_norm": 0.9175838540979713, + "learning_rate": 4.99193651248814e-06, + "loss": 0.5543, + "step": 2617 + }, + { + "epoch": 0.15942514386627288, + "grad_norm": 0.9786015960772871, + "learning_rate": 4.991930107444395e-06, + "loss": 0.5977, + "step": 2618 + }, + { + "epoch": 0.15948603964315075, + "grad_norm": 1.0409053725080188, + "learning_rate": 4.991923699861922e-06, + "loss": 0.4867, + "step": 2619 + }, + { + "epoch": 0.15954693542002862, + "grad_norm": 1.0583552055811372, + "learning_rate": 4.991917289740729e-06, + "loss": 0.4602, + "step": 2620 + }, + { + "epoch": 0.1596078311969065, + "grad_norm": 1.0059113840265925, + "learning_rate": 4.991910877080823e-06, + "loss": 0.5243, + "step": 2621 + }, + { + "epoch": 0.15966872697378437, + "grad_norm": 1.104247303161446, + "learning_rate": 4.991904461882211e-06, + "loss": 0.4192, + "step": 2622 + }, + { + "epoch": 0.15972962275066224, + "grad_norm": 0.9776447002319045, + "learning_rate": 4.991898044144897e-06, + "loss": 0.5986, + "step": 2623 + }, + { + "epoch": 0.15979051852754012, + "grad_norm": 1.071039096499261, + "learning_rate": 4.99189162386889e-06, + "loss": 0.5095, + "step": 2624 + }, + { + "epoch": 0.159851414304418, + "grad_norm": 1.0002016199462411, + "learning_rate": 4.991885201054195e-06, + "loss": 0.5201, + "step": 2625 + }, + { + "epoch": 0.15991231008129586, + "grad_norm": 1.0598238200555685, + "learning_rate": 4.991878775700819e-06, + "loss": 0.4723, + "step": 2626 + }, + { + "epoch": 0.15997320585817373, + "grad_norm": 1.158156052090699, + "learning_rate": 4.991872347808769e-06, + "loss": 0.4764, + "step": 2627 + }, + { + "epoch": 0.1600341016350516, + "grad_norm": 1.0413874131349758, + "learning_rate": 4.9918659173780515e-06, + "loss": 0.5347, + "step": 2628 + }, + { + "epoch": 0.16009499741192948, + "grad_norm": 1.1049384113744014, + "learning_rate": 4.9918594844086735e-06, + "loss": 0.5097, + "step": 2629 + }, + { + "epoch": 0.16015589318880735, + "grad_norm": 1.09706410063075, + "learning_rate": 4.9918530489006395e-06, + "loss": 0.5121, + "step": 2630 + }, + { + "epoch": 0.16021678896568523, + "grad_norm": 1.1291586566274292, + "learning_rate": 4.991846610853958e-06, + "loss": 0.5784, + "step": 2631 + }, + { + "epoch": 0.1602776847425631, + "grad_norm": 1.16586466684923, + "learning_rate": 4.991840170268635e-06, + "loss": 0.4893, + "step": 2632 + }, + { + "epoch": 0.16033858051944097, + "grad_norm": 1.0973811366370816, + "learning_rate": 4.991833727144677e-06, + "loss": 0.5394, + "step": 2633 + }, + { + "epoch": 0.16039947629631884, + "grad_norm": 1.032383312029091, + "learning_rate": 4.991827281482091e-06, + "loss": 0.5878, + "step": 2634 + }, + { + "epoch": 0.16046037207319672, + "grad_norm": 1.080675597695051, + "learning_rate": 4.991820833280883e-06, + "loss": 0.5113, + "step": 2635 + }, + { + "epoch": 0.1605212678500746, + "grad_norm": 1.1392356782452133, + "learning_rate": 4.991814382541059e-06, + "loss": 0.4703, + "step": 2636 + }, + { + "epoch": 0.16058216362695246, + "grad_norm": 0.9784544580120039, + "learning_rate": 4.9918079292626265e-06, + "loss": 0.5398, + "step": 2637 + }, + { + "epoch": 0.16064305940383033, + "grad_norm": 0.9664645254747626, + "learning_rate": 4.991801473445592e-06, + "loss": 0.5316, + "step": 2638 + }, + { + "epoch": 0.1607039551807082, + "grad_norm": 0.9553990089051348, + "learning_rate": 4.991795015089963e-06, + "loss": 0.5297, + "step": 2639 + }, + { + "epoch": 0.16076485095758608, + "grad_norm": 0.9903211943796809, + "learning_rate": 4.991788554195744e-06, + "loss": 0.5369, + "step": 2640 + }, + { + "epoch": 0.16082574673446395, + "grad_norm": 1.0177072794890971, + "learning_rate": 4.991782090762942e-06, + "loss": 0.5374, + "step": 2641 + }, + { + "epoch": 0.16088664251134183, + "grad_norm": 1.070948674259093, + "learning_rate": 4.991775624791566e-06, + "loss": 0.4591, + "step": 2642 + }, + { + "epoch": 0.1609475382882197, + "grad_norm": 1.0678968203983668, + "learning_rate": 4.9917691562816195e-06, + "loss": 0.5086, + "step": 2643 + }, + { + "epoch": 0.16100843406509757, + "grad_norm": 1.0902573939981008, + "learning_rate": 4.9917626852331105e-06, + "loss": 0.5306, + "step": 2644 + }, + { + "epoch": 0.16106932984197547, + "grad_norm": 1.0822644828031385, + "learning_rate": 4.991756211646046e-06, + "loss": 0.4734, + "step": 2645 + }, + { + "epoch": 0.16113022561885335, + "grad_norm": 1.1442272447597848, + "learning_rate": 4.991749735520432e-06, + "loss": 0.5995, + "step": 2646 + }, + { + "epoch": 0.16119112139573122, + "grad_norm": 1.095111873169538, + "learning_rate": 4.991743256856275e-06, + "loss": 0.4561, + "step": 2647 + }, + { + "epoch": 0.1612520171726091, + "grad_norm": 1.0113587430921378, + "learning_rate": 4.991736775653582e-06, + "loss": 0.4836, + "step": 2648 + }, + { + "epoch": 0.16131291294948696, + "grad_norm": 1.1362371809826601, + "learning_rate": 4.9917302919123585e-06, + "loss": 0.5068, + "step": 2649 + }, + { + "epoch": 0.16137380872636484, + "grad_norm": 1.001333493239545, + "learning_rate": 4.991723805632613e-06, + "loss": 0.4921, + "step": 2650 + }, + { + "epoch": 0.1614347045032427, + "grad_norm": 1.0075067423450494, + "learning_rate": 4.991717316814351e-06, + "loss": 0.6351, + "step": 2651 + }, + { + "epoch": 0.16149560028012058, + "grad_norm": 0.9949803845467409, + "learning_rate": 4.991710825457579e-06, + "loss": 0.5615, + "step": 2652 + }, + { + "epoch": 0.16155649605699846, + "grad_norm": 1.016143190886092, + "learning_rate": 4.991704331562303e-06, + "loss": 0.486, + "step": 2653 + }, + { + "epoch": 0.16161739183387633, + "grad_norm": 1.0507781957598343, + "learning_rate": 4.991697835128532e-06, + "loss": 0.5596, + "step": 2654 + }, + { + "epoch": 0.1616782876107542, + "grad_norm": 1.1246051315132928, + "learning_rate": 4.99169133615627e-06, + "loss": 0.5417, + "step": 2655 + }, + { + "epoch": 0.16173918338763207, + "grad_norm": 0.975220389213433, + "learning_rate": 4.9916848346455245e-06, + "loss": 0.5278, + "step": 2656 + }, + { + "epoch": 0.16180007916450995, + "grad_norm": 1.020738946283857, + "learning_rate": 4.991678330596303e-06, + "loss": 0.4641, + "step": 2657 + }, + { + "epoch": 0.16186097494138782, + "grad_norm": 1.026848838857214, + "learning_rate": 4.99167182400861e-06, + "loss": 0.4897, + "step": 2658 + }, + { + "epoch": 0.1619218707182657, + "grad_norm": 1.069819956889003, + "learning_rate": 4.991665314882456e-06, + "loss": 0.5108, + "step": 2659 + }, + { + "epoch": 0.16198276649514357, + "grad_norm": 1.10203102893673, + "learning_rate": 4.991658803217843e-06, + "loss": 0.5457, + "step": 2660 + }, + { + "epoch": 0.16204366227202144, + "grad_norm": 1.053136217907735, + "learning_rate": 4.991652289014781e-06, + "loss": 0.523, + "step": 2661 + }, + { + "epoch": 0.1621045580488993, + "grad_norm": 1.1005359142487499, + "learning_rate": 4.991645772273275e-06, + "loss": 0.4982, + "step": 2662 + }, + { + "epoch": 0.16216545382577718, + "grad_norm": 1.001219351232135, + "learning_rate": 4.991639252993332e-06, + "loss": 0.5354, + "step": 2663 + }, + { + "epoch": 0.16222634960265506, + "grad_norm": 1.0469747778872975, + "learning_rate": 4.991632731174959e-06, + "loss": 0.4782, + "step": 2664 + }, + { + "epoch": 0.16228724537953293, + "grad_norm": 1.0626903956918048, + "learning_rate": 4.991626206818162e-06, + "loss": 0.4994, + "step": 2665 + }, + { + "epoch": 0.1623481411564108, + "grad_norm": 1.1378202368668038, + "learning_rate": 4.991619679922949e-06, + "loss": 0.4585, + "step": 2666 + }, + { + "epoch": 0.16240903693328868, + "grad_norm": 1.0117829201861008, + "learning_rate": 4.991613150489325e-06, + "loss": 0.526, + "step": 2667 + }, + { + "epoch": 0.16246993271016655, + "grad_norm": 0.9437106936049251, + "learning_rate": 4.991606618517297e-06, + "loss": 0.5238, + "step": 2668 + }, + { + "epoch": 0.16253082848704442, + "grad_norm": 1.1378861394523276, + "learning_rate": 4.9916000840068725e-06, + "loss": 0.4531, + "step": 2669 + }, + { + "epoch": 0.1625917242639223, + "grad_norm": 1.1063159083846632, + "learning_rate": 4.991593546958057e-06, + "loss": 0.5156, + "step": 2670 + }, + { + "epoch": 0.16265262004080017, + "grad_norm": 1.070412221579265, + "learning_rate": 4.991587007370859e-06, + "loss": 0.5373, + "step": 2671 + }, + { + "epoch": 0.16271351581767804, + "grad_norm": 1.1074392917141156, + "learning_rate": 4.9915804652452825e-06, + "loss": 0.4537, + "step": 2672 + }, + { + "epoch": 0.1627744115945559, + "grad_norm": 1.0937286329765379, + "learning_rate": 4.9915739205813365e-06, + "loss": 0.5238, + "step": 2673 + }, + { + "epoch": 0.16283530737143379, + "grad_norm": 1.0925508228445737, + "learning_rate": 4.991567373379027e-06, + "loss": 0.496, + "step": 2674 + }, + { + "epoch": 0.16289620314831166, + "grad_norm": 1.171195203918714, + "learning_rate": 4.991560823638359e-06, + "loss": 0.4736, + "step": 2675 + }, + { + "epoch": 0.16295709892518953, + "grad_norm": 0.9944713458758395, + "learning_rate": 4.991554271359342e-06, + "loss": 0.5093, + "step": 2676 + }, + { + "epoch": 0.1630179947020674, + "grad_norm": 1.1216067779031424, + "learning_rate": 4.991547716541981e-06, + "loss": 0.5234, + "step": 2677 + }, + { + "epoch": 0.16307889047894528, + "grad_norm": 1.0166782213015428, + "learning_rate": 4.991541159186283e-06, + "loss": 0.5297, + "step": 2678 + }, + { + "epoch": 0.16313978625582315, + "grad_norm": 1.137016440528037, + "learning_rate": 4.9915345992922546e-06, + "loss": 0.5802, + "step": 2679 + }, + { + "epoch": 0.16320068203270102, + "grad_norm": 1.0675341447706577, + "learning_rate": 4.991528036859903e-06, + "loss": 0.476, + "step": 2680 + }, + { + "epoch": 0.1632615778095789, + "grad_norm": 1.0536629623157305, + "learning_rate": 4.991521471889234e-06, + "loss": 0.503, + "step": 2681 + }, + { + "epoch": 0.16332247358645677, + "grad_norm": 1.0216418782973864, + "learning_rate": 4.9915149043802545e-06, + "loss": 0.5891, + "step": 2682 + }, + { + "epoch": 0.16338336936333464, + "grad_norm": 1.1158581972319055, + "learning_rate": 4.9915083343329715e-06, + "loss": 0.4607, + "step": 2683 + }, + { + "epoch": 0.1634442651402125, + "grad_norm": 1.0628550643495978, + "learning_rate": 4.991501761747392e-06, + "loss": 0.5983, + "step": 2684 + }, + { + "epoch": 0.1635051609170904, + "grad_norm": 1.0823624322103942, + "learning_rate": 4.991495186623522e-06, + "loss": 0.5478, + "step": 2685 + }, + { + "epoch": 0.1635660566939683, + "grad_norm": 1.1230224112520084, + "learning_rate": 4.991488608961369e-06, + "loss": 0.4847, + "step": 2686 + }, + { + "epoch": 0.16362695247084616, + "grad_norm": 0.9725190169064424, + "learning_rate": 4.991482028760939e-06, + "loss": 0.524, + "step": 2687 + }, + { + "epoch": 0.16368784824772403, + "grad_norm": 0.9794849590678537, + "learning_rate": 4.9914754460222385e-06, + "loss": 0.4904, + "step": 2688 + }, + { + "epoch": 0.1637487440246019, + "grad_norm": 1.085887177295332, + "learning_rate": 4.991468860745274e-06, + "loss": 0.451, + "step": 2689 + }, + { + "epoch": 0.16380963980147978, + "grad_norm": 1.035687472384461, + "learning_rate": 4.991462272930054e-06, + "loss": 0.4592, + "step": 2690 + }, + { + "epoch": 0.16387053557835765, + "grad_norm": 1.0558468152685654, + "learning_rate": 4.991455682576583e-06, + "loss": 0.5, + "step": 2691 + }, + { + "epoch": 0.16393143135523552, + "grad_norm": 1.2231556071201213, + "learning_rate": 4.99144908968487e-06, + "loss": 0.5429, + "step": 2692 + }, + { + "epoch": 0.1639923271321134, + "grad_norm": 1.0538400161559247, + "learning_rate": 4.99144249425492e-06, + "loss": 0.4832, + "step": 2693 + }, + { + "epoch": 0.16405322290899127, + "grad_norm": 1.037708148188799, + "learning_rate": 4.99143589628674e-06, + "loss": 0.4916, + "step": 2694 + }, + { + "epoch": 0.16411411868586914, + "grad_norm": 1.0401599803622088, + "learning_rate": 4.991429295780337e-06, + "loss": 0.5156, + "step": 2695 + }, + { + "epoch": 0.16417501446274702, + "grad_norm": 1.0572531972949, + "learning_rate": 4.9914226927357175e-06, + "loss": 0.5678, + "step": 2696 + }, + { + "epoch": 0.1642359102396249, + "grad_norm": 1.129830016718747, + "learning_rate": 4.991416087152889e-06, + "loss": 0.427, + "step": 2697 + }, + { + "epoch": 0.16429680601650276, + "grad_norm": 1.0854384814298255, + "learning_rate": 4.9914094790318564e-06, + "loss": 0.5101, + "step": 2698 + }, + { + "epoch": 0.16435770179338063, + "grad_norm": 0.9655376317487521, + "learning_rate": 4.991402868372629e-06, + "loss": 0.4866, + "step": 2699 + }, + { + "epoch": 0.1644185975702585, + "grad_norm": 1.0398463715881647, + "learning_rate": 4.991396255175211e-06, + "loss": 0.5389, + "step": 2700 + }, + { + "epoch": 0.16447949334713638, + "grad_norm": 1.0748113071792644, + "learning_rate": 4.991389639439611e-06, + "loss": 0.5138, + "step": 2701 + }, + { + "epoch": 0.16454038912401425, + "grad_norm": 0.9602775871160099, + "learning_rate": 4.991383021165835e-06, + "loss": 0.5027, + "step": 2702 + }, + { + "epoch": 0.16460128490089213, + "grad_norm": 0.9496350765772753, + "learning_rate": 4.991376400353889e-06, + "loss": 0.5388, + "step": 2703 + }, + { + "epoch": 0.16466218067777, + "grad_norm": 1.082797445263557, + "learning_rate": 4.991369777003781e-06, + "loss": 0.5091, + "step": 2704 + }, + { + "epoch": 0.16472307645464787, + "grad_norm": 0.9903065436834231, + "learning_rate": 4.991363151115518e-06, + "loss": 0.4448, + "step": 2705 + }, + { + "epoch": 0.16478397223152574, + "grad_norm": 1.0346835943427912, + "learning_rate": 4.991356522689105e-06, + "loss": 0.5775, + "step": 2706 + }, + { + "epoch": 0.16484486800840362, + "grad_norm": 1.0429906920833412, + "learning_rate": 4.99134989172455e-06, + "loss": 0.5217, + "step": 2707 + }, + { + "epoch": 0.1649057637852815, + "grad_norm": 1.0681541589658363, + "learning_rate": 4.99134325822186e-06, + "loss": 0.4954, + "step": 2708 + }, + { + "epoch": 0.16496665956215936, + "grad_norm": 1.1460915950011168, + "learning_rate": 4.9913366221810415e-06, + "loss": 0.5051, + "step": 2709 + }, + { + "epoch": 0.16502755533903724, + "grad_norm": 1.0878830203041965, + "learning_rate": 4.9913299836021e-06, + "loss": 0.4911, + "step": 2710 + }, + { + "epoch": 0.1650884511159151, + "grad_norm": 0.9892932953432554, + "learning_rate": 4.991323342485043e-06, + "loss": 0.524, + "step": 2711 + }, + { + "epoch": 0.16514934689279298, + "grad_norm": 1.1218118242576196, + "learning_rate": 4.991316698829879e-06, + "loss": 0.5092, + "step": 2712 + }, + { + "epoch": 0.16521024266967085, + "grad_norm": 0.995811649942255, + "learning_rate": 4.991310052636613e-06, + "loss": 0.5378, + "step": 2713 + }, + { + "epoch": 0.16527113844654873, + "grad_norm": 0.9354965329250947, + "learning_rate": 4.991303403905251e-06, + "loss": 0.5493, + "step": 2714 + }, + { + "epoch": 0.1653320342234266, + "grad_norm": 1.0767891989239866, + "learning_rate": 4.9912967526358025e-06, + "loss": 0.5158, + "step": 2715 + }, + { + "epoch": 0.16539293000030447, + "grad_norm": 1.004905818612976, + "learning_rate": 4.9912900988282714e-06, + "loss": 0.5375, + "step": 2716 + }, + { + "epoch": 0.16545382577718235, + "grad_norm": 1.075190676992048, + "learning_rate": 4.991283442482666e-06, + "loss": 0.5163, + "step": 2717 + }, + { + "epoch": 0.16551472155406022, + "grad_norm": 1.1092269200412495, + "learning_rate": 4.991276783598993e-06, + "loss": 0.5405, + "step": 2718 + }, + { + "epoch": 0.1655756173309381, + "grad_norm": 1.0461324491518276, + "learning_rate": 4.99127012217726e-06, + "loss": 0.5223, + "step": 2719 + }, + { + "epoch": 0.16563651310781596, + "grad_norm": 0.9754072980919567, + "learning_rate": 4.991263458217472e-06, + "loss": 0.5466, + "step": 2720 + }, + { + "epoch": 0.16569740888469384, + "grad_norm": 0.9544618450239257, + "learning_rate": 4.991256791719635e-06, + "loss": 0.5535, + "step": 2721 + }, + { + "epoch": 0.1657583046615717, + "grad_norm": 1.0648888747064493, + "learning_rate": 4.99125012268376e-06, + "loss": 0.5513, + "step": 2722 + }, + { + "epoch": 0.16581920043844958, + "grad_norm": 1.075123557521074, + "learning_rate": 4.99124345110985e-06, + "loss": 0.5188, + "step": 2723 + }, + { + "epoch": 0.16588009621532745, + "grad_norm": 1.0578750659040064, + "learning_rate": 4.991236776997912e-06, + "loss": 0.5394, + "step": 2724 + }, + { + "epoch": 0.16594099199220533, + "grad_norm": 1.0020392173369737, + "learning_rate": 4.9912301003479545e-06, + "loss": 0.5457, + "step": 2725 + }, + { + "epoch": 0.1660018877690832, + "grad_norm": 0.997170119745342, + "learning_rate": 4.991223421159984e-06, + "loss": 0.5661, + "step": 2726 + }, + { + "epoch": 0.1660627835459611, + "grad_norm": 1.106496304819477, + "learning_rate": 4.991216739434007e-06, + "loss": 0.5342, + "step": 2727 + }, + { + "epoch": 0.16612367932283897, + "grad_norm": 1.142998241349699, + "learning_rate": 4.9912100551700295e-06, + "loss": 0.5132, + "step": 2728 + }, + { + "epoch": 0.16618457509971685, + "grad_norm": 1.1178747171145162, + "learning_rate": 4.99120336836806e-06, + "loss": 0.5753, + "step": 2729 + }, + { + "epoch": 0.16624547087659472, + "grad_norm": 1.1228982600527717, + "learning_rate": 4.991196679028103e-06, + "loss": 0.4881, + "step": 2730 + }, + { + "epoch": 0.1663063666534726, + "grad_norm": 0.9700762822768757, + "learning_rate": 4.991189987150167e-06, + "loss": 0.5215, + "step": 2731 + }, + { + "epoch": 0.16636726243035047, + "grad_norm": 1.0755146675952505, + "learning_rate": 4.991183292734259e-06, + "loss": 0.4988, + "step": 2732 + }, + { + "epoch": 0.16642815820722834, + "grad_norm": 0.9925287723464853, + "learning_rate": 4.991176595780385e-06, + "loss": 0.485, + "step": 2733 + }, + { + "epoch": 0.1664890539841062, + "grad_norm": 1.1252876360535016, + "learning_rate": 4.991169896288552e-06, + "loss": 0.5638, + "step": 2734 + }, + { + "epoch": 0.16654994976098408, + "grad_norm": 0.996610968806556, + "learning_rate": 4.991163194258767e-06, + "loss": 0.5748, + "step": 2735 + }, + { + "epoch": 0.16661084553786196, + "grad_norm": 1.136752660548816, + "learning_rate": 4.991156489691036e-06, + "loss": 0.4721, + "step": 2736 + }, + { + "epoch": 0.16667174131473983, + "grad_norm": 1.1236606030952112, + "learning_rate": 4.9911497825853674e-06, + "loss": 0.5512, + "step": 2737 + }, + { + "epoch": 0.1667326370916177, + "grad_norm": 1.026740306299525, + "learning_rate": 4.9911430729417676e-06, + "loss": 0.5276, + "step": 2738 + }, + { + "epoch": 0.16679353286849558, + "grad_norm": 0.9619989203757799, + "learning_rate": 4.991136360760242e-06, + "loss": 0.5001, + "step": 2739 + }, + { + "epoch": 0.16685442864537345, + "grad_norm": 1.0011236017950504, + "learning_rate": 4.991129646040799e-06, + "loss": 0.5586, + "step": 2740 + }, + { + "epoch": 0.16691532442225132, + "grad_norm": 1.0269830796653654, + "learning_rate": 4.991122928783445e-06, + "loss": 0.5799, + "step": 2741 + }, + { + "epoch": 0.1669762201991292, + "grad_norm": 1.0461879478380072, + "learning_rate": 4.991116208988186e-06, + "loss": 0.5528, + "step": 2742 + }, + { + "epoch": 0.16703711597600707, + "grad_norm": 1.0596382475931543, + "learning_rate": 4.99110948665503e-06, + "loss": 0.4737, + "step": 2743 + }, + { + "epoch": 0.16709801175288494, + "grad_norm": 1.0484023966823774, + "learning_rate": 4.991102761783984e-06, + "loss": 0.4636, + "step": 2744 + }, + { + "epoch": 0.1671589075297628, + "grad_norm": 1.119498466771803, + "learning_rate": 4.991096034375054e-06, + "loss": 0.4512, + "step": 2745 + }, + { + "epoch": 0.16721980330664069, + "grad_norm": 1.0197743286425938, + "learning_rate": 4.991089304428247e-06, + "loss": 0.557, + "step": 2746 + }, + { + "epoch": 0.16728069908351856, + "grad_norm": 1.0335363016475971, + "learning_rate": 4.991082571943569e-06, + "loss": 0.5463, + "step": 2747 + }, + { + "epoch": 0.16734159486039643, + "grad_norm": 1.156182203977758, + "learning_rate": 4.99107583692103e-06, + "loss": 0.5235, + "step": 2748 + }, + { + "epoch": 0.1674024906372743, + "grad_norm": 1.0843273074191415, + "learning_rate": 4.991069099360634e-06, + "loss": 0.4815, + "step": 2749 + }, + { + "epoch": 0.16746338641415218, + "grad_norm": 1.084988384962405, + "learning_rate": 4.991062359262388e-06, + "loss": 0.5567, + "step": 2750 + }, + { + "epoch": 0.16752428219103005, + "grad_norm": 1.1160813036822828, + "learning_rate": 4.991055616626299e-06, + "loss": 0.5228, + "step": 2751 + }, + { + "epoch": 0.16758517796790792, + "grad_norm": 1.031441877456662, + "learning_rate": 4.991048871452375e-06, + "loss": 0.5063, + "step": 2752 + }, + { + "epoch": 0.1676460737447858, + "grad_norm": 1.1115112221565828, + "learning_rate": 4.9910421237406225e-06, + "loss": 0.5146, + "step": 2753 + }, + { + "epoch": 0.16770696952166367, + "grad_norm": 1.097758939826504, + "learning_rate": 4.9910353734910475e-06, + "loss": 0.4574, + "step": 2754 + }, + { + "epoch": 0.16776786529854154, + "grad_norm": 1.1223038811829529, + "learning_rate": 4.9910286207036584e-06, + "loss": 0.5263, + "step": 2755 + }, + { + "epoch": 0.1678287610754194, + "grad_norm": 1.0378952480567196, + "learning_rate": 4.99102186537846e-06, + "loss": 0.4863, + "step": 2756 + }, + { + "epoch": 0.1678896568522973, + "grad_norm": 1.0749022680184375, + "learning_rate": 4.991015107515461e-06, + "loss": 0.5113, + "step": 2757 + }, + { + "epoch": 0.16795055262917516, + "grad_norm": 1.2376035496725921, + "learning_rate": 4.991008347114667e-06, + "loss": 0.5063, + "step": 2758 + }, + { + "epoch": 0.16801144840605303, + "grad_norm": 1.1022349300761587, + "learning_rate": 4.991001584176086e-06, + "loss": 0.4981, + "step": 2759 + }, + { + "epoch": 0.1680723441829309, + "grad_norm": 1.1819187884299625, + "learning_rate": 4.990994818699724e-06, + "loss": 0.4752, + "step": 2760 + }, + { + "epoch": 0.16813323995980878, + "grad_norm": 1.0322094413585923, + "learning_rate": 4.990988050685589e-06, + "loss": 0.5262, + "step": 2761 + }, + { + "epoch": 0.16819413573668665, + "grad_norm": 0.9951172531134892, + "learning_rate": 4.990981280133686e-06, + "loss": 0.5602, + "step": 2762 + }, + { + "epoch": 0.16825503151356452, + "grad_norm": 1.0300673738294612, + "learning_rate": 4.990974507044024e-06, + "loss": 0.5796, + "step": 2763 + }, + { + "epoch": 0.1683159272904424, + "grad_norm": 1.096680884085464, + "learning_rate": 4.990967731416609e-06, + "loss": 0.4866, + "step": 2764 + }, + { + "epoch": 0.16837682306732027, + "grad_norm": 1.059951179625986, + "learning_rate": 4.990960953251448e-06, + "loss": 0.4906, + "step": 2765 + }, + { + "epoch": 0.16843771884419814, + "grad_norm": 1.056360149534859, + "learning_rate": 4.9909541725485465e-06, + "loss": 0.6054, + "step": 2766 + }, + { + "epoch": 0.16849861462107601, + "grad_norm": 1.0417690156938268, + "learning_rate": 4.990947389307914e-06, + "loss": 0.502, + "step": 2767 + }, + { + "epoch": 0.16855951039795392, + "grad_norm": 1.019374865704244, + "learning_rate": 4.990940603529556e-06, + "loss": 0.5692, + "step": 2768 + }, + { + "epoch": 0.1686204061748318, + "grad_norm": 1.0213749437074848, + "learning_rate": 4.990933815213479e-06, + "loss": 0.5488, + "step": 2769 + }, + { + "epoch": 0.16868130195170966, + "grad_norm": 1.0066734540347066, + "learning_rate": 4.9909270243596906e-06, + "loss": 0.5526, + "step": 2770 + }, + { + "epoch": 0.16874219772858753, + "grad_norm": 1.0854242630530795, + "learning_rate": 4.990920230968197e-06, + "loss": 0.5135, + "step": 2771 + }, + { + "epoch": 0.1688030935054654, + "grad_norm": 1.2436637258378764, + "learning_rate": 4.990913435039006e-06, + "loss": 0.5292, + "step": 2772 + }, + { + "epoch": 0.16886398928234328, + "grad_norm": 1.075319006375499, + "learning_rate": 4.990906636572125e-06, + "loss": 0.5153, + "step": 2773 + }, + { + "epoch": 0.16892488505922115, + "grad_norm": 1.2281640793786042, + "learning_rate": 4.990899835567559e-06, + "loss": 0.516, + "step": 2774 + }, + { + "epoch": 0.16898578083609903, + "grad_norm": 1.1010900386896316, + "learning_rate": 4.990893032025316e-06, + "loss": 0.4862, + "step": 2775 + }, + { + "epoch": 0.1690466766129769, + "grad_norm": 1.1140044879524127, + "learning_rate": 4.990886225945404e-06, + "loss": 0.4372, + "step": 2776 + }, + { + "epoch": 0.16910757238985477, + "grad_norm": 0.9749092480357102, + "learning_rate": 4.990879417327828e-06, + "loss": 0.5706, + "step": 2777 + }, + { + "epoch": 0.16916846816673264, + "grad_norm": 1.1768800803987973, + "learning_rate": 4.990872606172596e-06, + "loss": 0.5204, + "step": 2778 + }, + { + "epoch": 0.16922936394361052, + "grad_norm": 1.0946869792305314, + "learning_rate": 4.990865792479715e-06, + "loss": 0.477, + "step": 2779 + }, + { + "epoch": 0.1692902597204884, + "grad_norm": 1.013822031546885, + "learning_rate": 4.990858976249191e-06, + "loss": 0.5078, + "step": 2780 + }, + { + "epoch": 0.16935115549736626, + "grad_norm": 1.056464762125387, + "learning_rate": 4.990852157481033e-06, + "loss": 0.4766, + "step": 2781 + }, + { + "epoch": 0.16941205127424414, + "grad_norm": 1.1669467997493665, + "learning_rate": 4.990845336175246e-06, + "loss": 0.5695, + "step": 2782 + }, + { + "epoch": 0.169472947051122, + "grad_norm": 0.9394691882805167, + "learning_rate": 4.990838512331837e-06, + "loss": 0.5205, + "step": 2783 + }, + { + "epoch": 0.16953384282799988, + "grad_norm": 1.0510634351027965, + "learning_rate": 4.990831685950814e-06, + "loss": 0.5795, + "step": 2784 + }, + { + "epoch": 0.16959473860487775, + "grad_norm": 0.9779053168676226, + "learning_rate": 4.9908248570321834e-06, + "loss": 0.543, + "step": 2785 + }, + { + "epoch": 0.16965563438175563, + "grad_norm": 0.9792427470815624, + "learning_rate": 4.990818025575952e-06, + "loss": 0.5387, + "step": 2786 + }, + { + "epoch": 0.1697165301586335, + "grad_norm": 0.9840424620288444, + "learning_rate": 4.990811191582128e-06, + "loss": 0.5628, + "step": 2787 + }, + { + "epoch": 0.16977742593551137, + "grad_norm": 1.044829634402247, + "learning_rate": 4.9908043550507165e-06, + "loss": 0.4934, + "step": 2788 + }, + { + "epoch": 0.16983832171238925, + "grad_norm": 1.0410293881762738, + "learning_rate": 4.990797515981725e-06, + "loss": 0.5007, + "step": 2789 + }, + { + "epoch": 0.16989921748926712, + "grad_norm": 1.0651744334311575, + "learning_rate": 4.9907906743751615e-06, + "loss": 0.4733, + "step": 2790 + }, + { + "epoch": 0.169960113266145, + "grad_norm": 1.1772188180412784, + "learning_rate": 4.990783830231032e-06, + "loss": 0.4919, + "step": 2791 + }, + { + "epoch": 0.17002100904302286, + "grad_norm": 1.1022574594589656, + "learning_rate": 4.990776983549344e-06, + "loss": 0.5496, + "step": 2792 + }, + { + "epoch": 0.17008190481990074, + "grad_norm": 0.9854699265880038, + "learning_rate": 4.990770134330103e-06, + "loss": 0.5089, + "step": 2793 + }, + { + "epoch": 0.1701428005967786, + "grad_norm": 1.053729941064043, + "learning_rate": 4.990763282573319e-06, + "loss": 0.4921, + "step": 2794 + }, + { + "epoch": 0.17020369637365648, + "grad_norm": 1.1161092466908684, + "learning_rate": 4.990756428278995e-06, + "loss": 0.5218, + "step": 2795 + }, + { + "epoch": 0.17026459215053436, + "grad_norm": 1.0633172502072248, + "learning_rate": 4.990749571447142e-06, + "loss": 0.5472, + "step": 2796 + }, + { + "epoch": 0.17032548792741223, + "grad_norm": 0.9990622164562154, + "learning_rate": 4.990742712077764e-06, + "loss": 0.5446, + "step": 2797 + }, + { + "epoch": 0.1703863837042901, + "grad_norm": 1.0495662879411818, + "learning_rate": 4.990735850170869e-06, + "loss": 0.5172, + "step": 2798 + }, + { + "epoch": 0.17044727948116797, + "grad_norm": 1.0461651593868748, + "learning_rate": 4.990728985726465e-06, + "loss": 0.5686, + "step": 2799 + }, + { + "epoch": 0.17050817525804585, + "grad_norm": 1.1034464138009952, + "learning_rate": 4.9907221187445575e-06, + "loss": 0.5367, + "step": 2800 + }, + { + "epoch": 0.17056907103492372, + "grad_norm": 1.0066783541200572, + "learning_rate": 4.990715249225154e-06, + "loss": 0.5279, + "step": 2801 + }, + { + "epoch": 0.1706299668118016, + "grad_norm": 0.9626907028298224, + "learning_rate": 4.990708377168263e-06, + "loss": 0.5605, + "step": 2802 + }, + { + "epoch": 0.17069086258867947, + "grad_norm": 1.127185883263584, + "learning_rate": 4.990701502573888e-06, + "loss": 0.4903, + "step": 2803 + }, + { + "epoch": 0.17075175836555734, + "grad_norm": 1.070424965863723, + "learning_rate": 4.99069462544204e-06, + "loss": 0.5376, + "step": 2804 + }, + { + "epoch": 0.1708126541424352, + "grad_norm": 1.0352624909703068, + "learning_rate": 4.990687745772723e-06, + "loss": 0.5153, + "step": 2805 + }, + { + "epoch": 0.17087354991931308, + "grad_norm": 1.0868882433639415, + "learning_rate": 4.990680863565945e-06, + "loss": 0.5665, + "step": 2806 + }, + { + "epoch": 0.17093444569619096, + "grad_norm": 1.1498751333425743, + "learning_rate": 4.990673978821713e-06, + "loss": 0.5723, + "step": 2807 + }, + { + "epoch": 0.17099534147306883, + "grad_norm": 1.0503326773767916, + "learning_rate": 4.990667091540035e-06, + "loss": 0.5183, + "step": 2808 + }, + { + "epoch": 0.17105623724994673, + "grad_norm": 1.0997222482926126, + "learning_rate": 4.990660201720916e-06, + "loss": 0.5683, + "step": 2809 + }, + { + "epoch": 0.1711171330268246, + "grad_norm": 0.940266127173205, + "learning_rate": 4.990653309364365e-06, + "loss": 0.5695, + "step": 2810 + }, + { + "epoch": 0.17117802880370248, + "grad_norm": 1.070796892161803, + "learning_rate": 4.990646414470388e-06, + "loss": 0.5181, + "step": 2811 + }, + { + "epoch": 0.17123892458058035, + "grad_norm": 1.065163813431522, + "learning_rate": 4.990639517038992e-06, + "loss": 0.5453, + "step": 2812 + }, + { + "epoch": 0.17129982035745822, + "grad_norm": 0.9892453150466485, + "learning_rate": 4.9906326170701845e-06, + "loss": 0.5695, + "step": 2813 + }, + { + "epoch": 0.1713607161343361, + "grad_norm": 0.9576827853589414, + "learning_rate": 4.990625714563972e-06, + "loss": 0.5191, + "step": 2814 + }, + { + "epoch": 0.17142161191121397, + "grad_norm": 1.0382044643083195, + "learning_rate": 4.990618809520362e-06, + "loss": 0.4889, + "step": 2815 + }, + { + "epoch": 0.17148250768809184, + "grad_norm": 1.047116926990181, + "learning_rate": 4.990611901939362e-06, + "loss": 0.5271, + "step": 2816 + }, + { + "epoch": 0.1715434034649697, + "grad_norm": 1.00820248306595, + "learning_rate": 4.9906049918209775e-06, + "loss": 0.5121, + "step": 2817 + }, + { + "epoch": 0.17160429924184759, + "grad_norm": 1.0667811966576801, + "learning_rate": 4.990598079165217e-06, + "loss": 0.5315, + "step": 2818 + }, + { + "epoch": 0.17166519501872546, + "grad_norm": 1.0356175856648462, + "learning_rate": 4.990591163972087e-06, + "loss": 0.4462, + "step": 2819 + }, + { + "epoch": 0.17172609079560333, + "grad_norm": 1.1834793622345228, + "learning_rate": 4.990584246241594e-06, + "loss": 0.5473, + "step": 2820 + }, + { + "epoch": 0.1717869865724812, + "grad_norm": 1.136712619758158, + "learning_rate": 4.990577325973746e-06, + "loss": 0.5489, + "step": 2821 + }, + { + "epoch": 0.17184788234935908, + "grad_norm": 1.0748973504429145, + "learning_rate": 4.990570403168549e-06, + "loss": 0.4992, + "step": 2822 + }, + { + "epoch": 0.17190877812623695, + "grad_norm": 1.0179640603182982, + "learning_rate": 4.99056347782601e-06, + "loss": 0.5399, + "step": 2823 + }, + { + "epoch": 0.17196967390311482, + "grad_norm": 1.0229042944474314, + "learning_rate": 4.990556549946138e-06, + "loss": 0.5198, + "step": 2824 + }, + { + "epoch": 0.1720305696799927, + "grad_norm": 1.0639781771423258, + "learning_rate": 4.990549619528939e-06, + "loss": 0.5373, + "step": 2825 + }, + { + "epoch": 0.17209146545687057, + "grad_norm": 1.0344370620783385, + "learning_rate": 4.990542686574419e-06, + "loss": 0.5415, + "step": 2826 + }, + { + "epoch": 0.17215236123374844, + "grad_norm": 1.031824150863195, + "learning_rate": 4.990535751082585e-06, + "loss": 0.4939, + "step": 2827 + }, + { + "epoch": 0.1722132570106263, + "grad_norm": 1.0818433035472284, + "learning_rate": 4.990528813053447e-06, + "loss": 0.5193, + "step": 2828 + }, + { + "epoch": 0.1722741527875042, + "grad_norm": 1.0580872460135264, + "learning_rate": 4.99052187248701e-06, + "loss": 0.5769, + "step": 2829 + }, + { + "epoch": 0.17233504856438206, + "grad_norm": 1.1421209826242573, + "learning_rate": 4.990514929383279e-06, + "loss": 0.5182, + "step": 2830 + }, + { + "epoch": 0.17239594434125993, + "grad_norm": 0.994124819717035, + "learning_rate": 4.990507983742265e-06, + "loss": 0.4835, + "step": 2831 + }, + { + "epoch": 0.1724568401181378, + "grad_norm": 1.090468653868473, + "learning_rate": 4.990501035563973e-06, + "loss": 0.4005, + "step": 2832 + }, + { + "epoch": 0.17251773589501568, + "grad_norm": 0.9645813192506792, + "learning_rate": 4.990494084848409e-06, + "loss": 0.5512, + "step": 2833 + }, + { + "epoch": 0.17257863167189355, + "grad_norm": 1.0274702115141894, + "learning_rate": 4.990487131595583e-06, + "loss": 0.5021, + "step": 2834 + }, + { + "epoch": 0.17263952744877142, + "grad_norm": 1.0137468539455536, + "learning_rate": 4.9904801758055e-06, + "loss": 0.5426, + "step": 2835 + }, + { + "epoch": 0.1727004232256493, + "grad_norm": 0.9336835761677942, + "learning_rate": 4.9904732174781675e-06, + "loss": 0.5153, + "step": 2836 + }, + { + "epoch": 0.17276131900252717, + "grad_norm": 1.181349674349817, + "learning_rate": 4.990466256613592e-06, + "loss": 0.4497, + "step": 2837 + }, + { + "epoch": 0.17282221477940504, + "grad_norm": 1.1016020651813316, + "learning_rate": 4.990459293211782e-06, + "loss": 0.4953, + "step": 2838 + }, + { + "epoch": 0.17288311055628292, + "grad_norm": 0.932403680713954, + "learning_rate": 4.990452327272743e-06, + "loss": 0.5929, + "step": 2839 + }, + { + "epoch": 0.1729440063331608, + "grad_norm": 1.1033196257091769, + "learning_rate": 4.990445358796484e-06, + "loss": 0.4815, + "step": 2840 + }, + { + "epoch": 0.17300490211003866, + "grad_norm": 0.9791316593566508, + "learning_rate": 4.990438387783011e-06, + "loss": 0.5793, + "step": 2841 + }, + { + "epoch": 0.17306579788691653, + "grad_norm": 1.0241999766501135, + "learning_rate": 4.9904314142323305e-06, + "loss": 0.4922, + "step": 2842 + }, + { + "epoch": 0.1731266936637944, + "grad_norm": 1.0813930554385807, + "learning_rate": 4.990424438144451e-06, + "loss": 0.448, + "step": 2843 + }, + { + "epoch": 0.17318758944067228, + "grad_norm": 1.0049213914613377, + "learning_rate": 4.990417459519379e-06, + "loss": 0.5693, + "step": 2844 + }, + { + "epoch": 0.17324848521755015, + "grad_norm": 0.9986698332217383, + "learning_rate": 4.990410478357121e-06, + "loss": 0.5375, + "step": 2845 + }, + { + "epoch": 0.17330938099442803, + "grad_norm": 1.0732560725406215, + "learning_rate": 4.990403494657684e-06, + "loss": 0.487, + "step": 2846 + }, + { + "epoch": 0.1733702767713059, + "grad_norm": 0.948030582488427, + "learning_rate": 4.990396508421076e-06, + "loss": 0.5586, + "step": 2847 + }, + { + "epoch": 0.17343117254818377, + "grad_norm": 1.0769328202764965, + "learning_rate": 4.990389519647304e-06, + "loss": 0.5225, + "step": 2848 + }, + { + "epoch": 0.17349206832506164, + "grad_norm": 1.110471250602923, + "learning_rate": 4.990382528336375e-06, + "loss": 0.528, + "step": 2849 + }, + { + "epoch": 0.17355296410193954, + "grad_norm": 1.0325151393441803, + "learning_rate": 4.990375534488296e-06, + "loss": 0.5727, + "step": 2850 + }, + { + "epoch": 0.17361385987881742, + "grad_norm": 1.0935110339361174, + "learning_rate": 4.990368538103074e-06, + "loss": 0.5128, + "step": 2851 + }, + { + "epoch": 0.1736747556556953, + "grad_norm": 1.0763073025687575, + "learning_rate": 4.990361539180717e-06, + "loss": 0.4738, + "step": 2852 + }, + { + "epoch": 0.17373565143257316, + "grad_norm": 1.015158663200676, + "learning_rate": 4.99035453772123e-06, + "loss": 0.4827, + "step": 2853 + }, + { + "epoch": 0.17379654720945104, + "grad_norm": 1.0940206618729, + "learning_rate": 4.990347533724623e-06, + "loss": 0.5059, + "step": 2854 + }, + { + "epoch": 0.1738574429863289, + "grad_norm": 0.944963081508007, + "learning_rate": 4.990340527190901e-06, + "loss": 0.5308, + "step": 2855 + }, + { + "epoch": 0.17391833876320678, + "grad_norm": 1.067287525286117, + "learning_rate": 4.990333518120072e-06, + "loss": 0.501, + "step": 2856 + }, + { + "epoch": 0.17397923454008465, + "grad_norm": 1.0940080176931142, + "learning_rate": 4.990326506512143e-06, + "loss": 0.5097, + "step": 2857 + }, + { + "epoch": 0.17404013031696253, + "grad_norm": 0.9601523206785813, + "learning_rate": 4.990319492367121e-06, + "loss": 0.5122, + "step": 2858 + }, + { + "epoch": 0.1741010260938404, + "grad_norm": 1.1138189136042353, + "learning_rate": 4.990312475685014e-06, + "loss": 0.5153, + "step": 2859 + }, + { + "epoch": 0.17416192187071827, + "grad_norm": 0.9627410679911211, + "learning_rate": 4.990305456465828e-06, + "loss": 0.5306, + "step": 2860 + }, + { + "epoch": 0.17422281764759615, + "grad_norm": 1.1485663342906365, + "learning_rate": 4.990298434709571e-06, + "loss": 0.4799, + "step": 2861 + }, + { + "epoch": 0.17428371342447402, + "grad_norm": 1.0538980002239324, + "learning_rate": 4.990291410416248e-06, + "loss": 0.5694, + "step": 2862 + }, + { + "epoch": 0.1743446092013519, + "grad_norm": 1.0851165137272176, + "learning_rate": 4.990284383585869e-06, + "loss": 0.5245, + "step": 2863 + }, + { + "epoch": 0.17440550497822976, + "grad_norm": 0.9958313898450191, + "learning_rate": 4.990277354218441e-06, + "loss": 0.5846, + "step": 2864 + }, + { + "epoch": 0.17446640075510764, + "grad_norm": 1.0908389434248047, + "learning_rate": 4.990270322313969e-06, + "loss": 0.4758, + "step": 2865 + }, + { + "epoch": 0.1745272965319855, + "grad_norm": 1.0365647506971172, + "learning_rate": 4.9902632878724615e-06, + "loss": 0.5732, + "step": 2866 + }, + { + "epoch": 0.17458819230886338, + "grad_norm": 1.0239564858370875, + "learning_rate": 4.990256250893925e-06, + "loss": 0.5107, + "step": 2867 + }, + { + "epoch": 0.17464908808574126, + "grad_norm": 0.9857937349122177, + "learning_rate": 4.990249211378369e-06, + "loss": 0.5153, + "step": 2868 + }, + { + "epoch": 0.17470998386261913, + "grad_norm": 1.0608235484946178, + "learning_rate": 4.990242169325798e-06, + "loss": 0.5055, + "step": 2869 + }, + { + "epoch": 0.174770879639497, + "grad_norm": 1.047745994394843, + "learning_rate": 4.99023512473622e-06, + "loss": 0.4922, + "step": 2870 + }, + { + "epoch": 0.17483177541637487, + "grad_norm": 0.9842879889870572, + "learning_rate": 4.990228077609641e-06, + "loss": 0.5133, + "step": 2871 + }, + { + "epoch": 0.17489267119325275, + "grad_norm": 1.0962082387953496, + "learning_rate": 4.990221027946071e-06, + "loss": 0.4837, + "step": 2872 + }, + { + "epoch": 0.17495356697013062, + "grad_norm": 1.1078890066700453, + "learning_rate": 4.990213975745515e-06, + "loss": 0.5475, + "step": 2873 + }, + { + "epoch": 0.1750144627470085, + "grad_norm": 1.0886014680181912, + "learning_rate": 4.990206921007981e-06, + "loss": 0.4676, + "step": 2874 + }, + { + "epoch": 0.17507535852388637, + "grad_norm": 1.0657385089530438, + "learning_rate": 4.990199863733476e-06, + "loss": 0.5387, + "step": 2875 + }, + { + "epoch": 0.17513625430076424, + "grad_norm": 1.080845299211313, + "learning_rate": 4.990192803922007e-06, + "loss": 0.5299, + "step": 2876 + }, + { + "epoch": 0.1751971500776421, + "grad_norm": 1.101243950131989, + "learning_rate": 4.990185741573581e-06, + "loss": 0.5542, + "step": 2877 + }, + { + "epoch": 0.17525804585451998, + "grad_norm": 1.0195902392730605, + "learning_rate": 4.990178676688206e-06, + "loss": 0.4704, + "step": 2878 + }, + { + "epoch": 0.17531894163139786, + "grad_norm": 1.0370627034937698, + "learning_rate": 4.990171609265889e-06, + "loss": 0.4706, + "step": 2879 + }, + { + "epoch": 0.17537983740827573, + "grad_norm": 1.0268323816938052, + "learning_rate": 4.990164539306636e-06, + "loss": 0.4361, + "step": 2880 + }, + { + "epoch": 0.1754407331851536, + "grad_norm": 0.9948700954720737, + "learning_rate": 4.990157466810456e-06, + "loss": 0.5515, + "step": 2881 + }, + { + "epoch": 0.17550162896203148, + "grad_norm": 1.0762551068184951, + "learning_rate": 4.9901503917773546e-06, + "loss": 0.4877, + "step": 2882 + }, + { + "epoch": 0.17556252473890935, + "grad_norm": 1.0387403663712056, + "learning_rate": 4.990143314207341e-06, + "loss": 0.4933, + "step": 2883 + }, + { + "epoch": 0.17562342051578722, + "grad_norm": 1.070106313196307, + "learning_rate": 4.99013623410042e-06, + "loss": 0.5191, + "step": 2884 + }, + { + "epoch": 0.1756843162926651, + "grad_norm": 1.1217640132014104, + "learning_rate": 4.990129151456599e-06, + "loss": 0.5125, + "step": 2885 + }, + { + "epoch": 0.17574521206954297, + "grad_norm": 1.0468195489205663, + "learning_rate": 4.990122066275889e-06, + "loss": 0.4342, + "step": 2886 + }, + { + "epoch": 0.17580610784642084, + "grad_norm": 1.0003391476231984, + "learning_rate": 4.990114978558293e-06, + "loss": 0.5701, + "step": 2887 + }, + { + "epoch": 0.1758670036232987, + "grad_norm": 1.0444770143360116, + "learning_rate": 4.990107888303819e-06, + "loss": 0.474, + "step": 2888 + }, + { + "epoch": 0.17592789940017659, + "grad_norm": 1.056410805849003, + "learning_rate": 4.990100795512476e-06, + "loss": 0.4796, + "step": 2889 + }, + { + "epoch": 0.17598879517705446, + "grad_norm": 1.025992427674052, + "learning_rate": 4.990093700184269e-06, + "loss": 0.4695, + "step": 2890 + }, + { + "epoch": 0.17604969095393236, + "grad_norm": 1.046911684341831, + "learning_rate": 4.990086602319206e-06, + "loss": 0.5768, + "step": 2891 + }, + { + "epoch": 0.17611058673081023, + "grad_norm": 1.0832372925122673, + "learning_rate": 4.990079501917296e-06, + "loss": 0.4588, + "step": 2892 + }, + { + "epoch": 0.1761714825076881, + "grad_norm": 0.9744888419801195, + "learning_rate": 4.990072398978544e-06, + "loss": 0.4744, + "step": 2893 + }, + { + "epoch": 0.17623237828456598, + "grad_norm": 1.1463478746474702, + "learning_rate": 4.990065293502958e-06, + "loss": 0.4597, + "step": 2894 + }, + { + "epoch": 0.17629327406144385, + "grad_norm": 0.9861464190763995, + "learning_rate": 4.990058185490545e-06, + "loss": 0.5483, + "step": 2895 + }, + { + "epoch": 0.17635416983832172, + "grad_norm": 1.0233087424189025, + "learning_rate": 4.990051074941314e-06, + "loss": 0.5325, + "step": 2896 + }, + { + "epoch": 0.1764150656151996, + "grad_norm": 1.066183218541031, + "learning_rate": 4.990043961855269e-06, + "loss": 0.514, + "step": 2897 + }, + { + "epoch": 0.17647596139207747, + "grad_norm": 0.967799192669706, + "learning_rate": 4.99003684623242e-06, + "loss": 0.5456, + "step": 2898 + }, + { + "epoch": 0.17653685716895534, + "grad_norm": 1.0598647521457913, + "learning_rate": 4.990029728072772e-06, + "loss": 0.4709, + "step": 2899 + }, + { + "epoch": 0.17659775294583321, + "grad_norm": 1.0555974807079176, + "learning_rate": 4.990022607376335e-06, + "loss": 0.5369, + "step": 2900 + }, + { + "epoch": 0.1766586487227111, + "grad_norm": 1.0183882569502805, + "learning_rate": 4.990015484143113e-06, + "loss": 0.5011, + "step": 2901 + }, + { + "epoch": 0.17671954449958896, + "grad_norm": 0.9646501189490966, + "learning_rate": 4.990008358373116e-06, + "loss": 0.5395, + "step": 2902 + }, + { + "epoch": 0.17678044027646683, + "grad_norm": 1.0311877039417705, + "learning_rate": 4.99000123006635e-06, + "loss": 0.546, + "step": 2903 + }, + { + "epoch": 0.1768413360533447, + "grad_norm": 1.1124749443110156, + "learning_rate": 4.989994099222823e-06, + "loss": 0.4831, + "step": 2904 + }, + { + "epoch": 0.17690223183022258, + "grad_norm": 1.0753524632190885, + "learning_rate": 4.989986965842542e-06, + "loss": 0.4896, + "step": 2905 + }, + { + "epoch": 0.17696312760710045, + "grad_norm": 1.0816577128250198, + "learning_rate": 4.989979829925514e-06, + "loss": 0.4381, + "step": 2906 + }, + { + "epoch": 0.17702402338397832, + "grad_norm": 1.096709533487499, + "learning_rate": 4.9899726914717455e-06, + "loss": 0.4659, + "step": 2907 + }, + { + "epoch": 0.1770849191608562, + "grad_norm": 1.1531183672064396, + "learning_rate": 4.9899655504812446e-06, + "loss": 0.5203, + "step": 2908 + }, + { + "epoch": 0.17714581493773407, + "grad_norm": 1.023014523800475, + "learning_rate": 4.98995840695402e-06, + "loss": 0.4928, + "step": 2909 + }, + { + "epoch": 0.17720671071461194, + "grad_norm": 0.9958430993449372, + "learning_rate": 4.989951260890076e-06, + "loss": 0.5416, + "step": 2910 + }, + { + "epoch": 0.17726760649148982, + "grad_norm": 1.112405234525034, + "learning_rate": 4.989944112289422e-06, + "loss": 0.4172, + "step": 2911 + }, + { + "epoch": 0.1773285022683677, + "grad_norm": 0.9882807506997218, + "learning_rate": 4.9899369611520644e-06, + "loss": 0.5149, + "step": 2912 + }, + { + "epoch": 0.17738939804524556, + "grad_norm": 1.1040986656232328, + "learning_rate": 4.989929807478011e-06, + "loss": 0.4887, + "step": 2913 + }, + { + "epoch": 0.17745029382212343, + "grad_norm": 0.99720261196506, + "learning_rate": 4.98992265126727e-06, + "loss": 0.5077, + "step": 2914 + }, + { + "epoch": 0.1775111895990013, + "grad_norm": 0.9573113483054733, + "learning_rate": 4.9899154925198455e-06, + "loss": 0.5171, + "step": 2915 + }, + { + "epoch": 0.17757208537587918, + "grad_norm": 1.0538335030681827, + "learning_rate": 4.989908331235748e-06, + "loss": 0.5169, + "step": 2916 + }, + { + "epoch": 0.17763298115275705, + "grad_norm": 1.0023611959813634, + "learning_rate": 4.9899011674149834e-06, + "loss": 0.4851, + "step": 2917 + }, + { + "epoch": 0.17769387692963493, + "grad_norm": 1.002942266648752, + "learning_rate": 4.989894001057559e-06, + "loss": 0.5429, + "step": 2918 + }, + { + "epoch": 0.1777547727065128, + "grad_norm": 1.1461247440508093, + "learning_rate": 4.989886832163483e-06, + "loss": 0.4774, + "step": 2919 + }, + { + "epoch": 0.17781566848339067, + "grad_norm": 1.0760741897668422, + "learning_rate": 4.989879660732761e-06, + "loss": 0.5329, + "step": 2920 + }, + { + "epoch": 0.17787656426026854, + "grad_norm": 1.0733303125532931, + "learning_rate": 4.9898724867654025e-06, + "loss": 0.4224, + "step": 2921 + }, + { + "epoch": 0.17793746003714642, + "grad_norm": 1.0414654801019132, + "learning_rate": 4.989865310261414e-06, + "loss": 0.4906, + "step": 2922 + }, + { + "epoch": 0.1779983558140243, + "grad_norm": 1.0825052364630556, + "learning_rate": 4.989858131220801e-06, + "loss": 0.5117, + "step": 2923 + }, + { + "epoch": 0.17805925159090216, + "grad_norm": 1.0035847653927539, + "learning_rate": 4.989850949643573e-06, + "loss": 0.5151, + "step": 2924 + }, + { + "epoch": 0.17812014736778004, + "grad_norm": 0.9852571853059593, + "learning_rate": 4.989843765529737e-06, + "loss": 0.511, + "step": 2925 + }, + { + "epoch": 0.1781810431446579, + "grad_norm": 1.0495445239278571, + "learning_rate": 4.9898365788793e-06, + "loss": 0.572, + "step": 2926 + }, + { + "epoch": 0.17824193892153578, + "grad_norm": 0.9624871331460763, + "learning_rate": 4.989829389692269e-06, + "loss": 0.5805, + "step": 2927 + }, + { + "epoch": 0.17830283469841365, + "grad_norm": 1.0667337430113062, + "learning_rate": 4.989822197968651e-06, + "loss": 0.4918, + "step": 2928 + }, + { + "epoch": 0.17836373047529153, + "grad_norm": 1.1840816938378262, + "learning_rate": 4.9898150037084545e-06, + "loss": 0.5009, + "step": 2929 + }, + { + "epoch": 0.1784246262521694, + "grad_norm": 0.9764981874415312, + "learning_rate": 4.989807806911686e-06, + "loss": 0.541, + "step": 2930 + }, + { + "epoch": 0.17848552202904727, + "grad_norm": 1.0145003442027638, + "learning_rate": 4.9898006075783535e-06, + "loss": 0.4666, + "step": 2931 + }, + { + "epoch": 0.17854641780592517, + "grad_norm": 0.9712197228916821, + "learning_rate": 4.989793405708464e-06, + "loss": 0.5274, + "step": 2932 + }, + { + "epoch": 0.17860731358280305, + "grad_norm": 1.0002849295095022, + "learning_rate": 4.989786201302024e-06, + "loss": 0.5784, + "step": 2933 + }, + { + "epoch": 0.17866820935968092, + "grad_norm": 1.1331426017906456, + "learning_rate": 4.989778994359043e-06, + "loss": 0.5003, + "step": 2934 + }, + { + "epoch": 0.1787291051365588, + "grad_norm": 1.068311680199407, + "learning_rate": 4.989771784879525e-06, + "loss": 0.5417, + "step": 2935 + }, + { + "epoch": 0.17879000091343666, + "grad_norm": 1.0492294993702347, + "learning_rate": 4.9897645728634805e-06, + "loss": 0.5935, + "step": 2936 + }, + { + "epoch": 0.17885089669031454, + "grad_norm": 1.0975065331998533, + "learning_rate": 4.989757358310915e-06, + "loss": 0.4994, + "step": 2937 + }, + { + "epoch": 0.1789117924671924, + "grad_norm": 1.0189743994425222, + "learning_rate": 4.989750141221837e-06, + "loss": 0.5126, + "step": 2938 + }, + { + "epoch": 0.17897268824407028, + "grad_norm": 0.9726091581253123, + "learning_rate": 4.989742921596253e-06, + "loss": 0.547, + "step": 2939 + }, + { + "epoch": 0.17903358402094816, + "grad_norm": 1.0799937061815332, + "learning_rate": 4.98973569943417e-06, + "loss": 0.5029, + "step": 2940 + }, + { + "epoch": 0.17909447979782603, + "grad_norm": 1.0945217259187257, + "learning_rate": 4.989728474735597e-06, + "loss": 0.4988, + "step": 2941 + }, + { + "epoch": 0.1791553755747039, + "grad_norm": 1.0637505051670029, + "learning_rate": 4.989721247500541e-06, + "loss": 0.5343, + "step": 2942 + }, + { + "epoch": 0.17921627135158177, + "grad_norm": 1.049199996667739, + "learning_rate": 4.989714017729007e-06, + "loss": 0.5203, + "step": 2943 + }, + { + "epoch": 0.17927716712845965, + "grad_norm": 1.0530029253241426, + "learning_rate": 4.989706785421006e-06, + "loss": 0.5309, + "step": 2944 + }, + { + "epoch": 0.17933806290533752, + "grad_norm": 1.069780805328936, + "learning_rate": 4.989699550576542e-06, + "loss": 0.4911, + "step": 2945 + }, + { + "epoch": 0.1793989586822154, + "grad_norm": 1.0423387549674876, + "learning_rate": 4.989692313195624e-06, + "loss": 0.4742, + "step": 2946 + }, + { + "epoch": 0.17945985445909327, + "grad_norm": 1.1089285318115027, + "learning_rate": 4.98968507327826e-06, + "loss": 0.4564, + "step": 2947 + }, + { + "epoch": 0.17952075023597114, + "grad_norm": 1.1439799000242916, + "learning_rate": 4.989677830824456e-06, + "loss": 0.463, + "step": 2948 + }, + { + "epoch": 0.179581646012849, + "grad_norm": 0.9169150028090098, + "learning_rate": 4.98967058583422e-06, + "loss": 0.526, + "step": 2949 + }, + { + "epoch": 0.17964254178972688, + "grad_norm": 1.1500012470349439, + "learning_rate": 4.98966333830756e-06, + "loss": 0.5372, + "step": 2950 + }, + { + "epoch": 0.17970343756660476, + "grad_norm": 1.0836098616666747, + "learning_rate": 4.989656088244482e-06, + "loss": 0.5163, + "step": 2951 + }, + { + "epoch": 0.17976433334348263, + "grad_norm": 0.9875413087533366, + "learning_rate": 4.989648835644994e-06, + "loss": 0.4857, + "step": 2952 + }, + { + "epoch": 0.1798252291203605, + "grad_norm": 0.9797632086807937, + "learning_rate": 4.989641580509105e-06, + "loss": 0.4964, + "step": 2953 + }, + { + "epoch": 0.17988612489723838, + "grad_norm": 0.9709888884279402, + "learning_rate": 4.98963432283682e-06, + "loss": 0.5455, + "step": 2954 + }, + { + "epoch": 0.17994702067411625, + "grad_norm": 1.0224105311829343, + "learning_rate": 4.989627062628147e-06, + "loss": 0.4892, + "step": 2955 + }, + { + "epoch": 0.18000791645099412, + "grad_norm": 1.169484171214119, + "learning_rate": 4.989619799883094e-06, + "loss": 0.4715, + "step": 2956 + }, + { + "epoch": 0.180068812227872, + "grad_norm": 1.0334368144036203, + "learning_rate": 4.989612534601667e-06, + "loss": 0.5519, + "step": 2957 + }, + { + "epoch": 0.18012970800474987, + "grad_norm": 1.0432350371129349, + "learning_rate": 4.989605266783876e-06, + "loss": 0.4858, + "step": 2958 + }, + { + "epoch": 0.18019060378162774, + "grad_norm": 0.9883117441431462, + "learning_rate": 4.989597996429727e-06, + "loss": 0.5313, + "step": 2959 + }, + { + "epoch": 0.1802514995585056, + "grad_norm": 1.2112218433893853, + "learning_rate": 4.989590723539226e-06, + "loss": 0.4404, + "step": 2960 + }, + { + "epoch": 0.18031239533538349, + "grad_norm": 1.0024216538099917, + "learning_rate": 4.989583448112383e-06, + "loss": 0.5369, + "step": 2961 + }, + { + "epoch": 0.18037329111226136, + "grad_norm": 0.9441034066706006, + "learning_rate": 4.989576170149204e-06, + "loss": 0.5286, + "step": 2962 + }, + { + "epoch": 0.18043418688913923, + "grad_norm": 1.0002070414035555, + "learning_rate": 4.989568889649696e-06, + "loss": 0.5431, + "step": 2963 + }, + { + "epoch": 0.1804950826660171, + "grad_norm": 1.1270788808570624, + "learning_rate": 4.989561606613867e-06, + "loss": 0.4601, + "step": 2964 + }, + { + "epoch": 0.18055597844289498, + "grad_norm": 1.1537515150038782, + "learning_rate": 4.989554321041724e-06, + "loss": 0.4994, + "step": 2965 + }, + { + "epoch": 0.18061687421977285, + "grad_norm": 1.1153800459869663, + "learning_rate": 4.989547032933276e-06, + "loss": 0.5008, + "step": 2966 + }, + { + "epoch": 0.18067776999665072, + "grad_norm": 1.1208892539850333, + "learning_rate": 4.989539742288529e-06, + "loss": 0.5394, + "step": 2967 + }, + { + "epoch": 0.1807386657735286, + "grad_norm": 1.0490909892304123, + "learning_rate": 4.989532449107491e-06, + "loss": 0.5188, + "step": 2968 + }, + { + "epoch": 0.18079956155040647, + "grad_norm": 0.9796315661679105, + "learning_rate": 4.989525153390168e-06, + "loss": 0.5087, + "step": 2969 + }, + { + "epoch": 0.18086045732728434, + "grad_norm": 0.9727815677274755, + "learning_rate": 4.989517855136568e-06, + "loss": 0.5334, + "step": 2970 + }, + { + "epoch": 0.1809213531041622, + "grad_norm": 1.0617890946594448, + "learning_rate": 4.9895105543467e-06, + "loss": 0.4983, + "step": 2971 + }, + { + "epoch": 0.1809822488810401, + "grad_norm": 1.002861051863644, + "learning_rate": 4.989503251020571e-06, + "loss": 0.4955, + "step": 2972 + }, + { + "epoch": 0.181043144657918, + "grad_norm": 1.0733719489621698, + "learning_rate": 4.989495945158187e-06, + "loss": 0.5463, + "step": 2973 + }, + { + "epoch": 0.18110404043479586, + "grad_norm": 1.0125665028932713, + "learning_rate": 4.989488636759557e-06, + "loss": 0.6167, + "step": 2974 + }, + { + "epoch": 0.18116493621167373, + "grad_norm": 0.9579094349427676, + "learning_rate": 4.989481325824687e-06, + "loss": 0.5418, + "step": 2975 + }, + { + "epoch": 0.1812258319885516, + "grad_norm": 1.0106035987253672, + "learning_rate": 4.989474012353585e-06, + "loss": 0.4979, + "step": 2976 + }, + { + "epoch": 0.18128672776542948, + "grad_norm": 1.154605339076068, + "learning_rate": 4.989466696346259e-06, + "loss": 0.4749, + "step": 2977 + }, + { + "epoch": 0.18134762354230735, + "grad_norm": 1.090846269537333, + "learning_rate": 4.989459377802716e-06, + "loss": 0.5102, + "step": 2978 + }, + { + "epoch": 0.18140851931918522, + "grad_norm": 1.0058843530329094, + "learning_rate": 4.989452056722964e-06, + "loss": 0.5108, + "step": 2979 + }, + { + "epoch": 0.1814694150960631, + "grad_norm": 0.9949185343541124, + "learning_rate": 4.9894447331070095e-06, + "loss": 0.5488, + "step": 2980 + }, + { + "epoch": 0.18153031087294097, + "grad_norm": 0.9954191785928252, + "learning_rate": 4.98943740695486e-06, + "loss": 0.5436, + "step": 2981 + }, + { + "epoch": 0.18159120664981884, + "grad_norm": 1.011321402897738, + "learning_rate": 4.989430078266524e-06, + "loss": 0.4717, + "step": 2982 + }, + { + "epoch": 0.18165210242669672, + "grad_norm": 1.1229632897865807, + "learning_rate": 4.989422747042009e-06, + "loss": 0.5433, + "step": 2983 + }, + { + "epoch": 0.1817129982035746, + "grad_norm": 1.0824794785539829, + "learning_rate": 4.989415413281321e-06, + "loss": 0.5461, + "step": 2984 + }, + { + "epoch": 0.18177389398045246, + "grad_norm": 1.0405269607482674, + "learning_rate": 4.989408076984469e-06, + "loss": 0.4891, + "step": 2985 + }, + { + "epoch": 0.18183478975733033, + "grad_norm": 1.0469529302489466, + "learning_rate": 4.98940073815146e-06, + "loss": 0.5714, + "step": 2986 + }, + { + "epoch": 0.1818956855342082, + "grad_norm": 0.9033409570781581, + "learning_rate": 4.9893933967823e-06, + "loss": 0.4826, + "step": 2987 + }, + { + "epoch": 0.18195658131108608, + "grad_norm": 1.1072790733934696, + "learning_rate": 4.989386052876999e-06, + "loss": 0.5228, + "step": 2988 + }, + { + "epoch": 0.18201747708796395, + "grad_norm": 1.2311264900164969, + "learning_rate": 4.989378706435563e-06, + "loss": 0.4691, + "step": 2989 + }, + { + "epoch": 0.18207837286484183, + "grad_norm": 1.115906797296451, + "learning_rate": 4.9893713574579986e-06, + "loss": 0.4839, + "step": 2990 + }, + { + "epoch": 0.1821392686417197, + "grad_norm": 1.0902770730741886, + "learning_rate": 4.989364005944315e-06, + "loss": 0.5115, + "step": 2991 + }, + { + "epoch": 0.18220016441859757, + "grad_norm": 1.0072942008216492, + "learning_rate": 4.98935665189452e-06, + "loss": 0.5429, + "step": 2992 + }, + { + "epoch": 0.18226106019547544, + "grad_norm": 1.1077611774558933, + "learning_rate": 4.9893492953086194e-06, + "loss": 0.5188, + "step": 2993 + }, + { + "epoch": 0.18232195597235332, + "grad_norm": 1.2007658249977458, + "learning_rate": 4.989341936186621e-06, + "loss": 0.4494, + "step": 2994 + }, + { + "epoch": 0.1823828517492312, + "grad_norm": 1.1277211635797146, + "learning_rate": 4.989334574528533e-06, + "loss": 0.453, + "step": 2995 + }, + { + "epoch": 0.18244374752610906, + "grad_norm": 1.0474446009502603, + "learning_rate": 4.989327210334364e-06, + "loss": 0.4918, + "step": 2996 + }, + { + "epoch": 0.18250464330298694, + "grad_norm": 1.073641760455255, + "learning_rate": 4.989319843604119e-06, + "loss": 0.4617, + "step": 2997 + }, + { + "epoch": 0.1825655390798648, + "grad_norm": 0.9924653507476717, + "learning_rate": 4.989312474337808e-06, + "loss": 0.5645, + "step": 2998 + }, + { + "epoch": 0.18262643485674268, + "grad_norm": 1.11497310186271, + "learning_rate": 4.989305102535436e-06, + "loss": 0.5119, + "step": 2999 + }, + { + "epoch": 0.18268733063362055, + "grad_norm": 1.0789060357063553, + "learning_rate": 4.9892977281970114e-06, + "loss": 0.5191, + "step": 3000 + }, + { + "epoch": 0.18274822641049843, + "grad_norm": 1.0863450875460754, + "learning_rate": 4.9892903513225434e-06, + "loss": 0.4847, + "step": 3001 + }, + { + "epoch": 0.1828091221873763, + "grad_norm": 1.1876218428985053, + "learning_rate": 4.989282971912037e-06, + "loss": 0.4852, + "step": 3002 + }, + { + "epoch": 0.18287001796425417, + "grad_norm": 1.0650240049353972, + "learning_rate": 4.989275589965501e-06, + "loss": 0.5408, + "step": 3003 + }, + { + "epoch": 0.18293091374113205, + "grad_norm": 0.9431314809732075, + "learning_rate": 4.989268205482943e-06, + "loss": 0.557, + "step": 3004 + }, + { + "epoch": 0.18299180951800992, + "grad_norm": 1.0533349605804216, + "learning_rate": 4.9892608184643695e-06, + "loss": 0.5107, + "step": 3005 + }, + { + "epoch": 0.1830527052948878, + "grad_norm": 1.0708328504203484, + "learning_rate": 4.98925342890979e-06, + "loss": 0.5094, + "step": 3006 + }, + { + "epoch": 0.18311360107176566, + "grad_norm": 1.1018991035143113, + "learning_rate": 4.989246036819211e-06, + "loss": 0.512, + "step": 3007 + }, + { + "epoch": 0.18317449684864354, + "grad_norm": 1.1015862906961902, + "learning_rate": 4.989238642192639e-06, + "loss": 0.477, + "step": 3008 + }, + { + "epoch": 0.1832353926255214, + "grad_norm": 1.0224091797985666, + "learning_rate": 4.9892312450300826e-06, + "loss": 0.4809, + "step": 3009 + }, + { + "epoch": 0.18329628840239928, + "grad_norm": 1.0298460027193963, + "learning_rate": 4.98922384533155e-06, + "loss": 0.5135, + "step": 3010 + }, + { + "epoch": 0.18335718417927716, + "grad_norm": 1.0109119530362292, + "learning_rate": 4.989216443097047e-06, + "loss": 0.4788, + "step": 3011 + }, + { + "epoch": 0.18341807995615503, + "grad_norm": 0.9957525961387114, + "learning_rate": 4.9892090383265824e-06, + "loss": 0.5582, + "step": 3012 + }, + { + "epoch": 0.1834789757330329, + "grad_norm": 1.0487742648547973, + "learning_rate": 4.989201631020163e-06, + "loss": 0.5073, + "step": 3013 + }, + { + "epoch": 0.1835398715099108, + "grad_norm": 1.0094848994284027, + "learning_rate": 4.989194221177797e-06, + "loss": 0.4649, + "step": 3014 + }, + { + "epoch": 0.18360076728678867, + "grad_norm": 1.0831544403658697, + "learning_rate": 4.989186808799492e-06, + "loss": 0.5346, + "step": 3015 + }, + { + "epoch": 0.18366166306366655, + "grad_norm": 1.083137002844984, + "learning_rate": 4.989179393885254e-06, + "loss": 0.5279, + "step": 3016 + }, + { + "epoch": 0.18372255884054442, + "grad_norm": 0.9893901741364975, + "learning_rate": 4.989171976435093e-06, + "loss": 0.5521, + "step": 3017 + }, + { + "epoch": 0.1837834546174223, + "grad_norm": 1.0750113109511321, + "learning_rate": 4.989164556449016e-06, + "loss": 0.4733, + "step": 3018 + }, + { + "epoch": 0.18384435039430017, + "grad_norm": 1.132894492116815, + "learning_rate": 4.989157133927028e-06, + "loss": 0.5353, + "step": 3019 + }, + { + "epoch": 0.18390524617117804, + "grad_norm": 1.03379568012454, + "learning_rate": 4.98914970886914e-06, + "loss": 0.4748, + "step": 3020 + }, + { + "epoch": 0.1839661419480559, + "grad_norm": 1.0168347488630651, + "learning_rate": 4.9891422812753575e-06, + "loss": 0.4965, + "step": 3021 + }, + { + "epoch": 0.18402703772493378, + "grad_norm": 1.0081124354040771, + "learning_rate": 4.9891348511456885e-06, + "loss": 0.474, + "step": 3022 + }, + { + "epoch": 0.18408793350181166, + "grad_norm": 1.038522484221617, + "learning_rate": 4.98912741848014e-06, + "loss": 0.4785, + "step": 3023 + }, + { + "epoch": 0.18414882927868953, + "grad_norm": 1.089000941554744, + "learning_rate": 4.989119983278722e-06, + "loss": 0.5265, + "step": 3024 + }, + { + "epoch": 0.1842097250555674, + "grad_norm": 0.9390049464364594, + "learning_rate": 4.989112545541438e-06, + "loss": 0.5093, + "step": 3025 + }, + { + "epoch": 0.18427062083244528, + "grad_norm": 1.0033229653127544, + "learning_rate": 4.989105105268299e-06, + "loss": 0.5488, + "step": 3026 + }, + { + "epoch": 0.18433151660932315, + "grad_norm": 0.9758999528479504, + "learning_rate": 4.9890976624593125e-06, + "loss": 0.5276, + "step": 3027 + }, + { + "epoch": 0.18439241238620102, + "grad_norm": 1.129049411522796, + "learning_rate": 4.989090217114484e-06, + "loss": 0.4616, + "step": 3028 + }, + { + "epoch": 0.1844533081630789, + "grad_norm": 1.080919836895642, + "learning_rate": 4.989082769233822e-06, + "loss": 0.4492, + "step": 3029 + }, + { + "epoch": 0.18451420393995677, + "grad_norm": 1.1170419624393422, + "learning_rate": 4.989075318817335e-06, + "loss": 0.484, + "step": 3030 + }, + { + "epoch": 0.18457509971683464, + "grad_norm": 1.0493133470708642, + "learning_rate": 4.989067865865029e-06, + "loss": 0.5259, + "step": 3031 + }, + { + "epoch": 0.1846359954937125, + "grad_norm": 1.033773250873303, + "learning_rate": 4.989060410376912e-06, + "loss": 0.4768, + "step": 3032 + }, + { + "epoch": 0.18469689127059039, + "grad_norm": 1.0469351240877973, + "learning_rate": 4.989052952352993e-06, + "loss": 0.4933, + "step": 3033 + }, + { + "epoch": 0.18475778704746826, + "grad_norm": 1.022132531180393, + "learning_rate": 4.9890454917932785e-06, + "loss": 0.4437, + "step": 3034 + }, + { + "epoch": 0.18481868282434613, + "grad_norm": 1.0024032958595672, + "learning_rate": 4.989038028697777e-06, + "loss": 0.56, + "step": 3035 + }, + { + "epoch": 0.184879578601224, + "grad_norm": 0.9571343636151619, + "learning_rate": 4.989030563066494e-06, + "loss": 0.5624, + "step": 3036 + }, + { + "epoch": 0.18494047437810188, + "grad_norm": 1.1287206432652925, + "learning_rate": 4.989023094899438e-06, + "loss": 0.4436, + "step": 3037 + }, + { + "epoch": 0.18500137015497975, + "grad_norm": 1.1416085150434292, + "learning_rate": 4.989015624196618e-06, + "loss": 0.5071, + "step": 3038 + }, + { + "epoch": 0.18506226593185762, + "grad_norm": 1.0910592973325148, + "learning_rate": 4.98900815095804e-06, + "loss": 0.46, + "step": 3039 + }, + { + "epoch": 0.1851231617087355, + "grad_norm": 1.0214799300327766, + "learning_rate": 4.989000675183713e-06, + "loss": 0.4776, + "step": 3040 + }, + { + "epoch": 0.18518405748561337, + "grad_norm": 1.0598411235948164, + "learning_rate": 4.988993196873644e-06, + "loss": 0.4375, + "step": 3041 + }, + { + "epoch": 0.18524495326249124, + "grad_norm": 1.171630937016776, + "learning_rate": 4.98898571602784e-06, + "loss": 0.4628, + "step": 3042 + }, + { + "epoch": 0.1853058490393691, + "grad_norm": 1.0330863070229936, + "learning_rate": 4.988978232646309e-06, + "loss": 0.4895, + "step": 3043 + }, + { + "epoch": 0.185366744816247, + "grad_norm": 1.1238594874082646, + "learning_rate": 4.988970746729059e-06, + "loss": 0.5591, + "step": 3044 + }, + { + "epoch": 0.18542764059312486, + "grad_norm": 1.0619893801895828, + "learning_rate": 4.988963258276097e-06, + "loss": 0.548, + "step": 3045 + }, + { + "epoch": 0.18548853637000273, + "grad_norm": 1.0467144166356988, + "learning_rate": 4.988955767287431e-06, + "loss": 0.4769, + "step": 3046 + }, + { + "epoch": 0.1855494321468806, + "grad_norm": 1.106558556639032, + "learning_rate": 4.988948273763068e-06, + "loss": 0.4904, + "step": 3047 + }, + { + "epoch": 0.18561032792375848, + "grad_norm": 1.0999431464760059, + "learning_rate": 4.988940777703018e-06, + "loss": 0.491, + "step": 3048 + }, + { + "epoch": 0.18567122370063635, + "grad_norm": 1.1492163867976506, + "learning_rate": 4.988933279107285e-06, + "loss": 0.479, + "step": 3049 + }, + { + "epoch": 0.18573211947751422, + "grad_norm": 1.1193422700592082, + "learning_rate": 4.988925777975879e-06, + "loss": 0.5256, + "step": 3050 + }, + { + "epoch": 0.1857930152543921, + "grad_norm": 1.1203731149713823, + "learning_rate": 4.9889182743088085e-06, + "loss": 0.4957, + "step": 3051 + }, + { + "epoch": 0.18585391103126997, + "grad_norm": 0.9775794168476334, + "learning_rate": 4.988910768106079e-06, + "loss": 0.5308, + "step": 3052 + }, + { + "epoch": 0.18591480680814784, + "grad_norm": 1.0460863436336703, + "learning_rate": 4.9889032593676986e-06, + "loss": 0.4671, + "step": 3053 + }, + { + "epoch": 0.18597570258502572, + "grad_norm": 1.1170985164460292, + "learning_rate": 4.988895748093675e-06, + "loss": 0.495, + "step": 3054 + }, + { + "epoch": 0.18603659836190362, + "grad_norm": 1.0083468979837484, + "learning_rate": 4.988888234284017e-06, + "loss": 0.486, + "step": 3055 + }, + { + "epoch": 0.1860974941387815, + "grad_norm": 1.045733163478923, + "learning_rate": 4.988880717938731e-06, + "loss": 0.4743, + "step": 3056 + }, + { + "epoch": 0.18615838991565936, + "grad_norm": 1.0457132333939045, + "learning_rate": 4.988873199057826e-06, + "loss": 0.4576, + "step": 3057 + }, + { + "epoch": 0.18621928569253723, + "grad_norm": 0.9407185743200098, + "learning_rate": 4.988865677641307e-06, + "loss": 0.5319, + "step": 3058 + }, + { + "epoch": 0.1862801814694151, + "grad_norm": 1.0415034843698698, + "learning_rate": 4.988858153689184e-06, + "loss": 0.5215, + "step": 3059 + }, + { + "epoch": 0.18634107724629298, + "grad_norm": 1.012897371019878, + "learning_rate": 4.988850627201465e-06, + "loss": 0.5539, + "step": 3060 + }, + { + "epoch": 0.18640197302317085, + "grad_norm": 1.0442162769630015, + "learning_rate": 4.9888430981781554e-06, + "loss": 0.4976, + "step": 3061 + }, + { + "epoch": 0.18646286880004873, + "grad_norm": 0.9793823382720429, + "learning_rate": 4.988835566619266e-06, + "loss": 0.5533, + "step": 3062 + }, + { + "epoch": 0.1865237645769266, + "grad_norm": 1.1192364806004569, + "learning_rate": 4.988828032524801e-06, + "loss": 0.4884, + "step": 3063 + }, + { + "epoch": 0.18658466035380447, + "grad_norm": 1.0169373219089992, + "learning_rate": 4.98882049589477e-06, + "loss": 0.5279, + "step": 3064 + }, + { + "epoch": 0.18664555613068234, + "grad_norm": 1.0506242110997424, + "learning_rate": 4.9888129567291806e-06, + "loss": 0.4899, + "step": 3065 + }, + { + "epoch": 0.18670645190756022, + "grad_norm": 1.0578632474043266, + "learning_rate": 4.9888054150280404e-06, + "loss": 0.5536, + "step": 3066 + }, + { + "epoch": 0.1867673476844381, + "grad_norm": 1.0370030937209052, + "learning_rate": 4.988797870791357e-06, + "loss": 0.4716, + "step": 3067 + }, + { + "epoch": 0.18682824346131596, + "grad_norm": 1.1605503819769973, + "learning_rate": 4.988790324019138e-06, + "loss": 0.5399, + "step": 3068 + }, + { + "epoch": 0.18688913923819384, + "grad_norm": 1.0896355402098847, + "learning_rate": 4.988782774711392e-06, + "loss": 0.5358, + "step": 3069 + }, + { + "epoch": 0.1869500350150717, + "grad_norm": 1.0306848895112284, + "learning_rate": 4.988775222868124e-06, + "loss": 0.5177, + "step": 3070 + }, + { + "epoch": 0.18701093079194958, + "grad_norm": 1.0379050361482678, + "learning_rate": 4.988767668489345e-06, + "loss": 0.5255, + "step": 3071 + }, + { + "epoch": 0.18707182656882745, + "grad_norm": 1.1252450947885337, + "learning_rate": 4.98876011157506e-06, + "loss": 0.4696, + "step": 3072 + }, + { + "epoch": 0.18713272234570533, + "grad_norm": 1.0313780538029533, + "learning_rate": 4.98875255212528e-06, + "loss": 0.4762, + "step": 3073 + }, + { + "epoch": 0.1871936181225832, + "grad_norm": 0.9040937955286812, + "learning_rate": 4.98874499014001e-06, + "loss": 0.5599, + "step": 3074 + }, + { + "epoch": 0.18725451389946107, + "grad_norm": 1.0773671201889772, + "learning_rate": 4.988737425619256e-06, + "loss": 0.5287, + "step": 3075 + }, + { + "epoch": 0.18731540967633895, + "grad_norm": 1.1293866134666655, + "learning_rate": 4.98872985856303e-06, + "loss": 0.5434, + "step": 3076 + }, + { + "epoch": 0.18737630545321682, + "grad_norm": 1.0291282573916793, + "learning_rate": 4.988722288971338e-06, + "loss": 0.5629, + "step": 3077 + }, + { + "epoch": 0.1874372012300947, + "grad_norm": 1.110238853882906, + "learning_rate": 4.9887147168441876e-06, + "loss": 0.4752, + "step": 3078 + }, + { + "epoch": 0.18749809700697256, + "grad_norm": 1.0981776575351196, + "learning_rate": 4.988707142181586e-06, + "loss": 0.4702, + "step": 3079 + }, + { + "epoch": 0.18755899278385044, + "grad_norm": 0.9240959712827479, + "learning_rate": 4.9886995649835415e-06, + "loss": 0.4986, + "step": 3080 + }, + { + "epoch": 0.1876198885607283, + "grad_norm": 1.1673560251932178, + "learning_rate": 4.988691985250061e-06, + "loss": 0.534, + "step": 3081 + }, + { + "epoch": 0.18768078433760618, + "grad_norm": 1.0855676509897174, + "learning_rate": 4.988684402981153e-06, + "loss": 0.4757, + "step": 3082 + }, + { + "epoch": 0.18774168011448406, + "grad_norm": 1.0299972609954193, + "learning_rate": 4.9886768181768255e-06, + "loss": 0.5139, + "step": 3083 + }, + { + "epoch": 0.18780257589136193, + "grad_norm": 1.0445777009179282, + "learning_rate": 4.988669230837085e-06, + "loss": 0.4757, + "step": 3084 + }, + { + "epoch": 0.1878634716682398, + "grad_norm": 1.0583623251895278, + "learning_rate": 4.9886616409619405e-06, + "loss": 0.5244, + "step": 3085 + }, + { + "epoch": 0.18792436744511767, + "grad_norm": 1.0838038439345008, + "learning_rate": 4.988654048551399e-06, + "loss": 0.5063, + "step": 3086 + }, + { + "epoch": 0.18798526322199555, + "grad_norm": 1.0905194937578422, + "learning_rate": 4.988646453605469e-06, + "loss": 0.5512, + "step": 3087 + }, + { + "epoch": 0.18804615899887342, + "grad_norm": 1.0450965462896307, + "learning_rate": 4.9886388561241576e-06, + "loss": 0.5263, + "step": 3088 + }, + { + "epoch": 0.1881070547757513, + "grad_norm": 1.0723681318009226, + "learning_rate": 4.988631256107473e-06, + "loss": 0.5751, + "step": 3089 + }, + { + "epoch": 0.18816795055262917, + "grad_norm": 1.018079991461013, + "learning_rate": 4.9886236535554225e-06, + "loss": 0.4786, + "step": 3090 + }, + { + "epoch": 0.18822884632950704, + "grad_norm": 1.163687511763957, + "learning_rate": 4.988616048468013e-06, + "loss": 0.4306, + "step": 3091 + }, + { + "epoch": 0.1882897421063849, + "grad_norm": 0.9781867703195624, + "learning_rate": 4.988608440845254e-06, + "loss": 0.5342, + "step": 3092 + }, + { + "epoch": 0.18835063788326278, + "grad_norm": 1.062754565652498, + "learning_rate": 4.988600830687153e-06, + "loss": 0.5541, + "step": 3093 + }, + { + "epoch": 0.18841153366014066, + "grad_norm": 1.0349173184405176, + "learning_rate": 4.988593217993716e-06, + "loss": 0.5653, + "step": 3094 + }, + { + "epoch": 0.18847242943701853, + "grad_norm": 1.0226786913123496, + "learning_rate": 4.988585602764953e-06, + "loss": 0.5388, + "step": 3095 + }, + { + "epoch": 0.18853332521389643, + "grad_norm": 0.9925608451651292, + "learning_rate": 4.98857798500087e-06, + "loss": 0.5465, + "step": 3096 + }, + { + "epoch": 0.1885942209907743, + "grad_norm": 1.0194618590132898, + "learning_rate": 4.988570364701476e-06, + "loss": 0.5374, + "step": 3097 + }, + { + "epoch": 0.18865511676765218, + "grad_norm": 1.0097926064579354, + "learning_rate": 4.9885627418667775e-06, + "loss": 0.513, + "step": 3098 + }, + { + "epoch": 0.18871601254453005, + "grad_norm": 1.0437458765460037, + "learning_rate": 4.988555116496784e-06, + "loss": 0.5161, + "step": 3099 + }, + { + "epoch": 0.18877690832140792, + "grad_norm": 1.0781017165729385, + "learning_rate": 4.988547488591502e-06, + "loss": 0.4861, + "step": 3100 + }, + { + "epoch": 0.1888378040982858, + "grad_norm": 1.0800027826757872, + "learning_rate": 4.988539858150939e-06, + "loss": 0.4468, + "step": 3101 + }, + { + "epoch": 0.18889869987516367, + "grad_norm": 0.9314685023814835, + "learning_rate": 4.988532225175103e-06, + "loss": 0.566, + "step": 3102 + }, + { + "epoch": 0.18895959565204154, + "grad_norm": 1.062574987024794, + "learning_rate": 4.9885245896640034e-06, + "loss": 0.4794, + "step": 3103 + }, + { + "epoch": 0.1890204914289194, + "grad_norm": 0.9310620848859642, + "learning_rate": 4.988516951617645e-06, + "loss": 0.6073, + "step": 3104 + }, + { + "epoch": 0.18908138720579729, + "grad_norm": 1.0303269523485012, + "learning_rate": 4.98850931103604e-06, + "loss": 0.4856, + "step": 3105 + }, + { + "epoch": 0.18914228298267516, + "grad_norm": 1.1065970629230184, + "learning_rate": 4.988501667919191e-06, + "loss": 0.4813, + "step": 3106 + }, + { + "epoch": 0.18920317875955303, + "grad_norm": 1.047502282902639, + "learning_rate": 4.98849402226711e-06, + "loss": 0.5464, + "step": 3107 + }, + { + "epoch": 0.1892640745364309, + "grad_norm": 1.157059487833186, + "learning_rate": 4.988486374079802e-06, + "loss": 0.4938, + "step": 3108 + }, + { + "epoch": 0.18932497031330878, + "grad_norm": 1.0013730434097707, + "learning_rate": 4.988478723357275e-06, + "loss": 0.4469, + "step": 3109 + }, + { + "epoch": 0.18938586609018665, + "grad_norm": 1.1150099675759806, + "learning_rate": 4.988471070099538e-06, + "loss": 0.4761, + "step": 3110 + }, + { + "epoch": 0.18944676186706452, + "grad_norm": 1.1314015736547756, + "learning_rate": 4.9884634143066e-06, + "loss": 0.4904, + "step": 3111 + }, + { + "epoch": 0.1895076576439424, + "grad_norm": 1.0373527964458378, + "learning_rate": 4.988455755978466e-06, + "loss": 0.5333, + "step": 3112 + }, + { + "epoch": 0.18956855342082027, + "grad_norm": 1.1539633642853337, + "learning_rate": 4.988448095115145e-06, + "loss": 0.4647, + "step": 3113 + }, + { + "epoch": 0.18962944919769814, + "grad_norm": 0.9830996599738232, + "learning_rate": 4.988440431716644e-06, + "loss": 0.5837, + "step": 3114 + }, + { + "epoch": 0.18969034497457601, + "grad_norm": 0.9510612537312464, + "learning_rate": 4.988432765782973e-06, + "loss": 0.5458, + "step": 3115 + }, + { + "epoch": 0.1897512407514539, + "grad_norm": 1.1034487721754829, + "learning_rate": 4.9884250973141385e-06, + "loss": 0.5219, + "step": 3116 + }, + { + "epoch": 0.18981213652833176, + "grad_norm": 1.0320066124665832, + "learning_rate": 4.988417426310148e-06, + "loss": 0.559, + "step": 3117 + }, + { + "epoch": 0.18987303230520963, + "grad_norm": 1.0497695053984155, + "learning_rate": 4.988409752771009e-06, + "loss": 0.5278, + "step": 3118 + }, + { + "epoch": 0.1899339280820875, + "grad_norm": 1.083058047420587, + "learning_rate": 4.98840207669673e-06, + "loss": 0.4975, + "step": 3119 + }, + { + "epoch": 0.18999482385896538, + "grad_norm": 1.0587877532811245, + "learning_rate": 4.9883943980873195e-06, + "loss": 0.5286, + "step": 3120 + }, + { + "epoch": 0.19005571963584325, + "grad_norm": 1.1183060151104027, + "learning_rate": 4.988386716942783e-06, + "loss": 0.4597, + "step": 3121 + }, + { + "epoch": 0.19011661541272112, + "grad_norm": 1.0190735284735564, + "learning_rate": 4.988379033263131e-06, + "loss": 0.4644, + "step": 3122 + }, + { + "epoch": 0.190177511189599, + "grad_norm": 1.0653990685650443, + "learning_rate": 4.9883713470483706e-06, + "loss": 0.4993, + "step": 3123 + }, + { + "epoch": 0.19023840696647687, + "grad_norm": 1.079521542958363, + "learning_rate": 4.9883636582985084e-06, + "loss": 0.4744, + "step": 3124 + }, + { + "epoch": 0.19029930274335474, + "grad_norm": 1.0747730145974308, + "learning_rate": 4.988355967013553e-06, + "loss": 0.5, + "step": 3125 + }, + { + "epoch": 0.19036019852023262, + "grad_norm": 1.1653140083068911, + "learning_rate": 4.9883482731935126e-06, + "loss": 0.4934, + "step": 3126 + }, + { + "epoch": 0.1904210942971105, + "grad_norm": 1.2360558050270674, + "learning_rate": 4.988340576838394e-06, + "loss": 0.4598, + "step": 3127 + }, + { + "epoch": 0.19048199007398836, + "grad_norm": 1.0610528544100235, + "learning_rate": 4.988332877948207e-06, + "loss": 0.4588, + "step": 3128 + }, + { + "epoch": 0.19054288585086623, + "grad_norm": 0.9975550317777437, + "learning_rate": 4.988325176522957e-06, + "loss": 0.5461, + "step": 3129 + }, + { + "epoch": 0.1906037816277441, + "grad_norm": 1.0262150676831667, + "learning_rate": 4.9883174725626535e-06, + "loss": 0.5511, + "step": 3130 + }, + { + "epoch": 0.19066467740462198, + "grad_norm": 1.1730452700525547, + "learning_rate": 4.988309766067304e-06, + "loss": 0.4806, + "step": 3131 + }, + { + "epoch": 0.19072557318149985, + "grad_norm": 0.9802355486311569, + "learning_rate": 4.9883020570369166e-06, + "loss": 0.543, + "step": 3132 + }, + { + "epoch": 0.19078646895837773, + "grad_norm": 1.1761761779364406, + "learning_rate": 4.988294345471499e-06, + "loss": 0.5024, + "step": 3133 + }, + { + "epoch": 0.1908473647352556, + "grad_norm": 1.029415293917612, + "learning_rate": 4.988286631371058e-06, + "loss": 0.5242, + "step": 3134 + }, + { + "epoch": 0.19090826051213347, + "grad_norm": 1.036146986752756, + "learning_rate": 4.988278914735603e-06, + "loss": 0.4872, + "step": 3135 + }, + { + "epoch": 0.19096915628901134, + "grad_norm": 1.078030684815678, + "learning_rate": 4.9882711955651395e-06, + "loss": 0.4844, + "step": 3136 + }, + { + "epoch": 0.19103005206588924, + "grad_norm": 1.1585890849552485, + "learning_rate": 4.988263473859679e-06, + "loss": 0.4719, + "step": 3137 + }, + { + "epoch": 0.19109094784276712, + "grad_norm": 1.053584530256567, + "learning_rate": 4.9882557496192265e-06, + "loss": 0.5573, + "step": 3138 + }, + { + "epoch": 0.191151843619645, + "grad_norm": 1.0411826106098117, + "learning_rate": 4.988248022843791e-06, + "loss": 0.5103, + "step": 3139 + }, + { + "epoch": 0.19121273939652286, + "grad_norm": 1.0147613710147703, + "learning_rate": 4.98824029353338e-06, + "loss": 0.5632, + "step": 3140 + }, + { + "epoch": 0.19127363517340074, + "grad_norm": 1.0863178672672789, + "learning_rate": 4.9882325616880015e-06, + "loss": 0.559, + "step": 3141 + }, + { + "epoch": 0.1913345309502786, + "grad_norm": 1.0987538450172691, + "learning_rate": 4.988224827307664e-06, + "loss": 0.5052, + "step": 3142 + }, + { + "epoch": 0.19139542672715648, + "grad_norm": 1.0325870974989728, + "learning_rate": 4.988217090392374e-06, + "loss": 0.5316, + "step": 3143 + }, + { + "epoch": 0.19145632250403435, + "grad_norm": 1.0436837763778004, + "learning_rate": 4.9882093509421405e-06, + "loss": 0.5006, + "step": 3144 + }, + { + "epoch": 0.19151721828091223, + "grad_norm": 1.1022191327661157, + "learning_rate": 4.988201608956971e-06, + "loss": 0.4897, + "step": 3145 + }, + { + "epoch": 0.1915781140577901, + "grad_norm": 1.0303785080004753, + "learning_rate": 4.988193864436873e-06, + "loss": 0.505, + "step": 3146 + }, + { + "epoch": 0.19163900983466797, + "grad_norm": 1.1085962048382996, + "learning_rate": 4.9881861173818555e-06, + "loss": 0.5175, + "step": 3147 + }, + { + "epoch": 0.19169990561154585, + "grad_norm": 1.0359501748092508, + "learning_rate": 4.988178367791926e-06, + "loss": 0.4706, + "step": 3148 + }, + { + "epoch": 0.19176080138842372, + "grad_norm": 0.9227329846658368, + "learning_rate": 4.9881706156670916e-06, + "loss": 0.513, + "step": 3149 + }, + { + "epoch": 0.1918216971653016, + "grad_norm": 0.9825607415512168, + "learning_rate": 4.9881628610073605e-06, + "loss": 0.5088, + "step": 3150 + }, + { + "epoch": 0.19188259294217946, + "grad_norm": 1.0927358120671489, + "learning_rate": 4.988155103812741e-06, + "loss": 0.5016, + "step": 3151 + }, + { + "epoch": 0.19194348871905734, + "grad_norm": 1.0993913066165892, + "learning_rate": 4.988147344083241e-06, + "loss": 0.4893, + "step": 3152 + }, + { + "epoch": 0.1920043844959352, + "grad_norm": 1.0646958458723192, + "learning_rate": 4.9881395818188675e-06, + "loss": 0.4796, + "step": 3153 + }, + { + "epoch": 0.19206528027281308, + "grad_norm": 1.038909682278899, + "learning_rate": 4.98813181701963e-06, + "loss": 0.4707, + "step": 3154 + }, + { + "epoch": 0.19212617604969096, + "grad_norm": 1.105549053378795, + "learning_rate": 4.988124049685535e-06, + "loss": 0.4841, + "step": 3155 + }, + { + "epoch": 0.19218707182656883, + "grad_norm": 1.025482810721373, + "learning_rate": 4.9881162798165914e-06, + "loss": 0.4628, + "step": 3156 + }, + { + "epoch": 0.1922479676034467, + "grad_norm": 1.0514034982120968, + "learning_rate": 4.988108507412807e-06, + "loss": 0.5354, + "step": 3157 + }, + { + "epoch": 0.19230886338032457, + "grad_norm": 1.0588941804950456, + "learning_rate": 4.988100732474188e-06, + "loss": 0.5514, + "step": 3158 + }, + { + "epoch": 0.19236975915720245, + "grad_norm": 1.0023212688980707, + "learning_rate": 4.988092955000744e-06, + "loss": 0.5722, + "step": 3159 + }, + { + "epoch": 0.19243065493408032, + "grad_norm": 1.0677189036951786, + "learning_rate": 4.988085174992483e-06, + "loss": 0.5503, + "step": 3160 + }, + { + "epoch": 0.1924915507109582, + "grad_norm": 1.1897781092466693, + "learning_rate": 4.988077392449413e-06, + "loss": 0.5152, + "step": 3161 + }, + { + "epoch": 0.19255244648783607, + "grad_norm": 1.0736021221191296, + "learning_rate": 4.988069607371542e-06, + "loss": 0.5034, + "step": 3162 + }, + { + "epoch": 0.19261334226471394, + "grad_norm": 1.023592846427027, + "learning_rate": 4.988061819758876e-06, + "loss": 0.5173, + "step": 3163 + }, + { + "epoch": 0.1926742380415918, + "grad_norm": 1.1918268470612021, + "learning_rate": 4.9880540296114245e-06, + "loss": 0.491, + "step": 3164 + }, + { + "epoch": 0.19273513381846968, + "grad_norm": 1.0811542857785914, + "learning_rate": 4.988046236929196e-06, + "loss": 0.4888, + "step": 3165 + }, + { + "epoch": 0.19279602959534756, + "grad_norm": 1.0371334081809283, + "learning_rate": 4.9880384417121975e-06, + "loss": 0.4849, + "step": 3166 + }, + { + "epoch": 0.19285692537222543, + "grad_norm": 1.0620798484467922, + "learning_rate": 4.988030643960437e-06, + "loss": 0.484, + "step": 3167 + }, + { + "epoch": 0.1929178211491033, + "grad_norm": 1.020650753693526, + "learning_rate": 4.9880228436739234e-06, + "loss": 0.4841, + "step": 3168 + }, + { + "epoch": 0.19297871692598118, + "grad_norm": 0.9957039716568098, + "learning_rate": 4.988015040852663e-06, + "loss": 0.5475, + "step": 3169 + }, + { + "epoch": 0.19303961270285905, + "grad_norm": 1.1303408708347111, + "learning_rate": 4.988007235496665e-06, + "loss": 0.5083, + "step": 3170 + }, + { + "epoch": 0.19310050847973692, + "grad_norm": 1.0073877406230685, + "learning_rate": 4.987999427605936e-06, + "loss": 0.4947, + "step": 3171 + }, + { + "epoch": 0.1931614042566148, + "grad_norm": 1.1028715398145161, + "learning_rate": 4.987991617180486e-06, + "loss": 0.5387, + "step": 3172 + }, + { + "epoch": 0.19322230003349267, + "grad_norm": 1.1315143459191934, + "learning_rate": 4.987983804220321e-06, + "loss": 0.5107, + "step": 3173 + }, + { + "epoch": 0.19328319581037054, + "grad_norm": 1.0332452839090394, + "learning_rate": 4.98797598872545e-06, + "loss": 0.5467, + "step": 3174 + }, + { + "epoch": 0.1933440915872484, + "grad_norm": 1.054755810374611, + "learning_rate": 4.9879681706958815e-06, + "loss": 0.4783, + "step": 3175 + }, + { + "epoch": 0.19340498736412629, + "grad_norm": 1.1216639429077573, + "learning_rate": 4.987960350131622e-06, + "loss": 0.4728, + "step": 3176 + }, + { + "epoch": 0.19346588314100416, + "grad_norm": 1.0781769963575571, + "learning_rate": 4.987952527032681e-06, + "loss": 0.5095, + "step": 3177 + }, + { + "epoch": 0.19352677891788206, + "grad_norm": 1.011018969240438, + "learning_rate": 4.987944701399065e-06, + "loss": 0.4925, + "step": 3178 + }, + { + "epoch": 0.19358767469475993, + "grad_norm": 1.0065626894473172, + "learning_rate": 4.987936873230783e-06, + "loss": 0.507, + "step": 3179 + }, + { + "epoch": 0.1936485704716378, + "grad_norm": 1.1183723990474843, + "learning_rate": 4.987929042527842e-06, + "loss": 0.4946, + "step": 3180 + }, + { + "epoch": 0.19370946624851568, + "grad_norm": 1.0779075195242143, + "learning_rate": 4.987921209290251e-06, + "loss": 0.4723, + "step": 3181 + }, + { + "epoch": 0.19377036202539355, + "grad_norm": 1.0714898262510395, + "learning_rate": 4.9879133735180185e-06, + "loss": 0.4979, + "step": 3182 + }, + { + "epoch": 0.19383125780227142, + "grad_norm": 1.019113870456135, + "learning_rate": 4.9879055352111505e-06, + "loss": 0.4565, + "step": 3183 + }, + { + "epoch": 0.1938921535791493, + "grad_norm": 1.0106802990403352, + "learning_rate": 4.987897694369657e-06, + "loss": 0.4722, + "step": 3184 + }, + { + "epoch": 0.19395304935602717, + "grad_norm": 1.1867391149736086, + "learning_rate": 4.987889850993544e-06, + "loss": 0.53, + "step": 3185 + }, + { + "epoch": 0.19401394513290504, + "grad_norm": 1.179822480692507, + "learning_rate": 4.987882005082821e-06, + "loss": 0.5782, + "step": 3186 + }, + { + "epoch": 0.19407484090978291, + "grad_norm": 1.113594489933749, + "learning_rate": 4.987874156637496e-06, + "loss": 0.5135, + "step": 3187 + }, + { + "epoch": 0.1941357366866608, + "grad_norm": 1.0123597016044323, + "learning_rate": 4.987866305657576e-06, + "loss": 0.5036, + "step": 3188 + }, + { + "epoch": 0.19419663246353866, + "grad_norm": 0.9990215332766489, + "learning_rate": 4.9878584521430696e-06, + "loss": 0.5208, + "step": 3189 + }, + { + "epoch": 0.19425752824041653, + "grad_norm": 1.0188402684799958, + "learning_rate": 4.987850596093985e-06, + "loss": 0.4787, + "step": 3190 + }, + { + "epoch": 0.1943184240172944, + "grad_norm": 1.0556735327733713, + "learning_rate": 4.987842737510329e-06, + "loss": 0.5044, + "step": 3191 + }, + { + "epoch": 0.19437931979417228, + "grad_norm": 0.9917501303232851, + "learning_rate": 4.987834876392112e-06, + "loss": 0.5365, + "step": 3192 + }, + { + "epoch": 0.19444021557105015, + "grad_norm": 1.1099584227472574, + "learning_rate": 4.987827012739339e-06, + "loss": 0.5065, + "step": 3193 + }, + { + "epoch": 0.19450111134792802, + "grad_norm": 1.1198590335615046, + "learning_rate": 4.987819146552021e-06, + "loss": 0.5225, + "step": 3194 + }, + { + "epoch": 0.1945620071248059, + "grad_norm": 1.0709168771645194, + "learning_rate": 4.9878112778301635e-06, + "loss": 0.5158, + "step": 3195 + }, + { + "epoch": 0.19462290290168377, + "grad_norm": 1.1093254576762202, + "learning_rate": 4.9878034065737765e-06, + "loss": 0.5036, + "step": 3196 + }, + { + "epoch": 0.19468379867856164, + "grad_norm": 1.0809266317558084, + "learning_rate": 4.987795532782866e-06, + "loss": 0.449, + "step": 3197 + }, + { + "epoch": 0.19474469445543952, + "grad_norm": 1.127964626464581, + "learning_rate": 4.987787656457443e-06, + "loss": 0.5623, + "step": 3198 + }, + { + "epoch": 0.1948055902323174, + "grad_norm": 1.0776356894319874, + "learning_rate": 4.987779777597511e-06, + "loss": 0.53, + "step": 3199 + }, + { + "epoch": 0.19486648600919526, + "grad_norm": 1.0444538601826825, + "learning_rate": 4.987771896203083e-06, + "loss": 0.5808, + "step": 3200 + }, + { + "epoch": 0.19492738178607313, + "grad_norm": 1.0282497304504314, + "learning_rate": 4.987764012274164e-06, + "loss": 0.5859, + "step": 3201 + }, + { + "epoch": 0.194988277562951, + "grad_norm": 0.9318011969766758, + "learning_rate": 4.987756125810762e-06, + "loss": 0.6012, + "step": 3202 + }, + { + "epoch": 0.19504917333982888, + "grad_norm": 0.9649537481630093, + "learning_rate": 4.987748236812886e-06, + "loss": 0.5001, + "step": 3203 + }, + { + "epoch": 0.19511006911670675, + "grad_norm": 1.0468080889407116, + "learning_rate": 4.987740345280545e-06, + "loss": 0.4992, + "step": 3204 + }, + { + "epoch": 0.19517096489358463, + "grad_norm": 0.9910361032193095, + "learning_rate": 4.987732451213745e-06, + "loss": 0.5306, + "step": 3205 + }, + { + "epoch": 0.1952318606704625, + "grad_norm": 1.0470719515921814, + "learning_rate": 4.987724554612494e-06, + "loss": 0.5152, + "step": 3206 + }, + { + "epoch": 0.19529275644734037, + "grad_norm": 1.024158393063088, + "learning_rate": 4.987716655476802e-06, + "loss": 0.5597, + "step": 3207 + }, + { + "epoch": 0.19535365222421824, + "grad_norm": 1.0758850720680124, + "learning_rate": 4.987708753806676e-06, + "loss": 0.5469, + "step": 3208 + }, + { + "epoch": 0.19541454800109612, + "grad_norm": 1.0604326777606379, + "learning_rate": 4.987700849602124e-06, + "loss": 0.4863, + "step": 3209 + }, + { + "epoch": 0.195475443777974, + "grad_norm": 1.189213039268939, + "learning_rate": 4.987692942863153e-06, + "loss": 0.5077, + "step": 3210 + }, + { + "epoch": 0.19553633955485186, + "grad_norm": 1.1369208137103608, + "learning_rate": 4.9876850335897746e-06, + "loss": 0.5088, + "step": 3211 + }, + { + "epoch": 0.19559723533172974, + "grad_norm": 0.9957412994886607, + "learning_rate": 4.987677121781993e-06, + "loss": 0.5545, + "step": 3212 + }, + { + "epoch": 0.1956581311086076, + "grad_norm": 1.0648796716483842, + "learning_rate": 4.987669207439817e-06, + "loss": 0.5798, + "step": 3213 + }, + { + "epoch": 0.19571902688548548, + "grad_norm": 1.0579624207967906, + "learning_rate": 4.987661290563256e-06, + "loss": 0.5222, + "step": 3214 + }, + { + "epoch": 0.19577992266236335, + "grad_norm": 1.098008610553635, + "learning_rate": 4.987653371152318e-06, + "loss": 0.4694, + "step": 3215 + }, + { + "epoch": 0.19584081843924123, + "grad_norm": 0.9915189296561396, + "learning_rate": 4.987645449207009e-06, + "loss": 0.5393, + "step": 3216 + }, + { + "epoch": 0.1959017142161191, + "grad_norm": 0.9755372838196928, + "learning_rate": 4.98763752472734e-06, + "loss": 0.5299, + "step": 3217 + }, + { + "epoch": 0.19596260999299697, + "grad_norm": 0.9608275593306992, + "learning_rate": 4.987629597713317e-06, + "loss": 0.4954, + "step": 3218 + }, + { + "epoch": 0.19602350576987487, + "grad_norm": 1.0009387703745733, + "learning_rate": 4.987621668164948e-06, + "loss": 0.551, + "step": 3219 + }, + { + "epoch": 0.19608440154675275, + "grad_norm": 1.004700234895283, + "learning_rate": 4.987613736082243e-06, + "loss": 0.515, + "step": 3220 + }, + { + "epoch": 0.19614529732363062, + "grad_norm": 1.0240680083799376, + "learning_rate": 4.987605801465208e-06, + "loss": 0.5023, + "step": 3221 + }, + { + "epoch": 0.1962061931005085, + "grad_norm": 1.1353277963427204, + "learning_rate": 4.987597864313852e-06, + "loss": 0.4733, + "step": 3222 + }, + { + "epoch": 0.19626708887738636, + "grad_norm": 1.1323368548481039, + "learning_rate": 4.987589924628183e-06, + "loss": 0.4665, + "step": 3223 + }, + { + "epoch": 0.19632798465426424, + "grad_norm": 1.0083857958056308, + "learning_rate": 4.987581982408209e-06, + "loss": 0.4698, + "step": 3224 + }, + { + "epoch": 0.1963888804311421, + "grad_norm": 1.101876561519886, + "learning_rate": 4.987574037653939e-06, + "loss": 0.5201, + "step": 3225 + }, + { + "epoch": 0.19644977620801998, + "grad_norm": 1.0385357724438522, + "learning_rate": 4.98756609036538e-06, + "loss": 0.4949, + "step": 3226 + }, + { + "epoch": 0.19651067198489786, + "grad_norm": 1.0989170185838546, + "learning_rate": 4.98755814054254e-06, + "loss": 0.4988, + "step": 3227 + }, + { + "epoch": 0.19657156776177573, + "grad_norm": 1.0997438874409085, + "learning_rate": 4.987550188185428e-06, + "loss": 0.4942, + "step": 3228 + }, + { + "epoch": 0.1966324635386536, + "grad_norm": 1.0221703705411995, + "learning_rate": 4.987542233294051e-06, + "loss": 0.4727, + "step": 3229 + }, + { + "epoch": 0.19669335931553147, + "grad_norm": 1.1484923993384024, + "learning_rate": 4.987534275868418e-06, + "loss": 0.5414, + "step": 3230 + }, + { + "epoch": 0.19675425509240935, + "grad_norm": 1.1288849628210769, + "learning_rate": 4.987526315908536e-06, + "loss": 0.4611, + "step": 3231 + }, + { + "epoch": 0.19681515086928722, + "grad_norm": 1.049018543826783, + "learning_rate": 4.987518353414415e-06, + "loss": 0.5222, + "step": 3232 + }, + { + "epoch": 0.1968760466461651, + "grad_norm": 1.0882157825362022, + "learning_rate": 4.9875103883860625e-06, + "loss": 0.3788, + "step": 3233 + }, + { + "epoch": 0.19693694242304297, + "grad_norm": 1.03730123452156, + "learning_rate": 4.9875024208234846e-06, + "loss": 0.4871, + "step": 3234 + }, + { + "epoch": 0.19699783819992084, + "grad_norm": 1.1340140823314577, + "learning_rate": 4.987494450726692e-06, + "loss": 0.4752, + "step": 3235 + }, + { + "epoch": 0.1970587339767987, + "grad_norm": 1.0889020381029029, + "learning_rate": 4.987486478095691e-06, + "loss": 0.4133, + "step": 3236 + }, + { + "epoch": 0.19711962975367658, + "grad_norm": 1.0835730306400124, + "learning_rate": 4.987478502930491e-06, + "loss": 0.5124, + "step": 3237 + }, + { + "epoch": 0.19718052553055446, + "grad_norm": 1.011117593475404, + "learning_rate": 4.987470525231099e-06, + "loss": 0.5564, + "step": 3238 + }, + { + "epoch": 0.19724142130743233, + "grad_norm": 1.051143300157674, + "learning_rate": 4.987462544997525e-06, + "loss": 0.5737, + "step": 3239 + }, + { + "epoch": 0.1973023170843102, + "grad_norm": 1.1615105684506082, + "learning_rate": 4.987454562229776e-06, + "loss": 0.4485, + "step": 3240 + }, + { + "epoch": 0.19736321286118808, + "grad_norm": 0.9894589031397166, + "learning_rate": 4.987446576927858e-06, + "loss": 0.51, + "step": 3241 + }, + { + "epoch": 0.19742410863806595, + "grad_norm": 1.0264900609834389, + "learning_rate": 4.987438589091783e-06, + "loss": 0.4957, + "step": 3242 + }, + { + "epoch": 0.19748500441494382, + "grad_norm": 1.08499568341251, + "learning_rate": 4.987430598721556e-06, + "loss": 0.5249, + "step": 3243 + }, + { + "epoch": 0.1975459001918217, + "grad_norm": 1.0583340324508366, + "learning_rate": 4.987422605817187e-06, + "loss": 0.6071, + "step": 3244 + }, + { + "epoch": 0.19760679596869957, + "grad_norm": 0.9490820205758849, + "learning_rate": 4.987414610378683e-06, + "loss": 0.5929, + "step": 3245 + }, + { + "epoch": 0.19766769174557744, + "grad_norm": 0.9525404416991204, + "learning_rate": 4.987406612406054e-06, + "loss": 0.5539, + "step": 3246 + }, + { + "epoch": 0.1977285875224553, + "grad_norm": 1.12214901266567, + "learning_rate": 4.987398611899306e-06, + "loss": 0.5092, + "step": 3247 + }, + { + "epoch": 0.19778948329933319, + "grad_norm": 1.1169872755403059, + "learning_rate": 4.987390608858448e-06, + "loss": 0.4791, + "step": 3248 + }, + { + "epoch": 0.19785037907621106, + "grad_norm": 1.0713775062892696, + "learning_rate": 4.987382603283488e-06, + "loss": 0.4942, + "step": 3249 + }, + { + "epoch": 0.19791127485308893, + "grad_norm": 0.9445533732674111, + "learning_rate": 4.987374595174435e-06, + "loss": 0.5835, + "step": 3250 + }, + { + "epoch": 0.1979721706299668, + "grad_norm": 1.0836758146828003, + "learning_rate": 4.987366584531296e-06, + "loss": 0.5126, + "step": 3251 + }, + { + "epoch": 0.19803306640684468, + "grad_norm": 1.0338484031463975, + "learning_rate": 4.98735857135408e-06, + "loss": 0.5069, + "step": 3252 + }, + { + "epoch": 0.19809396218372255, + "grad_norm": 1.0338727038890463, + "learning_rate": 4.987350555642794e-06, + "loss": 0.5635, + "step": 3253 + }, + { + "epoch": 0.19815485796060042, + "grad_norm": 1.0000815441163642, + "learning_rate": 4.9873425373974475e-06, + "loss": 0.6111, + "step": 3254 + }, + { + "epoch": 0.1982157537374783, + "grad_norm": 1.0655956089027716, + "learning_rate": 4.9873345166180485e-06, + "loss": 0.4633, + "step": 3255 + }, + { + "epoch": 0.19827664951435617, + "grad_norm": 0.9905773164134161, + "learning_rate": 4.987326493304605e-06, + "loss": 0.53, + "step": 3256 + }, + { + "epoch": 0.19833754529123404, + "grad_norm": 1.0420324221297466, + "learning_rate": 4.987318467457124e-06, + "loss": 0.4682, + "step": 3257 + }, + { + "epoch": 0.1983984410681119, + "grad_norm": 1.0889203880835376, + "learning_rate": 4.987310439075615e-06, + "loss": 0.4741, + "step": 3258 + }, + { + "epoch": 0.1984593368449898, + "grad_norm": 1.1308007608181079, + "learning_rate": 4.987302408160086e-06, + "loss": 0.5388, + "step": 3259 + }, + { + "epoch": 0.1985202326218677, + "grad_norm": 1.039987577738257, + "learning_rate": 4.987294374710544e-06, + "loss": 0.4757, + "step": 3260 + }, + { + "epoch": 0.19858112839874556, + "grad_norm": 1.040228995440859, + "learning_rate": 4.987286338727e-06, + "loss": 0.505, + "step": 3261 + }, + { + "epoch": 0.19864202417562343, + "grad_norm": 0.9541377439078932, + "learning_rate": 4.9872783002094595e-06, + "loss": 0.5473, + "step": 3262 + }, + { + "epoch": 0.1987029199525013, + "grad_norm": 1.1159772968677604, + "learning_rate": 4.987270259157931e-06, + "loss": 0.4967, + "step": 3263 + }, + { + "epoch": 0.19876381572937918, + "grad_norm": 1.177849498917227, + "learning_rate": 4.987262215572424e-06, + "loss": 0.4678, + "step": 3264 + }, + { + "epoch": 0.19882471150625705, + "grad_norm": 1.0662771493794183, + "learning_rate": 4.987254169452946e-06, + "loss": 0.5119, + "step": 3265 + }, + { + "epoch": 0.19888560728313492, + "grad_norm": 1.1110075222709621, + "learning_rate": 4.987246120799505e-06, + "loss": 0.5244, + "step": 3266 + }, + { + "epoch": 0.1989465030600128, + "grad_norm": 0.98697652632359, + "learning_rate": 4.9872380696121094e-06, + "loss": 0.4993, + "step": 3267 + }, + { + "epoch": 0.19900739883689067, + "grad_norm": 0.9643858950732165, + "learning_rate": 4.987230015890767e-06, + "loss": 0.5422, + "step": 3268 + }, + { + "epoch": 0.19906829461376854, + "grad_norm": 0.9848910186917911, + "learning_rate": 4.987221959635487e-06, + "loss": 0.5099, + "step": 3269 + }, + { + "epoch": 0.19912919039064642, + "grad_norm": 1.097377092250161, + "learning_rate": 4.987213900846277e-06, + "loss": 0.4259, + "step": 3270 + }, + { + "epoch": 0.1991900861675243, + "grad_norm": 1.0694059273531464, + "learning_rate": 4.9872058395231445e-06, + "loss": 0.5585, + "step": 3271 + }, + { + "epoch": 0.19925098194440216, + "grad_norm": 1.0010181390120696, + "learning_rate": 4.9871977756660995e-06, + "loss": 0.49, + "step": 3272 + }, + { + "epoch": 0.19931187772128003, + "grad_norm": 1.0747089242863546, + "learning_rate": 4.987189709275148e-06, + "loss": 0.4767, + "step": 3273 + }, + { + "epoch": 0.1993727734981579, + "grad_norm": 1.0788187211315003, + "learning_rate": 4.9871816403502996e-06, + "loss": 0.5468, + "step": 3274 + }, + { + "epoch": 0.19943366927503578, + "grad_norm": 0.9606248181650854, + "learning_rate": 4.9871735688915624e-06, + "loss": 0.561, + "step": 3275 + }, + { + "epoch": 0.19949456505191365, + "grad_norm": 1.0398606353948758, + "learning_rate": 4.987165494898945e-06, + "loss": 0.5608, + "step": 3276 + }, + { + "epoch": 0.19955546082879153, + "grad_norm": 1.1536631261615098, + "learning_rate": 4.9871574183724546e-06, + "loss": 0.5284, + "step": 3277 + }, + { + "epoch": 0.1996163566056694, + "grad_norm": 1.0089164246926836, + "learning_rate": 4.9871493393121e-06, + "loss": 0.4509, + "step": 3278 + }, + { + "epoch": 0.19967725238254727, + "grad_norm": 1.0255828391987065, + "learning_rate": 4.9871412577178885e-06, + "loss": 0.5086, + "step": 3279 + }, + { + "epoch": 0.19973814815942514, + "grad_norm": 1.1069261317185266, + "learning_rate": 4.98713317358983e-06, + "loss": 0.4645, + "step": 3280 + }, + { + "epoch": 0.19979904393630302, + "grad_norm": 1.0567233258915523, + "learning_rate": 4.987125086927932e-06, + "loss": 0.4467, + "step": 3281 + }, + { + "epoch": 0.1998599397131809, + "grad_norm": 1.108093924729973, + "learning_rate": 4.987116997732202e-06, + "loss": 0.465, + "step": 3282 + }, + { + "epoch": 0.19992083549005876, + "grad_norm": 1.0149309046494535, + "learning_rate": 4.98710890600265e-06, + "loss": 0.576, + "step": 3283 + }, + { + "epoch": 0.19998173126693664, + "grad_norm": 1.0608697256492043, + "learning_rate": 4.9871008117392825e-06, + "loss": 0.626, + "step": 3284 + }, + { + "epoch": 0.2000426270438145, + "grad_norm": 1.095432273508629, + "learning_rate": 4.987092714942109e-06, + "loss": 0.4668, + "step": 3285 + }, + { + "epoch": 0.20010352282069238, + "grad_norm": 1.094811650694829, + "learning_rate": 4.987084615611137e-06, + "loss": 0.4503, + "step": 3286 + }, + { + "epoch": 0.20016441859757025, + "grad_norm": 1.0091459501254498, + "learning_rate": 4.987076513746374e-06, + "loss": 0.5257, + "step": 3287 + }, + { + "epoch": 0.20022531437444813, + "grad_norm": 1.0122473937109446, + "learning_rate": 4.987068409347829e-06, + "loss": 0.5459, + "step": 3288 + }, + { + "epoch": 0.200286210151326, + "grad_norm": 0.9767133508998462, + "learning_rate": 4.9870603024155114e-06, + "loss": 0.5262, + "step": 3289 + }, + { + "epoch": 0.20034710592820387, + "grad_norm": 0.9933332212638218, + "learning_rate": 4.9870521929494285e-06, + "loss": 0.5575, + "step": 3290 + }, + { + "epoch": 0.20040800170508175, + "grad_norm": 1.0583728963142625, + "learning_rate": 4.987044080949588e-06, + "loss": 0.4821, + "step": 3291 + }, + { + "epoch": 0.20046889748195962, + "grad_norm": 1.0747770401529795, + "learning_rate": 4.987035966415999e-06, + "loss": 0.4895, + "step": 3292 + }, + { + "epoch": 0.2005297932588375, + "grad_norm": 0.9942917386885082, + "learning_rate": 4.98702784934867e-06, + "loss": 0.531, + "step": 3293 + }, + { + "epoch": 0.20059068903571536, + "grad_norm": 1.110187542883993, + "learning_rate": 4.987019729747607e-06, + "loss": 0.5135, + "step": 3294 + }, + { + "epoch": 0.20065158481259324, + "grad_norm": 1.0042705987009908, + "learning_rate": 4.987011607612822e-06, + "loss": 0.5461, + "step": 3295 + }, + { + "epoch": 0.2007124805894711, + "grad_norm": 1.0017859098734687, + "learning_rate": 4.98700348294432e-06, + "loss": 0.532, + "step": 3296 + }, + { + "epoch": 0.20077337636634898, + "grad_norm": 1.1463614753245046, + "learning_rate": 4.986995355742111e-06, + "loss": 0.4894, + "step": 3297 + }, + { + "epoch": 0.20083427214322686, + "grad_norm": 1.0617719027465773, + "learning_rate": 4.986987226006203e-06, + "loss": 0.5287, + "step": 3298 + }, + { + "epoch": 0.20089516792010473, + "grad_norm": 0.993686291201932, + "learning_rate": 4.986979093736604e-06, + "loss": 0.5054, + "step": 3299 + }, + { + "epoch": 0.2009560636969826, + "grad_norm": 1.102933250528604, + "learning_rate": 4.9869709589333225e-06, + "loss": 0.5319, + "step": 3300 + }, + { + "epoch": 0.2010169594738605, + "grad_norm": 1.1126797466076614, + "learning_rate": 4.986962821596366e-06, + "loss": 0.4823, + "step": 3301 + }, + { + "epoch": 0.20107785525073837, + "grad_norm": 1.0737820214461593, + "learning_rate": 4.986954681725744e-06, + "loss": 0.5171, + "step": 3302 + }, + { + "epoch": 0.20113875102761625, + "grad_norm": 1.152140377499693, + "learning_rate": 4.986946539321464e-06, + "loss": 0.4746, + "step": 3303 + }, + { + "epoch": 0.20119964680449412, + "grad_norm": 1.0693642283184854, + "learning_rate": 4.986938394383536e-06, + "loss": 0.5235, + "step": 3304 + }, + { + "epoch": 0.201260542581372, + "grad_norm": 1.0599626883407467, + "learning_rate": 4.986930246911965e-06, + "loss": 0.5502, + "step": 3305 + }, + { + "epoch": 0.20132143835824987, + "grad_norm": 1.0067186482890351, + "learning_rate": 4.9869220969067625e-06, + "loss": 0.5103, + "step": 3306 + }, + { + "epoch": 0.20138233413512774, + "grad_norm": 1.0175526948454368, + "learning_rate": 4.986913944367935e-06, + "loss": 0.5041, + "step": 3307 + }, + { + "epoch": 0.2014432299120056, + "grad_norm": 1.1387175062507005, + "learning_rate": 4.986905789295491e-06, + "loss": 0.5012, + "step": 3308 + }, + { + "epoch": 0.20150412568888348, + "grad_norm": 0.9673195860958823, + "learning_rate": 4.986897631689439e-06, + "loss": 0.5562, + "step": 3309 + }, + { + "epoch": 0.20156502146576136, + "grad_norm": 1.1145058741417575, + "learning_rate": 4.986889471549787e-06, + "loss": 0.4741, + "step": 3310 + }, + { + "epoch": 0.20162591724263923, + "grad_norm": 1.0166466270766141, + "learning_rate": 4.986881308876545e-06, + "loss": 0.5706, + "step": 3311 + }, + { + "epoch": 0.2016868130195171, + "grad_norm": 1.047014327400255, + "learning_rate": 4.9868731436697195e-06, + "loss": 0.5123, + "step": 3312 + }, + { + "epoch": 0.20174770879639498, + "grad_norm": 1.050735268460259, + "learning_rate": 4.986864975929319e-06, + "loss": 0.492, + "step": 3313 + }, + { + "epoch": 0.20180860457327285, + "grad_norm": 1.0072982925679759, + "learning_rate": 4.986856805655352e-06, + "loss": 0.5905, + "step": 3314 + }, + { + "epoch": 0.20186950035015072, + "grad_norm": 1.1102615015847528, + "learning_rate": 4.986848632847828e-06, + "loss": 0.4757, + "step": 3315 + }, + { + "epoch": 0.2019303961270286, + "grad_norm": 1.048428158651563, + "learning_rate": 4.986840457506753e-06, + "loss": 0.5044, + "step": 3316 + }, + { + "epoch": 0.20199129190390647, + "grad_norm": 1.0132849452519372, + "learning_rate": 4.9868322796321374e-06, + "loss": 0.4869, + "step": 3317 + }, + { + "epoch": 0.20205218768078434, + "grad_norm": 1.1138280123136912, + "learning_rate": 4.986824099223989e-06, + "loss": 0.4942, + "step": 3318 + }, + { + "epoch": 0.2021130834576622, + "grad_norm": 1.0710063841488668, + "learning_rate": 4.986815916282316e-06, + "loss": 0.5541, + "step": 3319 + }, + { + "epoch": 0.20217397923454009, + "grad_norm": 1.1063171518502126, + "learning_rate": 4.986807730807126e-06, + "loss": 0.4835, + "step": 3320 + }, + { + "epoch": 0.20223487501141796, + "grad_norm": 1.0375946362557098, + "learning_rate": 4.986799542798428e-06, + "loss": 0.4848, + "step": 3321 + }, + { + "epoch": 0.20229577078829583, + "grad_norm": 1.0358265988606228, + "learning_rate": 4.98679135225623e-06, + "loss": 0.527, + "step": 3322 + }, + { + "epoch": 0.2023566665651737, + "grad_norm": 1.052288514084644, + "learning_rate": 4.986783159180542e-06, + "loss": 0.5433, + "step": 3323 + }, + { + "epoch": 0.20241756234205158, + "grad_norm": 0.989652400649439, + "learning_rate": 4.9867749635713705e-06, + "loss": 0.4691, + "step": 3324 + }, + { + "epoch": 0.20247845811892945, + "grad_norm": 1.0954906183485644, + "learning_rate": 4.986766765428725e-06, + "loss": 0.5477, + "step": 3325 + }, + { + "epoch": 0.20253935389580732, + "grad_norm": 1.0210922192930263, + "learning_rate": 4.986758564752611e-06, + "loss": 0.4924, + "step": 3326 + }, + { + "epoch": 0.2026002496726852, + "grad_norm": 1.0151261482240455, + "learning_rate": 4.9867503615430405e-06, + "loss": 0.5209, + "step": 3327 + }, + { + "epoch": 0.20266114544956307, + "grad_norm": 0.9408445596080487, + "learning_rate": 4.986742155800021e-06, + "loss": 0.5175, + "step": 3328 + }, + { + "epoch": 0.20272204122644094, + "grad_norm": 0.9780996428076116, + "learning_rate": 4.986733947523559e-06, + "loss": 0.5233, + "step": 3329 + }, + { + "epoch": 0.20278293700331881, + "grad_norm": 1.044894465973774, + "learning_rate": 4.986725736713665e-06, + "loss": 0.5182, + "step": 3330 + }, + { + "epoch": 0.2028438327801967, + "grad_norm": 1.1171331250363923, + "learning_rate": 4.986717523370347e-06, + "loss": 0.4896, + "step": 3331 + }, + { + "epoch": 0.20290472855707456, + "grad_norm": 1.087967683151893, + "learning_rate": 4.986709307493612e-06, + "loss": 0.4683, + "step": 3332 + }, + { + "epoch": 0.20296562433395243, + "grad_norm": 1.0503185235958243, + "learning_rate": 4.986701089083469e-06, + "loss": 0.4693, + "step": 3333 + }, + { + "epoch": 0.2030265201108303, + "grad_norm": 1.0402067480999657, + "learning_rate": 4.986692868139927e-06, + "loss": 0.4788, + "step": 3334 + }, + { + "epoch": 0.20308741588770818, + "grad_norm": 1.147482342337665, + "learning_rate": 4.9866846446629945e-06, + "loss": 0.4196, + "step": 3335 + }, + { + "epoch": 0.20314831166458605, + "grad_norm": 1.0753876506153601, + "learning_rate": 4.9866764186526785e-06, + "loss": 0.5033, + "step": 3336 + }, + { + "epoch": 0.20320920744146392, + "grad_norm": 1.0123111662564122, + "learning_rate": 4.9866681901089895e-06, + "loss": 0.5663, + "step": 3337 + }, + { + "epoch": 0.2032701032183418, + "grad_norm": 1.1262164570299773, + "learning_rate": 4.986659959031933e-06, + "loss": 0.4584, + "step": 3338 + }, + { + "epoch": 0.20333099899521967, + "grad_norm": 1.1155186822766288, + "learning_rate": 4.986651725421521e-06, + "loss": 0.5432, + "step": 3339 + }, + { + "epoch": 0.20339189477209754, + "grad_norm": 1.0894071884784136, + "learning_rate": 4.9866434892777585e-06, + "loss": 0.4938, + "step": 3340 + }, + { + "epoch": 0.20345279054897542, + "grad_norm": 1.0363771424641588, + "learning_rate": 4.986635250600655e-06, + "loss": 0.5224, + "step": 3341 + }, + { + "epoch": 0.20351368632585332, + "grad_norm": 1.133566764808604, + "learning_rate": 4.98662700939022e-06, + "loss": 0.3984, + "step": 3342 + }, + { + "epoch": 0.2035745821027312, + "grad_norm": 1.0685882293543991, + "learning_rate": 4.9866187656464615e-06, + "loss": 0.4643, + "step": 3343 + }, + { + "epoch": 0.20363547787960906, + "grad_norm": 0.987209535978606, + "learning_rate": 4.986610519369387e-06, + "loss": 0.5005, + "step": 3344 + }, + { + "epoch": 0.20369637365648693, + "grad_norm": 1.1178819745644832, + "learning_rate": 4.986602270559006e-06, + "loss": 0.5444, + "step": 3345 + }, + { + "epoch": 0.2037572694333648, + "grad_norm": 1.0915211907272573, + "learning_rate": 4.986594019215325e-06, + "loss": 0.5156, + "step": 3346 + }, + { + "epoch": 0.20381816521024268, + "grad_norm": 1.152092863493988, + "learning_rate": 4.986585765338354e-06, + "loss": 0.4125, + "step": 3347 + }, + { + "epoch": 0.20387906098712055, + "grad_norm": 1.025759109200939, + "learning_rate": 4.9865775089281024e-06, + "loss": 0.5248, + "step": 3348 + }, + { + "epoch": 0.20393995676399843, + "grad_norm": 0.9932647874078001, + "learning_rate": 4.986569249984576e-06, + "loss": 0.4896, + "step": 3349 + }, + { + "epoch": 0.2040008525408763, + "grad_norm": 1.1229488453282515, + "learning_rate": 4.986560988507785e-06, + "loss": 0.5239, + "step": 3350 + }, + { + "epoch": 0.20406174831775417, + "grad_norm": 1.0177920936344513, + "learning_rate": 4.986552724497738e-06, + "loss": 0.4959, + "step": 3351 + }, + { + "epoch": 0.20412264409463204, + "grad_norm": 1.003248207223814, + "learning_rate": 4.986544457954441e-06, + "loss": 0.4983, + "step": 3352 + }, + { + "epoch": 0.20418353987150992, + "grad_norm": 1.0861140906980653, + "learning_rate": 4.986536188877906e-06, + "loss": 0.5177, + "step": 3353 + }, + { + "epoch": 0.2042444356483878, + "grad_norm": 1.1093893669079051, + "learning_rate": 4.986527917268139e-06, + "loss": 0.4544, + "step": 3354 + }, + { + "epoch": 0.20430533142526566, + "grad_norm": 1.0645426714778683, + "learning_rate": 4.986519643125149e-06, + "loss": 0.4315, + "step": 3355 + }, + { + "epoch": 0.20436622720214354, + "grad_norm": 1.0326903738144806, + "learning_rate": 4.986511366448944e-06, + "loss": 0.5273, + "step": 3356 + }, + { + "epoch": 0.2044271229790214, + "grad_norm": 1.0340632032752586, + "learning_rate": 4.986503087239534e-06, + "loss": 0.5672, + "step": 3357 + }, + { + "epoch": 0.20448801875589928, + "grad_norm": 1.0691658746712573, + "learning_rate": 4.9864948054969256e-06, + "loss": 0.524, + "step": 3358 + }, + { + "epoch": 0.20454891453277715, + "grad_norm": 1.0388806798261574, + "learning_rate": 4.9864865212211274e-06, + "loss": 0.4799, + "step": 3359 + }, + { + "epoch": 0.20460981030965503, + "grad_norm": 0.966858752434892, + "learning_rate": 4.98647823441215e-06, + "loss": 0.4699, + "step": 3360 + }, + { + "epoch": 0.2046707060865329, + "grad_norm": 1.0690088160802844, + "learning_rate": 4.986469945069999e-06, + "loss": 0.4837, + "step": 3361 + }, + { + "epoch": 0.20473160186341077, + "grad_norm": 1.003578311311874, + "learning_rate": 4.986461653194686e-06, + "loss": 0.5539, + "step": 3362 + }, + { + "epoch": 0.20479249764028865, + "grad_norm": 1.0170848959881245, + "learning_rate": 4.986453358786215e-06, + "loss": 0.5086, + "step": 3363 + }, + { + "epoch": 0.20485339341716652, + "grad_norm": 1.0827242354903512, + "learning_rate": 4.986445061844598e-06, + "loss": 0.4656, + "step": 3364 + }, + { + "epoch": 0.2049142891940444, + "grad_norm": 1.1184554917327494, + "learning_rate": 4.986436762369843e-06, + "loss": 0.4553, + "step": 3365 + }, + { + "epoch": 0.20497518497092226, + "grad_norm": 1.021868820623027, + "learning_rate": 4.986428460361957e-06, + "loss": 0.4461, + "step": 3366 + }, + { + "epoch": 0.20503608074780014, + "grad_norm": 1.0868505385617973, + "learning_rate": 4.98642015582095e-06, + "loss": 0.5267, + "step": 3367 + }, + { + "epoch": 0.205096976524678, + "grad_norm": 1.1205419876513374, + "learning_rate": 4.98641184874683e-06, + "loss": 0.4917, + "step": 3368 + }, + { + "epoch": 0.20515787230155588, + "grad_norm": 1.0996248736569136, + "learning_rate": 4.986403539139605e-06, + "loss": 0.4591, + "step": 3369 + }, + { + "epoch": 0.20521876807843376, + "grad_norm": 1.0516618436055971, + "learning_rate": 4.986395226999283e-06, + "loss": 0.4667, + "step": 3370 + }, + { + "epoch": 0.20527966385531163, + "grad_norm": 1.0007442944346292, + "learning_rate": 4.986386912325875e-06, + "loss": 0.5217, + "step": 3371 + }, + { + "epoch": 0.2053405596321895, + "grad_norm": 1.040729873926254, + "learning_rate": 4.9863785951193865e-06, + "loss": 0.4846, + "step": 3372 + }, + { + "epoch": 0.20540145540906737, + "grad_norm": 1.0142181337458642, + "learning_rate": 4.9863702753798274e-06, + "loss": 0.4692, + "step": 3373 + }, + { + "epoch": 0.20546235118594525, + "grad_norm": 1.0364876348632364, + "learning_rate": 4.986361953107206e-06, + "loss": 0.4692, + "step": 3374 + }, + { + "epoch": 0.20552324696282312, + "grad_norm": 1.0922650813828012, + "learning_rate": 4.986353628301531e-06, + "loss": 0.5334, + "step": 3375 + }, + { + "epoch": 0.205584142739701, + "grad_norm": 1.0268471838514983, + "learning_rate": 4.98634530096281e-06, + "loss": 0.5126, + "step": 3376 + }, + { + "epoch": 0.20564503851657887, + "grad_norm": 1.1582607107527993, + "learning_rate": 4.986336971091052e-06, + "loss": 0.5305, + "step": 3377 + }, + { + "epoch": 0.20570593429345674, + "grad_norm": 0.9863219337817485, + "learning_rate": 4.986328638686267e-06, + "loss": 0.5187, + "step": 3378 + }, + { + "epoch": 0.2057668300703346, + "grad_norm": 1.0953596970757749, + "learning_rate": 4.986320303748461e-06, + "loss": 0.4974, + "step": 3379 + }, + { + "epoch": 0.20582772584721248, + "grad_norm": 1.0637393551941463, + "learning_rate": 4.9863119662776434e-06, + "loss": 0.4702, + "step": 3380 + }, + { + "epoch": 0.20588862162409036, + "grad_norm": 1.0478465505533385, + "learning_rate": 4.986303626273823e-06, + "loss": 0.4939, + "step": 3381 + }, + { + "epoch": 0.20594951740096823, + "grad_norm": 1.0960624627059992, + "learning_rate": 4.986295283737008e-06, + "loss": 0.4522, + "step": 3382 + }, + { + "epoch": 0.20601041317784613, + "grad_norm": 1.0709429836965356, + "learning_rate": 4.986286938667208e-06, + "loss": 0.5081, + "step": 3383 + }, + { + "epoch": 0.206071308954724, + "grad_norm": 1.0986072260745445, + "learning_rate": 4.9862785910644295e-06, + "loss": 0.544, + "step": 3384 + }, + { + "epoch": 0.20613220473160188, + "grad_norm": 1.1546722413965487, + "learning_rate": 4.986270240928683e-06, + "loss": 0.4541, + "step": 3385 + }, + { + "epoch": 0.20619310050847975, + "grad_norm": 0.9614205633665729, + "learning_rate": 4.986261888259975e-06, + "loss": 0.5079, + "step": 3386 + }, + { + "epoch": 0.20625399628535762, + "grad_norm": 1.0883739180843135, + "learning_rate": 4.9862535330583164e-06, + "loss": 0.5129, + "step": 3387 + }, + { + "epoch": 0.2063148920622355, + "grad_norm": 1.06568311901635, + "learning_rate": 4.986245175323714e-06, + "loss": 0.5093, + "step": 3388 + }, + { + "epoch": 0.20637578783911337, + "grad_norm": 1.0569617073446542, + "learning_rate": 4.986236815056176e-06, + "loss": 0.5201, + "step": 3389 + }, + { + "epoch": 0.20643668361599124, + "grad_norm": 1.1540327681327762, + "learning_rate": 4.986228452255712e-06, + "loss": 0.4767, + "step": 3390 + }, + { + "epoch": 0.2064975793928691, + "grad_norm": 0.9760444907196393, + "learning_rate": 4.98622008692233e-06, + "loss": 0.5079, + "step": 3391 + }, + { + "epoch": 0.20655847516974699, + "grad_norm": 1.1036537418487622, + "learning_rate": 4.9862117190560385e-06, + "loss": 0.4738, + "step": 3392 + }, + { + "epoch": 0.20661937094662486, + "grad_norm": 0.9919256674953753, + "learning_rate": 4.986203348656847e-06, + "loss": 0.5166, + "step": 3393 + }, + { + "epoch": 0.20668026672350273, + "grad_norm": 0.969749415413153, + "learning_rate": 4.986194975724762e-06, + "loss": 0.5141, + "step": 3394 + }, + { + "epoch": 0.2067411625003806, + "grad_norm": 1.0135668935345605, + "learning_rate": 4.986186600259794e-06, + "loss": 0.5408, + "step": 3395 + }, + { + "epoch": 0.20680205827725848, + "grad_norm": 1.055962805258759, + "learning_rate": 4.986178222261951e-06, + "loss": 0.4965, + "step": 3396 + }, + { + "epoch": 0.20686295405413635, + "grad_norm": 1.0316484236326458, + "learning_rate": 4.986169841731241e-06, + "loss": 0.545, + "step": 3397 + }, + { + "epoch": 0.20692384983101422, + "grad_norm": 1.1087929403470216, + "learning_rate": 4.986161458667672e-06, + "loss": 0.5027, + "step": 3398 + }, + { + "epoch": 0.2069847456078921, + "grad_norm": 1.1548011490893866, + "learning_rate": 4.986153073071255e-06, + "loss": 0.4853, + "step": 3399 + }, + { + "epoch": 0.20704564138476997, + "grad_norm": 0.9934473689603875, + "learning_rate": 4.986144684941996e-06, + "loss": 0.5286, + "step": 3400 + }, + { + "epoch": 0.20710653716164784, + "grad_norm": 1.1314787191840912, + "learning_rate": 4.986136294279905e-06, + "loss": 0.5435, + "step": 3401 + }, + { + "epoch": 0.20716743293852571, + "grad_norm": 1.0004601347553874, + "learning_rate": 4.9861279010849895e-06, + "loss": 0.5378, + "step": 3402 + }, + { + "epoch": 0.2072283287154036, + "grad_norm": 0.9826265238611142, + "learning_rate": 4.9861195053572585e-06, + "loss": 0.5161, + "step": 3403 + }, + { + "epoch": 0.20728922449228146, + "grad_norm": 1.084329443166551, + "learning_rate": 4.986111107096721e-06, + "loss": 0.418, + "step": 3404 + }, + { + "epoch": 0.20735012026915933, + "grad_norm": 1.0061218247419947, + "learning_rate": 4.986102706303385e-06, + "loss": 0.5051, + "step": 3405 + }, + { + "epoch": 0.2074110160460372, + "grad_norm": 0.942620301166675, + "learning_rate": 4.9860943029772595e-06, + "loss": 0.5051, + "step": 3406 + }, + { + "epoch": 0.20747191182291508, + "grad_norm": 1.0624389223994877, + "learning_rate": 4.986085897118353e-06, + "loss": 0.5228, + "step": 3407 + }, + { + "epoch": 0.20753280759979295, + "grad_norm": 1.09212321133681, + "learning_rate": 4.986077488726673e-06, + "loss": 0.4826, + "step": 3408 + }, + { + "epoch": 0.20759370337667082, + "grad_norm": 1.085195564819186, + "learning_rate": 4.98606907780223e-06, + "loss": 0.4622, + "step": 3409 + }, + { + "epoch": 0.2076545991535487, + "grad_norm": 1.0023270186637236, + "learning_rate": 4.986060664345031e-06, + "loss": 0.4676, + "step": 3410 + }, + { + "epoch": 0.20771549493042657, + "grad_norm": 1.0620773476378462, + "learning_rate": 4.986052248355085e-06, + "loss": 0.6146, + "step": 3411 + }, + { + "epoch": 0.20777639070730444, + "grad_norm": 1.0422806168664056, + "learning_rate": 4.986043829832401e-06, + "loss": 0.5124, + "step": 3412 + }, + { + "epoch": 0.20783728648418232, + "grad_norm": 1.0607825592847067, + "learning_rate": 4.986035408776987e-06, + "loss": 0.4782, + "step": 3413 + }, + { + "epoch": 0.2078981822610602, + "grad_norm": 1.192102561220855, + "learning_rate": 4.986026985188851e-06, + "loss": 0.5143, + "step": 3414 + }, + { + "epoch": 0.20795907803793806, + "grad_norm": 1.0214296132110936, + "learning_rate": 4.9860185590680035e-06, + "loss": 0.52, + "step": 3415 + }, + { + "epoch": 0.20801997381481593, + "grad_norm": 1.0539077983103642, + "learning_rate": 4.986010130414452e-06, + "loss": 0.5097, + "step": 3416 + }, + { + "epoch": 0.2080808695916938, + "grad_norm": 1.0200432864729752, + "learning_rate": 4.986001699228205e-06, + "loss": 0.496, + "step": 3417 + }, + { + "epoch": 0.20814176536857168, + "grad_norm": 1.1307539374485989, + "learning_rate": 4.98599326550927e-06, + "loss": 0.4416, + "step": 3418 + }, + { + "epoch": 0.20820266114544955, + "grad_norm": 1.0322278011578812, + "learning_rate": 4.985984829257658e-06, + "loss": 0.5084, + "step": 3419 + }, + { + "epoch": 0.20826355692232743, + "grad_norm": 1.0533178084295167, + "learning_rate": 4.985976390473376e-06, + "loss": 0.4713, + "step": 3420 + }, + { + "epoch": 0.2083244526992053, + "grad_norm": 0.9310979720058788, + "learning_rate": 4.9859679491564325e-06, + "loss": 0.5298, + "step": 3421 + }, + { + "epoch": 0.20838534847608317, + "grad_norm": 1.0137734345663125, + "learning_rate": 4.985959505306838e-06, + "loss": 0.5057, + "step": 3422 + }, + { + "epoch": 0.20844624425296104, + "grad_norm": 0.9421587125344855, + "learning_rate": 4.985951058924598e-06, + "loss": 0.5501, + "step": 3423 + }, + { + "epoch": 0.20850714002983894, + "grad_norm": 1.1892674392937794, + "learning_rate": 4.985942610009723e-06, + "loss": 0.4857, + "step": 3424 + }, + { + "epoch": 0.20856803580671682, + "grad_norm": 1.1396838405584981, + "learning_rate": 4.985934158562222e-06, + "loss": 0.4369, + "step": 3425 + }, + { + "epoch": 0.2086289315835947, + "grad_norm": 0.9814121642898451, + "learning_rate": 4.985925704582103e-06, + "loss": 0.5455, + "step": 3426 + }, + { + "epoch": 0.20868982736047256, + "grad_norm": 0.9518147987516924, + "learning_rate": 4.985917248069374e-06, + "loss": 0.4622, + "step": 3427 + }, + { + "epoch": 0.20875072313735044, + "grad_norm": 0.9634471757280669, + "learning_rate": 4.985908789024044e-06, + "loss": 0.542, + "step": 3428 + }, + { + "epoch": 0.2088116189142283, + "grad_norm": 1.0291249574276764, + "learning_rate": 4.985900327446123e-06, + "loss": 0.5182, + "step": 3429 + }, + { + "epoch": 0.20887251469110618, + "grad_norm": 0.9774471438705676, + "learning_rate": 4.985891863335617e-06, + "loss": 0.5788, + "step": 3430 + }, + { + "epoch": 0.20893341046798405, + "grad_norm": 1.0625802842276713, + "learning_rate": 4.985883396692537e-06, + "loss": 0.5229, + "step": 3431 + }, + { + "epoch": 0.20899430624486193, + "grad_norm": 1.1743163755025108, + "learning_rate": 4.98587492751689e-06, + "loss": 0.5092, + "step": 3432 + }, + { + "epoch": 0.2090552020217398, + "grad_norm": 0.9938690456672714, + "learning_rate": 4.985866455808685e-06, + "loss": 0.5098, + "step": 3433 + }, + { + "epoch": 0.20911609779861767, + "grad_norm": 0.9803903472796017, + "learning_rate": 4.985857981567933e-06, + "loss": 0.4903, + "step": 3434 + }, + { + "epoch": 0.20917699357549555, + "grad_norm": 1.0041164587889277, + "learning_rate": 4.985849504794639e-06, + "loss": 0.6336, + "step": 3435 + }, + { + "epoch": 0.20923788935237342, + "grad_norm": 0.9864747889895915, + "learning_rate": 4.985841025488813e-06, + "loss": 0.4796, + "step": 3436 + }, + { + "epoch": 0.2092987851292513, + "grad_norm": 1.0378498194891863, + "learning_rate": 4.985832543650464e-06, + "loss": 0.5979, + "step": 3437 + }, + { + "epoch": 0.20935968090612916, + "grad_norm": 1.0623352026236608, + "learning_rate": 4.985824059279601e-06, + "loss": 0.5292, + "step": 3438 + }, + { + "epoch": 0.20942057668300704, + "grad_norm": 1.041597733259241, + "learning_rate": 4.985815572376232e-06, + "loss": 0.5221, + "step": 3439 + }, + { + "epoch": 0.2094814724598849, + "grad_norm": 0.9862923145944356, + "learning_rate": 4.985807082940366e-06, + "loss": 0.5218, + "step": 3440 + }, + { + "epoch": 0.20954236823676278, + "grad_norm": 1.0306717182057008, + "learning_rate": 4.985798590972011e-06, + "loss": 0.5578, + "step": 3441 + }, + { + "epoch": 0.20960326401364066, + "grad_norm": 1.0981398010236594, + "learning_rate": 4.985790096471176e-06, + "loss": 0.4816, + "step": 3442 + }, + { + "epoch": 0.20966415979051853, + "grad_norm": 1.0633498537857196, + "learning_rate": 4.98578159943787e-06, + "loss": 0.5428, + "step": 3443 + }, + { + "epoch": 0.2097250555673964, + "grad_norm": 0.9917142399514144, + "learning_rate": 4.985773099872101e-06, + "loss": 0.5201, + "step": 3444 + }, + { + "epoch": 0.20978595134427427, + "grad_norm": 1.021307536019222, + "learning_rate": 4.985764597773878e-06, + "loss": 0.5775, + "step": 3445 + }, + { + "epoch": 0.20984684712115215, + "grad_norm": 0.9920800169832799, + "learning_rate": 4.98575609314321e-06, + "loss": 0.5391, + "step": 3446 + }, + { + "epoch": 0.20990774289803002, + "grad_norm": 1.0151634658486852, + "learning_rate": 4.985747585980106e-06, + "loss": 0.5074, + "step": 3447 + }, + { + "epoch": 0.2099686386749079, + "grad_norm": 1.1519059625027241, + "learning_rate": 4.985739076284573e-06, + "loss": 0.5014, + "step": 3448 + }, + { + "epoch": 0.21002953445178577, + "grad_norm": 1.0127358693173807, + "learning_rate": 4.9857305640566215e-06, + "loss": 0.549, + "step": 3449 + }, + { + "epoch": 0.21009043022866364, + "grad_norm": 1.073346305411959, + "learning_rate": 4.9857220492962585e-06, + "loss": 0.5092, + "step": 3450 + }, + { + "epoch": 0.2101513260055415, + "grad_norm": 1.1016426400727715, + "learning_rate": 4.985713532003495e-06, + "loss": 0.521, + "step": 3451 + }, + { + "epoch": 0.21021222178241938, + "grad_norm": 1.1399352134621787, + "learning_rate": 4.985705012178337e-06, + "loss": 0.4829, + "step": 3452 + }, + { + "epoch": 0.21027311755929726, + "grad_norm": 1.0138555361817831, + "learning_rate": 4.985696489820794e-06, + "loss": 0.5339, + "step": 3453 + }, + { + "epoch": 0.21033401333617513, + "grad_norm": 1.0594137999991409, + "learning_rate": 4.985687964930877e-06, + "loss": 0.4893, + "step": 3454 + }, + { + "epoch": 0.210394909113053, + "grad_norm": 1.1043313401364887, + "learning_rate": 4.985679437508591e-06, + "loss": 0.5144, + "step": 3455 + }, + { + "epoch": 0.21045580488993088, + "grad_norm": 0.9295620543287733, + "learning_rate": 4.985670907553947e-06, + "loss": 0.5588, + "step": 3456 + }, + { + "epoch": 0.21051670066680875, + "grad_norm": 1.1273868066892614, + "learning_rate": 4.985662375066953e-06, + "loss": 0.4481, + "step": 3457 + }, + { + "epoch": 0.21057759644368662, + "grad_norm": 1.130661185162559, + "learning_rate": 4.985653840047618e-06, + "loss": 0.4747, + "step": 3458 + }, + { + "epoch": 0.2106384922205645, + "grad_norm": 1.0631560429826916, + "learning_rate": 4.985645302495951e-06, + "loss": 0.4963, + "step": 3459 + }, + { + "epoch": 0.21069938799744237, + "grad_norm": 1.037820240945483, + "learning_rate": 4.98563676241196e-06, + "loss": 0.613, + "step": 3460 + }, + { + "epoch": 0.21076028377432024, + "grad_norm": 1.0913315676202893, + "learning_rate": 4.985628219795654e-06, + "loss": 0.4636, + "step": 3461 + }, + { + "epoch": 0.2108211795511981, + "grad_norm": 1.1417202151016919, + "learning_rate": 4.985619674647041e-06, + "loss": 0.5134, + "step": 3462 + }, + { + "epoch": 0.21088207532807599, + "grad_norm": 1.0392605503902796, + "learning_rate": 4.985611126966131e-06, + "loss": 0.5117, + "step": 3463 + }, + { + "epoch": 0.21094297110495386, + "grad_norm": 1.1501416206797939, + "learning_rate": 4.985602576752931e-06, + "loss": 0.4167, + "step": 3464 + }, + { + "epoch": 0.21100386688183176, + "grad_norm": 1.0376798210620344, + "learning_rate": 4.985594024007453e-06, + "loss": 0.5404, + "step": 3465 + }, + { + "epoch": 0.21106476265870963, + "grad_norm": 0.9724724850460346, + "learning_rate": 4.9855854687297015e-06, + "loss": 0.5567, + "step": 3466 + }, + { + "epoch": 0.2111256584355875, + "grad_norm": 1.0250783705235627, + "learning_rate": 4.985576910919688e-06, + "loss": 0.5367, + "step": 3467 + }, + { + "epoch": 0.21118655421246538, + "grad_norm": 1.1000705961293658, + "learning_rate": 4.98556835057742e-06, + "loss": 0.5245, + "step": 3468 + }, + { + "epoch": 0.21124744998934325, + "grad_norm": 1.0686925838737458, + "learning_rate": 4.985559787702907e-06, + "loss": 0.4618, + "step": 3469 + }, + { + "epoch": 0.21130834576622112, + "grad_norm": 1.027310685836467, + "learning_rate": 4.985551222296157e-06, + "loss": 0.4456, + "step": 3470 + }, + { + "epoch": 0.211369241543099, + "grad_norm": 1.1275903114729413, + "learning_rate": 4.985542654357179e-06, + "loss": 0.4351, + "step": 3471 + }, + { + "epoch": 0.21143013731997687, + "grad_norm": 1.0629429516532836, + "learning_rate": 4.985534083885983e-06, + "loss": 0.456, + "step": 3472 + }, + { + "epoch": 0.21149103309685474, + "grad_norm": 1.011511535881752, + "learning_rate": 4.985525510882575e-06, + "loss": 0.5043, + "step": 3473 + }, + { + "epoch": 0.21155192887373261, + "grad_norm": 1.0362982612806666, + "learning_rate": 4.985516935346967e-06, + "loss": 0.4918, + "step": 3474 + }, + { + "epoch": 0.2116128246506105, + "grad_norm": 1.006366279800218, + "learning_rate": 4.985508357279164e-06, + "loss": 0.4581, + "step": 3475 + }, + { + "epoch": 0.21167372042748836, + "grad_norm": 1.0577757424818255, + "learning_rate": 4.985499776679178e-06, + "loss": 0.5605, + "step": 3476 + }, + { + "epoch": 0.21173461620436623, + "grad_norm": 1.171379052725659, + "learning_rate": 4.985491193547016e-06, + "loss": 0.4476, + "step": 3477 + }, + { + "epoch": 0.2117955119812441, + "grad_norm": 0.9251357731399327, + "learning_rate": 4.985482607882688e-06, + "loss": 0.4878, + "step": 3478 + }, + { + "epoch": 0.21185640775812198, + "grad_norm": 1.048574997512551, + "learning_rate": 4.9854740196862016e-06, + "loss": 0.4294, + "step": 3479 + }, + { + "epoch": 0.21191730353499985, + "grad_norm": 1.1004148255011466, + "learning_rate": 4.985465428957565e-06, + "loss": 0.4457, + "step": 3480 + }, + { + "epoch": 0.21197819931187772, + "grad_norm": 1.0204893888293909, + "learning_rate": 4.985456835696789e-06, + "loss": 0.5148, + "step": 3481 + }, + { + "epoch": 0.2120390950887556, + "grad_norm": 1.067891817232085, + "learning_rate": 4.9854482399038815e-06, + "loss": 0.475, + "step": 3482 + }, + { + "epoch": 0.21209999086563347, + "grad_norm": 1.039557570794661, + "learning_rate": 4.985439641578851e-06, + "loss": 0.5514, + "step": 3483 + }, + { + "epoch": 0.21216088664251134, + "grad_norm": 1.0628007419834808, + "learning_rate": 4.985431040721706e-06, + "loss": 0.4686, + "step": 3484 + }, + { + "epoch": 0.21222178241938922, + "grad_norm": 1.0182519191535138, + "learning_rate": 4.985422437332454e-06, + "loss": 0.5289, + "step": 3485 + }, + { + "epoch": 0.2122826781962671, + "grad_norm": 1.1351406744537365, + "learning_rate": 4.985413831411107e-06, + "loss": 0.5549, + "step": 3486 + }, + { + "epoch": 0.21234357397314496, + "grad_norm": 1.0013771511123946, + "learning_rate": 4.985405222957672e-06, + "loss": 0.4907, + "step": 3487 + }, + { + "epoch": 0.21240446975002283, + "grad_norm": 1.0512866713654438, + "learning_rate": 4.985396611972158e-06, + "loss": 0.437, + "step": 3488 + }, + { + "epoch": 0.2124653655269007, + "grad_norm": 1.015935754070679, + "learning_rate": 4.9853879984545725e-06, + "loss": 0.5138, + "step": 3489 + }, + { + "epoch": 0.21252626130377858, + "grad_norm": 0.9229779184584319, + "learning_rate": 4.9853793824049255e-06, + "loss": 0.5676, + "step": 3490 + }, + { + "epoch": 0.21258715708065645, + "grad_norm": 1.0616339917435567, + "learning_rate": 4.985370763823227e-06, + "loss": 0.5366, + "step": 3491 + }, + { + "epoch": 0.21264805285753433, + "grad_norm": 1.153546432900535, + "learning_rate": 4.985362142709483e-06, + "loss": 0.47, + "step": 3492 + }, + { + "epoch": 0.2127089486344122, + "grad_norm": 1.2488482074242204, + "learning_rate": 4.985353519063705e-06, + "loss": 0.4284, + "step": 3493 + }, + { + "epoch": 0.21276984441129007, + "grad_norm": 1.03068727457133, + "learning_rate": 4.985344892885899e-06, + "loss": 0.4632, + "step": 3494 + }, + { + "epoch": 0.21283074018816794, + "grad_norm": 1.0548567874148556, + "learning_rate": 4.985336264176077e-06, + "loss": 0.48, + "step": 3495 + }, + { + "epoch": 0.21289163596504582, + "grad_norm": 1.0465640754640029, + "learning_rate": 4.985327632934245e-06, + "loss": 0.5266, + "step": 3496 + }, + { + "epoch": 0.2129525317419237, + "grad_norm": 0.9875018738575668, + "learning_rate": 4.985318999160413e-06, + "loss": 0.5273, + "step": 3497 + }, + { + "epoch": 0.21301342751880156, + "grad_norm": 0.9732033773095238, + "learning_rate": 4.98531036285459e-06, + "loss": 0.5747, + "step": 3498 + }, + { + "epoch": 0.21307432329567944, + "grad_norm": 1.0169449552303647, + "learning_rate": 4.985301724016783e-06, + "loss": 0.5301, + "step": 3499 + }, + { + "epoch": 0.2131352190725573, + "grad_norm": 0.9906611004134511, + "learning_rate": 4.985293082647004e-06, + "loss": 0.468, + "step": 3500 + }, + { + "epoch": 0.21319611484943518, + "grad_norm": 1.063566724987577, + "learning_rate": 4.9852844387452594e-06, + "loss": 0.5189, + "step": 3501 + }, + { + "epoch": 0.21325701062631305, + "grad_norm": 1.021459835910639, + "learning_rate": 4.985275792311559e-06, + "loss": 0.4884, + "step": 3502 + }, + { + "epoch": 0.21331790640319093, + "grad_norm": 1.0510510382871183, + "learning_rate": 4.985267143345912e-06, + "loss": 0.5279, + "step": 3503 + }, + { + "epoch": 0.2133788021800688, + "grad_norm": 1.0764704057660706, + "learning_rate": 4.9852584918483245e-06, + "loss": 0.5573, + "step": 3504 + }, + { + "epoch": 0.21343969795694667, + "grad_norm": 1.0891016060688774, + "learning_rate": 4.985249837818809e-06, + "loss": 0.5386, + "step": 3505 + }, + { + "epoch": 0.21350059373382457, + "grad_norm": 1.0083768185289161, + "learning_rate": 4.985241181257372e-06, + "loss": 0.5355, + "step": 3506 + }, + { + "epoch": 0.21356148951070245, + "grad_norm": 1.0923640763432976, + "learning_rate": 4.985232522164023e-06, + "loss": 0.467, + "step": 3507 + }, + { + "epoch": 0.21362238528758032, + "grad_norm": 0.996959754355739, + "learning_rate": 4.985223860538771e-06, + "loss": 0.5295, + "step": 3508 + }, + { + "epoch": 0.2136832810644582, + "grad_norm": 1.0108604733090936, + "learning_rate": 4.9852151963816246e-06, + "loss": 0.4995, + "step": 3509 + }, + { + "epoch": 0.21374417684133606, + "grad_norm": 1.000422315483787, + "learning_rate": 4.985206529692592e-06, + "loss": 0.463, + "step": 3510 + }, + { + "epoch": 0.21380507261821394, + "grad_norm": 0.9539663020941015, + "learning_rate": 4.9851978604716834e-06, + "loss": 0.5093, + "step": 3511 + }, + { + "epoch": 0.2138659683950918, + "grad_norm": 1.0558769958787477, + "learning_rate": 4.985189188718906e-06, + "loss": 0.5108, + "step": 3512 + }, + { + "epoch": 0.21392686417196968, + "grad_norm": 1.0303940612723397, + "learning_rate": 4.985180514434271e-06, + "loss": 0.4882, + "step": 3513 + }, + { + "epoch": 0.21398775994884756, + "grad_norm": 1.0933666137818052, + "learning_rate": 4.985171837617785e-06, + "loss": 0.5113, + "step": 3514 + }, + { + "epoch": 0.21404865572572543, + "grad_norm": 1.051103193529704, + "learning_rate": 4.985163158269457e-06, + "loss": 0.5468, + "step": 3515 + }, + { + "epoch": 0.2141095515026033, + "grad_norm": 1.0154876720947599, + "learning_rate": 4.985154476389297e-06, + "loss": 0.5057, + "step": 3516 + }, + { + "epoch": 0.21417044727948117, + "grad_norm": 1.1057036632058403, + "learning_rate": 4.985145791977313e-06, + "loss": 0.4818, + "step": 3517 + }, + { + "epoch": 0.21423134305635905, + "grad_norm": 1.0751174025486592, + "learning_rate": 4.985137105033515e-06, + "loss": 0.5502, + "step": 3518 + }, + { + "epoch": 0.21429223883323692, + "grad_norm": 1.0738324363276404, + "learning_rate": 4.98512841555791e-06, + "loss": 0.4905, + "step": 3519 + }, + { + "epoch": 0.2143531346101148, + "grad_norm": 1.0730286578825206, + "learning_rate": 4.985119723550508e-06, + "loss": 0.4685, + "step": 3520 + }, + { + "epoch": 0.21441403038699267, + "grad_norm": 1.0965605335098958, + "learning_rate": 4.985111029011318e-06, + "loss": 0.5192, + "step": 3521 + }, + { + "epoch": 0.21447492616387054, + "grad_norm": 1.1208847682691627, + "learning_rate": 4.985102331940349e-06, + "loss": 0.5268, + "step": 3522 + }, + { + "epoch": 0.2145358219407484, + "grad_norm": 1.0785554716342298, + "learning_rate": 4.985093632337608e-06, + "loss": 0.4822, + "step": 3523 + }, + { + "epoch": 0.21459671771762628, + "grad_norm": 1.095387011068836, + "learning_rate": 4.985084930203107e-06, + "loss": 0.5038, + "step": 3524 + }, + { + "epoch": 0.21465761349450416, + "grad_norm": 1.0114081045631733, + "learning_rate": 4.9850762255368516e-06, + "loss": 0.5226, + "step": 3525 + }, + { + "epoch": 0.21471850927138203, + "grad_norm": 1.0473894231891725, + "learning_rate": 4.985067518338853e-06, + "loss": 0.5279, + "step": 3526 + }, + { + "epoch": 0.2147794050482599, + "grad_norm": 0.9522292131861048, + "learning_rate": 4.985058808609119e-06, + "loss": 0.5498, + "step": 3527 + }, + { + "epoch": 0.21484030082513778, + "grad_norm": 0.9775037670415306, + "learning_rate": 4.985050096347659e-06, + "loss": 0.5122, + "step": 3528 + }, + { + "epoch": 0.21490119660201565, + "grad_norm": 0.9761014522037812, + "learning_rate": 4.985041381554482e-06, + "loss": 0.5251, + "step": 3529 + }, + { + "epoch": 0.21496209237889352, + "grad_norm": 0.9548130416559433, + "learning_rate": 4.9850326642295955e-06, + "loss": 0.5642, + "step": 3530 + }, + { + "epoch": 0.2150229881557714, + "grad_norm": 1.098791477192116, + "learning_rate": 4.98502394437301e-06, + "loss": 0.5843, + "step": 3531 + }, + { + "epoch": 0.21508388393264927, + "grad_norm": 1.1280278014737517, + "learning_rate": 4.985015221984734e-06, + "loss": 0.5021, + "step": 3532 + }, + { + "epoch": 0.21514477970952714, + "grad_norm": 1.0683169191176245, + "learning_rate": 4.985006497064776e-06, + "loss": 0.4651, + "step": 3533 + }, + { + "epoch": 0.215205675486405, + "grad_norm": 1.1095765924125272, + "learning_rate": 4.984997769613145e-06, + "loss": 0.5001, + "step": 3534 + }, + { + "epoch": 0.21526657126328289, + "grad_norm": 0.958966818476649, + "learning_rate": 4.98498903962985e-06, + "loss": 0.4863, + "step": 3535 + }, + { + "epoch": 0.21532746704016076, + "grad_norm": 1.053699370757304, + "learning_rate": 4.984980307114899e-06, + "loss": 0.4917, + "step": 3536 + }, + { + "epoch": 0.21538836281703863, + "grad_norm": 1.084109637735801, + "learning_rate": 4.9849715720683025e-06, + "loss": 0.5081, + "step": 3537 + }, + { + "epoch": 0.2154492585939165, + "grad_norm": 1.0280232838626695, + "learning_rate": 4.984962834490068e-06, + "loss": 0.499, + "step": 3538 + }, + { + "epoch": 0.21551015437079438, + "grad_norm": 1.0303213174295196, + "learning_rate": 4.984954094380205e-06, + "loss": 0.4589, + "step": 3539 + }, + { + "epoch": 0.21557105014767225, + "grad_norm": 1.099978769921647, + "learning_rate": 4.984945351738724e-06, + "loss": 0.5282, + "step": 3540 + }, + { + "epoch": 0.21563194592455012, + "grad_norm": 0.9809994178449754, + "learning_rate": 4.984936606565631e-06, + "loss": 0.5171, + "step": 3541 + }, + { + "epoch": 0.215692841701428, + "grad_norm": 0.953575100214995, + "learning_rate": 4.9849278588609365e-06, + "loss": 0.5332, + "step": 3542 + }, + { + "epoch": 0.21575373747830587, + "grad_norm": 1.0336117583560018, + "learning_rate": 4.984919108624649e-06, + "loss": 0.5347, + "step": 3543 + }, + { + "epoch": 0.21581463325518374, + "grad_norm": 1.0571292826915153, + "learning_rate": 4.984910355856778e-06, + "loss": 0.5419, + "step": 3544 + }, + { + "epoch": 0.2158755290320616, + "grad_norm": 1.028994703868717, + "learning_rate": 4.984901600557332e-06, + "loss": 0.5084, + "step": 3545 + }, + { + "epoch": 0.2159364248089395, + "grad_norm": 1.1484746818838043, + "learning_rate": 4.984892842726319e-06, + "loss": 0.5062, + "step": 3546 + }, + { + "epoch": 0.2159973205858174, + "grad_norm": 1.0988222949539712, + "learning_rate": 4.98488408236375e-06, + "loss": 0.5173, + "step": 3547 + }, + { + "epoch": 0.21605821636269526, + "grad_norm": 1.0036478010112153, + "learning_rate": 4.984875319469632e-06, + "loss": 0.5002, + "step": 3548 + }, + { + "epoch": 0.21611911213957313, + "grad_norm": 1.1435748735014262, + "learning_rate": 4.984866554043975e-06, + "loss": 0.4802, + "step": 3549 + }, + { + "epoch": 0.216180007916451, + "grad_norm": 1.0682719494387507, + "learning_rate": 4.984857786086787e-06, + "loss": 0.4908, + "step": 3550 + }, + { + "epoch": 0.21624090369332888, + "grad_norm": 1.0320908165818885, + "learning_rate": 4.984849015598079e-06, + "loss": 0.5319, + "step": 3551 + }, + { + "epoch": 0.21630179947020675, + "grad_norm": 1.1103984367486937, + "learning_rate": 4.984840242577857e-06, + "loss": 0.5164, + "step": 3552 + }, + { + "epoch": 0.21636269524708462, + "grad_norm": 1.1232057330201966, + "learning_rate": 4.984831467026132e-06, + "loss": 0.488, + "step": 3553 + }, + { + "epoch": 0.2164235910239625, + "grad_norm": 1.1552259871373736, + "learning_rate": 4.984822688942913e-06, + "loss": 0.4937, + "step": 3554 + }, + { + "epoch": 0.21648448680084037, + "grad_norm": 1.1197892754975105, + "learning_rate": 4.984813908328208e-06, + "loss": 0.443, + "step": 3555 + }, + { + "epoch": 0.21654538257771824, + "grad_norm": 1.0812298419807627, + "learning_rate": 4.984805125182026e-06, + "loss": 0.5099, + "step": 3556 + }, + { + "epoch": 0.21660627835459612, + "grad_norm": 1.0961155046352729, + "learning_rate": 4.984796339504376e-06, + "loss": 0.4763, + "step": 3557 + }, + { + "epoch": 0.216667174131474, + "grad_norm": 1.0045700051174846, + "learning_rate": 4.984787551295267e-06, + "loss": 0.563, + "step": 3558 + }, + { + "epoch": 0.21672806990835186, + "grad_norm": 1.0595934139111627, + "learning_rate": 4.9847787605547085e-06, + "loss": 0.474, + "step": 3559 + }, + { + "epoch": 0.21678896568522973, + "grad_norm": 1.0220847896769076, + "learning_rate": 4.984769967282708e-06, + "loss": 0.5005, + "step": 3560 + }, + { + "epoch": 0.2168498614621076, + "grad_norm": 1.0653039885978497, + "learning_rate": 4.9847611714792775e-06, + "loss": 0.4699, + "step": 3561 + }, + { + "epoch": 0.21691075723898548, + "grad_norm": 0.9920550940915338, + "learning_rate": 4.9847523731444225e-06, + "loss": 0.5728, + "step": 3562 + }, + { + "epoch": 0.21697165301586335, + "grad_norm": 1.0517730083971346, + "learning_rate": 4.984743572278155e-06, + "loss": 0.4926, + "step": 3563 + }, + { + "epoch": 0.21703254879274123, + "grad_norm": 1.0137027294087486, + "learning_rate": 4.98473476888048e-06, + "loss": 0.5179, + "step": 3564 + }, + { + "epoch": 0.2170934445696191, + "grad_norm": 1.0186563884219648, + "learning_rate": 4.984725962951411e-06, + "loss": 0.5447, + "step": 3565 + }, + { + "epoch": 0.21715434034649697, + "grad_norm": 1.0512192738034583, + "learning_rate": 4.984717154490953e-06, + "loss": 0.5287, + "step": 3566 + }, + { + "epoch": 0.21721523612337484, + "grad_norm": 1.0744107659301123, + "learning_rate": 4.984708343499119e-06, + "loss": 0.4599, + "step": 3567 + }, + { + "epoch": 0.21727613190025272, + "grad_norm": 1.167163280112619, + "learning_rate": 4.984699529975914e-06, + "loss": 0.5466, + "step": 3568 + }, + { + "epoch": 0.2173370276771306, + "grad_norm": 1.0101606815034203, + "learning_rate": 4.9846907139213495e-06, + "loss": 0.54, + "step": 3569 + }, + { + "epoch": 0.21739792345400846, + "grad_norm": 1.0709126962876254, + "learning_rate": 4.9846818953354335e-06, + "loss": 0.4721, + "step": 3570 + }, + { + "epoch": 0.21745881923088634, + "grad_norm": 1.0657408028159654, + "learning_rate": 4.984673074218176e-06, + "loss": 0.4949, + "step": 3571 + }, + { + "epoch": 0.2175197150077642, + "grad_norm": 1.1163570870729875, + "learning_rate": 4.984664250569584e-06, + "loss": 0.5076, + "step": 3572 + }, + { + "epoch": 0.21758061078464208, + "grad_norm": 0.9456135808870202, + "learning_rate": 4.984655424389669e-06, + "loss": 0.558, + "step": 3573 + }, + { + "epoch": 0.21764150656151995, + "grad_norm": 0.9889784372565126, + "learning_rate": 4.984646595678438e-06, + "loss": 0.4962, + "step": 3574 + }, + { + "epoch": 0.21770240233839783, + "grad_norm": 1.1411530581985125, + "learning_rate": 4.984637764435901e-06, + "loss": 0.4664, + "step": 3575 + }, + { + "epoch": 0.2177632981152757, + "grad_norm": 1.096702531386223, + "learning_rate": 4.984628930662067e-06, + "loss": 0.4679, + "step": 3576 + }, + { + "epoch": 0.21782419389215357, + "grad_norm": 1.1625168691277445, + "learning_rate": 4.984620094356943e-06, + "loss": 0.5637, + "step": 3577 + }, + { + "epoch": 0.21788508966903145, + "grad_norm": 0.9719153646059796, + "learning_rate": 4.984611255520542e-06, + "loss": 0.4535, + "step": 3578 + }, + { + "epoch": 0.21794598544590932, + "grad_norm": 1.060071160533184, + "learning_rate": 4.984602414152869e-06, + "loss": 0.5626, + "step": 3579 + }, + { + "epoch": 0.2180068812227872, + "grad_norm": 1.0608807475998072, + "learning_rate": 4.984593570253935e-06, + "loss": 0.5655, + "step": 3580 + }, + { + "epoch": 0.21806777699966506, + "grad_norm": 1.0273762418682866, + "learning_rate": 4.984584723823749e-06, + "loss": 0.4848, + "step": 3581 + }, + { + "epoch": 0.21812867277654294, + "grad_norm": 1.016711172642323, + "learning_rate": 4.98457587486232e-06, + "loss": 0.5558, + "step": 3582 + }, + { + "epoch": 0.2181895685534208, + "grad_norm": 1.009328895916967, + "learning_rate": 4.984567023369656e-06, + "loss": 0.5056, + "step": 3583 + }, + { + "epoch": 0.21825046433029868, + "grad_norm": 1.0150473649731746, + "learning_rate": 4.984558169345768e-06, + "loss": 0.4747, + "step": 3584 + }, + { + "epoch": 0.21831136010717656, + "grad_norm": 0.9866278639253534, + "learning_rate": 4.984549312790663e-06, + "loss": 0.5068, + "step": 3585 + }, + { + "epoch": 0.21837225588405443, + "grad_norm": 1.0177449673468346, + "learning_rate": 4.9845404537043515e-06, + "loss": 0.5561, + "step": 3586 + }, + { + "epoch": 0.2184331516609323, + "grad_norm": 0.969978004435348, + "learning_rate": 4.984531592086841e-06, + "loss": 0.5391, + "step": 3587 + }, + { + "epoch": 0.2184940474378102, + "grad_norm": 1.0729564760040857, + "learning_rate": 4.984522727938142e-06, + "loss": 0.4734, + "step": 3588 + }, + { + "epoch": 0.21855494321468807, + "grad_norm": 1.0534362309954948, + "learning_rate": 4.9845138612582625e-06, + "loss": 0.5239, + "step": 3589 + }, + { + "epoch": 0.21861583899156595, + "grad_norm": 1.0845357001814593, + "learning_rate": 4.984504992047212e-06, + "loss": 0.4826, + "step": 3590 + }, + { + "epoch": 0.21867673476844382, + "grad_norm": 1.0322439791410114, + "learning_rate": 4.984496120304999e-06, + "loss": 0.462, + "step": 3591 + }, + { + "epoch": 0.2187376305453217, + "grad_norm": 1.0466083461751259, + "learning_rate": 4.984487246031633e-06, + "loss": 0.5313, + "step": 3592 + }, + { + "epoch": 0.21879852632219957, + "grad_norm": 1.0533933393503696, + "learning_rate": 4.9844783692271235e-06, + "loss": 0.4398, + "step": 3593 + }, + { + "epoch": 0.21885942209907744, + "grad_norm": 0.9835983853941652, + "learning_rate": 4.984469489891479e-06, + "loss": 0.5397, + "step": 3594 + }, + { + "epoch": 0.2189203178759553, + "grad_norm": 1.0399830933307308, + "learning_rate": 4.984460608024709e-06, + "loss": 0.4741, + "step": 3595 + }, + { + "epoch": 0.21898121365283318, + "grad_norm": 1.0853151252509252, + "learning_rate": 4.984451723626822e-06, + "loss": 0.4736, + "step": 3596 + }, + { + "epoch": 0.21904210942971106, + "grad_norm": 1.084687281491857, + "learning_rate": 4.984442836697827e-06, + "loss": 0.4979, + "step": 3597 + }, + { + "epoch": 0.21910300520658893, + "grad_norm": 1.0932520844224118, + "learning_rate": 4.984433947237734e-06, + "loss": 0.4506, + "step": 3598 + }, + { + "epoch": 0.2191639009834668, + "grad_norm": 0.9686437538333063, + "learning_rate": 4.984425055246551e-06, + "loss": 0.5443, + "step": 3599 + }, + { + "epoch": 0.21922479676034468, + "grad_norm": 1.0894462606556667, + "learning_rate": 4.984416160724287e-06, + "loss": 0.5715, + "step": 3600 + }, + { + "epoch": 0.21928569253722255, + "grad_norm": 0.9718910607669609, + "learning_rate": 4.984407263670952e-06, + "loss": 0.5711, + "step": 3601 + }, + { + "epoch": 0.21934658831410042, + "grad_norm": 1.0251470225750992, + "learning_rate": 4.984398364086554e-06, + "loss": 0.5538, + "step": 3602 + }, + { + "epoch": 0.2194074840909783, + "grad_norm": 1.0792308118946277, + "learning_rate": 4.984389461971103e-06, + "loss": 0.507, + "step": 3603 + }, + { + "epoch": 0.21946837986785617, + "grad_norm": 0.9264043216483323, + "learning_rate": 4.9843805573246065e-06, + "loss": 0.4842, + "step": 3604 + }, + { + "epoch": 0.21952927564473404, + "grad_norm": 1.00976698604612, + "learning_rate": 4.984371650147077e-06, + "loss": 0.509, + "step": 3605 + }, + { + "epoch": 0.2195901714216119, + "grad_norm": 1.105847716734821, + "learning_rate": 4.984362740438519e-06, + "loss": 0.4759, + "step": 3606 + }, + { + "epoch": 0.21965106719848979, + "grad_norm": 1.0957670612753891, + "learning_rate": 4.984353828198945e-06, + "loss": 0.5235, + "step": 3607 + }, + { + "epoch": 0.21971196297536766, + "grad_norm": 0.9517713398996043, + "learning_rate": 4.984344913428363e-06, + "loss": 0.5589, + "step": 3608 + }, + { + "epoch": 0.21977285875224553, + "grad_norm": 0.9814473910069393, + "learning_rate": 4.984335996126782e-06, + "loss": 0.4486, + "step": 3609 + }, + { + "epoch": 0.2198337545291234, + "grad_norm": 1.0469018074397163, + "learning_rate": 4.984327076294211e-06, + "loss": 0.4684, + "step": 3610 + }, + { + "epoch": 0.21989465030600128, + "grad_norm": 1.030444647433261, + "learning_rate": 4.98431815393066e-06, + "loss": 0.4877, + "step": 3611 + }, + { + "epoch": 0.21995554608287915, + "grad_norm": 1.0580250183044841, + "learning_rate": 4.984309229036136e-06, + "loss": 0.55, + "step": 3612 + }, + { + "epoch": 0.22001644185975702, + "grad_norm": 0.9753491655572889, + "learning_rate": 4.98430030161065e-06, + "loss": 0.529, + "step": 3613 + }, + { + "epoch": 0.2200773376366349, + "grad_norm": 1.0277327911265826, + "learning_rate": 4.98429137165421e-06, + "loss": 0.4669, + "step": 3614 + }, + { + "epoch": 0.22013823341351277, + "grad_norm": 0.9892479774981794, + "learning_rate": 4.984282439166827e-06, + "loss": 0.4905, + "step": 3615 + }, + { + "epoch": 0.22019912919039064, + "grad_norm": 0.9821434408365656, + "learning_rate": 4.984273504148507e-06, + "loss": 0.4962, + "step": 3616 + }, + { + "epoch": 0.22026002496726851, + "grad_norm": 1.034569566341808, + "learning_rate": 4.984264566599262e-06, + "loss": 0.4871, + "step": 3617 + }, + { + "epoch": 0.2203209207441464, + "grad_norm": 1.0528372794915533, + "learning_rate": 4.9842556265190995e-06, + "loss": 0.5077, + "step": 3618 + }, + { + "epoch": 0.22038181652102426, + "grad_norm": 1.1016868895006773, + "learning_rate": 4.984246683908029e-06, + "loss": 0.4887, + "step": 3619 + }, + { + "epoch": 0.22044271229790213, + "grad_norm": 1.060326373708191, + "learning_rate": 4.98423773876606e-06, + "loss": 0.4705, + "step": 3620 + }, + { + "epoch": 0.22050360807478, + "grad_norm": 1.2313197749777403, + "learning_rate": 4.984228791093201e-06, + "loss": 0.5148, + "step": 3621 + }, + { + "epoch": 0.22056450385165788, + "grad_norm": 1.0093074464343585, + "learning_rate": 4.9842198408894604e-06, + "loss": 0.4955, + "step": 3622 + }, + { + "epoch": 0.22062539962853575, + "grad_norm": 0.9538930438794321, + "learning_rate": 4.98421088815485e-06, + "loss": 0.5591, + "step": 3623 + }, + { + "epoch": 0.22068629540541362, + "grad_norm": 1.0680132811883842, + "learning_rate": 4.984201932889376e-06, + "loss": 0.4778, + "step": 3624 + }, + { + "epoch": 0.2207471911822915, + "grad_norm": 1.0676372254971267, + "learning_rate": 4.984192975093049e-06, + "loss": 0.5423, + "step": 3625 + }, + { + "epoch": 0.22080808695916937, + "grad_norm": 1.0092946899408097, + "learning_rate": 4.984184014765878e-06, + "loss": 0.5252, + "step": 3626 + }, + { + "epoch": 0.22086898273604724, + "grad_norm": 0.9630429618084504, + "learning_rate": 4.984175051907872e-06, + "loss": 0.5383, + "step": 3627 + }, + { + "epoch": 0.22092987851292512, + "grad_norm": 1.095491873356134, + "learning_rate": 4.98416608651904e-06, + "loss": 0.4845, + "step": 3628 + }, + { + "epoch": 0.22099077428980302, + "grad_norm": 1.0198616066440156, + "learning_rate": 4.984157118599391e-06, + "loss": 0.5227, + "step": 3629 + }, + { + "epoch": 0.2210516700666809, + "grad_norm": 0.9264900035243322, + "learning_rate": 4.984148148148935e-06, + "loss": 0.6027, + "step": 3630 + }, + { + "epoch": 0.22111256584355876, + "grad_norm": 0.9920303895009099, + "learning_rate": 4.984139175167679e-06, + "loss": 0.4753, + "step": 3631 + }, + { + "epoch": 0.22117346162043663, + "grad_norm": 1.0689095519860532, + "learning_rate": 4.984130199655636e-06, + "loss": 0.464, + "step": 3632 + }, + { + "epoch": 0.2212343573973145, + "grad_norm": 1.0693238030282803, + "learning_rate": 4.984121221612811e-06, + "loss": 0.4765, + "step": 3633 + }, + { + "epoch": 0.22129525317419238, + "grad_norm": 0.9561980051294979, + "learning_rate": 4.984112241039216e-06, + "loss": 0.5137, + "step": 3634 + }, + { + "epoch": 0.22135614895107025, + "grad_norm": 1.1110219735739506, + "learning_rate": 4.984103257934858e-06, + "loss": 0.4915, + "step": 3635 + }, + { + "epoch": 0.22141704472794813, + "grad_norm": 1.0085938542613546, + "learning_rate": 4.984094272299748e-06, + "loss": 0.5537, + "step": 3636 + }, + { + "epoch": 0.221477940504826, + "grad_norm": 1.0206446369423188, + "learning_rate": 4.9840852841338946e-06, + "loss": 0.4968, + "step": 3637 + }, + { + "epoch": 0.22153883628170387, + "grad_norm": 1.008104305805527, + "learning_rate": 4.984076293437306e-06, + "loss": 0.4517, + "step": 3638 + }, + { + "epoch": 0.22159973205858174, + "grad_norm": 1.0090671849446387, + "learning_rate": 4.984067300209993e-06, + "loss": 0.5659, + "step": 3639 + }, + { + "epoch": 0.22166062783545962, + "grad_norm": 1.0786615203573802, + "learning_rate": 4.984058304451963e-06, + "loss": 0.4951, + "step": 3640 + }, + { + "epoch": 0.2217215236123375, + "grad_norm": 0.9701775348709315, + "learning_rate": 4.984049306163227e-06, + "loss": 0.5241, + "step": 3641 + }, + { + "epoch": 0.22178241938921536, + "grad_norm": 1.0102500306416036, + "learning_rate": 4.984040305343792e-06, + "loss": 0.4857, + "step": 3642 + }, + { + "epoch": 0.22184331516609324, + "grad_norm": 0.9569484349056119, + "learning_rate": 4.98403130199367e-06, + "loss": 0.5094, + "step": 3643 + }, + { + "epoch": 0.2219042109429711, + "grad_norm": 1.0412011668717063, + "learning_rate": 4.984022296112867e-06, + "loss": 0.5427, + "step": 3644 + }, + { + "epoch": 0.22196510671984898, + "grad_norm": 1.0459647000536478, + "learning_rate": 4.984013287701394e-06, + "loss": 0.5008, + "step": 3645 + }, + { + "epoch": 0.22202600249672685, + "grad_norm": 1.1306911614441362, + "learning_rate": 4.98400427675926e-06, + "loss": 0.4445, + "step": 3646 + }, + { + "epoch": 0.22208689827360473, + "grad_norm": 1.1329550795422219, + "learning_rate": 4.9839952632864745e-06, + "loss": 0.4966, + "step": 3647 + }, + { + "epoch": 0.2221477940504826, + "grad_norm": 0.9554822254406653, + "learning_rate": 4.983986247283046e-06, + "loss": 0.4955, + "step": 3648 + }, + { + "epoch": 0.22220868982736047, + "grad_norm": 1.1513353066225238, + "learning_rate": 4.983977228748984e-06, + "loss": 0.4747, + "step": 3649 + }, + { + "epoch": 0.22226958560423835, + "grad_norm": 1.090172423498002, + "learning_rate": 4.983968207684298e-06, + "loss": 0.5289, + "step": 3650 + }, + { + "epoch": 0.22233048138111622, + "grad_norm": 1.1265133214903589, + "learning_rate": 4.983959184088996e-06, + "loss": 0.4129, + "step": 3651 + }, + { + "epoch": 0.2223913771579941, + "grad_norm": 1.196251804387111, + "learning_rate": 4.983950157963089e-06, + "loss": 0.5338, + "step": 3652 + }, + { + "epoch": 0.22245227293487196, + "grad_norm": 1.0334343219976312, + "learning_rate": 4.983941129306585e-06, + "loss": 0.4532, + "step": 3653 + }, + { + "epoch": 0.22251316871174984, + "grad_norm": 1.014178843726231, + "learning_rate": 4.983932098119493e-06, + "loss": 0.5225, + "step": 3654 + }, + { + "epoch": 0.2225740644886277, + "grad_norm": 1.060130626201651, + "learning_rate": 4.983923064401823e-06, + "loss": 0.4889, + "step": 3655 + }, + { + "epoch": 0.22263496026550558, + "grad_norm": 1.0125945711546855, + "learning_rate": 4.9839140281535835e-06, + "loss": 0.5325, + "step": 3656 + }, + { + "epoch": 0.22269585604238346, + "grad_norm": 0.9769895410889771, + "learning_rate": 4.983904989374783e-06, + "loss": 0.5207, + "step": 3657 + }, + { + "epoch": 0.22275675181926133, + "grad_norm": 1.0570311616504617, + "learning_rate": 4.983895948065433e-06, + "loss": 0.5231, + "step": 3658 + }, + { + "epoch": 0.2228176475961392, + "grad_norm": 0.9925997999763783, + "learning_rate": 4.983886904225543e-06, + "loss": 0.4429, + "step": 3659 + }, + { + "epoch": 0.22287854337301707, + "grad_norm": 0.958117719090135, + "learning_rate": 4.983877857855118e-06, + "loss": 0.5546, + "step": 3660 + }, + { + "epoch": 0.22293943914989495, + "grad_norm": 1.081575798347445, + "learning_rate": 4.983868808954171e-06, + "loss": 0.5281, + "step": 3661 + }, + { + "epoch": 0.22300033492677282, + "grad_norm": 1.0057490391327615, + "learning_rate": 4.98385975752271e-06, + "loss": 0.5107, + "step": 3662 + }, + { + "epoch": 0.2230612307036507, + "grad_norm": 1.002430332098149, + "learning_rate": 4.9838507035607445e-06, + "loss": 0.4415, + "step": 3663 + }, + { + "epoch": 0.22312212648052857, + "grad_norm": 1.1517345079342831, + "learning_rate": 4.983841647068284e-06, + "loss": 0.4909, + "step": 3664 + }, + { + "epoch": 0.22318302225740644, + "grad_norm": 1.2061091479996402, + "learning_rate": 4.983832588045336e-06, + "loss": 0.5002, + "step": 3665 + }, + { + "epoch": 0.2232439180342843, + "grad_norm": 1.0618157442830711, + "learning_rate": 4.9838235264919115e-06, + "loss": 0.4973, + "step": 3666 + }, + { + "epoch": 0.22330481381116218, + "grad_norm": 1.0569504257258773, + "learning_rate": 4.98381446240802e-06, + "loss": 0.4688, + "step": 3667 + }, + { + "epoch": 0.22336570958804006, + "grad_norm": 1.0840193511230167, + "learning_rate": 4.983805395793669e-06, + "loss": 0.5157, + "step": 3668 + }, + { + "epoch": 0.22342660536491793, + "grad_norm": 0.9771170397587104, + "learning_rate": 4.983796326648869e-06, + "loss": 0.5174, + "step": 3669 + }, + { + "epoch": 0.22348750114179583, + "grad_norm": 1.0689973424421182, + "learning_rate": 4.983787254973629e-06, + "loss": 0.4876, + "step": 3670 + }, + { + "epoch": 0.2235483969186737, + "grad_norm": 1.022888645024949, + "learning_rate": 4.983778180767958e-06, + "loss": 0.5207, + "step": 3671 + }, + { + "epoch": 0.22360929269555158, + "grad_norm": 1.074167007857084, + "learning_rate": 4.9837691040318656e-06, + "loss": 0.5736, + "step": 3672 + }, + { + "epoch": 0.22367018847242945, + "grad_norm": 1.0146785158032263, + "learning_rate": 4.983760024765361e-06, + "loss": 0.4905, + "step": 3673 + }, + { + "epoch": 0.22373108424930732, + "grad_norm": 0.9852477276164616, + "learning_rate": 4.983750942968452e-06, + "loss": 0.5199, + "step": 3674 + }, + { + "epoch": 0.2237919800261852, + "grad_norm": 0.9520716641224147, + "learning_rate": 4.983741858641151e-06, + "loss": 0.5013, + "step": 3675 + }, + { + "epoch": 0.22385287580306307, + "grad_norm": 0.9625468738064992, + "learning_rate": 4.983732771783465e-06, + "loss": 0.5036, + "step": 3676 + }, + { + "epoch": 0.22391377157994094, + "grad_norm": 0.9921370188712887, + "learning_rate": 4.983723682395404e-06, + "loss": 0.5196, + "step": 3677 + }, + { + "epoch": 0.2239746673568188, + "grad_norm": 1.1441418990127838, + "learning_rate": 4.983714590476976e-06, + "loss": 0.4116, + "step": 3678 + }, + { + "epoch": 0.22403556313369669, + "grad_norm": 1.082491842997823, + "learning_rate": 4.983705496028192e-06, + "loss": 0.4806, + "step": 3679 + }, + { + "epoch": 0.22409645891057456, + "grad_norm": 1.0370195137056684, + "learning_rate": 4.98369639904906e-06, + "loss": 0.5195, + "step": 3680 + }, + { + "epoch": 0.22415735468745243, + "grad_norm": 1.0333843525215605, + "learning_rate": 4.9836872995395905e-06, + "loss": 0.4907, + "step": 3681 + }, + { + "epoch": 0.2242182504643303, + "grad_norm": 1.114320515939684, + "learning_rate": 4.983678197499791e-06, + "loss": 0.4709, + "step": 3682 + }, + { + "epoch": 0.22427914624120818, + "grad_norm": 1.1162487546310778, + "learning_rate": 4.9836690929296715e-06, + "loss": 0.4929, + "step": 3683 + }, + { + "epoch": 0.22434004201808605, + "grad_norm": 1.0495223185092992, + "learning_rate": 4.983659985829242e-06, + "loss": 0.5095, + "step": 3684 + }, + { + "epoch": 0.22440093779496392, + "grad_norm": 1.0558277149045903, + "learning_rate": 4.983650876198513e-06, + "loss": 0.484, + "step": 3685 + }, + { + "epoch": 0.2244618335718418, + "grad_norm": 1.0446666886000473, + "learning_rate": 4.98364176403749e-06, + "loss": 0.5479, + "step": 3686 + }, + { + "epoch": 0.22452272934871967, + "grad_norm": 1.0617704341336716, + "learning_rate": 4.983632649346185e-06, + "loss": 0.5014, + "step": 3687 + }, + { + "epoch": 0.22458362512559754, + "grad_norm": 0.9390263602169476, + "learning_rate": 4.9836235321246064e-06, + "loss": 0.4904, + "step": 3688 + }, + { + "epoch": 0.22464452090247541, + "grad_norm": 1.0827146730456525, + "learning_rate": 4.983614412372765e-06, + "loss": 0.4799, + "step": 3689 + }, + { + "epoch": 0.2247054166793533, + "grad_norm": 0.9868236134692797, + "learning_rate": 4.983605290090668e-06, + "loss": 0.4918, + "step": 3690 + }, + { + "epoch": 0.22476631245623116, + "grad_norm": 1.0775289876324547, + "learning_rate": 4.983596165278325e-06, + "loss": 0.4907, + "step": 3691 + }, + { + "epoch": 0.22482720823310903, + "grad_norm": 1.057323032753069, + "learning_rate": 4.9835870379357464e-06, + "loss": 0.5981, + "step": 3692 + }, + { + "epoch": 0.2248881040099869, + "grad_norm": 1.1134981490237437, + "learning_rate": 4.983577908062941e-06, + "loss": 0.4775, + "step": 3693 + }, + { + "epoch": 0.22494899978686478, + "grad_norm": 1.0518086110848555, + "learning_rate": 4.983568775659918e-06, + "loss": 0.4894, + "step": 3694 + }, + { + "epoch": 0.22500989556374265, + "grad_norm": 1.1918477590989611, + "learning_rate": 4.9835596407266875e-06, + "loss": 0.4432, + "step": 3695 + }, + { + "epoch": 0.22507079134062052, + "grad_norm": 1.1633733543908427, + "learning_rate": 4.9835505032632576e-06, + "loss": 0.538, + "step": 3696 + }, + { + "epoch": 0.2251316871174984, + "grad_norm": 1.0535741243759975, + "learning_rate": 4.983541363269638e-06, + "loss": 0.4881, + "step": 3697 + }, + { + "epoch": 0.22519258289437627, + "grad_norm": 0.9987444995260891, + "learning_rate": 4.983532220745838e-06, + "loss": 0.5471, + "step": 3698 + }, + { + "epoch": 0.22525347867125414, + "grad_norm": 1.043180854127297, + "learning_rate": 4.9835230756918664e-06, + "loss": 0.4816, + "step": 3699 + }, + { + "epoch": 0.22531437444813202, + "grad_norm": 1.0712837106295863, + "learning_rate": 4.983513928107734e-06, + "loss": 0.4569, + "step": 3700 + }, + { + "epoch": 0.2253752702250099, + "grad_norm": 1.0014453767979237, + "learning_rate": 4.983504777993449e-06, + "loss": 0.4622, + "step": 3701 + }, + { + "epoch": 0.22543616600188776, + "grad_norm": 1.012923974362725, + "learning_rate": 4.983495625349021e-06, + "loss": 0.5966, + "step": 3702 + }, + { + "epoch": 0.22549706177876563, + "grad_norm": 1.059595012427275, + "learning_rate": 4.983486470174459e-06, + "loss": 0.5185, + "step": 3703 + }, + { + "epoch": 0.2255579575556435, + "grad_norm": 1.0344802691444002, + "learning_rate": 4.9834773124697735e-06, + "loss": 0.5062, + "step": 3704 + }, + { + "epoch": 0.22561885333252138, + "grad_norm": 1.0743234176377614, + "learning_rate": 4.983468152234972e-06, + "loss": 0.4726, + "step": 3705 + }, + { + "epoch": 0.22567974910939925, + "grad_norm": 1.076741005843409, + "learning_rate": 4.983458989470065e-06, + "loss": 0.4622, + "step": 3706 + }, + { + "epoch": 0.22574064488627713, + "grad_norm": 1.0315842567351479, + "learning_rate": 4.983449824175062e-06, + "loss": 0.4549, + "step": 3707 + }, + { + "epoch": 0.225801540663155, + "grad_norm": 1.1133313367540218, + "learning_rate": 4.983440656349972e-06, + "loss": 0.5903, + "step": 3708 + }, + { + "epoch": 0.22586243644003287, + "grad_norm": 0.9322357440896087, + "learning_rate": 4.983431485994803e-06, + "loss": 0.515, + "step": 3709 + }, + { + "epoch": 0.22592333221691074, + "grad_norm": 1.0658420975407386, + "learning_rate": 4.9834223131095675e-06, + "loss": 0.4275, + "step": 3710 + }, + { + "epoch": 0.22598422799378864, + "grad_norm": 0.9495233903246977, + "learning_rate": 4.9834131376942715e-06, + "loss": 0.4973, + "step": 3711 + }, + { + "epoch": 0.22604512377066652, + "grad_norm": 1.1228274437326409, + "learning_rate": 4.983403959748927e-06, + "loss": 0.5674, + "step": 3712 + }, + { + "epoch": 0.2261060195475444, + "grad_norm": 1.101785497436263, + "learning_rate": 4.983394779273542e-06, + "loss": 0.4185, + "step": 3713 + }, + { + "epoch": 0.22616691532442226, + "grad_norm": 1.1604830390741283, + "learning_rate": 4.983385596268125e-06, + "loss": 0.435, + "step": 3714 + }, + { + "epoch": 0.22622781110130014, + "grad_norm": 1.0703028572822528, + "learning_rate": 4.983376410732686e-06, + "loss": 0.474, + "step": 3715 + }, + { + "epoch": 0.226288706878178, + "grad_norm": 1.0011493893344998, + "learning_rate": 4.983367222667237e-06, + "loss": 0.5219, + "step": 3716 + }, + { + "epoch": 0.22634960265505588, + "grad_norm": 1.11984783985255, + "learning_rate": 4.983358032071783e-06, + "loss": 0.4307, + "step": 3717 + }, + { + "epoch": 0.22641049843193375, + "grad_norm": 1.1210717957555651, + "learning_rate": 4.983348838946337e-06, + "loss": 0.5006, + "step": 3718 + }, + { + "epoch": 0.22647139420881163, + "grad_norm": 0.9514429573211931, + "learning_rate": 4.983339643290906e-06, + "loss": 0.6271, + "step": 3719 + }, + { + "epoch": 0.2265322899856895, + "grad_norm": 0.986467215810796, + "learning_rate": 4.9833304451055e-06, + "loss": 0.5453, + "step": 3720 + }, + { + "epoch": 0.22659318576256737, + "grad_norm": 1.0471072271111141, + "learning_rate": 4.983321244390129e-06, + "loss": 0.4969, + "step": 3721 + }, + { + "epoch": 0.22665408153944525, + "grad_norm": 1.041484121284085, + "learning_rate": 4.983312041144802e-06, + "loss": 0.5207, + "step": 3722 + }, + { + "epoch": 0.22671497731632312, + "grad_norm": 1.0381568525385074, + "learning_rate": 4.983302835369528e-06, + "loss": 0.4827, + "step": 3723 + }, + { + "epoch": 0.226775873093201, + "grad_norm": 1.0971295190937118, + "learning_rate": 4.983293627064317e-06, + "loss": 0.503, + "step": 3724 + }, + { + "epoch": 0.22683676887007886, + "grad_norm": 1.0120260811476922, + "learning_rate": 4.983284416229178e-06, + "loss": 0.5802, + "step": 3725 + }, + { + "epoch": 0.22689766464695674, + "grad_norm": 1.051108740907145, + "learning_rate": 4.983275202864121e-06, + "loss": 0.5611, + "step": 3726 + }, + { + "epoch": 0.2269585604238346, + "grad_norm": 1.0139916412471541, + "learning_rate": 4.983265986969153e-06, + "loss": 0.5115, + "step": 3727 + }, + { + "epoch": 0.22701945620071248, + "grad_norm": 1.03856992740413, + "learning_rate": 4.983256768544287e-06, + "loss": 0.5295, + "step": 3728 + }, + { + "epoch": 0.22708035197759036, + "grad_norm": 1.0703636822112685, + "learning_rate": 4.9832475475895305e-06, + "loss": 0.5519, + "step": 3729 + }, + { + "epoch": 0.22714124775446823, + "grad_norm": 1.0218200703347118, + "learning_rate": 4.983238324104893e-06, + "loss": 0.5112, + "step": 3730 + }, + { + "epoch": 0.2272021435313461, + "grad_norm": 0.9787784585950156, + "learning_rate": 4.983229098090383e-06, + "loss": 0.5202, + "step": 3731 + }, + { + "epoch": 0.22726303930822397, + "grad_norm": 1.039645387408998, + "learning_rate": 4.983219869546012e-06, + "loss": 0.4975, + "step": 3732 + }, + { + "epoch": 0.22732393508510185, + "grad_norm": 1.0861992566900163, + "learning_rate": 4.983210638471787e-06, + "loss": 0.4357, + "step": 3733 + }, + { + "epoch": 0.22738483086197972, + "grad_norm": 1.0138069141267996, + "learning_rate": 4.98320140486772e-06, + "loss": 0.4893, + "step": 3734 + }, + { + "epoch": 0.2274457266388576, + "grad_norm": 0.9739057975920656, + "learning_rate": 4.983192168733818e-06, + "loss": 0.5305, + "step": 3735 + }, + { + "epoch": 0.22750662241573547, + "grad_norm": 1.008614745399796, + "learning_rate": 4.983182930070092e-06, + "loss": 0.561, + "step": 3736 + }, + { + "epoch": 0.22756751819261334, + "grad_norm": 1.0260249812941757, + "learning_rate": 4.983173688876551e-06, + "loss": 0.4524, + "step": 3737 + }, + { + "epoch": 0.2276284139694912, + "grad_norm": 1.063459758955186, + "learning_rate": 4.983164445153203e-06, + "loss": 0.4781, + "step": 3738 + }, + { + "epoch": 0.22768930974636908, + "grad_norm": 0.9990103337566727, + "learning_rate": 4.98315519890006e-06, + "loss": 0.4808, + "step": 3739 + }, + { + "epoch": 0.22775020552324696, + "grad_norm": 0.9878451659167987, + "learning_rate": 4.98314595011713e-06, + "loss": 0.4986, + "step": 3740 + }, + { + "epoch": 0.22781110130012483, + "grad_norm": 1.0431726882212, + "learning_rate": 4.983136698804422e-06, + "loss": 0.4869, + "step": 3741 + }, + { + "epoch": 0.2278719970770027, + "grad_norm": 0.9770139117393136, + "learning_rate": 4.983127444961946e-06, + "loss": 0.4889, + "step": 3742 + }, + { + "epoch": 0.22793289285388058, + "grad_norm": 0.9823464157712205, + "learning_rate": 4.983118188589712e-06, + "loss": 0.5104, + "step": 3743 + }, + { + "epoch": 0.22799378863075845, + "grad_norm": 0.982543162772777, + "learning_rate": 4.9831089296877275e-06, + "loss": 0.5377, + "step": 3744 + }, + { + "epoch": 0.22805468440763632, + "grad_norm": 1.103635489431834, + "learning_rate": 4.9830996682560045e-06, + "loss": 0.4814, + "step": 3745 + }, + { + "epoch": 0.2281155801845142, + "grad_norm": 0.9842092093558494, + "learning_rate": 4.983090404294551e-06, + "loss": 0.5401, + "step": 3746 + }, + { + "epoch": 0.22817647596139207, + "grad_norm": 0.9758647394495704, + "learning_rate": 4.983081137803376e-06, + "loss": 0.4883, + "step": 3747 + }, + { + "epoch": 0.22823737173826994, + "grad_norm": 1.0546473892803705, + "learning_rate": 4.9830718687824905e-06, + "loss": 0.5106, + "step": 3748 + }, + { + "epoch": 0.2282982675151478, + "grad_norm": 1.0949829093951071, + "learning_rate": 4.983062597231903e-06, + "loss": 0.5018, + "step": 3749 + }, + { + "epoch": 0.22835916329202569, + "grad_norm": 1.0863763305553835, + "learning_rate": 4.983053323151622e-06, + "loss": 0.4438, + "step": 3750 + }, + { + "epoch": 0.22842005906890356, + "grad_norm": 1.063410478764649, + "learning_rate": 4.9830440465416575e-06, + "loss": 0.5212, + "step": 3751 + }, + { + "epoch": 0.22848095484578146, + "grad_norm": 1.063333577899775, + "learning_rate": 4.9830347674020205e-06, + "loss": 0.5321, + "step": 3752 + }, + { + "epoch": 0.22854185062265933, + "grad_norm": 1.0512383326853638, + "learning_rate": 4.9830254857327195e-06, + "loss": 0.4444, + "step": 3753 + }, + { + "epoch": 0.2286027463995372, + "grad_norm": 0.99803840389353, + "learning_rate": 4.983016201533763e-06, + "loss": 0.4991, + "step": 3754 + }, + { + "epoch": 0.22866364217641508, + "grad_norm": 1.1491105743240242, + "learning_rate": 4.983006914805162e-06, + "loss": 0.4441, + "step": 3755 + }, + { + "epoch": 0.22872453795329295, + "grad_norm": 1.085619264445055, + "learning_rate": 4.982997625546924e-06, + "loss": 0.5339, + "step": 3756 + }, + { + "epoch": 0.22878543373017082, + "grad_norm": 1.091534765480261, + "learning_rate": 4.9829883337590604e-06, + "loss": 0.5051, + "step": 3757 + }, + { + "epoch": 0.2288463295070487, + "grad_norm": 1.0520591915932902, + "learning_rate": 4.98297903944158e-06, + "loss": 0.5479, + "step": 3758 + }, + { + "epoch": 0.22890722528392657, + "grad_norm": 1.0829531219335513, + "learning_rate": 4.982969742594492e-06, + "loss": 0.5295, + "step": 3759 + }, + { + "epoch": 0.22896812106080444, + "grad_norm": 1.033727312221422, + "learning_rate": 4.982960443217806e-06, + "loss": 0.4715, + "step": 3760 + }, + { + "epoch": 0.22902901683768231, + "grad_norm": 1.0535824532244455, + "learning_rate": 4.982951141311532e-06, + "loss": 0.5663, + "step": 3761 + }, + { + "epoch": 0.2290899126145602, + "grad_norm": 1.0740155996153367, + "learning_rate": 4.982941836875678e-06, + "loss": 0.4532, + "step": 3762 + }, + { + "epoch": 0.22915080839143806, + "grad_norm": 1.1986277408454418, + "learning_rate": 4.9829325299102546e-06, + "loss": 0.4685, + "step": 3763 + }, + { + "epoch": 0.22921170416831593, + "grad_norm": 1.0989822249752235, + "learning_rate": 4.9829232204152724e-06, + "loss": 0.5125, + "step": 3764 + }, + { + "epoch": 0.2292725999451938, + "grad_norm": 1.0621132622167233, + "learning_rate": 4.982913908390738e-06, + "loss": 0.4913, + "step": 3765 + }, + { + "epoch": 0.22933349572207168, + "grad_norm": 0.9579994766495308, + "learning_rate": 4.982904593836664e-06, + "loss": 0.5299, + "step": 3766 + }, + { + "epoch": 0.22939439149894955, + "grad_norm": 0.9674949931008232, + "learning_rate": 4.982895276753058e-06, + "loss": 0.5313, + "step": 3767 + }, + { + "epoch": 0.22945528727582742, + "grad_norm": 0.9651965632897885, + "learning_rate": 4.982885957139929e-06, + "loss": 0.5284, + "step": 3768 + }, + { + "epoch": 0.2295161830527053, + "grad_norm": 0.9448646809301025, + "learning_rate": 4.982876634997289e-06, + "loss": 0.525, + "step": 3769 + }, + { + "epoch": 0.22957707882958317, + "grad_norm": 0.9747603509445841, + "learning_rate": 4.982867310325145e-06, + "loss": 0.4866, + "step": 3770 + }, + { + "epoch": 0.22963797460646104, + "grad_norm": 1.0547304817317602, + "learning_rate": 4.982857983123507e-06, + "loss": 0.4712, + "step": 3771 + }, + { + "epoch": 0.22969887038333892, + "grad_norm": 1.153717996715709, + "learning_rate": 4.982848653392385e-06, + "loss": 0.5307, + "step": 3772 + }, + { + "epoch": 0.2297597661602168, + "grad_norm": 1.0154104695975763, + "learning_rate": 4.982839321131789e-06, + "loss": 0.4935, + "step": 3773 + }, + { + "epoch": 0.22982066193709466, + "grad_norm": 1.0637298873311283, + "learning_rate": 4.982829986341727e-06, + "loss": 0.4535, + "step": 3774 + }, + { + "epoch": 0.22988155771397253, + "grad_norm": 1.1134841933219644, + "learning_rate": 4.982820649022211e-06, + "loss": 0.4788, + "step": 3775 + }, + { + "epoch": 0.2299424534908504, + "grad_norm": 1.0386476660849633, + "learning_rate": 4.982811309173248e-06, + "loss": 0.473, + "step": 3776 + }, + { + "epoch": 0.23000334926772828, + "grad_norm": 1.1598616542466147, + "learning_rate": 4.982801966794848e-06, + "loss": 0.5752, + "step": 3777 + }, + { + "epoch": 0.23006424504460615, + "grad_norm": 0.9742574963680173, + "learning_rate": 4.9827926218870216e-06, + "loss": 0.5078, + "step": 3778 + }, + { + "epoch": 0.23012514082148403, + "grad_norm": 1.2297493764213325, + "learning_rate": 4.982783274449777e-06, + "loss": 0.5363, + "step": 3779 + }, + { + "epoch": 0.2301860365983619, + "grad_norm": 1.1011349807886763, + "learning_rate": 4.982773924483125e-06, + "loss": 0.54, + "step": 3780 + }, + { + "epoch": 0.23024693237523977, + "grad_norm": 0.9641627572847817, + "learning_rate": 4.982764571987076e-06, + "loss": 0.4881, + "step": 3781 + }, + { + "epoch": 0.23030782815211764, + "grad_norm": 1.0468765928448454, + "learning_rate": 4.982755216961636e-06, + "loss": 0.5417, + "step": 3782 + }, + { + "epoch": 0.23036872392899552, + "grad_norm": 1.075605059446278, + "learning_rate": 4.9827458594068166e-06, + "loss": 0.4417, + "step": 3783 + }, + { + "epoch": 0.2304296197058734, + "grad_norm": 1.0417261647629539, + "learning_rate": 4.982736499322628e-06, + "loss": 0.5416, + "step": 3784 + }, + { + "epoch": 0.23049051548275126, + "grad_norm": 0.9902458633143093, + "learning_rate": 4.982727136709079e-06, + "loss": 0.5156, + "step": 3785 + }, + { + "epoch": 0.23055141125962914, + "grad_norm": 1.1281322812824437, + "learning_rate": 4.982717771566179e-06, + "loss": 0.4668, + "step": 3786 + }, + { + "epoch": 0.230612307036507, + "grad_norm": 1.142327503666585, + "learning_rate": 4.982708403893938e-06, + "loss": 0.4397, + "step": 3787 + }, + { + "epoch": 0.23067320281338488, + "grad_norm": 1.062585017330708, + "learning_rate": 4.9826990336923655e-06, + "loss": 0.4774, + "step": 3788 + }, + { + "epoch": 0.23073409859026275, + "grad_norm": 1.023809843879714, + "learning_rate": 4.98268966096147e-06, + "loss": 0.5477, + "step": 3789 + }, + { + "epoch": 0.23079499436714063, + "grad_norm": 1.0612345190241597, + "learning_rate": 4.982680285701264e-06, + "loss": 0.4798, + "step": 3790 + }, + { + "epoch": 0.2308558901440185, + "grad_norm": 1.1553056950701304, + "learning_rate": 4.982670907911752e-06, + "loss": 0.513, + "step": 3791 + }, + { + "epoch": 0.23091678592089637, + "grad_norm": 0.9796706360210359, + "learning_rate": 4.982661527592948e-06, + "loss": 0.4996, + "step": 3792 + }, + { + "epoch": 0.23097768169777427, + "grad_norm": 1.0466354903084, + "learning_rate": 4.98265214474486e-06, + "loss": 0.4377, + "step": 3793 + }, + { + "epoch": 0.23103857747465215, + "grad_norm": 0.9942405146274848, + "learning_rate": 4.982642759367498e-06, + "loss": 0.571, + "step": 3794 + }, + { + "epoch": 0.23109947325153002, + "grad_norm": 1.0229228387141278, + "learning_rate": 4.982633371460871e-06, + "loss": 0.5136, + "step": 3795 + }, + { + "epoch": 0.2311603690284079, + "grad_norm": 1.1800761973790914, + "learning_rate": 4.982623981024988e-06, + "loss": 0.4595, + "step": 3796 + }, + { + "epoch": 0.23122126480528576, + "grad_norm": 1.0204336624096733, + "learning_rate": 4.98261458805986e-06, + "loss": 0.5252, + "step": 3797 + }, + { + "epoch": 0.23128216058216364, + "grad_norm": 1.0547336894868837, + "learning_rate": 4.982605192565496e-06, + "loss": 0.4981, + "step": 3798 + }, + { + "epoch": 0.2313430563590415, + "grad_norm": 1.1199248328784053, + "learning_rate": 4.982595794541905e-06, + "loss": 0.4495, + "step": 3799 + }, + { + "epoch": 0.23140395213591938, + "grad_norm": 1.0505605392382793, + "learning_rate": 4.982586393989097e-06, + "loss": 0.5581, + "step": 3800 + }, + { + "epoch": 0.23146484791279726, + "grad_norm": 1.0284623778455193, + "learning_rate": 4.982576990907082e-06, + "loss": 0.4494, + "step": 3801 + }, + { + "epoch": 0.23152574368967513, + "grad_norm": 1.0621531115318543, + "learning_rate": 4.982567585295869e-06, + "loss": 0.4533, + "step": 3802 + }, + { + "epoch": 0.231586639466553, + "grad_norm": 0.9640816358508384, + "learning_rate": 4.9825581771554676e-06, + "loss": 0.5023, + "step": 3803 + }, + { + "epoch": 0.23164753524343087, + "grad_norm": 1.0460499917416157, + "learning_rate": 4.982548766485887e-06, + "loss": 0.4662, + "step": 3804 + }, + { + "epoch": 0.23170843102030875, + "grad_norm": 1.0955118280072782, + "learning_rate": 4.982539353287138e-06, + "loss": 0.5127, + "step": 3805 + }, + { + "epoch": 0.23176932679718662, + "grad_norm": 1.009534681983333, + "learning_rate": 4.98252993755923e-06, + "loss": 0.4661, + "step": 3806 + }, + { + "epoch": 0.2318302225740645, + "grad_norm": 1.0375906465286762, + "learning_rate": 4.982520519302171e-06, + "loss": 0.5758, + "step": 3807 + }, + { + "epoch": 0.23189111835094237, + "grad_norm": 1.0648615877641143, + "learning_rate": 4.982511098515972e-06, + "loss": 0.4788, + "step": 3808 + }, + { + "epoch": 0.23195201412782024, + "grad_norm": 1.3337664058384422, + "learning_rate": 4.982501675200643e-06, + "loss": 0.4849, + "step": 3809 + }, + { + "epoch": 0.2320129099046981, + "grad_norm": 1.0584047017085294, + "learning_rate": 4.982492249356193e-06, + "loss": 0.4879, + "step": 3810 + }, + { + "epoch": 0.23207380568157598, + "grad_norm": 0.98869554572663, + "learning_rate": 4.98248282098263e-06, + "loss": 0.5296, + "step": 3811 + }, + { + "epoch": 0.23213470145845386, + "grad_norm": 0.9499119626236463, + "learning_rate": 4.982473390079967e-06, + "loss": 0.4766, + "step": 3812 + }, + { + "epoch": 0.23219559723533173, + "grad_norm": 1.0425475486754014, + "learning_rate": 4.98246395664821e-06, + "loss": 0.5391, + "step": 3813 + }, + { + "epoch": 0.2322564930122096, + "grad_norm": 0.944786267455095, + "learning_rate": 4.982454520687372e-06, + "loss": 0.4945, + "step": 3814 + }, + { + "epoch": 0.23231738878908748, + "grad_norm": 1.0700137252726838, + "learning_rate": 4.98244508219746e-06, + "loss": 0.438, + "step": 3815 + }, + { + "epoch": 0.23237828456596535, + "grad_norm": 1.2079890915181453, + "learning_rate": 4.982435641178485e-06, + "loss": 0.5075, + "step": 3816 + }, + { + "epoch": 0.23243918034284322, + "grad_norm": 1.117078882967973, + "learning_rate": 4.982426197630456e-06, + "loss": 0.4564, + "step": 3817 + }, + { + "epoch": 0.2325000761197211, + "grad_norm": 0.9990147064543815, + "learning_rate": 4.982416751553382e-06, + "loss": 0.4502, + "step": 3818 + }, + { + "epoch": 0.23256097189659897, + "grad_norm": 0.9820821333565848, + "learning_rate": 4.982407302947274e-06, + "loss": 0.5457, + "step": 3819 + }, + { + "epoch": 0.23262186767347684, + "grad_norm": 1.0459675197701963, + "learning_rate": 4.982397851812141e-06, + "loss": 0.4591, + "step": 3820 + }, + { + "epoch": 0.2326827634503547, + "grad_norm": 1.0171638870211217, + "learning_rate": 4.982388398147993e-06, + "loss": 0.4959, + "step": 3821 + }, + { + "epoch": 0.23274365922723259, + "grad_norm": 1.0164604820815775, + "learning_rate": 4.98237894195484e-06, + "loss": 0.5417, + "step": 3822 + }, + { + "epoch": 0.23280455500411046, + "grad_norm": 1.0523767418549692, + "learning_rate": 4.9823694832326896e-06, + "loss": 0.5251, + "step": 3823 + }, + { + "epoch": 0.23286545078098833, + "grad_norm": 1.0092754418317391, + "learning_rate": 4.9823600219815524e-06, + "loss": 0.5107, + "step": 3824 + }, + { + "epoch": 0.2329263465578662, + "grad_norm": 1.0846120039292853, + "learning_rate": 4.98235055820144e-06, + "loss": 0.4939, + "step": 3825 + }, + { + "epoch": 0.23298724233474408, + "grad_norm": 0.9365409400473113, + "learning_rate": 4.98234109189236e-06, + "loss": 0.5241, + "step": 3826 + }, + { + "epoch": 0.23304813811162195, + "grad_norm": 1.2008215572195, + "learning_rate": 4.982331623054322e-06, + "loss": 0.4544, + "step": 3827 + }, + { + "epoch": 0.23310903388849982, + "grad_norm": 1.0836030611654375, + "learning_rate": 4.982322151687337e-06, + "loss": 0.4512, + "step": 3828 + }, + { + "epoch": 0.2331699296653777, + "grad_norm": 0.996632494570029, + "learning_rate": 4.982312677791412e-06, + "loss": 0.5216, + "step": 3829 + }, + { + "epoch": 0.23323082544225557, + "grad_norm": 1.0870396209756983, + "learning_rate": 4.98230320136656e-06, + "loss": 0.5191, + "step": 3830 + }, + { + "epoch": 0.23329172121913344, + "grad_norm": 1.0885430782899022, + "learning_rate": 4.982293722412788e-06, + "loss": 0.4931, + "step": 3831 + }, + { + "epoch": 0.23335261699601131, + "grad_norm": 1.0411532500417353, + "learning_rate": 4.982284240930108e-06, + "loss": 0.4847, + "step": 3832 + }, + { + "epoch": 0.2334135127728892, + "grad_norm": 0.9495249639793224, + "learning_rate": 4.9822747569185285e-06, + "loss": 0.5101, + "step": 3833 + }, + { + "epoch": 0.2334744085497671, + "grad_norm": 0.9733246788767889, + "learning_rate": 4.982265270378058e-06, + "loss": 0.5195, + "step": 3834 + }, + { + "epoch": 0.23353530432664496, + "grad_norm": 1.0229040522921637, + "learning_rate": 4.982255781308708e-06, + "loss": 0.5119, + "step": 3835 + }, + { + "epoch": 0.23359620010352283, + "grad_norm": 0.9958938347043992, + "learning_rate": 4.982246289710487e-06, + "loss": 0.4776, + "step": 3836 + }, + { + "epoch": 0.2336570958804007, + "grad_norm": 1.0403162323912059, + "learning_rate": 4.982236795583406e-06, + "loss": 0.472, + "step": 3837 + }, + { + "epoch": 0.23371799165727858, + "grad_norm": 1.0519931448520232, + "learning_rate": 4.982227298927472e-06, + "loss": 0.4447, + "step": 3838 + }, + { + "epoch": 0.23377888743415645, + "grad_norm": 1.0378064639186813, + "learning_rate": 4.982217799742698e-06, + "loss": 0.5366, + "step": 3839 + }, + { + "epoch": 0.23383978321103432, + "grad_norm": 1.0818034000699541, + "learning_rate": 4.982208298029091e-06, + "loss": 0.4888, + "step": 3840 + }, + { + "epoch": 0.2339006789879122, + "grad_norm": 1.175437327163238, + "learning_rate": 4.982198793786663e-06, + "loss": 0.5101, + "step": 3841 + }, + { + "epoch": 0.23396157476479007, + "grad_norm": 1.0566467311075343, + "learning_rate": 4.982189287015422e-06, + "loss": 0.4733, + "step": 3842 + }, + { + "epoch": 0.23402247054166794, + "grad_norm": 1.0707565480995254, + "learning_rate": 4.982179777715378e-06, + "loss": 0.5785, + "step": 3843 + }, + { + "epoch": 0.23408336631854582, + "grad_norm": 1.0700436711890222, + "learning_rate": 4.982170265886541e-06, + "loss": 0.4977, + "step": 3844 + }, + { + "epoch": 0.2341442620954237, + "grad_norm": 1.098822585849228, + "learning_rate": 4.98216075152892e-06, + "loss": 0.4551, + "step": 3845 + }, + { + "epoch": 0.23420515787230156, + "grad_norm": 1.0519589768889273, + "learning_rate": 4.9821512346425256e-06, + "loss": 0.4543, + "step": 3846 + }, + { + "epoch": 0.23426605364917943, + "grad_norm": 1.0738597681100417, + "learning_rate": 4.982141715227367e-06, + "loss": 0.5853, + "step": 3847 + }, + { + "epoch": 0.2343269494260573, + "grad_norm": 1.0392373729285156, + "learning_rate": 4.982132193283454e-06, + "loss": 0.4779, + "step": 3848 + }, + { + "epoch": 0.23438784520293518, + "grad_norm": 1.0108312273955453, + "learning_rate": 4.9821226688107975e-06, + "loss": 0.5576, + "step": 3849 + }, + { + "epoch": 0.23444874097981305, + "grad_norm": 1.0639551485639465, + "learning_rate": 4.982113141809405e-06, + "loss": 0.5381, + "step": 3850 + }, + { + "epoch": 0.23450963675669093, + "grad_norm": 1.0655376642922674, + "learning_rate": 4.982103612279286e-06, + "loss": 0.4868, + "step": 3851 + }, + { + "epoch": 0.2345705325335688, + "grad_norm": 1.0307836145689413, + "learning_rate": 4.982094080220453e-06, + "loss": 0.5237, + "step": 3852 + }, + { + "epoch": 0.23463142831044667, + "grad_norm": 1.0679601921184858, + "learning_rate": 4.982084545632914e-06, + "loss": 0.5092, + "step": 3853 + }, + { + "epoch": 0.23469232408732454, + "grad_norm": 1.0788151164893014, + "learning_rate": 4.982075008516679e-06, + "loss": 0.4694, + "step": 3854 + }, + { + "epoch": 0.23475321986420242, + "grad_norm": 1.0281501245210427, + "learning_rate": 4.982065468871758e-06, + "loss": 0.5036, + "step": 3855 + }, + { + "epoch": 0.2348141156410803, + "grad_norm": 0.9885288356406656, + "learning_rate": 4.982055926698159e-06, + "loss": 0.466, + "step": 3856 + }, + { + "epoch": 0.23487501141795816, + "grad_norm": 1.2061176699736167, + "learning_rate": 4.982046381995893e-06, + "loss": 0.4748, + "step": 3857 + }, + { + "epoch": 0.23493590719483604, + "grad_norm": 1.0190371253426487, + "learning_rate": 4.98203683476497e-06, + "loss": 0.5097, + "step": 3858 + }, + { + "epoch": 0.2349968029717139, + "grad_norm": 1.0830686555594666, + "learning_rate": 4.982027285005401e-06, + "loss": 0.48, + "step": 3859 + }, + { + "epoch": 0.23505769874859178, + "grad_norm": 1.0502416164318502, + "learning_rate": 4.982017732717192e-06, + "loss": 0.4821, + "step": 3860 + }, + { + "epoch": 0.23511859452546965, + "grad_norm": 1.076564570294196, + "learning_rate": 4.982008177900357e-06, + "loss": 0.4881, + "step": 3861 + }, + { + "epoch": 0.23517949030234753, + "grad_norm": 0.9689128247214893, + "learning_rate": 4.981998620554903e-06, + "loss": 0.5256, + "step": 3862 + }, + { + "epoch": 0.2352403860792254, + "grad_norm": 1.065895180227061, + "learning_rate": 4.98198906068084e-06, + "loss": 0.4895, + "step": 3863 + }, + { + "epoch": 0.23530128185610327, + "grad_norm": 1.1180781524926755, + "learning_rate": 4.981979498278177e-06, + "loss": 0.5389, + "step": 3864 + }, + { + "epoch": 0.23536217763298115, + "grad_norm": 1.0021346328938914, + "learning_rate": 4.981969933346928e-06, + "loss": 0.511, + "step": 3865 + }, + { + "epoch": 0.23542307340985902, + "grad_norm": 1.0032140385091155, + "learning_rate": 4.981960365887097e-06, + "loss": 0.52, + "step": 3866 + }, + { + "epoch": 0.2354839691867369, + "grad_norm": 1.0011375464230556, + "learning_rate": 4.981950795898697e-06, + "loss": 0.4715, + "step": 3867 + }, + { + "epoch": 0.23554486496361476, + "grad_norm": 1.0455351226805487, + "learning_rate": 4.981941223381738e-06, + "loss": 0.5135, + "step": 3868 + }, + { + "epoch": 0.23560576074049264, + "grad_norm": 1.062242012562604, + "learning_rate": 4.981931648336227e-06, + "loss": 0.5245, + "step": 3869 + }, + { + "epoch": 0.2356666565173705, + "grad_norm": 1.0330487181979797, + "learning_rate": 4.9819220707621775e-06, + "loss": 0.4784, + "step": 3870 + }, + { + "epoch": 0.23572755229424838, + "grad_norm": 1.126825096415133, + "learning_rate": 4.981912490659596e-06, + "loss": 0.4936, + "step": 3871 + }, + { + "epoch": 0.23578844807112626, + "grad_norm": 1.0668034062262801, + "learning_rate": 4.981902908028495e-06, + "loss": 0.5194, + "step": 3872 + }, + { + "epoch": 0.23584934384800413, + "grad_norm": 1.0262789877590757, + "learning_rate": 4.981893322868882e-06, + "loss": 0.4753, + "step": 3873 + }, + { + "epoch": 0.235910239624882, + "grad_norm": 1.0451493364225681, + "learning_rate": 4.981883735180768e-06, + "loss": 0.4709, + "step": 3874 + }, + { + "epoch": 0.2359711354017599, + "grad_norm": 1.0146841095027543, + "learning_rate": 4.981874144964163e-06, + "loss": 0.4888, + "step": 3875 + }, + { + "epoch": 0.23603203117863777, + "grad_norm": 0.9819293684451167, + "learning_rate": 4.981864552219075e-06, + "loss": 0.4835, + "step": 3876 + }, + { + "epoch": 0.23609292695551565, + "grad_norm": 1.0807813321779165, + "learning_rate": 4.981854956945515e-06, + "loss": 0.5086, + "step": 3877 + }, + { + "epoch": 0.23615382273239352, + "grad_norm": 0.9715465191850327, + "learning_rate": 4.981845359143494e-06, + "loss": 0.4951, + "step": 3878 + }, + { + "epoch": 0.2362147185092714, + "grad_norm": 1.0350111665870085, + "learning_rate": 4.98183575881302e-06, + "loss": 0.4245, + "step": 3879 + }, + { + "epoch": 0.23627561428614927, + "grad_norm": 1.01600806522499, + "learning_rate": 4.981826155954103e-06, + "loss": 0.519, + "step": 3880 + }, + { + "epoch": 0.23633651006302714, + "grad_norm": 0.9737789153392198, + "learning_rate": 4.9818165505667536e-06, + "loss": 0.5098, + "step": 3881 + }, + { + "epoch": 0.236397405839905, + "grad_norm": 1.0651804131560318, + "learning_rate": 4.981806942650981e-06, + "loss": 0.5035, + "step": 3882 + }, + { + "epoch": 0.23645830161678288, + "grad_norm": 1.1498474751041823, + "learning_rate": 4.981797332206795e-06, + "loss": 0.458, + "step": 3883 + }, + { + "epoch": 0.23651919739366076, + "grad_norm": 1.0544522407420063, + "learning_rate": 4.981787719234205e-06, + "loss": 0.6176, + "step": 3884 + }, + { + "epoch": 0.23658009317053863, + "grad_norm": 1.1425704680457498, + "learning_rate": 4.9817781037332215e-06, + "loss": 0.5464, + "step": 3885 + }, + { + "epoch": 0.2366409889474165, + "grad_norm": 1.0807275330861723, + "learning_rate": 4.981768485703855e-06, + "loss": 0.5054, + "step": 3886 + }, + { + "epoch": 0.23670188472429438, + "grad_norm": 0.9432275179281085, + "learning_rate": 4.981758865146114e-06, + "loss": 0.5435, + "step": 3887 + }, + { + "epoch": 0.23676278050117225, + "grad_norm": 1.0248797506331115, + "learning_rate": 4.981749242060008e-06, + "loss": 0.5162, + "step": 3888 + }, + { + "epoch": 0.23682367627805012, + "grad_norm": 1.1713447427139956, + "learning_rate": 4.981739616445548e-06, + "loss": 0.4598, + "step": 3889 + }, + { + "epoch": 0.236884572054928, + "grad_norm": 0.9602508483493223, + "learning_rate": 4.981729988302742e-06, + "loss": 0.5085, + "step": 3890 + }, + { + "epoch": 0.23694546783180587, + "grad_norm": 1.0081868297096883, + "learning_rate": 4.981720357631603e-06, + "loss": 0.5222, + "step": 3891 + }, + { + "epoch": 0.23700636360868374, + "grad_norm": 1.1334398717836685, + "learning_rate": 4.981710724432137e-06, + "loss": 0.4358, + "step": 3892 + }, + { + "epoch": 0.2370672593855616, + "grad_norm": 1.0789928405409182, + "learning_rate": 4.981701088704357e-06, + "loss": 0.4915, + "step": 3893 + }, + { + "epoch": 0.23712815516243949, + "grad_norm": 0.9333092956936851, + "learning_rate": 4.981691450448271e-06, + "loss": 0.5311, + "step": 3894 + }, + { + "epoch": 0.23718905093931736, + "grad_norm": 0.970634485315829, + "learning_rate": 4.98168180966389e-06, + "loss": 0.5341, + "step": 3895 + }, + { + "epoch": 0.23724994671619523, + "grad_norm": 1.098918880833628, + "learning_rate": 4.981672166351222e-06, + "loss": 0.479, + "step": 3896 + }, + { + "epoch": 0.2373108424930731, + "grad_norm": 1.1228804885932946, + "learning_rate": 4.981662520510279e-06, + "loss": 0.4406, + "step": 3897 + }, + { + "epoch": 0.23737173826995098, + "grad_norm": 1.0694775369208267, + "learning_rate": 4.98165287214107e-06, + "loss": 0.4419, + "step": 3898 + }, + { + "epoch": 0.23743263404682885, + "grad_norm": 1.1900453664695665, + "learning_rate": 4.981643221243605e-06, + "loss": 0.5115, + "step": 3899 + }, + { + "epoch": 0.23749352982370672, + "grad_norm": 1.0362608355146108, + "learning_rate": 4.981633567817892e-06, + "loss": 0.4489, + "step": 3900 + }, + { + "epoch": 0.2375544256005846, + "grad_norm": 0.9472354606155396, + "learning_rate": 4.981623911863943e-06, + "loss": 0.5418, + "step": 3901 + }, + { + "epoch": 0.23761532137746247, + "grad_norm": 0.9938938717684741, + "learning_rate": 4.981614253381768e-06, + "loss": 0.4852, + "step": 3902 + }, + { + "epoch": 0.23767621715434034, + "grad_norm": 0.9912178938837476, + "learning_rate": 4.981604592371374e-06, + "loss": 0.5059, + "step": 3903 + }, + { + "epoch": 0.23773711293121821, + "grad_norm": 1.044531536435821, + "learning_rate": 4.981594928832775e-06, + "loss": 0.4876, + "step": 3904 + }, + { + "epoch": 0.2377980087080961, + "grad_norm": 1.1475467112652176, + "learning_rate": 4.981585262765977e-06, + "loss": 0.4651, + "step": 3905 + }, + { + "epoch": 0.23785890448497396, + "grad_norm": 0.9858567571544226, + "learning_rate": 4.981575594170992e-06, + "loss": 0.5315, + "step": 3906 + }, + { + "epoch": 0.23791980026185183, + "grad_norm": 1.0374816118155141, + "learning_rate": 4.9815659230478294e-06, + "loss": 0.5672, + "step": 3907 + }, + { + "epoch": 0.2379806960387297, + "grad_norm": 1.1392921341157074, + "learning_rate": 4.981556249396499e-06, + "loss": 0.4911, + "step": 3908 + }, + { + "epoch": 0.23804159181560758, + "grad_norm": 1.0694331414665301, + "learning_rate": 4.981546573217012e-06, + "loss": 0.4916, + "step": 3909 + }, + { + "epoch": 0.23810248759248545, + "grad_norm": 1.0792893709593037, + "learning_rate": 4.981536894509376e-06, + "loss": 0.4003, + "step": 3910 + }, + { + "epoch": 0.23816338336936332, + "grad_norm": 1.092934003316643, + "learning_rate": 4.981527213273602e-06, + "loss": 0.4643, + "step": 3911 + }, + { + "epoch": 0.2382242791462412, + "grad_norm": 0.9979972158199573, + "learning_rate": 4.981517529509699e-06, + "loss": 0.4867, + "step": 3912 + }, + { + "epoch": 0.23828517492311907, + "grad_norm": 0.9966536004811701, + "learning_rate": 4.9815078432176776e-06, + "loss": 0.4804, + "step": 3913 + }, + { + "epoch": 0.23834607069999694, + "grad_norm": 0.9825489700215086, + "learning_rate": 4.981498154397548e-06, + "loss": 0.4778, + "step": 3914 + }, + { + "epoch": 0.23840696647687482, + "grad_norm": 0.9642892241895192, + "learning_rate": 4.981488463049319e-06, + "loss": 0.4672, + "step": 3915 + }, + { + "epoch": 0.23846786225375272, + "grad_norm": 1.0831058952908323, + "learning_rate": 4.9814787691730014e-06, + "loss": 0.4712, + "step": 3916 + }, + { + "epoch": 0.2385287580306306, + "grad_norm": 1.0608022055758801, + "learning_rate": 4.9814690727686054e-06, + "loss": 0.4738, + "step": 3917 + }, + { + "epoch": 0.23858965380750846, + "grad_norm": 1.1138543891616683, + "learning_rate": 4.98145937383614e-06, + "loss": 0.4711, + "step": 3918 + }, + { + "epoch": 0.23865054958438633, + "grad_norm": 1.1228455447025976, + "learning_rate": 4.981449672375616e-06, + "loss": 0.4437, + "step": 3919 + }, + { + "epoch": 0.2387114453612642, + "grad_norm": 1.1634016713532676, + "learning_rate": 4.981439968387042e-06, + "loss": 0.4423, + "step": 3920 + }, + { + "epoch": 0.23877234113814208, + "grad_norm": 0.9618365475583119, + "learning_rate": 4.981430261870428e-06, + "loss": 0.5306, + "step": 3921 + }, + { + "epoch": 0.23883323691501995, + "grad_norm": 1.0888769927045006, + "learning_rate": 4.981420552825785e-06, + "loss": 0.3921, + "step": 3922 + }, + { + "epoch": 0.23889413269189783, + "grad_norm": 1.0191494698485075, + "learning_rate": 4.981410841253122e-06, + "loss": 0.4678, + "step": 3923 + }, + { + "epoch": 0.2389550284687757, + "grad_norm": 1.0576193964025224, + "learning_rate": 4.98140112715245e-06, + "loss": 0.5155, + "step": 3924 + }, + { + "epoch": 0.23901592424565357, + "grad_norm": 0.9975335704920769, + "learning_rate": 4.981391410523778e-06, + "loss": 0.5018, + "step": 3925 + }, + { + "epoch": 0.23907682002253144, + "grad_norm": 0.9666107341337267, + "learning_rate": 4.981381691367115e-06, + "loss": 0.5332, + "step": 3926 + }, + { + "epoch": 0.23913771579940932, + "grad_norm": 1.075008870286327, + "learning_rate": 4.981371969682473e-06, + "loss": 0.5204, + "step": 3927 + }, + { + "epoch": 0.2391986115762872, + "grad_norm": 1.0604127460205013, + "learning_rate": 4.981362245469861e-06, + "loss": 0.4466, + "step": 3928 + }, + { + "epoch": 0.23925950735316506, + "grad_norm": 1.1110483906807513, + "learning_rate": 4.981352518729288e-06, + "loss": 0.4301, + "step": 3929 + }, + { + "epoch": 0.23932040313004294, + "grad_norm": 1.0405925594177867, + "learning_rate": 4.981342789460765e-06, + "loss": 0.4538, + "step": 3930 + }, + { + "epoch": 0.2393812989069208, + "grad_norm": 1.095902736973017, + "learning_rate": 4.981333057664301e-06, + "loss": 0.4771, + "step": 3931 + }, + { + "epoch": 0.23944219468379868, + "grad_norm": 1.0871895585949507, + "learning_rate": 4.981323323339908e-06, + "loss": 0.4666, + "step": 3932 + }, + { + "epoch": 0.23950309046067655, + "grad_norm": 1.004904803544844, + "learning_rate": 4.981313586487592e-06, + "loss": 0.555, + "step": 3933 + }, + { + "epoch": 0.23956398623755443, + "grad_norm": 0.9785748165773344, + "learning_rate": 4.9813038471073675e-06, + "loss": 0.4934, + "step": 3934 + }, + { + "epoch": 0.2396248820144323, + "grad_norm": 1.1514220508932829, + "learning_rate": 4.98129410519924e-06, + "loss": 0.508, + "step": 3935 + }, + { + "epoch": 0.23968577779131017, + "grad_norm": 1.126632232726212, + "learning_rate": 4.981284360763223e-06, + "loss": 0.4725, + "step": 3936 + }, + { + "epoch": 0.23974667356818805, + "grad_norm": 1.1104459412336685, + "learning_rate": 4.981274613799326e-06, + "loss": 0.4908, + "step": 3937 + }, + { + "epoch": 0.23980756934506592, + "grad_norm": 1.0015585494423838, + "learning_rate": 4.9812648643075565e-06, + "loss": 0.4781, + "step": 3938 + }, + { + "epoch": 0.2398684651219438, + "grad_norm": 1.0613048664861662, + "learning_rate": 4.981255112287927e-06, + "loss": 0.4719, + "step": 3939 + }, + { + "epoch": 0.23992936089882166, + "grad_norm": 1.0569753426344481, + "learning_rate": 4.981245357740445e-06, + "loss": 0.4735, + "step": 3940 + }, + { + "epoch": 0.23999025667569954, + "grad_norm": 1.02952409737359, + "learning_rate": 4.981235600665123e-06, + "loss": 0.4783, + "step": 3941 + }, + { + "epoch": 0.2400511524525774, + "grad_norm": 1.1377462622443633, + "learning_rate": 4.98122584106197e-06, + "loss": 0.4262, + "step": 3942 + }, + { + "epoch": 0.24011204822945528, + "grad_norm": 1.044449172766818, + "learning_rate": 4.981216078930995e-06, + "loss": 0.4426, + "step": 3943 + }, + { + "epoch": 0.24017294400633316, + "grad_norm": 1.0730142311762407, + "learning_rate": 4.981206314272209e-06, + "loss": 0.4636, + "step": 3944 + }, + { + "epoch": 0.24023383978321103, + "grad_norm": 0.9998714418062362, + "learning_rate": 4.981196547085621e-06, + "loss": 0.4564, + "step": 3945 + }, + { + "epoch": 0.2402947355600889, + "grad_norm": 1.0598691526020294, + "learning_rate": 4.981186777371242e-06, + "loss": 0.5353, + "step": 3946 + }, + { + "epoch": 0.24035563133696677, + "grad_norm": 0.9769154563733607, + "learning_rate": 4.981177005129081e-06, + "loss": 0.4823, + "step": 3947 + }, + { + "epoch": 0.24041652711384465, + "grad_norm": 0.961327574289567, + "learning_rate": 4.981167230359149e-06, + "loss": 0.4714, + "step": 3948 + }, + { + "epoch": 0.24047742289072252, + "grad_norm": 0.9978192388210497, + "learning_rate": 4.981157453061455e-06, + "loss": 0.4732, + "step": 3949 + }, + { + "epoch": 0.2405383186676004, + "grad_norm": 1.0713354454535151, + "learning_rate": 4.98114767323601e-06, + "loss": 0.4579, + "step": 3950 + }, + { + "epoch": 0.24059921444447827, + "grad_norm": 1.0095220045052913, + "learning_rate": 4.981137890882823e-06, + "loss": 0.5067, + "step": 3951 + }, + { + "epoch": 0.24066011022135614, + "grad_norm": 1.075339491107288, + "learning_rate": 4.981128106001905e-06, + "loss": 0.4282, + "step": 3952 + }, + { + "epoch": 0.240721005998234, + "grad_norm": 1.095219922186481, + "learning_rate": 4.981118318593264e-06, + "loss": 0.4924, + "step": 3953 + }, + { + "epoch": 0.24078190177511188, + "grad_norm": 1.1045119001333732, + "learning_rate": 4.981108528656911e-06, + "loss": 0.4679, + "step": 3954 + }, + { + "epoch": 0.24084279755198976, + "grad_norm": 0.9635518077210926, + "learning_rate": 4.981098736192858e-06, + "loss": 0.4797, + "step": 3955 + }, + { + "epoch": 0.24090369332886763, + "grad_norm": 1.0210566894029653, + "learning_rate": 4.981088941201112e-06, + "loss": 0.5049, + "step": 3956 + }, + { + "epoch": 0.24096458910574553, + "grad_norm": 0.9559871961420698, + "learning_rate": 4.981079143681684e-06, + "loss": 0.4951, + "step": 3957 + }, + { + "epoch": 0.2410254848826234, + "grad_norm": 1.0795045209190195, + "learning_rate": 4.981069343634585e-06, + "loss": 0.5025, + "step": 3958 + }, + { + "epoch": 0.24108638065950128, + "grad_norm": 1.1300204395004465, + "learning_rate": 4.9810595410598235e-06, + "loss": 0.4895, + "step": 3959 + }, + { + "epoch": 0.24114727643637915, + "grad_norm": 0.9838211994650045, + "learning_rate": 4.98104973595741e-06, + "loss": 0.5363, + "step": 3960 + }, + { + "epoch": 0.24120817221325702, + "grad_norm": 1.0785077332375315, + "learning_rate": 4.981039928327355e-06, + "loss": 0.5479, + "step": 3961 + }, + { + "epoch": 0.2412690679901349, + "grad_norm": 1.063926859725005, + "learning_rate": 4.981030118169668e-06, + "loss": 0.4728, + "step": 3962 + }, + { + "epoch": 0.24132996376701277, + "grad_norm": 0.9862348353858558, + "learning_rate": 4.9810203054843585e-06, + "loss": 0.4924, + "step": 3963 + }, + { + "epoch": 0.24139085954389064, + "grad_norm": 1.0163397634386007, + "learning_rate": 4.9810104902714385e-06, + "loss": 0.5491, + "step": 3964 + }, + { + "epoch": 0.2414517553207685, + "grad_norm": 1.0165579977874617, + "learning_rate": 4.981000672530915e-06, + "loss": 0.4803, + "step": 3965 + }, + { + "epoch": 0.24151265109764639, + "grad_norm": 0.9895921826492333, + "learning_rate": 4.980990852262801e-06, + "loss": 0.475, + "step": 3966 + }, + { + "epoch": 0.24157354687452426, + "grad_norm": 1.035936162379611, + "learning_rate": 4.980981029467105e-06, + "loss": 0.4773, + "step": 3967 + }, + { + "epoch": 0.24163444265140213, + "grad_norm": 1.013006033202925, + "learning_rate": 4.980971204143836e-06, + "loss": 0.4569, + "step": 3968 + }, + { + "epoch": 0.24169533842828, + "grad_norm": 1.0551194986178474, + "learning_rate": 4.9809613762930055e-06, + "loss": 0.485, + "step": 3969 + }, + { + "epoch": 0.24175623420515788, + "grad_norm": 1.1507083223337693, + "learning_rate": 4.980951545914624e-06, + "loss": 0.5063, + "step": 3970 + }, + { + "epoch": 0.24181712998203575, + "grad_norm": 1.0461358547580937, + "learning_rate": 4.9809417130087e-06, + "loss": 0.4579, + "step": 3971 + }, + { + "epoch": 0.24187802575891362, + "grad_norm": 0.998368584289763, + "learning_rate": 4.980931877575243e-06, + "loss": 0.5198, + "step": 3972 + }, + { + "epoch": 0.2419389215357915, + "grad_norm": 1.0838067302088368, + "learning_rate": 4.980922039614266e-06, + "loss": 0.4573, + "step": 3973 + }, + { + "epoch": 0.24199981731266937, + "grad_norm": 1.0069672412338886, + "learning_rate": 4.980912199125777e-06, + "loss": 0.4866, + "step": 3974 + }, + { + "epoch": 0.24206071308954724, + "grad_norm": 1.0148604498702731, + "learning_rate": 4.980902356109785e-06, + "loss": 0.4708, + "step": 3975 + }, + { + "epoch": 0.24212160886642511, + "grad_norm": 1.0763108040390619, + "learning_rate": 4.980892510566302e-06, + "loss": 0.4883, + "step": 3976 + }, + { + "epoch": 0.242182504643303, + "grad_norm": 1.0913310873392597, + "learning_rate": 4.980882662495337e-06, + "loss": 0.476, + "step": 3977 + }, + { + "epoch": 0.24224340042018086, + "grad_norm": 0.996802464865154, + "learning_rate": 4.980872811896901e-06, + "loss": 0.4669, + "step": 3978 + }, + { + "epoch": 0.24230429619705873, + "grad_norm": 1.034666349440827, + "learning_rate": 4.980862958771003e-06, + "loss": 0.5209, + "step": 3979 + }, + { + "epoch": 0.2423651919739366, + "grad_norm": 0.9861692302363315, + "learning_rate": 4.9808531031176536e-06, + "loss": 0.4981, + "step": 3980 + }, + { + "epoch": 0.24242608775081448, + "grad_norm": 1.087336605196495, + "learning_rate": 4.980843244936863e-06, + "loss": 0.5262, + "step": 3981 + }, + { + "epoch": 0.24248698352769235, + "grad_norm": 1.0232919813720867, + "learning_rate": 4.980833384228639e-06, + "loss": 0.5339, + "step": 3982 + }, + { + "epoch": 0.24254787930457022, + "grad_norm": 1.07595523289145, + "learning_rate": 4.980823520992996e-06, + "loss": 0.5525, + "step": 3983 + }, + { + "epoch": 0.2426087750814481, + "grad_norm": 0.9941656645381992, + "learning_rate": 4.9808136552299405e-06, + "loss": 0.4539, + "step": 3984 + }, + { + "epoch": 0.24266967085832597, + "grad_norm": 1.1062746740187028, + "learning_rate": 4.980803786939483e-06, + "loss": 0.5902, + "step": 3985 + }, + { + "epoch": 0.24273056663520384, + "grad_norm": 0.9898559340482247, + "learning_rate": 4.980793916121634e-06, + "loss": 0.4973, + "step": 3986 + }, + { + "epoch": 0.24279146241208172, + "grad_norm": 0.9724419177871614, + "learning_rate": 4.980784042776405e-06, + "loss": 0.5342, + "step": 3987 + }, + { + "epoch": 0.2428523581889596, + "grad_norm": 1.0092637836903222, + "learning_rate": 4.980774166903805e-06, + "loss": 0.5411, + "step": 3988 + }, + { + "epoch": 0.24291325396583746, + "grad_norm": 0.9810377705616926, + "learning_rate": 4.980764288503842e-06, + "loss": 0.5145, + "step": 3989 + }, + { + "epoch": 0.24297414974271533, + "grad_norm": 1.173416463618114, + "learning_rate": 4.980754407576529e-06, + "loss": 0.4499, + "step": 3990 + }, + { + "epoch": 0.2430350455195932, + "grad_norm": 1.057009586378205, + "learning_rate": 4.980744524121875e-06, + "loss": 0.4887, + "step": 3991 + }, + { + "epoch": 0.24309594129647108, + "grad_norm": 1.0483724546123938, + "learning_rate": 4.98073463813989e-06, + "loss": 0.4671, + "step": 3992 + }, + { + "epoch": 0.24315683707334895, + "grad_norm": 1.046457993681976, + "learning_rate": 4.980724749630584e-06, + "loss": 0.523, + "step": 3993 + }, + { + "epoch": 0.24321773285022683, + "grad_norm": 1.0630418179830168, + "learning_rate": 4.980714858593968e-06, + "loss": 0.4919, + "step": 3994 + }, + { + "epoch": 0.2432786286271047, + "grad_norm": 1.0033300830710334, + "learning_rate": 4.980704965030051e-06, + "loss": 0.4965, + "step": 3995 + }, + { + "epoch": 0.24333952440398257, + "grad_norm": 1.0115447315897508, + "learning_rate": 4.980695068938843e-06, + "loss": 0.5206, + "step": 3996 + }, + { + "epoch": 0.24340042018086044, + "grad_norm": 1.055933035873064, + "learning_rate": 4.9806851703203544e-06, + "loss": 0.4656, + "step": 3997 + }, + { + "epoch": 0.24346131595773834, + "grad_norm": 1.0218407646082692, + "learning_rate": 4.980675269174595e-06, + "loss": 0.5238, + "step": 3998 + }, + { + "epoch": 0.24352221173461622, + "grad_norm": 0.9938841169388715, + "learning_rate": 4.980665365501576e-06, + "loss": 0.4672, + "step": 3999 + }, + { + "epoch": 0.2435831075114941, + "grad_norm": 1.1641254323710215, + "learning_rate": 4.980655459301306e-06, + "loss": 0.4988, + "step": 4000 + }, + { + "epoch": 0.24364400328837196, + "grad_norm": 1.0238304967561251, + "learning_rate": 4.980645550573796e-06, + "loss": 0.5456, + "step": 4001 + }, + { + "epoch": 0.24370489906524984, + "grad_norm": 1.0823559074900375, + "learning_rate": 4.980635639319056e-06, + "loss": 0.5108, + "step": 4002 + }, + { + "epoch": 0.2437657948421277, + "grad_norm": 1.0243597246959946, + "learning_rate": 4.980625725537096e-06, + "loss": 0.4389, + "step": 4003 + }, + { + "epoch": 0.24382669061900558, + "grad_norm": 1.0644638006831746, + "learning_rate": 4.980615809227926e-06, + "loss": 0.5159, + "step": 4004 + }, + { + "epoch": 0.24388758639588345, + "grad_norm": 1.0789842000925718, + "learning_rate": 4.9806058903915564e-06, + "loss": 0.5357, + "step": 4005 + }, + { + "epoch": 0.24394848217276133, + "grad_norm": 0.9732498506005204, + "learning_rate": 4.980595969027997e-06, + "loss": 0.5447, + "step": 4006 + }, + { + "epoch": 0.2440093779496392, + "grad_norm": 1.0748426903119561, + "learning_rate": 4.980586045137258e-06, + "loss": 0.5047, + "step": 4007 + }, + { + "epoch": 0.24407027372651707, + "grad_norm": 1.0795414120277669, + "learning_rate": 4.980576118719349e-06, + "loss": 0.5538, + "step": 4008 + }, + { + "epoch": 0.24413116950339495, + "grad_norm": 1.0659217326543589, + "learning_rate": 4.980566189774281e-06, + "loss": 0.4887, + "step": 4009 + }, + { + "epoch": 0.24419206528027282, + "grad_norm": 1.1396392578667893, + "learning_rate": 4.9805562583020635e-06, + "loss": 0.5016, + "step": 4010 + }, + { + "epoch": 0.2442529610571507, + "grad_norm": 0.9996244897587777, + "learning_rate": 4.9805463243027075e-06, + "loss": 0.5326, + "step": 4011 + }, + { + "epoch": 0.24431385683402856, + "grad_norm": 1.0071131070996013, + "learning_rate": 4.980536387776223e-06, + "loss": 0.4641, + "step": 4012 + }, + { + "epoch": 0.24437475261090644, + "grad_norm": 1.1866664007019112, + "learning_rate": 4.980526448722618e-06, + "loss": 0.4079, + "step": 4013 + }, + { + "epoch": 0.2444356483877843, + "grad_norm": 1.0587619483146928, + "learning_rate": 4.980516507141905e-06, + "loss": 0.5803, + "step": 4014 + }, + { + "epoch": 0.24449654416466218, + "grad_norm": 1.0919355368108241, + "learning_rate": 4.980506563034093e-06, + "loss": 0.4559, + "step": 4015 + }, + { + "epoch": 0.24455743994154006, + "grad_norm": 1.0158099728579484, + "learning_rate": 4.980496616399193e-06, + "loss": 0.4985, + "step": 4016 + }, + { + "epoch": 0.24461833571841793, + "grad_norm": 1.0740886439674215, + "learning_rate": 4.980486667237214e-06, + "loss": 0.5066, + "step": 4017 + }, + { + "epoch": 0.2446792314952958, + "grad_norm": 1.0006847598784376, + "learning_rate": 4.980476715548168e-06, + "loss": 0.4898, + "step": 4018 + }, + { + "epoch": 0.24474012727217367, + "grad_norm": 1.0621754487005695, + "learning_rate": 4.980466761332062e-06, + "loss": 0.468, + "step": 4019 + }, + { + "epoch": 0.24480102304905155, + "grad_norm": 1.15891190995193, + "learning_rate": 4.980456804588909e-06, + "loss": 0.4464, + "step": 4020 + }, + { + "epoch": 0.24486191882592942, + "grad_norm": 1.0019641333661502, + "learning_rate": 4.980446845318719e-06, + "loss": 0.4767, + "step": 4021 + }, + { + "epoch": 0.2449228146028073, + "grad_norm": 1.0295919691410458, + "learning_rate": 4.9804368835215e-06, + "loss": 0.5165, + "step": 4022 + }, + { + "epoch": 0.24498371037968517, + "grad_norm": 1.0369562867508109, + "learning_rate": 4.980426919197264e-06, + "loss": 0.4599, + "step": 4023 + }, + { + "epoch": 0.24504460615656304, + "grad_norm": 1.1074672357947812, + "learning_rate": 4.980416952346021e-06, + "loss": 0.5096, + "step": 4024 + }, + { + "epoch": 0.2451055019334409, + "grad_norm": 0.9789128940265874, + "learning_rate": 4.98040698296778e-06, + "loss": 0.4942, + "step": 4025 + }, + { + "epoch": 0.24516639771031878, + "grad_norm": 1.0460970738651634, + "learning_rate": 4.9803970110625534e-06, + "loss": 0.4968, + "step": 4026 + }, + { + "epoch": 0.24522729348719666, + "grad_norm": 1.0629216237523387, + "learning_rate": 4.980387036630349e-06, + "loss": 0.547, + "step": 4027 + }, + { + "epoch": 0.24528818926407453, + "grad_norm": 1.164032218003646, + "learning_rate": 4.9803770596711776e-06, + "loss": 0.6029, + "step": 4028 + }, + { + "epoch": 0.2453490850409524, + "grad_norm": 0.9663628459343706, + "learning_rate": 4.980367080185049e-06, + "loss": 0.5524, + "step": 4029 + }, + { + "epoch": 0.24540998081783028, + "grad_norm": 1.1010770407895025, + "learning_rate": 4.9803570981719754e-06, + "loss": 0.4611, + "step": 4030 + }, + { + "epoch": 0.24547087659470815, + "grad_norm": 1.083246705212952, + "learning_rate": 4.980347113631965e-06, + "loss": 0.4624, + "step": 4031 + }, + { + "epoch": 0.24553177237158602, + "grad_norm": 1.0194927797668465, + "learning_rate": 4.980337126565028e-06, + "loss": 0.4913, + "step": 4032 + }, + { + "epoch": 0.2455926681484639, + "grad_norm": 1.172362379969205, + "learning_rate": 4.9803271369711755e-06, + "loss": 0.4462, + "step": 4033 + }, + { + "epoch": 0.24565356392534177, + "grad_norm": 1.0586378079941408, + "learning_rate": 4.980317144850418e-06, + "loss": 0.4474, + "step": 4034 + }, + { + "epoch": 0.24571445970221964, + "grad_norm": 1.0170352631767654, + "learning_rate": 4.980307150202763e-06, + "loss": 0.4452, + "step": 4035 + }, + { + "epoch": 0.2457753554790975, + "grad_norm": 1.003540808893636, + "learning_rate": 4.9802971530282246e-06, + "loss": 0.4932, + "step": 4036 + }, + { + "epoch": 0.24583625125597539, + "grad_norm": 1.064182337339439, + "learning_rate": 4.980287153326811e-06, + "loss": 0.5131, + "step": 4037 + }, + { + "epoch": 0.24589714703285326, + "grad_norm": 1.1401741818542015, + "learning_rate": 4.980277151098531e-06, + "loss": 0.5118, + "step": 4038 + }, + { + "epoch": 0.24595804280973116, + "grad_norm": 1.080294713711817, + "learning_rate": 4.980267146343397e-06, + "loss": 0.4568, + "step": 4039 + }, + { + "epoch": 0.24601893858660903, + "grad_norm": 0.9277651808981804, + "learning_rate": 4.980257139061418e-06, + "loss": 0.5858, + "step": 4040 + }, + { + "epoch": 0.2460798343634869, + "grad_norm": 1.0105043748207745, + "learning_rate": 4.980247129252606e-06, + "loss": 0.4405, + "step": 4041 + }, + { + "epoch": 0.24614073014036478, + "grad_norm": 1.1110149791379806, + "learning_rate": 4.980237116916968e-06, + "loss": 0.4843, + "step": 4042 + }, + { + "epoch": 0.24620162591724265, + "grad_norm": 1.0384427803059315, + "learning_rate": 4.980227102054516e-06, + "loss": 0.5017, + "step": 4043 + }, + { + "epoch": 0.24626252169412052, + "grad_norm": 1.22973896460569, + "learning_rate": 4.980217084665262e-06, + "loss": 0.544, + "step": 4044 + }, + { + "epoch": 0.2463234174709984, + "grad_norm": 0.978521131571482, + "learning_rate": 4.980207064749213e-06, + "loss": 0.5043, + "step": 4045 + }, + { + "epoch": 0.24638431324787627, + "grad_norm": 1.0010742465486158, + "learning_rate": 4.980197042306381e-06, + "loss": 0.4834, + "step": 4046 + }, + { + "epoch": 0.24644520902475414, + "grad_norm": 1.0431143905293672, + "learning_rate": 4.980187017336776e-06, + "loss": 0.5018, + "step": 4047 + }, + { + "epoch": 0.24650610480163201, + "grad_norm": 1.0715819186175437, + "learning_rate": 4.980176989840407e-06, + "loss": 0.4797, + "step": 4048 + }, + { + "epoch": 0.2465670005785099, + "grad_norm": 1.0220762831111998, + "learning_rate": 4.980166959817286e-06, + "loss": 0.4825, + "step": 4049 + }, + { + "epoch": 0.24662789635538776, + "grad_norm": 1.0437768033387822, + "learning_rate": 4.980156927267423e-06, + "loss": 0.4642, + "step": 4050 + }, + { + "epoch": 0.24668879213226563, + "grad_norm": 1.061155408362368, + "learning_rate": 4.9801468921908274e-06, + "loss": 0.4727, + "step": 4051 + }, + { + "epoch": 0.2467496879091435, + "grad_norm": 1.0317208885620504, + "learning_rate": 4.98013685458751e-06, + "loss": 0.4814, + "step": 4052 + }, + { + "epoch": 0.24681058368602138, + "grad_norm": 1.0216354268933265, + "learning_rate": 4.98012681445748e-06, + "loss": 0.4962, + "step": 4053 + }, + { + "epoch": 0.24687147946289925, + "grad_norm": 1.1471889571653067, + "learning_rate": 4.9801167718007485e-06, + "loss": 0.4634, + "step": 4054 + }, + { + "epoch": 0.24693237523977712, + "grad_norm": 1.1510750703636465, + "learning_rate": 4.980106726617325e-06, + "loss": 0.4158, + "step": 4055 + }, + { + "epoch": 0.246993271016655, + "grad_norm": 0.9341595212909969, + "learning_rate": 4.980096678907221e-06, + "loss": 0.4951, + "step": 4056 + }, + { + "epoch": 0.24705416679353287, + "grad_norm": 1.045234948700589, + "learning_rate": 4.980086628670446e-06, + "loss": 0.4324, + "step": 4057 + }, + { + "epoch": 0.24711506257041074, + "grad_norm": 1.1191113315707526, + "learning_rate": 4.980076575907012e-06, + "loss": 0.4584, + "step": 4058 + }, + { + "epoch": 0.24717595834728862, + "grad_norm": 1.0945937111881174, + "learning_rate": 4.980066520616925e-06, + "loss": 0.4683, + "step": 4059 + }, + { + "epoch": 0.2472368541241665, + "grad_norm": 1.0595285414507942, + "learning_rate": 4.9800564628001994e-06, + "loss": 0.4684, + "step": 4060 + }, + { + "epoch": 0.24729774990104436, + "grad_norm": 1.000051272441026, + "learning_rate": 4.980046402456842e-06, + "loss": 0.5755, + "step": 4061 + }, + { + "epoch": 0.24735864567792223, + "grad_norm": 0.9818512975848286, + "learning_rate": 4.980036339586866e-06, + "loss": 0.4791, + "step": 4062 + }, + { + "epoch": 0.2474195414548001, + "grad_norm": 1.0681448766617214, + "learning_rate": 4.980026274190281e-06, + "loss": 0.4613, + "step": 4063 + }, + { + "epoch": 0.24748043723167798, + "grad_norm": 1.0693337548766981, + "learning_rate": 4.980016206267096e-06, + "loss": 0.4851, + "step": 4064 + }, + { + "epoch": 0.24754133300855585, + "grad_norm": 1.0036654605171769, + "learning_rate": 4.980006135817323e-06, + "loss": 0.4982, + "step": 4065 + }, + { + "epoch": 0.24760222878543373, + "grad_norm": 1.028256514658881, + "learning_rate": 4.979996062840971e-06, + "loss": 0.4772, + "step": 4066 + }, + { + "epoch": 0.2476631245623116, + "grad_norm": 1.076886769947766, + "learning_rate": 4.97998598733805e-06, + "loss": 0.4875, + "step": 4067 + }, + { + "epoch": 0.24772402033918947, + "grad_norm": 1.0385852521376226, + "learning_rate": 4.979975909308571e-06, + "loss": 0.5058, + "step": 4068 + }, + { + "epoch": 0.24778491611606734, + "grad_norm": 1.0700170905245607, + "learning_rate": 4.979965828752545e-06, + "loss": 0.4759, + "step": 4069 + }, + { + "epoch": 0.24784581189294522, + "grad_norm": 0.9716376714722894, + "learning_rate": 4.97995574566998e-06, + "loss": 0.4914, + "step": 4070 + }, + { + "epoch": 0.2479067076698231, + "grad_norm": 1.0828394714745335, + "learning_rate": 4.9799456600608885e-06, + "loss": 0.5114, + "step": 4071 + }, + { + "epoch": 0.24796760344670096, + "grad_norm": 1.0152494712246694, + "learning_rate": 4.979935571925279e-06, + "loss": 0.5366, + "step": 4072 + }, + { + "epoch": 0.24802849922357884, + "grad_norm": 1.0281693308243371, + "learning_rate": 4.979925481263164e-06, + "loss": 0.4745, + "step": 4073 + }, + { + "epoch": 0.2480893950004567, + "grad_norm": 1.0646273844784562, + "learning_rate": 4.979915388074552e-06, + "loss": 0.4867, + "step": 4074 + }, + { + "epoch": 0.24815029077733458, + "grad_norm": 1.1061261018372028, + "learning_rate": 4.979905292359453e-06, + "loss": 0.4927, + "step": 4075 + }, + { + "epoch": 0.24821118655421245, + "grad_norm": 1.0417640668700485, + "learning_rate": 4.9798951941178796e-06, + "loss": 0.5214, + "step": 4076 + }, + { + "epoch": 0.24827208233109033, + "grad_norm": 1.140714941493114, + "learning_rate": 4.9798850933498386e-06, + "loss": 0.4962, + "step": 4077 + }, + { + "epoch": 0.2483329781079682, + "grad_norm": 1.0071443913580735, + "learning_rate": 4.979874990055344e-06, + "loss": 0.4742, + "step": 4078 + }, + { + "epoch": 0.24839387388484607, + "grad_norm": 1.0299271579688773, + "learning_rate": 4.979864884234403e-06, + "loss": 0.5371, + "step": 4079 + }, + { + "epoch": 0.24845476966172397, + "grad_norm": 1.0050134246702687, + "learning_rate": 4.979854775887028e-06, + "loss": 0.5459, + "step": 4080 + }, + { + "epoch": 0.24851566543860185, + "grad_norm": 1.0678455697398428, + "learning_rate": 4.979844665013228e-06, + "loss": 0.4715, + "step": 4081 + }, + { + "epoch": 0.24857656121547972, + "grad_norm": 1.0967388695385776, + "learning_rate": 4.9798345516130145e-06, + "loss": 0.5079, + "step": 4082 + }, + { + "epoch": 0.2486374569923576, + "grad_norm": 0.9832118892987423, + "learning_rate": 4.9798244356863965e-06, + "loss": 0.5087, + "step": 4083 + }, + { + "epoch": 0.24869835276923546, + "grad_norm": 1.091102193383833, + "learning_rate": 4.979814317233386e-06, + "loss": 0.5316, + "step": 4084 + }, + { + "epoch": 0.24875924854611334, + "grad_norm": 1.0760202016016025, + "learning_rate": 4.979804196253991e-06, + "loss": 0.5174, + "step": 4085 + }, + { + "epoch": 0.2488201443229912, + "grad_norm": 1.113949521304509, + "learning_rate": 4.979794072748223e-06, + "loss": 0.5546, + "step": 4086 + }, + { + "epoch": 0.24888104009986908, + "grad_norm": 1.0308811635090351, + "learning_rate": 4.979783946716093e-06, + "loss": 0.4318, + "step": 4087 + }, + { + "epoch": 0.24894193587674696, + "grad_norm": 1.0471032550218016, + "learning_rate": 4.979773818157611e-06, + "loss": 0.5334, + "step": 4088 + }, + { + "epoch": 0.24900283165362483, + "grad_norm": 1.0185352393438665, + "learning_rate": 4.979763687072786e-06, + "loss": 0.4286, + "step": 4089 + }, + { + "epoch": 0.2490637274305027, + "grad_norm": 1.107312594157445, + "learning_rate": 4.979753553461629e-06, + "loss": 0.4691, + "step": 4090 + }, + { + "epoch": 0.24912462320738057, + "grad_norm": 1.0104684034902969, + "learning_rate": 4.979743417324152e-06, + "loss": 0.452, + "step": 4091 + }, + { + "epoch": 0.24918551898425845, + "grad_norm": 1.1199953168193517, + "learning_rate": 4.9797332786603636e-06, + "loss": 0.5057, + "step": 4092 + }, + { + "epoch": 0.24924641476113632, + "grad_norm": 0.9727778821403755, + "learning_rate": 4.979723137470274e-06, + "loss": 0.5081, + "step": 4093 + }, + { + "epoch": 0.2493073105380142, + "grad_norm": 1.0691921342663646, + "learning_rate": 4.979712993753894e-06, + "loss": 0.4949, + "step": 4094 + }, + { + "epoch": 0.24936820631489207, + "grad_norm": 1.0843819083071615, + "learning_rate": 4.979702847511234e-06, + "loss": 0.468, + "step": 4095 + }, + { + "epoch": 0.24942910209176994, + "grad_norm": 1.0715450346901974, + "learning_rate": 4.9796926987423044e-06, + "loss": 0.5044, + "step": 4096 + }, + { + "epoch": 0.2494899978686478, + "grad_norm": 1.0060816728733095, + "learning_rate": 4.979682547447116e-06, + "loss": 0.5073, + "step": 4097 + }, + { + "epoch": 0.24955089364552568, + "grad_norm": 1.1190226358202244, + "learning_rate": 4.979672393625678e-06, + "loss": 0.4648, + "step": 4098 + }, + { + "epoch": 0.24961178942240356, + "grad_norm": 1.0181212731833285, + "learning_rate": 4.979662237278001e-06, + "loss": 0.4981, + "step": 4099 + }, + { + "epoch": 0.24967268519928143, + "grad_norm": 1.0828455774550572, + "learning_rate": 4.9796520784040965e-06, + "loss": 0.4842, + "step": 4100 + }, + { + "epoch": 0.2497335809761593, + "grad_norm": 1.0778122058270494, + "learning_rate": 4.9796419170039735e-06, + "loss": 0.5259, + "step": 4101 + }, + { + "epoch": 0.24979447675303718, + "grad_norm": 1.0315906124517946, + "learning_rate": 4.9796317530776425e-06, + "loss": 0.4723, + "step": 4102 + }, + { + "epoch": 0.24985537252991505, + "grad_norm": 1.0926782001658024, + "learning_rate": 4.979621586625115e-06, + "loss": 0.456, + "step": 4103 + }, + { + "epoch": 0.24991626830679292, + "grad_norm": 1.0432754744016668, + "learning_rate": 4.9796114176464004e-06, + "loss": 0.489, + "step": 4104 + }, + { + "epoch": 0.2499771640836708, + "grad_norm": 1.033326490141144, + "learning_rate": 4.97960124614151e-06, + "loss": 0.4921, + "step": 4105 + }, + { + "epoch": 0.25003805986054867, + "grad_norm": 1.096642719075634, + "learning_rate": 4.979591072110452e-06, + "loss": 0.4716, + "step": 4106 + }, + { + "epoch": 0.25009895563742657, + "grad_norm": 0.9750718328363718, + "learning_rate": 4.979580895553239e-06, + "loss": 0.4655, + "step": 4107 + }, + { + "epoch": 0.2501598514143044, + "grad_norm": 1.012688092888798, + "learning_rate": 4.9795707164698795e-06, + "loss": 0.4353, + "step": 4108 + }, + { + "epoch": 0.2502207471911823, + "grad_norm": 1.0147710814499373, + "learning_rate": 4.979560534860386e-06, + "loss": 0.4684, + "step": 4109 + }, + { + "epoch": 0.25028164296806016, + "grad_norm": 0.9918673151042803, + "learning_rate": 4.979550350724767e-06, + "loss": 0.5073, + "step": 4110 + }, + { + "epoch": 0.25034253874493806, + "grad_norm": 1.109311329508295, + "learning_rate": 4.9795401640630344e-06, + "loss": 0.5044, + "step": 4111 + }, + { + "epoch": 0.2504034345218159, + "grad_norm": 1.1718645033859496, + "learning_rate": 4.979529974875198e-06, + "loss": 0.4709, + "step": 4112 + }, + { + "epoch": 0.2504643302986938, + "grad_norm": 1.0999434240423733, + "learning_rate": 4.979519783161267e-06, + "loss": 0.5104, + "step": 4113 + }, + { + "epoch": 0.25052522607557165, + "grad_norm": 0.9788093189487905, + "learning_rate": 4.979509588921253e-06, + "loss": 0.5219, + "step": 4114 + }, + { + "epoch": 0.25058612185244955, + "grad_norm": 1.0778914399727857, + "learning_rate": 4.979499392155167e-06, + "loss": 0.5075, + "step": 4115 + }, + { + "epoch": 0.2506470176293274, + "grad_norm": 0.9919202284117123, + "learning_rate": 4.979489192863018e-06, + "loss": 0.5145, + "step": 4116 + }, + { + "epoch": 0.2507079134062053, + "grad_norm": 1.0732303419814995, + "learning_rate": 4.979478991044817e-06, + "loss": 0.4183, + "step": 4117 + }, + { + "epoch": 0.25076880918308314, + "grad_norm": 1.0511951353102449, + "learning_rate": 4.979468786700575e-06, + "loss": 0.5488, + "step": 4118 + }, + { + "epoch": 0.25082970495996104, + "grad_norm": 1.0049120470257444, + "learning_rate": 4.9794585798303e-06, + "loss": 0.4664, + "step": 4119 + }, + { + "epoch": 0.2508906007368389, + "grad_norm": 1.0715032549144652, + "learning_rate": 4.979448370434005e-06, + "loss": 0.4556, + "step": 4120 + }, + { + "epoch": 0.2509514965137168, + "grad_norm": 1.019426803154069, + "learning_rate": 4.9794381585117e-06, + "loss": 0.4818, + "step": 4121 + }, + { + "epoch": 0.25101239229059463, + "grad_norm": 1.048612656105177, + "learning_rate": 4.979427944063395e-06, + "loss": 0.5093, + "step": 4122 + }, + { + "epoch": 0.25107328806747253, + "grad_norm": 1.0885206263313183, + "learning_rate": 4.9794177270891e-06, + "loss": 0.4505, + "step": 4123 + }, + { + "epoch": 0.2511341838443504, + "grad_norm": 1.0424366941911747, + "learning_rate": 4.979407507588826e-06, + "loss": 0.5328, + "step": 4124 + }, + { + "epoch": 0.2511950796212283, + "grad_norm": 1.1625526891895688, + "learning_rate": 4.9793972855625835e-06, + "loss": 0.4897, + "step": 4125 + }, + { + "epoch": 0.2512559753981061, + "grad_norm": 1.2204232291472044, + "learning_rate": 4.979387061010383e-06, + "loss": 0.4635, + "step": 4126 + }, + { + "epoch": 0.251316871174984, + "grad_norm": 1.0754352903023399, + "learning_rate": 4.979376833932232e-06, + "loss": 0.4943, + "step": 4127 + }, + { + "epoch": 0.25137776695186187, + "grad_norm": 1.0110800797881907, + "learning_rate": 4.979366604328146e-06, + "loss": 0.5372, + "step": 4128 + }, + { + "epoch": 0.25143866272873977, + "grad_norm": 1.0383897452970186, + "learning_rate": 4.979356372198132e-06, + "loss": 0.4356, + "step": 4129 + }, + { + "epoch": 0.2514995585056176, + "grad_norm": 1.0997965841925141, + "learning_rate": 4.979346137542201e-06, + "loss": 0.4589, + "step": 4130 + }, + { + "epoch": 0.2515604542824955, + "grad_norm": 1.081326626004732, + "learning_rate": 4.9793359003603635e-06, + "loss": 0.4116, + "step": 4131 + }, + { + "epoch": 0.25162135005937336, + "grad_norm": 1.058970934680491, + "learning_rate": 4.979325660652631e-06, + "loss": 0.4512, + "step": 4132 + }, + { + "epoch": 0.25168224583625126, + "grad_norm": 1.008470136479926, + "learning_rate": 4.9793154184190125e-06, + "loss": 0.5127, + "step": 4133 + }, + { + "epoch": 0.2517431416131291, + "grad_norm": 1.1439014811908312, + "learning_rate": 4.9793051736595184e-06, + "loss": 0.4655, + "step": 4134 + }, + { + "epoch": 0.251804037390007, + "grad_norm": 0.9328527247655396, + "learning_rate": 4.979294926374161e-06, + "loss": 0.5281, + "step": 4135 + }, + { + "epoch": 0.25186493316688485, + "grad_norm": 1.0436449221328277, + "learning_rate": 4.979284676562949e-06, + "loss": 0.5128, + "step": 4136 + }, + { + "epoch": 0.25192582894376275, + "grad_norm": 0.9914704300457361, + "learning_rate": 4.979274424225893e-06, + "loss": 0.4919, + "step": 4137 + }, + { + "epoch": 0.2519867247206406, + "grad_norm": 1.1364053009868753, + "learning_rate": 4.979264169363004e-06, + "loss": 0.4957, + "step": 4138 + }, + { + "epoch": 0.2520476204975185, + "grad_norm": 1.0104032715797329, + "learning_rate": 4.979253911974293e-06, + "loss": 0.4811, + "step": 4139 + }, + { + "epoch": 0.25210851627439634, + "grad_norm": 1.0392181370191977, + "learning_rate": 4.979243652059768e-06, + "loss": 0.5528, + "step": 4140 + }, + { + "epoch": 0.25216941205127424, + "grad_norm": 1.162146789235497, + "learning_rate": 4.979233389619442e-06, + "loss": 0.5166, + "step": 4141 + }, + { + "epoch": 0.25223030782815214, + "grad_norm": 1.0341102367764374, + "learning_rate": 4.979223124653325e-06, + "loss": 0.5049, + "step": 4142 + }, + { + "epoch": 0.25229120360503, + "grad_norm": 0.9692932494412276, + "learning_rate": 4.979212857161427e-06, + "loss": 0.5524, + "step": 4143 + }, + { + "epoch": 0.2523520993819079, + "grad_norm": 1.1388945980509473, + "learning_rate": 4.9792025871437575e-06, + "loss": 0.4474, + "step": 4144 + }, + { + "epoch": 0.25241299515878574, + "grad_norm": 1.030906906364773, + "learning_rate": 4.979192314600328e-06, + "loss": 0.4906, + "step": 4145 + }, + { + "epoch": 0.25247389093566364, + "grad_norm": 0.9929995381984421, + "learning_rate": 4.97918203953115e-06, + "loss": 0.4742, + "step": 4146 + }, + { + "epoch": 0.2525347867125415, + "grad_norm": 1.024009856756656, + "learning_rate": 4.979171761936232e-06, + "loss": 0.464, + "step": 4147 + }, + { + "epoch": 0.2525956824894194, + "grad_norm": 1.1332398245652562, + "learning_rate": 4.979161481815586e-06, + "loss": 0.4617, + "step": 4148 + }, + { + "epoch": 0.2526565782662972, + "grad_norm": 1.0533726541771493, + "learning_rate": 4.979151199169222e-06, + "loss": 0.518, + "step": 4149 + }, + { + "epoch": 0.2527174740431751, + "grad_norm": 1.0708588601490654, + "learning_rate": 4.979140913997149e-06, + "loss": 0.4909, + "step": 4150 + }, + { + "epoch": 0.252778369820053, + "grad_norm": 1.032916508991088, + "learning_rate": 4.97913062629938e-06, + "loss": 0.4884, + "step": 4151 + }, + { + "epoch": 0.2528392655969309, + "grad_norm": 1.0566661494754832, + "learning_rate": 4.979120336075924e-06, + "loss": 0.5484, + "step": 4152 + }, + { + "epoch": 0.2529001613738087, + "grad_norm": 1.0466549068352444, + "learning_rate": 4.979110043326792e-06, + "loss": 0.4934, + "step": 4153 + }, + { + "epoch": 0.2529610571506866, + "grad_norm": 1.0472775919575747, + "learning_rate": 4.979099748051993e-06, + "loss": 0.5217, + "step": 4154 + }, + { + "epoch": 0.25302195292756446, + "grad_norm": 1.0514932646442379, + "learning_rate": 4.97908945025154e-06, + "loss": 0.4807, + "step": 4155 + }, + { + "epoch": 0.25308284870444236, + "grad_norm": 1.1061138303584577, + "learning_rate": 4.979079149925442e-06, + "loss": 0.5459, + "step": 4156 + }, + { + "epoch": 0.2531437444813202, + "grad_norm": 1.0191603246198486, + "learning_rate": 4.979068847073709e-06, + "loss": 0.5288, + "step": 4157 + }, + { + "epoch": 0.2532046402581981, + "grad_norm": 1.0003569480110428, + "learning_rate": 4.979058541696352e-06, + "loss": 0.5441, + "step": 4158 + }, + { + "epoch": 0.25326553603507596, + "grad_norm": 0.9362808410261741, + "learning_rate": 4.979048233793384e-06, + "loss": 0.5071, + "step": 4159 + }, + { + "epoch": 0.25332643181195386, + "grad_norm": 1.0742269446734907, + "learning_rate": 4.979037923364811e-06, + "loss": 0.4332, + "step": 4160 + }, + { + "epoch": 0.2533873275888317, + "grad_norm": 0.9855766244969387, + "learning_rate": 4.979027610410646e-06, + "loss": 0.5086, + "step": 4161 + }, + { + "epoch": 0.2534482233657096, + "grad_norm": 1.1278314384107002, + "learning_rate": 4.979017294930899e-06, + "loss": 0.4736, + "step": 4162 + }, + { + "epoch": 0.25350911914258745, + "grad_norm": 0.9558370979447068, + "learning_rate": 4.979006976925581e-06, + "loss": 0.5234, + "step": 4163 + }, + { + "epoch": 0.25357001491946535, + "grad_norm": 0.9746740877516773, + "learning_rate": 4.978996656394703e-06, + "loss": 0.4822, + "step": 4164 + }, + { + "epoch": 0.2536309106963432, + "grad_norm": 1.1014775878261802, + "learning_rate": 4.978986333338274e-06, + "loss": 0.4491, + "step": 4165 + }, + { + "epoch": 0.2536918064732211, + "grad_norm": 0.9569538547411932, + "learning_rate": 4.9789760077563055e-06, + "loss": 0.5367, + "step": 4166 + }, + { + "epoch": 0.25375270225009894, + "grad_norm": 1.1140712564573125, + "learning_rate": 4.9789656796488076e-06, + "loss": 0.5159, + "step": 4167 + }, + { + "epoch": 0.25381359802697684, + "grad_norm": 1.053877046817155, + "learning_rate": 4.978955349015791e-06, + "loss": 0.499, + "step": 4168 + }, + { + "epoch": 0.2538744938038547, + "grad_norm": 1.0853577006147443, + "learning_rate": 4.978945015857266e-06, + "loss": 0.5109, + "step": 4169 + }, + { + "epoch": 0.2539353895807326, + "grad_norm": 1.1016307401561245, + "learning_rate": 4.978934680173244e-06, + "loss": 0.4605, + "step": 4170 + }, + { + "epoch": 0.25399628535761043, + "grad_norm": 0.9534127971337051, + "learning_rate": 4.978924341963735e-06, + "loss": 0.5075, + "step": 4171 + }, + { + "epoch": 0.25405718113448833, + "grad_norm": 1.0781941642096409, + "learning_rate": 4.978914001228748e-06, + "loss": 0.5225, + "step": 4172 + }, + { + "epoch": 0.2541180769113662, + "grad_norm": 1.08527085172192, + "learning_rate": 4.978903657968297e-06, + "loss": 0.4705, + "step": 4173 + }, + { + "epoch": 0.2541789726882441, + "grad_norm": 1.0290740372083687, + "learning_rate": 4.978893312182389e-06, + "loss": 0.4714, + "step": 4174 + }, + { + "epoch": 0.2542398684651219, + "grad_norm": 1.0492170381502892, + "learning_rate": 4.978882963871037e-06, + "loss": 0.5033, + "step": 4175 + }, + { + "epoch": 0.2543007642419998, + "grad_norm": 1.0511507435786167, + "learning_rate": 4.978872613034249e-06, + "loss": 0.4911, + "step": 4176 + }, + { + "epoch": 0.25436166001887767, + "grad_norm": 0.9820379609128026, + "learning_rate": 4.978862259672039e-06, + "loss": 0.4902, + "step": 4177 + }, + { + "epoch": 0.25442255579575557, + "grad_norm": 1.0340677093724016, + "learning_rate": 4.978851903784415e-06, + "loss": 0.5229, + "step": 4178 + }, + { + "epoch": 0.2544834515726334, + "grad_norm": 1.0103367798177947, + "learning_rate": 4.978841545371388e-06, + "loss": 0.474, + "step": 4179 + }, + { + "epoch": 0.2545443473495113, + "grad_norm": 1.029827609507553, + "learning_rate": 4.978831184432969e-06, + "loss": 0.4959, + "step": 4180 + }, + { + "epoch": 0.25460524312638916, + "grad_norm": 1.0942576000649247, + "learning_rate": 4.978820820969168e-06, + "loss": 0.4642, + "step": 4181 + }, + { + "epoch": 0.25466613890326706, + "grad_norm": 1.1100581032080492, + "learning_rate": 4.978810454979996e-06, + "loss": 0.4515, + "step": 4182 + }, + { + "epoch": 0.25472703468014496, + "grad_norm": 1.060117416015849, + "learning_rate": 4.978800086465463e-06, + "loss": 0.535, + "step": 4183 + }, + { + "epoch": 0.2547879304570228, + "grad_norm": 1.0101689360513257, + "learning_rate": 4.97878971542558e-06, + "loss": 0.4896, + "step": 4184 + }, + { + "epoch": 0.2548488262339007, + "grad_norm": 1.1296517723716737, + "learning_rate": 4.978779341860359e-06, + "loss": 0.4844, + "step": 4185 + }, + { + "epoch": 0.25490972201077855, + "grad_norm": 1.0579622957646002, + "learning_rate": 4.978768965769808e-06, + "loss": 0.4353, + "step": 4186 + }, + { + "epoch": 0.25497061778765645, + "grad_norm": 1.0614958001688264, + "learning_rate": 4.978758587153939e-06, + "loss": 0.4434, + "step": 4187 + }, + { + "epoch": 0.2550315135645343, + "grad_norm": 0.9727906895567442, + "learning_rate": 4.978748206012762e-06, + "loss": 0.498, + "step": 4188 + }, + { + "epoch": 0.2550924093414122, + "grad_norm": 1.0281678092083033, + "learning_rate": 4.978737822346288e-06, + "loss": 0.5253, + "step": 4189 + }, + { + "epoch": 0.25515330511829004, + "grad_norm": 0.9999432270305109, + "learning_rate": 4.978727436154528e-06, + "loss": 0.4916, + "step": 4190 + }, + { + "epoch": 0.25521420089516794, + "grad_norm": 1.0293004772282734, + "learning_rate": 4.978717047437492e-06, + "loss": 0.4947, + "step": 4191 + }, + { + "epoch": 0.2552750966720458, + "grad_norm": 1.038356096631454, + "learning_rate": 4.978706656195189e-06, + "loss": 0.48, + "step": 4192 + }, + { + "epoch": 0.2553359924489237, + "grad_norm": 0.9446618063275546, + "learning_rate": 4.978696262427633e-06, + "loss": 0.5548, + "step": 4193 + }, + { + "epoch": 0.25539688822580153, + "grad_norm": 0.9958883782489781, + "learning_rate": 4.978685866134831e-06, + "loss": 0.4668, + "step": 4194 + }, + { + "epoch": 0.25545778400267943, + "grad_norm": 1.0433187912463948, + "learning_rate": 4.978675467316797e-06, + "loss": 0.4805, + "step": 4195 + }, + { + "epoch": 0.2555186797795573, + "grad_norm": 1.0541820555256631, + "learning_rate": 4.97866506597354e-06, + "loss": 0.4549, + "step": 4196 + }, + { + "epoch": 0.2555795755564352, + "grad_norm": 0.9766950683850496, + "learning_rate": 4.97865466210507e-06, + "loss": 0.5005, + "step": 4197 + }, + { + "epoch": 0.255640471333313, + "grad_norm": 0.9905923095647241, + "learning_rate": 4.978644255711398e-06, + "loss": 0.5438, + "step": 4198 + }, + { + "epoch": 0.2557013671101909, + "grad_norm": 1.0931326311768408, + "learning_rate": 4.978633846792534e-06, + "loss": 0.4977, + "step": 4199 + }, + { + "epoch": 0.25576226288706877, + "grad_norm": 1.2469322012662378, + "learning_rate": 4.978623435348491e-06, + "loss": 0.4039, + "step": 4200 + }, + { + "epoch": 0.25582315866394667, + "grad_norm": 1.055044216967742, + "learning_rate": 4.978613021379277e-06, + "loss": 0.5135, + "step": 4201 + }, + { + "epoch": 0.2558840544408245, + "grad_norm": 1.0291426875152827, + "learning_rate": 4.978602604884904e-06, + "loss": 0.5147, + "step": 4202 + }, + { + "epoch": 0.2559449502177024, + "grad_norm": 1.0211735220211073, + "learning_rate": 4.978592185865382e-06, + "loss": 0.5193, + "step": 4203 + }, + { + "epoch": 0.25600584599458026, + "grad_norm": 1.0602656642046662, + "learning_rate": 4.978581764320723e-06, + "loss": 0.5496, + "step": 4204 + }, + { + "epoch": 0.25606674177145816, + "grad_norm": 0.9881742480427127, + "learning_rate": 4.978571340250934e-06, + "loss": 0.5129, + "step": 4205 + }, + { + "epoch": 0.256127637548336, + "grad_norm": 1.0319056581591093, + "learning_rate": 4.97856091365603e-06, + "loss": 0.5508, + "step": 4206 + }, + { + "epoch": 0.2561885333252139, + "grad_norm": 1.07999513234695, + "learning_rate": 4.978550484536019e-06, + "loss": 0.4807, + "step": 4207 + }, + { + "epoch": 0.25624942910209175, + "grad_norm": 0.9803902791518019, + "learning_rate": 4.9785400528909125e-06, + "loss": 0.5661, + "step": 4208 + }, + { + "epoch": 0.25631032487896965, + "grad_norm": 1.0810527048554208, + "learning_rate": 4.97852961872072e-06, + "loss": 0.5131, + "step": 4209 + }, + { + "epoch": 0.2563712206558475, + "grad_norm": 1.1207671734520945, + "learning_rate": 4.978519182025454e-06, + "loss": 0.5026, + "step": 4210 + }, + { + "epoch": 0.2564321164327254, + "grad_norm": 0.9689875556670429, + "learning_rate": 4.978508742805124e-06, + "loss": 0.5735, + "step": 4211 + }, + { + "epoch": 0.25649301220960324, + "grad_norm": 0.941172409597495, + "learning_rate": 4.978498301059741e-06, + "loss": 0.5182, + "step": 4212 + }, + { + "epoch": 0.25655390798648114, + "grad_norm": 1.071014205702067, + "learning_rate": 4.978487856789315e-06, + "loss": 0.502, + "step": 4213 + }, + { + "epoch": 0.256614803763359, + "grad_norm": 1.0540079699414602, + "learning_rate": 4.978477409993858e-06, + "loss": 0.4867, + "step": 4214 + }, + { + "epoch": 0.2566756995402369, + "grad_norm": 0.9637814721103485, + "learning_rate": 4.978466960673379e-06, + "loss": 0.4793, + "step": 4215 + }, + { + "epoch": 0.25673659531711474, + "grad_norm": 1.0343710898755503, + "learning_rate": 4.978456508827889e-06, + "loss": 0.4816, + "step": 4216 + }, + { + "epoch": 0.25679749109399264, + "grad_norm": 0.9320148273073705, + "learning_rate": 4.9784460544573995e-06, + "loss": 0.4705, + "step": 4217 + }, + { + "epoch": 0.2568583868708705, + "grad_norm": 1.0242868107841634, + "learning_rate": 4.978435597561921e-06, + "loss": 0.5024, + "step": 4218 + }, + { + "epoch": 0.2569192826477484, + "grad_norm": 1.1823610389442316, + "learning_rate": 4.978425138141464e-06, + "loss": 0.5353, + "step": 4219 + }, + { + "epoch": 0.2569801784246262, + "grad_norm": 1.073732466317486, + "learning_rate": 4.978414676196038e-06, + "loss": 0.4986, + "step": 4220 + }, + { + "epoch": 0.2570410742015041, + "grad_norm": 1.0030209351930515, + "learning_rate": 4.9784042117256554e-06, + "loss": 0.4656, + "step": 4221 + }, + { + "epoch": 0.257101969978382, + "grad_norm": 1.0382458787150262, + "learning_rate": 4.978393744730325e-06, + "loss": 0.4876, + "step": 4222 + }, + { + "epoch": 0.2571628657552599, + "grad_norm": 1.0490564102230748, + "learning_rate": 4.97838327521006e-06, + "loss": 0.4887, + "step": 4223 + }, + { + "epoch": 0.2572237615321378, + "grad_norm": 1.033725005726756, + "learning_rate": 4.978372803164869e-06, + "loss": 0.5052, + "step": 4224 + }, + { + "epoch": 0.2572846573090156, + "grad_norm": 1.0831191261186595, + "learning_rate": 4.978362328594764e-06, + "loss": 0.5021, + "step": 4225 + }, + { + "epoch": 0.2573455530858935, + "grad_norm": 1.0971426837345069, + "learning_rate": 4.978351851499754e-06, + "loss": 0.4924, + "step": 4226 + }, + { + "epoch": 0.25740644886277136, + "grad_norm": 1.0744029494279794, + "learning_rate": 4.978341371879851e-06, + "loss": 0.5027, + "step": 4227 + }, + { + "epoch": 0.25746734463964926, + "grad_norm": 0.9827699685925225, + "learning_rate": 4.978330889735065e-06, + "loss": 0.4865, + "step": 4228 + }, + { + "epoch": 0.2575282404165271, + "grad_norm": 0.9974188085635367, + "learning_rate": 4.978320405065409e-06, + "loss": 0.4585, + "step": 4229 + }, + { + "epoch": 0.257589136193405, + "grad_norm": 0.978456155134026, + "learning_rate": 4.978309917870889e-06, + "loss": 0.569, + "step": 4230 + }, + { + "epoch": 0.25765003197028286, + "grad_norm": 1.1579867021086172, + "learning_rate": 4.978299428151521e-06, + "loss": 0.474, + "step": 4231 + }, + { + "epoch": 0.25771092774716076, + "grad_norm": 1.0473087383438258, + "learning_rate": 4.978288935907311e-06, + "loss": 0.4966, + "step": 4232 + }, + { + "epoch": 0.2577718235240386, + "grad_norm": 0.9967552602289788, + "learning_rate": 4.9782784411382725e-06, + "loss": 0.5216, + "step": 4233 + }, + { + "epoch": 0.2578327193009165, + "grad_norm": 1.0176393277804336, + "learning_rate": 4.9782679438444145e-06, + "loss": 0.4934, + "step": 4234 + }, + { + "epoch": 0.25789361507779435, + "grad_norm": 1.1326064167984315, + "learning_rate": 4.97825744402575e-06, + "loss": 0.4598, + "step": 4235 + }, + { + "epoch": 0.25795451085467225, + "grad_norm": 1.0306949749424994, + "learning_rate": 4.9782469416822875e-06, + "loss": 0.5326, + "step": 4236 + }, + { + "epoch": 0.2580154066315501, + "grad_norm": 1.0748249851648723, + "learning_rate": 4.978236436814039e-06, + "loss": 0.5117, + "step": 4237 + }, + { + "epoch": 0.258076302408428, + "grad_norm": 1.2044453051898507, + "learning_rate": 4.978225929421015e-06, + "loss": 0.4523, + "step": 4238 + }, + { + "epoch": 0.25813719818530584, + "grad_norm": 1.108017369888361, + "learning_rate": 4.978215419503225e-06, + "loss": 0.4406, + "step": 4239 + }, + { + "epoch": 0.25819809396218374, + "grad_norm": 0.9600632509785169, + "learning_rate": 4.978204907060682e-06, + "loss": 0.5853, + "step": 4240 + }, + { + "epoch": 0.2582589897390616, + "grad_norm": 1.0761812498758252, + "learning_rate": 4.978194392093394e-06, + "loss": 0.4826, + "step": 4241 + }, + { + "epoch": 0.2583198855159395, + "grad_norm": 1.0125908976507134, + "learning_rate": 4.978183874601374e-06, + "loss": 0.546, + "step": 4242 + }, + { + "epoch": 0.25838078129281733, + "grad_norm": 1.073181298364281, + "learning_rate": 4.978173354584631e-06, + "loss": 0.4267, + "step": 4243 + }, + { + "epoch": 0.25844167706969523, + "grad_norm": 1.0036312998609556, + "learning_rate": 4.978162832043177e-06, + "loss": 0.5088, + "step": 4244 + }, + { + "epoch": 0.2585025728465731, + "grad_norm": 1.0487677187090338, + "learning_rate": 4.9781523069770225e-06, + "loss": 0.55, + "step": 4245 + }, + { + "epoch": 0.258563468623451, + "grad_norm": 0.9755267068784758, + "learning_rate": 4.9781417793861774e-06, + "loss": 0.5813, + "step": 4246 + }, + { + "epoch": 0.2586243644003288, + "grad_norm": 1.1448288695262951, + "learning_rate": 4.978131249270653e-06, + "loss": 0.4329, + "step": 4247 + }, + { + "epoch": 0.2586852601772067, + "grad_norm": 1.0551150234867765, + "learning_rate": 4.97812071663046e-06, + "loss": 0.4371, + "step": 4248 + }, + { + "epoch": 0.25874615595408457, + "grad_norm": 0.9624072199708438, + "learning_rate": 4.97811018146561e-06, + "loss": 0.4931, + "step": 4249 + }, + { + "epoch": 0.25880705173096247, + "grad_norm": 0.9853927971698032, + "learning_rate": 4.978099643776112e-06, + "loss": 0.5128, + "step": 4250 + }, + { + "epoch": 0.2588679475078403, + "grad_norm": 1.0725962901389934, + "learning_rate": 4.978089103561977e-06, + "loss": 0.5178, + "step": 4251 + }, + { + "epoch": 0.2589288432847182, + "grad_norm": 0.9901615825689618, + "learning_rate": 4.978078560823218e-06, + "loss": 0.568, + "step": 4252 + }, + { + "epoch": 0.25898973906159606, + "grad_norm": 1.0329609440910292, + "learning_rate": 4.978068015559843e-06, + "loss": 0.5103, + "step": 4253 + }, + { + "epoch": 0.25905063483847396, + "grad_norm": 0.9533486786150255, + "learning_rate": 4.978057467771864e-06, + "loss": 0.4963, + "step": 4254 + }, + { + "epoch": 0.2591115306153518, + "grad_norm": 1.0855233833630096, + "learning_rate": 4.978046917459291e-06, + "loss": 0.4526, + "step": 4255 + }, + { + "epoch": 0.2591724263922297, + "grad_norm": 1.1810234774248214, + "learning_rate": 4.978036364622137e-06, + "loss": 0.515, + "step": 4256 + }, + { + "epoch": 0.25923332216910755, + "grad_norm": 1.041638872334355, + "learning_rate": 4.97802580926041e-06, + "loss": 0.5419, + "step": 4257 + }, + { + "epoch": 0.25929421794598545, + "grad_norm": 1.1540591418643193, + "learning_rate": 4.978015251374122e-06, + "loss": 0.524, + "step": 4258 + }, + { + "epoch": 0.2593551137228633, + "grad_norm": 1.124239086945665, + "learning_rate": 4.978004690963283e-06, + "loss": 0.4644, + "step": 4259 + }, + { + "epoch": 0.2594160094997412, + "grad_norm": 1.117926095729487, + "learning_rate": 4.977994128027905e-06, + "loss": 0.5884, + "step": 4260 + }, + { + "epoch": 0.25947690527661904, + "grad_norm": 1.1238041370968503, + "learning_rate": 4.977983562567998e-06, + "loss": 0.3885, + "step": 4261 + }, + { + "epoch": 0.25953780105349694, + "grad_norm": 1.0714017147236954, + "learning_rate": 4.977972994583572e-06, + "loss": 0.4952, + "step": 4262 + }, + { + "epoch": 0.2595986968303748, + "grad_norm": 1.01552877879411, + "learning_rate": 4.97796242407464e-06, + "loss": 0.483, + "step": 4263 + }, + { + "epoch": 0.2596595926072527, + "grad_norm": 0.942634612529687, + "learning_rate": 4.97795185104121e-06, + "loss": 0.4899, + "step": 4264 + }, + { + "epoch": 0.2597204883841306, + "grad_norm": 1.09531801400908, + "learning_rate": 4.977941275483296e-06, + "loss": 0.5114, + "step": 4265 + }, + { + "epoch": 0.25978138416100843, + "grad_norm": 1.0779535377756604, + "learning_rate": 4.977930697400906e-06, + "loss": 0.4039, + "step": 4266 + }, + { + "epoch": 0.25984227993788633, + "grad_norm": 1.0136437141692827, + "learning_rate": 4.977920116794051e-06, + "loss": 0.4652, + "step": 4267 + }, + { + "epoch": 0.2599031757147642, + "grad_norm": 1.0589417739299216, + "learning_rate": 4.977909533662743e-06, + "loss": 0.4684, + "step": 4268 + }, + { + "epoch": 0.2599640714916421, + "grad_norm": 0.9587308548363149, + "learning_rate": 4.9778989480069925e-06, + "loss": 0.4878, + "step": 4269 + }, + { + "epoch": 0.2600249672685199, + "grad_norm": 1.0157762348419908, + "learning_rate": 4.97788835982681e-06, + "loss": 0.5149, + "step": 4270 + }, + { + "epoch": 0.2600858630453978, + "grad_norm": 1.1259763393611677, + "learning_rate": 4.977877769122206e-06, + "loss": 0.4843, + "step": 4271 + }, + { + "epoch": 0.26014675882227567, + "grad_norm": 1.1188293093277009, + "learning_rate": 4.977867175893192e-06, + "loss": 0.4957, + "step": 4272 + }, + { + "epoch": 0.26020765459915357, + "grad_norm": 1.1140873862102008, + "learning_rate": 4.977856580139779e-06, + "loss": 0.5025, + "step": 4273 + }, + { + "epoch": 0.2602685503760314, + "grad_norm": 0.9636388380568758, + "learning_rate": 4.977845981861976e-06, + "loss": 0.4486, + "step": 4274 + }, + { + "epoch": 0.2603294461529093, + "grad_norm": 0.9637820228761064, + "learning_rate": 4.977835381059796e-06, + "loss": 0.5167, + "step": 4275 + }, + { + "epoch": 0.26039034192978716, + "grad_norm": 1.0768274451780855, + "learning_rate": 4.977824777733249e-06, + "loss": 0.4303, + "step": 4276 + }, + { + "epoch": 0.26045123770666506, + "grad_norm": 1.1382926212249227, + "learning_rate": 4.977814171882345e-06, + "loss": 0.544, + "step": 4277 + }, + { + "epoch": 0.2605121334835429, + "grad_norm": 1.0282627569036695, + "learning_rate": 4.977803563507095e-06, + "loss": 0.5232, + "step": 4278 + }, + { + "epoch": 0.2605730292604208, + "grad_norm": 1.00510390184354, + "learning_rate": 4.9777929526075105e-06, + "loss": 0.4845, + "step": 4279 + }, + { + "epoch": 0.26063392503729865, + "grad_norm": 1.067855329614217, + "learning_rate": 4.977782339183603e-06, + "loss": 0.5123, + "step": 4280 + }, + { + "epoch": 0.26069482081417655, + "grad_norm": 1.038534839688906, + "learning_rate": 4.977771723235382e-06, + "loss": 0.4639, + "step": 4281 + }, + { + "epoch": 0.2607557165910544, + "grad_norm": 1.0501360348326798, + "learning_rate": 4.9777611047628586e-06, + "loss": 0.4676, + "step": 4282 + }, + { + "epoch": 0.2608166123679323, + "grad_norm": 1.065577098548266, + "learning_rate": 4.977750483766043e-06, + "loss": 0.4715, + "step": 4283 + }, + { + "epoch": 0.26087750814481014, + "grad_norm": 1.0834853020486452, + "learning_rate": 4.9777398602449475e-06, + "loss": 0.4875, + "step": 4284 + }, + { + "epoch": 0.26093840392168804, + "grad_norm": 0.978118767035738, + "learning_rate": 4.977729234199582e-06, + "loss": 0.4897, + "step": 4285 + }, + { + "epoch": 0.2609992996985659, + "grad_norm": 1.0699002504174069, + "learning_rate": 4.977718605629957e-06, + "loss": 0.4324, + "step": 4286 + }, + { + "epoch": 0.2610601954754438, + "grad_norm": 1.013377163353115, + "learning_rate": 4.977707974536084e-06, + "loss": 0.4707, + "step": 4287 + }, + { + "epoch": 0.26112109125232164, + "grad_norm": 1.0045144261941663, + "learning_rate": 4.977697340917974e-06, + "loss": 0.4545, + "step": 4288 + }, + { + "epoch": 0.26118198702919954, + "grad_norm": 1.0584961345851316, + "learning_rate": 4.977686704775637e-06, + "loss": 0.4983, + "step": 4289 + }, + { + "epoch": 0.2612428828060774, + "grad_norm": 0.968778604362165, + "learning_rate": 4.977676066109085e-06, + "loss": 0.4988, + "step": 4290 + }, + { + "epoch": 0.2613037785829553, + "grad_norm": 1.022396362376389, + "learning_rate": 4.9776654249183274e-06, + "loss": 0.5028, + "step": 4291 + }, + { + "epoch": 0.2613646743598331, + "grad_norm": 1.0162845627740082, + "learning_rate": 4.977654781203376e-06, + "loss": 0.4666, + "step": 4292 + }, + { + "epoch": 0.261425570136711, + "grad_norm": 1.0017807837817576, + "learning_rate": 4.977644134964241e-06, + "loss": 0.5125, + "step": 4293 + }, + { + "epoch": 0.2614864659135889, + "grad_norm": 1.1348776154397495, + "learning_rate": 4.977633486200935e-06, + "loss": 0.4503, + "step": 4294 + }, + { + "epoch": 0.2615473616904668, + "grad_norm": 1.1150880712971536, + "learning_rate": 4.977622834913466e-06, + "loss": 0.4216, + "step": 4295 + }, + { + "epoch": 0.2616082574673446, + "grad_norm": 1.0894259943137183, + "learning_rate": 4.977612181101848e-06, + "loss": 0.4475, + "step": 4296 + }, + { + "epoch": 0.2616691532442225, + "grad_norm": 0.940953715135138, + "learning_rate": 4.977601524766088e-06, + "loss": 0.5452, + "step": 4297 + }, + { + "epoch": 0.26173004902110036, + "grad_norm": 0.9570907587309244, + "learning_rate": 4.977590865906201e-06, + "loss": 0.4962, + "step": 4298 + }, + { + "epoch": 0.26179094479797826, + "grad_norm": 1.0007821943971995, + "learning_rate": 4.977580204522195e-06, + "loss": 0.4907, + "step": 4299 + }, + { + "epoch": 0.2618518405748561, + "grad_norm": 1.0127129184215837, + "learning_rate": 4.977569540614082e-06, + "loss": 0.444, + "step": 4300 + }, + { + "epoch": 0.261912736351734, + "grad_norm": 1.0142659358135995, + "learning_rate": 4.9775588741818725e-06, + "loss": 0.4765, + "step": 4301 + }, + { + "epoch": 0.26197363212861186, + "grad_norm": 1.0181709861307093, + "learning_rate": 4.977548205225578e-06, + "loss": 0.4756, + "step": 4302 + }, + { + "epoch": 0.26203452790548976, + "grad_norm": 1.0626007642696378, + "learning_rate": 4.9775375337452084e-06, + "loss": 0.4392, + "step": 4303 + }, + { + "epoch": 0.2620954236823676, + "grad_norm": 1.0454049875565132, + "learning_rate": 4.9775268597407755e-06, + "loss": 0.4982, + "step": 4304 + }, + { + "epoch": 0.2621563194592455, + "grad_norm": 1.0421598798185556, + "learning_rate": 4.9775161832122895e-06, + "loss": 0.4981, + "step": 4305 + }, + { + "epoch": 0.2622172152361234, + "grad_norm": 1.0333363082363236, + "learning_rate": 4.977505504159762e-06, + "loss": 0.5367, + "step": 4306 + }, + { + "epoch": 0.26227811101300125, + "grad_norm": 1.0731533518403562, + "learning_rate": 4.977494822583203e-06, + "loss": 0.527, + "step": 4307 + }, + { + "epoch": 0.26233900678987915, + "grad_norm": 0.9551649506659368, + "learning_rate": 4.977484138482623e-06, + "loss": 0.5567, + "step": 4308 + }, + { + "epoch": 0.262399902566757, + "grad_norm": 1.0640274248725643, + "learning_rate": 4.977473451858035e-06, + "loss": 0.4364, + "step": 4309 + }, + { + "epoch": 0.2624607983436349, + "grad_norm": 0.9473526878786953, + "learning_rate": 4.977462762709448e-06, + "loss": 0.4354, + "step": 4310 + }, + { + "epoch": 0.26252169412051274, + "grad_norm": 1.0361984120691552, + "learning_rate": 4.9774520710368735e-06, + "loss": 0.5468, + "step": 4311 + }, + { + "epoch": 0.26258258989739064, + "grad_norm": 1.0394626294338078, + "learning_rate": 4.977441376840322e-06, + "loss": 0.4764, + "step": 4312 + }, + { + "epoch": 0.2626434856742685, + "grad_norm": 1.0406970081999007, + "learning_rate": 4.977430680119805e-06, + "loss": 0.5059, + "step": 4313 + }, + { + "epoch": 0.2627043814511464, + "grad_norm": 1.0363104108750198, + "learning_rate": 4.977419980875333e-06, + "loss": 0.4186, + "step": 4314 + }, + { + "epoch": 0.26276527722802423, + "grad_norm": 1.0059821671402036, + "learning_rate": 4.977409279106917e-06, + "loss": 0.4361, + "step": 4315 + }, + { + "epoch": 0.26282617300490213, + "grad_norm": 1.0802657802253421, + "learning_rate": 4.977398574814568e-06, + "loss": 0.4852, + "step": 4316 + }, + { + "epoch": 0.26288706878178, + "grad_norm": 1.1456122633134054, + "learning_rate": 4.977387867998297e-06, + "loss": 0.5115, + "step": 4317 + }, + { + "epoch": 0.2629479645586579, + "grad_norm": 1.045722282311691, + "learning_rate": 4.977377158658115e-06, + "loss": 0.4555, + "step": 4318 + }, + { + "epoch": 0.2630088603355357, + "grad_norm": 1.0896920353135542, + "learning_rate": 4.977366446794033e-06, + "loss": 0.4239, + "step": 4319 + }, + { + "epoch": 0.2630697561124136, + "grad_norm": 1.0699394954477637, + "learning_rate": 4.977355732406061e-06, + "loss": 0.4822, + "step": 4320 + }, + { + "epoch": 0.26313065188929147, + "grad_norm": 0.9766821282349897, + "learning_rate": 4.977345015494209e-06, + "loss": 0.4862, + "step": 4321 + }, + { + "epoch": 0.26319154766616937, + "grad_norm": 1.1053355692566227, + "learning_rate": 4.9773342960584915e-06, + "loss": 0.4851, + "step": 4322 + }, + { + "epoch": 0.2632524434430472, + "grad_norm": 1.018503428640529, + "learning_rate": 4.9773235740989164e-06, + "loss": 0.5575, + "step": 4323 + }, + { + "epoch": 0.2633133392199251, + "grad_norm": 1.0028411391887961, + "learning_rate": 4.977312849615496e-06, + "loss": 0.4936, + "step": 4324 + }, + { + "epoch": 0.26337423499680296, + "grad_norm": 1.0559339974776063, + "learning_rate": 4.9773021226082404e-06, + "loss": 0.5517, + "step": 4325 + }, + { + "epoch": 0.26343513077368086, + "grad_norm": 1.1000362626639208, + "learning_rate": 4.977291393077161e-06, + "loss": 0.519, + "step": 4326 + }, + { + "epoch": 0.2634960265505587, + "grad_norm": 1.0210900857082925, + "learning_rate": 4.977280661022269e-06, + "loss": 0.5035, + "step": 4327 + }, + { + "epoch": 0.2635569223274366, + "grad_norm": 1.0277690079920427, + "learning_rate": 4.977269926443574e-06, + "loss": 0.4654, + "step": 4328 + }, + { + "epoch": 0.26361781810431445, + "grad_norm": 1.120759748383794, + "learning_rate": 4.977259189341089e-06, + "loss": 0.441, + "step": 4329 + }, + { + "epoch": 0.26367871388119235, + "grad_norm": 0.9874544665210461, + "learning_rate": 4.977248449714823e-06, + "loss": 0.5062, + "step": 4330 + }, + { + "epoch": 0.2637396096580702, + "grad_norm": 1.0561744958732404, + "learning_rate": 4.977237707564788e-06, + "loss": 0.4172, + "step": 4331 + }, + { + "epoch": 0.2638005054349481, + "grad_norm": 0.986054957686327, + "learning_rate": 4.977226962890995e-06, + "loss": 0.5042, + "step": 4332 + }, + { + "epoch": 0.26386140121182594, + "grad_norm": 1.0440583067792675, + "learning_rate": 4.977216215693455e-06, + "loss": 0.498, + "step": 4333 + }, + { + "epoch": 0.26392229698870384, + "grad_norm": 1.0663844313234603, + "learning_rate": 4.977205465972178e-06, + "loss": 0.5236, + "step": 4334 + }, + { + "epoch": 0.2639831927655817, + "grad_norm": 1.1234499157634448, + "learning_rate": 4.977194713727176e-06, + "loss": 0.4513, + "step": 4335 + }, + { + "epoch": 0.2640440885424596, + "grad_norm": 1.1172306240221723, + "learning_rate": 4.977183958958459e-06, + "loss": 0.4015, + "step": 4336 + }, + { + "epoch": 0.26410498431933743, + "grad_norm": 1.0161273181749533, + "learning_rate": 4.977173201666039e-06, + "loss": 0.5514, + "step": 4337 + }, + { + "epoch": 0.26416588009621533, + "grad_norm": 0.9584198544726118, + "learning_rate": 4.977162441849926e-06, + "loss": 0.5385, + "step": 4338 + }, + { + "epoch": 0.2642267758730932, + "grad_norm": 0.9640264754177881, + "learning_rate": 4.977151679510132e-06, + "loss": 0.4803, + "step": 4339 + }, + { + "epoch": 0.2642876716499711, + "grad_norm": 1.032633862675662, + "learning_rate": 4.977140914646667e-06, + "loss": 0.4715, + "step": 4340 + }, + { + "epoch": 0.2643485674268489, + "grad_norm": 1.0094407181409029, + "learning_rate": 4.977130147259542e-06, + "loss": 0.5096, + "step": 4341 + }, + { + "epoch": 0.2644094632037268, + "grad_norm": 1.0082669080801097, + "learning_rate": 4.977119377348769e-06, + "loss": 0.4645, + "step": 4342 + }, + { + "epoch": 0.26447035898060467, + "grad_norm": 1.027447285772334, + "learning_rate": 4.977108604914358e-06, + "loss": 0.5001, + "step": 4343 + }, + { + "epoch": 0.26453125475748257, + "grad_norm": 1.0065727171629335, + "learning_rate": 4.97709782995632e-06, + "loss": 0.4945, + "step": 4344 + }, + { + "epoch": 0.2645921505343604, + "grad_norm": 1.114292049934933, + "learning_rate": 4.9770870524746665e-06, + "loss": 0.4696, + "step": 4345 + }, + { + "epoch": 0.2646530463112383, + "grad_norm": 1.0552448988805994, + "learning_rate": 4.9770762724694075e-06, + "loss": 0.4895, + "step": 4346 + }, + { + "epoch": 0.2647139420881162, + "grad_norm": 1.1037957951529809, + "learning_rate": 4.977065489940555e-06, + "loss": 0.5408, + "step": 4347 + }, + { + "epoch": 0.26477483786499406, + "grad_norm": 1.097299755292922, + "learning_rate": 4.977054704888121e-06, + "loss": 0.4763, + "step": 4348 + }, + { + "epoch": 0.26483573364187196, + "grad_norm": 1.0743878331203938, + "learning_rate": 4.977043917312114e-06, + "loss": 0.4896, + "step": 4349 + }, + { + "epoch": 0.2648966294187498, + "grad_norm": 1.0150852482870096, + "learning_rate": 4.977033127212546e-06, + "loss": 0.4762, + "step": 4350 + }, + { + "epoch": 0.2649575251956277, + "grad_norm": 1.0086515029591878, + "learning_rate": 4.977022334589429e-06, + "loss": 0.4339, + "step": 4351 + }, + { + "epoch": 0.26501842097250555, + "grad_norm": 0.9987272172733684, + "learning_rate": 4.977011539442772e-06, + "loss": 0.5528, + "step": 4352 + }, + { + "epoch": 0.26507931674938345, + "grad_norm": 1.054165274425705, + "learning_rate": 4.977000741772588e-06, + "loss": 0.4715, + "step": 4353 + }, + { + "epoch": 0.2651402125262613, + "grad_norm": 1.0419955603961981, + "learning_rate": 4.976989941578887e-06, + "loss": 0.5373, + "step": 4354 + }, + { + "epoch": 0.2652011083031392, + "grad_norm": 1.0321080502177797, + "learning_rate": 4.97697913886168e-06, + "loss": 0.4814, + "step": 4355 + }, + { + "epoch": 0.26526200408001704, + "grad_norm": 1.0810933561846277, + "learning_rate": 4.976968333620979e-06, + "loss": 0.4827, + "step": 4356 + }, + { + "epoch": 0.26532289985689494, + "grad_norm": 1.0207253521093855, + "learning_rate": 4.976957525856792e-06, + "loss": 0.5221, + "step": 4357 + }, + { + "epoch": 0.2653837956337728, + "grad_norm": 1.17321367054963, + "learning_rate": 4.976946715569134e-06, + "loss": 0.4556, + "step": 4358 + }, + { + "epoch": 0.2654446914106507, + "grad_norm": 1.0147553963300695, + "learning_rate": 4.976935902758013e-06, + "loss": 0.571, + "step": 4359 + }, + { + "epoch": 0.26550558718752854, + "grad_norm": 1.020235458825297, + "learning_rate": 4.9769250874234426e-06, + "loss": 0.451, + "step": 4360 + }, + { + "epoch": 0.26556648296440644, + "grad_norm": 1.14808281358581, + "learning_rate": 4.9769142695654315e-06, + "loss": 0.4002, + "step": 4361 + }, + { + "epoch": 0.2656273787412843, + "grad_norm": 1.0251636258269496, + "learning_rate": 4.9769034491839915e-06, + "loss": 0.5079, + "step": 4362 + }, + { + "epoch": 0.2656882745181622, + "grad_norm": 1.1266061794202493, + "learning_rate": 4.9768926262791345e-06, + "loss": 0.4378, + "step": 4363 + }, + { + "epoch": 0.26574917029504, + "grad_norm": 1.0865826008875394, + "learning_rate": 4.97688180085087e-06, + "loss": 0.54, + "step": 4364 + }, + { + "epoch": 0.2658100660719179, + "grad_norm": 1.0139983573162212, + "learning_rate": 4.97687097289921e-06, + "loss": 0.5011, + "step": 4365 + }, + { + "epoch": 0.2658709618487958, + "grad_norm": 0.9772287862586538, + "learning_rate": 4.976860142424166e-06, + "loss": 0.5427, + "step": 4366 + }, + { + "epoch": 0.2659318576256737, + "grad_norm": 1.0268370817171697, + "learning_rate": 4.976849309425749e-06, + "loss": 0.4734, + "step": 4367 + }, + { + "epoch": 0.2659927534025515, + "grad_norm": 1.0819112876359447, + "learning_rate": 4.976838473903968e-06, + "loss": 0.454, + "step": 4368 + }, + { + "epoch": 0.2660536491794294, + "grad_norm": 1.0770013827128522, + "learning_rate": 4.976827635858835e-06, + "loss": 0.4988, + "step": 4369 + }, + { + "epoch": 0.26611454495630726, + "grad_norm": 1.0540707284022202, + "learning_rate": 4.976816795290363e-06, + "loss": 0.4419, + "step": 4370 + }, + { + "epoch": 0.26617544073318516, + "grad_norm": 1.1372541228406503, + "learning_rate": 4.9768059521985605e-06, + "loss": 0.4492, + "step": 4371 + }, + { + "epoch": 0.266236336510063, + "grad_norm": 1.110961564554303, + "learning_rate": 4.97679510658344e-06, + "loss": 0.4698, + "step": 4372 + }, + { + "epoch": 0.2662972322869409, + "grad_norm": 1.0070681981635305, + "learning_rate": 4.976784258445012e-06, + "loss": 0.4946, + "step": 4373 + }, + { + "epoch": 0.26635812806381876, + "grad_norm": 1.0196780582753202, + "learning_rate": 4.976773407783288e-06, + "loss": 0.4841, + "step": 4374 + }, + { + "epoch": 0.26641902384069666, + "grad_norm": 1.0420550140186162, + "learning_rate": 4.976762554598279e-06, + "loss": 0.4326, + "step": 4375 + }, + { + "epoch": 0.2664799196175745, + "grad_norm": 1.0409552586153321, + "learning_rate": 4.976751698889995e-06, + "loss": 0.4636, + "step": 4376 + }, + { + "epoch": 0.2665408153944524, + "grad_norm": 1.0165885898830007, + "learning_rate": 4.976740840658448e-06, + "loss": 0.4777, + "step": 4377 + }, + { + "epoch": 0.26660171117133025, + "grad_norm": 1.0975648681363115, + "learning_rate": 4.976729979903649e-06, + "loss": 0.4756, + "step": 4378 + }, + { + "epoch": 0.26666260694820815, + "grad_norm": 1.0695407608266332, + "learning_rate": 4.976719116625609e-06, + "loss": 0.4242, + "step": 4379 + }, + { + "epoch": 0.266723502725086, + "grad_norm": 1.0340389145936968, + "learning_rate": 4.9767082508243395e-06, + "loss": 0.5151, + "step": 4380 + }, + { + "epoch": 0.2667843985019639, + "grad_norm": 1.0068532637288095, + "learning_rate": 4.976697382499851e-06, + "loss": 0.4863, + "step": 4381 + }, + { + "epoch": 0.26684529427884174, + "grad_norm": 1.0064594888792282, + "learning_rate": 4.976686511652154e-06, + "loss": 0.4761, + "step": 4382 + }, + { + "epoch": 0.26690619005571964, + "grad_norm": 1.0349332565802773, + "learning_rate": 4.976675638281261e-06, + "loss": 0.5031, + "step": 4383 + }, + { + "epoch": 0.2669670858325975, + "grad_norm": 1.0568372862624784, + "learning_rate": 4.976664762387182e-06, + "loss": 0.449, + "step": 4384 + }, + { + "epoch": 0.2670279816094754, + "grad_norm": 1.1411673248029799, + "learning_rate": 4.976653883969929e-06, + "loss": 0.5113, + "step": 4385 + }, + { + "epoch": 0.26708887738635323, + "grad_norm": 1.1340002754458671, + "learning_rate": 4.9766430030295125e-06, + "loss": 0.4356, + "step": 4386 + }, + { + "epoch": 0.26714977316323113, + "grad_norm": 1.138906348827003, + "learning_rate": 4.976632119565943e-06, + "loss": 0.4357, + "step": 4387 + }, + { + "epoch": 0.26721066894010903, + "grad_norm": 1.014261505582291, + "learning_rate": 4.976621233579232e-06, + "loss": 0.449, + "step": 4388 + }, + { + "epoch": 0.2672715647169869, + "grad_norm": 1.050366961464326, + "learning_rate": 4.976610345069391e-06, + "loss": 0.5458, + "step": 4389 + }, + { + "epoch": 0.2673324604938648, + "grad_norm": 1.0603582387501844, + "learning_rate": 4.976599454036431e-06, + "loss": 0.4201, + "step": 4390 + }, + { + "epoch": 0.2673933562707426, + "grad_norm": 1.062619885434541, + "learning_rate": 4.976588560480363e-06, + "loss": 0.474, + "step": 4391 + }, + { + "epoch": 0.2674542520476205, + "grad_norm": 1.0168994942941592, + "learning_rate": 4.976577664401197e-06, + "loss": 0.545, + "step": 4392 + }, + { + "epoch": 0.26751514782449837, + "grad_norm": 0.9406511130832385, + "learning_rate": 4.976566765798947e-06, + "loss": 0.4931, + "step": 4393 + }, + { + "epoch": 0.26757604360137627, + "grad_norm": 1.0113987021329334, + "learning_rate": 4.9765558646736215e-06, + "loss": 0.4644, + "step": 4394 + }, + { + "epoch": 0.2676369393782541, + "grad_norm": 1.017110493534631, + "learning_rate": 4.976544961025233e-06, + "loss": 0.4504, + "step": 4395 + }, + { + "epoch": 0.267697835155132, + "grad_norm": 1.0878250064427246, + "learning_rate": 4.976534054853791e-06, + "loss": 0.4683, + "step": 4396 + }, + { + "epoch": 0.26775873093200986, + "grad_norm": 1.0258750537128132, + "learning_rate": 4.976523146159308e-06, + "loss": 0.4857, + "step": 4397 + }, + { + "epoch": 0.26781962670888776, + "grad_norm": 1.0107425443028046, + "learning_rate": 4.976512234941795e-06, + "loss": 0.4608, + "step": 4398 + }, + { + "epoch": 0.2678805224857656, + "grad_norm": 0.9745965241548192, + "learning_rate": 4.9765013212012615e-06, + "loss": 0.4939, + "step": 4399 + }, + { + "epoch": 0.2679414182626435, + "grad_norm": 1.0459573942415852, + "learning_rate": 4.976490404937721e-06, + "loss": 0.4888, + "step": 4400 + }, + { + "epoch": 0.26800231403952135, + "grad_norm": 1.019864783110917, + "learning_rate": 4.9764794861511836e-06, + "loss": 0.4607, + "step": 4401 + }, + { + "epoch": 0.26806320981639925, + "grad_norm": 1.1156308737967318, + "learning_rate": 4.97646856484166e-06, + "loss": 0.5177, + "step": 4402 + }, + { + "epoch": 0.2681241055932771, + "grad_norm": 1.0773627886316834, + "learning_rate": 4.976457641009163e-06, + "loss": 0.5894, + "step": 4403 + }, + { + "epoch": 0.268185001370155, + "grad_norm": 1.1219653729338495, + "learning_rate": 4.9764467146537e-06, + "loss": 0.4862, + "step": 4404 + }, + { + "epoch": 0.26824589714703284, + "grad_norm": 0.9912662278053233, + "learning_rate": 4.976435785775287e-06, + "loss": 0.4043, + "step": 4405 + }, + { + "epoch": 0.26830679292391074, + "grad_norm": 0.9823827410536216, + "learning_rate": 4.976424854373931e-06, + "loss": 0.5294, + "step": 4406 + }, + { + "epoch": 0.2683676887007886, + "grad_norm": 1.0657462305039678, + "learning_rate": 4.976413920449646e-06, + "loss": 0.5092, + "step": 4407 + }, + { + "epoch": 0.2684285844776665, + "grad_norm": 0.9829879409521822, + "learning_rate": 4.9764029840024404e-06, + "loss": 0.4739, + "step": 4408 + }, + { + "epoch": 0.26848948025454433, + "grad_norm": 0.9770474846633947, + "learning_rate": 4.976392045032329e-06, + "loss": 0.5325, + "step": 4409 + }, + { + "epoch": 0.26855037603142223, + "grad_norm": 1.0558584206208204, + "learning_rate": 4.9763811035393195e-06, + "loss": 0.503, + "step": 4410 + }, + { + "epoch": 0.2686112718083001, + "grad_norm": 1.0009338150408398, + "learning_rate": 4.976370159523425e-06, + "loss": 0.5022, + "step": 4411 + }, + { + "epoch": 0.268672167585178, + "grad_norm": 1.1033997551796788, + "learning_rate": 4.976359212984656e-06, + "loss": 0.434, + "step": 4412 + }, + { + "epoch": 0.2687330633620558, + "grad_norm": 1.1271835147992066, + "learning_rate": 4.976348263923023e-06, + "loss": 0.4828, + "step": 4413 + }, + { + "epoch": 0.2687939591389337, + "grad_norm": 1.1134581710673057, + "learning_rate": 4.976337312338539e-06, + "loss": 0.5035, + "step": 4414 + }, + { + "epoch": 0.26885485491581157, + "grad_norm": 1.0817602792966154, + "learning_rate": 4.976326358231214e-06, + "loss": 0.4897, + "step": 4415 + }, + { + "epoch": 0.26891575069268947, + "grad_norm": 1.1142431907666506, + "learning_rate": 4.976315401601058e-06, + "loss": 0.4613, + "step": 4416 + }, + { + "epoch": 0.2689766464695673, + "grad_norm": 1.097183010557061, + "learning_rate": 4.976304442448085e-06, + "loss": 0.4789, + "step": 4417 + }, + { + "epoch": 0.2690375422464452, + "grad_norm": 0.9964858381201339, + "learning_rate": 4.976293480772303e-06, + "loss": 0.5264, + "step": 4418 + }, + { + "epoch": 0.26909843802332306, + "grad_norm": 1.0682460826581925, + "learning_rate": 4.976282516573725e-06, + "loss": 0.4246, + "step": 4419 + }, + { + "epoch": 0.26915933380020096, + "grad_norm": 1.0084504539440458, + "learning_rate": 4.976271549852362e-06, + "loss": 0.5377, + "step": 4420 + }, + { + "epoch": 0.2692202295770788, + "grad_norm": 1.075596665143942, + "learning_rate": 4.976260580608225e-06, + "loss": 0.5317, + "step": 4421 + }, + { + "epoch": 0.2692811253539567, + "grad_norm": 1.005750424669973, + "learning_rate": 4.976249608841326e-06, + "loss": 0.478, + "step": 4422 + }, + { + "epoch": 0.26934202113083455, + "grad_norm": 1.0433690481972324, + "learning_rate": 4.976238634551674e-06, + "loss": 0.4681, + "step": 4423 + }, + { + "epoch": 0.26940291690771245, + "grad_norm": 1.0288650069469758, + "learning_rate": 4.9762276577392825e-06, + "loss": 0.439, + "step": 4424 + }, + { + "epoch": 0.2694638126845903, + "grad_norm": 0.9814101075371204, + "learning_rate": 4.976216678404161e-06, + "loss": 0.522, + "step": 4425 + }, + { + "epoch": 0.2695247084614682, + "grad_norm": 1.0458428259281112, + "learning_rate": 4.976205696546322e-06, + "loss": 0.541, + "step": 4426 + }, + { + "epoch": 0.26958560423834604, + "grad_norm": 1.079700616128, + "learning_rate": 4.976194712165776e-06, + "loss": 0.4967, + "step": 4427 + }, + { + "epoch": 0.26964650001522394, + "grad_norm": 1.0272168999256106, + "learning_rate": 4.976183725262535e-06, + "loss": 0.4508, + "step": 4428 + }, + { + "epoch": 0.26970739579210185, + "grad_norm": 1.059247503342494, + "learning_rate": 4.976172735836608e-06, + "loss": 0.4244, + "step": 4429 + }, + { + "epoch": 0.2697682915689797, + "grad_norm": 0.956842662043063, + "learning_rate": 4.9761617438880096e-06, + "loss": 0.4745, + "step": 4430 + }, + { + "epoch": 0.2698291873458576, + "grad_norm": 1.0662412094258193, + "learning_rate": 4.9761507494167485e-06, + "loss": 0.4084, + "step": 4431 + }, + { + "epoch": 0.26989008312273544, + "grad_norm": 1.1112081527484614, + "learning_rate": 4.976139752422835e-06, + "loss": 0.5512, + "step": 4432 + }, + { + "epoch": 0.26995097889961334, + "grad_norm": 0.970206655778794, + "learning_rate": 4.9761287529062835e-06, + "loss": 0.4458, + "step": 4433 + }, + { + "epoch": 0.2700118746764912, + "grad_norm": 1.1203493732330476, + "learning_rate": 4.9761177508671024e-06, + "loss": 0.5057, + "step": 4434 + }, + { + "epoch": 0.2700727704533691, + "grad_norm": 1.1457439143542683, + "learning_rate": 4.976106746305305e-06, + "loss": 0.59, + "step": 4435 + }, + { + "epoch": 0.2701336662302469, + "grad_norm": 1.0613666475326669, + "learning_rate": 4.976095739220901e-06, + "loss": 0.4542, + "step": 4436 + }, + { + "epoch": 0.2701945620071248, + "grad_norm": 1.0464802081409736, + "learning_rate": 4.976084729613902e-06, + "loss": 0.4988, + "step": 4437 + }, + { + "epoch": 0.2702554577840027, + "grad_norm": 1.0564456692772686, + "learning_rate": 4.97607371748432e-06, + "loss": 0.5161, + "step": 4438 + }, + { + "epoch": 0.2703163535608806, + "grad_norm": 1.072540959654046, + "learning_rate": 4.976062702832165e-06, + "loss": 0.4231, + "step": 4439 + }, + { + "epoch": 0.2703772493377584, + "grad_norm": 0.9824362664896312, + "learning_rate": 4.976051685657449e-06, + "loss": 0.5169, + "step": 4440 + }, + { + "epoch": 0.2704381451146363, + "grad_norm": 1.0788906647134073, + "learning_rate": 4.976040665960182e-06, + "loss": 0.4993, + "step": 4441 + }, + { + "epoch": 0.27049904089151416, + "grad_norm": 1.082051669160645, + "learning_rate": 4.9760296437403775e-06, + "loss": 0.4676, + "step": 4442 + }, + { + "epoch": 0.27055993666839206, + "grad_norm": 1.0389767547839033, + "learning_rate": 4.976018618998045e-06, + "loss": 0.4852, + "step": 4443 + }, + { + "epoch": 0.2706208324452699, + "grad_norm": 1.086810987832344, + "learning_rate": 4.976007591733196e-06, + "loss": 0.4446, + "step": 4444 + }, + { + "epoch": 0.2706817282221478, + "grad_norm": 1.0343512729732083, + "learning_rate": 4.975996561945843e-06, + "loss": 0.4739, + "step": 4445 + }, + { + "epoch": 0.27074262399902566, + "grad_norm": 0.9537894730798898, + "learning_rate": 4.975985529635995e-06, + "loss": 0.4604, + "step": 4446 + }, + { + "epoch": 0.27080351977590356, + "grad_norm": 1.0691686146989552, + "learning_rate": 4.975974494803665e-06, + "loss": 0.4685, + "step": 4447 + }, + { + "epoch": 0.2708644155527814, + "grad_norm": 1.0913486304050501, + "learning_rate": 4.975963457448863e-06, + "loss": 0.4596, + "step": 4448 + }, + { + "epoch": 0.2709253113296593, + "grad_norm": 1.0068630415888546, + "learning_rate": 4.975952417571602e-06, + "loss": 0.5686, + "step": 4449 + }, + { + "epoch": 0.27098620710653715, + "grad_norm": 1.0215947306536421, + "learning_rate": 4.975941375171892e-06, + "loss": 0.4959, + "step": 4450 + }, + { + "epoch": 0.27104710288341505, + "grad_norm": 1.0803173162270017, + "learning_rate": 4.975930330249744e-06, + "loss": 0.4797, + "step": 4451 + }, + { + "epoch": 0.2711079986602929, + "grad_norm": 1.0426949842015742, + "learning_rate": 4.9759192828051695e-06, + "loss": 0.5442, + "step": 4452 + }, + { + "epoch": 0.2711688944371708, + "grad_norm": 1.1503672825543385, + "learning_rate": 4.97590823283818e-06, + "loss": 0.447, + "step": 4453 + }, + { + "epoch": 0.27122979021404864, + "grad_norm": 1.013373296147792, + "learning_rate": 4.975897180348787e-06, + "loss": 0.5112, + "step": 4454 + }, + { + "epoch": 0.27129068599092654, + "grad_norm": 1.1027728736857432, + "learning_rate": 4.975886125337001e-06, + "loss": 0.4525, + "step": 4455 + }, + { + "epoch": 0.2713515817678044, + "grad_norm": 0.9213065596685084, + "learning_rate": 4.975875067802834e-06, + "loss": 0.4908, + "step": 4456 + }, + { + "epoch": 0.2714124775446823, + "grad_norm": 0.9858457999571967, + "learning_rate": 4.9758640077462975e-06, + "loss": 0.4937, + "step": 4457 + }, + { + "epoch": 0.27147337332156013, + "grad_norm": 0.9929199108853294, + "learning_rate": 4.975852945167401e-06, + "loss": 0.5213, + "step": 4458 + }, + { + "epoch": 0.27153426909843803, + "grad_norm": 1.0103972904654441, + "learning_rate": 4.975841880066158e-06, + "loss": 0.4619, + "step": 4459 + }, + { + "epoch": 0.2715951648753159, + "grad_norm": 1.1867365452642582, + "learning_rate": 4.975830812442578e-06, + "loss": 0.4135, + "step": 4460 + }, + { + "epoch": 0.2716560606521938, + "grad_norm": 1.0384594884672478, + "learning_rate": 4.975819742296673e-06, + "loss": 0.4933, + "step": 4461 + }, + { + "epoch": 0.2717169564290716, + "grad_norm": 1.0068192119822117, + "learning_rate": 4.975808669628454e-06, + "loss": 0.4558, + "step": 4462 + }, + { + "epoch": 0.2717778522059495, + "grad_norm": 0.9872926569081232, + "learning_rate": 4.975797594437934e-06, + "loss": 0.543, + "step": 4463 + }, + { + "epoch": 0.27183874798282737, + "grad_norm": 1.0941928090846957, + "learning_rate": 4.975786516725122e-06, + "loss": 0.4674, + "step": 4464 + }, + { + "epoch": 0.27189964375970527, + "grad_norm": 1.1215934125587363, + "learning_rate": 4.97577543649003e-06, + "loss": 0.4948, + "step": 4465 + }, + { + "epoch": 0.2719605395365831, + "grad_norm": 1.0610809233759988, + "learning_rate": 4.97576435373267e-06, + "loss": 0.4614, + "step": 4466 + }, + { + "epoch": 0.272021435313461, + "grad_norm": 1.0057548612492557, + "learning_rate": 4.975753268453053e-06, + "loss": 0.58, + "step": 4467 + }, + { + "epoch": 0.27208233109033886, + "grad_norm": 1.0944960883320163, + "learning_rate": 4.975742180651189e-06, + "loss": 0.5156, + "step": 4468 + }, + { + "epoch": 0.27214322686721676, + "grad_norm": 1.048478898087235, + "learning_rate": 4.9757310903270915e-06, + "loss": 0.5405, + "step": 4469 + }, + { + "epoch": 0.27220412264409466, + "grad_norm": 1.1215155811322124, + "learning_rate": 4.97571999748077e-06, + "loss": 0.395, + "step": 4470 + }, + { + "epoch": 0.2722650184209725, + "grad_norm": 1.0629578972070972, + "learning_rate": 4.975708902112236e-06, + "loss": 0.4951, + "step": 4471 + }, + { + "epoch": 0.2723259141978504, + "grad_norm": 1.0102856157072042, + "learning_rate": 4.975697804221502e-06, + "loss": 0.5028, + "step": 4472 + }, + { + "epoch": 0.27238680997472825, + "grad_norm": 1.0748887604062813, + "learning_rate": 4.975686703808578e-06, + "loss": 0.4456, + "step": 4473 + }, + { + "epoch": 0.27244770575160615, + "grad_norm": 0.9946581293670533, + "learning_rate": 4.9756756008734765e-06, + "loss": 0.5488, + "step": 4474 + }, + { + "epoch": 0.272508601528484, + "grad_norm": 1.0734974048784114, + "learning_rate": 4.975664495416208e-06, + "loss": 0.5052, + "step": 4475 + }, + { + "epoch": 0.2725694973053619, + "grad_norm": 1.0072654775129275, + "learning_rate": 4.975653387436784e-06, + "loss": 0.4897, + "step": 4476 + }, + { + "epoch": 0.27263039308223974, + "grad_norm": 0.9879429429312616, + "learning_rate": 4.975642276935215e-06, + "loss": 0.5322, + "step": 4477 + }, + { + "epoch": 0.27269128885911764, + "grad_norm": 0.9897476303302353, + "learning_rate": 4.975631163911514e-06, + "loss": 0.5422, + "step": 4478 + }, + { + "epoch": 0.2727521846359955, + "grad_norm": 1.1103495428579688, + "learning_rate": 4.975620048365691e-06, + "loss": 0.4914, + "step": 4479 + }, + { + "epoch": 0.2728130804128734, + "grad_norm": 1.1414974600240053, + "learning_rate": 4.975608930297758e-06, + "loss": 0.4038, + "step": 4480 + }, + { + "epoch": 0.27287397618975123, + "grad_norm": 1.0694029678091672, + "learning_rate": 4.975597809707726e-06, + "loss": 0.4682, + "step": 4481 + }, + { + "epoch": 0.27293487196662913, + "grad_norm": 1.0842356050777733, + "learning_rate": 4.975586686595606e-06, + "loss": 0.4373, + "step": 4482 + }, + { + "epoch": 0.272995767743507, + "grad_norm": 1.0574403291024899, + "learning_rate": 4.97557556096141e-06, + "loss": 0.5496, + "step": 4483 + }, + { + "epoch": 0.2730566635203849, + "grad_norm": 1.018783336696444, + "learning_rate": 4.97556443280515e-06, + "loss": 0.5104, + "step": 4484 + }, + { + "epoch": 0.2731175592972627, + "grad_norm": 1.0108983451075466, + "learning_rate": 4.975553302126835e-06, + "loss": 0.5155, + "step": 4485 + }, + { + "epoch": 0.2731784550741406, + "grad_norm": 1.025428177881471, + "learning_rate": 4.975542168926478e-06, + "loss": 0.4805, + "step": 4486 + }, + { + "epoch": 0.27323935085101847, + "grad_norm": 0.9457978776539547, + "learning_rate": 4.975531033204091e-06, + "loss": 0.4838, + "step": 4487 + }, + { + "epoch": 0.27330024662789637, + "grad_norm": 1.138844057285715, + "learning_rate": 4.9755198949596845e-06, + "loss": 0.4427, + "step": 4488 + }, + { + "epoch": 0.2733611424047742, + "grad_norm": 1.1383000024592502, + "learning_rate": 4.975508754193269e-06, + "loss": 0.388, + "step": 4489 + }, + { + "epoch": 0.2734220381816521, + "grad_norm": 0.998222388175142, + "learning_rate": 4.975497610904857e-06, + "loss": 0.5157, + "step": 4490 + }, + { + "epoch": 0.27348293395852996, + "grad_norm": 1.0095845720604335, + "learning_rate": 4.97548646509446e-06, + "loss": 0.4487, + "step": 4491 + }, + { + "epoch": 0.27354382973540786, + "grad_norm": 1.0074487096671911, + "learning_rate": 4.975475316762088e-06, + "loss": 0.5181, + "step": 4492 + }, + { + "epoch": 0.2736047255122857, + "grad_norm": 1.0431429768644922, + "learning_rate": 4.975464165907754e-06, + "loss": 0.4747, + "step": 4493 + }, + { + "epoch": 0.2736656212891636, + "grad_norm": 1.1419738572300158, + "learning_rate": 4.975453012531468e-06, + "loss": 0.5005, + "step": 4494 + }, + { + "epoch": 0.27372651706604145, + "grad_norm": 1.0343185248817903, + "learning_rate": 4.975441856633242e-06, + "loss": 0.4625, + "step": 4495 + }, + { + "epoch": 0.27378741284291935, + "grad_norm": 0.9946557946683363, + "learning_rate": 4.975430698213086e-06, + "loss": 0.4766, + "step": 4496 + }, + { + "epoch": 0.2738483086197972, + "grad_norm": 1.0474812329227, + "learning_rate": 4.975419537271015e-06, + "loss": 0.4548, + "step": 4497 + }, + { + "epoch": 0.2739092043966751, + "grad_norm": 0.9489301512931004, + "learning_rate": 4.9754083738070365e-06, + "loss": 0.5525, + "step": 4498 + }, + { + "epoch": 0.27397010017355294, + "grad_norm": 0.9692453461615222, + "learning_rate": 4.975397207821164e-06, + "loss": 0.4701, + "step": 4499 + }, + { + "epoch": 0.27403099595043084, + "grad_norm": 1.0153891493953173, + "learning_rate": 4.975386039313409e-06, + "loss": 0.5228, + "step": 4500 + }, + { + "epoch": 0.2740918917273087, + "grad_norm": 1.072565542563807, + "learning_rate": 4.975374868283781e-06, + "loss": 0.4811, + "step": 4501 + }, + { + "epoch": 0.2741527875041866, + "grad_norm": 1.1572040245207893, + "learning_rate": 4.9753636947322935e-06, + "loss": 0.4042, + "step": 4502 + }, + { + "epoch": 0.27421368328106444, + "grad_norm": 1.0534981826597418, + "learning_rate": 4.975352518658956e-06, + "loss": 0.4748, + "step": 4503 + }, + { + "epoch": 0.27427457905794234, + "grad_norm": 1.1459813113207349, + "learning_rate": 4.975341340063781e-06, + "loss": 0.4426, + "step": 4504 + }, + { + "epoch": 0.2743354748348202, + "grad_norm": 1.0087890415508596, + "learning_rate": 4.97533015894678e-06, + "loss": 0.508, + "step": 4505 + }, + { + "epoch": 0.2743963706116981, + "grad_norm": 0.9553312958911184, + "learning_rate": 4.975318975307965e-06, + "loss": 0.5137, + "step": 4506 + }, + { + "epoch": 0.2744572663885759, + "grad_norm": 1.042179807229924, + "learning_rate": 4.975307789147345e-06, + "loss": 0.4656, + "step": 4507 + }, + { + "epoch": 0.2745181621654538, + "grad_norm": 1.049033759414271, + "learning_rate": 4.975296600464933e-06, + "loss": 0.456, + "step": 4508 + }, + { + "epoch": 0.2745790579423317, + "grad_norm": 0.9821456174632861, + "learning_rate": 4.975285409260741e-06, + "loss": 0.5966, + "step": 4509 + }, + { + "epoch": 0.2746399537192096, + "grad_norm": 1.0007379512346062, + "learning_rate": 4.975274215534779e-06, + "loss": 0.548, + "step": 4510 + }, + { + "epoch": 0.2747008494960875, + "grad_norm": 1.0890924094953054, + "learning_rate": 4.97526301928706e-06, + "loss": 0.5104, + "step": 4511 + }, + { + "epoch": 0.2747617452729653, + "grad_norm": 0.9266339541865934, + "learning_rate": 4.975251820517594e-06, + "loss": 0.5375, + "step": 4512 + }, + { + "epoch": 0.2748226410498432, + "grad_norm": 1.1183458758568983, + "learning_rate": 4.975240619226393e-06, + "loss": 0.4478, + "step": 4513 + }, + { + "epoch": 0.27488353682672106, + "grad_norm": 1.0053628935260082, + "learning_rate": 4.975229415413467e-06, + "loss": 0.4352, + "step": 4514 + }, + { + "epoch": 0.27494443260359896, + "grad_norm": 1.03478844979292, + "learning_rate": 4.9752182090788305e-06, + "loss": 0.4888, + "step": 4515 + }, + { + "epoch": 0.2750053283804768, + "grad_norm": 0.9893724614170148, + "learning_rate": 4.975207000222493e-06, + "loss": 0.5361, + "step": 4516 + }, + { + "epoch": 0.2750662241573547, + "grad_norm": 1.1779556499017647, + "learning_rate": 4.975195788844465e-06, + "loss": 0.4826, + "step": 4517 + }, + { + "epoch": 0.27512711993423256, + "grad_norm": 1.0476268180417607, + "learning_rate": 4.97518457494476e-06, + "loss": 0.4631, + "step": 4518 + }, + { + "epoch": 0.27518801571111046, + "grad_norm": 1.178884416014151, + "learning_rate": 4.975173358523387e-06, + "loss": 0.5538, + "step": 4519 + }, + { + "epoch": 0.2752489114879883, + "grad_norm": 1.1139300601536508, + "learning_rate": 4.975162139580361e-06, + "loss": 0.5183, + "step": 4520 + }, + { + "epoch": 0.2753098072648662, + "grad_norm": 1.1238221489133138, + "learning_rate": 4.97515091811569e-06, + "loss": 0.4569, + "step": 4521 + }, + { + "epoch": 0.27537070304174405, + "grad_norm": 1.0914274173155876, + "learning_rate": 4.975139694129387e-06, + "loss": 0.4458, + "step": 4522 + }, + { + "epoch": 0.27543159881862195, + "grad_norm": 1.0329476485317803, + "learning_rate": 4.975128467621463e-06, + "loss": 0.5038, + "step": 4523 + }, + { + "epoch": 0.2754924945954998, + "grad_norm": 0.931211401851377, + "learning_rate": 4.9751172385919296e-06, + "loss": 0.5629, + "step": 4524 + }, + { + "epoch": 0.2755533903723777, + "grad_norm": 1.0348829529104817, + "learning_rate": 4.975106007040798e-06, + "loss": 0.47, + "step": 4525 + }, + { + "epoch": 0.27561428614925554, + "grad_norm": 1.0094432060740046, + "learning_rate": 4.97509477296808e-06, + "loss": 0.4989, + "step": 4526 + }, + { + "epoch": 0.27567518192613344, + "grad_norm": 0.9986655877443573, + "learning_rate": 4.975083536373787e-06, + "loss": 0.4786, + "step": 4527 + }, + { + "epoch": 0.2757360777030113, + "grad_norm": 1.0281978840569816, + "learning_rate": 4.97507229725793e-06, + "loss": 0.4466, + "step": 4528 + }, + { + "epoch": 0.2757969734798892, + "grad_norm": 1.0212676743550224, + "learning_rate": 4.975061055620522e-06, + "loss": 0.4868, + "step": 4529 + }, + { + "epoch": 0.27585786925676703, + "grad_norm": 1.026343146217889, + "learning_rate": 4.975049811461572e-06, + "loss": 0.5076, + "step": 4530 + }, + { + "epoch": 0.27591876503364493, + "grad_norm": 0.9353891155087947, + "learning_rate": 4.975038564781093e-06, + "loss": 0.5939, + "step": 4531 + }, + { + "epoch": 0.2759796608105228, + "grad_norm": 1.066934029665653, + "learning_rate": 4.975027315579096e-06, + "loss": 0.4703, + "step": 4532 + }, + { + "epoch": 0.2760405565874007, + "grad_norm": 1.0208765227067973, + "learning_rate": 4.975016063855593e-06, + "loss": 0.4518, + "step": 4533 + }, + { + "epoch": 0.2761014523642785, + "grad_norm": 1.1329676531452095, + "learning_rate": 4.975004809610595e-06, + "loss": 0.4185, + "step": 4534 + }, + { + "epoch": 0.2761623481411564, + "grad_norm": 1.0720534721928394, + "learning_rate": 4.974993552844113e-06, + "loss": 0.466, + "step": 4535 + }, + { + "epoch": 0.27622324391803427, + "grad_norm": 1.0786891694593534, + "learning_rate": 4.97498229355616e-06, + "loss": 0.4909, + "step": 4536 + }, + { + "epoch": 0.27628413969491217, + "grad_norm": 1.1212879032191305, + "learning_rate": 4.974971031746746e-06, + "loss": 0.4928, + "step": 4537 + }, + { + "epoch": 0.27634503547179, + "grad_norm": 1.028677454906405, + "learning_rate": 4.974959767415883e-06, + "loss": 0.4307, + "step": 4538 + }, + { + "epoch": 0.2764059312486679, + "grad_norm": 1.1351870470963519, + "learning_rate": 4.9749485005635824e-06, + "loss": 0.5501, + "step": 4539 + }, + { + "epoch": 0.27646682702554576, + "grad_norm": 1.0686052551647105, + "learning_rate": 4.974937231189856e-06, + "loss": 0.4738, + "step": 4540 + }, + { + "epoch": 0.27652772280242366, + "grad_norm": 0.9361727731960461, + "learning_rate": 4.974925959294714e-06, + "loss": 0.5297, + "step": 4541 + }, + { + "epoch": 0.2765886185793015, + "grad_norm": 1.0698132580030086, + "learning_rate": 4.974914684878171e-06, + "loss": 0.5101, + "step": 4542 + }, + { + "epoch": 0.2766495143561794, + "grad_norm": 1.060675256750301, + "learning_rate": 4.974903407940235e-06, + "loss": 0.5332, + "step": 4543 + }, + { + "epoch": 0.27671041013305725, + "grad_norm": 1.1309975213652022, + "learning_rate": 4.974892128480918e-06, + "loss": 0.4687, + "step": 4544 + }, + { + "epoch": 0.27677130590993515, + "grad_norm": 1.009036892808231, + "learning_rate": 4.974880846500234e-06, + "loss": 0.4748, + "step": 4545 + }, + { + "epoch": 0.276832201686813, + "grad_norm": 1.0449230789285644, + "learning_rate": 4.9748695619981915e-06, + "loss": 0.4929, + "step": 4546 + }, + { + "epoch": 0.2768930974636909, + "grad_norm": 1.0199089003497832, + "learning_rate": 4.974858274974804e-06, + "loss": 0.5441, + "step": 4547 + }, + { + "epoch": 0.27695399324056874, + "grad_norm": 1.1434815154998659, + "learning_rate": 4.9748469854300825e-06, + "loss": 0.4949, + "step": 4548 + }, + { + "epoch": 0.27701488901744664, + "grad_norm": 1.1440369611154046, + "learning_rate": 4.974835693364038e-06, + "loss": 0.4367, + "step": 4549 + }, + { + "epoch": 0.2770757847943245, + "grad_norm": 1.0273528228783757, + "learning_rate": 4.974824398776683e-06, + "loss": 0.4976, + "step": 4550 + }, + { + "epoch": 0.2771366805712024, + "grad_norm": 1.0716610519976235, + "learning_rate": 4.974813101668027e-06, + "loss": 0.4989, + "step": 4551 + }, + { + "epoch": 0.2771975763480803, + "grad_norm": 1.0245322017150305, + "learning_rate": 4.974801802038084e-06, + "loss": 0.4785, + "step": 4552 + }, + { + "epoch": 0.27725847212495813, + "grad_norm": 1.0798005224087914, + "learning_rate": 4.974790499886864e-06, + "loss": 0.4733, + "step": 4553 + }, + { + "epoch": 0.27731936790183603, + "grad_norm": 1.005031619947371, + "learning_rate": 4.974779195214379e-06, + "loss": 0.5116, + "step": 4554 + }, + { + "epoch": 0.2773802636787139, + "grad_norm": 1.0305962255213543, + "learning_rate": 4.974767888020641e-06, + "loss": 0.5732, + "step": 4555 + }, + { + "epoch": 0.2774411594555918, + "grad_norm": 1.1116007690004073, + "learning_rate": 4.9747565783056595e-06, + "loss": 0.4098, + "step": 4556 + }, + { + "epoch": 0.2775020552324696, + "grad_norm": 0.924419994776228, + "learning_rate": 4.974745266069449e-06, + "loss": 0.5045, + "step": 4557 + }, + { + "epoch": 0.2775629510093475, + "grad_norm": 0.9868637309408224, + "learning_rate": 4.974733951312018e-06, + "loss": 0.4744, + "step": 4558 + }, + { + "epoch": 0.27762384678622537, + "grad_norm": 0.9203293663841893, + "learning_rate": 4.974722634033381e-06, + "loss": 0.4877, + "step": 4559 + }, + { + "epoch": 0.27768474256310327, + "grad_norm": 1.0688045640254733, + "learning_rate": 4.974711314233547e-06, + "loss": 0.4234, + "step": 4560 + }, + { + "epoch": 0.2777456383399811, + "grad_norm": 0.9720535469061055, + "learning_rate": 4.9746999919125286e-06, + "loss": 0.4952, + "step": 4561 + }, + { + "epoch": 0.277806534116859, + "grad_norm": 1.06935647498941, + "learning_rate": 4.974688667070337e-06, + "loss": 0.4465, + "step": 4562 + }, + { + "epoch": 0.27786742989373686, + "grad_norm": 1.1243863428298293, + "learning_rate": 4.974677339706985e-06, + "loss": 0.5234, + "step": 4563 + }, + { + "epoch": 0.27792832567061476, + "grad_norm": 1.0386550860335932, + "learning_rate": 4.974666009822483e-06, + "loss": 0.5367, + "step": 4564 + }, + { + "epoch": 0.2779892214474926, + "grad_norm": 1.07606205646196, + "learning_rate": 4.9746546774168425e-06, + "loss": 0.457, + "step": 4565 + }, + { + "epoch": 0.2780501172243705, + "grad_norm": 1.0714993195717417, + "learning_rate": 4.974643342490075e-06, + "loss": 0.5629, + "step": 4566 + }, + { + "epoch": 0.27811101300124835, + "grad_norm": 1.212352818662428, + "learning_rate": 4.974632005042193e-06, + "loss": 0.4697, + "step": 4567 + }, + { + "epoch": 0.27817190877812625, + "grad_norm": 1.1103089346093213, + "learning_rate": 4.974620665073206e-06, + "loss": 0.472, + "step": 4568 + }, + { + "epoch": 0.2782328045550041, + "grad_norm": 1.0328659888414227, + "learning_rate": 4.974609322583128e-06, + "loss": 0.4625, + "step": 4569 + }, + { + "epoch": 0.278293700331882, + "grad_norm": 1.0610389328029726, + "learning_rate": 4.9745979775719695e-06, + "loss": 0.4863, + "step": 4570 + }, + { + "epoch": 0.27835459610875984, + "grad_norm": 0.9907113266279443, + "learning_rate": 4.974586630039742e-06, + "loss": 0.5188, + "step": 4571 + }, + { + "epoch": 0.27841549188563774, + "grad_norm": 1.1154091729269733, + "learning_rate": 4.974575279986457e-06, + "loss": 0.4004, + "step": 4572 + }, + { + "epoch": 0.2784763876625156, + "grad_norm": 1.0258034593453, + "learning_rate": 4.974563927412126e-06, + "loss": 0.5028, + "step": 4573 + }, + { + "epoch": 0.2785372834393935, + "grad_norm": 1.0085041328201212, + "learning_rate": 4.974552572316761e-06, + "loss": 0.4478, + "step": 4574 + }, + { + "epoch": 0.27859817921627134, + "grad_norm": 0.9781428028855677, + "learning_rate": 4.974541214700373e-06, + "loss": 0.5524, + "step": 4575 + }, + { + "epoch": 0.27865907499314924, + "grad_norm": 1.083171713083971, + "learning_rate": 4.974529854562974e-06, + "loss": 0.4561, + "step": 4576 + }, + { + "epoch": 0.2787199707700271, + "grad_norm": 0.9827390806844892, + "learning_rate": 4.974518491904575e-06, + "loss": 0.513, + "step": 4577 + }, + { + "epoch": 0.278780866546905, + "grad_norm": 1.026292423857707, + "learning_rate": 4.974507126725189e-06, + "loss": 0.48, + "step": 4578 + }, + { + "epoch": 0.2788417623237828, + "grad_norm": 1.1178729913558718, + "learning_rate": 4.974495759024826e-06, + "loss": 0.5243, + "step": 4579 + }, + { + "epoch": 0.2789026581006607, + "grad_norm": 1.1612994353227497, + "learning_rate": 4.974484388803498e-06, + "loss": 0.4897, + "step": 4580 + }, + { + "epoch": 0.2789635538775386, + "grad_norm": 1.0539582294263017, + "learning_rate": 4.974473016061217e-06, + "loss": 0.4803, + "step": 4581 + }, + { + "epoch": 0.2790244496544165, + "grad_norm": 1.0696493958122695, + "learning_rate": 4.974461640797993e-06, + "loss": 0.4452, + "step": 4582 + }, + { + "epoch": 0.2790853454312943, + "grad_norm": 1.0796209148549096, + "learning_rate": 4.974450263013841e-06, + "loss": 0.4821, + "step": 4583 + }, + { + "epoch": 0.2791462412081722, + "grad_norm": 1.0730082544227209, + "learning_rate": 4.974438882708769e-06, + "loss": 0.4551, + "step": 4584 + }, + { + "epoch": 0.27920713698505006, + "grad_norm": 0.9596530709790342, + "learning_rate": 4.974427499882791e-06, + "loss": 0.4954, + "step": 4585 + }, + { + "epoch": 0.27926803276192796, + "grad_norm": 0.9680698427867895, + "learning_rate": 4.974416114535918e-06, + "loss": 0.5363, + "step": 4586 + }, + { + "epoch": 0.2793289285388058, + "grad_norm": 0.9623275461777904, + "learning_rate": 4.9744047266681605e-06, + "loss": 0.4728, + "step": 4587 + }, + { + "epoch": 0.2793898243156837, + "grad_norm": 0.9929946937987327, + "learning_rate": 4.974393336279531e-06, + "loss": 0.4405, + "step": 4588 + }, + { + "epoch": 0.27945072009256156, + "grad_norm": 1.0794427799341026, + "learning_rate": 4.974381943370041e-06, + "loss": 0.4346, + "step": 4589 + }, + { + "epoch": 0.27951161586943946, + "grad_norm": 1.0202960529196963, + "learning_rate": 4.974370547939703e-06, + "loss": 0.4557, + "step": 4590 + }, + { + "epoch": 0.2795725116463173, + "grad_norm": 1.1149402707996452, + "learning_rate": 4.9743591499885265e-06, + "loss": 0.4533, + "step": 4591 + }, + { + "epoch": 0.2796334074231952, + "grad_norm": 0.9852305906007178, + "learning_rate": 4.974347749516525e-06, + "loss": 0.5502, + "step": 4592 + }, + { + "epoch": 0.2796943032000731, + "grad_norm": 1.1350702105853916, + "learning_rate": 4.974336346523709e-06, + "loss": 0.4456, + "step": 4593 + }, + { + "epoch": 0.27975519897695095, + "grad_norm": 1.1021820770173585, + "learning_rate": 4.974324941010091e-06, + "loss": 0.3837, + "step": 4594 + }, + { + "epoch": 0.27981609475382885, + "grad_norm": 1.082470539502784, + "learning_rate": 4.974313532975682e-06, + "loss": 0.443, + "step": 4595 + }, + { + "epoch": 0.2798769905307067, + "grad_norm": 1.053272779692528, + "learning_rate": 4.974302122420494e-06, + "loss": 0.4887, + "step": 4596 + }, + { + "epoch": 0.2799378863075846, + "grad_norm": 1.10596466745438, + "learning_rate": 4.974290709344538e-06, + "loss": 0.4987, + "step": 4597 + }, + { + "epoch": 0.27999878208446244, + "grad_norm": 1.0473945581376343, + "learning_rate": 4.974279293747827e-06, + "loss": 0.474, + "step": 4598 + }, + { + "epoch": 0.28005967786134034, + "grad_norm": 1.0201581560993809, + "learning_rate": 4.974267875630371e-06, + "loss": 0.5588, + "step": 4599 + }, + { + "epoch": 0.2801205736382182, + "grad_norm": 1.0866793330344389, + "learning_rate": 4.974256454992182e-06, + "loss": 0.5499, + "step": 4600 + }, + { + "epoch": 0.2801814694150961, + "grad_norm": 1.068825589242229, + "learning_rate": 4.974245031833273e-06, + "loss": 0.4945, + "step": 4601 + }, + { + "epoch": 0.28024236519197393, + "grad_norm": 1.0237197198136996, + "learning_rate": 4.974233606153654e-06, + "loss": 0.4696, + "step": 4602 + }, + { + "epoch": 0.28030326096885183, + "grad_norm": 1.0888122428385953, + "learning_rate": 4.974222177953337e-06, + "loss": 0.4627, + "step": 4603 + }, + { + "epoch": 0.2803641567457297, + "grad_norm": 1.0707359070700317, + "learning_rate": 4.974210747232334e-06, + "loss": 0.5273, + "step": 4604 + }, + { + "epoch": 0.2804250525226076, + "grad_norm": 1.0980437875655291, + "learning_rate": 4.974199313990657e-06, + "loss": 0.5327, + "step": 4605 + }, + { + "epoch": 0.2804859482994854, + "grad_norm": 1.131239624917363, + "learning_rate": 4.974187878228317e-06, + "loss": 0.5368, + "step": 4606 + }, + { + "epoch": 0.2805468440763633, + "grad_norm": 1.000346198465179, + "learning_rate": 4.9741764399453255e-06, + "loss": 0.5162, + "step": 4607 + }, + { + "epoch": 0.28060773985324117, + "grad_norm": 1.0427021515391084, + "learning_rate": 4.974164999141695e-06, + "loss": 0.4481, + "step": 4608 + }, + { + "epoch": 0.28066863563011907, + "grad_norm": 1.0223011866827316, + "learning_rate": 4.974153555817436e-06, + "loss": 0.4527, + "step": 4609 + }, + { + "epoch": 0.2807295314069969, + "grad_norm": 0.9886757758868444, + "learning_rate": 4.974142109972561e-06, + "loss": 0.5339, + "step": 4610 + }, + { + "epoch": 0.2807904271838748, + "grad_norm": 0.9910506688228174, + "learning_rate": 4.974130661607082e-06, + "loss": 0.486, + "step": 4611 + }, + { + "epoch": 0.28085132296075266, + "grad_norm": 1.1011653632605596, + "learning_rate": 4.974119210721009e-06, + "loss": 0.4869, + "step": 4612 + }, + { + "epoch": 0.28091221873763056, + "grad_norm": 1.0996542370440674, + "learning_rate": 4.9741077573143556e-06, + "loss": 0.4548, + "step": 4613 + }, + { + "epoch": 0.2809731145145084, + "grad_norm": 1.1194430013200274, + "learning_rate": 4.9740963013871326e-06, + "loss": 0.4981, + "step": 4614 + }, + { + "epoch": 0.2810340102913863, + "grad_norm": 0.9847304283512232, + "learning_rate": 4.974084842939352e-06, + "loss": 0.4917, + "step": 4615 + }, + { + "epoch": 0.28109490606826415, + "grad_norm": 1.1291570175285386, + "learning_rate": 4.9740733819710244e-06, + "loss": 0.4281, + "step": 4616 + }, + { + "epoch": 0.28115580184514205, + "grad_norm": 1.0390376706961921, + "learning_rate": 4.974061918482163e-06, + "loss": 0.4556, + "step": 4617 + }, + { + "epoch": 0.2812166976220199, + "grad_norm": 0.9913756910496051, + "learning_rate": 4.974050452472778e-06, + "loss": 0.4544, + "step": 4618 + }, + { + "epoch": 0.2812775933988978, + "grad_norm": 1.0541364262871262, + "learning_rate": 4.974038983942883e-06, + "loss": 0.4625, + "step": 4619 + }, + { + "epoch": 0.28133848917577564, + "grad_norm": 1.155417036478199, + "learning_rate": 4.974027512892487e-06, + "loss": 0.5164, + "step": 4620 + }, + { + "epoch": 0.28139938495265354, + "grad_norm": 1.0689193363124667, + "learning_rate": 4.9740160393216044e-06, + "loss": 0.4574, + "step": 4621 + }, + { + "epoch": 0.2814602807295314, + "grad_norm": 1.033246874371159, + "learning_rate": 4.9740045632302454e-06, + "loss": 0.4683, + "step": 4622 + }, + { + "epoch": 0.2815211765064093, + "grad_norm": 1.0436443237715383, + "learning_rate": 4.973993084618422e-06, + "loss": 0.5467, + "step": 4623 + }, + { + "epoch": 0.28158207228328713, + "grad_norm": 1.0946314176605765, + "learning_rate": 4.973981603486146e-06, + "loss": 0.452, + "step": 4624 + }, + { + "epoch": 0.28164296806016503, + "grad_norm": 1.087304093817119, + "learning_rate": 4.973970119833429e-06, + "loss": 0.4493, + "step": 4625 + }, + { + "epoch": 0.2817038638370429, + "grad_norm": 0.9667378538899482, + "learning_rate": 4.973958633660282e-06, + "loss": 0.5557, + "step": 4626 + }, + { + "epoch": 0.2817647596139208, + "grad_norm": 0.9766330285349364, + "learning_rate": 4.9739471449667185e-06, + "loss": 0.5205, + "step": 4627 + }, + { + "epoch": 0.2818256553907986, + "grad_norm": 1.0663448103933508, + "learning_rate": 4.973935653752748e-06, + "loss": 0.406, + "step": 4628 + }, + { + "epoch": 0.2818865511676765, + "grad_norm": 1.0515382056367437, + "learning_rate": 4.973924160018384e-06, + "loss": 0.5005, + "step": 4629 + }, + { + "epoch": 0.28194744694455437, + "grad_norm": 1.0130909809785542, + "learning_rate": 4.973912663763637e-06, + "loss": 0.3989, + "step": 4630 + }, + { + "epoch": 0.28200834272143227, + "grad_norm": 1.026796559734027, + "learning_rate": 4.973901164988519e-06, + "loss": 0.4731, + "step": 4631 + }, + { + "epoch": 0.2820692384983101, + "grad_norm": 1.152524347426922, + "learning_rate": 4.973889663693043e-06, + "loss": 0.3822, + "step": 4632 + }, + { + "epoch": 0.282130134275188, + "grad_norm": 1.0017076752125538, + "learning_rate": 4.973878159877219e-06, + "loss": 0.4934, + "step": 4633 + }, + { + "epoch": 0.2821910300520659, + "grad_norm": 1.0388927262375904, + "learning_rate": 4.973866653541059e-06, + "loss": 0.4757, + "step": 4634 + }, + { + "epoch": 0.28225192582894376, + "grad_norm": 1.0645883424660485, + "learning_rate": 4.973855144684575e-06, + "loss": 0.4805, + "step": 4635 + }, + { + "epoch": 0.28231282160582166, + "grad_norm": 0.9439628919409012, + "learning_rate": 4.97384363330778e-06, + "loss": 0.4257, + "step": 4636 + }, + { + "epoch": 0.2823737173826995, + "grad_norm": 1.0121190102895237, + "learning_rate": 4.973832119410684e-06, + "loss": 0.5217, + "step": 4637 + }, + { + "epoch": 0.2824346131595774, + "grad_norm": 1.1420730778082306, + "learning_rate": 4.973820602993299e-06, + "loss": 0.4117, + "step": 4638 + }, + { + "epoch": 0.28249550893645525, + "grad_norm": 1.048553229256215, + "learning_rate": 4.9738090840556366e-06, + "loss": 0.4687, + "step": 4639 + }, + { + "epoch": 0.28255640471333315, + "grad_norm": 1.0081929820590643, + "learning_rate": 4.97379756259771e-06, + "loss": 0.4772, + "step": 4640 + }, + { + "epoch": 0.282617300490211, + "grad_norm": 1.0891687628826903, + "learning_rate": 4.973786038619529e-06, + "loss": 0.4808, + "step": 4641 + }, + { + "epoch": 0.2826781962670889, + "grad_norm": 1.073926738490453, + "learning_rate": 4.973774512121106e-06, + "loss": 0.4807, + "step": 4642 + }, + { + "epoch": 0.28273909204396674, + "grad_norm": 1.0057425075213005, + "learning_rate": 4.9737629831024535e-06, + "loss": 0.4736, + "step": 4643 + }, + { + "epoch": 0.28279998782084464, + "grad_norm": 1.0382081541340094, + "learning_rate": 4.9737514515635834e-06, + "loss": 0.4463, + "step": 4644 + }, + { + "epoch": 0.2828608835977225, + "grad_norm": 1.0657272248471759, + "learning_rate": 4.973739917504506e-06, + "loss": 0.501, + "step": 4645 + }, + { + "epoch": 0.2829217793746004, + "grad_norm": 0.9985653987865056, + "learning_rate": 4.973728380925233e-06, + "loss": 0.4546, + "step": 4646 + }, + { + "epoch": 0.28298267515147824, + "grad_norm": 1.066880491907266, + "learning_rate": 4.973716841825778e-06, + "loss": 0.4843, + "step": 4647 + }, + { + "epoch": 0.28304357092835614, + "grad_norm": 0.9983068339548419, + "learning_rate": 4.973705300206151e-06, + "loss": 0.546, + "step": 4648 + }, + { + "epoch": 0.283104466705234, + "grad_norm": 0.990750672894937, + "learning_rate": 4.973693756066366e-06, + "loss": 0.5265, + "step": 4649 + }, + { + "epoch": 0.2831653624821119, + "grad_norm": 1.063250874884526, + "learning_rate": 4.9736822094064315e-06, + "loss": 0.4556, + "step": 4650 + }, + { + "epoch": 0.2832262582589897, + "grad_norm": 0.9803211366918892, + "learning_rate": 4.973670660226362e-06, + "loss": 0.5145, + "step": 4651 + }, + { + "epoch": 0.2832871540358676, + "grad_norm": 1.048694899210618, + "learning_rate": 4.973659108526167e-06, + "loss": 0.5309, + "step": 4652 + }, + { + "epoch": 0.2833480498127455, + "grad_norm": 1.0189837222152456, + "learning_rate": 4.9736475543058615e-06, + "loss": 0.4516, + "step": 4653 + }, + { + "epoch": 0.2834089455896234, + "grad_norm": 1.05285072475693, + "learning_rate": 4.973635997565454e-06, + "loss": 0.4692, + "step": 4654 + }, + { + "epoch": 0.2834698413665012, + "grad_norm": 1.1118935415768019, + "learning_rate": 4.973624438304958e-06, + "loss": 0.4648, + "step": 4655 + }, + { + "epoch": 0.2835307371433791, + "grad_norm": 1.1028015286073, + "learning_rate": 4.973612876524384e-06, + "loss": 0.4893, + "step": 4656 + }, + { + "epoch": 0.28359163292025696, + "grad_norm": 1.1658919995930832, + "learning_rate": 4.973601312223746e-06, + "loss": 0.4712, + "step": 4657 + }, + { + "epoch": 0.28365252869713486, + "grad_norm": 1.0419561840124933, + "learning_rate": 4.9735897454030535e-06, + "loss": 0.5032, + "step": 4658 + }, + { + "epoch": 0.2837134244740127, + "grad_norm": 1.007101935375357, + "learning_rate": 4.973578176062319e-06, + "loss": 0.499, + "step": 4659 + }, + { + "epoch": 0.2837743202508906, + "grad_norm": 1.0406600213348685, + "learning_rate": 4.973566604201555e-06, + "loss": 0.5025, + "step": 4660 + }, + { + "epoch": 0.28383521602776846, + "grad_norm": 1.088350375895477, + "learning_rate": 4.973555029820773e-06, + "loss": 0.4906, + "step": 4661 + }, + { + "epoch": 0.28389611180464636, + "grad_norm": 1.0962362733100885, + "learning_rate": 4.973543452919984e-06, + "loss": 0.4687, + "step": 4662 + }, + { + "epoch": 0.2839570075815242, + "grad_norm": 1.070420505585791, + "learning_rate": 4.973531873499201e-06, + "loss": 0.4662, + "step": 4663 + }, + { + "epoch": 0.2840179033584021, + "grad_norm": 1.0185875932208615, + "learning_rate": 4.973520291558434e-06, + "loss": 0.4724, + "step": 4664 + }, + { + "epoch": 0.28407879913527995, + "grad_norm": 1.0013579278526856, + "learning_rate": 4.973508707097697e-06, + "loss": 0.5003, + "step": 4665 + }, + { + "epoch": 0.28413969491215785, + "grad_norm": 1.1279419409257365, + "learning_rate": 4.973497120117e-06, + "loss": 0.3769, + "step": 4666 + }, + { + "epoch": 0.2842005906890357, + "grad_norm": 1.0530970500456756, + "learning_rate": 4.973485530616357e-06, + "loss": 0.5231, + "step": 4667 + }, + { + "epoch": 0.2842614864659136, + "grad_norm": 0.9724607159765419, + "learning_rate": 4.973473938595778e-06, + "loss": 0.5297, + "step": 4668 + }, + { + "epoch": 0.28432238224279144, + "grad_norm": 1.1531224031555793, + "learning_rate": 4.973462344055274e-06, + "loss": 0.4434, + "step": 4669 + }, + { + "epoch": 0.28438327801966934, + "grad_norm": 1.0645034555021813, + "learning_rate": 4.973450746994858e-06, + "loss": 0.462, + "step": 4670 + }, + { + "epoch": 0.2844441737965472, + "grad_norm": 1.2049028408549063, + "learning_rate": 4.9734391474145425e-06, + "loss": 0.4402, + "step": 4671 + }, + { + "epoch": 0.2845050695734251, + "grad_norm": 1.1233107066322292, + "learning_rate": 4.973427545314339e-06, + "loss": 0.5186, + "step": 4672 + }, + { + "epoch": 0.28456596535030293, + "grad_norm": 0.9398154591560276, + "learning_rate": 4.973415940694258e-06, + "loss": 0.485, + "step": 4673 + }, + { + "epoch": 0.28462686112718083, + "grad_norm": 1.105104055204243, + "learning_rate": 4.973404333554313e-06, + "loss": 0.5331, + "step": 4674 + }, + { + "epoch": 0.28468775690405873, + "grad_norm": 1.1359242049194815, + "learning_rate": 4.973392723894515e-06, + "loss": 0.4393, + "step": 4675 + }, + { + "epoch": 0.2847486526809366, + "grad_norm": 1.0633411351122808, + "learning_rate": 4.973381111714875e-06, + "loss": 0.4402, + "step": 4676 + }, + { + "epoch": 0.2848095484578145, + "grad_norm": 1.0391710569886838, + "learning_rate": 4.973369497015407e-06, + "loss": 0.4141, + "step": 4677 + }, + { + "epoch": 0.2848704442346923, + "grad_norm": 1.1706556911644022, + "learning_rate": 4.97335787979612e-06, + "loss": 0.4214, + "step": 4678 + }, + { + "epoch": 0.2849313400115702, + "grad_norm": 1.0418689594273873, + "learning_rate": 4.973346260057029e-06, + "loss": 0.5106, + "step": 4679 + }, + { + "epoch": 0.28499223578844807, + "grad_norm": 1.0583510159711462, + "learning_rate": 4.9733346377981435e-06, + "loss": 0.4594, + "step": 4680 + }, + { + "epoch": 0.28505313156532597, + "grad_norm": 1.1171088356710959, + "learning_rate": 4.9733230130194765e-06, + "loss": 0.4836, + "step": 4681 + }, + { + "epoch": 0.2851140273422038, + "grad_norm": 1.155565368345447, + "learning_rate": 4.973311385721039e-06, + "loss": 0.3825, + "step": 4682 + }, + { + "epoch": 0.2851749231190817, + "grad_norm": 0.9694443596982596, + "learning_rate": 4.9732997559028435e-06, + "loss": 0.4423, + "step": 4683 + }, + { + "epoch": 0.28523581889595956, + "grad_norm": 1.0094346065310182, + "learning_rate": 4.973288123564902e-06, + "loss": 0.5395, + "step": 4684 + }, + { + "epoch": 0.28529671467283746, + "grad_norm": 0.9810789295502576, + "learning_rate": 4.973276488707225e-06, + "loss": 0.4744, + "step": 4685 + }, + { + "epoch": 0.2853576104497153, + "grad_norm": 1.1039947741116771, + "learning_rate": 4.973264851329826e-06, + "loss": 0.4326, + "step": 4686 + }, + { + "epoch": 0.2854185062265932, + "grad_norm": 1.001397718424464, + "learning_rate": 4.973253211432716e-06, + "loss": 0.49, + "step": 4687 + }, + { + "epoch": 0.28547940200347105, + "grad_norm": 1.1575953911228627, + "learning_rate": 4.973241569015908e-06, + "loss": 0.4736, + "step": 4688 + }, + { + "epoch": 0.28554029778034895, + "grad_norm": 0.9602654236493253, + "learning_rate": 4.973229924079412e-06, + "loss": 0.4718, + "step": 4689 + }, + { + "epoch": 0.2856011935572268, + "grad_norm": 0.9644655709567413, + "learning_rate": 4.973218276623241e-06, + "loss": 0.5089, + "step": 4690 + }, + { + "epoch": 0.2856620893341047, + "grad_norm": 1.0551895350523648, + "learning_rate": 4.973206626647405e-06, + "loss": 0.4773, + "step": 4691 + }, + { + "epoch": 0.28572298511098254, + "grad_norm": 1.0286427581142652, + "learning_rate": 4.9731949741519195e-06, + "loss": 0.4441, + "step": 4692 + }, + { + "epoch": 0.28578388088786044, + "grad_norm": 1.0163658791390553, + "learning_rate": 4.973183319136794e-06, + "loss": 0.4767, + "step": 4693 + }, + { + "epoch": 0.2858447766647383, + "grad_norm": 0.996080171327085, + "learning_rate": 4.973171661602041e-06, + "loss": 0.5078, + "step": 4694 + }, + { + "epoch": 0.2859056724416162, + "grad_norm": 1.0732081602855155, + "learning_rate": 4.973160001547671e-06, + "loss": 0.4647, + "step": 4695 + }, + { + "epoch": 0.28596656821849403, + "grad_norm": 1.020824221157327, + "learning_rate": 4.973148338973699e-06, + "loss": 0.4751, + "step": 4696 + }, + { + "epoch": 0.28602746399537193, + "grad_norm": 1.0494388006649895, + "learning_rate": 4.9731366738801335e-06, + "loss": 0.45, + "step": 4697 + }, + { + "epoch": 0.2860883597722498, + "grad_norm": 1.055545929817841, + "learning_rate": 4.9731250062669875e-06, + "loss": 0.471, + "step": 4698 + }, + { + "epoch": 0.2861492555491277, + "grad_norm": 0.9867309154038033, + "learning_rate": 4.973113336134273e-06, + "loss": 0.4499, + "step": 4699 + }, + { + "epoch": 0.2862101513260055, + "grad_norm": 1.0063646733575238, + "learning_rate": 4.973101663482003e-06, + "loss": 0.451, + "step": 4700 + }, + { + "epoch": 0.2862710471028834, + "grad_norm": 1.0609078783150212, + "learning_rate": 4.973089988310188e-06, + "loss": 0.4531, + "step": 4701 + }, + { + "epoch": 0.28633194287976127, + "grad_norm": 1.0730073786456584, + "learning_rate": 4.973078310618841e-06, + "loss": 0.5255, + "step": 4702 + }, + { + "epoch": 0.28639283865663917, + "grad_norm": 1.1248862390305878, + "learning_rate": 4.973066630407972e-06, + "loss": 0.4439, + "step": 4703 + }, + { + "epoch": 0.286453734433517, + "grad_norm": 0.9933135941911766, + "learning_rate": 4.973054947677595e-06, + "loss": 0.4675, + "step": 4704 + }, + { + "epoch": 0.2865146302103949, + "grad_norm": 0.9414926372565166, + "learning_rate": 4.973043262427721e-06, + "loss": 0.5144, + "step": 4705 + }, + { + "epoch": 0.28657552598727276, + "grad_norm": 0.9874056264991097, + "learning_rate": 4.9730315746583615e-06, + "loss": 0.4634, + "step": 4706 + }, + { + "epoch": 0.28663642176415066, + "grad_norm": 1.128537110383014, + "learning_rate": 4.973019884369529e-06, + "loss": 0.4798, + "step": 4707 + }, + { + "epoch": 0.2866973175410285, + "grad_norm": 1.0167722187253343, + "learning_rate": 4.973008191561236e-06, + "loss": 0.5504, + "step": 4708 + }, + { + "epoch": 0.2867582133179064, + "grad_norm": 1.1185837411907142, + "learning_rate": 4.972996496233493e-06, + "loss": 0.4776, + "step": 4709 + }, + { + "epoch": 0.28681910909478425, + "grad_norm": 1.077771661182427, + "learning_rate": 4.972984798386312e-06, + "loss": 0.4664, + "step": 4710 + }, + { + "epoch": 0.28688000487166215, + "grad_norm": 1.0738304531420348, + "learning_rate": 4.972973098019706e-06, + "loss": 0.4411, + "step": 4711 + }, + { + "epoch": 0.28694090064854, + "grad_norm": 0.9434725575289545, + "learning_rate": 4.9729613951336875e-06, + "loss": 0.4914, + "step": 4712 + }, + { + "epoch": 0.2870017964254179, + "grad_norm": 1.0528966074546295, + "learning_rate": 4.972949689728266e-06, + "loss": 0.5273, + "step": 4713 + }, + { + "epoch": 0.28706269220229574, + "grad_norm": 1.0252659699497773, + "learning_rate": 4.9729379818034555e-06, + "loss": 0.4478, + "step": 4714 + }, + { + "epoch": 0.28712358797917364, + "grad_norm": 0.9975629182601389, + "learning_rate": 4.972926271359266e-06, + "loss": 0.4614, + "step": 4715 + }, + { + "epoch": 0.28718448375605155, + "grad_norm": 1.0258591495356872, + "learning_rate": 4.972914558395712e-06, + "loss": 0.4686, + "step": 4716 + }, + { + "epoch": 0.2872453795329294, + "grad_norm": 0.9962126040578033, + "learning_rate": 4.9729028429128036e-06, + "loss": 0.5112, + "step": 4717 + }, + { + "epoch": 0.2873062753098073, + "grad_norm": 1.1329417152472516, + "learning_rate": 4.972891124910553e-06, + "loss": 0.4401, + "step": 4718 + }, + { + "epoch": 0.28736717108668514, + "grad_norm": 0.9116977013754667, + "learning_rate": 4.9728794043889725e-06, + "loss": 0.4756, + "step": 4719 + }, + { + "epoch": 0.28742806686356304, + "grad_norm": 0.9901742647889742, + "learning_rate": 4.972867681348074e-06, + "loss": 0.4991, + "step": 4720 + }, + { + "epoch": 0.2874889626404409, + "grad_norm": 0.9837915727425174, + "learning_rate": 4.972855955787869e-06, + "loss": 0.5259, + "step": 4721 + }, + { + "epoch": 0.2875498584173188, + "grad_norm": 1.0174789677409508, + "learning_rate": 4.97284422770837e-06, + "loss": 0.4911, + "step": 4722 + }, + { + "epoch": 0.2876107541941966, + "grad_norm": 1.0731255807174873, + "learning_rate": 4.972832497109589e-06, + "loss": 0.4619, + "step": 4723 + }, + { + "epoch": 0.28767164997107453, + "grad_norm": 0.9725578509437919, + "learning_rate": 4.972820763991537e-06, + "loss": 0.527, + "step": 4724 + }, + { + "epoch": 0.2877325457479524, + "grad_norm": 1.0495665780666017, + "learning_rate": 4.972809028354227e-06, + "loss": 0.4909, + "step": 4725 + }, + { + "epoch": 0.2877934415248303, + "grad_norm": 1.0423182443885124, + "learning_rate": 4.9727972901976706e-06, + "loss": 0.4518, + "step": 4726 + }, + { + "epoch": 0.2878543373017081, + "grad_norm": 1.0886534859007742, + "learning_rate": 4.972785549521879e-06, + "loss": 0.4615, + "step": 4727 + }, + { + "epoch": 0.287915233078586, + "grad_norm": 1.0700171890221695, + "learning_rate": 4.972773806326866e-06, + "loss": 0.4825, + "step": 4728 + }, + { + "epoch": 0.28797612885546386, + "grad_norm": 1.0578715606430413, + "learning_rate": 4.972762060612642e-06, + "loss": 0.5314, + "step": 4729 + }, + { + "epoch": 0.28803702463234176, + "grad_norm": 1.0201424455163153, + "learning_rate": 4.9727503123792194e-06, + "loss": 0.469, + "step": 4730 + }, + { + "epoch": 0.2880979204092196, + "grad_norm": 1.1160743752889675, + "learning_rate": 4.97273856162661e-06, + "loss": 0.4889, + "step": 4731 + }, + { + "epoch": 0.2881588161860975, + "grad_norm": 1.0659674382808033, + "learning_rate": 4.972726808354826e-06, + "loss": 0.4176, + "step": 4732 + }, + { + "epoch": 0.28821971196297536, + "grad_norm": 1.0219002165435425, + "learning_rate": 4.97271505256388e-06, + "loss": 0.4327, + "step": 4733 + }, + { + "epoch": 0.28828060773985326, + "grad_norm": 1.0640357157172742, + "learning_rate": 4.972703294253783e-06, + "loss": 0.4486, + "step": 4734 + }, + { + "epoch": 0.2883415035167311, + "grad_norm": 0.9502939590049496, + "learning_rate": 4.972691533424547e-06, + "loss": 0.5425, + "step": 4735 + }, + { + "epoch": 0.288402399293609, + "grad_norm": 0.9701555308105064, + "learning_rate": 4.972679770076184e-06, + "loss": 0.4585, + "step": 4736 + }, + { + "epoch": 0.28846329507048685, + "grad_norm": 1.053332841307033, + "learning_rate": 4.9726680042087075e-06, + "loss": 0.5075, + "step": 4737 + }, + { + "epoch": 0.28852419084736475, + "grad_norm": 1.0066423133391196, + "learning_rate": 4.972656235822126e-06, + "loss": 0.4624, + "step": 4738 + }, + { + "epoch": 0.2885850866242426, + "grad_norm": 0.9849663383178267, + "learning_rate": 4.972644464916457e-06, + "loss": 0.475, + "step": 4739 + }, + { + "epoch": 0.2886459824011205, + "grad_norm": 0.9309961440560315, + "learning_rate": 4.972632691491706e-06, + "loss": 0.4924, + "step": 4740 + }, + { + "epoch": 0.28870687817799834, + "grad_norm": 0.9774073795962573, + "learning_rate": 4.97262091554789e-06, + "loss": 0.4413, + "step": 4741 + }, + { + "epoch": 0.28876777395487624, + "grad_norm": 1.0909227621255146, + "learning_rate": 4.9726091370850184e-06, + "loss": 0.4851, + "step": 4742 + }, + { + "epoch": 0.2888286697317541, + "grad_norm": 1.113155070574979, + "learning_rate": 4.972597356103105e-06, + "loss": 0.4505, + "step": 4743 + }, + { + "epoch": 0.288889565508632, + "grad_norm": 1.023033233086492, + "learning_rate": 4.97258557260216e-06, + "loss": 0.4518, + "step": 4744 + }, + { + "epoch": 0.28895046128550983, + "grad_norm": 1.1645962776536078, + "learning_rate": 4.972573786582196e-06, + "loss": 0.4551, + "step": 4745 + }, + { + "epoch": 0.28901135706238773, + "grad_norm": 1.1200221221112634, + "learning_rate": 4.972561998043226e-06, + "loss": 0.5194, + "step": 4746 + }, + { + "epoch": 0.2890722528392656, + "grad_norm": 1.040908297965486, + "learning_rate": 4.972550206985261e-06, + "loss": 0.463, + "step": 4747 + }, + { + "epoch": 0.2891331486161435, + "grad_norm": 0.8915600745369252, + "learning_rate": 4.972538413408313e-06, + "loss": 0.5875, + "step": 4748 + }, + { + "epoch": 0.2891940443930213, + "grad_norm": 1.0707615852962402, + "learning_rate": 4.972526617312394e-06, + "loss": 0.5364, + "step": 4749 + }, + { + "epoch": 0.2892549401698992, + "grad_norm": 1.0698570697574992, + "learning_rate": 4.972514818697517e-06, + "loss": 0.4349, + "step": 4750 + }, + { + "epoch": 0.28931583594677707, + "grad_norm": 0.982335570378541, + "learning_rate": 4.972503017563693e-06, + "loss": 0.5261, + "step": 4751 + }, + { + "epoch": 0.28937673172365497, + "grad_norm": 1.0327696798469783, + "learning_rate": 4.972491213910935e-06, + "loss": 0.5493, + "step": 4752 + }, + { + "epoch": 0.2894376275005328, + "grad_norm": 1.051675242759539, + "learning_rate": 4.972479407739254e-06, + "loss": 0.4846, + "step": 4753 + }, + { + "epoch": 0.2894985232774107, + "grad_norm": 1.0031282073542507, + "learning_rate": 4.972467599048661e-06, + "loss": 0.5086, + "step": 4754 + }, + { + "epoch": 0.28955941905428856, + "grad_norm": 1.044137618687895, + "learning_rate": 4.972455787839171e-06, + "loss": 0.4747, + "step": 4755 + }, + { + "epoch": 0.28962031483116646, + "grad_norm": 1.0207176671493368, + "learning_rate": 4.972443974110794e-06, + "loss": 0.4231, + "step": 4756 + }, + { + "epoch": 0.28968121060804436, + "grad_norm": 1.0011586841552058, + "learning_rate": 4.972432157863543e-06, + "loss": 0.495, + "step": 4757 + }, + { + "epoch": 0.2897421063849222, + "grad_norm": 1.0774572599097945, + "learning_rate": 4.9724203390974285e-06, + "loss": 0.4689, + "step": 4758 + }, + { + "epoch": 0.2898030021618001, + "grad_norm": 1.0609840365252408, + "learning_rate": 4.972408517812463e-06, + "loss": 0.4939, + "step": 4759 + }, + { + "epoch": 0.28986389793867795, + "grad_norm": 1.0474812550029045, + "learning_rate": 4.972396694008661e-06, + "loss": 0.4502, + "step": 4760 + }, + { + "epoch": 0.28992479371555585, + "grad_norm": 1.0430997808774765, + "learning_rate": 4.972384867686032e-06, + "loss": 0.4567, + "step": 4761 + }, + { + "epoch": 0.2899856894924337, + "grad_norm": 0.9779573644339782, + "learning_rate": 4.972373038844588e-06, + "loss": 0.4804, + "step": 4762 + }, + { + "epoch": 0.2900465852693116, + "grad_norm": 1.034753223786268, + "learning_rate": 4.972361207484343e-06, + "loss": 0.5065, + "step": 4763 + }, + { + "epoch": 0.29010748104618944, + "grad_norm": 1.0891701448226778, + "learning_rate": 4.972349373605306e-06, + "loss": 0.4558, + "step": 4764 + }, + { + "epoch": 0.29016837682306734, + "grad_norm": 1.0812175562328907, + "learning_rate": 4.9723375372074925e-06, + "loss": 0.4807, + "step": 4765 + }, + { + "epoch": 0.2902292725999452, + "grad_norm": 1.0554342213807921, + "learning_rate": 4.972325698290912e-06, + "loss": 0.4815, + "step": 4766 + }, + { + "epoch": 0.2902901683768231, + "grad_norm": 1.005379516266243, + "learning_rate": 4.972313856855579e-06, + "loss": 0.4711, + "step": 4767 + }, + { + "epoch": 0.29035106415370093, + "grad_norm": 1.00752076055098, + "learning_rate": 4.972302012901502e-06, + "loss": 0.5696, + "step": 4768 + }, + { + "epoch": 0.29041195993057883, + "grad_norm": 1.022231933994263, + "learning_rate": 4.972290166428696e-06, + "loss": 0.5062, + "step": 4769 + }, + { + "epoch": 0.2904728557074567, + "grad_norm": 1.1480401985359985, + "learning_rate": 4.972278317437172e-06, + "loss": 0.5061, + "step": 4770 + }, + { + "epoch": 0.2905337514843346, + "grad_norm": 1.0674872986785338, + "learning_rate": 4.9722664659269425e-06, + "loss": 0.4831, + "step": 4771 + }, + { + "epoch": 0.2905946472612124, + "grad_norm": 0.9593935999866446, + "learning_rate": 4.97225461189802e-06, + "loss": 0.4284, + "step": 4772 + }, + { + "epoch": 0.2906555430380903, + "grad_norm": 1.090204668501177, + "learning_rate": 4.972242755350414e-06, + "loss": 0.4231, + "step": 4773 + }, + { + "epoch": 0.29071643881496817, + "grad_norm": 1.0287395195040085, + "learning_rate": 4.9722308962841404e-06, + "loss": 0.4961, + "step": 4774 + }, + { + "epoch": 0.29077733459184607, + "grad_norm": 1.0340315535984126, + "learning_rate": 4.9722190346992086e-06, + "loss": 0.4385, + "step": 4775 + }, + { + "epoch": 0.2908382303687239, + "grad_norm": 1.0598351307063223, + "learning_rate": 4.972207170595631e-06, + "loss": 0.4274, + "step": 4776 + }, + { + "epoch": 0.2908991261456018, + "grad_norm": 1.0805359941856578, + "learning_rate": 4.9721953039734205e-06, + "loss": 0.4455, + "step": 4777 + }, + { + "epoch": 0.29096002192247966, + "grad_norm": 1.0593504189174017, + "learning_rate": 4.97218343483259e-06, + "loss": 0.5208, + "step": 4778 + }, + { + "epoch": 0.29102091769935756, + "grad_norm": 1.1214931935013783, + "learning_rate": 4.972171563173148e-06, + "loss": 0.4783, + "step": 4779 + }, + { + "epoch": 0.2910818134762354, + "grad_norm": 0.9628611238140816, + "learning_rate": 4.97215968899511e-06, + "loss": 0.4854, + "step": 4780 + }, + { + "epoch": 0.2911427092531133, + "grad_norm": 1.126221575836851, + "learning_rate": 4.972147812298488e-06, + "loss": 0.476, + "step": 4781 + }, + { + "epoch": 0.29120360502999115, + "grad_norm": 1.0135910084410833, + "learning_rate": 4.972135933083292e-06, + "loss": 0.4892, + "step": 4782 + }, + { + "epoch": 0.29126450080686905, + "grad_norm": 1.0229343489482596, + "learning_rate": 4.972124051349536e-06, + "loss": 0.4702, + "step": 4783 + }, + { + "epoch": 0.2913253965837469, + "grad_norm": 1.1424463619979517, + "learning_rate": 4.972112167097232e-06, + "loss": 0.4367, + "step": 4784 + }, + { + "epoch": 0.2913862923606248, + "grad_norm": 0.9914508699565783, + "learning_rate": 4.972100280326391e-06, + "loss": 0.4638, + "step": 4785 + }, + { + "epoch": 0.29144718813750264, + "grad_norm": 1.0171157248670357, + "learning_rate": 4.972088391037025e-06, + "loss": 0.4387, + "step": 4786 + }, + { + "epoch": 0.29150808391438054, + "grad_norm": 0.9784027906863423, + "learning_rate": 4.972076499229147e-06, + "loss": 0.4966, + "step": 4787 + }, + { + "epoch": 0.2915689796912584, + "grad_norm": 1.0055150309818668, + "learning_rate": 4.9720646049027696e-06, + "loss": 0.5309, + "step": 4788 + }, + { + "epoch": 0.2916298754681363, + "grad_norm": 1.0014871072057334, + "learning_rate": 4.972052708057903e-06, + "loss": 0.5574, + "step": 4789 + }, + { + "epoch": 0.29169077124501414, + "grad_norm": 0.995324811172232, + "learning_rate": 4.972040808694562e-06, + "loss": 0.5252, + "step": 4790 + }, + { + "epoch": 0.29175166702189204, + "grad_norm": 1.111665299270616, + "learning_rate": 4.972028906812756e-06, + "loss": 0.5209, + "step": 4791 + }, + { + "epoch": 0.2918125627987699, + "grad_norm": 1.0747105124472116, + "learning_rate": 4.972017002412499e-06, + "loss": 0.5201, + "step": 4792 + }, + { + "epoch": 0.2918734585756478, + "grad_norm": 1.0813534937886593, + "learning_rate": 4.972005095493801e-06, + "loss": 0.4594, + "step": 4793 + }, + { + "epoch": 0.2919343543525256, + "grad_norm": 1.072709981955552, + "learning_rate": 4.971993186056677e-06, + "loss": 0.5882, + "step": 4794 + }, + { + "epoch": 0.2919952501294035, + "grad_norm": 1.0736773102784318, + "learning_rate": 4.971981274101138e-06, + "loss": 0.4497, + "step": 4795 + }, + { + "epoch": 0.2920561459062814, + "grad_norm": 1.0359312270416807, + "learning_rate": 4.971969359627195e-06, + "loss": 0.473, + "step": 4796 + }, + { + "epoch": 0.2921170416831593, + "grad_norm": 1.05184395070069, + "learning_rate": 4.971957442634862e-06, + "loss": 0.4594, + "step": 4797 + }, + { + "epoch": 0.2921779374600372, + "grad_norm": 1.040352866062599, + "learning_rate": 4.9719455231241495e-06, + "loss": 0.4206, + "step": 4798 + }, + { + "epoch": 0.292238833236915, + "grad_norm": 1.0176778505756297, + "learning_rate": 4.97193360109507e-06, + "loss": 0.467, + "step": 4799 + }, + { + "epoch": 0.2922997290137929, + "grad_norm": 1.051365992232664, + "learning_rate": 4.9719216765476365e-06, + "loss": 0.4812, + "step": 4800 + }, + { + "epoch": 0.29236062479067076, + "grad_norm": 1.0416310771766466, + "learning_rate": 4.97190974948186e-06, + "loss": 0.4347, + "step": 4801 + }, + { + "epoch": 0.29242152056754867, + "grad_norm": 1.1363797863306264, + "learning_rate": 4.971897819897754e-06, + "loss": 0.4592, + "step": 4802 + }, + { + "epoch": 0.2924824163444265, + "grad_norm": 1.1133976442531408, + "learning_rate": 4.97188588779533e-06, + "loss": 0.4395, + "step": 4803 + }, + { + "epoch": 0.2925433121213044, + "grad_norm": 0.9874988612944314, + "learning_rate": 4.971873953174599e-06, + "loss": 0.472, + "step": 4804 + }, + { + "epoch": 0.29260420789818226, + "grad_norm": 1.0050341293871587, + "learning_rate": 4.971862016035575e-06, + "loss": 0.5105, + "step": 4805 + }, + { + "epoch": 0.29266510367506016, + "grad_norm": 1.0327202732675025, + "learning_rate": 4.971850076378269e-06, + "loss": 0.5046, + "step": 4806 + }, + { + "epoch": 0.292725999451938, + "grad_norm": 1.07862621025045, + "learning_rate": 4.971838134202693e-06, + "loss": 0.4708, + "step": 4807 + }, + { + "epoch": 0.2927868952288159, + "grad_norm": 1.01748033454078, + "learning_rate": 4.9718261895088606e-06, + "loss": 0.5198, + "step": 4808 + }, + { + "epoch": 0.29284779100569375, + "grad_norm": 1.0667496034583155, + "learning_rate": 4.971814242296783e-06, + "loss": 0.4643, + "step": 4809 + }, + { + "epoch": 0.29290868678257165, + "grad_norm": 1.0902498964003675, + "learning_rate": 4.971802292566473e-06, + "loss": 0.473, + "step": 4810 + }, + { + "epoch": 0.2929695825594495, + "grad_norm": 1.0222849067855517, + "learning_rate": 4.971790340317941e-06, + "loss": 0.4628, + "step": 4811 + }, + { + "epoch": 0.2930304783363274, + "grad_norm": 1.1209966152087487, + "learning_rate": 4.971778385551201e-06, + "loss": 0.4671, + "step": 4812 + }, + { + "epoch": 0.29309137411320524, + "grad_norm": 1.1039957858247014, + "learning_rate": 4.971766428266265e-06, + "loss": 0.457, + "step": 4813 + }, + { + "epoch": 0.29315226989008314, + "grad_norm": 1.052150822024785, + "learning_rate": 4.971754468463143e-06, + "loss": 0.4371, + "step": 4814 + }, + { + "epoch": 0.293213165666961, + "grad_norm": 1.0239998313858532, + "learning_rate": 4.971742506141851e-06, + "loss": 0.4692, + "step": 4815 + }, + { + "epoch": 0.2932740614438389, + "grad_norm": 1.053941936512643, + "learning_rate": 4.971730541302398e-06, + "loss": 0.5143, + "step": 4816 + }, + { + "epoch": 0.29333495722071673, + "grad_norm": 0.9948505417445542, + "learning_rate": 4.971718573944798e-06, + "loss": 0.4417, + "step": 4817 + }, + { + "epoch": 0.29339585299759463, + "grad_norm": 0.9765091291099964, + "learning_rate": 4.971706604069062e-06, + "loss": 0.4976, + "step": 4818 + }, + { + "epoch": 0.2934567487744725, + "grad_norm": 0.9537958749552902, + "learning_rate": 4.971694631675202e-06, + "loss": 0.5069, + "step": 4819 + }, + { + "epoch": 0.2935176445513504, + "grad_norm": 1.06761782677484, + "learning_rate": 4.971682656763232e-06, + "loss": 0.4264, + "step": 4820 + }, + { + "epoch": 0.2935785403282282, + "grad_norm": 1.0297210003996151, + "learning_rate": 4.971670679333163e-06, + "loss": 0.4511, + "step": 4821 + }, + { + "epoch": 0.2936394361051061, + "grad_norm": 1.0394078793851205, + "learning_rate": 4.9716586993850065e-06, + "loss": 0.5085, + "step": 4822 + }, + { + "epoch": 0.29370033188198397, + "grad_norm": 1.075192650029821, + "learning_rate": 4.971646716918776e-06, + "loss": 0.4515, + "step": 4823 + }, + { + "epoch": 0.29376122765886187, + "grad_norm": 1.1831655892858455, + "learning_rate": 4.971634731934484e-06, + "loss": 0.4438, + "step": 4824 + }, + { + "epoch": 0.2938221234357397, + "grad_norm": 1.1648850444769714, + "learning_rate": 4.9716227444321404e-06, + "loss": 0.536, + "step": 4825 + }, + { + "epoch": 0.2938830192126176, + "grad_norm": 1.05762962989318, + "learning_rate": 4.97161075441176e-06, + "loss": 0.4844, + "step": 4826 + }, + { + "epoch": 0.29394391498949546, + "grad_norm": 1.0995277772389986, + "learning_rate": 4.9715987618733545e-06, + "loss": 0.4554, + "step": 4827 + }, + { + "epoch": 0.29400481076637336, + "grad_norm": 0.9919672859080824, + "learning_rate": 4.971586766816934e-06, + "loss": 0.5282, + "step": 4828 + }, + { + "epoch": 0.2940657065432512, + "grad_norm": 1.0079271330534496, + "learning_rate": 4.971574769242513e-06, + "loss": 0.4553, + "step": 4829 + }, + { + "epoch": 0.2941266023201291, + "grad_norm": 1.0718786386302184, + "learning_rate": 4.9715627691501035e-06, + "loss": 0.496, + "step": 4830 + }, + { + "epoch": 0.29418749809700695, + "grad_norm": 1.0672204782586219, + "learning_rate": 4.971550766539717e-06, + "loss": 0.483, + "step": 4831 + }, + { + "epoch": 0.29424839387388485, + "grad_norm": 0.9015131557870956, + "learning_rate": 4.971538761411365e-06, + "loss": 0.5412, + "step": 4832 + }, + { + "epoch": 0.2943092896507627, + "grad_norm": 0.9246459182538004, + "learning_rate": 4.971526753765063e-06, + "loss": 0.5131, + "step": 4833 + }, + { + "epoch": 0.2943701854276406, + "grad_norm": 1.1056043076485484, + "learning_rate": 4.971514743600818e-06, + "loss": 0.4715, + "step": 4834 + }, + { + "epoch": 0.29443108120451844, + "grad_norm": 1.0393158368332567, + "learning_rate": 4.971502730918647e-06, + "loss": 0.5302, + "step": 4835 + }, + { + "epoch": 0.29449197698139634, + "grad_norm": 1.0384521124702424, + "learning_rate": 4.97149071571856e-06, + "loss": 0.4186, + "step": 4836 + }, + { + "epoch": 0.2945528727582742, + "grad_norm": 1.0757410077415215, + "learning_rate": 4.971478698000569e-06, + "loss": 0.498, + "step": 4837 + }, + { + "epoch": 0.2946137685351521, + "grad_norm": 1.0723112617625008, + "learning_rate": 4.971466677764688e-06, + "loss": 0.5109, + "step": 4838 + }, + { + "epoch": 0.29467466431203, + "grad_norm": 1.0252239066869873, + "learning_rate": 4.971454655010928e-06, + "loss": 0.5318, + "step": 4839 + }, + { + "epoch": 0.29473556008890783, + "grad_norm": 1.1389200308018306, + "learning_rate": 4.9714426297393e-06, + "loss": 0.4988, + "step": 4840 + }, + { + "epoch": 0.29479645586578573, + "grad_norm": 1.0016745137239622, + "learning_rate": 4.9714306019498194e-06, + "loss": 0.5087, + "step": 4841 + }, + { + "epoch": 0.2948573516426636, + "grad_norm": 1.107955912530081, + "learning_rate": 4.971418571642496e-06, + "loss": 0.4666, + "step": 4842 + }, + { + "epoch": 0.2949182474195415, + "grad_norm": 1.0156062092705862, + "learning_rate": 4.971406538817343e-06, + "loss": 0.5455, + "step": 4843 + }, + { + "epoch": 0.2949791431964193, + "grad_norm": 0.9823986876375775, + "learning_rate": 4.971394503474372e-06, + "loss": 0.4908, + "step": 4844 + }, + { + "epoch": 0.2950400389732972, + "grad_norm": 1.0844771717356114, + "learning_rate": 4.971382465613595e-06, + "loss": 0.5026, + "step": 4845 + }, + { + "epoch": 0.29510093475017507, + "grad_norm": 0.9932265481016237, + "learning_rate": 4.9713704252350255e-06, + "loss": 0.4848, + "step": 4846 + }, + { + "epoch": 0.29516183052705297, + "grad_norm": 0.9229270490735245, + "learning_rate": 4.9713583823386755e-06, + "loss": 0.4561, + "step": 4847 + }, + { + "epoch": 0.2952227263039308, + "grad_norm": 1.0480023672081658, + "learning_rate": 4.971346336924557e-06, + "loss": 0.4709, + "step": 4848 + }, + { + "epoch": 0.2952836220808087, + "grad_norm": 1.1553046809451752, + "learning_rate": 4.971334288992682e-06, + "loss": 0.4506, + "step": 4849 + }, + { + "epoch": 0.29534451785768656, + "grad_norm": 1.0235689628937377, + "learning_rate": 4.971322238543063e-06, + "loss": 0.4662, + "step": 4850 + }, + { + "epoch": 0.29540541363456446, + "grad_norm": 1.0030682011168108, + "learning_rate": 4.971310185575712e-06, + "loss": 0.5229, + "step": 4851 + }, + { + "epoch": 0.2954663094114423, + "grad_norm": 0.9466202229837539, + "learning_rate": 4.971298130090642e-06, + "loss": 0.4742, + "step": 4852 + }, + { + "epoch": 0.2955272051883202, + "grad_norm": 1.0174807641639394, + "learning_rate": 4.971286072087865e-06, + "loss": 0.4863, + "step": 4853 + }, + { + "epoch": 0.29558810096519805, + "grad_norm": 1.0266351219169283, + "learning_rate": 4.971274011567393e-06, + "loss": 0.5345, + "step": 4854 + }, + { + "epoch": 0.29564899674207595, + "grad_norm": 0.9985926109621863, + "learning_rate": 4.971261948529238e-06, + "loss": 0.4463, + "step": 4855 + }, + { + "epoch": 0.2957098925189538, + "grad_norm": 1.085510585632155, + "learning_rate": 4.971249882973413e-06, + "loss": 0.4967, + "step": 4856 + }, + { + "epoch": 0.2957707882958317, + "grad_norm": 1.0932477969532728, + "learning_rate": 4.971237814899929e-06, + "loss": 0.4515, + "step": 4857 + }, + { + "epoch": 0.29583168407270954, + "grad_norm": 1.0030162121491937, + "learning_rate": 4.971225744308801e-06, + "loss": 0.4642, + "step": 4858 + }, + { + "epoch": 0.29589257984958744, + "grad_norm": 1.009744697227879, + "learning_rate": 4.971213671200039e-06, + "loss": 0.5239, + "step": 4859 + }, + { + "epoch": 0.2959534756264653, + "grad_norm": 1.0471457183020163, + "learning_rate": 4.9712015955736555e-06, + "loss": 0.4552, + "step": 4860 + }, + { + "epoch": 0.2960143714033432, + "grad_norm": 1.0497935863716459, + "learning_rate": 4.971189517429663e-06, + "loss": 0.569, + "step": 4861 + }, + { + "epoch": 0.29607526718022104, + "grad_norm": 1.007044464279966, + "learning_rate": 4.971177436768076e-06, + "loss": 0.4451, + "step": 4862 + }, + { + "epoch": 0.29613616295709894, + "grad_norm": 0.9890551267158382, + "learning_rate": 4.971165353588903e-06, + "loss": 0.5338, + "step": 4863 + }, + { + "epoch": 0.2961970587339768, + "grad_norm": 0.9431820076291278, + "learning_rate": 4.971153267892158e-06, + "loss": 0.5514, + "step": 4864 + }, + { + "epoch": 0.2962579545108547, + "grad_norm": 1.033908603432793, + "learning_rate": 4.9711411796778535e-06, + "loss": 0.5008, + "step": 4865 + }, + { + "epoch": 0.2963188502877325, + "grad_norm": 0.9447531498033565, + "learning_rate": 4.971129088946002e-06, + "loss": 0.5244, + "step": 4866 + }, + { + "epoch": 0.2963797460646104, + "grad_norm": 1.061203322664377, + "learning_rate": 4.971116995696616e-06, + "loss": 0.4973, + "step": 4867 + }, + { + "epoch": 0.2964406418414883, + "grad_norm": 0.9902563421174313, + "learning_rate": 4.9711048999297066e-06, + "loss": 0.4634, + "step": 4868 + }, + { + "epoch": 0.2965015376183662, + "grad_norm": 1.0125656064119415, + "learning_rate": 4.971092801645288e-06, + "loss": 0.4714, + "step": 4869 + }, + { + "epoch": 0.296562433395244, + "grad_norm": 1.0300337139781535, + "learning_rate": 4.97108070084337e-06, + "loss": 0.4046, + "step": 4870 + }, + { + "epoch": 0.2966233291721219, + "grad_norm": 1.04022587714178, + "learning_rate": 4.971068597523967e-06, + "loss": 0.458, + "step": 4871 + }, + { + "epoch": 0.29668422494899976, + "grad_norm": 1.0289249325119165, + "learning_rate": 4.971056491687091e-06, + "loss": 0.5, + "step": 4872 + }, + { + "epoch": 0.29674512072587766, + "grad_norm": 1.102774062818966, + "learning_rate": 4.971044383332754e-06, + "loss": 0.4491, + "step": 4873 + }, + { + "epoch": 0.2968060165027555, + "grad_norm": 1.115079665441173, + "learning_rate": 4.971032272460968e-06, + "loss": 0.4792, + "step": 4874 + }, + { + "epoch": 0.2968669122796334, + "grad_norm": 0.9911162031642672, + "learning_rate": 4.971020159071746e-06, + "loss": 0.4984, + "step": 4875 + }, + { + "epoch": 0.29692780805651126, + "grad_norm": 1.0555939149805413, + "learning_rate": 4.971008043165099e-06, + "loss": 0.4247, + "step": 4876 + }, + { + "epoch": 0.29698870383338916, + "grad_norm": 0.8867457640478145, + "learning_rate": 4.9709959247410416e-06, + "loss": 0.5644, + "step": 4877 + }, + { + "epoch": 0.297049599610267, + "grad_norm": 1.0220004366799313, + "learning_rate": 4.970983803799585e-06, + "loss": 0.5971, + "step": 4878 + }, + { + "epoch": 0.2971104953871449, + "grad_norm": 1.029484631742006, + "learning_rate": 4.97097168034074e-06, + "loss": 0.424, + "step": 4879 + }, + { + "epoch": 0.2971713911640228, + "grad_norm": 1.090916531850651, + "learning_rate": 4.9709595543645214e-06, + "loss": 0.4751, + "step": 4880 + }, + { + "epoch": 0.29723228694090065, + "grad_norm": 1.0629882021911303, + "learning_rate": 4.970947425870941e-06, + "loss": 0.4762, + "step": 4881 + }, + { + "epoch": 0.29729318271777855, + "grad_norm": 1.0265015947012308, + "learning_rate": 4.970935294860011e-06, + "loss": 0.4908, + "step": 4882 + }, + { + "epoch": 0.2973540784946564, + "grad_norm": 1.0461443775660983, + "learning_rate": 4.970923161331742e-06, + "loss": 0.4458, + "step": 4883 + }, + { + "epoch": 0.2974149742715343, + "grad_norm": 1.131969517283659, + "learning_rate": 4.9709110252861495e-06, + "loss": 0.4508, + "step": 4884 + }, + { + "epoch": 0.29747587004841214, + "grad_norm": 1.0998958444008191, + "learning_rate": 4.970898886723243e-06, + "loss": 0.4972, + "step": 4885 + }, + { + "epoch": 0.29753676582529004, + "grad_norm": 1.1282203045889958, + "learning_rate": 4.970886745643037e-06, + "loss": 0.4492, + "step": 4886 + }, + { + "epoch": 0.2975976616021679, + "grad_norm": 1.1454009550033348, + "learning_rate": 4.9708746020455425e-06, + "loss": 0.4458, + "step": 4887 + }, + { + "epoch": 0.2976585573790458, + "grad_norm": 1.0536151385086105, + "learning_rate": 4.9708624559307715e-06, + "loss": 0.4983, + "step": 4888 + }, + { + "epoch": 0.29771945315592363, + "grad_norm": 1.0874530053775349, + "learning_rate": 4.970850307298739e-06, + "loss": 0.4378, + "step": 4889 + }, + { + "epoch": 0.29778034893280153, + "grad_norm": 0.9943597894730497, + "learning_rate": 4.970838156149454e-06, + "loss": 0.4896, + "step": 4890 + }, + { + "epoch": 0.2978412447096794, + "grad_norm": 1.007813531665907, + "learning_rate": 4.970826002482932e-06, + "loss": 0.4563, + "step": 4891 + }, + { + "epoch": 0.2979021404865573, + "grad_norm": 1.086224912871932, + "learning_rate": 4.970813846299182e-06, + "loss": 0.4488, + "step": 4892 + }, + { + "epoch": 0.2979630362634351, + "grad_norm": 1.0928734238732427, + "learning_rate": 4.970801687598219e-06, + "loss": 0.5447, + "step": 4893 + }, + { + "epoch": 0.298023932040313, + "grad_norm": 0.9948436134611929, + "learning_rate": 4.970789526380055e-06, + "loss": 0.5704, + "step": 4894 + }, + { + "epoch": 0.29808482781719087, + "grad_norm": 1.0808752379975455, + "learning_rate": 4.970777362644701e-06, + "loss": 0.4692, + "step": 4895 + }, + { + "epoch": 0.29814572359406877, + "grad_norm": 1.077961080737291, + "learning_rate": 4.9707651963921715e-06, + "loss": 0.4707, + "step": 4896 + }, + { + "epoch": 0.2982066193709466, + "grad_norm": 1.113027361491597, + "learning_rate": 4.970753027622478e-06, + "loss": 0.4322, + "step": 4897 + }, + { + "epoch": 0.2982675151478245, + "grad_norm": 0.9511885397633729, + "learning_rate": 4.970740856335632e-06, + "loss": 0.5285, + "step": 4898 + }, + { + "epoch": 0.29832841092470236, + "grad_norm": 0.9412862006638815, + "learning_rate": 4.9707286825316455e-06, + "loss": 0.5816, + "step": 4899 + }, + { + "epoch": 0.29838930670158026, + "grad_norm": 1.0745105592253101, + "learning_rate": 4.970716506210534e-06, + "loss": 0.3787, + "step": 4900 + }, + { + "epoch": 0.2984502024784581, + "grad_norm": 1.0376669266079135, + "learning_rate": 4.970704327372306e-06, + "loss": 0.4825, + "step": 4901 + }, + { + "epoch": 0.298511098255336, + "grad_norm": 0.9746671124778157, + "learning_rate": 4.970692146016978e-06, + "loss": 0.462, + "step": 4902 + }, + { + "epoch": 0.29857199403221385, + "grad_norm": 1.064091446651441, + "learning_rate": 4.970679962144559e-06, + "loss": 0.5118, + "step": 4903 + }, + { + "epoch": 0.29863288980909175, + "grad_norm": 1.0020884533476857, + "learning_rate": 4.970667775755062e-06, + "loss": 0.4327, + "step": 4904 + }, + { + "epoch": 0.2986937855859696, + "grad_norm": 1.1060257797771755, + "learning_rate": 4.970655586848501e-06, + "loss": 0.485, + "step": 4905 + }, + { + "epoch": 0.2987546813628475, + "grad_norm": 1.0354379751158758, + "learning_rate": 4.970643395424887e-06, + "loss": 0.5292, + "step": 4906 + }, + { + "epoch": 0.29881557713972534, + "grad_norm": 0.9672198935409871, + "learning_rate": 4.970631201484233e-06, + "loss": 0.5057, + "step": 4907 + }, + { + "epoch": 0.29887647291660324, + "grad_norm": 1.025190500280572, + "learning_rate": 4.970619005026552e-06, + "loss": 0.4334, + "step": 4908 + }, + { + "epoch": 0.2989373686934811, + "grad_norm": 0.9544513997733172, + "learning_rate": 4.970606806051855e-06, + "loss": 0.5025, + "step": 4909 + }, + { + "epoch": 0.298998264470359, + "grad_norm": 1.0971734322360107, + "learning_rate": 4.9705946045601545e-06, + "loss": 0.4755, + "step": 4910 + }, + { + "epoch": 0.29905916024723683, + "grad_norm": 1.0089385376374211, + "learning_rate": 4.970582400551465e-06, + "loss": 0.4129, + "step": 4911 + }, + { + "epoch": 0.29912005602411473, + "grad_norm": 0.9970472842722582, + "learning_rate": 4.970570194025797e-06, + "loss": 0.544, + "step": 4912 + }, + { + "epoch": 0.2991809518009926, + "grad_norm": 0.9367809987966663, + "learning_rate": 4.9705579849831625e-06, + "loss": 0.5503, + "step": 4913 + }, + { + "epoch": 0.2992418475778705, + "grad_norm": 0.9665656727969053, + "learning_rate": 4.970545773423576e-06, + "loss": 0.5635, + "step": 4914 + }, + { + "epoch": 0.2993027433547483, + "grad_norm": 1.0116630551454882, + "learning_rate": 4.970533559347048e-06, + "loss": 0.537, + "step": 4915 + }, + { + "epoch": 0.2993636391316262, + "grad_norm": 1.0811467475488183, + "learning_rate": 4.970521342753592e-06, + "loss": 0.4906, + "step": 4916 + }, + { + "epoch": 0.29942453490850407, + "grad_norm": 1.158031940201155, + "learning_rate": 4.970509123643221e-06, + "loss": 0.4508, + "step": 4917 + }, + { + "epoch": 0.29948543068538197, + "grad_norm": 1.0032629738709806, + "learning_rate": 4.970496902015946e-06, + "loss": 0.5344, + "step": 4918 + }, + { + "epoch": 0.2995463264622598, + "grad_norm": 1.0441834257734484, + "learning_rate": 4.97048467787178e-06, + "loss": 0.5096, + "step": 4919 + }, + { + "epoch": 0.2996072222391377, + "grad_norm": 0.9461991098391789, + "learning_rate": 4.970472451210736e-06, + "loss": 0.4882, + "step": 4920 + }, + { + "epoch": 0.2996681180160156, + "grad_norm": 1.041258229816044, + "learning_rate": 4.970460222032826e-06, + "loss": 0.4444, + "step": 4921 + }, + { + "epoch": 0.29972901379289346, + "grad_norm": 1.0304099557873891, + "learning_rate": 4.970447990338062e-06, + "loss": 0.4045, + "step": 4922 + }, + { + "epoch": 0.29978990956977136, + "grad_norm": 1.0571806200448772, + "learning_rate": 4.970435756126457e-06, + "loss": 0.5022, + "step": 4923 + }, + { + "epoch": 0.2998508053466492, + "grad_norm": 0.9922260081628608, + "learning_rate": 4.970423519398024e-06, + "loss": 0.4612, + "step": 4924 + }, + { + "epoch": 0.2999117011235271, + "grad_norm": 0.930098604083178, + "learning_rate": 4.970411280152775e-06, + "loss": 0.4313, + "step": 4925 + }, + { + "epoch": 0.29997259690040495, + "grad_norm": 0.9896062587220559, + "learning_rate": 4.970399038390722e-06, + "loss": 0.4939, + "step": 4926 + }, + { + "epoch": 0.30003349267728285, + "grad_norm": 1.0116079342050286, + "learning_rate": 4.970386794111878e-06, + "loss": 0.521, + "step": 4927 + }, + { + "epoch": 0.3000943884541607, + "grad_norm": 1.0741947807389676, + "learning_rate": 4.970374547316255e-06, + "loss": 0.3916, + "step": 4928 + }, + { + "epoch": 0.3001552842310386, + "grad_norm": 1.0367597529218504, + "learning_rate": 4.970362298003866e-06, + "loss": 0.5125, + "step": 4929 + }, + { + "epoch": 0.30021618000791644, + "grad_norm": 0.996159428712316, + "learning_rate": 4.970350046174722e-06, + "loss": 0.5217, + "step": 4930 + }, + { + "epoch": 0.30027707578479435, + "grad_norm": 1.0363403184846296, + "learning_rate": 4.970337791828838e-06, + "loss": 0.4369, + "step": 4931 + }, + { + "epoch": 0.3003379715616722, + "grad_norm": 1.0768081556048565, + "learning_rate": 4.970325534966225e-06, + "loss": 0.4493, + "step": 4932 + }, + { + "epoch": 0.3003988673385501, + "grad_norm": 1.0026265013282036, + "learning_rate": 4.970313275586896e-06, + "loss": 0.5137, + "step": 4933 + }, + { + "epoch": 0.30045976311542794, + "grad_norm": 1.0987459436003237, + "learning_rate": 4.970301013690863e-06, + "loss": 0.5247, + "step": 4934 + }, + { + "epoch": 0.30052065889230584, + "grad_norm": 1.0482526571114168, + "learning_rate": 4.970288749278138e-06, + "loss": 0.4666, + "step": 4935 + }, + { + "epoch": 0.3005815546691837, + "grad_norm": 1.0986605778996261, + "learning_rate": 4.970276482348735e-06, + "loss": 0.4672, + "step": 4936 + }, + { + "epoch": 0.3006424504460616, + "grad_norm": 0.9700473276413938, + "learning_rate": 4.970264212902666e-06, + "loss": 0.4612, + "step": 4937 + }, + { + "epoch": 0.3007033462229394, + "grad_norm": 0.9351974785949779, + "learning_rate": 4.9702519409399425e-06, + "loss": 0.5214, + "step": 4938 + }, + { + "epoch": 0.30076424199981733, + "grad_norm": 1.0746187056870868, + "learning_rate": 4.9702396664605775e-06, + "loss": 0.4145, + "step": 4939 + }, + { + "epoch": 0.3008251377766952, + "grad_norm": 0.9674212275122395, + "learning_rate": 4.9702273894645845e-06, + "loss": 0.4984, + "step": 4940 + }, + { + "epoch": 0.3008860335535731, + "grad_norm": 0.9683697142512124, + "learning_rate": 4.970215109951974e-06, + "loss": 0.381, + "step": 4941 + }, + { + "epoch": 0.3009469293304509, + "grad_norm": 1.092221477305765, + "learning_rate": 4.970202827922761e-06, + "loss": 0.423, + "step": 4942 + }, + { + "epoch": 0.3010078251073288, + "grad_norm": 1.0743770050663917, + "learning_rate": 4.970190543376956e-06, + "loss": 0.4397, + "step": 4943 + }, + { + "epoch": 0.30106872088420666, + "grad_norm": 1.0914523101053157, + "learning_rate": 4.970178256314572e-06, + "loss": 0.4852, + "step": 4944 + }, + { + "epoch": 0.30112961666108456, + "grad_norm": 1.1043565913553344, + "learning_rate": 4.9701659667356226e-06, + "loss": 0.5266, + "step": 4945 + }, + { + "epoch": 0.3011905124379624, + "grad_norm": 0.9871830260563703, + "learning_rate": 4.9701536746401195e-06, + "loss": 0.5606, + "step": 4946 + }, + { + "epoch": 0.3012514082148403, + "grad_norm": 1.0222764072237511, + "learning_rate": 4.970141380028074e-06, + "loss": 0.4598, + "step": 4947 + }, + { + "epoch": 0.30131230399171816, + "grad_norm": 1.0538527754215283, + "learning_rate": 4.9701290828995e-06, + "loss": 0.4639, + "step": 4948 + }, + { + "epoch": 0.30137319976859606, + "grad_norm": 1.0739045959355669, + "learning_rate": 4.97011678325441e-06, + "loss": 0.5091, + "step": 4949 + }, + { + "epoch": 0.3014340955454739, + "grad_norm": 1.1058232656757216, + "learning_rate": 4.9701044810928176e-06, + "loss": 0.5602, + "step": 4950 + }, + { + "epoch": 0.3014949913223518, + "grad_norm": 1.0530086276240438, + "learning_rate": 4.970092176414733e-06, + "loss": 0.4823, + "step": 4951 + }, + { + "epoch": 0.30155588709922965, + "grad_norm": 0.9372584514495159, + "learning_rate": 4.97007986922017e-06, + "loss": 0.5124, + "step": 4952 + }, + { + "epoch": 0.30161678287610755, + "grad_norm": 1.0559995320259994, + "learning_rate": 4.970067559509141e-06, + "loss": 0.4647, + "step": 4953 + }, + { + "epoch": 0.3016776786529854, + "grad_norm": 1.0619723836633261, + "learning_rate": 4.970055247281659e-06, + "loss": 0.4502, + "step": 4954 + }, + { + "epoch": 0.3017385744298633, + "grad_norm": 1.024638769021098, + "learning_rate": 4.970042932537736e-06, + "loss": 0.4919, + "step": 4955 + }, + { + "epoch": 0.30179947020674114, + "grad_norm": 0.9967199226849933, + "learning_rate": 4.9700306152773834e-06, + "loss": 0.4502, + "step": 4956 + }, + { + "epoch": 0.30186036598361904, + "grad_norm": 1.0785394208417376, + "learning_rate": 4.970018295500616e-06, + "loss": 0.4564, + "step": 4957 + }, + { + "epoch": 0.3019212617604969, + "grad_norm": 1.000096409171083, + "learning_rate": 4.970005973207445e-06, + "loss": 0.5181, + "step": 4958 + }, + { + "epoch": 0.3019821575373748, + "grad_norm": 0.9958971917404045, + "learning_rate": 4.969993648397883e-06, + "loss": 0.5094, + "step": 4959 + }, + { + "epoch": 0.30204305331425263, + "grad_norm": 0.9940823611028659, + "learning_rate": 4.969981321071944e-06, + "loss": 0.4417, + "step": 4960 + }, + { + "epoch": 0.30210394909113053, + "grad_norm": 1.0217535981750054, + "learning_rate": 4.969968991229638e-06, + "loss": 0.5152, + "step": 4961 + }, + { + "epoch": 0.30216484486800843, + "grad_norm": 0.9855225430336216, + "learning_rate": 4.969956658870979e-06, + "loss": 0.5403, + "step": 4962 + }, + { + "epoch": 0.3022257406448863, + "grad_norm": 0.9569724378388705, + "learning_rate": 4.969944323995981e-06, + "loss": 0.5128, + "step": 4963 + }, + { + "epoch": 0.3022866364217642, + "grad_norm": 0.9639554113376013, + "learning_rate": 4.969931986604654e-06, + "loss": 0.5367, + "step": 4964 + }, + { + "epoch": 0.302347532198642, + "grad_norm": 1.0579788801911894, + "learning_rate": 4.9699196466970115e-06, + "loss": 0.4801, + "step": 4965 + }, + { + "epoch": 0.3024084279755199, + "grad_norm": 1.00629867912399, + "learning_rate": 4.969907304273066e-06, + "loss": 0.4452, + "step": 4966 + }, + { + "epoch": 0.30246932375239777, + "grad_norm": 1.0670320992625448, + "learning_rate": 4.969894959332832e-06, + "loss": 0.4884, + "step": 4967 + }, + { + "epoch": 0.30253021952927567, + "grad_norm": 0.9542363826120015, + "learning_rate": 4.9698826118763184e-06, + "loss": 0.5613, + "step": 4968 + }, + { + "epoch": 0.3025911153061535, + "grad_norm": 1.115617210135161, + "learning_rate": 4.96987026190354e-06, + "loss": 0.5094, + "step": 4969 + }, + { + "epoch": 0.3026520110830314, + "grad_norm": 1.1117591512215368, + "learning_rate": 4.96985790941451e-06, + "loss": 0.4503, + "step": 4970 + }, + { + "epoch": 0.30271290685990926, + "grad_norm": 1.0806770180372833, + "learning_rate": 4.96984555440924e-06, + "loss": 0.4749, + "step": 4971 + }, + { + "epoch": 0.30277380263678716, + "grad_norm": 1.0081326013919, + "learning_rate": 4.969833196887742e-06, + "loss": 0.4936, + "step": 4972 + }, + { + "epoch": 0.302834698413665, + "grad_norm": 1.0186379491389177, + "learning_rate": 4.96982083685003e-06, + "loss": 0.5091, + "step": 4973 + }, + { + "epoch": 0.3028955941905429, + "grad_norm": 1.100908921332688, + "learning_rate": 4.969808474296115e-06, + "loss": 0.4479, + "step": 4974 + }, + { + "epoch": 0.30295648996742075, + "grad_norm": 1.07350988141062, + "learning_rate": 4.969796109226012e-06, + "loss": 0.4445, + "step": 4975 + }, + { + "epoch": 0.30301738574429865, + "grad_norm": 0.9530436938468588, + "learning_rate": 4.9697837416397306e-06, + "loss": 0.5198, + "step": 4976 + }, + { + "epoch": 0.3030782815211765, + "grad_norm": 0.9945665592491477, + "learning_rate": 4.9697713715372856e-06, + "loss": 0.4591, + "step": 4977 + }, + { + "epoch": 0.3031391772980544, + "grad_norm": 1.0053297070215237, + "learning_rate": 4.969758998918689e-06, + "loss": 0.4552, + "step": 4978 + }, + { + "epoch": 0.30320007307493224, + "grad_norm": 1.0870505843231055, + "learning_rate": 4.969746623783952e-06, + "loss": 0.4596, + "step": 4979 + }, + { + "epoch": 0.30326096885181014, + "grad_norm": 1.0396522444392808, + "learning_rate": 4.96973424613309e-06, + "loss": 0.4567, + "step": 4980 + }, + { + "epoch": 0.303321864628688, + "grad_norm": 1.0052590566589814, + "learning_rate": 4.969721865966114e-06, + "loss": 0.4319, + "step": 4981 + }, + { + "epoch": 0.3033827604055659, + "grad_norm": 0.9317154800671471, + "learning_rate": 4.969709483283036e-06, + "loss": 0.4798, + "step": 4982 + }, + { + "epoch": 0.30344365618244373, + "grad_norm": 1.1220444538016756, + "learning_rate": 4.969697098083869e-06, + "loss": 0.4959, + "step": 4983 + }, + { + "epoch": 0.30350455195932163, + "grad_norm": 1.0554496777567757, + "learning_rate": 4.969684710368627e-06, + "loss": 0.4815, + "step": 4984 + }, + { + "epoch": 0.3035654477361995, + "grad_norm": 1.056794190099181, + "learning_rate": 4.9696723201373206e-06, + "loss": 0.4307, + "step": 4985 + }, + { + "epoch": 0.3036263435130774, + "grad_norm": 1.1441655192539453, + "learning_rate": 4.969659927389965e-06, + "loss": 0.4838, + "step": 4986 + }, + { + "epoch": 0.3036872392899552, + "grad_norm": 1.0537504840483045, + "learning_rate": 4.9696475321265695e-06, + "loss": 0.4837, + "step": 4987 + }, + { + "epoch": 0.3037481350668331, + "grad_norm": 1.1890437172975241, + "learning_rate": 4.969635134347149e-06, + "loss": 0.5165, + "step": 4988 + }, + { + "epoch": 0.30380903084371097, + "grad_norm": 1.1490539826670503, + "learning_rate": 4.969622734051716e-06, + "loss": 0.4706, + "step": 4989 + }, + { + "epoch": 0.30386992662058887, + "grad_norm": 0.9563754614759905, + "learning_rate": 4.969610331240282e-06, + "loss": 0.4848, + "step": 4990 + }, + { + "epoch": 0.3039308223974667, + "grad_norm": 1.0951977915389661, + "learning_rate": 4.96959792591286e-06, + "loss": 0.4497, + "step": 4991 + }, + { + "epoch": 0.3039917181743446, + "grad_norm": 1.0589506197677945, + "learning_rate": 4.969585518069464e-06, + "loss": 0.4564, + "step": 4992 + }, + { + "epoch": 0.30405261395122246, + "grad_norm": 1.0671811817128787, + "learning_rate": 4.969573107710105e-06, + "loss": 0.461, + "step": 4993 + }, + { + "epoch": 0.30411350972810036, + "grad_norm": 1.1065230531020118, + "learning_rate": 4.9695606948347965e-06, + "loss": 0.447, + "step": 4994 + }, + { + "epoch": 0.3041744055049782, + "grad_norm": 0.9726336567875957, + "learning_rate": 4.969548279443551e-06, + "loss": 0.5581, + "step": 4995 + }, + { + "epoch": 0.3042353012818561, + "grad_norm": 1.0260527294191515, + "learning_rate": 4.969535861536381e-06, + "loss": 0.4652, + "step": 4996 + }, + { + "epoch": 0.30429619705873395, + "grad_norm": 1.0108517557901224, + "learning_rate": 4.969523441113299e-06, + "loss": 0.4707, + "step": 4997 + }, + { + "epoch": 0.30435709283561185, + "grad_norm": 1.0445585005639557, + "learning_rate": 4.969511018174318e-06, + "loss": 0.4344, + "step": 4998 + }, + { + "epoch": 0.3044179886124897, + "grad_norm": 1.0873952703230902, + "learning_rate": 4.969498592719451e-06, + "loss": 0.5257, + "step": 4999 + }, + { + "epoch": 0.3044788843893676, + "grad_norm": 1.1057028032311849, + "learning_rate": 4.969486164748709e-06, + "loss": 0.4188, + "step": 5000 + }, + { + "epoch": 0.30453978016624544, + "grad_norm": 1.0201680373179371, + "learning_rate": 4.969473734262107e-06, + "loss": 0.5477, + "step": 5001 + }, + { + "epoch": 0.30460067594312334, + "grad_norm": 1.1364511722538246, + "learning_rate": 4.969461301259656e-06, + "loss": 0.4366, + "step": 5002 + }, + { + "epoch": 0.30466157172000125, + "grad_norm": 0.9807959708660955, + "learning_rate": 4.96944886574137e-06, + "loss": 0.4909, + "step": 5003 + }, + { + "epoch": 0.3047224674968791, + "grad_norm": 1.1101296552951776, + "learning_rate": 4.969436427707259e-06, + "loss": 0.4691, + "step": 5004 + }, + { + "epoch": 0.304783363273757, + "grad_norm": 0.9424665522338885, + "learning_rate": 4.969423987157339e-06, + "loss": 0.6356, + "step": 5005 + }, + { + "epoch": 0.30484425905063484, + "grad_norm": 1.0166249668896945, + "learning_rate": 4.969411544091621e-06, + "loss": 0.5134, + "step": 5006 + }, + { + "epoch": 0.30490515482751274, + "grad_norm": 1.1648069619512398, + "learning_rate": 4.969399098510117e-06, + "loss": 0.4889, + "step": 5007 + }, + { + "epoch": 0.3049660506043906, + "grad_norm": 0.9554607090158274, + "learning_rate": 4.969386650412842e-06, + "loss": 0.5166, + "step": 5008 + }, + { + "epoch": 0.3050269463812685, + "grad_norm": 1.1020627120608435, + "learning_rate": 4.969374199799806e-06, + "loss": 0.4572, + "step": 5009 + }, + { + "epoch": 0.3050878421581463, + "grad_norm": 0.9868726583114441, + "learning_rate": 4.969361746671023e-06, + "loss": 0.4702, + "step": 5010 + }, + { + "epoch": 0.30514873793502423, + "grad_norm": 0.9377812862188304, + "learning_rate": 4.969349291026506e-06, + "loss": 0.5252, + "step": 5011 + }, + { + "epoch": 0.3052096337119021, + "grad_norm": 1.0536112229525696, + "learning_rate": 4.969336832866267e-06, + "loss": 0.5047, + "step": 5012 + }, + { + "epoch": 0.30527052948878, + "grad_norm": 0.9655928116633052, + "learning_rate": 4.96932437219032e-06, + "loss": 0.5467, + "step": 5013 + }, + { + "epoch": 0.3053314252656578, + "grad_norm": 1.070286254947455, + "learning_rate": 4.969311908998675e-06, + "loss": 0.4305, + "step": 5014 + }, + { + "epoch": 0.3053923210425357, + "grad_norm": 1.0855605670040693, + "learning_rate": 4.969299443291347e-06, + "loss": 0.4247, + "step": 5015 + }, + { + "epoch": 0.30545321681941356, + "grad_norm": 1.1361712417855092, + "learning_rate": 4.969286975068348e-06, + "loss": 0.4928, + "step": 5016 + }, + { + "epoch": 0.30551411259629146, + "grad_norm": 0.9662548616404487, + "learning_rate": 4.969274504329691e-06, + "loss": 0.6052, + "step": 5017 + }, + { + "epoch": 0.3055750083731693, + "grad_norm": 1.0053558559868432, + "learning_rate": 4.969262031075389e-06, + "loss": 0.4718, + "step": 5018 + }, + { + "epoch": 0.3056359041500472, + "grad_norm": 1.2701794717639687, + "learning_rate": 4.969249555305453e-06, + "loss": 0.387, + "step": 5019 + }, + { + "epoch": 0.30569679992692506, + "grad_norm": 1.061238983425306, + "learning_rate": 4.969237077019898e-06, + "loss": 0.5114, + "step": 5020 + }, + { + "epoch": 0.30575769570380296, + "grad_norm": 1.069934828866935, + "learning_rate": 4.969224596218735e-06, + "loss": 0.508, + "step": 5021 + }, + { + "epoch": 0.3058185914806808, + "grad_norm": 1.1212024661436197, + "learning_rate": 4.969212112901978e-06, + "loss": 0.4915, + "step": 5022 + }, + { + "epoch": 0.3058794872575587, + "grad_norm": 1.0801032555903776, + "learning_rate": 4.969199627069638e-06, + "loss": 0.4946, + "step": 5023 + }, + { + "epoch": 0.30594038303443655, + "grad_norm": 1.0810401536120458, + "learning_rate": 4.969187138721729e-06, + "loss": 0.4711, + "step": 5024 + }, + { + "epoch": 0.30600127881131445, + "grad_norm": 0.9587972296622962, + "learning_rate": 4.969174647858264e-06, + "loss": 0.5004, + "step": 5025 + }, + { + "epoch": 0.3060621745881923, + "grad_norm": 1.0443569915971467, + "learning_rate": 4.969162154479254e-06, + "loss": 0.4568, + "step": 5026 + }, + { + "epoch": 0.3061230703650702, + "grad_norm": 1.015833541196289, + "learning_rate": 4.969149658584715e-06, + "loss": 0.4753, + "step": 5027 + }, + { + "epoch": 0.30618396614194804, + "grad_norm": 1.0575446519982281, + "learning_rate": 4.969137160174656e-06, + "loss": 0.4726, + "step": 5028 + }, + { + "epoch": 0.30624486191882594, + "grad_norm": 0.8879352768580389, + "learning_rate": 4.969124659249092e-06, + "loss": 0.5069, + "step": 5029 + }, + { + "epoch": 0.3063057576957038, + "grad_norm": 1.014302063898416, + "learning_rate": 4.969112155808035e-06, + "loss": 0.4967, + "step": 5030 + }, + { + "epoch": 0.3063666534725817, + "grad_norm": 0.940337816097191, + "learning_rate": 4.969099649851498e-06, + "loss": 0.5277, + "step": 5031 + }, + { + "epoch": 0.30642754924945953, + "grad_norm": 0.9723108242904902, + "learning_rate": 4.969087141379494e-06, + "loss": 0.513, + "step": 5032 + }, + { + "epoch": 0.30648844502633743, + "grad_norm": 0.9740742373183502, + "learning_rate": 4.969074630392035e-06, + "loss": 0.4904, + "step": 5033 + }, + { + "epoch": 0.3065493408032153, + "grad_norm": 1.0704686597381328, + "learning_rate": 4.969062116889134e-06, + "loss": 0.4263, + "step": 5034 + }, + { + "epoch": 0.3066102365800932, + "grad_norm": 0.8754239257694846, + "learning_rate": 4.969049600870803e-06, + "loss": 0.5116, + "step": 5035 + }, + { + "epoch": 0.306671132356971, + "grad_norm": 0.971147412670188, + "learning_rate": 4.969037082337057e-06, + "loss": 0.5095, + "step": 5036 + }, + { + "epoch": 0.3067320281338489, + "grad_norm": 1.0838238313668744, + "learning_rate": 4.969024561287906e-06, + "loss": 0.5192, + "step": 5037 + }, + { + "epoch": 0.30679292391072677, + "grad_norm": 1.11567003505524, + "learning_rate": 4.969012037723365e-06, + "loss": 0.47, + "step": 5038 + }, + { + "epoch": 0.30685381968760467, + "grad_norm": 0.9959563481863839, + "learning_rate": 4.968999511643447e-06, + "loss": 0.4451, + "step": 5039 + }, + { + "epoch": 0.3069147154644825, + "grad_norm": 1.0542652420655783, + "learning_rate": 4.9689869830481615e-06, + "loss": 0.4868, + "step": 5040 + }, + { + "epoch": 0.3069756112413604, + "grad_norm": 1.0530860007088592, + "learning_rate": 4.968974451937524e-06, + "loss": 0.4623, + "step": 5041 + }, + { + "epoch": 0.30703650701823826, + "grad_norm": 1.074190756999078, + "learning_rate": 4.9689619183115475e-06, + "loss": 0.5144, + "step": 5042 + }, + { + "epoch": 0.30709740279511616, + "grad_norm": 1.050353344463161, + "learning_rate": 4.968949382170243e-06, + "loss": 0.5338, + "step": 5043 + }, + { + "epoch": 0.30715829857199406, + "grad_norm": 1.0333597542937802, + "learning_rate": 4.968936843513625e-06, + "loss": 0.461, + "step": 5044 + }, + { + "epoch": 0.3072191943488719, + "grad_norm": 0.980100562457861, + "learning_rate": 4.968924302341705e-06, + "loss": 0.4419, + "step": 5045 + }, + { + "epoch": 0.3072800901257498, + "grad_norm": 0.9717956113468027, + "learning_rate": 4.968911758654497e-06, + "loss": 0.5354, + "step": 5046 + }, + { + "epoch": 0.30734098590262765, + "grad_norm": 1.127850226045452, + "learning_rate": 4.968899212452012e-06, + "loss": 0.4106, + "step": 5047 + }, + { + "epoch": 0.30740188167950555, + "grad_norm": 1.0591115216972031, + "learning_rate": 4.968886663734264e-06, + "loss": 0.4375, + "step": 5048 + }, + { + "epoch": 0.3074627774563834, + "grad_norm": 1.027267476965639, + "learning_rate": 4.968874112501265e-06, + "loss": 0.5204, + "step": 5049 + }, + { + "epoch": 0.3075236732332613, + "grad_norm": 1.0714324761912237, + "learning_rate": 4.96886155875303e-06, + "loss": 0.5085, + "step": 5050 + }, + { + "epoch": 0.30758456901013914, + "grad_norm": 0.9752043858718045, + "learning_rate": 4.968849002489568e-06, + "loss": 0.5429, + "step": 5051 + }, + { + "epoch": 0.30764546478701704, + "grad_norm": 1.137806290175147, + "learning_rate": 4.968836443710896e-06, + "loss": 0.516, + "step": 5052 + }, + { + "epoch": 0.3077063605638949, + "grad_norm": 0.974064054812871, + "learning_rate": 4.968823882417025e-06, + "loss": 0.4885, + "step": 5053 + }, + { + "epoch": 0.3077672563407728, + "grad_norm": 1.0232338716419167, + "learning_rate": 4.968811318607966e-06, + "loss": 0.509, + "step": 5054 + }, + { + "epoch": 0.30782815211765063, + "grad_norm": 1.0083135216252797, + "learning_rate": 4.9687987522837335e-06, + "loss": 0.4285, + "step": 5055 + }, + { + "epoch": 0.30788904789452853, + "grad_norm": 1.0845860705975552, + "learning_rate": 4.968786183444341e-06, + "loss": 0.4052, + "step": 5056 + }, + { + "epoch": 0.3079499436714064, + "grad_norm": 1.0534882327030928, + "learning_rate": 4.9687736120898e-06, + "loss": 0.4867, + "step": 5057 + }, + { + "epoch": 0.3080108394482843, + "grad_norm": 1.0667864091272876, + "learning_rate": 4.968761038220124e-06, + "loss": 0.5373, + "step": 5058 + }, + { + "epoch": 0.3080717352251621, + "grad_norm": 1.0367658192750488, + "learning_rate": 4.968748461835325e-06, + "loss": 0.4686, + "step": 5059 + }, + { + "epoch": 0.30813263100204, + "grad_norm": 1.0541846078424237, + "learning_rate": 4.968735882935417e-06, + "loss": 0.4511, + "step": 5060 + }, + { + "epoch": 0.30819352677891787, + "grad_norm": 0.9888586037578275, + "learning_rate": 4.968723301520412e-06, + "loss": 0.4806, + "step": 5061 + }, + { + "epoch": 0.30825442255579577, + "grad_norm": 1.1114018732140467, + "learning_rate": 4.968710717590323e-06, + "loss": 0.489, + "step": 5062 + }, + { + "epoch": 0.3083153183326736, + "grad_norm": 1.0598257964304432, + "learning_rate": 4.9686981311451625e-06, + "loss": 0.4597, + "step": 5063 + }, + { + "epoch": 0.3083762141095515, + "grad_norm": 1.0213098930937954, + "learning_rate": 4.968685542184944e-06, + "loss": 0.4477, + "step": 5064 + }, + { + "epoch": 0.30843710988642936, + "grad_norm": 0.9507471240203729, + "learning_rate": 4.9686729507096805e-06, + "loss": 0.4955, + "step": 5065 + }, + { + "epoch": 0.30849800566330726, + "grad_norm": 1.0526737689602832, + "learning_rate": 4.9686603567193835e-06, + "loss": 0.5024, + "step": 5066 + }, + { + "epoch": 0.3085589014401851, + "grad_norm": 1.0513545801709065, + "learning_rate": 4.968647760214067e-06, + "loss": 0.4418, + "step": 5067 + }, + { + "epoch": 0.308619797217063, + "grad_norm": 1.0477543196529404, + "learning_rate": 4.968635161193744e-06, + "loss": 0.5105, + "step": 5068 + }, + { + "epoch": 0.30868069299394085, + "grad_norm": 1.0050112654618055, + "learning_rate": 4.968622559658426e-06, + "loss": 0.4538, + "step": 5069 + }, + { + "epoch": 0.30874158877081875, + "grad_norm": 1.012554264613682, + "learning_rate": 4.968609955608127e-06, + "loss": 0.4636, + "step": 5070 + }, + { + "epoch": 0.3088024845476966, + "grad_norm": 0.9837771697737474, + "learning_rate": 4.96859734904286e-06, + "loss": 0.5029, + "step": 5071 + }, + { + "epoch": 0.3088633803245745, + "grad_norm": 1.0452234768330588, + "learning_rate": 4.968584739962636e-06, + "loss": 0.4956, + "step": 5072 + }, + { + "epoch": 0.30892427610145234, + "grad_norm": 1.039993185266732, + "learning_rate": 4.968572128367471e-06, + "loss": 0.4769, + "step": 5073 + }, + { + "epoch": 0.30898517187833024, + "grad_norm": 1.0826373627090515, + "learning_rate": 4.968559514257375e-06, + "loss": 0.5284, + "step": 5074 + }, + { + "epoch": 0.3090460676552081, + "grad_norm": 1.1588836476979387, + "learning_rate": 4.968546897632361e-06, + "loss": 0.4498, + "step": 5075 + }, + { + "epoch": 0.309106963432086, + "grad_norm": 1.000615609684844, + "learning_rate": 4.968534278492444e-06, + "loss": 0.5167, + "step": 5076 + }, + { + "epoch": 0.30916785920896384, + "grad_norm": 1.0134349593726322, + "learning_rate": 4.968521656837636e-06, + "loss": 0.509, + "step": 5077 + }, + { + "epoch": 0.30922875498584174, + "grad_norm": 1.0398419098266587, + "learning_rate": 4.968509032667948e-06, + "loss": 0.443, + "step": 5078 + }, + { + "epoch": 0.3092896507627196, + "grad_norm": 0.9936108879203397, + "learning_rate": 4.9684964059833954e-06, + "loss": 0.558, + "step": 5079 + }, + { + "epoch": 0.3093505465395975, + "grad_norm": 1.020787624313198, + "learning_rate": 4.9684837767839895e-06, + "loss": 0.5189, + "step": 5080 + }, + { + "epoch": 0.3094114423164753, + "grad_norm": 0.9700081666130015, + "learning_rate": 4.968471145069744e-06, + "loss": 0.5096, + "step": 5081 + }, + { + "epoch": 0.3094723380933532, + "grad_norm": 0.9979033864017931, + "learning_rate": 4.968458510840671e-06, + "loss": 0.4847, + "step": 5082 + }, + { + "epoch": 0.3095332338702311, + "grad_norm": 0.9726461193713638, + "learning_rate": 4.968445874096784e-06, + "loss": 0.4918, + "step": 5083 + }, + { + "epoch": 0.309594129647109, + "grad_norm": 1.0153879660121452, + "learning_rate": 4.968433234838096e-06, + "loss": 0.4582, + "step": 5084 + }, + { + "epoch": 0.3096550254239869, + "grad_norm": 0.9955649452741717, + "learning_rate": 4.968420593064619e-06, + "loss": 0.4572, + "step": 5085 + }, + { + "epoch": 0.3097159212008647, + "grad_norm": 0.9812778680233103, + "learning_rate": 4.968407948776367e-06, + "loss": 0.4824, + "step": 5086 + }, + { + "epoch": 0.3097768169777426, + "grad_norm": 1.1208105316877057, + "learning_rate": 4.968395301973351e-06, + "loss": 0.4982, + "step": 5087 + }, + { + "epoch": 0.30983771275462046, + "grad_norm": 0.9699231743767815, + "learning_rate": 4.9683826526555865e-06, + "loss": 0.5065, + "step": 5088 + }, + { + "epoch": 0.30989860853149837, + "grad_norm": 1.0573450582967472, + "learning_rate": 4.968370000823085e-06, + "loss": 0.4841, + "step": 5089 + }, + { + "epoch": 0.3099595043083762, + "grad_norm": 1.0157389475456855, + "learning_rate": 4.968357346475859e-06, + "loss": 0.4668, + "step": 5090 + }, + { + "epoch": 0.3100204000852541, + "grad_norm": 1.1064073390383085, + "learning_rate": 4.968344689613922e-06, + "loss": 0.4344, + "step": 5091 + }, + { + "epoch": 0.31008129586213196, + "grad_norm": 1.1153826298547915, + "learning_rate": 4.968332030237287e-06, + "loss": 0.4326, + "step": 5092 + }, + { + "epoch": 0.31014219163900986, + "grad_norm": 1.0178356409543432, + "learning_rate": 4.968319368345967e-06, + "loss": 0.4453, + "step": 5093 + }, + { + "epoch": 0.3102030874158877, + "grad_norm": 0.9964832654763887, + "learning_rate": 4.9683067039399734e-06, + "loss": 0.5119, + "step": 5094 + }, + { + "epoch": 0.3102639831927656, + "grad_norm": 1.122896887707491, + "learning_rate": 4.968294037019321e-06, + "loss": 0.5246, + "step": 5095 + }, + { + "epoch": 0.31032487896964345, + "grad_norm": 1.1399021266120526, + "learning_rate": 4.968281367584021e-06, + "loss": 0.4137, + "step": 5096 + }, + { + "epoch": 0.31038577474652135, + "grad_norm": 1.031661005012351, + "learning_rate": 4.968268695634089e-06, + "loss": 0.4745, + "step": 5097 + }, + { + "epoch": 0.3104466705233992, + "grad_norm": 1.0158564539496315, + "learning_rate": 4.9682560211695345e-06, + "loss": 0.4997, + "step": 5098 + }, + { + "epoch": 0.3105075663002771, + "grad_norm": 0.9593264633507248, + "learning_rate": 4.968243344190373e-06, + "loss": 0.4777, + "step": 5099 + }, + { + "epoch": 0.31056846207715494, + "grad_norm": 1.0480055508162904, + "learning_rate": 4.968230664696616e-06, + "loss": 0.4946, + "step": 5100 + }, + { + "epoch": 0.31062935785403284, + "grad_norm": 1.032845949627706, + "learning_rate": 4.968217982688277e-06, + "loss": 0.5317, + "step": 5101 + }, + { + "epoch": 0.3106902536309107, + "grad_norm": 1.133953215255313, + "learning_rate": 4.968205298165369e-06, + "loss": 0.5025, + "step": 5102 + }, + { + "epoch": 0.3107511494077886, + "grad_norm": 1.0218764623507441, + "learning_rate": 4.968192611127905e-06, + "loss": 0.4897, + "step": 5103 + }, + { + "epoch": 0.31081204518466643, + "grad_norm": 1.1106721040050749, + "learning_rate": 4.968179921575897e-06, + "loss": 0.4719, + "step": 5104 + }, + { + "epoch": 0.31087294096154433, + "grad_norm": 1.0185108763173394, + "learning_rate": 4.96816722950936e-06, + "loss": 0.4818, + "step": 5105 + }, + { + "epoch": 0.3109338367384222, + "grad_norm": 0.9494301165279092, + "learning_rate": 4.968154534928305e-06, + "loss": 0.5536, + "step": 5106 + }, + { + "epoch": 0.3109947325153001, + "grad_norm": 1.0454517746604666, + "learning_rate": 4.968141837832744e-06, + "loss": 0.417, + "step": 5107 + }, + { + "epoch": 0.3110556282921779, + "grad_norm": 1.0458485000057145, + "learning_rate": 4.968129138222693e-06, + "loss": 0.5538, + "step": 5108 + }, + { + "epoch": 0.3111165240690558, + "grad_norm": 0.9817175401241943, + "learning_rate": 4.9681164360981625e-06, + "loss": 0.506, + "step": 5109 + }, + { + "epoch": 0.31117741984593367, + "grad_norm": 1.064079450782691, + "learning_rate": 4.968103731459166e-06, + "loss": 0.4019, + "step": 5110 + }, + { + "epoch": 0.31123831562281157, + "grad_norm": 0.9733204030831321, + "learning_rate": 4.968091024305718e-06, + "loss": 0.5133, + "step": 5111 + }, + { + "epoch": 0.3112992113996894, + "grad_norm": 1.148137120243835, + "learning_rate": 4.96807831463783e-06, + "loss": 0.4694, + "step": 5112 + }, + { + "epoch": 0.3113601071765673, + "grad_norm": 1.0348870612271959, + "learning_rate": 4.968065602455514e-06, + "loss": 0.4992, + "step": 5113 + }, + { + "epoch": 0.31142100295344516, + "grad_norm": 1.063855979632825, + "learning_rate": 4.9680528877587855e-06, + "loss": 0.4604, + "step": 5114 + }, + { + "epoch": 0.31148189873032306, + "grad_norm": 1.1151070492788793, + "learning_rate": 4.968040170547655e-06, + "loss": 0.4178, + "step": 5115 + }, + { + "epoch": 0.3115427945072009, + "grad_norm": 1.034407546536507, + "learning_rate": 4.968027450822136e-06, + "loss": 0.5001, + "step": 5116 + }, + { + "epoch": 0.3116036902840788, + "grad_norm": 1.053614645029064, + "learning_rate": 4.9680147285822434e-06, + "loss": 0.4987, + "step": 5117 + }, + { + "epoch": 0.31166458606095665, + "grad_norm": 1.1884723976853817, + "learning_rate": 4.968002003827988e-06, + "loss": 0.4855, + "step": 5118 + }, + { + "epoch": 0.31172548183783455, + "grad_norm": 1.0712409407223522, + "learning_rate": 4.967989276559383e-06, + "loss": 0.4187, + "step": 5119 + }, + { + "epoch": 0.3117863776147124, + "grad_norm": 1.0925613597560053, + "learning_rate": 4.967976546776442e-06, + "loss": 0.4286, + "step": 5120 + }, + { + "epoch": 0.3118472733915903, + "grad_norm": 1.1249124687109406, + "learning_rate": 4.967963814479178e-06, + "loss": 0.5909, + "step": 5121 + }, + { + "epoch": 0.31190816916846814, + "grad_norm": 1.0960844276090487, + "learning_rate": 4.967951079667604e-06, + "loss": 0.4515, + "step": 5122 + }, + { + "epoch": 0.31196906494534604, + "grad_norm": 1.066627781438501, + "learning_rate": 4.967938342341734e-06, + "loss": 0.4454, + "step": 5123 + }, + { + "epoch": 0.3120299607222239, + "grad_norm": 1.0523619315652433, + "learning_rate": 4.967925602501578e-06, + "loss": 0.479, + "step": 5124 + }, + { + "epoch": 0.3120908564991018, + "grad_norm": 0.9827881358594166, + "learning_rate": 4.967912860147151e-06, + "loss": 0.5163, + "step": 5125 + }, + { + "epoch": 0.3121517522759797, + "grad_norm": 1.0217208418963493, + "learning_rate": 4.9679001152784655e-06, + "loss": 0.5244, + "step": 5126 + }, + { + "epoch": 0.31221264805285753, + "grad_norm": 1.0079779412882612, + "learning_rate": 4.967887367895535e-06, + "loss": 0.483, + "step": 5127 + }, + { + "epoch": 0.31227354382973543, + "grad_norm": 1.0092697160057587, + "learning_rate": 4.9678746179983715e-06, + "loss": 0.453, + "step": 5128 + }, + { + "epoch": 0.3123344396066133, + "grad_norm": 1.0610488532758704, + "learning_rate": 4.967861865586989e-06, + "loss": 0.5073, + "step": 5129 + }, + { + "epoch": 0.3123953353834912, + "grad_norm": 1.0575312906545655, + "learning_rate": 4.967849110661401e-06, + "loss": 0.4659, + "step": 5130 + }, + { + "epoch": 0.312456231160369, + "grad_norm": 1.004383684049922, + "learning_rate": 4.967836353221619e-06, + "loss": 0.4958, + "step": 5131 + }, + { + "epoch": 0.3125171269372469, + "grad_norm": 0.9821399252078046, + "learning_rate": 4.967823593267657e-06, + "loss": 0.4943, + "step": 5132 + }, + { + "epoch": 0.31257802271412477, + "grad_norm": 1.032152585769433, + "learning_rate": 4.967810830799527e-06, + "loss": 0.4551, + "step": 5133 + }, + { + "epoch": 0.31263891849100267, + "grad_norm": 0.9960900980174974, + "learning_rate": 4.967798065817243e-06, + "loss": 0.5007, + "step": 5134 + }, + { + "epoch": 0.3126998142678805, + "grad_norm": 1.0701554295589961, + "learning_rate": 4.967785298320817e-06, + "loss": 0.4512, + "step": 5135 + }, + { + "epoch": 0.3127607100447584, + "grad_norm": 1.0544001297742749, + "learning_rate": 4.9677725283102635e-06, + "loss": 0.4735, + "step": 5136 + }, + { + "epoch": 0.31282160582163626, + "grad_norm": 1.021011333390711, + "learning_rate": 4.967759755785594e-06, + "loss": 0.5076, + "step": 5137 + }, + { + "epoch": 0.31288250159851416, + "grad_norm": 1.039385029928438, + "learning_rate": 4.967746980746823e-06, + "loss": 0.4372, + "step": 5138 + }, + { + "epoch": 0.312943397375392, + "grad_norm": 0.975909786746416, + "learning_rate": 4.9677342031939625e-06, + "loss": 0.5526, + "step": 5139 + }, + { + "epoch": 0.3130042931522699, + "grad_norm": 0.9753685698177517, + "learning_rate": 4.967721423127025e-06, + "loss": 0.5066, + "step": 5140 + }, + { + "epoch": 0.31306518892914775, + "grad_norm": 1.002098027327078, + "learning_rate": 4.9677086405460244e-06, + "loss": 0.4998, + "step": 5141 + }, + { + "epoch": 0.31312608470602565, + "grad_norm": 1.0592575398938675, + "learning_rate": 4.967695855450974e-06, + "loss": 0.4155, + "step": 5142 + }, + { + "epoch": 0.3131869804829035, + "grad_norm": 1.0743444798906776, + "learning_rate": 4.967683067841887e-06, + "loss": 0.422, + "step": 5143 + }, + { + "epoch": 0.3132478762597814, + "grad_norm": 0.9861369460124829, + "learning_rate": 4.967670277718774e-06, + "loss": 0.4857, + "step": 5144 + }, + { + "epoch": 0.31330877203665924, + "grad_norm": 1.0726165682837618, + "learning_rate": 4.96765748508165e-06, + "loss": 0.4527, + "step": 5145 + }, + { + "epoch": 0.31336966781353714, + "grad_norm": 1.0947909078801936, + "learning_rate": 4.9676446899305295e-06, + "loss": 0.4085, + "step": 5146 + }, + { + "epoch": 0.313430563590415, + "grad_norm": 1.0179623226618648, + "learning_rate": 4.9676318922654234e-06, + "loss": 0.4754, + "step": 5147 + }, + { + "epoch": 0.3134914593672929, + "grad_norm": 1.0838382599504857, + "learning_rate": 4.967619092086344e-06, + "loss": 0.4496, + "step": 5148 + }, + { + "epoch": 0.31355235514417074, + "grad_norm": 1.120816199417892, + "learning_rate": 4.967606289393306e-06, + "loss": 0.4312, + "step": 5149 + }, + { + "epoch": 0.31361325092104864, + "grad_norm": 1.0244105066156357, + "learning_rate": 4.967593484186323e-06, + "loss": 0.3969, + "step": 5150 + }, + { + "epoch": 0.3136741466979265, + "grad_norm": 1.0787757718424287, + "learning_rate": 4.967580676465407e-06, + "loss": 0.5213, + "step": 5151 + }, + { + "epoch": 0.3137350424748044, + "grad_norm": 1.0717575960639651, + "learning_rate": 4.967567866230571e-06, + "loss": 0.516, + "step": 5152 + }, + { + "epoch": 0.3137959382516822, + "grad_norm": 1.0765708026960146, + "learning_rate": 4.967555053481827e-06, + "loss": 0.4871, + "step": 5153 + }, + { + "epoch": 0.3138568340285601, + "grad_norm": 0.9747025637472024, + "learning_rate": 4.96754223821919e-06, + "loss": 0.5041, + "step": 5154 + }, + { + "epoch": 0.313917729805438, + "grad_norm": 1.0232788887105977, + "learning_rate": 4.967529420442672e-06, + "loss": 0.4866, + "step": 5155 + }, + { + "epoch": 0.3139786255823159, + "grad_norm": 1.0560063321253057, + "learning_rate": 4.967516600152287e-06, + "loss": 0.4731, + "step": 5156 + }, + { + "epoch": 0.3140395213591937, + "grad_norm": 1.1460914100110102, + "learning_rate": 4.967503777348046e-06, + "loss": 0.5381, + "step": 5157 + }, + { + "epoch": 0.3141004171360716, + "grad_norm": 0.9913844222994829, + "learning_rate": 4.967490952029965e-06, + "loss": 0.5296, + "step": 5158 + }, + { + "epoch": 0.31416131291294946, + "grad_norm": 1.0245303722363153, + "learning_rate": 4.967478124198054e-06, + "loss": 0.5088, + "step": 5159 + }, + { + "epoch": 0.31422220868982736, + "grad_norm": 1.0087074505602667, + "learning_rate": 4.967465293852329e-06, + "loss": 0.5282, + "step": 5160 + }, + { + "epoch": 0.3142831044667052, + "grad_norm": 1.10107940406176, + "learning_rate": 4.967452460992802e-06, + "loss": 0.4259, + "step": 5161 + }, + { + "epoch": 0.3143440002435831, + "grad_norm": 1.0154212951906314, + "learning_rate": 4.9674396256194845e-06, + "loss": 0.4858, + "step": 5162 + }, + { + "epoch": 0.31440489602046096, + "grad_norm": 1.1509542254432117, + "learning_rate": 4.967426787732391e-06, + "loss": 0.423, + "step": 5163 + }, + { + "epoch": 0.31446579179733886, + "grad_norm": 1.0771217347106425, + "learning_rate": 4.967413947331535e-06, + "loss": 0.4695, + "step": 5164 + }, + { + "epoch": 0.3145266875742167, + "grad_norm": 1.060254826777119, + "learning_rate": 4.967401104416928e-06, + "loss": 0.4748, + "step": 5165 + }, + { + "epoch": 0.3145875833510946, + "grad_norm": 1.0237747775346377, + "learning_rate": 4.9673882589885845e-06, + "loss": 0.5199, + "step": 5166 + }, + { + "epoch": 0.3146484791279725, + "grad_norm": 0.9954987116537547, + "learning_rate": 4.967375411046518e-06, + "loss": 0.4783, + "step": 5167 + }, + { + "epoch": 0.31470937490485035, + "grad_norm": 1.0664643953101764, + "learning_rate": 4.967362560590739e-06, + "loss": 0.4752, + "step": 5168 + }, + { + "epoch": 0.31477027068172825, + "grad_norm": 0.9759886291301523, + "learning_rate": 4.967349707621264e-06, + "loss": 0.4868, + "step": 5169 + }, + { + "epoch": 0.3148311664586061, + "grad_norm": 1.0932100184401297, + "learning_rate": 4.967336852138104e-06, + "loss": 0.4455, + "step": 5170 + }, + { + "epoch": 0.314892062235484, + "grad_norm": 1.0494460756915363, + "learning_rate": 4.967323994141272e-06, + "loss": 0.4364, + "step": 5171 + }, + { + "epoch": 0.31495295801236184, + "grad_norm": 0.9736215824638996, + "learning_rate": 4.9673111336307815e-06, + "loss": 0.4842, + "step": 5172 + }, + { + "epoch": 0.31501385378923974, + "grad_norm": 1.0507166522675302, + "learning_rate": 4.967298270606646e-06, + "loss": 0.5474, + "step": 5173 + }, + { + "epoch": 0.3150747495661176, + "grad_norm": 0.9590860179643425, + "learning_rate": 4.967285405068878e-06, + "loss": 0.481, + "step": 5174 + }, + { + "epoch": 0.3151356453429955, + "grad_norm": 1.0849201526111079, + "learning_rate": 4.967272537017492e-06, + "loss": 0.5294, + "step": 5175 + }, + { + "epoch": 0.31519654111987333, + "grad_norm": 1.0213313137864841, + "learning_rate": 4.967259666452499e-06, + "loss": 0.4842, + "step": 5176 + }, + { + "epoch": 0.31525743689675123, + "grad_norm": 1.0674407979718614, + "learning_rate": 4.967246793373914e-06, + "loss": 0.4931, + "step": 5177 + }, + { + "epoch": 0.3153183326736291, + "grad_norm": 1.025745581144172, + "learning_rate": 4.9672339177817485e-06, + "loss": 0.4831, + "step": 5178 + }, + { + "epoch": 0.315379228450507, + "grad_norm": 1.0603844106779188, + "learning_rate": 4.967221039676016e-06, + "loss": 0.4979, + "step": 5179 + }, + { + "epoch": 0.3154401242273848, + "grad_norm": 0.9233946628773994, + "learning_rate": 4.967208159056731e-06, + "loss": 0.4885, + "step": 5180 + }, + { + "epoch": 0.3155010200042627, + "grad_norm": 1.0014458175507566, + "learning_rate": 4.967195275923905e-06, + "loss": 0.5563, + "step": 5181 + }, + { + "epoch": 0.31556191578114057, + "grad_norm": 0.9964051295852098, + "learning_rate": 4.967182390277553e-06, + "loss": 0.4737, + "step": 5182 + }, + { + "epoch": 0.31562281155801847, + "grad_norm": 1.0367698456229089, + "learning_rate": 4.967169502117685e-06, + "loss": 0.4109, + "step": 5183 + }, + { + "epoch": 0.3156837073348963, + "grad_norm": 0.9961950497326734, + "learning_rate": 4.967156611444317e-06, + "loss": 0.4956, + "step": 5184 + }, + { + "epoch": 0.3157446031117742, + "grad_norm": 1.0139530269654236, + "learning_rate": 4.967143718257461e-06, + "loss": 0.464, + "step": 5185 + }, + { + "epoch": 0.31580549888865206, + "grad_norm": 1.0147739040637103, + "learning_rate": 4.9671308225571305e-06, + "loss": 0.4849, + "step": 5186 + }, + { + "epoch": 0.31586639466552996, + "grad_norm": 1.057885802561059, + "learning_rate": 4.9671179243433385e-06, + "loss": 0.4254, + "step": 5187 + }, + { + "epoch": 0.3159272904424078, + "grad_norm": 1.013440651821097, + "learning_rate": 4.967105023616097e-06, + "loss": 0.4657, + "step": 5188 + }, + { + "epoch": 0.3159881862192857, + "grad_norm": 1.1951178228896153, + "learning_rate": 4.967092120375422e-06, + "loss": 0.4406, + "step": 5189 + }, + { + "epoch": 0.31604908199616355, + "grad_norm": 1.016042717519446, + "learning_rate": 4.967079214621323e-06, + "loss": 0.4075, + "step": 5190 + }, + { + "epoch": 0.31610997777304145, + "grad_norm": 1.0347652402172578, + "learning_rate": 4.967066306353816e-06, + "loss": 0.4536, + "step": 5191 + }, + { + "epoch": 0.3161708735499193, + "grad_norm": 1.0853364831517276, + "learning_rate": 4.967053395572913e-06, + "loss": 0.4254, + "step": 5192 + }, + { + "epoch": 0.3162317693267972, + "grad_norm": 1.1021755350750566, + "learning_rate": 4.9670404822786266e-06, + "loss": 0.4185, + "step": 5193 + }, + { + "epoch": 0.31629266510367504, + "grad_norm": 1.0347735606678234, + "learning_rate": 4.967027566470972e-06, + "loss": 0.5126, + "step": 5194 + }, + { + "epoch": 0.31635356088055294, + "grad_norm": 1.073878364720785, + "learning_rate": 4.96701464814996e-06, + "loss": 0.4959, + "step": 5195 + }, + { + "epoch": 0.3164144566574308, + "grad_norm": 0.9737813411200141, + "learning_rate": 4.9670017273156045e-06, + "loss": 0.4686, + "step": 5196 + }, + { + "epoch": 0.3164753524343087, + "grad_norm": 1.0282399951065826, + "learning_rate": 4.96698880396792e-06, + "loss": 0.5368, + "step": 5197 + }, + { + "epoch": 0.31653624821118653, + "grad_norm": 0.993455119842797, + "learning_rate": 4.966975878106918e-06, + "loss": 0.516, + "step": 5198 + }, + { + "epoch": 0.31659714398806443, + "grad_norm": 1.0767053677612632, + "learning_rate": 4.9669629497326126e-06, + "loss": 0.4495, + "step": 5199 + }, + { + "epoch": 0.3166580397649423, + "grad_norm": 1.1250318652652074, + "learning_rate": 4.966950018845016e-06, + "loss": 0.5841, + "step": 5200 + }, + { + "epoch": 0.3167189355418202, + "grad_norm": 1.0580688737587123, + "learning_rate": 4.966937085444142e-06, + "loss": 0.5384, + "step": 5201 + }, + { + "epoch": 0.316779831318698, + "grad_norm": 1.0396226465300666, + "learning_rate": 4.966924149530005e-06, + "loss": 0.4715, + "step": 5202 + }, + { + "epoch": 0.3168407270955759, + "grad_norm": 1.0241968073086807, + "learning_rate": 4.9669112111026154e-06, + "loss": 0.5179, + "step": 5203 + }, + { + "epoch": 0.31690162287245377, + "grad_norm": 0.9896675493427827, + "learning_rate": 4.966898270161988e-06, + "loss": 0.4685, + "step": 5204 + }, + { + "epoch": 0.31696251864933167, + "grad_norm": 0.9761343316758749, + "learning_rate": 4.9668853267081375e-06, + "loss": 0.4637, + "step": 5205 + }, + { + "epoch": 0.3170234144262095, + "grad_norm": 1.0495267309248282, + "learning_rate": 4.966872380741074e-06, + "loss": 0.482, + "step": 5206 + }, + { + "epoch": 0.3170843102030874, + "grad_norm": 1.0766868032854158, + "learning_rate": 4.966859432260813e-06, + "loss": 0.42, + "step": 5207 + }, + { + "epoch": 0.3171452059799653, + "grad_norm": 1.0263533337914674, + "learning_rate": 4.966846481267367e-06, + "loss": 0.5597, + "step": 5208 + }, + { + "epoch": 0.31720610175684316, + "grad_norm": 1.031027424343123, + "learning_rate": 4.966833527760749e-06, + "loss": 0.4957, + "step": 5209 + }, + { + "epoch": 0.31726699753372106, + "grad_norm": 1.0356210009409395, + "learning_rate": 4.9668205717409715e-06, + "loss": 0.5433, + "step": 5210 + }, + { + "epoch": 0.3173278933105989, + "grad_norm": 1.002360090822641, + "learning_rate": 4.966807613208049e-06, + "loss": 0.4447, + "step": 5211 + }, + { + "epoch": 0.3173887890874768, + "grad_norm": 0.9490554765320061, + "learning_rate": 4.966794652161995e-06, + "loss": 0.5111, + "step": 5212 + }, + { + "epoch": 0.31744968486435465, + "grad_norm": 0.9913620172122898, + "learning_rate": 4.966781688602821e-06, + "loss": 0.494, + "step": 5213 + }, + { + "epoch": 0.31751058064123255, + "grad_norm": 1.1134517565072355, + "learning_rate": 4.966768722530541e-06, + "loss": 0.458, + "step": 5214 + }, + { + "epoch": 0.3175714764181104, + "grad_norm": 1.128614289283085, + "learning_rate": 4.966755753945168e-06, + "loss": 0.527, + "step": 5215 + }, + { + "epoch": 0.3176323721949883, + "grad_norm": 1.0461813842968541, + "learning_rate": 4.966742782846717e-06, + "loss": 0.5075, + "step": 5216 + }, + { + "epoch": 0.31769326797186614, + "grad_norm": 0.9111136105332313, + "learning_rate": 4.966729809235199e-06, + "loss": 0.5192, + "step": 5217 + }, + { + "epoch": 0.31775416374874405, + "grad_norm": 1.0140206842036108, + "learning_rate": 4.966716833110627e-06, + "loss": 0.5137, + "step": 5218 + }, + { + "epoch": 0.3178150595256219, + "grad_norm": 1.1004716199105786, + "learning_rate": 4.966703854473016e-06, + "loss": 0.4183, + "step": 5219 + }, + { + "epoch": 0.3178759553024998, + "grad_norm": 1.0680326300216205, + "learning_rate": 4.966690873322379e-06, + "loss": 0.52, + "step": 5220 + }, + { + "epoch": 0.31793685107937764, + "grad_norm": 1.1294550521149913, + "learning_rate": 4.966677889658727e-06, + "loss": 0.4894, + "step": 5221 + }, + { + "epoch": 0.31799774685625554, + "grad_norm": 0.9989332897459482, + "learning_rate": 4.966664903482077e-06, + "loss": 0.4466, + "step": 5222 + }, + { + "epoch": 0.3180586426331334, + "grad_norm": 1.0663824450711468, + "learning_rate": 4.966651914792438e-06, + "loss": 0.4638, + "step": 5223 + }, + { + "epoch": 0.3181195384100113, + "grad_norm": 0.9978886955754611, + "learning_rate": 4.966638923589826e-06, + "loss": 0.5356, + "step": 5224 + }, + { + "epoch": 0.3181804341868891, + "grad_norm": 0.9647445514580674, + "learning_rate": 4.966625929874254e-06, + "loss": 0.501, + "step": 5225 + }, + { + "epoch": 0.31824132996376703, + "grad_norm": 1.1570338910985234, + "learning_rate": 4.966612933645735e-06, + "loss": 0.4763, + "step": 5226 + }, + { + "epoch": 0.3183022257406449, + "grad_norm": 1.029038009408433, + "learning_rate": 4.966599934904281e-06, + "loss": 0.4855, + "step": 5227 + }, + { + "epoch": 0.3183631215175228, + "grad_norm": 1.1025559631038306, + "learning_rate": 4.966586933649907e-06, + "loss": 0.4692, + "step": 5228 + }, + { + "epoch": 0.3184240172944006, + "grad_norm": 1.0797213652909536, + "learning_rate": 4.966573929882625e-06, + "loss": 0.5103, + "step": 5229 + }, + { + "epoch": 0.3184849130712785, + "grad_norm": 1.1191169021320893, + "learning_rate": 4.96656092360245e-06, + "loss": 0.4976, + "step": 5230 + }, + { + "epoch": 0.31854580884815636, + "grad_norm": 0.9840511039611001, + "learning_rate": 4.966547914809393e-06, + "loss": 0.5079, + "step": 5231 + }, + { + "epoch": 0.31860670462503426, + "grad_norm": 0.9762653856323117, + "learning_rate": 4.966534903503469e-06, + "loss": 0.482, + "step": 5232 + }, + { + "epoch": 0.3186676004019121, + "grad_norm": 1.0087535788662907, + "learning_rate": 4.96652188968469e-06, + "loss": 0.4537, + "step": 5233 + }, + { + "epoch": 0.31872849617879, + "grad_norm": 1.0025825450585968, + "learning_rate": 4.966508873353069e-06, + "loss": 0.4183, + "step": 5234 + }, + { + "epoch": 0.31878939195566786, + "grad_norm": 0.9670296322272832, + "learning_rate": 4.966495854508621e-06, + "loss": 0.5255, + "step": 5235 + }, + { + "epoch": 0.31885028773254576, + "grad_norm": 1.0929291076365029, + "learning_rate": 4.9664828331513585e-06, + "loss": 0.4897, + "step": 5236 + }, + { + "epoch": 0.3189111835094236, + "grad_norm": 1.0674021647660434, + "learning_rate": 4.966469809281294e-06, + "loss": 0.4615, + "step": 5237 + }, + { + "epoch": 0.3189720792863015, + "grad_norm": 1.1161719536886323, + "learning_rate": 4.966456782898441e-06, + "loss": 0.4645, + "step": 5238 + }, + { + "epoch": 0.31903297506317935, + "grad_norm": 1.009593753939317, + "learning_rate": 4.9664437540028135e-06, + "loss": 0.5382, + "step": 5239 + }, + { + "epoch": 0.31909387084005725, + "grad_norm": 1.025123557208146, + "learning_rate": 4.966430722594424e-06, + "loss": 0.4427, + "step": 5240 + }, + { + "epoch": 0.3191547666169351, + "grad_norm": 1.143765240046962, + "learning_rate": 4.966417688673287e-06, + "loss": 0.5381, + "step": 5241 + }, + { + "epoch": 0.319215662393813, + "grad_norm": 1.0561533678595723, + "learning_rate": 4.966404652239415e-06, + "loss": 0.5628, + "step": 5242 + }, + { + "epoch": 0.31927655817069084, + "grad_norm": 1.025167043313535, + "learning_rate": 4.9663916132928205e-06, + "loss": 0.4419, + "step": 5243 + }, + { + "epoch": 0.31933745394756874, + "grad_norm": 1.0549666885150353, + "learning_rate": 4.9663785718335176e-06, + "loss": 0.4255, + "step": 5244 + }, + { + "epoch": 0.3193983497244466, + "grad_norm": 1.0856236094064156, + "learning_rate": 4.966365527861519e-06, + "loss": 0.439, + "step": 5245 + }, + { + "epoch": 0.3194592455013245, + "grad_norm": 1.15186293614338, + "learning_rate": 4.96635248137684e-06, + "loss": 0.5097, + "step": 5246 + }, + { + "epoch": 0.31952014127820233, + "grad_norm": 1.0176238180586639, + "learning_rate": 4.966339432379491e-06, + "loss": 0.4666, + "step": 5247 + }, + { + "epoch": 0.31958103705508023, + "grad_norm": 1.1417038735488148, + "learning_rate": 4.9663263808694876e-06, + "loss": 0.4531, + "step": 5248 + }, + { + "epoch": 0.31964193283195813, + "grad_norm": 1.0007650137426092, + "learning_rate": 4.966313326846842e-06, + "loss": 0.4612, + "step": 5249 + }, + { + "epoch": 0.319702828608836, + "grad_norm": 0.9898036601313632, + "learning_rate": 4.966300270311567e-06, + "loss": 0.4463, + "step": 5250 + }, + { + "epoch": 0.3197637243857139, + "grad_norm": 0.9564685110755365, + "learning_rate": 4.966287211263678e-06, + "loss": 0.5054, + "step": 5251 + }, + { + "epoch": 0.3198246201625917, + "grad_norm": 0.9770561361436657, + "learning_rate": 4.966274149703185e-06, + "loss": 0.4654, + "step": 5252 + }, + { + "epoch": 0.3198855159394696, + "grad_norm": 0.9772496025927199, + "learning_rate": 4.966261085630104e-06, + "loss": 0.5085, + "step": 5253 + }, + { + "epoch": 0.31994641171634747, + "grad_norm": 1.0741625834500306, + "learning_rate": 4.966248019044447e-06, + "loss": 0.4822, + "step": 5254 + }, + { + "epoch": 0.32000730749322537, + "grad_norm": 1.068926811779887, + "learning_rate": 4.966234949946228e-06, + "loss": 0.4671, + "step": 5255 + }, + { + "epoch": 0.3200682032701032, + "grad_norm": 0.968541046503007, + "learning_rate": 4.966221878335461e-06, + "loss": 0.4737, + "step": 5256 + }, + { + "epoch": 0.3201290990469811, + "grad_norm": 0.9821241799272363, + "learning_rate": 4.966208804212157e-06, + "loss": 0.4644, + "step": 5257 + }, + { + "epoch": 0.32018999482385896, + "grad_norm": 0.9798028953200881, + "learning_rate": 4.966195727576332e-06, + "loss": 0.4217, + "step": 5258 + }, + { + "epoch": 0.32025089060073686, + "grad_norm": 1.048403243728136, + "learning_rate": 4.966182648427997e-06, + "loss": 0.4725, + "step": 5259 + }, + { + "epoch": 0.3203117863776147, + "grad_norm": 1.0648190038950944, + "learning_rate": 4.966169566767168e-06, + "loss": 0.4249, + "step": 5260 + }, + { + "epoch": 0.3203726821544926, + "grad_norm": 1.0419075697536015, + "learning_rate": 4.966156482593856e-06, + "loss": 0.4533, + "step": 5261 + }, + { + "epoch": 0.32043357793137045, + "grad_norm": 0.9828492802872503, + "learning_rate": 4.966143395908074e-06, + "loss": 0.5005, + "step": 5262 + }, + { + "epoch": 0.32049447370824835, + "grad_norm": 1.1086211802171517, + "learning_rate": 4.966130306709837e-06, + "loss": 0.4951, + "step": 5263 + }, + { + "epoch": 0.3205553694851262, + "grad_norm": 1.061070651711483, + "learning_rate": 4.966117214999157e-06, + "loss": 0.495, + "step": 5264 + }, + { + "epoch": 0.3206162652620041, + "grad_norm": 1.0764906693215783, + "learning_rate": 4.966104120776049e-06, + "loss": 0.4396, + "step": 5265 + }, + { + "epoch": 0.32067716103888194, + "grad_norm": 1.111089952714541, + "learning_rate": 4.9660910240405265e-06, + "loss": 0.4162, + "step": 5266 + }, + { + "epoch": 0.32073805681575984, + "grad_norm": 1.0201253099730319, + "learning_rate": 4.966077924792601e-06, + "loss": 0.5672, + "step": 5267 + }, + { + "epoch": 0.3207989525926377, + "grad_norm": 1.1863800859546674, + "learning_rate": 4.966064823032285e-06, + "loss": 0.4919, + "step": 5268 + }, + { + "epoch": 0.3208598483695156, + "grad_norm": 0.9702455405691068, + "learning_rate": 4.966051718759595e-06, + "loss": 0.5095, + "step": 5269 + }, + { + "epoch": 0.32092074414639343, + "grad_norm": 1.0361345843297132, + "learning_rate": 4.966038611974542e-06, + "loss": 0.4029, + "step": 5270 + }, + { + "epoch": 0.32098163992327133, + "grad_norm": 0.9872437037376248, + "learning_rate": 4.966025502677141e-06, + "loss": 0.4718, + "step": 5271 + }, + { + "epoch": 0.3210425357001492, + "grad_norm": 0.9829421450411355, + "learning_rate": 4.966012390867404e-06, + "loss": 0.4704, + "step": 5272 + }, + { + "epoch": 0.3211034314770271, + "grad_norm": 1.0313116473488897, + "learning_rate": 4.965999276545344e-06, + "loss": 0.5036, + "step": 5273 + }, + { + "epoch": 0.3211643272539049, + "grad_norm": 0.9768598128963751, + "learning_rate": 4.9659861597109764e-06, + "loss": 0.5167, + "step": 5274 + }, + { + "epoch": 0.3212252230307828, + "grad_norm": 0.9619050468890626, + "learning_rate": 4.965973040364313e-06, + "loss": 0.4939, + "step": 5275 + }, + { + "epoch": 0.32128611880766067, + "grad_norm": 0.9890244680008914, + "learning_rate": 4.965959918505368e-06, + "loss": 0.4962, + "step": 5276 + }, + { + "epoch": 0.32134701458453857, + "grad_norm": 0.9743627367802712, + "learning_rate": 4.965946794134153e-06, + "loss": 0.4483, + "step": 5277 + }, + { + "epoch": 0.3214079103614164, + "grad_norm": 1.0020512047248733, + "learning_rate": 4.965933667250683e-06, + "loss": 0.5542, + "step": 5278 + }, + { + "epoch": 0.3214688061382943, + "grad_norm": 1.0179662819624535, + "learning_rate": 4.965920537854973e-06, + "loss": 0.5295, + "step": 5279 + }, + { + "epoch": 0.32152970191517216, + "grad_norm": 1.1371379918934763, + "learning_rate": 4.965907405947033e-06, + "loss": 0.4577, + "step": 5280 + }, + { + "epoch": 0.32159059769205006, + "grad_norm": 1.0756820680722052, + "learning_rate": 4.965894271526877e-06, + "loss": 0.4688, + "step": 5281 + }, + { + "epoch": 0.3216514934689279, + "grad_norm": 1.0193450225007683, + "learning_rate": 4.96588113459452e-06, + "loss": 0.4601, + "step": 5282 + }, + { + "epoch": 0.3217123892458058, + "grad_norm": 0.9924944930726552, + "learning_rate": 4.965867995149974e-06, + "loss": 0.4626, + "step": 5283 + }, + { + "epoch": 0.32177328502268365, + "grad_norm": 0.985337322923826, + "learning_rate": 4.965854853193254e-06, + "loss": 0.426, + "step": 5284 + }, + { + "epoch": 0.32183418079956155, + "grad_norm": 1.0695885892582881, + "learning_rate": 4.965841708724372e-06, + "loss": 0.4779, + "step": 5285 + }, + { + "epoch": 0.3218950765764394, + "grad_norm": 1.0538907059785014, + "learning_rate": 4.965828561743341e-06, + "loss": 0.5583, + "step": 5286 + }, + { + "epoch": 0.3219559723533173, + "grad_norm": 1.1126699506278155, + "learning_rate": 4.965815412250176e-06, + "loss": 0.4254, + "step": 5287 + }, + { + "epoch": 0.32201686813019514, + "grad_norm": 0.9950146090409945, + "learning_rate": 4.965802260244889e-06, + "loss": 0.5558, + "step": 5288 + }, + { + "epoch": 0.32207776390707304, + "grad_norm": 0.9452268100931123, + "learning_rate": 4.965789105727494e-06, + "loss": 0.4676, + "step": 5289 + }, + { + "epoch": 0.32213865968395095, + "grad_norm": 1.0053869812239304, + "learning_rate": 4.965775948698005e-06, + "loss": 0.4563, + "step": 5290 + }, + { + "epoch": 0.3221995554608288, + "grad_norm": 0.9610110891673767, + "learning_rate": 4.965762789156434e-06, + "loss": 0.5339, + "step": 5291 + }, + { + "epoch": 0.3222604512377067, + "grad_norm": 1.0587284397526124, + "learning_rate": 4.965749627102795e-06, + "loss": 0.5871, + "step": 5292 + }, + { + "epoch": 0.32232134701458454, + "grad_norm": 0.972153841318407, + "learning_rate": 4.965736462537102e-06, + "loss": 0.4952, + "step": 5293 + }, + { + "epoch": 0.32238224279146244, + "grad_norm": 1.070224980729414, + "learning_rate": 4.965723295459367e-06, + "loss": 0.4721, + "step": 5294 + }, + { + "epoch": 0.3224431385683403, + "grad_norm": 0.9643074738844868, + "learning_rate": 4.965710125869606e-06, + "loss": 0.4769, + "step": 5295 + }, + { + "epoch": 0.3225040343452182, + "grad_norm": 1.1419679449094908, + "learning_rate": 4.9656969537678295e-06, + "loss": 0.4254, + "step": 5296 + }, + { + "epoch": 0.322564930122096, + "grad_norm": 1.0764453189450254, + "learning_rate": 4.965683779154053e-06, + "loss": 0.4374, + "step": 5297 + }, + { + "epoch": 0.32262582589897393, + "grad_norm": 1.056704701081694, + "learning_rate": 4.965670602028289e-06, + "loss": 0.4288, + "step": 5298 + }, + { + "epoch": 0.3226867216758518, + "grad_norm": 1.0610201679692526, + "learning_rate": 4.9656574223905505e-06, + "loss": 0.4075, + "step": 5299 + }, + { + "epoch": 0.3227476174527297, + "grad_norm": 1.117401846272463, + "learning_rate": 4.965644240240852e-06, + "loss": 0.4105, + "step": 5300 + }, + { + "epoch": 0.3228085132296075, + "grad_norm": 1.0307577306493014, + "learning_rate": 4.965631055579206e-06, + "loss": 0.4711, + "step": 5301 + }, + { + "epoch": 0.3228694090064854, + "grad_norm": 1.0700149978808409, + "learning_rate": 4.965617868405627e-06, + "loss": 0.5088, + "step": 5302 + }, + { + "epoch": 0.32293030478336326, + "grad_norm": 0.9520757970127525, + "learning_rate": 4.965604678720128e-06, + "loss": 0.4448, + "step": 5303 + }, + { + "epoch": 0.32299120056024117, + "grad_norm": 1.0613253632885506, + "learning_rate": 4.96559148652272e-06, + "loss": 0.5065, + "step": 5304 + }, + { + "epoch": 0.323052096337119, + "grad_norm": 1.0565151070456502, + "learning_rate": 4.965578291813421e-06, + "loss": 0.4871, + "step": 5305 + }, + { + "epoch": 0.3231129921139969, + "grad_norm": 1.0640647937929493, + "learning_rate": 4.9655650945922405e-06, + "loss": 0.511, + "step": 5306 + }, + { + "epoch": 0.32317388789087476, + "grad_norm": 1.0819492954087684, + "learning_rate": 4.965551894859195e-06, + "loss": 0.4456, + "step": 5307 + }, + { + "epoch": 0.32323478366775266, + "grad_norm": 1.0198192745648615, + "learning_rate": 4.965538692614296e-06, + "loss": 0.4916, + "step": 5308 + }, + { + "epoch": 0.3232956794446305, + "grad_norm": 1.1016141622231534, + "learning_rate": 4.965525487857557e-06, + "loss": 0.477, + "step": 5309 + }, + { + "epoch": 0.3233565752215084, + "grad_norm": 1.077204908433035, + "learning_rate": 4.965512280588992e-06, + "loss": 0.481, + "step": 5310 + }, + { + "epoch": 0.32341747099838625, + "grad_norm": 1.043149138754214, + "learning_rate": 4.9654990708086144e-06, + "loss": 0.4458, + "step": 5311 + }, + { + "epoch": 0.32347836677526415, + "grad_norm": 0.9855101405555371, + "learning_rate": 4.965485858516438e-06, + "loss": 0.4582, + "step": 5312 + }, + { + "epoch": 0.323539262552142, + "grad_norm": 1.0765677385199608, + "learning_rate": 4.965472643712476e-06, + "loss": 0.4472, + "step": 5313 + }, + { + "epoch": 0.3236001583290199, + "grad_norm": 1.1872396982243296, + "learning_rate": 4.965459426396741e-06, + "loss": 0.3781, + "step": 5314 + }, + { + "epoch": 0.32366105410589774, + "grad_norm": 1.1200319154638796, + "learning_rate": 4.965446206569248e-06, + "loss": 0.4386, + "step": 5315 + }, + { + "epoch": 0.32372194988277564, + "grad_norm": 0.9977326650695858, + "learning_rate": 4.9654329842300086e-06, + "loss": 0.4871, + "step": 5316 + }, + { + "epoch": 0.3237828456596535, + "grad_norm": 1.0099712277387412, + "learning_rate": 4.965419759379038e-06, + "loss": 0.4685, + "step": 5317 + }, + { + "epoch": 0.3238437414365314, + "grad_norm": 1.1268878314238044, + "learning_rate": 4.965406532016349e-06, + "loss": 0.4252, + "step": 5318 + }, + { + "epoch": 0.32390463721340923, + "grad_norm": 0.9835658880865078, + "learning_rate": 4.965393302141955e-06, + "loss": 0.461, + "step": 5319 + }, + { + "epoch": 0.32396553299028713, + "grad_norm": 1.0878997959472292, + "learning_rate": 4.96538006975587e-06, + "loss": 0.4924, + "step": 5320 + }, + { + "epoch": 0.324026428767165, + "grad_norm": 0.9921212242171656, + "learning_rate": 4.965366834858107e-06, + "loss": 0.4598, + "step": 5321 + }, + { + "epoch": 0.3240873245440429, + "grad_norm": 0.9783533620252665, + "learning_rate": 4.9653535974486785e-06, + "loss": 0.4669, + "step": 5322 + }, + { + "epoch": 0.3241482203209207, + "grad_norm": 1.0069718686664204, + "learning_rate": 4.965340357527599e-06, + "loss": 0.4498, + "step": 5323 + }, + { + "epoch": 0.3242091160977986, + "grad_norm": 1.031074544643116, + "learning_rate": 4.965327115094883e-06, + "loss": 0.4873, + "step": 5324 + }, + { + "epoch": 0.32427001187467647, + "grad_norm": 0.9720821887839692, + "learning_rate": 4.965313870150543e-06, + "loss": 0.4784, + "step": 5325 + }, + { + "epoch": 0.32433090765155437, + "grad_norm": 1.080914841018448, + "learning_rate": 4.965300622694592e-06, + "loss": 0.4335, + "step": 5326 + }, + { + "epoch": 0.3243918034284322, + "grad_norm": 1.1547449342331648, + "learning_rate": 4.965287372727044e-06, + "loss": 0.4447, + "step": 5327 + }, + { + "epoch": 0.3244526992053101, + "grad_norm": 0.988456558822998, + "learning_rate": 4.965274120247913e-06, + "loss": 0.5163, + "step": 5328 + }, + { + "epoch": 0.32451359498218796, + "grad_norm": 0.9846730006081421, + "learning_rate": 4.965260865257211e-06, + "loss": 0.4846, + "step": 5329 + }, + { + "epoch": 0.32457449075906586, + "grad_norm": 0.9886267070484621, + "learning_rate": 4.965247607754953e-06, + "loss": 0.5415, + "step": 5330 + }, + { + "epoch": 0.32463538653594376, + "grad_norm": 1.0616080706166082, + "learning_rate": 4.965234347741153e-06, + "loss": 0.4372, + "step": 5331 + }, + { + "epoch": 0.3246962823128216, + "grad_norm": 0.9987927420750937, + "learning_rate": 4.965221085215822e-06, + "loss": 0.4569, + "step": 5332 + }, + { + "epoch": 0.3247571780896995, + "grad_norm": 1.1556869754195997, + "learning_rate": 4.965207820178976e-06, + "loss": 0.4332, + "step": 5333 + }, + { + "epoch": 0.32481807386657735, + "grad_norm": 1.0192941584090898, + "learning_rate": 4.965194552630626e-06, + "loss": 0.5315, + "step": 5334 + }, + { + "epoch": 0.32487896964345525, + "grad_norm": 1.073933369757585, + "learning_rate": 4.965181282570788e-06, + "loss": 0.4076, + "step": 5335 + }, + { + "epoch": 0.3249398654203331, + "grad_norm": 1.0615701894643383, + "learning_rate": 4.965168009999475e-06, + "loss": 0.5207, + "step": 5336 + }, + { + "epoch": 0.325000761197211, + "grad_norm": 0.984261603503034, + "learning_rate": 4.9651547349166995e-06, + "loss": 0.4913, + "step": 5337 + }, + { + "epoch": 0.32506165697408884, + "grad_norm": 1.1139616091061613, + "learning_rate": 4.9651414573224765e-06, + "loss": 0.4661, + "step": 5338 + }, + { + "epoch": 0.32512255275096674, + "grad_norm": 1.094189127583266, + "learning_rate": 4.965128177216818e-06, + "loss": 0.5265, + "step": 5339 + }, + { + "epoch": 0.3251834485278446, + "grad_norm": 1.008719583973579, + "learning_rate": 4.965114894599738e-06, + "loss": 0.4561, + "step": 5340 + }, + { + "epoch": 0.3252443443047225, + "grad_norm": 1.0722981632029192, + "learning_rate": 4.96510160947125e-06, + "loss": 0.4262, + "step": 5341 + }, + { + "epoch": 0.32530524008160033, + "grad_norm": 1.0179378811840754, + "learning_rate": 4.965088321831368e-06, + "loss": 0.4908, + "step": 5342 + }, + { + "epoch": 0.32536613585847823, + "grad_norm": 1.0327603279127926, + "learning_rate": 4.9650750316801055e-06, + "loss": 0.4315, + "step": 5343 + }, + { + "epoch": 0.3254270316353561, + "grad_norm": 1.0780054161332941, + "learning_rate": 4.965061739017476e-06, + "loss": 0.5261, + "step": 5344 + }, + { + "epoch": 0.325487927412234, + "grad_norm": 0.9890190782699287, + "learning_rate": 4.965048443843492e-06, + "loss": 0.5124, + "step": 5345 + }, + { + "epoch": 0.3255488231891118, + "grad_norm": 1.101394270623113, + "learning_rate": 4.965035146158168e-06, + "loss": 0.4496, + "step": 5346 + }, + { + "epoch": 0.3256097189659897, + "grad_norm": 1.0851466227412605, + "learning_rate": 4.965021845961518e-06, + "loss": 0.4856, + "step": 5347 + }, + { + "epoch": 0.32567061474286757, + "grad_norm": 1.1513990287261306, + "learning_rate": 4.965008543253555e-06, + "loss": 0.4532, + "step": 5348 + }, + { + "epoch": 0.32573151051974547, + "grad_norm": 1.0386476694136604, + "learning_rate": 4.964995238034293e-06, + "loss": 0.5142, + "step": 5349 + }, + { + "epoch": 0.3257924062966233, + "grad_norm": 0.950925600667484, + "learning_rate": 4.9649819303037445e-06, + "loss": 0.4809, + "step": 5350 + }, + { + "epoch": 0.3258533020735012, + "grad_norm": 1.0129824416937943, + "learning_rate": 4.964968620061923e-06, + "loss": 0.4814, + "step": 5351 + }, + { + "epoch": 0.32591419785037906, + "grad_norm": 0.9279766887430841, + "learning_rate": 4.964955307308844e-06, + "loss": 0.4539, + "step": 5352 + }, + { + "epoch": 0.32597509362725696, + "grad_norm": 1.1061614525701533, + "learning_rate": 4.964941992044519e-06, + "loss": 0.4349, + "step": 5353 + }, + { + "epoch": 0.3260359894041348, + "grad_norm": 1.0897452988626748, + "learning_rate": 4.964928674268963e-06, + "loss": 0.4311, + "step": 5354 + }, + { + "epoch": 0.3260968851810127, + "grad_norm": 1.0547096849660595, + "learning_rate": 4.964915353982188e-06, + "loss": 0.4275, + "step": 5355 + }, + { + "epoch": 0.32615778095789055, + "grad_norm": 1.0057016679472364, + "learning_rate": 4.964902031184209e-06, + "loss": 0.4835, + "step": 5356 + }, + { + "epoch": 0.32621867673476845, + "grad_norm": 1.0545616558165871, + "learning_rate": 4.964888705875039e-06, + "loss": 0.5273, + "step": 5357 + }, + { + "epoch": 0.3262795725116463, + "grad_norm": 1.0155965948594166, + "learning_rate": 4.964875378054691e-06, + "loss": 0.4924, + "step": 5358 + }, + { + "epoch": 0.3263404682885242, + "grad_norm": 1.040208702741414, + "learning_rate": 4.96486204772318e-06, + "loss": 0.4501, + "step": 5359 + }, + { + "epoch": 0.32640136406540204, + "grad_norm": 1.021457205364964, + "learning_rate": 4.964848714880519e-06, + "loss": 0.5381, + "step": 5360 + }, + { + "epoch": 0.32646225984227994, + "grad_norm": 1.0641789508140373, + "learning_rate": 4.964835379526721e-06, + "loss": 0.4833, + "step": 5361 + }, + { + "epoch": 0.3265231556191578, + "grad_norm": 1.0576304485658274, + "learning_rate": 4.9648220416618e-06, + "loss": 0.4407, + "step": 5362 + }, + { + "epoch": 0.3265840513960357, + "grad_norm": 1.0751180401059997, + "learning_rate": 4.964808701285769e-06, + "loss": 0.4613, + "step": 5363 + }, + { + "epoch": 0.32664494717291354, + "grad_norm": 1.0772977097087046, + "learning_rate": 4.964795358398643e-06, + "loss": 0.5177, + "step": 5364 + }, + { + "epoch": 0.32670584294979144, + "grad_norm": 1.0782283784960045, + "learning_rate": 4.964782013000434e-06, + "loss": 0.436, + "step": 5365 + }, + { + "epoch": 0.3267667387266693, + "grad_norm": 0.9558280746938584, + "learning_rate": 4.9647686650911564e-06, + "loss": 0.5557, + "step": 5366 + }, + { + "epoch": 0.3268276345035472, + "grad_norm": 1.1076045154840382, + "learning_rate": 4.9647553146708245e-06, + "loss": 0.4184, + "step": 5367 + }, + { + "epoch": 0.326888530280425, + "grad_norm": 1.1327151340657735, + "learning_rate": 4.964741961739451e-06, + "loss": 0.4176, + "step": 5368 + }, + { + "epoch": 0.3269494260573029, + "grad_norm": 0.8830045005616807, + "learning_rate": 4.964728606297049e-06, + "loss": 0.4958, + "step": 5369 + }, + { + "epoch": 0.3270103218341808, + "grad_norm": 0.9075753978306286, + "learning_rate": 4.964715248343633e-06, + "loss": 0.4596, + "step": 5370 + }, + { + "epoch": 0.3270712176110587, + "grad_norm": 0.9818494618398115, + "learning_rate": 4.964701887879217e-06, + "loss": 0.4984, + "step": 5371 + }, + { + "epoch": 0.3271321133879366, + "grad_norm": 1.0503757624137744, + "learning_rate": 4.9646885249038125e-06, + "loss": 0.4976, + "step": 5372 + }, + { + "epoch": 0.3271930091648144, + "grad_norm": 0.9731003496914412, + "learning_rate": 4.964675159417435e-06, + "loss": 0.4822, + "step": 5373 + }, + { + "epoch": 0.3272539049416923, + "grad_norm": 1.1027689165334487, + "learning_rate": 4.964661791420099e-06, + "loss": 0.4568, + "step": 5374 + }, + { + "epoch": 0.32731480071857016, + "grad_norm": 1.0858673668462981, + "learning_rate": 4.9646484209118155e-06, + "loss": 0.4582, + "step": 5375 + }, + { + "epoch": 0.32737569649544807, + "grad_norm": 1.028278038283817, + "learning_rate": 4.9646350478925996e-06, + "loss": 0.5011, + "step": 5376 + }, + { + "epoch": 0.3274365922723259, + "grad_norm": 1.088005083893556, + "learning_rate": 4.9646216723624654e-06, + "loss": 0.4059, + "step": 5377 + }, + { + "epoch": 0.3274974880492038, + "grad_norm": 0.947120316717683, + "learning_rate": 4.964608294321425e-06, + "loss": 0.5386, + "step": 5378 + }, + { + "epoch": 0.32755838382608166, + "grad_norm": 0.9470164285235089, + "learning_rate": 4.964594913769493e-06, + "loss": 0.47, + "step": 5379 + }, + { + "epoch": 0.32761927960295956, + "grad_norm": 1.1868135286086066, + "learning_rate": 4.964581530706683e-06, + "loss": 0.4866, + "step": 5380 + }, + { + "epoch": 0.3276801753798374, + "grad_norm": 1.001401552103552, + "learning_rate": 4.964568145133009e-06, + "loss": 0.4338, + "step": 5381 + }, + { + "epoch": 0.3277410711567153, + "grad_norm": 1.0315774635152515, + "learning_rate": 4.964554757048485e-06, + "loss": 0.4845, + "step": 5382 + }, + { + "epoch": 0.32780196693359315, + "grad_norm": 1.0442480601581638, + "learning_rate": 4.964541366453123e-06, + "loss": 0.447, + "step": 5383 + }, + { + "epoch": 0.32786286271047105, + "grad_norm": 1.1397749467727523, + "learning_rate": 4.964527973346937e-06, + "loss": 0.4735, + "step": 5384 + }, + { + "epoch": 0.3279237584873489, + "grad_norm": 1.0626966001195395, + "learning_rate": 4.964514577729942e-06, + "loss": 0.489, + "step": 5385 + }, + { + "epoch": 0.3279846542642268, + "grad_norm": 1.0722669429586085, + "learning_rate": 4.9645011796021504e-06, + "loss": 0.4748, + "step": 5386 + }, + { + "epoch": 0.32804555004110464, + "grad_norm": 0.9462916754987175, + "learning_rate": 4.964487778963576e-06, + "loss": 0.5013, + "step": 5387 + }, + { + "epoch": 0.32810644581798254, + "grad_norm": 0.9956001805486306, + "learning_rate": 4.964474375814233e-06, + "loss": 0.4569, + "step": 5388 + }, + { + "epoch": 0.3281673415948604, + "grad_norm": 1.137134913352318, + "learning_rate": 4.964460970154135e-06, + "loss": 0.536, + "step": 5389 + }, + { + "epoch": 0.3282282373717383, + "grad_norm": 0.9837019148836657, + "learning_rate": 4.964447561983295e-06, + "loss": 0.4917, + "step": 5390 + }, + { + "epoch": 0.32828913314861613, + "grad_norm": 1.0730390825775635, + "learning_rate": 4.964434151301727e-06, + "loss": 0.455, + "step": 5391 + }, + { + "epoch": 0.32835002892549403, + "grad_norm": 0.9270143503860433, + "learning_rate": 4.964420738109444e-06, + "loss": 0.5408, + "step": 5392 + }, + { + "epoch": 0.3284109247023719, + "grad_norm": 1.1396571593796767, + "learning_rate": 4.964407322406462e-06, + "loss": 0.4307, + "step": 5393 + }, + { + "epoch": 0.3284718204792498, + "grad_norm": 1.0743446899044908, + "learning_rate": 4.964393904192792e-06, + "loss": 0.3741, + "step": 5394 + }, + { + "epoch": 0.3285327162561276, + "grad_norm": 1.1047715492085728, + "learning_rate": 4.964380483468449e-06, + "loss": 0.454, + "step": 5395 + }, + { + "epoch": 0.3285936120330055, + "grad_norm": 0.9806631932585276, + "learning_rate": 4.964367060233446e-06, + "loss": 0.5139, + "step": 5396 + }, + { + "epoch": 0.32865450780988337, + "grad_norm": 0.9338535706512983, + "learning_rate": 4.964353634487797e-06, + "loss": 0.5112, + "step": 5397 + }, + { + "epoch": 0.32871540358676127, + "grad_norm": 1.123641408760829, + "learning_rate": 4.964340206231517e-06, + "loss": 0.4356, + "step": 5398 + }, + { + "epoch": 0.3287762993636391, + "grad_norm": 0.9743659064129739, + "learning_rate": 4.964326775464617e-06, + "loss": 0.4592, + "step": 5399 + }, + { + "epoch": 0.328837195140517, + "grad_norm": 1.0349769667503785, + "learning_rate": 4.964313342187113e-06, + "loss": 0.5403, + "step": 5400 + }, + { + "epoch": 0.32889809091739486, + "grad_norm": 1.1339998390696693, + "learning_rate": 4.964299906399018e-06, + "loss": 0.5128, + "step": 5401 + }, + { + "epoch": 0.32895898669427276, + "grad_norm": 1.1184052542292011, + "learning_rate": 4.964286468100345e-06, + "loss": 0.4683, + "step": 5402 + }, + { + "epoch": 0.3290198824711506, + "grad_norm": 0.9714638785079127, + "learning_rate": 4.964273027291108e-06, + "loss": 0.5015, + "step": 5403 + }, + { + "epoch": 0.3290807782480285, + "grad_norm": 1.0568907775863363, + "learning_rate": 4.964259583971321e-06, + "loss": 0.483, + "step": 5404 + }, + { + "epoch": 0.32914167402490635, + "grad_norm": 1.117095373015537, + "learning_rate": 4.964246138140998e-06, + "loss": 0.4285, + "step": 5405 + }, + { + "epoch": 0.32920256980178425, + "grad_norm": 0.9697678735348156, + "learning_rate": 4.9642326898001515e-06, + "loss": 0.5114, + "step": 5406 + }, + { + "epoch": 0.3292634655786621, + "grad_norm": 0.9712316104822428, + "learning_rate": 4.964219238948797e-06, + "loss": 0.4441, + "step": 5407 + }, + { + "epoch": 0.32932436135554, + "grad_norm": 1.0388912220808995, + "learning_rate": 4.964205785586946e-06, + "loss": 0.4579, + "step": 5408 + }, + { + "epoch": 0.32938525713241784, + "grad_norm": 1.0309979453948228, + "learning_rate": 4.964192329714614e-06, + "loss": 0.4698, + "step": 5409 + }, + { + "epoch": 0.32944615290929574, + "grad_norm": 0.9597145294836522, + "learning_rate": 4.964178871331815e-06, + "loss": 0.4822, + "step": 5410 + }, + { + "epoch": 0.3295070486861736, + "grad_norm": 0.9624164536638524, + "learning_rate": 4.964165410438561e-06, + "loss": 0.501, + "step": 5411 + }, + { + "epoch": 0.3295679444630515, + "grad_norm": 1.0763367020611825, + "learning_rate": 4.964151947034866e-06, + "loss": 0.4181, + "step": 5412 + }, + { + "epoch": 0.3296288402399294, + "grad_norm": 1.0062063974384863, + "learning_rate": 4.964138481120744e-06, + "loss": 0.4763, + "step": 5413 + }, + { + "epoch": 0.32968973601680723, + "grad_norm": 1.1178471707010247, + "learning_rate": 4.9641250126962096e-06, + "loss": 0.3912, + "step": 5414 + }, + { + "epoch": 0.32975063179368513, + "grad_norm": 1.033427396122199, + "learning_rate": 4.964111541761276e-06, + "loss": 0.4271, + "step": 5415 + }, + { + "epoch": 0.329811527570563, + "grad_norm": 0.9866621834527847, + "learning_rate": 4.964098068315957e-06, + "loss": 0.4995, + "step": 5416 + }, + { + "epoch": 0.3298724233474409, + "grad_norm": 1.0528661614638362, + "learning_rate": 4.964084592360266e-06, + "loss": 0.4794, + "step": 5417 + }, + { + "epoch": 0.3299333191243187, + "grad_norm": 1.0273846240964828, + "learning_rate": 4.964071113894216e-06, + "loss": 0.4941, + "step": 5418 + }, + { + "epoch": 0.3299942149011966, + "grad_norm": 1.0541288170644354, + "learning_rate": 4.964057632917822e-06, + "loss": 0.4453, + "step": 5419 + }, + { + "epoch": 0.33005511067807447, + "grad_norm": 1.0649541000167286, + "learning_rate": 4.964044149431098e-06, + "loss": 0.4539, + "step": 5420 + }, + { + "epoch": 0.33011600645495237, + "grad_norm": 1.0364344678781128, + "learning_rate": 4.964030663434056e-06, + "loss": 0.4992, + "step": 5421 + }, + { + "epoch": 0.3301769022318302, + "grad_norm": 0.9806860632941039, + "learning_rate": 4.964017174926712e-06, + "loss": 0.4538, + "step": 5422 + }, + { + "epoch": 0.3302377980087081, + "grad_norm": 1.0229303168272106, + "learning_rate": 4.964003683909077e-06, + "loss": 0.4968, + "step": 5423 + }, + { + "epoch": 0.33029869378558596, + "grad_norm": 1.0792256940812617, + "learning_rate": 4.963990190381167e-06, + "loss": 0.4384, + "step": 5424 + }, + { + "epoch": 0.33035958956246386, + "grad_norm": 1.042252819269745, + "learning_rate": 4.963976694342996e-06, + "loss": 0.3868, + "step": 5425 + }, + { + "epoch": 0.3304204853393417, + "grad_norm": 1.0559778282782248, + "learning_rate": 4.963963195794575e-06, + "loss": 0.4702, + "step": 5426 + }, + { + "epoch": 0.3304813811162196, + "grad_norm": 1.0870539336646599, + "learning_rate": 4.963949694735921e-06, + "loss": 0.51, + "step": 5427 + }, + { + "epoch": 0.33054227689309745, + "grad_norm": 1.0072136530574587, + "learning_rate": 4.963936191167046e-06, + "loss": 0.4827, + "step": 5428 + }, + { + "epoch": 0.33060317266997535, + "grad_norm": 1.1170090963847326, + "learning_rate": 4.963922685087963e-06, + "loss": 0.4239, + "step": 5429 + }, + { + "epoch": 0.3306640684468532, + "grad_norm": 1.0481183750689573, + "learning_rate": 4.963909176498688e-06, + "loss": 0.4437, + "step": 5430 + }, + { + "epoch": 0.3307249642237311, + "grad_norm": 0.8880822790510647, + "learning_rate": 4.963895665399233e-06, + "loss": 0.5046, + "step": 5431 + }, + { + "epoch": 0.33078586000060894, + "grad_norm": 1.0035844730962415, + "learning_rate": 4.963882151789612e-06, + "loss": 0.4857, + "step": 5432 + }, + { + "epoch": 0.33084675577748685, + "grad_norm": 1.0290637350324672, + "learning_rate": 4.9638686356698394e-06, + "loss": 0.4359, + "step": 5433 + }, + { + "epoch": 0.3309076515543647, + "grad_norm": 1.0819670890670623, + "learning_rate": 4.963855117039929e-06, + "loss": 0.4514, + "step": 5434 + }, + { + "epoch": 0.3309685473312426, + "grad_norm": 1.080207468945585, + "learning_rate": 4.963841595899895e-06, + "loss": 0.4598, + "step": 5435 + }, + { + "epoch": 0.33102944310812044, + "grad_norm": 1.0308081078963887, + "learning_rate": 4.9638280722497485e-06, + "loss": 0.5477, + "step": 5436 + }, + { + "epoch": 0.33109033888499834, + "grad_norm": 0.9613796086822501, + "learning_rate": 4.963814546089506e-06, + "loss": 0.5367, + "step": 5437 + }, + { + "epoch": 0.3311512346618762, + "grad_norm": 1.0010787298460404, + "learning_rate": 4.963801017419181e-06, + "loss": 0.5394, + "step": 5438 + }, + { + "epoch": 0.3312121304387541, + "grad_norm": 1.0956372680802082, + "learning_rate": 4.963787486238786e-06, + "loss": 0.5289, + "step": 5439 + }, + { + "epoch": 0.3312730262156319, + "grad_norm": 0.992601990764145, + "learning_rate": 4.9637739525483354e-06, + "loss": 0.4492, + "step": 5440 + }, + { + "epoch": 0.33133392199250983, + "grad_norm": 1.1226320864084574, + "learning_rate": 4.963760416347844e-06, + "loss": 0.4303, + "step": 5441 + }, + { + "epoch": 0.3313948177693877, + "grad_norm": 1.0782019358011363, + "learning_rate": 4.963746877637325e-06, + "loss": 0.492, + "step": 5442 + }, + { + "epoch": 0.3314557135462656, + "grad_norm": 0.9484354428199512, + "learning_rate": 4.96373333641679e-06, + "loss": 0.4472, + "step": 5443 + }, + { + "epoch": 0.3315166093231434, + "grad_norm": 1.066198859636273, + "learning_rate": 4.963719792686255e-06, + "loss": 0.5247, + "step": 5444 + }, + { + "epoch": 0.3315775051000213, + "grad_norm": 1.0880514686525211, + "learning_rate": 4.9637062464457354e-06, + "loss": 0.4862, + "step": 5445 + }, + { + "epoch": 0.33163840087689916, + "grad_norm": 0.911019691347691, + "learning_rate": 4.963692697695242e-06, + "loss": 0.5239, + "step": 5446 + }, + { + "epoch": 0.33169929665377706, + "grad_norm": 1.0820910881678925, + "learning_rate": 4.96367914643479e-06, + "loss": 0.4286, + "step": 5447 + }, + { + "epoch": 0.3317601924306549, + "grad_norm": 1.1776323043445802, + "learning_rate": 4.9636655926643924e-06, + "loss": 0.4, + "step": 5448 + }, + { + "epoch": 0.3318210882075328, + "grad_norm": 1.1294972148065272, + "learning_rate": 4.963652036384063e-06, + "loss": 0.45, + "step": 5449 + }, + { + "epoch": 0.33188198398441066, + "grad_norm": 0.9890870749237546, + "learning_rate": 4.9636384775938175e-06, + "loss": 0.4509, + "step": 5450 + }, + { + "epoch": 0.33194287976128856, + "grad_norm": 0.994416354369958, + "learning_rate": 4.9636249162936676e-06, + "loss": 0.53, + "step": 5451 + }, + { + "epoch": 0.3320037755381664, + "grad_norm": 1.068848024002241, + "learning_rate": 4.963611352483629e-06, + "loss": 0.3888, + "step": 5452 + }, + { + "epoch": 0.3320646713150443, + "grad_norm": 1.023899277228583, + "learning_rate": 4.9635977861637124e-06, + "loss": 0.4777, + "step": 5453 + }, + { + "epoch": 0.3321255670919222, + "grad_norm": 1.1046941304248536, + "learning_rate": 4.963584217333934e-06, + "loss": 0.5565, + "step": 5454 + }, + { + "epoch": 0.33218646286880005, + "grad_norm": 1.08951279230911, + "learning_rate": 4.963570645994309e-06, + "loss": 0.581, + "step": 5455 + }, + { + "epoch": 0.33224735864567795, + "grad_norm": 1.0719677464544386, + "learning_rate": 4.963557072144848e-06, + "loss": 0.41, + "step": 5456 + }, + { + "epoch": 0.3323082544225558, + "grad_norm": 1.094704639846148, + "learning_rate": 4.963543495785566e-06, + "loss": 0.4453, + "step": 5457 + }, + { + "epoch": 0.3323691501994337, + "grad_norm": 1.0042156682781147, + "learning_rate": 4.9635299169164775e-06, + "loss": 0.436, + "step": 5458 + }, + { + "epoch": 0.33243004597631154, + "grad_norm": 1.026486183088245, + "learning_rate": 4.963516335537596e-06, + "loss": 0.415, + "step": 5459 + }, + { + "epoch": 0.33249094175318944, + "grad_norm": 1.046028918069639, + "learning_rate": 4.963502751648935e-06, + "loss": 0.4124, + "step": 5460 + }, + { + "epoch": 0.3325518375300673, + "grad_norm": 1.021622403480536, + "learning_rate": 4.9634891652505095e-06, + "loss": 0.4716, + "step": 5461 + }, + { + "epoch": 0.3326127333069452, + "grad_norm": 0.967656841791181, + "learning_rate": 4.963475576342332e-06, + "loss": 0.4586, + "step": 5462 + }, + { + "epoch": 0.33267362908382303, + "grad_norm": 1.1391238058271225, + "learning_rate": 4.963461984924417e-06, + "loss": 0.4351, + "step": 5463 + }, + { + "epoch": 0.33273452486070093, + "grad_norm": 1.0398187860315462, + "learning_rate": 4.9634483909967775e-06, + "loss": 0.4196, + "step": 5464 + }, + { + "epoch": 0.3327954206375788, + "grad_norm": 0.98932566230204, + "learning_rate": 4.963434794559428e-06, + "loss": 0.4763, + "step": 5465 + }, + { + "epoch": 0.3328563164144567, + "grad_norm": 1.0757058210340913, + "learning_rate": 4.963421195612383e-06, + "loss": 0.4943, + "step": 5466 + }, + { + "epoch": 0.3329172121913345, + "grad_norm": 1.057375107313085, + "learning_rate": 4.963407594155655e-06, + "loss": 0.4846, + "step": 5467 + }, + { + "epoch": 0.3329781079682124, + "grad_norm": 0.9857885324239551, + "learning_rate": 4.9633939901892596e-06, + "loss": 0.5008, + "step": 5468 + }, + { + "epoch": 0.33303900374509027, + "grad_norm": 1.0999194030465618, + "learning_rate": 4.963380383713209e-06, + "loss": 0.4573, + "step": 5469 + }, + { + "epoch": 0.33309989952196817, + "grad_norm": 0.9870979836576529, + "learning_rate": 4.963366774727517e-06, + "loss": 0.5592, + "step": 5470 + }, + { + "epoch": 0.333160795298846, + "grad_norm": 1.0810344232376545, + "learning_rate": 4.963353163232199e-06, + "loss": 0.4939, + "step": 5471 + }, + { + "epoch": 0.3332216910757239, + "grad_norm": 1.0433660697769396, + "learning_rate": 4.963339549227268e-06, + "loss": 0.5012, + "step": 5472 + }, + { + "epoch": 0.33328258685260176, + "grad_norm": 1.0559575031849227, + "learning_rate": 4.963325932712738e-06, + "loss": 0.4193, + "step": 5473 + }, + { + "epoch": 0.33334348262947966, + "grad_norm": 0.9953519095696776, + "learning_rate": 4.963312313688622e-06, + "loss": 0.4959, + "step": 5474 + }, + { + "epoch": 0.3334043784063575, + "grad_norm": 1.0241206205944402, + "learning_rate": 4.963298692154935e-06, + "loss": 0.5166, + "step": 5475 + }, + { + "epoch": 0.3334652741832354, + "grad_norm": 1.0239230951437497, + "learning_rate": 4.963285068111691e-06, + "loss": 0.4584, + "step": 5476 + }, + { + "epoch": 0.33352616996011325, + "grad_norm": 1.1678809764886993, + "learning_rate": 4.9632714415589024e-06, + "loss": 0.4169, + "step": 5477 + }, + { + "epoch": 0.33358706573699115, + "grad_norm": 1.0385473588197178, + "learning_rate": 4.963257812496584e-06, + "loss": 0.4141, + "step": 5478 + }, + { + "epoch": 0.333647961513869, + "grad_norm": 1.0220960096496718, + "learning_rate": 4.9632441809247515e-06, + "loss": 0.5284, + "step": 5479 + }, + { + "epoch": 0.3337088572907469, + "grad_norm": 1.0432802573727098, + "learning_rate": 4.963230546843416e-06, + "loss": 0.4795, + "step": 5480 + }, + { + "epoch": 0.33376975306762474, + "grad_norm": 1.0830698283878704, + "learning_rate": 4.963216910252592e-06, + "loss": 0.4329, + "step": 5481 + }, + { + "epoch": 0.33383064884450264, + "grad_norm": 1.0762261321415172, + "learning_rate": 4.963203271152294e-06, + "loss": 0.4501, + "step": 5482 + }, + { + "epoch": 0.3338915446213805, + "grad_norm": 1.1168518251486845, + "learning_rate": 4.963189629542536e-06, + "loss": 0.5113, + "step": 5483 + }, + { + "epoch": 0.3339524403982584, + "grad_norm": 1.0288510755370004, + "learning_rate": 4.963175985423332e-06, + "loss": 0.4346, + "step": 5484 + }, + { + "epoch": 0.33401333617513623, + "grad_norm": 1.0437154257323538, + "learning_rate": 4.9631623387946945e-06, + "loss": 0.4529, + "step": 5485 + }, + { + "epoch": 0.33407423195201413, + "grad_norm": 1.0223367476402339, + "learning_rate": 4.963148689656639e-06, + "loss": 0.5127, + "step": 5486 + }, + { + "epoch": 0.334135127728892, + "grad_norm": 1.079032480092341, + "learning_rate": 4.963135038009179e-06, + "loss": 0.5195, + "step": 5487 + }, + { + "epoch": 0.3341960235057699, + "grad_norm": 0.9846920424435558, + "learning_rate": 4.963121383852327e-06, + "loss": 0.5396, + "step": 5488 + }, + { + "epoch": 0.3342569192826477, + "grad_norm": 0.9694738074810227, + "learning_rate": 4.9631077271861e-06, + "loss": 0.5246, + "step": 5489 + }, + { + "epoch": 0.3343178150595256, + "grad_norm": 1.0507225517804835, + "learning_rate": 4.963094068010509e-06, + "loss": 0.401, + "step": 5490 + }, + { + "epoch": 0.33437871083640347, + "grad_norm": 1.002495150312498, + "learning_rate": 4.963080406325569e-06, + "loss": 0.4704, + "step": 5491 + }, + { + "epoch": 0.33443960661328137, + "grad_norm": 0.9606246703400887, + "learning_rate": 4.963066742131294e-06, + "loss": 0.4853, + "step": 5492 + }, + { + "epoch": 0.3345005023901592, + "grad_norm": 1.0825687954548766, + "learning_rate": 4.963053075427698e-06, + "loss": 0.451, + "step": 5493 + }, + { + "epoch": 0.3345613981670371, + "grad_norm": 1.0962474530407518, + "learning_rate": 4.963039406214795e-06, + "loss": 0.4985, + "step": 5494 + }, + { + "epoch": 0.334622293943915, + "grad_norm": 0.9938268046395152, + "learning_rate": 4.963025734492598e-06, + "loss": 0.4989, + "step": 5495 + }, + { + "epoch": 0.33468318972079286, + "grad_norm": 0.9238709635140457, + "learning_rate": 4.963012060261122e-06, + "loss": 0.4748, + "step": 5496 + }, + { + "epoch": 0.33474408549767076, + "grad_norm": 0.9664590296366121, + "learning_rate": 4.96299838352038e-06, + "loss": 0.5354, + "step": 5497 + }, + { + "epoch": 0.3348049812745486, + "grad_norm": 1.0527483460761131, + "learning_rate": 4.9629847042703875e-06, + "loss": 0.5154, + "step": 5498 + }, + { + "epoch": 0.3348658770514265, + "grad_norm": 1.0692439011714907, + "learning_rate": 4.962971022511156e-06, + "loss": 0.4907, + "step": 5499 + }, + { + "epoch": 0.33492677282830435, + "grad_norm": 1.0589933375752845, + "learning_rate": 4.962957338242702e-06, + "loss": 0.4557, + "step": 5500 + }, + { + "epoch": 0.33498766860518225, + "grad_norm": 1.027313891252013, + "learning_rate": 4.962943651465038e-06, + "loss": 0.506, + "step": 5501 + }, + { + "epoch": 0.3350485643820601, + "grad_norm": 1.0271901038231992, + "learning_rate": 4.962929962178178e-06, + "loss": 0.494, + "step": 5502 + }, + { + "epoch": 0.335109460158938, + "grad_norm": 0.959753186141344, + "learning_rate": 4.962916270382135e-06, + "loss": 0.449, + "step": 5503 + }, + { + "epoch": 0.33517035593581584, + "grad_norm": 0.9914862276363401, + "learning_rate": 4.962902576076926e-06, + "loss": 0.4826, + "step": 5504 + }, + { + "epoch": 0.33523125171269375, + "grad_norm": 1.0831841027974176, + "learning_rate": 4.962888879262562e-06, + "loss": 0.4667, + "step": 5505 + }, + { + "epoch": 0.3352921474895716, + "grad_norm": 1.0001744999974964, + "learning_rate": 4.962875179939059e-06, + "loss": 0.5162, + "step": 5506 + }, + { + "epoch": 0.3353530432664495, + "grad_norm": 1.0142209829484197, + "learning_rate": 4.962861478106429e-06, + "loss": 0.4651, + "step": 5507 + }, + { + "epoch": 0.33541393904332734, + "grad_norm": 1.1116097413931931, + "learning_rate": 4.9628477737646875e-06, + "loss": 0.4644, + "step": 5508 + }, + { + "epoch": 0.33547483482020524, + "grad_norm": 1.1061175440939948, + "learning_rate": 4.962834066913848e-06, + "loss": 0.3786, + "step": 5509 + }, + { + "epoch": 0.3355357305970831, + "grad_norm": 1.02906196609887, + "learning_rate": 4.962820357553923e-06, + "loss": 0.4542, + "step": 5510 + }, + { + "epoch": 0.335596626373961, + "grad_norm": 1.0422831138291224, + "learning_rate": 4.962806645684929e-06, + "loss": 0.4747, + "step": 5511 + }, + { + "epoch": 0.3356575221508388, + "grad_norm": 0.9681455111410203, + "learning_rate": 4.962792931306879e-06, + "loss": 0.4611, + "step": 5512 + }, + { + "epoch": 0.33571841792771673, + "grad_norm": 1.0970848977689245, + "learning_rate": 4.9627792144197865e-06, + "loss": 0.4016, + "step": 5513 + }, + { + "epoch": 0.3357793137045946, + "grad_norm": 1.0270202703251063, + "learning_rate": 4.962765495023666e-06, + "loss": 0.5064, + "step": 5514 + }, + { + "epoch": 0.3358402094814725, + "grad_norm": 1.0254995249268837, + "learning_rate": 4.96275177311853e-06, + "loss": 0.4702, + "step": 5515 + }, + { + "epoch": 0.3359011052583503, + "grad_norm": 1.0180901319268902, + "learning_rate": 4.962738048704395e-06, + "loss": 0.4932, + "step": 5516 + }, + { + "epoch": 0.3359620010352282, + "grad_norm": 1.1175326342045697, + "learning_rate": 4.962724321781274e-06, + "loss": 0.5194, + "step": 5517 + }, + { + "epoch": 0.33602289681210606, + "grad_norm": 1.0450951741461172, + "learning_rate": 4.962710592349179e-06, + "loss": 0.5675, + "step": 5518 + }, + { + "epoch": 0.33608379258898397, + "grad_norm": 1.0221441558580742, + "learning_rate": 4.9626968604081276e-06, + "loss": 0.4613, + "step": 5519 + }, + { + "epoch": 0.3361446883658618, + "grad_norm": 0.9942915142332897, + "learning_rate": 4.96268312595813e-06, + "loss": 0.4139, + "step": 5520 + }, + { + "epoch": 0.3362055841427397, + "grad_norm": 1.1729353931758837, + "learning_rate": 4.962669388999203e-06, + "loss": 0.4896, + "step": 5521 + }, + { + "epoch": 0.33626647991961756, + "grad_norm": 1.0883732962307489, + "learning_rate": 4.962655649531359e-06, + "loss": 0.4794, + "step": 5522 + }, + { + "epoch": 0.33632737569649546, + "grad_norm": 1.0254852366577396, + "learning_rate": 4.962641907554614e-06, + "loss": 0.45, + "step": 5523 + }, + { + "epoch": 0.3363882714733733, + "grad_norm": 1.0045212636591576, + "learning_rate": 4.9626281630689785e-06, + "loss": 0.4477, + "step": 5524 + }, + { + "epoch": 0.3364491672502512, + "grad_norm": 0.9887324057258692, + "learning_rate": 4.962614416074471e-06, + "loss": 0.4243, + "step": 5525 + }, + { + "epoch": 0.33651006302712905, + "grad_norm": 0.9768461626998454, + "learning_rate": 4.962600666571101e-06, + "loss": 0.5187, + "step": 5526 + }, + { + "epoch": 0.33657095880400695, + "grad_norm": 1.0554313198084948, + "learning_rate": 4.962586914558885e-06, + "loss": 0.4331, + "step": 5527 + }, + { + "epoch": 0.3366318545808848, + "grad_norm": 1.0673360824592304, + "learning_rate": 4.9625731600378376e-06, + "loss": 0.4843, + "step": 5528 + }, + { + "epoch": 0.3366927503577627, + "grad_norm": 1.0521973535555944, + "learning_rate": 4.962559403007972e-06, + "loss": 0.4557, + "step": 5529 + }, + { + "epoch": 0.33675364613464054, + "grad_norm": 1.0934680922278455, + "learning_rate": 4.962545643469302e-06, + "loss": 0.4363, + "step": 5530 + }, + { + "epoch": 0.33681454191151844, + "grad_norm": 1.0085175347878144, + "learning_rate": 4.962531881421841e-06, + "loss": 0.5079, + "step": 5531 + }, + { + "epoch": 0.3368754376883963, + "grad_norm": 1.0404968686041185, + "learning_rate": 4.962518116865604e-06, + "loss": 0.4483, + "step": 5532 + }, + { + "epoch": 0.3369363334652742, + "grad_norm": 1.0310382237979936, + "learning_rate": 4.9625043498006045e-06, + "loss": 0.4745, + "step": 5533 + }, + { + "epoch": 0.33699722924215203, + "grad_norm": 1.0413296318529957, + "learning_rate": 4.962490580226857e-06, + "loss": 0.4773, + "step": 5534 + }, + { + "epoch": 0.33705812501902993, + "grad_norm": 1.014905820907169, + "learning_rate": 4.962476808144375e-06, + "loss": 0.5304, + "step": 5535 + }, + { + "epoch": 0.33711902079590783, + "grad_norm": 1.1095458098426498, + "learning_rate": 4.962463033553173e-06, + "loss": 0.5041, + "step": 5536 + }, + { + "epoch": 0.3371799165727857, + "grad_norm": 1.084200056462048, + "learning_rate": 4.962449256453265e-06, + "loss": 0.4252, + "step": 5537 + }, + { + "epoch": 0.3372408123496636, + "grad_norm": 1.0963605753865362, + "learning_rate": 4.962435476844665e-06, + "loss": 0.4345, + "step": 5538 + }, + { + "epoch": 0.3373017081265414, + "grad_norm": 1.0315776212690646, + "learning_rate": 4.962421694727387e-06, + "loss": 0.546, + "step": 5539 + }, + { + "epoch": 0.3373626039034193, + "grad_norm": 1.0922348913790492, + "learning_rate": 4.962407910101445e-06, + "loss": 0.4893, + "step": 5540 + }, + { + "epoch": 0.33742349968029717, + "grad_norm": 0.9654528251870261, + "learning_rate": 4.962394122966852e-06, + "loss": 0.6033, + "step": 5541 + }, + { + "epoch": 0.33748439545717507, + "grad_norm": 0.993438182086029, + "learning_rate": 4.962380333323624e-06, + "loss": 0.5223, + "step": 5542 + }, + { + "epoch": 0.3375452912340529, + "grad_norm": 1.0590263899349757, + "learning_rate": 4.962366541171775e-06, + "loss": 0.5032, + "step": 5543 + }, + { + "epoch": 0.3376061870109308, + "grad_norm": 1.072597367105068, + "learning_rate": 4.962352746511316e-06, + "loss": 0.4818, + "step": 5544 + }, + { + "epoch": 0.33766708278780866, + "grad_norm": 0.9960144763519876, + "learning_rate": 4.9623389493422645e-06, + "loss": 0.4623, + "step": 5545 + }, + { + "epoch": 0.33772797856468656, + "grad_norm": 1.0953880954161435, + "learning_rate": 4.962325149664633e-06, + "loss": 0.427, + "step": 5546 + }, + { + "epoch": 0.3377888743415644, + "grad_norm": 0.9846365091281676, + "learning_rate": 4.962311347478437e-06, + "loss": 0.5004, + "step": 5547 + }, + { + "epoch": 0.3378497701184423, + "grad_norm": 0.9949995475042839, + "learning_rate": 4.962297542783688e-06, + "loss": 0.4812, + "step": 5548 + }, + { + "epoch": 0.33791066589532015, + "grad_norm": 1.0370429277594186, + "learning_rate": 4.962283735580402e-06, + "loss": 0.5085, + "step": 5549 + }, + { + "epoch": 0.33797156167219805, + "grad_norm": 1.0859098641791944, + "learning_rate": 4.962269925868592e-06, + "loss": 0.4408, + "step": 5550 + }, + { + "epoch": 0.3380324574490759, + "grad_norm": 1.0384870391780494, + "learning_rate": 4.962256113648273e-06, + "loss": 0.431, + "step": 5551 + }, + { + "epoch": 0.3380933532259538, + "grad_norm": 1.0175394710184629, + "learning_rate": 4.962242298919459e-06, + "loss": 0.4756, + "step": 5552 + }, + { + "epoch": 0.33815424900283164, + "grad_norm": 1.0786890827509201, + "learning_rate": 4.962228481682163e-06, + "loss": 0.4692, + "step": 5553 + }, + { + "epoch": 0.33821514477970954, + "grad_norm": 1.0232071129618827, + "learning_rate": 4.962214661936399e-06, + "loss": 0.4591, + "step": 5554 + }, + { + "epoch": 0.3382760405565874, + "grad_norm": 1.0623207792519755, + "learning_rate": 4.962200839682184e-06, + "loss": 0.4971, + "step": 5555 + }, + { + "epoch": 0.3383369363334653, + "grad_norm": 0.9703087954547531, + "learning_rate": 4.962187014919529e-06, + "loss": 0.5453, + "step": 5556 + }, + { + "epoch": 0.33839783211034313, + "grad_norm": 1.1001028552047638, + "learning_rate": 4.962173187648449e-06, + "loss": 0.4244, + "step": 5557 + }, + { + "epoch": 0.33845872788722103, + "grad_norm": 1.1124683735891283, + "learning_rate": 4.962159357868958e-06, + "loss": 0.4616, + "step": 5558 + }, + { + "epoch": 0.3385196236640989, + "grad_norm": 0.981638287156133, + "learning_rate": 4.96214552558107e-06, + "loss": 0.5994, + "step": 5559 + }, + { + "epoch": 0.3385805194409768, + "grad_norm": 1.069777206265205, + "learning_rate": 4.9621316907848005e-06, + "loss": 0.4735, + "step": 5560 + }, + { + "epoch": 0.3386414152178546, + "grad_norm": 1.0527493086400335, + "learning_rate": 4.9621178534801616e-06, + "loss": 0.4905, + "step": 5561 + }, + { + "epoch": 0.3387023109947325, + "grad_norm": 1.069528426473951, + "learning_rate": 4.962104013667168e-06, + "loss": 0.4497, + "step": 5562 + }, + { + "epoch": 0.33876320677161037, + "grad_norm": 1.107800298861036, + "learning_rate": 4.9620901713458346e-06, + "loss": 0.4431, + "step": 5563 + }, + { + "epoch": 0.33882410254848827, + "grad_norm": 0.9253220320227247, + "learning_rate": 4.9620763265161745e-06, + "loss": 0.464, + "step": 5564 + }, + { + "epoch": 0.3388849983253661, + "grad_norm": 0.9487566967567834, + "learning_rate": 4.9620624791782024e-06, + "loss": 0.4906, + "step": 5565 + }, + { + "epoch": 0.338945894102244, + "grad_norm": 1.0055018201093775, + "learning_rate": 4.962048629331933e-06, + "loss": 0.4729, + "step": 5566 + }, + { + "epoch": 0.33900678987912186, + "grad_norm": 0.9820888440397437, + "learning_rate": 4.962034776977378e-06, + "loss": 0.4875, + "step": 5567 + }, + { + "epoch": 0.33906768565599976, + "grad_norm": 0.9723027549575624, + "learning_rate": 4.962020922114554e-06, + "loss": 0.4523, + "step": 5568 + }, + { + "epoch": 0.3391285814328776, + "grad_norm": 1.108546411289178, + "learning_rate": 4.962007064743474e-06, + "loss": 0.4299, + "step": 5569 + }, + { + "epoch": 0.3391894772097555, + "grad_norm": 1.0422890020414157, + "learning_rate": 4.961993204864153e-06, + "loss": 0.4966, + "step": 5570 + }, + { + "epoch": 0.33925037298663335, + "grad_norm": 1.085296619340662, + "learning_rate": 4.961979342476604e-06, + "loss": 0.5023, + "step": 5571 + }, + { + "epoch": 0.33931126876351125, + "grad_norm": 1.0529649088549593, + "learning_rate": 4.961965477580842e-06, + "loss": 0.485, + "step": 5572 + }, + { + "epoch": 0.3393721645403891, + "grad_norm": 1.0283427403485526, + "learning_rate": 4.9619516101768805e-06, + "loss": 0.4629, + "step": 5573 + }, + { + "epoch": 0.339433060317267, + "grad_norm": 1.0584310698189001, + "learning_rate": 4.961937740264734e-06, + "loss": 0.4209, + "step": 5574 + }, + { + "epoch": 0.33949395609414484, + "grad_norm": 0.9624264252813494, + "learning_rate": 4.961923867844417e-06, + "loss": 0.4988, + "step": 5575 + }, + { + "epoch": 0.33955485187102274, + "grad_norm": 0.99323746098715, + "learning_rate": 4.961909992915942e-06, + "loss": 0.4972, + "step": 5576 + }, + { + "epoch": 0.33961574764790065, + "grad_norm": 1.0380606362015534, + "learning_rate": 4.961896115479325e-06, + "loss": 0.4491, + "step": 5577 + }, + { + "epoch": 0.3396766434247785, + "grad_norm": 1.0031794340890787, + "learning_rate": 4.961882235534579e-06, + "loss": 0.496, + "step": 5578 + }, + { + "epoch": 0.3397375392016564, + "grad_norm": 1.0957758457425746, + "learning_rate": 4.961868353081719e-06, + "loss": 0.4489, + "step": 5579 + }, + { + "epoch": 0.33979843497853424, + "grad_norm": 1.0221330393120551, + "learning_rate": 4.961854468120758e-06, + "loss": 0.4336, + "step": 5580 + }, + { + "epoch": 0.33985933075541214, + "grad_norm": 1.0619400509002852, + "learning_rate": 4.961840580651712e-06, + "loss": 0.4991, + "step": 5581 + }, + { + "epoch": 0.33992022653229, + "grad_norm": 1.05542987533867, + "learning_rate": 4.961826690674594e-06, + "loss": 0.4383, + "step": 5582 + }, + { + "epoch": 0.3399811223091679, + "grad_norm": 1.015524801434588, + "learning_rate": 4.9618127981894174e-06, + "loss": 0.4037, + "step": 5583 + }, + { + "epoch": 0.3400420180860457, + "grad_norm": 1.1299523854036753, + "learning_rate": 4.961798903196197e-06, + "loss": 0.4936, + "step": 5584 + }, + { + "epoch": 0.34010291386292363, + "grad_norm": 0.9663474635114704, + "learning_rate": 4.9617850056949475e-06, + "loss": 0.4695, + "step": 5585 + }, + { + "epoch": 0.3401638096398015, + "grad_norm": 1.061756275473981, + "learning_rate": 4.961771105685682e-06, + "loss": 0.466, + "step": 5586 + }, + { + "epoch": 0.3402247054166794, + "grad_norm": 0.987591954888018, + "learning_rate": 4.961757203168416e-06, + "loss": 0.4688, + "step": 5587 + }, + { + "epoch": 0.3402856011935572, + "grad_norm": 1.0015294950416767, + "learning_rate": 4.9617432981431626e-06, + "loss": 0.4567, + "step": 5588 + }, + { + "epoch": 0.3403464969704351, + "grad_norm": 1.1170674558823095, + "learning_rate": 4.961729390609936e-06, + "loss": 0.4753, + "step": 5589 + }, + { + "epoch": 0.34040739274731296, + "grad_norm": 1.1729381293389722, + "learning_rate": 4.961715480568752e-06, + "loss": 0.4766, + "step": 5590 + }, + { + "epoch": 0.34046828852419087, + "grad_norm": 1.0215106213239569, + "learning_rate": 4.961701568019622e-06, + "loss": 0.4295, + "step": 5591 + }, + { + "epoch": 0.3405291843010687, + "grad_norm": 1.023329040385912, + "learning_rate": 4.961687652962562e-06, + "loss": 0.495, + "step": 5592 + }, + { + "epoch": 0.3405900800779466, + "grad_norm": 0.9593049934470675, + "learning_rate": 4.961673735397587e-06, + "loss": 0.5607, + "step": 5593 + }, + { + "epoch": 0.34065097585482446, + "grad_norm": 0.9298767115242889, + "learning_rate": 4.961659815324708e-06, + "loss": 0.4724, + "step": 5594 + }, + { + "epoch": 0.34071187163170236, + "grad_norm": 1.0121578000942217, + "learning_rate": 4.961645892743942e-06, + "loss": 0.4806, + "step": 5595 + }, + { + "epoch": 0.3407727674085802, + "grad_norm": 1.0336142986235242, + "learning_rate": 4.9616319676553025e-06, + "loss": 0.461, + "step": 5596 + }, + { + "epoch": 0.3408336631854581, + "grad_norm": 1.0255784611557572, + "learning_rate": 4.961618040058803e-06, + "loss": 0.4358, + "step": 5597 + }, + { + "epoch": 0.34089455896233595, + "grad_norm": 0.9979684092275197, + "learning_rate": 4.961604109954459e-06, + "loss": 0.4598, + "step": 5598 + }, + { + "epoch": 0.34095545473921385, + "grad_norm": 0.9958950510636841, + "learning_rate": 4.961590177342284e-06, + "loss": 0.5286, + "step": 5599 + }, + { + "epoch": 0.3410163505160917, + "grad_norm": 0.9745449150922775, + "learning_rate": 4.961576242222291e-06, + "loss": 0.456, + "step": 5600 + }, + { + "epoch": 0.3410772462929696, + "grad_norm": 1.0260590815321449, + "learning_rate": 4.9615623045944965e-06, + "loss": 0.4676, + "step": 5601 + }, + { + "epoch": 0.34113814206984744, + "grad_norm": 1.0842244711815199, + "learning_rate": 4.961548364458913e-06, + "loss": 0.5256, + "step": 5602 + }, + { + "epoch": 0.34119903784672534, + "grad_norm": 1.0316298614161954, + "learning_rate": 4.961534421815556e-06, + "loss": 0.4878, + "step": 5603 + }, + { + "epoch": 0.3412599336236032, + "grad_norm": 1.1487780507952183, + "learning_rate": 4.961520476664437e-06, + "loss": 0.5012, + "step": 5604 + }, + { + "epoch": 0.3413208294004811, + "grad_norm": 1.0443431021798713, + "learning_rate": 4.961506529005574e-06, + "loss": 0.4873, + "step": 5605 + }, + { + "epoch": 0.34138172517735893, + "grad_norm": 1.0655039628310419, + "learning_rate": 4.9614925788389775e-06, + "loss": 0.4312, + "step": 5606 + }, + { + "epoch": 0.34144262095423683, + "grad_norm": 1.0064817281735896, + "learning_rate": 4.961478626164665e-06, + "loss": 0.4442, + "step": 5607 + }, + { + "epoch": 0.3415035167311147, + "grad_norm": 1.1743422755999875, + "learning_rate": 4.961464670982649e-06, + "loss": 0.4294, + "step": 5608 + }, + { + "epoch": 0.3415644125079926, + "grad_norm": 1.0966685459104737, + "learning_rate": 4.961450713292943e-06, + "loss": 0.3855, + "step": 5609 + }, + { + "epoch": 0.3416253082848704, + "grad_norm": 1.0150917467026392, + "learning_rate": 4.961436753095563e-06, + "loss": 0.4759, + "step": 5610 + }, + { + "epoch": 0.3416862040617483, + "grad_norm": 0.9801768867768611, + "learning_rate": 4.961422790390523e-06, + "loss": 0.446, + "step": 5611 + }, + { + "epoch": 0.34174709983862617, + "grad_norm": 1.0725737275865193, + "learning_rate": 4.961408825177836e-06, + "loss": 0.4373, + "step": 5612 + }, + { + "epoch": 0.34180799561550407, + "grad_norm": 1.0480673162240057, + "learning_rate": 4.961394857457517e-06, + "loss": 0.4836, + "step": 5613 + }, + { + "epoch": 0.3418688913923819, + "grad_norm": 1.1027977224534764, + "learning_rate": 4.96138088722958e-06, + "loss": 0.4574, + "step": 5614 + }, + { + "epoch": 0.3419297871692598, + "grad_norm": 1.130673200431905, + "learning_rate": 4.961366914494039e-06, + "loss": 0.4035, + "step": 5615 + }, + { + "epoch": 0.34199068294613766, + "grad_norm": 0.9849183726980638, + "learning_rate": 4.96135293925091e-06, + "loss": 0.4955, + "step": 5616 + }, + { + "epoch": 0.34205157872301556, + "grad_norm": 1.045965158223417, + "learning_rate": 4.961338961500204e-06, + "loss": 0.4505, + "step": 5617 + }, + { + "epoch": 0.34211247449989346, + "grad_norm": 0.9975112023473243, + "learning_rate": 4.961324981241937e-06, + "loss": 0.4591, + "step": 5618 + }, + { + "epoch": 0.3421733702767713, + "grad_norm": 1.0586182199500822, + "learning_rate": 4.9613109984761245e-06, + "loss": 0.4756, + "step": 5619 + }, + { + "epoch": 0.3422342660536492, + "grad_norm": 1.0256741785020347, + "learning_rate": 4.961297013202779e-06, + "loss": 0.4275, + "step": 5620 + }, + { + "epoch": 0.34229516183052705, + "grad_norm": 0.9846891669603921, + "learning_rate": 4.9612830254219156e-06, + "loss": 0.4681, + "step": 5621 + }, + { + "epoch": 0.34235605760740495, + "grad_norm": 1.0090202661632917, + "learning_rate": 4.961269035133548e-06, + "loss": 0.4691, + "step": 5622 + }, + { + "epoch": 0.3424169533842828, + "grad_norm": 0.9905174371914138, + "learning_rate": 4.9612550423376904e-06, + "loss": 0.4731, + "step": 5623 + }, + { + "epoch": 0.3424778491611607, + "grad_norm": 0.9531418191249211, + "learning_rate": 4.961241047034358e-06, + "loss": 0.5425, + "step": 5624 + }, + { + "epoch": 0.34253874493803854, + "grad_norm": 0.9279481362300305, + "learning_rate": 4.9612270492235636e-06, + "loss": 0.4718, + "step": 5625 + }, + { + "epoch": 0.34259964071491644, + "grad_norm": 1.0862052341421298, + "learning_rate": 4.961213048905322e-06, + "loss": 0.4284, + "step": 5626 + }, + { + "epoch": 0.3426605364917943, + "grad_norm": 0.916370633544873, + "learning_rate": 4.961199046079649e-06, + "loss": 0.4743, + "step": 5627 + }, + { + "epoch": 0.3427214322686722, + "grad_norm": 1.0138858166364146, + "learning_rate": 4.961185040746557e-06, + "loss": 0.5828, + "step": 5628 + }, + { + "epoch": 0.34278232804555003, + "grad_norm": 1.0202718224692027, + "learning_rate": 4.96117103290606e-06, + "loss": 0.5379, + "step": 5629 + }, + { + "epoch": 0.34284322382242793, + "grad_norm": 1.0343804047555951, + "learning_rate": 4.961157022558174e-06, + "loss": 0.4568, + "step": 5630 + }, + { + "epoch": 0.3429041195993058, + "grad_norm": 1.1155331769684116, + "learning_rate": 4.961143009702912e-06, + "loss": 0.4833, + "step": 5631 + }, + { + "epoch": 0.3429650153761837, + "grad_norm": 1.0354984791495347, + "learning_rate": 4.961128994340289e-06, + "loss": 0.5245, + "step": 5632 + }, + { + "epoch": 0.3430259111530615, + "grad_norm": 0.9740000093623228, + "learning_rate": 4.961114976470319e-06, + "loss": 0.5669, + "step": 5633 + }, + { + "epoch": 0.3430868069299394, + "grad_norm": 1.0151826179857024, + "learning_rate": 4.961100956093016e-06, + "loss": 0.4942, + "step": 5634 + }, + { + "epoch": 0.34314770270681727, + "grad_norm": 1.0080470599195328, + "learning_rate": 4.961086933208395e-06, + "loss": 0.465, + "step": 5635 + }, + { + "epoch": 0.34320859848369517, + "grad_norm": 1.0318118824119142, + "learning_rate": 4.961072907816469e-06, + "loss": 0.427, + "step": 5636 + }, + { + "epoch": 0.343269494260573, + "grad_norm": 0.9875664878180967, + "learning_rate": 4.961058879917254e-06, + "loss": 0.4264, + "step": 5637 + }, + { + "epoch": 0.3433303900374509, + "grad_norm": 0.9578899588734768, + "learning_rate": 4.9610448495107625e-06, + "loss": 0.5521, + "step": 5638 + }, + { + "epoch": 0.34339128581432876, + "grad_norm": 1.05420091933313, + "learning_rate": 4.96103081659701e-06, + "loss": 0.4763, + "step": 5639 + }, + { + "epoch": 0.34345218159120666, + "grad_norm": 1.0349023744875652, + "learning_rate": 4.961016781176011e-06, + "loss": 0.5043, + "step": 5640 + }, + { + "epoch": 0.3435130773680845, + "grad_norm": 1.0619941791693353, + "learning_rate": 4.961002743247778e-06, + "loss": 0.4812, + "step": 5641 + }, + { + "epoch": 0.3435739731449624, + "grad_norm": 1.0731964729291112, + "learning_rate": 4.960988702812328e-06, + "loss": 0.4655, + "step": 5642 + }, + { + "epoch": 0.34363486892184025, + "grad_norm": 1.014331556778732, + "learning_rate": 4.9609746598696725e-06, + "loss": 0.5042, + "step": 5643 + }, + { + "epoch": 0.34369576469871815, + "grad_norm": 0.9868360407811839, + "learning_rate": 4.960960614419829e-06, + "loss": 0.4958, + "step": 5644 + }, + { + "epoch": 0.343756660475596, + "grad_norm": 1.0809061114853022, + "learning_rate": 4.960946566462809e-06, + "loss": 0.4879, + "step": 5645 + }, + { + "epoch": 0.3438175562524739, + "grad_norm": 0.9935008868849504, + "learning_rate": 4.960932515998627e-06, + "loss": 0.4904, + "step": 5646 + }, + { + "epoch": 0.34387845202935174, + "grad_norm": 1.0673034950317544, + "learning_rate": 4.960918463027299e-06, + "loss": 0.4159, + "step": 5647 + }, + { + "epoch": 0.34393934780622964, + "grad_norm": 0.974466060089914, + "learning_rate": 4.960904407548837e-06, + "loss": 0.4361, + "step": 5648 + }, + { + "epoch": 0.3440002435831075, + "grad_norm": 1.0167017874111102, + "learning_rate": 4.960890349563259e-06, + "loss": 0.4721, + "step": 5649 + }, + { + "epoch": 0.3440611393599854, + "grad_norm": 1.0613997743337014, + "learning_rate": 4.9608762890705766e-06, + "loss": 0.5159, + "step": 5650 + }, + { + "epoch": 0.34412203513686324, + "grad_norm": 1.004023828700836, + "learning_rate": 4.960862226070804e-06, + "loss": 0.4968, + "step": 5651 + }, + { + "epoch": 0.34418293091374114, + "grad_norm": 0.9775736840899638, + "learning_rate": 4.960848160563956e-06, + "loss": 0.5332, + "step": 5652 + }, + { + "epoch": 0.344243826690619, + "grad_norm": 0.9985045631810867, + "learning_rate": 4.9608340925500475e-06, + "loss": 0.5184, + "step": 5653 + }, + { + "epoch": 0.3443047224674969, + "grad_norm": 0.923574054457734, + "learning_rate": 4.960820022029092e-06, + "loss": 0.5591, + "step": 5654 + }, + { + "epoch": 0.3443656182443747, + "grad_norm": 1.0216207850905528, + "learning_rate": 4.960805949001104e-06, + "loss": 0.4944, + "step": 5655 + }, + { + "epoch": 0.3444265140212526, + "grad_norm": 1.0041097731700754, + "learning_rate": 4.960791873466099e-06, + "loss": 0.5156, + "step": 5656 + }, + { + "epoch": 0.3444874097981305, + "grad_norm": 1.032719814141192, + "learning_rate": 4.96077779542409e-06, + "loss": 0.4354, + "step": 5657 + }, + { + "epoch": 0.3445483055750084, + "grad_norm": 1.0743660596059932, + "learning_rate": 4.960763714875091e-06, + "loss": 0.5199, + "step": 5658 + }, + { + "epoch": 0.3446092013518863, + "grad_norm": 0.9966300902164862, + "learning_rate": 4.960749631819118e-06, + "loss": 0.5035, + "step": 5659 + }, + { + "epoch": 0.3446700971287641, + "grad_norm": 1.1555321367694555, + "learning_rate": 4.960735546256183e-06, + "loss": 0.4497, + "step": 5660 + }, + { + "epoch": 0.344730992905642, + "grad_norm": 0.9918514713904871, + "learning_rate": 4.9607214581863035e-06, + "loss": 0.4709, + "step": 5661 + }, + { + "epoch": 0.34479188868251986, + "grad_norm": 0.9987306927661563, + "learning_rate": 4.960707367609492e-06, + "loss": 0.4519, + "step": 5662 + }, + { + "epoch": 0.34485278445939777, + "grad_norm": 1.0940217911128838, + "learning_rate": 4.9606932745257616e-06, + "loss": 0.4128, + "step": 5663 + }, + { + "epoch": 0.3449136802362756, + "grad_norm": 1.1165815690080039, + "learning_rate": 4.96067917893513e-06, + "loss": 0.5675, + "step": 5664 + }, + { + "epoch": 0.3449745760131535, + "grad_norm": 1.0324173832993693, + "learning_rate": 4.960665080837608e-06, + "loss": 0.4196, + "step": 5665 + }, + { + "epoch": 0.34503547179003136, + "grad_norm": 1.0136229581496292, + "learning_rate": 4.960650980233212e-06, + "loss": 0.4778, + "step": 5666 + }, + { + "epoch": 0.34509636756690926, + "grad_norm": 1.160777096600884, + "learning_rate": 4.960636877121957e-06, + "loss": 0.4243, + "step": 5667 + }, + { + "epoch": 0.3451572633437871, + "grad_norm": 0.9889867512317283, + "learning_rate": 4.960622771503854e-06, + "loss": 0.4209, + "step": 5668 + }, + { + "epoch": 0.345218159120665, + "grad_norm": 1.023075860982401, + "learning_rate": 4.960608663378922e-06, + "loss": 0.4136, + "step": 5669 + }, + { + "epoch": 0.34527905489754285, + "grad_norm": 1.0524811911416028, + "learning_rate": 4.960594552747172e-06, + "loss": 0.4621, + "step": 5670 + }, + { + "epoch": 0.34533995067442075, + "grad_norm": 1.0295215205010304, + "learning_rate": 4.96058043960862e-06, + "loss": 0.4204, + "step": 5671 + }, + { + "epoch": 0.3454008464512986, + "grad_norm": 0.9229953517575232, + "learning_rate": 4.960566323963279e-06, + "loss": 0.5216, + "step": 5672 + }, + { + "epoch": 0.3454617422281765, + "grad_norm": 1.0804878771593358, + "learning_rate": 4.960552205811164e-06, + "loss": 0.5068, + "step": 5673 + }, + { + "epoch": 0.34552263800505434, + "grad_norm": 1.2100715262384056, + "learning_rate": 4.960538085152291e-06, + "loss": 0.4159, + "step": 5674 + }, + { + "epoch": 0.34558353378193224, + "grad_norm": 1.1576596188348447, + "learning_rate": 4.960523961986672e-06, + "loss": 0.4367, + "step": 5675 + }, + { + "epoch": 0.3456444295588101, + "grad_norm": 1.0486615824620424, + "learning_rate": 4.960509836314322e-06, + "loss": 0.4639, + "step": 5676 + }, + { + "epoch": 0.345705325335688, + "grad_norm": 1.117942636130738, + "learning_rate": 4.960495708135257e-06, + "loss": 0.4514, + "step": 5677 + }, + { + "epoch": 0.34576622111256583, + "grad_norm": 1.0934865049733116, + "learning_rate": 4.960481577449489e-06, + "loss": 0.5083, + "step": 5678 + }, + { + "epoch": 0.34582711688944373, + "grad_norm": 1.0023496549396533, + "learning_rate": 4.960467444257033e-06, + "loss": 0.4396, + "step": 5679 + }, + { + "epoch": 0.3458880126663216, + "grad_norm": 1.0453654522991345, + "learning_rate": 4.960453308557906e-06, + "loss": 0.4881, + "step": 5680 + }, + { + "epoch": 0.3459489084431995, + "grad_norm": 0.9772829635813838, + "learning_rate": 4.960439170352118e-06, + "loss": 0.5267, + "step": 5681 + }, + { + "epoch": 0.3460098042200773, + "grad_norm": 1.0089227243252519, + "learning_rate": 4.960425029639687e-06, + "loss": 0.4637, + "step": 5682 + }, + { + "epoch": 0.3460706999969552, + "grad_norm": 0.9624478226480379, + "learning_rate": 4.960410886420625e-06, + "loss": 0.4134, + "step": 5683 + }, + { + "epoch": 0.34613159577383307, + "grad_norm": 0.9275128420210255, + "learning_rate": 4.9603967406949495e-06, + "loss": 0.5378, + "step": 5684 + }, + { + "epoch": 0.34619249155071097, + "grad_norm": 0.9956527276118627, + "learning_rate": 4.960382592462672e-06, + "loss": 0.5263, + "step": 5685 + }, + { + "epoch": 0.3462533873275888, + "grad_norm": 1.0148816202869815, + "learning_rate": 4.960368441723808e-06, + "loss": 0.4665, + "step": 5686 + }, + { + "epoch": 0.3463142831044667, + "grad_norm": 1.0065839277913275, + "learning_rate": 4.960354288478371e-06, + "loss": 0.4151, + "step": 5687 + }, + { + "epoch": 0.34637517888134456, + "grad_norm": 0.9965703626913762, + "learning_rate": 4.9603401327263766e-06, + "loss": 0.4389, + "step": 5688 + }, + { + "epoch": 0.34643607465822246, + "grad_norm": 1.0036181489623104, + "learning_rate": 4.960325974467839e-06, + "loss": 0.4367, + "step": 5689 + }, + { + "epoch": 0.3464969704351003, + "grad_norm": 0.9404187418381386, + "learning_rate": 4.960311813702773e-06, + "loss": 0.468, + "step": 5690 + }, + { + "epoch": 0.3465578662119782, + "grad_norm": 1.1916480016027462, + "learning_rate": 4.960297650431192e-06, + "loss": 0.4135, + "step": 5691 + }, + { + "epoch": 0.34661876198885605, + "grad_norm": 1.0224795366854924, + "learning_rate": 4.96028348465311e-06, + "loss": 0.4666, + "step": 5692 + }, + { + "epoch": 0.34667965776573395, + "grad_norm": 0.9889907338598624, + "learning_rate": 4.960269316368543e-06, + "loss": 0.4931, + "step": 5693 + }, + { + "epoch": 0.3467405535426118, + "grad_norm": 1.1108683280749272, + "learning_rate": 4.960255145577505e-06, + "loss": 0.4215, + "step": 5694 + }, + { + "epoch": 0.3468014493194897, + "grad_norm": 1.1071219017619884, + "learning_rate": 4.960240972280011e-06, + "loss": 0.4472, + "step": 5695 + }, + { + "epoch": 0.34686234509636754, + "grad_norm": 1.1282742104845087, + "learning_rate": 4.960226796476073e-06, + "loss": 0.3933, + "step": 5696 + }, + { + "epoch": 0.34692324087324544, + "grad_norm": 1.1075404835902984, + "learning_rate": 4.960212618165709e-06, + "loss": 0.4583, + "step": 5697 + }, + { + "epoch": 0.3469841366501233, + "grad_norm": 1.1116122039272691, + "learning_rate": 4.96019843734893e-06, + "loss": 0.5328, + "step": 5698 + }, + { + "epoch": 0.3470450324270012, + "grad_norm": 0.9217011815339985, + "learning_rate": 4.9601842540257526e-06, + "loss": 0.4653, + "step": 5699 + }, + { + "epoch": 0.3471059282038791, + "grad_norm": 1.0535260701902927, + "learning_rate": 4.9601700681961896e-06, + "loss": 0.4345, + "step": 5700 + }, + { + "epoch": 0.34716682398075693, + "grad_norm": 0.9362950464501706, + "learning_rate": 4.960155879860258e-06, + "loss": 0.4385, + "step": 5701 + }, + { + "epoch": 0.34722771975763483, + "grad_norm": 0.9356506497247293, + "learning_rate": 4.96014168901797e-06, + "loss": 0.5069, + "step": 5702 + }, + { + "epoch": 0.3472886155345127, + "grad_norm": 0.9902603646381289, + "learning_rate": 4.96012749566934e-06, + "loss": 0.4851, + "step": 5703 + }, + { + "epoch": 0.3473495113113906, + "grad_norm": 1.0292583811286604, + "learning_rate": 4.960113299814384e-06, + "loss": 0.4764, + "step": 5704 + }, + { + "epoch": 0.3474104070882684, + "grad_norm": 1.025857166693417, + "learning_rate": 4.960099101453116e-06, + "loss": 0.4585, + "step": 5705 + }, + { + "epoch": 0.3474713028651463, + "grad_norm": 1.0280971457704164, + "learning_rate": 4.96008490058555e-06, + "loss": 0.411, + "step": 5706 + }, + { + "epoch": 0.34753219864202417, + "grad_norm": 1.0392252099942065, + "learning_rate": 4.960070697211701e-06, + "loss": 0.5064, + "step": 5707 + }, + { + "epoch": 0.34759309441890207, + "grad_norm": 0.9781378285298085, + "learning_rate": 4.960056491331583e-06, + "loss": 0.463, + "step": 5708 + }, + { + "epoch": 0.3476539901957799, + "grad_norm": 1.0328726950891054, + "learning_rate": 4.96004228294521e-06, + "loss": 0.4445, + "step": 5709 + }, + { + "epoch": 0.3477148859726578, + "grad_norm": 1.0503879307668702, + "learning_rate": 4.9600280720525975e-06, + "loss": 0.4459, + "step": 5710 + }, + { + "epoch": 0.34777578174953566, + "grad_norm": 0.965358957823711, + "learning_rate": 4.9600138586537595e-06, + "loss": 0.4461, + "step": 5711 + }, + { + "epoch": 0.34783667752641356, + "grad_norm": 1.1426253572654612, + "learning_rate": 4.95999964274871e-06, + "loss": 0.4468, + "step": 5712 + }, + { + "epoch": 0.3478975733032914, + "grad_norm": 1.0010894020433327, + "learning_rate": 4.959985424337464e-06, + "loss": 0.4505, + "step": 5713 + }, + { + "epoch": 0.3479584690801693, + "grad_norm": 1.0956194348160897, + "learning_rate": 4.9599712034200375e-06, + "loss": 0.4717, + "step": 5714 + }, + { + "epoch": 0.34801936485704715, + "grad_norm": 0.9932082966721392, + "learning_rate": 4.959956979996442e-06, + "loss": 0.4164, + "step": 5715 + }, + { + "epoch": 0.34808026063392505, + "grad_norm": 1.1718896757585866, + "learning_rate": 4.959942754066694e-06, + "loss": 0.4279, + "step": 5716 + }, + { + "epoch": 0.3481411564108029, + "grad_norm": 1.0855481587546072, + "learning_rate": 4.959928525630808e-06, + "loss": 0.4999, + "step": 5717 + }, + { + "epoch": 0.3482020521876808, + "grad_norm": 1.028419086348198, + "learning_rate": 4.959914294688797e-06, + "loss": 0.5112, + "step": 5718 + }, + { + "epoch": 0.34826294796455864, + "grad_norm": 1.0176693392470961, + "learning_rate": 4.959900061240677e-06, + "loss": 0.5188, + "step": 5719 + }, + { + "epoch": 0.34832384374143655, + "grad_norm": 1.0220073784281685, + "learning_rate": 4.959885825286462e-06, + "loss": 0.426, + "step": 5720 + }, + { + "epoch": 0.3483847395183144, + "grad_norm": 1.061960759379996, + "learning_rate": 4.959871586826166e-06, + "loss": 0.4764, + "step": 5721 + }, + { + "epoch": 0.3484456352951923, + "grad_norm": 0.9283718037103699, + "learning_rate": 4.959857345859804e-06, + "loss": 0.5389, + "step": 5722 + }, + { + "epoch": 0.34850653107207014, + "grad_norm": 1.1635623162157163, + "learning_rate": 4.959843102387391e-06, + "loss": 0.4511, + "step": 5723 + }, + { + "epoch": 0.34856742684894804, + "grad_norm": 0.9881200507994939, + "learning_rate": 4.95982885640894e-06, + "loss": 0.497, + "step": 5724 + }, + { + "epoch": 0.3486283226258259, + "grad_norm": 1.1225750254237012, + "learning_rate": 4.959814607924468e-06, + "loss": 0.4682, + "step": 5725 + }, + { + "epoch": 0.3486892184027038, + "grad_norm": 1.0054768693201863, + "learning_rate": 4.959800356933987e-06, + "loss": 0.4167, + "step": 5726 + }, + { + "epoch": 0.3487501141795816, + "grad_norm": 0.9440241898611947, + "learning_rate": 4.9597861034375124e-06, + "loss": 0.496, + "step": 5727 + }, + { + "epoch": 0.34881100995645953, + "grad_norm": 1.1928984138330974, + "learning_rate": 4.959771847435059e-06, + "loss": 0.4457, + "step": 5728 + }, + { + "epoch": 0.3488719057333374, + "grad_norm": 1.003615962700787, + "learning_rate": 4.959757588926642e-06, + "loss": 0.4507, + "step": 5729 + }, + { + "epoch": 0.3489328015102153, + "grad_norm": 1.032212559849315, + "learning_rate": 4.959743327912274e-06, + "loss": 0.4756, + "step": 5730 + }, + { + "epoch": 0.3489936972870931, + "grad_norm": 0.9533832621902606, + "learning_rate": 4.959729064391972e-06, + "loss": 0.4804, + "step": 5731 + }, + { + "epoch": 0.349054593063971, + "grad_norm": 1.01980927652901, + "learning_rate": 4.959714798365748e-06, + "loss": 0.4225, + "step": 5732 + }, + { + "epoch": 0.34911548884084886, + "grad_norm": 1.0262226854397782, + "learning_rate": 4.9597005298336175e-06, + "loss": 0.4545, + "step": 5733 + }, + { + "epoch": 0.34917638461772676, + "grad_norm": 0.9638389569729167, + "learning_rate": 4.959686258795596e-06, + "loss": 0.5079, + "step": 5734 + }, + { + "epoch": 0.3492372803946046, + "grad_norm": 1.0259882669534421, + "learning_rate": 4.959671985251697e-06, + "loss": 0.4428, + "step": 5735 + }, + { + "epoch": 0.3492981761714825, + "grad_norm": 1.085503716431905, + "learning_rate": 4.959657709201935e-06, + "loss": 0.5251, + "step": 5736 + }, + { + "epoch": 0.34935907194836036, + "grad_norm": 1.0068612525470007, + "learning_rate": 4.959643430646326e-06, + "loss": 0.5312, + "step": 5737 + }, + { + "epoch": 0.34941996772523826, + "grad_norm": 1.022567661574769, + "learning_rate": 4.959629149584883e-06, + "loss": 0.5777, + "step": 5738 + }, + { + "epoch": 0.3494808635021161, + "grad_norm": 0.9841091286510206, + "learning_rate": 4.95961486601762e-06, + "loss": 0.4298, + "step": 5739 + }, + { + "epoch": 0.349541759278994, + "grad_norm": 0.9716153451005802, + "learning_rate": 4.959600579944553e-06, + "loss": 0.4698, + "step": 5740 + }, + { + "epoch": 0.3496026550558719, + "grad_norm": 1.0073133319487793, + "learning_rate": 4.959586291365696e-06, + "loss": 0.4678, + "step": 5741 + }, + { + "epoch": 0.34966355083274975, + "grad_norm": 1.0060876512794463, + "learning_rate": 4.959572000281064e-06, + "loss": 0.4198, + "step": 5742 + }, + { + "epoch": 0.34972444660962765, + "grad_norm": 1.0899541469149239, + "learning_rate": 4.959557706690671e-06, + "loss": 0.4173, + "step": 5743 + }, + { + "epoch": 0.3497853423865055, + "grad_norm": 1.0653359371349262, + "learning_rate": 4.959543410594532e-06, + "loss": 0.4405, + "step": 5744 + }, + { + "epoch": 0.3498462381633834, + "grad_norm": 1.0426734887392606, + "learning_rate": 4.95952911199266e-06, + "loss": 0.4563, + "step": 5745 + }, + { + "epoch": 0.34990713394026124, + "grad_norm": 1.015641237606067, + "learning_rate": 4.959514810885072e-06, + "loss": 0.4967, + "step": 5746 + }, + { + "epoch": 0.34996802971713914, + "grad_norm": 1.072761470336275, + "learning_rate": 4.9595005072717815e-06, + "loss": 0.496, + "step": 5747 + }, + { + "epoch": 0.350028925494017, + "grad_norm": 0.9827751685332833, + "learning_rate": 4.959486201152803e-06, + "loss": 0.5152, + "step": 5748 + }, + { + "epoch": 0.3500898212708949, + "grad_norm": 1.09392015966596, + "learning_rate": 4.959471892528151e-06, + "loss": 0.5136, + "step": 5749 + }, + { + "epoch": 0.35015071704777273, + "grad_norm": 0.9968944583252702, + "learning_rate": 4.95945758139784e-06, + "loss": 0.4502, + "step": 5750 + }, + { + "epoch": 0.35021161282465063, + "grad_norm": 1.0782845028477257, + "learning_rate": 4.959443267761886e-06, + "loss": 0.4837, + "step": 5751 + }, + { + "epoch": 0.3502725086015285, + "grad_norm": 1.0970497043931706, + "learning_rate": 4.9594289516203e-06, + "loss": 0.5353, + "step": 5752 + }, + { + "epoch": 0.3503334043784064, + "grad_norm": 1.0612421716639384, + "learning_rate": 4.9594146329731e-06, + "loss": 0.469, + "step": 5753 + }, + { + "epoch": 0.3503943001552842, + "grad_norm": 1.1095159828830734, + "learning_rate": 4.9594003118203e-06, + "loss": 0.4698, + "step": 5754 + }, + { + "epoch": 0.3504551959321621, + "grad_norm": 0.8824566643327829, + "learning_rate": 4.959385988161914e-06, + "loss": 0.5397, + "step": 5755 + }, + { + "epoch": 0.35051609170903997, + "grad_norm": 0.9516776107329558, + "learning_rate": 4.959371661997956e-06, + "loss": 0.5067, + "step": 5756 + }, + { + "epoch": 0.35057698748591787, + "grad_norm": 0.9872365455371239, + "learning_rate": 4.959357333328442e-06, + "loss": 0.528, + "step": 5757 + }, + { + "epoch": 0.3506378832627957, + "grad_norm": 1.1557805577736708, + "learning_rate": 4.959343002153385e-06, + "loss": 0.5139, + "step": 5758 + }, + { + "epoch": 0.3506987790396736, + "grad_norm": 1.1113972547059587, + "learning_rate": 4.959328668472801e-06, + "loss": 0.4952, + "step": 5759 + }, + { + "epoch": 0.35075967481655146, + "grad_norm": 1.0037757119469608, + "learning_rate": 4.9593143322867046e-06, + "loss": 0.5121, + "step": 5760 + }, + { + "epoch": 0.35082057059342936, + "grad_norm": 1.074127398095707, + "learning_rate": 4.959299993595109e-06, + "loss": 0.4739, + "step": 5761 + }, + { + "epoch": 0.3508814663703072, + "grad_norm": 0.9672592946902565, + "learning_rate": 4.95928565239803e-06, + "loss": 0.474, + "step": 5762 + }, + { + "epoch": 0.3509423621471851, + "grad_norm": 0.9176553143674648, + "learning_rate": 4.9592713086954824e-06, + "loss": 0.4978, + "step": 5763 + }, + { + "epoch": 0.35100325792406295, + "grad_norm": 1.0235887818533327, + "learning_rate": 4.95925696248748e-06, + "loss": 0.5063, + "step": 5764 + }, + { + "epoch": 0.35106415370094085, + "grad_norm": 1.0355108771081158, + "learning_rate": 4.9592426137740375e-06, + "loss": 0.4973, + "step": 5765 + }, + { + "epoch": 0.3511250494778187, + "grad_norm": 1.0868327951935244, + "learning_rate": 4.95922826255517e-06, + "loss": 0.442, + "step": 5766 + }, + { + "epoch": 0.3511859452546966, + "grad_norm": 0.9943808499495443, + "learning_rate": 4.959213908830892e-06, + "loss": 0.5062, + "step": 5767 + }, + { + "epoch": 0.35124684103157444, + "grad_norm": 1.0091813973218928, + "learning_rate": 4.959199552601217e-06, + "loss": 0.5116, + "step": 5768 + }, + { + "epoch": 0.35130773680845234, + "grad_norm": 1.0203425801653885, + "learning_rate": 4.959185193866161e-06, + "loss": 0.5121, + "step": 5769 + }, + { + "epoch": 0.3513686325853302, + "grad_norm": 1.11459253870132, + "learning_rate": 4.95917083262574e-06, + "loss": 0.492, + "step": 5770 + }, + { + "epoch": 0.3514295283622081, + "grad_norm": 0.9990058670329051, + "learning_rate": 4.9591564688799655e-06, + "loss": 0.4879, + "step": 5771 + }, + { + "epoch": 0.35149042413908593, + "grad_norm": 1.1077713452197215, + "learning_rate": 4.9591421026288535e-06, + "loss": 0.4592, + "step": 5772 + }, + { + "epoch": 0.35155131991596383, + "grad_norm": 1.149168380532495, + "learning_rate": 4.959127733872419e-06, + "loss": 0.4174, + "step": 5773 + }, + { + "epoch": 0.3516122156928417, + "grad_norm": 0.9341431746724786, + "learning_rate": 4.959113362610676e-06, + "loss": 0.5192, + "step": 5774 + }, + { + "epoch": 0.3516731114697196, + "grad_norm": 1.0733498007118232, + "learning_rate": 4.9590989888436395e-06, + "loss": 0.4049, + "step": 5775 + }, + { + "epoch": 0.3517340072465974, + "grad_norm": 1.1435281853543617, + "learning_rate": 4.959084612571323e-06, + "loss": 0.4904, + "step": 5776 + }, + { + "epoch": 0.3517949030234753, + "grad_norm": 1.0227590791259593, + "learning_rate": 4.959070233793743e-06, + "loss": 0.4812, + "step": 5777 + }, + { + "epoch": 0.35185579880035317, + "grad_norm": 0.9915332243407481, + "learning_rate": 4.959055852510914e-06, + "loss": 0.5722, + "step": 5778 + }, + { + "epoch": 0.35191669457723107, + "grad_norm": 1.0291565911177785, + "learning_rate": 4.95904146872285e-06, + "loss": 0.4638, + "step": 5779 + }, + { + "epoch": 0.3519775903541089, + "grad_norm": 1.045572065162049, + "learning_rate": 4.959027082429565e-06, + "loss": 0.3721, + "step": 5780 + }, + { + "epoch": 0.3520384861309868, + "grad_norm": 0.939145688087458, + "learning_rate": 4.959012693631075e-06, + "loss": 0.489, + "step": 5781 + }, + { + "epoch": 0.3520993819078647, + "grad_norm": 1.0354191048711159, + "learning_rate": 4.958998302327394e-06, + "loss": 0.4825, + "step": 5782 + }, + { + "epoch": 0.35216027768474256, + "grad_norm": 0.9369769537465518, + "learning_rate": 4.958983908518536e-06, + "loss": 0.429, + "step": 5783 + }, + { + "epoch": 0.35222117346162046, + "grad_norm": 0.9706822409635127, + "learning_rate": 4.958969512204517e-06, + "loss": 0.4796, + "step": 5784 + }, + { + "epoch": 0.3522820692384983, + "grad_norm": 0.917289769046061, + "learning_rate": 4.95895511338535e-06, + "loss": 0.5213, + "step": 5785 + }, + { + "epoch": 0.3523429650153762, + "grad_norm": 1.0470036822451954, + "learning_rate": 4.958940712061051e-06, + "loss": 0.5088, + "step": 5786 + }, + { + "epoch": 0.35240386079225405, + "grad_norm": 1.0563567434829375, + "learning_rate": 4.958926308231635e-06, + "loss": 0.4855, + "step": 5787 + }, + { + "epoch": 0.35246475656913195, + "grad_norm": 1.1407818060378725, + "learning_rate": 4.958911901897115e-06, + "loss": 0.4277, + "step": 5788 + }, + { + "epoch": 0.3525256523460098, + "grad_norm": 1.0143565966791586, + "learning_rate": 4.958897493057507e-06, + "loss": 0.5061, + "step": 5789 + }, + { + "epoch": 0.3525865481228877, + "grad_norm": 1.1845835356507584, + "learning_rate": 4.9588830817128265e-06, + "loss": 0.3821, + "step": 5790 + }, + { + "epoch": 0.35264744389976554, + "grad_norm": 0.9801943779325643, + "learning_rate": 4.958868667863086e-06, + "loss": 0.5043, + "step": 5791 + }, + { + "epoch": 0.35270833967664345, + "grad_norm": 1.063502689389363, + "learning_rate": 4.958854251508301e-06, + "loss": 0.5099, + "step": 5792 + }, + { + "epoch": 0.3527692354535213, + "grad_norm": 1.0864233367652305, + "learning_rate": 4.958839832648487e-06, + "loss": 0.4546, + "step": 5793 + }, + { + "epoch": 0.3528301312303992, + "grad_norm": 1.0554420087824048, + "learning_rate": 4.958825411283658e-06, + "loss": 0.4402, + "step": 5794 + }, + { + "epoch": 0.35289102700727704, + "grad_norm": 0.9555852888090719, + "learning_rate": 4.958810987413828e-06, + "loss": 0.4644, + "step": 5795 + }, + { + "epoch": 0.35295192278415494, + "grad_norm": 1.0803828855612032, + "learning_rate": 4.958796561039014e-06, + "loss": 0.4347, + "step": 5796 + }, + { + "epoch": 0.3530128185610328, + "grad_norm": 1.0269595084933079, + "learning_rate": 4.958782132159228e-06, + "loss": 0.4503, + "step": 5797 + }, + { + "epoch": 0.3530737143379107, + "grad_norm": 1.0500386790331417, + "learning_rate": 4.958767700774486e-06, + "loss": 0.45, + "step": 5798 + }, + { + "epoch": 0.3531346101147885, + "grad_norm": 0.9927885055534464, + "learning_rate": 4.958753266884803e-06, + "loss": 0.5282, + "step": 5799 + }, + { + "epoch": 0.35319550589166643, + "grad_norm": 1.1094743893515495, + "learning_rate": 4.958738830490193e-06, + "loss": 0.4271, + "step": 5800 + }, + { + "epoch": 0.3532564016685443, + "grad_norm": 0.9892202154896496, + "learning_rate": 4.958724391590671e-06, + "loss": 0.4822, + "step": 5801 + }, + { + "epoch": 0.3533172974454222, + "grad_norm": 1.024965452247808, + "learning_rate": 4.958709950186253e-06, + "loss": 0.4534, + "step": 5802 + }, + { + "epoch": 0.3533781932223, + "grad_norm": 1.1534593107672708, + "learning_rate": 4.95869550627695e-06, + "loss": 0.4392, + "step": 5803 + }, + { + "epoch": 0.3534390889991779, + "grad_norm": 0.8905709010255881, + "learning_rate": 4.958681059862781e-06, + "loss": 0.4984, + "step": 5804 + }, + { + "epoch": 0.35349998477605576, + "grad_norm": 1.0194239868218125, + "learning_rate": 4.958666610943758e-06, + "loss": 0.4286, + "step": 5805 + }, + { + "epoch": 0.35356088055293367, + "grad_norm": 1.0081393567153918, + "learning_rate": 4.958652159519896e-06, + "loss": 0.4657, + "step": 5806 + }, + { + "epoch": 0.3536217763298115, + "grad_norm": 1.0615990008753917, + "learning_rate": 4.958637705591211e-06, + "loss": 0.4956, + "step": 5807 + }, + { + "epoch": 0.3536826721066894, + "grad_norm": 1.0766787303443972, + "learning_rate": 4.958623249157718e-06, + "loss": 0.4099, + "step": 5808 + }, + { + "epoch": 0.35374356788356726, + "grad_norm": 1.002634271266341, + "learning_rate": 4.95860879021943e-06, + "loss": 0.48, + "step": 5809 + }, + { + "epoch": 0.35380446366044516, + "grad_norm": 1.091201787901894, + "learning_rate": 4.9585943287763625e-06, + "loss": 0.4733, + "step": 5810 + }, + { + "epoch": 0.353865359437323, + "grad_norm": 1.031762857670462, + "learning_rate": 4.95857986482853e-06, + "loss": 0.4952, + "step": 5811 + }, + { + "epoch": 0.3539262552142009, + "grad_norm": 0.9891647247891716, + "learning_rate": 4.958565398375947e-06, + "loss": 0.4236, + "step": 5812 + }, + { + "epoch": 0.35398715099107875, + "grad_norm": 1.0209388457909196, + "learning_rate": 4.9585509294186295e-06, + "loss": 0.4106, + "step": 5813 + }, + { + "epoch": 0.35404804676795665, + "grad_norm": 1.1386349403009695, + "learning_rate": 4.958536457956591e-06, + "loss": 0.4293, + "step": 5814 + }, + { + "epoch": 0.3541089425448345, + "grad_norm": 1.0565524029271371, + "learning_rate": 4.958521983989847e-06, + "loss": 0.453, + "step": 5815 + }, + { + "epoch": 0.3541698383217124, + "grad_norm": 1.068358881615297, + "learning_rate": 4.9585075075184115e-06, + "loss": 0.5053, + "step": 5816 + }, + { + "epoch": 0.35423073409859024, + "grad_norm": 1.0022441318320334, + "learning_rate": 4.958493028542299e-06, + "loss": 0.4499, + "step": 5817 + }, + { + "epoch": 0.35429162987546814, + "grad_norm": 1.1444720478209816, + "learning_rate": 4.958478547061526e-06, + "loss": 0.4881, + "step": 5818 + }, + { + "epoch": 0.354352525652346, + "grad_norm": 1.0149181365284936, + "learning_rate": 4.9584640630761066e-06, + "loss": 0.55, + "step": 5819 + }, + { + "epoch": 0.3544134214292239, + "grad_norm": 1.0907623858407682, + "learning_rate": 4.958449576586054e-06, + "loss": 0.422, + "step": 5820 + }, + { + "epoch": 0.35447431720610173, + "grad_norm": 0.9901606957254873, + "learning_rate": 4.9584350875913854e-06, + "loss": 0.5226, + "step": 5821 + }, + { + "epoch": 0.35453521298297963, + "grad_norm": 1.0566735381028693, + "learning_rate": 4.958420596092113e-06, + "loss": 0.4576, + "step": 5822 + }, + { + "epoch": 0.35459610875985753, + "grad_norm": 1.113906757411018, + "learning_rate": 4.958406102088253e-06, + "loss": 0.4568, + "step": 5823 + }, + { + "epoch": 0.3546570045367354, + "grad_norm": 0.9879151231720895, + "learning_rate": 4.95839160557982e-06, + "loss": 0.4293, + "step": 5824 + }, + { + "epoch": 0.3547179003136133, + "grad_norm": 1.0056470562277258, + "learning_rate": 4.95837710656683e-06, + "loss": 0.4639, + "step": 5825 + }, + { + "epoch": 0.3547787960904911, + "grad_norm": 1.0008365360185616, + "learning_rate": 4.958362605049295e-06, + "loss": 0.5364, + "step": 5826 + }, + { + "epoch": 0.354839691867369, + "grad_norm": 0.9719234095444994, + "learning_rate": 4.958348101027232e-06, + "loss": 0.4231, + "step": 5827 + }, + { + "epoch": 0.35490058764424687, + "grad_norm": 0.9959546951866003, + "learning_rate": 4.958333594500654e-06, + "loss": 0.4415, + "step": 5828 + }, + { + "epoch": 0.35496148342112477, + "grad_norm": 0.9918594752136062, + "learning_rate": 4.958319085469578e-06, + "loss": 0.4973, + "step": 5829 + }, + { + "epoch": 0.3550223791980026, + "grad_norm": 0.915755926993677, + "learning_rate": 4.958304573934017e-06, + "loss": 0.5565, + "step": 5830 + }, + { + "epoch": 0.3550832749748805, + "grad_norm": 1.0460126732799004, + "learning_rate": 4.9582900598939874e-06, + "loss": 0.4983, + "step": 5831 + }, + { + "epoch": 0.35514417075175836, + "grad_norm": 0.922635357327736, + "learning_rate": 4.958275543349501e-06, + "loss": 0.4742, + "step": 5832 + }, + { + "epoch": 0.35520506652863626, + "grad_norm": 1.0413815637127861, + "learning_rate": 4.958261024300577e-06, + "loss": 0.4961, + "step": 5833 + }, + { + "epoch": 0.3552659623055141, + "grad_norm": 1.0001349248573175, + "learning_rate": 4.958246502747226e-06, + "loss": 0.4509, + "step": 5834 + }, + { + "epoch": 0.355326858082392, + "grad_norm": 0.9623168925560192, + "learning_rate": 4.958231978689465e-06, + "loss": 0.5614, + "step": 5835 + }, + { + "epoch": 0.35538775385926985, + "grad_norm": 1.1469875781620058, + "learning_rate": 4.9582174521273095e-06, + "loss": 0.4463, + "step": 5836 + }, + { + "epoch": 0.35544864963614775, + "grad_norm": 0.9587321610466831, + "learning_rate": 4.958202923060772e-06, + "loss": 0.463, + "step": 5837 + }, + { + "epoch": 0.3555095454130256, + "grad_norm": 1.0598560657927787, + "learning_rate": 4.958188391489869e-06, + "loss": 0.5083, + "step": 5838 + }, + { + "epoch": 0.3555704411899035, + "grad_norm": 0.9499490475287559, + "learning_rate": 4.958173857414615e-06, + "loss": 0.4736, + "step": 5839 + }, + { + "epoch": 0.35563133696678134, + "grad_norm": 1.0367236745305477, + "learning_rate": 4.958159320835023e-06, + "loss": 0.4283, + "step": 5840 + }, + { + "epoch": 0.35569223274365924, + "grad_norm": 0.9668126320429052, + "learning_rate": 4.95814478175111e-06, + "loss": 0.5009, + "step": 5841 + }, + { + "epoch": 0.3557531285205371, + "grad_norm": 0.9602854132970177, + "learning_rate": 4.958130240162891e-06, + "loss": 0.5055, + "step": 5842 + }, + { + "epoch": 0.355814024297415, + "grad_norm": 0.9878791948395165, + "learning_rate": 4.95811569607038e-06, + "loss": 0.4559, + "step": 5843 + }, + { + "epoch": 0.35587492007429283, + "grad_norm": 1.0240913335563584, + "learning_rate": 4.958101149473591e-06, + "loss": 0.4832, + "step": 5844 + }, + { + "epoch": 0.35593581585117073, + "grad_norm": 1.1151448624456637, + "learning_rate": 4.95808660037254e-06, + "loss": 0.4577, + "step": 5845 + }, + { + "epoch": 0.3559967116280486, + "grad_norm": 1.0637491124643705, + "learning_rate": 4.958072048767243e-06, + "loss": 0.4481, + "step": 5846 + }, + { + "epoch": 0.3560576074049265, + "grad_norm": 1.10364794145267, + "learning_rate": 4.958057494657711e-06, + "loss": 0.5197, + "step": 5847 + }, + { + "epoch": 0.3561185031818043, + "grad_norm": 0.9576133491012419, + "learning_rate": 4.958042938043962e-06, + "loss": 0.5178, + "step": 5848 + }, + { + "epoch": 0.3561793989586822, + "grad_norm": 1.0760492963852222, + "learning_rate": 4.95802837892601e-06, + "loss": 0.4917, + "step": 5849 + }, + { + "epoch": 0.35624029473556007, + "grad_norm": 1.0367700574232612, + "learning_rate": 4.958013817303871e-06, + "loss": 0.5404, + "step": 5850 + }, + { + "epoch": 0.35630119051243797, + "grad_norm": 1.053555659651629, + "learning_rate": 4.957999253177557e-06, + "loss": 0.4468, + "step": 5851 + }, + { + "epoch": 0.3563620862893158, + "grad_norm": 1.0442939819887946, + "learning_rate": 4.9579846865470845e-06, + "loss": 0.4519, + "step": 5852 + }, + { + "epoch": 0.3564229820661937, + "grad_norm": 1.0204468980682833, + "learning_rate": 4.957970117412468e-06, + "loss": 0.5102, + "step": 5853 + }, + { + "epoch": 0.35648387784307156, + "grad_norm": 1.0682810745415983, + "learning_rate": 4.957955545773724e-06, + "loss": 0.435, + "step": 5854 + }, + { + "epoch": 0.35654477361994946, + "grad_norm": 1.040436312857044, + "learning_rate": 4.957940971630866e-06, + "loss": 0.4493, + "step": 5855 + }, + { + "epoch": 0.3566056693968273, + "grad_norm": 1.1527778248080882, + "learning_rate": 4.957926394983908e-06, + "loss": 0.5319, + "step": 5856 + }, + { + "epoch": 0.3566665651737052, + "grad_norm": 1.0840078558014805, + "learning_rate": 4.957911815832865e-06, + "loss": 0.4274, + "step": 5857 + }, + { + "epoch": 0.35672746095058305, + "grad_norm": 1.0958302519497423, + "learning_rate": 4.9578972341777544e-06, + "loss": 0.4354, + "step": 5858 + }, + { + "epoch": 0.35678835672746095, + "grad_norm": 0.9617409420146571, + "learning_rate": 4.957882650018588e-06, + "loss": 0.5326, + "step": 5859 + }, + { + "epoch": 0.3568492525043388, + "grad_norm": 1.0476802978180613, + "learning_rate": 4.957868063355382e-06, + "loss": 0.4547, + "step": 5860 + }, + { + "epoch": 0.3569101482812167, + "grad_norm": 1.0283087315519477, + "learning_rate": 4.9578534741881514e-06, + "loss": 0.4609, + "step": 5861 + }, + { + "epoch": 0.35697104405809454, + "grad_norm": 1.0909251695228843, + "learning_rate": 4.957838882516911e-06, + "loss": 0.4631, + "step": 5862 + }, + { + "epoch": 0.35703193983497244, + "grad_norm": 1.11477768071488, + "learning_rate": 4.957824288341675e-06, + "loss": 0.4779, + "step": 5863 + }, + { + "epoch": 0.35709283561185035, + "grad_norm": 1.0802016087609425, + "learning_rate": 4.9578096916624584e-06, + "loss": 0.4671, + "step": 5864 + }, + { + "epoch": 0.3571537313887282, + "grad_norm": 0.9580290017027775, + "learning_rate": 4.957795092479276e-06, + "loss": 0.5113, + "step": 5865 + }, + { + "epoch": 0.3572146271656061, + "grad_norm": 1.0112167303461215, + "learning_rate": 4.957780490792145e-06, + "loss": 0.4752, + "step": 5866 + }, + { + "epoch": 0.35727552294248394, + "grad_norm": 1.1463074162289153, + "learning_rate": 4.957765886601076e-06, + "loss": 0.3739, + "step": 5867 + }, + { + "epoch": 0.35733641871936184, + "grad_norm": 0.9852992350315424, + "learning_rate": 4.957751279906088e-06, + "loss": 0.4712, + "step": 5868 + }, + { + "epoch": 0.3573973144962397, + "grad_norm": 1.0597298246839522, + "learning_rate": 4.957736670707193e-06, + "loss": 0.4082, + "step": 5869 + }, + { + "epoch": 0.3574582102731176, + "grad_norm": 1.036383031105337, + "learning_rate": 4.957722059004408e-06, + "loss": 0.453, + "step": 5870 + }, + { + "epoch": 0.3575191060499954, + "grad_norm": 1.0157959772736564, + "learning_rate": 4.957707444797746e-06, + "loss": 0.5129, + "step": 5871 + }, + { + "epoch": 0.35758000182687333, + "grad_norm": 0.9954113804903888, + "learning_rate": 4.957692828087223e-06, + "loss": 0.4758, + "step": 5872 + }, + { + "epoch": 0.3576408976037512, + "grad_norm": 1.0486211271368207, + "learning_rate": 4.957678208872854e-06, + "loss": 0.4482, + "step": 5873 + }, + { + "epoch": 0.3577017933806291, + "grad_norm": 1.0919893504218119, + "learning_rate": 4.957663587154653e-06, + "loss": 0.4534, + "step": 5874 + }, + { + "epoch": 0.3577626891575069, + "grad_norm": 1.0218558136429827, + "learning_rate": 4.957648962932635e-06, + "loss": 0.4999, + "step": 5875 + }, + { + "epoch": 0.3578235849343848, + "grad_norm": 0.982785928377609, + "learning_rate": 4.9576343362068165e-06, + "loss": 0.5139, + "step": 5876 + }, + { + "epoch": 0.35788448071126266, + "grad_norm": 1.0088206267948443, + "learning_rate": 4.95761970697721e-06, + "loss": 0.4172, + "step": 5877 + }, + { + "epoch": 0.35794537648814057, + "grad_norm": 0.9669306785284851, + "learning_rate": 4.957605075243833e-06, + "loss": 0.4672, + "step": 5878 + }, + { + "epoch": 0.3580062722650184, + "grad_norm": 0.9708568619355379, + "learning_rate": 4.9575904410066985e-06, + "loss": 0.4848, + "step": 5879 + }, + { + "epoch": 0.3580671680418963, + "grad_norm": 0.9762028652241797, + "learning_rate": 4.9575758042658215e-06, + "loss": 0.4817, + "step": 5880 + }, + { + "epoch": 0.35812806381877416, + "grad_norm": 1.0297337999808611, + "learning_rate": 4.9575611650212176e-06, + "loss": 0.5041, + "step": 5881 + }, + { + "epoch": 0.35818895959565206, + "grad_norm": 1.0710275766550497, + "learning_rate": 4.957546523272902e-06, + "loss": 0.489, + "step": 5882 + }, + { + "epoch": 0.3582498553725299, + "grad_norm": 1.0105072600979745, + "learning_rate": 4.957531879020888e-06, + "loss": 0.4489, + "step": 5883 + }, + { + "epoch": 0.3583107511494078, + "grad_norm": 1.0257201529159878, + "learning_rate": 4.9575172322651935e-06, + "loss": 0.4916, + "step": 5884 + }, + { + "epoch": 0.35837164692628565, + "grad_norm": 1.026349944225731, + "learning_rate": 4.95750258300583e-06, + "loss": 0.4709, + "step": 5885 + }, + { + "epoch": 0.35843254270316355, + "grad_norm": 1.0441627146862915, + "learning_rate": 4.957487931242814e-06, + "loss": 0.4669, + "step": 5886 + }, + { + "epoch": 0.3584934384800414, + "grad_norm": 0.9887847953389347, + "learning_rate": 4.957473276976161e-06, + "loss": 0.4836, + "step": 5887 + }, + { + "epoch": 0.3585543342569193, + "grad_norm": 1.0970195300581362, + "learning_rate": 4.957458620205885e-06, + "loss": 0.4464, + "step": 5888 + }, + { + "epoch": 0.35861523003379714, + "grad_norm": 1.1252875438933703, + "learning_rate": 4.957443960932001e-06, + "loss": 0.4573, + "step": 5889 + }, + { + "epoch": 0.35867612581067504, + "grad_norm": 1.059057300506433, + "learning_rate": 4.957429299154525e-06, + "loss": 0.3822, + "step": 5890 + }, + { + "epoch": 0.3587370215875529, + "grad_norm": 0.9952782178007933, + "learning_rate": 4.957414634873469e-06, + "loss": 0.4964, + "step": 5891 + }, + { + "epoch": 0.3587979173644308, + "grad_norm": 1.0432259786015223, + "learning_rate": 4.957399968088853e-06, + "loss": 0.4186, + "step": 5892 + }, + { + "epoch": 0.35885881314130863, + "grad_norm": 1.0519930358030436, + "learning_rate": 4.9573852988006875e-06, + "loss": 0.4921, + "step": 5893 + }, + { + "epoch": 0.35891970891818653, + "grad_norm": 1.0053258016141355, + "learning_rate": 4.957370627008989e-06, + "loss": 0.4484, + "step": 5894 + }, + { + "epoch": 0.3589806046950644, + "grad_norm": 1.020268077113043, + "learning_rate": 4.957355952713772e-06, + "loss": 0.4695, + "step": 5895 + }, + { + "epoch": 0.3590415004719423, + "grad_norm": 0.9997404981494037, + "learning_rate": 4.957341275915053e-06, + "loss": 0.517, + "step": 5896 + }, + { + "epoch": 0.3591023962488201, + "grad_norm": 1.0143904090800628, + "learning_rate": 4.957326596612845e-06, + "loss": 0.3909, + "step": 5897 + }, + { + "epoch": 0.359163292025698, + "grad_norm": 0.9973650985281276, + "learning_rate": 4.957311914807164e-06, + "loss": 0.5095, + "step": 5898 + }, + { + "epoch": 0.35922418780257587, + "grad_norm": 1.0274053871835211, + "learning_rate": 4.9572972304980235e-06, + "loss": 0.5233, + "step": 5899 + }, + { + "epoch": 0.35928508357945377, + "grad_norm": 0.9770482494953905, + "learning_rate": 4.957282543685441e-06, + "loss": 0.4522, + "step": 5900 + }, + { + "epoch": 0.3593459793563316, + "grad_norm": 1.0679404824122398, + "learning_rate": 4.9572678543694305e-06, + "loss": 0.4161, + "step": 5901 + }, + { + "epoch": 0.3594068751332095, + "grad_norm": 1.084481851770722, + "learning_rate": 4.957253162550006e-06, + "loss": 0.4829, + "step": 5902 + }, + { + "epoch": 0.35946777091008736, + "grad_norm": 0.9951622234708237, + "learning_rate": 4.957238468227183e-06, + "loss": 0.4336, + "step": 5903 + }, + { + "epoch": 0.35952866668696526, + "grad_norm": 1.0325057298968152, + "learning_rate": 4.957223771400977e-06, + "loss": 0.4713, + "step": 5904 + }, + { + "epoch": 0.35958956246384316, + "grad_norm": 0.9891865094815111, + "learning_rate": 4.957209072071402e-06, + "loss": 0.4927, + "step": 5905 + }, + { + "epoch": 0.359650458240721, + "grad_norm": 1.0428126020874233, + "learning_rate": 4.957194370238473e-06, + "loss": 0.4297, + "step": 5906 + }, + { + "epoch": 0.3597113540175989, + "grad_norm": 1.0278613965896726, + "learning_rate": 4.957179665902206e-06, + "loss": 0.4625, + "step": 5907 + }, + { + "epoch": 0.35977224979447675, + "grad_norm": 1.0280263358718158, + "learning_rate": 4.957164959062616e-06, + "loss": 0.4736, + "step": 5908 + }, + { + "epoch": 0.35983314557135465, + "grad_norm": 1.0602336656236964, + "learning_rate": 4.957150249719716e-06, + "loss": 0.5164, + "step": 5909 + }, + { + "epoch": 0.3598940413482325, + "grad_norm": 1.0546492461331096, + "learning_rate": 4.957135537873524e-06, + "loss": 0.4243, + "step": 5910 + }, + { + "epoch": 0.3599549371251104, + "grad_norm": 1.0677673155637681, + "learning_rate": 4.957120823524053e-06, + "loss": 0.4208, + "step": 5911 + }, + { + "epoch": 0.36001583290198824, + "grad_norm": 1.0344323393209731, + "learning_rate": 4.957106106671318e-06, + "loss": 0.4674, + "step": 5912 + }, + { + "epoch": 0.36007672867886614, + "grad_norm": 0.985177967980951, + "learning_rate": 4.957091387315334e-06, + "loss": 0.5185, + "step": 5913 + }, + { + "epoch": 0.360137624455744, + "grad_norm": 0.9915911080927179, + "learning_rate": 4.957076665456117e-06, + "loss": 0.5027, + "step": 5914 + }, + { + "epoch": 0.3601985202326219, + "grad_norm": 1.0913910746436848, + "learning_rate": 4.957061941093681e-06, + "loss": 0.4411, + "step": 5915 + }, + { + "epoch": 0.36025941600949973, + "grad_norm": 1.1780768474503296, + "learning_rate": 4.957047214228042e-06, + "loss": 0.4678, + "step": 5916 + }, + { + "epoch": 0.36032031178637763, + "grad_norm": 0.9855363408467105, + "learning_rate": 4.957032484859214e-06, + "loss": 0.4825, + "step": 5917 + }, + { + "epoch": 0.3603812075632555, + "grad_norm": 0.9639579403525318, + "learning_rate": 4.957017752987212e-06, + "loss": 0.4485, + "step": 5918 + }, + { + "epoch": 0.3604421033401334, + "grad_norm": 1.085007529057979, + "learning_rate": 4.957003018612052e-06, + "loss": 0.4171, + "step": 5919 + }, + { + "epoch": 0.3605029991170112, + "grad_norm": 1.0193559091832947, + "learning_rate": 4.956988281733748e-06, + "loss": 0.4327, + "step": 5920 + }, + { + "epoch": 0.3605638948938891, + "grad_norm": 1.0616037944886991, + "learning_rate": 4.956973542352316e-06, + "loss": 0.4418, + "step": 5921 + }, + { + "epoch": 0.36062479067076697, + "grad_norm": 0.9602900213930493, + "learning_rate": 4.9569588004677695e-06, + "loss": 0.5582, + "step": 5922 + }, + { + "epoch": 0.36068568644764487, + "grad_norm": 1.1299709087996834, + "learning_rate": 4.9569440560801256e-06, + "loss": 0.4631, + "step": 5923 + }, + { + "epoch": 0.3607465822245227, + "grad_norm": 0.9877585563639111, + "learning_rate": 4.956929309189397e-06, + "loss": 0.4663, + "step": 5924 + }, + { + "epoch": 0.3608074780014006, + "grad_norm": 1.0526525284122275, + "learning_rate": 4.956914559795601e-06, + "loss": 0.4537, + "step": 5925 + }, + { + "epoch": 0.36086837377827846, + "grad_norm": 0.9971557306201985, + "learning_rate": 4.956899807898751e-06, + "loss": 0.4661, + "step": 5926 + }, + { + "epoch": 0.36092926955515636, + "grad_norm": 0.9939795988172753, + "learning_rate": 4.956885053498862e-06, + "loss": 0.5441, + "step": 5927 + }, + { + "epoch": 0.3609901653320342, + "grad_norm": 1.01095820218077, + "learning_rate": 4.95687029659595e-06, + "loss": 0.4489, + "step": 5928 + }, + { + "epoch": 0.3610510611089121, + "grad_norm": 1.043583789849136, + "learning_rate": 4.95685553719003e-06, + "loss": 0.4151, + "step": 5929 + }, + { + "epoch": 0.36111195688578995, + "grad_norm": 1.0294594766415035, + "learning_rate": 4.956840775281117e-06, + "loss": 0.4502, + "step": 5930 + }, + { + "epoch": 0.36117285266266785, + "grad_norm": 1.0995680493589644, + "learning_rate": 4.956826010869224e-06, + "loss": 0.4109, + "step": 5931 + }, + { + "epoch": 0.3612337484395457, + "grad_norm": 1.0974756502020424, + "learning_rate": 4.95681124395437e-06, + "loss": 0.4954, + "step": 5932 + }, + { + "epoch": 0.3612946442164236, + "grad_norm": 1.048101158622435, + "learning_rate": 4.956796474536566e-06, + "loss": 0.506, + "step": 5933 + }, + { + "epoch": 0.36135553999330144, + "grad_norm": 1.127577634585211, + "learning_rate": 4.95678170261583e-06, + "loss": 0.4771, + "step": 5934 + }, + { + "epoch": 0.36141643577017935, + "grad_norm": 1.0314552170178095, + "learning_rate": 4.956766928192175e-06, + "loss": 0.4116, + "step": 5935 + }, + { + "epoch": 0.3614773315470572, + "grad_norm": 1.0705066783684076, + "learning_rate": 4.956752151265617e-06, + "loss": 0.4073, + "step": 5936 + }, + { + "epoch": 0.3615382273239351, + "grad_norm": 0.9840030042519587, + "learning_rate": 4.956737371836172e-06, + "loss": 0.4869, + "step": 5937 + }, + { + "epoch": 0.36159912310081294, + "grad_norm": 1.0446227760409263, + "learning_rate": 4.956722589903853e-06, + "loss": 0.4484, + "step": 5938 + }, + { + "epoch": 0.36166001887769084, + "grad_norm": 1.0987168159869072, + "learning_rate": 4.956707805468677e-06, + "loss": 0.4669, + "step": 5939 + }, + { + "epoch": 0.3617209146545687, + "grad_norm": 1.0578534812732725, + "learning_rate": 4.956693018530657e-06, + "loss": 0.4622, + "step": 5940 + }, + { + "epoch": 0.3617818104314466, + "grad_norm": 0.983878117425382, + "learning_rate": 4.95667822908981e-06, + "loss": 0.5055, + "step": 5941 + }, + { + "epoch": 0.3618427062083244, + "grad_norm": 1.0774122997102273, + "learning_rate": 4.956663437146149e-06, + "loss": 0.441, + "step": 5942 + }, + { + "epoch": 0.36190360198520233, + "grad_norm": 0.9521728231100681, + "learning_rate": 4.956648642699693e-06, + "loss": 0.4861, + "step": 5943 + }, + { + "epoch": 0.3619644977620802, + "grad_norm": 0.9183915096261009, + "learning_rate": 4.9566338457504525e-06, + "loss": 0.504, + "step": 5944 + }, + { + "epoch": 0.3620253935389581, + "grad_norm": 1.058534718060545, + "learning_rate": 4.956619046298444e-06, + "loss": 0.471, + "step": 5945 + }, + { + "epoch": 0.362086289315836, + "grad_norm": 1.1738857336130304, + "learning_rate": 4.956604244343684e-06, + "loss": 0.4049, + "step": 5946 + }, + { + "epoch": 0.3621471850927138, + "grad_norm": 0.9804009311101864, + "learning_rate": 4.9565894398861875e-06, + "loss": 0.481, + "step": 5947 + }, + { + "epoch": 0.3622080808695917, + "grad_norm": 1.038915448254017, + "learning_rate": 4.9565746329259675e-06, + "loss": 0.3975, + "step": 5948 + }, + { + "epoch": 0.36226897664646956, + "grad_norm": 1.1301154290974451, + "learning_rate": 4.956559823463041e-06, + "loss": 0.4331, + "step": 5949 + }, + { + "epoch": 0.36232987242334747, + "grad_norm": 1.02384675174766, + "learning_rate": 4.9565450114974224e-06, + "loss": 0.4588, + "step": 5950 + }, + { + "epoch": 0.3623907682002253, + "grad_norm": 1.0812968585719447, + "learning_rate": 4.956530197029127e-06, + "loss": 0.4273, + "step": 5951 + }, + { + "epoch": 0.3624516639771032, + "grad_norm": 1.0427850818041422, + "learning_rate": 4.9565153800581685e-06, + "loss": 0.5305, + "step": 5952 + }, + { + "epoch": 0.36251255975398106, + "grad_norm": 1.1440136500790477, + "learning_rate": 4.956500560584565e-06, + "loss": 0.4043, + "step": 5953 + }, + { + "epoch": 0.36257345553085896, + "grad_norm": 1.0986524171727288, + "learning_rate": 4.9564857386083285e-06, + "loss": 0.413, + "step": 5954 + }, + { + "epoch": 0.3626343513077368, + "grad_norm": 1.0234778749836893, + "learning_rate": 4.956470914129475e-06, + "loss": 0.4574, + "step": 5955 + }, + { + "epoch": 0.3626952470846147, + "grad_norm": 1.023343561098871, + "learning_rate": 4.956456087148022e-06, + "loss": 0.502, + "step": 5956 + }, + { + "epoch": 0.36275614286149255, + "grad_norm": 1.0805837502807247, + "learning_rate": 4.956441257663981e-06, + "loss": 0.417, + "step": 5957 + }, + { + "epoch": 0.36281703863837045, + "grad_norm": 1.0675542983078854, + "learning_rate": 4.956426425677369e-06, + "loss": 0.4407, + "step": 5958 + }, + { + "epoch": 0.3628779344152483, + "grad_norm": 1.1287329944727151, + "learning_rate": 4.9564115911882005e-06, + "loss": 0.4814, + "step": 5959 + }, + { + "epoch": 0.3629388301921262, + "grad_norm": 0.9876383151567801, + "learning_rate": 4.956396754196492e-06, + "loss": 0.4352, + "step": 5960 + }, + { + "epoch": 0.36299972596900404, + "grad_norm": 1.067772991433254, + "learning_rate": 4.956381914702256e-06, + "loss": 0.4523, + "step": 5961 + }, + { + "epoch": 0.36306062174588194, + "grad_norm": 0.8623014141089806, + "learning_rate": 4.95636707270551e-06, + "loss": 0.5631, + "step": 5962 + }, + { + "epoch": 0.3631215175227598, + "grad_norm": 0.9841535355028717, + "learning_rate": 4.956352228206269e-06, + "loss": 0.4709, + "step": 5963 + }, + { + "epoch": 0.3631824132996377, + "grad_norm": 1.0352731938351536, + "learning_rate": 4.956337381204547e-06, + "loss": 0.5257, + "step": 5964 + }, + { + "epoch": 0.36324330907651553, + "grad_norm": 1.019479988963735, + "learning_rate": 4.956322531700359e-06, + "loss": 0.4511, + "step": 5965 + }, + { + "epoch": 0.36330420485339343, + "grad_norm": 0.947186149821031, + "learning_rate": 4.956307679693721e-06, + "loss": 0.4594, + "step": 5966 + }, + { + "epoch": 0.3633651006302713, + "grad_norm": 0.9860605755331532, + "learning_rate": 4.956292825184647e-06, + "loss": 0.5054, + "step": 5967 + }, + { + "epoch": 0.3634259964071492, + "grad_norm": 1.0246779620366455, + "learning_rate": 4.956277968173155e-06, + "loss": 0.5152, + "step": 5968 + }, + { + "epoch": 0.363486892184027, + "grad_norm": 1.0529668846959945, + "learning_rate": 4.956263108659256e-06, + "loss": 0.449, + "step": 5969 + }, + { + "epoch": 0.3635477879609049, + "grad_norm": 0.9493275473743187, + "learning_rate": 4.956248246642968e-06, + "loss": 0.4919, + "step": 5970 + }, + { + "epoch": 0.36360868373778277, + "grad_norm": 0.9768143101890996, + "learning_rate": 4.956233382124306e-06, + "loss": 0.5518, + "step": 5971 + }, + { + "epoch": 0.36366957951466067, + "grad_norm": 1.1833682463256738, + "learning_rate": 4.956218515103283e-06, + "loss": 0.3932, + "step": 5972 + }, + { + "epoch": 0.3637304752915385, + "grad_norm": 1.0201007541003366, + "learning_rate": 4.9562036455799165e-06, + "loss": 0.4542, + "step": 5973 + }, + { + "epoch": 0.3637913710684164, + "grad_norm": 1.034413844084137, + "learning_rate": 4.956188773554221e-06, + "loss": 0.4675, + "step": 5974 + }, + { + "epoch": 0.36385226684529426, + "grad_norm": 1.0167314745307234, + "learning_rate": 4.956173899026212e-06, + "loss": 0.4901, + "step": 5975 + }, + { + "epoch": 0.36391316262217216, + "grad_norm": 1.0073842995215903, + "learning_rate": 4.956159021995903e-06, + "loss": 0.4073, + "step": 5976 + }, + { + "epoch": 0.36397405839905, + "grad_norm": 1.024695318359912, + "learning_rate": 4.95614414246331e-06, + "loss": 0.4977, + "step": 5977 + }, + { + "epoch": 0.3640349541759279, + "grad_norm": 1.0150093288673316, + "learning_rate": 4.956129260428449e-06, + "loss": 0.5101, + "step": 5978 + }, + { + "epoch": 0.36409584995280575, + "grad_norm": 1.0179480388545166, + "learning_rate": 4.956114375891335e-06, + "loss": 0.3947, + "step": 5979 + }, + { + "epoch": 0.36415674572968365, + "grad_norm": 1.0016715084236207, + "learning_rate": 4.956099488851982e-06, + "loss": 0.4469, + "step": 5980 + }, + { + "epoch": 0.3642176415065615, + "grad_norm": 0.9754138383953196, + "learning_rate": 4.956084599310407e-06, + "loss": 0.4588, + "step": 5981 + }, + { + "epoch": 0.3642785372834394, + "grad_norm": 1.0712835761578299, + "learning_rate": 4.956069707266623e-06, + "loss": 0.4862, + "step": 5982 + }, + { + "epoch": 0.36433943306031724, + "grad_norm": 1.091722692119818, + "learning_rate": 4.956054812720646e-06, + "loss": 0.4276, + "step": 5983 + }, + { + "epoch": 0.36440032883719514, + "grad_norm": 0.9838303738300328, + "learning_rate": 4.956039915672492e-06, + "loss": 0.4839, + "step": 5984 + }, + { + "epoch": 0.364461224614073, + "grad_norm": 1.0320232625659702, + "learning_rate": 4.956025016122176e-06, + "loss": 0.4403, + "step": 5985 + }, + { + "epoch": 0.3645221203909509, + "grad_norm": 1.0298611050228113, + "learning_rate": 4.956010114069712e-06, + "loss": 0.4566, + "step": 5986 + }, + { + "epoch": 0.3645830161678288, + "grad_norm": 0.9812459930684417, + "learning_rate": 4.955995209515117e-06, + "loss": 0.4977, + "step": 5987 + }, + { + "epoch": 0.36464391194470663, + "grad_norm": 0.9667187988899829, + "learning_rate": 4.9559803024584045e-06, + "loss": 0.5351, + "step": 5988 + }, + { + "epoch": 0.36470480772158453, + "grad_norm": 1.1037723351518227, + "learning_rate": 4.95596539289959e-06, + "loss": 0.49, + "step": 5989 + }, + { + "epoch": 0.3647657034984624, + "grad_norm": 1.0186171287888655, + "learning_rate": 4.955950480838689e-06, + "loss": 0.456, + "step": 5990 + }, + { + "epoch": 0.3648265992753403, + "grad_norm": 0.9995478932858419, + "learning_rate": 4.955935566275717e-06, + "loss": 0.4914, + "step": 5991 + }, + { + "epoch": 0.3648874950522181, + "grad_norm": 0.9940440476334215, + "learning_rate": 4.955920649210689e-06, + "loss": 0.4102, + "step": 5992 + }, + { + "epoch": 0.364948390829096, + "grad_norm": 1.0086267726181692, + "learning_rate": 4.95590572964362e-06, + "loss": 0.4235, + "step": 5993 + }, + { + "epoch": 0.36500928660597387, + "grad_norm": 0.9654380608696763, + "learning_rate": 4.955890807574525e-06, + "loss": 0.4388, + "step": 5994 + }, + { + "epoch": 0.36507018238285177, + "grad_norm": 1.0325453798195798, + "learning_rate": 4.9558758830034205e-06, + "loss": 0.4888, + "step": 5995 + }, + { + "epoch": 0.3651310781597296, + "grad_norm": 0.9358713903856345, + "learning_rate": 4.955860955930319e-06, + "loss": 0.4719, + "step": 5996 + }, + { + "epoch": 0.3651919739366075, + "grad_norm": 1.0536437970184025, + "learning_rate": 4.955846026355239e-06, + "loss": 0.4528, + "step": 5997 + }, + { + "epoch": 0.36525286971348536, + "grad_norm": 1.0938475489143789, + "learning_rate": 4.955831094278194e-06, + "loss": 0.457, + "step": 5998 + }, + { + "epoch": 0.36531376549036326, + "grad_norm": 1.116730665115478, + "learning_rate": 4.9558161596991985e-06, + "loss": 0.4586, + "step": 5999 + }, + { + "epoch": 0.3653746612672411, + "grad_norm": 0.9505505325234672, + "learning_rate": 4.955801222618269e-06, + "loss": 0.4008, + "step": 6000 + }, + { + "epoch": 0.365435557044119, + "grad_norm": 1.021256397977252, + "learning_rate": 4.95578628303542e-06, + "loss": 0.5127, + "step": 6001 + }, + { + "epoch": 0.36549645282099685, + "grad_norm": 0.9759332871170654, + "learning_rate": 4.955771340950667e-06, + "loss": 0.4663, + "step": 6002 + }, + { + "epoch": 0.36555734859787475, + "grad_norm": 1.0590600732059887, + "learning_rate": 4.955756396364026e-06, + "loss": 0.4634, + "step": 6003 + }, + { + "epoch": 0.3656182443747526, + "grad_norm": 1.0997285392935592, + "learning_rate": 4.955741449275511e-06, + "loss": 0.5017, + "step": 6004 + }, + { + "epoch": 0.3656791401516305, + "grad_norm": 1.1000296468542754, + "learning_rate": 4.955726499685137e-06, + "loss": 0.4298, + "step": 6005 + }, + { + "epoch": 0.36574003592850834, + "grad_norm": 0.9606171714324424, + "learning_rate": 4.955711547592921e-06, + "loss": 0.4857, + "step": 6006 + }, + { + "epoch": 0.36580093170538625, + "grad_norm": 0.919421173462739, + "learning_rate": 4.9556965929988765e-06, + "loss": 0.5104, + "step": 6007 + }, + { + "epoch": 0.3658618274822641, + "grad_norm": 1.0373120712610218, + "learning_rate": 4.955681635903019e-06, + "loss": 0.4738, + "step": 6008 + }, + { + "epoch": 0.365922723259142, + "grad_norm": 0.9709169043290344, + "learning_rate": 4.955666676305365e-06, + "loss": 0.4863, + "step": 6009 + }, + { + "epoch": 0.36598361903601984, + "grad_norm": 1.0685762786283473, + "learning_rate": 4.955651714205928e-06, + "loss": 0.472, + "step": 6010 + }, + { + "epoch": 0.36604451481289774, + "grad_norm": 1.0657628988556218, + "learning_rate": 4.955636749604725e-06, + "loss": 0.5078, + "step": 6011 + }, + { + "epoch": 0.3661054105897756, + "grad_norm": 0.9785638903386237, + "learning_rate": 4.95562178250177e-06, + "loss": 0.4608, + "step": 6012 + }, + { + "epoch": 0.3661663063666535, + "grad_norm": 1.1537497740726192, + "learning_rate": 4.955606812897078e-06, + "loss": 0.3935, + "step": 6013 + }, + { + "epoch": 0.3662272021435313, + "grad_norm": 1.0416295782592686, + "learning_rate": 4.955591840790665e-06, + "loss": 0.5162, + "step": 6014 + }, + { + "epoch": 0.36628809792040923, + "grad_norm": 1.0631604355628037, + "learning_rate": 4.955576866182547e-06, + "loss": 0.4686, + "step": 6015 + }, + { + "epoch": 0.3663489936972871, + "grad_norm": 1.0502663742363367, + "learning_rate": 4.955561889072737e-06, + "loss": 0.4213, + "step": 6016 + }, + { + "epoch": 0.366409889474165, + "grad_norm": 1.0149225982633372, + "learning_rate": 4.955546909461253e-06, + "loss": 0.4587, + "step": 6017 + }, + { + "epoch": 0.3664707852510428, + "grad_norm": 1.0271081565307223, + "learning_rate": 4.955531927348107e-06, + "loss": 0.5337, + "step": 6018 + }, + { + "epoch": 0.3665316810279207, + "grad_norm": 1.1108051625370898, + "learning_rate": 4.955516942733318e-06, + "loss": 0.4563, + "step": 6019 + }, + { + "epoch": 0.36659257680479856, + "grad_norm": 0.9519553958321096, + "learning_rate": 4.955501955616898e-06, + "loss": 0.4804, + "step": 6020 + }, + { + "epoch": 0.36665347258167647, + "grad_norm": 1.0456210389853222, + "learning_rate": 4.955486965998865e-06, + "loss": 0.4787, + "step": 6021 + }, + { + "epoch": 0.3667143683585543, + "grad_norm": 1.0253843107756424, + "learning_rate": 4.955471973879231e-06, + "loss": 0.4328, + "step": 6022 + }, + { + "epoch": 0.3667752641354322, + "grad_norm": 1.0423304462864216, + "learning_rate": 4.955456979258016e-06, + "loss": 0.4682, + "step": 6023 + }, + { + "epoch": 0.36683615991231006, + "grad_norm": 1.047818811220035, + "learning_rate": 4.95544198213523e-06, + "loss": 0.5409, + "step": 6024 + }, + { + "epoch": 0.36689705568918796, + "grad_norm": 1.0606418688587953, + "learning_rate": 4.955426982510891e-06, + "loss": 0.5127, + "step": 6025 + }, + { + "epoch": 0.3669579514660658, + "grad_norm": 0.9867698889774916, + "learning_rate": 4.955411980385015e-06, + "loss": 0.4608, + "step": 6026 + }, + { + "epoch": 0.3670188472429437, + "grad_norm": 1.0099545253258095, + "learning_rate": 4.9553969757576165e-06, + "loss": 0.4698, + "step": 6027 + }, + { + "epoch": 0.3670797430198216, + "grad_norm": 1.0705889010400158, + "learning_rate": 4.95538196862871e-06, + "loss": 0.4076, + "step": 6028 + }, + { + "epoch": 0.36714063879669945, + "grad_norm": 1.024103218441094, + "learning_rate": 4.955366958998312e-06, + "loss": 0.4874, + "step": 6029 + }, + { + "epoch": 0.36720153457357735, + "grad_norm": 0.9852691860321268, + "learning_rate": 4.955351946866436e-06, + "loss": 0.4724, + "step": 6030 + }, + { + "epoch": 0.3672624303504552, + "grad_norm": 1.0402369169787309, + "learning_rate": 4.9553369322331e-06, + "loss": 0.452, + "step": 6031 + }, + { + "epoch": 0.3673233261273331, + "grad_norm": 0.9553346835539287, + "learning_rate": 4.955321915098317e-06, + "loss": 0.4913, + "step": 6032 + }, + { + "epoch": 0.36738422190421094, + "grad_norm": 0.9794896006905828, + "learning_rate": 4.955306895462102e-06, + "loss": 0.5166, + "step": 6033 + }, + { + "epoch": 0.36744511768108884, + "grad_norm": 1.0365308352216436, + "learning_rate": 4.955291873324473e-06, + "loss": 0.4651, + "step": 6034 + }, + { + "epoch": 0.3675060134579667, + "grad_norm": 1.09533701446043, + "learning_rate": 4.955276848685443e-06, + "loss": 0.4701, + "step": 6035 + }, + { + "epoch": 0.3675669092348446, + "grad_norm": 1.0814475241467412, + "learning_rate": 4.9552618215450285e-06, + "loss": 0.5105, + "step": 6036 + }, + { + "epoch": 0.36762780501172243, + "grad_norm": 1.0344096176145035, + "learning_rate": 4.955246791903243e-06, + "loss": 0.5064, + "step": 6037 + }, + { + "epoch": 0.36768870078860033, + "grad_norm": 1.0290174303916395, + "learning_rate": 4.9552317597601055e-06, + "loss": 0.4078, + "step": 6038 + }, + { + "epoch": 0.3677495965654782, + "grad_norm": 1.1745850195385723, + "learning_rate": 4.955216725115627e-06, + "loss": 0.4817, + "step": 6039 + }, + { + "epoch": 0.3678104923423561, + "grad_norm": 0.97557027350353, + "learning_rate": 4.955201687969825e-06, + "loss": 0.4909, + "step": 6040 + }, + { + "epoch": 0.3678713881192339, + "grad_norm": 0.9680485546357052, + "learning_rate": 4.955186648322715e-06, + "loss": 0.4914, + "step": 6041 + }, + { + "epoch": 0.3679322838961118, + "grad_norm": 0.8846316862344714, + "learning_rate": 4.955171606174312e-06, + "loss": 0.5009, + "step": 6042 + }, + { + "epoch": 0.36799317967298967, + "grad_norm": 0.9674266886887368, + "learning_rate": 4.95515656152463e-06, + "loss": 0.4967, + "step": 6043 + }, + { + "epoch": 0.36805407544986757, + "grad_norm": 1.0134008747659073, + "learning_rate": 4.955141514373687e-06, + "loss": 0.3898, + "step": 6044 + }, + { + "epoch": 0.3681149712267454, + "grad_norm": 1.0433509500993623, + "learning_rate": 4.9551264647214955e-06, + "loss": 0.437, + "step": 6045 + }, + { + "epoch": 0.3681758670036233, + "grad_norm": 0.9983442725998677, + "learning_rate": 4.955111412568073e-06, + "loss": 0.5487, + "step": 6046 + }, + { + "epoch": 0.36823676278050116, + "grad_norm": 1.0066431644756593, + "learning_rate": 4.955096357913435e-06, + "loss": 0.512, + "step": 6047 + }, + { + "epoch": 0.36829765855737906, + "grad_norm": 1.1210776266589981, + "learning_rate": 4.9550813007575945e-06, + "loss": 0.4774, + "step": 6048 + }, + { + "epoch": 0.3683585543342569, + "grad_norm": 1.0949529535010654, + "learning_rate": 4.955066241100569e-06, + "loss": 0.4192, + "step": 6049 + }, + { + "epoch": 0.3684194501111348, + "grad_norm": 0.9638401208699781, + "learning_rate": 4.955051178942372e-06, + "loss": 0.5206, + "step": 6050 + }, + { + "epoch": 0.36848034588801265, + "grad_norm": 1.028199744820986, + "learning_rate": 4.95503611428302e-06, + "loss": 0.4653, + "step": 6051 + }, + { + "epoch": 0.36854124166489055, + "grad_norm": 1.1405147084861502, + "learning_rate": 4.95502104712253e-06, + "loss": 0.4159, + "step": 6052 + }, + { + "epoch": 0.3686021374417684, + "grad_norm": 1.1301073887584574, + "learning_rate": 4.955005977460914e-06, + "loss": 0.4956, + "step": 6053 + }, + { + "epoch": 0.3686630332186463, + "grad_norm": 1.0975708534220976, + "learning_rate": 4.954990905298189e-06, + "loss": 0.4949, + "step": 6054 + }, + { + "epoch": 0.36872392899552414, + "grad_norm": 0.9895300878603349, + "learning_rate": 4.954975830634371e-06, + "loss": 0.4879, + "step": 6055 + }, + { + "epoch": 0.36878482477240204, + "grad_norm": 1.0745962331105468, + "learning_rate": 4.954960753469474e-06, + "loss": 0.457, + "step": 6056 + }, + { + "epoch": 0.3688457205492799, + "grad_norm": 1.0059322602475251, + "learning_rate": 4.954945673803515e-06, + "loss": 0.5096, + "step": 6057 + }, + { + "epoch": 0.3689066163261578, + "grad_norm": 1.0084416368074998, + "learning_rate": 4.9549305916365075e-06, + "loss": 0.4301, + "step": 6058 + }, + { + "epoch": 0.36896751210303563, + "grad_norm": 1.0305587135721996, + "learning_rate": 4.954915506968469e-06, + "loss": 0.4747, + "step": 6059 + }, + { + "epoch": 0.36902840787991353, + "grad_norm": 0.9716687695603177, + "learning_rate": 4.9549004197994125e-06, + "loss": 0.4863, + "step": 6060 + }, + { + "epoch": 0.3690893036567914, + "grad_norm": 0.9810671103624888, + "learning_rate": 4.954885330129354e-06, + "loss": 0.4467, + "step": 6061 + }, + { + "epoch": 0.3691501994336693, + "grad_norm": 1.034810106768394, + "learning_rate": 4.95487023795831e-06, + "loss": 0.4752, + "step": 6062 + }, + { + "epoch": 0.3692110952105471, + "grad_norm": 1.0378566595676086, + "learning_rate": 4.954855143286295e-06, + "loss": 0.4306, + "step": 6063 + }, + { + "epoch": 0.369271990987425, + "grad_norm": 1.0567889337948464, + "learning_rate": 4.954840046113325e-06, + "loss": 0.3971, + "step": 6064 + }, + { + "epoch": 0.36933288676430287, + "grad_norm": 1.1027685733653938, + "learning_rate": 4.954824946439415e-06, + "loss": 0.4378, + "step": 6065 + }, + { + "epoch": 0.36939378254118077, + "grad_norm": 0.9590174783990574, + "learning_rate": 4.95480984426458e-06, + "loss": 0.5305, + "step": 6066 + }, + { + "epoch": 0.3694546783180586, + "grad_norm": 1.0847789332032582, + "learning_rate": 4.954794739588836e-06, + "loss": 0.4464, + "step": 6067 + }, + { + "epoch": 0.3695155740949365, + "grad_norm": 1.0942638428917637, + "learning_rate": 4.954779632412198e-06, + "loss": 0.4889, + "step": 6068 + }, + { + "epoch": 0.3695764698718144, + "grad_norm": 0.9424948764647954, + "learning_rate": 4.954764522734682e-06, + "loss": 0.4215, + "step": 6069 + }, + { + "epoch": 0.36963736564869226, + "grad_norm": 1.0054119313548815, + "learning_rate": 4.954749410556302e-06, + "loss": 0.5487, + "step": 6070 + }, + { + "epoch": 0.36969826142557016, + "grad_norm": 1.1102618596128098, + "learning_rate": 4.954734295877075e-06, + "loss": 0.4143, + "step": 6071 + }, + { + "epoch": 0.369759157202448, + "grad_norm": 1.0259845404721895, + "learning_rate": 4.954719178697016e-06, + "loss": 0.4459, + "step": 6072 + }, + { + "epoch": 0.3698200529793259, + "grad_norm": 1.0364802762629588, + "learning_rate": 4.95470405901614e-06, + "loss": 0.5384, + "step": 6073 + }, + { + "epoch": 0.36988094875620375, + "grad_norm": 1.1429468762962434, + "learning_rate": 4.954688936834462e-06, + "loss": 0.4409, + "step": 6074 + }, + { + "epoch": 0.36994184453308165, + "grad_norm": 1.072767632031658, + "learning_rate": 4.954673812151999e-06, + "loss": 0.4664, + "step": 6075 + }, + { + "epoch": 0.3700027403099595, + "grad_norm": 1.0869173396947693, + "learning_rate": 4.954658684968764e-06, + "loss": 0.4754, + "step": 6076 + }, + { + "epoch": 0.3700636360868374, + "grad_norm": 0.9948745705375578, + "learning_rate": 4.954643555284775e-06, + "loss": 0.4835, + "step": 6077 + }, + { + "epoch": 0.37012453186371524, + "grad_norm": 1.0322567953581128, + "learning_rate": 4.954628423100045e-06, + "loss": 0.4464, + "step": 6078 + }, + { + "epoch": 0.37018542764059315, + "grad_norm": 1.0684679895885498, + "learning_rate": 4.954613288414591e-06, + "loss": 0.5106, + "step": 6079 + }, + { + "epoch": 0.370246323417471, + "grad_norm": 1.0232438934766566, + "learning_rate": 4.954598151228429e-06, + "loss": 0.4597, + "step": 6080 + }, + { + "epoch": 0.3703072191943489, + "grad_norm": 0.9034695952972958, + "learning_rate": 4.954583011541573e-06, + "loss": 0.4753, + "step": 6081 + }, + { + "epoch": 0.37036811497122674, + "grad_norm": 1.0449125507087937, + "learning_rate": 4.954567869354038e-06, + "loss": 0.4922, + "step": 6082 + }, + { + "epoch": 0.37042901074810464, + "grad_norm": 1.058438369347969, + "learning_rate": 4.954552724665841e-06, + "loss": 0.4701, + "step": 6083 + }, + { + "epoch": 0.3704899065249825, + "grad_norm": 0.9660142148584423, + "learning_rate": 4.9545375774769964e-06, + "loss": 0.4915, + "step": 6084 + }, + { + "epoch": 0.3705508023018604, + "grad_norm": 0.9410817689930323, + "learning_rate": 4.95452242778752e-06, + "loss": 0.5209, + "step": 6085 + }, + { + "epoch": 0.3706116980787382, + "grad_norm": 0.9858879722914147, + "learning_rate": 4.9545072755974276e-06, + "loss": 0.4638, + "step": 6086 + }, + { + "epoch": 0.37067259385561613, + "grad_norm": 1.0017233899074118, + "learning_rate": 4.9544921209067335e-06, + "loss": 0.4854, + "step": 6087 + }, + { + "epoch": 0.370733489632494, + "grad_norm": 1.0060773089033128, + "learning_rate": 4.954476963715454e-06, + "loss": 0.4412, + "step": 6088 + }, + { + "epoch": 0.3707943854093719, + "grad_norm": 1.0502068690349289, + "learning_rate": 4.954461804023605e-06, + "loss": 0.4349, + "step": 6089 + }, + { + "epoch": 0.3708552811862497, + "grad_norm": 1.0363591502693998, + "learning_rate": 4.954446641831201e-06, + "loss": 0.4911, + "step": 6090 + }, + { + "epoch": 0.3709161769631276, + "grad_norm": 1.0238895045994616, + "learning_rate": 4.954431477138257e-06, + "loss": 0.4598, + "step": 6091 + }, + { + "epoch": 0.37097707274000546, + "grad_norm": 0.9526263054408701, + "learning_rate": 4.954416309944791e-06, + "loss": 0.5171, + "step": 6092 + }, + { + "epoch": 0.37103796851688337, + "grad_norm": 0.997042799459519, + "learning_rate": 4.954401140250816e-06, + "loss": 0.4822, + "step": 6093 + }, + { + "epoch": 0.3710988642937612, + "grad_norm": 1.1046195987917462, + "learning_rate": 4.9543859680563475e-06, + "loss": 0.3984, + "step": 6094 + }, + { + "epoch": 0.3711597600706391, + "grad_norm": 0.9597612087611525, + "learning_rate": 4.954370793361402e-06, + "loss": 0.5071, + "step": 6095 + }, + { + "epoch": 0.37122065584751696, + "grad_norm": 1.0755943660287053, + "learning_rate": 4.954355616165994e-06, + "loss": 0.5206, + "step": 6096 + }, + { + "epoch": 0.37128155162439486, + "grad_norm": 1.0621742565773706, + "learning_rate": 4.9543404364701404e-06, + "loss": 0.4729, + "step": 6097 + }, + { + "epoch": 0.3713424474012727, + "grad_norm": 1.0538214593528707, + "learning_rate": 4.954325254273855e-06, + "loss": 0.4323, + "step": 6098 + }, + { + "epoch": 0.3714033431781506, + "grad_norm": 0.9873853518348582, + "learning_rate": 4.954310069577155e-06, + "loss": 0.5291, + "step": 6099 + }, + { + "epoch": 0.37146423895502845, + "grad_norm": 1.0598009226958216, + "learning_rate": 4.954294882380054e-06, + "loss": 0.3927, + "step": 6100 + }, + { + "epoch": 0.37152513473190635, + "grad_norm": 1.063536912977621, + "learning_rate": 4.9542796926825685e-06, + "loss": 0.5062, + "step": 6101 + }, + { + "epoch": 0.3715860305087842, + "grad_norm": 1.0735508768594233, + "learning_rate": 4.954264500484714e-06, + "loss": 0.4916, + "step": 6102 + }, + { + "epoch": 0.3716469262856621, + "grad_norm": 1.0194401851895718, + "learning_rate": 4.954249305786507e-06, + "loss": 0.4428, + "step": 6103 + }, + { + "epoch": 0.37170782206253994, + "grad_norm": 1.0560808791635838, + "learning_rate": 4.954234108587961e-06, + "loss": 0.4506, + "step": 6104 + }, + { + "epoch": 0.37176871783941784, + "grad_norm": 1.0159919928390322, + "learning_rate": 4.954218908889092e-06, + "loss": 0.47, + "step": 6105 + }, + { + "epoch": 0.3718296136162957, + "grad_norm": 1.0820125867904415, + "learning_rate": 4.954203706689916e-06, + "loss": 0.4268, + "step": 6106 + }, + { + "epoch": 0.3718905093931736, + "grad_norm": 1.01980879642156, + "learning_rate": 4.954188501990448e-06, + "loss": 0.4527, + "step": 6107 + }, + { + "epoch": 0.37195140517005143, + "grad_norm": 0.9241601317563577, + "learning_rate": 4.954173294790704e-06, + "loss": 0.5504, + "step": 6108 + }, + { + "epoch": 0.37201230094692933, + "grad_norm": 1.0373536617585366, + "learning_rate": 4.9541580850907e-06, + "loss": 0.5363, + "step": 6109 + }, + { + "epoch": 0.37207319672380723, + "grad_norm": 1.0140116317667849, + "learning_rate": 4.9541428728904495e-06, + "loss": 0.4757, + "step": 6110 + }, + { + "epoch": 0.3721340925006851, + "grad_norm": 1.0990217645759348, + "learning_rate": 4.95412765818997e-06, + "loss": 0.4849, + "step": 6111 + }, + { + "epoch": 0.372194988277563, + "grad_norm": 1.0212356605158104, + "learning_rate": 4.954112440989276e-06, + "loss": 0.5498, + "step": 6112 + }, + { + "epoch": 0.3722558840544408, + "grad_norm": 1.047226500152906, + "learning_rate": 4.954097221288383e-06, + "loss": 0.4602, + "step": 6113 + }, + { + "epoch": 0.3723167798313187, + "grad_norm": 1.0981836497191761, + "learning_rate": 4.954081999087308e-06, + "loss": 0.4812, + "step": 6114 + }, + { + "epoch": 0.37237767560819657, + "grad_norm": 1.073866617456297, + "learning_rate": 4.954066774386064e-06, + "loss": 0.6086, + "step": 6115 + }, + { + "epoch": 0.37243857138507447, + "grad_norm": 0.9990920889602521, + "learning_rate": 4.954051547184669e-06, + "loss": 0.449, + "step": 6116 + }, + { + "epoch": 0.3724994671619523, + "grad_norm": 1.0541604918254546, + "learning_rate": 4.9540363174831356e-06, + "loss": 0.4687, + "step": 6117 + }, + { + "epoch": 0.3725603629388302, + "grad_norm": 1.1229637065959743, + "learning_rate": 4.954021085281482e-06, + "loss": 0.434, + "step": 6118 + }, + { + "epoch": 0.37262125871570806, + "grad_norm": 1.0202659804724348, + "learning_rate": 4.954005850579723e-06, + "loss": 0.4851, + "step": 6119 + }, + { + "epoch": 0.37268215449258596, + "grad_norm": 1.041344192943728, + "learning_rate": 4.953990613377873e-06, + "loss": 0.4382, + "step": 6120 + }, + { + "epoch": 0.3727430502694638, + "grad_norm": 1.0027217709813083, + "learning_rate": 4.953975373675949e-06, + "loss": 0.5181, + "step": 6121 + }, + { + "epoch": 0.3728039460463417, + "grad_norm": 1.0310060595052497, + "learning_rate": 4.953960131473966e-06, + "loss": 0.4167, + "step": 6122 + }, + { + "epoch": 0.37286484182321955, + "grad_norm": 1.1895355606761902, + "learning_rate": 4.95394488677194e-06, + "loss": 0.4363, + "step": 6123 + }, + { + "epoch": 0.37292573760009745, + "grad_norm": 0.9094697475379501, + "learning_rate": 4.953929639569885e-06, + "loss": 0.5366, + "step": 6124 + }, + { + "epoch": 0.3729866333769753, + "grad_norm": 1.0389224536890518, + "learning_rate": 4.953914389867818e-06, + "loss": 0.4928, + "step": 6125 + }, + { + "epoch": 0.3730475291538532, + "grad_norm": 1.0267695025354686, + "learning_rate": 4.953899137665753e-06, + "loss": 0.4427, + "step": 6126 + }, + { + "epoch": 0.37310842493073104, + "grad_norm": 1.0473766467288306, + "learning_rate": 4.953883882963708e-06, + "loss": 0.5087, + "step": 6127 + }, + { + "epoch": 0.37316932070760894, + "grad_norm": 1.0243769315825, + "learning_rate": 4.953868625761696e-06, + "loss": 0.556, + "step": 6128 + }, + { + "epoch": 0.3732302164844868, + "grad_norm": 0.9466014485031404, + "learning_rate": 4.953853366059734e-06, + "loss": 0.5144, + "step": 6129 + }, + { + "epoch": 0.3732911122613647, + "grad_norm": 1.0017801704081422, + "learning_rate": 4.9538381038578374e-06, + "loss": 0.4799, + "step": 6130 + }, + { + "epoch": 0.37335200803824253, + "grad_norm": 1.0363506130719404, + "learning_rate": 4.953822839156022e-06, + "loss": 0.4601, + "step": 6131 + }, + { + "epoch": 0.37341290381512043, + "grad_norm": 0.9763303598556224, + "learning_rate": 4.953807571954302e-06, + "loss": 0.4925, + "step": 6132 + }, + { + "epoch": 0.3734737995919983, + "grad_norm": 0.9516255623893157, + "learning_rate": 4.953792302252695e-06, + "loss": 0.5199, + "step": 6133 + }, + { + "epoch": 0.3735346953688762, + "grad_norm": 0.9014335626651085, + "learning_rate": 4.953777030051215e-06, + "loss": 0.5183, + "step": 6134 + }, + { + "epoch": 0.373595591145754, + "grad_norm": 1.0050082648600522, + "learning_rate": 4.953761755349877e-06, + "loss": 0.4341, + "step": 6135 + }, + { + "epoch": 0.3736564869226319, + "grad_norm": 0.9825530707025701, + "learning_rate": 4.953746478148698e-06, + "loss": 0.4474, + "step": 6136 + }, + { + "epoch": 0.37371738269950977, + "grad_norm": 0.9302173059085405, + "learning_rate": 4.953731198447693e-06, + "loss": 0.5643, + "step": 6137 + }, + { + "epoch": 0.37377827847638767, + "grad_norm": 0.9803722319527821, + "learning_rate": 4.953715916246878e-06, + "loss": 0.4923, + "step": 6138 + }, + { + "epoch": 0.3738391742532655, + "grad_norm": 0.9957650385078488, + "learning_rate": 4.9537006315462684e-06, + "loss": 0.4892, + "step": 6139 + }, + { + "epoch": 0.3739000700301434, + "grad_norm": 1.0974340599194536, + "learning_rate": 4.95368534434588e-06, + "loss": 0.4103, + "step": 6140 + }, + { + "epoch": 0.37396096580702126, + "grad_norm": 1.073060455757614, + "learning_rate": 4.953670054645728e-06, + "loss": 0.4801, + "step": 6141 + }, + { + "epoch": 0.37402186158389916, + "grad_norm": 1.0647491116141334, + "learning_rate": 4.953654762445826e-06, + "loss": 0.4984, + "step": 6142 + }, + { + "epoch": 0.374082757360777, + "grad_norm": 1.0290826558041104, + "learning_rate": 4.953639467746193e-06, + "loss": 0.4676, + "step": 6143 + }, + { + "epoch": 0.3741436531376549, + "grad_norm": 1.046587490271967, + "learning_rate": 4.953624170546843e-06, + "loss": 0.4253, + "step": 6144 + }, + { + "epoch": 0.37420454891453275, + "grad_norm": 1.0033067829455395, + "learning_rate": 4.953608870847792e-06, + "loss": 0.4652, + "step": 6145 + }, + { + "epoch": 0.37426544469141065, + "grad_norm": 1.1079004201652993, + "learning_rate": 4.953593568649056e-06, + "loss": 0.435, + "step": 6146 + }, + { + "epoch": 0.3743263404682885, + "grad_norm": 0.9643262574794592, + "learning_rate": 4.953578263950648e-06, + "loss": 0.5506, + "step": 6147 + }, + { + "epoch": 0.3743872362451664, + "grad_norm": 0.9575902076910581, + "learning_rate": 4.953562956752586e-06, + "loss": 0.4709, + "step": 6148 + }, + { + "epoch": 0.37444813202204424, + "grad_norm": 1.0589348702647976, + "learning_rate": 4.953547647054886e-06, + "loss": 0.4923, + "step": 6149 + }, + { + "epoch": 0.37450902779892215, + "grad_norm": 1.068619495962225, + "learning_rate": 4.953532334857562e-06, + "loss": 0.4658, + "step": 6150 + }, + { + "epoch": 0.37456992357580005, + "grad_norm": 1.1161213603072657, + "learning_rate": 4.953517020160631e-06, + "loss": 0.5561, + "step": 6151 + }, + { + "epoch": 0.3746308193526779, + "grad_norm": 1.0509426105898099, + "learning_rate": 4.953501702964108e-06, + "loss": 0.4808, + "step": 6152 + }, + { + "epoch": 0.3746917151295558, + "grad_norm": 0.9832603466438111, + "learning_rate": 4.953486383268007e-06, + "loss": 0.4736, + "step": 6153 + }, + { + "epoch": 0.37475261090643364, + "grad_norm": 1.0271924230370846, + "learning_rate": 4.953471061072346e-06, + "loss": 0.4857, + "step": 6154 + }, + { + "epoch": 0.37481350668331154, + "grad_norm": 1.0317837385936457, + "learning_rate": 4.953455736377139e-06, + "loss": 0.4438, + "step": 6155 + }, + { + "epoch": 0.3748744024601894, + "grad_norm": 1.02294520616393, + "learning_rate": 4.953440409182403e-06, + "loss": 0.4781, + "step": 6156 + }, + { + "epoch": 0.3749352982370673, + "grad_norm": 1.0095291472368366, + "learning_rate": 4.953425079488153e-06, + "loss": 0.4298, + "step": 6157 + }, + { + "epoch": 0.3749961940139451, + "grad_norm": 1.0888590612633355, + "learning_rate": 4.953409747294404e-06, + "loss": 0.4343, + "step": 6158 + }, + { + "epoch": 0.37505708979082303, + "grad_norm": 0.9842790028670121, + "learning_rate": 4.953394412601173e-06, + "loss": 0.5136, + "step": 6159 + }, + { + "epoch": 0.3751179855677009, + "grad_norm": 0.9562141188727699, + "learning_rate": 4.9533790754084735e-06, + "loss": 0.4466, + "step": 6160 + }, + { + "epoch": 0.3751788813445788, + "grad_norm": 0.9984343361121111, + "learning_rate": 4.953363735716323e-06, + "loss": 0.5395, + "step": 6161 + }, + { + "epoch": 0.3752397771214566, + "grad_norm": 1.0240308269306835, + "learning_rate": 4.953348393524737e-06, + "loss": 0.458, + "step": 6162 + }, + { + "epoch": 0.3753006728983345, + "grad_norm": 0.9852447625744957, + "learning_rate": 4.95333304883373e-06, + "loss": 0.4641, + "step": 6163 + }, + { + "epoch": 0.37536156867521236, + "grad_norm": 0.9741788854259957, + "learning_rate": 4.953317701643319e-06, + "loss": 0.5103, + "step": 6164 + }, + { + "epoch": 0.37542246445209027, + "grad_norm": 1.0355692747788448, + "learning_rate": 4.953302351953519e-06, + "loss": 0.5272, + "step": 6165 + }, + { + "epoch": 0.3754833602289681, + "grad_norm": 1.0457964105727904, + "learning_rate": 4.953286999764345e-06, + "loss": 0.4951, + "step": 6166 + }, + { + "epoch": 0.375544256005846, + "grad_norm": 1.0890380484400917, + "learning_rate": 4.953271645075814e-06, + "loss": 0.4346, + "step": 6167 + }, + { + "epoch": 0.37560515178272386, + "grad_norm": 1.072500356743128, + "learning_rate": 4.9532562878879396e-06, + "loss": 0.4383, + "step": 6168 + }, + { + "epoch": 0.37566604755960176, + "grad_norm": 0.9970849884038931, + "learning_rate": 4.953240928200739e-06, + "loss": 0.4644, + "step": 6169 + }, + { + "epoch": 0.3757269433364796, + "grad_norm": 0.9980886541314371, + "learning_rate": 4.9532255660142285e-06, + "loss": 0.4869, + "step": 6170 + }, + { + "epoch": 0.3757878391133575, + "grad_norm": 1.0309199997376262, + "learning_rate": 4.953210201328421e-06, + "loss": 0.4262, + "step": 6171 + }, + { + "epoch": 0.37584873489023535, + "grad_norm": 0.8990704806792376, + "learning_rate": 4.953194834143336e-06, + "loss": 0.4966, + "step": 6172 + }, + { + "epoch": 0.37590963066711325, + "grad_norm": 1.0757119196155462, + "learning_rate": 4.953179464458986e-06, + "loss": 0.4052, + "step": 6173 + }, + { + "epoch": 0.3759705264439911, + "grad_norm": 1.0643230192229978, + "learning_rate": 4.953164092275387e-06, + "loss": 0.444, + "step": 6174 + }, + { + "epoch": 0.376031422220869, + "grad_norm": 0.980050619842328, + "learning_rate": 4.953148717592558e-06, + "loss": 0.4971, + "step": 6175 + }, + { + "epoch": 0.37609231799774684, + "grad_norm": 1.013853357514565, + "learning_rate": 4.95313334041051e-06, + "loss": 0.5215, + "step": 6176 + }, + { + "epoch": 0.37615321377462474, + "grad_norm": 0.9844636395894656, + "learning_rate": 4.9531179607292615e-06, + "loss": 0.4188, + "step": 6177 + }, + { + "epoch": 0.3762141095515026, + "grad_norm": 1.020164356858937, + "learning_rate": 4.953102578548826e-06, + "loss": 0.4529, + "step": 6178 + }, + { + "epoch": 0.3762750053283805, + "grad_norm": 1.0136758977238693, + "learning_rate": 4.953087193869222e-06, + "loss": 0.5333, + "step": 6179 + }, + { + "epoch": 0.37633590110525833, + "grad_norm": 1.045925776525004, + "learning_rate": 4.953071806690464e-06, + "loss": 0.4543, + "step": 6180 + }, + { + "epoch": 0.37639679688213623, + "grad_norm": 1.0945586473812847, + "learning_rate": 4.953056417012566e-06, + "loss": 0.4479, + "step": 6181 + }, + { + "epoch": 0.3764576926590141, + "grad_norm": 0.9811205125559901, + "learning_rate": 4.953041024835547e-06, + "loss": 0.4765, + "step": 6182 + }, + { + "epoch": 0.376518588435892, + "grad_norm": 1.0656309261242096, + "learning_rate": 4.95302563015942e-06, + "loss": 0.4847, + "step": 6183 + }, + { + "epoch": 0.3765794842127698, + "grad_norm": 0.9913339404445668, + "learning_rate": 4.953010232984201e-06, + "loss": 0.4827, + "step": 6184 + }, + { + "epoch": 0.3766403799896477, + "grad_norm": 0.9852353912959414, + "learning_rate": 4.9529948333099065e-06, + "loss": 0.491, + "step": 6185 + }, + { + "epoch": 0.37670127576652557, + "grad_norm": 0.9703577965585669, + "learning_rate": 4.952979431136552e-06, + "loss": 0.4628, + "step": 6186 + }, + { + "epoch": 0.37676217154340347, + "grad_norm": 1.0577392062727597, + "learning_rate": 4.952964026464153e-06, + "loss": 0.4791, + "step": 6187 + }, + { + "epoch": 0.3768230673202813, + "grad_norm": 1.1478201272336088, + "learning_rate": 4.952948619292726e-06, + "loss": 0.4543, + "step": 6188 + }, + { + "epoch": 0.3768839630971592, + "grad_norm": 1.0693702257778634, + "learning_rate": 4.952933209622284e-06, + "loss": 0.4169, + "step": 6189 + }, + { + "epoch": 0.37694485887403706, + "grad_norm": 0.9955803189377422, + "learning_rate": 4.952917797452846e-06, + "loss": 0.4691, + "step": 6190 + }, + { + "epoch": 0.37700575465091496, + "grad_norm": 0.995354876690036, + "learning_rate": 4.952902382784426e-06, + "loss": 0.4667, + "step": 6191 + }, + { + "epoch": 0.37706665042779286, + "grad_norm": 0.9577729107918449, + "learning_rate": 4.952886965617041e-06, + "loss": 0.5257, + "step": 6192 + }, + { + "epoch": 0.3771275462046707, + "grad_norm": 1.0989628830497782, + "learning_rate": 4.952871545950705e-06, + "loss": 0.4851, + "step": 6193 + }, + { + "epoch": 0.3771884419815486, + "grad_norm": 1.0154341430773257, + "learning_rate": 4.952856123785434e-06, + "loss": 0.4332, + "step": 6194 + }, + { + "epoch": 0.37724933775842645, + "grad_norm": 1.1597653654831614, + "learning_rate": 4.952840699121245e-06, + "loss": 0.4601, + "step": 6195 + }, + { + "epoch": 0.37731023353530435, + "grad_norm": 1.0350233295971298, + "learning_rate": 4.952825271958152e-06, + "loss": 0.5036, + "step": 6196 + }, + { + "epoch": 0.3773711293121822, + "grad_norm": 1.1035056635057086, + "learning_rate": 4.952809842296172e-06, + "loss": 0.4948, + "step": 6197 + }, + { + "epoch": 0.3774320250890601, + "grad_norm": 1.0234245508532396, + "learning_rate": 4.952794410135321e-06, + "loss": 0.4698, + "step": 6198 + }, + { + "epoch": 0.37749292086593794, + "grad_norm": 1.0629453313845538, + "learning_rate": 4.952778975475613e-06, + "loss": 0.4815, + "step": 6199 + }, + { + "epoch": 0.37755381664281584, + "grad_norm": 1.0705433424571682, + "learning_rate": 4.9527635383170655e-06, + "loss": 0.4697, + "step": 6200 + }, + { + "epoch": 0.3776147124196937, + "grad_norm": 1.049011210470263, + "learning_rate": 4.952748098659693e-06, + "loss": 0.4231, + "step": 6201 + }, + { + "epoch": 0.3776756081965716, + "grad_norm": 0.972325848312185, + "learning_rate": 4.952732656503512e-06, + "loss": 0.4466, + "step": 6202 + }, + { + "epoch": 0.37773650397344943, + "grad_norm": 1.0795183826554908, + "learning_rate": 4.952717211848538e-06, + "loss": 0.4424, + "step": 6203 + }, + { + "epoch": 0.37779739975032733, + "grad_norm": 1.0008056586509155, + "learning_rate": 4.9527017646947865e-06, + "loss": 0.5545, + "step": 6204 + }, + { + "epoch": 0.3778582955272052, + "grad_norm": 1.0817458904496795, + "learning_rate": 4.952686315042274e-06, + "loss": 0.4345, + "step": 6205 + }, + { + "epoch": 0.3779191913040831, + "grad_norm": 1.050696133224878, + "learning_rate": 4.952670862891015e-06, + "loss": 0.5068, + "step": 6206 + }, + { + "epoch": 0.3779800870809609, + "grad_norm": 1.0929178505966588, + "learning_rate": 4.952655408241026e-06, + "loss": 0.4426, + "step": 6207 + }, + { + "epoch": 0.3780409828578388, + "grad_norm": 1.0153995707253862, + "learning_rate": 4.952639951092323e-06, + "loss": 0.4602, + "step": 6208 + }, + { + "epoch": 0.37810187863471667, + "grad_norm": 0.996852187375733, + "learning_rate": 4.952624491444921e-06, + "loss": 0.5184, + "step": 6209 + }, + { + "epoch": 0.37816277441159457, + "grad_norm": 0.9686057163343564, + "learning_rate": 4.952609029298837e-06, + "loss": 0.4386, + "step": 6210 + }, + { + "epoch": 0.3782236701884724, + "grad_norm": 1.0037116449006789, + "learning_rate": 4.9525935646540845e-06, + "loss": 0.4522, + "step": 6211 + }, + { + "epoch": 0.3782845659653503, + "grad_norm": 0.9243195603940001, + "learning_rate": 4.9525780975106815e-06, + "loss": 0.4753, + "step": 6212 + }, + { + "epoch": 0.37834546174222816, + "grad_norm": 1.1284643716511435, + "learning_rate": 4.952562627868643e-06, + "loss": 0.5019, + "step": 6213 + }, + { + "epoch": 0.37840635751910606, + "grad_norm": 1.073720050997467, + "learning_rate": 4.952547155727985e-06, + "loss": 0.506, + "step": 6214 + }, + { + "epoch": 0.3784672532959839, + "grad_norm": 1.0095802267605534, + "learning_rate": 4.952531681088722e-06, + "loss": 0.4704, + "step": 6215 + }, + { + "epoch": 0.3785281490728618, + "grad_norm": 1.0455801742115363, + "learning_rate": 4.952516203950872e-06, + "loss": 0.4576, + "step": 6216 + }, + { + "epoch": 0.37858904484973965, + "grad_norm": 1.0704037789117022, + "learning_rate": 4.952500724314448e-06, + "loss": 0.5675, + "step": 6217 + }, + { + "epoch": 0.37864994062661755, + "grad_norm": 0.993369759369137, + "learning_rate": 4.9524852421794686e-06, + "loss": 0.5043, + "step": 6218 + }, + { + "epoch": 0.3787108364034954, + "grad_norm": 0.9372756117925566, + "learning_rate": 4.952469757545947e-06, + "loss": 0.4908, + "step": 6219 + }, + { + "epoch": 0.3787717321803733, + "grad_norm": 1.00421733561364, + "learning_rate": 4.952454270413901e-06, + "loss": 0.4742, + "step": 6220 + }, + { + "epoch": 0.37883262795725114, + "grad_norm": 0.9897462864340599, + "learning_rate": 4.952438780783346e-06, + "loss": 0.4487, + "step": 6221 + }, + { + "epoch": 0.37889352373412905, + "grad_norm": 0.9802841160126721, + "learning_rate": 4.9524232886542965e-06, + "loss": 0.5085, + "step": 6222 + }, + { + "epoch": 0.3789544195110069, + "grad_norm": 1.08209453123268, + "learning_rate": 4.952407794026769e-06, + "loss": 0.5276, + "step": 6223 + }, + { + "epoch": 0.3790153152878848, + "grad_norm": 1.1109795394218331, + "learning_rate": 4.95239229690078e-06, + "loss": 0.4576, + "step": 6224 + }, + { + "epoch": 0.37907621106476264, + "grad_norm": 1.0333893349874026, + "learning_rate": 4.952376797276345e-06, + "loss": 0.4184, + "step": 6225 + }, + { + "epoch": 0.37913710684164054, + "grad_norm": 0.9752766358523337, + "learning_rate": 4.952361295153478e-06, + "loss": 0.4997, + "step": 6226 + }, + { + "epoch": 0.3791980026185184, + "grad_norm": 0.9972305524623175, + "learning_rate": 4.952345790532198e-06, + "loss": 0.5858, + "step": 6227 + }, + { + "epoch": 0.3792588983953963, + "grad_norm": 0.9446620691671298, + "learning_rate": 4.9523302834125184e-06, + "loss": 0.4564, + "step": 6228 + }, + { + "epoch": 0.3793197941722741, + "grad_norm": 1.042712257849186, + "learning_rate": 4.952314773794455e-06, + "loss": 0.435, + "step": 6229 + }, + { + "epoch": 0.37938068994915203, + "grad_norm": 1.0900231308235555, + "learning_rate": 4.952299261678026e-06, + "loss": 0.4128, + "step": 6230 + }, + { + "epoch": 0.3794415857260299, + "grad_norm": 0.9164856881570241, + "learning_rate": 4.952283747063244e-06, + "loss": 0.5018, + "step": 6231 + }, + { + "epoch": 0.3795024815029078, + "grad_norm": 1.1361870099637192, + "learning_rate": 4.952268229950127e-06, + "loss": 0.417, + "step": 6232 + }, + { + "epoch": 0.3795633772797857, + "grad_norm": 0.9626721398291819, + "learning_rate": 4.95225271033869e-06, + "loss": 0.5219, + "step": 6233 + }, + { + "epoch": 0.3796242730566635, + "grad_norm": 0.9727525931375427, + "learning_rate": 4.952237188228949e-06, + "loss": 0.4977, + "step": 6234 + }, + { + "epoch": 0.3796851688335414, + "grad_norm": 1.004405098320813, + "learning_rate": 4.952221663620919e-06, + "loss": 0.4724, + "step": 6235 + }, + { + "epoch": 0.37974606461041926, + "grad_norm": 1.1164785614518142, + "learning_rate": 4.9522061365146174e-06, + "loss": 0.4137, + "step": 6236 + }, + { + "epoch": 0.37980696038729717, + "grad_norm": 0.9804789548605373, + "learning_rate": 4.952190606910059e-06, + "loss": 0.4662, + "step": 6237 + }, + { + "epoch": 0.379867856164175, + "grad_norm": 1.13653909086519, + "learning_rate": 4.9521750748072595e-06, + "loss": 0.4249, + "step": 6238 + }, + { + "epoch": 0.3799287519410529, + "grad_norm": 1.0929730281563852, + "learning_rate": 4.952159540206235e-06, + "loss": 0.4921, + "step": 6239 + }, + { + "epoch": 0.37998964771793076, + "grad_norm": 1.0281427120240756, + "learning_rate": 4.952144003107002e-06, + "loss": 0.4553, + "step": 6240 + }, + { + "epoch": 0.38005054349480866, + "grad_norm": 1.0070830411846645, + "learning_rate": 4.952128463509575e-06, + "loss": 0.5227, + "step": 6241 + }, + { + "epoch": 0.3801114392716865, + "grad_norm": 1.0930901440538727, + "learning_rate": 4.952112921413971e-06, + "loss": 0.4669, + "step": 6242 + }, + { + "epoch": 0.3801723350485644, + "grad_norm": 1.02836947665105, + "learning_rate": 4.952097376820205e-06, + "loss": 0.4578, + "step": 6243 + }, + { + "epoch": 0.38023323082544225, + "grad_norm": 1.0892749352566953, + "learning_rate": 4.952081829728293e-06, + "loss": 0.4743, + "step": 6244 + }, + { + "epoch": 0.38029412660232015, + "grad_norm": 1.0153884488125613, + "learning_rate": 4.952066280138251e-06, + "loss": 0.4645, + "step": 6245 + }, + { + "epoch": 0.380355022379198, + "grad_norm": 0.9864106958562094, + "learning_rate": 4.952050728050095e-06, + "loss": 0.4205, + "step": 6246 + }, + { + "epoch": 0.3804159181560759, + "grad_norm": 1.0049757359824008, + "learning_rate": 4.952035173463842e-06, + "loss": 0.4135, + "step": 6247 + }, + { + "epoch": 0.38047681393295374, + "grad_norm": 1.0159484148363829, + "learning_rate": 4.952019616379504e-06, + "loss": 0.4321, + "step": 6248 + }, + { + "epoch": 0.38053770970983164, + "grad_norm": 1.0696473176536077, + "learning_rate": 4.952004056797102e-06, + "loss": 0.4201, + "step": 6249 + }, + { + "epoch": 0.3805986054867095, + "grad_norm": 1.1040639358735755, + "learning_rate": 4.951988494716648e-06, + "loss": 0.4271, + "step": 6250 + }, + { + "epoch": 0.3806595012635874, + "grad_norm": 1.118637791045271, + "learning_rate": 4.951972930138158e-06, + "loss": 0.3868, + "step": 6251 + }, + { + "epoch": 0.38072039704046523, + "grad_norm": 1.0435345437374535, + "learning_rate": 4.95195736306165e-06, + "loss": 0.4849, + "step": 6252 + }, + { + "epoch": 0.38078129281734313, + "grad_norm": 1.0614130094816925, + "learning_rate": 4.951941793487139e-06, + "loss": 0.4447, + "step": 6253 + }, + { + "epoch": 0.380842188594221, + "grad_norm": 1.069316871908195, + "learning_rate": 4.9519262214146405e-06, + "loss": 0.4898, + "step": 6254 + }, + { + "epoch": 0.3809030843710989, + "grad_norm": 1.026078165766074, + "learning_rate": 4.951910646844171e-06, + "loss": 0.4809, + "step": 6255 + }, + { + "epoch": 0.3809639801479767, + "grad_norm": 1.0666980292771364, + "learning_rate": 4.951895069775745e-06, + "loss": 0.4211, + "step": 6256 + }, + { + "epoch": 0.3810248759248546, + "grad_norm": 1.161233580882707, + "learning_rate": 4.9518794902093805e-06, + "loss": 0.5046, + "step": 6257 + }, + { + "epoch": 0.38108577170173247, + "grad_norm": 1.036297906163775, + "learning_rate": 4.9518639081450916e-06, + "loss": 0.4653, + "step": 6258 + }, + { + "epoch": 0.38114666747861037, + "grad_norm": 1.0521314296015953, + "learning_rate": 4.951848323582894e-06, + "loss": 0.4273, + "step": 6259 + }, + { + "epoch": 0.3812075632554882, + "grad_norm": 1.0364005615682215, + "learning_rate": 4.951832736522805e-06, + "loss": 0.4899, + "step": 6260 + }, + { + "epoch": 0.3812684590323661, + "grad_norm": 1.0431333054210508, + "learning_rate": 4.95181714696484e-06, + "loss": 0.5022, + "step": 6261 + }, + { + "epoch": 0.38132935480924396, + "grad_norm": 0.991350302050278, + "learning_rate": 4.951801554909014e-06, + "loss": 0.4713, + "step": 6262 + }, + { + "epoch": 0.38139025058612186, + "grad_norm": 1.0995484496965902, + "learning_rate": 4.9517859603553435e-06, + "loss": 0.4642, + "step": 6263 + }, + { + "epoch": 0.3814511463629997, + "grad_norm": 0.9327002905026383, + "learning_rate": 4.951770363303845e-06, + "loss": 0.4621, + "step": 6264 + }, + { + "epoch": 0.3815120421398776, + "grad_norm": 1.1065400786897703, + "learning_rate": 4.951754763754534e-06, + "loss": 0.4296, + "step": 6265 + }, + { + "epoch": 0.38157293791675545, + "grad_norm": 0.9693962913893135, + "learning_rate": 4.9517391617074254e-06, + "loss": 0.5042, + "step": 6266 + }, + { + "epoch": 0.38163383369363335, + "grad_norm": 0.9612616425016631, + "learning_rate": 4.9517235571625365e-06, + "loss": 0.5007, + "step": 6267 + }, + { + "epoch": 0.3816947294705112, + "grad_norm": 0.9885319349198695, + "learning_rate": 4.951707950119882e-06, + "loss": 0.4903, + "step": 6268 + }, + { + "epoch": 0.3817556252473891, + "grad_norm": 1.0732904246142683, + "learning_rate": 4.951692340579479e-06, + "loss": 0.3523, + "step": 6269 + }, + { + "epoch": 0.38181652102426694, + "grad_norm": 1.0934070778918439, + "learning_rate": 4.951676728541343e-06, + "loss": 0.4431, + "step": 6270 + }, + { + "epoch": 0.38187741680114484, + "grad_norm": 1.0561758063332374, + "learning_rate": 4.951661114005489e-06, + "loss": 0.4692, + "step": 6271 + }, + { + "epoch": 0.3819383125780227, + "grad_norm": 1.0886785864989046, + "learning_rate": 4.951645496971934e-06, + "loss": 0.439, + "step": 6272 + }, + { + "epoch": 0.3819992083549006, + "grad_norm": 1.0224700171846646, + "learning_rate": 4.9516298774406935e-06, + "loss": 0.556, + "step": 6273 + }, + { + "epoch": 0.3820601041317785, + "grad_norm": 1.0112538637726123, + "learning_rate": 4.951614255411784e-06, + "loss": 0.4427, + "step": 6274 + }, + { + "epoch": 0.38212099990865633, + "grad_norm": 1.0183300223259741, + "learning_rate": 4.95159863088522e-06, + "loss": 0.4656, + "step": 6275 + }, + { + "epoch": 0.38218189568553423, + "grad_norm": 0.9815627274042307, + "learning_rate": 4.951583003861019e-06, + "loss": 0.4641, + "step": 6276 + }, + { + "epoch": 0.3822427914624121, + "grad_norm": 1.0536320380956545, + "learning_rate": 4.951567374339196e-06, + "loss": 0.461, + "step": 6277 + }, + { + "epoch": 0.38230368723929, + "grad_norm": 0.9870150930796193, + "learning_rate": 4.951551742319767e-06, + "loss": 0.4815, + "step": 6278 + }, + { + "epoch": 0.3823645830161678, + "grad_norm": 1.0477554406875553, + "learning_rate": 4.951536107802748e-06, + "loss": 0.4596, + "step": 6279 + }, + { + "epoch": 0.3824254787930457, + "grad_norm": 1.0427985948656824, + "learning_rate": 4.951520470788155e-06, + "loss": 0.451, + "step": 6280 + }, + { + "epoch": 0.38248637456992357, + "grad_norm": 0.9687833709778435, + "learning_rate": 4.951504831276004e-06, + "loss": 0.4649, + "step": 6281 + }, + { + "epoch": 0.38254727034680147, + "grad_norm": 1.1079216903732936, + "learning_rate": 4.951489189266311e-06, + "loss": 0.5008, + "step": 6282 + }, + { + "epoch": 0.3826081661236793, + "grad_norm": 1.0733837330347158, + "learning_rate": 4.951473544759091e-06, + "loss": 0.4135, + "step": 6283 + }, + { + "epoch": 0.3826690619005572, + "grad_norm": 1.1374250415957654, + "learning_rate": 4.951457897754362e-06, + "loss": 0.4503, + "step": 6284 + }, + { + "epoch": 0.38272995767743506, + "grad_norm": 1.0541976972930818, + "learning_rate": 4.951442248252138e-06, + "loss": 0.4179, + "step": 6285 + }, + { + "epoch": 0.38279085345431296, + "grad_norm": 0.9703335909200909, + "learning_rate": 4.9514265962524345e-06, + "loss": 0.4916, + "step": 6286 + }, + { + "epoch": 0.3828517492311908, + "grad_norm": 1.1470399838132395, + "learning_rate": 4.95141094175527e-06, + "loss": 0.3831, + "step": 6287 + }, + { + "epoch": 0.3829126450080687, + "grad_norm": 1.0352791791056986, + "learning_rate": 4.951395284760659e-06, + "loss": 0.4925, + "step": 6288 + }, + { + "epoch": 0.38297354078494655, + "grad_norm": 1.0032438516201547, + "learning_rate": 4.951379625268617e-06, + "loss": 0.4574, + "step": 6289 + }, + { + "epoch": 0.38303443656182445, + "grad_norm": 0.9575547639593858, + "learning_rate": 4.9513639632791604e-06, + "loss": 0.5458, + "step": 6290 + }, + { + "epoch": 0.3830953323387023, + "grad_norm": 1.0581680895226544, + "learning_rate": 4.951348298792305e-06, + "loss": 0.4311, + "step": 6291 + }, + { + "epoch": 0.3831562281155802, + "grad_norm": 1.0363448371098116, + "learning_rate": 4.9513326318080676e-06, + "loss": 0.3808, + "step": 6292 + }, + { + "epoch": 0.38321712389245804, + "grad_norm": 0.9293598243125225, + "learning_rate": 4.951316962326463e-06, + "loss": 0.5046, + "step": 6293 + }, + { + "epoch": 0.38327801966933595, + "grad_norm": 0.9725648967626845, + "learning_rate": 4.951301290347508e-06, + "loss": 0.4771, + "step": 6294 + }, + { + "epoch": 0.3833389154462138, + "grad_norm": 0.9759156124826759, + "learning_rate": 4.951285615871218e-06, + "loss": 0.449, + "step": 6295 + }, + { + "epoch": 0.3833998112230917, + "grad_norm": 1.0099389898525486, + "learning_rate": 4.951269938897608e-06, + "loss": 0.44, + "step": 6296 + }, + { + "epoch": 0.38346070699996954, + "grad_norm": 0.8882066173676824, + "learning_rate": 4.951254259426697e-06, + "loss": 0.488, + "step": 6297 + }, + { + "epoch": 0.38352160277684744, + "grad_norm": 0.9538309096341363, + "learning_rate": 4.951238577458498e-06, + "loss": 0.5115, + "step": 6298 + }, + { + "epoch": 0.3835824985537253, + "grad_norm": 0.9577774296060666, + "learning_rate": 4.951222892993028e-06, + "loss": 0.4848, + "step": 6299 + }, + { + "epoch": 0.3836433943306032, + "grad_norm": 1.031842149859397, + "learning_rate": 4.951207206030304e-06, + "loss": 0.4174, + "step": 6300 + }, + { + "epoch": 0.383704290107481, + "grad_norm": 1.0099961951670025, + "learning_rate": 4.951191516570341e-06, + "loss": 0.4345, + "step": 6301 + }, + { + "epoch": 0.38376518588435893, + "grad_norm": 0.9994833194291857, + "learning_rate": 4.951175824613154e-06, + "loss": 0.4523, + "step": 6302 + }, + { + "epoch": 0.3838260816612368, + "grad_norm": 0.975923365231895, + "learning_rate": 4.951160130158761e-06, + "loss": 0.5299, + "step": 6303 + }, + { + "epoch": 0.3838869774381147, + "grad_norm": 1.0167478766946068, + "learning_rate": 4.951144433207177e-06, + "loss": 0.4806, + "step": 6304 + }, + { + "epoch": 0.3839478732149925, + "grad_norm": 0.9591071841807449, + "learning_rate": 4.951128733758417e-06, + "loss": 0.4393, + "step": 6305 + }, + { + "epoch": 0.3840087689918704, + "grad_norm": 0.9020663304136843, + "learning_rate": 4.951113031812499e-06, + "loss": 0.5134, + "step": 6306 + }, + { + "epoch": 0.38406966476874826, + "grad_norm": 1.0174936306932998, + "learning_rate": 4.951097327369438e-06, + "loss": 0.5146, + "step": 6307 + }, + { + "epoch": 0.38413056054562617, + "grad_norm": 1.1060376082039234, + "learning_rate": 4.951081620429249e-06, + "loss": 0.4308, + "step": 6308 + }, + { + "epoch": 0.384191456322504, + "grad_norm": 1.127919647077675, + "learning_rate": 4.95106591099195e-06, + "loss": 0.3983, + "step": 6309 + }, + { + "epoch": 0.3842523520993819, + "grad_norm": 1.0464443322218624, + "learning_rate": 4.951050199057555e-06, + "loss": 0.4463, + "step": 6310 + }, + { + "epoch": 0.38431324787625976, + "grad_norm": 0.9897857095101403, + "learning_rate": 4.9510344846260826e-06, + "loss": 0.4453, + "step": 6311 + }, + { + "epoch": 0.38437414365313766, + "grad_norm": 1.062831658290762, + "learning_rate": 4.9510187676975466e-06, + "loss": 0.5417, + "step": 6312 + }, + { + "epoch": 0.3844350394300155, + "grad_norm": 0.9826009044801425, + "learning_rate": 4.9510030482719625e-06, + "loss": 0.4641, + "step": 6313 + }, + { + "epoch": 0.3844959352068934, + "grad_norm": 1.0826786330717952, + "learning_rate": 4.950987326349348e-06, + "loss": 0.3838, + "step": 6314 + }, + { + "epoch": 0.3845568309837713, + "grad_norm": 1.0299401644359154, + "learning_rate": 4.95097160192972e-06, + "loss": 0.4427, + "step": 6315 + }, + { + "epoch": 0.38461772676064915, + "grad_norm": 1.0499529938159415, + "learning_rate": 4.9509558750130916e-06, + "loss": 0.4765, + "step": 6316 + }, + { + "epoch": 0.38467862253752705, + "grad_norm": 0.9357582353703262, + "learning_rate": 4.950940145599481e-06, + "loss": 0.4818, + "step": 6317 + }, + { + "epoch": 0.3847395183144049, + "grad_norm": 0.9677693747364238, + "learning_rate": 4.950924413688903e-06, + "loss": 0.4947, + "step": 6318 + }, + { + "epoch": 0.3848004140912828, + "grad_norm": 1.0017366576068247, + "learning_rate": 4.950908679281374e-06, + "loss": 0.4579, + "step": 6319 + }, + { + "epoch": 0.38486130986816064, + "grad_norm": 1.0031319154965612, + "learning_rate": 4.9508929423769105e-06, + "loss": 0.4868, + "step": 6320 + }, + { + "epoch": 0.38492220564503854, + "grad_norm": 0.9774797267008417, + "learning_rate": 4.950877202975528e-06, + "loss": 0.4838, + "step": 6321 + }, + { + "epoch": 0.3849831014219164, + "grad_norm": 1.0841681249180901, + "learning_rate": 4.950861461077244e-06, + "loss": 0.4935, + "step": 6322 + }, + { + "epoch": 0.3850439971987943, + "grad_norm": 1.0737697935394341, + "learning_rate": 4.950845716682072e-06, + "loss": 0.4739, + "step": 6323 + }, + { + "epoch": 0.38510489297567213, + "grad_norm": 1.0567879435946645, + "learning_rate": 4.95082996979003e-06, + "loss": 0.4831, + "step": 6324 + }, + { + "epoch": 0.38516578875255003, + "grad_norm": 0.9938692628286614, + "learning_rate": 4.950814220401133e-06, + "loss": 0.4326, + "step": 6325 + }, + { + "epoch": 0.3852266845294279, + "grad_norm": 0.9233785259969335, + "learning_rate": 4.9507984685153976e-06, + "loss": 0.4525, + "step": 6326 + }, + { + "epoch": 0.3852875803063058, + "grad_norm": 0.9988075676640437, + "learning_rate": 4.950782714132839e-06, + "loss": 0.4367, + "step": 6327 + }, + { + "epoch": 0.3853484760831836, + "grad_norm": 1.0425921576268418, + "learning_rate": 4.950766957253475e-06, + "loss": 0.5339, + "step": 6328 + }, + { + "epoch": 0.3854093718600615, + "grad_norm": 1.069788153121016, + "learning_rate": 4.95075119787732e-06, + "loss": 0.4592, + "step": 6329 + }, + { + "epoch": 0.38547026763693937, + "grad_norm": 1.0299339714363, + "learning_rate": 4.950735436004391e-06, + "loss": 0.4523, + "step": 6330 + }, + { + "epoch": 0.38553116341381727, + "grad_norm": 1.0589802274255886, + "learning_rate": 4.950719671634704e-06, + "loss": 0.4117, + "step": 6331 + }, + { + "epoch": 0.3855920591906951, + "grad_norm": 1.0133058708597933, + "learning_rate": 4.950703904768274e-06, + "loss": 0.4576, + "step": 6332 + }, + { + "epoch": 0.385652954967573, + "grad_norm": 0.9746412170036715, + "learning_rate": 4.950688135405117e-06, + "loss": 0.4607, + "step": 6333 + }, + { + "epoch": 0.38571385074445086, + "grad_norm": 1.0293865916261402, + "learning_rate": 4.950672363545252e-06, + "loss": 0.4659, + "step": 6334 + }, + { + "epoch": 0.38577474652132876, + "grad_norm": 0.9617523538421171, + "learning_rate": 4.950656589188692e-06, + "loss": 0.4994, + "step": 6335 + }, + { + "epoch": 0.3858356422982066, + "grad_norm": 1.021319383343782, + "learning_rate": 4.950640812335453e-06, + "loss": 0.5104, + "step": 6336 + }, + { + "epoch": 0.3858965380750845, + "grad_norm": 1.09580280013842, + "learning_rate": 4.950625032985553e-06, + "loss": 0.4486, + "step": 6337 + }, + { + "epoch": 0.38595743385196235, + "grad_norm": 1.0415210951526357, + "learning_rate": 4.950609251139007e-06, + "loss": 0.4334, + "step": 6338 + }, + { + "epoch": 0.38601832962884025, + "grad_norm": 0.9862360800425973, + "learning_rate": 4.950593466795831e-06, + "loss": 0.5197, + "step": 6339 + }, + { + "epoch": 0.3860792254057181, + "grad_norm": 0.9834832456924228, + "learning_rate": 4.950577679956042e-06, + "loss": 0.4651, + "step": 6340 + }, + { + "epoch": 0.386140121182596, + "grad_norm": 0.9984921965084532, + "learning_rate": 4.950561890619655e-06, + "loss": 0.4542, + "step": 6341 + }, + { + "epoch": 0.38620101695947384, + "grad_norm": 1.0178287250763662, + "learning_rate": 4.9505460987866865e-06, + "loss": 0.4449, + "step": 6342 + }, + { + "epoch": 0.38626191273635174, + "grad_norm": 0.9604318383867301, + "learning_rate": 4.950530304457153e-06, + "loss": 0.4541, + "step": 6343 + }, + { + "epoch": 0.3863228085132296, + "grad_norm": 1.0549476877572637, + "learning_rate": 4.95051450763107e-06, + "loss": 0.4043, + "step": 6344 + }, + { + "epoch": 0.3863837042901075, + "grad_norm": 1.0326286140729468, + "learning_rate": 4.950498708308453e-06, + "loss": 0.5147, + "step": 6345 + }, + { + "epoch": 0.38644460006698533, + "grad_norm": 1.1162201919780466, + "learning_rate": 4.95048290648932e-06, + "loss": 0.52, + "step": 6346 + }, + { + "epoch": 0.38650549584386323, + "grad_norm": 1.034397085970403, + "learning_rate": 4.950467102173685e-06, + "loss": 0.4726, + "step": 6347 + }, + { + "epoch": 0.3865663916207411, + "grad_norm": 0.9652204857462271, + "learning_rate": 4.950451295361566e-06, + "loss": 0.4378, + "step": 6348 + }, + { + "epoch": 0.386627287397619, + "grad_norm": 1.0750926888151, + "learning_rate": 4.950435486052977e-06, + "loss": 0.4595, + "step": 6349 + }, + { + "epoch": 0.3866881831744968, + "grad_norm": 1.0465697294244056, + "learning_rate": 4.950419674247936e-06, + "loss": 0.3774, + "step": 6350 + }, + { + "epoch": 0.3867490789513747, + "grad_norm": 1.043732891998748, + "learning_rate": 4.950403859946459e-06, + "loss": 0.4025, + "step": 6351 + }, + { + "epoch": 0.38680997472825257, + "grad_norm": 1.1313624927388177, + "learning_rate": 4.95038804314856e-06, + "loss": 0.397, + "step": 6352 + }, + { + "epoch": 0.38687087050513047, + "grad_norm": 0.9773335860486658, + "learning_rate": 4.950372223854257e-06, + "loss": 0.477, + "step": 6353 + }, + { + "epoch": 0.3869317662820083, + "grad_norm": 1.0442801353347437, + "learning_rate": 4.9503564020635665e-06, + "loss": 0.4611, + "step": 6354 + }, + { + "epoch": 0.3869926620588862, + "grad_norm": 1.0109439780256262, + "learning_rate": 4.950340577776503e-06, + "loss": 0.4819, + "step": 6355 + }, + { + "epoch": 0.3870535578357641, + "grad_norm": 0.9846806036666983, + "learning_rate": 4.950324750993084e-06, + "loss": 0.5232, + "step": 6356 + }, + { + "epoch": 0.38711445361264196, + "grad_norm": 1.0129264953085642, + "learning_rate": 4.950308921713326e-06, + "loss": 0.4405, + "step": 6357 + }, + { + "epoch": 0.38717534938951986, + "grad_norm": 0.9584811355138483, + "learning_rate": 4.9502930899372425e-06, + "loss": 0.4713, + "step": 6358 + }, + { + "epoch": 0.3872362451663977, + "grad_norm": 1.0314283740941406, + "learning_rate": 4.950277255664852e-06, + "loss": 0.4829, + "step": 6359 + }, + { + "epoch": 0.3872971409432756, + "grad_norm": 1.0528286439091543, + "learning_rate": 4.9502614188961705e-06, + "loss": 0.4391, + "step": 6360 + }, + { + "epoch": 0.38735803672015345, + "grad_norm": 0.9153242588790159, + "learning_rate": 4.950245579631213e-06, + "loss": 0.4926, + "step": 6361 + }, + { + "epoch": 0.38741893249703135, + "grad_norm": 0.9491558865287852, + "learning_rate": 4.950229737869996e-06, + "loss": 0.5056, + "step": 6362 + }, + { + "epoch": 0.3874798282739092, + "grad_norm": 0.9722872635357207, + "learning_rate": 4.950213893612537e-06, + "loss": 0.5005, + "step": 6363 + }, + { + "epoch": 0.3875407240507871, + "grad_norm": 1.0194452022904033, + "learning_rate": 4.95019804685885e-06, + "loss": 0.4566, + "step": 6364 + }, + { + "epoch": 0.38760161982766494, + "grad_norm": 1.1100276754679128, + "learning_rate": 4.9501821976089525e-06, + "loss": 0.4949, + "step": 6365 + }, + { + "epoch": 0.38766251560454285, + "grad_norm": 1.0017967062357778, + "learning_rate": 4.9501663458628604e-06, + "loss": 0.59, + "step": 6366 + }, + { + "epoch": 0.3877234113814207, + "grad_norm": 1.0242852998016498, + "learning_rate": 4.95015049162059e-06, + "loss": 0.5047, + "step": 6367 + }, + { + "epoch": 0.3877843071582986, + "grad_norm": 0.9705342051729012, + "learning_rate": 4.950134634882156e-06, + "loss": 0.5191, + "step": 6368 + }, + { + "epoch": 0.38784520293517644, + "grad_norm": 1.0516550382813907, + "learning_rate": 4.950118775647577e-06, + "loss": 0.4498, + "step": 6369 + }, + { + "epoch": 0.38790609871205434, + "grad_norm": 0.9790921764229468, + "learning_rate": 4.9501029139168676e-06, + "loss": 0.4741, + "step": 6370 + }, + { + "epoch": 0.3879669944889322, + "grad_norm": 1.0218587945704436, + "learning_rate": 4.950087049690044e-06, + "loss": 0.4473, + "step": 6371 + }, + { + "epoch": 0.3880278902658101, + "grad_norm": 1.108322294893642, + "learning_rate": 4.950071182967124e-06, + "loss": 0.4343, + "step": 6372 + }, + { + "epoch": 0.3880887860426879, + "grad_norm": 1.0207817249666158, + "learning_rate": 4.950055313748121e-06, + "loss": 0.4173, + "step": 6373 + }, + { + "epoch": 0.38814968181956583, + "grad_norm": 1.1128852110451526, + "learning_rate": 4.950039442033053e-06, + "loss": 0.4209, + "step": 6374 + }, + { + "epoch": 0.3882105775964437, + "grad_norm": 1.0307718920415765, + "learning_rate": 4.950023567821936e-06, + "loss": 0.4645, + "step": 6375 + }, + { + "epoch": 0.3882714733733216, + "grad_norm": 1.092118192468334, + "learning_rate": 4.950007691114785e-06, + "loss": 0.383, + "step": 6376 + }, + { + "epoch": 0.3883323691501994, + "grad_norm": 0.9781980319924812, + "learning_rate": 4.949991811911618e-06, + "loss": 0.4395, + "step": 6377 + }, + { + "epoch": 0.3883932649270773, + "grad_norm": 1.0055344303853269, + "learning_rate": 4.949975930212449e-06, + "loss": 0.4991, + "step": 6378 + }, + { + "epoch": 0.38845416070395516, + "grad_norm": 0.9903163766491825, + "learning_rate": 4.9499600460172966e-06, + "loss": 0.5108, + "step": 6379 + }, + { + "epoch": 0.38851505648083307, + "grad_norm": 1.0658710892624426, + "learning_rate": 4.949944159326177e-06, + "loss": 0.4793, + "step": 6380 + }, + { + "epoch": 0.3885759522577109, + "grad_norm": 0.955866144066806, + "learning_rate": 4.949928270139102e-06, + "loss": 0.4597, + "step": 6381 + }, + { + "epoch": 0.3886368480345888, + "grad_norm": 1.037838939103643, + "learning_rate": 4.949912378456095e-06, + "loss": 0.468, + "step": 6382 + }, + { + "epoch": 0.38869774381146666, + "grad_norm": 1.089090201834181, + "learning_rate": 4.9498964842771655e-06, + "loss": 0.4768, + "step": 6383 + }, + { + "epoch": 0.38875863958834456, + "grad_norm": 1.028548741793015, + "learning_rate": 4.9498805876023326e-06, + "loss": 0.4874, + "step": 6384 + }, + { + "epoch": 0.3888195353652224, + "grad_norm": 1.0300341607630452, + "learning_rate": 4.949864688431613e-06, + "loss": 0.4334, + "step": 6385 + }, + { + "epoch": 0.3888804311421003, + "grad_norm": 1.0343773590551648, + "learning_rate": 4.949848786765022e-06, + "loss": 0.4085, + "step": 6386 + }, + { + "epoch": 0.38894132691897815, + "grad_norm": 1.020244767064479, + "learning_rate": 4.949832882602575e-06, + "loss": 0.4477, + "step": 6387 + }, + { + "epoch": 0.38900222269585605, + "grad_norm": 1.0818046106205355, + "learning_rate": 4.949816975944291e-06, + "loss": 0.4636, + "step": 6388 + }, + { + "epoch": 0.3890631184727339, + "grad_norm": 1.0761303694228894, + "learning_rate": 4.949801066790183e-06, + "loss": 0.4394, + "step": 6389 + }, + { + "epoch": 0.3891240142496118, + "grad_norm": 0.9858558044605464, + "learning_rate": 4.9497851551402696e-06, + "loss": 0.4851, + "step": 6390 + }, + { + "epoch": 0.38918491002648964, + "grad_norm": 1.1734249088344224, + "learning_rate": 4.949769240994566e-06, + "loss": 0.3898, + "step": 6391 + }, + { + "epoch": 0.38924580580336754, + "grad_norm": 1.001976606781902, + "learning_rate": 4.9497533243530875e-06, + "loss": 0.5069, + "step": 6392 + }, + { + "epoch": 0.3893067015802454, + "grad_norm": 0.983230641995478, + "learning_rate": 4.949737405215851e-06, + "loss": 0.4726, + "step": 6393 + }, + { + "epoch": 0.3893675973571233, + "grad_norm": 1.1193679350367232, + "learning_rate": 4.949721483582874e-06, + "loss": 0.4416, + "step": 6394 + }, + { + "epoch": 0.38942849313400113, + "grad_norm": 1.0463244515416639, + "learning_rate": 4.949705559454171e-06, + "loss": 0.513, + "step": 6395 + }, + { + "epoch": 0.38948938891087903, + "grad_norm": 1.036486722908571, + "learning_rate": 4.949689632829759e-06, + "loss": 0.4364, + "step": 6396 + }, + { + "epoch": 0.38955028468775693, + "grad_norm": 0.9283724485925434, + "learning_rate": 4.9496737037096554e-06, + "loss": 0.4897, + "step": 6397 + }, + { + "epoch": 0.3896111804646348, + "grad_norm": 1.0854169975371695, + "learning_rate": 4.949657772093874e-06, + "loss": 0.4051, + "step": 6398 + }, + { + "epoch": 0.3896720762415127, + "grad_norm": 0.9436845055650138, + "learning_rate": 4.949641837982432e-06, + "loss": 0.4693, + "step": 6399 + }, + { + "epoch": 0.3897329720183905, + "grad_norm": 0.9743088924783367, + "learning_rate": 4.949625901375346e-06, + "loss": 0.4314, + "step": 6400 + }, + { + "epoch": 0.3897938677952684, + "grad_norm": 0.9513981680753608, + "learning_rate": 4.949609962272632e-06, + "loss": 0.4555, + "step": 6401 + }, + { + "epoch": 0.38985476357214627, + "grad_norm": 1.0289160525081091, + "learning_rate": 4.949594020674307e-06, + "loss": 0.4491, + "step": 6402 + }, + { + "epoch": 0.38991565934902417, + "grad_norm": 1.0359937213846744, + "learning_rate": 4.949578076580386e-06, + "loss": 0.4131, + "step": 6403 + }, + { + "epoch": 0.389976555125902, + "grad_norm": 1.095500343064474, + "learning_rate": 4.9495621299908856e-06, + "loss": 0.4535, + "step": 6404 + }, + { + "epoch": 0.3900374509027799, + "grad_norm": 0.9804538295758262, + "learning_rate": 4.949546180905823e-06, + "loss": 0.4709, + "step": 6405 + }, + { + "epoch": 0.39009834667965776, + "grad_norm": 1.0587418875725434, + "learning_rate": 4.949530229325213e-06, + "loss": 0.4586, + "step": 6406 + }, + { + "epoch": 0.39015924245653566, + "grad_norm": 0.9981034345597596, + "learning_rate": 4.949514275249073e-06, + "loss": 0.4476, + "step": 6407 + }, + { + "epoch": 0.3902201382334135, + "grad_norm": 1.1667540784634831, + "learning_rate": 4.949498318677418e-06, + "loss": 0.4337, + "step": 6408 + }, + { + "epoch": 0.3902810340102914, + "grad_norm": 1.0526914219028676, + "learning_rate": 4.949482359610266e-06, + "loss": 0.4729, + "step": 6409 + }, + { + "epoch": 0.39034192978716925, + "grad_norm": 1.002888721275547, + "learning_rate": 4.949466398047631e-06, + "loss": 0.4699, + "step": 6410 + }, + { + "epoch": 0.39040282556404715, + "grad_norm": 1.1457258667360766, + "learning_rate": 4.949450433989532e-06, + "loss": 0.425, + "step": 6411 + }, + { + "epoch": 0.390463721340925, + "grad_norm": 0.9057428941329452, + "learning_rate": 4.949434467435983e-06, + "loss": 0.5117, + "step": 6412 + }, + { + "epoch": 0.3905246171178029, + "grad_norm": 1.0717151246082979, + "learning_rate": 4.9494184983870004e-06, + "loss": 0.4373, + "step": 6413 + }, + { + "epoch": 0.39058551289468074, + "grad_norm": 1.0859096426886905, + "learning_rate": 4.949402526842603e-06, + "loss": 0.4339, + "step": 6414 + }, + { + "epoch": 0.39064640867155864, + "grad_norm": 0.9693654325716548, + "learning_rate": 4.949386552802804e-06, + "loss": 0.4326, + "step": 6415 + }, + { + "epoch": 0.3907073044484365, + "grad_norm": 1.039531561933885, + "learning_rate": 4.949370576267621e-06, + "loss": 0.4725, + "step": 6416 + }, + { + "epoch": 0.3907682002253144, + "grad_norm": 1.018161470092905, + "learning_rate": 4.94935459723707e-06, + "loss": 0.4976, + "step": 6417 + }, + { + "epoch": 0.39082909600219223, + "grad_norm": 1.0508811763691752, + "learning_rate": 4.949338615711168e-06, + "loss": 0.4201, + "step": 6418 + }, + { + "epoch": 0.39088999177907013, + "grad_norm": 1.0394987893515955, + "learning_rate": 4.949322631689931e-06, + "loss": 0.4268, + "step": 6419 + }, + { + "epoch": 0.390950887555948, + "grad_norm": 1.1179017800443736, + "learning_rate": 4.949306645173374e-06, + "loss": 0.4208, + "step": 6420 + }, + { + "epoch": 0.3910117833328259, + "grad_norm": 1.0867508673843966, + "learning_rate": 4.949290656161515e-06, + "loss": 0.4548, + "step": 6421 + }, + { + "epoch": 0.3910726791097037, + "grad_norm": 1.0371169986256918, + "learning_rate": 4.94927466465437e-06, + "loss": 0.4749, + "step": 6422 + }, + { + "epoch": 0.3911335748865816, + "grad_norm": 1.0223176413706252, + "learning_rate": 4.949258670651954e-06, + "loss": 0.4958, + "step": 6423 + }, + { + "epoch": 0.39119447066345947, + "grad_norm": 1.0710793600640567, + "learning_rate": 4.949242674154285e-06, + "loss": 0.4748, + "step": 6424 + }, + { + "epoch": 0.39125536644033737, + "grad_norm": 1.0320421846257315, + "learning_rate": 4.949226675161378e-06, + "loss": 0.5239, + "step": 6425 + }, + { + "epoch": 0.3913162622172152, + "grad_norm": 0.9808642727280072, + "learning_rate": 4.949210673673249e-06, + "loss": 0.5171, + "step": 6426 + }, + { + "epoch": 0.3913771579940931, + "grad_norm": 0.9638762169362743, + "learning_rate": 4.949194669689916e-06, + "loss": 0.4602, + "step": 6427 + }, + { + "epoch": 0.39143805377097096, + "grad_norm": 1.10705159905279, + "learning_rate": 4.949178663211395e-06, + "loss": 0.461, + "step": 6428 + }, + { + "epoch": 0.39149894954784886, + "grad_norm": 0.9874469001262415, + "learning_rate": 4.9491626542377006e-06, + "loss": 0.4886, + "step": 6429 + }, + { + "epoch": 0.3915598453247267, + "grad_norm": 1.0263978920176118, + "learning_rate": 4.94914664276885e-06, + "loss": 0.4554, + "step": 6430 + }, + { + "epoch": 0.3916207411016046, + "grad_norm": 1.0402655802576883, + "learning_rate": 4.949130628804861e-06, + "loss": 0.4102, + "step": 6431 + }, + { + "epoch": 0.39168163687848245, + "grad_norm": 1.0351270535429105, + "learning_rate": 4.949114612345748e-06, + "loss": 0.4439, + "step": 6432 + }, + { + "epoch": 0.39174253265536035, + "grad_norm": 1.0198294751986667, + "learning_rate": 4.949098593391528e-06, + "loss": 0.4814, + "step": 6433 + }, + { + "epoch": 0.3918034284322382, + "grad_norm": 1.0967396881767202, + "learning_rate": 4.949082571942218e-06, + "loss": 0.5059, + "step": 6434 + }, + { + "epoch": 0.3918643242091161, + "grad_norm": 0.9887629801354777, + "learning_rate": 4.9490665479978316e-06, + "loss": 0.4539, + "step": 6435 + }, + { + "epoch": 0.39192521998599394, + "grad_norm": 1.0604697240470458, + "learning_rate": 4.949050521558388e-06, + "loss": 0.4268, + "step": 6436 + }, + { + "epoch": 0.39198611576287185, + "grad_norm": 1.0236796330147706, + "learning_rate": 4.949034492623904e-06, + "loss": 0.4788, + "step": 6437 + }, + { + "epoch": 0.39204701153974975, + "grad_norm": 1.017613259220687, + "learning_rate": 4.949018461194393e-06, + "loss": 0.4482, + "step": 6438 + }, + { + "epoch": 0.3921079073166276, + "grad_norm": 1.0358924502190139, + "learning_rate": 4.949002427269873e-06, + "loss": 0.5405, + "step": 6439 + }, + { + "epoch": 0.3921688030935055, + "grad_norm": 1.032433050517823, + "learning_rate": 4.948986390850361e-06, + "loss": 0.4395, + "step": 6440 + }, + { + "epoch": 0.39222969887038334, + "grad_norm": 1.1115396199919994, + "learning_rate": 4.948970351935872e-06, + "loss": 0.4314, + "step": 6441 + }, + { + "epoch": 0.39229059464726124, + "grad_norm": 1.000528830011874, + "learning_rate": 4.948954310526423e-06, + "loss": 0.502, + "step": 6442 + }, + { + "epoch": 0.3923514904241391, + "grad_norm": 1.0782873772755406, + "learning_rate": 4.948938266622031e-06, + "loss": 0.4496, + "step": 6443 + }, + { + "epoch": 0.392412386201017, + "grad_norm": 0.9672023743986548, + "learning_rate": 4.948922220222712e-06, + "loss": 0.4377, + "step": 6444 + }, + { + "epoch": 0.39247328197789483, + "grad_norm": 1.0346246655089741, + "learning_rate": 4.94890617132848e-06, + "loss": 0.4615, + "step": 6445 + }, + { + "epoch": 0.39253417775477273, + "grad_norm": 1.0878804057998182, + "learning_rate": 4.948890119939355e-06, + "loss": 0.409, + "step": 6446 + }, + { + "epoch": 0.3925950735316506, + "grad_norm": 1.0045287329813242, + "learning_rate": 4.94887406605535e-06, + "loss": 0.4279, + "step": 6447 + }, + { + "epoch": 0.3926559693085285, + "grad_norm": 1.0318972399000113, + "learning_rate": 4.948858009676485e-06, + "loss": 0.4857, + "step": 6448 + }, + { + "epoch": 0.3927168650854063, + "grad_norm": 1.0838431529349921, + "learning_rate": 4.948841950802773e-06, + "loss": 0.3708, + "step": 6449 + }, + { + "epoch": 0.3927777608622842, + "grad_norm": 0.9717468363514001, + "learning_rate": 4.9488258894342324e-06, + "loss": 0.4311, + "step": 6450 + }, + { + "epoch": 0.39283865663916206, + "grad_norm": 0.9703269931960113, + "learning_rate": 4.9488098255708785e-06, + "loss": 0.5271, + "step": 6451 + }, + { + "epoch": 0.39289955241603997, + "grad_norm": 1.1114263463520497, + "learning_rate": 4.948793759212728e-06, + "loss": 0.4279, + "step": 6452 + }, + { + "epoch": 0.3929604481929178, + "grad_norm": 1.0678365255039153, + "learning_rate": 4.948777690359797e-06, + "loss": 0.5031, + "step": 6453 + }, + { + "epoch": 0.3930213439697957, + "grad_norm": 0.998497825416097, + "learning_rate": 4.948761619012103e-06, + "loss": 0.5006, + "step": 6454 + }, + { + "epoch": 0.39308223974667356, + "grad_norm": 1.031057566662535, + "learning_rate": 4.948745545169661e-06, + "loss": 0.3981, + "step": 6455 + }, + { + "epoch": 0.39314313552355146, + "grad_norm": 0.9623529938978084, + "learning_rate": 4.948729468832488e-06, + "loss": 0.4629, + "step": 6456 + }, + { + "epoch": 0.3932040313004293, + "grad_norm": 1.0377313859569048, + "learning_rate": 4.948713390000602e-06, + "loss": 0.3808, + "step": 6457 + }, + { + "epoch": 0.3932649270773072, + "grad_norm": 1.0244824541916986, + "learning_rate": 4.948697308674015e-06, + "loss": 0.5736, + "step": 6458 + }, + { + "epoch": 0.39332582285418505, + "grad_norm": 1.0785960886965278, + "learning_rate": 4.948681224852747e-06, + "loss": 0.4555, + "step": 6459 + }, + { + "epoch": 0.39338671863106295, + "grad_norm": 1.0511672935768919, + "learning_rate": 4.948665138536815e-06, + "loss": 0.4254, + "step": 6460 + }, + { + "epoch": 0.3934476144079408, + "grad_norm": 1.0624724139093964, + "learning_rate": 4.9486490497262315e-06, + "loss": 0.4677, + "step": 6461 + }, + { + "epoch": 0.3935085101848187, + "grad_norm": 0.9718296701232991, + "learning_rate": 4.948632958421017e-06, + "loss": 0.4242, + "step": 6462 + }, + { + "epoch": 0.39356940596169654, + "grad_norm": 0.9544938235410488, + "learning_rate": 4.948616864621185e-06, + "loss": 0.4776, + "step": 6463 + }, + { + "epoch": 0.39363030173857444, + "grad_norm": 1.1054803517763, + "learning_rate": 4.9486007683267546e-06, + "loss": 0.4477, + "step": 6464 + }, + { + "epoch": 0.3936911975154523, + "grad_norm": 1.0140846009746516, + "learning_rate": 4.948584669537739e-06, + "loss": 0.4943, + "step": 6465 + }, + { + "epoch": 0.3937520932923302, + "grad_norm": 1.0220391721938995, + "learning_rate": 4.948568568254157e-06, + "loss": 0.4761, + "step": 6466 + }, + { + "epoch": 0.39381298906920803, + "grad_norm": 1.042525990063406, + "learning_rate": 4.948552464476024e-06, + "loss": 0.4675, + "step": 6467 + }, + { + "epoch": 0.39387388484608593, + "grad_norm": 0.9845955242898616, + "learning_rate": 4.9485363582033575e-06, + "loss": 0.527, + "step": 6468 + }, + { + "epoch": 0.3939347806229638, + "grad_norm": 1.0368760717175556, + "learning_rate": 4.948520249436171e-06, + "loss": 0.4646, + "step": 6469 + }, + { + "epoch": 0.3939956763998417, + "grad_norm": 1.001248633990056, + "learning_rate": 4.948504138174486e-06, + "loss": 0.4583, + "step": 6470 + }, + { + "epoch": 0.3940565721767195, + "grad_norm": 1.009651585270744, + "learning_rate": 4.948488024418314e-06, + "loss": 0.4492, + "step": 6471 + }, + { + "epoch": 0.3941174679535974, + "grad_norm": 1.1302571377132657, + "learning_rate": 4.948471908167674e-06, + "loss": 0.4013, + "step": 6472 + }, + { + "epoch": 0.39417836373047527, + "grad_norm": 1.0719566729986971, + "learning_rate": 4.94845578942258e-06, + "loss": 0.5387, + "step": 6473 + }, + { + "epoch": 0.39423925950735317, + "grad_norm": 1.029262966180084, + "learning_rate": 4.948439668183052e-06, + "loss": 0.4762, + "step": 6474 + }, + { + "epoch": 0.394300155284231, + "grad_norm": 1.0974460904626753, + "learning_rate": 4.948423544449104e-06, + "loss": 0.4118, + "step": 6475 + }, + { + "epoch": 0.3943610510611089, + "grad_norm": 1.0277007191096985, + "learning_rate": 4.948407418220753e-06, + "loss": 0.4624, + "step": 6476 + }, + { + "epoch": 0.39442194683798676, + "grad_norm": 1.025700176627893, + "learning_rate": 4.9483912894980155e-06, + "loss": 0.556, + "step": 6477 + }, + { + "epoch": 0.39448284261486466, + "grad_norm": 0.9266151296256855, + "learning_rate": 4.948375158280908e-06, + "loss": 0.5032, + "step": 6478 + }, + { + "epoch": 0.39454373839174256, + "grad_norm": 1.0949692238261866, + "learning_rate": 4.948359024569446e-06, + "loss": 0.358, + "step": 6479 + }, + { + "epoch": 0.3946046341686204, + "grad_norm": 1.010789416719517, + "learning_rate": 4.948342888363648e-06, + "loss": 0.4303, + "step": 6480 + }, + { + "epoch": 0.3946655299454983, + "grad_norm": 1.0073926061873475, + "learning_rate": 4.9483267496635276e-06, + "loss": 0.4613, + "step": 6481 + }, + { + "epoch": 0.39472642572237615, + "grad_norm": 1.0830390168258455, + "learning_rate": 4.948310608469105e-06, + "loss": 0.4169, + "step": 6482 + }, + { + "epoch": 0.39478732149925405, + "grad_norm": 0.9082412609995786, + "learning_rate": 4.948294464780392e-06, + "loss": 0.5287, + "step": 6483 + }, + { + "epoch": 0.3948482172761319, + "grad_norm": 1.0174505966043244, + "learning_rate": 4.948278318597409e-06, + "loss": 0.5159, + "step": 6484 + }, + { + "epoch": 0.3949091130530098, + "grad_norm": 1.1654913854715987, + "learning_rate": 4.948262169920171e-06, + "loss": 0.3722, + "step": 6485 + }, + { + "epoch": 0.39497000882988764, + "grad_norm": 1.057110028290338, + "learning_rate": 4.9482460187486935e-06, + "loss": 0.4169, + "step": 6486 + }, + { + "epoch": 0.39503090460676554, + "grad_norm": 1.0008736666228037, + "learning_rate": 4.948229865082994e-06, + "loss": 0.4602, + "step": 6487 + }, + { + "epoch": 0.3950918003836434, + "grad_norm": 1.0239879164830714, + "learning_rate": 4.948213708923089e-06, + "loss": 0.4862, + "step": 6488 + }, + { + "epoch": 0.3951526961605213, + "grad_norm": 1.0323246004984061, + "learning_rate": 4.948197550268996e-06, + "loss": 0.4918, + "step": 6489 + }, + { + "epoch": 0.39521359193739913, + "grad_norm": 1.0584163038383547, + "learning_rate": 4.948181389120729e-06, + "loss": 0.4552, + "step": 6490 + }, + { + "epoch": 0.39527448771427703, + "grad_norm": 1.022820022150606, + "learning_rate": 4.948165225478305e-06, + "loss": 0.4897, + "step": 6491 + }, + { + "epoch": 0.3953353834911549, + "grad_norm": 1.0847522775517304, + "learning_rate": 4.9481490593417425e-06, + "loss": 0.3986, + "step": 6492 + }, + { + "epoch": 0.3953962792680328, + "grad_norm": 1.0461923331357679, + "learning_rate": 4.948132890711056e-06, + "loss": 0.4575, + "step": 6493 + }, + { + "epoch": 0.3954571750449106, + "grad_norm": 1.0337714357752312, + "learning_rate": 4.948116719586263e-06, + "loss": 0.5078, + "step": 6494 + }, + { + "epoch": 0.3955180708217885, + "grad_norm": 1.0266052119871885, + "learning_rate": 4.948100545967379e-06, + "loss": 0.4351, + "step": 6495 + }, + { + "epoch": 0.39557896659866637, + "grad_norm": 1.019261509570704, + "learning_rate": 4.948084369854422e-06, + "loss": 0.4864, + "step": 6496 + }, + { + "epoch": 0.39563986237554427, + "grad_norm": 0.9803086986371645, + "learning_rate": 4.948068191247407e-06, + "loss": 0.488, + "step": 6497 + }, + { + "epoch": 0.3957007581524221, + "grad_norm": 1.0788566782239037, + "learning_rate": 4.948052010146351e-06, + "loss": 0.4293, + "step": 6498 + }, + { + "epoch": 0.3957616539293, + "grad_norm": 1.0734299986468638, + "learning_rate": 4.94803582655127e-06, + "loss": 0.452, + "step": 6499 + }, + { + "epoch": 0.39582254970617786, + "grad_norm": 0.975914232009204, + "learning_rate": 4.948019640462182e-06, + "loss": 0.4881, + "step": 6500 + }, + { + "epoch": 0.39588344548305576, + "grad_norm": 1.091114477557011, + "learning_rate": 4.948003451879102e-06, + "loss": 0.4268, + "step": 6501 + }, + { + "epoch": 0.3959443412599336, + "grad_norm": 1.088198886869426, + "learning_rate": 4.947987260802047e-06, + "loss": 0.5054, + "step": 6502 + }, + { + "epoch": 0.3960052370368115, + "grad_norm": 0.9995289795792761, + "learning_rate": 4.947971067231033e-06, + "loss": 0.4368, + "step": 6503 + }, + { + "epoch": 0.39606613281368935, + "grad_norm": 0.97425331421946, + "learning_rate": 4.947954871166077e-06, + "loss": 0.5129, + "step": 6504 + }, + { + "epoch": 0.39612702859056725, + "grad_norm": 0.9426490347654777, + "learning_rate": 4.947938672607197e-06, + "loss": 0.4706, + "step": 6505 + }, + { + "epoch": 0.3961879243674451, + "grad_norm": 1.0429396081061701, + "learning_rate": 4.947922471554406e-06, + "loss": 0.521, + "step": 6506 + }, + { + "epoch": 0.396248820144323, + "grad_norm": 0.9947111676712126, + "learning_rate": 4.947906268007724e-06, + "loss": 0.4804, + "step": 6507 + }, + { + "epoch": 0.39630971592120084, + "grad_norm": 1.0823182885660778, + "learning_rate": 4.947890061967165e-06, + "loss": 0.519, + "step": 6508 + }, + { + "epoch": 0.39637061169807875, + "grad_norm": 1.0430001834039864, + "learning_rate": 4.947873853432746e-06, + "loss": 0.4785, + "step": 6509 + }, + { + "epoch": 0.3964315074749566, + "grad_norm": 1.1019163182565934, + "learning_rate": 4.947857642404485e-06, + "loss": 0.4758, + "step": 6510 + }, + { + "epoch": 0.3964924032518345, + "grad_norm": 1.1026050417291915, + "learning_rate": 4.9478414288823985e-06, + "loss": 0.4557, + "step": 6511 + }, + { + "epoch": 0.39655329902871234, + "grad_norm": 0.9975771259242836, + "learning_rate": 4.9478252128665e-06, + "loss": 0.4943, + "step": 6512 + }, + { + "epoch": 0.39661419480559024, + "grad_norm": 1.0954940384131266, + "learning_rate": 4.94780899435681e-06, + "loss": 0.4081, + "step": 6513 + }, + { + "epoch": 0.3966750905824681, + "grad_norm": 1.1051175777140128, + "learning_rate": 4.947792773353342e-06, + "loss": 0.4302, + "step": 6514 + }, + { + "epoch": 0.396735986359346, + "grad_norm": 0.9684373798013078, + "learning_rate": 4.947776549856113e-06, + "loss": 0.475, + "step": 6515 + }, + { + "epoch": 0.3967968821362238, + "grad_norm": 1.0816205751732373, + "learning_rate": 4.9477603238651404e-06, + "loss": 0.4666, + "step": 6516 + }, + { + "epoch": 0.39685777791310173, + "grad_norm": 0.9950805926758415, + "learning_rate": 4.947744095380441e-06, + "loss": 0.4685, + "step": 6517 + }, + { + "epoch": 0.3969186736899796, + "grad_norm": 0.9447651986656109, + "learning_rate": 4.947727864402031e-06, + "loss": 0.565, + "step": 6518 + }, + { + "epoch": 0.3969795694668575, + "grad_norm": 0.945473546250564, + "learning_rate": 4.947711630929926e-06, + "loss": 0.4739, + "step": 6519 + }, + { + "epoch": 0.3970404652437354, + "grad_norm": 1.0322168569104946, + "learning_rate": 4.947695394964145e-06, + "loss": 0.4722, + "step": 6520 + }, + { + "epoch": 0.3971013610206132, + "grad_norm": 1.0327284307238587, + "learning_rate": 4.9476791565047015e-06, + "loss": 0.4081, + "step": 6521 + }, + { + "epoch": 0.3971622567974911, + "grad_norm": 1.0108242143617996, + "learning_rate": 4.947662915551613e-06, + "loss": 0.4372, + "step": 6522 + }, + { + "epoch": 0.39722315257436897, + "grad_norm": 0.9655296518886991, + "learning_rate": 4.947646672104896e-06, + "loss": 0.4924, + "step": 6523 + }, + { + "epoch": 0.39728404835124687, + "grad_norm": 1.0367814808441709, + "learning_rate": 4.947630426164568e-06, + "loss": 0.4642, + "step": 6524 + }, + { + "epoch": 0.3973449441281247, + "grad_norm": 1.0845735298863415, + "learning_rate": 4.947614177730646e-06, + "loss": 0.4651, + "step": 6525 + }, + { + "epoch": 0.3974058399050026, + "grad_norm": 1.0014622626455083, + "learning_rate": 4.947597926803145e-06, + "loss": 0.4873, + "step": 6526 + }, + { + "epoch": 0.39746673568188046, + "grad_norm": 1.031301034757973, + "learning_rate": 4.947581673382081e-06, + "loss": 0.4359, + "step": 6527 + }, + { + "epoch": 0.39752763145875836, + "grad_norm": 0.9543975107479361, + "learning_rate": 4.947565417467473e-06, + "loss": 0.414, + "step": 6528 + }, + { + "epoch": 0.3975885272356362, + "grad_norm": 0.9785787544688984, + "learning_rate": 4.947549159059336e-06, + "loss": 0.4852, + "step": 6529 + }, + { + "epoch": 0.3976494230125141, + "grad_norm": 1.0724041661849852, + "learning_rate": 4.947532898157687e-06, + "loss": 0.4991, + "step": 6530 + }, + { + "epoch": 0.39771031878939195, + "grad_norm": 1.0087176660349486, + "learning_rate": 4.947516634762542e-06, + "loss": 0.4831, + "step": 6531 + }, + { + "epoch": 0.39777121456626985, + "grad_norm": 1.112213087054264, + "learning_rate": 4.947500368873918e-06, + "loss": 0.3793, + "step": 6532 + }, + { + "epoch": 0.3978321103431477, + "grad_norm": 1.0394408909255153, + "learning_rate": 4.947484100491831e-06, + "loss": 0.5601, + "step": 6533 + }, + { + "epoch": 0.3978930061200256, + "grad_norm": 1.0222896474340062, + "learning_rate": 4.947467829616299e-06, + "loss": 0.4509, + "step": 6534 + }, + { + "epoch": 0.39795390189690344, + "grad_norm": 1.0262778525328218, + "learning_rate": 4.947451556247337e-06, + "loss": 0.4982, + "step": 6535 + }, + { + "epoch": 0.39801479767378134, + "grad_norm": 1.1346300440773969, + "learning_rate": 4.947435280384962e-06, + "loss": 0.4761, + "step": 6536 + }, + { + "epoch": 0.3980756934506592, + "grad_norm": 1.0583747872859475, + "learning_rate": 4.947419002029192e-06, + "loss": 0.4967, + "step": 6537 + }, + { + "epoch": 0.3981365892275371, + "grad_norm": 1.0352071227161725, + "learning_rate": 4.947402721180041e-06, + "loss": 0.4668, + "step": 6538 + }, + { + "epoch": 0.39819748500441493, + "grad_norm": 0.9971490688662645, + "learning_rate": 4.947386437837528e-06, + "loss": 0.4535, + "step": 6539 + }, + { + "epoch": 0.39825838078129283, + "grad_norm": 1.0056813649165552, + "learning_rate": 4.947370152001668e-06, + "loss": 0.4367, + "step": 6540 + }, + { + "epoch": 0.3983192765581707, + "grad_norm": 0.9983936379206212, + "learning_rate": 4.947353863672479e-06, + "loss": 0.4827, + "step": 6541 + }, + { + "epoch": 0.3983801723350486, + "grad_norm": 1.0004451122499012, + "learning_rate": 4.947337572849976e-06, + "loss": 0.469, + "step": 6542 + }, + { + "epoch": 0.3984410681119264, + "grad_norm": 0.9961731750853224, + "learning_rate": 4.947321279534177e-06, + "loss": 0.4654, + "step": 6543 + }, + { + "epoch": 0.3985019638888043, + "grad_norm": 1.06412289240787, + "learning_rate": 4.9473049837250975e-06, + "loss": 0.4592, + "step": 6544 + }, + { + "epoch": 0.39856285966568217, + "grad_norm": 1.1695403901109822, + "learning_rate": 4.9472886854227545e-06, + "loss": 0.4161, + "step": 6545 + }, + { + "epoch": 0.39862375544256007, + "grad_norm": 1.0529119675251486, + "learning_rate": 4.947272384627164e-06, + "loss": 0.5033, + "step": 6546 + }, + { + "epoch": 0.3986846512194379, + "grad_norm": 1.0583170759372587, + "learning_rate": 4.947256081338345e-06, + "loss": 0.4516, + "step": 6547 + }, + { + "epoch": 0.3987455469963158, + "grad_norm": 1.012833920961575, + "learning_rate": 4.947239775556311e-06, + "loss": 0.4518, + "step": 6548 + }, + { + "epoch": 0.39880644277319366, + "grad_norm": 1.0027157767339108, + "learning_rate": 4.947223467281081e-06, + "loss": 0.4234, + "step": 6549 + }, + { + "epoch": 0.39886733855007156, + "grad_norm": 0.9966675929842216, + "learning_rate": 4.947207156512669e-06, + "loss": 0.4248, + "step": 6550 + }, + { + "epoch": 0.3989282343269494, + "grad_norm": 1.0755477766885828, + "learning_rate": 4.947190843251095e-06, + "loss": 0.3812, + "step": 6551 + }, + { + "epoch": 0.3989891301038273, + "grad_norm": 0.9059389918218469, + "learning_rate": 4.947174527496373e-06, + "loss": 0.5247, + "step": 6552 + }, + { + "epoch": 0.39905002588070515, + "grad_norm": 1.0218955694648424, + "learning_rate": 4.9471582092485205e-06, + "loss": 0.4089, + "step": 6553 + }, + { + "epoch": 0.39911092165758305, + "grad_norm": 0.9083334547219732, + "learning_rate": 4.947141888507554e-06, + "loss": 0.4877, + "step": 6554 + }, + { + "epoch": 0.3991718174344609, + "grad_norm": 1.0789965061790998, + "learning_rate": 4.947125565273491e-06, + "loss": 0.4174, + "step": 6555 + }, + { + "epoch": 0.3992327132113388, + "grad_norm": 0.9567252750359567, + "learning_rate": 4.947109239546346e-06, + "loss": 0.3761, + "step": 6556 + }, + { + "epoch": 0.39929360898821664, + "grad_norm": 0.9413218537559443, + "learning_rate": 4.947092911326138e-06, + "loss": 0.4639, + "step": 6557 + }, + { + "epoch": 0.39935450476509454, + "grad_norm": 0.9667361585119515, + "learning_rate": 4.947076580612882e-06, + "loss": 0.495, + "step": 6558 + }, + { + "epoch": 0.3994154005419724, + "grad_norm": 1.017271073449504, + "learning_rate": 4.947060247406595e-06, + "loss": 0.4085, + "step": 6559 + }, + { + "epoch": 0.3994762963188503, + "grad_norm": 1.0556644535546538, + "learning_rate": 4.947043911707295e-06, + "loss": 0.4487, + "step": 6560 + }, + { + "epoch": 0.3995371920957282, + "grad_norm": 1.0455232907619971, + "learning_rate": 4.947027573514998e-06, + "loss": 0.4676, + "step": 6561 + }, + { + "epoch": 0.39959808787260603, + "grad_norm": 0.9615664279043101, + "learning_rate": 4.947011232829718e-06, + "loss": 0.4539, + "step": 6562 + }, + { + "epoch": 0.39965898364948393, + "grad_norm": 0.9710643008091684, + "learning_rate": 4.946994889651475e-06, + "loss": 0.5044, + "step": 6563 + }, + { + "epoch": 0.3997198794263618, + "grad_norm": 1.034732298267663, + "learning_rate": 4.946978543980286e-06, + "loss": 0.4517, + "step": 6564 + }, + { + "epoch": 0.3997807752032397, + "grad_norm": 1.0793708730920715, + "learning_rate": 4.946962195816164e-06, + "loss": 0.4123, + "step": 6565 + }, + { + "epoch": 0.3998416709801175, + "grad_norm": 0.9822606611698914, + "learning_rate": 4.946945845159128e-06, + "loss": 0.4616, + "step": 6566 + }, + { + "epoch": 0.3999025667569954, + "grad_norm": 0.9827930553491175, + "learning_rate": 4.946929492009194e-06, + "loss": 0.4642, + "step": 6567 + }, + { + "epoch": 0.39996346253387327, + "grad_norm": 0.9557909714089836, + "learning_rate": 4.946913136366382e-06, + "loss": 0.5097, + "step": 6568 + }, + { + "epoch": 0.40002435831075117, + "grad_norm": 0.9786406107816, + "learning_rate": 4.946896778230703e-06, + "loss": 0.4888, + "step": 6569 + }, + { + "epoch": 0.400085254087629, + "grad_norm": 0.9946275718825446, + "learning_rate": 4.9468804176021765e-06, + "loss": 0.5047, + "step": 6570 + }, + { + "epoch": 0.4001461498645069, + "grad_norm": 1.017472922671421, + "learning_rate": 4.94686405448082e-06, + "loss": 0.4509, + "step": 6571 + }, + { + "epoch": 0.40020704564138476, + "grad_norm": 1.076557742801801, + "learning_rate": 4.946847688866649e-06, + "loss": 0.4215, + "step": 6572 + }, + { + "epoch": 0.40026794141826266, + "grad_norm": 1.031094833928564, + "learning_rate": 4.946831320759681e-06, + "loss": 0.4383, + "step": 6573 + }, + { + "epoch": 0.4003288371951405, + "grad_norm": 1.1200673021004341, + "learning_rate": 4.946814950159932e-06, + "loss": 0.4872, + "step": 6574 + }, + { + "epoch": 0.4003897329720184, + "grad_norm": 1.0947191191573395, + "learning_rate": 4.946798577067418e-06, + "loss": 0.5046, + "step": 6575 + }, + { + "epoch": 0.40045062874889625, + "grad_norm": 0.9869059891992266, + "learning_rate": 4.9467822014821565e-06, + "loss": 0.443, + "step": 6576 + }, + { + "epoch": 0.40051152452577415, + "grad_norm": 0.9977131179648576, + "learning_rate": 4.946765823404165e-06, + "loss": 0.4572, + "step": 6577 + }, + { + "epoch": 0.400572420302652, + "grad_norm": 1.0774977066962765, + "learning_rate": 4.946749442833459e-06, + "loss": 0.4235, + "step": 6578 + }, + { + "epoch": 0.4006333160795299, + "grad_norm": 1.0132843445102016, + "learning_rate": 4.946733059770056e-06, + "loss": 0.4904, + "step": 6579 + }, + { + "epoch": 0.40069421185640774, + "grad_norm": 1.0300050884502108, + "learning_rate": 4.946716674213971e-06, + "loss": 0.4562, + "step": 6580 + }, + { + "epoch": 0.40075510763328565, + "grad_norm": 0.9928883885837788, + "learning_rate": 4.946700286165222e-06, + "loss": 0.4675, + "step": 6581 + }, + { + "epoch": 0.4008160034101635, + "grad_norm": 1.0812247191648359, + "learning_rate": 4.946683895623827e-06, + "loss": 0.4643, + "step": 6582 + }, + { + "epoch": 0.4008768991870414, + "grad_norm": 1.0254633552400654, + "learning_rate": 4.9466675025898005e-06, + "loss": 0.48, + "step": 6583 + }, + { + "epoch": 0.40093779496391924, + "grad_norm": 1.0442349981247927, + "learning_rate": 4.94665110706316e-06, + "loss": 0.4824, + "step": 6584 + }, + { + "epoch": 0.40099869074079714, + "grad_norm": 1.0534345517378687, + "learning_rate": 4.946634709043923e-06, + "loss": 0.4728, + "step": 6585 + }, + { + "epoch": 0.401059586517675, + "grad_norm": 1.0179866935000745, + "learning_rate": 4.946618308532104e-06, + "loss": 0.4838, + "step": 6586 + }, + { + "epoch": 0.4011204822945529, + "grad_norm": 1.0219996799198927, + "learning_rate": 4.946601905527722e-06, + "loss": 0.4232, + "step": 6587 + }, + { + "epoch": 0.4011813780714307, + "grad_norm": 1.0911595192766976, + "learning_rate": 4.946585500030793e-06, + "loss": 0.45, + "step": 6588 + }, + { + "epoch": 0.40124227384830863, + "grad_norm": 1.0568195371895879, + "learning_rate": 4.946569092041333e-06, + "loss": 0.4834, + "step": 6589 + }, + { + "epoch": 0.4013031696251865, + "grad_norm": 1.0779564100385264, + "learning_rate": 4.946552681559359e-06, + "loss": 0.4421, + "step": 6590 + }, + { + "epoch": 0.4013640654020644, + "grad_norm": 0.9076087489357724, + "learning_rate": 4.946536268584889e-06, + "loss": 0.547, + "step": 6591 + }, + { + "epoch": 0.4014249611789422, + "grad_norm": 1.0013712185686476, + "learning_rate": 4.946519853117938e-06, + "loss": 0.4742, + "step": 6592 + }, + { + "epoch": 0.4014858569558201, + "grad_norm": 1.0445008374273772, + "learning_rate": 4.946503435158524e-06, + "loss": 0.4672, + "step": 6593 + }, + { + "epoch": 0.40154675273269796, + "grad_norm": 1.0652395859907484, + "learning_rate": 4.946487014706662e-06, + "loss": 0.4163, + "step": 6594 + }, + { + "epoch": 0.40160764850957587, + "grad_norm": 1.057977246071148, + "learning_rate": 4.946470591762371e-06, + "loss": 0.467, + "step": 6595 + }, + { + "epoch": 0.4016685442864537, + "grad_norm": 1.038058716012395, + "learning_rate": 4.946454166325666e-06, + "loss": 0.408, + "step": 6596 + }, + { + "epoch": 0.4017294400633316, + "grad_norm": 1.0495072795066371, + "learning_rate": 4.946437738396566e-06, + "loss": 0.4936, + "step": 6597 + }, + { + "epoch": 0.40179033584020946, + "grad_norm": 1.1129283343785081, + "learning_rate": 4.946421307975084e-06, + "loss": 0.4123, + "step": 6598 + }, + { + "epoch": 0.40185123161708736, + "grad_norm": 1.0199263742751334, + "learning_rate": 4.946404875061239e-06, + "loss": 0.4216, + "step": 6599 + }, + { + "epoch": 0.4019121273939652, + "grad_norm": 1.0416929879566055, + "learning_rate": 4.9463884396550486e-06, + "loss": 0.4883, + "step": 6600 + }, + { + "epoch": 0.4019730231708431, + "grad_norm": 1.0611144923758355, + "learning_rate": 4.9463720017565285e-06, + "loss": 0.4913, + "step": 6601 + }, + { + "epoch": 0.402033918947721, + "grad_norm": 1.117211279988071, + "learning_rate": 4.946355561365695e-06, + "loss": 0.4386, + "step": 6602 + }, + { + "epoch": 0.40209481472459885, + "grad_norm": 1.0684428075005177, + "learning_rate": 4.946339118482565e-06, + "loss": 0.4591, + "step": 6603 + }, + { + "epoch": 0.40215571050147675, + "grad_norm": 1.0089313704796288, + "learning_rate": 4.946322673107156e-06, + "loss": 0.5375, + "step": 6604 + }, + { + "epoch": 0.4022166062783546, + "grad_norm": 0.9772857440522033, + "learning_rate": 4.946306225239485e-06, + "loss": 0.4488, + "step": 6605 + }, + { + "epoch": 0.4022775020552325, + "grad_norm": 1.064018442010904, + "learning_rate": 4.9462897748795666e-06, + "loss": 0.4478, + "step": 6606 + }, + { + "epoch": 0.40233839783211034, + "grad_norm": 1.1279416930762094, + "learning_rate": 4.946273322027421e-06, + "loss": 0.4664, + "step": 6607 + }, + { + "epoch": 0.40239929360898824, + "grad_norm": 0.9446393817596883, + "learning_rate": 4.946256866683061e-06, + "loss": 0.4936, + "step": 6608 + }, + { + "epoch": 0.4024601893858661, + "grad_norm": 0.9511819359215096, + "learning_rate": 4.946240408846506e-06, + "loss": 0.5084, + "step": 6609 + }, + { + "epoch": 0.402521085162744, + "grad_norm": 0.9780463089957568, + "learning_rate": 4.946223948517773e-06, + "loss": 0.4419, + "step": 6610 + }, + { + "epoch": 0.40258198093962183, + "grad_norm": 1.0351771317542822, + "learning_rate": 4.946207485696877e-06, + "loss": 0.4497, + "step": 6611 + }, + { + "epoch": 0.40264287671649973, + "grad_norm": 0.9895463320503008, + "learning_rate": 4.946191020383836e-06, + "loss": 0.4747, + "step": 6612 + }, + { + "epoch": 0.4027037724933776, + "grad_norm": 0.901757878967526, + "learning_rate": 4.946174552578666e-06, + "loss": 0.5089, + "step": 6613 + }, + { + "epoch": 0.4027646682702555, + "grad_norm": 0.9348675005204007, + "learning_rate": 4.946158082281385e-06, + "loss": 0.4814, + "step": 6614 + }, + { + "epoch": 0.4028255640471333, + "grad_norm": 1.1255618337858617, + "learning_rate": 4.946141609492008e-06, + "loss": 0.4494, + "step": 6615 + }, + { + "epoch": 0.4028864598240112, + "grad_norm": 0.949678088250914, + "learning_rate": 4.946125134210553e-06, + "loss": 0.4807, + "step": 6616 + }, + { + "epoch": 0.40294735560088907, + "grad_norm": 0.9730130298022076, + "learning_rate": 4.946108656437038e-06, + "loss": 0.4346, + "step": 6617 + }, + { + "epoch": 0.40300825137776697, + "grad_norm": 0.975727488943458, + "learning_rate": 4.946092176171476e-06, + "loss": 0.446, + "step": 6618 + }, + { + "epoch": 0.4030691471546448, + "grad_norm": 1.0943043002863264, + "learning_rate": 4.946075693413888e-06, + "loss": 0.4146, + "step": 6619 + }, + { + "epoch": 0.4031300429315227, + "grad_norm": 1.0347636305902441, + "learning_rate": 4.946059208164288e-06, + "loss": 0.4164, + "step": 6620 + }, + { + "epoch": 0.40319093870840056, + "grad_norm": 1.1078003332288346, + "learning_rate": 4.9460427204226946e-06, + "loss": 0.3673, + "step": 6621 + }, + { + "epoch": 0.40325183448527846, + "grad_norm": 1.0474121368054397, + "learning_rate": 4.946026230189123e-06, + "loss": 0.4206, + "step": 6622 + }, + { + "epoch": 0.4033127302621563, + "grad_norm": 1.1336112493673625, + "learning_rate": 4.946009737463591e-06, + "loss": 0.4085, + "step": 6623 + }, + { + "epoch": 0.4033736260390342, + "grad_norm": 1.1136706699490937, + "learning_rate": 4.945993242246115e-06, + "loss": 0.3558, + "step": 6624 + }, + { + "epoch": 0.40343452181591205, + "grad_norm": 1.040069305863878, + "learning_rate": 4.945976744536712e-06, + "loss": 0.4446, + "step": 6625 + }, + { + "epoch": 0.40349541759278995, + "grad_norm": 0.958122681359473, + "learning_rate": 4.945960244335399e-06, + "loss": 0.4332, + "step": 6626 + }, + { + "epoch": 0.4035563133696678, + "grad_norm": 1.0219220070206931, + "learning_rate": 4.945943741642192e-06, + "loss": 0.4443, + "step": 6627 + }, + { + "epoch": 0.4036172091465457, + "grad_norm": 1.045838610606303, + "learning_rate": 4.945927236457109e-06, + "loss": 0.4227, + "step": 6628 + }, + { + "epoch": 0.40367810492342354, + "grad_norm": 1.0869177408032331, + "learning_rate": 4.945910728780166e-06, + "loss": 0.4242, + "step": 6629 + }, + { + "epoch": 0.40373900070030144, + "grad_norm": 0.9392177538505984, + "learning_rate": 4.945894218611379e-06, + "loss": 0.4752, + "step": 6630 + }, + { + "epoch": 0.4037998964771793, + "grad_norm": 0.9931698642359261, + "learning_rate": 4.945877705950768e-06, + "loss": 0.4577, + "step": 6631 + }, + { + "epoch": 0.4038607922540572, + "grad_norm": 1.016312175392132, + "learning_rate": 4.945861190798346e-06, + "loss": 0.46, + "step": 6632 + }, + { + "epoch": 0.40392168803093503, + "grad_norm": 0.9874197612195236, + "learning_rate": 4.945844673154132e-06, + "loss": 0.4628, + "step": 6633 + }, + { + "epoch": 0.40398258380781293, + "grad_norm": 1.0498423490693753, + "learning_rate": 4.945828153018142e-06, + "loss": 0.4437, + "step": 6634 + }, + { + "epoch": 0.4040434795846908, + "grad_norm": 1.0167677565796143, + "learning_rate": 4.945811630390393e-06, + "loss": 0.4927, + "step": 6635 + }, + { + "epoch": 0.4041043753615687, + "grad_norm": 1.0483591050859034, + "learning_rate": 4.945795105270902e-06, + "loss": 0.4683, + "step": 6636 + }, + { + "epoch": 0.4041652711384465, + "grad_norm": 0.9775478297340271, + "learning_rate": 4.945778577659685e-06, + "loss": 0.4235, + "step": 6637 + }, + { + "epoch": 0.4042261669153244, + "grad_norm": 0.9430243492567629, + "learning_rate": 4.945762047556762e-06, + "loss": 0.457, + "step": 6638 + }, + { + "epoch": 0.40428706269220227, + "grad_norm": 1.010269492211598, + "learning_rate": 4.945745514962146e-06, + "loss": 0.454, + "step": 6639 + }, + { + "epoch": 0.40434795846908017, + "grad_norm": 0.9789429300109349, + "learning_rate": 4.945728979875855e-06, + "loss": 0.454, + "step": 6640 + }, + { + "epoch": 0.404408854245958, + "grad_norm": 1.0583166536783295, + "learning_rate": 4.9457124422979065e-06, + "loss": 0.4697, + "step": 6641 + }, + { + "epoch": 0.4044697500228359, + "grad_norm": 1.0851364361235794, + "learning_rate": 4.9456959022283166e-06, + "loss": 0.4221, + "step": 6642 + }, + { + "epoch": 0.4045306457997138, + "grad_norm": 0.9893949395400436, + "learning_rate": 4.945679359667104e-06, + "loss": 0.4824, + "step": 6643 + }, + { + "epoch": 0.40459154157659166, + "grad_norm": 1.0627961645400779, + "learning_rate": 4.945662814614283e-06, + "loss": 0.5258, + "step": 6644 + }, + { + "epoch": 0.40465243735346956, + "grad_norm": 1.0985016720425447, + "learning_rate": 4.945646267069872e-06, + "loss": 0.4977, + "step": 6645 + }, + { + "epoch": 0.4047133331303474, + "grad_norm": 1.0211739669414928, + "learning_rate": 4.945629717033887e-06, + "loss": 0.5105, + "step": 6646 + }, + { + "epoch": 0.4047742289072253, + "grad_norm": 1.0160885515459321, + "learning_rate": 4.945613164506346e-06, + "loss": 0.5045, + "step": 6647 + }, + { + "epoch": 0.40483512468410315, + "grad_norm": 1.035994695189384, + "learning_rate": 4.945596609487264e-06, + "loss": 0.4594, + "step": 6648 + }, + { + "epoch": 0.40489602046098105, + "grad_norm": 1.006656094810818, + "learning_rate": 4.94558005197666e-06, + "loss": 0.4802, + "step": 6649 + }, + { + "epoch": 0.4049569162378589, + "grad_norm": 0.981285954460504, + "learning_rate": 4.945563491974549e-06, + "loss": 0.5065, + "step": 6650 + }, + { + "epoch": 0.4050178120147368, + "grad_norm": 1.0379616041949076, + "learning_rate": 4.945546929480949e-06, + "loss": 0.4966, + "step": 6651 + }, + { + "epoch": 0.40507870779161465, + "grad_norm": 1.106957255623764, + "learning_rate": 4.945530364495878e-06, + "loss": 0.4899, + "step": 6652 + }, + { + "epoch": 0.40513960356849255, + "grad_norm": 1.0787633871045479, + "learning_rate": 4.94551379701935e-06, + "loss": 0.5257, + "step": 6653 + }, + { + "epoch": 0.4052004993453704, + "grad_norm": 0.950382297989184, + "learning_rate": 4.945497227051383e-06, + "loss": 0.5004, + "step": 6654 + }, + { + "epoch": 0.4052613951222483, + "grad_norm": 1.0506229565878198, + "learning_rate": 4.945480654591995e-06, + "loss": 0.4222, + "step": 6655 + }, + { + "epoch": 0.40532229089912614, + "grad_norm": 1.0075961222832215, + "learning_rate": 4.945464079641202e-06, + "loss": 0.4278, + "step": 6656 + }, + { + "epoch": 0.40538318667600404, + "grad_norm": 1.0817210996558646, + "learning_rate": 4.945447502199022e-06, + "loss": 0.5138, + "step": 6657 + }, + { + "epoch": 0.4054440824528819, + "grad_norm": 1.0209670782847522, + "learning_rate": 4.94543092226547e-06, + "loss": 0.4512, + "step": 6658 + }, + { + "epoch": 0.4055049782297598, + "grad_norm": 1.0768190041842782, + "learning_rate": 4.945414339840564e-06, + "loss": 0.5006, + "step": 6659 + }, + { + "epoch": 0.40556587400663763, + "grad_norm": 0.9657491906021435, + "learning_rate": 4.945397754924321e-06, + "loss": 0.4847, + "step": 6660 + }, + { + "epoch": 0.40562676978351553, + "grad_norm": 1.0149772317483845, + "learning_rate": 4.945381167516757e-06, + "loss": 0.4168, + "step": 6661 + }, + { + "epoch": 0.4056876655603934, + "grad_norm": 1.0810540713682428, + "learning_rate": 4.94536457761789e-06, + "loss": 0.4643, + "step": 6662 + }, + { + "epoch": 0.4057485613372713, + "grad_norm": 1.0867155193472318, + "learning_rate": 4.9453479852277365e-06, + "loss": 0.4654, + "step": 6663 + }, + { + "epoch": 0.4058094571141491, + "grad_norm": 1.1262435647496907, + "learning_rate": 4.945331390346313e-06, + "loss": 0.4161, + "step": 6664 + }, + { + "epoch": 0.405870352891027, + "grad_norm": 1.0477130097695773, + "learning_rate": 4.945314792973637e-06, + "loss": 0.4756, + "step": 6665 + }, + { + "epoch": 0.40593124866790486, + "grad_norm": 1.109142833103786, + "learning_rate": 4.945298193109724e-06, + "loss": 0.4108, + "step": 6666 + }, + { + "epoch": 0.40599214444478277, + "grad_norm": 1.005426170608154, + "learning_rate": 4.945281590754594e-06, + "loss": 0.4809, + "step": 6667 + }, + { + "epoch": 0.4060530402216606, + "grad_norm": 1.0471041202980755, + "learning_rate": 4.94526498590826e-06, + "loss": 0.463, + "step": 6668 + }, + { + "epoch": 0.4061139359985385, + "grad_norm": 0.994416248324322, + "learning_rate": 4.945248378570742e-06, + "loss": 0.5132, + "step": 6669 + }, + { + "epoch": 0.40617483177541636, + "grad_norm": 0.9873409029533523, + "learning_rate": 4.945231768742056e-06, + "loss": 0.4913, + "step": 6670 + }, + { + "epoch": 0.40623572755229426, + "grad_norm": 0.9981060158355257, + "learning_rate": 4.945215156422218e-06, + "loss": 0.5431, + "step": 6671 + }, + { + "epoch": 0.4062966233291721, + "grad_norm": 1.0421756792110932, + "learning_rate": 4.9451985416112454e-06, + "loss": 0.4517, + "step": 6672 + }, + { + "epoch": 0.40635751910605, + "grad_norm": 1.0193830552198533, + "learning_rate": 4.945181924309157e-06, + "loss": 0.5006, + "step": 6673 + }, + { + "epoch": 0.40641841488292785, + "grad_norm": 1.0844356886310536, + "learning_rate": 4.945165304515967e-06, + "loss": 0.4078, + "step": 6674 + }, + { + "epoch": 0.40647931065980575, + "grad_norm": 1.0345165481787382, + "learning_rate": 4.945148682231693e-06, + "loss": 0.4778, + "step": 6675 + }, + { + "epoch": 0.4065402064366836, + "grad_norm": 1.004329960288904, + "learning_rate": 4.9451320574563526e-06, + "loss": 0.4706, + "step": 6676 + }, + { + "epoch": 0.4066011022135615, + "grad_norm": 0.9520204509209097, + "learning_rate": 4.945115430189963e-06, + "loss": 0.4113, + "step": 6677 + }, + { + "epoch": 0.40666199799043934, + "grad_norm": 0.94849453574712, + "learning_rate": 4.945098800432539e-06, + "loss": 0.5866, + "step": 6678 + }, + { + "epoch": 0.40672289376731724, + "grad_norm": 1.0860889128365272, + "learning_rate": 4.945082168184101e-06, + "loss": 0.4677, + "step": 6679 + }, + { + "epoch": 0.4067837895441951, + "grad_norm": 0.9869910608487658, + "learning_rate": 4.945065533444664e-06, + "loss": 0.5197, + "step": 6680 + }, + { + "epoch": 0.406844685321073, + "grad_norm": 1.0089254919242165, + "learning_rate": 4.945048896214245e-06, + "loss": 0.4742, + "step": 6681 + }, + { + "epoch": 0.40690558109795083, + "grad_norm": 1.0084734913950713, + "learning_rate": 4.94503225649286e-06, + "loss": 0.426, + "step": 6682 + }, + { + "epoch": 0.40696647687482873, + "grad_norm": 1.0329446804307516, + "learning_rate": 4.945015614280528e-06, + "loss": 0.4366, + "step": 6683 + }, + { + "epoch": 0.40702737265170663, + "grad_norm": 1.0046084576683327, + "learning_rate": 4.944998969577264e-06, + "loss": 0.4574, + "step": 6684 + }, + { + "epoch": 0.4070882684285845, + "grad_norm": 1.0281620903084059, + "learning_rate": 4.944982322383087e-06, + "loss": 0.4489, + "step": 6685 + }, + { + "epoch": 0.4071491642054624, + "grad_norm": 0.8914379802002593, + "learning_rate": 4.944965672698012e-06, + "loss": 0.4759, + "step": 6686 + }, + { + "epoch": 0.4072100599823402, + "grad_norm": 1.0370100421389123, + "learning_rate": 4.944949020522057e-06, + "loss": 0.4885, + "step": 6687 + }, + { + "epoch": 0.4072709557592181, + "grad_norm": 0.983616919601663, + "learning_rate": 4.944932365855239e-06, + "loss": 0.4582, + "step": 6688 + }, + { + "epoch": 0.40733185153609597, + "grad_norm": 1.0435684997000871, + "learning_rate": 4.9449157086975745e-06, + "loss": 0.4575, + "step": 6689 + }, + { + "epoch": 0.40739274731297387, + "grad_norm": 1.0865145845359823, + "learning_rate": 4.9448990490490814e-06, + "loss": 0.4582, + "step": 6690 + }, + { + "epoch": 0.4074536430898517, + "grad_norm": 1.0742096092797522, + "learning_rate": 4.944882386909775e-06, + "loss": 0.4259, + "step": 6691 + }, + { + "epoch": 0.4075145388667296, + "grad_norm": 0.930384760634701, + "learning_rate": 4.944865722279674e-06, + "loss": 0.4871, + "step": 6692 + }, + { + "epoch": 0.40757543464360746, + "grad_norm": 1.001946029292173, + "learning_rate": 4.9448490551587935e-06, + "loss": 0.4468, + "step": 6693 + }, + { + "epoch": 0.40763633042048536, + "grad_norm": 0.9554432733694705, + "learning_rate": 4.9448323855471525e-06, + "loss": 0.525, + "step": 6694 + }, + { + "epoch": 0.4076972261973632, + "grad_norm": 1.0815075819730753, + "learning_rate": 4.944815713444767e-06, + "loss": 0.5249, + "step": 6695 + }, + { + "epoch": 0.4077581219742411, + "grad_norm": 1.0293090460657082, + "learning_rate": 4.944799038851654e-06, + "loss": 0.397, + "step": 6696 + }, + { + "epoch": 0.40781901775111895, + "grad_norm": 0.9303032125507049, + "learning_rate": 4.944782361767831e-06, + "loss": 0.4801, + "step": 6697 + }, + { + "epoch": 0.40787991352799685, + "grad_norm": 1.0268888776749006, + "learning_rate": 4.944765682193314e-06, + "loss": 0.4628, + "step": 6698 + }, + { + "epoch": 0.4079408093048747, + "grad_norm": 1.0188565943642405, + "learning_rate": 4.944749000128121e-06, + "loss": 0.4924, + "step": 6699 + }, + { + "epoch": 0.4080017050817526, + "grad_norm": 1.020215858203693, + "learning_rate": 4.944732315572268e-06, + "loss": 0.4415, + "step": 6700 + }, + { + "epoch": 0.40806260085863044, + "grad_norm": 0.9840132129306629, + "learning_rate": 4.944715628525773e-06, + "loss": 0.4504, + "step": 6701 + }, + { + "epoch": 0.40812349663550834, + "grad_norm": 1.0068266591093407, + "learning_rate": 4.944698938988652e-06, + "loss": 0.4229, + "step": 6702 + }, + { + "epoch": 0.4081843924123862, + "grad_norm": 0.952035795783395, + "learning_rate": 4.944682246960924e-06, + "loss": 0.5029, + "step": 6703 + }, + { + "epoch": 0.4082452881892641, + "grad_norm": 0.9858671268977195, + "learning_rate": 4.9446655524426025e-06, + "loss": 0.4646, + "step": 6704 + }, + { + "epoch": 0.40830618396614193, + "grad_norm": 0.923908915817451, + "learning_rate": 4.944648855433708e-06, + "loss": 0.4739, + "step": 6705 + }, + { + "epoch": 0.40836707974301983, + "grad_norm": 1.1185319364167279, + "learning_rate": 4.944632155934255e-06, + "loss": 0.475, + "step": 6706 + }, + { + "epoch": 0.4084279755198977, + "grad_norm": 1.036283228280885, + "learning_rate": 4.944615453944262e-06, + "loss": 0.4348, + "step": 6707 + }, + { + "epoch": 0.4084888712967756, + "grad_norm": 1.1145052772211916, + "learning_rate": 4.944598749463747e-06, + "loss": 0.4593, + "step": 6708 + }, + { + "epoch": 0.4085497670736534, + "grad_norm": 0.9485359611084758, + "learning_rate": 4.944582042492724e-06, + "loss": 0.4693, + "step": 6709 + }, + { + "epoch": 0.4086106628505313, + "grad_norm": 1.0668005966206968, + "learning_rate": 4.944565333031212e-06, + "loss": 0.4394, + "step": 6710 + }, + { + "epoch": 0.40867155862740917, + "grad_norm": 1.0222293631590265, + "learning_rate": 4.944548621079228e-06, + "loss": 0.4047, + "step": 6711 + }, + { + "epoch": 0.40873245440428707, + "grad_norm": 0.9643308499331188, + "learning_rate": 4.944531906636788e-06, + "loss": 0.5091, + "step": 6712 + }, + { + "epoch": 0.4087933501811649, + "grad_norm": 1.060432369802787, + "learning_rate": 4.94451518970391e-06, + "loss": 0.537, + "step": 6713 + }, + { + "epoch": 0.4088542459580428, + "grad_norm": 0.928959985487889, + "learning_rate": 4.944498470280611e-06, + "loss": 0.5001, + "step": 6714 + }, + { + "epoch": 0.40891514173492066, + "grad_norm": 1.0875724566304659, + "learning_rate": 4.944481748366907e-06, + "loss": 0.4202, + "step": 6715 + }, + { + "epoch": 0.40897603751179856, + "grad_norm": 1.0836018331340507, + "learning_rate": 4.944465023962817e-06, + "loss": 0.4505, + "step": 6716 + }, + { + "epoch": 0.4090369332886764, + "grad_norm": 0.8545310963817723, + "learning_rate": 4.944448297068356e-06, + "loss": 0.4559, + "step": 6717 + }, + { + "epoch": 0.4090978290655543, + "grad_norm": 1.060080489617337, + "learning_rate": 4.944431567683542e-06, + "loss": 0.4538, + "step": 6718 + }, + { + "epoch": 0.40915872484243215, + "grad_norm": 1.0103559856620417, + "learning_rate": 4.944414835808392e-06, + "loss": 0.4304, + "step": 6719 + }, + { + "epoch": 0.40921962061931005, + "grad_norm": 1.002989135687286, + "learning_rate": 4.944398101442924e-06, + "loss": 0.4599, + "step": 6720 + }, + { + "epoch": 0.4092805163961879, + "grad_norm": 1.0818826984455117, + "learning_rate": 4.944381364587153e-06, + "loss": 0.5167, + "step": 6721 + }, + { + "epoch": 0.4093414121730658, + "grad_norm": 1.0876267794527799, + "learning_rate": 4.944364625241097e-06, + "loss": 0.4367, + "step": 6722 + }, + { + "epoch": 0.40940230794994364, + "grad_norm": 1.0098676137137774, + "learning_rate": 4.944347883404774e-06, + "loss": 0.4264, + "step": 6723 + }, + { + "epoch": 0.40946320372682155, + "grad_norm": 1.0568717965761136, + "learning_rate": 4.944331139078199e-06, + "loss": 0.4143, + "step": 6724 + }, + { + "epoch": 0.40952409950369945, + "grad_norm": 1.0904362054010797, + "learning_rate": 4.944314392261391e-06, + "loss": 0.4755, + "step": 6725 + }, + { + "epoch": 0.4095849952805773, + "grad_norm": 0.9696689941460934, + "learning_rate": 4.944297642954366e-06, + "loss": 0.4925, + "step": 6726 + }, + { + "epoch": 0.4096458910574552, + "grad_norm": 1.0666180087146333, + "learning_rate": 4.944280891157141e-06, + "loss": 0.4656, + "step": 6727 + }, + { + "epoch": 0.40970678683433304, + "grad_norm": 1.0098686920821747, + "learning_rate": 4.944264136869735e-06, + "loss": 0.5508, + "step": 6728 + }, + { + "epoch": 0.40976768261121094, + "grad_norm": 1.0529423991921798, + "learning_rate": 4.944247380092162e-06, + "loss": 0.4087, + "step": 6729 + }, + { + "epoch": 0.4098285783880888, + "grad_norm": 0.999879308634886, + "learning_rate": 4.9442306208244414e-06, + "loss": 0.4793, + "step": 6730 + }, + { + "epoch": 0.4098894741649667, + "grad_norm": 0.9924622734971119, + "learning_rate": 4.944213859066589e-06, + "loss": 0.4379, + "step": 6731 + }, + { + "epoch": 0.40995036994184453, + "grad_norm": 1.0217386197670701, + "learning_rate": 4.9441970948186225e-06, + "loss": 0.4708, + "step": 6732 + }, + { + "epoch": 0.41001126571872243, + "grad_norm": 1.029719685727215, + "learning_rate": 4.944180328080559e-06, + "loss": 0.4419, + "step": 6733 + }, + { + "epoch": 0.4100721614956003, + "grad_norm": 1.0478672141657515, + "learning_rate": 4.944163558852415e-06, + "loss": 0.4492, + "step": 6734 + }, + { + "epoch": 0.4101330572724782, + "grad_norm": 1.1029547167888019, + "learning_rate": 4.944146787134208e-06, + "loss": 0.43, + "step": 6735 + }, + { + "epoch": 0.410193953049356, + "grad_norm": 1.0169160993904964, + "learning_rate": 4.944130012925956e-06, + "loss": 0.4127, + "step": 6736 + }, + { + "epoch": 0.4102548488262339, + "grad_norm": 0.9794905116700386, + "learning_rate": 4.944113236227675e-06, + "loss": 0.4116, + "step": 6737 + }, + { + "epoch": 0.41031574460311176, + "grad_norm": 1.070837862594222, + "learning_rate": 4.944096457039381e-06, + "loss": 0.4608, + "step": 6738 + }, + { + "epoch": 0.41037664037998967, + "grad_norm": 1.0050328706507772, + "learning_rate": 4.944079675361093e-06, + "loss": 0.468, + "step": 6739 + }, + { + "epoch": 0.4104375361568675, + "grad_norm": 0.981759634667728, + "learning_rate": 4.944062891192827e-06, + "loss": 0.4507, + "step": 6740 + }, + { + "epoch": 0.4104984319337454, + "grad_norm": 1.0380821496662191, + "learning_rate": 4.944046104534601e-06, + "loss": 0.525, + "step": 6741 + }, + { + "epoch": 0.41055932771062326, + "grad_norm": 1.0823745979913144, + "learning_rate": 4.944029315386432e-06, + "loss": 0.4262, + "step": 6742 + }, + { + "epoch": 0.41062022348750116, + "grad_norm": 1.0273293293480372, + "learning_rate": 4.944012523748336e-06, + "loss": 0.3945, + "step": 6743 + }, + { + "epoch": 0.410681119264379, + "grad_norm": 1.0720322012909538, + "learning_rate": 4.943995729620331e-06, + "loss": 0.4662, + "step": 6744 + }, + { + "epoch": 0.4107420150412569, + "grad_norm": 1.002043929288563, + "learning_rate": 4.9439789330024345e-06, + "loss": 0.4669, + "step": 6745 + }, + { + "epoch": 0.41080291081813475, + "grad_norm": 1.0974571996476026, + "learning_rate": 4.943962133894663e-06, + "loss": 0.5056, + "step": 6746 + }, + { + "epoch": 0.41086380659501265, + "grad_norm": 0.9691172780268781, + "learning_rate": 4.943945332297033e-06, + "loss": 0.4896, + "step": 6747 + }, + { + "epoch": 0.4109247023718905, + "grad_norm": 1.0721424785622715, + "learning_rate": 4.943928528209563e-06, + "loss": 0.4711, + "step": 6748 + }, + { + "epoch": 0.4109855981487684, + "grad_norm": 0.9851322487791622, + "learning_rate": 4.943911721632269e-06, + "loss": 0.4567, + "step": 6749 + }, + { + "epoch": 0.41104649392564624, + "grad_norm": 0.9483531647279703, + "learning_rate": 4.94389491256517e-06, + "loss": 0.4561, + "step": 6750 + }, + { + "epoch": 0.41110738970252414, + "grad_norm": 1.0018603564018513, + "learning_rate": 4.943878101008279e-06, + "loss": 0.4796, + "step": 6751 + }, + { + "epoch": 0.411168285479402, + "grad_norm": 0.8903500295075826, + "learning_rate": 4.943861286961618e-06, + "loss": 0.49, + "step": 6752 + }, + { + "epoch": 0.4112291812562799, + "grad_norm": 1.0532833745481989, + "learning_rate": 4.943844470425201e-06, + "loss": 0.4194, + "step": 6753 + }, + { + "epoch": 0.41129007703315773, + "grad_norm": 0.9819534586629612, + "learning_rate": 4.943827651399046e-06, + "loss": 0.4155, + "step": 6754 + }, + { + "epoch": 0.41135097281003563, + "grad_norm": 0.9947032617392229, + "learning_rate": 4.94381082988317e-06, + "loss": 0.402, + "step": 6755 + }, + { + "epoch": 0.4114118685869135, + "grad_norm": 1.0198809970215537, + "learning_rate": 4.9437940058775915e-06, + "loss": 0.4541, + "step": 6756 + }, + { + "epoch": 0.4114727643637914, + "grad_norm": 0.9197697888585517, + "learning_rate": 4.943777179382326e-06, + "loss": 0.441, + "step": 6757 + }, + { + "epoch": 0.4115336601406692, + "grad_norm": 1.0174090870619086, + "learning_rate": 4.94376035039739e-06, + "loss": 0.408, + "step": 6758 + }, + { + "epoch": 0.4115945559175471, + "grad_norm": 0.9534368364997908, + "learning_rate": 4.943743518922803e-06, + "loss": 0.5241, + "step": 6759 + }, + { + "epoch": 0.41165545169442497, + "grad_norm": 0.9868723331644971, + "learning_rate": 4.94372668495858e-06, + "loss": 0.4207, + "step": 6760 + }, + { + "epoch": 0.41171634747130287, + "grad_norm": 0.9477300980189315, + "learning_rate": 4.94370984850474e-06, + "loss": 0.4881, + "step": 6761 + }, + { + "epoch": 0.4117772432481807, + "grad_norm": 0.998744230796042, + "learning_rate": 4.943693009561299e-06, + "loss": 0.4873, + "step": 6762 + }, + { + "epoch": 0.4118381390250586, + "grad_norm": 0.9980137023069672, + "learning_rate": 4.9436761681282735e-06, + "loss": 0.422, + "step": 6763 + }, + { + "epoch": 0.41189903480193646, + "grad_norm": 1.0557998313884067, + "learning_rate": 4.9436593242056825e-06, + "loss": 0.4402, + "step": 6764 + }, + { + "epoch": 0.41195993057881436, + "grad_norm": 0.9786107629308654, + "learning_rate": 4.943642477793541e-06, + "loss": 0.4734, + "step": 6765 + }, + { + "epoch": 0.41202082635569226, + "grad_norm": 1.0635073938989958, + "learning_rate": 4.943625628891869e-06, + "loss": 0.4209, + "step": 6766 + }, + { + "epoch": 0.4120817221325701, + "grad_norm": 1.0447689119438484, + "learning_rate": 4.9436087775006816e-06, + "loss": 0.4064, + "step": 6767 + }, + { + "epoch": 0.412142617909448, + "grad_norm": 0.9885624959591489, + "learning_rate": 4.943591923619996e-06, + "loss": 0.4632, + "step": 6768 + }, + { + "epoch": 0.41220351368632585, + "grad_norm": 1.0321444833467972, + "learning_rate": 4.94357506724983e-06, + "loss": 0.448, + "step": 6769 + }, + { + "epoch": 0.41226440946320375, + "grad_norm": 1.0567984266604329, + "learning_rate": 4.9435582083902e-06, + "loss": 0.4499, + "step": 6770 + }, + { + "epoch": 0.4123253052400816, + "grad_norm": 1.0333362181084311, + "learning_rate": 4.943541347041123e-06, + "loss": 0.414, + "step": 6771 + }, + { + "epoch": 0.4123862010169595, + "grad_norm": 1.0434836377333123, + "learning_rate": 4.943524483202619e-06, + "loss": 0.4301, + "step": 6772 + }, + { + "epoch": 0.41244709679383734, + "grad_norm": 0.9670221315085933, + "learning_rate": 4.943507616874701e-06, + "loss": 0.47, + "step": 6773 + }, + { + "epoch": 0.41250799257071524, + "grad_norm": 1.0166455582346872, + "learning_rate": 4.943490748057389e-06, + "loss": 0.4613, + "step": 6774 + }, + { + "epoch": 0.4125688883475931, + "grad_norm": 0.9180969882589104, + "learning_rate": 4.9434738767507e-06, + "loss": 0.4868, + "step": 6775 + }, + { + "epoch": 0.412629784124471, + "grad_norm": 0.9478919982040631, + "learning_rate": 4.94345700295465e-06, + "loss": 0.4407, + "step": 6776 + }, + { + "epoch": 0.41269067990134883, + "grad_norm": 1.0460595085776514, + "learning_rate": 4.943440126669257e-06, + "loss": 0.4544, + "step": 6777 + }, + { + "epoch": 0.41275157567822673, + "grad_norm": 0.9847697783376154, + "learning_rate": 4.943423247894538e-06, + "loss": 0.4867, + "step": 6778 + }, + { + "epoch": 0.4128124714551046, + "grad_norm": 1.062858694834803, + "learning_rate": 4.9434063666305106e-06, + "loss": 0.461, + "step": 6779 + }, + { + "epoch": 0.4128733672319825, + "grad_norm": 0.9821259254032865, + "learning_rate": 4.943389482877191e-06, + "loss": 0.4395, + "step": 6780 + }, + { + "epoch": 0.4129342630088603, + "grad_norm": 1.1248902069158755, + "learning_rate": 4.943372596634597e-06, + "loss": 0.4419, + "step": 6781 + }, + { + "epoch": 0.4129951587857382, + "grad_norm": 0.9042316403213582, + "learning_rate": 4.943355707902746e-06, + "loss": 0.4689, + "step": 6782 + }, + { + "epoch": 0.41305605456261607, + "grad_norm": 0.9654124460514951, + "learning_rate": 4.943338816681656e-06, + "loss": 0.517, + "step": 6783 + }, + { + "epoch": 0.41311695033949397, + "grad_norm": 1.1803449653262577, + "learning_rate": 4.943321922971341e-06, + "loss": 0.4234, + "step": 6784 + }, + { + "epoch": 0.4131778461163718, + "grad_norm": 1.1612485629910922, + "learning_rate": 4.9433050267718215e-06, + "loss": 0.4124, + "step": 6785 + }, + { + "epoch": 0.4132387418932497, + "grad_norm": 1.049631664908007, + "learning_rate": 4.943288128083114e-06, + "loss": 0.4193, + "step": 6786 + }, + { + "epoch": 0.41329963767012756, + "grad_norm": 0.9914567144909291, + "learning_rate": 4.943271226905235e-06, + "loss": 0.4787, + "step": 6787 + }, + { + "epoch": 0.41336053344700546, + "grad_norm": 1.0608861050554201, + "learning_rate": 4.943254323238203e-06, + "loss": 0.4741, + "step": 6788 + }, + { + "epoch": 0.4134214292238833, + "grad_norm": 1.060794531458104, + "learning_rate": 4.943237417082033e-06, + "loss": 0.4717, + "step": 6789 + }, + { + "epoch": 0.4134823250007612, + "grad_norm": 0.973501865922472, + "learning_rate": 4.9432205084367435e-06, + "loss": 0.4749, + "step": 6790 + }, + { + "epoch": 0.41354322077763905, + "grad_norm": 1.0646456301480747, + "learning_rate": 4.943203597302353e-06, + "loss": 0.4778, + "step": 6791 + }, + { + "epoch": 0.41360411655451695, + "grad_norm": 0.9572461253627881, + "learning_rate": 4.943186683678877e-06, + "loss": 0.4704, + "step": 6792 + }, + { + "epoch": 0.4136650123313948, + "grad_norm": 1.1471744644673219, + "learning_rate": 4.943169767566332e-06, + "loss": 0.464, + "step": 6793 + }, + { + "epoch": 0.4137259081082727, + "grad_norm": 1.0336118734251631, + "learning_rate": 4.943152848964737e-06, + "loss": 0.4629, + "step": 6794 + }, + { + "epoch": 0.41378680388515054, + "grad_norm": 1.039273175867859, + "learning_rate": 4.94313592787411e-06, + "loss": 0.4621, + "step": 6795 + }, + { + "epoch": 0.41384769966202845, + "grad_norm": 1.008522401387392, + "learning_rate": 4.943119004294465e-06, + "loss": 0.4359, + "step": 6796 + }, + { + "epoch": 0.4139085954389063, + "grad_norm": 0.979222050372445, + "learning_rate": 4.943102078225822e-06, + "loss": 0.4275, + "step": 6797 + }, + { + "epoch": 0.4139694912157842, + "grad_norm": 1.00341617713044, + "learning_rate": 4.943085149668198e-06, + "loss": 0.5462, + "step": 6798 + }, + { + "epoch": 0.41403038699266204, + "grad_norm": 0.9552965251448519, + "learning_rate": 4.9430682186216085e-06, + "loss": 0.4704, + "step": 6799 + }, + { + "epoch": 0.41409128276953994, + "grad_norm": 1.0681545147264424, + "learning_rate": 4.943051285086073e-06, + "loss": 0.3974, + "step": 6800 + }, + { + "epoch": 0.4141521785464178, + "grad_norm": 0.9940082916902812, + "learning_rate": 4.943034349061607e-06, + "loss": 0.451, + "step": 6801 + }, + { + "epoch": 0.4142130743232957, + "grad_norm": 1.0132671724440077, + "learning_rate": 4.943017410548229e-06, + "loss": 0.4883, + "step": 6802 + }, + { + "epoch": 0.4142739701001735, + "grad_norm": 1.0009226612245565, + "learning_rate": 4.943000469545955e-06, + "loss": 0.4889, + "step": 6803 + }, + { + "epoch": 0.41433486587705143, + "grad_norm": 1.0748639109544533, + "learning_rate": 4.9429835260548036e-06, + "loss": 0.4712, + "step": 6804 + }, + { + "epoch": 0.4143957616539293, + "grad_norm": 0.9690104264775442, + "learning_rate": 4.942966580074791e-06, + "loss": 0.4734, + "step": 6805 + }, + { + "epoch": 0.4144566574308072, + "grad_norm": 1.1059848947799513, + "learning_rate": 4.942949631605934e-06, + "loss": 0.4701, + "step": 6806 + }, + { + "epoch": 0.4145175532076851, + "grad_norm": 0.952757488334652, + "learning_rate": 4.942932680648252e-06, + "loss": 0.4841, + "step": 6807 + }, + { + "epoch": 0.4145784489845629, + "grad_norm": 1.0099677128591031, + "learning_rate": 4.9429157272017596e-06, + "loss": 0.5375, + "step": 6808 + }, + { + "epoch": 0.4146393447614408, + "grad_norm": 0.9829177374626441, + "learning_rate": 4.942898771266477e-06, + "loss": 0.4869, + "step": 6809 + }, + { + "epoch": 0.41470024053831867, + "grad_norm": 1.057961497046725, + "learning_rate": 4.9428818128424185e-06, + "loss": 0.4214, + "step": 6810 + }, + { + "epoch": 0.41476113631519657, + "grad_norm": 0.9912656397236677, + "learning_rate": 4.942864851929604e-06, + "loss": 0.46, + "step": 6811 + }, + { + "epoch": 0.4148220320920744, + "grad_norm": 1.028369446250695, + "learning_rate": 4.942847888528048e-06, + "loss": 0.4767, + "step": 6812 + }, + { + "epoch": 0.4148829278689523, + "grad_norm": 0.9862082543533841, + "learning_rate": 4.942830922637771e-06, + "loss": 0.4169, + "step": 6813 + }, + { + "epoch": 0.41494382364583016, + "grad_norm": 1.1074348089556743, + "learning_rate": 4.942813954258788e-06, + "loss": 0.3985, + "step": 6814 + }, + { + "epoch": 0.41500471942270806, + "grad_norm": 1.0161963103919882, + "learning_rate": 4.942796983391117e-06, + "loss": 0.4254, + "step": 6815 + }, + { + "epoch": 0.4150656151995859, + "grad_norm": 1.0244669601342575, + "learning_rate": 4.942780010034775e-06, + "loss": 0.4026, + "step": 6816 + }, + { + "epoch": 0.4151265109764638, + "grad_norm": 1.0917856802311112, + "learning_rate": 4.94276303418978e-06, + "loss": 0.4105, + "step": 6817 + }, + { + "epoch": 0.41518740675334165, + "grad_norm": 1.052161868649712, + "learning_rate": 4.942746055856149e-06, + "loss": 0.4321, + "step": 6818 + }, + { + "epoch": 0.41524830253021955, + "grad_norm": 0.9107145635290448, + "learning_rate": 4.942729075033899e-06, + "loss": 0.4737, + "step": 6819 + }, + { + "epoch": 0.4153091983070974, + "grad_norm": 1.039918216975511, + "learning_rate": 4.942712091723047e-06, + "loss": 0.4918, + "step": 6820 + }, + { + "epoch": 0.4153700940839753, + "grad_norm": 1.0525197649763014, + "learning_rate": 4.94269510592361e-06, + "loss": 0.5322, + "step": 6821 + }, + { + "epoch": 0.41543098986085314, + "grad_norm": 0.9659388209789216, + "learning_rate": 4.942678117635606e-06, + "loss": 0.4363, + "step": 6822 + }, + { + "epoch": 0.41549188563773104, + "grad_norm": 1.0603329134559794, + "learning_rate": 4.942661126859054e-06, + "loss": 0.4179, + "step": 6823 + }, + { + "epoch": 0.4155527814146089, + "grad_norm": 1.0984792136856611, + "learning_rate": 4.942644133593968e-06, + "loss": 0.4103, + "step": 6824 + }, + { + "epoch": 0.4156136771914868, + "grad_norm": 1.0759904219541907, + "learning_rate": 4.942627137840368e-06, + "loss": 0.4679, + "step": 6825 + }, + { + "epoch": 0.41567457296836463, + "grad_norm": 1.0273048519290848, + "learning_rate": 4.94261013959827e-06, + "loss": 0.4636, + "step": 6826 + }, + { + "epoch": 0.41573546874524253, + "grad_norm": 1.162891098539938, + "learning_rate": 4.9425931388676914e-06, + "loss": 0.3908, + "step": 6827 + }, + { + "epoch": 0.4157963645221204, + "grad_norm": 1.0041962584079447, + "learning_rate": 4.9425761356486505e-06, + "loss": 0.4085, + "step": 6828 + }, + { + "epoch": 0.4158572602989983, + "grad_norm": 0.9740106675948086, + "learning_rate": 4.942559129941162e-06, + "loss": 0.51, + "step": 6829 + }, + { + "epoch": 0.4159181560758761, + "grad_norm": 1.0861354330503674, + "learning_rate": 4.942542121745246e-06, + "loss": 0.426, + "step": 6830 + }, + { + "epoch": 0.415979051852754, + "grad_norm": 0.9454357756083693, + "learning_rate": 4.942525111060919e-06, + "loss": 0.5018, + "step": 6831 + }, + { + "epoch": 0.41603994762963187, + "grad_norm": 0.9722476886051543, + "learning_rate": 4.9425080978881985e-06, + "loss": 0.4901, + "step": 6832 + }, + { + "epoch": 0.41610084340650977, + "grad_norm": 0.8960286819501887, + "learning_rate": 4.942491082227101e-06, + "loss": 0.4933, + "step": 6833 + }, + { + "epoch": 0.4161617391833876, + "grad_norm": 1.0008648144708634, + "learning_rate": 4.942474064077645e-06, + "loss": 0.4902, + "step": 6834 + }, + { + "epoch": 0.4162226349602655, + "grad_norm": 0.9464946889453728, + "learning_rate": 4.942457043439847e-06, + "loss": 0.4705, + "step": 6835 + }, + { + "epoch": 0.41628353073714336, + "grad_norm": 0.9656297672802567, + "learning_rate": 4.9424400203137234e-06, + "loss": 0.5228, + "step": 6836 + }, + { + "epoch": 0.41634442651402126, + "grad_norm": 0.9919261750370889, + "learning_rate": 4.942422994699294e-06, + "loss": 0.4433, + "step": 6837 + }, + { + "epoch": 0.4164053222908991, + "grad_norm": 0.9873219164221029, + "learning_rate": 4.942405966596575e-06, + "loss": 0.476, + "step": 6838 + }, + { + "epoch": 0.416466218067777, + "grad_norm": 0.987957985416052, + "learning_rate": 4.942388936005583e-06, + "loss": 0.4979, + "step": 6839 + }, + { + "epoch": 0.41652711384465485, + "grad_norm": 1.0404752038064775, + "learning_rate": 4.942371902926336e-06, + "loss": 0.5145, + "step": 6840 + }, + { + "epoch": 0.41658800962153275, + "grad_norm": 1.0538313397133223, + "learning_rate": 4.942354867358851e-06, + "loss": 0.517, + "step": 6841 + }, + { + "epoch": 0.4166489053984106, + "grad_norm": 1.0383410983474008, + "learning_rate": 4.9423378293031455e-06, + "loss": 0.3913, + "step": 6842 + }, + { + "epoch": 0.4167098011752885, + "grad_norm": 1.123362470464405, + "learning_rate": 4.942320788759238e-06, + "loss": 0.4412, + "step": 6843 + }, + { + "epoch": 0.41677069695216634, + "grad_norm": 1.0249139330998889, + "learning_rate": 4.9423037457271435e-06, + "loss": 0.4932, + "step": 6844 + }, + { + "epoch": 0.41683159272904424, + "grad_norm": 1.0656893583718048, + "learning_rate": 4.942286700206882e-06, + "loss": 0.4442, + "step": 6845 + }, + { + "epoch": 0.4168924885059221, + "grad_norm": 1.1644674614728867, + "learning_rate": 4.942269652198469e-06, + "loss": 0.4233, + "step": 6846 + }, + { + "epoch": 0.4169533842828, + "grad_norm": 0.9538522630605323, + "learning_rate": 4.942252601701922e-06, + "loss": 0.5007, + "step": 6847 + }, + { + "epoch": 0.4170142800596779, + "grad_norm": 1.0962449387433535, + "learning_rate": 4.9422355487172595e-06, + "loss": 0.4241, + "step": 6848 + }, + { + "epoch": 0.41707517583655573, + "grad_norm": 0.9817802894654833, + "learning_rate": 4.9422184932444985e-06, + "loss": 0.456, + "step": 6849 + }, + { + "epoch": 0.41713607161343363, + "grad_norm": 0.9000607449586668, + "learning_rate": 4.942201435283655e-06, + "loss": 0.5276, + "step": 6850 + }, + { + "epoch": 0.4171969673903115, + "grad_norm": 1.0604764998010032, + "learning_rate": 4.942184374834748e-06, + "loss": 0.4852, + "step": 6851 + }, + { + "epoch": 0.4172578631671894, + "grad_norm": 0.9968369625823899, + "learning_rate": 4.942167311897794e-06, + "loss": 0.4403, + "step": 6852 + }, + { + "epoch": 0.4173187589440672, + "grad_norm": 1.0704684099916066, + "learning_rate": 4.942150246472811e-06, + "loss": 0.4229, + "step": 6853 + }, + { + "epoch": 0.4173796547209451, + "grad_norm": 0.9809211314189279, + "learning_rate": 4.942133178559817e-06, + "loss": 0.433, + "step": 6854 + }, + { + "epoch": 0.41744055049782297, + "grad_norm": 0.9346671139997385, + "learning_rate": 4.942116108158827e-06, + "loss": 0.5152, + "step": 6855 + }, + { + "epoch": 0.41750144627470087, + "grad_norm": 1.045172133262651, + "learning_rate": 4.942099035269861e-06, + "loss": 0.4717, + "step": 6856 + }, + { + "epoch": 0.4175623420515787, + "grad_norm": 1.0250211935850575, + "learning_rate": 4.9420819598929345e-06, + "loss": 0.4345, + "step": 6857 + }, + { + "epoch": 0.4176232378284566, + "grad_norm": 1.0511953046137035, + "learning_rate": 4.942064882028066e-06, + "loss": 0.35, + "step": 6858 + }, + { + "epoch": 0.41768413360533446, + "grad_norm": 1.062879784129, + "learning_rate": 4.942047801675273e-06, + "loss": 0.4061, + "step": 6859 + }, + { + "epoch": 0.41774502938221236, + "grad_norm": 1.0613837309883836, + "learning_rate": 4.9420307188345715e-06, + "loss": 0.4147, + "step": 6860 + }, + { + "epoch": 0.4178059251590902, + "grad_norm": 1.0941500369068267, + "learning_rate": 4.942013633505981e-06, + "loss": 0.4336, + "step": 6861 + }, + { + "epoch": 0.4178668209359681, + "grad_norm": 0.974718384931401, + "learning_rate": 4.941996545689517e-06, + "loss": 0.5506, + "step": 6862 + }, + { + "epoch": 0.41792771671284595, + "grad_norm": 1.063171870687541, + "learning_rate": 4.941979455385198e-06, + "loss": 0.4875, + "step": 6863 + }, + { + "epoch": 0.41798861248972385, + "grad_norm": 0.9846422099923258, + "learning_rate": 4.9419623625930416e-06, + "loss": 0.4058, + "step": 6864 + }, + { + "epoch": 0.4180495082666017, + "grad_norm": 1.0885060770262147, + "learning_rate": 4.9419452673130634e-06, + "loss": 0.4565, + "step": 6865 + }, + { + "epoch": 0.4181104040434796, + "grad_norm": 1.0423094319467916, + "learning_rate": 4.9419281695452835e-06, + "loss": 0.4278, + "step": 6866 + }, + { + "epoch": 0.41817129982035744, + "grad_norm": 1.0407434778162934, + "learning_rate": 4.941911069289717e-06, + "loss": 0.503, + "step": 6867 + }, + { + "epoch": 0.41823219559723535, + "grad_norm": 0.9965874931382847, + "learning_rate": 4.941893966546382e-06, + "loss": 0.5091, + "step": 6868 + }, + { + "epoch": 0.4182930913741132, + "grad_norm": 0.9963306773458843, + "learning_rate": 4.941876861315297e-06, + "loss": 0.5021, + "step": 6869 + }, + { + "epoch": 0.4183539871509911, + "grad_norm": 1.0250793043329531, + "learning_rate": 4.941859753596478e-06, + "loss": 0.4978, + "step": 6870 + }, + { + "epoch": 0.41841488292786894, + "grad_norm": 1.0078232991129537, + "learning_rate": 4.941842643389944e-06, + "loss": 0.4764, + "step": 6871 + }, + { + "epoch": 0.41847577870474684, + "grad_norm": 1.0162420226564404, + "learning_rate": 4.9418255306957104e-06, + "loss": 0.4625, + "step": 6872 + }, + { + "epoch": 0.4185366744816247, + "grad_norm": 0.9460217016201417, + "learning_rate": 4.941808415513797e-06, + "loss": 0.524, + "step": 6873 + }, + { + "epoch": 0.4185975702585026, + "grad_norm": 1.0549058703695566, + "learning_rate": 4.941791297844219e-06, + "loss": 0.4452, + "step": 6874 + }, + { + "epoch": 0.4186584660353804, + "grad_norm": 1.2020984150361766, + "learning_rate": 4.941774177686995e-06, + "loss": 0.4219, + "step": 6875 + }, + { + "epoch": 0.41871936181225833, + "grad_norm": 1.0007596780315, + "learning_rate": 4.941757055042142e-06, + "loss": 0.503, + "step": 6876 + }, + { + "epoch": 0.4187802575891362, + "grad_norm": 1.1181984840185075, + "learning_rate": 4.941739929909678e-06, + "loss": 0.4766, + "step": 6877 + }, + { + "epoch": 0.4188411533660141, + "grad_norm": 0.912080109012682, + "learning_rate": 4.94172280228962e-06, + "loss": 0.4962, + "step": 6878 + }, + { + "epoch": 0.4189020491428919, + "grad_norm": 0.9655020385409282, + "learning_rate": 4.941705672181986e-06, + "loss": 0.4699, + "step": 6879 + }, + { + "epoch": 0.4189629449197698, + "grad_norm": 0.9687992523380886, + "learning_rate": 4.941688539586792e-06, + "loss": 0.5059, + "step": 6880 + }, + { + "epoch": 0.41902384069664766, + "grad_norm": 1.0538788123897214, + "learning_rate": 4.941671404504057e-06, + "loss": 0.419, + "step": 6881 + }, + { + "epoch": 0.41908473647352557, + "grad_norm": 1.0622840658163935, + "learning_rate": 4.941654266933798e-06, + "loss": 0.5385, + "step": 6882 + }, + { + "epoch": 0.4191456322504034, + "grad_norm": 1.0499347185917822, + "learning_rate": 4.9416371268760324e-06, + "loss": 0.4334, + "step": 6883 + }, + { + "epoch": 0.4192065280272813, + "grad_norm": 0.9712858806753586, + "learning_rate": 4.941619984330778e-06, + "loss": 0.5458, + "step": 6884 + }, + { + "epoch": 0.41926742380415916, + "grad_norm": 0.9886011600022616, + "learning_rate": 4.941602839298052e-06, + "loss": 0.4035, + "step": 6885 + }, + { + "epoch": 0.41932831958103706, + "grad_norm": 1.0383942830119033, + "learning_rate": 4.941585691777871e-06, + "loss": 0.5407, + "step": 6886 + }, + { + "epoch": 0.4193892153579149, + "grad_norm": 0.9732462017570155, + "learning_rate": 4.941568541770254e-06, + "loss": 0.4953, + "step": 6887 + }, + { + "epoch": 0.4194501111347928, + "grad_norm": 1.0264314182407677, + "learning_rate": 4.941551389275217e-06, + "loss": 0.5197, + "step": 6888 + }, + { + "epoch": 0.4195110069116707, + "grad_norm": 1.1401794132365943, + "learning_rate": 4.941534234292779e-06, + "loss": 0.499, + "step": 6889 + }, + { + "epoch": 0.41957190268854855, + "grad_norm": 1.106085455370546, + "learning_rate": 4.941517076822956e-06, + "loss": 0.4266, + "step": 6890 + }, + { + "epoch": 0.41963279846542645, + "grad_norm": 1.0667242305949913, + "learning_rate": 4.941499916865766e-06, + "loss": 0.4282, + "step": 6891 + }, + { + "epoch": 0.4196936942423043, + "grad_norm": 0.9990849518386831, + "learning_rate": 4.941482754421228e-06, + "loss": 0.4242, + "step": 6892 + }, + { + "epoch": 0.4197545900191822, + "grad_norm": 1.105274589788735, + "learning_rate": 4.941465589489357e-06, + "loss": 0.3902, + "step": 6893 + }, + { + "epoch": 0.41981548579606004, + "grad_norm": 0.9879110956221834, + "learning_rate": 4.941448422070172e-06, + "loss": 0.4571, + "step": 6894 + }, + { + "epoch": 0.41987638157293794, + "grad_norm": 1.0690678568181395, + "learning_rate": 4.94143125216369e-06, + "loss": 0.3889, + "step": 6895 + }, + { + "epoch": 0.4199372773498158, + "grad_norm": 1.023216283616811, + "learning_rate": 4.941414079769928e-06, + "loss": 0.4716, + "step": 6896 + }, + { + "epoch": 0.4199981731266937, + "grad_norm": 1.0209390256409066, + "learning_rate": 4.941396904888904e-06, + "loss": 0.4626, + "step": 6897 + }, + { + "epoch": 0.42005906890357153, + "grad_norm": 1.0705466584749161, + "learning_rate": 4.941379727520637e-06, + "loss": 0.42, + "step": 6898 + }, + { + "epoch": 0.42011996468044943, + "grad_norm": 1.067935802391655, + "learning_rate": 4.941362547665142e-06, + "loss": 0.4499, + "step": 6899 + }, + { + "epoch": 0.4201808604573273, + "grad_norm": 1.127342237934069, + "learning_rate": 4.941345365322438e-06, + "loss": 0.4114, + "step": 6900 + }, + { + "epoch": 0.4202417562342052, + "grad_norm": 1.094638635822157, + "learning_rate": 4.941328180492542e-06, + "loss": 0.47, + "step": 6901 + }, + { + "epoch": 0.420302652011083, + "grad_norm": 1.0499743193556885, + "learning_rate": 4.941310993175472e-06, + "loss": 0.4324, + "step": 6902 + }, + { + "epoch": 0.4203635477879609, + "grad_norm": 1.1380361237811498, + "learning_rate": 4.9412938033712445e-06, + "loss": 0.458, + "step": 6903 + }, + { + "epoch": 0.42042444356483877, + "grad_norm": 1.0659975519755147, + "learning_rate": 4.941276611079878e-06, + "loss": 0.4193, + "step": 6904 + }, + { + "epoch": 0.42048533934171667, + "grad_norm": 1.051110424902886, + "learning_rate": 4.941259416301389e-06, + "loss": 0.4484, + "step": 6905 + }, + { + "epoch": 0.4205462351185945, + "grad_norm": 1.0082776263554576, + "learning_rate": 4.941242219035797e-06, + "loss": 0.4079, + "step": 6906 + }, + { + "epoch": 0.4206071308954724, + "grad_norm": 0.9628877538210004, + "learning_rate": 4.941225019283117e-06, + "loss": 0.5375, + "step": 6907 + }, + { + "epoch": 0.42066802667235026, + "grad_norm": 0.9846569445600125, + "learning_rate": 4.941207817043367e-06, + "loss": 0.4459, + "step": 6908 + }, + { + "epoch": 0.42072892244922816, + "grad_norm": 0.9875306726484664, + "learning_rate": 4.941190612316567e-06, + "loss": 0.4539, + "step": 6909 + }, + { + "epoch": 0.420789818226106, + "grad_norm": 0.9892958157818199, + "learning_rate": 4.941173405102732e-06, + "loss": 0.5263, + "step": 6910 + }, + { + "epoch": 0.4208507140029839, + "grad_norm": 1.0099534363314326, + "learning_rate": 4.9411561954018805e-06, + "loss": 0.4468, + "step": 6911 + }, + { + "epoch": 0.42091160977986175, + "grad_norm": 0.9642688490727765, + "learning_rate": 4.94113898321403e-06, + "loss": 0.3988, + "step": 6912 + }, + { + "epoch": 0.42097250555673965, + "grad_norm": 1.0131563727076405, + "learning_rate": 4.941121768539197e-06, + "loss": 0.3994, + "step": 6913 + }, + { + "epoch": 0.4210334013336175, + "grad_norm": 1.0816180077408069, + "learning_rate": 4.941104551377401e-06, + "loss": 0.4549, + "step": 6914 + }, + { + "epoch": 0.4210942971104954, + "grad_norm": 1.0201244990317855, + "learning_rate": 4.9410873317286574e-06, + "loss": 0.445, + "step": 6915 + }, + { + "epoch": 0.42115519288737324, + "grad_norm": 0.9551500950919936, + "learning_rate": 4.941070109592986e-06, + "loss": 0.4816, + "step": 6916 + }, + { + "epoch": 0.42121608866425114, + "grad_norm": 1.0950605847046275, + "learning_rate": 4.941052884970401e-06, + "loss": 0.4703, + "step": 6917 + }, + { + "epoch": 0.421276984441129, + "grad_norm": 0.9294230563678175, + "learning_rate": 4.941035657860924e-06, + "loss": 0.4971, + "step": 6918 + }, + { + "epoch": 0.4213378802180069, + "grad_norm": 0.9465827844722455, + "learning_rate": 4.94101842826457e-06, + "loss": 0.4762, + "step": 6919 + }, + { + "epoch": 0.42139877599488473, + "grad_norm": 1.0101937414633266, + "learning_rate": 4.941001196181358e-06, + "loss": 0.5217, + "step": 6920 + }, + { + "epoch": 0.42145967177176263, + "grad_norm": 0.9511398627331545, + "learning_rate": 4.940983961611303e-06, + "loss": 0.5055, + "step": 6921 + }, + { + "epoch": 0.4215205675486405, + "grad_norm": 1.0522016635259785, + "learning_rate": 4.940966724554426e-06, + "loss": 0.4067, + "step": 6922 + }, + { + "epoch": 0.4215814633255184, + "grad_norm": 1.011642882444406, + "learning_rate": 4.9409494850107415e-06, + "loss": 0.4376, + "step": 6923 + }, + { + "epoch": 0.4216423591023962, + "grad_norm": 1.1592787021280624, + "learning_rate": 4.940932242980269e-06, + "loss": 0.4919, + "step": 6924 + }, + { + "epoch": 0.4217032548792741, + "grad_norm": 1.0414739667384096, + "learning_rate": 4.9409149984630255e-06, + "loss": 0.4616, + "step": 6925 + }, + { + "epoch": 0.42176415065615197, + "grad_norm": 1.0225456730650626, + "learning_rate": 4.940897751459029e-06, + "loss": 0.4917, + "step": 6926 + }, + { + "epoch": 0.42182504643302987, + "grad_norm": 1.0549180102049043, + "learning_rate": 4.940880501968296e-06, + "loss": 0.4267, + "step": 6927 + }, + { + "epoch": 0.4218859422099077, + "grad_norm": 0.9937189901096996, + "learning_rate": 4.940863249990844e-06, + "loss": 0.5189, + "step": 6928 + }, + { + "epoch": 0.4219468379867856, + "grad_norm": 0.933368787936607, + "learning_rate": 4.940845995526692e-06, + "loss": 0.5361, + "step": 6929 + }, + { + "epoch": 0.4220077337636635, + "grad_norm": 0.9688773327842239, + "learning_rate": 4.940828738575857e-06, + "loss": 0.4893, + "step": 6930 + }, + { + "epoch": 0.42206862954054136, + "grad_norm": 0.9901015755147361, + "learning_rate": 4.940811479138357e-06, + "loss": 0.456, + "step": 6931 + }, + { + "epoch": 0.42212952531741926, + "grad_norm": 0.9130600547781128, + "learning_rate": 4.9407942172142086e-06, + "loss": 0.4943, + "step": 6932 + }, + { + "epoch": 0.4221904210942971, + "grad_norm": 1.0213028245959568, + "learning_rate": 4.940776952803429e-06, + "loss": 0.444, + "step": 6933 + }, + { + "epoch": 0.422251316871175, + "grad_norm": 1.0325883463393994, + "learning_rate": 4.940759685906037e-06, + "loss": 0.4367, + "step": 6934 + }, + { + "epoch": 0.42231221264805285, + "grad_norm": 1.0502623858798636, + "learning_rate": 4.94074241652205e-06, + "loss": 0.4313, + "step": 6935 + }, + { + "epoch": 0.42237310842493075, + "grad_norm": 1.0103354402243365, + "learning_rate": 4.940725144651486e-06, + "loss": 0.46, + "step": 6936 + }, + { + "epoch": 0.4224340042018086, + "grad_norm": 1.1111605491441987, + "learning_rate": 4.940707870294361e-06, + "loss": 0.4144, + "step": 6937 + }, + { + "epoch": 0.4224948999786865, + "grad_norm": 0.9398467260123436, + "learning_rate": 4.940690593450694e-06, + "loss": 0.4687, + "step": 6938 + }, + { + "epoch": 0.42255579575556435, + "grad_norm": 1.0778569928292203, + "learning_rate": 4.940673314120502e-06, + "loss": 0.4953, + "step": 6939 + }, + { + "epoch": 0.42261669153244225, + "grad_norm": 1.0190092085487956, + "learning_rate": 4.9406560323038025e-06, + "loss": 0.4664, + "step": 6940 + }, + { + "epoch": 0.4226775873093201, + "grad_norm": 1.0850365984810622, + "learning_rate": 4.940638748000613e-06, + "loss": 0.3761, + "step": 6941 + }, + { + "epoch": 0.422738483086198, + "grad_norm": 1.0182462937170973, + "learning_rate": 4.940621461210952e-06, + "loss": 0.4922, + "step": 6942 + }, + { + "epoch": 0.42279937886307584, + "grad_norm": 1.036285186292872, + "learning_rate": 4.940604171934838e-06, + "loss": 0.473, + "step": 6943 + }, + { + "epoch": 0.42286027463995374, + "grad_norm": 0.9966408759446921, + "learning_rate": 4.940586880172285e-06, + "loss": 0.4845, + "step": 6944 + }, + { + "epoch": 0.4229211704168316, + "grad_norm": 1.1042813444911208, + "learning_rate": 4.940569585923314e-06, + "loss": 0.4452, + "step": 6945 + }, + { + "epoch": 0.4229820661937095, + "grad_norm": 0.9462982840365924, + "learning_rate": 4.940552289187941e-06, + "loss": 0.471, + "step": 6946 + }, + { + "epoch": 0.42304296197058733, + "grad_norm": 1.0263902240352818, + "learning_rate": 4.940534989966184e-06, + "loss": 0.4618, + "step": 6947 + }, + { + "epoch": 0.42310385774746523, + "grad_norm": 1.0240783833293283, + "learning_rate": 4.9405176882580615e-06, + "loss": 0.4582, + "step": 6948 + }, + { + "epoch": 0.4231647535243431, + "grad_norm": 0.9655430895600866, + "learning_rate": 4.94050038406359e-06, + "loss": 0.4545, + "step": 6949 + }, + { + "epoch": 0.423225649301221, + "grad_norm": 1.0040303291766122, + "learning_rate": 4.940483077382788e-06, + "loss": 0.5301, + "step": 6950 + }, + { + "epoch": 0.4232865450780988, + "grad_norm": 1.023317926634678, + "learning_rate": 4.9404657682156706e-06, + "loss": 0.5076, + "step": 6951 + }, + { + "epoch": 0.4233474408549767, + "grad_norm": 0.939752325179063, + "learning_rate": 4.940448456562259e-06, + "loss": 0.4691, + "step": 6952 + }, + { + "epoch": 0.42340833663185456, + "grad_norm": 0.9526210286356107, + "learning_rate": 4.940431142422569e-06, + "loss": 0.4963, + "step": 6953 + }, + { + "epoch": 0.42346923240873247, + "grad_norm": 1.016852620299676, + "learning_rate": 4.940413825796618e-06, + "loss": 0.46, + "step": 6954 + }, + { + "epoch": 0.4235301281856103, + "grad_norm": 1.0694550369532958, + "learning_rate": 4.940396506684425e-06, + "loss": 0.4286, + "step": 6955 + }, + { + "epoch": 0.4235910239624882, + "grad_norm": 0.9734987843897014, + "learning_rate": 4.940379185086006e-06, + "loss": 0.4699, + "step": 6956 + }, + { + "epoch": 0.42365191973936606, + "grad_norm": 0.9699360162739222, + "learning_rate": 4.94036186100138e-06, + "loss": 0.4282, + "step": 6957 + }, + { + "epoch": 0.42371281551624396, + "grad_norm": 0.9924920003044363, + "learning_rate": 4.940344534430564e-06, + "loss": 0.4784, + "step": 6958 + }, + { + "epoch": 0.4237737112931218, + "grad_norm": 1.0577513434377066, + "learning_rate": 4.940327205373575e-06, + "loss": 0.427, + "step": 6959 + }, + { + "epoch": 0.4238346070699997, + "grad_norm": 1.053350188417864, + "learning_rate": 4.9403098738304325e-06, + "loss": 0.4944, + "step": 6960 + }, + { + "epoch": 0.42389550284687755, + "grad_norm": 1.0888298051671244, + "learning_rate": 4.940292539801152e-06, + "loss": 0.4796, + "step": 6961 + }, + { + "epoch": 0.42395639862375545, + "grad_norm": 1.0266124968848056, + "learning_rate": 4.940275203285752e-06, + "loss": 0.4347, + "step": 6962 + }, + { + "epoch": 0.4240172944006333, + "grad_norm": 1.0185203262010492, + "learning_rate": 4.940257864284251e-06, + "loss": 0.4149, + "step": 6963 + }, + { + "epoch": 0.4240781901775112, + "grad_norm": 1.0345933575748654, + "learning_rate": 4.940240522796667e-06, + "loss": 0.4982, + "step": 6964 + }, + { + "epoch": 0.42413908595438904, + "grad_norm": 1.0865316898672372, + "learning_rate": 4.940223178823015e-06, + "loss": 0.4263, + "step": 6965 + }, + { + "epoch": 0.42419998173126694, + "grad_norm": 1.0813336133565872, + "learning_rate": 4.940205832363315e-06, + "loss": 0.4009, + "step": 6966 + }, + { + "epoch": 0.4242608775081448, + "grad_norm": 1.1460867232664789, + "learning_rate": 4.940188483417584e-06, + "loss": 0.4624, + "step": 6967 + }, + { + "epoch": 0.4243217732850227, + "grad_norm": 1.0807137576078003, + "learning_rate": 4.940171131985841e-06, + "loss": 0.3855, + "step": 6968 + }, + { + "epoch": 0.42438266906190053, + "grad_norm": 1.037062783983756, + "learning_rate": 4.9401537780681e-06, + "loss": 0.4516, + "step": 6969 + }, + { + "epoch": 0.42444356483877843, + "grad_norm": 0.9823518976166427, + "learning_rate": 4.940136421664382e-06, + "loss": 0.4968, + "step": 6970 + }, + { + "epoch": 0.42450446061565633, + "grad_norm": 1.1052043429512128, + "learning_rate": 4.940119062774704e-06, + "loss": 0.4663, + "step": 6971 + }, + { + "epoch": 0.4245653563925342, + "grad_norm": 1.0081892111525674, + "learning_rate": 4.9401017013990835e-06, + "loss": 0.4321, + "step": 6972 + }, + { + "epoch": 0.4246262521694121, + "grad_norm": 1.05698260687011, + "learning_rate": 4.940084337537539e-06, + "loss": 0.4531, + "step": 6973 + }, + { + "epoch": 0.4246871479462899, + "grad_norm": 1.0508671002882937, + "learning_rate": 4.940066971190086e-06, + "loss": 0.4412, + "step": 6974 + }, + { + "epoch": 0.4247480437231678, + "grad_norm": 1.0355220017765956, + "learning_rate": 4.9400496023567435e-06, + "loss": 0.4368, + "step": 6975 + }, + { + "epoch": 0.42480893950004567, + "grad_norm": 0.9359909007787997, + "learning_rate": 4.94003223103753e-06, + "loss": 0.5068, + "step": 6976 + }, + { + "epoch": 0.42486983527692357, + "grad_norm": 1.0488866243554347, + "learning_rate": 4.9400148572324616e-06, + "loss": 0.4409, + "step": 6977 + }, + { + "epoch": 0.4249307310538014, + "grad_norm": 1.1119019771236567, + "learning_rate": 4.939997480941557e-06, + "loss": 0.4512, + "step": 6978 + }, + { + "epoch": 0.4249916268306793, + "grad_norm": 1.0475749867905801, + "learning_rate": 4.9399801021648345e-06, + "loss": 0.4189, + "step": 6979 + }, + { + "epoch": 0.42505252260755716, + "grad_norm": 1.0425743986077347, + "learning_rate": 4.93996272090231e-06, + "loss": 0.4665, + "step": 6980 + }, + { + "epoch": 0.42511341838443506, + "grad_norm": 1.036238305388462, + "learning_rate": 4.939945337154002e-06, + "loss": 0.4562, + "step": 6981 + }, + { + "epoch": 0.4251743141613129, + "grad_norm": 1.0276347560595651, + "learning_rate": 4.939927950919929e-06, + "loss": 0.445, + "step": 6982 + }, + { + "epoch": 0.4252352099381908, + "grad_norm": 0.9397353233528682, + "learning_rate": 4.939910562200109e-06, + "loss": 0.432, + "step": 6983 + }, + { + "epoch": 0.42529610571506865, + "grad_norm": 0.9975536852744393, + "learning_rate": 4.939893170994558e-06, + "loss": 0.4674, + "step": 6984 + }, + { + "epoch": 0.42535700149194655, + "grad_norm": 0.9982259401448434, + "learning_rate": 4.939875777303293e-06, + "loss": 0.4532, + "step": 6985 + }, + { + "epoch": 0.4254178972688244, + "grad_norm": 1.0871870478115369, + "learning_rate": 4.939858381126335e-06, + "loss": 0.4633, + "step": 6986 + }, + { + "epoch": 0.4254787930457023, + "grad_norm": 1.031815737756965, + "learning_rate": 4.939840982463701e-06, + "loss": 0.4845, + "step": 6987 + }, + { + "epoch": 0.42553968882258014, + "grad_norm": 0.9954404292900767, + "learning_rate": 4.939823581315406e-06, + "loss": 0.4531, + "step": 6988 + }, + { + "epoch": 0.42560058459945804, + "grad_norm": 0.9461123495623435, + "learning_rate": 4.93980617768147e-06, + "loss": 0.4805, + "step": 6989 + }, + { + "epoch": 0.4256614803763359, + "grad_norm": 1.061790910746563, + "learning_rate": 4.9397887715619095e-06, + "loss": 0.4138, + "step": 6990 + }, + { + "epoch": 0.4257223761532138, + "grad_norm": 1.0331710756406984, + "learning_rate": 4.939771362956744e-06, + "loss": 0.4417, + "step": 6991 + }, + { + "epoch": 0.42578327193009163, + "grad_norm": 0.9834644011403237, + "learning_rate": 4.93975395186599e-06, + "loss": 0.5487, + "step": 6992 + }, + { + "epoch": 0.42584416770696953, + "grad_norm": 1.0399382430146316, + "learning_rate": 4.939736538289665e-06, + "loss": 0.4075, + "step": 6993 + }, + { + "epoch": 0.4259050634838474, + "grad_norm": 1.0008168972534452, + "learning_rate": 4.9397191222277875e-06, + "loss": 0.4554, + "step": 6994 + }, + { + "epoch": 0.4259659592607253, + "grad_norm": 1.0289114825750996, + "learning_rate": 4.939701703680374e-06, + "loss": 0.4397, + "step": 6995 + }, + { + "epoch": 0.4260268550376031, + "grad_norm": 1.0160030388953158, + "learning_rate": 4.939684282647444e-06, + "loss": 0.4397, + "step": 6996 + }, + { + "epoch": 0.426087750814481, + "grad_norm": 0.9449815308929997, + "learning_rate": 4.939666859129015e-06, + "loss": 0.5058, + "step": 6997 + }, + { + "epoch": 0.42614864659135887, + "grad_norm": 0.9928772780151173, + "learning_rate": 4.939649433125102e-06, + "loss": 0.4974, + "step": 6998 + }, + { + "epoch": 0.42620954236823677, + "grad_norm": 1.0461101072063617, + "learning_rate": 4.939632004635727e-06, + "loss": 0.4282, + "step": 6999 + }, + { + "epoch": 0.4262704381451146, + "grad_norm": 1.1414301276456822, + "learning_rate": 4.939614573660905e-06, + "loss": 0.3853, + "step": 7000 + }, + { + "epoch": 0.4263313339219925, + "grad_norm": 1.0132488989882424, + "learning_rate": 4.939597140200654e-06, + "loss": 0.4472, + "step": 7001 + }, + { + "epoch": 0.42639222969887036, + "grad_norm": 1.0871401730293724, + "learning_rate": 4.939579704254992e-06, + "loss": 0.4561, + "step": 7002 + }, + { + "epoch": 0.42645312547574826, + "grad_norm": 0.9156798432951566, + "learning_rate": 4.939562265823938e-06, + "loss": 0.4602, + "step": 7003 + }, + { + "epoch": 0.4265140212526261, + "grad_norm": 1.007524054944773, + "learning_rate": 4.9395448249075076e-06, + "loss": 0.462, + "step": 7004 + }, + { + "epoch": 0.426574917029504, + "grad_norm": 0.9845664493797267, + "learning_rate": 4.93952738150572e-06, + "loss": 0.4913, + "step": 7005 + }, + { + "epoch": 0.42663581280638185, + "grad_norm": 1.010788130061817, + "learning_rate": 4.9395099356185924e-06, + "loss": 0.4069, + "step": 7006 + }, + { + "epoch": 0.42669670858325975, + "grad_norm": 0.9786437593105952, + "learning_rate": 4.939492487246142e-06, + "loss": 0.4602, + "step": 7007 + }, + { + "epoch": 0.4267576043601376, + "grad_norm": 0.9709057479772282, + "learning_rate": 4.9394750363883896e-06, + "loss": 0.4298, + "step": 7008 + }, + { + "epoch": 0.4268185001370155, + "grad_norm": 1.0288104877823887, + "learning_rate": 4.939457583045349e-06, + "loss": 0.4191, + "step": 7009 + }, + { + "epoch": 0.42687939591389334, + "grad_norm": 1.015892927957105, + "learning_rate": 4.93944012721704e-06, + "loss": 0.4367, + "step": 7010 + }, + { + "epoch": 0.42694029169077125, + "grad_norm": 0.9799115205592929, + "learning_rate": 4.939422668903481e-06, + "loss": 0.4556, + "step": 7011 + }, + { + "epoch": 0.42700118746764915, + "grad_norm": 1.0076651290312408, + "learning_rate": 4.939405208104688e-06, + "loss": 0.5135, + "step": 7012 + }, + { + "epoch": 0.427062083244527, + "grad_norm": 1.049547453357106, + "learning_rate": 4.93938774482068e-06, + "loss": 0.494, + "step": 7013 + }, + { + "epoch": 0.4271229790214049, + "grad_norm": 1.0812495410903404, + "learning_rate": 4.939370279051475e-06, + "loss": 0.4653, + "step": 7014 + }, + { + "epoch": 0.42718387479828274, + "grad_norm": 0.9357447919133179, + "learning_rate": 4.9393528107970896e-06, + "loss": 0.5002, + "step": 7015 + }, + { + "epoch": 0.42724477057516064, + "grad_norm": 1.0776502412126743, + "learning_rate": 4.9393353400575425e-06, + "loss": 0.483, + "step": 7016 + }, + { + "epoch": 0.4273056663520385, + "grad_norm": 1.1827740899761419, + "learning_rate": 4.939317866832852e-06, + "loss": 0.4777, + "step": 7017 + }, + { + "epoch": 0.4273665621289164, + "grad_norm": 0.988918966312077, + "learning_rate": 4.939300391123033e-06, + "loss": 0.4941, + "step": 7018 + }, + { + "epoch": 0.42742745790579423, + "grad_norm": 1.0050840318116836, + "learning_rate": 4.939282912928107e-06, + "loss": 0.5217, + "step": 7019 + }, + { + "epoch": 0.42748835368267213, + "grad_norm": 0.959543566063092, + "learning_rate": 4.93926543224809e-06, + "loss": 0.4435, + "step": 7020 + }, + { + "epoch": 0.42754924945955, + "grad_norm": 1.0258445364224773, + "learning_rate": 4.939247949083e-06, + "loss": 0.4912, + "step": 7021 + }, + { + "epoch": 0.4276101452364279, + "grad_norm": 1.10453499998866, + "learning_rate": 4.939230463432856e-06, + "loss": 0.4586, + "step": 7022 + }, + { + "epoch": 0.4276710410133057, + "grad_norm": 1.1633509632478276, + "learning_rate": 4.939212975297674e-06, + "loss": 0.4827, + "step": 7023 + }, + { + "epoch": 0.4277319367901836, + "grad_norm": 1.0373306803719449, + "learning_rate": 4.9391954846774716e-06, + "loss": 0.4251, + "step": 7024 + }, + { + "epoch": 0.42779283256706147, + "grad_norm": 1.0670837195703933, + "learning_rate": 4.9391779915722686e-06, + "loss": 0.3951, + "step": 7025 + }, + { + "epoch": 0.42785372834393937, + "grad_norm": 0.9892444512888067, + "learning_rate": 4.939160495982082e-06, + "loss": 0.5216, + "step": 7026 + }, + { + "epoch": 0.4279146241208172, + "grad_norm": 0.975094729194285, + "learning_rate": 4.939142997906928e-06, + "loss": 0.4477, + "step": 7027 + }, + { + "epoch": 0.4279755198976951, + "grad_norm": 1.128155909276017, + "learning_rate": 4.939125497346827e-06, + "loss": 0.4278, + "step": 7028 + }, + { + "epoch": 0.42803641567457296, + "grad_norm": 0.9663970108909329, + "learning_rate": 4.939107994301795e-06, + "loss": 0.4687, + "step": 7029 + }, + { + "epoch": 0.42809731145145086, + "grad_norm": 1.1018415609152197, + "learning_rate": 4.939090488771851e-06, + "loss": 0.4554, + "step": 7030 + }, + { + "epoch": 0.4281582072283287, + "grad_norm": 1.0320985947704786, + "learning_rate": 4.9390729807570125e-06, + "loss": 0.5207, + "step": 7031 + }, + { + "epoch": 0.4282191030052066, + "grad_norm": 1.0480163601673853, + "learning_rate": 4.939055470257297e-06, + "loss": 0.5109, + "step": 7032 + }, + { + "epoch": 0.42827999878208445, + "grad_norm": 0.919250292677916, + "learning_rate": 4.939037957272722e-06, + "loss": 0.484, + "step": 7033 + }, + { + "epoch": 0.42834089455896235, + "grad_norm": 1.080636214689831, + "learning_rate": 4.939020441803306e-06, + "loss": 0.4524, + "step": 7034 + }, + { + "epoch": 0.4284017903358402, + "grad_norm": 1.0291258677070394, + "learning_rate": 4.939002923849067e-06, + "loss": 0.4291, + "step": 7035 + }, + { + "epoch": 0.4284626861127181, + "grad_norm": 0.9974736867058733, + "learning_rate": 4.9389854034100225e-06, + "loss": 0.4401, + "step": 7036 + }, + { + "epoch": 0.42852358188959594, + "grad_norm": 1.0208852748916548, + "learning_rate": 4.93896788048619e-06, + "loss": 0.4536, + "step": 7037 + }, + { + "epoch": 0.42858447766647384, + "grad_norm": 1.0826261654472114, + "learning_rate": 4.938950355077588e-06, + "loss": 0.395, + "step": 7038 + }, + { + "epoch": 0.4286453734433517, + "grad_norm": 0.9741257489773556, + "learning_rate": 4.938932827184234e-06, + "loss": 0.4231, + "step": 7039 + }, + { + "epoch": 0.4287062692202296, + "grad_norm": 1.115673413612196, + "learning_rate": 4.938915296806146e-06, + "loss": 0.3191, + "step": 7040 + }, + { + "epoch": 0.42876716499710743, + "grad_norm": 1.047165031527899, + "learning_rate": 4.938897763943342e-06, + "loss": 0.4989, + "step": 7041 + }, + { + "epoch": 0.42882806077398533, + "grad_norm": 1.0994218651925871, + "learning_rate": 4.9388802285958395e-06, + "loss": 0.4177, + "step": 7042 + }, + { + "epoch": 0.4288889565508632, + "grad_norm": 0.9783260455830464, + "learning_rate": 4.938862690763656e-06, + "loss": 0.4223, + "step": 7043 + }, + { + "epoch": 0.4289498523277411, + "grad_norm": 0.9410314266824901, + "learning_rate": 4.9388451504468104e-06, + "loss": 0.4797, + "step": 7044 + }, + { + "epoch": 0.4290107481046189, + "grad_norm": 1.0506841370610267, + "learning_rate": 4.9388276076453204e-06, + "loss": 0.4284, + "step": 7045 + }, + { + "epoch": 0.4290716438814968, + "grad_norm": 1.1415820217103596, + "learning_rate": 4.938810062359203e-06, + "loss": 0.4177, + "step": 7046 + }, + { + "epoch": 0.42913253965837467, + "grad_norm": 1.045195023465164, + "learning_rate": 4.938792514588478e-06, + "loss": 0.4635, + "step": 7047 + }, + { + "epoch": 0.42919343543525257, + "grad_norm": 1.0570571583024042, + "learning_rate": 4.9387749643331595e-06, + "loss": 0.4932, + "step": 7048 + }, + { + "epoch": 0.4292543312121304, + "grad_norm": 1.1010580995669788, + "learning_rate": 4.938757411593269e-06, + "loss": 0.3948, + "step": 7049 + }, + { + "epoch": 0.4293152269890083, + "grad_norm": 0.9869463240108883, + "learning_rate": 4.938739856368823e-06, + "loss": 0.4858, + "step": 7050 + }, + { + "epoch": 0.42937612276588616, + "grad_norm": 1.0118590730723809, + "learning_rate": 4.93872229865984e-06, + "loss": 0.4482, + "step": 7051 + }, + { + "epoch": 0.42943701854276406, + "grad_norm": 1.020531129344489, + "learning_rate": 4.938704738466337e-06, + "loss": 0.4023, + "step": 7052 + }, + { + "epoch": 0.42949791431964196, + "grad_norm": 1.022125571620922, + "learning_rate": 4.938687175788332e-06, + "loss": 0.4691, + "step": 7053 + }, + { + "epoch": 0.4295588100965198, + "grad_norm": 0.9438495236002634, + "learning_rate": 4.938669610625844e-06, + "loss": 0.4883, + "step": 7054 + }, + { + "epoch": 0.4296197058733977, + "grad_norm": 0.9244424379671831, + "learning_rate": 4.93865204297889e-06, + "loss": 0.4896, + "step": 7055 + }, + { + "epoch": 0.42968060165027555, + "grad_norm": 1.0258114350264504, + "learning_rate": 4.938634472847488e-06, + "loss": 0.4078, + "step": 7056 + }, + { + "epoch": 0.42974149742715345, + "grad_norm": 1.024124548343945, + "learning_rate": 4.938616900231655e-06, + "loss": 0.4585, + "step": 7057 + }, + { + "epoch": 0.4298023932040313, + "grad_norm": 1.0145381530997983, + "learning_rate": 4.938599325131411e-06, + "loss": 0.4264, + "step": 7058 + }, + { + "epoch": 0.4298632889809092, + "grad_norm": 0.9960456051855848, + "learning_rate": 4.938581747546772e-06, + "loss": 0.4768, + "step": 7059 + }, + { + "epoch": 0.42992418475778704, + "grad_norm": 1.0615262590762542, + "learning_rate": 4.938564167477756e-06, + "loss": 0.5009, + "step": 7060 + }, + { + "epoch": 0.42998508053466494, + "grad_norm": 0.9334120670965571, + "learning_rate": 4.938546584924383e-06, + "loss": 0.4297, + "step": 7061 + }, + { + "epoch": 0.4300459763115428, + "grad_norm": 0.900686769691645, + "learning_rate": 4.938528999886668e-06, + "loss": 0.5149, + "step": 7062 + }, + { + "epoch": 0.4301068720884207, + "grad_norm": 1.0058545884451588, + "learning_rate": 4.938511412364631e-06, + "loss": 0.4695, + "step": 7063 + }, + { + "epoch": 0.43016776786529853, + "grad_norm": 0.9317177303969639, + "learning_rate": 4.93849382235829e-06, + "loss": 0.4507, + "step": 7064 + }, + { + "epoch": 0.43022866364217643, + "grad_norm": 1.0119626154968677, + "learning_rate": 4.938476229867661e-06, + "loss": 0.4751, + "step": 7065 + }, + { + "epoch": 0.4302895594190543, + "grad_norm": 0.9542196061948863, + "learning_rate": 4.938458634892764e-06, + "loss": 0.4637, + "step": 7066 + }, + { + "epoch": 0.4303504551959322, + "grad_norm": 0.9909306481961384, + "learning_rate": 4.938441037433615e-06, + "loss": 0.4667, + "step": 7067 + }, + { + "epoch": 0.43041135097281, + "grad_norm": 1.0925770574580504, + "learning_rate": 4.938423437490234e-06, + "loss": 0.4681, + "step": 7068 + }, + { + "epoch": 0.4304722467496879, + "grad_norm": 0.9455375216037856, + "learning_rate": 4.938405835062638e-06, + "loss": 0.462, + "step": 7069 + }, + { + "epoch": 0.43053314252656577, + "grad_norm": 1.043306104927692, + "learning_rate": 4.938388230150845e-06, + "loss": 0.4522, + "step": 7070 + }, + { + "epoch": 0.43059403830344367, + "grad_norm": 1.046471829822673, + "learning_rate": 4.938370622754871e-06, + "loss": 0.4018, + "step": 7071 + }, + { + "epoch": 0.4306549340803215, + "grad_norm": 0.9750519760104542, + "learning_rate": 4.938353012874737e-06, + "loss": 0.444, + "step": 7072 + }, + { + "epoch": 0.4307158298571994, + "grad_norm": 1.0094488131082406, + "learning_rate": 4.93833540051046e-06, + "loss": 0.414, + "step": 7073 + }, + { + "epoch": 0.43077672563407726, + "grad_norm": 1.0360162073716277, + "learning_rate": 4.9383177856620565e-06, + "loss": 0.4056, + "step": 7074 + }, + { + "epoch": 0.43083762141095516, + "grad_norm": 1.0513415709796703, + "learning_rate": 4.938300168329546e-06, + "loss": 0.4048, + "step": 7075 + }, + { + "epoch": 0.430898517187833, + "grad_norm": 1.0814690150944943, + "learning_rate": 4.938282548512947e-06, + "loss": 0.4047, + "step": 7076 + }, + { + "epoch": 0.4309594129647109, + "grad_norm": 0.9744654361607861, + "learning_rate": 4.938264926212275e-06, + "loss": 0.4442, + "step": 7077 + }, + { + "epoch": 0.43102030874158875, + "grad_norm": 1.0310410107901322, + "learning_rate": 4.93824730142755e-06, + "loss": 0.5355, + "step": 7078 + }, + { + "epoch": 0.43108120451846665, + "grad_norm": 0.9898391366501592, + "learning_rate": 4.9382296741587885e-06, + "loss": 0.4513, + "step": 7079 + }, + { + "epoch": 0.4311421002953445, + "grad_norm": 1.0758256026831732, + "learning_rate": 4.9382120444060105e-06, + "loss": 0.4865, + "step": 7080 + }, + { + "epoch": 0.4312029960722224, + "grad_norm": 1.1210735686802078, + "learning_rate": 4.938194412169233e-06, + "loss": 0.4859, + "step": 7081 + }, + { + "epoch": 0.43126389184910024, + "grad_norm": 0.9350051929199468, + "learning_rate": 4.938176777448472e-06, + "loss": 0.4947, + "step": 7082 + }, + { + "epoch": 0.43132478762597815, + "grad_norm": 0.9611407998792437, + "learning_rate": 4.938159140243749e-06, + "loss": 0.4854, + "step": 7083 + }, + { + "epoch": 0.431385683402856, + "grad_norm": 0.9451077421184301, + "learning_rate": 4.938141500555079e-06, + "loss": 0.4819, + "step": 7084 + }, + { + "epoch": 0.4314465791797339, + "grad_norm": 1.0636730032304016, + "learning_rate": 4.938123858382482e-06, + "loss": 0.4743, + "step": 7085 + }, + { + "epoch": 0.43150747495661174, + "grad_norm": 1.0837472605590237, + "learning_rate": 4.938106213725974e-06, + "loss": 0.4348, + "step": 7086 + }, + { + "epoch": 0.43156837073348964, + "grad_norm": 1.0207173981345758, + "learning_rate": 4.938088566585575e-06, + "loss": 0.4961, + "step": 7087 + }, + { + "epoch": 0.4316292665103675, + "grad_norm": 1.0745283895063125, + "learning_rate": 4.938070916961302e-06, + "loss": 0.4249, + "step": 7088 + }, + { + "epoch": 0.4316901622872454, + "grad_norm": 1.002591862767097, + "learning_rate": 4.938053264853172e-06, + "loss": 0.487, + "step": 7089 + }, + { + "epoch": 0.4317510580641232, + "grad_norm": 0.8909262595633126, + "learning_rate": 4.938035610261206e-06, + "loss": 0.4699, + "step": 7090 + }, + { + "epoch": 0.43181195384100113, + "grad_norm": 1.0114105696667288, + "learning_rate": 4.938017953185417e-06, + "loss": 0.5073, + "step": 7091 + }, + { + "epoch": 0.431872849617879, + "grad_norm": 0.981222062755004, + "learning_rate": 4.938000293625829e-06, + "loss": 0.5417, + "step": 7092 + }, + { + "epoch": 0.4319337453947569, + "grad_norm": 1.0385996015307957, + "learning_rate": 4.937982631582456e-06, + "loss": 0.4599, + "step": 7093 + }, + { + "epoch": 0.4319946411716348, + "grad_norm": 1.038099459853274, + "learning_rate": 4.937964967055317e-06, + "loss": 0.5162, + "step": 7094 + }, + { + "epoch": 0.4320555369485126, + "grad_norm": 1.06028020528734, + "learning_rate": 4.937947300044429e-06, + "loss": 0.4552, + "step": 7095 + }, + { + "epoch": 0.4321164327253905, + "grad_norm": 1.001448843544287, + "learning_rate": 4.937929630549812e-06, + "loss": 0.4439, + "step": 7096 + }, + { + "epoch": 0.43217732850226837, + "grad_norm": 1.0250595034826888, + "learning_rate": 4.937911958571483e-06, + "loss": 0.4475, + "step": 7097 + }, + { + "epoch": 0.43223822427914627, + "grad_norm": 1.089475594478964, + "learning_rate": 4.937894284109459e-06, + "loss": 0.4648, + "step": 7098 + }, + { + "epoch": 0.4322991200560241, + "grad_norm": 1.0957134463833573, + "learning_rate": 4.93787660716376e-06, + "loss": 0.4614, + "step": 7099 + }, + { + "epoch": 0.432360015832902, + "grad_norm": 1.021985553124796, + "learning_rate": 4.9378589277344025e-06, + "loss": 0.4668, + "step": 7100 + }, + { + "epoch": 0.43242091160977986, + "grad_norm": 1.1135632458497244, + "learning_rate": 4.937841245821405e-06, + "loss": 0.4224, + "step": 7101 + }, + { + "epoch": 0.43248180738665776, + "grad_norm": 0.9931257958147263, + "learning_rate": 4.937823561424786e-06, + "loss": 0.4186, + "step": 7102 + }, + { + "epoch": 0.4325427031635356, + "grad_norm": 1.041509459659007, + "learning_rate": 4.937805874544564e-06, + "loss": 0.4598, + "step": 7103 + }, + { + "epoch": 0.4326035989404135, + "grad_norm": 0.9980360074141074, + "learning_rate": 4.9377881851807545e-06, + "loss": 0.4812, + "step": 7104 + }, + { + "epoch": 0.43266449471729135, + "grad_norm": 0.9994704626844962, + "learning_rate": 4.937770493333377e-06, + "loss": 0.4748, + "step": 7105 + }, + { + "epoch": 0.43272539049416925, + "grad_norm": 1.1272729232202325, + "learning_rate": 4.9377527990024496e-06, + "loss": 0.4199, + "step": 7106 + }, + { + "epoch": 0.4327862862710471, + "grad_norm": 1.0944139497897314, + "learning_rate": 4.937735102187991e-06, + "loss": 0.4408, + "step": 7107 + }, + { + "epoch": 0.432847182047925, + "grad_norm": 1.029679406756991, + "learning_rate": 4.937717402890019e-06, + "loss": 0.5163, + "step": 7108 + }, + { + "epoch": 0.43290807782480284, + "grad_norm": 0.9499059092808482, + "learning_rate": 4.93769970110855e-06, + "loss": 0.4656, + "step": 7109 + }, + { + "epoch": 0.43296897360168074, + "grad_norm": 1.1086874523553165, + "learning_rate": 4.937681996843604e-06, + "loss": 0.4515, + "step": 7110 + }, + { + "epoch": 0.4330298693785586, + "grad_norm": 0.9631739919881378, + "learning_rate": 4.937664290095198e-06, + "loss": 0.5049, + "step": 7111 + }, + { + "epoch": 0.4330907651554365, + "grad_norm": 1.055631652184079, + "learning_rate": 4.93764658086335e-06, + "loss": 0.4644, + "step": 7112 + }, + { + "epoch": 0.43315166093231433, + "grad_norm": 1.0281573128798953, + "learning_rate": 4.937628869148079e-06, + "loss": 0.4316, + "step": 7113 + }, + { + "epoch": 0.43321255670919223, + "grad_norm": 1.0434776608518104, + "learning_rate": 4.937611154949401e-06, + "loss": 0.4324, + "step": 7114 + }, + { + "epoch": 0.4332734524860701, + "grad_norm": 1.032575094663712, + "learning_rate": 4.937593438267337e-06, + "loss": 0.4465, + "step": 7115 + }, + { + "epoch": 0.433334348262948, + "grad_norm": 0.9960139920846053, + "learning_rate": 4.937575719101903e-06, + "loss": 0.4775, + "step": 7116 + }, + { + "epoch": 0.4333952440398258, + "grad_norm": 0.9876161841400812, + "learning_rate": 4.937557997453118e-06, + "loss": 0.4448, + "step": 7117 + }, + { + "epoch": 0.4334561398167037, + "grad_norm": 0.9876500196684243, + "learning_rate": 4.937540273320998e-06, + "loss": 0.4888, + "step": 7118 + }, + { + "epoch": 0.43351703559358157, + "grad_norm": 1.068044536534033, + "learning_rate": 4.937522546705564e-06, + "loss": 0.4212, + "step": 7119 + }, + { + "epoch": 0.43357793137045947, + "grad_norm": 1.0103701515483334, + "learning_rate": 4.937504817606832e-06, + "loss": 0.4818, + "step": 7120 + }, + { + "epoch": 0.4336388271473373, + "grad_norm": 0.9583081254053605, + "learning_rate": 4.937487086024821e-06, + "loss": 0.4835, + "step": 7121 + }, + { + "epoch": 0.4336997229242152, + "grad_norm": 1.0574715654278497, + "learning_rate": 4.9374693519595495e-06, + "loss": 0.458, + "step": 7122 + }, + { + "epoch": 0.43376061870109306, + "grad_norm": 1.0104196517506814, + "learning_rate": 4.937451615411034e-06, + "loss": 0.438, + "step": 7123 + }, + { + "epoch": 0.43382151447797096, + "grad_norm": 1.080897204134669, + "learning_rate": 4.937433876379294e-06, + "loss": 0.4884, + "step": 7124 + }, + { + "epoch": 0.4338824102548488, + "grad_norm": 1.0463319778066493, + "learning_rate": 4.937416134864347e-06, + "loss": 0.5322, + "step": 7125 + }, + { + "epoch": 0.4339433060317267, + "grad_norm": 1.0300216660184212, + "learning_rate": 4.937398390866211e-06, + "loss": 0.4583, + "step": 7126 + }, + { + "epoch": 0.43400420180860455, + "grad_norm": 1.10495803879854, + "learning_rate": 4.9373806443849045e-06, + "loss": 0.4302, + "step": 7127 + }, + { + "epoch": 0.43406509758548245, + "grad_norm": 1.0546796925582107, + "learning_rate": 4.937362895420445e-06, + "loss": 0.4467, + "step": 7128 + }, + { + "epoch": 0.4341259933623603, + "grad_norm": 0.9038056004819819, + "learning_rate": 4.937345143972851e-06, + "loss": 0.491, + "step": 7129 + }, + { + "epoch": 0.4341868891392382, + "grad_norm": 0.9847286495927526, + "learning_rate": 4.9373273900421405e-06, + "loss": 0.4733, + "step": 7130 + }, + { + "epoch": 0.43424778491611604, + "grad_norm": 1.0392213564169699, + "learning_rate": 4.937309633628331e-06, + "loss": 0.3771, + "step": 7131 + }, + { + "epoch": 0.43430868069299394, + "grad_norm": 1.0839067393012354, + "learning_rate": 4.937291874731441e-06, + "loss": 0.4596, + "step": 7132 + }, + { + "epoch": 0.4343695764698718, + "grad_norm": 1.0022123441914526, + "learning_rate": 4.93727411335149e-06, + "loss": 0.4679, + "step": 7133 + }, + { + "epoch": 0.4344304722467497, + "grad_norm": 1.0456732221606384, + "learning_rate": 4.937256349488493e-06, + "loss": 0.4628, + "step": 7134 + }, + { + "epoch": 0.4344913680236276, + "grad_norm": 1.0818087495984272, + "learning_rate": 4.937238583142472e-06, + "loss": 0.4391, + "step": 7135 + }, + { + "epoch": 0.43455226380050543, + "grad_norm": 0.9542866460723721, + "learning_rate": 4.937220814313441e-06, + "loss": 0.4264, + "step": 7136 + }, + { + "epoch": 0.43461315957738333, + "grad_norm": 1.0429017390458684, + "learning_rate": 4.937203043001421e-06, + "loss": 0.431, + "step": 7137 + }, + { + "epoch": 0.4346740553542612, + "grad_norm": 1.0534520453253942, + "learning_rate": 4.937185269206429e-06, + "loss": 0.4842, + "step": 7138 + }, + { + "epoch": 0.4347349511311391, + "grad_norm": 1.1381490184625294, + "learning_rate": 4.937167492928484e-06, + "loss": 0.3983, + "step": 7139 + }, + { + "epoch": 0.4347958469080169, + "grad_norm": 1.0748994133462277, + "learning_rate": 4.937149714167603e-06, + "loss": 0.3898, + "step": 7140 + }, + { + "epoch": 0.4348567426848948, + "grad_norm": 1.0231879655903215, + "learning_rate": 4.9371319329238045e-06, + "loss": 0.5229, + "step": 7141 + }, + { + "epoch": 0.43491763846177267, + "grad_norm": 1.000445344457481, + "learning_rate": 4.937114149197106e-06, + "loss": 0.4402, + "step": 7142 + }, + { + "epoch": 0.43497853423865057, + "grad_norm": 1.0995731814675522, + "learning_rate": 4.937096362987528e-06, + "loss": 0.441, + "step": 7143 + }, + { + "epoch": 0.4350394300155284, + "grad_norm": 0.9941223416568419, + "learning_rate": 4.937078574295085e-06, + "loss": 0.4522, + "step": 7144 + }, + { + "epoch": 0.4351003257924063, + "grad_norm": 1.0353178014160083, + "learning_rate": 4.937060783119798e-06, + "loss": 0.4671, + "step": 7145 + }, + { + "epoch": 0.43516122156928416, + "grad_norm": 1.0942088113085093, + "learning_rate": 4.937042989461684e-06, + "loss": 0.3881, + "step": 7146 + }, + { + "epoch": 0.43522211734616206, + "grad_norm": 0.9841655784277671, + "learning_rate": 4.937025193320762e-06, + "loss": 0.4676, + "step": 7147 + }, + { + "epoch": 0.4352830131230399, + "grad_norm": 1.0228441162541126, + "learning_rate": 4.937007394697048e-06, + "loss": 0.44, + "step": 7148 + }, + { + "epoch": 0.4353439088999178, + "grad_norm": 1.0599002253958922, + "learning_rate": 4.936989593590562e-06, + "loss": 0.4843, + "step": 7149 + }, + { + "epoch": 0.43540480467679565, + "grad_norm": 1.0445870357288252, + "learning_rate": 4.936971790001322e-06, + "loss": 0.3858, + "step": 7150 + }, + { + "epoch": 0.43546570045367355, + "grad_norm": 1.0187340694127824, + "learning_rate": 4.936953983929346e-06, + "loss": 0.3981, + "step": 7151 + }, + { + "epoch": 0.4355265962305514, + "grad_norm": 1.0087027700608464, + "learning_rate": 4.936936175374652e-06, + "loss": 0.4597, + "step": 7152 + }, + { + "epoch": 0.4355874920074293, + "grad_norm": 1.0345240063179189, + "learning_rate": 4.9369183643372566e-06, + "loss": 0.4461, + "step": 7153 + }, + { + "epoch": 0.43564838778430715, + "grad_norm": 1.0704355390742406, + "learning_rate": 4.936900550817181e-06, + "loss": 0.4531, + "step": 7154 + }, + { + "epoch": 0.43570928356118505, + "grad_norm": 1.078352773229725, + "learning_rate": 4.936882734814441e-06, + "loss": 0.3979, + "step": 7155 + }, + { + "epoch": 0.4357701793380629, + "grad_norm": 1.088381708106692, + "learning_rate": 4.936864916329056e-06, + "loss": 0.4411, + "step": 7156 + }, + { + "epoch": 0.4358310751149408, + "grad_norm": 0.9887209002503845, + "learning_rate": 4.936847095361044e-06, + "loss": 0.4778, + "step": 7157 + }, + { + "epoch": 0.43589197089181864, + "grad_norm": 1.0452793698523841, + "learning_rate": 4.936829271910421e-06, + "loss": 0.4359, + "step": 7158 + }, + { + "epoch": 0.43595286666869654, + "grad_norm": 0.8962171297415206, + "learning_rate": 4.936811445977209e-06, + "loss": 0.4463, + "step": 7159 + }, + { + "epoch": 0.4360137624455744, + "grad_norm": 1.0433837634886487, + "learning_rate": 4.9367936175614235e-06, + "loss": 0.4706, + "step": 7160 + }, + { + "epoch": 0.4360746582224523, + "grad_norm": 0.9979812537217342, + "learning_rate": 4.936775786663084e-06, + "loss": 0.4139, + "step": 7161 + }, + { + "epoch": 0.43613555399933013, + "grad_norm": 1.092315302636205, + "learning_rate": 4.936757953282207e-06, + "loss": 0.4066, + "step": 7162 + }, + { + "epoch": 0.43619644977620803, + "grad_norm": 0.9506331849030417, + "learning_rate": 4.9367401174188115e-06, + "loss": 0.4844, + "step": 7163 + }, + { + "epoch": 0.4362573455530859, + "grad_norm": 0.993679120924777, + "learning_rate": 4.9367222790729165e-06, + "loss": 0.4197, + "step": 7164 + }, + { + "epoch": 0.4363182413299638, + "grad_norm": 1.0041227154625225, + "learning_rate": 4.936704438244539e-06, + "loss": 0.4434, + "step": 7165 + }, + { + "epoch": 0.4363791371068416, + "grad_norm": 1.0668273479298038, + "learning_rate": 4.936686594933699e-06, + "loss": 0.4385, + "step": 7166 + }, + { + "epoch": 0.4364400328837195, + "grad_norm": 1.1055154774973153, + "learning_rate": 4.936668749140412e-06, + "loss": 0.5047, + "step": 7167 + }, + { + "epoch": 0.43650092866059736, + "grad_norm": 1.045481017536549, + "learning_rate": 4.936650900864698e-06, + "loss": 0.5166, + "step": 7168 + }, + { + "epoch": 0.43656182443747527, + "grad_norm": 1.1268498257145763, + "learning_rate": 4.936633050106574e-06, + "loss": 0.4728, + "step": 7169 + }, + { + "epoch": 0.4366227202143531, + "grad_norm": 0.987426318748915, + "learning_rate": 4.93661519686606e-06, + "loss": 0.4913, + "step": 7170 + }, + { + "epoch": 0.436683615991231, + "grad_norm": 1.0029035149527672, + "learning_rate": 4.936597341143172e-06, + "loss": 0.5284, + "step": 7171 + }, + { + "epoch": 0.43674451176810886, + "grad_norm": 1.1662672609119245, + "learning_rate": 4.936579482937931e-06, + "loss": 0.4409, + "step": 7172 + }, + { + "epoch": 0.43680540754498676, + "grad_norm": 1.0668791488875486, + "learning_rate": 4.936561622250352e-06, + "loss": 0.4519, + "step": 7173 + }, + { + "epoch": 0.4368663033218646, + "grad_norm": 1.0053309065504548, + "learning_rate": 4.936543759080455e-06, + "loss": 0.4368, + "step": 7174 + }, + { + "epoch": 0.4369271990987425, + "grad_norm": 1.0482231232373325, + "learning_rate": 4.936525893428258e-06, + "loss": 0.4934, + "step": 7175 + }, + { + "epoch": 0.4369880948756204, + "grad_norm": 0.9671227301366416, + "learning_rate": 4.936508025293779e-06, + "loss": 0.4621, + "step": 7176 + }, + { + "epoch": 0.43704899065249825, + "grad_norm": 0.9267957636017232, + "learning_rate": 4.936490154677036e-06, + "loss": 0.4814, + "step": 7177 + }, + { + "epoch": 0.43710988642937615, + "grad_norm": 0.9981997568321205, + "learning_rate": 4.936472281578047e-06, + "loss": 0.4838, + "step": 7178 + }, + { + "epoch": 0.437170782206254, + "grad_norm": 1.0785307981781407, + "learning_rate": 4.936454405996832e-06, + "loss": 0.4347, + "step": 7179 + }, + { + "epoch": 0.4372316779831319, + "grad_norm": 0.9728557170372407, + "learning_rate": 4.936436527933408e-06, + "loss": 0.4831, + "step": 7180 + }, + { + "epoch": 0.43729257376000974, + "grad_norm": 1.0352113688882916, + "learning_rate": 4.936418647387792e-06, + "loss": 0.5007, + "step": 7181 + }, + { + "epoch": 0.43735346953688764, + "grad_norm": 0.9690031655265869, + "learning_rate": 4.936400764360004e-06, + "loss": 0.3916, + "step": 7182 + }, + { + "epoch": 0.4374143653137655, + "grad_norm": 1.0310503252623207, + "learning_rate": 4.936382878850061e-06, + "loss": 0.4876, + "step": 7183 + }, + { + "epoch": 0.4374752610906434, + "grad_norm": 1.037427995809394, + "learning_rate": 4.936364990857983e-06, + "loss": 0.4498, + "step": 7184 + }, + { + "epoch": 0.43753615686752123, + "grad_norm": 1.1295536169305984, + "learning_rate": 4.936347100383786e-06, + "loss": 0.3962, + "step": 7185 + }, + { + "epoch": 0.43759705264439913, + "grad_norm": 0.9930325715715437, + "learning_rate": 4.936329207427489e-06, + "loss": 0.4374, + "step": 7186 + }, + { + "epoch": 0.437657948421277, + "grad_norm": 0.9697658784871676, + "learning_rate": 4.936311311989111e-06, + "loss": 0.5, + "step": 7187 + }, + { + "epoch": 0.4377188441981549, + "grad_norm": 1.1303722125677462, + "learning_rate": 4.9362934140686695e-06, + "loss": 0.4517, + "step": 7188 + }, + { + "epoch": 0.4377797399750327, + "grad_norm": 0.9691531038673106, + "learning_rate": 4.936275513666183e-06, + "loss": 0.4261, + "step": 7189 + }, + { + "epoch": 0.4378406357519106, + "grad_norm": 1.0629793003859291, + "learning_rate": 4.93625761078167e-06, + "loss": 0.4431, + "step": 7190 + }, + { + "epoch": 0.43790153152878847, + "grad_norm": 1.0349330836770902, + "learning_rate": 4.9362397054151476e-06, + "loss": 0.4626, + "step": 7191 + }, + { + "epoch": 0.43796242730566637, + "grad_norm": 1.048817896677172, + "learning_rate": 4.936221797566636e-06, + "loss": 0.4892, + "step": 7192 + }, + { + "epoch": 0.4380233230825442, + "grad_norm": 1.121295311179444, + "learning_rate": 4.936203887236151e-06, + "loss": 0.3971, + "step": 7193 + }, + { + "epoch": 0.4380842188594221, + "grad_norm": 1.0662702267845394, + "learning_rate": 4.9361859744237126e-06, + "loss": 0.45, + "step": 7194 + }, + { + "epoch": 0.43814511463629996, + "grad_norm": 1.1005947748928273, + "learning_rate": 4.936168059129339e-06, + "loss": 0.4183, + "step": 7195 + }, + { + "epoch": 0.43820601041317786, + "grad_norm": 0.9789167929872251, + "learning_rate": 4.936150141353047e-06, + "loss": 0.4119, + "step": 7196 + }, + { + "epoch": 0.4382669061900557, + "grad_norm": 1.0940556702760498, + "learning_rate": 4.936132221094857e-06, + "loss": 0.4404, + "step": 7197 + }, + { + "epoch": 0.4383278019669336, + "grad_norm": 1.0527833425271735, + "learning_rate": 4.936114298354786e-06, + "loss": 0.4573, + "step": 7198 + }, + { + "epoch": 0.43838869774381145, + "grad_norm": 1.0065547709341545, + "learning_rate": 4.9360963731328515e-06, + "loss": 0.4219, + "step": 7199 + }, + { + "epoch": 0.43844959352068935, + "grad_norm": 0.954669757886232, + "learning_rate": 4.9360784454290735e-06, + "loss": 0.4299, + "step": 7200 + }, + { + "epoch": 0.4385104892975672, + "grad_norm": 1.0280405412167883, + "learning_rate": 4.9360605152434695e-06, + "loss": 0.4603, + "step": 7201 + }, + { + "epoch": 0.4385713850744451, + "grad_norm": 1.0437248658431337, + "learning_rate": 4.936042582576057e-06, + "loss": 0.4543, + "step": 7202 + }, + { + "epoch": 0.43863228085132294, + "grad_norm": 0.9619793927138537, + "learning_rate": 4.9360246474268555e-06, + "loss": 0.4374, + "step": 7203 + }, + { + "epoch": 0.43869317662820084, + "grad_norm": 1.0626300043110717, + "learning_rate": 4.936006709795883e-06, + "loss": 0.4924, + "step": 7204 + }, + { + "epoch": 0.4387540724050787, + "grad_norm": 1.0816309333946716, + "learning_rate": 4.935988769683157e-06, + "loss": 0.4228, + "step": 7205 + }, + { + "epoch": 0.4388149681819566, + "grad_norm": 0.9673930963357514, + "learning_rate": 4.935970827088696e-06, + "loss": 0.4527, + "step": 7206 + }, + { + "epoch": 0.43887586395883443, + "grad_norm": 1.0682925071184193, + "learning_rate": 4.935952882012519e-06, + "loss": 0.423, + "step": 7207 + }, + { + "epoch": 0.43893675973571233, + "grad_norm": 1.032240291367969, + "learning_rate": 4.935934934454644e-06, + "loss": 0.462, + "step": 7208 + }, + { + "epoch": 0.4389976555125902, + "grad_norm": 1.0415584351563743, + "learning_rate": 4.935916984415089e-06, + "loss": 0.4683, + "step": 7209 + }, + { + "epoch": 0.4390585512894681, + "grad_norm": 0.9724967979983258, + "learning_rate": 4.935899031893873e-06, + "loss": 0.4272, + "step": 7210 + }, + { + "epoch": 0.4391194470663459, + "grad_norm": 1.1418079767400402, + "learning_rate": 4.935881076891013e-06, + "loss": 0.3959, + "step": 7211 + }, + { + "epoch": 0.4391803428432238, + "grad_norm": 1.0189868568635436, + "learning_rate": 4.935863119406528e-06, + "loss": 0.4802, + "step": 7212 + }, + { + "epoch": 0.43924123862010167, + "grad_norm": 0.9818597928960978, + "learning_rate": 4.935845159440435e-06, + "loss": 0.4793, + "step": 7213 + }, + { + "epoch": 0.43930213439697957, + "grad_norm": 0.9181510147765516, + "learning_rate": 4.935827196992756e-06, + "loss": 0.4552, + "step": 7214 + }, + { + "epoch": 0.4393630301738574, + "grad_norm": 1.1066069877537608, + "learning_rate": 4.935809232063505e-06, + "loss": 0.4433, + "step": 7215 + }, + { + "epoch": 0.4394239259507353, + "grad_norm": 0.981504036068618, + "learning_rate": 4.935791264652704e-06, + "loss": 0.4417, + "step": 7216 + }, + { + "epoch": 0.4394848217276132, + "grad_norm": 0.9979948817766211, + "learning_rate": 4.935773294760369e-06, + "loss": 0.4908, + "step": 7217 + }, + { + "epoch": 0.43954571750449106, + "grad_norm": 0.9124366034028293, + "learning_rate": 4.935755322386517e-06, + "loss": 0.5051, + "step": 7218 + }, + { + "epoch": 0.43960661328136896, + "grad_norm": 1.0597469747462063, + "learning_rate": 4.93573734753117e-06, + "loss": 0.4416, + "step": 7219 + }, + { + "epoch": 0.4396675090582468, + "grad_norm": 1.056285256349971, + "learning_rate": 4.935719370194344e-06, + "loss": 0.3897, + "step": 7220 + }, + { + "epoch": 0.4397284048351247, + "grad_norm": 1.1765673109023065, + "learning_rate": 4.935701390376057e-06, + "loss": 0.4886, + "step": 7221 + }, + { + "epoch": 0.43978930061200255, + "grad_norm": 1.034978892894336, + "learning_rate": 4.9356834080763295e-06, + "loss": 0.4227, + "step": 7222 + }, + { + "epoch": 0.43985019638888045, + "grad_norm": 1.0463863572630805, + "learning_rate": 4.935665423295177e-06, + "loss": 0.412, + "step": 7223 + }, + { + "epoch": 0.4399110921657583, + "grad_norm": 0.9686586931233305, + "learning_rate": 4.9356474360326204e-06, + "loss": 0.43, + "step": 7224 + }, + { + "epoch": 0.4399719879426362, + "grad_norm": 1.0622755281453584, + "learning_rate": 4.935629446288676e-06, + "loss": 0.4799, + "step": 7225 + }, + { + "epoch": 0.44003288371951405, + "grad_norm": 1.0320147104850963, + "learning_rate": 4.935611454063364e-06, + "loss": 0.4375, + "step": 7226 + }, + { + "epoch": 0.44009377949639195, + "grad_norm": 0.9745034858177507, + "learning_rate": 4.9355934593567e-06, + "loss": 0.4211, + "step": 7227 + }, + { + "epoch": 0.4401546752732698, + "grad_norm": 1.1221695585023308, + "learning_rate": 4.935575462168705e-06, + "loss": 0.428, + "step": 7228 + }, + { + "epoch": 0.4402155710501477, + "grad_norm": 0.9455276973337272, + "learning_rate": 4.935557462499396e-06, + "loss": 0.5145, + "step": 7229 + }, + { + "epoch": 0.44027646682702554, + "grad_norm": 0.9939544126851209, + "learning_rate": 4.935539460348793e-06, + "loss": 0.4706, + "step": 7230 + }, + { + "epoch": 0.44033736260390344, + "grad_norm": 1.1837427424144529, + "learning_rate": 4.935521455716912e-06, + "loss": 0.4222, + "step": 7231 + }, + { + "epoch": 0.4403982583807813, + "grad_norm": 1.0943471616089913, + "learning_rate": 4.935503448603772e-06, + "loss": 0.4218, + "step": 7232 + }, + { + "epoch": 0.4404591541576592, + "grad_norm": 1.060743223269677, + "learning_rate": 4.935485439009392e-06, + "loss": 0.4804, + "step": 7233 + }, + { + "epoch": 0.44052004993453703, + "grad_norm": 1.0901607468779393, + "learning_rate": 4.93546742693379e-06, + "loss": 0.4651, + "step": 7234 + }, + { + "epoch": 0.44058094571141493, + "grad_norm": 1.0579541650059776, + "learning_rate": 4.935449412376985e-06, + "loss": 0.4449, + "step": 7235 + }, + { + "epoch": 0.4406418414882928, + "grad_norm": 1.0074546239209186, + "learning_rate": 4.935431395338994e-06, + "loss": 0.4901, + "step": 7236 + }, + { + "epoch": 0.4407027372651707, + "grad_norm": 0.9787527195166797, + "learning_rate": 4.9354133758198365e-06, + "loss": 0.4207, + "step": 7237 + }, + { + "epoch": 0.4407636330420485, + "grad_norm": 0.9987710653268562, + "learning_rate": 4.935395353819531e-06, + "loss": 0.4728, + "step": 7238 + }, + { + "epoch": 0.4408245288189264, + "grad_norm": 1.0397361189019958, + "learning_rate": 4.9353773293380945e-06, + "loss": 0.503, + "step": 7239 + }, + { + "epoch": 0.44088542459580427, + "grad_norm": 1.0714125721108727, + "learning_rate": 4.935359302375547e-06, + "loss": 0.4311, + "step": 7240 + }, + { + "epoch": 0.44094632037268217, + "grad_norm": 0.9328561512269657, + "learning_rate": 4.9353412729319054e-06, + "loss": 0.5172, + "step": 7241 + }, + { + "epoch": 0.44100721614956, + "grad_norm": 1.0970715116289957, + "learning_rate": 4.935323241007189e-06, + "loss": 0.4041, + "step": 7242 + }, + { + "epoch": 0.4410681119264379, + "grad_norm": 1.1222463345439022, + "learning_rate": 4.935305206601415e-06, + "loss": 0.4134, + "step": 7243 + }, + { + "epoch": 0.44112900770331576, + "grad_norm": 1.0283764620826261, + "learning_rate": 4.935287169714604e-06, + "loss": 0.4262, + "step": 7244 + }, + { + "epoch": 0.44118990348019366, + "grad_norm": 0.9915555241159775, + "learning_rate": 4.935269130346772e-06, + "loss": 0.4731, + "step": 7245 + }, + { + "epoch": 0.4412507992570715, + "grad_norm": 1.006928867150729, + "learning_rate": 4.93525108849794e-06, + "loss": 0.4542, + "step": 7246 + }, + { + "epoch": 0.4413116950339494, + "grad_norm": 0.9480828485734594, + "learning_rate": 4.935233044168123e-06, + "loss": 0.4606, + "step": 7247 + }, + { + "epoch": 0.44137259081082725, + "grad_norm": 1.0545184174735345, + "learning_rate": 4.935214997357343e-06, + "loss": 0.4338, + "step": 7248 + }, + { + "epoch": 0.44143348658770515, + "grad_norm": 1.048325753737733, + "learning_rate": 4.9351969480656155e-06, + "loss": 0.4684, + "step": 7249 + }, + { + "epoch": 0.441494382364583, + "grad_norm": 1.1162227191266734, + "learning_rate": 4.93517889629296e-06, + "loss": 0.397, + "step": 7250 + }, + { + "epoch": 0.4415552781414609, + "grad_norm": 1.0175302705806375, + "learning_rate": 4.9351608420393945e-06, + "loss": 0.4383, + "step": 7251 + }, + { + "epoch": 0.44161617391833874, + "grad_norm": 0.980201468796836, + "learning_rate": 4.935142785304939e-06, + "loss": 0.5573, + "step": 7252 + }, + { + "epoch": 0.44167706969521664, + "grad_norm": 0.9212483096615662, + "learning_rate": 4.93512472608961e-06, + "loss": 0.4731, + "step": 7253 + }, + { + "epoch": 0.4417379654720945, + "grad_norm": 0.9399981971024571, + "learning_rate": 4.9351066643934265e-06, + "loss": 0.5709, + "step": 7254 + }, + { + "epoch": 0.4417988612489724, + "grad_norm": 1.0339409178673993, + "learning_rate": 4.9350886002164065e-06, + "loss": 0.5009, + "step": 7255 + }, + { + "epoch": 0.44185975702585023, + "grad_norm": 0.99834007858473, + "learning_rate": 4.935070533558569e-06, + "loss": 0.4185, + "step": 7256 + }, + { + "epoch": 0.44192065280272813, + "grad_norm": 1.0624000752243477, + "learning_rate": 4.935052464419933e-06, + "loss": 0.4682, + "step": 7257 + }, + { + "epoch": 0.44198154857960603, + "grad_norm": 0.973687227151754, + "learning_rate": 4.935034392800516e-06, + "loss": 0.5543, + "step": 7258 + }, + { + "epoch": 0.4420424443564839, + "grad_norm": 1.0068182193734654, + "learning_rate": 4.9350163187003365e-06, + "loss": 0.4552, + "step": 7259 + }, + { + "epoch": 0.4421033401333618, + "grad_norm": 0.9843116036250437, + "learning_rate": 4.934998242119412e-06, + "loss": 0.4141, + "step": 7260 + }, + { + "epoch": 0.4421642359102396, + "grad_norm": 1.027228857976661, + "learning_rate": 4.934980163057763e-06, + "loss": 0.4377, + "step": 7261 + }, + { + "epoch": 0.4422251316871175, + "grad_norm": 0.9952628868174551, + "learning_rate": 4.934962081515406e-06, + "loss": 0.45, + "step": 7262 + }, + { + "epoch": 0.44228602746399537, + "grad_norm": 1.0404973070159145, + "learning_rate": 4.934943997492362e-06, + "loss": 0.4257, + "step": 7263 + }, + { + "epoch": 0.44234692324087327, + "grad_norm": 1.0497437472552167, + "learning_rate": 4.934925910988646e-06, + "loss": 0.4401, + "step": 7264 + }, + { + "epoch": 0.4424078190177511, + "grad_norm": 1.0203829488020648, + "learning_rate": 4.934907822004279e-06, + "loss": 0.4826, + "step": 7265 + }, + { + "epoch": 0.442468714794629, + "grad_norm": 0.965779806275302, + "learning_rate": 4.934889730539278e-06, + "loss": 0.468, + "step": 7266 + }, + { + "epoch": 0.44252961057150686, + "grad_norm": 1.1110817052238522, + "learning_rate": 4.9348716365936625e-06, + "loss": 0.4116, + "step": 7267 + }, + { + "epoch": 0.44259050634838476, + "grad_norm": 1.0502052498915118, + "learning_rate": 4.9348535401674495e-06, + "loss": 0.4554, + "step": 7268 + }, + { + "epoch": 0.4426514021252626, + "grad_norm": 0.9995581945257171, + "learning_rate": 4.934835441260659e-06, + "loss": 0.4096, + "step": 7269 + }, + { + "epoch": 0.4427122979021405, + "grad_norm": 0.9411506518630881, + "learning_rate": 4.934817339873309e-06, + "loss": 0.46, + "step": 7270 + }, + { + "epoch": 0.44277319367901835, + "grad_norm": 1.0338899812622364, + "learning_rate": 4.934799236005417e-06, + "loss": 0.4019, + "step": 7271 + }, + { + "epoch": 0.44283408945589625, + "grad_norm": 1.0467458656485693, + "learning_rate": 4.934781129657002e-06, + "loss": 0.4861, + "step": 7272 + }, + { + "epoch": 0.4428949852327741, + "grad_norm": 0.9814579153885988, + "learning_rate": 4.934763020828084e-06, + "loss": 0.4664, + "step": 7273 + }, + { + "epoch": 0.442955881009652, + "grad_norm": 1.001744679966709, + "learning_rate": 4.934744909518679e-06, + "loss": 0.4201, + "step": 7274 + }, + { + "epoch": 0.44301677678652984, + "grad_norm": 1.1118500804279066, + "learning_rate": 4.934726795728806e-06, + "loss": 0.4945, + "step": 7275 + }, + { + "epoch": 0.44307767256340774, + "grad_norm": 0.984748827990905, + "learning_rate": 4.934708679458486e-06, + "loss": 0.4588, + "step": 7276 + }, + { + "epoch": 0.4431385683402856, + "grad_norm": 1.111309715531734, + "learning_rate": 4.934690560707733e-06, + "loss": 0.4331, + "step": 7277 + }, + { + "epoch": 0.4431994641171635, + "grad_norm": 1.0880103374217203, + "learning_rate": 4.934672439476569e-06, + "loss": 0.4029, + "step": 7278 + }, + { + "epoch": 0.44326035989404133, + "grad_norm": 1.0024598913450125, + "learning_rate": 4.934654315765012e-06, + "loss": 0.4102, + "step": 7279 + }, + { + "epoch": 0.44332125567091923, + "grad_norm": 0.9812509377506362, + "learning_rate": 4.934636189573079e-06, + "loss": 0.4824, + "step": 7280 + }, + { + "epoch": 0.4433821514477971, + "grad_norm": 0.986229534933318, + "learning_rate": 4.934618060900789e-06, + "loss": 0.4734, + "step": 7281 + }, + { + "epoch": 0.443443047224675, + "grad_norm": 1.0394075385850179, + "learning_rate": 4.934599929748161e-06, + "loss": 0.4671, + "step": 7282 + }, + { + "epoch": 0.4435039430015528, + "grad_norm": 0.9701527796737699, + "learning_rate": 4.934581796115213e-06, + "loss": 0.4522, + "step": 7283 + }, + { + "epoch": 0.4435648387784307, + "grad_norm": 0.9287471671589884, + "learning_rate": 4.934563660001964e-06, + "loss": 0.4317, + "step": 7284 + }, + { + "epoch": 0.44362573455530857, + "grad_norm": 0.992955101088432, + "learning_rate": 4.934545521408433e-06, + "loss": 0.4483, + "step": 7285 + }, + { + "epoch": 0.44368663033218647, + "grad_norm": 1.0800469122952792, + "learning_rate": 4.934527380334636e-06, + "loss": 0.4826, + "step": 7286 + }, + { + "epoch": 0.4437475261090643, + "grad_norm": 1.019098113015284, + "learning_rate": 4.934509236780593e-06, + "loss": 0.4416, + "step": 7287 + }, + { + "epoch": 0.4438084218859422, + "grad_norm": 0.9956572795705897, + "learning_rate": 4.9344910907463246e-06, + "loss": 0.4239, + "step": 7288 + }, + { + "epoch": 0.44386931766282006, + "grad_norm": 1.0032293861974197, + "learning_rate": 4.934472942231846e-06, + "loss": 0.4889, + "step": 7289 + }, + { + "epoch": 0.44393021343969796, + "grad_norm": 1.0791753977792846, + "learning_rate": 4.934454791237177e-06, + "loss": 0.4341, + "step": 7290 + }, + { + "epoch": 0.4439911092165758, + "grad_norm": 1.0351698214727922, + "learning_rate": 4.934436637762337e-06, + "loss": 0.372, + "step": 7291 + }, + { + "epoch": 0.4440520049934537, + "grad_norm": 0.997017010764517, + "learning_rate": 4.934418481807342e-06, + "loss": 0.4451, + "step": 7292 + }, + { + "epoch": 0.44411290077033155, + "grad_norm": 1.003930700282819, + "learning_rate": 4.934400323372213e-06, + "loss": 0.5023, + "step": 7293 + }, + { + "epoch": 0.44417379654720945, + "grad_norm": 1.0858477059927678, + "learning_rate": 4.9343821624569675e-06, + "loss": 0.4453, + "step": 7294 + }, + { + "epoch": 0.4442346923240873, + "grad_norm": 0.9669911845327068, + "learning_rate": 4.9343639990616234e-06, + "loss": 0.4515, + "step": 7295 + }, + { + "epoch": 0.4442955881009652, + "grad_norm": 1.0738477376536049, + "learning_rate": 4.934345833186201e-06, + "loss": 0.4189, + "step": 7296 + }, + { + "epoch": 0.44435648387784304, + "grad_norm": 0.9888363452631758, + "learning_rate": 4.934327664830717e-06, + "loss": 0.4929, + "step": 7297 + }, + { + "epoch": 0.44441737965472095, + "grad_norm": 1.0163203845157402, + "learning_rate": 4.934309493995191e-06, + "loss": 0.4685, + "step": 7298 + }, + { + "epoch": 0.44447827543159885, + "grad_norm": 0.9834149795001147, + "learning_rate": 4.934291320679641e-06, + "loss": 0.4571, + "step": 7299 + }, + { + "epoch": 0.4445391712084767, + "grad_norm": 1.0112421769174458, + "learning_rate": 4.934273144884085e-06, + "loss": 0.4842, + "step": 7300 + }, + { + "epoch": 0.4446000669853546, + "grad_norm": 1.0251551149354612, + "learning_rate": 4.934254966608543e-06, + "loss": 0.4434, + "step": 7301 + }, + { + "epoch": 0.44466096276223244, + "grad_norm": 1.0385533563452014, + "learning_rate": 4.934236785853032e-06, + "loss": 0.45, + "step": 7302 + }, + { + "epoch": 0.44472185853911034, + "grad_norm": 1.0272465991243627, + "learning_rate": 4.9342186026175705e-06, + "loss": 0.4941, + "step": 7303 + }, + { + "epoch": 0.4447827543159882, + "grad_norm": 1.0091444629616264, + "learning_rate": 4.93420041690218e-06, + "loss": 0.4544, + "step": 7304 + }, + { + "epoch": 0.4448436500928661, + "grad_norm": 1.1017991407737997, + "learning_rate": 4.934182228706875e-06, + "loss": 0.4521, + "step": 7305 + }, + { + "epoch": 0.44490454586974393, + "grad_norm": 0.9520079579142149, + "learning_rate": 4.934164038031676e-06, + "loss": 0.4144, + "step": 7306 + }, + { + "epoch": 0.44496544164662183, + "grad_norm": 0.9853790608542587, + "learning_rate": 4.934145844876601e-06, + "loss": 0.4546, + "step": 7307 + }, + { + "epoch": 0.4450263374234997, + "grad_norm": 0.9927983734101863, + "learning_rate": 4.934127649241669e-06, + "loss": 0.4899, + "step": 7308 + }, + { + "epoch": 0.4450872332003776, + "grad_norm": 1.065237586880423, + "learning_rate": 4.934109451126899e-06, + "loss": 0.4616, + "step": 7309 + }, + { + "epoch": 0.4451481289772554, + "grad_norm": 1.0599718971366985, + "learning_rate": 4.934091250532308e-06, + "loss": 0.4483, + "step": 7310 + }, + { + "epoch": 0.4452090247541333, + "grad_norm": 1.038722053106346, + "learning_rate": 4.934073047457915e-06, + "loss": 0.4764, + "step": 7311 + }, + { + "epoch": 0.44526992053101117, + "grad_norm": 1.034930102970806, + "learning_rate": 4.934054841903741e-06, + "loss": 0.496, + "step": 7312 + }, + { + "epoch": 0.44533081630788907, + "grad_norm": 1.0263504429633639, + "learning_rate": 4.934036633869801e-06, + "loss": 0.4985, + "step": 7313 + }, + { + "epoch": 0.4453917120847669, + "grad_norm": 0.954428480082317, + "learning_rate": 4.934018423356116e-06, + "loss": 0.4612, + "step": 7314 + }, + { + "epoch": 0.4454526078616448, + "grad_norm": 1.0380811013425528, + "learning_rate": 4.934000210362703e-06, + "loss": 0.4657, + "step": 7315 + }, + { + "epoch": 0.44551350363852266, + "grad_norm": 0.9495157565936104, + "learning_rate": 4.933981994889581e-06, + "loss": 0.4711, + "step": 7316 + }, + { + "epoch": 0.44557439941540056, + "grad_norm": 1.0904241048974326, + "learning_rate": 4.933963776936769e-06, + "loss": 0.3848, + "step": 7317 + }, + { + "epoch": 0.4456352951922784, + "grad_norm": 1.0373619497252407, + "learning_rate": 4.933945556504285e-06, + "loss": 0.4438, + "step": 7318 + }, + { + "epoch": 0.4456961909691563, + "grad_norm": 1.0336632995772932, + "learning_rate": 4.933927333592149e-06, + "loss": 0.3951, + "step": 7319 + }, + { + "epoch": 0.44575708674603415, + "grad_norm": 1.0799052088267815, + "learning_rate": 4.933909108200377e-06, + "loss": 0.4595, + "step": 7320 + }, + { + "epoch": 0.44581798252291205, + "grad_norm": 1.0099203850808276, + "learning_rate": 4.9338908803289896e-06, + "loss": 0.4398, + "step": 7321 + }, + { + "epoch": 0.4458788782997899, + "grad_norm": 1.1121078312599473, + "learning_rate": 4.933872649978005e-06, + "loss": 0.425, + "step": 7322 + }, + { + "epoch": 0.4459397740766678, + "grad_norm": 1.0883079788901158, + "learning_rate": 4.933854417147441e-06, + "loss": 0.4651, + "step": 7323 + }, + { + "epoch": 0.44600066985354564, + "grad_norm": 0.935454529114295, + "learning_rate": 4.9338361818373174e-06, + "loss": 0.4861, + "step": 7324 + }, + { + "epoch": 0.44606156563042354, + "grad_norm": 1.0128105645785883, + "learning_rate": 4.9338179440476515e-06, + "loss": 0.4766, + "step": 7325 + }, + { + "epoch": 0.4461224614073014, + "grad_norm": 0.9554429473832693, + "learning_rate": 4.9337997037784635e-06, + "loss": 0.4631, + "step": 7326 + }, + { + "epoch": 0.4461833571841793, + "grad_norm": 1.0575443557323876, + "learning_rate": 4.9337814610297695e-06, + "loss": 0.4145, + "step": 7327 + }, + { + "epoch": 0.44624425296105713, + "grad_norm": 0.9671778998589864, + "learning_rate": 4.933763215801591e-06, + "loss": 0.465, + "step": 7328 + }, + { + "epoch": 0.44630514873793503, + "grad_norm": 1.0531062950625234, + "learning_rate": 4.933744968093944e-06, + "loss": 0.4208, + "step": 7329 + }, + { + "epoch": 0.4463660445148129, + "grad_norm": 1.034118447337677, + "learning_rate": 4.933726717906849e-06, + "loss": 0.4414, + "step": 7330 + }, + { + "epoch": 0.4464269402916908, + "grad_norm": 1.023714250189287, + "learning_rate": 4.933708465240323e-06, + "loss": 0.4582, + "step": 7331 + }, + { + "epoch": 0.4464878360685686, + "grad_norm": 0.985682795812779, + "learning_rate": 4.933690210094386e-06, + "loss": 0.429, + "step": 7332 + }, + { + "epoch": 0.4465487318454465, + "grad_norm": 0.9788396541696571, + "learning_rate": 4.933671952469056e-06, + "loss": 0.5453, + "step": 7333 + }, + { + "epoch": 0.44660962762232437, + "grad_norm": 1.0665741088419052, + "learning_rate": 4.933653692364352e-06, + "loss": 0.4297, + "step": 7334 + }, + { + "epoch": 0.44667052339920227, + "grad_norm": 1.0167996962054406, + "learning_rate": 4.933635429780291e-06, + "loss": 0.4956, + "step": 7335 + }, + { + "epoch": 0.4467314191760801, + "grad_norm": 1.0956211682592765, + "learning_rate": 4.933617164716894e-06, + "loss": 0.3805, + "step": 7336 + }, + { + "epoch": 0.446792314952958, + "grad_norm": 1.0044193106606811, + "learning_rate": 4.933598897174178e-06, + "loss": 0.4546, + "step": 7337 + }, + { + "epoch": 0.44685321072983586, + "grad_norm": 0.8911059740857264, + "learning_rate": 4.933580627152162e-06, + "loss": 0.5478, + "step": 7338 + }, + { + "epoch": 0.44691410650671376, + "grad_norm": 1.0516666623508266, + "learning_rate": 4.9335623546508645e-06, + "loss": 0.4319, + "step": 7339 + }, + { + "epoch": 0.44697500228359166, + "grad_norm": 1.1005769286440323, + "learning_rate": 4.933544079670304e-06, + "loss": 0.4622, + "step": 7340 + }, + { + "epoch": 0.4470358980604695, + "grad_norm": 1.0804731155690794, + "learning_rate": 4.9335258022105e-06, + "loss": 0.4825, + "step": 7341 + }, + { + "epoch": 0.4470967938373474, + "grad_norm": 1.011506050620435, + "learning_rate": 4.93350752227147e-06, + "loss": 0.4792, + "step": 7342 + }, + { + "epoch": 0.44715768961422525, + "grad_norm": 1.0197844438136592, + "learning_rate": 4.933489239853234e-06, + "loss": 0.4629, + "step": 7343 + }, + { + "epoch": 0.44721858539110315, + "grad_norm": 1.06995840354818, + "learning_rate": 4.933470954955809e-06, + "loss": 0.491, + "step": 7344 + }, + { + "epoch": 0.447279481167981, + "grad_norm": 1.0575762770118746, + "learning_rate": 4.933452667579215e-06, + "loss": 0.498, + "step": 7345 + }, + { + "epoch": 0.4473403769448589, + "grad_norm": 1.0357950807121394, + "learning_rate": 4.933434377723469e-06, + "loss": 0.4427, + "step": 7346 + }, + { + "epoch": 0.44740127272173674, + "grad_norm": 0.9570855681319796, + "learning_rate": 4.9334160853885915e-06, + "loss": 0.4354, + "step": 7347 + }, + { + "epoch": 0.44746216849861464, + "grad_norm": 1.1157934181011848, + "learning_rate": 4.9333977905746e-06, + "loss": 0.4187, + "step": 7348 + }, + { + "epoch": 0.4475230642754925, + "grad_norm": 0.9729921894865784, + "learning_rate": 4.933379493281514e-06, + "loss": 0.4602, + "step": 7349 + }, + { + "epoch": 0.4475839600523704, + "grad_norm": 1.0449068895370273, + "learning_rate": 4.933361193509351e-06, + "loss": 0.3993, + "step": 7350 + }, + { + "epoch": 0.44764485582924823, + "grad_norm": 1.0273846338227244, + "learning_rate": 4.93334289125813e-06, + "loss": 0.4268, + "step": 7351 + }, + { + "epoch": 0.44770575160612613, + "grad_norm": 0.9884138865101164, + "learning_rate": 4.93332458652787e-06, + "loss": 0.4483, + "step": 7352 + }, + { + "epoch": 0.447766647383004, + "grad_norm": 1.0281421547585625, + "learning_rate": 4.93330627931859e-06, + "loss": 0.444, + "step": 7353 + }, + { + "epoch": 0.4478275431598819, + "grad_norm": 1.1575063893309854, + "learning_rate": 4.9332879696303074e-06, + "loss": 0.3997, + "step": 7354 + }, + { + "epoch": 0.4478884389367597, + "grad_norm": 1.024448311510132, + "learning_rate": 4.933269657463041e-06, + "loss": 0.4614, + "step": 7355 + }, + { + "epoch": 0.4479493347136376, + "grad_norm": 1.008793560689516, + "learning_rate": 4.933251342816811e-06, + "loss": 0.4771, + "step": 7356 + }, + { + "epoch": 0.44801023049051547, + "grad_norm": 1.0806585620957594, + "learning_rate": 4.933233025691636e-06, + "loss": 0.4117, + "step": 7357 + }, + { + "epoch": 0.44807112626739337, + "grad_norm": 1.0498729405285028, + "learning_rate": 4.933214706087533e-06, + "loss": 0.4597, + "step": 7358 + }, + { + "epoch": 0.4481320220442712, + "grad_norm": 1.052800215888779, + "learning_rate": 4.933196384004521e-06, + "loss": 0.4693, + "step": 7359 + }, + { + "epoch": 0.4481929178211491, + "grad_norm": 1.001559399962401, + "learning_rate": 4.933178059442619e-06, + "loss": 0.4292, + "step": 7360 + }, + { + "epoch": 0.44825381359802696, + "grad_norm": 1.1343155857473608, + "learning_rate": 4.9331597324018465e-06, + "loss": 0.4597, + "step": 7361 + }, + { + "epoch": 0.44831470937490486, + "grad_norm": 1.0199986625525408, + "learning_rate": 4.933141402882221e-06, + "loss": 0.489, + "step": 7362 + }, + { + "epoch": 0.4483756051517827, + "grad_norm": 1.0554293629555378, + "learning_rate": 4.9331230708837614e-06, + "loss": 0.411, + "step": 7363 + }, + { + "epoch": 0.4484365009286606, + "grad_norm": 0.9396617443827018, + "learning_rate": 4.933104736406487e-06, + "loss": 0.4759, + "step": 7364 + }, + { + "epoch": 0.44849739670553845, + "grad_norm": 0.9522359679257205, + "learning_rate": 4.933086399450417e-06, + "loss": 0.4548, + "step": 7365 + }, + { + "epoch": 0.44855829248241635, + "grad_norm": 0.9864468913911251, + "learning_rate": 4.933068060015568e-06, + "loss": 0.4564, + "step": 7366 + }, + { + "epoch": 0.4486191882592942, + "grad_norm": 0.9759141361861251, + "learning_rate": 4.93304971810196e-06, + "loss": 0.4664, + "step": 7367 + }, + { + "epoch": 0.4486800840361721, + "grad_norm": 1.0393496508651203, + "learning_rate": 4.933031373709611e-06, + "loss": 0.4052, + "step": 7368 + }, + { + "epoch": 0.44874097981304994, + "grad_norm": 1.0948315726222801, + "learning_rate": 4.933013026838542e-06, + "loss": 0.4644, + "step": 7369 + }, + { + "epoch": 0.44880187558992785, + "grad_norm": 0.9850671821355472, + "learning_rate": 4.932994677488769e-06, + "loss": 0.4178, + "step": 7370 + }, + { + "epoch": 0.4488627713668057, + "grad_norm": 1.0140755182510848, + "learning_rate": 4.932976325660311e-06, + "loss": 0.4368, + "step": 7371 + }, + { + "epoch": 0.4489236671436836, + "grad_norm": 0.9858879986588635, + "learning_rate": 4.9329579713531875e-06, + "loss": 0.3999, + "step": 7372 + }, + { + "epoch": 0.44898456292056144, + "grad_norm": 1.0578891250550975, + "learning_rate": 4.932939614567417e-06, + "loss": 0.4774, + "step": 7373 + }, + { + "epoch": 0.44904545869743934, + "grad_norm": 1.0497506030507293, + "learning_rate": 4.932921255303018e-06, + "loss": 0.4742, + "step": 7374 + }, + { + "epoch": 0.4491063544743172, + "grad_norm": 0.9740334260044125, + "learning_rate": 4.932902893560011e-06, + "loss": 0.4694, + "step": 7375 + }, + { + "epoch": 0.4491672502511951, + "grad_norm": 1.0023026894570104, + "learning_rate": 4.932884529338411e-06, + "loss": 0.469, + "step": 7376 + }, + { + "epoch": 0.4492281460280729, + "grad_norm": 1.0883474015768375, + "learning_rate": 4.93286616263824e-06, + "loss": 0.5051, + "step": 7377 + }, + { + "epoch": 0.44928904180495083, + "grad_norm": 0.984819282909335, + "learning_rate": 4.932847793459515e-06, + "loss": 0.5028, + "step": 7378 + }, + { + "epoch": 0.4493499375818287, + "grad_norm": 0.9321292941557476, + "learning_rate": 4.932829421802256e-06, + "loss": 0.4777, + "step": 7379 + }, + { + "epoch": 0.4494108333587066, + "grad_norm": 1.040894490390894, + "learning_rate": 4.932811047666481e-06, + "loss": 0.4315, + "step": 7380 + }, + { + "epoch": 0.4494717291355845, + "grad_norm": 0.9587327130572632, + "learning_rate": 4.9327926710522075e-06, + "loss": 0.4477, + "step": 7381 + }, + { + "epoch": 0.4495326249124623, + "grad_norm": 1.0072849984966425, + "learning_rate": 4.932774291959456e-06, + "loss": 0.4022, + "step": 7382 + }, + { + "epoch": 0.4495935206893402, + "grad_norm": 0.9710007579410049, + "learning_rate": 4.932755910388244e-06, + "loss": 0.4978, + "step": 7383 + }, + { + "epoch": 0.44965441646621807, + "grad_norm": 1.019490889778817, + "learning_rate": 4.932737526338592e-06, + "loss": 0.4681, + "step": 7384 + }, + { + "epoch": 0.44971531224309597, + "grad_norm": 1.004426210696819, + "learning_rate": 4.932719139810518e-06, + "loss": 0.4405, + "step": 7385 + }, + { + "epoch": 0.4497762080199738, + "grad_norm": 1.0103786058658966, + "learning_rate": 4.932700750804039e-06, + "loss": 0.4533, + "step": 7386 + }, + { + "epoch": 0.4498371037968517, + "grad_norm": 1.032088413317692, + "learning_rate": 4.932682359319175e-06, + "loss": 0.4447, + "step": 7387 + }, + { + "epoch": 0.44989799957372956, + "grad_norm": 0.9599091748829901, + "learning_rate": 4.932663965355945e-06, + "loss": 0.4709, + "step": 7388 + }, + { + "epoch": 0.44995889535060746, + "grad_norm": 1.0541048425774984, + "learning_rate": 4.932645568914368e-06, + "loss": 0.4279, + "step": 7389 + }, + { + "epoch": 0.4500197911274853, + "grad_norm": 0.953646309566318, + "learning_rate": 4.932627169994462e-06, + "loss": 0.4288, + "step": 7390 + }, + { + "epoch": 0.4500806869043632, + "grad_norm": 0.990383763401407, + "learning_rate": 4.9326087685962464e-06, + "loss": 0.4812, + "step": 7391 + }, + { + "epoch": 0.45014158268124105, + "grad_norm": 1.0816636635080314, + "learning_rate": 4.932590364719739e-06, + "loss": 0.4106, + "step": 7392 + }, + { + "epoch": 0.45020247845811895, + "grad_norm": 1.0516646062964516, + "learning_rate": 4.93257195836496e-06, + "loss": 0.4602, + "step": 7393 + }, + { + "epoch": 0.4502633742349968, + "grad_norm": 0.9922952796717079, + "learning_rate": 4.9325535495319265e-06, + "loss": 0.4789, + "step": 7394 + }, + { + "epoch": 0.4503242700118747, + "grad_norm": 1.066287826136348, + "learning_rate": 4.932535138220658e-06, + "loss": 0.4563, + "step": 7395 + }, + { + "epoch": 0.45038516578875254, + "grad_norm": 0.9866583131117493, + "learning_rate": 4.9325167244311735e-06, + "loss": 0.4569, + "step": 7396 + }, + { + "epoch": 0.45044606156563044, + "grad_norm": 1.046049212308855, + "learning_rate": 4.932498308163492e-06, + "loss": 0.435, + "step": 7397 + }, + { + "epoch": 0.4505069573425083, + "grad_norm": 1.0174775064348012, + "learning_rate": 4.9324798894176304e-06, + "loss": 0.4134, + "step": 7398 + }, + { + "epoch": 0.4505678531193862, + "grad_norm": 0.9928422339472004, + "learning_rate": 4.93246146819361e-06, + "loss": 0.4897, + "step": 7399 + }, + { + "epoch": 0.45062874889626403, + "grad_norm": 1.0225066656987338, + "learning_rate": 4.932443044491449e-06, + "loss": 0.438, + "step": 7400 + }, + { + "epoch": 0.45068964467314193, + "grad_norm": 1.0333694042270587, + "learning_rate": 4.932424618311164e-06, + "loss": 0.4288, + "step": 7401 + }, + { + "epoch": 0.4507505404500198, + "grad_norm": 1.044552232029265, + "learning_rate": 4.932406189652776e-06, + "loss": 0.4699, + "step": 7402 + }, + { + "epoch": 0.4508114362268977, + "grad_norm": 1.1133610355051136, + "learning_rate": 4.932387758516303e-06, + "loss": 0.4369, + "step": 7403 + }, + { + "epoch": 0.4508723320037755, + "grad_norm": 1.034803014701502, + "learning_rate": 4.9323693249017645e-06, + "loss": 0.5156, + "step": 7404 + }, + { + "epoch": 0.4509332277806534, + "grad_norm": 0.958011165186229, + "learning_rate": 4.932350888809178e-06, + "loss": 0.4394, + "step": 7405 + }, + { + "epoch": 0.45099412355753127, + "grad_norm": 1.0424422644344555, + "learning_rate": 4.932332450238564e-06, + "loss": 0.4388, + "step": 7406 + }, + { + "epoch": 0.45105501933440917, + "grad_norm": 0.9361314976681343, + "learning_rate": 4.932314009189939e-06, + "loss": 0.496, + "step": 7407 + }, + { + "epoch": 0.451115915111287, + "grad_norm": 1.0161443015567817, + "learning_rate": 4.932295565663324e-06, + "loss": 0.4873, + "step": 7408 + }, + { + "epoch": 0.4511768108881649, + "grad_norm": 0.9881028396273039, + "learning_rate": 4.9322771196587366e-06, + "loss": 0.4673, + "step": 7409 + }, + { + "epoch": 0.45123770666504276, + "grad_norm": 1.0295053595321386, + "learning_rate": 4.9322586711761954e-06, + "loss": 0.4377, + "step": 7410 + }, + { + "epoch": 0.45129860244192066, + "grad_norm": 1.0196667018598555, + "learning_rate": 4.93224022021572e-06, + "loss": 0.4031, + "step": 7411 + }, + { + "epoch": 0.4513594982187985, + "grad_norm": 1.1143769930354233, + "learning_rate": 4.932221766777329e-06, + "loss": 0.488, + "step": 7412 + }, + { + "epoch": 0.4514203939956764, + "grad_norm": 0.9793706253849925, + "learning_rate": 4.9322033108610415e-06, + "loss": 0.4515, + "step": 7413 + }, + { + "epoch": 0.45148128977255425, + "grad_norm": 0.9171379828777257, + "learning_rate": 4.932184852466875e-06, + "loss": 0.5312, + "step": 7414 + }, + { + "epoch": 0.45154218554943215, + "grad_norm": 1.1238103757092197, + "learning_rate": 4.932166391594849e-06, + "loss": 0.3837, + "step": 7415 + }, + { + "epoch": 0.45160308132631, + "grad_norm": 1.0453290304203857, + "learning_rate": 4.932147928244983e-06, + "loss": 0.4052, + "step": 7416 + }, + { + "epoch": 0.4516639771031879, + "grad_norm": 1.0435206731007836, + "learning_rate": 4.932129462417295e-06, + "loss": 0.3995, + "step": 7417 + }, + { + "epoch": 0.45172487288006574, + "grad_norm": 0.9692313671030741, + "learning_rate": 4.932110994111804e-06, + "loss": 0.4601, + "step": 7418 + }, + { + "epoch": 0.45178576865694364, + "grad_norm": 1.0920226357206932, + "learning_rate": 4.932092523328529e-06, + "loss": 0.4426, + "step": 7419 + }, + { + "epoch": 0.4518466644338215, + "grad_norm": 0.9827759654288231, + "learning_rate": 4.932074050067489e-06, + "loss": 0.4769, + "step": 7420 + }, + { + "epoch": 0.4519075602106994, + "grad_norm": 0.9604123447303515, + "learning_rate": 4.932055574328703e-06, + "loss": 0.5136, + "step": 7421 + }, + { + "epoch": 0.4519684559875773, + "grad_norm": 1.1061074096492636, + "learning_rate": 4.932037096112188e-06, + "loss": 0.4086, + "step": 7422 + }, + { + "epoch": 0.45202935176445513, + "grad_norm": 0.9119312571408844, + "learning_rate": 4.932018615417965e-06, + "loss": 0.4664, + "step": 7423 + }, + { + "epoch": 0.45209024754133303, + "grad_norm": 1.0956459662832594, + "learning_rate": 4.932000132246052e-06, + "loss": 0.4722, + "step": 7424 + }, + { + "epoch": 0.4521511433182109, + "grad_norm": 0.9765489660554908, + "learning_rate": 4.931981646596467e-06, + "loss": 0.5148, + "step": 7425 + }, + { + "epoch": 0.4522120390950888, + "grad_norm": 0.9482616089848922, + "learning_rate": 4.93196315846923e-06, + "loss": 0.501, + "step": 7426 + }, + { + "epoch": 0.4522729348719666, + "grad_norm": 0.9219764286645625, + "learning_rate": 4.93194466786436e-06, + "loss": 0.5119, + "step": 7427 + }, + { + "epoch": 0.4523338306488445, + "grad_norm": 0.8977244425429998, + "learning_rate": 4.9319261747818745e-06, + "loss": 0.5052, + "step": 7428 + }, + { + "epoch": 0.45239472642572237, + "grad_norm": 1.0632710897612336, + "learning_rate": 4.9319076792217945e-06, + "loss": 0.5076, + "step": 7429 + }, + { + "epoch": 0.45245562220260027, + "grad_norm": 1.0322116319178465, + "learning_rate": 4.931889181184136e-06, + "loss": 0.4614, + "step": 7430 + }, + { + "epoch": 0.4525165179794781, + "grad_norm": 0.967841519970793, + "learning_rate": 4.931870680668921e-06, + "loss": 0.4332, + "step": 7431 + }, + { + "epoch": 0.452577413756356, + "grad_norm": 1.0185679294404733, + "learning_rate": 4.931852177676165e-06, + "loss": 0.4894, + "step": 7432 + }, + { + "epoch": 0.45263830953323386, + "grad_norm": 1.0243172122855078, + "learning_rate": 4.93183367220589e-06, + "loss": 0.4414, + "step": 7433 + }, + { + "epoch": 0.45269920531011176, + "grad_norm": 1.0415089280291496, + "learning_rate": 4.9318151642581124e-06, + "loss": 0.474, + "step": 7434 + }, + { + "epoch": 0.4527601010869896, + "grad_norm": 1.0532491504784331, + "learning_rate": 4.931796653832852e-06, + "loss": 0.433, + "step": 7435 + }, + { + "epoch": 0.4528209968638675, + "grad_norm": 1.0852362332552357, + "learning_rate": 4.931778140930129e-06, + "loss": 0.5107, + "step": 7436 + }, + { + "epoch": 0.45288189264074535, + "grad_norm": 1.1313698331925133, + "learning_rate": 4.93175962554996e-06, + "loss": 0.5146, + "step": 7437 + }, + { + "epoch": 0.45294278841762325, + "grad_norm": 0.9112947023860014, + "learning_rate": 4.931741107692365e-06, + "loss": 0.4847, + "step": 7438 + }, + { + "epoch": 0.4530036841945011, + "grad_norm": 1.0283730301138805, + "learning_rate": 4.931722587357363e-06, + "loss": 0.4956, + "step": 7439 + }, + { + "epoch": 0.453064579971379, + "grad_norm": 0.9670402875596027, + "learning_rate": 4.931704064544972e-06, + "loss": 0.4187, + "step": 7440 + }, + { + "epoch": 0.45312547574825685, + "grad_norm": 1.050040178071287, + "learning_rate": 4.931685539255212e-06, + "loss": 0.4466, + "step": 7441 + }, + { + "epoch": 0.45318637152513475, + "grad_norm": 1.0079823213233787, + "learning_rate": 4.9316670114881015e-06, + "loss": 0.3866, + "step": 7442 + }, + { + "epoch": 0.4532472673020126, + "grad_norm": 0.977790453270162, + "learning_rate": 4.931648481243658e-06, + "loss": 0.4157, + "step": 7443 + }, + { + "epoch": 0.4533081630788905, + "grad_norm": 0.9531603672688395, + "learning_rate": 4.931629948521903e-06, + "loss": 0.5459, + "step": 7444 + }, + { + "epoch": 0.45336905885576834, + "grad_norm": 0.9825180514462307, + "learning_rate": 4.9316114133228535e-06, + "loss": 0.4478, + "step": 7445 + }, + { + "epoch": 0.45342995463264624, + "grad_norm": 1.057454661908373, + "learning_rate": 4.931592875646528e-06, + "loss": 0.3496, + "step": 7446 + }, + { + "epoch": 0.4534908504095241, + "grad_norm": 1.001964031632285, + "learning_rate": 4.931574335492947e-06, + "loss": 0.4253, + "step": 7447 + }, + { + "epoch": 0.453551746186402, + "grad_norm": 1.0576911617806861, + "learning_rate": 4.93155579286213e-06, + "loss": 0.5164, + "step": 7448 + }, + { + "epoch": 0.45361264196327983, + "grad_norm": 1.117385416761931, + "learning_rate": 4.9315372477540925e-06, + "loss": 0.3975, + "step": 7449 + }, + { + "epoch": 0.45367353774015773, + "grad_norm": 1.0122672243821254, + "learning_rate": 4.931518700168856e-06, + "loss": 0.4614, + "step": 7450 + }, + { + "epoch": 0.4537344335170356, + "grad_norm": 1.045692578117085, + "learning_rate": 4.931500150106438e-06, + "loss": 0.513, + "step": 7451 + }, + { + "epoch": 0.4537953292939135, + "grad_norm": 1.0322849987095075, + "learning_rate": 4.931481597566859e-06, + "loss": 0.3914, + "step": 7452 + }, + { + "epoch": 0.4538562250707913, + "grad_norm": 1.0999710079983553, + "learning_rate": 4.931463042550137e-06, + "loss": 0.4317, + "step": 7453 + }, + { + "epoch": 0.4539171208476692, + "grad_norm": 1.0173154975137277, + "learning_rate": 4.9314444850562915e-06, + "loss": 0.5106, + "step": 7454 + }, + { + "epoch": 0.45397801662454706, + "grad_norm": 1.0924462945813784, + "learning_rate": 4.931425925085341e-06, + "loss": 0.3801, + "step": 7455 + }, + { + "epoch": 0.45403891240142497, + "grad_norm": 0.9627186875424879, + "learning_rate": 4.9314073626373035e-06, + "loss": 0.4619, + "step": 7456 + }, + { + "epoch": 0.4540998081783028, + "grad_norm": 1.0202912697824114, + "learning_rate": 4.931388797712199e-06, + "loss": 0.4955, + "step": 7457 + }, + { + "epoch": 0.4541607039551807, + "grad_norm": 1.0384823720395626, + "learning_rate": 4.931370230310046e-06, + "loss": 0.4189, + "step": 7458 + }, + { + "epoch": 0.45422159973205856, + "grad_norm": 1.1297759875148612, + "learning_rate": 4.931351660430864e-06, + "loss": 0.5056, + "step": 7459 + }, + { + "epoch": 0.45428249550893646, + "grad_norm": 0.9786160384520766, + "learning_rate": 4.931333088074671e-06, + "loss": 0.4992, + "step": 7460 + }, + { + "epoch": 0.4543433912858143, + "grad_norm": 1.0102342748821773, + "learning_rate": 4.931314513241487e-06, + "loss": 0.4572, + "step": 7461 + }, + { + "epoch": 0.4544042870626922, + "grad_norm": 0.9803542341789964, + "learning_rate": 4.9312959359313295e-06, + "loss": 0.4224, + "step": 7462 + }, + { + "epoch": 0.4544651828395701, + "grad_norm": 1.062361032753291, + "learning_rate": 4.931277356144219e-06, + "loss": 0.5363, + "step": 7463 + }, + { + "epoch": 0.45452607861644795, + "grad_norm": 0.9520614847969547, + "learning_rate": 4.931258773880173e-06, + "loss": 0.4115, + "step": 7464 + }, + { + "epoch": 0.45458697439332585, + "grad_norm": 1.0743819299433177, + "learning_rate": 4.931240189139212e-06, + "loss": 0.3898, + "step": 7465 + }, + { + "epoch": 0.4546478701702037, + "grad_norm": 1.04325207532882, + "learning_rate": 4.9312216019213535e-06, + "loss": 0.4217, + "step": 7466 + }, + { + "epoch": 0.4547087659470816, + "grad_norm": 1.0684224933553412, + "learning_rate": 4.931203012226617e-06, + "loss": 0.439, + "step": 7467 + }, + { + "epoch": 0.45476966172395944, + "grad_norm": 0.9999332256504392, + "learning_rate": 4.931184420055022e-06, + "loss": 0.4613, + "step": 7468 + }, + { + "epoch": 0.45483055750083734, + "grad_norm": 0.9560195846115628, + "learning_rate": 4.9311658254065855e-06, + "loss": 0.4641, + "step": 7469 + }, + { + "epoch": 0.4548914532777152, + "grad_norm": 0.9593908031029864, + "learning_rate": 4.931147228281329e-06, + "loss": 0.443, + "step": 7470 + }, + { + "epoch": 0.4549523490545931, + "grad_norm": 1.0649298515788572, + "learning_rate": 4.931128628679269e-06, + "loss": 0.4253, + "step": 7471 + }, + { + "epoch": 0.45501324483147093, + "grad_norm": 1.0768283177028168, + "learning_rate": 4.931110026600428e-06, + "loss": 0.3643, + "step": 7472 + }, + { + "epoch": 0.45507414060834883, + "grad_norm": 0.9892102656019351, + "learning_rate": 4.9310914220448204e-06, + "loss": 0.4667, + "step": 7473 + }, + { + "epoch": 0.4551350363852267, + "grad_norm": 1.055975054613745, + "learning_rate": 4.931072815012468e-06, + "loss": 0.398, + "step": 7474 + }, + { + "epoch": 0.4551959321621046, + "grad_norm": 0.9885264764814726, + "learning_rate": 4.931054205503389e-06, + "loss": 0.4848, + "step": 7475 + }, + { + "epoch": 0.4552568279389824, + "grad_norm": 0.9233090031968517, + "learning_rate": 4.931035593517604e-06, + "loss": 0.4957, + "step": 7476 + }, + { + "epoch": 0.4553177237158603, + "grad_norm": 1.1333029128908831, + "learning_rate": 4.931016979055129e-06, + "loss": 0.3885, + "step": 7477 + }, + { + "epoch": 0.45537861949273817, + "grad_norm": 0.9874134045451549, + "learning_rate": 4.930998362115985e-06, + "loss": 0.4461, + "step": 7478 + }, + { + "epoch": 0.45543951526961607, + "grad_norm": 1.0504876720381844, + "learning_rate": 4.930979742700189e-06, + "loss": 0.4515, + "step": 7479 + }, + { + "epoch": 0.4555004110464939, + "grad_norm": 1.0180022271555333, + "learning_rate": 4.930961120807763e-06, + "loss": 0.4745, + "step": 7480 + }, + { + "epoch": 0.4555613068233718, + "grad_norm": 1.0594802464631854, + "learning_rate": 4.930942496438725e-06, + "loss": 0.4482, + "step": 7481 + }, + { + "epoch": 0.45562220260024966, + "grad_norm": 1.0491069043792614, + "learning_rate": 4.930923869593091e-06, + "loss": 0.4397, + "step": 7482 + }, + { + "epoch": 0.45568309837712756, + "grad_norm": 1.1734696150189163, + "learning_rate": 4.930905240270884e-06, + "loss": 0.4539, + "step": 7483 + }, + { + "epoch": 0.4557439941540054, + "grad_norm": 0.9836566056564968, + "learning_rate": 4.930886608472121e-06, + "loss": 0.5187, + "step": 7484 + }, + { + "epoch": 0.4558048899308833, + "grad_norm": 0.9885284381198015, + "learning_rate": 4.930867974196821e-06, + "loss": 0.5666, + "step": 7485 + }, + { + "epoch": 0.45586578570776115, + "grad_norm": 0.9818507192199726, + "learning_rate": 4.930849337445003e-06, + "loss": 0.4527, + "step": 7486 + }, + { + "epoch": 0.45592668148463905, + "grad_norm": 0.9728924064122573, + "learning_rate": 4.930830698216687e-06, + "loss": 0.4337, + "step": 7487 + }, + { + "epoch": 0.4559875772615169, + "grad_norm": 1.0033425341363154, + "learning_rate": 4.930812056511891e-06, + "loss": 0.5056, + "step": 7488 + }, + { + "epoch": 0.4560484730383948, + "grad_norm": 0.936444447247266, + "learning_rate": 4.930793412330633e-06, + "loss": 0.5234, + "step": 7489 + }, + { + "epoch": 0.45610936881527264, + "grad_norm": 1.1374258775125845, + "learning_rate": 4.930774765672935e-06, + "loss": 0.4548, + "step": 7490 + }, + { + "epoch": 0.45617026459215054, + "grad_norm": 1.0031159258031455, + "learning_rate": 4.930756116538814e-06, + "loss": 0.4104, + "step": 7491 + }, + { + "epoch": 0.4562311603690284, + "grad_norm": 0.9582834435979617, + "learning_rate": 4.9307374649282876e-06, + "loss": 0.4305, + "step": 7492 + }, + { + "epoch": 0.4562920561459063, + "grad_norm": 0.9715925603774189, + "learning_rate": 4.930718810841378e-06, + "loss": 0.3838, + "step": 7493 + }, + { + "epoch": 0.45635295192278413, + "grad_norm": 1.0539054527374427, + "learning_rate": 4.930700154278102e-06, + "loss": 0.3982, + "step": 7494 + }, + { + "epoch": 0.45641384769966203, + "grad_norm": 0.9949069057557306, + "learning_rate": 4.93068149523848e-06, + "loss": 0.5215, + "step": 7495 + }, + { + "epoch": 0.4564747434765399, + "grad_norm": 0.9943428022957613, + "learning_rate": 4.930662833722529e-06, + "loss": 0.3905, + "step": 7496 + }, + { + "epoch": 0.4565356392534178, + "grad_norm": 1.132542891858728, + "learning_rate": 4.93064416973027e-06, + "loss": 0.5353, + "step": 7497 + }, + { + "epoch": 0.4565965350302956, + "grad_norm": 0.8884026275770566, + "learning_rate": 4.930625503261721e-06, + "loss": 0.4737, + "step": 7498 + }, + { + "epoch": 0.4566574308071735, + "grad_norm": 1.1078601586549588, + "learning_rate": 4.930606834316901e-06, + "loss": 0.464, + "step": 7499 + }, + { + "epoch": 0.45671832658405137, + "grad_norm": 0.9734168930640096, + "learning_rate": 4.9305881628958296e-06, + "loss": 0.4716, + "step": 7500 + }, + { + "epoch": 0.45677922236092927, + "grad_norm": 1.0921746107630095, + "learning_rate": 4.930569488998526e-06, + "loss": 0.4642, + "step": 7501 + }, + { + "epoch": 0.4568401181378071, + "grad_norm": 1.0521411894396548, + "learning_rate": 4.9305508126250075e-06, + "loss": 0.4966, + "step": 7502 + }, + { + "epoch": 0.456901013914685, + "grad_norm": 1.0380368809115907, + "learning_rate": 4.930532133775295e-06, + "loss": 0.4385, + "step": 7503 + }, + { + "epoch": 0.4569619096915629, + "grad_norm": 1.0325931273850661, + "learning_rate": 4.930513452449407e-06, + "loss": 0.4297, + "step": 7504 + }, + { + "epoch": 0.45702280546844076, + "grad_norm": 1.0622764401700204, + "learning_rate": 4.930494768647363e-06, + "loss": 0.4514, + "step": 7505 + }, + { + "epoch": 0.45708370124531866, + "grad_norm": 0.9832442275245167, + "learning_rate": 4.93047608236918e-06, + "loss": 0.4627, + "step": 7506 + }, + { + "epoch": 0.4571445970221965, + "grad_norm": 1.058689960736043, + "learning_rate": 4.930457393614879e-06, + "loss": 0.441, + "step": 7507 + }, + { + "epoch": 0.4572054927990744, + "grad_norm": 0.9752313915726505, + "learning_rate": 4.930438702384479e-06, + "loss": 0.4501, + "step": 7508 + }, + { + "epoch": 0.45726638857595225, + "grad_norm": 1.0314483096204048, + "learning_rate": 4.930420008677999e-06, + "loss": 0.3729, + "step": 7509 + }, + { + "epoch": 0.45732728435283015, + "grad_norm": 1.0916503272328653, + "learning_rate": 4.930401312495457e-06, + "loss": 0.3906, + "step": 7510 + }, + { + "epoch": 0.457388180129708, + "grad_norm": 0.9630200016680743, + "learning_rate": 4.9303826138368726e-06, + "loss": 0.4432, + "step": 7511 + }, + { + "epoch": 0.4574490759065859, + "grad_norm": 1.0242004615436482, + "learning_rate": 4.9303639127022655e-06, + "loss": 0.4591, + "step": 7512 + }, + { + "epoch": 0.45750997168346375, + "grad_norm": 1.0014878244502106, + "learning_rate": 4.930345209091654e-06, + "loss": 0.5157, + "step": 7513 + }, + { + "epoch": 0.45757086746034165, + "grad_norm": 1.0521044078066697, + "learning_rate": 4.930326503005057e-06, + "loss": 0.4461, + "step": 7514 + }, + { + "epoch": 0.4576317632372195, + "grad_norm": 1.042549987169014, + "learning_rate": 4.9303077944424936e-06, + "loss": 0.4541, + "step": 7515 + }, + { + "epoch": 0.4576926590140974, + "grad_norm": 1.0316256782952804, + "learning_rate": 4.930289083403984e-06, + "loss": 0.4364, + "step": 7516 + }, + { + "epoch": 0.45775355479097524, + "grad_norm": 0.9317699256616583, + "learning_rate": 4.930270369889546e-06, + "loss": 0.5461, + "step": 7517 + }, + { + "epoch": 0.45781445056785314, + "grad_norm": 1.081722536214359, + "learning_rate": 4.930251653899198e-06, + "loss": 0.4996, + "step": 7518 + }, + { + "epoch": 0.457875346344731, + "grad_norm": 0.9841456108840312, + "learning_rate": 4.930232935432961e-06, + "loss": 0.5144, + "step": 7519 + }, + { + "epoch": 0.4579362421216089, + "grad_norm": 0.9855558336315361, + "learning_rate": 4.930214214490854e-06, + "loss": 0.4514, + "step": 7520 + }, + { + "epoch": 0.45799713789848673, + "grad_norm": 1.075516890838076, + "learning_rate": 4.9301954910728945e-06, + "loss": 0.4113, + "step": 7521 + }, + { + "epoch": 0.45805803367536463, + "grad_norm": 1.0163754333723383, + "learning_rate": 4.930176765179103e-06, + "loss": 0.5286, + "step": 7522 + }, + { + "epoch": 0.4581189294522425, + "grad_norm": 0.9462027308074071, + "learning_rate": 4.930158036809497e-06, + "loss": 0.4974, + "step": 7523 + }, + { + "epoch": 0.4581798252291204, + "grad_norm": 1.0379358895183994, + "learning_rate": 4.930139305964097e-06, + "loss": 0.3762, + "step": 7524 + }, + { + "epoch": 0.4582407210059982, + "grad_norm": 1.0017991463997684, + "learning_rate": 4.930120572642922e-06, + "loss": 0.5435, + "step": 7525 + }, + { + "epoch": 0.4583016167828761, + "grad_norm": 0.9084664066250825, + "learning_rate": 4.930101836845989e-06, + "loss": 0.4791, + "step": 7526 + }, + { + "epoch": 0.45836251255975397, + "grad_norm": 1.1231264444624431, + "learning_rate": 4.930083098573321e-06, + "loss": 0.4402, + "step": 7527 + }, + { + "epoch": 0.45842340833663187, + "grad_norm": 1.0453918044836152, + "learning_rate": 4.9300643578249345e-06, + "loss": 0.4928, + "step": 7528 + }, + { + "epoch": 0.4584843041135097, + "grad_norm": 0.9772756452333127, + "learning_rate": 4.930045614600848e-06, + "loss": 0.48, + "step": 7529 + }, + { + "epoch": 0.4585451998903876, + "grad_norm": 1.0424289519188832, + "learning_rate": 4.930026868901082e-06, + "loss": 0.3834, + "step": 7530 + }, + { + "epoch": 0.45860609566726546, + "grad_norm": 1.0546189956073915, + "learning_rate": 4.930008120725655e-06, + "loss": 0.4572, + "step": 7531 + }, + { + "epoch": 0.45866699144414336, + "grad_norm": 1.0503025611051562, + "learning_rate": 4.929989370074586e-06, + "loss": 0.4158, + "step": 7532 + }, + { + "epoch": 0.4587278872210212, + "grad_norm": 1.0168427918556346, + "learning_rate": 4.929970616947895e-06, + "loss": 0.5061, + "step": 7533 + }, + { + "epoch": 0.4587887829978991, + "grad_norm": 1.1095434703367997, + "learning_rate": 4.9299518613456e-06, + "loss": 0.3918, + "step": 7534 + }, + { + "epoch": 0.45884967877477695, + "grad_norm": 1.2100016338819057, + "learning_rate": 4.92993310326772e-06, + "loss": 0.4674, + "step": 7535 + }, + { + "epoch": 0.45891057455165485, + "grad_norm": 1.0597040902343557, + "learning_rate": 4.9299143427142755e-06, + "loss": 0.4364, + "step": 7536 + }, + { + "epoch": 0.4589714703285327, + "grad_norm": 1.005481209558817, + "learning_rate": 4.9298955796852846e-06, + "loss": 0.4347, + "step": 7537 + }, + { + "epoch": 0.4590323661054106, + "grad_norm": 1.0551208181648255, + "learning_rate": 4.929876814180767e-06, + "loss": 0.4534, + "step": 7538 + }, + { + "epoch": 0.45909326188228844, + "grad_norm": 1.001615128724044, + "learning_rate": 4.9298580462007405e-06, + "loss": 0.4149, + "step": 7539 + }, + { + "epoch": 0.45915415765916634, + "grad_norm": 0.9213864998245573, + "learning_rate": 4.9298392757452264e-06, + "loss": 0.4411, + "step": 7540 + }, + { + "epoch": 0.4592150534360442, + "grad_norm": 0.9986711055397143, + "learning_rate": 4.929820502814242e-06, + "loss": 0.4809, + "step": 7541 + }, + { + "epoch": 0.4592759492129221, + "grad_norm": 1.1508463533482525, + "learning_rate": 4.929801727407806e-06, + "loss": 0.3933, + "step": 7542 + }, + { + "epoch": 0.45933684498979993, + "grad_norm": 0.9751892808769443, + "learning_rate": 4.929782949525939e-06, + "loss": 0.4488, + "step": 7543 + }, + { + "epoch": 0.45939774076667783, + "grad_norm": 1.026229353312782, + "learning_rate": 4.92976416916866e-06, + "loss": 0.3533, + "step": 7544 + }, + { + "epoch": 0.45945863654355573, + "grad_norm": 0.9889928925079071, + "learning_rate": 4.929745386335989e-06, + "loss": 0.43, + "step": 7545 + }, + { + "epoch": 0.4595195323204336, + "grad_norm": 0.9465705149328636, + "learning_rate": 4.929726601027942e-06, + "loss": 0.4665, + "step": 7546 + }, + { + "epoch": 0.4595804280973115, + "grad_norm": 1.0659465063617921, + "learning_rate": 4.929707813244541e-06, + "loss": 0.4881, + "step": 7547 + }, + { + "epoch": 0.4596413238741893, + "grad_norm": 1.0159503366589955, + "learning_rate": 4.929689022985804e-06, + "loss": 0.504, + "step": 7548 + }, + { + "epoch": 0.4597022196510672, + "grad_norm": 1.064733707484081, + "learning_rate": 4.929670230251749e-06, + "loss": 0.4358, + "step": 7549 + }, + { + "epoch": 0.45976311542794507, + "grad_norm": 1.1028307437732447, + "learning_rate": 4.929651435042399e-06, + "loss": 0.4905, + "step": 7550 + }, + { + "epoch": 0.45982401120482297, + "grad_norm": 1.0022974802994955, + "learning_rate": 4.929632637357769e-06, + "loss": 0.4857, + "step": 7551 + }, + { + "epoch": 0.4598849069817008, + "grad_norm": 1.0726958745313238, + "learning_rate": 4.92961383719788e-06, + "loss": 0.4775, + "step": 7552 + }, + { + "epoch": 0.4599458027585787, + "grad_norm": 0.9027288811766855, + "learning_rate": 4.9295950345627515e-06, + "loss": 0.4907, + "step": 7553 + }, + { + "epoch": 0.46000669853545656, + "grad_norm": 1.0499487469923832, + "learning_rate": 4.929576229452402e-06, + "loss": 0.433, + "step": 7554 + }, + { + "epoch": 0.46006759431233446, + "grad_norm": 1.091029088690275, + "learning_rate": 4.92955742186685e-06, + "loss": 0.4296, + "step": 7555 + }, + { + "epoch": 0.4601284900892123, + "grad_norm": 0.9995079711512255, + "learning_rate": 4.9295386118061166e-06, + "loss": 0.5194, + "step": 7556 + }, + { + "epoch": 0.4601893858660902, + "grad_norm": 1.009060149415835, + "learning_rate": 4.929519799270218e-06, + "loss": 0.4091, + "step": 7557 + }, + { + "epoch": 0.46025028164296805, + "grad_norm": 0.9136975817981404, + "learning_rate": 4.929500984259177e-06, + "loss": 0.4338, + "step": 7558 + }, + { + "epoch": 0.46031117741984595, + "grad_norm": 1.0826718735467324, + "learning_rate": 4.92948216677301e-06, + "loss": 0.4252, + "step": 7559 + }, + { + "epoch": 0.4603720731967238, + "grad_norm": 0.9604009851000546, + "learning_rate": 4.929463346811737e-06, + "loss": 0.499, + "step": 7560 + }, + { + "epoch": 0.4604329689736017, + "grad_norm": 0.9813974724640105, + "learning_rate": 4.929444524375378e-06, + "loss": 0.4392, + "step": 7561 + }, + { + "epoch": 0.46049386475047954, + "grad_norm": 1.063118380434815, + "learning_rate": 4.92942569946395e-06, + "loss": 0.3963, + "step": 7562 + }, + { + "epoch": 0.46055476052735744, + "grad_norm": 1.07046367103653, + "learning_rate": 4.929406872077475e-06, + "loss": 0.5129, + "step": 7563 + }, + { + "epoch": 0.4606156563042353, + "grad_norm": 1.086895600592622, + "learning_rate": 4.92938804221597e-06, + "loss": 0.4056, + "step": 7564 + }, + { + "epoch": 0.4606765520811132, + "grad_norm": 1.0139457428090042, + "learning_rate": 4.929369209879455e-06, + "loss": 0.5269, + "step": 7565 + }, + { + "epoch": 0.46073744785799103, + "grad_norm": 0.9702604452889886, + "learning_rate": 4.92935037506795e-06, + "loss": 0.4919, + "step": 7566 + }, + { + "epoch": 0.46079834363486893, + "grad_norm": 1.002226044609161, + "learning_rate": 4.9293315377814725e-06, + "loss": 0.5121, + "step": 7567 + }, + { + "epoch": 0.4608592394117468, + "grad_norm": 1.0095442036299604, + "learning_rate": 4.929312698020042e-06, + "loss": 0.4493, + "step": 7568 + }, + { + "epoch": 0.4609201351886247, + "grad_norm": 0.9902589957371549, + "learning_rate": 4.929293855783679e-06, + "loss": 0.4065, + "step": 7569 + }, + { + "epoch": 0.4609810309655025, + "grad_norm": 1.0753203389561983, + "learning_rate": 4.929275011072402e-06, + "loss": 0.3933, + "step": 7570 + }, + { + "epoch": 0.4610419267423804, + "grad_norm": 1.0495891809228035, + "learning_rate": 4.92925616388623e-06, + "loss": 0.483, + "step": 7571 + }, + { + "epoch": 0.46110282251925827, + "grad_norm": 0.9734117552229143, + "learning_rate": 4.929237314225183e-06, + "loss": 0.4319, + "step": 7572 + }, + { + "epoch": 0.46116371829613617, + "grad_norm": 1.079311241081342, + "learning_rate": 4.9292184620892786e-06, + "loss": 0.3836, + "step": 7573 + }, + { + "epoch": 0.461224614073014, + "grad_norm": 0.9409308280817809, + "learning_rate": 4.929199607478537e-06, + "loss": 0.5032, + "step": 7574 + }, + { + "epoch": 0.4612855098498919, + "grad_norm": 1.0099444043043042, + "learning_rate": 4.9291807503929776e-06, + "loss": 0.4243, + "step": 7575 + }, + { + "epoch": 0.46134640562676976, + "grad_norm": 1.0210989065420248, + "learning_rate": 4.92916189083262e-06, + "loss": 0.4747, + "step": 7576 + }, + { + "epoch": 0.46140730140364766, + "grad_norm": 1.0916214177184014, + "learning_rate": 4.929143028797482e-06, + "loss": 0.4182, + "step": 7577 + }, + { + "epoch": 0.4614681971805255, + "grad_norm": 0.9855177019503384, + "learning_rate": 4.929124164287583e-06, + "loss": 0.4252, + "step": 7578 + }, + { + "epoch": 0.4615290929574034, + "grad_norm": 1.0521792637314435, + "learning_rate": 4.929105297302944e-06, + "loss": 0.435, + "step": 7579 + }, + { + "epoch": 0.46158998873428125, + "grad_norm": 1.0669719071455088, + "learning_rate": 4.929086427843582e-06, + "loss": 0.4312, + "step": 7580 + }, + { + "epoch": 0.46165088451115915, + "grad_norm": 1.041478762960134, + "learning_rate": 4.929067555909518e-06, + "loss": 0.432, + "step": 7581 + }, + { + "epoch": 0.461711780288037, + "grad_norm": 1.1269842749148524, + "learning_rate": 4.92904868150077e-06, + "loss": 0.4099, + "step": 7582 + }, + { + "epoch": 0.4617726760649149, + "grad_norm": 0.9919979384020785, + "learning_rate": 4.929029804617358e-06, + "loss": 0.4825, + "step": 7583 + }, + { + "epoch": 0.46183357184179274, + "grad_norm": 1.1441739722244624, + "learning_rate": 4.929010925259301e-06, + "loss": 0.4338, + "step": 7584 + }, + { + "epoch": 0.46189446761867065, + "grad_norm": 1.0084421410109006, + "learning_rate": 4.928992043426618e-06, + "loss": 0.4043, + "step": 7585 + }, + { + "epoch": 0.46195536339554855, + "grad_norm": 0.9952459549276826, + "learning_rate": 4.928973159119329e-06, + "loss": 0.465, + "step": 7586 + }, + { + "epoch": 0.4620162591724264, + "grad_norm": 1.0610623597593538, + "learning_rate": 4.928954272337452e-06, + "loss": 0.4116, + "step": 7587 + }, + { + "epoch": 0.4620771549493043, + "grad_norm": 1.022572383288637, + "learning_rate": 4.928935383081006e-06, + "loss": 0.4482, + "step": 7588 + }, + { + "epoch": 0.46213805072618214, + "grad_norm": 0.9876568878205371, + "learning_rate": 4.928916491350013e-06, + "loss": 0.4329, + "step": 7589 + }, + { + "epoch": 0.46219894650306004, + "grad_norm": 1.0641490164862422, + "learning_rate": 4.9288975971444894e-06, + "loss": 0.502, + "step": 7590 + }, + { + "epoch": 0.4622598422799379, + "grad_norm": 1.1784223183164226, + "learning_rate": 4.928878700464455e-06, + "loss": 0.4455, + "step": 7591 + }, + { + "epoch": 0.4623207380568158, + "grad_norm": 1.0820989709108406, + "learning_rate": 4.92885980130993e-06, + "loss": 0.4191, + "step": 7592 + }, + { + "epoch": 0.46238163383369363, + "grad_norm": 0.9310370197444274, + "learning_rate": 4.9288408996809336e-06, + "loss": 0.3919, + "step": 7593 + }, + { + "epoch": 0.46244252961057153, + "grad_norm": 0.9157950556278837, + "learning_rate": 4.928821995577484e-06, + "loss": 0.4315, + "step": 7594 + }, + { + "epoch": 0.4625034253874494, + "grad_norm": 0.9975233751837584, + "learning_rate": 4.928803088999601e-06, + "loss": 0.489, + "step": 7595 + }, + { + "epoch": 0.4625643211643273, + "grad_norm": 0.9878521013166861, + "learning_rate": 4.928784179947304e-06, + "loss": 0.4533, + "step": 7596 + }, + { + "epoch": 0.4626252169412051, + "grad_norm": 1.0894898551273833, + "learning_rate": 4.928765268420612e-06, + "loss": 0.5034, + "step": 7597 + }, + { + "epoch": 0.462686112718083, + "grad_norm": 0.9652295741886595, + "learning_rate": 4.928746354419545e-06, + "loss": 0.4752, + "step": 7598 + }, + { + "epoch": 0.46274700849496087, + "grad_norm": 1.1210953404120552, + "learning_rate": 4.928727437944121e-06, + "loss": 0.4572, + "step": 7599 + }, + { + "epoch": 0.46280790427183877, + "grad_norm": 0.9735352425991124, + "learning_rate": 4.9287085189943605e-06, + "loss": 0.3999, + "step": 7600 + }, + { + "epoch": 0.4628688000487166, + "grad_norm": 1.0989988935035802, + "learning_rate": 4.928689597570282e-06, + "loss": 0.4177, + "step": 7601 + }, + { + "epoch": 0.4629296958255945, + "grad_norm": 0.9810649435574759, + "learning_rate": 4.928670673671905e-06, + "loss": 0.4834, + "step": 7602 + }, + { + "epoch": 0.46299059160247236, + "grad_norm": 1.1010718065408838, + "learning_rate": 4.928651747299249e-06, + "loss": 0.3738, + "step": 7603 + }, + { + "epoch": 0.46305148737935026, + "grad_norm": 1.1191444521343479, + "learning_rate": 4.928632818452333e-06, + "loss": 0.4334, + "step": 7604 + }, + { + "epoch": 0.4631123831562281, + "grad_norm": 1.0705104233656986, + "learning_rate": 4.928613887131177e-06, + "loss": 0.4947, + "step": 7605 + }, + { + "epoch": 0.463173278933106, + "grad_norm": 1.0019640845066566, + "learning_rate": 4.928594953335799e-06, + "loss": 0.4177, + "step": 7606 + }, + { + "epoch": 0.46323417470998385, + "grad_norm": 1.1232584033667814, + "learning_rate": 4.928576017066219e-06, + "loss": 0.4671, + "step": 7607 + }, + { + "epoch": 0.46329507048686175, + "grad_norm": 1.0469750915513945, + "learning_rate": 4.928557078322456e-06, + "loss": 0.4169, + "step": 7608 + }, + { + "epoch": 0.4633559662637396, + "grad_norm": 1.0667094113904936, + "learning_rate": 4.928538137104531e-06, + "loss": 0.4514, + "step": 7609 + }, + { + "epoch": 0.4634168620406175, + "grad_norm": 1.0185845055250407, + "learning_rate": 4.92851919341246e-06, + "loss": 0.4763, + "step": 7610 + }, + { + "epoch": 0.46347775781749534, + "grad_norm": 1.0163390101870884, + "learning_rate": 4.928500247246265e-06, + "loss": 0.4885, + "step": 7611 + }, + { + "epoch": 0.46353865359437324, + "grad_norm": 1.000844439630944, + "learning_rate": 4.928481298605964e-06, + "loss": 0.499, + "step": 7612 + }, + { + "epoch": 0.4635995493712511, + "grad_norm": 0.9347967910835702, + "learning_rate": 4.9284623474915775e-06, + "loss": 0.4572, + "step": 7613 + }, + { + "epoch": 0.463660445148129, + "grad_norm": 1.162218432974772, + "learning_rate": 4.928443393903123e-06, + "loss": 0.3837, + "step": 7614 + }, + { + "epoch": 0.46372134092500683, + "grad_norm": 0.950265645036933, + "learning_rate": 4.928424437840621e-06, + "loss": 0.5138, + "step": 7615 + }, + { + "epoch": 0.46378223670188473, + "grad_norm": 1.0622084155210925, + "learning_rate": 4.928405479304092e-06, + "loss": 0.3995, + "step": 7616 + }, + { + "epoch": 0.4638431324787626, + "grad_norm": 0.978875377452531, + "learning_rate": 4.9283865182935525e-06, + "loss": 0.4737, + "step": 7617 + }, + { + "epoch": 0.4639040282556405, + "grad_norm": 1.0044154964693548, + "learning_rate": 4.928367554809024e-06, + "loss": 0.411, + "step": 7618 + }, + { + "epoch": 0.4639649240325183, + "grad_norm": 1.0953620104459298, + "learning_rate": 4.928348588850525e-06, + "loss": 0.432, + "step": 7619 + }, + { + "epoch": 0.4640258198093962, + "grad_norm": 0.9916408321402969, + "learning_rate": 4.9283296204180745e-06, + "loss": 0.4389, + "step": 7620 + }, + { + "epoch": 0.46408671558627407, + "grad_norm": 0.9056091409857854, + "learning_rate": 4.9283106495116925e-06, + "loss": 0.6078, + "step": 7621 + }, + { + "epoch": 0.46414761136315197, + "grad_norm": 1.01892139871481, + "learning_rate": 4.928291676131398e-06, + "loss": 0.5061, + "step": 7622 + }, + { + "epoch": 0.4642085071400298, + "grad_norm": 1.0450195982574246, + "learning_rate": 4.928272700277211e-06, + "loss": 0.4001, + "step": 7623 + }, + { + "epoch": 0.4642694029169077, + "grad_norm": 0.9437841633790067, + "learning_rate": 4.9282537219491504e-06, + "loss": 0.4521, + "step": 7624 + }, + { + "epoch": 0.46433029869378556, + "grad_norm": 1.1311030365603758, + "learning_rate": 4.928234741147236e-06, + "loss": 0.4161, + "step": 7625 + }, + { + "epoch": 0.46439119447066346, + "grad_norm": 1.0367859273636353, + "learning_rate": 4.928215757871484e-06, + "loss": 0.4314, + "step": 7626 + }, + { + "epoch": 0.46445209024754136, + "grad_norm": 1.088875636571779, + "learning_rate": 4.928196772121918e-06, + "loss": 0.4296, + "step": 7627 + }, + { + "epoch": 0.4645129860244192, + "grad_norm": 0.9750620581644552, + "learning_rate": 4.9281777838985555e-06, + "loss": 0.4804, + "step": 7628 + }, + { + "epoch": 0.4645738818012971, + "grad_norm": 1.2127363814913221, + "learning_rate": 4.928158793201416e-06, + "loss": 0.4695, + "step": 7629 + }, + { + "epoch": 0.46463477757817495, + "grad_norm": 1.0291111438993001, + "learning_rate": 4.9281398000305184e-06, + "loss": 0.4278, + "step": 7630 + }, + { + "epoch": 0.46469567335505285, + "grad_norm": 0.9958459690235238, + "learning_rate": 4.928120804385883e-06, + "loss": 0.4542, + "step": 7631 + }, + { + "epoch": 0.4647565691319307, + "grad_norm": 1.0617515484327118, + "learning_rate": 4.928101806267528e-06, + "loss": 0.4504, + "step": 7632 + }, + { + "epoch": 0.4648174649088086, + "grad_norm": 0.8938063506599627, + "learning_rate": 4.928082805675474e-06, + "loss": 0.4782, + "step": 7633 + }, + { + "epoch": 0.46487836068568644, + "grad_norm": 1.0376757294131043, + "learning_rate": 4.928063802609739e-06, + "loss": 0.4083, + "step": 7634 + }, + { + "epoch": 0.46493925646256434, + "grad_norm": 1.0361247554989692, + "learning_rate": 4.928044797070343e-06, + "loss": 0.409, + "step": 7635 + }, + { + "epoch": 0.4650001522394422, + "grad_norm": 1.0296844075198965, + "learning_rate": 4.928025789057307e-06, + "loss": 0.4084, + "step": 7636 + }, + { + "epoch": 0.4650610480163201, + "grad_norm": 0.9827526782293275, + "learning_rate": 4.928006778570648e-06, + "loss": 0.4348, + "step": 7637 + }, + { + "epoch": 0.46512194379319793, + "grad_norm": 1.0025267208509367, + "learning_rate": 4.927987765610385e-06, + "loss": 0.4899, + "step": 7638 + }, + { + "epoch": 0.46518283957007583, + "grad_norm": 1.1348953214451187, + "learning_rate": 4.927968750176539e-06, + "loss": 0.4442, + "step": 7639 + }, + { + "epoch": 0.4652437353469537, + "grad_norm": 1.0620135017751582, + "learning_rate": 4.92794973226913e-06, + "loss": 0.4245, + "step": 7640 + }, + { + "epoch": 0.4653046311238316, + "grad_norm": 0.9752909043449395, + "learning_rate": 4.927930711888176e-06, + "loss": 0.4163, + "step": 7641 + }, + { + "epoch": 0.4653655269007094, + "grad_norm": 0.9893467950217454, + "learning_rate": 4.927911689033696e-06, + "loss": 0.4523, + "step": 7642 + }, + { + "epoch": 0.4654264226775873, + "grad_norm": 0.9484197625549362, + "learning_rate": 4.92789266370571e-06, + "loss": 0.4133, + "step": 7643 + }, + { + "epoch": 0.46548731845446517, + "grad_norm": 1.0912158110834504, + "learning_rate": 4.927873635904238e-06, + "loss": 0.4157, + "step": 7644 + }, + { + "epoch": 0.46554821423134307, + "grad_norm": 0.9783675976642013, + "learning_rate": 4.927854605629298e-06, + "loss": 0.5595, + "step": 7645 + }, + { + "epoch": 0.4656091100082209, + "grad_norm": 1.0453667302348142, + "learning_rate": 4.927835572880911e-06, + "loss": 0.4233, + "step": 7646 + }, + { + "epoch": 0.4656700057850988, + "grad_norm": 1.0463317975892357, + "learning_rate": 4.927816537659096e-06, + "loss": 0.4264, + "step": 7647 + }, + { + "epoch": 0.46573090156197666, + "grad_norm": 1.0362478179955625, + "learning_rate": 4.927797499963871e-06, + "loss": 0.391, + "step": 7648 + }, + { + "epoch": 0.46579179733885456, + "grad_norm": 1.1584774111363845, + "learning_rate": 4.927778459795257e-06, + "loss": 0.4581, + "step": 7649 + }, + { + "epoch": 0.4658526931157324, + "grad_norm": 1.0322993677950245, + "learning_rate": 4.9277594171532724e-06, + "loss": 0.4274, + "step": 7650 + }, + { + "epoch": 0.4659135888926103, + "grad_norm": 0.9380571129069819, + "learning_rate": 4.9277403720379375e-06, + "loss": 0.422, + "step": 7651 + }, + { + "epoch": 0.46597448466948815, + "grad_norm": 1.0561504981801202, + "learning_rate": 4.927721324449271e-06, + "loss": 0.4032, + "step": 7652 + }, + { + "epoch": 0.46603538044636605, + "grad_norm": 1.0264559572876004, + "learning_rate": 4.927702274387291e-06, + "loss": 0.4815, + "step": 7653 + }, + { + "epoch": 0.4660962762232439, + "grad_norm": 1.0897660760935657, + "learning_rate": 4.9276832218520206e-06, + "loss": 0.3849, + "step": 7654 + }, + { + "epoch": 0.4661571720001218, + "grad_norm": 1.0144308599617984, + "learning_rate": 4.927664166843476e-06, + "loss": 0.4325, + "step": 7655 + }, + { + "epoch": 0.46621806777699965, + "grad_norm": 1.0680879975034205, + "learning_rate": 4.927645109361678e-06, + "loss": 0.4681, + "step": 7656 + }, + { + "epoch": 0.46627896355387755, + "grad_norm": 1.0643753542929504, + "learning_rate": 4.927626049406645e-06, + "loss": 0.4791, + "step": 7657 + }, + { + "epoch": 0.4663398593307554, + "grad_norm": 1.041630881783114, + "learning_rate": 4.927606986978397e-06, + "loss": 0.4575, + "step": 7658 + }, + { + "epoch": 0.4664007551076333, + "grad_norm": 0.9490433529765393, + "learning_rate": 4.927587922076955e-06, + "loss": 0.4392, + "step": 7659 + }, + { + "epoch": 0.46646165088451114, + "grad_norm": 1.0892995676588761, + "learning_rate": 4.927568854702335e-06, + "loss": 0.4078, + "step": 7660 + }, + { + "epoch": 0.46652254666138904, + "grad_norm": 1.0835261110195402, + "learning_rate": 4.92754978485456e-06, + "loss": 0.4069, + "step": 7661 + }, + { + "epoch": 0.4665834424382669, + "grad_norm": 1.0466853767161652, + "learning_rate": 4.927530712533646e-06, + "loss": 0.3695, + "step": 7662 + }, + { + "epoch": 0.4666443382151448, + "grad_norm": 1.161681575239277, + "learning_rate": 4.927511637739615e-06, + "loss": 0.3776, + "step": 7663 + }, + { + "epoch": 0.46670523399202263, + "grad_norm": 0.9614058024255653, + "learning_rate": 4.927492560472485e-06, + "loss": 0.474, + "step": 7664 + }, + { + "epoch": 0.46676612976890053, + "grad_norm": 1.0030665227967253, + "learning_rate": 4.927473480732278e-06, + "loss": 0.4642, + "step": 7665 + }, + { + "epoch": 0.4668270255457784, + "grad_norm": 1.0909636455942067, + "learning_rate": 4.92745439851901e-06, + "loss": 0.4355, + "step": 7666 + }, + { + "epoch": 0.4668879213226563, + "grad_norm": 0.9828793290015077, + "learning_rate": 4.927435313832702e-06, + "loss": 0.4861, + "step": 7667 + }, + { + "epoch": 0.4669488170995342, + "grad_norm": 0.9645753765687112, + "learning_rate": 4.927416226673373e-06, + "loss": 0.5025, + "step": 7668 + }, + { + "epoch": 0.467009712876412, + "grad_norm": 1.0001272006325554, + "learning_rate": 4.9273971370410435e-06, + "loss": 0.4013, + "step": 7669 + }, + { + "epoch": 0.4670706086532899, + "grad_norm": 1.0598383538279728, + "learning_rate": 4.9273780449357326e-06, + "loss": 0.4149, + "step": 7670 + }, + { + "epoch": 0.46713150443016777, + "grad_norm": 0.9705183100673982, + "learning_rate": 4.927358950357458e-06, + "loss": 0.4911, + "step": 7671 + }, + { + "epoch": 0.46719240020704567, + "grad_norm": 0.9481058903755106, + "learning_rate": 4.927339853306242e-06, + "loss": 0.4494, + "step": 7672 + }, + { + "epoch": 0.4672532959839235, + "grad_norm": 1.0534525574238431, + "learning_rate": 4.927320753782102e-06, + "loss": 0.3668, + "step": 7673 + }, + { + "epoch": 0.4673141917608014, + "grad_norm": 0.9119961102369724, + "learning_rate": 4.927301651785058e-06, + "loss": 0.4719, + "step": 7674 + }, + { + "epoch": 0.46737508753767926, + "grad_norm": 1.0035231314059332, + "learning_rate": 4.92728254731513e-06, + "loss": 0.443, + "step": 7675 + }, + { + "epoch": 0.46743598331455716, + "grad_norm": 0.9597924529555051, + "learning_rate": 4.927263440372336e-06, + "loss": 0.4735, + "step": 7676 + }, + { + "epoch": 0.467496879091435, + "grad_norm": 1.0289945849676323, + "learning_rate": 4.927244330956697e-06, + "loss": 0.45, + "step": 7677 + }, + { + "epoch": 0.4675577748683129, + "grad_norm": 1.1087502836640106, + "learning_rate": 4.927225219068232e-06, + "loss": 0.461, + "step": 7678 + }, + { + "epoch": 0.46761867064519075, + "grad_norm": 0.9601914447040139, + "learning_rate": 4.927206104706961e-06, + "loss": 0.4925, + "step": 7679 + }, + { + "epoch": 0.46767956642206865, + "grad_norm": 1.0415008268147596, + "learning_rate": 4.9271869878729025e-06, + "loss": 0.4777, + "step": 7680 + }, + { + "epoch": 0.4677404621989465, + "grad_norm": 0.9774495266855375, + "learning_rate": 4.927167868566076e-06, + "loss": 0.4352, + "step": 7681 + }, + { + "epoch": 0.4678013579758244, + "grad_norm": 0.9954536929571373, + "learning_rate": 4.927148746786502e-06, + "loss": 0.433, + "step": 7682 + }, + { + "epoch": 0.46786225375270224, + "grad_norm": 1.0383783750403672, + "learning_rate": 4.927129622534199e-06, + "loss": 0.4341, + "step": 7683 + }, + { + "epoch": 0.46792314952958014, + "grad_norm": 0.9623783943662301, + "learning_rate": 4.927110495809186e-06, + "loss": 0.4313, + "step": 7684 + }, + { + "epoch": 0.467984045306458, + "grad_norm": 0.9626287332961333, + "learning_rate": 4.927091366611484e-06, + "loss": 0.4678, + "step": 7685 + }, + { + "epoch": 0.4680449410833359, + "grad_norm": 0.9463294696084522, + "learning_rate": 4.9270722349411126e-06, + "loss": 0.4759, + "step": 7686 + }, + { + "epoch": 0.46810583686021373, + "grad_norm": 0.9912633668094067, + "learning_rate": 4.927053100798089e-06, + "loss": 0.4768, + "step": 7687 + }, + { + "epoch": 0.46816673263709163, + "grad_norm": 1.094998328358252, + "learning_rate": 4.927033964182435e-06, + "loss": 0.4405, + "step": 7688 + }, + { + "epoch": 0.4682276284139695, + "grad_norm": 1.0351095714685652, + "learning_rate": 4.927014825094169e-06, + "loss": 0.412, + "step": 7689 + }, + { + "epoch": 0.4682885241908474, + "grad_norm": 0.9522854295881967, + "learning_rate": 4.926995683533311e-06, + "loss": 0.4874, + "step": 7690 + }, + { + "epoch": 0.4683494199677252, + "grad_norm": 1.0528573951797966, + "learning_rate": 4.92697653949988e-06, + "loss": 0.4949, + "step": 7691 + }, + { + "epoch": 0.4684103157446031, + "grad_norm": 0.9915743604893201, + "learning_rate": 4.926957392993896e-06, + "loss": 0.4997, + "step": 7692 + }, + { + "epoch": 0.46847121152148097, + "grad_norm": 1.010807176759435, + "learning_rate": 4.9269382440153775e-06, + "loss": 0.4521, + "step": 7693 + }, + { + "epoch": 0.46853210729835887, + "grad_norm": 1.0180361052187372, + "learning_rate": 4.926919092564346e-06, + "loss": 0.4433, + "step": 7694 + }, + { + "epoch": 0.4685930030752367, + "grad_norm": 1.0754533858727, + "learning_rate": 4.926899938640819e-06, + "loss": 0.3937, + "step": 7695 + }, + { + "epoch": 0.4686538988521146, + "grad_norm": 1.0563565926871157, + "learning_rate": 4.926880782244817e-06, + "loss": 0.4895, + "step": 7696 + }, + { + "epoch": 0.46871479462899246, + "grad_norm": 0.9576807171010912, + "learning_rate": 4.926861623376359e-06, + "loss": 0.505, + "step": 7697 + }, + { + "epoch": 0.46877569040587036, + "grad_norm": 1.0309468632104022, + "learning_rate": 4.926842462035465e-06, + "loss": 0.403, + "step": 7698 + }, + { + "epoch": 0.4688365861827482, + "grad_norm": 0.9482427636023683, + "learning_rate": 4.926823298222155e-06, + "loss": 0.4676, + "step": 7699 + }, + { + "epoch": 0.4688974819596261, + "grad_norm": 0.9661802435079537, + "learning_rate": 4.9268041319364464e-06, + "loss": 0.44, + "step": 7700 + }, + { + "epoch": 0.46895837773650395, + "grad_norm": 1.0036840594080843, + "learning_rate": 4.926784963178361e-06, + "loss": 0.5273, + "step": 7701 + }, + { + "epoch": 0.46901927351338185, + "grad_norm": 1.0503811653194584, + "learning_rate": 4.926765791947918e-06, + "loss": 0.4265, + "step": 7702 + }, + { + "epoch": 0.4690801692902597, + "grad_norm": 1.0128933333243577, + "learning_rate": 4.9267466182451365e-06, + "loss": 0.4256, + "step": 7703 + }, + { + "epoch": 0.4691410650671376, + "grad_norm": 1.0034273654911956, + "learning_rate": 4.926727442070036e-06, + "loss": 0.4633, + "step": 7704 + }, + { + "epoch": 0.46920196084401544, + "grad_norm": 0.9788428313812036, + "learning_rate": 4.926708263422635e-06, + "loss": 0.4663, + "step": 7705 + }, + { + "epoch": 0.46926285662089334, + "grad_norm": 1.040998371945736, + "learning_rate": 4.9266890823029555e-06, + "loss": 0.4415, + "step": 7706 + }, + { + "epoch": 0.4693237523977712, + "grad_norm": 1.0293363235921387, + "learning_rate": 4.926669898711014e-06, + "loss": 0.4858, + "step": 7707 + }, + { + "epoch": 0.4693846481746491, + "grad_norm": 1.0175238090395105, + "learning_rate": 4.926650712646833e-06, + "loss": 0.3608, + "step": 7708 + }, + { + "epoch": 0.469445543951527, + "grad_norm": 1.0622884548575686, + "learning_rate": 4.92663152411043e-06, + "loss": 0.439, + "step": 7709 + }, + { + "epoch": 0.46950643972840483, + "grad_norm": 0.9466761591449251, + "learning_rate": 4.926612333101825e-06, + "loss": 0.4369, + "step": 7710 + }, + { + "epoch": 0.46956733550528273, + "grad_norm": 1.049271839896156, + "learning_rate": 4.92659313962104e-06, + "loss": 0.4313, + "step": 7711 + }, + { + "epoch": 0.4696282312821606, + "grad_norm": 1.0284185742092373, + "learning_rate": 4.92657394366809e-06, + "loss": 0.4061, + "step": 7712 + }, + { + "epoch": 0.4696891270590385, + "grad_norm": 1.0507145401824207, + "learning_rate": 4.926554745242998e-06, + "loss": 0.4237, + "step": 7713 + }, + { + "epoch": 0.4697500228359163, + "grad_norm": 1.0201326925088912, + "learning_rate": 4.926535544345781e-06, + "loss": 0.4936, + "step": 7714 + }, + { + "epoch": 0.4698109186127942, + "grad_norm": 1.133385676535608, + "learning_rate": 4.926516340976462e-06, + "loss": 0.4436, + "step": 7715 + }, + { + "epoch": 0.46987181438967207, + "grad_norm": 1.0529822116228675, + "learning_rate": 4.926497135135057e-06, + "loss": 0.4274, + "step": 7716 + }, + { + "epoch": 0.46993271016654997, + "grad_norm": 1.1169899381801027, + "learning_rate": 4.926477926821588e-06, + "loss": 0.5441, + "step": 7717 + }, + { + "epoch": 0.4699936059434278, + "grad_norm": 1.0129149760901277, + "learning_rate": 4.926458716036073e-06, + "loss": 0.4685, + "step": 7718 + }, + { + "epoch": 0.4700545017203057, + "grad_norm": 0.9646583623965019, + "learning_rate": 4.926439502778534e-06, + "loss": 0.4521, + "step": 7719 + }, + { + "epoch": 0.47011539749718356, + "grad_norm": 1.1466650027470413, + "learning_rate": 4.926420287048988e-06, + "loss": 0.4758, + "step": 7720 + }, + { + "epoch": 0.47017629327406146, + "grad_norm": 1.0151899693450965, + "learning_rate": 4.926401068847455e-06, + "loss": 0.4368, + "step": 7721 + }, + { + "epoch": 0.4702371890509393, + "grad_norm": 0.9726014318216967, + "learning_rate": 4.9263818481739545e-06, + "loss": 0.4926, + "step": 7722 + }, + { + "epoch": 0.4702980848278172, + "grad_norm": 1.021411817995389, + "learning_rate": 4.926362625028508e-06, + "loss": 0.4757, + "step": 7723 + }, + { + "epoch": 0.47035898060469505, + "grad_norm": 1.0254683664811686, + "learning_rate": 4.926343399411133e-06, + "loss": 0.4694, + "step": 7724 + }, + { + "epoch": 0.47041987638157295, + "grad_norm": 1.0354215358283432, + "learning_rate": 4.92632417132185e-06, + "loss": 0.4122, + "step": 7725 + }, + { + "epoch": 0.4704807721584508, + "grad_norm": 0.9443556291955643, + "learning_rate": 4.926304940760677e-06, + "loss": 0.4332, + "step": 7726 + }, + { + "epoch": 0.4705416679353287, + "grad_norm": 0.9652615479526111, + "learning_rate": 4.926285707727637e-06, + "loss": 0.4397, + "step": 7727 + }, + { + "epoch": 0.47060256371220655, + "grad_norm": 1.031656062840175, + "learning_rate": 4.926266472222747e-06, + "loss": 0.5471, + "step": 7728 + }, + { + "epoch": 0.47066345948908445, + "grad_norm": 0.9836783348151777, + "learning_rate": 4.926247234246026e-06, + "loss": 0.4411, + "step": 7729 + }, + { + "epoch": 0.4707243552659623, + "grad_norm": 1.1320949952925659, + "learning_rate": 4.9262279937974965e-06, + "loss": 0.3705, + "step": 7730 + }, + { + "epoch": 0.4707852510428402, + "grad_norm": 0.9627823226980137, + "learning_rate": 4.926208750877176e-06, + "loss": 0.4423, + "step": 7731 + }, + { + "epoch": 0.47084614681971804, + "grad_norm": 1.1372338024307416, + "learning_rate": 4.926189505485084e-06, + "loss": 0.4021, + "step": 7732 + }, + { + "epoch": 0.47090704259659594, + "grad_norm": 0.9572550362731801, + "learning_rate": 4.92617025762124e-06, + "loss": 0.4488, + "step": 7733 + }, + { + "epoch": 0.4709679383734738, + "grad_norm": 1.1560003747873562, + "learning_rate": 4.926151007285665e-06, + "loss": 0.4598, + "step": 7734 + }, + { + "epoch": 0.4710288341503517, + "grad_norm": 1.0360169995901114, + "learning_rate": 4.926131754478378e-06, + "loss": 0.4783, + "step": 7735 + }, + { + "epoch": 0.47108972992722953, + "grad_norm": 1.119948431459298, + "learning_rate": 4.926112499199397e-06, + "loss": 0.3681, + "step": 7736 + }, + { + "epoch": 0.47115062570410743, + "grad_norm": 1.006681334954362, + "learning_rate": 4.926093241448744e-06, + "loss": 0.4279, + "step": 7737 + }, + { + "epoch": 0.4712115214809853, + "grad_norm": 0.9687544473564933, + "learning_rate": 4.926073981226438e-06, + "loss": 0.4839, + "step": 7738 + }, + { + "epoch": 0.4712724172578632, + "grad_norm": 0.992418803740307, + "learning_rate": 4.926054718532498e-06, + "loss": 0.4579, + "step": 7739 + }, + { + "epoch": 0.471333313034741, + "grad_norm": 0.9822768862577064, + "learning_rate": 4.9260354533669445e-06, + "loss": 0.4906, + "step": 7740 + }, + { + "epoch": 0.4713942088116189, + "grad_norm": 1.0150658394682686, + "learning_rate": 4.926016185729795e-06, + "loss": 0.4437, + "step": 7741 + }, + { + "epoch": 0.47145510458849677, + "grad_norm": 0.9837951078577479, + "learning_rate": 4.9259969156210715e-06, + "loss": 0.4485, + "step": 7742 + }, + { + "epoch": 0.47151600036537467, + "grad_norm": 1.0639008860255574, + "learning_rate": 4.925977643040793e-06, + "loss": 0.4487, + "step": 7743 + }, + { + "epoch": 0.4715768961422525, + "grad_norm": 0.9526577567380115, + "learning_rate": 4.925958367988979e-06, + "loss": 0.5094, + "step": 7744 + }, + { + "epoch": 0.4716377919191304, + "grad_norm": 1.0462308286694084, + "learning_rate": 4.925939090465649e-06, + "loss": 0.4386, + "step": 7745 + }, + { + "epoch": 0.47169868769600826, + "grad_norm": 0.893477644574072, + "learning_rate": 4.925919810470822e-06, + "loss": 0.5628, + "step": 7746 + }, + { + "epoch": 0.47175958347288616, + "grad_norm": 0.9515201982012085, + "learning_rate": 4.925900528004519e-06, + "loss": 0.525, + "step": 7747 + }, + { + "epoch": 0.471820479249764, + "grad_norm": 1.0717786029547074, + "learning_rate": 4.9258812430667584e-06, + "loss": 0.4573, + "step": 7748 + }, + { + "epoch": 0.4718813750266419, + "grad_norm": 1.115333596998544, + "learning_rate": 4.925861955657561e-06, + "loss": 0.3795, + "step": 7749 + }, + { + "epoch": 0.4719422708035198, + "grad_norm": 0.9870915216516557, + "learning_rate": 4.925842665776946e-06, + "loss": 0.4354, + "step": 7750 + }, + { + "epoch": 0.47200316658039765, + "grad_norm": 1.0777863389604139, + "learning_rate": 4.925823373424932e-06, + "loss": 0.4354, + "step": 7751 + }, + { + "epoch": 0.47206406235727555, + "grad_norm": 1.0393061721189427, + "learning_rate": 4.92580407860154e-06, + "loss": 0.4248, + "step": 7752 + }, + { + "epoch": 0.4721249581341534, + "grad_norm": 0.949481627016758, + "learning_rate": 4.9257847813067895e-06, + "loss": 0.5284, + "step": 7753 + }, + { + "epoch": 0.4721858539110313, + "grad_norm": 1.0403917593860008, + "learning_rate": 4.9257654815407e-06, + "loss": 0.4602, + "step": 7754 + }, + { + "epoch": 0.47224674968790914, + "grad_norm": 1.0734734112865407, + "learning_rate": 4.92574617930329e-06, + "loss": 0.4274, + "step": 7755 + }, + { + "epoch": 0.47230764546478704, + "grad_norm": 0.9362964867235002, + "learning_rate": 4.9257268745945815e-06, + "loss": 0.4444, + "step": 7756 + }, + { + "epoch": 0.4723685412416649, + "grad_norm": 1.0171912785952155, + "learning_rate": 4.925707567414592e-06, + "loss": 0.5178, + "step": 7757 + }, + { + "epoch": 0.4724294370185428, + "grad_norm": 0.9670750519677896, + "learning_rate": 4.925688257763344e-06, + "loss": 0.5288, + "step": 7758 + }, + { + "epoch": 0.47249033279542063, + "grad_norm": 1.0180554882550479, + "learning_rate": 4.925668945640854e-06, + "loss": 0.4256, + "step": 7759 + }, + { + "epoch": 0.47255122857229853, + "grad_norm": 0.9689095889624965, + "learning_rate": 4.925649631047142e-06, + "loss": 0.4728, + "step": 7760 + }, + { + "epoch": 0.4726121243491764, + "grad_norm": 1.0857811287994044, + "learning_rate": 4.92563031398223e-06, + "loss": 0.4719, + "step": 7761 + }, + { + "epoch": 0.4726730201260543, + "grad_norm": 1.0120909227529948, + "learning_rate": 4.925610994446135e-06, + "loss": 0.4004, + "step": 7762 + }, + { + "epoch": 0.4727339159029321, + "grad_norm": 0.9966617597265285, + "learning_rate": 4.9255916724388784e-06, + "loss": 0.4588, + "step": 7763 + }, + { + "epoch": 0.47279481167981, + "grad_norm": 0.9624118166109759, + "learning_rate": 4.925572347960479e-06, + "loss": 0.512, + "step": 7764 + }, + { + "epoch": 0.47285570745668787, + "grad_norm": 0.9001970005886655, + "learning_rate": 4.925553021010958e-06, + "loss": 0.4627, + "step": 7765 + }, + { + "epoch": 0.47291660323356577, + "grad_norm": 1.0737295209833155, + "learning_rate": 4.925533691590333e-06, + "loss": 0.4736, + "step": 7766 + }, + { + "epoch": 0.4729774990104436, + "grad_norm": 1.089498915002385, + "learning_rate": 4.925514359698626e-06, + "loss": 0.4196, + "step": 7767 + }, + { + "epoch": 0.4730383947873215, + "grad_norm": 0.9892429052431857, + "learning_rate": 4.925495025335853e-06, + "loss": 0.5289, + "step": 7768 + }, + { + "epoch": 0.47309929056419936, + "grad_norm": 1.0684740194268285, + "learning_rate": 4.9254756885020386e-06, + "loss": 0.3718, + "step": 7769 + }, + { + "epoch": 0.47316018634107726, + "grad_norm": 0.993038851819892, + "learning_rate": 4.925456349197198e-06, + "loss": 0.4426, + "step": 7770 + }, + { + "epoch": 0.4732210821179551, + "grad_norm": 1.0115755435156408, + "learning_rate": 4.925437007421354e-06, + "loss": 0.5306, + "step": 7771 + }, + { + "epoch": 0.473281977894833, + "grad_norm": 1.026962188252694, + "learning_rate": 4.925417663174526e-06, + "loss": 0.5068, + "step": 7772 + }, + { + "epoch": 0.47334287367171085, + "grad_norm": 1.0069287075586042, + "learning_rate": 4.925398316456732e-06, + "loss": 0.4971, + "step": 7773 + }, + { + "epoch": 0.47340376944858875, + "grad_norm": 1.0470998399547213, + "learning_rate": 4.925378967267993e-06, + "loss": 0.428, + "step": 7774 + }, + { + "epoch": 0.4734646652254666, + "grad_norm": 0.9500340542869317, + "learning_rate": 4.925359615608328e-06, + "loss": 0.4229, + "step": 7775 + }, + { + "epoch": 0.4735255610023445, + "grad_norm": 1.010664310597724, + "learning_rate": 4.9253402614777566e-06, + "loss": 0.4055, + "step": 7776 + }, + { + "epoch": 0.47358645677922234, + "grad_norm": 0.9208917385519862, + "learning_rate": 4.925320904876299e-06, + "loss": 0.4092, + "step": 7777 + }, + { + "epoch": 0.47364735255610024, + "grad_norm": 1.0155265562757188, + "learning_rate": 4.925301545803976e-06, + "loss": 0.4575, + "step": 7778 + }, + { + "epoch": 0.4737082483329781, + "grad_norm": 1.04914739105807, + "learning_rate": 4.9252821842608055e-06, + "loss": 0.4249, + "step": 7779 + }, + { + "epoch": 0.473769144109856, + "grad_norm": 0.9429747632709398, + "learning_rate": 4.925262820246808e-06, + "loss": 0.4494, + "step": 7780 + }, + { + "epoch": 0.47383003988673383, + "grad_norm": 1.1349626237761383, + "learning_rate": 4.925243453762003e-06, + "loss": 0.4192, + "step": 7781 + }, + { + "epoch": 0.47389093566361173, + "grad_norm": 1.0329739891357557, + "learning_rate": 4.9252240848064105e-06, + "loss": 0.4187, + "step": 7782 + }, + { + "epoch": 0.4739518314404896, + "grad_norm": 1.068266495376303, + "learning_rate": 4.92520471338005e-06, + "loss": 0.4552, + "step": 7783 + }, + { + "epoch": 0.4740127272173675, + "grad_norm": 0.9801043271400582, + "learning_rate": 4.925185339482942e-06, + "loss": 0.4789, + "step": 7784 + }, + { + "epoch": 0.4740736229942453, + "grad_norm": 1.0228860236485937, + "learning_rate": 4.925165963115104e-06, + "loss": 0.5032, + "step": 7785 + }, + { + "epoch": 0.4741345187711232, + "grad_norm": 0.9503651870575994, + "learning_rate": 4.925146584276559e-06, + "loss": 0.4757, + "step": 7786 + }, + { + "epoch": 0.47419541454800107, + "grad_norm": 0.9743408872452328, + "learning_rate": 4.925127202967325e-06, + "loss": 0.4682, + "step": 7787 + }, + { + "epoch": 0.47425631032487897, + "grad_norm": 0.9759899180080719, + "learning_rate": 4.925107819187421e-06, + "loss": 0.3926, + "step": 7788 + }, + { + "epoch": 0.4743172061017568, + "grad_norm": 0.9716325107241462, + "learning_rate": 4.925088432936868e-06, + "loss": 0.4965, + "step": 7789 + }, + { + "epoch": 0.4743781018786347, + "grad_norm": 1.0856680063408937, + "learning_rate": 4.925069044215685e-06, + "loss": 0.4215, + "step": 7790 + }, + { + "epoch": 0.4744389976555126, + "grad_norm": 1.0149887712881516, + "learning_rate": 4.925049653023892e-06, + "loss": 0.4195, + "step": 7791 + }, + { + "epoch": 0.47449989343239046, + "grad_norm": 1.0476744258508803, + "learning_rate": 4.9250302593615095e-06, + "loss": 0.4361, + "step": 7792 + }, + { + "epoch": 0.47456078920926836, + "grad_norm": 0.9994981610750653, + "learning_rate": 4.925010863228556e-06, + "loss": 0.4294, + "step": 7793 + }, + { + "epoch": 0.4746216849861462, + "grad_norm": 1.066529338140254, + "learning_rate": 4.924991464625052e-06, + "loss": 0.4569, + "step": 7794 + }, + { + "epoch": 0.4746825807630241, + "grad_norm": 0.929313357118677, + "learning_rate": 4.924972063551017e-06, + "loss": 0.4796, + "step": 7795 + }, + { + "epoch": 0.47474347653990195, + "grad_norm": 0.9629881553772892, + "learning_rate": 4.924952660006471e-06, + "loss": 0.4406, + "step": 7796 + }, + { + "epoch": 0.47480437231677985, + "grad_norm": 1.0091275201765249, + "learning_rate": 4.924933253991434e-06, + "loss": 0.4462, + "step": 7797 + }, + { + "epoch": 0.4748652680936577, + "grad_norm": 0.9765013792836544, + "learning_rate": 4.924913845505925e-06, + "loss": 0.4217, + "step": 7798 + }, + { + "epoch": 0.4749261638705356, + "grad_norm": 1.053575559951427, + "learning_rate": 4.924894434549964e-06, + "loss": 0.4386, + "step": 7799 + }, + { + "epoch": 0.47498705964741345, + "grad_norm": 0.9587020906790237, + "learning_rate": 4.924875021123572e-06, + "loss": 0.463, + "step": 7800 + }, + { + "epoch": 0.47504795542429135, + "grad_norm": 0.9102038153747581, + "learning_rate": 4.9248556052267675e-06, + "loss": 0.5019, + "step": 7801 + }, + { + "epoch": 0.4751088512011692, + "grad_norm": 1.1049008472912878, + "learning_rate": 4.9248361868595695e-06, + "loss": 0.3609, + "step": 7802 + }, + { + "epoch": 0.4751697469780471, + "grad_norm": 1.043541981583164, + "learning_rate": 4.924816766021999e-06, + "loss": 0.4939, + "step": 7803 + }, + { + "epoch": 0.47523064275492494, + "grad_norm": 0.9225950429123436, + "learning_rate": 4.9247973427140765e-06, + "loss": 0.457, + "step": 7804 + }, + { + "epoch": 0.47529153853180284, + "grad_norm": 1.079885270712922, + "learning_rate": 4.92477791693582e-06, + "loss": 0.4096, + "step": 7805 + }, + { + "epoch": 0.4753524343086807, + "grad_norm": 1.0095968486999618, + "learning_rate": 4.92475848868725e-06, + "loss": 0.4824, + "step": 7806 + }, + { + "epoch": 0.4754133300855586, + "grad_norm": 0.9874366680328446, + "learning_rate": 4.9247390579683875e-06, + "loss": 0.528, + "step": 7807 + }, + { + "epoch": 0.47547422586243643, + "grad_norm": 1.081383162255722, + "learning_rate": 4.9247196247792505e-06, + "loss": 0.4856, + "step": 7808 + }, + { + "epoch": 0.47553512163931433, + "grad_norm": 1.0731889420457679, + "learning_rate": 4.92470018911986e-06, + "loss": 0.4167, + "step": 7809 + }, + { + "epoch": 0.4755960174161922, + "grad_norm": 0.9982155939879239, + "learning_rate": 4.924680750990235e-06, + "loss": 0.5294, + "step": 7810 + }, + { + "epoch": 0.4756569131930701, + "grad_norm": 0.9746235069099861, + "learning_rate": 4.924661310390396e-06, + "loss": 0.4508, + "step": 7811 + }, + { + "epoch": 0.4757178089699479, + "grad_norm": 0.9908791459392685, + "learning_rate": 4.924641867320362e-06, + "loss": 0.438, + "step": 7812 + }, + { + "epoch": 0.4757787047468258, + "grad_norm": 0.9561970839451989, + "learning_rate": 4.924622421780153e-06, + "loss": 0.4612, + "step": 7813 + }, + { + "epoch": 0.47583960052370367, + "grad_norm": 0.9665653687865994, + "learning_rate": 4.924602973769789e-06, + "loss": 0.4991, + "step": 7814 + }, + { + "epoch": 0.47590049630058157, + "grad_norm": 0.989005236095111, + "learning_rate": 4.924583523289291e-06, + "loss": 0.5615, + "step": 7815 + }, + { + "epoch": 0.4759613920774594, + "grad_norm": 0.9682715211191737, + "learning_rate": 4.924564070338677e-06, + "loss": 0.4157, + "step": 7816 + }, + { + "epoch": 0.4760222878543373, + "grad_norm": 1.0883486848145243, + "learning_rate": 4.924544614917969e-06, + "loss": 0.337, + "step": 7817 + }, + { + "epoch": 0.47608318363121516, + "grad_norm": 1.011174711334035, + "learning_rate": 4.9245251570271835e-06, + "loss": 0.4857, + "step": 7818 + }, + { + "epoch": 0.47614407940809306, + "grad_norm": 0.9394792611853726, + "learning_rate": 4.9245056966663425e-06, + "loss": 0.4412, + "step": 7819 + }, + { + "epoch": 0.4762049751849709, + "grad_norm": 0.9770173386997126, + "learning_rate": 4.924486233835466e-06, + "loss": 0.4379, + "step": 7820 + }, + { + "epoch": 0.4762658709618488, + "grad_norm": 0.9982410460307748, + "learning_rate": 4.924466768534574e-06, + "loss": 0.5312, + "step": 7821 + }, + { + "epoch": 0.47632676673872665, + "grad_norm": 1.1834540640760616, + "learning_rate": 4.924447300763684e-06, + "loss": 0.3715, + "step": 7822 + }, + { + "epoch": 0.47638766251560455, + "grad_norm": 1.0331772957957757, + "learning_rate": 4.9244278305228186e-06, + "loss": 0.4121, + "step": 7823 + }, + { + "epoch": 0.4764485582924824, + "grad_norm": 1.0147257890600725, + "learning_rate": 4.924408357811996e-06, + "loss": 0.446, + "step": 7824 + }, + { + "epoch": 0.4765094540693603, + "grad_norm": 0.9452575635936044, + "learning_rate": 4.924388882631237e-06, + "loss": 0.5244, + "step": 7825 + }, + { + "epoch": 0.47657034984623814, + "grad_norm": 1.0066106727172548, + "learning_rate": 4.924369404980561e-06, + "loss": 0.456, + "step": 7826 + }, + { + "epoch": 0.47663124562311604, + "grad_norm": 1.1051012229939703, + "learning_rate": 4.924349924859987e-06, + "loss": 0.476, + "step": 7827 + }, + { + "epoch": 0.4766921413999939, + "grad_norm": 1.0648419925777408, + "learning_rate": 4.9243304422695374e-06, + "loss": 0.4327, + "step": 7828 + }, + { + "epoch": 0.4767530371768718, + "grad_norm": 1.0142298930498597, + "learning_rate": 4.924310957209228e-06, + "loss": 0.4278, + "step": 7829 + }, + { + "epoch": 0.47681393295374963, + "grad_norm": 1.0532335206180257, + "learning_rate": 4.924291469679083e-06, + "loss": 0.408, + "step": 7830 + }, + { + "epoch": 0.47687482873062753, + "grad_norm": 0.9756109566293438, + "learning_rate": 4.924271979679119e-06, + "loss": 0.5007, + "step": 7831 + }, + { + "epoch": 0.47693572450750543, + "grad_norm": 1.012016040774282, + "learning_rate": 4.9242524872093575e-06, + "loss": 0.4141, + "step": 7832 + }, + { + "epoch": 0.4769966202843833, + "grad_norm": 1.068312841801762, + "learning_rate": 4.924232992269819e-06, + "loss": 0.4693, + "step": 7833 + }, + { + "epoch": 0.4770575160612612, + "grad_norm": 0.9828228611873799, + "learning_rate": 4.924213494860521e-06, + "loss": 0.4872, + "step": 7834 + }, + { + "epoch": 0.477118411838139, + "grad_norm": 1.0955454204726225, + "learning_rate": 4.924193994981485e-06, + "loss": 0.4367, + "step": 7835 + }, + { + "epoch": 0.4771793076150169, + "grad_norm": 1.0058604196175607, + "learning_rate": 4.9241744926327315e-06, + "loss": 0.4601, + "step": 7836 + }, + { + "epoch": 0.47724020339189477, + "grad_norm": 1.0756495307780478, + "learning_rate": 4.924154987814278e-06, + "loss": 0.4524, + "step": 7837 + }, + { + "epoch": 0.47730109916877267, + "grad_norm": 1.1146701611586034, + "learning_rate": 4.924135480526146e-06, + "loss": 0.5111, + "step": 7838 + }, + { + "epoch": 0.4773619949456505, + "grad_norm": 1.025221167196256, + "learning_rate": 4.924115970768356e-06, + "loss": 0.5002, + "step": 7839 + }, + { + "epoch": 0.4774228907225284, + "grad_norm": 0.9647559158481427, + "learning_rate": 4.924096458540927e-06, + "loss": 0.4874, + "step": 7840 + }, + { + "epoch": 0.47748378649940626, + "grad_norm": 1.035138945727463, + "learning_rate": 4.9240769438438785e-06, + "loss": 0.5147, + "step": 7841 + }, + { + "epoch": 0.47754468227628416, + "grad_norm": 1.1406159559528635, + "learning_rate": 4.924057426677231e-06, + "loss": 0.4346, + "step": 7842 + }, + { + "epoch": 0.477605578053162, + "grad_norm": 1.0013573823825817, + "learning_rate": 4.924037907041004e-06, + "loss": 0.4629, + "step": 7843 + }, + { + "epoch": 0.4776664738300399, + "grad_norm": 1.103614890509314, + "learning_rate": 4.924018384935217e-06, + "loss": 0.4007, + "step": 7844 + }, + { + "epoch": 0.47772736960691775, + "grad_norm": 1.0356693897839127, + "learning_rate": 4.923998860359892e-06, + "loss": 0.4751, + "step": 7845 + }, + { + "epoch": 0.47778826538379565, + "grad_norm": 0.901229651308117, + "learning_rate": 4.923979333315046e-06, + "loss": 0.4984, + "step": 7846 + }, + { + "epoch": 0.4778491611606735, + "grad_norm": 0.9739306183076951, + "learning_rate": 4.923959803800701e-06, + "loss": 0.5115, + "step": 7847 + }, + { + "epoch": 0.4779100569375514, + "grad_norm": 0.9514378991340509, + "learning_rate": 4.923940271816876e-06, + "loss": 0.4879, + "step": 7848 + }, + { + "epoch": 0.47797095271442924, + "grad_norm": 0.9488148363461857, + "learning_rate": 4.923920737363591e-06, + "loss": 0.4686, + "step": 7849 + }, + { + "epoch": 0.47803184849130714, + "grad_norm": 1.0033302033389198, + "learning_rate": 4.923901200440866e-06, + "loss": 0.4578, + "step": 7850 + }, + { + "epoch": 0.478092744268185, + "grad_norm": 1.049315169645408, + "learning_rate": 4.92388166104872e-06, + "loss": 0.4004, + "step": 7851 + }, + { + "epoch": 0.4781536400450629, + "grad_norm": 1.0706657059044082, + "learning_rate": 4.923862119187175e-06, + "loss": 0.4567, + "step": 7852 + }, + { + "epoch": 0.47821453582194073, + "grad_norm": 1.0886684377502163, + "learning_rate": 4.923842574856249e-06, + "loss": 0.425, + "step": 7853 + }, + { + "epoch": 0.47827543159881863, + "grad_norm": 1.0527286758684709, + "learning_rate": 4.923823028055963e-06, + "loss": 0.4584, + "step": 7854 + }, + { + "epoch": 0.4783363273756965, + "grad_norm": 1.1751317684616565, + "learning_rate": 4.923803478786336e-06, + "loss": 0.4502, + "step": 7855 + }, + { + "epoch": 0.4783972231525744, + "grad_norm": 0.9912939882228421, + "learning_rate": 4.923783927047389e-06, + "loss": 0.469, + "step": 7856 + }, + { + "epoch": 0.4784581189294522, + "grad_norm": 0.9864251930832775, + "learning_rate": 4.92376437283914e-06, + "loss": 0.449, + "step": 7857 + }, + { + "epoch": 0.4785190147063301, + "grad_norm": 0.9569558581210431, + "learning_rate": 4.923744816161612e-06, + "loss": 0.4898, + "step": 7858 + }, + { + "epoch": 0.47857991048320797, + "grad_norm": 1.028970603476688, + "learning_rate": 4.923725257014822e-06, + "loss": 0.4894, + "step": 7859 + }, + { + "epoch": 0.47864080626008587, + "grad_norm": 1.0625748050054888, + "learning_rate": 4.9237056953987916e-06, + "loss": 0.4678, + "step": 7860 + }, + { + "epoch": 0.4787017020369637, + "grad_norm": 1.0678047291285526, + "learning_rate": 4.92368613131354e-06, + "loss": 0.3885, + "step": 7861 + }, + { + "epoch": 0.4787625978138416, + "grad_norm": 1.0585342749505084, + "learning_rate": 4.9236665647590874e-06, + "loss": 0.4159, + "step": 7862 + }, + { + "epoch": 0.47882349359071946, + "grad_norm": 1.065197045642679, + "learning_rate": 4.923646995735453e-06, + "loss": 0.4147, + "step": 7863 + }, + { + "epoch": 0.47888438936759736, + "grad_norm": 0.9960969524117911, + "learning_rate": 4.923627424242659e-06, + "loss": 0.4179, + "step": 7864 + }, + { + "epoch": 0.4789452851444752, + "grad_norm": 1.1464200660436568, + "learning_rate": 4.923607850280723e-06, + "loss": 0.4558, + "step": 7865 + }, + { + "epoch": 0.4790061809213531, + "grad_norm": 1.09375029063638, + "learning_rate": 4.923588273849665e-06, + "loss": 0.4398, + "step": 7866 + }, + { + "epoch": 0.47906707669823095, + "grad_norm": 1.040824827206896, + "learning_rate": 4.923568694949507e-06, + "loss": 0.4103, + "step": 7867 + }, + { + "epoch": 0.47912797247510885, + "grad_norm": 0.998775706236044, + "learning_rate": 4.923549113580267e-06, + "loss": 0.4311, + "step": 7868 + }, + { + "epoch": 0.4791888682519867, + "grad_norm": 1.1314448509269035, + "learning_rate": 4.923529529741965e-06, + "loss": 0.4146, + "step": 7869 + }, + { + "epoch": 0.4792497640288646, + "grad_norm": 0.9432483194717016, + "learning_rate": 4.9235099434346225e-06, + "loss": 0.5085, + "step": 7870 + }, + { + "epoch": 0.47931065980574245, + "grad_norm": 0.9863752728617716, + "learning_rate": 4.923490354658258e-06, + "loss": 0.5114, + "step": 7871 + }, + { + "epoch": 0.47937155558262035, + "grad_norm": 1.0558458624143452, + "learning_rate": 4.923470763412892e-06, + "loss": 0.4175, + "step": 7872 + }, + { + "epoch": 0.47943245135949825, + "grad_norm": 1.0294227874959367, + "learning_rate": 4.923451169698544e-06, + "loss": 0.4768, + "step": 7873 + }, + { + "epoch": 0.4794933471363761, + "grad_norm": 1.0390912647741328, + "learning_rate": 4.923431573515234e-06, + "loss": 0.4202, + "step": 7874 + }, + { + "epoch": 0.479554242913254, + "grad_norm": 0.9756542426658389, + "learning_rate": 4.923411974862983e-06, + "loss": 0.4772, + "step": 7875 + }, + { + "epoch": 0.47961513869013184, + "grad_norm": 1.0077304300998033, + "learning_rate": 4.92339237374181e-06, + "loss": 0.4435, + "step": 7876 + }, + { + "epoch": 0.47967603446700974, + "grad_norm": 1.0563469741227343, + "learning_rate": 4.923372770151735e-06, + "loss": 0.4201, + "step": 7877 + }, + { + "epoch": 0.4797369302438876, + "grad_norm": 1.0220057016281017, + "learning_rate": 4.923353164092779e-06, + "loss": 0.4656, + "step": 7878 + }, + { + "epoch": 0.4797978260207655, + "grad_norm": 1.012410455358937, + "learning_rate": 4.923333555564961e-06, + "loss": 0.3816, + "step": 7879 + }, + { + "epoch": 0.47985872179764333, + "grad_norm": 1.0606797352226653, + "learning_rate": 4.923313944568301e-06, + "loss": 0.4383, + "step": 7880 + }, + { + "epoch": 0.47991961757452123, + "grad_norm": 1.0535545012653686, + "learning_rate": 4.923294331102819e-06, + "loss": 0.5192, + "step": 7881 + }, + { + "epoch": 0.4799805133513991, + "grad_norm": 0.982587903395987, + "learning_rate": 4.923274715168535e-06, + "loss": 0.4337, + "step": 7882 + }, + { + "epoch": 0.480041409128277, + "grad_norm": 1.019434951889444, + "learning_rate": 4.923255096765469e-06, + "loss": 0.4442, + "step": 7883 + }, + { + "epoch": 0.4801023049051548, + "grad_norm": 0.9972563483514296, + "learning_rate": 4.9232354758936415e-06, + "loss": 0.3927, + "step": 7884 + }, + { + "epoch": 0.4801632006820327, + "grad_norm": 1.0352109061086123, + "learning_rate": 4.923215852553072e-06, + "loss": 0.3827, + "step": 7885 + }, + { + "epoch": 0.48022409645891057, + "grad_norm": 1.0410378042838015, + "learning_rate": 4.923196226743781e-06, + "loss": 0.4139, + "step": 7886 + }, + { + "epoch": 0.48028499223578847, + "grad_norm": 1.0554983447814543, + "learning_rate": 4.9231765984657866e-06, + "loss": 0.4023, + "step": 7887 + }, + { + "epoch": 0.4803458880126663, + "grad_norm": 1.0165822172630277, + "learning_rate": 4.9231569677191115e-06, + "loss": 0.5247, + "step": 7888 + }, + { + "epoch": 0.4804067837895442, + "grad_norm": 0.9837561501983513, + "learning_rate": 4.923137334503774e-06, + "loss": 0.4747, + "step": 7889 + }, + { + "epoch": 0.48046767956642206, + "grad_norm": 0.9784759590367564, + "learning_rate": 4.923117698819795e-06, + "loss": 0.4484, + "step": 7890 + }, + { + "epoch": 0.48052857534329996, + "grad_norm": 0.9658029965958851, + "learning_rate": 4.923098060667193e-06, + "loss": 0.4797, + "step": 7891 + }, + { + "epoch": 0.4805894711201778, + "grad_norm": 1.0501407103536022, + "learning_rate": 4.92307842004599e-06, + "loss": 0.4234, + "step": 7892 + }, + { + "epoch": 0.4806503668970557, + "grad_norm": 1.0480612055237402, + "learning_rate": 4.923058776956205e-06, + "loss": 0.4004, + "step": 7893 + }, + { + "epoch": 0.48071126267393355, + "grad_norm": 0.9853367210086413, + "learning_rate": 4.923039131397859e-06, + "loss": 0.5272, + "step": 7894 + }, + { + "epoch": 0.48077215845081145, + "grad_norm": 0.9643620351291262, + "learning_rate": 4.92301948337097e-06, + "loss": 0.4449, + "step": 7895 + }, + { + "epoch": 0.4808330542276893, + "grad_norm": 1.025990693418907, + "learning_rate": 4.922999832875559e-06, + "loss": 0.4862, + "step": 7896 + }, + { + "epoch": 0.4808939500045672, + "grad_norm": 0.9218246073732708, + "learning_rate": 4.922980179911646e-06, + "loss": 0.4807, + "step": 7897 + }, + { + "epoch": 0.48095484578144504, + "grad_norm": 1.0748152230463843, + "learning_rate": 4.922960524479252e-06, + "loss": 0.4776, + "step": 7898 + }, + { + "epoch": 0.48101574155832294, + "grad_norm": 1.0416168583769394, + "learning_rate": 4.922940866578395e-06, + "loss": 0.4524, + "step": 7899 + }, + { + "epoch": 0.4810766373352008, + "grad_norm": 1.1152748754660538, + "learning_rate": 4.922921206209098e-06, + "loss": 0.3931, + "step": 7900 + }, + { + "epoch": 0.4811375331120787, + "grad_norm": 1.113954344124321, + "learning_rate": 4.9229015433713775e-06, + "loss": 0.3787, + "step": 7901 + }, + { + "epoch": 0.48119842888895653, + "grad_norm": 0.950233073059309, + "learning_rate": 4.922881878065257e-06, + "loss": 0.5285, + "step": 7902 + }, + { + "epoch": 0.48125932466583443, + "grad_norm": 0.9735875391570572, + "learning_rate": 4.922862210290753e-06, + "loss": 0.4997, + "step": 7903 + }, + { + "epoch": 0.4813202204427123, + "grad_norm": 1.0038621293031798, + "learning_rate": 4.922842540047888e-06, + "loss": 0.4674, + "step": 7904 + }, + { + "epoch": 0.4813811162195902, + "grad_norm": 1.0259213342883926, + "learning_rate": 4.922822867336682e-06, + "loss": 0.4136, + "step": 7905 + }, + { + "epoch": 0.481442011996468, + "grad_norm": 1.057572075381844, + "learning_rate": 4.922803192157154e-06, + "loss": 0.4193, + "step": 7906 + }, + { + "epoch": 0.4815029077733459, + "grad_norm": 1.0486919384583269, + "learning_rate": 4.922783514509324e-06, + "loss": 0.4928, + "step": 7907 + }, + { + "epoch": 0.48156380355022377, + "grad_norm": 0.997656033443066, + "learning_rate": 4.9227638343932134e-06, + "loss": 0.477, + "step": 7908 + }, + { + "epoch": 0.48162469932710167, + "grad_norm": 0.97036659352707, + "learning_rate": 4.922744151808841e-06, + "loss": 0.4477, + "step": 7909 + }, + { + "epoch": 0.4816855951039795, + "grad_norm": 1.0927254671134752, + "learning_rate": 4.9227244667562275e-06, + "loss": 0.4511, + "step": 7910 + }, + { + "epoch": 0.4817464908808574, + "grad_norm": 0.9891435077760861, + "learning_rate": 4.922704779235392e-06, + "loss": 0.4425, + "step": 7911 + }, + { + "epoch": 0.48180738665773526, + "grad_norm": 1.0237762323793513, + "learning_rate": 4.922685089246356e-06, + "loss": 0.433, + "step": 7912 + }, + { + "epoch": 0.48186828243461316, + "grad_norm": 0.962276916317085, + "learning_rate": 4.922665396789138e-06, + "loss": 0.504, + "step": 7913 + }, + { + "epoch": 0.48192917821149106, + "grad_norm": 1.034595092271063, + "learning_rate": 4.92264570186376e-06, + "loss": 0.4528, + "step": 7914 + }, + { + "epoch": 0.4819900739883689, + "grad_norm": 1.0804426991594718, + "learning_rate": 4.9226260044702405e-06, + "loss": 0.4481, + "step": 7915 + }, + { + "epoch": 0.4820509697652468, + "grad_norm": 1.0128760106259007, + "learning_rate": 4.922606304608599e-06, + "loss": 0.4265, + "step": 7916 + }, + { + "epoch": 0.48211186554212465, + "grad_norm": 0.9494994184209335, + "learning_rate": 4.922586602278857e-06, + "loss": 0.4495, + "step": 7917 + }, + { + "epoch": 0.48217276131900255, + "grad_norm": 0.9782591099078753, + "learning_rate": 4.922566897481035e-06, + "loss": 0.5181, + "step": 7918 + }, + { + "epoch": 0.4822336570958804, + "grad_norm": 1.0967188623609754, + "learning_rate": 4.922547190215151e-06, + "loss": 0.4658, + "step": 7919 + }, + { + "epoch": 0.4822945528727583, + "grad_norm": 1.0096062766855762, + "learning_rate": 4.922527480481227e-06, + "loss": 0.459, + "step": 7920 + }, + { + "epoch": 0.48235544864963614, + "grad_norm": 1.0886457061784531, + "learning_rate": 4.922507768279283e-06, + "loss": 0.4479, + "step": 7921 + }, + { + "epoch": 0.48241634442651404, + "grad_norm": 1.10100575342885, + "learning_rate": 4.922488053609338e-06, + "loss": 0.3825, + "step": 7922 + }, + { + "epoch": 0.4824772402033919, + "grad_norm": 1.0741119647976012, + "learning_rate": 4.922468336471412e-06, + "loss": 0.3835, + "step": 7923 + }, + { + "epoch": 0.4825381359802698, + "grad_norm": 1.0873006902207674, + "learning_rate": 4.922448616865525e-06, + "loss": 0.4642, + "step": 7924 + }, + { + "epoch": 0.48259903175714763, + "grad_norm": 1.0974956763657853, + "learning_rate": 4.922428894791699e-06, + "loss": 0.4615, + "step": 7925 + }, + { + "epoch": 0.48265992753402553, + "grad_norm": 1.0040826452044647, + "learning_rate": 4.922409170249952e-06, + "loss": 0.4693, + "step": 7926 + }, + { + "epoch": 0.4827208233109034, + "grad_norm": 0.9921562449181646, + "learning_rate": 4.922389443240306e-06, + "loss": 0.5262, + "step": 7927 + }, + { + "epoch": 0.4827817190877813, + "grad_norm": 0.9862638137260942, + "learning_rate": 4.922369713762779e-06, + "loss": 0.3964, + "step": 7928 + }, + { + "epoch": 0.4828426148646591, + "grad_norm": 1.1053665400020976, + "learning_rate": 4.922349981817393e-06, + "loss": 0.4081, + "step": 7929 + }, + { + "epoch": 0.482903510641537, + "grad_norm": 1.0117168529619478, + "learning_rate": 4.9223302474041664e-06, + "loss": 0.4727, + "step": 7930 + }, + { + "epoch": 0.48296440641841487, + "grad_norm": 1.0535484838524576, + "learning_rate": 4.9223105105231204e-06, + "loss": 0.4622, + "step": 7931 + }, + { + "epoch": 0.48302530219529277, + "grad_norm": 1.0233342370922682, + "learning_rate": 4.922290771174275e-06, + "loss": 0.4771, + "step": 7932 + }, + { + "epoch": 0.4830861979721706, + "grad_norm": 1.0793297446876529, + "learning_rate": 4.92227102935765e-06, + "loss": 0.4318, + "step": 7933 + }, + { + "epoch": 0.4831470937490485, + "grad_norm": 0.9829002043413015, + "learning_rate": 4.9222512850732655e-06, + "loss": 0.4368, + "step": 7934 + }, + { + "epoch": 0.48320798952592636, + "grad_norm": 1.1107457879530838, + "learning_rate": 4.9222315383211414e-06, + "loss": 0.4309, + "step": 7935 + }, + { + "epoch": 0.48326888530280426, + "grad_norm": 1.0121964804552517, + "learning_rate": 4.922211789101299e-06, + "loss": 0.4738, + "step": 7936 + }, + { + "epoch": 0.4833297810796821, + "grad_norm": 1.0193175313572862, + "learning_rate": 4.922192037413757e-06, + "loss": 0.4547, + "step": 7937 + }, + { + "epoch": 0.48339067685656, + "grad_norm": 0.982600459038201, + "learning_rate": 4.922172283258536e-06, + "loss": 0.4339, + "step": 7938 + }, + { + "epoch": 0.48345157263343785, + "grad_norm": 0.9854948460403133, + "learning_rate": 4.922152526635656e-06, + "loss": 0.463, + "step": 7939 + }, + { + "epoch": 0.48351246841031575, + "grad_norm": 1.020066827575857, + "learning_rate": 4.922132767545138e-06, + "loss": 0.4729, + "step": 7940 + }, + { + "epoch": 0.4835733641871936, + "grad_norm": 0.9033682970621506, + "learning_rate": 4.922113005987001e-06, + "loss": 0.4624, + "step": 7941 + }, + { + "epoch": 0.4836342599640715, + "grad_norm": 0.9636125307591183, + "learning_rate": 4.9220932419612666e-06, + "loss": 0.4428, + "step": 7942 + }, + { + "epoch": 0.48369515574094935, + "grad_norm": 1.1197644814616747, + "learning_rate": 4.922073475467953e-06, + "loss": 0.4261, + "step": 7943 + }, + { + "epoch": 0.48375605151782725, + "grad_norm": 1.0247417378541759, + "learning_rate": 4.922053706507082e-06, + "loss": 0.523, + "step": 7944 + }, + { + "epoch": 0.4838169472947051, + "grad_norm": 0.9794658360629525, + "learning_rate": 4.922033935078672e-06, + "loss": 0.4265, + "step": 7945 + }, + { + "epoch": 0.483877843071583, + "grad_norm": 1.0347247697944977, + "learning_rate": 4.922014161182745e-06, + "loss": 0.4623, + "step": 7946 + }, + { + "epoch": 0.48393873884846084, + "grad_norm": 0.980059293928669, + "learning_rate": 4.92199438481932e-06, + "loss": 0.4277, + "step": 7947 + }, + { + "epoch": 0.48399963462533874, + "grad_norm": 0.975003791967864, + "learning_rate": 4.921974605988418e-06, + "loss": 0.3828, + "step": 7948 + }, + { + "epoch": 0.4840605304022166, + "grad_norm": 0.9351412627305169, + "learning_rate": 4.921954824690058e-06, + "loss": 0.4645, + "step": 7949 + }, + { + "epoch": 0.4841214261790945, + "grad_norm": 1.0823464672155862, + "learning_rate": 4.9219350409242615e-06, + "loss": 0.4899, + "step": 7950 + }, + { + "epoch": 0.48418232195597233, + "grad_norm": 0.9845235159648198, + "learning_rate": 4.9219152546910475e-06, + "loss": 0.4547, + "step": 7951 + }, + { + "epoch": 0.48424321773285023, + "grad_norm": 1.1136223173417257, + "learning_rate": 4.921895465990436e-06, + "loss": 0.4401, + "step": 7952 + }, + { + "epoch": 0.4843041135097281, + "grad_norm": 0.9742489774588703, + "learning_rate": 4.9218756748224486e-06, + "loss": 0.4628, + "step": 7953 + }, + { + "epoch": 0.484365009286606, + "grad_norm": 1.0309690689819015, + "learning_rate": 4.921855881187104e-06, + "loss": 0.4847, + "step": 7954 + }, + { + "epoch": 0.4844259050634839, + "grad_norm": 0.9568106475994315, + "learning_rate": 4.921836085084424e-06, + "loss": 0.4893, + "step": 7955 + }, + { + "epoch": 0.4844868008403617, + "grad_norm": 1.0237703878164413, + "learning_rate": 4.921816286514427e-06, + "loss": 0.4647, + "step": 7956 + }, + { + "epoch": 0.4845476966172396, + "grad_norm": 1.012699575184095, + "learning_rate": 4.9217964854771335e-06, + "loss": 0.4609, + "step": 7957 + }, + { + "epoch": 0.48460859239411747, + "grad_norm": 1.0540713888785154, + "learning_rate": 4.9217766819725645e-06, + "loss": 0.3774, + "step": 7958 + }, + { + "epoch": 0.48466948817099537, + "grad_norm": 1.146130131286072, + "learning_rate": 4.92175687600074e-06, + "loss": 0.421, + "step": 7959 + }, + { + "epoch": 0.4847303839478732, + "grad_norm": 0.94137690640435, + "learning_rate": 4.92173706756168e-06, + "loss": 0.4549, + "step": 7960 + }, + { + "epoch": 0.4847912797247511, + "grad_norm": 1.0637084758050934, + "learning_rate": 4.921717256655404e-06, + "loss": 0.4453, + "step": 7961 + }, + { + "epoch": 0.48485217550162896, + "grad_norm": 1.1546618636732184, + "learning_rate": 4.921697443281933e-06, + "loss": 0.4739, + "step": 7962 + }, + { + "epoch": 0.48491307127850686, + "grad_norm": 0.9792338277182128, + "learning_rate": 4.921677627441287e-06, + "loss": 0.4074, + "step": 7963 + }, + { + "epoch": 0.4849739670553847, + "grad_norm": 1.0206599422489484, + "learning_rate": 4.921657809133487e-06, + "loss": 0.4661, + "step": 7964 + }, + { + "epoch": 0.4850348628322626, + "grad_norm": 1.0144802303807454, + "learning_rate": 4.921637988358552e-06, + "loss": 0.4313, + "step": 7965 + }, + { + "epoch": 0.48509575860914045, + "grad_norm": 1.1039674176391412, + "learning_rate": 4.921618165116502e-06, + "loss": 0.4153, + "step": 7966 + }, + { + "epoch": 0.48515665438601835, + "grad_norm": 1.019893603968001, + "learning_rate": 4.921598339407359e-06, + "loss": 0.4533, + "step": 7967 + }, + { + "epoch": 0.4852175501628962, + "grad_norm": 0.9428209112300382, + "learning_rate": 4.92157851123114e-06, + "loss": 0.4846, + "step": 7968 + }, + { + "epoch": 0.4852784459397741, + "grad_norm": 0.9872696093579632, + "learning_rate": 4.9215586805878685e-06, + "loss": 0.4866, + "step": 7969 + }, + { + "epoch": 0.48533934171665194, + "grad_norm": 0.9417583852811761, + "learning_rate": 4.9215388474775625e-06, + "loss": 0.5044, + "step": 7970 + }, + { + "epoch": 0.48540023749352984, + "grad_norm": 1.141420263801147, + "learning_rate": 4.921519011900244e-06, + "loss": 0.3828, + "step": 7971 + }, + { + "epoch": 0.4854611332704077, + "grad_norm": 1.051340934903097, + "learning_rate": 4.921499173855932e-06, + "loss": 0.5194, + "step": 7972 + }, + { + "epoch": 0.4855220290472856, + "grad_norm": 0.9908905632877723, + "learning_rate": 4.921479333344647e-06, + "loss": 0.4342, + "step": 7973 + }, + { + "epoch": 0.48558292482416343, + "grad_norm": 1.0011673432853476, + "learning_rate": 4.921459490366409e-06, + "loss": 0.407, + "step": 7974 + }, + { + "epoch": 0.48564382060104133, + "grad_norm": 1.0951426784345395, + "learning_rate": 4.921439644921238e-06, + "loss": 0.4361, + "step": 7975 + }, + { + "epoch": 0.4857047163779192, + "grad_norm": 0.9488355706300599, + "learning_rate": 4.921419797009156e-06, + "loss": 0.481, + "step": 7976 + }, + { + "epoch": 0.4857656121547971, + "grad_norm": 1.0361587797132952, + "learning_rate": 4.921399946630181e-06, + "loss": 0.4131, + "step": 7977 + }, + { + "epoch": 0.4858265079316749, + "grad_norm": 1.0700163177716675, + "learning_rate": 4.9213800937843336e-06, + "loss": 0.4492, + "step": 7978 + }, + { + "epoch": 0.4858874037085528, + "grad_norm": 1.0210193020178382, + "learning_rate": 4.9213602384716355e-06, + "loss": 0.4565, + "step": 7979 + }, + { + "epoch": 0.48594829948543067, + "grad_norm": 0.9775229118105335, + "learning_rate": 4.921340380692105e-06, + "loss": 0.4373, + "step": 7980 + }, + { + "epoch": 0.48600919526230857, + "grad_norm": 1.055355479585573, + "learning_rate": 4.9213205204457635e-06, + "loss": 0.3894, + "step": 7981 + }, + { + "epoch": 0.4860700910391864, + "grad_norm": 0.9951700075618828, + "learning_rate": 4.921300657732632e-06, + "loss": 0.4398, + "step": 7982 + }, + { + "epoch": 0.4861309868160643, + "grad_norm": 1.0054379686572856, + "learning_rate": 4.921280792552728e-06, + "loss": 0.522, + "step": 7983 + }, + { + "epoch": 0.48619188259294216, + "grad_norm": 1.0807861038525592, + "learning_rate": 4.9212609249060745e-06, + "loss": 0.4038, + "step": 7984 + }, + { + "epoch": 0.48625277836982006, + "grad_norm": 1.0232286765378233, + "learning_rate": 4.92124105479269e-06, + "loss": 0.4687, + "step": 7985 + }, + { + "epoch": 0.4863136741466979, + "grad_norm": 1.013742788583379, + "learning_rate": 4.921221182212596e-06, + "loss": 0.472, + "step": 7986 + }, + { + "epoch": 0.4863745699235758, + "grad_norm": 1.0486032539845596, + "learning_rate": 4.921201307165813e-06, + "loss": 0.3924, + "step": 7987 + }, + { + "epoch": 0.48643546570045365, + "grad_norm": 1.0274657954816304, + "learning_rate": 4.92118142965236e-06, + "loss": 0.4823, + "step": 7988 + }, + { + "epoch": 0.48649636147733155, + "grad_norm": 1.056918451773835, + "learning_rate": 4.921161549672257e-06, + "loss": 0.48, + "step": 7989 + }, + { + "epoch": 0.4865572572542094, + "grad_norm": 1.062226007709204, + "learning_rate": 4.921141667225525e-06, + "loss": 0.4237, + "step": 7990 + }, + { + "epoch": 0.4866181530310873, + "grad_norm": 0.9846895147137211, + "learning_rate": 4.921121782312185e-06, + "loss": 0.4432, + "step": 7991 + }, + { + "epoch": 0.48667904880796514, + "grad_norm": 1.1226596774636786, + "learning_rate": 4.921101894932255e-06, + "loss": 0.4541, + "step": 7992 + }, + { + "epoch": 0.48673994458484304, + "grad_norm": 0.9629659979917229, + "learning_rate": 4.921082005085758e-06, + "loss": 0.4598, + "step": 7993 + }, + { + "epoch": 0.4868008403617209, + "grad_norm": 0.9164007033590652, + "learning_rate": 4.921062112772713e-06, + "loss": 0.4888, + "step": 7994 + }, + { + "epoch": 0.4868617361385988, + "grad_norm": 0.8941515397298405, + "learning_rate": 4.92104221799314e-06, + "loss": 0.551, + "step": 7995 + }, + { + "epoch": 0.4869226319154767, + "grad_norm": 1.0022750263006093, + "learning_rate": 4.92102232074706e-06, + "loss": 0.3735, + "step": 7996 + }, + { + "epoch": 0.48698352769235453, + "grad_norm": 1.0089537295625515, + "learning_rate": 4.921002421034492e-06, + "loss": 0.4162, + "step": 7997 + }, + { + "epoch": 0.48704442346923243, + "grad_norm": 1.057787796714543, + "learning_rate": 4.920982518855457e-06, + "loss": 0.3961, + "step": 7998 + }, + { + "epoch": 0.4871053192461103, + "grad_norm": 0.9977302033781488, + "learning_rate": 4.920962614209975e-06, + "loss": 0.4299, + "step": 7999 + }, + { + "epoch": 0.4871662150229882, + "grad_norm": 1.0020680499173515, + "learning_rate": 4.9209427070980676e-06, + "loss": 0.4731, + "step": 8000 + }, + { + "epoch": 0.487227110799866, + "grad_norm": 1.0709666632629997, + "learning_rate": 4.920922797519754e-06, + "loss": 0.4621, + "step": 8001 + }, + { + "epoch": 0.4872880065767439, + "grad_norm": 1.0080617782377896, + "learning_rate": 4.920902885475054e-06, + "loss": 0.5048, + "step": 8002 + }, + { + "epoch": 0.48734890235362177, + "grad_norm": 1.0112079367883446, + "learning_rate": 4.920882970963989e-06, + "loss": 0.5761, + "step": 8003 + }, + { + "epoch": 0.48740979813049967, + "grad_norm": 1.008416969047499, + "learning_rate": 4.9208630539865785e-06, + "loss": 0.4133, + "step": 8004 + }, + { + "epoch": 0.4874706939073775, + "grad_norm": 0.995478563914207, + "learning_rate": 4.9208431345428435e-06, + "loss": 0.4179, + "step": 8005 + }, + { + "epoch": 0.4875315896842554, + "grad_norm": 1.0003357637162318, + "learning_rate": 4.920823212632803e-06, + "loss": 0.4939, + "step": 8006 + }, + { + "epoch": 0.48759248546113326, + "grad_norm": 0.9835572417096737, + "learning_rate": 4.9208032882564795e-06, + "loss": 0.4581, + "step": 8007 + }, + { + "epoch": 0.48765338123801116, + "grad_norm": 1.0155853236204315, + "learning_rate": 4.920783361413891e-06, + "loss": 0.4896, + "step": 8008 + }, + { + "epoch": 0.487714277014889, + "grad_norm": 1.028835160197496, + "learning_rate": 4.920763432105059e-06, + "loss": 0.5508, + "step": 8009 + }, + { + "epoch": 0.4877751727917669, + "grad_norm": 1.0450611019841203, + "learning_rate": 4.920743500330003e-06, + "loss": 0.4605, + "step": 8010 + }, + { + "epoch": 0.48783606856864475, + "grad_norm": 0.9565060507582658, + "learning_rate": 4.920723566088744e-06, + "loss": 0.437, + "step": 8011 + }, + { + "epoch": 0.48789696434552265, + "grad_norm": 1.088734881646107, + "learning_rate": 4.9207036293813026e-06, + "loss": 0.4037, + "step": 8012 + }, + { + "epoch": 0.4879578601224005, + "grad_norm": 1.1322386613920252, + "learning_rate": 4.9206836902076985e-06, + "loss": 0.453, + "step": 8013 + }, + { + "epoch": 0.4880187558992784, + "grad_norm": 1.0494468425418582, + "learning_rate": 4.920663748567952e-06, + "loss": 0.4546, + "step": 8014 + }, + { + "epoch": 0.48807965167615625, + "grad_norm": 0.931701498021204, + "learning_rate": 4.9206438044620845e-06, + "loss": 0.4252, + "step": 8015 + }, + { + "epoch": 0.48814054745303415, + "grad_norm": 1.0429978544273575, + "learning_rate": 4.920623857890114e-06, + "loss": 0.4427, + "step": 8016 + }, + { + "epoch": 0.488201443229912, + "grad_norm": 0.9313364420329617, + "learning_rate": 4.920603908852063e-06, + "loss": 0.4496, + "step": 8017 + }, + { + "epoch": 0.4882623390067899, + "grad_norm": 1.0620471949480574, + "learning_rate": 4.920583957347951e-06, + "loss": 0.3813, + "step": 8018 + }, + { + "epoch": 0.48832323478366774, + "grad_norm": 1.038858768194379, + "learning_rate": 4.920564003377799e-06, + "loss": 0.4627, + "step": 8019 + }, + { + "epoch": 0.48838413056054564, + "grad_norm": 1.0220912629193024, + "learning_rate": 4.920544046941626e-06, + "loss": 0.5205, + "step": 8020 + }, + { + "epoch": 0.4884450263374235, + "grad_norm": 0.9875015018540917, + "learning_rate": 4.920524088039452e-06, + "loss": 0.4526, + "step": 8021 + }, + { + "epoch": 0.4885059221143014, + "grad_norm": 1.0379279305164748, + "learning_rate": 4.9205041266713e-06, + "loss": 0.4752, + "step": 8022 + }, + { + "epoch": 0.48856681789117923, + "grad_norm": 0.9365778429382883, + "learning_rate": 4.920484162837188e-06, + "loss": 0.4747, + "step": 8023 + }, + { + "epoch": 0.48862771366805713, + "grad_norm": 0.9571074726097896, + "learning_rate": 4.920464196537138e-06, + "loss": 0.4742, + "step": 8024 + }, + { + "epoch": 0.488688609444935, + "grad_norm": 1.0347596720019425, + "learning_rate": 4.920444227771168e-06, + "loss": 0.5049, + "step": 8025 + }, + { + "epoch": 0.4887495052218129, + "grad_norm": 1.0654210190093547, + "learning_rate": 4.920424256539301e-06, + "loss": 0.4631, + "step": 8026 + }, + { + "epoch": 0.4888104009986907, + "grad_norm": 0.9159655943941315, + "learning_rate": 4.920404282841556e-06, + "loss": 0.4714, + "step": 8027 + }, + { + "epoch": 0.4888712967755686, + "grad_norm": 0.9985151889739446, + "learning_rate": 4.9203843066779524e-06, + "loss": 0.4396, + "step": 8028 + }, + { + "epoch": 0.48893219255244647, + "grad_norm": 0.9441608371001387, + "learning_rate": 4.920364328048512e-06, + "loss": 0.4671, + "step": 8029 + }, + { + "epoch": 0.48899308832932437, + "grad_norm": 0.9327922066741289, + "learning_rate": 4.920344346953255e-06, + "loss": 0.4462, + "step": 8030 + }, + { + "epoch": 0.4890539841062022, + "grad_norm": 0.9859026724634845, + "learning_rate": 4.920324363392201e-06, + "loss": 0.4854, + "step": 8031 + }, + { + "epoch": 0.4891148798830801, + "grad_norm": 1.016732243714942, + "learning_rate": 4.920304377365371e-06, + "loss": 0.4327, + "step": 8032 + }, + { + "epoch": 0.48917577565995796, + "grad_norm": 0.9280353992351276, + "learning_rate": 4.920284388872786e-06, + "loss": 0.4894, + "step": 8033 + }, + { + "epoch": 0.48923667143683586, + "grad_norm": 1.049832329509541, + "learning_rate": 4.920264397914465e-06, + "loss": 0.4702, + "step": 8034 + }, + { + "epoch": 0.4892975672137137, + "grad_norm": 1.0038479692032838, + "learning_rate": 4.920244404490429e-06, + "loss": 0.3902, + "step": 8035 + }, + { + "epoch": 0.4893584629905916, + "grad_norm": 1.1355634063794144, + "learning_rate": 4.920224408600698e-06, + "loss": 0.3989, + "step": 8036 + }, + { + "epoch": 0.4894193587674695, + "grad_norm": 1.1194513614812003, + "learning_rate": 4.920204410245294e-06, + "loss": 0.4502, + "step": 8037 + }, + { + "epoch": 0.48948025454434735, + "grad_norm": 0.9565423245759755, + "learning_rate": 4.920184409424234e-06, + "loss": 0.4429, + "step": 8038 + }, + { + "epoch": 0.48954115032122525, + "grad_norm": 0.9160655407848018, + "learning_rate": 4.920164406137542e-06, + "loss": 0.4506, + "step": 8039 + }, + { + "epoch": 0.4896020460981031, + "grad_norm": 1.08957844015387, + "learning_rate": 4.920144400385236e-06, + "loss": 0.4261, + "step": 8040 + }, + { + "epoch": 0.489662941874981, + "grad_norm": 1.0861452112358896, + "learning_rate": 4.920124392167338e-06, + "loss": 0.3868, + "step": 8041 + }, + { + "epoch": 0.48972383765185884, + "grad_norm": 1.0264672085058304, + "learning_rate": 4.920104381483867e-06, + "loss": 0.4727, + "step": 8042 + }, + { + "epoch": 0.48978473342873674, + "grad_norm": 0.9026135435420843, + "learning_rate": 4.9200843683348446e-06, + "loss": 0.503, + "step": 8043 + }, + { + "epoch": 0.4898456292056146, + "grad_norm": 1.0385045836468512, + "learning_rate": 4.92006435272029e-06, + "loss": 0.434, + "step": 8044 + }, + { + "epoch": 0.4899065249824925, + "grad_norm": 1.074960486874617, + "learning_rate": 4.9200443346402235e-06, + "loss": 0.4677, + "step": 8045 + }, + { + "epoch": 0.48996742075937033, + "grad_norm": 1.1396488737696826, + "learning_rate": 4.920024314094667e-06, + "loss": 0.4076, + "step": 8046 + }, + { + "epoch": 0.49002831653624823, + "grad_norm": 1.0087960716999669, + "learning_rate": 4.92000429108364e-06, + "loss": 0.4715, + "step": 8047 + }, + { + "epoch": 0.4900892123131261, + "grad_norm": 1.0804118681311017, + "learning_rate": 4.919984265607163e-06, + "loss": 0.4288, + "step": 8048 + }, + { + "epoch": 0.490150108090004, + "grad_norm": 1.0907159838100127, + "learning_rate": 4.9199642376652555e-06, + "loss": 0.396, + "step": 8049 + }, + { + "epoch": 0.4902110038668818, + "grad_norm": 1.004740332843962, + "learning_rate": 4.919944207257939e-06, + "loss": 0.4962, + "step": 8050 + }, + { + "epoch": 0.4902718996437597, + "grad_norm": 1.0138651237532124, + "learning_rate": 4.919924174385235e-06, + "loss": 0.4034, + "step": 8051 + }, + { + "epoch": 0.49033279542063757, + "grad_norm": 0.9694637976437604, + "learning_rate": 4.919904139047161e-06, + "loss": 0.4605, + "step": 8052 + }, + { + "epoch": 0.49039369119751547, + "grad_norm": 1.034784180853784, + "learning_rate": 4.9198841012437404e-06, + "loss": 0.3999, + "step": 8053 + }, + { + "epoch": 0.4904545869743933, + "grad_norm": 0.984529937508583, + "learning_rate": 4.9198640609749914e-06, + "loss": 0.4564, + "step": 8054 + }, + { + "epoch": 0.4905154827512712, + "grad_norm": 0.9594560793883391, + "learning_rate": 4.919844018240934e-06, + "loss": 0.4697, + "step": 8055 + }, + { + "epoch": 0.49057637852814906, + "grad_norm": 1.0413165533246962, + "learning_rate": 4.919823973041592e-06, + "loss": 0.4786, + "step": 8056 + }, + { + "epoch": 0.49063727430502696, + "grad_norm": 1.013063647999685, + "learning_rate": 4.919803925376983e-06, + "loss": 0.486, + "step": 8057 + }, + { + "epoch": 0.4906981700819048, + "grad_norm": 1.0079955392914566, + "learning_rate": 4.919783875247127e-06, + "loss": 0.4332, + "step": 8058 + }, + { + "epoch": 0.4907590658587827, + "grad_norm": 1.0180684053629159, + "learning_rate": 4.919763822652047e-06, + "loss": 0.45, + "step": 8059 + }, + { + "epoch": 0.49081996163566055, + "grad_norm": 1.0017973175459582, + "learning_rate": 4.919743767591761e-06, + "loss": 0.4243, + "step": 8060 + }, + { + "epoch": 0.49088085741253845, + "grad_norm": 1.0970847601040552, + "learning_rate": 4.91972371006629e-06, + "loss": 0.3721, + "step": 8061 + }, + { + "epoch": 0.4909417531894163, + "grad_norm": 1.084820527115071, + "learning_rate": 4.919703650075655e-06, + "loss": 0.4333, + "step": 8062 + }, + { + "epoch": 0.4910026489662942, + "grad_norm": 1.0546884248865018, + "learning_rate": 4.919683587619877e-06, + "loss": 0.4134, + "step": 8063 + }, + { + "epoch": 0.49106354474317204, + "grad_norm": 0.9535205026260998, + "learning_rate": 4.919663522698975e-06, + "loss": 0.4485, + "step": 8064 + }, + { + "epoch": 0.49112444052004994, + "grad_norm": 0.9655064828575778, + "learning_rate": 4.919643455312971e-06, + "loss": 0.4708, + "step": 8065 + }, + { + "epoch": 0.4911853362969278, + "grad_norm": 1.0159595213059351, + "learning_rate": 4.9196233854618836e-06, + "loss": 0.4662, + "step": 8066 + }, + { + "epoch": 0.4912462320738057, + "grad_norm": 1.0238250873934178, + "learning_rate": 4.9196033131457345e-06, + "loss": 0.4473, + "step": 8067 + }, + { + "epoch": 0.49130712785068353, + "grad_norm": 1.0508095589744568, + "learning_rate": 4.919583238364543e-06, + "loss": 0.4326, + "step": 8068 + }, + { + "epoch": 0.49136802362756143, + "grad_norm": 0.9602103110086964, + "learning_rate": 4.919563161118332e-06, + "loss": 0.4399, + "step": 8069 + }, + { + "epoch": 0.4914289194044393, + "grad_norm": 1.084631688693081, + "learning_rate": 4.91954308140712e-06, + "loss": 0.4614, + "step": 8070 + }, + { + "epoch": 0.4914898151813172, + "grad_norm": 1.0137383357942342, + "learning_rate": 4.919522999230927e-06, + "loss": 0.3403, + "step": 8071 + }, + { + "epoch": 0.491550710958195, + "grad_norm": 1.112084809382153, + "learning_rate": 4.919502914589774e-06, + "loss": 0.5026, + "step": 8072 + }, + { + "epoch": 0.4916116067350729, + "grad_norm": 1.0375613155320973, + "learning_rate": 4.9194828274836835e-06, + "loss": 0.4953, + "step": 8073 + }, + { + "epoch": 0.49167250251195077, + "grad_norm": 1.0408901959003212, + "learning_rate": 4.919462737912673e-06, + "loss": 0.4469, + "step": 8074 + }, + { + "epoch": 0.49173339828882867, + "grad_norm": 0.9748469525468232, + "learning_rate": 4.919442645876764e-06, + "loss": 0.4996, + "step": 8075 + }, + { + "epoch": 0.4917942940657065, + "grad_norm": 0.9684985926461172, + "learning_rate": 4.919422551375978e-06, + "loss": 0.4577, + "step": 8076 + }, + { + "epoch": 0.4918551898425844, + "grad_norm": 0.9724524290113563, + "learning_rate": 4.919402454410334e-06, + "loss": 0.4783, + "step": 8077 + }, + { + "epoch": 0.4919160856194623, + "grad_norm": 1.089651621742167, + "learning_rate": 4.919382354979853e-06, + "loss": 0.4295, + "step": 8078 + }, + { + "epoch": 0.49197698139634016, + "grad_norm": 1.089079732592264, + "learning_rate": 4.919362253084555e-06, + "loss": 0.4471, + "step": 8079 + }, + { + "epoch": 0.49203787717321806, + "grad_norm": 0.9920573749595991, + "learning_rate": 4.919342148724462e-06, + "loss": 0.4551, + "step": 8080 + }, + { + "epoch": 0.4920987729500959, + "grad_norm": 0.946541720536682, + "learning_rate": 4.919322041899593e-06, + "loss": 0.4318, + "step": 8081 + }, + { + "epoch": 0.4921596687269738, + "grad_norm": 1.0659633141740823, + "learning_rate": 4.919301932609969e-06, + "loss": 0.4152, + "step": 8082 + }, + { + "epoch": 0.49222056450385165, + "grad_norm": 1.0826836250575225, + "learning_rate": 4.919281820855611e-06, + "loss": 0.4343, + "step": 8083 + }, + { + "epoch": 0.49228146028072955, + "grad_norm": 1.0524243974760075, + "learning_rate": 4.919261706636539e-06, + "loss": 0.4758, + "step": 8084 + }, + { + "epoch": 0.4923423560576074, + "grad_norm": 0.9896917227028547, + "learning_rate": 4.9192415899527726e-06, + "loss": 0.4914, + "step": 8085 + }, + { + "epoch": 0.4924032518344853, + "grad_norm": 1.067385802454045, + "learning_rate": 4.919221470804334e-06, + "loss": 0.5249, + "step": 8086 + }, + { + "epoch": 0.49246414761136315, + "grad_norm": 0.9741054559193126, + "learning_rate": 4.919201349191242e-06, + "loss": 0.4697, + "step": 8087 + }, + { + "epoch": 0.49252504338824105, + "grad_norm": 1.0082826218215133, + "learning_rate": 4.919181225113519e-06, + "loss": 0.4116, + "step": 8088 + }, + { + "epoch": 0.4925859391651189, + "grad_norm": 1.0417836051219382, + "learning_rate": 4.9191610985711835e-06, + "loss": 0.4345, + "step": 8089 + }, + { + "epoch": 0.4926468349419968, + "grad_norm": 0.9843690676505404, + "learning_rate": 4.919140969564258e-06, + "loss": 0.4422, + "step": 8090 + }, + { + "epoch": 0.49270773071887464, + "grad_norm": 1.0415321701925813, + "learning_rate": 4.919120838092762e-06, + "loss": 0.3559, + "step": 8091 + }, + { + "epoch": 0.49276862649575254, + "grad_norm": 1.0027228944425317, + "learning_rate": 4.919100704156715e-06, + "loss": 0.4646, + "step": 8092 + }, + { + "epoch": 0.4928295222726304, + "grad_norm": 0.9877650370402005, + "learning_rate": 4.919080567756138e-06, + "loss": 0.4123, + "step": 8093 + }, + { + "epoch": 0.4928904180495083, + "grad_norm": 1.0174534841170109, + "learning_rate": 4.919060428891053e-06, + "loss": 0.4202, + "step": 8094 + }, + { + "epoch": 0.49295131382638613, + "grad_norm": 1.0617537433375441, + "learning_rate": 4.9190402875614795e-06, + "loss": 0.4106, + "step": 8095 + }, + { + "epoch": 0.49301220960326403, + "grad_norm": 1.0366636543083951, + "learning_rate": 4.919020143767439e-06, + "loss": 0.4621, + "step": 8096 + }, + { + "epoch": 0.4930731053801419, + "grad_norm": 1.0017159616171059, + "learning_rate": 4.91899999750895e-06, + "loss": 0.4428, + "step": 8097 + }, + { + "epoch": 0.4931340011570198, + "grad_norm": 0.9884960148010516, + "learning_rate": 4.9189798487860334e-06, + "loss": 0.5088, + "step": 8098 + }, + { + "epoch": 0.4931948969338976, + "grad_norm": 1.0289836124844878, + "learning_rate": 4.918959697598711e-06, + "loss": 0.483, + "step": 8099 + }, + { + "epoch": 0.4932557927107755, + "grad_norm": 1.1156220007915405, + "learning_rate": 4.918939543947004e-06, + "loss": 0.4546, + "step": 8100 + }, + { + "epoch": 0.49331668848765337, + "grad_norm": 1.0504200230737526, + "learning_rate": 4.91891938783093e-06, + "loss": 0.4777, + "step": 8101 + }, + { + "epoch": 0.49337758426453127, + "grad_norm": 1.0014002158327706, + "learning_rate": 4.918899229250513e-06, + "loss": 0.4287, + "step": 8102 + }, + { + "epoch": 0.4934384800414091, + "grad_norm": 0.9949501430201955, + "learning_rate": 4.91887906820577e-06, + "loss": 0.4559, + "step": 8103 + }, + { + "epoch": 0.493499375818287, + "grad_norm": 0.9963877649177929, + "learning_rate": 4.918858904696723e-06, + "loss": 0.4233, + "step": 8104 + }, + { + "epoch": 0.49356027159516486, + "grad_norm": 0.8991892506626378, + "learning_rate": 4.918838738723394e-06, + "loss": 0.5124, + "step": 8105 + }, + { + "epoch": 0.49362116737204276, + "grad_norm": 1.006919810493341, + "learning_rate": 4.918818570285802e-06, + "loss": 0.4498, + "step": 8106 + }, + { + "epoch": 0.4936820631489206, + "grad_norm": 1.0105523869628354, + "learning_rate": 4.9187983993839685e-06, + "loss": 0.398, + "step": 8107 + }, + { + "epoch": 0.4937429589257985, + "grad_norm": 1.095360357612358, + "learning_rate": 4.918778226017913e-06, + "loss": 0.3874, + "step": 8108 + }, + { + "epoch": 0.49380385470267635, + "grad_norm": 0.9735703889704923, + "learning_rate": 4.918758050187656e-06, + "loss": 0.4086, + "step": 8109 + }, + { + "epoch": 0.49386475047955425, + "grad_norm": 0.9714167635014171, + "learning_rate": 4.918737871893219e-06, + "loss": 0.4421, + "step": 8110 + }, + { + "epoch": 0.4939256462564321, + "grad_norm": 1.006417389422371, + "learning_rate": 4.918717691134622e-06, + "loss": 0.4604, + "step": 8111 + }, + { + "epoch": 0.49398654203331, + "grad_norm": 0.9071787944093037, + "learning_rate": 4.918697507911887e-06, + "loss": 0.4403, + "step": 8112 + }, + { + "epoch": 0.49404743781018784, + "grad_norm": 1.0723694785000542, + "learning_rate": 4.918677322225032e-06, + "loss": 0.429, + "step": 8113 + }, + { + "epoch": 0.49410833358706574, + "grad_norm": 1.0871159414746923, + "learning_rate": 4.918657134074079e-06, + "loss": 0.5409, + "step": 8114 + }, + { + "epoch": 0.4941692293639436, + "grad_norm": 0.9941480989079814, + "learning_rate": 4.918636943459048e-06, + "loss": 0.4448, + "step": 8115 + }, + { + "epoch": 0.4942301251408215, + "grad_norm": 0.9071400587030436, + "learning_rate": 4.918616750379961e-06, + "loss": 0.4548, + "step": 8116 + }, + { + "epoch": 0.49429102091769933, + "grad_norm": 0.9915316150366994, + "learning_rate": 4.918596554836837e-06, + "loss": 0.4504, + "step": 8117 + }, + { + "epoch": 0.49435191669457723, + "grad_norm": 0.9958739855011819, + "learning_rate": 4.9185763568296976e-06, + "loss": 0.4852, + "step": 8118 + }, + { + "epoch": 0.49441281247145513, + "grad_norm": 1.004390416669053, + "learning_rate": 4.918556156358561e-06, + "loss": 0.5132, + "step": 8119 + }, + { + "epoch": 0.494473708248333, + "grad_norm": 0.9568226733259297, + "learning_rate": 4.918535953423452e-06, + "loss": 0.4257, + "step": 8120 + }, + { + "epoch": 0.4945346040252109, + "grad_norm": 1.0472644042802433, + "learning_rate": 4.918515748024388e-06, + "loss": 0.4567, + "step": 8121 + }, + { + "epoch": 0.4945954998020887, + "grad_norm": 1.0600347806100552, + "learning_rate": 4.91849554016139e-06, + "loss": 0.4704, + "step": 8122 + }, + { + "epoch": 0.4946563955789666, + "grad_norm": 1.0247981039141725, + "learning_rate": 4.918475329834479e-06, + "loss": 0.4496, + "step": 8123 + }, + { + "epoch": 0.49471729135584447, + "grad_norm": 0.9868859029251281, + "learning_rate": 4.9184551170436765e-06, + "loss": 0.4985, + "step": 8124 + }, + { + "epoch": 0.49477818713272237, + "grad_norm": 1.0961287814016782, + "learning_rate": 4.918434901789002e-06, + "loss": 0.4002, + "step": 8125 + }, + { + "epoch": 0.4948390829096002, + "grad_norm": 0.9537477151440101, + "learning_rate": 4.918414684070476e-06, + "loss": 0.4924, + "step": 8126 + }, + { + "epoch": 0.4948999786864781, + "grad_norm": 0.9270651620884479, + "learning_rate": 4.918394463888119e-06, + "loss": 0.4482, + "step": 8127 + }, + { + "epoch": 0.49496087446335596, + "grad_norm": 1.0046207622390604, + "learning_rate": 4.918374241241953e-06, + "loss": 0.4242, + "step": 8128 + }, + { + "epoch": 0.49502177024023386, + "grad_norm": 1.0630493193830604, + "learning_rate": 4.918354016131997e-06, + "loss": 0.4321, + "step": 8129 + }, + { + "epoch": 0.4950826660171117, + "grad_norm": 0.9818218296382605, + "learning_rate": 4.918333788558272e-06, + "loss": 0.4433, + "step": 8130 + }, + { + "epoch": 0.4951435617939896, + "grad_norm": 0.9682117054559934, + "learning_rate": 4.9183135585207985e-06, + "loss": 0.4565, + "step": 8131 + }, + { + "epoch": 0.49520445757086745, + "grad_norm": 1.0063793473447191, + "learning_rate": 4.918293326019598e-06, + "loss": 0.4598, + "step": 8132 + }, + { + "epoch": 0.49526535334774535, + "grad_norm": 0.9672511209836169, + "learning_rate": 4.91827309105469e-06, + "loss": 0.4993, + "step": 8133 + }, + { + "epoch": 0.4953262491246232, + "grad_norm": 1.0502608466022711, + "learning_rate": 4.918252853626097e-06, + "loss": 0.5022, + "step": 8134 + }, + { + "epoch": 0.4953871449015011, + "grad_norm": 1.036154425598418, + "learning_rate": 4.918232613733837e-06, + "loss": 0.4627, + "step": 8135 + }, + { + "epoch": 0.49544804067837894, + "grad_norm": 1.0233377861058675, + "learning_rate": 4.918212371377933e-06, + "loss": 0.42, + "step": 8136 + }, + { + "epoch": 0.49550893645525684, + "grad_norm": 1.1578974463534433, + "learning_rate": 4.918192126558403e-06, + "loss": 0.3813, + "step": 8137 + }, + { + "epoch": 0.4955698322321347, + "grad_norm": 0.9768691011506934, + "learning_rate": 4.918171879275271e-06, + "loss": 0.4481, + "step": 8138 + }, + { + "epoch": 0.4956307280090126, + "grad_norm": 1.0222166124137368, + "learning_rate": 4.918151629528554e-06, + "loss": 0.479, + "step": 8139 + }, + { + "epoch": 0.49569162378589043, + "grad_norm": 1.0486015114532554, + "learning_rate": 4.918131377318275e-06, + "loss": 0.5014, + "step": 8140 + }, + { + "epoch": 0.49575251956276833, + "grad_norm": 1.083065564536226, + "learning_rate": 4.918111122644455e-06, + "loss": 0.495, + "step": 8141 + }, + { + "epoch": 0.4958134153396462, + "grad_norm": 0.9658712785337827, + "learning_rate": 4.9180908655071125e-06, + "loss": 0.4671, + "step": 8142 + }, + { + "epoch": 0.4958743111165241, + "grad_norm": 0.9586686270142962, + "learning_rate": 4.91807060590627e-06, + "loss": 0.4345, + "step": 8143 + }, + { + "epoch": 0.4959352068934019, + "grad_norm": 0.9636944709875712, + "learning_rate": 4.918050343841946e-06, + "loss": 0.3878, + "step": 8144 + }, + { + "epoch": 0.4959961026702798, + "grad_norm": 0.9479035699755927, + "learning_rate": 4.918030079314164e-06, + "loss": 0.5078, + "step": 8145 + }, + { + "epoch": 0.49605699844715767, + "grad_norm": 1.041170738275936, + "learning_rate": 4.918009812322942e-06, + "loss": 0.4046, + "step": 8146 + }, + { + "epoch": 0.49611789422403557, + "grad_norm": 1.0311371362680355, + "learning_rate": 4.917989542868303e-06, + "loss": 0.5028, + "step": 8147 + }, + { + "epoch": 0.4961787900009134, + "grad_norm": 1.1122042232241365, + "learning_rate": 4.917969270950267e-06, + "loss": 0.4345, + "step": 8148 + }, + { + "epoch": 0.4962396857777913, + "grad_norm": 0.9773813606675987, + "learning_rate": 4.917948996568853e-06, + "loss": 0.4218, + "step": 8149 + }, + { + "epoch": 0.49630058155466916, + "grad_norm": 1.0576642782837606, + "learning_rate": 4.917928719724083e-06, + "loss": 0.3931, + "step": 8150 + }, + { + "epoch": 0.49636147733154706, + "grad_norm": 1.074174695441281, + "learning_rate": 4.917908440415978e-06, + "loss": 0.4624, + "step": 8151 + }, + { + "epoch": 0.4964223731084249, + "grad_norm": 1.093246308757962, + "learning_rate": 4.917888158644558e-06, + "loss": 0.4661, + "step": 8152 + }, + { + "epoch": 0.4964832688853028, + "grad_norm": 1.0829162427095747, + "learning_rate": 4.917867874409844e-06, + "loss": 0.3768, + "step": 8153 + }, + { + "epoch": 0.49654416466218065, + "grad_norm": 1.0070691378189665, + "learning_rate": 4.9178475877118555e-06, + "loss": 0.464, + "step": 8154 + }, + { + "epoch": 0.49660506043905855, + "grad_norm": 1.058318008237389, + "learning_rate": 4.917827298550616e-06, + "loss": 0.4267, + "step": 8155 + }, + { + "epoch": 0.4966659562159364, + "grad_norm": 0.9544272375538142, + "learning_rate": 4.917807006926142e-06, + "loss": 0.4709, + "step": 8156 + }, + { + "epoch": 0.4967268519928143, + "grad_norm": 0.9673105475254751, + "learning_rate": 4.917786712838458e-06, + "loss": 0.4533, + "step": 8157 + }, + { + "epoch": 0.49678774776969215, + "grad_norm": 0.9828223951159409, + "learning_rate": 4.917766416287583e-06, + "loss": 0.4132, + "step": 8158 + }, + { + "epoch": 0.49684864354657005, + "grad_norm": 0.9169751641210473, + "learning_rate": 4.917746117273537e-06, + "loss": 0.4081, + "step": 8159 + }, + { + "epoch": 0.49690953932344795, + "grad_norm": 0.9970867319160535, + "learning_rate": 4.917725815796343e-06, + "loss": 0.4079, + "step": 8160 + }, + { + "epoch": 0.4969704351003258, + "grad_norm": 1.090067192557687, + "learning_rate": 4.917705511856019e-06, + "loss": 0.3956, + "step": 8161 + }, + { + "epoch": 0.4970313308772037, + "grad_norm": 1.0189859434240494, + "learning_rate": 4.9176852054525874e-06, + "loss": 0.4442, + "step": 8162 + }, + { + "epoch": 0.49709222665408154, + "grad_norm": 1.0835303949174442, + "learning_rate": 4.917664896586068e-06, + "loss": 0.4399, + "step": 8163 + }, + { + "epoch": 0.49715312243095944, + "grad_norm": 1.0590645815824062, + "learning_rate": 4.917644585256483e-06, + "loss": 0.4457, + "step": 8164 + }, + { + "epoch": 0.4972140182078373, + "grad_norm": 1.0462459021004604, + "learning_rate": 4.91762427146385e-06, + "loss": 0.4732, + "step": 8165 + }, + { + "epoch": 0.4972749139847152, + "grad_norm": 1.0165255182493564, + "learning_rate": 4.917603955208193e-06, + "loss": 0.4655, + "step": 8166 + }, + { + "epoch": 0.49733580976159303, + "grad_norm": 1.0217482896945707, + "learning_rate": 4.917583636489531e-06, + "loss": 0.415, + "step": 8167 + }, + { + "epoch": 0.49739670553847093, + "grad_norm": 1.0160453796250852, + "learning_rate": 4.917563315307886e-06, + "loss": 0.4078, + "step": 8168 + }, + { + "epoch": 0.4974576013153488, + "grad_norm": 1.0034090458548997, + "learning_rate": 4.9175429916632765e-06, + "loss": 0.4685, + "step": 8169 + }, + { + "epoch": 0.4975184970922267, + "grad_norm": 0.9420605582108704, + "learning_rate": 4.917522665555725e-06, + "loss": 0.4316, + "step": 8170 + }, + { + "epoch": 0.4975793928691045, + "grad_norm": 1.0526562349653095, + "learning_rate": 4.917502336985252e-06, + "loss": 0.4594, + "step": 8171 + }, + { + "epoch": 0.4976402886459824, + "grad_norm": 1.0129020893325242, + "learning_rate": 4.917482005951877e-06, + "loss": 0.3971, + "step": 8172 + }, + { + "epoch": 0.49770118442286027, + "grad_norm": 0.9672689563154542, + "learning_rate": 4.917461672455621e-06, + "loss": 0.4365, + "step": 8173 + }, + { + "epoch": 0.49776208019973817, + "grad_norm": 0.971674287864659, + "learning_rate": 4.917441336496507e-06, + "loss": 0.4402, + "step": 8174 + }, + { + "epoch": 0.497822975976616, + "grad_norm": 1.0233035933510897, + "learning_rate": 4.917420998074553e-06, + "loss": 0.4579, + "step": 8175 + }, + { + "epoch": 0.4978838717534939, + "grad_norm": 1.0094057829336482, + "learning_rate": 4.917400657189782e-06, + "loss": 0.4694, + "step": 8176 + }, + { + "epoch": 0.49794476753037176, + "grad_norm": 0.9620899579678347, + "learning_rate": 4.917380313842211e-06, + "loss": 0.4536, + "step": 8177 + }, + { + "epoch": 0.49800566330724966, + "grad_norm": 1.1018083141226889, + "learning_rate": 4.9173599680318656e-06, + "loss": 0.3739, + "step": 8178 + }, + { + "epoch": 0.4980665590841275, + "grad_norm": 0.9449979623939699, + "learning_rate": 4.917339619758763e-06, + "loss": 0.4461, + "step": 8179 + }, + { + "epoch": 0.4981274548610054, + "grad_norm": 1.1063965958529518, + "learning_rate": 4.917319269022926e-06, + "loss": 0.3829, + "step": 8180 + }, + { + "epoch": 0.49818835063788325, + "grad_norm": 1.04554551795822, + "learning_rate": 4.917298915824373e-06, + "loss": 0.4151, + "step": 8181 + }, + { + "epoch": 0.49824924641476115, + "grad_norm": 0.984836427139911, + "learning_rate": 4.917278560163127e-06, + "loss": 0.4118, + "step": 8182 + }, + { + "epoch": 0.498310142191639, + "grad_norm": 1.0127643156207746, + "learning_rate": 4.917258202039208e-06, + "loss": 0.4529, + "step": 8183 + }, + { + "epoch": 0.4983710379685169, + "grad_norm": 0.9841168354639265, + "learning_rate": 4.917237841452636e-06, + "loss": 0.4692, + "step": 8184 + }, + { + "epoch": 0.49843193374539474, + "grad_norm": 1.0990110245599027, + "learning_rate": 4.917217478403433e-06, + "loss": 0.4236, + "step": 8185 + }, + { + "epoch": 0.49849282952227264, + "grad_norm": 1.0343184279129185, + "learning_rate": 4.917197112891619e-06, + "loss": 0.4334, + "step": 8186 + }, + { + "epoch": 0.4985537252991505, + "grad_norm": 0.9594152447720348, + "learning_rate": 4.9171767449172135e-06, + "loss": 0.4733, + "step": 8187 + }, + { + "epoch": 0.4986146210760284, + "grad_norm": 0.9689518930331954, + "learning_rate": 4.91715637448024e-06, + "loss": 0.456, + "step": 8188 + }, + { + "epoch": 0.49867551685290623, + "grad_norm": 0.950243010042346, + "learning_rate": 4.917136001580718e-06, + "loss": 0.4518, + "step": 8189 + }, + { + "epoch": 0.49873641262978413, + "grad_norm": 1.1122961928059587, + "learning_rate": 4.917115626218667e-06, + "loss": 0.4422, + "step": 8190 + }, + { + "epoch": 0.498797308406662, + "grad_norm": 0.944733457422201, + "learning_rate": 4.91709524839411e-06, + "loss": 0.4939, + "step": 8191 + }, + { + "epoch": 0.4988582041835399, + "grad_norm": 1.119157341153504, + "learning_rate": 4.917074868107066e-06, + "loss": 0.4298, + "step": 8192 + }, + { + "epoch": 0.4989190999604177, + "grad_norm": 1.009924609283643, + "learning_rate": 4.917054485357556e-06, + "loss": 0.4431, + "step": 8193 + }, + { + "epoch": 0.4989799957372956, + "grad_norm": 1.006448993308707, + "learning_rate": 4.917034100145602e-06, + "loss": 0.4411, + "step": 8194 + }, + { + "epoch": 0.49904089151417347, + "grad_norm": 0.9907666183702191, + "learning_rate": 4.917013712471223e-06, + "loss": 0.4486, + "step": 8195 + }, + { + "epoch": 0.49910178729105137, + "grad_norm": 1.034966824030098, + "learning_rate": 4.9169933223344414e-06, + "loss": 0.4532, + "step": 8196 + }, + { + "epoch": 0.4991626830679292, + "grad_norm": 0.9579517025431222, + "learning_rate": 4.916972929735277e-06, + "loss": 0.478, + "step": 8197 + }, + { + "epoch": 0.4992235788448071, + "grad_norm": 1.0961163345691023, + "learning_rate": 4.916952534673751e-06, + "loss": 0.4506, + "step": 8198 + }, + { + "epoch": 0.49928447462168496, + "grad_norm": 0.9385516994565956, + "learning_rate": 4.916932137149884e-06, + "loss": 0.4537, + "step": 8199 + }, + { + "epoch": 0.49934537039856286, + "grad_norm": 1.0775490835277353, + "learning_rate": 4.916911737163697e-06, + "loss": 0.4721, + "step": 8200 + }, + { + "epoch": 0.49940626617544076, + "grad_norm": 1.1363332661261607, + "learning_rate": 4.916891334715209e-06, + "loss": 0.4869, + "step": 8201 + }, + { + "epoch": 0.4994671619523186, + "grad_norm": 1.007338522020527, + "learning_rate": 4.9168709298044435e-06, + "loss": 0.3878, + "step": 8202 + }, + { + "epoch": 0.4995280577291965, + "grad_norm": 1.0444620808301754, + "learning_rate": 4.916850522431421e-06, + "loss": 0.4575, + "step": 8203 + }, + { + "epoch": 0.49958895350607435, + "grad_norm": 1.1001685314112895, + "learning_rate": 4.9168301125961605e-06, + "loss": 0.4805, + "step": 8204 + }, + { + "epoch": 0.49964984928295225, + "grad_norm": 1.0739547325909828, + "learning_rate": 4.916809700298683e-06, + "loss": 0.4642, + "step": 8205 + }, + { + "epoch": 0.4997107450598301, + "grad_norm": 0.9829807041441971, + "learning_rate": 4.916789285539012e-06, + "loss": 0.5093, + "step": 8206 + }, + { + "epoch": 0.499771640836708, + "grad_norm": 0.9978110054093994, + "learning_rate": 4.916768868317165e-06, + "loss": 0.3949, + "step": 8207 + }, + { + "epoch": 0.49983253661358584, + "grad_norm": 0.9455366354666822, + "learning_rate": 4.916748448633164e-06, + "loss": 0.439, + "step": 8208 + }, + { + "epoch": 0.49989343239046374, + "grad_norm": 0.984081038402461, + "learning_rate": 4.91672802648703e-06, + "loss": 0.4903, + "step": 8209 + }, + { + "epoch": 0.4999543281673416, + "grad_norm": 1.0409040967347063, + "learning_rate": 4.916707601878784e-06, + "loss": 0.4805, + "step": 8210 + }, + { + "epoch": 0.5000152239442195, + "grad_norm": 1.0480250458757248, + "learning_rate": 4.9166871748084465e-06, + "loss": 0.4206, + "step": 8211 + }, + { + "epoch": 0.5000761197210973, + "grad_norm": 1.0145994317566482, + "learning_rate": 4.9166667452760386e-06, + "loss": 0.4063, + "step": 8212 + }, + { + "epoch": 0.5001370154979752, + "grad_norm": 0.9394175919420614, + "learning_rate": 4.916646313281581e-06, + "loss": 0.474, + "step": 8213 + }, + { + "epoch": 0.5001979112748531, + "grad_norm": 0.9274721105867872, + "learning_rate": 4.9166258788250945e-06, + "loss": 0.4957, + "step": 8214 + }, + { + "epoch": 0.500258807051731, + "grad_norm": 1.0036903485637496, + "learning_rate": 4.916605441906599e-06, + "loss": 0.469, + "step": 8215 + }, + { + "epoch": 0.5003197028286088, + "grad_norm": 0.9999253042644379, + "learning_rate": 4.916585002526116e-06, + "loss": 0.4386, + "step": 8216 + }, + { + "epoch": 0.5003805986054867, + "grad_norm": 0.9868294279755585, + "learning_rate": 4.916564560683667e-06, + "loss": 0.4961, + "step": 8217 + }, + { + "epoch": 0.5004414943823646, + "grad_norm": 1.0022694750048855, + "learning_rate": 4.916544116379272e-06, + "loss": 0.4428, + "step": 8218 + }, + { + "epoch": 0.5005023901592425, + "grad_norm": 0.9998850947992919, + "learning_rate": 4.9165236696129525e-06, + "loss": 0.4757, + "step": 8219 + }, + { + "epoch": 0.5005632859361203, + "grad_norm": 1.0351620842097746, + "learning_rate": 4.916503220384729e-06, + "loss": 0.4338, + "step": 8220 + }, + { + "epoch": 0.5006241817129982, + "grad_norm": 0.9965977713668277, + "learning_rate": 4.916482768694622e-06, + "loss": 0.4024, + "step": 8221 + }, + { + "epoch": 0.5006850774898761, + "grad_norm": 1.0679580412654521, + "learning_rate": 4.916462314542652e-06, + "loss": 0.4832, + "step": 8222 + }, + { + "epoch": 0.500745973266754, + "grad_norm": 1.0147994144366952, + "learning_rate": 4.916441857928841e-06, + "loss": 0.4707, + "step": 8223 + }, + { + "epoch": 0.5008068690436318, + "grad_norm": 1.0384837162024914, + "learning_rate": 4.9164213988532095e-06, + "loss": 0.45, + "step": 8224 + }, + { + "epoch": 0.5008677648205097, + "grad_norm": 0.9233647113003438, + "learning_rate": 4.916400937315778e-06, + "loss": 0.4857, + "step": 8225 + }, + { + "epoch": 0.5009286605973876, + "grad_norm": 1.0481836201791592, + "learning_rate": 4.916380473316567e-06, + "loss": 0.4157, + "step": 8226 + }, + { + "epoch": 0.5009895563742655, + "grad_norm": 1.0579267945372604, + "learning_rate": 4.916360006855598e-06, + "loss": 0.4348, + "step": 8227 + }, + { + "epoch": 0.5010504521511433, + "grad_norm": 1.0276274343940126, + "learning_rate": 4.916339537932892e-06, + "loss": 0.4083, + "step": 8228 + }, + { + "epoch": 0.5011113479280211, + "grad_norm": 0.976714312949374, + "learning_rate": 4.916319066548469e-06, + "loss": 0.4709, + "step": 8229 + }, + { + "epoch": 0.5011722437048991, + "grad_norm": 0.9893852784867486, + "learning_rate": 4.916298592702351e-06, + "loss": 0.4093, + "step": 8230 + }, + { + "epoch": 0.501233139481777, + "grad_norm": 0.9823610273146445, + "learning_rate": 4.916278116394559e-06, + "loss": 0.4406, + "step": 8231 + }, + { + "epoch": 0.5012940352586548, + "grad_norm": 0.9764931398563241, + "learning_rate": 4.9162576376251105e-06, + "loss": 0.488, + "step": 8232 + }, + { + "epoch": 0.5013549310355326, + "grad_norm": 1.0032445283560945, + "learning_rate": 4.9162371563940306e-06, + "loss": 0.4642, + "step": 8233 + }, + { + "epoch": 0.5014158268124106, + "grad_norm": 1.0735056912489804, + "learning_rate": 4.916216672701338e-06, + "loss": 0.5199, + "step": 8234 + }, + { + "epoch": 0.5014767225892884, + "grad_norm": 0.9775604638835911, + "learning_rate": 4.916196186547055e-06, + "loss": 0.5136, + "step": 8235 + }, + { + "epoch": 0.5015376183661663, + "grad_norm": 1.1143816282517427, + "learning_rate": 4.916175697931202e-06, + "loss": 0.4157, + "step": 8236 + }, + { + "epoch": 0.5015985141430441, + "grad_norm": 0.9992515736209108, + "learning_rate": 4.916155206853797e-06, + "loss": 0.4728, + "step": 8237 + }, + { + "epoch": 0.5016594099199221, + "grad_norm": 1.0393543395726315, + "learning_rate": 4.916134713314865e-06, + "loss": 0.5082, + "step": 8238 + }, + { + "epoch": 0.5017203056967999, + "grad_norm": 1.0358413755037308, + "learning_rate": 4.916114217314425e-06, + "loss": 0.4426, + "step": 8239 + }, + { + "epoch": 0.5017812014736778, + "grad_norm": 1.0484381639354292, + "learning_rate": 4.9160937188524985e-06, + "loss": 0.5234, + "step": 8240 + }, + { + "epoch": 0.5018420972505556, + "grad_norm": 0.9875411352258142, + "learning_rate": 4.916073217929105e-06, + "loss": 0.4476, + "step": 8241 + }, + { + "epoch": 0.5019029930274336, + "grad_norm": 0.9395806925342817, + "learning_rate": 4.916052714544267e-06, + "loss": 0.3989, + "step": 8242 + }, + { + "epoch": 0.5019638888043114, + "grad_norm": 1.0254407524947904, + "learning_rate": 4.916032208698005e-06, + "loss": 0.476, + "step": 8243 + }, + { + "epoch": 0.5020247845811893, + "grad_norm": 1.1529773228645783, + "learning_rate": 4.916011700390339e-06, + "loss": 0.5255, + "step": 8244 + }, + { + "epoch": 0.5020856803580672, + "grad_norm": 1.0536446660788277, + "learning_rate": 4.915991189621291e-06, + "loss": 0.438, + "step": 8245 + }, + { + "epoch": 0.5021465761349451, + "grad_norm": 1.0152265406425665, + "learning_rate": 4.91597067639088e-06, + "loss": 0.4866, + "step": 8246 + }, + { + "epoch": 0.5022074719118229, + "grad_norm": 1.0316557456143374, + "learning_rate": 4.91595016069913e-06, + "loss": 0.3939, + "step": 8247 + }, + { + "epoch": 0.5022683676887008, + "grad_norm": 1.0144372551832406, + "learning_rate": 4.91592964254606e-06, + "loss": 0.4971, + "step": 8248 + }, + { + "epoch": 0.5023292634655787, + "grad_norm": 1.0400045879322612, + "learning_rate": 4.91590912193169e-06, + "loss": 0.4425, + "step": 8249 + }, + { + "epoch": 0.5023901592424566, + "grad_norm": 1.0461969911883602, + "learning_rate": 4.915888598856043e-06, + "loss": 0.3637, + "step": 8250 + }, + { + "epoch": 0.5024510550193344, + "grad_norm": 1.0019643185015186, + "learning_rate": 4.915868073319139e-06, + "loss": 0.4897, + "step": 8251 + }, + { + "epoch": 0.5025119507962122, + "grad_norm": 1.0717368881923035, + "learning_rate": 4.915847545320998e-06, + "loss": 0.4, + "step": 8252 + }, + { + "epoch": 0.5025728465730902, + "grad_norm": 0.9394574510048871, + "learning_rate": 4.915827014861642e-06, + "loss": 0.4667, + "step": 8253 + }, + { + "epoch": 0.502633742349968, + "grad_norm": 1.0615033974577013, + "learning_rate": 4.915806481941092e-06, + "loss": 0.4117, + "step": 8254 + }, + { + "epoch": 0.5026946381268459, + "grad_norm": 0.9915078331065026, + "learning_rate": 4.915785946559368e-06, + "loss": 0.4582, + "step": 8255 + }, + { + "epoch": 0.5027555339037237, + "grad_norm": 1.0126725443286524, + "learning_rate": 4.915765408716493e-06, + "loss": 0.3915, + "step": 8256 + }, + { + "epoch": 0.5028164296806017, + "grad_norm": 0.9862259775354372, + "learning_rate": 4.915744868412485e-06, + "loss": 0.4092, + "step": 8257 + }, + { + "epoch": 0.5028773254574795, + "grad_norm": 0.9171979317808733, + "learning_rate": 4.915724325647366e-06, + "loss": 0.4162, + "step": 8258 + }, + { + "epoch": 0.5029382212343574, + "grad_norm": 0.9507235284111752, + "learning_rate": 4.915703780421158e-06, + "loss": 0.5114, + "step": 8259 + }, + { + "epoch": 0.5029991170112352, + "grad_norm": 1.0803325461191755, + "learning_rate": 4.9156832327338824e-06, + "loss": 0.4487, + "step": 8260 + }, + { + "epoch": 0.5030600127881132, + "grad_norm": 1.0627254688448204, + "learning_rate": 4.915662682585557e-06, + "loss": 0.4602, + "step": 8261 + }, + { + "epoch": 0.503120908564991, + "grad_norm": 1.0029364352551888, + "learning_rate": 4.915642129976206e-06, + "loss": 0.4287, + "step": 8262 + }, + { + "epoch": 0.5031818043418689, + "grad_norm": 1.0888711217012454, + "learning_rate": 4.915621574905848e-06, + "loss": 0.3784, + "step": 8263 + }, + { + "epoch": 0.5032427001187467, + "grad_norm": 1.107091429760823, + "learning_rate": 4.915601017374505e-06, + "loss": 0.4159, + "step": 8264 + }, + { + "epoch": 0.5033035958956247, + "grad_norm": 1.0304781192306083, + "learning_rate": 4.915580457382199e-06, + "loss": 0.3973, + "step": 8265 + }, + { + "epoch": 0.5033644916725025, + "grad_norm": 0.9796838278486356, + "learning_rate": 4.915559894928949e-06, + "loss": 0.4607, + "step": 8266 + }, + { + "epoch": 0.5034253874493804, + "grad_norm": 1.0754241880028292, + "learning_rate": 4.915539330014777e-06, + "loss": 0.3989, + "step": 8267 + }, + { + "epoch": 0.5034862832262582, + "grad_norm": 1.0463408956625047, + "learning_rate": 4.915518762639704e-06, + "loss": 0.3612, + "step": 8268 + }, + { + "epoch": 0.5035471790031362, + "grad_norm": 1.0381687053039401, + "learning_rate": 4.91549819280375e-06, + "loss": 0.4584, + "step": 8269 + }, + { + "epoch": 0.503608074780014, + "grad_norm": 1.0366912361771785, + "learning_rate": 4.915477620506938e-06, + "loss": 0.3887, + "step": 8270 + }, + { + "epoch": 0.5036689705568919, + "grad_norm": 1.0807263963187934, + "learning_rate": 4.915457045749286e-06, + "loss": 0.3983, + "step": 8271 + }, + { + "epoch": 0.5037298663337697, + "grad_norm": 1.0357646327027121, + "learning_rate": 4.915436468530818e-06, + "loss": 0.4657, + "step": 8272 + }, + { + "epoch": 0.5037907621106477, + "grad_norm": 1.1299981560299117, + "learning_rate": 4.915415888851552e-06, + "loss": 0.406, + "step": 8273 + }, + { + "epoch": 0.5038516578875255, + "grad_norm": 1.07981096573318, + "learning_rate": 4.915395306711512e-06, + "loss": 0.4186, + "step": 8274 + }, + { + "epoch": 0.5039125536644034, + "grad_norm": 1.0338958248376084, + "learning_rate": 4.9153747221107165e-06, + "loss": 0.4468, + "step": 8275 + }, + { + "epoch": 0.5039734494412812, + "grad_norm": 1.1255851702874975, + "learning_rate": 4.915354135049188e-06, + "loss": 0.3992, + "step": 8276 + }, + { + "epoch": 0.5040343452181592, + "grad_norm": 1.0460064955506139, + "learning_rate": 4.915333545526947e-06, + "loss": 0.3929, + "step": 8277 + }, + { + "epoch": 0.504095240995037, + "grad_norm": 0.9324358832034654, + "learning_rate": 4.915312953544014e-06, + "loss": 0.4956, + "step": 8278 + }, + { + "epoch": 0.5041561367719148, + "grad_norm": 1.040378827469332, + "learning_rate": 4.91529235910041e-06, + "loss": 0.444, + "step": 8279 + }, + { + "epoch": 0.5042170325487927, + "grad_norm": 0.9537687603045987, + "learning_rate": 4.915271762196157e-06, + "loss": 0.4431, + "step": 8280 + }, + { + "epoch": 0.5042779283256706, + "grad_norm": 0.9896045444587342, + "learning_rate": 4.915251162831275e-06, + "loss": 0.4772, + "step": 8281 + }, + { + "epoch": 0.5043388241025485, + "grad_norm": 1.006162136359954, + "learning_rate": 4.915230561005786e-06, + "loss": 0.4556, + "step": 8282 + }, + { + "epoch": 0.5043997198794263, + "grad_norm": 1.0402607912122777, + "learning_rate": 4.91520995671971e-06, + "loss": 0.4673, + "step": 8283 + }, + { + "epoch": 0.5044606156563043, + "grad_norm": 0.9849612284226241, + "learning_rate": 4.915189349973067e-06, + "loss": 0.4218, + "step": 8284 + }, + { + "epoch": 0.5045215114331821, + "grad_norm": 1.026237572462583, + "learning_rate": 4.915168740765881e-06, + "loss": 0.3869, + "step": 8285 + }, + { + "epoch": 0.50458240721006, + "grad_norm": 1.0563493524306387, + "learning_rate": 4.9151481290981704e-06, + "loss": 0.4623, + "step": 8286 + }, + { + "epoch": 0.5046433029869378, + "grad_norm": 1.063708049463425, + "learning_rate": 4.915127514969958e-06, + "loss": 0.3717, + "step": 8287 + }, + { + "epoch": 0.5047041987638158, + "grad_norm": 0.9688402312337515, + "learning_rate": 4.915106898381263e-06, + "loss": 0.4437, + "step": 8288 + }, + { + "epoch": 0.5047650945406936, + "grad_norm": 1.0898900656281534, + "learning_rate": 4.915086279332108e-06, + "loss": 0.4593, + "step": 8289 + }, + { + "epoch": 0.5048259903175715, + "grad_norm": 1.0204725616634112, + "learning_rate": 4.915065657822513e-06, + "loss": 0.4422, + "step": 8290 + }, + { + "epoch": 0.5048868860944493, + "grad_norm": 1.060951258884856, + "learning_rate": 4.9150450338524994e-06, + "loss": 0.4453, + "step": 8291 + }, + { + "epoch": 0.5049477818713273, + "grad_norm": 1.0488727367199226, + "learning_rate": 4.915024407422088e-06, + "loss": 0.4869, + "step": 8292 + }, + { + "epoch": 0.5050086776482051, + "grad_norm": 0.9973367304505518, + "learning_rate": 4.9150037785313e-06, + "loss": 0.4751, + "step": 8293 + }, + { + "epoch": 0.505069573425083, + "grad_norm": 1.0041781806411556, + "learning_rate": 4.914983147180157e-06, + "loss": 0.4784, + "step": 8294 + }, + { + "epoch": 0.5051304692019608, + "grad_norm": 1.0053949992239375, + "learning_rate": 4.914962513368678e-06, + "loss": 0.4112, + "step": 8295 + }, + { + "epoch": 0.5051913649788388, + "grad_norm": 1.0102482771945485, + "learning_rate": 4.914941877096886e-06, + "loss": 0.4358, + "step": 8296 + }, + { + "epoch": 0.5052522607557166, + "grad_norm": 0.9400136516164629, + "learning_rate": 4.914921238364801e-06, + "loss": 0.4362, + "step": 8297 + }, + { + "epoch": 0.5053131565325945, + "grad_norm": 1.0658411930712786, + "learning_rate": 4.914900597172445e-06, + "loss": 0.4299, + "step": 8298 + }, + { + "epoch": 0.5053740523094723, + "grad_norm": 1.0542708307860371, + "learning_rate": 4.914879953519839e-06, + "loss": 0.3676, + "step": 8299 + }, + { + "epoch": 0.5054349480863503, + "grad_norm": 1.0326239939818618, + "learning_rate": 4.914859307407003e-06, + "loss": 0.4243, + "step": 8300 + }, + { + "epoch": 0.5054958438632281, + "grad_norm": 1.0241476505786227, + "learning_rate": 4.914838658833958e-06, + "loss": 0.4155, + "step": 8301 + }, + { + "epoch": 0.505556739640106, + "grad_norm": 1.1091070889105576, + "learning_rate": 4.914818007800727e-06, + "loss": 0.4402, + "step": 8302 + }, + { + "epoch": 0.5056176354169838, + "grad_norm": 0.9666538695842777, + "learning_rate": 4.9147973543073276e-06, + "loss": 0.5333, + "step": 8303 + }, + { + "epoch": 0.5056785311938617, + "grad_norm": 1.0150865762257362, + "learning_rate": 4.914776698353784e-06, + "loss": 0.4591, + "step": 8304 + }, + { + "epoch": 0.5057394269707396, + "grad_norm": 1.0326748755812742, + "learning_rate": 4.914756039940117e-06, + "loss": 0.5059, + "step": 8305 + }, + { + "epoch": 0.5058003227476174, + "grad_norm": 1.0457924315364449, + "learning_rate": 4.914735379066346e-06, + "loss": 0.4461, + "step": 8306 + }, + { + "epoch": 0.5058612185244953, + "grad_norm": 1.0216206494729632, + "learning_rate": 4.914714715732492e-06, + "loss": 0.4418, + "step": 8307 + }, + { + "epoch": 0.5059221143013732, + "grad_norm": 0.9871352476041194, + "learning_rate": 4.914694049938577e-06, + "loss": 0.4254, + "step": 8308 + }, + { + "epoch": 0.5059830100782511, + "grad_norm": 1.0214446154939365, + "learning_rate": 4.9146733816846225e-06, + "loss": 0.3926, + "step": 8309 + }, + { + "epoch": 0.5060439058551289, + "grad_norm": 1.0616279918197338, + "learning_rate": 4.914652710970649e-06, + "loss": 0.477, + "step": 8310 + }, + { + "epoch": 0.5061048016320068, + "grad_norm": 1.0134004684090823, + "learning_rate": 4.914632037796678e-06, + "loss": 0.4148, + "step": 8311 + }, + { + "epoch": 0.5061656974088847, + "grad_norm": 1.1925115885691253, + "learning_rate": 4.914611362162729e-06, + "loss": 0.3875, + "step": 8312 + }, + { + "epoch": 0.5062265931857626, + "grad_norm": 1.0099236502268316, + "learning_rate": 4.914590684068825e-06, + "loss": 0.488, + "step": 8313 + }, + { + "epoch": 0.5062874889626404, + "grad_norm": 1.0039073484711418, + "learning_rate": 4.914570003514986e-06, + "loss": 0.3933, + "step": 8314 + }, + { + "epoch": 0.5063483847395183, + "grad_norm": 0.9437550476752262, + "learning_rate": 4.914549320501233e-06, + "loss": 0.4859, + "step": 8315 + }, + { + "epoch": 0.5064092805163962, + "grad_norm": 1.0111999075686624, + "learning_rate": 4.914528635027587e-06, + "loss": 0.4504, + "step": 8316 + }, + { + "epoch": 0.5064701762932741, + "grad_norm": 0.9470213066969483, + "learning_rate": 4.91450794709407e-06, + "loss": 0.4816, + "step": 8317 + }, + { + "epoch": 0.5065310720701519, + "grad_norm": 1.1097473516461551, + "learning_rate": 4.914487256700702e-06, + "loss": 0.4353, + "step": 8318 + }, + { + "epoch": 0.5065919678470298, + "grad_norm": 0.9954929331598709, + "learning_rate": 4.914466563847506e-06, + "loss": 0.4617, + "step": 8319 + }, + { + "epoch": 0.5066528636239077, + "grad_norm": 1.069707185137774, + "learning_rate": 4.9144458685345e-06, + "loss": 0.4106, + "step": 8320 + }, + { + "epoch": 0.5067137594007856, + "grad_norm": 1.0464476337862136, + "learning_rate": 4.9144251707617075e-06, + "loss": 0.4339, + "step": 8321 + }, + { + "epoch": 0.5067746551776634, + "grad_norm": 0.9919252943358514, + "learning_rate": 4.9144044705291485e-06, + "loss": 0.4404, + "step": 8322 + }, + { + "epoch": 0.5068355509545412, + "grad_norm": 0.9466043425594095, + "learning_rate": 4.914383767836845e-06, + "loss": 0.458, + "step": 8323 + }, + { + "epoch": 0.5068964467314192, + "grad_norm": 1.1011213939156848, + "learning_rate": 4.914363062684817e-06, + "loss": 0.3261, + "step": 8324 + }, + { + "epoch": 0.506957342508297, + "grad_norm": 0.9979217699075146, + "learning_rate": 4.9143423550730855e-06, + "loss": 0.4541, + "step": 8325 + }, + { + "epoch": 0.5070182382851749, + "grad_norm": 0.9834147994434927, + "learning_rate": 4.914321645001673e-06, + "loss": 0.4963, + "step": 8326 + }, + { + "epoch": 0.5070791340620528, + "grad_norm": 1.0492195484212699, + "learning_rate": 4.9143009324706e-06, + "loss": 0.4847, + "step": 8327 + }, + { + "epoch": 0.5071400298389307, + "grad_norm": 1.016417550733733, + "learning_rate": 4.914280217479887e-06, + "loss": 0.473, + "step": 8328 + }, + { + "epoch": 0.5072009256158085, + "grad_norm": 1.0182377299280172, + "learning_rate": 4.914259500029555e-06, + "loss": 0.4736, + "step": 8329 + }, + { + "epoch": 0.5072618213926864, + "grad_norm": 1.0037140057036749, + "learning_rate": 4.914238780119626e-06, + "loss": 0.4364, + "step": 8330 + }, + { + "epoch": 0.5073227171695643, + "grad_norm": 0.9721494990536464, + "learning_rate": 4.914218057750122e-06, + "loss": 0.4239, + "step": 8331 + }, + { + "epoch": 0.5073836129464422, + "grad_norm": 1.0656576420109438, + "learning_rate": 4.9141973329210615e-06, + "loss": 0.3891, + "step": 8332 + }, + { + "epoch": 0.50744450872332, + "grad_norm": 1.0628729584771017, + "learning_rate": 4.914176605632468e-06, + "loss": 0.44, + "step": 8333 + }, + { + "epoch": 0.5075054045001979, + "grad_norm": 0.9471711293977645, + "learning_rate": 4.9141558758843604e-06, + "loss": 0.4685, + "step": 8334 + }, + { + "epoch": 0.5075663002770758, + "grad_norm": 1.0310575844058707, + "learning_rate": 4.914135143676762e-06, + "loss": 0.3987, + "step": 8335 + }, + { + "epoch": 0.5076271960539537, + "grad_norm": 0.990817508974282, + "learning_rate": 4.914114409009692e-06, + "loss": 0.4617, + "step": 8336 + }, + { + "epoch": 0.5076880918308315, + "grad_norm": 1.0363793901871479, + "learning_rate": 4.914093671883172e-06, + "loss": 0.4644, + "step": 8337 + }, + { + "epoch": 0.5077489876077094, + "grad_norm": 0.9707434914999458, + "learning_rate": 4.9140729322972244e-06, + "loss": 0.4968, + "step": 8338 + }, + { + "epoch": 0.5078098833845873, + "grad_norm": 1.076350549996223, + "learning_rate": 4.91405219025187e-06, + "loss": 0.4074, + "step": 8339 + }, + { + "epoch": 0.5078707791614652, + "grad_norm": 0.9911276080171267, + "learning_rate": 4.914031445747128e-06, + "loss": 0.446, + "step": 8340 + }, + { + "epoch": 0.507931674938343, + "grad_norm": 1.0058385411552266, + "learning_rate": 4.914010698783023e-06, + "loss": 0.4371, + "step": 8341 + }, + { + "epoch": 0.5079925707152209, + "grad_norm": 0.9361126697013923, + "learning_rate": 4.913989949359572e-06, + "loss": 0.4533, + "step": 8342 + }, + { + "epoch": 0.5080534664920988, + "grad_norm": 1.0708746418660227, + "learning_rate": 4.9139691974768e-06, + "loss": 0.3593, + "step": 8343 + }, + { + "epoch": 0.5081143622689767, + "grad_norm": 1.0377831110645175, + "learning_rate": 4.913948443134725e-06, + "loss": 0.3768, + "step": 8344 + }, + { + "epoch": 0.5081752580458545, + "grad_norm": 1.0426603165772104, + "learning_rate": 4.91392768633337e-06, + "loss": 0.4409, + "step": 8345 + }, + { + "epoch": 0.5082361538227324, + "grad_norm": 0.8941212896477583, + "learning_rate": 4.913906927072756e-06, + "loss": 0.44, + "step": 8346 + }, + { + "epoch": 0.5082970495996103, + "grad_norm": 1.054908517111049, + "learning_rate": 4.913886165352903e-06, + "loss": 0.4071, + "step": 8347 + }, + { + "epoch": 0.5083579453764882, + "grad_norm": 1.0530048247330266, + "learning_rate": 4.913865401173833e-06, + "loss": 0.4707, + "step": 8348 + }, + { + "epoch": 0.508418841153366, + "grad_norm": 1.031584491721372, + "learning_rate": 4.913844634535568e-06, + "loss": 0.4645, + "step": 8349 + }, + { + "epoch": 0.5084797369302438, + "grad_norm": 0.9768995285006518, + "learning_rate": 4.913823865438128e-06, + "loss": 0.4481, + "step": 8350 + }, + { + "epoch": 0.5085406327071218, + "grad_norm": 1.1039179135621746, + "learning_rate": 4.913803093881534e-06, + "loss": 0.4048, + "step": 8351 + }, + { + "epoch": 0.5086015284839996, + "grad_norm": 1.0602222486081818, + "learning_rate": 4.913782319865808e-06, + "loss": 0.5185, + "step": 8352 + }, + { + "epoch": 0.5086624242608775, + "grad_norm": 0.9885854949091166, + "learning_rate": 4.91376154339097e-06, + "loss": 0.4504, + "step": 8353 + }, + { + "epoch": 0.5087233200377553, + "grad_norm": 0.9835070458261894, + "learning_rate": 4.913740764457043e-06, + "loss": 0.4768, + "step": 8354 + }, + { + "epoch": 0.5087842158146333, + "grad_norm": 1.0742952891830189, + "learning_rate": 4.913719983064046e-06, + "loss": 0.4074, + "step": 8355 + }, + { + "epoch": 0.5088451115915111, + "grad_norm": 0.9931307436862242, + "learning_rate": 4.913699199212002e-06, + "loss": 0.5284, + "step": 8356 + }, + { + "epoch": 0.508906007368389, + "grad_norm": 1.023639466741578, + "learning_rate": 4.9136784129009315e-06, + "loss": 0.4539, + "step": 8357 + }, + { + "epoch": 0.5089669031452668, + "grad_norm": 0.9259048715667947, + "learning_rate": 4.913657624130855e-06, + "loss": 0.5447, + "step": 8358 + }, + { + "epoch": 0.5090277989221448, + "grad_norm": 1.0914558874153848, + "learning_rate": 4.913636832901795e-06, + "loss": 0.3547, + "step": 8359 + }, + { + "epoch": 0.5090886946990226, + "grad_norm": 1.0411354883064328, + "learning_rate": 4.913616039213772e-06, + "loss": 0.4501, + "step": 8360 + }, + { + "epoch": 0.5091495904759005, + "grad_norm": 0.9553274346369984, + "learning_rate": 4.913595243066807e-06, + "loss": 0.453, + "step": 8361 + }, + { + "epoch": 0.5092104862527783, + "grad_norm": 0.9990871746243446, + "learning_rate": 4.9135744444609205e-06, + "loss": 0.403, + "step": 8362 + }, + { + "epoch": 0.5092713820296563, + "grad_norm": 0.9348599314767481, + "learning_rate": 4.913553643396135e-06, + "loss": 0.4265, + "step": 8363 + }, + { + "epoch": 0.5093322778065341, + "grad_norm": 1.118863835059077, + "learning_rate": 4.913532839872472e-06, + "loss": 0.5769, + "step": 8364 + }, + { + "epoch": 0.509393173583412, + "grad_norm": 0.9113127215893202, + "learning_rate": 4.913512033889951e-06, + "loss": 0.442, + "step": 8365 + }, + { + "epoch": 0.5094540693602899, + "grad_norm": 1.0411392670370716, + "learning_rate": 4.913491225448595e-06, + "loss": 0.4473, + "step": 8366 + }, + { + "epoch": 0.5095149651371678, + "grad_norm": 1.003543302476234, + "learning_rate": 4.9134704145484234e-06, + "loss": 0.5599, + "step": 8367 + }, + { + "epoch": 0.5095758609140456, + "grad_norm": 0.9626467661165781, + "learning_rate": 4.9134496011894585e-06, + "loss": 0.4497, + "step": 8368 + }, + { + "epoch": 0.5096367566909235, + "grad_norm": 1.1384088697145076, + "learning_rate": 4.913428785371722e-06, + "loss": 0.3724, + "step": 8369 + }, + { + "epoch": 0.5096976524678014, + "grad_norm": 1.1154512122480995, + "learning_rate": 4.913407967095234e-06, + "loss": 0.3949, + "step": 8370 + }, + { + "epoch": 0.5097585482446793, + "grad_norm": 0.9459712748263016, + "learning_rate": 4.913387146360016e-06, + "loss": 0.4895, + "step": 8371 + }, + { + "epoch": 0.5098194440215571, + "grad_norm": 0.9659752285093669, + "learning_rate": 4.91336632316609e-06, + "loss": 0.4805, + "step": 8372 + }, + { + "epoch": 0.509880339798435, + "grad_norm": 1.0042262700015703, + "learning_rate": 4.913345497513475e-06, + "loss": 0.4386, + "step": 8373 + }, + { + "epoch": 0.5099412355753129, + "grad_norm": 1.1119054899249983, + "learning_rate": 4.9133246694021954e-06, + "loss": 0.3548, + "step": 8374 + }, + { + "epoch": 0.5100021313521907, + "grad_norm": 0.9397885622201619, + "learning_rate": 4.91330383883227e-06, + "loss": 0.4651, + "step": 8375 + }, + { + "epoch": 0.5100630271290686, + "grad_norm": 1.1339627803565115, + "learning_rate": 4.913283005803721e-06, + "loss": 0.4226, + "step": 8376 + }, + { + "epoch": 0.5101239229059464, + "grad_norm": 1.0077708356833879, + "learning_rate": 4.91326217031657e-06, + "loss": 0.481, + "step": 8377 + }, + { + "epoch": 0.5101848186828244, + "grad_norm": 1.0492771484330186, + "learning_rate": 4.913241332370836e-06, + "loss": 0.4331, + "step": 8378 + }, + { + "epoch": 0.5102457144597022, + "grad_norm": 1.0231970200712788, + "learning_rate": 4.913220491966544e-06, + "loss": 0.4382, + "step": 8379 + }, + { + "epoch": 0.5103066102365801, + "grad_norm": 0.9619565495699908, + "learning_rate": 4.913199649103712e-06, + "loss": 0.5045, + "step": 8380 + }, + { + "epoch": 0.5103675060134579, + "grad_norm": 0.9548818090952357, + "learning_rate": 4.913178803782362e-06, + "loss": 0.3526, + "step": 8381 + }, + { + "epoch": 0.5104284017903359, + "grad_norm": 0.9533941085553209, + "learning_rate": 4.913157956002517e-06, + "loss": 0.4558, + "step": 8382 + }, + { + "epoch": 0.5104892975672137, + "grad_norm": 1.0517089460743336, + "learning_rate": 4.9131371057641955e-06, + "loss": 0.3951, + "step": 8383 + }, + { + "epoch": 0.5105501933440916, + "grad_norm": 0.993623613425874, + "learning_rate": 4.913116253067421e-06, + "loss": 0.4002, + "step": 8384 + }, + { + "epoch": 0.5106110891209694, + "grad_norm": 1.0307291291300542, + "learning_rate": 4.913095397912213e-06, + "loss": 0.4368, + "step": 8385 + }, + { + "epoch": 0.5106719848978474, + "grad_norm": 1.0230254227818123, + "learning_rate": 4.913074540298594e-06, + "loss": 0.4332, + "step": 8386 + }, + { + "epoch": 0.5107328806747252, + "grad_norm": 1.1603527616479106, + "learning_rate": 4.913053680226585e-06, + "loss": 0.4682, + "step": 8387 + }, + { + "epoch": 0.5107937764516031, + "grad_norm": 0.9548680056877453, + "learning_rate": 4.913032817696207e-06, + "loss": 0.4846, + "step": 8388 + }, + { + "epoch": 0.5108546722284809, + "grad_norm": 0.9806049848799697, + "learning_rate": 4.913011952707481e-06, + "loss": 0.4819, + "step": 8389 + }, + { + "epoch": 0.5109155680053589, + "grad_norm": 0.9304744287937748, + "learning_rate": 4.912991085260429e-06, + "loss": 0.5185, + "step": 8390 + }, + { + "epoch": 0.5109764637822367, + "grad_norm": 0.9836515582706606, + "learning_rate": 4.912970215355071e-06, + "loss": 0.4716, + "step": 8391 + }, + { + "epoch": 0.5110373595591146, + "grad_norm": 0.9730393771590212, + "learning_rate": 4.912949342991431e-06, + "loss": 0.4543, + "step": 8392 + }, + { + "epoch": 0.5110982553359924, + "grad_norm": 1.0485816515233146, + "learning_rate": 4.912928468169526e-06, + "loss": 0.3801, + "step": 8393 + }, + { + "epoch": 0.5111591511128704, + "grad_norm": 0.9932666431982463, + "learning_rate": 4.912907590889382e-06, + "loss": 0.4222, + "step": 8394 + }, + { + "epoch": 0.5112200468897482, + "grad_norm": 1.0881757474487819, + "learning_rate": 4.912886711151016e-06, + "loss": 0.4045, + "step": 8395 + }, + { + "epoch": 0.511280942666626, + "grad_norm": 1.012564998766804, + "learning_rate": 4.912865828954452e-06, + "loss": 0.4815, + "step": 8396 + }, + { + "epoch": 0.5113418384435039, + "grad_norm": 1.0463083401154663, + "learning_rate": 4.912844944299711e-06, + "loss": 0.4141, + "step": 8397 + }, + { + "epoch": 0.5114027342203818, + "grad_norm": 0.9974795804351037, + "learning_rate": 4.912824057186812e-06, + "loss": 0.4308, + "step": 8398 + }, + { + "epoch": 0.5114636299972597, + "grad_norm": 1.0366122887017641, + "learning_rate": 4.912803167615779e-06, + "loss": 0.4145, + "step": 8399 + }, + { + "epoch": 0.5115245257741375, + "grad_norm": 1.0067252352360911, + "learning_rate": 4.912782275586633e-06, + "loss": 0.5697, + "step": 8400 + }, + { + "epoch": 0.5115854215510154, + "grad_norm": 1.001718032990911, + "learning_rate": 4.9127613810993944e-06, + "loss": 0.4922, + "step": 8401 + }, + { + "epoch": 0.5116463173278933, + "grad_norm": 0.9918498049919918, + "learning_rate": 4.912740484154084e-06, + "loss": 0.4553, + "step": 8402 + }, + { + "epoch": 0.5117072131047712, + "grad_norm": 0.9721091927316241, + "learning_rate": 4.912719584750724e-06, + "loss": 0.443, + "step": 8403 + }, + { + "epoch": 0.511768108881649, + "grad_norm": 1.0150561008980563, + "learning_rate": 4.912698682889335e-06, + "loss": 0.4335, + "step": 8404 + }, + { + "epoch": 0.5118290046585269, + "grad_norm": 0.9946162151035344, + "learning_rate": 4.91267777856994e-06, + "loss": 0.4887, + "step": 8405 + }, + { + "epoch": 0.5118899004354048, + "grad_norm": 1.0057445635716233, + "learning_rate": 4.912656871792558e-06, + "loss": 0.4076, + "step": 8406 + }, + { + "epoch": 0.5119507962122827, + "grad_norm": 0.9649266008262128, + "learning_rate": 4.912635962557212e-06, + "loss": 0.5564, + "step": 8407 + }, + { + "epoch": 0.5120116919891605, + "grad_norm": 0.9770937225148982, + "learning_rate": 4.912615050863922e-06, + "loss": 0.4535, + "step": 8408 + }, + { + "epoch": 0.5120725877660385, + "grad_norm": 0.922849345646547, + "learning_rate": 4.9125941367127106e-06, + "loss": 0.4535, + "step": 8409 + }, + { + "epoch": 0.5121334835429163, + "grad_norm": 1.1852770004521096, + "learning_rate": 4.912573220103597e-06, + "loss": 0.5331, + "step": 8410 + }, + { + "epoch": 0.5121943793197942, + "grad_norm": 0.965356296269453, + "learning_rate": 4.912552301036605e-06, + "loss": 0.4621, + "step": 8411 + }, + { + "epoch": 0.512255275096672, + "grad_norm": 1.1056658776634585, + "learning_rate": 4.912531379511756e-06, + "loss": 0.4024, + "step": 8412 + }, + { + "epoch": 0.51231617087355, + "grad_norm": 1.0273795444948508, + "learning_rate": 4.912510455529068e-06, + "loss": 0.4026, + "step": 8413 + }, + { + "epoch": 0.5123770666504278, + "grad_norm": 1.0438057926091828, + "learning_rate": 4.912489529088566e-06, + "loss": 0.4359, + "step": 8414 + }, + { + "epoch": 0.5124379624273057, + "grad_norm": 0.965303225296784, + "learning_rate": 4.9124686001902694e-06, + "loss": 0.4808, + "step": 8415 + }, + { + "epoch": 0.5124988582041835, + "grad_norm": 1.0430818622105011, + "learning_rate": 4.912447668834201e-06, + "loss": 0.4414, + "step": 8416 + }, + { + "epoch": 0.5125597539810615, + "grad_norm": 1.0526091323931046, + "learning_rate": 4.9124267350203795e-06, + "loss": 0.4271, + "step": 8417 + }, + { + "epoch": 0.5126206497579393, + "grad_norm": 1.0582039957313756, + "learning_rate": 4.912405798748828e-06, + "loss": 0.418, + "step": 8418 + }, + { + "epoch": 0.5126815455348172, + "grad_norm": 0.9850947531620889, + "learning_rate": 4.912384860019568e-06, + "loss": 0.4668, + "step": 8419 + }, + { + "epoch": 0.512742441311695, + "grad_norm": 0.8712377516091484, + "learning_rate": 4.91236391883262e-06, + "loss": 0.4664, + "step": 8420 + }, + { + "epoch": 0.512803337088573, + "grad_norm": 0.9218514852230555, + "learning_rate": 4.912342975188007e-06, + "loss": 0.5046, + "step": 8421 + }, + { + "epoch": 0.5128642328654508, + "grad_norm": 1.0380545241707875, + "learning_rate": 4.912322029085747e-06, + "loss": 0.4404, + "step": 8422 + }, + { + "epoch": 0.5129251286423286, + "grad_norm": 1.1155271477503226, + "learning_rate": 4.912301080525865e-06, + "loss": 0.4352, + "step": 8423 + }, + { + "epoch": 0.5129860244192065, + "grad_norm": 1.019989869185658, + "learning_rate": 4.912280129508381e-06, + "loss": 0.3883, + "step": 8424 + }, + { + "epoch": 0.5130469201960844, + "grad_norm": 1.0251040700238876, + "learning_rate": 4.912259176033316e-06, + "loss": 0.5462, + "step": 8425 + }, + { + "epoch": 0.5131078159729623, + "grad_norm": 1.1090634572957057, + "learning_rate": 4.912238220100691e-06, + "loss": 0.4301, + "step": 8426 + }, + { + "epoch": 0.5131687117498401, + "grad_norm": 1.013746033034442, + "learning_rate": 4.912217261710528e-06, + "loss": 0.406, + "step": 8427 + }, + { + "epoch": 0.513229607526718, + "grad_norm": 1.026696732846761, + "learning_rate": 4.912196300862849e-06, + "loss": 0.4284, + "step": 8428 + }, + { + "epoch": 0.5132905033035959, + "grad_norm": 0.9908049767640289, + "learning_rate": 4.912175337557673e-06, + "loss": 0.4708, + "step": 8429 + }, + { + "epoch": 0.5133513990804738, + "grad_norm": 1.0850701671306282, + "learning_rate": 4.912154371795024e-06, + "loss": 0.3703, + "step": 8430 + }, + { + "epoch": 0.5134122948573516, + "grad_norm": 1.0007829141117706, + "learning_rate": 4.912133403574922e-06, + "loss": 0.3822, + "step": 8431 + }, + { + "epoch": 0.5134731906342295, + "grad_norm": 1.0075273220340242, + "learning_rate": 4.912112432897389e-06, + "loss": 0.457, + "step": 8432 + }, + { + "epoch": 0.5135340864111074, + "grad_norm": 0.9759069119517849, + "learning_rate": 4.912091459762446e-06, + "loss": 0.4186, + "step": 8433 + }, + { + "epoch": 0.5135949821879853, + "grad_norm": 1.0122306169432858, + "learning_rate": 4.912070484170114e-06, + "loss": 0.425, + "step": 8434 + }, + { + "epoch": 0.5136558779648631, + "grad_norm": 0.9344659956812211, + "learning_rate": 4.912049506120415e-06, + "loss": 0.4332, + "step": 8435 + }, + { + "epoch": 0.513716773741741, + "grad_norm": 0.9494548942595549, + "learning_rate": 4.912028525613369e-06, + "loss": 0.4528, + "step": 8436 + }, + { + "epoch": 0.5137776695186189, + "grad_norm": 1.0393703021044038, + "learning_rate": 4.912007542649e-06, + "loss": 0.4567, + "step": 8437 + }, + { + "epoch": 0.5138385652954968, + "grad_norm": 1.0870179957080606, + "learning_rate": 4.9119865572273275e-06, + "loss": 0.4037, + "step": 8438 + }, + { + "epoch": 0.5138994610723746, + "grad_norm": 1.0523643051678813, + "learning_rate": 4.9119655693483725e-06, + "loss": 0.4203, + "step": 8439 + }, + { + "epoch": 0.5139603568492525, + "grad_norm": 1.0402705545857474, + "learning_rate": 4.911944579012158e-06, + "loss": 0.4475, + "step": 8440 + }, + { + "epoch": 0.5140212526261304, + "grad_norm": 1.014740853577183, + "learning_rate": 4.911923586218704e-06, + "loss": 0.4585, + "step": 8441 + }, + { + "epoch": 0.5140821484030083, + "grad_norm": 1.0142274170129753, + "learning_rate": 4.911902590968033e-06, + "loss": 0.4326, + "step": 8442 + }, + { + "epoch": 0.5141430441798861, + "grad_norm": 0.9283971007770191, + "learning_rate": 4.911881593260165e-06, + "loss": 0.494, + "step": 8443 + }, + { + "epoch": 0.514203939956764, + "grad_norm": 0.9580052683633056, + "learning_rate": 4.911860593095123e-06, + "loss": 0.4897, + "step": 8444 + }, + { + "epoch": 0.5142648357336419, + "grad_norm": 1.0041426053917983, + "learning_rate": 4.911839590472926e-06, + "loss": 0.488, + "step": 8445 + }, + { + "epoch": 0.5143257315105197, + "grad_norm": 1.14140945692166, + "learning_rate": 4.9118185853935985e-06, + "loss": 0.4407, + "step": 8446 + }, + { + "epoch": 0.5143866272873976, + "grad_norm": 1.0722712362269513, + "learning_rate": 4.91179757785716e-06, + "loss": 0.4409, + "step": 8447 + }, + { + "epoch": 0.5144475230642755, + "grad_norm": 1.0033388964352685, + "learning_rate": 4.911776567863632e-06, + "loss": 0.4158, + "step": 8448 + }, + { + "epoch": 0.5145084188411534, + "grad_norm": 1.100653924078908, + "learning_rate": 4.911755555413037e-06, + "loss": 0.38, + "step": 8449 + }, + { + "epoch": 0.5145693146180312, + "grad_norm": 1.0434774063998744, + "learning_rate": 4.911734540505394e-06, + "loss": 0.435, + "step": 8450 + }, + { + "epoch": 0.5146302103949091, + "grad_norm": 0.9025153318873368, + "learning_rate": 4.911713523140728e-06, + "loss": 0.4436, + "step": 8451 + }, + { + "epoch": 0.514691106171787, + "grad_norm": 1.0328874321409025, + "learning_rate": 4.9116925033190565e-06, + "loss": 0.4074, + "step": 8452 + }, + { + "epoch": 0.5147520019486649, + "grad_norm": 0.9798001808923298, + "learning_rate": 4.9116714810404044e-06, + "loss": 0.4232, + "step": 8453 + }, + { + "epoch": 0.5148128977255427, + "grad_norm": 1.0295017164748859, + "learning_rate": 4.911650456304791e-06, + "loss": 0.4506, + "step": 8454 + }, + { + "epoch": 0.5148737935024206, + "grad_norm": 1.089833808808818, + "learning_rate": 4.911629429112237e-06, + "loss": 0.5084, + "step": 8455 + }, + { + "epoch": 0.5149346892792985, + "grad_norm": 1.0292077814581837, + "learning_rate": 4.9116083994627665e-06, + "loss": 0.3676, + "step": 8456 + }, + { + "epoch": 0.5149955850561764, + "grad_norm": 1.0883013092546447, + "learning_rate": 4.911587367356399e-06, + "loss": 0.4276, + "step": 8457 + }, + { + "epoch": 0.5150564808330542, + "grad_norm": 0.9608396695936109, + "learning_rate": 4.9115663327931565e-06, + "loss": 0.469, + "step": 8458 + }, + { + "epoch": 0.5151173766099321, + "grad_norm": 0.9862974055385412, + "learning_rate": 4.911545295773061e-06, + "loss": 0.392, + "step": 8459 + }, + { + "epoch": 0.51517827238681, + "grad_norm": 1.003856311726001, + "learning_rate": 4.911524256296132e-06, + "loss": 0.4544, + "step": 8460 + }, + { + "epoch": 0.5152391681636879, + "grad_norm": 0.9887337333733689, + "learning_rate": 4.911503214362393e-06, + "loss": 0.4856, + "step": 8461 + }, + { + "epoch": 0.5153000639405657, + "grad_norm": 0.993151341056759, + "learning_rate": 4.911482169971865e-06, + "loss": 0.433, + "step": 8462 + }, + { + "epoch": 0.5153609597174436, + "grad_norm": 1.0735498477122658, + "learning_rate": 4.911461123124569e-06, + "loss": 0.4482, + "step": 8463 + }, + { + "epoch": 0.5154218554943215, + "grad_norm": 1.0243907074086478, + "learning_rate": 4.911440073820526e-06, + "loss": 0.4487, + "step": 8464 + }, + { + "epoch": 0.5154827512711994, + "grad_norm": 1.1450193493240166, + "learning_rate": 4.911419022059758e-06, + "loss": 0.4319, + "step": 8465 + }, + { + "epoch": 0.5155436470480772, + "grad_norm": 0.9824072731517923, + "learning_rate": 4.911397967842287e-06, + "loss": 0.423, + "step": 8466 + }, + { + "epoch": 0.515604542824955, + "grad_norm": 1.0064065558311388, + "learning_rate": 4.911376911168133e-06, + "loss": 0.4717, + "step": 8467 + }, + { + "epoch": 0.515665438601833, + "grad_norm": 1.1108949455415347, + "learning_rate": 4.911355852037319e-06, + "loss": 0.4048, + "step": 8468 + }, + { + "epoch": 0.5157263343787108, + "grad_norm": 1.0029442633115988, + "learning_rate": 4.911334790449866e-06, + "loss": 0.5004, + "step": 8469 + }, + { + "epoch": 0.5157872301555887, + "grad_norm": 1.021941689995531, + "learning_rate": 4.911313726405795e-06, + "loss": 0.4012, + "step": 8470 + }, + { + "epoch": 0.5158481259324665, + "grad_norm": 1.043655050543881, + "learning_rate": 4.911292659905127e-06, + "loss": 0.3725, + "step": 8471 + }, + { + "epoch": 0.5159090217093445, + "grad_norm": 1.040148672271331, + "learning_rate": 4.911271590947885e-06, + "loss": 0.4213, + "step": 8472 + }, + { + "epoch": 0.5159699174862223, + "grad_norm": 0.9453114935801371, + "learning_rate": 4.9112505195340895e-06, + "loss": 0.5085, + "step": 8473 + }, + { + "epoch": 0.5160308132631002, + "grad_norm": 1.0849582312803816, + "learning_rate": 4.911229445663762e-06, + "loss": 0.3661, + "step": 8474 + }, + { + "epoch": 0.516091709039978, + "grad_norm": 1.0524934644369004, + "learning_rate": 4.911208369336924e-06, + "loss": 0.4337, + "step": 8475 + }, + { + "epoch": 0.516152604816856, + "grad_norm": 0.9953907029088835, + "learning_rate": 4.911187290553597e-06, + "loss": 0.4508, + "step": 8476 + }, + { + "epoch": 0.5162135005937338, + "grad_norm": 1.0380506194040227, + "learning_rate": 4.911166209313804e-06, + "loss": 0.3895, + "step": 8477 + }, + { + "epoch": 0.5162743963706117, + "grad_norm": 1.072896733811343, + "learning_rate": 4.911145125617563e-06, + "loss": 0.3811, + "step": 8478 + }, + { + "epoch": 0.5163352921474895, + "grad_norm": 1.0167724072012883, + "learning_rate": 4.911124039464898e-06, + "loss": 0.4178, + "step": 8479 + }, + { + "epoch": 0.5163961879243675, + "grad_norm": 1.049714212422888, + "learning_rate": 4.91110295085583e-06, + "loss": 0.4929, + "step": 8480 + }, + { + "epoch": 0.5164570837012453, + "grad_norm": 1.0276311726272507, + "learning_rate": 4.911081859790381e-06, + "loss": 0.4346, + "step": 8481 + }, + { + "epoch": 0.5165179794781232, + "grad_norm": 1.020600771227954, + "learning_rate": 4.911060766268571e-06, + "loss": 0.4397, + "step": 8482 + }, + { + "epoch": 0.516578875255001, + "grad_norm": 1.0035353816150139, + "learning_rate": 4.911039670290423e-06, + "loss": 0.4429, + "step": 8483 + }, + { + "epoch": 0.516639771031879, + "grad_norm": 1.040148096626415, + "learning_rate": 4.911018571855958e-06, + "loss": 0.399, + "step": 8484 + }, + { + "epoch": 0.5167006668087568, + "grad_norm": 1.0186620529587431, + "learning_rate": 4.910997470965197e-06, + "loss": 0.4025, + "step": 8485 + }, + { + "epoch": 0.5167615625856347, + "grad_norm": 1.0059681471808914, + "learning_rate": 4.910976367618162e-06, + "loss": 0.4532, + "step": 8486 + }, + { + "epoch": 0.5168224583625125, + "grad_norm": 1.0164417333815376, + "learning_rate": 4.910955261814875e-06, + "loss": 0.4455, + "step": 8487 + }, + { + "epoch": 0.5168833541393905, + "grad_norm": 1.06850118562162, + "learning_rate": 4.910934153555356e-06, + "loss": 0.4667, + "step": 8488 + }, + { + "epoch": 0.5169442499162683, + "grad_norm": 1.0306563772689188, + "learning_rate": 4.910913042839628e-06, + "loss": 0.4155, + "step": 8489 + }, + { + "epoch": 0.5170051456931462, + "grad_norm": 1.1072363042126, + "learning_rate": 4.9108919296677115e-06, + "loss": 0.4303, + "step": 8490 + }, + { + "epoch": 0.5170660414700241, + "grad_norm": 1.0723334918693797, + "learning_rate": 4.9108708140396285e-06, + "loss": 0.4003, + "step": 8491 + }, + { + "epoch": 0.517126937246902, + "grad_norm": 0.957646305421407, + "learning_rate": 4.910849695955401e-06, + "loss": 0.5028, + "step": 8492 + }, + { + "epoch": 0.5171878330237798, + "grad_norm": 1.0389641880288205, + "learning_rate": 4.91082857541505e-06, + "loss": 0.4442, + "step": 8493 + }, + { + "epoch": 0.5172487288006576, + "grad_norm": 1.03852566222708, + "learning_rate": 4.910807452418595e-06, + "loss": 0.4367, + "step": 8494 + }, + { + "epoch": 0.5173096245775356, + "grad_norm": 0.9281115298259606, + "learning_rate": 4.910786326966062e-06, + "loss": 0.5177, + "step": 8495 + }, + { + "epoch": 0.5173705203544134, + "grad_norm": 1.00634406987426, + "learning_rate": 4.910765199057469e-06, + "loss": 0.4025, + "step": 8496 + }, + { + "epoch": 0.5174314161312913, + "grad_norm": 0.9773881246680165, + "learning_rate": 4.910744068692839e-06, + "loss": 0.3981, + "step": 8497 + }, + { + "epoch": 0.5174923119081691, + "grad_norm": 1.0267502468167742, + "learning_rate": 4.910722935872192e-06, + "loss": 0.3874, + "step": 8498 + }, + { + "epoch": 0.5175532076850471, + "grad_norm": 0.9627900544955451, + "learning_rate": 4.9107018005955514e-06, + "loss": 0.5265, + "step": 8499 + }, + { + "epoch": 0.5176141034619249, + "grad_norm": 0.9645622052251106, + "learning_rate": 4.9106806628629375e-06, + "loss": 0.451, + "step": 8500 + }, + { + "epoch": 0.5176749992388028, + "grad_norm": 1.046499386533939, + "learning_rate": 4.910659522674373e-06, + "loss": 0.4072, + "step": 8501 + }, + { + "epoch": 0.5177358950156806, + "grad_norm": 0.9850173359834665, + "learning_rate": 4.910638380029878e-06, + "loss": 0.4257, + "step": 8502 + }, + { + "epoch": 0.5177967907925586, + "grad_norm": 1.070602392580532, + "learning_rate": 4.910617234929474e-06, + "loss": 0.4689, + "step": 8503 + }, + { + "epoch": 0.5178576865694364, + "grad_norm": 0.9330635034122995, + "learning_rate": 4.910596087373185e-06, + "loss": 0.4812, + "step": 8504 + }, + { + "epoch": 0.5179185823463143, + "grad_norm": 1.0158654378875354, + "learning_rate": 4.91057493736103e-06, + "loss": 0.4629, + "step": 8505 + }, + { + "epoch": 0.5179794781231921, + "grad_norm": 0.9900830825083944, + "learning_rate": 4.910553784893032e-06, + "loss": 0.4572, + "step": 8506 + }, + { + "epoch": 0.5180403739000701, + "grad_norm": 0.9927107773054996, + "learning_rate": 4.910532629969211e-06, + "loss": 0.4356, + "step": 8507 + }, + { + "epoch": 0.5181012696769479, + "grad_norm": 1.0238318644473723, + "learning_rate": 4.91051147258959e-06, + "loss": 0.4644, + "step": 8508 + }, + { + "epoch": 0.5181621654538258, + "grad_norm": 0.9753912766782754, + "learning_rate": 4.910490312754189e-06, + "loss": 0.4878, + "step": 8509 + }, + { + "epoch": 0.5182230612307036, + "grad_norm": 1.0618222639512411, + "learning_rate": 4.910469150463032e-06, + "loss": 0.4762, + "step": 8510 + }, + { + "epoch": 0.5182839570075816, + "grad_norm": 1.0263624553715798, + "learning_rate": 4.910447985716139e-06, + "loss": 0.4753, + "step": 8511 + }, + { + "epoch": 0.5183448527844594, + "grad_norm": 1.0705815823546188, + "learning_rate": 4.910426818513531e-06, + "loss": 0.4226, + "step": 8512 + }, + { + "epoch": 0.5184057485613373, + "grad_norm": 1.099261409009667, + "learning_rate": 4.91040564885523e-06, + "loss": 0.4044, + "step": 8513 + }, + { + "epoch": 0.5184666443382151, + "grad_norm": 0.9403892280000513, + "learning_rate": 4.910384476741259e-06, + "loss": 0.4856, + "step": 8514 + }, + { + "epoch": 0.518527540115093, + "grad_norm": 0.9820781948079325, + "learning_rate": 4.910363302171638e-06, + "loss": 0.4578, + "step": 8515 + }, + { + "epoch": 0.5185884358919709, + "grad_norm": 1.0930137170104077, + "learning_rate": 4.910342125146388e-06, + "loss": 0.4633, + "step": 8516 + }, + { + "epoch": 0.5186493316688487, + "grad_norm": 1.0100846074381227, + "learning_rate": 4.910320945665533e-06, + "loss": 0.4062, + "step": 8517 + }, + { + "epoch": 0.5187102274457266, + "grad_norm": 1.007245208114069, + "learning_rate": 4.9102997637290916e-06, + "loss": 0.4417, + "step": 8518 + }, + { + "epoch": 0.5187711232226045, + "grad_norm": 0.935317689531168, + "learning_rate": 4.910278579337088e-06, + "loss": 0.4902, + "step": 8519 + }, + { + "epoch": 0.5188320189994824, + "grad_norm": 1.0604969761816085, + "learning_rate": 4.9102573924895425e-06, + "loss": 0.4258, + "step": 8520 + }, + { + "epoch": 0.5188929147763602, + "grad_norm": 0.9180161583405663, + "learning_rate": 4.910236203186477e-06, + "loss": 0.5055, + "step": 8521 + }, + { + "epoch": 0.5189538105532381, + "grad_norm": 1.050296640191336, + "learning_rate": 4.910215011427913e-06, + "loss": 0.4404, + "step": 8522 + }, + { + "epoch": 0.519014706330116, + "grad_norm": 1.097510830135985, + "learning_rate": 4.9101938172138715e-06, + "loss": 0.4222, + "step": 8523 + }, + { + "epoch": 0.5190756021069939, + "grad_norm": 0.9958235819415049, + "learning_rate": 4.910172620544376e-06, + "loss": 0.4116, + "step": 8524 + }, + { + "epoch": 0.5191364978838717, + "grad_norm": 1.079547298661236, + "learning_rate": 4.9101514214194455e-06, + "loss": 0.4751, + "step": 8525 + }, + { + "epoch": 0.5191973936607496, + "grad_norm": 0.9537767069196704, + "learning_rate": 4.9101302198391024e-06, + "loss": 0.4585, + "step": 8526 + }, + { + "epoch": 0.5192582894376275, + "grad_norm": 1.0669123182442621, + "learning_rate": 4.910109015803369e-06, + "loss": 0.4885, + "step": 8527 + }, + { + "epoch": 0.5193191852145054, + "grad_norm": 1.1297966733723799, + "learning_rate": 4.910087809312268e-06, + "loss": 0.3974, + "step": 8528 + }, + { + "epoch": 0.5193800809913832, + "grad_norm": 1.0047312859758235, + "learning_rate": 4.910066600365818e-06, + "loss": 0.3938, + "step": 8529 + }, + { + "epoch": 0.5194409767682612, + "grad_norm": 0.968724197029759, + "learning_rate": 4.910045388964043e-06, + "loss": 0.4406, + "step": 8530 + }, + { + "epoch": 0.519501872545139, + "grad_norm": 1.0580682690607364, + "learning_rate": 4.910024175106965e-06, + "loss": 0.3966, + "step": 8531 + }, + { + "epoch": 0.5195627683220169, + "grad_norm": 0.989322365004037, + "learning_rate": 4.910002958794603e-06, + "loss": 0.4308, + "step": 8532 + }, + { + "epoch": 0.5196236640988947, + "grad_norm": 1.0194055259978139, + "learning_rate": 4.90998174002698e-06, + "loss": 0.4357, + "step": 8533 + }, + { + "epoch": 0.5196845598757727, + "grad_norm": 1.122550011907693, + "learning_rate": 4.909960518804119e-06, + "loss": 0.4983, + "step": 8534 + }, + { + "epoch": 0.5197454556526505, + "grad_norm": 1.0079324526169813, + "learning_rate": 4.909939295126039e-06, + "loss": 0.4441, + "step": 8535 + }, + { + "epoch": 0.5198063514295284, + "grad_norm": 1.0724240324963896, + "learning_rate": 4.9099180689927625e-06, + "loss": 0.3923, + "step": 8536 + }, + { + "epoch": 0.5198672472064062, + "grad_norm": 1.0532737058240282, + "learning_rate": 4.909896840404313e-06, + "loss": 0.4271, + "step": 8537 + }, + { + "epoch": 0.5199281429832842, + "grad_norm": 1.0264900140508462, + "learning_rate": 4.90987560936071e-06, + "loss": 0.3938, + "step": 8538 + }, + { + "epoch": 0.519989038760162, + "grad_norm": 1.0839328629072278, + "learning_rate": 4.909854375861977e-06, + "loss": 0.4141, + "step": 8539 + }, + { + "epoch": 0.5200499345370398, + "grad_norm": 1.0822952042198026, + "learning_rate": 4.909833139908132e-06, + "loss": 0.451, + "step": 8540 + }, + { + "epoch": 0.5201108303139177, + "grad_norm": 0.9469833507626185, + "learning_rate": 4.909811901499201e-06, + "loss": 0.4468, + "step": 8541 + }, + { + "epoch": 0.5201717260907956, + "grad_norm": 1.0526331913040057, + "learning_rate": 4.909790660635204e-06, + "loss": 0.428, + "step": 8542 + }, + { + "epoch": 0.5202326218676735, + "grad_norm": 0.9861260713235551, + "learning_rate": 4.909769417316161e-06, + "loss": 0.4162, + "step": 8543 + }, + { + "epoch": 0.5202935176445513, + "grad_norm": 1.1059967802886375, + "learning_rate": 4.909748171542096e-06, + "loss": 0.4234, + "step": 8544 + }, + { + "epoch": 0.5203544134214292, + "grad_norm": 1.0069147589839431, + "learning_rate": 4.90972692331303e-06, + "loss": 0.448, + "step": 8545 + }, + { + "epoch": 0.5204153091983071, + "grad_norm": 0.9854213962080972, + "learning_rate": 4.909705672628983e-06, + "loss": 0.4921, + "step": 8546 + }, + { + "epoch": 0.520476204975185, + "grad_norm": 0.9877311700513816, + "learning_rate": 4.909684419489978e-06, + "loss": 0.4276, + "step": 8547 + }, + { + "epoch": 0.5205371007520628, + "grad_norm": 0.9703319290295793, + "learning_rate": 4.909663163896038e-06, + "loss": 0.4354, + "step": 8548 + }, + { + "epoch": 0.5205979965289407, + "grad_norm": 0.9622661070066005, + "learning_rate": 4.909641905847182e-06, + "loss": 0.5042, + "step": 8549 + }, + { + "epoch": 0.5206588923058186, + "grad_norm": 1.086421671938841, + "learning_rate": 4.9096206453434335e-06, + "loss": 0.3643, + "step": 8550 + }, + { + "epoch": 0.5207197880826965, + "grad_norm": 0.9771798159318373, + "learning_rate": 4.909599382384814e-06, + "loss": 0.4996, + "step": 8551 + }, + { + "epoch": 0.5207806838595743, + "grad_norm": 1.0178334778373153, + "learning_rate": 4.909578116971344e-06, + "loss": 0.506, + "step": 8552 + }, + { + "epoch": 0.5208415796364522, + "grad_norm": 0.9287424056197428, + "learning_rate": 4.909556849103047e-06, + "loss": 0.5818, + "step": 8553 + }, + { + "epoch": 0.5209024754133301, + "grad_norm": 0.9507438361434665, + "learning_rate": 4.909535578779942e-06, + "loss": 0.4574, + "step": 8554 + }, + { + "epoch": 0.520963371190208, + "grad_norm": 1.0132260420863415, + "learning_rate": 4.909514306002053e-06, + "loss": 0.4902, + "step": 8555 + }, + { + "epoch": 0.5210242669670858, + "grad_norm": 1.0050808834346157, + "learning_rate": 4.909493030769401e-06, + "loss": 0.4869, + "step": 8556 + }, + { + "epoch": 0.5210851627439637, + "grad_norm": 1.080482054607036, + "learning_rate": 4.909471753082008e-06, + "loss": 0.5146, + "step": 8557 + }, + { + "epoch": 0.5211460585208416, + "grad_norm": 0.9739238257131994, + "learning_rate": 4.909450472939894e-06, + "loss": 0.4508, + "step": 8558 + }, + { + "epoch": 0.5212069542977195, + "grad_norm": 1.0047970096155356, + "learning_rate": 4.909429190343083e-06, + "loss": 0.4347, + "step": 8559 + }, + { + "epoch": 0.5212678500745973, + "grad_norm": 1.0890094693190449, + "learning_rate": 4.9094079052915955e-06, + "loss": 0.3998, + "step": 8560 + }, + { + "epoch": 0.5213287458514752, + "grad_norm": 0.9999003008812141, + "learning_rate": 4.909386617785453e-06, + "loss": 0.3905, + "step": 8561 + }, + { + "epoch": 0.5213896416283531, + "grad_norm": 1.1082567262808092, + "learning_rate": 4.909365327824678e-06, + "loss": 0.4972, + "step": 8562 + }, + { + "epoch": 0.521450537405231, + "grad_norm": 1.0398443094202605, + "learning_rate": 4.909344035409292e-06, + "loss": 0.4764, + "step": 8563 + }, + { + "epoch": 0.5215114331821088, + "grad_norm": 1.179039381991679, + "learning_rate": 4.909322740539315e-06, + "loss": 0.3886, + "step": 8564 + }, + { + "epoch": 0.5215723289589866, + "grad_norm": 1.0709518909569247, + "learning_rate": 4.909301443214771e-06, + "loss": 0.4443, + "step": 8565 + }, + { + "epoch": 0.5216332247358646, + "grad_norm": 0.993361651035601, + "learning_rate": 4.909280143435681e-06, + "loss": 0.4507, + "step": 8566 + }, + { + "epoch": 0.5216941205127424, + "grad_norm": 0.9933309332281406, + "learning_rate": 4.9092588412020655e-06, + "loss": 0.4852, + "step": 8567 + }, + { + "epoch": 0.5217550162896203, + "grad_norm": 1.004341810579752, + "learning_rate": 4.9092375365139476e-06, + "loss": 0.5049, + "step": 8568 + }, + { + "epoch": 0.5218159120664981, + "grad_norm": 1.005925084296951, + "learning_rate": 4.909216229371349e-06, + "loss": 0.4767, + "step": 8569 + }, + { + "epoch": 0.5218768078433761, + "grad_norm": 1.0613242700721062, + "learning_rate": 4.909194919774291e-06, + "loss": 0.394, + "step": 8570 + }, + { + "epoch": 0.5219377036202539, + "grad_norm": 1.0007937054978953, + "learning_rate": 4.909173607722794e-06, + "loss": 0.4703, + "step": 8571 + }, + { + "epoch": 0.5219985993971318, + "grad_norm": 1.048748555508546, + "learning_rate": 4.909152293216884e-06, + "loss": 0.4244, + "step": 8572 + }, + { + "epoch": 0.5220594951740097, + "grad_norm": 1.0751504288771165, + "learning_rate": 4.909130976256577e-06, + "loss": 0.4441, + "step": 8573 + }, + { + "epoch": 0.5221203909508876, + "grad_norm": 0.9251333183612322, + "learning_rate": 4.909109656841899e-06, + "loss": 0.5035, + "step": 8574 + }, + { + "epoch": 0.5221812867277654, + "grad_norm": 0.9447279046450302, + "learning_rate": 4.909088334972869e-06, + "loss": 0.478, + "step": 8575 + }, + { + "epoch": 0.5222421825046433, + "grad_norm": 0.9924336584396978, + "learning_rate": 4.90906701064951e-06, + "loss": 0.3629, + "step": 8576 + }, + { + "epoch": 0.5223030782815212, + "grad_norm": 0.97495461832459, + "learning_rate": 4.909045683871844e-06, + "loss": 0.4278, + "step": 8577 + }, + { + "epoch": 0.5223639740583991, + "grad_norm": 0.8923684705253473, + "learning_rate": 4.909024354639893e-06, + "loss": 0.518, + "step": 8578 + }, + { + "epoch": 0.5224248698352769, + "grad_norm": 1.059870722574201, + "learning_rate": 4.909003022953677e-06, + "loss": 0.3904, + "step": 8579 + }, + { + "epoch": 0.5224857656121548, + "grad_norm": 1.045844596820486, + "learning_rate": 4.908981688813219e-06, + "loss": 0.4465, + "step": 8580 + }, + { + "epoch": 0.5225466613890327, + "grad_norm": 1.1093706086675974, + "learning_rate": 4.9089603522185405e-06, + "loss": 0.4037, + "step": 8581 + }, + { + "epoch": 0.5226075571659106, + "grad_norm": 1.0319238638501393, + "learning_rate": 4.908939013169664e-06, + "loss": 0.4608, + "step": 8582 + }, + { + "epoch": 0.5226684529427884, + "grad_norm": 1.0371798651497728, + "learning_rate": 4.90891767166661e-06, + "loss": 0.429, + "step": 8583 + }, + { + "epoch": 0.5227293487196663, + "grad_norm": 1.0611791849182528, + "learning_rate": 4.908896327709401e-06, + "loss": 0.4169, + "step": 8584 + }, + { + "epoch": 0.5227902444965442, + "grad_norm": 0.9476058978281375, + "learning_rate": 4.908874981298058e-06, + "loss": 0.5096, + "step": 8585 + }, + { + "epoch": 0.522851140273422, + "grad_norm": 0.985369683840825, + "learning_rate": 4.908853632432603e-06, + "loss": 0.4688, + "step": 8586 + }, + { + "epoch": 0.5229120360502999, + "grad_norm": 1.0587492043522326, + "learning_rate": 4.908832281113059e-06, + "loss": 0.486, + "step": 8587 + }, + { + "epoch": 0.5229729318271777, + "grad_norm": 1.010244296352972, + "learning_rate": 4.908810927339447e-06, + "loss": 0.4404, + "step": 8588 + }, + { + "epoch": 0.5230338276040557, + "grad_norm": 1.0783497228729821, + "learning_rate": 4.908789571111787e-06, + "loss": 0.446, + "step": 8589 + }, + { + "epoch": 0.5230947233809335, + "grad_norm": 1.097091672016871, + "learning_rate": 4.908768212430103e-06, + "loss": 0.383, + "step": 8590 + }, + { + "epoch": 0.5231556191578114, + "grad_norm": 1.1096635780654283, + "learning_rate": 4.908746851294416e-06, + "loss": 0.4059, + "step": 8591 + }, + { + "epoch": 0.5232165149346892, + "grad_norm": 0.9805638749163436, + "learning_rate": 4.908725487704748e-06, + "loss": 0.4295, + "step": 8592 + }, + { + "epoch": 0.5232774107115672, + "grad_norm": 1.1123935367999478, + "learning_rate": 4.90870412166112e-06, + "loss": 0.3729, + "step": 8593 + }, + { + "epoch": 0.523338306488445, + "grad_norm": 0.9615833634245132, + "learning_rate": 4.908682753163555e-06, + "loss": 0.4746, + "step": 8594 + }, + { + "epoch": 0.5233992022653229, + "grad_norm": 1.0269781556033306, + "learning_rate": 4.908661382212074e-06, + "loss": 0.3893, + "step": 8595 + }, + { + "epoch": 0.5234600980422007, + "grad_norm": 1.047228432623373, + "learning_rate": 4.908640008806699e-06, + "loss": 0.4775, + "step": 8596 + }, + { + "epoch": 0.5235209938190787, + "grad_norm": 1.080967681990089, + "learning_rate": 4.908618632947451e-06, + "loss": 0.346, + "step": 8597 + }, + { + "epoch": 0.5235818895959565, + "grad_norm": 1.0039487792280293, + "learning_rate": 4.908597254634353e-06, + "loss": 0.4418, + "step": 8598 + }, + { + "epoch": 0.5236427853728344, + "grad_norm": 0.9629250064886133, + "learning_rate": 4.908575873867426e-06, + "loss": 0.4018, + "step": 8599 + }, + { + "epoch": 0.5237036811497122, + "grad_norm": 1.064982413648252, + "learning_rate": 4.908554490646692e-06, + "loss": 0.3664, + "step": 8600 + }, + { + "epoch": 0.5237645769265902, + "grad_norm": 1.0304836259168377, + "learning_rate": 4.908533104972172e-06, + "loss": 0.4555, + "step": 8601 + }, + { + "epoch": 0.523825472703468, + "grad_norm": 1.045217394511248, + "learning_rate": 4.90851171684389e-06, + "loss": 0.4528, + "step": 8602 + }, + { + "epoch": 0.5238863684803459, + "grad_norm": 1.0437525396799523, + "learning_rate": 4.908490326261866e-06, + "loss": 0.4242, + "step": 8603 + }, + { + "epoch": 0.5239472642572237, + "grad_norm": 1.082089262959652, + "learning_rate": 4.90846893322612e-06, + "loss": 0.4021, + "step": 8604 + }, + { + "epoch": 0.5240081600341017, + "grad_norm": 1.0386080126578288, + "learning_rate": 4.908447537736678e-06, + "loss": 0.3951, + "step": 8605 + }, + { + "epoch": 0.5240690558109795, + "grad_norm": 0.9950677891595794, + "learning_rate": 4.908426139793559e-06, + "loss": 0.4483, + "step": 8606 + }, + { + "epoch": 0.5241299515878574, + "grad_norm": 1.0829898421081936, + "learning_rate": 4.9084047393967865e-06, + "loss": 0.451, + "step": 8607 + }, + { + "epoch": 0.5241908473647352, + "grad_norm": 0.988941641451332, + "learning_rate": 4.90838333654638e-06, + "loss": 0.4305, + "step": 8608 + }, + { + "epoch": 0.5242517431416132, + "grad_norm": 1.064481378458262, + "learning_rate": 4.9083619312423645e-06, + "loss": 0.4715, + "step": 8609 + }, + { + "epoch": 0.524312638918491, + "grad_norm": 0.9805571934911115, + "learning_rate": 4.9083405234847585e-06, + "loss": 0.4729, + "step": 8610 + }, + { + "epoch": 0.5243735346953688, + "grad_norm": 1.024173288075929, + "learning_rate": 4.908319113273585e-06, + "loss": 0.4432, + "step": 8611 + }, + { + "epoch": 0.5244344304722468, + "grad_norm": 0.9052510977516766, + "learning_rate": 4.908297700608867e-06, + "loss": 0.5192, + "step": 8612 + }, + { + "epoch": 0.5244953262491246, + "grad_norm": 1.0756558643506708, + "learning_rate": 4.9082762854906255e-06, + "loss": 0.4701, + "step": 8613 + }, + { + "epoch": 0.5245562220260025, + "grad_norm": 0.9864461816242682, + "learning_rate": 4.908254867918882e-06, + "loss": 0.4782, + "step": 8614 + }, + { + "epoch": 0.5246171178028803, + "grad_norm": 0.9987403082282066, + "learning_rate": 4.908233447893657e-06, + "loss": 0.5045, + "step": 8615 + }, + { + "epoch": 0.5246780135797583, + "grad_norm": 1.0410319903660332, + "learning_rate": 4.908212025414975e-06, + "loss": 0.4284, + "step": 8616 + }, + { + "epoch": 0.5247389093566361, + "grad_norm": 1.0096077316768, + "learning_rate": 4.908190600482857e-06, + "loss": 0.4224, + "step": 8617 + }, + { + "epoch": 0.524799805133514, + "grad_norm": 1.1294957992021777, + "learning_rate": 4.908169173097324e-06, + "loss": 0.3957, + "step": 8618 + }, + { + "epoch": 0.5248607009103918, + "grad_norm": 1.0582737626826615, + "learning_rate": 4.908147743258398e-06, + "loss": 0.4428, + "step": 8619 + }, + { + "epoch": 0.5249215966872698, + "grad_norm": 1.0419813358002803, + "learning_rate": 4.908126310966102e-06, + "loss": 0.3955, + "step": 8620 + }, + { + "epoch": 0.5249824924641476, + "grad_norm": 1.061008664793576, + "learning_rate": 4.908104876220456e-06, + "loss": 0.4239, + "step": 8621 + }, + { + "epoch": 0.5250433882410255, + "grad_norm": 0.9915056826088376, + "learning_rate": 4.9080834390214835e-06, + "loss": 0.4605, + "step": 8622 + }, + { + "epoch": 0.5251042840179033, + "grad_norm": 1.0614033005008872, + "learning_rate": 4.908061999369206e-06, + "loss": 0.3828, + "step": 8623 + }, + { + "epoch": 0.5251651797947813, + "grad_norm": 0.9922521838049723, + "learning_rate": 4.908040557263644e-06, + "loss": 0.4383, + "step": 8624 + }, + { + "epoch": 0.5252260755716591, + "grad_norm": 0.9827094969531334, + "learning_rate": 4.9080191127048205e-06, + "loss": 0.4459, + "step": 8625 + }, + { + "epoch": 0.525286971348537, + "grad_norm": 0.9894034142603675, + "learning_rate": 4.9079976656927575e-06, + "loss": 0.4884, + "step": 8626 + }, + { + "epoch": 0.5253478671254148, + "grad_norm": 1.18666371885818, + "learning_rate": 4.907976216227477e-06, + "loss": 0.3497, + "step": 8627 + }, + { + "epoch": 0.5254087629022928, + "grad_norm": 0.9250696971399086, + "learning_rate": 4.907954764308999e-06, + "loss": 0.4594, + "step": 8628 + }, + { + "epoch": 0.5254696586791706, + "grad_norm": 0.9209968220627324, + "learning_rate": 4.907933309937348e-06, + "loss": 0.469, + "step": 8629 + }, + { + "epoch": 0.5255305544560485, + "grad_norm": 0.9444083746979887, + "learning_rate": 4.907911853112545e-06, + "loss": 0.4651, + "step": 8630 + }, + { + "epoch": 0.5255914502329263, + "grad_norm": 0.9541938566226762, + "learning_rate": 4.9078903938346115e-06, + "loss": 0.4717, + "step": 8631 + }, + { + "epoch": 0.5256523460098043, + "grad_norm": 1.0342753290718625, + "learning_rate": 4.907868932103568e-06, + "loss": 0.465, + "step": 8632 + }, + { + "epoch": 0.5257132417866821, + "grad_norm": 1.0195001078362789, + "learning_rate": 4.907847467919438e-06, + "loss": 0.4472, + "step": 8633 + }, + { + "epoch": 0.52577413756356, + "grad_norm": 0.976932492785576, + "learning_rate": 4.907826001282244e-06, + "loss": 0.4091, + "step": 8634 + }, + { + "epoch": 0.5258350333404378, + "grad_norm": 0.9154422567781239, + "learning_rate": 4.907804532192006e-06, + "loss": 0.5042, + "step": 8635 + }, + { + "epoch": 0.5258959291173158, + "grad_norm": 1.0324163022464197, + "learning_rate": 4.907783060648747e-06, + "loss": 0.4251, + "step": 8636 + }, + { + "epoch": 0.5259568248941936, + "grad_norm": 1.0559374009139482, + "learning_rate": 4.907761586652489e-06, + "loss": 0.4124, + "step": 8637 + }, + { + "epoch": 0.5260177206710714, + "grad_norm": 1.0785883294176724, + "learning_rate": 4.907740110203253e-06, + "loss": 0.4596, + "step": 8638 + }, + { + "epoch": 0.5260786164479493, + "grad_norm": 1.0236998784463347, + "learning_rate": 4.907718631301062e-06, + "loss": 0.4092, + "step": 8639 + }, + { + "epoch": 0.5261395122248272, + "grad_norm": 1.0793349342128926, + "learning_rate": 4.907697149945937e-06, + "loss": 0.4352, + "step": 8640 + }, + { + "epoch": 0.5262004080017051, + "grad_norm": 0.922268870072163, + "learning_rate": 4.9076756661379e-06, + "loss": 0.4975, + "step": 8641 + }, + { + "epoch": 0.5262613037785829, + "grad_norm": 1.0395961911322877, + "learning_rate": 4.907654179876974e-06, + "loss": 0.4634, + "step": 8642 + }, + { + "epoch": 0.5263221995554608, + "grad_norm": 1.0577431494325895, + "learning_rate": 4.907632691163179e-06, + "loss": 0.4454, + "step": 8643 + }, + { + "epoch": 0.5263830953323387, + "grad_norm": 0.9388528118179404, + "learning_rate": 4.907611199996538e-06, + "loss": 0.4632, + "step": 8644 + }, + { + "epoch": 0.5264439911092166, + "grad_norm": 1.0812358641424438, + "learning_rate": 4.907589706377074e-06, + "loss": 0.3976, + "step": 8645 + }, + { + "epoch": 0.5265048868860944, + "grad_norm": 1.0725698691855294, + "learning_rate": 4.907568210304806e-06, + "loss": 0.4471, + "step": 8646 + }, + { + "epoch": 0.5265657826629723, + "grad_norm": 0.9764757181582497, + "learning_rate": 4.907546711779758e-06, + "loss": 0.4081, + "step": 8647 + }, + { + "epoch": 0.5266266784398502, + "grad_norm": 1.0537232263096385, + "learning_rate": 4.907525210801952e-06, + "loss": 0.3588, + "step": 8648 + }, + { + "epoch": 0.5266875742167281, + "grad_norm": 1.0510049548377782, + "learning_rate": 4.9075037073714096e-06, + "loss": 0.4151, + "step": 8649 + }, + { + "epoch": 0.5267484699936059, + "grad_norm": 0.9510812354584224, + "learning_rate": 4.907482201488151e-06, + "loss": 0.4554, + "step": 8650 + }, + { + "epoch": 0.5268093657704838, + "grad_norm": 0.962666980302078, + "learning_rate": 4.9074606931522004e-06, + "loss": 0.4401, + "step": 8651 + }, + { + "epoch": 0.5268702615473617, + "grad_norm": 1.0535242954957635, + "learning_rate": 4.907439182363579e-06, + "loss": 0.4162, + "step": 8652 + }, + { + "epoch": 0.5269311573242396, + "grad_norm": 1.1004633883943122, + "learning_rate": 4.907417669122309e-06, + "loss": 0.3947, + "step": 8653 + }, + { + "epoch": 0.5269920531011174, + "grad_norm": 0.9639376108339724, + "learning_rate": 4.907396153428412e-06, + "loss": 0.4197, + "step": 8654 + }, + { + "epoch": 0.5270529488779954, + "grad_norm": 0.9733913423270477, + "learning_rate": 4.907374635281909e-06, + "loss": 0.4359, + "step": 8655 + }, + { + "epoch": 0.5271138446548732, + "grad_norm": 1.101162133005141, + "learning_rate": 4.9073531146828235e-06, + "loss": 0.4278, + "step": 8656 + }, + { + "epoch": 0.527174740431751, + "grad_norm": 1.004328726405885, + "learning_rate": 4.907331591631176e-06, + "loss": 0.4493, + "step": 8657 + }, + { + "epoch": 0.5272356362086289, + "grad_norm": 0.9980087873444597, + "learning_rate": 4.907310066126989e-06, + "loss": 0.4709, + "step": 8658 + }, + { + "epoch": 0.5272965319855069, + "grad_norm": 0.9784452192093017, + "learning_rate": 4.907288538170286e-06, + "loss": 0.3895, + "step": 8659 + }, + { + "epoch": 0.5273574277623847, + "grad_norm": 1.009269919863873, + "learning_rate": 4.907267007761086e-06, + "loss": 0.4385, + "step": 8660 + }, + { + "epoch": 0.5274183235392625, + "grad_norm": 1.0384660329229294, + "learning_rate": 4.907245474899413e-06, + "loss": 0.3901, + "step": 8661 + }, + { + "epoch": 0.5274792193161404, + "grad_norm": 1.0858985767544092, + "learning_rate": 4.907223939585289e-06, + "loss": 0.3965, + "step": 8662 + }, + { + "epoch": 0.5275401150930183, + "grad_norm": 1.042810872035256, + "learning_rate": 4.907202401818734e-06, + "loss": 0.4186, + "step": 8663 + }, + { + "epoch": 0.5276010108698962, + "grad_norm": 0.9519915252487742, + "learning_rate": 4.9071808615997715e-06, + "loss": 0.4575, + "step": 8664 + }, + { + "epoch": 0.527661906646774, + "grad_norm": 0.9673020232753384, + "learning_rate": 4.907159318928424e-06, + "loss": 0.3967, + "step": 8665 + }, + { + "epoch": 0.5277228024236519, + "grad_norm": 0.9670526816881253, + "learning_rate": 4.907137773804712e-06, + "loss": 0.4232, + "step": 8666 + }, + { + "epoch": 0.5277836982005298, + "grad_norm": 0.961258478847096, + "learning_rate": 4.9071162262286584e-06, + "loss": 0.471, + "step": 8667 + }, + { + "epoch": 0.5278445939774077, + "grad_norm": 1.04275145064952, + "learning_rate": 4.907094676200285e-06, + "loss": 0.3943, + "step": 8668 + }, + { + "epoch": 0.5279054897542855, + "grad_norm": 0.9880556117057019, + "learning_rate": 4.907073123719614e-06, + "loss": 0.4309, + "step": 8669 + }, + { + "epoch": 0.5279663855311634, + "grad_norm": 0.9590079987284565, + "learning_rate": 4.9070515687866646e-06, + "loss": 0.5206, + "step": 8670 + }, + { + "epoch": 0.5280272813080413, + "grad_norm": 1.0962493200920396, + "learning_rate": 4.9070300114014634e-06, + "loss": 0.4246, + "step": 8671 + }, + { + "epoch": 0.5280881770849192, + "grad_norm": 1.035139352568009, + "learning_rate": 4.907008451564029e-06, + "loss": 0.357, + "step": 8672 + }, + { + "epoch": 0.528149072861797, + "grad_norm": 1.0818560676680617, + "learning_rate": 4.906986889274385e-06, + "loss": 0.396, + "step": 8673 + }, + { + "epoch": 0.5282099686386749, + "grad_norm": 0.9854932695207087, + "learning_rate": 4.906965324532553e-06, + "loss": 0.4505, + "step": 8674 + }, + { + "epoch": 0.5282708644155528, + "grad_norm": 1.021685762370257, + "learning_rate": 4.906943757338555e-06, + "loss": 0.4353, + "step": 8675 + }, + { + "epoch": 0.5283317601924307, + "grad_norm": 0.9097502531819667, + "learning_rate": 4.906922187692411e-06, + "loss": 0.4536, + "step": 8676 + }, + { + "epoch": 0.5283926559693085, + "grad_norm": 0.9501273324481635, + "learning_rate": 4.906900615594146e-06, + "loss": 0.4618, + "step": 8677 + }, + { + "epoch": 0.5284535517461864, + "grad_norm": 1.0546268526284397, + "learning_rate": 4.906879041043781e-06, + "loss": 0.4546, + "step": 8678 + }, + { + "epoch": 0.5285144475230643, + "grad_norm": 1.0925514727639773, + "learning_rate": 4.906857464041337e-06, + "loss": 0.4819, + "step": 8679 + }, + { + "epoch": 0.5285753432999422, + "grad_norm": 1.0286801792611513, + "learning_rate": 4.906835884586837e-06, + "loss": 0.4331, + "step": 8680 + }, + { + "epoch": 0.52863623907682, + "grad_norm": 0.979042280010626, + "learning_rate": 4.906814302680303e-06, + "loss": 0.415, + "step": 8681 + }, + { + "epoch": 0.5286971348536978, + "grad_norm": 0.9870111520550247, + "learning_rate": 4.906792718321756e-06, + "loss": 0.444, + "step": 8682 + }, + { + "epoch": 0.5287580306305758, + "grad_norm": 1.0454747971733986, + "learning_rate": 4.906771131511219e-06, + "loss": 0.4014, + "step": 8683 + }, + { + "epoch": 0.5288189264074536, + "grad_norm": 1.025445005511119, + "learning_rate": 4.906749542248713e-06, + "loss": 0.4246, + "step": 8684 + }, + { + "epoch": 0.5288798221843315, + "grad_norm": 0.9596554621402509, + "learning_rate": 4.906727950534261e-06, + "loss": 0.5256, + "step": 8685 + }, + { + "epoch": 0.5289407179612093, + "grad_norm": 1.0386465160592442, + "learning_rate": 4.906706356367884e-06, + "loss": 0.4177, + "step": 8686 + }, + { + "epoch": 0.5290016137380873, + "grad_norm": 0.9244013563008245, + "learning_rate": 4.906684759749606e-06, + "loss": 0.4547, + "step": 8687 + }, + { + "epoch": 0.5290625095149651, + "grad_norm": 1.0570770541438366, + "learning_rate": 4.906663160679446e-06, + "loss": 0.5328, + "step": 8688 + }, + { + "epoch": 0.529123405291843, + "grad_norm": 1.0140280698483142, + "learning_rate": 4.906641559157429e-06, + "loss": 0.421, + "step": 8689 + }, + { + "epoch": 0.5291843010687208, + "grad_norm": 0.9950802818935448, + "learning_rate": 4.906619955183574e-06, + "loss": 0.4379, + "step": 8690 + }, + { + "epoch": 0.5292451968455988, + "grad_norm": 1.047885927624752, + "learning_rate": 4.906598348757906e-06, + "loss": 0.4687, + "step": 8691 + }, + { + "epoch": 0.5293060926224766, + "grad_norm": 1.0579585264406997, + "learning_rate": 4.906576739880445e-06, + "loss": 0.4608, + "step": 8692 + }, + { + "epoch": 0.5293669883993545, + "grad_norm": 0.9932486569126717, + "learning_rate": 4.906555128551215e-06, + "loss": 0.4961, + "step": 8693 + }, + { + "epoch": 0.5294278841762324, + "grad_norm": 1.0230074479461995, + "learning_rate": 4.906533514770236e-06, + "loss": 0.4788, + "step": 8694 + }, + { + "epoch": 0.5294887799531103, + "grad_norm": 1.020165081619229, + "learning_rate": 4.906511898537529e-06, + "loss": 0.4626, + "step": 8695 + }, + { + "epoch": 0.5295496757299881, + "grad_norm": 1.0014202507404533, + "learning_rate": 4.906490279853119e-06, + "loss": 0.4372, + "step": 8696 + }, + { + "epoch": 0.529610571506866, + "grad_norm": 1.0617510343561023, + "learning_rate": 4.906468658717028e-06, + "loss": 0.4356, + "step": 8697 + }, + { + "epoch": 0.5296714672837439, + "grad_norm": 0.9776535730457042, + "learning_rate": 4.906447035129275e-06, + "loss": 0.4864, + "step": 8698 + }, + { + "epoch": 0.5297323630606218, + "grad_norm": 1.1170049803494546, + "learning_rate": 4.906425409089884e-06, + "loss": 0.5044, + "step": 8699 + }, + { + "epoch": 0.5297932588374996, + "grad_norm": 1.0436514092736897, + "learning_rate": 4.906403780598878e-06, + "loss": 0.4224, + "step": 8700 + }, + { + "epoch": 0.5298541546143775, + "grad_norm": 1.08524318144234, + "learning_rate": 4.906382149656276e-06, + "loss": 0.4214, + "step": 8701 + }, + { + "epoch": 0.5299150503912554, + "grad_norm": 0.9301643832932852, + "learning_rate": 4.906360516262103e-06, + "loss": 0.4342, + "step": 8702 + }, + { + "epoch": 0.5299759461681333, + "grad_norm": 1.0966291478449885, + "learning_rate": 4.90633888041638e-06, + "loss": 0.3889, + "step": 8703 + }, + { + "epoch": 0.5300368419450111, + "grad_norm": 1.0332902328840952, + "learning_rate": 4.906317242119129e-06, + "loss": 0.4197, + "step": 8704 + }, + { + "epoch": 0.530097737721889, + "grad_norm": 1.1099748102830016, + "learning_rate": 4.9062956013703715e-06, + "loss": 0.3713, + "step": 8705 + }, + { + "epoch": 0.5301586334987669, + "grad_norm": 0.9063707111686679, + "learning_rate": 4.9062739581701305e-06, + "loss": 0.4561, + "step": 8706 + }, + { + "epoch": 0.5302195292756448, + "grad_norm": 0.934222281269506, + "learning_rate": 4.906252312518427e-06, + "loss": 0.4393, + "step": 8707 + }, + { + "epoch": 0.5302804250525226, + "grad_norm": 0.990864890633535, + "learning_rate": 4.906230664415285e-06, + "loss": 0.4182, + "step": 8708 + }, + { + "epoch": 0.5303413208294004, + "grad_norm": 1.0316404143381441, + "learning_rate": 4.906209013860724e-06, + "loss": 0.4466, + "step": 8709 + }, + { + "epoch": 0.5304022166062784, + "grad_norm": 1.0717021261374389, + "learning_rate": 4.906187360854767e-06, + "loss": 0.4136, + "step": 8710 + }, + { + "epoch": 0.5304631123831562, + "grad_norm": 1.0020392772445623, + "learning_rate": 4.906165705397437e-06, + "loss": 0.486, + "step": 8711 + }, + { + "epoch": 0.5305240081600341, + "grad_norm": 1.138828811596824, + "learning_rate": 4.9061440474887555e-06, + "loss": 0.3913, + "step": 8712 + }, + { + "epoch": 0.5305849039369119, + "grad_norm": 1.1272036761586537, + "learning_rate": 4.906122387128744e-06, + "loss": 0.4069, + "step": 8713 + }, + { + "epoch": 0.5306457997137899, + "grad_norm": 1.0389452917672306, + "learning_rate": 4.9061007243174264e-06, + "loss": 0.405, + "step": 8714 + }, + { + "epoch": 0.5307066954906677, + "grad_norm": 1.0088414259670804, + "learning_rate": 4.906079059054822e-06, + "loss": 0.4436, + "step": 8715 + }, + { + "epoch": 0.5307675912675456, + "grad_norm": 1.0937289043370835, + "learning_rate": 4.906057391340955e-06, + "loss": 0.4258, + "step": 8716 + }, + { + "epoch": 0.5308284870444234, + "grad_norm": 1.0246028030814502, + "learning_rate": 4.906035721175846e-06, + "loss": 0.4382, + "step": 8717 + }, + { + "epoch": 0.5308893828213014, + "grad_norm": 0.9856671649701163, + "learning_rate": 4.906014048559519e-06, + "loss": 0.4357, + "step": 8718 + }, + { + "epoch": 0.5309502785981792, + "grad_norm": 1.001662247948854, + "learning_rate": 4.9059923734919935e-06, + "loss": 0.449, + "step": 8719 + }, + { + "epoch": 0.5310111743750571, + "grad_norm": 1.0025093013116513, + "learning_rate": 4.905970695973294e-06, + "loss": 0.4812, + "step": 8720 + }, + { + "epoch": 0.5310720701519349, + "grad_norm": 1.1128729768398418, + "learning_rate": 4.905949016003441e-06, + "loss": 0.4085, + "step": 8721 + }, + { + "epoch": 0.5311329659288129, + "grad_norm": 1.0063646148986256, + "learning_rate": 4.905927333582458e-06, + "loss": 0.3862, + "step": 8722 + }, + { + "epoch": 0.5311938617056907, + "grad_norm": 0.9867164060888354, + "learning_rate": 4.905905648710365e-06, + "loss": 0.439, + "step": 8723 + }, + { + "epoch": 0.5312547574825686, + "grad_norm": 1.0585233170796657, + "learning_rate": 4.905883961387186e-06, + "loss": 0.3687, + "step": 8724 + }, + { + "epoch": 0.5313156532594464, + "grad_norm": 0.9461483421154513, + "learning_rate": 4.905862271612943e-06, + "loss": 0.4346, + "step": 8725 + }, + { + "epoch": 0.5313765490363244, + "grad_norm": 1.0787975125573193, + "learning_rate": 4.905840579387657e-06, + "loss": 0.3967, + "step": 8726 + }, + { + "epoch": 0.5314374448132022, + "grad_norm": 1.0822194085012988, + "learning_rate": 4.90581888471135e-06, + "loss": 0.4037, + "step": 8727 + }, + { + "epoch": 0.53149834059008, + "grad_norm": 1.0163598933692621, + "learning_rate": 4.905797187584046e-06, + "loss": 0.4351, + "step": 8728 + }, + { + "epoch": 0.5315592363669579, + "grad_norm": 0.9609090344664282, + "learning_rate": 4.9057754880057655e-06, + "loss": 0.4939, + "step": 8729 + }, + { + "epoch": 0.5316201321438359, + "grad_norm": 1.022449714076184, + "learning_rate": 4.90575378597653e-06, + "loss": 0.4614, + "step": 8730 + }, + { + "epoch": 0.5316810279207137, + "grad_norm": 1.0304374344565999, + "learning_rate": 4.905732081496363e-06, + "loss": 0.4688, + "step": 8731 + }, + { + "epoch": 0.5317419236975915, + "grad_norm": 0.9994016320705547, + "learning_rate": 4.905710374565287e-06, + "loss": 0.5489, + "step": 8732 + }, + { + "epoch": 0.5318028194744694, + "grad_norm": 1.0032565723342126, + "learning_rate": 4.905688665183323e-06, + "loss": 0.4501, + "step": 8733 + }, + { + "epoch": 0.5318637152513473, + "grad_norm": 1.0259536587782734, + "learning_rate": 4.905666953350492e-06, + "loss": 0.5034, + "step": 8734 + }, + { + "epoch": 0.5319246110282252, + "grad_norm": 1.0051392407148565, + "learning_rate": 4.9056452390668194e-06, + "loss": 0.4433, + "step": 8735 + }, + { + "epoch": 0.531985506805103, + "grad_norm": 0.9800169250858429, + "learning_rate": 4.9056235223323246e-06, + "loss": 0.4702, + "step": 8736 + }, + { + "epoch": 0.532046402581981, + "grad_norm": 1.1171674267207985, + "learning_rate": 4.9056018031470305e-06, + "loss": 0.4799, + "step": 8737 + }, + { + "epoch": 0.5321072983588588, + "grad_norm": 1.008190221194554, + "learning_rate": 4.905580081510959e-06, + "loss": 0.3909, + "step": 8738 + }, + { + "epoch": 0.5321681941357367, + "grad_norm": 0.9594979894706758, + "learning_rate": 4.905558357424134e-06, + "loss": 0.4713, + "step": 8739 + }, + { + "epoch": 0.5322290899126145, + "grad_norm": 1.062968908943712, + "learning_rate": 4.905536630886575e-06, + "loss": 0.4063, + "step": 8740 + }, + { + "epoch": 0.5322899856894925, + "grad_norm": 0.9742996303057286, + "learning_rate": 4.905514901898305e-06, + "loss": 0.3916, + "step": 8741 + }, + { + "epoch": 0.5323508814663703, + "grad_norm": 0.9569778152376321, + "learning_rate": 4.905493170459347e-06, + "loss": 0.4504, + "step": 8742 + }, + { + "epoch": 0.5324117772432482, + "grad_norm": 1.0304368923521612, + "learning_rate": 4.905471436569722e-06, + "loss": 0.3889, + "step": 8743 + }, + { + "epoch": 0.532472673020126, + "grad_norm": 0.9852150162347811, + "learning_rate": 4.9054497002294535e-06, + "loss": 0.44, + "step": 8744 + }, + { + "epoch": 0.532533568797004, + "grad_norm": 1.075806785366784, + "learning_rate": 4.905427961438562e-06, + "loss": 0.4095, + "step": 8745 + }, + { + "epoch": 0.5325944645738818, + "grad_norm": 1.0699621230704972, + "learning_rate": 4.905406220197071e-06, + "loss": 0.4605, + "step": 8746 + }, + { + "epoch": 0.5326553603507597, + "grad_norm": 1.0053910759695674, + "learning_rate": 4.905384476505002e-06, + "loss": 0.4038, + "step": 8747 + }, + { + "epoch": 0.5327162561276375, + "grad_norm": 1.037684752189671, + "learning_rate": 4.905362730362377e-06, + "loss": 0.42, + "step": 8748 + }, + { + "epoch": 0.5327771519045155, + "grad_norm": 1.0274708574926694, + "learning_rate": 4.90534098176922e-06, + "loss": 0.3908, + "step": 8749 + }, + { + "epoch": 0.5328380476813933, + "grad_norm": 1.0570935108673545, + "learning_rate": 4.905319230725551e-06, + "loss": 0.3938, + "step": 8750 + }, + { + "epoch": 0.5328989434582712, + "grad_norm": 0.9857516658549654, + "learning_rate": 4.905297477231391e-06, + "loss": 0.4669, + "step": 8751 + }, + { + "epoch": 0.532959839235149, + "grad_norm": 1.0385053647868687, + "learning_rate": 4.905275721286766e-06, + "loss": 0.4627, + "step": 8752 + }, + { + "epoch": 0.533020735012027, + "grad_norm": 1.0973752387943303, + "learning_rate": 4.905253962891695e-06, + "loss": 0.4351, + "step": 8753 + }, + { + "epoch": 0.5330816307889048, + "grad_norm": 1.0689368652540898, + "learning_rate": 4.905232202046202e-06, + "loss": 0.4567, + "step": 8754 + }, + { + "epoch": 0.5331425265657826, + "grad_norm": 1.0740372421697455, + "learning_rate": 4.905210438750308e-06, + "loss": 0.4151, + "step": 8755 + }, + { + "epoch": 0.5332034223426605, + "grad_norm": 0.9468507087579875, + "learning_rate": 4.905188673004035e-06, + "loss": 0.5107, + "step": 8756 + }, + { + "epoch": 0.5332643181195384, + "grad_norm": 1.1229700425012341, + "learning_rate": 4.9051669048074065e-06, + "loss": 0.4258, + "step": 8757 + }, + { + "epoch": 0.5333252138964163, + "grad_norm": 0.9766980053264821, + "learning_rate": 4.905145134160444e-06, + "loss": 0.4721, + "step": 8758 + }, + { + "epoch": 0.5333861096732941, + "grad_norm": 0.9612861356490919, + "learning_rate": 4.90512336106317e-06, + "loss": 0.4308, + "step": 8759 + }, + { + "epoch": 0.533447005450172, + "grad_norm": 1.0043371440407733, + "learning_rate": 4.905101585515605e-06, + "loss": 0.4136, + "step": 8760 + }, + { + "epoch": 0.5335079012270499, + "grad_norm": 1.0765601628355332, + "learning_rate": 4.905079807517774e-06, + "loss": 0.404, + "step": 8761 + }, + { + "epoch": 0.5335687970039278, + "grad_norm": 1.0043085460550436, + "learning_rate": 4.9050580270696966e-06, + "loss": 0.4481, + "step": 8762 + }, + { + "epoch": 0.5336296927808056, + "grad_norm": 0.9660890720582136, + "learning_rate": 4.905036244171397e-06, + "loss": 0.4451, + "step": 8763 + }, + { + "epoch": 0.5336905885576835, + "grad_norm": 1.0462376542213785, + "learning_rate": 4.905014458822896e-06, + "loss": 0.3843, + "step": 8764 + }, + { + "epoch": 0.5337514843345614, + "grad_norm": 1.038883608764162, + "learning_rate": 4.9049926710242165e-06, + "loss": 0.421, + "step": 8765 + }, + { + "epoch": 0.5338123801114393, + "grad_norm": 1.0616582394308498, + "learning_rate": 4.90497088077538e-06, + "loss": 0.382, + "step": 8766 + }, + { + "epoch": 0.5338732758883171, + "grad_norm": 1.0261922058606552, + "learning_rate": 4.90494908807641e-06, + "loss": 0.4359, + "step": 8767 + }, + { + "epoch": 0.533934171665195, + "grad_norm": 0.9650478593224517, + "learning_rate": 4.904927292927326e-06, + "loss": 0.4992, + "step": 8768 + }, + { + "epoch": 0.5339950674420729, + "grad_norm": 0.9994832657457573, + "learning_rate": 4.904905495328154e-06, + "loss": 0.547, + "step": 8769 + }, + { + "epoch": 0.5340559632189508, + "grad_norm": 1.0375986443698353, + "learning_rate": 4.904883695278914e-06, + "loss": 0.457, + "step": 8770 + }, + { + "epoch": 0.5341168589958286, + "grad_norm": 1.0944006234817953, + "learning_rate": 4.904861892779627e-06, + "loss": 0.4498, + "step": 8771 + }, + { + "epoch": 0.5341777547727065, + "grad_norm": 1.0957930382182608, + "learning_rate": 4.904840087830319e-06, + "loss": 0.3732, + "step": 8772 + }, + { + "epoch": 0.5342386505495844, + "grad_norm": 1.0134468530843659, + "learning_rate": 4.904818280431009e-06, + "loss": 0.4822, + "step": 8773 + }, + { + "epoch": 0.5342995463264623, + "grad_norm": 1.050837921066168, + "learning_rate": 4.9047964705817195e-06, + "loss": 0.545, + "step": 8774 + }, + { + "epoch": 0.5343604421033401, + "grad_norm": 1.024303889551598, + "learning_rate": 4.904774658282474e-06, + "loss": 0.4579, + "step": 8775 + }, + { + "epoch": 0.5344213378802181, + "grad_norm": 0.9932702955947269, + "learning_rate": 4.904752843533294e-06, + "loss": 0.4356, + "step": 8776 + }, + { + "epoch": 0.5344822336570959, + "grad_norm": 1.0348159877891001, + "learning_rate": 4.904731026334201e-06, + "loss": 0.4531, + "step": 8777 + }, + { + "epoch": 0.5345431294339738, + "grad_norm": 0.9339069260548718, + "learning_rate": 4.904709206685219e-06, + "loss": 0.4879, + "step": 8778 + }, + { + "epoch": 0.5346040252108516, + "grad_norm": 1.0048092773644812, + "learning_rate": 4.9046873845863685e-06, + "loss": 0.439, + "step": 8779 + }, + { + "epoch": 0.5346649209877296, + "grad_norm": 0.9711277014533346, + "learning_rate": 4.904665560037673e-06, + "loss": 0.4312, + "step": 8780 + }, + { + "epoch": 0.5347258167646074, + "grad_norm": 1.0585575291747384, + "learning_rate": 4.904643733039154e-06, + "loss": 0.4212, + "step": 8781 + }, + { + "epoch": 0.5347867125414852, + "grad_norm": 0.9395755760188568, + "learning_rate": 4.904621903590833e-06, + "loss": 0.4441, + "step": 8782 + }, + { + "epoch": 0.5348476083183631, + "grad_norm": 1.0915142408969474, + "learning_rate": 4.904600071692735e-06, + "loss": 0.4436, + "step": 8783 + }, + { + "epoch": 0.534908504095241, + "grad_norm": 0.9459315658044363, + "learning_rate": 4.904578237344881e-06, + "loss": 0.434, + "step": 8784 + }, + { + "epoch": 0.5349693998721189, + "grad_norm": 0.9996070589557743, + "learning_rate": 4.90455640054729e-06, + "loss": 0.4831, + "step": 8785 + }, + { + "epoch": 0.5350302956489967, + "grad_norm": 1.1350240538844638, + "learning_rate": 4.904534561299988e-06, + "loss": 0.4755, + "step": 8786 + }, + { + "epoch": 0.5350911914258746, + "grad_norm": 0.9992085807911638, + "learning_rate": 4.904512719602997e-06, + "loss": 0.5021, + "step": 8787 + }, + { + "epoch": 0.5351520872027525, + "grad_norm": 0.9918694646848969, + "learning_rate": 4.904490875456338e-06, + "loss": 0.3957, + "step": 8788 + }, + { + "epoch": 0.5352129829796304, + "grad_norm": 0.9786070677779807, + "learning_rate": 4.904469028860034e-06, + "loss": 0.4052, + "step": 8789 + }, + { + "epoch": 0.5352738787565082, + "grad_norm": 0.9949264699607133, + "learning_rate": 4.904447179814106e-06, + "loss": 0.437, + "step": 8790 + }, + { + "epoch": 0.5353347745333861, + "grad_norm": 1.0154700729262642, + "learning_rate": 4.904425328318578e-06, + "loss": 0.4612, + "step": 8791 + }, + { + "epoch": 0.535395670310264, + "grad_norm": 0.9917205282077092, + "learning_rate": 4.904403474373472e-06, + "loss": 0.4586, + "step": 8792 + }, + { + "epoch": 0.5354565660871419, + "grad_norm": 0.940997628609883, + "learning_rate": 4.904381617978808e-06, + "loss": 0.5066, + "step": 8793 + }, + { + "epoch": 0.5355174618640197, + "grad_norm": 1.0450016569423546, + "learning_rate": 4.9043597591346116e-06, + "loss": 0.5113, + "step": 8794 + }, + { + "epoch": 0.5355783576408976, + "grad_norm": 1.00657127059955, + "learning_rate": 4.9043378978409025e-06, + "loss": 0.485, + "step": 8795 + }, + { + "epoch": 0.5356392534177755, + "grad_norm": 1.1164223008766654, + "learning_rate": 4.9043160340977045e-06, + "loss": 0.3727, + "step": 8796 + }, + { + "epoch": 0.5357001491946534, + "grad_norm": 1.0324556960034639, + "learning_rate": 4.904294167905039e-06, + "loss": 0.442, + "step": 8797 + }, + { + "epoch": 0.5357610449715312, + "grad_norm": 0.9589159940925782, + "learning_rate": 4.9042722992629285e-06, + "loss": 0.4768, + "step": 8798 + }, + { + "epoch": 0.535821940748409, + "grad_norm": 0.9313720046792108, + "learning_rate": 4.904250428171395e-06, + "loss": 0.4927, + "step": 8799 + }, + { + "epoch": 0.535882836525287, + "grad_norm": 1.061595623721985, + "learning_rate": 4.904228554630462e-06, + "loss": 0.4173, + "step": 8800 + }, + { + "epoch": 0.5359437323021649, + "grad_norm": 1.0583501394066324, + "learning_rate": 4.904206678640151e-06, + "loss": 0.3789, + "step": 8801 + }, + { + "epoch": 0.5360046280790427, + "grad_norm": 0.9240166682586353, + "learning_rate": 4.904184800200483e-06, + "loss": 0.3884, + "step": 8802 + }, + { + "epoch": 0.5360655238559205, + "grad_norm": 1.0256614625649487, + "learning_rate": 4.904162919311482e-06, + "loss": 0.4185, + "step": 8803 + }, + { + "epoch": 0.5361264196327985, + "grad_norm": 0.9813338540486756, + "learning_rate": 4.9041410359731715e-06, + "loss": 0.3975, + "step": 8804 + }, + { + "epoch": 0.5361873154096763, + "grad_norm": 0.97938612375245, + "learning_rate": 4.90411915018557e-06, + "loss": 0.4064, + "step": 8805 + }, + { + "epoch": 0.5362482111865542, + "grad_norm": 0.9873492805913616, + "learning_rate": 4.904097261948703e-06, + "loss": 0.4818, + "step": 8806 + }, + { + "epoch": 0.536309106963432, + "grad_norm": 1.0202545104547442, + "learning_rate": 4.904075371262591e-06, + "loss": 0.4631, + "step": 8807 + }, + { + "epoch": 0.53637000274031, + "grad_norm": 0.9484341088293203, + "learning_rate": 4.904053478127258e-06, + "loss": 0.4629, + "step": 8808 + }, + { + "epoch": 0.5364308985171878, + "grad_norm": 1.0709646173071097, + "learning_rate": 4.904031582542724e-06, + "loss": 0.4296, + "step": 8809 + }, + { + "epoch": 0.5364917942940657, + "grad_norm": 1.0498595027359283, + "learning_rate": 4.904009684509013e-06, + "loss": 0.4568, + "step": 8810 + }, + { + "epoch": 0.5365526900709435, + "grad_norm": 0.9726768306486795, + "learning_rate": 4.903987784026148e-06, + "loss": 0.4426, + "step": 8811 + }, + { + "epoch": 0.5366135858478215, + "grad_norm": 1.112422756449798, + "learning_rate": 4.90396588109415e-06, + "loss": 0.3904, + "step": 8812 + }, + { + "epoch": 0.5366744816246993, + "grad_norm": 1.0689192291580103, + "learning_rate": 4.9039439757130405e-06, + "loss": 0.3537, + "step": 8813 + }, + { + "epoch": 0.5367353774015772, + "grad_norm": 1.0726688944028444, + "learning_rate": 4.903922067882842e-06, + "loss": 0.4273, + "step": 8814 + }, + { + "epoch": 0.536796273178455, + "grad_norm": 0.9602869252724383, + "learning_rate": 4.90390015760358e-06, + "loss": 0.4137, + "step": 8815 + }, + { + "epoch": 0.536857168955333, + "grad_norm": 1.0388449374825388, + "learning_rate": 4.903878244875273e-06, + "loss": 0.466, + "step": 8816 + }, + { + "epoch": 0.5369180647322108, + "grad_norm": 1.0524195127841307, + "learning_rate": 4.903856329697945e-06, + "loss": 0.4108, + "step": 8817 + }, + { + "epoch": 0.5369789605090887, + "grad_norm": 1.0253290852891346, + "learning_rate": 4.903834412071619e-06, + "loss": 0.4459, + "step": 8818 + }, + { + "epoch": 0.5370398562859666, + "grad_norm": 1.0117352633420251, + "learning_rate": 4.903812491996316e-06, + "loss": 0.4045, + "step": 8819 + }, + { + "epoch": 0.5371007520628445, + "grad_norm": 1.0039684651192462, + "learning_rate": 4.903790569472059e-06, + "loss": 0.4704, + "step": 8820 + }, + { + "epoch": 0.5371616478397223, + "grad_norm": 0.9657809964932154, + "learning_rate": 4.903768644498869e-06, + "loss": 0.4982, + "step": 8821 + }, + { + "epoch": 0.5372225436166002, + "grad_norm": 1.054597286073078, + "learning_rate": 4.903746717076771e-06, + "loss": 0.4178, + "step": 8822 + }, + { + "epoch": 0.5372834393934781, + "grad_norm": 1.051380464729637, + "learning_rate": 4.9037247872057845e-06, + "loss": 0.4488, + "step": 8823 + }, + { + "epoch": 0.537344335170356, + "grad_norm": 1.1308981081890037, + "learning_rate": 4.9037028548859335e-06, + "loss": 0.3828, + "step": 8824 + }, + { + "epoch": 0.5374052309472338, + "grad_norm": 1.0021234573756461, + "learning_rate": 4.903680920117241e-06, + "loss": 0.5184, + "step": 8825 + }, + { + "epoch": 0.5374661267241116, + "grad_norm": 0.9811528227479334, + "learning_rate": 4.9036589828997275e-06, + "loss": 0.3856, + "step": 8826 + }, + { + "epoch": 0.5375270225009896, + "grad_norm": 1.02332120538785, + "learning_rate": 4.903637043233417e-06, + "loss": 0.4265, + "step": 8827 + }, + { + "epoch": 0.5375879182778674, + "grad_norm": 1.0230925146966947, + "learning_rate": 4.90361510111833e-06, + "loss": 0.4474, + "step": 8828 + }, + { + "epoch": 0.5376488140547453, + "grad_norm": 1.0179551345996987, + "learning_rate": 4.90359315655449e-06, + "loss": 0.3741, + "step": 8829 + }, + { + "epoch": 0.5377097098316231, + "grad_norm": 0.9541475659903479, + "learning_rate": 4.90357120954192e-06, + "loss": 0.4648, + "step": 8830 + }, + { + "epoch": 0.5377706056085011, + "grad_norm": 1.0376019325349968, + "learning_rate": 4.90354926008064e-06, + "loss": 0.4628, + "step": 8831 + }, + { + "epoch": 0.5378315013853789, + "grad_norm": 0.9855375105446861, + "learning_rate": 4.9035273081706755e-06, + "loss": 0.4687, + "step": 8832 + }, + { + "epoch": 0.5378923971622568, + "grad_norm": 1.0309103844940484, + "learning_rate": 4.903505353812048e-06, + "loss": 0.4463, + "step": 8833 + }, + { + "epoch": 0.5379532929391346, + "grad_norm": 1.0648159767662762, + "learning_rate": 4.903483397004778e-06, + "loss": 0.4459, + "step": 8834 + }, + { + "epoch": 0.5380141887160126, + "grad_norm": 1.010336319411342, + "learning_rate": 4.9034614377488884e-06, + "loss": 0.4192, + "step": 8835 + }, + { + "epoch": 0.5380750844928904, + "grad_norm": 1.0298395880804645, + "learning_rate": 4.903439476044404e-06, + "loss": 0.4824, + "step": 8836 + }, + { + "epoch": 0.5381359802697683, + "grad_norm": 1.0776871308226332, + "learning_rate": 4.903417511891344e-06, + "loss": 0.4441, + "step": 8837 + }, + { + "epoch": 0.5381968760466461, + "grad_norm": 1.0526490307239313, + "learning_rate": 4.903395545289733e-06, + "loss": 0.5071, + "step": 8838 + }, + { + "epoch": 0.5382577718235241, + "grad_norm": 1.0287330478348344, + "learning_rate": 4.903373576239593e-06, + "loss": 0.4513, + "step": 8839 + }, + { + "epoch": 0.5383186676004019, + "grad_norm": 1.0845234709421214, + "learning_rate": 4.903351604740945e-06, + "loss": 0.418, + "step": 8840 + }, + { + "epoch": 0.5383795633772798, + "grad_norm": 1.0030205951362872, + "learning_rate": 4.9033296307938124e-06, + "loss": 0.4657, + "step": 8841 + }, + { + "epoch": 0.5384404591541576, + "grad_norm": 0.9381692854624355, + "learning_rate": 4.903307654398218e-06, + "loss": 0.4327, + "step": 8842 + }, + { + "epoch": 0.5385013549310356, + "grad_norm": 1.0272255985141678, + "learning_rate": 4.903285675554184e-06, + "loss": 0.4453, + "step": 8843 + }, + { + "epoch": 0.5385622507079134, + "grad_norm": 1.0401714014152261, + "learning_rate": 4.903263694261731e-06, + "loss": 0.4577, + "step": 8844 + }, + { + "epoch": 0.5386231464847913, + "grad_norm": 0.941303466182871, + "learning_rate": 4.903241710520885e-06, + "loss": 0.4712, + "step": 8845 + }, + { + "epoch": 0.5386840422616691, + "grad_norm": 0.9946522202917325, + "learning_rate": 4.903219724331665e-06, + "loss": 0.3975, + "step": 8846 + }, + { + "epoch": 0.5387449380385471, + "grad_norm": 1.0717968396725852, + "learning_rate": 4.903197735694095e-06, + "loss": 0.3976, + "step": 8847 + }, + { + "epoch": 0.5388058338154249, + "grad_norm": 1.01367962792509, + "learning_rate": 4.903175744608198e-06, + "loss": 0.427, + "step": 8848 + }, + { + "epoch": 0.5388667295923028, + "grad_norm": 1.0522411469946624, + "learning_rate": 4.903153751073995e-06, + "loss": 0.4879, + "step": 8849 + }, + { + "epoch": 0.5389276253691806, + "grad_norm": 0.993170247402638, + "learning_rate": 4.903131755091508e-06, + "loss": 0.4838, + "step": 8850 + }, + { + "epoch": 0.5389885211460586, + "grad_norm": 1.0752090358587159, + "learning_rate": 4.903109756660761e-06, + "loss": 0.4024, + "step": 8851 + }, + { + "epoch": 0.5390494169229364, + "grad_norm": 1.0440214799456065, + "learning_rate": 4.903087755781776e-06, + "loss": 0.4498, + "step": 8852 + }, + { + "epoch": 0.5391103126998142, + "grad_norm": 0.9877865406440135, + "learning_rate": 4.903065752454575e-06, + "loss": 0.4302, + "step": 8853 + }, + { + "epoch": 0.5391712084766921, + "grad_norm": 1.08453171099103, + "learning_rate": 4.903043746679179e-06, + "loss": 0.4092, + "step": 8854 + }, + { + "epoch": 0.53923210425357, + "grad_norm": 1.0389533420644828, + "learning_rate": 4.903021738455614e-06, + "loss": 0.4086, + "step": 8855 + }, + { + "epoch": 0.5392930000304479, + "grad_norm": 0.9970936852939618, + "learning_rate": 4.9029997277839e-06, + "loss": 0.405, + "step": 8856 + }, + { + "epoch": 0.5393538958073257, + "grad_norm": 1.0905517999513383, + "learning_rate": 4.90297771466406e-06, + "loss": 0.3442, + "step": 8857 + }, + { + "epoch": 0.5394147915842037, + "grad_norm": 1.0732683306534652, + "learning_rate": 4.902955699096116e-06, + "loss": 0.4268, + "step": 8858 + }, + { + "epoch": 0.5394756873610815, + "grad_norm": 1.101338700524692, + "learning_rate": 4.90293368108009e-06, + "loss": 0.5061, + "step": 8859 + }, + { + "epoch": 0.5395365831379594, + "grad_norm": 0.9329099532433348, + "learning_rate": 4.902911660616006e-06, + "loss": 0.4347, + "step": 8860 + }, + { + "epoch": 0.5395974789148372, + "grad_norm": 0.9764863023422466, + "learning_rate": 4.902889637703885e-06, + "loss": 0.4851, + "step": 8861 + }, + { + "epoch": 0.5396583746917152, + "grad_norm": 1.0872564542646372, + "learning_rate": 4.9028676123437505e-06, + "loss": 0.401, + "step": 8862 + }, + { + "epoch": 0.539719270468593, + "grad_norm": 0.9920214231312706, + "learning_rate": 4.902845584535624e-06, + "loss": 0.4953, + "step": 8863 + }, + { + "epoch": 0.5397801662454709, + "grad_norm": 0.9979111421377957, + "learning_rate": 4.902823554279529e-06, + "loss": 0.5019, + "step": 8864 + }, + { + "epoch": 0.5398410620223487, + "grad_norm": 0.9955528469603008, + "learning_rate": 4.902801521575487e-06, + "loss": 0.4342, + "step": 8865 + }, + { + "epoch": 0.5399019577992267, + "grad_norm": 1.0207735327251055, + "learning_rate": 4.90277948642352e-06, + "loss": 0.4674, + "step": 8866 + }, + { + "epoch": 0.5399628535761045, + "grad_norm": 0.9769556514279572, + "learning_rate": 4.902757448823652e-06, + "loss": 0.4096, + "step": 8867 + }, + { + "epoch": 0.5400237493529824, + "grad_norm": 0.9576339332072332, + "learning_rate": 4.902735408775905e-06, + "loss": 0.4136, + "step": 8868 + }, + { + "epoch": 0.5400846451298602, + "grad_norm": 1.0482401245099875, + "learning_rate": 4.9027133662803e-06, + "loss": 0.4078, + "step": 8869 + }, + { + "epoch": 0.5401455409067382, + "grad_norm": 1.0287046016943167, + "learning_rate": 4.902691321336862e-06, + "loss": 0.4329, + "step": 8870 + }, + { + "epoch": 0.540206436683616, + "grad_norm": 1.0232259925380958, + "learning_rate": 4.902669273945611e-06, + "loss": 0.4127, + "step": 8871 + }, + { + "epoch": 0.5402673324604939, + "grad_norm": 1.0589645610055267, + "learning_rate": 4.902647224106571e-06, + "loss": 0.4196, + "step": 8872 + }, + { + "epoch": 0.5403282282373717, + "grad_norm": 1.000839325247304, + "learning_rate": 4.902625171819764e-06, + "loss": 0.4775, + "step": 8873 + }, + { + "epoch": 0.5403891240142497, + "grad_norm": 0.9861767899914893, + "learning_rate": 4.902603117085212e-06, + "loss": 0.4219, + "step": 8874 + }, + { + "epoch": 0.5404500197911275, + "grad_norm": 1.004271742363558, + "learning_rate": 4.902581059902937e-06, + "loss": 0.409, + "step": 8875 + }, + { + "epoch": 0.5405109155680053, + "grad_norm": 1.0201300534311897, + "learning_rate": 4.902559000272964e-06, + "loss": 0.4537, + "step": 8876 + }, + { + "epoch": 0.5405718113448832, + "grad_norm": 1.03991207759013, + "learning_rate": 4.902536938195314e-06, + "loss": 0.4532, + "step": 8877 + }, + { + "epoch": 0.5406327071217611, + "grad_norm": 1.0373048939428122, + "learning_rate": 4.902514873670008e-06, + "loss": 0.3876, + "step": 8878 + }, + { + "epoch": 0.540693602898639, + "grad_norm": 0.9461691829399309, + "learning_rate": 4.9024928066970704e-06, + "loss": 0.4831, + "step": 8879 + }, + { + "epoch": 0.5407544986755168, + "grad_norm": 0.9879195117996892, + "learning_rate": 4.902470737276523e-06, + "loss": 0.4583, + "step": 8880 + }, + { + "epoch": 0.5408153944523947, + "grad_norm": 0.9584364151564985, + "learning_rate": 4.902448665408389e-06, + "loss": 0.5165, + "step": 8881 + }, + { + "epoch": 0.5408762902292726, + "grad_norm": 1.0660366294467463, + "learning_rate": 4.902426591092689e-06, + "loss": 0.4173, + "step": 8882 + }, + { + "epoch": 0.5409371860061505, + "grad_norm": 1.0487804032230656, + "learning_rate": 4.9024045143294475e-06, + "loss": 0.4333, + "step": 8883 + }, + { + "epoch": 0.5409980817830283, + "grad_norm": 0.8945133941845145, + "learning_rate": 4.902382435118687e-06, + "loss": 0.4115, + "step": 8884 + }, + { + "epoch": 0.5410589775599062, + "grad_norm": 1.0199408564256711, + "learning_rate": 4.902360353460428e-06, + "loss": 0.4579, + "step": 8885 + }, + { + "epoch": 0.5411198733367841, + "grad_norm": 0.9270151034042129, + "learning_rate": 4.902338269354694e-06, + "loss": 0.4737, + "step": 8886 + }, + { + "epoch": 0.541180769113662, + "grad_norm": 1.0186201846749932, + "learning_rate": 4.902316182801508e-06, + "loss": 0.3954, + "step": 8887 + }, + { + "epoch": 0.5412416648905398, + "grad_norm": 0.9923973448887533, + "learning_rate": 4.9022940938008935e-06, + "loss": 0.4376, + "step": 8888 + }, + { + "epoch": 0.5413025606674177, + "grad_norm": 1.0087482090687485, + "learning_rate": 4.90227200235287e-06, + "loss": 0.4249, + "step": 8889 + }, + { + "epoch": 0.5413634564442956, + "grad_norm": 1.0020860144059982, + "learning_rate": 4.902249908457463e-06, + "loss": 0.4758, + "step": 8890 + }, + { + "epoch": 0.5414243522211735, + "grad_norm": 1.1032359925162003, + "learning_rate": 4.9022278121146924e-06, + "loss": 0.3657, + "step": 8891 + }, + { + "epoch": 0.5414852479980513, + "grad_norm": 1.223045184540932, + "learning_rate": 4.902205713324584e-06, + "loss": 0.3625, + "step": 8892 + }, + { + "epoch": 0.5415461437749292, + "grad_norm": 0.9931090614886199, + "learning_rate": 4.902183612087157e-06, + "loss": 0.4498, + "step": 8893 + }, + { + "epoch": 0.5416070395518071, + "grad_norm": 1.0551892826971234, + "learning_rate": 4.9021615084024355e-06, + "loss": 0.4462, + "step": 8894 + }, + { + "epoch": 0.541667935328685, + "grad_norm": 1.00153207834239, + "learning_rate": 4.902139402270442e-06, + "loss": 0.4483, + "step": 8895 + }, + { + "epoch": 0.5417288311055628, + "grad_norm": 0.974574119721614, + "learning_rate": 4.902117293691198e-06, + "loss": 0.489, + "step": 8896 + }, + { + "epoch": 0.5417897268824406, + "grad_norm": 1.0134628352495076, + "learning_rate": 4.9020951826647275e-06, + "loss": 0.416, + "step": 8897 + }, + { + "epoch": 0.5418506226593186, + "grad_norm": 0.9703892933961767, + "learning_rate": 4.902073069191052e-06, + "loss": 0.4201, + "step": 8898 + }, + { + "epoch": 0.5419115184361964, + "grad_norm": 1.0315246484356395, + "learning_rate": 4.902050953270195e-06, + "loss": 0.4275, + "step": 8899 + }, + { + "epoch": 0.5419724142130743, + "grad_norm": 0.933090801152509, + "learning_rate": 4.902028834902178e-06, + "loss": 0.5033, + "step": 8900 + }, + { + "epoch": 0.5420333099899522, + "grad_norm": 0.9815196691669624, + "learning_rate": 4.902006714087024e-06, + "loss": 0.4741, + "step": 8901 + }, + { + "epoch": 0.5420942057668301, + "grad_norm": 0.9835866223602149, + "learning_rate": 4.901984590824756e-06, + "loss": 0.3935, + "step": 8902 + }, + { + "epoch": 0.5421551015437079, + "grad_norm": 1.0143513292452317, + "learning_rate": 4.901962465115395e-06, + "loss": 0.4801, + "step": 8903 + }, + { + "epoch": 0.5422159973205858, + "grad_norm": 1.0880112836982585, + "learning_rate": 4.901940336958966e-06, + "loss": 0.4202, + "step": 8904 + }, + { + "epoch": 0.5422768930974637, + "grad_norm": 1.0032374371832526, + "learning_rate": 4.901918206355489e-06, + "loss": 0.4711, + "step": 8905 + }, + { + "epoch": 0.5423377888743416, + "grad_norm": 0.9662535298987077, + "learning_rate": 4.901896073304988e-06, + "loss": 0.4311, + "step": 8906 + }, + { + "epoch": 0.5423986846512194, + "grad_norm": 0.9866414038154725, + "learning_rate": 4.901873937807485e-06, + "loss": 0.4335, + "step": 8907 + }, + { + "epoch": 0.5424595804280973, + "grad_norm": 0.9519312169526684, + "learning_rate": 4.9018517998630035e-06, + "loss": 0.4947, + "step": 8908 + }, + { + "epoch": 0.5425204762049752, + "grad_norm": 1.0582036325647166, + "learning_rate": 4.901829659471565e-06, + "loss": 0.5192, + "step": 8909 + }, + { + "epoch": 0.5425813719818531, + "grad_norm": 1.0353608485774621, + "learning_rate": 4.901807516633192e-06, + "loss": 0.4598, + "step": 8910 + }, + { + "epoch": 0.5426422677587309, + "grad_norm": 1.1302940914216706, + "learning_rate": 4.9017853713479076e-06, + "loss": 0.4071, + "step": 8911 + }, + { + "epoch": 0.5427031635356088, + "grad_norm": 0.9520792995531941, + "learning_rate": 4.901763223615734e-06, + "loss": 0.5464, + "step": 8912 + }, + { + "epoch": 0.5427640593124867, + "grad_norm": 1.1003806436729262, + "learning_rate": 4.901741073436694e-06, + "loss": 0.3981, + "step": 8913 + }, + { + "epoch": 0.5428249550893646, + "grad_norm": 0.9674469046018422, + "learning_rate": 4.9017189208108105e-06, + "loss": 0.3694, + "step": 8914 + }, + { + "epoch": 0.5428858508662424, + "grad_norm": 1.0130848829064534, + "learning_rate": 4.901696765738105e-06, + "loss": 0.4869, + "step": 8915 + }, + { + "epoch": 0.5429467466431203, + "grad_norm": 1.0129330487926758, + "learning_rate": 4.901674608218602e-06, + "loss": 0.4297, + "step": 8916 + }, + { + "epoch": 0.5430076424199982, + "grad_norm": 0.9545024730750731, + "learning_rate": 4.901652448252322e-06, + "loss": 0.4736, + "step": 8917 + }, + { + "epoch": 0.5430685381968761, + "grad_norm": 1.0419807280359532, + "learning_rate": 4.901630285839288e-06, + "loss": 0.3747, + "step": 8918 + }, + { + "epoch": 0.5431294339737539, + "grad_norm": 1.0130568115230265, + "learning_rate": 4.901608120979524e-06, + "loss": 0.4449, + "step": 8919 + }, + { + "epoch": 0.5431903297506318, + "grad_norm": 1.0496652867983303, + "learning_rate": 4.9015859536730515e-06, + "loss": 0.4215, + "step": 8920 + }, + { + "epoch": 0.5432512255275097, + "grad_norm": 1.0987429990855002, + "learning_rate": 4.901563783919892e-06, + "loss": 0.4314, + "step": 8921 + }, + { + "epoch": 0.5433121213043876, + "grad_norm": 1.0700775860605538, + "learning_rate": 4.901541611720071e-06, + "loss": 0.4167, + "step": 8922 + }, + { + "epoch": 0.5433730170812654, + "grad_norm": 1.1207390656198697, + "learning_rate": 4.901519437073608e-06, + "loss": 0.3534, + "step": 8923 + }, + { + "epoch": 0.5434339128581432, + "grad_norm": 1.0801499741414353, + "learning_rate": 4.901497259980528e-06, + "loss": 0.4931, + "step": 8924 + }, + { + "epoch": 0.5434948086350212, + "grad_norm": 0.9655098758474467, + "learning_rate": 4.901475080440851e-06, + "loss": 0.4839, + "step": 8925 + }, + { + "epoch": 0.543555704411899, + "grad_norm": 0.9674769171518295, + "learning_rate": 4.901452898454602e-06, + "loss": 0.5099, + "step": 8926 + }, + { + "epoch": 0.5436166001887769, + "grad_norm": 0.9871705993449964, + "learning_rate": 4.901430714021803e-06, + "loss": 0.4276, + "step": 8927 + }, + { + "epoch": 0.5436774959656547, + "grad_norm": 1.0395913959397551, + "learning_rate": 4.901408527142476e-06, + "loss": 0.4233, + "step": 8928 + }, + { + "epoch": 0.5437383917425327, + "grad_norm": 0.9989833225393461, + "learning_rate": 4.901386337816644e-06, + "loss": 0.4151, + "step": 8929 + }, + { + "epoch": 0.5437992875194105, + "grad_norm": 1.0798515366340824, + "learning_rate": 4.901364146044329e-06, + "loss": 0.4985, + "step": 8930 + }, + { + "epoch": 0.5438601832962884, + "grad_norm": 1.0220621480356735, + "learning_rate": 4.901341951825554e-06, + "loss": 0.391, + "step": 8931 + }, + { + "epoch": 0.5439210790731662, + "grad_norm": 0.9769031592429293, + "learning_rate": 4.901319755160343e-06, + "loss": 0.4927, + "step": 8932 + }, + { + "epoch": 0.5439819748500442, + "grad_norm": 0.9215514897142753, + "learning_rate": 4.901297556048716e-06, + "loss": 0.4532, + "step": 8933 + }, + { + "epoch": 0.544042870626922, + "grad_norm": 1.0126451967653152, + "learning_rate": 4.901275354490698e-06, + "loss": 0.402, + "step": 8934 + }, + { + "epoch": 0.5441037664037999, + "grad_norm": 1.2072645014427823, + "learning_rate": 4.90125315048631e-06, + "loss": 0.4212, + "step": 8935 + }, + { + "epoch": 0.5441646621806777, + "grad_norm": 0.9289309152891018, + "learning_rate": 4.901230944035576e-06, + "loss": 0.4807, + "step": 8936 + }, + { + "epoch": 0.5442255579575557, + "grad_norm": 1.0477291976383798, + "learning_rate": 4.901208735138518e-06, + "loss": 0.3785, + "step": 8937 + }, + { + "epoch": 0.5442864537344335, + "grad_norm": 0.9460827972316779, + "learning_rate": 4.9011865237951565e-06, + "loss": 0.446, + "step": 8938 + }, + { + "epoch": 0.5443473495113114, + "grad_norm": 1.0010018614789926, + "learning_rate": 4.901164310005518e-06, + "loss": 0.4002, + "step": 8939 + }, + { + "epoch": 0.5444082452881893, + "grad_norm": 1.0854554939288723, + "learning_rate": 4.901142093769622e-06, + "loss": 0.4148, + "step": 8940 + }, + { + "epoch": 0.5444691410650672, + "grad_norm": 1.0125329292099854, + "learning_rate": 4.901119875087493e-06, + "loss": 0.5017, + "step": 8941 + }, + { + "epoch": 0.544530036841945, + "grad_norm": 0.9907717178176005, + "learning_rate": 4.901097653959152e-06, + "loss": 0.4239, + "step": 8942 + }, + { + "epoch": 0.5445909326188229, + "grad_norm": 1.0550177388907076, + "learning_rate": 4.9010754303846245e-06, + "loss": 0.4536, + "step": 8943 + }, + { + "epoch": 0.5446518283957008, + "grad_norm": 1.0989897776438597, + "learning_rate": 4.90105320436393e-06, + "loss": 0.4475, + "step": 8944 + }, + { + "epoch": 0.5447127241725787, + "grad_norm": 1.031343423731824, + "learning_rate": 4.901030975897093e-06, + "loss": 0.3719, + "step": 8945 + }, + { + "epoch": 0.5447736199494565, + "grad_norm": 1.0259965881699198, + "learning_rate": 4.901008744984135e-06, + "loss": 0.4273, + "step": 8946 + }, + { + "epoch": 0.5448345157263343, + "grad_norm": 1.0158436113804388, + "learning_rate": 4.90098651162508e-06, + "loss": 0.4706, + "step": 8947 + }, + { + "epoch": 0.5448954115032123, + "grad_norm": 0.9748302548073634, + "learning_rate": 4.9009642758199485e-06, + "loss": 0.4128, + "step": 8948 + }, + { + "epoch": 0.5449563072800901, + "grad_norm": 1.046436780999672, + "learning_rate": 4.9009420375687656e-06, + "loss": 0.4323, + "step": 8949 + }, + { + "epoch": 0.545017203056968, + "grad_norm": 1.05348852353459, + "learning_rate": 4.900919796871553e-06, + "loss": 0.4845, + "step": 8950 + }, + { + "epoch": 0.5450780988338458, + "grad_norm": 1.0373818515434676, + "learning_rate": 4.900897553728333e-06, + "loss": 0.4859, + "step": 8951 + }, + { + "epoch": 0.5451389946107238, + "grad_norm": 1.0507610609646176, + "learning_rate": 4.900875308139128e-06, + "loss": 0.3823, + "step": 8952 + }, + { + "epoch": 0.5451998903876016, + "grad_norm": 1.0074919735075514, + "learning_rate": 4.900853060103962e-06, + "loss": 0.5041, + "step": 8953 + }, + { + "epoch": 0.5452607861644795, + "grad_norm": 1.0489952912199811, + "learning_rate": 4.9008308096228555e-06, + "loss": 0.4137, + "step": 8954 + }, + { + "epoch": 0.5453216819413573, + "grad_norm": 1.0402824999097957, + "learning_rate": 4.900808556695833e-06, + "loss": 0.3919, + "step": 8955 + }, + { + "epoch": 0.5453825777182353, + "grad_norm": 1.0177215247221825, + "learning_rate": 4.900786301322918e-06, + "loss": 0.3996, + "step": 8956 + }, + { + "epoch": 0.5454434734951131, + "grad_norm": 1.0780882326022836, + "learning_rate": 4.90076404350413e-06, + "loss": 0.4074, + "step": 8957 + }, + { + "epoch": 0.545504369271991, + "grad_norm": 1.0180188191686608, + "learning_rate": 4.900741783239494e-06, + "loss": 0.4072, + "step": 8958 + }, + { + "epoch": 0.5455652650488688, + "grad_norm": 1.000036144862882, + "learning_rate": 4.900719520529032e-06, + "loss": 0.471, + "step": 8959 + }, + { + "epoch": 0.5456261608257468, + "grad_norm": 1.0500171970538323, + "learning_rate": 4.9006972553727684e-06, + "loss": 0.407, + "step": 8960 + }, + { + "epoch": 0.5456870566026246, + "grad_norm": 1.0683432339332328, + "learning_rate": 4.900674987770723e-06, + "loss": 0.434, + "step": 8961 + }, + { + "epoch": 0.5457479523795025, + "grad_norm": 0.9957522796663678, + "learning_rate": 4.9006527177229204e-06, + "loss": 0.4344, + "step": 8962 + }, + { + "epoch": 0.5458088481563803, + "grad_norm": 0.9512621127794519, + "learning_rate": 4.900630445229382e-06, + "loss": 0.4539, + "step": 8963 + }, + { + "epoch": 0.5458697439332583, + "grad_norm": 1.0432996265654644, + "learning_rate": 4.900608170290132e-06, + "loss": 0.4024, + "step": 8964 + }, + { + "epoch": 0.5459306397101361, + "grad_norm": 0.9971106076725595, + "learning_rate": 4.900585892905192e-06, + "loss": 0.427, + "step": 8965 + }, + { + "epoch": 0.545991535487014, + "grad_norm": 0.9706567485259235, + "learning_rate": 4.900563613074585e-06, + "loss": 0.4271, + "step": 8966 + }, + { + "epoch": 0.5460524312638918, + "grad_norm": 1.0456911665990392, + "learning_rate": 4.900541330798333e-06, + "loss": 0.4634, + "step": 8967 + }, + { + "epoch": 0.5461133270407698, + "grad_norm": 0.9689891479810051, + "learning_rate": 4.900519046076461e-06, + "loss": 0.5268, + "step": 8968 + }, + { + "epoch": 0.5461742228176476, + "grad_norm": 0.9650428813368853, + "learning_rate": 4.9004967589089886e-06, + "loss": 0.4521, + "step": 8969 + }, + { + "epoch": 0.5462351185945254, + "grad_norm": 0.9758876073231528, + "learning_rate": 4.90047446929594e-06, + "loss": 0.4453, + "step": 8970 + }, + { + "epoch": 0.5462960143714033, + "grad_norm": 1.0439859981459325, + "learning_rate": 4.900452177237339e-06, + "loss": 0.4329, + "step": 8971 + }, + { + "epoch": 0.5463569101482812, + "grad_norm": 1.1382351317207993, + "learning_rate": 4.9004298827332064e-06, + "loss": 0.4283, + "step": 8972 + }, + { + "epoch": 0.5464178059251591, + "grad_norm": 0.993985752905156, + "learning_rate": 4.900407585783566e-06, + "loss": 0.4091, + "step": 8973 + }, + { + "epoch": 0.5464787017020369, + "grad_norm": 0.9950931102396884, + "learning_rate": 4.900385286388441e-06, + "loss": 0.428, + "step": 8974 + }, + { + "epoch": 0.5465395974789148, + "grad_norm": 1.009999190152401, + "learning_rate": 4.9003629845478525e-06, + "loss": 0.405, + "step": 8975 + }, + { + "epoch": 0.5466004932557927, + "grad_norm": 1.0108485085229586, + "learning_rate": 4.900340680261824e-06, + "loss": 0.4212, + "step": 8976 + }, + { + "epoch": 0.5466613890326706, + "grad_norm": 0.9800219577848399, + "learning_rate": 4.900318373530379e-06, + "loss": 0.4273, + "step": 8977 + }, + { + "epoch": 0.5467222848095484, + "grad_norm": 1.0249096879639978, + "learning_rate": 4.900296064353539e-06, + "loss": 0.433, + "step": 8978 + }, + { + "epoch": 0.5467831805864263, + "grad_norm": 1.0848659277891461, + "learning_rate": 4.900273752731327e-06, + "loss": 0.4603, + "step": 8979 + }, + { + "epoch": 0.5468440763633042, + "grad_norm": 1.0700905438406876, + "learning_rate": 4.900251438663767e-06, + "loss": 0.3497, + "step": 8980 + }, + { + "epoch": 0.5469049721401821, + "grad_norm": 1.0320707832694578, + "learning_rate": 4.90022912215088e-06, + "loss": 0.436, + "step": 8981 + }, + { + "epoch": 0.5469658679170599, + "grad_norm": 1.096144588450918, + "learning_rate": 4.900206803192689e-06, + "loss": 0.4445, + "step": 8982 + }, + { + "epoch": 0.5470267636939379, + "grad_norm": 1.0097227256611396, + "learning_rate": 4.900184481789219e-06, + "loss": 0.467, + "step": 8983 + }, + { + "epoch": 0.5470876594708157, + "grad_norm": 1.0356760729985095, + "learning_rate": 4.900162157940489e-06, + "loss": 0.4172, + "step": 8984 + }, + { + "epoch": 0.5471485552476936, + "grad_norm": 1.0396456112281323, + "learning_rate": 4.900139831646525e-06, + "loss": 0.4203, + "step": 8985 + }, + { + "epoch": 0.5472094510245714, + "grad_norm": 1.0453438896667957, + "learning_rate": 4.900117502907348e-06, + "loss": 0.4634, + "step": 8986 + }, + { + "epoch": 0.5472703468014494, + "grad_norm": 0.9499232145696499, + "learning_rate": 4.90009517172298e-06, + "loss": 0.4947, + "step": 8987 + }, + { + "epoch": 0.5473312425783272, + "grad_norm": 1.0110734136733643, + "learning_rate": 4.900072838093447e-06, + "loss": 0.437, + "step": 8988 + }, + { + "epoch": 0.5473921383552051, + "grad_norm": 0.9120110357484021, + "learning_rate": 4.900050502018769e-06, + "loss": 0.4573, + "step": 8989 + }, + { + "epoch": 0.5474530341320829, + "grad_norm": 0.9723480077582616, + "learning_rate": 4.900028163498969e-06, + "loss": 0.4547, + "step": 8990 + }, + { + "epoch": 0.5475139299089609, + "grad_norm": 0.9885974602250519, + "learning_rate": 4.90000582253407e-06, + "loss": 0.3958, + "step": 8991 + }, + { + "epoch": 0.5475748256858387, + "grad_norm": 0.9668665319718945, + "learning_rate": 4.899983479124095e-06, + "loss": 0.5045, + "step": 8992 + }, + { + "epoch": 0.5476357214627166, + "grad_norm": 0.9788642398979872, + "learning_rate": 4.899961133269068e-06, + "loss": 0.4257, + "step": 8993 + }, + { + "epoch": 0.5476966172395944, + "grad_norm": 1.155974549260999, + "learning_rate": 4.8999387849690095e-06, + "loss": 0.4009, + "step": 8994 + }, + { + "epoch": 0.5477575130164724, + "grad_norm": 1.0189496762466024, + "learning_rate": 4.899916434223943e-06, + "loss": 0.4013, + "step": 8995 + }, + { + "epoch": 0.5478184087933502, + "grad_norm": 1.0134530804758481, + "learning_rate": 4.899894081033892e-06, + "loss": 0.4532, + "step": 8996 + }, + { + "epoch": 0.547879304570228, + "grad_norm": 1.0218258676344636, + "learning_rate": 4.899871725398879e-06, + "loss": 0.4367, + "step": 8997 + }, + { + "epoch": 0.5479402003471059, + "grad_norm": 0.9575944766331325, + "learning_rate": 4.899849367318927e-06, + "loss": 0.5155, + "step": 8998 + }, + { + "epoch": 0.5480010961239838, + "grad_norm": 1.0418626626016998, + "learning_rate": 4.899827006794057e-06, + "loss": 0.4093, + "step": 8999 + }, + { + "epoch": 0.5480619919008617, + "grad_norm": 1.0353685940215032, + "learning_rate": 4.899804643824293e-06, + "loss": 0.4201, + "step": 9000 + }, + { + "epoch": 0.5481228876777395, + "grad_norm": 0.99502125088026, + "learning_rate": 4.899782278409659e-06, + "loss": 0.5091, + "step": 9001 + }, + { + "epoch": 0.5481837834546174, + "grad_norm": 1.0679402486755503, + "learning_rate": 4.899759910550176e-06, + "loss": 0.4507, + "step": 9002 + }, + { + "epoch": 0.5482446792314953, + "grad_norm": 1.0670375855880365, + "learning_rate": 4.899737540245868e-06, + "loss": 0.3838, + "step": 9003 + }, + { + "epoch": 0.5483055750083732, + "grad_norm": 0.9447484355153937, + "learning_rate": 4.899715167496757e-06, + "loss": 0.4516, + "step": 9004 + }, + { + "epoch": 0.548366470785251, + "grad_norm": 1.0730951363504209, + "learning_rate": 4.899692792302867e-06, + "loss": 0.4607, + "step": 9005 + }, + { + "epoch": 0.5484273665621289, + "grad_norm": 1.0345965314598469, + "learning_rate": 4.899670414664219e-06, + "loss": 0.4783, + "step": 9006 + }, + { + "epoch": 0.5484882623390068, + "grad_norm": 0.9827269555835229, + "learning_rate": 4.899648034580837e-06, + "loss": 0.4739, + "step": 9007 + }, + { + "epoch": 0.5485491581158847, + "grad_norm": 0.9538864876473504, + "learning_rate": 4.899625652052743e-06, + "loss": 0.3935, + "step": 9008 + }, + { + "epoch": 0.5486100538927625, + "grad_norm": 1.050128833131509, + "learning_rate": 4.8996032670799605e-06, + "loss": 0.4407, + "step": 9009 + }, + { + "epoch": 0.5486709496696404, + "grad_norm": 1.0131784400231922, + "learning_rate": 4.899580879662512e-06, + "loss": 0.397, + "step": 9010 + }, + { + "epoch": 0.5487318454465183, + "grad_norm": 0.9909706222306999, + "learning_rate": 4.899558489800421e-06, + "loss": 0.5054, + "step": 9011 + }, + { + "epoch": 0.5487927412233962, + "grad_norm": 0.9140809968330661, + "learning_rate": 4.899536097493709e-06, + "loss": 0.4286, + "step": 9012 + }, + { + "epoch": 0.548853637000274, + "grad_norm": 1.084130762904841, + "learning_rate": 4.899513702742399e-06, + "loss": 0.4919, + "step": 9013 + }, + { + "epoch": 0.5489145327771519, + "grad_norm": 0.9326545667771342, + "learning_rate": 4.899491305546515e-06, + "loss": 0.5249, + "step": 9014 + }, + { + "epoch": 0.5489754285540298, + "grad_norm": 1.0389566471687737, + "learning_rate": 4.899468905906079e-06, + "loss": 0.4165, + "step": 9015 + }, + { + "epoch": 0.5490363243309077, + "grad_norm": 1.1014505810158368, + "learning_rate": 4.8994465038211144e-06, + "loss": 0.3985, + "step": 9016 + }, + { + "epoch": 0.5490972201077855, + "grad_norm": 1.0119326984902728, + "learning_rate": 4.899424099291644e-06, + "loss": 0.4933, + "step": 9017 + }, + { + "epoch": 0.5491581158846633, + "grad_norm": 0.9692475355650202, + "learning_rate": 4.89940169231769e-06, + "loss": 0.4761, + "step": 9018 + }, + { + "epoch": 0.5492190116615413, + "grad_norm": 1.109415162497102, + "learning_rate": 4.899379282899275e-06, + "loss": 0.4382, + "step": 9019 + }, + { + "epoch": 0.5492799074384191, + "grad_norm": 1.0117053642867677, + "learning_rate": 4.8993568710364216e-06, + "loss": 0.4123, + "step": 9020 + }, + { + "epoch": 0.549340803215297, + "grad_norm": 0.9917956541749631, + "learning_rate": 4.899334456729154e-06, + "loss": 0.4891, + "step": 9021 + }, + { + "epoch": 0.549401698992175, + "grad_norm": 0.9759650254516902, + "learning_rate": 4.899312039977495e-06, + "loss": 0.5072, + "step": 9022 + }, + { + "epoch": 0.5494625947690528, + "grad_norm": 1.0504498619144949, + "learning_rate": 4.899289620781466e-06, + "loss": 0.4204, + "step": 9023 + }, + { + "epoch": 0.5495234905459306, + "grad_norm": 0.8918778348505546, + "learning_rate": 4.89926719914109e-06, + "loss": 0.474, + "step": 9024 + }, + { + "epoch": 0.5495843863228085, + "grad_norm": 0.99785371567302, + "learning_rate": 4.899244775056391e-06, + "loss": 0.3917, + "step": 9025 + }, + { + "epoch": 0.5496452820996864, + "grad_norm": 0.9743917064889352, + "learning_rate": 4.899222348527391e-06, + "loss": 0.455, + "step": 9026 + }, + { + "epoch": 0.5497061778765643, + "grad_norm": 1.0181743202525608, + "learning_rate": 4.899199919554114e-06, + "loss": 0.4944, + "step": 9027 + }, + { + "epoch": 0.5497670736534421, + "grad_norm": 1.0141522760264423, + "learning_rate": 4.8991774881365825e-06, + "loss": 0.4915, + "step": 9028 + }, + { + "epoch": 0.54982796943032, + "grad_norm": 1.0357996724552931, + "learning_rate": 4.8991550542748176e-06, + "loss": 0.3881, + "step": 9029 + }, + { + "epoch": 0.5498888652071979, + "grad_norm": 0.994273870956782, + "learning_rate": 4.899132617968843e-06, + "loss": 0.3866, + "step": 9030 + }, + { + "epoch": 0.5499497609840758, + "grad_norm": 1.0673178087347641, + "learning_rate": 4.899110179218684e-06, + "loss": 0.3773, + "step": 9031 + }, + { + "epoch": 0.5500106567609536, + "grad_norm": 0.9425243397855395, + "learning_rate": 4.899087738024359e-06, + "loss": 0.4852, + "step": 9032 + }, + { + "epoch": 0.5500715525378315, + "grad_norm": 0.9477242713652129, + "learning_rate": 4.899065294385895e-06, + "loss": 0.5299, + "step": 9033 + }, + { + "epoch": 0.5501324483147094, + "grad_norm": 1.112768941167182, + "learning_rate": 4.899042848303313e-06, + "loss": 0.4059, + "step": 9034 + }, + { + "epoch": 0.5501933440915873, + "grad_norm": 1.0870374326359908, + "learning_rate": 4.899020399776635e-06, + "loss": 0.4547, + "step": 9035 + }, + { + "epoch": 0.5502542398684651, + "grad_norm": 0.9567941476240601, + "learning_rate": 4.8989979488058856e-06, + "loss": 0.5155, + "step": 9036 + }, + { + "epoch": 0.550315135645343, + "grad_norm": 1.0357715101363119, + "learning_rate": 4.8989754953910876e-06, + "loss": 0.3671, + "step": 9037 + }, + { + "epoch": 0.5503760314222209, + "grad_norm": 1.033819141314235, + "learning_rate": 4.898953039532262e-06, + "loss": 0.4203, + "step": 9038 + }, + { + "epoch": 0.5504369271990988, + "grad_norm": 1.0433679263236952, + "learning_rate": 4.898930581229434e-06, + "loss": 0.4066, + "step": 9039 + }, + { + "epoch": 0.5504978229759766, + "grad_norm": 1.1639050921105845, + "learning_rate": 4.898908120482625e-06, + "loss": 0.3736, + "step": 9040 + }, + { + "epoch": 0.5505587187528544, + "grad_norm": 1.0442212759434482, + "learning_rate": 4.898885657291858e-06, + "loss": 0.3919, + "step": 9041 + }, + { + "epoch": 0.5506196145297324, + "grad_norm": 0.969614975381669, + "learning_rate": 4.898863191657156e-06, + "loss": 0.4588, + "step": 9042 + }, + { + "epoch": 0.5506805103066102, + "grad_norm": 1.0593695005003196, + "learning_rate": 4.898840723578543e-06, + "loss": 0.4518, + "step": 9043 + }, + { + "epoch": 0.5507414060834881, + "grad_norm": 1.0449949596737582, + "learning_rate": 4.89881825305604e-06, + "loss": 0.3953, + "step": 9044 + }, + { + "epoch": 0.5508023018603659, + "grad_norm": 1.0006828722896524, + "learning_rate": 4.898795780089671e-06, + "loss": 0.4203, + "step": 9045 + }, + { + "epoch": 0.5508631976372439, + "grad_norm": 1.0066323717361543, + "learning_rate": 4.8987733046794595e-06, + "loss": 0.4351, + "step": 9046 + }, + { + "epoch": 0.5509240934141217, + "grad_norm": 1.0021024498154272, + "learning_rate": 4.8987508268254265e-06, + "loss": 0.4665, + "step": 9047 + }, + { + "epoch": 0.5509849891909996, + "grad_norm": 1.1546715888156356, + "learning_rate": 4.898728346527597e-06, + "loss": 0.4336, + "step": 9048 + }, + { + "epoch": 0.5510458849678774, + "grad_norm": 1.0143066779639083, + "learning_rate": 4.8987058637859915e-06, + "loss": 0.5775, + "step": 9049 + }, + { + "epoch": 0.5511067807447554, + "grad_norm": 0.9682533968305279, + "learning_rate": 4.898683378600636e-06, + "loss": 0.4818, + "step": 9050 + }, + { + "epoch": 0.5511676765216332, + "grad_norm": 0.9631780310887736, + "learning_rate": 4.8986608909715515e-06, + "loss": 0.4378, + "step": 9051 + }, + { + "epoch": 0.5512285722985111, + "grad_norm": 1.0405426247982914, + "learning_rate": 4.89863840089876e-06, + "loss": 0.4296, + "step": 9052 + }, + { + "epoch": 0.5512894680753889, + "grad_norm": 1.0898582868241078, + "learning_rate": 4.898615908382287e-06, + "loss": 0.3864, + "step": 9053 + }, + { + "epoch": 0.5513503638522669, + "grad_norm": 0.9608962304237125, + "learning_rate": 4.8985934134221525e-06, + "loss": 0.4717, + "step": 9054 + }, + { + "epoch": 0.5514112596291447, + "grad_norm": 0.9992939956511765, + "learning_rate": 4.898570916018382e-06, + "loss": 0.4217, + "step": 9055 + }, + { + "epoch": 0.5514721554060226, + "grad_norm": 1.0317359487815474, + "learning_rate": 4.898548416170997e-06, + "loss": 0.4638, + "step": 9056 + }, + { + "epoch": 0.5515330511829004, + "grad_norm": 1.087033222803651, + "learning_rate": 4.898525913880021e-06, + "loss": 0.4612, + "step": 9057 + }, + { + "epoch": 0.5515939469597784, + "grad_norm": 0.969149442863387, + "learning_rate": 4.898503409145477e-06, + "loss": 0.4629, + "step": 9058 + }, + { + "epoch": 0.5516548427366562, + "grad_norm": 0.9542682458931984, + "learning_rate": 4.898480901967386e-06, + "loss": 0.4477, + "step": 9059 + }, + { + "epoch": 0.5517157385135341, + "grad_norm": 1.003130536873456, + "learning_rate": 4.898458392345774e-06, + "loss": 0.4259, + "step": 9060 + }, + { + "epoch": 0.5517766342904119, + "grad_norm": 1.1243036034558258, + "learning_rate": 4.898435880280662e-06, + "loss": 0.3631, + "step": 9061 + }, + { + "epoch": 0.5518375300672899, + "grad_norm": 1.0617791843582305, + "learning_rate": 4.898413365772073e-06, + "loss": 0.4481, + "step": 9062 + }, + { + "epoch": 0.5518984258441677, + "grad_norm": 0.9717063126675078, + "learning_rate": 4.898390848820031e-06, + "loss": 0.4372, + "step": 9063 + }, + { + "epoch": 0.5519593216210456, + "grad_norm": 1.048003001054004, + "learning_rate": 4.898368329424558e-06, + "loss": 0.4214, + "step": 9064 + }, + { + "epoch": 0.5520202173979235, + "grad_norm": 1.0342977561149602, + "learning_rate": 4.898345807585677e-06, + "loss": 0.4785, + "step": 9065 + }, + { + "epoch": 0.5520811131748014, + "grad_norm": 1.0393049108171206, + "learning_rate": 4.8983232833034115e-06, + "loss": 0.4397, + "step": 9066 + }, + { + "epoch": 0.5521420089516792, + "grad_norm": 1.0065747307682549, + "learning_rate": 4.898300756577783e-06, + "loss": 0.3931, + "step": 9067 + }, + { + "epoch": 0.552202904728557, + "grad_norm": 0.958701198438337, + "learning_rate": 4.898278227408817e-06, + "loss": 0.5001, + "step": 9068 + }, + { + "epoch": 0.552263800505435, + "grad_norm": 0.9228973271852196, + "learning_rate": 4.898255695796535e-06, + "loss": 0.5337, + "step": 9069 + }, + { + "epoch": 0.5523246962823128, + "grad_norm": 0.9792370560025941, + "learning_rate": 4.8982331617409585e-06, + "loss": 0.4672, + "step": 9070 + }, + { + "epoch": 0.5523855920591907, + "grad_norm": 1.0655396788771094, + "learning_rate": 4.898210625242113e-06, + "loss": 0.4639, + "step": 9071 + }, + { + "epoch": 0.5524464878360685, + "grad_norm": 0.9107581671678769, + "learning_rate": 4.89818808630002e-06, + "loss": 0.5012, + "step": 9072 + }, + { + "epoch": 0.5525073836129465, + "grad_norm": 1.0235627222660169, + "learning_rate": 4.898165544914704e-06, + "loss": 0.3979, + "step": 9073 + }, + { + "epoch": 0.5525682793898243, + "grad_norm": 1.0977185566509728, + "learning_rate": 4.898143001086185e-06, + "loss": 0.4257, + "step": 9074 + }, + { + "epoch": 0.5526291751667022, + "grad_norm": 0.9618695469496367, + "learning_rate": 4.898120454814489e-06, + "loss": 0.5063, + "step": 9075 + }, + { + "epoch": 0.55269007094358, + "grad_norm": 0.9907696571011866, + "learning_rate": 4.898097906099637e-06, + "loss": 0.4301, + "step": 9076 + }, + { + "epoch": 0.552750966720458, + "grad_norm": 1.0394681676757282, + "learning_rate": 4.898075354941654e-06, + "loss": 0.3928, + "step": 9077 + }, + { + "epoch": 0.5528118624973358, + "grad_norm": 0.9905757824761319, + "learning_rate": 4.8980528013405606e-06, + "loss": 0.4462, + "step": 9078 + }, + { + "epoch": 0.5528727582742137, + "grad_norm": 0.94204621102323, + "learning_rate": 4.89803024529638e-06, + "loss": 0.4491, + "step": 9079 + }, + { + "epoch": 0.5529336540510915, + "grad_norm": 1.0477722424529374, + "learning_rate": 4.898007686809137e-06, + "loss": 0.4354, + "step": 9080 + }, + { + "epoch": 0.5529945498279695, + "grad_norm": 0.9886693058678864, + "learning_rate": 4.897985125878855e-06, + "loss": 0.4553, + "step": 9081 + }, + { + "epoch": 0.5530554456048473, + "grad_norm": 1.024584156313258, + "learning_rate": 4.897962562505554e-06, + "loss": 0.4497, + "step": 9082 + }, + { + "epoch": 0.5531163413817252, + "grad_norm": 0.9744827104112511, + "learning_rate": 4.897939996689259e-06, + "loss": 0.4667, + "step": 9083 + }, + { + "epoch": 0.553177237158603, + "grad_norm": 1.028281168072449, + "learning_rate": 4.8979174284299915e-06, + "loss": 0.4408, + "step": 9084 + }, + { + "epoch": 0.553238132935481, + "grad_norm": 1.0060014013311611, + "learning_rate": 4.897894857727777e-06, + "loss": 0.4614, + "step": 9085 + }, + { + "epoch": 0.5532990287123588, + "grad_norm": 1.017050888086095, + "learning_rate": 4.897872284582637e-06, + "loss": 0.4487, + "step": 9086 + }, + { + "epoch": 0.5533599244892367, + "grad_norm": 1.0954089303950032, + "learning_rate": 4.897849708994594e-06, + "loss": 0.394, + "step": 9087 + }, + { + "epoch": 0.5534208202661145, + "grad_norm": 1.0356366446526937, + "learning_rate": 4.897827130963672e-06, + "loss": 0.4483, + "step": 9088 + }, + { + "epoch": 0.5534817160429925, + "grad_norm": 1.029998481638611, + "learning_rate": 4.897804550489893e-06, + "loss": 0.4225, + "step": 9089 + }, + { + "epoch": 0.5535426118198703, + "grad_norm": 0.9863508922844936, + "learning_rate": 4.897781967573281e-06, + "loss": 0.4302, + "step": 9090 + }, + { + "epoch": 0.5536035075967481, + "grad_norm": 1.0342699786132956, + "learning_rate": 4.897759382213858e-06, + "loss": 0.4511, + "step": 9091 + }, + { + "epoch": 0.553664403373626, + "grad_norm": 1.0192552128847645, + "learning_rate": 4.897736794411649e-06, + "loss": 0.395, + "step": 9092 + }, + { + "epoch": 0.553725299150504, + "grad_norm": 0.9518102678163921, + "learning_rate": 4.897714204166674e-06, + "loss": 0.4959, + "step": 9093 + }, + { + "epoch": 0.5537861949273818, + "grad_norm": 0.9778466771234922, + "learning_rate": 4.897691611478959e-06, + "loss": 0.4273, + "step": 9094 + }, + { + "epoch": 0.5538470907042596, + "grad_norm": 0.9788513878871554, + "learning_rate": 4.897669016348524e-06, + "loss": 0.3909, + "step": 9095 + }, + { + "epoch": 0.5539079864811375, + "grad_norm": 1.023694145627476, + "learning_rate": 4.897646418775395e-06, + "loss": 0.4499, + "step": 9096 + }, + { + "epoch": 0.5539688822580154, + "grad_norm": 0.9547651028499715, + "learning_rate": 4.897623818759594e-06, + "loss": 0.5295, + "step": 9097 + }, + { + "epoch": 0.5540297780348933, + "grad_norm": 1.037161628414527, + "learning_rate": 4.897601216301142e-06, + "loss": 0.5507, + "step": 9098 + }, + { + "epoch": 0.5540906738117711, + "grad_norm": 1.1587498540482322, + "learning_rate": 4.8975786114000655e-06, + "loss": 0.4126, + "step": 9099 + }, + { + "epoch": 0.554151569588649, + "grad_norm": 1.008987423295109, + "learning_rate": 4.897556004056385e-06, + "loss": 0.4504, + "step": 9100 + }, + { + "epoch": 0.5542124653655269, + "grad_norm": 1.0862140165965926, + "learning_rate": 4.897533394270124e-06, + "loss": 0.4327, + "step": 9101 + }, + { + "epoch": 0.5542733611424048, + "grad_norm": 1.0517522160168178, + "learning_rate": 4.897510782041306e-06, + "loss": 0.4499, + "step": 9102 + }, + { + "epoch": 0.5543342569192826, + "grad_norm": 1.056100051533361, + "learning_rate": 4.8974881673699536e-06, + "loss": 0.4179, + "step": 9103 + }, + { + "epoch": 0.5543951526961606, + "grad_norm": 0.9961931730113159, + "learning_rate": 4.897465550256091e-06, + "loss": 0.4345, + "step": 9104 + }, + { + "epoch": 0.5544560484730384, + "grad_norm": 1.0684009808666894, + "learning_rate": 4.897442930699739e-06, + "loss": 0.5535, + "step": 9105 + }, + { + "epoch": 0.5545169442499163, + "grad_norm": 0.9857924191982209, + "learning_rate": 4.8974203087009235e-06, + "loss": 0.4789, + "step": 9106 + }, + { + "epoch": 0.5545778400267941, + "grad_norm": 0.9806724658838202, + "learning_rate": 4.897397684259665e-06, + "loss": 0.4345, + "step": 9107 + }, + { + "epoch": 0.5546387358036721, + "grad_norm": 1.0793959838506535, + "learning_rate": 4.897375057375988e-06, + "loss": 0.3608, + "step": 9108 + }, + { + "epoch": 0.5546996315805499, + "grad_norm": 1.1671503832786316, + "learning_rate": 4.897352428049915e-06, + "loss": 0.5035, + "step": 9109 + }, + { + "epoch": 0.5547605273574278, + "grad_norm": 0.9818374637415751, + "learning_rate": 4.897329796281469e-06, + "loss": 0.4612, + "step": 9110 + }, + { + "epoch": 0.5548214231343056, + "grad_norm": 1.0327339744339257, + "learning_rate": 4.897307162070674e-06, + "loss": 0.428, + "step": 9111 + }, + { + "epoch": 0.5548823189111836, + "grad_norm": 0.9838852294622201, + "learning_rate": 4.897284525417552e-06, + "loss": 0.4161, + "step": 9112 + }, + { + "epoch": 0.5549432146880614, + "grad_norm": 1.075592768719986, + "learning_rate": 4.8972618863221255e-06, + "loss": 0.4077, + "step": 9113 + }, + { + "epoch": 0.5550041104649392, + "grad_norm": 1.0722522231859704, + "learning_rate": 4.897239244784419e-06, + "loss": 0.3867, + "step": 9114 + }, + { + "epoch": 0.5550650062418171, + "grad_norm": 1.0238413345004247, + "learning_rate": 4.8972166008044555e-06, + "loss": 0.4317, + "step": 9115 + }, + { + "epoch": 0.555125902018695, + "grad_norm": 1.06634334858935, + "learning_rate": 4.897193954382257e-06, + "loss": 0.4114, + "step": 9116 + }, + { + "epoch": 0.5551867977955729, + "grad_norm": 1.0120067851696828, + "learning_rate": 4.897171305517847e-06, + "loss": 0.441, + "step": 9117 + }, + { + "epoch": 0.5552476935724507, + "grad_norm": 0.9709422525993272, + "learning_rate": 4.897148654211249e-06, + "loss": 0.4362, + "step": 9118 + }, + { + "epoch": 0.5553085893493286, + "grad_norm": 1.0201405059699884, + "learning_rate": 4.8971260004624855e-06, + "loss": 0.4395, + "step": 9119 + }, + { + "epoch": 0.5553694851262065, + "grad_norm": 0.9312664279329116, + "learning_rate": 4.89710334427158e-06, + "loss": 0.4333, + "step": 9120 + }, + { + "epoch": 0.5554303809030844, + "grad_norm": 1.0042737022996053, + "learning_rate": 4.897080685638556e-06, + "loss": 0.4381, + "step": 9121 + }, + { + "epoch": 0.5554912766799622, + "grad_norm": 1.098877779017519, + "learning_rate": 4.897058024563436e-06, + "loss": 0.4456, + "step": 9122 + }, + { + "epoch": 0.5555521724568401, + "grad_norm": 0.9983138652656423, + "learning_rate": 4.897035361046243e-06, + "loss": 0.4775, + "step": 9123 + }, + { + "epoch": 0.555613068233718, + "grad_norm": 0.9592377529407315, + "learning_rate": 4.897012695086999e-06, + "loss": 0.4991, + "step": 9124 + }, + { + "epoch": 0.5556739640105959, + "grad_norm": 0.9976240364300897, + "learning_rate": 4.89699002668573e-06, + "loss": 0.4165, + "step": 9125 + }, + { + "epoch": 0.5557348597874737, + "grad_norm": 1.0553247381500803, + "learning_rate": 4.8969673558424566e-06, + "loss": 0.4202, + "step": 9126 + }, + { + "epoch": 0.5557957555643516, + "grad_norm": 1.1159863153430303, + "learning_rate": 4.896944682557202e-06, + "loss": 0.403, + "step": 9127 + }, + { + "epoch": 0.5558566513412295, + "grad_norm": 1.065866929763409, + "learning_rate": 4.896922006829991e-06, + "loss": 0.4281, + "step": 9128 + }, + { + "epoch": 0.5559175471181074, + "grad_norm": 0.9637661432504475, + "learning_rate": 4.8968993286608455e-06, + "loss": 0.4434, + "step": 9129 + }, + { + "epoch": 0.5559784428949852, + "grad_norm": 1.012520423351713, + "learning_rate": 4.896876648049789e-06, + "loss": 0.4464, + "step": 9130 + }, + { + "epoch": 0.5560393386718631, + "grad_norm": 1.0471762686065909, + "learning_rate": 4.896853964996844e-06, + "loss": 0.4256, + "step": 9131 + }, + { + "epoch": 0.556100234448741, + "grad_norm": 1.1705552679407196, + "learning_rate": 4.896831279502034e-06, + "loss": 0.391, + "step": 9132 + }, + { + "epoch": 0.5561611302256189, + "grad_norm": 1.0472258727574546, + "learning_rate": 4.8968085915653816e-06, + "loss": 0.4156, + "step": 9133 + }, + { + "epoch": 0.5562220260024967, + "grad_norm": 0.9950670637244436, + "learning_rate": 4.896785901186912e-06, + "loss": 0.4317, + "step": 9134 + }, + { + "epoch": 0.5562829217793746, + "grad_norm": 1.0795237661186172, + "learning_rate": 4.896763208366646e-06, + "loss": 0.3875, + "step": 9135 + }, + { + "epoch": 0.5563438175562525, + "grad_norm": 0.9716613136166841, + "learning_rate": 4.896740513104607e-06, + "loss": 0.4583, + "step": 9136 + }, + { + "epoch": 0.5564047133331304, + "grad_norm": 0.9730242008135007, + "learning_rate": 4.8967178154008185e-06, + "loss": 0.3965, + "step": 9137 + }, + { + "epoch": 0.5564656091100082, + "grad_norm": 0.9920646238217078, + "learning_rate": 4.8966951152553044e-06, + "loss": 0.4593, + "step": 9138 + }, + { + "epoch": 0.556526504886886, + "grad_norm": 0.9732694504856815, + "learning_rate": 4.8966724126680874e-06, + "loss": 0.4783, + "step": 9139 + }, + { + "epoch": 0.556587400663764, + "grad_norm": 1.0124395195474356, + "learning_rate": 4.8966497076391895e-06, + "loss": 0.4825, + "step": 9140 + }, + { + "epoch": 0.5566482964406418, + "grad_norm": 1.0070556173019225, + "learning_rate": 4.896627000168635e-06, + "loss": 0.4418, + "step": 9141 + }, + { + "epoch": 0.5567091922175197, + "grad_norm": 0.992757271514245, + "learning_rate": 4.896604290256447e-06, + "loss": 0.4259, + "step": 9142 + }, + { + "epoch": 0.5567700879943975, + "grad_norm": 0.9942829716015631, + "learning_rate": 4.8965815779026485e-06, + "loss": 0.4857, + "step": 9143 + }, + { + "epoch": 0.5568309837712755, + "grad_norm": 0.9794536901789086, + "learning_rate": 4.896558863107262e-06, + "loss": 0.4469, + "step": 9144 + }, + { + "epoch": 0.5568918795481533, + "grad_norm": 1.051433235354601, + "learning_rate": 4.896536145870311e-06, + "loss": 0.4053, + "step": 9145 + }, + { + "epoch": 0.5569527753250312, + "grad_norm": 0.9530838897845336, + "learning_rate": 4.896513426191819e-06, + "loss": 0.4386, + "step": 9146 + }, + { + "epoch": 0.5570136711019091, + "grad_norm": 0.9242208214677272, + "learning_rate": 4.896490704071809e-06, + "loss": 0.4917, + "step": 9147 + }, + { + "epoch": 0.557074566878787, + "grad_norm": 0.9492972824810553, + "learning_rate": 4.896467979510304e-06, + "loss": 0.5002, + "step": 9148 + }, + { + "epoch": 0.5571354626556648, + "grad_norm": 0.9635836143555062, + "learning_rate": 4.896445252507328e-06, + "loss": 0.4772, + "step": 9149 + }, + { + "epoch": 0.5571963584325427, + "grad_norm": 1.0131144433491344, + "learning_rate": 4.896422523062902e-06, + "loss": 0.4407, + "step": 9150 + }, + { + "epoch": 0.5572572542094206, + "grad_norm": 0.9868867286633011, + "learning_rate": 4.896399791177052e-06, + "loss": 0.439, + "step": 9151 + }, + { + "epoch": 0.5573181499862985, + "grad_norm": 1.1099553117984544, + "learning_rate": 4.896377056849799e-06, + "loss": 0.4652, + "step": 9152 + }, + { + "epoch": 0.5573790457631763, + "grad_norm": 1.0052265190438716, + "learning_rate": 4.896354320081167e-06, + "loss": 0.4717, + "step": 9153 + }, + { + "epoch": 0.5574399415400542, + "grad_norm": 1.1416129307742378, + "learning_rate": 4.896331580871179e-06, + "loss": 0.4196, + "step": 9154 + }, + { + "epoch": 0.5575008373169321, + "grad_norm": 0.9716221116245307, + "learning_rate": 4.896308839219859e-06, + "loss": 0.4425, + "step": 9155 + }, + { + "epoch": 0.55756173309381, + "grad_norm": 1.0143139968496608, + "learning_rate": 4.896286095127228e-06, + "loss": 0.4543, + "step": 9156 + }, + { + "epoch": 0.5576226288706878, + "grad_norm": 0.9985135471418014, + "learning_rate": 4.896263348593311e-06, + "loss": 0.5116, + "step": 9157 + }, + { + "epoch": 0.5576835246475657, + "grad_norm": 1.0091168824695556, + "learning_rate": 4.896240599618131e-06, + "loss": 0.4276, + "step": 9158 + }, + { + "epoch": 0.5577444204244436, + "grad_norm": 0.9811524731235305, + "learning_rate": 4.896217848201711e-06, + "loss": 0.4145, + "step": 9159 + }, + { + "epoch": 0.5578053162013215, + "grad_norm": 0.9593341226247409, + "learning_rate": 4.896195094344074e-06, + "loss": 0.4405, + "step": 9160 + }, + { + "epoch": 0.5578662119781993, + "grad_norm": 1.0138640933617602, + "learning_rate": 4.8961723380452435e-06, + "loss": 0.4038, + "step": 9161 + }, + { + "epoch": 0.5579271077550771, + "grad_norm": 0.9802039667980471, + "learning_rate": 4.896149579305242e-06, + "loss": 0.4138, + "step": 9162 + }, + { + "epoch": 0.5579880035319551, + "grad_norm": 0.9589738469505491, + "learning_rate": 4.896126818124092e-06, + "loss": 0.5118, + "step": 9163 + }, + { + "epoch": 0.558048899308833, + "grad_norm": 1.0048077348230833, + "learning_rate": 4.896104054501819e-06, + "loss": 0.4572, + "step": 9164 + }, + { + "epoch": 0.5581097950857108, + "grad_norm": 0.9601289570224437, + "learning_rate": 4.896081288438446e-06, + "loss": 0.4567, + "step": 9165 + }, + { + "epoch": 0.5581706908625886, + "grad_norm": 1.1222340505055666, + "learning_rate": 4.896058519933994e-06, + "loss": 0.4029, + "step": 9166 + }, + { + "epoch": 0.5582315866394666, + "grad_norm": 0.9643287131319781, + "learning_rate": 4.8960357489884865e-06, + "loss": 0.4417, + "step": 9167 + }, + { + "epoch": 0.5582924824163444, + "grad_norm": 1.0916014028117393, + "learning_rate": 4.8960129756019494e-06, + "loss": 0.4071, + "step": 9168 + }, + { + "epoch": 0.5583533781932223, + "grad_norm": 1.067687224398344, + "learning_rate": 4.895990199774403e-06, + "loss": 0.4195, + "step": 9169 + }, + { + "epoch": 0.5584142739701001, + "grad_norm": 1.0855736699365204, + "learning_rate": 4.895967421505872e-06, + "loss": 0.4606, + "step": 9170 + }, + { + "epoch": 0.5584751697469781, + "grad_norm": 0.9823735618839102, + "learning_rate": 4.89594464079638e-06, + "loss": 0.3859, + "step": 9171 + }, + { + "epoch": 0.5585360655238559, + "grad_norm": 1.0661210126984195, + "learning_rate": 4.895921857645948e-06, + "loss": 0.4672, + "step": 9172 + }, + { + "epoch": 0.5585969613007338, + "grad_norm": 1.050253575631273, + "learning_rate": 4.8958990720546015e-06, + "loss": 0.3548, + "step": 9173 + }, + { + "epoch": 0.5586578570776116, + "grad_norm": 1.0127676775946528, + "learning_rate": 4.8958762840223625e-06, + "loss": 0.473, + "step": 9174 + }, + { + "epoch": 0.5587187528544896, + "grad_norm": 1.1687527436372769, + "learning_rate": 4.895853493549254e-06, + "loss": 0.412, + "step": 9175 + }, + { + "epoch": 0.5587796486313674, + "grad_norm": 1.029097288443375, + "learning_rate": 4.895830700635301e-06, + "loss": 0.4688, + "step": 9176 + }, + { + "epoch": 0.5588405444082453, + "grad_norm": 1.0017371240363449, + "learning_rate": 4.895807905280525e-06, + "loss": 0.4704, + "step": 9177 + }, + { + "epoch": 0.5589014401851231, + "grad_norm": 1.0778908552147404, + "learning_rate": 4.895785107484949e-06, + "loss": 0.3744, + "step": 9178 + }, + { + "epoch": 0.5589623359620011, + "grad_norm": 1.0668123868971715, + "learning_rate": 4.895762307248598e-06, + "loss": 0.4529, + "step": 9179 + }, + { + "epoch": 0.5590232317388789, + "grad_norm": 0.9870375250245115, + "learning_rate": 4.895739504571494e-06, + "loss": 0.4172, + "step": 9180 + }, + { + "epoch": 0.5590841275157568, + "grad_norm": 1.0057472657255924, + "learning_rate": 4.89571669945366e-06, + "loss": 0.461, + "step": 9181 + }, + { + "epoch": 0.5591450232926346, + "grad_norm": 1.015190890122496, + "learning_rate": 4.895693891895119e-06, + "loss": 0.4156, + "step": 9182 + }, + { + "epoch": 0.5592059190695126, + "grad_norm": 0.9822233702678532, + "learning_rate": 4.895671081895896e-06, + "loss": 0.4789, + "step": 9183 + }, + { + "epoch": 0.5592668148463904, + "grad_norm": 0.9450037985469742, + "learning_rate": 4.895648269456013e-06, + "loss": 0.5032, + "step": 9184 + }, + { + "epoch": 0.5593277106232682, + "grad_norm": 1.0205665907682937, + "learning_rate": 4.895625454575492e-06, + "loss": 0.4609, + "step": 9185 + }, + { + "epoch": 0.5593886064001462, + "grad_norm": 0.9828916129619789, + "learning_rate": 4.895602637254359e-06, + "loss": 0.5232, + "step": 9186 + }, + { + "epoch": 0.559449502177024, + "grad_norm": 0.9514985794361981, + "learning_rate": 4.895579817492636e-06, + "loss": 0.4403, + "step": 9187 + }, + { + "epoch": 0.5595103979539019, + "grad_norm": 0.9903127259899945, + "learning_rate": 4.895556995290345e-06, + "loss": 0.4317, + "step": 9188 + }, + { + "epoch": 0.5595712937307797, + "grad_norm": 0.950645751541365, + "learning_rate": 4.895534170647511e-06, + "loss": 0.4806, + "step": 9189 + }, + { + "epoch": 0.5596321895076577, + "grad_norm": 0.8843228042359194, + "learning_rate": 4.895511343564156e-06, + "loss": 0.4401, + "step": 9190 + }, + { + "epoch": 0.5596930852845355, + "grad_norm": 1.0398363161573863, + "learning_rate": 4.895488514040305e-06, + "loss": 0.449, + "step": 9191 + }, + { + "epoch": 0.5597539810614134, + "grad_norm": 1.0962752419881194, + "learning_rate": 4.8954656820759795e-06, + "loss": 0.4283, + "step": 9192 + }, + { + "epoch": 0.5598148768382912, + "grad_norm": 1.0961820343805753, + "learning_rate": 4.895442847671203e-06, + "loss": 0.4239, + "step": 9193 + }, + { + "epoch": 0.5598757726151692, + "grad_norm": 1.1308180053298689, + "learning_rate": 4.8954200108259996e-06, + "loss": 0.3879, + "step": 9194 + }, + { + "epoch": 0.559936668392047, + "grad_norm": 0.9708266779813461, + "learning_rate": 4.895397171540392e-06, + "loss": 0.5452, + "step": 9195 + }, + { + "epoch": 0.5599975641689249, + "grad_norm": 0.9835247055760461, + "learning_rate": 4.895374329814404e-06, + "loss": 0.498, + "step": 9196 + }, + { + "epoch": 0.5600584599458027, + "grad_norm": 0.9402026275830635, + "learning_rate": 4.895351485648058e-06, + "loss": 0.409, + "step": 9197 + }, + { + "epoch": 0.5601193557226807, + "grad_norm": 1.1863097502275068, + "learning_rate": 4.895328639041377e-06, + "loss": 0.4126, + "step": 9198 + }, + { + "epoch": 0.5601802514995585, + "grad_norm": 0.9701656463124728, + "learning_rate": 4.8953057899943854e-06, + "loss": 0.4369, + "step": 9199 + }, + { + "epoch": 0.5602411472764364, + "grad_norm": 1.0622244143693285, + "learning_rate": 4.895282938507107e-06, + "loss": 0.4261, + "step": 9200 + }, + { + "epoch": 0.5603020430533142, + "grad_norm": 1.0993281041331062, + "learning_rate": 4.895260084579564e-06, + "loss": 0.4245, + "step": 9201 + }, + { + "epoch": 0.5603629388301922, + "grad_norm": 1.18520920670481, + "learning_rate": 4.895237228211779e-06, + "loss": 0.4685, + "step": 9202 + }, + { + "epoch": 0.56042383460707, + "grad_norm": 1.0236174809892358, + "learning_rate": 4.895214369403776e-06, + "loss": 0.4718, + "step": 9203 + }, + { + "epoch": 0.5604847303839479, + "grad_norm": 1.0225241884871008, + "learning_rate": 4.895191508155579e-06, + "loss": 0.4492, + "step": 9204 + }, + { + "epoch": 0.5605456261608257, + "grad_norm": 1.0357347467041795, + "learning_rate": 4.89516864446721e-06, + "loss": 0.4142, + "step": 9205 + }, + { + "epoch": 0.5606065219377037, + "grad_norm": 0.9919224950111134, + "learning_rate": 4.8951457783386935e-06, + "loss": 0.4645, + "step": 9206 + }, + { + "epoch": 0.5606674177145815, + "grad_norm": 0.9554684412947334, + "learning_rate": 4.895122909770053e-06, + "loss": 0.438, + "step": 9207 + }, + { + "epoch": 0.5607283134914594, + "grad_norm": 1.0187733047093734, + "learning_rate": 4.895100038761309e-06, + "loss": 0.3962, + "step": 9208 + }, + { + "epoch": 0.5607892092683372, + "grad_norm": 0.9787894354582112, + "learning_rate": 4.895077165312488e-06, + "loss": 0.4363, + "step": 9209 + }, + { + "epoch": 0.5608501050452152, + "grad_norm": 1.0293381367104961, + "learning_rate": 4.895054289423613e-06, + "loss": 0.4801, + "step": 9210 + }, + { + "epoch": 0.560911000822093, + "grad_norm": 0.9436225256888625, + "learning_rate": 4.895031411094706e-06, + "loss": 0.4357, + "step": 9211 + }, + { + "epoch": 0.5609718965989708, + "grad_norm": 1.0656958229743962, + "learning_rate": 4.895008530325791e-06, + "loss": 0.4014, + "step": 9212 + }, + { + "epoch": 0.5610327923758487, + "grad_norm": 0.930950565132015, + "learning_rate": 4.89498564711689e-06, + "loss": 0.4206, + "step": 9213 + }, + { + "epoch": 0.5610936881527266, + "grad_norm": 1.0342713778857962, + "learning_rate": 4.8949627614680285e-06, + "loss": 0.4191, + "step": 9214 + }, + { + "epoch": 0.5611545839296045, + "grad_norm": 0.9807338333310092, + "learning_rate": 4.894939873379229e-06, + "loss": 0.4663, + "step": 9215 + }, + { + "epoch": 0.5612154797064823, + "grad_norm": 1.0656435899855634, + "learning_rate": 4.894916982850513e-06, + "loss": 0.4152, + "step": 9216 + }, + { + "epoch": 0.5612763754833602, + "grad_norm": 0.9962990175875913, + "learning_rate": 4.8948940898819065e-06, + "loss": 0.4778, + "step": 9217 + }, + { + "epoch": 0.5613372712602381, + "grad_norm": 0.9947069622049759, + "learning_rate": 4.894871194473432e-06, + "loss": 0.5099, + "step": 9218 + }, + { + "epoch": 0.561398167037116, + "grad_norm": 0.9342283487639395, + "learning_rate": 4.894848296625112e-06, + "loss": 0.4266, + "step": 9219 + }, + { + "epoch": 0.5614590628139938, + "grad_norm": 1.0115770149892098, + "learning_rate": 4.89482539633697e-06, + "loss": 0.495, + "step": 9220 + }, + { + "epoch": 0.5615199585908717, + "grad_norm": 0.9209054638408092, + "learning_rate": 4.89480249360903e-06, + "loss": 0.5158, + "step": 9221 + }, + { + "epoch": 0.5615808543677496, + "grad_norm": 0.9681319608101672, + "learning_rate": 4.894779588441315e-06, + "loss": 0.4839, + "step": 9222 + }, + { + "epoch": 0.5616417501446275, + "grad_norm": 1.017031712109911, + "learning_rate": 4.8947566808338486e-06, + "loss": 0.4217, + "step": 9223 + }, + { + "epoch": 0.5617026459215053, + "grad_norm": 1.0125889161920931, + "learning_rate": 4.894733770786654e-06, + "loss": 0.3732, + "step": 9224 + }, + { + "epoch": 0.5617635416983832, + "grad_norm": 1.0030631545066522, + "learning_rate": 4.894710858299754e-06, + "loss": 0.4864, + "step": 9225 + }, + { + "epoch": 0.5618244374752611, + "grad_norm": 1.0088736393600553, + "learning_rate": 4.894687943373172e-06, + "loss": 0.482, + "step": 9226 + }, + { + "epoch": 0.561885333252139, + "grad_norm": 0.9844573280355334, + "learning_rate": 4.894665026006932e-06, + "loss": 0.4703, + "step": 9227 + }, + { + "epoch": 0.5619462290290168, + "grad_norm": 1.0114829866484136, + "learning_rate": 4.894642106201057e-06, + "loss": 0.4009, + "step": 9228 + }, + { + "epoch": 0.5620071248058948, + "grad_norm": 1.052082929759196, + "learning_rate": 4.894619183955571e-06, + "loss": 0.4124, + "step": 9229 + }, + { + "epoch": 0.5620680205827726, + "grad_norm": 1.0998684313130778, + "learning_rate": 4.894596259270496e-06, + "loss": 0.4703, + "step": 9230 + }, + { + "epoch": 0.5621289163596505, + "grad_norm": 1.0697578672731571, + "learning_rate": 4.894573332145857e-06, + "loss": 0.4139, + "step": 9231 + }, + { + "epoch": 0.5621898121365283, + "grad_norm": 1.0886014318532153, + "learning_rate": 4.894550402581676e-06, + "loss": 0.3856, + "step": 9232 + }, + { + "epoch": 0.5622507079134063, + "grad_norm": 1.0717645525041453, + "learning_rate": 4.8945274705779765e-06, + "loss": 0.3854, + "step": 9233 + }, + { + "epoch": 0.5623116036902841, + "grad_norm": 1.0279936894551784, + "learning_rate": 4.894504536134783e-06, + "loss": 0.4115, + "step": 9234 + }, + { + "epoch": 0.562372499467162, + "grad_norm": 1.0316514711544158, + "learning_rate": 4.894481599252118e-06, + "loss": 0.4223, + "step": 9235 + }, + { + "epoch": 0.5624333952440398, + "grad_norm": 0.9669462050950944, + "learning_rate": 4.894458659930004e-06, + "loss": 0.4479, + "step": 9236 + }, + { + "epoch": 0.5624942910209177, + "grad_norm": 1.1113719001641447, + "learning_rate": 4.894435718168466e-06, + "loss": 0.4075, + "step": 9237 + }, + { + "epoch": 0.5625551867977956, + "grad_norm": 1.0219541807527854, + "learning_rate": 4.8944127739675265e-06, + "loss": 0.4532, + "step": 9238 + }, + { + "epoch": 0.5626160825746734, + "grad_norm": 0.9498780633813266, + "learning_rate": 4.8943898273272086e-06, + "loss": 0.5314, + "step": 9239 + }, + { + "epoch": 0.5626769783515513, + "grad_norm": 0.9916928891600824, + "learning_rate": 4.8943668782475365e-06, + "loss": 0.4872, + "step": 9240 + }, + { + "epoch": 0.5627378741284292, + "grad_norm": 0.9514294108586197, + "learning_rate": 4.894343926728533e-06, + "loss": 0.4698, + "step": 9241 + }, + { + "epoch": 0.5627987699053071, + "grad_norm": 1.1181298876129586, + "learning_rate": 4.8943209727702225e-06, + "loss": 0.4507, + "step": 9242 + }, + { + "epoch": 0.5628596656821849, + "grad_norm": 0.967793655564799, + "learning_rate": 4.894298016372627e-06, + "loss": 0.5547, + "step": 9243 + }, + { + "epoch": 0.5629205614590628, + "grad_norm": 1.0609366561851938, + "learning_rate": 4.89427505753577e-06, + "loss": 0.4581, + "step": 9244 + }, + { + "epoch": 0.5629814572359407, + "grad_norm": 0.9882979443354972, + "learning_rate": 4.894252096259676e-06, + "loss": 0.4617, + "step": 9245 + }, + { + "epoch": 0.5630423530128186, + "grad_norm": 1.0496860765395124, + "learning_rate": 4.894229132544368e-06, + "loss": 0.4751, + "step": 9246 + }, + { + "epoch": 0.5631032487896964, + "grad_norm": 1.000913071194918, + "learning_rate": 4.894206166389869e-06, + "loss": 0.504, + "step": 9247 + }, + { + "epoch": 0.5631641445665743, + "grad_norm": 0.9845006821376394, + "learning_rate": 4.894183197796202e-06, + "loss": 0.4779, + "step": 9248 + }, + { + "epoch": 0.5632250403434522, + "grad_norm": 0.9440603191964834, + "learning_rate": 4.894160226763391e-06, + "loss": 0.4304, + "step": 9249 + }, + { + "epoch": 0.5632859361203301, + "grad_norm": 0.9191864708142403, + "learning_rate": 4.89413725329146e-06, + "loss": 0.4759, + "step": 9250 + }, + { + "epoch": 0.5633468318972079, + "grad_norm": 1.0540545956342742, + "learning_rate": 4.894114277380431e-06, + "loss": 0.4457, + "step": 9251 + }, + { + "epoch": 0.5634077276740858, + "grad_norm": 1.1009948507775242, + "learning_rate": 4.894091299030328e-06, + "loss": 0.4325, + "step": 9252 + }, + { + "epoch": 0.5634686234509637, + "grad_norm": 0.9912366301685033, + "learning_rate": 4.894068318241176e-06, + "loss": 0.4604, + "step": 9253 + }, + { + "epoch": 0.5635295192278416, + "grad_norm": 1.0869753452039386, + "learning_rate": 4.894045335012994e-06, + "loss": 0.5428, + "step": 9254 + }, + { + "epoch": 0.5635904150047194, + "grad_norm": 0.9163259771270561, + "learning_rate": 4.894022349345811e-06, + "loss": 0.4389, + "step": 9255 + }, + { + "epoch": 0.5636513107815972, + "grad_norm": 1.000040898464272, + "learning_rate": 4.893999361239647e-06, + "loss": 0.4315, + "step": 9256 + }, + { + "epoch": 0.5637122065584752, + "grad_norm": 0.9923868884488661, + "learning_rate": 4.893976370694526e-06, + "loss": 0.431, + "step": 9257 + }, + { + "epoch": 0.563773102335353, + "grad_norm": 0.9102537989018168, + "learning_rate": 4.893953377710472e-06, + "loss": 0.4319, + "step": 9258 + }, + { + "epoch": 0.5638339981122309, + "grad_norm": 0.9406426855516754, + "learning_rate": 4.8939303822875086e-06, + "loss": 0.5025, + "step": 9259 + }, + { + "epoch": 0.5638948938891087, + "grad_norm": 1.0512705233242046, + "learning_rate": 4.893907384425658e-06, + "loss": 0.3899, + "step": 9260 + }, + { + "epoch": 0.5639557896659867, + "grad_norm": 1.070630819014053, + "learning_rate": 4.893884384124945e-06, + "loss": 0.4334, + "step": 9261 + }, + { + "epoch": 0.5640166854428645, + "grad_norm": 0.9875820416341962, + "learning_rate": 4.893861381385392e-06, + "loss": 0.4852, + "step": 9262 + }, + { + "epoch": 0.5640775812197424, + "grad_norm": 0.9448607156760805, + "learning_rate": 4.8938383762070216e-06, + "loss": 0.4648, + "step": 9263 + }, + { + "epoch": 0.5641384769966202, + "grad_norm": 1.0064683306169757, + "learning_rate": 4.8938153685898605e-06, + "loss": 0.415, + "step": 9264 + }, + { + "epoch": 0.5641993727734982, + "grad_norm": 1.0264211133994332, + "learning_rate": 4.893792358533929e-06, + "loss": 0.4249, + "step": 9265 + }, + { + "epoch": 0.564260268550376, + "grad_norm": 1.0181152766562214, + "learning_rate": 4.893769346039251e-06, + "loss": 0.3988, + "step": 9266 + }, + { + "epoch": 0.5643211643272539, + "grad_norm": 0.9964228120028205, + "learning_rate": 4.893746331105851e-06, + "loss": 0.5014, + "step": 9267 + }, + { + "epoch": 0.5643820601041318, + "grad_norm": 1.0329048200335953, + "learning_rate": 4.893723313733753e-06, + "loss": 0.4242, + "step": 9268 + }, + { + "epoch": 0.5644429558810097, + "grad_norm": 1.0781313999289748, + "learning_rate": 4.893700293922978e-06, + "loss": 0.3801, + "step": 9269 + }, + { + "epoch": 0.5645038516578875, + "grad_norm": 1.098398182774067, + "learning_rate": 4.893677271673552e-06, + "loss": 0.4444, + "step": 9270 + }, + { + "epoch": 0.5645647474347654, + "grad_norm": 1.0224626049828103, + "learning_rate": 4.893654246985496e-06, + "loss": 0.4468, + "step": 9271 + }, + { + "epoch": 0.5646256432116433, + "grad_norm": 1.0088038860803554, + "learning_rate": 4.893631219858836e-06, + "loss": 0.4858, + "step": 9272 + }, + { + "epoch": 0.5646865389885212, + "grad_norm": 0.9452083254050637, + "learning_rate": 4.893608190293595e-06, + "loss": 0.4605, + "step": 9273 + }, + { + "epoch": 0.564747434765399, + "grad_norm": 1.1143116151210435, + "learning_rate": 4.893585158289794e-06, + "loss": 0.4818, + "step": 9274 + }, + { + "epoch": 0.5648083305422769, + "grad_norm": 1.0936321283554393, + "learning_rate": 4.89356212384746e-06, + "loss": 0.4603, + "step": 9275 + }, + { + "epoch": 0.5648692263191548, + "grad_norm": 0.9803366953254639, + "learning_rate": 4.893539086966613e-06, + "loss": 0.444, + "step": 9276 + }, + { + "epoch": 0.5649301220960327, + "grad_norm": 1.039015474229476, + "learning_rate": 4.893516047647279e-06, + "loss": 0.398, + "step": 9277 + }, + { + "epoch": 0.5649910178729105, + "grad_norm": 0.9375510736647629, + "learning_rate": 4.89349300588948e-06, + "loss": 0.5208, + "step": 9278 + }, + { + "epoch": 0.5650519136497884, + "grad_norm": 0.9937385852583618, + "learning_rate": 4.893469961693241e-06, + "loss": 0.412, + "step": 9279 + }, + { + "epoch": 0.5651128094266663, + "grad_norm": 1.0287556041173167, + "learning_rate": 4.893446915058584e-06, + "loss": 0.4346, + "step": 9280 + }, + { + "epoch": 0.5651737052035442, + "grad_norm": 1.0172706539222098, + "learning_rate": 4.893423865985532e-06, + "loss": 0.4588, + "step": 9281 + }, + { + "epoch": 0.565234600980422, + "grad_norm": 1.1697594581026671, + "learning_rate": 4.893400814474112e-06, + "loss": 0.4046, + "step": 9282 + }, + { + "epoch": 0.5652954967572998, + "grad_norm": 1.0421036350722908, + "learning_rate": 4.893377760524343e-06, + "loss": 0.421, + "step": 9283 + }, + { + "epoch": 0.5653563925341778, + "grad_norm": 0.9357465242971182, + "learning_rate": 4.893354704136251e-06, + "loss": 0.4213, + "step": 9284 + }, + { + "epoch": 0.5654172883110556, + "grad_norm": 0.979915391961865, + "learning_rate": 4.89333164530986e-06, + "loss": 0.459, + "step": 9285 + }, + { + "epoch": 0.5654781840879335, + "grad_norm": 1.0899135487733107, + "learning_rate": 4.893308584045191e-06, + "loss": 0.4142, + "step": 9286 + }, + { + "epoch": 0.5655390798648113, + "grad_norm": 1.002771675473478, + "learning_rate": 4.893285520342269e-06, + "loss": 0.4067, + "step": 9287 + }, + { + "epoch": 0.5655999756416893, + "grad_norm": 1.0180164413520179, + "learning_rate": 4.893262454201118e-06, + "loss": 0.4056, + "step": 9288 + }, + { + "epoch": 0.5656608714185671, + "grad_norm": 0.9989754739999211, + "learning_rate": 4.893239385621762e-06, + "loss": 0.4696, + "step": 9289 + }, + { + "epoch": 0.565721767195445, + "grad_norm": 1.0716116889036513, + "learning_rate": 4.893216314604222e-06, + "loss": 0.384, + "step": 9290 + }, + { + "epoch": 0.5657826629723228, + "grad_norm": 0.9698667439719804, + "learning_rate": 4.893193241148524e-06, + "loss": 0.4502, + "step": 9291 + }, + { + "epoch": 0.5658435587492008, + "grad_norm": 1.0069437505457348, + "learning_rate": 4.893170165254689e-06, + "loss": 0.5045, + "step": 9292 + }, + { + "epoch": 0.5659044545260786, + "grad_norm": 0.9965539113359894, + "learning_rate": 4.893147086922744e-06, + "loss": 0.4113, + "step": 9293 + }, + { + "epoch": 0.5659653503029565, + "grad_norm": 0.9234872763686947, + "learning_rate": 4.893124006152709e-06, + "loss": 0.4944, + "step": 9294 + }, + { + "epoch": 0.5660262460798343, + "grad_norm": 1.0198269785723681, + "learning_rate": 4.893100922944609e-06, + "loss": 0.443, + "step": 9295 + }, + { + "epoch": 0.5660871418567123, + "grad_norm": 0.946106712061539, + "learning_rate": 4.893077837298468e-06, + "loss": 0.4254, + "step": 9296 + }, + { + "epoch": 0.5661480376335901, + "grad_norm": 1.0307156946485607, + "learning_rate": 4.893054749214309e-06, + "loss": 0.3698, + "step": 9297 + }, + { + "epoch": 0.566208933410468, + "grad_norm": 0.949434150052729, + "learning_rate": 4.893031658692155e-06, + "loss": 0.4088, + "step": 9298 + }, + { + "epoch": 0.5662698291873458, + "grad_norm": 1.095014796261569, + "learning_rate": 4.893008565732031e-06, + "loss": 0.4759, + "step": 9299 + }, + { + "epoch": 0.5663307249642238, + "grad_norm": 1.0537214698858768, + "learning_rate": 4.892985470333959e-06, + "loss": 0.4216, + "step": 9300 + }, + { + "epoch": 0.5663916207411016, + "grad_norm": 1.000979947964106, + "learning_rate": 4.892962372497965e-06, + "loss": 0.4421, + "step": 9301 + }, + { + "epoch": 0.5664525165179795, + "grad_norm": 1.0209239583492644, + "learning_rate": 4.8929392722240685e-06, + "loss": 0.403, + "step": 9302 + }, + { + "epoch": 0.5665134122948573, + "grad_norm": 0.9328117304237262, + "learning_rate": 4.892916169512296e-06, + "loss": 0.4443, + "step": 9303 + }, + { + "epoch": 0.5665743080717353, + "grad_norm": 1.0674417387785506, + "learning_rate": 4.89289306436267e-06, + "loss": 0.4276, + "step": 9304 + }, + { + "epoch": 0.5666352038486131, + "grad_norm": 1.050475270178406, + "learning_rate": 4.892869956775215e-06, + "loss": 0.379, + "step": 9305 + }, + { + "epoch": 0.566696099625491, + "grad_norm": 1.0011887520448552, + "learning_rate": 4.892846846749953e-06, + "loss": 0.3922, + "step": 9306 + }, + { + "epoch": 0.5667569954023688, + "grad_norm": 0.9635743263583344, + "learning_rate": 4.892823734286909e-06, + "loss": 0.5407, + "step": 9307 + }, + { + "epoch": 0.5668178911792467, + "grad_norm": 1.0025158343756477, + "learning_rate": 4.892800619386105e-06, + "loss": 0.4725, + "step": 9308 + }, + { + "epoch": 0.5668787869561246, + "grad_norm": 0.9371038275850317, + "learning_rate": 4.892777502047567e-06, + "loss": 0.39, + "step": 9309 + }, + { + "epoch": 0.5669396827330024, + "grad_norm": 1.161988213199571, + "learning_rate": 4.8927543822713155e-06, + "loss": 0.4594, + "step": 9310 + }, + { + "epoch": 0.5670005785098804, + "grad_norm": 0.995419032109646, + "learning_rate": 4.892731260057376e-06, + "loss": 0.4825, + "step": 9311 + }, + { + "epoch": 0.5670614742867582, + "grad_norm": 1.027356923893648, + "learning_rate": 4.892708135405771e-06, + "loss": 0.4423, + "step": 9312 + }, + { + "epoch": 0.5671223700636361, + "grad_norm": 1.088936779366635, + "learning_rate": 4.8926850083165265e-06, + "loss": 0.4186, + "step": 9313 + }, + { + "epoch": 0.5671832658405139, + "grad_norm": 0.9495010892779436, + "learning_rate": 4.892661878789663e-06, + "loss": 0.4208, + "step": 9314 + }, + { + "epoch": 0.5672441616173919, + "grad_norm": 0.9624709938820786, + "learning_rate": 4.892638746825206e-06, + "loss": 0.4823, + "step": 9315 + }, + { + "epoch": 0.5673050573942697, + "grad_norm": 0.9881182354076107, + "learning_rate": 4.8926156124231775e-06, + "loss": 0.4377, + "step": 9316 + }, + { + "epoch": 0.5673659531711476, + "grad_norm": 1.0137694864429754, + "learning_rate": 4.892592475583603e-06, + "loss": 0.3835, + "step": 9317 + }, + { + "epoch": 0.5674268489480254, + "grad_norm": 0.9076523005065854, + "learning_rate": 4.892569336306504e-06, + "loss": 0.4321, + "step": 9318 + }, + { + "epoch": 0.5674877447249034, + "grad_norm": 1.0160675465684528, + "learning_rate": 4.892546194591906e-06, + "loss": 0.4056, + "step": 9319 + }, + { + "epoch": 0.5675486405017812, + "grad_norm": 1.0299267715099873, + "learning_rate": 4.892523050439832e-06, + "loss": 0.5276, + "step": 9320 + }, + { + "epoch": 0.5676095362786591, + "grad_norm": 1.1022294750528137, + "learning_rate": 4.892499903850304e-06, + "loss": 0.4135, + "step": 9321 + }, + { + "epoch": 0.5676704320555369, + "grad_norm": 1.0056878788768921, + "learning_rate": 4.892476754823347e-06, + "loss": 0.3772, + "step": 9322 + }, + { + "epoch": 0.5677313278324149, + "grad_norm": 0.9461576960051912, + "learning_rate": 4.892453603358984e-06, + "loss": 0.4283, + "step": 9323 + }, + { + "epoch": 0.5677922236092927, + "grad_norm": 1.026965268070405, + "learning_rate": 4.892430449457241e-06, + "loss": 0.4433, + "step": 9324 + }, + { + "epoch": 0.5678531193861706, + "grad_norm": 0.9927933339482645, + "learning_rate": 4.892407293118137e-06, + "loss": 0.4301, + "step": 9325 + }, + { + "epoch": 0.5679140151630484, + "grad_norm": 0.9482284125028106, + "learning_rate": 4.8923841343417e-06, + "loss": 0.4396, + "step": 9326 + }, + { + "epoch": 0.5679749109399264, + "grad_norm": 0.99862928704095, + "learning_rate": 4.892360973127952e-06, + "loss": 0.4783, + "step": 9327 + }, + { + "epoch": 0.5680358067168042, + "grad_norm": 0.9220984233509822, + "learning_rate": 4.892337809476916e-06, + "loss": 0.4342, + "step": 9328 + }, + { + "epoch": 0.568096702493682, + "grad_norm": 1.0796461464868563, + "learning_rate": 4.892314643388615e-06, + "loss": 0.3529, + "step": 9329 + }, + { + "epoch": 0.5681575982705599, + "grad_norm": 1.056231427204922, + "learning_rate": 4.892291474863075e-06, + "loss": 0.4579, + "step": 9330 + }, + { + "epoch": 0.5682184940474378, + "grad_norm": 1.0069251053652046, + "learning_rate": 4.892268303900317e-06, + "loss": 0.466, + "step": 9331 + }, + { + "epoch": 0.5682793898243157, + "grad_norm": 1.0577959813253968, + "learning_rate": 4.892245130500366e-06, + "loss": 0.4321, + "step": 9332 + }, + { + "epoch": 0.5683402856011935, + "grad_norm": 1.0858142254009773, + "learning_rate": 4.892221954663246e-06, + "loss": 0.3468, + "step": 9333 + }, + { + "epoch": 0.5684011813780714, + "grad_norm": 0.9993206505096119, + "learning_rate": 4.89219877638898e-06, + "loss": 0.4546, + "step": 9334 + }, + { + "epoch": 0.5684620771549493, + "grad_norm": 0.995075924423158, + "learning_rate": 4.892175595677591e-06, + "loss": 0.4351, + "step": 9335 + }, + { + "epoch": 0.5685229729318272, + "grad_norm": 1.1160441827216658, + "learning_rate": 4.8921524125291035e-06, + "loss": 0.4229, + "step": 9336 + }, + { + "epoch": 0.568583868708705, + "grad_norm": 1.0419350254324737, + "learning_rate": 4.8921292269435406e-06, + "loss": 0.3712, + "step": 9337 + }, + { + "epoch": 0.5686447644855829, + "grad_norm": 0.9349486240771577, + "learning_rate": 4.892106038920927e-06, + "loss": 0.431, + "step": 9338 + }, + { + "epoch": 0.5687056602624608, + "grad_norm": 1.0513498425223342, + "learning_rate": 4.892082848461285e-06, + "loss": 0.4563, + "step": 9339 + }, + { + "epoch": 0.5687665560393387, + "grad_norm": 1.0438248237173668, + "learning_rate": 4.892059655564638e-06, + "loss": 0.4363, + "step": 9340 + }, + { + "epoch": 0.5688274518162165, + "grad_norm": 0.9580903407759845, + "learning_rate": 4.892036460231011e-06, + "loss": 0.5285, + "step": 9341 + }, + { + "epoch": 0.5688883475930944, + "grad_norm": 1.019131703794979, + "learning_rate": 4.892013262460428e-06, + "loss": 0.5092, + "step": 9342 + }, + { + "epoch": 0.5689492433699723, + "grad_norm": 0.979360238639532, + "learning_rate": 4.89199006225291e-06, + "loss": 0.4223, + "step": 9343 + }, + { + "epoch": 0.5690101391468502, + "grad_norm": 0.938651213673721, + "learning_rate": 4.891966859608483e-06, + "loss": 0.4531, + "step": 9344 + }, + { + "epoch": 0.569071034923728, + "grad_norm": 1.0210493622098138, + "learning_rate": 4.8919436545271695e-06, + "loss": 0.4286, + "step": 9345 + }, + { + "epoch": 0.5691319307006059, + "grad_norm": 0.9972716101113916, + "learning_rate": 4.8919204470089945e-06, + "loss": 0.4126, + "step": 9346 + }, + { + "epoch": 0.5691928264774838, + "grad_norm": 0.9895442692773325, + "learning_rate": 4.8918972370539795e-06, + "loss": 0.4093, + "step": 9347 + }, + { + "epoch": 0.5692537222543617, + "grad_norm": 0.9790525658573141, + "learning_rate": 4.89187402466215e-06, + "loss": 0.539, + "step": 9348 + }, + { + "epoch": 0.5693146180312395, + "grad_norm": 1.0389361625603417, + "learning_rate": 4.891850809833529e-06, + "loss": 0.4462, + "step": 9349 + }, + { + "epoch": 0.5693755138081175, + "grad_norm": 1.1346266983198365, + "learning_rate": 4.891827592568139e-06, + "loss": 0.492, + "step": 9350 + }, + { + "epoch": 0.5694364095849953, + "grad_norm": 0.8964758603756409, + "learning_rate": 4.891804372866006e-06, + "loss": 0.5095, + "step": 9351 + }, + { + "epoch": 0.5694973053618732, + "grad_norm": 1.0097963462453667, + "learning_rate": 4.891781150727152e-06, + "loss": 0.5049, + "step": 9352 + }, + { + "epoch": 0.569558201138751, + "grad_norm": 1.074657447721578, + "learning_rate": 4.8917579261516015e-06, + "loss": 0.4115, + "step": 9353 + }, + { + "epoch": 0.569619096915629, + "grad_norm": 0.9674433399725826, + "learning_rate": 4.8917346991393775e-06, + "loss": 0.47, + "step": 9354 + }, + { + "epoch": 0.5696799926925068, + "grad_norm": 1.081549768918777, + "learning_rate": 4.891711469690505e-06, + "loss": 0.4303, + "step": 9355 + }, + { + "epoch": 0.5697408884693846, + "grad_norm": 1.0300639616936433, + "learning_rate": 4.8916882378050045e-06, + "loss": 0.4795, + "step": 9356 + }, + { + "epoch": 0.5698017842462625, + "grad_norm": 1.0461974092055903, + "learning_rate": 4.891665003482903e-06, + "loss": 0.4676, + "step": 9357 + }, + { + "epoch": 0.5698626800231404, + "grad_norm": 0.9117033151989818, + "learning_rate": 4.8916417667242225e-06, + "loss": 0.4511, + "step": 9358 + }, + { + "epoch": 0.5699235758000183, + "grad_norm": 1.0400546399701118, + "learning_rate": 4.891618527528987e-06, + "loss": 0.4423, + "step": 9359 + }, + { + "epoch": 0.5699844715768961, + "grad_norm": 1.035567168174677, + "learning_rate": 4.89159528589722e-06, + "loss": 0.4544, + "step": 9360 + }, + { + "epoch": 0.570045367353774, + "grad_norm": 0.9491706458038978, + "learning_rate": 4.891572041828947e-06, + "loss": 0.4839, + "step": 9361 + }, + { + "epoch": 0.5701062631306519, + "grad_norm": 0.9409398322658107, + "learning_rate": 4.891548795324189e-06, + "loss": 0.5077, + "step": 9362 + }, + { + "epoch": 0.5701671589075298, + "grad_norm": 0.9190691268308558, + "learning_rate": 4.8915255463829705e-06, + "loss": 0.4952, + "step": 9363 + }, + { + "epoch": 0.5702280546844076, + "grad_norm": 1.0295286657597031, + "learning_rate": 4.891502295005316e-06, + "loss": 0.5254, + "step": 9364 + }, + { + "epoch": 0.5702889504612855, + "grad_norm": 1.0266326492758422, + "learning_rate": 4.891479041191248e-06, + "loss": 0.4706, + "step": 9365 + }, + { + "epoch": 0.5703498462381634, + "grad_norm": 0.9840561492451313, + "learning_rate": 4.891455784940792e-06, + "loss": 0.4035, + "step": 9366 + }, + { + "epoch": 0.5704107420150413, + "grad_norm": 1.0304100859162493, + "learning_rate": 4.89143252625397e-06, + "loss": 0.3962, + "step": 9367 + }, + { + "epoch": 0.5704716377919191, + "grad_norm": 0.9387973579336008, + "learning_rate": 4.891409265130806e-06, + "loss": 0.5336, + "step": 9368 + }, + { + "epoch": 0.570532533568797, + "grad_norm": 1.072806035574443, + "learning_rate": 4.891386001571324e-06, + "loss": 0.4208, + "step": 9369 + }, + { + "epoch": 0.5705934293456749, + "grad_norm": 0.9879807185450689, + "learning_rate": 4.891362735575547e-06, + "loss": 0.4602, + "step": 9370 + }, + { + "epoch": 0.5706543251225528, + "grad_norm": 1.0295454185870905, + "learning_rate": 4.891339467143501e-06, + "loss": 0.3805, + "step": 9371 + }, + { + "epoch": 0.5707152208994306, + "grad_norm": 1.0100978492403467, + "learning_rate": 4.891316196275208e-06, + "loss": 0.4196, + "step": 9372 + }, + { + "epoch": 0.5707761166763085, + "grad_norm": 1.00116284345243, + "learning_rate": 4.891292922970691e-06, + "loss": 0.3986, + "step": 9373 + }, + { + "epoch": 0.5708370124531864, + "grad_norm": 1.1213232355307516, + "learning_rate": 4.891269647229974e-06, + "loss": 0.3944, + "step": 9374 + }, + { + "epoch": 0.5708979082300643, + "grad_norm": 1.007970880255596, + "learning_rate": 4.891246369053082e-06, + "loss": 0.4237, + "step": 9375 + }, + { + "epoch": 0.5709588040069421, + "grad_norm": 1.0138152620008793, + "learning_rate": 4.891223088440038e-06, + "loss": 0.3982, + "step": 9376 + }, + { + "epoch": 0.57101969978382, + "grad_norm": 0.9254760511372696, + "learning_rate": 4.891199805390865e-06, + "loss": 0.4721, + "step": 9377 + }, + { + "epoch": 0.5710805955606979, + "grad_norm": 1.0186114564020974, + "learning_rate": 4.891176519905587e-06, + "loss": 0.535, + "step": 9378 + }, + { + "epoch": 0.5711414913375757, + "grad_norm": 0.9780655347876226, + "learning_rate": 4.8911532319842296e-06, + "loss": 0.4462, + "step": 9379 + }, + { + "epoch": 0.5712023871144536, + "grad_norm": 0.8784390460681749, + "learning_rate": 4.891129941626814e-06, + "loss": 0.4209, + "step": 9380 + }, + { + "epoch": 0.5712632828913314, + "grad_norm": 1.0944610010173474, + "learning_rate": 4.891106648833365e-06, + "loss": 0.4497, + "step": 9381 + }, + { + "epoch": 0.5713241786682094, + "grad_norm": 1.0517239827012022, + "learning_rate": 4.891083353603906e-06, + "loss": 0.4361, + "step": 9382 + }, + { + "epoch": 0.5713850744450872, + "grad_norm": 0.9732294502226145, + "learning_rate": 4.891060055938462e-06, + "loss": 0.4378, + "step": 9383 + }, + { + "epoch": 0.5714459702219651, + "grad_norm": 1.0256177988412967, + "learning_rate": 4.891036755837055e-06, + "loss": 0.384, + "step": 9384 + }, + { + "epoch": 0.5715068659988429, + "grad_norm": 1.0859048103592106, + "learning_rate": 4.89101345329971e-06, + "loss": 0.3922, + "step": 9385 + }, + { + "epoch": 0.5715677617757209, + "grad_norm": 1.0507793496705617, + "learning_rate": 4.890990148326449e-06, + "loss": 0.4078, + "step": 9386 + }, + { + "epoch": 0.5716286575525987, + "grad_norm": 0.9804091475128045, + "learning_rate": 4.890966840917298e-06, + "loss": 0.45, + "step": 9387 + }, + { + "epoch": 0.5716895533294766, + "grad_norm": 1.06034360153772, + "learning_rate": 4.8909435310722795e-06, + "loss": 0.4502, + "step": 9388 + }, + { + "epoch": 0.5717504491063544, + "grad_norm": 1.0032823468275116, + "learning_rate": 4.890920218791417e-06, + "loss": 0.4319, + "step": 9389 + }, + { + "epoch": 0.5718113448832324, + "grad_norm": 1.063998500535485, + "learning_rate": 4.890896904074735e-06, + "loss": 0.4135, + "step": 9390 + }, + { + "epoch": 0.5718722406601102, + "grad_norm": 0.9945714399187732, + "learning_rate": 4.890873586922257e-06, + "loss": 0.4266, + "step": 9391 + }, + { + "epoch": 0.5719331364369881, + "grad_norm": 1.0007749908417345, + "learning_rate": 4.8908502673340064e-06, + "loss": 0.4285, + "step": 9392 + }, + { + "epoch": 0.571994032213866, + "grad_norm": 1.041974637640333, + "learning_rate": 4.890826945310008e-06, + "loss": 0.469, + "step": 9393 + }, + { + "epoch": 0.5720549279907439, + "grad_norm": 0.9751626826193333, + "learning_rate": 4.890803620850284e-06, + "loss": 0.4102, + "step": 9394 + }, + { + "epoch": 0.5721158237676217, + "grad_norm": 0.9829842839254317, + "learning_rate": 4.890780293954859e-06, + "loss": 0.4274, + "step": 9395 + }, + { + "epoch": 0.5721767195444996, + "grad_norm": 1.0388923425220666, + "learning_rate": 4.890756964623758e-06, + "loss": 0.4584, + "step": 9396 + }, + { + "epoch": 0.5722376153213775, + "grad_norm": 0.9036341463345481, + "learning_rate": 4.890733632857001e-06, + "loss": 0.4971, + "step": 9397 + }, + { + "epoch": 0.5722985110982554, + "grad_norm": 1.0581726435200098, + "learning_rate": 4.890710298654616e-06, + "loss": 0.4288, + "step": 9398 + }, + { + "epoch": 0.5723594068751332, + "grad_norm": 0.9276120966611262, + "learning_rate": 4.890686962016625e-06, + "loss": 0.4108, + "step": 9399 + }, + { + "epoch": 0.572420302652011, + "grad_norm": 1.036245113803797, + "learning_rate": 4.890663622943052e-06, + "loss": 0.4072, + "step": 9400 + }, + { + "epoch": 0.572481198428889, + "grad_norm": 0.9962643495755602, + "learning_rate": 4.890640281433921e-06, + "loss": 0.4308, + "step": 9401 + }, + { + "epoch": 0.5725420942057668, + "grad_norm": 1.088250977377567, + "learning_rate": 4.890616937489254e-06, + "loss": 0.4474, + "step": 9402 + }, + { + "epoch": 0.5726029899826447, + "grad_norm": 1.0175181258214732, + "learning_rate": 4.890593591109077e-06, + "loss": 0.3936, + "step": 9403 + }, + { + "epoch": 0.5726638857595225, + "grad_norm": 1.062529999584938, + "learning_rate": 4.890570242293413e-06, + "loss": 0.4619, + "step": 9404 + }, + { + "epoch": 0.5727247815364005, + "grad_norm": 1.01667239588101, + "learning_rate": 4.890546891042285e-06, + "loss": 0.4063, + "step": 9405 + }, + { + "epoch": 0.5727856773132783, + "grad_norm": 0.9828486477963094, + "learning_rate": 4.8905235373557184e-06, + "loss": 0.5123, + "step": 9406 + }, + { + "epoch": 0.5728465730901562, + "grad_norm": 0.9958103390003955, + "learning_rate": 4.890500181233735e-06, + "loss": 0.4378, + "step": 9407 + }, + { + "epoch": 0.572907468867034, + "grad_norm": 1.0215709589800954, + "learning_rate": 4.890476822676361e-06, + "loss": 0.4505, + "step": 9408 + }, + { + "epoch": 0.572968364643912, + "grad_norm": 1.0249922626862609, + "learning_rate": 4.890453461683619e-06, + "loss": 0.4104, + "step": 9409 + }, + { + "epoch": 0.5730292604207898, + "grad_norm": 0.974359171049537, + "learning_rate": 4.8904300982555316e-06, + "loss": 0.4453, + "step": 9410 + }, + { + "epoch": 0.5730901561976677, + "grad_norm": 1.0153651105108288, + "learning_rate": 4.890406732392125e-06, + "loss": 0.4571, + "step": 9411 + }, + { + "epoch": 0.5731510519745455, + "grad_norm": 0.9387794617229666, + "learning_rate": 4.89038336409342e-06, + "loss": 0.4101, + "step": 9412 + }, + { + "epoch": 0.5732119477514235, + "grad_norm": 0.9941092864847351, + "learning_rate": 4.890359993359443e-06, + "loss": 0.4498, + "step": 9413 + }, + { + "epoch": 0.5732728435283013, + "grad_norm": 0.9617038285386772, + "learning_rate": 4.890336620190217e-06, + "loss": 0.4838, + "step": 9414 + }, + { + "epoch": 0.5733337393051792, + "grad_norm": 0.9727242958458981, + "learning_rate": 4.890313244585766e-06, + "loss": 0.4005, + "step": 9415 + }, + { + "epoch": 0.573394635082057, + "grad_norm": 1.0427655919653431, + "learning_rate": 4.8902898665461125e-06, + "loss": 0.4195, + "step": 9416 + }, + { + "epoch": 0.573455530858935, + "grad_norm": 1.0375513711347377, + "learning_rate": 4.890266486071283e-06, + "loss": 0.5029, + "step": 9417 + }, + { + "epoch": 0.5735164266358128, + "grad_norm": 0.9682331251637882, + "learning_rate": 4.890243103161298e-06, + "loss": 0.4103, + "step": 9418 + }, + { + "epoch": 0.5735773224126907, + "grad_norm": 0.9666975351452883, + "learning_rate": 4.8902197178161845e-06, + "loss": 0.4114, + "step": 9419 + }, + { + "epoch": 0.5736382181895685, + "grad_norm": 1.109028610983371, + "learning_rate": 4.890196330035964e-06, + "loss": 0.4558, + "step": 9420 + }, + { + "epoch": 0.5736991139664465, + "grad_norm": 0.9718241511622159, + "learning_rate": 4.890172939820662e-06, + "loss": 0.4787, + "step": 9421 + }, + { + "epoch": 0.5737600097433243, + "grad_norm": 1.061501390195266, + "learning_rate": 4.8901495471703005e-06, + "loss": 0.3511, + "step": 9422 + }, + { + "epoch": 0.5738209055202022, + "grad_norm": 0.9577262139298655, + "learning_rate": 4.890126152084905e-06, + "loss": 0.467, + "step": 9423 + }, + { + "epoch": 0.57388180129708, + "grad_norm": 1.040346023414469, + "learning_rate": 4.890102754564499e-06, + "loss": 0.4605, + "step": 9424 + }, + { + "epoch": 0.573942697073958, + "grad_norm": 1.0456802386771595, + "learning_rate": 4.890079354609105e-06, + "loss": 0.3965, + "step": 9425 + }, + { + "epoch": 0.5740035928508358, + "grad_norm": 1.0007008208854393, + "learning_rate": 4.890055952218748e-06, + "loss": 0.3868, + "step": 9426 + }, + { + "epoch": 0.5740644886277136, + "grad_norm": 1.0147868314791175, + "learning_rate": 4.890032547393452e-06, + "loss": 0.4052, + "step": 9427 + }, + { + "epoch": 0.5741253844045915, + "grad_norm": 0.9610362534680947, + "learning_rate": 4.89000914013324e-06, + "loss": 0.4266, + "step": 9428 + }, + { + "epoch": 0.5741862801814694, + "grad_norm": 0.963121683006829, + "learning_rate": 4.889985730438137e-06, + "loss": 0.4398, + "step": 9429 + }, + { + "epoch": 0.5742471759583473, + "grad_norm": 1.0453673853655177, + "learning_rate": 4.889962318308167e-06, + "loss": 0.4383, + "step": 9430 + }, + { + "epoch": 0.5743080717352251, + "grad_norm": 0.9877453081841948, + "learning_rate": 4.889938903743352e-06, + "loss": 0.4682, + "step": 9431 + }, + { + "epoch": 0.5743689675121031, + "grad_norm": 0.9848722756537913, + "learning_rate": 4.889915486743717e-06, + "loss": 0.5206, + "step": 9432 + }, + { + "epoch": 0.5744298632889809, + "grad_norm": 0.9961937331662283, + "learning_rate": 4.889892067309286e-06, + "loss": 0.5019, + "step": 9433 + }, + { + "epoch": 0.5744907590658588, + "grad_norm": 0.9791726314271921, + "learning_rate": 4.889868645440082e-06, + "loss": 0.4251, + "step": 9434 + }, + { + "epoch": 0.5745516548427366, + "grad_norm": 1.0590573915239012, + "learning_rate": 4.889845221136131e-06, + "loss": 0.3725, + "step": 9435 + }, + { + "epoch": 0.5746125506196146, + "grad_norm": 0.9457971492843854, + "learning_rate": 4.889821794397454e-06, + "loss": 0.4957, + "step": 9436 + }, + { + "epoch": 0.5746734463964924, + "grad_norm": 1.046761747446296, + "learning_rate": 4.889798365224077e-06, + "loss": 0.4267, + "step": 9437 + }, + { + "epoch": 0.5747343421733703, + "grad_norm": 1.1022440323606468, + "learning_rate": 4.889774933616023e-06, + "loss": 0.3755, + "step": 9438 + }, + { + "epoch": 0.5747952379502481, + "grad_norm": 0.969851307196148, + "learning_rate": 4.889751499573316e-06, + "loss": 0.4024, + "step": 9439 + }, + { + "epoch": 0.5748561337271261, + "grad_norm": 1.0705654469196284, + "learning_rate": 4.889728063095979e-06, + "loss": 0.4183, + "step": 9440 + }, + { + "epoch": 0.5749170295040039, + "grad_norm": 0.9983234581543022, + "learning_rate": 4.889704624184037e-06, + "loss": 0.4054, + "step": 9441 + }, + { + "epoch": 0.5749779252808818, + "grad_norm": 1.0046149545956458, + "learning_rate": 4.8896811828375145e-06, + "loss": 0.4584, + "step": 9442 + }, + { + "epoch": 0.5750388210577596, + "grad_norm": 0.9553908000739708, + "learning_rate": 4.889657739056434e-06, + "loss": 0.4423, + "step": 9443 + }, + { + "epoch": 0.5750997168346376, + "grad_norm": 0.9464377952207956, + "learning_rate": 4.88963429284082e-06, + "loss": 0.4232, + "step": 9444 + }, + { + "epoch": 0.5751606126115154, + "grad_norm": 1.0000560121004485, + "learning_rate": 4.889610844190696e-06, + "loss": 0.4552, + "step": 9445 + }, + { + "epoch": 0.5752215083883933, + "grad_norm": 0.9316433835092195, + "learning_rate": 4.889587393106086e-06, + "loss": 0.436, + "step": 9446 + }, + { + "epoch": 0.5752824041652711, + "grad_norm": 0.9523645178214907, + "learning_rate": 4.8895639395870145e-06, + "loss": 0.4416, + "step": 9447 + }, + { + "epoch": 0.5753432999421491, + "grad_norm": 1.0120548848684785, + "learning_rate": 4.889540483633504e-06, + "loss": 0.4356, + "step": 9448 + }, + { + "epoch": 0.5754041957190269, + "grad_norm": 0.9617531494139145, + "learning_rate": 4.889517025245581e-06, + "loss": 0.4788, + "step": 9449 + }, + { + "epoch": 0.5754650914959047, + "grad_norm": 0.9336247930856026, + "learning_rate": 4.889493564423267e-06, + "loss": 0.4876, + "step": 9450 + }, + { + "epoch": 0.5755259872727826, + "grad_norm": 0.9926712782253093, + "learning_rate": 4.889470101166586e-06, + "loss": 0.4757, + "step": 9451 + }, + { + "epoch": 0.5755868830496605, + "grad_norm": 0.982883776448768, + "learning_rate": 4.889446635475563e-06, + "loss": 0.4856, + "step": 9452 + }, + { + "epoch": 0.5756477788265384, + "grad_norm": 0.9633164197418969, + "learning_rate": 4.889423167350221e-06, + "loss": 0.4568, + "step": 9453 + }, + { + "epoch": 0.5757086746034162, + "grad_norm": 1.042033558264612, + "learning_rate": 4.889399696790586e-06, + "loss": 0.4175, + "step": 9454 + }, + { + "epoch": 0.5757695703802941, + "grad_norm": 1.064727900995122, + "learning_rate": 4.8893762237966785e-06, + "loss": 0.4366, + "step": 9455 + }, + { + "epoch": 0.575830466157172, + "grad_norm": 1.101871938790798, + "learning_rate": 4.889352748368524e-06, + "loss": 0.4363, + "step": 9456 + }, + { + "epoch": 0.5758913619340499, + "grad_norm": 1.0089922789288257, + "learning_rate": 4.8893292705061475e-06, + "loss": 0.4534, + "step": 9457 + }, + { + "epoch": 0.5759522577109277, + "grad_norm": 0.8833144954555872, + "learning_rate": 4.889305790209573e-06, + "loss": 0.4537, + "step": 9458 + }, + { + "epoch": 0.5760131534878056, + "grad_norm": 1.0034581108480702, + "learning_rate": 4.889282307478822e-06, + "loss": 0.4319, + "step": 9459 + }, + { + "epoch": 0.5760740492646835, + "grad_norm": 0.9950050001768067, + "learning_rate": 4.8892588223139196e-06, + "loss": 0.4125, + "step": 9460 + }, + { + "epoch": 0.5761349450415614, + "grad_norm": 0.8885288628939704, + "learning_rate": 4.889235334714891e-06, + "loss": 0.4657, + "step": 9461 + }, + { + "epoch": 0.5761958408184392, + "grad_norm": 0.9723149258675022, + "learning_rate": 4.889211844681758e-06, + "loss": 0.4456, + "step": 9462 + }, + { + "epoch": 0.5762567365953171, + "grad_norm": 0.9854556948843798, + "learning_rate": 4.8891883522145465e-06, + "loss": 0.446, + "step": 9463 + }, + { + "epoch": 0.576317632372195, + "grad_norm": 0.9936267408885218, + "learning_rate": 4.88916485731328e-06, + "loss": 0.4907, + "step": 9464 + }, + { + "epoch": 0.5763785281490729, + "grad_norm": 0.9972876534685613, + "learning_rate": 4.889141359977981e-06, + "loss": 0.3812, + "step": 9465 + }, + { + "epoch": 0.5764394239259507, + "grad_norm": 1.02677190201874, + "learning_rate": 4.889117860208675e-06, + "loss": 0.42, + "step": 9466 + }, + { + "epoch": 0.5765003197028286, + "grad_norm": 1.0324181362975626, + "learning_rate": 4.889094358005385e-06, + "loss": 0.4748, + "step": 9467 + }, + { + "epoch": 0.5765612154797065, + "grad_norm": 1.0607436910306827, + "learning_rate": 4.889070853368136e-06, + "loss": 0.5091, + "step": 9468 + }, + { + "epoch": 0.5766221112565844, + "grad_norm": 0.9931317746305519, + "learning_rate": 4.889047346296951e-06, + "loss": 0.4342, + "step": 9469 + }, + { + "epoch": 0.5766830070334622, + "grad_norm": 1.037751904395643, + "learning_rate": 4.8890238367918544e-06, + "loss": 0.3941, + "step": 9470 + }, + { + "epoch": 0.57674390281034, + "grad_norm": 0.9982655460030023, + "learning_rate": 4.889000324852869e-06, + "loss": 0.4624, + "step": 9471 + }, + { + "epoch": 0.576804798587218, + "grad_norm": 1.0241172467201318, + "learning_rate": 4.888976810480021e-06, + "loss": 0.4279, + "step": 9472 + }, + { + "epoch": 0.5768656943640958, + "grad_norm": 0.9874885288221167, + "learning_rate": 4.888953293673332e-06, + "loss": 0.3917, + "step": 9473 + }, + { + "epoch": 0.5769265901409737, + "grad_norm": 0.9644727481963272, + "learning_rate": 4.888929774432828e-06, + "loss": 0.4289, + "step": 9474 + }, + { + "epoch": 0.5769874859178517, + "grad_norm": 1.0834286509653337, + "learning_rate": 4.888906252758531e-06, + "loss": 0.4696, + "step": 9475 + }, + { + "epoch": 0.5770483816947295, + "grad_norm": 0.9553082723188735, + "learning_rate": 4.888882728650467e-06, + "loss": 0.3981, + "step": 9476 + }, + { + "epoch": 0.5771092774716073, + "grad_norm": 1.0228748443589197, + "learning_rate": 4.888859202108658e-06, + "loss": 0.4506, + "step": 9477 + }, + { + "epoch": 0.5771701732484852, + "grad_norm": 0.9300157458403906, + "learning_rate": 4.88883567313313e-06, + "loss": 0.504, + "step": 9478 + }, + { + "epoch": 0.5772310690253631, + "grad_norm": 1.1160620842695181, + "learning_rate": 4.888812141723904e-06, + "loss": 0.4458, + "step": 9479 + }, + { + "epoch": 0.577291964802241, + "grad_norm": 1.0143260337135989, + "learning_rate": 4.888788607881008e-06, + "loss": 0.4829, + "step": 9480 + }, + { + "epoch": 0.5773528605791188, + "grad_norm": 1.0129376003685493, + "learning_rate": 4.888765071604464e-06, + "loss": 0.4496, + "step": 9481 + }, + { + "epoch": 0.5774137563559967, + "grad_norm": 1.0512172305647327, + "learning_rate": 4.888741532894294e-06, + "loss": 0.4025, + "step": 9482 + }, + { + "epoch": 0.5774746521328746, + "grad_norm": 1.056897816132524, + "learning_rate": 4.888717991750525e-06, + "loss": 0.3918, + "step": 9483 + }, + { + "epoch": 0.5775355479097525, + "grad_norm": 0.917425333692416, + "learning_rate": 4.888694448173179e-06, + "loss": 0.4676, + "step": 9484 + }, + { + "epoch": 0.5775964436866303, + "grad_norm": 1.0697161907723725, + "learning_rate": 4.8886709021622805e-06, + "loss": 0.4622, + "step": 9485 + }, + { + "epoch": 0.5776573394635082, + "grad_norm": 1.0384099376080302, + "learning_rate": 4.888647353717855e-06, + "loss": 0.43, + "step": 9486 + }, + { + "epoch": 0.5777182352403861, + "grad_norm": 1.043422185444454, + "learning_rate": 4.8886238028399245e-06, + "loss": 0.462, + "step": 9487 + }, + { + "epoch": 0.577779131017264, + "grad_norm": 0.9971323874274284, + "learning_rate": 4.8886002495285134e-06, + "loss": 0.4358, + "step": 9488 + }, + { + "epoch": 0.5778400267941418, + "grad_norm": 1.0520446056577373, + "learning_rate": 4.888576693783646e-06, + "loss": 0.4878, + "step": 9489 + }, + { + "epoch": 0.5779009225710197, + "grad_norm": 0.9257988638759518, + "learning_rate": 4.888553135605347e-06, + "loss": 0.4403, + "step": 9490 + }, + { + "epoch": 0.5779618183478976, + "grad_norm": 0.9149574988309309, + "learning_rate": 4.88852957499364e-06, + "loss": 0.5187, + "step": 9491 + }, + { + "epoch": 0.5780227141247755, + "grad_norm": 1.001656157410161, + "learning_rate": 4.888506011948549e-06, + "loss": 0.3761, + "step": 9492 + }, + { + "epoch": 0.5780836099016533, + "grad_norm": 0.9687731795536906, + "learning_rate": 4.8884824464700964e-06, + "loss": 0.4584, + "step": 9493 + }, + { + "epoch": 0.5781445056785312, + "grad_norm": 0.9659815967917587, + "learning_rate": 4.888458878558307e-06, + "loss": 0.4634, + "step": 9494 + }, + { + "epoch": 0.5782054014554091, + "grad_norm": 0.9805757204639726, + "learning_rate": 4.888435308213208e-06, + "loss": 0.4657, + "step": 9495 + }, + { + "epoch": 0.578266297232287, + "grad_norm": 0.9428449075345413, + "learning_rate": 4.888411735434818e-06, + "loss": 0.4593, + "step": 9496 + }, + { + "epoch": 0.5783271930091648, + "grad_norm": 1.0640704086901103, + "learning_rate": 4.888388160223165e-06, + "loss": 0.423, + "step": 9497 + }, + { + "epoch": 0.5783880887860426, + "grad_norm": 1.0432179386128113, + "learning_rate": 4.888364582578272e-06, + "loss": 0.474, + "step": 9498 + }, + { + "epoch": 0.5784489845629206, + "grad_norm": 1.0016205227680535, + "learning_rate": 4.888341002500163e-06, + "loss": 0.425, + "step": 9499 + }, + { + "epoch": 0.5785098803397984, + "grad_norm": 1.0740850120105498, + "learning_rate": 4.888317419988862e-06, + "loss": 0.3995, + "step": 9500 + }, + { + "epoch": 0.5785707761166763, + "grad_norm": 1.076620694708439, + "learning_rate": 4.888293835044392e-06, + "loss": 0.4826, + "step": 9501 + }, + { + "epoch": 0.5786316718935541, + "grad_norm": 1.0185884666105, + "learning_rate": 4.888270247666778e-06, + "loss": 0.4439, + "step": 9502 + }, + { + "epoch": 0.5786925676704321, + "grad_norm": 0.9794383150125585, + "learning_rate": 4.888246657856044e-06, + "loss": 0.458, + "step": 9503 + }, + { + "epoch": 0.5787534634473099, + "grad_norm": 1.0801181394012376, + "learning_rate": 4.8882230656122144e-06, + "loss": 0.4787, + "step": 9504 + }, + { + "epoch": 0.5788143592241878, + "grad_norm": 0.9407676170505936, + "learning_rate": 4.888199470935312e-06, + "loss": 0.4921, + "step": 9505 + }, + { + "epoch": 0.5788752550010656, + "grad_norm": 1.0332461650870075, + "learning_rate": 4.8881758738253626e-06, + "loss": 0.4127, + "step": 9506 + }, + { + "epoch": 0.5789361507779436, + "grad_norm": 1.0797638969464967, + "learning_rate": 4.8881522742823884e-06, + "loss": 0.5054, + "step": 9507 + }, + { + "epoch": 0.5789970465548214, + "grad_norm": 1.0543612909849747, + "learning_rate": 4.888128672306415e-06, + "loss": 0.4374, + "step": 9508 + }, + { + "epoch": 0.5790579423316993, + "grad_norm": 0.9819219838000429, + "learning_rate": 4.888105067897465e-06, + "loss": 0.48, + "step": 9509 + }, + { + "epoch": 0.5791188381085771, + "grad_norm": 0.9791414292322748, + "learning_rate": 4.888081461055564e-06, + "loss": 0.4411, + "step": 9510 + }, + { + "epoch": 0.5791797338854551, + "grad_norm": 0.9624895705847591, + "learning_rate": 4.888057851780735e-06, + "loss": 0.4757, + "step": 9511 + }, + { + "epoch": 0.5792406296623329, + "grad_norm": 0.9709567227410253, + "learning_rate": 4.888034240073002e-06, + "loss": 0.463, + "step": 9512 + }, + { + "epoch": 0.5793015254392108, + "grad_norm": 0.9774875693706339, + "learning_rate": 4.88801062593239e-06, + "loss": 0.4097, + "step": 9513 + }, + { + "epoch": 0.5793624212160887, + "grad_norm": 0.9029218739915197, + "learning_rate": 4.887987009358922e-06, + "loss": 0.4583, + "step": 9514 + }, + { + "epoch": 0.5794233169929666, + "grad_norm": 1.0769981790961625, + "learning_rate": 4.887963390352622e-06, + "loss": 0.4226, + "step": 9515 + }, + { + "epoch": 0.5794842127698444, + "grad_norm": 0.9720077668050596, + "learning_rate": 4.887939768913515e-06, + "loss": 0.4411, + "step": 9516 + }, + { + "epoch": 0.5795451085467223, + "grad_norm": 1.0081241590202354, + "learning_rate": 4.887916145041625e-06, + "loss": 0.4438, + "step": 9517 + }, + { + "epoch": 0.5796060043236002, + "grad_norm": 0.9996729758498737, + "learning_rate": 4.887892518736975e-06, + "loss": 0.4461, + "step": 9518 + }, + { + "epoch": 0.579666900100478, + "grad_norm": 0.9280900925589571, + "learning_rate": 4.887868889999591e-06, + "loss": 0.4288, + "step": 9519 + }, + { + "epoch": 0.5797277958773559, + "grad_norm": 1.0060998435656472, + "learning_rate": 4.887845258829495e-06, + "loss": 0.4282, + "step": 9520 + }, + { + "epoch": 0.5797886916542337, + "grad_norm": 1.002095564860163, + "learning_rate": 4.887821625226711e-06, + "loss": 0.4328, + "step": 9521 + }, + { + "epoch": 0.5798495874311117, + "grad_norm": 1.0057760839827123, + "learning_rate": 4.887797989191265e-06, + "loss": 0.4354, + "step": 9522 + }, + { + "epoch": 0.5799104832079895, + "grad_norm": 1.0440420637322412, + "learning_rate": 4.88777435072318e-06, + "loss": 0.5175, + "step": 9523 + }, + { + "epoch": 0.5799713789848674, + "grad_norm": 1.0488222560248122, + "learning_rate": 4.88775070982248e-06, + "loss": 0.4601, + "step": 9524 + }, + { + "epoch": 0.5800322747617452, + "grad_norm": 1.0794966975942555, + "learning_rate": 4.887727066489189e-06, + "loss": 0.4573, + "step": 9525 + }, + { + "epoch": 0.5800931705386232, + "grad_norm": 0.9909137167984896, + "learning_rate": 4.887703420723331e-06, + "loss": 0.3785, + "step": 9526 + }, + { + "epoch": 0.580154066315501, + "grad_norm": 0.9232397476116864, + "learning_rate": 4.8876797725249314e-06, + "loss": 0.4442, + "step": 9527 + }, + { + "epoch": 0.5802149620923789, + "grad_norm": 1.0666333463349018, + "learning_rate": 4.887656121894013e-06, + "loss": 0.415, + "step": 9528 + }, + { + "epoch": 0.5802758578692567, + "grad_norm": 1.0143764913081055, + "learning_rate": 4.8876324688306e-06, + "loss": 0.4712, + "step": 9529 + }, + { + "epoch": 0.5803367536461347, + "grad_norm": 1.0567398244981496, + "learning_rate": 4.887608813334716e-06, + "loss": 0.3918, + "step": 9530 + }, + { + "epoch": 0.5803976494230125, + "grad_norm": 0.983807810388449, + "learning_rate": 4.887585155406387e-06, + "loss": 0.454, + "step": 9531 + }, + { + "epoch": 0.5804585451998904, + "grad_norm": 1.0624276329929798, + "learning_rate": 4.887561495045635e-06, + "loss": 0.4329, + "step": 9532 + }, + { + "epoch": 0.5805194409767682, + "grad_norm": 0.9372234521681235, + "learning_rate": 4.8875378322524855e-06, + "loss": 0.4914, + "step": 9533 + }, + { + "epoch": 0.5805803367536462, + "grad_norm": 1.0806131694848602, + "learning_rate": 4.887514167026962e-06, + "loss": 0.3945, + "step": 9534 + }, + { + "epoch": 0.580641232530524, + "grad_norm": 0.9351622616054343, + "learning_rate": 4.887490499369088e-06, + "loss": 0.3948, + "step": 9535 + }, + { + "epoch": 0.5807021283074019, + "grad_norm": 1.0340915101160828, + "learning_rate": 4.88746682927889e-06, + "loss": 0.4743, + "step": 9536 + }, + { + "epoch": 0.5807630240842797, + "grad_norm": 1.127958064296269, + "learning_rate": 4.887443156756389e-06, + "loss": 0.3857, + "step": 9537 + }, + { + "epoch": 0.5808239198611577, + "grad_norm": 0.9694755436024614, + "learning_rate": 4.887419481801611e-06, + "loss": 0.4697, + "step": 9538 + }, + { + "epoch": 0.5808848156380355, + "grad_norm": 0.9290389066929521, + "learning_rate": 4.887395804414579e-06, + "loss": 0.491, + "step": 9539 + }, + { + "epoch": 0.5809457114149134, + "grad_norm": 1.097065193468815, + "learning_rate": 4.8873721245953186e-06, + "loss": 0.4456, + "step": 9540 + }, + { + "epoch": 0.5810066071917912, + "grad_norm": 1.0535759038538592, + "learning_rate": 4.887348442343853e-06, + "loss": 0.3827, + "step": 9541 + }, + { + "epoch": 0.5810675029686692, + "grad_norm": 0.9755258730070128, + "learning_rate": 4.887324757660206e-06, + "loss": 0.4435, + "step": 9542 + }, + { + "epoch": 0.581128398745547, + "grad_norm": 0.9730400639435711, + "learning_rate": 4.887301070544402e-06, + "loss": 0.434, + "step": 9543 + }, + { + "epoch": 0.5811892945224248, + "grad_norm": 1.0037514054470107, + "learning_rate": 4.887277380996466e-06, + "loss": 0.4467, + "step": 9544 + }, + { + "epoch": 0.5812501902993027, + "grad_norm": 0.9345942853172606, + "learning_rate": 4.887253689016421e-06, + "loss": 0.4335, + "step": 9545 + }, + { + "epoch": 0.5813110860761806, + "grad_norm": 0.9927159848734683, + "learning_rate": 4.8872299946042925e-06, + "loss": 0.3988, + "step": 9546 + }, + { + "epoch": 0.5813719818530585, + "grad_norm": 1.0194467236907447, + "learning_rate": 4.8872062977601035e-06, + "loss": 0.4659, + "step": 9547 + }, + { + "epoch": 0.5814328776299363, + "grad_norm": 1.0287216969242896, + "learning_rate": 4.887182598483877e-06, + "loss": 0.4184, + "step": 9548 + }, + { + "epoch": 0.5814937734068142, + "grad_norm": 0.9120354007475737, + "learning_rate": 4.8871588967756395e-06, + "loss": 0.5385, + "step": 9549 + }, + { + "epoch": 0.5815546691836921, + "grad_norm": 1.0393554320328393, + "learning_rate": 4.8871351926354136e-06, + "loss": 0.3953, + "step": 9550 + }, + { + "epoch": 0.58161556496057, + "grad_norm": 0.9982551893708046, + "learning_rate": 4.887111486063225e-06, + "loss": 0.4171, + "step": 9551 + }, + { + "epoch": 0.5816764607374478, + "grad_norm": 1.073255309850338, + "learning_rate": 4.887087777059096e-06, + "loss": 0.4838, + "step": 9552 + }, + { + "epoch": 0.5817373565143257, + "grad_norm": 0.9923198724109161, + "learning_rate": 4.887064065623052e-06, + "loss": 0.4039, + "step": 9553 + }, + { + "epoch": 0.5817982522912036, + "grad_norm": 0.9776123942166841, + "learning_rate": 4.887040351755117e-06, + "loss": 0.4526, + "step": 9554 + }, + { + "epoch": 0.5818591480680815, + "grad_norm": 1.0003459508299586, + "learning_rate": 4.8870166354553135e-06, + "loss": 0.4167, + "step": 9555 + }, + { + "epoch": 0.5819200438449593, + "grad_norm": 1.1116296082811943, + "learning_rate": 4.886992916723669e-06, + "loss": 0.4353, + "step": 9556 + }, + { + "epoch": 0.5819809396218373, + "grad_norm": 0.9747919158964375, + "learning_rate": 4.886969195560205e-06, + "loss": 0.4605, + "step": 9557 + }, + { + "epoch": 0.5820418353987151, + "grad_norm": 1.1190247108470268, + "learning_rate": 4.886945471964946e-06, + "loss": 0.4234, + "step": 9558 + }, + { + "epoch": 0.582102731175593, + "grad_norm": 0.9337703438506001, + "learning_rate": 4.886921745937916e-06, + "loss": 0.4744, + "step": 9559 + }, + { + "epoch": 0.5821636269524708, + "grad_norm": 0.9108307081686399, + "learning_rate": 4.886898017479142e-06, + "loss": 0.5216, + "step": 9560 + }, + { + "epoch": 0.5822245227293488, + "grad_norm": 1.0261059720426948, + "learning_rate": 4.886874286588644e-06, + "loss": 0.5385, + "step": 9561 + }, + { + "epoch": 0.5822854185062266, + "grad_norm": 0.9593046134320602, + "learning_rate": 4.886850553266448e-06, + "loss": 0.4085, + "step": 9562 + }, + { + "epoch": 0.5823463142831045, + "grad_norm": 0.9880067290546138, + "learning_rate": 4.886826817512579e-06, + "loss": 0.4885, + "step": 9563 + }, + { + "epoch": 0.5824072100599823, + "grad_norm": 1.0444584566497375, + "learning_rate": 4.886803079327061e-06, + "loss": 0.4179, + "step": 9564 + }, + { + "epoch": 0.5824681058368603, + "grad_norm": 0.9797967990288933, + "learning_rate": 4.886779338709917e-06, + "loss": 0.4405, + "step": 9565 + }, + { + "epoch": 0.5825290016137381, + "grad_norm": 0.9537196417611215, + "learning_rate": 4.886755595661171e-06, + "loss": 0.4458, + "step": 9566 + }, + { + "epoch": 0.582589897390616, + "grad_norm": 0.9883574325452839, + "learning_rate": 4.8867318501808494e-06, + "loss": 0.4199, + "step": 9567 + }, + { + "epoch": 0.5826507931674938, + "grad_norm": 1.0021804789867692, + "learning_rate": 4.8867081022689746e-06, + "loss": 0.4024, + "step": 9568 + }, + { + "epoch": 0.5827116889443718, + "grad_norm": 0.9851796709648275, + "learning_rate": 4.8866843519255705e-06, + "loss": 0.4782, + "step": 9569 + }, + { + "epoch": 0.5827725847212496, + "grad_norm": 0.9170318570545717, + "learning_rate": 4.886660599150663e-06, + "loss": 0.458, + "step": 9570 + }, + { + "epoch": 0.5828334804981274, + "grad_norm": 0.9994776360150854, + "learning_rate": 4.886636843944275e-06, + "loss": 0.466, + "step": 9571 + }, + { + "epoch": 0.5828943762750053, + "grad_norm": 1.00214746466086, + "learning_rate": 4.886613086306431e-06, + "loss": 0.4274, + "step": 9572 + }, + { + "epoch": 0.5829552720518832, + "grad_norm": 0.9572418614097293, + "learning_rate": 4.886589326237154e-06, + "loss": 0.4308, + "step": 9573 + }, + { + "epoch": 0.5830161678287611, + "grad_norm": 0.9893843758324284, + "learning_rate": 4.886565563736471e-06, + "loss": 0.4253, + "step": 9574 + }, + { + "epoch": 0.5830770636056389, + "grad_norm": 0.9133811373155678, + "learning_rate": 4.886541798804404e-06, + "loss": 0.4627, + "step": 9575 + }, + { + "epoch": 0.5831379593825168, + "grad_norm": 1.0210001029552174, + "learning_rate": 4.8865180314409785e-06, + "loss": 0.4677, + "step": 9576 + }, + { + "epoch": 0.5831988551593947, + "grad_norm": 0.9245696754272876, + "learning_rate": 4.8864942616462175e-06, + "loss": 0.5014, + "step": 9577 + }, + { + "epoch": 0.5832597509362726, + "grad_norm": 0.9701413669447793, + "learning_rate": 4.886470489420145e-06, + "loss": 0.4455, + "step": 9578 + }, + { + "epoch": 0.5833206467131504, + "grad_norm": 1.0088749702322481, + "learning_rate": 4.886446714762787e-06, + "loss": 0.459, + "step": 9579 + }, + { + "epoch": 0.5833815424900283, + "grad_norm": 1.0397868789322866, + "learning_rate": 4.886422937674167e-06, + "loss": 0.4353, + "step": 9580 + }, + { + "epoch": 0.5834424382669062, + "grad_norm": 1.0594519584135944, + "learning_rate": 4.886399158154308e-06, + "loss": 0.4594, + "step": 9581 + }, + { + "epoch": 0.5835033340437841, + "grad_norm": 1.0689908354148103, + "learning_rate": 4.886375376203235e-06, + "loss": 0.4235, + "step": 9582 + }, + { + "epoch": 0.5835642298206619, + "grad_norm": 0.993476873484271, + "learning_rate": 4.886351591820974e-06, + "loss": 0.4525, + "step": 9583 + }, + { + "epoch": 0.5836251255975398, + "grad_norm": 1.046777871712002, + "learning_rate": 4.8863278050075455e-06, + "loss": 0.4315, + "step": 9584 + }, + { + "epoch": 0.5836860213744177, + "grad_norm": 0.9589599888740887, + "learning_rate": 4.886304015762977e-06, + "loss": 0.4639, + "step": 9585 + }, + { + "epoch": 0.5837469171512956, + "grad_norm": 0.9442356977439631, + "learning_rate": 4.886280224087292e-06, + "loss": 0.453, + "step": 9586 + }, + { + "epoch": 0.5838078129281734, + "grad_norm": 1.0335732686731733, + "learning_rate": 4.886256429980514e-06, + "loss": 0.4687, + "step": 9587 + }, + { + "epoch": 0.5838687087050513, + "grad_norm": 0.9702814197150169, + "learning_rate": 4.886232633442667e-06, + "loss": 0.503, + "step": 9588 + }, + { + "epoch": 0.5839296044819292, + "grad_norm": 0.9872842390038018, + "learning_rate": 4.8862088344737756e-06, + "loss": 0.4497, + "step": 9589 + }, + { + "epoch": 0.583990500258807, + "grad_norm": 0.9689165837624784, + "learning_rate": 4.886185033073866e-06, + "loss": 0.4211, + "step": 9590 + }, + { + "epoch": 0.5840513960356849, + "grad_norm": 1.0070726685257505, + "learning_rate": 4.8861612292429585e-06, + "loss": 0.4356, + "step": 9591 + }, + { + "epoch": 0.5841122918125627, + "grad_norm": 0.9511086733903529, + "learning_rate": 4.886137422981081e-06, + "loss": 0.3899, + "step": 9592 + }, + { + "epoch": 0.5841731875894407, + "grad_norm": 1.0240660014905054, + "learning_rate": 4.886113614288256e-06, + "loss": 0.4864, + "step": 9593 + }, + { + "epoch": 0.5842340833663185, + "grad_norm": 1.0211031794119292, + "learning_rate": 4.8860898031645074e-06, + "loss": 0.3849, + "step": 9594 + }, + { + "epoch": 0.5842949791431964, + "grad_norm": 1.0102212425740509, + "learning_rate": 4.88606598960986e-06, + "loss": 0.4687, + "step": 9595 + }, + { + "epoch": 0.5843558749200743, + "grad_norm": 0.9717321933447369, + "learning_rate": 4.886042173624339e-06, + "loss": 0.431, + "step": 9596 + }, + { + "epoch": 0.5844167706969522, + "grad_norm": 1.079139926806279, + "learning_rate": 4.886018355207968e-06, + "loss": 0.4443, + "step": 9597 + }, + { + "epoch": 0.58447766647383, + "grad_norm": 1.1126793711356269, + "learning_rate": 4.885994534360771e-06, + "loss": 0.4016, + "step": 9598 + }, + { + "epoch": 0.5845385622507079, + "grad_norm": 0.9808864635692627, + "learning_rate": 4.885970711082772e-06, + "loss": 0.4903, + "step": 9599 + }, + { + "epoch": 0.5845994580275858, + "grad_norm": 1.0050627380454258, + "learning_rate": 4.885946885373995e-06, + "loss": 0.4636, + "step": 9600 + }, + { + "epoch": 0.5846603538044637, + "grad_norm": 0.9990014653736038, + "learning_rate": 4.8859230572344665e-06, + "loss": 0.416, + "step": 9601 + }, + { + "epoch": 0.5847212495813415, + "grad_norm": 1.0300965525544938, + "learning_rate": 4.885899226664208e-06, + "loss": 0.4028, + "step": 9602 + }, + { + "epoch": 0.5847821453582194, + "grad_norm": 0.9852467912177515, + "learning_rate": 4.8858753936632455e-06, + "loss": 0.4116, + "step": 9603 + }, + { + "epoch": 0.5848430411350973, + "grad_norm": 1.0280910187920924, + "learning_rate": 4.8858515582316024e-06, + "loss": 0.5167, + "step": 9604 + }, + { + "epoch": 0.5849039369119752, + "grad_norm": 1.0067687670759042, + "learning_rate": 4.8858277203693035e-06, + "loss": 0.4962, + "step": 9605 + }, + { + "epoch": 0.584964832688853, + "grad_norm": 1.1273177570201933, + "learning_rate": 4.885803880076373e-06, + "loss": 0.4008, + "step": 9606 + }, + { + "epoch": 0.5850257284657309, + "grad_norm": 0.9783042001717102, + "learning_rate": 4.885780037352835e-06, + "loss": 0.4141, + "step": 9607 + }, + { + "epoch": 0.5850866242426088, + "grad_norm": 1.0138668284709125, + "learning_rate": 4.885756192198714e-06, + "loss": 0.4132, + "step": 9608 + }, + { + "epoch": 0.5851475200194867, + "grad_norm": 1.0643321949045874, + "learning_rate": 4.885732344614035e-06, + "loss": 0.3962, + "step": 9609 + }, + { + "epoch": 0.5852084157963645, + "grad_norm": 1.056570315281462, + "learning_rate": 4.8857084945988194e-06, + "loss": 0.3751, + "step": 9610 + }, + { + "epoch": 0.5852693115732424, + "grad_norm": 1.0021464866958703, + "learning_rate": 4.885684642153096e-06, + "loss": 0.4753, + "step": 9611 + }, + { + "epoch": 0.5853302073501203, + "grad_norm": 0.9952785684719744, + "learning_rate": 4.885660787276885e-06, + "loss": 0.4409, + "step": 9612 + }, + { + "epoch": 0.5853911031269982, + "grad_norm": 0.9771974376178882, + "learning_rate": 4.885636929970213e-06, + "loss": 0.4357, + "step": 9613 + }, + { + "epoch": 0.585451998903876, + "grad_norm": 0.9707089059971216, + "learning_rate": 4.885613070233103e-06, + "loss": 0.425, + "step": 9614 + }, + { + "epoch": 0.5855128946807538, + "grad_norm": 1.0127092309146852, + "learning_rate": 4.885589208065581e-06, + "loss": 0.4221, + "step": 9615 + }, + { + "epoch": 0.5855737904576318, + "grad_norm": 0.9711667141895214, + "learning_rate": 4.885565343467669e-06, + "loss": 0.3996, + "step": 9616 + }, + { + "epoch": 0.5856346862345096, + "grad_norm": 1.0244948811501553, + "learning_rate": 4.885541476439394e-06, + "loss": 0.4233, + "step": 9617 + }, + { + "epoch": 0.5856955820113875, + "grad_norm": 0.9841156736712181, + "learning_rate": 4.885517606980778e-06, + "loss": 0.4266, + "step": 9618 + }, + { + "epoch": 0.5857564777882653, + "grad_norm": 1.007418557910247, + "learning_rate": 4.885493735091847e-06, + "loss": 0.3648, + "step": 9619 + }, + { + "epoch": 0.5858173735651433, + "grad_norm": 0.9713623148568125, + "learning_rate": 4.885469860772625e-06, + "loss": 0.4405, + "step": 9620 + }, + { + "epoch": 0.5858782693420211, + "grad_norm": 1.0445823649404093, + "learning_rate": 4.885445984023135e-06, + "loss": 0.4477, + "step": 9621 + }, + { + "epoch": 0.585939165118899, + "grad_norm": 1.0749275684969615, + "learning_rate": 4.885422104843402e-06, + "loss": 0.3282, + "step": 9622 + }, + { + "epoch": 0.5860000608957768, + "grad_norm": 1.0468984247309843, + "learning_rate": 4.8853982232334515e-06, + "loss": 0.4186, + "step": 9623 + }, + { + "epoch": 0.5860609566726548, + "grad_norm": 1.0128901108310606, + "learning_rate": 4.8853743391933065e-06, + "loss": 0.4323, + "step": 9624 + }, + { + "epoch": 0.5861218524495326, + "grad_norm": 0.9882272146382319, + "learning_rate": 4.885350452722991e-06, + "loss": 0.4305, + "step": 9625 + }, + { + "epoch": 0.5861827482264105, + "grad_norm": 0.9675271212804571, + "learning_rate": 4.885326563822531e-06, + "loss": 0.4446, + "step": 9626 + }, + { + "epoch": 0.5862436440032883, + "grad_norm": 1.1071311287057548, + "learning_rate": 4.885302672491949e-06, + "loss": 0.3814, + "step": 9627 + }, + { + "epoch": 0.5863045397801663, + "grad_norm": 1.0496182655690944, + "learning_rate": 4.885278778731272e-06, + "loss": 0.4634, + "step": 9628 + }, + { + "epoch": 0.5863654355570441, + "grad_norm": 1.0034680037553634, + "learning_rate": 4.8852548825405214e-06, + "loss": 0.4273, + "step": 9629 + }, + { + "epoch": 0.586426331333922, + "grad_norm": 1.0672315212969967, + "learning_rate": 4.8852309839197224e-06, + "loss": 0.4836, + "step": 9630 + }, + { + "epoch": 0.5864872271107998, + "grad_norm": 1.0009049933308662, + "learning_rate": 4.8852070828688994e-06, + "loss": 0.4987, + "step": 9631 + }, + { + "epoch": 0.5865481228876778, + "grad_norm": 1.036278186885521, + "learning_rate": 4.885183179388078e-06, + "loss": 0.3797, + "step": 9632 + }, + { + "epoch": 0.5866090186645556, + "grad_norm": 0.9782240540881616, + "learning_rate": 4.88515927347728e-06, + "loss": 0.4375, + "step": 9633 + }, + { + "epoch": 0.5866699144414335, + "grad_norm": 0.982997798935774, + "learning_rate": 4.885135365136533e-06, + "loss": 0.4776, + "step": 9634 + }, + { + "epoch": 0.5867308102183113, + "grad_norm": 1.0563291361188238, + "learning_rate": 4.885111454365859e-06, + "loss": 0.3741, + "step": 9635 + }, + { + "epoch": 0.5867917059951893, + "grad_norm": 0.9653301663878798, + "learning_rate": 4.885087541165283e-06, + "loss": 0.4679, + "step": 9636 + }, + { + "epoch": 0.5868526017720671, + "grad_norm": 1.0000764511716806, + "learning_rate": 4.88506362553483e-06, + "loss": 0.4029, + "step": 9637 + }, + { + "epoch": 0.586913497548945, + "grad_norm": 1.0228157231428059, + "learning_rate": 4.885039707474523e-06, + "loss": 0.4199, + "step": 9638 + }, + { + "epoch": 0.5869743933258229, + "grad_norm": 1.0200035340964764, + "learning_rate": 4.885015786984387e-06, + "loss": 0.4147, + "step": 9639 + }, + { + "epoch": 0.5870352891027008, + "grad_norm": 0.9759766974238483, + "learning_rate": 4.884991864064448e-06, + "loss": 0.4959, + "step": 9640 + }, + { + "epoch": 0.5870961848795786, + "grad_norm": 0.9339991283380894, + "learning_rate": 4.8849679387147274e-06, + "loss": 0.4758, + "step": 9641 + }, + { + "epoch": 0.5871570806564564, + "grad_norm": 1.0058808570627156, + "learning_rate": 4.884944010935251e-06, + "loss": 0.453, + "step": 9642 + }, + { + "epoch": 0.5872179764333344, + "grad_norm": 1.0836902339006176, + "learning_rate": 4.884920080726044e-06, + "loss": 0.3824, + "step": 9643 + }, + { + "epoch": 0.5872788722102122, + "grad_norm": 1.0229798822410292, + "learning_rate": 4.88489614808713e-06, + "loss": 0.4774, + "step": 9644 + }, + { + "epoch": 0.5873397679870901, + "grad_norm": 0.9814107567927879, + "learning_rate": 4.8848722130185326e-06, + "loss": 0.4004, + "step": 9645 + }, + { + "epoch": 0.5874006637639679, + "grad_norm": 1.09989209829444, + "learning_rate": 4.884848275520277e-06, + "loss": 0.462, + "step": 9646 + }, + { + "epoch": 0.5874615595408459, + "grad_norm": 0.9631198031152383, + "learning_rate": 4.884824335592389e-06, + "loss": 0.4381, + "step": 9647 + }, + { + "epoch": 0.5875224553177237, + "grad_norm": 0.9540722750362942, + "learning_rate": 4.88480039323489e-06, + "loss": 0.3974, + "step": 9648 + }, + { + "epoch": 0.5875833510946016, + "grad_norm": 1.066946163836782, + "learning_rate": 4.884776448447806e-06, + "loss": 0.4114, + "step": 9649 + }, + { + "epoch": 0.5876442468714794, + "grad_norm": 1.0459779040755035, + "learning_rate": 4.884752501231163e-06, + "loss": 0.4109, + "step": 9650 + }, + { + "epoch": 0.5877051426483574, + "grad_norm": 1.0397564155806067, + "learning_rate": 4.884728551584982e-06, + "loss": 0.447, + "step": 9651 + }, + { + "epoch": 0.5877660384252352, + "grad_norm": 1.0737769351183941, + "learning_rate": 4.88470459950929e-06, + "loss": 0.3891, + "step": 9652 + }, + { + "epoch": 0.5878269342021131, + "grad_norm": 1.0157245122395637, + "learning_rate": 4.88468064500411e-06, + "loss": 0.4533, + "step": 9653 + }, + { + "epoch": 0.5878878299789909, + "grad_norm": 1.0378477071902692, + "learning_rate": 4.8846566880694665e-06, + "loss": 0.3639, + "step": 9654 + }, + { + "epoch": 0.5879487257558689, + "grad_norm": 0.9707491852164921, + "learning_rate": 4.884632728705386e-06, + "loss": 0.3999, + "step": 9655 + }, + { + "epoch": 0.5880096215327467, + "grad_norm": 1.012759715195501, + "learning_rate": 4.88460876691189e-06, + "loss": 0.4616, + "step": 9656 + }, + { + "epoch": 0.5880705173096246, + "grad_norm": 0.9234896163860435, + "learning_rate": 4.884584802689004e-06, + "loss": 0.4709, + "step": 9657 + }, + { + "epoch": 0.5881314130865024, + "grad_norm": 1.0034254713853277, + "learning_rate": 4.884560836036753e-06, + "loss": 0.4459, + "step": 9658 + }, + { + "epoch": 0.5881923088633804, + "grad_norm": 1.1420029658487076, + "learning_rate": 4.884536866955161e-06, + "loss": 0.4169, + "step": 9659 + }, + { + "epoch": 0.5882532046402582, + "grad_norm": 1.0491362803757969, + "learning_rate": 4.884512895444253e-06, + "loss": 0.4125, + "step": 9660 + }, + { + "epoch": 0.588314100417136, + "grad_norm": 1.014159412775763, + "learning_rate": 4.884488921504052e-06, + "loss": 0.4946, + "step": 9661 + }, + { + "epoch": 0.5883749961940139, + "grad_norm": 0.9279050775731195, + "learning_rate": 4.884464945134584e-06, + "loss": 0.4608, + "step": 9662 + }, + { + "epoch": 0.5884358919708919, + "grad_norm": 0.9889455753958296, + "learning_rate": 4.884440966335871e-06, + "loss": 0.4902, + "step": 9663 + }, + { + "epoch": 0.5884967877477697, + "grad_norm": 1.0744273060283034, + "learning_rate": 4.884416985107941e-06, + "loss": 0.3743, + "step": 9664 + }, + { + "epoch": 0.5885576835246475, + "grad_norm": 1.0534841583396517, + "learning_rate": 4.884393001450816e-06, + "loss": 0.3996, + "step": 9665 + }, + { + "epoch": 0.5886185793015254, + "grad_norm": 0.9822992619669229, + "learning_rate": 4.884369015364521e-06, + "loss": 0.4648, + "step": 9666 + }, + { + "epoch": 0.5886794750784033, + "grad_norm": 0.9658268036907003, + "learning_rate": 4.88434502684908e-06, + "loss": 0.4888, + "step": 9667 + }, + { + "epoch": 0.5887403708552812, + "grad_norm": 1.0863854613745771, + "learning_rate": 4.884321035904518e-06, + "loss": 0.3806, + "step": 9668 + }, + { + "epoch": 0.588801266632159, + "grad_norm": 1.1211581958205767, + "learning_rate": 4.88429704253086e-06, + "loss": 0.4143, + "step": 9669 + }, + { + "epoch": 0.5888621624090369, + "grad_norm": 0.9851467401303592, + "learning_rate": 4.884273046728129e-06, + "loss": 0.4988, + "step": 9670 + }, + { + "epoch": 0.5889230581859148, + "grad_norm": 1.096790370969812, + "learning_rate": 4.88424904849635e-06, + "loss": 0.3903, + "step": 9671 + }, + { + "epoch": 0.5889839539627927, + "grad_norm": 1.0635668521204418, + "learning_rate": 4.884225047835548e-06, + "loss": 0.4413, + "step": 9672 + }, + { + "epoch": 0.5890448497396705, + "grad_norm": 1.0800605253035191, + "learning_rate": 4.884201044745747e-06, + "loss": 0.3936, + "step": 9673 + }, + { + "epoch": 0.5891057455165484, + "grad_norm": 1.08322355710681, + "learning_rate": 4.884177039226972e-06, + "loss": 0.3971, + "step": 9674 + }, + { + "epoch": 0.5891666412934263, + "grad_norm": 1.0295178231499635, + "learning_rate": 4.884153031279247e-06, + "loss": 0.4068, + "step": 9675 + }, + { + "epoch": 0.5892275370703042, + "grad_norm": 1.046566446321984, + "learning_rate": 4.884129020902596e-06, + "loss": 0.4025, + "step": 9676 + }, + { + "epoch": 0.589288432847182, + "grad_norm": 0.9809061187620245, + "learning_rate": 4.884105008097044e-06, + "loss": 0.4201, + "step": 9677 + }, + { + "epoch": 0.58934932862406, + "grad_norm": 1.0348468615119817, + "learning_rate": 4.884080992862615e-06, + "loss": 0.4159, + "step": 9678 + }, + { + "epoch": 0.5894102244009378, + "grad_norm": 1.0523757921392414, + "learning_rate": 4.884056975199335e-06, + "loss": 0.4131, + "step": 9679 + }, + { + "epoch": 0.5894711201778157, + "grad_norm": 1.0731639927652727, + "learning_rate": 4.884032955107226e-06, + "loss": 0.45, + "step": 9680 + }, + { + "epoch": 0.5895320159546935, + "grad_norm": 0.9913894445650062, + "learning_rate": 4.8840089325863145e-06, + "loss": 0.3971, + "step": 9681 + }, + { + "epoch": 0.5895929117315715, + "grad_norm": 0.9974842973015482, + "learning_rate": 4.883984907636624e-06, + "loss": 0.4347, + "step": 9682 + }, + { + "epoch": 0.5896538075084493, + "grad_norm": 0.9952319056240007, + "learning_rate": 4.88396088025818e-06, + "loss": 0.4618, + "step": 9683 + }, + { + "epoch": 0.5897147032853272, + "grad_norm": 0.8742949382194373, + "learning_rate": 4.883936850451005e-06, + "loss": 0.4671, + "step": 9684 + }, + { + "epoch": 0.589775599062205, + "grad_norm": 1.0312493755352374, + "learning_rate": 4.883912818215125e-06, + "loss": 0.381, + "step": 9685 + }, + { + "epoch": 0.589836494839083, + "grad_norm": 1.008316053221914, + "learning_rate": 4.883888783550564e-06, + "loss": 0.4867, + "step": 9686 + }, + { + "epoch": 0.5898973906159608, + "grad_norm": 1.0829066651228176, + "learning_rate": 4.8838647464573476e-06, + "loss": 0.4257, + "step": 9687 + }, + { + "epoch": 0.5899582863928386, + "grad_norm": 0.9722973993896697, + "learning_rate": 4.8838407069354986e-06, + "loss": 0.415, + "step": 9688 + }, + { + "epoch": 0.5900191821697165, + "grad_norm": 1.1239310070168629, + "learning_rate": 4.883816664985043e-06, + "loss": 0.4071, + "step": 9689 + }, + { + "epoch": 0.5900800779465945, + "grad_norm": 1.0208340731347625, + "learning_rate": 4.8837926206060034e-06, + "loss": 0.4993, + "step": 9690 + }, + { + "epoch": 0.5901409737234723, + "grad_norm": 0.9626090495413732, + "learning_rate": 4.883768573798407e-06, + "loss": 0.4292, + "step": 9691 + }, + { + "epoch": 0.5902018695003501, + "grad_norm": 0.9892050811172851, + "learning_rate": 4.883744524562276e-06, + "loss": 0.4415, + "step": 9692 + }, + { + "epoch": 0.590262765277228, + "grad_norm": 0.9576121414507093, + "learning_rate": 4.883720472897635e-06, + "loss": 0.4706, + "step": 9693 + }, + { + "epoch": 0.5903236610541059, + "grad_norm": 1.032146476403816, + "learning_rate": 4.88369641880451e-06, + "loss": 0.427, + "step": 9694 + }, + { + "epoch": 0.5903845568309838, + "grad_norm": 1.0988009811956738, + "learning_rate": 4.883672362282924e-06, + "loss": 0.4708, + "step": 9695 + }, + { + "epoch": 0.5904454526078616, + "grad_norm": 0.9752591899260711, + "learning_rate": 4.883648303332903e-06, + "loss": 0.4446, + "step": 9696 + }, + { + "epoch": 0.5905063483847395, + "grad_norm": 0.966733789677795, + "learning_rate": 4.88362424195447e-06, + "loss": 0.4279, + "step": 9697 + }, + { + "epoch": 0.5905672441616174, + "grad_norm": 0.9460786654514504, + "learning_rate": 4.88360017814765e-06, + "loss": 0.4349, + "step": 9698 + }, + { + "epoch": 0.5906281399384953, + "grad_norm": 1.0901058724035144, + "learning_rate": 4.883576111912468e-06, + "loss": 0.5122, + "step": 9699 + }, + { + "epoch": 0.5906890357153731, + "grad_norm": 0.9351529067466691, + "learning_rate": 4.883552043248948e-06, + "loss": 0.4166, + "step": 9700 + }, + { + "epoch": 0.590749931492251, + "grad_norm": 0.9396550795603561, + "learning_rate": 4.883527972157115e-06, + "loss": 0.4797, + "step": 9701 + }, + { + "epoch": 0.5908108272691289, + "grad_norm": 1.0731845552418269, + "learning_rate": 4.883503898636993e-06, + "loss": 0.329, + "step": 9702 + }, + { + "epoch": 0.5908717230460068, + "grad_norm": 0.9446268177867572, + "learning_rate": 4.883479822688607e-06, + "loss": 0.4488, + "step": 9703 + }, + { + "epoch": 0.5909326188228846, + "grad_norm": 0.920372451301502, + "learning_rate": 4.883455744311982e-06, + "loss": 0.5012, + "step": 9704 + }, + { + "epoch": 0.5909935145997625, + "grad_norm": 0.9628506063624375, + "learning_rate": 4.883431663507141e-06, + "loss": 0.4537, + "step": 9705 + }, + { + "epoch": 0.5910544103766404, + "grad_norm": 1.0556709235263773, + "learning_rate": 4.88340758027411e-06, + "loss": 0.4578, + "step": 9706 + }, + { + "epoch": 0.5911153061535183, + "grad_norm": 1.0206292955033143, + "learning_rate": 4.8833834946129115e-06, + "loss": 0.376, + "step": 9707 + }, + { + "epoch": 0.5911762019303961, + "grad_norm": 0.9318685347850197, + "learning_rate": 4.883359406523572e-06, + "loss": 0.4203, + "step": 9708 + }, + { + "epoch": 0.591237097707274, + "grad_norm": 1.0405734488763283, + "learning_rate": 4.8833353160061165e-06, + "loss": 0.3999, + "step": 9709 + }, + { + "epoch": 0.5912979934841519, + "grad_norm": 1.0611346980063765, + "learning_rate": 4.883311223060568e-06, + "loss": 0.4031, + "step": 9710 + }, + { + "epoch": 0.5913588892610298, + "grad_norm": 0.9714338550006842, + "learning_rate": 4.883287127686951e-06, + "loss": 0.5174, + "step": 9711 + }, + { + "epoch": 0.5914197850379076, + "grad_norm": 1.0270869219022765, + "learning_rate": 4.883263029885291e-06, + "loss": 0.4455, + "step": 9712 + }, + { + "epoch": 0.5914806808147854, + "grad_norm": 1.0071269984232882, + "learning_rate": 4.883238929655613e-06, + "loss": 0.436, + "step": 9713 + }, + { + "epoch": 0.5915415765916634, + "grad_norm": 1.0261310980464589, + "learning_rate": 4.88321482699794e-06, + "loss": 0.4479, + "step": 9714 + }, + { + "epoch": 0.5916024723685412, + "grad_norm": 0.9711954173966225, + "learning_rate": 4.883190721912297e-06, + "loss": 0.3757, + "step": 9715 + }, + { + "epoch": 0.5916633681454191, + "grad_norm": 1.022982023227119, + "learning_rate": 4.883166614398709e-06, + "loss": 0.41, + "step": 9716 + }, + { + "epoch": 0.5917242639222969, + "grad_norm": 1.047727458222814, + "learning_rate": 4.8831425044572e-06, + "loss": 0.3369, + "step": 9717 + }, + { + "epoch": 0.5917851596991749, + "grad_norm": 1.1340657772809188, + "learning_rate": 4.8831183920877955e-06, + "loss": 0.3761, + "step": 9718 + }, + { + "epoch": 0.5918460554760527, + "grad_norm": 1.057353698209581, + "learning_rate": 4.88309427729052e-06, + "loss": 0.4133, + "step": 9719 + }, + { + "epoch": 0.5919069512529306, + "grad_norm": 1.0284021955633182, + "learning_rate": 4.883070160065398e-06, + "loss": 0.435, + "step": 9720 + }, + { + "epoch": 0.5919678470298085, + "grad_norm": 0.9920368570037874, + "learning_rate": 4.8830460404124515e-06, + "loss": 0.452, + "step": 9721 + }, + { + "epoch": 0.5920287428066864, + "grad_norm": 1.0899576920677871, + "learning_rate": 4.883021918331709e-06, + "loss": 0.3878, + "step": 9722 + }, + { + "epoch": 0.5920896385835642, + "grad_norm": 1.1275466434742971, + "learning_rate": 4.882997793823193e-06, + "loss": 0.3944, + "step": 9723 + }, + { + "epoch": 0.5921505343604421, + "grad_norm": 1.015015082973947, + "learning_rate": 4.882973666886928e-06, + "loss": 0.4151, + "step": 9724 + }, + { + "epoch": 0.59221143013732, + "grad_norm": 0.9908116707871136, + "learning_rate": 4.882949537522939e-06, + "loss": 0.4269, + "step": 9725 + }, + { + "epoch": 0.5922723259141979, + "grad_norm": 1.0316560330464488, + "learning_rate": 4.88292540573125e-06, + "loss": 0.4056, + "step": 9726 + }, + { + "epoch": 0.5923332216910757, + "grad_norm": 0.9973277142374563, + "learning_rate": 4.882901271511887e-06, + "loss": 0.376, + "step": 9727 + }, + { + "epoch": 0.5923941174679536, + "grad_norm": 0.9977034106824456, + "learning_rate": 4.882877134864874e-06, + "loss": 0.4643, + "step": 9728 + }, + { + "epoch": 0.5924550132448315, + "grad_norm": 1.0213160513702757, + "learning_rate": 4.8828529957902344e-06, + "loss": 0.4136, + "step": 9729 + }, + { + "epoch": 0.5925159090217094, + "grad_norm": 1.0076794485200815, + "learning_rate": 4.8828288542879945e-06, + "loss": 0.4662, + "step": 9730 + }, + { + "epoch": 0.5925768047985872, + "grad_norm": 1.0306801977305657, + "learning_rate": 4.8828047103581785e-06, + "loss": 0.4613, + "step": 9731 + }, + { + "epoch": 0.592637700575465, + "grad_norm": 0.9251238034747933, + "learning_rate": 4.88278056400081e-06, + "loss": 0.4743, + "step": 9732 + }, + { + "epoch": 0.592698596352343, + "grad_norm": 1.0813571185355588, + "learning_rate": 4.882756415215914e-06, + "loss": 0.4053, + "step": 9733 + }, + { + "epoch": 0.5927594921292209, + "grad_norm": 0.9569792410923399, + "learning_rate": 4.882732264003515e-06, + "loss": 0.4273, + "step": 9734 + }, + { + "epoch": 0.5928203879060987, + "grad_norm": 1.00944099847808, + "learning_rate": 4.882708110363639e-06, + "loss": 0.5148, + "step": 9735 + }, + { + "epoch": 0.5928812836829765, + "grad_norm": 1.1201791339048195, + "learning_rate": 4.882683954296308e-06, + "loss": 0.4333, + "step": 9736 + }, + { + "epoch": 0.5929421794598545, + "grad_norm": 1.0092398897659467, + "learning_rate": 4.88265979580155e-06, + "loss": 0.4578, + "step": 9737 + }, + { + "epoch": 0.5930030752367323, + "grad_norm": 0.9786116392468198, + "learning_rate": 4.882635634879386e-06, + "loss": 0.4742, + "step": 9738 + }, + { + "epoch": 0.5930639710136102, + "grad_norm": 1.031263392336418, + "learning_rate": 4.882611471529843e-06, + "loss": 0.4147, + "step": 9739 + }, + { + "epoch": 0.593124866790488, + "grad_norm": 1.0034001014290148, + "learning_rate": 4.882587305752945e-06, + "loss": 0.458, + "step": 9740 + }, + { + "epoch": 0.593185762567366, + "grad_norm": 0.961522739532033, + "learning_rate": 4.882563137548717e-06, + "loss": 0.4195, + "step": 9741 + }, + { + "epoch": 0.5932466583442438, + "grad_norm": 0.9782021170651812, + "learning_rate": 4.882538966917183e-06, + "loss": 0.456, + "step": 9742 + }, + { + "epoch": 0.5933075541211217, + "grad_norm": 1.0934842354837233, + "learning_rate": 4.882514793858367e-06, + "loss": 0.3852, + "step": 9743 + }, + { + "epoch": 0.5933684498979995, + "grad_norm": 1.0080983128675267, + "learning_rate": 4.8824906183722954e-06, + "loss": 0.4501, + "step": 9744 + }, + { + "epoch": 0.5934293456748775, + "grad_norm": 1.081300190634941, + "learning_rate": 4.8824664404589916e-06, + "loss": 0.4005, + "step": 9745 + }, + { + "epoch": 0.5934902414517553, + "grad_norm": 1.024059830994691, + "learning_rate": 4.882442260118481e-06, + "loss": 0.3764, + "step": 9746 + }, + { + "epoch": 0.5935511372286332, + "grad_norm": 1.0261807247388144, + "learning_rate": 4.882418077350787e-06, + "loss": 0.4341, + "step": 9747 + }, + { + "epoch": 0.593612033005511, + "grad_norm": 1.0219368502561605, + "learning_rate": 4.882393892155936e-06, + "loss": 0.4253, + "step": 9748 + }, + { + "epoch": 0.593672928782389, + "grad_norm": 0.8911294903399883, + "learning_rate": 4.88236970453395e-06, + "loss": 0.4649, + "step": 9749 + }, + { + "epoch": 0.5937338245592668, + "grad_norm": 1.0967143814556828, + "learning_rate": 4.8823455144848564e-06, + "loss": 0.3522, + "step": 9750 + }, + { + "epoch": 0.5937947203361447, + "grad_norm": 1.0171770137002798, + "learning_rate": 4.882321322008679e-06, + "loss": 0.4102, + "step": 9751 + }, + { + "epoch": 0.5938556161130225, + "grad_norm": 1.0187260009911858, + "learning_rate": 4.882297127105441e-06, + "loss": 0.3833, + "step": 9752 + }, + { + "epoch": 0.5939165118899005, + "grad_norm": 1.0273685748141261, + "learning_rate": 4.882272929775169e-06, + "loss": 0.3807, + "step": 9753 + }, + { + "epoch": 0.5939774076667783, + "grad_norm": 1.0062049223455047, + "learning_rate": 4.882248730017887e-06, + "loss": 0.4757, + "step": 9754 + }, + { + "epoch": 0.5940383034436562, + "grad_norm": 1.0091535154758209, + "learning_rate": 4.882224527833619e-06, + "loss": 0.4235, + "step": 9755 + }, + { + "epoch": 0.594099199220534, + "grad_norm": 1.021921513134861, + "learning_rate": 4.88220032322239e-06, + "loss": 0.4215, + "step": 9756 + }, + { + "epoch": 0.594160094997412, + "grad_norm": 0.9827812420838841, + "learning_rate": 4.882176116184226e-06, + "loss": 0.3992, + "step": 9757 + }, + { + "epoch": 0.5942209907742898, + "grad_norm": 0.9558739956617124, + "learning_rate": 4.88215190671915e-06, + "loss": 0.5002, + "step": 9758 + }, + { + "epoch": 0.5942818865511676, + "grad_norm": 1.0858849275199804, + "learning_rate": 4.882127694827187e-06, + "loss": 0.4314, + "step": 9759 + }, + { + "epoch": 0.5943427823280456, + "grad_norm": 0.964025947068204, + "learning_rate": 4.882103480508361e-06, + "loss": 0.4776, + "step": 9760 + }, + { + "epoch": 0.5944036781049234, + "grad_norm": 1.0613844983191594, + "learning_rate": 4.882079263762699e-06, + "loss": 0.4071, + "step": 9761 + }, + { + "epoch": 0.5944645738818013, + "grad_norm": 0.996934056850432, + "learning_rate": 4.882055044590224e-06, + "loss": 0.4388, + "step": 9762 + }, + { + "epoch": 0.5945254696586791, + "grad_norm": 1.055844113251742, + "learning_rate": 4.882030822990959e-06, + "loss": 0.4472, + "step": 9763 + }, + { + "epoch": 0.5945863654355571, + "grad_norm": 1.0086478318276335, + "learning_rate": 4.882006598964933e-06, + "loss": 0.4027, + "step": 9764 + }, + { + "epoch": 0.5946472612124349, + "grad_norm": 0.9942230859181521, + "learning_rate": 4.881982372512166e-06, + "loss": 0.401, + "step": 9765 + }, + { + "epoch": 0.5947081569893128, + "grad_norm": 0.955996591128363, + "learning_rate": 4.8819581436326865e-06, + "loss": 0.416, + "step": 9766 + }, + { + "epoch": 0.5947690527661906, + "grad_norm": 0.9047310844349515, + "learning_rate": 4.881933912326517e-06, + "loss": 0.4389, + "step": 9767 + }, + { + "epoch": 0.5948299485430686, + "grad_norm": 0.9998021377121236, + "learning_rate": 4.881909678593682e-06, + "loss": 0.445, + "step": 9768 + }, + { + "epoch": 0.5948908443199464, + "grad_norm": 0.9872461514123612, + "learning_rate": 4.881885442434209e-06, + "loss": 0.4883, + "step": 9769 + }, + { + "epoch": 0.5949517400968243, + "grad_norm": 1.128134128276327, + "learning_rate": 4.881861203848119e-06, + "loss": 0.3994, + "step": 9770 + }, + { + "epoch": 0.5950126358737021, + "grad_norm": 1.0638039733724742, + "learning_rate": 4.881836962835438e-06, + "loss": 0.3514, + "step": 9771 + }, + { + "epoch": 0.5950735316505801, + "grad_norm": 0.9666976837990677, + "learning_rate": 4.881812719396192e-06, + "loss": 0.4176, + "step": 9772 + }, + { + "epoch": 0.5951344274274579, + "grad_norm": 1.0247860256265193, + "learning_rate": 4.881788473530404e-06, + "loss": 0.4741, + "step": 9773 + }, + { + "epoch": 0.5951953232043358, + "grad_norm": 0.9567973051418809, + "learning_rate": 4.881764225238101e-06, + "loss": 0.4633, + "step": 9774 + }, + { + "epoch": 0.5952562189812136, + "grad_norm": 1.0544415443275925, + "learning_rate": 4.881739974519304e-06, + "loss": 0.3756, + "step": 9775 + }, + { + "epoch": 0.5953171147580916, + "grad_norm": 0.9528082405950534, + "learning_rate": 4.88171572137404e-06, + "loss": 0.4674, + "step": 9776 + }, + { + "epoch": 0.5953780105349694, + "grad_norm": 1.0585204475473118, + "learning_rate": 4.881691465802335e-06, + "loss": 0.4067, + "step": 9777 + }, + { + "epoch": 0.5954389063118473, + "grad_norm": 0.9462414355348874, + "learning_rate": 4.8816672078042116e-06, + "loss": 0.4378, + "step": 9778 + }, + { + "epoch": 0.5954998020887251, + "grad_norm": 1.0576293720588952, + "learning_rate": 4.881642947379695e-06, + "loss": 0.5007, + "step": 9779 + }, + { + "epoch": 0.5955606978656031, + "grad_norm": 1.0046113189192918, + "learning_rate": 4.881618684528811e-06, + "loss": 0.4672, + "step": 9780 + }, + { + "epoch": 0.5956215936424809, + "grad_norm": 0.9928865034357476, + "learning_rate": 4.881594419251582e-06, + "loss": 0.4695, + "step": 9781 + }, + { + "epoch": 0.5956824894193588, + "grad_norm": 0.9617574232232546, + "learning_rate": 4.881570151548035e-06, + "loss": 0.3909, + "step": 9782 + }, + { + "epoch": 0.5957433851962366, + "grad_norm": 0.9673513979824221, + "learning_rate": 4.881545881418193e-06, + "loss": 0.4533, + "step": 9783 + }, + { + "epoch": 0.5958042809731146, + "grad_norm": 1.0238662929068107, + "learning_rate": 4.881521608862082e-06, + "loss": 0.4082, + "step": 9784 + }, + { + "epoch": 0.5958651767499924, + "grad_norm": 1.0233041267377314, + "learning_rate": 4.881497333879727e-06, + "loss": 0.4108, + "step": 9785 + }, + { + "epoch": 0.5959260725268702, + "grad_norm": 0.980897592802786, + "learning_rate": 4.881473056471151e-06, + "loss": 0.4431, + "step": 9786 + }, + { + "epoch": 0.5959869683037481, + "grad_norm": 1.0564190214702227, + "learning_rate": 4.88144877663638e-06, + "loss": 0.3986, + "step": 9787 + }, + { + "epoch": 0.596047864080626, + "grad_norm": 1.0515868067497238, + "learning_rate": 4.881424494375439e-06, + "loss": 0.4963, + "step": 9788 + }, + { + "epoch": 0.5961087598575039, + "grad_norm": 1.1133724505862508, + "learning_rate": 4.881400209688352e-06, + "loss": 0.433, + "step": 9789 + }, + { + "epoch": 0.5961696556343817, + "grad_norm": 1.0092366499501382, + "learning_rate": 4.881375922575145e-06, + "loss": 0.4224, + "step": 9790 + }, + { + "epoch": 0.5962305514112596, + "grad_norm": 1.0352807151632633, + "learning_rate": 4.88135163303584e-06, + "loss": 0.4016, + "step": 9791 + }, + { + "epoch": 0.5962914471881375, + "grad_norm": 1.0419915316739743, + "learning_rate": 4.881327341070464e-06, + "loss": 0.3703, + "step": 9792 + }, + { + "epoch": 0.5963523429650154, + "grad_norm": 0.977875370514956, + "learning_rate": 4.881303046679041e-06, + "loss": 0.3999, + "step": 9793 + }, + { + "epoch": 0.5964132387418932, + "grad_norm": 0.958919773567644, + "learning_rate": 4.881278749861597e-06, + "loss": 0.4752, + "step": 9794 + }, + { + "epoch": 0.5964741345187711, + "grad_norm": 0.9800087699948364, + "learning_rate": 4.881254450618154e-06, + "loss": 0.4624, + "step": 9795 + }, + { + "epoch": 0.596535030295649, + "grad_norm": 0.9502233313475077, + "learning_rate": 4.88123014894874e-06, + "loss": 0.406, + "step": 9796 + }, + { + "epoch": 0.5965959260725269, + "grad_norm": 0.9817887719741865, + "learning_rate": 4.8812058448533785e-06, + "loss": 0.4981, + "step": 9797 + }, + { + "epoch": 0.5966568218494047, + "grad_norm": 0.9878741987021886, + "learning_rate": 4.8811815383320925e-06, + "loss": 0.4189, + "step": 9798 + }, + { + "epoch": 0.5967177176262826, + "grad_norm": 1.1469256334901603, + "learning_rate": 4.881157229384909e-06, + "loss": 0.3885, + "step": 9799 + }, + { + "epoch": 0.5967786134031605, + "grad_norm": 0.9365708185339667, + "learning_rate": 4.8811329180118525e-06, + "loss": 0.4159, + "step": 9800 + }, + { + "epoch": 0.5968395091800384, + "grad_norm": 0.939655461719101, + "learning_rate": 4.881108604212947e-06, + "loss": 0.4526, + "step": 9801 + }, + { + "epoch": 0.5969004049569162, + "grad_norm": 0.9563429281435447, + "learning_rate": 4.881084287988217e-06, + "loss": 0.5068, + "step": 9802 + }, + { + "epoch": 0.5969613007337942, + "grad_norm": 1.030925675806955, + "learning_rate": 4.881059969337688e-06, + "loss": 0.4721, + "step": 9803 + }, + { + "epoch": 0.597022196510672, + "grad_norm": 1.0402651631719706, + "learning_rate": 4.881035648261384e-06, + "loss": 0.4327, + "step": 9804 + }, + { + "epoch": 0.5970830922875499, + "grad_norm": 1.041153617379042, + "learning_rate": 4.8810113247593315e-06, + "loss": 0.5226, + "step": 9805 + }, + { + "epoch": 0.5971439880644277, + "grad_norm": 1.0836819736122332, + "learning_rate": 4.880986998831554e-06, + "loss": 0.4373, + "step": 9806 + }, + { + "epoch": 0.5972048838413057, + "grad_norm": 1.027748996228458, + "learning_rate": 4.880962670478076e-06, + "loss": 0.3974, + "step": 9807 + }, + { + "epoch": 0.5972657796181835, + "grad_norm": 0.9913504575667438, + "learning_rate": 4.880938339698924e-06, + "loss": 0.5515, + "step": 9808 + }, + { + "epoch": 0.5973266753950613, + "grad_norm": 1.029567634513626, + "learning_rate": 4.8809140064941196e-06, + "loss": 0.3967, + "step": 9809 + }, + { + "epoch": 0.5973875711719392, + "grad_norm": 0.9614103964866992, + "learning_rate": 4.880889670863691e-06, + "loss": 0.4526, + "step": 9810 + }, + { + "epoch": 0.5974484669488171, + "grad_norm": 1.0180431755030492, + "learning_rate": 4.8808653328076605e-06, + "loss": 0.4132, + "step": 9811 + }, + { + "epoch": 0.597509362725695, + "grad_norm": 0.9755488573305774, + "learning_rate": 4.880840992326055e-06, + "loss": 0.4639, + "step": 9812 + }, + { + "epoch": 0.5975702585025728, + "grad_norm": 1.0388016866453664, + "learning_rate": 4.880816649418897e-06, + "loss": 0.4791, + "step": 9813 + }, + { + "epoch": 0.5976311542794507, + "grad_norm": 1.0557172182610817, + "learning_rate": 4.8807923040862135e-06, + "loss": 0.3882, + "step": 9814 + }, + { + "epoch": 0.5976920500563286, + "grad_norm": 0.9565379912267681, + "learning_rate": 4.880767956328027e-06, + "loss": 0.4538, + "step": 9815 + }, + { + "epoch": 0.5977529458332065, + "grad_norm": 0.9636187304926181, + "learning_rate": 4.880743606144365e-06, + "loss": 0.4982, + "step": 9816 + }, + { + "epoch": 0.5978138416100843, + "grad_norm": 1.0021747726989465, + "learning_rate": 4.88071925353525e-06, + "loss": 0.4082, + "step": 9817 + }, + { + "epoch": 0.5978747373869622, + "grad_norm": 0.9291781253678632, + "learning_rate": 4.880694898500709e-06, + "loss": 0.4649, + "step": 9818 + }, + { + "epoch": 0.5979356331638401, + "grad_norm": 1.0216259736153015, + "learning_rate": 4.880670541040764e-06, + "loss": 0.4533, + "step": 9819 + }, + { + "epoch": 0.597996528940718, + "grad_norm": 0.9991481146189904, + "learning_rate": 4.880646181155442e-06, + "loss": 0.4484, + "step": 9820 + }, + { + "epoch": 0.5980574247175958, + "grad_norm": 1.069505205352075, + "learning_rate": 4.880621818844767e-06, + "loss": 0.4057, + "step": 9821 + }, + { + "epoch": 0.5981183204944737, + "grad_norm": 1.0641575142701016, + "learning_rate": 4.880597454108764e-06, + "loss": 0.464, + "step": 9822 + }, + { + "epoch": 0.5981792162713516, + "grad_norm": 0.9708372652981204, + "learning_rate": 4.8805730869474575e-06, + "loss": 0.4522, + "step": 9823 + }, + { + "epoch": 0.5982401120482295, + "grad_norm": 0.9990314804126853, + "learning_rate": 4.880548717360874e-06, + "loss": 0.4568, + "step": 9824 + }, + { + "epoch": 0.5983010078251073, + "grad_norm": 1.0448339710898413, + "learning_rate": 4.880524345349036e-06, + "loss": 0.4338, + "step": 9825 + }, + { + "epoch": 0.5983619036019852, + "grad_norm": 1.0233129166102464, + "learning_rate": 4.880499970911969e-06, + "loss": 0.3893, + "step": 9826 + }, + { + "epoch": 0.5984227993788631, + "grad_norm": 1.0414047001080011, + "learning_rate": 4.880475594049698e-06, + "loss": 0.4495, + "step": 9827 + }, + { + "epoch": 0.598483695155741, + "grad_norm": 1.0380219155404506, + "learning_rate": 4.880451214762249e-06, + "loss": 0.402, + "step": 9828 + }, + { + "epoch": 0.5985445909326188, + "grad_norm": 1.0099993881890783, + "learning_rate": 4.880426833049645e-06, + "loss": 0.4546, + "step": 9829 + }, + { + "epoch": 0.5986054867094966, + "grad_norm": 0.9933688215144763, + "learning_rate": 4.880402448911912e-06, + "loss": 0.4577, + "step": 9830 + }, + { + "epoch": 0.5986663824863746, + "grad_norm": 1.004306768632997, + "learning_rate": 4.8803780623490734e-06, + "loss": 0.4226, + "step": 9831 + }, + { + "epoch": 0.5987272782632524, + "grad_norm": 1.0140740729513753, + "learning_rate": 4.880353673361157e-06, + "loss": 0.4227, + "step": 9832 + }, + { + "epoch": 0.5987881740401303, + "grad_norm": 1.0479556183008456, + "learning_rate": 4.880329281948184e-06, + "loss": 0.3861, + "step": 9833 + }, + { + "epoch": 0.5988490698170081, + "grad_norm": 1.1123167226466035, + "learning_rate": 4.8803048881101825e-06, + "loss": 0.3916, + "step": 9834 + }, + { + "epoch": 0.5989099655938861, + "grad_norm": 1.0037017077177102, + "learning_rate": 4.8802804918471746e-06, + "loss": 0.4431, + "step": 9835 + }, + { + "epoch": 0.5989708613707639, + "grad_norm": 0.9583207631808922, + "learning_rate": 4.880256093159187e-06, + "loss": 0.4611, + "step": 9836 + }, + { + "epoch": 0.5990317571476418, + "grad_norm": 1.067034406625459, + "learning_rate": 4.880231692046244e-06, + "loss": 0.4776, + "step": 9837 + }, + { + "epoch": 0.5990926529245196, + "grad_norm": 0.9463256099204874, + "learning_rate": 4.88020728850837e-06, + "loss": 0.4106, + "step": 9838 + }, + { + "epoch": 0.5991535487013976, + "grad_norm": 0.9504231826466979, + "learning_rate": 4.880182882545591e-06, + "loss": 0.4633, + "step": 9839 + }, + { + "epoch": 0.5992144444782754, + "grad_norm": 0.9430634195649348, + "learning_rate": 4.880158474157931e-06, + "loss": 0.433, + "step": 9840 + }, + { + "epoch": 0.5992753402551533, + "grad_norm": 1.0782419298681312, + "learning_rate": 4.880134063345415e-06, + "loss": 0.4963, + "step": 9841 + }, + { + "epoch": 0.5993362360320312, + "grad_norm": 1.0378391864130183, + "learning_rate": 4.880109650108067e-06, + "loss": 0.451, + "step": 9842 + }, + { + "epoch": 0.5993971318089091, + "grad_norm": 1.0770519748951415, + "learning_rate": 4.880085234445913e-06, + "loss": 0.3952, + "step": 9843 + }, + { + "epoch": 0.5994580275857869, + "grad_norm": 0.9548532555685743, + "learning_rate": 4.880060816358979e-06, + "loss": 0.4286, + "step": 9844 + }, + { + "epoch": 0.5995189233626648, + "grad_norm": 1.0840081956179692, + "learning_rate": 4.880036395847288e-06, + "loss": 0.408, + "step": 9845 + }, + { + "epoch": 0.5995798191395427, + "grad_norm": 1.0604589188436184, + "learning_rate": 4.880011972910865e-06, + "loss": 0.4549, + "step": 9846 + }, + { + "epoch": 0.5996407149164206, + "grad_norm": 1.023069775395982, + "learning_rate": 4.879987547549735e-06, + "loss": 0.4152, + "step": 9847 + }, + { + "epoch": 0.5997016106932984, + "grad_norm": 1.01301607830543, + "learning_rate": 4.879963119763924e-06, + "loss": 0.4778, + "step": 9848 + }, + { + "epoch": 0.5997625064701763, + "grad_norm": 0.9691680257195163, + "learning_rate": 4.879938689553455e-06, + "loss": 0.4154, + "step": 9849 + }, + { + "epoch": 0.5998234022470542, + "grad_norm": 0.9605349690684272, + "learning_rate": 4.879914256918355e-06, + "loss": 0.4643, + "step": 9850 + }, + { + "epoch": 0.5998842980239321, + "grad_norm": 0.9836913389743651, + "learning_rate": 4.879889821858647e-06, + "loss": 0.3856, + "step": 9851 + }, + { + "epoch": 0.5999451938008099, + "grad_norm": 1.0497961953583186, + "learning_rate": 4.8798653843743575e-06, + "loss": 0.4406, + "step": 9852 + }, + { + "epoch": 0.6000060895776878, + "grad_norm": 1.096003649247723, + "learning_rate": 4.87984094446551e-06, + "loss": 0.4017, + "step": 9853 + }, + { + "epoch": 0.6000669853545657, + "grad_norm": 0.9305195537376325, + "learning_rate": 4.8798165021321306e-06, + "loss": 0.4763, + "step": 9854 + }, + { + "epoch": 0.6001278811314436, + "grad_norm": 1.0077791948266153, + "learning_rate": 4.879792057374243e-06, + "loss": 0.4515, + "step": 9855 + }, + { + "epoch": 0.6001887769083214, + "grad_norm": 0.9721837972591665, + "learning_rate": 4.879767610191874e-06, + "loss": 0.4456, + "step": 9856 + }, + { + "epoch": 0.6002496726851992, + "grad_norm": 1.0942635006580617, + "learning_rate": 4.8797431605850456e-06, + "loss": 0.3347, + "step": 9857 + }, + { + "epoch": 0.6003105684620772, + "grad_norm": 1.0412642081256265, + "learning_rate": 4.879718708553785e-06, + "loss": 0.4136, + "step": 9858 + }, + { + "epoch": 0.600371464238955, + "grad_norm": 0.9324047472078808, + "learning_rate": 4.879694254098117e-06, + "loss": 0.4091, + "step": 9859 + }, + { + "epoch": 0.6004323600158329, + "grad_norm": 1.0942349310836252, + "learning_rate": 4.879669797218065e-06, + "loss": 0.3847, + "step": 9860 + }, + { + "epoch": 0.6004932557927107, + "grad_norm": 0.9831767996899281, + "learning_rate": 4.879645337913656e-06, + "loss": 0.4844, + "step": 9861 + }, + { + "epoch": 0.6005541515695887, + "grad_norm": 0.9708666061225684, + "learning_rate": 4.879620876184912e-06, + "loss": 0.4614, + "step": 9862 + }, + { + "epoch": 0.6006150473464665, + "grad_norm": 1.071455413839032, + "learning_rate": 4.879596412031862e-06, + "loss": 0.4381, + "step": 9863 + }, + { + "epoch": 0.6006759431233444, + "grad_norm": 1.0634338228471185, + "learning_rate": 4.879571945454526e-06, + "loss": 0.4837, + "step": 9864 + }, + { + "epoch": 0.6007368389002222, + "grad_norm": 1.034242832051259, + "learning_rate": 4.879547476452933e-06, + "loss": 0.4181, + "step": 9865 + }, + { + "epoch": 0.6007977346771002, + "grad_norm": 1.0550014927414983, + "learning_rate": 4.8795230050271075e-06, + "loss": 0.4295, + "step": 9866 + }, + { + "epoch": 0.600858630453978, + "grad_norm": 1.0288821838221631, + "learning_rate": 4.8794985311770726e-06, + "loss": 0.4044, + "step": 9867 + }, + { + "epoch": 0.6009195262308559, + "grad_norm": 0.9799947995661565, + "learning_rate": 4.879474054902854e-06, + "loss": 0.4374, + "step": 9868 + }, + { + "epoch": 0.6009804220077337, + "grad_norm": 0.9700030585892502, + "learning_rate": 4.879449576204477e-06, + "loss": 0.4324, + "step": 9869 + }, + { + "epoch": 0.6010413177846117, + "grad_norm": 0.9502215147562701, + "learning_rate": 4.879425095081965e-06, + "loss": 0.4731, + "step": 9870 + }, + { + "epoch": 0.6011022135614895, + "grad_norm": 1.0028341109639676, + "learning_rate": 4.879400611535345e-06, + "loss": 0.4471, + "step": 9871 + }, + { + "epoch": 0.6011631093383674, + "grad_norm": 1.0176645027515456, + "learning_rate": 4.879376125564642e-06, + "loss": 0.4317, + "step": 9872 + }, + { + "epoch": 0.6012240051152452, + "grad_norm": 1.020206824762612, + "learning_rate": 4.879351637169879e-06, + "loss": 0.4213, + "step": 9873 + }, + { + "epoch": 0.6012849008921232, + "grad_norm": 1.0043254890192173, + "learning_rate": 4.8793271463510825e-06, + "loss": 0.4283, + "step": 9874 + }, + { + "epoch": 0.601345796669001, + "grad_norm": 0.9747826952435772, + "learning_rate": 4.8793026531082764e-06, + "loss": 0.4836, + "step": 9875 + }, + { + "epoch": 0.6014066924458789, + "grad_norm": 1.0229407548882612, + "learning_rate": 4.8792781574414874e-06, + "loss": 0.5035, + "step": 9876 + }, + { + "epoch": 0.6014675882227567, + "grad_norm": 1.132232071806221, + "learning_rate": 4.8792536593507374e-06, + "loss": 0.381, + "step": 9877 + }, + { + "epoch": 0.6015284839996347, + "grad_norm": 1.0226588847639697, + "learning_rate": 4.879229158836055e-06, + "loss": 0.4235, + "step": 9878 + }, + { + "epoch": 0.6015893797765125, + "grad_norm": 0.981876945338781, + "learning_rate": 4.879204655897463e-06, + "loss": 0.451, + "step": 9879 + }, + { + "epoch": 0.6016502755533903, + "grad_norm": 1.018489084216555, + "learning_rate": 4.879180150534986e-06, + "loss": 0.3749, + "step": 9880 + }, + { + "epoch": 0.6017111713302682, + "grad_norm": 1.0420433237497555, + "learning_rate": 4.8791556427486505e-06, + "loss": 0.4167, + "step": 9881 + }, + { + "epoch": 0.6017720671071461, + "grad_norm": 0.9918911372459923, + "learning_rate": 4.87913113253848e-06, + "loss": 0.4106, + "step": 9882 + }, + { + "epoch": 0.601832962884024, + "grad_norm": 0.9422744452483528, + "learning_rate": 4.8791066199045016e-06, + "loss": 0.4572, + "step": 9883 + }, + { + "epoch": 0.6018938586609018, + "grad_norm": 1.0343298378433774, + "learning_rate": 4.879082104846737e-06, + "loss": 0.3556, + "step": 9884 + }, + { + "epoch": 0.6019547544377798, + "grad_norm": 1.0676502426066348, + "learning_rate": 4.879057587365214e-06, + "loss": 0.3644, + "step": 9885 + }, + { + "epoch": 0.6020156502146576, + "grad_norm": 0.9149136181513977, + "learning_rate": 4.879033067459956e-06, + "loss": 0.5038, + "step": 9886 + }, + { + "epoch": 0.6020765459915355, + "grad_norm": 1.0158592251955638, + "learning_rate": 4.87900854513099e-06, + "loss": 0.4011, + "step": 9887 + }, + { + "epoch": 0.6021374417684133, + "grad_norm": 1.0435704872844849, + "learning_rate": 4.878984020378338e-06, + "loss": 0.4283, + "step": 9888 + }, + { + "epoch": 0.6021983375452913, + "grad_norm": 1.0673256041146097, + "learning_rate": 4.878959493202026e-06, + "loss": 0.3733, + "step": 9889 + }, + { + "epoch": 0.6022592333221691, + "grad_norm": 0.9951571856981565, + "learning_rate": 4.878934963602082e-06, + "loss": 0.4716, + "step": 9890 + }, + { + "epoch": 0.602320129099047, + "grad_norm": 1.0097412258467693, + "learning_rate": 4.878910431578527e-06, + "loss": 0.4594, + "step": 9891 + }, + { + "epoch": 0.6023810248759248, + "grad_norm": 0.9488863121071889, + "learning_rate": 4.878885897131388e-06, + "loss": 0.4502, + "step": 9892 + }, + { + "epoch": 0.6024419206528028, + "grad_norm": 0.9307001002969415, + "learning_rate": 4.878861360260688e-06, + "loss": 0.5138, + "step": 9893 + }, + { + "epoch": 0.6025028164296806, + "grad_norm": 1.0571478864201693, + "learning_rate": 4.878836820966455e-06, + "loss": 0.4831, + "step": 9894 + }, + { + "epoch": 0.6025637122065585, + "grad_norm": 1.0404070534735195, + "learning_rate": 4.8788122792487125e-06, + "loss": 0.4249, + "step": 9895 + }, + { + "epoch": 0.6026246079834363, + "grad_norm": 1.0201767354016187, + "learning_rate": 4.878787735107485e-06, + "loss": 0.4206, + "step": 9896 + }, + { + "epoch": 0.6026855037603143, + "grad_norm": 1.0596430052044847, + "learning_rate": 4.878763188542799e-06, + "loss": 0.3673, + "step": 9897 + }, + { + "epoch": 0.6027463995371921, + "grad_norm": 0.9941537123411395, + "learning_rate": 4.8787386395546775e-06, + "loss": 0.4288, + "step": 9898 + }, + { + "epoch": 0.60280729531407, + "grad_norm": 0.978572159937003, + "learning_rate": 4.878714088143146e-06, + "loss": 0.4214, + "step": 9899 + }, + { + "epoch": 0.6028681910909478, + "grad_norm": 0.9846366975793795, + "learning_rate": 4.878689534308231e-06, + "loss": 0.4662, + "step": 9900 + }, + { + "epoch": 0.6029290868678258, + "grad_norm": 0.9490386021913603, + "learning_rate": 4.8786649780499565e-06, + "loss": 0.5227, + "step": 9901 + }, + { + "epoch": 0.6029899826447036, + "grad_norm": 1.0283558982253838, + "learning_rate": 4.878640419368347e-06, + "loss": 0.4287, + "step": 9902 + }, + { + "epoch": 0.6030508784215814, + "grad_norm": 1.0077145220006956, + "learning_rate": 4.878615858263429e-06, + "loss": 0.4195, + "step": 9903 + }, + { + "epoch": 0.6031117741984593, + "grad_norm": 1.011581912955907, + "learning_rate": 4.878591294735226e-06, + "loss": 0.4132, + "step": 9904 + }, + { + "epoch": 0.6031726699753373, + "grad_norm": 1.116288490451952, + "learning_rate": 4.878566728783764e-06, + "loss": 0.4283, + "step": 9905 + }, + { + "epoch": 0.6032335657522151, + "grad_norm": 1.0589156367869947, + "learning_rate": 4.878542160409067e-06, + "loss": 0.4356, + "step": 9906 + }, + { + "epoch": 0.6032944615290929, + "grad_norm": 0.9050795000833913, + "learning_rate": 4.8785175896111615e-06, + "loss": 0.4616, + "step": 9907 + }, + { + "epoch": 0.6033553573059708, + "grad_norm": 1.1304641800447734, + "learning_rate": 4.878493016390071e-06, + "loss": 0.4249, + "step": 9908 + }, + { + "epoch": 0.6034162530828487, + "grad_norm": 0.9975819757171518, + "learning_rate": 4.878468440745822e-06, + "loss": 0.4802, + "step": 9909 + }, + { + "epoch": 0.6034771488597266, + "grad_norm": 1.0559966704601744, + "learning_rate": 4.878443862678438e-06, + "loss": 0.404, + "step": 9910 + }, + { + "epoch": 0.6035380446366044, + "grad_norm": 0.999116558559276, + "learning_rate": 4.878419282187946e-06, + "loss": 0.4641, + "step": 9911 + }, + { + "epoch": 0.6035989404134823, + "grad_norm": 1.0234395681634731, + "learning_rate": 4.878394699274369e-06, + "loss": 0.4085, + "step": 9912 + }, + { + "epoch": 0.6036598361903602, + "grad_norm": 1.0624901541251919, + "learning_rate": 4.8783701139377325e-06, + "loss": 0.5284, + "step": 9913 + }, + { + "epoch": 0.6037207319672381, + "grad_norm": 1.0938064610993294, + "learning_rate": 4.878345526178063e-06, + "loss": 0.4066, + "step": 9914 + }, + { + "epoch": 0.6037816277441159, + "grad_norm": 1.0005131663555185, + "learning_rate": 4.878320935995385e-06, + "loss": 0.4189, + "step": 9915 + }, + { + "epoch": 0.6038425235209938, + "grad_norm": 1.0194084699386263, + "learning_rate": 4.878296343389721e-06, + "loss": 0.4225, + "step": 9916 + }, + { + "epoch": 0.6039034192978717, + "grad_norm": 1.034342999110174, + "learning_rate": 4.878271748361099e-06, + "loss": 0.3661, + "step": 9917 + }, + { + "epoch": 0.6039643150747496, + "grad_norm": 0.9977054733055476, + "learning_rate": 4.878247150909544e-06, + "loss": 0.4968, + "step": 9918 + }, + { + "epoch": 0.6040252108516274, + "grad_norm": 1.02195275003352, + "learning_rate": 4.87822255103508e-06, + "loss": 0.3751, + "step": 9919 + }, + { + "epoch": 0.6040861066285053, + "grad_norm": 0.9905628768883296, + "learning_rate": 4.878197948737732e-06, + "loss": 0.3903, + "step": 9920 + }, + { + "epoch": 0.6041470024053832, + "grad_norm": 1.110461206442431, + "learning_rate": 4.878173344017525e-06, + "loss": 0.4065, + "step": 9921 + }, + { + "epoch": 0.6042078981822611, + "grad_norm": 1.0307457883505162, + "learning_rate": 4.878148736874485e-06, + "loss": 0.5041, + "step": 9922 + }, + { + "epoch": 0.6042687939591389, + "grad_norm": 1.0587743470760775, + "learning_rate": 4.878124127308637e-06, + "loss": 0.4797, + "step": 9923 + }, + { + "epoch": 0.6043296897360169, + "grad_norm": 1.0732128666499416, + "learning_rate": 4.878099515320004e-06, + "loss": 0.4841, + "step": 9924 + }, + { + "epoch": 0.6043905855128947, + "grad_norm": 1.1502605397051695, + "learning_rate": 4.878074900908614e-06, + "loss": 0.4077, + "step": 9925 + }, + { + "epoch": 0.6044514812897726, + "grad_norm": 1.0328232203253713, + "learning_rate": 4.87805028407449e-06, + "loss": 0.4516, + "step": 9926 + }, + { + "epoch": 0.6045123770666504, + "grad_norm": 0.9724540826730335, + "learning_rate": 4.878025664817658e-06, + "loss": 0.4137, + "step": 9927 + }, + { + "epoch": 0.6045732728435284, + "grad_norm": 1.0328594044184871, + "learning_rate": 4.878001043138143e-06, + "loss": 0.5014, + "step": 9928 + }, + { + "epoch": 0.6046341686204062, + "grad_norm": 1.002254562999832, + "learning_rate": 4.87797641903597e-06, + "loss": 0.4557, + "step": 9929 + }, + { + "epoch": 0.604695064397284, + "grad_norm": 1.115673329768141, + "learning_rate": 4.877951792511164e-06, + "loss": 0.433, + "step": 9930 + }, + { + "epoch": 0.6047559601741619, + "grad_norm": 1.0038307293927342, + "learning_rate": 4.87792716356375e-06, + "loss": 0.4341, + "step": 9931 + }, + { + "epoch": 0.6048168559510398, + "grad_norm": 1.017155122442959, + "learning_rate": 4.877902532193754e-06, + "loss": 0.477, + "step": 9932 + }, + { + "epoch": 0.6048777517279177, + "grad_norm": 1.0406850711716804, + "learning_rate": 4.8778778984012e-06, + "loss": 0.4436, + "step": 9933 + }, + { + "epoch": 0.6049386475047955, + "grad_norm": 1.0037549212515826, + "learning_rate": 4.877853262186113e-06, + "loss": 0.4457, + "step": 9934 + }, + { + "epoch": 0.6049995432816734, + "grad_norm": 0.9117312882065775, + "learning_rate": 4.877828623548519e-06, + "loss": 0.5326, + "step": 9935 + }, + { + "epoch": 0.6050604390585513, + "grad_norm": 1.0498564272566655, + "learning_rate": 4.877803982488443e-06, + "loss": 0.4456, + "step": 9936 + }, + { + "epoch": 0.6051213348354292, + "grad_norm": 1.0768416162883125, + "learning_rate": 4.877779339005909e-06, + "loss": 0.4225, + "step": 9937 + }, + { + "epoch": 0.605182230612307, + "grad_norm": 1.040279113583083, + "learning_rate": 4.877754693100943e-06, + "loss": 0.4607, + "step": 9938 + }, + { + "epoch": 0.6052431263891849, + "grad_norm": 1.1227669345640523, + "learning_rate": 4.87773004477357e-06, + "loss": 0.3499, + "step": 9939 + }, + { + "epoch": 0.6053040221660628, + "grad_norm": 1.0404127765082687, + "learning_rate": 4.877705394023814e-06, + "loss": 0.4422, + "step": 9940 + }, + { + "epoch": 0.6053649179429407, + "grad_norm": 0.99368873298561, + "learning_rate": 4.8776807408517026e-06, + "loss": 0.4171, + "step": 9941 + }, + { + "epoch": 0.6054258137198185, + "grad_norm": 1.1464573106875378, + "learning_rate": 4.877656085257259e-06, + "loss": 0.3906, + "step": 9942 + }, + { + "epoch": 0.6054867094966964, + "grad_norm": 0.9341322085299187, + "learning_rate": 4.877631427240509e-06, + "loss": 0.4815, + "step": 9943 + }, + { + "epoch": 0.6055476052735743, + "grad_norm": 1.0501389554629608, + "learning_rate": 4.877606766801478e-06, + "loss": 0.3746, + "step": 9944 + }, + { + "epoch": 0.6056085010504522, + "grad_norm": 0.9724022128405656, + "learning_rate": 4.877582103940189e-06, + "loss": 0.3829, + "step": 9945 + }, + { + "epoch": 0.60566939682733, + "grad_norm": 0.9521180017550209, + "learning_rate": 4.8775574386566704e-06, + "loss": 0.4526, + "step": 9946 + }, + { + "epoch": 0.6057302926042079, + "grad_norm": 0.964839550382307, + "learning_rate": 4.877532770950946e-06, + "loss": 0.4029, + "step": 9947 + }, + { + "epoch": 0.6057911883810858, + "grad_norm": 1.0334365117163657, + "learning_rate": 4.877508100823039e-06, + "loss": 0.4503, + "step": 9948 + }, + { + "epoch": 0.6058520841579637, + "grad_norm": 0.9779227178436383, + "learning_rate": 4.8774834282729775e-06, + "loss": 0.4364, + "step": 9949 + }, + { + "epoch": 0.6059129799348415, + "grad_norm": 1.0090851061210424, + "learning_rate": 4.877458753300784e-06, + "loss": 0.4217, + "step": 9950 + }, + { + "epoch": 0.6059738757117193, + "grad_norm": 0.9609580864603836, + "learning_rate": 4.877434075906486e-06, + "loss": 0.5072, + "step": 9951 + }, + { + "epoch": 0.6060347714885973, + "grad_norm": 1.1117573522054047, + "learning_rate": 4.877409396090107e-06, + "loss": 0.4369, + "step": 9952 + }, + { + "epoch": 0.6060956672654751, + "grad_norm": 0.9530741355670725, + "learning_rate": 4.877384713851674e-06, + "loss": 0.4082, + "step": 9953 + }, + { + "epoch": 0.606156563042353, + "grad_norm": 0.9087070382005531, + "learning_rate": 4.877360029191209e-06, + "loss": 0.4158, + "step": 9954 + }, + { + "epoch": 0.6062174588192308, + "grad_norm": 1.0650291860280852, + "learning_rate": 4.877335342108741e-06, + "loss": 0.389, + "step": 9955 + }, + { + "epoch": 0.6062783545961088, + "grad_norm": 0.9867702771872968, + "learning_rate": 4.877310652604292e-06, + "loss": 0.4277, + "step": 9956 + }, + { + "epoch": 0.6063392503729866, + "grad_norm": 1.052579341688216, + "learning_rate": 4.877285960677889e-06, + "loss": 0.4838, + "step": 9957 + }, + { + "epoch": 0.6064001461498645, + "grad_norm": 1.0537474002294454, + "learning_rate": 4.877261266329556e-06, + "loss": 0.5296, + "step": 9958 + }, + { + "epoch": 0.6064610419267423, + "grad_norm": 1.108823351711405, + "learning_rate": 4.877236569559318e-06, + "loss": 0.4898, + "step": 9959 + }, + { + "epoch": 0.6065219377036203, + "grad_norm": 0.9956379313357218, + "learning_rate": 4.877211870367202e-06, + "loss": 0.3792, + "step": 9960 + }, + { + "epoch": 0.6065828334804981, + "grad_norm": 1.0537440221342018, + "learning_rate": 4.877187168753231e-06, + "loss": 0.4325, + "step": 9961 + }, + { + "epoch": 0.606643729257376, + "grad_norm": 1.0889898783303684, + "learning_rate": 4.877162464717432e-06, + "loss": 0.477, + "step": 9962 + }, + { + "epoch": 0.6067046250342538, + "grad_norm": 0.9874194498192986, + "learning_rate": 4.8771377582598284e-06, + "loss": 0.4421, + "step": 9963 + }, + { + "epoch": 0.6067655208111318, + "grad_norm": 1.1093363624948709, + "learning_rate": 4.877113049380446e-06, + "loss": 0.3716, + "step": 9964 + }, + { + "epoch": 0.6068264165880096, + "grad_norm": 0.9445859240526714, + "learning_rate": 4.877088338079312e-06, + "loss": 0.4583, + "step": 9965 + }, + { + "epoch": 0.6068873123648875, + "grad_norm": 1.0619799162686079, + "learning_rate": 4.877063624356448e-06, + "loss": 0.4138, + "step": 9966 + }, + { + "epoch": 0.6069482081417654, + "grad_norm": 0.984655852355331, + "learning_rate": 4.877038908211882e-06, + "loss": 0.4178, + "step": 9967 + }, + { + "epoch": 0.6070091039186433, + "grad_norm": 1.041936494895896, + "learning_rate": 4.877014189645639e-06, + "loss": 0.4288, + "step": 9968 + }, + { + "epoch": 0.6070699996955211, + "grad_norm": 1.0188135868117887, + "learning_rate": 4.876989468657742e-06, + "loss": 0.462, + "step": 9969 + }, + { + "epoch": 0.607130895472399, + "grad_norm": 1.1041921256105993, + "learning_rate": 4.8769647452482175e-06, + "loss": 0.3749, + "step": 9970 + }, + { + "epoch": 0.6071917912492769, + "grad_norm": 1.07216870451328, + "learning_rate": 4.876940019417091e-06, + "loss": 0.4151, + "step": 9971 + }, + { + "epoch": 0.6072526870261548, + "grad_norm": 1.0419821295009157, + "learning_rate": 4.876915291164388e-06, + "loss": 0.4842, + "step": 9972 + }, + { + "epoch": 0.6073135828030326, + "grad_norm": 1.0147577420018665, + "learning_rate": 4.8768905604901326e-06, + "loss": 0.4227, + "step": 9973 + }, + { + "epoch": 0.6073744785799104, + "grad_norm": 0.9647188081354123, + "learning_rate": 4.87686582739435e-06, + "loss": 0.4467, + "step": 9974 + }, + { + "epoch": 0.6074353743567884, + "grad_norm": 0.991285769955547, + "learning_rate": 4.876841091877067e-06, + "loss": 0.452, + "step": 9975 + }, + { + "epoch": 0.6074962701336662, + "grad_norm": 1.001318224419885, + "learning_rate": 4.8768163539383075e-06, + "loss": 0.4685, + "step": 9976 + }, + { + "epoch": 0.6075571659105441, + "grad_norm": 1.023810745815772, + "learning_rate": 4.876791613578097e-06, + "loss": 0.4029, + "step": 9977 + }, + { + "epoch": 0.6076180616874219, + "grad_norm": 1.009034256706948, + "learning_rate": 4.87676687079646e-06, + "loss": 0.4174, + "step": 9978 + }, + { + "epoch": 0.6076789574642999, + "grad_norm": 0.9579247339253645, + "learning_rate": 4.8767421255934225e-06, + "loss": 0.4231, + "step": 9979 + }, + { + "epoch": 0.6077398532411777, + "grad_norm": 0.9872622859352331, + "learning_rate": 4.876717377969009e-06, + "loss": 0.4239, + "step": 9980 + }, + { + "epoch": 0.6078007490180556, + "grad_norm": 1.0483182972482024, + "learning_rate": 4.876692627923246e-06, + "loss": 0.4328, + "step": 9981 + }, + { + "epoch": 0.6078616447949334, + "grad_norm": 1.024677424110546, + "learning_rate": 4.8766678754561584e-06, + "loss": 0.3821, + "step": 9982 + }, + { + "epoch": 0.6079225405718114, + "grad_norm": 1.0067440333422737, + "learning_rate": 4.87664312056777e-06, + "loss": 0.4827, + "step": 9983 + }, + { + "epoch": 0.6079834363486892, + "grad_norm": 0.9643001946457717, + "learning_rate": 4.876618363258108e-06, + "loss": 0.4927, + "step": 9984 + }, + { + "epoch": 0.6080443321255671, + "grad_norm": 1.0798266588326952, + "learning_rate": 4.876593603527196e-06, + "loss": 0.3689, + "step": 9985 + }, + { + "epoch": 0.6081052279024449, + "grad_norm": 0.9876318093576433, + "learning_rate": 4.876568841375059e-06, + "loss": 0.4209, + "step": 9986 + }, + { + "epoch": 0.6081661236793229, + "grad_norm": 0.9959468045978758, + "learning_rate": 4.876544076801724e-06, + "loss": 0.5039, + "step": 9987 + }, + { + "epoch": 0.6082270194562007, + "grad_norm": 1.0075920800480112, + "learning_rate": 4.876519309807216e-06, + "loss": 0.4059, + "step": 9988 + }, + { + "epoch": 0.6082879152330786, + "grad_norm": 1.0519466455320574, + "learning_rate": 4.876494540391559e-06, + "loss": 0.3946, + "step": 9989 + }, + { + "epoch": 0.6083488110099564, + "grad_norm": 1.0643730475703035, + "learning_rate": 4.876469768554778e-06, + "loss": 0.4265, + "step": 9990 + }, + { + "epoch": 0.6084097067868344, + "grad_norm": 0.9681130582564519, + "learning_rate": 4.876444994296899e-06, + "loss": 0.4491, + "step": 9991 + }, + { + "epoch": 0.6084706025637122, + "grad_norm": 0.9684232495976419, + "learning_rate": 4.876420217617949e-06, + "loss": 0.4903, + "step": 9992 + }, + { + "epoch": 0.6085314983405901, + "grad_norm": 1.0378996632904975, + "learning_rate": 4.87639543851795e-06, + "loss": 0.4519, + "step": 9993 + }, + { + "epoch": 0.6085923941174679, + "grad_norm": 1.0272524595813448, + "learning_rate": 4.876370656996929e-06, + "loss": 0.4844, + "step": 9994 + }, + { + "epoch": 0.6086532898943459, + "grad_norm": 1.0606580523047209, + "learning_rate": 4.876345873054911e-06, + "loss": 0.4852, + "step": 9995 + }, + { + "epoch": 0.6087141856712237, + "grad_norm": 0.9153879153531528, + "learning_rate": 4.876321086691921e-06, + "loss": 0.456, + "step": 9996 + }, + { + "epoch": 0.6087750814481016, + "grad_norm": 0.9685952517923018, + "learning_rate": 4.876296297907985e-06, + "loss": 0.4578, + "step": 9997 + }, + { + "epoch": 0.6088359772249794, + "grad_norm": 1.0354785499121386, + "learning_rate": 4.876271506703127e-06, + "loss": 0.4665, + "step": 9998 + }, + { + "epoch": 0.6088968730018574, + "grad_norm": 1.025802979190463, + "learning_rate": 4.8762467130773734e-06, + "loss": 0.4317, + "step": 9999 + }, + { + "epoch": 0.6089577687787352, + "grad_norm": 0.996778409186113, + "learning_rate": 4.87622191703075e-06, + "loss": 0.3939, + "step": 10000 + }, + { + "epoch": 0.609018664555613, + "grad_norm": 1.0595706094921649, + "learning_rate": 4.876197118563279e-06, + "loss": 0.4395, + "step": 10001 + }, + { + "epoch": 0.6090795603324909, + "grad_norm": 1.0035431047908228, + "learning_rate": 4.87617231767499e-06, + "loss": 0.4295, + "step": 10002 + }, + { + "epoch": 0.6091404561093688, + "grad_norm": 1.00204181389756, + "learning_rate": 4.876147514365905e-06, + "loss": 0.4758, + "step": 10003 + }, + { + "epoch": 0.6092013518862467, + "grad_norm": 1.0035075445722785, + "learning_rate": 4.87612270863605e-06, + "loss": 0.3988, + "step": 10004 + }, + { + "epoch": 0.6092622476631245, + "grad_norm": 0.9627635868945686, + "learning_rate": 4.876097900485452e-06, + "loss": 0.4338, + "step": 10005 + }, + { + "epoch": 0.6093231434400025, + "grad_norm": 0.9369399507630256, + "learning_rate": 4.876073089914133e-06, + "loss": 0.4184, + "step": 10006 + }, + { + "epoch": 0.6093840392168803, + "grad_norm": 1.0109302159230957, + "learning_rate": 4.8760482769221205e-06, + "loss": 0.4595, + "step": 10007 + }, + { + "epoch": 0.6094449349937582, + "grad_norm": 1.0660449442259101, + "learning_rate": 4.87602346150944e-06, + "loss": 0.4349, + "step": 10008 + }, + { + "epoch": 0.609505830770636, + "grad_norm": 0.9900742798187294, + "learning_rate": 4.875998643676117e-06, + "loss": 0.4168, + "step": 10009 + }, + { + "epoch": 0.609566726547514, + "grad_norm": 1.1366397509620938, + "learning_rate": 4.875973823422174e-06, + "loss": 0.3712, + "step": 10010 + }, + { + "epoch": 0.6096276223243918, + "grad_norm": 1.029892565611792, + "learning_rate": 4.87594900074764e-06, + "loss": 0.4192, + "step": 10011 + }, + { + "epoch": 0.6096885181012697, + "grad_norm": 0.9709671623472146, + "learning_rate": 4.8759241756525376e-06, + "loss": 0.4498, + "step": 10012 + }, + { + "epoch": 0.6097494138781475, + "grad_norm": 0.978026102417563, + "learning_rate": 4.875899348136893e-06, + "loss": 0.4201, + "step": 10013 + }, + { + "epoch": 0.6098103096550255, + "grad_norm": 0.9314615303394074, + "learning_rate": 4.875874518200732e-06, + "loss": 0.4549, + "step": 10014 + }, + { + "epoch": 0.6098712054319033, + "grad_norm": 1.1136188648887775, + "learning_rate": 4.87584968584408e-06, + "loss": 0.3929, + "step": 10015 + }, + { + "epoch": 0.6099321012087812, + "grad_norm": 1.020421588789804, + "learning_rate": 4.87582485106696e-06, + "loss": 0.4027, + "step": 10016 + }, + { + "epoch": 0.609992996985659, + "grad_norm": 0.9977077962103746, + "learning_rate": 4.8758000138694005e-06, + "loss": 0.4725, + "step": 10017 + }, + { + "epoch": 0.610053892762537, + "grad_norm": 0.9631222526492104, + "learning_rate": 4.875775174251425e-06, + "loss": 0.4188, + "step": 10018 + }, + { + "epoch": 0.6101147885394148, + "grad_norm": 0.9802633995619623, + "learning_rate": 4.875750332213059e-06, + "loss": 0.3978, + "step": 10019 + }, + { + "epoch": 0.6101756843162927, + "grad_norm": 0.9890283779539129, + "learning_rate": 4.875725487754328e-06, + "loss": 0.3498, + "step": 10020 + }, + { + "epoch": 0.6102365800931705, + "grad_norm": 1.0600155733968257, + "learning_rate": 4.875700640875257e-06, + "loss": 0.4017, + "step": 10021 + }, + { + "epoch": 0.6102974758700485, + "grad_norm": 1.0802683982079877, + "learning_rate": 4.875675791575872e-06, + "loss": 0.4199, + "step": 10022 + }, + { + "epoch": 0.6103583716469263, + "grad_norm": 1.0270983551134587, + "learning_rate": 4.875650939856198e-06, + "loss": 0.4667, + "step": 10023 + }, + { + "epoch": 0.6104192674238041, + "grad_norm": 1.039611703972009, + "learning_rate": 4.8756260857162596e-06, + "loss": 0.3881, + "step": 10024 + }, + { + "epoch": 0.610480163200682, + "grad_norm": 1.028038590583007, + "learning_rate": 4.875601229156083e-06, + "loss": 0.4249, + "step": 10025 + }, + { + "epoch": 0.61054105897756, + "grad_norm": 0.9548039320266061, + "learning_rate": 4.875576370175694e-06, + "loss": 0.4809, + "step": 10026 + }, + { + "epoch": 0.6106019547544378, + "grad_norm": 1.0002581375935269, + "learning_rate": 4.875551508775116e-06, + "loss": 0.4002, + "step": 10027 + }, + { + "epoch": 0.6106628505313156, + "grad_norm": 1.0238403132325045, + "learning_rate": 4.875526644954376e-06, + "loss": 0.4288, + "step": 10028 + }, + { + "epoch": 0.6107237463081935, + "grad_norm": 1.0776581319585352, + "learning_rate": 4.8755017787134995e-06, + "loss": 0.4265, + "step": 10029 + }, + { + "epoch": 0.6107846420850714, + "grad_norm": 1.0670226453455811, + "learning_rate": 4.87547691005251e-06, + "loss": 0.4567, + "step": 10030 + }, + { + "epoch": 0.6108455378619493, + "grad_norm": 1.0320613564647547, + "learning_rate": 4.875452038971435e-06, + "loss": 0.3807, + "step": 10031 + }, + { + "epoch": 0.6109064336388271, + "grad_norm": 0.9875347235275463, + "learning_rate": 4.875427165470298e-06, + "loss": 0.4481, + "step": 10032 + }, + { + "epoch": 0.610967329415705, + "grad_norm": 1.0448468527864547, + "learning_rate": 4.875402289549126e-06, + "loss": 0.3908, + "step": 10033 + }, + { + "epoch": 0.6110282251925829, + "grad_norm": 1.0753979659547437, + "learning_rate": 4.8753774112079435e-06, + "loss": 0.3936, + "step": 10034 + }, + { + "epoch": 0.6110891209694608, + "grad_norm": 0.9509959441475685, + "learning_rate": 4.8753525304467755e-06, + "loss": 0.4803, + "step": 10035 + }, + { + "epoch": 0.6111500167463386, + "grad_norm": 0.8735506422493521, + "learning_rate": 4.875327647265647e-06, + "loss": 0.4551, + "step": 10036 + }, + { + "epoch": 0.6112109125232165, + "grad_norm": 1.1154360210936556, + "learning_rate": 4.875302761664585e-06, + "loss": 0.3643, + "step": 10037 + }, + { + "epoch": 0.6112718083000944, + "grad_norm": 0.9681198791102753, + "learning_rate": 4.875277873643614e-06, + "loss": 0.5004, + "step": 10038 + }, + { + "epoch": 0.6113327040769723, + "grad_norm": 0.9961033750280378, + "learning_rate": 4.875252983202759e-06, + "loss": 0.3662, + "step": 10039 + }, + { + "epoch": 0.6113935998538501, + "grad_norm": 1.0481751055576898, + "learning_rate": 4.875228090342046e-06, + "loss": 0.4463, + "step": 10040 + }, + { + "epoch": 0.611454495630728, + "grad_norm": 1.0914321666511901, + "learning_rate": 4.8752031950615e-06, + "loss": 0.4054, + "step": 10041 + }, + { + "epoch": 0.6115153914076059, + "grad_norm": 0.924880756758412, + "learning_rate": 4.875178297361146e-06, + "loss": 0.4176, + "step": 10042 + }, + { + "epoch": 0.6115762871844838, + "grad_norm": 0.9578882479028762, + "learning_rate": 4.8751533972410094e-06, + "loss": 0.4199, + "step": 10043 + }, + { + "epoch": 0.6116371829613616, + "grad_norm": 1.0938828556596811, + "learning_rate": 4.875128494701117e-06, + "loss": 0.4214, + "step": 10044 + }, + { + "epoch": 0.6116980787382394, + "grad_norm": 1.039812640820652, + "learning_rate": 4.875103589741491e-06, + "loss": 0.4893, + "step": 10045 + }, + { + "epoch": 0.6117589745151174, + "grad_norm": 0.9454298173310649, + "learning_rate": 4.875078682362161e-06, + "loss": 0.4761, + "step": 10046 + }, + { + "epoch": 0.6118198702919952, + "grad_norm": 1.0734792520603356, + "learning_rate": 4.875053772563149e-06, + "loss": 0.4369, + "step": 10047 + }, + { + "epoch": 0.6118807660688731, + "grad_norm": 0.9397893906939193, + "learning_rate": 4.875028860344482e-06, + "loss": 0.4132, + "step": 10048 + }, + { + "epoch": 0.611941661845751, + "grad_norm": 1.0584948502992835, + "learning_rate": 4.875003945706185e-06, + "loss": 0.4549, + "step": 10049 + }, + { + "epoch": 0.6120025576226289, + "grad_norm": 1.049462280629462, + "learning_rate": 4.874979028648283e-06, + "loss": 0.3985, + "step": 10050 + }, + { + "epoch": 0.6120634533995067, + "grad_norm": 0.9811575494373137, + "learning_rate": 4.874954109170803e-06, + "loss": 0.3604, + "step": 10051 + }, + { + "epoch": 0.6121243491763846, + "grad_norm": 0.992535628908721, + "learning_rate": 4.8749291872737685e-06, + "loss": 0.4438, + "step": 10052 + }, + { + "epoch": 0.6121852449532625, + "grad_norm": 0.9699008616493864, + "learning_rate": 4.874904262957205e-06, + "loss": 0.4156, + "step": 10053 + }, + { + "epoch": 0.6122461407301404, + "grad_norm": 1.0231140095268247, + "learning_rate": 4.874879336221138e-06, + "loss": 0.4888, + "step": 10054 + }, + { + "epoch": 0.6123070365070182, + "grad_norm": 1.0103414427922108, + "learning_rate": 4.874854407065594e-06, + "loss": 0.4339, + "step": 10055 + }, + { + "epoch": 0.6123679322838961, + "grad_norm": 1.0691774149078372, + "learning_rate": 4.874829475490598e-06, + "loss": 0.3805, + "step": 10056 + }, + { + "epoch": 0.612428828060774, + "grad_norm": 1.0616583371320654, + "learning_rate": 4.874804541496175e-06, + "loss": 0.4091, + "step": 10057 + }, + { + "epoch": 0.6124897238376519, + "grad_norm": 0.9593692988650551, + "learning_rate": 4.87477960508235e-06, + "loss": 0.458, + "step": 10058 + }, + { + "epoch": 0.6125506196145297, + "grad_norm": 1.0621029012630139, + "learning_rate": 4.87475466624915e-06, + "loss": 0.4659, + "step": 10059 + }, + { + "epoch": 0.6126115153914076, + "grad_norm": 1.0740759078186448, + "learning_rate": 4.874729724996598e-06, + "loss": 0.4414, + "step": 10060 + }, + { + "epoch": 0.6126724111682855, + "grad_norm": 0.9364817114361618, + "learning_rate": 4.874704781324721e-06, + "loss": 0.4692, + "step": 10061 + }, + { + "epoch": 0.6127333069451634, + "grad_norm": 0.9755960022745901, + "learning_rate": 4.874679835233545e-06, + "loss": 0.4405, + "step": 10062 + }, + { + "epoch": 0.6127942027220412, + "grad_norm": 1.0242916144923782, + "learning_rate": 4.8746548867230935e-06, + "loss": 0.4179, + "step": 10063 + }, + { + "epoch": 0.6128550984989191, + "grad_norm": 1.086806736099031, + "learning_rate": 4.8746299357933935e-06, + "loss": 0.3942, + "step": 10064 + }, + { + "epoch": 0.612915994275797, + "grad_norm": 1.0324298670404959, + "learning_rate": 4.87460498244447e-06, + "loss": 0.4165, + "step": 10065 + }, + { + "epoch": 0.6129768900526749, + "grad_norm": 1.0332740623588699, + "learning_rate": 4.874580026676347e-06, + "loss": 0.4115, + "step": 10066 + }, + { + "epoch": 0.6130377858295527, + "grad_norm": 0.9867399171470187, + "learning_rate": 4.874555068489053e-06, + "loss": 0.4462, + "step": 10067 + }, + { + "epoch": 0.6130986816064306, + "grad_norm": 0.9614339553658057, + "learning_rate": 4.8745301078826114e-06, + "loss": 0.4789, + "step": 10068 + }, + { + "epoch": 0.6131595773833085, + "grad_norm": 0.997664545126744, + "learning_rate": 4.874505144857047e-06, + "loss": 0.4299, + "step": 10069 + }, + { + "epoch": 0.6132204731601864, + "grad_norm": 0.9212100901442909, + "learning_rate": 4.874480179412386e-06, + "loss": 0.4581, + "step": 10070 + }, + { + "epoch": 0.6132813689370642, + "grad_norm": 1.0492504627394215, + "learning_rate": 4.874455211548655e-06, + "loss": 0.3845, + "step": 10071 + }, + { + "epoch": 0.613342264713942, + "grad_norm": 1.0904013692289443, + "learning_rate": 4.874430241265879e-06, + "loss": 0.3955, + "step": 10072 + }, + { + "epoch": 0.61340316049082, + "grad_norm": 1.0744177487900513, + "learning_rate": 4.874405268564081e-06, + "loss": 0.3883, + "step": 10073 + }, + { + "epoch": 0.6134640562676978, + "grad_norm": 1.0290481161326175, + "learning_rate": 4.8743802934432895e-06, + "loss": 0.5109, + "step": 10074 + }, + { + "epoch": 0.6135249520445757, + "grad_norm": 0.9914545674595681, + "learning_rate": 4.8743553159035284e-06, + "loss": 0.4411, + "step": 10075 + }, + { + "epoch": 0.6135858478214535, + "grad_norm": 1.1120497215527043, + "learning_rate": 4.874330335944823e-06, + "loss": 0.3788, + "step": 10076 + }, + { + "epoch": 0.6136467435983315, + "grad_norm": 1.009505059990995, + "learning_rate": 4.8743053535672e-06, + "loss": 0.4904, + "step": 10077 + }, + { + "epoch": 0.6137076393752093, + "grad_norm": 1.0229842927153605, + "learning_rate": 4.874280368770683e-06, + "loss": 0.4693, + "step": 10078 + }, + { + "epoch": 0.6137685351520872, + "grad_norm": 1.04115085736904, + "learning_rate": 4.8742553815552994e-06, + "loss": 0.4316, + "step": 10079 + }, + { + "epoch": 0.613829430928965, + "grad_norm": 0.984709925759965, + "learning_rate": 4.8742303919210735e-06, + "loss": 0.4664, + "step": 10080 + }, + { + "epoch": 0.613890326705843, + "grad_norm": 1.0862216864356777, + "learning_rate": 4.874205399868031e-06, + "loss": 0.4109, + "step": 10081 + }, + { + "epoch": 0.6139512224827208, + "grad_norm": 0.9923500436036335, + "learning_rate": 4.874180405396198e-06, + "loss": 0.4474, + "step": 10082 + }, + { + "epoch": 0.6140121182595987, + "grad_norm": 0.9338152511680745, + "learning_rate": 4.874155408505599e-06, + "loss": 0.4671, + "step": 10083 + }, + { + "epoch": 0.6140730140364765, + "grad_norm": 1.0068447946368406, + "learning_rate": 4.874130409196259e-06, + "loss": 0.4827, + "step": 10084 + }, + { + "epoch": 0.6141339098133545, + "grad_norm": 1.0382673927565558, + "learning_rate": 4.874105407468205e-06, + "loss": 0.4482, + "step": 10085 + }, + { + "epoch": 0.6141948055902323, + "grad_norm": 0.9497582300162176, + "learning_rate": 4.874080403321462e-06, + "loss": 0.4375, + "step": 10086 + }, + { + "epoch": 0.6142557013671102, + "grad_norm": 0.8794633423939784, + "learning_rate": 4.874055396756056e-06, + "loss": 0.4551, + "step": 10087 + }, + { + "epoch": 0.6143165971439881, + "grad_norm": 1.0739349221179213, + "learning_rate": 4.87403038777201e-06, + "loss": 0.3878, + "step": 10088 + }, + { + "epoch": 0.614377492920866, + "grad_norm": 0.944228177764833, + "learning_rate": 4.8740053763693515e-06, + "loss": 0.3803, + "step": 10089 + }, + { + "epoch": 0.6144383886977438, + "grad_norm": 0.9638620818117034, + "learning_rate": 4.8739803625481065e-06, + "loss": 0.4482, + "step": 10090 + }, + { + "epoch": 0.6144992844746217, + "grad_norm": 0.9421651877479948, + "learning_rate": 4.8739553463082995e-06, + "loss": 0.4539, + "step": 10091 + }, + { + "epoch": 0.6145601802514996, + "grad_norm": 0.9842412990793195, + "learning_rate": 4.873930327649956e-06, + "loss": 0.398, + "step": 10092 + }, + { + "epoch": 0.6146210760283775, + "grad_norm": 1.0344889580471128, + "learning_rate": 4.873905306573101e-06, + "loss": 0.4146, + "step": 10093 + }, + { + "epoch": 0.6146819718052553, + "grad_norm": 1.1582824343436218, + "learning_rate": 4.873880283077762e-06, + "loss": 0.4172, + "step": 10094 + }, + { + "epoch": 0.6147428675821331, + "grad_norm": 1.0421477562140686, + "learning_rate": 4.873855257163962e-06, + "loss": 0.3523, + "step": 10095 + }, + { + "epoch": 0.6148037633590111, + "grad_norm": 1.101787152544934, + "learning_rate": 4.873830228831728e-06, + "loss": 0.4064, + "step": 10096 + }, + { + "epoch": 0.614864659135889, + "grad_norm": 1.0296360825479558, + "learning_rate": 4.873805198081085e-06, + "loss": 0.3991, + "step": 10097 + }, + { + "epoch": 0.6149255549127668, + "grad_norm": 1.0758662847130374, + "learning_rate": 4.873780164912058e-06, + "loss": 0.4053, + "step": 10098 + }, + { + "epoch": 0.6149864506896446, + "grad_norm": 1.061490405683345, + "learning_rate": 4.8737551293246745e-06, + "loss": 0.4016, + "step": 10099 + }, + { + "epoch": 0.6150473464665226, + "grad_norm": 1.1088110037402774, + "learning_rate": 4.8737300913189575e-06, + "loss": 0.5159, + "step": 10100 + }, + { + "epoch": 0.6151082422434004, + "grad_norm": 0.9503151942531283, + "learning_rate": 4.873705050894934e-06, + "loss": 0.4701, + "step": 10101 + }, + { + "epoch": 0.6151691380202783, + "grad_norm": 0.9799644535407999, + "learning_rate": 4.8736800080526295e-06, + "loss": 0.4613, + "step": 10102 + }, + { + "epoch": 0.6152300337971561, + "grad_norm": 1.064244694968557, + "learning_rate": 4.873654962792069e-06, + "loss": 0.4018, + "step": 10103 + }, + { + "epoch": 0.6152909295740341, + "grad_norm": 1.0601721422870511, + "learning_rate": 4.873629915113278e-06, + "loss": 0.3977, + "step": 10104 + }, + { + "epoch": 0.6153518253509119, + "grad_norm": 0.9846736553266396, + "learning_rate": 4.873604865016282e-06, + "loss": 0.4077, + "step": 10105 + }, + { + "epoch": 0.6154127211277898, + "grad_norm": 0.9940763799695538, + "learning_rate": 4.873579812501107e-06, + "loss": 0.4156, + "step": 10106 + }, + { + "epoch": 0.6154736169046676, + "grad_norm": 0.9619182355032825, + "learning_rate": 4.873554757567778e-06, + "loss": 0.4309, + "step": 10107 + }, + { + "epoch": 0.6155345126815456, + "grad_norm": 1.0047438251519343, + "learning_rate": 4.873529700216321e-06, + "loss": 0.4288, + "step": 10108 + }, + { + "epoch": 0.6155954084584234, + "grad_norm": 1.029435538357581, + "learning_rate": 4.8735046404467615e-06, + "loss": 0.4074, + "step": 10109 + }, + { + "epoch": 0.6156563042353013, + "grad_norm": 0.9295633348294393, + "learning_rate": 4.873479578259125e-06, + "loss": 0.4514, + "step": 10110 + }, + { + "epoch": 0.6157172000121791, + "grad_norm": 1.0195950448858315, + "learning_rate": 4.8734545136534364e-06, + "loss": 0.4321, + "step": 10111 + }, + { + "epoch": 0.6157780957890571, + "grad_norm": 1.0329493219437111, + "learning_rate": 4.873429446629721e-06, + "loss": 0.4486, + "step": 10112 + }, + { + "epoch": 0.6158389915659349, + "grad_norm": 1.0588248887124778, + "learning_rate": 4.873404377188006e-06, + "loss": 0.4283, + "step": 10113 + }, + { + "epoch": 0.6158998873428128, + "grad_norm": 1.027970346149615, + "learning_rate": 4.8733793053283155e-06, + "loss": 0.482, + "step": 10114 + }, + { + "epoch": 0.6159607831196906, + "grad_norm": 1.0058183138861274, + "learning_rate": 4.873354231050676e-06, + "loss": 0.4351, + "step": 10115 + }, + { + "epoch": 0.6160216788965686, + "grad_norm": 1.0287588321371484, + "learning_rate": 4.873329154355112e-06, + "loss": 0.3792, + "step": 10116 + }, + { + "epoch": 0.6160825746734464, + "grad_norm": 0.9942092373143694, + "learning_rate": 4.873304075241649e-06, + "loss": 0.4596, + "step": 10117 + }, + { + "epoch": 0.6161434704503242, + "grad_norm": 0.9955053416729597, + "learning_rate": 4.873278993710315e-06, + "loss": 0.4579, + "step": 10118 + }, + { + "epoch": 0.6162043662272021, + "grad_norm": 0.9561670634464088, + "learning_rate": 4.873253909761132e-06, + "loss": 0.4187, + "step": 10119 + }, + { + "epoch": 0.61626526200408, + "grad_norm": 0.9475457246298538, + "learning_rate": 4.873228823394128e-06, + "loss": 0.4475, + "step": 10120 + }, + { + "epoch": 0.6163261577809579, + "grad_norm": 0.9777161154858452, + "learning_rate": 4.873203734609328e-06, + "loss": 0.3686, + "step": 10121 + }, + { + "epoch": 0.6163870535578357, + "grad_norm": 1.003818073770187, + "learning_rate": 4.873178643406757e-06, + "loss": 0.448, + "step": 10122 + }, + { + "epoch": 0.6164479493347136, + "grad_norm": 1.0536437382140798, + "learning_rate": 4.8731535497864414e-06, + "loss": 0.3979, + "step": 10123 + }, + { + "epoch": 0.6165088451115915, + "grad_norm": 1.0041398219702156, + "learning_rate": 4.873128453748406e-06, + "loss": 0.4281, + "step": 10124 + }, + { + "epoch": 0.6165697408884694, + "grad_norm": 1.1387350098131235, + "learning_rate": 4.8731033552926765e-06, + "loss": 0.3837, + "step": 10125 + }, + { + "epoch": 0.6166306366653472, + "grad_norm": 1.01587528395367, + "learning_rate": 4.873078254419279e-06, + "loss": 0.3816, + "step": 10126 + }, + { + "epoch": 0.6166915324422251, + "grad_norm": 1.0467830154380897, + "learning_rate": 4.873053151128238e-06, + "loss": 0.4491, + "step": 10127 + }, + { + "epoch": 0.616752428219103, + "grad_norm": 0.9774566723314404, + "learning_rate": 4.873028045419581e-06, + "loss": 0.4175, + "step": 10128 + }, + { + "epoch": 0.6168133239959809, + "grad_norm": 0.9330009159815903, + "learning_rate": 4.8730029372933315e-06, + "loss": 0.4943, + "step": 10129 + }, + { + "epoch": 0.6168742197728587, + "grad_norm": 0.9526000671629751, + "learning_rate": 4.872977826749515e-06, + "loss": 0.4155, + "step": 10130 + }, + { + "epoch": 0.6169351155497367, + "grad_norm": 0.9985920990217747, + "learning_rate": 4.8729527137881596e-06, + "loss": 0.4241, + "step": 10131 + }, + { + "epoch": 0.6169960113266145, + "grad_norm": 0.9416841660943072, + "learning_rate": 4.87292759840929e-06, + "loss": 0.4659, + "step": 10132 + }, + { + "epoch": 0.6170569071034924, + "grad_norm": 0.9830072387337656, + "learning_rate": 4.872902480612929e-06, + "loss": 0.3972, + "step": 10133 + }, + { + "epoch": 0.6171178028803702, + "grad_norm": 0.948112311128051, + "learning_rate": 4.872877360399105e-06, + "loss": 0.4607, + "step": 10134 + }, + { + "epoch": 0.6171786986572482, + "grad_norm": 0.9753997728334182, + "learning_rate": 4.872852237767844e-06, + "loss": 0.4862, + "step": 10135 + }, + { + "epoch": 0.617239594434126, + "grad_norm": 0.9724469783385365, + "learning_rate": 4.872827112719169e-06, + "loss": 0.4673, + "step": 10136 + }, + { + "epoch": 0.6173004902110039, + "grad_norm": 0.9993898395978047, + "learning_rate": 4.872801985253107e-06, + "loss": 0.4388, + "step": 10137 + }, + { + "epoch": 0.6173613859878817, + "grad_norm": 1.052562968727812, + "learning_rate": 4.872776855369685e-06, + "loss": 0.4206, + "step": 10138 + }, + { + "epoch": 0.6174222817647597, + "grad_norm": 0.9907245099210927, + "learning_rate": 4.872751723068926e-06, + "loss": 0.4192, + "step": 10139 + }, + { + "epoch": 0.6174831775416375, + "grad_norm": 1.0412712132563169, + "learning_rate": 4.872726588350858e-06, + "loss": 0.4534, + "step": 10140 + }, + { + "epoch": 0.6175440733185154, + "grad_norm": 1.1324079402341651, + "learning_rate": 4.872701451215505e-06, + "loss": 0.398, + "step": 10141 + }, + { + "epoch": 0.6176049690953932, + "grad_norm": 1.0984948457551813, + "learning_rate": 4.872676311662893e-06, + "loss": 0.4469, + "step": 10142 + }, + { + "epoch": 0.6176658648722712, + "grad_norm": 0.9535034469423387, + "learning_rate": 4.872651169693048e-06, + "loss": 0.5592, + "step": 10143 + }, + { + "epoch": 0.617726760649149, + "grad_norm": 1.0112075085106746, + "learning_rate": 4.8726260253059945e-06, + "loss": 0.4471, + "step": 10144 + }, + { + "epoch": 0.6177876564260268, + "grad_norm": 0.9957773759064715, + "learning_rate": 4.87260087850176e-06, + "loss": 0.4098, + "step": 10145 + }, + { + "epoch": 0.6178485522029047, + "grad_norm": 1.1193761101411752, + "learning_rate": 4.872575729280368e-06, + "loss": 0.4167, + "step": 10146 + }, + { + "epoch": 0.6179094479797826, + "grad_norm": 1.1094699653767133, + "learning_rate": 4.8725505776418455e-06, + "loss": 0.3988, + "step": 10147 + }, + { + "epoch": 0.6179703437566605, + "grad_norm": 1.0493418904856562, + "learning_rate": 4.872525423586219e-06, + "loss": 0.4754, + "step": 10148 + }, + { + "epoch": 0.6180312395335383, + "grad_norm": 1.0175700712064546, + "learning_rate": 4.8725002671135115e-06, + "loss": 0.4842, + "step": 10149 + }, + { + "epoch": 0.6180921353104162, + "grad_norm": 1.0201002402610972, + "learning_rate": 4.87247510822375e-06, + "loss": 0.4417, + "step": 10150 + }, + { + "epoch": 0.6181530310872941, + "grad_norm": 1.0614145487723157, + "learning_rate": 4.8724499469169604e-06, + "loss": 0.4043, + "step": 10151 + }, + { + "epoch": 0.618213926864172, + "grad_norm": 1.0579796421329977, + "learning_rate": 4.872424783193168e-06, + "loss": 0.3653, + "step": 10152 + }, + { + "epoch": 0.6182748226410498, + "grad_norm": 1.037405647140387, + "learning_rate": 4.8723996170524e-06, + "loss": 0.4532, + "step": 10153 + }, + { + "epoch": 0.6183357184179277, + "grad_norm": 1.036525704747745, + "learning_rate": 4.872374448494679e-06, + "loss": 0.4008, + "step": 10154 + }, + { + "epoch": 0.6183966141948056, + "grad_norm": 0.9761851675705131, + "learning_rate": 4.872349277520033e-06, + "loss": 0.4572, + "step": 10155 + }, + { + "epoch": 0.6184575099716835, + "grad_norm": 1.1042169548941942, + "learning_rate": 4.8723241041284865e-06, + "loss": 0.4536, + "step": 10156 + }, + { + "epoch": 0.6185184057485613, + "grad_norm": 0.9826540417354842, + "learning_rate": 4.872298928320066e-06, + "loss": 0.4335, + "step": 10157 + }, + { + "epoch": 0.6185793015254392, + "grad_norm": 0.9836212724974163, + "learning_rate": 4.8722737500947955e-06, + "loss": 0.4535, + "step": 10158 + }, + { + "epoch": 0.6186401973023171, + "grad_norm": 1.002854819084662, + "learning_rate": 4.8722485694527036e-06, + "loss": 0.4283, + "step": 10159 + }, + { + "epoch": 0.618701093079195, + "grad_norm": 0.9758789462510689, + "learning_rate": 4.872223386393813e-06, + "loss": 0.4658, + "step": 10160 + }, + { + "epoch": 0.6187619888560728, + "grad_norm": 1.0396513198355128, + "learning_rate": 4.872198200918151e-06, + "loss": 0.4733, + "step": 10161 + }, + { + "epoch": 0.6188228846329507, + "grad_norm": 1.008726053040934, + "learning_rate": 4.872173013025742e-06, + "loss": 0.4356, + "step": 10162 + }, + { + "epoch": 0.6188837804098286, + "grad_norm": 0.9817788412365361, + "learning_rate": 4.872147822716613e-06, + "loss": 0.385, + "step": 10163 + }, + { + "epoch": 0.6189446761867065, + "grad_norm": 1.058989083270772, + "learning_rate": 4.87212262999079e-06, + "loss": 0.3996, + "step": 10164 + }, + { + "epoch": 0.6190055719635843, + "grad_norm": 0.9519357692832644, + "learning_rate": 4.872097434848296e-06, + "loss": 0.4207, + "step": 10165 + }, + { + "epoch": 0.6190664677404621, + "grad_norm": 1.1275994939525418, + "learning_rate": 4.8720722372891596e-06, + "loss": 0.3509, + "step": 10166 + }, + { + "epoch": 0.6191273635173401, + "grad_norm": 0.9983710531618606, + "learning_rate": 4.872047037313405e-06, + "loss": 0.3858, + "step": 10167 + }, + { + "epoch": 0.619188259294218, + "grad_norm": 0.99480025893478, + "learning_rate": 4.872021834921059e-06, + "loss": 0.449, + "step": 10168 + }, + { + "epoch": 0.6192491550710958, + "grad_norm": 1.151367702590325, + "learning_rate": 4.8719966301121454e-06, + "loss": 0.4045, + "step": 10169 + }, + { + "epoch": 0.6193100508479737, + "grad_norm": 0.9313437442836764, + "learning_rate": 4.871971422886691e-06, + "loss": 0.4461, + "step": 10170 + }, + { + "epoch": 0.6193709466248516, + "grad_norm": 0.9447881511592159, + "learning_rate": 4.871946213244721e-06, + "loss": 0.4468, + "step": 10171 + }, + { + "epoch": 0.6194318424017294, + "grad_norm": 1.073973391771461, + "learning_rate": 4.871921001186263e-06, + "loss": 0.4179, + "step": 10172 + }, + { + "epoch": 0.6194927381786073, + "grad_norm": 0.9950331398771944, + "learning_rate": 4.8718957867113404e-06, + "loss": 0.4517, + "step": 10173 + }, + { + "epoch": 0.6195536339554852, + "grad_norm": 1.0266428531164578, + "learning_rate": 4.87187056981998e-06, + "loss": 0.4667, + "step": 10174 + }, + { + "epoch": 0.6196145297323631, + "grad_norm": 0.9751882751514573, + "learning_rate": 4.871845350512207e-06, + "loss": 0.3926, + "step": 10175 + }, + { + "epoch": 0.6196754255092409, + "grad_norm": 1.151709148298338, + "learning_rate": 4.871820128788047e-06, + "loss": 0.4337, + "step": 10176 + }, + { + "epoch": 0.6197363212861188, + "grad_norm": 1.0439446276229345, + "learning_rate": 4.871794904647526e-06, + "loss": 0.4332, + "step": 10177 + }, + { + "epoch": 0.6197972170629967, + "grad_norm": 1.0336091349664442, + "learning_rate": 4.87176967809067e-06, + "loss": 0.3652, + "step": 10178 + }, + { + "epoch": 0.6198581128398746, + "grad_norm": 1.0299520468431214, + "learning_rate": 4.871744449117504e-06, + "loss": 0.4288, + "step": 10179 + }, + { + "epoch": 0.6199190086167524, + "grad_norm": 1.068287550377862, + "learning_rate": 4.8717192177280545e-06, + "loss": 0.4021, + "step": 10180 + }, + { + "epoch": 0.6199799043936303, + "grad_norm": 1.0984712545475015, + "learning_rate": 4.871693983922346e-06, + "loss": 0.4151, + "step": 10181 + }, + { + "epoch": 0.6200408001705082, + "grad_norm": 1.0249343077239341, + "learning_rate": 4.871668747700405e-06, + "loss": 0.4659, + "step": 10182 + }, + { + "epoch": 0.6201016959473861, + "grad_norm": 1.0480632880346734, + "learning_rate": 4.871643509062258e-06, + "loss": 0.4222, + "step": 10183 + }, + { + "epoch": 0.6201625917242639, + "grad_norm": 1.105342234253779, + "learning_rate": 4.87161826800793e-06, + "loss": 0.4258, + "step": 10184 + }, + { + "epoch": 0.6202234875011418, + "grad_norm": 1.0458980069737756, + "learning_rate": 4.871593024537446e-06, + "loss": 0.4147, + "step": 10185 + }, + { + "epoch": 0.6202843832780197, + "grad_norm": 1.0270929650022815, + "learning_rate": 4.871567778650833e-06, + "loss": 0.4134, + "step": 10186 + }, + { + "epoch": 0.6203452790548976, + "grad_norm": 0.9877400695372828, + "learning_rate": 4.871542530348115e-06, + "loss": 0.4757, + "step": 10187 + }, + { + "epoch": 0.6204061748317754, + "grad_norm": 1.00355057179203, + "learning_rate": 4.87151727962932e-06, + "loss": 0.386, + "step": 10188 + }, + { + "epoch": 0.6204670706086532, + "grad_norm": 1.0494436550119168, + "learning_rate": 4.871492026494471e-06, + "loss": 0.3719, + "step": 10189 + }, + { + "epoch": 0.6205279663855312, + "grad_norm": 0.979873423603282, + "learning_rate": 4.871466770943597e-06, + "loss": 0.3948, + "step": 10190 + }, + { + "epoch": 0.620588862162409, + "grad_norm": 0.9683075108880842, + "learning_rate": 4.871441512976721e-06, + "loss": 0.393, + "step": 10191 + }, + { + "epoch": 0.6206497579392869, + "grad_norm": 0.9217320149346065, + "learning_rate": 4.871416252593869e-06, + "loss": 0.4902, + "step": 10192 + }, + { + "epoch": 0.6207106537161647, + "grad_norm": 1.0128653694934038, + "learning_rate": 4.871390989795068e-06, + "loss": 0.4292, + "step": 10193 + }, + { + "epoch": 0.6207715494930427, + "grad_norm": 0.9799055398199474, + "learning_rate": 4.871365724580344e-06, + "loss": 0.4051, + "step": 10194 + }, + { + "epoch": 0.6208324452699205, + "grad_norm": 1.1385211136001516, + "learning_rate": 4.871340456949721e-06, + "loss": 0.3952, + "step": 10195 + }, + { + "epoch": 0.6208933410467984, + "grad_norm": 1.007520135784371, + "learning_rate": 4.871315186903226e-06, + "loss": 0.4285, + "step": 10196 + }, + { + "epoch": 0.6209542368236762, + "grad_norm": 0.9138425440341805, + "learning_rate": 4.871289914440884e-06, + "loss": 0.4833, + "step": 10197 + }, + { + "epoch": 0.6210151326005542, + "grad_norm": 0.9629418320518208, + "learning_rate": 4.871264639562722e-06, + "loss": 0.4415, + "step": 10198 + }, + { + "epoch": 0.621076028377432, + "grad_norm": 1.0611352518464539, + "learning_rate": 4.871239362268764e-06, + "loss": 0.3795, + "step": 10199 + }, + { + "epoch": 0.6211369241543099, + "grad_norm": 1.0873942479632914, + "learning_rate": 4.871214082559037e-06, + "loss": 0.4016, + "step": 10200 + }, + { + "epoch": 0.6211978199311877, + "grad_norm": 0.9391409503612073, + "learning_rate": 4.871188800433566e-06, + "loss": 0.4873, + "step": 10201 + }, + { + "epoch": 0.6212587157080657, + "grad_norm": 0.9164751299207241, + "learning_rate": 4.871163515892378e-06, + "loss": 0.439, + "step": 10202 + }, + { + "epoch": 0.6213196114849435, + "grad_norm": 1.116679853319958, + "learning_rate": 4.871138228935497e-06, + "loss": 0.4239, + "step": 10203 + }, + { + "epoch": 0.6213805072618214, + "grad_norm": 1.0495819152560872, + "learning_rate": 4.871112939562949e-06, + "loss": 0.4744, + "step": 10204 + }, + { + "epoch": 0.6214414030386992, + "grad_norm": 0.9317719735277602, + "learning_rate": 4.871087647774762e-06, + "loss": 0.5191, + "step": 10205 + }, + { + "epoch": 0.6215022988155772, + "grad_norm": 0.9879788377795928, + "learning_rate": 4.87106235357096e-06, + "loss": 0.3991, + "step": 10206 + }, + { + "epoch": 0.621563194592455, + "grad_norm": 0.9315443922842102, + "learning_rate": 4.871037056951569e-06, + "loss": 0.4287, + "step": 10207 + }, + { + "epoch": 0.6216240903693329, + "grad_norm": 1.062690701780089, + "learning_rate": 4.871011757916614e-06, + "loss": 0.42, + "step": 10208 + }, + { + "epoch": 0.6216849861462107, + "grad_norm": 0.9520656158989858, + "learning_rate": 4.870986456466121e-06, + "loss": 0.4453, + "step": 10209 + }, + { + "epoch": 0.6217458819230887, + "grad_norm": 1.0328140227420068, + "learning_rate": 4.870961152600118e-06, + "loss": 0.4748, + "step": 10210 + }, + { + "epoch": 0.6218067776999665, + "grad_norm": 0.9195233331484847, + "learning_rate": 4.8709358463186276e-06, + "loss": 0.5052, + "step": 10211 + }, + { + "epoch": 0.6218676734768444, + "grad_norm": 0.9730935464393338, + "learning_rate": 4.870910537621678e-06, + "loss": 0.404, + "step": 10212 + }, + { + "epoch": 0.6219285692537223, + "grad_norm": 0.9257754802854459, + "learning_rate": 4.870885226509294e-06, + "loss": 0.5058, + "step": 10213 + }, + { + "epoch": 0.6219894650306002, + "grad_norm": 0.9757669905970598, + "learning_rate": 4.870859912981501e-06, + "loss": 0.5286, + "step": 10214 + }, + { + "epoch": 0.622050360807478, + "grad_norm": 0.9746952052177718, + "learning_rate": 4.870834597038325e-06, + "loss": 0.4992, + "step": 10215 + }, + { + "epoch": 0.6221112565843558, + "grad_norm": 1.050179048518058, + "learning_rate": 4.870809278679793e-06, + "loss": 0.4061, + "step": 10216 + }, + { + "epoch": 0.6221721523612338, + "grad_norm": 1.0479782582143147, + "learning_rate": 4.870783957905929e-06, + "loss": 0.4537, + "step": 10217 + }, + { + "epoch": 0.6222330481381116, + "grad_norm": 0.9428512621689263, + "learning_rate": 4.87075863471676e-06, + "loss": 0.4475, + "step": 10218 + }, + { + "epoch": 0.6222939439149895, + "grad_norm": 0.9549469471556176, + "learning_rate": 4.870733309112311e-06, + "loss": 0.4361, + "step": 10219 + }, + { + "epoch": 0.6223548396918673, + "grad_norm": 0.9579530958562986, + "learning_rate": 4.8707079810926085e-06, + "loss": 0.4781, + "step": 10220 + }, + { + "epoch": 0.6224157354687453, + "grad_norm": 0.9604557277638592, + "learning_rate": 4.870682650657678e-06, + "loss": 0.4402, + "step": 10221 + }, + { + "epoch": 0.6224766312456231, + "grad_norm": 0.9893430320393144, + "learning_rate": 4.870657317807544e-06, + "loss": 0.4995, + "step": 10222 + }, + { + "epoch": 0.622537527022501, + "grad_norm": 1.0076701405567001, + "learning_rate": 4.8706319825422355e-06, + "loss": 0.4437, + "step": 10223 + }, + { + "epoch": 0.6225984227993788, + "grad_norm": 1.0207206653356906, + "learning_rate": 4.870606644861776e-06, + "loss": 0.3947, + "step": 10224 + }, + { + "epoch": 0.6226593185762568, + "grad_norm": 1.027771891943847, + "learning_rate": 4.870581304766191e-06, + "loss": 0.3538, + "step": 10225 + }, + { + "epoch": 0.6227202143531346, + "grad_norm": 0.9833114843657974, + "learning_rate": 4.870555962255508e-06, + "loss": 0.4001, + "step": 10226 + }, + { + "epoch": 0.6227811101300125, + "grad_norm": 0.9434760067518807, + "learning_rate": 4.8705306173297506e-06, + "loss": 0.4453, + "step": 10227 + }, + { + "epoch": 0.6228420059068903, + "grad_norm": 1.0214021321568811, + "learning_rate": 4.870505269988946e-06, + "loss": 0.4316, + "step": 10228 + }, + { + "epoch": 0.6229029016837683, + "grad_norm": 1.0158314556289965, + "learning_rate": 4.870479920233121e-06, + "loss": 0.4656, + "step": 10229 + }, + { + "epoch": 0.6229637974606461, + "grad_norm": 1.0314236567344308, + "learning_rate": 4.870454568062301e-06, + "loss": 0.3562, + "step": 10230 + }, + { + "epoch": 0.623024693237524, + "grad_norm": 0.9743029154379005, + "learning_rate": 4.870429213476509e-06, + "loss": 0.4587, + "step": 10231 + }, + { + "epoch": 0.6230855890144018, + "grad_norm": 0.9774068393706826, + "learning_rate": 4.870403856475775e-06, + "loss": 0.4373, + "step": 10232 + }, + { + "epoch": 0.6231464847912798, + "grad_norm": 0.9550055783674218, + "learning_rate": 4.870378497060121e-06, + "loss": 0.4011, + "step": 10233 + }, + { + "epoch": 0.6232073805681576, + "grad_norm": 0.9659876273849167, + "learning_rate": 4.8703531352295755e-06, + "loss": 0.5239, + "step": 10234 + }, + { + "epoch": 0.6232682763450355, + "grad_norm": 1.0676658025678, + "learning_rate": 4.870327770984164e-06, + "loss": 0.4977, + "step": 10235 + }, + { + "epoch": 0.6233291721219133, + "grad_norm": 0.9171623772728816, + "learning_rate": 4.870302404323911e-06, + "loss": 0.4482, + "step": 10236 + }, + { + "epoch": 0.6233900678987913, + "grad_norm": 0.9696636547616743, + "learning_rate": 4.8702770352488435e-06, + "loss": 0.5066, + "step": 10237 + }, + { + "epoch": 0.6234509636756691, + "grad_norm": 0.9934457574819626, + "learning_rate": 4.8702516637589876e-06, + "loss": 0.5183, + "step": 10238 + }, + { + "epoch": 0.623511859452547, + "grad_norm": 1.0021215192152966, + "learning_rate": 4.870226289854369e-06, + "loss": 0.4934, + "step": 10239 + }, + { + "epoch": 0.6235727552294248, + "grad_norm": 0.9223575131994084, + "learning_rate": 4.870200913535011e-06, + "loss": 0.4329, + "step": 10240 + }, + { + "epoch": 0.6236336510063027, + "grad_norm": 0.9099486804507819, + "learning_rate": 4.8701755348009424e-06, + "loss": 0.4787, + "step": 10241 + }, + { + "epoch": 0.6236945467831806, + "grad_norm": 0.9587630479725446, + "learning_rate": 4.8701501536521894e-06, + "loss": 0.4084, + "step": 10242 + }, + { + "epoch": 0.6237554425600584, + "grad_norm": 0.9747359874343962, + "learning_rate": 4.870124770088776e-06, + "loss": 0.4918, + "step": 10243 + }, + { + "epoch": 0.6238163383369363, + "grad_norm": 0.9779551046521395, + "learning_rate": 4.870099384110729e-06, + "loss": 0.4768, + "step": 10244 + }, + { + "epoch": 0.6238772341138142, + "grad_norm": 0.9904580893853915, + "learning_rate": 4.870073995718073e-06, + "loss": 0.4601, + "step": 10245 + }, + { + "epoch": 0.6239381298906921, + "grad_norm": 0.9639910568367576, + "learning_rate": 4.870048604910836e-06, + "loss": 0.4697, + "step": 10246 + }, + { + "epoch": 0.6239990256675699, + "grad_norm": 1.0975303730449235, + "learning_rate": 4.870023211689042e-06, + "loss": 0.5234, + "step": 10247 + }, + { + "epoch": 0.6240599214444478, + "grad_norm": 0.9660923247202844, + "learning_rate": 4.869997816052718e-06, + "loss": 0.436, + "step": 10248 + }, + { + "epoch": 0.6241208172213257, + "grad_norm": 0.9287138759937723, + "learning_rate": 4.86997241800189e-06, + "loss": 0.4857, + "step": 10249 + }, + { + "epoch": 0.6241817129982036, + "grad_norm": 0.9902220226574756, + "learning_rate": 4.869947017536583e-06, + "loss": 0.4295, + "step": 10250 + }, + { + "epoch": 0.6242426087750814, + "grad_norm": 0.9490800038641688, + "learning_rate": 4.869921614656824e-06, + "loss": 0.4602, + "step": 10251 + }, + { + "epoch": 0.6243035045519594, + "grad_norm": 0.9879425657167527, + "learning_rate": 4.869896209362637e-06, + "loss": 0.406, + "step": 10252 + }, + { + "epoch": 0.6243644003288372, + "grad_norm": 1.0035636207884144, + "learning_rate": 4.869870801654048e-06, + "loss": 0.3914, + "step": 10253 + }, + { + "epoch": 0.6244252961057151, + "grad_norm": 1.04346727408117, + "learning_rate": 4.869845391531086e-06, + "loss": 0.4322, + "step": 10254 + }, + { + "epoch": 0.6244861918825929, + "grad_norm": 0.947991406251578, + "learning_rate": 4.8698199789937736e-06, + "loss": 0.4956, + "step": 10255 + }, + { + "epoch": 0.6245470876594709, + "grad_norm": 0.9961977500162436, + "learning_rate": 4.869794564042139e-06, + "loss": 0.5447, + "step": 10256 + }, + { + "epoch": 0.6246079834363487, + "grad_norm": 1.0437947154672151, + "learning_rate": 4.869769146676206e-06, + "loss": 0.4383, + "step": 10257 + }, + { + "epoch": 0.6246688792132266, + "grad_norm": 0.9632471355296727, + "learning_rate": 4.869743726896002e-06, + "loss": 0.4195, + "step": 10258 + }, + { + "epoch": 0.6247297749901044, + "grad_norm": 0.9913914300792533, + "learning_rate": 4.869718304701552e-06, + "loss": 0.4065, + "step": 10259 + }, + { + "epoch": 0.6247906707669824, + "grad_norm": 1.0888486043934331, + "learning_rate": 4.869692880092882e-06, + "loss": 0.4212, + "step": 10260 + }, + { + "epoch": 0.6248515665438602, + "grad_norm": 0.9593228423039306, + "learning_rate": 4.869667453070018e-06, + "loss": 0.4396, + "step": 10261 + }, + { + "epoch": 0.624912462320738, + "grad_norm": 0.9419087699230605, + "learning_rate": 4.869642023632987e-06, + "loss": 0.4439, + "step": 10262 + }, + { + "epoch": 0.6249733580976159, + "grad_norm": 1.0163031975885237, + "learning_rate": 4.869616591781814e-06, + "loss": 0.4586, + "step": 10263 + }, + { + "epoch": 0.6250342538744939, + "grad_norm": 0.9843515307108532, + "learning_rate": 4.869591157516525e-06, + "loss": 0.4386, + "step": 10264 + }, + { + "epoch": 0.6250951496513717, + "grad_norm": 0.9610888942925475, + "learning_rate": 4.869565720837144e-06, + "loss": 0.4866, + "step": 10265 + }, + { + "epoch": 0.6251560454282495, + "grad_norm": 1.109179210995592, + "learning_rate": 4.8695402817437e-06, + "loss": 0.4608, + "step": 10266 + }, + { + "epoch": 0.6252169412051274, + "grad_norm": 1.045382667123221, + "learning_rate": 4.869514840236218e-06, + "loss": 0.4292, + "step": 10267 + }, + { + "epoch": 0.6252778369820053, + "grad_norm": 1.0915773591237599, + "learning_rate": 4.8694893963147225e-06, + "loss": 0.4518, + "step": 10268 + }, + { + "epoch": 0.6253387327588832, + "grad_norm": 0.9804062527928352, + "learning_rate": 4.869463949979241e-06, + "loss": 0.4579, + "step": 10269 + }, + { + "epoch": 0.625399628535761, + "grad_norm": 1.0000678037297366, + "learning_rate": 4.869438501229799e-06, + "loss": 0.4024, + "step": 10270 + }, + { + "epoch": 0.6254605243126389, + "grad_norm": 1.0001661301373619, + "learning_rate": 4.869413050066423e-06, + "loss": 0.4782, + "step": 10271 + }, + { + "epoch": 0.6255214200895168, + "grad_norm": 0.989490962893769, + "learning_rate": 4.869387596489137e-06, + "loss": 0.4077, + "step": 10272 + }, + { + "epoch": 0.6255823158663947, + "grad_norm": 0.9883459892486186, + "learning_rate": 4.869362140497969e-06, + "loss": 0.3723, + "step": 10273 + }, + { + "epoch": 0.6256432116432725, + "grad_norm": 0.9185953526426247, + "learning_rate": 4.869336682092943e-06, + "loss": 0.4539, + "step": 10274 + }, + { + "epoch": 0.6257041074201504, + "grad_norm": 1.0940991371187578, + "learning_rate": 4.869311221274087e-06, + "loss": 0.4198, + "step": 10275 + }, + { + "epoch": 0.6257650031970283, + "grad_norm": 1.070659659711464, + "learning_rate": 4.869285758041426e-06, + "loss": 0.4208, + "step": 10276 + }, + { + "epoch": 0.6258258989739062, + "grad_norm": 1.0018811883136127, + "learning_rate": 4.869260292394986e-06, + "loss": 0.3889, + "step": 10277 + }, + { + "epoch": 0.625886794750784, + "grad_norm": 1.0242161628361364, + "learning_rate": 4.869234824334792e-06, + "loss": 0.428, + "step": 10278 + }, + { + "epoch": 0.6259476905276619, + "grad_norm": 0.9871521558900025, + "learning_rate": 4.869209353860872e-06, + "loss": 0.4536, + "step": 10279 + }, + { + "epoch": 0.6260085863045398, + "grad_norm": 0.9864308230038217, + "learning_rate": 4.86918388097325e-06, + "loss": 0.4628, + "step": 10280 + }, + { + "epoch": 0.6260694820814177, + "grad_norm": 0.9341239402486599, + "learning_rate": 4.8691584056719535e-06, + "loss": 0.4378, + "step": 10281 + }, + { + "epoch": 0.6261303778582955, + "grad_norm": 1.0443684876976718, + "learning_rate": 4.869132927957007e-06, + "loss": 0.4137, + "step": 10282 + }, + { + "epoch": 0.6261912736351734, + "grad_norm": 1.0371239589031196, + "learning_rate": 4.8691074478284365e-06, + "loss": 0.4683, + "step": 10283 + }, + { + "epoch": 0.6262521694120513, + "grad_norm": 0.9333097659616004, + "learning_rate": 4.869081965286269e-06, + "loss": 0.4453, + "step": 10284 + }, + { + "epoch": 0.6263130651889292, + "grad_norm": 0.9486207393755195, + "learning_rate": 4.869056480330531e-06, + "loss": 0.412, + "step": 10285 + }, + { + "epoch": 0.626373960965807, + "grad_norm": 0.9527330179298661, + "learning_rate": 4.869030992961247e-06, + "loss": 0.3832, + "step": 10286 + }, + { + "epoch": 0.6264348567426848, + "grad_norm": 1.0416300369126414, + "learning_rate": 4.869005503178443e-06, + "loss": 0.3885, + "step": 10287 + }, + { + "epoch": 0.6264957525195628, + "grad_norm": 0.9812553787637386, + "learning_rate": 4.868980010982146e-06, + "loss": 0.447, + "step": 10288 + }, + { + "epoch": 0.6265566482964406, + "grad_norm": 1.0124282736701922, + "learning_rate": 4.868954516372381e-06, + "loss": 0.4505, + "step": 10289 + }, + { + "epoch": 0.6266175440733185, + "grad_norm": 1.0591310245181251, + "learning_rate": 4.8689290193491745e-06, + "loss": 0.4731, + "step": 10290 + }, + { + "epoch": 0.6266784398501963, + "grad_norm": 1.0027442487594191, + "learning_rate": 4.8689035199125525e-06, + "loss": 0.4587, + "step": 10291 + }, + { + "epoch": 0.6267393356270743, + "grad_norm": 1.0351641140019445, + "learning_rate": 4.868878018062541e-06, + "loss": 0.4738, + "step": 10292 + }, + { + "epoch": 0.6268002314039521, + "grad_norm": 1.0343268976316977, + "learning_rate": 4.868852513799166e-06, + "loss": 0.4475, + "step": 10293 + }, + { + "epoch": 0.62686112718083, + "grad_norm": 1.0051988451878833, + "learning_rate": 4.868827007122452e-06, + "loss": 0.4857, + "step": 10294 + }, + { + "epoch": 0.6269220229577079, + "grad_norm": 1.0304327906116784, + "learning_rate": 4.868801498032428e-06, + "loss": 0.5078, + "step": 10295 + }, + { + "epoch": 0.6269829187345858, + "grad_norm": 1.163886925992294, + "learning_rate": 4.8687759865291176e-06, + "loss": 0.3962, + "step": 10296 + }, + { + "epoch": 0.6270438145114636, + "grad_norm": 0.8808721547796513, + "learning_rate": 4.868750472612547e-06, + "loss": 0.4407, + "step": 10297 + }, + { + "epoch": 0.6271047102883415, + "grad_norm": 1.052021619623573, + "learning_rate": 4.868724956282743e-06, + "loss": 0.3919, + "step": 10298 + }, + { + "epoch": 0.6271656060652194, + "grad_norm": 1.0316894575853717, + "learning_rate": 4.868699437539731e-06, + "loss": 0.4257, + "step": 10299 + }, + { + "epoch": 0.6272265018420973, + "grad_norm": 0.9593653012075105, + "learning_rate": 4.8686739163835385e-06, + "loss": 0.5019, + "step": 10300 + }, + { + "epoch": 0.6272873976189751, + "grad_norm": 0.9910091455442681, + "learning_rate": 4.868648392814189e-06, + "loss": 0.4444, + "step": 10301 + }, + { + "epoch": 0.627348293395853, + "grad_norm": 0.9709702305199369, + "learning_rate": 4.86862286683171e-06, + "loss": 0.4687, + "step": 10302 + }, + { + "epoch": 0.6274091891727309, + "grad_norm": 1.0914034192097404, + "learning_rate": 4.868597338436128e-06, + "loss": 0.432, + "step": 10303 + }, + { + "epoch": 0.6274700849496088, + "grad_norm": 1.0136828315517252, + "learning_rate": 4.868571807627467e-06, + "loss": 0.4447, + "step": 10304 + }, + { + "epoch": 0.6275309807264866, + "grad_norm": 0.9868803999179172, + "learning_rate": 4.868546274405755e-06, + "loss": 0.4572, + "step": 10305 + }, + { + "epoch": 0.6275918765033645, + "grad_norm": 1.040788968285088, + "learning_rate": 4.868520738771017e-06, + "loss": 0.3381, + "step": 10306 + }, + { + "epoch": 0.6276527722802424, + "grad_norm": 1.0243718655112801, + "learning_rate": 4.86849520072328e-06, + "loss": 0.4708, + "step": 10307 + }, + { + "epoch": 0.6277136680571203, + "grad_norm": 1.0602954869555883, + "learning_rate": 4.868469660262569e-06, + "loss": 0.4518, + "step": 10308 + }, + { + "epoch": 0.6277745638339981, + "grad_norm": 0.9259339382272751, + "learning_rate": 4.86844411738891e-06, + "loss": 0.5032, + "step": 10309 + }, + { + "epoch": 0.627835459610876, + "grad_norm": 0.9263734091290801, + "learning_rate": 4.868418572102329e-06, + "loss": 0.5028, + "step": 10310 + }, + { + "epoch": 0.6278963553877539, + "grad_norm": 1.0088398614260259, + "learning_rate": 4.868393024402853e-06, + "loss": 0.4745, + "step": 10311 + }, + { + "epoch": 0.6279572511646317, + "grad_norm": 0.9922638489816875, + "learning_rate": 4.868367474290508e-06, + "loss": 0.3784, + "step": 10312 + }, + { + "epoch": 0.6280181469415096, + "grad_norm": 0.8826703085072444, + "learning_rate": 4.868341921765319e-06, + "loss": 0.5368, + "step": 10313 + }, + { + "epoch": 0.6280790427183874, + "grad_norm": 1.0399312517483812, + "learning_rate": 4.868316366827312e-06, + "loss": 0.3852, + "step": 10314 + }, + { + "epoch": 0.6281399384952654, + "grad_norm": 1.0042102315927661, + "learning_rate": 4.868290809476514e-06, + "loss": 0.4609, + "step": 10315 + }, + { + "epoch": 0.6282008342721432, + "grad_norm": 0.9390160119227783, + "learning_rate": 4.86826524971295e-06, + "loss": 0.4167, + "step": 10316 + }, + { + "epoch": 0.6282617300490211, + "grad_norm": 0.9519302383968793, + "learning_rate": 4.868239687536648e-06, + "loss": 0.4481, + "step": 10317 + }, + { + "epoch": 0.6283226258258989, + "grad_norm": 0.9317436325253816, + "learning_rate": 4.868214122947631e-06, + "loss": 0.4388, + "step": 10318 + }, + { + "epoch": 0.6283835216027769, + "grad_norm": 0.9431254560786274, + "learning_rate": 4.868188555945928e-06, + "loss": 0.3953, + "step": 10319 + }, + { + "epoch": 0.6284444173796547, + "grad_norm": 1.0300891413974795, + "learning_rate": 4.868162986531563e-06, + "loss": 0.3755, + "step": 10320 + }, + { + "epoch": 0.6285053131565326, + "grad_norm": 1.0232167189662749, + "learning_rate": 4.868137414704563e-06, + "loss": 0.4746, + "step": 10321 + }, + { + "epoch": 0.6285662089334104, + "grad_norm": 0.9992797529766038, + "learning_rate": 4.868111840464954e-06, + "loss": 0.3881, + "step": 10322 + }, + { + "epoch": 0.6286271047102884, + "grad_norm": 1.1050026966756987, + "learning_rate": 4.868086263812761e-06, + "loss": 0.4576, + "step": 10323 + }, + { + "epoch": 0.6286880004871662, + "grad_norm": 1.027962526392509, + "learning_rate": 4.8680606847480115e-06, + "loss": 0.429, + "step": 10324 + }, + { + "epoch": 0.6287488962640441, + "grad_norm": 1.053925224447216, + "learning_rate": 4.868035103270732e-06, + "loss": 0.4113, + "step": 10325 + }, + { + "epoch": 0.6288097920409219, + "grad_norm": 1.0349376382510238, + "learning_rate": 4.8680095193809464e-06, + "loss": 0.422, + "step": 10326 + }, + { + "epoch": 0.6288706878177999, + "grad_norm": 1.0197236016247622, + "learning_rate": 4.867983933078682e-06, + "loss": 0.4642, + "step": 10327 + }, + { + "epoch": 0.6289315835946777, + "grad_norm": 1.0528818836790839, + "learning_rate": 4.867958344363965e-06, + "loss": 0.4046, + "step": 10328 + }, + { + "epoch": 0.6289924793715556, + "grad_norm": 1.0143250856587274, + "learning_rate": 4.867932753236821e-06, + "loss": 0.3971, + "step": 10329 + }, + { + "epoch": 0.6290533751484334, + "grad_norm": 1.125569929349754, + "learning_rate": 4.867907159697277e-06, + "loss": 0.4315, + "step": 10330 + }, + { + "epoch": 0.6291142709253114, + "grad_norm": 1.008485078782145, + "learning_rate": 4.867881563745358e-06, + "loss": 0.4312, + "step": 10331 + }, + { + "epoch": 0.6291751667021892, + "grad_norm": 1.0181781524421272, + "learning_rate": 4.86785596538109e-06, + "loss": 0.4734, + "step": 10332 + }, + { + "epoch": 0.629236062479067, + "grad_norm": 1.0861470976213885, + "learning_rate": 4.8678303646045e-06, + "loss": 0.3935, + "step": 10333 + }, + { + "epoch": 0.629296958255945, + "grad_norm": 0.9667465532740931, + "learning_rate": 4.8678047614156145e-06, + "loss": 0.4126, + "step": 10334 + }, + { + "epoch": 0.6293578540328228, + "grad_norm": 0.9346300730634888, + "learning_rate": 4.867779155814458e-06, + "loss": 0.5151, + "step": 10335 + }, + { + "epoch": 0.6294187498097007, + "grad_norm": 0.9915396854195064, + "learning_rate": 4.867753547801057e-06, + "loss": 0.5089, + "step": 10336 + }, + { + "epoch": 0.6294796455865785, + "grad_norm": 0.9698646356642212, + "learning_rate": 4.867727937375438e-06, + "loss": 0.4603, + "step": 10337 + }, + { + "epoch": 0.6295405413634565, + "grad_norm": 0.9284116964876804, + "learning_rate": 4.8677023245376274e-06, + "loss": 0.5023, + "step": 10338 + }, + { + "epoch": 0.6296014371403343, + "grad_norm": 0.9991763071209717, + "learning_rate": 4.867676709287651e-06, + "loss": 0.4599, + "step": 10339 + }, + { + "epoch": 0.6296623329172122, + "grad_norm": 1.0823420020410905, + "learning_rate": 4.867651091625534e-06, + "loss": 0.4232, + "step": 10340 + }, + { + "epoch": 0.62972322869409, + "grad_norm": 1.1081754149149037, + "learning_rate": 4.867625471551304e-06, + "loss": 0.4264, + "step": 10341 + }, + { + "epoch": 0.629784124470968, + "grad_norm": 1.0117311587638225, + "learning_rate": 4.8675998490649865e-06, + "loss": 0.3885, + "step": 10342 + }, + { + "epoch": 0.6298450202478458, + "grad_norm": 0.9576106353183063, + "learning_rate": 4.867574224166607e-06, + "loss": 0.4214, + "step": 10343 + }, + { + "epoch": 0.6299059160247237, + "grad_norm": 1.00169930990106, + "learning_rate": 4.867548596856193e-06, + "loss": 0.3963, + "step": 10344 + }, + { + "epoch": 0.6299668118016015, + "grad_norm": 1.0768588027076964, + "learning_rate": 4.867522967133769e-06, + "loss": 0.3859, + "step": 10345 + }, + { + "epoch": 0.6300277075784795, + "grad_norm": 0.9792156813685233, + "learning_rate": 4.867497334999362e-06, + "loss": 0.4183, + "step": 10346 + }, + { + "epoch": 0.6300886033553573, + "grad_norm": 0.9368754751024237, + "learning_rate": 4.867471700452997e-06, + "loss": 0.4047, + "step": 10347 + }, + { + "epoch": 0.6301494991322352, + "grad_norm": 1.083599759370685, + "learning_rate": 4.867446063494702e-06, + "loss": 0.407, + "step": 10348 + }, + { + "epoch": 0.630210394909113, + "grad_norm": 1.0031696896890923, + "learning_rate": 4.867420424124502e-06, + "loss": 0.4101, + "step": 10349 + }, + { + "epoch": 0.630271290685991, + "grad_norm": 1.0906045300419116, + "learning_rate": 4.8673947823424225e-06, + "loss": 0.4176, + "step": 10350 + }, + { + "epoch": 0.6303321864628688, + "grad_norm": 1.0857660602392667, + "learning_rate": 4.867369138148492e-06, + "loss": 0.4181, + "step": 10351 + }, + { + "epoch": 0.6303930822397467, + "grad_norm": 1.06322870834352, + "learning_rate": 4.867343491542734e-06, + "loss": 0.4548, + "step": 10352 + }, + { + "epoch": 0.6304539780166245, + "grad_norm": 1.1164840864987193, + "learning_rate": 4.867317842525176e-06, + "loss": 0.3688, + "step": 10353 + }, + { + "epoch": 0.6305148737935025, + "grad_norm": 0.9935924316303018, + "learning_rate": 4.867292191095844e-06, + "loss": 0.4944, + "step": 10354 + }, + { + "epoch": 0.6305757695703803, + "grad_norm": 1.0181999237590895, + "learning_rate": 4.867266537254763e-06, + "loss": 0.444, + "step": 10355 + }, + { + "epoch": 0.6306366653472582, + "grad_norm": 0.9512887760142956, + "learning_rate": 4.867240881001961e-06, + "loss": 0.5163, + "step": 10356 + }, + { + "epoch": 0.630697561124136, + "grad_norm": 0.9584702762179481, + "learning_rate": 4.867215222337463e-06, + "loss": 0.4537, + "step": 10357 + }, + { + "epoch": 0.630758456901014, + "grad_norm": 0.9714419967360499, + "learning_rate": 4.867189561261296e-06, + "loss": 0.4191, + "step": 10358 + }, + { + "epoch": 0.6308193526778918, + "grad_norm": 1.0075217881778118, + "learning_rate": 4.867163897773484e-06, + "loss": 0.4008, + "step": 10359 + }, + { + "epoch": 0.6308802484547696, + "grad_norm": 0.9051657412018207, + "learning_rate": 4.867138231874056e-06, + "loss": 0.4979, + "step": 10360 + }, + { + "epoch": 0.6309411442316475, + "grad_norm": 0.970971248413331, + "learning_rate": 4.867112563563036e-06, + "loss": 0.4032, + "step": 10361 + }, + { + "epoch": 0.6310020400085254, + "grad_norm": 1.0641582623070573, + "learning_rate": 4.8670868928404505e-06, + "loss": 0.435, + "step": 10362 + }, + { + "epoch": 0.6310629357854033, + "grad_norm": 0.9799487570146793, + "learning_rate": 4.867061219706327e-06, + "loss": 0.4473, + "step": 10363 + }, + { + "epoch": 0.6311238315622811, + "grad_norm": 0.9572394943389859, + "learning_rate": 4.867035544160691e-06, + "loss": 0.4188, + "step": 10364 + }, + { + "epoch": 0.631184727339159, + "grad_norm": 1.0073979709994954, + "learning_rate": 4.867009866203567e-06, + "loss": 0.3409, + "step": 10365 + }, + { + "epoch": 0.6312456231160369, + "grad_norm": 0.9264839483902201, + "learning_rate": 4.866984185834984e-06, + "loss": 0.4864, + "step": 10366 + }, + { + "epoch": 0.6313065188929148, + "grad_norm": 1.0610665501953758, + "learning_rate": 4.866958503054966e-06, + "loss": 0.3405, + "step": 10367 + }, + { + "epoch": 0.6313674146697926, + "grad_norm": 1.0286861854513558, + "learning_rate": 4.86693281786354e-06, + "loss": 0.4438, + "step": 10368 + }, + { + "epoch": 0.6314283104466705, + "grad_norm": 0.9848950229339616, + "learning_rate": 4.866907130260732e-06, + "loss": 0.4729, + "step": 10369 + }, + { + "epoch": 0.6314892062235484, + "grad_norm": 1.1184190544570647, + "learning_rate": 4.866881440246568e-06, + "loss": 0.3455, + "step": 10370 + }, + { + "epoch": 0.6315501020004263, + "grad_norm": 0.986603598896068, + "learning_rate": 4.866855747821075e-06, + "loss": 0.4172, + "step": 10371 + }, + { + "epoch": 0.6316109977773041, + "grad_norm": 1.0490491784683118, + "learning_rate": 4.8668300529842784e-06, + "loss": 0.3916, + "step": 10372 + }, + { + "epoch": 0.631671893554182, + "grad_norm": 1.061059763888569, + "learning_rate": 4.866804355736204e-06, + "loss": 0.463, + "step": 10373 + }, + { + "epoch": 0.6317327893310599, + "grad_norm": 1.1465970666386112, + "learning_rate": 4.8667786560768795e-06, + "loss": 0.3692, + "step": 10374 + }, + { + "epoch": 0.6317936851079378, + "grad_norm": 1.0469773336270207, + "learning_rate": 4.86675295400633e-06, + "loss": 0.355, + "step": 10375 + }, + { + "epoch": 0.6318545808848156, + "grad_norm": 0.9872897562716081, + "learning_rate": 4.866727249524581e-06, + "loss": 0.4576, + "step": 10376 + }, + { + "epoch": 0.6319154766616936, + "grad_norm": 0.9785481132390957, + "learning_rate": 4.86670154263166e-06, + "loss": 0.4282, + "step": 10377 + }, + { + "epoch": 0.6319763724385714, + "grad_norm": 1.0171098370653024, + "learning_rate": 4.866675833327592e-06, + "loss": 0.4568, + "step": 10378 + }, + { + "epoch": 0.6320372682154493, + "grad_norm": 1.0065766066074258, + "learning_rate": 4.866650121612404e-06, + "loss": 0.4242, + "step": 10379 + }, + { + "epoch": 0.6320981639923271, + "grad_norm": 0.9487094926100337, + "learning_rate": 4.866624407486123e-06, + "loss": 0.4323, + "step": 10380 + }, + { + "epoch": 0.6321590597692051, + "grad_norm": 0.8814952523501804, + "learning_rate": 4.866598690948774e-06, + "loss": 0.5025, + "step": 10381 + }, + { + "epoch": 0.6322199555460829, + "grad_norm": 1.0585946882182165, + "learning_rate": 4.866572972000383e-06, + "loss": 0.4142, + "step": 10382 + }, + { + "epoch": 0.6322808513229607, + "grad_norm": 0.9901589004998231, + "learning_rate": 4.866547250640976e-06, + "loss": 0.4976, + "step": 10383 + }, + { + "epoch": 0.6323417470998386, + "grad_norm": 1.026224585354919, + "learning_rate": 4.866521526870582e-06, + "loss": 0.4021, + "step": 10384 + }, + { + "epoch": 0.6324026428767165, + "grad_norm": 1.0284015762306626, + "learning_rate": 4.866495800689223e-06, + "loss": 0.3709, + "step": 10385 + }, + { + "epoch": 0.6324635386535944, + "grad_norm": 0.9404620390291032, + "learning_rate": 4.866470072096928e-06, + "loss": 0.4592, + "step": 10386 + }, + { + "epoch": 0.6325244344304722, + "grad_norm": 1.0290091053872912, + "learning_rate": 4.866444341093722e-06, + "loss": 0.4204, + "step": 10387 + }, + { + "epoch": 0.6325853302073501, + "grad_norm": 1.0228651313565837, + "learning_rate": 4.866418607679633e-06, + "loss": 0.3761, + "step": 10388 + }, + { + "epoch": 0.632646225984228, + "grad_norm": 0.9895507001675763, + "learning_rate": 4.866392871854685e-06, + "loss": 0.4451, + "step": 10389 + }, + { + "epoch": 0.6327071217611059, + "grad_norm": 1.0480704379991768, + "learning_rate": 4.866367133618905e-06, + "loss": 0.4194, + "step": 10390 + }, + { + "epoch": 0.6327680175379837, + "grad_norm": 1.069524341083823, + "learning_rate": 4.86634139297232e-06, + "loss": 0.4399, + "step": 10391 + }, + { + "epoch": 0.6328289133148616, + "grad_norm": 1.0004671145429824, + "learning_rate": 4.866315649914955e-06, + "loss": 0.4248, + "step": 10392 + }, + { + "epoch": 0.6328898090917395, + "grad_norm": 0.9511201512994187, + "learning_rate": 4.866289904446837e-06, + "loss": 0.4937, + "step": 10393 + }, + { + "epoch": 0.6329507048686174, + "grad_norm": 0.9678206646715661, + "learning_rate": 4.866264156567992e-06, + "loss": 0.4592, + "step": 10394 + }, + { + "epoch": 0.6330116006454952, + "grad_norm": 1.0112319382814867, + "learning_rate": 4.866238406278446e-06, + "loss": 0.3971, + "step": 10395 + }, + { + "epoch": 0.6330724964223731, + "grad_norm": 0.988487743498981, + "learning_rate": 4.866212653578226e-06, + "loss": 0.4293, + "step": 10396 + }, + { + "epoch": 0.633133392199251, + "grad_norm": 0.990583525526602, + "learning_rate": 4.866186898467358e-06, + "loss": 0.4452, + "step": 10397 + }, + { + "epoch": 0.6331942879761289, + "grad_norm": 1.04108510065041, + "learning_rate": 4.8661611409458675e-06, + "loss": 0.4532, + "step": 10398 + }, + { + "epoch": 0.6332551837530067, + "grad_norm": 1.0487171351137403, + "learning_rate": 4.8661353810137814e-06, + "loss": 0.4786, + "step": 10399 + }, + { + "epoch": 0.6333160795298846, + "grad_norm": 1.056617641764981, + "learning_rate": 4.866109618671125e-06, + "loss": 0.4614, + "step": 10400 + }, + { + "epoch": 0.6333769753067625, + "grad_norm": 0.9258110298698121, + "learning_rate": 4.866083853917927e-06, + "loss": 0.4297, + "step": 10401 + }, + { + "epoch": 0.6334378710836404, + "grad_norm": 1.001745905748793, + "learning_rate": 4.8660580867542105e-06, + "loss": 0.4345, + "step": 10402 + }, + { + "epoch": 0.6334987668605182, + "grad_norm": 0.9449175046521987, + "learning_rate": 4.866032317180004e-06, + "loss": 0.44, + "step": 10403 + }, + { + "epoch": 0.633559662637396, + "grad_norm": 0.9177540021195816, + "learning_rate": 4.866006545195332e-06, + "loss": 0.4955, + "step": 10404 + }, + { + "epoch": 0.633620558414274, + "grad_norm": 1.0347955811938176, + "learning_rate": 4.8659807708002225e-06, + "loss": 0.4572, + "step": 10405 + }, + { + "epoch": 0.6336814541911518, + "grad_norm": 0.9250418558781744, + "learning_rate": 4.865954993994701e-06, + "loss": 0.4125, + "step": 10406 + }, + { + "epoch": 0.6337423499680297, + "grad_norm": 0.9298633895851703, + "learning_rate": 4.865929214778794e-06, + "loss": 0.4216, + "step": 10407 + }, + { + "epoch": 0.6338032457449075, + "grad_norm": 1.0559698114080889, + "learning_rate": 4.865903433152526e-06, + "loss": 0.4234, + "step": 10408 + }, + { + "epoch": 0.6338641415217855, + "grad_norm": 1.0093914077421455, + "learning_rate": 4.865877649115927e-06, + "loss": 0.4226, + "step": 10409 + }, + { + "epoch": 0.6339250372986633, + "grad_norm": 1.018304970590069, + "learning_rate": 4.865851862669019e-06, + "loss": 0.3963, + "step": 10410 + }, + { + "epoch": 0.6339859330755412, + "grad_norm": 0.9662757945931353, + "learning_rate": 4.865826073811831e-06, + "loss": 0.5033, + "step": 10411 + }, + { + "epoch": 0.634046828852419, + "grad_norm": 0.9915797070060142, + "learning_rate": 4.86580028254439e-06, + "loss": 0.4553, + "step": 10412 + }, + { + "epoch": 0.634107724629297, + "grad_norm": 1.000482018890975, + "learning_rate": 4.865774488866719e-06, + "loss": 0.4384, + "step": 10413 + }, + { + "epoch": 0.6341686204061748, + "grad_norm": 1.062829526118386, + "learning_rate": 4.865748692778847e-06, + "loss": 0.4212, + "step": 10414 + }, + { + "epoch": 0.6342295161830527, + "grad_norm": 1.0395414696728766, + "learning_rate": 4.865722894280799e-06, + "loss": 0.425, + "step": 10415 + }, + { + "epoch": 0.6342904119599306, + "grad_norm": 0.9746239192455095, + "learning_rate": 4.865697093372602e-06, + "loss": 0.4403, + "step": 10416 + }, + { + "epoch": 0.6343513077368085, + "grad_norm": 0.9888622954599039, + "learning_rate": 4.865671290054282e-06, + "loss": 0.3983, + "step": 10417 + }, + { + "epoch": 0.6344122035136863, + "grad_norm": 0.990825233105617, + "learning_rate": 4.865645484325865e-06, + "loss": 0.3962, + "step": 10418 + }, + { + "epoch": 0.6344730992905642, + "grad_norm": 1.0632040549572872, + "learning_rate": 4.8656196761873775e-06, + "loss": 0.4639, + "step": 10419 + }, + { + "epoch": 0.6345339950674421, + "grad_norm": 0.9723892408940062, + "learning_rate": 4.865593865638846e-06, + "loss": 0.4775, + "step": 10420 + }, + { + "epoch": 0.63459489084432, + "grad_norm": 1.0794100565685762, + "learning_rate": 4.865568052680297e-06, + "loss": 0.4715, + "step": 10421 + }, + { + "epoch": 0.6346557866211978, + "grad_norm": 0.948410454375544, + "learning_rate": 4.8655422373117565e-06, + "loss": 0.4661, + "step": 10422 + }, + { + "epoch": 0.6347166823980757, + "grad_norm": 1.078001186343749, + "learning_rate": 4.86551641953325e-06, + "loss": 0.4972, + "step": 10423 + }, + { + "epoch": 0.6347775781749536, + "grad_norm": 1.0325806571228857, + "learning_rate": 4.865490599344806e-06, + "loss": 0.4478, + "step": 10424 + }, + { + "epoch": 0.6348384739518315, + "grad_norm": 0.9707627448046251, + "learning_rate": 4.865464776746447e-06, + "loss": 0.4995, + "step": 10425 + }, + { + "epoch": 0.6348993697287093, + "grad_norm": 1.0618554926364447, + "learning_rate": 4.865438951738203e-06, + "loss": 0.373, + "step": 10426 + }, + { + "epoch": 0.6349602655055872, + "grad_norm": 0.9709627936456289, + "learning_rate": 4.8654131243200995e-06, + "loss": 0.421, + "step": 10427 + }, + { + "epoch": 0.6350211612824651, + "grad_norm": 0.977268671419569, + "learning_rate": 4.865387294492162e-06, + "loss": 0.4009, + "step": 10428 + }, + { + "epoch": 0.635082057059343, + "grad_norm": 0.9416859720000199, + "learning_rate": 4.865361462254417e-06, + "loss": 0.4373, + "step": 10429 + }, + { + "epoch": 0.6351429528362208, + "grad_norm": 1.0044485209194258, + "learning_rate": 4.86533562760689e-06, + "loss": 0.3666, + "step": 10430 + }, + { + "epoch": 0.6352038486130986, + "grad_norm": 1.1020967439348468, + "learning_rate": 4.865309790549609e-06, + "loss": 0.485, + "step": 10431 + }, + { + "epoch": 0.6352647443899766, + "grad_norm": 1.0211863662260419, + "learning_rate": 4.8652839510826e-06, + "loss": 0.4517, + "step": 10432 + }, + { + "epoch": 0.6353256401668544, + "grad_norm": 0.954149315279896, + "learning_rate": 4.8652581092058885e-06, + "loss": 0.4973, + "step": 10433 + }, + { + "epoch": 0.6353865359437323, + "grad_norm": 1.0958206034141407, + "learning_rate": 4.8652322649195014e-06, + "loss": 0.4645, + "step": 10434 + }, + { + "epoch": 0.6354474317206101, + "grad_norm": 1.02882679985465, + "learning_rate": 4.865206418223464e-06, + "loss": 0.3873, + "step": 10435 + }, + { + "epoch": 0.6355083274974881, + "grad_norm": 1.2358700040954418, + "learning_rate": 4.865180569117804e-06, + "loss": 0.4279, + "step": 10436 + }, + { + "epoch": 0.6355692232743659, + "grad_norm": 1.1213259082953315, + "learning_rate": 4.8651547176025475e-06, + "loss": 0.4436, + "step": 10437 + }, + { + "epoch": 0.6356301190512438, + "grad_norm": 1.0519235979219017, + "learning_rate": 4.865128863677721e-06, + "loss": 0.4003, + "step": 10438 + }, + { + "epoch": 0.6356910148281216, + "grad_norm": 1.0050243065666382, + "learning_rate": 4.865103007343349e-06, + "loss": 0.4481, + "step": 10439 + }, + { + "epoch": 0.6357519106049996, + "grad_norm": 1.009382582486942, + "learning_rate": 4.86507714859946e-06, + "loss": 0.4641, + "step": 10440 + }, + { + "epoch": 0.6358128063818774, + "grad_norm": 0.996463376409283, + "learning_rate": 4.86505128744608e-06, + "loss": 0.422, + "step": 10441 + }, + { + "epoch": 0.6358737021587553, + "grad_norm": 0.9745785872369009, + "learning_rate": 4.865025423883234e-06, + "loss": 0.4199, + "step": 10442 + }, + { + "epoch": 0.6359345979356331, + "grad_norm": 1.0147453141278968, + "learning_rate": 4.86499955791095e-06, + "loss": 0.4168, + "step": 10443 + }, + { + "epoch": 0.6359954937125111, + "grad_norm": 0.9325475071077306, + "learning_rate": 4.864973689529253e-06, + "loss": 0.4289, + "step": 10444 + }, + { + "epoch": 0.6360563894893889, + "grad_norm": 1.0018704710709703, + "learning_rate": 4.864947818738171e-06, + "loss": 0.4676, + "step": 10445 + }, + { + "epoch": 0.6361172852662668, + "grad_norm": 1.0734788629625402, + "learning_rate": 4.864921945537728e-06, + "loss": 0.4132, + "step": 10446 + }, + { + "epoch": 0.6361781810431446, + "grad_norm": 0.9208616807170211, + "learning_rate": 4.864896069927952e-06, + "loss": 0.4535, + "step": 10447 + }, + { + "epoch": 0.6362390768200226, + "grad_norm": 0.980701249489754, + "learning_rate": 4.86487019190887e-06, + "loss": 0.4209, + "step": 10448 + }, + { + "epoch": 0.6362999725969004, + "grad_norm": 1.0403130677307426, + "learning_rate": 4.8648443114805064e-06, + "loss": 0.3992, + "step": 10449 + }, + { + "epoch": 0.6363608683737783, + "grad_norm": 0.9781101816548462, + "learning_rate": 4.8648184286428895e-06, + "loss": 0.441, + "step": 10450 + }, + { + "epoch": 0.6364217641506561, + "grad_norm": 1.0494607214084335, + "learning_rate": 4.864792543396044e-06, + "loss": 0.3969, + "step": 10451 + }, + { + "epoch": 0.6364826599275341, + "grad_norm": 1.0530152240878847, + "learning_rate": 4.864766655739998e-06, + "loss": 0.4273, + "step": 10452 + }, + { + "epoch": 0.6365435557044119, + "grad_norm": 1.0453758724079434, + "learning_rate": 4.864740765674776e-06, + "loss": 0.4045, + "step": 10453 + }, + { + "epoch": 0.6366044514812897, + "grad_norm": 1.0419257755097646, + "learning_rate": 4.864714873200405e-06, + "loss": 0.3839, + "step": 10454 + }, + { + "epoch": 0.6366653472581676, + "grad_norm": 1.0373802824504132, + "learning_rate": 4.864688978316913e-06, + "loss": 0.399, + "step": 10455 + }, + { + "epoch": 0.6367262430350455, + "grad_norm": 0.9097512226659265, + "learning_rate": 4.864663081024323e-06, + "loss": 0.4811, + "step": 10456 + }, + { + "epoch": 0.6367871388119234, + "grad_norm": 1.0025578587184372, + "learning_rate": 4.864637181322665e-06, + "loss": 0.3863, + "step": 10457 + }, + { + "epoch": 0.6368480345888012, + "grad_norm": 1.0362047497149913, + "learning_rate": 4.864611279211964e-06, + "loss": 0.4545, + "step": 10458 + }, + { + "epoch": 0.6369089303656792, + "grad_norm": 1.0449944079854985, + "learning_rate": 4.864585374692244e-06, + "loss": 0.4733, + "step": 10459 + }, + { + "epoch": 0.636969826142557, + "grad_norm": 1.0611132116099582, + "learning_rate": 4.864559467763536e-06, + "loss": 0.4156, + "step": 10460 + }, + { + "epoch": 0.6370307219194349, + "grad_norm": 1.093915622937792, + "learning_rate": 4.864533558425863e-06, + "loss": 0.4426, + "step": 10461 + }, + { + "epoch": 0.6370916176963127, + "grad_norm": 1.038342138092537, + "learning_rate": 4.864507646679253e-06, + "loss": 0.45, + "step": 10462 + }, + { + "epoch": 0.6371525134731907, + "grad_norm": 0.9423435212600103, + "learning_rate": 4.864481732523731e-06, + "loss": 0.4923, + "step": 10463 + }, + { + "epoch": 0.6372134092500685, + "grad_norm": 0.9531322048791934, + "learning_rate": 4.864455815959324e-06, + "loss": 0.4424, + "step": 10464 + }, + { + "epoch": 0.6372743050269464, + "grad_norm": 1.0481643282078918, + "learning_rate": 4.864429896986059e-06, + "loss": 0.4, + "step": 10465 + }, + { + "epoch": 0.6373352008038242, + "grad_norm": 0.9356627998225571, + "learning_rate": 4.864403975603962e-06, + "loss": 0.497, + "step": 10466 + }, + { + "epoch": 0.6373960965807022, + "grad_norm": 1.00232441651331, + "learning_rate": 4.864378051813059e-06, + "loss": 0.447, + "step": 10467 + }, + { + "epoch": 0.63745699235758, + "grad_norm": 1.025285718208498, + "learning_rate": 4.8643521256133775e-06, + "loss": 0.3876, + "step": 10468 + }, + { + "epoch": 0.6375178881344579, + "grad_norm": 1.0295592385620431, + "learning_rate": 4.864326197004943e-06, + "loss": 0.4607, + "step": 10469 + }, + { + "epoch": 0.6375787839113357, + "grad_norm": 0.9319293605517959, + "learning_rate": 4.864300265987782e-06, + "loss": 0.4286, + "step": 10470 + }, + { + "epoch": 0.6376396796882137, + "grad_norm": 1.0364564194487667, + "learning_rate": 4.8642743325619215e-06, + "loss": 0.3772, + "step": 10471 + }, + { + "epoch": 0.6377005754650915, + "grad_norm": 1.0282074682914495, + "learning_rate": 4.864248396727386e-06, + "loss": 0.417, + "step": 10472 + }, + { + "epoch": 0.6377614712419694, + "grad_norm": 1.0351136540690913, + "learning_rate": 4.864222458484205e-06, + "loss": 0.4826, + "step": 10473 + }, + { + "epoch": 0.6378223670188472, + "grad_norm": 0.9774776931342042, + "learning_rate": 4.864196517832402e-06, + "loss": 0.37, + "step": 10474 + }, + { + "epoch": 0.6378832627957252, + "grad_norm": 0.9833472212798267, + "learning_rate": 4.8641705747720055e-06, + "loss": 0.5041, + "step": 10475 + }, + { + "epoch": 0.637944158572603, + "grad_norm": 1.0325380460680846, + "learning_rate": 4.864144629303041e-06, + "loss": 0.4451, + "step": 10476 + }, + { + "epoch": 0.6380050543494808, + "grad_norm": 1.0752380402093091, + "learning_rate": 4.864118681425536e-06, + "loss": 0.4375, + "step": 10477 + }, + { + "epoch": 0.6380659501263587, + "grad_norm": 0.9680681917646777, + "learning_rate": 4.8640927311395145e-06, + "loss": 0.3944, + "step": 10478 + }, + { + "epoch": 0.6381268459032367, + "grad_norm": 0.9378440530928487, + "learning_rate": 4.864066778445006e-06, + "loss": 0.4269, + "step": 10479 + }, + { + "epoch": 0.6381877416801145, + "grad_norm": 1.0771138770301083, + "learning_rate": 4.864040823342034e-06, + "loss": 0.4458, + "step": 10480 + }, + { + "epoch": 0.6382486374569923, + "grad_norm": 1.0400747296656099, + "learning_rate": 4.864014865830627e-06, + "loss": 0.4278, + "step": 10481 + }, + { + "epoch": 0.6383095332338702, + "grad_norm": 0.9582303797171784, + "learning_rate": 4.8639889059108105e-06, + "loss": 0.4361, + "step": 10482 + }, + { + "epoch": 0.6383704290107481, + "grad_norm": 1.0429244704130767, + "learning_rate": 4.8639629435826116e-06, + "loss": 0.4249, + "step": 10483 + }, + { + "epoch": 0.638431324787626, + "grad_norm": 0.9930595814204902, + "learning_rate": 4.8639369788460565e-06, + "loss": 0.4425, + "step": 10484 + }, + { + "epoch": 0.6384922205645038, + "grad_norm": 0.9931961802576459, + "learning_rate": 4.863911011701171e-06, + "loss": 0.4233, + "step": 10485 + }, + { + "epoch": 0.6385531163413817, + "grad_norm": 1.0402408916153487, + "learning_rate": 4.863885042147983e-06, + "loss": 0.4617, + "step": 10486 + }, + { + "epoch": 0.6386140121182596, + "grad_norm": 0.9935281246492349, + "learning_rate": 4.863859070186518e-06, + "loss": 0.401, + "step": 10487 + }, + { + "epoch": 0.6386749078951375, + "grad_norm": 0.9774275913208501, + "learning_rate": 4.863833095816802e-06, + "loss": 0.3951, + "step": 10488 + }, + { + "epoch": 0.6387358036720153, + "grad_norm": 0.9893172283388673, + "learning_rate": 4.863807119038862e-06, + "loss": 0.4417, + "step": 10489 + }, + { + "epoch": 0.6387966994488932, + "grad_norm": 1.0180228517266803, + "learning_rate": 4.863781139852724e-06, + "loss": 0.402, + "step": 10490 + }, + { + "epoch": 0.6388575952257711, + "grad_norm": 1.084012621163214, + "learning_rate": 4.8637551582584154e-06, + "loss": 0.3852, + "step": 10491 + }, + { + "epoch": 0.638918491002649, + "grad_norm": 0.9960360333901718, + "learning_rate": 4.863729174255963e-06, + "loss": 0.4929, + "step": 10492 + }, + { + "epoch": 0.6389793867795268, + "grad_norm": 1.0095364898145294, + "learning_rate": 4.863703187845391e-06, + "loss": 0.3759, + "step": 10493 + }, + { + "epoch": 0.6390402825564047, + "grad_norm": 0.9966193620919752, + "learning_rate": 4.863677199026729e-06, + "loss": 0.3775, + "step": 10494 + }, + { + "epoch": 0.6391011783332826, + "grad_norm": 1.0198583190930866, + "learning_rate": 4.8636512078e-06, + "loss": 0.4168, + "step": 10495 + }, + { + "epoch": 0.6391620741101605, + "grad_norm": 0.9998282452334478, + "learning_rate": 4.863625214165234e-06, + "loss": 0.386, + "step": 10496 + }, + { + "epoch": 0.6392229698870383, + "grad_norm": 1.0624025870431606, + "learning_rate": 4.8635992181224545e-06, + "loss": 0.3783, + "step": 10497 + }, + { + "epoch": 0.6392838656639163, + "grad_norm": 1.0536573002126386, + "learning_rate": 4.863573219671689e-06, + "loss": 0.4219, + "step": 10498 + }, + { + "epoch": 0.6393447614407941, + "grad_norm": 0.9416078386197718, + "learning_rate": 4.863547218812965e-06, + "loss": 0.4472, + "step": 10499 + }, + { + "epoch": 0.639405657217672, + "grad_norm": 1.0000183025663993, + "learning_rate": 4.863521215546309e-06, + "loss": 0.4034, + "step": 10500 + }, + { + "epoch": 0.6394665529945498, + "grad_norm": 1.0848268092568731, + "learning_rate": 4.863495209871745e-06, + "loss": 0.4118, + "step": 10501 + }, + { + "epoch": 0.6395274487714278, + "grad_norm": 1.0138404438747586, + "learning_rate": 4.863469201789302e-06, + "loss": 0.5071, + "step": 10502 + }, + { + "epoch": 0.6395883445483056, + "grad_norm": 1.0293094173411066, + "learning_rate": 4.863443191299006e-06, + "loss": 0.4814, + "step": 10503 + }, + { + "epoch": 0.6396492403251834, + "grad_norm": 1.0366165849860016, + "learning_rate": 4.863417178400883e-06, + "loss": 0.4799, + "step": 10504 + }, + { + "epoch": 0.6397101361020613, + "grad_norm": 1.0230464621168636, + "learning_rate": 4.86339116309496e-06, + "loss": 0.4466, + "step": 10505 + }, + { + "epoch": 0.6397710318789392, + "grad_norm": 0.964540870168137, + "learning_rate": 4.863365145381263e-06, + "loss": 0.4135, + "step": 10506 + }, + { + "epoch": 0.6398319276558171, + "grad_norm": 0.9675728658946712, + "learning_rate": 4.863339125259818e-06, + "loss": 0.4156, + "step": 10507 + }, + { + "epoch": 0.6398928234326949, + "grad_norm": 0.9299309157546336, + "learning_rate": 4.863313102730653e-06, + "loss": 0.4894, + "step": 10508 + }, + { + "epoch": 0.6399537192095728, + "grad_norm": 0.9449918049675945, + "learning_rate": 4.8632870777937945e-06, + "loss": 0.4776, + "step": 10509 + }, + { + "epoch": 0.6400146149864507, + "grad_norm": 1.080519615626287, + "learning_rate": 4.863261050449268e-06, + "loss": 0.4317, + "step": 10510 + }, + { + "epoch": 0.6400755107633286, + "grad_norm": 1.0173063181073534, + "learning_rate": 4.8632350206970995e-06, + "loss": 0.4448, + "step": 10511 + }, + { + "epoch": 0.6401364065402064, + "grad_norm": 0.9347319193742483, + "learning_rate": 4.863208988537316e-06, + "loss": 0.441, + "step": 10512 + }, + { + "epoch": 0.6401973023170843, + "grad_norm": 1.07520474689411, + "learning_rate": 4.863182953969945e-06, + "loss": 0.4152, + "step": 10513 + }, + { + "epoch": 0.6402581980939622, + "grad_norm": 0.9632309080077461, + "learning_rate": 4.8631569169950124e-06, + "loss": 0.4567, + "step": 10514 + }, + { + "epoch": 0.6403190938708401, + "grad_norm": 1.0273541534928041, + "learning_rate": 4.863130877612544e-06, + "loss": 0.4422, + "step": 10515 + }, + { + "epoch": 0.6403799896477179, + "grad_norm": 0.9001765894865732, + "learning_rate": 4.863104835822567e-06, + "loss": 0.4511, + "step": 10516 + }, + { + "epoch": 0.6404408854245958, + "grad_norm": 0.9559153310458257, + "learning_rate": 4.8630787916251085e-06, + "loss": 0.4225, + "step": 10517 + }, + { + "epoch": 0.6405017812014737, + "grad_norm": 1.0293190573869537, + "learning_rate": 4.863052745020195e-06, + "loss": 0.4136, + "step": 10518 + }, + { + "epoch": 0.6405626769783516, + "grad_norm": 0.9468335072922939, + "learning_rate": 4.863026696007851e-06, + "loss": 0.4801, + "step": 10519 + }, + { + "epoch": 0.6406235727552294, + "grad_norm": 0.9721746571992215, + "learning_rate": 4.863000644588105e-06, + "loss": 0.4961, + "step": 10520 + }, + { + "epoch": 0.6406844685321073, + "grad_norm": 0.9839959662314072, + "learning_rate": 4.862974590760984e-06, + "loss": 0.4511, + "step": 10521 + }, + { + "epoch": 0.6407453643089852, + "grad_norm": 1.0195019973545816, + "learning_rate": 4.862948534526513e-06, + "loss": 0.3779, + "step": 10522 + }, + { + "epoch": 0.640806260085863, + "grad_norm": 1.1107046478545188, + "learning_rate": 4.862922475884719e-06, + "loss": 0.3876, + "step": 10523 + }, + { + "epoch": 0.6408671558627409, + "grad_norm": 0.9718796780824402, + "learning_rate": 4.862896414835628e-06, + "loss": 0.4283, + "step": 10524 + }, + { + "epoch": 0.6409280516396187, + "grad_norm": 0.9756063249034618, + "learning_rate": 4.8628703513792685e-06, + "loss": 0.4505, + "step": 10525 + }, + { + "epoch": 0.6409889474164967, + "grad_norm": 0.9749293460524524, + "learning_rate": 4.862844285515665e-06, + "loss": 0.4733, + "step": 10526 + }, + { + "epoch": 0.6410498431933745, + "grad_norm": 0.9472756398333103, + "learning_rate": 4.862818217244846e-06, + "loss": 0.426, + "step": 10527 + }, + { + "epoch": 0.6411107389702524, + "grad_norm": 1.0595198469002445, + "learning_rate": 4.862792146566836e-06, + "loss": 0.387, + "step": 10528 + }, + { + "epoch": 0.6411716347471302, + "grad_norm": 1.0631497906865348, + "learning_rate": 4.862766073481663e-06, + "loss": 0.442, + "step": 10529 + }, + { + "epoch": 0.6412325305240082, + "grad_norm": 1.016567990152607, + "learning_rate": 4.862739997989353e-06, + "loss": 0.4097, + "step": 10530 + }, + { + "epoch": 0.641293426300886, + "grad_norm": 1.0195766228933933, + "learning_rate": 4.862713920089932e-06, + "loss": 0.3875, + "step": 10531 + }, + { + "epoch": 0.6413543220777639, + "grad_norm": 1.076873593565555, + "learning_rate": 4.862687839783427e-06, + "loss": 0.4434, + "step": 10532 + }, + { + "epoch": 0.6414152178546417, + "grad_norm": 1.0485515952167586, + "learning_rate": 4.862661757069866e-06, + "loss": 0.3783, + "step": 10533 + }, + { + "epoch": 0.6414761136315197, + "grad_norm": 1.1655003998110576, + "learning_rate": 4.862635671949273e-06, + "loss": 0.4249, + "step": 10534 + }, + { + "epoch": 0.6415370094083975, + "grad_norm": 0.9780702284755981, + "learning_rate": 4.862609584421677e-06, + "loss": 0.3895, + "step": 10535 + }, + { + "epoch": 0.6415979051852754, + "grad_norm": 1.054723238612435, + "learning_rate": 4.862583494487103e-06, + "loss": 0.3965, + "step": 10536 + }, + { + "epoch": 0.6416588009621532, + "grad_norm": 0.993881126802651, + "learning_rate": 4.862557402145578e-06, + "loss": 0.4997, + "step": 10537 + }, + { + "epoch": 0.6417196967390312, + "grad_norm": 1.0487776067884285, + "learning_rate": 4.862531307397129e-06, + "loss": 0.4818, + "step": 10538 + }, + { + "epoch": 0.641780592515909, + "grad_norm": 1.011829212984355, + "learning_rate": 4.862505210241781e-06, + "loss": 0.5037, + "step": 10539 + }, + { + "epoch": 0.6418414882927869, + "grad_norm": 1.0539047208803565, + "learning_rate": 4.862479110679563e-06, + "loss": 0.4392, + "step": 10540 + }, + { + "epoch": 0.6419023840696648, + "grad_norm": 1.0254773507187236, + "learning_rate": 4.862453008710501e-06, + "loss": 0.4228, + "step": 10541 + }, + { + "epoch": 0.6419632798465427, + "grad_norm": 1.053257645153928, + "learning_rate": 4.862426904334619e-06, + "loss": 0.3985, + "step": 10542 + }, + { + "epoch": 0.6420241756234205, + "grad_norm": 1.0125597134818534, + "learning_rate": 4.8624007975519475e-06, + "loss": 0.4591, + "step": 10543 + }, + { + "epoch": 0.6420850714002984, + "grad_norm": 1.0515699249895656, + "learning_rate": 4.86237468836251e-06, + "loss": 0.4268, + "step": 10544 + }, + { + "epoch": 0.6421459671771763, + "grad_norm": 1.002866397820736, + "learning_rate": 4.8623485767663345e-06, + "loss": 0.4594, + "step": 10545 + }, + { + "epoch": 0.6422068629540542, + "grad_norm": 1.0310487822238719, + "learning_rate": 4.862322462763448e-06, + "loss": 0.3848, + "step": 10546 + }, + { + "epoch": 0.642267758730932, + "grad_norm": 0.9940128141854734, + "learning_rate": 4.862296346353876e-06, + "loss": 0.3906, + "step": 10547 + }, + { + "epoch": 0.6423286545078098, + "grad_norm": 1.1003965326099998, + "learning_rate": 4.862270227537645e-06, + "loss": 0.421, + "step": 10548 + }, + { + "epoch": 0.6423895502846878, + "grad_norm": 0.9839989421941795, + "learning_rate": 4.862244106314783e-06, + "loss": 0.5188, + "step": 10549 + }, + { + "epoch": 0.6424504460615656, + "grad_norm": 1.0900412835169846, + "learning_rate": 4.862217982685316e-06, + "loss": 0.3584, + "step": 10550 + }, + { + "epoch": 0.6425113418384435, + "grad_norm": 0.9427219780320818, + "learning_rate": 4.86219185664927e-06, + "loss": 0.4898, + "step": 10551 + }, + { + "epoch": 0.6425722376153213, + "grad_norm": 1.0361452660179906, + "learning_rate": 4.862165728206673e-06, + "loss": 0.4173, + "step": 10552 + }, + { + "epoch": 0.6426331333921993, + "grad_norm": 1.03327684514905, + "learning_rate": 4.8621395973575495e-06, + "loss": 0.3804, + "step": 10553 + }, + { + "epoch": 0.6426940291690771, + "grad_norm": 1.0224569933066328, + "learning_rate": 4.862113464101927e-06, + "loss": 0.4337, + "step": 10554 + }, + { + "epoch": 0.642754924945955, + "grad_norm": 1.0179264588936878, + "learning_rate": 4.862087328439833e-06, + "loss": 0.433, + "step": 10555 + }, + { + "epoch": 0.6428158207228328, + "grad_norm": 1.0102334429052768, + "learning_rate": 4.862061190371293e-06, + "loss": 0.434, + "step": 10556 + }, + { + "epoch": 0.6428767164997108, + "grad_norm": 0.9679839623852085, + "learning_rate": 4.862035049896335e-06, + "loss": 0.4484, + "step": 10557 + }, + { + "epoch": 0.6429376122765886, + "grad_norm": 1.0469364912224843, + "learning_rate": 4.8620089070149844e-06, + "loss": 0.3979, + "step": 10558 + }, + { + "epoch": 0.6429985080534665, + "grad_norm": 0.9830834280367098, + "learning_rate": 4.861982761727268e-06, + "loss": 0.4508, + "step": 10559 + }, + { + "epoch": 0.6430594038303443, + "grad_norm": 0.9589099549627871, + "learning_rate": 4.861956614033214e-06, + "loss": 0.4162, + "step": 10560 + }, + { + "epoch": 0.6431202996072223, + "grad_norm": 1.0125484010309092, + "learning_rate": 4.861930463932847e-06, + "loss": 0.4184, + "step": 10561 + }, + { + "epoch": 0.6431811953841001, + "grad_norm": 1.0156335309349453, + "learning_rate": 4.861904311426193e-06, + "loss": 0.4435, + "step": 10562 + }, + { + "epoch": 0.643242091160978, + "grad_norm": 1.0022993086357432, + "learning_rate": 4.861878156513282e-06, + "loss": 0.4584, + "step": 10563 + }, + { + "epoch": 0.6433029869378558, + "grad_norm": 1.0021368020826897, + "learning_rate": 4.8618519991941374e-06, + "loss": 0.456, + "step": 10564 + }, + { + "epoch": 0.6433638827147338, + "grad_norm": 0.9524699463431355, + "learning_rate": 4.861825839468787e-06, + "loss": 0.4502, + "step": 10565 + }, + { + "epoch": 0.6434247784916116, + "grad_norm": 0.9022657401882825, + "learning_rate": 4.861799677337257e-06, + "loss": 0.5595, + "step": 10566 + }, + { + "epoch": 0.6434856742684895, + "grad_norm": 0.9842984151533907, + "learning_rate": 4.8617735127995765e-06, + "loss": 0.4279, + "step": 10567 + }, + { + "epoch": 0.6435465700453673, + "grad_norm": 0.9458596201978712, + "learning_rate": 4.86174734585577e-06, + "loss": 0.4302, + "step": 10568 + }, + { + "epoch": 0.6436074658222453, + "grad_norm": 0.9805318328049424, + "learning_rate": 4.8617211765058635e-06, + "loss": 0.4374, + "step": 10569 + }, + { + "epoch": 0.6436683615991231, + "grad_norm": 1.094754626526192, + "learning_rate": 4.861695004749885e-06, + "loss": 0.4321, + "step": 10570 + }, + { + "epoch": 0.643729257376001, + "grad_norm": 1.0340138489607757, + "learning_rate": 4.86166883058786e-06, + "loss": 0.4632, + "step": 10571 + }, + { + "epoch": 0.6437901531528788, + "grad_norm": 1.0292772666070702, + "learning_rate": 4.861642654019816e-06, + "loss": 0.4439, + "step": 10572 + }, + { + "epoch": 0.6438510489297568, + "grad_norm": 1.1031224644172177, + "learning_rate": 4.86161647504578e-06, + "loss": 0.4146, + "step": 10573 + }, + { + "epoch": 0.6439119447066346, + "grad_norm": 1.00012964646356, + "learning_rate": 4.8615902936657785e-06, + "loss": 0.3792, + "step": 10574 + }, + { + "epoch": 0.6439728404835124, + "grad_norm": 1.1054000764351017, + "learning_rate": 4.861564109879837e-06, + "loss": 0.4186, + "step": 10575 + }, + { + "epoch": 0.6440337362603903, + "grad_norm": 1.09096944334143, + "learning_rate": 4.861537923687984e-06, + "loss": 0.5083, + "step": 10576 + }, + { + "epoch": 0.6440946320372682, + "grad_norm": 0.9168812022425431, + "learning_rate": 4.861511735090245e-06, + "loss": 0.4831, + "step": 10577 + }, + { + "epoch": 0.6441555278141461, + "grad_norm": 1.0157093412725704, + "learning_rate": 4.861485544086647e-06, + "loss": 0.4152, + "step": 10578 + }, + { + "epoch": 0.6442164235910239, + "grad_norm": 1.0132668594178684, + "learning_rate": 4.8614593506772165e-06, + "loss": 0.4363, + "step": 10579 + }, + { + "epoch": 0.6442773193679019, + "grad_norm": 0.983651190301926, + "learning_rate": 4.86143315486198e-06, + "loss": 0.4656, + "step": 10580 + }, + { + "epoch": 0.6443382151447797, + "grad_norm": 0.985335827961475, + "learning_rate": 4.861406956640965e-06, + "loss": 0.4824, + "step": 10581 + }, + { + "epoch": 0.6443991109216576, + "grad_norm": 1.011704032408697, + "learning_rate": 4.861380756014197e-06, + "loss": 0.3731, + "step": 10582 + }, + { + "epoch": 0.6444600066985354, + "grad_norm": 0.9382838173175052, + "learning_rate": 4.8613545529817045e-06, + "loss": 0.445, + "step": 10583 + }, + { + "epoch": 0.6445209024754134, + "grad_norm": 0.948635455780823, + "learning_rate": 4.861328347543512e-06, + "loss": 0.4058, + "step": 10584 + }, + { + "epoch": 0.6445817982522912, + "grad_norm": 1.0442759721282102, + "learning_rate": 4.861302139699647e-06, + "loss": 0.4053, + "step": 10585 + }, + { + "epoch": 0.6446426940291691, + "grad_norm": 1.069627153244621, + "learning_rate": 4.861275929450138e-06, + "loss": 0.3977, + "step": 10586 + }, + { + "epoch": 0.6447035898060469, + "grad_norm": 1.0541556615210181, + "learning_rate": 4.861249716795009e-06, + "loss": 0.3778, + "step": 10587 + }, + { + "epoch": 0.6447644855829249, + "grad_norm": 1.0298664547800662, + "learning_rate": 4.861223501734288e-06, + "loss": 0.43, + "step": 10588 + }, + { + "epoch": 0.6448253813598027, + "grad_norm": 1.1343787331172799, + "learning_rate": 4.861197284268002e-06, + "loss": 0.4129, + "step": 10589 + }, + { + "epoch": 0.6448862771366806, + "grad_norm": 0.9240051554861916, + "learning_rate": 4.861171064396177e-06, + "loss": 0.4824, + "step": 10590 + }, + { + "epoch": 0.6449471729135584, + "grad_norm": 1.0885333994513031, + "learning_rate": 4.86114484211884e-06, + "loss": 0.4941, + "step": 10591 + }, + { + "epoch": 0.6450080686904364, + "grad_norm": 0.9481604520687096, + "learning_rate": 4.861118617436018e-06, + "loss": 0.5242, + "step": 10592 + }, + { + "epoch": 0.6450689644673142, + "grad_norm": 0.9833201036494563, + "learning_rate": 4.861092390347737e-06, + "loss": 0.4506, + "step": 10593 + }, + { + "epoch": 0.645129860244192, + "grad_norm": 1.0476752482734253, + "learning_rate": 4.861066160854025e-06, + "loss": 0.4164, + "step": 10594 + }, + { + "epoch": 0.6451907560210699, + "grad_norm": 0.9584868947363762, + "learning_rate": 4.861039928954907e-06, + "loss": 0.4459, + "step": 10595 + }, + { + "epoch": 0.6452516517979479, + "grad_norm": 1.032973447414365, + "learning_rate": 4.861013694650412e-06, + "loss": 0.4211, + "step": 10596 + }, + { + "epoch": 0.6453125475748257, + "grad_norm": 0.9700104599556907, + "learning_rate": 4.8609874579405634e-06, + "loss": 0.4311, + "step": 10597 + }, + { + "epoch": 0.6453734433517035, + "grad_norm": 1.0606984811701334, + "learning_rate": 4.860961218825391e-06, + "loss": 0.4563, + "step": 10598 + }, + { + "epoch": 0.6454343391285814, + "grad_norm": 0.9342832339736323, + "learning_rate": 4.86093497730492e-06, + "loss": 0.4702, + "step": 10599 + }, + { + "epoch": 0.6454952349054593, + "grad_norm": 0.9417992402968991, + "learning_rate": 4.860908733379177e-06, + "loss": 0.4319, + "step": 10600 + }, + { + "epoch": 0.6455561306823372, + "grad_norm": 0.9412000634458526, + "learning_rate": 4.860882487048191e-06, + "loss": 0.4173, + "step": 10601 + }, + { + "epoch": 0.645617026459215, + "grad_norm": 0.9239432284732534, + "learning_rate": 4.860856238311985e-06, + "loss": 0.4942, + "step": 10602 + }, + { + "epoch": 0.6456779222360929, + "grad_norm": 1.0499882206099673, + "learning_rate": 4.860829987170589e-06, + "loss": 0.3366, + "step": 10603 + }, + { + "epoch": 0.6457388180129708, + "grad_norm": 1.0504061599912495, + "learning_rate": 4.860803733624029e-06, + "loss": 0.4044, + "step": 10604 + }, + { + "epoch": 0.6457997137898487, + "grad_norm": 1.0373497829498273, + "learning_rate": 4.8607774776723295e-06, + "loss": 0.4718, + "step": 10605 + }, + { + "epoch": 0.6458606095667265, + "grad_norm": 1.0146417692090641, + "learning_rate": 4.86075121931552e-06, + "loss": 0.4093, + "step": 10606 + }, + { + "epoch": 0.6459215053436044, + "grad_norm": 0.9417356538974917, + "learning_rate": 4.860724958553627e-06, + "loss": 0.4057, + "step": 10607 + }, + { + "epoch": 0.6459824011204823, + "grad_norm": 1.0297456428110283, + "learning_rate": 4.8606986953866755e-06, + "loss": 0.4159, + "step": 10608 + }, + { + "epoch": 0.6460432968973602, + "grad_norm": 1.0710066576426787, + "learning_rate": 4.860672429814693e-06, + "loss": 0.4144, + "step": 10609 + }, + { + "epoch": 0.646104192674238, + "grad_norm": 0.9392884773793859, + "learning_rate": 4.860646161837707e-06, + "loss": 0.4437, + "step": 10610 + }, + { + "epoch": 0.6461650884511159, + "grad_norm": 1.1254855306299756, + "learning_rate": 4.8606198914557435e-06, + "loss": 0.4043, + "step": 10611 + }, + { + "epoch": 0.6462259842279938, + "grad_norm": 1.0471533422939518, + "learning_rate": 4.86059361866883e-06, + "loss": 0.4201, + "step": 10612 + }, + { + "epoch": 0.6462868800048717, + "grad_norm": 1.0977293069534626, + "learning_rate": 4.860567343476992e-06, + "loss": 0.3661, + "step": 10613 + }, + { + "epoch": 0.6463477757817495, + "grad_norm": 1.0256161482272752, + "learning_rate": 4.8605410658802575e-06, + "loss": 0.4616, + "step": 10614 + }, + { + "epoch": 0.6464086715586274, + "grad_norm": 1.1036500214891647, + "learning_rate": 4.860514785878653e-06, + "loss": 0.3644, + "step": 10615 + }, + { + "epoch": 0.6464695673355053, + "grad_norm": 0.9719366773505089, + "learning_rate": 4.8604885034722046e-06, + "loss": 0.4511, + "step": 10616 + }, + { + "epoch": 0.6465304631123832, + "grad_norm": 0.9674253036732576, + "learning_rate": 4.860462218660941e-06, + "loss": 0.4622, + "step": 10617 + }, + { + "epoch": 0.646591358889261, + "grad_norm": 0.9786652879088036, + "learning_rate": 4.8604359314448855e-06, + "loss": 0.3732, + "step": 10618 + }, + { + "epoch": 0.6466522546661388, + "grad_norm": 0.9959242395705412, + "learning_rate": 4.860409641824069e-06, + "loss": 0.3993, + "step": 10619 + }, + { + "epoch": 0.6467131504430168, + "grad_norm": 0.9439834018518124, + "learning_rate": 4.860383349798515e-06, + "loss": 0.3786, + "step": 10620 + }, + { + "epoch": 0.6467740462198946, + "grad_norm": 0.8792514081759222, + "learning_rate": 4.860357055368252e-06, + "loss": 0.4326, + "step": 10621 + }, + { + "epoch": 0.6468349419967725, + "grad_norm": 1.0144294024559652, + "learning_rate": 4.860330758533306e-06, + "loss": 0.4605, + "step": 10622 + }, + { + "epoch": 0.6468958377736505, + "grad_norm": 1.0219267376617747, + "learning_rate": 4.860304459293704e-06, + "loss": 0.4349, + "step": 10623 + }, + { + "epoch": 0.6469567335505283, + "grad_norm": 0.8827630427690252, + "learning_rate": 4.860278157649473e-06, + "loss": 0.5489, + "step": 10624 + }, + { + "epoch": 0.6470176293274061, + "grad_norm": 0.9830358237525374, + "learning_rate": 4.860251853600639e-06, + "loss": 0.4527, + "step": 10625 + }, + { + "epoch": 0.647078525104284, + "grad_norm": 1.0440736721862096, + "learning_rate": 4.8602255471472305e-06, + "loss": 0.4427, + "step": 10626 + }, + { + "epoch": 0.6471394208811619, + "grad_norm": 1.0720179850777591, + "learning_rate": 4.860199238289273e-06, + "loss": 0.3746, + "step": 10627 + }, + { + "epoch": 0.6472003166580398, + "grad_norm": 1.0320554621199058, + "learning_rate": 4.8601729270267936e-06, + "loss": 0.4238, + "step": 10628 + }, + { + "epoch": 0.6472612124349176, + "grad_norm": 0.9817958213716119, + "learning_rate": 4.8601466133598194e-06, + "loss": 0.398, + "step": 10629 + }, + { + "epoch": 0.6473221082117955, + "grad_norm": 0.9496541960316972, + "learning_rate": 4.860120297288376e-06, + "loss": 0.3772, + "step": 10630 + }, + { + "epoch": 0.6473830039886734, + "grad_norm": 1.1144729605555717, + "learning_rate": 4.860093978812492e-06, + "loss": 0.4077, + "step": 10631 + }, + { + "epoch": 0.6474438997655513, + "grad_norm": 0.9836043163003929, + "learning_rate": 4.860067657932194e-06, + "loss": 0.4036, + "step": 10632 + }, + { + "epoch": 0.6475047955424291, + "grad_norm": 0.9466139843802468, + "learning_rate": 4.860041334647506e-06, + "loss": 0.3983, + "step": 10633 + }, + { + "epoch": 0.647565691319307, + "grad_norm": 0.9942693031407045, + "learning_rate": 4.860015008958459e-06, + "loss": 0.4973, + "step": 10634 + }, + { + "epoch": 0.6476265870961849, + "grad_norm": 1.035845185231962, + "learning_rate": 4.859988680865076e-06, + "loss": 0.4663, + "step": 10635 + }, + { + "epoch": 0.6476874828730628, + "grad_norm": 1.026442761719514, + "learning_rate": 4.859962350367386e-06, + "loss": 0.4219, + "step": 10636 + }, + { + "epoch": 0.6477483786499406, + "grad_norm": 1.0217587126982839, + "learning_rate": 4.859936017465416e-06, + "loss": 0.4196, + "step": 10637 + }, + { + "epoch": 0.6478092744268185, + "grad_norm": 1.0626767788090579, + "learning_rate": 4.859909682159193e-06, + "loss": 0.4248, + "step": 10638 + }, + { + "epoch": 0.6478701702036964, + "grad_norm": 1.0291506906471348, + "learning_rate": 4.859883344448742e-06, + "loss": 0.3923, + "step": 10639 + }, + { + "epoch": 0.6479310659805743, + "grad_norm": 0.9813056066887623, + "learning_rate": 4.859857004334091e-06, + "loss": 0.4733, + "step": 10640 + }, + { + "epoch": 0.6479919617574521, + "grad_norm": 0.9859994866983671, + "learning_rate": 4.8598306618152664e-06, + "loss": 0.4026, + "step": 10641 + }, + { + "epoch": 0.64805285753433, + "grad_norm": 1.0423887493906887, + "learning_rate": 4.859804316892297e-06, + "loss": 0.4649, + "step": 10642 + }, + { + "epoch": 0.6481137533112079, + "grad_norm": 0.8914889209378303, + "learning_rate": 4.859777969565206e-06, + "loss": 0.4697, + "step": 10643 + }, + { + "epoch": 0.6481746490880858, + "grad_norm": 1.0102158761430937, + "learning_rate": 4.859751619834023e-06, + "loss": 0.4172, + "step": 10644 + }, + { + "epoch": 0.6482355448649636, + "grad_norm": 0.9964017094674331, + "learning_rate": 4.859725267698775e-06, + "loss": 0.4132, + "step": 10645 + }, + { + "epoch": 0.6482964406418414, + "grad_norm": 1.0233815464113951, + "learning_rate": 4.859698913159487e-06, + "loss": 0.3736, + "step": 10646 + }, + { + "epoch": 0.6483573364187194, + "grad_norm": 1.0297604727954635, + "learning_rate": 4.859672556216188e-06, + "loss": 0.4578, + "step": 10647 + }, + { + "epoch": 0.6484182321955972, + "grad_norm": 1.006084685849315, + "learning_rate": 4.859646196868902e-06, + "loss": 0.445, + "step": 10648 + }, + { + "epoch": 0.6484791279724751, + "grad_norm": 1.0363119028222503, + "learning_rate": 4.859619835117658e-06, + "loss": 0.4583, + "step": 10649 + }, + { + "epoch": 0.6485400237493529, + "grad_norm": 1.0433746718677357, + "learning_rate": 4.859593470962483e-06, + "loss": 0.4591, + "step": 10650 + }, + { + "epoch": 0.6486009195262309, + "grad_norm": 1.0310143734703534, + "learning_rate": 4.859567104403403e-06, + "loss": 0.4025, + "step": 10651 + }, + { + "epoch": 0.6486618153031087, + "grad_norm": 0.9870467361887348, + "learning_rate": 4.859540735440444e-06, + "loss": 0.4891, + "step": 10652 + }, + { + "epoch": 0.6487227110799866, + "grad_norm": 0.9472903416572835, + "learning_rate": 4.859514364073635e-06, + "loss": 0.4823, + "step": 10653 + }, + { + "epoch": 0.6487836068568644, + "grad_norm": 0.9912509371406741, + "learning_rate": 4.859487990303002e-06, + "loss": 0.3794, + "step": 10654 + }, + { + "epoch": 0.6488445026337424, + "grad_norm": 1.1319620000005475, + "learning_rate": 4.859461614128571e-06, + "loss": 0.3412, + "step": 10655 + }, + { + "epoch": 0.6489053984106202, + "grad_norm": 1.0292678105772615, + "learning_rate": 4.85943523555037e-06, + "loss": 0.3801, + "step": 10656 + }, + { + "epoch": 0.6489662941874981, + "grad_norm": 0.9103173378716486, + "learning_rate": 4.859408854568426e-06, + "loss": 0.4441, + "step": 10657 + }, + { + "epoch": 0.6490271899643759, + "grad_norm": 1.0429425181199843, + "learning_rate": 4.859382471182764e-06, + "loss": 0.4502, + "step": 10658 + }, + { + "epoch": 0.6490880857412539, + "grad_norm": 1.037609507653976, + "learning_rate": 4.859356085393413e-06, + "loss": 0.4197, + "step": 10659 + }, + { + "epoch": 0.6491489815181317, + "grad_norm": 0.9862251231824257, + "learning_rate": 4.8593296972003985e-06, + "loss": 0.4162, + "step": 10660 + }, + { + "epoch": 0.6492098772950096, + "grad_norm": 1.0305954588686652, + "learning_rate": 4.859303306603749e-06, + "loss": 0.4595, + "step": 10661 + }, + { + "epoch": 0.6492707730718875, + "grad_norm": 0.9581883732483011, + "learning_rate": 4.8592769136034904e-06, + "loss": 0.4509, + "step": 10662 + }, + { + "epoch": 0.6493316688487654, + "grad_norm": 1.0087284648763222, + "learning_rate": 4.859250518199649e-06, + "loss": 0.4368, + "step": 10663 + }, + { + "epoch": 0.6493925646256432, + "grad_norm": 1.0229859744820975, + "learning_rate": 4.859224120392251e-06, + "loss": 0.3712, + "step": 10664 + }, + { + "epoch": 0.649453460402521, + "grad_norm": 1.022801817983442, + "learning_rate": 4.859197720181325e-06, + "loss": 0.5019, + "step": 10665 + }, + { + "epoch": 0.649514356179399, + "grad_norm": 1.072557980758065, + "learning_rate": 4.859171317566899e-06, + "loss": 0.3893, + "step": 10666 + }, + { + "epoch": 0.6495752519562769, + "grad_norm": 1.1194125750510233, + "learning_rate": 4.859144912548998e-06, + "loss": 0.4082, + "step": 10667 + }, + { + "epoch": 0.6496361477331547, + "grad_norm": 0.9396890913180205, + "learning_rate": 4.859118505127648e-06, + "loss": 0.4262, + "step": 10668 + }, + { + "epoch": 0.6496970435100325, + "grad_norm": 1.1152007588487818, + "learning_rate": 4.859092095302877e-06, + "loss": 0.4085, + "step": 10669 + }, + { + "epoch": 0.6497579392869105, + "grad_norm": 0.9713256623739546, + "learning_rate": 4.859065683074713e-06, + "loss": 0.4329, + "step": 10670 + }, + { + "epoch": 0.6498188350637883, + "grad_norm": 0.9369298052104601, + "learning_rate": 4.859039268443182e-06, + "loss": 0.471, + "step": 10671 + }, + { + "epoch": 0.6498797308406662, + "grad_norm": 1.0420272651560964, + "learning_rate": 4.85901285140831e-06, + "loss": 0.4724, + "step": 10672 + }, + { + "epoch": 0.649940626617544, + "grad_norm": 0.9939708411469895, + "learning_rate": 4.8589864319701255e-06, + "loss": 0.508, + "step": 10673 + }, + { + "epoch": 0.650001522394422, + "grad_norm": 1.009237313606144, + "learning_rate": 4.858960010128654e-06, + "loss": 0.5236, + "step": 10674 + }, + { + "epoch": 0.6500624181712998, + "grad_norm": 1.0774387535092405, + "learning_rate": 4.858933585883924e-06, + "loss": 0.4163, + "step": 10675 + }, + { + "epoch": 0.6501233139481777, + "grad_norm": 1.033062953629723, + "learning_rate": 4.85890715923596e-06, + "loss": 0.4205, + "step": 10676 + }, + { + "epoch": 0.6501842097250555, + "grad_norm": 1.0481376839371173, + "learning_rate": 4.858880730184792e-06, + "loss": 0.3485, + "step": 10677 + }, + { + "epoch": 0.6502451055019335, + "grad_norm": 0.99566230215343, + "learning_rate": 4.858854298730444e-06, + "loss": 0.4334, + "step": 10678 + }, + { + "epoch": 0.6503060012788113, + "grad_norm": 0.9479956664933922, + "learning_rate": 4.858827864872946e-06, + "loss": 0.4592, + "step": 10679 + }, + { + "epoch": 0.6503668970556892, + "grad_norm": 1.052936621145186, + "learning_rate": 4.858801428612322e-06, + "loss": 0.3908, + "step": 10680 + }, + { + "epoch": 0.650427792832567, + "grad_norm": 0.9818836025839305, + "learning_rate": 4.8587749899486004e-06, + "loss": 0.4614, + "step": 10681 + }, + { + "epoch": 0.650488688609445, + "grad_norm": 0.9259698224919435, + "learning_rate": 4.858748548881808e-06, + "loss": 0.4241, + "step": 10682 + }, + { + "epoch": 0.6505495843863228, + "grad_norm": 0.9550681254024396, + "learning_rate": 4.858722105411971e-06, + "loss": 0.4654, + "step": 10683 + }, + { + "epoch": 0.6506104801632007, + "grad_norm": 0.9610651328243288, + "learning_rate": 4.858695659539118e-06, + "loss": 0.4856, + "step": 10684 + }, + { + "epoch": 0.6506713759400785, + "grad_norm": 0.9544138172629343, + "learning_rate": 4.858669211263274e-06, + "loss": 0.4354, + "step": 10685 + }, + { + "epoch": 0.6507322717169565, + "grad_norm": 0.9337836669963537, + "learning_rate": 4.858642760584467e-06, + "loss": 0.4267, + "step": 10686 + }, + { + "epoch": 0.6507931674938343, + "grad_norm": 0.9917273241413078, + "learning_rate": 4.8586163075027246e-06, + "loss": 0.3587, + "step": 10687 + }, + { + "epoch": 0.6508540632707122, + "grad_norm": 0.9648653216176402, + "learning_rate": 4.858589852018072e-06, + "loss": 0.4789, + "step": 10688 + }, + { + "epoch": 0.65091495904759, + "grad_norm": 1.0430624098842887, + "learning_rate": 4.8585633941305375e-06, + "loss": 0.4045, + "step": 10689 + }, + { + "epoch": 0.650975854824468, + "grad_norm": 0.9935817332677301, + "learning_rate": 4.858536933840148e-06, + "loss": 0.4556, + "step": 10690 + }, + { + "epoch": 0.6510367506013458, + "grad_norm": 0.9671771362561605, + "learning_rate": 4.858510471146929e-06, + "loss": 0.4065, + "step": 10691 + }, + { + "epoch": 0.6510976463782236, + "grad_norm": 1.0234464417737643, + "learning_rate": 4.858484006050909e-06, + "loss": 0.4505, + "step": 10692 + }, + { + "epoch": 0.6511585421551015, + "grad_norm": 0.9266049072245872, + "learning_rate": 4.858457538552115e-06, + "loss": 0.4866, + "step": 10693 + }, + { + "epoch": 0.6512194379319795, + "grad_norm": 0.9033237944253941, + "learning_rate": 4.858431068650573e-06, + "loss": 0.401, + "step": 10694 + }, + { + "epoch": 0.6512803337088573, + "grad_norm": 0.9612529398443316, + "learning_rate": 4.858404596346311e-06, + "loss": 0.4832, + "step": 10695 + }, + { + "epoch": 0.6513412294857351, + "grad_norm": 0.9395114675822827, + "learning_rate": 4.858378121639355e-06, + "loss": 0.4837, + "step": 10696 + }, + { + "epoch": 0.651402125262613, + "grad_norm": 1.137799721237349, + "learning_rate": 4.858351644529733e-06, + "loss": 0.3865, + "step": 10697 + }, + { + "epoch": 0.6514630210394909, + "grad_norm": 1.0946503487400454, + "learning_rate": 4.85832516501747e-06, + "loss": 0.4932, + "step": 10698 + }, + { + "epoch": 0.6515239168163688, + "grad_norm": 1.028777868261304, + "learning_rate": 4.858298683102596e-06, + "loss": 0.4222, + "step": 10699 + }, + { + "epoch": 0.6515848125932466, + "grad_norm": 0.9893352631743837, + "learning_rate": 4.858272198785136e-06, + "loss": 0.4188, + "step": 10700 + }, + { + "epoch": 0.6516457083701245, + "grad_norm": 0.9843026608753719, + "learning_rate": 4.858245712065116e-06, + "loss": 0.3372, + "step": 10701 + }, + { + "epoch": 0.6517066041470024, + "grad_norm": 0.9749933012872901, + "learning_rate": 4.8582192229425655e-06, + "loss": 0.4634, + "step": 10702 + }, + { + "epoch": 0.6517674999238803, + "grad_norm": 1.0341485483031116, + "learning_rate": 4.8581927314175105e-06, + "loss": 0.4336, + "step": 10703 + }, + { + "epoch": 0.6518283957007581, + "grad_norm": 1.0557905615772547, + "learning_rate": 4.858166237489977e-06, + "loss": 0.5053, + "step": 10704 + }, + { + "epoch": 0.6518892914776361, + "grad_norm": 1.0570168134738445, + "learning_rate": 4.858139741159993e-06, + "loss": 0.4306, + "step": 10705 + }, + { + "epoch": 0.6519501872545139, + "grad_norm": 0.9676884578191283, + "learning_rate": 4.858113242427585e-06, + "loss": 0.4685, + "step": 10706 + }, + { + "epoch": 0.6520110830313918, + "grad_norm": 1.0189915429827314, + "learning_rate": 4.8580867412927805e-06, + "loss": 0.4115, + "step": 10707 + }, + { + "epoch": 0.6520719788082696, + "grad_norm": 1.0625261069320295, + "learning_rate": 4.8580602377556065e-06, + "loss": 0.4111, + "step": 10708 + }, + { + "epoch": 0.6521328745851476, + "grad_norm": 0.9495024814257299, + "learning_rate": 4.8580337318160895e-06, + "loss": 0.3733, + "step": 10709 + }, + { + "epoch": 0.6521937703620254, + "grad_norm": 1.0731315772072556, + "learning_rate": 4.858007223474257e-06, + "loss": 0.4358, + "step": 10710 + }, + { + "epoch": 0.6522546661389033, + "grad_norm": 0.961276446081222, + "learning_rate": 4.857980712730136e-06, + "loss": 0.4756, + "step": 10711 + }, + { + "epoch": 0.6523155619157811, + "grad_norm": 1.0194406293484586, + "learning_rate": 4.8579541995837525e-06, + "loss": 0.451, + "step": 10712 + }, + { + "epoch": 0.6523764576926591, + "grad_norm": 0.9981251257453106, + "learning_rate": 4.857927684035135e-06, + "loss": 0.3572, + "step": 10713 + }, + { + "epoch": 0.6524373534695369, + "grad_norm": 0.9403347365441006, + "learning_rate": 4.857901166084309e-06, + "loss": 0.541, + "step": 10714 + }, + { + "epoch": 0.6524982492464148, + "grad_norm": 0.9532194752543874, + "learning_rate": 4.857874645731303e-06, + "loss": 0.4245, + "step": 10715 + }, + { + "epoch": 0.6525591450232926, + "grad_norm": 1.0406020934547693, + "learning_rate": 4.857848122976143e-06, + "loss": 0.3767, + "step": 10716 + }, + { + "epoch": 0.6526200408001706, + "grad_norm": 1.0956538225673307, + "learning_rate": 4.857821597818856e-06, + "loss": 0.3542, + "step": 10717 + }, + { + "epoch": 0.6526809365770484, + "grad_norm": 0.9711629524436183, + "learning_rate": 4.85779507025947e-06, + "loss": 0.4003, + "step": 10718 + }, + { + "epoch": 0.6527418323539262, + "grad_norm": 0.9759212594827058, + "learning_rate": 4.857768540298011e-06, + "loss": 0.4046, + "step": 10719 + }, + { + "epoch": 0.6528027281308041, + "grad_norm": 0.9699785116532157, + "learning_rate": 4.8577420079345074e-06, + "loss": 0.4647, + "step": 10720 + }, + { + "epoch": 0.652863623907682, + "grad_norm": 0.9401942569199842, + "learning_rate": 4.857715473168984e-06, + "loss": 0.4303, + "step": 10721 + }, + { + "epoch": 0.6529245196845599, + "grad_norm": 1.0453623475150757, + "learning_rate": 4.85768893600147e-06, + "loss": 0.4922, + "step": 10722 + }, + { + "epoch": 0.6529854154614377, + "grad_norm": 1.0467463166256505, + "learning_rate": 4.8576623964319915e-06, + "loss": 0.3586, + "step": 10723 + }, + { + "epoch": 0.6530463112383156, + "grad_norm": 1.0204763284827776, + "learning_rate": 4.857635854460575e-06, + "loss": 0.4184, + "step": 10724 + }, + { + "epoch": 0.6531072070151935, + "grad_norm": 1.0422439618482238, + "learning_rate": 4.857609310087248e-06, + "loss": 0.3433, + "step": 10725 + }, + { + "epoch": 0.6531681027920714, + "grad_norm": 1.050696278330967, + "learning_rate": 4.857582763312038e-06, + "loss": 0.4424, + "step": 10726 + }, + { + "epoch": 0.6532289985689492, + "grad_norm": 0.9467275845788353, + "learning_rate": 4.857556214134972e-06, + "loss": 0.4143, + "step": 10727 + }, + { + "epoch": 0.6532898943458271, + "grad_norm": 1.0329357814537132, + "learning_rate": 4.857529662556076e-06, + "loss": 0.4169, + "step": 10728 + }, + { + "epoch": 0.653350790122705, + "grad_norm": 0.9958468668249771, + "learning_rate": 4.857503108575378e-06, + "loss": 0.4913, + "step": 10729 + }, + { + "epoch": 0.6534116858995829, + "grad_norm": 1.0763908683601642, + "learning_rate": 4.857476552192905e-06, + "loss": 0.392, + "step": 10730 + }, + { + "epoch": 0.6534725816764607, + "grad_norm": 1.0744690955034248, + "learning_rate": 4.857449993408684e-06, + "loss": 0.393, + "step": 10731 + }, + { + "epoch": 0.6535334774533386, + "grad_norm": 0.9655141987199217, + "learning_rate": 4.857423432222742e-06, + "loss": 0.415, + "step": 10732 + }, + { + "epoch": 0.6535943732302165, + "grad_norm": 1.0662907234364187, + "learning_rate": 4.857396868635106e-06, + "loss": 0.4476, + "step": 10733 + }, + { + "epoch": 0.6536552690070944, + "grad_norm": 1.046673475330789, + "learning_rate": 4.857370302645803e-06, + "loss": 0.3891, + "step": 10734 + }, + { + "epoch": 0.6537161647839722, + "grad_norm": 1.147244245667816, + "learning_rate": 4.85734373425486e-06, + "loss": 0.4246, + "step": 10735 + }, + { + "epoch": 0.65377706056085, + "grad_norm": 0.9921685316868103, + "learning_rate": 4.857317163462304e-06, + "loss": 0.4296, + "step": 10736 + }, + { + "epoch": 0.653837956337728, + "grad_norm": 1.0847830075560723, + "learning_rate": 4.857290590268163e-06, + "loss": 0.4484, + "step": 10737 + }, + { + "epoch": 0.6538988521146059, + "grad_norm": 1.007160418401543, + "learning_rate": 4.857264014672464e-06, + "loss": 0.4495, + "step": 10738 + }, + { + "epoch": 0.6539597478914837, + "grad_norm": 0.9926879175114219, + "learning_rate": 4.8572374366752315e-06, + "loss": 0.3769, + "step": 10739 + }, + { + "epoch": 0.6540206436683615, + "grad_norm": 1.1509461261026492, + "learning_rate": 4.8572108562764955e-06, + "loss": 0.4507, + "step": 10740 + }, + { + "epoch": 0.6540815394452395, + "grad_norm": 0.9391565512492822, + "learning_rate": 4.857184273476282e-06, + "loss": 0.5096, + "step": 10741 + }, + { + "epoch": 0.6541424352221173, + "grad_norm": 0.9853403113013137, + "learning_rate": 4.857157688274619e-06, + "loss": 0.4034, + "step": 10742 + }, + { + "epoch": 0.6542033309989952, + "grad_norm": 0.9269359583419198, + "learning_rate": 4.857131100671531e-06, + "loss": 0.4691, + "step": 10743 + }, + { + "epoch": 0.6542642267758731, + "grad_norm": 0.9679062074285353, + "learning_rate": 4.857104510667048e-06, + "loss": 0.4629, + "step": 10744 + }, + { + "epoch": 0.654325122552751, + "grad_norm": 1.0100083284931827, + "learning_rate": 4.857077918261196e-06, + "loss": 0.4129, + "step": 10745 + }, + { + "epoch": 0.6543860183296288, + "grad_norm": 1.04930277961787, + "learning_rate": 4.857051323454001e-06, + "loss": 0.4248, + "step": 10746 + }, + { + "epoch": 0.6544469141065067, + "grad_norm": 0.9645797982230644, + "learning_rate": 4.8570247262454925e-06, + "loss": 0.4391, + "step": 10747 + }, + { + "epoch": 0.6545078098833846, + "grad_norm": 0.9830814495841744, + "learning_rate": 4.856998126635696e-06, + "loss": 0.3637, + "step": 10748 + }, + { + "epoch": 0.6545687056602625, + "grad_norm": 0.978827601388164, + "learning_rate": 4.8569715246246375e-06, + "loss": 0.4218, + "step": 10749 + }, + { + "epoch": 0.6546296014371403, + "grad_norm": 0.9754306382072941, + "learning_rate": 4.8569449202123464e-06, + "loss": 0.4487, + "step": 10750 + }, + { + "epoch": 0.6546904972140182, + "grad_norm": 0.9829180596031601, + "learning_rate": 4.856918313398849e-06, + "loss": 0.4825, + "step": 10751 + }, + { + "epoch": 0.6547513929908961, + "grad_norm": 1.1170795418622772, + "learning_rate": 4.8568917041841725e-06, + "loss": 0.3375, + "step": 10752 + }, + { + "epoch": 0.654812288767774, + "grad_norm": 0.9417487792178735, + "learning_rate": 4.856865092568342e-06, + "loss": 0.4899, + "step": 10753 + }, + { + "epoch": 0.6548731845446518, + "grad_norm": 0.948848379626938, + "learning_rate": 4.856838478551389e-06, + "loss": 0.4664, + "step": 10754 + }, + { + "epoch": 0.6549340803215297, + "grad_norm": 0.9563320434867972, + "learning_rate": 4.856811862133336e-06, + "loss": 0.4471, + "step": 10755 + }, + { + "epoch": 0.6549949760984076, + "grad_norm": 0.9584456954032616, + "learning_rate": 4.8567852433142126e-06, + "loss": 0.4431, + "step": 10756 + }, + { + "epoch": 0.6550558718752855, + "grad_norm": 1.0256552824964695, + "learning_rate": 4.856758622094045e-06, + "loss": 0.3622, + "step": 10757 + }, + { + "epoch": 0.6551167676521633, + "grad_norm": 0.9907082994774545, + "learning_rate": 4.856731998472861e-06, + "loss": 0.4052, + "step": 10758 + }, + { + "epoch": 0.6551776634290412, + "grad_norm": 0.9790284733732244, + "learning_rate": 4.856705372450688e-06, + "loss": 0.4207, + "step": 10759 + }, + { + "epoch": 0.6552385592059191, + "grad_norm": 0.9243484942820699, + "learning_rate": 4.856678744027552e-06, + "loss": 0.4518, + "step": 10760 + }, + { + "epoch": 0.655299454982797, + "grad_norm": 1.0013797022704551, + "learning_rate": 4.856652113203481e-06, + "loss": 0.453, + "step": 10761 + }, + { + "epoch": 0.6553603507596748, + "grad_norm": 1.0029105110741636, + "learning_rate": 4.856625479978501e-06, + "loss": 0.3817, + "step": 10762 + }, + { + "epoch": 0.6554212465365526, + "grad_norm": 1.0694943399547647, + "learning_rate": 4.856598844352641e-06, + "loss": 0.3378, + "step": 10763 + }, + { + "epoch": 0.6554821423134306, + "grad_norm": 1.1061939405459822, + "learning_rate": 4.856572206325926e-06, + "loss": 0.399, + "step": 10764 + }, + { + "epoch": 0.6555430380903084, + "grad_norm": 1.0926247203147652, + "learning_rate": 4.856545565898385e-06, + "loss": 0.3561, + "step": 10765 + }, + { + "epoch": 0.6556039338671863, + "grad_norm": 1.051960238452815, + "learning_rate": 4.856518923070045e-06, + "loss": 0.3406, + "step": 10766 + }, + { + "epoch": 0.6556648296440641, + "grad_norm": 1.0159974486765333, + "learning_rate": 4.856492277840931e-06, + "loss": 0.4179, + "step": 10767 + }, + { + "epoch": 0.6557257254209421, + "grad_norm": 0.9765655227085539, + "learning_rate": 4.856465630211072e-06, + "loss": 0.4734, + "step": 10768 + }, + { + "epoch": 0.6557866211978199, + "grad_norm": 1.0537941682078442, + "learning_rate": 4.856438980180495e-06, + "loss": 0.3736, + "step": 10769 + }, + { + "epoch": 0.6558475169746978, + "grad_norm": 0.9573059429241525, + "learning_rate": 4.856412327749228e-06, + "loss": 0.4014, + "step": 10770 + }, + { + "epoch": 0.6559084127515756, + "grad_norm": 0.9622716159230342, + "learning_rate": 4.856385672917296e-06, + "loss": 0.44, + "step": 10771 + }, + { + "epoch": 0.6559693085284536, + "grad_norm": 1.0650682665359055, + "learning_rate": 4.856359015684728e-06, + "loss": 0.385, + "step": 10772 + }, + { + "epoch": 0.6560302043053314, + "grad_norm": 1.0338981254780606, + "learning_rate": 4.856332356051549e-06, + "loss": 0.4039, + "step": 10773 + }, + { + "epoch": 0.6560911000822093, + "grad_norm": 1.072052738929831, + "learning_rate": 4.856305694017789e-06, + "loss": 0.4089, + "step": 10774 + }, + { + "epoch": 0.6561519958590871, + "grad_norm": 1.0204894041609656, + "learning_rate": 4.856279029583474e-06, + "loss": 0.4043, + "step": 10775 + }, + { + "epoch": 0.6562128916359651, + "grad_norm": 1.0351774791788668, + "learning_rate": 4.85625236274863e-06, + "loss": 0.4665, + "step": 10776 + }, + { + "epoch": 0.6562737874128429, + "grad_norm": 1.071851814807069, + "learning_rate": 4.856225693513285e-06, + "loss": 0.424, + "step": 10777 + }, + { + "epoch": 0.6563346831897208, + "grad_norm": 0.9640595529797289, + "learning_rate": 4.856199021877467e-06, + "loss": 0.546, + "step": 10778 + }, + { + "epoch": 0.6563955789665986, + "grad_norm": 0.9800550096117806, + "learning_rate": 4.856172347841202e-06, + "loss": 0.4301, + "step": 10779 + }, + { + "epoch": 0.6564564747434766, + "grad_norm": 1.0200755949153983, + "learning_rate": 4.856145671404518e-06, + "loss": 0.5025, + "step": 10780 + }, + { + "epoch": 0.6565173705203544, + "grad_norm": 1.1131010288238652, + "learning_rate": 4.85611899256744e-06, + "loss": 0.3556, + "step": 10781 + }, + { + "epoch": 0.6565782662972323, + "grad_norm": 0.9712977650329988, + "learning_rate": 4.856092311329999e-06, + "loss": 0.4345, + "step": 10782 + }, + { + "epoch": 0.6566391620741101, + "grad_norm": 1.0729957182008134, + "learning_rate": 4.856065627692219e-06, + "loss": 0.3951, + "step": 10783 + }, + { + "epoch": 0.6567000578509881, + "grad_norm": 1.0702283179709389, + "learning_rate": 4.856038941654129e-06, + "loss": 0.3927, + "step": 10784 + }, + { + "epoch": 0.6567609536278659, + "grad_norm": 0.9580746678624003, + "learning_rate": 4.856012253215755e-06, + "loss": 0.4257, + "step": 10785 + }, + { + "epoch": 0.6568218494047438, + "grad_norm": 0.9651476344287314, + "learning_rate": 4.855985562377126e-06, + "loss": 0.3858, + "step": 10786 + }, + { + "epoch": 0.6568827451816217, + "grad_norm": 1.1853069845632158, + "learning_rate": 4.855958869138267e-06, + "loss": 0.3742, + "step": 10787 + }, + { + "epoch": 0.6569436409584996, + "grad_norm": 1.0489965898374678, + "learning_rate": 4.855932173499205e-06, + "loss": 0.4321, + "step": 10788 + }, + { + "epoch": 0.6570045367353774, + "grad_norm": 0.9598825803075277, + "learning_rate": 4.855905475459969e-06, + "loss": 0.45, + "step": 10789 + }, + { + "epoch": 0.6570654325122552, + "grad_norm": 0.9509724149428022, + "learning_rate": 4.8558787750205864e-06, + "loss": 0.4441, + "step": 10790 + }, + { + "epoch": 0.6571263282891332, + "grad_norm": 1.0656564053292, + "learning_rate": 4.855852072181082e-06, + "loss": 0.3805, + "step": 10791 + }, + { + "epoch": 0.657187224066011, + "grad_norm": 0.996813649998882, + "learning_rate": 4.855825366941486e-06, + "loss": 0.4364, + "step": 10792 + }, + { + "epoch": 0.6572481198428889, + "grad_norm": 0.9843171079276983, + "learning_rate": 4.855798659301824e-06, + "loss": 0.4038, + "step": 10793 + }, + { + "epoch": 0.6573090156197667, + "grad_norm": 1.0590338405172262, + "learning_rate": 4.855771949262122e-06, + "loss": 0.3943, + "step": 10794 + }, + { + "epoch": 0.6573699113966447, + "grad_norm": 1.125462923697314, + "learning_rate": 4.855745236822409e-06, + "loss": 0.3872, + "step": 10795 + }, + { + "epoch": 0.6574308071735225, + "grad_norm": 1.0648361646952091, + "learning_rate": 4.855718521982712e-06, + "loss": 0.4325, + "step": 10796 + }, + { + "epoch": 0.6574917029504004, + "grad_norm": 0.9565555989029692, + "learning_rate": 4.855691804743058e-06, + "loss": 0.4447, + "step": 10797 + }, + { + "epoch": 0.6575525987272782, + "grad_norm": 0.9808608336968754, + "learning_rate": 4.855665085103474e-06, + "loss": 0.4441, + "step": 10798 + }, + { + "epoch": 0.6576134945041562, + "grad_norm": 1.0209004757063764, + "learning_rate": 4.8556383630639875e-06, + "loss": 0.4017, + "step": 10799 + }, + { + "epoch": 0.657674390281034, + "grad_norm": 0.9218579143758534, + "learning_rate": 4.855611638624626e-06, + "loss": 0.4519, + "step": 10800 + }, + { + "epoch": 0.6577352860579119, + "grad_norm": 1.0202803998141703, + "learning_rate": 4.855584911785415e-06, + "loss": 0.3975, + "step": 10801 + }, + { + "epoch": 0.6577961818347897, + "grad_norm": 1.0948467084260454, + "learning_rate": 4.855558182546384e-06, + "loss": 0.5135, + "step": 10802 + }, + { + "epoch": 0.6578570776116677, + "grad_norm": 0.9459887011897772, + "learning_rate": 4.855531450907559e-06, + "loss": 0.4664, + "step": 10803 + }, + { + "epoch": 0.6579179733885455, + "grad_norm": 0.9419682695900831, + "learning_rate": 4.855504716868969e-06, + "loss": 0.4776, + "step": 10804 + }, + { + "epoch": 0.6579788691654234, + "grad_norm": 0.9758994529363422, + "learning_rate": 4.855477980430638e-06, + "loss": 0.4369, + "step": 10805 + }, + { + "epoch": 0.6580397649423012, + "grad_norm": 1.0154377947582789, + "learning_rate": 4.855451241592595e-06, + "loss": 0.4039, + "step": 10806 + }, + { + "epoch": 0.6581006607191792, + "grad_norm": 1.0569041541619129, + "learning_rate": 4.855424500354868e-06, + "loss": 0.3733, + "step": 10807 + }, + { + "epoch": 0.658161556496057, + "grad_norm": 1.0025228561619548, + "learning_rate": 4.8553977567174835e-06, + "loss": 0.4611, + "step": 10808 + }, + { + "epoch": 0.6582224522729349, + "grad_norm": 0.9779107944851193, + "learning_rate": 4.855371010680469e-06, + "loss": 0.4262, + "step": 10809 + }, + { + "epoch": 0.6582833480498127, + "grad_norm": 0.9446621038395492, + "learning_rate": 4.85534426224385e-06, + "loss": 0.4206, + "step": 10810 + }, + { + "epoch": 0.6583442438266907, + "grad_norm": 0.9430154911736195, + "learning_rate": 4.855317511407657e-06, + "loss": 0.4324, + "step": 10811 + }, + { + "epoch": 0.6584051396035685, + "grad_norm": 0.9779738220226666, + "learning_rate": 4.855290758171914e-06, + "loss": 0.4125, + "step": 10812 + }, + { + "epoch": 0.6584660353804463, + "grad_norm": 0.9607162059751695, + "learning_rate": 4.855264002536651e-06, + "loss": 0.4807, + "step": 10813 + }, + { + "epoch": 0.6585269311573242, + "grad_norm": 0.9972801012218467, + "learning_rate": 4.855237244501894e-06, + "loss": 0.459, + "step": 10814 + }, + { + "epoch": 0.6585878269342021, + "grad_norm": 1.0256271483864585, + "learning_rate": 4.8552104840676695e-06, + "loss": 0.3966, + "step": 10815 + }, + { + "epoch": 0.65864872271108, + "grad_norm": 1.022833038141481, + "learning_rate": 4.855183721234006e-06, + "loss": 0.3378, + "step": 10816 + }, + { + "epoch": 0.6587096184879578, + "grad_norm": 1.0016843807279425, + "learning_rate": 4.855156956000929e-06, + "loss": 0.4557, + "step": 10817 + }, + { + "epoch": 0.6587705142648357, + "grad_norm": 0.9571910597949741, + "learning_rate": 4.855130188368469e-06, + "loss": 0.4454, + "step": 10818 + }, + { + "epoch": 0.6588314100417136, + "grad_norm": 1.0995592298417767, + "learning_rate": 4.85510341833665e-06, + "loss": 0.3528, + "step": 10819 + }, + { + "epoch": 0.6588923058185915, + "grad_norm": 1.0503220062084164, + "learning_rate": 4.855076645905501e-06, + "loss": 0.4054, + "step": 10820 + }, + { + "epoch": 0.6589532015954693, + "grad_norm": 1.0228470607695097, + "learning_rate": 4.855049871075049e-06, + "loss": 0.415, + "step": 10821 + }, + { + "epoch": 0.6590140973723472, + "grad_norm": 0.9892227328979069, + "learning_rate": 4.855023093845321e-06, + "loss": 0.4371, + "step": 10822 + }, + { + "epoch": 0.6590749931492251, + "grad_norm": 1.0743902170065447, + "learning_rate": 4.854996314216345e-06, + "loss": 0.3942, + "step": 10823 + }, + { + "epoch": 0.659135888926103, + "grad_norm": 0.9653726504000136, + "learning_rate": 4.8549695321881465e-06, + "loss": 0.4199, + "step": 10824 + }, + { + "epoch": 0.6591967847029808, + "grad_norm": 1.0532695381362671, + "learning_rate": 4.854942747760755e-06, + "loss": 0.4013, + "step": 10825 + }, + { + "epoch": 0.6592576804798588, + "grad_norm": 1.0027866446685398, + "learning_rate": 4.854915960934197e-06, + "loss": 0.4408, + "step": 10826 + }, + { + "epoch": 0.6593185762567366, + "grad_norm": 1.01040603190071, + "learning_rate": 4.854889171708499e-06, + "loss": 0.4209, + "step": 10827 + }, + { + "epoch": 0.6593794720336145, + "grad_norm": 1.0010781429089783, + "learning_rate": 4.854862380083689e-06, + "loss": 0.4112, + "step": 10828 + }, + { + "epoch": 0.6594403678104923, + "grad_norm": 0.998675058843228, + "learning_rate": 4.854835586059794e-06, + "loss": 0.4351, + "step": 10829 + }, + { + "epoch": 0.6595012635873703, + "grad_norm": 1.011450894409111, + "learning_rate": 4.854808789636841e-06, + "loss": 0.3968, + "step": 10830 + }, + { + "epoch": 0.6595621593642481, + "grad_norm": 1.0196342394126334, + "learning_rate": 4.854781990814858e-06, + "loss": 0.4055, + "step": 10831 + }, + { + "epoch": 0.659623055141126, + "grad_norm": 0.9723168386992315, + "learning_rate": 4.854755189593873e-06, + "loss": 0.5126, + "step": 10832 + }, + { + "epoch": 0.6596839509180038, + "grad_norm": 1.015619856613477, + "learning_rate": 4.854728385973912e-06, + "loss": 0.4468, + "step": 10833 + }, + { + "epoch": 0.6597448466948818, + "grad_norm": 1.0313117032991197, + "learning_rate": 4.854701579955003e-06, + "loss": 0.4057, + "step": 10834 + }, + { + "epoch": 0.6598057424717596, + "grad_norm": 1.0683823536022974, + "learning_rate": 4.854674771537172e-06, + "loss": 0.4111, + "step": 10835 + }, + { + "epoch": 0.6598666382486374, + "grad_norm": 1.031793404667392, + "learning_rate": 4.854647960720448e-06, + "loss": 0.3708, + "step": 10836 + }, + { + "epoch": 0.6599275340255153, + "grad_norm": 0.9126686532607116, + "learning_rate": 4.854621147504857e-06, + "loss": 0.4549, + "step": 10837 + }, + { + "epoch": 0.6599884298023933, + "grad_norm": 0.9949450711158766, + "learning_rate": 4.854594331890427e-06, + "loss": 0.4183, + "step": 10838 + }, + { + "epoch": 0.6600493255792711, + "grad_norm": 1.0060331475400075, + "learning_rate": 4.854567513877185e-06, + "loss": 0.3552, + "step": 10839 + }, + { + "epoch": 0.6601102213561489, + "grad_norm": 1.1225469407054418, + "learning_rate": 4.854540693465159e-06, + "loss": 0.4175, + "step": 10840 + }, + { + "epoch": 0.6601711171330268, + "grad_norm": 1.0289088618741509, + "learning_rate": 4.854513870654377e-06, + "loss": 0.3989, + "step": 10841 + }, + { + "epoch": 0.6602320129099047, + "grad_norm": 0.9430542271577445, + "learning_rate": 4.854487045444864e-06, + "loss": 0.4825, + "step": 10842 + }, + { + "epoch": 0.6602929086867826, + "grad_norm": 1.0018488583915521, + "learning_rate": 4.854460217836648e-06, + "loss": 0.4951, + "step": 10843 + }, + { + "epoch": 0.6603538044636604, + "grad_norm": 1.0240148695009852, + "learning_rate": 4.854433387829758e-06, + "loss": 0.4042, + "step": 10844 + }, + { + "epoch": 0.6604147002405383, + "grad_norm": 1.0233765370192025, + "learning_rate": 4.85440655542422e-06, + "loss": 0.4158, + "step": 10845 + }, + { + "epoch": 0.6604755960174162, + "grad_norm": 1.1001802769640023, + "learning_rate": 4.854379720620062e-06, + "loss": 0.432, + "step": 10846 + }, + { + "epoch": 0.6605364917942941, + "grad_norm": 0.982984358122493, + "learning_rate": 4.85435288341731e-06, + "loss": 0.4137, + "step": 10847 + }, + { + "epoch": 0.6605973875711719, + "grad_norm": 0.9928911370022958, + "learning_rate": 4.8543260438159925e-06, + "loss": 0.4187, + "step": 10848 + }, + { + "epoch": 0.6606582833480498, + "grad_norm": 1.0563511301511919, + "learning_rate": 4.854299201816137e-06, + "loss": 0.3508, + "step": 10849 + }, + { + "epoch": 0.6607191791249277, + "grad_norm": 0.9477503629557527, + "learning_rate": 4.85427235741777e-06, + "loss": 0.4473, + "step": 10850 + }, + { + "epoch": 0.6607800749018056, + "grad_norm": 0.9599696341997122, + "learning_rate": 4.854245510620919e-06, + "loss": 0.4091, + "step": 10851 + }, + { + "epoch": 0.6608409706786834, + "grad_norm": 0.9838231738279957, + "learning_rate": 4.854218661425612e-06, + "loss": 0.4394, + "step": 10852 + }, + { + "epoch": 0.6609018664555613, + "grad_norm": 1.085831090676276, + "learning_rate": 4.854191809831876e-06, + "loss": 0.3928, + "step": 10853 + }, + { + "epoch": 0.6609627622324392, + "grad_norm": 0.9619572524556399, + "learning_rate": 4.854164955839738e-06, + "loss": 0.485, + "step": 10854 + }, + { + "epoch": 0.6610236580093171, + "grad_norm": 0.9414999978185774, + "learning_rate": 4.854138099449226e-06, + "loss": 0.4434, + "step": 10855 + }, + { + "epoch": 0.6610845537861949, + "grad_norm": 1.00313205181737, + "learning_rate": 4.854111240660367e-06, + "loss": 0.4181, + "step": 10856 + }, + { + "epoch": 0.6611454495630728, + "grad_norm": 0.9594359161041118, + "learning_rate": 4.854084379473188e-06, + "loss": 0.4059, + "step": 10857 + }, + { + "epoch": 0.6612063453399507, + "grad_norm": 0.9606120209868656, + "learning_rate": 4.854057515887718e-06, + "loss": 0.4113, + "step": 10858 + }, + { + "epoch": 0.6612672411168286, + "grad_norm": 1.040199813717488, + "learning_rate": 4.854030649903982e-06, + "loss": 0.4133, + "step": 10859 + }, + { + "epoch": 0.6613281368937064, + "grad_norm": 0.9839927159084053, + "learning_rate": 4.854003781522008e-06, + "loss": 0.4172, + "step": 10860 + }, + { + "epoch": 0.6613890326705842, + "grad_norm": 0.9905012268521387, + "learning_rate": 4.8539769107418254e-06, + "loss": 0.409, + "step": 10861 + }, + { + "epoch": 0.6614499284474622, + "grad_norm": 1.0094622299287923, + "learning_rate": 4.853950037563459e-06, + "loss": 0.4039, + "step": 10862 + }, + { + "epoch": 0.66151082422434, + "grad_norm": 0.9324575502217681, + "learning_rate": 4.853923161986937e-06, + "loss": 0.4453, + "step": 10863 + }, + { + "epoch": 0.6615717200012179, + "grad_norm": 0.9582996123417824, + "learning_rate": 4.853896284012289e-06, + "loss": 0.4848, + "step": 10864 + }, + { + "epoch": 0.6616326157780957, + "grad_norm": 1.0531461679095455, + "learning_rate": 4.853869403639538e-06, + "loss": 0.3833, + "step": 10865 + }, + { + "epoch": 0.6616935115549737, + "grad_norm": 1.0269765638891026, + "learning_rate": 4.853842520868715e-06, + "loss": 0.4333, + "step": 10866 + }, + { + "epoch": 0.6617544073318515, + "grad_norm": 1.0167994106099834, + "learning_rate": 4.853815635699846e-06, + "loss": 0.3793, + "step": 10867 + }, + { + "epoch": 0.6618153031087294, + "grad_norm": 0.9768474222463055, + "learning_rate": 4.853788748132959e-06, + "loss": 0.3989, + "step": 10868 + }, + { + "epoch": 0.6618761988856073, + "grad_norm": 1.0245015223022833, + "learning_rate": 4.8537618581680805e-06, + "loss": 0.4022, + "step": 10869 + }, + { + "epoch": 0.6619370946624852, + "grad_norm": 1.0230132985087168, + "learning_rate": 4.853734965805238e-06, + "loss": 0.4503, + "step": 10870 + }, + { + "epoch": 0.661997990439363, + "grad_norm": 0.9649894843790323, + "learning_rate": 4.85370807104446e-06, + "loss": 0.3898, + "step": 10871 + }, + { + "epoch": 0.6620588862162409, + "grad_norm": 1.0332590869451515, + "learning_rate": 4.853681173885773e-06, + "loss": 0.3893, + "step": 10872 + }, + { + "epoch": 0.6621197819931188, + "grad_norm": 0.944717320027584, + "learning_rate": 4.853654274329203e-06, + "loss": 0.497, + "step": 10873 + }, + { + "epoch": 0.6621806777699967, + "grad_norm": 1.0307152275513787, + "learning_rate": 4.853627372374781e-06, + "loss": 0.4199, + "step": 10874 + }, + { + "epoch": 0.6622415735468745, + "grad_norm": 1.040253766535061, + "learning_rate": 4.853600468022532e-06, + "loss": 0.417, + "step": 10875 + }, + { + "epoch": 0.6623024693237524, + "grad_norm": 1.0396760455720562, + "learning_rate": 4.853573561272483e-06, + "loss": 0.3669, + "step": 10876 + }, + { + "epoch": 0.6623633651006303, + "grad_norm": 0.9818632165809128, + "learning_rate": 4.853546652124663e-06, + "loss": 0.403, + "step": 10877 + }, + { + "epoch": 0.6624242608775082, + "grad_norm": 1.0127830985217798, + "learning_rate": 4.853519740579098e-06, + "loss": 0.3824, + "step": 10878 + }, + { + "epoch": 0.662485156654386, + "grad_norm": 1.0157384049928673, + "learning_rate": 4.853492826635816e-06, + "loss": 0.4911, + "step": 10879 + }, + { + "epoch": 0.6625460524312639, + "grad_norm": 1.0616934353734908, + "learning_rate": 4.853465910294845e-06, + "loss": 0.4785, + "step": 10880 + }, + { + "epoch": 0.6626069482081418, + "grad_norm": 1.0178553146392049, + "learning_rate": 4.853438991556212e-06, + "loss": 0.4057, + "step": 10881 + }, + { + "epoch": 0.6626678439850197, + "grad_norm": 1.0612842946713013, + "learning_rate": 4.853412070419944e-06, + "loss": 0.3625, + "step": 10882 + }, + { + "epoch": 0.6627287397618975, + "grad_norm": 0.9998781184756224, + "learning_rate": 4.8533851468860686e-06, + "loss": 0.3747, + "step": 10883 + }, + { + "epoch": 0.6627896355387753, + "grad_norm": 0.9540266637506134, + "learning_rate": 4.8533582209546125e-06, + "loss": 0.4868, + "step": 10884 + }, + { + "epoch": 0.6628505313156533, + "grad_norm": 0.9517293619638534, + "learning_rate": 4.853331292625605e-06, + "loss": 0.4338, + "step": 10885 + }, + { + "epoch": 0.6629114270925311, + "grad_norm": 1.0299812487976, + "learning_rate": 4.853304361899072e-06, + "loss": 0.4268, + "step": 10886 + }, + { + "epoch": 0.662972322869409, + "grad_norm": 1.0228174071762608, + "learning_rate": 4.8532774287750415e-06, + "loss": 0.4326, + "step": 10887 + }, + { + "epoch": 0.6630332186462868, + "grad_norm": 1.0172093203294825, + "learning_rate": 4.853250493253542e-06, + "loss": 0.3807, + "step": 10888 + }, + { + "epoch": 0.6630941144231648, + "grad_norm": 1.0231744581008393, + "learning_rate": 4.853223555334599e-06, + "loss": 0.3956, + "step": 10889 + }, + { + "epoch": 0.6631550102000426, + "grad_norm": 0.973215085165014, + "learning_rate": 4.8531966150182405e-06, + "loss": 0.5366, + "step": 10890 + }, + { + "epoch": 0.6632159059769205, + "grad_norm": 0.9638885532071572, + "learning_rate": 4.8531696723044944e-06, + "loss": 0.425, + "step": 10891 + }, + { + "epoch": 0.6632768017537983, + "grad_norm": 0.9422792124475408, + "learning_rate": 4.853142727193388e-06, + "loss": 0.4153, + "step": 10892 + }, + { + "epoch": 0.6633376975306763, + "grad_norm": 0.9757795412452663, + "learning_rate": 4.853115779684948e-06, + "loss": 0.4318, + "step": 10893 + }, + { + "epoch": 0.6633985933075541, + "grad_norm": 0.9056458284232386, + "learning_rate": 4.853088829779204e-06, + "loss": 0.5079, + "step": 10894 + }, + { + "epoch": 0.663459489084432, + "grad_norm": 0.9370676969589701, + "learning_rate": 4.853061877476181e-06, + "loss": 0.4604, + "step": 10895 + }, + { + "epoch": 0.6635203848613098, + "grad_norm": 1.036326703767138, + "learning_rate": 4.853034922775908e-06, + "loss": 0.3522, + "step": 10896 + }, + { + "epoch": 0.6635812806381878, + "grad_norm": 0.9718583070792979, + "learning_rate": 4.8530079656784115e-06, + "loss": 0.4135, + "step": 10897 + }, + { + "epoch": 0.6636421764150656, + "grad_norm": 1.0225612061259486, + "learning_rate": 4.852981006183719e-06, + "loss": 0.4707, + "step": 10898 + }, + { + "epoch": 0.6637030721919435, + "grad_norm": 1.0237765423759144, + "learning_rate": 4.8529540442918584e-06, + "loss": 0.4326, + "step": 10899 + }, + { + "epoch": 0.6637639679688213, + "grad_norm": 1.0498573902125752, + "learning_rate": 4.8529270800028584e-06, + "loss": 0.451, + "step": 10900 + }, + { + "epoch": 0.6638248637456993, + "grad_norm": 1.0193540352588217, + "learning_rate": 4.852900113316744e-06, + "loss": 0.409, + "step": 10901 + }, + { + "epoch": 0.6638857595225771, + "grad_norm": 1.032179311943782, + "learning_rate": 4.8528731442335445e-06, + "loss": 0.3751, + "step": 10902 + }, + { + "epoch": 0.663946655299455, + "grad_norm": 1.050378272422504, + "learning_rate": 4.852846172753287e-06, + "loss": 0.3709, + "step": 10903 + }, + { + "epoch": 0.6640075510763328, + "grad_norm": 0.9236642645886319, + "learning_rate": 4.852819198875999e-06, + "loss": 0.4645, + "step": 10904 + }, + { + "epoch": 0.6640684468532108, + "grad_norm": 0.9688001458454807, + "learning_rate": 4.852792222601706e-06, + "loss": 0.4517, + "step": 10905 + }, + { + "epoch": 0.6641293426300886, + "grad_norm": 0.9295537950605691, + "learning_rate": 4.852765243930438e-06, + "loss": 0.4518, + "step": 10906 + }, + { + "epoch": 0.6641902384069664, + "grad_norm": 0.9646217230357522, + "learning_rate": 4.852738262862223e-06, + "loss": 0.3758, + "step": 10907 + }, + { + "epoch": 0.6642511341838444, + "grad_norm": 1.0190966463579725, + "learning_rate": 4.852711279397086e-06, + "loss": 0.4101, + "step": 10908 + }, + { + "epoch": 0.6643120299607223, + "grad_norm": 0.9427329548273702, + "learning_rate": 4.852684293535056e-06, + "loss": 0.4732, + "step": 10909 + }, + { + "epoch": 0.6643729257376001, + "grad_norm": 1.0534966076492536, + "learning_rate": 4.85265730527616e-06, + "loss": 0.3814, + "step": 10910 + }, + { + "epoch": 0.6644338215144779, + "grad_norm": 1.0438908750842169, + "learning_rate": 4.852630314620426e-06, + "loss": 0.4517, + "step": 10911 + }, + { + "epoch": 0.6644947172913559, + "grad_norm": 0.9170702049388648, + "learning_rate": 4.85260332156788e-06, + "loss": 0.4241, + "step": 10912 + }, + { + "epoch": 0.6645556130682337, + "grad_norm": 1.0922586010142585, + "learning_rate": 4.852576326118551e-06, + "loss": 0.3522, + "step": 10913 + }, + { + "epoch": 0.6646165088451116, + "grad_norm": 0.9485925712235919, + "learning_rate": 4.852549328272467e-06, + "loss": 0.4687, + "step": 10914 + }, + { + "epoch": 0.6646774046219894, + "grad_norm": 1.0119460776553728, + "learning_rate": 4.852522328029654e-06, + "loss": 0.4085, + "step": 10915 + }, + { + "epoch": 0.6647383003988674, + "grad_norm": 1.0354685286977583, + "learning_rate": 4.85249532539014e-06, + "loss": 0.3575, + "step": 10916 + }, + { + "epoch": 0.6647991961757452, + "grad_norm": 1.186882478175263, + "learning_rate": 4.852468320353954e-06, + "loss": 0.4266, + "step": 10917 + }, + { + "epoch": 0.6648600919526231, + "grad_norm": 0.9491023127818863, + "learning_rate": 4.8524413129211215e-06, + "loss": 0.4509, + "step": 10918 + }, + { + "epoch": 0.6649209877295009, + "grad_norm": 1.051952172581502, + "learning_rate": 4.8524143030916695e-06, + "loss": 0.4673, + "step": 10919 + }, + { + "epoch": 0.6649818835063789, + "grad_norm": 1.0268053475001107, + "learning_rate": 4.852387290865628e-06, + "loss": 0.3939, + "step": 10920 + }, + { + "epoch": 0.6650427792832567, + "grad_norm": 1.0024289129522694, + "learning_rate": 4.852360276243023e-06, + "loss": 0.4125, + "step": 10921 + }, + { + "epoch": 0.6651036750601346, + "grad_norm": 0.9730569222008284, + "learning_rate": 4.852333259223882e-06, + "loss": 0.4208, + "step": 10922 + }, + { + "epoch": 0.6651645708370124, + "grad_norm": 1.0539722679162622, + "learning_rate": 4.852306239808233e-06, + "loss": 0.3827, + "step": 10923 + }, + { + "epoch": 0.6652254666138904, + "grad_norm": 1.242524141745074, + "learning_rate": 4.8522792179961035e-06, + "loss": 0.3817, + "step": 10924 + }, + { + "epoch": 0.6652863623907682, + "grad_norm": 0.9307748737408275, + "learning_rate": 4.85225219378752e-06, + "loss": 0.4458, + "step": 10925 + }, + { + "epoch": 0.6653472581676461, + "grad_norm": 0.9605486084454907, + "learning_rate": 4.852225167182511e-06, + "loss": 0.422, + "step": 10926 + }, + { + "epoch": 0.6654081539445239, + "grad_norm": 0.9940655969459177, + "learning_rate": 4.852198138181105e-06, + "loss": 0.5045, + "step": 10927 + }, + { + "epoch": 0.6654690497214019, + "grad_norm": 0.9765896749760535, + "learning_rate": 4.8521711067833275e-06, + "loss": 0.4202, + "step": 10928 + }, + { + "epoch": 0.6655299454982797, + "grad_norm": 0.9439601374605454, + "learning_rate": 4.852144072989208e-06, + "loss": 0.4047, + "step": 10929 + }, + { + "epoch": 0.6655908412751576, + "grad_norm": 1.0078550397237753, + "learning_rate": 4.852117036798772e-06, + "loss": 0.4342, + "step": 10930 + }, + { + "epoch": 0.6656517370520354, + "grad_norm": 0.9867723388945756, + "learning_rate": 4.852089998212049e-06, + "loss": 0.4411, + "step": 10931 + }, + { + "epoch": 0.6657126328289134, + "grad_norm": 1.0631918780024967, + "learning_rate": 4.852062957229065e-06, + "loss": 0.3904, + "step": 10932 + }, + { + "epoch": 0.6657735286057912, + "grad_norm": 1.0366536193781506, + "learning_rate": 4.852035913849847e-06, + "loss": 0.4109, + "step": 10933 + }, + { + "epoch": 0.665834424382669, + "grad_norm": 0.9541742496952883, + "learning_rate": 4.852008868074425e-06, + "loss": 0.404, + "step": 10934 + }, + { + "epoch": 0.6658953201595469, + "grad_norm": 1.0947693304406823, + "learning_rate": 4.851981819902825e-06, + "loss": 0.362, + "step": 10935 + }, + { + "epoch": 0.6659562159364248, + "grad_norm": 0.9835329512986996, + "learning_rate": 4.851954769335075e-06, + "loss": 0.439, + "step": 10936 + }, + { + "epoch": 0.6660171117133027, + "grad_norm": 0.9677577398852311, + "learning_rate": 4.851927716371202e-06, + "loss": 0.3689, + "step": 10937 + }, + { + "epoch": 0.6660780074901805, + "grad_norm": 0.997654117552148, + "learning_rate": 4.851900661011235e-06, + "loss": 0.4486, + "step": 10938 + }, + { + "epoch": 0.6661389032670584, + "grad_norm": 1.0058419485605494, + "learning_rate": 4.851873603255199e-06, + "loss": 0.3514, + "step": 10939 + }, + { + "epoch": 0.6661997990439363, + "grad_norm": 1.0655833982672573, + "learning_rate": 4.851846543103124e-06, + "loss": 0.4271, + "step": 10940 + }, + { + "epoch": 0.6662606948208142, + "grad_norm": 1.0638302380451383, + "learning_rate": 4.851819480555036e-06, + "loss": 0.4242, + "step": 10941 + }, + { + "epoch": 0.666321590597692, + "grad_norm": 1.008323628272093, + "learning_rate": 4.851792415610964e-06, + "loss": 0.4467, + "step": 10942 + }, + { + "epoch": 0.6663824863745699, + "grad_norm": 1.0920253097760089, + "learning_rate": 4.8517653482709344e-06, + "loss": 0.4325, + "step": 10943 + }, + { + "epoch": 0.6664433821514478, + "grad_norm": 1.0265132649551747, + "learning_rate": 4.851738278534976e-06, + "loss": 0.3892, + "step": 10944 + }, + { + "epoch": 0.6665042779283257, + "grad_norm": 0.9706098920142473, + "learning_rate": 4.851711206403115e-06, + "loss": 0.4242, + "step": 10945 + }, + { + "epoch": 0.6665651737052035, + "grad_norm": 0.9999648311075655, + "learning_rate": 4.851684131875378e-06, + "loss": 0.468, + "step": 10946 + }, + { + "epoch": 0.6666260694820814, + "grad_norm": 0.9543740121172526, + "learning_rate": 4.8516570549517964e-06, + "loss": 0.4717, + "step": 10947 + }, + { + "epoch": 0.6666869652589593, + "grad_norm": 0.9623932364405504, + "learning_rate": 4.851629975632393e-06, + "loss": 0.4958, + "step": 10948 + }, + { + "epoch": 0.6667478610358372, + "grad_norm": 0.9944568410010683, + "learning_rate": 4.8516028939172e-06, + "loss": 0.3915, + "step": 10949 + }, + { + "epoch": 0.666808756812715, + "grad_norm": 1.0531670552288612, + "learning_rate": 4.8515758098062425e-06, + "loss": 0.4441, + "step": 10950 + }, + { + "epoch": 0.666869652589593, + "grad_norm": 0.9374210531466822, + "learning_rate": 4.851548723299548e-06, + "loss": 0.4008, + "step": 10951 + }, + { + "epoch": 0.6669305483664708, + "grad_norm": 1.0171582096174043, + "learning_rate": 4.851521634397145e-06, + "loss": 0.4145, + "step": 10952 + }, + { + "epoch": 0.6669914441433487, + "grad_norm": 1.137726548631766, + "learning_rate": 4.85149454309906e-06, + "loss": 0.4179, + "step": 10953 + }, + { + "epoch": 0.6670523399202265, + "grad_norm": 1.0717028781808087, + "learning_rate": 4.851467449405321e-06, + "loss": 0.4137, + "step": 10954 + }, + { + "epoch": 0.6671132356971045, + "grad_norm": 1.0237320162248034, + "learning_rate": 4.851440353315957e-06, + "loss": 0.3859, + "step": 10955 + }, + { + "epoch": 0.6671741314739823, + "grad_norm": 0.9510467498668178, + "learning_rate": 4.851413254830994e-06, + "loss": 0.4273, + "step": 10956 + }, + { + "epoch": 0.6672350272508601, + "grad_norm": 1.0012707886940226, + "learning_rate": 4.851386153950461e-06, + "loss": 0.3847, + "step": 10957 + }, + { + "epoch": 0.667295923027738, + "grad_norm": 1.0227496499018105, + "learning_rate": 4.851359050674382e-06, + "loss": 0.3639, + "step": 10958 + }, + { + "epoch": 0.667356818804616, + "grad_norm": 0.9959812910431032, + "learning_rate": 4.85133194500279e-06, + "loss": 0.4248, + "step": 10959 + }, + { + "epoch": 0.6674177145814938, + "grad_norm": 0.9259170627481387, + "learning_rate": 4.851304836935709e-06, + "loss": 0.4806, + "step": 10960 + }, + { + "epoch": 0.6674786103583716, + "grad_norm": 1.0748945059954411, + "learning_rate": 4.8512777264731665e-06, + "loss": 0.3906, + "step": 10961 + }, + { + "epoch": 0.6675395061352495, + "grad_norm": 1.0197708827351524, + "learning_rate": 4.851250613615192e-06, + "loss": 0.4918, + "step": 10962 + }, + { + "epoch": 0.6676004019121274, + "grad_norm": 1.030589773659939, + "learning_rate": 4.851223498361812e-06, + "loss": 0.427, + "step": 10963 + }, + { + "epoch": 0.6676612976890053, + "grad_norm": 0.9692609249464885, + "learning_rate": 4.851196380713055e-06, + "loss": 0.442, + "step": 10964 + }, + { + "epoch": 0.6677221934658831, + "grad_norm": 1.0420586698976888, + "learning_rate": 4.851169260668948e-06, + "loss": 0.4274, + "step": 10965 + }, + { + "epoch": 0.667783089242761, + "grad_norm": 0.9879932649577083, + "learning_rate": 4.851142138229517e-06, + "loss": 0.4404, + "step": 10966 + }, + { + "epoch": 0.6678439850196389, + "grad_norm": 0.9795325802123572, + "learning_rate": 4.851115013394793e-06, + "loss": 0.4302, + "step": 10967 + }, + { + "epoch": 0.6679048807965168, + "grad_norm": 1.0406697789433454, + "learning_rate": 4.851087886164802e-06, + "loss": 0.412, + "step": 10968 + }, + { + "epoch": 0.6679657765733946, + "grad_norm": 1.0183234417604294, + "learning_rate": 4.8510607565395705e-06, + "loss": 0.4353, + "step": 10969 + }, + { + "epoch": 0.6680266723502725, + "grad_norm": 0.9569717217839143, + "learning_rate": 4.851033624519128e-06, + "loss": 0.4412, + "step": 10970 + }, + { + "epoch": 0.6680875681271504, + "grad_norm": 1.0280002093134524, + "learning_rate": 4.8510064901035e-06, + "loss": 0.4498, + "step": 10971 + }, + { + "epoch": 0.6681484639040283, + "grad_norm": 1.0306306169723762, + "learning_rate": 4.850979353292717e-06, + "loss": 0.4016, + "step": 10972 + }, + { + "epoch": 0.6682093596809061, + "grad_norm": 1.036425435635285, + "learning_rate": 4.850952214086805e-06, + "loss": 0.411, + "step": 10973 + }, + { + "epoch": 0.668270255457784, + "grad_norm": 1.0009645537431864, + "learning_rate": 4.850925072485791e-06, + "loss": 0.366, + "step": 10974 + }, + { + "epoch": 0.6683311512346619, + "grad_norm": 1.0305837942660647, + "learning_rate": 4.850897928489704e-06, + "loss": 0.4244, + "step": 10975 + }, + { + "epoch": 0.6683920470115398, + "grad_norm": 1.015149378561846, + "learning_rate": 4.85087078209857e-06, + "loss": 0.4183, + "step": 10976 + }, + { + "epoch": 0.6684529427884176, + "grad_norm": 1.0209767617554988, + "learning_rate": 4.850843633312418e-06, + "loss": 0.438, + "step": 10977 + }, + { + "epoch": 0.6685138385652954, + "grad_norm": 0.9704049823719034, + "learning_rate": 4.850816482131277e-06, + "loss": 0.5421, + "step": 10978 + }, + { + "epoch": 0.6685747343421734, + "grad_norm": 1.04083457530483, + "learning_rate": 4.850789328555172e-06, + "loss": 0.434, + "step": 10979 + }, + { + "epoch": 0.6686356301190512, + "grad_norm": 1.0165274694912019, + "learning_rate": 4.8507621725841315e-06, + "loss": 0.5068, + "step": 10980 + }, + { + "epoch": 0.6686965258959291, + "grad_norm": 1.052370837579109, + "learning_rate": 4.850735014218184e-06, + "loss": 0.3767, + "step": 10981 + }, + { + "epoch": 0.6687574216728069, + "grad_norm": 1.1785140134153098, + "learning_rate": 4.850707853457356e-06, + "loss": 0.4402, + "step": 10982 + }, + { + "epoch": 0.6688183174496849, + "grad_norm": 1.0238153596857873, + "learning_rate": 4.850680690301676e-06, + "loss": 0.4644, + "step": 10983 + }, + { + "epoch": 0.6688792132265627, + "grad_norm": 1.0091040359102175, + "learning_rate": 4.85065352475117e-06, + "loss": 0.4131, + "step": 10984 + }, + { + "epoch": 0.6689401090034406, + "grad_norm": 1.020237959503554, + "learning_rate": 4.850626356805869e-06, + "loss": 0.4644, + "step": 10985 + }, + { + "epoch": 0.6690010047803184, + "grad_norm": 0.9168453829018834, + "learning_rate": 4.850599186465798e-06, + "loss": 0.4402, + "step": 10986 + }, + { + "epoch": 0.6690619005571964, + "grad_norm": 0.9988071096921363, + "learning_rate": 4.850572013730985e-06, + "loss": 0.4802, + "step": 10987 + }, + { + "epoch": 0.6691227963340742, + "grad_norm": 1.1480246250390511, + "learning_rate": 4.850544838601458e-06, + "loss": 0.4077, + "step": 10988 + }, + { + "epoch": 0.6691836921109521, + "grad_norm": 1.047557088723968, + "learning_rate": 4.850517661077246e-06, + "loss": 0.4681, + "step": 10989 + }, + { + "epoch": 0.66924458788783, + "grad_norm": 0.9436739949572789, + "learning_rate": 4.850490481158374e-06, + "loss": 0.4778, + "step": 10990 + }, + { + "epoch": 0.6693054836647079, + "grad_norm": 1.0443355717699387, + "learning_rate": 4.850463298844872e-06, + "loss": 0.3949, + "step": 10991 + }, + { + "epoch": 0.6693663794415857, + "grad_norm": 1.0280256450285195, + "learning_rate": 4.8504361141367676e-06, + "loss": 0.4363, + "step": 10992 + }, + { + "epoch": 0.6694272752184636, + "grad_norm": 0.9715675499030834, + "learning_rate": 4.850408927034086e-06, + "loss": 0.3727, + "step": 10993 + }, + { + "epoch": 0.6694881709953415, + "grad_norm": 1.0506465119531778, + "learning_rate": 4.8503817375368574e-06, + "loss": 0.4431, + "step": 10994 + }, + { + "epoch": 0.6695490667722194, + "grad_norm": 1.043807222102904, + "learning_rate": 4.8503545456451095e-06, + "loss": 0.3944, + "step": 10995 + }, + { + "epoch": 0.6696099625490972, + "grad_norm": 0.9331654500821939, + "learning_rate": 4.850327351358869e-06, + "loss": 0.5255, + "step": 10996 + }, + { + "epoch": 0.6696708583259751, + "grad_norm": 0.9804529174212946, + "learning_rate": 4.850300154678164e-06, + "loss": 0.3732, + "step": 10997 + }, + { + "epoch": 0.669731754102853, + "grad_norm": 0.9724646343062724, + "learning_rate": 4.850272955603021e-06, + "loss": 0.3947, + "step": 10998 + }, + { + "epoch": 0.6697926498797309, + "grad_norm": 0.9766786036223919, + "learning_rate": 4.850245754133469e-06, + "loss": 0.4026, + "step": 10999 + }, + { + "epoch": 0.6698535456566087, + "grad_norm": 1.0129337998731796, + "learning_rate": 4.850218550269536e-06, + "loss": 0.4812, + "step": 11000 + }, + { + "epoch": 0.6699144414334866, + "grad_norm": 1.002523986784341, + "learning_rate": 4.850191344011249e-06, + "loss": 0.4388, + "step": 11001 + }, + { + "epoch": 0.6699753372103645, + "grad_norm": 1.065831999239812, + "learning_rate": 4.850164135358636e-06, + "loss": 0.4046, + "step": 11002 + }, + { + "epoch": 0.6700362329872424, + "grad_norm": 0.9867182527266276, + "learning_rate": 4.850136924311724e-06, + "loss": 0.4374, + "step": 11003 + }, + { + "epoch": 0.6700971287641202, + "grad_norm": 1.0295490877572095, + "learning_rate": 4.850109710870543e-06, + "loss": 0.3951, + "step": 11004 + }, + { + "epoch": 0.670158024540998, + "grad_norm": 1.0079562917941012, + "learning_rate": 4.850082495035118e-06, + "loss": 0.4412, + "step": 11005 + }, + { + "epoch": 0.670218920317876, + "grad_norm": 1.0109812255744688, + "learning_rate": 4.850055276805478e-06, + "loss": 0.4609, + "step": 11006 + }, + { + "epoch": 0.6702798160947538, + "grad_norm": 0.9825009139870144, + "learning_rate": 4.850028056181651e-06, + "loss": 0.4731, + "step": 11007 + }, + { + "epoch": 0.6703407118716317, + "grad_norm": 1.0146682728435632, + "learning_rate": 4.850000833163663e-06, + "loss": 0.327, + "step": 11008 + }, + { + "epoch": 0.6704016076485095, + "grad_norm": 0.9777628004064945, + "learning_rate": 4.849973607751543e-06, + "loss": 0.3658, + "step": 11009 + }, + { + "epoch": 0.6704625034253875, + "grad_norm": 1.0863574423200937, + "learning_rate": 4.84994637994532e-06, + "loss": 0.4764, + "step": 11010 + }, + { + "epoch": 0.6705233992022653, + "grad_norm": 1.0418806465046708, + "learning_rate": 4.8499191497450195e-06, + "loss": 0.3992, + "step": 11011 + }, + { + "epoch": 0.6705842949791432, + "grad_norm": 0.9646707418597216, + "learning_rate": 4.849891917150671e-06, + "loss": 0.4929, + "step": 11012 + }, + { + "epoch": 0.670645190756021, + "grad_norm": 0.9341890342279993, + "learning_rate": 4.849864682162301e-06, + "loss": 0.4568, + "step": 11013 + }, + { + "epoch": 0.670706086532899, + "grad_norm": 1.0234021571315992, + "learning_rate": 4.849837444779938e-06, + "loss": 0.4158, + "step": 11014 + }, + { + "epoch": 0.6707669823097768, + "grad_norm": 0.9968830537504527, + "learning_rate": 4.849810205003609e-06, + "loss": 0.4497, + "step": 11015 + }, + { + "epoch": 0.6708278780866547, + "grad_norm": 0.9743198236906094, + "learning_rate": 4.849782962833344e-06, + "loss": 0.4056, + "step": 11016 + }, + { + "epoch": 0.6708887738635325, + "grad_norm": 0.9551392188890868, + "learning_rate": 4.849755718269167e-06, + "loss": 0.49, + "step": 11017 + }, + { + "epoch": 0.6709496696404105, + "grad_norm": 1.009235700735248, + "learning_rate": 4.849728471311108e-06, + "loss": 0.4674, + "step": 11018 + }, + { + "epoch": 0.6710105654172883, + "grad_norm": 1.086455086953784, + "learning_rate": 4.849701221959194e-06, + "loss": 0.3847, + "step": 11019 + }, + { + "epoch": 0.6710714611941662, + "grad_norm": 0.9102766291121078, + "learning_rate": 4.8496739702134546e-06, + "loss": 0.5001, + "step": 11020 + }, + { + "epoch": 0.671132356971044, + "grad_norm": 1.0481247700252898, + "learning_rate": 4.849646716073916e-06, + "loss": 0.4402, + "step": 11021 + }, + { + "epoch": 0.671193252747922, + "grad_norm": 1.0210478517404513, + "learning_rate": 4.849619459540606e-06, + "loss": 0.4338, + "step": 11022 + }, + { + "epoch": 0.6712541485247998, + "grad_norm": 0.9791742257599879, + "learning_rate": 4.8495922006135515e-06, + "loss": 0.4164, + "step": 11023 + }, + { + "epoch": 0.6713150443016777, + "grad_norm": 1.0881237367425283, + "learning_rate": 4.849564939292782e-06, + "loss": 0.3367, + "step": 11024 + }, + { + "epoch": 0.6713759400785555, + "grad_norm": 0.9931840154566002, + "learning_rate": 4.849537675578326e-06, + "loss": 0.4222, + "step": 11025 + }, + { + "epoch": 0.6714368358554335, + "grad_norm": 1.0556362916724997, + "learning_rate": 4.849510409470207e-06, + "loss": 0.4417, + "step": 11026 + }, + { + "epoch": 0.6714977316323113, + "grad_norm": 0.9254307557904018, + "learning_rate": 4.849483140968457e-06, + "loss": 0.415, + "step": 11027 + }, + { + "epoch": 0.6715586274091891, + "grad_norm": 0.983868618570989, + "learning_rate": 4.849455870073103e-06, + "loss": 0.4021, + "step": 11028 + }, + { + "epoch": 0.671619523186067, + "grad_norm": 0.9291175752405493, + "learning_rate": 4.849428596784173e-06, + "loss": 0.4006, + "step": 11029 + }, + { + "epoch": 0.671680418962945, + "grad_norm": 0.994028921491516, + "learning_rate": 4.849401321101692e-06, + "loss": 0.4206, + "step": 11030 + }, + { + "epoch": 0.6717413147398228, + "grad_norm": 1.057449612909826, + "learning_rate": 4.849374043025691e-06, + "loss": 0.4339, + "step": 11031 + }, + { + "epoch": 0.6718022105167006, + "grad_norm": 0.9690503499568918, + "learning_rate": 4.849346762556196e-06, + "loss": 0.4211, + "step": 11032 + }, + { + "epoch": 0.6718631062935786, + "grad_norm": 0.9939976988844786, + "learning_rate": 4.849319479693235e-06, + "loss": 0.4453, + "step": 11033 + }, + { + "epoch": 0.6719240020704564, + "grad_norm": 1.002269462288247, + "learning_rate": 4.8492921944368375e-06, + "loss": 0.3666, + "step": 11034 + }, + { + "epoch": 0.6719848978473343, + "grad_norm": 1.04398563514531, + "learning_rate": 4.849264906787029e-06, + "loss": 0.3939, + "step": 11035 + }, + { + "epoch": 0.6720457936242121, + "grad_norm": 0.9986164828072213, + "learning_rate": 4.849237616743838e-06, + "loss": 0.3652, + "step": 11036 + }, + { + "epoch": 0.6721066894010901, + "grad_norm": 0.933507319220859, + "learning_rate": 4.849210324307293e-06, + "loss": 0.4779, + "step": 11037 + }, + { + "epoch": 0.6721675851779679, + "grad_norm": 1.0280931698885212, + "learning_rate": 4.849183029477421e-06, + "loss": 0.3724, + "step": 11038 + }, + { + "epoch": 0.6722284809548458, + "grad_norm": 1.0095338732830854, + "learning_rate": 4.849155732254251e-06, + "loss": 0.3753, + "step": 11039 + }, + { + "epoch": 0.6722893767317236, + "grad_norm": 1.1347114823635263, + "learning_rate": 4.8491284326378096e-06, + "loss": 0.4553, + "step": 11040 + }, + { + "epoch": 0.6723502725086016, + "grad_norm": 1.0633277232418699, + "learning_rate": 4.8491011306281244e-06, + "loss": 0.3869, + "step": 11041 + }, + { + "epoch": 0.6724111682854794, + "grad_norm": 1.0424508846829528, + "learning_rate": 4.849073826225225e-06, + "loss": 0.3987, + "step": 11042 + }, + { + "epoch": 0.6724720640623573, + "grad_norm": 0.9931704981268175, + "learning_rate": 4.849046519429137e-06, + "loss": 0.4443, + "step": 11043 + }, + { + "epoch": 0.6725329598392351, + "grad_norm": 0.9654313159849062, + "learning_rate": 4.8490192102398896e-06, + "loss": 0.472, + "step": 11044 + }, + { + "epoch": 0.6725938556161131, + "grad_norm": 1.0500651648992991, + "learning_rate": 4.84899189865751e-06, + "loss": 0.3854, + "step": 11045 + }, + { + "epoch": 0.6726547513929909, + "grad_norm": 1.0513220800966414, + "learning_rate": 4.848964584682027e-06, + "loss": 0.3564, + "step": 11046 + }, + { + "epoch": 0.6727156471698688, + "grad_norm": 1.0615214250958254, + "learning_rate": 4.848937268313467e-06, + "loss": 0.3707, + "step": 11047 + }, + { + "epoch": 0.6727765429467466, + "grad_norm": 0.9355517455552463, + "learning_rate": 4.848909949551859e-06, + "loss": 0.4687, + "step": 11048 + }, + { + "epoch": 0.6728374387236246, + "grad_norm": 1.001244378120841, + "learning_rate": 4.84888262839723e-06, + "loss": 0.3808, + "step": 11049 + }, + { + "epoch": 0.6728983345005024, + "grad_norm": 1.0421128810551652, + "learning_rate": 4.848855304849608e-06, + "loss": 0.375, + "step": 11050 + }, + { + "epoch": 0.6729592302773802, + "grad_norm": 0.952989093846905, + "learning_rate": 4.848827978909021e-06, + "loss": 0.4751, + "step": 11051 + }, + { + "epoch": 0.6730201260542581, + "grad_norm": 0.9607962012235028, + "learning_rate": 4.848800650575498e-06, + "loss": 0.4713, + "step": 11052 + }, + { + "epoch": 0.673081021831136, + "grad_norm": 1.0777805747795686, + "learning_rate": 4.848773319849065e-06, + "loss": 0.4373, + "step": 11053 + }, + { + "epoch": 0.6731419176080139, + "grad_norm": 0.9190848021105836, + "learning_rate": 4.848745986729751e-06, + "loss": 0.4763, + "step": 11054 + }, + { + "epoch": 0.6732028133848917, + "grad_norm": 0.9381170375017944, + "learning_rate": 4.848718651217582e-06, + "loss": 0.4081, + "step": 11055 + }, + { + "epoch": 0.6732637091617696, + "grad_norm": 0.9970841588050136, + "learning_rate": 4.848691313312589e-06, + "loss": 0.3946, + "step": 11056 + }, + { + "epoch": 0.6733246049386475, + "grad_norm": 1.0329881476568976, + "learning_rate": 4.848663973014797e-06, + "loss": 0.4003, + "step": 11057 + }, + { + "epoch": 0.6733855007155254, + "grad_norm": 0.9996666960366568, + "learning_rate": 4.848636630324236e-06, + "loss": 0.4234, + "step": 11058 + }, + { + "epoch": 0.6734463964924032, + "grad_norm": 1.0486557032226116, + "learning_rate": 4.848609285240932e-06, + "loss": 0.4583, + "step": 11059 + }, + { + "epoch": 0.6735072922692811, + "grad_norm": 1.0016169356804683, + "learning_rate": 4.848581937764914e-06, + "loss": 0.466, + "step": 11060 + }, + { + "epoch": 0.673568188046159, + "grad_norm": 1.051695050638161, + "learning_rate": 4.848554587896209e-06, + "loss": 0.3998, + "step": 11061 + }, + { + "epoch": 0.6736290838230369, + "grad_norm": 1.0209556682263567, + "learning_rate": 4.848527235634846e-06, + "loss": 0.4387, + "step": 11062 + }, + { + "epoch": 0.6736899795999147, + "grad_norm": 0.9945242767973426, + "learning_rate": 4.848499880980853e-06, + "loss": 0.4215, + "step": 11063 + }, + { + "epoch": 0.6737508753767926, + "grad_norm": 1.0035401848764889, + "learning_rate": 4.848472523934256e-06, + "loss": 0.4764, + "step": 11064 + }, + { + "epoch": 0.6738117711536705, + "grad_norm": 1.036368904807259, + "learning_rate": 4.848445164495084e-06, + "loss": 0.4468, + "step": 11065 + }, + { + "epoch": 0.6738726669305484, + "grad_norm": 0.9424424370912071, + "learning_rate": 4.848417802663366e-06, + "loss": 0.4434, + "step": 11066 + }, + { + "epoch": 0.6739335627074262, + "grad_norm": 0.9893956347286609, + "learning_rate": 4.8483904384391276e-06, + "loss": 0.4172, + "step": 11067 + }, + { + "epoch": 0.6739944584843041, + "grad_norm": 1.0431070671074523, + "learning_rate": 4.8483630718223974e-06, + "loss": 0.4747, + "step": 11068 + }, + { + "epoch": 0.674055354261182, + "grad_norm": 1.0066370765701975, + "learning_rate": 4.848335702813205e-06, + "loss": 0.4041, + "step": 11069 + }, + { + "epoch": 0.6741162500380599, + "grad_norm": 0.9770245859674555, + "learning_rate": 4.848308331411576e-06, + "loss": 0.458, + "step": 11070 + }, + { + "epoch": 0.6741771458149377, + "grad_norm": 1.008226099238263, + "learning_rate": 4.8482809576175405e-06, + "loss": 0.3604, + "step": 11071 + }, + { + "epoch": 0.6742380415918157, + "grad_norm": 0.954000950880963, + "learning_rate": 4.848253581431124e-06, + "loss": 0.5101, + "step": 11072 + }, + { + "epoch": 0.6742989373686935, + "grad_norm": 1.0551629980393427, + "learning_rate": 4.848226202852356e-06, + "loss": 0.5367, + "step": 11073 + }, + { + "epoch": 0.6743598331455714, + "grad_norm": 0.9364871432238147, + "learning_rate": 4.848198821881264e-06, + "loss": 0.4848, + "step": 11074 + }, + { + "epoch": 0.6744207289224492, + "grad_norm": 1.1019848787871274, + "learning_rate": 4.848171438517877e-06, + "loss": 0.447, + "step": 11075 + }, + { + "epoch": 0.6744816246993272, + "grad_norm": 0.9658020881781938, + "learning_rate": 4.848144052762221e-06, + "loss": 0.436, + "step": 11076 + }, + { + "epoch": 0.674542520476205, + "grad_norm": 0.9464544843500194, + "learning_rate": 4.848116664614324e-06, + "loss": 0.4624, + "step": 11077 + }, + { + "epoch": 0.6746034162530828, + "grad_norm": 0.9830562147622031, + "learning_rate": 4.8480892740742145e-06, + "loss": 0.4377, + "step": 11078 + }, + { + "epoch": 0.6746643120299607, + "grad_norm": 0.934948814646265, + "learning_rate": 4.848061881141921e-06, + "loss": 0.5022, + "step": 11079 + }, + { + "epoch": 0.6747252078068386, + "grad_norm": 1.0479006423804473, + "learning_rate": 4.848034485817471e-06, + "loss": 0.4225, + "step": 11080 + }, + { + "epoch": 0.6747861035837165, + "grad_norm": 1.0438474899371928, + "learning_rate": 4.848007088100892e-06, + "loss": 0.377, + "step": 11081 + }, + { + "epoch": 0.6748469993605943, + "grad_norm": 0.9246003755595069, + "learning_rate": 4.847979687992212e-06, + "loss": 0.4459, + "step": 11082 + }, + { + "epoch": 0.6749078951374722, + "grad_norm": 0.9463017277176468, + "learning_rate": 4.84795228549146e-06, + "loss": 0.4438, + "step": 11083 + }, + { + "epoch": 0.6749687909143501, + "grad_norm": 1.0795723111551443, + "learning_rate": 4.847924880598662e-06, + "loss": 0.4303, + "step": 11084 + }, + { + "epoch": 0.675029686691228, + "grad_norm": 0.9780673382319126, + "learning_rate": 4.847897473313847e-06, + "loss": 0.3871, + "step": 11085 + }, + { + "epoch": 0.6750905824681058, + "grad_norm": 1.0962200242230788, + "learning_rate": 4.8478700636370435e-06, + "loss": 0.4026, + "step": 11086 + }, + { + "epoch": 0.6751514782449837, + "grad_norm": 1.0833928997621967, + "learning_rate": 4.847842651568278e-06, + "loss": 0.4142, + "step": 11087 + }, + { + "epoch": 0.6752123740218616, + "grad_norm": 0.926046760790489, + "learning_rate": 4.84781523710758e-06, + "loss": 0.4661, + "step": 11088 + }, + { + "epoch": 0.6752732697987395, + "grad_norm": 0.9860629389591611, + "learning_rate": 4.847787820254976e-06, + "loss": 0.4081, + "step": 11089 + }, + { + "epoch": 0.6753341655756173, + "grad_norm": 1.0106140354002344, + "learning_rate": 4.847760401010495e-06, + "loss": 0.3965, + "step": 11090 + }, + { + "epoch": 0.6753950613524952, + "grad_norm": 0.9760027853677836, + "learning_rate": 4.847732979374165e-06, + "loss": 0.4733, + "step": 11091 + }, + { + "epoch": 0.6754559571293731, + "grad_norm": 0.9815180684571929, + "learning_rate": 4.847705555346013e-06, + "loss": 0.4398, + "step": 11092 + }, + { + "epoch": 0.675516852906251, + "grad_norm": 0.988699202758586, + "learning_rate": 4.847678128926067e-06, + "loss": 0.3667, + "step": 11093 + }, + { + "epoch": 0.6755777486831288, + "grad_norm": 1.060188559196745, + "learning_rate": 4.847650700114355e-06, + "loss": 0.3953, + "step": 11094 + }, + { + "epoch": 0.6756386444600067, + "grad_norm": 1.0354818467631446, + "learning_rate": 4.847623268910906e-06, + "loss": 0.4182, + "step": 11095 + }, + { + "epoch": 0.6756995402368846, + "grad_norm": 0.9707223055953975, + "learning_rate": 4.847595835315747e-06, + "loss": 0.4583, + "step": 11096 + }, + { + "epoch": 0.6757604360137625, + "grad_norm": 0.9425341327915804, + "learning_rate": 4.8475683993289065e-06, + "loss": 0.4699, + "step": 11097 + }, + { + "epoch": 0.6758213317906403, + "grad_norm": 0.988243289197217, + "learning_rate": 4.847540960950412e-06, + "loss": 0.4534, + "step": 11098 + }, + { + "epoch": 0.6758822275675181, + "grad_norm": 0.9528666235786516, + "learning_rate": 4.8475135201802915e-06, + "loss": 0.4906, + "step": 11099 + }, + { + "epoch": 0.6759431233443961, + "grad_norm": 1.0411895378733695, + "learning_rate": 4.847486077018573e-06, + "loss": 0.4077, + "step": 11100 + }, + { + "epoch": 0.676004019121274, + "grad_norm": 0.957753289934023, + "learning_rate": 4.847458631465285e-06, + "loss": 0.4565, + "step": 11101 + }, + { + "epoch": 0.6760649148981518, + "grad_norm": 0.8902918200899529, + "learning_rate": 4.847431183520454e-06, + "loss": 0.4632, + "step": 11102 + }, + { + "epoch": 0.6761258106750296, + "grad_norm": 0.9471954268624351, + "learning_rate": 4.84740373318411e-06, + "loss": 0.4373, + "step": 11103 + }, + { + "epoch": 0.6761867064519076, + "grad_norm": 0.9665624841816434, + "learning_rate": 4.847376280456279e-06, + "loss": 0.4253, + "step": 11104 + }, + { + "epoch": 0.6762476022287854, + "grad_norm": 1.0404658092708357, + "learning_rate": 4.847348825336989e-06, + "loss": 0.3605, + "step": 11105 + }, + { + "epoch": 0.6763084980056633, + "grad_norm": 0.9781461412325797, + "learning_rate": 4.847321367826271e-06, + "loss": 0.4391, + "step": 11106 + }, + { + "epoch": 0.6763693937825411, + "grad_norm": 1.0524852009434085, + "learning_rate": 4.8472939079241496e-06, + "loss": 0.4383, + "step": 11107 + }, + { + "epoch": 0.6764302895594191, + "grad_norm": 1.0256936030222337, + "learning_rate": 4.847266445630654e-06, + "loss": 0.4375, + "step": 11108 + }, + { + "epoch": 0.6764911853362969, + "grad_norm": 0.990346062282462, + "learning_rate": 4.847238980945812e-06, + "loss": 0.464, + "step": 11109 + }, + { + "epoch": 0.6765520811131748, + "grad_norm": 1.0635451459793475, + "learning_rate": 4.847211513869652e-06, + "loss": 0.3744, + "step": 11110 + }, + { + "epoch": 0.6766129768900526, + "grad_norm": 1.041623817307115, + "learning_rate": 4.847184044402202e-06, + "loss": 0.4826, + "step": 11111 + }, + { + "epoch": 0.6766738726669306, + "grad_norm": 1.0267300373809238, + "learning_rate": 4.847156572543489e-06, + "loss": 0.4678, + "step": 11112 + }, + { + "epoch": 0.6767347684438084, + "grad_norm": 1.0037743038873563, + "learning_rate": 4.847129098293542e-06, + "loss": 0.4797, + "step": 11113 + }, + { + "epoch": 0.6767956642206863, + "grad_norm": 0.9871416522647254, + "learning_rate": 4.8471016216523894e-06, + "loss": 0.4172, + "step": 11114 + }, + { + "epoch": 0.6768565599975642, + "grad_norm": 0.9913200123797524, + "learning_rate": 4.847074142620057e-06, + "loss": 0.4101, + "step": 11115 + }, + { + "epoch": 0.6769174557744421, + "grad_norm": 1.1320525907300296, + "learning_rate": 4.847046661196575e-06, + "loss": 0.4233, + "step": 11116 + }, + { + "epoch": 0.6769783515513199, + "grad_norm": 0.9957202135279261, + "learning_rate": 4.847019177381971e-06, + "loss": 0.4671, + "step": 11117 + }, + { + "epoch": 0.6770392473281978, + "grad_norm": 1.1341819874707992, + "learning_rate": 4.846991691176272e-06, + "loss": 0.4971, + "step": 11118 + }, + { + "epoch": 0.6771001431050757, + "grad_norm": 0.9418739783193932, + "learning_rate": 4.846964202579507e-06, + "loss": 0.4659, + "step": 11119 + }, + { + "epoch": 0.6771610388819536, + "grad_norm": 1.0808784653439325, + "learning_rate": 4.846936711591704e-06, + "loss": 0.3435, + "step": 11120 + }, + { + "epoch": 0.6772219346588314, + "grad_norm": 0.897539636139626, + "learning_rate": 4.84690921821289e-06, + "loss": 0.4632, + "step": 11121 + }, + { + "epoch": 0.6772828304357092, + "grad_norm": 1.0219239840177803, + "learning_rate": 4.8468817224430945e-06, + "loss": 0.4535, + "step": 11122 + }, + { + "epoch": 0.6773437262125872, + "grad_norm": 1.011731048083088, + "learning_rate": 4.846854224282344e-06, + "loss": 0.443, + "step": 11123 + }, + { + "epoch": 0.677404621989465, + "grad_norm": 1.0998338425114227, + "learning_rate": 4.846826723730668e-06, + "loss": 0.4133, + "step": 11124 + }, + { + "epoch": 0.6774655177663429, + "grad_norm": 1.0591216557392324, + "learning_rate": 4.846799220788092e-06, + "loss": 0.4077, + "step": 11125 + }, + { + "epoch": 0.6775264135432207, + "grad_norm": 1.053723352050714, + "learning_rate": 4.846771715454648e-06, + "loss": 0.359, + "step": 11126 + }, + { + "epoch": 0.6775873093200987, + "grad_norm": 0.9676087862253189, + "learning_rate": 4.846744207730361e-06, + "loss": 0.4608, + "step": 11127 + }, + { + "epoch": 0.6776482050969765, + "grad_norm": 0.9931438177941839, + "learning_rate": 4.846716697615259e-06, + "loss": 0.3801, + "step": 11128 + }, + { + "epoch": 0.6777091008738544, + "grad_norm": 1.017543346463844, + "learning_rate": 4.846689185109371e-06, + "loss": 0.434, + "step": 11129 + }, + { + "epoch": 0.6777699966507322, + "grad_norm": 1.0176375655800487, + "learning_rate": 4.846661670212725e-06, + "loss": 0.4445, + "step": 11130 + }, + { + "epoch": 0.6778308924276102, + "grad_norm": 0.9382093962805554, + "learning_rate": 4.846634152925349e-06, + "loss": 0.4779, + "step": 11131 + }, + { + "epoch": 0.677891788204488, + "grad_norm": 0.9290558087889601, + "learning_rate": 4.846606633247271e-06, + "loss": 0.4348, + "step": 11132 + }, + { + "epoch": 0.6779526839813659, + "grad_norm": 1.1085710457658675, + "learning_rate": 4.846579111178519e-06, + "loss": 0.3731, + "step": 11133 + }, + { + "epoch": 0.6780135797582437, + "grad_norm": 1.0300956818120826, + "learning_rate": 4.846551586719121e-06, + "loss": 0.4566, + "step": 11134 + }, + { + "epoch": 0.6780744755351217, + "grad_norm": 1.0311922283378039, + "learning_rate": 4.846524059869104e-06, + "loss": 0.4061, + "step": 11135 + }, + { + "epoch": 0.6781353713119995, + "grad_norm": 0.9557337487719128, + "learning_rate": 4.8464965306284985e-06, + "loss": 0.4412, + "step": 11136 + }, + { + "epoch": 0.6781962670888774, + "grad_norm": 1.0499453501349596, + "learning_rate": 4.846468998997331e-06, + "loss": 0.366, + "step": 11137 + }, + { + "epoch": 0.6782571628657552, + "grad_norm": 0.975057548164393, + "learning_rate": 4.846441464975629e-06, + "loss": 0.5317, + "step": 11138 + }, + { + "epoch": 0.6783180586426332, + "grad_norm": 1.2705392577685275, + "learning_rate": 4.846413928563421e-06, + "loss": 0.4445, + "step": 11139 + }, + { + "epoch": 0.678378954419511, + "grad_norm": 1.004350398321868, + "learning_rate": 4.846386389760737e-06, + "loss": 0.4231, + "step": 11140 + }, + { + "epoch": 0.6784398501963889, + "grad_norm": 1.1312956550051845, + "learning_rate": 4.8463588485676015e-06, + "loss": 0.4222, + "step": 11141 + }, + { + "epoch": 0.6785007459732667, + "grad_norm": 0.9950605167268929, + "learning_rate": 4.846331304984045e-06, + "loss": 0.4322, + "step": 11142 + }, + { + "epoch": 0.6785616417501447, + "grad_norm": 0.960732872054951, + "learning_rate": 4.846303759010094e-06, + "loss": 0.4428, + "step": 11143 + }, + { + "epoch": 0.6786225375270225, + "grad_norm": 1.0146748797392138, + "learning_rate": 4.846276210645779e-06, + "loss": 0.457, + "step": 11144 + }, + { + "epoch": 0.6786834333039004, + "grad_norm": 0.9723270679074975, + "learning_rate": 4.846248659891126e-06, + "loss": 0.4171, + "step": 11145 + }, + { + "epoch": 0.6787443290807782, + "grad_norm": 0.9400445911519925, + "learning_rate": 4.846221106746164e-06, + "loss": 0.414, + "step": 11146 + }, + { + "epoch": 0.6788052248576562, + "grad_norm": 1.0350943154385943, + "learning_rate": 4.84619355121092e-06, + "loss": 0.4273, + "step": 11147 + }, + { + "epoch": 0.678866120634534, + "grad_norm": 0.9150563858129426, + "learning_rate": 4.846165993285423e-06, + "loss": 0.4648, + "step": 11148 + }, + { + "epoch": 0.6789270164114118, + "grad_norm": 0.9916965693247667, + "learning_rate": 4.846138432969702e-06, + "loss": 0.4373, + "step": 11149 + }, + { + "epoch": 0.6789879121882897, + "grad_norm": 1.0058421370177222, + "learning_rate": 4.846110870263782e-06, + "loss": 0.492, + "step": 11150 + }, + { + "epoch": 0.6790488079651676, + "grad_norm": 1.0284930380450996, + "learning_rate": 4.846083305167694e-06, + "loss": 0.3883, + "step": 11151 + }, + { + "epoch": 0.6791097037420455, + "grad_norm": 0.9690462598100611, + "learning_rate": 4.846055737681465e-06, + "loss": 0.4886, + "step": 11152 + }, + { + "epoch": 0.6791705995189233, + "grad_norm": 1.0037636671790746, + "learning_rate": 4.8460281678051235e-06, + "loss": 0.3983, + "step": 11153 + }, + { + "epoch": 0.6792314952958013, + "grad_norm": 1.007200284318885, + "learning_rate": 4.8460005955386964e-06, + "loss": 0.4079, + "step": 11154 + }, + { + "epoch": 0.6792923910726791, + "grad_norm": 1.0290659902925707, + "learning_rate": 4.845973020882214e-06, + "loss": 0.432, + "step": 11155 + }, + { + "epoch": 0.679353286849557, + "grad_norm": 0.9479707433724124, + "learning_rate": 4.8459454438357024e-06, + "loss": 0.4623, + "step": 11156 + }, + { + "epoch": 0.6794141826264348, + "grad_norm": 0.9549281840954996, + "learning_rate": 4.84591786439919e-06, + "loss": 0.5325, + "step": 11157 + }, + { + "epoch": 0.6794750784033128, + "grad_norm": 0.9385432140526372, + "learning_rate": 4.845890282572706e-06, + "loss": 0.4687, + "step": 11158 + }, + { + "epoch": 0.6795359741801906, + "grad_norm": 1.0181695562684667, + "learning_rate": 4.845862698356277e-06, + "loss": 0.4177, + "step": 11159 + }, + { + "epoch": 0.6795968699570685, + "grad_norm": 1.1156105485611822, + "learning_rate": 4.845835111749933e-06, + "loss": 0.4098, + "step": 11160 + }, + { + "epoch": 0.6796577657339463, + "grad_norm": 0.9692022761144092, + "learning_rate": 4.8458075227536995e-06, + "loss": 0.4115, + "step": 11161 + }, + { + "epoch": 0.6797186615108243, + "grad_norm": 1.0688773372288656, + "learning_rate": 4.845779931367607e-06, + "loss": 0.401, + "step": 11162 + }, + { + "epoch": 0.6797795572877021, + "grad_norm": 0.9385719543209087, + "learning_rate": 4.845752337591682e-06, + "loss": 0.4805, + "step": 11163 + }, + { + "epoch": 0.67984045306458, + "grad_norm": 1.150507514820652, + "learning_rate": 4.845724741425954e-06, + "loss": 0.3831, + "step": 11164 + }, + { + "epoch": 0.6799013488414578, + "grad_norm": 1.0339319295692415, + "learning_rate": 4.84569714287045e-06, + "loss": 0.4153, + "step": 11165 + }, + { + "epoch": 0.6799622446183358, + "grad_norm": 1.029999006895575, + "learning_rate": 4.845669541925198e-06, + "loss": 0.4231, + "step": 11166 + }, + { + "epoch": 0.6800231403952136, + "grad_norm": 1.0879459178503468, + "learning_rate": 4.845641938590228e-06, + "loss": 0.4064, + "step": 11167 + }, + { + "epoch": 0.6800840361720915, + "grad_norm": 0.9708916042839965, + "learning_rate": 4.845614332865566e-06, + "loss": 0.4401, + "step": 11168 + }, + { + "epoch": 0.6801449319489693, + "grad_norm": 0.9440924255380537, + "learning_rate": 4.845586724751241e-06, + "loss": 0.426, + "step": 11169 + }, + { + "epoch": 0.6802058277258473, + "grad_norm": 1.0787774986984227, + "learning_rate": 4.84555911424728e-06, + "loss": 0.3752, + "step": 11170 + }, + { + "epoch": 0.6802667235027251, + "grad_norm": 1.0063941050619316, + "learning_rate": 4.845531501353714e-06, + "loss": 0.4148, + "step": 11171 + }, + { + "epoch": 0.680327619279603, + "grad_norm": 1.018146567067235, + "learning_rate": 4.845503886070568e-06, + "loss": 0.4191, + "step": 11172 + }, + { + "epoch": 0.6803885150564808, + "grad_norm": 1.054932953175995, + "learning_rate": 4.845476268397872e-06, + "loss": 0.4525, + "step": 11173 + }, + { + "epoch": 0.6804494108333587, + "grad_norm": 0.9929617864020082, + "learning_rate": 4.8454486483356526e-06, + "loss": 0.4617, + "step": 11174 + }, + { + "epoch": 0.6805103066102366, + "grad_norm": 1.0142235326869604, + "learning_rate": 4.84542102588394e-06, + "loss": 0.4088, + "step": 11175 + }, + { + "epoch": 0.6805712023871144, + "grad_norm": 1.018384752519241, + "learning_rate": 4.84539340104276e-06, + "loss": 0.4018, + "step": 11176 + }, + { + "epoch": 0.6806320981639923, + "grad_norm": 0.9330932700806039, + "learning_rate": 4.845365773812143e-06, + "loss": 0.4517, + "step": 11177 + }, + { + "epoch": 0.6806929939408702, + "grad_norm": 1.098962689559958, + "learning_rate": 4.845338144192116e-06, + "loss": 0.4357, + "step": 11178 + }, + { + "epoch": 0.6807538897177481, + "grad_norm": 1.140970759987273, + "learning_rate": 4.845310512182707e-06, + "loss": 0.3979, + "step": 11179 + }, + { + "epoch": 0.6808147854946259, + "grad_norm": 1.0701940078310788, + "learning_rate": 4.845282877783945e-06, + "loss": 0.4286, + "step": 11180 + }, + { + "epoch": 0.6808756812715038, + "grad_norm": 1.0129667103894955, + "learning_rate": 4.845255240995856e-06, + "loss": 0.4325, + "step": 11181 + }, + { + "epoch": 0.6809365770483817, + "grad_norm": 1.0161220592514184, + "learning_rate": 4.845227601818472e-06, + "loss": 0.4312, + "step": 11182 + }, + { + "epoch": 0.6809974728252596, + "grad_norm": 0.9286197409768241, + "learning_rate": 4.8451999602518175e-06, + "loss": 0.4096, + "step": 11183 + }, + { + "epoch": 0.6810583686021374, + "grad_norm": 1.0064229392890214, + "learning_rate": 4.845172316295922e-06, + "loss": 0.4148, + "step": 11184 + }, + { + "epoch": 0.6811192643790153, + "grad_norm": 1.0813968318452267, + "learning_rate": 4.8451446699508145e-06, + "loss": 0.393, + "step": 11185 + }, + { + "epoch": 0.6811801601558932, + "grad_norm": 0.9123667537223084, + "learning_rate": 4.845117021216521e-06, + "loss": 0.4608, + "step": 11186 + }, + { + "epoch": 0.6812410559327711, + "grad_norm": 1.1851900880047013, + "learning_rate": 4.845089370093073e-06, + "loss": 0.5489, + "step": 11187 + }, + { + "epoch": 0.6813019517096489, + "grad_norm": 0.9354402103397539, + "learning_rate": 4.845061716580495e-06, + "loss": 0.4317, + "step": 11188 + }, + { + "epoch": 0.6813628474865268, + "grad_norm": 0.881504801822446, + "learning_rate": 4.845034060678817e-06, + "loss": 0.43, + "step": 11189 + }, + { + "epoch": 0.6814237432634047, + "grad_norm": 1.0990378561794805, + "learning_rate": 4.845006402388067e-06, + "loss": 0.4057, + "step": 11190 + }, + { + "epoch": 0.6814846390402826, + "grad_norm": 1.036142632607258, + "learning_rate": 4.844978741708274e-06, + "loss": 0.3712, + "step": 11191 + }, + { + "epoch": 0.6815455348171604, + "grad_norm": 0.9670343928321802, + "learning_rate": 4.844951078639465e-06, + "loss": 0.4672, + "step": 11192 + }, + { + "epoch": 0.6816064305940382, + "grad_norm": 1.0059186704092156, + "learning_rate": 4.844923413181668e-06, + "loss": 0.3869, + "step": 11193 + }, + { + "epoch": 0.6816673263709162, + "grad_norm": 1.069398140590521, + "learning_rate": 4.8448957453349135e-06, + "loss": 0.4573, + "step": 11194 + }, + { + "epoch": 0.681728222147794, + "grad_norm": 0.9830114165903263, + "learning_rate": 4.844868075099227e-06, + "loss": 0.4523, + "step": 11195 + }, + { + "epoch": 0.6817891179246719, + "grad_norm": 1.044673592561948, + "learning_rate": 4.844840402474637e-06, + "loss": 0.3958, + "step": 11196 + }, + { + "epoch": 0.6818500137015499, + "grad_norm": 1.0126290863258713, + "learning_rate": 4.844812727461174e-06, + "loss": 0.4525, + "step": 11197 + }, + { + "epoch": 0.6819109094784277, + "grad_norm": 1.039060264053878, + "learning_rate": 4.844785050058862e-06, + "loss": 0.4461, + "step": 11198 + }, + { + "epoch": 0.6819718052553055, + "grad_norm": 0.9882938836934559, + "learning_rate": 4.8447573702677335e-06, + "loss": 0.5071, + "step": 11199 + }, + { + "epoch": 0.6820327010321834, + "grad_norm": 0.952277102920067, + "learning_rate": 4.844729688087815e-06, + "loss": 0.5249, + "step": 11200 + }, + { + "epoch": 0.6820935968090613, + "grad_norm": 0.9753202211406046, + "learning_rate": 4.844702003519134e-06, + "loss": 0.4007, + "step": 11201 + }, + { + "epoch": 0.6821544925859392, + "grad_norm": 1.0211282860211328, + "learning_rate": 4.84467431656172e-06, + "loss": 0.4265, + "step": 11202 + }, + { + "epoch": 0.682215388362817, + "grad_norm": 0.9595245078671515, + "learning_rate": 4.8446466272156e-06, + "loss": 0.4776, + "step": 11203 + }, + { + "epoch": 0.6822762841396949, + "grad_norm": 1.0410038359171714, + "learning_rate": 4.844618935480803e-06, + "loss": 0.3834, + "step": 11204 + }, + { + "epoch": 0.6823371799165728, + "grad_norm": 1.0204892429755141, + "learning_rate": 4.844591241357357e-06, + "loss": 0.4388, + "step": 11205 + }, + { + "epoch": 0.6823980756934507, + "grad_norm": 1.038304577386356, + "learning_rate": 4.84456354484529e-06, + "loss": 0.4767, + "step": 11206 + }, + { + "epoch": 0.6824589714703285, + "grad_norm": 1.074157933528745, + "learning_rate": 4.844535845944631e-06, + "loss": 0.4016, + "step": 11207 + }, + { + "epoch": 0.6825198672472064, + "grad_norm": 0.9948603751448493, + "learning_rate": 4.844508144655407e-06, + "loss": 0.4389, + "step": 11208 + }, + { + "epoch": 0.6825807630240843, + "grad_norm": 0.8929747309636394, + "learning_rate": 4.844480440977647e-06, + "loss": 0.4426, + "step": 11209 + }, + { + "epoch": 0.6826416588009622, + "grad_norm": 0.9751930849496584, + "learning_rate": 4.844452734911379e-06, + "loss": 0.4079, + "step": 11210 + }, + { + "epoch": 0.68270255457784, + "grad_norm": 0.9386377698819374, + "learning_rate": 4.844425026456631e-06, + "loss": 0.4948, + "step": 11211 + }, + { + "epoch": 0.6827634503547179, + "grad_norm": 1.0171549573266911, + "learning_rate": 4.8443973156134325e-06, + "loss": 0.3883, + "step": 11212 + }, + { + "epoch": 0.6828243461315958, + "grad_norm": 1.0791576091440063, + "learning_rate": 4.84436960238181e-06, + "loss": 0.4441, + "step": 11213 + }, + { + "epoch": 0.6828852419084737, + "grad_norm": 0.9818667774564157, + "learning_rate": 4.844341886761792e-06, + "loss": 0.4113, + "step": 11214 + }, + { + "epoch": 0.6829461376853515, + "grad_norm": 1.0135370058988455, + "learning_rate": 4.844314168753408e-06, + "loss": 0.398, + "step": 11215 + }, + { + "epoch": 0.6830070334622294, + "grad_norm": 1.0415898241292145, + "learning_rate": 4.844286448356685e-06, + "loss": 0.3822, + "step": 11216 + }, + { + "epoch": 0.6830679292391073, + "grad_norm": 0.9846388364245625, + "learning_rate": 4.844258725571652e-06, + "loss": 0.4383, + "step": 11217 + }, + { + "epoch": 0.6831288250159852, + "grad_norm": 0.9622492795542101, + "learning_rate": 4.8442310003983375e-06, + "loss": 0.4453, + "step": 11218 + }, + { + "epoch": 0.683189720792863, + "grad_norm": 1.0047119943008171, + "learning_rate": 4.844203272836769e-06, + "loss": 0.4746, + "step": 11219 + }, + { + "epoch": 0.6832506165697408, + "grad_norm": 1.0255105606795734, + "learning_rate": 4.8441755428869745e-06, + "loss": 0.4721, + "step": 11220 + }, + { + "epoch": 0.6833115123466188, + "grad_norm": 1.0512832431751105, + "learning_rate": 4.844147810548983e-06, + "loss": 0.3971, + "step": 11221 + }, + { + "epoch": 0.6833724081234966, + "grad_norm": 0.9928947185443288, + "learning_rate": 4.844120075822822e-06, + "loss": 0.3966, + "step": 11222 + }, + { + "epoch": 0.6834333039003745, + "grad_norm": 1.024833880039256, + "learning_rate": 4.84409233870852e-06, + "loss": 0.4582, + "step": 11223 + }, + { + "epoch": 0.6834941996772523, + "grad_norm": 0.9832639667088942, + "learning_rate": 4.844064599206107e-06, + "loss": 0.4349, + "step": 11224 + }, + { + "epoch": 0.6835550954541303, + "grad_norm": 0.9645340322561644, + "learning_rate": 4.844036857315609e-06, + "loss": 0.4011, + "step": 11225 + }, + { + "epoch": 0.6836159912310081, + "grad_norm": 0.981428713061004, + "learning_rate": 4.8440091130370535e-06, + "loss": 0.3886, + "step": 11226 + }, + { + "epoch": 0.683676887007886, + "grad_norm": 0.9976808827971598, + "learning_rate": 4.843981366370472e-06, + "loss": 0.4541, + "step": 11227 + }, + { + "epoch": 0.6837377827847638, + "grad_norm": 1.0736484182545374, + "learning_rate": 4.843953617315889e-06, + "loss": 0.3744, + "step": 11228 + }, + { + "epoch": 0.6837986785616418, + "grad_norm": 0.926185990014509, + "learning_rate": 4.843925865873337e-06, + "loss": 0.4378, + "step": 11229 + }, + { + "epoch": 0.6838595743385196, + "grad_norm": 0.9982186911168145, + "learning_rate": 4.8438981120428415e-06, + "loss": 0.3703, + "step": 11230 + }, + { + "epoch": 0.6839204701153975, + "grad_norm": 1.0052295673088774, + "learning_rate": 4.84387035582443e-06, + "loss": 0.3824, + "step": 11231 + }, + { + "epoch": 0.6839813658922753, + "grad_norm": 0.9572075434041996, + "learning_rate": 4.843842597218134e-06, + "loss": 0.4062, + "step": 11232 + }, + { + "epoch": 0.6840422616691533, + "grad_norm": 1.0098180183831877, + "learning_rate": 4.843814836223979e-06, + "loss": 0.4061, + "step": 11233 + }, + { + "epoch": 0.6841031574460311, + "grad_norm": 0.9460291632391699, + "learning_rate": 4.843787072841995e-06, + "loss": 0.5189, + "step": 11234 + }, + { + "epoch": 0.684164053222909, + "grad_norm": 0.985827786791382, + "learning_rate": 4.843759307072208e-06, + "loss": 0.4643, + "step": 11235 + }, + { + "epoch": 0.6842249489997869, + "grad_norm": 1.0461205574647343, + "learning_rate": 4.843731538914649e-06, + "loss": 0.4247, + "step": 11236 + }, + { + "epoch": 0.6842858447766648, + "grad_norm": 0.9441757014928721, + "learning_rate": 4.843703768369344e-06, + "loss": 0.4244, + "step": 11237 + }, + { + "epoch": 0.6843467405535426, + "grad_norm": 1.0612689190940585, + "learning_rate": 4.843675995436323e-06, + "loss": 0.3645, + "step": 11238 + }, + { + "epoch": 0.6844076363304205, + "grad_norm": 0.9750205298094589, + "learning_rate": 4.843648220115613e-06, + "loss": 0.3915, + "step": 11239 + }, + { + "epoch": 0.6844685321072984, + "grad_norm": 1.039665497678461, + "learning_rate": 4.843620442407243e-06, + "loss": 0.3757, + "step": 11240 + }, + { + "epoch": 0.6845294278841763, + "grad_norm": 0.8864663690381395, + "learning_rate": 4.843592662311243e-06, + "loss": 0.4716, + "step": 11241 + }, + { + "epoch": 0.6845903236610541, + "grad_norm": 1.0198458591988109, + "learning_rate": 4.843564879827637e-06, + "loss": 0.4231, + "step": 11242 + }, + { + "epoch": 0.684651219437932, + "grad_norm": 0.9817675290207709, + "learning_rate": 4.843537094956457e-06, + "loss": 0.3803, + "step": 11243 + }, + { + "epoch": 0.6847121152148099, + "grad_norm": 1.0329122273257827, + "learning_rate": 4.84350930769773e-06, + "loss": 0.4704, + "step": 11244 + }, + { + "epoch": 0.6847730109916877, + "grad_norm": 1.0137607919492677, + "learning_rate": 4.843481518051485e-06, + "loss": 0.4131, + "step": 11245 + }, + { + "epoch": 0.6848339067685656, + "grad_norm": 0.9525504890283382, + "learning_rate": 4.843453726017749e-06, + "loss": 0.4243, + "step": 11246 + }, + { + "epoch": 0.6848948025454434, + "grad_norm": 0.9876747185579196, + "learning_rate": 4.843425931596551e-06, + "loss": 0.4544, + "step": 11247 + }, + { + "epoch": 0.6849556983223214, + "grad_norm": 1.0713725610229106, + "learning_rate": 4.84339813478792e-06, + "loss": 0.3893, + "step": 11248 + }, + { + "epoch": 0.6850165940991992, + "grad_norm": 0.9662468483184822, + "learning_rate": 4.8433703355918835e-06, + "loss": 0.4149, + "step": 11249 + }, + { + "epoch": 0.6850774898760771, + "grad_norm": 1.005996281961638, + "learning_rate": 4.84334253400847e-06, + "loss": 0.4544, + "step": 11250 + }, + { + "epoch": 0.6851383856529549, + "grad_norm": 0.9705706177845823, + "learning_rate": 4.843314730037708e-06, + "loss": 0.4048, + "step": 11251 + }, + { + "epoch": 0.6851992814298329, + "grad_norm": 1.0193555995797134, + "learning_rate": 4.843286923679625e-06, + "loss": 0.4249, + "step": 11252 + }, + { + "epoch": 0.6852601772067107, + "grad_norm": 1.0334784569731588, + "learning_rate": 4.84325911493425e-06, + "loss": 0.4279, + "step": 11253 + }, + { + "epoch": 0.6853210729835886, + "grad_norm": 1.0374098479439555, + "learning_rate": 4.843231303801612e-06, + "loss": 0.4091, + "step": 11254 + }, + { + "epoch": 0.6853819687604664, + "grad_norm": 0.9238504095077457, + "learning_rate": 4.843203490281738e-06, + "loss": 0.3966, + "step": 11255 + }, + { + "epoch": 0.6854428645373444, + "grad_norm": 0.9430530020942196, + "learning_rate": 4.843175674374658e-06, + "loss": 0.4649, + "step": 11256 + }, + { + "epoch": 0.6855037603142222, + "grad_norm": 1.0801191008340738, + "learning_rate": 4.843147856080399e-06, + "loss": 0.4139, + "step": 11257 + }, + { + "epoch": 0.6855646560911001, + "grad_norm": 0.9975839185748404, + "learning_rate": 4.843120035398989e-06, + "loss": 0.4893, + "step": 11258 + }, + { + "epoch": 0.6856255518679779, + "grad_norm": 0.9428970495872931, + "learning_rate": 4.843092212330458e-06, + "loss": 0.4533, + "step": 11259 + }, + { + "epoch": 0.6856864476448559, + "grad_norm": 0.8605558369046944, + "learning_rate": 4.843064386874833e-06, + "loss": 0.499, + "step": 11260 + }, + { + "epoch": 0.6857473434217337, + "grad_norm": 1.1321842605961614, + "learning_rate": 4.843036559032142e-06, + "loss": 0.4009, + "step": 11261 + }, + { + "epoch": 0.6858082391986116, + "grad_norm": 1.0544654866638243, + "learning_rate": 4.843008728802415e-06, + "loss": 0.3655, + "step": 11262 + }, + { + "epoch": 0.6858691349754894, + "grad_norm": 1.0662882174223647, + "learning_rate": 4.842980896185679e-06, + "loss": 0.4346, + "step": 11263 + }, + { + "epoch": 0.6859300307523674, + "grad_norm": 1.1204143942781162, + "learning_rate": 4.842953061181963e-06, + "loss": 0.4011, + "step": 11264 + }, + { + "epoch": 0.6859909265292452, + "grad_norm": 1.0200140889963956, + "learning_rate": 4.842925223791295e-06, + "loss": 0.4647, + "step": 11265 + }, + { + "epoch": 0.686051822306123, + "grad_norm": 0.9493264007786301, + "learning_rate": 4.842897384013703e-06, + "loss": 0.4535, + "step": 11266 + }, + { + "epoch": 0.6861127180830009, + "grad_norm": 0.9418050492194633, + "learning_rate": 4.842869541849217e-06, + "loss": 0.4724, + "step": 11267 + }, + { + "epoch": 0.6861736138598789, + "grad_norm": 0.9561393518978135, + "learning_rate": 4.842841697297863e-06, + "loss": 0.4265, + "step": 11268 + }, + { + "epoch": 0.6862345096367567, + "grad_norm": 1.0098906743059768, + "learning_rate": 4.842813850359671e-06, + "loss": 0.4527, + "step": 11269 + }, + { + "epoch": 0.6862954054136345, + "grad_norm": 0.9674613452003347, + "learning_rate": 4.842786001034669e-06, + "loss": 0.4287, + "step": 11270 + }, + { + "epoch": 0.6863563011905124, + "grad_norm": 1.0414185152389743, + "learning_rate": 4.842758149322886e-06, + "loss": 0.4083, + "step": 11271 + }, + { + "epoch": 0.6864171969673903, + "grad_norm": 0.9623247434690707, + "learning_rate": 4.842730295224348e-06, + "loss": 0.3521, + "step": 11272 + }, + { + "epoch": 0.6864780927442682, + "grad_norm": 1.0436833956221936, + "learning_rate": 4.842702438739086e-06, + "loss": 0.4729, + "step": 11273 + }, + { + "epoch": 0.686538988521146, + "grad_norm": 0.9281000714625977, + "learning_rate": 4.842674579867128e-06, + "loss": 0.4345, + "step": 11274 + }, + { + "epoch": 0.6865998842980239, + "grad_norm": 1.0838043599362799, + "learning_rate": 4.842646718608502e-06, + "loss": 0.3557, + "step": 11275 + }, + { + "epoch": 0.6866607800749018, + "grad_norm": 1.0990434870905257, + "learning_rate": 4.842618854963236e-06, + "loss": 0.3664, + "step": 11276 + }, + { + "epoch": 0.6867216758517797, + "grad_norm": 1.0608007547966467, + "learning_rate": 4.8425909889313576e-06, + "loss": 0.4984, + "step": 11277 + }, + { + "epoch": 0.6867825716286575, + "grad_norm": 0.9535063844589946, + "learning_rate": 4.842563120512897e-06, + "loss": 0.3574, + "step": 11278 + }, + { + "epoch": 0.6868434674055355, + "grad_norm": 0.961817483588357, + "learning_rate": 4.842535249707882e-06, + "loss": 0.4371, + "step": 11279 + }, + { + "epoch": 0.6869043631824133, + "grad_norm": 1.032068037654684, + "learning_rate": 4.8425073765163406e-06, + "loss": 0.4894, + "step": 11280 + }, + { + "epoch": 0.6869652589592912, + "grad_norm": 0.9407896627068993, + "learning_rate": 4.842479500938301e-06, + "loss": 0.4372, + "step": 11281 + }, + { + "epoch": 0.687026154736169, + "grad_norm": 1.0344977859933948, + "learning_rate": 4.842451622973793e-06, + "loss": 0.4004, + "step": 11282 + }, + { + "epoch": 0.687087050513047, + "grad_norm": 1.0558638037903785, + "learning_rate": 4.842423742622842e-06, + "loss": 0.3768, + "step": 11283 + }, + { + "epoch": 0.6871479462899248, + "grad_norm": 1.1445253995792053, + "learning_rate": 4.8423958598854805e-06, + "loss": 0.4693, + "step": 11284 + }, + { + "epoch": 0.6872088420668027, + "grad_norm": 0.9985815456301655, + "learning_rate": 4.842367974761734e-06, + "loss": 0.4245, + "step": 11285 + }, + { + "epoch": 0.6872697378436805, + "grad_norm": 0.9367015980705219, + "learning_rate": 4.842340087251631e-06, + "loss": 0.3916, + "step": 11286 + }, + { + "epoch": 0.6873306336205585, + "grad_norm": 0.9322574606641667, + "learning_rate": 4.842312197355202e-06, + "loss": 0.4306, + "step": 11287 + }, + { + "epoch": 0.6873915293974363, + "grad_norm": 1.002912258507457, + "learning_rate": 4.842284305072473e-06, + "loss": 0.5383, + "step": 11288 + }, + { + "epoch": 0.6874524251743142, + "grad_norm": 0.9516063133959023, + "learning_rate": 4.842256410403474e-06, + "loss": 0.5401, + "step": 11289 + }, + { + "epoch": 0.687513320951192, + "grad_norm": 0.9891542747808003, + "learning_rate": 4.842228513348233e-06, + "loss": 0.3561, + "step": 11290 + }, + { + "epoch": 0.68757421672807, + "grad_norm": 1.03352242427367, + "learning_rate": 4.842200613906777e-06, + "loss": 0.48, + "step": 11291 + }, + { + "epoch": 0.6876351125049478, + "grad_norm": 1.0062412974062995, + "learning_rate": 4.8421727120791375e-06, + "loss": 0.4675, + "step": 11292 + }, + { + "epoch": 0.6876960082818256, + "grad_norm": 0.9853099483359881, + "learning_rate": 4.84214480786534e-06, + "loss": 0.3527, + "step": 11293 + }, + { + "epoch": 0.6877569040587035, + "grad_norm": 0.9789435676100101, + "learning_rate": 4.842116901265414e-06, + "loss": 0.4387, + "step": 11294 + }, + { + "epoch": 0.6878177998355814, + "grad_norm": 0.9716340815773928, + "learning_rate": 4.842088992279388e-06, + "loss": 0.445, + "step": 11295 + }, + { + "epoch": 0.6878786956124593, + "grad_norm": 1.0102016312954316, + "learning_rate": 4.842061080907291e-06, + "loss": 0.4366, + "step": 11296 + }, + { + "epoch": 0.6879395913893371, + "grad_norm": 1.0225156743759556, + "learning_rate": 4.842033167149151e-06, + "loss": 0.3986, + "step": 11297 + }, + { + "epoch": 0.688000487166215, + "grad_norm": 1.0465328061901993, + "learning_rate": 4.842005251004996e-06, + "loss": 0.4601, + "step": 11298 + }, + { + "epoch": 0.6880613829430929, + "grad_norm": 0.9973217046860448, + "learning_rate": 4.841977332474854e-06, + "loss": 0.4452, + "step": 11299 + }, + { + "epoch": 0.6881222787199708, + "grad_norm": 1.0187092042127226, + "learning_rate": 4.841949411558755e-06, + "loss": 0.4132, + "step": 11300 + }, + { + "epoch": 0.6881831744968486, + "grad_norm": 1.1043841660731006, + "learning_rate": 4.8419214882567266e-06, + "loss": 0.3771, + "step": 11301 + }, + { + "epoch": 0.6882440702737265, + "grad_norm": 1.014670893093701, + "learning_rate": 4.841893562568797e-06, + "loss": 0.3588, + "step": 11302 + }, + { + "epoch": 0.6883049660506044, + "grad_norm": 0.9498147776207586, + "learning_rate": 4.841865634494996e-06, + "loss": 0.4903, + "step": 11303 + }, + { + "epoch": 0.6883658618274823, + "grad_norm": 1.074984951287645, + "learning_rate": 4.84183770403535e-06, + "loss": 0.3969, + "step": 11304 + }, + { + "epoch": 0.6884267576043601, + "grad_norm": 0.9850134054172418, + "learning_rate": 4.841809771189888e-06, + "loss": 0.4144, + "step": 11305 + }, + { + "epoch": 0.688487653381238, + "grad_norm": 0.9896562835408252, + "learning_rate": 4.841781835958639e-06, + "loss": 0.4377, + "step": 11306 + }, + { + "epoch": 0.6885485491581159, + "grad_norm": 1.034392716992224, + "learning_rate": 4.841753898341632e-06, + "loss": 0.3945, + "step": 11307 + }, + { + "epoch": 0.6886094449349938, + "grad_norm": 0.9530158878886967, + "learning_rate": 4.841725958338895e-06, + "loss": 0.4325, + "step": 11308 + }, + { + "epoch": 0.6886703407118716, + "grad_norm": 1.0304004355358947, + "learning_rate": 4.8416980159504555e-06, + "loss": 0.427, + "step": 11309 + }, + { + "epoch": 0.6887312364887495, + "grad_norm": 0.958545451750549, + "learning_rate": 4.841670071176343e-06, + "loss": 0.4881, + "step": 11310 + }, + { + "epoch": 0.6887921322656274, + "grad_norm": 1.1087024016311717, + "learning_rate": 4.841642124016586e-06, + "loss": 0.3937, + "step": 11311 + }, + { + "epoch": 0.6888530280425053, + "grad_norm": 1.0652572389717716, + "learning_rate": 4.841614174471212e-06, + "loss": 0.3963, + "step": 11312 + }, + { + "epoch": 0.6889139238193831, + "grad_norm": 1.0934253319213703, + "learning_rate": 4.841586222540251e-06, + "loss": 0.4927, + "step": 11313 + }, + { + "epoch": 0.688974819596261, + "grad_norm": 0.944849597380424, + "learning_rate": 4.84155826822373e-06, + "loss": 0.4627, + "step": 11314 + }, + { + "epoch": 0.6890357153731389, + "grad_norm": 1.108322134523907, + "learning_rate": 4.841530311521679e-06, + "loss": 0.3836, + "step": 11315 + }, + { + "epoch": 0.6890966111500167, + "grad_norm": 0.9577221999891868, + "learning_rate": 4.841502352434125e-06, + "loss": 0.4969, + "step": 11316 + }, + { + "epoch": 0.6891575069268946, + "grad_norm": 1.0578898570265776, + "learning_rate": 4.841474390961097e-06, + "loss": 0.4113, + "step": 11317 + }, + { + "epoch": 0.6892184027037725, + "grad_norm": 0.997581703517481, + "learning_rate": 4.841446427102624e-06, + "loss": 0.4243, + "step": 11318 + }, + { + "epoch": 0.6892792984806504, + "grad_norm": 0.9859796143611578, + "learning_rate": 4.841418460858733e-06, + "loss": 0.4558, + "step": 11319 + }, + { + "epoch": 0.6893401942575282, + "grad_norm": 0.9372024043877244, + "learning_rate": 4.841390492229454e-06, + "loss": 0.472, + "step": 11320 + }, + { + "epoch": 0.6894010900344061, + "grad_norm": 0.9678088542103643, + "learning_rate": 4.841362521214816e-06, + "loss": 0.3923, + "step": 11321 + }, + { + "epoch": 0.689461985811284, + "grad_norm": 1.051735309133603, + "learning_rate": 4.8413345478148455e-06, + "loss": 0.4234, + "step": 11322 + }, + { + "epoch": 0.6895228815881619, + "grad_norm": 1.0110765105944437, + "learning_rate": 4.841306572029573e-06, + "loss": 0.4555, + "step": 11323 + }, + { + "epoch": 0.6895837773650397, + "grad_norm": 0.9038997783119908, + "learning_rate": 4.841278593859025e-06, + "loss": 0.4973, + "step": 11324 + }, + { + "epoch": 0.6896446731419176, + "grad_norm": 0.9427757399110027, + "learning_rate": 4.841250613303232e-06, + "loss": 0.5462, + "step": 11325 + }, + { + "epoch": 0.6897055689187955, + "grad_norm": 1.0054352749681015, + "learning_rate": 4.8412226303622206e-06, + "loss": 0.437, + "step": 11326 + }, + { + "epoch": 0.6897664646956734, + "grad_norm": 0.9297029494696228, + "learning_rate": 4.84119464503602e-06, + "loss": 0.4611, + "step": 11327 + }, + { + "epoch": 0.6898273604725512, + "grad_norm": 0.9881317155057507, + "learning_rate": 4.841166657324661e-06, + "loss": 0.4231, + "step": 11328 + }, + { + "epoch": 0.6898882562494291, + "grad_norm": 1.0485649675237259, + "learning_rate": 4.841138667228168e-06, + "loss": 0.3834, + "step": 11329 + }, + { + "epoch": 0.689949152026307, + "grad_norm": 1.0458900139245302, + "learning_rate": 4.841110674746573e-06, + "loss": 0.3816, + "step": 11330 + }, + { + "epoch": 0.6900100478031849, + "grad_norm": 0.9877477748252222, + "learning_rate": 4.841082679879902e-06, + "loss": 0.4379, + "step": 11331 + }, + { + "epoch": 0.6900709435800627, + "grad_norm": 0.9852565744597732, + "learning_rate": 4.841054682628185e-06, + "loss": 0.3685, + "step": 11332 + }, + { + "epoch": 0.6901318393569406, + "grad_norm": 1.0842550198423666, + "learning_rate": 4.84102668299145e-06, + "loss": 0.3523, + "step": 11333 + }, + { + "epoch": 0.6901927351338185, + "grad_norm": 1.0454659218731026, + "learning_rate": 4.840998680969726e-06, + "loss": 0.3909, + "step": 11334 + }, + { + "epoch": 0.6902536309106964, + "grad_norm": 0.9680514844998669, + "learning_rate": 4.840970676563041e-06, + "loss": 0.4076, + "step": 11335 + }, + { + "epoch": 0.6903145266875742, + "grad_norm": 1.0587870173296337, + "learning_rate": 4.8409426697714244e-06, + "loss": 0.4163, + "step": 11336 + }, + { + "epoch": 0.690375422464452, + "grad_norm": 0.929973069124356, + "learning_rate": 4.840914660594903e-06, + "loss": 0.4329, + "step": 11337 + }, + { + "epoch": 0.69043631824133, + "grad_norm": 1.0241341616539632, + "learning_rate": 4.840886649033508e-06, + "loss": 0.4166, + "step": 11338 + }, + { + "epoch": 0.6904972140182078, + "grad_norm": 0.9734607294162269, + "learning_rate": 4.840858635087265e-06, + "loss": 0.4331, + "step": 11339 + }, + { + "epoch": 0.6905581097950857, + "grad_norm": 0.9320558075766164, + "learning_rate": 4.840830618756204e-06, + "loss": 0.4473, + "step": 11340 + }, + { + "epoch": 0.6906190055719635, + "grad_norm": 1.0313175763271245, + "learning_rate": 4.840802600040354e-06, + "loss": 0.3903, + "step": 11341 + }, + { + "epoch": 0.6906799013488415, + "grad_norm": 0.9883327917516034, + "learning_rate": 4.840774578939743e-06, + "loss": 0.4057, + "step": 11342 + }, + { + "epoch": 0.6907407971257193, + "grad_norm": 1.0052011614293905, + "learning_rate": 4.840746555454399e-06, + "loss": 0.4072, + "step": 11343 + }, + { + "epoch": 0.6908016929025972, + "grad_norm": 1.0416028269088753, + "learning_rate": 4.840718529584351e-06, + "loss": 0.3604, + "step": 11344 + }, + { + "epoch": 0.690862588679475, + "grad_norm": 1.106720988727926, + "learning_rate": 4.840690501329628e-06, + "loss": 0.3985, + "step": 11345 + }, + { + "epoch": 0.690923484456353, + "grad_norm": 1.0379346740245485, + "learning_rate": 4.840662470690259e-06, + "loss": 0.4226, + "step": 11346 + }, + { + "epoch": 0.6909843802332308, + "grad_norm": 0.9786893103981033, + "learning_rate": 4.8406344376662695e-06, + "loss": 0.4222, + "step": 11347 + }, + { + "epoch": 0.6910452760101087, + "grad_norm": 1.0299936076983354, + "learning_rate": 4.840606402257691e-06, + "loss": 0.4206, + "step": 11348 + }, + { + "epoch": 0.6911061717869865, + "grad_norm": 1.049512196551491, + "learning_rate": 4.840578364464553e-06, + "loss": 0.3879, + "step": 11349 + }, + { + "epoch": 0.6911670675638645, + "grad_norm": 1.079732591225169, + "learning_rate": 4.840550324286881e-06, + "loss": 0.4382, + "step": 11350 + }, + { + "epoch": 0.6912279633407423, + "grad_norm": 0.9844343094206075, + "learning_rate": 4.840522281724706e-06, + "loss": 0.4319, + "step": 11351 + }, + { + "epoch": 0.6912888591176202, + "grad_norm": 1.056501198263205, + "learning_rate": 4.840494236778054e-06, + "loss": 0.365, + "step": 11352 + }, + { + "epoch": 0.691349754894498, + "grad_norm": 0.9926846876643634, + "learning_rate": 4.840466189446956e-06, + "loss": 0.4539, + "step": 11353 + }, + { + "epoch": 0.691410650671376, + "grad_norm": 1.0195358679089632, + "learning_rate": 4.840438139731441e-06, + "loss": 0.396, + "step": 11354 + }, + { + "epoch": 0.6914715464482538, + "grad_norm": 0.9682313150456536, + "learning_rate": 4.840410087631534e-06, + "loss": 0.4297, + "step": 11355 + }, + { + "epoch": 0.6915324422251317, + "grad_norm": 1.0611880901428044, + "learning_rate": 4.8403820331472675e-06, + "loss": 0.4021, + "step": 11356 + }, + { + "epoch": 0.6915933380020095, + "grad_norm": 0.9389218989115373, + "learning_rate": 4.840353976278668e-06, + "loss": 0.4233, + "step": 11357 + }, + { + "epoch": 0.6916542337788875, + "grad_norm": 0.9351485353529757, + "learning_rate": 4.840325917025764e-06, + "loss": 0.4142, + "step": 11358 + }, + { + "epoch": 0.6917151295557653, + "grad_norm": 0.9839699459071233, + "learning_rate": 4.840297855388585e-06, + "loss": 0.4004, + "step": 11359 + }, + { + "epoch": 0.6917760253326432, + "grad_norm": 0.9937192073033502, + "learning_rate": 4.840269791367159e-06, + "loss": 0.4434, + "step": 11360 + }, + { + "epoch": 0.6918369211095211, + "grad_norm": 0.9701236730925777, + "learning_rate": 4.840241724961515e-06, + "loss": 0.4389, + "step": 11361 + }, + { + "epoch": 0.691897816886399, + "grad_norm": 1.0725691938099917, + "learning_rate": 4.840213656171682e-06, + "loss": 0.378, + "step": 11362 + }, + { + "epoch": 0.6919587126632768, + "grad_norm": 1.0769902311618988, + "learning_rate": 4.840185584997687e-06, + "loss": 0.4217, + "step": 11363 + }, + { + "epoch": 0.6920196084401546, + "grad_norm": 1.032824693669767, + "learning_rate": 4.84015751143956e-06, + "loss": 0.4449, + "step": 11364 + }, + { + "epoch": 0.6920805042170326, + "grad_norm": 0.9577944066307317, + "learning_rate": 4.840129435497329e-06, + "loss": 0.4268, + "step": 11365 + }, + { + "epoch": 0.6921413999939104, + "grad_norm": 1.0488195297463971, + "learning_rate": 4.840101357171023e-06, + "loss": 0.3799, + "step": 11366 + }, + { + "epoch": 0.6922022957707883, + "grad_norm": 0.8708481146449458, + "learning_rate": 4.84007327646067e-06, + "loss": 0.4709, + "step": 11367 + }, + { + "epoch": 0.6922631915476661, + "grad_norm": 0.9954767377868325, + "learning_rate": 4.8400451933662996e-06, + "loss": 0.4379, + "step": 11368 + }, + { + "epoch": 0.6923240873245441, + "grad_norm": 0.9960574660027427, + "learning_rate": 4.84001710788794e-06, + "loss": 0.4828, + "step": 11369 + }, + { + "epoch": 0.6923849831014219, + "grad_norm": 1.0541861486595525, + "learning_rate": 4.839989020025618e-06, + "loss": 0.4155, + "step": 11370 + }, + { + "epoch": 0.6924458788782998, + "grad_norm": 1.0601150028556532, + "learning_rate": 4.839960929779366e-06, + "loss": 0.3672, + "step": 11371 + }, + { + "epoch": 0.6925067746551776, + "grad_norm": 0.9854474456468896, + "learning_rate": 4.8399328371492095e-06, + "loss": 0.4731, + "step": 11372 + }, + { + "epoch": 0.6925676704320556, + "grad_norm": 1.0151418738122249, + "learning_rate": 4.839904742135178e-06, + "loss": 0.4147, + "step": 11373 + }, + { + "epoch": 0.6926285662089334, + "grad_norm": 1.0136057330833823, + "learning_rate": 4.839876644737299e-06, + "loss": 0.4085, + "step": 11374 + }, + { + "epoch": 0.6926894619858113, + "grad_norm": 0.9742038470179067, + "learning_rate": 4.8398485449556045e-06, + "loss": 0.4537, + "step": 11375 + }, + { + "epoch": 0.6927503577626891, + "grad_norm": 1.0330853810288068, + "learning_rate": 4.83982044279012e-06, + "loss": 0.3733, + "step": 11376 + }, + { + "epoch": 0.6928112535395671, + "grad_norm": 1.042839556992057, + "learning_rate": 4.839792338240875e-06, + "loss": 0.495, + "step": 11377 + }, + { + "epoch": 0.6928721493164449, + "grad_norm": 1.0062166629696345, + "learning_rate": 4.839764231307898e-06, + "loss": 0.4354, + "step": 11378 + }, + { + "epoch": 0.6929330450933228, + "grad_norm": 0.9987771812513735, + "learning_rate": 4.839736121991218e-06, + "loss": 0.4409, + "step": 11379 + }, + { + "epoch": 0.6929939408702006, + "grad_norm": 1.053139526236559, + "learning_rate": 4.839708010290863e-06, + "loss": 0.4589, + "step": 11380 + }, + { + "epoch": 0.6930548366470786, + "grad_norm": 0.9360736209203455, + "learning_rate": 4.839679896206864e-06, + "loss": 0.399, + "step": 11381 + }, + { + "epoch": 0.6931157324239564, + "grad_norm": 0.973805821620556, + "learning_rate": 4.8396517797392456e-06, + "loss": 0.417, + "step": 11382 + }, + { + "epoch": 0.6931766282008343, + "grad_norm": 1.0486834633240913, + "learning_rate": 4.8396236608880396e-06, + "loss": 0.4361, + "step": 11383 + }, + { + "epoch": 0.6932375239777121, + "grad_norm": 1.0094092894466493, + "learning_rate": 4.8395955396532735e-06, + "loss": 0.3943, + "step": 11384 + }, + { + "epoch": 0.6932984197545901, + "grad_norm": 0.9884880382678684, + "learning_rate": 4.839567416034976e-06, + "loss": 0.4197, + "step": 11385 + }, + { + "epoch": 0.6933593155314679, + "grad_norm": 1.0041386993627106, + "learning_rate": 4.839539290033176e-06, + "loss": 0.4413, + "step": 11386 + }, + { + "epoch": 0.6934202113083457, + "grad_norm": 1.0892161590429361, + "learning_rate": 4.8395111616479016e-06, + "loss": 0.4624, + "step": 11387 + }, + { + "epoch": 0.6934811070852236, + "grad_norm": 0.9480407319216112, + "learning_rate": 4.8394830308791826e-06, + "loss": 0.422, + "step": 11388 + }, + { + "epoch": 0.6935420028621015, + "grad_norm": 0.9623480822875962, + "learning_rate": 4.8394548977270475e-06, + "loss": 0.4866, + "step": 11389 + }, + { + "epoch": 0.6936028986389794, + "grad_norm": 1.0481417975220757, + "learning_rate": 4.839426762191523e-06, + "loss": 0.4352, + "step": 11390 + }, + { + "epoch": 0.6936637944158572, + "grad_norm": 1.1122118745774152, + "learning_rate": 4.8393986242726394e-06, + "loss": 0.4529, + "step": 11391 + }, + { + "epoch": 0.6937246901927351, + "grad_norm": 0.9256962684609247, + "learning_rate": 4.839370483970426e-06, + "loss": 0.4666, + "step": 11392 + }, + { + "epoch": 0.693785585969613, + "grad_norm": 0.9855019993785048, + "learning_rate": 4.8393423412849094e-06, + "loss": 0.4293, + "step": 11393 + }, + { + "epoch": 0.6938464817464909, + "grad_norm": 1.019955343677854, + "learning_rate": 4.83931419621612e-06, + "loss": 0.431, + "step": 11394 + }, + { + "epoch": 0.6939073775233687, + "grad_norm": 1.0650052258900848, + "learning_rate": 4.839286048764086e-06, + "loss": 0.4784, + "step": 11395 + }, + { + "epoch": 0.6939682733002466, + "grad_norm": 0.9740329098786928, + "learning_rate": 4.839257898928836e-06, + "loss": 0.486, + "step": 11396 + }, + { + "epoch": 0.6940291690771245, + "grad_norm": 0.9443226227925521, + "learning_rate": 4.839229746710399e-06, + "loss": 0.4681, + "step": 11397 + }, + { + "epoch": 0.6940900648540024, + "grad_norm": 1.0183997903779232, + "learning_rate": 4.839201592108802e-06, + "loss": 0.4047, + "step": 11398 + }, + { + "epoch": 0.6941509606308802, + "grad_norm": 0.9487110943027699, + "learning_rate": 4.839173435124076e-06, + "loss": 0.5005, + "step": 11399 + }, + { + "epoch": 0.6942118564077582, + "grad_norm": 0.964650694393175, + "learning_rate": 4.83914527575625e-06, + "loss": 0.3708, + "step": 11400 + }, + { + "epoch": 0.694272752184636, + "grad_norm": 1.0216570147411703, + "learning_rate": 4.83911711400535e-06, + "loss": 0.4746, + "step": 11401 + }, + { + "epoch": 0.6943336479615139, + "grad_norm": 0.9723556698957829, + "learning_rate": 4.839088949871406e-06, + "loss": 0.4836, + "step": 11402 + }, + { + "epoch": 0.6943945437383917, + "grad_norm": 1.0045298806316483, + "learning_rate": 4.8390607833544466e-06, + "loss": 0.467, + "step": 11403 + }, + { + "epoch": 0.6944554395152697, + "grad_norm": 0.9519166157749933, + "learning_rate": 4.839032614454502e-06, + "loss": 0.4301, + "step": 11404 + }, + { + "epoch": 0.6945163352921475, + "grad_norm": 1.0323794791812253, + "learning_rate": 4.839004443171599e-06, + "loss": 0.3966, + "step": 11405 + }, + { + "epoch": 0.6945772310690254, + "grad_norm": 0.9288905086356615, + "learning_rate": 4.838976269505766e-06, + "loss": 0.3881, + "step": 11406 + }, + { + "epoch": 0.6946381268459032, + "grad_norm": 1.0885762398348904, + "learning_rate": 4.838948093457034e-06, + "loss": 0.4232, + "step": 11407 + }, + { + "epoch": 0.6946990226227812, + "grad_norm": 1.1016038548535343, + "learning_rate": 4.8389199150254295e-06, + "loss": 0.3953, + "step": 11408 + }, + { + "epoch": 0.694759918399659, + "grad_norm": 0.9095310915985301, + "learning_rate": 4.838891734210982e-06, + "loss": 0.4261, + "step": 11409 + }, + { + "epoch": 0.6948208141765368, + "grad_norm": 1.022869549341025, + "learning_rate": 4.83886355101372e-06, + "loss": 0.3925, + "step": 11410 + }, + { + "epoch": 0.6948817099534147, + "grad_norm": 1.0243669806132112, + "learning_rate": 4.838835365433674e-06, + "loss": 0.422, + "step": 11411 + }, + { + "epoch": 0.6949426057302927, + "grad_norm": 1.069145544635632, + "learning_rate": 4.838807177470869e-06, + "loss": 0.4408, + "step": 11412 + }, + { + "epoch": 0.6950035015071705, + "grad_norm": 0.9920981961145152, + "learning_rate": 4.8387789871253374e-06, + "loss": 0.3985, + "step": 11413 + }, + { + "epoch": 0.6950643972840483, + "grad_norm": 0.9183843751581654, + "learning_rate": 4.838750794397105e-06, + "loss": 0.4216, + "step": 11414 + }, + { + "epoch": 0.6951252930609262, + "grad_norm": 1.0431412650104979, + "learning_rate": 4.838722599286203e-06, + "loss": 0.4548, + "step": 11415 + }, + { + "epoch": 0.6951861888378041, + "grad_norm": 1.0040955173283366, + "learning_rate": 4.838694401792659e-06, + "loss": 0.4311, + "step": 11416 + }, + { + "epoch": 0.695247084614682, + "grad_norm": 1.0344028234313296, + "learning_rate": 4.838666201916502e-06, + "loss": 0.3874, + "step": 11417 + }, + { + "epoch": 0.6953079803915598, + "grad_norm": 1.0009040636839321, + "learning_rate": 4.83863799965776e-06, + "loss": 0.4257, + "step": 11418 + }, + { + "epoch": 0.6953688761684377, + "grad_norm": 0.9438579542601448, + "learning_rate": 4.838609795016463e-06, + "loss": 0.4408, + "step": 11419 + }, + { + "epoch": 0.6954297719453156, + "grad_norm": 1.002323910718985, + "learning_rate": 4.838581587992637e-06, + "loss": 0.4531, + "step": 11420 + }, + { + "epoch": 0.6954906677221935, + "grad_norm": 1.0095397879634267, + "learning_rate": 4.838553378586315e-06, + "loss": 0.4522, + "step": 11421 + }, + { + "epoch": 0.6955515634990713, + "grad_norm": 0.9779097850985147, + "learning_rate": 4.838525166797522e-06, + "loss": 0.4354, + "step": 11422 + }, + { + "epoch": 0.6956124592759492, + "grad_norm": 1.0674557111128677, + "learning_rate": 4.838496952626288e-06, + "loss": 0.4232, + "step": 11423 + }, + { + "epoch": 0.6956733550528271, + "grad_norm": 0.9527260559711861, + "learning_rate": 4.838468736072643e-06, + "loss": 0.4363, + "step": 11424 + }, + { + "epoch": 0.695734250829705, + "grad_norm": 1.026364259996868, + "learning_rate": 4.838440517136614e-06, + "loss": 0.3805, + "step": 11425 + }, + { + "epoch": 0.6957951466065828, + "grad_norm": 0.9020389270789557, + "learning_rate": 4.838412295818231e-06, + "loss": 0.4651, + "step": 11426 + }, + { + "epoch": 0.6958560423834607, + "grad_norm": 1.0301919625766391, + "learning_rate": 4.8383840721175215e-06, + "loss": 0.3472, + "step": 11427 + }, + { + "epoch": 0.6959169381603386, + "grad_norm": 0.9389428061191223, + "learning_rate": 4.838355846034515e-06, + "loss": 0.4163, + "step": 11428 + }, + { + "epoch": 0.6959778339372165, + "grad_norm": 0.9520920091233587, + "learning_rate": 4.8383276175692405e-06, + "loss": 0.4045, + "step": 11429 + }, + { + "epoch": 0.6960387297140943, + "grad_norm": 0.9904019878005922, + "learning_rate": 4.838299386721726e-06, + "loss": 0.4161, + "step": 11430 + }, + { + "epoch": 0.6960996254909722, + "grad_norm": 0.9371068958497569, + "learning_rate": 4.838271153492001e-06, + "loss": 0.4002, + "step": 11431 + }, + { + "epoch": 0.6961605212678501, + "grad_norm": 1.0762509862371568, + "learning_rate": 4.838242917880095e-06, + "loss": 0.385, + "step": 11432 + }, + { + "epoch": 0.696221417044728, + "grad_norm": 1.0244724872955884, + "learning_rate": 4.838214679886034e-06, + "loss": 0.4434, + "step": 11433 + }, + { + "epoch": 0.6962823128216058, + "grad_norm": 0.9642245897255973, + "learning_rate": 4.83818643950985e-06, + "loss": 0.4291, + "step": 11434 + }, + { + "epoch": 0.6963432085984836, + "grad_norm": 1.0658638762244768, + "learning_rate": 4.838158196751569e-06, + "loss": 0.4698, + "step": 11435 + }, + { + "epoch": 0.6964041043753616, + "grad_norm": 0.9586917812467968, + "learning_rate": 4.838129951611221e-06, + "loss": 0.3948, + "step": 11436 + }, + { + "epoch": 0.6964650001522394, + "grad_norm": 0.9345607906692025, + "learning_rate": 4.838101704088836e-06, + "loss": 0.4648, + "step": 11437 + }, + { + "epoch": 0.6965258959291173, + "grad_norm": 1.0270951041668255, + "learning_rate": 4.8380734541844405e-06, + "loss": 0.4072, + "step": 11438 + }, + { + "epoch": 0.6965867917059951, + "grad_norm": 0.9052877654137222, + "learning_rate": 4.838045201898065e-06, + "loss": 0.3952, + "step": 11439 + }, + { + "epoch": 0.6966476874828731, + "grad_norm": 1.0235585310704285, + "learning_rate": 4.838016947229737e-06, + "loss": 0.472, + "step": 11440 + }, + { + "epoch": 0.6967085832597509, + "grad_norm": 0.9741091179006983, + "learning_rate": 4.837988690179487e-06, + "loss": 0.5209, + "step": 11441 + }, + { + "epoch": 0.6967694790366288, + "grad_norm": 1.0100471246191258, + "learning_rate": 4.837960430747341e-06, + "loss": 0.3455, + "step": 11442 + }, + { + "epoch": 0.6968303748135067, + "grad_norm": 0.9270123776022846, + "learning_rate": 4.837932168933332e-06, + "loss": 0.4229, + "step": 11443 + }, + { + "epoch": 0.6968912705903846, + "grad_norm": 1.038714862979794, + "learning_rate": 4.837903904737484e-06, + "loss": 0.4834, + "step": 11444 + }, + { + "epoch": 0.6969521663672624, + "grad_norm": 1.072848365788316, + "learning_rate": 4.83787563815983e-06, + "loss": 0.4398, + "step": 11445 + }, + { + "epoch": 0.6970130621441403, + "grad_norm": 1.03116653560015, + "learning_rate": 4.837847369200396e-06, + "loss": 0.4462, + "step": 11446 + }, + { + "epoch": 0.6970739579210182, + "grad_norm": 1.09527902901931, + "learning_rate": 4.837819097859211e-06, + "loss": 0.3823, + "step": 11447 + }, + { + "epoch": 0.6971348536978961, + "grad_norm": 0.9875285156692952, + "learning_rate": 4.837790824136306e-06, + "loss": 0.3719, + "step": 11448 + }, + { + "epoch": 0.6971957494747739, + "grad_norm": 1.0730101591209258, + "learning_rate": 4.837762548031707e-06, + "loss": 0.405, + "step": 11449 + }, + { + "epoch": 0.6972566452516518, + "grad_norm": 1.1241888780613858, + "learning_rate": 4.837734269545444e-06, + "loss": 0.3777, + "step": 11450 + }, + { + "epoch": 0.6973175410285297, + "grad_norm": 0.9885068155640494, + "learning_rate": 4.837705988677547e-06, + "loss": 0.4357, + "step": 11451 + }, + { + "epoch": 0.6973784368054076, + "grad_norm": 0.9981771019634312, + "learning_rate": 4.837677705428043e-06, + "loss": 0.4764, + "step": 11452 + }, + { + "epoch": 0.6974393325822854, + "grad_norm": 0.9353454430154761, + "learning_rate": 4.837649419796962e-06, + "loss": 0.4624, + "step": 11453 + }, + { + "epoch": 0.6975002283591633, + "grad_norm": 0.9794756411903768, + "learning_rate": 4.837621131784333e-06, + "loss": 0.4853, + "step": 11454 + }, + { + "epoch": 0.6975611241360412, + "grad_norm": 1.0139403418113513, + "learning_rate": 4.837592841390183e-06, + "loss": 0.4118, + "step": 11455 + }, + { + "epoch": 0.6976220199129191, + "grad_norm": 0.9578798244457989, + "learning_rate": 4.8375645486145425e-06, + "loss": 0.4482, + "step": 11456 + }, + { + "epoch": 0.6976829156897969, + "grad_norm": 1.0211739317921076, + "learning_rate": 4.837536253457439e-06, + "loss": 0.4186, + "step": 11457 + }, + { + "epoch": 0.6977438114666747, + "grad_norm": 1.0022263550728083, + "learning_rate": 4.837507955918903e-06, + "loss": 0.4216, + "step": 11458 + }, + { + "epoch": 0.6978047072435527, + "grad_norm": 1.0171329494740489, + "learning_rate": 4.837479655998963e-06, + "loss": 0.3982, + "step": 11459 + }, + { + "epoch": 0.6978656030204305, + "grad_norm": 1.0326314104121936, + "learning_rate": 4.837451353697647e-06, + "loss": 0.4288, + "step": 11460 + }, + { + "epoch": 0.6979264987973084, + "grad_norm": 1.0136101940052682, + "learning_rate": 4.837423049014983e-06, + "loss": 0.4315, + "step": 11461 + }, + { + "epoch": 0.6979873945741862, + "grad_norm": 0.9525145715955237, + "learning_rate": 4.837394741951003e-06, + "loss": 0.5251, + "step": 11462 + }, + { + "epoch": 0.6980482903510642, + "grad_norm": 1.0099499972431945, + "learning_rate": 4.837366432505733e-06, + "loss": 0.3773, + "step": 11463 + }, + { + "epoch": 0.698109186127942, + "grad_norm": 0.9650364481010996, + "learning_rate": 4.8373381206792015e-06, + "loss": 0.4197, + "step": 11464 + }, + { + "epoch": 0.6981700819048199, + "grad_norm": 1.0206920021943873, + "learning_rate": 4.8373098064714395e-06, + "loss": 0.3744, + "step": 11465 + }, + { + "epoch": 0.6982309776816977, + "grad_norm": 1.0612863353522581, + "learning_rate": 4.837281489882475e-06, + "loss": 0.3719, + "step": 11466 + }, + { + "epoch": 0.6982918734585757, + "grad_norm": 1.0220486120755252, + "learning_rate": 4.837253170912336e-06, + "loss": 0.4556, + "step": 11467 + }, + { + "epoch": 0.6983527692354535, + "grad_norm": 0.9425623466822135, + "learning_rate": 4.837224849561053e-06, + "loss": 0.4419, + "step": 11468 + }, + { + "epoch": 0.6984136650123314, + "grad_norm": 0.9348899325236141, + "learning_rate": 4.837196525828653e-06, + "loss": 0.384, + "step": 11469 + }, + { + "epoch": 0.6984745607892092, + "grad_norm": 0.937626671244369, + "learning_rate": 4.837168199715166e-06, + "loss": 0.4493, + "step": 11470 + }, + { + "epoch": 0.6985354565660872, + "grad_norm": 0.9593967207558884, + "learning_rate": 4.837139871220622e-06, + "loss": 0.4472, + "step": 11471 + }, + { + "epoch": 0.698596352342965, + "grad_norm": 0.98934341834821, + "learning_rate": 4.837111540345046e-06, + "loss": 0.3762, + "step": 11472 + }, + { + "epoch": 0.6986572481198429, + "grad_norm": 1.0023931652133746, + "learning_rate": 4.837083207088471e-06, + "loss": 0.4241, + "step": 11473 + }, + { + "epoch": 0.6987181438967207, + "grad_norm": 0.9715594047667024, + "learning_rate": 4.837054871450925e-06, + "loss": 0.4472, + "step": 11474 + }, + { + "epoch": 0.6987790396735987, + "grad_norm": 1.0313791862244066, + "learning_rate": 4.837026533432434e-06, + "loss": 0.4175, + "step": 11475 + }, + { + "epoch": 0.6988399354504765, + "grad_norm": 1.0338079684732597, + "learning_rate": 4.83699819303303e-06, + "loss": 0.3919, + "step": 11476 + }, + { + "epoch": 0.6989008312273544, + "grad_norm": 1.0138168212247602, + "learning_rate": 4.83696985025274e-06, + "loss": 0.4337, + "step": 11477 + }, + { + "epoch": 0.6989617270042322, + "grad_norm": 1.030962466763444, + "learning_rate": 4.836941505091596e-06, + "loss": 0.3497, + "step": 11478 + }, + { + "epoch": 0.6990226227811102, + "grad_norm": 1.0641747418770564, + "learning_rate": 4.836913157549622e-06, + "loss": 0.3638, + "step": 11479 + }, + { + "epoch": 0.699083518557988, + "grad_norm": 0.9095917260504603, + "learning_rate": 4.836884807626852e-06, + "loss": 0.4232, + "step": 11480 + }, + { + "epoch": 0.6991444143348658, + "grad_norm": 1.018079043890744, + "learning_rate": 4.83685645532331e-06, + "loss": 0.3931, + "step": 11481 + }, + { + "epoch": 0.6992053101117438, + "grad_norm": 1.0873624963277422, + "learning_rate": 4.836828100639029e-06, + "loss": 0.4041, + "step": 11482 + }, + { + "epoch": 0.6992662058886217, + "grad_norm": 1.0488000453589126, + "learning_rate": 4.836799743574035e-06, + "loss": 0.4151, + "step": 11483 + }, + { + "epoch": 0.6993271016654995, + "grad_norm": 0.982943096510685, + "learning_rate": 4.836771384128357e-06, + "loss": 0.4622, + "step": 11484 + }, + { + "epoch": 0.6993879974423773, + "grad_norm": 1.074005012184541, + "learning_rate": 4.836743022302027e-06, + "loss": 0.4014, + "step": 11485 + }, + { + "epoch": 0.6994488932192553, + "grad_norm": 1.0444894003673866, + "learning_rate": 4.836714658095071e-06, + "loss": 0.5284, + "step": 11486 + }, + { + "epoch": 0.6995097889961331, + "grad_norm": 0.9017833723653855, + "learning_rate": 4.8366862915075186e-06, + "loss": 0.3974, + "step": 11487 + }, + { + "epoch": 0.699570684773011, + "grad_norm": 1.0467471315411339, + "learning_rate": 4.8366579225393985e-06, + "loss": 0.4077, + "step": 11488 + }, + { + "epoch": 0.6996315805498888, + "grad_norm": 1.074745535425246, + "learning_rate": 4.8366295511907405e-06, + "loss": 0.3854, + "step": 11489 + }, + { + "epoch": 0.6996924763267668, + "grad_norm": 0.9605347183396125, + "learning_rate": 4.836601177461573e-06, + "loss": 0.4329, + "step": 11490 + }, + { + "epoch": 0.6997533721036446, + "grad_norm": 1.0405296195890514, + "learning_rate": 4.836572801351924e-06, + "loss": 0.4833, + "step": 11491 + }, + { + "epoch": 0.6998142678805225, + "grad_norm": 1.0085136683523543, + "learning_rate": 4.836544422861823e-06, + "loss": 0.4376, + "step": 11492 + }, + { + "epoch": 0.6998751636574003, + "grad_norm": 0.9748071312499873, + "learning_rate": 4.8365160419913e-06, + "loss": 0.4104, + "step": 11493 + }, + { + "epoch": 0.6999360594342783, + "grad_norm": 0.948988121124257, + "learning_rate": 4.836487658740383e-06, + "loss": 0.4324, + "step": 11494 + }, + { + "epoch": 0.6999969552111561, + "grad_norm": 1.0236301289229592, + "learning_rate": 4.8364592731091005e-06, + "loss": 0.3409, + "step": 11495 + }, + { + "epoch": 0.700057850988034, + "grad_norm": 0.9904850704550154, + "learning_rate": 4.836430885097482e-06, + "loss": 0.3918, + "step": 11496 + }, + { + "epoch": 0.7001187467649118, + "grad_norm": 1.0350110053190906, + "learning_rate": 4.836402494705556e-06, + "loss": 0.3849, + "step": 11497 + }, + { + "epoch": 0.7001796425417898, + "grad_norm": 1.032768137078956, + "learning_rate": 4.836374101933352e-06, + "loss": 0.4092, + "step": 11498 + }, + { + "epoch": 0.7002405383186676, + "grad_norm": 1.0261125111920155, + "learning_rate": 4.836345706780899e-06, + "loss": 0.4403, + "step": 11499 + }, + { + "epoch": 0.7003014340955455, + "grad_norm": 0.9264258306040256, + "learning_rate": 4.836317309248225e-06, + "loss": 0.4402, + "step": 11500 + }, + { + "epoch": 0.7003623298724233, + "grad_norm": 0.8843967243030083, + "learning_rate": 4.836288909335361e-06, + "loss": 0.4168, + "step": 11501 + }, + { + "epoch": 0.7004232256493013, + "grad_norm": 1.032521565171973, + "learning_rate": 4.836260507042333e-06, + "loss": 0.3648, + "step": 11502 + }, + { + "epoch": 0.7004841214261791, + "grad_norm": 1.0214215634052441, + "learning_rate": 4.83623210236917e-06, + "loss": 0.4734, + "step": 11503 + }, + { + "epoch": 0.700545017203057, + "grad_norm": 1.0161578959015172, + "learning_rate": 4.836203695315905e-06, + "loss": 0.4547, + "step": 11504 + }, + { + "epoch": 0.7006059129799348, + "grad_norm": 1.0413279204467953, + "learning_rate": 4.836175285882563e-06, + "loss": 0.372, + "step": 11505 + }, + { + "epoch": 0.7006668087568128, + "grad_norm": 1.0343707345762168, + "learning_rate": 4.836146874069174e-06, + "loss": 0.5022, + "step": 11506 + }, + { + "epoch": 0.7007277045336906, + "grad_norm": 1.047416242099494, + "learning_rate": 4.836118459875768e-06, + "loss": 0.4203, + "step": 11507 + }, + { + "epoch": 0.7007886003105684, + "grad_norm": 1.0458577399782358, + "learning_rate": 4.836090043302372e-06, + "loss": 0.4571, + "step": 11508 + }, + { + "epoch": 0.7008494960874463, + "grad_norm": 1.0558340841861351, + "learning_rate": 4.836061624349016e-06, + "loss": 0.4368, + "step": 11509 + }, + { + "epoch": 0.7009103918643242, + "grad_norm": 1.047100839116054, + "learning_rate": 4.8360332030157305e-06, + "loss": 0.4182, + "step": 11510 + }, + { + "epoch": 0.7009712876412021, + "grad_norm": 0.9810176844462284, + "learning_rate": 4.836004779302542e-06, + "loss": 0.4822, + "step": 11511 + }, + { + "epoch": 0.7010321834180799, + "grad_norm": 1.1120739271842945, + "learning_rate": 4.835976353209481e-06, + "loss": 0.3931, + "step": 11512 + }, + { + "epoch": 0.7010930791949578, + "grad_norm": 0.9546661834239346, + "learning_rate": 4.835947924736575e-06, + "loss": 0.5164, + "step": 11513 + }, + { + "epoch": 0.7011539749718357, + "grad_norm": 1.075355811369139, + "learning_rate": 4.8359194938838535e-06, + "loss": 0.3434, + "step": 11514 + }, + { + "epoch": 0.7012148707487136, + "grad_norm": 1.0440390780956357, + "learning_rate": 4.835891060651346e-06, + "loss": 0.5223, + "step": 11515 + }, + { + "epoch": 0.7012757665255914, + "grad_norm": 0.9083148587812518, + "learning_rate": 4.835862625039081e-06, + "loss": 0.4025, + "step": 11516 + }, + { + "epoch": 0.7013366623024693, + "grad_norm": 1.0325581716048235, + "learning_rate": 4.835834187047089e-06, + "loss": 0.4811, + "step": 11517 + }, + { + "epoch": 0.7013975580793472, + "grad_norm": 1.0930028983115883, + "learning_rate": 4.835805746675397e-06, + "loss": 0.3565, + "step": 11518 + }, + { + "epoch": 0.7014584538562251, + "grad_norm": 0.9714177500440017, + "learning_rate": 4.835777303924035e-06, + "loss": 0.5051, + "step": 11519 + }, + { + "epoch": 0.7015193496331029, + "grad_norm": 0.9663911163494339, + "learning_rate": 4.835748858793031e-06, + "loss": 0.4737, + "step": 11520 + }, + { + "epoch": 0.7015802454099808, + "grad_norm": 0.9465199115724305, + "learning_rate": 4.835720411282415e-06, + "loss": 0.4704, + "step": 11521 + }, + { + "epoch": 0.7016411411868587, + "grad_norm": 0.9345818239043169, + "learning_rate": 4.835691961392216e-06, + "loss": 0.4436, + "step": 11522 + }, + { + "epoch": 0.7017020369637366, + "grad_norm": 1.108554938610742, + "learning_rate": 4.835663509122462e-06, + "loss": 0.4298, + "step": 11523 + }, + { + "epoch": 0.7017629327406144, + "grad_norm": 1.0007535927198812, + "learning_rate": 4.8356350544731825e-06, + "loss": 0.4126, + "step": 11524 + }, + { + "epoch": 0.7018238285174924, + "grad_norm": 1.1170741580234544, + "learning_rate": 4.835606597444407e-06, + "loss": 0.3962, + "step": 11525 + }, + { + "epoch": 0.7018847242943702, + "grad_norm": 1.0866068724342588, + "learning_rate": 4.835578138036164e-06, + "loss": 0.3965, + "step": 11526 + }, + { + "epoch": 0.701945620071248, + "grad_norm": 1.0253035642385462, + "learning_rate": 4.835549676248482e-06, + "loss": 0.4052, + "step": 11527 + }, + { + "epoch": 0.7020065158481259, + "grad_norm": 0.9861272940065823, + "learning_rate": 4.835521212081391e-06, + "loss": 0.4202, + "step": 11528 + }, + { + "epoch": 0.7020674116250039, + "grad_norm": 1.017280301322898, + "learning_rate": 4.83549274553492e-06, + "loss": 0.3517, + "step": 11529 + }, + { + "epoch": 0.7021283074018817, + "grad_norm": 0.9948039969673824, + "learning_rate": 4.8354642766090964e-06, + "loss": 0.3903, + "step": 11530 + }, + { + "epoch": 0.7021892031787595, + "grad_norm": 0.9415203712562841, + "learning_rate": 4.8354358053039515e-06, + "loss": 0.4857, + "step": 11531 + }, + { + "epoch": 0.7022500989556374, + "grad_norm": 1.0519619557613291, + "learning_rate": 4.835407331619513e-06, + "loss": 0.394, + "step": 11532 + }, + { + "epoch": 0.7023109947325153, + "grad_norm": 1.0135458440714837, + "learning_rate": 4.835378855555809e-06, + "loss": 0.4393, + "step": 11533 + }, + { + "epoch": 0.7023718905093932, + "grad_norm": 0.9549331181075554, + "learning_rate": 4.835350377112871e-06, + "loss": 0.469, + "step": 11534 + }, + { + "epoch": 0.702432786286271, + "grad_norm": 1.025388614974714, + "learning_rate": 4.835321896290725e-06, + "loss": 0.4197, + "step": 11535 + }, + { + "epoch": 0.7024936820631489, + "grad_norm": 0.9963495519852628, + "learning_rate": 4.8352934130894025e-06, + "loss": 0.3605, + "step": 11536 + }, + { + "epoch": 0.7025545778400268, + "grad_norm": 1.0057167103372973, + "learning_rate": 4.835264927508932e-06, + "loss": 0.4324, + "step": 11537 + }, + { + "epoch": 0.7026154736169047, + "grad_norm": 1.0241068049449804, + "learning_rate": 4.835236439549341e-06, + "loss": 0.5003, + "step": 11538 + }, + { + "epoch": 0.7026763693937825, + "grad_norm": 0.987345202406134, + "learning_rate": 4.835207949210661e-06, + "loss": 0.4239, + "step": 11539 + }, + { + "epoch": 0.7027372651706604, + "grad_norm": 0.9895197779022825, + "learning_rate": 4.835179456492919e-06, + "loss": 0.4266, + "step": 11540 + }, + { + "epoch": 0.7027981609475383, + "grad_norm": 1.006072185877477, + "learning_rate": 4.835150961396145e-06, + "loss": 0.5089, + "step": 11541 + }, + { + "epoch": 0.7028590567244162, + "grad_norm": 0.9262385038404428, + "learning_rate": 4.835122463920368e-06, + "loss": 0.4468, + "step": 11542 + }, + { + "epoch": 0.702919952501294, + "grad_norm": 0.9751814685940107, + "learning_rate": 4.835093964065616e-06, + "loss": 0.4916, + "step": 11543 + }, + { + "epoch": 0.7029808482781719, + "grad_norm": 0.9889261187742306, + "learning_rate": 4.835065461831919e-06, + "loss": 0.3382, + "step": 11544 + }, + { + "epoch": 0.7030417440550498, + "grad_norm": 0.9540894677500805, + "learning_rate": 4.835036957219306e-06, + "loss": 0.3892, + "step": 11545 + }, + { + "epoch": 0.7031026398319277, + "grad_norm": 1.0223482010502378, + "learning_rate": 4.835008450227806e-06, + "loss": 0.4079, + "step": 11546 + }, + { + "epoch": 0.7031635356088055, + "grad_norm": 1.1141193886701088, + "learning_rate": 4.834979940857448e-06, + "loss": 0.3852, + "step": 11547 + }, + { + "epoch": 0.7032244313856834, + "grad_norm": 1.0057748433051552, + "learning_rate": 4.83495142910826e-06, + "loss": 0.459, + "step": 11548 + }, + { + "epoch": 0.7032853271625613, + "grad_norm": 1.0585273252013785, + "learning_rate": 4.834922914980274e-06, + "loss": 0.4528, + "step": 11549 + }, + { + "epoch": 0.7033462229394392, + "grad_norm": 1.0721968023376454, + "learning_rate": 4.8348943984735154e-06, + "loss": 0.4347, + "step": 11550 + }, + { + "epoch": 0.703407118716317, + "grad_norm": 0.9773179886462723, + "learning_rate": 4.834865879588016e-06, + "loss": 0.4288, + "step": 11551 + }, + { + "epoch": 0.7034680144931948, + "grad_norm": 1.0237950385049175, + "learning_rate": 4.834837358323803e-06, + "loss": 0.4454, + "step": 11552 + }, + { + "epoch": 0.7035289102700728, + "grad_norm": 0.9398818817057194, + "learning_rate": 4.834808834680906e-06, + "loss": 0.506, + "step": 11553 + }, + { + "epoch": 0.7035898060469506, + "grad_norm": 0.928762974990354, + "learning_rate": 4.834780308659355e-06, + "loss": 0.4244, + "step": 11554 + }, + { + "epoch": 0.7036507018238285, + "grad_norm": 1.0090288259922147, + "learning_rate": 4.834751780259178e-06, + "loss": 0.4336, + "step": 11555 + }, + { + "epoch": 0.7037115976007063, + "grad_norm": 1.0559205527693198, + "learning_rate": 4.8347232494804045e-06, + "loss": 0.461, + "step": 11556 + }, + { + "epoch": 0.7037724933775843, + "grad_norm": 0.9783249454435727, + "learning_rate": 4.834694716323064e-06, + "loss": 0.4393, + "step": 11557 + }, + { + "epoch": 0.7038333891544621, + "grad_norm": 1.0258455936831192, + "learning_rate": 4.834666180787184e-06, + "loss": 0.4347, + "step": 11558 + }, + { + "epoch": 0.70389428493134, + "grad_norm": 1.0453803827977217, + "learning_rate": 4.834637642872796e-06, + "loss": 0.3996, + "step": 11559 + }, + { + "epoch": 0.7039551807082178, + "grad_norm": 1.0417274287270377, + "learning_rate": 4.834609102579927e-06, + "loss": 0.5049, + "step": 11560 + }, + { + "epoch": 0.7040160764850958, + "grad_norm": 0.9721103729140425, + "learning_rate": 4.834580559908607e-06, + "loss": 0.4027, + "step": 11561 + }, + { + "epoch": 0.7040769722619736, + "grad_norm": 1.033959509560581, + "learning_rate": 4.834552014858864e-06, + "loss": 0.4753, + "step": 11562 + }, + { + "epoch": 0.7041378680388515, + "grad_norm": 0.9945621230860887, + "learning_rate": 4.834523467430729e-06, + "loss": 0.3935, + "step": 11563 + }, + { + "epoch": 0.7041987638157294, + "grad_norm": 0.9667322078412678, + "learning_rate": 4.8344949176242295e-06, + "loss": 0.4901, + "step": 11564 + }, + { + "epoch": 0.7042596595926073, + "grad_norm": 0.9318235526843194, + "learning_rate": 4.834466365439396e-06, + "loss": 0.4218, + "step": 11565 + }, + { + "epoch": 0.7043205553694851, + "grad_norm": 0.9261132904656905, + "learning_rate": 4.834437810876255e-06, + "loss": 0.4638, + "step": 11566 + }, + { + "epoch": 0.704381451146363, + "grad_norm": 1.0666959003799072, + "learning_rate": 4.834409253934838e-06, + "loss": 0.3872, + "step": 11567 + }, + { + "epoch": 0.7044423469232409, + "grad_norm": 1.047901042378349, + "learning_rate": 4.834380694615174e-06, + "loss": 0.3865, + "step": 11568 + }, + { + "epoch": 0.7045032427001188, + "grad_norm": 0.9391899168956402, + "learning_rate": 4.834352132917291e-06, + "loss": 0.4562, + "step": 11569 + }, + { + "epoch": 0.7045641384769966, + "grad_norm": 0.9019880072679285, + "learning_rate": 4.8343235688412185e-06, + "loss": 0.4994, + "step": 11570 + }, + { + "epoch": 0.7046250342538745, + "grad_norm": 0.9956217887333346, + "learning_rate": 4.834295002386986e-06, + "loss": 0.3982, + "step": 11571 + }, + { + "epoch": 0.7046859300307524, + "grad_norm": 1.0685688134348816, + "learning_rate": 4.834266433554622e-06, + "loss": 0.401, + "step": 11572 + }, + { + "epoch": 0.7047468258076303, + "grad_norm": 1.034314801648896, + "learning_rate": 4.834237862344157e-06, + "loss": 0.4102, + "step": 11573 + }, + { + "epoch": 0.7048077215845081, + "grad_norm": 0.962129886643552, + "learning_rate": 4.834209288755617e-06, + "loss": 0.4392, + "step": 11574 + }, + { + "epoch": 0.704868617361386, + "grad_norm": 1.0122334402403776, + "learning_rate": 4.834180712789035e-06, + "loss": 0.3693, + "step": 11575 + }, + { + "epoch": 0.7049295131382639, + "grad_norm": 1.0095000874357678, + "learning_rate": 4.834152134444436e-06, + "loss": 0.3833, + "step": 11576 + }, + { + "epoch": 0.7049904089151418, + "grad_norm": 1.0198751492969913, + "learning_rate": 4.8341235537218536e-06, + "loss": 0.3933, + "step": 11577 + }, + { + "epoch": 0.7050513046920196, + "grad_norm": 1.0203538912669734, + "learning_rate": 4.834094970621313e-06, + "loss": 0.4044, + "step": 11578 + }, + { + "epoch": 0.7051122004688974, + "grad_norm": 1.0402256147703026, + "learning_rate": 4.834066385142846e-06, + "loss": 0.4121, + "step": 11579 + }, + { + "epoch": 0.7051730962457754, + "grad_norm": 0.9501913451895768, + "learning_rate": 4.8340377972864805e-06, + "loss": 0.4621, + "step": 11580 + }, + { + "epoch": 0.7052339920226532, + "grad_norm": 1.045826179185689, + "learning_rate": 4.834009207052246e-06, + "loss": 0.4632, + "step": 11581 + }, + { + "epoch": 0.7052948877995311, + "grad_norm": 1.1768973610584024, + "learning_rate": 4.833980614440172e-06, + "loss": 0.4113, + "step": 11582 + }, + { + "epoch": 0.7053557835764089, + "grad_norm": 0.9615457022268701, + "learning_rate": 4.833952019450286e-06, + "loss": 0.4297, + "step": 11583 + }, + { + "epoch": 0.7054166793532869, + "grad_norm": 1.0441611857361048, + "learning_rate": 4.833923422082618e-06, + "loss": 0.4117, + "step": 11584 + }, + { + "epoch": 0.7054775751301647, + "grad_norm": 1.0946093583871606, + "learning_rate": 4.8338948223371975e-06, + "loss": 0.3995, + "step": 11585 + }, + { + "epoch": 0.7055384709070426, + "grad_norm": 1.0076751309731942, + "learning_rate": 4.833866220214054e-06, + "loss": 0.4084, + "step": 11586 + }, + { + "epoch": 0.7055993666839204, + "grad_norm": 1.0106513522430476, + "learning_rate": 4.833837615713215e-06, + "loss": 0.3841, + "step": 11587 + }, + { + "epoch": 0.7056602624607984, + "grad_norm": 1.0327056686609102, + "learning_rate": 4.833809008834712e-06, + "loss": 0.3999, + "step": 11588 + }, + { + "epoch": 0.7057211582376762, + "grad_norm": 1.025644333639754, + "learning_rate": 4.8337803995785726e-06, + "loss": 0.4648, + "step": 11589 + }, + { + "epoch": 0.7057820540145541, + "grad_norm": 0.9363517862324277, + "learning_rate": 4.833751787944826e-06, + "loss": 0.4997, + "step": 11590 + }, + { + "epoch": 0.7058429497914319, + "grad_norm": 0.9281627738545095, + "learning_rate": 4.833723173933502e-06, + "loss": 0.4037, + "step": 11591 + }, + { + "epoch": 0.7059038455683099, + "grad_norm": 1.0068821711513685, + "learning_rate": 4.833694557544629e-06, + "loss": 0.4542, + "step": 11592 + }, + { + "epoch": 0.7059647413451877, + "grad_norm": 1.074060530587589, + "learning_rate": 4.833665938778237e-06, + "loss": 0.3469, + "step": 11593 + }, + { + "epoch": 0.7060256371220656, + "grad_norm": 1.048625352696612, + "learning_rate": 4.833637317634354e-06, + "loss": 0.4635, + "step": 11594 + }, + { + "epoch": 0.7060865328989434, + "grad_norm": 1.126155941945451, + "learning_rate": 4.83360869411301e-06, + "loss": 0.4047, + "step": 11595 + }, + { + "epoch": 0.7061474286758214, + "grad_norm": 0.9039758023325897, + "learning_rate": 4.833580068214234e-06, + "loss": 0.5026, + "step": 11596 + }, + { + "epoch": 0.7062083244526992, + "grad_norm": 1.024133421238663, + "learning_rate": 4.8335514399380554e-06, + "loss": 0.4086, + "step": 11597 + }, + { + "epoch": 0.706269220229577, + "grad_norm": 1.0197080081474164, + "learning_rate": 4.8335228092845036e-06, + "loss": 0.4125, + "step": 11598 + }, + { + "epoch": 0.7063301160064549, + "grad_norm": 1.0403813810395186, + "learning_rate": 4.8334941762536055e-06, + "loss": 0.4294, + "step": 11599 + }, + { + "epoch": 0.7063910117833329, + "grad_norm": 0.9438435549210288, + "learning_rate": 4.833465540845393e-06, + "loss": 0.3933, + "step": 11600 + }, + { + "epoch": 0.7064519075602107, + "grad_norm": 0.979060154044363, + "learning_rate": 4.833436903059895e-06, + "loss": 0.4673, + "step": 11601 + }, + { + "epoch": 0.7065128033370885, + "grad_norm": 1.0317526108652888, + "learning_rate": 4.83340826289714e-06, + "loss": 0.3866, + "step": 11602 + }, + { + "epoch": 0.7065736991139664, + "grad_norm": 0.9465495672240413, + "learning_rate": 4.833379620357157e-06, + "loss": 0.4922, + "step": 11603 + }, + { + "epoch": 0.7066345948908443, + "grad_norm": 0.9603809539428895, + "learning_rate": 4.833350975439974e-06, + "loss": 0.4441, + "step": 11604 + }, + { + "epoch": 0.7066954906677222, + "grad_norm": 0.9529554389626578, + "learning_rate": 4.833322328145623e-06, + "loss": 0.4011, + "step": 11605 + }, + { + "epoch": 0.7067563864446, + "grad_norm": 1.0945577078028956, + "learning_rate": 4.833293678474131e-06, + "loss": 0.4196, + "step": 11606 + }, + { + "epoch": 0.706817282221478, + "grad_norm": 1.0803894070111963, + "learning_rate": 4.833265026425529e-06, + "loss": 0.4496, + "step": 11607 + }, + { + "epoch": 0.7068781779983558, + "grad_norm": 0.9456626733113144, + "learning_rate": 4.833236371999844e-06, + "loss": 0.4435, + "step": 11608 + }, + { + "epoch": 0.7069390737752337, + "grad_norm": 0.8991330700409316, + "learning_rate": 4.833207715197106e-06, + "loss": 0.4344, + "step": 11609 + }, + { + "epoch": 0.7069999695521115, + "grad_norm": 0.9572348252505024, + "learning_rate": 4.8331790560173455e-06, + "loss": 0.4622, + "step": 11610 + }, + { + "epoch": 0.7070608653289895, + "grad_norm": 0.9979612523835043, + "learning_rate": 4.83315039446059e-06, + "loss": 0.3805, + "step": 11611 + }, + { + "epoch": 0.7071217611058673, + "grad_norm": 1.0665840471397094, + "learning_rate": 4.83312173052687e-06, + "loss": 0.4736, + "step": 11612 + }, + { + "epoch": 0.7071826568827452, + "grad_norm": 0.9714073597522496, + "learning_rate": 4.833093064216213e-06, + "loss": 0.4061, + "step": 11613 + }, + { + "epoch": 0.707243552659623, + "grad_norm": 1.0349760694206358, + "learning_rate": 4.83306439552865e-06, + "loss": 0.4319, + "step": 11614 + }, + { + "epoch": 0.707304448436501, + "grad_norm": 1.1806173759334897, + "learning_rate": 4.83303572446421e-06, + "loss": 0.4483, + "step": 11615 + }, + { + "epoch": 0.7073653442133788, + "grad_norm": 1.0385797247354969, + "learning_rate": 4.833007051022921e-06, + "loss": 0.5579, + "step": 11616 + }, + { + "epoch": 0.7074262399902567, + "grad_norm": 1.0451859991806551, + "learning_rate": 4.8329783752048136e-06, + "loss": 0.4302, + "step": 11617 + }, + { + "epoch": 0.7074871357671345, + "grad_norm": 0.9549136397089851, + "learning_rate": 4.8329496970099155e-06, + "loss": 0.4181, + "step": 11618 + }, + { + "epoch": 0.7075480315440125, + "grad_norm": 1.1217326914813628, + "learning_rate": 4.832921016438257e-06, + "loss": 0.3816, + "step": 11619 + }, + { + "epoch": 0.7076089273208903, + "grad_norm": 1.0360887236375638, + "learning_rate": 4.832892333489868e-06, + "loss": 0.3943, + "step": 11620 + }, + { + "epoch": 0.7076698230977682, + "grad_norm": 1.107392004222655, + "learning_rate": 4.832863648164775e-06, + "loss": 0.4424, + "step": 11621 + }, + { + "epoch": 0.707730718874646, + "grad_norm": 1.030131379888995, + "learning_rate": 4.8328349604630105e-06, + "loss": 0.4016, + "step": 11622 + }, + { + "epoch": 0.707791614651524, + "grad_norm": 1.0057736212644066, + "learning_rate": 4.832806270384602e-06, + "loss": 0.3667, + "step": 11623 + }, + { + "epoch": 0.7078525104284018, + "grad_norm": 0.9649343790221249, + "learning_rate": 4.832777577929578e-06, + "loss": 0.3773, + "step": 11624 + }, + { + "epoch": 0.7079134062052796, + "grad_norm": 0.94743736480503, + "learning_rate": 4.832748883097969e-06, + "loss": 0.4117, + "step": 11625 + }, + { + "epoch": 0.7079743019821575, + "grad_norm": 1.088074797613643, + "learning_rate": 4.832720185889806e-06, + "loss": 0.3769, + "step": 11626 + }, + { + "epoch": 0.7080351977590355, + "grad_norm": 0.9300256063164335, + "learning_rate": 4.832691486305114e-06, + "loss": 0.4827, + "step": 11627 + }, + { + "epoch": 0.7080960935359133, + "grad_norm": 1.000906755277509, + "learning_rate": 4.832662784343925e-06, + "loss": 0.4563, + "step": 11628 + }, + { + "epoch": 0.7081569893127911, + "grad_norm": 0.9537776055893235, + "learning_rate": 4.832634080006267e-06, + "loss": 0.397, + "step": 11629 + }, + { + "epoch": 0.708217885089669, + "grad_norm": 1.0018291348667572, + "learning_rate": 4.8326053732921705e-06, + "loss": 0.4257, + "step": 11630 + }, + { + "epoch": 0.7082787808665469, + "grad_norm": 1.0069904970946277, + "learning_rate": 4.832576664201664e-06, + "loss": 0.4706, + "step": 11631 + }, + { + "epoch": 0.7083396766434248, + "grad_norm": 1.0563548422466478, + "learning_rate": 4.8325479527347776e-06, + "loss": 0.4455, + "step": 11632 + }, + { + "epoch": 0.7084005724203026, + "grad_norm": 1.0127761170869656, + "learning_rate": 4.832519238891539e-06, + "loss": 0.4474, + "step": 11633 + }, + { + "epoch": 0.7084614681971805, + "grad_norm": 0.973026238654497, + "learning_rate": 4.832490522671978e-06, + "loss": 0.464, + "step": 11634 + }, + { + "epoch": 0.7085223639740584, + "grad_norm": 1.0054566868052837, + "learning_rate": 4.832461804076125e-06, + "loss": 0.4334, + "step": 11635 + }, + { + "epoch": 0.7085832597509363, + "grad_norm": 1.1021451757797747, + "learning_rate": 4.832433083104009e-06, + "loss": 0.4309, + "step": 11636 + }, + { + "epoch": 0.7086441555278141, + "grad_norm": 1.0239717295174438, + "learning_rate": 4.8324043597556575e-06, + "loss": 0.4192, + "step": 11637 + }, + { + "epoch": 0.708705051304692, + "grad_norm": 0.9282863303737752, + "learning_rate": 4.832375634031101e-06, + "loss": 0.4945, + "step": 11638 + }, + { + "epoch": 0.7087659470815699, + "grad_norm": 1.0107945875917363, + "learning_rate": 4.83234690593037e-06, + "loss": 0.4309, + "step": 11639 + }, + { + "epoch": 0.7088268428584478, + "grad_norm": 1.0864187539895134, + "learning_rate": 4.832318175453491e-06, + "loss": 0.4504, + "step": 11640 + }, + { + "epoch": 0.7088877386353256, + "grad_norm": 1.0323023294986042, + "learning_rate": 4.8322894426004954e-06, + "loss": 0.5347, + "step": 11641 + }, + { + "epoch": 0.7089486344122035, + "grad_norm": 0.9775427769153656, + "learning_rate": 4.832260707371411e-06, + "loss": 0.4392, + "step": 11642 + }, + { + "epoch": 0.7090095301890814, + "grad_norm": 1.0944188236738777, + "learning_rate": 4.832231969766268e-06, + "loss": 0.3796, + "step": 11643 + }, + { + "epoch": 0.7090704259659593, + "grad_norm": 1.0410045851016207, + "learning_rate": 4.832203229785096e-06, + "loss": 0.4101, + "step": 11644 + }, + { + "epoch": 0.7091313217428371, + "grad_norm": 0.9222617587014451, + "learning_rate": 4.832174487427924e-06, + "loss": 0.467, + "step": 11645 + }, + { + "epoch": 0.7091922175197151, + "grad_norm": 1.0112131089435477, + "learning_rate": 4.832145742694781e-06, + "loss": 0.4212, + "step": 11646 + }, + { + "epoch": 0.7092531132965929, + "grad_norm": 1.07802834863394, + "learning_rate": 4.832116995585696e-06, + "loss": 0.3989, + "step": 11647 + }, + { + "epoch": 0.7093140090734708, + "grad_norm": 0.9690299250289776, + "learning_rate": 4.832088246100699e-06, + "loss": 0.3885, + "step": 11648 + }, + { + "epoch": 0.7093749048503486, + "grad_norm": 1.0239044019784, + "learning_rate": 4.832059494239819e-06, + "loss": 0.4542, + "step": 11649 + }, + { + "epoch": 0.7094358006272266, + "grad_norm": 0.9794458823423287, + "learning_rate": 4.8320307400030855e-06, + "loss": 0.467, + "step": 11650 + }, + { + "epoch": 0.7094966964041044, + "grad_norm": 0.8910905690836756, + "learning_rate": 4.832001983390528e-06, + "loss": 0.4296, + "step": 11651 + }, + { + "epoch": 0.7095575921809822, + "grad_norm": 1.0596061516826256, + "learning_rate": 4.8319732244021746e-06, + "loss": 0.4423, + "step": 11652 + }, + { + "epoch": 0.7096184879578601, + "grad_norm": 0.9897085058211019, + "learning_rate": 4.831944463038055e-06, + "loss": 0.4138, + "step": 11653 + }, + { + "epoch": 0.709679383734738, + "grad_norm": 0.9908430575054042, + "learning_rate": 4.831915699298199e-06, + "loss": 0.4487, + "step": 11654 + }, + { + "epoch": 0.7097402795116159, + "grad_norm": 0.9337273081221814, + "learning_rate": 4.831886933182637e-06, + "loss": 0.5129, + "step": 11655 + }, + { + "epoch": 0.7098011752884937, + "grad_norm": 0.9676320394830529, + "learning_rate": 4.8318581646913954e-06, + "loss": 0.4503, + "step": 11656 + }, + { + "epoch": 0.7098620710653716, + "grad_norm": 1.0278941982200958, + "learning_rate": 4.831829393824506e-06, + "loss": 0.4398, + "step": 11657 + }, + { + "epoch": 0.7099229668422495, + "grad_norm": 0.9354288251007071, + "learning_rate": 4.831800620581997e-06, + "loss": 0.5074, + "step": 11658 + }, + { + "epoch": 0.7099838626191274, + "grad_norm": 0.96636553552685, + "learning_rate": 4.831771844963898e-06, + "loss": 0.4367, + "step": 11659 + }, + { + "epoch": 0.7100447583960052, + "grad_norm": 1.0731436155255993, + "learning_rate": 4.831743066970239e-06, + "loss": 0.3347, + "step": 11660 + }, + { + "epoch": 0.7101056541728831, + "grad_norm": 1.0325482227013598, + "learning_rate": 4.831714286601048e-06, + "loss": 0.3876, + "step": 11661 + }, + { + "epoch": 0.710166549949761, + "grad_norm": 0.9465462306387549, + "learning_rate": 4.831685503856356e-06, + "loss": 0.4283, + "step": 11662 + }, + { + "epoch": 0.7102274457266389, + "grad_norm": 0.9830960475971922, + "learning_rate": 4.83165671873619e-06, + "loss": 0.4866, + "step": 11663 + }, + { + "epoch": 0.7102883415035167, + "grad_norm": 1.0724713958836214, + "learning_rate": 4.831627931240581e-06, + "loss": 0.4398, + "step": 11664 + }, + { + "epoch": 0.7103492372803946, + "grad_norm": 1.0052330248484767, + "learning_rate": 4.8315991413695586e-06, + "loss": 0.3791, + "step": 11665 + }, + { + "epoch": 0.7104101330572725, + "grad_norm": 0.9398558130752938, + "learning_rate": 4.8315703491231505e-06, + "loss": 0.5526, + "step": 11666 + }, + { + "epoch": 0.7104710288341504, + "grad_norm": 1.0569467049273693, + "learning_rate": 4.831541554501388e-06, + "loss": 0.3812, + "step": 11667 + }, + { + "epoch": 0.7105319246110282, + "grad_norm": 0.9527090526215563, + "learning_rate": 4.831512757504298e-06, + "loss": 0.477, + "step": 11668 + }, + { + "epoch": 0.710592820387906, + "grad_norm": 0.9387864395319958, + "learning_rate": 4.8314839581319125e-06, + "loss": 0.4383, + "step": 11669 + }, + { + "epoch": 0.710653716164784, + "grad_norm": 0.942313781350403, + "learning_rate": 4.831455156384259e-06, + "loss": 0.4493, + "step": 11670 + }, + { + "epoch": 0.7107146119416619, + "grad_norm": 1.0505553513462955, + "learning_rate": 4.831426352261367e-06, + "loss": 0.4096, + "step": 11671 + }, + { + "epoch": 0.7107755077185397, + "grad_norm": 0.9475832387329387, + "learning_rate": 4.831397545763268e-06, + "loss": 0.429, + "step": 11672 + }, + { + "epoch": 0.7108364034954175, + "grad_norm": 0.9783774235356271, + "learning_rate": 4.831368736889988e-06, + "loss": 0.3846, + "step": 11673 + }, + { + "epoch": 0.7108972992722955, + "grad_norm": 1.0389285368630732, + "learning_rate": 4.8313399256415585e-06, + "loss": 0.384, + "step": 11674 + }, + { + "epoch": 0.7109581950491733, + "grad_norm": 1.0247542560183267, + "learning_rate": 4.831311112018009e-06, + "loss": 0.3959, + "step": 11675 + }, + { + "epoch": 0.7110190908260512, + "grad_norm": 1.0385698497611364, + "learning_rate": 4.831282296019367e-06, + "loss": 0.4498, + "step": 11676 + }, + { + "epoch": 0.711079986602929, + "grad_norm": 0.9698620699384151, + "learning_rate": 4.831253477645664e-06, + "loss": 0.3759, + "step": 11677 + }, + { + "epoch": 0.711140882379807, + "grad_norm": 0.9414164312705617, + "learning_rate": 4.831224656896928e-06, + "loss": 0.4638, + "step": 11678 + }, + { + "epoch": 0.7112017781566848, + "grad_norm": 0.9443419845228004, + "learning_rate": 4.831195833773189e-06, + "loss": 0.4414, + "step": 11679 + }, + { + "epoch": 0.7112626739335627, + "grad_norm": 1.1217164524900252, + "learning_rate": 4.831167008274477e-06, + "loss": 0.3961, + "step": 11680 + }, + { + "epoch": 0.7113235697104405, + "grad_norm": 1.1025583228349143, + "learning_rate": 4.83113818040082e-06, + "loss": 0.4009, + "step": 11681 + }, + { + "epoch": 0.7113844654873185, + "grad_norm": 0.9367160719202969, + "learning_rate": 4.831109350152246e-06, + "loss": 0.4982, + "step": 11682 + }, + { + "epoch": 0.7114453612641963, + "grad_norm": 1.1206267628948015, + "learning_rate": 4.831080517528788e-06, + "loss": 0.3353, + "step": 11683 + }, + { + "epoch": 0.7115062570410742, + "grad_norm": 0.9442577432739455, + "learning_rate": 4.831051682530474e-06, + "loss": 0.499, + "step": 11684 + }, + { + "epoch": 0.711567152817952, + "grad_norm": 0.9881320254512179, + "learning_rate": 4.831022845157333e-06, + "loss": 0.3913, + "step": 11685 + }, + { + "epoch": 0.71162804859483, + "grad_norm": 0.9883487916171778, + "learning_rate": 4.830994005409393e-06, + "loss": 0.4421, + "step": 11686 + }, + { + "epoch": 0.7116889443717078, + "grad_norm": 1.0337960216962854, + "learning_rate": 4.830965163286686e-06, + "loss": 0.4525, + "step": 11687 + }, + { + "epoch": 0.7117498401485857, + "grad_norm": 1.1244564746699635, + "learning_rate": 4.8309363187892395e-06, + "loss": 0.4074, + "step": 11688 + }, + { + "epoch": 0.7118107359254636, + "grad_norm": 1.0898021378059362, + "learning_rate": 4.830907471917084e-06, + "loss": 0.4497, + "step": 11689 + }, + { + "epoch": 0.7118716317023415, + "grad_norm": 1.0491316498123704, + "learning_rate": 4.830878622670248e-06, + "loss": 0.3688, + "step": 11690 + }, + { + "epoch": 0.7119325274792193, + "grad_norm": 0.9840429006354926, + "learning_rate": 4.830849771048761e-06, + "loss": 0.46, + "step": 11691 + }, + { + "epoch": 0.7119934232560972, + "grad_norm": 1.041639585875487, + "learning_rate": 4.830820917052654e-06, + "loss": 0.513, + "step": 11692 + }, + { + "epoch": 0.7120543190329751, + "grad_norm": 0.9858801450209914, + "learning_rate": 4.830792060681954e-06, + "loss": 0.4389, + "step": 11693 + }, + { + "epoch": 0.712115214809853, + "grad_norm": 0.9835866965594479, + "learning_rate": 4.8307632019366924e-06, + "loss": 0.3508, + "step": 11694 + }, + { + "epoch": 0.7121761105867308, + "grad_norm": 0.9407301973161473, + "learning_rate": 4.830734340816897e-06, + "loss": 0.4117, + "step": 11695 + }, + { + "epoch": 0.7122370063636086, + "grad_norm": 0.916663985793612, + "learning_rate": 4.830705477322598e-06, + "loss": 0.4612, + "step": 11696 + }, + { + "epoch": 0.7122979021404866, + "grad_norm": 0.9966045366679273, + "learning_rate": 4.830676611453824e-06, + "loss": 0.3891, + "step": 11697 + }, + { + "epoch": 0.7123587979173645, + "grad_norm": 1.0039907985092857, + "learning_rate": 4.830647743210607e-06, + "loss": 0.4641, + "step": 11698 + }, + { + "epoch": 0.7124196936942423, + "grad_norm": 0.9990704161616812, + "learning_rate": 4.830618872592973e-06, + "loss": 0.4076, + "step": 11699 + }, + { + "epoch": 0.7124805894711201, + "grad_norm": 0.9723059550324888, + "learning_rate": 4.830589999600953e-06, + "loss": 0.4551, + "step": 11700 + }, + { + "epoch": 0.7125414852479981, + "grad_norm": 1.0594435021628468, + "learning_rate": 4.830561124234577e-06, + "loss": 0.3991, + "step": 11701 + }, + { + "epoch": 0.7126023810248759, + "grad_norm": 0.9950241610786015, + "learning_rate": 4.830532246493874e-06, + "loss": 0.4544, + "step": 11702 + }, + { + "epoch": 0.7126632768017538, + "grad_norm": 1.0830620924359553, + "learning_rate": 4.830503366378872e-06, + "loss": 0.4797, + "step": 11703 + }, + { + "epoch": 0.7127241725786316, + "grad_norm": 0.9939241888664512, + "learning_rate": 4.830474483889603e-06, + "loss": 0.4196, + "step": 11704 + }, + { + "epoch": 0.7127850683555096, + "grad_norm": 0.9842793010368324, + "learning_rate": 4.830445599026095e-06, + "loss": 0.4012, + "step": 11705 + }, + { + "epoch": 0.7128459641323874, + "grad_norm": 1.019120454437109, + "learning_rate": 4.830416711788376e-06, + "loss": 0.4739, + "step": 11706 + }, + { + "epoch": 0.7129068599092653, + "grad_norm": 0.9353653860045301, + "learning_rate": 4.830387822176478e-06, + "loss": 0.4882, + "step": 11707 + }, + { + "epoch": 0.7129677556861431, + "grad_norm": 0.9526812610520835, + "learning_rate": 4.830358930190429e-06, + "loss": 0.4577, + "step": 11708 + }, + { + "epoch": 0.7130286514630211, + "grad_norm": 0.9465169081893752, + "learning_rate": 4.830330035830259e-06, + "loss": 0.497, + "step": 11709 + }, + { + "epoch": 0.7130895472398989, + "grad_norm": 0.9732175381871365, + "learning_rate": 4.830301139095997e-06, + "loss": 0.4045, + "step": 11710 + }, + { + "epoch": 0.7131504430167768, + "grad_norm": 0.9744428701047677, + "learning_rate": 4.8302722399876725e-06, + "loss": 0.3848, + "step": 11711 + }, + { + "epoch": 0.7132113387936546, + "grad_norm": 1.0284269646156845, + "learning_rate": 4.830243338505316e-06, + "loss": 0.4663, + "step": 11712 + }, + { + "epoch": 0.7132722345705326, + "grad_norm": 0.9896821197234854, + "learning_rate": 4.8302144346489556e-06, + "loss": 0.4913, + "step": 11713 + }, + { + "epoch": 0.7133331303474104, + "grad_norm": 0.930627416918415, + "learning_rate": 4.8301855284186206e-06, + "loss": 0.4048, + "step": 11714 + }, + { + "epoch": 0.7133940261242883, + "grad_norm": 0.9693421497627284, + "learning_rate": 4.830156619814341e-06, + "loss": 0.4572, + "step": 11715 + }, + { + "epoch": 0.7134549219011661, + "grad_norm": 0.9927473917436499, + "learning_rate": 4.830127708836147e-06, + "loss": 0.3829, + "step": 11716 + }, + { + "epoch": 0.7135158176780441, + "grad_norm": 1.1334838402406675, + "learning_rate": 4.830098795484067e-06, + "loss": 0.3614, + "step": 11717 + }, + { + "epoch": 0.7135767134549219, + "grad_norm": 1.0305240446730524, + "learning_rate": 4.830069879758131e-06, + "loss": 0.4425, + "step": 11718 + }, + { + "epoch": 0.7136376092317998, + "grad_norm": 1.0277970169676076, + "learning_rate": 4.8300409616583675e-06, + "loss": 0.4148, + "step": 11719 + }, + { + "epoch": 0.7136985050086776, + "grad_norm": 0.950095572765355, + "learning_rate": 4.830012041184808e-06, + "loss": 0.4786, + "step": 11720 + }, + { + "epoch": 0.7137594007855556, + "grad_norm": 1.0495868073349228, + "learning_rate": 4.829983118337479e-06, + "loss": 0.4211, + "step": 11721 + }, + { + "epoch": 0.7138202965624334, + "grad_norm": 1.060132610991573, + "learning_rate": 4.8299541931164125e-06, + "loss": 0.4117, + "step": 11722 + }, + { + "epoch": 0.7138811923393112, + "grad_norm": 0.9783561284863928, + "learning_rate": 4.829925265521637e-06, + "loss": 0.4629, + "step": 11723 + }, + { + "epoch": 0.7139420881161891, + "grad_norm": 1.0690543673740276, + "learning_rate": 4.829896335553183e-06, + "loss": 0.3931, + "step": 11724 + }, + { + "epoch": 0.714002983893067, + "grad_norm": 1.1472488785964106, + "learning_rate": 4.829867403211078e-06, + "loss": 0.3954, + "step": 11725 + }, + { + "epoch": 0.7140638796699449, + "grad_norm": 1.0958146832648181, + "learning_rate": 4.8298384684953534e-06, + "loss": 0.4284, + "step": 11726 + }, + { + "epoch": 0.7141247754468227, + "grad_norm": 0.9259942758446917, + "learning_rate": 4.829809531406037e-06, + "loss": 0.4665, + "step": 11727 + }, + { + "epoch": 0.7141856712237007, + "grad_norm": 0.9641763685239386, + "learning_rate": 4.82978059194316e-06, + "loss": 0.4057, + "step": 11728 + }, + { + "epoch": 0.7142465670005785, + "grad_norm": 0.9768519009718721, + "learning_rate": 4.8297516501067495e-06, + "loss": 0.4774, + "step": 11729 + }, + { + "epoch": 0.7143074627774564, + "grad_norm": 0.998764332929245, + "learning_rate": 4.829722705896838e-06, + "loss": 0.4117, + "step": 11730 + }, + { + "epoch": 0.7143683585543342, + "grad_norm": 1.0532673859810127, + "learning_rate": 4.829693759313452e-06, + "loss": 0.5365, + "step": 11731 + }, + { + "epoch": 0.7144292543312122, + "grad_norm": 1.0360018966903741, + "learning_rate": 4.8296648103566235e-06, + "loss": 0.3922, + "step": 11732 + }, + { + "epoch": 0.71449015010809, + "grad_norm": 0.8963947244573238, + "learning_rate": 4.82963585902638e-06, + "loss": 0.5312, + "step": 11733 + }, + { + "epoch": 0.7145510458849679, + "grad_norm": 1.0258578027960725, + "learning_rate": 4.829606905322753e-06, + "loss": 0.4262, + "step": 11734 + }, + { + "epoch": 0.7146119416618457, + "grad_norm": 1.0676994405179683, + "learning_rate": 4.829577949245771e-06, + "loss": 0.3794, + "step": 11735 + }, + { + "epoch": 0.7146728374387237, + "grad_norm": 0.9603674888842298, + "learning_rate": 4.8295489907954615e-06, + "loss": 0.4599, + "step": 11736 + }, + { + "epoch": 0.7147337332156015, + "grad_norm": 1.020917586219788, + "learning_rate": 4.829520029971858e-06, + "loss": 0.4417, + "step": 11737 + }, + { + "epoch": 0.7147946289924794, + "grad_norm": 1.0297231020354902, + "learning_rate": 4.829491066774987e-06, + "loss": 0.4878, + "step": 11738 + }, + { + "epoch": 0.7148555247693572, + "grad_norm": 0.9820015952413239, + "learning_rate": 4.829462101204879e-06, + "loss": 0.3889, + "step": 11739 + }, + { + "epoch": 0.7149164205462352, + "grad_norm": 0.9553652232447319, + "learning_rate": 4.8294331332615635e-06, + "loss": 0.4274, + "step": 11740 + }, + { + "epoch": 0.714977316323113, + "grad_norm": 1.0079160902171571, + "learning_rate": 4.829404162945071e-06, + "loss": 0.445, + "step": 11741 + }, + { + "epoch": 0.7150382120999909, + "grad_norm": 0.9598702667906507, + "learning_rate": 4.829375190255428e-06, + "loss": 0.4037, + "step": 11742 + }, + { + "epoch": 0.7150991078768687, + "grad_norm": 0.9143868338199806, + "learning_rate": 4.829346215192667e-06, + "loss": 0.5236, + "step": 11743 + }, + { + "epoch": 0.7151600036537467, + "grad_norm": 0.9733407795177734, + "learning_rate": 4.8293172377568166e-06, + "loss": 0.4431, + "step": 11744 + }, + { + "epoch": 0.7152208994306245, + "grad_norm": 0.9972160563110352, + "learning_rate": 4.829288257947906e-06, + "loss": 0.4348, + "step": 11745 + }, + { + "epoch": 0.7152817952075023, + "grad_norm": 0.9587069826947798, + "learning_rate": 4.829259275765965e-06, + "loss": 0.5559, + "step": 11746 + }, + { + "epoch": 0.7153426909843802, + "grad_norm": 0.9473968378386369, + "learning_rate": 4.829230291211023e-06, + "loss": 0.4137, + "step": 11747 + }, + { + "epoch": 0.7154035867612581, + "grad_norm": 0.9860750340767941, + "learning_rate": 4.82920130428311e-06, + "loss": 0.3765, + "step": 11748 + }, + { + "epoch": 0.715464482538136, + "grad_norm": 1.0037142497623661, + "learning_rate": 4.8291723149822546e-06, + "loss": 0.3768, + "step": 11749 + }, + { + "epoch": 0.7155253783150138, + "grad_norm": 1.0081409492163733, + "learning_rate": 4.8291433233084875e-06, + "loss": 0.3881, + "step": 11750 + }, + { + "epoch": 0.7155862740918917, + "grad_norm": 0.9887092442324034, + "learning_rate": 4.829114329261837e-06, + "loss": 0.4867, + "step": 11751 + }, + { + "epoch": 0.7156471698687696, + "grad_norm": 1.02746419174354, + "learning_rate": 4.829085332842333e-06, + "loss": 0.3645, + "step": 11752 + }, + { + "epoch": 0.7157080656456475, + "grad_norm": 0.9259031038368843, + "learning_rate": 4.8290563340500066e-06, + "loss": 0.4879, + "step": 11753 + }, + { + "epoch": 0.7157689614225253, + "grad_norm": 0.9821681583728322, + "learning_rate": 4.829027332884885e-06, + "loss": 0.4961, + "step": 11754 + }, + { + "epoch": 0.7158298571994032, + "grad_norm": 0.9715976117593521, + "learning_rate": 4.828998329346999e-06, + "loss": 0.4788, + "step": 11755 + }, + { + "epoch": 0.7158907529762811, + "grad_norm": 1.1102758003988338, + "learning_rate": 4.8289693234363775e-06, + "loss": 0.3816, + "step": 11756 + }, + { + "epoch": 0.715951648753159, + "grad_norm": 0.9956478201908447, + "learning_rate": 4.8289403151530515e-06, + "loss": 0.4338, + "step": 11757 + }, + { + "epoch": 0.7160125445300368, + "grad_norm": 1.034753398310687, + "learning_rate": 4.828911304497048e-06, + "loss": 0.4077, + "step": 11758 + }, + { + "epoch": 0.7160734403069147, + "grad_norm": 1.0765610158365777, + "learning_rate": 4.8288822914684e-06, + "loss": 0.4608, + "step": 11759 + }, + { + "epoch": 0.7161343360837926, + "grad_norm": 0.9023519620506719, + "learning_rate": 4.828853276067133e-06, + "loss": 0.5212, + "step": 11760 + }, + { + "epoch": 0.7161952318606705, + "grad_norm": 1.051486515161748, + "learning_rate": 4.82882425829328e-06, + "loss": 0.4369, + "step": 11761 + }, + { + "epoch": 0.7162561276375483, + "grad_norm": 0.9729547416105265, + "learning_rate": 4.82879523814687e-06, + "loss": 0.4746, + "step": 11762 + }, + { + "epoch": 0.7163170234144262, + "grad_norm": 0.9797967220647058, + "learning_rate": 4.82876621562793e-06, + "loss": 0.4473, + "step": 11763 + }, + { + "epoch": 0.7163779191913041, + "grad_norm": 1.1236264803399807, + "learning_rate": 4.828737190736493e-06, + "loss": 0.4235, + "step": 11764 + }, + { + "epoch": 0.716438814968182, + "grad_norm": 1.005219963346824, + "learning_rate": 4.828708163472586e-06, + "loss": 0.3922, + "step": 11765 + }, + { + "epoch": 0.7164997107450598, + "grad_norm": 1.0736163549043922, + "learning_rate": 4.82867913383624e-06, + "loss": 0.406, + "step": 11766 + }, + { + "epoch": 0.7165606065219376, + "grad_norm": 1.0276643154289558, + "learning_rate": 4.828650101827484e-06, + "loss": 0.3751, + "step": 11767 + }, + { + "epoch": 0.7166215022988156, + "grad_norm": 0.9677439378424713, + "learning_rate": 4.8286210674463475e-06, + "loss": 0.403, + "step": 11768 + }, + { + "epoch": 0.7166823980756934, + "grad_norm": 1.0471078602924335, + "learning_rate": 4.82859203069286e-06, + "loss": 0.4429, + "step": 11769 + }, + { + "epoch": 0.7167432938525713, + "grad_norm": 0.9980610237891964, + "learning_rate": 4.828562991567052e-06, + "loss": 0.4482, + "step": 11770 + }, + { + "epoch": 0.7168041896294493, + "grad_norm": 1.0003062331060173, + "learning_rate": 4.828533950068952e-06, + "loss": 0.427, + "step": 11771 + }, + { + "epoch": 0.7168650854063271, + "grad_norm": 1.0068978603627419, + "learning_rate": 4.82850490619859e-06, + "loss": 0.4473, + "step": 11772 + }, + { + "epoch": 0.7169259811832049, + "grad_norm": 0.9928299045554683, + "learning_rate": 4.828475859955996e-06, + "loss": 0.4782, + "step": 11773 + }, + { + "epoch": 0.7169868769600828, + "grad_norm": 0.9014837018230694, + "learning_rate": 4.828446811341199e-06, + "loss": 0.3956, + "step": 11774 + }, + { + "epoch": 0.7170477727369607, + "grad_norm": 1.1104095971015011, + "learning_rate": 4.8284177603542295e-06, + "loss": 0.4192, + "step": 11775 + }, + { + "epoch": 0.7171086685138386, + "grad_norm": 0.9526365921680005, + "learning_rate": 4.828388706995115e-06, + "loss": 0.3956, + "step": 11776 + }, + { + "epoch": 0.7171695642907164, + "grad_norm": 0.9634802685535866, + "learning_rate": 4.828359651263888e-06, + "loss": 0.4405, + "step": 11777 + }, + { + "epoch": 0.7172304600675943, + "grad_norm": 0.915223023068366, + "learning_rate": 4.828330593160575e-06, + "loss": 0.5035, + "step": 11778 + }, + { + "epoch": 0.7172913558444722, + "grad_norm": 1.0162303629209009, + "learning_rate": 4.828301532685208e-06, + "loss": 0.4231, + "step": 11779 + }, + { + "epoch": 0.7173522516213501, + "grad_norm": 0.9453872680852735, + "learning_rate": 4.8282724698378155e-06, + "loss": 0.4372, + "step": 11780 + }, + { + "epoch": 0.7174131473982279, + "grad_norm": 1.0222210059189947, + "learning_rate": 4.828243404618428e-06, + "loss": 0.428, + "step": 11781 + }, + { + "epoch": 0.7174740431751058, + "grad_norm": 0.9420441301567557, + "learning_rate": 4.828214337027074e-06, + "loss": 0.4121, + "step": 11782 + }, + { + "epoch": 0.7175349389519837, + "grad_norm": 0.9778107784245841, + "learning_rate": 4.828185267063784e-06, + "loss": 0.4416, + "step": 11783 + }, + { + "epoch": 0.7175958347288616, + "grad_norm": 1.0200915438318312, + "learning_rate": 4.828156194728587e-06, + "loss": 0.4659, + "step": 11784 + }, + { + "epoch": 0.7176567305057394, + "grad_norm": 1.0483481838381041, + "learning_rate": 4.828127120021513e-06, + "loss": 0.3789, + "step": 11785 + }, + { + "epoch": 0.7177176262826173, + "grad_norm": 1.089808503230547, + "learning_rate": 4.828098042942592e-06, + "loss": 0.397, + "step": 11786 + }, + { + "epoch": 0.7177785220594952, + "grad_norm": 0.9987970176753054, + "learning_rate": 4.828068963491852e-06, + "loss": 0.419, + "step": 11787 + }, + { + "epoch": 0.7178394178363731, + "grad_norm": 0.9782190278751962, + "learning_rate": 4.828039881669325e-06, + "loss": 0.4062, + "step": 11788 + }, + { + "epoch": 0.7179003136132509, + "grad_norm": 1.0343790783580715, + "learning_rate": 4.828010797475038e-06, + "loss": 0.3973, + "step": 11789 + }, + { + "epoch": 0.7179612093901288, + "grad_norm": 1.047176493710062, + "learning_rate": 4.827981710909023e-06, + "loss": 0.4447, + "step": 11790 + }, + { + "epoch": 0.7180221051670067, + "grad_norm": 1.0679042191676564, + "learning_rate": 4.827952621971309e-06, + "loss": 0.4592, + "step": 11791 + }, + { + "epoch": 0.7180830009438846, + "grad_norm": 1.0040756035842613, + "learning_rate": 4.8279235306619245e-06, + "loss": 0.4308, + "step": 11792 + }, + { + "epoch": 0.7181438967207624, + "grad_norm": 1.0286913905986093, + "learning_rate": 4.8278944369809e-06, + "loss": 0.3967, + "step": 11793 + }, + { + "epoch": 0.7182047924976402, + "grad_norm": 1.0823636820854983, + "learning_rate": 4.827865340928265e-06, + "loss": 0.3401, + "step": 11794 + }, + { + "epoch": 0.7182656882745182, + "grad_norm": 1.0156799435993655, + "learning_rate": 4.8278362425040495e-06, + "loss": 0.4367, + "step": 11795 + }, + { + "epoch": 0.718326584051396, + "grad_norm": 0.9475407313971973, + "learning_rate": 4.827807141708282e-06, + "loss": 0.4235, + "step": 11796 + }, + { + "epoch": 0.7183874798282739, + "grad_norm": 1.0529652456590535, + "learning_rate": 4.827778038540993e-06, + "loss": 0.3197, + "step": 11797 + }, + { + "epoch": 0.7184483756051517, + "grad_norm": 1.0359141670185719, + "learning_rate": 4.827748933002213e-06, + "loss": 0.3974, + "step": 11798 + }, + { + "epoch": 0.7185092713820297, + "grad_norm": 0.9444886464282132, + "learning_rate": 4.827719825091971e-06, + "loss": 0.4616, + "step": 11799 + }, + { + "epoch": 0.7185701671589075, + "grad_norm": 1.0722858309169112, + "learning_rate": 4.827690714810296e-06, + "loss": 0.4406, + "step": 11800 + }, + { + "epoch": 0.7186310629357854, + "grad_norm": 1.0369155356205877, + "learning_rate": 4.827661602157218e-06, + "loss": 0.449, + "step": 11801 + }, + { + "epoch": 0.7186919587126632, + "grad_norm": 0.9705328155517297, + "learning_rate": 4.8276324871327664e-06, + "loss": 0.4431, + "step": 11802 + }, + { + "epoch": 0.7187528544895412, + "grad_norm": 0.9470376441510959, + "learning_rate": 4.827603369736972e-06, + "loss": 0.4856, + "step": 11803 + }, + { + "epoch": 0.718813750266419, + "grad_norm": 1.0259982806068388, + "learning_rate": 4.827574249969863e-06, + "loss": 0.383, + "step": 11804 + }, + { + "epoch": 0.7188746460432969, + "grad_norm": 1.1476563493895104, + "learning_rate": 4.82754512783147e-06, + "loss": 0.3762, + "step": 11805 + }, + { + "epoch": 0.7189355418201747, + "grad_norm": 1.0273197794602482, + "learning_rate": 4.827516003321823e-06, + "loss": 0.4168, + "step": 11806 + }, + { + "epoch": 0.7189964375970527, + "grad_norm": 0.9587689887503626, + "learning_rate": 4.82748687644095e-06, + "loss": 0.3868, + "step": 11807 + }, + { + "epoch": 0.7190573333739305, + "grad_norm": 0.9541120730042973, + "learning_rate": 4.8274577471888825e-06, + "loss": 0.4366, + "step": 11808 + }, + { + "epoch": 0.7191182291508084, + "grad_norm": 1.0802514658248699, + "learning_rate": 4.827428615565649e-06, + "loss": 0.3977, + "step": 11809 + }, + { + "epoch": 0.7191791249276863, + "grad_norm": 1.0292504926402102, + "learning_rate": 4.8273994815712795e-06, + "loss": 0.4022, + "step": 11810 + }, + { + "epoch": 0.7192400207045642, + "grad_norm": 1.1188849811761565, + "learning_rate": 4.827370345205804e-06, + "loss": 0.3432, + "step": 11811 + }, + { + "epoch": 0.719300916481442, + "grad_norm": 1.0382884428764458, + "learning_rate": 4.827341206469253e-06, + "loss": 0.409, + "step": 11812 + }, + { + "epoch": 0.7193618122583199, + "grad_norm": 0.9443184375907074, + "learning_rate": 4.827312065361654e-06, + "loss": 0.5082, + "step": 11813 + }, + { + "epoch": 0.7194227080351978, + "grad_norm": 1.0149599367312607, + "learning_rate": 4.827282921883038e-06, + "loss": 0.3806, + "step": 11814 + }, + { + "epoch": 0.7194836038120757, + "grad_norm": 1.0468153501833901, + "learning_rate": 4.8272537760334345e-06, + "loss": 0.4358, + "step": 11815 + }, + { + "epoch": 0.7195444995889535, + "grad_norm": 0.9726841643101438, + "learning_rate": 4.827224627812873e-06, + "loss": 0.3828, + "step": 11816 + }, + { + "epoch": 0.7196053953658313, + "grad_norm": 0.9380914196830467, + "learning_rate": 4.8271954772213845e-06, + "loss": 0.4018, + "step": 11817 + }, + { + "epoch": 0.7196662911427093, + "grad_norm": 0.9777844004070382, + "learning_rate": 4.827166324258997e-06, + "loss": 0.4602, + "step": 11818 + }, + { + "epoch": 0.7197271869195871, + "grad_norm": 0.9795705023998904, + "learning_rate": 4.827137168925741e-06, + "loss": 0.4213, + "step": 11819 + }, + { + "epoch": 0.719788082696465, + "grad_norm": 1.0325320485632017, + "learning_rate": 4.827108011221645e-06, + "loss": 0.4326, + "step": 11820 + }, + { + "epoch": 0.7198489784733428, + "grad_norm": 1.0183587547037154, + "learning_rate": 4.827078851146741e-06, + "loss": 0.4924, + "step": 11821 + }, + { + "epoch": 0.7199098742502208, + "grad_norm": 1.009695329061626, + "learning_rate": 4.827049688701057e-06, + "loss": 0.4716, + "step": 11822 + }, + { + "epoch": 0.7199707700270986, + "grad_norm": 1.0378815938786188, + "learning_rate": 4.827020523884623e-06, + "loss": 0.4772, + "step": 11823 + }, + { + "epoch": 0.7200316658039765, + "grad_norm": 0.9805963434811339, + "learning_rate": 4.826991356697469e-06, + "loss": 0.4817, + "step": 11824 + }, + { + "epoch": 0.7200925615808543, + "grad_norm": 1.0380179118156336, + "learning_rate": 4.826962187139625e-06, + "loss": 0.3346, + "step": 11825 + }, + { + "epoch": 0.7201534573577323, + "grad_norm": 0.9126604776266244, + "learning_rate": 4.82693301521112e-06, + "loss": 0.4566, + "step": 11826 + }, + { + "epoch": 0.7202143531346101, + "grad_norm": 1.010577144430645, + "learning_rate": 4.826903840911984e-06, + "loss": 0.4508, + "step": 11827 + }, + { + "epoch": 0.720275248911488, + "grad_norm": 0.994951963359578, + "learning_rate": 4.826874664242246e-06, + "loss": 0.3656, + "step": 11828 + }, + { + "epoch": 0.7203361446883658, + "grad_norm": 1.0199330809497111, + "learning_rate": 4.8268454852019376e-06, + "loss": 0.4554, + "step": 11829 + }, + { + "epoch": 0.7203970404652438, + "grad_norm": 0.925808131715066, + "learning_rate": 4.826816303791087e-06, + "loss": 0.4225, + "step": 11830 + }, + { + "epoch": 0.7204579362421216, + "grad_norm": 0.9455795294956787, + "learning_rate": 4.826787120009725e-06, + "loss": 0.4869, + "step": 11831 + }, + { + "epoch": 0.7205188320189995, + "grad_norm": 1.0722972792976844, + "learning_rate": 4.82675793385788e-06, + "loss": 0.4238, + "step": 11832 + }, + { + "epoch": 0.7205797277958773, + "grad_norm": 1.051796553540098, + "learning_rate": 4.826728745335581e-06, + "loss": 0.3602, + "step": 11833 + }, + { + "epoch": 0.7206406235727553, + "grad_norm": 1.1265752906334203, + "learning_rate": 4.826699554442861e-06, + "loss": 0.5233, + "step": 11834 + }, + { + "epoch": 0.7207015193496331, + "grad_norm": 0.9494942823021382, + "learning_rate": 4.8266703611797474e-06, + "loss": 0.4359, + "step": 11835 + }, + { + "epoch": 0.720762415126511, + "grad_norm": 1.009930760817381, + "learning_rate": 4.82664116554627e-06, + "loss": 0.4707, + "step": 11836 + }, + { + "epoch": 0.7208233109033888, + "grad_norm": 1.0921767155833768, + "learning_rate": 4.82661196754246e-06, + "loss": 0.3843, + "step": 11837 + }, + { + "epoch": 0.7208842066802668, + "grad_norm": 1.0106056147346578, + "learning_rate": 4.826582767168345e-06, + "loss": 0.4353, + "step": 11838 + }, + { + "epoch": 0.7209451024571446, + "grad_norm": 1.103776872381024, + "learning_rate": 4.826553564423956e-06, + "loss": 0.3829, + "step": 11839 + }, + { + "epoch": 0.7210059982340224, + "grad_norm": 1.0545782645965567, + "learning_rate": 4.826524359309323e-06, + "loss": 0.5038, + "step": 11840 + }, + { + "epoch": 0.7210668940109003, + "grad_norm": 0.9156878937987778, + "learning_rate": 4.826495151824475e-06, + "loss": 0.3874, + "step": 11841 + }, + { + "epoch": 0.7211277897877783, + "grad_norm": 0.9256594597169081, + "learning_rate": 4.826465941969442e-06, + "loss": 0.4783, + "step": 11842 + }, + { + "epoch": 0.7211886855646561, + "grad_norm": 1.0731106568940747, + "learning_rate": 4.826436729744255e-06, + "loss": 0.3745, + "step": 11843 + }, + { + "epoch": 0.7212495813415339, + "grad_norm": 1.0897770595422245, + "learning_rate": 4.826407515148942e-06, + "loss": 0.3952, + "step": 11844 + }, + { + "epoch": 0.7213104771184118, + "grad_norm": 1.019281830059066, + "learning_rate": 4.8263782981835325e-06, + "loss": 0.4536, + "step": 11845 + }, + { + "epoch": 0.7213713728952897, + "grad_norm": 0.9842570026791093, + "learning_rate": 4.826349078848058e-06, + "loss": 0.4601, + "step": 11846 + }, + { + "epoch": 0.7214322686721676, + "grad_norm": 1.008126906298017, + "learning_rate": 4.826319857142547e-06, + "loss": 0.4429, + "step": 11847 + }, + { + "epoch": 0.7214931644490454, + "grad_norm": 0.9818334416230893, + "learning_rate": 4.82629063306703e-06, + "loss": 0.4007, + "step": 11848 + }, + { + "epoch": 0.7215540602259233, + "grad_norm": 0.9301085934379173, + "learning_rate": 4.8262614066215365e-06, + "loss": 0.471, + "step": 11849 + }, + { + "epoch": 0.7216149560028012, + "grad_norm": 0.91349594098339, + "learning_rate": 4.8262321778060965e-06, + "loss": 0.4245, + "step": 11850 + }, + { + "epoch": 0.7216758517796791, + "grad_norm": 0.9412203111455544, + "learning_rate": 4.8262029466207396e-06, + "loss": 0.4276, + "step": 11851 + }, + { + "epoch": 0.7217367475565569, + "grad_norm": 0.9603506981765906, + "learning_rate": 4.8261737130654954e-06, + "loss": 0.4705, + "step": 11852 + }, + { + "epoch": 0.7217976433334349, + "grad_norm": 0.9981577274424864, + "learning_rate": 4.826144477140393e-06, + "loss": 0.424, + "step": 11853 + }, + { + "epoch": 0.7218585391103127, + "grad_norm": 1.0204844246522182, + "learning_rate": 4.826115238845463e-06, + "loss": 0.3385, + "step": 11854 + }, + { + "epoch": 0.7219194348871906, + "grad_norm": 0.9775623358669062, + "learning_rate": 4.826085998180736e-06, + "loss": 0.4112, + "step": 11855 + }, + { + "epoch": 0.7219803306640684, + "grad_norm": 1.0614103856252375, + "learning_rate": 4.8260567551462404e-06, + "loss": 0.454, + "step": 11856 + }, + { + "epoch": 0.7220412264409464, + "grad_norm": 1.00395074077946, + "learning_rate": 4.8260275097420066e-06, + "loss": 0.4309, + "step": 11857 + }, + { + "epoch": 0.7221021222178242, + "grad_norm": 1.0986175154507325, + "learning_rate": 4.825998261968064e-06, + "loss": 0.4926, + "step": 11858 + }, + { + "epoch": 0.7221630179947021, + "grad_norm": 0.9899064407347183, + "learning_rate": 4.825969011824444e-06, + "loss": 0.4051, + "step": 11859 + }, + { + "epoch": 0.7222239137715799, + "grad_norm": 0.9951349248435766, + "learning_rate": 4.825939759311174e-06, + "loss": 0.4271, + "step": 11860 + }, + { + "epoch": 0.7222848095484579, + "grad_norm": 0.9606713202151921, + "learning_rate": 4.825910504428285e-06, + "loss": 0.5056, + "step": 11861 + }, + { + "epoch": 0.7223457053253357, + "grad_norm": 1.0622777036227589, + "learning_rate": 4.825881247175807e-06, + "loss": 0.3193, + "step": 11862 + }, + { + "epoch": 0.7224066011022136, + "grad_norm": 0.9177672403022674, + "learning_rate": 4.825851987553769e-06, + "loss": 0.4381, + "step": 11863 + }, + { + "epoch": 0.7224674968790914, + "grad_norm": 1.1991537386910225, + "learning_rate": 4.825822725562203e-06, + "loss": 0.3448, + "step": 11864 + }, + { + "epoch": 0.7225283926559694, + "grad_norm": 0.9947887728377144, + "learning_rate": 4.825793461201136e-06, + "loss": 0.463, + "step": 11865 + }, + { + "epoch": 0.7225892884328472, + "grad_norm": 0.9885236848812046, + "learning_rate": 4.825764194470599e-06, + "loss": 0.4433, + "step": 11866 + }, + { + "epoch": 0.722650184209725, + "grad_norm": 0.9392812632686315, + "learning_rate": 4.825734925370621e-06, + "loss": 0.4648, + "step": 11867 + }, + { + "epoch": 0.7227110799866029, + "grad_norm": 1.1333399975671017, + "learning_rate": 4.825705653901234e-06, + "loss": 0.3658, + "step": 11868 + }, + { + "epoch": 0.7227719757634808, + "grad_norm": 1.0405899225181496, + "learning_rate": 4.825676380062465e-06, + "loss": 0.3977, + "step": 11869 + }, + { + "epoch": 0.7228328715403587, + "grad_norm": 0.9614422504836262, + "learning_rate": 4.8256471038543465e-06, + "loss": 0.4976, + "step": 11870 + }, + { + "epoch": 0.7228937673172365, + "grad_norm": 1.00370068945994, + "learning_rate": 4.825617825276907e-06, + "loss": 0.3464, + "step": 11871 + }, + { + "epoch": 0.7229546630941144, + "grad_norm": 1.1612430667318658, + "learning_rate": 4.825588544330175e-06, + "loss": 0.4223, + "step": 11872 + }, + { + "epoch": 0.7230155588709923, + "grad_norm": 1.0067501755317505, + "learning_rate": 4.825559261014183e-06, + "loss": 0.4152, + "step": 11873 + }, + { + "epoch": 0.7230764546478702, + "grad_norm": 0.9703471407517762, + "learning_rate": 4.8255299753289594e-06, + "loss": 0.4342, + "step": 11874 + }, + { + "epoch": 0.723137350424748, + "grad_norm": 0.998158739434957, + "learning_rate": 4.825500687274535e-06, + "loss": 0.44, + "step": 11875 + }, + { + "epoch": 0.7231982462016259, + "grad_norm": 1.0760577043580213, + "learning_rate": 4.825471396850938e-06, + "loss": 0.4215, + "step": 11876 + }, + { + "epoch": 0.7232591419785038, + "grad_norm": 1.1023016701483817, + "learning_rate": 4.825442104058199e-06, + "loss": 0.3955, + "step": 11877 + }, + { + "epoch": 0.7233200377553817, + "grad_norm": 1.083910742321289, + "learning_rate": 4.825412808896348e-06, + "loss": 0.481, + "step": 11878 + }, + { + "epoch": 0.7233809335322595, + "grad_norm": 1.0545826567462377, + "learning_rate": 4.825383511365415e-06, + "loss": 0.3828, + "step": 11879 + }, + { + "epoch": 0.7234418293091374, + "grad_norm": 1.1247152232109814, + "learning_rate": 4.82535421146543e-06, + "loss": 0.3415, + "step": 11880 + }, + { + "epoch": 0.7235027250860153, + "grad_norm": 1.0305143936319492, + "learning_rate": 4.825324909196422e-06, + "loss": 0.46, + "step": 11881 + }, + { + "epoch": 0.7235636208628932, + "grad_norm": 1.0858280026564187, + "learning_rate": 4.825295604558421e-06, + "loss": 0.3747, + "step": 11882 + }, + { + "epoch": 0.723624516639771, + "grad_norm": 1.052305178267875, + "learning_rate": 4.825266297551457e-06, + "loss": 0.3753, + "step": 11883 + }, + { + "epoch": 0.7236854124166489, + "grad_norm": 0.9866489346669776, + "learning_rate": 4.82523698817556e-06, + "loss": 0.4081, + "step": 11884 + }, + { + "epoch": 0.7237463081935268, + "grad_norm": 1.1088641403291803, + "learning_rate": 4.825207676430761e-06, + "loss": 0.3247, + "step": 11885 + }, + { + "epoch": 0.7238072039704047, + "grad_norm": 0.9453208918184867, + "learning_rate": 4.825178362317088e-06, + "loss": 0.4289, + "step": 11886 + }, + { + "epoch": 0.7238680997472825, + "grad_norm": 1.0198068242955614, + "learning_rate": 4.825149045834572e-06, + "loss": 0.3737, + "step": 11887 + }, + { + "epoch": 0.7239289955241603, + "grad_norm": 0.944446975549897, + "learning_rate": 4.825119726983241e-06, + "loss": 0.4653, + "step": 11888 + }, + { + "epoch": 0.7239898913010383, + "grad_norm": 1.0236773401555772, + "learning_rate": 4.825090405763129e-06, + "loss": 0.3918, + "step": 11889 + }, + { + "epoch": 0.7240507870779161, + "grad_norm": 1.150364636860069, + "learning_rate": 4.825061082174262e-06, + "loss": 0.4935, + "step": 11890 + }, + { + "epoch": 0.724111682854794, + "grad_norm": 0.9238322468652062, + "learning_rate": 4.82503175621667e-06, + "loss": 0.3851, + "step": 11891 + }, + { + "epoch": 0.724172578631672, + "grad_norm": 0.9601477109249512, + "learning_rate": 4.825002427890385e-06, + "loss": 0.4304, + "step": 11892 + }, + { + "epoch": 0.7242334744085498, + "grad_norm": 0.9312143563367804, + "learning_rate": 4.8249730971954364e-06, + "loss": 0.4653, + "step": 11893 + }, + { + "epoch": 0.7242943701854276, + "grad_norm": 0.9819347858212228, + "learning_rate": 4.824943764131853e-06, + "loss": 0.3819, + "step": 11894 + }, + { + "epoch": 0.7243552659623055, + "grad_norm": 1.0027703128743302, + "learning_rate": 4.824914428699665e-06, + "loss": 0.4487, + "step": 11895 + }, + { + "epoch": 0.7244161617391834, + "grad_norm": 1.0349207442489745, + "learning_rate": 4.824885090898903e-06, + "loss": 0.427, + "step": 11896 + }, + { + "epoch": 0.7244770575160613, + "grad_norm": 0.9433122798659715, + "learning_rate": 4.824855750729596e-06, + "loss": 0.4429, + "step": 11897 + }, + { + "epoch": 0.7245379532929391, + "grad_norm": 1.0665253522783584, + "learning_rate": 4.824826408191774e-06, + "loss": 0.4292, + "step": 11898 + }, + { + "epoch": 0.724598849069817, + "grad_norm": 0.9493631256839333, + "learning_rate": 4.824797063285468e-06, + "loss": 0.4325, + "step": 11899 + }, + { + "epoch": 0.7246597448466949, + "grad_norm": 0.9834817920108143, + "learning_rate": 4.824767716010707e-06, + "loss": 0.4304, + "step": 11900 + }, + { + "epoch": 0.7247206406235728, + "grad_norm": 1.0421497789114849, + "learning_rate": 4.82473836636752e-06, + "loss": 0.3499, + "step": 11901 + }, + { + "epoch": 0.7247815364004506, + "grad_norm": 1.0526421956427343, + "learning_rate": 4.824709014355939e-06, + "loss": 0.4162, + "step": 11902 + }, + { + "epoch": 0.7248424321773285, + "grad_norm": 1.073062565843298, + "learning_rate": 4.824679659975992e-06, + "loss": 0.4296, + "step": 11903 + }, + { + "epoch": 0.7249033279542064, + "grad_norm": 1.0988277211205375, + "learning_rate": 4.82465030322771e-06, + "loss": 0.3618, + "step": 11904 + }, + { + "epoch": 0.7249642237310843, + "grad_norm": 0.9998073310613875, + "learning_rate": 4.824620944111122e-06, + "loss": 0.4281, + "step": 11905 + }, + { + "epoch": 0.7250251195079621, + "grad_norm": 0.9751645034477902, + "learning_rate": 4.82459158262626e-06, + "loss": 0.4099, + "step": 11906 + }, + { + "epoch": 0.72508601528484, + "grad_norm": 0.9445381429660272, + "learning_rate": 4.824562218773152e-06, + "loss": 0.4979, + "step": 11907 + }, + { + "epoch": 0.7251469110617179, + "grad_norm": 1.0350889214146917, + "learning_rate": 4.824532852551828e-06, + "loss": 0.4028, + "step": 11908 + }, + { + "epoch": 0.7252078068385958, + "grad_norm": 1.074229397149009, + "learning_rate": 4.824503483962318e-06, + "loss": 0.435, + "step": 11909 + }, + { + "epoch": 0.7252687026154736, + "grad_norm": 0.9848645925568652, + "learning_rate": 4.8244741130046525e-06, + "loss": 0.4803, + "step": 11910 + }, + { + "epoch": 0.7253295983923514, + "grad_norm": 1.0026836519630018, + "learning_rate": 4.824444739678861e-06, + "loss": 0.4012, + "step": 11911 + }, + { + "epoch": 0.7253904941692294, + "grad_norm": 1.0119818455119052, + "learning_rate": 4.824415363984973e-06, + "loss": 0.341, + "step": 11912 + }, + { + "epoch": 0.7254513899461073, + "grad_norm": 0.9184923704292113, + "learning_rate": 4.824385985923019e-06, + "loss": 0.4691, + "step": 11913 + }, + { + "epoch": 0.7255122857229851, + "grad_norm": 1.0429043425514954, + "learning_rate": 4.82435660549303e-06, + "loss": 0.4112, + "step": 11914 + }, + { + "epoch": 0.7255731814998629, + "grad_norm": 0.9808493953779334, + "learning_rate": 4.824327222695034e-06, + "loss": 0.5003, + "step": 11915 + }, + { + "epoch": 0.7256340772767409, + "grad_norm": 0.9917307945685514, + "learning_rate": 4.824297837529062e-06, + "loss": 0.4416, + "step": 11916 + }, + { + "epoch": 0.7256949730536187, + "grad_norm": 0.9640565695389408, + "learning_rate": 4.824268449995143e-06, + "loss": 0.4247, + "step": 11917 + }, + { + "epoch": 0.7257558688304966, + "grad_norm": 1.0384473200197182, + "learning_rate": 4.8242390600933084e-06, + "loss": 0.3706, + "step": 11918 + }, + { + "epoch": 0.7258167646073744, + "grad_norm": 0.9954518932106681, + "learning_rate": 4.824209667823587e-06, + "loss": 0.4291, + "step": 11919 + }, + { + "epoch": 0.7258776603842524, + "grad_norm": 1.0630226321895193, + "learning_rate": 4.8241802731860095e-06, + "loss": 0.3892, + "step": 11920 + }, + { + "epoch": 0.7259385561611302, + "grad_norm": 0.9541653833376803, + "learning_rate": 4.824150876180606e-06, + "loss": 0.4342, + "step": 11921 + }, + { + "epoch": 0.7259994519380081, + "grad_norm": 1.0696871065082691, + "learning_rate": 4.824121476807405e-06, + "loss": 0.3995, + "step": 11922 + }, + { + "epoch": 0.7260603477148859, + "grad_norm": 0.9914628533216577, + "learning_rate": 4.8240920750664365e-06, + "loss": 0.4182, + "step": 11923 + }, + { + "epoch": 0.7261212434917639, + "grad_norm": 1.0436624199098192, + "learning_rate": 4.824062670957733e-06, + "loss": 0.4271, + "step": 11924 + }, + { + "epoch": 0.7261821392686417, + "grad_norm": 0.989060135922268, + "learning_rate": 4.824033264481322e-06, + "loss": 0.3857, + "step": 11925 + }, + { + "epoch": 0.7262430350455196, + "grad_norm": 0.9461311226298055, + "learning_rate": 4.824003855637234e-06, + "loss": 0.4068, + "step": 11926 + }, + { + "epoch": 0.7263039308223974, + "grad_norm": 0.9503453691206647, + "learning_rate": 4.8239744444255e-06, + "loss": 0.4833, + "step": 11927 + }, + { + "epoch": 0.7263648265992754, + "grad_norm": 0.9737602270361837, + "learning_rate": 4.823945030846149e-06, + "loss": 0.4411, + "step": 11928 + }, + { + "epoch": 0.7264257223761532, + "grad_norm": 1.005842968738042, + "learning_rate": 4.823915614899211e-06, + "loss": 0.4171, + "step": 11929 + }, + { + "epoch": 0.7264866181530311, + "grad_norm": 1.0742941212471138, + "learning_rate": 4.823886196584715e-06, + "loss": 0.4636, + "step": 11930 + }, + { + "epoch": 0.7265475139299089, + "grad_norm": 1.0724880048379293, + "learning_rate": 4.823856775902693e-06, + "loss": 0.3745, + "step": 11931 + }, + { + "epoch": 0.7266084097067869, + "grad_norm": 1.0071401487712863, + "learning_rate": 4.823827352853174e-06, + "loss": 0.3601, + "step": 11932 + }, + { + "epoch": 0.7266693054836647, + "grad_norm": 1.0142497766530318, + "learning_rate": 4.823797927436188e-06, + "loss": 0.4138, + "step": 11933 + }, + { + "epoch": 0.7267302012605426, + "grad_norm": 0.9789725353905481, + "learning_rate": 4.823768499651764e-06, + "loss": 0.4565, + "step": 11934 + }, + { + "epoch": 0.7267910970374205, + "grad_norm": 0.9187324759479093, + "learning_rate": 4.823739069499934e-06, + "loss": 0.449, + "step": 11935 + }, + { + "epoch": 0.7268519928142984, + "grad_norm": 1.0510660911719867, + "learning_rate": 4.823709636980728e-06, + "loss": 0.3942, + "step": 11936 + }, + { + "epoch": 0.7269128885911762, + "grad_norm": 1.0552611919986514, + "learning_rate": 4.823680202094173e-06, + "loss": 0.3879, + "step": 11937 + }, + { + "epoch": 0.726973784368054, + "grad_norm": 1.0060130717633562, + "learning_rate": 4.823650764840302e-06, + "loss": 0.482, + "step": 11938 + }, + { + "epoch": 0.727034680144932, + "grad_norm": 0.9012602974187273, + "learning_rate": 4.8236213252191435e-06, + "loss": 0.4847, + "step": 11939 + }, + { + "epoch": 0.7270955759218098, + "grad_norm": 0.9646788637139624, + "learning_rate": 4.8235918832307286e-06, + "loss": 0.4187, + "step": 11940 + }, + { + "epoch": 0.7271564716986877, + "grad_norm": 1.0375176486445323, + "learning_rate": 4.823562438875086e-06, + "loss": 0.3851, + "step": 11941 + }, + { + "epoch": 0.7272173674755655, + "grad_norm": 0.9909226255420537, + "learning_rate": 4.8235329921522464e-06, + "loss": 0.4022, + "step": 11942 + }, + { + "epoch": 0.7272782632524435, + "grad_norm": 1.026020678743728, + "learning_rate": 4.823503543062239e-06, + "loss": 0.4515, + "step": 11943 + }, + { + "epoch": 0.7273391590293213, + "grad_norm": 0.9732449011727738, + "learning_rate": 4.8234740916050956e-06, + "loss": 0.4032, + "step": 11944 + }, + { + "epoch": 0.7274000548061992, + "grad_norm": 0.968616563294295, + "learning_rate": 4.823444637780844e-06, + "loss": 0.4382, + "step": 11945 + }, + { + "epoch": 0.727460950583077, + "grad_norm": 0.9676827188091601, + "learning_rate": 4.8234151815895165e-06, + "loss": 0.4432, + "step": 11946 + }, + { + "epoch": 0.727521846359955, + "grad_norm": 0.9854025062083545, + "learning_rate": 4.8233857230311406e-06, + "loss": 0.4339, + "step": 11947 + }, + { + "epoch": 0.7275827421368328, + "grad_norm": 0.9931304460397146, + "learning_rate": 4.823356262105749e-06, + "loss": 0.4582, + "step": 11948 + }, + { + "epoch": 0.7276436379137107, + "grad_norm": 1.1222843836320797, + "learning_rate": 4.823326798813369e-06, + "loss": 0.3852, + "step": 11949 + }, + { + "epoch": 0.7277045336905885, + "grad_norm": 0.9175910678512722, + "learning_rate": 4.823297333154033e-06, + "loss": 0.4205, + "step": 11950 + }, + { + "epoch": 0.7277654294674665, + "grad_norm": 0.9282645972121089, + "learning_rate": 4.82326786512777e-06, + "loss": 0.4658, + "step": 11951 + }, + { + "epoch": 0.7278263252443443, + "grad_norm": 0.8936287127351322, + "learning_rate": 4.82323839473461e-06, + "loss": 0.4615, + "step": 11952 + }, + { + "epoch": 0.7278872210212222, + "grad_norm": 1.1130583047972558, + "learning_rate": 4.823208921974583e-06, + "loss": 0.3525, + "step": 11953 + }, + { + "epoch": 0.7279481167981, + "grad_norm": 1.0309079589521295, + "learning_rate": 4.823179446847717e-06, + "loss": 0.4135, + "step": 11954 + }, + { + "epoch": 0.728009012574978, + "grad_norm": 1.0981963803122798, + "learning_rate": 4.823149969354047e-06, + "loss": 0.3988, + "step": 11955 + }, + { + "epoch": 0.7280699083518558, + "grad_norm": 0.9487122389244117, + "learning_rate": 4.823120489493599e-06, + "loss": 0.4435, + "step": 11956 + }, + { + "epoch": 0.7281308041287337, + "grad_norm": 1.0026302640073124, + "learning_rate": 4.823091007266404e-06, + "loss": 0.396, + "step": 11957 + }, + { + "epoch": 0.7281916999056115, + "grad_norm": 0.9312921206765016, + "learning_rate": 4.823061522672492e-06, + "loss": 0.4049, + "step": 11958 + }, + { + "epoch": 0.7282525956824895, + "grad_norm": 0.935054709576301, + "learning_rate": 4.823032035711893e-06, + "loss": 0.4792, + "step": 11959 + }, + { + "epoch": 0.7283134914593673, + "grad_norm": 1.0072983951117631, + "learning_rate": 4.8230025463846385e-06, + "loss": 0.4074, + "step": 11960 + }, + { + "epoch": 0.7283743872362451, + "grad_norm": 0.982896362434727, + "learning_rate": 4.822973054690756e-06, + "loss": 0.4607, + "step": 11961 + }, + { + "epoch": 0.728435283013123, + "grad_norm": 1.0160681795746687, + "learning_rate": 4.822943560630278e-06, + "loss": 0.4187, + "step": 11962 + }, + { + "epoch": 0.728496178790001, + "grad_norm": 1.097525498223729, + "learning_rate": 4.822914064203232e-06, + "loss": 0.3708, + "step": 11963 + }, + { + "epoch": 0.7285570745668788, + "grad_norm": 1.0129821605220293, + "learning_rate": 4.82288456540965e-06, + "loss": 0.4265, + "step": 11964 + }, + { + "epoch": 0.7286179703437566, + "grad_norm": 0.9974082068158995, + "learning_rate": 4.822855064249562e-06, + "loss": 0.4328, + "step": 11965 + }, + { + "epoch": 0.7286788661206345, + "grad_norm": 0.9801915534291052, + "learning_rate": 4.822825560722998e-06, + "loss": 0.5339, + "step": 11966 + }, + { + "epoch": 0.7287397618975124, + "grad_norm": 1.0148540650170996, + "learning_rate": 4.8227960548299865e-06, + "loss": 0.4492, + "step": 11967 + }, + { + "epoch": 0.7288006576743903, + "grad_norm": 1.0348933178571533, + "learning_rate": 4.822766546570559e-06, + "loss": 0.4676, + "step": 11968 + }, + { + "epoch": 0.7288615534512681, + "grad_norm": 1.0194861591938913, + "learning_rate": 4.822737035944746e-06, + "loss": 0.4145, + "step": 11969 + }, + { + "epoch": 0.728922449228146, + "grad_norm": 0.9815789066319779, + "learning_rate": 4.822707522952575e-06, + "loss": 0.4293, + "step": 11970 + }, + { + "epoch": 0.7289833450050239, + "grad_norm": 1.0766636690766507, + "learning_rate": 4.822678007594079e-06, + "loss": 0.4151, + "step": 11971 + }, + { + "epoch": 0.7290442407819018, + "grad_norm": 1.0432004902493195, + "learning_rate": 4.822648489869287e-06, + "loss": 0.4003, + "step": 11972 + }, + { + "epoch": 0.7291051365587796, + "grad_norm": 1.0115945079854143, + "learning_rate": 4.822618969778229e-06, + "loss": 0.413, + "step": 11973 + }, + { + "epoch": 0.7291660323356576, + "grad_norm": 0.9884360539863319, + "learning_rate": 4.8225894473209354e-06, + "loss": 0.4457, + "step": 11974 + }, + { + "epoch": 0.7292269281125354, + "grad_norm": 0.9553745184990143, + "learning_rate": 4.822559922497435e-06, + "loss": 0.4285, + "step": 11975 + }, + { + "epoch": 0.7292878238894133, + "grad_norm": 0.9633817981255595, + "learning_rate": 4.8225303953077595e-06, + "loss": 0.5685, + "step": 11976 + }, + { + "epoch": 0.7293487196662911, + "grad_norm": 0.9630985516604308, + "learning_rate": 4.822500865751938e-06, + "loss": 0.4158, + "step": 11977 + }, + { + "epoch": 0.7294096154431691, + "grad_norm": 1.0153858678929804, + "learning_rate": 4.822471333830001e-06, + "loss": 0.4111, + "step": 11978 + }, + { + "epoch": 0.7294705112200469, + "grad_norm": 1.1248482151679597, + "learning_rate": 4.822441799541979e-06, + "loss": 0.4529, + "step": 11979 + }, + { + "epoch": 0.7295314069969248, + "grad_norm": 0.9485384284495083, + "learning_rate": 4.822412262887901e-06, + "loss": 0.4515, + "step": 11980 + }, + { + "epoch": 0.7295923027738026, + "grad_norm": 0.9683339849001861, + "learning_rate": 4.822382723867798e-06, + "loss": 0.4199, + "step": 11981 + }, + { + "epoch": 0.7296531985506806, + "grad_norm": 0.9821148542644204, + "learning_rate": 4.8223531824816996e-06, + "loss": 0.4669, + "step": 11982 + }, + { + "epoch": 0.7297140943275584, + "grad_norm": 0.9264973361187309, + "learning_rate": 4.822323638729636e-06, + "loss": 0.4506, + "step": 11983 + }, + { + "epoch": 0.7297749901044362, + "grad_norm": 1.019484374981726, + "learning_rate": 4.822294092611637e-06, + "loss": 0.3865, + "step": 11984 + }, + { + "epoch": 0.7298358858813141, + "grad_norm": 1.0246700758504652, + "learning_rate": 4.822264544127734e-06, + "loss": 0.3627, + "step": 11985 + }, + { + "epoch": 0.729896781658192, + "grad_norm": 1.093574696209523, + "learning_rate": 4.822234993277956e-06, + "loss": 0.3975, + "step": 11986 + }, + { + "epoch": 0.7299576774350699, + "grad_norm": 1.0116855852215985, + "learning_rate": 4.822205440062333e-06, + "loss": 0.3921, + "step": 11987 + }, + { + "epoch": 0.7300185732119477, + "grad_norm": 0.9572246479298739, + "learning_rate": 4.822175884480895e-06, + "loss": 0.4622, + "step": 11988 + }, + { + "epoch": 0.7300794689888256, + "grad_norm": 1.0923868516473958, + "learning_rate": 4.822146326533673e-06, + "loss": 0.4473, + "step": 11989 + }, + { + "epoch": 0.7301403647657035, + "grad_norm": 1.064489113310927, + "learning_rate": 4.822116766220696e-06, + "loss": 0.3827, + "step": 11990 + }, + { + "epoch": 0.7302012605425814, + "grad_norm": 0.9500843171760008, + "learning_rate": 4.8220872035419954e-06, + "loss": 0.402, + "step": 11991 + }, + { + "epoch": 0.7302621563194592, + "grad_norm": 0.9701662034465359, + "learning_rate": 4.8220576384976e-06, + "loss": 0.4116, + "step": 11992 + }, + { + "epoch": 0.7303230520963371, + "grad_norm": 0.9621032843047648, + "learning_rate": 4.822028071087541e-06, + "loss": 0.4078, + "step": 11993 + }, + { + "epoch": 0.730383947873215, + "grad_norm": 0.9596326671099255, + "learning_rate": 4.821998501311847e-06, + "loss": 0.4291, + "step": 11994 + }, + { + "epoch": 0.7304448436500929, + "grad_norm": 1.0958533781948059, + "learning_rate": 4.821968929170551e-06, + "loss": 0.4695, + "step": 11995 + }, + { + "epoch": 0.7305057394269707, + "grad_norm": 0.9619648872608005, + "learning_rate": 4.8219393546636806e-06, + "loss": 0.5191, + "step": 11996 + }, + { + "epoch": 0.7305666352038486, + "grad_norm": 1.016253130647302, + "learning_rate": 4.8219097777912666e-06, + "loss": 0.4461, + "step": 11997 + }, + { + "epoch": 0.7306275309807265, + "grad_norm": 0.8793823172575211, + "learning_rate": 4.821880198553339e-06, + "loss": 0.4631, + "step": 11998 + }, + { + "epoch": 0.7306884267576044, + "grad_norm": 1.0150451378206824, + "learning_rate": 4.821850616949929e-06, + "loss": 0.4233, + "step": 11999 + }, + { + "epoch": 0.7307493225344822, + "grad_norm": 1.0055072758677053, + "learning_rate": 4.821821032981064e-06, + "loss": 0.4385, + "step": 12000 + }, + { + "epoch": 0.7308102183113601, + "grad_norm": 1.0046486779332475, + "learning_rate": 4.821791446646777e-06, + "loss": 0.4203, + "step": 12001 + }, + { + "epoch": 0.730871114088238, + "grad_norm": 0.9685275863948677, + "learning_rate": 4.821761857947098e-06, + "loss": 0.465, + "step": 12002 + }, + { + "epoch": 0.7309320098651159, + "grad_norm": 0.9702474518947402, + "learning_rate": 4.821732266882055e-06, + "loss": 0.3983, + "step": 12003 + }, + { + "epoch": 0.7309929056419937, + "grad_norm": 0.9409758255428782, + "learning_rate": 4.82170267345168e-06, + "loss": 0.4822, + "step": 12004 + }, + { + "epoch": 0.7310538014188716, + "grad_norm": 1.0293229627173834, + "learning_rate": 4.821673077656003e-06, + "loss": 0.4199, + "step": 12005 + }, + { + "epoch": 0.7311146971957495, + "grad_norm": 0.9722145664566345, + "learning_rate": 4.821643479495053e-06, + "loss": 0.3871, + "step": 12006 + }, + { + "epoch": 0.7311755929726274, + "grad_norm": 1.055563317280209, + "learning_rate": 4.821613878968862e-06, + "loss": 0.4155, + "step": 12007 + }, + { + "epoch": 0.7312364887495052, + "grad_norm": 1.0260875418937454, + "learning_rate": 4.821584276077458e-06, + "loss": 0.5142, + "step": 12008 + }, + { + "epoch": 0.731297384526383, + "grad_norm": 0.9891806748597383, + "learning_rate": 4.821554670820873e-06, + "loss": 0.4184, + "step": 12009 + }, + { + "epoch": 0.731358280303261, + "grad_norm": 1.1756900065770384, + "learning_rate": 4.821525063199135e-06, + "loss": 0.4231, + "step": 12010 + }, + { + "epoch": 0.7314191760801388, + "grad_norm": 0.9936031273102771, + "learning_rate": 4.821495453212277e-06, + "loss": 0.4711, + "step": 12011 + }, + { + "epoch": 0.7314800718570167, + "grad_norm": 1.0417195854484191, + "learning_rate": 4.821465840860326e-06, + "loss": 0.3736, + "step": 12012 + }, + { + "epoch": 0.7315409676338945, + "grad_norm": 0.9937433000389105, + "learning_rate": 4.8214362261433155e-06, + "loss": 0.4189, + "step": 12013 + }, + { + "epoch": 0.7316018634107725, + "grad_norm": 0.8798868585796044, + "learning_rate": 4.821406609061273e-06, + "loss": 0.521, + "step": 12014 + }, + { + "epoch": 0.7316627591876503, + "grad_norm": 1.041360954791574, + "learning_rate": 4.8213769896142306e-06, + "loss": 0.4172, + "step": 12015 + }, + { + "epoch": 0.7317236549645282, + "grad_norm": 1.0387193879671772, + "learning_rate": 4.821347367802218e-06, + "loss": 0.4071, + "step": 12016 + }, + { + "epoch": 0.7317845507414061, + "grad_norm": 0.9622071243076626, + "learning_rate": 4.821317743625263e-06, + "loss": 0.413, + "step": 12017 + }, + { + "epoch": 0.731845446518284, + "grad_norm": 1.0081732010315931, + "learning_rate": 4.821288117083399e-06, + "loss": 0.404, + "step": 12018 + }, + { + "epoch": 0.7319063422951618, + "grad_norm": 1.1028126700277774, + "learning_rate": 4.821258488176656e-06, + "loss": 0.444, + "step": 12019 + }, + { + "epoch": 0.7319672380720397, + "grad_norm": 1.0145653595661983, + "learning_rate": 4.821228856905062e-06, + "loss": 0.4628, + "step": 12020 + }, + { + "epoch": 0.7320281338489176, + "grad_norm": 1.0638290079133759, + "learning_rate": 4.8211992232686474e-06, + "loss": 0.3611, + "step": 12021 + }, + { + "epoch": 0.7320890296257955, + "grad_norm": 0.9343452660165699, + "learning_rate": 4.821169587267444e-06, + "loss": 0.4227, + "step": 12022 + }, + { + "epoch": 0.7321499254026733, + "grad_norm": 0.9511693006606449, + "learning_rate": 4.821139948901482e-06, + "loss": 0.3873, + "step": 12023 + }, + { + "epoch": 0.7322108211795512, + "grad_norm": 1.024545470857719, + "learning_rate": 4.82111030817079e-06, + "loss": 0.375, + "step": 12024 + }, + { + "epoch": 0.7322717169564291, + "grad_norm": 0.928284304154627, + "learning_rate": 4.8210806650753994e-06, + "loss": 0.4775, + "step": 12025 + }, + { + "epoch": 0.732332612733307, + "grad_norm": 1.0099875238593246, + "learning_rate": 4.8210510196153396e-06, + "loss": 0.4347, + "step": 12026 + }, + { + "epoch": 0.7323935085101848, + "grad_norm": 1.055663032861314, + "learning_rate": 4.8210213717906415e-06, + "loss": 0.472, + "step": 12027 + }, + { + "epoch": 0.7324544042870627, + "grad_norm": 0.9745184141597961, + "learning_rate": 4.820991721601336e-06, + "loss": 0.5176, + "step": 12028 + }, + { + "epoch": 0.7325153000639406, + "grad_norm": 0.9900273598808249, + "learning_rate": 4.820962069047451e-06, + "loss": 0.4278, + "step": 12029 + }, + { + "epoch": 0.7325761958408185, + "grad_norm": 1.0452341717788247, + "learning_rate": 4.820932414129019e-06, + "loss": 0.4477, + "step": 12030 + }, + { + "epoch": 0.7326370916176963, + "grad_norm": 1.05545735286896, + "learning_rate": 4.82090275684607e-06, + "loss": 0.4586, + "step": 12031 + }, + { + "epoch": 0.7326979873945741, + "grad_norm": 1.017540196050174, + "learning_rate": 4.820873097198632e-06, + "loss": 0.4081, + "step": 12032 + }, + { + "epoch": 0.7327588831714521, + "grad_norm": 1.0389182699629265, + "learning_rate": 4.8208434351867375e-06, + "loss": 0.3658, + "step": 12033 + }, + { + "epoch": 0.73281977894833, + "grad_norm": 1.0032081580163243, + "learning_rate": 4.820813770810416e-06, + "loss": 0.4226, + "step": 12034 + }, + { + "epoch": 0.7328806747252078, + "grad_norm": 1.0630420721782685, + "learning_rate": 4.820784104069698e-06, + "loss": 0.4032, + "step": 12035 + }, + { + "epoch": 0.7329415705020856, + "grad_norm": 0.9701133304425897, + "learning_rate": 4.8207544349646115e-06, + "loss": 0.432, + "step": 12036 + }, + { + "epoch": 0.7330024662789636, + "grad_norm": 0.9223626941051282, + "learning_rate": 4.82072476349519e-06, + "loss": 0.4134, + "step": 12037 + }, + { + "epoch": 0.7330633620558414, + "grad_norm": 1.012298315028513, + "learning_rate": 4.820695089661463e-06, + "loss": 0.4405, + "step": 12038 + }, + { + "epoch": 0.7331242578327193, + "grad_norm": 0.9405701629749376, + "learning_rate": 4.820665413463459e-06, + "loss": 0.3597, + "step": 12039 + }, + { + "epoch": 0.7331851536095971, + "grad_norm": 0.9710294821758905, + "learning_rate": 4.82063573490121e-06, + "loss": 0.4118, + "step": 12040 + }, + { + "epoch": 0.7332460493864751, + "grad_norm": 1.0058944488052235, + "learning_rate": 4.820606053974746e-06, + "loss": 0.3618, + "step": 12041 + }, + { + "epoch": 0.7333069451633529, + "grad_norm": 0.8788979374695549, + "learning_rate": 4.820576370684096e-06, + "loss": 0.4224, + "step": 12042 + }, + { + "epoch": 0.7333678409402308, + "grad_norm": 0.9849742268748367, + "learning_rate": 4.820546685029292e-06, + "loss": 0.4101, + "step": 12043 + }, + { + "epoch": 0.7334287367171086, + "grad_norm": 0.9619758760426178, + "learning_rate": 4.820516997010361e-06, + "loss": 0.4447, + "step": 12044 + }, + { + "epoch": 0.7334896324939866, + "grad_norm": 0.9895230401142099, + "learning_rate": 4.820487306627337e-06, + "loss": 0.42, + "step": 12045 + }, + { + "epoch": 0.7335505282708644, + "grad_norm": 1.127849502488636, + "learning_rate": 4.8204576138802495e-06, + "loss": 0.3576, + "step": 12046 + }, + { + "epoch": 0.7336114240477423, + "grad_norm": 1.0082979323737642, + "learning_rate": 4.820427918769127e-06, + "loss": 0.5181, + "step": 12047 + }, + { + "epoch": 0.7336723198246201, + "grad_norm": 1.0214554611036732, + "learning_rate": 4.820398221294002e-06, + "loss": 0.3946, + "step": 12048 + }, + { + "epoch": 0.7337332156014981, + "grad_norm": 1.0590821457009187, + "learning_rate": 4.820368521454902e-06, + "loss": 0.3844, + "step": 12049 + }, + { + "epoch": 0.7337941113783759, + "grad_norm": 0.9931890662134282, + "learning_rate": 4.82033881925186e-06, + "loss": 0.3925, + "step": 12050 + }, + { + "epoch": 0.7338550071552538, + "grad_norm": 1.0539921346327037, + "learning_rate": 4.820309114684903e-06, + "loss": 0.3577, + "step": 12051 + }, + { + "epoch": 0.7339159029321316, + "grad_norm": 1.0435711495137592, + "learning_rate": 4.820279407754066e-06, + "loss": 0.455, + "step": 12052 + }, + { + "epoch": 0.7339767987090096, + "grad_norm": 0.9700581486250969, + "learning_rate": 4.820249698459375e-06, + "loss": 0.3972, + "step": 12053 + }, + { + "epoch": 0.7340376944858874, + "grad_norm": 1.0181601830214946, + "learning_rate": 4.820219986800862e-06, + "loss": 0.4408, + "step": 12054 + }, + { + "epoch": 0.7340985902627652, + "grad_norm": 1.1024162694368327, + "learning_rate": 4.8201902727785574e-06, + "loss": 0.4062, + "step": 12055 + }, + { + "epoch": 0.7341594860396432, + "grad_norm": 1.0198798396228075, + "learning_rate": 4.820160556392491e-06, + "loss": 0.4712, + "step": 12056 + }, + { + "epoch": 0.734220381816521, + "grad_norm": 1.0478318325700735, + "learning_rate": 4.820130837642694e-06, + "loss": 0.4831, + "step": 12057 + }, + { + "epoch": 0.7342812775933989, + "grad_norm": 0.9755435420697164, + "learning_rate": 4.820101116529195e-06, + "loss": 0.4415, + "step": 12058 + }, + { + "epoch": 0.7343421733702767, + "grad_norm": 0.8997453818903313, + "learning_rate": 4.820071393052025e-06, + "loss": 0.4881, + "step": 12059 + }, + { + "epoch": 0.7344030691471547, + "grad_norm": 0.9017572964114545, + "learning_rate": 4.820041667211215e-06, + "loss": 0.518, + "step": 12060 + }, + { + "epoch": 0.7344639649240325, + "grad_norm": 1.0356824531257376, + "learning_rate": 4.8200119390067956e-06, + "loss": 0.4097, + "step": 12061 + }, + { + "epoch": 0.7345248607009104, + "grad_norm": 0.894928055528807, + "learning_rate": 4.819982208438795e-06, + "loss": 0.4694, + "step": 12062 + }, + { + "epoch": 0.7345857564777882, + "grad_norm": 0.962048115957244, + "learning_rate": 4.819952475507246e-06, + "loss": 0.4216, + "step": 12063 + }, + { + "epoch": 0.7346466522546662, + "grad_norm": 1.0273742599801803, + "learning_rate": 4.819922740212176e-06, + "loss": 0.425, + "step": 12064 + }, + { + "epoch": 0.734707548031544, + "grad_norm": 1.029273132636173, + "learning_rate": 4.819893002553618e-06, + "loss": 0.3902, + "step": 12065 + }, + { + "epoch": 0.7347684438084219, + "grad_norm": 0.95789191138158, + "learning_rate": 4.819863262531601e-06, + "loss": 0.4675, + "step": 12066 + }, + { + "epoch": 0.7348293395852997, + "grad_norm": 1.1518271206443993, + "learning_rate": 4.819833520146156e-06, + "loss": 0.3998, + "step": 12067 + }, + { + "epoch": 0.7348902353621777, + "grad_norm": 0.9925611556359, + "learning_rate": 4.8198037753973125e-06, + "loss": 0.4089, + "step": 12068 + }, + { + "epoch": 0.7349511311390555, + "grad_norm": 0.955870330897976, + "learning_rate": 4.819774028285101e-06, + "loss": 0.4178, + "step": 12069 + }, + { + "epoch": 0.7350120269159334, + "grad_norm": 1.033577865370774, + "learning_rate": 4.819744278809552e-06, + "loss": 0.4843, + "step": 12070 + }, + { + "epoch": 0.7350729226928112, + "grad_norm": 1.0714516814857633, + "learning_rate": 4.819714526970696e-06, + "loss": 0.3792, + "step": 12071 + }, + { + "epoch": 0.7351338184696892, + "grad_norm": 0.9543094997636596, + "learning_rate": 4.819684772768562e-06, + "loss": 0.4496, + "step": 12072 + }, + { + "epoch": 0.735194714246567, + "grad_norm": 1.0067721686036444, + "learning_rate": 4.819655016203183e-06, + "loss": 0.4338, + "step": 12073 + }, + { + "epoch": 0.7352556100234449, + "grad_norm": 1.0407644870415091, + "learning_rate": 4.819625257274587e-06, + "loss": 0.3863, + "step": 12074 + }, + { + "epoch": 0.7353165058003227, + "grad_norm": 0.9551254252525678, + "learning_rate": 4.819595495982805e-06, + "loss": 0.4296, + "step": 12075 + }, + { + "epoch": 0.7353774015772007, + "grad_norm": 0.9805092023551991, + "learning_rate": 4.819565732327868e-06, + "loss": 0.3473, + "step": 12076 + }, + { + "epoch": 0.7354382973540785, + "grad_norm": 1.0768568629195594, + "learning_rate": 4.819535966309804e-06, + "loss": 0.4307, + "step": 12077 + }, + { + "epoch": 0.7354991931309564, + "grad_norm": 1.00699246341728, + "learning_rate": 4.819506197928646e-06, + "loss": 0.432, + "step": 12078 + }, + { + "epoch": 0.7355600889078342, + "grad_norm": 0.9344000081833338, + "learning_rate": 4.819476427184424e-06, + "loss": 0.4188, + "step": 12079 + }, + { + "epoch": 0.7356209846847122, + "grad_norm": 0.9736248672457057, + "learning_rate": 4.819446654077166e-06, + "loss": 0.4127, + "step": 12080 + }, + { + "epoch": 0.73568188046159, + "grad_norm": 0.9307329522045851, + "learning_rate": 4.8194168786069055e-06, + "loss": 0.4839, + "step": 12081 + }, + { + "epoch": 0.7357427762384678, + "grad_norm": 0.9184725788740027, + "learning_rate": 4.81938710077367e-06, + "loss": 0.4674, + "step": 12082 + }, + { + "epoch": 0.7358036720153457, + "grad_norm": 0.9549321617744012, + "learning_rate": 4.819357320577492e-06, + "loss": 0.3653, + "step": 12083 + }, + { + "epoch": 0.7358645677922236, + "grad_norm": 0.963485331279195, + "learning_rate": 4.819327538018401e-06, + "loss": 0.4292, + "step": 12084 + }, + { + "epoch": 0.7359254635691015, + "grad_norm": 1.0623736118164255, + "learning_rate": 4.8192977530964275e-06, + "loss": 0.4158, + "step": 12085 + }, + { + "epoch": 0.7359863593459793, + "grad_norm": 1.1216143983440585, + "learning_rate": 4.819267965811602e-06, + "loss": 0.4829, + "step": 12086 + }, + { + "epoch": 0.7360472551228572, + "grad_norm": 1.1291801890230215, + "learning_rate": 4.8192381761639525e-06, + "loss": 0.425, + "step": 12087 + }, + { + "epoch": 0.7361081508997351, + "grad_norm": 0.9684599096809373, + "learning_rate": 4.819208384153513e-06, + "loss": 0.4324, + "step": 12088 + }, + { + "epoch": 0.736169046676613, + "grad_norm": 0.970209004675152, + "learning_rate": 4.819178589780313e-06, + "loss": 0.3928, + "step": 12089 + }, + { + "epoch": 0.7362299424534908, + "grad_norm": 1.0563450835737553, + "learning_rate": 4.819148793044381e-06, + "loss": 0.3901, + "step": 12090 + }, + { + "epoch": 0.7362908382303687, + "grad_norm": 0.9493088411313318, + "learning_rate": 4.819118993945747e-06, + "loss": 0.4418, + "step": 12091 + }, + { + "epoch": 0.7363517340072466, + "grad_norm": 0.9918855426958799, + "learning_rate": 4.819089192484444e-06, + "loss": 0.41, + "step": 12092 + }, + { + "epoch": 0.7364126297841245, + "grad_norm": 0.9586563690526835, + "learning_rate": 4.819059388660502e-06, + "loss": 0.4044, + "step": 12093 + }, + { + "epoch": 0.7364735255610023, + "grad_norm": 0.9463275447696194, + "learning_rate": 4.8190295824739495e-06, + "loss": 0.4335, + "step": 12094 + }, + { + "epoch": 0.7365344213378802, + "grad_norm": 0.8918016330244029, + "learning_rate": 4.818999773924818e-06, + "loss": 0.4782, + "step": 12095 + }, + { + "epoch": 0.7365953171147581, + "grad_norm": 1.1082928666950682, + "learning_rate": 4.818969963013138e-06, + "loss": 0.4374, + "step": 12096 + }, + { + "epoch": 0.736656212891636, + "grad_norm": 1.0195664519343115, + "learning_rate": 4.81894014973894e-06, + "loss": 0.3588, + "step": 12097 + }, + { + "epoch": 0.7367171086685138, + "grad_norm": 0.9162703363168513, + "learning_rate": 4.818910334102254e-06, + "loss": 0.406, + "step": 12098 + }, + { + "epoch": 0.7367780044453918, + "grad_norm": 1.001149244644632, + "learning_rate": 4.818880516103109e-06, + "loss": 0.4032, + "step": 12099 + }, + { + "epoch": 0.7368389002222696, + "grad_norm": 0.9630122343703958, + "learning_rate": 4.8188506957415385e-06, + "loss": 0.4746, + "step": 12100 + }, + { + "epoch": 0.7368997959991475, + "grad_norm": 0.9522097225863442, + "learning_rate": 4.81882087301757e-06, + "loss": 0.4303, + "step": 12101 + }, + { + "epoch": 0.7369606917760253, + "grad_norm": 1.0278483783178112, + "learning_rate": 4.818791047931235e-06, + "loss": 0.3686, + "step": 12102 + }, + { + "epoch": 0.7370215875529033, + "grad_norm": 0.934131264522919, + "learning_rate": 4.818761220482564e-06, + "loss": 0.4733, + "step": 12103 + }, + { + "epoch": 0.7370824833297811, + "grad_norm": 1.0439380725738088, + "learning_rate": 4.8187313906715886e-06, + "loss": 0.4506, + "step": 12104 + }, + { + "epoch": 0.737143379106659, + "grad_norm": 1.0688529319037954, + "learning_rate": 4.818701558498336e-06, + "loss": 0.3993, + "step": 12105 + }, + { + "epoch": 0.7372042748835368, + "grad_norm": 1.0353023581276455, + "learning_rate": 4.818671723962839e-06, + "loss": 0.3969, + "step": 12106 + }, + { + "epoch": 0.7372651706604147, + "grad_norm": 1.0238344711699083, + "learning_rate": 4.8186418870651274e-06, + "loss": 0.3864, + "step": 12107 + }, + { + "epoch": 0.7373260664372926, + "grad_norm": 0.9645236550094799, + "learning_rate": 4.818612047805232e-06, + "loss": 0.3569, + "step": 12108 + }, + { + "epoch": 0.7373869622141704, + "grad_norm": 0.9359024582350095, + "learning_rate": 4.8185822061831835e-06, + "loss": 0.3802, + "step": 12109 + }, + { + "epoch": 0.7374478579910483, + "grad_norm": 0.9767897574055296, + "learning_rate": 4.81855236219901e-06, + "loss": 0.4791, + "step": 12110 + }, + { + "epoch": 0.7375087537679262, + "grad_norm": 1.059045960983005, + "learning_rate": 4.818522515852745e-06, + "loss": 0.4051, + "step": 12111 + }, + { + "epoch": 0.7375696495448041, + "grad_norm": 1.0889544457404958, + "learning_rate": 4.818492667144417e-06, + "loss": 0.4981, + "step": 12112 + }, + { + "epoch": 0.7376305453216819, + "grad_norm": 1.0291457642220447, + "learning_rate": 4.818462816074056e-06, + "loss": 0.4474, + "step": 12113 + }, + { + "epoch": 0.7376914410985598, + "grad_norm": 0.8666679589924906, + "learning_rate": 4.818432962641695e-06, + "loss": 0.4676, + "step": 12114 + }, + { + "epoch": 0.7377523368754377, + "grad_norm": 0.9633241697221968, + "learning_rate": 4.818403106847361e-06, + "loss": 0.4395, + "step": 12115 + }, + { + "epoch": 0.7378132326523156, + "grad_norm": 0.9221012917141802, + "learning_rate": 4.818373248691087e-06, + "loss": 0.4323, + "step": 12116 + }, + { + "epoch": 0.7378741284291934, + "grad_norm": 1.0078381135479304, + "learning_rate": 4.818343388172903e-06, + "loss": 0.3737, + "step": 12117 + }, + { + "epoch": 0.7379350242060713, + "grad_norm": 0.9943967844427501, + "learning_rate": 4.818313525292838e-06, + "loss": 0.3881, + "step": 12118 + }, + { + "epoch": 0.7379959199829492, + "grad_norm": 0.9922199193823389, + "learning_rate": 4.818283660050924e-06, + "loss": 0.4302, + "step": 12119 + }, + { + "epoch": 0.7380568157598271, + "grad_norm": 1.0032036653461884, + "learning_rate": 4.81825379244719e-06, + "loss": 0.3862, + "step": 12120 + }, + { + "epoch": 0.7381177115367049, + "grad_norm": 1.047793627043778, + "learning_rate": 4.818223922481669e-06, + "loss": 0.3712, + "step": 12121 + }, + { + "epoch": 0.7381786073135828, + "grad_norm": 1.0309792110958207, + "learning_rate": 4.818194050154388e-06, + "loss": 0.3847, + "step": 12122 + }, + { + "epoch": 0.7382395030904607, + "grad_norm": 1.0284781718673932, + "learning_rate": 4.8181641754653794e-06, + "loss": 0.4658, + "step": 12123 + }, + { + "epoch": 0.7383003988673386, + "grad_norm": 1.0926820808624436, + "learning_rate": 4.818134298414674e-06, + "loss": 0.3858, + "step": 12124 + }, + { + "epoch": 0.7383612946442164, + "grad_norm": 1.0136608419497641, + "learning_rate": 4.8181044190023e-06, + "loss": 0.3965, + "step": 12125 + }, + { + "epoch": 0.7384221904210942, + "grad_norm": 1.0607452658379464, + "learning_rate": 4.818074537228291e-06, + "loss": 0.4326, + "step": 12126 + }, + { + "epoch": 0.7384830861979722, + "grad_norm": 1.0284355632957003, + "learning_rate": 4.818044653092675e-06, + "loss": 0.4207, + "step": 12127 + }, + { + "epoch": 0.73854398197485, + "grad_norm": 1.0327289458753486, + "learning_rate": 4.8180147665954835e-06, + "loss": 0.4022, + "step": 12128 + }, + { + "epoch": 0.7386048777517279, + "grad_norm": 1.0120806807410294, + "learning_rate": 4.817984877736747e-06, + "loss": 0.414, + "step": 12129 + }, + { + "epoch": 0.7386657735286057, + "grad_norm": 0.912615262270647, + "learning_rate": 4.8179549865164955e-06, + "loss": 0.4565, + "step": 12130 + }, + { + "epoch": 0.7387266693054837, + "grad_norm": 1.0122344262509246, + "learning_rate": 4.81792509293476e-06, + "loss": 0.411, + "step": 12131 + }, + { + "epoch": 0.7387875650823615, + "grad_norm": 0.9420065209964164, + "learning_rate": 4.81789519699157e-06, + "loss": 0.4352, + "step": 12132 + }, + { + "epoch": 0.7388484608592394, + "grad_norm": 1.0289671296877398, + "learning_rate": 4.8178652986869575e-06, + "loss": 0.3866, + "step": 12133 + }, + { + "epoch": 0.7389093566361172, + "grad_norm": 0.9733952405860914, + "learning_rate": 4.817835398020951e-06, + "loss": 0.4167, + "step": 12134 + }, + { + "epoch": 0.7389702524129952, + "grad_norm": 1.0184684109793198, + "learning_rate": 4.817805494993583e-06, + "loss": 0.4982, + "step": 12135 + }, + { + "epoch": 0.739031148189873, + "grad_norm": 1.0448301735656675, + "learning_rate": 4.817775589604882e-06, + "loss": 0.3988, + "step": 12136 + }, + { + "epoch": 0.7390920439667509, + "grad_norm": 1.0702176170759765, + "learning_rate": 4.8177456818548795e-06, + "loss": 0.4526, + "step": 12137 + }, + { + "epoch": 0.7391529397436288, + "grad_norm": 0.9813194358705501, + "learning_rate": 4.817715771743606e-06, + "loss": 0.3969, + "step": 12138 + }, + { + "epoch": 0.7392138355205067, + "grad_norm": 1.0525816798167458, + "learning_rate": 4.817685859271092e-06, + "loss": 0.404, + "step": 12139 + }, + { + "epoch": 0.7392747312973845, + "grad_norm": 1.0695144258048952, + "learning_rate": 4.817655944437368e-06, + "loss": 0.5061, + "step": 12140 + }, + { + "epoch": 0.7393356270742624, + "grad_norm": 1.0174395427317418, + "learning_rate": 4.817626027242465e-06, + "loss": 0.4096, + "step": 12141 + }, + { + "epoch": 0.7393965228511403, + "grad_norm": 0.9825339338688607, + "learning_rate": 4.817596107686412e-06, + "loss": 0.4196, + "step": 12142 + }, + { + "epoch": 0.7394574186280182, + "grad_norm": 1.02708727804392, + "learning_rate": 4.81756618576924e-06, + "loss": 0.4082, + "step": 12143 + }, + { + "epoch": 0.739518314404896, + "grad_norm": 1.1038531837234433, + "learning_rate": 4.8175362614909794e-06, + "loss": 0.3764, + "step": 12144 + }, + { + "epoch": 0.7395792101817739, + "grad_norm": 0.9134845682466807, + "learning_rate": 4.8175063348516615e-06, + "loss": 0.4626, + "step": 12145 + }, + { + "epoch": 0.7396401059586518, + "grad_norm": 1.0167452097339769, + "learning_rate": 4.817476405851317e-06, + "loss": 0.4125, + "step": 12146 + }, + { + "epoch": 0.7397010017355297, + "grad_norm": 1.0152499648457323, + "learning_rate": 4.817446474489975e-06, + "loss": 0.5082, + "step": 12147 + }, + { + "epoch": 0.7397618975124075, + "grad_norm": 0.9557162359913786, + "learning_rate": 4.817416540767667e-06, + "loss": 0.4095, + "step": 12148 + }, + { + "epoch": 0.7398227932892854, + "grad_norm": 1.0057511195148734, + "learning_rate": 4.817386604684423e-06, + "loss": 0.4162, + "step": 12149 + }, + { + "epoch": 0.7398836890661633, + "grad_norm": 1.0167664260324005, + "learning_rate": 4.8173566662402736e-06, + "loss": 0.4148, + "step": 12150 + }, + { + "epoch": 0.7399445848430412, + "grad_norm": 0.9950880634527898, + "learning_rate": 4.8173267254352495e-06, + "loss": 0.4476, + "step": 12151 + }, + { + "epoch": 0.740005480619919, + "grad_norm": 0.9693162208857585, + "learning_rate": 4.817296782269382e-06, + "loss": 0.3772, + "step": 12152 + }, + { + "epoch": 0.7400663763967968, + "grad_norm": 0.9775125409971821, + "learning_rate": 4.817266836742699e-06, + "loss": 0.3877, + "step": 12153 + }, + { + "epoch": 0.7401272721736748, + "grad_norm": 0.9661775965484798, + "learning_rate": 4.817236888855234e-06, + "loss": 0.4447, + "step": 12154 + }, + { + "epoch": 0.7401881679505526, + "grad_norm": 1.0086717113498187, + "learning_rate": 4.817206938607016e-06, + "loss": 0.4113, + "step": 12155 + }, + { + "epoch": 0.7402490637274305, + "grad_norm": 1.0212288662124582, + "learning_rate": 4.817176985998075e-06, + "loss": 0.4528, + "step": 12156 + }, + { + "epoch": 0.7403099595043083, + "grad_norm": 0.9441250887727409, + "learning_rate": 4.8171470310284426e-06, + "loss": 0.431, + "step": 12157 + }, + { + "epoch": 0.7403708552811863, + "grad_norm": 0.9736672650650974, + "learning_rate": 4.8171170736981495e-06, + "loss": 0.4302, + "step": 12158 + }, + { + "epoch": 0.7404317510580641, + "grad_norm": 1.0493311490887487, + "learning_rate": 4.8170871140072265e-06, + "loss": 0.3579, + "step": 12159 + }, + { + "epoch": 0.740492646834942, + "grad_norm": 1.0427100727312535, + "learning_rate": 4.817057151955702e-06, + "loss": 0.4335, + "step": 12160 + }, + { + "epoch": 0.7405535426118198, + "grad_norm": 1.0090595293906004, + "learning_rate": 4.817027187543608e-06, + "loss": 0.4768, + "step": 12161 + }, + { + "epoch": 0.7406144383886978, + "grad_norm": 1.1180068065685733, + "learning_rate": 4.816997220770975e-06, + "loss": 0.5142, + "step": 12162 + }, + { + "epoch": 0.7406753341655756, + "grad_norm": 0.9367910179620716, + "learning_rate": 4.816967251637833e-06, + "loss": 0.416, + "step": 12163 + }, + { + "epoch": 0.7407362299424535, + "grad_norm": 1.070390704410206, + "learning_rate": 4.816937280144213e-06, + "loss": 0.4217, + "step": 12164 + }, + { + "epoch": 0.7407971257193313, + "grad_norm": 0.9436362425291729, + "learning_rate": 4.8169073062901465e-06, + "loss": 0.4677, + "step": 12165 + }, + { + "epoch": 0.7408580214962093, + "grad_norm": 0.9003955453495184, + "learning_rate": 4.816877330075662e-06, + "loss": 0.4629, + "step": 12166 + }, + { + "epoch": 0.7409189172730871, + "grad_norm": 1.0585629167268815, + "learning_rate": 4.816847351500792e-06, + "loss": 0.4022, + "step": 12167 + }, + { + "epoch": 0.740979813049965, + "grad_norm": 1.0217614678117142, + "learning_rate": 4.816817370565565e-06, + "loss": 0.4144, + "step": 12168 + }, + { + "epoch": 0.7410407088268428, + "grad_norm": 0.9641186966775398, + "learning_rate": 4.816787387270013e-06, + "loss": 0.3909, + "step": 12169 + }, + { + "epoch": 0.7411016046037208, + "grad_norm": 0.9315945725994502, + "learning_rate": 4.816757401614166e-06, + "loss": 0.4686, + "step": 12170 + }, + { + "epoch": 0.7411625003805986, + "grad_norm": 0.9612465807991062, + "learning_rate": 4.8167274135980554e-06, + "loss": 0.4744, + "step": 12171 + }, + { + "epoch": 0.7412233961574765, + "grad_norm": 1.001179990086669, + "learning_rate": 4.8166974232217105e-06, + "loss": 0.4529, + "step": 12172 + }, + { + "epoch": 0.7412842919343543, + "grad_norm": 0.9947830181154241, + "learning_rate": 4.8166674304851625e-06, + "loss": 0.42, + "step": 12173 + }, + { + "epoch": 0.7413451877112323, + "grad_norm": 0.934105601686934, + "learning_rate": 4.816637435388443e-06, + "loss": 0.4558, + "step": 12174 + }, + { + "epoch": 0.7414060834881101, + "grad_norm": 1.0641719156366751, + "learning_rate": 4.81660743793158e-06, + "loss": 0.5144, + "step": 12175 + }, + { + "epoch": 0.741466979264988, + "grad_norm": 1.109440991708071, + "learning_rate": 4.8165774381146055e-06, + "loss": 0.3999, + "step": 12176 + }, + { + "epoch": 0.7415278750418658, + "grad_norm": 1.0790882006160119, + "learning_rate": 4.816547435937551e-06, + "loss": 0.4335, + "step": 12177 + }, + { + "epoch": 0.7415887708187437, + "grad_norm": 1.0425240823512385, + "learning_rate": 4.816517431400446e-06, + "loss": 0.3958, + "step": 12178 + }, + { + "epoch": 0.7416496665956216, + "grad_norm": 1.0056853999237299, + "learning_rate": 4.81648742450332e-06, + "loss": 0.4368, + "step": 12179 + }, + { + "epoch": 0.7417105623724994, + "grad_norm": 1.099892990376384, + "learning_rate": 4.816457415246206e-06, + "loss": 0.3262, + "step": 12180 + }, + { + "epoch": 0.7417714581493774, + "grad_norm": 1.0464761920568422, + "learning_rate": 4.816427403629133e-06, + "loss": 0.4947, + "step": 12181 + }, + { + "epoch": 0.7418323539262552, + "grad_norm": 0.9393412176110904, + "learning_rate": 4.816397389652131e-06, + "loss": 0.4253, + "step": 12182 + }, + { + "epoch": 0.7418932497031331, + "grad_norm": 0.9424920009166102, + "learning_rate": 4.816367373315233e-06, + "loss": 0.4715, + "step": 12183 + }, + { + "epoch": 0.7419541454800109, + "grad_norm": 1.0327445694226143, + "learning_rate": 4.816337354618468e-06, + "loss": 0.3939, + "step": 12184 + }, + { + "epoch": 0.7420150412568889, + "grad_norm": 0.9451788018444015, + "learning_rate": 4.816307333561866e-06, + "loss": 0.4335, + "step": 12185 + }, + { + "epoch": 0.7420759370337667, + "grad_norm": 1.045556139394314, + "learning_rate": 4.816277310145458e-06, + "loss": 0.3956, + "step": 12186 + }, + { + "epoch": 0.7421368328106446, + "grad_norm": 1.007577020133167, + "learning_rate": 4.816247284369276e-06, + "loss": 0.4367, + "step": 12187 + }, + { + "epoch": 0.7421977285875224, + "grad_norm": 0.9632267308530007, + "learning_rate": 4.816217256233348e-06, + "loss": 0.4962, + "step": 12188 + }, + { + "epoch": 0.7422586243644004, + "grad_norm": 0.9964262336418948, + "learning_rate": 4.816187225737706e-06, + "loss": 0.4958, + "step": 12189 + }, + { + "epoch": 0.7423195201412782, + "grad_norm": 1.012167974992191, + "learning_rate": 4.816157192882382e-06, + "loss": 0.3865, + "step": 12190 + }, + { + "epoch": 0.7423804159181561, + "grad_norm": 1.0278391819567274, + "learning_rate": 4.816127157667404e-06, + "loss": 0.4494, + "step": 12191 + }, + { + "epoch": 0.7424413116950339, + "grad_norm": 0.9925319424363196, + "learning_rate": 4.816097120092804e-06, + "loss": 0.4493, + "step": 12192 + }, + { + "epoch": 0.7425022074719119, + "grad_norm": 1.042290741252736, + "learning_rate": 4.816067080158613e-06, + "loss": 0.4387, + "step": 12193 + }, + { + "epoch": 0.7425631032487897, + "grad_norm": 0.9213351488471182, + "learning_rate": 4.816037037864861e-06, + "loss": 0.4324, + "step": 12194 + }, + { + "epoch": 0.7426239990256676, + "grad_norm": 0.9903337213877166, + "learning_rate": 4.816006993211578e-06, + "loss": 0.3927, + "step": 12195 + }, + { + "epoch": 0.7426848948025454, + "grad_norm": 1.0851421382963395, + "learning_rate": 4.815976946198795e-06, + "loss": 0.4129, + "step": 12196 + }, + { + "epoch": 0.7427457905794234, + "grad_norm": 0.9715898484563145, + "learning_rate": 4.815946896826544e-06, + "loss": 0.4297, + "step": 12197 + }, + { + "epoch": 0.7428066863563012, + "grad_norm": 0.9825788240802872, + "learning_rate": 4.815916845094853e-06, + "loss": 0.4453, + "step": 12198 + }, + { + "epoch": 0.742867582133179, + "grad_norm": 1.036014505033384, + "learning_rate": 4.815886791003756e-06, + "loss": 0.3983, + "step": 12199 + }, + { + "epoch": 0.7429284779100569, + "grad_norm": 1.0519263058157606, + "learning_rate": 4.81585673455328e-06, + "loss": 0.4517, + "step": 12200 + }, + { + "epoch": 0.7429893736869349, + "grad_norm": 1.0187218544960244, + "learning_rate": 4.815826675743458e-06, + "loss": 0.4289, + "step": 12201 + }, + { + "epoch": 0.7430502694638127, + "grad_norm": 1.0093136576770292, + "learning_rate": 4.815796614574319e-06, + "loss": 0.4313, + "step": 12202 + }, + { + "epoch": 0.7431111652406905, + "grad_norm": 1.0164740962769823, + "learning_rate": 4.8157665510458965e-06, + "loss": 0.4603, + "step": 12203 + }, + { + "epoch": 0.7431720610175684, + "grad_norm": 1.0631133963966357, + "learning_rate": 4.815736485158218e-06, + "loss": 0.3308, + "step": 12204 + }, + { + "epoch": 0.7432329567944463, + "grad_norm": 1.174674845379732, + "learning_rate": 4.815706416911316e-06, + "loss": 0.4148, + "step": 12205 + }, + { + "epoch": 0.7432938525713242, + "grad_norm": 0.9966385844350696, + "learning_rate": 4.815676346305219e-06, + "loss": 0.4065, + "step": 12206 + }, + { + "epoch": 0.743354748348202, + "grad_norm": 1.0133345567573697, + "learning_rate": 4.81564627333996e-06, + "loss": 0.4196, + "step": 12207 + }, + { + "epoch": 0.7434156441250799, + "grad_norm": 1.0109886915316053, + "learning_rate": 4.815616198015568e-06, + "loss": 0.4204, + "step": 12208 + }, + { + "epoch": 0.7434765399019578, + "grad_norm": 1.002180839926736, + "learning_rate": 4.815586120332076e-06, + "loss": 0.393, + "step": 12209 + }, + { + "epoch": 0.7435374356788357, + "grad_norm": 1.0846934170555027, + "learning_rate": 4.8155560402895115e-06, + "loss": 0.4398, + "step": 12210 + }, + { + "epoch": 0.7435983314557135, + "grad_norm": 0.9612579188834157, + "learning_rate": 4.8155259578879064e-06, + "loss": 0.4202, + "step": 12211 + }, + { + "epoch": 0.7436592272325914, + "grad_norm": 1.0669313189020362, + "learning_rate": 4.815495873127293e-06, + "loss": 0.4158, + "step": 12212 + }, + { + "epoch": 0.7437201230094693, + "grad_norm": 0.9810470020463001, + "learning_rate": 4.8154657860077e-06, + "loss": 0.4318, + "step": 12213 + }, + { + "epoch": 0.7437810187863472, + "grad_norm": 0.9759829613210826, + "learning_rate": 4.815435696529158e-06, + "loss": 0.4662, + "step": 12214 + }, + { + "epoch": 0.743841914563225, + "grad_norm": 1.0183479850108965, + "learning_rate": 4.815405604691698e-06, + "loss": 0.4375, + "step": 12215 + }, + { + "epoch": 0.7439028103401029, + "grad_norm": 1.0174736575431373, + "learning_rate": 4.8153755104953525e-06, + "loss": 0.4002, + "step": 12216 + }, + { + "epoch": 0.7439637061169808, + "grad_norm": 1.014767168128371, + "learning_rate": 4.8153454139401496e-06, + "loss": 0.4052, + "step": 12217 + }, + { + "epoch": 0.7440246018938587, + "grad_norm": 1.0803926863936018, + "learning_rate": 4.815315315026121e-06, + "loss": 0.397, + "step": 12218 + }, + { + "epoch": 0.7440854976707365, + "grad_norm": 1.0205399036187741, + "learning_rate": 4.815285213753298e-06, + "loss": 0.3654, + "step": 12219 + }, + { + "epoch": 0.7441463934476145, + "grad_norm": 0.9590637565562924, + "learning_rate": 4.81525511012171e-06, + "loss": 0.4071, + "step": 12220 + }, + { + "epoch": 0.7442072892244923, + "grad_norm": 1.0811406122769704, + "learning_rate": 4.815225004131387e-06, + "loss": 0.3457, + "step": 12221 + }, + { + "epoch": 0.7442681850013702, + "grad_norm": 1.059280277381278, + "learning_rate": 4.815194895782363e-06, + "loss": 0.4255, + "step": 12222 + }, + { + "epoch": 0.744329080778248, + "grad_norm": 0.9997453422201993, + "learning_rate": 4.815164785074665e-06, + "loss": 0.4065, + "step": 12223 + }, + { + "epoch": 0.744389976555126, + "grad_norm": 1.0212763505975353, + "learning_rate": 4.815134672008326e-06, + "loss": 0.5241, + "step": 12224 + }, + { + "epoch": 0.7444508723320038, + "grad_norm": 1.089602119974823, + "learning_rate": 4.815104556583375e-06, + "loss": 0.3826, + "step": 12225 + }, + { + "epoch": 0.7445117681088816, + "grad_norm": 0.984820718682522, + "learning_rate": 4.815074438799845e-06, + "loss": 0.5168, + "step": 12226 + }, + { + "epoch": 0.7445726638857595, + "grad_norm": 1.0743847456126208, + "learning_rate": 4.815044318657765e-06, + "loss": 0.4135, + "step": 12227 + }, + { + "epoch": 0.7446335596626374, + "grad_norm": 0.9910066059204116, + "learning_rate": 4.815014196157165e-06, + "loss": 0.4379, + "step": 12228 + }, + { + "epoch": 0.7446944554395153, + "grad_norm": 0.975892808756109, + "learning_rate": 4.814984071298078e-06, + "loss": 0.4262, + "step": 12229 + }, + { + "epoch": 0.7447553512163931, + "grad_norm": 1.0385843870669031, + "learning_rate": 4.814953944080532e-06, + "loss": 0.4337, + "step": 12230 + }, + { + "epoch": 0.744816246993271, + "grad_norm": 1.0061499610246374, + "learning_rate": 4.814923814504559e-06, + "loss": 0.4642, + "step": 12231 + }, + { + "epoch": 0.7448771427701489, + "grad_norm": 0.9902205745016802, + "learning_rate": 4.814893682570191e-06, + "loss": 0.3966, + "step": 12232 + }, + { + "epoch": 0.7449380385470268, + "grad_norm": 0.987828851743519, + "learning_rate": 4.814863548277457e-06, + "loss": 0.3508, + "step": 12233 + }, + { + "epoch": 0.7449989343239046, + "grad_norm": 0.9926326286666151, + "learning_rate": 4.814833411626389e-06, + "loss": 0.3684, + "step": 12234 + }, + { + "epoch": 0.7450598301007825, + "grad_norm": 0.9158862809697386, + "learning_rate": 4.814803272617015e-06, + "loss": 0.4519, + "step": 12235 + }, + { + "epoch": 0.7451207258776604, + "grad_norm": 1.0133282040201685, + "learning_rate": 4.814773131249368e-06, + "loss": 0.4588, + "step": 12236 + }, + { + "epoch": 0.7451816216545383, + "grad_norm": 0.9762108841340611, + "learning_rate": 4.814742987523479e-06, + "loss": 0.4121, + "step": 12237 + }, + { + "epoch": 0.7452425174314161, + "grad_norm": 0.952748103012947, + "learning_rate": 4.814712841439378e-06, + "loss": 0.453, + "step": 12238 + }, + { + "epoch": 0.745303413208294, + "grad_norm": 0.936465770251524, + "learning_rate": 4.814682692997095e-06, + "loss": 0.4288, + "step": 12239 + }, + { + "epoch": 0.7453643089851719, + "grad_norm": 1.0056510233272604, + "learning_rate": 4.814652542196661e-06, + "loss": 0.4164, + "step": 12240 + }, + { + "epoch": 0.7454252047620498, + "grad_norm": 1.0686165195181443, + "learning_rate": 4.814622389038109e-06, + "loss": 0.4248, + "step": 12241 + }, + { + "epoch": 0.7454861005389276, + "grad_norm": 0.8937740767248115, + "learning_rate": 4.814592233521467e-06, + "loss": 0.4734, + "step": 12242 + }, + { + "epoch": 0.7455469963158055, + "grad_norm": 0.9998011763768885, + "learning_rate": 4.814562075646766e-06, + "loss": 0.4089, + "step": 12243 + }, + { + "epoch": 0.7456078920926834, + "grad_norm": 0.9914133159007127, + "learning_rate": 4.814531915414037e-06, + "loss": 0.4144, + "step": 12244 + }, + { + "epoch": 0.7456687878695613, + "grad_norm": 1.0589472636416417, + "learning_rate": 4.814501752823312e-06, + "loss": 0.3884, + "step": 12245 + }, + { + "epoch": 0.7457296836464391, + "grad_norm": 0.9003501502851283, + "learning_rate": 4.81447158787462e-06, + "loss": 0.5168, + "step": 12246 + }, + { + "epoch": 0.745790579423317, + "grad_norm": 1.0228227970624784, + "learning_rate": 4.814441420567993e-06, + "loss": 0.4027, + "step": 12247 + }, + { + "epoch": 0.7458514752001949, + "grad_norm": 0.9475740542630768, + "learning_rate": 4.8144112509034605e-06, + "loss": 0.4412, + "step": 12248 + }, + { + "epoch": 0.7459123709770727, + "grad_norm": 0.980825728280638, + "learning_rate": 4.814381078881055e-06, + "loss": 0.437, + "step": 12249 + }, + { + "epoch": 0.7459732667539506, + "grad_norm": 0.929099087401875, + "learning_rate": 4.8143509045008055e-06, + "loss": 0.5301, + "step": 12250 + }, + { + "epoch": 0.7460341625308284, + "grad_norm": 1.132399407359302, + "learning_rate": 4.814320727762743e-06, + "loss": 0.3551, + "step": 12251 + }, + { + "epoch": 0.7460950583077064, + "grad_norm": 0.9642367887406145, + "learning_rate": 4.814290548666899e-06, + "loss": 0.4156, + "step": 12252 + }, + { + "epoch": 0.7461559540845842, + "grad_norm": 0.9516691513268866, + "learning_rate": 4.814260367213305e-06, + "loss": 0.4633, + "step": 12253 + }, + { + "epoch": 0.7462168498614621, + "grad_norm": 0.9131973073747602, + "learning_rate": 4.814230183401989e-06, + "loss": 0.4605, + "step": 12254 + }, + { + "epoch": 0.7462777456383399, + "grad_norm": 0.9911363877097429, + "learning_rate": 4.814199997232984e-06, + "loss": 0.4059, + "step": 12255 + }, + { + "epoch": 0.7463386414152179, + "grad_norm": 1.0283117405631912, + "learning_rate": 4.814169808706321e-06, + "loss": 0.4476, + "step": 12256 + }, + { + "epoch": 0.7463995371920957, + "grad_norm": 0.9813424474588548, + "learning_rate": 4.814139617822029e-06, + "loss": 0.3984, + "step": 12257 + }, + { + "epoch": 0.7464604329689736, + "grad_norm": 0.9247926497256053, + "learning_rate": 4.814109424580139e-06, + "loss": 0.4869, + "step": 12258 + }, + { + "epoch": 0.7465213287458514, + "grad_norm": 1.0370918361663588, + "learning_rate": 4.8140792289806836e-06, + "loss": 0.4263, + "step": 12259 + }, + { + "epoch": 0.7465822245227294, + "grad_norm": 1.1092828575725153, + "learning_rate": 4.814049031023692e-06, + "loss": 0.478, + "step": 12260 + }, + { + "epoch": 0.7466431202996072, + "grad_norm": 0.9011618510151125, + "learning_rate": 4.814018830709195e-06, + "loss": 0.4839, + "step": 12261 + }, + { + "epoch": 0.7467040160764851, + "grad_norm": 1.0316608110726813, + "learning_rate": 4.813988628037224e-06, + "loss": 0.4393, + "step": 12262 + }, + { + "epoch": 0.746764911853363, + "grad_norm": 0.9860081212606925, + "learning_rate": 4.813958423007809e-06, + "loss": 0.4207, + "step": 12263 + }, + { + "epoch": 0.7468258076302409, + "grad_norm": 1.0278281909475404, + "learning_rate": 4.813928215620983e-06, + "loss": 0.3445, + "step": 12264 + }, + { + "epoch": 0.7468867034071187, + "grad_norm": 0.9935971342637204, + "learning_rate": 4.813898005876774e-06, + "loss": 0.5231, + "step": 12265 + }, + { + "epoch": 0.7469475991839966, + "grad_norm": 1.0666041098548305, + "learning_rate": 4.813867793775213e-06, + "loss": 0.3438, + "step": 12266 + }, + { + "epoch": 0.7470084949608745, + "grad_norm": 0.9836410079112765, + "learning_rate": 4.8138375793163325e-06, + "loss": 0.4143, + "step": 12267 + }, + { + "epoch": 0.7470693907377524, + "grad_norm": 1.096534278348939, + "learning_rate": 4.8138073625001626e-06, + "loss": 0.4192, + "step": 12268 + }, + { + "epoch": 0.7471302865146302, + "grad_norm": 1.0204730463667335, + "learning_rate": 4.813777143326733e-06, + "loss": 0.4436, + "step": 12269 + }, + { + "epoch": 0.747191182291508, + "grad_norm": 0.9730378322472946, + "learning_rate": 4.813746921796077e-06, + "loss": 0.4254, + "step": 12270 + }, + { + "epoch": 0.747252078068386, + "grad_norm": 0.9417024959462177, + "learning_rate": 4.813716697908222e-06, + "loss": 0.4897, + "step": 12271 + }, + { + "epoch": 0.7473129738452639, + "grad_norm": 1.0541430853389668, + "learning_rate": 4.813686471663201e-06, + "loss": 0.4232, + "step": 12272 + }, + { + "epoch": 0.7473738696221417, + "grad_norm": 0.9620401601988975, + "learning_rate": 4.813656243061045e-06, + "loss": 0.4611, + "step": 12273 + }, + { + "epoch": 0.7474347653990195, + "grad_norm": 1.0379245200184148, + "learning_rate": 4.813626012101783e-06, + "loss": 0.4391, + "step": 12274 + }, + { + "epoch": 0.7474956611758975, + "grad_norm": 1.0838534249535798, + "learning_rate": 4.813595778785447e-06, + "loss": 0.3883, + "step": 12275 + }, + { + "epoch": 0.7475565569527753, + "grad_norm": 1.002137465487846, + "learning_rate": 4.813565543112068e-06, + "loss": 0.4123, + "step": 12276 + }, + { + "epoch": 0.7476174527296532, + "grad_norm": 0.9758194409790849, + "learning_rate": 4.813535305081677e-06, + "loss": 0.4481, + "step": 12277 + }, + { + "epoch": 0.747678348506531, + "grad_norm": 0.9614700932512557, + "learning_rate": 4.813505064694305e-06, + "loss": 0.4262, + "step": 12278 + }, + { + "epoch": 0.747739244283409, + "grad_norm": 1.0116153424629948, + "learning_rate": 4.81347482194998e-06, + "loss": 0.4186, + "step": 12279 + }, + { + "epoch": 0.7478001400602868, + "grad_norm": 1.0371148776455827, + "learning_rate": 4.813444576848737e-06, + "loss": 0.3447, + "step": 12280 + }, + { + "epoch": 0.7478610358371647, + "grad_norm": 0.9793886897669134, + "learning_rate": 4.8134143293906035e-06, + "loss": 0.4886, + "step": 12281 + }, + { + "epoch": 0.7479219316140425, + "grad_norm": 1.0774093635417374, + "learning_rate": 4.813384079575612e-06, + "loss": 0.4085, + "step": 12282 + }, + { + "epoch": 0.7479828273909205, + "grad_norm": 1.014754360816434, + "learning_rate": 4.813353827403793e-06, + "loss": 0.4754, + "step": 12283 + }, + { + "epoch": 0.7480437231677983, + "grad_norm": 1.0532766475858641, + "learning_rate": 4.813323572875177e-06, + "loss": 0.3863, + "step": 12284 + }, + { + "epoch": 0.7481046189446762, + "grad_norm": 0.9919269812201309, + "learning_rate": 4.813293315989796e-06, + "loss": 0.3753, + "step": 12285 + }, + { + "epoch": 0.748165514721554, + "grad_norm": 0.9352144324711303, + "learning_rate": 4.813263056747678e-06, + "loss": 0.4943, + "step": 12286 + }, + { + "epoch": 0.748226410498432, + "grad_norm": 0.9213641621180542, + "learning_rate": 4.813232795148856e-06, + "loss": 0.4312, + "step": 12287 + }, + { + "epoch": 0.7482873062753098, + "grad_norm": 0.9679397474840722, + "learning_rate": 4.813202531193362e-06, + "loss": 0.4388, + "step": 12288 + }, + { + "epoch": 0.7483482020521877, + "grad_norm": 1.0335001286209866, + "learning_rate": 4.813172264881224e-06, + "loss": 0.3943, + "step": 12289 + }, + { + "epoch": 0.7484090978290655, + "grad_norm": 1.0495274769224527, + "learning_rate": 4.813141996212476e-06, + "loss": 0.4407, + "step": 12290 + }, + { + "epoch": 0.7484699936059435, + "grad_norm": 1.0108350339419736, + "learning_rate": 4.813111725187145e-06, + "loss": 0.3602, + "step": 12291 + }, + { + "epoch": 0.7485308893828213, + "grad_norm": 0.9643979901560995, + "learning_rate": 4.813081451805265e-06, + "loss": 0.4641, + "step": 12292 + }, + { + "epoch": 0.7485917851596992, + "grad_norm": 1.0060289310667294, + "learning_rate": 4.813051176066865e-06, + "loss": 0.382, + "step": 12293 + }, + { + "epoch": 0.748652680936577, + "grad_norm": 0.9880285200518549, + "learning_rate": 4.813020897971977e-06, + "loss": 0.379, + "step": 12294 + }, + { + "epoch": 0.748713576713455, + "grad_norm": 0.8950879926252695, + "learning_rate": 4.812990617520632e-06, + "loss": 0.5283, + "step": 12295 + }, + { + "epoch": 0.7487744724903328, + "grad_norm": 0.9695172381072482, + "learning_rate": 4.812960334712859e-06, + "loss": 0.4346, + "step": 12296 + }, + { + "epoch": 0.7488353682672106, + "grad_norm": 0.9973975300149688, + "learning_rate": 4.812930049548691e-06, + "loss": 0.4207, + "step": 12297 + }, + { + "epoch": 0.7488962640440885, + "grad_norm": 0.9583911738670096, + "learning_rate": 4.812899762028157e-06, + "loss": 0.4307, + "step": 12298 + }, + { + "epoch": 0.7489571598209664, + "grad_norm": 1.0019186592049503, + "learning_rate": 4.81286947215129e-06, + "loss": 0.4189, + "step": 12299 + }, + { + "epoch": 0.7490180555978443, + "grad_norm": 1.1734439175513747, + "learning_rate": 4.812839179918118e-06, + "loss": 0.4192, + "step": 12300 + }, + { + "epoch": 0.7490789513747221, + "grad_norm": 1.0254582852322824, + "learning_rate": 4.812808885328675e-06, + "loss": 0.3789, + "step": 12301 + }, + { + "epoch": 0.7491398471516001, + "grad_norm": 1.0525653251184606, + "learning_rate": 4.81277858838299e-06, + "loss": 0.4243, + "step": 12302 + }, + { + "epoch": 0.7492007429284779, + "grad_norm": 1.1007430531796032, + "learning_rate": 4.812748289081095e-06, + "loss": 0.4519, + "step": 12303 + }, + { + "epoch": 0.7492616387053558, + "grad_norm": 0.9713752325142969, + "learning_rate": 4.812717987423019e-06, + "loss": 0.4324, + "step": 12304 + }, + { + "epoch": 0.7493225344822336, + "grad_norm": 1.0108503752648759, + "learning_rate": 4.812687683408794e-06, + "loss": 0.4025, + "step": 12305 + }, + { + "epoch": 0.7493834302591116, + "grad_norm": 0.9488452085725082, + "learning_rate": 4.8126573770384514e-06, + "loss": 0.4602, + "step": 12306 + }, + { + "epoch": 0.7494443260359894, + "grad_norm": 1.1165617589576224, + "learning_rate": 4.812627068312021e-06, + "loss": 0.3731, + "step": 12307 + }, + { + "epoch": 0.7495052218128673, + "grad_norm": 1.055521247840377, + "learning_rate": 4.812596757229535e-06, + "loss": 0.3727, + "step": 12308 + }, + { + "epoch": 0.7495661175897451, + "grad_norm": 0.975890490555377, + "learning_rate": 4.8125664437910236e-06, + "loss": 0.4281, + "step": 12309 + }, + { + "epoch": 0.7496270133666231, + "grad_norm": 1.0117786538856486, + "learning_rate": 4.812536127996517e-06, + "loss": 0.4074, + "step": 12310 + }, + { + "epoch": 0.7496879091435009, + "grad_norm": 1.0128203270499294, + "learning_rate": 4.8125058098460465e-06, + "loss": 0.4082, + "step": 12311 + }, + { + "epoch": 0.7497488049203788, + "grad_norm": 0.9722004697868167, + "learning_rate": 4.812475489339644e-06, + "loss": 0.4233, + "step": 12312 + }, + { + "epoch": 0.7498097006972566, + "grad_norm": 0.9825128719316545, + "learning_rate": 4.812445166477338e-06, + "loss": 0.4556, + "step": 12313 + }, + { + "epoch": 0.7498705964741346, + "grad_norm": 0.9905440822839195, + "learning_rate": 4.812414841259162e-06, + "loss": 0.4546, + "step": 12314 + }, + { + "epoch": 0.7499314922510124, + "grad_norm": 1.1106274246281742, + "learning_rate": 4.812384513685146e-06, + "loss": 0.3569, + "step": 12315 + }, + { + "epoch": 0.7499923880278903, + "grad_norm": 1.0819797409365086, + "learning_rate": 4.812354183755321e-06, + "loss": 0.3898, + "step": 12316 + }, + { + "epoch": 0.7500532838047681, + "grad_norm": 0.9882773714576718, + "learning_rate": 4.812323851469717e-06, + "loss": 0.4251, + "step": 12317 + }, + { + "epoch": 0.7501141795816461, + "grad_norm": 1.008322597379449, + "learning_rate": 4.8122935168283655e-06, + "loss": 0.375, + "step": 12318 + }, + { + "epoch": 0.7501750753585239, + "grad_norm": 0.9736262993884812, + "learning_rate": 4.812263179831298e-06, + "loss": 0.4431, + "step": 12319 + }, + { + "epoch": 0.7502359711354017, + "grad_norm": 0.9674666698440513, + "learning_rate": 4.812232840478544e-06, + "loss": 0.4071, + "step": 12320 + }, + { + "epoch": 0.7502968669122796, + "grad_norm": 1.0523655580396456, + "learning_rate": 4.812202498770136e-06, + "loss": 0.3711, + "step": 12321 + }, + { + "epoch": 0.7503577626891575, + "grad_norm": 0.9893269333343842, + "learning_rate": 4.812172154706104e-06, + "loss": 0.4121, + "step": 12322 + }, + { + "epoch": 0.7504186584660354, + "grad_norm": 1.0559963658651106, + "learning_rate": 4.8121418082864785e-06, + "loss": 0.4313, + "step": 12323 + }, + { + "epoch": 0.7504795542429132, + "grad_norm": 1.0079017931513619, + "learning_rate": 4.812111459511291e-06, + "loss": 0.4798, + "step": 12324 + }, + { + "epoch": 0.7505404500197911, + "grad_norm": 1.008379518459967, + "learning_rate": 4.812081108380573e-06, + "loss": 0.4533, + "step": 12325 + }, + { + "epoch": 0.750601345796669, + "grad_norm": 0.9476445091355604, + "learning_rate": 4.812050754894355e-06, + "loss": 0.3766, + "step": 12326 + }, + { + "epoch": 0.7506622415735469, + "grad_norm": 0.97398451964911, + "learning_rate": 4.8120203990526675e-06, + "loss": 0.4201, + "step": 12327 + }, + { + "epoch": 0.7507231373504247, + "grad_norm": 0.9915000405914315, + "learning_rate": 4.811990040855542e-06, + "loss": 0.4865, + "step": 12328 + }, + { + "epoch": 0.7507840331273026, + "grad_norm": 0.9872488830916155, + "learning_rate": 4.811959680303009e-06, + "loss": 0.4238, + "step": 12329 + }, + { + "epoch": 0.7508449289041805, + "grad_norm": 1.1087528334327257, + "learning_rate": 4.8119293173950985e-06, + "loss": 0.4181, + "step": 12330 + }, + { + "epoch": 0.7509058246810584, + "grad_norm": 1.1045730614667975, + "learning_rate": 4.811898952131844e-06, + "loss": 0.4776, + "step": 12331 + }, + { + "epoch": 0.7509667204579362, + "grad_norm": 0.9712475237486787, + "learning_rate": 4.811868584513274e-06, + "loss": 0.4022, + "step": 12332 + }, + { + "epoch": 0.7510276162348141, + "grad_norm": 0.9648414724037931, + "learning_rate": 4.81183821453942e-06, + "loss": 0.4199, + "step": 12333 + }, + { + "epoch": 0.751088512011692, + "grad_norm": 0.9705026206546331, + "learning_rate": 4.8118078422103146e-06, + "loss": 0.4709, + "step": 12334 + }, + { + "epoch": 0.7511494077885699, + "grad_norm": 0.9149588499951282, + "learning_rate": 4.811777467525986e-06, + "loss": 0.5017, + "step": 12335 + }, + { + "epoch": 0.7512103035654477, + "grad_norm": 1.0265765597572774, + "learning_rate": 4.8117470904864675e-06, + "loss": 0.424, + "step": 12336 + }, + { + "epoch": 0.7512711993423256, + "grad_norm": 0.9793874969733943, + "learning_rate": 4.811716711091789e-06, + "loss": 0.4184, + "step": 12337 + }, + { + "epoch": 0.7513320951192035, + "grad_norm": 0.93073238982874, + "learning_rate": 4.811686329341981e-06, + "loss": 0.4506, + "step": 12338 + }, + { + "epoch": 0.7513929908960814, + "grad_norm": 1.0220831307647777, + "learning_rate": 4.811655945237076e-06, + "loss": 0.4127, + "step": 12339 + }, + { + "epoch": 0.7514538866729592, + "grad_norm": 1.1175867862778524, + "learning_rate": 4.811625558777103e-06, + "loss": 0.445, + "step": 12340 + }, + { + "epoch": 0.751514782449837, + "grad_norm": 1.0358051379922266, + "learning_rate": 4.811595169962094e-06, + "loss": 0.3993, + "step": 12341 + }, + { + "epoch": 0.751575678226715, + "grad_norm": 0.9977881447989416, + "learning_rate": 4.811564778792081e-06, + "loss": 0.4288, + "step": 12342 + }, + { + "epoch": 0.7516365740035928, + "grad_norm": 0.972948132920042, + "learning_rate": 4.811534385267093e-06, + "loss": 0.4144, + "step": 12343 + }, + { + "epoch": 0.7516974697804707, + "grad_norm": 0.977304837372828, + "learning_rate": 4.811503989387161e-06, + "loss": 0.3951, + "step": 12344 + }, + { + "epoch": 0.7517583655573487, + "grad_norm": 0.9263947534957987, + "learning_rate": 4.8114735911523194e-06, + "loss": 0.4432, + "step": 12345 + }, + { + "epoch": 0.7518192613342265, + "grad_norm": 1.052108067613392, + "learning_rate": 4.811443190562595e-06, + "loss": 0.3971, + "step": 12346 + }, + { + "epoch": 0.7518801571111043, + "grad_norm": 1.020184665545349, + "learning_rate": 4.811412787618019e-06, + "loss": 0.4099, + "step": 12347 + }, + { + "epoch": 0.7519410528879822, + "grad_norm": 0.9945935176275129, + "learning_rate": 4.811382382318626e-06, + "loss": 0.4198, + "step": 12348 + }, + { + "epoch": 0.7520019486648601, + "grad_norm": 1.033576862450199, + "learning_rate": 4.811351974664443e-06, + "loss": 0.4689, + "step": 12349 + }, + { + "epoch": 0.752062844441738, + "grad_norm": 1.0568862895754938, + "learning_rate": 4.811321564655503e-06, + "loss": 0.4182, + "step": 12350 + }, + { + "epoch": 0.7521237402186158, + "grad_norm": 1.0498217610793703, + "learning_rate": 4.811291152291838e-06, + "loss": 0.4168, + "step": 12351 + }, + { + "epoch": 0.7521846359954937, + "grad_norm": 1.0484903319991397, + "learning_rate": 4.811260737573476e-06, + "loss": 0.491, + "step": 12352 + }, + { + "epoch": 0.7522455317723716, + "grad_norm": 1.0617869080382007, + "learning_rate": 4.8112303205004504e-06, + "loss": 0.3775, + "step": 12353 + }, + { + "epoch": 0.7523064275492495, + "grad_norm": 1.030532633564949, + "learning_rate": 4.811199901072792e-06, + "loss": 0.3964, + "step": 12354 + }, + { + "epoch": 0.7523673233261273, + "grad_norm": 1.0021773551409783, + "learning_rate": 4.8111694792905295e-06, + "loss": 0.429, + "step": 12355 + }, + { + "epoch": 0.7524282191030052, + "grad_norm": 0.893616106407899, + "learning_rate": 4.811139055153697e-06, + "loss": 0.4899, + "step": 12356 + }, + { + "epoch": 0.7524891148798831, + "grad_norm": 0.9872002398409527, + "learning_rate": 4.811108628662323e-06, + "loss": 0.3667, + "step": 12357 + }, + { + "epoch": 0.752550010656761, + "grad_norm": 0.9900281059047101, + "learning_rate": 4.8110781998164404e-06, + "loss": 0.4571, + "step": 12358 + }, + { + "epoch": 0.7526109064336388, + "grad_norm": 0.9732979317561089, + "learning_rate": 4.811047768616079e-06, + "loss": 0.403, + "step": 12359 + }, + { + "epoch": 0.7526718022105167, + "grad_norm": 1.001959075045483, + "learning_rate": 4.811017335061271e-06, + "loss": 0.4197, + "step": 12360 + }, + { + "epoch": 0.7527326979873946, + "grad_norm": 0.984923937017285, + "learning_rate": 4.810986899152046e-06, + "loss": 0.3433, + "step": 12361 + }, + { + "epoch": 0.7527935937642725, + "grad_norm": 1.0569738170053693, + "learning_rate": 4.810956460888435e-06, + "loss": 0.455, + "step": 12362 + }, + { + "epoch": 0.7528544895411503, + "grad_norm": 0.9864381346690697, + "learning_rate": 4.81092602027047e-06, + "loss": 0.3991, + "step": 12363 + }, + { + "epoch": 0.7529153853180282, + "grad_norm": 1.0633455081951562, + "learning_rate": 4.810895577298182e-06, + "loss": 0.4238, + "step": 12364 + }, + { + "epoch": 0.7529762810949061, + "grad_norm": 1.03396952752175, + "learning_rate": 4.810865131971602e-06, + "loss": 0.3746, + "step": 12365 + }, + { + "epoch": 0.753037176871784, + "grad_norm": 1.0141009874069113, + "learning_rate": 4.810834684290759e-06, + "loss": 0.4286, + "step": 12366 + }, + { + "epoch": 0.7530980726486618, + "grad_norm": 0.941625234515785, + "learning_rate": 4.810804234255687e-06, + "loss": 0.4758, + "step": 12367 + }, + { + "epoch": 0.7531589684255396, + "grad_norm": 1.037452207724749, + "learning_rate": 4.810773781866415e-06, + "loss": 0.397, + "step": 12368 + }, + { + "epoch": 0.7532198642024176, + "grad_norm": 1.018397586975057, + "learning_rate": 4.810743327122975e-06, + "loss": 0.4204, + "step": 12369 + }, + { + "epoch": 0.7532807599792954, + "grad_norm": 0.9378034611504955, + "learning_rate": 4.810712870025398e-06, + "loss": 0.4671, + "step": 12370 + }, + { + "epoch": 0.7533416557561733, + "grad_norm": 1.0739268774281843, + "learning_rate": 4.810682410573715e-06, + "loss": 0.361, + "step": 12371 + }, + { + "epoch": 0.7534025515330511, + "grad_norm": 0.9949907741936609, + "learning_rate": 4.810651948767956e-06, + "loss": 0.4002, + "step": 12372 + }, + { + "epoch": 0.7534634473099291, + "grad_norm": 0.9982543422215596, + "learning_rate": 4.810621484608153e-06, + "loss": 0.3998, + "step": 12373 + }, + { + "epoch": 0.7535243430868069, + "grad_norm": 0.9284246453333874, + "learning_rate": 4.810591018094337e-06, + "loss": 0.4415, + "step": 12374 + }, + { + "epoch": 0.7535852388636848, + "grad_norm": 0.9852965101209674, + "learning_rate": 4.81056054922654e-06, + "loss": 0.4009, + "step": 12375 + }, + { + "epoch": 0.7536461346405626, + "grad_norm": 0.9380978223011259, + "learning_rate": 4.81053007800479e-06, + "loss": 0.4506, + "step": 12376 + }, + { + "epoch": 0.7537070304174406, + "grad_norm": 0.981059935437083, + "learning_rate": 4.810499604429121e-06, + "loss": 0.3863, + "step": 12377 + }, + { + "epoch": 0.7537679261943184, + "grad_norm": 1.1655371675704194, + "learning_rate": 4.810469128499563e-06, + "loss": 0.3674, + "step": 12378 + }, + { + "epoch": 0.7538288219711963, + "grad_norm": 1.028522873005049, + "learning_rate": 4.8104386502161475e-06, + "loss": 0.4415, + "step": 12379 + }, + { + "epoch": 0.7538897177480741, + "grad_norm": 1.0532465855329123, + "learning_rate": 4.810408169578905e-06, + "loss": 0.4153, + "step": 12380 + }, + { + "epoch": 0.7539506135249521, + "grad_norm": 1.014709008656167, + "learning_rate": 4.810377686587866e-06, + "loss": 0.4534, + "step": 12381 + }, + { + "epoch": 0.7540115093018299, + "grad_norm": 1.1025260086615374, + "learning_rate": 4.810347201243063e-06, + "loss": 0.3875, + "step": 12382 + }, + { + "epoch": 0.7540724050787078, + "grad_norm": 1.0493117414273294, + "learning_rate": 4.810316713544526e-06, + "loss": 0.3774, + "step": 12383 + }, + { + "epoch": 0.7541333008555857, + "grad_norm": 0.9586791937438466, + "learning_rate": 4.810286223492286e-06, + "loss": 0.4465, + "step": 12384 + }, + { + "epoch": 0.7541941966324636, + "grad_norm": 1.0482966165655714, + "learning_rate": 4.8102557310863744e-06, + "loss": 0.4374, + "step": 12385 + }, + { + "epoch": 0.7542550924093414, + "grad_norm": 0.9736165763863719, + "learning_rate": 4.810225236326822e-06, + "loss": 0.406, + "step": 12386 + }, + { + "epoch": 0.7543159881862193, + "grad_norm": 0.9678778099739466, + "learning_rate": 4.810194739213661e-06, + "loss": 0.4033, + "step": 12387 + }, + { + "epoch": 0.7543768839630972, + "grad_norm": 1.0465439302259196, + "learning_rate": 4.810164239746922e-06, + "loss": 0.3886, + "step": 12388 + }, + { + "epoch": 0.7544377797399751, + "grad_norm": 1.0128325341382574, + "learning_rate": 4.810133737926635e-06, + "loss": 0.3722, + "step": 12389 + }, + { + "epoch": 0.7544986755168529, + "grad_norm": 0.9610734686413759, + "learning_rate": 4.810103233752832e-06, + "loss": 0.4467, + "step": 12390 + }, + { + "epoch": 0.7545595712937307, + "grad_norm": 0.9543063465047177, + "learning_rate": 4.8100727272255435e-06, + "loss": 0.3999, + "step": 12391 + }, + { + "epoch": 0.7546204670706087, + "grad_norm": 1.0244616762967285, + "learning_rate": 4.810042218344802e-06, + "loss": 0.3721, + "step": 12392 + }, + { + "epoch": 0.7546813628474865, + "grad_norm": 1.0382764586077218, + "learning_rate": 4.810011707110636e-06, + "loss": 0.4016, + "step": 12393 + }, + { + "epoch": 0.7547422586243644, + "grad_norm": 0.9768761016618149, + "learning_rate": 4.809981193523079e-06, + "loss": 0.3856, + "step": 12394 + }, + { + "epoch": 0.7548031544012422, + "grad_norm": 0.9151653211657594, + "learning_rate": 4.809950677582161e-06, + "loss": 0.4534, + "step": 12395 + }, + { + "epoch": 0.7548640501781202, + "grad_norm": 0.9173508294997843, + "learning_rate": 4.809920159287913e-06, + "loss": 0.4876, + "step": 12396 + }, + { + "epoch": 0.754924945954998, + "grad_norm": 0.9606646898535601, + "learning_rate": 4.809889638640367e-06, + "loss": 0.4027, + "step": 12397 + }, + { + "epoch": 0.7549858417318759, + "grad_norm": 0.9915439900351306, + "learning_rate": 4.8098591156395526e-06, + "loss": 0.4347, + "step": 12398 + }, + { + "epoch": 0.7550467375087537, + "grad_norm": 1.0079857813052715, + "learning_rate": 4.8098285902855025e-06, + "loss": 0.4212, + "step": 12399 + }, + { + "epoch": 0.7551076332856317, + "grad_norm": 1.1036615503831404, + "learning_rate": 4.809798062578247e-06, + "loss": 0.5055, + "step": 12400 + }, + { + "epoch": 0.7551685290625095, + "grad_norm": 1.07655150956737, + "learning_rate": 4.809767532517817e-06, + "loss": 0.4122, + "step": 12401 + }, + { + "epoch": 0.7552294248393874, + "grad_norm": 1.0094064370224898, + "learning_rate": 4.809737000104244e-06, + "loss": 0.4492, + "step": 12402 + }, + { + "epoch": 0.7552903206162652, + "grad_norm": 0.987737826916768, + "learning_rate": 4.809706465337559e-06, + "loss": 0.466, + "step": 12403 + }, + { + "epoch": 0.7553512163931432, + "grad_norm": 0.9380185134940173, + "learning_rate": 4.809675928217793e-06, + "loss": 0.4492, + "step": 12404 + }, + { + "epoch": 0.755412112170021, + "grad_norm": 1.0034409568199456, + "learning_rate": 4.809645388744977e-06, + "loss": 0.5012, + "step": 12405 + }, + { + "epoch": 0.7554730079468989, + "grad_norm": 1.0473521501446477, + "learning_rate": 4.809614846919142e-06, + "loss": 0.4403, + "step": 12406 + }, + { + "epoch": 0.7555339037237767, + "grad_norm": 1.0726584562474966, + "learning_rate": 4.80958430274032e-06, + "loss": 0.3484, + "step": 12407 + }, + { + "epoch": 0.7555947995006547, + "grad_norm": 0.9549203599400969, + "learning_rate": 4.809553756208541e-06, + "loss": 0.4546, + "step": 12408 + }, + { + "epoch": 0.7556556952775325, + "grad_norm": 0.9720756285337271, + "learning_rate": 4.809523207323837e-06, + "loss": 0.4075, + "step": 12409 + }, + { + "epoch": 0.7557165910544104, + "grad_norm": 1.0062419997835073, + "learning_rate": 4.809492656086239e-06, + "loss": 0.3827, + "step": 12410 + }, + { + "epoch": 0.7557774868312882, + "grad_norm": 0.9054141776911234, + "learning_rate": 4.809462102495778e-06, + "loss": 0.4216, + "step": 12411 + }, + { + "epoch": 0.7558383826081662, + "grad_norm": 0.9979617683893383, + "learning_rate": 4.809431546552484e-06, + "loss": 0.344, + "step": 12412 + }, + { + "epoch": 0.755899278385044, + "grad_norm": 1.0434306839784375, + "learning_rate": 4.809400988256391e-06, + "loss": 0.4049, + "step": 12413 + }, + { + "epoch": 0.7559601741619218, + "grad_norm": 0.9421426280251548, + "learning_rate": 4.809370427607527e-06, + "loss": 0.4015, + "step": 12414 + }, + { + "epoch": 0.7560210699387997, + "grad_norm": 1.0558876369311696, + "learning_rate": 4.809339864605924e-06, + "loss": 0.4436, + "step": 12415 + }, + { + "epoch": 0.7560819657156777, + "grad_norm": 0.9415055221287417, + "learning_rate": 4.809309299251614e-06, + "loss": 0.4456, + "step": 12416 + }, + { + "epoch": 0.7561428614925555, + "grad_norm": 0.9853949796421223, + "learning_rate": 4.8092787315446285e-06, + "loss": 0.4884, + "step": 12417 + }, + { + "epoch": 0.7562037572694333, + "grad_norm": 1.0171121944221688, + "learning_rate": 4.809248161484998e-06, + "loss": 0.3879, + "step": 12418 + }, + { + "epoch": 0.7562646530463112, + "grad_norm": 1.0327613204097648, + "learning_rate": 4.8092175890727515e-06, + "loss": 0.4317, + "step": 12419 + }, + { + "epoch": 0.7563255488231891, + "grad_norm": 1.056997949950676, + "learning_rate": 4.809187014307924e-06, + "loss": 0.4515, + "step": 12420 + }, + { + "epoch": 0.756386444600067, + "grad_norm": 1.0867847869669407, + "learning_rate": 4.809156437190543e-06, + "loss": 0.3409, + "step": 12421 + }, + { + "epoch": 0.7564473403769448, + "grad_norm": 0.9542082885043561, + "learning_rate": 4.809125857720643e-06, + "loss": 0.4375, + "step": 12422 + }, + { + "epoch": 0.7565082361538227, + "grad_norm": 1.0639430123930549, + "learning_rate": 4.809095275898253e-06, + "loss": 0.3855, + "step": 12423 + }, + { + "epoch": 0.7565691319307006, + "grad_norm": 0.9846381721049937, + "learning_rate": 4.809064691723405e-06, + "loss": 0.4319, + "step": 12424 + }, + { + "epoch": 0.7566300277075785, + "grad_norm": 1.0332733893802126, + "learning_rate": 4.80903410519613e-06, + "loss": 0.4504, + "step": 12425 + }, + { + "epoch": 0.7566909234844563, + "grad_norm": 1.1395892109282388, + "learning_rate": 4.809003516316458e-06, + "loss": 0.4286, + "step": 12426 + }, + { + "epoch": 0.7567518192613343, + "grad_norm": 0.9746244079223141, + "learning_rate": 4.808972925084423e-06, + "loss": 0.4336, + "step": 12427 + }, + { + "epoch": 0.7568127150382121, + "grad_norm": 1.0738085221173823, + "learning_rate": 4.808942331500053e-06, + "loss": 0.404, + "step": 12428 + }, + { + "epoch": 0.75687361081509, + "grad_norm": 1.0078212766286887, + "learning_rate": 4.808911735563381e-06, + "loss": 0.4095, + "step": 12429 + }, + { + "epoch": 0.7569345065919678, + "grad_norm": 0.978292945184026, + "learning_rate": 4.808881137274437e-06, + "loss": 0.4442, + "step": 12430 + }, + { + "epoch": 0.7569954023688458, + "grad_norm": 1.031753008014521, + "learning_rate": 4.808850536633254e-06, + "loss": 0.3744, + "step": 12431 + }, + { + "epoch": 0.7570562981457236, + "grad_norm": 1.0103909253409988, + "learning_rate": 4.808819933639862e-06, + "loss": 0.3972, + "step": 12432 + }, + { + "epoch": 0.7571171939226015, + "grad_norm": 1.0434079017171214, + "learning_rate": 4.808789328294291e-06, + "loss": 0.4155, + "step": 12433 + }, + { + "epoch": 0.7571780896994793, + "grad_norm": 1.0644222843695381, + "learning_rate": 4.808758720596574e-06, + "loss": 0.4138, + "step": 12434 + }, + { + "epoch": 0.7572389854763573, + "grad_norm": 1.0507465186279414, + "learning_rate": 4.808728110546743e-06, + "loss": 0.4238, + "step": 12435 + }, + { + "epoch": 0.7572998812532351, + "grad_norm": 1.0187775886036936, + "learning_rate": 4.808697498144827e-06, + "loss": 0.4259, + "step": 12436 + }, + { + "epoch": 0.757360777030113, + "grad_norm": 1.1550285084886995, + "learning_rate": 4.808666883390858e-06, + "loss": 0.4163, + "step": 12437 + }, + { + "epoch": 0.7574216728069908, + "grad_norm": 1.002057694880759, + "learning_rate": 4.8086362662848666e-06, + "loss": 0.4213, + "step": 12438 + }, + { + "epoch": 0.7574825685838688, + "grad_norm": 0.9750664069789279, + "learning_rate": 4.808605646826885e-06, + "loss": 0.4189, + "step": 12439 + }, + { + "epoch": 0.7575434643607466, + "grad_norm": 1.1544956165189932, + "learning_rate": 4.8085750250169436e-06, + "loss": 0.3495, + "step": 12440 + }, + { + "epoch": 0.7576043601376244, + "grad_norm": 1.0013812526029884, + "learning_rate": 4.808544400855074e-06, + "loss": 0.4399, + "step": 12441 + }, + { + "epoch": 0.7576652559145023, + "grad_norm": 1.0369536403457245, + "learning_rate": 4.808513774341308e-06, + "loss": 0.3675, + "step": 12442 + }, + { + "epoch": 0.7577261516913802, + "grad_norm": 0.9897815885637692, + "learning_rate": 4.808483145475675e-06, + "loss": 0.4049, + "step": 12443 + }, + { + "epoch": 0.7577870474682581, + "grad_norm": 1.0186293592163977, + "learning_rate": 4.808452514258208e-06, + "loss": 0.4025, + "step": 12444 + }, + { + "epoch": 0.7578479432451359, + "grad_norm": 0.9726095202325272, + "learning_rate": 4.808421880688939e-06, + "loss": 0.4491, + "step": 12445 + }, + { + "epoch": 0.7579088390220138, + "grad_norm": 1.0679540464847368, + "learning_rate": 4.808391244767896e-06, + "loss": 0.3401, + "step": 12446 + }, + { + "epoch": 0.7579697347988917, + "grad_norm": 0.950873853816483, + "learning_rate": 4.808360606495112e-06, + "loss": 0.4436, + "step": 12447 + }, + { + "epoch": 0.7580306305757696, + "grad_norm": 1.0256805836796974, + "learning_rate": 4.808329965870619e-06, + "loss": 0.3845, + "step": 12448 + }, + { + "epoch": 0.7580915263526474, + "grad_norm": 1.0212846656414925, + "learning_rate": 4.8082993228944465e-06, + "loss": 0.3308, + "step": 12449 + }, + { + "epoch": 0.7581524221295253, + "grad_norm": 1.0176175489453259, + "learning_rate": 4.808268677566628e-06, + "loss": 0.4649, + "step": 12450 + }, + { + "epoch": 0.7582133179064032, + "grad_norm": 1.031974078067242, + "learning_rate": 4.808238029887192e-06, + "loss": 0.4415, + "step": 12451 + }, + { + "epoch": 0.7582742136832811, + "grad_norm": 0.9296084994985522, + "learning_rate": 4.808207379856172e-06, + "loss": 0.4035, + "step": 12452 + }, + { + "epoch": 0.7583351094601589, + "grad_norm": 0.952298001745192, + "learning_rate": 4.808176727473598e-06, + "loss": 0.4611, + "step": 12453 + }, + { + "epoch": 0.7583960052370368, + "grad_norm": 1.0448952397949351, + "learning_rate": 4.808146072739501e-06, + "loss": 0.379, + "step": 12454 + }, + { + "epoch": 0.7584569010139147, + "grad_norm": 0.9077611806458112, + "learning_rate": 4.808115415653913e-06, + "loss": 0.4589, + "step": 12455 + }, + { + "epoch": 0.7585177967907926, + "grad_norm": 1.0752327039062297, + "learning_rate": 4.808084756216866e-06, + "loss": 0.4036, + "step": 12456 + }, + { + "epoch": 0.7585786925676704, + "grad_norm": 0.9538466104683283, + "learning_rate": 4.808054094428389e-06, + "loss": 0.4155, + "step": 12457 + }, + { + "epoch": 0.7586395883445483, + "grad_norm": 0.9727105889116691, + "learning_rate": 4.8080234302885156e-06, + "loss": 0.4299, + "step": 12458 + }, + { + "epoch": 0.7587004841214262, + "grad_norm": 0.975073163452236, + "learning_rate": 4.807992763797275e-06, + "loss": 0.5055, + "step": 12459 + }, + { + "epoch": 0.7587613798983041, + "grad_norm": 0.9632715269294374, + "learning_rate": 4.8079620949547005e-06, + "loss": 0.442, + "step": 12460 + }, + { + "epoch": 0.7588222756751819, + "grad_norm": 0.9904228318005891, + "learning_rate": 4.807931423760821e-06, + "loss": 0.4501, + "step": 12461 + }, + { + "epoch": 0.7588831714520597, + "grad_norm": 0.986932505510325, + "learning_rate": 4.8079007502156695e-06, + "loss": 0.3689, + "step": 12462 + }, + { + "epoch": 0.7589440672289377, + "grad_norm": 0.9790320170718706, + "learning_rate": 4.807870074319276e-06, + "loss": 0.4643, + "step": 12463 + }, + { + "epoch": 0.7590049630058155, + "grad_norm": 1.0019039672192838, + "learning_rate": 4.807839396071673e-06, + "loss": 0.3783, + "step": 12464 + }, + { + "epoch": 0.7590658587826934, + "grad_norm": 1.0311013019089268, + "learning_rate": 4.807808715472891e-06, + "loss": 0.3935, + "step": 12465 + }, + { + "epoch": 0.7591267545595713, + "grad_norm": 1.066117657206384, + "learning_rate": 4.807778032522961e-06, + "loss": 0.4024, + "step": 12466 + }, + { + "epoch": 0.7591876503364492, + "grad_norm": 1.0631956745896582, + "learning_rate": 4.807747347221916e-06, + "loss": 0.3913, + "step": 12467 + }, + { + "epoch": 0.759248546113327, + "grad_norm": 0.9744231628313069, + "learning_rate": 4.807716659569786e-06, + "loss": 0.4456, + "step": 12468 + }, + { + "epoch": 0.7593094418902049, + "grad_norm": 0.9964454948781631, + "learning_rate": 4.8076859695666015e-06, + "loss": 0.4201, + "step": 12469 + }, + { + "epoch": 0.7593703376670828, + "grad_norm": 1.0217604036600902, + "learning_rate": 4.807655277212394e-06, + "loss": 0.4646, + "step": 12470 + }, + { + "epoch": 0.7594312334439607, + "grad_norm": 0.988540804467585, + "learning_rate": 4.8076245825071965e-06, + "loss": 0.5213, + "step": 12471 + }, + { + "epoch": 0.7594921292208385, + "grad_norm": 0.9932404245243046, + "learning_rate": 4.807593885451038e-06, + "loss": 0.348, + "step": 12472 + }, + { + "epoch": 0.7595530249977164, + "grad_norm": 1.0673801623267165, + "learning_rate": 4.807563186043951e-06, + "loss": 0.4421, + "step": 12473 + }, + { + "epoch": 0.7596139207745943, + "grad_norm": 1.1291387352375988, + "learning_rate": 4.8075324842859674e-06, + "loss": 0.4362, + "step": 12474 + }, + { + "epoch": 0.7596748165514722, + "grad_norm": 1.0329497160142884, + "learning_rate": 4.807501780177117e-06, + "loss": 0.3985, + "step": 12475 + }, + { + "epoch": 0.75973571232835, + "grad_norm": 1.0291306979400154, + "learning_rate": 4.8074710737174315e-06, + "loss": 0.4251, + "step": 12476 + }, + { + "epoch": 0.7597966081052279, + "grad_norm": 0.9721994179835373, + "learning_rate": 4.807440364906944e-06, + "loss": 0.4401, + "step": 12477 + }, + { + "epoch": 0.7598575038821058, + "grad_norm": 1.0647018849560177, + "learning_rate": 4.807409653745683e-06, + "loss": 0.3856, + "step": 12478 + }, + { + "epoch": 0.7599183996589837, + "grad_norm": 0.9224753561674639, + "learning_rate": 4.8073789402336805e-06, + "loss": 0.4555, + "step": 12479 + }, + { + "epoch": 0.7599792954358615, + "grad_norm": 1.0315393310535683, + "learning_rate": 4.807348224370969e-06, + "loss": 0.3764, + "step": 12480 + }, + { + "epoch": 0.7600401912127394, + "grad_norm": 1.0174156638175613, + "learning_rate": 4.807317506157579e-06, + "loss": 0.4212, + "step": 12481 + }, + { + "epoch": 0.7601010869896173, + "grad_norm": 1.0383442640282994, + "learning_rate": 4.807286785593542e-06, + "loss": 0.4088, + "step": 12482 + }, + { + "epoch": 0.7601619827664952, + "grad_norm": 0.9199307246791882, + "learning_rate": 4.807256062678889e-06, + "loss": 0.466, + "step": 12483 + }, + { + "epoch": 0.760222878543373, + "grad_norm": 0.9583357898731003, + "learning_rate": 4.807225337413651e-06, + "loss": 0.4656, + "step": 12484 + }, + { + "epoch": 0.7602837743202508, + "grad_norm": 0.9836466558978731, + "learning_rate": 4.807194609797861e-06, + "loss": 0.436, + "step": 12485 + }, + { + "epoch": 0.7603446700971288, + "grad_norm": 0.9239764502816191, + "learning_rate": 4.807163879831548e-06, + "loss": 0.4073, + "step": 12486 + }, + { + "epoch": 0.7604055658740067, + "grad_norm": 0.998785434746075, + "learning_rate": 4.8071331475147455e-06, + "loss": 0.381, + "step": 12487 + }, + { + "epoch": 0.7604664616508845, + "grad_norm": 0.9617777312170951, + "learning_rate": 4.807102412847483e-06, + "loss": 0.4301, + "step": 12488 + }, + { + "epoch": 0.7605273574277623, + "grad_norm": 0.9448727493603354, + "learning_rate": 4.8070716758297915e-06, + "loss": 0.4744, + "step": 12489 + }, + { + "epoch": 0.7605882532046403, + "grad_norm": 1.1172301028525833, + "learning_rate": 4.807040936461705e-06, + "loss": 0.3627, + "step": 12490 + }, + { + "epoch": 0.7606491489815181, + "grad_norm": 1.0036954281770427, + "learning_rate": 4.807010194743253e-06, + "loss": 0.377, + "step": 12491 + }, + { + "epoch": 0.760710044758396, + "grad_norm": 0.9753131853087437, + "learning_rate": 4.806979450674467e-06, + "loss": 0.4685, + "step": 12492 + }, + { + "epoch": 0.7607709405352738, + "grad_norm": 1.035638880791734, + "learning_rate": 4.806948704255377e-06, + "loss": 0.3879, + "step": 12493 + }, + { + "epoch": 0.7608318363121518, + "grad_norm": 0.9917964992295697, + "learning_rate": 4.806917955486017e-06, + "loss": 0.4438, + "step": 12494 + }, + { + "epoch": 0.7608927320890296, + "grad_norm": 1.0095410093128676, + "learning_rate": 4.806887204366416e-06, + "loss": 0.397, + "step": 12495 + }, + { + "epoch": 0.7609536278659075, + "grad_norm": 1.0024203175148279, + "learning_rate": 4.806856450896608e-06, + "loss": 0.4715, + "step": 12496 + }, + { + "epoch": 0.7610145236427853, + "grad_norm": 0.950035790754671, + "learning_rate": 4.80682569507662e-06, + "loss": 0.4135, + "step": 12497 + }, + { + "epoch": 0.7610754194196633, + "grad_norm": 0.9351053649094054, + "learning_rate": 4.8067949369064884e-06, + "loss": 0.3942, + "step": 12498 + }, + { + "epoch": 0.7611363151965411, + "grad_norm": 1.0382738049320923, + "learning_rate": 4.80676417638624e-06, + "loss": 0.4489, + "step": 12499 + }, + { + "epoch": 0.761197210973419, + "grad_norm": 0.9598869068429333, + "learning_rate": 4.806733413515909e-06, + "loss": 0.3781, + "step": 12500 + }, + { + "epoch": 0.7612581067502968, + "grad_norm": 1.0881799881492675, + "learning_rate": 4.806702648295527e-06, + "loss": 0.3647, + "step": 12501 + }, + { + "epoch": 0.7613190025271748, + "grad_norm": 0.9736240598964426, + "learning_rate": 4.8066718807251234e-06, + "loss": 0.4529, + "step": 12502 + }, + { + "epoch": 0.7613798983040526, + "grad_norm": 1.0246996244671531, + "learning_rate": 4.8066411108047305e-06, + "loss": 0.3671, + "step": 12503 + }, + { + "epoch": 0.7614407940809305, + "grad_norm": 0.9237087715298364, + "learning_rate": 4.806610338534379e-06, + "loss": 0.4983, + "step": 12504 + }, + { + "epoch": 0.7615016898578083, + "grad_norm": 0.9302584621988275, + "learning_rate": 4.806579563914102e-06, + "loss": 0.4089, + "step": 12505 + }, + { + "epoch": 0.7615625856346863, + "grad_norm": 1.061618702504618, + "learning_rate": 4.806548786943929e-06, + "loss": 0.4121, + "step": 12506 + }, + { + "epoch": 0.7616234814115641, + "grad_norm": 1.0236360654147305, + "learning_rate": 4.806518007623892e-06, + "loss": 0.4143, + "step": 12507 + }, + { + "epoch": 0.761684377188442, + "grad_norm": 0.9683428414613496, + "learning_rate": 4.806487225954023e-06, + "loss": 0.4566, + "step": 12508 + }, + { + "epoch": 0.7617452729653199, + "grad_norm": 1.0217545949980338, + "learning_rate": 4.806456441934351e-06, + "loss": 0.462, + "step": 12509 + }, + { + "epoch": 0.7618061687421978, + "grad_norm": 0.9826687895110144, + "learning_rate": 4.8064256555649115e-06, + "loss": 0.4029, + "step": 12510 + }, + { + "epoch": 0.7618670645190756, + "grad_norm": 0.9880433588099539, + "learning_rate": 4.806394866845733e-06, + "loss": 0.5184, + "step": 12511 + }, + { + "epoch": 0.7619279602959534, + "grad_norm": 0.9591118756557824, + "learning_rate": 4.806364075776847e-06, + "loss": 0.4168, + "step": 12512 + }, + { + "epoch": 0.7619888560728314, + "grad_norm": 1.0944445204437943, + "learning_rate": 4.806333282358284e-06, + "loss": 0.3199, + "step": 12513 + }, + { + "epoch": 0.7620497518497092, + "grad_norm": 0.9568371004221425, + "learning_rate": 4.806302486590078e-06, + "loss": 0.4028, + "step": 12514 + }, + { + "epoch": 0.7621106476265871, + "grad_norm": 1.0297378511816715, + "learning_rate": 4.806271688472259e-06, + "loss": 0.4713, + "step": 12515 + }, + { + "epoch": 0.7621715434034649, + "grad_norm": 1.1341178367689082, + "learning_rate": 4.806240888004858e-06, + "loss": 0.3857, + "step": 12516 + }, + { + "epoch": 0.7622324391803429, + "grad_norm": 0.9508147549880874, + "learning_rate": 4.8062100851879065e-06, + "loss": 0.4131, + "step": 12517 + }, + { + "epoch": 0.7622933349572207, + "grad_norm": 0.9687241582086175, + "learning_rate": 4.806179280021436e-06, + "loss": 0.4907, + "step": 12518 + }, + { + "epoch": 0.7623542307340986, + "grad_norm": 0.9431386366888977, + "learning_rate": 4.806148472505479e-06, + "loss": 0.4098, + "step": 12519 + }, + { + "epoch": 0.7624151265109764, + "grad_norm": 0.9871075682341156, + "learning_rate": 4.806117662640065e-06, + "loss": 0.3746, + "step": 12520 + }, + { + "epoch": 0.7624760222878544, + "grad_norm": 0.9527065265086795, + "learning_rate": 4.806086850425226e-06, + "loss": 0.4387, + "step": 12521 + }, + { + "epoch": 0.7625369180647322, + "grad_norm": 1.0309216870945175, + "learning_rate": 4.806056035860994e-06, + "loss": 0.4515, + "step": 12522 + }, + { + "epoch": 0.7625978138416101, + "grad_norm": 1.011335155409051, + "learning_rate": 4.806025218947401e-06, + "loss": 0.3969, + "step": 12523 + }, + { + "epoch": 0.7626587096184879, + "grad_norm": 0.9495985284555469, + "learning_rate": 4.805994399684476e-06, + "loss": 0.424, + "step": 12524 + }, + { + "epoch": 0.7627196053953659, + "grad_norm": 1.0452696968583866, + "learning_rate": 4.805963578072253e-06, + "loss": 0.3665, + "step": 12525 + }, + { + "epoch": 0.7627805011722437, + "grad_norm": 0.9866351720325519, + "learning_rate": 4.8059327541107614e-06, + "loss": 0.4324, + "step": 12526 + }, + { + "epoch": 0.7628413969491216, + "grad_norm": 1.0379946719183464, + "learning_rate": 4.805901927800034e-06, + "loss": 0.4112, + "step": 12527 + }, + { + "epoch": 0.7629022927259994, + "grad_norm": 0.9847418633513918, + "learning_rate": 4.805871099140101e-06, + "loss": 0.4646, + "step": 12528 + }, + { + "epoch": 0.7629631885028774, + "grad_norm": 0.9338497056402852, + "learning_rate": 4.805840268130996e-06, + "loss": 0.5008, + "step": 12529 + }, + { + "epoch": 0.7630240842797552, + "grad_norm": 1.0955494311884408, + "learning_rate": 4.805809434772747e-06, + "loss": 0.4354, + "step": 12530 + }, + { + "epoch": 0.763084980056633, + "grad_norm": 1.0707346527035655, + "learning_rate": 4.805778599065388e-06, + "loss": 0.4138, + "step": 12531 + }, + { + "epoch": 0.7631458758335109, + "grad_norm": 0.9687917382345025, + "learning_rate": 4.80574776100895e-06, + "loss": 0.41, + "step": 12532 + }, + { + "epoch": 0.7632067716103889, + "grad_norm": 1.0395322235396829, + "learning_rate": 4.8057169206034635e-06, + "loss": 0.4605, + "step": 12533 + }, + { + "epoch": 0.7632676673872667, + "grad_norm": 0.9467360326985292, + "learning_rate": 4.8056860778489614e-06, + "loss": 0.473, + "step": 12534 + }, + { + "epoch": 0.7633285631641445, + "grad_norm": 0.9871966171856148, + "learning_rate": 4.805655232745474e-06, + "loss": 0.4302, + "step": 12535 + }, + { + "epoch": 0.7633894589410224, + "grad_norm": 1.0423043732348023, + "learning_rate": 4.805624385293033e-06, + "loss": 0.4247, + "step": 12536 + }, + { + "epoch": 0.7634503547179003, + "grad_norm": 1.0052322278304489, + "learning_rate": 4.80559353549167e-06, + "loss": 0.3925, + "step": 12537 + }, + { + "epoch": 0.7635112504947782, + "grad_norm": 0.9963160232730389, + "learning_rate": 4.805562683341415e-06, + "loss": 0.4469, + "step": 12538 + }, + { + "epoch": 0.763572146271656, + "grad_norm": 0.9904816539460317, + "learning_rate": 4.805531828842301e-06, + "loss": 0.3958, + "step": 12539 + }, + { + "epoch": 0.7636330420485339, + "grad_norm": 1.0170119073639117, + "learning_rate": 4.80550097199436e-06, + "loss": 0.3763, + "step": 12540 + }, + { + "epoch": 0.7636939378254118, + "grad_norm": 0.8967643962219198, + "learning_rate": 4.805470112797622e-06, + "loss": 0.4504, + "step": 12541 + }, + { + "epoch": 0.7637548336022897, + "grad_norm": 0.9173773950140155, + "learning_rate": 4.80543925125212e-06, + "loss": 0.3979, + "step": 12542 + }, + { + "epoch": 0.7638157293791675, + "grad_norm": 0.9589642718184688, + "learning_rate": 4.805408387357883e-06, + "loss": 0.4447, + "step": 12543 + }, + { + "epoch": 0.7638766251560454, + "grad_norm": 0.9487311800628426, + "learning_rate": 4.805377521114945e-06, + "loss": 0.4252, + "step": 12544 + }, + { + "epoch": 0.7639375209329233, + "grad_norm": 0.9520501607754458, + "learning_rate": 4.805346652523335e-06, + "loss": 0.4513, + "step": 12545 + }, + { + "epoch": 0.7639984167098012, + "grad_norm": 0.991016064875841, + "learning_rate": 4.805315781583086e-06, + "loss": 0.4209, + "step": 12546 + }, + { + "epoch": 0.764059312486679, + "grad_norm": 0.9225509377755874, + "learning_rate": 4.805284908294231e-06, + "loss": 0.4252, + "step": 12547 + }, + { + "epoch": 0.764120208263557, + "grad_norm": 0.9360118783032118, + "learning_rate": 4.8052540326567975e-06, + "loss": 0.4594, + "step": 12548 + }, + { + "epoch": 0.7641811040404348, + "grad_norm": 1.119036934264317, + "learning_rate": 4.8052231546708205e-06, + "loss": 0.4043, + "step": 12549 + }, + { + "epoch": 0.7642419998173127, + "grad_norm": 1.0765404178580096, + "learning_rate": 4.8051922743363296e-06, + "loss": 0.4201, + "step": 12550 + }, + { + "epoch": 0.7643028955941905, + "grad_norm": 1.0011952528362265, + "learning_rate": 4.805161391653357e-06, + "loss": 0.3594, + "step": 12551 + }, + { + "epoch": 0.7643637913710685, + "grad_norm": 0.973971146280345, + "learning_rate": 4.805130506621933e-06, + "loss": 0.4537, + "step": 12552 + }, + { + "epoch": 0.7644246871479463, + "grad_norm": 1.0980115433236601, + "learning_rate": 4.805099619242091e-06, + "loss": 0.3658, + "step": 12553 + }, + { + "epoch": 0.7644855829248242, + "grad_norm": 0.9859959607378946, + "learning_rate": 4.805068729513861e-06, + "loss": 0.4786, + "step": 12554 + }, + { + "epoch": 0.764546478701702, + "grad_norm": 0.9292547591034541, + "learning_rate": 4.8050378374372745e-06, + "loss": 0.4575, + "step": 12555 + }, + { + "epoch": 0.76460737447858, + "grad_norm": 0.9729623650638003, + "learning_rate": 4.805006943012364e-06, + "loss": 0.4333, + "step": 12556 + }, + { + "epoch": 0.7646682702554578, + "grad_norm": 1.036512286673859, + "learning_rate": 4.80497604623916e-06, + "loss": 0.4165, + "step": 12557 + }, + { + "epoch": 0.7647291660323356, + "grad_norm": 0.9639932347397182, + "learning_rate": 4.804945147117694e-06, + "loss": 0.3974, + "step": 12558 + }, + { + "epoch": 0.7647900618092135, + "grad_norm": 0.900703038471696, + "learning_rate": 4.804914245647999e-06, + "loss": 0.434, + "step": 12559 + }, + { + "epoch": 0.7648509575860915, + "grad_norm": 1.0387812565174752, + "learning_rate": 4.804883341830104e-06, + "loss": 0.4348, + "step": 12560 + }, + { + "epoch": 0.7649118533629693, + "grad_norm": 1.0613855394926914, + "learning_rate": 4.804852435664042e-06, + "loss": 0.426, + "step": 12561 + }, + { + "epoch": 0.7649727491398471, + "grad_norm": 0.9770436222498448, + "learning_rate": 4.804821527149845e-06, + "loss": 0.4817, + "step": 12562 + }, + { + "epoch": 0.765033644916725, + "grad_norm": 0.9991489272507644, + "learning_rate": 4.804790616287543e-06, + "loss": 0.4281, + "step": 12563 + }, + { + "epoch": 0.7650945406936029, + "grad_norm": 1.0477934186305602, + "learning_rate": 4.804759703077169e-06, + "loss": 0.4166, + "step": 12564 + }, + { + "epoch": 0.7651554364704808, + "grad_norm": 0.9417096555920945, + "learning_rate": 4.804728787518753e-06, + "loss": 0.4568, + "step": 12565 + }, + { + "epoch": 0.7652163322473586, + "grad_norm": 0.9834801999016792, + "learning_rate": 4.804697869612328e-06, + "loss": 0.4644, + "step": 12566 + }, + { + "epoch": 0.7652772280242365, + "grad_norm": 0.966542586137245, + "learning_rate": 4.804666949357924e-06, + "loss": 0.4217, + "step": 12567 + }, + { + "epoch": 0.7653381238011144, + "grad_norm": 1.0188582727751374, + "learning_rate": 4.8046360267555735e-06, + "loss": 0.454, + "step": 12568 + }, + { + "epoch": 0.7653990195779923, + "grad_norm": 1.0116469969366324, + "learning_rate": 4.804605101805308e-06, + "loss": 0.4229, + "step": 12569 + }, + { + "epoch": 0.7654599153548701, + "grad_norm": 0.9968985011077774, + "learning_rate": 4.804574174507159e-06, + "loss": 0.4275, + "step": 12570 + }, + { + "epoch": 0.765520811131748, + "grad_norm": 1.0559324499114444, + "learning_rate": 4.8045432448611564e-06, + "loss": 0.3978, + "step": 12571 + }, + { + "epoch": 0.7655817069086259, + "grad_norm": 1.077019692199011, + "learning_rate": 4.804512312867335e-06, + "loss": 0.4258, + "step": 12572 + }, + { + "epoch": 0.7656426026855038, + "grad_norm": 0.9334792755296023, + "learning_rate": 4.804481378525722e-06, + "loss": 0.4967, + "step": 12573 + }, + { + "epoch": 0.7657034984623816, + "grad_norm": 0.93832401522952, + "learning_rate": 4.804450441836352e-06, + "loss": 0.4894, + "step": 12574 + }, + { + "epoch": 0.7657643942392595, + "grad_norm": 0.9425209622240545, + "learning_rate": 4.804419502799257e-06, + "loss": 0.4488, + "step": 12575 + }, + { + "epoch": 0.7658252900161374, + "grad_norm": 1.0181288685766494, + "learning_rate": 4.804388561414467e-06, + "loss": 0.3639, + "step": 12576 + }, + { + "epoch": 0.7658861857930153, + "grad_norm": 1.0107928784211384, + "learning_rate": 4.804357617682013e-06, + "loss": 0.4181, + "step": 12577 + }, + { + "epoch": 0.7659470815698931, + "grad_norm": 0.98100241778763, + "learning_rate": 4.804326671601928e-06, + "loss": 0.4179, + "step": 12578 + }, + { + "epoch": 0.766007977346771, + "grad_norm": 0.9992966996296273, + "learning_rate": 4.804295723174243e-06, + "loss": 0.3528, + "step": 12579 + }, + { + "epoch": 0.7660688731236489, + "grad_norm": 0.9387834273803584, + "learning_rate": 4.8042647723989885e-06, + "loss": 0.4638, + "step": 12580 + }, + { + "epoch": 0.7661297689005268, + "grad_norm": 1.0069666554960681, + "learning_rate": 4.8042338192761984e-06, + "loss": 0.4018, + "step": 12581 + }, + { + "epoch": 0.7661906646774046, + "grad_norm": 1.0298628029007917, + "learning_rate": 4.8042028638059015e-06, + "loss": 0.4105, + "step": 12582 + }, + { + "epoch": 0.7662515604542824, + "grad_norm": 0.9182515426073626, + "learning_rate": 4.8041719059881306e-06, + "loss": 0.4609, + "step": 12583 + }, + { + "epoch": 0.7663124562311604, + "grad_norm": 1.0337453185773118, + "learning_rate": 4.804140945822918e-06, + "loss": 0.3764, + "step": 12584 + }, + { + "epoch": 0.7663733520080382, + "grad_norm": 0.9833040168447316, + "learning_rate": 4.804109983310294e-06, + "loss": 0.4274, + "step": 12585 + }, + { + "epoch": 0.7664342477849161, + "grad_norm": 1.0036000530600213, + "learning_rate": 4.804079018450291e-06, + "loss": 0.4565, + "step": 12586 + }, + { + "epoch": 0.7664951435617939, + "grad_norm": 1.035152107496607, + "learning_rate": 4.804048051242941e-06, + "loss": 0.3899, + "step": 12587 + }, + { + "epoch": 0.7665560393386719, + "grad_norm": 0.9184991395908584, + "learning_rate": 4.804017081688273e-06, + "loss": 0.4224, + "step": 12588 + }, + { + "epoch": 0.7666169351155497, + "grad_norm": 0.9878407489088152, + "learning_rate": 4.8039861097863214e-06, + "loss": 0.4836, + "step": 12589 + }, + { + "epoch": 0.7666778308924276, + "grad_norm": 1.1543786468625958, + "learning_rate": 4.803955135537118e-06, + "loss": 0.4345, + "step": 12590 + }, + { + "epoch": 0.7667387266693055, + "grad_norm": 0.910254365907776, + "learning_rate": 4.803924158940691e-06, + "loss": 0.4255, + "step": 12591 + }, + { + "epoch": 0.7667996224461834, + "grad_norm": 1.025952534686121, + "learning_rate": 4.803893179997074e-06, + "loss": 0.411, + "step": 12592 + }, + { + "epoch": 0.7668605182230612, + "grad_norm": 1.046218155683437, + "learning_rate": 4.803862198706299e-06, + "loss": 0.3866, + "step": 12593 + }, + { + "epoch": 0.7669214139999391, + "grad_norm": 1.0589780849596693, + "learning_rate": 4.803831215068397e-06, + "loss": 0.4382, + "step": 12594 + }, + { + "epoch": 0.766982309776817, + "grad_norm": 1.052955662194168, + "learning_rate": 4.803800229083399e-06, + "loss": 0.3845, + "step": 12595 + }, + { + "epoch": 0.7670432055536949, + "grad_norm": 1.0610082450067015, + "learning_rate": 4.8037692407513384e-06, + "loss": 0.3853, + "step": 12596 + }, + { + "epoch": 0.7671041013305727, + "grad_norm": 0.9412921201285303, + "learning_rate": 4.803738250072245e-06, + "loss": 0.5013, + "step": 12597 + }, + { + "epoch": 0.7671649971074506, + "grad_norm": 1.0371766943101792, + "learning_rate": 4.803707257046151e-06, + "loss": 0.3477, + "step": 12598 + }, + { + "epoch": 0.7672258928843285, + "grad_norm": 1.0130634304400947, + "learning_rate": 4.803676261673088e-06, + "loss": 0.4912, + "step": 12599 + }, + { + "epoch": 0.7672867886612064, + "grad_norm": 0.9466090054735065, + "learning_rate": 4.803645263953088e-06, + "loss": 0.4216, + "step": 12600 + }, + { + "epoch": 0.7673476844380842, + "grad_norm": 0.9304172426923855, + "learning_rate": 4.803614263886182e-06, + "loss": 0.4325, + "step": 12601 + }, + { + "epoch": 0.767408580214962, + "grad_norm": 0.975900885597734, + "learning_rate": 4.803583261472401e-06, + "loss": 0.4768, + "step": 12602 + }, + { + "epoch": 0.76746947599184, + "grad_norm": 0.9997808089803476, + "learning_rate": 4.8035522567117775e-06, + "loss": 0.3681, + "step": 12603 + }, + { + "epoch": 0.7675303717687179, + "grad_norm": 1.0314713804752604, + "learning_rate": 4.803521249604343e-06, + "loss": 0.3823, + "step": 12604 + }, + { + "epoch": 0.7675912675455957, + "grad_norm": 1.098004635210432, + "learning_rate": 4.803490240150129e-06, + "loss": 0.4015, + "step": 12605 + }, + { + "epoch": 0.7676521633224735, + "grad_norm": 0.9214800413270566, + "learning_rate": 4.803459228349166e-06, + "loss": 0.4467, + "step": 12606 + }, + { + "epoch": 0.7677130590993515, + "grad_norm": 0.8947648411725629, + "learning_rate": 4.8034282142014885e-06, + "loss": 0.4599, + "step": 12607 + }, + { + "epoch": 0.7677739548762293, + "grad_norm": 1.060833870135549, + "learning_rate": 4.803397197707125e-06, + "loss": 0.43, + "step": 12608 + }, + { + "epoch": 0.7678348506531072, + "grad_norm": 1.0756907205075208, + "learning_rate": 4.803366178866109e-06, + "loss": 0.3864, + "step": 12609 + }, + { + "epoch": 0.767895746429985, + "grad_norm": 0.9661705519587169, + "learning_rate": 4.803335157678471e-06, + "loss": 0.4203, + "step": 12610 + }, + { + "epoch": 0.767956642206863, + "grad_norm": 0.9974005892350608, + "learning_rate": 4.803304134144242e-06, + "loss": 0.4048, + "step": 12611 + }, + { + "epoch": 0.7680175379837408, + "grad_norm": 0.8969173336407608, + "learning_rate": 4.8032731082634566e-06, + "loss": 0.4476, + "step": 12612 + }, + { + "epoch": 0.7680784337606187, + "grad_norm": 1.037017947021021, + "learning_rate": 4.803242080036143e-06, + "loss": 0.4093, + "step": 12613 + }, + { + "epoch": 0.7681393295374965, + "grad_norm": 1.0041774972007484, + "learning_rate": 4.803211049462335e-06, + "loss": 0.4448, + "step": 12614 + }, + { + "epoch": 0.7682002253143745, + "grad_norm": 1.0246668337704186, + "learning_rate": 4.803180016542063e-06, + "loss": 0.3628, + "step": 12615 + }, + { + "epoch": 0.7682611210912523, + "grad_norm": 0.8933915937939234, + "learning_rate": 4.80314898127536e-06, + "loss": 0.4526, + "step": 12616 + }, + { + "epoch": 0.7683220168681302, + "grad_norm": 0.9701471238385304, + "learning_rate": 4.8031179436622555e-06, + "loss": 0.4196, + "step": 12617 + }, + { + "epoch": 0.768382912645008, + "grad_norm": 0.8955210731646142, + "learning_rate": 4.8030869037027835e-06, + "loss": 0.4664, + "step": 12618 + }, + { + "epoch": 0.768443808421886, + "grad_norm": 0.9445072536912256, + "learning_rate": 4.8030558613969735e-06, + "loss": 0.4246, + "step": 12619 + }, + { + "epoch": 0.7685047041987638, + "grad_norm": 0.9629234205940567, + "learning_rate": 4.8030248167448586e-06, + "loss": 0.4263, + "step": 12620 + }, + { + "epoch": 0.7685655999756417, + "grad_norm": 0.9589643081253837, + "learning_rate": 4.80299376974647e-06, + "loss": 0.4639, + "step": 12621 + }, + { + "epoch": 0.7686264957525195, + "grad_norm": 1.0361508637514558, + "learning_rate": 4.802962720401838e-06, + "loss": 0.4194, + "step": 12622 + }, + { + "epoch": 0.7686873915293975, + "grad_norm": 0.9804078118268289, + "learning_rate": 4.802931668710996e-06, + "loss": 0.4291, + "step": 12623 + }, + { + "epoch": 0.7687482873062753, + "grad_norm": 1.026629131849047, + "learning_rate": 4.802900614673977e-06, + "loss": 0.463, + "step": 12624 + }, + { + "epoch": 0.7688091830831532, + "grad_norm": 0.9019759596083552, + "learning_rate": 4.802869558290808e-06, + "loss": 0.4889, + "step": 12625 + }, + { + "epoch": 0.768870078860031, + "grad_norm": 1.1360917698850084, + "learning_rate": 4.802838499561525e-06, + "loss": 0.4012, + "step": 12626 + }, + { + "epoch": 0.768930974636909, + "grad_norm": 0.8969421086165633, + "learning_rate": 4.802807438486158e-06, + "loss": 0.4386, + "step": 12627 + }, + { + "epoch": 0.7689918704137868, + "grad_norm": 1.0022663244303727, + "learning_rate": 4.802776375064737e-06, + "loss": 0.4226, + "step": 12628 + }, + { + "epoch": 0.7690527661906646, + "grad_norm": 0.996263081805426, + "learning_rate": 4.802745309297297e-06, + "loss": 0.3901, + "step": 12629 + }, + { + "epoch": 0.7691136619675426, + "grad_norm": 1.0921359832234794, + "learning_rate": 4.802714241183868e-06, + "loss": 0.4024, + "step": 12630 + }, + { + "epoch": 0.7691745577444205, + "grad_norm": 0.9743170151972802, + "learning_rate": 4.802683170724481e-06, + "loss": 0.362, + "step": 12631 + }, + { + "epoch": 0.7692354535212983, + "grad_norm": 0.9736399285803987, + "learning_rate": 4.802652097919168e-06, + "loss": 0.4575, + "step": 12632 + }, + { + "epoch": 0.7692963492981761, + "grad_norm": 1.0270985921851563, + "learning_rate": 4.802621022767961e-06, + "loss": 0.4175, + "step": 12633 + }, + { + "epoch": 0.7693572450750541, + "grad_norm": 1.0390329738332285, + "learning_rate": 4.802589945270893e-06, + "loss": 0.3986, + "step": 12634 + }, + { + "epoch": 0.7694181408519319, + "grad_norm": 1.0259955597455737, + "learning_rate": 4.802558865427993e-06, + "loss": 0.441, + "step": 12635 + }, + { + "epoch": 0.7694790366288098, + "grad_norm": 1.017452192815951, + "learning_rate": 4.8025277832392934e-06, + "loss": 0.463, + "step": 12636 + }, + { + "epoch": 0.7695399324056876, + "grad_norm": 1.1142150153999297, + "learning_rate": 4.802496698704827e-06, + "loss": 0.4357, + "step": 12637 + }, + { + "epoch": 0.7696008281825656, + "grad_norm": 0.9723547272539911, + "learning_rate": 4.802465611824625e-06, + "loss": 0.4285, + "step": 12638 + }, + { + "epoch": 0.7696617239594434, + "grad_norm": 1.0064220973657143, + "learning_rate": 4.802434522598718e-06, + "loss": 0.4207, + "step": 12639 + }, + { + "epoch": 0.7697226197363213, + "grad_norm": 1.0559601496772824, + "learning_rate": 4.802403431027139e-06, + "loss": 0.4187, + "step": 12640 + }, + { + "epoch": 0.7697835155131991, + "grad_norm": 1.056265174561772, + "learning_rate": 4.802372337109921e-06, + "loss": 0.4149, + "step": 12641 + }, + { + "epoch": 0.7698444112900771, + "grad_norm": 1.005448972238836, + "learning_rate": 4.802341240847092e-06, + "loss": 0.423, + "step": 12642 + }, + { + "epoch": 0.7699053070669549, + "grad_norm": 0.9545275868470788, + "learning_rate": 4.802310142238686e-06, + "loss": 0.4451, + "step": 12643 + }, + { + "epoch": 0.7699662028438328, + "grad_norm": 1.0177299187401134, + "learning_rate": 4.802279041284735e-06, + "loss": 0.4121, + "step": 12644 + }, + { + "epoch": 0.7700270986207106, + "grad_norm": 0.903537137284966, + "learning_rate": 4.802247937985268e-06, + "loss": 0.4034, + "step": 12645 + }, + { + "epoch": 0.7700879943975886, + "grad_norm": 0.9411206459255883, + "learning_rate": 4.8022168323403205e-06, + "loss": 0.4311, + "step": 12646 + }, + { + "epoch": 0.7701488901744664, + "grad_norm": 0.9054842252882854, + "learning_rate": 4.802185724349922e-06, + "loss": 0.438, + "step": 12647 + }, + { + "epoch": 0.7702097859513443, + "grad_norm": 0.975703558573771, + "learning_rate": 4.802154614014104e-06, + "loss": 0.3952, + "step": 12648 + }, + { + "epoch": 0.7702706817282221, + "grad_norm": 1.05512648158568, + "learning_rate": 4.802123501332899e-06, + "loss": 0.4049, + "step": 12649 + }, + { + "epoch": 0.7703315775051001, + "grad_norm": 0.8906571133623628, + "learning_rate": 4.802092386306339e-06, + "loss": 0.4541, + "step": 12650 + }, + { + "epoch": 0.7703924732819779, + "grad_norm": 1.0496540033915807, + "learning_rate": 4.802061268934455e-06, + "loss": 0.4066, + "step": 12651 + }, + { + "epoch": 0.7704533690588558, + "grad_norm": 0.95360861992862, + "learning_rate": 4.802030149217278e-06, + "loss": 0.4539, + "step": 12652 + }, + { + "epoch": 0.7705142648357336, + "grad_norm": 0.8976303272242726, + "learning_rate": 4.801999027154841e-06, + "loss": 0.4865, + "step": 12653 + }, + { + "epoch": 0.7705751606126116, + "grad_norm": 1.0052899405641105, + "learning_rate": 4.801967902747175e-06, + "loss": 0.3697, + "step": 12654 + }, + { + "epoch": 0.7706360563894894, + "grad_norm": 1.0038626460032876, + "learning_rate": 4.801936775994313e-06, + "loss": 0.5101, + "step": 12655 + }, + { + "epoch": 0.7706969521663672, + "grad_norm": 0.9752146550479063, + "learning_rate": 4.8019056468962854e-06, + "loss": 0.3917, + "step": 12656 + }, + { + "epoch": 0.7707578479432451, + "grad_norm": 1.110168221858251, + "learning_rate": 4.801874515453123e-06, + "loss": 0.4003, + "step": 12657 + }, + { + "epoch": 0.770818743720123, + "grad_norm": 0.9286283944986045, + "learning_rate": 4.80184338166486e-06, + "loss": 0.409, + "step": 12658 + }, + { + "epoch": 0.7708796394970009, + "grad_norm": 0.9731121447558871, + "learning_rate": 4.8018122455315265e-06, + "loss": 0.4395, + "step": 12659 + }, + { + "epoch": 0.7709405352738787, + "grad_norm": 1.0613710170133643, + "learning_rate": 4.8017811070531535e-06, + "loss": 0.5259, + "step": 12660 + }, + { + "epoch": 0.7710014310507566, + "grad_norm": 1.043499640428673, + "learning_rate": 4.801749966229775e-06, + "loss": 0.386, + "step": 12661 + }, + { + "epoch": 0.7710623268276345, + "grad_norm": 0.9430514638525392, + "learning_rate": 4.801718823061421e-06, + "loss": 0.4623, + "step": 12662 + }, + { + "epoch": 0.7711232226045124, + "grad_norm": 1.0817024583647998, + "learning_rate": 4.8016876775481245e-06, + "loss": 0.4097, + "step": 12663 + }, + { + "epoch": 0.7711841183813902, + "grad_norm": 1.005005283282771, + "learning_rate": 4.801656529689915e-06, + "loss": 0.39, + "step": 12664 + }, + { + "epoch": 0.7712450141582681, + "grad_norm": 1.015952710843648, + "learning_rate": 4.801625379486827e-06, + "loss": 0.4579, + "step": 12665 + }, + { + "epoch": 0.771305909935146, + "grad_norm": 0.9901017912202364, + "learning_rate": 4.80159422693889e-06, + "loss": 0.3483, + "step": 12666 + }, + { + "epoch": 0.7713668057120239, + "grad_norm": 1.105338027884472, + "learning_rate": 4.801563072046137e-06, + "loss": 0.3739, + "step": 12667 + }, + { + "epoch": 0.7714277014889017, + "grad_norm": 0.9689809911952333, + "learning_rate": 4.801531914808599e-06, + "loss": 0.4203, + "step": 12668 + }, + { + "epoch": 0.7714885972657796, + "grad_norm": 0.9774860202606791, + "learning_rate": 4.801500755226309e-06, + "loss": 0.4158, + "step": 12669 + }, + { + "epoch": 0.7715494930426575, + "grad_norm": 1.0065519345066691, + "learning_rate": 4.801469593299297e-06, + "loss": 0.4246, + "step": 12670 + }, + { + "epoch": 0.7716103888195354, + "grad_norm": 0.9529282075578572, + "learning_rate": 4.801438429027596e-06, + "loss": 0.4995, + "step": 12671 + }, + { + "epoch": 0.7716712845964132, + "grad_norm": 1.1116592094723539, + "learning_rate": 4.801407262411238e-06, + "loss": 0.532, + "step": 12672 + }, + { + "epoch": 0.7717321803732912, + "grad_norm": 1.0950331943483549, + "learning_rate": 4.801376093450254e-06, + "loss": 0.4204, + "step": 12673 + }, + { + "epoch": 0.771793076150169, + "grad_norm": 1.0207110661403602, + "learning_rate": 4.801344922144675e-06, + "loss": 0.404, + "step": 12674 + }, + { + "epoch": 0.7718539719270469, + "grad_norm": 1.1244751222529765, + "learning_rate": 4.801313748494534e-06, + "loss": 0.391, + "step": 12675 + }, + { + "epoch": 0.7719148677039247, + "grad_norm": 0.9938689265908248, + "learning_rate": 4.801282572499862e-06, + "loss": 0.3982, + "step": 12676 + }, + { + "epoch": 0.7719757634808027, + "grad_norm": 1.00231172645757, + "learning_rate": 4.801251394160692e-06, + "loss": 0.4486, + "step": 12677 + }, + { + "epoch": 0.7720366592576805, + "grad_norm": 1.0431143498289785, + "learning_rate": 4.801220213477054e-06, + "loss": 0.4132, + "step": 12678 + }, + { + "epoch": 0.7720975550345583, + "grad_norm": 0.9561574458878275, + "learning_rate": 4.801189030448982e-06, + "loss": 0.3965, + "step": 12679 + }, + { + "epoch": 0.7721584508114362, + "grad_norm": 1.0399670414838171, + "learning_rate": 4.801157845076506e-06, + "loss": 0.4004, + "step": 12680 + }, + { + "epoch": 0.7722193465883141, + "grad_norm": 1.192979816766944, + "learning_rate": 4.801126657359658e-06, + "loss": 0.4072, + "step": 12681 + }, + { + "epoch": 0.772280242365192, + "grad_norm": 0.933655079013556, + "learning_rate": 4.801095467298469e-06, + "loss": 0.4494, + "step": 12682 + }, + { + "epoch": 0.7723411381420698, + "grad_norm": 1.0592140985468064, + "learning_rate": 4.801064274892973e-06, + "loss": 0.4053, + "step": 12683 + }, + { + "epoch": 0.7724020339189477, + "grad_norm": 0.9755088705756415, + "learning_rate": 4.8010330801432e-06, + "loss": 0.4832, + "step": 12684 + }, + { + "epoch": 0.7724629296958256, + "grad_norm": 0.946876663609507, + "learning_rate": 4.801001883049183e-06, + "loss": 0.4214, + "step": 12685 + }, + { + "epoch": 0.7725238254727035, + "grad_norm": 0.9514549711870877, + "learning_rate": 4.800970683610953e-06, + "loss": 0.4809, + "step": 12686 + }, + { + "epoch": 0.7725847212495813, + "grad_norm": 0.9568317290402534, + "learning_rate": 4.800939481828542e-06, + "loss": 0.3791, + "step": 12687 + }, + { + "epoch": 0.7726456170264592, + "grad_norm": 1.216802621569982, + "learning_rate": 4.8009082777019814e-06, + "loss": 0.4019, + "step": 12688 + }, + { + "epoch": 0.7727065128033371, + "grad_norm": 1.008081835083653, + "learning_rate": 4.800877071231302e-06, + "loss": 0.3771, + "step": 12689 + }, + { + "epoch": 0.772767408580215, + "grad_norm": 0.9856782933843785, + "learning_rate": 4.800845862416539e-06, + "loss": 0.3887, + "step": 12690 + }, + { + "epoch": 0.7728283043570928, + "grad_norm": 1.0609963123640778, + "learning_rate": 4.8008146512577205e-06, + "loss": 0.4351, + "step": 12691 + }, + { + "epoch": 0.7728892001339707, + "grad_norm": 1.0487416783100605, + "learning_rate": 4.8007834377548815e-06, + "loss": 0.4088, + "step": 12692 + }, + { + "epoch": 0.7729500959108486, + "grad_norm": 1.0607608785569909, + "learning_rate": 4.800752221908051e-06, + "loss": 0.4483, + "step": 12693 + }, + { + "epoch": 0.7730109916877265, + "grad_norm": 1.0600376715475583, + "learning_rate": 4.800721003717261e-06, + "loss": 0.3566, + "step": 12694 + }, + { + "epoch": 0.7730718874646043, + "grad_norm": 1.047764020265285, + "learning_rate": 4.800689783182546e-06, + "loss": 0.3733, + "step": 12695 + }, + { + "epoch": 0.7731327832414822, + "grad_norm": 0.9782622753430016, + "learning_rate": 4.800658560303936e-06, + "loss": 0.4028, + "step": 12696 + }, + { + "epoch": 0.7731936790183601, + "grad_norm": 0.9348671416319975, + "learning_rate": 4.8006273350814625e-06, + "loss": 0.4703, + "step": 12697 + }, + { + "epoch": 0.773254574795238, + "grad_norm": 1.0043435212727452, + "learning_rate": 4.800596107515158e-06, + "loss": 0.4538, + "step": 12698 + }, + { + "epoch": 0.7733154705721158, + "grad_norm": 1.0599066982513412, + "learning_rate": 4.800564877605053e-06, + "loss": 0.39, + "step": 12699 + }, + { + "epoch": 0.7733763663489936, + "grad_norm": 1.001094131208797, + "learning_rate": 4.800533645351181e-06, + "loss": 0.4132, + "step": 12700 + }, + { + "epoch": 0.7734372621258716, + "grad_norm": 0.9742651528032131, + "learning_rate": 4.800502410753573e-06, + "loss": 0.5091, + "step": 12701 + }, + { + "epoch": 0.7734981579027495, + "grad_norm": 0.9864469126868797, + "learning_rate": 4.800471173812261e-06, + "loss": 0.4042, + "step": 12702 + }, + { + "epoch": 0.7735590536796273, + "grad_norm": 1.0692106250574938, + "learning_rate": 4.800439934527276e-06, + "loss": 0.3876, + "step": 12703 + }, + { + "epoch": 0.7736199494565051, + "grad_norm": 0.9158790208301233, + "learning_rate": 4.800408692898652e-06, + "loss": 0.3992, + "step": 12704 + }, + { + "epoch": 0.7736808452333831, + "grad_norm": 0.9200341453128864, + "learning_rate": 4.80037744892642e-06, + "loss": 0.4358, + "step": 12705 + }, + { + "epoch": 0.7737417410102609, + "grad_norm": 0.9953126657645472, + "learning_rate": 4.800346202610609e-06, + "loss": 0.3529, + "step": 12706 + }, + { + "epoch": 0.7738026367871388, + "grad_norm": 0.9663602090422002, + "learning_rate": 4.800314953951255e-06, + "loss": 0.4611, + "step": 12707 + }, + { + "epoch": 0.7738635325640166, + "grad_norm": 1.0769749914849045, + "learning_rate": 4.800283702948387e-06, + "loss": 0.4783, + "step": 12708 + }, + { + "epoch": 0.7739244283408946, + "grad_norm": 0.9912723938213266, + "learning_rate": 4.800252449602038e-06, + "loss": 0.4319, + "step": 12709 + }, + { + "epoch": 0.7739853241177724, + "grad_norm": 1.0169686627501315, + "learning_rate": 4.80022119391224e-06, + "loss": 0.4139, + "step": 12710 + }, + { + "epoch": 0.7740462198946503, + "grad_norm": 0.9500381119941365, + "learning_rate": 4.800189935879024e-06, + "loss": 0.4825, + "step": 12711 + }, + { + "epoch": 0.7741071156715282, + "grad_norm": 1.0339343804387326, + "learning_rate": 4.800158675502423e-06, + "loss": 0.4295, + "step": 12712 + }, + { + "epoch": 0.7741680114484061, + "grad_norm": 1.0588777059040317, + "learning_rate": 4.800127412782467e-06, + "loss": 0.3571, + "step": 12713 + }, + { + "epoch": 0.7742289072252839, + "grad_norm": 0.9710747901885496, + "learning_rate": 4.80009614771919e-06, + "loss": 0.3901, + "step": 12714 + }, + { + "epoch": 0.7742898030021618, + "grad_norm": 1.0579805636422748, + "learning_rate": 4.800064880312623e-06, + "loss": 0.4125, + "step": 12715 + }, + { + "epoch": 0.7743506987790397, + "grad_norm": 1.0198325939051576, + "learning_rate": 4.800033610562797e-06, + "loss": 0.4346, + "step": 12716 + }, + { + "epoch": 0.7744115945559176, + "grad_norm": 0.9698731924935259, + "learning_rate": 4.8000023384697446e-06, + "loss": 0.4015, + "step": 12717 + }, + { + "epoch": 0.7744724903327954, + "grad_norm": 0.8714762472100042, + "learning_rate": 4.799971064033498e-06, + "loss": 0.4531, + "step": 12718 + }, + { + "epoch": 0.7745333861096733, + "grad_norm": 0.9600109631834646, + "learning_rate": 4.799939787254089e-06, + "loss": 0.4546, + "step": 12719 + }, + { + "epoch": 0.7745942818865512, + "grad_norm": 1.0339501300424396, + "learning_rate": 4.799908508131548e-06, + "loss": 0.4167, + "step": 12720 + }, + { + "epoch": 0.7746551776634291, + "grad_norm": 1.0050795542550588, + "learning_rate": 4.799877226665909e-06, + "loss": 0.3932, + "step": 12721 + }, + { + "epoch": 0.7747160734403069, + "grad_norm": 0.968929434055138, + "learning_rate": 4.7998459428572035e-06, + "loss": 0.4265, + "step": 12722 + }, + { + "epoch": 0.7747769692171848, + "grad_norm": 0.9588346370168667, + "learning_rate": 4.7998146567054615e-06, + "loss": 0.4765, + "step": 12723 + }, + { + "epoch": 0.7748378649940627, + "grad_norm": 1.0824923279680896, + "learning_rate": 4.799783368210716e-06, + "loss": 0.3861, + "step": 12724 + }, + { + "epoch": 0.7748987607709406, + "grad_norm": 1.0957824857552747, + "learning_rate": 4.799752077373e-06, + "loss": 0.4418, + "step": 12725 + }, + { + "epoch": 0.7749596565478184, + "grad_norm": 0.9335638627284314, + "learning_rate": 4.799720784192343e-06, + "loss": 0.3947, + "step": 12726 + }, + { + "epoch": 0.7750205523246962, + "grad_norm": 0.9991282214427661, + "learning_rate": 4.79968948866878e-06, + "loss": 0.4247, + "step": 12727 + }, + { + "epoch": 0.7750814481015742, + "grad_norm": 0.9249605507765726, + "learning_rate": 4.799658190802341e-06, + "loss": 0.4496, + "step": 12728 + }, + { + "epoch": 0.775142343878452, + "grad_norm": 0.9638104167054681, + "learning_rate": 4.799626890593057e-06, + "loss": 0.4691, + "step": 12729 + }, + { + "epoch": 0.7752032396553299, + "grad_norm": 0.9928037851739773, + "learning_rate": 4.799595588040962e-06, + "loss": 0.4166, + "step": 12730 + }, + { + "epoch": 0.7752641354322077, + "grad_norm": 0.9966345844210762, + "learning_rate": 4.799564283146085e-06, + "loss": 0.362, + "step": 12731 + }, + { + "epoch": 0.7753250312090857, + "grad_norm": 0.9648176475153312, + "learning_rate": 4.799532975908462e-06, + "loss": 0.4167, + "step": 12732 + }, + { + "epoch": 0.7753859269859635, + "grad_norm": 0.9317295503574077, + "learning_rate": 4.799501666328121e-06, + "loss": 0.4766, + "step": 12733 + }, + { + "epoch": 0.7754468227628414, + "grad_norm": 0.9526612907688714, + "learning_rate": 4.799470354405096e-06, + "loss": 0.417, + "step": 12734 + }, + { + "epoch": 0.7755077185397192, + "grad_norm": 0.9886158399801629, + "learning_rate": 4.7994390401394186e-06, + "loss": 0.4666, + "step": 12735 + }, + { + "epoch": 0.7755686143165972, + "grad_norm": 1.059541629244231, + "learning_rate": 4.7994077235311205e-06, + "loss": 0.4404, + "step": 12736 + }, + { + "epoch": 0.775629510093475, + "grad_norm": 0.9461848031847704, + "learning_rate": 4.799376404580234e-06, + "loss": 0.45, + "step": 12737 + }, + { + "epoch": 0.7756904058703529, + "grad_norm": 1.0903868988863243, + "learning_rate": 4.799345083286789e-06, + "loss": 0.3854, + "step": 12738 + }, + { + "epoch": 0.7757513016472307, + "grad_norm": 0.9382180938468018, + "learning_rate": 4.799313759650821e-06, + "loss": 0.3938, + "step": 12739 + }, + { + "epoch": 0.7758121974241087, + "grad_norm": 0.9597552904768974, + "learning_rate": 4.799282433672359e-06, + "loss": 0.5245, + "step": 12740 + }, + { + "epoch": 0.7758730932009865, + "grad_norm": 0.9431983513094354, + "learning_rate": 4.799251105351436e-06, + "loss": 0.483, + "step": 12741 + }, + { + "epoch": 0.7759339889778644, + "grad_norm": 0.9754442734252652, + "learning_rate": 4.7992197746880835e-06, + "loss": 0.3688, + "step": 12742 + }, + { + "epoch": 0.7759948847547422, + "grad_norm": 0.9662570475037163, + "learning_rate": 4.799188441682335e-06, + "loss": 0.5038, + "step": 12743 + }, + { + "epoch": 0.7760557805316202, + "grad_norm": 1.028960521601434, + "learning_rate": 4.799157106334219e-06, + "loss": 0.4061, + "step": 12744 + }, + { + "epoch": 0.776116676308498, + "grad_norm": 0.969280692339814, + "learning_rate": 4.799125768643771e-06, + "loss": 0.4058, + "step": 12745 + }, + { + "epoch": 0.7761775720853759, + "grad_norm": 1.0446533473919941, + "learning_rate": 4.799094428611021e-06, + "loss": 0.409, + "step": 12746 + }, + { + "epoch": 0.7762384678622537, + "grad_norm": 1.0211637093271846, + "learning_rate": 4.799063086236001e-06, + "loss": 0.4727, + "step": 12747 + }, + { + "epoch": 0.7762993636391317, + "grad_norm": 0.9794278315519896, + "learning_rate": 4.799031741518744e-06, + "loss": 0.4002, + "step": 12748 + }, + { + "epoch": 0.7763602594160095, + "grad_norm": 0.9345440031732596, + "learning_rate": 4.799000394459281e-06, + "loss": 0.4123, + "step": 12749 + }, + { + "epoch": 0.7764211551928873, + "grad_norm": 1.0647051361435813, + "learning_rate": 4.798969045057644e-06, + "loss": 0.4058, + "step": 12750 + }, + { + "epoch": 0.7764820509697652, + "grad_norm": 0.9612875836639482, + "learning_rate": 4.798937693313866e-06, + "loss": 0.4261, + "step": 12751 + }, + { + "epoch": 0.7765429467466431, + "grad_norm": 0.9994985354016155, + "learning_rate": 4.798906339227977e-06, + "loss": 0.465, + "step": 12752 + }, + { + "epoch": 0.776603842523521, + "grad_norm": 0.9630755923777948, + "learning_rate": 4.79887498280001e-06, + "loss": 0.4289, + "step": 12753 + }, + { + "epoch": 0.7766647383003988, + "grad_norm": 0.9874275738090238, + "learning_rate": 4.798843624029998e-06, + "loss": 0.3554, + "step": 12754 + }, + { + "epoch": 0.7767256340772768, + "grad_norm": 0.9479337519353525, + "learning_rate": 4.7988122629179714e-06, + "loss": 0.4333, + "step": 12755 + }, + { + "epoch": 0.7767865298541546, + "grad_norm": 1.0036983352690803, + "learning_rate": 4.798780899463963e-06, + "loss": 0.4322, + "step": 12756 + }, + { + "epoch": 0.7768474256310325, + "grad_norm": 1.085301289730312, + "learning_rate": 4.7987495336680035e-06, + "loss": 0.4158, + "step": 12757 + }, + { + "epoch": 0.7769083214079103, + "grad_norm": 0.8609500249366987, + "learning_rate": 4.798718165530127e-06, + "loss": 0.4332, + "step": 12758 + }, + { + "epoch": 0.7769692171847883, + "grad_norm": 1.0794009857723101, + "learning_rate": 4.798686795050363e-06, + "loss": 0.4303, + "step": 12759 + }, + { + "epoch": 0.7770301129616661, + "grad_norm": 1.0956604547440882, + "learning_rate": 4.798655422228745e-06, + "loss": 0.4489, + "step": 12760 + }, + { + "epoch": 0.777091008738544, + "grad_norm": 1.0373063763376975, + "learning_rate": 4.798624047065305e-06, + "loss": 0.427, + "step": 12761 + }, + { + "epoch": 0.7771519045154218, + "grad_norm": 1.1664659118185998, + "learning_rate": 4.798592669560075e-06, + "loss": 0.4064, + "step": 12762 + }, + { + "epoch": 0.7772128002922998, + "grad_norm": 1.0083009697414163, + "learning_rate": 4.7985612897130855e-06, + "loss": 0.4524, + "step": 12763 + }, + { + "epoch": 0.7772736960691776, + "grad_norm": 1.048681669670957, + "learning_rate": 4.79852990752437e-06, + "loss": 0.4156, + "step": 12764 + }, + { + "epoch": 0.7773345918460555, + "grad_norm": 1.1683726738865992, + "learning_rate": 4.79849852299396e-06, + "loss": 0.3252, + "step": 12765 + }, + { + "epoch": 0.7773954876229333, + "grad_norm": 0.9847686537520561, + "learning_rate": 4.798467136121888e-06, + "loss": 0.4469, + "step": 12766 + }, + { + "epoch": 0.7774563833998113, + "grad_norm": 0.9852109065025174, + "learning_rate": 4.798435746908185e-06, + "loss": 0.4337, + "step": 12767 + }, + { + "epoch": 0.7775172791766891, + "grad_norm": 0.9821287078627041, + "learning_rate": 4.7984043553528836e-06, + "loss": 0.4073, + "step": 12768 + }, + { + "epoch": 0.777578174953567, + "grad_norm": 0.9930289502484219, + "learning_rate": 4.798372961456016e-06, + "loss": 0.3928, + "step": 12769 + }, + { + "epoch": 0.7776390707304448, + "grad_norm": 1.0186779929275227, + "learning_rate": 4.798341565217612e-06, + "loss": 0.4145, + "step": 12770 + }, + { + "epoch": 0.7776999665073228, + "grad_norm": 1.0198155463010654, + "learning_rate": 4.7983101666377075e-06, + "loss": 0.3857, + "step": 12771 + }, + { + "epoch": 0.7777608622842006, + "grad_norm": 0.9593915319522637, + "learning_rate": 4.798278765716332e-06, + "loss": 0.5208, + "step": 12772 + }, + { + "epoch": 0.7778217580610784, + "grad_norm": 0.9935869486571158, + "learning_rate": 4.798247362453517e-06, + "loss": 0.4415, + "step": 12773 + }, + { + "epoch": 0.7778826538379563, + "grad_norm": 1.0490683500660074, + "learning_rate": 4.798215956849296e-06, + "loss": 0.3758, + "step": 12774 + }, + { + "epoch": 0.7779435496148343, + "grad_norm": 1.0765214423945684, + "learning_rate": 4.798184548903701e-06, + "loss": 0.4063, + "step": 12775 + }, + { + "epoch": 0.7780044453917121, + "grad_norm": 0.9264864798523126, + "learning_rate": 4.798153138616762e-06, + "loss": 0.4458, + "step": 12776 + }, + { + "epoch": 0.7780653411685899, + "grad_norm": 0.9720729685871387, + "learning_rate": 4.798121725988513e-06, + "loss": 0.4837, + "step": 12777 + }, + { + "epoch": 0.7781262369454678, + "grad_norm": 0.9973684504844111, + "learning_rate": 4.7980903110189845e-06, + "loss": 0.455, + "step": 12778 + }, + { + "epoch": 0.7781871327223457, + "grad_norm": 1.055255083788754, + "learning_rate": 4.798058893708211e-06, + "loss": 0.4337, + "step": 12779 + }, + { + "epoch": 0.7782480284992236, + "grad_norm": 0.907918163311152, + "learning_rate": 4.798027474056222e-06, + "loss": 0.445, + "step": 12780 + }, + { + "epoch": 0.7783089242761014, + "grad_norm": 1.0520497499460064, + "learning_rate": 4.797996052063051e-06, + "loss": 0.4021, + "step": 12781 + }, + { + "epoch": 0.7783698200529793, + "grad_norm": 1.1245632149016211, + "learning_rate": 4.797964627728728e-06, + "loss": 0.363, + "step": 12782 + }, + { + "epoch": 0.7784307158298572, + "grad_norm": 0.9872457851482981, + "learning_rate": 4.797933201053288e-06, + "loss": 0.3846, + "step": 12783 + }, + { + "epoch": 0.7784916116067351, + "grad_norm": 0.9993506050641364, + "learning_rate": 4.797901772036761e-06, + "loss": 0.3542, + "step": 12784 + }, + { + "epoch": 0.7785525073836129, + "grad_norm": 1.008716569144231, + "learning_rate": 4.797870340679178e-06, + "loss": 0.422, + "step": 12785 + }, + { + "epoch": 0.7786134031604908, + "grad_norm": 0.9973762083169658, + "learning_rate": 4.797838906980574e-06, + "loss": 0.3828, + "step": 12786 + }, + { + "epoch": 0.7786742989373687, + "grad_norm": 0.9033273009290488, + "learning_rate": 4.7978074709409785e-06, + "loss": 0.5164, + "step": 12787 + }, + { + "epoch": 0.7787351947142466, + "grad_norm": 0.9896868013077841, + "learning_rate": 4.797776032560425e-06, + "loss": 0.4402, + "step": 12788 + }, + { + "epoch": 0.7787960904911244, + "grad_norm": 0.9584125410905074, + "learning_rate": 4.797744591838946e-06, + "loss": 0.4579, + "step": 12789 + }, + { + "epoch": 0.7788569862680023, + "grad_norm": 1.063242733427789, + "learning_rate": 4.797713148776571e-06, + "loss": 0.3891, + "step": 12790 + }, + { + "epoch": 0.7789178820448802, + "grad_norm": 1.0070165022964943, + "learning_rate": 4.797681703373335e-06, + "loss": 0.4084, + "step": 12791 + }, + { + "epoch": 0.7789787778217581, + "grad_norm": 0.9041982865318591, + "learning_rate": 4.797650255629268e-06, + "loss": 0.4127, + "step": 12792 + }, + { + "epoch": 0.7790396735986359, + "grad_norm": 0.9858132195813079, + "learning_rate": 4.7976188055444024e-06, + "loss": 0.4364, + "step": 12793 + }, + { + "epoch": 0.7791005693755139, + "grad_norm": 1.019744812167505, + "learning_rate": 4.797587353118771e-06, + "loss": 0.3967, + "step": 12794 + }, + { + "epoch": 0.7791614651523917, + "grad_norm": 0.973166362329539, + "learning_rate": 4.797555898352405e-06, + "loss": 0.3787, + "step": 12795 + }, + { + "epoch": 0.7792223609292696, + "grad_norm": 1.072161459172859, + "learning_rate": 4.7975244412453374e-06, + "loss": 0.4229, + "step": 12796 + }, + { + "epoch": 0.7792832567061474, + "grad_norm": 1.0416809406414087, + "learning_rate": 4.797492981797599e-06, + "loss": 0.3626, + "step": 12797 + }, + { + "epoch": 0.7793441524830254, + "grad_norm": 1.0881862698079736, + "learning_rate": 4.797461520009224e-06, + "loss": 0.4397, + "step": 12798 + }, + { + "epoch": 0.7794050482599032, + "grad_norm": 1.01074004655159, + "learning_rate": 4.797430055880241e-06, + "loss": 0.38, + "step": 12799 + }, + { + "epoch": 0.779465944036781, + "grad_norm": 1.0631276364154467, + "learning_rate": 4.797398589410685e-06, + "loss": 0.4473, + "step": 12800 + }, + { + "epoch": 0.7795268398136589, + "grad_norm": 1.0222430483251312, + "learning_rate": 4.797367120600586e-06, + "loss": 0.3778, + "step": 12801 + }, + { + "epoch": 0.7795877355905368, + "grad_norm": 1.0424995974577056, + "learning_rate": 4.797335649449979e-06, + "loss": 0.4025, + "step": 12802 + }, + { + "epoch": 0.7796486313674147, + "grad_norm": 1.1028181899910887, + "learning_rate": 4.797304175958893e-06, + "loss": 0.4502, + "step": 12803 + }, + { + "epoch": 0.7797095271442925, + "grad_norm": 0.9713276274351405, + "learning_rate": 4.797272700127361e-06, + "loss": 0.4239, + "step": 12804 + }, + { + "epoch": 0.7797704229211704, + "grad_norm": 0.9555573712698266, + "learning_rate": 4.797241221955417e-06, + "loss": 0.4437, + "step": 12805 + }, + { + "epoch": 0.7798313186980483, + "grad_norm": 0.980005136418736, + "learning_rate": 4.79720974144309e-06, + "loss": 0.4002, + "step": 12806 + }, + { + "epoch": 0.7798922144749262, + "grad_norm": 0.9995773089239456, + "learning_rate": 4.797178258590413e-06, + "loss": 0.3364, + "step": 12807 + }, + { + "epoch": 0.779953110251804, + "grad_norm": 1.028087874416001, + "learning_rate": 4.79714677339742e-06, + "loss": 0.3762, + "step": 12808 + }, + { + "epoch": 0.7800140060286819, + "grad_norm": 0.9496771339857385, + "learning_rate": 4.79711528586414e-06, + "loss": 0.3668, + "step": 12809 + }, + { + "epoch": 0.7800749018055598, + "grad_norm": 1.0122754656369684, + "learning_rate": 4.797083795990608e-06, + "loss": 0.446, + "step": 12810 + }, + { + "epoch": 0.7801357975824377, + "grad_norm": 1.0433416267624525, + "learning_rate": 4.797052303776854e-06, + "loss": 0.3535, + "step": 12811 + }, + { + "epoch": 0.7801966933593155, + "grad_norm": 1.0012122440517095, + "learning_rate": 4.797020809222912e-06, + "loss": 0.403, + "step": 12812 + }, + { + "epoch": 0.7802575891361934, + "grad_norm": 1.0635328392632353, + "learning_rate": 4.796989312328812e-06, + "loss": 0.4044, + "step": 12813 + }, + { + "epoch": 0.7803184849130713, + "grad_norm": 0.9726084434261154, + "learning_rate": 4.7969578130945875e-06, + "loss": 0.4389, + "step": 12814 + }, + { + "epoch": 0.7803793806899492, + "grad_norm": 1.0214884972892766, + "learning_rate": 4.796926311520269e-06, + "loss": 0.4118, + "step": 12815 + }, + { + "epoch": 0.780440276466827, + "grad_norm": 1.0684364585779667, + "learning_rate": 4.79689480760589e-06, + "loss": 0.4472, + "step": 12816 + }, + { + "epoch": 0.7805011722437049, + "grad_norm": 1.1455545517963175, + "learning_rate": 4.796863301351484e-06, + "loss": 0.3635, + "step": 12817 + }, + { + "epoch": 0.7805620680205828, + "grad_norm": 1.0460279133999044, + "learning_rate": 4.79683179275708e-06, + "loss": 0.4347, + "step": 12818 + }, + { + "epoch": 0.7806229637974607, + "grad_norm": 1.0505046188143252, + "learning_rate": 4.796800281822712e-06, + "loss": 0.4367, + "step": 12819 + }, + { + "epoch": 0.7806838595743385, + "grad_norm": 0.9879575268733163, + "learning_rate": 4.796768768548412e-06, + "loss": 0.3751, + "step": 12820 + }, + { + "epoch": 0.7807447553512163, + "grad_norm": 1.0306770607768605, + "learning_rate": 4.7967372529342115e-06, + "loss": 0.3553, + "step": 12821 + }, + { + "epoch": 0.7808056511280943, + "grad_norm": 1.0758189151865516, + "learning_rate": 4.7967057349801425e-06, + "loss": 0.4956, + "step": 12822 + }, + { + "epoch": 0.7808665469049721, + "grad_norm": 0.9210023477772008, + "learning_rate": 4.796674214686237e-06, + "loss": 0.436, + "step": 12823 + }, + { + "epoch": 0.78092744268185, + "grad_norm": 0.9074874410601753, + "learning_rate": 4.796642692052528e-06, + "loss": 0.4196, + "step": 12824 + }, + { + "epoch": 0.7809883384587278, + "grad_norm": 0.9832570039909039, + "learning_rate": 4.796611167079048e-06, + "loss": 0.4052, + "step": 12825 + }, + { + "epoch": 0.7810492342356058, + "grad_norm": 1.084886114066157, + "learning_rate": 4.796579639765827e-06, + "loss": 0.3692, + "step": 12826 + }, + { + "epoch": 0.7811101300124836, + "grad_norm": 1.0361687526743337, + "learning_rate": 4.796548110112899e-06, + "loss": 0.3949, + "step": 12827 + }, + { + "epoch": 0.7811710257893615, + "grad_norm": 1.0876396279445812, + "learning_rate": 4.796516578120296e-06, + "loss": 0.4219, + "step": 12828 + }, + { + "epoch": 0.7812319215662393, + "grad_norm": 0.9157589240359834, + "learning_rate": 4.796485043788049e-06, + "loss": 0.4603, + "step": 12829 + }, + { + "epoch": 0.7812928173431173, + "grad_norm": 1.0318689401764523, + "learning_rate": 4.7964535071161915e-06, + "loss": 0.3985, + "step": 12830 + }, + { + "epoch": 0.7813537131199951, + "grad_norm": 0.9908554252661861, + "learning_rate": 4.796421968104754e-06, + "loss": 0.3971, + "step": 12831 + }, + { + "epoch": 0.781414608896873, + "grad_norm": 0.9628971134864286, + "learning_rate": 4.79639042675377e-06, + "loss": 0.4147, + "step": 12832 + }, + { + "epoch": 0.7814755046737508, + "grad_norm": 1.0005666140897411, + "learning_rate": 4.79635888306327e-06, + "loss": 0.4557, + "step": 12833 + }, + { + "epoch": 0.7815364004506288, + "grad_norm": 1.086180236485511, + "learning_rate": 4.796327337033289e-06, + "loss": 0.3957, + "step": 12834 + }, + { + "epoch": 0.7815972962275066, + "grad_norm": 1.048861265631583, + "learning_rate": 4.796295788663857e-06, + "loss": 0.4929, + "step": 12835 + }, + { + "epoch": 0.7816581920043845, + "grad_norm": 0.9776006533247312, + "learning_rate": 4.796264237955006e-06, + "loss": 0.4405, + "step": 12836 + }, + { + "epoch": 0.7817190877812624, + "grad_norm": 0.945325966107918, + "learning_rate": 4.796232684906769e-06, + "loss": 0.3904, + "step": 12837 + }, + { + "epoch": 0.7817799835581403, + "grad_norm": 1.0102361178619939, + "learning_rate": 4.796201129519178e-06, + "loss": 0.3396, + "step": 12838 + }, + { + "epoch": 0.7818408793350181, + "grad_norm": 0.959833760323073, + "learning_rate": 4.796169571792265e-06, + "loss": 0.4062, + "step": 12839 + }, + { + "epoch": 0.781901775111896, + "grad_norm": 0.9075921805983578, + "learning_rate": 4.796138011726063e-06, + "loss": 0.4916, + "step": 12840 + }, + { + "epoch": 0.7819626708887739, + "grad_norm": 0.9737043527605134, + "learning_rate": 4.7961064493206025e-06, + "loss": 0.4078, + "step": 12841 + }, + { + "epoch": 0.7820235666656518, + "grad_norm": 1.0653239689361933, + "learning_rate": 4.796074884575917e-06, + "loss": 0.3966, + "step": 12842 + }, + { + "epoch": 0.7820844624425296, + "grad_norm": 0.9524832561256481, + "learning_rate": 4.796043317492037e-06, + "loss": 0.4088, + "step": 12843 + }, + { + "epoch": 0.7821453582194074, + "grad_norm": 0.9328848706877827, + "learning_rate": 4.796011748068997e-06, + "loss": 0.4515, + "step": 12844 + }, + { + "epoch": 0.7822062539962854, + "grad_norm": 1.0768418748519641, + "learning_rate": 4.795980176306827e-06, + "loss": 0.398, + "step": 12845 + }, + { + "epoch": 0.7822671497731633, + "grad_norm": 0.982921226063071, + "learning_rate": 4.7959486022055605e-06, + "loss": 0.4462, + "step": 12846 + }, + { + "epoch": 0.7823280455500411, + "grad_norm": 1.0103401753330628, + "learning_rate": 4.79591702576523e-06, + "loss": 0.4392, + "step": 12847 + }, + { + "epoch": 0.7823889413269189, + "grad_norm": 0.9644529320466012, + "learning_rate": 4.795885446985866e-06, + "loss": 0.4359, + "step": 12848 + }, + { + "epoch": 0.7824498371037969, + "grad_norm": 1.1109401577095772, + "learning_rate": 4.795853865867502e-06, + "loss": 0.3904, + "step": 12849 + }, + { + "epoch": 0.7825107328806747, + "grad_norm": 1.082984609692948, + "learning_rate": 4.79582228241017e-06, + "loss": 0.4141, + "step": 12850 + }, + { + "epoch": 0.7825716286575526, + "grad_norm": 1.0192818259210161, + "learning_rate": 4.7957906966139015e-06, + "loss": 0.4208, + "step": 12851 + }, + { + "epoch": 0.7826325244344304, + "grad_norm": 0.978162708517692, + "learning_rate": 4.79575910847873e-06, + "loss": 0.4211, + "step": 12852 + }, + { + "epoch": 0.7826934202113084, + "grad_norm": 0.9541693251481568, + "learning_rate": 4.7957275180046855e-06, + "loss": 0.4129, + "step": 12853 + }, + { + "epoch": 0.7827543159881862, + "grad_norm": 0.9349369764600014, + "learning_rate": 4.795695925191803e-06, + "loss": 0.4449, + "step": 12854 + }, + { + "epoch": 0.7828152117650641, + "grad_norm": 0.9571242131418672, + "learning_rate": 4.795664330040113e-06, + "loss": 0.4449, + "step": 12855 + }, + { + "epoch": 0.7828761075419419, + "grad_norm": 0.9358842783420489, + "learning_rate": 4.7956327325496465e-06, + "loss": 0.4203, + "step": 12856 + }, + { + "epoch": 0.7829370033188199, + "grad_norm": 0.980900931964412, + "learning_rate": 4.7956011327204385e-06, + "loss": 0.375, + "step": 12857 + }, + { + "epoch": 0.7829978990956977, + "grad_norm": 0.8810238882887181, + "learning_rate": 4.795569530552519e-06, + "loss": 0.4706, + "step": 12858 + }, + { + "epoch": 0.7830587948725756, + "grad_norm": 1.023147424325759, + "learning_rate": 4.795537926045922e-06, + "loss": 0.3743, + "step": 12859 + }, + { + "epoch": 0.7831196906494534, + "grad_norm": 0.960254045000249, + "learning_rate": 4.795506319200678e-06, + "loss": 0.4149, + "step": 12860 + }, + { + "epoch": 0.7831805864263314, + "grad_norm": 0.8908554427662069, + "learning_rate": 4.7954747100168196e-06, + "loss": 0.4121, + "step": 12861 + }, + { + "epoch": 0.7832414822032092, + "grad_norm": 1.0403735590071252, + "learning_rate": 4.79544309849438e-06, + "loss": 0.4083, + "step": 12862 + }, + { + "epoch": 0.7833023779800871, + "grad_norm": 0.9682667589307011, + "learning_rate": 4.79541148463339e-06, + "loss": 0.4196, + "step": 12863 + }, + { + "epoch": 0.7833632737569649, + "grad_norm": 1.0762041864462106, + "learning_rate": 4.795379868433883e-06, + "loss": 0.4922, + "step": 12864 + }, + { + "epoch": 0.7834241695338429, + "grad_norm": 1.0052463697169283, + "learning_rate": 4.79534824989589e-06, + "loss": 0.4284, + "step": 12865 + }, + { + "epoch": 0.7834850653107207, + "grad_norm": 0.9671273385438368, + "learning_rate": 4.795316629019445e-06, + "loss": 0.4236, + "step": 12866 + }, + { + "epoch": 0.7835459610875986, + "grad_norm": 1.0605437169202174, + "learning_rate": 4.795285005804578e-06, + "loss": 0.4144, + "step": 12867 + }, + { + "epoch": 0.7836068568644764, + "grad_norm": 1.0172343713292156, + "learning_rate": 4.7952533802513235e-06, + "loss": 0.5292, + "step": 12868 + }, + { + "epoch": 0.7836677526413544, + "grad_norm": 1.046418582600929, + "learning_rate": 4.795221752359712e-06, + "loss": 0.431, + "step": 12869 + }, + { + "epoch": 0.7837286484182322, + "grad_norm": 1.0472635287720442, + "learning_rate": 4.795190122129777e-06, + "loss": 0.4572, + "step": 12870 + }, + { + "epoch": 0.78378954419511, + "grad_norm": 0.9697705074797274, + "learning_rate": 4.795158489561549e-06, + "loss": 0.3888, + "step": 12871 + }, + { + "epoch": 0.7838504399719879, + "grad_norm": 1.1708838348029407, + "learning_rate": 4.795126854655062e-06, + "loss": 0.3677, + "step": 12872 + }, + { + "epoch": 0.7839113357488658, + "grad_norm": 0.9317513153445716, + "learning_rate": 4.795095217410347e-06, + "loss": 0.4595, + "step": 12873 + }, + { + "epoch": 0.7839722315257437, + "grad_norm": 0.8934882485294031, + "learning_rate": 4.795063577827437e-06, + "loss": 0.4922, + "step": 12874 + }, + { + "epoch": 0.7840331273026215, + "grad_norm": 0.9668294318761956, + "learning_rate": 4.7950319359063635e-06, + "loss": 0.4591, + "step": 12875 + }, + { + "epoch": 0.7840940230794995, + "grad_norm": 0.9807857292638039, + "learning_rate": 4.7950002916471596e-06, + "loss": 0.4688, + "step": 12876 + }, + { + "epoch": 0.7841549188563773, + "grad_norm": 1.0218684783875596, + "learning_rate": 4.794968645049857e-06, + "loss": 0.4243, + "step": 12877 + }, + { + "epoch": 0.7842158146332552, + "grad_norm": 1.035569391817696, + "learning_rate": 4.794936996114488e-06, + "loss": 0.4697, + "step": 12878 + }, + { + "epoch": 0.784276710410133, + "grad_norm": 1.0202629195924544, + "learning_rate": 4.794905344841085e-06, + "loss": 0.3718, + "step": 12879 + }, + { + "epoch": 0.784337606187011, + "grad_norm": 0.9641352167077727, + "learning_rate": 4.79487369122968e-06, + "loss": 0.403, + "step": 12880 + }, + { + "epoch": 0.7843985019638888, + "grad_norm": 1.0097900273810863, + "learning_rate": 4.794842035280305e-06, + "loss": 0.3686, + "step": 12881 + }, + { + "epoch": 0.7844593977407667, + "grad_norm": 0.9526534981588575, + "learning_rate": 4.7948103769929934e-06, + "loss": 0.4754, + "step": 12882 + }, + { + "epoch": 0.7845202935176445, + "grad_norm": 0.9187082128188052, + "learning_rate": 4.794778716367776e-06, + "loss": 0.4487, + "step": 12883 + }, + { + "epoch": 0.7845811892945225, + "grad_norm": 1.0393354104164108, + "learning_rate": 4.794747053404686e-06, + "loss": 0.3953, + "step": 12884 + }, + { + "epoch": 0.7846420850714003, + "grad_norm": 1.0615278812486624, + "learning_rate": 4.794715388103756e-06, + "loss": 0.3574, + "step": 12885 + }, + { + "epoch": 0.7847029808482782, + "grad_norm": 1.0244687375575712, + "learning_rate": 4.794683720465016e-06, + "loss": 0.3851, + "step": 12886 + }, + { + "epoch": 0.784763876625156, + "grad_norm": 1.02126760696053, + "learning_rate": 4.794652050488502e-06, + "loss": 0.3692, + "step": 12887 + }, + { + "epoch": 0.784824772402034, + "grad_norm": 0.9975872930506405, + "learning_rate": 4.794620378174244e-06, + "loss": 0.3628, + "step": 12888 + }, + { + "epoch": 0.7848856681789118, + "grad_norm": 1.0227525552844992, + "learning_rate": 4.794588703522273e-06, + "loss": 0.415, + "step": 12889 + }, + { + "epoch": 0.7849465639557897, + "grad_norm": 1.0661786251946384, + "learning_rate": 4.794557026532623e-06, + "loss": 0.4329, + "step": 12890 + }, + { + "epoch": 0.7850074597326675, + "grad_norm": 0.9999104409745524, + "learning_rate": 4.794525347205328e-06, + "loss": 0.3489, + "step": 12891 + }, + { + "epoch": 0.7850683555095455, + "grad_norm": 0.9959467499594046, + "learning_rate": 4.794493665540416e-06, + "loss": 0.4317, + "step": 12892 + }, + { + "epoch": 0.7851292512864233, + "grad_norm": 1.0712144039766136, + "learning_rate": 4.794461981537922e-06, + "loss": 0.4133, + "step": 12893 + }, + { + "epoch": 0.7851901470633011, + "grad_norm": 1.024845974381754, + "learning_rate": 4.7944302951978784e-06, + "loss": 0.4104, + "step": 12894 + }, + { + "epoch": 0.785251042840179, + "grad_norm": 1.1174941958684097, + "learning_rate": 4.7943986065203175e-06, + "loss": 0.4139, + "step": 12895 + }, + { + "epoch": 0.785311938617057, + "grad_norm": 1.0433295013726978, + "learning_rate": 4.794366915505269e-06, + "loss": 0.3756, + "step": 12896 + }, + { + "epoch": 0.7853728343939348, + "grad_norm": 1.0671084280466054, + "learning_rate": 4.794335222152769e-06, + "loss": 0.4224, + "step": 12897 + }, + { + "epoch": 0.7854337301708126, + "grad_norm": 1.0146339589688236, + "learning_rate": 4.794303526462848e-06, + "loss": 0.4337, + "step": 12898 + }, + { + "epoch": 0.7854946259476905, + "grad_norm": 1.0043904314311702, + "learning_rate": 4.7942718284355374e-06, + "loss": 0.4037, + "step": 12899 + }, + { + "epoch": 0.7855555217245684, + "grad_norm": 0.9763805511500581, + "learning_rate": 4.794240128070871e-06, + "loss": 0.4666, + "step": 12900 + }, + { + "epoch": 0.7856164175014463, + "grad_norm": 1.043597788929724, + "learning_rate": 4.79420842536888e-06, + "loss": 0.3377, + "step": 12901 + }, + { + "epoch": 0.7856773132783241, + "grad_norm": 0.9469370781444413, + "learning_rate": 4.7941767203295975e-06, + "loss": 0.4803, + "step": 12902 + }, + { + "epoch": 0.785738209055202, + "grad_norm": 0.9023152198746117, + "learning_rate": 4.794145012953055e-06, + "loss": 0.397, + "step": 12903 + }, + { + "epoch": 0.7857991048320799, + "grad_norm": 1.0756775644350782, + "learning_rate": 4.794113303239285e-06, + "loss": 0.3717, + "step": 12904 + }, + { + "epoch": 0.7858600006089578, + "grad_norm": 0.9856236979887598, + "learning_rate": 4.794081591188321e-06, + "loss": 0.4051, + "step": 12905 + }, + { + "epoch": 0.7859208963858356, + "grad_norm": 1.0240854542560522, + "learning_rate": 4.794049876800194e-06, + "loss": 0.3792, + "step": 12906 + }, + { + "epoch": 0.7859817921627135, + "grad_norm": 0.90482093139294, + "learning_rate": 4.794018160074936e-06, + "loss": 0.456, + "step": 12907 + }, + { + "epoch": 0.7860426879395914, + "grad_norm": 1.0275846567735452, + "learning_rate": 4.7939864410125806e-06, + "loss": 0.3666, + "step": 12908 + }, + { + "epoch": 0.7861035837164693, + "grad_norm": 0.9790047215100995, + "learning_rate": 4.7939547196131595e-06, + "loss": 0.486, + "step": 12909 + }, + { + "epoch": 0.7861644794933471, + "grad_norm": 0.9349329648029272, + "learning_rate": 4.793922995876705e-06, + "loss": 0.4521, + "step": 12910 + }, + { + "epoch": 0.786225375270225, + "grad_norm": 1.0595566833926624, + "learning_rate": 4.793891269803249e-06, + "loss": 0.4006, + "step": 12911 + }, + { + "epoch": 0.7862862710471029, + "grad_norm": 1.0102711795974932, + "learning_rate": 4.7938595413928256e-06, + "loss": 0.3875, + "step": 12912 + }, + { + "epoch": 0.7863471668239808, + "grad_norm": 0.9793263316914373, + "learning_rate": 4.793827810645465e-06, + "loss": 0.4518, + "step": 12913 + }, + { + "epoch": 0.7864080626008586, + "grad_norm": 0.9977961732985887, + "learning_rate": 4.793796077561201e-06, + "loss": 0.4989, + "step": 12914 + }, + { + "epoch": 0.7864689583777364, + "grad_norm": 1.0212263194847488, + "learning_rate": 4.793764342140064e-06, + "loss": 0.3948, + "step": 12915 + }, + { + "epoch": 0.7865298541546144, + "grad_norm": 0.9484044299583645, + "learning_rate": 4.793732604382088e-06, + "loss": 0.3921, + "step": 12916 + }, + { + "epoch": 0.7865907499314923, + "grad_norm": 1.079220016404707, + "learning_rate": 4.793700864287304e-06, + "loss": 0.3788, + "step": 12917 + }, + { + "epoch": 0.7866516457083701, + "grad_norm": 1.0248970380977322, + "learning_rate": 4.793669121855746e-06, + "loss": 0.4033, + "step": 12918 + }, + { + "epoch": 0.786712541485248, + "grad_norm": 1.003521515433648, + "learning_rate": 4.7936373770874465e-06, + "loss": 0.443, + "step": 12919 + }, + { + "epoch": 0.7867734372621259, + "grad_norm": 1.0279651597677282, + "learning_rate": 4.793605629982436e-06, + "loss": 0.471, + "step": 12920 + }, + { + "epoch": 0.7868343330390037, + "grad_norm": 1.1712135414067317, + "learning_rate": 4.793573880540749e-06, + "loss": 0.3938, + "step": 12921 + }, + { + "epoch": 0.7868952288158816, + "grad_norm": 0.9909624343055037, + "learning_rate": 4.7935421287624155e-06, + "loss": 0.433, + "step": 12922 + }, + { + "epoch": 0.7869561245927595, + "grad_norm": 1.0146791431709279, + "learning_rate": 4.793510374647468e-06, + "loss": 0.4126, + "step": 12923 + }, + { + "epoch": 0.7870170203696374, + "grad_norm": 0.9128743907626945, + "learning_rate": 4.793478618195942e-06, + "loss": 0.4648, + "step": 12924 + }, + { + "epoch": 0.7870779161465152, + "grad_norm": 0.9661176043331805, + "learning_rate": 4.793446859407865e-06, + "loss": 0.4041, + "step": 12925 + }, + { + "epoch": 0.7871388119233931, + "grad_norm": 0.9569190082308467, + "learning_rate": 4.793415098283274e-06, + "loss": 0.495, + "step": 12926 + }, + { + "epoch": 0.787199707700271, + "grad_norm": 0.9721736829401232, + "learning_rate": 4.793383334822199e-06, + "loss": 0.4069, + "step": 12927 + }, + { + "epoch": 0.7872606034771489, + "grad_norm": 0.9842871127431253, + "learning_rate": 4.793351569024673e-06, + "loss": 0.4168, + "step": 12928 + }, + { + "epoch": 0.7873214992540267, + "grad_norm": 1.0142372323654951, + "learning_rate": 4.793319800890728e-06, + "loss": 0.4164, + "step": 12929 + }, + { + "epoch": 0.7873823950309046, + "grad_norm": 0.976551636973649, + "learning_rate": 4.7932880304203955e-06, + "loss": 0.4351, + "step": 12930 + }, + { + "epoch": 0.7874432908077825, + "grad_norm": 0.9868929348986406, + "learning_rate": 4.79325625761371e-06, + "loss": 0.4771, + "step": 12931 + }, + { + "epoch": 0.7875041865846604, + "grad_norm": 1.0053471979684898, + "learning_rate": 4.793224482470702e-06, + "loss": 0.3778, + "step": 12932 + }, + { + "epoch": 0.7875650823615382, + "grad_norm": 1.1122634747682087, + "learning_rate": 4.793192704991405e-06, + "loss": 0.4097, + "step": 12933 + }, + { + "epoch": 0.7876259781384161, + "grad_norm": 1.0642560835353265, + "learning_rate": 4.793160925175851e-06, + "loss": 0.3846, + "step": 12934 + }, + { + "epoch": 0.787686873915294, + "grad_norm": 0.8879993061993314, + "learning_rate": 4.793129143024072e-06, + "loss": 0.4427, + "step": 12935 + }, + { + "epoch": 0.7877477696921719, + "grad_norm": 1.0025889164828852, + "learning_rate": 4.7930973585361005e-06, + "loss": 0.4488, + "step": 12936 + }, + { + "epoch": 0.7878086654690497, + "grad_norm": 1.0101154277323274, + "learning_rate": 4.79306557171197e-06, + "loss": 0.4804, + "step": 12937 + }, + { + "epoch": 0.7878695612459276, + "grad_norm": 1.047500190821884, + "learning_rate": 4.793033782551711e-06, + "loss": 0.4399, + "step": 12938 + }, + { + "epoch": 0.7879304570228055, + "grad_norm": 1.0269307899497024, + "learning_rate": 4.793001991055357e-06, + "loss": 0.4256, + "step": 12939 + }, + { + "epoch": 0.7879913527996834, + "grad_norm": 0.9545591670488273, + "learning_rate": 4.792970197222941e-06, + "loss": 0.41, + "step": 12940 + }, + { + "epoch": 0.7880522485765612, + "grad_norm": 1.0485973962026218, + "learning_rate": 4.792938401054494e-06, + "loss": 0.3504, + "step": 12941 + }, + { + "epoch": 0.788113144353439, + "grad_norm": 1.0870141380010974, + "learning_rate": 4.792906602550049e-06, + "loss": 0.3618, + "step": 12942 + }, + { + "epoch": 0.788174040130317, + "grad_norm": 1.001411248108663, + "learning_rate": 4.792874801709639e-06, + "loss": 0.4482, + "step": 12943 + }, + { + "epoch": 0.7882349359071948, + "grad_norm": 1.0430088162832554, + "learning_rate": 4.792842998533295e-06, + "loss": 0.3903, + "step": 12944 + }, + { + "epoch": 0.7882958316840727, + "grad_norm": 0.9707251520224953, + "learning_rate": 4.792811193021051e-06, + "loss": 0.4317, + "step": 12945 + }, + { + "epoch": 0.7883567274609505, + "grad_norm": 0.9243960780233895, + "learning_rate": 4.792779385172938e-06, + "loss": 0.4178, + "step": 12946 + }, + { + "epoch": 0.7884176232378285, + "grad_norm": 1.035810671681013, + "learning_rate": 4.79274757498899e-06, + "loss": 0.3801, + "step": 12947 + }, + { + "epoch": 0.7884785190147063, + "grad_norm": 0.9718994568770585, + "learning_rate": 4.792715762469237e-06, + "loss": 0.3779, + "step": 12948 + }, + { + "epoch": 0.7885394147915842, + "grad_norm": 1.0232930783610976, + "learning_rate": 4.792683947613714e-06, + "loss": 0.387, + "step": 12949 + }, + { + "epoch": 0.788600310568462, + "grad_norm": 1.067041498837982, + "learning_rate": 4.792652130422451e-06, + "loss": 0.3583, + "step": 12950 + }, + { + "epoch": 0.78866120634534, + "grad_norm": 0.9392653931822716, + "learning_rate": 4.792620310895483e-06, + "loss": 0.4559, + "step": 12951 + }, + { + "epoch": 0.7887221021222178, + "grad_norm": 0.962813022245222, + "learning_rate": 4.792588489032841e-06, + "loss": 0.4206, + "step": 12952 + }, + { + "epoch": 0.7887829978990957, + "grad_norm": 0.9957777331645866, + "learning_rate": 4.792556664834557e-06, + "loss": 0.4352, + "step": 12953 + }, + { + "epoch": 0.7888438936759735, + "grad_norm": 0.9509757047143039, + "learning_rate": 4.792524838300664e-06, + "loss": 0.3776, + "step": 12954 + }, + { + "epoch": 0.7889047894528515, + "grad_norm": 0.9305695049244579, + "learning_rate": 4.792493009431194e-06, + "loss": 0.3923, + "step": 12955 + }, + { + "epoch": 0.7889656852297293, + "grad_norm": 1.0305036633407438, + "learning_rate": 4.7924611782261805e-06, + "loss": 0.448, + "step": 12956 + }, + { + "epoch": 0.7890265810066072, + "grad_norm": 1.0298824831735653, + "learning_rate": 4.792429344685655e-06, + "loss": 0.3823, + "step": 12957 + }, + { + "epoch": 0.7890874767834851, + "grad_norm": 1.0154344516927205, + "learning_rate": 4.79239750880965e-06, + "loss": 0.4657, + "step": 12958 + }, + { + "epoch": 0.789148372560363, + "grad_norm": 0.9543571037672499, + "learning_rate": 4.792365670598198e-06, + "loss": 0.4628, + "step": 12959 + }, + { + "epoch": 0.7892092683372408, + "grad_norm": 0.980910099512124, + "learning_rate": 4.792333830051332e-06, + "loss": 0.4105, + "step": 12960 + }, + { + "epoch": 0.7892701641141187, + "grad_norm": 0.9364774147730054, + "learning_rate": 4.792301987169083e-06, + "loss": 0.4441, + "step": 12961 + }, + { + "epoch": 0.7893310598909966, + "grad_norm": 1.1226827275893603, + "learning_rate": 4.792270141951485e-06, + "loss": 0.4485, + "step": 12962 + }, + { + "epoch": 0.7893919556678745, + "grad_norm": 0.9601573779105891, + "learning_rate": 4.79223829439857e-06, + "loss": 0.4143, + "step": 12963 + }, + { + "epoch": 0.7894528514447523, + "grad_norm": 1.019212698012444, + "learning_rate": 4.79220644451037e-06, + "loss": 0.4586, + "step": 12964 + }, + { + "epoch": 0.7895137472216301, + "grad_norm": 1.006203557818235, + "learning_rate": 4.792174592286918e-06, + "loss": 0.3769, + "step": 12965 + }, + { + "epoch": 0.7895746429985081, + "grad_norm": 0.9884614351414648, + "learning_rate": 4.792142737728246e-06, + "loss": 0.4649, + "step": 12966 + }, + { + "epoch": 0.789635538775386, + "grad_norm": 0.9729654139248642, + "learning_rate": 4.792110880834387e-06, + "loss": 0.4393, + "step": 12967 + }, + { + "epoch": 0.7896964345522638, + "grad_norm": 0.9967765611543823, + "learning_rate": 4.792079021605372e-06, + "loss": 0.3891, + "step": 12968 + }, + { + "epoch": 0.7897573303291416, + "grad_norm": 1.0875606968053437, + "learning_rate": 4.792047160041235e-06, + "loss": 0.4308, + "step": 12969 + }, + { + "epoch": 0.7898182261060196, + "grad_norm": 1.0595879822949106, + "learning_rate": 4.792015296142009e-06, + "loss": 0.3743, + "step": 12970 + }, + { + "epoch": 0.7898791218828974, + "grad_norm": 0.9932196198479512, + "learning_rate": 4.791983429907725e-06, + "loss": 0.4202, + "step": 12971 + }, + { + "epoch": 0.7899400176597753, + "grad_norm": 0.9845329085425347, + "learning_rate": 4.791951561338415e-06, + "loss": 0.4585, + "step": 12972 + }, + { + "epoch": 0.7900009134366531, + "grad_norm": 1.0410751584425433, + "learning_rate": 4.7919196904341136e-06, + "loss": 0.4473, + "step": 12973 + }, + { + "epoch": 0.7900618092135311, + "grad_norm": 0.9600949168363881, + "learning_rate": 4.7918878171948514e-06, + "loss": 0.3744, + "step": 12974 + }, + { + "epoch": 0.7901227049904089, + "grad_norm": 1.1049195473597426, + "learning_rate": 4.791855941620662e-06, + "loss": 0.3794, + "step": 12975 + }, + { + "epoch": 0.7901836007672868, + "grad_norm": 0.9663529991742598, + "learning_rate": 4.7918240637115764e-06, + "loss": 0.4141, + "step": 12976 + }, + { + "epoch": 0.7902444965441646, + "grad_norm": 1.0141659910076672, + "learning_rate": 4.79179218346763e-06, + "loss": 0.3659, + "step": 12977 + }, + { + "epoch": 0.7903053923210426, + "grad_norm": 0.9168498620228736, + "learning_rate": 4.791760300888851e-06, + "loss": 0.4252, + "step": 12978 + }, + { + "epoch": 0.7903662880979204, + "grad_norm": 1.0009224533190306, + "learning_rate": 4.791728415975276e-06, + "loss": 0.4053, + "step": 12979 + }, + { + "epoch": 0.7904271838747983, + "grad_norm": 1.0202099817613546, + "learning_rate": 4.791696528726934e-06, + "loss": 0.3653, + "step": 12980 + }, + { + "epoch": 0.7904880796516761, + "grad_norm": 0.9507590863719507, + "learning_rate": 4.791664639143861e-06, + "loss": 0.4585, + "step": 12981 + }, + { + "epoch": 0.7905489754285541, + "grad_norm": 0.9789463604589671, + "learning_rate": 4.791632747226087e-06, + "loss": 0.3785, + "step": 12982 + }, + { + "epoch": 0.7906098712054319, + "grad_norm": 1.0679895507083395, + "learning_rate": 4.791600852973646e-06, + "loss": 0.4377, + "step": 12983 + }, + { + "epoch": 0.7906707669823098, + "grad_norm": 1.0234547031990031, + "learning_rate": 4.7915689563865685e-06, + "loss": 0.3704, + "step": 12984 + }, + { + "epoch": 0.7907316627591876, + "grad_norm": 1.050602079170671, + "learning_rate": 4.791537057464889e-06, + "loss": 0.3606, + "step": 12985 + }, + { + "epoch": 0.7907925585360656, + "grad_norm": 0.9770070592912589, + "learning_rate": 4.791505156208639e-06, + "loss": 0.3879, + "step": 12986 + }, + { + "epoch": 0.7908534543129434, + "grad_norm": 1.1364992292863207, + "learning_rate": 4.791473252617851e-06, + "loss": 0.4281, + "step": 12987 + }, + { + "epoch": 0.7909143500898212, + "grad_norm": 0.9541622588555151, + "learning_rate": 4.791441346692558e-06, + "loss": 0.4289, + "step": 12988 + }, + { + "epoch": 0.7909752458666991, + "grad_norm": 0.93012834413104, + "learning_rate": 4.791409438432792e-06, + "loss": 0.3873, + "step": 12989 + }, + { + "epoch": 0.791036141643577, + "grad_norm": 1.0367374683961261, + "learning_rate": 4.791377527838585e-06, + "loss": 0.4178, + "step": 12990 + }, + { + "epoch": 0.7910970374204549, + "grad_norm": 0.9982302857299669, + "learning_rate": 4.7913456149099716e-06, + "loss": 0.4107, + "step": 12991 + }, + { + "epoch": 0.7911579331973327, + "grad_norm": 1.0081698547556646, + "learning_rate": 4.7913136996469816e-06, + "loss": 0.4402, + "step": 12992 + }, + { + "epoch": 0.7912188289742106, + "grad_norm": 0.9684391885631602, + "learning_rate": 4.791281782049649e-06, + "loss": 0.4642, + "step": 12993 + }, + { + "epoch": 0.7912797247510885, + "grad_norm": 0.96605732957712, + "learning_rate": 4.791249862118007e-06, + "loss": 0.4477, + "step": 12994 + }, + { + "epoch": 0.7913406205279664, + "grad_norm": 0.9557348308050183, + "learning_rate": 4.791217939852087e-06, + "loss": 0.396, + "step": 12995 + }, + { + "epoch": 0.7914015163048442, + "grad_norm": 0.9325004898798374, + "learning_rate": 4.791186015251922e-06, + "loss": 0.5007, + "step": 12996 + }, + { + "epoch": 0.7914624120817221, + "grad_norm": 1.0137076872093322, + "learning_rate": 4.791154088317544e-06, + "loss": 0.4922, + "step": 12997 + }, + { + "epoch": 0.7915233078586, + "grad_norm": 0.9829883808320226, + "learning_rate": 4.791122159048985e-06, + "loss": 0.4674, + "step": 12998 + }, + { + "epoch": 0.7915842036354779, + "grad_norm": 0.9178965403757321, + "learning_rate": 4.7910902274462805e-06, + "loss": 0.3896, + "step": 12999 + }, + { + "epoch": 0.7916450994123557, + "grad_norm": 1.0228572300230716, + "learning_rate": 4.791058293509458e-06, + "loss": 0.4255, + "step": 13000 + }, + { + "epoch": 0.7917059951892337, + "grad_norm": 1.009407054575552, + "learning_rate": 4.791026357238555e-06, + "loss": 0.4877, + "step": 13001 + }, + { + "epoch": 0.7917668909661115, + "grad_norm": 0.9814496071182438, + "learning_rate": 4.790994418633602e-06, + "loss": 0.3974, + "step": 13002 + }, + { + "epoch": 0.7918277867429894, + "grad_norm": 0.8973295515826255, + "learning_rate": 4.790962477694631e-06, + "loss": 0.4529, + "step": 13003 + }, + { + "epoch": 0.7918886825198672, + "grad_norm": 1.0285638351728583, + "learning_rate": 4.790930534421675e-06, + "loss": 0.4055, + "step": 13004 + }, + { + "epoch": 0.7919495782967452, + "grad_norm": 0.8820718770651046, + "learning_rate": 4.790898588814767e-06, + "loss": 0.4414, + "step": 13005 + }, + { + "epoch": 0.792010474073623, + "grad_norm": 1.0358980705997967, + "learning_rate": 4.790866640873939e-06, + "loss": 0.4491, + "step": 13006 + }, + { + "epoch": 0.7920713698505009, + "grad_norm": 0.9969429834778242, + "learning_rate": 4.7908346905992235e-06, + "loss": 0.4377, + "step": 13007 + }, + { + "epoch": 0.7921322656273787, + "grad_norm": 0.9254358145899755, + "learning_rate": 4.790802737990653e-06, + "loss": 0.4427, + "step": 13008 + }, + { + "epoch": 0.7921931614042567, + "grad_norm": 0.9610759806533696, + "learning_rate": 4.790770783048261e-06, + "loss": 0.3757, + "step": 13009 + }, + { + "epoch": 0.7922540571811345, + "grad_norm": 1.011351952240975, + "learning_rate": 4.790738825772078e-06, + "loss": 0.3938, + "step": 13010 + }, + { + "epoch": 0.7923149529580124, + "grad_norm": 0.9329506590712717, + "learning_rate": 4.79070686616214e-06, + "loss": 0.3963, + "step": 13011 + }, + { + "epoch": 0.7923758487348902, + "grad_norm": 1.0167837272495637, + "learning_rate": 4.790674904218476e-06, + "loss": 0.4443, + "step": 13012 + }, + { + "epoch": 0.7924367445117682, + "grad_norm": 1.048388910997831, + "learning_rate": 4.79064293994112e-06, + "loss": 0.4519, + "step": 13013 + }, + { + "epoch": 0.792497640288646, + "grad_norm": 1.0354324893134752, + "learning_rate": 4.790610973330105e-06, + "loss": 0.3992, + "step": 13014 + }, + { + "epoch": 0.7925585360655238, + "grad_norm": 0.9509923127733703, + "learning_rate": 4.7905790043854635e-06, + "loss": 0.4302, + "step": 13015 + }, + { + "epoch": 0.7926194318424017, + "grad_norm": 1.004326030787231, + "learning_rate": 4.7905470331072276e-06, + "loss": 0.409, + "step": 13016 + }, + { + "epoch": 0.7926803276192796, + "grad_norm": 0.9121881396996515, + "learning_rate": 4.790515059495429e-06, + "loss": 0.4141, + "step": 13017 + }, + { + "epoch": 0.7927412233961575, + "grad_norm": 0.9695677152812116, + "learning_rate": 4.790483083550103e-06, + "loss": 0.4755, + "step": 13018 + }, + { + "epoch": 0.7928021191730353, + "grad_norm": 1.0345423776736118, + "learning_rate": 4.7904511052712785e-06, + "loss": 0.4256, + "step": 13019 + }, + { + "epoch": 0.7928630149499132, + "grad_norm": 0.9949393864771411, + "learning_rate": 4.790419124658992e-06, + "loss": 0.4441, + "step": 13020 + }, + { + "epoch": 0.7929239107267911, + "grad_norm": 0.9907856921565877, + "learning_rate": 4.790387141713273e-06, + "loss": 0.3298, + "step": 13021 + }, + { + "epoch": 0.792984806503669, + "grad_norm": 1.036254576734454, + "learning_rate": 4.7903551564341555e-06, + "loss": 0.4327, + "step": 13022 + }, + { + "epoch": 0.7930457022805468, + "grad_norm": 1.0290755745187108, + "learning_rate": 4.790323168821671e-06, + "loss": 0.415, + "step": 13023 + }, + { + "epoch": 0.7931065980574247, + "grad_norm": 1.0271117682464672, + "learning_rate": 4.790291178875854e-06, + "loss": 0.4182, + "step": 13024 + }, + { + "epoch": 0.7931674938343026, + "grad_norm": 1.0023448838707445, + "learning_rate": 4.790259186596735e-06, + "loss": 0.4337, + "step": 13025 + }, + { + "epoch": 0.7932283896111805, + "grad_norm": 0.9688865274127839, + "learning_rate": 4.7902271919843475e-06, + "loss": 0.3481, + "step": 13026 + }, + { + "epoch": 0.7932892853880583, + "grad_norm": 1.0520334712998705, + "learning_rate": 4.7901951950387254e-06, + "loss": 0.4009, + "step": 13027 + }, + { + "epoch": 0.7933501811649362, + "grad_norm": 0.9725116093670895, + "learning_rate": 4.790163195759899e-06, + "loss": 0.3676, + "step": 13028 + }, + { + "epoch": 0.7934110769418141, + "grad_norm": 1.0071774039855832, + "learning_rate": 4.7901311941479025e-06, + "loss": 0.4932, + "step": 13029 + }, + { + "epoch": 0.793471972718692, + "grad_norm": 1.0825275247095951, + "learning_rate": 4.790099190202769e-06, + "loss": 0.4319, + "step": 13030 + }, + { + "epoch": 0.7935328684955698, + "grad_norm": 1.0043533817135768, + "learning_rate": 4.790067183924528e-06, + "loss": 0.4328, + "step": 13031 + }, + { + "epoch": 0.7935937642724477, + "grad_norm": 0.9989818785241754, + "learning_rate": 4.790035175313214e-06, + "loss": 0.5233, + "step": 13032 + }, + { + "epoch": 0.7936546600493256, + "grad_norm": 0.997167443411966, + "learning_rate": 4.790003164368862e-06, + "loss": 0.3864, + "step": 13033 + }, + { + "epoch": 0.7937155558262035, + "grad_norm": 1.0804682103670809, + "learning_rate": 4.789971151091501e-06, + "loss": 0.3764, + "step": 13034 + }, + { + "epoch": 0.7937764516030813, + "grad_norm": 1.0715017546611214, + "learning_rate": 4.7899391354811645e-06, + "loss": 0.3872, + "step": 13035 + }, + { + "epoch": 0.7938373473799591, + "grad_norm": 1.0285945431551002, + "learning_rate": 4.789907117537886e-06, + "loss": 0.4723, + "step": 13036 + }, + { + "epoch": 0.7938982431568371, + "grad_norm": 1.0685093860561565, + "learning_rate": 4.789875097261698e-06, + "loss": 0.3748, + "step": 13037 + }, + { + "epoch": 0.793959138933715, + "grad_norm": 0.9664246837128945, + "learning_rate": 4.7898430746526334e-06, + "loss": 0.4538, + "step": 13038 + }, + { + "epoch": 0.7940200347105928, + "grad_norm": 1.0148388744532721, + "learning_rate": 4.789811049710723e-06, + "loss": 0.3897, + "step": 13039 + }, + { + "epoch": 0.7940809304874707, + "grad_norm": 1.0682527107350723, + "learning_rate": 4.7897790224360015e-06, + "loss": 0.4373, + "step": 13040 + }, + { + "epoch": 0.7941418262643486, + "grad_norm": 0.9731773136693981, + "learning_rate": 4.789746992828501e-06, + "loss": 0.4643, + "step": 13041 + }, + { + "epoch": 0.7942027220412264, + "grad_norm": 0.9604591439389215, + "learning_rate": 4.789714960888254e-06, + "loss": 0.3885, + "step": 13042 + }, + { + "epoch": 0.7942636178181043, + "grad_norm": 0.971043339411984, + "learning_rate": 4.7896829266152925e-06, + "loss": 0.4356, + "step": 13043 + }, + { + "epoch": 0.7943245135949822, + "grad_norm": 1.0579292347069968, + "learning_rate": 4.78965089000965e-06, + "loss": 0.3872, + "step": 13044 + }, + { + "epoch": 0.7943854093718601, + "grad_norm": 0.961775938108673, + "learning_rate": 4.789618851071358e-06, + "loss": 0.4246, + "step": 13045 + }, + { + "epoch": 0.7944463051487379, + "grad_norm": 0.9299690165198281, + "learning_rate": 4.78958680980045e-06, + "loss": 0.4312, + "step": 13046 + }, + { + "epoch": 0.7945072009256158, + "grad_norm": 1.0116203314883903, + "learning_rate": 4.789554766196959e-06, + "loss": 0.3681, + "step": 13047 + }, + { + "epoch": 0.7945680967024937, + "grad_norm": 0.9907736008666257, + "learning_rate": 4.789522720260917e-06, + "loss": 0.4616, + "step": 13048 + }, + { + "epoch": 0.7946289924793716, + "grad_norm": 0.9299679194617333, + "learning_rate": 4.789490671992357e-06, + "loss": 0.4851, + "step": 13049 + }, + { + "epoch": 0.7946898882562494, + "grad_norm": 1.1052068546332339, + "learning_rate": 4.789458621391312e-06, + "loss": 0.3549, + "step": 13050 + }, + { + "epoch": 0.7947507840331273, + "grad_norm": 0.9709547401214411, + "learning_rate": 4.789426568457814e-06, + "loss": 0.4637, + "step": 13051 + }, + { + "epoch": 0.7948116798100052, + "grad_norm": 0.9688248764491729, + "learning_rate": 4.7893945131918955e-06, + "loss": 0.3901, + "step": 13052 + }, + { + "epoch": 0.7948725755868831, + "grad_norm": 1.0603614333799358, + "learning_rate": 4.789362455593589e-06, + "loss": 0.4013, + "step": 13053 + }, + { + "epoch": 0.7949334713637609, + "grad_norm": 1.0183677425435957, + "learning_rate": 4.789330395662928e-06, + "loss": 0.4052, + "step": 13054 + }, + { + "epoch": 0.7949943671406388, + "grad_norm": 1.0373103004736366, + "learning_rate": 4.789298333399944e-06, + "loss": 0.4768, + "step": 13055 + }, + { + "epoch": 0.7950552629175167, + "grad_norm": 1.0404691089932336, + "learning_rate": 4.789266268804672e-06, + "loss": 0.405, + "step": 13056 + }, + { + "epoch": 0.7951161586943946, + "grad_norm": 1.0428080263540251, + "learning_rate": 4.789234201877142e-06, + "loss": 0.4269, + "step": 13057 + }, + { + "epoch": 0.7951770544712724, + "grad_norm": 0.9135755333256684, + "learning_rate": 4.789202132617389e-06, + "loss": 0.455, + "step": 13058 + }, + { + "epoch": 0.7952379502481502, + "grad_norm": 0.9463712228879706, + "learning_rate": 4.7891700610254436e-06, + "loss": 0.4437, + "step": 13059 + }, + { + "epoch": 0.7952988460250282, + "grad_norm": 0.9868840898407625, + "learning_rate": 4.7891379871013385e-06, + "loss": 0.4667, + "step": 13060 + }, + { + "epoch": 0.795359741801906, + "grad_norm": 1.0553519247348429, + "learning_rate": 4.7891059108451086e-06, + "loss": 0.3529, + "step": 13061 + }, + { + "epoch": 0.7954206375787839, + "grad_norm": 0.9630657984397558, + "learning_rate": 4.789073832256784e-06, + "loss": 0.4402, + "step": 13062 + }, + { + "epoch": 0.7954815333556617, + "grad_norm": 0.9743070887142377, + "learning_rate": 4.7890417513364e-06, + "loss": 0.419, + "step": 13063 + }, + { + "epoch": 0.7955424291325397, + "grad_norm": 1.037846392345482, + "learning_rate": 4.789009668083987e-06, + "loss": 0.4177, + "step": 13064 + }, + { + "epoch": 0.7956033249094175, + "grad_norm": 0.9467664387682367, + "learning_rate": 4.788977582499579e-06, + "loss": 0.3897, + "step": 13065 + }, + { + "epoch": 0.7956642206862954, + "grad_norm": 0.9538212930844834, + "learning_rate": 4.788945494583208e-06, + "loss": 0.4367, + "step": 13066 + }, + { + "epoch": 0.7957251164631732, + "grad_norm": 0.9878064305130954, + "learning_rate": 4.788913404334906e-06, + "loss": 0.4484, + "step": 13067 + }, + { + "epoch": 0.7957860122400512, + "grad_norm": 0.9257777221996661, + "learning_rate": 4.788881311754708e-06, + "loss": 0.4446, + "step": 13068 + }, + { + "epoch": 0.795846908016929, + "grad_norm": 1.0066298026361347, + "learning_rate": 4.788849216842645e-06, + "loss": 0.3536, + "step": 13069 + }, + { + "epoch": 0.7959078037938069, + "grad_norm": 1.0226652830231007, + "learning_rate": 4.7888171195987495e-06, + "loss": 0.4053, + "step": 13070 + }, + { + "epoch": 0.7959686995706847, + "grad_norm": 1.15987851236605, + "learning_rate": 4.788785020023055e-06, + "loss": 0.4601, + "step": 13071 + }, + { + "epoch": 0.7960295953475627, + "grad_norm": 1.0421085600834548, + "learning_rate": 4.788752918115593e-06, + "loss": 0.4281, + "step": 13072 + }, + { + "epoch": 0.7960904911244405, + "grad_norm": 1.0799787846164688, + "learning_rate": 4.788720813876398e-06, + "loss": 0.3772, + "step": 13073 + }, + { + "epoch": 0.7961513869013184, + "grad_norm": 0.9713244906903589, + "learning_rate": 4.788688707305502e-06, + "loss": 0.4314, + "step": 13074 + }, + { + "epoch": 0.7962122826781962, + "grad_norm": 1.031149270701065, + "learning_rate": 4.788656598402938e-06, + "loss": 0.3898, + "step": 13075 + }, + { + "epoch": 0.7962731784550742, + "grad_norm": 1.0643551819181785, + "learning_rate": 4.788624487168737e-06, + "loss": 0.4196, + "step": 13076 + }, + { + "epoch": 0.796334074231952, + "grad_norm": 1.0738579331743787, + "learning_rate": 4.788592373602934e-06, + "loss": 0.5126, + "step": 13077 + }, + { + "epoch": 0.7963949700088299, + "grad_norm": 1.0640777484390231, + "learning_rate": 4.78856025770556e-06, + "loss": 0.459, + "step": 13078 + }, + { + "epoch": 0.7964558657857077, + "grad_norm": 1.0519501991158213, + "learning_rate": 4.788528139476648e-06, + "loss": 0.4415, + "step": 13079 + }, + { + "epoch": 0.7965167615625857, + "grad_norm": 0.9803701917145277, + "learning_rate": 4.788496018916232e-06, + "loss": 0.4699, + "step": 13080 + }, + { + "epoch": 0.7965776573394635, + "grad_norm": 0.9649900167282086, + "learning_rate": 4.788463896024343e-06, + "loss": 0.4743, + "step": 13081 + }, + { + "epoch": 0.7966385531163414, + "grad_norm": 0.9985399065563603, + "learning_rate": 4.788431770801015e-06, + "loss": 0.428, + "step": 13082 + }, + { + "epoch": 0.7966994488932193, + "grad_norm": 1.0188167394479217, + "learning_rate": 4.788399643246281e-06, + "loss": 0.3607, + "step": 13083 + }, + { + "epoch": 0.7967603446700972, + "grad_norm": 0.9634332959447907, + "learning_rate": 4.788367513360173e-06, + "loss": 0.4438, + "step": 13084 + }, + { + "epoch": 0.796821240446975, + "grad_norm": 0.9554391045761035, + "learning_rate": 4.788335381142722e-06, + "loss": 0.4863, + "step": 13085 + }, + { + "epoch": 0.7968821362238528, + "grad_norm": 0.9298080591578196, + "learning_rate": 4.788303246593964e-06, + "loss": 0.4665, + "step": 13086 + }, + { + "epoch": 0.7969430320007308, + "grad_norm": 0.9586546112872101, + "learning_rate": 4.78827110971393e-06, + "loss": 0.4248, + "step": 13087 + }, + { + "epoch": 0.7970039277776086, + "grad_norm": 0.9268489670341614, + "learning_rate": 4.788238970502653e-06, + "loss": 0.489, + "step": 13088 + }, + { + "epoch": 0.7970648235544865, + "grad_norm": 1.0428930833748025, + "learning_rate": 4.788206828960165e-06, + "loss": 0.4115, + "step": 13089 + }, + { + "epoch": 0.7971257193313643, + "grad_norm": 1.002735533007527, + "learning_rate": 4.7881746850865e-06, + "loss": 0.412, + "step": 13090 + }, + { + "epoch": 0.7971866151082423, + "grad_norm": 1.0280004152563724, + "learning_rate": 4.78814253888169e-06, + "loss": 0.3263, + "step": 13091 + }, + { + "epoch": 0.7972475108851201, + "grad_norm": 0.9606524861522716, + "learning_rate": 4.788110390345768e-06, + "loss": 0.4384, + "step": 13092 + }, + { + "epoch": 0.797308406661998, + "grad_norm": 1.147689259449308, + "learning_rate": 4.788078239478767e-06, + "loss": 0.3664, + "step": 13093 + }, + { + "epoch": 0.7973693024388758, + "grad_norm": 1.0411402759216755, + "learning_rate": 4.78804608628072e-06, + "loss": 0.5126, + "step": 13094 + }, + { + "epoch": 0.7974301982157538, + "grad_norm": 1.013949702319973, + "learning_rate": 4.788013930751658e-06, + "loss": 0.3918, + "step": 13095 + }, + { + "epoch": 0.7974910939926316, + "grad_norm": 1.070775196350441, + "learning_rate": 4.787981772891616e-06, + "loss": 0.4077, + "step": 13096 + }, + { + "epoch": 0.7975519897695095, + "grad_norm": 0.9664539862067882, + "learning_rate": 4.787949612700625e-06, + "loss": 0.4663, + "step": 13097 + }, + { + "epoch": 0.7976128855463873, + "grad_norm": 0.9852753468675022, + "learning_rate": 4.787917450178719e-06, + "loss": 0.5049, + "step": 13098 + }, + { + "epoch": 0.7976737813232653, + "grad_norm": 1.035198255646841, + "learning_rate": 4.787885285325929e-06, + "loss": 0.4117, + "step": 13099 + }, + { + "epoch": 0.7977346771001431, + "grad_norm": 0.9666567580274309, + "learning_rate": 4.7878531181422904e-06, + "loss": 0.4344, + "step": 13100 + }, + { + "epoch": 0.797795572877021, + "grad_norm": 0.9860369430396799, + "learning_rate": 4.787820948627835e-06, + "loss": 0.3891, + "step": 13101 + }, + { + "epoch": 0.7978564686538988, + "grad_norm": 1.0515559716151577, + "learning_rate": 4.787788776782593e-06, + "loss": 0.3332, + "step": 13102 + }, + { + "epoch": 0.7979173644307768, + "grad_norm": 1.0057289988253428, + "learning_rate": 4.787756602606602e-06, + "loss": 0.3918, + "step": 13103 + }, + { + "epoch": 0.7979782602076546, + "grad_norm": 0.9769353007783873, + "learning_rate": 4.787724426099891e-06, + "loss": 0.3846, + "step": 13104 + }, + { + "epoch": 0.7980391559845325, + "grad_norm": 0.9847849835451379, + "learning_rate": 4.787692247262494e-06, + "loss": 0.4657, + "step": 13105 + }, + { + "epoch": 0.7981000517614103, + "grad_norm": 1.0450465443999626, + "learning_rate": 4.787660066094443e-06, + "loss": 0.4297, + "step": 13106 + }, + { + "epoch": 0.7981609475382883, + "grad_norm": 1.004363822832171, + "learning_rate": 4.787627882595772e-06, + "loss": 0.409, + "step": 13107 + }, + { + "epoch": 0.7982218433151661, + "grad_norm": 1.0308675683040727, + "learning_rate": 4.787595696766514e-06, + "loss": 0.3849, + "step": 13108 + }, + { + "epoch": 0.798282739092044, + "grad_norm": 1.0089673885086086, + "learning_rate": 4.7875635086067e-06, + "loss": 0.385, + "step": 13109 + }, + { + "epoch": 0.7983436348689218, + "grad_norm": 1.0009221969591424, + "learning_rate": 4.787531318116364e-06, + "loss": 0.4481, + "step": 13110 + }, + { + "epoch": 0.7984045306457997, + "grad_norm": 0.9542972859630674, + "learning_rate": 4.7874991252955395e-06, + "loss": 0.4509, + "step": 13111 + }, + { + "epoch": 0.7984654264226776, + "grad_norm": 1.0407079061787465, + "learning_rate": 4.787466930144257e-06, + "loss": 0.4287, + "step": 13112 + }, + { + "epoch": 0.7985263221995554, + "grad_norm": 1.0064328495814334, + "learning_rate": 4.787434732662552e-06, + "loss": 0.4187, + "step": 13113 + }, + { + "epoch": 0.7985872179764333, + "grad_norm": 1.020611092232792, + "learning_rate": 4.787402532850456e-06, + "loss": 0.4285, + "step": 13114 + }, + { + "epoch": 0.7986481137533112, + "grad_norm": 1.038301323327143, + "learning_rate": 4.787370330708001e-06, + "loss": 0.3663, + "step": 13115 + }, + { + "epoch": 0.7987090095301891, + "grad_norm": 1.0586553675423438, + "learning_rate": 4.787338126235221e-06, + "loss": 0.3228, + "step": 13116 + }, + { + "epoch": 0.7987699053070669, + "grad_norm": 1.055734465875612, + "learning_rate": 4.787305919432149e-06, + "loss": 0.4463, + "step": 13117 + }, + { + "epoch": 0.7988308010839448, + "grad_norm": 0.9761613225274606, + "learning_rate": 4.787273710298817e-06, + "loss": 0.3966, + "step": 13118 + }, + { + "epoch": 0.7988916968608227, + "grad_norm": 0.9749798978401041, + "learning_rate": 4.7872414988352576e-06, + "loss": 0.3874, + "step": 13119 + }, + { + "epoch": 0.7989525926377006, + "grad_norm": 1.0177598536873331, + "learning_rate": 4.787209285041505e-06, + "loss": 0.4438, + "step": 13120 + }, + { + "epoch": 0.7990134884145784, + "grad_norm": 0.9594037322562391, + "learning_rate": 4.78717706891759e-06, + "loss": 0.3971, + "step": 13121 + }, + { + "epoch": 0.7990743841914564, + "grad_norm": 1.021872825230866, + "learning_rate": 4.787144850463547e-06, + "loss": 0.3902, + "step": 13122 + }, + { + "epoch": 0.7991352799683342, + "grad_norm": 0.9764749693989682, + "learning_rate": 4.787112629679409e-06, + "loss": 0.3963, + "step": 13123 + }, + { + "epoch": 0.7991961757452121, + "grad_norm": 1.0004199430597418, + "learning_rate": 4.787080406565208e-06, + "loss": 0.3943, + "step": 13124 + }, + { + "epoch": 0.7992570715220899, + "grad_norm": 1.014950045816023, + "learning_rate": 4.787048181120977e-06, + "loss": 0.3585, + "step": 13125 + }, + { + "epoch": 0.7993179672989679, + "grad_norm": 0.974569652884851, + "learning_rate": 4.787015953346749e-06, + "loss": 0.3991, + "step": 13126 + }, + { + "epoch": 0.7993788630758457, + "grad_norm": 0.9762549881702526, + "learning_rate": 4.786983723242555e-06, + "loss": 0.5037, + "step": 13127 + }, + { + "epoch": 0.7994397588527236, + "grad_norm": 0.9918746150894123, + "learning_rate": 4.786951490808431e-06, + "loss": 0.3896, + "step": 13128 + }, + { + "epoch": 0.7995006546296014, + "grad_norm": 0.9807158937339595, + "learning_rate": 4.7869192560444085e-06, + "loss": 0.4624, + "step": 13129 + }, + { + "epoch": 0.7995615504064794, + "grad_norm": 0.9846496565671631, + "learning_rate": 4.78688701895052e-06, + "loss": 0.442, + "step": 13130 + }, + { + "epoch": 0.7996224461833572, + "grad_norm": 1.0330892024139977, + "learning_rate": 4.786854779526799e-06, + "loss": 0.4158, + "step": 13131 + }, + { + "epoch": 0.799683341960235, + "grad_norm": 0.9984501661545877, + "learning_rate": 4.786822537773277e-06, + "loss": 0.467, + "step": 13132 + }, + { + "epoch": 0.7997442377371129, + "grad_norm": 1.06431859783503, + "learning_rate": 4.786790293689989e-06, + "loss": 0.4397, + "step": 13133 + }, + { + "epoch": 0.7998051335139909, + "grad_norm": 0.9933308382860222, + "learning_rate": 4.786758047276965e-06, + "loss": 0.4192, + "step": 13134 + }, + { + "epoch": 0.7998660292908687, + "grad_norm": 1.0510869102866331, + "learning_rate": 4.786725798534241e-06, + "loss": 0.3632, + "step": 13135 + }, + { + "epoch": 0.7999269250677465, + "grad_norm": 1.0211327568250854, + "learning_rate": 4.786693547461847e-06, + "loss": 0.3152, + "step": 13136 + }, + { + "epoch": 0.7999878208446244, + "grad_norm": 0.9636601704004367, + "learning_rate": 4.786661294059818e-06, + "loss": 0.3764, + "step": 13137 + }, + { + "epoch": 0.8000487166215023, + "grad_norm": 0.9865908124346997, + "learning_rate": 4.786629038328185e-06, + "loss": 0.3568, + "step": 13138 + }, + { + "epoch": 0.8001096123983802, + "grad_norm": 0.9950150995913035, + "learning_rate": 4.786596780266983e-06, + "loss": 0.507, + "step": 13139 + }, + { + "epoch": 0.800170508175258, + "grad_norm": 0.9724259679627666, + "learning_rate": 4.7865645198762435e-06, + "loss": 0.4182, + "step": 13140 + }, + { + "epoch": 0.8002314039521359, + "grad_norm": 1.0124746468444845, + "learning_rate": 4.786532257155999e-06, + "loss": 0.3914, + "step": 13141 + }, + { + "epoch": 0.8002922997290138, + "grad_norm": 0.9413555608438043, + "learning_rate": 4.786499992106284e-06, + "loss": 0.4542, + "step": 13142 + }, + { + "epoch": 0.8003531955058917, + "grad_norm": 1.0268195342358373, + "learning_rate": 4.7864677247271296e-06, + "loss": 0.4532, + "step": 13143 + }, + { + "epoch": 0.8004140912827695, + "grad_norm": 1.0360115008013693, + "learning_rate": 4.7864354550185695e-06, + "loss": 0.4152, + "step": 13144 + }, + { + "epoch": 0.8004749870596474, + "grad_norm": 0.9903613344711152, + "learning_rate": 4.786403182980636e-06, + "loss": 0.4069, + "step": 13145 + }, + { + "epoch": 0.8005358828365253, + "grad_norm": 1.094066571306657, + "learning_rate": 4.786370908613363e-06, + "loss": 0.4007, + "step": 13146 + }, + { + "epoch": 0.8005967786134032, + "grad_norm": 0.9111457623317688, + "learning_rate": 4.786338631916783e-06, + "loss": 0.4904, + "step": 13147 + }, + { + "epoch": 0.800657674390281, + "grad_norm": 1.0098981468622485, + "learning_rate": 4.786306352890928e-06, + "loss": 0.396, + "step": 13148 + }, + { + "epoch": 0.8007185701671589, + "grad_norm": 1.0623615939158355, + "learning_rate": 4.786274071535833e-06, + "loss": 0.404, + "step": 13149 + }, + { + "epoch": 0.8007794659440368, + "grad_norm": 1.0517275915812303, + "learning_rate": 4.786241787851529e-06, + "loss": 0.4039, + "step": 13150 + }, + { + "epoch": 0.8008403617209147, + "grad_norm": 1.0827530271661898, + "learning_rate": 4.786209501838049e-06, + "loss": 0.4121, + "step": 13151 + }, + { + "epoch": 0.8009012574977925, + "grad_norm": 0.9952051633923994, + "learning_rate": 4.7861772134954264e-06, + "loss": 0.3914, + "step": 13152 + }, + { + "epoch": 0.8009621532746704, + "grad_norm": 1.0198755908077475, + "learning_rate": 4.786144922823694e-06, + "loss": 0.3779, + "step": 13153 + }, + { + "epoch": 0.8010230490515483, + "grad_norm": 1.0597213731618218, + "learning_rate": 4.786112629822885e-06, + "loss": 0.3659, + "step": 13154 + }, + { + "epoch": 0.8010839448284262, + "grad_norm": 1.029125435129854, + "learning_rate": 4.7860803344930314e-06, + "loss": 0.4241, + "step": 13155 + }, + { + "epoch": 0.801144840605304, + "grad_norm": 1.0065133286706367, + "learning_rate": 4.786048036834166e-06, + "loss": 0.3944, + "step": 13156 + }, + { + "epoch": 0.8012057363821818, + "grad_norm": 0.9703796821590648, + "learning_rate": 4.786015736846324e-06, + "loss": 0.3974, + "step": 13157 + }, + { + "epoch": 0.8012666321590598, + "grad_norm": 1.0414932278207567, + "learning_rate": 4.7859834345295365e-06, + "loss": 0.3809, + "step": 13158 + }, + { + "epoch": 0.8013275279359376, + "grad_norm": 0.9532654992074717, + "learning_rate": 4.785951129883835e-06, + "loss": 0.4413, + "step": 13159 + }, + { + "epoch": 0.8013884237128155, + "grad_norm": 1.0368422986386985, + "learning_rate": 4.785918822909256e-06, + "loss": 0.4207, + "step": 13160 + }, + { + "epoch": 0.8014493194896933, + "grad_norm": 0.9642913755086415, + "learning_rate": 4.78588651360583e-06, + "loss": 0.438, + "step": 13161 + }, + { + "epoch": 0.8015102152665713, + "grad_norm": 0.9121874427600382, + "learning_rate": 4.785854201973589e-06, + "loss": 0.4528, + "step": 13162 + }, + { + "epoch": 0.8015711110434491, + "grad_norm": 1.0109968313446436, + "learning_rate": 4.785821888012568e-06, + "loss": 0.4914, + "step": 13163 + }, + { + "epoch": 0.801632006820327, + "grad_norm": 1.0880775800837412, + "learning_rate": 4.7857895717228e-06, + "loss": 0.3926, + "step": 13164 + }, + { + "epoch": 0.8016929025972049, + "grad_norm": 0.9971086511914816, + "learning_rate": 4.785757253104316e-06, + "loss": 0.4846, + "step": 13165 + }, + { + "epoch": 0.8017537983740828, + "grad_norm": 1.043789764539818, + "learning_rate": 4.785724932157151e-06, + "loss": 0.4003, + "step": 13166 + }, + { + "epoch": 0.8018146941509606, + "grad_norm": 0.9099470010365567, + "learning_rate": 4.785692608881336e-06, + "loss": 0.4375, + "step": 13167 + }, + { + "epoch": 0.8018755899278385, + "grad_norm": 1.0223337426486865, + "learning_rate": 4.785660283276906e-06, + "loss": 0.3802, + "step": 13168 + }, + { + "epoch": 0.8019364857047164, + "grad_norm": 1.045647013383843, + "learning_rate": 4.785627955343892e-06, + "loss": 0.3634, + "step": 13169 + }, + { + "epoch": 0.8019973814815943, + "grad_norm": 0.9990106028361039, + "learning_rate": 4.7855956250823275e-06, + "loss": 0.4143, + "step": 13170 + }, + { + "epoch": 0.8020582772584721, + "grad_norm": 0.9851209746040018, + "learning_rate": 4.785563292492247e-06, + "loss": 0.4626, + "step": 13171 + }, + { + "epoch": 0.80211917303535, + "grad_norm": 1.0063895870868207, + "learning_rate": 4.785530957573681e-06, + "loss": 0.4303, + "step": 13172 + }, + { + "epoch": 0.8021800688122279, + "grad_norm": 0.986980180621702, + "learning_rate": 4.785498620326664e-06, + "loss": 0.4367, + "step": 13173 + }, + { + "epoch": 0.8022409645891058, + "grad_norm": 1.0599339993255303, + "learning_rate": 4.785466280751228e-06, + "loss": 0.3993, + "step": 13174 + }, + { + "epoch": 0.8023018603659836, + "grad_norm": 1.0342789501763228, + "learning_rate": 4.7854339388474065e-06, + "loss": 0.434, + "step": 13175 + }, + { + "epoch": 0.8023627561428615, + "grad_norm": 0.8922946551168803, + "learning_rate": 4.7854015946152335e-06, + "loss": 0.4733, + "step": 13176 + }, + { + "epoch": 0.8024236519197394, + "grad_norm": 0.983272585757078, + "learning_rate": 4.78536924805474e-06, + "loss": 0.4196, + "step": 13177 + }, + { + "epoch": 0.8024845476966173, + "grad_norm": 0.9916147296174822, + "learning_rate": 4.78533689916596e-06, + "loss": 0.4161, + "step": 13178 + }, + { + "epoch": 0.8025454434734951, + "grad_norm": 1.015661750638271, + "learning_rate": 4.785304547948925e-06, + "loss": 0.4465, + "step": 13179 + }, + { + "epoch": 0.802606339250373, + "grad_norm": 0.9158700382034447, + "learning_rate": 4.785272194403672e-06, + "loss": 0.4103, + "step": 13180 + }, + { + "epoch": 0.8026672350272509, + "grad_norm": 0.9383203010177089, + "learning_rate": 4.785239838530229e-06, + "loss": 0.4421, + "step": 13181 + }, + { + "epoch": 0.8027281308041287, + "grad_norm": 1.014750681888262, + "learning_rate": 4.785207480328632e-06, + "loss": 0.4422, + "step": 13182 + }, + { + "epoch": 0.8027890265810066, + "grad_norm": 1.1121100319850088, + "learning_rate": 4.785175119798913e-06, + "loss": 0.4526, + "step": 13183 + }, + { + "epoch": 0.8028499223578844, + "grad_norm": 1.1570470280240017, + "learning_rate": 4.785142756941105e-06, + "loss": 0.3778, + "step": 13184 + }, + { + "epoch": 0.8029108181347624, + "grad_norm": 1.1689540836005545, + "learning_rate": 4.785110391755241e-06, + "loss": 0.4105, + "step": 13185 + }, + { + "epoch": 0.8029717139116402, + "grad_norm": 0.9379515384181218, + "learning_rate": 4.7850780242413545e-06, + "loss": 0.4671, + "step": 13186 + }, + { + "epoch": 0.8030326096885181, + "grad_norm": 1.035949658611482, + "learning_rate": 4.785045654399477e-06, + "loss": 0.4529, + "step": 13187 + }, + { + "epoch": 0.8030935054653959, + "grad_norm": 1.119323854869936, + "learning_rate": 4.785013282229644e-06, + "loss": 0.4428, + "step": 13188 + }, + { + "epoch": 0.8031544012422739, + "grad_norm": 1.1064161436193036, + "learning_rate": 4.784980907731886e-06, + "loss": 0.4819, + "step": 13189 + }, + { + "epoch": 0.8032152970191517, + "grad_norm": 1.0194378235894175, + "learning_rate": 4.784948530906237e-06, + "loss": 0.3775, + "step": 13190 + }, + { + "epoch": 0.8032761927960296, + "grad_norm": 0.9442597739261845, + "learning_rate": 4.78491615175273e-06, + "loss": 0.455, + "step": 13191 + }, + { + "epoch": 0.8033370885729074, + "grad_norm": 1.006986449671345, + "learning_rate": 4.784883770271398e-06, + "loss": 0.4234, + "step": 13192 + }, + { + "epoch": 0.8033979843497854, + "grad_norm": 1.011499904117075, + "learning_rate": 4.7848513864622735e-06, + "loss": 0.4151, + "step": 13193 + }, + { + "epoch": 0.8034588801266632, + "grad_norm": 0.9157772929688146, + "learning_rate": 4.784819000325391e-06, + "loss": 0.4049, + "step": 13194 + }, + { + "epoch": 0.8035197759035411, + "grad_norm": 1.0946604004582863, + "learning_rate": 4.784786611860781e-06, + "loss": 0.3807, + "step": 13195 + }, + { + "epoch": 0.8035806716804189, + "grad_norm": 1.0263453116639618, + "learning_rate": 4.784754221068479e-06, + "loss": 0.4373, + "step": 13196 + }, + { + "epoch": 0.8036415674572969, + "grad_norm": 0.8976769034787738, + "learning_rate": 4.784721827948517e-06, + "loss": 0.4722, + "step": 13197 + }, + { + "epoch": 0.8037024632341747, + "grad_norm": 0.9757375185715305, + "learning_rate": 4.7846894325009276e-06, + "loss": 0.3988, + "step": 13198 + }, + { + "epoch": 0.8037633590110526, + "grad_norm": 0.9717982998688813, + "learning_rate": 4.784657034725744e-06, + "loss": 0.3866, + "step": 13199 + }, + { + "epoch": 0.8038242547879304, + "grad_norm": 0.9535736436662046, + "learning_rate": 4.784624634622999e-06, + "loss": 0.4746, + "step": 13200 + }, + { + "epoch": 0.8038851505648084, + "grad_norm": 1.0111205194761341, + "learning_rate": 4.784592232192726e-06, + "loss": 0.4218, + "step": 13201 + }, + { + "epoch": 0.8039460463416862, + "grad_norm": 0.950055396235979, + "learning_rate": 4.784559827434958e-06, + "loss": 0.4569, + "step": 13202 + }, + { + "epoch": 0.804006942118564, + "grad_norm": 1.0524091037264438, + "learning_rate": 4.7845274203497285e-06, + "loss": 0.4157, + "step": 13203 + }, + { + "epoch": 0.804067837895442, + "grad_norm": 1.0886182165730047, + "learning_rate": 4.784495010937069e-06, + "loss": 0.3655, + "step": 13204 + }, + { + "epoch": 0.8041287336723199, + "grad_norm": 0.9654211462535479, + "learning_rate": 4.784462599197014e-06, + "loss": 0.5271, + "step": 13205 + }, + { + "epoch": 0.8041896294491977, + "grad_norm": 1.0301366097665918, + "learning_rate": 4.784430185129596e-06, + "loss": 0.3898, + "step": 13206 + }, + { + "epoch": 0.8042505252260755, + "grad_norm": 0.961534830703482, + "learning_rate": 4.784397768734848e-06, + "loss": 0.4316, + "step": 13207 + }, + { + "epoch": 0.8043114210029535, + "grad_norm": 0.9413100259466298, + "learning_rate": 4.784365350012803e-06, + "loss": 0.3699, + "step": 13208 + }, + { + "epoch": 0.8043723167798313, + "grad_norm": 1.0054538794674432, + "learning_rate": 4.784332928963494e-06, + "loss": 0.422, + "step": 13209 + }, + { + "epoch": 0.8044332125567092, + "grad_norm": 0.9351016288729934, + "learning_rate": 4.7843005055869546e-06, + "loss": 0.4444, + "step": 13210 + }, + { + "epoch": 0.804494108333587, + "grad_norm": 0.9926050442790793, + "learning_rate": 4.784268079883217e-06, + "loss": 0.4102, + "step": 13211 + }, + { + "epoch": 0.804555004110465, + "grad_norm": 0.9980844817934545, + "learning_rate": 4.784235651852315e-06, + "loss": 0.3877, + "step": 13212 + }, + { + "epoch": 0.8046158998873428, + "grad_norm": 0.9944913672215905, + "learning_rate": 4.784203221494281e-06, + "loss": 0.3603, + "step": 13213 + }, + { + "epoch": 0.8046767956642207, + "grad_norm": 1.044154938100063, + "learning_rate": 4.784170788809147e-06, + "loss": 0.3912, + "step": 13214 + }, + { + "epoch": 0.8047376914410985, + "grad_norm": 1.0410658587868105, + "learning_rate": 4.784138353796949e-06, + "loss": 0.4234, + "step": 13215 + }, + { + "epoch": 0.8047985872179765, + "grad_norm": 0.9359740067829851, + "learning_rate": 4.7841059164577175e-06, + "loss": 0.3988, + "step": 13216 + }, + { + "epoch": 0.8048594829948543, + "grad_norm": 1.0501989405803982, + "learning_rate": 4.784073476791487e-06, + "loss": 0.3863, + "step": 13217 + }, + { + "epoch": 0.8049203787717322, + "grad_norm": 1.0265168063386847, + "learning_rate": 4.7840410347982894e-06, + "loss": 0.3944, + "step": 13218 + }, + { + "epoch": 0.80498127454861, + "grad_norm": 0.9031841582011395, + "learning_rate": 4.784008590478157e-06, + "loss": 0.4371, + "step": 13219 + }, + { + "epoch": 0.805042170325488, + "grad_norm": 0.9852664053015633, + "learning_rate": 4.783976143831126e-06, + "loss": 0.3956, + "step": 13220 + }, + { + "epoch": 0.8051030661023658, + "grad_norm": 0.9955472159488067, + "learning_rate": 4.783943694857227e-06, + "loss": 0.4064, + "step": 13221 + }, + { + "epoch": 0.8051639618792437, + "grad_norm": 1.1196179523333576, + "learning_rate": 4.783911243556494e-06, + "loss": 0.4092, + "step": 13222 + }, + { + "epoch": 0.8052248576561215, + "grad_norm": 1.0180194041292392, + "learning_rate": 4.78387878992896e-06, + "loss": 0.359, + "step": 13223 + }, + { + "epoch": 0.8052857534329995, + "grad_norm": 0.9624016745789218, + "learning_rate": 4.783846333974656e-06, + "loss": 0.4275, + "step": 13224 + }, + { + "epoch": 0.8053466492098773, + "grad_norm": 1.0064564531697995, + "learning_rate": 4.783813875693618e-06, + "loss": 0.4116, + "step": 13225 + }, + { + "epoch": 0.8054075449867552, + "grad_norm": 0.9921100485414344, + "learning_rate": 4.783781415085879e-06, + "loss": 0.488, + "step": 13226 + }, + { + "epoch": 0.805468440763633, + "grad_norm": 1.021714875695211, + "learning_rate": 4.783748952151469e-06, + "loss": 0.416, + "step": 13227 + }, + { + "epoch": 0.805529336540511, + "grad_norm": 1.038465467020534, + "learning_rate": 4.783716486890424e-06, + "loss": 0.3726, + "step": 13228 + }, + { + "epoch": 0.8055902323173888, + "grad_norm": 1.070676691792393, + "learning_rate": 4.783684019302776e-06, + "loss": 0.3864, + "step": 13229 + }, + { + "epoch": 0.8056511280942666, + "grad_norm": 1.0506980899958869, + "learning_rate": 4.7836515493885585e-06, + "loss": 0.4126, + "step": 13230 + }, + { + "epoch": 0.8057120238711445, + "grad_norm": 1.018856503238034, + "learning_rate": 4.783619077147804e-06, + "loss": 0.4537, + "step": 13231 + }, + { + "epoch": 0.8057729196480224, + "grad_norm": 0.9518414703994326, + "learning_rate": 4.783586602580546e-06, + "loss": 0.3888, + "step": 13232 + }, + { + "epoch": 0.8058338154249003, + "grad_norm": 0.9497156673568098, + "learning_rate": 4.783554125686817e-06, + "loss": 0.4208, + "step": 13233 + }, + { + "epoch": 0.8058947112017781, + "grad_norm": 0.9758858165250323, + "learning_rate": 4.783521646466651e-06, + "loss": 0.4107, + "step": 13234 + }, + { + "epoch": 0.805955606978656, + "grad_norm": 0.931480229493935, + "learning_rate": 4.783489164920081e-06, + "loss": 0.4258, + "step": 13235 + }, + { + "epoch": 0.8060165027555339, + "grad_norm": 0.9804244204067465, + "learning_rate": 4.783456681047139e-06, + "loss": 0.41, + "step": 13236 + }, + { + "epoch": 0.8060773985324118, + "grad_norm": 1.094507400953937, + "learning_rate": 4.783424194847859e-06, + "loss": 0.3835, + "step": 13237 + }, + { + "epoch": 0.8061382943092896, + "grad_norm": 0.9907787438981636, + "learning_rate": 4.783391706322274e-06, + "loss": 0.3684, + "step": 13238 + }, + { + "epoch": 0.8061991900861675, + "grad_norm": 0.9927662478835689, + "learning_rate": 4.7833592154704165e-06, + "loss": 0.3495, + "step": 13239 + }, + { + "epoch": 0.8062600858630454, + "grad_norm": 0.9851808546164983, + "learning_rate": 4.783326722292321e-06, + "loss": 0.4511, + "step": 13240 + }, + { + "epoch": 0.8063209816399233, + "grad_norm": 1.0827647936102978, + "learning_rate": 4.783294226788019e-06, + "loss": 0.3457, + "step": 13241 + }, + { + "epoch": 0.8063818774168011, + "grad_norm": 0.9574431919851554, + "learning_rate": 4.783261728957544e-06, + "loss": 0.4284, + "step": 13242 + }, + { + "epoch": 0.806442773193679, + "grad_norm": 1.0766050059714636, + "learning_rate": 4.7832292288009306e-06, + "loss": 0.4518, + "step": 13243 + }, + { + "epoch": 0.8065036689705569, + "grad_norm": 1.0239593398047784, + "learning_rate": 4.78319672631821e-06, + "loss": 0.4008, + "step": 13244 + }, + { + "epoch": 0.8065645647474348, + "grad_norm": 1.0465578000139852, + "learning_rate": 4.7831642215094165e-06, + "loss": 0.4326, + "step": 13245 + }, + { + "epoch": 0.8066254605243126, + "grad_norm": 1.0236195842544231, + "learning_rate": 4.783131714374582e-06, + "loss": 0.4463, + "step": 13246 + }, + { + "epoch": 0.8066863563011906, + "grad_norm": 0.9887690778401448, + "learning_rate": 4.783099204913741e-06, + "loss": 0.3827, + "step": 13247 + }, + { + "epoch": 0.8067472520780684, + "grad_norm": 0.9669395025459301, + "learning_rate": 4.7830666931269255e-06, + "loss": 0.4802, + "step": 13248 + }, + { + "epoch": 0.8068081478549463, + "grad_norm": 1.035513306662622, + "learning_rate": 4.78303417901417e-06, + "loss": 0.3492, + "step": 13249 + }, + { + "epoch": 0.8068690436318241, + "grad_norm": 1.039698662997154, + "learning_rate": 4.783001662575506e-06, + "loss": 0.3927, + "step": 13250 + }, + { + "epoch": 0.8069299394087021, + "grad_norm": 0.9374506647363836, + "learning_rate": 4.782969143810967e-06, + "loss": 0.4324, + "step": 13251 + }, + { + "epoch": 0.8069908351855799, + "grad_norm": 1.0274174704420262, + "learning_rate": 4.7829366227205875e-06, + "loss": 0.4136, + "step": 13252 + }, + { + "epoch": 0.8070517309624577, + "grad_norm": 0.9786149015765341, + "learning_rate": 4.782904099304399e-06, + "loss": 0.4852, + "step": 13253 + }, + { + "epoch": 0.8071126267393356, + "grad_norm": 1.0025478311403773, + "learning_rate": 4.7828715735624355e-06, + "loss": 0.3992, + "step": 13254 + }, + { + "epoch": 0.8071735225162135, + "grad_norm": 0.9251075525917501, + "learning_rate": 4.78283904549473e-06, + "loss": 0.4485, + "step": 13255 + }, + { + "epoch": 0.8072344182930914, + "grad_norm": 0.9136292866134662, + "learning_rate": 4.7828065151013155e-06, + "loss": 0.4882, + "step": 13256 + }, + { + "epoch": 0.8072953140699692, + "grad_norm": 1.0466608420961687, + "learning_rate": 4.782773982382225e-06, + "loss": 0.4199, + "step": 13257 + }, + { + "epoch": 0.8073562098468471, + "grad_norm": 0.9852715899183601, + "learning_rate": 4.782741447337492e-06, + "loss": 0.4718, + "step": 13258 + }, + { + "epoch": 0.807417105623725, + "grad_norm": 1.0708808513175008, + "learning_rate": 4.782708909967149e-06, + "loss": 0.3775, + "step": 13259 + }, + { + "epoch": 0.8074780014006029, + "grad_norm": 1.0140752373412834, + "learning_rate": 4.78267637027123e-06, + "loss": 0.4046, + "step": 13260 + }, + { + "epoch": 0.8075388971774807, + "grad_norm": 1.0387144241210922, + "learning_rate": 4.782643828249768e-06, + "loss": 0.4034, + "step": 13261 + }, + { + "epoch": 0.8075997929543586, + "grad_norm": 0.9994474102608057, + "learning_rate": 4.782611283902795e-06, + "loss": 0.4042, + "step": 13262 + }, + { + "epoch": 0.8076606887312365, + "grad_norm": 1.0521592890268618, + "learning_rate": 4.7825787372303455e-06, + "loss": 0.4049, + "step": 13263 + }, + { + "epoch": 0.8077215845081144, + "grad_norm": 0.9260633137673026, + "learning_rate": 4.782546188232453e-06, + "loss": 0.4361, + "step": 13264 + }, + { + "epoch": 0.8077824802849922, + "grad_norm": 0.9427947943795814, + "learning_rate": 4.7825136369091495e-06, + "loss": 0.4626, + "step": 13265 + }, + { + "epoch": 0.8078433760618701, + "grad_norm": 1.032656383849258, + "learning_rate": 4.782481083260468e-06, + "loss": 0.3642, + "step": 13266 + }, + { + "epoch": 0.807904271838748, + "grad_norm": 1.0928056459491706, + "learning_rate": 4.7824485272864425e-06, + "loss": 0.3358, + "step": 13267 + }, + { + "epoch": 0.8079651676156259, + "grad_norm": 1.095743837149125, + "learning_rate": 4.782415968987106e-06, + "loss": 0.403, + "step": 13268 + }, + { + "epoch": 0.8080260633925037, + "grad_norm": 1.0693167464772018, + "learning_rate": 4.782383408362492e-06, + "loss": 0.4118, + "step": 13269 + }, + { + "epoch": 0.8080869591693816, + "grad_norm": 0.9016532899203505, + "learning_rate": 4.782350845412633e-06, + "loss": 0.4666, + "step": 13270 + }, + { + "epoch": 0.8081478549462595, + "grad_norm": 0.988883526529999, + "learning_rate": 4.7823182801375626e-06, + "loss": 0.4021, + "step": 13271 + }, + { + "epoch": 0.8082087507231374, + "grad_norm": 1.1442402781270804, + "learning_rate": 4.7822857125373134e-06, + "loss": 0.4724, + "step": 13272 + }, + { + "epoch": 0.8082696465000152, + "grad_norm": 0.9298332698316908, + "learning_rate": 4.782253142611919e-06, + "loss": 0.4569, + "step": 13273 + }, + { + "epoch": 0.808330542276893, + "grad_norm": 0.9599847733906339, + "learning_rate": 4.7822205703614124e-06, + "loss": 0.391, + "step": 13274 + }, + { + "epoch": 0.808391438053771, + "grad_norm": 1.055580947279908, + "learning_rate": 4.7821879957858275e-06, + "loss": 0.3479, + "step": 13275 + }, + { + "epoch": 0.8084523338306489, + "grad_norm": 1.0070944532081858, + "learning_rate": 4.782155418885196e-06, + "loss": 0.503, + "step": 13276 + }, + { + "epoch": 0.8085132296075267, + "grad_norm": 0.9661588825795513, + "learning_rate": 4.782122839659552e-06, + "loss": 0.3885, + "step": 13277 + }, + { + "epoch": 0.8085741253844045, + "grad_norm": 1.00076897157924, + "learning_rate": 4.7820902581089305e-06, + "loss": 0.3868, + "step": 13278 + }, + { + "epoch": 0.8086350211612825, + "grad_norm": 1.01774324633492, + "learning_rate": 4.782057674233362e-06, + "loss": 0.3823, + "step": 13279 + }, + { + "epoch": 0.8086959169381603, + "grad_norm": 0.9566907485962821, + "learning_rate": 4.782025088032879e-06, + "loss": 0.422, + "step": 13280 + }, + { + "epoch": 0.8087568127150382, + "grad_norm": 0.9296948769203092, + "learning_rate": 4.781992499507518e-06, + "loss": 0.4547, + "step": 13281 + }, + { + "epoch": 0.808817708491916, + "grad_norm": 1.045382620124847, + "learning_rate": 4.78195990865731e-06, + "loss": 0.3944, + "step": 13282 + }, + { + "epoch": 0.808878604268794, + "grad_norm": 1.045540527308778, + "learning_rate": 4.781927315482289e-06, + "loss": 0.3863, + "step": 13283 + }, + { + "epoch": 0.8089395000456718, + "grad_norm": 0.9437692962840368, + "learning_rate": 4.781894719982487e-06, + "loss": 0.4667, + "step": 13284 + }, + { + "epoch": 0.8090003958225497, + "grad_norm": 1.0604640142172435, + "learning_rate": 4.781862122157939e-06, + "loss": 0.3747, + "step": 13285 + }, + { + "epoch": 0.8090612915994276, + "grad_norm": 1.018818523291482, + "learning_rate": 4.781829522008676e-06, + "loss": 0.3722, + "step": 13286 + }, + { + "epoch": 0.8091221873763055, + "grad_norm": 0.9491905916529033, + "learning_rate": 4.7817969195347345e-06, + "loss": 0.4437, + "step": 13287 + }, + { + "epoch": 0.8091830831531833, + "grad_norm": 0.9893819270921902, + "learning_rate": 4.781764314736144e-06, + "loss": 0.514, + "step": 13288 + }, + { + "epoch": 0.8092439789300612, + "grad_norm": 1.0015987721222477, + "learning_rate": 4.78173170761294e-06, + "loss": 0.3927, + "step": 13289 + }, + { + "epoch": 0.8093048747069391, + "grad_norm": 1.0220601295547251, + "learning_rate": 4.7816990981651555e-06, + "loss": 0.3861, + "step": 13290 + }, + { + "epoch": 0.809365770483817, + "grad_norm": 1.040661661531611, + "learning_rate": 4.7816664863928235e-06, + "loss": 0.3553, + "step": 13291 + }, + { + "epoch": 0.8094266662606948, + "grad_norm": 1.037178867236975, + "learning_rate": 4.781633872295977e-06, + "loss": 0.3767, + "step": 13292 + }, + { + "epoch": 0.8094875620375727, + "grad_norm": 1.0053114318612661, + "learning_rate": 4.78160125587465e-06, + "loss": 0.4613, + "step": 13293 + }, + { + "epoch": 0.8095484578144506, + "grad_norm": 1.063251981703087, + "learning_rate": 4.781568637128874e-06, + "loss": 0.3663, + "step": 13294 + }, + { + "epoch": 0.8096093535913285, + "grad_norm": 1.060796323942673, + "learning_rate": 4.781536016058683e-06, + "loss": 0.3337, + "step": 13295 + }, + { + "epoch": 0.8096702493682063, + "grad_norm": 1.0213890971133368, + "learning_rate": 4.781503392664111e-06, + "loss": 0.4785, + "step": 13296 + }, + { + "epoch": 0.8097311451450842, + "grad_norm": 0.9597344366675314, + "learning_rate": 4.781470766945191e-06, + "loss": 0.3915, + "step": 13297 + }, + { + "epoch": 0.8097920409219621, + "grad_norm": 1.0132532309296804, + "learning_rate": 4.781438138901956e-06, + "loss": 0.4226, + "step": 13298 + }, + { + "epoch": 0.80985293669884, + "grad_norm": 0.9585703173260982, + "learning_rate": 4.781405508534439e-06, + "loss": 0.4384, + "step": 13299 + }, + { + "epoch": 0.8099138324757178, + "grad_norm": 1.0460955110554961, + "learning_rate": 4.781372875842675e-06, + "loss": 0.3841, + "step": 13300 + }, + { + "epoch": 0.8099747282525956, + "grad_norm": 1.0695192195534293, + "learning_rate": 4.781340240826694e-06, + "loss": 0.3939, + "step": 13301 + }, + { + "epoch": 0.8100356240294736, + "grad_norm": 0.997248974055564, + "learning_rate": 4.7813076034865315e-06, + "loss": 0.451, + "step": 13302 + }, + { + "epoch": 0.8100965198063514, + "grad_norm": 0.9995238406766132, + "learning_rate": 4.78127496382222e-06, + "loss": 0.4338, + "step": 13303 + }, + { + "epoch": 0.8101574155832293, + "grad_norm": 0.9708306754924818, + "learning_rate": 4.781242321833793e-06, + "loss": 0.3636, + "step": 13304 + }, + { + "epoch": 0.8102183113601071, + "grad_norm": 1.0409843348783472, + "learning_rate": 4.781209677521284e-06, + "loss": 0.4358, + "step": 13305 + }, + { + "epoch": 0.8102792071369851, + "grad_norm": 1.0086510759997114, + "learning_rate": 4.781177030884726e-06, + "loss": 0.4288, + "step": 13306 + }, + { + "epoch": 0.8103401029138629, + "grad_norm": 1.0692399440625924, + "learning_rate": 4.781144381924152e-06, + "loss": 0.4031, + "step": 13307 + }, + { + "epoch": 0.8104009986907408, + "grad_norm": 1.0781831302021365, + "learning_rate": 4.781111730639596e-06, + "loss": 0.3445, + "step": 13308 + }, + { + "epoch": 0.8104618944676186, + "grad_norm": 0.9734659060159916, + "learning_rate": 4.7810790770310896e-06, + "loss": 0.4719, + "step": 13309 + }, + { + "epoch": 0.8105227902444966, + "grad_norm": 0.9944569932450318, + "learning_rate": 4.7810464210986685e-06, + "loss": 0.4378, + "step": 13310 + }, + { + "epoch": 0.8105836860213744, + "grad_norm": 1.0187668903442915, + "learning_rate": 4.781013762842365e-06, + "loss": 0.4535, + "step": 13311 + }, + { + "epoch": 0.8106445817982523, + "grad_norm": 1.0329412931610287, + "learning_rate": 4.780981102262211e-06, + "loss": 0.4028, + "step": 13312 + }, + { + "epoch": 0.8107054775751301, + "grad_norm": 1.0587975503092533, + "learning_rate": 4.780948439358242e-06, + "loss": 0.3595, + "step": 13313 + }, + { + "epoch": 0.8107663733520081, + "grad_norm": 0.9848788899729789, + "learning_rate": 4.780915774130489e-06, + "loss": 0.4108, + "step": 13314 + }, + { + "epoch": 0.8108272691288859, + "grad_norm": 1.0306068469620742, + "learning_rate": 4.780883106578987e-06, + "loss": 0.399, + "step": 13315 + }, + { + "epoch": 0.8108881649057638, + "grad_norm": 1.0486913776549014, + "learning_rate": 4.780850436703769e-06, + "loss": 0.4197, + "step": 13316 + }, + { + "epoch": 0.8109490606826416, + "grad_norm": 1.004033178941646, + "learning_rate": 4.780817764504868e-06, + "loss": 0.3652, + "step": 13317 + }, + { + "epoch": 0.8110099564595196, + "grad_norm": 1.0026931599982967, + "learning_rate": 4.7807850899823164e-06, + "loss": 0.3974, + "step": 13318 + }, + { + "epoch": 0.8110708522363974, + "grad_norm": 1.0227637072286933, + "learning_rate": 4.78075241313615e-06, + "loss": 0.3834, + "step": 13319 + }, + { + "epoch": 0.8111317480132753, + "grad_norm": 0.9863466464675043, + "learning_rate": 4.780719733966399e-06, + "loss": 0.3788, + "step": 13320 + }, + { + "epoch": 0.8111926437901531, + "grad_norm": 1.0701107248300978, + "learning_rate": 4.780687052473098e-06, + "loss": 0.4163, + "step": 13321 + }, + { + "epoch": 0.8112535395670311, + "grad_norm": 1.0031299606145974, + "learning_rate": 4.7806543686562814e-06, + "loss": 0.4327, + "step": 13322 + }, + { + "epoch": 0.8113144353439089, + "grad_norm": 0.9772987177673577, + "learning_rate": 4.780621682515981e-06, + "loss": 0.5127, + "step": 13323 + }, + { + "epoch": 0.8113753311207867, + "grad_norm": 1.0564548290806535, + "learning_rate": 4.780588994052231e-06, + "loss": 0.3592, + "step": 13324 + }, + { + "epoch": 0.8114362268976646, + "grad_norm": 0.9956217776906531, + "learning_rate": 4.780556303265064e-06, + "loss": 0.4832, + "step": 13325 + }, + { + "epoch": 0.8114971226745425, + "grad_norm": 1.09606343806533, + "learning_rate": 4.780523610154514e-06, + "loss": 0.4399, + "step": 13326 + }, + { + "epoch": 0.8115580184514204, + "grad_norm": 0.9811290824270568, + "learning_rate": 4.780490914720613e-06, + "loss": 0.3824, + "step": 13327 + }, + { + "epoch": 0.8116189142282982, + "grad_norm": 1.015426271138587, + "learning_rate": 4.780458216963396e-06, + "loss": 0.4339, + "step": 13328 + }, + { + "epoch": 0.8116798100051762, + "grad_norm": 1.00010402907508, + "learning_rate": 4.780425516882896e-06, + "loss": 0.3735, + "step": 13329 + }, + { + "epoch": 0.811740705782054, + "grad_norm": 0.9680688939069201, + "learning_rate": 4.780392814479146e-06, + "loss": 0.3704, + "step": 13330 + }, + { + "epoch": 0.8118016015589319, + "grad_norm": 0.9467480558129089, + "learning_rate": 4.780360109752178e-06, + "loss": 0.3989, + "step": 13331 + }, + { + "epoch": 0.8118624973358097, + "grad_norm": 1.071194356569108, + "learning_rate": 4.7803274027020276e-06, + "loss": 0.3576, + "step": 13332 + }, + { + "epoch": 0.8119233931126877, + "grad_norm": 1.038134649099734, + "learning_rate": 4.780294693328727e-06, + "loss": 0.324, + "step": 13333 + }, + { + "epoch": 0.8119842888895655, + "grad_norm": 0.9497379632568382, + "learning_rate": 4.780261981632309e-06, + "loss": 0.4092, + "step": 13334 + }, + { + "epoch": 0.8120451846664434, + "grad_norm": 1.0259961474049022, + "learning_rate": 4.780229267612808e-06, + "loss": 0.3703, + "step": 13335 + }, + { + "epoch": 0.8121060804433212, + "grad_norm": 1.0717858200749883, + "learning_rate": 4.780196551270256e-06, + "loss": 0.3599, + "step": 13336 + }, + { + "epoch": 0.8121669762201992, + "grad_norm": 1.0326033610547198, + "learning_rate": 4.780163832604688e-06, + "loss": 0.4568, + "step": 13337 + }, + { + "epoch": 0.812227871997077, + "grad_norm": 0.9850093125555969, + "learning_rate": 4.780131111616136e-06, + "loss": 0.396, + "step": 13338 + }, + { + "epoch": 0.8122887677739549, + "grad_norm": 1.152234018525591, + "learning_rate": 4.780098388304634e-06, + "loss": 0.3822, + "step": 13339 + }, + { + "epoch": 0.8123496635508327, + "grad_norm": 0.9199128001994564, + "learning_rate": 4.780065662670215e-06, + "loss": 0.4567, + "step": 13340 + }, + { + "epoch": 0.8124105593277107, + "grad_norm": 0.9965727191361305, + "learning_rate": 4.780032934712913e-06, + "loss": 0.4357, + "step": 13341 + }, + { + "epoch": 0.8124714551045885, + "grad_norm": 1.0761193183379802, + "learning_rate": 4.780000204432761e-06, + "loss": 0.395, + "step": 13342 + }, + { + "epoch": 0.8125323508814664, + "grad_norm": 0.9996567859468025, + "learning_rate": 4.779967471829792e-06, + "loss": 0.4523, + "step": 13343 + }, + { + "epoch": 0.8125932466583442, + "grad_norm": 1.0209884657386783, + "learning_rate": 4.779934736904039e-06, + "loss": 0.4103, + "step": 13344 + }, + { + "epoch": 0.8126541424352222, + "grad_norm": 1.0555973261658496, + "learning_rate": 4.7799019996555365e-06, + "loss": 0.3812, + "step": 13345 + }, + { + "epoch": 0.8127150382121, + "grad_norm": 0.9584890792556215, + "learning_rate": 4.779869260084317e-06, + "loss": 0.3874, + "step": 13346 + }, + { + "epoch": 0.8127759339889779, + "grad_norm": 0.9979550079286955, + "learning_rate": 4.779836518190414e-06, + "loss": 0.4001, + "step": 13347 + }, + { + "epoch": 0.8128368297658557, + "grad_norm": 1.047684553465827, + "learning_rate": 4.7798037739738604e-06, + "loss": 0.3809, + "step": 13348 + }, + { + "epoch": 0.8128977255427337, + "grad_norm": 0.916372756577186, + "learning_rate": 4.779771027434691e-06, + "loss": 0.4459, + "step": 13349 + }, + { + "epoch": 0.8129586213196115, + "grad_norm": 0.9870419013976851, + "learning_rate": 4.779738278572938e-06, + "loss": 0.427, + "step": 13350 + }, + { + "epoch": 0.8130195170964893, + "grad_norm": 1.0616662012627316, + "learning_rate": 4.779705527388635e-06, + "loss": 0.3507, + "step": 13351 + }, + { + "epoch": 0.8130804128733672, + "grad_norm": 0.9824062166490907, + "learning_rate": 4.779672773881816e-06, + "loss": 0.402, + "step": 13352 + }, + { + "epoch": 0.8131413086502451, + "grad_norm": 1.0061864068907143, + "learning_rate": 4.779640018052513e-06, + "loss": 0.4717, + "step": 13353 + }, + { + "epoch": 0.813202204427123, + "grad_norm": 0.9660954222801982, + "learning_rate": 4.77960725990076e-06, + "loss": 0.413, + "step": 13354 + }, + { + "epoch": 0.8132631002040008, + "grad_norm": 0.9874181513974819, + "learning_rate": 4.779574499426591e-06, + "loss": 0.4569, + "step": 13355 + }, + { + "epoch": 0.8133239959808787, + "grad_norm": 0.9339066755066711, + "learning_rate": 4.779541736630039e-06, + "loss": 0.4488, + "step": 13356 + }, + { + "epoch": 0.8133848917577566, + "grad_norm": 1.0338457188938865, + "learning_rate": 4.779508971511136e-06, + "loss": 0.4174, + "step": 13357 + }, + { + "epoch": 0.8134457875346345, + "grad_norm": 1.0244616771930983, + "learning_rate": 4.7794762040699186e-06, + "loss": 0.5477, + "step": 13358 + }, + { + "epoch": 0.8135066833115123, + "grad_norm": 0.9528471307272485, + "learning_rate": 4.779443434306417e-06, + "loss": 0.4517, + "step": 13359 + }, + { + "epoch": 0.8135675790883902, + "grad_norm": 1.044954030492398, + "learning_rate": 4.779410662220667e-06, + "loss": 0.3789, + "step": 13360 + }, + { + "epoch": 0.8136284748652681, + "grad_norm": 1.0604082444467147, + "learning_rate": 4.7793778878127e-06, + "loss": 0.3695, + "step": 13361 + }, + { + "epoch": 0.813689370642146, + "grad_norm": 1.0093505196527703, + "learning_rate": 4.779345111082549e-06, + "loss": 0.3865, + "step": 13362 + }, + { + "epoch": 0.8137502664190238, + "grad_norm": 1.0724915949971447, + "learning_rate": 4.7793123320302505e-06, + "loss": 0.3682, + "step": 13363 + }, + { + "epoch": 0.8138111621959017, + "grad_norm": 0.965945494689146, + "learning_rate": 4.7792795506558354e-06, + "loss": 0.4126, + "step": 13364 + }, + { + "epoch": 0.8138720579727796, + "grad_norm": 0.9344586402936288, + "learning_rate": 4.779246766959337e-06, + "loss": 0.4866, + "step": 13365 + }, + { + "epoch": 0.8139329537496575, + "grad_norm": 1.1047848330342682, + "learning_rate": 4.77921398094079e-06, + "loss": 0.4287, + "step": 13366 + }, + { + "epoch": 0.8139938495265353, + "grad_norm": 0.9976850591151769, + "learning_rate": 4.779181192600227e-06, + "loss": 0.4669, + "step": 13367 + }, + { + "epoch": 0.8140547453034133, + "grad_norm": 1.0000175263550934, + "learning_rate": 4.779148401937682e-06, + "loss": 0.3706, + "step": 13368 + }, + { + "epoch": 0.8141156410802911, + "grad_norm": 1.0305007956604444, + "learning_rate": 4.779115608953188e-06, + "loss": 0.4012, + "step": 13369 + }, + { + "epoch": 0.814176536857169, + "grad_norm": 0.9725042635826425, + "learning_rate": 4.779082813646777e-06, + "loss": 0.4404, + "step": 13370 + }, + { + "epoch": 0.8142374326340468, + "grad_norm": 1.01645416964533, + "learning_rate": 4.779050016018485e-06, + "loss": 0.4228, + "step": 13371 + }, + { + "epoch": 0.8142983284109248, + "grad_norm": 1.03463224456493, + "learning_rate": 4.779017216068345e-06, + "loss": 0.383, + "step": 13372 + }, + { + "epoch": 0.8143592241878026, + "grad_norm": 1.128344249716388, + "learning_rate": 4.7789844137963884e-06, + "loss": 0.3841, + "step": 13373 + }, + { + "epoch": 0.8144201199646804, + "grad_norm": 1.1198128819721997, + "learning_rate": 4.77895160920265e-06, + "loss": 0.3941, + "step": 13374 + }, + { + "epoch": 0.8144810157415583, + "grad_norm": 0.9907765570286627, + "learning_rate": 4.778918802287163e-06, + "loss": 0.4283, + "step": 13375 + }, + { + "epoch": 0.8145419115184362, + "grad_norm": 1.0478828524486132, + "learning_rate": 4.778885993049962e-06, + "loss": 0.4046, + "step": 13376 + }, + { + "epoch": 0.8146028072953141, + "grad_norm": 0.9593661943744682, + "learning_rate": 4.778853181491078e-06, + "loss": 0.4894, + "step": 13377 + }, + { + "epoch": 0.8146637030721919, + "grad_norm": 0.9370956564269131, + "learning_rate": 4.778820367610546e-06, + "loss": 0.4144, + "step": 13378 + }, + { + "epoch": 0.8147245988490698, + "grad_norm": 0.9574122437504947, + "learning_rate": 4.7787875514084e-06, + "loss": 0.4634, + "step": 13379 + }, + { + "epoch": 0.8147854946259477, + "grad_norm": 1.0165393880875606, + "learning_rate": 4.778754732884672e-06, + "loss": 0.4687, + "step": 13380 + }, + { + "epoch": 0.8148463904028256, + "grad_norm": 0.9927725793006987, + "learning_rate": 4.778721912039396e-06, + "loss": 0.4586, + "step": 13381 + }, + { + "epoch": 0.8149072861797034, + "grad_norm": 0.9393977147613531, + "learning_rate": 4.778689088872606e-06, + "loss": 0.3945, + "step": 13382 + }, + { + "epoch": 0.8149681819565813, + "grad_norm": 1.0221273635170953, + "learning_rate": 4.778656263384334e-06, + "loss": 0.349, + "step": 13383 + }, + { + "epoch": 0.8150290777334592, + "grad_norm": 0.972248377570832, + "learning_rate": 4.778623435574615e-06, + "loss": 0.4522, + "step": 13384 + }, + { + "epoch": 0.8150899735103371, + "grad_norm": 0.9742179021291648, + "learning_rate": 4.778590605443482e-06, + "loss": 0.5457, + "step": 13385 + }, + { + "epoch": 0.8151508692872149, + "grad_norm": 0.9738151576698231, + "learning_rate": 4.778557772990968e-06, + "loss": 0.4129, + "step": 13386 + }, + { + "epoch": 0.8152117650640928, + "grad_norm": 1.0002533993438896, + "learning_rate": 4.778524938217107e-06, + "loss": 0.4219, + "step": 13387 + }, + { + "epoch": 0.8152726608409707, + "grad_norm": 1.0127351565607332, + "learning_rate": 4.778492101121932e-06, + "loss": 0.4311, + "step": 13388 + }, + { + "epoch": 0.8153335566178486, + "grad_norm": 0.9577399254721662, + "learning_rate": 4.7784592617054755e-06, + "loss": 0.4519, + "step": 13389 + }, + { + "epoch": 0.8153944523947264, + "grad_norm": 0.9944582521270345, + "learning_rate": 4.778426419967774e-06, + "loss": 0.4352, + "step": 13390 + }, + { + "epoch": 0.8154553481716043, + "grad_norm": 1.0485277780808706, + "learning_rate": 4.778393575908858e-06, + "loss": 0.3884, + "step": 13391 + }, + { + "epoch": 0.8155162439484822, + "grad_norm": 1.0213411522770655, + "learning_rate": 4.778360729528762e-06, + "loss": 0.4504, + "step": 13392 + }, + { + "epoch": 0.8155771397253601, + "grad_norm": 1.0460845462198833, + "learning_rate": 4.77832788082752e-06, + "loss": 0.4119, + "step": 13393 + }, + { + "epoch": 0.8156380355022379, + "grad_norm": 1.0081985163129172, + "learning_rate": 4.778295029805164e-06, + "loss": 0.3915, + "step": 13394 + }, + { + "epoch": 0.8156989312791157, + "grad_norm": 1.0263803317986844, + "learning_rate": 4.778262176461728e-06, + "loss": 0.3292, + "step": 13395 + }, + { + "epoch": 0.8157598270559937, + "grad_norm": 1.0057087475332867, + "learning_rate": 4.778229320797248e-06, + "loss": 0.4555, + "step": 13396 + }, + { + "epoch": 0.8158207228328715, + "grad_norm": 1.0500175206913316, + "learning_rate": 4.7781964628117535e-06, + "loss": 0.4189, + "step": 13397 + }, + { + "epoch": 0.8158816186097494, + "grad_norm": 1.0073988319154752, + "learning_rate": 4.778163602505281e-06, + "loss": 0.3989, + "step": 13398 + }, + { + "epoch": 0.8159425143866272, + "grad_norm": 1.0685314358626297, + "learning_rate": 4.778130739877862e-06, + "loss": 0.5127, + "step": 13399 + }, + { + "epoch": 0.8160034101635052, + "grad_norm": 0.9440412184501553, + "learning_rate": 4.778097874929531e-06, + "loss": 0.4443, + "step": 13400 + }, + { + "epoch": 0.816064305940383, + "grad_norm": 1.0697784634396805, + "learning_rate": 4.7780650076603205e-06, + "loss": 0.4154, + "step": 13401 + }, + { + "epoch": 0.8161252017172609, + "grad_norm": 0.9384123319815725, + "learning_rate": 4.778032138070266e-06, + "loss": 0.451, + "step": 13402 + }, + { + "epoch": 0.8161860974941387, + "grad_norm": 0.9812581945115503, + "learning_rate": 4.777999266159398e-06, + "loss": 0.3997, + "step": 13403 + }, + { + "epoch": 0.8162469932710167, + "grad_norm": 1.0227854398049268, + "learning_rate": 4.777966391927754e-06, + "loss": 0.4714, + "step": 13404 + }, + { + "epoch": 0.8163078890478945, + "grad_norm": 0.9409718573535637, + "learning_rate": 4.777933515375364e-06, + "loss": 0.4035, + "step": 13405 + }, + { + "epoch": 0.8163687848247724, + "grad_norm": 0.9788828286681553, + "learning_rate": 4.777900636502263e-06, + "loss": 0.3675, + "step": 13406 + }, + { + "epoch": 0.8164296806016502, + "grad_norm": 0.9108802728785759, + "learning_rate": 4.777867755308484e-06, + "loss": 0.4347, + "step": 13407 + }, + { + "epoch": 0.8164905763785282, + "grad_norm": 1.0050231574729613, + "learning_rate": 4.7778348717940606e-06, + "loss": 0.4471, + "step": 13408 + }, + { + "epoch": 0.816551472155406, + "grad_norm": 1.0115688425745177, + "learning_rate": 4.777801985959026e-06, + "loss": 0.4416, + "step": 13409 + }, + { + "epoch": 0.8166123679322839, + "grad_norm": 1.0125040689397702, + "learning_rate": 4.777769097803414e-06, + "loss": 0.3711, + "step": 13410 + }, + { + "epoch": 0.8166732637091618, + "grad_norm": 0.9740545154533906, + "learning_rate": 4.777736207327259e-06, + "loss": 0.3974, + "step": 13411 + }, + { + "epoch": 0.8167341594860397, + "grad_norm": 0.9655558962795969, + "learning_rate": 4.777703314530594e-06, + "loss": 0.4605, + "step": 13412 + }, + { + "epoch": 0.8167950552629175, + "grad_norm": 0.9772582698912465, + "learning_rate": 4.7776704194134516e-06, + "loss": 0.3637, + "step": 13413 + }, + { + "epoch": 0.8168559510397954, + "grad_norm": 0.9153880705830465, + "learning_rate": 4.777637521975866e-06, + "loss": 0.4716, + "step": 13414 + }, + { + "epoch": 0.8169168468166733, + "grad_norm": 1.0257810041275537, + "learning_rate": 4.777604622217871e-06, + "loss": 0.389, + "step": 13415 + }, + { + "epoch": 0.8169777425935512, + "grad_norm": 1.0448941035318218, + "learning_rate": 4.777571720139499e-06, + "loss": 0.4502, + "step": 13416 + }, + { + "epoch": 0.817038638370429, + "grad_norm": 1.042533372089832, + "learning_rate": 4.777538815740784e-06, + "loss": 0.3842, + "step": 13417 + }, + { + "epoch": 0.8170995341473068, + "grad_norm": 1.0421301372322194, + "learning_rate": 4.777505909021761e-06, + "loss": 0.3495, + "step": 13418 + }, + { + "epoch": 0.8171604299241848, + "grad_norm": 1.0178883713871727, + "learning_rate": 4.777472999982462e-06, + "loss": 0.4841, + "step": 13419 + }, + { + "epoch": 0.8172213257010627, + "grad_norm": 0.9821682745744857, + "learning_rate": 4.77744008862292e-06, + "loss": 0.4636, + "step": 13420 + }, + { + "epoch": 0.8172822214779405, + "grad_norm": 1.0514907336954784, + "learning_rate": 4.7774071749431705e-06, + "loss": 0.3801, + "step": 13421 + }, + { + "epoch": 0.8173431172548183, + "grad_norm": 1.0088842750653617, + "learning_rate": 4.7773742589432455e-06, + "loss": 0.4088, + "step": 13422 + }, + { + "epoch": 0.8174040130316963, + "grad_norm": 1.0120850864864122, + "learning_rate": 4.777341340623179e-06, + "loss": 0.3875, + "step": 13423 + }, + { + "epoch": 0.8174649088085741, + "grad_norm": 0.9982505753175821, + "learning_rate": 4.777308419983005e-06, + "loss": 0.3825, + "step": 13424 + }, + { + "epoch": 0.817525804585452, + "grad_norm": 1.0688968096809648, + "learning_rate": 4.777275497022755e-06, + "loss": 0.3455, + "step": 13425 + }, + { + "epoch": 0.8175867003623298, + "grad_norm": 0.9964866533268517, + "learning_rate": 4.777242571742465e-06, + "loss": 0.3819, + "step": 13426 + }, + { + "epoch": 0.8176475961392078, + "grad_norm": 0.9309203033245744, + "learning_rate": 4.777209644142168e-06, + "loss": 0.4685, + "step": 13427 + }, + { + "epoch": 0.8177084919160856, + "grad_norm": 0.9712532698814959, + "learning_rate": 4.777176714221896e-06, + "loss": 0.3961, + "step": 13428 + }, + { + "epoch": 0.8177693876929635, + "grad_norm": 1.0598892514812088, + "learning_rate": 4.777143781981685e-06, + "loss": 0.41, + "step": 13429 + }, + { + "epoch": 0.8178302834698413, + "grad_norm": 0.9998922825328327, + "learning_rate": 4.777110847421566e-06, + "loss": 0.483, + "step": 13430 + }, + { + "epoch": 0.8178911792467193, + "grad_norm": 1.0815483913836708, + "learning_rate": 4.777077910541575e-06, + "loss": 0.3692, + "step": 13431 + }, + { + "epoch": 0.8179520750235971, + "grad_norm": 1.1246468682802795, + "learning_rate": 4.777044971341745e-06, + "loss": 0.3625, + "step": 13432 + }, + { + "epoch": 0.818012970800475, + "grad_norm": 1.0479988075368347, + "learning_rate": 4.777012029822107e-06, + "loss": 0.4295, + "step": 13433 + }, + { + "epoch": 0.8180738665773528, + "grad_norm": 1.0406935545153877, + "learning_rate": 4.776979085982697e-06, + "loss": 0.3452, + "step": 13434 + }, + { + "epoch": 0.8181347623542308, + "grad_norm": 1.0994050303971685, + "learning_rate": 4.776946139823549e-06, + "loss": 0.3882, + "step": 13435 + }, + { + "epoch": 0.8181956581311086, + "grad_norm": 0.9677167195028884, + "learning_rate": 4.776913191344695e-06, + "loss": 0.43, + "step": 13436 + }, + { + "epoch": 0.8182565539079865, + "grad_norm": 1.0646355081135084, + "learning_rate": 4.776880240546169e-06, + "loss": 0.3369, + "step": 13437 + }, + { + "epoch": 0.8183174496848643, + "grad_norm": 0.9951432588324068, + "learning_rate": 4.776847287428005e-06, + "loss": 0.4051, + "step": 13438 + }, + { + "epoch": 0.8183783454617423, + "grad_norm": 0.9830192842560461, + "learning_rate": 4.776814331990236e-06, + "loss": 0.4156, + "step": 13439 + }, + { + "epoch": 0.8184392412386201, + "grad_norm": 1.0204848044355712, + "learning_rate": 4.776781374232896e-06, + "loss": 0.4161, + "step": 13440 + }, + { + "epoch": 0.818500137015498, + "grad_norm": 0.9611301347868175, + "learning_rate": 4.776748414156019e-06, + "loss": 0.4268, + "step": 13441 + }, + { + "epoch": 0.8185610327923758, + "grad_norm": 1.0598988486124887, + "learning_rate": 4.776715451759637e-06, + "loss": 0.3834, + "step": 13442 + }, + { + "epoch": 0.8186219285692538, + "grad_norm": 1.0745148527965442, + "learning_rate": 4.776682487043786e-06, + "loss": 0.3853, + "step": 13443 + }, + { + "epoch": 0.8186828243461316, + "grad_norm": 1.0336440305587833, + "learning_rate": 4.776649520008498e-06, + "loss": 0.3974, + "step": 13444 + }, + { + "epoch": 0.8187437201230094, + "grad_norm": 0.9842696306896921, + "learning_rate": 4.7766165506538055e-06, + "loss": 0.4089, + "step": 13445 + }, + { + "epoch": 0.8188046158998873, + "grad_norm": 0.9902231263680222, + "learning_rate": 4.776583578979744e-06, + "loss": 0.3697, + "step": 13446 + }, + { + "epoch": 0.8188655116767652, + "grad_norm": 1.0690212573924367, + "learning_rate": 4.776550604986346e-06, + "loss": 0.3548, + "step": 13447 + }, + { + "epoch": 0.8189264074536431, + "grad_norm": 1.1282245854084234, + "learning_rate": 4.776517628673647e-06, + "loss": 0.3345, + "step": 13448 + }, + { + "epoch": 0.8189873032305209, + "grad_norm": 1.0049447745590216, + "learning_rate": 4.776484650041678e-06, + "loss": 0.4615, + "step": 13449 + }, + { + "epoch": 0.8190481990073989, + "grad_norm": 0.9598431268874669, + "learning_rate": 4.776451669090475e-06, + "loss": 0.4267, + "step": 13450 + }, + { + "epoch": 0.8191090947842767, + "grad_norm": 0.9914468187547912, + "learning_rate": 4.776418685820069e-06, + "loss": 0.4411, + "step": 13451 + }, + { + "epoch": 0.8191699905611546, + "grad_norm": 1.0180367193362867, + "learning_rate": 4.776385700230496e-06, + "loss": 0.4269, + "step": 13452 + }, + { + "epoch": 0.8192308863380324, + "grad_norm": 1.0083597715012693, + "learning_rate": 4.776352712321788e-06, + "loss": 0.4471, + "step": 13453 + }, + { + "epoch": 0.8192917821149104, + "grad_norm": 1.0451415835897218, + "learning_rate": 4.77631972209398e-06, + "loss": 0.425, + "step": 13454 + }, + { + "epoch": 0.8193526778917882, + "grad_norm": 0.9429674898102955, + "learning_rate": 4.776286729547104e-06, + "loss": 0.4026, + "step": 13455 + }, + { + "epoch": 0.8194135736686661, + "grad_norm": 1.135776767112741, + "learning_rate": 4.776253734681194e-06, + "loss": 0.4271, + "step": 13456 + }, + { + "epoch": 0.8194744694455439, + "grad_norm": 0.9931582123504419, + "learning_rate": 4.7762207374962845e-06, + "loss": 0.3848, + "step": 13457 + }, + { + "epoch": 0.8195353652224219, + "grad_norm": 0.9615150129950969, + "learning_rate": 4.776187737992408e-06, + "loss": 0.4296, + "step": 13458 + }, + { + "epoch": 0.8195962609992997, + "grad_norm": 1.0067778813210402, + "learning_rate": 4.776154736169599e-06, + "loss": 0.4216, + "step": 13459 + }, + { + "epoch": 0.8196571567761776, + "grad_norm": 0.9448472658764921, + "learning_rate": 4.7761217320278915e-06, + "loss": 0.4117, + "step": 13460 + }, + { + "epoch": 0.8197180525530554, + "grad_norm": 1.0067807576582068, + "learning_rate": 4.7760887255673185e-06, + "loss": 0.4716, + "step": 13461 + }, + { + "epoch": 0.8197789483299334, + "grad_norm": 0.9802605998204461, + "learning_rate": 4.776055716787913e-06, + "loss": 0.4421, + "step": 13462 + }, + { + "epoch": 0.8198398441068112, + "grad_norm": 1.001786604316204, + "learning_rate": 4.77602270568971e-06, + "loss": 0.5117, + "step": 13463 + }, + { + "epoch": 0.8199007398836891, + "grad_norm": 1.043454221359388, + "learning_rate": 4.775989692272742e-06, + "loss": 0.367, + "step": 13464 + }, + { + "epoch": 0.8199616356605669, + "grad_norm": 1.0139520501840382, + "learning_rate": 4.775956676537044e-06, + "loss": 0.4366, + "step": 13465 + }, + { + "epoch": 0.8200225314374449, + "grad_norm": 1.06265554282373, + "learning_rate": 4.775923658482647e-06, + "loss": 0.3857, + "step": 13466 + }, + { + "epoch": 0.8200834272143227, + "grad_norm": 1.0149743987636843, + "learning_rate": 4.775890638109587e-06, + "loss": 0.3868, + "step": 13467 + }, + { + "epoch": 0.8201443229912005, + "grad_norm": 0.957710613761784, + "learning_rate": 4.775857615417897e-06, + "loss": 0.408, + "step": 13468 + }, + { + "epoch": 0.8202052187680784, + "grad_norm": 1.0300768560051345, + "learning_rate": 4.775824590407611e-06, + "loss": 0.3904, + "step": 13469 + }, + { + "epoch": 0.8202661145449563, + "grad_norm": 1.0594440594240069, + "learning_rate": 4.7757915630787614e-06, + "loss": 0.4226, + "step": 13470 + }, + { + "epoch": 0.8203270103218342, + "grad_norm": 0.9543696727644081, + "learning_rate": 4.775758533431382e-06, + "loss": 0.4613, + "step": 13471 + }, + { + "epoch": 0.820387906098712, + "grad_norm": 1.1653374341276246, + "learning_rate": 4.775725501465509e-06, + "loss": 0.4632, + "step": 13472 + }, + { + "epoch": 0.8204488018755899, + "grad_norm": 1.0372093646923863, + "learning_rate": 4.775692467181173e-06, + "loss": 0.4193, + "step": 13473 + }, + { + "epoch": 0.8205096976524678, + "grad_norm": 1.006461135853921, + "learning_rate": 4.7756594305784094e-06, + "loss": 0.4027, + "step": 13474 + }, + { + "epoch": 0.8205705934293457, + "grad_norm": 1.0631245028889007, + "learning_rate": 4.775626391657251e-06, + "loss": 0.3581, + "step": 13475 + }, + { + "epoch": 0.8206314892062235, + "grad_norm": 0.9834043908429875, + "learning_rate": 4.775593350417732e-06, + "loss": 0.4612, + "step": 13476 + }, + { + "epoch": 0.8206923849831014, + "grad_norm": 1.097043836590727, + "learning_rate": 4.775560306859885e-06, + "loss": 0.4889, + "step": 13477 + }, + { + "epoch": 0.8207532807599793, + "grad_norm": 1.034149607883703, + "learning_rate": 4.775527260983745e-06, + "loss": 0.492, + "step": 13478 + }, + { + "epoch": 0.8208141765368572, + "grad_norm": 0.9429307972324549, + "learning_rate": 4.775494212789346e-06, + "loss": 0.4292, + "step": 13479 + }, + { + "epoch": 0.820875072313735, + "grad_norm": 1.0533773934141786, + "learning_rate": 4.77546116227672e-06, + "loss": 0.4422, + "step": 13480 + }, + { + "epoch": 0.8209359680906129, + "grad_norm": 0.9227913112647228, + "learning_rate": 4.775428109445901e-06, + "loss": 0.4158, + "step": 13481 + }, + { + "epoch": 0.8209968638674908, + "grad_norm": 0.9535799387267573, + "learning_rate": 4.775395054296924e-06, + "loss": 0.441, + "step": 13482 + }, + { + "epoch": 0.8210577596443687, + "grad_norm": 0.9206212482714071, + "learning_rate": 4.775361996829821e-06, + "loss": 0.4319, + "step": 13483 + }, + { + "epoch": 0.8211186554212465, + "grad_norm": 0.9594499882269678, + "learning_rate": 4.775328937044627e-06, + "loss": 0.4382, + "step": 13484 + }, + { + "epoch": 0.8211795511981244, + "grad_norm": 0.9905347912644554, + "learning_rate": 4.775295874941375e-06, + "loss": 0.3828, + "step": 13485 + }, + { + "epoch": 0.8212404469750023, + "grad_norm": 0.9988188308820448, + "learning_rate": 4.7752628105201e-06, + "loss": 0.3632, + "step": 13486 + }, + { + "epoch": 0.8213013427518802, + "grad_norm": 1.0167540667764083, + "learning_rate": 4.775229743780833e-06, + "loss": 0.4052, + "step": 13487 + }, + { + "epoch": 0.821362238528758, + "grad_norm": 1.0308115490896776, + "learning_rate": 4.775196674723609e-06, + "loss": 0.4661, + "step": 13488 + }, + { + "epoch": 0.8214231343056358, + "grad_norm": 1.0043712527042132, + "learning_rate": 4.775163603348462e-06, + "loss": 0.389, + "step": 13489 + }, + { + "epoch": 0.8214840300825138, + "grad_norm": 0.9769446372952727, + "learning_rate": 4.775130529655428e-06, + "loss": 0.4669, + "step": 13490 + }, + { + "epoch": 0.8215449258593917, + "grad_norm": 1.0005890411504994, + "learning_rate": 4.775097453644536e-06, + "loss": 0.4012, + "step": 13491 + }, + { + "epoch": 0.8216058216362695, + "grad_norm": 1.1102174201242931, + "learning_rate": 4.7750643753158225e-06, + "loss": 0.3324, + "step": 13492 + }, + { + "epoch": 0.8216667174131475, + "grad_norm": 1.0212705687337285, + "learning_rate": 4.7750312946693215e-06, + "loss": 0.4022, + "step": 13493 + }, + { + "epoch": 0.8217276131900253, + "grad_norm": 1.1236631085889879, + "learning_rate": 4.7749982117050656e-06, + "loss": 0.3427, + "step": 13494 + }, + { + "epoch": 0.8217885089669031, + "grad_norm": 0.9382586675894744, + "learning_rate": 4.774965126423088e-06, + "loss": 0.4506, + "step": 13495 + }, + { + "epoch": 0.821849404743781, + "grad_norm": 0.9616985375026155, + "learning_rate": 4.774932038823423e-06, + "loss": 0.4375, + "step": 13496 + }, + { + "epoch": 0.8219103005206589, + "grad_norm": 0.9726549113905312, + "learning_rate": 4.774898948906106e-06, + "loss": 0.3676, + "step": 13497 + }, + { + "epoch": 0.8219711962975368, + "grad_norm": 1.0546273423240393, + "learning_rate": 4.774865856671168e-06, + "loss": 0.3623, + "step": 13498 + }, + { + "epoch": 0.8220320920744146, + "grad_norm": 1.0135123910260238, + "learning_rate": 4.774832762118645e-06, + "loss": 0.3912, + "step": 13499 + }, + { + "epoch": 0.8220929878512925, + "grad_norm": 1.0636945138697584, + "learning_rate": 4.774799665248569e-06, + "loss": 0.4264, + "step": 13500 + }, + { + "epoch": 0.8221538836281704, + "grad_norm": 0.9693294080638706, + "learning_rate": 4.774766566060974e-06, + "loss": 0.4695, + "step": 13501 + }, + { + "epoch": 0.8222147794050483, + "grad_norm": 1.1257853860923985, + "learning_rate": 4.7747334645558955e-06, + "loss": 0.3154, + "step": 13502 + }, + { + "epoch": 0.8222756751819261, + "grad_norm": 0.9699749078938937, + "learning_rate": 4.774700360733364e-06, + "loss": 0.4049, + "step": 13503 + }, + { + "epoch": 0.822336570958804, + "grad_norm": 1.0784029373124446, + "learning_rate": 4.774667254593417e-06, + "loss": 0.3631, + "step": 13504 + }, + { + "epoch": 0.8223974667356819, + "grad_norm": 1.0160847752678939, + "learning_rate": 4.774634146136086e-06, + "loss": 0.3639, + "step": 13505 + }, + { + "epoch": 0.8224583625125598, + "grad_norm": 1.0341304838237146, + "learning_rate": 4.774601035361404e-06, + "loss": 0.4148, + "step": 13506 + }, + { + "epoch": 0.8225192582894376, + "grad_norm": 0.990667568540425, + "learning_rate": 4.774567922269406e-06, + "loss": 0.3663, + "step": 13507 + }, + { + "epoch": 0.8225801540663155, + "grad_norm": 0.9156384945974227, + "learning_rate": 4.7745348068601256e-06, + "loss": 0.4853, + "step": 13508 + }, + { + "epoch": 0.8226410498431934, + "grad_norm": 0.9780007569237504, + "learning_rate": 4.774501689133596e-06, + "loss": 0.393, + "step": 13509 + }, + { + "epoch": 0.8227019456200713, + "grad_norm": 1.0634981265425572, + "learning_rate": 4.774468569089852e-06, + "loss": 0.452, + "step": 13510 + }, + { + "epoch": 0.8227628413969491, + "grad_norm": 0.9720343628326041, + "learning_rate": 4.7744354467289265e-06, + "loss": 0.3557, + "step": 13511 + }, + { + "epoch": 0.822823737173827, + "grad_norm": 1.0337141767819757, + "learning_rate": 4.774402322050854e-06, + "loss": 0.4045, + "step": 13512 + }, + { + "epoch": 0.8228846329507049, + "grad_norm": 0.9603361292791281, + "learning_rate": 4.774369195055667e-06, + "loss": 0.3963, + "step": 13513 + }, + { + "epoch": 0.8229455287275828, + "grad_norm": 0.9276926411206511, + "learning_rate": 4.7743360657434e-06, + "loss": 0.402, + "step": 13514 + }, + { + "epoch": 0.8230064245044606, + "grad_norm": 0.9711941079184805, + "learning_rate": 4.774302934114087e-06, + "loss": 0.4978, + "step": 13515 + }, + { + "epoch": 0.8230673202813384, + "grad_norm": 0.9763588167469501, + "learning_rate": 4.7742698001677615e-06, + "loss": 0.4517, + "step": 13516 + }, + { + "epoch": 0.8231282160582164, + "grad_norm": 0.9237951347284915, + "learning_rate": 4.774236663904457e-06, + "loss": 0.4384, + "step": 13517 + }, + { + "epoch": 0.8231891118350942, + "grad_norm": 0.9798578142251217, + "learning_rate": 4.774203525324207e-06, + "loss": 0.4598, + "step": 13518 + }, + { + "epoch": 0.8232500076119721, + "grad_norm": 1.0296864746862853, + "learning_rate": 4.774170384427046e-06, + "loss": 0.3939, + "step": 13519 + }, + { + "epoch": 0.8233109033888499, + "grad_norm": 1.030605896859927, + "learning_rate": 4.774137241213008e-06, + "loss": 0.417, + "step": 13520 + }, + { + "epoch": 0.8233717991657279, + "grad_norm": 0.9964401529194284, + "learning_rate": 4.774104095682126e-06, + "loss": 0.3801, + "step": 13521 + }, + { + "epoch": 0.8234326949426057, + "grad_norm": 0.9721780697278843, + "learning_rate": 4.774070947834434e-06, + "loss": 0.4638, + "step": 13522 + }, + { + "epoch": 0.8234935907194836, + "grad_norm": 1.0390388262113415, + "learning_rate": 4.774037797669966e-06, + "loss": 0.3454, + "step": 13523 + }, + { + "epoch": 0.8235544864963614, + "grad_norm": 1.0866914285146685, + "learning_rate": 4.7740046451887555e-06, + "loss": 0.3695, + "step": 13524 + }, + { + "epoch": 0.8236153822732394, + "grad_norm": 0.8865320017040234, + "learning_rate": 4.773971490390836e-06, + "loss": 0.404, + "step": 13525 + }, + { + "epoch": 0.8236762780501172, + "grad_norm": 0.9767454603245083, + "learning_rate": 4.773938333276242e-06, + "loss": 0.4643, + "step": 13526 + }, + { + "epoch": 0.8237371738269951, + "grad_norm": 1.019679728096081, + "learning_rate": 4.773905173845006e-06, + "loss": 0.3858, + "step": 13527 + }, + { + "epoch": 0.8237980696038729, + "grad_norm": 1.05330947267765, + "learning_rate": 4.773872012097164e-06, + "loss": 0.3982, + "step": 13528 + }, + { + "epoch": 0.8238589653807509, + "grad_norm": 0.9852882069608082, + "learning_rate": 4.773838848032748e-06, + "loss": 0.4184, + "step": 13529 + }, + { + "epoch": 0.8239198611576287, + "grad_norm": 0.9889396260188494, + "learning_rate": 4.7738056816517915e-06, + "loss": 0.4654, + "step": 13530 + }, + { + "epoch": 0.8239807569345066, + "grad_norm": 0.984845126658746, + "learning_rate": 4.77377251295433e-06, + "loss": 0.399, + "step": 13531 + }, + { + "epoch": 0.8240416527113845, + "grad_norm": 1.0013339500368195, + "learning_rate": 4.773739341940396e-06, + "loss": 0.3834, + "step": 13532 + }, + { + "epoch": 0.8241025484882624, + "grad_norm": 1.0841941202300134, + "learning_rate": 4.773706168610024e-06, + "loss": 0.3775, + "step": 13533 + }, + { + "epoch": 0.8241634442651402, + "grad_norm": 0.9801069903155866, + "learning_rate": 4.773672992963247e-06, + "loss": 0.4767, + "step": 13534 + }, + { + "epoch": 0.824224340042018, + "grad_norm": 1.0087525765984802, + "learning_rate": 4.7736398150000996e-06, + "loss": 0.3971, + "step": 13535 + }, + { + "epoch": 0.824285235818896, + "grad_norm": 0.9969768159024629, + "learning_rate": 4.773606634720615e-06, + "loss": 0.3786, + "step": 13536 + }, + { + "epoch": 0.8243461315957739, + "grad_norm": 1.008531891284475, + "learning_rate": 4.7735734521248266e-06, + "loss": 0.4004, + "step": 13537 + }, + { + "epoch": 0.8244070273726517, + "grad_norm": 1.0824725047915493, + "learning_rate": 4.773540267212769e-06, + "loss": 0.321, + "step": 13538 + }, + { + "epoch": 0.8244679231495295, + "grad_norm": 0.989070290217063, + "learning_rate": 4.773507079984477e-06, + "loss": 0.4554, + "step": 13539 + }, + { + "epoch": 0.8245288189264075, + "grad_norm": 1.0782717333046072, + "learning_rate": 4.773473890439983e-06, + "loss": 0.3744, + "step": 13540 + }, + { + "epoch": 0.8245897147032853, + "grad_norm": 0.9221216466309802, + "learning_rate": 4.7734406985793205e-06, + "loss": 0.469, + "step": 13541 + }, + { + "epoch": 0.8246506104801632, + "grad_norm": 1.0401618112793145, + "learning_rate": 4.773407504402524e-06, + "loss": 0.4457, + "step": 13542 + }, + { + "epoch": 0.824711506257041, + "grad_norm": 0.9413967166659266, + "learning_rate": 4.773374307909626e-06, + "loss": 0.4161, + "step": 13543 + }, + { + "epoch": 0.824772402033919, + "grad_norm": 0.949656906652463, + "learning_rate": 4.7733411091006636e-06, + "loss": 0.4098, + "step": 13544 + }, + { + "epoch": 0.8248332978107968, + "grad_norm": 0.9953792729675063, + "learning_rate": 4.773307907975667e-06, + "loss": 0.4379, + "step": 13545 + }, + { + "epoch": 0.8248941935876747, + "grad_norm": 0.9575533915584148, + "learning_rate": 4.773274704534673e-06, + "loss": 0.4423, + "step": 13546 + }, + { + "epoch": 0.8249550893645525, + "grad_norm": 1.0116942263001274, + "learning_rate": 4.7732414987777125e-06, + "loss": 0.3947, + "step": 13547 + }, + { + "epoch": 0.8250159851414305, + "grad_norm": 1.006917167240661, + "learning_rate": 4.773208290704822e-06, + "loss": 0.4142, + "step": 13548 + }, + { + "epoch": 0.8250768809183083, + "grad_norm": 1.0171411476795478, + "learning_rate": 4.773175080316033e-06, + "loss": 0.4802, + "step": 13549 + }, + { + "epoch": 0.8251377766951862, + "grad_norm": 0.9779522141423729, + "learning_rate": 4.773141867611382e-06, + "loss": 0.392, + "step": 13550 + }, + { + "epoch": 0.825198672472064, + "grad_norm": 0.9957095854583876, + "learning_rate": 4.7731086525909e-06, + "loss": 0.3699, + "step": 13551 + }, + { + "epoch": 0.825259568248942, + "grad_norm": 1.100846684410702, + "learning_rate": 4.773075435254622e-06, + "loss": 0.4127, + "step": 13552 + }, + { + "epoch": 0.8253204640258198, + "grad_norm": 1.0361930156552481, + "learning_rate": 4.773042215602584e-06, + "loss": 0.3785, + "step": 13553 + }, + { + "epoch": 0.8253813598026977, + "grad_norm": 0.9211846858439277, + "learning_rate": 4.773008993634815e-06, + "loss": 0.5275, + "step": 13554 + }, + { + "epoch": 0.8254422555795755, + "grad_norm": 1.032660193494922, + "learning_rate": 4.772975769351353e-06, + "loss": 0.4274, + "step": 13555 + }, + { + "epoch": 0.8255031513564535, + "grad_norm": 1.0549616717675123, + "learning_rate": 4.772942542752231e-06, + "loss": 0.4128, + "step": 13556 + }, + { + "epoch": 0.8255640471333313, + "grad_norm": 0.9947873915353787, + "learning_rate": 4.7729093138374825e-06, + "loss": 0.3924, + "step": 13557 + }, + { + "epoch": 0.8256249429102092, + "grad_norm": 1.0087446995957607, + "learning_rate": 4.7728760826071404e-06, + "loss": 0.4061, + "step": 13558 + }, + { + "epoch": 0.825685838687087, + "grad_norm": 0.9987033700404577, + "learning_rate": 4.772842849061241e-06, + "loss": 0.4278, + "step": 13559 + }, + { + "epoch": 0.825746734463965, + "grad_norm": 0.9371893695305235, + "learning_rate": 4.7728096131998145e-06, + "loss": 0.4536, + "step": 13560 + }, + { + "epoch": 0.8258076302408428, + "grad_norm": 1.0265318069848441, + "learning_rate": 4.7727763750228985e-06, + "loss": 0.3632, + "step": 13561 + }, + { + "epoch": 0.8258685260177206, + "grad_norm": 0.9204108371966472, + "learning_rate": 4.772743134530524e-06, + "loss": 0.4548, + "step": 13562 + }, + { + "epoch": 0.8259294217945985, + "grad_norm": 0.9618027149440074, + "learning_rate": 4.772709891722726e-06, + "loss": 0.4308, + "step": 13563 + }, + { + "epoch": 0.8259903175714765, + "grad_norm": 1.091897077298049, + "learning_rate": 4.772676646599539e-06, + "loss": 0.3641, + "step": 13564 + }, + { + "epoch": 0.8260512133483543, + "grad_norm": 0.9594430196691368, + "learning_rate": 4.772643399160997e-06, + "loss": 0.4167, + "step": 13565 + }, + { + "epoch": 0.8261121091252321, + "grad_norm": 1.0154938607137296, + "learning_rate": 4.772610149407132e-06, + "loss": 0.4168, + "step": 13566 + }, + { + "epoch": 0.82617300490211, + "grad_norm": 0.9718023890609474, + "learning_rate": 4.772576897337979e-06, + "loss": 0.4527, + "step": 13567 + }, + { + "epoch": 0.8262339006789879, + "grad_norm": 1.0625463329812852, + "learning_rate": 4.772543642953572e-06, + "loss": 0.4782, + "step": 13568 + }, + { + "epoch": 0.8262947964558658, + "grad_norm": 0.9895881595254984, + "learning_rate": 4.772510386253945e-06, + "loss": 0.4579, + "step": 13569 + }, + { + "epoch": 0.8263556922327436, + "grad_norm": 1.0275622127685855, + "learning_rate": 4.772477127239132e-06, + "loss": 0.3773, + "step": 13570 + }, + { + "epoch": 0.8264165880096215, + "grad_norm": 1.0272181121141595, + "learning_rate": 4.772443865909167e-06, + "loss": 0.3556, + "step": 13571 + }, + { + "epoch": 0.8264774837864994, + "grad_norm": 0.9677806950102685, + "learning_rate": 4.772410602264082e-06, + "loss": 0.4831, + "step": 13572 + }, + { + "epoch": 0.8265383795633773, + "grad_norm": 1.0265725352656223, + "learning_rate": 4.772377336303913e-06, + "loss": 0.3961, + "step": 13573 + }, + { + "epoch": 0.8265992753402551, + "grad_norm": 0.9933571705027584, + "learning_rate": 4.7723440680286935e-06, + "loss": 0.4067, + "step": 13574 + }, + { + "epoch": 0.8266601711171331, + "grad_norm": 1.0114268136120395, + "learning_rate": 4.7723107974384566e-06, + "loss": 0.3949, + "step": 13575 + }, + { + "epoch": 0.8267210668940109, + "grad_norm": 0.9898038960160417, + "learning_rate": 4.772277524533237e-06, + "loss": 0.4374, + "step": 13576 + }, + { + "epoch": 0.8267819626708888, + "grad_norm": 1.012103208289907, + "learning_rate": 4.772244249313068e-06, + "loss": 0.4307, + "step": 13577 + }, + { + "epoch": 0.8268428584477666, + "grad_norm": 1.109569608781535, + "learning_rate": 4.772210971777984e-06, + "loss": 0.422, + "step": 13578 + }, + { + "epoch": 0.8269037542246446, + "grad_norm": 0.9885898761458884, + "learning_rate": 4.772177691928019e-06, + "loss": 0.4178, + "step": 13579 + }, + { + "epoch": 0.8269646500015224, + "grad_norm": 1.0122949730600657, + "learning_rate": 4.772144409763206e-06, + "loss": 0.3863, + "step": 13580 + }, + { + "epoch": 0.8270255457784003, + "grad_norm": 0.9286954230869864, + "learning_rate": 4.77211112528358e-06, + "loss": 0.4485, + "step": 13581 + }, + { + "epoch": 0.8270864415552781, + "grad_norm": 1.1387091714014534, + "learning_rate": 4.772077838489174e-06, + "loss": 0.4124, + "step": 13582 + }, + { + "epoch": 0.8271473373321561, + "grad_norm": 0.975597920258727, + "learning_rate": 4.772044549380023e-06, + "loss": 0.4187, + "step": 13583 + }, + { + "epoch": 0.8272082331090339, + "grad_norm": 1.0505758522223048, + "learning_rate": 4.7720112579561595e-06, + "loss": 0.4443, + "step": 13584 + }, + { + "epoch": 0.8272691288859118, + "grad_norm": 1.083441869565888, + "learning_rate": 4.771977964217619e-06, + "loss": 0.3538, + "step": 13585 + }, + { + "epoch": 0.8273300246627896, + "grad_norm": 1.0261246647578364, + "learning_rate": 4.771944668164434e-06, + "loss": 0.4602, + "step": 13586 + }, + { + "epoch": 0.8273909204396676, + "grad_norm": 0.9465333096515725, + "learning_rate": 4.771911369796639e-06, + "loss": 0.4681, + "step": 13587 + }, + { + "epoch": 0.8274518162165454, + "grad_norm": 0.9470603695619325, + "learning_rate": 4.771878069114269e-06, + "loss": 0.4093, + "step": 13588 + }, + { + "epoch": 0.8275127119934232, + "grad_norm": 0.9440530435761456, + "learning_rate": 4.771844766117355e-06, + "loss": 0.4508, + "step": 13589 + }, + { + "epoch": 0.8275736077703011, + "grad_norm": 0.9957379940534045, + "learning_rate": 4.771811460805934e-06, + "loss": 0.4508, + "step": 13590 + }, + { + "epoch": 0.827634503547179, + "grad_norm": 0.9405696468319878, + "learning_rate": 4.771778153180038e-06, + "loss": 0.4387, + "step": 13591 + }, + { + "epoch": 0.8276953993240569, + "grad_norm": 0.9858016931942265, + "learning_rate": 4.771744843239702e-06, + "loss": 0.4378, + "step": 13592 + }, + { + "epoch": 0.8277562951009347, + "grad_norm": 1.0064394794610163, + "learning_rate": 4.77171153098496e-06, + "loss": 0.3486, + "step": 13593 + }, + { + "epoch": 0.8278171908778126, + "grad_norm": 0.9517343924062538, + "learning_rate": 4.771678216415845e-06, + "loss": 0.3352, + "step": 13594 + }, + { + "epoch": 0.8278780866546905, + "grad_norm": 0.9365424431736886, + "learning_rate": 4.771644899532391e-06, + "loss": 0.4304, + "step": 13595 + }, + { + "epoch": 0.8279389824315684, + "grad_norm": 0.9312277212394438, + "learning_rate": 4.771611580334634e-06, + "loss": 0.3709, + "step": 13596 + }, + { + "epoch": 0.8279998782084462, + "grad_norm": 0.9422289142077387, + "learning_rate": 4.771578258822605e-06, + "loss": 0.4803, + "step": 13597 + }, + { + "epoch": 0.8280607739853241, + "grad_norm": 0.9571032389205657, + "learning_rate": 4.77154493499634e-06, + "loss": 0.4175, + "step": 13598 + }, + { + "epoch": 0.828121669762202, + "grad_norm": 1.0407764493681382, + "learning_rate": 4.771511608855872e-06, + "loss": 0.3964, + "step": 13599 + }, + { + "epoch": 0.8281825655390799, + "grad_norm": 1.0048755164702692, + "learning_rate": 4.771478280401235e-06, + "loss": 0.4268, + "step": 13600 + }, + { + "epoch": 0.8282434613159577, + "grad_norm": 1.0292827277576875, + "learning_rate": 4.7714449496324635e-06, + "loss": 0.3668, + "step": 13601 + }, + { + "epoch": 0.8283043570928356, + "grad_norm": 0.9659463127060847, + "learning_rate": 4.77141161654959e-06, + "loss": 0.4321, + "step": 13602 + }, + { + "epoch": 0.8283652528697135, + "grad_norm": 0.9919540392132843, + "learning_rate": 4.771378281152651e-06, + "loss": 0.4227, + "step": 13603 + }, + { + "epoch": 0.8284261486465914, + "grad_norm": 1.1735543895099643, + "learning_rate": 4.771344943441679e-06, + "loss": 0.3693, + "step": 13604 + }, + { + "epoch": 0.8284870444234692, + "grad_norm": 0.9936213857689242, + "learning_rate": 4.771311603416707e-06, + "loss": 0.4864, + "step": 13605 + }, + { + "epoch": 0.828547940200347, + "grad_norm": 1.0207865813806758, + "learning_rate": 4.7712782610777705e-06, + "loss": 0.4569, + "step": 13606 + }, + { + "epoch": 0.828608835977225, + "grad_norm": 0.97195680843358, + "learning_rate": 4.771244916424903e-06, + "loss": 0.4252, + "step": 13607 + }, + { + "epoch": 0.8286697317541029, + "grad_norm": 1.0014637371098265, + "learning_rate": 4.771211569458138e-06, + "loss": 0.361, + "step": 13608 + }, + { + "epoch": 0.8287306275309807, + "grad_norm": 0.995168647649751, + "learning_rate": 4.7711782201775105e-06, + "loss": 0.421, + "step": 13609 + }, + { + "epoch": 0.8287915233078585, + "grad_norm": 0.9676422760461195, + "learning_rate": 4.771144868583053e-06, + "loss": 0.4305, + "step": 13610 + }, + { + "epoch": 0.8288524190847365, + "grad_norm": 1.066965346854118, + "learning_rate": 4.771111514674801e-06, + "loss": 0.3716, + "step": 13611 + }, + { + "epoch": 0.8289133148616143, + "grad_norm": 1.0379735091976146, + "learning_rate": 4.771078158452788e-06, + "loss": 0.4255, + "step": 13612 + }, + { + "epoch": 0.8289742106384922, + "grad_norm": 0.9630970164554443, + "learning_rate": 4.771044799917047e-06, + "loss": 0.4165, + "step": 13613 + }, + { + "epoch": 0.8290351064153701, + "grad_norm": 0.9889105230394116, + "learning_rate": 4.771011439067613e-06, + "loss": 0.4247, + "step": 13614 + }, + { + "epoch": 0.829096002192248, + "grad_norm": 1.0012054537902568, + "learning_rate": 4.77097807590452e-06, + "loss": 0.4294, + "step": 13615 + }, + { + "epoch": 0.8291568979691258, + "grad_norm": 0.9880105210136206, + "learning_rate": 4.770944710427802e-06, + "loss": 0.4585, + "step": 13616 + }, + { + "epoch": 0.8292177937460037, + "grad_norm": 0.9914356844710202, + "learning_rate": 4.7709113426374924e-06, + "loss": 0.5219, + "step": 13617 + }, + { + "epoch": 0.8292786895228816, + "grad_norm": 0.9773014704263622, + "learning_rate": 4.770877972533625e-06, + "loss": 0.4, + "step": 13618 + }, + { + "epoch": 0.8293395852997595, + "grad_norm": 1.0261775408742606, + "learning_rate": 4.770844600116236e-06, + "loss": 0.3747, + "step": 13619 + }, + { + "epoch": 0.8294004810766373, + "grad_norm": 0.9668389221386277, + "learning_rate": 4.7708112253853565e-06, + "loss": 0.3968, + "step": 13620 + }, + { + "epoch": 0.8294613768535152, + "grad_norm": 0.9766730857704164, + "learning_rate": 4.770777848341022e-06, + "loss": 0.3889, + "step": 13621 + }, + { + "epoch": 0.8295222726303931, + "grad_norm": 0.9906548305725494, + "learning_rate": 4.770744468983266e-06, + "loss": 0.3756, + "step": 13622 + }, + { + "epoch": 0.829583168407271, + "grad_norm": 1.0013000165495418, + "learning_rate": 4.770711087312122e-06, + "loss": 0.409, + "step": 13623 + }, + { + "epoch": 0.8296440641841488, + "grad_norm": 0.9857885831699157, + "learning_rate": 4.7706777033276264e-06, + "loss": 0.4437, + "step": 13624 + }, + { + "epoch": 0.8297049599610267, + "grad_norm": 0.9963912148767377, + "learning_rate": 4.770644317029811e-06, + "loss": 0.3701, + "step": 13625 + }, + { + "epoch": 0.8297658557379046, + "grad_norm": 1.1179414700060968, + "learning_rate": 4.77061092841871e-06, + "loss": 0.4029, + "step": 13626 + }, + { + "epoch": 0.8298267515147825, + "grad_norm": 1.0547751522887852, + "learning_rate": 4.770577537494357e-06, + "loss": 0.4242, + "step": 13627 + }, + { + "epoch": 0.8298876472916603, + "grad_norm": 1.0453691859660936, + "learning_rate": 4.7705441442567886e-06, + "loss": 0.374, + "step": 13628 + }, + { + "epoch": 0.8299485430685382, + "grad_norm": 1.0567612471544658, + "learning_rate": 4.770510748706037e-06, + "loss": 0.4178, + "step": 13629 + }, + { + "epoch": 0.8300094388454161, + "grad_norm": 0.9947701595941904, + "learning_rate": 4.770477350842134e-06, + "loss": 0.4339, + "step": 13630 + }, + { + "epoch": 0.830070334622294, + "grad_norm": 0.9865708652256412, + "learning_rate": 4.770443950665118e-06, + "loss": 0.3906, + "step": 13631 + }, + { + "epoch": 0.8301312303991718, + "grad_norm": 1.032003353722166, + "learning_rate": 4.7704105481750205e-06, + "loss": 0.429, + "step": 13632 + }, + { + "epoch": 0.8301921261760496, + "grad_norm": 1.0592059071809672, + "learning_rate": 4.770377143371875e-06, + "loss": 0.4076, + "step": 13633 + }, + { + "epoch": 0.8302530219529276, + "grad_norm": 0.941156311494253, + "learning_rate": 4.770343736255717e-06, + "loss": 0.4058, + "step": 13634 + }, + { + "epoch": 0.8303139177298055, + "grad_norm": 0.9959721993024002, + "learning_rate": 4.770310326826581e-06, + "loss": 0.3995, + "step": 13635 + }, + { + "epoch": 0.8303748135066833, + "grad_norm": 0.9872295052297176, + "learning_rate": 4.770276915084498e-06, + "loss": 0.4831, + "step": 13636 + }, + { + "epoch": 0.8304357092835611, + "grad_norm": 0.9757976081779488, + "learning_rate": 4.770243501029506e-06, + "loss": 0.4704, + "step": 13637 + }, + { + "epoch": 0.8304966050604391, + "grad_norm": 0.9577938709782847, + "learning_rate": 4.770210084661636e-06, + "loss": 0.4433, + "step": 13638 + }, + { + "epoch": 0.8305575008373169, + "grad_norm": 1.0053987555825414, + "learning_rate": 4.770176665980924e-06, + "loss": 0.4521, + "step": 13639 + }, + { + "epoch": 0.8306183966141948, + "grad_norm": 0.9285058527284112, + "learning_rate": 4.770143244987403e-06, + "loss": 0.4044, + "step": 13640 + }, + { + "epoch": 0.8306792923910726, + "grad_norm": 1.0954934351289651, + "learning_rate": 4.770109821681107e-06, + "loss": 0.5028, + "step": 13641 + }, + { + "epoch": 0.8307401881679506, + "grad_norm": 1.000513990890976, + "learning_rate": 4.77007639606207e-06, + "loss": 0.4048, + "step": 13642 + }, + { + "epoch": 0.8308010839448284, + "grad_norm": 1.0685930287318008, + "learning_rate": 4.770042968130327e-06, + "loss": 0.4252, + "step": 13643 + }, + { + "epoch": 0.8308619797217063, + "grad_norm": 1.203196848294939, + "learning_rate": 4.770009537885911e-06, + "loss": 0.4096, + "step": 13644 + }, + { + "epoch": 0.8309228754985841, + "grad_norm": 1.0387812565481487, + "learning_rate": 4.769976105328856e-06, + "loss": 0.5284, + "step": 13645 + }, + { + "epoch": 0.8309837712754621, + "grad_norm": 1.1185640953114997, + "learning_rate": 4.769942670459198e-06, + "loss": 0.4069, + "step": 13646 + }, + { + "epoch": 0.8310446670523399, + "grad_norm": 0.9537486790954954, + "learning_rate": 4.769909233276968e-06, + "loss": 0.3955, + "step": 13647 + }, + { + "epoch": 0.8311055628292178, + "grad_norm": 1.0042765123582427, + "learning_rate": 4.769875793782202e-06, + "loss": 0.3509, + "step": 13648 + }, + { + "epoch": 0.8311664586060956, + "grad_norm": 0.9757714082231317, + "learning_rate": 4.7698423519749346e-06, + "loss": 0.4379, + "step": 13649 + }, + { + "epoch": 0.8312273543829736, + "grad_norm": 0.9972299033283395, + "learning_rate": 4.769808907855199e-06, + "loss": 0.397, + "step": 13650 + }, + { + "epoch": 0.8312882501598514, + "grad_norm": 1.0042080053075157, + "learning_rate": 4.769775461423029e-06, + "loss": 0.3567, + "step": 13651 + }, + { + "epoch": 0.8313491459367293, + "grad_norm": 1.0383446783138868, + "learning_rate": 4.769742012678459e-06, + "loss": 0.3381, + "step": 13652 + }, + { + "epoch": 0.8314100417136071, + "grad_norm": 0.9435605997045952, + "learning_rate": 4.7697085616215226e-06, + "loss": 0.4275, + "step": 13653 + }, + { + "epoch": 0.8314709374904851, + "grad_norm": 0.9650736599896385, + "learning_rate": 4.769675108252254e-06, + "loss": 0.4272, + "step": 13654 + }, + { + "epoch": 0.8315318332673629, + "grad_norm": 0.9914705144702968, + "learning_rate": 4.769641652570688e-06, + "loss": 0.3353, + "step": 13655 + }, + { + "epoch": 0.8315927290442408, + "grad_norm": 0.9477147504207332, + "learning_rate": 4.769608194576859e-06, + "loss": 0.4581, + "step": 13656 + }, + { + "epoch": 0.8316536248211187, + "grad_norm": 1.0477694649435214, + "learning_rate": 4.769574734270799e-06, + "loss": 0.4104, + "step": 13657 + }, + { + "epoch": 0.8317145205979966, + "grad_norm": 1.1169083636980095, + "learning_rate": 4.769541271652545e-06, + "loss": 0.3845, + "step": 13658 + }, + { + "epoch": 0.8317754163748744, + "grad_norm": 1.0662126976914, + "learning_rate": 4.769507806722128e-06, + "loss": 0.352, + "step": 13659 + }, + { + "epoch": 0.8318363121517522, + "grad_norm": 1.1102898226899292, + "learning_rate": 4.769474339479584e-06, + "loss": 0.4267, + "step": 13660 + }, + { + "epoch": 0.8318972079286302, + "grad_norm": 1.0177101527282002, + "learning_rate": 4.769440869924948e-06, + "loss": 0.3692, + "step": 13661 + }, + { + "epoch": 0.831958103705508, + "grad_norm": 1.0047573593755954, + "learning_rate": 4.769407398058252e-06, + "loss": 0.4639, + "step": 13662 + }, + { + "epoch": 0.8320189994823859, + "grad_norm": 0.9765216545240666, + "learning_rate": 4.76937392387953e-06, + "loss": 0.3825, + "step": 13663 + }, + { + "epoch": 0.8320798952592637, + "grad_norm": 1.0012776092189197, + "learning_rate": 4.769340447388819e-06, + "loss": 0.4221, + "step": 13664 + }, + { + "epoch": 0.8321407910361417, + "grad_norm": 1.0185504719006224, + "learning_rate": 4.7693069685861494e-06, + "loss": 0.3842, + "step": 13665 + }, + { + "epoch": 0.8322016868130195, + "grad_norm": 1.0241999046938268, + "learning_rate": 4.769273487471557e-06, + "loss": 0.4241, + "step": 13666 + }, + { + "epoch": 0.8322625825898974, + "grad_norm": 0.9324056238248658, + "learning_rate": 4.769240004045077e-06, + "loss": 0.5575, + "step": 13667 + }, + { + "epoch": 0.8323234783667752, + "grad_norm": 0.940766787411794, + "learning_rate": 4.769206518306741e-06, + "loss": 0.4662, + "step": 13668 + }, + { + "epoch": 0.8323843741436532, + "grad_norm": 1.004053722628415, + "learning_rate": 4.769173030256586e-06, + "loss": 0.4038, + "step": 13669 + }, + { + "epoch": 0.832445269920531, + "grad_norm": 0.9344086121008316, + "learning_rate": 4.769139539894645e-06, + "loss": 0.4283, + "step": 13670 + }, + { + "epoch": 0.8325061656974089, + "grad_norm": 1.0388060966922399, + "learning_rate": 4.769106047220951e-06, + "loss": 0.3641, + "step": 13671 + }, + { + "epoch": 0.8325670614742867, + "grad_norm": 1.0008910216442506, + "learning_rate": 4.769072552235539e-06, + "loss": 0.3968, + "step": 13672 + }, + { + "epoch": 0.8326279572511647, + "grad_norm": 0.962308515299304, + "learning_rate": 4.769039054938444e-06, + "loss": 0.4113, + "step": 13673 + }, + { + "epoch": 0.8326888530280425, + "grad_norm": 0.999883482596564, + "learning_rate": 4.769005555329698e-06, + "loss": 0.406, + "step": 13674 + }, + { + "epoch": 0.8327497488049204, + "grad_norm": 0.9832738853748295, + "learning_rate": 4.768972053409336e-06, + "loss": 0.4145, + "step": 13675 + }, + { + "epoch": 0.8328106445817982, + "grad_norm": 0.9564261218280773, + "learning_rate": 4.7689385491773934e-06, + "loss": 0.4226, + "step": 13676 + }, + { + "epoch": 0.8328715403586762, + "grad_norm": 1.0678668683033232, + "learning_rate": 4.768905042633902e-06, + "loss": 0.4327, + "step": 13677 + }, + { + "epoch": 0.832932436135554, + "grad_norm": 1.0383567321900598, + "learning_rate": 4.7688715337788995e-06, + "loss": 0.3894, + "step": 13678 + }, + { + "epoch": 0.8329933319124319, + "grad_norm": 0.9322774307425099, + "learning_rate": 4.768838022612417e-06, + "loss": 0.4277, + "step": 13679 + }, + { + "epoch": 0.8330542276893097, + "grad_norm": 1.0120028975675293, + "learning_rate": 4.768804509134488e-06, + "loss": 0.4781, + "step": 13680 + }, + { + "epoch": 0.8331151234661877, + "grad_norm": 0.9464085710342116, + "learning_rate": 4.76877099334515e-06, + "loss": 0.4179, + "step": 13681 + }, + { + "epoch": 0.8331760192430655, + "grad_norm": 1.0629902061270184, + "learning_rate": 4.7687374752444345e-06, + "loss": 0.4654, + "step": 13682 + }, + { + "epoch": 0.8332369150199433, + "grad_norm": 1.069355973440086, + "learning_rate": 4.768703954832376e-06, + "loss": 0.4149, + "step": 13683 + }, + { + "epoch": 0.8332978107968212, + "grad_norm": 1.065279978065118, + "learning_rate": 4.76867043210901e-06, + "loss": 0.452, + "step": 13684 + }, + { + "epoch": 0.8333587065736991, + "grad_norm": 1.0094258818930122, + "learning_rate": 4.768636907074369e-06, + "loss": 0.4046, + "step": 13685 + }, + { + "epoch": 0.833419602350577, + "grad_norm": 0.9898183969812605, + "learning_rate": 4.768603379728489e-06, + "loss": 0.3867, + "step": 13686 + }, + { + "epoch": 0.8334804981274548, + "grad_norm": 0.9721517145764698, + "learning_rate": 4.7685698500714016e-06, + "loss": 0.4204, + "step": 13687 + }, + { + "epoch": 0.8335413939043327, + "grad_norm": 1.042411066640602, + "learning_rate": 4.768536318103143e-06, + "loss": 0.4126, + "step": 13688 + }, + { + "epoch": 0.8336022896812106, + "grad_norm": 0.961790620085323, + "learning_rate": 4.768502783823748e-06, + "loss": 0.4825, + "step": 13689 + }, + { + "epoch": 0.8336631854580885, + "grad_norm": 1.0723975853720944, + "learning_rate": 4.7684692472332476e-06, + "loss": 0.3924, + "step": 13690 + }, + { + "epoch": 0.8337240812349663, + "grad_norm": 0.9494083275641326, + "learning_rate": 4.768435708331678e-06, + "loss": 0.4516, + "step": 13691 + }, + { + "epoch": 0.8337849770118442, + "grad_norm": 1.017271972214318, + "learning_rate": 4.768402167119074e-06, + "loss": 0.4393, + "step": 13692 + }, + { + "epoch": 0.8338458727887221, + "grad_norm": 0.8745565724804664, + "learning_rate": 4.768368623595469e-06, + "loss": 0.4332, + "step": 13693 + }, + { + "epoch": 0.8339067685656, + "grad_norm": 0.9686757664888994, + "learning_rate": 4.768335077760897e-06, + "loss": 0.4802, + "step": 13694 + }, + { + "epoch": 0.8339676643424778, + "grad_norm": 0.9534891899133318, + "learning_rate": 4.768301529615394e-06, + "loss": 0.4818, + "step": 13695 + }, + { + "epoch": 0.8340285601193558, + "grad_norm": 0.9549648431285899, + "learning_rate": 4.76826797915899e-06, + "loss": 0.4734, + "step": 13696 + }, + { + "epoch": 0.8340894558962336, + "grad_norm": 1.0023289516888927, + "learning_rate": 4.768234426391723e-06, + "loss": 0.4288, + "step": 13697 + }, + { + "epoch": 0.8341503516731115, + "grad_norm": 1.1129436281992988, + "learning_rate": 4.768200871313626e-06, + "loss": 0.3937, + "step": 13698 + }, + { + "epoch": 0.8342112474499893, + "grad_norm": 1.0502931326493978, + "learning_rate": 4.768167313924733e-06, + "loss": 0.4084, + "step": 13699 + }, + { + "epoch": 0.8342721432268673, + "grad_norm": 0.876905315707353, + "learning_rate": 4.768133754225077e-06, + "loss": 0.4349, + "step": 13700 + }, + { + "epoch": 0.8343330390037451, + "grad_norm": 1.0354648707495813, + "learning_rate": 4.768100192214695e-06, + "loss": 0.3457, + "step": 13701 + }, + { + "epoch": 0.834393934780623, + "grad_norm": 0.99866977724207, + "learning_rate": 4.768066627893619e-06, + "loss": 0.3741, + "step": 13702 + }, + { + "epoch": 0.8344548305575008, + "grad_norm": 0.9449683059272791, + "learning_rate": 4.768033061261885e-06, + "loss": 0.4455, + "step": 13703 + }, + { + "epoch": 0.8345157263343788, + "grad_norm": 1.051035516755037, + "learning_rate": 4.767999492319525e-06, + "loss": 0.4377, + "step": 13704 + }, + { + "epoch": 0.8345766221112566, + "grad_norm": 0.9549477156635945, + "learning_rate": 4.767965921066575e-06, + "loss": 0.3795, + "step": 13705 + }, + { + "epoch": 0.8346375178881345, + "grad_norm": 0.9411978799885229, + "learning_rate": 4.767932347503068e-06, + "loss": 0.4501, + "step": 13706 + }, + { + "epoch": 0.8346984136650123, + "grad_norm": 1.0015568991610122, + "learning_rate": 4.7678987716290384e-06, + "loss": 0.4224, + "step": 13707 + }, + { + "epoch": 0.8347593094418903, + "grad_norm": 1.0824629902773815, + "learning_rate": 4.767865193444521e-06, + "loss": 0.3543, + "step": 13708 + }, + { + "epoch": 0.8348202052187681, + "grad_norm": 0.9760555350664815, + "learning_rate": 4.7678316129495496e-06, + "loss": 0.4059, + "step": 13709 + }, + { + "epoch": 0.8348811009956459, + "grad_norm": 1.08015593371916, + "learning_rate": 4.767798030144158e-06, + "loss": 0.4034, + "step": 13710 + }, + { + "epoch": 0.8349419967725238, + "grad_norm": 1.0968882229232604, + "learning_rate": 4.767764445028382e-06, + "loss": 0.3876, + "step": 13711 + }, + { + "epoch": 0.8350028925494017, + "grad_norm": 0.9308203229298343, + "learning_rate": 4.767730857602254e-06, + "loss": 0.4639, + "step": 13712 + }, + { + "epoch": 0.8350637883262796, + "grad_norm": 1.0260117188725324, + "learning_rate": 4.76769726786581e-06, + "loss": 0.4369, + "step": 13713 + }, + { + "epoch": 0.8351246841031574, + "grad_norm": 0.9562593404823762, + "learning_rate": 4.767663675819082e-06, + "loss": 0.4222, + "step": 13714 + }, + { + "epoch": 0.8351855798800353, + "grad_norm": 0.8615280650127894, + "learning_rate": 4.767630081462106e-06, + "loss": 0.4948, + "step": 13715 + }, + { + "epoch": 0.8352464756569132, + "grad_norm": 0.9822638647132412, + "learning_rate": 4.767596484794915e-06, + "loss": 0.4193, + "step": 13716 + }, + { + "epoch": 0.8353073714337911, + "grad_norm": 1.0109978792008114, + "learning_rate": 4.767562885817544e-06, + "loss": 0.3877, + "step": 13717 + }, + { + "epoch": 0.8353682672106689, + "grad_norm": 0.9604316733381794, + "learning_rate": 4.767529284530028e-06, + "loss": 0.4398, + "step": 13718 + }, + { + "epoch": 0.8354291629875468, + "grad_norm": 1.0349523485500436, + "learning_rate": 4.767495680932399e-06, + "loss": 0.4313, + "step": 13719 + }, + { + "epoch": 0.8354900587644247, + "grad_norm": 0.9376598424100143, + "learning_rate": 4.767462075024694e-06, + "loss": 0.4184, + "step": 13720 + }, + { + "epoch": 0.8355509545413026, + "grad_norm": 1.0041013651770316, + "learning_rate": 4.767428466806945e-06, + "loss": 0.4507, + "step": 13721 + }, + { + "epoch": 0.8356118503181804, + "grad_norm": 0.9660830704535923, + "learning_rate": 4.767394856279186e-06, + "loss": 0.4669, + "step": 13722 + }, + { + "epoch": 0.8356727460950583, + "grad_norm": 1.0084234988248317, + "learning_rate": 4.767361243441453e-06, + "loss": 0.3627, + "step": 13723 + }, + { + "epoch": 0.8357336418719362, + "grad_norm": 1.0856652632228647, + "learning_rate": 4.76732762829378e-06, + "loss": 0.4221, + "step": 13724 + }, + { + "epoch": 0.8357945376488141, + "grad_norm": 1.0627617427006, + "learning_rate": 4.7672940108362e-06, + "loss": 0.3964, + "step": 13725 + }, + { + "epoch": 0.8358554334256919, + "grad_norm": 0.9804603795004702, + "learning_rate": 4.767260391068749e-06, + "loss": 0.4096, + "step": 13726 + }, + { + "epoch": 0.8359163292025698, + "grad_norm": 1.06639098464143, + "learning_rate": 4.76722676899146e-06, + "loss": 0.4089, + "step": 13727 + }, + { + "epoch": 0.8359772249794477, + "grad_norm": 0.9708773569455487, + "learning_rate": 4.767193144604367e-06, + "loss": 0.3769, + "step": 13728 + }, + { + "epoch": 0.8360381207563256, + "grad_norm": 1.0106443104070382, + "learning_rate": 4.767159517907505e-06, + "loss": 0.3417, + "step": 13729 + }, + { + "epoch": 0.8360990165332034, + "grad_norm": 0.993788122197714, + "learning_rate": 4.767125888900907e-06, + "loss": 0.4589, + "step": 13730 + }, + { + "epoch": 0.8361599123100812, + "grad_norm": 1.0230934926663406, + "learning_rate": 4.76709225758461e-06, + "loss": 0.3952, + "step": 13731 + }, + { + "epoch": 0.8362208080869592, + "grad_norm": 0.9173756456041495, + "learning_rate": 4.767058623958646e-06, + "loss": 0.4816, + "step": 13732 + }, + { + "epoch": 0.836281703863837, + "grad_norm": 0.9937307289827457, + "learning_rate": 4.767024988023049e-06, + "loss": 0.4322, + "step": 13733 + }, + { + "epoch": 0.8363425996407149, + "grad_norm": 1.0246186258710317, + "learning_rate": 4.766991349777855e-06, + "loss": 0.4136, + "step": 13734 + }, + { + "epoch": 0.8364034954175927, + "grad_norm": 1.0152268444377495, + "learning_rate": 4.766957709223097e-06, + "loss": 0.4003, + "step": 13735 + }, + { + "epoch": 0.8364643911944707, + "grad_norm": 0.9337317534172133, + "learning_rate": 4.76692406635881e-06, + "loss": 0.4466, + "step": 13736 + }, + { + "epoch": 0.8365252869713485, + "grad_norm": 0.9905350543085012, + "learning_rate": 4.7668904211850275e-06, + "loss": 0.4079, + "step": 13737 + }, + { + "epoch": 0.8365861827482264, + "grad_norm": 1.0295761095261537, + "learning_rate": 4.766856773701784e-06, + "loss": 0.428, + "step": 13738 + }, + { + "epoch": 0.8366470785251043, + "grad_norm": 0.9422565319818954, + "learning_rate": 4.7668231239091145e-06, + "loss": 0.4366, + "step": 13739 + }, + { + "epoch": 0.8367079743019822, + "grad_norm": 1.0070493089303085, + "learning_rate": 4.766789471807052e-06, + "loss": 0.3937, + "step": 13740 + }, + { + "epoch": 0.83676887007886, + "grad_norm": 1.0844249761253828, + "learning_rate": 4.766755817395632e-06, + "loss": 0.423, + "step": 13741 + }, + { + "epoch": 0.8368297658557379, + "grad_norm": 0.9450753141754159, + "learning_rate": 4.766722160674888e-06, + "loss": 0.3989, + "step": 13742 + }, + { + "epoch": 0.8368906616326158, + "grad_norm": 1.047961012912035, + "learning_rate": 4.766688501644855e-06, + "loss": 0.3559, + "step": 13743 + }, + { + "epoch": 0.8369515574094937, + "grad_norm": 0.9379563605759806, + "learning_rate": 4.766654840305566e-06, + "loss": 0.3904, + "step": 13744 + }, + { + "epoch": 0.8370124531863715, + "grad_norm": 0.9123664904588519, + "learning_rate": 4.766621176657058e-06, + "loss": 0.4488, + "step": 13745 + }, + { + "epoch": 0.8370733489632494, + "grad_norm": 0.974474394447223, + "learning_rate": 4.766587510699362e-06, + "loss": 0.3637, + "step": 13746 + }, + { + "epoch": 0.8371342447401273, + "grad_norm": 1.002279298424339, + "learning_rate": 4.766553842432514e-06, + "loss": 0.3747, + "step": 13747 + }, + { + "epoch": 0.8371951405170052, + "grad_norm": 0.9585255056415724, + "learning_rate": 4.766520171856548e-06, + "loss": 0.4167, + "step": 13748 + }, + { + "epoch": 0.837256036293883, + "grad_norm": 1.1026073840398753, + "learning_rate": 4.7664864989714985e-06, + "loss": 0.4369, + "step": 13749 + }, + { + "epoch": 0.8373169320707609, + "grad_norm": 1.0809134324232725, + "learning_rate": 4.7664528237774e-06, + "loss": 0.372, + "step": 13750 + }, + { + "epoch": 0.8373778278476388, + "grad_norm": 0.9871831360359894, + "learning_rate": 4.766419146274286e-06, + "loss": 0.4484, + "step": 13751 + }, + { + "epoch": 0.8374387236245167, + "grad_norm": 1.1026521389811694, + "learning_rate": 4.766385466462191e-06, + "loss": 0.3806, + "step": 13752 + }, + { + "epoch": 0.8374996194013945, + "grad_norm": 1.1002516649822633, + "learning_rate": 4.76635178434115e-06, + "loss": 0.3747, + "step": 13753 + }, + { + "epoch": 0.8375605151782723, + "grad_norm": 0.9706956168963501, + "learning_rate": 4.7663180999111975e-06, + "loss": 0.4719, + "step": 13754 + }, + { + "epoch": 0.8376214109551503, + "grad_norm": 0.9831438683552574, + "learning_rate": 4.766284413172367e-06, + "loss": 0.4589, + "step": 13755 + }, + { + "epoch": 0.8376823067320281, + "grad_norm": 1.0443188953812754, + "learning_rate": 4.7662507241246925e-06, + "loss": 0.3699, + "step": 13756 + }, + { + "epoch": 0.837743202508906, + "grad_norm": 0.9886215386707945, + "learning_rate": 4.766217032768209e-06, + "loss": 0.4141, + "step": 13757 + }, + { + "epoch": 0.8378040982857838, + "grad_norm": 0.9754760225363541, + "learning_rate": 4.766183339102951e-06, + "loss": 0.4077, + "step": 13758 + }, + { + "epoch": 0.8378649940626618, + "grad_norm": 0.9620003081736435, + "learning_rate": 4.766149643128952e-06, + "loss": 0.3866, + "step": 13759 + }, + { + "epoch": 0.8379258898395396, + "grad_norm": 1.0017409955277659, + "learning_rate": 4.766115944846248e-06, + "loss": 0.3497, + "step": 13760 + }, + { + "epoch": 0.8379867856164175, + "grad_norm": 1.0085300203288665, + "learning_rate": 4.766082244254871e-06, + "loss": 0.4252, + "step": 13761 + }, + { + "epoch": 0.8380476813932953, + "grad_norm": 1.0370586799816952, + "learning_rate": 4.766048541354857e-06, + "loss": 0.3487, + "step": 13762 + }, + { + "epoch": 0.8381085771701733, + "grad_norm": 1.0355317827721586, + "learning_rate": 4.76601483614624e-06, + "loss": 0.407, + "step": 13763 + }, + { + "epoch": 0.8381694729470511, + "grad_norm": 1.010426355404685, + "learning_rate": 4.765981128629054e-06, + "loss": 0.4508, + "step": 13764 + }, + { + "epoch": 0.838230368723929, + "grad_norm": 0.9854225275279045, + "learning_rate": 4.765947418803334e-06, + "loss": 0.3899, + "step": 13765 + }, + { + "epoch": 0.8382912645008068, + "grad_norm": 0.9664873079252655, + "learning_rate": 4.765913706669113e-06, + "loss": 0.3766, + "step": 13766 + }, + { + "epoch": 0.8383521602776848, + "grad_norm": 0.9832793513514666, + "learning_rate": 4.765879992226426e-06, + "loss": 0.4508, + "step": 13767 + }, + { + "epoch": 0.8384130560545626, + "grad_norm": 0.9477937810975237, + "learning_rate": 4.765846275475309e-06, + "loss": 0.4209, + "step": 13768 + }, + { + "epoch": 0.8384739518314405, + "grad_norm": 1.1017718878950247, + "learning_rate": 4.7658125564157945e-06, + "loss": 0.3774, + "step": 13769 + }, + { + "epoch": 0.8385348476083183, + "grad_norm": 1.1410762822588363, + "learning_rate": 4.765778835047916e-06, + "loss": 0.4129, + "step": 13770 + }, + { + "epoch": 0.8385957433851963, + "grad_norm": 0.9915717912848404, + "learning_rate": 4.765745111371711e-06, + "loss": 0.3454, + "step": 13771 + }, + { + "epoch": 0.8386566391620741, + "grad_norm": 1.0271542491069467, + "learning_rate": 4.76571138538721e-06, + "loss": 0.5246, + "step": 13772 + }, + { + "epoch": 0.838717534938952, + "grad_norm": 1.018214698588612, + "learning_rate": 4.765677657094451e-06, + "loss": 0.4077, + "step": 13773 + }, + { + "epoch": 0.8387784307158298, + "grad_norm": 1.0237384569727714, + "learning_rate": 4.765643926493466e-06, + "loss": 0.4364, + "step": 13774 + }, + { + "epoch": 0.8388393264927078, + "grad_norm": 0.9580345316551615, + "learning_rate": 4.76561019358429e-06, + "loss": 0.4371, + "step": 13775 + }, + { + "epoch": 0.8389002222695856, + "grad_norm": 1.1676902212562073, + "learning_rate": 4.765576458366958e-06, + "loss": 0.3403, + "step": 13776 + }, + { + "epoch": 0.8389611180464634, + "grad_norm": 1.0288546936020566, + "learning_rate": 4.765542720841503e-06, + "loss": 0.3608, + "step": 13777 + }, + { + "epoch": 0.8390220138233414, + "grad_norm": 1.0613051612156503, + "learning_rate": 4.765508981007961e-06, + "loss": 0.3712, + "step": 13778 + }, + { + "epoch": 0.8390829096002193, + "grad_norm": 0.9755180000459381, + "learning_rate": 4.765475238866365e-06, + "loss": 0.4131, + "step": 13779 + }, + { + "epoch": 0.8391438053770971, + "grad_norm": 1.029446291778636, + "learning_rate": 4.7654414944167505e-06, + "loss": 0.4186, + "step": 13780 + }, + { + "epoch": 0.8392047011539749, + "grad_norm": 1.0326434765771781, + "learning_rate": 4.765407747659151e-06, + "loss": 0.3593, + "step": 13781 + }, + { + "epoch": 0.8392655969308529, + "grad_norm": 0.9804771913378901, + "learning_rate": 4.765373998593601e-06, + "loss": 0.3734, + "step": 13782 + }, + { + "epoch": 0.8393264927077307, + "grad_norm": 1.062570146003364, + "learning_rate": 4.7653402472201345e-06, + "loss": 0.3545, + "step": 13783 + }, + { + "epoch": 0.8393873884846086, + "grad_norm": 0.9054143524446101, + "learning_rate": 4.765306493538787e-06, + "loss": 0.4447, + "step": 13784 + }, + { + "epoch": 0.8394482842614864, + "grad_norm": 0.9864043799617974, + "learning_rate": 4.765272737549593e-06, + "loss": 0.4098, + "step": 13785 + }, + { + "epoch": 0.8395091800383644, + "grad_norm": 0.8841579548605218, + "learning_rate": 4.765238979252586e-06, + "loss": 0.5067, + "step": 13786 + }, + { + "epoch": 0.8395700758152422, + "grad_norm": 1.0555430316701166, + "learning_rate": 4.7652052186478e-06, + "loss": 0.401, + "step": 13787 + }, + { + "epoch": 0.8396309715921201, + "grad_norm": 0.9226530758617256, + "learning_rate": 4.76517145573527e-06, + "loss": 0.4207, + "step": 13788 + }, + { + "epoch": 0.8396918673689979, + "grad_norm": 1.0699075783992635, + "learning_rate": 4.765137690515031e-06, + "loss": 0.3621, + "step": 13789 + }, + { + "epoch": 0.8397527631458759, + "grad_norm": 1.063433643140824, + "learning_rate": 4.765103922987117e-06, + "loss": 0.4204, + "step": 13790 + }, + { + "epoch": 0.8398136589227537, + "grad_norm": 1.0292836709961728, + "learning_rate": 4.765070153151561e-06, + "loss": 0.3864, + "step": 13791 + }, + { + "epoch": 0.8398745546996316, + "grad_norm": 0.934640356457134, + "learning_rate": 4.7650363810084e-06, + "loss": 0.4309, + "step": 13792 + }, + { + "epoch": 0.8399354504765094, + "grad_norm": 0.9099219543121125, + "learning_rate": 4.765002606557666e-06, + "loss": 0.4892, + "step": 13793 + }, + { + "epoch": 0.8399963462533874, + "grad_norm": 0.9622430003537962, + "learning_rate": 4.7649688297993946e-06, + "loss": 0.4253, + "step": 13794 + }, + { + "epoch": 0.8400572420302652, + "grad_norm": 0.9175593617209713, + "learning_rate": 4.764935050733621e-06, + "loss": 0.483, + "step": 13795 + }, + { + "epoch": 0.8401181378071431, + "grad_norm": 1.0018808999748128, + "learning_rate": 4.764901269360377e-06, + "loss": 0.3912, + "step": 13796 + }, + { + "epoch": 0.8401790335840209, + "grad_norm": 1.0412074623468497, + "learning_rate": 4.7648674856797e-06, + "loss": 0.3899, + "step": 13797 + }, + { + "epoch": 0.8402399293608989, + "grad_norm": 1.0478918416247154, + "learning_rate": 4.764833699691623e-06, + "loss": 0.4533, + "step": 13798 + }, + { + "epoch": 0.8403008251377767, + "grad_norm": 1.1239957406775996, + "learning_rate": 4.764799911396179e-06, + "loss": 0.4192, + "step": 13799 + }, + { + "epoch": 0.8403617209146546, + "grad_norm": 1.0009265621808145, + "learning_rate": 4.764766120793406e-06, + "loss": 0.4032, + "step": 13800 + }, + { + "epoch": 0.8404226166915324, + "grad_norm": 1.0576137173053912, + "learning_rate": 4.7647323278833355e-06, + "loss": 0.4265, + "step": 13801 + }, + { + "epoch": 0.8404835124684104, + "grad_norm": 0.9895348449201645, + "learning_rate": 4.764698532666003e-06, + "loss": 0.4261, + "step": 13802 + }, + { + "epoch": 0.8405444082452882, + "grad_norm": 0.9756990363619527, + "learning_rate": 4.764664735141443e-06, + "loss": 0.4485, + "step": 13803 + }, + { + "epoch": 0.840605304022166, + "grad_norm": 0.9576593964995735, + "learning_rate": 4.764630935309689e-06, + "loss": 0.3928, + "step": 13804 + }, + { + "epoch": 0.8406661997990439, + "grad_norm": 0.9760106245186904, + "learning_rate": 4.764597133170776e-06, + "loss": 0.493, + "step": 13805 + }, + { + "epoch": 0.8407270955759218, + "grad_norm": 1.050895876055306, + "learning_rate": 4.764563328724739e-06, + "loss": 0.3916, + "step": 13806 + }, + { + "epoch": 0.8407879913527997, + "grad_norm": 0.9305464195688168, + "learning_rate": 4.764529521971612e-06, + "loss": 0.4288, + "step": 13807 + }, + { + "epoch": 0.8408488871296775, + "grad_norm": 0.9698170290107911, + "learning_rate": 4.764495712911429e-06, + "loss": 0.4236, + "step": 13808 + }, + { + "epoch": 0.8409097829065554, + "grad_norm": 1.1710386628072325, + "learning_rate": 4.764461901544225e-06, + "loss": 0.4353, + "step": 13809 + }, + { + "epoch": 0.8409706786834333, + "grad_norm": 0.9797966466611386, + "learning_rate": 4.7644280878700345e-06, + "loss": 0.4312, + "step": 13810 + }, + { + "epoch": 0.8410315744603112, + "grad_norm": 1.002897553078735, + "learning_rate": 4.764394271888891e-06, + "loss": 0.4273, + "step": 13811 + }, + { + "epoch": 0.841092470237189, + "grad_norm": 0.946675749611563, + "learning_rate": 4.76436045360083e-06, + "loss": 0.4544, + "step": 13812 + }, + { + "epoch": 0.8411533660140669, + "grad_norm": 0.9794263514741629, + "learning_rate": 4.764326633005887e-06, + "loss": 0.4229, + "step": 13813 + }, + { + "epoch": 0.8412142617909448, + "grad_norm": 1.0573425777588825, + "learning_rate": 4.764292810104093e-06, + "loss": 0.3752, + "step": 13814 + }, + { + "epoch": 0.8412751575678227, + "grad_norm": 0.9633469650098133, + "learning_rate": 4.7642589848954855e-06, + "loss": 0.3901, + "step": 13815 + }, + { + "epoch": 0.8413360533447005, + "grad_norm": 1.0008074856420324, + "learning_rate": 4.764225157380098e-06, + "loss": 0.3553, + "step": 13816 + }, + { + "epoch": 0.8413969491215784, + "grad_norm": 0.9019151594473391, + "learning_rate": 4.764191327557966e-06, + "loss": 0.499, + "step": 13817 + }, + { + "epoch": 0.8414578448984563, + "grad_norm": 1.0043232687008012, + "learning_rate": 4.764157495429121e-06, + "loss": 0.453, + "step": 13818 + }, + { + "epoch": 0.8415187406753342, + "grad_norm": 0.9707617195586231, + "learning_rate": 4.764123660993601e-06, + "loss": 0.3804, + "step": 13819 + }, + { + "epoch": 0.841579636452212, + "grad_norm": 0.974098266104385, + "learning_rate": 4.7640898242514385e-06, + "loss": 0.3757, + "step": 13820 + }, + { + "epoch": 0.84164053222909, + "grad_norm": 1.0789624382030916, + "learning_rate": 4.764055985202668e-06, + "loss": 0.4438, + "step": 13821 + }, + { + "epoch": 0.8417014280059678, + "grad_norm": 1.04426854284939, + "learning_rate": 4.764022143847324e-06, + "loss": 0.3996, + "step": 13822 + }, + { + "epoch": 0.8417623237828457, + "grad_norm": 0.9908966997610237, + "learning_rate": 4.7639883001854426e-06, + "loss": 0.3938, + "step": 13823 + }, + { + "epoch": 0.8418232195597235, + "grad_norm": 1.114778215446856, + "learning_rate": 4.763954454217055e-06, + "loss": 0.447, + "step": 13824 + }, + { + "epoch": 0.8418841153366015, + "grad_norm": 0.9773674666451796, + "learning_rate": 4.763920605942199e-06, + "loss": 0.4082, + "step": 13825 + }, + { + "epoch": 0.8419450111134793, + "grad_norm": 1.0077103988147151, + "learning_rate": 4.763886755360908e-06, + "loss": 0.4479, + "step": 13826 + }, + { + "epoch": 0.8420059068903571, + "grad_norm": 1.0752678326785907, + "learning_rate": 4.763852902473216e-06, + "loss": 0.3937, + "step": 13827 + }, + { + "epoch": 0.842066802667235, + "grad_norm": 0.9991542233730061, + "learning_rate": 4.763819047279158e-06, + "loss": 0.4167, + "step": 13828 + }, + { + "epoch": 0.842127698444113, + "grad_norm": 1.030765964857511, + "learning_rate": 4.763785189778768e-06, + "loss": 0.4371, + "step": 13829 + }, + { + "epoch": 0.8421885942209908, + "grad_norm": 0.9548280478338499, + "learning_rate": 4.76375132997208e-06, + "loss": 0.4775, + "step": 13830 + }, + { + "epoch": 0.8422494899978686, + "grad_norm": 0.9252271703727993, + "learning_rate": 4.76371746785913e-06, + "loss": 0.4206, + "step": 13831 + }, + { + "epoch": 0.8423103857747465, + "grad_norm": 1.027041337474208, + "learning_rate": 4.763683603439951e-06, + "loss": 0.3653, + "step": 13832 + }, + { + "epoch": 0.8423712815516244, + "grad_norm": 0.9859107637238524, + "learning_rate": 4.763649736714579e-06, + "loss": 0.4185, + "step": 13833 + }, + { + "epoch": 0.8424321773285023, + "grad_norm": 0.9177008752510332, + "learning_rate": 4.763615867683047e-06, + "loss": 0.3967, + "step": 13834 + }, + { + "epoch": 0.8424930731053801, + "grad_norm": 0.9632235268722295, + "learning_rate": 4.763581996345391e-06, + "loss": 0.4644, + "step": 13835 + }, + { + "epoch": 0.842553968882258, + "grad_norm": 0.9459923063180311, + "learning_rate": 4.763548122701644e-06, + "loss": 0.4874, + "step": 13836 + }, + { + "epoch": 0.8426148646591359, + "grad_norm": 0.9799090751486921, + "learning_rate": 4.763514246751841e-06, + "loss": 0.382, + "step": 13837 + }, + { + "epoch": 0.8426757604360138, + "grad_norm": 1.0489619806728654, + "learning_rate": 4.763480368496018e-06, + "loss": 0.4155, + "step": 13838 + }, + { + "epoch": 0.8427366562128916, + "grad_norm": 1.0359963088560675, + "learning_rate": 4.763446487934208e-06, + "loss": 0.3688, + "step": 13839 + }, + { + "epoch": 0.8427975519897695, + "grad_norm": 0.9986548792348645, + "learning_rate": 4.763412605066445e-06, + "loss": 0.384, + "step": 13840 + }, + { + "epoch": 0.8428584477666474, + "grad_norm": 1.023786443320976, + "learning_rate": 4.763378719892765e-06, + "loss": 0.4971, + "step": 13841 + }, + { + "epoch": 0.8429193435435253, + "grad_norm": 1.1178113103615048, + "learning_rate": 4.763344832413201e-06, + "loss": 0.3994, + "step": 13842 + }, + { + "epoch": 0.8429802393204031, + "grad_norm": 0.9732744756341627, + "learning_rate": 4.763310942627789e-06, + "loss": 0.4628, + "step": 13843 + }, + { + "epoch": 0.843041135097281, + "grad_norm": 0.9849479976264284, + "learning_rate": 4.7632770505365625e-06, + "loss": 0.4072, + "step": 13844 + }, + { + "epoch": 0.8431020308741589, + "grad_norm": 1.0111888642345805, + "learning_rate": 4.763243156139556e-06, + "loss": 0.4369, + "step": 13845 + }, + { + "epoch": 0.8431629266510368, + "grad_norm": 0.9413544907972571, + "learning_rate": 4.763209259436805e-06, + "loss": 0.4651, + "step": 13846 + }, + { + "epoch": 0.8432238224279146, + "grad_norm": 0.9598460361292558, + "learning_rate": 4.763175360428344e-06, + "loss": 0.4382, + "step": 13847 + }, + { + "epoch": 0.8432847182047924, + "grad_norm": 0.9443828462949749, + "learning_rate": 4.763141459114207e-06, + "loss": 0.4041, + "step": 13848 + }, + { + "epoch": 0.8433456139816704, + "grad_norm": 1.0090772169265827, + "learning_rate": 4.763107555494427e-06, + "loss": 0.4299, + "step": 13849 + }, + { + "epoch": 0.8434065097585483, + "grad_norm": 1.0581874916868588, + "learning_rate": 4.763073649569041e-06, + "loss": 0.4018, + "step": 13850 + }, + { + "epoch": 0.8434674055354261, + "grad_norm": 0.9976544949491846, + "learning_rate": 4.763039741338083e-06, + "loss": 0.4129, + "step": 13851 + }, + { + "epoch": 0.8435283013123039, + "grad_norm": 0.9925426357412104, + "learning_rate": 4.763005830801587e-06, + "loss": 0.4117, + "step": 13852 + }, + { + "epoch": 0.8435891970891819, + "grad_norm": 1.0099151534180353, + "learning_rate": 4.762971917959587e-06, + "loss": 0.3794, + "step": 13853 + }, + { + "epoch": 0.8436500928660597, + "grad_norm": 1.0103876221147239, + "learning_rate": 4.762938002812119e-06, + "loss": 0.4238, + "step": 13854 + }, + { + "epoch": 0.8437109886429376, + "grad_norm": 1.020538102700544, + "learning_rate": 4.7629040853592166e-06, + "loss": 0.424, + "step": 13855 + }, + { + "epoch": 0.8437718844198154, + "grad_norm": 1.0596496064320335, + "learning_rate": 4.762870165600915e-06, + "loss": 0.4502, + "step": 13856 + }, + { + "epoch": 0.8438327801966934, + "grad_norm": 1.0073453475476093, + "learning_rate": 4.762836243537247e-06, + "loss": 0.4069, + "step": 13857 + }, + { + "epoch": 0.8438936759735712, + "grad_norm": 1.0335004462193953, + "learning_rate": 4.7628023191682495e-06, + "loss": 0.367, + "step": 13858 + }, + { + "epoch": 0.8439545717504491, + "grad_norm": 0.9225528377561096, + "learning_rate": 4.762768392493956e-06, + "loss": 0.4528, + "step": 13859 + }, + { + "epoch": 0.844015467527327, + "grad_norm": 1.0084753510650797, + "learning_rate": 4.762734463514401e-06, + "loss": 0.4158, + "step": 13860 + }, + { + "epoch": 0.8440763633042049, + "grad_norm": 1.0466079334588356, + "learning_rate": 4.76270053222962e-06, + "loss": 0.4012, + "step": 13861 + }, + { + "epoch": 0.8441372590810827, + "grad_norm": 1.145150242669326, + "learning_rate": 4.762666598639646e-06, + "loss": 0.3769, + "step": 13862 + }, + { + "epoch": 0.8441981548579606, + "grad_norm": 0.9760487110992617, + "learning_rate": 4.762632662744513e-06, + "loss": 0.4138, + "step": 13863 + }, + { + "epoch": 0.8442590506348385, + "grad_norm": 0.9371964615761901, + "learning_rate": 4.7625987245442584e-06, + "loss": 0.4384, + "step": 13864 + }, + { + "epoch": 0.8443199464117164, + "grad_norm": 0.9935450624569454, + "learning_rate": 4.762564784038915e-06, + "loss": 0.4404, + "step": 13865 + }, + { + "epoch": 0.8443808421885942, + "grad_norm": 0.9939383533335019, + "learning_rate": 4.762530841228518e-06, + "loss": 0.4756, + "step": 13866 + }, + { + "epoch": 0.8444417379654721, + "grad_norm": 1.0426784544331311, + "learning_rate": 4.762496896113101e-06, + "loss": 0.3451, + "step": 13867 + }, + { + "epoch": 0.84450263374235, + "grad_norm": 1.0486614349259789, + "learning_rate": 4.7624629486927e-06, + "loss": 0.3614, + "step": 13868 + }, + { + "epoch": 0.8445635295192279, + "grad_norm": 0.9677824318113679, + "learning_rate": 4.762428998967348e-06, + "loss": 0.4254, + "step": 13869 + }, + { + "epoch": 0.8446244252961057, + "grad_norm": 0.9453981645356412, + "learning_rate": 4.7623950469370805e-06, + "loss": 0.4036, + "step": 13870 + }, + { + "epoch": 0.8446853210729836, + "grad_norm": 0.8752953421773605, + "learning_rate": 4.7623610926019315e-06, + "loss": 0.4821, + "step": 13871 + }, + { + "epoch": 0.8447462168498615, + "grad_norm": 1.0450710177654754, + "learning_rate": 4.762327135961937e-06, + "loss": 0.4373, + "step": 13872 + }, + { + "epoch": 0.8448071126267394, + "grad_norm": 0.9784993373559642, + "learning_rate": 4.76229317701713e-06, + "loss": 0.4673, + "step": 13873 + }, + { + "epoch": 0.8448680084036172, + "grad_norm": 1.0574412612006892, + "learning_rate": 4.762259215767547e-06, + "loss": 0.348, + "step": 13874 + }, + { + "epoch": 0.844928904180495, + "grad_norm": 1.1282907899775851, + "learning_rate": 4.76222525221322e-06, + "loss": 0.3843, + "step": 13875 + }, + { + "epoch": 0.844989799957373, + "grad_norm": 1.033439210615238, + "learning_rate": 4.762191286354185e-06, + "loss": 0.3787, + "step": 13876 + }, + { + "epoch": 0.8450506957342508, + "grad_norm": 1.0007319101646832, + "learning_rate": 4.762157318190477e-06, + "loss": 0.4202, + "step": 13877 + }, + { + "epoch": 0.8451115915111287, + "grad_norm": 1.0559660470697367, + "learning_rate": 4.762123347722129e-06, + "loss": 0.373, + "step": 13878 + }, + { + "epoch": 0.8451724872880065, + "grad_norm": 1.052159336650759, + "learning_rate": 4.762089374949178e-06, + "loss": 0.4409, + "step": 13879 + }, + { + "epoch": 0.8452333830648845, + "grad_norm": 1.0254255963139327, + "learning_rate": 4.762055399871657e-06, + "loss": 0.3361, + "step": 13880 + }, + { + "epoch": 0.8452942788417623, + "grad_norm": 1.0039518047723228, + "learning_rate": 4.7620214224896014e-06, + "loss": 0.5025, + "step": 13881 + }, + { + "epoch": 0.8453551746186402, + "grad_norm": 1.0010576736180736, + "learning_rate": 4.761987442803045e-06, + "loss": 0.4588, + "step": 13882 + }, + { + "epoch": 0.845416070395518, + "grad_norm": 1.0585853561011962, + "learning_rate": 4.761953460812023e-06, + "loss": 0.421, + "step": 13883 + }, + { + "epoch": 0.845476966172396, + "grad_norm": 0.974925317980617, + "learning_rate": 4.76191947651657e-06, + "loss": 0.4611, + "step": 13884 + }, + { + "epoch": 0.8455378619492738, + "grad_norm": 0.9504996833718273, + "learning_rate": 4.76188548991672e-06, + "loss": 0.4237, + "step": 13885 + }, + { + "epoch": 0.8455987577261517, + "grad_norm": 1.0569086571818842, + "learning_rate": 4.761851501012509e-06, + "loss": 0.4215, + "step": 13886 + }, + { + "epoch": 0.8456596535030295, + "grad_norm": 0.9963299495467646, + "learning_rate": 4.7618175098039696e-06, + "loss": 0.3859, + "step": 13887 + }, + { + "epoch": 0.8457205492799075, + "grad_norm": 0.9988024684035763, + "learning_rate": 4.761783516291138e-06, + "loss": 0.5225, + "step": 13888 + }, + { + "epoch": 0.8457814450567853, + "grad_norm": 1.0383947829743048, + "learning_rate": 4.7617495204740485e-06, + "loss": 0.3999, + "step": 13889 + }, + { + "epoch": 0.8458423408336632, + "grad_norm": 0.9584225339842402, + "learning_rate": 4.761715522352736e-06, + "loss": 0.4469, + "step": 13890 + }, + { + "epoch": 0.845903236610541, + "grad_norm": 0.9902564681697921, + "learning_rate": 4.761681521927234e-06, + "loss": 0.4722, + "step": 13891 + }, + { + "epoch": 0.845964132387419, + "grad_norm": 0.971475544890025, + "learning_rate": 4.761647519197578e-06, + "loss": 0.3966, + "step": 13892 + }, + { + "epoch": 0.8460250281642968, + "grad_norm": 0.9343684275726005, + "learning_rate": 4.761613514163803e-06, + "loss": 0.3986, + "step": 13893 + }, + { + "epoch": 0.8460859239411747, + "grad_norm": 1.0518805473851798, + "learning_rate": 4.761579506825942e-06, + "loss": 0.4795, + "step": 13894 + }, + { + "epoch": 0.8461468197180525, + "grad_norm": 0.9500151351185737, + "learning_rate": 4.761545497184032e-06, + "loss": 0.459, + "step": 13895 + }, + { + "epoch": 0.8462077154949305, + "grad_norm": 1.0077502748742826, + "learning_rate": 4.761511485238106e-06, + "loss": 0.3758, + "step": 13896 + }, + { + "epoch": 0.8462686112718083, + "grad_norm": 1.0715453193001954, + "learning_rate": 4.761477470988199e-06, + "loss": 0.4213, + "step": 13897 + }, + { + "epoch": 0.8463295070486861, + "grad_norm": 0.9772800026322429, + "learning_rate": 4.761443454434346e-06, + "loss": 0.392, + "step": 13898 + }, + { + "epoch": 0.846390402825564, + "grad_norm": 0.9700637431978252, + "learning_rate": 4.7614094355765816e-06, + "loss": 0.3898, + "step": 13899 + }, + { + "epoch": 0.846451298602442, + "grad_norm": 1.011433186182143, + "learning_rate": 4.76137541441494e-06, + "loss": 0.43, + "step": 13900 + }, + { + "epoch": 0.8465121943793198, + "grad_norm": 0.8432159067273428, + "learning_rate": 4.761341390949456e-06, + "loss": 0.4724, + "step": 13901 + }, + { + "epoch": 0.8465730901561976, + "grad_norm": 0.9720952116237898, + "learning_rate": 4.761307365180165e-06, + "loss": 0.366, + "step": 13902 + }, + { + "epoch": 0.8466339859330756, + "grad_norm": 0.9992143253595954, + "learning_rate": 4.7612733371071e-06, + "loss": 0.4157, + "step": 13903 + }, + { + "epoch": 0.8466948817099534, + "grad_norm": 0.9419336708575511, + "learning_rate": 4.761239306730299e-06, + "loss": 0.4259, + "step": 13904 + }, + { + "epoch": 0.8467557774868313, + "grad_norm": 1.0189177214815006, + "learning_rate": 4.7612052740497925e-06, + "loss": 0.3661, + "step": 13905 + }, + { + "epoch": 0.8468166732637091, + "grad_norm": 0.9608374223596708, + "learning_rate": 4.761171239065617e-06, + "loss": 0.4793, + "step": 13906 + }, + { + "epoch": 0.8468775690405871, + "grad_norm": 1.002000182482249, + "learning_rate": 4.761137201777808e-06, + "loss": 0.4195, + "step": 13907 + }, + { + "epoch": 0.8469384648174649, + "grad_norm": 1.0493594839201175, + "learning_rate": 4.761103162186399e-06, + "loss": 0.473, + "step": 13908 + }, + { + "epoch": 0.8469993605943428, + "grad_norm": 0.9836582585907899, + "learning_rate": 4.761069120291425e-06, + "loss": 0.4286, + "step": 13909 + }, + { + "epoch": 0.8470602563712206, + "grad_norm": 1.009142598194822, + "learning_rate": 4.761035076092922e-06, + "loss": 0.4018, + "step": 13910 + }, + { + "epoch": 0.8471211521480986, + "grad_norm": 1.0040668509882165, + "learning_rate": 4.761001029590922e-06, + "loss": 0.395, + "step": 13911 + }, + { + "epoch": 0.8471820479249764, + "grad_norm": 1.0182566867884784, + "learning_rate": 4.760966980785461e-06, + "loss": 0.4445, + "step": 13912 + }, + { + "epoch": 0.8472429437018543, + "grad_norm": 1.1235585989400318, + "learning_rate": 4.760932929676575e-06, + "loss": 0.4383, + "step": 13913 + }, + { + "epoch": 0.8473038394787321, + "grad_norm": 0.9643987364156974, + "learning_rate": 4.760898876264297e-06, + "loss": 0.4261, + "step": 13914 + }, + { + "epoch": 0.8473647352556101, + "grad_norm": 0.9527094613798729, + "learning_rate": 4.760864820548662e-06, + "loss": 0.4378, + "step": 13915 + }, + { + "epoch": 0.8474256310324879, + "grad_norm": 0.9088716923575447, + "learning_rate": 4.760830762529705e-06, + "loss": 0.4364, + "step": 13916 + }, + { + "epoch": 0.8474865268093658, + "grad_norm": 1.0129231052799255, + "learning_rate": 4.760796702207461e-06, + "loss": 0.3929, + "step": 13917 + }, + { + "epoch": 0.8475474225862436, + "grad_norm": 0.9883392524098676, + "learning_rate": 4.760762639581964e-06, + "loss": 0.4795, + "step": 13918 + }, + { + "epoch": 0.8476083183631216, + "grad_norm": 0.9800227299476805, + "learning_rate": 4.760728574653249e-06, + "loss": 0.3966, + "step": 13919 + }, + { + "epoch": 0.8476692141399994, + "grad_norm": 1.0234851295763037, + "learning_rate": 4.760694507421352e-06, + "loss": 0.4273, + "step": 13920 + }, + { + "epoch": 0.8477301099168773, + "grad_norm": 0.9884814101096651, + "learning_rate": 4.760660437886304e-06, + "loss": 0.3952, + "step": 13921 + }, + { + "epoch": 0.8477910056937551, + "grad_norm": 1.0466594252425803, + "learning_rate": 4.7606263660481444e-06, + "loss": 0.3555, + "step": 13922 + }, + { + "epoch": 0.847851901470633, + "grad_norm": 0.9853034657529396, + "learning_rate": 4.7605922919069045e-06, + "loss": 0.4909, + "step": 13923 + }, + { + "epoch": 0.8479127972475109, + "grad_norm": 0.9043589784898665, + "learning_rate": 4.760558215462621e-06, + "loss": 0.4366, + "step": 13924 + }, + { + "epoch": 0.8479736930243887, + "grad_norm": 0.9868215724674804, + "learning_rate": 4.760524136715327e-06, + "loss": 0.409, + "step": 13925 + }, + { + "epoch": 0.8480345888012666, + "grad_norm": 0.9497228670847645, + "learning_rate": 4.760490055665058e-06, + "loss": 0.4281, + "step": 13926 + }, + { + "epoch": 0.8480954845781445, + "grad_norm": 0.9940339908842402, + "learning_rate": 4.760455972311849e-06, + "loss": 0.4634, + "step": 13927 + }, + { + "epoch": 0.8481563803550224, + "grad_norm": 1.077449073540843, + "learning_rate": 4.760421886655734e-06, + "loss": 0.4025, + "step": 13928 + }, + { + "epoch": 0.8482172761319002, + "grad_norm": 1.107358937769684, + "learning_rate": 4.760387798696748e-06, + "loss": 0.43, + "step": 13929 + }, + { + "epoch": 0.8482781719087781, + "grad_norm": 1.0199584101396346, + "learning_rate": 4.760353708434927e-06, + "loss": 0.434, + "step": 13930 + }, + { + "epoch": 0.848339067685656, + "grad_norm": 1.0240338243539266, + "learning_rate": 4.760319615870305e-06, + "loss": 0.3935, + "step": 13931 + }, + { + "epoch": 0.8483999634625339, + "grad_norm": 1.1095027986903128, + "learning_rate": 4.760285521002915e-06, + "loss": 0.4781, + "step": 13932 + }, + { + "epoch": 0.8484608592394117, + "grad_norm": 1.0852490347466872, + "learning_rate": 4.760251423832793e-06, + "loss": 0.3411, + "step": 13933 + }, + { + "epoch": 0.8485217550162896, + "grad_norm": 1.200608491823153, + "learning_rate": 4.760217324359975e-06, + "loss": 0.3214, + "step": 13934 + }, + { + "epoch": 0.8485826507931675, + "grad_norm": 1.0970712720226827, + "learning_rate": 4.7601832225844935e-06, + "loss": 0.5147, + "step": 13935 + }, + { + "epoch": 0.8486435465700454, + "grad_norm": 1.0354408711712304, + "learning_rate": 4.760149118506385e-06, + "loss": 0.3895, + "step": 13936 + }, + { + "epoch": 0.8487044423469232, + "grad_norm": 1.0193392788039282, + "learning_rate": 4.760115012125684e-06, + "loss": 0.462, + "step": 13937 + }, + { + "epoch": 0.8487653381238011, + "grad_norm": 1.1066559941724345, + "learning_rate": 4.760080903442423e-06, + "loss": 0.4641, + "step": 13938 + }, + { + "epoch": 0.848826233900679, + "grad_norm": 0.9541305925620216, + "learning_rate": 4.760046792456641e-06, + "loss": 0.452, + "step": 13939 + }, + { + "epoch": 0.8488871296775569, + "grad_norm": 1.0027284404427077, + "learning_rate": 4.760012679168369e-06, + "loss": 0.4431, + "step": 13940 + }, + { + "epoch": 0.8489480254544347, + "grad_norm": 0.9578605742116526, + "learning_rate": 4.759978563577643e-06, + "loss": 0.4415, + "step": 13941 + }, + { + "epoch": 0.8490089212313127, + "grad_norm": 1.0149148389690419, + "learning_rate": 4.759944445684498e-06, + "loss": 0.4502, + "step": 13942 + }, + { + "epoch": 0.8490698170081905, + "grad_norm": 0.9738609172426246, + "learning_rate": 4.7599103254889684e-06, + "loss": 0.4372, + "step": 13943 + }, + { + "epoch": 0.8491307127850684, + "grad_norm": 1.0516779255134163, + "learning_rate": 4.759876202991089e-06, + "loss": 0.4671, + "step": 13944 + }, + { + "epoch": 0.8491916085619462, + "grad_norm": 1.0866368230561927, + "learning_rate": 4.759842078190895e-06, + "loss": 0.4791, + "step": 13945 + }, + { + "epoch": 0.8492525043388242, + "grad_norm": 0.9141235039131221, + "learning_rate": 4.759807951088421e-06, + "loss": 0.4422, + "step": 13946 + }, + { + "epoch": 0.849313400115702, + "grad_norm": 1.063541927341962, + "learning_rate": 4.759773821683702e-06, + "loss": 0.3436, + "step": 13947 + }, + { + "epoch": 0.8493742958925798, + "grad_norm": 0.9488881838186528, + "learning_rate": 4.759739689976772e-06, + "loss": 0.3971, + "step": 13948 + }, + { + "epoch": 0.8494351916694577, + "grad_norm": 0.8942015536256573, + "learning_rate": 4.759705555967666e-06, + "loss": 0.4531, + "step": 13949 + }, + { + "epoch": 0.8494960874463356, + "grad_norm": 0.9605394236043014, + "learning_rate": 4.7596714196564185e-06, + "loss": 0.3975, + "step": 13950 + }, + { + "epoch": 0.8495569832232135, + "grad_norm": 0.9989341787335478, + "learning_rate": 4.759637281043066e-06, + "loss": 0.4812, + "step": 13951 + }, + { + "epoch": 0.8496178790000913, + "grad_norm": 0.9630986272558636, + "learning_rate": 4.759603140127641e-06, + "loss": 0.436, + "step": 13952 + }, + { + "epoch": 0.8496787747769692, + "grad_norm": 0.9865994344206316, + "learning_rate": 4.75956899691018e-06, + "loss": 0.4433, + "step": 13953 + }, + { + "epoch": 0.8497396705538471, + "grad_norm": 1.0196872864878002, + "learning_rate": 4.759534851390716e-06, + "loss": 0.3877, + "step": 13954 + }, + { + "epoch": 0.849800566330725, + "grad_norm": 1.0382571328531558, + "learning_rate": 4.759500703569286e-06, + "loss": 0.353, + "step": 13955 + }, + { + "epoch": 0.8498614621076028, + "grad_norm": 0.9702583217457971, + "learning_rate": 4.759466553445923e-06, + "loss": 0.4581, + "step": 13956 + }, + { + "epoch": 0.8499223578844807, + "grad_norm": 0.9320664784744476, + "learning_rate": 4.759432401020662e-06, + "loss": 0.3607, + "step": 13957 + }, + { + "epoch": 0.8499832536613586, + "grad_norm": 0.9790896987806237, + "learning_rate": 4.75939824629354e-06, + "loss": 0.4654, + "step": 13958 + }, + { + "epoch": 0.8500441494382365, + "grad_norm": 1.0096768764098651, + "learning_rate": 4.759364089264589e-06, + "loss": 0.4002, + "step": 13959 + }, + { + "epoch": 0.8501050452151143, + "grad_norm": 1.0052308668196859, + "learning_rate": 4.7593299299338444e-06, + "loss": 0.4327, + "step": 13960 + }, + { + "epoch": 0.8501659409919922, + "grad_norm": 1.0359214447246083, + "learning_rate": 4.7592957683013415e-06, + "loss": 0.3767, + "step": 13961 + }, + { + "epoch": 0.8502268367688701, + "grad_norm": 1.0312680324982249, + "learning_rate": 4.7592616043671155e-06, + "loss": 0.4089, + "step": 13962 + }, + { + "epoch": 0.850287732545748, + "grad_norm": 0.9998133901762825, + "learning_rate": 4.759227438131201e-06, + "loss": 0.4098, + "step": 13963 + }, + { + "epoch": 0.8503486283226258, + "grad_norm": 0.9935865534097635, + "learning_rate": 4.759193269593633e-06, + "loss": 0.4073, + "step": 13964 + }, + { + "epoch": 0.8504095240995037, + "grad_norm": 1.1486469202122285, + "learning_rate": 4.759159098754444e-06, + "loss": 0.4009, + "step": 13965 + }, + { + "epoch": 0.8504704198763816, + "grad_norm": 1.0015936989216674, + "learning_rate": 4.759124925613672e-06, + "loss": 0.4471, + "step": 13966 + }, + { + "epoch": 0.8505313156532595, + "grad_norm": 1.043390036208736, + "learning_rate": 4.759090750171351e-06, + "loss": 0.4114, + "step": 13967 + }, + { + "epoch": 0.8505922114301373, + "grad_norm": 0.9590432890666265, + "learning_rate": 4.759056572427514e-06, + "loss": 0.4487, + "step": 13968 + }, + { + "epoch": 0.8506531072070151, + "grad_norm": 0.9083458254306219, + "learning_rate": 4.759022392382198e-06, + "loss": 0.423, + "step": 13969 + }, + { + "epoch": 0.8507140029838931, + "grad_norm": 1.0466789310546927, + "learning_rate": 4.758988210035436e-06, + "loss": 0.4261, + "step": 13970 + }, + { + "epoch": 0.850774898760771, + "grad_norm": 1.0213329286118624, + "learning_rate": 4.758954025387265e-06, + "loss": 0.3843, + "step": 13971 + }, + { + "epoch": 0.8508357945376488, + "grad_norm": 1.001375246185868, + "learning_rate": 4.7589198384377175e-06, + "loss": 0.4247, + "step": 13972 + }, + { + "epoch": 0.8508966903145266, + "grad_norm": 0.995783413874708, + "learning_rate": 4.758885649186831e-06, + "loss": 0.436, + "step": 13973 + }, + { + "epoch": 0.8509575860914046, + "grad_norm": 1.003958036038364, + "learning_rate": 4.758851457634638e-06, + "loss": 0.4007, + "step": 13974 + }, + { + "epoch": 0.8510184818682824, + "grad_norm": 1.0508642166712598, + "learning_rate": 4.758817263781173e-06, + "loss": 0.4092, + "step": 13975 + }, + { + "epoch": 0.8510793776451603, + "grad_norm": 1.016756707448115, + "learning_rate": 4.758783067626473e-06, + "loss": 0.4035, + "step": 13976 + }, + { + "epoch": 0.8511402734220381, + "grad_norm": 0.9862923598967659, + "learning_rate": 4.758748869170571e-06, + "loss": 0.4043, + "step": 13977 + }, + { + "epoch": 0.8512011691989161, + "grad_norm": 1.003545438374843, + "learning_rate": 4.758714668413503e-06, + "loss": 0.4198, + "step": 13978 + }, + { + "epoch": 0.8512620649757939, + "grad_norm": 1.0112858846325923, + "learning_rate": 4.7586804653553045e-06, + "loss": 0.4625, + "step": 13979 + }, + { + "epoch": 0.8513229607526718, + "grad_norm": 0.9559285953572779, + "learning_rate": 4.758646259996008e-06, + "loss": 0.4287, + "step": 13980 + }, + { + "epoch": 0.8513838565295496, + "grad_norm": 1.0423994184770582, + "learning_rate": 4.75861205233565e-06, + "loss": 0.4458, + "step": 13981 + }, + { + "epoch": 0.8514447523064276, + "grad_norm": 1.0064282690774926, + "learning_rate": 4.758577842374265e-06, + "loss": 0.4048, + "step": 13982 + }, + { + "epoch": 0.8515056480833054, + "grad_norm": 0.9650945512697064, + "learning_rate": 4.758543630111888e-06, + "loss": 0.4529, + "step": 13983 + }, + { + "epoch": 0.8515665438601833, + "grad_norm": 0.8834503705455306, + "learning_rate": 4.758509415548553e-06, + "loss": 0.4421, + "step": 13984 + }, + { + "epoch": 0.8516274396370612, + "grad_norm": 0.982865871977215, + "learning_rate": 4.758475198684297e-06, + "loss": 0.4149, + "step": 13985 + }, + { + "epoch": 0.8516883354139391, + "grad_norm": 0.9911666060196924, + "learning_rate": 4.758440979519152e-06, + "loss": 0.4307, + "step": 13986 + }, + { + "epoch": 0.8517492311908169, + "grad_norm": 0.9933505251707011, + "learning_rate": 4.758406758053156e-06, + "loss": 0.3652, + "step": 13987 + }, + { + "epoch": 0.8518101269676948, + "grad_norm": 0.9767510857137565, + "learning_rate": 4.758372534286341e-06, + "loss": 0.3861, + "step": 13988 + }, + { + "epoch": 0.8518710227445727, + "grad_norm": 0.971097333126795, + "learning_rate": 4.758338308218743e-06, + "loss": 0.4532, + "step": 13989 + }, + { + "epoch": 0.8519319185214506, + "grad_norm": 1.0328271718696347, + "learning_rate": 4.758304079850396e-06, + "loss": 0.4096, + "step": 13990 + }, + { + "epoch": 0.8519928142983284, + "grad_norm": 1.0168522325440816, + "learning_rate": 4.758269849181337e-06, + "loss": 0.4177, + "step": 13991 + }, + { + "epoch": 0.8520537100752062, + "grad_norm": 0.9842842140264606, + "learning_rate": 4.758235616211599e-06, + "loss": 0.4624, + "step": 13992 + }, + { + "epoch": 0.8521146058520842, + "grad_norm": 1.0663829618734606, + "learning_rate": 4.758201380941219e-06, + "loss": 0.4526, + "step": 13993 + }, + { + "epoch": 0.852175501628962, + "grad_norm": 1.0201306817266644, + "learning_rate": 4.758167143370228e-06, + "loss": 0.4241, + "step": 13994 + }, + { + "epoch": 0.8522363974058399, + "grad_norm": 1.054125756338609, + "learning_rate": 4.758132903498665e-06, + "loss": 0.3881, + "step": 13995 + }, + { + "epoch": 0.8522972931827177, + "grad_norm": 0.9332150659037413, + "learning_rate": 4.758098661326562e-06, + "loss": 0.4095, + "step": 13996 + }, + { + "epoch": 0.8523581889595957, + "grad_norm": 0.9850742110120959, + "learning_rate": 4.7580644168539565e-06, + "loss": 0.4098, + "step": 13997 + }, + { + "epoch": 0.8524190847364735, + "grad_norm": 0.9603006640478216, + "learning_rate": 4.758030170080881e-06, + "loss": 0.4105, + "step": 13998 + }, + { + "epoch": 0.8524799805133514, + "grad_norm": 0.9610911904206996, + "learning_rate": 4.757995921007371e-06, + "loss": 0.4475, + "step": 13999 + }, + { + "epoch": 0.8525408762902292, + "grad_norm": 1.0439355259210825, + "learning_rate": 4.757961669633462e-06, + "loss": 0.4951, + "step": 14000 + }, + { + "epoch": 0.8526017720671072, + "grad_norm": 1.0707373519110992, + "learning_rate": 4.757927415959189e-06, + "loss": 0.3733, + "step": 14001 + }, + { + "epoch": 0.852662667843985, + "grad_norm": 1.035437681758609, + "learning_rate": 4.757893159984586e-06, + "loss": 0.4472, + "step": 14002 + }, + { + "epoch": 0.8527235636208629, + "grad_norm": 1.1096511459081684, + "learning_rate": 4.757858901709689e-06, + "loss": 0.414, + "step": 14003 + }, + { + "epoch": 0.8527844593977407, + "grad_norm": 0.9564707234396949, + "learning_rate": 4.757824641134531e-06, + "loss": 0.4046, + "step": 14004 + }, + { + "epoch": 0.8528453551746187, + "grad_norm": 1.0118822470032713, + "learning_rate": 4.75779037825915e-06, + "loss": 0.408, + "step": 14005 + }, + { + "epoch": 0.8529062509514965, + "grad_norm": 0.9455087388057719, + "learning_rate": 4.7577561130835775e-06, + "loss": 0.4011, + "step": 14006 + }, + { + "epoch": 0.8529671467283744, + "grad_norm": 1.045479159020315, + "learning_rate": 4.757721845607851e-06, + "loss": 0.4446, + "step": 14007 + }, + { + "epoch": 0.8530280425052522, + "grad_norm": 0.9796763497581727, + "learning_rate": 4.757687575832004e-06, + "loss": 0.4068, + "step": 14008 + }, + { + "epoch": 0.8530889382821302, + "grad_norm": 1.004583660363609, + "learning_rate": 4.757653303756072e-06, + "loss": 0.4265, + "step": 14009 + }, + { + "epoch": 0.853149834059008, + "grad_norm": 1.060285235060137, + "learning_rate": 4.757619029380089e-06, + "loss": 0.422, + "step": 14010 + }, + { + "epoch": 0.8532107298358859, + "grad_norm": 0.9790290706269182, + "learning_rate": 4.757584752704092e-06, + "loss": 0.4697, + "step": 14011 + }, + { + "epoch": 0.8532716256127637, + "grad_norm": 0.9684321370077192, + "learning_rate": 4.757550473728114e-06, + "loss": 0.4064, + "step": 14012 + }, + { + "epoch": 0.8533325213896417, + "grad_norm": 1.0081389527016287, + "learning_rate": 4.75751619245219e-06, + "loss": 0.4749, + "step": 14013 + }, + { + "epoch": 0.8533934171665195, + "grad_norm": 1.0562389827846699, + "learning_rate": 4.757481908876356e-06, + "loss": 0.4251, + "step": 14014 + }, + { + "epoch": 0.8534543129433974, + "grad_norm": 0.9852927350001829, + "learning_rate": 4.757447623000646e-06, + "loss": 0.4077, + "step": 14015 + }, + { + "epoch": 0.8535152087202752, + "grad_norm": 1.019935333110812, + "learning_rate": 4.7574133348250955e-06, + "loss": 0.4222, + "step": 14016 + }, + { + "epoch": 0.8535761044971532, + "grad_norm": 0.984061328281806, + "learning_rate": 4.757379044349739e-06, + "loss": 0.386, + "step": 14017 + }, + { + "epoch": 0.853637000274031, + "grad_norm": 0.9766380297997526, + "learning_rate": 4.757344751574612e-06, + "loss": 0.4079, + "step": 14018 + }, + { + "epoch": 0.8536978960509088, + "grad_norm": 0.9609642104445718, + "learning_rate": 4.7573104564997494e-06, + "loss": 0.3877, + "step": 14019 + }, + { + "epoch": 0.8537587918277867, + "grad_norm": 0.89595863208561, + "learning_rate": 4.757276159125186e-06, + "loss": 0.4741, + "step": 14020 + }, + { + "epoch": 0.8538196876046646, + "grad_norm": 1.000400728338152, + "learning_rate": 4.7572418594509555e-06, + "loss": 0.3699, + "step": 14021 + }, + { + "epoch": 0.8538805833815425, + "grad_norm": 0.9452010827170487, + "learning_rate": 4.757207557477095e-06, + "loss": 0.4047, + "step": 14022 + }, + { + "epoch": 0.8539414791584203, + "grad_norm": 1.0002890388292658, + "learning_rate": 4.757173253203637e-06, + "loss": 0.407, + "step": 14023 + }, + { + "epoch": 0.8540023749352983, + "grad_norm": 0.995535838673118, + "learning_rate": 4.757138946630619e-06, + "loss": 0.4107, + "step": 14024 + }, + { + "epoch": 0.8540632707121761, + "grad_norm": 0.9487858292901878, + "learning_rate": 4.7571046377580745e-06, + "loss": 0.3963, + "step": 14025 + }, + { + "epoch": 0.854124166489054, + "grad_norm": 1.0707819362540487, + "learning_rate": 4.757070326586038e-06, + "loss": 0.3938, + "step": 14026 + }, + { + "epoch": 0.8541850622659318, + "grad_norm": 1.0700425934866176, + "learning_rate": 4.757036013114546e-06, + "loss": 0.4421, + "step": 14027 + }, + { + "epoch": 0.8542459580428098, + "grad_norm": 1.03853669672067, + "learning_rate": 4.757001697343633e-06, + "loss": 0.3594, + "step": 14028 + }, + { + "epoch": 0.8543068538196876, + "grad_norm": 1.0360748657170515, + "learning_rate": 4.756967379273333e-06, + "loss": 0.4149, + "step": 14029 + }, + { + "epoch": 0.8543677495965655, + "grad_norm": 1.025281904187832, + "learning_rate": 4.756933058903681e-06, + "loss": 0.3912, + "step": 14030 + }, + { + "epoch": 0.8544286453734433, + "grad_norm": 1.0235801580146635, + "learning_rate": 4.7568987362347135e-06, + "loss": 0.354, + "step": 14031 + }, + { + "epoch": 0.8544895411503213, + "grad_norm": 0.9907292115627687, + "learning_rate": 4.756864411266464e-06, + "loss": 0.3983, + "step": 14032 + }, + { + "epoch": 0.8545504369271991, + "grad_norm": 0.9689359371533346, + "learning_rate": 4.7568300839989685e-06, + "loss": 0.4575, + "step": 14033 + }, + { + "epoch": 0.854611332704077, + "grad_norm": 1.0339043201058293, + "learning_rate": 4.756795754432261e-06, + "loss": 0.4009, + "step": 14034 + }, + { + "epoch": 0.8546722284809548, + "grad_norm": 0.9818804303004974, + "learning_rate": 4.756761422566377e-06, + "loss": 0.4334, + "step": 14035 + }, + { + "epoch": 0.8547331242578328, + "grad_norm": 1.073042157743819, + "learning_rate": 4.756727088401351e-06, + "loss": 0.294, + "step": 14036 + }, + { + "epoch": 0.8547940200347106, + "grad_norm": 1.0376232049063427, + "learning_rate": 4.7566927519372184e-06, + "loss": 0.3216, + "step": 14037 + }, + { + "epoch": 0.8548549158115885, + "grad_norm": 0.9411437089719284, + "learning_rate": 4.756658413174015e-06, + "loss": 0.4261, + "step": 14038 + }, + { + "epoch": 0.8549158115884663, + "grad_norm": 1.033004136635273, + "learning_rate": 4.756624072111774e-06, + "loss": 0.4394, + "step": 14039 + }, + { + "epoch": 0.8549767073653443, + "grad_norm": 1.020885728088282, + "learning_rate": 4.756589728750532e-06, + "loss": 0.3721, + "step": 14040 + }, + { + "epoch": 0.8550376031422221, + "grad_norm": 1.0055861385467508, + "learning_rate": 4.7565553830903225e-06, + "loss": 0.3767, + "step": 14041 + }, + { + "epoch": 0.8550984989191, + "grad_norm": 1.0585353465391913, + "learning_rate": 4.756521035131182e-06, + "loss": 0.4052, + "step": 14042 + }, + { + "epoch": 0.8551593946959778, + "grad_norm": 1.0227456575007172, + "learning_rate": 4.756486684873144e-06, + "loss": 0.3213, + "step": 14043 + }, + { + "epoch": 0.8552202904728557, + "grad_norm": 0.9716193889623629, + "learning_rate": 4.756452332316245e-06, + "loss": 0.4526, + "step": 14044 + }, + { + "epoch": 0.8552811862497336, + "grad_norm": 1.0888219502375367, + "learning_rate": 4.756417977460519e-06, + "loss": 0.4265, + "step": 14045 + }, + { + "epoch": 0.8553420820266114, + "grad_norm": 0.9602620457042285, + "learning_rate": 4.756383620306002e-06, + "loss": 0.4128, + "step": 14046 + }, + { + "epoch": 0.8554029778034893, + "grad_norm": 0.9735252676668618, + "learning_rate": 4.756349260852728e-06, + "loss": 0.4197, + "step": 14047 + }, + { + "epoch": 0.8554638735803672, + "grad_norm": 1.0606047338935605, + "learning_rate": 4.756314899100731e-06, + "loss": 0.3927, + "step": 14048 + }, + { + "epoch": 0.8555247693572451, + "grad_norm": 0.9657624036887292, + "learning_rate": 4.7562805350500485e-06, + "loss": 0.4603, + "step": 14049 + }, + { + "epoch": 0.8555856651341229, + "grad_norm": 0.9603846135435387, + "learning_rate": 4.756246168700714e-06, + "loss": 0.4371, + "step": 14050 + }, + { + "epoch": 0.8556465609110008, + "grad_norm": 1.048895433761498, + "learning_rate": 4.7562118000527624e-06, + "loss": 0.3633, + "step": 14051 + }, + { + "epoch": 0.8557074566878787, + "grad_norm": 1.0061532057917182, + "learning_rate": 4.75617742910623e-06, + "loss": 0.3823, + "step": 14052 + }, + { + "epoch": 0.8557683524647566, + "grad_norm": 0.9278482699938039, + "learning_rate": 4.7561430558611505e-06, + "loss": 0.4211, + "step": 14053 + }, + { + "epoch": 0.8558292482416344, + "grad_norm": 0.9369403511477437, + "learning_rate": 4.75610868031756e-06, + "loss": 0.4304, + "step": 14054 + }, + { + "epoch": 0.8558901440185123, + "grad_norm": 1.000128293972222, + "learning_rate": 4.7560743024754915e-06, + "loss": 0.3884, + "step": 14055 + }, + { + "epoch": 0.8559510397953902, + "grad_norm": 1.0024100531451638, + "learning_rate": 4.756039922334982e-06, + "loss": 0.4148, + "step": 14056 + }, + { + "epoch": 0.8560119355722681, + "grad_norm": 1.1105955630113145, + "learning_rate": 4.756005539896066e-06, + "loss": 0.3837, + "step": 14057 + }, + { + "epoch": 0.8560728313491459, + "grad_norm": 0.931905393229969, + "learning_rate": 4.755971155158778e-06, + "loss": 0.4729, + "step": 14058 + }, + { + "epoch": 0.8561337271260238, + "grad_norm": 1.0358416587695012, + "learning_rate": 4.755936768123155e-06, + "loss": 0.4414, + "step": 14059 + }, + { + "epoch": 0.8561946229029017, + "grad_norm": 0.935376137675858, + "learning_rate": 4.7559023787892285e-06, + "loss": 0.4807, + "step": 14060 + }, + { + "epoch": 0.8562555186797796, + "grad_norm": 1.02311757548586, + "learning_rate": 4.755867987157037e-06, + "loss": 0.4257, + "step": 14061 + }, + { + "epoch": 0.8563164144566574, + "grad_norm": 0.9232477591580349, + "learning_rate": 4.755833593226614e-06, + "loss": 0.433, + "step": 14062 + }, + { + "epoch": 0.8563773102335352, + "grad_norm": 0.9482034605803383, + "learning_rate": 4.755799196997994e-06, + "loss": 0.3817, + "step": 14063 + }, + { + "epoch": 0.8564382060104132, + "grad_norm": 1.0073902104922963, + "learning_rate": 4.755764798471213e-06, + "loss": 0.3369, + "step": 14064 + }, + { + "epoch": 0.856499101787291, + "grad_norm": 0.8994731543266342, + "learning_rate": 4.755730397646306e-06, + "loss": 0.4319, + "step": 14065 + }, + { + "epoch": 0.8565599975641689, + "grad_norm": 0.9702855882321023, + "learning_rate": 4.755695994523307e-06, + "loss": 0.3885, + "step": 14066 + }, + { + "epoch": 0.8566208933410469, + "grad_norm": 0.9391277351251075, + "learning_rate": 4.7556615891022525e-06, + "loss": 0.4202, + "step": 14067 + }, + { + "epoch": 0.8566817891179247, + "grad_norm": 0.9394065750627769, + "learning_rate": 4.755627181383177e-06, + "loss": 0.3767, + "step": 14068 + }, + { + "epoch": 0.8567426848948025, + "grad_norm": 0.9423821027598617, + "learning_rate": 4.7555927713661156e-06, + "loss": 0.4339, + "step": 14069 + }, + { + "epoch": 0.8568035806716804, + "grad_norm": 0.9789368009047269, + "learning_rate": 4.7555583590511024e-06, + "loss": 0.4252, + "step": 14070 + }, + { + "epoch": 0.8568644764485583, + "grad_norm": 1.033706110333744, + "learning_rate": 4.755523944438173e-06, + "loss": 0.4294, + "step": 14071 + }, + { + "epoch": 0.8569253722254362, + "grad_norm": 0.9354091537228418, + "learning_rate": 4.755489527527364e-06, + "loss": 0.3829, + "step": 14072 + }, + { + "epoch": 0.856986268002314, + "grad_norm": 1.0461525710729, + "learning_rate": 4.755455108318708e-06, + "loss": 0.3765, + "step": 14073 + }, + { + "epoch": 0.8570471637791919, + "grad_norm": 0.965808820125706, + "learning_rate": 4.755420686812241e-06, + "loss": 0.4715, + "step": 14074 + }, + { + "epoch": 0.8571080595560698, + "grad_norm": 1.0290338620817487, + "learning_rate": 4.7553862630079994e-06, + "loss": 0.4188, + "step": 14075 + }, + { + "epoch": 0.8571689553329477, + "grad_norm": 0.9121597862000657, + "learning_rate": 4.755351836906017e-06, + "loss": 0.4919, + "step": 14076 + }, + { + "epoch": 0.8572298511098255, + "grad_norm": 0.9407696656223788, + "learning_rate": 4.755317408506328e-06, + "loss": 0.4062, + "step": 14077 + }, + { + "epoch": 0.8572907468867034, + "grad_norm": 1.022208110531049, + "learning_rate": 4.755282977808969e-06, + "loss": 0.4149, + "step": 14078 + }, + { + "epoch": 0.8573516426635813, + "grad_norm": 0.9514623216508314, + "learning_rate": 4.7552485448139755e-06, + "loss": 0.4106, + "step": 14079 + }, + { + "epoch": 0.8574125384404592, + "grad_norm": 1.0898964818721504, + "learning_rate": 4.755214109521381e-06, + "loss": 0.4839, + "step": 14080 + }, + { + "epoch": 0.857473434217337, + "grad_norm": 1.040606150092237, + "learning_rate": 4.755179671931221e-06, + "loss": 0.3704, + "step": 14081 + }, + { + "epoch": 0.8575343299942149, + "grad_norm": 1.0065694556120701, + "learning_rate": 4.755145232043531e-06, + "loss": 0.3985, + "step": 14082 + }, + { + "epoch": 0.8575952257710928, + "grad_norm": 1.0624029763509182, + "learning_rate": 4.7551107898583456e-06, + "loss": 0.3984, + "step": 14083 + }, + { + "epoch": 0.8576561215479707, + "grad_norm": 1.0123178462419282, + "learning_rate": 4.755076345375701e-06, + "loss": 0.3833, + "step": 14084 + }, + { + "epoch": 0.8577170173248485, + "grad_norm": 1.0106504541794692, + "learning_rate": 4.755041898595631e-06, + "loss": 0.4016, + "step": 14085 + }, + { + "epoch": 0.8577779131017264, + "grad_norm": 1.050307563367751, + "learning_rate": 4.755007449518172e-06, + "loss": 0.4134, + "step": 14086 + }, + { + "epoch": 0.8578388088786043, + "grad_norm": 1.003355929800399, + "learning_rate": 4.754972998143358e-06, + "loss": 0.411, + "step": 14087 + }, + { + "epoch": 0.8578997046554822, + "grad_norm": 1.083252420601163, + "learning_rate": 4.754938544471223e-06, + "loss": 0.3807, + "step": 14088 + }, + { + "epoch": 0.85796060043236, + "grad_norm": 0.9524834795723431, + "learning_rate": 4.754904088501805e-06, + "loss": 0.4634, + "step": 14089 + }, + { + "epoch": 0.8580214962092378, + "grad_norm": 0.9707557172926664, + "learning_rate": 4.7548696302351365e-06, + "loss": 0.3901, + "step": 14090 + }, + { + "epoch": 0.8580823919861158, + "grad_norm": 1.032517712542958, + "learning_rate": 4.754835169671255e-06, + "loss": 0.4179, + "step": 14091 + }, + { + "epoch": 0.8581432877629936, + "grad_norm": 0.9828920727356496, + "learning_rate": 4.754800706810193e-06, + "loss": 0.3977, + "step": 14092 + }, + { + "epoch": 0.8582041835398715, + "grad_norm": 1.0155887748819625, + "learning_rate": 4.754766241651988e-06, + "loss": 0.3394, + "step": 14093 + }, + { + "epoch": 0.8582650793167493, + "grad_norm": 1.1019287843323395, + "learning_rate": 4.754731774196673e-06, + "loss": 0.3897, + "step": 14094 + }, + { + "epoch": 0.8583259750936273, + "grad_norm": 1.0239915634836094, + "learning_rate": 4.754697304444285e-06, + "loss": 0.381, + "step": 14095 + }, + { + "epoch": 0.8583868708705051, + "grad_norm": 1.0095861445691272, + "learning_rate": 4.754662832394858e-06, + "loss": 0.4263, + "step": 14096 + }, + { + "epoch": 0.858447766647383, + "grad_norm": 0.9483654233346034, + "learning_rate": 4.754628358048427e-06, + "loss": 0.4077, + "step": 14097 + }, + { + "epoch": 0.8585086624242608, + "grad_norm": 0.9781894798896903, + "learning_rate": 4.754593881405028e-06, + "loss": 0.4502, + "step": 14098 + }, + { + "epoch": 0.8585695582011388, + "grad_norm": 0.9357454068692527, + "learning_rate": 4.754559402464696e-06, + "loss": 0.4659, + "step": 14099 + }, + { + "epoch": 0.8586304539780166, + "grad_norm": 0.9992690777713152, + "learning_rate": 4.754524921227465e-06, + "loss": 0.4313, + "step": 14100 + }, + { + "epoch": 0.8586913497548945, + "grad_norm": 1.1037214980651875, + "learning_rate": 4.754490437693371e-06, + "loss": 0.3726, + "step": 14101 + }, + { + "epoch": 0.8587522455317723, + "grad_norm": 1.1070041606092553, + "learning_rate": 4.754455951862449e-06, + "loss": 0.3981, + "step": 14102 + }, + { + "epoch": 0.8588131413086503, + "grad_norm": 1.0002504927764582, + "learning_rate": 4.754421463734734e-06, + "loss": 0.4149, + "step": 14103 + }, + { + "epoch": 0.8588740370855281, + "grad_norm": 1.0502541330006367, + "learning_rate": 4.754386973310261e-06, + "loss": 0.4051, + "step": 14104 + }, + { + "epoch": 0.858934932862406, + "grad_norm": 1.0180162379025703, + "learning_rate": 4.754352480589067e-06, + "loss": 0.406, + "step": 14105 + }, + { + "epoch": 0.8589958286392839, + "grad_norm": 1.0934353486501813, + "learning_rate": 4.754317985571184e-06, + "loss": 0.4026, + "step": 14106 + }, + { + "epoch": 0.8590567244161618, + "grad_norm": 1.0038259926043334, + "learning_rate": 4.754283488256649e-06, + "loss": 0.3749, + "step": 14107 + }, + { + "epoch": 0.8591176201930396, + "grad_norm": 1.1486232330937745, + "learning_rate": 4.754248988645498e-06, + "loss": 0.3705, + "step": 14108 + }, + { + "epoch": 0.8591785159699175, + "grad_norm": 1.005670162520737, + "learning_rate": 4.7542144867377636e-06, + "loss": 0.408, + "step": 14109 + }, + { + "epoch": 0.8592394117467954, + "grad_norm": 0.9976388242333479, + "learning_rate": 4.754179982533483e-06, + "loss": 0.4288, + "step": 14110 + }, + { + "epoch": 0.8593003075236733, + "grad_norm": 0.99905034546202, + "learning_rate": 4.75414547603269e-06, + "loss": 0.4051, + "step": 14111 + }, + { + "epoch": 0.8593612033005511, + "grad_norm": 1.0776930872892905, + "learning_rate": 4.754110967235421e-06, + "loss": 0.3838, + "step": 14112 + }, + { + "epoch": 0.859422099077429, + "grad_norm": 1.026357019122579, + "learning_rate": 4.75407645614171e-06, + "loss": 0.4288, + "step": 14113 + }, + { + "epoch": 0.8594829948543069, + "grad_norm": 1.0490601910710045, + "learning_rate": 4.754041942751594e-06, + "loss": 0.4141, + "step": 14114 + }, + { + "epoch": 0.8595438906311847, + "grad_norm": 0.9861702299817668, + "learning_rate": 4.754007427065107e-06, + "loss": 0.3654, + "step": 14115 + }, + { + "epoch": 0.8596047864080626, + "grad_norm": 1.0314718938844778, + "learning_rate": 4.753972909082283e-06, + "loss": 0.437, + "step": 14116 + }, + { + "epoch": 0.8596656821849404, + "grad_norm": 1.0390550796354903, + "learning_rate": 4.753938388803159e-06, + "loss": 0.4314, + "step": 14117 + }, + { + "epoch": 0.8597265779618184, + "grad_norm": 0.9798020741804969, + "learning_rate": 4.753903866227769e-06, + "loss": 0.4012, + "step": 14118 + }, + { + "epoch": 0.8597874737386962, + "grad_norm": 0.9883960374484567, + "learning_rate": 4.7538693413561485e-06, + "loss": 0.4143, + "step": 14119 + }, + { + "epoch": 0.8598483695155741, + "grad_norm": 1.0507158435924657, + "learning_rate": 4.753834814188333e-06, + "loss": 0.3843, + "step": 14120 + }, + { + "epoch": 0.8599092652924519, + "grad_norm": 0.9483895647071777, + "learning_rate": 4.753800284724357e-06, + "loss": 0.3694, + "step": 14121 + }, + { + "epoch": 0.8599701610693299, + "grad_norm": 0.9712971871962569, + "learning_rate": 4.753765752964257e-06, + "loss": 0.3513, + "step": 14122 + }, + { + "epoch": 0.8600310568462077, + "grad_norm": 1.136098649008431, + "learning_rate": 4.753731218908066e-06, + "loss": 0.3874, + "step": 14123 + }, + { + "epoch": 0.8600919526230856, + "grad_norm": 1.004681122359956, + "learning_rate": 4.753696682555822e-06, + "loss": 0.3887, + "step": 14124 + }, + { + "epoch": 0.8601528483999634, + "grad_norm": 1.0243127637083373, + "learning_rate": 4.753662143907558e-06, + "loss": 0.4422, + "step": 14125 + }, + { + "epoch": 0.8602137441768414, + "grad_norm": 0.9998281402213173, + "learning_rate": 4.75362760296331e-06, + "loss": 0.5365, + "step": 14126 + }, + { + "epoch": 0.8602746399537192, + "grad_norm": 0.9862071707108815, + "learning_rate": 4.753593059723113e-06, + "loss": 0.4357, + "step": 14127 + }, + { + "epoch": 0.8603355357305971, + "grad_norm": 0.9141221573558151, + "learning_rate": 4.753558514187003e-06, + "loss": 0.4874, + "step": 14128 + }, + { + "epoch": 0.8603964315074749, + "grad_norm": 1.0199284883618982, + "learning_rate": 4.753523966355013e-06, + "loss": 0.3743, + "step": 14129 + }, + { + "epoch": 0.8604573272843529, + "grad_norm": 0.9448367787358394, + "learning_rate": 4.753489416227179e-06, + "loss": 0.4464, + "step": 14130 + }, + { + "epoch": 0.8605182230612307, + "grad_norm": 1.0915725562453598, + "learning_rate": 4.753454863803539e-06, + "loss": 0.3728, + "step": 14131 + }, + { + "epoch": 0.8605791188381086, + "grad_norm": 0.9337673306093512, + "learning_rate": 4.753420309084125e-06, + "loss": 0.4303, + "step": 14132 + }, + { + "epoch": 0.8606400146149864, + "grad_norm": 0.9630721813211156, + "learning_rate": 4.753385752068973e-06, + "loss": 0.4178, + "step": 14133 + }, + { + "epoch": 0.8607009103918644, + "grad_norm": 0.9509920806871713, + "learning_rate": 4.753351192758119e-06, + "loss": 0.433, + "step": 14134 + }, + { + "epoch": 0.8607618061687422, + "grad_norm": 1.244476609296183, + "learning_rate": 4.7533166311515975e-06, + "loss": 0.4444, + "step": 14135 + }, + { + "epoch": 0.86082270194562, + "grad_norm": 0.947889618585394, + "learning_rate": 4.753282067249444e-06, + "loss": 0.4419, + "step": 14136 + }, + { + "epoch": 0.8608835977224979, + "grad_norm": 1.1322706919420078, + "learning_rate": 4.7532475010516935e-06, + "loss": 0.377, + "step": 14137 + }, + { + "epoch": 0.8609444934993759, + "grad_norm": 0.9381695428330157, + "learning_rate": 4.753212932558381e-06, + "loss": 0.3748, + "step": 14138 + }, + { + "epoch": 0.8610053892762537, + "grad_norm": 0.9792442439465211, + "learning_rate": 4.753178361769542e-06, + "loss": 0.4846, + "step": 14139 + }, + { + "epoch": 0.8610662850531315, + "grad_norm": 0.9952201308564917, + "learning_rate": 4.753143788685212e-06, + "loss": 0.4794, + "step": 14140 + }, + { + "epoch": 0.8611271808300094, + "grad_norm": 1.027525517916498, + "learning_rate": 4.753109213305425e-06, + "loss": 0.4081, + "step": 14141 + }, + { + "epoch": 0.8611880766068873, + "grad_norm": 1.0392197638790155, + "learning_rate": 4.7530746356302195e-06, + "loss": 0.3755, + "step": 14142 + }, + { + "epoch": 0.8612489723837652, + "grad_norm": 0.9879900897638734, + "learning_rate": 4.753040055659627e-06, + "loss": 0.3814, + "step": 14143 + }, + { + "epoch": 0.861309868160643, + "grad_norm": 1.047841673652561, + "learning_rate": 4.753005473393684e-06, + "loss": 0.3706, + "step": 14144 + }, + { + "epoch": 0.8613707639375209, + "grad_norm": 1.0905700929282238, + "learning_rate": 4.752970888832425e-06, + "loss": 0.3638, + "step": 14145 + }, + { + "epoch": 0.8614316597143988, + "grad_norm": 1.0329191258243253, + "learning_rate": 4.752936301975888e-06, + "loss": 0.402, + "step": 14146 + }, + { + "epoch": 0.8614925554912767, + "grad_norm": 1.0151634585563674, + "learning_rate": 4.752901712824105e-06, + "loss": 0.4159, + "step": 14147 + }, + { + "epoch": 0.8615534512681545, + "grad_norm": 0.9695726425538382, + "learning_rate": 4.752867121377113e-06, + "loss": 0.4346, + "step": 14148 + }, + { + "epoch": 0.8616143470450325, + "grad_norm": 1.1197464204988457, + "learning_rate": 4.7528325276349465e-06, + "loss": 0.4145, + "step": 14149 + }, + { + "epoch": 0.8616752428219103, + "grad_norm": 1.0539474348363347, + "learning_rate": 4.752797931597642e-06, + "loss": 0.3511, + "step": 14150 + }, + { + "epoch": 0.8617361385987882, + "grad_norm": 0.9719157169938533, + "learning_rate": 4.752763333265233e-06, + "loss": 0.4214, + "step": 14151 + }, + { + "epoch": 0.861797034375666, + "grad_norm": 0.9619694157884096, + "learning_rate": 4.7527287326377555e-06, + "loss": 0.4151, + "step": 14152 + }, + { + "epoch": 0.861857930152544, + "grad_norm": 1.0135815056998745, + "learning_rate": 4.752694129715245e-06, + "loss": 0.4047, + "step": 14153 + }, + { + "epoch": 0.8619188259294218, + "grad_norm": 0.9936058554078275, + "learning_rate": 4.7526595244977364e-06, + "loss": 0.3433, + "step": 14154 + }, + { + "epoch": 0.8619797217062997, + "grad_norm": 0.9981325628005401, + "learning_rate": 4.7526249169852655e-06, + "loss": 0.3767, + "step": 14155 + }, + { + "epoch": 0.8620406174831775, + "grad_norm": 1.026313810723485, + "learning_rate": 4.7525903071778665e-06, + "loss": 0.3567, + "step": 14156 + }, + { + "epoch": 0.8621015132600555, + "grad_norm": 1.0521859110153233, + "learning_rate": 4.752555695075576e-06, + "loss": 0.3645, + "step": 14157 + }, + { + "epoch": 0.8621624090369333, + "grad_norm": 1.048458720340386, + "learning_rate": 4.7525210806784285e-06, + "loss": 0.3733, + "step": 14158 + }, + { + "epoch": 0.8622233048138112, + "grad_norm": 0.9186071843569338, + "learning_rate": 4.752486463986458e-06, + "loss": 0.3954, + "step": 14159 + }, + { + "epoch": 0.862284200590689, + "grad_norm": 0.9728939641371933, + "learning_rate": 4.752451844999703e-06, + "loss": 0.4859, + "step": 14160 + }, + { + "epoch": 0.862345096367567, + "grad_norm": 1.0747705402911483, + "learning_rate": 4.752417223718197e-06, + "loss": 0.4026, + "step": 14161 + }, + { + "epoch": 0.8624059921444448, + "grad_norm": 1.110957309199019, + "learning_rate": 4.752382600141974e-06, + "loss": 0.3908, + "step": 14162 + }, + { + "epoch": 0.8624668879213226, + "grad_norm": 0.9913114731196313, + "learning_rate": 4.752347974271071e-06, + "loss": 0.4515, + "step": 14163 + }, + { + "epoch": 0.8625277836982005, + "grad_norm": 0.9965158158588485, + "learning_rate": 4.752313346105522e-06, + "loss": 0.4164, + "step": 14164 + }, + { + "epoch": 0.8625886794750784, + "grad_norm": 0.9733236113347703, + "learning_rate": 4.752278715645364e-06, + "loss": 0.404, + "step": 14165 + }, + { + "epoch": 0.8626495752519563, + "grad_norm": 0.9845612699720149, + "learning_rate": 4.75224408289063e-06, + "loss": 0.3762, + "step": 14166 + }, + { + "epoch": 0.8627104710288341, + "grad_norm": 1.0317353161069913, + "learning_rate": 4.752209447841358e-06, + "loss": 0.4019, + "step": 14167 + }, + { + "epoch": 0.862771366805712, + "grad_norm": 1.0449663061844445, + "learning_rate": 4.752174810497581e-06, + "loss": 0.4284, + "step": 14168 + }, + { + "epoch": 0.8628322625825899, + "grad_norm": 0.9155977455715557, + "learning_rate": 4.752140170859335e-06, + "loss": 0.4238, + "step": 14169 + }, + { + "epoch": 0.8628931583594678, + "grad_norm": 1.0627808726393986, + "learning_rate": 4.752105528926656e-06, + "loss": 0.3482, + "step": 14170 + }, + { + "epoch": 0.8629540541363456, + "grad_norm": 1.016940089352355, + "learning_rate": 4.7520708846995785e-06, + "loss": 0.4314, + "step": 14171 + }, + { + "epoch": 0.8630149499132235, + "grad_norm": 0.9805359516165089, + "learning_rate": 4.752036238178139e-06, + "loss": 0.4316, + "step": 14172 + }, + { + "epoch": 0.8630758456901014, + "grad_norm": 1.048128597071623, + "learning_rate": 4.752001589362369e-06, + "loss": 0.4525, + "step": 14173 + }, + { + "epoch": 0.8631367414669793, + "grad_norm": 0.9405976180922178, + "learning_rate": 4.75196693825231e-06, + "loss": 0.3899, + "step": 14174 + }, + { + "epoch": 0.8631976372438571, + "grad_norm": 1.000532677466723, + "learning_rate": 4.751932284847991e-06, + "loss": 0.3641, + "step": 14175 + }, + { + "epoch": 0.863258533020735, + "grad_norm": 0.8922678004378656, + "learning_rate": 4.751897629149451e-06, + "loss": 0.4829, + "step": 14176 + }, + { + "epoch": 0.8633194287976129, + "grad_norm": 0.9244112186393043, + "learning_rate": 4.7518629711567255e-06, + "loss": 0.4522, + "step": 14177 + }, + { + "epoch": 0.8633803245744908, + "grad_norm": 1.0608701742373678, + "learning_rate": 4.751828310869848e-06, + "loss": 0.3477, + "step": 14178 + }, + { + "epoch": 0.8634412203513686, + "grad_norm": 0.9853285246341431, + "learning_rate": 4.751793648288855e-06, + "loss": 0.3675, + "step": 14179 + }, + { + "epoch": 0.8635021161282465, + "grad_norm": 0.978321563305757, + "learning_rate": 4.7517589834137815e-06, + "loss": 0.4855, + "step": 14180 + }, + { + "epoch": 0.8635630119051244, + "grad_norm": 1.0033013218283102, + "learning_rate": 4.751724316244662e-06, + "loss": 0.3558, + "step": 14181 + }, + { + "epoch": 0.8636239076820023, + "grad_norm": 1.0448356610861929, + "learning_rate": 4.751689646781534e-06, + "loss": 0.4478, + "step": 14182 + }, + { + "epoch": 0.8636848034588801, + "grad_norm": 1.0955280921000632, + "learning_rate": 4.7516549750244306e-06, + "loss": 0.378, + "step": 14183 + }, + { + "epoch": 0.863745699235758, + "grad_norm": 0.9958489996599663, + "learning_rate": 4.751620300973387e-06, + "loss": 0.4319, + "step": 14184 + }, + { + "epoch": 0.8638065950126359, + "grad_norm": 0.995257627415673, + "learning_rate": 4.751585624628441e-06, + "loss": 0.4317, + "step": 14185 + }, + { + "epoch": 0.8638674907895137, + "grad_norm": 1.099670727895086, + "learning_rate": 4.751550945989626e-06, + "loss": 0.3327, + "step": 14186 + }, + { + "epoch": 0.8639283865663916, + "grad_norm": 0.94818889843359, + "learning_rate": 4.7515162650569776e-06, + "loss": 0.3877, + "step": 14187 + }, + { + "epoch": 0.8639892823432695, + "grad_norm": 1.0058311215661222, + "learning_rate": 4.751481581830531e-06, + "loss": 0.3452, + "step": 14188 + }, + { + "epoch": 0.8640501781201474, + "grad_norm": 0.987742882740327, + "learning_rate": 4.751446896310322e-06, + "loss": 0.4334, + "step": 14189 + }, + { + "epoch": 0.8641110738970252, + "grad_norm": 1.0366055038707322, + "learning_rate": 4.751412208496385e-06, + "loss": 0.4488, + "step": 14190 + }, + { + "epoch": 0.8641719696739031, + "grad_norm": 1.0294509542352837, + "learning_rate": 4.751377518388757e-06, + "loss": 0.4026, + "step": 14191 + }, + { + "epoch": 0.864232865450781, + "grad_norm": 0.9730649227309303, + "learning_rate": 4.751342825987472e-06, + "loss": 0.4217, + "step": 14192 + }, + { + "epoch": 0.8642937612276589, + "grad_norm": 0.9805901961367383, + "learning_rate": 4.751308131292566e-06, + "loss": 0.4103, + "step": 14193 + }, + { + "epoch": 0.8643546570045367, + "grad_norm": 1.0381155001499212, + "learning_rate": 4.7512734343040735e-06, + "loss": 0.4126, + "step": 14194 + }, + { + "epoch": 0.8644155527814146, + "grad_norm": 0.9749744506647263, + "learning_rate": 4.751238735022031e-06, + "loss": 0.458, + "step": 14195 + }, + { + "epoch": 0.8644764485582925, + "grad_norm": 0.9441913967450875, + "learning_rate": 4.751204033446473e-06, + "loss": 0.4156, + "step": 14196 + }, + { + "epoch": 0.8645373443351704, + "grad_norm": 1.0969419287062119, + "learning_rate": 4.7511693295774354e-06, + "loss": 0.3669, + "step": 14197 + }, + { + "epoch": 0.8645982401120482, + "grad_norm": 0.9644133875112566, + "learning_rate": 4.751134623414953e-06, + "loss": 0.4611, + "step": 14198 + }, + { + "epoch": 0.8646591358889261, + "grad_norm": 1.0952955923117451, + "learning_rate": 4.751099914959061e-06, + "loss": 0.4672, + "step": 14199 + }, + { + "epoch": 0.864720031665804, + "grad_norm": 1.1015255029778872, + "learning_rate": 4.751065204209796e-06, + "loss": 0.4429, + "step": 14200 + }, + { + "epoch": 0.8647809274426819, + "grad_norm": 0.9988706168041221, + "learning_rate": 4.751030491167192e-06, + "loss": 0.4811, + "step": 14201 + }, + { + "epoch": 0.8648418232195597, + "grad_norm": 1.0022041929404475, + "learning_rate": 4.750995775831286e-06, + "loss": 0.4089, + "step": 14202 + }, + { + "epoch": 0.8649027189964376, + "grad_norm": 0.9679102952979749, + "learning_rate": 4.750961058202111e-06, + "loss": 0.4489, + "step": 14203 + }, + { + "epoch": 0.8649636147733155, + "grad_norm": 0.9360701775507747, + "learning_rate": 4.750926338279704e-06, + "loss": 0.5162, + "step": 14204 + }, + { + "epoch": 0.8650245105501934, + "grad_norm": 0.9761905819108645, + "learning_rate": 4.750891616064101e-06, + "loss": 0.4272, + "step": 14205 + }, + { + "epoch": 0.8650854063270712, + "grad_norm": 0.9701984029115782, + "learning_rate": 4.750856891555335e-06, + "loss": 0.3779, + "step": 14206 + }, + { + "epoch": 0.865146302103949, + "grad_norm": 1.003406226863636, + "learning_rate": 4.750822164753443e-06, + "loss": 0.4605, + "step": 14207 + }, + { + "epoch": 0.865207197880827, + "grad_norm": 0.9178837620702155, + "learning_rate": 4.750787435658462e-06, + "loss": 0.4772, + "step": 14208 + }, + { + "epoch": 0.8652680936577049, + "grad_norm": 0.9388383665008293, + "learning_rate": 4.750752704270424e-06, + "loss": 0.3875, + "step": 14209 + }, + { + "epoch": 0.8653289894345827, + "grad_norm": 1.062885506072342, + "learning_rate": 4.750717970589365e-06, + "loss": 0.3768, + "step": 14210 + }, + { + "epoch": 0.8653898852114605, + "grad_norm": 1.0906621889983952, + "learning_rate": 4.750683234615323e-06, + "loss": 0.3851, + "step": 14211 + }, + { + "epoch": 0.8654507809883385, + "grad_norm": 0.9510745377062487, + "learning_rate": 4.750648496348332e-06, + "loss": 0.4187, + "step": 14212 + }, + { + "epoch": 0.8655116767652163, + "grad_norm": 1.1376240941565903, + "learning_rate": 4.750613755788426e-06, + "loss": 0.3253, + "step": 14213 + }, + { + "epoch": 0.8655725725420942, + "grad_norm": 0.9588441782769065, + "learning_rate": 4.750579012935642e-06, + "loss": 0.4242, + "step": 14214 + }, + { + "epoch": 0.865633468318972, + "grad_norm": 0.8835401274038787, + "learning_rate": 4.750544267790015e-06, + "loss": 0.4829, + "step": 14215 + }, + { + "epoch": 0.86569436409585, + "grad_norm": 1.0106193715342537, + "learning_rate": 4.750509520351579e-06, + "loss": 0.4974, + "step": 14216 + }, + { + "epoch": 0.8657552598727278, + "grad_norm": 1.0503839864529707, + "learning_rate": 4.750474770620372e-06, + "loss": 0.4187, + "step": 14217 + }, + { + "epoch": 0.8658161556496057, + "grad_norm": 0.9845119304576267, + "learning_rate": 4.750440018596428e-06, + "loss": 0.453, + "step": 14218 + }, + { + "epoch": 0.8658770514264835, + "grad_norm": 1.087457902427173, + "learning_rate": 4.7504052642797825e-06, + "loss": 0.4691, + "step": 14219 + }, + { + "epoch": 0.8659379472033615, + "grad_norm": 0.9941270980752415, + "learning_rate": 4.750370507670471e-06, + "loss": 0.44, + "step": 14220 + }, + { + "epoch": 0.8659988429802393, + "grad_norm": 0.8715453259223432, + "learning_rate": 4.750335748768527e-06, + "loss": 0.4561, + "step": 14221 + }, + { + "epoch": 0.8660597387571172, + "grad_norm": 1.0324317707268376, + "learning_rate": 4.7503009875739906e-06, + "loss": 0.3683, + "step": 14222 + }, + { + "epoch": 0.866120634533995, + "grad_norm": 0.9769254423241222, + "learning_rate": 4.750266224086892e-06, + "loss": 0.4125, + "step": 14223 + }, + { + "epoch": 0.866181530310873, + "grad_norm": 0.9943052699338825, + "learning_rate": 4.75023145830727e-06, + "loss": 0.4397, + "step": 14224 + }, + { + "epoch": 0.8662424260877508, + "grad_norm": 0.9961365798959542, + "learning_rate": 4.7501966902351596e-06, + "loss": 0.4399, + "step": 14225 + }, + { + "epoch": 0.8663033218646287, + "grad_norm": 0.979216589274489, + "learning_rate": 4.750161919870594e-06, + "loss": 0.449, + "step": 14226 + }, + { + "epoch": 0.8663642176415065, + "grad_norm": 0.9354080856889154, + "learning_rate": 4.7501271472136115e-06, + "loss": 0.4029, + "step": 14227 + }, + { + "epoch": 0.8664251134183845, + "grad_norm": 0.9586183739897005, + "learning_rate": 4.750092372264246e-06, + "loss": 0.4618, + "step": 14228 + }, + { + "epoch": 0.8664860091952623, + "grad_norm": 0.9590122035706182, + "learning_rate": 4.750057595022533e-06, + "loss": 0.386, + "step": 14229 + }, + { + "epoch": 0.8665469049721402, + "grad_norm": 0.9828983424611952, + "learning_rate": 4.750022815488507e-06, + "loss": 0.4425, + "step": 14230 + }, + { + "epoch": 0.8666078007490181, + "grad_norm": 0.9473102852608781, + "learning_rate": 4.7499880336622065e-06, + "loss": 0.4194, + "step": 14231 + }, + { + "epoch": 0.866668696525896, + "grad_norm": 1.0401741735860455, + "learning_rate": 4.749953249543664e-06, + "loss": 0.3555, + "step": 14232 + }, + { + "epoch": 0.8667295923027738, + "grad_norm": 1.0515630303635473, + "learning_rate": 4.749918463132916e-06, + "loss": 0.4052, + "step": 14233 + }, + { + "epoch": 0.8667904880796516, + "grad_norm": 0.9951321766124468, + "learning_rate": 4.749883674429998e-06, + "loss": 0.3718, + "step": 14234 + }, + { + "epoch": 0.8668513838565296, + "grad_norm": 0.9951829513940772, + "learning_rate": 4.749848883434945e-06, + "loss": 0.408, + "step": 14235 + }, + { + "epoch": 0.8669122796334074, + "grad_norm": 0.9398794321720849, + "learning_rate": 4.749814090147793e-06, + "loss": 0.4502, + "step": 14236 + }, + { + "epoch": 0.8669731754102853, + "grad_norm": 1.0024061263350292, + "learning_rate": 4.749779294568577e-06, + "loss": 0.3758, + "step": 14237 + }, + { + "epoch": 0.8670340711871631, + "grad_norm": 0.9838807873402277, + "learning_rate": 4.749744496697333e-06, + "loss": 0.4053, + "step": 14238 + }, + { + "epoch": 0.8670949669640411, + "grad_norm": 1.0218318001450457, + "learning_rate": 4.749709696534095e-06, + "loss": 0.3959, + "step": 14239 + }, + { + "epoch": 0.8671558627409189, + "grad_norm": 1.1283005164168296, + "learning_rate": 4.7496748940789005e-06, + "loss": 0.3531, + "step": 14240 + }, + { + "epoch": 0.8672167585177968, + "grad_norm": 0.9870958457949249, + "learning_rate": 4.749640089331784e-06, + "loss": 0.3654, + "step": 14241 + }, + { + "epoch": 0.8672776542946746, + "grad_norm": 1.013486311629105, + "learning_rate": 4.749605282292781e-06, + "loss": 0.4443, + "step": 14242 + }, + { + "epoch": 0.8673385500715526, + "grad_norm": 1.0678933730246456, + "learning_rate": 4.749570472961926e-06, + "loss": 0.3924, + "step": 14243 + }, + { + "epoch": 0.8673994458484304, + "grad_norm": 1.0740229846839666, + "learning_rate": 4.749535661339256e-06, + "loss": 0.4057, + "step": 14244 + }, + { + "epoch": 0.8674603416253083, + "grad_norm": 0.9466520797208909, + "learning_rate": 4.749500847424806e-06, + "loss": 0.4393, + "step": 14245 + }, + { + "epoch": 0.8675212374021861, + "grad_norm": 1.0736569899571506, + "learning_rate": 4.749466031218612e-06, + "loss": 0.3731, + "step": 14246 + }, + { + "epoch": 0.8675821331790641, + "grad_norm": 0.998258835866514, + "learning_rate": 4.749431212720707e-06, + "loss": 0.4453, + "step": 14247 + }, + { + "epoch": 0.8676430289559419, + "grad_norm": 0.9730819652668152, + "learning_rate": 4.749396391931129e-06, + "loss": 0.4512, + "step": 14248 + }, + { + "epoch": 0.8677039247328198, + "grad_norm": 0.9921290284166121, + "learning_rate": 4.749361568849913e-06, + "loss": 0.3916, + "step": 14249 + }, + { + "epoch": 0.8677648205096976, + "grad_norm": 1.0107859118468954, + "learning_rate": 4.749326743477094e-06, + "loss": 0.4273, + "step": 14250 + }, + { + "epoch": 0.8678257162865756, + "grad_norm": 1.0302470572154248, + "learning_rate": 4.749291915812709e-06, + "loss": 0.4039, + "step": 14251 + }, + { + "epoch": 0.8678866120634534, + "grad_norm": 1.0347617198554289, + "learning_rate": 4.7492570858567896e-06, + "loss": 0.3955, + "step": 14252 + }, + { + "epoch": 0.8679475078403313, + "grad_norm": 1.0191658029917607, + "learning_rate": 4.749222253609375e-06, + "loss": 0.3968, + "step": 14253 + }, + { + "epoch": 0.8680084036172091, + "grad_norm": 0.9216259528166217, + "learning_rate": 4.749187419070501e-06, + "loss": 0.5267, + "step": 14254 + }, + { + "epoch": 0.8680692993940871, + "grad_norm": 0.9309285765533821, + "learning_rate": 4.749152582240199e-06, + "loss": 0.3927, + "step": 14255 + }, + { + "epoch": 0.8681301951709649, + "grad_norm": 1.0532226379771763, + "learning_rate": 4.749117743118509e-06, + "loss": 0.344, + "step": 14256 + }, + { + "epoch": 0.8681910909478427, + "grad_norm": 1.0224503275321977, + "learning_rate": 4.749082901705464e-06, + "loss": 0.4362, + "step": 14257 + }, + { + "epoch": 0.8682519867247206, + "grad_norm": 0.969390871137734, + "learning_rate": 4.7490480580011005e-06, + "loss": 0.3693, + "step": 14258 + }, + { + "epoch": 0.8683128825015985, + "grad_norm": 0.9764304066239455, + "learning_rate": 4.749013212005453e-06, + "loss": 0.4589, + "step": 14259 + }, + { + "epoch": 0.8683737782784764, + "grad_norm": 1.0100388453451017, + "learning_rate": 4.7489783637185585e-06, + "loss": 0.3998, + "step": 14260 + }, + { + "epoch": 0.8684346740553542, + "grad_norm": 0.9161703493750296, + "learning_rate": 4.74894351314045e-06, + "loss": 0.4483, + "step": 14261 + }, + { + "epoch": 0.8684955698322321, + "grad_norm": 1.0305471666292367, + "learning_rate": 4.748908660271167e-06, + "loss": 0.4225, + "step": 14262 + }, + { + "epoch": 0.86855646560911, + "grad_norm": 0.9864086863411614, + "learning_rate": 4.74887380511074e-06, + "loss": 0.4438, + "step": 14263 + }, + { + "epoch": 0.8686173613859879, + "grad_norm": 1.097034479264504, + "learning_rate": 4.7488389476592086e-06, + "loss": 0.3878, + "step": 14264 + }, + { + "epoch": 0.8686782571628657, + "grad_norm": 1.0656897816455941, + "learning_rate": 4.748804087916607e-06, + "loss": 0.4145, + "step": 14265 + }, + { + "epoch": 0.8687391529397436, + "grad_norm": 1.0341376224279653, + "learning_rate": 4.74876922588297e-06, + "loss": 0.3899, + "step": 14266 + }, + { + "epoch": 0.8688000487166215, + "grad_norm": 1.0481527726873585, + "learning_rate": 4.748734361558334e-06, + "loss": 0.4181, + "step": 14267 + }, + { + "epoch": 0.8688609444934994, + "grad_norm": 1.0925903245978432, + "learning_rate": 4.748699494942733e-06, + "loss": 0.448, + "step": 14268 + }, + { + "epoch": 0.8689218402703772, + "grad_norm": 0.9679282961215118, + "learning_rate": 4.748664626036205e-06, + "loss": 0.3919, + "step": 14269 + }, + { + "epoch": 0.8689827360472552, + "grad_norm": 0.9119885565080313, + "learning_rate": 4.7486297548387836e-06, + "loss": 0.3697, + "step": 14270 + }, + { + "epoch": 0.869043631824133, + "grad_norm": 0.9987721045336917, + "learning_rate": 4.748594881350506e-06, + "loss": 0.3976, + "step": 14271 + }, + { + "epoch": 0.8691045276010109, + "grad_norm": 0.98813673698971, + "learning_rate": 4.748560005571405e-06, + "loss": 0.4088, + "step": 14272 + }, + { + "epoch": 0.8691654233778887, + "grad_norm": 1.0195350404828947, + "learning_rate": 4.748525127501519e-06, + "loss": 0.3782, + "step": 14273 + }, + { + "epoch": 0.8692263191547667, + "grad_norm": 1.0393798716059302, + "learning_rate": 4.748490247140882e-06, + "loss": 0.4557, + "step": 14274 + }, + { + "epoch": 0.8692872149316445, + "grad_norm": 0.9877094963405013, + "learning_rate": 4.74845536448953e-06, + "loss": 0.3971, + "step": 14275 + }, + { + "epoch": 0.8693481107085224, + "grad_norm": 1.0684658677639698, + "learning_rate": 4.748420479547499e-06, + "loss": 0.3688, + "step": 14276 + }, + { + "epoch": 0.8694090064854002, + "grad_norm": 1.0654901872605023, + "learning_rate": 4.748385592314822e-06, + "loss": 0.3513, + "step": 14277 + }, + { + "epoch": 0.8694699022622782, + "grad_norm": 1.0155180753712914, + "learning_rate": 4.748350702791539e-06, + "loss": 0.3533, + "step": 14278 + }, + { + "epoch": 0.869530798039156, + "grad_norm": 0.9457721257996816, + "learning_rate": 4.748315810977681e-06, + "loss": 0.5021, + "step": 14279 + }, + { + "epoch": 0.8695916938160339, + "grad_norm": 0.9482563442738191, + "learning_rate": 4.7482809168732865e-06, + "loss": 0.4607, + "step": 14280 + }, + { + "epoch": 0.8696525895929117, + "grad_norm": 0.99582108795415, + "learning_rate": 4.74824602047839e-06, + "loss": 0.3544, + "step": 14281 + }, + { + "epoch": 0.8697134853697897, + "grad_norm": 0.9570312018639087, + "learning_rate": 4.7482111217930275e-06, + "loss": 0.424, + "step": 14282 + }, + { + "epoch": 0.8697743811466675, + "grad_norm": 1.0234826255997802, + "learning_rate": 4.748176220817235e-06, + "loss": 0.3902, + "step": 14283 + }, + { + "epoch": 0.8698352769235453, + "grad_norm": 0.9802731167405594, + "learning_rate": 4.748141317551046e-06, + "loss": 0.3417, + "step": 14284 + }, + { + "epoch": 0.8698961727004232, + "grad_norm": 0.9221245469306375, + "learning_rate": 4.748106411994497e-06, + "loss": 0.4315, + "step": 14285 + }, + { + "epoch": 0.8699570684773011, + "grad_norm": 1.0117663481694097, + "learning_rate": 4.7480715041476254e-06, + "loss": 0.4545, + "step": 14286 + }, + { + "epoch": 0.870017964254179, + "grad_norm": 1.0442999098921886, + "learning_rate": 4.748036594010465e-06, + "loss": 0.3697, + "step": 14287 + }, + { + "epoch": 0.8700788600310568, + "grad_norm": 0.9536818877779124, + "learning_rate": 4.748001681583051e-06, + "loss": 0.4424, + "step": 14288 + }, + { + "epoch": 0.8701397558079347, + "grad_norm": 1.0709190311468193, + "learning_rate": 4.7479667668654195e-06, + "loss": 0.4105, + "step": 14289 + }, + { + "epoch": 0.8702006515848126, + "grad_norm": 0.9686633898453828, + "learning_rate": 4.747931849857606e-06, + "loss": 0.3823, + "step": 14290 + }, + { + "epoch": 0.8702615473616905, + "grad_norm": 0.9785168589888881, + "learning_rate": 4.747896930559647e-06, + "loss": 0.4075, + "step": 14291 + }, + { + "epoch": 0.8703224431385683, + "grad_norm": 0.9624291907254737, + "learning_rate": 4.7478620089715774e-06, + "loss": 0.4757, + "step": 14292 + }, + { + "epoch": 0.8703833389154462, + "grad_norm": 0.9224074804872048, + "learning_rate": 4.747827085093432e-06, + "loss": 0.4593, + "step": 14293 + }, + { + "epoch": 0.8704442346923241, + "grad_norm": 0.9793837885888096, + "learning_rate": 4.747792158925248e-06, + "loss": 0.432, + "step": 14294 + }, + { + "epoch": 0.870505130469202, + "grad_norm": 0.9828868832667006, + "learning_rate": 4.74775723046706e-06, + "loss": 0.3813, + "step": 14295 + }, + { + "epoch": 0.8705660262460798, + "grad_norm": 0.9557529316388698, + "learning_rate": 4.747722299718903e-06, + "loss": 0.4824, + "step": 14296 + }, + { + "epoch": 0.8706269220229577, + "grad_norm": 1.0585226411290451, + "learning_rate": 4.747687366680813e-06, + "loss": 0.434, + "step": 14297 + }, + { + "epoch": 0.8706878177998356, + "grad_norm": 0.9488664211071531, + "learning_rate": 4.7476524313528276e-06, + "loss": 0.456, + "step": 14298 + }, + { + "epoch": 0.8707487135767135, + "grad_norm": 1.0155332066117344, + "learning_rate": 4.747617493734978e-06, + "loss": 0.4264, + "step": 14299 + }, + { + "epoch": 0.8708096093535913, + "grad_norm": 0.9887385630890742, + "learning_rate": 4.7475825538273045e-06, + "loss": 0.3473, + "step": 14300 + }, + { + "epoch": 0.8708705051304692, + "grad_norm": 1.0218984596153982, + "learning_rate": 4.74754761162984e-06, + "loss": 0.3876, + "step": 14301 + }, + { + "epoch": 0.8709314009073471, + "grad_norm": 1.0318245902788525, + "learning_rate": 4.747512667142621e-06, + "loss": 0.3729, + "step": 14302 + }, + { + "epoch": 0.870992296684225, + "grad_norm": 0.9990484240519166, + "learning_rate": 4.747477720365682e-06, + "loss": 0.4128, + "step": 14303 + }, + { + "epoch": 0.8710531924611028, + "grad_norm": 1.00030817106462, + "learning_rate": 4.74744277129906e-06, + "loss": 0.4275, + "step": 14304 + }, + { + "epoch": 0.8711140882379806, + "grad_norm": 0.9160681091605117, + "learning_rate": 4.74740781994279e-06, + "loss": 0.4377, + "step": 14305 + }, + { + "epoch": 0.8711749840148586, + "grad_norm": 0.9633232171486275, + "learning_rate": 4.747372866296908e-06, + "loss": 0.4128, + "step": 14306 + }, + { + "epoch": 0.8712358797917364, + "grad_norm": 0.9657000371297081, + "learning_rate": 4.747337910361448e-06, + "loss": 0.3686, + "step": 14307 + }, + { + "epoch": 0.8712967755686143, + "grad_norm": 0.9471914083096904, + "learning_rate": 4.7473029521364485e-06, + "loss": 0.3983, + "step": 14308 + }, + { + "epoch": 0.8713576713454921, + "grad_norm": 0.9350823546805849, + "learning_rate": 4.747267991621942e-06, + "loss": 0.4103, + "step": 14309 + }, + { + "epoch": 0.8714185671223701, + "grad_norm": 1.0549103260163408, + "learning_rate": 4.747233028817967e-06, + "loss": 0.3814, + "step": 14310 + }, + { + "epoch": 0.8714794628992479, + "grad_norm": 1.0883858010403118, + "learning_rate": 4.7471980637245565e-06, + "loss": 0.3879, + "step": 14311 + }, + { + "epoch": 0.8715403586761258, + "grad_norm": 1.051689725630461, + "learning_rate": 4.747163096341748e-06, + "loss": 0.4616, + "step": 14312 + }, + { + "epoch": 0.8716012544530037, + "grad_norm": 1.0748383359184512, + "learning_rate": 4.747128126669577e-06, + "loss": 0.4112, + "step": 14313 + }, + { + "epoch": 0.8716621502298816, + "grad_norm": 0.987286150919315, + "learning_rate": 4.747093154708077e-06, + "loss": 0.3946, + "step": 14314 + }, + { + "epoch": 0.8717230460067594, + "grad_norm": 1.068084405764322, + "learning_rate": 4.747058180457287e-06, + "loss": 0.4173, + "step": 14315 + }, + { + "epoch": 0.8717839417836373, + "grad_norm": 0.9802416258682105, + "learning_rate": 4.747023203917239e-06, + "loss": 0.3846, + "step": 14316 + }, + { + "epoch": 0.8718448375605152, + "grad_norm": 1.0392321322258906, + "learning_rate": 4.7469882250879716e-06, + "loss": 0.4085, + "step": 14317 + }, + { + "epoch": 0.8719057333373931, + "grad_norm": 1.018710083181439, + "learning_rate": 4.746953243969519e-06, + "loss": 0.3828, + "step": 14318 + }, + { + "epoch": 0.8719666291142709, + "grad_norm": 1.0963680391879769, + "learning_rate": 4.746918260561918e-06, + "loss": 0.4083, + "step": 14319 + }, + { + "epoch": 0.8720275248911488, + "grad_norm": 1.067015239856877, + "learning_rate": 4.746883274865203e-06, + "loss": 0.454, + "step": 14320 + }, + { + "epoch": 0.8720884206680267, + "grad_norm": 1.0506953501710419, + "learning_rate": 4.746848286879409e-06, + "loss": 0.3375, + "step": 14321 + }, + { + "epoch": 0.8721493164449046, + "grad_norm": 1.08758022996793, + "learning_rate": 4.746813296604573e-06, + "loss": 0.3464, + "step": 14322 + }, + { + "epoch": 0.8722102122217824, + "grad_norm": 1.0521398282810073, + "learning_rate": 4.746778304040731e-06, + "loss": 0.419, + "step": 14323 + }, + { + "epoch": 0.8722711079986603, + "grad_norm": 1.014534967065755, + "learning_rate": 4.746743309187918e-06, + "loss": 0.3995, + "step": 14324 + }, + { + "epoch": 0.8723320037755382, + "grad_norm": 1.0305805736491895, + "learning_rate": 4.746708312046169e-06, + "loss": 0.4281, + "step": 14325 + }, + { + "epoch": 0.8723928995524161, + "grad_norm": 1.0006850945318384, + "learning_rate": 4.746673312615521e-06, + "loss": 0.4329, + "step": 14326 + }, + { + "epoch": 0.8724537953292939, + "grad_norm": 0.8947384359780896, + "learning_rate": 4.746638310896008e-06, + "loss": 0.4317, + "step": 14327 + }, + { + "epoch": 0.8725146911061717, + "grad_norm": 0.9847122917108299, + "learning_rate": 4.746603306887667e-06, + "loss": 0.3337, + "step": 14328 + }, + { + "epoch": 0.8725755868830497, + "grad_norm": 1.03908897720167, + "learning_rate": 4.746568300590534e-06, + "loss": 0.4342, + "step": 14329 + }, + { + "epoch": 0.8726364826599275, + "grad_norm": 1.0166532596537192, + "learning_rate": 4.746533292004643e-06, + "loss": 0.4709, + "step": 14330 + }, + { + "epoch": 0.8726973784368054, + "grad_norm": 1.0350680636007226, + "learning_rate": 4.746498281130031e-06, + "loss": 0.3759, + "step": 14331 + }, + { + "epoch": 0.8727582742136832, + "grad_norm": 1.0197211556983352, + "learning_rate": 4.746463267966733e-06, + "loss": 0.4094, + "step": 14332 + }, + { + "epoch": 0.8728191699905612, + "grad_norm": 1.0193718874800592, + "learning_rate": 4.7464282525147855e-06, + "loss": 0.4027, + "step": 14333 + }, + { + "epoch": 0.872880065767439, + "grad_norm": 1.0515085143019682, + "learning_rate": 4.746393234774223e-06, + "loss": 0.4128, + "step": 14334 + }, + { + "epoch": 0.8729409615443169, + "grad_norm": 1.0434243744484284, + "learning_rate": 4.746358214745082e-06, + "loss": 0.431, + "step": 14335 + }, + { + "epoch": 0.8730018573211947, + "grad_norm": 1.0421668148958283, + "learning_rate": 4.746323192427398e-06, + "loss": 0.3518, + "step": 14336 + }, + { + "epoch": 0.8730627530980727, + "grad_norm": 0.9746852372464369, + "learning_rate": 4.746288167821207e-06, + "loss": 0.4203, + "step": 14337 + }, + { + "epoch": 0.8731236488749505, + "grad_norm": 1.042933227089408, + "learning_rate": 4.7462531409265435e-06, + "loss": 0.4145, + "step": 14338 + }, + { + "epoch": 0.8731845446518284, + "grad_norm": 1.0246865061613373, + "learning_rate": 4.746218111743444e-06, + "loss": 0.3938, + "step": 14339 + }, + { + "epoch": 0.8732454404287062, + "grad_norm": 0.9156374559199164, + "learning_rate": 4.746183080271944e-06, + "loss": 0.4415, + "step": 14340 + }, + { + "epoch": 0.8733063362055842, + "grad_norm": 1.0297669827618254, + "learning_rate": 4.74614804651208e-06, + "loss": 0.4009, + "step": 14341 + }, + { + "epoch": 0.873367231982462, + "grad_norm": 0.9169661141457784, + "learning_rate": 4.7461130104638875e-06, + "loss": 0.4078, + "step": 14342 + }, + { + "epoch": 0.8734281277593399, + "grad_norm": 0.9641554388667539, + "learning_rate": 4.746077972127402e-06, + "loss": 0.4722, + "step": 14343 + }, + { + "epoch": 0.8734890235362177, + "grad_norm": 0.9775251593023352, + "learning_rate": 4.746042931502657e-06, + "loss": 0.3988, + "step": 14344 + }, + { + "epoch": 0.8735499193130957, + "grad_norm": 1.0506716675835905, + "learning_rate": 4.746007888589692e-06, + "loss": 0.3942, + "step": 14345 + }, + { + "epoch": 0.8736108150899735, + "grad_norm": 0.9897715548020586, + "learning_rate": 4.74597284338854e-06, + "loss": 0.3895, + "step": 14346 + }, + { + "epoch": 0.8736717108668514, + "grad_norm": 0.9655440838831585, + "learning_rate": 4.745937795899238e-06, + "loss": 0.3625, + "step": 14347 + }, + { + "epoch": 0.8737326066437292, + "grad_norm": 0.8949135782939489, + "learning_rate": 4.745902746121821e-06, + "loss": 0.4454, + "step": 14348 + }, + { + "epoch": 0.8737935024206072, + "grad_norm": 0.9541092247889399, + "learning_rate": 4.745867694056326e-06, + "loss": 0.3588, + "step": 14349 + }, + { + "epoch": 0.873854398197485, + "grad_norm": 0.9530381476602772, + "learning_rate": 4.745832639702786e-06, + "loss": 0.4485, + "step": 14350 + }, + { + "epoch": 0.8739152939743629, + "grad_norm": 0.9935903308410027, + "learning_rate": 4.745797583061239e-06, + "loss": 0.3905, + "step": 14351 + }, + { + "epoch": 0.8739761897512408, + "grad_norm": 1.0641148734921375, + "learning_rate": 4.74576252413172e-06, + "loss": 0.3903, + "step": 14352 + }, + { + "epoch": 0.8740370855281187, + "grad_norm": 0.9679986456332468, + "learning_rate": 4.745727462914265e-06, + "loss": 0.3872, + "step": 14353 + }, + { + "epoch": 0.8740979813049965, + "grad_norm": 1.062461590637756, + "learning_rate": 4.74569239940891e-06, + "loss": 0.4127, + "step": 14354 + }, + { + "epoch": 0.8741588770818743, + "grad_norm": 0.9685707791369721, + "learning_rate": 4.74565733361569e-06, + "loss": 0.4151, + "step": 14355 + }, + { + "epoch": 0.8742197728587523, + "grad_norm": 1.0383533912385268, + "learning_rate": 4.745622265534641e-06, + "loss": 0.3944, + "step": 14356 + }, + { + "epoch": 0.8742806686356301, + "grad_norm": 1.0467607838374884, + "learning_rate": 4.745587195165798e-06, + "loss": 0.3825, + "step": 14357 + }, + { + "epoch": 0.874341564412508, + "grad_norm": 0.9638530150843526, + "learning_rate": 4.745552122509199e-06, + "loss": 0.4166, + "step": 14358 + }, + { + "epoch": 0.8744024601893858, + "grad_norm": 1.0165084740413666, + "learning_rate": 4.745517047564876e-06, + "loss": 0.3782, + "step": 14359 + }, + { + "epoch": 0.8744633559662638, + "grad_norm": 1.030579467535583, + "learning_rate": 4.745481970332869e-06, + "loss": 0.4035, + "step": 14360 + }, + { + "epoch": 0.8745242517431416, + "grad_norm": 0.9397432408993766, + "learning_rate": 4.745446890813211e-06, + "loss": 0.4542, + "step": 14361 + }, + { + "epoch": 0.8745851475200195, + "grad_norm": 1.004207472311812, + "learning_rate": 4.745411809005937e-06, + "loss": 0.3267, + "step": 14362 + }, + { + "epoch": 0.8746460432968973, + "grad_norm": 0.9634486047436805, + "learning_rate": 4.745376724911086e-06, + "loss": 0.458, + "step": 14363 + }, + { + "epoch": 0.8747069390737753, + "grad_norm": 0.9964816656500883, + "learning_rate": 4.7453416385286915e-06, + "loss": 0.4218, + "step": 14364 + }, + { + "epoch": 0.8747678348506531, + "grad_norm": 1.01218599664229, + "learning_rate": 4.745306549858789e-06, + "loss": 0.4434, + "step": 14365 + }, + { + "epoch": 0.874828730627531, + "grad_norm": 1.1058918093922911, + "learning_rate": 4.7452714589014156e-06, + "loss": 0.3965, + "step": 14366 + }, + { + "epoch": 0.8748896264044088, + "grad_norm": 1.0249230021201379, + "learning_rate": 4.745236365656606e-06, + "loss": 0.3694, + "step": 14367 + }, + { + "epoch": 0.8749505221812868, + "grad_norm": 1.0600431038755311, + "learning_rate": 4.745201270124396e-06, + "loss": 0.4025, + "step": 14368 + }, + { + "epoch": 0.8750114179581646, + "grad_norm": 0.9613532152972636, + "learning_rate": 4.745166172304821e-06, + "loss": 0.3883, + "step": 14369 + }, + { + "epoch": 0.8750723137350425, + "grad_norm": 1.0667217449451023, + "learning_rate": 4.745131072197919e-06, + "loss": 0.4014, + "step": 14370 + }, + { + "epoch": 0.8751332095119203, + "grad_norm": 0.9663288010063927, + "learning_rate": 4.745095969803724e-06, + "loss": 0.4541, + "step": 14371 + }, + { + "epoch": 0.8751941052887983, + "grad_norm": 1.0601928256452788, + "learning_rate": 4.74506086512227e-06, + "loss": 0.4355, + "step": 14372 + }, + { + "epoch": 0.8752550010656761, + "grad_norm": 0.9655970975252673, + "learning_rate": 4.745025758153597e-06, + "loss": 0.443, + "step": 14373 + }, + { + "epoch": 0.875315896842554, + "grad_norm": 1.039619767062742, + "learning_rate": 4.7449906488977375e-06, + "loss": 0.3509, + "step": 14374 + }, + { + "epoch": 0.8753767926194318, + "grad_norm": 0.9575384539164524, + "learning_rate": 4.7449555373547275e-06, + "loss": 0.4351, + "step": 14375 + }, + { + "epoch": 0.8754376883963098, + "grad_norm": 1.026881262520265, + "learning_rate": 4.744920423524604e-06, + "loss": 0.464, + "step": 14376 + }, + { + "epoch": 0.8754985841731876, + "grad_norm": 1.0119858116952871, + "learning_rate": 4.744885307407402e-06, + "loss": 0.4301, + "step": 14377 + }, + { + "epoch": 0.8755594799500654, + "grad_norm": 0.969602476869353, + "learning_rate": 4.744850189003158e-06, + "loss": 0.4155, + "step": 14378 + }, + { + "epoch": 0.8756203757269433, + "grad_norm": 1.0164888747115899, + "learning_rate": 4.744815068311907e-06, + "loss": 0.3746, + "step": 14379 + }, + { + "epoch": 0.8756812715038212, + "grad_norm": 0.9406593981624397, + "learning_rate": 4.7447799453336855e-06, + "loss": 0.3628, + "step": 14380 + }, + { + "epoch": 0.8757421672806991, + "grad_norm": 0.8976707500484432, + "learning_rate": 4.744744820068528e-06, + "loss": 0.4549, + "step": 14381 + }, + { + "epoch": 0.8758030630575769, + "grad_norm": 1.0320550107952682, + "learning_rate": 4.744709692516472e-06, + "loss": 0.4727, + "step": 14382 + }, + { + "epoch": 0.8758639588344548, + "grad_norm": 0.9349979820003352, + "learning_rate": 4.744674562677552e-06, + "loss": 0.3949, + "step": 14383 + }, + { + "epoch": 0.8759248546113327, + "grad_norm": 1.0265376188253845, + "learning_rate": 4.744639430551804e-06, + "loss": 0.3943, + "step": 14384 + }, + { + "epoch": 0.8759857503882106, + "grad_norm": 1.0635586233694831, + "learning_rate": 4.744604296139265e-06, + "loss": 0.4279, + "step": 14385 + }, + { + "epoch": 0.8760466461650884, + "grad_norm": 1.004594114507389, + "learning_rate": 4.744569159439969e-06, + "loss": 0.382, + "step": 14386 + }, + { + "epoch": 0.8761075419419663, + "grad_norm": 1.175795449408467, + "learning_rate": 4.744534020453952e-06, + "loss": 0.4632, + "step": 14387 + }, + { + "epoch": 0.8761684377188442, + "grad_norm": 0.9423059123249811, + "learning_rate": 4.744498879181252e-06, + "loss": 0.4403, + "step": 14388 + }, + { + "epoch": 0.8762293334957221, + "grad_norm": 0.9726217359886837, + "learning_rate": 4.744463735621903e-06, + "loss": 0.4055, + "step": 14389 + }, + { + "epoch": 0.8762902292725999, + "grad_norm": 1.053190965753798, + "learning_rate": 4.74442858977594e-06, + "loss": 0.5099, + "step": 14390 + }, + { + "epoch": 0.8763511250494778, + "grad_norm": 1.024482957803972, + "learning_rate": 4.7443934416434e-06, + "loss": 0.438, + "step": 14391 + }, + { + "epoch": 0.8764120208263557, + "grad_norm": 1.0304328108683454, + "learning_rate": 4.744358291224318e-06, + "loss": 0.4376, + "step": 14392 + }, + { + "epoch": 0.8764729166032336, + "grad_norm": 0.9343081393389727, + "learning_rate": 4.744323138518732e-06, + "loss": 0.3625, + "step": 14393 + }, + { + "epoch": 0.8765338123801114, + "grad_norm": 0.9683336110720762, + "learning_rate": 4.7442879835266755e-06, + "loss": 0.4814, + "step": 14394 + }, + { + "epoch": 0.8765947081569894, + "grad_norm": 0.9245148297609843, + "learning_rate": 4.744252826248185e-06, + "loss": 0.4851, + "step": 14395 + }, + { + "epoch": 0.8766556039338672, + "grad_norm": 1.0313129670997532, + "learning_rate": 4.744217666683296e-06, + "loss": 0.368, + "step": 14396 + }, + { + "epoch": 0.8767164997107451, + "grad_norm": 0.8995618521559422, + "learning_rate": 4.744182504832046e-06, + "loss": 0.3905, + "step": 14397 + }, + { + "epoch": 0.8767773954876229, + "grad_norm": 0.9929068076728901, + "learning_rate": 4.744147340694468e-06, + "loss": 0.3444, + "step": 14398 + }, + { + "epoch": 0.8768382912645009, + "grad_norm": 0.9722866648729838, + "learning_rate": 4.7441121742706e-06, + "loss": 0.4689, + "step": 14399 + }, + { + "epoch": 0.8768991870413787, + "grad_norm": 0.8688339528516025, + "learning_rate": 4.744077005560478e-06, + "loss": 0.5063, + "step": 14400 + }, + { + "epoch": 0.8769600828182565, + "grad_norm": 1.0890388329133727, + "learning_rate": 4.7440418345641355e-06, + "loss": 0.3927, + "step": 14401 + }, + { + "epoch": 0.8770209785951344, + "grad_norm": 1.061535901509664, + "learning_rate": 4.744006661281611e-06, + "loss": 0.3812, + "step": 14402 + }, + { + "epoch": 0.8770818743720123, + "grad_norm": 1.0685463348578699, + "learning_rate": 4.743971485712938e-06, + "loss": 0.3497, + "step": 14403 + }, + { + "epoch": 0.8771427701488902, + "grad_norm": 0.9528442019629739, + "learning_rate": 4.743936307858155e-06, + "loss": 0.4471, + "step": 14404 + }, + { + "epoch": 0.877203665925768, + "grad_norm": 1.0238892470373346, + "learning_rate": 4.7439011277172954e-06, + "loss": 0.3771, + "step": 14405 + }, + { + "epoch": 0.8772645617026459, + "grad_norm": 0.9336897387720962, + "learning_rate": 4.743865945290396e-06, + "loss": 0.4904, + "step": 14406 + }, + { + "epoch": 0.8773254574795238, + "grad_norm": 0.9167781626024873, + "learning_rate": 4.743830760577493e-06, + "loss": 0.395, + "step": 14407 + }, + { + "epoch": 0.8773863532564017, + "grad_norm": 0.9939001089351472, + "learning_rate": 4.743795573578621e-06, + "loss": 0.4672, + "step": 14408 + }, + { + "epoch": 0.8774472490332795, + "grad_norm": 0.971078828848291, + "learning_rate": 4.743760384293818e-06, + "loss": 0.3498, + "step": 14409 + }, + { + "epoch": 0.8775081448101574, + "grad_norm": 1.0613812126345659, + "learning_rate": 4.743725192723118e-06, + "loss": 0.391, + "step": 14410 + }, + { + "epoch": 0.8775690405870353, + "grad_norm": 0.9519135997866905, + "learning_rate": 4.7436899988665566e-06, + "loss": 0.3892, + "step": 14411 + }, + { + "epoch": 0.8776299363639132, + "grad_norm": 1.0452575813511789, + "learning_rate": 4.743654802724171e-06, + "loss": 0.3582, + "step": 14412 + }, + { + "epoch": 0.877690832140791, + "grad_norm": 0.9726390417988041, + "learning_rate": 4.743619604295997e-06, + "loss": 0.363, + "step": 14413 + }, + { + "epoch": 0.8777517279176689, + "grad_norm": 0.9792958801803574, + "learning_rate": 4.74358440358207e-06, + "loss": 0.4734, + "step": 14414 + }, + { + "epoch": 0.8778126236945468, + "grad_norm": 1.1392311998606721, + "learning_rate": 4.743549200582426e-06, + "loss": 0.4014, + "step": 14415 + }, + { + "epoch": 0.8778735194714247, + "grad_norm": 0.9380533404588, + "learning_rate": 4.7435139952971e-06, + "loss": 0.4034, + "step": 14416 + }, + { + "epoch": 0.8779344152483025, + "grad_norm": 0.9706027203762945, + "learning_rate": 4.743478787726129e-06, + "loss": 0.4937, + "step": 14417 + }, + { + "epoch": 0.8779953110251804, + "grad_norm": 0.9762276962741057, + "learning_rate": 4.743443577869548e-06, + "loss": 0.3892, + "step": 14418 + }, + { + "epoch": 0.8780562068020583, + "grad_norm": 0.9206202015783388, + "learning_rate": 4.743408365727394e-06, + "loss": 0.4789, + "step": 14419 + }, + { + "epoch": 0.8781171025789362, + "grad_norm": 1.031879802242589, + "learning_rate": 4.743373151299701e-06, + "loss": 0.4163, + "step": 14420 + }, + { + "epoch": 0.878177998355814, + "grad_norm": 0.975500077453231, + "learning_rate": 4.743337934586507e-06, + "loss": 0.4259, + "step": 14421 + }, + { + "epoch": 0.8782388941326918, + "grad_norm": 1.0448210527766117, + "learning_rate": 4.743302715587847e-06, + "loss": 0.3864, + "step": 14422 + }, + { + "epoch": 0.8782997899095698, + "grad_norm": 1.0244783446925496, + "learning_rate": 4.743267494303757e-06, + "loss": 0.3672, + "step": 14423 + }, + { + "epoch": 0.8783606856864477, + "grad_norm": 1.0445066208321934, + "learning_rate": 4.743232270734273e-06, + "loss": 0.357, + "step": 14424 + }, + { + "epoch": 0.8784215814633255, + "grad_norm": 1.0177206673479557, + "learning_rate": 4.74319704487943e-06, + "loss": 0.3889, + "step": 14425 + }, + { + "epoch": 0.8784824772402033, + "grad_norm": 1.1658081075032096, + "learning_rate": 4.743161816739264e-06, + "loss": 0.3547, + "step": 14426 + }, + { + "epoch": 0.8785433730170813, + "grad_norm": 1.055745062495525, + "learning_rate": 4.743126586313812e-06, + "loss": 0.4391, + "step": 14427 + }, + { + "epoch": 0.8786042687939591, + "grad_norm": 0.9359382707602398, + "learning_rate": 4.74309135360311e-06, + "loss": 0.4133, + "step": 14428 + }, + { + "epoch": 0.878665164570837, + "grad_norm": 1.0042960756907824, + "learning_rate": 4.743056118607192e-06, + "loss": 0.4628, + "step": 14429 + }, + { + "epoch": 0.8787260603477148, + "grad_norm": 0.9509043228809609, + "learning_rate": 4.743020881326095e-06, + "loss": 0.4887, + "step": 14430 + }, + { + "epoch": 0.8787869561245928, + "grad_norm": 0.9669517463959684, + "learning_rate": 4.7429856417598555e-06, + "loss": 0.4157, + "step": 14431 + }, + { + "epoch": 0.8788478519014706, + "grad_norm": 1.003313329356823, + "learning_rate": 4.742950399908509e-06, + "loss": 0.4508, + "step": 14432 + }, + { + "epoch": 0.8789087476783485, + "grad_norm": 1.0015259271359314, + "learning_rate": 4.742915155772091e-06, + "loss": 0.3558, + "step": 14433 + }, + { + "epoch": 0.8789696434552264, + "grad_norm": 1.167555854986982, + "learning_rate": 4.7428799093506376e-06, + "loss": 0.4121, + "step": 14434 + }, + { + "epoch": 0.8790305392321043, + "grad_norm": 0.9482355630527127, + "learning_rate": 4.742844660644185e-06, + "loss": 0.3704, + "step": 14435 + }, + { + "epoch": 0.8790914350089821, + "grad_norm": 1.000911796618158, + "learning_rate": 4.742809409652769e-06, + "loss": 0.4219, + "step": 14436 + }, + { + "epoch": 0.87915233078586, + "grad_norm": 0.9992487083977356, + "learning_rate": 4.742774156376425e-06, + "loss": 0.3919, + "step": 14437 + }, + { + "epoch": 0.8792132265627379, + "grad_norm": 1.0351006362039092, + "learning_rate": 4.742738900815189e-06, + "loss": 0.4318, + "step": 14438 + }, + { + "epoch": 0.8792741223396158, + "grad_norm": 0.925115198762538, + "learning_rate": 4.742703642969098e-06, + "loss": 0.4025, + "step": 14439 + }, + { + "epoch": 0.8793350181164936, + "grad_norm": 1.058299836633651, + "learning_rate": 4.742668382838187e-06, + "loss": 0.4147, + "step": 14440 + }, + { + "epoch": 0.8793959138933715, + "grad_norm": 0.9241320270156539, + "learning_rate": 4.7426331204224916e-06, + "loss": 0.4897, + "step": 14441 + }, + { + "epoch": 0.8794568096702494, + "grad_norm": 0.9515500374146644, + "learning_rate": 4.742597855722049e-06, + "loss": 0.4313, + "step": 14442 + }, + { + "epoch": 0.8795177054471273, + "grad_norm": 1.0400679513583302, + "learning_rate": 4.742562588736893e-06, + "loss": 0.3705, + "step": 14443 + }, + { + "epoch": 0.8795786012240051, + "grad_norm": 0.9192023971923061, + "learning_rate": 4.742527319467062e-06, + "loss": 0.4384, + "step": 14444 + }, + { + "epoch": 0.879639497000883, + "grad_norm": 0.9621774272884461, + "learning_rate": 4.742492047912591e-06, + "loss": 0.4552, + "step": 14445 + }, + { + "epoch": 0.8797003927777609, + "grad_norm": 0.9465818948980886, + "learning_rate": 4.742456774073515e-06, + "loss": 0.4363, + "step": 14446 + }, + { + "epoch": 0.8797612885546388, + "grad_norm": 0.9821128021278347, + "learning_rate": 4.7424214979498704e-06, + "loss": 0.4737, + "step": 14447 + }, + { + "epoch": 0.8798221843315166, + "grad_norm": 1.0859573100848932, + "learning_rate": 4.742386219541695e-06, + "loss": 0.4309, + "step": 14448 + }, + { + "epoch": 0.8798830801083944, + "grad_norm": 1.0506883955268131, + "learning_rate": 4.742350938849022e-06, + "loss": 0.4014, + "step": 14449 + }, + { + "epoch": 0.8799439758852724, + "grad_norm": 0.9895280230595283, + "learning_rate": 4.742315655871888e-06, + "loss": 0.403, + "step": 14450 + }, + { + "epoch": 0.8800048716621502, + "grad_norm": 1.105224283559004, + "learning_rate": 4.74228037061033e-06, + "loss": 0.3648, + "step": 14451 + }, + { + "epoch": 0.8800657674390281, + "grad_norm": 0.8947364629666656, + "learning_rate": 4.742245083064383e-06, + "loss": 0.4656, + "step": 14452 + }, + { + "epoch": 0.8801266632159059, + "grad_norm": 0.9453040101969821, + "learning_rate": 4.742209793234084e-06, + "loss": 0.4599, + "step": 14453 + }, + { + "epoch": 0.8801875589927839, + "grad_norm": 1.0216202012038815, + "learning_rate": 4.742174501119468e-06, + "loss": 0.3395, + "step": 14454 + }, + { + "epoch": 0.8802484547696617, + "grad_norm": 1.0131361951322082, + "learning_rate": 4.742139206720571e-06, + "loss": 0.4658, + "step": 14455 + }, + { + "epoch": 0.8803093505465396, + "grad_norm": 0.935257081619263, + "learning_rate": 4.7421039100374286e-06, + "loss": 0.4562, + "step": 14456 + }, + { + "epoch": 0.8803702463234174, + "grad_norm": 1.0259473946453035, + "learning_rate": 4.742068611070079e-06, + "loss": 0.3848, + "step": 14457 + }, + { + "epoch": 0.8804311421002954, + "grad_norm": 0.9720554818087248, + "learning_rate": 4.742033309818556e-06, + "loss": 0.3811, + "step": 14458 + }, + { + "epoch": 0.8804920378771732, + "grad_norm": 0.9951536421472483, + "learning_rate": 4.741998006282895e-06, + "loss": 0.3777, + "step": 14459 + }, + { + "epoch": 0.8805529336540511, + "grad_norm": 0.9388363141604015, + "learning_rate": 4.741962700463134e-06, + "loss": 0.4092, + "step": 14460 + }, + { + "epoch": 0.8806138294309289, + "grad_norm": 0.9625898568113833, + "learning_rate": 4.741927392359308e-06, + "loss": 0.4184, + "step": 14461 + }, + { + "epoch": 0.8806747252078069, + "grad_norm": 1.003878293621341, + "learning_rate": 4.741892081971452e-06, + "loss": 0.4249, + "step": 14462 + }, + { + "epoch": 0.8807356209846847, + "grad_norm": 0.9551810673544985, + "learning_rate": 4.7418567692996045e-06, + "loss": 0.4684, + "step": 14463 + }, + { + "epoch": 0.8807965167615626, + "grad_norm": 0.9891350787541618, + "learning_rate": 4.741821454343799e-06, + "loss": 0.3722, + "step": 14464 + }, + { + "epoch": 0.8808574125384404, + "grad_norm": 1.0753429091806381, + "learning_rate": 4.741786137104072e-06, + "loss": 0.4401, + "step": 14465 + }, + { + "epoch": 0.8809183083153184, + "grad_norm": 1.0516177667003155, + "learning_rate": 4.7417508175804605e-06, + "loss": 0.4251, + "step": 14466 + }, + { + "epoch": 0.8809792040921962, + "grad_norm": 1.0719702357771819, + "learning_rate": 4.7417154957729996e-06, + "loss": 0.4327, + "step": 14467 + }, + { + "epoch": 0.8810400998690741, + "grad_norm": 1.1418843012405901, + "learning_rate": 4.741680171681726e-06, + "loss": 0.3783, + "step": 14468 + }, + { + "epoch": 0.8811009956459519, + "grad_norm": 1.0099262637994455, + "learning_rate": 4.7416448453066755e-06, + "loss": 0.4486, + "step": 14469 + }, + { + "epoch": 0.8811618914228299, + "grad_norm": 1.0392709085390979, + "learning_rate": 4.741609516647883e-06, + "loss": 0.3912, + "step": 14470 + }, + { + "epoch": 0.8812227871997077, + "grad_norm": 1.076457762524847, + "learning_rate": 4.741574185705386e-06, + "loss": 0.4055, + "step": 14471 + }, + { + "epoch": 0.8812836829765855, + "grad_norm": 1.036784156645465, + "learning_rate": 4.741538852479219e-06, + "loss": 0.3681, + "step": 14472 + }, + { + "epoch": 0.8813445787534634, + "grad_norm": 1.033463375381724, + "learning_rate": 4.741503516969419e-06, + "loss": 0.4232, + "step": 14473 + }, + { + "epoch": 0.8814054745303413, + "grad_norm": 1.0212112956976376, + "learning_rate": 4.741468179176023e-06, + "loss": 0.3746, + "step": 14474 + }, + { + "epoch": 0.8814663703072192, + "grad_norm": 0.9799223306723667, + "learning_rate": 4.741432839099065e-06, + "loss": 0.3985, + "step": 14475 + }, + { + "epoch": 0.881527266084097, + "grad_norm": 1.0787352389293985, + "learning_rate": 4.741397496738581e-06, + "loss": 0.4216, + "step": 14476 + }, + { + "epoch": 0.881588161860975, + "grad_norm": 0.9513724893172208, + "learning_rate": 4.741362152094609e-06, + "loss": 0.4241, + "step": 14477 + }, + { + "epoch": 0.8816490576378528, + "grad_norm": 1.0153780941122073, + "learning_rate": 4.7413268051671835e-06, + "loss": 0.4313, + "step": 14478 + }, + { + "epoch": 0.8817099534147307, + "grad_norm": 1.0002500872878237, + "learning_rate": 4.741291455956341e-06, + "loss": 0.3805, + "step": 14479 + }, + { + "epoch": 0.8817708491916085, + "grad_norm": 1.0430512321235892, + "learning_rate": 4.741256104462117e-06, + "loss": 0.4516, + "step": 14480 + }, + { + "epoch": 0.8818317449684865, + "grad_norm": 0.9621022429803102, + "learning_rate": 4.741220750684547e-06, + "loss": 0.4294, + "step": 14481 + }, + { + "epoch": 0.8818926407453643, + "grad_norm": 0.9743183864822187, + "learning_rate": 4.741185394623669e-06, + "loss": 0.3455, + "step": 14482 + }, + { + "epoch": 0.8819535365222422, + "grad_norm": 0.994725342386019, + "learning_rate": 4.741150036279518e-06, + "loss": 0.445, + "step": 14483 + }, + { + "epoch": 0.88201443229912, + "grad_norm": 1.0007569098127698, + "learning_rate": 4.74111467565213e-06, + "loss": 0.3869, + "step": 14484 + }, + { + "epoch": 0.882075328075998, + "grad_norm": 0.9862990478071, + "learning_rate": 4.7410793127415415e-06, + "loss": 0.4386, + "step": 14485 + }, + { + "epoch": 0.8821362238528758, + "grad_norm": 0.9269452060136568, + "learning_rate": 4.741043947547787e-06, + "loss": 0.3801, + "step": 14486 + }, + { + "epoch": 0.8821971196297537, + "grad_norm": 0.9752199397949666, + "learning_rate": 4.741008580070904e-06, + "loss": 0.4088, + "step": 14487 + }, + { + "epoch": 0.8822580154066315, + "grad_norm": 1.0385083128285115, + "learning_rate": 4.740973210310927e-06, + "loss": 0.3753, + "step": 14488 + }, + { + "epoch": 0.8823189111835095, + "grad_norm": 1.1016489575715414, + "learning_rate": 4.7409378382678945e-06, + "loss": 0.3967, + "step": 14489 + }, + { + "epoch": 0.8823798069603873, + "grad_norm": 1.0943107943224744, + "learning_rate": 4.7409024639418404e-06, + "loss": 0.4531, + "step": 14490 + }, + { + "epoch": 0.8824407027372652, + "grad_norm": 0.9672239679121124, + "learning_rate": 4.740867087332801e-06, + "loss": 0.4118, + "step": 14491 + }, + { + "epoch": 0.882501598514143, + "grad_norm": 0.960082584214183, + "learning_rate": 4.7408317084408136e-06, + "loss": 0.4057, + "step": 14492 + }, + { + "epoch": 0.882562494291021, + "grad_norm": 0.9730187147699458, + "learning_rate": 4.740796327265914e-06, + "loss": 0.348, + "step": 14493 + }, + { + "epoch": 0.8826233900678988, + "grad_norm": 1.075373567106094, + "learning_rate": 4.740760943808136e-06, + "loss": 0.4804, + "step": 14494 + }, + { + "epoch": 0.8826842858447767, + "grad_norm": 0.9690075145166939, + "learning_rate": 4.740725558067519e-06, + "loss": 0.3885, + "step": 14495 + }, + { + "epoch": 0.8827451816216545, + "grad_norm": 0.9762235285553776, + "learning_rate": 4.7406901700440964e-06, + "loss": 0.464, + "step": 14496 + }, + { + "epoch": 0.8828060773985325, + "grad_norm": 0.9712096347198031, + "learning_rate": 4.740654779737905e-06, + "loss": 0.4031, + "step": 14497 + }, + { + "epoch": 0.8828669731754103, + "grad_norm": 0.9469470136721643, + "learning_rate": 4.740619387148982e-06, + "loss": 0.4568, + "step": 14498 + }, + { + "epoch": 0.8829278689522881, + "grad_norm": 0.9335031815847059, + "learning_rate": 4.740583992277362e-06, + "loss": 0.526, + "step": 14499 + }, + { + "epoch": 0.882988764729166, + "grad_norm": 0.9942093896989689, + "learning_rate": 4.740548595123082e-06, + "loss": 0.462, + "step": 14500 + }, + { + "epoch": 0.8830496605060439, + "grad_norm": 0.955547330543164, + "learning_rate": 4.7405131956861775e-06, + "loss": 0.4061, + "step": 14501 + }, + { + "epoch": 0.8831105562829218, + "grad_norm": 1.0464130323653558, + "learning_rate": 4.740477793966685e-06, + "loss": 0.4067, + "step": 14502 + }, + { + "epoch": 0.8831714520597996, + "grad_norm": 0.9780745999920086, + "learning_rate": 4.740442389964639e-06, + "loss": 0.4259, + "step": 14503 + }, + { + "epoch": 0.8832323478366775, + "grad_norm": 0.9512465893580787, + "learning_rate": 4.7404069836800785e-06, + "loss": 0.4266, + "step": 14504 + }, + { + "epoch": 0.8832932436135554, + "grad_norm": 1.0665511732805326, + "learning_rate": 4.740371575113037e-06, + "loss": 0.4193, + "step": 14505 + }, + { + "epoch": 0.8833541393904333, + "grad_norm": 1.1521430298133832, + "learning_rate": 4.740336164263551e-06, + "loss": 0.4075, + "step": 14506 + }, + { + "epoch": 0.8834150351673111, + "grad_norm": 0.9126513460664654, + "learning_rate": 4.740300751131659e-06, + "loss": 0.4712, + "step": 14507 + }, + { + "epoch": 0.883475930944189, + "grad_norm": 0.9889226756543323, + "learning_rate": 4.740265335717394e-06, + "loss": 0.4268, + "step": 14508 + }, + { + "epoch": 0.8835368267210669, + "grad_norm": 1.0668023546145835, + "learning_rate": 4.740229918020793e-06, + "loss": 0.4045, + "step": 14509 + }, + { + "epoch": 0.8835977224979448, + "grad_norm": 0.8995727790628836, + "learning_rate": 4.740194498041893e-06, + "loss": 0.4514, + "step": 14510 + }, + { + "epoch": 0.8836586182748226, + "grad_norm": 0.9223442167738378, + "learning_rate": 4.740159075780729e-06, + "loss": 0.3788, + "step": 14511 + }, + { + "epoch": 0.8837195140517005, + "grad_norm": 0.929875080616968, + "learning_rate": 4.740123651237337e-06, + "loss": 0.4404, + "step": 14512 + }, + { + "epoch": 0.8837804098285784, + "grad_norm": 1.0517312122988491, + "learning_rate": 4.740088224411754e-06, + "loss": 0.4, + "step": 14513 + }, + { + "epoch": 0.8838413056054563, + "grad_norm": 1.0797091997367152, + "learning_rate": 4.740052795304017e-06, + "loss": 0.4398, + "step": 14514 + }, + { + "epoch": 0.8839022013823341, + "grad_norm": 1.0009173508664315, + "learning_rate": 4.7400173639141585e-06, + "loss": 0.3526, + "step": 14515 + }, + { + "epoch": 0.8839630971592121, + "grad_norm": 0.9956458884866951, + "learning_rate": 4.739981930242218e-06, + "loss": 0.4336, + "step": 14516 + }, + { + "epoch": 0.8840239929360899, + "grad_norm": 0.9334818039767662, + "learning_rate": 4.739946494288231e-06, + "loss": 0.5231, + "step": 14517 + }, + { + "epoch": 0.8840848887129678, + "grad_norm": 1.0611088204710637, + "learning_rate": 4.739911056052233e-06, + "loss": 0.3727, + "step": 14518 + }, + { + "epoch": 0.8841457844898456, + "grad_norm": 1.0033002277385017, + "learning_rate": 4.7398756155342596e-06, + "loss": 0.4419, + "step": 14519 + }, + { + "epoch": 0.8842066802667236, + "grad_norm": 0.9812325064032946, + "learning_rate": 4.7398401727343474e-06, + "loss": 0.4221, + "step": 14520 + }, + { + "epoch": 0.8842675760436014, + "grad_norm": 0.9744488003566376, + "learning_rate": 4.739804727652533e-06, + "loss": 0.464, + "step": 14521 + }, + { + "epoch": 0.8843284718204792, + "grad_norm": 1.0097808030990718, + "learning_rate": 4.739769280288852e-06, + "loss": 0.3958, + "step": 14522 + }, + { + "epoch": 0.8843893675973571, + "grad_norm": 1.035779224903127, + "learning_rate": 4.739733830643341e-06, + "loss": 0.4384, + "step": 14523 + }, + { + "epoch": 0.884450263374235, + "grad_norm": 1.0814706857304213, + "learning_rate": 4.739698378716035e-06, + "loss": 0.3515, + "step": 14524 + }, + { + "epoch": 0.8845111591511129, + "grad_norm": 1.0000554484321795, + "learning_rate": 4.739662924506971e-06, + "loss": 0.4392, + "step": 14525 + }, + { + "epoch": 0.8845720549279907, + "grad_norm": 1.0113120509901972, + "learning_rate": 4.7396274680161855e-06, + "loss": 0.4121, + "step": 14526 + }, + { + "epoch": 0.8846329507048686, + "grad_norm": 1.0409825449464758, + "learning_rate": 4.739592009243713e-06, + "loss": 0.3944, + "step": 14527 + }, + { + "epoch": 0.8846938464817465, + "grad_norm": 1.0028406306299844, + "learning_rate": 4.739556548189592e-06, + "loss": 0.4013, + "step": 14528 + }, + { + "epoch": 0.8847547422586244, + "grad_norm": 1.068934250744627, + "learning_rate": 4.739521084853857e-06, + "loss": 0.3823, + "step": 14529 + }, + { + "epoch": 0.8848156380355022, + "grad_norm": 0.9517523908959626, + "learning_rate": 4.739485619236544e-06, + "loss": 0.4481, + "step": 14530 + }, + { + "epoch": 0.8848765338123801, + "grad_norm": 0.9780323110317466, + "learning_rate": 4.73945015133769e-06, + "loss": 0.407, + "step": 14531 + }, + { + "epoch": 0.884937429589258, + "grad_norm": 1.0108050103446857, + "learning_rate": 4.7394146811573305e-06, + "loss": 0.3992, + "step": 14532 + }, + { + "epoch": 0.8849983253661359, + "grad_norm": 1.0877438190450344, + "learning_rate": 4.739379208695502e-06, + "loss": 0.4209, + "step": 14533 + }, + { + "epoch": 0.8850592211430137, + "grad_norm": 1.028004449326137, + "learning_rate": 4.739343733952241e-06, + "loss": 0.4361, + "step": 14534 + }, + { + "epoch": 0.8851201169198916, + "grad_norm": 0.9624677112586669, + "learning_rate": 4.739308256927582e-06, + "loss": 0.4773, + "step": 14535 + }, + { + "epoch": 0.8851810126967695, + "grad_norm": 0.9280116839476207, + "learning_rate": 4.739272777621563e-06, + "loss": 0.4672, + "step": 14536 + }, + { + "epoch": 0.8852419084736474, + "grad_norm": 0.9535842624709401, + "learning_rate": 4.73923729603422e-06, + "loss": 0.4545, + "step": 14537 + }, + { + "epoch": 0.8853028042505252, + "grad_norm": 1.0244863569298093, + "learning_rate": 4.739201812165588e-06, + "loss": 0.2957, + "step": 14538 + }, + { + "epoch": 0.8853637000274031, + "grad_norm": 1.051072244332287, + "learning_rate": 4.739166326015704e-06, + "loss": 0.433, + "step": 14539 + }, + { + "epoch": 0.885424595804281, + "grad_norm": 1.0650461557430668, + "learning_rate": 4.7391308375846035e-06, + "loss": 0.3717, + "step": 14540 + }, + { + "epoch": 0.8854854915811589, + "grad_norm": 0.996565039582909, + "learning_rate": 4.739095346872324e-06, + "loss": 0.4119, + "step": 14541 + }, + { + "epoch": 0.8855463873580367, + "grad_norm": 1.045540283584445, + "learning_rate": 4.739059853878899e-06, + "loss": 0.4147, + "step": 14542 + }, + { + "epoch": 0.8856072831349145, + "grad_norm": 1.0123750728287209, + "learning_rate": 4.739024358604368e-06, + "loss": 0.4381, + "step": 14543 + }, + { + "epoch": 0.8856681789117925, + "grad_norm": 0.968466843738504, + "learning_rate": 4.7389888610487645e-06, + "loss": 0.4439, + "step": 14544 + }, + { + "epoch": 0.8857290746886703, + "grad_norm": 0.9092569144038399, + "learning_rate": 4.738953361212126e-06, + "loss": 0.4834, + "step": 14545 + }, + { + "epoch": 0.8857899704655482, + "grad_norm": 0.9112460649501349, + "learning_rate": 4.738917859094488e-06, + "loss": 0.4395, + "step": 14546 + }, + { + "epoch": 0.885850866242426, + "grad_norm": 1.0231523199334638, + "learning_rate": 4.738882354695888e-06, + "loss": 0.3921, + "step": 14547 + }, + { + "epoch": 0.885911762019304, + "grad_norm": 1.0413209133824624, + "learning_rate": 4.73884684801636e-06, + "loss": 0.3243, + "step": 14548 + }, + { + "epoch": 0.8859726577961818, + "grad_norm": 0.9371246234564123, + "learning_rate": 4.738811339055943e-06, + "loss": 0.4432, + "step": 14549 + }, + { + "epoch": 0.8860335535730597, + "grad_norm": 1.0524331633837578, + "learning_rate": 4.73877582781467e-06, + "loss": 0.4003, + "step": 14550 + }, + { + "epoch": 0.8860944493499375, + "grad_norm": 1.015795811502872, + "learning_rate": 4.73874031429258e-06, + "loss": 0.4075, + "step": 14551 + }, + { + "epoch": 0.8861553451268155, + "grad_norm": 0.9708388627278547, + "learning_rate": 4.738704798489707e-06, + "loss": 0.434, + "step": 14552 + }, + { + "epoch": 0.8862162409036933, + "grad_norm": 0.9300749920371756, + "learning_rate": 4.738669280406088e-06, + "loss": 0.4218, + "step": 14553 + }, + { + "epoch": 0.8862771366805712, + "grad_norm": 0.976299281692475, + "learning_rate": 4.738633760041761e-06, + "loss": 0.4097, + "step": 14554 + }, + { + "epoch": 0.886338032457449, + "grad_norm": 1.0538313456559718, + "learning_rate": 4.738598237396759e-06, + "loss": 0.477, + "step": 14555 + }, + { + "epoch": 0.886398928234327, + "grad_norm": 0.9879550330562429, + "learning_rate": 4.738562712471119e-06, + "loss": 0.4551, + "step": 14556 + }, + { + "epoch": 0.8864598240112048, + "grad_norm": 0.9578665359830799, + "learning_rate": 4.7385271852648785e-06, + "loss": 0.3763, + "step": 14557 + }, + { + "epoch": 0.8865207197880827, + "grad_norm": 0.9067445593756864, + "learning_rate": 4.738491655778074e-06, + "loss": 0.491, + "step": 14558 + }, + { + "epoch": 0.8865816155649606, + "grad_norm": 0.9731341255582402, + "learning_rate": 4.73845612401074e-06, + "loss": 0.3917, + "step": 14559 + }, + { + "epoch": 0.8866425113418385, + "grad_norm": 0.9339879793126641, + "learning_rate": 4.738420589962912e-06, + "loss": 0.4691, + "step": 14560 + }, + { + "epoch": 0.8867034071187163, + "grad_norm": 1.0560254196146126, + "learning_rate": 4.73838505363463e-06, + "loss": 0.3551, + "step": 14561 + }, + { + "epoch": 0.8867643028955942, + "grad_norm": 1.0649635513873796, + "learning_rate": 4.738349515025926e-06, + "loss": 0.365, + "step": 14562 + }, + { + "epoch": 0.8868251986724721, + "grad_norm": 0.9748495733102497, + "learning_rate": 4.738313974136839e-06, + "loss": 0.4319, + "step": 14563 + }, + { + "epoch": 0.88688609444935, + "grad_norm": 0.988174441167375, + "learning_rate": 4.738278430967405e-06, + "loss": 0.3858, + "step": 14564 + }, + { + "epoch": 0.8869469902262278, + "grad_norm": 1.1106843820268293, + "learning_rate": 4.7382428855176585e-06, + "loss": 0.3797, + "step": 14565 + }, + { + "epoch": 0.8870078860031057, + "grad_norm": 0.9940972027690443, + "learning_rate": 4.738207337787637e-06, + "loss": 0.3502, + "step": 14566 + }, + { + "epoch": 0.8870687817799836, + "grad_norm": 0.9880489497593371, + "learning_rate": 4.7381717877773756e-06, + "loss": 0.4851, + "step": 14567 + }, + { + "epoch": 0.8871296775568615, + "grad_norm": 0.9139355845570356, + "learning_rate": 4.738136235486912e-06, + "loss": 0.4494, + "step": 14568 + }, + { + "epoch": 0.8871905733337393, + "grad_norm": 1.0939381003043427, + "learning_rate": 4.7381006809162825e-06, + "loss": 0.3689, + "step": 14569 + }, + { + "epoch": 0.8872514691106171, + "grad_norm": 0.9686008646424754, + "learning_rate": 4.73806512406552e-06, + "loss": 0.4243, + "step": 14570 + }, + { + "epoch": 0.8873123648874951, + "grad_norm": 0.9796330666161722, + "learning_rate": 4.7380295649346664e-06, + "loss": 0.3928, + "step": 14571 + }, + { + "epoch": 0.8873732606643729, + "grad_norm": 1.0046364993327526, + "learning_rate": 4.737994003523752e-06, + "loss": 0.3906, + "step": 14572 + }, + { + "epoch": 0.8874341564412508, + "grad_norm": 0.9152075073051713, + "learning_rate": 4.737958439832818e-06, + "loss": 0.4662, + "step": 14573 + }, + { + "epoch": 0.8874950522181286, + "grad_norm": 1.013044527235108, + "learning_rate": 4.737922873861898e-06, + "loss": 0.396, + "step": 14574 + }, + { + "epoch": 0.8875559479950066, + "grad_norm": 1.0499203970147273, + "learning_rate": 4.737887305611028e-06, + "loss": 0.3929, + "step": 14575 + }, + { + "epoch": 0.8876168437718844, + "grad_norm": 0.9450213899542637, + "learning_rate": 4.737851735080245e-06, + "loss": 0.407, + "step": 14576 + }, + { + "epoch": 0.8876777395487623, + "grad_norm": 1.015764705188937, + "learning_rate": 4.737816162269586e-06, + "loss": 0.471, + "step": 14577 + }, + { + "epoch": 0.8877386353256401, + "grad_norm": 0.9533202782802459, + "learning_rate": 4.737780587179086e-06, + "loss": 0.3714, + "step": 14578 + }, + { + "epoch": 0.8877995311025181, + "grad_norm": 0.9226234147268944, + "learning_rate": 4.7377450098087814e-06, + "loss": 0.4274, + "step": 14579 + }, + { + "epoch": 0.8878604268793959, + "grad_norm": 1.0396868897339313, + "learning_rate": 4.737709430158709e-06, + "loss": 0.3849, + "step": 14580 + }, + { + "epoch": 0.8879213226562738, + "grad_norm": 0.9961534043787854, + "learning_rate": 4.737673848228905e-06, + "loss": 0.4264, + "step": 14581 + }, + { + "epoch": 0.8879822184331516, + "grad_norm": 1.0009029228484168, + "learning_rate": 4.737638264019406e-06, + "loss": 0.3365, + "step": 14582 + }, + { + "epoch": 0.8880431142100296, + "grad_norm": 1.0152726504596241, + "learning_rate": 4.737602677530246e-06, + "loss": 0.3612, + "step": 14583 + }, + { + "epoch": 0.8881040099869074, + "grad_norm": 1.1067812720613983, + "learning_rate": 4.737567088761463e-06, + "loss": 0.3433, + "step": 14584 + }, + { + "epoch": 0.8881649057637853, + "grad_norm": 0.9979882513661466, + "learning_rate": 4.737531497713094e-06, + "loss": 0.4591, + "step": 14585 + }, + { + "epoch": 0.8882258015406631, + "grad_norm": 0.9514773959268985, + "learning_rate": 4.737495904385174e-06, + "loss": 0.5134, + "step": 14586 + }, + { + "epoch": 0.8882866973175411, + "grad_norm": 1.0492515787383712, + "learning_rate": 4.73746030877774e-06, + "loss": 0.4016, + "step": 14587 + }, + { + "epoch": 0.8883475930944189, + "grad_norm": 1.1161398062167265, + "learning_rate": 4.737424710890829e-06, + "loss": 0.3593, + "step": 14588 + }, + { + "epoch": 0.8884084888712968, + "grad_norm": 1.0431128722378011, + "learning_rate": 4.737389110724474e-06, + "loss": 0.3768, + "step": 14589 + }, + { + "epoch": 0.8884693846481746, + "grad_norm": 1.0005263833416898, + "learning_rate": 4.7373535082787146e-06, + "loss": 0.3559, + "step": 14590 + }, + { + "epoch": 0.8885302804250526, + "grad_norm": 0.9214437359646214, + "learning_rate": 4.737317903553587e-06, + "loss": 0.4516, + "step": 14591 + }, + { + "epoch": 0.8885911762019304, + "grad_norm": 0.998183505884088, + "learning_rate": 4.737282296549125e-06, + "loss": 0.4453, + "step": 14592 + }, + { + "epoch": 0.8886520719788082, + "grad_norm": 0.9548685817502276, + "learning_rate": 4.737246687265367e-06, + "loss": 0.4398, + "step": 14593 + }, + { + "epoch": 0.8887129677556861, + "grad_norm": 0.9961354594993673, + "learning_rate": 4.7372110757023485e-06, + "loss": 0.3978, + "step": 14594 + }, + { + "epoch": 0.888773863532564, + "grad_norm": 1.035383326108782, + "learning_rate": 4.737175461860105e-06, + "loss": 0.3908, + "step": 14595 + }, + { + "epoch": 0.8888347593094419, + "grad_norm": 1.022749155360547, + "learning_rate": 4.737139845738674e-06, + "loss": 0.3792, + "step": 14596 + }, + { + "epoch": 0.8888956550863197, + "grad_norm": 0.9357577114958264, + "learning_rate": 4.7371042273380925e-06, + "loss": 0.4248, + "step": 14597 + }, + { + "epoch": 0.8889565508631977, + "grad_norm": 1.0079603174449427, + "learning_rate": 4.737068606658395e-06, + "loss": 0.4077, + "step": 14598 + }, + { + "epoch": 0.8890174466400755, + "grad_norm": 1.003386186318365, + "learning_rate": 4.737032983699618e-06, + "loss": 0.3684, + "step": 14599 + }, + { + "epoch": 0.8890783424169534, + "grad_norm": 0.9742030279061273, + "learning_rate": 4.736997358461799e-06, + "loss": 0.4047, + "step": 14600 + }, + { + "epoch": 0.8891392381938312, + "grad_norm": 0.9460953874903515, + "learning_rate": 4.736961730944973e-06, + "loss": 0.4052, + "step": 14601 + }, + { + "epoch": 0.8892001339707092, + "grad_norm": 0.9936709614953948, + "learning_rate": 4.736926101149178e-06, + "loss": 0.3533, + "step": 14602 + }, + { + "epoch": 0.889261029747587, + "grad_norm": 0.9227494887170096, + "learning_rate": 4.736890469074449e-06, + "loss": 0.4544, + "step": 14603 + }, + { + "epoch": 0.8893219255244649, + "grad_norm": 0.9274576375511996, + "learning_rate": 4.7368548347208206e-06, + "loss": 0.407, + "step": 14604 + }, + { + "epoch": 0.8893828213013427, + "grad_norm": 0.9796836128178578, + "learning_rate": 4.7368191980883325e-06, + "loss": 0.3979, + "step": 14605 + }, + { + "epoch": 0.8894437170782207, + "grad_norm": 1.0915524606697988, + "learning_rate": 4.73678355917702e-06, + "loss": 0.3814, + "step": 14606 + }, + { + "epoch": 0.8895046128550985, + "grad_norm": 1.024656065511365, + "learning_rate": 4.736747917986918e-06, + "loss": 0.4274, + "step": 14607 + }, + { + "epoch": 0.8895655086319764, + "grad_norm": 0.9998399522715631, + "learning_rate": 4.736712274518064e-06, + "loss": 0.3751, + "step": 14608 + }, + { + "epoch": 0.8896264044088542, + "grad_norm": 1.0257726576492439, + "learning_rate": 4.7366766287704944e-06, + "loss": 0.4415, + "step": 14609 + }, + { + "epoch": 0.8896873001857322, + "grad_norm": 1.0261594184745948, + "learning_rate": 4.736640980744245e-06, + "loss": 0.3986, + "step": 14610 + }, + { + "epoch": 0.88974819596261, + "grad_norm": 1.0334604159601593, + "learning_rate": 4.736605330439352e-06, + "loss": 0.36, + "step": 14611 + }, + { + "epoch": 0.8898090917394879, + "grad_norm": 0.994751901516981, + "learning_rate": 4.736569677855852e-06, + "loss": 0.4558, + "step": 14612 + }, + { + "epoch": 0.8898699875163657, + "grad_norm": 1.019122242156888, + "learning_rate": 4.736534022993782e-06, + "loss": 0.4652, + "step": 14613 + }, + { + "epoch": 0.8899308832932437, + "grad_norm": 0.9245441315251041, + "learning_rate": 4.736498365853177e-06, + "loss": 0.402, + "step": 14614 + }, + { + "epoch": 0.8899917790701215, + "grad_norm": 0.9699428489998548, + "learning_rate": 4.736462706434073e-06, + "loss": 0.4314, + "step": 14615 + }, + { + "epoch": 0.8900526748469993, + "grad_norm": 0.9132132429191736, + "learning_rate": 4.736427044736509e-06, + "loss": 0.4377, + "step": 14616 + }, + { + "epoch": 0.8901135706238772, + "grad_norm": 1.006824293757874, + "learning_rate": 4.7363913807605184e-06, + "loss": 0.4566, + "step": 14617 + }, + { + "epoch": 0.8901744664007551, + "grad_norm": 1.0670463485532913, + "learning_rate": 4.73635571450614e-06, + "loss": 0.3668, + "step": 14618 + }, + { + "epoch": 0.890235362177633, + "grad_norm": 1.0242944012069313, + "learning_rate": 4.736320045973407e-06, + "loss": 0.3176, + "step": 14619 + }, + { + "epoch": 0.8902962579545108, + "grad_norm": 1.0121866475918242, + "learning_rate": 4.73628437516236e-06, + "loss": 0.431, + "step": 14620 + }, + { + "epoch": 0.8903571537313887, + "grad_norm": 0.994089111212936, + "learning_rate": 4.7362487020730315e-06, + "loss": 0.4041, + "step": 14621 + }, + { + "epoch": 0.8904180495082666, + "grad_norm": 1.0005333840825674, + "learning_rate": 4.73621302670546e-06, + "loss": 0.407, + "step": 14622 + }, + { + "epoch": 0.8904789452851445, + "grad_norm": 1.0018002921576594, + "learning_rate": 4.73617734905968e-06, + "loss": 0.3399, + "step": 14623 + }, + { + "epoch": 0.8905398410620223, + "grad_norm": 0.9588241043248339, + "learning_rate": 4.73614166913573e-06, + "loss": 0.4644, + "step": 14624 + }, + { + "epoch": 0.8906007368389002, + "grad_norm": 0.9648544430104163, + "learning_rate": 4.736105986933645e-06, + "loss": 0.4308, + "step": 14625 + }, + { + "epoch": 0.8906616326157781, + "grad_norm": 0.980354168900484, + "learning_rate": 4.736070302453462e-06, + "loss": 0.4581, + "step": 14626 + }, + { + "epoch": 0.890722528392656, + "grad_norm": 1.1266998185458954, + "learning_rate": 4.736034615695217e-06, + "loss": 0.4156, + "step": 14627 + }, + { + "epoch": 0.8907834241695338, + "grad_norm": 1.002022123367537, + "learning_rate": 4.735998926658946e-06, + "loss": 0.3851, + "step": 14628 + }, + { + "epoch": 0.8908443199464117, + "grad_norm": 0.9746119256458304, + "learning_rate": 4.735963235344686e-06, + "loss": 0.4715, + "step": 14629 + }, + { + "epoch": 0.8909052157232896, + "grad_norm": 0.9924099913939048, + "learning_rate": 4.735927541752473e-06, + "loss": 0.4454, + "step": 14630 + }, + { + "epoch": 0.8909661115001675, + "grad_norm": 1.0295877685709038, + "learning_rate": 4.735891845882345e-06, + "loss": 0.4584, + "step": 14631 + }, + { + "epoch": 0.8910270072770453, + "grad_norm": 1.0280112256970313, + "learning_rate": 4.7358561477343345e-06, + "loss": 0.3963, + "step": 14632 + }, + { + "epoch": 0.8910879030539232, + "grad_norm": 1.1537098841804116, + "learning_rate": 4.7358204473084816e-06, + "loss": 0.4103, + "step": 14633 + }, + { + "epoch": 0.8911487988308011, + "grad_norm": 0.9859567136798626, + "learning_rate": 4.735784744604821e-06, + "loss": 0.4091, + "step": 14634 + }, + { + "epoch": 0.891209694607679, + "grad_norm": 0.9956400892416488, + "learning_rate": 4.73574903962339e-06, + "loss": 0.4844, + "step": 14635 + }, + { + "epoch": 0.8912705903845568, + "grad_norm": 0.8785410173930918, + "learning_rate": 4.735713332364223e-06, + "loss": 0.4777, + "step": 14636 + }, + { + "epoch": 0.8913314861614346, + "grad_norm": 1.1514180265151868, + "learning_rate": 4.735677622827359e-06, + "loss": 0.3878, + "step": 14637 + }, + { + "epoch": 0.8913923819383126, + "grad_norm": 0.9380558875321965, + "learning_rate": 4.735641911012833e-06, + "loss": 0.4564, + "step": 14638 + }, + { + "epoch": 0.8914532777151905, + "grad_norm": 0.9584839151769291, + "learning_rate": 4.735606196920681e-06, + "loss": 0.4152, + "step": 14639 + }, + { + "epoch": 0.8915141734920683, + "grad_norm": 0.9925864861992524, + "learning_rate": 4.7355704805509395e-06, + "loss": 0.3937, + "step": 14640 + }, + { + "epoch": 0.8915750692689463, + "grad_norm": 0.9673594386249813, + "learning_rate": 4.735534761903646e-06, + "loss": 0.3844, + "step": 14641 + }, + { + "epoch": 0.8916359650458241, + "grad_norm": 1.034176335610969, + "learning_rate": 4.735499040978836e-06, + "loss": 0.3648, + "step": 14642 + }, + { + "epoch": 0.8916968608227019, + "grad_norm": 0.990426877633575, + "learning_rate": 4.735463317776547e-06, + "loss": 0.3715, + "step": 14643 + }, + { + "epoch": 0.8917577565995798, + "grad_norm": 0.9706206319147549, + "learning_rate": 4.735427592296813e-06, + "loss": 0.4322, + "step": 14644 + }, + { + "epoch": 0.8918186523764577, + "grad_norm": 1.0953443265410499, + "learning_rate": 4.735391864539672e-06, + "loss": 0.3846, + "step": 14645 + }, + { + "epoch": 0.8918795481533356, + "grad_norm": 1.0611402412902053, + "learning_rate": 4.73535613450516e-06, + "loss": 0.404, + "step": 14646 + }, + { + "epoch": 0.8919404439302134, + "grad_norm": 0.9673528231781577, + "learning_rate": 4.735320402193315e-06, + "loss": 0.4297, + "step": 14647 + }, + { + "epoch": 0.8920013397070913, + "grad_norm": 1.0016450466391027, + "learning_rate": 4.7352846676041706e-06, + "loss": 0.4093, + "step": 14648 + }, + { + "epoch": 0.8920622354839692, + "grad_norm": 1.0498053406900354, + "learning_rate": 4.735248930737766e-06, + "loss": 0.4057, + "step": 14649 + }, + { + "epoch": 0.8921231312608471, + "grad_norm": 0.9697142725083524, + "learning_rate": 4.735213191594136e-06, + "loss": 0.421, + "step": 14650 + }, + { + "epoch": 0.8921840270377249, + "grad_norm": 1.0404258118824574, + "learning_rate": 4.735177450173316e-06, + "loss": 0.3444, + "step": 14651 + }, + { + "epoch": 0.8922449228146028, + "grad_norm": 1.0157197500918906, + "learning_rate": 4.735141706475345e-06, + "loss": 0.4117, + "step": 14652 + }, + { + "epoch": 0.8923058185914807, + "grad_norm": 1.0414127420652828, + "learning_rate": 4.735105960500258e-06, + "loss": 0.476, + "step": 14653 + }, + { + "epoch": 0.8923667143683586, + "grad_norm": 1.0291053424446857, + "learning_rate": 4.735070212248091e-06, + "loss": 0.4641, + "step": 14654 + }, + { + "epoch": 0.8924276101452364, + "grad_norm": 1.006003237925429, + "learning_rate": 4.735034461718881e-06, + "loss": 0.4355, + "step": 14655 + }, + { + "epoch": 0.8924885059221143, + "grad_norm": 0.9688892531018964, + "learning_rate": 4.734998708912664e-06, + "loss": 0.4336, + "step": 14656 + }, + { + "epoch": 0.8925494016989922, + "grad_norm": 1.0659299277655565, + "learning_rate": 4.7349629538294775e-06, + "loss": 0.3986, + "step": 14657 + }, + { + "epoch": 0.8926102974758701, + "grad_norm": 1.0549191479425488, + "learning_rate": 4.734927196469356e-06, + "loss": 0.3714, + "step": 14658 + }, + { + "epoch": 0.8926711932527479, + "grad_norm": 1.067801520051494, + "learning_rate": 4.734891436832339e-06, + "loss": 0.3607, + "step": 14659 + }, + { + "epoch": 0.8927320890296258, + "grad_norm": 1.0073310761697678, + "learning_rate": 4.734855674918461e-06, + "loss": 0.3763, + "step": 14660 + }, + { + "epoch": 0.8927929848065037, + "grad_norm": 0.961418788464633, + "learning_rate": 4.734819910727757e-06, + "loss": 0.4342, + "step": 14661 + }, + { + "epoch": 0.8928538805833816, + "grad_norm": 0.9470477728309236, + "learning_rate": 4.734784144260265e-06, + "loss": 0.4048, + "step": 14662 + }, + { + "epoch": 0.8929147763602594, + "grad_norm": 1.0572607251670336, + "learning_rate": 4.734748375516023e-06, + "loss": 0.4263, + "step": 14663 + }, + { + "epoch": 0.8929756721371372, + "grad_norm": 1.023993439182348, + "learning_rate": 4.734712604495064e-06, + "loss": 0.3604, + "step": 14664 + }, + { + "epoch": 0.8930365679140152, + "grad_norm": 1.0543530250277289, + "learning_rate": 4.734676831197427e-06, + "loss": 0.4434, + "step": 14665 + }, + { + "epoch": 0.893097463690893, + "grad_norm": 0.9716947185638822, + "learning_rate": 4.7346410556231485e-06, + "loss": 0.3986, + "step": 14666 + }, + { + "epoch": 0.8931583594677709, + "grad_norm": 0.9679657080687325, + "learning_rate": 4.734605277772263e-06, + "loss": 0.4703, + "step": 14667 + }, + { + "epoch": 0.8932192552446487, + "grad_norm": 1.0575564692730302, + "learning_rate": 4.734569497644809e-06, + "loss": 0.3912, + "step": 14668 + }, + { + "epoch": 0.8932801510215267, + "grad_norm": 0.9727063874295306, + "learning_rate": 4.734533715240821e-06, + "loss": 0.3548, + "step": 14669 + }, + { + "epoch": 0.8933410467984045, + "grad_norm": 1.0526341085795003, + "learning_rate": 4.734497930560337e-06, + "loss": 0.3915, + "step": 14670 + }, + { + "epoch": 0.8934019425752824, + "grad_norm": 1.0130256445105974, + "learning_rate": 4.734462143603393e-06, + "loss": 0.3593, + "step": 14671 + }, + { + "epoch": 0.8934628383521602, + "grad_norm": 0.9570914577395336, + "learning_rate": 4.734426354370025e-06, + "loss": 0.4474, + "step": 14672 + }, + { + "epoch": 0.8935237341290382, + "grad_norm": 0.9781701993300214, + "learning_rate": 4.734390562860271e-06, + "loss": 0.4253, + "step": 14673 + }, + { + "epoch": 0.893584629905916, + "grad_norm": 1.0142796074820146, + "learning_rate": 4.734354769074165e-06, + "loss": 0.384, + "step": 14674 + }, + { + "epoch": 0.8936455256827939, + "grad_norm": 0.9513996891251644, + "learning_rate": 4.7343189730117465e-06, + "loss": 0.3911, + "step": 14675 + }, + { + "epoch": 0.8937064214596717, + "grad_norm": 0.9842042834339912, + "learning_rate": 4.734283174673049e-06, + "loss": 0.4261, + "step": 14676 + }, + { + "epoch": 0.8937673172365497, + "grad_norm": 1.048173042501158, + "learning_rate": 4.734247374058111e-06, + "loss": 0.4926, + "step": 14677 + }, + { + "epoch": 0.8938282130134275, + "grad_norm": 0.9452615484103196, + "learning_rate": 4.734211571166966e-06, + "loss": 0.4533, + "step": 14678 + }, + { + "epoch": 0.8938891087903054, + "grad_norm": 0.9990234184611058, + "learning_rate": 4.734175765999656e-06, + "loss": 0.47, + "step": 14679 + }, + { + "epoch": 0.8939500045671833, + "grad_norm": 1.0048850342038116, + "learning_rate": 4.734139958556212e-06, + "loss": 0.3952, + "step": 14680 + }, + { + "epoch": 0.8940109003440612, + "grad_norm": 1.0158039621397243, + "learning_rate": 4.734104148836673e-06, + "loss": 0.3723, + "step": 14681 + }, + { + "epoch": 0.894071796120939, + "grad_norm": 0.9153864253838628, + "learning_rate": 4.734068336841076e-06, + "loss": 0.4445, + "step": 14682 + }, + { + "epoch": 0.8941326918978169, + "grad_norm": 0.9327293362966655, + "learning_rate": 4.734032522569456e-06, + "loss": 0.415, + "step": 14683 + }, + { + "epoch": 0.8941935876746948, + "grad_norm": 1.0569485098883837, + "learning_rate": 4.7339967060218505e-06, + "loss": 0.4291, + "step": 14684 + }, + { + "epoch": 0.8942544834515727, + "grad_norm": 0.9852569916298514, + "learning_rate": 4.733960887198295e-06, + "loss": 0.4163, + "step": 14685 + }, + { + "epoch": 0.8943153792284505, + "grad_norm": 1.0989183686945219, + "learning_rate": 4.7339250660988265e-06, + "loss": 0.473, + "step": 14686 + }, + { + "epoch": 0.8943762750053283, + "grad_norm": 0.9252179304020337, + "learning_rate": 4.7338892427234824e-06, + "loss": 0.4169, + "step": 14687 + }, + { + "epoch": 0.8944371707822063, + "grad_norm": 1.0134298606135095, + "learning_rate": 4.733853417072298e-06, + "loss": 0.3601, + "step": 14688 + }, + { + "epoch": 0.8944980665590841, + "grad_norm": 1.0652894125660364, + "learning_rate": 4.73381758914531e-06, + "loss": 0.4269, + "step": 14689 + }, + { + "epoch": 0.894558962335962, + "grad_norm": 1.192962807106631, + "learning_rate": 4.733781758942556e-06, + "loss": 0.384, + "step": 14690 + }, + { + "epoch": 0.8946198581128398, + "grad_norm": 0.9207254629605826, + "learning_rate": 4.733745926464071e-06, + "loss": 0.4159, + "step": 14691 + }, + { + "epoch": 0.8946807538897178, + "grad_norm": 0.9469570635078458, + "learning_rate": 4.733710091709891e-06, + "loss": 0.4357, + "step": 14692 + }, + { + "epoch": 0.8947416496665956, + "grad_norm": 0.9995858184623855, + "learning_rate": 4.733674254680055e-06, + "loss": 0.3888, + "step": 14693 + }, + { + "epoch": 0.8948025454434735, + "grad_norm": 0.9513177349981244, + "learning_rate": 4.733638415374597e-06, + "loss": 0.4136, + "step": 14694 + }, + { + "epoch": 0.8948634412203513, + "grad_norm": 0.9445966473081283, + "learning_rate": 4.733602573793555e-06, + "loss": 0.4229, + "step": 14695 + }, + { + "epoch": 0.8949243369972293, + "grad_norm": 1.0139849934168779, + "learning_rate": 4.733566729936966e-06, + "loss": 0.4581, + "step": 14696 + }, + { + "epoch": 0.8949852327741071, + "grad_norm": 1.005606096421508, + "learning_rate": 4.733530883804865e-06, + "loss": 0.353, + "step": 14697 + }, + { + "epoch": 0.895046128550985, + "grad_norm": 1.0675601201156681, + "learning_rate": 4.733495035397289e-06, + "loss": 0.4253, + "step": 14698 + }, + { + "epoch": 0.8951070243278628, + "grad_norm": 1.0378307106960727, + "learning_rate": 4.733459184714276e-06, + "loss": 0.3358, + "step": 14699 + }, + { + "epoch": 0.8951679201047408, + "grad_norm": 1.0607124607269152, + "learning_rate": 4.733423331755859e-06, + "loss": 0.4902, + "step": 14700 + }, + { + "epoch": 0.8952288158816186, + "grad_norm": 1.0296967250532527, + "learning_rate": 4.733387476522077e-06, + "loss": 0.3611, + "step": 14701 + }, + { + "epoch": 0.8952897116584965, + "grad_norm": 0.9864394851781558, + "learning_rate": 4.733351619012968e-06, + "loss": 0.5028, + "step": 14702 + }, + { + "epoch": 0.8953506074353743, + "grad_norm": 1.117776067109716, + "learning_rate": 4.733315759228566e-06, + "loss": 0.3493, + "step": 14703 + }, + { + "epoch": 0.8954115032122523, + "grad_norm": 1.0121899872909834, + "learning_rate": 4.733279897168908e-06, + "loss": 0.386, + "step": 14704 + }, + { + "epoch": 0.8954723989891301, + "grad_norm": 0.9564989183900076, + "learning_rate": 4.733244032834031e-06, + "loss": 0.4569, + "step": 14705 + }, + { + "epoch": 0.895533294766008, + "grad_norm": 0.9110541267131114, + "learning_rate": 4.733208166223972e-06, + "loss": 0.3976, + "step": 14706 + }, + { + "epoch": 0.8955941905428858, + "grad_norm": 0.9727164074399459, + "learning_rate": 4.733172297338766e-06, + "loss": 0.4406, + "step": 14707 + }, + { + "epoch": 0.8956550863197638, + "grad_norm": 1.0618540373469474, + "learning_rate": 4.7331364261784515e-06, + "loss": 0.3532, + "step": 14708 + }, + { + "epoch": 0.8957159820966416, + "grad_norm": 0.9154899278624605, + "learning_rate": 4.733100552743063e-06, + "loss": 0.4731, + "step": 14709 + }, + { + "epoch": 0.8957768778735195, + "grad_norm": 1.0227803423058903, + "learning_rate": 4.733064677032639e-06, + "loss": 0.3983, + "step": 14710 + }, + { + "epoch": 0.8958377736503973, + "grad_norm": 0.9803137597237719, + "learning_rate": 4.733028799047215e-06, + "loss": 0.3979, + "step": 14711 + }, + { + "epoch": 0.8958986694272753, + "grad_norm": 0.9287760626871222, + "learning_rate": 4.732992918786828e-06, + "loss": 0.3669, + "step": 14712 + }, + { + "epoch": 0.8959595652041531, + "grad_norm": 0.9993217848536802, + "learning_rate": 4.732957036251513e-06, + "loss": 0.3857, + "step": 14713 + }, + { + "epoch": 0.8960204609810309, + "grad_norm": 0.9462551372987617, + "learning_rate": 4.732921151441308e-06, + "loss": 0.4663, + "step": 14714 + }, + { + "epoch": 0.8960813567579088, + "grad_norm": 1.0739304470077295, + "learning_rate": 4.73288526435625e-06, + "loss": 0.4159, + "step": 14715 + }, + { + "epoch": 0.8961422525347867, + "grad_norm": 0.981191994806953, + "learning_rate": 4.732849374996375e-06, + "loss": 0.4423, + "step": 14716 + }, + { + "epoch": 0.8962031483116646, + "grad_norm": 0.9984110732125402, + "learning_rate": 4.732813483361719e-06, + "loss": 0.3184, + "step": 14717 + }, + { + "epoch": 0.8962640440885424, + "grad_norm": 1.0761881238140336, + "learning_rate": 4.7327775894523196e-06, + "loss": 0.3781, + "step": 14718 + }, + { + "epoch": 0.8963249398654203, + "grad_norm": 1.0641899016074947, + "learning_rate": 4.732741693268212e-06, + "loss": 0.3845, + "step": 14719 + }, + { + "epoch": 0.8963858356422982, + "grad_norm": 1.0054020560551704, + "learning_rate": 4.732705794809434e-06, + "loss": 0.3993, + "step": 14720 + }, + { + "epoch": 0.8964467314191761, + "grad_norm": 1.0611634113338613, + "learning_rate": 4.732669894076022e-06, + "loss": 0.4468, + "step": 14721 + }, + { + "epoch": 0.8965076271960539, + "grad_norm": 1.1170818422553623, + "learning_rate": 4.732633991068012e-06, + "loss": 0.4673, + "step": 14722 + }, + { + "epoch": 0.8965685229729319, + "grad_norm": 0.991453295440396, + "learning_rate": 4.732598085785441e-06, + "loss": 0.4426, + "step": 14723 + }, + { + "epoch": 0.8966294187498097, + "grad_norm": 1.0715156529665135, + "learning_rate": 4.732562178228346e-06, + "loss": 0.3926, + "step": 14724 + }, + { + "epoch": 0.8966903145266876, + "grad_norm": 1.0419064972844359, + "learning_rate": 4.732526268396762e-06, + "loss": 0.3954, + "step": 14725 + }, + { + "epoch": 0.8967512103035654, + "grad_norm": 0.9166140599744076, + "learning_rate": 4.732490356290727e-06, + "loss": 0.3853, + "step": 14726 + }, + { + "epoch": 0.8968121060804434, + "grad_norm": 0.9231511803663452, + "learning_rate": 4.732454441910278e-06, + "loss": 0.4536, + "step": 14727 + }, + { + "epoch": 0.8968730018573212, + "grad_norm": 0.9575226763746894, + "learning_rate": 4.73241852525545e-06, + "loss": 0.4033, + "step": 14728 + }, + { + "epoch": 0.8969338976341991, + "grad_norm": 1.0010951786763047, + "learning_rate": 4.732382606326281e-06, + "loss": 0.3718, + "step": 14729 + }, + { + "epoch": 0.8969947934110769, + "grad_norm": 1.0166352354157981, + "learning_rate": 4.7323466851228065e-06, + "loss": 0.3923, + "step": 14730 + }, + { + "epoch": 0.8970556891879549, + "grad_norm": 1.0034345342145254, + "learning_rate": 4.732310761645064e-06, + "loss": 0.3744, + "step": 14731 + }, + { + "epoch": 0.8971165849648327, + "grad_norm": 1.0100774760039217, + "learning_rate": 4.732274835893089e-06, + "loss": 0.3482, + "step": 14732 + }, + { + "epoch": 0.8971774807417106, + "grad_norm": 0.929102043103311, + "learning_rate": 4.73223890786692e-06, + "loss": 0.4223, + "step": 14733 + }, + { + "epoch": 0.8972383765185884, + "grad_norm": 0.9597049810394762, + "learning_rate": 4.732202977566592e-06, + "loss": 0.3863, + "step": 14734 + }, + { + "epoch": 0.8972992722954664, + "grad_norm": 1.0724699609271977, + "learning_rate": 4.732167044992142e-06, + "loss": 0.4111, + "step": 14735 + }, + { + "epoch": 0.8973601680723442, + "grad_norm": 1.0068181649095078, + "learning_rate": 4.732131110143606e-06, + "loss": 0.4028, + "step": 14736 + }, + { + "epoch": 0.897421063849222, + "grad_norm": 1.0886380642541515, + "learning_rate": 4.732095173021022e-06, + "loss": 0.3789, + "step": 14737 + }, + { + "epoch": 0.8974819596260999, + "grad_norm": 1.0239022753990905, + "learning_rate": 4.732059233624426e-06, + "loss": 0.4187, + "step": 14738 + }, + { + "epoch": 0.8975428554029778, + "grad_norm": 1.1160763073008986, + "learning_rate": 4.732023291953854e-06, + "loss": 0.3623, + "step": 14739 + }, + { + "epoch": 0.8976037511798557, + "grad_norm": 1.0418638385193137, + "learning_rate": 4.731987348009342e-06, + "loss": 0.3544, + "step": 14740 + }, + { + "epoch": 0.8976646469567335, + "grad_norm": 1.0604419704570462, + "learning_rate": 4.731951401790929e-06, + "loss": 0.4797, + "step": 14741 + }, + { + "epoch": 0.8977255427336114, + "grad_norm": 1.0459616370909977, + "learning_rate": 4.73191545329865e-06, + "loss": 0.3644, + "step": 14742 + }, + { + "epoch": 0.8977864385104893, + "grad_norm": 1.0503479894051482, + "learning_rate": 4.731879502532541e-06, + "loss": 0.3587, + "step": 14743 + }, + { + "epoch": 0.8978473342873672, + "grad_norm": 1.0438555838284191, + "learning_rate": 4.731843549492642e-06, + "loss": 0.3974, + "step": 14744 + }, + { + "epoch": 0.897908230064245, + "grad_norm": 1.055149610260259, + "learning_rate": 4.731807594178985e-06, + "loss": 0.4164, + "step": 14745 + }, + { + "epoch": 0.8979691258411229, + "grad_norm": 0.9914131120730288, + "learning_rate": 4.73177163659161e-06, + "loss": 0.421, + "step": 14746 + }, + { + "epoch": 0.8980300216180008, + "grad_norm": 0.9707508112285465, + "learning_rate": 4.731735676730552e-06, + "loss": 0.4512, + "step": 14747 + }, + { + "epoch": 0.8980909173948787, + "grad_norm": 0.869955730811528, + "learning_rate": 4.731699714595847e-06, + "loss": 0.4094, + "step": 14748 + }, + { + "epoch": 0.8981518131717565, + "grad_norm": 1.008107194814038, + "learning_rate": 4.731663750187535e-06, + "loss": 0.4271, + "step": 14749 + }, + { + "epoch": 0.8982127089486344, + "grad_norm": 1.0213011302098292, + "learning_rate": 4.731627783505649e-06, + "loss": 0.4712, + "step": 14750 + }, + { + "epoch": 0.8982736047255123, + "grad_norm": 0.9542058023358428, + "learning_rate": 4.731591814550227e-06, + "loss": 0.4665, + "step": 14751 + }, + { + "epoch": 0.8983345005023902, + "grad_norm": 0.9707793814382296, + "learning_rate": 4.731555843321306e-06, + "loss": 0.3811, + "step": 14752 + }, + { + "epoch": 0.898395396279268, + "grad_norm": 1.0552986344716142, + "learning_rate": 4.731519869818923e-06, + "loss": 0.3998, + "step": 14753 + }, + { + "epoch": 0.8984562920561459, + "grad_norm": 0.9807014870650854, + "learning_rate": 4.731483894043112e-06, + "loss": 0.4542, + "step": 14754 + }, + { + "epoch": 0.8985171878330238, + "grad_norm": 1.1314993627728256, + "learning_rate": 4.731447915993913e-06, + "loss": 0.4667, + "step": 14755 + }, + { + "epoch": 0.8985780836099017, + "grad_norm": 0.9142938121223234, + "learning_rate": 4.731411935671361e-06, + "loss": 0.432, + "step": 14756 + }, + { + "epoch": 0.8986389793867795, + "grad_norm": 0.9913725748276836, + "learning_rate": 4.731375953075492e-06, + "loss": 0.3615, + "step": 14757 + }, + { + "epoch": 0.8986998751636573, + "grad_norm": 0.8774392196655394, + "learning_rate": 4.731339968206346e-06, + "loss": 0.4273, + "step": 14758 + }, + { + "epoch": 0.8987607709405353, + "grad_norm": 1.1518554928211755, + "learning_rate": 4.731303981063955e-06, + "loss": 0.3939, + "step": 14759 + }, + { + "epoch": 0.8988216667174131, + "grad_norm": 1.0455817464090764, + "learning_rate": 4.731267991648358e-06, + "loss": 0.3957, + "step": 14760 + }, + { + "epoch": 0.898882562494291, + "grad_norm": 1.042051472600766, + "learning_rate": 4.731231999959592e-06, + "loss": 0.4704, + "step": 14761 + }, + { + "epoch": 0.898943458271169, + "grad_norm": 1.0015059015367607, + "learning_rate": 4.731196005997694e-06, + "loss": 0.3673, + "step": 14762 + }, + { + "epoch": 0.8990043540480468, + "grad_norm": 1.0388060417198681, + "learning_rate": 4.7311600097627e-06, + "loss": 0.4578, + "step": 14763 + }, + { + "epoch": 0.8990652498249246, + "grad_norm": 0.9964583984724342, + "learning_rate": 4.7311240112546455e-06, + "loss": 0.4447, + "step": 14764 + }, + { + "epoch": 0.8991261456018025, + "grad_norm": 1.0195197892620804, + "learning_rate": 4.731088010473568e-06, + "loss": 0.3801, + "step": 14765 + }, + { + "epoch": 0.8991870413786804, + "grad_norm": 1.038309092796527, + "learning_rate": 4.731052007419505e-06, + "loss": 0.4433, + "step": 14766 + }, + { + "epoch": 0.8992479371555583, + "grad_norm": 1.0227243909538348, + "learning_rate": 4.731016002092493e-06, + "loss": 0.4236, + "step": 14767 + }, + { + "epoch": 0.8993088329324361, + "grad_norm": 0.9666285478380208, + "learning_rate": 4.730979994492568e-06, + "loss": 0.411, + "step": 14768 + }, + { + "epoch": 0.899369728709314, + "grad_norm": 0.9780046206951722, + "learning_rate": 4.730943984619767e-06, + "loss": 0.4905, + "step": 14769 + }, + { + "epoch": 0.8994306244861919, + "grad_norm": 1.0567119155366314, + "learning_rate": 4.7309079724741254e-06, + "loss": 0.4109, + "step": 14770 + }, + { + "epoch": 0.8994915202630698, + "grad_norm": 0.992353812060162, + "learning_rate": 4.730871958055683e-06, + "loss": 0.3801, + "step": 14771 + }, + { + "epoch": 0.8995524160399476, + "grad_norm": 1.020114426196907, + "learning_rate": 4.7308359413644735e-06, + "loss": 0.3626, + "step": 14772 + }, + { + "epoch": 0.8996133118168255, + "grad_norm": 1.1487624329203525, + "learning_rate": 4.730799922400535e-06, + "loss": 0.3897, + "step": 14773 + }, + { + "epoch": 0.8996742075937034, + "grad_norm": 0.9925832630156053, + "learning_rate": 4.730763901163904e-06, + "loss": 0.3653, + "step": 14774 + }, + { + "epoch": 0.8997351033705813, + "grad_norm": 0.9864631293247157, + "learning_rate": 4.730727877654617e-06, + "loss": 0.5114, + "step": 14775 + }, + { + "epoch": 0.8997959991474591, + "grad_norm": 1.153742223786449, + "learning_rate": 4.730691851872712e-06, + "loss": 0.446, + "step": 14776 + }, + { + "epoch": 0.899856894924337, + "grad_norm": 0.9973605172942891, + "learning_rate": 4.7306558238182235e-06, + "loss": 0.3845, + "step": 14777 + }, + { + "epoch": 0.8999177907012149, + "grad_norm": 1.0194518511812543, + "learning_rate": 4.730619793491189e-06, + "loss": 0.4354, + "step": 14778 + }, + { + "epoch": 0.8999786864780928, + "grad_norm": 0.9204705537472149, + "learning_rate": 4.730583760891645e-06, + "loss": 0.4347, + "step": 14779 + }, + { + "epoch": 0.9000395822549706, + "grad_norm": 1.052898337078873, + "learning_rate": 4.7305477260196295e-06, + "loss": 0.3883, + "step": 14780 + }, + { + "epoch": 0.9001004780318484, + "grad_norm": 0.9497108228291742, + "learning_rate": 4.730511688875178e-06, + "loss": 0.4295, + "step": 14781 + }, + { + "epoch": 0.9001613738087264, + "grad_norm": 1.016478442477868, + "learning_rate": 4.730475649458328e-06, + "loss": 0.4333, + "step": 14782 + }, + { + "epoch": 0.9002222695856043, + "grad_norm": 0.9726688595511491, + "learning_rate": 4.730439607769115e-06, + "loss": 0.4604, + "step": 14783 + }, + { + "epoch": 0.9002831653624821, + "grad_norm": 1.121019950945538, + "learning_rate": 4.730403563807577e-06, + "loss": 0.4173, + "step": 14784 + }, + { + "epoch": 0.9003440611393599, + "grad_norm": 1.0410375759983872, + "learning_rate": 4.730367517573751e-06, + "loss": 0.4372, + "step": 14785 + }, + { + "epoch": 0.9004049569162379, + "grad_norm": 0.9305528610280218, + "learning_rate": 4.730331469067672e-06, + "loss": 0.5091, + "step": 14786 + }, + { + "epoch": 0.9004658526931157, + "grad_norm": 0.9471030758191482, + "learning_rate": 4.730295418289377e-06, + "loss": 0.4621, + "step": 14787 + }, + { + "epoch": 0.9005267484699936, + "grad_norm": 0.9306959989767687, + "learning_rate": 4.730259365238904e-06, + "loss": 0.4366, + "step": 14788 + }, + { + "epoch": 0.9005876442468714, + "grad_norm": 0.9597833773202952, + "learning_rate": 4.7302233099162895e-06, + "loss": 0.3805, + "step": 14789 + }, + { + "epoch": 0.9006485400237494, + "grad_norm": 1.0749522932404074, + "learning_rate": 4.73018725232157e-06, + "loss": 0.4183, + "step": 14790 + }, + { + "epoch": 0.9007094358006272, + "grad_norm": 0.9990489339249387, + "learning_rate": 4.730151192454781e-06, + "loss": 0.3973, + "step": 14791 + }, + { + "epoch": 0.9007703315775051, + "grad_norm": 1.0013704592495516, + "learning_rate": 4.730115130315962e-06, + "loss": 0.4219, + "step": 14792 + }, + { + "epoch": 0.9008312273543829, + "grad_norm": 0.985527784796445, + "learning_rate": 4.7300790659051475e-06, + "loss": 0.3971, + "step": 14793 + }, + { + "epoch": 0.9008921231312609, + "grad_norm": 0.9798391551459623, + "learning_rate": 4.730042999222374e-06, + "loss": 0.4729, + "step": 14794 + }, + { + "epoch": 0.9009530189081387, + "grad_norm": 0.9363670129373202, + "learning_rate": 4.73000693026768e-06, + "loss": 0.4811, + "step": 14795 + }, + { + "epoch": 0.9010139146850166, + "grad_norm": 1.0301040303411657, + "learning_rate": 4.7299708590411005e-06, + "loss": 0.4579, + "step": 14796 + }, + { + "epoch": 0.9010748104618944, + "grad_norm": 1.015743690492752, + "learning_rate": 4.729934785542673e-06, + "loss": 0.4439, + "step": 14797 + }, + { + "epoch": 0.9011357062387724, + "grad_norm": 0.9411048001831362, + "learning_rate": 4.7298987097724344e-06, + "loss": 0.384, + "step": 14798 + }, + { + "epoch": 0.9011966020156502, + "grad_norm": 0.9372641697109522, + "learning_rate": 4.729862631730423e-06, + "loss": 0.5279, + "step": 14799 + }, + { + "epoch": 0.9012574977925281, + "grad_norm": 1.0424258859928048, + "learning_rate": 4.729826551416672e-06, + "loss": 0.3492, + "step": 14800 + }, + { + "epoch": 0.9013183935694059, + "grad_norm": 1.0005954731815978, + "learning_rate": 4.72979046883122e-06, + "loss": 0.4516, + "step": 14801 + }, + { + "epoch": 0.9013792893462839, + "grad_norm": 1.089272883168513, + "learning_rate": 4.729754383974105e-06, + "loss": 0.391, + "step": 14802 + }, + { + "epoch": 0.9014401851231617, + "grad_norm": 1.0737590575313611, + "learning_rate": 4.729718296845362e-06, + "loss": 0.3713, + "step": 14803 + }, + { + "epoch": 0.9015010809000396, + "grad_norm": 0.9571207211635987, + "learning_rate": 4.729682207445029e-06, + "loss": 0.4572, + "step": 14804 + }, + { + "epoch": 0.9015619766769175, + "grad_norm": 0.9990047028792214, + "learning_rate": 4.729646115773141e-06, + "loss": 0.3632, + "step": 14805 + }, + { + "epoch": 0.9016228724537954, + "grad_norm": 1.003551484855728, + "learning_rate": 4.729610021829737e-06, + "loss": 0.4016, + "step": 14806 + }, + { + "epoch": 0.9016837682306732, + "grad_norm": 1.0119161941646717, + "learning_rate": 4.729573925614852e-06, + "loss": 0.429, + "step": 14807 + }, + { + "epoch": 0.901744664007551, + "grad_norm": 0.9737816529391146, + "learning_rate": 4.729537827128523e-06, + "loss": 0.4639, + "step": 14808 + }, + { + "epoch": 0.901805559784429, + "grad_norm": 0.9910335454828204, + "learning_rate": 4.729501726370788e-06, + "loss": 0.4157, + "step": 14809 + }, + { + "epoch": 0.9018664555613068, + "grad_norm": 1.0336034659241622, + "learning_rate": 4.729465623341683e-06, + "loss": 0.375, + "step": 14810 + }, + { + "epoch": 0.9019273513381847, + "grad_norm": 0.9388442506544576, + "learning_rate": 4.729429518041244e-06, + "loss": 0.4744, + "step": 14811 + }, + { + "epoch": 0.9019882471150625, + "grad_norm": 0.9475017236710784, + "learning_rate": 4.729393410469509e-06, + "loss": 0.3941, + "step": 14812 + }, + { + "epoch": 0.9020491428919405, + "grad_norm": 0.8853918380031226, + "learning_rate": 4.729357300626514e-06, + "loss": 0.4197, + "step": 14813 + }, + { + "epoch": 0.9021100386688183, + "grad_norm": 1.0385634609507775, + "learning_rate": 4.729321188512297e-06, + "loss": 0.3792, + "step": 14814 + }, + { + "epoch": 0.9021709344456962, + "grad_norm": 0.944215505421361, + "learning_rate": 4.729285074126894e-06, + "loss": 0.4251, + "step": 14815 + }, + { + "epoch": 0.902231830222574, + "grad_norm": 1.123894122655681, + "learning_rate": 4.7292489574703405e-06, + "loss": 0.4577, + "step": 14816 + }, + { + "epoch": 0.902292725999452, + "grad_norm": 1.020361586872317, + "learning_rate": 4.729212838542676e-06, + "loss": 0.3863, + "step": 14817 + }, + { + "epoch": 0.9023536217763298, + "grad_norm": 1.0914122132781925, + "learning_rate": 4.729176717343934e-06, + "loss": 0.397, + "step": 14818 + }, + { + "epoch": 0.9024145175532077, + "grad_norm": 0.9390169430525022, + "learning_rate": 4.7291405938741545e-06, + "loss": 0.427, + "step": 14819 + }, + { + "epoch": 0.9024754133300855, + "grad_norm": 0.9797231605951189, + "learning_rate": 4.729104468133373e-06, + "loss": 0.4605, + "step": 14820 + }, + { + "epoch": 0.9025363091069635, + "grad_norm": 0.9655470762728652, + "learning_rate": 4.7290683401216256e-06, + "loss": 0.4123, + "step": 14821 + }, + { + "epoch": 0.9025972048838413, + "grad_norm": 0.9626369490201921, + "learning_rate": 4.72903220983895e-06, + "loss": 0.3489, + "step": 14822 + }, + { + "epoch": 0.9026581006607192, + "grad_norm": 0.9406730116807526, + "learning_rate": 4.728996077285382e-06, + "loss": 0.4052, + "step": 14823 + }, + { + "epoch": 0.902718996437597, + "grad_norm": 0.9420504430850849, + "learning_rate": 4.72895994246096e-06, + "loss": 0.4306, + "step": 14824 + }, + { + "epoch": 0.902779892214475, + "grad_norm": 0.9833369101726708, + "learning_rate": 4.72892380536572e-06, + "loss": 0.4253, + "step": 14825 + }, + { + "epoch": 0.9028407879913528, + "grad_norm": 1.0193364339021498, + "learning_rate": 4.728887665999698e-06, + "loss": 0.4099, + "step": 14826 + }, + { + "epoch": 0.9029016837682307, + "grad_norm": 0.9605888922180169, + "learning_rate": 4.728851524362932e-06, + "loss": 0.3976, + "step": 14827 + }, + { + "epoch": 0.9029625795451085, + "grad_norm": 1.0090926751965617, + "learning_rate": 4.728815380455458e-06, + "loss": 0.4046, + "step": 14828 + }, + { + "epoch": 0.9030234753219865, + "grad_norm": 1.0348669491286246, + "learning_rate": 4.7287792342773146e-06, + "loss": 0.4883, + "step": 14829 + }, + { + "epoch": 0.9030843710988643, + "grad_norm": 0.9738730698790232, + "learning_rate": 4.728743085828535e-06, + "loss": 0.4454, + "step": 14830 + }, + { + "epoch": 0.9031452668757421, + "grad_norm": 0.8365110598024909, + "learning_rate": 4.72870693510916e-06, + "loss": 0.4833, + "step": 14831 + }, + { + "epoch": 0.90320616265262, + "grad_norm": 1.0372147133093788, + "learning_rate": 4.728670782119224e-06, + "loss": 0.427, + "step": 14832 + }, + { + "epoch": 0.903267058429498, + "grad_norm": 1.0013894293994208, + "learning_rate": 4.728634626858765e-06, + "loss": 0.4732, + "step": 14833 + }, + { + "epoch": 0.9033279542063758, + "grad_norm": 0.9582368589481151, + "learning_rate": 4.72859846932782e-06, + "loss": 0.4188, + "step": 14834 + }, + { + "epoch": 0.9033888499832536, + "grad_norm": 0.9969409881088852, + "learning_rate": 4.728562309526424e-06, + "loss": 0.4663, + "step": 14835 + }, + { + "epoch": 0.9034497457601315, + "grad_norm": 0.9838821781498216, + "learning_rate": 4.728526147454615e-06, + "loss": 0.4162, + "step": 14836 + }, + { + "epoch": 0.9035106415370094, + "grad_norm": 1.0496801264888749, + "learning_rate": 4.728489983112431e-06, + "loss": 0.4072, + "step": 14837 + }, + { + "epoch": 0.9035715373138873, + "grad_norm": 0.9652857517272564, + "learning_rate": 4.728453816499907e-06, + "loss": 0.4095, + "step": 14838 + }, + { + "epoch": 0.9036324330907651, + "grad_norm": 1.085994976831757, + "learning_rate": 4.728417647617081e-06, + "loss": 0.325, + "step": 14839 + }, + { + "epoch": 0.903693328867643, + "grad_norm": 1.0458537778075485, + "learning_rate": 4.728381476463989e-06, + "loss": 0.3917, + "step": 14840 + }, + { + "epoch": 0.9037542246445209, + "grad_norm": 0.9855108911461644, + "learning_rate": 4.7283453030406676e-06, + "loss": 0.4566, + "step": 14841 + }, + { + "epoch": 0.9038151204213988, + "grad_norm": 1.034201134530925, + "learning_rate": 4.728309127347156e-06, + "loss": 0.3468, + "step": 14842 + }, + { + "epoch": 0.9038760161982766, + "grad_norm": 1.1342085995779174, + "learning_rate": 4.7282729493834875e-06, + "loss": 0.4119, + "step": 14843 + }, + { + "epoch": 0.9039369119751546, + "grad_norm": 1.1159248996923845, + "learning_rate": 4.728236769149702e-06, + "loss": 0.363, + "step": 14844 + }, + { + "epoch": 0.9039978077520324, + "grad_norm": 1.0032037054143343, + "learning_rate": 4.728200586645836e-06, + "loss": 0.4182, + "step": 14845 + }, + { + "epoch": 0.9040587035289103, + "grad_norm": 1.0352299163496819, + "learning_rate": 4.7281644018719245e-06, + "loss": 0.3823, + "step": 14846 + }, + { + "epoch": 0.9041195993057881, + "grad_norm": 1.0404633123947689, + "learning_rate": 4.728128214828005e-06, + "loss": 0.3801, + "step": 14847 + }, + { + "epoch": 0.9041804950826661, + "grad_norm": 1.0010231774824867, + "learning_rate": 4.728092025514116e-06, + "loss": 0.397, + "step": 14848 + }, + { + "epoch": 0.9042413908595439, + "grad_norm": 0.9075356793895574, + "learning_rate": 4.728055833930292e-06, + "loss": 0.4762, + "step": 14849 + }, + { + "epoch": 0.9043022866364218, + "grad_norm": 0.9490866445502599, + "learning_rate": 4.728019640076572e-06, + "loss": 0.4392, + "step": 14850 + }, + { + "epoch": 0.9043631824132996, + "grad_norm": 0.9732598437708929, + "learning_rate": 4.727983443952991e-06, + "loss": 0.3716, + "step": 14851 + }, + { + "epoch": 0.9044240781901776, + "grad_norm": 1.0630556064486916, + "learning_rate": 4.7279472455595875e-06, + "loss": 0.3414, + "step": 14852 + }, + { + "epoch": 0.9044849739670554, + "grad_norm": 0.9759529056694707, + "learning_rate": 4.727911044896397e-06, + "loss": 0.4099, + "step": 14853 + }, + { + "epoch": 0.9045458697439333, + "grad_norm": 0.9944907989341534, + "learning_rate": 4.727874841963458e-06, + "loss": 0.3671, + "step": 14854 + }, + { + "epoch": 0.9046067655208111, + "grad_norm": 1.0596977910478598, + "learning_rate": 4.727838636760806e-06, + "loss": 0.3602, + "step": 14855 + }, + { + "epoch": 0.904667661297689, + "grad_norm": 1.028935036924502, + "learning_rate": 4.727802429288478e-06, + "loss": 0.3976, + "step": 14856 + }, + { + "epoch": 0.9047285570745669, + "grad_norm": 0.9426398728100358, + "learning_rate": 4.727766219546511e-06, + "loss": 0.3739, + "step": 14857 + }, + { + "epoch": 0.9047894528514447, + "grad_norm": 0.9583587547224909, + "learning_rate": 4.727730007534943e-06, + "loss": 0.4795, + "step": 14858 + }, + { + "epoch": 0.9048503486283226, + "grad_norm": 1.0310276492831034, + "learning_rate": 4.7276937932538095e-06, + "loss": 0.3823, + "step": 14859 + }, + { + "epoch": 0.9049112444052005, + "grad_norm": 1.0699780587773946, + "learning_rate": 4.727657576703148e-06, + "loss": 0.4116, + "step": 14860 + }, + { + "epoch": 0.9049721401820784, + "grad_norm": 0.9736711275377722, + "learning_rate": 4.727621357882995e-06, + "loss": 0.4026, + "step": 14861 + }, + { + "epoch": 0.9050330359589562, + "grad_norm": 1.0061309927956792, + "learning_rate": 4.7275851367933875e-06, + "loss": 0.3773, + "step": 14862 + }, + { + "epoch": 0.9050939317358341, + "grad_norm": 0.9711557650724193, + "learning_rate": 4.7275489134343635e-06, + "loss": 0.4398, + "step": 14863 + }, + { + "epoch": 0.905154827512712, + "grad_norm": 1.0024806697650968, + "learning_rate": 4.727512687805959e-06, + "loss": 0.4187, + "step": 14864 + }, + { + "epoch": 0.9052157232895899, + "grad_norm": 1.0013663244918263, + "learning_rate": 4.72747645990821e-06, + "loss": 0.4306, + "step": 14865 + }, + { + "epoch": 0.9052766190664677, + "grad_norm": 1.023665389934853, + "learning_rate": 4.727440229741155e-06, + "loss": 0.4026, + "step": 14866 + }, + { + "epoch": 0.9053375148433456, + "grad_norm": 1.1612437642212712, + "learning_rate": 4.727403997304829e-06, + "loss": 0.3983, + "step": 14867 + }, + { + "epoch": 0.9053984106202235, + "grad_norm": 1.027436407402743, + "learning_rate": 4.727367762599271e-06, + "loss": 0.4136, + "step": 14868 + }, + { + "epoch": 0.9054593063971014, + "grad_norm": 0.9665913997564279, + "learning_rate": 4.727331525624517e-06, + "loss": 0.4425, + "step": 14869 + }, + { + "epoch": 0.9055202021739792, + "grad_norm": 0.9969291776886547, + "learning_rate": 4.727295286380604e-06, + "loss": 0.4151, + "step": 14870 + }, + { + "epoch": 0.9055810979508571, + "grad_norm": 0.950909838693013, + "learning_rate": 4.727259044867569e-06, + "loss": 0.3989, + "step": 14871 + }, + { + "epoch": 0.905641993727735, + "grad_norm": 1.0029000498267313, + "learning_rate": 4.727222801085448e-06, + "loss": 0.4143, + "step": 14872 + }, + { + "epoch": 0.9057028895046129, + "grad_norm": 1.0478629657393956, + "learning_rate": 4.727186555034279e-06, + "loss": 0.3761, + "step": 14873 + }, + { + "epoch": 0.9057637852814907, + "grad_norm": 0.9498415255920489, + "learning_rate": 4.727150306714099e-06, + "loss": 0.4175, + "step": 14874 + }, + { + "epoch": 0.9058246810583686, + "grad_norm": 0.9149214422196207, + "learning_rate": 4.727114056124945e-06, + "loss": 0.4417, + "step": 14875 + }, + { + "epoch": 0.9058855768352465, + "grad_norm": 1.0313091451358256, + "learning_rate": 4.727077803266853e-06, + "loss": 0.3824, + "step": 14876 + }, + { + "epoch": 0.9059464726121244, + "grad_norm": 1.0974724817162929, + "learning_rate": 4.7270415481398606e-06, + "loss": 0.4002, + "step": 14877 + }, + { + "epoch": 0.9060073683890022, + "grad_norm": 1.0294902623499242, + "learning_rate": 4.727005290744004e-06, + "loss": 0.3516, + "step": 14878 + }, + { + "epoch": 0.90606826416588, + "grad_norm": 1.0250604563254309, + "learning_rate": 4.726969031079321e-06, + "loss": 0.3738, + "step": 14879 + }, + { + "epoch": 0.906129159942758, + "grad_norm": 0.9445220667005653, + "learning_rate": 4.726932769145848e-06, + "loss": 0.3809, + "step": 14880 + }, + { + "epoch": 0.9061900557196358, + "grad_norm": 1.0904720058074975, + "learning_rate": 4.726896504943623e-06, + "loss": 0.3751, + "step": 14881 + }, + { + "epoch": 0.9062509514965137, + "grad_norm": 1.0773976174477522, + "learning_rate": 4.726860238472681e-06, + "loss": 0.418, + "step": 14882 + }, + { + "epoch": 0.9063118472733915, + "grad_norm": 0.9931250540890058, + "learning_rate": 4.7268239697330615e-06, + "loss": 0.4152, + "step": 14883 + }, + { + "epoch": 0.9063727430502695, + "grad_norm": 0.9292463845702691, + "learning_rate": 4.726787698724799e-06, + "loss": 0.4611, + "step": 14884 + }, + { + "epoch": 0.9064336388271473, + "grad_norm": 1.0711182794464353, + "learning_rate": 4.726751425447932e-06, + "loss": 0.444, + "step": 14885 + }, + { + "epoch": 0.9064945346040252, + "grad_norm": 1.0490070329998662, + "learning_rate": 4.7267151499024964e-06, + "loss": 0.3973, + "step": 14886 + }, + { + "epoch": 0.9065554303809031, + "grad_norm": 1.070607015584213, + "learning_rate": 4.726678872088531e-06, + "loss": 0.4148, + "step": 14887 + }, + { + "epoch": 0.906616326157781, + "grad_norm": 1.0285693515721346, + "learning_rate": 4.72664259200607e-06, + "loss": 0.3866, + "step": 14888 + }, + { + "epoch": 0.9066772219346588, + "grad_norm": 1.1383177830048818, + "learning_rate": 4.726606309655152e-06, + "loss": 0.4067, + "step": 14889 + }, + { + "epoch": 0.9067381177115367, + "grad_norm": 1.0973012610933814, + "learning_rate": 4.726570025035814e-06, + "loss": 0.3492, + "step": 14890 + }, + { + "epoch": 0.9067990134884146, + "grad_norm": 1.0579636713429232, + "learning_rate": 4.726533738148093e-06, + "loss": 0.3404, + "step": 14891 + }, + { + "epoch": 0.9068599092652925, + "grad_norm": 1.077943751573989, + "learning_rate": 4.726497448992026e-06, + "loss": 0.4367, + "step": 14892 + }, + { + "epoch": 0.9069208050421703, + "grad_norm": 0.9082715592367657, + "learning_rate": 4.726461157567649e-06, + "loss": 0.4658, + "step": 14893 + }, + { + "epoch": 0.9069817008190482, + "grad_norm": 0.9722878506491679, + "learning_rate": 4.726424863875e-06, + "loss": 0.4444, + "step": 14894 + }, + { + "epoch": 0.9070425965959261, + "grad_norm": 1.0301734846403434, + "learning_rate": 4.726388567914115e-06, + "loss": 0.3668, + "step": 14895 + }, + { + "epoch": 0.907103492372804, + "grad_norm": 0.96866534592879, + "learning_rate": 4.7263522696850325e-06, + "loss": 0.3944, + "step": 14896 + }, + { + "epoch": 0.9071643881496818, + "grad_norm": 0.9557392598218502, + "learning_rate": 4.726315969187788e-06, + "loss": 0.4246, + "step": 14897 + }, + { + "epoch": 0.9072252839265597, + "grad_norm": 0.9711393365910623, + "learning_rate": 4.72627966642242e-06, + "loss": 0.418, + "step": 14898 + }, + { + "epoch": 0.9072861797034376, + "grad_norm": 1.1109768572236163, + "learning_rate": 4.726243361388963e-06, + "loss": 0.3611, + "step": 14899 + }, + { + "epoch": 0.9073470754803155, + "grad_norm": 1.0241070257837055, + "learning_rate": 4.726207054087457e-06, + "loss": 0.4072, + "step": 14900 + }, + { + "epoch": 0.9074079712571933, + "grad_norm": 0.8899440320061562, + "learning_rate": 4.726170744517937e-06, + "loss": 0.5137, + "step": 14901 + }, + { + "epoch": 0.9074688670340711, + "grad_norm": 0.9870884294764678, + "learning_rate": 4.72613443268044e-06, + "loss": 0.3949, + "step": 14902 + }, + { + "epoch": 0.9075297628109491, + "grad_norm": 0.9593826690627496, + "learning_rate": 4.726098118575005e-06, + "loss": 0.4355, + "step": 14903 + }, + { + "epoch": 0.907590658587827, + "grad_norm": 0.9624023011116711, + "learning_rate": 4.7260618022016655e-06, + "loss": 0.4374, + "step": 14904 + }, + { + "epoch": 0.9076515543647048, + "grad_norm": 0.8912388616667314, + "learning_rate": 4.726025483560463e-06, + "loss": 0.4364, + "step": 14905 + }, + { + "epoch": 0.9077124501415826, + "grad_norm": 0.9611205496066912, + "learning_rate": 4.72598916265143e-06, + "loss": 0.4073, + "step": 14906 + }, + { + "epoch": 0.9077733459184606, + "grad_norm": 1.0248610125825077, + "learning_rate": 4.725952839474606e-06, + "loss": 0.4031, + "step": 14907 + }, + { + "epoch": 0.9078342416953384, + "grad_norm": 0.9833748002470936, + "learning_rate": 4.725916514030028e-06, + "loss": 0.3966, + "step": 14908 + }, + { + "epoch": 0.9078951374722163, + "grad_norm": 1.0282781195366881, + "learning_rate": 4.725880186317732e-06, + "loss": 0.4381, + "step": 14909 + }, + { + "epoch": 0.9079560332490941, + "grad_norm": 1.0437459347416853, + "learning_rate": 4.725843856337755e-06, + "loss": 0.4252, + "step": 14910 + }, + { + "epoch": 0.9080169290259721, + "grad_norm": 1.010249868744938, + "learning_rate": 4.725807524090136e-06, + "loss": 0.3685, + "step": 14911 + }, + { + "epoch": 0.9080778248028499, + "grad_norm": 1.0338705863167936, + "learning_rate": 4.725771189574909e-06, + "loss": 0.3647, + "step": 14912 + }, + { + "epoch": 0.9081387205797278, + "grad_norm": 1.0742468086518162, + "learning_rate": 4.725734852792114e-06, + "loss": 0.4049, + "step": 14913 + }, + { + "epoch": 0.9081996163566056, + "grad_norm": 1.056904683874264, + "learning_rate": 4.725698513741786e-06, + "loss": 0.38, + "step": 14914 + }, + { + "epoch": 0.9082605121334836, + "grad_norm": 1.012539450188778, + "learning_rate": 4.725662172423963e-06, + "loss": 0.3921, + "step": 14915 + }, + { + "epoch": 0.9083214079103614, + "grad_norm": 0.9878089713652595, + "learning_rate": 4.7256258288386806e-06, + "loss": 0.3578, + "step": 14916 + }, + { + "epoch": 0.9083823036872393, + "grad_norm": 1.02997036398044, + "learning_rate": 4.7255894829859776e-06, + "loss": 0.3742, + "step": 14917 + }, + { + "epoch": 0.9084431994641171, + "grad_norm": 1.049385193546294, + "learning_rate": 4.725553134865891e-06, + "loss": 0.4522, + "step": 14918 + }, + { + "epoch": 0.9085040952409951, + "grad_norm": 0.9832032155618525, + "learning_rate": 4.725516784478456e-06, + "loss": 0.3834, + "step": 14919 + }, + { + "epoch": 0.9085649910178729, + "grad_norm": 0.910805775992434, + "learning_rate": 4.725480431823711e-06, + "loss": 0.3928, + "step": 14920 + }, + { + "epoch": 0.9086258867947508, + "grad_norm": 1.0180771857857496, + "learning_rate": 4.725444076901693e-06, + "loss": 0.4316, + "step": 14921 + }, + { + "epoch": 0.9086867825716286, + "grad_norm": 1.0034725759445844, + "learning_rate": 4.72540771971244e-06, + "loss": 0.4146, + "step": 14922 + }, + { + "epoch": 0.9087476783485066, + "grad_norm": 0.9982558747416732, + "learning_rate": 4.725371360255986e-06, + "loss": 0.3965, + "step": 14923 + }, + { + "epoch": 0.9088085741253844, + "grad_norm": 1.0048542259883397, + "learning_rate": 4.725334998532371e-06, + "loss": 0.4476, + "step": 14924 + }, + { + "epoch": 0.9088694699022623, + "grad_norm": 0.9939359957757747, + "learning_rate": 4.72529863454163e-06, + "loss": 0.3092, + "step": 14925 + }, + { + "epoch": 0.9089303656791402, + "grad_norm": 0.9549355973417422, + "learning_rate": 4.725262268283802e-06, + "loss": 0.3707, + "step": 14926 + }, + { + "epoch": 0.908991261456018, + "grad_norm": 0.9492501128826466, + "learning_rate": 4.725225899758922e-06, + "loss": 0.3464, + "step": 14927 + }, + { + "epoch": 0.9090521572328959, + "grad_norm": 0.9530148613684094, + "learning_rate": 4.725189528967029e-06, + "loss": 0.3964, + "step": 14928 + }, + { + "epoch": 0.9091130530097737, + "grad_norm": 1.0209305866782512, + "learning_rate": 4.72515315590816e-06, + "loss": 0.359, + "step": 14929 + }, + { + "epoch": 0.9091739487866517, + "grad_norm": 1.0430865631341795, + "learning_rate": 4.72511678058235e-06, + "loss": 0.3851, + "step": 14930 + }, + { + "epoch": 0.9092348445635295, + "grad_norm": 1.0751806981801215, + "learning_rate": 4.725080402989637e-06, + "loss": 0.3821, + "step": 14931 + }, + { + "epoch": 0.9092957403404074, + "grad_norm": 1.0529444247702364, + "learning_rate": 4.725044023130058e-06, + "loss": 0.3836, + "step": 14932 + }, + { + "epoch": 0.9093566361172852, + "grad_norm": 0.9862351549471231, + "learning_rate": 4.725007641003652e-06, + "loss": 0.4629, + "step": 14933 + }, + { + "epoch": 0.9094175318941632, + "grad_norm": 0.9940773911113044, + "learning_rate": 4.724971256610454e-06, + "loss": 0.4413, + "step": 14934 + }, + { + "epoch": 0.909478427671041, + "grad_norm": 1.0413900784349897, + "learning_rate": 4.724934869950501e-06, + "loss": 0.3479, + "step": 14935 + }, + { + "epoch": 0.9095393234479189, + "grad_norm": 1.0248775184646142, + "learning_rate": 4.724898481023831e-06, + "loss": 0.403, + "step": 14936 + }, + { + "epoch": 0.9096002192247967, + "grad_norm": 1.0000560044318227, + "learning_rate": 4.7248620898304805e-06, + "loss": 0.4304, + "step": 14937 + }, + { + "epoch": 0.9096611150016747, + "grad_norm": 1.028917708285986, + "learning_rate": 4.724825696370487e-06, + "loss": 0.36, + "step": 14938 + }, + { + "epoch": 0.9097220107785525, + "grad_norm": 1.0143870524323737, + "learning_rate": 4.724789300643887e-06, + "loss": 0.3658, + "step": 14939 + }, + { + "epoch": 0.9097829065554304, + "grad_norm": 1.0048910785469676, + "learning_rate": 4.724752902650718e-06, + "loss": 0.5247, + "step": 14940 + }, + { + "epoch": 0.9098438023323082, + "grad_norm": 1.0727921533845994, + "learning_rate": 4.724716502391017e-06, + "loss": 0.3204, + "step": 14941 + }, + { + "epoch": 0.9099046981091862, + "grad_norm": 0.9668445069482415, + "learning_rate": 4.724680099864821e-06, + "loss": 0.4059, + "step": 14942 + }, + { + "epoch": 0.909965593886064, + "grad_norm": 1.036396180487488, + "learning_rate": 4.7246436950721675e-06, + "loss": 0.3603, + "step": 14943 + }, + { + "epoch": 0.9100264896629419, + "grad_norm": 1.0217764219079177, + "learning_rate": 4.724607288013093e-06, + "loss": 0.4416, + "step": 14944 + }, + { + "epoch": 0.9100873854398197, + "grad_norm": 0.9835025765660783, + "learning_rate": 4.724570878687636e-06, + "loss": 0.394, + "step": 14945 + }, + { + "epoch": 0.9101482812166977, + "grad_norm": 0.9767661571594418, + "learning_rate": 4.724534467095831e-06, + "loss": 0.4984, + "step": 14946 + }, + { + "epoch": 0.9102091769935755, + "grad_norm": 1.083893611533474, + "learning_rate": 4.724498053237716e-06, + "loss": 0.4909, + "step": 14947 + }, + { + "epoch": 0.9102700727704534, + "grad_norm": 0.9665108988026272, + "learning_rate": 4.72446163711333e-06, + "loss": 0.3786, + "step": 14948 + }, + { + "epoch": 0.9103309685473312, + "grad_norm": 0.970906468678085, + "learning_rate": 4.724425218722708e-06, + "loss": 0.3866, + "step": 14949 + }, + { + "epoch": 0.9103918643242092, + "grad_norm": 0.9908226104055374, + "learning_rate": 4.724388798065888e-06, + "loss": 0.3765, + "step": 14950 + }, + { + "epoch": 0.910452760101087, + "grad_norm": 0.9897293644959326, + "learning_rate": 4.724352375142908e-06, + "loss": 0.3785, + "step": 14951 + }, + { + "epoch": 0.9105136558779648, + "grad_norm": 0.9422974393419119, + "learning_rate": 4.724315949953803e-06, + "loss": 0.4087, + "step": 14952 + }, + { + "epoch": 0.9105745516548427, + "grad_norm": 0.9464790108298221, + "learning_rate": 4.724279522498611e-06, + "loss": 0.4167, + "step": 14953 + }, + { + "epoch": 0.9106354474317206, + "grad_norm": 0.9122735427487563, + "learning_rate": 4.72424309277737e-06, + "loss": 0.4407, + "step": 14954 + }, + { + "epoch": 0.9106963432085985, + "grad_norm": 0.9762661848487343, + "learning_rate": 4.724206660790115e-06, + "loss": 0.4385, + "step": 14955 + }, + { + "epoch": 0.9107572389854763, + "grad_norm": 1.0334183947853566, + "learning_rate": 4.724170226536886e-06, + "loss": 0.3579, + "step": 14956 + }, + { + "epoch": 0.9108181347623542, + "grad_norm": 0.9904488165785493, + "learning_rate": 4.7241337900177176e-06, + "loss": 0.4277, + "step": 14957 + }, + { + "epoch": 0.9108790305392321, + "grad_norm": 1.0115762128412644, + "learning_rate": 4.724097351232648e-06, + "loss": 0.3867, + "step": 14958 + }, + { + "epoch": 0.91093992631611, + "grad_norm": 1.035794961404478, + "learning_rate": 4.724060910181715e-06, + "loss": 0.4619, + "step": 14959 + }, + { + "epoch": 0.9110008220929878, + "grad_norm": 0.9968112668469694, + "learning_rate": 4.724024466864955e-06, + "loss": 0.3826, + "step": 14960 + }, + { + "epoch": 0.9110617178698657, + "grad_norm": 1.0848336371199114, + "learning_rate": 4.723988021282404e-06, + "loss": 0.3466, + "step": 14961 + }, + { + "epoch": 0.9111226136467436, + "grad_norm": 1.0201790706200213, + "learning_rate": 4.723951573434101e-06, + "loss": 0.436, + "step": 14962 + }, + { + "epoch": 0.9111835094236215, + "grad_norm": 1.0098323312152258, + "learning_rate": 4.7239151233200824e-06, + "loss": 0.4307, + "step": 14963 + }, + { + "epoch": 0.9112444052004993, + "grad_norm": 1.0521336779675072, + "learning_rate": 4.723878670940385e-06, + "loss": 0.3787, + "step": 14964 + }, + { + "epoch": 0.9113053009773772, + "grad_norm": 1.0220070468559035, + "learning_rate": 4.723842216295047e-06, + "loss": 0.4299, + "step": 14965 + }, + { + "epoch": 0.9113661967542551, + "grad_norm": 1.0789880594220458, + "learning_rate": 4.723805759384104e-06, + "loss": 0.3704, + "step": 14966 + }, + { + "epoch": 0.911427092531133, + "grad_norm": 1.0215743874309136, + "learning_rate": 4.723769300207595e-06, + "loss": 0.3824, + "step": 14967 + }, + { + "epoch": 0.9114879883080108, + "grad_norm": 1.0531916942433444, + "learning_rate": 4.723732838765554e-06, + "loss": 0.5084, + "step": 14968 + }, + { + "epoch": 0.9115488840848888, + "grad_norm": 0.9501874912394844, + "learning_rate": 4.723696375058022e-06, + "loss": 0.4472, + "step": 14969 + }, + { + "epoch": 0.9116097798617666, + "grad_norm": 1.0271728378817784, + "learning_rate": 4.723659909085034e-06, + "loss": 0.3425, + "step": 14970 + }, + { + "epoch": 0.9116706756386445, + "grad_norm": 0.9771140115498127, + "learning_rate": 4.7236234408466275e-06, + "loss": 0.429, + "step": 14971 + }, + { + "epoch": 0.9117315714155223, + "grad_norm": 0.9970812795902219, + "learning_rate": 4.723586970342839e-06, + "loss": 0.3186, + "step": 14972 + }, + { + "epoch": 0.9117924671924003, + "grad_norm": 1.025030967272079, + "learning_rate": 4.723550497573707e-06, + "loss": 0.455, + "step": 14973 + }, + { + "epoch": 0.9118533629692781, + "grad_norm": 1.0840283903797991, + "learning_rate": 4.723514022539268e-06, + "loss": 0.3115, + "step": 14974 + }, + { + "epoch": 0.911914258746156, + "grad_norm": 1.1523307765227133, + "learning_rate": 4.723477545239559e-06, + "loss": 0.3858, + "step": 14975 + }, + { + "epoch": 0.9119751545230338, + "grad_norm": 0.9514979709303614, + "learning_rate": 4.723441065674617e-06, + "loss": 0.4275, + "step": 14976 + }, + { + "epoch": 0.9120360502999117, + "grad_norm": 1.0540302656757676, + "learning_rate": 4.72340458384448e-06, + "loss": 0.3743, + "step": 14977 + }, + { + "epoch": 0.9120969460767896, + "grad_norm": 0.9753095598599023, + "learning_rate": 4.723368099749184e-06, + "loss": 0.4127, + "step": 14978 + }, + { + "epoch": 0.9121578418536674, + "grad_norm": 1.0105378254106239, + "learning_rate": 4.723331613388767e-06, + "loss": 0.4451, + "step": 14979 + }, + { + "epoch": 0.9122187376305453, + "grad_norm": 0.9632915745435403, + "learning_rate": 4.723295124763266e-06, + "loss": 0.4104, + "step": 14980 + }, + { + "epoch": 0.9122796334074232, + "grad_norm": 1.060115577712447, + "learning_rate": 4.723258633872719e-06, + "loss": 0.3937, + "step": 14981 + }, + { + "epoch": 0.9123405291843011, + "grad_norm": 1.1339457145401919, + "learning_rate": 4.723222140717161e-06, + "loss": 0.3439, + "step": 14982 + }, + { + "epoch": 0.9124014249611789, + "grad_norm": 0.9275045106759068, + "learning_rate": 4.723185645296631e-06, + "loss": 0.4251, + "step": 14983 + }, + { + "epoch": 0.9124623207380568, + "grad_norm": 0.9159507457375337, + "learning_rate": 4.723149147611167e-06, + "loss": 0.4459, + "step": 14984 + }, + { + "epoch": 0.9125232165149347, + "grad_norm": 1.0773321791293073, + "learning_rate": 4.723112647660803e-06, + "loss": 0.3919, + "step": 14985 + }, + { + "epoch": 0.9125841122918126, + "grad_norm": 0.9779050825228353, + "learning_rate": 4.723076145445579e-06, + "loss": 0.4251, + "step": 14986 + }, + { + "epoch": 0.9126450080686904, + "grad_norm": 0.9391226310811666, + "learning_rate": 4.7230396409655305e-06, + "loss": 0.4322, + "step": 14987 + }, + { + "epoch": 0.9127059038455683, + "grad_norm": 1.0329015883324062, + "learning_rate": 4.723003134220696e-06, + "loss": 0.4463, + "step": 14988 + }, + { + "epoch": 0.9127667996224462, + "grad_norm": 0.9463682306392299, + "learning_rate": 4.722966625211112e-06, + "loss": 0.4003, + "step": 14989 + }, + { + "epoch": 0.9128276953993241, + "grad_norm": 1.0013983430477529, + "learning_rate": 4.7229301139368155e-06, + "loss": 0.4198, + "step": 14990 + }, + { + "epoch": 0.9128885911762019, + "grad_norm": 1.0834086612265763, + "learning_rate": 4.722893600397845e-06, + "loss": 0.3478, + "step": 14991 + }, + { + "epoch": 0.9129494869530798, + "grad_norm": 1.0346523341910343, + "learning_rate": 4.722857084594236e-06, + "loss": 0.4195, + "step": 14992 + }, + { + "epoch": 0.9130103827299577, + "grad_norm": 0.9867254540656516, + "learning_rate": 4.722820566526026e-06, + "loss": 0.4156, + "step": 14993 + }, + { + "epoch": 0.9130712785068356, + "grad_norm": 1.00312647791664, + "learning_rate": 4.722784046193254e-06, + "loss": 0.4269, + "step": 14994 + }, + { + "epoch": 0.9131321742837134, + "grad_norm": 1.0963528846196704, + "learning_rate": 4.722747523595954e-06, + "loss": 0.3593, + "step": 14995 + }, + { + "epoch": 0.9131930700605912, + "grad_norm": 1.071025537010091, + "learning_rate": 4.722710998734167e-06, + "loss": 0.3612, + "step": 14996 + }, + { + "epoch": 0.9132539658374692, + "grad_norm": 1.004769938143866, + "learning_rate": 4.7226744716079265e-06, + "loss": 0.4504, + "step": 14997 + }, + { + "epoch": 0.913314861614347, + "grad_norm": 1.0043200970171524, + "learning_rate": 4.7226379422172725e-06, + "loss": 0.3392, + "step": 14998 + }, + { + "epoch": 0.9133757573912249, + "grad_norm": 0.9716895667921257, + "learning_rate": 4.722601410562241e-06, + "loss": 0.4005, + "step": 14999 + }, + { + "epoch": 0.9134366531681027, + "grad_norm": 1.0176323244781045, + "learning_rate": 4.722564876642869e-06, + "loss": 0.3756, + "step": 15000 + }, + { + "epoch": 0.9134975489449807, + "grad_norm": 1.0552289809834459, + "learning_rate": 4.722528340459194e-06, + "loss": 0.3558, + "step": 15001 + }, + { + "epoch": 0.9135584447218585, + "grad_norm": 1.0757181563745803, + "learning_rate": 4.722491802011254e-06, + "loss": 0.4037, + "step": 15002 + }, + { + "epoch": 0.9136193404987364, + "grad_norm": 0.9823919375341932, + "learning_rate": 4.722455261299085e-06, + "loss": 0.4395, + "step": 15003 + }, + { + "epoch": 0.9136802362756142, + "grad_norm": 0.9871433704966969, + "learning_rate": 4.722418718322726e-06, + "loss": 0.4427, + "step": 15004 + }, + { + "epoch": 0.9137411320524922, + "grad_norm": 1.0838873172155823, + "learning_rate": 4.722382173082212e-06, + "loss": 0.4138, + "step": 15005 + }, + { + "epoch": 0.91380202782937, + "grad_norm": 1.0596091114436383, + "learning_rate": 4.722345625577581e-06, + "loss": 0.3345, + "step": 15006 + }, + { + "epoch": 0.9138629236062479, + "grad_norm": 1.023306379738522, + "learning_rate": 4.72230907580887e-06, + "loss": 0.3481, + "step": 15007 + }, + { + "epoch": 0.9139238193831258, + "grad_norm": 1.076602518470696, + "learning_rate": 4.722272523776118e-06, + "loss": 0.4027, + "step": 15008 + }, + { + "epoch": 0.9139847151600037, + "grad_norm": 1.0530377578282493, + "learning_rate": 4.72223596947936e-06, + "loss": 0.3847, + "step": 15009 + }, + { + "epoch": 0.9140456109368815, + "grad_norm": 1.102155632495736, + "learning_rate": 4.7221994129186355e-06, + "loss": 0.3591, + "step": 15010 + }, + { + "epoch": 0.9141065067137594, + "grad_norm": 1.0615437746545688, + "learning_rate": 4.722162854093979e-06, + "loss": 0.3909, + "step": 15011 + }, + { + "epoch": 0.9141674024906373, + "grad_norm": 1.0424599494755233, + "learning_rate": 4.72212629300543e-06, + "loss": 0.4574, + "step": 15012 + }, + { + "epoch": 0.9142282982675152, + "grad_norm": 1.0185123058569263, + "learning_rate": 4.722089729653025e-06, + "loss": 0.4836, + "step": 15013 + }, + { + "epoch": 0.914289194044393, + "grad_norm": 0.9384150878234403, + "learning_rate": 4.7220531640368e-06, + "loss": 0.4168, + "step": 15014 + }, + { + "epoch": 0.9143500898212709, + "grad_norm": 0.9930805742151175, + "learning_rate": 4.7220165961567955e-06, + "loss": 0.3848, + "step": 15015 + }, + { + "epoch": 0.9144109855981488, + "grad_norm": 1.0763083207118982, + "learning_rate": 4.721980026013045e-06, + "loss": 0.4786, + "step": 15016 + }, + { + "epoch": 0.9144718813750267, + "grad_norm": 0.9811980954749207, + "learning_rate": 4.721943453605588e-06, + "loss": 0.371, + "step": 15017 + }, + { + "epoch": 0.9145327771519045, + "grad_norm": 0.9802601060791288, + "learning_rate": 4.721906878934461e-06, + "loss": 0.4138, + "step": 15018 + }, + { + "epoch": 0.9145936729287824, + "grad_norm": 0.9280266025599198, + "learning_rate": 4.721870301999702e-06, + "loss": 0.4837, + "step": 15019 + }, + { + "epoch": 0.9146545687056603, + "grad_norm": 0.9779027138059663, + "learning_rate": 4.721833722801347e-06, + "loss": 0.4322, + "step": 15020 + }, + { + "epoch": 0.9147154644825382, + "grad_norm": 1.0908389785883215, + "learning_rate": 4.721797141339435e-06, + "loss": 0.4271, + "step": 15021 + }, + { + "epoch": 0.914776360259416, + "grad_norm": 0.9797539429866874, + "learning_rate": 4.721760557614002e-06, + "loss": 0.374, + "step": 15022 + }, + { + "epoch": 0.9148372560362938, + "grad_norm": 0.9728058567777116, + "learning_rate": 4.721723971625085e-06, + "loss": 0.429, + "step": 15023 + }, + { + "epoch": 0.9148981518131718, + "grad_norm": 1.1022509700299148, + "learning_rate": 4.721687383372721e-06, + "loss": 0.34, + "step": 15024 + }, + { + "epoch": 0.9149590475900496, + "grad_norm": 0.9346908068168155, + "learning_rate": 4.7216507928569494e-06, + "loss": 0.4367, + "step": 15025 + }, + { + "epoch": 0.9150199433669275, + "grad_norm": 1.174477661182615, + "learning_rate": 4.7216142000778056e-06, + "loss": 0.353, + "step": 15026 + }, + { + "epoch": 0.9150808391438053, + "grad_norm": 1.0448288012530251, + "learning_rate": 4.721577605035328e-06, + "loss": 0.3744, + "step": 15027 + }, + { + "epoch": 0.9151417349206833, + "grad_norm": 0.9873005380418324, + "learning_rate": 4.7215410077295535e-06, + "loss": 0.3865, + "step": 15028 + }, + { + "epoch": 0.9152026306975611, + "grad_norm": 0.9322665819943107, + "learning_rate": 4.721504408160518e-06, + "loss": 0.4591, + "step": 15029 + }, + { + "epoch": 0.915263526474439, + "grad_norm": 1.0145006852157241, + "learning_rate": 4.721467806328261e-06, + "loss": 0.3838, + "step": 15030 + }, + { + "epoch": 0.9153244222513168, + "grad_norm": 1.0221205956870945, + "learning_rate": 4.721431202232818e-06, + "loss": 0.4157, + "step": 15031 + }, + { + "epoch": 0.9153853180281948, + "grad_norm": 0.9383631743181826, + "learning_rate": 4.721394595874228e-06, + "loss": 0.3966, + "step": 15032 + }, + { + "epoch": 0.9154462138050726, + "grad_norm": 1.0663542187774748, + "learning_rate": 4.721357987252526e-06, + "loss": 0.3355, + "step": 15033 + }, + { + "epoch": 0.9155071095819505, + "grad_norm": 1.0742416277913374, + "learning_rate": 4.7213213763677525e-06, + "loss": 0.3513, + "step": 15034 + }, + { + "epoch": 0.9155680053588283, + "grad_norm": 0.9798802842366126, + "learning_rate": 4.721284763219942e-06, + "loss": 0.4091, + "step": 15035 + }, + { + "epoch": 0.9156289011357063, + "grad_norm": 1.0470499973717413, + "learning_rate": 4.7212481478091325e-06, + "loss": 0.4115, + "step": 15036 + }, + { + "epoch": 0.9156897969125841, + "grad_norm": 1.0320572082489456, + "learning_rate": 4.7212115301353625e-06, + "loss": 0.4898, + "step": 15037 + }, + { + "epoch": 0.915750692689462, + "grad_norm": 1.0027535348870784, + "learning_rate": 4.721174910198667e-06, + "loss": 0.4148, + "step": 15038 + }, + { + "epoch": 0.9158115884663398, + "grad_norm": 0.9491449301893058, + "learning_rate": 4.721138287999086e-06, + "loss": 0.4233, + "step": 15039 + }, + { + "epoch": 0.9158724842432178, + "grad_norm": 1.0697937020996129, + "learning_rate": 4.721101663536655e-06, + "loss": 0.465, + "step": 15040 + }, + { + "epoch": 0.9159333800200956, + "grad_norm": 1.0742917167435924, + "learning_rate": 4.721065036811411e-06, + "loss": 0.4897, + "step": 15041 + }, + { + "epoch": 0.9159942757969735, + "grad_norm": 1.0475926576008683, + "learning_rate": 4.7210284078233935e-06, + "loss": 0.4397, + "step": 15042 + }, + { + "epoch": 0.9160551715738513, + "grad_norm": 0.9639375553661539, + "learning_rate": 4.7209917765726375e-06, + "loss": 0.4458, + "step": 15043 + }, + { + "epoch": 0.9161160673507293, + "grad_norm": 0.9528680998972826, + "learning_rate": 4.720955143059182e-06, + "loss": 0.3868, + "step": 15044 + }, + { + "epoch": 0.9161769631276071, + "grad_norm": 1.0522592536649065, + "learning_rate": 4.720918507283063e-06, + "loss": 0.4341, + "step": 15045 + }, + { + "epoch": 0.916237858904485, + "grad_norm": 0.9985658967888413, + "learning_rate": 4.720881869244318e-06, + "loss": 0.4071, + "step": 15046 + }, + { + "epoch": 0.9162987546813628, + "grad_norm": 1.0847347334408004, + "learning_rate": 4.720845228942985e-06, + "loss": 0.4279, + "step": 15047 + }, + { + "epoch": 0.9163596504582407, + "grad_norm": 0.9211901591402532, + "learning_rate": 4.720808586379102e-06, + "loss": 0.4714, + "step": 15048 + }, + { + "epoch": 0.9164205462351186, + "grad_norm": 0.9696631533017847, + "learning_rate": 4.720771941552705e-06, + "loss": 0.4308, + "step": 15049 + }, + { + "epoch": 0.9164814420119964, + "grad_norm": 1.0108401523081778, + "learning_rate": 4.720735294463831e-06, + "loss": 0.4319, + "step": 15050 + }, + { + "epoch": 0.9165423377888744, + "grad_norm": 1.0037320288030351, + "learning_rate": 4.720698645112518e-06, + "loss": 0.3694, + "step": 15051 + }, + { + "epoch": 0.9166032335657522, + "grad_norm": 1.0476083766948043, + "learning_rate": 4.720661993498804e-06, + "loss": 0.3922, + "step": 15052 + }, + { + "epoch": 0.9166641293426301, + "grad_norm": 0.994649595145193, + "learning_rate": 4.720625339622725e-06, + "loss": 0.4845, + "step": 15053 + }, + { + "epoch": 0.9167250251195079, + "grad_norm": 1.0380023215834777, + "learning_rate": 4.720588683484321e-06, + "loss": 0.4119, + "step": 15054 + }, + { + "epoch": 0.9167859208963859, + "grad_norm": 0.9256707925644008, + "learning_rate": 4.720552025083626e-06, + "loss": 0.4362, + "step": 15055 + }, + { + "epoch": 0.9168468166732637, + "grad_norm": 0.9341959969602399, + "learning_rate": 4.720515364420678e-06, + "loss": 0.4776, + "step": 15056 + }, + { + "epoch": 0.9169077124501416, + "grad_norm": 0.9872894967827803, + "learning_rate": 4.7204787014955165e-06, + "loss": 0.3992, + "step": 15057 + }, + { + "epoch": 0.9169686082270194, + "grad_norm": 1.0169405718434452, + "learning_rate": 4.720442036308176e-06, + "loss": 0.4146, + "step": 15058 + }, + { + "epoch": 0.9170295040038974, + "grad_norm": 0.9754867213696448, + "learning_rate": 4.720405368858696e-06, + "loss": 0.4159, + "step": 15059 + }, + { + "epoch": 0.9170903997807752, + "grad_norm": 1.047426368694935, + "learning_rate": 4.720368699147114e-06, + "loss": 0.4332, + "step": 15060 + }, + { + "epoch": 0.9171512955576531, + "grad_norm": 1.0154219098718318, + "learning_rate": 4.720332027173466e-06, + "loss": 0.3785, + "step": 15061 + }, + { + "epoch": 0.9172121913345309, + "grad_norm": 1.0266681461638059, + "learning_rate": 4.72029535293779e-06, + "loss": 0.3238, + "step": 15062 + }, + { + "epoch": 0.9172730871114089, + "grad_norm": 1.0178284478540545, + "learning_rate": 4.7202586764401225e-06, + "loss": 0.4495, + "step": 15063 + }, + { + "epoch": 0.9173339828882867, + "grad_norm": 1.0195936282945028, + "learning_rate": 4.720221997680502e-06, + "loss": 0.4057, + "step": 15064 + }, + { + "epoch": 0.9173948786651646, + "grad_norm": 1.093853001019347, + "learning_rate": 4.720185316658966e-06, + "loss": 0.3888, + "step": 15065 + }, + { + "epoch": 0.9174557744420424, + "grad_norm": 1.011191755902728, + "learning_rate": 4.720148633375551e-06, + "loss": 0.4374, + "step": 15066 + }, + { + "epoch": 0.9175166702189204, + "grad_norm": 0.9611093613791839, + "learning_rate": 4.720111947830295e-06, + "loss": 0.3875, + "step": 15067 + }, + { + "epoch": 0.9175775659957982, + "grad_norm": 1.0084085438338937, + "learning_rate": 4.7200752600232345e-06, + "loss": 0.4684, + "step": 15068 + }, + { + "epoch": 0.917638461772676, + "grad_norm": 0.9628017498465442, + "learning_rate": 4.720038569954408e-06, + "loss": 0.3532, + "step": 15069 + }, + { + "epoch": 0.9176993575495539, + "grad_norm": 1.0985781715222256, + "learning_rate": 4.720001877623852e-06, + "loss": 0.4037, + "step": 15070 + }, + { + "epoch": 0.9177602533264319, + "grad_norm": 1.011493513170904, + "learning_rate": 4.7199651830316055e-06, + "loss": 0.4292, + "step": 15071 + }, + { + "epoch": 0.9178211491033097, + "grad_norm": 0.9818242762133107, + "learning_rate": 4.719928486177703e-06, + "loss": 0.4205, + "step": 15072 + }, + { + "epoch": 0.9178820448801875, + "grad_norm": 0.9634036263404434, + "learning_rate": 4.719891787062184e-06, + "loss": 0.3634, + "step": 15073 + }, + { + "epoch": 0.9179429406570654, + "grad_norm": 1.116488374037647, + "learning_rate": 4.719855085685085e-06, + "loss": 0.4319, + "step": 15074 + }, + { + "epoch": 0.9180038364339433, + "grad_norm": 1.02148416947821, + "learning_rate": 4.719818382046445e-06, + "loss": 0.3961, + "step": 15075 + }, + { + "epoch": 0.9180647322108212, + "grad_norm": 1.0894822421118777, + "learning_rate": 4.719781676146299e-06, + "loss": 0.3576, + "step": 15076 + }, + { + "epoch": 0.918125627987699, + "grad_norm": 0.9357003190678439, + "learning_rate": 4.719744967984686e-06, + "loss": 0.4011, + "step": 15077 + }, + { + "epoch": 0.9181865237645769, + "grad_norm": 0.9655973010233426, + "learning_rate": 4.7197082575616435e-06, + "loss": 0.4913, + "step": 15078 + }, + { + "epoch": 0.9182474195414548, + "grad_norm": 0.9460434734930288, + "learning_rate": 4.719671544877208e-06, + "loss": 0.3672, + "step": 15079 + }, + { + "epoch": 0.9183083153183327, + "grad_norm": 0.9691746685258105, + "learning_rate": 4.719634829931417e-06, + "loss": 0.3938, + "step": 15080 + }, + { + "epoch": 0.9183692110952105, + "grad_norm": 0.9943669661935959, + "learning_rate": 4.7195981127243085e-06, + "loss": 0.3753, + "step": 15081 + }, + { + "epoch": 0.9184301068720884, + "grad_norm": 1.016156076818688, + "learning_rate": 4.719561393255919e-06, + "loss": 0.3685, + "step": 15082 + }, + { + "epoch": 0.9184910026489663, + "grad_norm": 1.007896428095135, + "learning_rate": 4.7195246715262875e-06, + "loss": 0.4614, + "step": 15083 + }, + { + "epoch": 0.9185518984258442, + "grad_norm": 0.9468517489569229, + "learning_rate": 4.719487947535449e-06, + "loss": 0.405, + "step": 15084 + }, + { + "epoch": 0.918612794202722, + "grad_norm": 0.9595632356327112, + "learning_rate": 4.719451221283444e-06, + "loss": 0.4307, + "step": 15085 + }, + { + "epoch": 0.9186736899795999, + "grad_norm": 1.0669010531294392, + "learning_rate": 4.719414492770307e-06, + "loss": 0.3782, + "step": 15086 + }, + { + "epoch": 0.9187345857564778, + "grad_norm": 1.0050646274045738, + "learning_rate": 4.719377761996077e-06, + "loss": 0.449, + "step": 15087 + }, + { + "epoch": 0.9187954815333557, + "grad_norm": 1.0616328769954568, + "learning_rate": 4.719341028960791e-06, + "loss": 0.3803, + "step": 15088 + }, + { + "epoch": 0.9188563773102335, + "grad_norm": 0.9838773152140824, + "learning_rate": 4.719304293664487e-06, + "loss": 0.4362, + "step": 15089 + }, + { + "epoch": 0.9189172730871115, + "grad_norm": 0.9289253087083972, + "learning_rate": 4.719267556107202e-06, + "loss": 0.4788, + "step": 15090 + }, + { + "epoch": 0.9189781688639893, + "grad_norm": 1.0259766633694427, + "learning_rate": 4.7192308162889725e-06, + "loss": 0.4288, + "step": 15091 + }, + { + "epoch": 0.9190390646408672, + "grad_norm": 1.1266204015783647, + "learning_rate": 4.719194074209837e-06, + "loss": 0.3954, + "step": 15092 + }, + { + "epoch": 0.919099960417745, + "grad_norm": 0.9794425839814231, + "learning_rate": 4.7191573298698336e-06, + "loss": 0.4154, + "step": 15093 + }, + { + "epoch": 0.919160856194623, + "grad_norm": 0.9545434905514993, + "learning_rate": 4.719120583268998e-06, + "loss": 0.4519, + "step": 15094 + }, + { + "epoch": 0.9192217519715008, + "grad_norm": 1.06154858954236, + "learning_rate": 4.7190838344073685e-06, + "loss": 0.4276, + "step": 15095 + }, + { + "epoch": 0.9192826477483786, + "grad_norm": 1.0767167760322247, + "learning_rate": 4.719047083284982e-06, + "loss": 0.3799, + "step": 15096 + }, + { + "epoch": 0.9193435435252565, + "grad_norm": 0.931970772824337, + "learning_rate": 4.719010329901877e-06, + "loss": 0.405, + "step": 15097 + }, + { + "epoch": 0.9194044393021344, + "grad_norm": 0.8831825408962046, + "learning_rate": 4.718973574258091e-06, + "loss": 0.4544, + "step": 15098 + }, + { + "epoch": 0.9194653350790123, + "grad_norm": 0.9783875958743765, + "learning_rate": 4.718936816353661e-06, + "loss": 0.387, + "step": 15099 + }, + { + "epoch": 0.9195262308558901, + "grad_norm": 0.9709028760261574, + "learning_rate": 4.718900056188623e-06, + "loss": 0.4444, + "step": 15100 + }, + { + "epoch": 0.919587126632768, + "grad_norm": 1.0402211759008193, + "learning_rate": 4.718863293763017e-06, + "loss": 0.4415, + "step": 15101 + }, + { + "epoch": 0.9196480224096459, + "grad_norm": 1.1062002975471414, + "learning_rate": 4.718826529076879e-06, + "loss": 0.4088, + "step": 15102 + }, + { + "epoch": 0.9197089181865238, + "grad_norm": 1.0980191951276508, + "learning_rate": 4.718789762130246e-06, + "loss": 0.3614, + "step": 15103 + }, + { + "epoch": 0.9197698139634016, + "grad_norm": 0.9852381067379413, + "learning_rate": 4.718752992923157e-06, + "loss": 0.3951, + "step": 15104 + }, + { + "epoch": 0.9198307097402795, + "grad_norm": 0.8961018141869102, + "learning_rate": 4.7187162214556475e-06, + "loss": 0.4101, + "step": 15105 + }, + { + "epoch": 0.9198916055171574, + "grad_norm": 1.0525093394101792, + "learning_rate": 4.718679447727756e-06, + "loss": 0.4397, + "step": 15106 + }, + { + "epoch": 0.9199525012940353, + "grad_norm": 0.973706975161938, + "learning_rate": 4.718642671739521e-06, + "loss": 0.3878, + "step": 15107 + }, + { + "epoch": 0.9200133970709131, + "grad_norm": 0.9816058709493978, + "learning_rate": 4.7186058934909785e-06, + "loss": 0.3712, + "step": 15108 + }, + { + "epoch": 0.920074292847791, + "grad_norm": 1.0647075014355016, + "learning_rate": 4.718569112982167e-06, + "loss": 0.4018, + "step": 15109 + }, + { + "epoch": 0.9201351886246689, + "grad_norm": 0.9538812336991171, + "learning_rate": 4.718532330213123e-06, + "loss": 0.4397, + "step": 15110 + }, + { + "epoch": 0.9201960844015468, + "grad_norm": 0.9842778428599118, + "learning_rate": 4.718495545183884e-06, + "loss": 0.3885, + "step": 15111 + }, + { + "epoch": 0.9202569801784246, + "grad_norm": 0.9789750900925541, + "learning_rate": 4.718458757894489e-06, + "loss": 0.5702, + "step": 15112 + }, + { + "epoch": 0.9203178759553025, + "grad_norm": 0.9812416218397388, + "learning_rate": 4.7184219683449725e-06, + "loss": 0.4301, + "step": 15113 + }, + { + "epoch": 0.9203787717321804, + "grad_norm": 1.0910179961752013, + "learning_rate": 4.718385176535375e-06, + "loss": 0.3894, + "step": 15114 + }, + { + "epoch": 0.9204396675090583, + "grad_norm": 0.9229789844956473, + "learning_rate": 4.718348382465733e-06, + "loss": 0.3917, + "step": 15115 + }, + { + "epoch": 0.9205005632859361, + "grad_norm": 0.9824905923491243, + "learning_rate": 4.718311586136083e-06, + "loss": 0.4064, + "step": 15116 + }, + { + "epoch": 0.920561459062814, + "grad_norm": 1.0146586225653231, + "learning_rate": 4.718274787546464e-06, + "loss": 0.4106, + "step": 15117 + }, + { + "epoch": 0.9206223548396919, + "grad_norm": 0.9739981295458661, + "learning_rate": 4.718237986696913e-06, + "loss": 0.3702, + "step": 15118 + }, + { + "epoch": 0.9206832506165697, + "grad_norm": 0.9939186126821224, + "learning_rate": 4.718201183587466e-06, + "loss": 0.4191, + "step": 15119 + }, + { + "epoch": 0.9207441463934476, + "grad_norm": 1.0169674315448858, + "learning_rate": 4.718164378218163e-06, + "loss": 0.4077, + "step": 15120 + }, + { + "epoch": 0.9208050421703254, + "grad_norm": 1.0276154225213914, + "learning_rate": 4.71812757058904e-06, + "loss": 0.3753, + "step": 15121 + }, + { + "epoch": 0.9208659379472034, + "grad_norm": 1.0496377342914462, + "learning_rate": 4.718090760700134e-06, + "loss": 0.3317, + "step": 15122 + }, + { + "epoch": 0.9209268337240812, + "grad_norm": 1.0038212454903428, + "learning_rate": 4.718053948551483e-06, + "loss": 0.379, + "step": 15123 + }, + { + "epoch": 0.9209877295009591, + "grad_norm": 1.0005269657355402, + "learning_rate": 4.718017134143126e-06, + "loss": 0.3999, + "step": 15124 + }, + { + "epoch": 0.9210486252778369, + "grad_norm": 1.0062659297553551, + "learning_rate": 4.717980317475099e-06, + "loss": 0.4097, + "step": 15125 + }, + { + "epoch": 0.9211095210547149, + "grad_norm": 1.023422731873376, + "learning_rate": 4.717943498547439e-06, + "loss": 0.426, + "step": 15126 + }, + { + "epoch": 0.9211704168315927, + "grad_norm": 1.0023852298341924, + "learning_rate": 4.717906677360185e-06, + "loss": 0.3807, + "step": 15127 + }, + { + "epoch": 0.9212313126084706, + "grad_norm": 0.963207179991146, + "learning_rate": 4.717869853913373e-06, + "loss": 0.4092, + "step": 15128 + }, + { + "epoch": 0.9212922083853484, + "grad_norm": 0.9171218115762179, + "learning_rate": 4.717833028207042e-06, + "loss": 0.4023, + "step": 15129 + }, + { + "epoch": 0.9213531041622264, + "grad_norm": 0.9973502148581731, + "learning_rate": 4.717796200241229e-06, + "loss": 0.3575, + "step": 15130 + }, + { + "epoch": 0.9214139999391042, + "grad_norm": 0.9138260003433619, + "learning_rate": 4.7177593700159706e-06, + "loss": 0.3942, + "step": 15131 + }, + { + "epoch": 0.9214748957159821, + "grad_norm": 0.9993344621340532, + "learning_rate": 4.717722537531306e-06, + "loss": 0.3982, + "step": 15132 + }, + { + "epoch": 0.92153579149286, + "grad_norm": 0.9066495345322211, + "learning_rate": 4.717685702787271e-06, + "loss": 0.451, + "step": 15133 + }, + { + "epoch": 0.9215966872697379, + "grad_norm": 1.1033657582675611, + "learning_rate": 4.7176488657839045e-06, + "loss": 0.3431, + "step": 15134 + }, + { + "epoch": 0.9216575830466157, + "grad_norm": 0.9356528447976336, + "learning_rate": 4.7176120265212435e-06, + "loss": 0.4271, + "step": 15135 + }, + { + "epoch": 0.9217184788234936, + "grad_norm": 1.03566046896529, + "learning_rate": 4.7175751849993245e-06, + "loss": 0.3567, + "step": 15136 + }, + { + "epoch": 0.9217793746003715, + "grad_norm": 1.047305113473478, + "learning_rate": 4.717538341218187e-06, + "loss": 0.4098, + "step": 15137 + }, + { + "epoch": 0.9218402703772494, + "grad_norm": 1.001195802948034, + "learning_rate": 4.717501495177867e-06, + "loss": 0.4037, + "step": 15138 + }, + { + "epoch": 0.9219011661541272, + "grad_norm": 1.0856575971794655, + "learning_rate": 4.717464646878403e-06, + "loss": 0.4402, + "step": 15139 + }, + { + "epoch": 0.921962061931005, + "grad_norm": 0.9428470848675857, + "learning_rate": 4.7174277963198316e-06, + "loss": 0.3935, + "step": 15140 + }, + { + "epoch": 0.922022957707883, + "grad_norm": 0.9267807448541827, + "learning_rate": 4.717390943502191e-06, + "loss": 0.4453, + "step": 15141 + }, + { + "epoch": 0.9220838534847609, + "grad_norm": 1.0823946887269713, + "learning_rate": 4.717354088425518e-06, + "loss": 0.3747, + "step": 15142 + }, + { + "epoch": 0.9221447492616387, + "grad_norm": 0.9409249067263157, + "learning_rate": 4.7173172310898525e-06, + "loss": 0.395, + "step": 15143 + }, + { + "epoch": 0.9222056450385165, + "grad_norm": 1.0429689011327823, + "learning_rate": 4.7172803714952294e-06, + "loss": 0.3737, + "step": 15144 + }, + { + "epoch": 0.9222665408153945, + "grad_norm": 0.9624782375565075, + "learning_rate": 4.717243509641687e-06, + "loss": 0.4032, + "step": 15145 + }, + { + "epoch": 0.9223274365922723, + "grad_norm": 0.9659642184670693, + "learning_rate": 4.717206645529263e-06, + "loss": 0.4433, + "step": 15146 + }, + { + "epoch": 0.9223883323691502, + "grad_norm": 1.0014046744480891, + "learning_rate": 4.7171697791579954e-06, + "loss": 0.3637, + "step": 15147 + }, + { + "epoch": 0.922449228146028, + "grad_norm": 0.9909102450725672, + "learning_rate": 4.71713291052792e-06, + "loss": 0.3875, + "step": 15148 + }, + { + "epoch": 0.922510123922906, + "grad_norm": 1.0983338568826102, + "learning_rate": 4.717096039639077e-06, + "loss": 0.4005, + "step": 15149 + }, + { + "epoch": 0.9225710196997838, + "grad_norm": 1.0665131933456138, + "learning_rate": 4.717059166491502e-06, + "loss": 0.3489, + "step": 15150 + }, + { + "epoch": 0.9226319154766617, + "grad_norm": 1.0036756037557473, + "learning_rate": 4.717022291085234e-06, + "loss": 0.34, + "step": 15151 + }, + { + "epoch": 0.9226928112535395, + "grad_norm": 1.011877112576629, + "learning_rate": 4.716985413420309e-06, + "loss": 0.3773, + "step": 15152 + }, + { + "epoch": 0.9227537070304175, + "grad_norm": 0.9270677274547438, + "learning_rate": 4.716948533496766e-06, + "loss": 0.4593, + "step": 15153 + }, + { + "epoch": 0.9228146028072953, + "grad_norm": 1.0802917218072303, + "learning_rate": 4.716911651314641e-06, + "loss": 0.4642, + "step": 15154 + }, + { + "epoch": 0.9228754985841732, + "grad_norm": 1.0342017869343334, + "learning_rate": 4.7168747668739735e-06, + "loss": 0.4352, + "step": 15155 + }, + { + "epoch": 0.922936394361051, + "grad_norm": 0.9997536023360486, + "learning_rate": 4.716837880174799e-06, + "loss": 0.3664, + "step": 15156 + }, + { + "epoch": 0.922997290137929, + "grad_norm": 0.9058624514928368, + "learning_rate": 4.716800991217157e-06, + "loss": 0.4191, + "step": 15157 + }, + { + "epoch": 0.9230581859148068, + "grad_norm": 0.9977722378614469, + "learning_rate": 4.716764100001084e-06, + "loss": 0.4643, + "step": 15158 + }, + { + "epoch": 0.9231190816916847, + "grad_norm": 0.9073043971536905, + "learning_rate": 4.716727206526618e-06, + "loss": 0.4373, + "step": 15159 + }, + { + "epoch": 0.9231799774685625, + "grad_norm": 0.9564312795487191, + "learning_rate": 4.716690310793797e-06, + "loss": 0.3781, + "step": 15160 + }, + { + "epoch": 0.9232408732454405, + "grad_norm": 0.94060228951378, + "learning_rate": 4.716653412802656e-06, + "loss": 0.3957, + "step": 15161 + }, + { + "epoch": 0.9233017690223183, + "grad_norm": 0.9415927542711248, + "learning_rate": 4.716616512553237e-06, + "loss": 0.3925, + "step": 15162 + }, + { + "epoch": 0.9233626647991962, + "grad_norm": 1.0215518587938137, + "learning_rate": 4.716579610045573e-06, + "loss": 0.4002, + "step": 15163 + }, + { + "epoch": 0.923423560576074, + "grad_norm": 0.9801999403595391, + "learning_rate": 4.716542705279705e-06, + "loss": 0.4144, + "step": 15164 + }, + { + "epoch": 0.923484456352952, + "grad_norm": 1.1955913932182116, + "learning_rate": 4.71650579825567e-06, + "loss": 0.3776, + "step": 15165 + }, + { + "epoch": 0.9235453521298298, + "grad_norm": 0.9712040943939568, + "learning_rate": 4.716468888973504e-06, + "loss": 0.4183, + "step": 15166 + }, + { + "epoch": 0.9236062479067076, + "grad_norm": 1.0473258779539687, + "learning_rate": 4.716431977433246e-06, + "loss": 0.3536, + "step": 15167 + }, + { + "epoch": 0.9236671436835855, + "grad_norm": 0.9948905951820991, + "learning_rate": 4.716395063634932e-06, + "loss": 0.4415, + "step": 15168 + }, + { + "epoch": 0.9237280394604634, + "grad_norm": 1.1570007891770617, + "learning_rate": 4.7163581475786025e-06, + "loss": 0.3625, + "step": 15169 + }, + { + "epoch": 0.9237889352373413, + "grad_norm": 0.9464153259126534, + "learning_rate": 4.716321229264292e-06, + "loss": 0.3724, + "step": 15170 + }, + { + "epoch": 0.9238498310142191, + "grad_norm": 1.0258583102248726, + "learning_rate": 4.716284308692041e-06, + "loss": 0.4305, + "step": 15171 + }, + { + "epoch": 0.9239107267910971, + "grad_norm": 0.9351496375613665, + "learning_rate": 4.716247385861884e-06, + "loss": 0.4062, + "step": 15172 + }, + { + "epoch": 0.9239716225679749, + "grad_norm": 0.9263705301490915, + "learning_rate": 4.716210460773861e-06, + "loss": 0.4001, + "step": 15173 + }, + { + "epoch": 0.9240325183448528, + "grad_norm": 1.1016339008378304, + "learning_rate": 4.7161735334280086e-06, + "loss": 0.4126, + "step": 15174 + }, + { + "epoch": 0.9240934141217306, + "grad_norm": 0.9954556689951171, + "learning_rate": 4.716136603824365e-06, + "loss": 0.3978, + "step": 15175 + }, + { + "epoch": 0.9241543098986086, + "grad_norm": 0.9588765147825776, + "learning_rate": 4.716099671962968e-06, + "loss": 0.4858, + "step": 15176 + }, + { + "epoch": 0.9242152056754864, + "grad_norm": 1.022668310042247, + "learning_rate": 4.716062737843854e-06, + "loss": 0.4224, + "step": 15177 + }, + { + "epoch": 0.9242761014523643, + "grad_norm": 1.0615440775139358, + "learning_rate": 4.716025801467061e-06, + "loss": 0.452, + "step": 15178 + }, + { + "epoch": 0.9243369972292421, + "grad_norm": 1.030570594995032, + "learning_rate": 4.715988862832628e-06, + "loss": 0.4026, + "step": 15179 + }, + { + "epoch": 0.9243978930061201, + "grad_norm": 0.9825857389361724, + "learning_rate": 4.715951921940591e-06, + "loss": 0.3659, + "step": 15180 + }, + { + "epoch": 0.9244587887829979, + "grad_norm": 1.0123672177631844, + "learning_rate": 4.715914978790989e-06, + "loss": 0.4157, + "step": 15181 + }, + { + "epoch": 0.9245196845598758, + "grad_norm": 1.004375001039204, + "learning_rate": 4.715878033383857e-06, + "loss": 0.4022, + "step": 15182 + }, + { + "epoch": 0.9245805803367536, + "grad_norm": 0.9875237975923132, + "learning_rate": 4.715841085719236e-06, + "loss": 0.4286, + "step": 15183 + }, + { + "epoch": 0.9246414761136316, + "grad_norm": 1.0620928548018476, + "learning_rate": 4.715804135797162e-06, + "loss": 0.4225, + "step": 15184 + }, + { + "epoch": 0.9247023718905094, + "grad_norm": 0.9872736885329042, + "learning_rate": 4.715767183617673e-06, + "loss": 0.4123, + "step": 15185 + }, + { + "epoch": 0.9247632676673873, + "grad_norm": 0.9542536372281955, + "learning_rate": 4.715730229180806e-06, + "loss": 0.428, + "step": 15186 + }, + { + "epoch": 0.9248241634442651, + "grad_norm": 1.1352211254556939, + "learning_rate": 4.7156932724866e-06, + "loss": 0.376, + "step": 15187 + }, + { + "epoch": 0.9248850592211431, + "grad_norm": 1.0565625292246545, + "learning_rate": 4.715656313535091e-06, + "loss": 0.4346, + "step": 15188 + }, + { + "epoch": 0.9249459549980209, + "grad_norm": 0.9517270302201438, + "learning_rate": 4.715619352326317e-06, + "loss": 0.4523, + "step": 15189 + }, + { + "epoch": 0.9250068507748987, + "grad_norm": 0.9529897508327908, + "learning_rate": 4.715582388860317e-06, + "loss": 0.4198, + "step": 15190 + }, + { + "epoch": 0.9250677465517766, + "grad_norm": 0.9652013240093414, + "learning_rate": 4.715545423137126e-06, + "loss": 0.4421, + "step": 15191 + }, + { + "epoch": 0.9251286423286545, + "grad_norm": 0.9933862955358957, + "learning_rate": 4.715508455156786e-06, + "loss": 0.4136, + "step": 15192 + }, + { + "epoch": 0.9251895381055324, + "grad_norm": 0.971198456725615, + "learning_rate": 4.715471484919331e-06, + "loss": 0.3864, + "step": 15193 + }, + { + "epoch": 0.9252504338824102, + "grad_norm": 1.0490974306308565, + "learning_rate": 4.715434512424799e-06, + "loss": 0.3651, + "step": 15194 + }, + { + "epoch": 0.9253113296592881, + "grad_norm": 1.0619887138625241, + "learning_rate": 4.715397537673228e-06, + "loss": 0.4221, + "step": 15195 + }, + { + "epoch": 0.925372225436166, + "grad_norm": 1.0326348305413413, + "learning_rate": 4.715360560664658e-06, + "loss": 0.436, + "step": 15196 + }, + { + "epoch": 0.9254331212130439, + "grad_norm": 1.0182119951410404, + "learning_rate": 4.715323581399123e-06, + "loss": 0.3832, + "step": 15197 + }, + { + "epoch": 0.9254940169899217, + "grad_norm": 1.000241877764681, + "learning_rate": 4.715286599876664e-06, + "loss": 0.4231, + "step": 15198 + }, + { + "epoch": 0.9255549127667996, + "grad_norm": 0.9951831388159181, + "learning_rate": 4.7152496160973145e-06, + "loss": 0.4474, + "step": 15199 + }, + { + "epoch": 0.9256158085436775, + "grad_norm": 0.9683952084672196, + "learning_rate": 4.7152126300611164e-06, + "loss": 0.3632, + "step": 15200 + }, + { + "epoch": 0.9256767043205554, + "grad_norm": 0.9756849157093773, + "learning_rate": 4.715175641768106e-06, + "loss": 0.4036, + "step": 15201 + }, + { + "epoch": 0.9257376000974332, + "grad_norm": 1.0621510963682494, + "learning_rate": 4.715138651218321e-06, + "loss": 0.3839, + "step": 15202 + }, + { + "epoch": 0.9257984958743111, + "grad_norm": 1.0247053795352161, + "learning_rate": 4.7151016584117985e-06, + "loss": 0.4281, + "step": 15203 + }, + { + "epoch": 0.925859391651189, + "grad_norm": 0.9861612998770383, + "learning_rate": 4.7150646633485754e-06, + "loss": 0.4222, + "step": 15204 + }, + { + "epoch": 0.9259202874280669, + "grad_norm": 0.998445341043185, + "learning_rate": 4.715027666028691e-06, + "loss": 0.3771, + "step": 15205 + }, + { + "epoch": 0.9259811832049447, + "grad_norm": 1.0132554694829294, + "learning_rate": 4.714990666452183e-06, + "loss": 0.4022, + "step": 15206 + }, + { + "epoch": 0.9260420789818226, + "grad_norm": 0.9840812044616607, + "learning_rate": 4.714953664619088e-06, + "loss": 0.3941, + "step": 15207 + }, + { + "epoch": 0.9261029747587005, + "grad_norm": 1.022204861977761, + "learning_rate": 4.714916660529444e-06, + "loss": 0.3758, + "step": 15208 + }, + { + "epoch": 0.9261638705355784, + "grad_norm": 1.0655664045060487, + "learning_rate": 4.71487965418329e-06, + "loss": 0.4427, + "step": 15209 + }, + { + "epoch": 0.9262247663124562, + "grad_norm": 1.0834443276082617, + "learning_rate": 4.714842645580662e-06, + "loss": 0.3796, + "step": 15210 + }, + { + "epoch": 0.926285662089334, + "grad_norm": 1.044692846572255, + "learning_rate": 4.7148056347215984e-06, + "loss": 0.4119, + "step": 15211 + }, + { + "epoch": 0.926346557866212, + "grad_norm": 1.0250385834678999, + "learning_rate": 4.714768621606137e-06, + "loss": 0.385, + "step": 15212 + }, + { + "epoch": 0.9264074536430899, + "grad_norm": 1.0210541482306676, + "learning_rate": 4.714731606234315e-06, + "loss": 0.3778, + "step": 15213 + }, + { + "epoch": 0.9264683494199677, + "grad_norm": 0.9679488299422847, + "learning_rate": 4.714694588606171e-06, + "loss": 0.4262, + "step": 15214 + }, + { + "epoch": 0.9265292451968457, + "grad_norm": 0.9869571988652257, + "learning_rate": 4.7146575687217426e-06, + "loss": 0.4272, + "step": 15215 + }, + { + "epoch": 0.9265901409737235, + "grad_norm": 0.9385894586618572, + "learning_rate": 4.714620546581066e-06, + "loss": 0.391, + "step": 15216 + }, + { + "epoch": 0.9266510367506013, + "grad_norm": 1.0472318927790987, + "learning_rate": 4.71458352218418e-06, + "loss": 0.358, + "step": 15217 + }, + { + "epoch": 0.9267119325274792, + "grad_norm": 0.9995019327626069, + "learning_rate": 4.714546495531123e-06, + "loss": 0.3471, + "step": 15218 + }, + { + "epoch": 0.9267728283043571, + "grad_norm": 0.9530155430731841, + "learning_rate": 4.714509466621933e-06, + "loss": 0.4732, + "step": 15219 + }, + { + "epoch": 0.926833724081235, + "grad_norm": 0.9826383600350933, + "learning_rate": 4.714472435456645e-06, + "loss": 0.4115, + "step": 15220 + }, + { + "epoch": 0.9268946198581128, + "grad_norm": 0.9453885587138082, + "learning_rate": 4.714435402035299e-06, + "loss": 0.4098, + "step": 15221 + }, + { + "epoch": 0.9269555156349907, + "grad_norm": 1.0205048724795083, + "learning_rate": 4.714398366357933e-06, + "loss": 0.4535, + "step": 15222 + }, + { + "epoch": 0.9270164114118686, + "grad_norm": 0.937227991961561, + "learning_rate": 4.714361328424583e-06, + "loss": 0.4326, + "step": 15223 + }, + { + "epoch": 0.9270773071887465, + "grad_norm": 1.0336669488747956, + "learning_rate": 4.714324288235289e-06, + "loss": 0.3901, + "step": 15224 + }, + { + "epoch": 0.9271382029656243, + "grad_norm": 0.9773692495119285, + "learning_rate": 4.714287245790086e-06, + "loss": 0.4262, + "step": 15225 + }, + { + "epoch": 0.9271990987425022, + "grad_norm": 0.9707886559619588, + "learning_rate": 4.714250201089014e-06, + "loss": 0.3752, + "step": 15226 + }, + { + "epoch": 0.9272599945193801, + "grad_norm": 1.0558005561669455, + "learning_rate": 4.71421315413211e-06, + "loss": 0.4631, + "step": 15227 + }, + { + "epoch": 0.927320890296258, + "grad_norm": 0.9672782318797569, + "learning_rate": 4.714176104919411e-06, + "loss": 0.3953, + "step": 15228 + }, + { + "epoch": 0.9273817860731358, + "grad_norm": 1.0680403145602033, + "learning_rate": 4.714139053450956e-06, + "loss": 0.4203, + "step": 15229 + }, + { + "epoch": 0.9274426818500137, + "grad_norm": 0.9736826789638839, + "learning_rate": 4.714101999726783e-06, + "loss": 0.4607, + "step": 15230 + }, + { + "epoch": 0.9275035776268916, + "grad_norm": 0.9251150294217217, + "learning_rate": 4.714064943746927e-06, + "loss": 0.3946, + "step": 15231 + }, + { + "epoch": 0.9275644734037695, + "grad_norm": 0.9242193871157857, + "learning_rate": 4.714027885511429e-06, + "loss": 0.4041, + "step": 15232 + }, + { + "epoch": 0.9276253691806473, + "grad_norm": 1.0092426116772308, + "learning_rate": 4.713990825020325e-06, + "loss": 0.4644, + "step": 15233 + }, + { + "epoch": 0.9276862649575252, + "grad_norm": 0.9573693650242362, + "learning_rate": 4.713953762273653e-06, + "loss": 0.441, + "step": 15234 + }, + { + "epoch": 0.9277471607344031, + "grad_norm": 1.0961043728803506, + "learning_rate": 4.713916697271451e-06, + "loss": 0.4009, + "step": 15235 + }, + { + "epoch": 0.927808056511281, + "grad_norm": 0.9425187574131461, + "learning_rate": 4.7138796300137566e-06, + "loss": 0.3742, + "step": 15236 + }, + { + "epoch": 0.9278689522881588, + "grad_norm": 0.9571273444326102, + "learning_rate": 4.713842560500608e-06, + "loss": 0.4072, + "step": 15237 + }, + { + "epoch": 0.9279298480650366, + "grad_norm": 0.96550653368326, + "learning_rate": 4.713805488732042e-06, + "loss": 0.4399, + "step": 15238 + }, + { + "epoch": 0.9279907438419146, + "grad_norm": 1.0535281054842889, + "learning_rate": 4.713768414708099e-06, + "loss": 0.4641, + "step": 15239 + }, + { + "epoch": 0.9280516396187924, + "grad_norm": 0.992209305367028, + "learning_rate": 4.713731338428813e-06, + "loss": 0.467, + "step": 15240 + }, + { + "epoch": 0.9281125353956703, + "grad_norm": 1.00929572780801, + "learning_rate": 4.713694259894224e-06, + "loss": 0.3942, + "step": 15241 + }, + { + "epoch": 0.9281734311725481, + "grad_norm": 0.9429702508775255, + "learning_rate": 4.713657179104368e-06, + "loss": 0.4224, + "step": 15242 + }, + { + "epoch": 0.9282343269494261, + "grad_norm": 0.9698854269209315, + "learning_rate": 4.713620096059286e-06, + "loss": 0.4071, + "step": 15243 + }, + { + "epoch": 0.9282952227263039, + "grad_norm": 0.9462553068018321, + "learning_rate": 4.713583010759013e-06, + "loss": 0.4923, + "step": 15244 + }, + { + "epoch": 0.9283561185031818, + "grad_norm": 1.1030196074859464, + "learning_rate": 4.7135459232035874e-06, + "loss": 0.397, + "step": 15245 + }, + { + "epoch": 0.9284170142800596, + "grad_norm": 0.9315910937767609, + "learning_rate": 4.713508833393048e-06, + "loss": 0.4672, + "step": 15246 + }, + { + "epoch": 0.9284779100569376, + "grad_norm": 1.1335813879857888, + "learning_rate": 4.713471741327431e-06, + "loss": 0.3697, + "step": 15247 + }, + { + "epoch": 0.9285388058338154, + "grad_norm": 0.9585945624373037, + "learning_rate": 4.713434647006776e-06, + "loss": 0.4122, + "step": 15248 + }, + { + "epoch": 0.9285997016106933, + "grad_norm": 1.0407433951746445, + "learning_rate": 4.713397550431119e-06, + "loss": 0.4128, + "step": 15249 + }, + { + "epoch": 0.9286605973875711, + "grad_norm": 1.0297048572320606, + "learning_rate": 4.713360451600499e-06, + "loss": 0.4081, + "step": 15250 + }, + { + "epoch": 0.9287214931644491, + "grad_norm": 1.047755197632074, + "learning_rate": 4.713323350514953e-06, + "loss": 0.4632, + "step": 15251 + }, + { + "epoch": 0.9287823889413269, + "grad_norm": 0.9546352026056543, + "learning_rate": 4.713286247174519e-06, + "loss": 0.3913, + "step": 15252 + }, + { + "epoch": 0.9288432847182048, + "grad_norm": 1.0358045313460855, + "learning_rate": 4.713249141579236e-06, + "loss": 0.3612, + "step": 15253 + }, + { + "epoch": 0.9289041804950827, + "grad_norm": 1.0310854124686157, + "learning_rate": 4.7132120337291395e-06, + "loss": 0.4835, + "step": 15254 + }, + { + "epoch": 0.9289650762719606, + "grad_norm": 1.0160112634957572, + "learning_rate": 4.71317492362427e-06, + "loss": 0.364, + "step": 15255 + }, + { + "epoch": 0.9290259720488384, + "grad_norm": 0.9910563332774294, + "learning_rate": 4.713137811264663e-06, + "loss": 0.4415, + "step": 15256 + }, + { + "epoch": 0.9290868678257163, + "grad_norm": 1.013434052058888, + "learning_rate": 4.713100696650358e-06, + "loss": 0.4049, + "step": 15257 + }, + { + "epoch": 0.9291477636025942, + "grad_norm": 1.012102284837412, + "learning_rate": 4.71306357978139e-06, + "loss": 0.36, + "step": 15258 + }, + { + "epoch": 0.9292086593794721, + "grad_norm": 0.9665757457001548, + "learning_rate": 4.713026460657801e-06, + "loss": 0.4054, + "step": 15259 + }, + { + "epoch": 0.9292695551563499, + "grad_norm": 0.933438778121114, + "learning_rate": 4.712989339279626e-06, + "loss": 0.4503, + "step": 15260 + }, + { + "epoch": 0.9293304509332277, + "grad_norm": 1.0091061836432411, + "learning_rate": 4.712952215646903e-06, + "loss": 0.4033, + "step": 15261 + }, + { + "epoch": 0.9293913467101057, + "grad_norm": 0.9332329812095672, + "learning_rate": 4.712915089759671e-06, + "loss": 0.3929, + "step": 15262 + }, + { + "epoch": 0.9294522424869835, + "grad_norm": 1.063532560113865, + "learning_rate": 4.712877961617967e-06, + "loss": 0.3857, + "step": 15263 + }, + { + "epoch": 0.9295131382638614, + "grad_norm": 1.0510369840991762, + "learning_rate": 4.712840831221828e-06, + "loss": 0.4557, + "step": 15264 + }, + { + "epoch": 0.9295740340407392, + "grad_norm": 1.0407479520728677, + "learning_rate": 4.712803698571294e-06, + "loss": 0.3961, + "step": 15265 + }, + { + "epoch": 0.9296349298176172, + "grad_norm": 1.0096216750302132, + "learning_rate": 4.7127665636664014e-06, + "loss": 0.353, + "step": 15266 + }, + { + "epoch": 0.929695825594495, + "grad_norm": 1.006873228288592, + "learning_rate": 4.712729426507187e-06, + "loss": 0.4306, + "step": 15267 + }, + { + "epoch": 0.9297567213713729, + "grad_norm": 1.0290555489635012, + "learning_rate": 4.712692287093691e-06, + "loss": 0.3456, + "step": 15268 + }, + { + "epoch": 0.9298176171482507, + "grad_norm": 0.9943870295773239, + "learning_rate": 4.71265514542595e-06, + "loss": 0.4057, + "step": 15269 + }, + { + "epoch": 0.9298785129251287, + "grad_norm": 0.9763658457577852, + "learning_rate": 4.7126180015040015e-06, + "loss": 0.5063, + "step": 15270 + }, + { + "epoch": 0.9299394087020065, + "grad_norm": 0.9641867810189255, + "learning_rate": 4.712580855327884e-06, + "loss": 0.3949, + "step": 15271 + }, + { + "epoch": 0.9300003044788844, + "grad_norm": 0.9187098914065841, + "learning_rate": 4.712543706897636e-06, + "loss": 0.4107, + "step": 15272 + }, + { + "epoch": 0.9300612002557622, + "grad_norm": 1.0709087889079878, + "learning_rate": 4.712506556213293e-06, + "loss": 0.3645, + "step": 15273 + }, + { + "epoch": 0.9301220960326402, + "grad_norm": 1.0023439068339377, + "learning_rate": 4.7124694032748955e-06, + "loss": 0.4252, + "step": 15274 + }, + { + "epoch": 0.930182991809518, + "grad_norm": 1.1147351126790757, + "learning_rate": 4.712432248082479e-06, + "loss": 0.395, + "step": 15275 + }, + { + "epoch": 0.9302438875863959, + "grad_norm": 0.9056347655265692, + "learning_rate": 4.712395090636084e-06, + "loss": 0.4282, + "step": 15276 + }, + { + "epoch": 0.9303047833632737, + "grad_norm": 1.0190690584146196, + "learning_rate": 4.712357930935746e-06, + "loss": 0.3726, + "step": 15277 + }, + { + "epoch": 0.9303656791401517, + "grad_norm": 0.9935348532689008, + "learning_rate": 4.712320768981503e-06, + "loss": 0.4699, + "step": 15278 + }, + { + "epoch": 0.9304265749170295, + "grad_norm": 1.0971948451187092, + "learning_rate": 4.7122836047733944e-06, + "loss": 0.3566, + "step": 15279 + }, + { + "epoch": 0.9304874706939074, + "grad_norm": 0.9366819138584908, + "learning_rate": 4.712246438311457e-06, + "loss": 0.3678, + "step": 15280 + }, + { + "epoch": 0.9305483664707852, + "grad_norm": 1.0405548251040349, + "learning_rate": 4.71220926959573e-06, + "loss": 0.4132, + "step": 15281 + }, + { + "epoch": 0.9306092622476632, + "grad_norm": 0.9696651165512841, + "learning_rate": 4.712172098626249e-06, + "loss": 0.3683, + "step": 15282 + }, + { + "epoch": 0.930670158024541, + "grad_norm": 1.0446204226499307, + "learning_rate": 4.7121349254030535e-06, + "loss": 0.4325, + "step": 15283 + }, + { + "epoch": 0.9307310538014189, + "grad_norm": 0.9579231851115656, + "learning_rate": 4.712097749926181e-06, + "loss": 0.4054, + "step": 15284 + }, + { + "epoch": 0.9307919495782967, + "grad_norm": 1.0165820270403192, + "learning_rate": 4.7120605721956696e-06, + "loss": 0.4351, + "step": 15285 + }, + { + "epoch": 0.9308528453551747, + "grad_norm": 0.897146005327998, + "learning_rate": 4.712023392211556e-06, + "loss": 0.4186, + "step": 15286 + }, + { + "epoch": 0.9309137411320525, + "grad_norm": 0.9181203963686819, + "learning_rate": 4.711986209973879e-06, + "loss": 0.5203, + "step": 15287 + }, + { + "epoch": 0.9309746369089303, + "grad_norm": 0.9508379766507573, + "learning_rate": 4.7119490254826775e-06, + "loss": 0.4918, + "step": 15288 + }, + { + "epoch": 0.9310355326858082, + "grad_norm": 1.0327413219777577, + "learning_rate": 4.711911838737987e-06, + "loss": 0.4098, + "step": 15289 + }, + { + "epoch": 0.9310964284626861, + "grad_norm": 0.9324983486293862, + "learning_rate": 4.711874649739847e-06, + "loss": 0.4501, + "step": 15290 + }, + { + "epoch": 0.931157324239564, + "grad_norm": 0.9474725768639107, + "learning_rate": 4.711837458488296e-06, + "loss": 0.4258, + "step": 15291 + }, + { + "epoch": 0.9312182200164418, + "grad_norm": 0.900371392957501, + "learning_rate": 4.71180026498337e-06, + "loss": 0.4682, + "step": 15292 + }, + { + "epoch": 0.9312791157933197, + "grad_norm": 0.9554429902396148, + "learning_rate": 4.7117630692251084e-06, + "loss": 0.4396, + "step": 15293 + }, + { + "epoch": 0.9313400115701976, + "grad_norm": 0.9844038597365486, + "learning_rate": 4.7117258712135485e-06, + "loss": 0.4166, + "step": 15294 + }, + { + "epoch": 0.9314009073470755, + "grad_norm": 0.9244529077008092, + "learning_rate": 4.7116886709487285e-06, + "loss": 0.4128, + "step": 15295 + }, + { + "epoch": 0.9314618031239533, + "grad_norm": 1.050315802218992, + "learning_rate": 4.711651468430686e-06, + "loss": 0.4351, + "step": 15296 + }, + { + "epoch": 0.9315226989008313, + "grad_norm": 0.9887979015146541, + "learning_rate": 4.711614263659459e-06, + "loss": 0.4404, + "step": 15297 + }, + { + "epoch": 0.9315835946777091, + "grad_norm": 0.9908534917854082, + "learning_rate": 4.711577056635085e-06, + "loss": 0.4673, + "step": 15298 + }, + { + "epoch": 0.931644490454587, + "grad_norm": 1.0745192387917644, + "learning_rate": 4.711539847357602e-06, + "loss": 0.3221, + "step": 15299 + }, + { + "epoch": 0.9317053862314648, + "grad_norm": 0.947597144402304, + "learning_rate": 4.711502635827049e-06, + "loss": 0.4601, + "step": 15300 + }, + { + "epoch": 0.9317662820083428, + "grad_norm": 0.934028386536339, + "learning_rate": 4.711465422043463e-06, + "loss": 0.3845, + "step": 15301 + }, + { + "epoch": 0.9318271777852206, + "grad_norm": 1.0037556519705346, + "learning_rate": 4.711428206006882e-06, + "loss": 0.3749, + "step": 15302 + }, + { + "epoch": 0.9318880735620985, + "grad_norm": 0.96299390741456, + "learning_rate": 4.711390987717343e-06, + "loss": 0.3734, + "step": 15303 + }, + { + "epoch": 0.9319489693389763, + "grad_norm": 1.00848334826525, + "learning_rate": 4.711353767174885e-06, + "loss": 0.3597, + "step": 15304 + }, + { + "epoch": 0.9320098651158543, + "grad_norm": 0.9234286000249986, + "learning_rate": 4.711316544379547e-06, + "loss": 0.407, + "step": 15305 + }, + { + "epoch": 0.9320707608927321, + "grad_norm": 0.9884932069800747, + "learning_rate": 4.7112793193313645e-06, + "loss": 0.4505, + "step": 15306 + }, + { + "epoch": 0.93213165666961, + "grad_norm": 1.0186963100470807, + "learning_rate": 4.711242092030377e-06, + "loss": 0.3728, + "step": 15307 + }, + { + "epoch": 0.9321925524464878, + "grad_norm": 0.9979358437552822, + "learning_rate": 4.711204862476622e-06, + "loss": 0.5104, + "step": 15308 + }, + { + "epoch": 0.9322534482233658, + "grad_norm": 1.0154225353464241, + "learning_rate": 4.711167630670137e-06, + "loss": 0.4574, + "step": 15309 + }, + { + "epoch": 0.9323143440002436, + "grad_norm": 0.9618255624576327, + "learning_rate": 4.711130396610961e-06, + "loss": 0.3918, + "step": 15310 + }, + { + "epoch": 0.9323752397771214, + "grad_norm": 0.9878605317171026, + "learning_rate": 4.711093160299132e-06, + "loss": 0.4398, + "step": 15311 + }, + { + "epoch": 0.9324361355539993, + "grad_norm": 1.0400059545269782, + "learning_rate": 4.711055921734686e-06, + "loss": 0.3838, + "step": 15312 + }, + { + "epoch": 0.9324970313308772, + "grad_norm": 0.9941510720407297, + "learning_rate": 4.711018680917663e-06, + "loss": 0.4495, + "step": 15313 + }, + { + "epoch": 0.9325579271077551, + "grad_norm": 0.955658079018839, + "learning_rate": 4.710981437848099e-06, + "loss": 0.4466, + "step": 15314 + }, + { + "epoch": 0.9326188228846329, + "grad_norm": 0.9924186734120606, + "learning_rate": 4.710944192526035e-06, + "loss": 0.3489, + "step": 15315 + }, + { + "epoch": 0.9326797186615108, + "grad_norm": 0.9266522139040106, + "learning_rate": 4.710906944951506e-06, + "loss": 0.4869, + "step": 15316 + }, + { + "epoch": 0.9327406144383887, + "grad_norm": 0.9186848505294151, + "learning_rate": 4.71086969512455e-06, + "loss": 0.4238, + "step": 15317 + }, + { + "epoch": 0.9328015102152666, + "grad_norm": 1.0042661775326858, + "learning_rate": 4.710832443045207e-06, + "loss": 0.4044, + "step": 15318 + }, + { + "epoch": 0.9328624059921444, + "grad_norm": 1.103007760321851, + "learning_rate": 4.710795188713514e-06, + "loss": 0.3737, + "step": 15319 + }, + { + "epoch": 0.9329233017690223, + "grad_norm": 0.9604476744806938, + "learning_rate": 4.710757932129508e-06, + "loss": 0.4574, + "step": 15320 + }, + { + "epoch": 0.9329841975459002, + "grad_norm": 0.9866161526133975, + "learning_rate": 4.710720673293229e-06, + "loss": 0.4062, + "step": 15321 + }, + { + "epoch": 0.9330450933227781, + "grad_norm": 1.0142348839687878, + "learning_rate": 4.710683412204713e-06, + "loss": 0.4014, + "step": 15322 + }, + { + "epoch": 0.9331059890996559, + "grad_norm": 1.0526406187783408, + "learning_rate": 4.710646148863998e-06, + "loss": 0.3759, + "step": 15323 + }, + { + "epoch": 0.9331668848765338, + "grad_norm": 0.992496256520619, + "learning_rate": 4.710608883271123e-06, + "loss": 0.3515, + "step": 15324 + }, + { + "epoch": 0.9332277806534117, + "grad_norm": 1.030847540996335, + "learning_rate": 4.710571615426126e-06, + "loss": 0.4022, + "step": 15325 + }, + { + "epoch": 0.9332886764302896, + "grad_norm": 0.9779223920853227, + "learning_rate": 4.710534345329046e-06, + "loss": 0.3961, + "step": 15326 + }, + { + "epoch": 0.9333495722071674, + "grad_norm": 0.9755729731698415, + "learning_rate": 4.710497072979917e-06, + "loss": 0.448, + "step": 15327 + }, + { + "epoch": 0.9334104679840453, + "grad_norm": 1.0291032909556508, + "learning_rate": 4.7104597983787805e-06, + "loss": 0.3803, + "step": 15328 + }, + { + "epoch": 0.9334713637609232, + "grad_norm": 1.0298070003103927, + "learning_rate": 4.710422521525673e-06, + "loss": 0.4577, + "step": 15329 + }, + { + "epoch": 0.9335322595378011, + "grad_norm": 0.9811796549274837, + "learning_rate": 4.710385242420635e-06, + "loss": 0.3965, + "step": 15330 + }, + { + "epoch": 0.9335931553146789, + "grad_norm": 0.9463123849277493, + "learning_rate": 4.710347961063701e-06, + "loss": 0.4695, + "step": 15331 + }, + { + "epoch": 0.9336540510915567, + "grad_norm": 0.9824277306519186, + "learning_rate": 4.71031067745491e-06, + "loss": 0.3511, + "step": 15332 + }, + { + "epoch": 0.9337149468684347, + "grad_norm": 1.0090769375813666, + "learning_rate": 4.710273391594301e-06, + "loss": 0.4065, + "step": 15333 + }, + { + "epoch": 0.9337758426453125, + "grad_norm": 1.0109465842636578, + "learning_rate": 4.710236103481912e-06, + "loss": 0.3874, + "step": 15334 + }, + { + "epoch": 0.9338367384221904, + "grad_norm": 1.111491158100388, + "learning_rate": 4.71019881311778e-06, + "loss": 0.3158, + "step": 15335 + }, + { + "epoch": 0.9338976341990683, + "grad_norm": 0.9983000181078318, + "learning_rate": 4.710161520501944e-06, + "loss": 0.4008, + "step": 15336 + }, + { + "epoch": 0.9339585299759462, + "grad_norm": 0.9940995198126811, + "learning_rate": 4.71012422563444e-06, + "loss": 0.4891, + "step": 15337 + }, + { + "epoch": 0.934019425752824, + "grad_norm": 0.9647420007608748, + "learning_rate": 4.710086928515309e-06, + "loss": 0.4846, + "step": 15338 + }, + { + "epoch": 0.9340803215297019, + "grad_norm": 0.9731612374384189, + "learning_rate": 4.710049629144585e-06, + "loss": 0.4095, + "step": 15339 + }, + { + "epoch": 0.9341412173065798, + "grad_norm": 0.9768744175321294, + "learning_rate": 4.71001232752231e-06, + "loss": 0.4701, + "step": 15340 + }, + { + "epoch": 0.9342021130834577, + "grad_norm": 0.9590285287039639, + "learning_rate": 4.709975023648521e-06, + "loss": 0.4433, + "step": 15341 + }, + { + "epoch": 0.9342630088603355, + "grad_norm": 0.9958794595411469, + "learning_rate": 4.7099377175232545e-06, + "loss": 0.4114, + "step": 15342 + }, + { + "epoch": 0.9343239046372134, + "grad_norm": 0.9816143916027386, + "learning_rate": 4.709900409146549e-06, + "loss": 0.4774, + "step": 15343 + }, + { + "epoch": 0.9343848004140913, + "grad_norm": 0.9220998597814397, + "learning_rate": 4.709863098518444e-06, + "loss": 0.4482, + "step": 15344 + }, + { + "epoch": 0.9344456961909692, + "grad_norm": 0.9864981178539675, + "learning_rate": 4.7098257856389754e-06, + "loss": 0.3638, + "step": 15345 + }, + { + "epoch": 0.934506591967847, + "grad_norm": 0.9590695164322655, + "learning_rate": 4.7097884705081834e-06, + "loss": 0.5061, + "step": 15346 + }, + { + "epoch": 0.9345674877447249, + "grad_norm": 0.9940063358911418, + "learning_rate": 4.709751153126103e-06, + "loss": 0.4143, + "step": 15347 + }, + { + "epoch": 0.9346283835216028, + "grad_norm": 0.8586637229790017, + "learning_rate": 4.709713833492776e-06, + "loss": 0.427, + "step": 15348 + }, + { + "epoch": 0.9346892792984807, + "grad_norm": 0.99089820010201, + "learning_rate": 4.7096765116082374e-06, + "loss": 0.3956, + "step": 15349 + }, + { + "epoch": 0.9347501750753585, + "grad_norm": 0.9872298608589711, + "learning_rate": 4.709639187472526e-06, + "loss": 0.4377, + "step": 15350 + }, + { + "epoch": 0.9348110708522364, + "grad_norm": 0.9826683021901469, + "learning_rate": 4.709601861085681e-06, + "loss": 0.4361, + "step": 15351 + }, + { + "epoch": 0.9348719666291143, + "grad_norm": 0.9318010946812185, + "learning_rate": 4.709564532447739e-06, + "loss": 0.3866, + "step": 15352 + }, + { + "epoch": 0.9349328624059922, + "grad_norm": 0.9494602513241668, + "learning_rate": 4.70952720155874e-06, + "loss": 0.4326, + "step": 15353 + }, + { + "epoch": 0.93499375818287, + "grad_norm": 0.9781365658983473, + "learning_rate": 4.709489868418719e-06, + "loss": 0.4066, + "step": 15354 + }, + { + "epoch": 0.9350546539597479, + "grad_norm": 0.9450665244332705, + "learning_rate": 4.709452533027717e-06, + "loss": 0.4451, + "step": 15355 + }, + { + "epoch": 0.9351155497366258, + "grad_norm": 0.9797510745731755, + "learning_rate": 4.709415195385769e-06, + "loss": 0.4306, + "step": 15356 + }, + { + "epoch": 0.9351764455135037, + "grad_norm": 1.0525709233179905, + "learning_rate": 4.7093778554929156e-06, + "loss": 0.3873, + "step": 15357 + }, + { + "epoch": 0.9352373412903815, + "grad_norm": 1.0118362663210274, + "learning_rate": 4.709340513349194e-06, + "loss": 0.3462, + "step": 15358 + }, + { + "epoch": 0.9352982370672593, + "grad_norm": 0.9807983286445586, + "learning_rate": 4.709303168954642e-06, + "loss": 0.4581, + "step": 15359 + }, + { + "epoch": 0.9353591328441373, + "grad_norm": 1.0160110938635236, + "learning_rate": 4.709265822309298e-06, + "loss": 0.4297, + "step": 15360 + }, + { + "epoch": 0.9354200286210151, + "grad_norm": 1.1292545442921278, + "learning_rate": 4.7092284734132e-06, + "loss": 0.4062, + "step": 15361 + }, + { + "epoch": 0.935480924397893, + "grad_norm": 1.1399370820653394, + "learning_rate": 4.709191122266386e-06, + "loss": 0.3248, + "step": 15362 + }, + { + "epoch": 0.9355418201747708, + "grad_norm": 1.0357134514817865, + "learning_rate": 4.709153768868894e-06, + "loss": 0.3913, + "step": 15363 + }, + { + "epoch": 0.9356027159516488, + "grad_norm": 1.0626177344467198, + "learning_rate": 4.709116413220763e-06, + "loss": 0.3642, + "step": 15364 + }, + { + "epoch": 0.9356636117285266, + "grad_norm": 0.9991812694263088, + "learning_rate": 4.709079055322029e-06, + "loss": 0.386, + "step": 15365 + }, + { + "epoch": 0.9357245075054045, + "grad_norm": 0.9335147385063244, + "learning_rate": 4.709041695172731e-06, + "loss": 0.416, + "step": 15366 + }, + { + "epoch": 0.9357854032822823, + "grad_norm": 1.0292846001707403, + "learning_rate": 4.709004332772908e-06, + "loss": 0.446, + "step": 15367 + }, + { + "epoch": 0.9358462990591603, + "grad_norm": 1.083373513228388, + "learning_rate": 4.708966968122596e-06, + "loss": 0.3449, + "step": 15368 + }, + { + "epoch": 0.9359071948360381, + "grad_norm": 0.9975078064912579, + "learning_rate": 4.7089296012218354e-06, + "loss": 0.4, + "step": 15369 + }, + { + "epoch": 0.935968090612916, + "grad_norm": 0.9929162533078592, + "learning_rate": 4.7088922320706634e-06, + "loss": 0.4135, + "step": 15370 + }, + { + "epoch": 0.9360289863897938, + "grad_norm": 1.0129343012988314, + "learning_rate": 4.708854860669117e-06, + "loss": 0.3697, + "step": 15371 + }, + { + "epoch": 0.9360898821666718, + "grad_norm": 0.9803363544851896, + "learning_rate": 4.708817487017236e-06, + "loss": 0.3818, + "step": 15372 + }, + { + "epoch": 0.9361507779435496, + "grad_norm": 1.0485761603954464, + "learning_rate": 4.708780111115058e-06, + "loss": 0.3908, + "step": 15373 + }, + { + "epoch": 0.9362116737204275, + "grad_norm": 1.0875345740125373, + "learning_rate": 4.70874273296262e-06, + "loss": 0.4132, + "step": 15374 + }, + { + "epoch": 0.9362725694973053, + "grad_norm": 1.0755469705986143, + "learning_rate": 4.708705352559961e-06, + "loss": 0.387, + "step": 15375 + }, + { + "epoch": 0.9363334652741833, + "grad_norm": 0.9868405606295292, + "learning_rate": 4.7086679699071194e-06, + "loss": 0.3911, + "step": 15376 + }, + { + "epoch": 0.9363943610510611, + "grad_norm": 1.004100375907398, + "learning_rate": 4.708630585004132e-06, + "loss": 0.5026, + "step": 15377 + }, + { + "epoch": 0.936455256827939, + "grad_norm": 0.9281969355992117, + "learning_rate": 4.708593197851038e-06, + "loss": 0.4437, + "step": 15378 + }, + { + "epoch": 0.9365161526048169, + "grad_norm": 1.057019724786819, + "learning_rate": 4.708555808447875e-06, + "loss": 0.3605, + "step": 15379 + }, + { + "epoch": 0.9365770483816948, + "grad_norm": 0.9753102793059005, + "learning_rate": 4.708518416794682e-06, + "loss": 0.4009, + "step": 15380 + }, + { + "epoch": 0.9366379441585726, + "grad_norm": 0.9650378163758415, + "learning_rate": 4.708481022891495e-06, + "loss": 0.477, + "step": 15381 + }, + { + "epoch": 0.9366988399354504, + "grad_norm": 0.9413133712292188, + "learning_rate": 4.7084436267383546e-06, + "loss": 0.4846, + "step": 15382 + }, + { + "epoch": 0.9367597357123284, + "grad_norm": 0.9926517949724434, + "learning_rate": 4.708406228335298e-06, + "loss": 0.4582, + "step": 15383 + }, + { + "epoch": 0.9368206314892062, + "grad_norm": 1.0000354722187548, + "learning_rate": 4.708368827682362e-06, + "loss": 0.4916, + "step": 15384 + }, + { + "epoch": 0.9368815272660841, + "grad_norm": 1.0291018933231741, + "learning_rate": 4.708331424779586e-06, + "loss": 0.4383, + "step": 15385 + }, + { + "epoch": 0.9369424230429619, + "grad_norm": 1.0690088228736023, + "learning_rate": 4.708294019627008e-06, + "loss": 0.3779, + "step": 15386 + }, + { + "epoch": 0.9370033188198399, + "grad_norm": 1.1062111064474334, + "learning_rate": 4.708256612224666e-06, + "loss": 0.468, + "step": 15387 + }, + { + "epoch": 0.9370642145967177, + "grad_norm": 0.9734583901359855, + "learning_rate": 4.708219202572598e-06, + "loss": 0.4288, + "step": 15388 + }, + { + "epoch": 0.9371251103735956, + "grad_norm": 0.9519190816355115, + "learning_rate": 4.708181790670843e-06, + "loss": 0.4409, + "step": 15389 + }, + { + "epoch": 0.9371860061504734, + "grad_norm": 0.9334279965095429, + "learning_rate": 4.708144376519437e-06, + "loss": 0.4137, + "step": 15390 + }, + { + "epoch": 0.9372469019273514, + "grad_norm": 0.9894671386255539, + "learning_rate": 4.70810696011842e-06, + "loss": 0.4414, + "step": 15391 + }, + { + "epoch": 0.9373077977042292, + "grad_norm": 0.9940425640993321, + "learning_rate": 4.70806954146783e-06, + "loss": 0.4628, + "step": 15392 + }, + { + "epoch": 0.9373686934811071, + "grad_norm": 0.9262866856843776, + "learning_rate": 4.7080321205677045e-06, + "loss": 0.3902, + "step": 15393 + }, + { + "epoch": 0.9374295892579849, + "grad_norm": 0.9095737643173865, + "learning_rate": 4.707994697418081e-06, + "loss": 0.3646, + "step": 15394 + }, + { + "epoch": 0.9374904850348629, + "grad_norm": 1.0489703743559227, + "learning_rate": 4.7079572720189995e-06, + "loss": 0.4014, + "step": 15395 + }, + { + "epoch": 0.9375513808117407, + "grad_norm": 0.9649591459002353, + "learning_rate": 4.707919844370496e-06, + "loss": 0.4541, + "step": 15396 + }, + { + "epoch": 0.9376122765886186, + "grad_norm": 0.9833198326492133, + "learning_rate": 4.70788241447261e-06, + "loss": 0.35, + "step": 15397 + }, + { + "epoch": 0.9376731723654964, + "grad_norm": 1.002200601116947, + "learning_rate": 4.70784498232538e-06, + "loss": 0.368, + "step": 15398 + }, + { + "epoch": 0.9377340681423744, + "grad_norm": 0.991673582871368, + "learning_rate": 4.707807547928842e-06, + "loss": 0.4262, + "step": 15399 + }, + { + "epoch": 0.9377949639192522, + "grad_norm": 0.9690099166527907, + "learning_rate": 4.707770111283037e-06, + "loss": 0.4943, + "step": 15400 + }, + { + "epoch": 0.9378558596961301, + "grad_norm": 1.0035732464099465, + "learning_rate": 4.707732672388001e-06, + "loss": 0.412, + "step": 15401 + }, + { + "epoch": 0.9379167554730079, + "grad_norm": 1.0727260068310487, + "learning_rate": 4.707695231243773e-06, + "loss": 0.4243, + "step": 15402 + }, + { + "epoch": 0.9379776512498859, + "grad_norm": 0.9798693439422196, + "learning_rate": 4.707657787850391e-06, + "loss": 0.4178, + "step": 15403 + }, + { + "epoch": 0.9380385470267637, + "grad_norm": 0.9896513579972929, + "learning_rate": 4.707620342207893e-06, + "loss": 0.3673, + "step": 15404 + }, + { + "epoch": 0.9380994428036415, + "grad_norm": 1.1163411994364854, + "learning_rate": 4.707582894316317e-06, + "loss": 0.3713, + "step": 15405 + }, + { + "epoch": 0.9381603385805194, + "grad_norm": 1.036964118942981, + "learning_rate": 4.707545444175702e-06, + "loss": 0.4135, + "step": 15406 + }, + { + "epoch": 0.9382212343573973, + "grad_norm": 1.0144037172674887, + "learning_rate": 4.707507991786086e-06, + "loss": 0.4353, + "step": 15407 + }, + { + "epoch": 0.9382821301342752, + "grad_norm": 0.9698343139216431, + "learning_rate": 4.707470537147506e-06, + "loss": 0.3874, + "step": 15408 + }, + { + "epoch": 0.938343025911153, + "grad_norm": 1.0531446778159639, + "learning_rate": 4.707433080260001e-06, + "loss": 0.419, + "step": 15409 + }, + { + "epoch": 0.9384039216880309, + "grad_norm": 0.9801890181185203, + "learning_rate": 4.707395621123609e-06, + "loss": 0.3509, + "step": 15410 + }, + { + "epoch": 0.9384648174649088, + "grad_norm": 1.0819305814255409, + "learning_rate": 4.707358159738369e-06, + "loss": 0.3808, + "step": 15411 + }, + { + "epoch": 0.9385257132417867, + "grad_norm": 0.9833419199172165, + "learning_rate": 4.7073206961043175e-06, + "loss": 0.4272, + "step": 15412 + }, + { + "epoch": 0.9385866090186645, + "grad_norm": 1.0956498161284445, + "learning_rate": 4.707283230221494e-06, + "loss": 0.3855, + "step": 15413 + }, + { + "epoch": 0.9386475047955424, + "grad_norm": 1.0076309617066277, + "learning_rate": 4.707245762089936e-06, + "loss": 0.4553, + "step": 15414 + }, + { + "epoch": 0.9387084005724203, + "grad_norm": 1.1898515238467002, + "learning_rate": 4.7072082917096824e-06, + "loss": 0.385, + "step": 15415 + }, + { + "epoch": 0.9387692963492982, + "grad_norm": 0.9985005580628903, + "learning_rate": 4.70717081908077e-06, + "loss": 0.4434, + "step": 15416 + }, + { + "epoch": 0.938830192126176, + "grad_norm": 1.0832907495622286, + "learning_rate": 4.707133344203239e-06, + "loss": 0.3889, + "step": 15417 + }, + { + "epoch": 0.938891087903054, + "grad_norm": 1.0784188094974478, + "learning_rate": 4.7070958670771255e-06, + "loss": 0.4338, + "step": 15418 + }, + { + "epoch": 0.9389519836799318, + "grad_norm": 1.013991599387046, + "learning_rate": 4.707058387702469e-06, + "loss": 0.3869, + "step": 15419 + }, + { + "epoch": 0.9390128794568097, + "grad_norm": 0.9943142669604172, + "learning_rate": 4.707020906079307e-06, + "loss": 0.3589, + "step": 15420 + }, + { + "epoch": 0.9390737752336875, + "grad_norm": 1.0359601259677766, + "learning_rate": 4.706983422207678e-06, + "loss": 0.4455, + "step": 15421 + }, + { + "epoch": 0.9391346710105655, + "grad_norm": 0.9751422206267297, + "learning_rate": 4.706945936087621e-06, + "loss": 0.4487, + "step": 15422 + }, + { + "epoch": 0.9391955667874433, + "grad_norm": 1.0188870643790606, + "learning_rate": 4.706908447719173e-06, + "loss": 0.4213, + "step": 15423 + }, + { + "epoch": 0.9392564625643212, + "grad_norm": 1.0027445823734804, + "learning_rate": 4.706870957102372e-06, + "loss": 0.39, + "step": 15424 + }, + { + "epoch": 0.939317358341199, + "grad_norm": 0.9476095492936313, + "learning_rate": 4.706833464237257e-06, + "loss": 0.3986, + "step": 15425 + }, + { + "epoch": 0.939378254118077, + "grad_norm": 0.9972864680850855, + "learning_rate": 4.7067959691238655e-06, + "loss": 0.4462, + "step": 15426 + }, + { + "epoch": 0.9394391498949548, + "grad_norm": 0.982892387130019, + "learning_rate": 4.7067584717622375e-06, + "loss": 0.3548, + "step": 15427 + }, + { + "epoch": 0.9395000456718327, + "grad_norm": 0.9577367698644874, + "learning_rate": 4.706720972152409e-06, + "loss": 0.4447, + "step": 15428 + }, + { + "epoch": 0.9395609414487105, + "grad_norm": 1.0706986163155414, + "learning_rate": 4.706683470294418e-06, + "loss": 0.3979, + "step": 15429 + }, + { + "epoch": 0.9396218372255885, + "grad_norm": 0.9980868975606432, + "learning_rate": 4.706645966188306e-06, + "loss": 0.4199, + "step": 15430 + }, + { + "epoch": 0.9396827330024663, + "grad_norm": 1.004751083663695, + "learning_rate": 4.7066084598341075e-06, + "loss": 0.4492, + "step": 15431 + }, + { + "epoch": 0.9397436287793441, + "grad_norm": 1.0914214695600888, + "learning_rate": 4.706570951231863e-06, + "loss": 0.3859, + "step": 15432 + }, + { + "epoch": 0.939804524556222, + "grad_norm": 1.0306524463946876, + "learning_rate": 4.706533440381609e-06, + "loss": 0.4548, + "step": 15433 + }, + { + "epoch": 0.9398654203330999, + "grad_norm": 0.9436146537821517, + "learning_rate": 4.706495927283384e-06, + "loss": 0.395, + "step": 15434 + }, + { + "epoch": 0.9399263161099778, + "grad_norm": 1.0757603017785757, + "learning_rate": 4.706458411937228e-06, + "loss": 0.3696, + "step": 15435 + }, + { + "epoch": 0.9399872118868556, + "grad_norm": 1.1749742418013684, + "learning_rate": 4.706420894343178e-06, + "loss": 0.4026, + "step": 15436 + }, + { + "epoch": 0.9400481076637335, + "grad_norm": 1.0586749027210276, + "learning_rate": 4.706383374501272e-06, + "loss": 0.4173, + "step": 15437 + }, + { + "epoch": 0.9401090034406114, + "grad_norm": 1.1059825757136577, + "learning_rate": 4.706345852411549e-06, + "loss": 0.3934, + "step": 15438 + }, + { + "epoch": 0.9401698992174893, + "grad_norm": 0.9686464012829685, + "learning_rate": 4.706308328074047e-06, + "loss": 0.389, + "step": 15439 + }, + { + "epoch": 0.9402307949943671, + "grad_norm": 1.0101059444837466, + "learning_rate": 4.706270801488803e-06, + "loss": 0.4158, + "step": 15440 + }, + { + "epoch": 0.940291690771245, + "grad_norm": 1.0451306094203274, + "learning_rate": 4.706233272655855e-06, + "loss": 0.4256, + "step": 15441 + }, + { + "epoch": 0.9403525865481229, + "grad_norm": 1.0718110351783363, + "learning_rate": 4.706195741575244e-06, + "loss": 0.3407, + "step": 15442 + }, + { + "epoch": 0.9404134823250008, + "grad_norm": 1.0506453793085544, + "learning_rate": 4.706158208247007e-06, + "loss": 0.3728, + "step": 15443 + }, + { + "epoch": 0.9404743781018786, + "grad_norm": 0.9568823663931211, + "learning_rate": 4.7061206726711814e-06, + "loss": 0.4753, + "step": 15444 + }, + { + "epoch": 0.9405352738787565, + "grad_norm": 1.0121622131639547, + "learning_rate": 4.706083134847806e-06, + "loss": 0.4141, + "step": 15445 + }, + { + "epoch": 0.9405961696556344, + "grad_norm": 0.9803874687190405, + "learning_rate": 4.7060455947769185e-06, + "loss": 0.4903, + "step": 15446 + }, + { + "epoch": 0.9406570654325123, + "grad_norm": 0.8825482690238775, + "learning_rate": 4.7060080524585586e-06, + "loss": 0.449, + "step": 15447 + }, + { + "epoch": 0.9407179612093901, + "grad_norm": 1.0555519003885598, + "learning_rate": 4.705970507892763e-06, + "loss": 0.4681, + "step": 15448 + }, + { + "epoch": 0.940778856986268, + "grad_norm": 0.9381579884549932, + "learning_rate": 4.70593296107957e-06, + "loss": 0.3859, + "step": 15449 + }, + { + "epoch": 0.9408397527631459, + "grad_norm": 0.9803559139432957, + "learning_rate": 4.7058954120190184e-06, + "loss": 0.3943, + "step": 15450 + }, + { + "epoch": 0.9409006485400238, + "grad_norm": 0.9735996436163746, + "learning_rate": 4.705857860711147e-06, + "loss": 0.3875, + "step": 15451 + }, + { + "epoch": 0.9409615443169016, + "grad_norm": 1.025893438755108, + "learning_rate": 4.705820307155993e-06, + "loss": 0.3422, + "step": 15452 + }, + { + "epoch": 0.9410224400937794, + "grad_norm": 1.0418251346725584, + "learning_rate": 4.705782751353596e-06, + "loss": 0.3801, + "step": 15453 + }, + { + "epoch": 0.9410833358706574, + "grad_norm": 1.0443371808776407, + "learning_rate": 4.705745193303993e-06, + "loss": 0.3691, + "step": 15454 + }, + { + "epoch": 0.9411442316475352, + "grad_norm": 0.9878040338624016, + "learning_rate": 4.705707633007222e-06, + "loss": 0.3932, + "step": 15455 + }, + { + "epoch": 0.9412051274244131, + "grad_norm": 1.053212935890553, + "learning_rate": 4.7056700704633225e-06, + "loss": 0.4683, + "step": 15456 + }, + { + "epoch": 0.9412660232012909, + "grad_norm": 1.0280693210369944, + "learning_rate": 4.7056325056723315e-06, + "loss": 0.5544, + "step": 15457 + }, + { + "epoch": 0.9413269189781689, + "grad_norm": 1.0152260774587605, + "learning_rate": 4.705594938634289e-06, + "loss": 0.4972, + "step": 15458 + }, + { + "epoch": 0.9413878147550467, + "grad_norm": 0.9504524877046254, + "learning_rate": 4.705557369349232e-06, + "loss": 0.3448, + "step": 15459 + }, + { + "epoch": 0.9414487105319246, + "grad_norm": 0.9573158191116423, + "learning_rate": 4.7055197978171986e-06, + "loss": 0.4385, + "step": 15460 + }, + { + "epoch": 0.9415096063088025, + "grad_norm": 1.0617285157293435, + "learning_rate": 4.705482224038228e-06, + "loss": 0.353, + "step": 15461 + }, + { + "epoch": 0.9415705020856804, + "grad_norm": 1.1353881274444122, + "learning_rate": 4.705444648012357e-06, + "loss": 0.4543, + "step": 15462 + }, + { + "epoch": 0.9416313978625582, + "grad_norm": 1.0185604847971605, + "learning_rate": 4.705407069739626e-06, + "loss": 0.3343, + "step": 15463 + }, + { + "epoch": 0.9416922936394361, + "grad_norm": 0.983725813353555, + "learning_rate": 4.7053694892200715e-06, + "loss": 0.4131, + "step": 15464 + }, + { + "epoch": 0.941753189416314, + "grad_norm": 0.9789872995830201, + "learning_rate": 4.705331906453733e-06, + "loss": 0.3075, + "step": 15465 + }, + { + "epoch": 0.9418140851931919, + "grad_norm": 1.0066141942178977, + "learning_rate": 4.705294321440647e-06, + "loss": 0.5265, + "step": 15466 + }, + { + "epoch": 0.9418749809700697, + "grad_norm": 0.8958397020651891, + "learning_rate": 4.705256734180854e-06, + "loss": 0.4222, + "step": 15467 + }, + { + "epoch": 0.9419358767469476, + "grad_norm": 1.0611364330035165, + "learning_rate": 4.705219144674391e-06, + "loss": 0.3252, + "step": 15468 + }, + { + "epoch": 0.9419967725238255, + "grad_norm": 0.8995536221709522, + "learning_rate": 4.705181552921296e-06, + "loss": 0.4477, + "step": 15469 + }, + { + "epoch": 0.9420576683007034, + "grad_norm": 0.9901383692017169, + "learning_rate": 4.705143958921609e-06, + "loss": 0.3826, + "step": 15470 + }, + { + "epoch": 0.9421185640775812, + "grad_norm": 0.9538495550098203, + "learning_rate": 4.7051063626753665e-06, + "loss": 0.4115, + "step": 15471 + }, + { + "epoch": 0.9421794598544591, + "grad_norm": 1.0280392069324404, + "learning_rate": 4.7050687641826074e-06, + "loss": 0.4345, + "step": 15472 + }, + { + "epoch": 0.942240355631337, + "grad_norm": 1.0421552741944342, + "learning_rate": 4.705031163443371e-06, + "loss": 0.3917, + "step": 15473 + }, + { + "epoch": 0.9423012514082149, + "grad_norm": 1.0495859264590621, + "learning_rate": 4.704993560457694e-06, + "loss": 0.3983, + "step": 15474 + }, + { + "epoch": 0.9423621471850927, + "grad_norm": 1.0775617544844562, + "learning_rate": 4.704955955225615e-06, + "loss": 0.3752, + "step": 15475 + }, + { + "epoch": 0.9424230429619705, + "grad_norm": 0.9681308880517018, + "learning_rate": 4.704918347747173e-06, + "loss": 0.3985, + "step": 15476 + }, + { + "epoch": 0.9424839387388485, + "grad_norm": 1.053379729286246, + "learning_rate": 4.7048807380224056e-06, + "loss": 0.4136, + "step": 15477 + }, + { + "epoch": 0.9425448345157263, + "grad_norm": 1.1159000292099013, + "learning_rate": 4.704843126051352e-06, + "loss": 0.3272, + "step": 15478 + }, + { + "epoch": 0.9426057302926042, + "grad_norm": 1.0198081540134922, + "learning_rate": 4.704805511834051e-06, + "loss": 0.4213, + "step": 15479 + }, + { + "epoch": 0.942666626069482, + "grad_norm": 0.9931701210255208, + "learning_rate": 4.704767895370539e-06, + "loss": 0.4305, + "step": 15480 + }, + { + "epoch": 0.94272752184636, + "grad_norm": 1.0817863370849214, + "learning_rate": 4.704730276660855e-06, + "loss": 0.4032, + "step": 15481 + }, + { + "epoch": 0.9427884176232378, + "grad_norm": 0.964530428220946, + "learning_rate": 4.704692655705039e-06, + "loss": 0.427, + "step": 15482 + }, + { + "epoch": 0.9428493134001157, + "grad_norm": 0.984671765121385, + "learning_rate": 4.704655032503127e-06, + "loss": 0.4675, + "step": 15483 + }, + { + "epoch": 0.9429102091769935, + "grad_norm": 1.0113758355977183, + "learning_rate": 4.704617407055158e-06, + "loss": 0.4317, + "step": 15484 + }, + { + "epoch": 0.9429711049538715, + "grad_norm": 1.0154902311360607, + "learning_rate": 4.704579779361172e-06, + "loss": 0.4308, + "step": 15485 + }, + { + "epoch": 0.9430320007307493, + "grad_norm": 0.9676523024277223, + "learning_rate": 4.704542149421204e-06, + "loss": 0.3673, + "step": 15486 + }, + { + "epoch": 0.9430928965076272, + "grad_norm": 0.963839826224812, + "learning_rate": 4.704504517235295e-06, + "loss": 0.4578, + "step": 15487 + }, + { + "epoch": 0.943153792284505, + "grad_norm": 1.0241409365376615, + "learning_rate": 4.704466882803483e-06, + "loss": 0.3773, + "step": 15488 + }, + { + "epoch": 0.943214688061383, + "grad_norm": 0.9834144587993434, + "learning_rate": 4.704429246125805e-06, + "loss": 0.3786, + "step": 15489 + }, + { + "epoch": 0.9432755838382608, + "grad_norm": 0.9549649941943512, + "learning_rate": 4.704391607202302e-06, + "loss": 0.4068, + "step": 15490 + }, + { + "epoch": 0.9433364796151387, + "grad_norm": 1.0000502523284394, + "learning_rate": 4.704353966033009e-06, + "loss": 0.393, + "step": 15491 + }, + { + "epoch": 0.9433973753920165, + "grad_norm": 0.8896880253982705, + "learning_rate": 4.704316322617968e-06, + "loss": 0.4723, + "step": 15492 + }, + { + "epoch": 0.9434582711688945, + "grad_norm": 1.0403588150069067, + "learning_rate": 4.7042786769572135e-06, + "loss": 0.4141, + "step": 15493 + }, + { + "epoch": 0.9435191669457723, + "grad_norm": 1.017927171452685, + "learning_rate": 4.704241029050787e-06, + "loss": 0.4424, + "step": 15494 + }, + { + "epoch": 0.9435800627226502, + "grad_norm": 0.9433672604878901, + "learning_rate": 4.704203378898724e-06, + "loss": 0.4084, + "step": 15495 + }, + { + "epoch": 0.943640958499528, + "grad_norm": 1.0115267311074643, + "learning_rate": 4.704165726501065e-06, + "loss": 0.4351, + "step": 15496 + }, + { + "epoch": 0.943701854276406, + "grad_norm": 0.9652277326374739, + "learning_rate": 4.704128071857849e-06, + "loss": 0.5049, + "step": 15497 + }, + { + "epoch": 0.9437627500532838, + "grad_norm": 1.0090132040059794, + "learning_rate": 4.704090414969112e-06, + "loss": 0.3892, + "step": 15498 + }, + { + "epoch": 0.9438236458301617, + "grad_norm": 1.022057269297129, + "learning_rate": 4.7040527558348935e-06, + "loss": 0.3868, + "step": 15499 + }, + { + "epoch": 0.9438845416070396, + "grad_norm": 0.9453988691411991, + "learning_rate": 4.704015094455233e-06, + "loss": 0.3814, + "step": 15500 + }, + { + "epoch": 0.9439454373839175, + "grad_norm": 0.9912474881451627, + "learning_rate": 4.703977430830167e-06, + "loss": 0.3902, + "step": 15501 + }, + { + "epoch": 0.9440063331607953, + "grad_norm": 0.9868863843935762, + "learning_rate": 4.703939764959734e-06, + "loss": 0.3744, + "step": 15502 + }, + { + "epoch": 0.9440672289376731, + "grad_norm": 0.9686046865181784, + "learning_rate": 4.703902096843973e-06, + "loss": 0.4547, + "step": 15503 + }, + { + "epoch": 0.9441281247145511, + "grad_norm": 0.9790956268013826, + "learning_rate": 4.7038644264829234e-06, + "loss": 0.4177, + "step": 15504 + }, + { + "epoch": 0.9441890204914289, + "grad_norm": 0.9908551099169395, + "learning_rate": 4.703826753876622e-06, + "loss": 0.3732, + "step": 15505 + }, + { + "epoch": 0.9442499162683068, + "grad_norm": 1.0087354183424682, + "learning_rate": 4.703789079025108e-06, + "loss": 0.4203, + "step": 15506 + }, + { + "epoch": 0.9443108120451846, + "grad_norm": 1.0230914600605712, + "learning_rate": 4.703751401928419e-06, + "loss": 0.3713, + "step": 15507 + }, + { + "epoch": 0.9443717078220626, + "grad_norm": 0.9827777209166896, + "learning_rate": 4.703713722586594e-06, + "loss": 0.4195, + "step": 15508 + }, + { + "epoch": 0.9444326035989404, + "grad_norm": 0.9812061862075048, + "learning_rate": 4.7036760409996715e-06, + "loss": 0.4825, + "step": 15509 + }, + { + "epoch": 0.9444934993758183, + "grad_norm": 1.0771028553801196, + "learning_rate": 4.70363835716769e-06, + "loss": 0.3808, + "step": 15510 + }, + { + "epoch": 0.9445543951526961, + "grad_norm": 0.9776431672635214, + "learning_rate": 4.7036006710906865e-06, + "loss": 0.3875, + "step": 15511 + }, + { + "epoch": 0.9446152909295741, + "grad_norm": 0.9270154633177324, + "learning_rate": 4.703562982768701e-06, + "loss": 0.3912, + "step": 15512 + }, + { + "epoch": 0.9446761867064519, + "grad_norm": 0.9845279245165681, + "learning_rate": 4.7035252922017715e-06, + "loss": 0.4534, + "step": 15513 + }, + { + "epoch": 0.9447370824833298, + "grad_norm": 1.0050093146162276, + "learning_rate": 4.703487599389936e-06, + "loss": 0.4482, + "step": 15514 + }, + { + "epoch": 0.9447979782602076, + "grad_norm": 1.00499328292079, + "learning_rate": 4.703449904333234e-06, + "loss": 0.4135, + "step": 15515 + }, + { + "epoch": 0.9448588740370856, + "grad_norm": 0.9589489987990313, + "learning_rate": 4.703412207031702e-06, + "loss": 0.3949, + "step": 15516 + }, + { + "epoch": 0.9449197698139634, + "grad_norm": 0.9534089456879171, + "learning_rate": 4.70337450748538e-06, + "loss": 0.3966, + "step": 15517 + }, + { + "epoch": 0.9449806655908413, + "grad_norm": 1.0037893703416099, + "learning_rate": 4.703336805694306e-06, + "loss": 0.3796, + "step": 15518 + }, + { + "epoch": 0.9450415613677191, + "grad_norm": 1.0028872534944488, + "learning_rate": 4.703299101658518e-06, + "loss": 0.3991, + "step": 15519 + }, + { + "epoch": 0.9451024571445971, + "grad_norm": 1.0125270770840555, + "learning_rate": 4.703261395378054e-06, + "loss": 0.3389, + "step": 15520 + }, + { + "epoch": 0.9451633529214749, + "grad_norm": 1.097013680088937, + "learning_rate": 4.703223686852954e-06, + "loss": 0.3996, + "step": 15521 + }, + { + "epoch": 0.9452242486983528, + "grad_norm": 1.0409087804217343, + "learning_rate": 4.703185976083256e-06, + "loss": 0.3979, + "step": 15522 + }, + { + "epoch": 0.9452851444752306, + "grad_norm": 0.9794851800186709, + "learning_rate": 4.703148263068996e-06, + "loss": 0.4396, + "step": 15523 + }, + { + "epoch": 0.9453460402521086, + "grad_norm": 0.9970367824669337, + "learning_rate": 4.703110547810216e-06, + "loss": 0.4164, + "step": 15524 + }, + { + "epoch": 0.9454069360289864, + "grad_norm": 0.9625958970517955, + "learning_rate": 4.703072830306952e-06, + "loss": 0.4475, + "step": 15525 + }, + { + "epoch": 0.9454678318058642, + "grad_norm": 1.0381381996197845, + "learning_rate": 4.703035110559244e-06, + "loss": 0.3717, + "step": 15526 + }, + { + "epoch": 0.9455287275827421, + "grad_norm": 1.0640811935707537, + "learning_rate": 4.702997388567128e-06, + "loss": 0.4086, + "step": 15527 + }, + { + "epoch": 0.94558962335962, + "grad_norm": 0.9275341907682377, + "learning_rate": 4.702959664330646e-06, + "loss": 0.4042, + "step": 15528 + }, + { + "epoch": 0.9456505191364979, + "grad_norm": 0.9550801609846576, + "learning_rate": 4.702921937849834e-06, + "loss": 0.4629, + "step": 15529 + }, + { + "epoch": 0.9457114149133757, + "grad_norm": 1.006225767113423, + "learning_rate": 4.702884209124731e-06, + "loss": 0.3828, + "step": 15530 + }, + { + "epoch": 0.9457723106902536, + "grad_norm": 0.9430286416318324, + "learning_rate": 4.702846478155374e-06, + "loss": 0.4181, + "step": 15531 + }, + { + "epoch": 0.9458332064671315, + "grad_norm": 0.9980446444398846, + "learning_rate": 4.7028087449418046e-06, + "loss": 0.4225, + "step": 15532 + }, + { + "epoch": 0.9458941022440094, + "grad_norm": 0.9946490244858535, + "learning_rate": 4.7027710094840584e-06, + "loss": 0.3862, + "step": 15533 + }, + { + "epoch": 0.9459549980208872, + "grad_norm": 0.9381299907772478, + "learning_rate": 4.702733271782175e-06, + "loss": 0.4871, + "step": 15534 + }, + { + "epoch": 0.9460158937977651, + "grad_norm": 1.0084412346030538, + "learning_rate": 4.702695531836193e-06, + "loss": 0.4084, + "step": 15535 + }, + { + "epoch": 0.946076789574643, + "grad_norm": 1.0117873311187904, + "learning_rate": 4.702657789646151e-06, + "loss": 0.4251, + "step": 15536 + }, + { + "epoch": 0.9461376853515209, + "grad_norm": 0.9068286438575237, + "learning_rate": 4.702620045212086e-06, + "loss": 0.4716, + "step": 15537 + }, + { + "epoch": 0.9461985811283987, + "grad_norm": 1.0008707392188734, + "learning_rate": 4.702582298534038e-06, + "loss": 0.4308, + "step": 15538 + }, + { + "epoch": 0.9462594769052766, + "grad_norm": 0.9717763255854986, + "learning_rate": 4.702544549612045e-06, + "loss": 0.3861, + "step": 15539 + }, + { + "epoch": 0.9463203726821545, + "grad_norm": 0.9301736801922272, + "learning_rate": 4.7025067984461456e-06, + "loss": 0.4189, + "step": 15540 + }, + { + "epoch": 0.9463812684590324, + "grad_norm": 0.9360194855762759, + "learning_rate": 4.702469045036377e-06, + "loss": 0.3905, + "step": 15541 + }, + { + "epoch": 0.9464421642359102, + "grad_norm": 0.9000084338058915, + "learning_rate": 4.7024312893827805e-06, + "loss": 0.4236, + "step": 15542 + }, + { + "epoch": 0.9465030600127882, + "grad_norm": 1.1392660123749154, + "learning_rate": 4.702393531485392e-06, + "loss": 0.4223, + "step": 15543 + }, + { + "epoch": 0.946563955789666, + "grad_norm": 1.103359482048946, + "learning_rate": 4.70235577134425e-06, + "loss": 0.3603, + "step": 15544 + }, + { + "epoch": 0.9466248515665439, + "grad_norm": 1.0622613968886416, + "learning_rate": 4.702318008959394e-06, + "loss": 0.391, + "step": 15545 + }, + { + "epoch": 0.9466857473434217, + "grad_norm": 0.9597630645765111, + "learning_rate": 4.702280244330863e-06, + "loss": 0.4304, + "step": 15546 + }, + { + "epoch": 0.9467466431202997, + "grad_norm": 0.958921110870717, + "learning_rate": 4.702242477458694e-06, + "loss": 0.4371, + "step": 15547 + }, + { + "epoch": 0.9468075388971775, + "grad_norm": 1.0352886015265275, + "learning_rate": 4.7022047083429266e-06, + "loss": 0.3619, + "step": 15548 + }, + { + "epoch": 0.9468684346740553, + "grad_norm": 0.9953438716144426, + "learning_rate": 4.702166936983598e-06, + "loss": 0.3553, + "step": 15549 + }, + { + "epoch": 0.9469293304509332, + "grad_norm": 0.9771117024786976, + "learning_rate": 4.702129163380748e-06, + "loss": 0.3571, + "step": 15550 + }, + { + "epoch": 0.9469902262278111, + "grad_norm": 1.0057866569895668, + "learning_rate": 4.7020913875344156e-06, + "loss": 0.4241, + "step": 15551 + }, + { + "epoch": 0.947051122004689, + "grad_norm": 1.0085581697878474, + "learning_rate": 4.702053609444637e-06, + "loss": 0.417, + "step": 15552 + }, + { + "epoch": 0.9471120177815668, + "grad_norm": 1.003618368957257, + "learning_rate": 4.702015829111452e-06, + "loss": 0.3727, + "step": 15553 + }, + { + "epoch": 0.9471729135584447, + "grad_norm": 0.9286175089969583, + "learning_rate": 4.7019780465349e-06, + "loss": 0.4322, + "step": 15554 + }, + { + "epoch": 0.9472338093353226, + "grad_norm": 0.9262429737965463, + "learning_rate": 4.701940261715018e-06, + "loss": 0.4472, + "step": 15555 + }, + { + "epoch": 0.9472947051122005, + "grad_norm": 1.054134514681312, + "learning_rate": 4.7019024746518445e-06, + "loss": 0.3858, + "step": 15556 + }, + { + "epoch": 0.9473556008890783, + "grad_norm": 0.9808165463772412, + "learning_rate": 4.7018646853454196e-06, + "loss": 0.4414, + "step": 15557 + }, + { + "epoch": 0.9474164966659562, + "grad_norm": 0.9920453752897104, + "learning_rate": 4.70182689379578e-06, + "loss": 0.3799, + "step": 15558 + }, + { + "epoch": 0.9474773924428341, + "grad_norm": 1.033677841776666, + "learning_rate": 4.701789100002965e-06, + "loss": 0.448, + "step": 15559 + }, + { + "epoch": 0.947538288219712, + "grad_norm": 0.9571897925106787, + "learning_rate": 4.701751303967013e-06, + "loss": 0.4365, + "step": 15560 + }, + { + "epoch": 0.9475991839965898, + "grad_norm": 0.9624614550476454, + "learning_rate": 4.701713505687962e-06, + "loss": 0.3789, + "step": 15561 + }, + { + "epoch": 0.9476600797734677, + "grad_norm": 1.1019472503314045, + "learning_rate": 4.701675705165852e-06, + "loss": 0.378, + "step": 15562 + }, + { + "epoch": 0.9477209755503456, + "grad_norm": 0.9782442566258991, + "learning_rate": 4.701637902400721e-06, + "loss": 0.4186, + "step": 15563 + }, + { + "epoch": 0.9477818713272235, + "grad_norm": 0.9701358711659095, + "learning_rate": 4.701600097392606e-06, + "loss": 0.3993, + "step": 15564 + }, + { + "epoch": 0.9478427671041013, + "grad_norm": 1.0368015350985231, + "learning_rate": 4.701562290141547e-06, + "loss": 0.3553, + "step": 15565 + }, + { + "epoch": 0.9479036628809792, + "grad_norm": 1.0048455431293983, + "learning_rate": 4.701524480647583e-06, + "loss": 0.4327, + "step": 15566 + }, + { + "epoch": 0.9479645586578571, + "grad_norm": 1.073030971330014, + "learning_rate": 4.7014866689107494e-06, + "loss": 0.3084, + "step": 15567 + }, + { + "epoch": 0.948025454434735, + "grad_norm": 0.9718652299981534, + "learning_rate": 4.701448854931089e-06, + "loss": 0.4569, + "step": 15568 + }, + { + "epoch": 0.9480863502116128, + "grad_norm": 0.9699419670237773, + "learning_rate": 4.701411038708638e-06, + "loss": 0.5, + "step": 15569 + }, + { + "epoch": 0.9481472459884907, + "grad_norm": 1.0017650325808192, + "learning_rate": 4.701373220243433e-06, + "loss": 0.3473, + "step": 15570 + }, + { + "epoch": 0.9482081417653686, + "grad_norm": 0.9622983217928486, + "learning_rate": 4.7013353995355175e-06, + "loss": 0.4703, + "step": 15571 + }, + { + "epoch": 0.9482690375422465, + "grad_norm": 0.9643408327616423, + "learning_rate": 4.701297576584926e-06, + "loss": 0.4308, + "step": 15572 + }, + { + "epoch": 0.9483299333191243, + "grad_norm": 0.9831214075831657, + "learning_rate": 4.701259751391699e-06, + "loss": 0.4181, + "step": 15573 + }, + { + "epoch": 0.9483908290960021, + "grad_norm": 0.982766548026788, + "learning_rate": 4.701221923955873e-06, + "loss": 0.4036, + "step": 15574 + }, + { + "epoch": 0.9484517248728801, + "grad_norm": 1.047682802211518, + "learning_rate": 4.701184094277489e-06, + "loss": 0.3769, + "step": 15575 + }, + { + "epoch": 0.9485126206497579, + "grad_norm": 0.9454077039856068, + "learning_rate": 4.701146262356584e-06, + "loss": 0.4324, + "step": 15576 + }, + { + "epoch": 0.9485735164266358, + "grad_norm": 0.9897003118435133, + "learning_rate": 4.7011084281931975e-06, + "loss": 0.3986, + "step": 15577 + }, + { + "epoch": 0.9486344122035136, + "grad_norm": 1.0071136090647832, + "learning_rate": 4.701070591787367e-06, + "loss": 0.4565, + "step": 15578 + }, + { + "epoch": 0.9486953079803916, + "grad_norm": 1.1160552920516866, + "learning_rate": 4.701032753139132e-06, + "loss": 0.3673, + "step": 15579 + }, + { + "epoch": 0.9487562037572694, + "grad_norm": 0.9614571654640519, + "learning_rate": 4.70099491224853e-06, + "loss": 0.4172, + "step": 15580 + }, + { + "epoch": 0.9488170995341473, + "grad_norm": 0.9389734559780505, + "learning_rate": 4.7009570691156e-06, + "loss": 0.4182, + "step": 15581 + }, + { + "epoch": 0.9488779953110252, + "grad_norm": 0.9801031918319099, + "learning_rate": 4.7009192237403815e-06, + "loss": 0.3537, + "step": 15582 + }, + { + "epoch": 0.9489388910879031, + "grad_norm": 0.9927674461791578, + "learning_rate": 4.700881376122912e-06, + "loss": 0.3642, + "step": 15583 + }, + { + "epoch": 0.9489997868647809, + "grad_norm": 1.0603558023592061, + "learning_rate": 4.7008435262632295e-06, + "loss": 0.371, + "step": 15584 + }, + { + "epoch": 0.9490606826416588, + "grad_norm": 1.0000832341002155, + "learning_rate": 4.700805674161374e-06, + "loss": 0.3523, + "step": 15585 + }, + { + "epoch": 0.9491215784185367, + "grad_norm": 1.029563552129999, + "learning_rate": 4.700767819817384e-06, + "loss": 0.3783, + "step": 15586 + }, + { + "epoch": 0.9491824741954146, + "grad_norm": 1.0298563030525238, + "learning_rate": 4.700729963231296e-06, + "loss": 0.386, + "step": 15587 + }, + { + "epoch": 0.9492433699722924, + "grad_norm": 1.0041373362089776, + "learning_rate": 4.700692104403152e-06, + "loss": 0.3868, + "step": 15588 + }, + { + "epoch": 0.9493042657491703, + "grad_norm": 0.9739250121387054, + "learning_rate": 4.700654243332987e-06, + "loss": 0.4386, + "step": 15589 + }, + { + "epoch": 0.9493651615260482, + "grad_norm": 1.0904530279375415, + "learning_rate": 4.700616380020842e-06, + "loss": 0.3843, + "step": 15590 + }, + { + "epoch": 0.9494260573029261, + "grad_norm": 0.9085066632927996, + "learning_rate": 4.700578514466755e-06, + "loss": 0.4335, + "step": 15591 + }, + { + "epoch": 0.9494869530798039, + "grad_norm": 0.9441157067701779, + "learning_rate": 4.700540646670764e-06, + "loss": 0.4185, + "step": 15592 + }, + { + "epoch": 0.9495478488566818, + "grad_norm": 1.0970333619999755, + "learning_rate": 4.700502776632907e-06, + "loss": 0.3531, + "step": 15593 + }, + { + "epoch": 0.9496087446335597, + "grad_norm": 0.935210551434011, + "learning_rate": 4.7004649043532245e-06, + "loss": 0.4607, + "step": 15594 + }, + { + "epoch": 0.9496696404104376, + "grad_norm": 1.05370099774575, + "learning_rate": 4.700427029831755e-06, + "loss": 0.4068, + "step": 15595 + }, + { + "epoch": 0.9497305361873154, + "grad_norm": 0.9358616154080452, + "learning_rate": 4.700389153068535e-06, + "loss": 0.4429, + "step": 15596 + }, + { + "epoch": 0.9497914319641932, + "grad_norm": 0.972116111678017, + "learning_rate": 4.7003512740636045e-06, + "loss": 0.4047, + "step": 15597 + }, + { + "epoch": 0.9498523277410712, + "grad_norm": 0.9729992175621054, + "learning_rate": 4.700313392817002e-06, + "loss": 0.4306, + "step": 15598 + }, + { + "epoch": 0.949913223517949, + "grad_norm": 0.9622830333073816, + "learning_rate": 4.700275509328765e-06, + "loss": 0.4305, + "step": 15599 + }, + { + "epoch": 0.9499741192948269, + "grad_norm": 1.0594056545045014, + "learning_rate": 4.7002376235989346e-06, + "loss": 0.4368, + "step": 15600 + }, + { + "epoch": 0.9500350150717047, + "grad_norm": 1.0320311585674933, + "learning_rate": 4.700199735627547e-06, + "loss": 0.4044, + "step": 15601 + }, + { + "epoch": 0.9500959108485827, + "grad_norm": 1.0231034530962981, + "learning_rate": 4.700161845414641e-06, + "loss": 0.407, + "step": 15602 + }, + { + "epoch": 0.9501568066254605, + "grad_norm": 0.9865332180996641, + "learning_rate": 4.700123952960257e-06, + "loss": 0.453, + "step": 15603 + }, + { + "epoch": 0.9502177024023384, + "grad_norm": 0.9146837472132715, + "learning_rate": 4.700086058264433e-06, + "loss": 0.423, + "step": 15604 + }, + { + "epoch": 0.9502785981792162, + "grad_norm": 1.0267453601575915, + "learning_rate": 4.700048161327206e-06, + "loss": 0.387, + "step": 15605 + }, + { + "epoch": 0.9503394939560942, + "grad_norm": 0.9841108459143909, + "learning_rate": 4.700010262148615e-06, + "loss": 0.4509, + "step": 15606 + }, + { + "epoch": 0.950400389732972, + "grad_norm": 0.9408229407139752, + "learning_rate": 4.6999723607287e-06, + "loss": 0.4245, + "step": 15607 + }, + { + "epoch": 0.9504612855098499, + "grad_norm": 0.9608200914118924, + "learning_rate": 4.6999344570675e-06, + "loss": 0.4315, + "step": 15608 + }, + { + "epoch": 0.9505221812867277, + "grad_norm": 0.9445969036560958, + "learning_rate": 4.6998965511650515e-06, + "loss": 0.3887, + "step": 15609 + }, + { + "epoch": 0.9505830770636057, + "grad_norm": 1.0595728581083617, + "learning_rate": 4.699858643021394e-06, + "loss": 0.333, + "step": 15610 + }, + { + "epoch": 0.9506439728404835, + "grad_norm": 0.9939234564822147, + "learning_rate": 4.699820732636566e-06, + "loss": 0.3765, + "step": 15611 + }, + { + "epoch": 0.9507048686173614, + "grad_norm": 1.0705116450905652, + "learning_rate": 4.699782820010607e-06, + "loss": 0.338, + "step": 15612 + }, + { + "epoch": 0.9507657643942392, + "grad_norm": 1.0003480294093325, + "learning_rate": 4.699744905143555e-06, + "loss": 0.4418, + "step": 15613 + }, + { + "epoch": 0.9508266601711172, + "grad_norm": 1.0568685289705162, + "learning_rate": 4.699706988035449e-06, + "loss": 0.3737, + "step": 15614 + }, + { + "epoch": 0.950887555947995, + "grad_norm": 1.096500807320552, + "learning_rate": 4.699669068686326e-06, + "loss": 0.4092, + "step": 15615 + }, + { + "epoch": 0.9509484517248729, + "grad_norm": 0.9564054042388743, + "learning_rate": 4.699631147096227e-06, + "loss": 0.4206, + "step": 15616 + }, + { + "epoch": 0.9510093475017507, + "grad_norm": 1.0010995890412253, + "learning_rate": 4.699593223265189e-06, + "loss": 0.4624, + "step": 15617 + }, + { + "epoch": 0.9510702432786287, + "grad_norm": 1.0438042764947113, + "learning_rate": 4.699555297193251e-06, + "loss": 0.4245, + "step": 15618 + }, + { + "epoch": 0.9511311390555065, + "grad_norm": 0.9076763845606161, + "learning_rate": 4.699517368880452e-06, + "loss": 0.439, + "step": 15619 + }, + { + "epoch": 0.9511920348323843, + "grad_norm": 1.0561007661479311, + "learning_rate": 4.699479438326831e-06, + "loss": 0.419, + "step": 15620 + }, + { + "epoch": 0.9512529306092622, + "grad_norm": 0.9993092577332848, + "learning_rate": 4.699441505532425e-06, + "loss": 0.3617, + "step": 15621 + }, + { + "epoch": 0.9513138263861401, + "grad_norm": 1.0316282002400445, + "learning_rate": 4.699403570497275e-06, + "loss": 0.3942, + "step": 15622 + }, + { + "epoch": 0.951374722163018, + "grad_norm": 0.9547470334138156, + "learning_rate": 4.699365633221417e-06, + "loss": 0.4576, + "step": 15623 + }, + { + "epoch": 0.9514356179398958, + "grad_norm": 1.0033035901256735, + "learning_rate": 4.699327693704891e-06, + "loss": 0.3912, + "step": 15624 + }, + { + "epoch": 0.9514965137167738, + "grad_norm": 0.9618314977206117, + "learning_rate": 4.699289751947737e-06, + "loss": 0.4507, + "step": 15625 + }, + { + "epoch": 0.9515574094936516, + "grad_norm": 0.9360067705291755, + "learning_rate": 4.699251807949992e-06, + "loss": 0.4177, + "step": 15626 + }, + { + "epoch": 0.9516183052705295, + "grad_norm": 0.9470329934328992, + "learning_rate": 4.699213861711694e-06, + "loss": 0.3953, + "step": 15627 + }, + { + "epoch": 0.9516792010474073, + "grad_norm": 1.0578593164466954, + "learning_rate": 4.699175913232884e-06, + "loss": 0.3703, + "step": 15628 + }, + { + "epoch": 0.9517400968242853, + "grad_norm": 0.9667325648726263, + "learning_rate": 4.699137962513598e-06, + "loss": 0.4406, + "step": 15629 + }, + { + "epoch": 0.9518009926011631, + "grad_norm": 1.0297335506645509, + "learning_rate": 4.6991000095538765e-06, + "loss": 0.3825, + "step": 15630 + }, + { + "epoch": 0.951861888378041, + "grad_norm": 0.8882328097276048, + "learning_rate": 4.699062054353758e-06, + "loss": 0.5482, + "step": 15631 + }, + { + "epoch": 0.9519227841549188, + "grad_norm": 0.9880238993904796, + "learning_rate": 4.699024096913281e-06, + "loss": 0.4386, + "step": 15632 + }, + { + "epoch": 0.9519836799317968, + "grad_norm": 1.0108426241359965, + "learning_rate": 4.698986137232483e-06, + "loss": 0.3914, + "step": 15633 + }, + { + "epoch": 0.9520445757086746, + "grad_norm": 1.0084512056491675, + "learning_rate": 4.698948175311404e-06, + "loss": 0.3483, + "step": 15634 + }, + { + "epoch": 0.9521054714855525, + "grad_norm": 1.0649114302843115, + "learning_rate": 4.698910211150083e-06, + "loss": 0.3633, + "step": 15635 + }, + { + "epoch": 0.9521663672624303, + "grad_norm": 0.9707179938013905, + "learning_rate": 4.698872244748557e-06, + "loss": 0.4085, + "step": 15636 + }, + { + "epoch": 0.9522272630393083, + "grad_norm": 0.9187128535690054, + "learning_rate": 4.698834276106866e-06, + "loss": 0.3813, + "step": 15637 + }, + { + "epoch": 0.9522881588161861, + "grad_norm": 0.9866485945689251, + "learning_rate": 4.6987963052250485e-06, + "loss": 0.3184, + "step": 15638 + }, + { + "epoch": 0.952349054593064, + "grad_norm": 1.0096305902180323, + "learning_rate": 4.698758332103143e-06, + "loss": 0.463, + "step": 15639 + }, + { + "epoch": 0.9524099503699418, + "grad_norm": 1.0081833504622464, + "learning_rate": 4.698720356741188e-06, + "loss": 0.4604, + "step": 15640 + }, + { + "epoch": 0.9524708461468198, + "grad_norm": 1.0274082887562395, + "learning_rate": 4.698682379139222e-06, + "loss": 0.3712, + "step": 15641 + }, + { + "epoch": 0.9525317419236976, + "grad_norm": 0.9536413002654134, + "learning_rate": 4.698644399297285e-06, + "loss": 0.4633, + "step": 15642 + }, + { + "epoch": 0.9525926377005755, + "grad_norm": 1.055713221460871, + "learning_rate": 4.698606417215414e-06, + "loss": 0.4318, + "step": 15643 + }, + { + "epoch": 0.9526535334774533, + "grad_norm": 0.9676201584213656, + "learning_rate": 4.6985684328936495e-06, + "loss": 0.4195, + "step": 15644 + }, + { + "epoch": 0.9527144292543313, + "grad_norm": 0.9581430483000152, + "learning_rate": 4.698530446332029e-06, + "loss": 0.4506, + "step": 15645 + }, + { + "epoch": 0.9527753250312091, + "grad_norm": 0.8441703406979063, + "learning_rate": 4.698492457530592e-06, + "loss": 0.493, + "step": 15646 + }, + { + "epoch": 0.9528362208080869, + "grad_norm": 0.9872155670525917, + "learning_rate": 4.698454466489375e-06, + "loss": 0.4228, + "step": 15647 + }, + { + "epoch": 0.9528971165849648, + "grad_norm": 1.0998063473599933, + "learning_rate": 4.698416473208418e-06, + "loss": 0.4464, + "step": 15648 + }, + { + "epoch": 0.9529580123618427, + "grad_norm": 1.1636789484050951, + "learning_rate": 4.6983784776877615e-06, + "loss": 0.3339, + "step": 15649 + }, + { + "epoch": 0.9530189081387206, + "grad_norm": 1.0443972391085898, + "learning_rate": 4.698340479927442e-06, + "loss": 0.413, + "step": 15650 + }, + { + "epoch": 0.9530798039155984, + "grad_norm": 1.0061553404939647, + "learning_rate": 4.698302479927499e-06, + "loss": 0.4529, + "step": 15651 + }, + { + "epoch": 0.9531406996924763, + "grad_norm": 1.0304961213897463, + "learning_rate": 4.698264477687971e-06, + "loss": 0.3688, + "step": 15652 + }, + { + "epoch": 0.9532015954693542, + "grad_norm": 0.9927184132451342, + "learning_rate": 4.698226473208898e-06, + "loss": 0.4177, + "step": 15653 + }, + { + "epoch": 0.9532624912462321, + "grad_norm": 1.1122417141057603, + "learning_rate": 4.6981884664903165e-06, + "loss": 0.4016, + "step": 15654 + }, + { + "epoch": 0.9533233870231099, + "grad_norm": 0.9827812870725391, + "learning_rate": 4.6981504575322665e-06, + "loss": 0.3989, + "step": 15655 + }, + { + "epoch": 0.9533842827999878, + "grad_norm": 0.9779013160711387, + "learning_rate": 4.698112446334786e-06, + "loss": 0.3823, + "step": 15656 + }, + { + "epoch": 0.9534451785768657, + "grad_norm": 0.9939284680210345, + "learning_rate": 4.698074432897915e-06, + "loss": 0.4626, + "step": 15657 + }, + { + "epoch": 0.9535060743537436, + "grad_norm": 0.9428418795383701, + "learning_rate": 4.6980364172216906e-06, + "loss": 0.4094, + "step": 15658 + }, + { + "epoch": 0.9535669701306214, + "grad_norm": 0.9366624597355881, + "learning_rate": 4.697998399306154e-06, + "loss": 0.4511, + "step": 15659 + }, + { + "epoch": 0.9536278659074993, + "grad_norm": 0.9488283265090997, + "learning_rate": 4.69796037915134e-06, + "loss": 0.3899, + "step": 15660 + }, + { + "epoch": 0.9536887616843772, + "grad_norm": 0.9322018337235155, + "learning_rate": 4.697922356757292e-06, + "loss": 0.4215, + "step": 15661 + }, + { + "epoch": 0.9537496574612551, + "grad_norm": 0.9614761424093335, + "learning_rate": 4.697884332124045e-06, + "loss": 0.4071, + "step": 15662 + }, + { + "epoch": 0.9538105532381329, + "grad_norm": 1.0490139384676438, + "learning_rate": 4.69784630525164e-06, + "loss": 0.3741, + "step": 15663 + }, + { + "epoch": 0.9538714490150109, + "grad_norm": 1.1262380351025154, + "learning_rate": 4.697808276140114e-06, + "loss": 0.3756, + "step": 15664 + }, + { + "epoch": 0.9539323447918887, + "grad_norm": 0.9940857511159246, + "learning_rate": 4.697770244789507e-06, + "loss": 0.4129, + "step": 15665 + }, + { + "epoch": 0.9539932405687666, + "grad_norm": 0.9509203069287632, + "learning_rate": 4.697732211199857e-06, + "loss": 0.414, + "step": 15666 + }, + { + "epoch": 0.9540541363456444, + "grad_norm": 0.9571251262329182, + "learning_rate": 4.697694175371203e-06, + "loss": 0.4127, + "step": 15667 + }, + { + "epoch": 0.9541150321225224, + "grad_norm": 0.9941580262164555, + "learning_rate": 4.6976561373035855e-06, + "loss": 0.4018, + "step": 15668 + }, + { + "epoch": 0.9541759278994002, + "grad_norm": 0.9652510855139989, + "learning_rate": 4.69761809699704e-06, + "loss": 0.4502, + "step": 15669 + }, + { + "epoch": 0.954236823676278, + "grad_norm": 1.0351935408929966, + "learning_rate": 4.697580054451608e-06, + "loss": 0.4543, + "step": 15670 + }, + { + "epoch": 0.9542977194531559, + "grad_norm": 1.0337428398519832, + "learning_rate": 4.697542009667326e-06, + "loss": 0.3997, + "step": 15671 + }, + { + "epoch": 0.9543586152300338, + "grad_norm": 1.0311331333647347, + "learning_rate": 4.697503962644234e-06, + "loss": 0.4019, + "step": 15672 + }, + { + "epoch": 0.9544195110069117, + "grad_norm": 0.9876618357775833, + "learning_rate": 4.697465913382372e-06, + "loss": 0.4263, + "step": 15673 + }, + { + "epoch": 0.9544804067837895, + "grad_norm": 1.0668656361842759, + "learning_rate": 4.697427861881776e-06, + "loss": 0.3964, + "step": 15674 + }, + { + "epoch": 0.9545413025606674, + "grad_norm": 0.9661302350439626, + "learning_rate": 4.697389808142487e-06, + "loss": 0.4217, + "step": 15675 + }, + { + "epoch": 0.9546021983375453, + "grad_norm": 0.9445438739345807, + "learning_rate": 4.697351752164542e-06, + "loss": 0.3844, + "step": 15676 + }, + { + "epoch": 0.9546630941144232, + "grad_norm": 0.9757179008107623, + "learning_rate": 4.697313693947981e-06, + "loss": 0.3888, + "step": 15677 + }, + { + "epoch": 0.954723989891301, + "grad_norm": 0.9226742254657161, + "learning_rate": 4.697275633492843e-06, + "loss": 0.3972, + "step": 15678 + }, + { + "epoch": 0.9547848856681789, + "grad_norm": 0.9734147610591362, + "learning_rate": 4.697237570799166e-06, + "loss": 0.4511, + "step": 15679 + }, + { + "epoch": 0.9548457814450568, + "grad_norm": 1.0641735174044935, + "learning_rate": 4.697199505866989e-06, + "loss": 0.4336, + "step": 15680 + }, + { + "epoch": 0.9549066772219347, + "grad_norm": 1.0501629442377078, + "learning_rate": 4.697161438696351e-06, + "loss": 0.3718, + "step": 15681 + }, + { + "epoch": 0.9549675729988125, + "grad_norm": 1.0728654500196944, + "learning_rate": 4.6971233692872906e-06, + "loss": 0.3724, + "step": 15682 + }, + { + "epoch": 0.9550284687756904, + "grad_norm": 1.0287843260284186, + "learning_rate": 4.697085297639846e-06, + "loss": 0.3527, + "step": 15683 + }, + { + "epoch": 0.9550893645525683, + "grad_norm": 0.920318539288463, + "learning_rate": 4.6970472237540575e-06, + "loss": 0.4426, + "step": 15684 + }, + { + "epoch": 0.9551502603294462, + "grad_norm": 1.0436068112146328, + "learning_rate": 4.6970091476299625e-06, + "loss": 0.4313, + "step": 15685 + }, + { + "epoch": 0.955211156106324, + "grad_norm": 1.0035648919887068, + "learning_rate": 4.6969710692676e-06, + "loss": 0.3472, + "step": 15686 + }, + { + "epoch": 0.9552720518832019, + "grad_norm": 1.022543823180928, + "learning_rate": 4.696932988667009e-06, + "loss": 0.455, + "step": 15687 + }, + { + "epoch": 0.9553329476600798, + "grad_norm": 1.0014310958468795, + "learning_rate": 4.696894905828229e-06, + "loss": 0.4361, + "step": 15688 + }, + { + "epoch": 0.9553938434369577, + "grad_norm": 0.8835196887375717, + "learning_rate": 4.696856820751298e-06, + "loss": 0.4071, + "step": 15689 + }, + { + "epoch": 0.9554547392138355, + "grad_norm": 0.9991264152803848, + "learning_rate": 4.696818733436255e-06, + "loss": 0.3512, + "step": 15690 + }, + { + "epoch": 0.9555156349907133, + "grad_norm": 0.950264309661265, + "learning_rate": 4.696780643883138e-06, + "loss": 0.4053, + "step": 15691 + }, + { + "epoch": 0.9555765307675913, + "grad_norm": 1.0961214159591304, + "learning_rate": 4.696742552091987e-06, + "loss": 0.3565, + "step": 15692 + }, + { + "epoch": 0.9556374265444691, + "grad_norm": 1.0229000641314074, + "learning_rate": 4.696704458062841e-06, + "loss": 0.4018, + "step": 15693 + }, + { + "epoch": 0.955698322321347, + "grad_norm": 0.9487103139787225, + "learning_rate": 4.696666361795737e-06, + "loss": 0.4455, + "step": 15694 + }, + { + "epoch": 0.9557592180982248, + "grad_norm": 0.980013653190926, + "learning_rate": 4.696628263290716e-06, + "loss": 0.4011, + "step": 15695 + }, + { + "epoch": 0.9558201138751028, + "grad_norm": 1.0590038209292132, + "learning_rate": 4.696590162547816e-06, + "loss": 0.3327, + "step": 15696 + }, + { + "epoch": 0.9558810096519806, + "grad_norm": 0.9943445902530051, + "learning_rate": 4.696552059567074e-06, + "loss": 0.3615, + "step": 15697 + }, + { + "epoch": 0.9559419054288585, + "grad_norm": 0.9510275502498828, + "learning_rate": 4.6965139543485315e-06, + "loss": 0.4413, + "step": 15698 + }, + { + "epoch": 0.9560028012057363, + "grad_norm": 0.931344766675821, + "learning_rate": 4.696475846892227e-06, + "loss": 0.4272, + "step": 15699 + }, + { + "epoch": 0.9560636969826143, + "grad_norm": 1.0117707245947316, + "learning_rate": 4.696437737198198e-06, + "loss": 0.382, + "step": 15700 + }, + { + "epoch": 0.9561245927594921, + "grad_norm": 0.9994652970418629, + "learning_rate": 4.696399625266484e-06, + "loss": 0.3627, + "step": 15701 + }, + { + "epoch": 0.95618548853637, + "grad_norm": 1.0849399233443984, + "learning_rate": 4.696361511097123e-06, + "loss": 0.3694, + "step": 15702 + }, + { + "epoch": 0.9562463843132478, + "grad_norm": 0.9784022609908697, + "learning_rate": 4.696323394690154e-06, + "loss": 0.321, + "step": 15703 + }, + { + "epoch": 0.9563072800901258, + "grad_norm": 0.9808396160518406, + "learning_rate": 4.696285276045618e-06, + "loss": 0.3738, + "step": 15704 + }, + { + "epoch": 0.9563681758670036, + "grad_norm": 0.9516749225420321, + "learning_rate": 4.696247155163551e-06, + "loss": 0.436, + "step": 15705 + }, + { + "epoch": 0.9564290716438815, + "grad_norm": 1.0557144609047018, + "learning_rate": 4.696209032043994e-06, + "loss": 0.4311, + "step": 15706 + }, + { + "epoch": 0.9564899674207594, + "grad_norm": 1.0941021165659977, + "learning_rate": 4.6961709066869845e-06, + "loss": 0.4216, + "step": 15707 + }, + { + "epoch": 0.9565508631976373, + "grad_norm": 1.0096800112509714, + "learning_rate": 4.6961327790925615e-06, + "loss": 0.4797, + "step": 15708 + }, + { + "epoch": 0.9566117589745151, + "grad_norm": 1.1109377333248698, + "learning_rate": 4.696094649260764e-06, + "loss": 0.3423, + "step": 15709 + }, + { + "epoch": 0.956672654751393, + "grad_norm": 0.9006866038500034, + "learning_rate": 4.696056517191631e-06, + "loss": 0.4028, + "step": 15710 + }, + { + "epoch": 0.9567335505282709, + "grad_norm": 0.9715030210091836, + "learning_rate": 4.696018382885202e-06, + "loss": 0.4671, + "step": 15711 + }, + { + "epoch": 0.9567944463051488, + "grad_norm": 0.9878196583003365, + "learning_rate": 4.695980246341515e-06, + "loss": 0.3596, + "step": 15712 + }, + { + "epoch": 0.9568553420820266, + "grad_norm": 0.9935356112365478, + "learning_rate": 4.6959421075606085e-06, + "loss": 0.4056, + "step": 15713 + }, + { + "epoch": 0.9569162378589045, + "grad_norm": 1.0650829431643776, + "learning_rate": 4.6959039665425225e-06, + "loss": 0.366, + "step": 15714 + }, + { + "epoch": 0.9569771336357824, + "grad_norm": 0.9758756259886117, + "learning_rate": 4.6958658232872945e-06, + "loss": 0.446, + "step": 15715 + }, + { + "epoch": 0.9570380294126603, + "grad_norm": 0.9911332809329517, + "learning_rate": 4.6958276777949645e-06, + "loss": 0.4407, + "step": 15716 + }, + { + "epoch": 0.9570989251895381, + "grad_norm": 1.0191199367999921, + "learning_rate": 4.69578953006557e-06, + "loss": 0.4615, + "step": 15717 + }, + { + "epoch": 0.9571598209664159, + "grad_norm": 1.1242652205769286, + "learning_rate": 4.695751380099151e-06, + "loss": 0.3732, + "step": 15718 + }, + { + "epoch": 0.9572207167432939, + "grad_norm": 0.9482018426759021, + "learning_rate": 4.695713227895747e-06, + "loss": 0.4631, + "step": 15719 + }, + { + "epoch": 0.9572816125201717, + "grad_norm": 1.0317406383652479, + "learning_rate": 4.695675073455396e-06, + "loss": 0.4376, + "step": 15720 + }, + { + "epoch": 0.9573425082970496, + "grad_norm": 1.003845403773925, + "learning_rate": 4.695636916778135e-06, + "loss": 0.4352, + "step": 15721 + }, + { + "epoch": 0.9574034040739274, + "grad_norm": 0.9598619311703361, + "learning_rate": 4.6955987578640075e-06, + "loss": 0.3887, + "step": 15722 + }, + { + "epoch": 0.9574642998508054, + "grad_norm": 1.015105950691334, + "learning_rate": 4.695560596713048e-06, + "loss": 0.4564, + "step": 15723 + }, + { + "epoch": 0.9575251956276832, + "grad_norm": 1.0679915118349994, + "learning_rate": 4.695522433325297e-06, + "loss": 0.3574, + "step": 15724 + }, + { + "epoch": 0.9575860914045611, + "grad_norm": 0.944476804518845, + "learning_rate": 4.6954842677007935e-06, + "loss": 0.4857, + "step": 15725 + }, + { + "epoch": 0.9576469871814389, + "grad_norm": 0.931241471242497, + "learning_rate": 4.695446099839577e-06, + "loss": 0.3716, + "step": 15726 + }, + { + "epoch": 0.9577078829583169, + "grad_norm": 1.029529075046365, + "learning_rate": 4.695407929741685e-06, + "loss": 0.3455, + "step": 15727 + }, + { + "epoch": 0.9577687787351947, + "grad_norm": 1.066290953725699, + "learning_rate": 4.695369757407158e-06, + "loss": 0.3227, + "step": 15728 + }, + { + "epoch": 0.9578296745120726, + "grad_norm": 1.089816399601084, + "learning_rate": 4.695331582836033e-06, + "loss": 0.4285, + "step": 15729 + }, + { + "epoch": 0.9578905702889504, + "grad_norm": 1.0422134865267052, + "learning_rate": 4.695293406028349e-06, + "loss": 0.403, + "step": 15730 + }, + { + "epoch": 0.9579514660658284, + "grad_norm": 1.0062047463030406, + "learning_rate": 4.695255226984147e-06, + "loss": 0.4421, + "step": 15731 + }, + { + "epoch": 0.9580123618427062, + "grad_norm": 0.9317882430977685, + "learning_rate": 4.6952170457034645e-06, + "loss": 0.4323, + "step": 15732 + }, + { + "epoch": 0.9580732576195841, + "grad_norm": 1.2038097141102186, + "learning_rate": 4.695178862186341e-06, + "loss": 0.4225, + "step": 15733 + }, + { + "epoch": 0.9581341533964619, + "grad_norm": 0.9224923525167112, + "learning_rate": 4.695140676432813e-06, + "loss": 0.4013, + "step": 15734 + }, + { + "epoch": 0.9581950491733399, + "grad_norm": 1.028775555884671, + "learning_rate": 4.695102488442923e-06, + "loss": 0.4046, + "step": 15735 + }, + { + "epoch": 0.9582559449502177, + "grad_norm": 1.046320514204825, + "learning_rate": 4.695064298216708e-06, + "loss": 0.3642, + "step": 15736 + }, + { + "epoch": 0.9583168407270956, + "grad_norm": 0.9808866629086053, + "learning_rate": 4.695026105754207e-06, + "loss": 0.3893, + "step": 15737 + }, + { + "epoch": 0.9583777365039734, + "grad_norm": 0.9470480043789162, + "learning_rate": 4.6949879110554585e-06, + "loss": 0.4219, + "step": 15738 + }, + { + "epoch": 0.9584386322808514, + "grad_norm": 0.9608749339805315, + "learning_rate": 4.6949497141205026e-06, + "loss": 0.4534, + "step": 15739 + }, + { + "epoch": 0.9584995280577292, + "grad_norm": 0.9420765756349896, + "learning_rate": 4.694911514949377e-06, + "loss": 0.4439, + "step": 15740 + }, + { + "epoch": 0.958560423834607, + "grad_norm": 1.0073138233744834, + "learning_rate": 4.694873313542122e-06, + "loss": 0.3287, + "step": 15741 + }, + { + "epoch": 0.9586213196114849, + "grad_norm": 0.9643587400231712, + "learning_rate": 4.694835109898775e-06, + "loss": 0.3522, + "step": 15742 + }, + { + "epoch": 0.9586822153883628, + "grad_norm": 1.048760267477773, + "learning_rate": 4.694796904019376e-06, + "loss": 0.4475, + "step": 15743 + }, + { + "epoch": 0.9587431111652407, + "grad_norm": 0.984591243077866, + "learning_rate": 4.694758695903964e-06, + "loss": 0.4204, + "step": 15744 + }, + { + "epoch": 0.9588040069421185, + "grad_norm": 1.0754446895357739, + "learning_rate": 4.694720485552576e-06, + "loss": 0.3698, + "step": 15745 + }, + { + "epoch": 0.9588649027189965, + "grad_norm": 0.9778594218231297, + "learning_rate": 4.694682272965254e-06, + "loss": 0.4128, + "step": 15746 + }, + { + "epoch": 0.9589257984958743, + "grad_norm": 0.9583836940484213, + "learning_rate": 4.694644058142035e-06, + "loss": 0.4174, + "step": 15747 + }, + { + "epoch": 0.9589866942727522, + "grad_norm": 0.9606785204314846, + "learning_rate": 4.694605841082958e-06, + "loss": 0.4157, + "step": 15748 + }, + { + "epoch": 0.95904759004963, + "grad_norm": 1.1420793305495898, + "learning_rate": 4.694567621788062e-06, + "loss": 0.3789, + "step": 15749 + }, + { + "epoch": 0.959108485826508, + "grad_norm": 0.9835700823539903, + "learning_rate": 4.694529400257386e-06, + "loss": 0.4545, + "step": 15750 + }, + { + "epoch": 0.9591693816033858, + "grad_norm": 1.0710150175819058, + "learning_rate": 4.694491176490969e-06, + "loss": 0.3401, + "step": 15751 + }, + { + "epoch": 0.9592302773802637, + "grad_norm": 0.9313321260680536, + "learning_rate": 4.69445295048885e-06, + "loss": 0.4975, + "step": 15752 + }, + { + "epoch": 0.9592911731571415, + "grad_norm": 0.8893346073462153, + "learning_rate": 4.694414722251068e-06, + "loss": 0.4298, + "step": 15753 + }, + { + "epoch": 0.9593520689340195, + "grad_norm": 1.1008427820425883, + "learning_rate": 4.6943764917776625e-06, + "loss": 0.4178, + "step": 15754 + }, + { + "epoch": 0.9594129647108973, + "grad_norm": 1.0771304655759728, + "learning_rate": 4.694338259068672e-06, + "loss": 0.4021, + "step": 15755 + }, + { + "epoch": 0.9594738604877752, + "grad_norm": 0.9433365482357176, + "learning_rate": 4.6943000241241346e-06, + "loss": 0.4226, + "step": 15756 + }, + { + "epoch": 0.959534756264653, + "grad_norm": 0.9441207659395207, + "learning_rate": 4.6942617869440896e-06, + "loss": 0.4802, + "step": 15757 + }, + { + "epoch": 0.959595652041531, + "grad_norm": 1.0335779190564272, + "learning_rate": 4.694223547528577e-06, + "loss": 0.4188, + "step": 15758 + }, + { + "epoch": 0.9596565478184088, + "grad_norm": 0.9924084137138454, + "learning_rate": 4.694185305877636e-06, + "loss": 0.3873, + "step": 15759 + }, + { + "epoch": 0.9597174435952867, + "grad_norm": 0.9557506391192366, + "learning_rate": 4.694147061991303e-06, + "loss": 0.4609, + "step": 15760 + }, + { + "epoch": 0.9597783393721645, + "grad_norm": 0.9999242932042023, + "learning_rate": 4.694108815869619e-06, + "loss": 0.3833, + "step": 15761 + }, + { + "epoch": 0.9598392351490425, + "grad_norm": 0.8902772416025114, + "learning_rate": 4.694070567512623e-06, + "loss": 0.4906, + "step": 15762 + }, + { + "epoch": 0.9599001309259203, + "grad_norm": 1.1359992117520124, + "learning_rate": 4.694032316920353e-06, + "loss": 0.351, + "step": 15763 + }, + { + "epoch": 0.9599610267027981, + "grad_norm": 0.9970164677904052, + "learning_rate": 4.693994064092849e-06, + "loss": 0.3608, + "step": 15764 + }, + { + "epoch": 0.960021922479676, + "grad_norm": 0.9988696425733372, + "learning_rate": 4.693955809030149e-06, + "loss": 0.3571, + "step": 15765 + }, + { + "epoch": 0.960082818256554, + "grad_norm": 1.0110073625310063, + "learning_rate": 4.693917551732293e-06, + "loss": 0.4199, + "step": 15766 + }, + { + "epoch": 0.9601437140334318, + "grad_norm": 0.9794383285953204, + "learning_rate": 4.693879292199319e-06, + "loss": 0.3819, + "step": 15767 + }, + { + "epoch": 0.9602046098103096, + "grad_norm": 1.001025440823613, + "learning_rate": 4.693841030431267e-06, + "loss": 0.4351, + "step": 15768 + }, + { + "epoch": 0.9602655055871875, + "grad_norm": 0.8861663478130368, + "learning_rate": 4.693802766428173e-06, + "loss": 0.4273, + "step": 15769 + }, + { + "epoch": 0.9603264013640654, + "grad_norm": 0.9519167374692189, + "learning_rate": 4.693764500190081e-06, + "loss": 0.4243, + "step": 15770 + }, + { + "epoch": 0.9603872971409433, + "grad_norm": 0.996328119704942, + "learning_rate": 4.6937262317170265e-06, + "loss": 0.4159, + "step": 15771 + }, + { + "epoch": 0.9604481929178211, + "grad_norm": 0.9914796588794976, + "learning_rate": 4.693687961009049e-06, + "loss": 0.4325, + "step": 15772 + }, + { + "epoch": 0.960509088694699, + "grad_norm": 1.0108309424164716, + "learning_rate": 4.693649688066189e-06, + "loss": 0.4029, + "step": 15773 + }, + { + "epoch": 0.9605699844715769, + "grad_norm": 0.9763352069015021, + "learning_rate": 4.693611412888483e-06, + "loss": 0.43, + "step": 15774 + }, + { + "epoch": 0.9606308802484548, + "grad_norm": 1.0567756452478783, + "learning_rate": 4.6935731354759714e-06, + "loss": 0.3813, + "step": 15775 + }, + { + "epoch": 0.9606917760253326, + "grad_norm": 1.004508853696566, + "learning_rate": 4.693534855828694e-06, + "loss": 0.3707, + "step": 15776 + }, + { + "epoch": 0.9607526718022105, + "grad_norm": 0.9300452760079843, + "learning_rate": 4.693496573946688e-06, + "loss": 0.4656, + "step": 15777 + }, + { + "epoch": 0.9608135675790884, + "grad_norm": 1.0386546297718935, + "learning_rate": 4.693458289829994e-06, + "loss": 0.386, + "step": 15778 + }, + { + "epoch": 0.9608744633559663, + "grad_norm": 0.9847513544872992, + "learning_rate": 4.6934200034786495e-06, + "loss": 0.4321, + "step": 15779 + }, + { + "epoch": 0.9609353591328441, + "grad_norm": 0.9877996283285398, + "learning_rate": 4.693381714892695e-06, + "loss": 0.406, + "step": 15780 + }, + { + "epoch": 0.960996254909722, + "grad_norm": 1.0216062285412764, + "learning_rate": 4.6933434240721685e-06, + "loss": 0.3367, + "step": 15781 + }, + { + "epoch": 0.9610571506865999, + "grad_norm": 0.9944385103437783, + "learning_rate": 4.69330513101711e-06, + "loss": 0.3208, + "step": 15782 + }, + { + "epoch": 0.9611180464634778, + "grad_norm": 0.9397786302664747, + "learning_rate": 4.693266835727557e-06, + "loss": 0.3868, + "step": 15783 + }, + { + "epoch": 0.9611789422403556, + "grad_norm": 0.9625896792728974, + "learning_rate": 4.693228538203549e-06, + "loss": 0.4061, + "step": 15784 + }, + { + "epoch": 0.9612398380172334, + "grad_norm": 0.9949666652515011, + "learning_rate": 4.693190238445126e-06, + "loss": 0.3698, + "step": 15785 + }, + { + "epoch": 0.9613007337941114, + "grad_norm": 1.0005494655560976, + "learning_rate": 4.693151936452326e-06, + "loss": 0.3994, + "step": 15786 + }, + { + "epoch": 0.9613616295709893, + "grad_norm": 1.024181979931302, + "learning_rate": 4.693113632225189e-06, + "loss": 0.3552, + "step": 15787 + }, + { + "epoch": 0.9614225253478671, + "grad_norm": 1.0132849052975001, + "learning_rate": 4.693075325763753e-06, + "loss": 0.3715, + "step": 15788 + }, + { + "epoch": 0.961483421124745, + "grad_norm": 0.919745236344976, + "learning_rate": 4.693037017068057e-06, + "loss": 0.3849, + "step": 15789 + }, + { + "epoch": 0.9615443169016229, + "grad_norm": 0.9639691326397435, + "learning_rate": 4.692998706138142e-06, + "loss": 0.4208, + "step": 15790 + }, + { + "epoch": 0.9616052126785007, + "grad_norm": 1.0664708070246323, + "learning_rate": 4.692960392974044e-06, + "loss": 0.3694, + "step": 15791 + }, + { + "epoch": 0.9616661084553786, + "grad_norm": 0.9791377529308887, + "learning_rate": 4.6929220775758045e-06, + "loss": 0.4009, + "step": 15792 + }, + { + "epoch": 0.9617270042322565, + "grad_norm": 0.9765683189857197, + "learning_rate": 4.6928837599434605e-06, + "loss": 0.3656, + "step": 15793 + }, + { + "epoch": 0.9617879000091344, + "grad_norm": 1.002517134881254, + "learning_rate": 4.692845440077053e-06, + "loss": 0.3959, + "step": 15794 + }, + { + "epoch": 0.9618487957860122, + "grad_norm": 0.9290704440374264, + "learning_rate": 4.69280711797662e-06, + "loss": 0.4812, + "step": 15795 + }, + { + "epoch": 0.9619096915628901, + "grad_norm": 0.9560573110387529, + "learning_rate": 4.692768793642201e-06, + "loss": 0.45, + "step": 15796 + }, + { + "epoch": 0.961970587339768, + "grad_norm": 1.0803167557287583, + "learning_rate": 4.692730467073834e-06, + "loss": 0.3529, + "step": 15797 + }, + { + "epoch": 0.9620314831166459, + "grad_norm": 0.977525378022753, + "learning_rate": 4.69269213827156e-06, + "loss": 0.4167, + "step": 15798 + }, + { + "epoch": 0.9620923788935237, + "grad_norm": 1.0021164017111537, + "learning_rate": 4.692653807235416e-06, + "loss": 0.3992, + "step": 15799 + }, + { + "epoch": 0.9621532746704016, + "grad_norm": 1.0264493431275528, + "learning_rate": 4.692615473965441e-06, + "loss": 0.4753, + "step": 15800 + }, + { + "epoch": 0.9622141704472795, + "grad_norm": 0.937541458098711, + "learning_rate": 4.692577138461676e-06, + "loss": 0.4672, + "step": 15801 + }, + { + "epoch": 0.9622750662241574, + "grad_norm": 0.9637609724528147, + "learning_rate": 4.692538800724159e-06, + "loss": 0.4107, + "step": 15802 + }, + { + "epoch": 0.9623359620010352, + "grad_norm": 1.0396252429778858, + "learning_rate": 4.692500460752929e-06, + "loss": 0.4388, + "step": 15803 + }, + { + "epoch": 0.9623968577779131, + "grad_norm": 0.9510715201789364, + "learning_rate": 4.692462118548025e-06, + "loss": 0.4887, + "step": 15804 + }, + { + "epoch": 0.962457753554791, + "grad_norm": 0.995784316165877, + "learning_rate": 4.692423774109486e-06, + "loss": 0.3417, + "step": 15805 + }, + { + "epoch": 0.9625186493316689, + "grad_norm": 1.0571098326366697, + "learning_rate": 4.692385427437352e-06, + "loss": 0.385, + "step": 15806 + }, + { + "epoch": 0.9625795451085467, + "grad_norm": 1.0028828257999942, + "learning_rate": 4.69234707853166e-06, + "loss": 0.3589, + "step": 15807 + }, + { + "epoch": 0.9626404408854246, + "grad_norm": 1.008612896110506, + "learning_rate": 4.6923087273924515e-06, + "loss": 0.4015, + "step": 15808 + }, + { + "epoch": 0.9627013366623025, + "grad_norm": 0.9654175598979908, + "learning_rate": 4.692270374019764e-06, + "loss": 0.4017, + "step": 15809 + }, + { + "epoch": 0.9627622324391804, + "grad_norm": 0.9372863633512717, + "learning_rate": 4.692232018413637e-06, + "loss": 0.4071, + "step": 15810 + }, + { + "epoch": 0.9628231282160582, + "grad_norm": 0.9602604926152468, + "learning_rate": 4.692193660574109e-06, + "loss": 0.4246, + "step": 15811 + }, + { + "epoch": 0.962884023992936, + "grad_norm": 1.0588617419333737, + "learning_rate": 4.692155300501221e-06, + "loss": 0.3748, + "step": 15812 + }, + { + "epoch": 0.962944919769814, + "grad_norm": 0.9649409221784825, + "learning_rate": 4.6921169381950105e-06, + "loss": 0.4312, + "step": 15813 + }, + { + "epoch": 0.9630058155466918, + "grad_norm": 1.0087593528253098, + "learning_rate": 4.692078573655517e-06, + "loss": 0.4463, + "step": 15814 + }, + { + "epoch": 0.9630667113235697, + "grad_norm": 0.98823201636525, + "learning_rate": 4.692040206882777e-06, + "loss": 0.4147, + "step": 15815 + }, + { + "epoch": 0.9631276071004475, + "grad_norm": 1.0375153395070338, + "learning_rate": 4.692001837876835e-06, + "loss": 0.4141, + "step": 15816 + }, + { + "epoch": 0.9631885028773255, + "grad_norm": 1.0584132214821296, + "learning_rate": 4.6919634666377256e-06, + "loss": 0.336, + "step": 15817 + }, + { + "epoch": 0.9632493986542033, + "grad_norm": 0.9754031892162897, + "learning_rate": 4.69192509316549e-06, + "loss": 0.3295, + "step": 15818 + }, + { + "epoch": 0.9633102944310812, + "grad_norm": 0.9760155296964198, + "learning_rate": 4.691886717460166e-06, + "loss": 0.4358, + "step": 15819 + }, + { + "epoch": 0.963371190207959, + "grad_norm": 1.0072080595323507, + "learning_rate": 4.691848339521794e-06, + "loss": 0.4168, + "step": 15820 + }, + { + "epoch": 0.963432085984837, + "grad_norm": 1.060262489860657, + "learning_rate": 4.691809959350413e-06, + "loss": 0.381, + "step": 15821 + }, + { + "epoch": 0.9634929817617148, + "grad_norm": 0.9991656093737155, + "learning_rate": 4.69177157694606e-06, + "loss": 0.3605, + "step": 15822 + }, + { + "epoch": 0.9635538775385927, + "grad_norm": 0.8843874865974511, + "learning_rate": 4.691733192308777e-06, + "loss": 0.4673, + "step": 15823 + }, + { + "epoch": 0.9636147733154705, + "grad_norm": 0.9269182625020483, + "learning_rate": 4.691694805438601e-06, + "loss": 0.4403, + "step": 15824 + }, + { + "epoch": 0.9636756690923485, + "grad_norm": 0.9744768506132919, + "learning_rate": 4.691656416335573e-06, + "loss": 0.4615, + "step": 15825 + }, + { + "epoch": 0.9637365648692263, + "grad_norm": 0.9838176528896124, + "learning_rate": 4.69161802499973e-06, + "loss": 0.3817, + "step": 15826 + }, + { + "epoch": 0.9637974606461042, + "grad_norm": 1.0612904568565014, + "learning_rate": 4.691579631431112e-06, + "loss": 0.4035, + "step": 15827 + }, + { + "epoch": 0.9638583564229821, + "grad_norm": 0.9659688700583825, + "learning_rate": 4.691541235629759e-06, + "loss": 0.4545, + "step": 15828 + }, + { + "epoch": 0.96391925219986, + "grad_norm": 0.995651884280851, + "learning_rate": 4.69150283759571e-06, + "loss": 0.3912, + "step": 15829 + }, + { + "epoch": 0.9639801479767378, + "grad_norm": 0.8855880823696598, + "learning_rate": 4.6914644373290015e-06, + "loss": 0.4518, + "step": 15830 + }, + { + "epoch": 0.9640410437536157, + "grad_norm": 1.027364618633393, + "learning_rate": 4.6914260348296754e-06, + "loss": 0.3895, + "step": 15831 + }, + { + "epoch": 0.9641019395304936, + "grad_norm": 1.0952629359709378, + "learning_rate": 4.69138763009777e-06, + "loss": 0.4023, + "step": 15832 + }, + { + "epoch": 0.9641628353073715, + "grad_norm": 0.9663439796815931, + "learning_rate": 4.6913492231333245e-06, + "loss": 0.5133, + "step": 15833 + }, + { + "epoch": 0.9642237310842493, + "grad_norm": 1.0576759511998128, + "learning_rate": 4.6913108139363784e-06, + "loss": 0.3992, + "step": 15834 + }, + { + "epoch": 0.9642846268611271, + "grad_norm": 1.0063770814939015, + "learning_rate": 4.69127240250697e-06, + "loss": 0.4231, + "step": 15835 + }, + { + "epoch": 0.9643455226380051, + "grad_norm": 1.1061748383666625, + "learning_rate": 4.69123398884514e-06, + "loss": 0.4519, + "step": 15836 + }, + { + "epoch": 0.964406418414883, + "grad_norm": 1.0134290019617422, + "learning_rate": 4.691195572950925e-06, + "loss": 0.4179, + "step": 15837 + }, + { + "epoch": 0.9644673141917608, + "grad_norm": 0.9900740415290714, + "learning_rate": 4.691157154824365e-06, + "loss": 0.4665, + "step": 15838 + }, + { + "epoch": 0.9645282099686386, + "grad_norm": 1.0267689698842135, + "learning_rate": 4.691118734465501e-06, + "loss": 0.4233, + "step": 15839 + }, + { + "epoch": 0.9645891057455166, + "grad_norm": 1.0507351380871315, + "learning_rate": 4.691080311874369e-06, + "loss": 0.4183, + "step": 15840 + }, + { + "epoch": 0.9646500015223944, + "grad_norm": 0.9687389325126978, + "learning_rate": 4.691041887051012e-06, + "loss": 0.4037, + "step": 15841 + }, + { + "epoch": 0.9647108972992723, + "grad_norm": 1.035987055532867, + "learning_rate": 4.691003459995467e-06, + "loss": 0.3898, + "step": 15842 + }, + { + "epoch": 0.9647717930761501, + "grad_norm": 1.056346757791367, + "learning_rate": 4.690965030707772e-06, + "loss": 0.4087, + "step": 15843 + }, + { + "epoch": 0.9648326888530281, + "grad_norm": 1.09556204805555, + "learning_rate": 4.690926599187968e-06, + "loss": 0.3295, + "step": 15844 + }, + { + "epoch": 0.9648935846299059, + "grad_norm": 1.049598059420006, + "learning_rate": 4.690888165436094e-06, + "loss": 0.4009, + "step": 15845 + }, + { + "epoch": 0.9649544804067838, + "grad_norm": 0.9454419686757409, + "learning_rate": 4.690849729452187e-06, + "loss": 0.4471, + "step": 15846 + }, + { + "epoch": 0.9650153761836616, + "grad_norm": 1.0502189339532757, + "learning_rate": 4.690811291236289e-06, + "loss": 0.4175, + "step": 15847 + }, + { + "epoch": 0.9650762719605396, + "grad_norm": 1.0030010266459244, + "learning_rate": 4.690772850788438e-06, + "loss": 0.4189, + "step": 15848 + }, + { + "epoch": 0.9651371677374174, + "grad_norm": 0.9060756434873743, + "learning_rate": 4.690734408108673e-06, + "loss": 0.4271, + "step": 15849 + }, + { + "epoch": 0.9651980635142953, + "grad_norm": 1.0080884783455526, + "learning_rate": 4.690695963197034e-06, + "loss": 0.3768, + "step": 15850 + }, + { + "epoch": 0.9652589592911731, + "grad_norm": 0.976780886076034, + "learning_rate": 4.690657516053558e-06, + "loss": 0.3275, + "step": 15851 + }, + { + "epoch": 0.9653198550680511, + "grad_norm": 0.9890946717856974, + "learning_rate": 4.690619066678287e-06, + "loss": 0.4504, + "step": 15852 + }, + { + "epoch": 0.9653807508449289, + "grad_norm": 1.0067739968877574, + "learning_rate": 4.6905806150712585e-06, + "loss": 0.4006, + "step": 15853 + }, + { + "epoch": 0.9654416466218068, + "grad_norm": 1.0244930900175016, + "learning_rate": 4.6905421612325116e-06, + "loss": 0.4298, + "step": 15854 + }, + { + "epoch": 0.9655025423986846, + "grad_norm": 0.9865775713338815, + "learning_rate": 4.690503705162087e-06, + "loss": 0.434, + "step": 15855 + }, + { + "epoch": 0.9655634381755626, + "grad_norm": 1.0790635214022049, + "learning_rate": 4.690465246860022e-06, + "loss": 0.4132, + "step": 15856 + }, + { + "epoch": 0.9656243339524404, + "grad_norm": 1.0986308145280248, + "learning_rate": 4.690426786326356e-06, + "loss": 0.3909, + "step": 15857 + }, + { + "epoch": 0.9656852297293183, + "grad_norm": 1.0622475838812562, + "learning_rate": 4.69038832356113e-06, + "loss": 0.3684, + "step": 15858 + }, + { + "epoch": 0.9657461255061961, + "grad_norm": 1.0885136287770452, + "learning_rate": 4.690349858564381e-06, + "loss": 0.3492, + "step": 15859 + }, + { + "epoch": 0.965807021283074, + "grad_norm": 1.0102430297875091, + "learning_rate": 4.6903113913361486e-06, + "loss": 0.4358, + "step": 15860 + }, + { + "epoch": 0.9658679170599519, + "grad_norm": 1.1441216125821438, + "learning_rate": 4.690272921876473e-06, + "loss": 0.3889, + "step": 15861 + }, + { + "epoch": 0.9659288128368297, + "grad_norm": 1.0170524910797412, + "learning_rate": 4.690234450185393e-06, + "loss": 0.3368, + "step": 15862 + }, + { + "epoch": 0.9659897086137076, + "grad_norm": 0.8708941146288374, + "learning_rate": 4.690195976262948e-06, + "loss": 0.4357, + "step": 15863 + }, + { + "epoch": 0.9660506043905855, + "grad_norm": 1.01695087329147, + "learning_rate": 4.690157500109177e-06, + "loss": 0.3919, + "step": 15864 + }, + { + "epoch": 0.9661115001674634, + "grad_norm": 0.9911266871535529, + "learning_rate": 4.690119021724119e-06, + "loss": 0.3405, + "step": 15865 + }, + { + "epoch": 0.9661723959443412, + "grad_norm": 0.9656494705734417, + "learning_rate": 4.6900805411078136e-06, + "loss": 0.4516, + "step": 15866 + }, + { + "epoch": 0.9662332917212191, + "grad_norm": 1.0666430880314317, + "learning_rate": 4.690042058260298e-06, + "loss": 0.3851, + "step": 15867 + }, + { + "epoch": 0.966294187498097, + "grad_norm": 1.087192515555129, + "learning_rate": 4.690003573181614e-06, + "loss": 0.3577, + "step": 15868 + }, + { + "epoch": 0.9663550832749749, + "grad_norm": 0.9947394452952126, + "learning_rate": 4.6899650858718005e-06, + "loss": 0.4371, + "step": 15869 + }, + { + "epoch": 0.9664159790518527, + "grad_norm": 1.0283060601365006, + "learning_rate": 4.689926596330895e-06, + "loss": 0.3764, + "step": 15870 + }, + { + "epoch": 0.9664768748287307, + "grad_norm": 0.9364036371411315, + "learning_rate": 4.689888104558939e-06, + "loss": 0.4374, + "step": 15871 + }, + { + "epoch": 0.9665377706056085, + "grad_norm": 1.0639731326872692, + "learning_rate": 4.68984961055597e-06, + "loss": 0.4393, + "step": 15872 + }, + { + "epoch": 0.9665986663824864, + "grad_norm": 0.9881234850993533, + "learning_rate": 4.689811114322027e-06, + "loss": 0.4597, + "step": 15873 + }, + { + "epoch": 0.9666595621593642, + "grad_norm": 0.9250970790063501, + "learning_rate": 4.6897726158571514e-06, + "loss": 0.4163, + "step": 15874 + }, + { + "epoch": 0.9667204579362422, + "grad_norm": 1.0628106339488512, + "learning_rate": 4.6897341151613805e-06, + "loss": 0.3659, + "step": 15875 + }, + { + "epoch": 0.96678135371312, + "grad_norm": 1.014695618935097, + "learning_rate": 4.689695612234753e-06, + "loss": 0.4352, + "step": 15876 + }, + { + "epoch": 0.9668422494899979, + "grad_norm": 0.9967669165116858, + "learning_rate": 4.68965710707731e-06, + "loss": 0.4546, + "step": 15877 + }, + { + "epoch": 0.9669031452668757, + "grad_norm": 1.0055448031243757, + "learning_rate": 4.6896185996890894e-06, + "loss": 0.4137, + "step": 15878 + }, + { + "epoch": 0.9669640410437537, + "grad_norm": 1.0350861369921471, + "learning_rate": 4.689580090070131e-06, + "loss": 0.4226, + "step": 15879 + }, + { + "epoch": 0.9670249368206315, + "grad_norm": 1.0307177487006194, + "learning_rate": 4.6895415782204755e-06, + "loss": 0.4114, + "step": 15880 + }, + { + "epoch": 0.9670858325975094, + "grad_norm": 0.9217982958072088, + "learning_rate": 4.689503064140158e-06, + "loss": 0.4141, + "step": 15881 + }, + { + "epoch": 0.9671467283743872, + "grad_norm": 1.0147661886744066, + "learning_rate": 4.689464547829222e-06, + "loss": 0.3438, + "step": 15882 + }, + { + "epoch": 0.9672076241512652, + "grad_norm": 0.9717961241233196, + "learning_rate": 4.689426029287705e-06, + "loss": 0.392, + "step": 15883 + }, + { + "epoch": 0.967268519928143, + "grad_norm": 1.0439727830282042, + "learning_rate": 4.689387508515646e-06, + "loss": 0.4093, + "step": 15884 + }, + { + "epoch": 0.9673294157050208, + "grad_norm": 0.9510801518729814, + "learning_rate": 4.6893489855130846e-06, + "loss": 0.4186, + "step": 15885 + }, + { + "epoch": 0.9673903114818987, + "grad_norm": 0.9483694457992801, + "learning_rate": 4.689310460280059e-06, + "loss": 0.41, + "step": 15886 + }, + { + "epoch": 0.9674512072587766, + "grad_norm": 0.9522248377888225, + "learning_rate": 4.6892719328166105e-06, + "loss": 0.3971, + "step": 15887 + }, + { + "epoch": 0.9675121030356545, + "grad_norm": 0.922375490417984, + "learning_rate": 4.6892334031227775e-06, + "loss": 0.4445, + "step": 15888 + }, + { + "epoch": 0.9675729988125323, + "grad_norm": 1.0012469631405723, + "learning_rate": 4.689194871198598e-06, + "loss": 0.4133, + "step": 15889 + }, + { + "epoch": 0.9676338945894102, + "grad_norm": 0.9991977178834368, + "learning_rate": 4.689156337044113e-06, + "loss": 0.3789, + "step": 15890 + }, + { + "epoch": 0.9676947903662881, + "grad_norm": 0.9497286629932677, + "learning_rate": 4.689117800659361e-06, + "loss": 0.4118, + "step": 15891 + }, + { + "epoch": 0.967755686143166, + "grad_norm": 0.9762744515131648, + "learning_rate": 4.68907926204438e-06, + "loss": 0.4178, + "step": 15892 + }, + { + "epoch": 0.9678165819200438, + "grad_norm": 0.9569353520977573, + "learning_rate": 4.689040721199212e-06, + "loss": 0.413, + "step": 15893 + }, + { + "epoch": 0.9678774776969217, + "grad_norm": 1.0418386458942637, + "learning_rate": 4.689002178123895e-06, + "loss": 0.3687, + "step": 15894 + }, + { + "epoch": 0.9679383734737996, + "grad_norm": 1.1248825253487913, + "learning_rate": 4.688963632818467e-06, + "loss": 0.3935, + "step": 15895 + }, + { + "epoch": 0.9679992692506775, + "grad_norm": 0.9951865783322256, + "learning_rate": 4.6889250852829695e-06, + "loss": 0.3866, + "step": 15896 + }, + { + "epoch": 0.9680601650275553, + "grad_norm": 1.1026423096020967, + "learning_rate": 4.68888653551744e-06, + "loss": 0.4474, + "step": 15897 + }, + { + "epoch": 0.9681210608044332, + "grad_norm": 0.907102713846249, + "learning_rate": 4.688847983521918e-06, + "loss": 0.4235, + "step": 15898 + }, + { + "epoch": 0.9681819565813111, + "grad_norm": 0.9831096025285408, + "learning_rate": 4.688809429296444e-06, + "loss": 0.3874, + "step": 15899 + }, + { + "epoch": 0.968242852358189, + "grad_norm": 1.0269750976003529, + "learning_rate": 4.688770872841056e-06, + "loss": 0.3669, + "step": 15900 + }, + { + "epoch": 0.9683037481350668, + "grad_norm": 0.9122194651564797, + "learning_rate": 4.688732314155794e-06, + "loss": 0.4374, + "step": 15901 + }, + { + "epoch": 0.9683646439119447, + "grad_norm": 0.9368486820043472, + "learning_rate": 4.688693753240697e-06, + "loss": 0.4075, + "step": 15902 + }, + { + "epoch": 0.9684255396888226, + "grad_norm": 0.9679618498594588, + "learning_rate": 4.6886551900958045e-06, + "loss": 0.3641, + "step": 15903 + }, + { + "epoch": 0.9684864354657005, + "grad_norm": 1.0355077279066274, + "learning_rate": 4.688616624721155e-06, + "loss": 0.4215, + "step": 15904 + }, + { + "epoch": 0.9685473312425783, + "grad_norm": 0.9791544527593093, + "learning_rate": 4.688578057116788e-06, + "loss": 0.4095, + "step": 15905 + }, + { + "epoch": 0.9686082270194561, + "grad_norm": 0.9119179022046497, + "learning_rate": 4.6885394872827445e-06, + "loss": 0.4395, + "step": 15906 + }, + { + "epoch": 0.9686691227963341, + "grad_norm": 0.9968310372065873, + "learning_rate": 4.688500915219062e-06, + "loss": 0.4598, + "step": 15907 + }, + { + "epoch": 0.968730018573212, + "grad_norm": 1.0284101427186363, + "learning_rate": 4.68846234092578e-06, + "loss": 0.4259, + "step": 15908 + }, + { + "epoch": 0.9687909143500898, + "grad_norm": 0.9536168208704818, + "learning_rate": 4.688423764402938e-06, + "loss": 0.4646, + "step": 15909 + }, + { + "epoch": 0.9688518101269677, + "grad_norm": 1.0388340430900842, + "learning_rate": 4.688385185650576e-06, + "loss": 0.4005, + "step": 15910 + }, + { + "epoch": 0.9689127059038456, + "grad_norm": 0.9098044779335425, + "learning_rate": 4.688346604668732e-06, + "loss": 0.4775, + "step": 15911 + }, + { + "epoch": 0.9689736016807234, + "grad_norm": 0.9566539421584879, + "learning_rate": 4.688308021457446e-06, + "loss": 0.3895, + "step": 15912 + }, + { + "epoch": 0.9690344974576013, + "grad_norm": 1.0571020994107407, + "learning_rate": 4.688269436016757e-06, + "loss": 0.3825, + "step": 15913 + }, + { + "epoch": 0.9690953932344792, + "grad_norm": 1.0038160281626856, + "learning_rate": 4.688230848346705e-06, + "loss": 0.3814, + "step": 15914 + }, + { + "epoch": 0.9691562890113571, + "grad_norm": 1.1060756942106234, + "learning_rate": 4.688192258447329e-06, + "loss": 0.4319, + "step": 15915 + }, + { + "epoch": 0.9692171847882349, + "grad_norm": 1.0047644751434825, + "learning_rate": 4.688153666318669e-06, + "loss": 0.351, + "step": 15916 + }, + { + "epoch": 0.9692780805651128, + "grad_norm": 0.9619788737658024, + "learning_rate": 4.688115071960762e-06, + "loss": 0.4826, + "step": 15917 + }, + { + "epoch": 0.9693389763419907, + "grad_norm": 0.996589933214505, + "learning_rate": 4.6880764753736495e-06, + "loss": 0.4106, + "step": 15918 + }, + { + "epoch": 0.9693998721188686, + "grad_norm": 0.9688300900780064, + "learning_rate": 4.68803787655737e-06, + "loss": 0.4706, + "step": 15919 + }, + { + "epoch": 0.9694607678957464, + "grad_norm": 0.97145847202073, + "learning_rate": 4.6879992755119635e-06, + "loss": 0.4102, + "step": 15920 + }, + { + "epoch": 0.9695216636726243, + "grad_norm": 1.0154278137215251, + "learning_rate": 4.6879606722374686e-06, + "loss": 0.4841, + "step": 15921 + }, + { + "epoch": 0.9695825594495022, + "grad_norm": 1.0608306285026856, + "learning_rate": 4.687922066733925e-06, + "loss": 0.3556, + "step": 15922 + }, + { + "epoch": 0.9696434552263801, + "grad_norm": 0.9778408403228259, + "learning_rate": 4.687883459001372e-06, + "loss": 0.416, + "step": 15923 + }, + { + "epoch": 0.9697043510032579, + "grad_norm": 0.9655194474401457, + "learning_rate": 4.6878448490398484e-06, + "loss": 0.4005, + "step": 15924 + }, + { + "epoch": 0.9697652467801358, + "grad_norm": 1.0156227968552582, + "learning_rate": 4.687806236849394e-06, + "loss": 0.4114, + "step": 15925 + }, + { + "epoch": 0.9698261425570137, + "grad_norm": 0.9882781517337215, + "learning_rate": 4.687767622430049e-06, + "loss": 0.4136, + "step": 15926 + }, + { + "epoch": 0.9698870383338916, + "grad_norm": 0.9930767328273243, + "learning_rate": 4.6877290057818505e-06, + "loss": 0.4612, + "step": 15927 + }, + { + "epoch": 0.9699479341107694, + "grad_norm": 1.0592519764859931, + "learning_rate": 4.687690386904839e-06, + "loss": 0.3079, + "step": 15928 + }, + { + "epoch": 0.9700088298876473, + "grad_norm": 1.0082950971871243, + "learning_rate": 4.687651765799055e-06, + "loss": 0.4134, + "step": 15929 + }, + { + "epoch": 0.9700697256645252, + "grad_norm": 1.0065590790825523, + "learning_rate": 4.687613142464537e-06, + "loss": 0.4369, + "step": 15930 + }, + { + "epoch": 0.970130621441403, + "grad_norm": 0.991485189609904, + "learning_rate": 4.687574516901323e-06, + "loss": 0.3636, + "step": 15931 + }, + { + "epoch": 0.9701915172182809, + "grad_norm": 1.0564913429384835, + "learning_rate": 4.687535889109455e-06, + "loss": 0.3984, + "step": 15932 + }, + { + "epoch": 0.9702524129951587, + "grad_norm": 1.098792631361967, + "learning_rate": 4.68749725908897e-06, + "loss": 0.4189, + "step": 15933 + }, + { + "epoch": 0.9703133087720367, + "grad_norm": 0.9447012876306536, + "learning_rate": 4.687458626839908e-06, + "loss": 0.3791, + "step": 15934 + }, + { + "epoch": 0.9703742045489145, + "grad_norm": 1.0226505410169413, + "learning_rate": 4.68741999236231e-06, + "loss": 0.308, + "step": 15935 + }, + { + "epoch": 0.9704351003257924, + "grad_norm": 1.052528657240363, + "learning_rate": 4.687381355656213e-06, + "loss": 0.3424, + "step": 15936 + }, + { + "epoch": 0.9704959961026702, + "grad_norm": 1.0416890751177577, + "learning_rate": 4.687342716721657e-06, + "loss": 0.3989, + "step": 15937 + }, + { + "epoch": 0.9705568918795482, + "grad_norm": 1.1157826994064977, + "learning_rate": 4.687304075558683e-06, + "loss": 0.3624, + "step": 15938 + }, + { + "epoch": 0.970617787656426, + "grad_norm": 1.0691129822216192, + "learning_rate": 4.687265432167328e-06, + "loss": 0.4284, + "step": 15939 + }, + { + "epoch": 0.9706786834333039, + "grad_norm": 1.0599089447101318, + "learning_rate": 4.687226786547633e-06, + "loss": 0.3847, + "step": 15940 + }, + { + "epoch": 0.9707395792101817, + "grad_norm": 1.0329119473446204, + "learning_rate": 4.687188138699636e-06, + "loss": 0.3998, + "step": 15941 + }, + { + "epoch": 0.9708004749870597, + "grad_norm": 1.0570270235228594, + "learning_rate": 4.6871494886233785e-06, + "loss": 0.333, + "step": 15942 + }, + { + "epoch": 0.9708613707639375, + "grad_norm": 0.9717651811095603, + "learning_rate": 4.687110836318897e-06, + "loss": 0.4156, + "step": 15943 + }, + { + "epoch": 0.9709222665408154, + "grad_norm": 0.9394060799503992, + "learning_rate": 4.687072181786234e-06, + "loss": 0.4095, + "step": 15944 + }, + { + "epoch": 0.9709831623176932, + "grad_norm": 1.0040483618485636, + "learning_rate": 4.687033525025426e-06, + "loss": 0.3968, + "step": 15945 + }, + { + "epoch": 0.9710440580945712, + "grad_norm": 1.0814727026091204, + "learning_rate": 4.686994866036515e-06, + "loss": 0.4141, + "step": 15946 + }, + { + "epoch": 0.971104953871449, + "grad_norm": 1.0191127801962128, + "learning_rate": 4.6869562048195375e-06, + "loss": 0.4344, + "step": 15947 + }, + { + "epoch": 0.9711658496483269, + "grad_norm": 0.9588360897853149, + "learning_rate": 4.686917541374536e-06, + "loss": 0.3952, + "step": 15948 + }, + { + "epoch": 0.9712267454252047, + "grad_norm": 0.9349165050264795, + "learning_rate": 4.686878875701548e-06, + "loss": 0.4696, + "step": 15949 + }, + { + "epoch": 0.9712876412020827, + "grad_norm": 0.9743428866362259, + "learning_rate": 4.686840207800613e-06, + "loss": 0.4486, + "step": 15950 + }, + { + "epoch": 0.9713485369789605, + "grad_norm": 1.055602099066651, + "learning_rate": 4.6868015376717705e-06, + "loss": 0.4876, + "step": 15951 + }, + { + "epoch": 0.9714094327558384, + "grad_norm": 0.948615004630432, + "learning_rate": 4.68676286531506e-06, + "loss": 0.415, + "step": 15952 + }, + { + "epoch": 0.9714703285327163, + "grad_norm": 1.000961711797374, + "learning_rate": 4.686724190730521e-06, + "loss": 0.4468, + "step": 15953 + }, + { + "epoch": 0.9715312243095942, + "grad_norm": 1.0524218266420842, + "learning_rate": 4.686685513918193e-06, + "loss": 0.442, + "step": 15954 + }, + { + "epoch": 0.971592120086472, + "grad_norm": 0.9343369735946783, + "learning_rate": 4.686646834878116e-06, + "loss": 0.4425, + "step": 15955 + }, + { + "epoch": 0.9716530158633498, + "grad_norm": 1.017777040556604, + "learning_rate": 4.686608153610328e-06, + "loss": 0.409, + "step": 15956 + }, + { + "epoch": 0.9717139116402278, + "grad_norm": 0.9402862731854399, + "learning_rate": 4.686569470114869e-06, + "loss": 0.3666, + "step": 15957 + }, + { + "epoch": 0.9717748074171056, + "grad_norm": 1.0332892501058129, + "learning_rate": 4.686530784391779e-06, + "loss": 0.3915, + "step": 15958 + }, + { + "epoch": 0.9718357031939835, + "grad_norm": 0.9977419688217737, + "learning_rate": 4.686492096441097e-06, + "loss": 0.4026, + "step": 15959 + }, + { + "epoch": 0.9718965989708613, + "grad_norm": 1.031234448548983, + "learning_rate": 4.686453406262861e-06, + "loss": 0.3856, + "step": 15960 + }, + { + "epoch": 0.9719574947477393, + "grad_norm": 0.99297252267654, + "learning_rate": 4.686414713857113e-06, + "loss": 0.4172, + "step": 15961 + }, + { + "epoch": 0.9720183905246171, + "grad_norm": 1.0475580566098028, + "learning_rate": 4.68637601922389e-06, + "loss": 0.3769, + "step": 15962 + }, + { + "epoch": 0.972079286301495, + "grad_norm": 1.066420324153373, + "learning_rate": 4.686337322363234e-06, + "loss": 0.3696, + "step": 15963 + }, + { + "epoch": 0.9721401820783728, + "grad_norm": 0.9532542010764609, + "learning_rate": 4.686298623275182e-06, + "loss": 0.4785, + "step": 15964 + }, + { + "epoch": 0.9722010778552508, + "grad_norm": 0.9637573288388872, + "learning_rate": 4.686259921959774e-06, + "loss": 0.4497, + "step": 15965 + }, + { + "epoch": 0.9722619736321286, + "grad_norm": 1.010319370745162, + "learning_rate": 4.686221218417051e-06, + "loss": 0.3531, + "step": 15966 + }, + { + "epoch": 0.9723228694090065, + "grad_norm": 0.967503972134798, + "learning_rate": 4.68618251264705e-06, + "loss": 0.4544, + "step": 15967 + }, + { + "epoch": 0.9723837651858843, + "grad_norm": 0.9739802888012055, + "learning_rate": 4.686143804649813e-06, + "loss": 0.3922, + "step": 15968 + }, + { + "epoch": 0.9724446609627623, + "grad_norm": 1.025229240232345, + "learning_rate": 4.686105094425377e-06, + "loss": 0.3668, + "step": 15969 + }, + { + "epoch": 0.9725055567396401, + "grad_norm": 0.9658678968876444, + "learning_rate": 4.686066381973784e-06, + "loss": 0.3997, + "step": 15970 + }, + { + "epoch": 0.972566452516518, + "grad_norm": 0.9757350322620144, + "learning_rate": 4.686027667295071e-06, + "loss": 0.4279, + "step": 15971 + }, + { + "epoch": 0.9726273482933958, + "grad_norm": 0.9715430235184794, + "learning_rate": 4.685988950389278e-06, + "loss": 0.5433, + "step": 15972 + }, + { + "epoch": 0.9726882440702738, + "grad_norm": 1.210115785610484, + "learning_rate": 4.685950231256446e-06, + "loss": 0.3762, + "step": 15973 + }, + { + "epoch": 0.9727491398471516, + "grad_norm": 0.9831189512029663, + "learning_rate": 4.685911509896614e-06, + "loss": 0.4332, + "step": 15974 + }, + { + "epoch": 0.9728100356240295, + "grad_norm": 1.0325789856470784, + "learning_rate": 4.685872786309819e-06, + "loss": 0.4148, + "step": 15975 + }, + { + "epoch": 0.9728709314009073, + "grad_norm": 0.9755967222740352, + "learning_rate": 4.6858340604961025e-06, + "loss": 0.3944, + "step": 15976 + }, + { + "epoch": 0.9729318271777853, + "grad_norm": 0.948039024599039, + "learning_rate": 4.685795332455505e-06, + "loss": 0.3868, + "step": 15977 + }, + { + "epoch": 0.9729927229546631, + "grad_norm": 1.0655919126220634, + "learning_rate": 4.685756602188063e-06, + "loss": 0.3642, + "step": 15978 + }, + { + "epoch": 0.973053618731541, + "grad_norm": 1.0212144827356509, + "learning_rate": 4.6857178696938185e-06, + "loss": 0.3361, + "step": 15979 + }, + { + "epoch": 0.9731145145084188, + "grad_norm": 0.9700608345973162, + "learning_rate": 4.68567913497281e-06, + "loss": 0.3407, + "step": 15980 + }, + { + "epoch": 0.9731754102852967, + "grad_norm": 1.0020086642845822, + "learning_rate": 4.685640398025077e-06, + "loss": 0.4331, + "step": 15981 + }, + { + "epoch": 0.9732363060621746, + "grad_norm": 0.9873600051854985, + "learning_rate": 4.6856016588506595e-06, + "loss": 0.3808, + "step": 15982 + }, + { + "epoch": 0.9732972018390524, + "grad_norm": 0.9811131616228544, + "learning_rate": 4.685562917449596e-06, + "loss": 0.383, + "step": 15983 + }, + { + "epoch": 0.9733580976159303, + "grad_norm": 1.046062444255707, + "learning_rate": 4.685524173821927e-06, + "loss": 0.3694, + "step": 15984 + }, + { + "epoch": 0.9734189933928082, + "grad_norm": 0.9926630444478386, + "learning_rate": 4.685485427967691e-06, + "loss": 0.4494, + "step": 15985 + }, + { + "epoch": 0.9734798891696861, + "grad_norm": 1.1446932847817521, + "learning_rate": 4.685446679886928e-06, + "loss": 0.395, + "step": 15986 + }, + { + "epoch": 0.9735407849465639, + "grad_norm": 0.9870680405697121, + "learning_rate": 4.685407929579676e-06, + "loss": 0.3792, + "step": 15987 + }, + { + "epoch": 0.9736016807234418, + "grad_norm": 0.980355565257264, + "learning_rate": 4.685369177045977e-06, + "loss": 0.4379, + "step": 15988 + }, + { + "epoch": 0.9736625765003197, + "grad_norm": 1.0492152853618824, + "learning_rate": 4.68533042228587e-06, + "loss": 0.3843, + "step": 15989 + }, + { + "epoch": 0.9737234722771976, + "grad_norm": 1.049304062811507, + "learning_rate": 4.685291665299393e-06, + "loss": 0.4153, + "step": 15990 + }, + { + "epoch": 0.9737843680540754, + "grad_norm": 1.0310691224429611, + "learning_rate": 4.685252906086586e-06, + "loss": 0.3648, + "step": 15991 + }, + { + "epoch": 0.9738452638309534, + "grad_norm": 1.055683753610328, + "learning_rate": 4.68521414464749e-06, + "loss": 0.4291, + "step": 15992 + }, + { + "epoch": 0.9739061596078312, + "grad_norm": 0.9634302499981915, + "learning_rate": 4.685175380982142e-06, + "loss": 0.3777, + "step": 15993 + }, + { + "epoch": 0.9739670553847091, + "grad_norm": 0.9949178224365038, + "learning_rate": 4.685136615090583e-06, + "loss": 0.3681, + "step": 15994 + }, + { + "epoch": 0.9740279511615869, + "grad_norm": 0.9452630179459578, + "learning_rate": 4.685097846972852e-06, + "loss": 0.426, + "step": 15995 + }, + { + "epoch": 0.9740888469384649, + "grad_norm": 0.9238130051020246, + "learning_rate": 4.685059076628989e-06, + "loss": 0.4119, + "step": 15996 + }, + { + "epoch": 0.9741497427153427, + "grad_norm": 0.9391574190251094, + "learning_rate": 4.685020304059034e-06, + "loss": 0.389, + "step": 15997 + }, + { + "epoch": 0.9742106384922206, + "grad_norm": 1.0124761542546947, + "learning_rate": 4.684981529263025e-06, + "loss": 0.4154, + "step": 15998 + }, + { + "epoch": 0.9742715342690984, + "grad_norm": 1.0031011174207005, + "learning_rate": 4.684942752241003e-06, + "loss": 0.3752, + "step": 15999 + }, + { + "epoch": 0.9743324300459764, + "grad_norm": 0.9216188016260024, + "learning_rate": 4.684903972993006e-06, + "loss": 0.4321, + "step": 16000 + }, + { + "epoch": 0.9743933258228542, + "grad_norm": 0.9714202823538337, + "learning_rate": 4.684865191519074e-06, + "loss": 0.4519, + "step": 16001 + }, + { + "epoch": 0.974454221599732, + "grad_norm": 1.0265692047779051, + "learning_rate": 4.684826407819247e-06, + "loss": 0.4344, + "step": 16002 + }, + { + "epoch": 0.9745151173766099, + "grad_norm": 1.0192633691189683, + "learning_rate": 4.684787621893565e-06, + "loss": 0.3975, + "step": 16003 + }, + { + "epoch": 0.9745760131534879, + "grad_norm": 1.0615568975536593, + "learning_rate": 4.684748833742066e-06, + "loss": 0.4202, + "step": 16004 + }, + { + "epoch": 0.9746369089303657, + "grad_norm": 1.0407774268240753, + "learning_rate": 4.68471004336479e-06, + "loss": 0.4443, + "step": 16005 + }, + { + "epoch": 0.9746978047072435, + "grad_norm": 0.9862344540869143, + "learning_rate": 4.684671250761778e-06, + "loss": 0.3732, + "step": 16006 + }, + { + "epoch": 0.9747587004841214, + "grad_norm": 0.9781482708734746, + "learning_rate": 4.684632455933068e-06, + "loss": 0.4187, + "step": 16007 + }, + { + "epoch": 0.9748195962609993, + "grad_norm": 0.9233510183948868, + "learning_rate": 4.684593658878699e-06, + "loss": 0.4149, + "step": 16008 + }, + { + "epoch": 0.9748804920378772, + "grad_norm": 0.9631465810750971, + "learning_rate": 4.684554859598712e-06, + "loss": 0.3782, + "step": 16009 + }, + { + "epoch": 0.974941387814755, + "grad_norm": 1.0111324831684168, + "learning_rate": 4.684516058093146e-06, + "loss": 0.4537, + "step": 16010 + }, + { + "epoch": 0.9750022835916329, + "grad_norm": 0.9800329080885978, + "learning_rate": 4.684477254362041e-06, + "loss": 0.3863, + "step": 16011 + }, + { + "epoch": 0.9750631793685108, + "grad_norm": 0.9739330529733358, + "learning_rate": 4.684438448405435e-06, + "loss": 0.4069, + "step": 16012 + }, + { + "epoch": 0.9751240751453887, + "grad_norm": 0.9056320994634349, + "learning_rate": 4.6843996402233685e-06, + "loss": 0.4821, + "step": 16013 + }, + { + "epoch": 0.9751849709222665, + "grad_norm": 1.070811361098962, + "learning_rate": 4.684360829815881e-06, + "loss": 0.3921, + "step": 16014 + }, + { + "epoch": 0.9752458666991444, + "grad_norm": 1.132787129601741, + "learning_rate": 4.684322017183013e-06, + "loss": 0.3407, + "step": 16015 + }, + { + "epoch": 0.9753067624760223, + "grad_norm": 0.9629370840845904, + "learning_rate": 4.684283202324803e-06, + "loss": 0.4623, + "step": 16016 + }, + { + "epoch": 0.9753676582529002, + "grad_norm": 1.0319460595177392, + "learning_rate": 4.68424438524129e-06, + "loss": 0.3712, + "step": 16017 + }, + { + "epoch": 0.975428554029778, + "grad_norm": 0.9710202993822502, + "learning_rate": 4.684205565932514e-06, + "loss": 0.3631, + "step": 16018 + }, + { + "epoch": 0.9754894498066559, + "grad_norm": 0.9532413263246974, + "learning_rate": 4.684166744398517e-06, + "loss": 0.4756, + "step": 16019 + }, + { + "epoch": 0.9755503455835338, + "grad_norm": 1.0028067739005075, + "learning_rate": 4.684127920639334e-06, + "loss": 0.3804, + "step": 16020 + }, + { + "epoch": 0.9756112413604117, + "grad_norm": 1.016908328137009, + "learning_rate": 4.684089094655007e-06, + "loss": 0.444, + "step": 16021 + }, + { + "epoch": 0.9756721371372895, + "grad_norm": 1.0171072179963674, + "learning_rate": 4.684050266445577e-06, + "loss": 0.3616, + "step": 16022 + }, + { + "epoch": 0.9757330329141674, + "grad_norm": 1.0410487637113808, + "learning_rate": 4.684011436011081e-06, + "loss": 0.3861, + "step": 16023 + }, + { + "epoch": 0.9757939286910453, + "grad_norm": 1.0052877869995331, + "learning_rate": 4.6839726033515595e-06, + "loss": 0.3807, + "step": 16024 + }, + { + "epoch": 0.9758548244679232, + "grad_norm": 0.9777534449888076, + "learning_rate": 4.683933768467052e-06, + "loss": 0.3731, + "step": 16025 + }, + { + "epoch": 0.975915720244801, + "grad_norm": 1.0509687262710758, + "learning_rate": 4.683894931357598e-06, + "loss": 0.3571, + "step": 16026 + }, + { + "epoch": 0.9759766160216788, + "grad_norm": 1.0456952324255628, + "learning_rate": 4.683856092023238e-06, + "loss": 0.3664, + "step": 16027 + }, + { + "epoch": 0.9760375117985568, + "grad_norm": 0.9706651837738491, + "learning_rate": 4.683817250464011e-06, + "loss": 0.3833, + "step": 16028 + }, + { + "epoch": 0.9760984075754346, + "grad_norm": 1.0365588028151853, + "learning_rate": 4.6837784066799555e-06, + "loss": 0.4283, + "step": 16029 + }, + { + "epoch": 0.9761593033523125, + "grad_norm": 0.9745347314261119, + "learning_rate": 4.683739560671112e-06, + "loss": 0.4422, + "step": 16030 + }, + { + "epoch": 0.9762201991291903, + "grad_norm": 1.0313202721852124, + "learning_rate": 4.683700712437521e-06, + "loss": 0.3525, + "step": 16031 + }, + { + "epoch": 0.9762810949060683, + "grad_norm": 1.035498949406615, + "learning_rate": 4.68366186197922e-06, + "loss": 0.4596, + "step": 16032 + }, + { + "epoch": 0.9763419906829461, + "grad_norm": 0.9612178120577987, + "learning_rate": 4.683623009296251e-06, + "loss": 0.4352, + "step": 16033 + }, + { + "epoch": 0.976402886459824, + "grad_norm": 0.9797806864327576, + "learning_rate": 4.68358415438865e-06, + "loss": 0.3771, + "step": 16034 + }, + { + "epoch": 0.9764637822367019, + "grad_norm": 0.9752047859004709, + "learning_rate": 4.683545297256461e-06, + "loss": 0.4663, + "step": 16035 + }, + { + "epoch": 0.9765246780135798, + "grad_norm": 0.9731185786964154, + "learning_rate": 4.683506437899721e-06, + "loss": 0.4457, + "step": 16036 + }, + { + "epoch": 0.9765855737904576, + "grad_norm": 0.9933468747484472, + "learning_rate": 4.68346757631847e-06, + "loss": 0.4436, + "step": 16037 + }, + { + "epoch": 0.9766464695673355, + "grad_norm": 1.0240317952931144, + "learning_rate": 4.683428712512748e-06, + "loss": 0.3898, + "step": 16038 + }, + { + "epoch": 0.9767073653442134, + "grad_norm": 1.0264056271255824, + "learning_rate": 4.6833898464825925e-06, + "loss": 0.4137, + "step": 16039 + }, + { + "epoch": 0.9767682611210913, + "grad_norm": 0.9631616144200231, + "learning_rate": 4.6833509782280464e-06, + "loss": 0.4178, + "step": 16040 + }, + { + "epoch": 0.9768291568979691, + "grad_norm": 0.9530754508083609, + "learning_rate": 4.683312107749147e-06, + "loss": 0.4222, + "step": 16041 + }, + { + "epoch": 0.976890052674847, + "grad_norm": 1.0672079009812212, + "learning_rate": 4.6832732350459356e-06, + "loss": 0.3655, + "step": 16042 + }, + { + "epoch": 0.9769509484517249, + "grad_norm": 0.9592154616443241, + "learning_rate": 4.6832343601184495e-06, + "loss": 0.3928, + "step": 16043 + }, + { + "epoch": 0.9770118442286028, + "grad_norm": 1.0079271612807843, + "learning_rate": 4.68319548296673e-06, + "loss": 0.4037, + "step": 16044 + }, + { + "epoch": 0.9770727400054806, + "grad_norm": 1.0143981060373526, + "learning_rate": 4.683156603590817e-06, + "loss": 0.4179, + "step": 16045 + }, + { + "epoch": 0.9771336357823585, + "grad_norm": 1.0190840492449915, + "learning_rate": 4.6831177219907485e-06, + "loss": 0.4234, + "step": 16046 + }, + { + "epoch": 0.9771945315592364, + "grad_norm": 1.0865168038226711, + "learning_rate": 4.683078838166566e-06, + "loss": 0.357, + "step": 16047 + }, + { + "epoch": 0.9772554273361143, + "grad_norm": 1.0384922017307576, + "learning_rate": 4.683039952118308e-06, + "loss": 0.4317, + "step": 16048 + }, + { + "epoch": 0.9773163231129921, + "grad_norm": 0.9503428416860005, + "learning_rate": 4.683001063846014e-06, + "loss": 0.4242, + "step": 16049 + }, + { + "epoch": 0.97737721888987, + "grad_norm": 1.0283600079776865, + "learning_rate": 4.682962173349724e-06, + "loss": 0.391, + "step": 16050 + }, + { + "epoch": 0.9774381146667479, + "grad_norm": 0.8816529970139632, + "learning_rate": 4.682923280629477e-06, + "loss": 0.4116, + "step": 16051 + }, + { + "epoch": 0.9774990104436257, + "grad_norm": 0.95985122434048, + "learning_rate": 4.682884385685313e-06, + "loss": 0.4317, + "step": 16052 + }, + { + "epoch": 0.9775599062205036, + "grad_norm": 1.0209106229034313, + "learning_rate": 4.682845488517273e-06, + "loss": 0.3822, + "step": 16053 + }, + { + "epoch": 0.9776208019973814, + "grad_norm": 0.9375500101544937, + "learning_rate": 4.682806589125395e-06, + "loss": 0.4777, + "step": 16054 + }, + { + "epoch": 0.9776816977742594, + "grad_norm": 0.9150929748119551, + "learning_rate": 4.682767687509719e-06, + "loss": 0.4126, + "step": 16055 + }, + { + "epoch": 0.9777425935511372, + "grad_norm": 0.9337011877757427, + "learning_rate": 4.682728783670284e-06, + "loss": 0.4687, + "step": 16056 + }, + { + "epoch": 0.9778034893280151, + "grad_norm": 1.0577286704055204, + "learning_rate": 4.682689877607131e-06, + "loss": 0.3506, + "step": 16057 + }, + { + "epoch": 0.9778643851048929, + "grad_norm": 1.1379819731834566, + "learning_rate": 4.682650969320298e-06, + "loss": 0.3584, + "step": 16058 + }, + { + "epoch": 0.9779252808817709, + "grad_norm": 0.9222804390114773, + "learning_rate": 4.682612058809826e-06, + "loss": 0.459, + "step": 16059 + }, + { + "epoch": 0.9779861766586487, + "grad_norm": 1.033357419555169, + "learning_rate": 4.682573146075755e-06, + "loss": 0.3622, + "step": 16060 + }, + { + "epoch": 0.9780470724355266, + "grad_norm": 0.982177034213407, + "learning_rate": 4.682534231118123e-06, + "loss": 0.4596, + "step": 16061 + }, + { + "epoch": 0.9781079682124044, + "grad_norm": 1.0305455332771616, + "learning_rate": 4.682495313936971e-06, + "loss": 0.3327, + "step": 16062 + }, + { + "epoch": 0.9781688639892824, + "grad_norm": 1.0270135803702636, + "learning_rate": 4.682456394532339e-06, + "loss": 0.4613, + "step": 16063 + }, + { + "epoch": 0.9782297597661602, + "grad_norm": 1.07859134807598, + "learning_rate": 4.682417472904264e-06, + "loss": 0.3866, + "step": 16064 + }, + { + "epoch": 0.9782906555430381, + "grad_norm": 0.9823713710759832, + "learning_rate": 4.682378549052788e-06, + "loss": 0.3662, + "step": 16065 + }, + { + "epoch": 0.9783515513199159, + "grad_norm": 1.0982957021478394, + "learning_rate": 4.68233962297795e-06, + "loss": 0.3778, + "step": 16066 + }, + { + "epoch": 0.9784124470967939, + "grad_norm": 1.1117175401114243, + "learning_rate": 4.682300694679791e-06, + "loss": 0.4111, + "step": 16067 + }, + { + "epoch": 0.9784733428736717, + "grad_norm": 0.982938359540381, + "learning_rate": 4.6822617641583476e-06, + "loss": 0.3754, + "step": 16068 + }, + { + "epoch": 0.9785342386505496, + "grad_norm": 1.038040913090864, + "learning_rate": 4.682222831413663e-06, + "loss": 0.4205, + "step": 16069 + }, + { + "epoch": 0.9785951344274274, + "grad_norm": 1.0721407747758993, + "learning_rate": 4.682183896445775e-06, + "loss": 0.3953, + "step": 16070 + }, + { + "epoch": 0.9786560302043054, + "grad_norm": 0.9601703143918208, + "learning_rate": 4.682144959254722e-06, + "loss": 0.3689, + "step": 16071 + }, + { + "epoch": 0.9787169259811832, + "grad_norm": 1.0024826632551733, + "learning_rate": 4.682106019840545e-06, + "loss": 0.4083, + "step": 16072 + }, + { + "epoch": 0.978777821758061, + "grad_norm": 0.986938358871002, + "learning_rate": 4.682067078203285e-06, + "loss": 0.4178, + "step": 16073 + }, + { + "epoch": 0.978838717534939, + "grad_norm": 1.0135808545810367, + "learning_rate": 4.682028134342981e-06, + "loss": 0.42, + "step": 16074 + }, + { + "epoch": 0.9788996133118169, + "grad_norm": 0.9500175526140742, + "learning_rate": 4.68198918825967e-06, + "loss": 0.3962, + "step": 16075 + }, + { + "epoch": 0.9789605090886947, + "grad_norm": 1.0466739293789553, + "learning_rate": 4.6819502399533955e-06, + "loss": 0.354, + "step": 16076 + }, + { + "epoch": 0.9790214048655725, + "grad_norm": 1.040095040743104, + "learning_rate": 4.681911289424195e-06, + "loss": 0.419, + "step": 16077 + }, + { + "epoch": 0.9790823006424505, + "grad_norm": 1.0532956551317816, + "learning_rate": 4.681872336672108e-06, + "loss": 0.3961, + "step": 16078 + }, + { + "epoch": 0.9791431964193283, + "grad_norm": 0.9712483846083861, + "learning_rate": 4.6818333816971755e-06, + "loss": 0.4351, + "step": 16079 + }, + { + "epoch": 0.9792040921962062, + "grad_norm": 1.1072268782268198, + "learning_rate": 4.681794424499436e-06, + "loss": 0.3454, + "step": 16080 + }, + { + "epoch": 0.979264987973084, + "grad_norm": 0.9591100671626004, + "learning_rate": 4.68175546507893e-06, + "loss": 0.4756, + "step": 16081 + }, + { + "epoch": 0.979325883749962, + "grad_norm": 0.9997700439303207, + "learning_rate": 4.681716503435697e-06, + "loss": 0.3936, + "step": 16082 + }, + { + "epoch": 0.9793867795268398, + "grad_norm": 1.0006258904805212, + "learning_rate": 4.6816775395697766e-06, + "loss": 0.4062, + "step": 16083 + }, + { + "epoch": 0.9794476753037177, + "grad_norm": 0.986567552035929, + "learning_rate": 4.681638573481209e-06, + "loss": 0.4059, + "step": 16084 + }, + { + "epoch": 0.9795085710805955, + "grad_norm": 0.8847233075292745, + "learning_rate": 4.681599605170033e-06, + "loss": 0.421, + "step": 16085 + }, + { + "epoch": 0.9795694668574735, + "grad_norm": 0.923586534415693, + "learning_rate": 4.681560634636288e-06, + "loss": 0.4319, + "step": 16086 + }, + { + "epoch": 0.9796303626343513, + "grad_norm": 1.000473396834949, + "learning_rate": 4.681521661880015e-06, + "loss": 0.3932, + "step": 16087 + }, + { + "epoch": 0.9796912584112292, + "grad_norm": 0.9796855113818156, + "learning_rate": 4.681482686901253e-06, + "loss": 0.4343, + "step": 16088 + }, + { + "epoch": 0.979752154188107, + "grad_norm": 1.0487325640875638, + "learning_rate": 4.681443709700041e-06, + "loss": 0.4442, + "step": 16089 + }, + { + "epoch": 0.979813049964985, + "grad_norm": 1.1067579600616035, + "learning_rate": 4.681404730276421e-06, + "loss": 0.3424, + "step": 16090 + }, + { + "epoch": 0.9798739457418628, + "grad_norm": 0.9809068158903119, + "learning_rate": 4.68136574863043e-06, + "loss": 0.3976, + "step": 16091 + }, + { + "epoch": 0.9799348415187407, + "grad_norm": 0.9882188067801941, + "learning_rate": 4.68132676476211e-06, + "loss": 0.3704, + "step": 16092 + }, + { + "epoch": 0.9799957372956185, + "grad_norm": 0.9483875069296097, + "learning_rate": 4.681287778671498e-06, + "loss": 0.4677, + "step": 16093 + }, + { + "epoch": 0.9800566330724965, + "grad_norm": 0.9850465541479403, + "learning_rate": 4.681248790358637e-06, + "loss": 0.3428, + "step": 16094 + }, + { + "epoch": 0.9801175288493743, + "grad_norm": 0.9965540536160948, + "learning_rate": 4.681209799823564e-06, + "loss": 0.4024, + "step": 16095 + }, + { + "epoch": 0.9801784246262522, + "grad_norm": 1.086147861315818, + "learning_rate": 4.681170807066321e-06, + "loss": 0.3623, + "step": 16096 + }, + { + "epoch": 0.98023932040313, + "grad_norm": 0.974569709940748, + "learning_rate": 4.681131812086945e-06, + "loss": 0.4729, + "step": 16097 + }, + { + "epoch": 0.980300216180008, + "grad_norm": 1.0420445326599375, + "learning_rate": 4.6810928148854784e-06, + "loss": 0.3781, + "step": 16098 + }, + { + "epoch": 0.9803611119568858, + "grad_norm": 0.9581006188209618, + "learning_rate": 4.68105381546196e-06, + "loss": 0.4588, + "step": 16099 + }, + { + "epoch": 0.9804220077337636, + "grad_norm": 1.0512932397991577, + "learning_rate": 4.681014813816428e-06, + "loss": 0.3714, + "step": 16100 + }, + { + "epoch": 0.9804829035106415, + "grad_norm": 0.9860190586794039, + "learning_rate": 4.680975809948925e-06, + "loss": 0.4234, + "step": 16101 + }, + { + "epoch": 0.9805437992875194, + "grad_norm": 1.0199980204936179, + "learning_rate": 4.680936803859488e-06, + "loss": 0.4325, + "step": 16102 + }, + { + "epoch": 0.9806046950643973, + "grad_norm": 1.0520195224662927, + "learning_rate": 4.680897795548158e-06, + "loss": 0.3956, + "step": 16103 + }, + { + "epoch": 0.9806655908412751, + "grad_norm": 1.0268495921725582, + "learning_rate": 4.680858785014975e-06, + "loss": 0.3218, + "step": 16104 + }, + { + "epoch": 0.980726486618153, + "grad_norm": 0.9578661436979796, + "learning_rate": 4.680819772259978e-06, + "loss": 0.4214, + "step": 16105 + }, + { + "epoch": 0.9807873823950309, + "grad_norm": 1.0167395367763374, + "learning_rate": 4.6807807572832085e-06, + "loss": 0.3483, + "step": 16106 + }, + { + "epoch": 0.9808482781719088, + "grad_norm": 1.0849584240189116, + "learning_rate": 4.6807417400847034e-06, + "loss": 0.3724, + "step": 16107 + }, + { + "epoch": 0.9809091739487866, + "grad_norm": 0.9155856847204409, + "learning_rate": 4.6807027206645054e-06, + "loss": 0.3965, + "step": 16108 + }, + { + "epoch": 0.9809700697256645, + "grad_norm": 1.0047670236376236, + "learning_rate": 4.680663699022651e-06, + "loss": 0.3891, + "step": 16109 + }, + { + "epoch": 0.9810309655025424, + "grad_norm": 0.9708547805231074, + "learning_rate": 4.6806246751591824e-06, + "loss": 0.3698, + "step": 16110 + }, + { + "epoch": 0.9810918612794203, + "grad_norm": 1.0640191062441249, + "learning_rate": 4.68058564907414e-06, + "loss": 0.4137, + "step": 16111 + }, + { + "epoch": 0.9811527570562981, + "grad_norm": 1.010296658180209, + "learning_rate": 4.6805466207675605e-06, + "loss": 0.383, + "step": 16112 + }, + { + "epoch": 0.981213652833176, + "grad_norm": 0.9719134590426363, + "learning_rate": 4.680507590239487e-06, + "loss": 0.3553, + "step": 16113 + }, + { + "epoch": 0.9812745486100539, + "grad_norm": 0.9051143568237352, + "learning_rate": 4.6804685574899566e-06, + "loss": 0.3869, + "step": 16114 + }, + { + "epoch": 0.9813354443869318, + "grad_norm": 1.022842607772571, + "learning_rate": 4.6804295225190095e-06, + "loss": 0.4342, + "step": 16115 + }, + { + "epoch": 0.9813963401638096, + "grad_norm": 1.077251120378669, + "learning_rate": 4.680390485326688e-06, + "loss": 0.3913, + "step": 16116 + }, + { + "epoch": 0.9814572359406876, + "grad_norm": 0.917600285610873, + "learning_rate": 4.680351445913029e-06, + "loss": 0.5001, + "step": 16117 + }, + { + "epoch": 0.9815181317175654, + "grad_norm": 0.9587024442206771, + "learning_rate": 4.680312404278074e-06, + "loss": 0.4067, + "step": 16118 + }, + { + "epoch": 0.9815790274944433, + "grad_norm": 0.9772028519253518, + "learning_rate": 4.680273360421861e-06, + "loss": 0.4616, + "step": 16119 + }, + { + "epoch": 0.9816399232713211, + "grad_norm": 0.972935781132346, + "learning_rate": 4.680234314344431e-06, + "loss": 0.443, + "step": 16120 + }, + { + "epoch": 0.9817008190481991, + "grad_norm": 0.9846664000713535, + "learning_rate": 4.680195266045824e-06, + "loss": 0.4166, + "step": 16121 + }, + { + "epoch": 0.9817617148250769, + "grad_norm": 1.0081747313263045, + "learning_rate": 4.680156215526079e-06, + "loss": 0.4241, + "step": 16122 + }, + { + "epoch": 0.9818226106019547, + "grad_norm": 1.00919927557625, + "learning_rate": 4.6801171627852365e-06, + "loss": 0.3359, + "step": 16123 + }, + { + "epoch": 0.9818835063788326, + "grad_norm": 1.1754687985626877, + "learning_rate": 4.680078107823336e-06, + "loss": 0.3967, + "step": 16124 + }, + { + "epoch": 0.9819444021557105, + "grad_norm": 0.951413644728912, + "learning_rate": 4.680039050640417e-06, + "loss": 0.4555, + "step": 16125 + }, + { + "epoch": 0.9820052979325884, + "grad_norm": 1.0205485398077945, + "learning_rate": 4.67999999123652e-06, + "loss": 0.4468, + "step": 16126 + }, + { + "epoch": 0.9820661937094662, + "grad_norm": 0.9598223799294235, + "learning_rate": 4.679960929611684e-06, + "loss": 0.4091, + "step": 16127 + }, + { + "epoch": 0.9821270894863441, + "grad_norm": 0.9935425064404672, + "learning_rate": 4.679921865765949e-06, + "loss": 0.4437, + "step": 16128 + }, + { + "epoch": 0.982187985263222, + "grad_norm": 1.0177066634659662, + "learning_rate": 4.679882799699355e-06, + "loss": 0.4051, + "step": 16129 + }, + { + "epoch": 0.9822488810400999, + "grad_norm": 1.0050184648421094, + "learning_rate": 4.679843731411942e-06, + "loss": 0.421, + "step": 16130 + }, + { + "epoch": 0.9823097768169777, + "grad_norm": 0.9373526788419596, + "learning_rate": 4.6798046609037495e-06, + "loss": 0.4159, + "step": 16131 + }, + { + "epoch": 0.9823706725938556, + "grad_norm": 1.0370747050299767, + "learning_rate": 4.679765588174817e-06, + "loss": 0.3774, + "step": 16132 + }, + { + "epoch": 0.9824315683707335, + "grad_norm": 1.0387634827386758, + "learning_rate": 4.679726513225185e-06, + "loss": 0.3615, + "step": 16133 + }, + { + "epoch": 0.9824924641476114, + "grad_norm": 0.9990885140282008, + "learning_rate": 4.679687436054893e-06, + "loss": 0.3916, + "step": 16134 + }, + { + "epoch": 0.9825533599244892, + "grad_norm": 0.9850003507870537, + "learning_rate": 4.67964835666398e-06, + "loss": 0.3974, + "step": 16135 + }, + { + "epoch": 0.9826142557013671, + "grad_norm": 1.0243744323561415, + "learning_rate": 4.679609275052487e-06, + "loss": 0.3245, + "step": 16136 + }, + { + "epoch": 0.982675151478245, + "grad_norm": 1.024885198554054, + "learning_rate": 4.679570191220454e-06, + "loss": 0.4235, + "step": 16137 + }, + { + "epoch": 0.9827360472551229, + "grad_norm": 1.0046802751710628, + "learning_rate": 4.6795311051679195e-06, + "loss": 0.3798, + "step": 16138 + }, + { + "epoch": 0.9827969430320007, + "grad_norm": 1.0561550069015258, + "learning_rate": 4.679492016894924e-06, + "loss": 0.4443, + "step": 16139 + }, + { + "epoch": 0.9828578388088786, + "grad_norm": 1.0532830254304235, + "learning_rate": 4.6794529264015076e-06, + "loss": 0.3456, + "step": 16140 + }, + { + "epoch": 0.9829187345857565, + "grad_norm": 1.0292807559862742, + "learning_rate": 4.6794138336877096e-06, + "loss": 0.4771, + "step": 16141 + }, + { + "epoch": 0.9829796303626344, + "grad_norm": 1.0383322886314648, + "learning_rate": 4.679374738753571e-06, + "loss": 0.3654, + "step": 16142 + }, + { + "epoch": 0.9830405261395122, + "grad_norm": 1.0083676325324407, + "learning_rate": 4.6793356415991295e-06, + "loss": 0.3967, + "step": 16143 + }, + { + "epoch": 0.98310142191639, + "grad_norm": 1.0055223413961543, + "learning_rate": 4.679296542224426e-06, + "loss": 0.4801, + "step": 16144 + }, + { + "epoch": 0.983162317693268, + "grad_norm": 1.0066370695198557, + "learning_rate": 4.679257440629502e-06, + "loss": 0.3871, + "step": 16145 + }, + { + "epoch": 0.9832232134701459, + "grad_norm": 0.9884705761222798, + "learning_rate": 4.679218336814395e-06, + "loss": 0.4462, + "step": 16146 + }, + { + "epoch": 0.9832841092470237, + "grad_norm": 0.9981776057508186, + "learning_rate": 4.679179230779145e-06, + "loss": 0.4691, + "step": 16147 + }, + { + "epoch": 0.9833450050239015, + "grad_norm": 0.9959897534524961, + "learning_rate": 4.679140122523794e-06, + "loss": 0.4283, + "step": 16148 + }, + { + "epoch": 0.9834059008007795, + "grad_norm": 1.016166488692342, + "learning_rate": 4.67910101204838e-06, + "loss": 0.3559, + "step": 16149 + }, + { + "epoch": 0.9834667965776573, + "grad_norm": 0.9013003195635438, + "learning_rate": 4.6790618993529415e-06, + "loss": 0.3854, + "step": 16150 + }, + { + "epoch": 0.9835276923545352, + "grad_norm": 0.9991644634550092, + "learning_rate": 4.679022784437521e-06, + "loss": 0.4058, + "step": 16151 + }, + { + "epoch": 0.983588588131413, + "grad_norm": 0.9466693126541921, + "learning_rate": 4.678983667302158e-06, + "loss": 0.3766, + "step": 16152 + }, + { + "epoch": 0.983649483908291, + "grad_norm": 0.9725084170082295, + "learning_rate": 4.678944547946892e-06, + "loss": 0.3669, + "step": 16153 + }, + { + "epoch": 0.9837103796851688, + "grad_norm": 0.9456898728804863, + "learning_rate": 4.678905426371761e-06, + "loss": 0.4579, + "step": 16154 + }, + { + "epoch": 0.9837712754620467, + "grad_norm": 0.9619390252295359, + "learning_rate": 4.6788663025768076e-06, + "loss": 0.3686, + "step": 16155 + }, + { + "epoch": 0.9838321712389246, + "grad_norm": 0.966221203757724, + "learning_rate": 4.678827176562071e-06, + "loss": 0.4125, + "step": 16156 + }, + { + "epoch": 0.9838930670158025, + "grad_norm": 0.9901323566887316, + "learning_rate": 4.67878804832759e-06, + "loss": 0.4424, + "step": 16157 + }, + { + "epoch": 0.9839539627926803, + "grad_norm": 1.0514115386523537, + "learning_rate": 4.6787489178734046e-06, + "loss": 0.3518, + "step": 16158 + }, + { + "epoch": 0.9840148585695582, + "grad_norm": 0.9546906619365239, + "learning_rate": 4.678709785199556e-06, + "loss": 0.4743, + "step": 16159 + }, + { + "epoch": 0.9840757543464361, + "grad_norm": 0.9960451244482077, + "learning_rate": 4.678670650306082e-06, + "loss": 0.3687, + "step": 16160 + }, + { + "epoch": 0.984136650123314, + "grad_norm": 0.9731543488979095, + "learning_rate": 4.678631513193025e-06, + "loss": 0.4185, + "step": 16161 + }, + { + "epoch": 0.9841975459001918, + "grad_norm": 0.9600063053610038, + "learning_rate": 4.678592373860423e-06, + "loss": 0.3614, + "step": 16162 + }, + { + "epoch": 0.9842584416770697, + "grad_norm": 1.0171716950058605, + "learning_rate": 4.678553232308315e-06, + "loss": 0.365, + "step": 16163 + }, + { + "epoch": 0.9843193374539476, + "grad_norm": 1.1952843313463575, + "learning_rate": 4.678514088536744e-06, + "loss": 0.4292, + "step": 16164 + }, + { + "epoch": 0.9843802332308255, + "grad_norm": 0.9909746700323528, + "learning_rate": 4.678474942545748e-06, + "loss": 0.3848, + "step": 16165 + }, + { + "epoch": 0.9844411290077033, + "grad_norm": 1.0011021012177936, + "learning_rate": 4.678435794335367e-06, + "loss": 0.3892, + "step": 16166 + }, + { + "epoch": 0.9845020247845812, + "grad_norm": 0.9882362004365979, + "learning_rate": 4.67839664390564e-06, + "loss": 0.4694, + "step": 16167 + }, + { + "epoch": 0.9845629205614591, + "grad_norm": 1.0026602961566797, + "learning_rate": 4.678357491256608e-06, + "loss": 0.4003, + "step": 16168 + }, + { + "epoch": 0.984623816338337, + "grad_norm": 1.0347155292825287, + "learning_rate": 4.678318336388311e-06, + "loss": 0.446, + "step": 16169 + }, + { + "epoch": 0.9846847121152148, + "grad_norm": 0.9918286802109743, + "learning_rate": 4.678279179300788e-06, + "loss": 0.3348, + "step": 16170 + }, + { + "epoch": 0.9847456078920926, + "grad_norm": 0.9431744769046054, + "learning_rate": 4.67824001999408e-06, + "loss": 0.377, + "step": 16171 + }, + { + "epoch": 0.9848065036689706, + "grad_norm": 1.0351802564948431, + "learning_rate": 4.678200858468226e-06, + "loss": 0.3706, + "step": 16172 + }, + { + "epoch": 0.9848673994458484, + "grad_norm": 1.005463947053026, + "learning_rate": 4.678161694723268e-06, + "loss": 0.4018, + "step": 16173 + }, + { + "epoch": 0.9849282952227263, + "grad_norm": 1.0825281472392674, + "learning_rate": 4.678122528759242e-06, + "loss": 0.4084, + "step": 16174 + }, + { + "epoch": 0.9849891909996041, + "grad_norm": 0.9725223586242351, + "learning_rate": 4.678083360576191e-06, + "loss": 0.409, + "step": 16175 + }, + { + "epoch": 0.9850500867764821, + "grad_norm": 0.9526734131330167, + "learning_rate": 4.678044190174154e-06, + "loss": 0.4568, + "step": 16176 + }, + { + "epoch": 0.9851109825533599, + "grad_norm": 1.0282217868169534, + "learning_rate": 4.67800501755317e-06, + "loss": 0.4687, + "step": 16177 + }, + { + "epoch": 0.9851718783302378, + "grad_norm": 0.989005063183578, + "learning_rate": 4.67796584271328e-06, + "loss": 0.4079, + "step": 16178 + }, + { + "epoch": 0.9852327741071156, + "grad_norm": 1.025664254256082, + "learning_rate": 4.677926665654524e-06, + "loss": 0.3644, + "step": 16179 + }, + { + "epoch": 0.9852936698839936, + "grad_norm": 1.0764304353065983, + "learning_rate": 4.677887486376942e-06, + "loss": 0.3507, + "step": 16180 + }, + { + "epoch": 0.9853545656608714, + "grad_norm": 0.9615466498082826, + "learning_rate": 4.677848304880573e-06, + "loss": 0.4182, + "step": 16181 + }, + { + "epoch": 0.9854154614377493, + "grad_norm": 1.0181817679781093, + "learning_rate": 4.677809121165458e-06, + "loss": 0.3942, + "step": 16182 + }, + { + "epoch": 0.9854763572146271, + "grad_norm": 1.0226787905624835, + "learning_rate": 4.677769935231635e-06, + "loss": 0.4556, + "step": 16183 + }, + { + "epoch": 0.9855372529915051, + "grad_norm": 1.0518877107974478, + "learning_rate": 4.677730747079147e-06, + "loss": 0.374, + "step": 16184 + }, + { + "epoch": 0.9855981487683829, + "grad_norm": 0.9788495478161697, + "learning_rate": 4.677691556708031e-06, + "loss": 0.4208, + "step": 16185 + }, + { + "epoch": 0.9856590445452608, + "grad_norm": 1.0112756967106205, + "learning_rate": 4.6776523641183285e-06, + "loss": 0.454, + "step": 16186 + }, + { + "epoch": 0.9857199403221386, + "grad_norm": 1.0420856428076417, + "learning_rate": 4.677613169310079e-06, + "loss": 0.3666, + "step": 16187 + }, + { + "epoch": 0.9857808360990166, + "grad_norm": 0.9073857087753471, + "learning_rate": 4.677573972283322e-06, + "loss": 0.4557, + "step": 16188 + }, + { + "epoch": 0.9858417318758944, + "grad_norm": 0.9913517049153884, + "learning_rate": 4.677534773038099e-06, + "loss": 0.3973, + "step": 16189 + }, + { + "epoch": 0.9859026276527723, + "grad_norm": 1.1358573787650539, + "learning_rate": 4.677495571574448e-06, + "loss": 0.3669, + "step": 16190 + }, + { + "epoch": 0.9859635234296501, + "grad_norm": 1.0155112052358368, + "learning_rate": 4.6774563678924105e-06, + "loss": 0.4487, + "step": 16191 + }, + { + "epoch": 0.9860244192065281, + "grad_norm": 1.05152131724092, + "learning_rate": 4.677417161992025e-06, + "loss": 0.3828, + "step": 16192 + }, + { + "epoch": 0.9860853149834059, + "grad_norm": 1.0000123891616424, + "learning_rate": 4.677377953873333e-06, + "loss": 0.405, + "step": 16193 + }, + { + "epoch": 0.9861462107602837, + "grad_norm": 0.8862788136490198, + "learning_rate": 4.677338743536372e-06, + "loss": 0.4228, + "step": 16194 + }, + { + "epoch": 0.9862071065371616, + "grad_norm": 1.164779821431365, + "learning_rate": 4.677299530981185e-06, + "loss": 0.3727, + "step": 16195 + }, + { + "epoch": 0.9862680023140395, + "grad_norm": 1.0029167159644998, + "learning_rate": 4.677260316207811e-06, + "loss": 0.4151, + "step": 16196 + }, + { + "epoch": 0.9863288980909174, + "grad_norm": 0.9875876183585724, + "learning_rate": 4.6772210992162885e-06, + "loss": 0.3882, + "step": 16197 + }, + { + "epoch": 0.9863897938677952, + "grad_norm": 1.028817312912461, + "learning_rate": 4.677181880006658e-06, + "loss": 0.4001, + "step": 16198 + }, + { + "epoch": 0.9864506896446732, + "grad_norm": 0.934247123012719, + "learning_rate": 4.677142658578961e-06, + "loss": 0.4619, + "step": 16199 + }, + { + "epoch": 0.986511585421551, + "grad_norm": 1.0240849132709928, + "learning_rate": 4.677103434933235e-06, + "loss": 0.4431, + "step": 16200 + }, + { + "epoch": 0.9865724811984289, + "grad_norm": 0.9464470446721451, + "learning_rate": 4.677064209069522e-06, + "loss": 0.4381, + "step": 16201 + }, + { + "epoch": 0.9866333769753067, + "grad_norm": 1.00541614281982, + "learning_rate": 4.677024980987861e-06, + "loss": 0.5006, + "step": 16202 + }, + { + "epoch": 0.9866942727521847, + "grad_norm": 0.9638180397331491, + "learning_rate": 4.6769857506882935e-06, + "loss": 0.4448, + "step": 16203 + }, + { + "epoch": 0.9867551685290625, + "grad_norm": 0.9398932825829419, + "learning_rate": 4.676946518170856e-06, + "loss": 0.4307, + "step": 16204 + }, + { + "epoch": 0.9868160643059404, + "grad_norm": 0.9380486109255832, + "learning_rate": 4.676907283435592e-06, + "loss": 0.3884, + "step": 16205 + }, + { + "epoch": 0.9868769600828182, + "grad_norm": 0.9621924635022957, + "learning_rate": 4.67686804648254e-06, + "loss": 0.4657, + "step": 16206 + }, + { + "epoch": 0.9869378558596962, + "grad_norm": 0.9660342502006093, + "learning_rate": 4.676828807311739e-06, + "loss": 0.3937, + "step": 16207 + }, + { + "epoch": 0.986998751636574, + "grad_norm": 1.0687941865528416, + "learning_rate": 4.676789565923232e-06, + "loss": 0.3989, + "step": 16208 + }, + { + "epoch": 0.9870596474134519, + "grad_norm": 0.9190212608055365, + "learning_rate": 4.676750322317056e-06, + "loss": 0.4198, + "step": 16209 + }, + { + "epoch": 0.9871205431903297, + "grad_norm": 1.0172618661496406, + "learning_rate": 4.676711076493252e-06, + "loss": 0.3802, + "step": 16210 + }, + { + "epoch": 0.9871814389672077, + "grad_norm": 1.014059429188701, + "learning_rate": 4.67667182845186e-06, + "loss": 0.4297, + "step": 16211 + }, + { + "epoch": 0.9872423347440855, + "grad_norm": 0.9795919558753168, + "learning_rate": 4.67663257819292e-06, + "loss": 0.4043, + "step": 16212 + }, + { + "epoch": 0.9873032305209634, + "grad_norm": 0.9835405054225602, + "learning_rate": 4.676593325716473e-06, + "loss": 0.4463, + "step": 16213 + }, + { + "epoch": 0.9873641262978412, + "grad_norm": 0.922253135448357, + "learning_rate": 4.676554071022557e-06, + "loss": 0.4658, + "step": 16214 + }, + { + "epoch": 0.9874250220747192, + "grad_norm": 0.9430990731540834, + "learning_rate": 4.676514814111213e-06, + "loss": 0.4371, + "step": 16215 + }, + { + "epoch": 0.987485917851597, + "grad_norm": 1.057143890570593, + "learning_rate": 4.67647555498248e-06, + "loss": 0.4254, + "step": 16216 + }, + { + "epoch": 0.9875468136284749, + "grad_norm": 0.9793937589642389, + "learning_rate": 4.6764362936364005e-06, + "loss": 0.4169, + "step": 16217 + }, + { + "epoch": 0.9876077094053527, + "grad_norm": 0.9197135180667426, + "learning_rate": 4.676397030073012e-06, + "loss": 0.42, + "step": 16218 + }, + { + "epoch": 0.9876686051822307, + "grad_norm": 0.9846217698842454, + "learning_rate": 4.676357764292356e-06, + "loss": 0.4141, + "step": 16219 + }, + { + "epoch": 0.9877295009591085, + "grad_norm": 0.9174668917323406, + "learning_rate": 4.676318496294472e-06, + "loss": 0.449, + "step": 16220 + }, + { + "epoch": 0.9877903967359863, + "grad_norm": 0.9889657879589215, + "learning_rate": 4.6762792260794e-06, + "loss": 0.4522, + "step": 16221 + }, + { + "epoch": 0.9878512925128642, + "grad_norm": 0.9508541306782256, + "learning_rate": 4.6762399536471795e-06, + "loss": 0.3814, + "step": 16222 + }, + { + "epoch": 0.9879121882897421, + "grad_norm": 1.0515767055761887, + "learning_rate": 4.676200678997851e-06, + "loss": 0.3875, + "step": 16223 + }, + { + "epoch": 0.98797308406662, + "grad_norm": 0.9603590053641187, + "learning_rate": 4.676161402131455e-06, + "loss": 0.3932, + "step": 16224 + }, + { + "epoch": 0.9880339798434978, + "grad_norm": 1.0117894813880277, + "learning_rate": 4.67612212304803e-06, + "loss": 0.3874, + "step": 16225 + }, + { + "epoch": 0.9880948756203757, + "grad_norm": 1.0522828902592163, + "learning_rate": 4.676082841747618e-06, + "loss": 0.3727, + "step": 16226 + }, + { + "epoch": 0.9881557713972536, + "grad_norm": 0.9776665044221723, + "learning_rate": 4.676043558230258e-06, + "loss": 0.3737, + "step": 16227 + }, + { + "epoch": 0.9882166671741315, + "grad_norm": 1.0727587903272258, + "learning_rate": 4.6760042724959895e-06, + "loss": 0.376, + "step": 16228 + }, + { + "epoch": 0.9882775629510093, + "grad_norm": 1.0232208801492568, + "learning_rate": 4.675964984544854e-06, + "loss": 0.3801, + "step": 16229 + }, + { + "epoch": 0.9883384587278872, + "grad_norm": 0.9738575270929011, + "learning_rate": 4.67592569437689e-06, + "loss": 0.3957, + "step": 16230 + }, + { + "epoch": 0.9883993545047651, + "grad_norm": 0.9930744858831636, + "learning_rate": 4.675886401992138e-06, + "loss": 0.3897, + "step": 16231 + }, + { + "epoch": 0.988460250281643, + "grad_norm": 0.9968090180715501, + "learning_rate": 4.675847107390639e-06, + "loss": 0.3548, + "step": 16232 + }, + { + "epoch": 0.9885211460585208, + "grad_norm": 1.0300464206882167, + "learning_rate": 4.675807810572431e-06, + "loss": 0.3723, + "step": 16233 + }, + { + "epoch": 0.9885820418353987, + "grad_norm": 1.0486667997720778, + "learning_rate": 4.675768511537556e-06, + "loss": 0.4075, + "step": 16234 + }, + { + "epoch": 0.9886429376122766, + "grad_norm": 0.9823663995492986, + "learning_rate": 4.675729210286053e-06, + "loss": 0.4693, + "step": 16235 + }, + { + "epoch": 0.9887038333891545, + "grad_norm": 0.9680351214269808, + "learning_rate": 4.675689906817962e-06, + "loss": 0.4094, + "step": 16236 + }, + { + "epoch": 0.9887647291660323, + "grad_norm": 1.012288028667421, + "learning_rate": 4.675650601133324e-06, + "loss": 0.4484, + "step": 16237 + }, + { + "epoch": 0.9888256249429103, + "grad_norm": 0.9595766429163871, + "learning_rate": 4.6756112932321785e-06, + "loss": 0.4557, + "step": 16238 + }, + { + "epoch": 0.9888865207197881, + "grad_norm": 0.9380333830208059, + "learning_rate": 4.675571983114565e-06, + "loss": 0.3961, + "step": 16239 + }, + { + "epoch": 0.988947416496666, + "grad_norm": 0.994118688232684, + "learning_rate": 4.675532670780524e-06, + "loss": 0.382, + "step": 16240 + }, + { + "epoch": 0.9890083122735438, + "grad_norm": 1.0027412737144537, + "learning_rate": 4.675493356230095e-06, + "loss": 0.3985, + "step": 16241 + }, + { + "epoch": 0.9890692080504218, + "grad_norm": 1.0628135104746894, + "learning_rate": 4.67545403946332e-06, + "loss": 0.3584, + "step": 16242 + }, + { + "epoch": 0.9891301038272996, + "grad_norm": 0.9383163659044449, + "learning_rate": 4.6754147204802365e-06, + "loss": 0.4462, + "step": 16243 + }, + { + "epoch": 0.9891909996041774, + "grad_norm": 0.9652222608521926, + "learning_rate": 4.675375399280886e-06, + "loss": 0.4032, + "step": 16244 + }, + { + "epoch": 0.9892518953810553, + "grad_norm": 0.9751193039935881, + "learning_rate": 4.675336075865309e-06, + "loss": 0.4586, + "step": 16245 + }, + { + "epoch": 0.9893127911579332, + "grad_norm": 0.9550536119656439, + "learning_rate": 4.675296750233543e-06, + "loss": 0.3988, + "step": 16246 + }, + { + "epoch": 0.9893736869348111, + "grad_norm": 0.9971521914687201, + "learning_rate": 4.675257422385632e-06, + "loss": 0.4601, + "step": 16247 + }, + { + "epoch": 0.9894345827116889, + "grad_norm": 0.9660228717676052, + "learning_rate": 4.675218092321613e-06, + "loss": 0.4205, + "step": 16248 + }, + { + "epoch": 0.9894954784885668, + "grad_norm": 0.9985745948185358, + "learning_rate": 4.675178760041526e-06, + "loss": 0.3904, + "step": 16249 + }, + { + "epoch": 0.9895563742654447, + "grad_norm": 0.9786207379776738, + "learning_rate": 4.675139425545414e-06, + "loss": 0.3652, + "step": 16250 + }, + { + "epoch": 0.9896172700423226, + "grad_norm": 1.0894766890740644, + "learning_rate": 4.675100088833314e-06, + "loss": 0.4207, + "step": 16251 + }, + { + "epoch": 0.9896781658192004, + "grad_norm": 0.9918779810691079, + "learning_rate": 4.675060749905268e-06, + "loss": 0.3709, + "step": 16252 + }, + { + "epoch": 0.9897390615960783, + "grad_norm": 1.0569581006333355, + "learning_rate": 4.675021408761316e-06, + "loss": 0.4384, + "step": 16253 + }, + { + "epoch": 0.9897999573729562, + "grad_norm": 1.0388311084455797, + "learning_rate": 4.674982065401495e-06, + "loss": 0.3873, + "step": 16254 + }, + { + "epoch": 0.9898608531498341, + "grad_norm": 1.0331859436626376, + "learning_rate": 4.674942719825849e-06, + "loss": 0.4238, + "step": 16255 + }, + { + "epoch": 0.9899217489267119, + "grad_norm": 0.9713212918706713, + "learning_rate": 4.6749033720344165e-06, + "loss": 0.4441, + "step": 16256 + }, + { + "epoch": 0.9899826447035898, + "grad_norm": 1.0502130850545415, + "learning_rate": 4.674864022027238e-06, + "loss": 0.3587, + "step": 16257 + }, + { + "epoch": 0.9900435404804677, + "grad_norm": 1.0576056258400464, + "learning_rate": 4.674824669804353e-06, + "loss": 0.4433, + "step": 16258 + }, + { + "epoch": 0.9901044362573456, + "grad_norm": 0.995711838618686, + "learning_rate": 4.674785315365801e-06, + "loss": 0.3834, + "step": 16259 + }, + { + "epoch": 0.9901653320342234, + "grad_norm": 1.058693850794683, + "learning_rate": 4.674745958711624e-06, + "loss": 0.4512, + "step": 16260 + }, + { + "epoch": 0.9902262278111013, + "grad_norm": 1.058882062686604, + "learning_rate": 4.67470659984186e-06, + "loss": 0.3542, + "step": 16261 + }, + { + "epoch": 0.9902871235879792, + "grad_norm": 0.9455845117074854, + "learning_rate": 4.6746672387565515e-06, + "loss": 0.4561, + "step": 16262 + }, + { + "epoch": 0.9903480193648571, + "grad_norm": 1.0477889628798283, + "learning_rate": 4.6746278754557364e-06, + "loss": 0.4046, + "step": 16263 + }, + { + "epoch": 0.9904089151417349, + "grad_norm": 1.045334828589822, + "learning_rate": 4.674588509939456e-06, + "loss": 0.3729, + "step": 16264 + }, + { + "epoch": 0.9904698109186127, + "grad_norm": 1.045387815022639, + "learning_rate": 4.67454914220775e-06, + "loss": 0.3531, + "step": 16265 + }, + { + "epoch": 0.9905307066954907, + "grad_norm": 1.068805419381018, + "learning_rate": 4.6745097722606585e-06, + "loss": 0.4229, + "step": 16266 + }, + { + "epoch": 0.9905916024723685, + "grad_norm": 1.0297120174611025, + "learning_rate": 4.674470400098221e-06, + "loss": 0.3651, + "step": 16267 + }, + { + "epoch": 0.9906524982492464, + "grad_norm": 1.0037499631015478, + "learning_rate": 4.6744310257204785e-06, + "loss": 0.3381, + "step": 16268 + }, + { + "epoch": 0.9907133940261242, + "grad_norm": 0.9429108554023938, + "learning_rate": 4.674391649127471e-06, + "loss": 0.4049, + "step": 16269 + }, + { + "epoch": 0.9907742898030022, + "grad_norm": 1.048716010246691, + "learning_rate": 4.674352270319239e-06, + "loss": 0.3775, + "step": 16270 + }, + { + "epoch": 0.99083518557988, + "grad_norm": 1.087254879621394, + "learning_rate": 4.674312889295821e-06, + "loss": 0.41, + "step": 16271 + }, + { + "epoch": 0.9908960813567579, + "grad_norm": 1.010346845811098, + "learning_rate": 4.67427350605726e-06, + "loss": 0.3114, + "step": 16272 + }, + { + "epoch": 0.9909569771336357, + "grad_norm": 0.9940983342052173, + "learning_rate": 4.6742341206035925e-06, + "loss": 0.3603, + "step": 16273 + }, + { + "epoch": 0.9910178729105137, + "grad_norm": 1.0463912584497175, + "learning_rate": 4.674194732934861e-06, + "loss": 0.4234, + "step": 16274 + }, + { + "epoch": 0.9910787686873915, + "grad_norm": 1.0975208322177818, + "learning_rate": 4.674155343051105e-06, + "loss": 0.3633, + "step": 16275 + }, + { + "epoch": 0.9911396644642694, + "grad_norm": 1.0539020486175188, + "learning_rate": 4.674115950952366e-06, + "loss": 0.3935, + "step": 16276 + }, + { + "epoch": 0.9912005602411472, + "grad_norm": 1.0212191875958059, + "learning_rate": 4.674076556638682e-06, + "loss": 0.3857, + "step": 16277 + }, + { + "epoch": 0.9912614560180252, + "grad_norm": 1.0007857106958076, + "learning_rate": 4.6740371601100946e-06, + "loss": 0.4116, + "step": 16278 + }, + { + "epoch": 0.991322351794903, + "grad_norm": 0.9560539274369929, + "learning_rate": 4.673997761366642e-06, + "loss": 0.4426, + "step": 16279 + }, + { + "epoch": 0.9913832475717809, + "grad_norm": 0.9681195240652467, + "learning_rate": 4.673958360408367e-06, + "loss": 0.3437, + "step": 16280 + }, + { + "epoch": 0.9914441433486588, + "grad_norm": 0.9376118157833015, + "learning_rate": 4.673918957235307e-06, + "loss": 0.4358, + "step": 16281 + }, + { + "epoch": 0.9915050391255367, + "grad_norm": 1.088477901801933, + "learning_rate": 4.673879551847505e-06, + "loss": 0.4287, + "step": 16282 + }, + { + "epoch": 0.9915659349024145, + "grad_norm": 1.0542361561539992, + "learning_rate": 4.673840144244999e-06, + "loss": 0.4196, + "step": 16283 + }, + { + "epoch": 0.9916268306792924, + "grad_norm": 1.0166804739813673, + "learning_rate": 4.67380073442783e-06, + "loss": 0.4449, + "step": 16284 + }, + { + "epoch": 0.9916877264561703, + "grad_norm": 1.0871946062490336, + "learning_rate": 4.673761322396038e-06, + "loss": 0.3604, + "step": 16285 + }, + { + "epoch": 0.9917486222330482, + "grad_norm": 1.089631797583479, + "learning_rate": 4.673721908149663e-06, + "loss": 0.3832, + "step": 16286 + }, + { + "epoch": 0.991809518009926, + "grad_norm": 0.9313172533764063, + "learning_rate": 4.673682491688746e-06, + "loss": 0.4965, + "step": 16287 + }, + { + "epoch": 0.9918704137868039, + "grad_norm": 1.032343556521044, + "learning_rate": 4.673643073013325e-06, + "loss": 0.4143, + "step": 16288 + }, + { + "epoch": 0.9919313095636818, + "grad_norm": 0.8816777311605312, + "learning_rate": 4.673603652123443e-06, + "loss": 0.4712, + "step": 16289 + }, + { + "epoch": 0.9919922053405597, + "grad_norm": 1.0215811968922381, + "learning_rate": 4.673564229019139e-06, + "loss": 0.3866, + "step": 16290 + }, + { + "epoch": 0.9920531011174375, + "grad_norm": 1.0082187856503215, + "learning_rate": 4.673524803700452e-06, + "loss": 0.3834, + "step": 16291 + }, + { + "epoch": 0.9921139968943153, + "grad_norm": 1.0788121597477218, + "learning_rate": 4.673485376167423e-06, + "loss": 0.3807, + "step": 16292 + }, + { + "epoch": 0.9921748926711933, + "grad_norm": 0.9656254906201728, + "learning_rate": 4.673445946420093e-06, + "loss": 0.384, + "step": 16293 + }, + { + "epoch": 0.9922357884480711, + "grad_norm": 0.9351576119490655, + "learning_rate": 4.673406514458501e-06, + "loss": 0.4762, + "step": 16294 + }, + { + "epoch": 0.992296684224949, + "grad_norm": 1.0331636279333793, + "learning_rate": 4.673367080282688e-06, + "loss": 0.3496, + "step": 16295 + }, + { + "epoch": 0.9923575800018268, + "grad_norm": 0.9649964983867222, + "learning_rate": 4.673327643892693e-06, + "loss": 0.3909, + "step": 16296 + }, + { + "epoch": 0.9924184757787048, + "grad_norm": 0.9874064750430391, + "learning_rate": 4.673288205288558e-06, + "loss": 0.4001, + "step": 16297 + }, + { + "epoch": 0.9924793715555826, + "grad_norm": 1.1034046345402293, + "learning_rate": 4.673248764470322e-06, + "loss": 0.4108, + "step": 16298 + }, + { + "epoch": 0.9925402673324605, + "grad_norm": 1.0104716180378681, + "learning_rate": 4.673209321438025e-06, + "loss": 0.4031, + "step": 16299 + }, + { + "epoch": 0.9926011631093383, + "grad_norm": 1.0055427585059935, + "learning_rate": 4.673169876191708e-06, + "loss": 0.3588, + "step": 16300 + }, + { + "epoch": 0.9926620588862163, + "grad_norm": 1.0039599996053512, + "learning_rate": 4.67313042873141e-06, + "loss": 0.3987, + "step": 16301 + }, + { + "epoch": 0.9927229546630941, + "grad_norm": 1.0324394408261715, + "learning_rate": 4.673090979057172e-06, + "loss": 0.5085, + "step": 16302 + }, + { + "epoch": 0.992783850439972, + "grad_norm": 1.042344520617762, + "learning_rate": 4.673051527169035e-06, + "loss": 0.4309, + "step": 16303 + }, + { + "epoch": 0.9928447462168498, + "grad_norm": 1.0709503507436016, + "learning_rate": 4.673012073067037e-06, + "loss": 0.4045, + "step": 16304 + }, + { + "epoch": 0.9929056419937278, + "grad_norm": 1.0193122854160725, + "learning_rate": 4.6729726167512206e-06, + "loss": 0.414, + "step": 16305 + }, + { + "epoch": 0.9929665377706056, + "grad_norm": 1.0527142396465807, + "learning_rate": 4.6729331582216245e-06, + "loss": 0.3968, + "step": 16306 + }, + { + "epoch": 0.9930274335474835, + "grad_norm": 0.9593302473500128, + "learning_rate": 4.672893697478288e-06, + "loss": 0.4066, + "step": 16307 + }, + { + "epoch": 0.9930883293243613, + "grad_norm": 1.0440406528333759, + "learning_rate": 4.672854234521254e-06, + "loss": 0.385, + "step": 16308 + }, + { + "epoch": 0.9931492251012393, + "grad_norm": 0.9942177885955104, + "learning_rate": 4.672814769350561e-06, + "loss": 0.3434, + "step": 16309 + }, + { + "epoch": 0.9932101208781171, + "grad_norm": 1.0346839969910682, + "learning_rate": 4.67277530196625e-06, + "loss": 0.3417, + "step": 16310 + }, + { + "epoch": 0.993271016654995, + "grad_norm": 0.9433808844719926, + "learning_rate": 4.67273583236836e-06, + "loss": 0.4159, + "step": 16311 + }, + { + "epoch": 0.9933319124318728, + "grad_norm": 0.9878233813770662, + "learning_rate": 4.672696360556932e-06, + "loss": 0.4439, + "step": 16312 + }, + { + "epoch": 0.9933928082087508, + "grad_norm": 1.0364978064497319, + "learning_rate": 4.672656886532007e-06, + "loss": 0.3355, + "step": 16313 + }, + { + "epoch": 0.9934537039856286, + "grad_norm": 0.9999217265040813, + "learning_rate": 4.6726174102936225e-06, + "loss": 0.421, + "step": 16314 + }, + { + "epoch": 0.9935145997625064, + "grad_norm": 0.9940130745287283, + "learning_rate": 4.672577931841822e-06, + "loss": 0.349, + "step": 16315 + }, + { + "epoch": 0.9935754955393843, + "grad_norm": 1.0401609164790568, + "learning_rate": 4.672538451176643e-06, + "loss": 0.3476, + "step": 16316 + }, + { + "epoch": 0.9936363913162622, + "grad_norm": 1.0099047574044397, + "learning_rate": 4.672498968298128e-06, + "loss": 0.4202, + "step": 16317 + }, + { + "epoch": 0.9936972870931401, + "grad_norm": 0.9872749304097747, + "learning_rate": 4.672459483206316e-06, + "loss": 0.3921, + "step": 16318 + }, + { + "epoch": 0.9937581828700179, + "grad_norm": 1.037475639215681, + "learning_rate": 4.6724199959012474e-06, + "loss": 0.348, + "step": 16319 + }, + { + "epoch": 0.9938190786468959, + "grad_norm": 1.002517908461545, + "learning_rate": 4.672380506382963e-06, + "loss": 0.4794, + "step": 16320 + }, + { + "epoch": 0.9938799744237737, + "grad_norm": 1.0530432539505288, + "learning_rate": 4.672341014651502e-06, + "loss": 0.3368, + "step": 16321 + }, + { + "epoch": 0.9939408702006516, + "grad_norm": 0.9732399869834257, + "learning_rate": 4.6723015207069055e-06, + "loss": 0.4205, + "step": 16322 + }, + { + "epoch": 0.9940017659775294, + "grad_norm": 1.045106658192609, + "learning_rate": 4.672262024549213e-06, + "loss": 0.3985, + "step": 16323 + }, + { + "epoch": 0.9940626617544074, + "grad_norm": 1.0895904773480876, + "learning_rate": 4.6722225261784644e-06, + "loss": 0.3688, + "step": 16324 + }, + { + "epoch": 0.9941235575312852, + "grad_norm": 0.9682953590148965, + "learning_rate": 4.672183025594702e-06, + "loss": 0.475, + "step": 16325 + }, + { + "epoch": 0.9941844533081631, + "grad_norm": 0.9529084986780416, + "learning_rate": 4.672143522797964e-06, + "loss": 0.4622, + "step": 16326 + }, + { + "epoch": 0.9942453490850409, + "grad_norm": 0.9810007894919436, + "learning_rate": 4.672104017788292e-06, + "loss": 0.4108, + "step": 16327 + }, + { + "epoch": 0.9943062448619189, + "grad_norm": 1.069183184988766, + "learning_rate": 4.672064510565725e-06, + "loss": 0.4384, + "step": 16328 + }, + { + "epoch": 0.9943671406387967, + "grad_norm": 0.9309586033970184, + "learning_rate": 4.672025001130304e-06, + "loss": 0.4234, + "step": 16329 + }, + { + "epoch": 0.9944280364156746, + "grad_norm": 0.9803140487046301, + "learning_rate": 4.671985489482069e-06, + "loss": 0.3775, + "step": 16330 + }, + { + "epoch": 0.9944889321925524, + "grad_norm": 1.0324257934129235, + "learning_rate": 4.67194597562106e-06, + "loss": 0.3666, + "step": 16331 + }, + { + "epoch": 0.9945498279694304, + "grad_norm": 1.0292243410557242, + "learning_rate": 4.671906459547319e-06, + "loss": 0.4693, + "step": 16332 + }, + { + "epoch": 0.9946107237463082, + "grad_norm": 0.9547298523674533, + "learning_rate": 4.671866941260883e-06, + "loss": 0.4473, + "step": 16333 + }, + { + "epoch": 0.9946716195231861, + "grad_norm": 1.0410101238831113, + "learning_rate": 4.671827420761795e-06, + "loss": 0.3463, + "step": 16334 + }, + { + "epoch": 0.9947325153000639, + "grad_norm": 1.0170073206605121, + "learning_rate": 4.671787898050094e-06, + "loss": 0.3802, + "step": 16335 + }, + { + "epoch": 0.9947934110769419, + "grad_norm": 1.0040332017322384, + "learning_rate": 4.671748373125821e-06, + "loss": 0.3739, + "step": 16336 + }, + { + "epoch": 0.9948543068538197, + "grad_norm": 0.9305764389871382, + "learning_rate": 4.6717088459890155e-06, + "loss": 0.4706, + "step": 16337 + }, + { + "epoch": 0.9949152026306975, + "grad_norm": 1.066649946797029, + "learning_rate": 4.671669316639719e-06, + "loss": 0.3241, + "step": 16338 + }, + { + "epoch": 0.9949760984075754, + "grad_norm": 0.9878202871356185, + "learning_rate": 4.6716297850779705e-06, + "loss": 0.3692, + "step": 16339 + }, + { + "epoch": 0.9950369941844533, + "grad_norm": 1.0464913951589545, + "learning_rate": 4.67159025130381e-06, + "loss": 0.4081, + "step": 16340 + }, + { + "epoch": 0.9950978899613312, + "grad_norm": 0.998017930044991, + "learning_rate": 4.67155071531728e-06, + "loss": 0.4032, + "step": 16341 + }, + { + "epoch": 0.995158785738209, + "grad_norm": 0.9994437242289504, + "learning_rate": 4.671511177118419e-06, + "loss": 0.4753, + "step": 16342 + }, + { + "epoch": 0.9952196815150869, + "grad_norm": 0.9578218743138102, + "learning_rate": 4.671471636707267e-06, + "loss": 0.4031, + "step": 16343 + }, + { + "epoch": 0.9952805772919648, + "grad_norm": 0.9807301862954403, + "learning_rate": 4.671432094083864e-06, + "loss": 0.3964, + "step": 16344 + }, + { + "epoch": 0.9953414730688427, + "grad_norm": 1.04508241736064, + "learning_rate": 4.6713925492482525e-06, + "loss": 0.373, + "step": 16345 + }, + { + "epoch": 0.9954023688457205, + "grad_norm": 0.9739951100955638, + "learning_rate": 4.67135300220047e-06, + "loss": 0.3399, + "step": 16346 + }, + { + "epoch": 0.9954632646225984, + "grad_norm": 0.9875117413809921, + "learning_rate": 4.6713134529405595e-06, + "loss": 0.4112, + "step": 16347 + }, + { + "epoch": 0.9955241603994763, + "grad_norm": 0.9820014426903569, + "learning_rate": 4.67127390146856e-06, + "loss": 0.3761, + "step": 16348 + }, + { + "epoch": 0.9955850561763542, + "grad_norm": 0.9592900044563676, + "learning_rate": 4.671234347784511e-06, + "loss": 0.3881, + "step": 16349 + }, + { + "epoch": 0.995645951953232, + "grad_norm": 1.0943232527860531, + "learning_rate": 4.671194791888454e-06, + "loss": 0.386, + "step": 16350 + }, + { + "epoch": 0.9957068477301099, + "grad_norm": 1.0374583679423184, + "learning_rate": 4.671155233780429e-06, + "loss": 0.3919, + "step": 16351 + }, + { + "epoch": 0.9957677435069878, + "grad_norm": 0.9594233777540153, + "learning_rate": 4.671115673460476e-06, + "loss": 0.4099, + "step": 16352 + }, + { + "epoch": 0.9958286392838657, + "grad_norm": 1.0215389808999609, + "learning_rate": 4.671076110928635e-06, + "loss": 0.3976, + "step": 16353 + }, + { + "epoch": 0.9958895350607435, + "grad_norm": 1.0232541294811832, + "learning_rate": 4.671036546184948e-06, + "loss": 0.3674, + "step": 16354 + }, + { + "epoch": 0.9959504308376214, + "grad_norm": 1.0115287969654208, + "learning_rate": 4.670996979229454e-06, + "loss": 0.3629, + "step": 16355 + }, + { + "epoch": 0.9960113266144993, + "grad_norm": 1.0503680985403778, + "learning_rate": 4.670957410062193e-06, + "loss": 0.3587, + "step": 16356 + }, + { + "epoch": 0.9960722223913772, + "grad_norm": 1.034383768771892, + "learning_rate": 4.670917838683205e-06, + "loss": 0.4309, + "step": 16357 + }, + { + "epoch": 0.996133118168255, + "grad_norm": 1.0450714434728452, + "learning_rate": 4.6708782650925314e-06, + "loss": 0.4784, + "step": 16358 + }, + { + "epoch": 0.9961940139451329, + "grad_norm": 1.0107185255483133, + "learning_rate": 4.670838689290212e-06, + "loss": 0.4302, + "step": 16359 + }, + { + "epoch": 0.9962549097220108, + "grad_norm": 1.1875008335532649, + "learning_rate": 4.670799111276289e-06, + "loss": 0.3747, + "step": 16360 + }, + { + "epoch": 0.9963158054988887, + "grad_norm": 0.9629892059434939, + "learning_rate": 4.670759531050799e-06, + "loss": 0.4342, + "step": 16361 + }, + { + "epoch": 0.9963767012757665, + "grad_norm": 1.031352796195665, + "learning_rate": 4.670719948613785e-06, + "loss": 0.3628, + "step": 16362 + }, + { + "epoch": 0.9964375970526445, + "grad_norm": 0.9554157292621482, + "learning_rate": 4.670680363965286e-06, + "loss": 0.4285, + "step": 16363 + }, + { + "epoch": 0.9964984928295223, + "grad_norm": 1.0399704544494819, + "learning_rate": 4.6706407771053444e-06, + "loss": 0.3899, + "step": 16364 + }, + { + "epoch": 0.9965593886064001, + "grad_norm": 1.0844311429024895, + "learning_rate": 4.6706011880339985e-06, + "loss": 0.3914, + "step": 16365 + }, + { + "epoch": 0.996620284383278, + "grad_norm": 1.0344217323527458, + "learning_rate": 4.670561596751288e-06, + "loss": 0.3942, + "step": 16366 + }, + { + "epoch": 0.9966811801601559, + "grad_norm": 0.9326651887319554, + "learning_rate": 4.6705220032572565e-06, + "loss": 0.3986, + "step": 16367 + }, + { + "epoch": 0.9967420759370338, + "grad_norm": 0.9620585839178937, + "learning_rate": 4.670482407551941e-06, + "loss": 0.3408, + "step": 16368 + }, + { + "epoch": 0.9968029717139116, + "grad_norm": 0.9680724028117165, + "learning_rate": 4.670442809635384e-06, + "loss": 0.4608, + "step": 16369 + }, + { + "epoch": 0.9968638674907895, + "grad_norm": 1.0301034152723434, + "learning_rate": 4.670403209507624e-06, + "loss": 0.38, + "step": 16370 + }, + { + "epoch": 0.9969247632676674, + "grad_norm": 0.9910474368570111, + "learning_rate": 4.670363607168703e-06, + "loss": 0.4619, + "step": 16371 + }, + { + "epoch": 0.9969856590445453, + "grad_norm": 0.931230593200955, + "learning_rate": 4.670324002618661e-06, + "loss": 0.3936, + "step": 16372 + }, + { + "epoch": 0.9970465548214231, + "grad_norm": 0.9537297679479763, + "learning_rate": 4.670284395857537e-06, + "loss": 0.4222, + "step": 16373 + }, + { + "epoch": 0.997107450598301, + "grad_norm": 1.0583320705781079, + "learning_rate": 4.6702447868853724e-06, + "loss": 0.4056, + "step": 16374 + }, + { + "epoch": 0.9971683463751789, + "grad_norm": 1.0011696460622266, + "learning_rate": 4.6702051757022085e-06, + "loss": 0.3664, + "step": 16375 + }, + { + "epoch": 0.9972292421520568, + "grad_norm": 0.9666764281351278, + "learning_rate": 4.6701655623080845e-06, + "loss": 0.4096, + "step": 16376 + }, + { + "epoch": 0.9972901379289346, + "grad_norm": 0.9531675803440088, + "learning_rate": 4.67012594670304e-06, + "loss": 0.4165, + "step": 16377 + }, + { + "epoch": 0.9973510337058125, + "grad_norm": 1.082327989042442, + "learning_rate": 4.670086328887117e-06, + "loss": 0.3907, + "step": 16378 + }, + { + "epoch": 0.9974119294826904, + "grad_norm": 1.0683460335381358, + "learning_rate": 4.670046708860355e-06, + "loss": 0.4018, + "step": 16379 + }, + { + "epoch": 0.9974728252595683, + "grad_norm": 0.970078006364967, + "learning_rate": 4.670007086622795e-06, + "loss": 0.4625, + "step": 16380 + }, + { + "epoch": 0.9975337210364461, + "grad_norm": 1.0102176753823824, + "learning_rate": 4.669967462174477e-06, + "loss": 0.4453, + "step": 16381 + }, + { + "epoch": 0.997594616813324, + "grad_norm": 0.9865567656183907, + "learning_rate": 4.669927835515441e-06, + "loss": 0.4652, + "step": 16382 + }, + { + "epoch": 0.9976555125902019, + "grad_norm": 1.119278475225428, + "learning_rate": 4.669888206645728e-06, + "loss": 0.3714, + "step": 16383 + }, + { + "epoch": 0.9977164083670798, + "grad_norm": 1.0315483628892081, + "learning_rate": 4.669848575565377e-06, + "loss": 0.3971, + "step": 16384 + }, + { + "epoch": 0.9977773041439576, + "grad_norm": 1.0365113854117551, + "learning_rate": 4.6698089422744296e-06, + "loss": 0.3979, + "step": 16385 + }, + { + "epoch": 0.9978381999208354, + "grad_norm": 0.980647724185664, + "learning_rate": 4.669769306772925e-06, + "loss": 0.407, + "step": 16386 + }, + { + "epoch": 0.9978990956977134, + "grad_norm": 0.9935504122642005, + "learning_rate": 4.669729669060907e-06, + "loss": 0.4237, + "step": 16387 + }, + { + "epoch": 0.9979599914745912, + "grad_norm": 0.9930618616457887, + "learning_rate": 4.669690029138412e-06, + "loss": 0.4295, + "step": 16388 + }, + { + "epoch": 0.9980208872514691, + "grad_norm": 1.0576958614047776, + "learning_rate": 4.669650387005482e-06, + "loss": 0.3786, + "step": 16389 + }, + { + "epoch": 0.9980817830283469, + "grad_norm": 0.972812520703079, + "learning_rate": 4.669610742662157e-06, + "loss": 0.4446, + "step": 16390 + }, + { + "epoch": 0.9981426788052249, + "grad_norm": 1.0037943036141155, + "learning_rate": 4.669571096108478e-06, + "loss": 0.431, + "step": 16391 + }, + { + "epoch": 0.9982035745821027, + "grad_norm": 1.0252114480908174, + "learning_rate": 4.669531447344487e-06, + "loss": 0.4084, + "step": 16392 + }, + { + "epoch": 0.9982644703589806, + "grad_norm": 0.9739527266135527, + "learning_rate": 4.66949179637022e-06, + "loss": 0.4842, + "step": 16393 + }, + { + "epoch": 0.9983253661358584, + "grad_norm": 1.0130386950581087, + "learning_rate": 4.66945214318572e-06, + "loss": 0.3732, + "step": 16394 + }, + { + "epoch": 0.9983862619127364, + "grad_norm": 1.0405944187073484, + "learning_rate": 4.669412487791028e-06, + "loss": 0.36, + "step": 16395 + }, + { + "epoch": 0.9984471576896142, + "grad_norm": 0.9583178599587318, + "learning_rate": 4.669372830186183e-06, + "loss": 0.4552, + "step": 16396 + }, + { + "epoch": 0.9985080534664921, + "grad_norm": 1.0363012610134228, + "learning_rate": 4.669333170371227e-06, + "loss": 0.4199, + "step": 16397 + }, + { + "epoch": 0.9985689492433699, + "grad_norm": 0.9873669037740708, + "learning_rate": 4.6692935083461985e-06, + "loss": 0.4166, + "step": 16398 + }, + { + "epoch": 0.9986298450202479, + "grad_norm": 0.9992838399354966, + "learning_rate": 4.66925384411114e-06, + "loss": 0.3265, + "step": 16399 + }, + { + "epoch": 0.9986907407971257, + "grad_norm": 0.9678830883731682, + "learning_rate": 4.669214177666091e-06, + "loss": 0.3793, + "step": 16400 + }, + { + "epoch": 0.9987516365740036, + "grad_norm": 0.9196016628246546, + "learning_rate": 4.669174509011089e-06, + "loss": 0.4241, + "step": 16401 + }, + { + "epoch": 0.9988125323508815, + "grad_norm": 0.9669122223366493, + "learning_rate": 4.6691348381461795e-06, + "loss": 0.42, + "step": 16402 + }, + { + "epoch": 0.9988734281277594, + "grad_norm": 0.9615998768573056, + "learning_rate": 4.6690951650714e-06, + "loss": 0.4194, + "step": 16403 + }, + { + "epoch": 0.9989343239046372, + "grad_norm": 0.9942423070328413, + "learning_rate": 4.669055489786792e-06, + "loss": 0.3746, + "step": 16404 + }, + { + "epoch": 0.9989952196815151, + "grad_norm": 0.9562264871781344, + "learning_rate": 4.669015812292395e-06, + "loss": 0.3459, + "step": 16405 + }, + { + "epoch": 0.999056115458393, + "grad_norm": 1.0082491958643642, + "learning_rate": 4.668976132588249e-06, + "loss": 0.4328, + "step": 16406 + }, + { + "epoch": 0.9991170112352709, + "grad_norm": 0.9783937948739571, + "learning_rate": 4.668936450674396e-06, + "loss": 0.3405, + "step": 16407 + }, + { + "epoch": 0.9991779070121487, + "grad_norm": 0.9957657852627952, + "learning_rate": 4.668896766550875e-06, + "loss": 0.3499, + "step": 16408 + }, + { + "epoch": 0.9992388027890265, + "grad_norm": 1.0474684196312256, + "learning_rate": 4.668857080217728e-06, + "loss": 0.4395, + "step": 16409 + }, + { + "epoch": 0.9992996985659045, + "grad_norm": 1.0007812562738438, + "learning_rate": 4.668817391674994e-06, + "loss": 0.4676, + "step": 16410 + }, + { + "epoch": 0.9993605943427823, + "grad_norm": 1.020394506399858, + "learning_rate": 4.668777700922715e-06, + "loss": 0.3963, + "step": 16411 + }, + { + "epoch": 0.9994214901196602, + "grad_norm": 1.097071825750329, + "learning_rate": 4.668738007960928e-06, + "loss": 0.3816, + "step": 16412 + }, + { + "epoch": 0.999482385896538, + "grad_norm": 1.0260989571324572, + "learning_rate": 4.6686983127896775e-06, + "loss": 0.4521, + "step": 16413 + }, + { + "epoch": 0.999543281673416, + "grad_norm": 0.9659077091295296, + "learning_rate": 4.668658615409002e-06, + "loss": 0.3752, + "step": 16414 + }, + { + "epoch": 0.9996041774502938, + "grad_norm": 1.0294640623499853, + "learning_rate": 4.668618915818942e-06, + "loss": 0.405, + "step": 16415 + }, + { + "epoch": 0.9996650732271717, + "grad_norm": 0.8393346663369416, + "learning_rate": 4.668579214019538e-06, + "loss": 0.4625, + "step": 16416 + }, + { + "epoch": 0.9997259690040495, + "grad_norm": 0.9758446870082272, + "learning_rate": 4.668539510010831e-06, + "loss": 0.3582, + "step": 16417 + }, + { + "epoch": 0.9997868647809275, + "grad_norm": 0.9648370106636013, + "learning_rate": 4.668499803792861e-06, + "loss": 0.3784, + "step": 16418 + }, + { + "epoch": 0.9998477605578053, + "grad_norm": 0.960900385331089, + "learning_rate": 4.6684600953656676e-06, + "loss": 0.3498, + "step": 16419 + }, + { + "epoch": 0.9999086563346832, + "grad_norm": 0.9539832337050741, + "learning_rate": 4.668420384729293e-06, + "loss": 0.4093, + "step": 16420 + }, + { + "epoch": 0.999969552111561, + "grad_norm": 0.927549372575992, + "learning_rate": 4.668380671883776e-06, + "loss": 0.4226, + "step": 16421 + } + ], + "logging_steps": 1, + "max_steps": 98526, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 16421, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 529625749782528.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}