diff --git "a/qwen3-vl-4b-agentnet-50pect-bsz384-step1824/trainer_state.json" "b/qwen3-vl-4b-agentnet-50pect-bsz384-step1824/trainer_state.json" new file mode 100644--- /dev/null +++ "b/qwen3-vl-4b-agentnet-50pect-bsz384-step1824/trainer_state.json" @@ -0,0 +1,12802 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 10000.0, + "global_step": 1824, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0005482456140350877, + "grad_norm": 14.492247581481934, + "learning_rate": 2.173913043478261e-07, + "loss": 1.176474928855896, + "step": 1 + }, + { + "epoch": 0.0010964912280701754, + "grad_norm": 14.619416236877441, + "learning_rate": 4.347826086956522e-07, + "loss": 1.1729357242584229, + "step": 2 + }, + { + "epoch": 0.001644736842105263, + "grad_norm": 14.84022045135498, + "learning_rate": 6.521739130434783e-07, + "loss": 1.2002801895141602, + "step": 3 + }, + { + "epoch": 0.0021929824561403508, + "grad_norm": 14.705394744873047, + "learning_rate": 8.695652173913044e-07, + "loss": 1.1794195175170898, + "step": 4 + }, + { + "epoch": 0.0027412280701754384, + "grad_norm": 14.440802574157715, + "learning_rate": 1.0869565217391306e-06, + "loss": 1.1603755950927734, + "step": 5 + }, + { + "epoch": 0.003289473684210526, + "grad_norm": 13.81436824798584, + "learning_rate": 1.3043478260869566e-06, + "loss": 1.129558801651001, + "step": 6 + }, + { + "epoch": 0.003837719298245614, + "grad_norm": 13.689131736755371, + "learning_rate": 1.521739130434783e-06, + "loss": 1.1161764860153198, + "step": 7 + }, + { + "epoch": 0.0043859649122807015, + "grad_norm": 12.181388854980469, + "learning_rate": 1.7391304347826088e-06, + "loss": 1.0747873783111572, + "step": 8 + }, + { + "epoch": 0.004934210526315789, + "grad_norm": 11.249547958374023, + "learning_rate": 1.956521739130435e-06, + "loss": 1.041851282119751, + "step": 9 + }, + { + "epoch": 0.005482456140350877, + "grad_norm": 7.409827709197998, + "learning_rate": 2.173913043478261e-06, + "loss": 0.9274959564208984, + "step": 10 + }, + { + "epoch": 0.006030701754385965, + "grad_norm": 6.691342353820801, + "learning_rate": 2.391304347826087e-06, + "loss": 0.8955242037773132, + "step": 11 + }, + { + "epoch": 0.006578947368421052, + "grad_norm": 5.925269603729248, + "learning_rate": 2.6086956521739132e-06, + "loss": 0.8827022314071655, + "step": 12 + }, + { + "epoch": 0.00712719298245614, + "grad_norm": 5.35697078704834, + "learning_rate": 2.8260869565217393e-06, + "loss": 0.8560182452201843, + "step": 13 + }, + { + "epoch": 0.007675438596491228, + "grad_norm": 2.069866895675659, + "learning_rate": 3.043478260869566e-06, + "loss": 0.7693394422531128, + "step": 14 + }, + { + "epoch": 0.008223684210526315, + "grad_norm": 1.9829829931259155, + "learning_rate": 3.2608695652173914e-06, + "loss": 0.7666728496551514, + "step": 15 + }, + { + "epoch": 0.008771929824561403, + "grad_norm": 2.0215251445770264, + "learning_rate": 3.4782608695652175e-06, + "loss": 0.7644944190979004, + "step": 16 + }, + { + "epoch": 0.00932017543859649, + "grad_norm": 1.8338474035263062, + "learning_rate": 3.6956521739130436e-06, + "loss": 0.764083743095398, + "step": 17 + }, + { + "epoch": 0.009868421052631578, + "grad_norm": 1.620351791381836, + "learning_rate": 3.91304347826087e-06, + "loss": 0.7492412328720093, + "step": 18 + }, + { + "epoch": 0.010416666666666666, + "grad_norm": 2.259765863418579, + "learning_rate": 4.130434782608696e-06, + "loss": 0.7317222952842712, + "step": 19 + }, + { + "epoch": 0.010964912280701754, + "grad_norm": 2.3859360218048096, + "learning_rate": 4.347826086956522e-06, + "loss": 0.743137776851654, + "step": 20 + }, + { + "epoch": 0.011513157894736841, + "grad_norm": 1.8937352895736694, + "learning_rate": 4.565217391304348e-06, + "loss": 0.7197248935699463, + "step": 21 + }, + { + "epoch": 0.01206140350877193, + "grad_norm": 1.3714830875396729, + "learning_rate": 4.782608695652174e-06, + "loss": 0.7112306356430054, + "step": 22 + }, + { + "epoch": 0.012609649122807017, + "grad_norm": 1.0794036388397217, + "learning_rate": 5e-06, + "loss": 0.7069730758666992, + "step": 23 + }, + { + "epoch": 0.013157894736842105, + "grad_norm": 1.132503628730774, + "learning_rate": 5.2173913043478265e-06, + "loss": 0.7090061902999878, + "step": 24 + }, + { + "epoch": 0.013706140350877192, + "grad_norm": 1.0365830659866333, + "learning_rate": 5.4347826086956525e-06, + "loss": 0.6998612284660339, + "step": 25 + }, + { + "epoch": 0.01425438596491228, + "grad_norm": 0.9834604859352112, + "learning_rate": 5.652173913043479e-06, + "loss": 0.6926441192626953, + "step": 26 + }, + { + "epoch": 0.014802631578947368, + "grad_norm": 1.0544486045837402, + "learning_rate": 5.8695652173913055e-06, + "loss": 0.665709376335144, + "step": 27 + }, + { + "epoch": 0.015350877192982455, + "grad_norm": 0.98713219165802, + "learning_rate": 6.086956521739132e-06, + "loss": 0.6699036955833435, + "step": 28 + }, + { + "epoch": 0.015899122807017545, + "grad_norm": 0.9099668860435486, + "learning_rate": 6.304347826086958e-06, + "loss": 0.6747962236404419, + "step": 29 + }, + { + "epoch": 0.01644736842105263, + "grad_norm": 0.9079251885414124, + "learning_rate": 6.521739130434783e-06, + "loss": 0.6670761108398438, + "step": 30 + }, + { + "epoch": 0.01699561403508772, + "grad_norm": 0.9192748069763184, + "learning_rate": 6.739130434782609e-06, + "loss": 0.6650468707084656, + "step": 31 + }, + { + "epoch": 0.017543859649122806, + "grad_norm": 0.8508347868919373, + "learning_rate": 6.956521739130435e-06, + "loss": 0.6534518003463745, + "step": 32 + }, + { + "epoch": 0.018092105263157895, + "grad_norm": 0.7839338183403015, + "learning_rate": 7.173913043478261e-06, + "loss": 0.6688723564147949, + "step": 33 + }, + { + "epoch": 0.01864035087719298, + "grad_norm": 0.8197664022445679, + "learning_rate": 7.391304347826087e-06, + "loss": 0.6579426527023315, + "step": 34 + }, + { + "epoch": 0.01918859649122807, + "grad_norm": 0.7888069748878479, + "learning_rate": 7.608695652173914e-06, + "loss": 0.6640398502349854, + "step": 35 + }, + { + "epoch": 0.019736842105263157, + "grad_norm": 0.7756356596946716, + "learning_rate": 7.82608695652174e-06, + "loss": 0.6506577730178833, + "step": 36 + }, + { + "epoch": 0.020285087719298246, + "grad_norm": 0.7720519304275513, + "learning_rate": 8.043478260869566e-06, + "loss": 0.6552173495292664, + "step": 37 + }, + { + "epoch": 0.020833333333333332, + "grad_norm": 0.742103099822998, + "learning_rate": 8.260869565217392e-06, + "loss": 0.6349525451660156, + "step": 38 + }, + { + "epoch": 0.02138157894736842, + "grad_norm": 0.6862183213233948, + "learning_rate": 8.478260869565218e-06, + "loss": 0.6491415500640869, + "step": 39 + }, + { + "epoch": 0.021929824561403508, + "grad_norm": 0.6290478110313416, + "learning_rate": 8.695652173913044e-06, + "loss": 0.6458108425140381, + "step": 40 + }, + { + "epoch": 0.022478070175438597, + "grad_norm": 0.6633671522140503, + "learning_rate": 8.91304347826087e-06, + "loss": 0.6461598873138428, + "step": 41 + }, + { + "epoch": 0.023026315789473683, + "grad_norm": 0.846064567565918, + "learning_rate": 9.130434782608697e-06, + "loss": 0.6433469653129578, + "step": 42 + }, + { + "epoch": 0.023574561403508772, + "grad_norm": 0.6658814549446106, + "learning_rate": 9.347826086956523e-06, + "loss": 0.6385210752487183, + "step": 43 + }, + { + "epoch": 0.02412280701754386, + "grad_norm": 0.6573966145515442, + "learning_rate": 9.565217391304349e-06, + "loss": 0.6399390697479248, + "step": 44 + }, + { + "epoch": 0.024671052631578948, + "grad_norm": 0.6428568959236145, + "learning_rate": 9.782608695652175e-06, + "loss": 0.6466711163520813, + "step": 45 + }, + { + "epoch": 0.025219298245614034, + "grad_norm": 0.6197839975357056, + "learning_rate": 1e-05, + "loss": 0.6346309781074524, + "step": 46 + }, + { + "epoch": 0.025767543859649123, + "grad_norm": 0.6529486179351807, + "learning_rate": 1.0217391304347829e-05, + "loss": 0.6368366479873657, + "step": 47 + }, + { + "epoch": 0.02631578947368421, + "grad_norm": 0.6419752836227417, + "learning_rate": 1.0434782608695653e-05, + "loss": 0.6297695636749268, + "step": 48 + }, + { + "epoch": 0.0268640350877193, + "grad_norm": 0.6125337481498718, + "learning_rate": 1.0652173913043479e-05, + "loss": 0.6156260967254639, + "step": 49 + }, + { + "epoch": 0.027412280701754384, + "grad_norm": 0.6913661360740662, + "learning_rate": 1.0869565217391305e-05, + "loss": 0.6182829737663269, + "step": 50 + }, + { + "epoch": 0.027960526315789474, + "grad_norm": 0.6140983700752258, + "learning_rate": 1.1086956521739131e-05, + "loss": 0.6342036724090576, + "step": 51 + }, + { + "epoch": 0.02850877192982456, + "grad_norm": 0.6957651376724243, + "learning_rate": 1.1304347826086957e-05, + "loss": 0.6274335384368896, + "step": 52 + }, + { + "epoch": 0.02905701754385965, + "grad_norm": 0.6550323367118835, + "learning_rate": 1.1521739130434783e-05, + "loss": 0.6252555847167969, + "step": 53 + }, + { + "epoch": 0.029605263157894735, + "grad_norm": 0.6278795003890991, + "learning_rate": 1.1739130434782611e-05, + "loss": 0.6444627642631531, + "step": 54 + }, + { + "epoch": 0.030153508771929825, + "grad_norm": 0.6082501411437988, + "learning_rate": 1.1956521739130435e-05, + "loss": 0.6297181844711304, + "step": 55 + }, + { + "epoch": 0.03070175438596491, + "grad_norm": 0.6487641930580139, + "learning_rate": 1.2173913043478263e-05, + "loss": 0.6289809346199036, + "step": 56 + }, + { + "epoch": 0.03125, + "grad_norm": 0.6447638869285583, + "learning_rate": 1.2391304347826088e-05, + "loss": 0.6308567523956299, + "step": 57 + }, + { + "epoch": 0.03179824561403509, + "grad_norm": 0.7155942916870117, + "learning_rate": 1.2608695652173915e-05, + "loss": 0.6242796182632446, + "step": 58 + }, + { + "epoch": 0.03234649122807018, + "grad_norm": 0.7464308738708496, + "learning_rate": 1.282608695652174e-05, + "loss": 0.631054699420929, + "step": 59 + }, + { + "epoch": 0.03289473684210526, + "grad_norm": 0.7163922786712646, + "learning_rate": 1.3043478260869566e-05, + "loss": 0.6311353445053101, + "step": 60 + }, + { + "epoch": 0.03344298245614035, + "grad_norm": 0.7227900624275208, + "learning_rate": 1.3260869565217392e-05, + "loss": 0.6270331144332886, + "step": 61 + }, + { + "epoch": 0.03399122807017544, + "grad_norm": 0.6737267971038818, + "learning_rate": 1.3478260869565218e-05, + "loss": 0.6225444674491882, + "step": 62 + }, + { + "epoch": 0.03453947368421053, + "grad_norm": 0.8492542505264282, + "learning_rate": 1.3695652173913046e-05, + "loss": 0.611880898475647, + "step": 63 + }, + { + "epoch": 0.03508771929824561, + "grad_norm": 0.6433826088905334, + "learning_rate": 1.391304347826087e-05, + "loss": 0.6079282760620117, + "step": 64 + }, + { + "epoch": 0.0356359649122807, + "grad_norm": 0.7155340909957886, + "learning_rate": 1.4130434782608698e-05, + "loss": 0.6314013004302979, + "step": 65 + }, + { + "epoch": 0.03618421052631579, + "grad_norm": 0.6798359155654907, + "learning_rate": 1.4347826086956522e-05, + "loss": 0.6237474679946899, + "step": 66 + }, + { + "epoch": 0.03673245614035088, + "grad_norm": 0.6824196577072144, + "learning_rate": 1.456521739130435e-05, + "loss": 0.6280307173728943, + "step": 67 + }, + { + "epoch": 0.03728070175438596, + "grad_norm": 0.6930661201477051, + "learning_rate": 1.4782608695652174e-05, + "loss": 0.6118326783180237, + "step": 68 + }, + { + "epoch": 0.03782894736842105, + "grad_norm": 0.6970058083534241, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.6004877090454102, + "step": 69 + }, + { + "epoch": 0.03837719298245614, + "grad_norm": 0.6858695149421692, + "learning_rate": 1.5217391304347828e-05, + "loss": 0.6087049245834351, + "step": 70 + }, + { + "epoch": 0.03892543859649123, + "grad_norm": 0.6748414039611816, + "learning_rate": 1.5434782608695654e-05, + "loss": 0.6046065092086792, + "step": 71 + }, + { + "epoch": 0.039473684210526314, + "grad_norm": 0.6753906011581421, + "learning_rate": 1.565217391304348e-05, + "loss": 0.6173025965690613, + "step": 72 + }, + { + "epoch": 0.0400219298245614, + "grad_norm": 0.6844945549964905, + "learning_rate": 1.5869565217391306e-05, + "loss": 0.6172990202903748, + "step": 73 + }, + { + "epoch": 0.04057017543859649, + "grad_norm": 0.7520304918289185, + "learning_rate": 1.6086956521739132e-05, + "loss": 0.6136633157730103, + "step": 74 + }, + { + "epoch": 0.04111842105263158, + "grad_norm": 0.6270356774330139, + "learning_rate": 1.630434782608696e-05, + "loss": 0.6032594442367554, + "step": 75 + }, + { + "epoch": 0.041666666666666664, + "grad_norm": 0.7684335112571716, + "learning_rate": 1.6521739130434785e-05, + "loss": 0.6158262491226196, + "step": 76 + }, + { + "epoch": 0.042214912280701754, + "grad_norm": 0.5890095829963684, + "learning_rate": 1.673913043478261e-05, + "loss": 0.602159857749939, + "step": 77 + }, + { + "epoch": 0.04276315789473684, + "grad_norm": 0.6600304245948792, + "learning_rate": 1.6956521739130437e-05, + "loss": 0.613303542137146, + "step": 78 + }, + { + "epoch": 0.04331140350877193, + "grad_norm": 0.6526859402656555, + "learning_rate": 1.7173913043478263e-05, + "loss": 0.5942760705947876, + "step": 79 + }, + { + "epoch": 0.043859649122807015, + "grad_norm": 0.6946115493774414, + "learning_rate": 1.739130434782609e-05, + "loss": 0.611393928527832, + "step": 80 + }, + { + "epoch": 0.044407894736842105, + "grad_norm": 0.753236711025238, + "learning_rate": 1.7608695652173915e-05, + "loss": 0.6026621460914612, + "step": 81 + }, + { + "epoch": 0.044956140350877194, + "grad_norm": 0.6258090734481812, + "learning_rate": 1.782608695652174e-05, + "loss": 0.6033834218978882, + "step": 82 + }, + { + "epoch": 0.04550438596491228, + "grad_norm": 0.7388920783996582, + "learning_rate": 1.8043478260869567e-05, + "loss": 0.6103101968765259, + "step": 83 + }, + { + "epoch": 0.046052631578947366, + "grad_norm": 0.6599609851837158, + "learning_rate": 1.8260869565217393e-05, + "loss": 0.600335955619812, + "step": 84 + }, + { + "epoch": 0.046600877192982455, + "grad_norm": 0.7393732070922852, + "learning_rate": 1.847826086956522e-05, + "loss": 0.6183399558067322, + "step": 85 + }, + { + "epoch": 0.047149122807017545, + "grad_norm": 0.744738757610321, + "learning_rate": 1.8695652173913045e-05, + "loss": 0.6110402345657349, + "step": 86 + }, + { + "epoch": 0.047697368421052634, + "grad_norm": 0.7020261883735657, + "learning_rate": 1.891304347826087e-05, + "loss": 0.6024118661880493, + "step": 87 + }, + { + "epoch": 0.04824561403508772, + "grad_norm": 0.725536584854126, + "learning_rate": 1.9130434782608697e-05, + "loss": 0.5905160903930664, + "step": 88 + }, + { + "epoch": 0.048793859649122806, + "grad_norm": 0.7676002383232117, + "learning_rate": 1.9347826086956523e-05, + "loss": 0.5904608368873596, + "step": 89 + }, + { + "epoch": 0.049342105263157895, + "grad_norm": 0.7609053254127502, + "learning_rate": 1.956521739130435e-05, + "loss": 0.6026548147201538, + "step": 90 + }, + { + "epoch": 0.049890350877192985, + "grad_norm": 0.7540615200996399, + "learning_rate": 1.9782608695652176e-05, + "loss": 0.5942332744598389, + "step": 91 + }, + { + "epoch": 0.05043859649122807, + "grad_norm": 0.7101155519485474, + "learning_rate": 2e-05, + "loss": 0.5820369720458984, + "step": 92 + }, + { + "epoch": 0.05098684210526316, + "grad_norm": 0.7118958830833435, + "learning_rate": 1.9999983549698757e-05, + "loss": 0.5976592898368835, + "step": 93 + }, + { + "epoch": 0.051535087719298246, + "grad_norm": 0.7405442595481873, + "learning_rate": 1.9999934198849154e-05, + "loss": 0.5931892395019531, + "step": 94 + }, + { + "epoch": 0.052083333333333336, + "grad_norm": 0.6989566683769226, + "learning_rate": 1.999985194761355e-05, + "loss": 0.5946756601333618, + "step": 95 + }, + { + "epoch": 0.05263157894736842, + "grad_norm": 0.7052030563354492, + "learning_rate": 1.9999736796262564e-05, + "loss": 0.5996489524841309, + "step": 96 + }, + { + "epoch": 0.05317982456140351, + "grad_norm": 0.7175880074501038, + "learning_rate": 1.9999588745175048e-05, + "loss": 0.6101623773574829, + "step": 97 + }, + { + "epoch": 0.0537280701754386, + "grad_norm": 0.727748692035675, + "learning_rate": 1.9999407794838095e-05, + "loss": 0.6104993224143982, + "step": 98 + }, + { + "epoch": 0.054276315789473686, + "grad_norm": 0.6567267775535583, + "learning_rate": 1.9999193945847056e-05, + "loss": 0.5962132215499878, + "step": 99 + }, + { + "epoch": 0.05482456140350877, + "grad_norm": 0.7101923227310181, + "learning_rate": 1.9998947198905492e-05, + "loss": 0.5943471193313599, + "step": 100 + }, + { + "epoch": 0.05537280701754386, + "grad_norm": 0.6486233472824097, + "learning_rate": 1.999866755482522e-05, + "loss": 0.5930988788604736, + "step": 101 + }, + { + "epoch": 0.05592105263157895, + "grad_norm": 0.6858486533164978, + "learning_rate": 1.999835501452629e-05, + "loss": 0.5910634994506836, + "step": 102 + }, + { + "epoch": 0.05646929824561404, + "grad_norm": 0.7296199798583984, + "learning_rate": 1.9998009579036976e-05, + "loss": 0.6147328019142151, + "step": 103 + }, + { + "epoch": 0.05701754385964912, + "grad_norm": 0.6833330988883972, + "learning_rate": 1.999763124949378e-05, + "loss": 0.5927976965904236, + "step": 104 + }, + { + "epoch": 0.05756578947368421, + "grad_norm": 0.7425616383552551, + "learning_rate": 1.999722002714143e-05, + "loss": 0.5903884172439575, + "step": 105 + }, + { + "epoch": 0.0581140350877193, + "grad_norm": 0.6378883719444275, + "learning_rate": 1.9996775913332875e-05, + "loss": 0.5842282772064209, + "step": 106 + }, + { + "epoch": 0.05866228070175439, + "grad_norm": 0.7103923559188843, + "learning_rate": 1.999629890952927e-05, + "loss": 0.6085933446884155, + "step": 107 + }, + { + "epoch": 0.05921052631578947, + "grad_norm": 0.660241425037384, + "learning_rate": 1.999578901729999e-05, + "loss": 0.6022235155105591, + "step": 108 + }, + { + "epoch": 0.05975877192982456, + "grad_norm": 0.7913385629653931, + "learning_rate": 1.9995246238322613e-05, + "loss": 0.5908679962158203, + "step": 109 + }, + { + "epoch": 0.06030701754385965, + "grad_norm": 0.6590815186500549, + "learning_rate": 1.9994670574382913e-05, + "loss": 0.6054195761680603, + "step": 110 + }, + { + "epoch": 0.06085526315789474, + "grad_norm": 0.6430388689041138, + "learning_rate": 1.9994062027374856e-05, + "loss": 0.5946575999259949, + "step": 111 + }, + { + "epoch": 0.06140350877192982, + "grad_norm": 0.6510061621665955, + "learning_rate": 1.9993420599300603e-05, + "loss": 0.5910765528678894, + "step": 112 + }, + { + "epoch": 0.06195175438596491, + "grad_norm": 0.6270714998245239, + "learning_rate": 1.999274629227049e-05, + "loss": 0.5879694223403931, + "step": 113 + }, + { + "epoch": 0.0625, + "grad_norm": 0.6500689387321472, + "learning_rate": 1.9992039108503024e-05, + "loss": 0.5984656810760498, + "step": 114 + }, + { + "epoch": 0.06304824561403509, + "grad_norm": 0.6544696092605591, + "learning_rate": 1.9991299050324887e-05, + "loss": 0.5880492925643921, + "step": 115 + }, + { + "epoch": 0.06359649122807018, + "grad_norm": 0.6615366339683533, + "learning_rate": 1.999052612017091e-05, + "loss": 0.589603066444397, + "step": 116 + }, + { + "epoch": 0.06414473684210527, + "grad_norm": 0.6696038842201233, + "learning_rate": 1.9989720320584085e-05, + "loss": 0.5883747339248657, + "step": 117 + }, + { + "epoch": 0.06469298245614036, + "grad_norm": 0.7159208059310913, + "learning_rate": 1.998888165421554e-05, + "loss": 0.5921615958213806, + "step": 118 + }, + { + "epoch": 0.06524122807017543, + "grad_norm": 0.6331257820129395, + "learning_rate": 1.9988010123824534e-05, + "loss": 0.5725486278533936, + "step": 119 + }, + { + "epoch": 0.06578947368421052, + "grad_norm": 0.7334661483764648, + "learning_rate": 1.998710573227846e-05, + "loss": 0.5945963859558105, + "step": 120 + }, + { + "epoch": 0.06633771929824561, + "grad_norm": 0.6409698128700256, + "learning_rate": 1.9986168482552814e-05, + "loss": 0.5865694880485535, + "step": 121 + }, + { + "epoch": 0.0668859649122807, + "grad_norm": 0.7104285955429077, + "learning_rate": 1.998519837773121e-05, + "loss": 0.5784432888031006, + "step": 122 + }, + { + "epoch": 0.06743421052631579, + "grad_norm": 0.6517587900161743, + "learning_rate": 1.998419542100535e-05, + "loss": 0.5863971710205078, + "step": 123 + }, + { + "epoch": 0.06798245614035088, + "grad_norm": 0.6840674877166748, + "learning_rate": 1.9983159615675022e-05, + "loss": 0.5794469118118286, + "step": 124 + }, + { + "epoch": 0.06853070175438597, + "grad_norm": 0.6648241877555847, + "learning_rate": 1.9982090965148086e-05, + "loss": 0.5815461874008179, + "step": 125 + }, + { + "epoch": 0.06907894736842106, + "grad_norm": 0.6593918204307556, + "learning_rate": 1.9980989472940467e-05, + "loss": 0.5929619073867798, + "step": 126 + }, + { + "epoch": 0.06962719298245613, + "grad_norm": 0.6910731792449951, + "learning_rate": 1.9979855142676142e-05, + "loss": 0.5822854042053223, + "step": 127 + }, + { + "epoch": 0.07017543859649122, + "grad_norm": 0.6526376605033875, + "learning_rate": 1.9978687978087125e-05, + "loss": 0.5812543630599976, + "step": 128 + }, + { + "epoch": 0.07072368421052631, + "grad_norm": 0.7658113241195679, + "learning_rate": 1.9977487983013463e-05, + "loss": 0.5871016383171082, + "step": 129 + }, + { + "epoch": 0.0712719298245614, + "grad_norm": 0.6897526979446411, + "learning_rate": 1.9976255161403203e-05, + "loss": 0.575676679611206, + "step": 130 + }, + { + "epoch": 0.07182017543859649, + "grad_norm": 0.7414442896842957, + "learning_rate": 1.9974989517312407e-05, + "loss": 0.5839799642562866, + "step": 131 + }, + { + "epoch": 0.07236842105263158, + "grad_norm": 0.7151236534118652, + "learning_rate": 1.997369105490512e-05, + "loss": 0.5924729704856873, + "step": 132 + }, + { + "epoch": 0.07291666666666667, + "grad_norm": 0.7733912467956543, + "learning_rate": 1.997235977845336e-05, + "loss": 0.5748937129974365, + "step": 133 + }, + { + "epoch": 0.07346491228070176, + "grad_norm": 0.7178257703781128, + "learning_rate": 1.9970995692337113e-05, + "loss": 0.5769957900047302, + "step": 134 + }, + { + "epoch": 0.07401315789473684, + "grad_norm": 0.7747276425361633, + "learning_rate": 1.99695988010443e-05, + "loss": 0.5812219381332397, + "step": 135 + }, + { + "epoch": 0.07456140350877193, + "grad_norm": 0.7646524310112, + "learning_rate": 1.9968169109170775e-05, + "loss": 0.5893447399139404, + "step": 136 + }, + { + "epoch": 0.07510964912280702, + "grad_norm": 0.7229993939399719, + "learning_rate": 1.9966706621420314e-05, + "loss": 0.574189305305481, + "step": 137 + }, + { + "epoch": 0.0756578947368421, + "grad_norm": 0.7084233164787292, + "learning_rate": 1.9965211342604586e-05, + "loss": 0.5811874866485596, + "step": 138 + }, + { + "epoch": 0.0762061403508772, + "grad_norm": 0.6773183941841125, + "learning_rate": 1.9963683277643153e-05, + "loss": 0.5971899628639221, + "step": 139 + }, + { + "epoch": 0.07675438596491228, + "grad_norm": 0.7109978199005127, + "learning_rate": 1.996212243156344e-05, + "loss": 0.5952876806259155, + "step": 140 + }, + { + "epoch": 0.07730263157894737, + "grad_norm": 0.6656246781349182, + "learning_rate": 1.9960528809500724e-05, + "loss": 0.5804654955863953, + "step": 141 + }, + { + "epoch": 0.07785087719298246, + "grad_norm": 0.7269256114959717, + "learning_rate": 1.995890241669811e-05, + "loss": 0.5656623244285583, + "step": 142 + }, + { + "epoch": 0.07839912280701754, + "grad_norm": 0.6435320377349854, + "learning_rate": 1.995724325850654e-05, + "loss": 0.569736897945404, + "step": 143 + }, + { + "epoch": 0.07894736842105263, + "grad_norm": 0.6682058572769165, + "learning_rate": 1.9955551340384745e-05, + "loss": 0.5740841627120972, + "step": 144 + }, + { + "epoch": 0.07949561403508772, + "grad_norm": 0.7501179575920105, + "learning_rate": 1.9953826667899228e-05, + "loss": 0.5707544088363647, + "step": 145 + }, + { + "epoch": 0.0800438596491228, + "grad_norm": 0.6460590362548828, + "learning_rate": 1.995206924672427e-05, + "loss": 0.5712607502937317, + "step": 146 + }, + { + "epoch": 0.0805921052631579, + "grad_norm": 0.656384289264679, + "learning_rate": 1.995027908264189e-05, + "loss": 0.5888504981994629, + "step": 147 + }, + { + "epoch": 0.08114035087719298, + "grad_norm": 0.7195465564727783, + "learning_rate": 1.994845618154184e-05, + "loss": 0.5889352560043335, + "step": 148 + }, + { + "epoch": 0.08168859649122807, + "grad_norm": 0.6684480905532837, + "learning_rate": 1.994660054942157e-05, + "loss": 0.590423047542572, + "step": 149 + }, + { + "epoch": 0.08223684210526316, + "grad_norm": 0.6501762270927429, + "learning_rate": 1.994471219238623e-05, + "loss": 0.5742418766021729, + "step": 150 + }, + { + "epoch": 0.08278508771929824, + "grad_norm": 0.6628527045249939, + "learning_rate": 1.9942791116648617e-05, + "loss": 0.5750651359558105, + "step": 151 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.6320564150810242, + "learning_rate": 1.994083732852919e-05, + "loss": 0.5674133896827698, + "step": 152 + }, + { + "epoch": 0.08388157894736842, + "grad_norm": 0.7114735841751099, + "learning_rate": 1.9938850834456036e-05, + "loss": 0.5808606743812561, + "step": 153 + }, + { + "epoch": 0.08442982456140351, + "grad_norm": 0.6949970126152039, + "learning_rate": 1.993683164096483e-05, + "loss": 0.5758386254310608, + "step": 154 + }, + { + "epoch": 0.0849780701754386, + "grad_norm": 0.5890199542045593, + "learning_rate": 1.9934779754698848e-05, + "loss": 0.5657817721366882, + "step": 155 + }, + { + "epoch": 0.08552631578947369, + "grad_norm": 0.632293164730072, + "learning_rate": 1.9932695182408917e-05, + "loss": 0.5779306292533875, + "step": 156 + }, + { + "epoch": 0.08607456140350878, + "grad_norm": 0.6128914952278137, + "learning_rate": 1.9930577930953406e-05, + "loss": 0.570749044418335, + "step": 157 + }, + { + "epoch": 0.08662280701754387, + "grad_norm": 0.6463309526443481, + "learning_rate": 1.99284280072982e-05, + "loss": 0.5824733972549438, + "step": 158 + }, + { + "epoch": 0.08717105263157894, + "grad_norm": 0.5726247429847717, + "learning_rate": 1.992624541851667e-05, + "loss": 0.5759953856468201, + "step": 159 + }, + { + "epoch": 0.08771929824561403, + "grad_norm": 0.6129015684127808, + "learning_rate": 1.9924030171789676e-05, + "loss": 0.5806936621665955, + "step": 160 + }, + { + "epoch": 0.08826754385964912, + "grad_norm": 0.5705315470695496, + "learning_rate": 1.992178227440551e-05, + "loss": 0.5579361915588379, + "step": 161 + }, + { + "epoch": 0.08881578947368421, + "grad_norm": 0.6253746151924133, + "learning_rate": 1.9919501733759882e-05, + "loss": 0.5733797550201416, + "step": 162 + }, + { + "epoch": 0.0893640350877193, + "grad_norm": 0.5756648182868958, + "learning_rate": 1.991718855735592e-05, + "loss": 0.5644710063934326, + "step": 163 + }, + { + "epoch": 0.08991228070175439, + "grad_norm": 0.6245459318161011, + "learning_rate": 1.9914842752804106e-05, + "loss": 0.5706383585929871, + "step": 164 + }, + { + "epoch": 0.09046052631578948, + "grad_norm": 0.5780031681060791, + "learning_rate": 1.9912464327822277e-05, + "loss": 0.5664913058280945, + "step": 165 + }, + { + "epoch": 0.09100877192982457, + "grad_norm": 0.5702175498008728, + "learning_rate": 1.99100532902356e-05, + "loss": 0.5698821544647217, + "step": 166 + }, + { + "epoch": 0.09155701754385964, + "grad_norm": 0.6294623613357544, + "learning_rate": 1.990760964797653e-05, + "loss": 0.5785145163536072, + "step": 167 + }, + { + "epoch": 0.09210526315789473, + "grad_norm": 0.5997359752655029, + "learning_rate": 1.99051334090848e-05, + "loss": 0.5714824199676514, + "step": 168 + }, + { + "epoch": 0.09265350877192982, + "grad_norm": 0.726883053779602, + "learning_rate": 1.9902624581707385e-05, + "loss": 0.5575656890869141, + "step": 169 + }, + { + "epoch": 0.09320175438596491, + "grad_norm": 0.5980774164199829, + "learning_rate": 1.9900083174098474e-05, + "loss": 0.5692469477653503, + "step": 170 + }, + { + "epoch": 0.09375, + "grad_norm": 0.6684207916259766, + "learning_rate": 1.989750919461946e-05, + "loss": 0.5694268941879272, + "step": 171 + }, + { + "epoch": 0.09429824561403509, + "grad_norm": 0.6513474583625793, + "learning_rate": 1.989490265173888e-05, + "loss": 0.5756077170372009, + "step": 172 + }, + { + "epoch": 0.09484649122807018, + "grad_norm": 0.61539626121521, + "learning_rate": 1.989226355403242e-05, + "loss": 0.571203887462616, + "step": 173 + }, + { + "epoch": 0.09539473684210527, + "grad_norm": 0.6571477055549622, + "learning_rate": 1.9889591910182878e-05, + "loss": 0.5581786632537842, + "step": 174 + }, + { + "epoch": 0.09594298245614036, + "grad_norm": 0.644450306892395, + "learning_rate": 1.9886887728980114e-05, + "loss": 0.5708379149436951, + "step": 175 + }, + { + "epoch": 0.09649122807017543, + "grad_norm": 0.6149620413780212, + "learning_rate": 1.9884151019321053e-05, + "loss": 0.5719337463378906, + "step": 176 + }, + { + "epoch": 0.09703947368421052, + "grad_norm": 0.6498218178749084, + "learning_rate": 1.9881381790209634e-05, + "loss": 0.5746930241584778, + "step": 177 + }, + { + "epoch": 0.09758771929824561, + "grad_norm": 0.6144945621490479, + "learning_rate": 1.987858005075678e-05, + "loss": 0.5792109966278076, + "step": 178 + }, + { + "epoch": 0.0981359649122807, + "grad_norm": 0.5841969847679138, + "learning_rate": 1.9875745810180394e-05, + "loss": 0.5848267078399658, + "step": 179 + }, + { + "epoch": 0.09868421052631579, + "grad_norm": 0.6415631175041199, + "learning_rate": 1.987287907780529e-05, + "loss": 0.5754548907279968, + "step": 180 + }, + { + "epoch": 0.09923245614035088, + "grad_norm": 0.5965489149093628, + "learning_rate": 1.9869979863063195e-05, + "loss": 0.5726244449615479, + "step": 181 + }, + { + "epoch": 0.09978070175438597, + "grad_norm": 0.6180820465087891, + "learning_rate": 1.9867048175492697e-05, + "loss": 0.5588217377662659, + "step": 182 + }, + { + "epoch": 0.10032894736842106, + "grad_norm": 0.6616155505180359, + "learning_rate": 1.9864084024739228e-05, + "loss": 0.5637444853782654, + "step": 183 + }, + { + "epoch": 0.10087719298245613, + "grad_norm": 0.582970142364502, + "learning_rate": 1.986108742055502e-05, + "loss": 0.5720033645629883, + "step": 184 + }, + { + "epoch": 0.10142543859649122, + "grad_norm": 0.604560136795044, + "learning_rate": 1.985805837279908e-05, + "loss": 0.5666155815124512, + "step": 185 + }, + { + "epoch": 0.10197368421052631, + "grad_norm": 0.6076090931892395, + "learning_rate": 1.985499689143716e-05, + "loss": 0.5611607432365417, + "step": 186 + }, + { + "epoch": 0.1025219298245614, + "grad_norm": 0.6265007853507996, + "learning_rate": 1.9851902986541714e-05, + "loss": 0.5762375593185425, + "step": 187 + }, + { + "epoch": 0.10307017543859649, + "grad_norm": 0.6510225534439087, + "learning_rate": 1.9848776668291885e-05, + "loss": 0.5727993845939636, + "step": 188 + }, + { + "epoch": 0.10361842105263158, + "grad_norm": 0.6026071906089783, + "learning_rate": 1.9845617946973445e-05, + "loss": 0.5830463171005249, + "step": 189 + }, + { + "epoch": 0.10416666666666667, + "grad_norm": 0.6423436999320984, + "learning_rate": 1.9842426832978766e-05, + "loss": 0.5695741176605225, + "step": 190 + }, + { + "epoch": 0.10471491228070176, + "grad_norm": 0.6096088290214539, + "learning_rate": 1.9839203336806822e-05, + "loss": 0.5536958575248718, + "step": 191 + }, + { + "epoch": 0.10526315789473684, + "grad_norm": 0.6307644248008728, + "learning_rate": 1.9835947469063098e-05, + "loss": 0.565772294998169, + "step": 192 + }, + { + "epoch": 0.10581140350877193, + "grad_norm": 0.6427217125892639, + "learning_rate": 1.98326592404596e-05, + "loss": 0.5632326006889343, + "step": 193 + }, + { + "epoch": 0.10635964912280702, + "grad_norm": 0.6283311247825623, + "learning_rate": 1.9829338661814798e-05, + "loss": 0.5652562975883484, + "step": 194 + }, + { + "epoch": 0.1069078947368421, + "grad_norm": 0.6760656833648682, + "learning_rate": 1.9825985744053594e-05, + "loss": 0.5492938160896301, + "step": 195 + }, + { + "epoch": 0.1074561403508772, + "grad_norm": 0.564399778842926, + "learning_rate": 1.9822600498207292e-05, + "loss": 0.5632847547531128, + "step": 196 + }, + { + "epoch": 0.10800438596491228, + "grad_norm": 0.6520889401435852, + "learning_rate": 1.981918293541355e-05, + "loss": 0.5750704407691956, + "step": 197 + }, + { + "epoch": 0.10855263157894737, + "grad_norm": 0.6204854249954224, + "learning_rate": 1.981573306691636e-05, + "loss": 0.5595630407333374, + "step": 198 + }, + { + "epoch": 0.10910087719298246, + "grad_norm": 0.6573178768157959, + "learning_rate": 1.9812250904065996e-05, + "loss": 0.5650523900985718, + "step": 199 + }, + { + "epoch": 0.10964912280701754, + "grad_norm": 0.6327071189880371, + "learning_rate": 1.9808736458318988e-05, + "loss": 0.5691615343093872, + "step": 200 + }, + { + "epoch": 0.11019736842105263, + "grad_norm": 0.5889390707015991, + "learning_rate": 1.9805189741238063e-05, + "loss": 0.5566825866699219, + "step": 201 + }, + { + "epoch": 0.11074561403508772, + "grad_norm": 0.6135177612304688, + "learning_rate": 1.9801610764492142e-05, + "loss": 0.5492175221443176, + "step": 202 + }, + { + "epoch": 0.1112938596491228, + "grad_norm": 0.6119044423103333, + "learning_rate": 1.9797999539856273e-05, + "loss": 0.55916827917099, + "step": 203 + }, + { + "epoch": 0.1118421052631579, + "grad_norm": 0.6478143930435181, + "learning_rate": 1.9794356079211605e-05, + "loss": 0.5680371522903442, + "step": 204 + }, + { + "epoch": 0.11239035087719298, + "grad_norm": 0.604614794254303, + "learning_rate": 1.979068039454534e-05, + "loss": 0.5741668939590454, + "step": 205 + }, + { + "epoch": 0.11293859649122807, + "grad_norm": 0.6160794496536255, + "learning_rate": 1.97869724979507e-05, + "loss": 0.5514141321182251, + "step": 206 + }, + { + "epoch": 0.11348684210526316, + "grad_norm": 0.6878485679626465, + "learning_rate": 1.9783232401626894e-05, + "loss": 0.5740391612052917, + "step": 207 + }, + { + "epoch": 0.11403508771929824, + "grad_norm": 0.6564989686012268, + "learning_rate": 1.9779460117879058e-05, + "loss": 0.5653142929077148, + "step": 208 + }, + { + "epoch": 0.11458333333333333, + "grad_norm": 0.6507999300956726, + "learning_rate": 1.9775655659118235e-05, + "loss": 0.5714613199234009, + "step": 209 + }, + { + "epoch": 0.11513157894736842, + "grad_norm": 0.5813721418380737, + "learning_rate": 1.9771819037861327e-05, + "loss": 0.5710395574569702, + "step": 210 + }, + { + "epoch": 0.11567982456140351, + "grad_norm": 0.6418222784996033, + "learning_rate": 1.9767950266731045e-05, + "loss": 0.5667479038238525, + "step": 211 + }, + { + "epoch": 0.1162280701754386, + "grad_norm": 0.6286323666572571, + "learning_rate": 1.976404935845588e-05, + "loss": 0.5599182844161987, + "step": 212 + }, + { + "epoch": 0.11677631578947369, + "grad_norm": 0.6144490242004395, + "learning_rate": 1.9760116325870053e-05, + "loss": 0.5594089031219482, + "step": 213 + }, + { + "epoch": 0.11732456140350878, + "grad_norm": 0.5968447327613831, + "learning_rate": 1.9756151181913483e-05, + "loss": 0.5569183826446533, + "step": 214 + }, + { + "epoch": 0.11787280701754387, + "grad_norm": 0.5543148517608643, + "learning_rate": 1.9752153939631733e-05, + "loss": 0.5508428812026978, + "step": 215 + }, + { + "epoch": 0.11842105263157894, + "grad_norm": 0.5568669438362122, + "learning_rate": 1.9748124612175967e-05, + "loss": 0.5641998648643494, + "step": 216 + }, + { + "epoch": 0.11896929824561403, + "grad_norm": 0.5984328985214233, + "learning_rate": 1.974406321280291e-05, + "loss": 0.5496900081634521, + "step": 217 + }, + { + "epoch": 0.11951754385964912, + "grad_norm": 0.7141798138618469, + "learning_rate": 1.9739969754874827e-05, + "loss": 0.54876708984375, + "step": 218 + }, + { + "epoch": 0.12006578947368421, + "grad_norm": 0.5774359703063965, + "learning_rate": 1.9735844251859425e-05, + "loss": 0.5808881521224976, + "step": 219 + }, + { + "epoch": 0.1206140350877193, + "grad_norm": 0.575563371181488, + "learning_rate": 1.9731686717329866e-05, + "loss": 0.5554652810096741, + "step": 220 + }, + { + "epoch": 0.12116228070175439, + "grad_norm": 0.5829038619995117, + "learning_rate": 1.9727497164964683e-05, + "loss": 0.5668386220932007, + "step": 221 + }, + { + "epoch": 0.12171052631578948, + "grad_norm": 0.6911284923553467, + "learning_rate": 1.9723275608547764e-05, + "loss": 0.5543766617774963, + "step": 222 + }, + { + "epoch": 0.12225877192982457, + "grad_norm": 0.5740495324134827, + "learning_rate": 1.971902206196828e-05, + "loss": 0.5608670115470886, + "step": 223 + }, + { + "epoch": 0.12280701754385964, + "grad_norm": 0.5948644876480103, + "learning_rate": 1.971473653922065e-05, + "loss": 0.5639830231666565, + "step": 224 + }, + { + "epoch": 0.12335526315789473, + "grad_norm": 0.5562756061553955, + "learning_rate": 1.9710419054404508e-05, + "loss": 0.5558134913444519, + "step": 225 + }, + { + "epoch": 0.12390350877192982, + "grad_norm": 0.589013934135437, + "learning_rate": 1.9706069621724643e-05, + "loss": 0.553057849407196, + "step": 226 + }, + { + "epoch": 0.12445175438596491, + "grad_norm": 0.5930812358856201, + "learning_rate": 1.970168825549094e-05, + "loss": 0.5508049726486206, + "step": 227 + }, + { + "epoch": 0.125, + "grad_norm": 0.5706969499588013, + "learning_rate": 1.9697274970118365e-05, + "loss": 0.5701100826263428, + "step": 228 + }, + { + "epoch": 0.12554824561403508, + "grad_norm": 0.5633387565612793, + "learning_rate": 1.969282978012689e-05, + "loss": 0.5619994401931763, + "step": 229 + }, + { + "epoch": 0.12609649122807018, + "grad_norm": 0.6061105728149414, + "learning_rate": 1.968835270014146e-05, + "loss": 0.5603735446929932, + "step": 230 + }, + { + "epoch": 0.12664473684210525, + "grad_norm": 0.6145228743553162, + "learning_rate": 1.968384374489194e-05, + "loss": 0.5546954274177551, + "step": 231 + }, + { + "epoch": 0.12719298245614036, + "grad_norm": 0.5804170370101929, + "learning_rate": 1.967930292921306e-05, + "loss": 0.5411268472671509, + "step": 232 + }, + { + "epoch": 0.12774122807017543, + "grad_norm": 0.618490993976593, + "learning_rate": 1.967473026804438e-05, + "loss": 0.5622624158859253, + "step": 233 + }, + { + "epoch": 0.12828947368421054, + "grad_norm": 0.5959983468055725, + "learning_rate": 1.967012577643023e-05, + "loss": 0.5682417154312134, + "step": 234 + }, + { + "epoch": 0.1288377192982456, + "grad_norm": 0.7653847336769104, + "learning_rate": 1.966548946951966e-05, + "loss": 0.563367486000061, + "step": 235 + }, + { + "epoch": 0.12938596491228072, + "grad_norm": 0.5803858637809753, + "learning_rate": 1.9660821362566406e-05, + "loss": 0.5565321445465088, + "step": 236 + }, + { + "epoch": 0.1299342105263158, + "grad_norm": 0.5829041600227356, + "learning_rate": 1.9656121470928818e-05, + "loss": 0.568941593170166, + "step": 237 + }, + { + "epoch": 0.13048245614035087, + "grad_norm": 0.5557065010070801, + "learning_rate": 1.965138981006982e-05, + "loss": 0.5454533100128174, + "step": 238 + }, + { + "epoch": 0.13103070175438597, + "grad_norm": 0.6172096729278564, + "learning_rate": 1.964662639555687e-05, + "loss": 0.5447598695755005, + "step": 239 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 0.5418916344642639, + "learning_rate": 1.9641831243061878e-05, + "loss": 0.5526988506317139, + "step": 240 + }, + { + "epoch": 0.13212719298245615, + "grad_norm": 0.7085768580436707, + "learning_rate": 1.963700436836119e-05, + "loss": 0.5584993958473206, + "step": 241 + }, + { + "epoch": 0.13267543859649122, + "grad_norm": 0.6074377298355103, + "learning_rate": 1.963214578733552e-05, + "loss": 0.564329981803894, + "step": 242 + }, + { + "epoch": 0.13322368421052633, + "grad_norm": 0.6210350394248962, + "learning_rate": 1.9627255515969884e-05, + "loss": 0.5371741056442261, + "step": 243 + }, + { + "epoch": 0.1337719298245614, + "grad_norm": 0.5757085084915161, + "learning_rate": 1.962233357035357e-05, + "loss": 0.5590758323669434, + "step": 244 + }, + { + "epoch": 0.13432017543859648, + "grad_norm": 0.5684416890144348, + "learning_rate": 1.9617379966680077e-05, + "loss": 0.5677273273468018, + "step": 245 + }, + { + "epoch": 0.13486842105263158, + "grad_norm": 0.6423379182815552, + "learning_rate": 1.9612394721247063e-05, + "loss": 0.555820882320404, + "step": 246 + }, + { + "epoch": 0.13541666666666666, + "grad_norm": 0.5935850143432617, + "learning_rate": 1.960737785045628e-05, + "loss": 0.5559649467468262, + "step": 247 + }, + { + "epoch": 0.13596491228070176, + "grad_norm": 0.5883272290229797, + "learning_rate": 1.960232937081354e-05, + "loss": 0.5713253021240234, + "step": 248 + }, + { + "epoch": 0.13651315789473684, + "grad_norm": 0.6154787540435791, + "learning_rate": 1.9597249298928647e-05, + "loss": 0.5542075634002686, + "step": 249 + }, + { + "epoch": 0.13706140350877194, + "grad_norm": 0.5839691162109375, + "learning_rate": 1.9592137651515333e-05, + "loss": 0.5596610307693481, + "step": 250 + }, + { + "epoch": 0.13760964912280702, + "grad_norm": 0.5880065560340881, + "learning_rate": 1.9586994445391236e-05, + "loss": 0.5572192668914795, + "step": 251 + }, + { + "epoch": 0.13815789473684212, + "grad_norm": 0.5767812728881836, + "learning_rate": 1.9581819697477813e-05, + "loss": 0.5562660098075867, + "step": 252 + }, + { + "epoch": 0.1387061403508772, + "grad_norm": 0.57844477891922, + "learning_rate": 1.957661342480029e-05, + "loss": 0.5570098161697388, + "step": 253 + }, + { + "epoch": 0.13925438596491227, + "grad_norm": 0.5954297184944153, + "learning_rate": 1.9571375644487626e-05, + "loss": 0.5651256442070007, + "step": 254 + }, + { + "epoch": 0.13980263157894737, + "grad_norm": 0.6308397054672241, + "learning_rate": 1.956610637377243e-05, + "loss": 0.5574840903282166, + "step": 255 + }, + { + "epoch": 0.14035087719298245, + "grad_norm": 0.5578974485397339, + "learning_rate": 1.9560805629990917e-05, + "loss": 0.5468358993530273, + "step": 256 + }, + { + "epoch": 0.14089912280701755, + "grad_norm": 0.5789008140563965, + "learning_rate": 1.955547343058286e-05, + "loss": 0.5454850196838379, + "step": 257 + }, + { + "epoch": 0.14144736842105263, + "grad_norm": 0.5896825194358826, + "learning_rate": 1.955010979309151e-05, + "loss": 0.5465385317802429, + "step": 258 + }, + { + "epoch": 0.14199561403508773, + "grad_norm": 0.5821171402931213, + "learning_rate": 1.954471473516356e-05, + "loss": 0.5568474531173706, + "step": 259 + }, + { + "epoch": 0.1425438596491228, + "grad_norm": 0.609752893447876, + "learning_rate": 1.953928827454908e-05, + "loss": 0.5320336222648621, + "step": 260 + }, + { + "epoch": 0.14309210526315788, + "grad_norm": 0.5710572600364685, + "learning_rate": 1.953383042910144e-05, + "loss": 0.5608735680580139, + "step": 261 + }, + { + "epoch": 0.14364035087719298, + "grad_norm": 0.5719582438468933, + "learning_rate": 1.9528341216777296e-05, + "loss": 0.5430909991264343, + "step": 262 + }, + { + "epoch": 0.14418859649122806, + "grad_norm": 0.5792491436004639, + "learning_rate": 1.9522820655636475e-05, + "loss": 0.5550388693809509, + "step": 263 + }, + { + "epoch": 0.14473684210526316, + "grad_norm": 0.5587615370750427, + "learning_rate": 1.9517268763841965e-05, + "loss": 0.5363634824752808, + "step": 264 + }, + { + "epoch": 0.14528508771929824, + "grad_norm": 0.6018436551094055, + "learning_rate": 1.9511685559659816e-05, + "loss": 0.542389988899231, + "step": 265 + }, + { + "epoch": 0.14583333333333334, + "grad_norm": 0.5717230439186096, + "learning_rate": 1.950607106145911e-05, + "loss": 0.5556778907775879, + "step": 266 + }, + { + "epoch": 0.14638157894736842, + "grad_norm": 0.5293515920639038, + "learning_rate": 1.950042528771189e-05, + "loss": 0.5441399812698364, + "step": 267 + }, + { + "epoch": 0.14692982456140352, + "grad_norm": 0.5560038089752197, + "learning_rate": 1.9494748256993082e-05, + "loss": 0.5428458452224731, + "step": 268 + }, + { + "epoch": 0.1474780701754386, + "grad_norm": 0.5864049196243286, + "learning_rate": 1.9489039987980466e-05, + "loss": 0.5442084074020386, + "step": 269 + }, + { + "epoch": 0.14802631578947367, + "grad_norm": 0.5393058657646179, + "learning_rate": 1.9483300499454584e-05, + "loss": 0.5405887961387634, + "step": 270 + }, + { + "epoch": 0.14857456140350878, + "grad_norm": 0.5462262630462646, + "learning_rate": 1.9477529810298708e-05, + "loss": 0.5411845445632935, + "step": 271 + }, + { + "epoch": 0.14912280701754385, + "grad_norm": 0.570056676864624, + "learning_rate": 1.9471727939498746e-05, + "loss": 0.5517827868461609, + "step": 272 + }, + { + "epoch": 0.14967105263157895, + "grad_norm": 0.5791317820549011, + "learning_rate": 1.9465894906143206e-05, + "loss": 0.5526028871536255, + "step": 273 + }, + { + "epoch": 0.15021929824561403, + "grad_norm": 0.568991482257843, + "learning_rate": 1.9460030729423116e-05, + "loss": 0.5334166884422302, + "step": 274 + }, + { + "epoch": 0.15076754385964913, + "grad_norm": 0.5713422298431396, + "learning_rate": 1.9454135428631976e-05, + "loss": 0.5433835983276367, + "step": 275 + }, + { + "epoch": 0.1513157894736842, + "grad_norm": 0.5832074880599976, + "learning_rate": 1.9448209023165676e-05, + "loss": 0.5358787178993225, + "step": 276 + }, + { + "epoch": 0.15186403508771928, + "grad_norm": 0.562080979347229, + "learning_rate": 1.944225153252245e-05, + "loss": 0.5675616264343262, + "step": 277 + }, + { + "epoch": 0.1524122807017544, + "grad_norm": 0.5883001089096069, + "learning_rate": 1.9436262976302797e-05, + "loss": 0.5515336990356445, + "step": 278 + }, + { + "epoch": 0.15296052631578946, + "grad_norm": 0.5644806027412415, + "learning_rate": 1.9430243374209432e-05, + "loss": 0.5458020567893982, + "step": 279 + }, + { + "epoch": 0.15350877192982457, + "grad_norm": 0.5437331199645996, + "learning_rate": 1.9424192746047207e-05, + "loss": 0.5555316805839539, + "step": 280 + }, + { + "epoch": 0.15405701754385964, + "grad_norm": 0.571792483329773, + "learning_rate": 1.9418111111723056e-05, + "loss": 0.5325362682342529, + "step": 281 + }, + { + "epoch": 0.15460526315789475, + "grad_norm": 0.5624408721923828, + "learning_rate": 1.9411998491245917e-05, + "loss": 0.5465089082717896, + "step": 282 + }, + { + "epoch": 0.15515350877192982, + "grad_norm": 0.5995544791221619, + "learning_rate": 1.940585490472668e-05, + "loss": 0.5414011478424072, + "step": 283 + }, + { + "epoch": 0.15570175438596492, + "grad_norm": 0.6020653247833252, + "learning_rate": 1.9399680372378122e-05, + "loss": 0.5542804002761841, + "step": 284 + }, + { + "epoch": 0.15625, + "grad_norm": 0.5900858044624329, + "learning_rate": 1.9393474914514817e-05, + "loss": 0.5281364917755127, + "step": 285 + }, + { + "epoch": 0.15679824561403508, + "grad_norm": 0.5869341492652893, + "learning_rate": 1.9387238551553103e-05, + "loss": 0.5379394292831421, + "step": 286 + }, + { + "epoch": 0.15734649122807018, + "grad_norm": 0.6075244545936584, + "learning_rate": 1.9380971304010984e-05, + "loss": 0.5490105152130127, + "step": 287 + }, + { + "epoch": 0.15789473684210525, + "grad_norm": 0.5797610282897949, + "learning_rate": 1.937467319250809e-05, + "loss": 0.5395088195800781, + "step": 288 + }, + { + "epoch": 0.15844298245614036, + "grad_norm": 0.63418048620224, + "learning_rate": 1.9368344237765576e-05, + "loss": 0.5533697605133057, + "step": 289 + }, + { + "epoch": 0.15899122807017543, + "grad_norm": 0.5689693689346313, + "learning_rate": 1.9361984460606093e-05, + "loss": 0.5429133772850037, + "step": 290 + }, + { + "epoch": 0.15953947368421054, + "grad_norm": 0.5940237641334534, + "learning_rate": 1.9355593881953685e-05, + "loss": 0.5357928276062012, + "step": 291 + }, + { + "epoch": 0.1600877192982456, + "grad_norm": 0.5879078507423401, + "learning_rate": 1.9349172522833747e-05, + "loss": 0.5496682524681091, + "step": 292 + }, + { + "epoch": 0.16063596491228072, + "grad_norm": 0.6074959635734558, + "learning_rate": 1.9342720404372938e-05, + "loss": 0.5387532114982605, + "step": 293 + }, + { + "epoch": 0.1611842105263158, + "grad_norm": 0.6277965307235718, + "learning_rate": 1.9336237547799108e-05, + "loss": 0.5568438172340393, + "step": 294 + }, + { + "epoch": 0.16173245614035087, + "grad_norm": 0.5389106869697571, + "learning_rate": 1.9329723974441253e-05, + "loss": 0.5376222133636475, + "step": 295 + }, + { + "epoch": 0.16228070175438597, + "grad_norm": 0.544335126876831, + "learning_rate": 1.932317970572942e-05, + "loss": 0.5440766215324402, + "step": 296 + }, + { + "epoch": 0.16282894736842105, + "grad_norm": 0.6015032529830933, + "learning_rate": 1.9316604763194644e-05, + "loss": 0.545868456363678, + "step": 297 + }, + { + "epoch": 0.16337719298245615, + "grad_norm": 0.5448634624481201, + "learning_rate": 1.9309999168468887e-05, + "loss": 0.5508019328117371, + "step": 298 + }, + { + "epoch": 0.16392543859649122, + "grad_norm": 0.5765957832336426, + "learning_rate": 1.930336294328495e-05, + "loss": 0.5424585342407227, + "step": 299 + }, + { + "epoch": 0.16447368421052633, + "grad_norm": 0.5915053486824036, + "learning_rate": 1.9296696109476417e-05, + "loss": 0.5394760966300964, + "step": 300 + }, + { + "epoch": 0.1650219298245614, + "grad_norm": 0.579708456993103, + "learning_rate": 1.9289998688977573e-05, + "loss": 0.5493593215942383, + "step": 301 + }, + { + "epoch": 0.16557017543859648, + "grad_norm": 0.5751671195030212, + "learning_rate": 1.928327070382333e-05, + "loss": 0.5433315634727478, + "step": 302 + }, + { + "epoch": 0.16611842105263158, + "grad_norm": 0.5499085783958435, + "learning_rate": 1.927651217614917e-05, + "loss": 0.5484105348587036, + "step": 303 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.5443341732025146, + "learning_rate": 1.926972312819105e-05, + "loss": 0.5325586199760437, + "step": 304 + }, + { + "epoch": 0.16721491228070176, + "grad_norm": 0.5641672015190125, + "learning_rate": 1.9262903582285352e-05, + "loss": 0.5600139498710632, + "step": 305 + }, + { + "epoch": 0.16776315789473684, + "grad_norm": 0.5435248613357544, + "learning_rate": 1.9256053560868793e-05, + "loss": 0.5464034080505371, + "step": 306 + }, + { + "epoch": 0.16831140350877194, + "grad_norm": 0.5279967784881592, + "learning_rate": 1.9249173086478352e-05, + "loss": 0.5260881781578064, + "step": 307 + }, + { + "epoch": 0.16885964912280702, + "grad_norm": 0.533305287361145, + "learning_rate": 1.9242262181751207e-05, + "loss": 0.5478596091270447, + "step": 308 + }, + { + "epoch": 0.16940789473684212, + "grad_norm": 0.543569028377533, + "learning_rate": 1.9235320869424656e-05, + "loss": 0.5306735038757324, + "step": 309 + }, + { + "epoch": 0.1699561403508772, + "grad_norm": 0.5425512790679932, + "learning_rate": 1.9228349172336023e-05, + "loss": 0.5532375574111938, + "step": 310 + }, + { + "epoch": 0.17050438596491227, + "grad_norm": 0.5322792530059814, + "learning_rate": 1.9221347113422625e-05, + "loss": 0.5421377420425415, + "step": 311 + }, + { + "epoch": 0.17105263157894737, + "grad_norm": 0.5279988646507263, + "learning_rate": 1.9214314715721646e-05, + "loss": 0.544383704662323, + "step": 312 + }, + { + "epoch": 0.17160087719298245, + "grad_norm": 0.5296905040740967, + "learning_rate": 1.9207252002370103e-05, + "loss": 0.5539359450340271, + "step": 313 + }, + { + "epoch": 0.17214912280701755, + "grad_norm": 0.5464092493057251, + "learning_rate": 1.920015899660475e-05, + "loss": 0.5502333641052246, + "step": 314 + }, + { + "epoch": 0.17269736842105263, + "grad_norm": 0.5278061628341675, + "learning_rate": 1.9193035721762002e-05, + "loss": 0.541316032409668, + "step": 315 + }, + { + "epoch": 0.17324561403508773, + "grad_norm": 0.5732148289680481, + "learning_rate": 1.918588220127786e-05, + "loss": 0.5292676687240601, + "step": 316 + }, + { + "epoch": 0.1737938596491228, + "grad_norm": 0.5602945685386658, + "learning_rate": 1.9178698458687844e-05, + "loss": 0.5401516556739807, + "step": 317 + }, + { + "epoch": 0.17434210526315788, + "grad_norm": 0.581879198551178, + "learning_rate": 1.9171484517626893e-05, + "loss": 0.5478727221488953, + "step": 318 + }, + { + "epoch": 0.17489035087719298, + "grad_norm": 0.49734967947006226, + "learning_rate": 1.9164240401829308e-05, + "loss": 0.5426466464996338, + "step": 319 + }, + { + "epoch": 0.17543859649122806, + "grad_norm": 0.6240032315254211, + "learning_rate": 1.915696613512867e-05, + "loss": 0.5309844017028809, + "step": 320 + }, + { + "epoch": 0.17598684210526316, + "grad_norm": 0.543289303779602, + "learning_rate": 1.9149661741457755e-05, + "loss": 0.52463698387146, + "step": 321 + }, + { + "epoch": 0.17653508771929824, + "grad_norm": 0.5531445145606995, + "learning_rate": 1.914232724484845e-05, + "loss": 0.5420910120010376, + "step": 322 + }, + { + "epoch": 0.17708333333333334, + "grad_norm": 0.5311645865440369, + "learning_rate": 1.9134962669431705e-05, + "loss": 0.5336942672729492, + "step": 323 + }, + { + "epoch": 0.17763157894736842, + "grad_norm": 0.5692971348762512, + "learning_rate": 1.912756803943741e-05, + "loss": 0.5401374697685242, + "step": 324 + }, + { + "epoch": 0.17817982456140352, + "grad_norm": 0.5489525198936462, + "learning_rate": 1.9120143379194337e-05, + "loss": 0.5350661277770996, + "step": 325 + }, + { + "epoch": 0.1787280701754386, + "grad_norm": 0.5243253707885742, + "learning_rate": 1.911268871313007e-05, + "loss": 0.5375982522964478, + "step": 326 + }, + { + "epoch": 0.17927631578947367, + "grad_norm": 0.5977224111557007, + "learning_rate": 1.910520406577091e-05, + "loss": 0.5465099215507507, + "step": 327 + }, + { + "epoch": 0.17982456140350878, + "grad_norm": 0.5903772711753845, + "learning_rate": 1.9097689461741805e-05, + "loss": 0.5511510372161865, + "step": 328 + }, + { + "epoch": 0.18037280701754385, + "grad_norm": 0.678267240524292, + "learning_rate": 1.909014492576624e-05, + "loss": 0.528895378112793, + "step": 329 + }, + { + "epoch": 0.18092105263157895, + "grad_norm": 0.5741175413131714, + "learning_rate": 1.9082570482666208e-05, + "loss": 0.5306143760681152, + "step": 330 + }, + { + "epoch": 0.18146929824561403, + "grad_norm": 0.5529358983039856, + "learning_rate": 1.9074966157362072e-05, + "loss": 0.5563026666641235, + "step": 331 + }, + { + "epoch": 0.18201754385964913, + "grad_norm": 0.5264307856559753, + "learning_rate": 1.9067331974872524e-05, + "loss": 0.5446305871009827, + "step": 332 + }, + { + "epoch": 0.1825657894736842, + "grad_norm": 0.5596775412559509, + "learning_rate": 1.9059667960314486e-05, + "loss": 0.5350501537322998, + "step": 333 + }, + { + "epoch": 0.18311403508771928, + "grad_norm": 0.5849246978759766, + "learning_rate": 1.9051974138903028e-05, + "loss": 0.5333458781242371, + "step": 334 + }, + { + "epoch": 0.1836622807017544, + "grad_norm": 0.5496376752853394, + "learning_rate": 1.9044250535951285e-05, + "loss": 0.5393158793449402, + "step": 335 + }, + { + "epoch": 0.18421052631578946, + "grad_norm": 0.5456401109695435, + "learning_rate": 1.903649717687037e-05, + "loss": 0.5372017025947571, + "step": 336 + }, + { + "epoch": 0.18475877192982457, + "grad_norm": 0.5497374534606934, + "learning_rate": 1.902871408716931e-05, + "loss": 0.5300045609474182, + "step": 337 + }, + { + "epoch": 0.18530701754385964, + "grad_norm": 0.5420397520065308, + "learning_rate": 1.902090129245494e-05, + "loss": 0.5352155566215515, + "step": 338 + }, + { + "epoch": 0.18585526315789475, + "grad_norm": 0.5222429037094116, + "learning_rate": 1.9013058818431817e-05, + "loss": 0.5446464419364929, + "step": 339 + }, + { + "epoch": 0.18640350877192982, + "grad_norm": 0.558821976184845, + "learning_rate": 1.9005186690902157e-05, + "loss": 0.5501251220703125, + "step": 340 + }, + { + "epoch": 0.18695175438596492, + "grad_norm": 0.5371629595756531, + "learning_rate": 1.8997284935765735e-05, + "loss": 0.5312443971633911, + "step": 341 + }, + { + "epoch": 0.1875, + "grad_norm": 0.6179410815238953, + "learning_rate": 1.89893535790198e-05, + "loss": 0.5460267066955566, + "step": 342 + }, + { + "epoch": 0.18804824561403508, + "grad_norm": 0.5250634551048279, + "learning_rate": 1.8981392646758996e-05, + "loss": 0.5355827808380127, + "step": 343 + }, + { + "epoch": 0.18859649122807018, + "grad_norm": 0.5826337933540344, + "learning_rate": 1.897340216517527e-05, + "loss": 0.5478694438934326, + "step": 344 + }, + { + "epoch": 0.18914473684210525, + "grad_norm": 0.5900570750236511, + "learning_rate": 1.8965382160557782e-05, + "loss": 0.5379772186279297, + "step": 345 + }, + { + "epoch": 0.18969298245614036, + "grad_norm": 0.5510210990905762, + "learning_rate": 1.8957332659292834e-05, + "loss": 0.5222848653793335, + "step": 346 + }, + { + "epoch": 0.19024122807017543, + "grad_norm": 0.6057240962982178, + "learning_rate": 1.8949253687863774e-05, + "loss": 0.5323337316513062, + "step": 347 + }, + { + "epoch": 0.19078947368421054, + "grad_norm": 0.5485894680023193, + "learning_rate": 1.8941145272850902e-05, + "loss": 0.5386132001876831, + "step": 348 + }, + { + "epoch": 0.1913377192982456, + "grad_norm": 0.585307776927948, + "learning_rate": 1.8933007440931385e-05, + "loss": 0.5313419103622437, + "step": 349 + }, + { + "epoch": 0.19188596491228072, + "grad_norm": 0.551096498966217, + "learning_rate": 1.892484021887919e-05, + "loss": 0.5472394227981567, + "step": 350 + }, + { + "epoch": 0.1924342105263158, + "grad_norm": 0.5347589254379272, + "learning_rate": 1.8916643633564966e-05, + "loss": 0.5171635746955872, + "step": 351 + }, + { + "epoch": 0.19298245614035087, + "grad_norm": 0.5558242201805115, + "learning_rate": 1.8908417711955973e-05, + "loss": 0.529994547367096, + "step": 352 + }, + { + "epoch": 0.19353070175438597, + "grad_norm": 0.5679911971092224, + "learning_rate": 1.8900162481115986e-05, + "loss": 0.5229799151420593, + "step": 353 + }, + { + "epoch": 0.19407894736842105, + "grad_norm": 0.5330626964569092, + "learning_rate": 1.8891877968205213e-05, + "loss": 0.54994136095047, + "step": 354 + }, + { + "epoch": 0.19462719298245615, + "grad_norm": 0.5267235040664673, + "learning_rate": 1.8883564200480208e-05, + "loss": 0.5337533950805664, + "step": 355 + }, + { + "epoch": 0.19517543859649122, + "grad_norm": 0.5202456712722778, + "learning_rate": 1.8875221205293755e-05, + "loss": 0.5240612626075745, + "step": 356 + }, + { + "epoch": 0.19572368421052633, + "grad_norm": 0.550948977470398, + "learning_rate": 1.886684901009482e-05, + "loss": 0.525791585445404, + "step": 357 + }, + { + "epoch": 0.1962719298245614, + "grad_norm": 0.508272647857666, + "learning_rate": 1.8858447642428426e-05, + "loss": 0.5174123048782349, + "step": 358 + }, + { + "epoch": 0.19682017543859648, + "grad_norm": 0.565328061580658, + "learning_rate": 1.885001712993558e-05, + "loss": 0.5382243394851685, + "step": 359 + }, + { + "epoch": 0.19736842105263158, + "grad_norm": 0.5756270885467529, + "learning_rate": 1.8841557500353175e-05, + "loss": 0.5362300276756287, + "step": 360 + }, + { + "epoch": 0.19791666666666666, + "grad_norm": 0.54173344373703, + "learning_rate": 1.8833068781513902e-05, + "loss": 0.5481382012367249, + "step": 361 + }, + { + "epoch": 0.19846491228070176, + "grad_norm": 0.5440255999565125, + "learning_rate": 1.882455100134616e-05, + "loss": 0.5397817492485046, + "step": 362 + }, + { + "epoch": 0.19901315789473684, + "grad_norm": 0.5340250134468079, + "learning_rate": 1.8816004187873956e-05, + "loss": 0.5356112718582153, + "step": 363 + }, + { + "epoch": 0.19956140350877194, + "grad_norm": 0.5714414119720459, + "learning_rate": 1.8807428369216822e-05, + "loss": 0.5300136804580688, + "step": 364 + }, + { + "epoch": 0.20010964912280702, + "grad_norm": 0.5363687872886658, + "learning_rate": 1.879882357358972e-05, + "loss": 0.5302585363388062, + "step": 365 + }, + { + "epoch": 0.20065789473684212, + "grad_norm": 0.5832900404930115, + "learning_rate": 1.8790189829302943e-05, + "loss": 0.5480587482452393, + "step": 366 + }, + { + "epoch": 0.2012061403508772, + "grad_norm": 0.557942807674408, + "learning_rate": 1.878152716476203e-05, + "loss": 0.5448884963989258, + "step": 367 + }, + { + "epoch": 0.20175438596491227, + "grad_norm": 0.5794870853424072, + "learning_rate": 1.8772835608467673e-05, + "loss": 0.534662663936615, + "step": 368 + }, + { + "epoch": 0.20230263157894737, + "grad_norm": 0.5900275707244873, + "learning_rate": 1.876411518901561e-05, + "loss": 0.5280211567878723, + "step": 369 + }, + { + "epoch": 0.20285087719298245, + "grad_norm": 0.5694456696510315, + "learning_rate": 1.875536593509655e-05, + "loss": 0.5381162166595459, + "step": 370 + }, + { + "epoch": 0.20339912280701755, + "grad_norm": 0.5804063677787781, + "learning_rate": 1.874658787549607e-05, + "loss": 0.5450135469436646, + "step": 371 + }, + { + "epoch": 0.20394736842105263, + "grad_norm": 0.5541572570800781, + "learning_rate": 1.87377810390945e-05, + "loss": 0.5326411724090576, + "step": 372 + }, + { + "epoch": 0.20449561403508773, + "grad_norm": 0.5617303252220154, + "learning_rate": 1.8728945454866883e-05, + "loss": 0.5263036489486694, + "step": 373 + }, + { + "epoch": 0.2050438596491228, + "grad_norm": 0.5810173153877258, + "learning_rate": 1.872008115188281e-05, + "loss": 0.5250884890556335, + "step": 374 + }, + { + "epoch": 0.20559210526315788, + "grad_norm": 0.5345124006271362, + "learning_rate": 1.8711188159306374e-05, + "loss": 0.5363500714302063, + "step": 375 + }, + { + "epoch": 0.20614035087719298, + "grad_norm": 0.5278041958808899, + "learning_rate": 1.8702266506396055e-05, + "loss": 0.5263749361038208, + "step": 376 + }, + { + "epoch": 0.20668859649122806, + "grad_norm": 0.5594993233680725, + "learning_rate": 1.8693316222504635e-05, + "loss": 0.5244511961936951, + "step": 377 + }, + { + "epoch": 0.20723684210526316, + "grad_norm": 0.4992246925830841, + "learning_rate": 1.868433733707908e-05, + "loss": 0.5366593599319458, + "step": 378 + }, + { + "epoch": 0.20778508771929824, + "grad_norm": 0.59859699010849, + "learning_rate": 1.867532987966047e-05, + "loss": 0.5325961112976074, + "step": 379 + }, + { + "epoch": 0.20833333333333334, + "grad_norm": 0.5476097464561462, + "learning_rate": 1.8666293879883875e-05, + "loss": 0.5390352606773376, + "step": 380 + }, + { + "epoch": 0.20888157894736842, + "grad_norm": 0.5338640213012695, + "learning_rate": 1.8657229367478286e-05, + "loss": 0.5377339720726013, + "step": 381 + }, + { + "epoch": 0.20942982456140352, + "grad_norm": 0.5540474057197571, + "learning_rate": 1.864813637226649e-05, + "loss": 0.5436443090438843, + "step": 382 + }, + { + "epoch": 0.2099780701754386, + "grad_norm": 0.5551073551177979, + "learning_rate": 1.8639014924164998e-05, + "loss": 0.5364835858345032, + "step": 383 + }, + { + "epoch": 0.21052631578947367, + "grad_norm": 0.5687278509140015, + "learning_rate": 1.862986505318391e-05, + "loss": 0.5310265421867371, + "step": 384 + }, + { + "epoch": 0.21107456140350878, + "grad_norm": 0.5749037265777588, + "learning_rate": 1.8620686789426864e-05, + "loss": 0.534489631652832, + "step": 385 + }, + { + "epoch": 0.21162280701754385, + "grad_norm": 0.5327162742614746, + "learning_rate": 1.8611480163090897e-05, + "loss": 0.5294643640518188, + "step": 386 + }, + { + "epoch": 0.21217105263157895, + "grad_norm": 0.5638524889945984, + "learning_rate": 1.8602245204466363e-05, + "loss": 0.5338449478149414, + "step": 387 + }, + { + "epoch": 0.21271929824561403, + "grad_norm": 0.5824586749076843, + "learning_rate": 1.859298194393683e-05, + "loss": 0.5351516008377075, + "step": 388 + }, + { + "epoch": 0.21326754385964913, + "grad_norm": 0.5394695401191711, + "learning_rate": 1.8583690411978993e-05, + "loss": 0.5316178202629089, + "step": 389 + }, + { + "epoch": 0.2138157894736842, + "grad_norm": 0.5534507632255554, + "learning_rate": 1.857437063916254e-05, + "loss": 0.5347613096237183, + "step": 390 + }, + { + "epoch": 0.21436403508771928, + "grad_norm": 1.921692132949829, + "learning_rate": 1.856502265615009e-05, + "loss": 0.5341858863830566, + "step": 391 + }, + { + "epoch": 0.2149122807017544, + "grad_norm": 0.5521547198295593, + "learning_rate": 1.8555646493697075e-05, + "loss": 0.5187911987304688, + "step": 392 + }, + { + "epoch": 0.21546052631578946, + "grad_norm": 0.5613106489181519, + "learning_rate": 1.8546242182651627e-05, + "loss": 0.5340178608894348, + "step": 393 + }, + { + "epoch": 0.21600877192982457, + "grad_norm": 0.5031777024269104, + "learning_rate": 1.8536809753954502e-05, + "loss": 0.5230652093887329, + "step": 394 + }, + { + "epoch": 0.21655701754385964, + "grad_norm": 0.530637264251709, + "learning_rate": 1.852734923863895e-05, + "loss": 0.5346648693084717, + "step": 395 + }, + { + "epoch": 0.21710526315789475, + "grad_norm": 0.543538510799408, + "learning_rate": 1.851786066783065e-05, + "loss": 0.539505124092102, + "step": 396 + }, + { + "epoch": 0.21765350877192982, + "grad_norm": 0.5140752196311951, + "learning_rate": 1.850834407274756e-05, + "loss": 0.5221940279006958, + "step": 397 + }, + { + "epoch": 0.21820175438596492, + "grad_norm": 0.5462019443511963, + "learning_rate": 1.8498799484699856e-05, + "loss": 0.5088600516319275, + "step": 398 + }, + { + "epoch": 0.21875, + "grad_norm": 0.5661113858222961, + "learning_rate": 1.8489226935089805e-05, + "loss": 0.5322563648223877, + "step": 399 + }, + { + "epoch": 0.21929824561403508, + "grad_norm": 0.5222601890563965, + "learning_rate": 1.847962645541168e-05, + "loss": 0.5383967161178589, + "step": 400 + }, + { + "epoch": 0.21984649122807018, + "grad_norm": 0.5328034162521362, + "learning_rate": 1.8469998077251623e-05, + "loss": 0.5342406630516052, + "step": 401 + }, + { + "epoch": 0.22039473684210525, + "grad_norm": 0.5401366949081421, + "learning_rate": 1.846034183228759e-05, + "loss": 0.5323405861854553, + "step": 402 + }, + { + "epoch": 0.22094298245614036, + "grad_norm": 0.5516491532325745, + "learning_rate": 1.8450657752289204e-05, + "loss": 0.5423470735549927, + "step": 403 + }, + { + "epoch": 0.22149122807017543, + "grad_norm": 0.5484375953674316, + "learning_rate": 1.8440945869117676e-05, + "loss": 0.5268591642379761, + "step": 404 + }, + { + "epoch": 0.22203947368421054, + "grad_norm": 0.542039155960083, + "learning_rate": 1.843120621472568e-05, + "loss": 0.5249344706535339, + "step": 405 + }, + { + "epoch": 0.2225877192982456, + "grad_norm": 0.5442968010902405, + "learning_rate": 1.842143882115727e-05, + "loss": 0.5164295434951782, + "step": 406 + }, + { + "epoch": 0.22313596491228072, + "grad_norm": 0.5494210124015808, + "learning_rate": 1.841164372054776e-05, + "loss": 0.5340598821640015, + "step": 407 + }, + { + "epoch": 0.2236842105263158, + "grad_norm": 0.5597900748252869, + "learning_rate": 1.840182094512362e-05, + "loss": 0.5296148061752319, + "step": 408 + }, + { + "epoch": 0.22423245614035087, + "grad_norm": 0.5344291925430298, + "learning_rate": 1.839197052720237e-05, + "loss": 0.5298682451248169, + "step": 409 + }, + { + "epoch": 0.22478070175438597, + "grad_norm": 0.5179139375686646, + "learning_rate": 1.8382092499192484e-05, + "loss": 0.5287031531333923, + "step": 410 + }, + { + "epoch": 0.22532894736842105, + "grad_norm": 0.5144736170768738, + "learning_rate": 1.8372186893593266e-05, + "loss": 0.5490144491195679, + "step": 411 + }, + { + "epoch": 0.22587719298245615, + "grad_norm": 0.543007493019104, + "learning_rate": 1.8362253742994757e-05, + "loss": 0.5291949510574341, + "step": 412 + }, + { + "epoch": 0.22642543859649122, + "grad_norm": 0.5593482851982117, + "learning_rate": 1.835229308007762e-05, + "loss": 0.5276323556900024, + "step": 413 + }, + { + "epoch": 0.22697368421052633, + "grad_norm": 0.5211661458015442, + "learning_rate": 1.8342304937613034e-05, + "loss": 0.5092172026634216, + "step": 414 + }, + { + "epoch": 0.2275219298245614, + "grad_norm": 0.5620855093002319, + "learning_rate": 1.8332289348462592e-05, + "loss": 0.5306937098503113, + "step": 415 + }, + { + "epoch": 0.22807017543859648, + "grad_norm": 0.4984802007675171, + "learning_rate": 1.8322246345578182e-05, + "loss": 0.5322443246841431, + "step": 416 + }, + { + "epoch": 0.22861842105263158, + "grad_norm": 0.5562717914581299, + "learning_rate": 1.8312175962001894e-05, + "loss": 0.5333069562911987, + "step": 417 + }, + { + "epoch": 0.22916666666666666, + "grad_norm": 0.5334926247596741, + "learning_rate": 1.8302078230865893e-05, + "loss": 0.5284031629562378, + "step": 418 + }, + { + "epoch": 0.22971491228070176, + "grad_norm": 0.5168144702911377, + "learning_rate": 1.829195318539233e-05, + "loss": 0.5266040563583374, + "step": 419 + }, + { + "epoch": 0.23026315789473684, + "grad_norm": 0.5674131512641907, + "learning_rate": 1.8281800858893203e-05, + "loss": 0.5325175523757935, + "step": 420 + }, + { + "epoch": 0.23081140350877194, + "grad_norm": 0.5394864082336426, + "learning_rate": 1.8271621284770282e-05, + "loss": 0.5242091417312622, + "step": 421 + }, + { + "epoch": 0.23135964912280702, + "grad_norm": 0.5389179587364197, + "learning_rate": 1.8261414496514985e-05, + "loss": 0.5218663215637207, + "step": 422 + }, + { + "epoch": 0.23190789473684212, + "grad_norm": 0.5189980268478394, + "learning_rate": 1.8251180527708256e-05, + "loss": 0.5272551774978638, + "step": 423 + }, + { + "epoch": 0.2324561403508772, + "grad_norm": 0.5364722013473511, + "learning_rate": 1.8240919412020467e-05, + "loss": 0.5427874326705933, + "step": 424 + }, + { + "epoch": 0.23300438596491227, + "grad_norm": 0.5321043729782104, + "learning_rate": 1.8230631183211308e-05, + "loss": 0.5246886014938354, + "step": 425 + }, + { + "epoch": 0.23355263157894737, + "grad_norm": 0.5318389534950256, + "learning_rate": 1.8220315875129674e-05, + "loss": 0.5147177577018738, + "step": 426 + }, + { + "epoch": 0.23410087719298245, + "grad_norm": 0.5194035172462463, + "learning_rate": 1.8209973521713545e-05, + "loss": 0.5231367349624634, + "step": 427 + }, + { + "epoch": 0.23464912280701755, + "grad_norm": 0.5205478668212891, + "learning_rate": 1.8199604156989895e-05, + "loss": 0.5181112885475159, + "step": 428 + }, + { + "epoch": 0.23519736842105263, + "grad_norm": 0.5329371094703674, + "learning_rate": 1.818920781507455e-05, + "loss": 0.526331901550293, + "step": 429 + }, + { + "epoch": 0.23574561403508773, + "grad_norm": 0.5039587020874023, + "learning_rate": 1.817878453017211e-05, + "loss": 0.5373213887214661, + "step": 430 + }, + { + "epoch": 0.2362938596491228, + "grad_norm": 0.5267012715339661, + "learning_rate": 1.8168334336575802e-05, + "loss": 0.5282840728759766, + "step": 431 + }, + { + "epoch": 0.23684210526315788, + "grad_norm": 0.5210471749305725, + "learning_rate": 1.81578572686674e-05, + "loss": 0.5187563896179199, + "step": 432 + }, + { + "epoch": 0.23739035087719298, + "grad_norm": 0.5142402648925781, + "learning_rate": 1.8147353360917083e-05, + "loss": 0.5163940787315369, + "step": 433 + }, + { + "epoch": 0.23793859649122806, + "grad_norm": 0.5326563119888306, + "learning_rate": 1.813682264788334e-05, + "loss": 0.5173913836479187, + "step": 434 + }, + { + "epoch": 0.23848684210526316, + "grad_norm": 0.5267801880836487, + "learning_rate": 1.812626516421285e-05, + "loss": 0.5498681664466858, + "step": 435 + }, + { + "epoch": 0.23903508771929824, + "grad_norm": 0.52644944190979, + "learning_rate": 1.8115680944640384e-05, + "loss": 0.5258373022079468, + "step": 436 + }, + { + "epoch": 0.23958333333333334, + "grad_norm": 0.5588622093200684, + "learning_rate": 1.8105070023988646e-05, + "loss": 0.5311025977134705, + "step": 437 + }, + { + "epoch": 0.24013157894736842, + "grad_norm": 0.585602879524231, + "learning_rate": 1.809443243716821e-05, + "loss": 0.521843671798706, + "step": 438 + }, + { + "epoch": 0.24067982456140352, + "grad_norm": 0.5403777956962585, + "learning_rate": 1.808376821917738e-05, + "loss": 0.527265727519989, + "step": 439 + }, + { + "epoch": 0.2412280701754386, + "grad_norm": 0.5174070596694946, + "learning_rate": 1.8073077405102074e-05, + "loss": 0.5219606161117554, + "step": 440 + }, + { + "epoch": 0.24177631578947367, + "grad_norm": 0.5555534362792969, + "learning_rate": 1.806236003011571e-05, + "loss": 0.5224829912185669, + "step": 441 + }, + { + "epoch": 0.24232456140350878, + "grad_norm": 0.5561994910240173, + "learning_rate": 1.8051616129479102e-05, + "loss": 0.5335409641265869, + "step": 442 + }, + { + "epoch": 0.24287280701754385, + "grad_norm": 0.5374264717102051, + "learning_rate": 1.804084573854033e-05, + "loss": 0.5216151475906372, + "step": 443 + }, + { + "epoch": 0.24342105263157895, + "grad_norm": 0.5270918607711792, + "learning_rate": 1.803004889273463e-05, + "loss": 0.5328495502471924, + "step": 444 + }, + { + "epoch": 0.24396929824561403, + "grad_norm": 0.5628570914268494, + "learning_rate": 1.8019225627584275e-05, + "loss": 0.5127445459365845, + "step": 445 + }, + { + "epoch": 0.24451754385964913, + "grad_norm": 0.5093899965286255, + "learning_rate": 1.8008375978698452e-05, + "loss": 0.5272922515869141, + "step": 446 + }, + { + "epoch": 0.2450657894736842, + "grad_norm": 0.5224986672401428, + "learning_rate": 1.799749998177317e-05, + "loss": 0.5233873128890991, + "step": 447 + }, + { + "epoch": 0.24561403508771928, + "grad_norm": 0.6797884106636047, + "learning_rate": 1.7986597672591113e-05, + "loss": 0.5107884407043457, + "step": 448 + }, + { + "epoch": 0.2461622807017544, + "grad_norm": 0.5089065432548523, + "learning_rate": 1.797566908702153e-05, + "loss": 0.521450936794281, + "step": 449 + }, + { + "epoch": 0.24671052631578946, + "grad_norm": 0.5519887208938599, + "learning_rate": 1.7964714261020127e-05, + "loss": 0.5311073660850525, + "step": 450 + }, + { + "epoch": 0.24725877192982457, + "grad_norm": 0.5152913928031921, + "learning_rate": 1.7953733230628942e-05, + "loss": 0.5231930613517761, + "step": 451 + }, + { + "epoch": 0.24780701754385964, + "grad_norm": 0.533416211605072, + "learning_rate": 1.794272603197623e-05, + "loss": 0.5366246700286865, + "step": 452 + }, + { + "epoch": 0.24835526315789475, + "grad_norm": 0.5098294019699097, + "learning_rate": 1.7931692701276332e-05, + "loss": 0.5331299304962158, + "step": 453 + }, + { + "epoch": 0.24890350877192982, + "grad_norm": 0.552507758140564, + "learning_rate": 1.7920633274829577e-05, + "loss": 0.5277151465415955, + "step": 454 + }, + { + "epoch": 0.24945175438596492, + "grad_norm": 0.5532183647155762, + "learning_rate": 1.7909547789022134e-05, + "loss": 0.540197491645813, + "step": 455 + }, + { + "epoch": 0.25, + "grad_norm": 0.5785213112831116, + "learning_rate": 1.789843628032593e-05, + "loss": 0.5247042179107666, + "step": 456 + }, + { + "epoch": 0.2505482456140351, + "grad_norm": 0.5596621632575989, + "learning_rate": 1.7887298785298498e-05, + "loss": 0.5302076935768127, + "step": 457 + }, + { + "epoch": 0.25109649122807015, + "grad_norm": 0.5846956372261047, + "learning_rate": 1.787613534058286e-05, + "loss": 0.5140069723129272, + "step": 458 + }, + { + "epoch": 0.25164473684210525, + "grad_norm": 0.5351735949516296, + "learning_rate": 1.7864945982907426e-05, + "loss": 0.5310229063034058, + "step": 459 + }, + { + "epoch": 0.25219298245614036, + "grad_norm": 0.5385695099830627, + "learning_rate": 1.7853730749085856e-05, + "loss": 0.5263150930404663, + "step": 460 + }, + { + "epoch": 0.25274122807017546, + "grad_norm": 0.5790038704872131, + "learning_rate": 1.784248967601695e-05, + "loss": 0.5242854952812195, + "step": 461 + }, + { + "epoch": 0.2532894736842105, + "grad_norm": 0.5039320588111877, + "learning_rate": 1.7831222800684504e-05, + "loss": 0.5093971490859985, + "step": 462 + }, + { + "epoch": 0.2538377192982456, + "grad_norm": 0.5355396866798401, + "learning_rate": 1.7819930160157228e-05, + "loss": 0.5264978408813477, + "step": 463 + }, + { + "epoch": 0.2543859649122807, + "grad_norm": 0.5939465761184692, + "learning_rate": 1.7808611791588584e-05, + "loss": 0.5278671383857727, + "step": 464 + }, + { + "epoch": 0.25493421052631576, + "grad_norm": 0.5435965061187744, + "learning_rate": 1.779726773221669e-05, + "loss": 0.5312660932540894, + "step": 465 + }, + { + "epoch": 0.25548245614035087, + "grad_norm": 0.5324194431304932, + "learning_rate": 1.7785898019364176e-05, + "loss": 0.5131596922874451, + "step": 466 + }, + { + "epoch": 0.25603070175438597, + "grad_norm": 0.5465553402900696, + "learning_rate": 1.7774502690438097e-05, + "loss": 0.5083457231521606, + "step": 467 + }, + { + "epoch": 0.2565789473684211, + "grad_norm": 0.5563082695007324, + "learning_rate": 1.776308178292976e-05, + "loss": 0.5215815901756287, + "step": 468 + }, + { + "epoch": 0.2571271929824561, + "grad_norm": 0.5333871245384216, + "learning_rate": 1.7751635334414642e-05, + "loss": 0.5281736254692078, + "step": 469 + }, + { + "epoch": 0.2576754385964912, + "grad_norm": 0.5244386196136475, + "learning_rate": 1.7740163382552248e-05, + "loss": 0.5255697965621948, + "step": 470 + }, + { + "epoch": 0.2582236842105263, + "grad_norm": 0.5395222306251526, + "learning_rate": 1.7728665965085995e-05, + "loss": 0.5342221856117249, + "step": 471 + }, + { + "epoch": 0.25877192982456143, + "grad_norm": 0.510644793510437, + "learning_rate": 1.7717143119843078e-05, + "loss": 0.5168928503990173, + "step": 472 + }, + { + "epoch": 0.2593201754385965, + "grad_norm": 0.6781021952629089, + "learning_rate": 1.7705594884734342e-05, + "loss": 0.5226818323135376, + "step": 473 + }, + { + "epoch": 0.2598684210526316, + "grad_norm": 0.5210604071617126, + "learning_rate": 1.769402129775419e-05, + "loss": 0.5066401362419128, + "step": 474 + }, + { + "epoch": 0.2604166666666667, + "grad_norm": 0.5117266774177551, + "learning_rate": 1.768242239698041e-05, + "loss": 0.514409065246582, + "step": 475 + }, + { + "epoch": 0.26096491228070173, + "grad_norm": 0.9269617795944214, + "learning_rate": 1.7670798220574093e-05, + "loss": 0.5230674147605896, + "step": 476 + }, + { + "epoch": 0.26151315789473684, + "grad_norm": 0.620804488658905, + "learning_rate": 1.7659148806779475e-05, + "loss": 0.5233884453773499, + "step": 477 + }, + { + "epoch": 0.26206140350877194, + "grad_norm": 0.5338650941848755, + "learning_rate": 1.764747419392383e-05, + "loss": 0.5138469934463501, + "step": 478 + }, + { + "epoch": 0.26260964912280704, + "grad_norm": 0.548736035823822, + "learning_rate": 1.7635774420417336e-05, + "loss": 0.5272300243377686, + "step": 479 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 0.5450613498687744, + "learning_rate": 1.7624049524752954e-05, + "loss": 0.5213850736618042, + "step": 480 + }, + { + "epoch": 0.2637061403508772, + "grad_norm": 0.5711430907249451, + "learning_rate": 1.76122995455063e-05, + "loss": 0.5210334062576294, + "step": 481 + }, + { + "epoch": 0.2642543859649123, + "grad_norm": 0.5337658524513245, + "learning_rate": 1.7600524521335512e-05, + "loss": 0.5187428593635559, + "step": 482 + }, + { + "epoch": 0.26480263157894735, + "grad_norm": 0.5508308410644531, + "learning_rate": 1.7588724490981125e-05, + "loss": 0.5229613780975342, + "step": 483 + }, + { + "epoch": 0.26535087719298245, + "grad_norm": 0.515425443649292, + "learning_rate": 1.7576899493265952e-05, + "loss": 0.5202820897102356, + "step": 484 + }, + { + "epoch": 0.26589912280701755, + "grad_norm": 0.5129563808441162, + "learning_rate": 1.756504956709495e-05, + "loss": 0.5205594897270203, + "step": 485 + }, + { + "epoch": 0.26644736842105265, + "grad_norm": 0.5160803198814392, + "learning_rate": 1.755317475145509e-05, + "loss": 0.5141991376876831, + "step": 486 + }, + { + "epoch": 0.2669956140350877, + "grad_norm": 0.5115503072738647, + "learning_rate": 1.754127508541523e-05, + "loss": 0.5174502730369568, + "step": 487 + }, + { + "epoch": 0.2675438596491228, + "grad_norm": 0.5334891080856323, + "learning_rate": 1.7529350608125985e-05, + "loss": 0.5169588327407837, + "step": 488 + }, + { + "epoch": 0.2680921052631579, + "grad_norm": 0.5355765223503113, + "learning_rate": 1.7517401358819608e-05, + "loss": 0.5175495147705078, + "step": 489 + }, + { + "epoch": 0.26864035087719296, + "grad_norm": 0.48000240325927734, + "learning_rate": 1.7505427376809848e-05, + "loss": 0.5160954594612122, + "step": 490 + }, + { + "epoch": 0.26918859649122806, + "grad_norm": 0.6343967914581299, + "learning_rate": 1.7493428701491827e-05, + "loss": 0.510797381401062, + "step": 491 + }, + { + "epoch": 0.26973684210526316, + "grad_norm": 0.8650885820388794, + "learning_rate": 1.748140537234191e-05, + "loss": 0.5214411616325378, + "step": 492 + }, + { + "epoch": 0.27028508771929827, + "grad_norm": 0.525759220123291, + "learning_rate": 1.7469357428917574e-05, + "loss": 0.5195897817611694, + "step": 493 + }, + { + "epoch": 0.2708333333333333, + "grad_norm": 0.4793441891670227, + "learning_rate": 1.745728491085728e-05, + "loss": 0.5165450572967529, + "step": 494 + }, + { + "epoch": 0.2713815789473684, + "grad_norm": 0.6296154856681824, + "learning_rate": 1.7445187857880334e-05, + "loss": 0.5223666429519653, + "step": 495 + }, + { + "epoch": 0.2719298245614035, + "grad_norm": 0.49726709723472595, + "learning_rate": 1.743306630978678e-05, + "loss": 0.522741436958313, + "step": 496 + }, + { + "epoch": 0.27247807017543857, + "grad_norm": 0.5264485478401184, + "learning_rate": 1.742092030645723e-05, + "loss": 0.5052363276481628, + "step": 497 + }, + { + "epoch": 0.2730263157894737, + "grad_norm": 0.5733623504638672, + "learning_rate": 1.740874988785277e-05, + "loss": 0.5072427988052368, + "step": 498 + }, + { + "epoch": 0.2735745614035088, + "grad_norm": 0.5145069360733032, + "learning_rate": 1.739655509401482e-05, + "loss": 0.5078324675559998, + "step": 499 + }, + { + "epoch": 0.2741228070175439, + "grad_norm": 0.5487716197967529, + "learning_rate": 1.7384335965064974e-05, + "loss": 0.5313689112663269, + "step": 500 + }, + { + "epoch": 0.2746710526315789, + "grad_norm": 0.5024135708808899, + "learning_rate": 1.737209254120491e-05, + "loss": 0.5236111879348755, + "step": 501 + }, + { + "epoch": 0.27521929824561403, + "grad_norm": 0.5059981942176819, + "learning_rate": 1.735982486271622e-05, + "loss": 0.5159262418746948, + "step": 502 + }, + { + "epoch": 0.27576754385964913, + "grad_norm": 0.5166164636611938, + "learning_rate": 1.7347532969960322e-05, + "loss": 0.5162345767021179, + "step": 503 + }, + { + "epoch": 0.27631578947368424, + "grad_norm": 0.5122005343437195, + "learning_rate": 1.733521690337827e-05, + "loss": 0.49863386154174805, + "step": 504 + }, + { + "epoch": 0.2768640350877193, + "grad_norm": 0.5083131790161133, + "learning_rate": 1.732287670349067e-05, + "loss": 0.5120081901550293, + "step": 505 + }, + { + "epoch": 0.2774122807017544, + "grad_norm": 0.5615320801734924, + "learning_rate": 1.731051241089752e-05, + "loss": 0.5304388403892517, + "step": 506 + }, + { + "epoch": 0.2779605263157895, + "grad_norm": 0.5108237266540527, + "learning_rate": 1.7298124066278094e-05, + "loss": 0.5019451975822449, + "step": 507 + }, + { + "epoch": 0.27850877192982454, + "grad_norm": 0.5400595664978027, + "learning_rate": 1.7285711710390788e-05, + "loss": 0.5011708736419678, + "step": 508 + }, + { + "epoch": 0.27905701754385964, + "grad_norm": 0.5205352902412415, + "learning_rate": 1.7273275384073e-05, + "loss": 0.5174415111541748, + "step": 509 + }, + { + "epoch": 0.27960526315789475, + "grad_norm": 0.5404713153839111, + "learning_rate": 1.726081512824099e-05, + "loss": 0.5089818239212036, + "step": 510 + }, + { + "epoch": 0.28015350877192985, + "grad_norm": 0.5409142374992371, + "learning_rate": 1.724833098388976e-05, + "loss": 0.5215460062026978, + "step": 511 + }, + { + "epoch": 0.2807017543859649, + "grad_norm": 0.5232543349266052, + "learning_rate": 1.723582299209289e-05, + "loss": 0.5150152444839478, + "step": 512 + }, + { + "epoch": 0.28125, + "grad_norm": 0.5587122440338135, + "learning_rate": 1.722329119400243e-05, + "loss": 0.5138131380081177, + "step": 513 + }, + { + "epoch": 0.2817982456140351, + "grad_norm": 0.524645209312439, + "learning_rate": 1.7210735630848747e-05, + "loss": 0.523358941078186, + "step": 514 + }, + { + "epoch": 0.28234649122807015, + "grad_norm": 0.5060891509056091, + "learning_rate": 1.71981563439404e-05, + "loss": 0.518589973449707, + "step": 515 + }, + { + "epoch": 0.28289473684210525, + "grad_norm": 0.5494999885559082, + "learning_rate": 1.7185553374664006e-05, + "loss": 0.5273317098617554, + "step": 516 + }, + { + "epoch": 0.28344298245614036, + "grad_norm": 0.5179638266563416, + "learning_rate": 1.7172926764484085e-05, + "loss": 0.5220479965209961, + "step": 517 + }, + { + "epoch": 0.28399122807017546, + "grad_norm": 0.497627317905426, + "learning_rate": 1.7160276554942953e-05, + "loss": 0.517738401889801, + "step": 518 + }, + { + "epoch": 0.2845394736842105, + "grad_norm": 0.4714062511920929, + "learning_rate": 1.7147602787660563e-05, + "loss": 0.5142806768417358, + "step": 519 + }, + { + "epoch": 0.2850877192982456, + "grad_norm": 0.5007819533348083, + "learning_rate": 1.7134905504334365e-05, + "loss": 0.51795893907547, + "step": 520 + }, + { + "epoch": 0.2856359649122807, + "grad_norm": 0.49607014656066895, + "learning_rate": 1.7122184746739188e-05, + "loss": 0.5078467726707458, + "step": 521 + }, + { + "epoch": 0.28618421052631576, + "grad_norm": 0.49878785014152527, + "learning_rate": 1.7109440556727094e-05, + "loss": 0.5141764283180237, + "step": 522 + }, + { + "epoch": 0.28673245614035087, + "grad_norm": 0.5011985301971436, + "learning_rate": 1.7096672976227236e-05, + "loss": 0.5168810486793518, + "step": 523 + }, + { + "epoch": 0.28728070175438597, + "grad_norm": 0.5092190504074097, + "learning_rate": 1.708388204724572e-05, + "loss": 0.5183935761451721, + "step": 524 + }, + { + "epoch": 0.2878289473684211, + "grad_norm": 0.5125263929367065, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.509937047958374, + "step": 525 + }, + { + "epoch": 0.2883771929824561, + "grad_norm": 0.5333483219146729, + "learning_rate": 1.705823031224611e-05, + "loss": 0.5176279544830322, + "step": 526 + }, + { + "epoch": 0.2889254385964912, + "grad_norm": 0.5202133655548096, + "learning_rate": 1.7045369590623768e-05, + "loss": 0.5100720524787903, + "step": 527 + }, + { + "epoch": 0.2894736842105263, + "grad_norm": 0.5099378228187561, + "learning_rate": 1.7032485689310997e-05, + "loss": 0.5064803957939148, + "step": 528 + }, + { + "epoch": 0.29002192982456143, + "grad_norm": 0.5217911005020142, + "learning_rate": 1.7019578650696614e-05, + "loss": 0.514005184173584, + "step": 529 + }, + { + "epoch": 0.2905701754385965, + "grad_norm": 0.5043156147003174, + "learning_rate": 1.7006648517245547e-05, + "loss": 0.52207350730896, + "step": 530 + }, + { + "epoch": 0.2911184210526316, + "grad_norm": 0.5025941133499146, + "learning_rate": 1.6993695331498723e-05, + "loss": 0.514879584312439, + "step": 531 + }, + { + "epoch": 0.2916666666666667, + "grad_norm": 0.5254765748977661, + "learning_rate": 1.6980719136072892e-05, + "loss": 0.5103027820587158, + "step": 532 + }, + { + "epoch": 0.29221491228070173, + "grad_norm": 0.5231887698173523, + "learning_rate": 1.696771997366053e-05, + "loss": 0.5095311403274536, + "step": 533 + }, + { + "epoch": 0.29276315789473684, + "grad_norm": 0.49643293023109436, + "learning_rate": 1.6954697887029657e-05, + "loss": 0.5114062428474426, + "step": 534 + }, + { + "epoch": 0.29331140350877194, + "grad_norm": 0.5372974276542664, + "learning_rate": 1.6941652919023727e-05, + "loss": 0.5112413167953491, + "step": 535 + }, + { + "epoch": 0.29385964912280704, + "grad_norm": 0.5067369341850281, + "learning_rate": 1.6928585112561468e-05, + "loss": 0.5218123197555542, + "step": 536 + }, + { + "epoch": 0.2944078947368421, + "grad_norm": 0.5153988599777222, + "learning_rate": 1.6915494510636748e-05, + "loss": 0.510006308555603, + "step": 537 + }, + { + "epoch": 0.2949561403508772, + "grad_norm": 0.5265623927116394, + "learning_rate": 1.690238115631844e-05, + "loss": 0.5182325839996338, + "step": 538 + }, + { + "epoch": 0.2955043859649123, + "grad_norm": 0.5706140398979187, + "learning_rate": 1.6889245092750274e-05, + "loss": 0.5318684577941895, + "step": 539 + }, + { + "epoch": 0.29605263157894735, + "grad_norm": 0.48171648383140564, + "learning_rate": 1.687608636315068e-05, + "loss": 0.5149196982383728, + "step": 540 + }, + { + "epoch": 0.29660087719298245, + "grad_norm": 0.5281539559364319, + "learning_rate": 1.686290501081268e-05, + "loss": 0.5077455043792725, + "step": 541 + }, + { + "epoch": 0.29714912280701755, + "grad_norm": 0.5157037973403931, + "learning_rate": 1.6849701079103714e-05, + "loss": 0.49477753043174744, + "step": 542 + }, + { + "epoch": 0.29769736842105265, + "grad_norm": 0.5387939214706421, + "learning_rate": 1.6836474611465515e-05, + "loss": 0.5263862609863281, + "step": 543 + }, + { + "epoch": 0.2982456140350877, + "grad_norm": 0.5387179255485535, + "learning_rate": 1.682322565141395e-05, + "loss": 0.5146876573562622, + "step": 544 + }, + { + "epoch": 0.2987938596491228, + "grad_norm": 0.5182175040245056, + "learning_rate": 1.6809954242538907e-05, + "loss": 0.5151358246803284, + "step": 545 + }, + { + "epoch": 0.2993421052631579, + "grad_norm": 0.5380533337593079, + "learning_rate": 1.6796660428504114e-05, + "loss": 0.5249975919723511, + "step": 546 + }, + { + "epoch": 0.29989035087719296, + "grad_norm": 0.5614789724349976, + "learning_rate": 1.6783344253047027e-05, + "loss": 0.5180106163024902, + "step": 547 + }, + { + "epoch": 0.30043859649122806, + "grad_norm": 0.5239329934120178, + "learning_rate": 1.6770005759978656e-05, + "loss": 0.5265669822692871, + "step": 548 + }, + { + "epoch": 0.30098684210526316, + "grad_norm": 0.540311872959137, + "learning_rate": 1.6756644993183453e-05, + "loss": 0.5158952474594116, + "step": 549 + }, + { + "epoch": 0.30153508771929827, + "grad_norm": 0.5373164415359497, + "learning_rate": 1.6743261996619145e-05, + "loss": 0.4985647201538086, + "step": 550 + }, + { + "epoch": 0.3020833333333333, + "grad_norm": 0.5068860650062561, + "learning_rate": 1.6729856814316595e-05, + "loss": 0.5132797360420227, + "step": 551 + }, + { + "epoch": 0.3026315789473684, + "grad_norm": 0.5771002173423767, + "learning_rate": 1.671642949037966e-05, + "loss": 0.5127143859863281, + "step": 552 + }, + { + "epoch": 0.3031798245614035, + "grad_norm": 0.5291208624839783, + "learning_rate": 1.6702980068985048e-05, + "loss": 0.5070824027061462, + "step": 553 + }, + { + "epoch": 0.30372807017543857, + "grad_norm": 0.5508853793144226, + "learning_rate": 1.668950859438216e-05, + "loss": 0.5169316530227661, + "step": 554 + }, + { + "epoch": 0.3042763157894737, + "grad_norm": 0.5345003604888916, + "learning_rate": 1.6676015110892973e-05, + "loss": 0.5074903964996338, + "step": 555 + }, + { + "epoch": 0.3048245614035088, + "grad_norm": 0.5480985641479492, + "learning_rate": 1.6662499662911843e-05, + "loss": 0.5174945592880249, + "step": 556 + }, + { + "epoch": 0.3053728070175439, + "grad_norm": 0.531358003616333, + "learning_rate": 1.6648962294905416e-05, + "loss": 0.5207014679908752, + "step": 557 + }, + { + "epoch": 0.3059210526315789, + "grad_norm": 0.49597254395484924, + "learning_rate": 1.6635403051412452e-05, + "loss": 0.5044191479682922, + "step": 558 + }, + { + "epoch": 0.30646929824561403, + "grad_norm": 0.5143059492111206, + "learning_rate": 1.6621821977043673e-05, + "loss": 0.5156230926513672, + "step": 559 + }, + { + "epoch": 0.30701754385964913, + "grad_norm": 0.5195500254631042, + "learning_rate": 1.6608219116481633e-05, + "loss": 0.5130860209465027, + "step": 560 + }, + { + "epoch": 0.30756578947368424, + "grad_norm": 0.5337082147598267, + "learning_rate": 1.6594594514480564e-05, + "loss": 0.49173516035079956, + "step": 561 + }, + { + "epoch": 0.3081140350877193, + "grad_norm": 0.5399724841117859, + "learning_rate": 1.6580948215866232e-05, + "loss": 0.5126796960830688, + "step": 562 + }, + { + "epoch": 0.3086622807017544, + "grad_norm": 0.5642949938774109, + "learning_rate": 1.656728026553577e-05, + "loss": 0.507789671421051, + "step": 563 + }, + { + "epoch": 0.3092105263157895, + "grad_norm": 0.5704178214073181, + "learning_rate": 1.655359070845757e-05, + "loss": 0.5131293535232544, + "step": 564 + }, + { + "epoch": 0.30975877192982454, + "grad_norm": 0.5212867856025696, + "learning_rate": 1.6539879589671093e-05, + "loss": 0.5129238367080688, + "step": 565 + }, + { + "epoch": 0.31030701754385964, + "grad_norm": 0.5646382570266724, + "learning_rate": 1.652614695428675e-05, + "loss": 0.49918338656425476, + "step": 566 + }, + { + "epoch": 0.31085526315789475, + "grad_norm": 0.5082690715789795, + "learning_rate": 1.6512392847485733e-05, + "loss": 0.5041823983192444, + "step": 567 + }, + { + "epoch": 0.31140350877192985, + "grad_norm": 0.5300014615058899, + "learning_rate": 1.6498617314519886e-05, + "loss": 0.5186752676963806, + "step": 568 + }, + { + "epoch": 0.3119517543859649, + "grad_norm": 0.554815948009491, + "learning_rate": 1.6484820400711545e-05, + "loss": 0.5164564251899719, + "step": 569 + }, + { + "epoch": 0.3125, + "grad_norm": 0.5678638815879822, + "learning_rate": 1.647100215145338e-05, + "loss": 0.500181257724762, + "step": 570 + }, + { + "epoch": 0.3130482456140351, + "grad_norm": 0.5426273941993713, + "learning_rate": 1.645716261220827e-05, + "loss": 0.5085293054580688, + "step": 571 + }, + { + "epoch": 0.31359649122807015, + "grad_norm": 0.5601498484611511, + "learning_rate": 1.644330182850913e-05, + "loss": 0.5364582538604736, + "step": 572 + }, + { + "epoch": 0.31414473684210525, + "grad_norm": 0.554161012172699, + "learning_rate": 1.6429419845958778e-05, + "loss": 0.5135458707809448, + "step": 573 + }, + { + "epoch": 0.31469298245614036, + "grad_norm": 0.5089998245239258, + "learning_rate": 1.6415516710229767e-05, + "loss": 0.5096344351768494, + "step": 574 + }, + { + "epoch": 0.31524122807017546, + "grad_norm": 0.5251020789146423, + "learning_rate": 1.6401592467064254e-05, + "loss": 0.4982328414916992, + "step": 575 + }, + { + "epoch": 0.3157894736842105, + "grad_norm": 0.5574110150337219, + "learning_rate": 1.6387647162273837e-05, + "loss": 0.49430570006370544, + "step": 576 + }, + { + "epoch": 0.3163377192982456, + "grad_norm": 0.5299574732780457, + "learning_rate": 1.637368084173941e-05, + "loss": 0.5284730792045593, + "step": 577 + }, + { + "epoch": 0.3168859649122807, + "grad_norm": 0.4945147931575775, + "learning_rate": 1.6359693551411008e-05, + "loss": 0.5055221319198608, + "step": 578 + }, + { + "epoch": 0.31743421052631576, + "grad_norm": 1.933510661125183, + "learning_rate": 1.6345685337307656e-05, + "loss": 0.5102857947349548, + "step": 579 + }, + { + "epoch": 0.31798245614035087, + "grad_norm": 0.5664254426956177, + "learning_rate": 1.633165624551723e-05, + "loss": 0.5002626180648804, + "step": 580 + }, + { + "epoch": 0.31853070175438597, + "grad_norm": 0.5354625582695007, + "learning_rate": 1.6317606322196284e-05, + "loss": 0.503082275390625, + "step": 581 + }, + { + "epoch": 0.3190789473684211, + "grad_norm": 0.5543798804283142, + "learning_rate": 1.6303535613569908e-05, + "loss": 0.5081637501716614, + "step": 582 + }, + { + "epoch": 0.3196271929824561, + "grad_norm": 0.5393402576446533, + "learning_rate": 1.6289444165931587e-05, + "loss": 0.4948890209197998, + "step": 583 + }, + { + "epoch": 0.3201754385964912, + "grad_norm": 0.5106928944587708, + "learning_rate": 1.627533202564303e-05, + "loss": 0.5148372650146484, + "step": 584 + }, + { + "epoch": 0.3207236842105263, + "grad_norm": 0.5648923516273499, + "learning_rate": 1.6261199239134023e-05, + "loss": 0.5127902626991272, + "step": 585 + }, + { + "epoch": 0.32127192982456143, + "grad_norm": 0.5218735933303833, + "learning_rate": 1.6247045852902298e-05, + "loss": 0.5089883804321289, + "step": 586 + }, + { + "epoch": 0.3218201754385965, + "grad_norm": 0.5288784503936768, + "learning_rate": 1.623287191351334e-05, + "loss": 0.5053139925003052, + "step": 587 + }, + { + "epoch": 0.3223684210526316, + "grad_norm": 0.5042205452919006, + "learning_rate": 1.6218677467600264e-05, + "loss": 0.49913498759269714, + "step": 588 + }, + { + "epoch": 0.3229166666666667, + "grad_norm": 0.5079101324081421, + "learning_rate": 1.6204462561863652e-05, + "loss": 0.5198526978492737, + "step": 589 + }, + { + "epoch": 0.32346491228070173, + "grad_norm": 0.5735262632369995, + "learning_rate": 1.6190227243071402e-05, + "loss": 0.5066660642623901, + "step": 590 + }, + { + "epoch": 0.32401315789473684, + "grad_norm": 0.5124266743659973, + "learning_rate": 1.617597155805857e-05, + "loss": 0.4869317412376404, + "step": 591 + }, + { + "epoch": 0.32456140350877194, + "grad_norm": 0.5241952538490295, + "learning_rate": 1.6161695553727218e-05, + "loss": 0.500100314617157, + "step": 592 + }, + { + "epoch": 0.32510964912280704, + "grad_norm": 0.5251774787902832, + "learning_rate": 1.6147399277046264e-05, + "loss": 0.5080313086509705, + "step": 593 + }, + { + "epoch": 0.3256578947368421, + "grad_norm": 0.4947800934314728, + "learning_rate": 1.6133082775051312e-05, + "loss": 0.5108361840248108, + "step": 594 + }, + { + "epoch": 0.3262061403508772, + "grad_norm": 0.4975525438785553, + "learning_rate": 1.6118746094844523e-05, + "loss": 0.5160967111587524, + "step": 595 + }, + { + "epoch": 0.3267543859649123, + "grad_norm": 0.5103340744972229, + "learning_rate": 1.6104389283594435e-05, + "loss": 0.5133464336395264, + "step": 596 + }, + { + "epoch": 0.32730263157894735, + "grad_norm": 0.5223587155342102, + "learning_rate": 1.6090012388535825e-05, + "loss": 0.5166263580322266, + "step": 597 + }, + { + "epoch": 0.32785087719298245, + "grad_norm": 0.5182468295097351, + "learning_rate": 1.607561545696954e-05, + "loss": 0.5085626840591431, + "step": 598 + }, + { + "epoch": 0.32839912280701755, + "grad_norm": 0.5165911912918091, + "learning_rate": 1.6061198536262355e-05, + "loss": 0.5134096741676331, + "step": 599 + }, + { + "epoch": 0.32894736842105265, + "grad_norm": 0.5011234283447266, + "learning_rate": 1.604676167384681e-05, + "loss": 0.5025832056999207, + "step": 600 + }, + { + "epoch": 0.3294956140350877, + "grad_norm": 0.5092823505401611, + "learning_rate": 1.6032304917221045e-05, + "loss": 0.5118825435638428, + "step": 601 + }, + { + "epoch": 0.3300438596491228, + "grad_norm": 0.5826072692871094, + "learning_rate": 1.6017828313948667e-05, + "loss": 0.503429651260376, + "step": 602 + }, + { + "epoch": 0.3305921052631579, + "grad_norm": 0.5502949953079224, + "learning_rate": 1.600333191165857e-05, + "loss": 0.5013739466667175, + "step": 603 + }, + { + "epoch": 0.33114035087719296, + "grad_norm": 0.4851742386817932, + "learning_rate": 1.5988815758044794e-05, + "loss": 0.498490571975708, + "step": 604 + }, + { + "epoch": 0.33168859649122806, + "grad_norm": 0.5124604105949402, + "learning_rate": 1.5974279900866353e-05, + "loss": 0.5156079530715942, + "step": 605 + }, + { + "epoch": 0.33223684210526316, + "grad_norm": 0.5280570387840271, + "learning_rate": 1.59597243879471e-05, + "loss": 0.5016382932662964, + "step": 606 + }, + { + "epoch": 0.33278508771929827, + "grad_norm": 0.5246084928512573, + "learning_rate": 1.594514926717554e-05, + "loss": 0.5041351318359375, + "step": 607 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.5151121020317078, + "learning_rate": 1.593055458650471e-05, + "loss": 0.4976646900177002, + "step": 608 + }, + { + "epoch": 0.3338815789473684, + "grad_norm": 0.5183243751525879, + "learning_rate": 1.5915940393951975e-05, + "loss": 0.4976310133934021, + "step": 609 + }, + { + "epoch": 0.3344298245614035, + "grad_norm": 0.5183877944946289, + "learning_rate": 1.5901306737598922e-05, + "loss": 0.49520882964134216, + "step": 610 + }, + { + "epoch": 0.33497807017543857, + "grad_norm": 0.5211166739463806, + "learning_rate": 1.5886653665591158e-05, + "loss": 0.5143977403640747, + "step": 611 + }, + { + "epoch": 0.3355263157894737, + "grad_norm": 0.48644551634788513, + "learning_rate": 1.5871981226138174e-05, + "loss": 0.49930331110954285, + "step": 612 + }, + { + "epoch": 0.3360745614035088, + "grad_norm": 0.5271084308624268, + "learning_rate": 1.5857289467513172e-05, + "loss": 0.48993366956710815, + "step": 613 + }, + { + "epoch": 0.3366228070175439, + "grad_norm": 0.5187105536460876, + "learning_rate": 1.584257843805293e-05, + "loss": 0.5129623413085938, + "step": 614 + }, + { + "epoch": 0.3371710526315789, + "grad_norm": 0.5101479887962341, + "learning_rate": 1.582784818615762e-05, + "loss": 0.5065633654594421, + "step": 615 + }, + { + "epoch": 0.33771929824561403, + "grad_norm": 0.4912799298763275, + "learning_rate": 1.581309876029066e-05, + "loss": 0.48601680994033813, + "step": 616 + }, + { + "epoch": 0.33826754385964913, + "grad_norm": 0.4957771301269531, + "learning_rate": 1.5798330208978545e-05, + "loss": 0.4986298680305481, + "step": 617 + }, + { + "epoch": 0.33881578947368424, + "grad_norm": 0.5334628224372864, + "learning_rate": 1.5783542580810702e-05, + "loss": 0.5085287690162659, + "step": 618 + }, + { + "epoch": 0.3393640350877193, + "grad_norm": 0.5106218457221985, + "learning_rate": 1.5768735924439317e-05, + "loss": 0.4843800961971283, + "step": 619 + }, + { + "epoch": 0.3399122807017544, + "grad_norm": 0.5028548836708069, + "learning_rate": 1.5753910288579188e-05, + "loss": 0.4937993884086609, + "step": 620 + }, + { + "epoch": 0.3404605263157895, + "grad_norm": 0.521271288394928, + "learning_rate": 1.573906572200754e-05, + "loss": 0.5151383280754089, + "step": 621 + }, + { + "epoch": 0.34100877192982454, + "grad_norm": 0.5260574817657471, + "learning_rate": 1.5724202273563896e-05, + "loss": 0.5101398229598999, + "step": 622 + }, + { + "epoch": 0.34155701754385964, + "grad_norm": 0.5086724162101746, + "learning_rate": 1.57093199921499e-05, + "loss": 0.5031939744949341, + "step": 623 + }, + { + "epoch": 0.34210526315789475, + "grad_norm": 0.5297996401786804, + "learning_rate": 1.5694418926729148e-05, + "loss": 0.4935210943222046, + "step": 624 + }, + { + "epoch": 0.34265350877192985, + "grad_norm": 0.5105075240135193, + "learning_rate": 1.5679499126327044e-05, + "loss": 0.4937981963157654, + "step": 625 + }, + { + "epoch": 0.3432017543859649, + "grad_norm": 0.4859083592891693, + "learning_rate": 1.566456064003064e-05, + "loss": 0.4915929436683655, + "step": 626 + }, + { + "epoch": 0.34375, + "grad_norm": 0.5445349812507629, + "learning_rate": 1.564960351698844e-05, + "loss": 0.5122596025466919, + "step": 627 + }, + { + "epoch": 0.3442982456140351, + "grad_norm": 0.49146509170532227, + "learning_rate": 1.5634627806410296e-05, + "loss": 0.4980570673942566, + "step": 628 + }, + { + "epoch": 0.34484649122807015, + "grad_norm": 0.5087494254112244, + "learning_rate": 1.561963355756719e-05, + "loss": 0.4985271394252777, + "step": 629 + }, + { + "epoch": 0.34539473684210525, + "grad_norm": 0.5231812000274658, + "learning_rate": 1.56046208197911e-05, + "loss": 0.5052623152732849, + "step": 630 + }, + { + "epoch": 0.34594298245614036, + "grad_norm": 0.5085192322731018, + "learning_rate": 1.5589589642474843e-05, + "loss": 0.5217705965042114, + "step": 631 + }, + { + "epoch": 0.34649122807017546, + "grad_norm": 0.5085150599479675, + "learning_rate": 1.55745400750719e-05, + "loss": 0.5063439607620239, + "step": 632 + }, + { + "epoch": 0.3470394736842105, + "grad_norm": 0.4942718744277954, + "learning_rate": 1.5559472167096252e-05, + "loss": 0.508962869644165, + "step": 633 + }, + { + "epoch": 0.3475877192982456, + "grad_norm": 0.5233277082443237, + "learning_rate": 1.5544385968122225e-05, + "loss": 0.4943055808544159, + "step": 634 + }, + { + "epoch": 0.3481359649122807, + "grad_norm": 0.5197892189025879, + "learning_rate": 1.552928152778432e-05, + "loss": 0.5063210725784302, + "step": 635 + }, + { + "epoch": 0.34868421052631576, + "grad_norm": 0.5159553289413452, + "learning_rate": 1.5514158895777057e-05, + "loss": 0.5054895877838135, + "step": 636 + }, + { + "epoch": 0.34923245614035087, + "grad_norm": 0.49733617901802063, + "learning_rate": 1.5499018121854808e-05, + "loss": 0.49076005816459656, + "step": 637 + }, + { + "epoch": 0.34978070175438597, + "grad_norm": 0.5775347948074341, + "learning_rate": 1.5483859255831628e-05, + "loss": 0.4983357787132263, + "step": 638 + }, + { + "epoch": 0.3503289473684211, + "grad_norm": 0.5254983305931091, + "learning_rate": 1.5468682347581102e-05, + "loss": 0.5100836753845215, + "step": 639 + }, + { + "epoch": 0.3508771929824561, + "grad_norm": 0.5384511947631836, + "learning_rate": 1.5453487447036174e-05, + "loss": 0.517541766166687, + "step": 640 + }, + { + "epoch": 0.3514254385964912, + "grad_norm": 0.5437305569648743, + "learning_rate": 1.5438274604188977e-05, + "loss": 0.5047601461410522, + "step": 641 + }, + { + "epoch": 0.3519736842105263, + "grad_norm": 0.5298881530761719, + "learning_rate": 1.5423043869090687e-05, + "loss": 0.49070271849632263, + "step": 642 + }, + { + "epoch": 0.35252192982456143, + "grad_norm": 0.5131424069404602, + "learning_rate": 1.5407795291851336e-05, + "loss": 0.5067086219787598, + "step": 643 + }, + { + "epoch": 0.3530701754385965, + "grad_norm": 0.538144588470459, + "learning_rate": 1.5392528922639662e-05, + "loss": 0.4947914481163025, + "step": 644 + }, + { + "epoch": 0.3536184210526316, + "grad_norm": 0.5141587257385254, + "learning_rate": 1.537724481168294e-05, + "loss": 0.492933452129364, + "step": 645 + }, + { + "epoch": 0.3541666666666667, + "grad_norm": 0.5233535170555115, + "learning_rate": 1.536194300926682e-05, + "loss": 0.5006459951400757, + "step": 646 + }, + { + "epoch": 0.35471491228070173, + "grad_norm": 0.4955417215824127, + "learning_rate": 1.5346623565735145e-05, + "loss": 0.5072038769721985, + "step": 647 + }, + { + "epoch": 0.35526315789473684, + "grad_norm": 0.4693199098110199, + "learning_rate": 1.533128653148982e-05, + "loss": 0.49651047587394714, + "step": 648 + }, + { + "epoch": 0.35581140350877194, + "grad_norm": 0.5177298784255981, + "learning_rate": 1.53159319569906e-05, + "loss": 0.5096155405044556, + "step": 649 + }, + { + "epoch": 0.35635964912280704, + "grad_norm": 0.5220261216163635, + "learning_rate": 1.5300559892754957e-05, + "loss": 0.503006637096405, + "step": 650 + }, + { + "epoch": 0.3569078947368421, + "grad_norm": 0.4956647753715515, + "learning_rate": 1.5285170389357924e-05, + "loss": 0.492157906293869, + "step": 651 + }, + { + "epoch": 0.3574561403508772, + "grad_norm": 0.5249745845794678, + "learning_rate": 1.5269763497431882e-05, + "loss": 0.5008402466773987, + "step": 652 + }, + { + "epoch": 0.3580043859649123, + "grad_norm": 0.5125779509544373, + "learning_rate": 1.525433926766644e-05, + "loss": 0.49728164076805115, + "step": 653 + }, + { + "epoch": 0.35855263157894735, + "grad_norm": 0.4863375425338745, + "learning_rate": 1.5238897750808242e-05, + "loss": 0.5007228255271912, + "step": 654 + }, + { + "epoch": 0.35910087719298245, + "grad_norm": 0.5670150518417358, + "learning_rate": 1.5223438997660802e-05, + "loss": 0.4918896555900574, + "step": 655 + }, + { + "epoch": 0.35964912280701755, + "grad_norm": 0.5108444690704346, + "learning_rate": 1.520796305908436e-05, + "loss": 0.4987252354621887, + "step": 656 + }, + { + "epoch": 0.36019736842105265, + "grad_norm": 0.48743292689323425, + "learning_rate": 1.5192469985995684e-05, + "loss": 0.5023507475852966, + "step": 657 + }, + { + "epoch": 0.3607456140350877, + "grad_norm": 0.4842602610588074, + "learning_rate": 1.5176959829367907e-05, + "loss": 0.5085281133651733, + "step": 658 + }, + { + "epoch": 0.3612938596491228, + "grad_norm": 0.4879213869571686, + "learning_rate": 1.5161432640230391e-05, + "loss": 0.4956406056880951, + "step": 659 + }, + { + "epoch": 0.3618421052631579, + "grad_norm": 0.4824928343296051, + "learning_rate": 1.5145888469668521e-05, + "loss": 0.5034121870994568, + "step": 660 + }, + { + "epoch": 0.36239035087719296, + "grad_norm": 0.4922201931476593, + "learning_rate": 1.5130327368823551e-05, + "loss": 0.5032166838645935, + "step": 661 + }, + { + "epoch": 0.36293859649122806, + "grad_norm": 0.4866214096546173, + "learning_rate": 1.5114749388892445e-05, + "loss": 0.5006184577941895, + "step": 662 + }, + { + "epoch": 0.36348684210526316, + "grad_norm": 0.48263096809387207, + "learning_rate": 1.509915458112769e-05, + "loss": 0.4939555525779724, + "step": 663 + }, + { + "epoch": 0.36403508771929827, + "grad_norm": 0.47373154759407043, + "learning_rate": 1.508354299683715e-05, + "loss": 0.49878406524658203, + "step": 664 + }, + { + "epoch": 0.3645833333333333, + "grad_norm": 0.48171475529670715, + "learning_rate": 1.5067914687383873e-05, + "loss": 0.5021742582321167, + "step": 665 + }, + { + "epoch": 0.3651315789473684, + "grad_norm": 0.4749904274940491, + "learning_rate": 1.505226970418594e-05, + "loss": 0.4902474880218506, + "step": 666 + }, + { + "epoch": 0.3656798245614035, + "grad_norm": 0.5045875310897827, + "learning_rate": 1.5036608098716287e-05, + "loss": 0.5088133811950684, + "step": 667 + }, + { + "epoch": 0.36622807017543857, + "grad_norm": 0.5372963547706604, + "learning_rate": 1.5020929922502542e-05, + "loss": 0.5023071765899658, + "step": 668 + }, + { + "epoch": 0.3667763157894737, + "grad_norm": 0.4800300598144531, + "learning_rate": 1.5005235227126846e-05, + "loss": 0.505648136138916, + "step": 669 + }, + { + "epoch": 0.3673245614035088, + "grad_norm": 0.4792078137397766, + "learning_rate": 1.4989524064225696e-05, + "loss": 0.5006028413772583, + "step": 670 + }, + { + "epoch": 0.3678728070175439, + "grad_norm": 0.48742419481277466, + "learning_rate": 1.4973796485489764e-05, + "loss": 0.515373170375824, + "step": 671 + }, + { + "epoch": 0.3684210526315789, + "grad_norm": 0.5054008960723877, + "learning_rate": 1.4958052542663728e-05, + "loss": 0.4846934676170349, + "step": 672 + }, + { + "epoch": 0.36896929824561403, + "grad_norm": 0.49790146946907043, + "learning_rate": 1.4942292287546114e-05, + "loss": 0.49283909797668457, + "step": 673 + }, + { + "epoch": 0.36951754385964913, + "grad_norm": 0.5341838002204895, + "learning_rate": 1.4926515771989106e-05, + "loss": 0.5021057724952698, + "step": 674 + }, + { + "epoch": 0.37006578947368424, + "grad_norm": 0.5239306092262268, + "learning_rate": 1.4910723047898392e-05, + "loss": 0.49403518438339233, + "step": 675 + }, + { + "epoch": 0.3706140350877193, + "grad_norm": 0.4920117259025574, + "learning_rate": 1.4894914167232988e-05, + "loss": 0.49569496512413025, + "step": 676 + }, + { + "epoch": 0.3711622807017544, + "grad_norm": 0.49796634912490845, + "learning_rate": 1.4879089182005062e-05, + "loss": 0.5069082975387573, + "step": 677 + }, + { + "epoch": 0.3717105263157895, + "grad_norm": 0.5147514343261719, + "learning_rate": 1.4863248144279769e-05, + "loss": 0.4919740557670593, + "step": 678 + }, + { + "epoch": 0.37225877192982454, + "grad_norm": 0.5094771385192871, + "learning_rate": 1.4847391106175075e-05, + "loss": 0.5015633702278137, + "step": 679 + }, + { + "epoch": 0.37280701754385964, + "grad_norm": 0.5420538783073425, + "learning_rate": 1.4831518119861597e-05, + "loss": 0.48816415667533875, + "step": 680 + }, + { + "epoch": 0.37335526315789475, + "grad_norm": 0.5352575778961182, + "learning_rate": 1.4815629237562411e-05, + "loss": 0.49396681785583496, + "step": 681 + }, + { + "epoch": 0.37390350877192985, + "grad_norm": 0.5042607188224792, + "learning_rate": 1.4799724511552898e-05, + "loss": 0.48261889815330505, + "step": 682 + }, + { + "epoch": 0.3744517543859649, + "grad_norm": 0.5831988453865051, + "learning_rate": 1.4783803994160568e-05, + "loss": 0.4972837567329407, + "step": 683 + }, + { + "epoch": 0.375, + "grad_norm": 0.48803001642227173, + "learning_rate": 1.4767867737764877e-05, + "loss": 0.49230509996414185, + "step": 684 + }, + { + "epoch": 0.3755482456140351, + "grad_norm": 0.5121214985847473, + "learning_rate": 1.4751915794797072e-05, + "loss": 0.5085790157318115, + "step": 685 + }, + { + "epoch": 0.37609649122807015, + "grad_norm": 0.5156803727149963, + "learning_rate": 1.4735948217740006e-05, + "loss": 0.4978635013103485, + "step": 686 + }, + { + "epoch": 0.37664473684210525, + "grad_norm": 0.46112072467803955, + "learning_rate": 1.4719965059127969e-05, + "loss": 0.49163517355918884, + "step": 687 + }, + { + "epoch": 0.37719298245614036, + "grad_norm": 0.5171255469322205, + "learning_rate": 1.4703966371546519e-05, + "loss": 0.5190042853355408, + "step": 688 + }, + { + "epoch": 0.37774122807017546, + "grad_norm": 0.5324544310569763, + "learning_rate": 1.4687952207632296e-05, + "loss": 0.5000194907188416, + "step": 689 + }, + { + "epoch": 0.3782894736842105, + "grad_norm": 0.5022001266479492, + "learning_rate": 1.4671922620072869e-05, + "loss": 0.5025606751441956, + "step": 690 + }, + { + "epoch": 0.3788377192982456, + "grad_norm": 0.5057377219200134, + "learning_rate": 1.4655877661606546e-05, + "loss": 0.49487099051475525, + "step": 691 + }, + { + "epoch": 0.3793859649122807, + "grad_norm": 0.5015015006065369, + "learning_rate": 1.4639817385022206e-05, + "loss": 0.496290385723114, + "step": 692 + }, + { + "epoch": 0.37993421052631576, + "grad_norm": 0.49644434452056885, + "learning_rate": 1.4623741843159124e-05, + "loss": 0.49527817964553833, + "step": 693 + }, + { + "epoch": 0.38048245614035087, + "grad_norm": 0.5267723202705383, + "learning_rate": 1.460765108890681e-05, + "loss": 0.5009031295776367, + "step": 694 + }, + { + "epoch": 0.38103070175438597, + "grad_norm": 0.5172473192214966, + "learning_rate": 1.4591545175204802e-05, + "loss": 0.511361837387085, + "step": 695 + }, + { + "epoch": 0.3815789473684211, + "grad_norm": 0.5121990442276001, + "learning_rate": 1.4575424155042537e-05, + "loss": 0.4846411943435669, + "step": 696 + }, + { + "epoch": 0.3821271929824561, + "grad_norm": 0.508580207824707, + "learning_rate": 1.4559288081459142e-05, + "loss": 0.49809813499450684, + "step": 697 + }, + { + "epoch": 0.3826754385964912, + "grad_norm": 0.5329216718673706, + "learning_rate": 1.4543137007543265e-05, + "loss": 0.5044119358062744, + "step": 698 + }, + { + "epoch": 0.3832236842105263, + "grad_norm": 0.524181604385376, + "learning_rate": 1.4526970986432916e-05, + "loss": 0.4943583011627197, + "step": 699 + }, + { + "epoch": 0.38377192982456143, + "grad_norm": 0.5000477433204651, + "learning_rate": 1.4510790071315278e-05, + "loss": 0.4813290536403656, + "step": 700 + }, + { + "epoch": 0.3843201754385965, + "grad_norm": 0.50678551197052, + "learning_rate": 1.4494594315426535e-05, + "loss": 0.5046131610870361, + "step": 701 + }, + { + "epoch": 0.3848684210526316, + "grad_norm": 0.5136652588844299, + "learning_rate": 1.4478383772051707e-05, + "loss": 0.5002473592758179, + "step": 702 + }, + { + "epoch": 0.3854166666666667, + "grad_norm": 0.5075159668922424, + "learning_rate": 1.4462158494524447e-05, + "loss": 0.4981658458709717, + "step": 703 + }, + { + "epoch": 0.38596491228070173, + "grad_norm": 0.48104289174079895, + "learning_rate": 1.4445918536226904e-05, + "loss": 0.49785614013671875, + "step": 704 + }, + { + "epoch": 0.38651315789473684, + "grad_norm": 0.49635618925094604, + "learning_rate": 1.4429663950589517e-05, + "loss": 0.5005432367324829, + "step": 705 + }, + { + "epoch": 0.38706140350877194, + "grad_norm": 0.5516581535339355, + "learning_rate": 1.441339479109085e-05, + "loss": 0.4959232211112976, + "step": 706 + }, + { + "epoch": 0.38760964912280704, + "grad_norm": 0.5026279091835022, + "learning_rate": 1.439711111125742e-05, + "loss": 0.4876520037651062, + "step": 707 + }, + { + "epoch": 0.3881578947368421, + "grad_norm": 0.5157284736633301, + "learning_rate": 1.4380812964663514e-05, + "loss": 0.486855149269104, + "step": 708 + }, + { + "epoch": 0.3887061403508772, + "grad_norm": 0.5165179967880249, + "learning_rate": 1.4364500404931017e-05, + "loss": 0.4917711615562439, + "step": 709 + }, + { + "epoch": 0.3892543859649123, + "grad_norm": 0.5212239027023315, + "learning_rate": 1.4348173485729233e-05, + "loss": 0.4904009699821472, + "step": 710 + }, + { + "epoch": 0.38980263157894735, + "grad_norm": 0.5552125573158264, + "learning_rate": 1.433183226077471e-05, + "loss": 0.4961950480937958, + "step": 711 + }, + { + "epoch": 0.39035087719298245, + "grad_norm": 0.5230804681777954, + "learning_rate": 1.4315476783831062e-05, + "loss": 0.4872981309890747, + "step": 712 + }, + { + "epoch": 0.39089912280701755, + "grad_norm": 0.5566909313201904, + "learning_rate": 1.4299107108708792e-05, + "loss": 0.4958663880825043, + "step": 713 + }, + { + "epoch": 0.39144736842105265, + "grad_norm": 0.5392793416976929, + "learning_rate": 1.4282723289265122e-05, + "loss": 0.49213212728500366, + "step": 714 + }, + { + "epoch": 0.3919956140350877, + "grad_norm": 0.4880681335926056, + "learning_rate": 1.42663253794038e-05, + "loss": 0.4859275221824646, + "step": 715 + }, + { + "epoch": 0.3925438596491228, + "grad_norm": 0.5517839789390564, + "learning_rate": 1.4249913433074942e-05, + "loss": 0.5010792016983032, + "step": 716 + }, + { + "epoch": 0.3930921052631579, + "grad_norm": 0.49621888995170593, + "learning_rate": 1.4233487504274837e-05, + "loss": 0.49741482734680176, + "step": 717 + }, + { + "epoch": 0.39364035087719296, + "grad_norm": 0.505009651184082, + "learning_rate": 1.4217047647045781e-05, + "loss": 0.49573537707328796, + "step": 718 + }, + { + "epoch": 0.39418859649122806, + "grad_norm": 0.49585750699043274, + "learning_rate": 1.4200593915475895e-05, + "loss": 0.4880044460296631, + "step": 719 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 0.4711382985115051, + "learning_rate": 1.4184126363698949e-05, + "loss": 0.4827781319618225, + "step": 720 + }, + { + "epoch": 0.39528508771929827, + "grad_norm": 0.500230073928833, + "learning_rate": 1.4167645045894176e-05, + "loss": 0.48407694697380066, + "step": 721 + }, + { + "epoch": 0.3958333333333333, + "grad_norm": 0.5288121700286865, + "learning_rate": 1.415115001628611e-05, + "loss": 0.48783645033836365, + "step": 722 + }, + { + "epoch": 0.3963815789473684, + "grad_norm": 0.47950515151023865, + "learning_rate": 1.413464132914439e-05, + "loss": 0.49616605043411255, + "step": 723 + }, + { + "epoch": 0.3969298245614035, + "grad_norm": 0.5053483247756958, + "learning_rate": 1.4118119038783589e-05, + "loss": 0.484591007232666, + "step": 724 + }, + { + "epoch": 0.39747807017543857, + "grad_norm": 0.4833005964756012, + "learning_rate": 1.4101583199563038e-05, + "loss": 0.4897410273551941, + "step": 725 + }, + { + "epoch": 0.3980263157894737, + "grad_norm": 0.5118196606636047, + "learning_rate": 1.4085033865886649e-05, + "loss": 0.4865984320640564, + "step": 726 + }, + { + "epoch": 0.3985745614035088, + "grad_norm": 0.5035130977630615, + "learning_rate": 1.4068471092202723e-05, + "loss": 0.5033597946166992, + "step": 727 + }, + { + "epoch": 0.3991228070175439, + "grad_norm": 0.5169758200645447, + "learning_rate": 1.4051894933003783e-05, + "loss": 0.49889934062957764, + "step": 728 + }, + { + "epoch": 0.3996710526315789, + "grad_norm": 0.5340895652770996, + "learning_rate": 1.4035305442826392e-05, + "loss": 0.5121870040893555, + "step": 729 + }, + { + "epoch": 0.40021929824561403, + "grad_norm": 0.513810932636261, + "learning_rate": 1.4018702676250973e-05, + "loss": 0.46407610177993774, + "step": 730 + }, + { + "epoch": 0.40076754385964913, + "grad_norm": 0.47475552558898926, + "learning_rate": 1.400208668790163e-05, + "loss": 0.5128594636917114, + "step": 731 + }, + { + "epoch": 0.40131578947368424, + "grad_norm": 0.532416045665741, + "learning_rate": 1.3985457532445964e-05, + "loss": 0.4869621694087982, + "step": 732 + }, + { + "epoch": 0.4018640350877193, + "grad_norm": 0.5194771885871887, + "learning_rate": 1.3968815264594895e-05, + "loss": 0.4942741096019745, + "step": 733 + }, + { + "epoch": 0.4024122807017544, + "grad_norm": 0.5377725958824158, + "learning_rate": 1.3952159939102492e-05, + "loss": 0.499100923538208, + "step": 734 + }, + { + "epoch": 0.4029605263157895, + "grad_norm": 0.5000717639923096, + "learning_rate": 1.3935491610765776e-05, + "loss": 0.4920429587364197, + "step": 735 + }, + { + "epoch": 0.40350877192982454, + "grad_norm": 0.5088208913803101, + "learning_rate": 1.3918810334424556e-05, + "loss": 0.49835777282714844, + "step": 736 + }, + { + "epoch": 0.40405701754385964, + "grad_norm": 0.4984104335308075, + "learning_rate": 1.390211616496123e-05, + "loss": 0.492436945438385, + "step": 737 + }, + { + "epoch": 0.40460526315789475, + "grad_norm": 0.531671941280365, + "learning_rate": 1.3885409157300624e-05, + "loss": 0.5021324157714844, + "step": 738 + }, + { + "epoch": 0.40515350877192985, + "grad_norm": 0.521382212638855, + "learning_rate": 1.3868689366409802e-05, + "loss": 0.48615261912345886, + "step": 739 + }, + { + "epoch": 0.4057017543859649, + "grad_norm": 0.4979918301105499, + "learning_rate": 1.3851956847297882e-05, + "loss": 0.48778805136680603, + "step": 740 + }, + { + "epoch": 0.40625, + "grad_norm": 0.5014253258705139, + "learning_rate": 1.3835211655015856e-05, + "loss": 0.5012838840484619, + "step": 741 + }, + { + "epoch": 0.4067982456140351, + "grad_norm": 0.5053998827934265, + "learning_rate": 1.3818453844656423e-05, + "loss": 0.49050578474998474, + "step": 742 + }, + { + "epoch": 0.40734649122807015, + "grad_norm": 0.5173316597938538, + "learning_rate": 1.3801683471353781e-05, + "loss": 0.49842238426208496, + "step": 743 + }, + { + "epoch": 0.40789473684210525, + "grad_norm": 0.5086451172828674, + "learning_rate": 1.3784900590283474e-05, + "loss": 0.5004265308380127, + "step": 744 + }, + { + "epoch": 0.40844298245614036, + "grad_norm": 0.5439842939376831, + "learning_rate": 1.3768105256662194e-05, + "loss": 0.4831674098968506, + "step": 745 + }, + { + "epoch": 0.40899122807017546, + "grad_norm": 0.4946817457675934, + "learning_rate": 1.3751297525747589e-05, + "loss": 0.49196428060531616, + "step": 746 + }, + { + "epoch": 0.4095394736842105, + "grad_norm": 0.5227308869361877, + "learning_rate": 1.3734477452838117e-05, + "loss": 0.490816205739975, + "step": 747 + }, + { + "epoch": 0.4100877192982456, + "grad_norm": 0.560154914855957, + "learning_rate": 1.3717645093272834e-05, + "loss": 0.4868409037590027, + "step": 748 + }, + { + "epoch": 0.4106359649122807, + "grad_norm": 0.5022367835044861, + "learning_rate": 1.3700800502431202e-05, + "loss": 0.4902645945549011, + "step": 749 + }, + { + "epoch": 0.41118421052631576, + "grad_norm": 0.5088184475898743, + "learning_rate": 1.3683943735732956e-05, + "loss": 0.4881845712661743, + "step": 750 + }, + { + "epoch": 0.41173245614035087, + "grad_norm": 0.5308793187141418, + "learning_rate": 1.3667074848637862e-05, + "loss": 0.4919361472129822, + "step": 751 + }, + { + "epoch": 0.41228070175438597, + "grad_norm": 0.48994314670562744, + "learning_rate": 1.3650193896645582e-05, + "loss": 0.4936677813529968, + "step": 752 + }, + { + "epoch": 0.4128289473684211, + "grad_norm": 0.5036956667900085, + "learning_rate": 1.3633300935295468e-05, + "loss": 0.4867483079433441, + "step": 753 + }, + { + "epoch": 0.4133771929824561, + "grad_norm": 0.4999926686286926, + "learning_rate": 1.3616396020166368e-05, + "loss": 0.4871666133403778, + "step": 754 + }, + { + "epoch": 0.4139254385964912, + "grad_norm": 0.4995878338813782, + "learning_rate": 1.3599479206876483e-05, + "loss": 0.49972087144851685, + "step": 755 + }, + { + "epoch": 0.4144736842105263, + "grad_norm": 0.481976717710495, + "learning_rate": 1.3582550551083143e-05, + "loss": 0.49500545859336853, + "step": 756 + }, + { + "epoch": 0.41502192982456143, + "grad_norm": 0.48889246582984924, + "learning_rate": 1.3565610108482645e-05, + "loss": 0.49687570333480835, + "step": 757 + }, + { + "epoch": 0.4155701754385965, + "grad_norm": 0.5260644555091858, + "learning_rate": 1.354865793481007e-05, + "loss": 0.4825992286205292, + "step": 758 + }, + { + "epoch": 0.4161184210526316, + "grad_norm": 0.4933117926120758, + "learning_rate": 1.3531694085839084e-05, + "loss": 0.48860979080200195, + "step": 759 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.5438188314437866, + "learning_rate": 1.3514718617381779e-05, + "loss": 0.49466273188591003, + "step": 760 + }, + { + "epoch": 0.41721491228070173, + "grad_norm": 0.5429419875144958, + "learning_rate": 1.3497731585288466e-05, + "loss": 0.496579647064209, + "step": 761 + }, + { + "epoch": 0.41776315789473684, + "grad_norm": 0.4868009090423584, + "learning_rate": 1.3480733045447505e-05, + "loss": 0.4871636629104614, + "step": 762 + }, + { + "epoch": 0.41831140350877194, + "grad_norm": 0.5444782376289368, + "learning_rate": 1.3463723053785114e-05, + "loss": 0.4599669873714447, + "step": 763 + }, + { + "epoch": 0.41885964912280704, + "grad_norm": 0.503890335559845, + "learning_rate": 1.3446701666265192e-05, + "loss": 0.48242682218551636, + "step": 764 + }, + { + "epoch": 0.4194078947368421, + "grad_norm": 0.6804667711257935, + "learning_rate": 1.3429668938889127e-05, + "loss": 0.49555811285972595, + "step": 765 + }, + { + "epoch": 0.4199561403508772, + "grad_norm": 0.5674505829811096, + "learning_rate": 1.341262492769562e-05, + "loss": 0.494424045085907, + "step": 766 + }, + { + "epoch": 0.4205043859649123, + "grad_norm": 0.49492350220680237, + "learning_rate": 1.3395569688760499e-05, + "loss": 0.4926236569881439, + "step": 767 + }, + { + "epoch": 0.42105263157894735, + "grad_norm": 0.48057791590690613, + "learning_rate": 1.3378503278196522e-05, + "loss": 0.48030886054039, + "step": 768 + }, + { + "epoch": 0.42160087719298245, + "grad_norm": 0.49380749464035034, + "learning_rate": 1.336142575215321e-05, + "loss": 0.48291677236557007, + "step": 769 + }, + { + "epoch": 0.42214912280701755, + "grad_norm": 0.5208574533462524, + "learning_rate": 1.3344337166816652e-05, + "loss": 0.482435405254364, + "step": 770 + }, + { + "epoch": 0.42269736842105265, + "grad_norm": 0.48892274498939514, + "learning_rate": 1.3327237578409323e-05, + "loss": 0.48565933108329773, + "step": 771 + }, + { + "epoch": 0.4232456140350877, + "grad_norm": 0.4815227687358856, + "learning_rate": 1.33101270431899e-05, + "loss": 0.48251694440841675, + "step": 772 + }, + { + "epoch": 0.4237938596491228, + "grad_norm": 0.4896012544631958, + "learning_rate": 1.3293005617453075e-05, + "loss": 0.47662657499313354, + "step": 773 + }, + { + "epoch": 0.4243421052631579, + "grad_norm": 0.4705606698989868, + "learning_rate": 1.327587335752937e-05, + "loss": 0.4908991754055023, + "step": 774 + }, + { + "epoch": 0.42489035087719296, + "grad_norm": 0.46313512325286865, + "learning_rate": 1.325873031978495e-05, + "loss": 0.4786376357078552, + "step": 775 + }, + { + "epoch": 0.42543859649122806, + "grad_norm": 0.4871222972869873, + "learning_rate": 1.3241576560621444e-05, + "loss": 0.4758555293083191, + "step": 776 + }, + { + "epoch": 0.42598684210526316, + "grad_norm": 0.4975806474685669, + "learning_rate": 1.3224412136475757e-05, + "loss": 0.4800441265106201, + "step": 777 + }, + { + "epoch": 0.42653508771929827, + "grad_norm": 0.4865629971027374, + "learning_rate": 1.320723710381987e-05, + "loss": 0.4875626564025879, + "step": 778 + }, + { + "epoch": 0.4270833333333333, + "grad_norm": 0.5996370315551758, + "learning_rate": 1.3190051519160685e-05, + "loss": 0.4718167781829834, + "step": 779 + }, + { + "epoch": 0.4276315789473684, + "grad_norm": 0.47141125798225403, + "learning_rate": 1.3172855439039802e-05, + "loss": 0.47652530670166016, + "step": 780 + }, + { + "epoch": 0.4281798245614035, + "grad_norm": 0.48037710785865784, + "learning_rate": 1.3155648920033363e-05, + "loss": 0.4848669469356537, + "step": 781 + }, + { + "epoch": 0.42872807017543857, + "grad_norm": 0.49942100048065186, + "learning_rate": 1.313843201875186e-05, + "loss": 0.48465991020202637, + "step": 782 + }, + { + "epoch": 0.4292763157894737, + "grad_norm": 0.4947258234024048, + "learning_rate": 1.3121204791839926e-05, + "loss": 0.48432278633117676, + "step": 783 + }, + { + "epoch": 0.4298245614035088, + "grad_norm": 0.5264876484870911, + "learning_rate": 1.3103967295976178e-05, + "loss": 0.48953860998153687, + "step": 784 + }, + { + "epoch": 0.4303728070175439, + "grad_norm": 0.5139599442481995, + "learning_rate": 1.3086719587873015e-05, + "loss": 0.48061832785606384, + "step": 785 + }, + { + "epoch": 0.4309210526315789, + "grad_norm": 0.48673495650291443, + "learning_rate": 1.306946172427644e-05, + "loss": 0.47429126501083374, + "step": 786 + }, + { + "epoch": 0.43146929824561403, + "grad_norm": 0.5102007389068604, + "learning_rate": 1.3052193761965861e-05, + "loss": 0.4935256838798523, + "step": 787 + }, + { + "epoch": 0.43201754385964913, + "grad_norm": 0.5277912020683289, + "learning_rate": 1.3034915757753917e-05, + "loss": 0.4868704676628113, + "step": 788 + }, + { + "epoch": 0.43256578947368424, + "grad_norm": 0.49750053882598877, + "learning_rate": 1.3017627768486277e-05, + "loss": 0.4760574698448181, + "step": 789 + }, + { + "epoch": 0.4331140350877193, + "grad_norm": 0.6148756146430969, + "learning_rate": 1.3000329851041474e-05, + "loss": 0.48425477743148804, + "step": 790 + }, + { + "epoch": 0.4336622807017544, + "grad_norm": 0.487529993057251, + "learning_rate": 1.298302206233069e-05, + "loss": 0.49096059799194336, + "step": 791 + }, + { + "epoch": 0.4342105263157895, + "grad_norm": 0.5019875168800354, + "learning_rate": 1.2965704459297603e-05, + "loss": 0.48441147804260254, + "step": 792 + }, + { + "epoch": 0.43475877192982454, + "grad_norm": 0.48055264353752136, + "learning_rate": 1.2948377098918166e-05, + "loss": 0.49248749017715454, + "step": 793 + }, + { + "epoch": 0.43530701754385964, + "grad_norm": 0.4806983470916748, + "learning_rate": 1.2931040038200435e-05, + "loss": 0.46774157881736755, + "step": 794 + }, + { + "epoch": 0.43585526315789475, + "grad_norm": 0.5055975914001465, + "learning_rate": 1.2913693334184387e-05, + "loss": 0.46599435806274414, + "step": 795 + }, + { + "epoch": 0.43640350877192985, + "grad_norm": 0.47909224033355713, + "learning_rate": 1.289633704394173e-05, + "loss": 0.49138325452804565, + "step": 796 + }, + { + "epoch": 0.4369517543859649, + "grad_norm": 0.5127816796302795, + "learning_rate": 1.2878971224575689e-05, + "loss": 0.49381622672080994, + "step": 797 + }, + { + "epoch": 0.4375, + "grad_norm": 0.5159894824028015, + "learning_rate": 1.2861595933220873e-05, + "loss": 0.4878365397453308, + "step": 798 + }, + { + "epoch": 0.4380482456140351, + "grad_norm": 0.48489898443222046, + "learning_rate": 1.2844211227043024e-05, + "loss": 0.4896066188812256, + "step": 799 + }, + { + "epoch": 0.43859649122807015, + "grad_norm": 0.5021419525146484, + "learning_rate": 1.282681716323888e-05, + "loss": 0.48621922731399536, + "step": 800 + }, + { + "epoch": 0.43914473684210525, + "grad_norm": 0.48859429359436035, + "learning_rate": 1.2809413799035961e-05, + "loss": 0.48754948377609253, + "step": 801 + }, + { + "epoch": 0.43969298245614036, + "grad_norm": 0.47973889112472534, + "learning_rate": 1.2792001191692375e-05, + "loss": 0.47506245970726013, + "step": 802 + }, + { + "epoch": 0.44024122807017546, + "grad_norm": 0.4873831272125244, + "learning_rate": 1.2774579398496656e-05, + "loss": 0.48494696617126465, + "step": 803 + }, + { + "epoch": 0.4407894736842105, + "grad_norm": 0.5331482291221619, + "learning_rate": 1.2757148476767553e-05, + "loss": 0.49041277170181274, + "step": 804 + }, + { + "epoch": 0.4413377192982456, + "grad_norm": 0.4975897967815399, + "learning_rate": 1.2739708483853845e-05, + "loss": 0.49118441343307495, + "step": 805 + }, + { + "epoch": 0.4418859649122807, + "grad_norm": 0.49233025312423706, + "learning_rate": 1.2722259477134162e-05, + "loss": 0.4929482936859131, + "step": 806 + }, + { + "epoch": 0.44243421052631576, + "grad_norm": 0.531247079372406, + "learning_rate": 1.2704801514016789e-05, + "loss": 0.4968828856945038, + "step": 807 + }, + { + "epoch": 0.44298245614035087, + "grad_norm": 0.5535720586776733, + "learning_rate": 1.2687334651939471e-05, + "loss": 0.48034191131591797, + "step": 808 + }, + { + "epoch": 0.44353070175438597, + "grad_norm": 0.5043017268180847, + "learning_rate": 1.2669858948369242e-05, + "loss": 0.4804898500442505, + "step": 809 + }, + { + "epoch": 0.4440789473684211, + "grad_norm": 0.5200436115264893, + "learning_rate": 1.2652374460802218e-05, + "loss": 0.4860343933105469, + "step": 810 + }, + { + "epoch": 0.4446271929824561, + "grad_norm": 0.5313208103179932, + "learning_rate": 1.2634881246763415e-05, + "loss": 0.49038752913475037, + "step": 811 + }, + { + "epoch": 0.4451754385964912, + "grad_norm": 0.4981192946434021, + "learning_rate": 1.2617379363806563e-05, + "loss": 0.47683286666870117, + "step": 812 + }, + { + "epoch": 0.4457236842105263, + "grad_norm": 0.4823460280895233, + "learning_rate": 1.2599868869513912e-05, + "loss": 0.4903796911239624, + "step": 813 + }, + { + "epoch": 0.44627192982456143, + "grad_norm": 0.48653507232666016, + "learning_rate": 1.2582349821496041e-05, + "loss": 0.48963838815689087, + "step": 814 + }, + { + "epoch": 0.4468201754385965, + "grad_norm": 0.5077612400054932, + "learning_rate": 1.2564822277391673e-05, + "loss": 0.48124510049819946, + "step": 815 + }, + { + "epoch": 0.4473684210526316, + "grad_norm": 0.49369195103645325, + "learning_rate": 1.2547286294867486e-05, + "loss": 0.4819800555706024, + "step": 816 + }, + { + "epoch": 0.4479166666666667, + "grad_norm": 0.506385087966919, + "learning_rate": 1.252974193161792e-05, + "loss": 0.48602816462516785, + "step": 817 + }, + { + "epoch": 0.44846491228070173, + "grad_norm": 0.49783554673194885, + "learning_rate": 1.2512189245364986e-05, + "loss": 0.5015057921409607, + "step": 818 + }, + { + "epoch": 0.44901315789473684, + "grad_norm": 0.49489626288414, + "learning_rate": 1.2494628293858078e-05, + "loss": 0.4883100986480713, + "step": 819 + }, + { + "epoch": 0.44956140350877194, + "grad_norm": 0.48849615454673767, + "learning_rate": 1.2477059134873784e-05, + "loss": 0.4841647148132324, + "step": 820 + }, + { + "epoch": 0.45010964912280704, + "grad_norm": 0.4946068227291107, + "learning_rate": 1.2459481826215698e-05, + "loss": 0.47616252303123474, + "step": 821 + }, + { + "epoch": 0.4506578947368421, + "grad_norm": 0.4865962862968445, + "learning_rate": 1.2441896425714224e-05, + "loss": 0.4910244643688202, + "step": 822 + }, + { + "epoch": 0.4512061403508772, + "grad_norm": 0.47502800822257996, + "learning_rate": 1.242430299122639e-05, + "loss": 0.4838365912437439, + "step": 823 + }, + { + "epoch": 0.4517543859649123, + "grad_norm": 0.47981613874435425, + "learning_rate": 1.240670158063565e-05, + "loss": 0.4831865429878235, + "step": 824 + }, + { + "epoch": 0.45230263157894735, + "grad_norm": 0.531018853187561, + "learning_rate": 1.238909225185171e-05, + "loss": 0.4798121452331543, + "step": 825 + }, + { + "epoch": 0.45285087719298245, + "grad_norm": 0.4660738408565521, + "learning_rate": 1.2371475062810324e-05, + "loss": 0.47949931025505066, + "step": 826 + }, + { + "epoch": 0.45339912280701755, + "grad_norm": 0.48740774393081665, + "learning_rate": 1.2353850071473103e-05, + "loss": 0.4878680408000946, + "step": 827 + }, + { + "epoch": 0.45394736842105265, + "grad_norm": 0.49357178807258606, + "learning_rate": 1.2336217335827332e-05, + "loss": 0.4781498312950134, + "step": 828 + }, + { + "epoch": 0.4544956140350877, + "grad_norm": 0.5055526494979858, + "learning_rate": 1.231857691388577e-05, + "loss": 0.48239409923553467, + "step": 829 + }, + { + "epoch": 0.4550438596491228, + "grad_norm": 0.5338366031646729, + "learning_rate": 1.2300928863686472e-05, + "loss": 0.4846789240837097, + "step": 830 + }, + { + "epoch": 0.4555921052631579, + "grad_norm": 0.4883231818675995, + "learning_rate": 1.2283273243292583e-05, + "loss": 0.47418880462646484, + "step": 831 + }, + { + "epoch": 0.45614035087719296, + "grad_norm": 0.48572519421577454, + "learning_rate": 1.2265610110792161e-05, + "loss": 0.49370428919792175, + "step": 832 + }, + { + "epoch": 0.45668859649122806, + "grad_norm": 0.5001595616340637, + "learning_rate": 1.2247939524297977e-05, + "loss": 0.48919588327407837, + "step": 833 + }, + { + "epoch": 0.45723684210526316, + "grad_norm": 0.5046356320381165, + "learning_rate": 1.2230261541947316e-05, + "loss": 0.4786142110824585, + "step": 834 + }, + { + "epoch": 0.45778508771929827, + "grad_norm": 0.49759823083877563, + "learning_rate": 1.2212576221901819e-05, + "loss": 0.48940107226371765, + "step": 835 + }, + { + "epoch": 0.4583333333333333, + "grad_norm": 0.4834117293357849, + "learning_rate": 1.2194883622347247e-05, + "loss": 0.4996738135814667, + "step": 836 + }, + { + "epoch": 0.4588815789473684, + "grad_norm": 0.46439656615257263, + "learning_rate": 1.2177183801493313e-05, + "loss": 0.48260292410850525, + "step": 837 + }, + { + "epoch": 0.4594298245614035, + "grad_norm": 0.4949783384799957, + "learning_rate": 1.2159476817573506e-05, + "loss": 0.4749537706375122, + "step": 838 + }, + { + "epoch": 0.45997807017543857, + "grad_norm": 0.4927411377429962, + "learning_rate": 1.2141762728844862e-05, + "loss": 0.484489768743515, + "step": 839 + }, + { + "epoch": 0.4605263157894737, + "grad_norm": 0.4821980595588684, + "learning_rate": 1.21240415935878e-05, + "loss": 0.489577054977417, + "step": 840 + }, + { + "epoch": 0.4610745614035088, + "grad_norm": 0.47517091035842896, + "learning_rate": 1.2106313470105925e-05, + "loss": 0.47467929124832153, + "step": 841 + }, + { + "epoch": 0.4616228070175439, + "grad_norm": 0.5108216404914856, + "learning_rate": 1.2088578416725829e-05, + "loss": 0.4935423731803894, + "step": 842 + }, + { + "epoch": 0.4621710526315789, + "grad_norm": 0.5016485452651978, + "learning_rate": 1.207083649179691e-05, + "loss": 0.4873450696468353, + "step": 843 + }, + { + "epoch": 0.46271929824561403, + "grad_norm": 0.49393925070762634, + "learning_rate": 1.2053087753691173e-05, + "loss": 0.47483953833580017, + "step": 844 + }, + { + "epoch": 0.46326754385964913, + "grad_norm": 0.524784505367279, + "learning_rate": 1.2035332260803026e-05, + "loss": 0.4782135486602783, + "step": 845 + }, + { + "epoch": 0.46381578947368424, + "grad_norm": 0.5134442448616028, + "learning_rate": 1.2017570071549113e-05, + "loss": 0.4677678346633911, + "step": 846 + }, + { + "epoch": 0.4643640350877193, + "grad_norm": 0.5348238348960876, + "learning_rate": 1.1999801244368112e-05, + "loss": 0.47131556272506714, + "step": 847 + }, + { + "epoch": 0.4649122807017544, + "grad_norm": 0.47679224610328674, + "learning_rate": 1.1982025837720533e-05, + "loss": 0.4836603105068207, + "step": 848 + }, + { + "epoch": 0.4654605263157895, + "grad_norm": 0.509976863861084, + "learning_rate": 1.1964243910088532e-05, + "loss": 0.4757232666015625, + "step": 849 + }, + { + "epoch": 0.46600877192982454, + "grad_norm": 0.48536261916160583, + "learning_rate": 1.1946455519975724e-05, + "loss": 0.4884200692176819, + "step": 850 + }, + { + "epoch": 0.46655701754385964, + "grad_norm": 0.49048852920532227, + "learning_rate": 1.1928660725906984e-05, + "loss": 0.4773423671722412, + "step": 851 + }, + { + "epoch": 0.46710526315789475, + "grad_norm": 0.6477540135383606, + "learning_rate": 1.1910859586428258e-05, + "loss": 0.47236260771751404, + "step": 852 + }, + { + "epoch": 0.46765350877192985, + "grad_norm": 0.492621511220932, + "learning_rate": 1.1893052160106364e-05, + "loss": 0.47727149724960327, + "step": 853 + }, + { + "epoch": 0.4682017543859649, + "grad_norm": 0.49619951844215393, + "learning_rate": 1.187523850552881e-05, + "loss": 0.4871925115585327, + "step": 854 + }, + { + "epoch": 0.46875, + "grad_norm": 0.4711047112941742, + "learning_rate": 1.1857418681303592e-05, + "loss": 0.49179786443710327, + "step": 855 + }, + { + "epoch": 0.4692982456140351, + "grad_norm": 0.5045946836471558, + "learning_rate": 1.1839592746059008e-05, + "loss": 0.47911369800567627, + "step": 856 + }, + { + "epoch": 0.46984649122807015, + "grad_norm": 0.532071053981781, + "learning_rate": 1.1821760758443455e-05, + "loss": 0.48660939931869507, + "step": 857 + }, + { + "epoch": 0.47039473684210525, + "grad_norm": 0.49183908104896545, + "learning_rate": 1.1803922777125247e-05, + "loss": 0.49535202980041504, + "step": 858 + }, + { + "epoch": 0.47094298245614036, + "grad_norm": 0.475089967250824, + "learning_rate": 1.178607886079242e-05, + "loss": 0.48570260405540466, + "step": 859 + }, + { + "epoch": 0.47149122807017546, + "grad_norm": 0.4888152778148651, + "learning_rate": 1.1768229068152533e-05, + "loss": 0.4838455319404602, + "step": 860 + }, + { + "epoch": 0.4720394736842105, + "grad_norm": 0.47159093618392944, + "learning_rate": 1.1750373457932477e-05, + "loss": 0.47372183203697205, + "step": 861 + }, + { + "epoch": 0.4725877192982456, + "grad_norm": 0.5381128191947937, + "learning_rate": 1.1732512088878287e-05, + "loss": 0.4868219792842865, + "step": 862 + }, + { + "epoch": 0.4731359649122807, + "grad_norm": 0.47580257058143616, + "learning_rate": 1.1714645019754944e-05, + "loss": 0.4665176272392273, + "step": 863 + }, + { + "epoch": 0.47368421052631576, + "grad_norm": 0.5036393404006958, + "learning_rate": 1.1696772309346183e-05, + "loss": 0.4782337546348572, + "step": 864 + }, + { + "epoch": 0.47423245614035087, + "grad_norm": 0.5104615688323975, + "learning_rate": 1.1678894016454292e-05, + "loss": 0.47208207845687866, + "step": 865 + }, + { + "epoch": 0.47478070175438597, + "grad_norm": 0.509761393070221, + "learning_rate": 1.1661010199899938e-05, + "loss": 0.4760264754295349, + "step": 866 + }, + { + "epoch": 0.4753289473684211, + "grad_norm": 0.4815259277820587, + "learning_rate": 1.1643120918521952e-05, + "loss": 0.4828665256500244, + "step": 867 + }, + { + "epoch": 0.4758771929824561, + "grad_norm": 0.48106735944747925, + "learning_rate": 1.162522623117715e-05, + "loss": 0.47156989574432373, + "step": 868 + }, + { + "epoch": 0.4764254385964912, + "grad_norm": 0.5169423818588257, + "learning_rate": 1.160732619674013e-05, + "loss": 0.4661808907985687, + "step": 869 + }, + { + "epoch": 0.4769736842105263, + "grad_norm": 0.5183056592941284, + "learning_rate": 1.1589420874103081e-05, + "loss": 0.49258583784103394, + "step": 870 + }, + { + "epoch": 0.47752192982456143, + "grad_norm": 0.4648781418800354, + "learning_rate": 1.1571510322175598e-05, + "loss": 0.4734388291835785, + "step": 871 + }, + { + "epoch": 0.4780701754385965, + "grad_norm": 0.5181176662445068, + "learning_rate": 1.1553594599884472e-05, + "loss": 0.4744289517402649, + "step": 872 + }, + { + "epoch": 0.4786184210526316, + "grad_norm": 0.4918321967124939, + "learning_rate": 1.1535673766173512e-05, + "loss": 0.48278099298477173, + "step": 873 + }, + { + "epoch": 0.4791666666666667, + "grad_norm": 0.47864681482315063, + "learning_rate": 1.1517747880003336e-05, + "loss": 0.4869686961174011, + "step": 874 + }, + { + "epoch": 0.47971491228070173, + "grad_norm": 0.5176661610603333, + "learning_rate": 1.14998170003512e-05, + "loss": 0.4895298480987549, + "step": 875 + }, + { + "epoch": 0.48026315789473684, + "grad_norm": 0.5217961072921753, + "learning_rate": 1.1481881186210767e-05, + "loss": 0.4778291881084442, + "step": 876 + }, + { + "epoch": 0.48081140350877194, + "grad_norm": 0.4688926041126251, + "learning_rate": 1.1463940496591946e-05, + "loss": 0.48186585307121277, + "step": 877 + }, + { + "epoch": 0.48135964912280704, + "grad_norm": 0.5088805556297302, + "learning_rate": 1.1445994990520697e-05, + "loss": 0.485612154006958, + "step": 878 + }, + { + "epoch": 0.4819078947368421, + "grad_norm": 0.5077580809593201, + "learning_rate": 1.1428044727038807e-05, + "loss": 0.48806190490722656, + "step": 879 + }, + { + "epoch": 0.4824561403508772, + "grad_norm": 0.4916880428791046, + "learning_rate": 1.1410089765203724e-05, + "loss": 0.49692898988723755, + "step": 880 + }, + { + "epoch": 0.4830043859649123, + "grad_norm": 0.49453645944595337, + "learning_rate": 1.1392130164088364e-05, + "loss": 0.48662543296813965, + "step": 881 + }, + { + "epoch": 0.48355263157894735, + "grad_norm": 0.5179969072341919, + "learning_rate": 1.1374165982780886e-05, + "loss": 0.4894055724143982, + "step": 882 + }, + { + "epoch": 0.48410087719298245, + "grad_norm": 0.5080739855766296, + "learning_rate": 1.1356197280384536e-05, + "loss": 0.4840007424354553, + "step": 883 + }, + { + "epoch": 0.48464912280701755, + "grad_norm": 0.4707460403442383, + "learning_rate": 1.1338224116017425e-05, + "loss": 0.4837796092033386, + "step": 884 + }, + { + "epoch": 0.48519736842105265, + "grad_norm": 0.5029415488243103, + "learning_rate": 1.1320246548812343e-05, + "loss": 0.4835893511772156, + "step": 885 + }, + { + "epoch": 0.4857456140350877, + "grad_norm": 0.5508713126182556, + "learning_rate": 1.1302264637916576e-05, + "loss": 0.49281996488571167, + "step": 886 + }, + { + "epoch": 0.4862938596491228, + "grad_norm": 0.6094909310340881, + "learning_rate": 1.128427844249169e-05, + "loss": 0.483834832906723, + "step": 887 + }, + { + "epoch": 0.4868421052631579, + "grad_norm": 0.45924264192581177, + "learning_rate": 1.1266288021713347e-05, + "loss": 0.4871005415916443, + "step": 888 + }, + { + "epoch": 0.48739035087719296, + "grad_norm": 0.4807246923446655, + "learning_rate": 1.1248293434771127e-05, + "loss": 0.47325021028518677, + "step": 889 + }, + { + "epoch": 0.48793859649122806, + "grad_norm": 0.5099015831947327, + "learning_rate": 1.1230294740868294e-05, + "loss": 0.4735136032104492, + "step": 890 + }, + { + "epoch": 0.48848684210526316, + "grad_norm": 0.4942789673805237, + "learning_rate": 1.1212291999221644e-05, + "loss": 0.4851912260055542, + "step": 891 + }, + { + "epoch": 0.48903508771929827, + "grad_norm": 0.5109583735466003, + "learning_rate": 1.1194285269061277e-05, + "loss": 0.4773210287094116, + "step": 892 + }, + { + "epoch": 0.4895833333333333, + "grad_norm": 0.4867546856403351, + "learning_rate": 1.1176274609630418e-05, + "loss": 0.48137515783309937, + "step": 893 + }, + { + "epoch": 0.4901315789473684, + "grad_norm": 0.48830315470695496, + "learning_rate": 1.1158260080185226e-05, + "loss": 0.4856005609035492, + "step": 894 + }, + { + "epoch": 0.4906798245614035, + "grad_norm": 0.46730145812034607, + "learning_rate": 1.1140241739994589e-05, + "loss": 0.46794044971466064, + "step": 895 + }, + { + "epoch": 0.49122807017543857, + "grad_norm": 0.484876811504364, + "learning_rate": 1.1122219648339925e-05, + "loss": 0.47470468282699585, + "step": 896 + }, + { + "epoch": 0.4917763157894737, + "grad_norm": 0.48526665568351746, + "learning_rate": 1.110419386451501e-05, + "loss": 0.46910759806632996, + "step": 897 + }, + { + "epoch": 0.4923245614035088, + "grad_norm": 0.48200756311416626, + "learning_rate": 1.108616444782575e-05, + "loss": 0.4611569344997406, + "step": 898 + }, + { + "epoch": 0.4928728070175439, + "grad_norm": 0.4781787693500519, + "learning_rate": 1.1068131457590022e-05, + "loss": 0.4725024104118347, + "step": 899 + }, + { + "epoch": 0.4934210526315789, + "grad_norm": 0.4791221618652344, + "learning_rate": 1.1050094953137444e-05, + "loss": 0.4657338857650757, + "step": 900 + }, + { + "epoch": 0.49396929824561403, + "grad_norm": 0.4847608804702759, + "learning_rate": 1.10320549938092e-05, + "loss": 0.470599889755249, + "step": 901 + }, + { + "epoch": 0.49451754385964913, + "grad_norm": 0.49423038959503174, + "learning_rate": 1.1014011638957849e-05, + "loss": 0.47278890013694763, + "step": 902 + }, + { + "epoch": 0.49506578947368424, + "grad_norm": 0.49379685521125793, + "learning_rate": 1.0995964947947114e-05, + "loss": 0.48167523741722107, + "step": 903 + }, + { + "epoch": 0.4956140350877193, + "grad_norm": 0.48116910457611084, + "learning_rate": 1.0977914980151692e-05, + "loss": 0.48413026332855225, + "step": 904 + }, + { + "epoch": 0.4961622807017544, + "grad_norm": 0.47256386280059814, + "learning_rate": 1.095986179495707e-05, + "loss": 0.4625633656978607, + "step": 905 + }, + { + "epoch": 0.4967105263157895, + "grad_norm": 0.45926183462142944, + "learning_rate": 1.0941805451759311e-05, + "loss": 0.47543013095855713, + "step": 906 + }, + { + "epoch": 0.49725877192982454, + "grad_norm": 0.46669498085975647, + "learning_rate": 1.0923746009964873e-05, + "loss": 0.4802945554256439, + "step": 907 + }, + { + "epoch": 0.49780701754385964, + "grad_norm": 0.4964379072189331, + "learning_rate": 1.0905683528990407e-05, + "loss": 0.47815272212028503, + "step": 908 + }, + { + "epoch": 0.49835526315789475, + "grad_norm": 0.48577016592025757, + "learning_rate": 1.0887618068262566e-05, + "loss": 0.4769788980484009, + "step": 909 + }, + { + "epoch": 0.49890350877192985, + "grad_norm": 0.48084649443626404, + "learning_rate": 1.0869549687217802e-05, + "loss": 0.47855353355407715, + "step": 910 + }, + { + "epoch": 0.4994517543859649, + "grad_norm": 0.5205073952674866, + "learning_rate": 1.0851478445302179e-05, + "loss": 0.4734543263912201, + "step": 911 + }, + { + "epoch": 0.5, + "grad_norm": 0.47468581795692444, + "learning_rate": 1.083340440197117e-05, + "loss": 0.4848591089248657, + "step": 912 + }, + { + "epoch": 0.5005482456140351, + "grad_norm": 0.4783199727535248, + "learning_rate": 1.0815327616689468e-05, + "loss": 0.4678228795528412, + "step": 913 + }, + { + "epoch": 0.5010964912280702, + "grad_norm": 0.49609270691871643, + "learning_rate": 1.0797248148930783e-05, + "loss": 0.47647619247436523, + "step": 914 + }, + { + "epoch": 0.5016447368421053, + "grad_norm": 0.4896128475666046, + "learning_rate": 1.0779166058177657e-05, + "loss": 0.485650897026062, + "step": 915 + }, + { + "epoch": 0.5021929824561403, + "grad_norm": 0.47610363364219666, + "learning_rate": 1.0761081403921255e-05, + "loss": 0.4849432110786438, + "step": 916 + }, + { + "epoch": 0.5027412280701754, + "grad_norm": 0.4984195828437805, + "learning_rate": 1.0742994245661178e-05, + "loss": 0.47665756940841675, + "step": 917 + }, + { + "epoch": 0.5032894736842105, + "grad_norm": 0.5013912916183472, + "learning_rate": 1.0724904642905272e-05, + "loss": 0.46333345770835876, + "step": 918 + }, + { + "epoch": 0.5038377192982456, + "grad_norm": 0.5182805061340332, + "learning_rate": 1.0706812655169414e-05, + "loss": 0.4818127155303955, + "step": 919 + }, + { + "epoch": 0.5043859649122807, + "grad_norm": 0.5016067028045654, + "learning_rate": 1.0688718341977336e-05, + "loss": 0.4823892116546631, + "step": 920 + }, + { + "epoch": 0.5049342105263158, + "grad_norm": 0.4810698330402374, + "learning_rate": 1.0670621762860419e-05, + "loss": 0.4859499931335449, + "step": 921 + }, + { + "epoch": 0.5054824561403509, + "grad_norm": 0.5582836866378784, + "learning_rate": 1.0652522977357498e-05, + "loss": 0.47283801436424255, + "step": 922 + }, + { + "epoch": 0.5060307017543859, + "grad_norm": 0.48066940903663635, + "learning_rate": 1.063442204501467e-05, + "loss": 0.4813811182975769, + "step": 923 + }, + { + "epoch": 0.506578947368421, + "grad_norm": 0.47318175435066223, + "learning_rate": 1.0616319025385088e-05, + "loss": 0.47437286376953125, + "step": 924 + }, + { + "epoch": 0.5071271929824561, + "grad_norm": 0.527256429195404, + "learning_rate": 1.059821397802878e-05, + "loss": 0.5002198219299316, + "step": 925 + }, + { + "epoch": 0.5076754385964912, + "grad_norm": 0.49084538221359253, + "learning_rate": 1.0580106962512446e-05, + "loss": 0.4681828022003174, + "step": 926 + }, + { + "epoch": 0.5082236842105263, + "grad_norm": 0.49694955348968506, + "learning_rate": 1.0561998038409252e-05, + "loss": 0.4772576689720154, + "step": 927 + }, + { + "epoch": 0.5087719298245614, + "grad_norm": 0.49907466769218445, + "learning_rate": 1.0543887265298651e-05, + "loss": 0.4743449091911316, + "step": 928 + }, + { + "epoch": 0.5093201754385965, + "grad_norm": 0.47016963362693787, + "learning_rate": 1.0525774702766183e-05, + "loss": 0.4842929244041443, + "step": 929 + }, + { + "epoch": 0.5098684210526315, + "grad_norm": 0.48708009719848633, + "learning_rate": 1.050766041040326e-05, + "loss": 0.49047666788101196, + "step": 930 + }, + { + "epoch": 0.5104166666666666, + "grad_norm": 0.48263296484947205, + "learning_rate": 1.0489544447807006e-05, + "loss": 0.4745303988456726, + "step": 931 + }, + { + "epoch": 0.5109649122807017, + "grad_norm": 0.4735895097255707, + "learning_rate": 1.0471426874580025e-05, + "loss": 0.47902441024780273, + "step": 932 + }, + { + "epoch": 0.5115131578947368, + "grad_norm": 0.477176308631897, + "learning_rate": 1.0453307750330217e-05, + "loss": 0.45984840393066406, + "step": 933 + }, + { + "epoch": 0.5120614035087719, + "grad_norm": 0.5000417828559875, + "learning_rate": 1.0435187134670608e-05, + "loss": 0.4723761975765228, + "step": 934 + }, + { + "epoch": 0.512609649122807, + "grad_norm": 0.4939057230949402, + "learning_rate": 1.0417065087219101e-05, + "loss": 0.4698931574821472, + "step": 935 + }, + { + "epoch": 0.5131578947368421, + "grad_norm": 0.4751085937023163, + "learning_rate": 1.0398941667598328e-05, + "loss": 0.47515150904655457, + "step": 936 + }, + { + "epoch": 0.5137061403508771, + "grad_norm": 0.5196514129638672, + "learning_rate": 1.0380816935435438e-05, + "loss": 0.4751243591308594, + "step": 937 + }, + { + "epoch": 0.5142543859649122, + "grad_norm": 0.46978095173835754, + "learning_rate": 1.0362690950361882e-05, + "loss": 0.4780597686767578, + "step": 938 + }, + { + "epoch": 0.5148026315789473, + "grad_norm": 0.4803449511528015, + "learning_rate": 1.034456377201325e-05, + "loss": 0.472648948431015, + "step": 939 + }, + { + "epoch": 0.5153508771929824, + "grad_norm": 0.47589024901390076, + "learning_rate": 1.0326435460029051e-05, + "loss": 0.4832885265350342, + "step": 940 + }, + { + "epoch": 0.5158991228070176, + "grad_norm": 0.47134554386138916, + "learning_rate": 1.0308306074052517e-05, + "loss": 0.47504305839538574, + "step": 941 + }, + { + "epoch": 0.5164473684210527, + "grad_norm": 0.4921835958957672, + "learning_rate": 1.0290175673730426e-05, + "loss": 0.4666009843349457, + "step": 942 + }, + { + "epoch": 0.5169956140350878, + "grad_norm": 0.4763789772987366, + "learning_rate": 1.0272044318712888e-05, + "loss": 0.4826936721801758, + "step": 943 + }, + { + "epoch": 0.5175438596491229, + "grad_norm": 0.4789031744003296, + "learning_rate": 1.0253912068653147e-05, + "loss": 0.45768702030181885, + "step": 944 + }, + { + "epoch": 0.5180921052631579, + "grad_norm": 0.5070001482963562, + "learning_rate": 1.0235778983207404e-05, + "loss": 0.45878398418426514, + "step": 945 + }, + { + "epoch": 0.518640350877193, + "grad_norm": 0.4759475588798523, + "learning_rate": 1.0217645122034603e-05, + "loss": 0.46790462732315063, + "step": 946 + }, + { + "epoch": 0.5191885964912281, + "grad_norm": 0.507018506526947, + "learning_rate": 1.0199510544796237e-05, + "loss": 0.4877185523509979, + "step": 947 + }, + { + "epoch": 0.5197368421052632, + "grad_norm": 0.4932757616043091, + "learning_rate": 1.0181375311156156e-05, + "loss": 0.4681272506713867, + "step": 948 + }, + { + "epoch": 0.5202850877192983, + "grad_norm": 0.5080598592758179, + "learning_rate": 1.0163239480780376e-05, + "loss": 0.47993144392967224, + "step": 949 + }, + { + "epoch": 0.5208333333333334, + "grad_norm": 0.4807532727718353, + "learning_rate": 1.0145103113336868e-05, + "loss": 0.46426552534103394, + "step": 950 + }, + { + "epoch": 0.5213815789473685, + "grad_norm": 0.4675240218639374, + "learning_rate": 1.0126966268495377e-05, + "loss": 0.4759901762008667, + "step": 951 + }, + { + "epoch": 0.5219298245614035, + "grad_norm": 0.47961559891700745, + "learning_rate": 1.010882900592721e-05, + "loss": 0.46679529547691345, + "step": 952 + }, + { + "epoch": 0.5224780701754386, + "grad_norm": 0.4580441117286682, + "learning_rate": 1.0090691385305057e-05, + "loss": 0.4677969217300415, + "step": 953 + }, + { + "epoch": 0.5230263157894737, + "grad_norm": 0.48743894696235657, + "learning_rate": 1.0072553466302783e-05, + "loss": 0.45729967951774597, + "step": 954 + }, + { + "epoch": 0.5235745614035088, + "grad_norm": 0.480928510427475, + "learning_rate": 1.0054415308595235e-05, + "loss": 0.46743547916412354, + "step": 955 + }, + { + "epoch": 0.5241228070175439, + "grad_norm": 0.4802962839603424, + "learning_rate": 1.0036276971858043e-05, + "loss": 0.4764847755432129, + "step": 956 + }, + { + "epoch": 0.524671052631579, + "grad_norm": 0.4960572123527527, + "learning_rate": 1.0018138515767425e-05, + "loss": 0.4603794515132904, + "step": 957 + }, + { + "epoch": 0.5252192982456141, + "grad_norm": 0.5283850431442261, + "learning_rate": 1e-05, + "loss": 0.46352872252464294, + "step": 958 + }, + { + "epoch": 0.5257675438596491, + "grad_norm": 0.5403750538825989, + "learning_rate": 9.981861484232576e-06, + "loss": 0.4582330584526062, + "step": 959 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.4815463721752167, + "learning_rate": 9.963723028141959e-06, + "loss": 0.4711856544017792, + "step": 960 + }, + { + "epoch": 0.5268640350877193, + "grad_norm": 0.4983313977718353, + "learning_rate": 9.945584691404768e-06, + "loss": 0.4663437306880951, + "step": 961 + }, + { + "epoch": 0.5274122807017544, + "grad_norm": 0.4963984787464142, + "learning_rate": 9.927446533697218e-06, + "loss": 0.4564981460571289, + "step": 962 + }, + { + "epoch": 0.5279605263157895, + "grad_norm": 0.4613352119922638, + "learning_rate": 9.909308614694944e-06, + "loss": 0.4883826673030853, + "step": 963 + }, + { + "epoch": 0.5285087719298246, + "grad_norm": 0.47054144740104675, + "learning_rate": 9.891170994072792e-06, + "loss": 0.4734528660774231, + "step": 964 + }, + { + "epoch": 0.5290570175438597, + "grad_norm": 0.49731600284576416, + "learning_rate": 9.873033731504628e-06, + "loss": 0.4818234443664551, + "step": 965 + }, + { + "epoch": 0.5296052631578947, + "grad_norm": 0.4624738395214081, + "learning_rate": 9.854896886663134e-06, + "loss": 0.47068604826927185, + "step": 966 + }, + { + "epoch": 0.5301535087719298, + "grad_norm": 0.4743126630783081, + "learning_rate": 9.836760519219627e-06, + "loss": 0.48319944739341736, + "step": 967 + }, + { + "epoch": 0.5307017543859649, + "grad_norm": 0.4714062511920929, + "learning_rate": 9.818624688843846e-06, + "loss": 0.4893440902233124, + "step": 968 + }, + { + "epoch": 0.53125, + "grad_norm": 0.5396475791931152, + "learning_rate": 9.800489455203765e-06, + "loss": 0.47514447569847107, + "step": 969 + }, + { + "epoch": 0.5317982456140351, + "grad_norm": 0.46770763397216797, + "learning_rate": 9.7823548779654e-06, + "loss": 0.47769927978515625, + "step": 970 + }, + { + "epoch": 0.5323464912280702, + "grad_norm": 0.5117153525352478, + "learning_rate": 9.764221016792597e-06, + "loss": 0.46777576208114624, + "step": 971 + }, + { + "epoch": 0.5328947368421053, + "grad_norm": 0.4876892864704132, + "learning_rate": 9.746087931346853e-06, + "loss": 0.4674271047115326, + "step": 972 + }, + { + "epoch": 0.5334429824561403, + "grad_norm": 0.5015421509742737, + "learning_rate": 9.727955681287115e-06, + "loss": 0.4751541018486023, + "step": 973 + }, + { + "epoch": 0.5339912280701754, + "grad_norm": 0.5426156520843506, + "learning_rate": 9.709824326269577e-06, + "loss": 0.4698949456214905, + "step": 974 + }, + { + "epoch": 0.5345394736842105, + "grad_norm": 0.5053887963294983, + "learning_rate": 9.691693925947485e-06, + "loss": 0.4778580069541931, + "step": 975 + }, + { + "epoch": 0.5350877192982456, + "grad_norm": 0.476938933134079, + "learning_rate": 9.67356453997095e-06, + "loss": 0.47415146231651306, + "step": 976 + }, + { + "epoch": 0.5356359649122807, + "grad_norm": 0.4961269795894623, + "learning_rate": 9.655436227986752e-06, + "loss": 0.4764622449874878, + "step": 977 + }, + { + "epoch": 0.5361842105263158, + "grad_norm": 0.4789068400859833, + "learning_rate": 9.637309049638121e-06, + "loss": 0.4756825864315033, + "step": 978 + }, + { + "epoch": 0.5367324561403509, + "grad_norm": 0.49088576436042786, + "learning_rate": 9.619183064564564e-06, + "loss": 0.46228572726249695, + "step": 979 + }, + { + "epoch": 0.5372807017543859, + "grad_norm": 0.49654847383499146, + "learning_rate": 9.601058332401674e-06, + "loss": 0.4739253520965576, + "step": 980 + }, + { + "epoch": 0.537828947368421, + "grad_norm": 0.47030875086784363, + "learning_rate": 9.582934912780904e-06, + "loss": 0.48689985275268555, + "step": 981 + }, + { + "epoch": 0.5383771929824561, + "grad_norm": 0.4943940043449402, + "learning_rate": 9.564812865329396e-06, + "loss": 0.4729098677635193, + "step": 982 + }, + { + "epoch": 0.5389254385964912, + "grad_norm": 0.4732706546783447, + "learning_rate": 9.546692249669784e-06, + "loss": 0.4672876298427582, + "step": 983 + }, + { + "epoch": 0.5394736842105263, + "grad_norm": 0.495355486869812, + "learning_rate": 9.52857312541998e-06, + "loss": 0.4782235026359558, + "step": 984 + }, + { + "epoch": 0.5400219298245614, + "grad_norm": 0.4918850064277649, + "learning_rate": 9.510455552192996e-06, + "loss": 0.48038250207901, + "step": 985 + }, + { + "epoch": 0.5405701754385965, + "grad_norm": 0.484025239944458, + "learning_rate": 9.492339589596742e-06, + "loss": 0.4546385705471039, + "step": 986 + }, + { + "epoch": 0.5411184210526315, + "grad_norm": 0.5004245638847351, + "learning_rate": 9.474225297233822e-06, + "loss": 0.4660744071006775, + "step": 987 + }, + { + "epoch": 0.5416666666666666, + "grad_norm": 0.4947008788585663, + "learning_rate": 9.456112734701349e-06, + "loss": 0.4723333716392517, + "step": 988 + }, + { + "epoch": 0.5422149122807017, + "grad_norm": 0.4967457056045532, + "learning_rate": 9.43800196159075e-06, + "loss": 0.4838150441646576, + "step": 989 + }, + { + "epoch": 0.5427631578947368, + "grad_norm": 0.5091995596885681, + "learning_rate": 9.41989303748756e-06, + "loss": 0.4611927270889282, + "step": 990 + }, + { + "epoch": 0.5433114035087719, + "grad_norm": 0.5165793895721436, + "learning_rate": 9.401786021971222e-06, + "loss": 0.4575102925300598, + "step": 991 + }, + { + "epoch": 0.543859649122807, + "grad_norm": 0.4805612862110138, + "learning_rate": 9.383680974614915e-06, + "loss": 0.4832637310028076, + "step": 992 + }, + { + "epoch": 0.5444078947368421, + "grad_norm": 0.5379158854484558, + "learning_rate": 9.365577954985336e-06, + "loss": 0.4655495584011078, + "step": 993 + }, + { + "epoch": 0.5449561403508771, + "grad_norm": 0.4972173571586609, + "learning_rate": 9.347477022642504e-06, + "loss": 0.4600108861923218, + "step": 994 + }, + { + "epoch": 0.5455043859649122, + "grad_norm": 0.48388564586639404, + "learning_rate": 9.329378237139583e-06, + "loss": 0.477608859539032, + "step": 995 + }, + { + "epoch": 0.5460526315789473, + "grad_norm": 0.4924751818180084, + "learning_rate": 9.31128165802267e-06, + "loss": 0.4669070243835449, + "step": 996 + }, + { + "epoch": 0.5466008771929824, + "grad_norm": 0.5232599377632141, + "learning_rate": 9.29318734483059e-06, + "loss": 0.482491672039032, + "step": 997 + }, + { + "epoch": 0.5471491228070176, + "grad_norm": 0.4885530173778534, + "learning_rate": 9.275095357094732e-06, + "loss": 0.483872652053833, + "step": 998 + }, + { + "epoch": 0.5476973684210527, + "grad_norm": 0.4905492961406708, + "learning_rate": 9.257005754338827e-06, + "loss": 0.470367968082428, + "step": 999 + }, + { + "epoch": 0.5482456140350878, + "grad_norm": 0.47469961643218994, + "learning_rate": 9.238918596078747e-06, + "loss": 0.4565841853618622, + "step": 1000 + }, + { + "epoch": 0.5487938596491229, + "grad_norm": 0.5193788409233093, + "learning_rate": 9.220833941822346e-06, + "loss": 0.47289925813674927, + "step": 1001 + }, + { + "epoch": 0.5493421052631579, + "grad_norm": 0.4825241267681122, + "learning_rate": 9.202751851069219e-06, + "loss": 0.4673827588558197, + "step": 1002 + }, + { + "epoch": 0.549890350877193, + "grad_norm": 0.4646058678627014, + "learning_rate": 9.184672383310534e-06, + "loss": 0.4733600616455078, + "step": 1003 + }, + { + "epoch": 0.5504385964912281, + "grad_norm": 0.4764874577522278, + "learning_rate": 9.166595598028833e-06, + "loss": 0.47691139578819275, + "step": 1004 + }, + { + "epoch": 0.5509868421052632, + "grad_norm": 0.5994452834129333, + "learning_rate": 9.148521554697823e-06, + "loss": 0.47110623121261597, + "step": 1005 + }, + { + "epoch": 0.5515350877192983, + "grad_norm": 0.49562346935272217, + "learning_rate": 9.130450312782198e-06, + "loss": 0.44304126501083374, + "step": 1006 + }, + { + "epoch": 0.5520833333333334, + "grad_norm": 0.49325883388519287, + "learning_rate": 9.112381931737437e-06, + "loss": 0.46718454360961914, + "step": 1007 + }, + { + "epoch": 0.5526315789473685, + "grad_norm": 0.48549625277519226, + "learning_rate": 9.094316471009595e-06, + "loss": 0.470458984375, + "step": 1008 + }, + { + "epoch": 0.5531798245614035, + "grad_norm": 0.5088659524917603, + "learning_rate": 9.076253990035132e-06, + "loss": 0.4785817265510559, + "step": 1009 + }, + { + "epoch": 0.5537280701754386, + "grad_norm": 0.4731486439704895, + "learning_rate": 9.058194548240694e-06, + "loss": 0.4724987745285034, + "step": 1010 + }, + { + "epoch": 0.5542763157894737, + "grad_norm": 0.4680763781070709, + "learning_rate": 9.040138205042935e-06, + "loss": 0.4792757034301758, + "step": 1011 + }, + { + "epoch": 0.5548245614035088, + "grad_norm": 0.4451364576816559, + "learning_rate": 9.022085019848313e-06, + "loss": 0.47036516666412354, + "step": 1012 + }, + { + "epoch": 0.5553728070175439, + "grad_norm": 0.508719801902771, + "learning_rate": 9.004035052052891e-06, + "loss": 0.4573447108268738, + "step": 1013 + }, + { + "epoch": 0.555921052631579, + "grad_norm": 0.5096684098243713, + "learning_rate": 8.985988361042153e-06, + "loss": 0.4640441834926605, + "step": 1014 + }, + { + "epoch": 0.5564692982456141, + "grad_norm": 0.47320401668548584, + "learning_rate": 8.967945006190804e-06, + "loss": 0.45948725938796997, + "step": 1015 + }, + { + "epoch": 0.5570175438596491, + "grad_norm": 0.4911516606807709, + "learning_rate": 8.94990504686256e-06, + "loss": 0.4813142716884613, + "step": 1016 + }, + { + "epoch": 0.5575657894736842, + "grad_norm": 0.5072831511497498, + "learning_rate": 8.931868542409983e-06, + "loss": 0.470254510641098, + "step": 1017 + }, + { + "epoch": 0.5581140350877193, + "grad_norm": 0.5038588047027588, + "learning_rate": 8.913835552174251e-06, + "loss": 0.4722591042518616, + "step": 1018 + }, + { + "epoch": 0.5586622807017544, + "grad_norm": 0.45923951268196106, + "learning_rate": 8.895806135484992e-06, + "loss": 0.4707237184047699, + "step": 1019 + }, + { + "epoch": 0.5592105263157895, + "grad_norm": 0.48106175661087036, + "learning_rate": 8.877780351660079e-06, + "loss": 0.46508774161338806, + "step": 1020 + }, + { + "epoch": 0.5597587719298246, + "grad_norm": 0.5078846216201782, + "learning_rate": 8.859758260005416e-06, + "loss": 0.4740423858165741, + "step": 1021 + }, + { + "epoch": 0.5603070175438597, + "grad_norm": 0.440356969833374, + "learning_rate": 8.841739919814775e-06, + "loss": 0.4663727879524231, + "step": 1022 + }, + { + "epoch": 0.5608552631578947, + "grad_norm": 0.4811111092567444, + "learning_rate": 8.823725390369586e-06, + "loss": 0.45025596022605896, + "step": 1023 + }, + { + "epoch": 0.5614035087719298, + "grad_norm": 0.5155603289604187, + "learning_rate": 8.805714730938728e-06, + "loss": 0.47258269786834717, + "step": 1024 + }, + { + "epoch": 0.5619517543859649, + "grad_norm": 0.5031796097755432, + "learning_rate": 8.787708000778358e-06, + "loss": 0.45634135603904724, + "step": 1025 + }, + { + "epoch": 0.5625, + "grad_norm": 0.46268266439437866, + "learning_rate": 8.769705259131707e-06, + "loss": 0.46244633197784424, + "step": 1026 + }, + { + "epoch": 0.5630482456140351, + "grad_norm": 0.4950982928276062, + "learning_rate": 8.751706565228878e-06, + "loss": 0.4840429723262787, + "step": 1027 + }, + { + "epoch": 0.5635964912280702, + "grad_norm": 0.5139880180358887, + "learning_rate": 8.733711978286653e-06, + "loss": 0.4671246409416199, + "step": 1028 + }, + { + "epoch": 0.5641447368421053, + "grad_norm": 0.48547184467315674, + "learning_rate": 8.715721557508315e-06, + "loss": 0.4675450325012207, + "step": 1029 + }, + { + "epoch": 0.5646929824561403, + "grad_norm": 0.4884546101093292, + "learning_rate": 8.697735362083428e-06, + "loss": 0.46358034014701843, + "step": 1030 + }, + { + "epoch": 0.5652412280701754, + "grad_norm": 0.4742810130119324, + "learning_rate": 8.679753451187659e-06, + "loss": 0.47714877128601074, + "step": 1031 + }, + { + "epoch": 0.5657894736842105, + "grad_norm": 0.5046728253364563, + "learning_rate": 8.661775883982577e-06, + "loss": 0.4886934161186218, + "step": 1032 + }, + { + "epoch": 0.5663377192982456, + "grad_norm": 0.4937182664871216, + "learning_rate": 8.643802719615467e-06, + "loss": 0.4588352143764496, + "step": 1033 + }, + { + "epoch": 0.5668859649122807, + "grad_norm": 0.4708634316921234, + "learning_rate": 8.625834017219114e-06, + "loss": 0.4672088027000427, + "step": 1034 + }, + { + "epoch": 0.5674342105263158, + "grad_norm": 0.5143368244171143, + "learning_rate": 8.607869835911638e-06, + "loss": 0.45802921056747437, + "step": 1035 + }, + { + "epoch": 0.5679824561403509, + "grad_norm": 0.4756060540676117, + "learning_rate": 8.589910234796277e-06, + "loss": 0.46481484174728394, + "step": 1036 + }, + { + "epoch": 0.5685307017543859, + "grad_norm": 0.464942991733551, + "learning_rate": 8.571955272961196e-06, + "loss": 0.45955371856689453, + "step": 1037 + }, + { + "epoch": 0.569078947368421, + "grad_norm": 0.4996425211429596, + "learning_rate": 8.554005009479307e-06, + "loss": 0.46826648712158203, + "step": 1038 + }, + { + "epoch": 0.5696271929824561, + "grad_norm": 0.458551824092865, + "learning_rate": 8.536059503408058e-06, + "loss": 0.4795677065849304, + "step": 1039 + }, + { + "epoch": 0.5701754385964912, + "grad_norm": 0.48602548241615295, + "learning_rate": 8.518118813789238e-06, + "loss": 0.4701020419597626, + "step": 1040 + }, + { + "epoch": 0.5707236842105263, + "grad_norm": 0.49667230248451233, + "learning_rate": 8.500182999648804e-06, + "loss": 0.47075098752975464, + "step": 1041 + }, + { + "epoch": 0.5712719298245614, + "grad_norm": 0.46576860547065735, + "learning_rate": 8.482252119996666e-06, + "loss": 0.47423285245895386, + "step": 1042 + }, + { + "epoch": 0.5718201754385965, + "grad_norm": 0.4754815697669983, + "learning_rate": 8.464326233826492e-06, + "loss": 0.46078214049339294, + "step": 1043 + }, + { + "epoch": 0.5723684210526315, + "grad_norm": 0.47265875339508057, + "learning_rate": 8.44640540011553e-06, + "loss": 0.461892306804657, + "step": 1044 + }, + { + "epoch": 0.5729166666666666, + "grad_norm": 0.499237060546875, + "learning_rate": 8.428489677824405e-06, + "loss": 0.4715636372566223, + "step": 1045 + }, + { + "epoch": 0.5734649122807017, + "grad_norm": 0.5131467580795288, + "learning_rate": 8.410579125896924e-06, + "loss": 0.46874022483825684, + "step": 1046 + }, + { + "epoch": 0.5740131578947368, + "grad_norm": 0.46456998586654663, + "learning_rate": 8.392673803259875e-06, + "loss": 0.4896584451198578, + "step": 1047 + }, + { + "epoch": 0.5745614035087719, + "grad_norm": 0.4798382520675659, + "learning_rate": 8.374773768822852e-06, + "loss": 0.46989163756370544, + "step": 1048 + }, + { + "epoch": 0.575109649122807, + "grad_norm": 0.49859294295310974, + "learning_rate": 8.356879081478051e-06, + "loss": 0.46852609515190125, + "step": 1049 + }, + { + "epoch": 0.5756578947368421, + "grad_norm": 0.4816218614578247, + "learning_rate": 8.338989800100065e-06, + "loss": 0.4753386378288269, + "step": 1050 + }, + { + "epoch": 0.5762061403508771, + "grad_norm": 0.4893586039543152, + "learning_rate": 8.32110598354571e-06, + "loss": 0.46335873007774353, + "step": 1051 + }, + { + "epoch": 0.5767543859649122, + "grad_norm": 0.4901929795742035, + "learning_rate": 8.303227690653824e-06, + "loss": 0.4654930830001831, + "step": 1052 + }, + { + "epoch": 0.5773026315789473, + "grad_norm": 0.621925950050354, + "learning_rate": 8.285354980245058e-06, + "loss": 0.4590475559234619, + "step": 1053 + }, + { + "epoch": 0.5778508771929824, + "grad_norm": 0.47231367230415344, + "learning_rate": 8.267487911121714e-06, + "loss": 0.4794731140136719, + "step": 1054 + }, + { + "epoch": 0.5783991228070176, + "grad_norm": 0.47593390941619873, + "learning_rate": 8.249626542067528e-06, + "loss": 0.4682958722114563, + "step": 1055 + }, + { + "epoch": 0.5789473684210527, + "grad_norm": 0.501139760017395, + "learning_rate": 8.231770931847469e-06, + "loss": 0.4564785361289978, + "step": 1056 + }, + { + "epoch": 0.5794956140350878, + "grad_norm": 0.5357538461685181, + "learning_rate": 8.213921139207583e-06, + "loss": 0.4616129994392395, + "step": 1057 + }, + { + "epoch": 0.5800438596491229, + "grad_norm": 0.47568657994270325, + "learning_rate": 8.196077222874755e-06, + "loss": 0.4671221673488617, + "step": 1058 + }, + { + "epoch": 0.5805921052631579, + "grad_norm": 0.4939679801464081, + "learning_rate": 8.178239241556547e-06, + "loss": 0.47691187262535095, + "step": 1059 + }, + { + "epoch": 0.581140350877193, + "grad_norm": 0.49383652210235596, + "learning_rate": 8.160407253940996e-06, + "loss": 0.4642741084098816, + "step": 1060 + }, + { + "epoch": 0.5816885964912281, + "grad_norm": 0.4947352707386017, + "learning_rate": 8.14258131869641e-06, + "loss": 0.45315033197402954, + "step": 1061 + }, + { + "epoch": 0.5822368421052632, + "grad_norm": 0.46344584226608276, + "learning_rate": 8.124761494471191e-06, + "loss": 0.4679107666015625, + "step": 1062 + }, + { + "epoch": 0.5827850877192983, + "grad_norm": 0.4847925901412964, + "learning_rate": 8.106947839893639e-06, + "loss": 0.46107810735702515, + "step": 1063 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.45921164751052856, + "learning_rate": 8.089140413571747e-06, + "loss": 0.4712381958961487, + "step": 1064 + }, + { + "epoch": 0.5838815789473685, + "grad_norm": 0.5184562802314758, + "learning_rate": 8.071339274093018e-06, + "loss": 0.44746991991996765, + "step": 1065 + }, + { + "epoch": 0.5844298245614035, + "grad_norm": 0.48062455654144287, + "learning_rate": 8.05354448002428e-06, + "loss": 0.4749741554260254, + "step": 1066 + }, + { + "epoch": 0.5849780701754386, + "grad_norm": 0.45290520787239075, + "learning_rate": 8.035756089911471e-06, + "loss": 0.4632267653942108, + "step": 1067 + }, + { + "epoch": 0.5855263157894737, + "grad_norm": 0.4724346697330475, + "learning_rate": 8.017974162279469e-06, + "loss": 0.4638788402080536, + "step": 1068 + }, + { + "epoch": 0.5860745614035088, + "grad_norm": 0.48686882853507996, + "learning_rate": 8.00019875563189e-06, + "loss": 0.45769965648651123, + "step": 1069 + }, + { + "epoch": 0.5866228070175439, + "grad_norm": 0.46247026324272156, + "learning_rate": 7.982429928450888e-06, + "loss": 0.45463666319847107, + "step": 1070 + }, + { + "epoch": 0.587171052631579, + "grad_norm": 0.5366244912147522, + "learning_rate": 7.964667739196976e-06, + "loss": 0.47857174277305603, + "step": 1071 + }, + { + "epoch": 0.5877192982456141, + "grad_norm": 0.49044543504714966, + "learning_rate": 7.94691224630883e-06, + "loss": 0.4604954123497009, + "step": 1072 + }, + { + "epoch": 0.5882675438596491, + "grad_norm": 0.49987998604774475, + "learning_rate": 7.929163508203091e-06, + "loss": 0.469890832901001, + "step": 1073 + }, + { + "epoch": 0.5888157894736842, + "grad_norm": 0.5040852427482605, + "learning_rate": 7.91142158327417e-06, + "loss": 0.47207868099212646, + "step": 1074 + }, + { + "epoch": 0.5893640350877193, + "grad_norm": 0.4750802218914032, + "learning_rate": 7.893686529894076e-06, + "loss": 0.47198259830474854, + "step": 1075 + }, + { + "epoch": 0.5899122807017544, + "grad_norm": 0.4758901298046112, + "learning_rate": 7.875958406412206e-06, + "loss": 0.46596193313598633, + "step": 1076 + }, + { + "epoch": 0.5904605263157895, + "grad_norm": 0.4805660545825958, + "learning_rate": 7.858237271155145e-06, + "loss": 0.454732745885849, + "step": 1077 + }, + { + "epoch": 0.5910087719298246, + "grad_norm": 0.4993060529232025, + "learning_rate": 7.840523182426497e-06, + "loss": 0.46462464332580566, + "step": 1078 + }, + { + "epoch": 0.5915570175438597, + "grad_norm": 0.47512087225914, + "learning_rate": 7.82281619850669e-06, + "loss": 0.4709402024745941, + "step": 1079 + }, + { + "epoch": 0.5921052631578947, + "grad_norm": 0.4666202962398529, + "learning_rate": 7.80511637765276e-06, + "loss": 0.451152503490448, + "step": 1080 + }, + { + "epoch": 0.5926535087719298, + "grad_norm": 0.48495787382125854, + "learning_rate": 7.787423778098183e-06, + "loss": 0.4697365164756775, + "step": 1081 + }, + { + "epoch": 0.5932017543859649, + "grad_norm": 0.464061975479126, + "learning_rate": 7.769738458052685e-06, + "loss": 0.46991023421287537, + "step": 1082 + }, + { + "epoch": 0.59375, + "grad_norm": 0.47889429330825806, + "learning_rate": 7.752060475702028e-06, + "loss": 0.4730076491832733, + "step": 1083 + }, + { + "epoch": 0.5942982456140351, + "grad_norm": 0.5057309865951538, + "learning_rate": 7.73438988920784e-06, + "loss": 0.46246618032455444, + "step": 1084 + }, + { + "epoch": 0.5948464912280702, + "grad_norm": 0.47753041982650757, + "learning_rate": 7.716726756707418e-06, + "loss": 0.46080321073532104, + "step": 1085 + }, + { + "epoch": 0.5953947368421053, + "grad_norm": 0.49280911684036255, + "learning_rate": 7.699071136313532e-06, + "loss": 0.46324023604393005, + "step": 1086 + }, + { + "epoch": 0.5959429824561403, + "grad_norm": 0.4621438682079315, + "learning_rate": 7.681423086114232e-06, + "loss": 0.4490150213241577, + "step": 1087 + }, + { + "epoch": 0.5964912280701754, + "grad_norm": 0.4937477111816406, + "learning_rate": 7.663782664172671e-06, + "loss": 0.47565126419067383, + "step": 1088 + }, + { + "epoch": 0.5970394736842105, + "grad_norm": 0.4713328182697296, + "learning_rate": 7.6461499285269e-06, + "loss": 0.4622294306755066, + "step": 1089 + }, + { + "epoch": 0.5975877192982456, + "grad_norm": 0.4742901027202606, + "learning_rate": 7.628524937189678e-06, + "loss": 0.46156418323516846, + "step": 1090 + }, + { + "epoch": 0.5981359649122807, + "grad_norm": 0.5026023983955383, + "learning_rate": 7.610907748148291e-06, + "loss": 0.46600472927093506, + "step": 1091 + }, + { + "epoch": 0.5986842105263158, + "grad_norm": 0.477543443441391, + "learning_rate": 7.593298419364355e-06, + "loss": 0.47159242630004883, + "step": 1092 + }, + { + "epoch": 0.5992324561403509, + "grad_norm": 0.48052772879600525, + "learning_rate": 7.575697008773615e-06, + "loss": 0.44670578837394714, + "step": 1093 + }, + { + "epoch": 0.5997807017543859, + "grad_norm": 0.4850292503833771, + "learning_rate": 7.558103574285779e-06, + "loss": 0.4710618853569031, + "step": 1094 + }, + { + "epoch": 0.600328947368421, + "grad_norm": 0.4854431450366974, + "learning_rate": 7.540518173784306e-06, + "loss": 0.4577372670173645, + "step": 1095 + }, + { + "epoch": 0.6008771929824561, + "grad_norm": 0.465509831905365, + "learning_rate": 7.522940865126218e-06, + "loss": 0.45736953616142273, + "step": 1096 + }, + { + "epoch": 0.6014254385964912, + "grad_norm": 0.4870331585407257, + "learning_rate": 7.505371706141925e-06, + "loss": 0.4602835774421692, + "step": 1097 + }, + { + "epoch": 0.6019736842105263, + "grad_norm": 0.487417608499527, + "learning_rate": 7.4878107546350184e-06, + "loss": 0.4558318257331848, + "step": 1098 + }, + { + "epoch": 0.6025219298245614, + "grad_norm": 0.49326610565185547, + "learning_rate": 7.470258068382079e-06, + "loss": 0.46510666608810425, + "step": 1099 + }, + { + "epoch": 0.6030701754385965, + "grad_norm": 0.49196192622184753, + "learning_rate": 7.452713705132515e-06, + "loss": 0.457396924495697, + "step": 1100 + }, + { + "epoch": 0.6036184210526315, + "grad_norm": 0.46562331914901733, + "learning_rate": 7.435177722608329e-06, + "loss": 0.46910542249679565, + "step": 1101 + }, + { + "epoch": 0.6041666666666666, + "grad_norm": 0.4977482259273529, + "learning_rate": 7.417650178503959e-06, + "loss": 0.4489624500274658, + "step": 1102 + }, + { + "epoch": 0.6047149122807017, + "grad_norm": 0.46233120560646057, + "learning_rate": 7.40013113048609e-06, + "loss": 0.4486108720302582, + "step": 1103 + }, + { + "epoch": 0.6052631578947368, + "grad_norm": 0.4903429448604584, + "learning_rate": 7.382620636193438e-06, + "loss": 0.453243613243103, + "step": 1104 + }, + { + "epoch": 0.6058114035087719, + "grad_norm": 0.4849245250225067, + "learning_rate": 7.3651187532365845e-06, + "loss": 0.4521423280239105, + "step": 1105 + }, + { + "epoch": 0.606359649122807, + "grad_norm": 0.4903513491153717, + "learning_rate": 7.347625539197785e-06, + "loss": 0.4613727033138275, + "step": 1106 + }, + { + "epoch": 0.6069078947368421, + "grad_norm": 0.476204514503479, + "learning_rate": 7.3301410516307595e-06, + "loss": 0.4579714238643646, + "step": 1107 + }, + { + "epoch": 0.6074561403508771, + "grad_norm": 0.47110995650291443, + "learning_rate": 7.312665348060533e-06, + "loss": 0.4564129710197449, + "step": 1108 + }, + { + "epoch": 0.6080043859649122, + "grad_norm": 0.4694063067436218, + "learning_rate": 7.295198485983215e-06, + "loss": 0.4591718316078186, + "step": 1109 + }, + { + "epoch": 0.6085526315789473, + "grad_norm": 0.46599674224853516, + "learning_rate": 7.2777405228658405e-06, + "loss": 0.4712226092815399, + "step": 1110 + }, + { + "epoch": 0.6091008771929824, + "grad_norm": 0.4798458218574524, + "learning_rate": 7.2602915161461605e-06, + "loss": 0.4686587452888489, + "step": 1111 + }, + { + "epoch": 0.6096491228070176, + "grad_norm": 0.48706597089767456, + "learning_rate": 7.242851523232448e-06, + "loss": 0.4550248086452484, + "step": 1112 + }, + { + "epoch": 0.6101973684210527, + "grad_norm": 0.4822862148284912, + "learning_rate": 7.225420601503347e-06, + "loss": 0.4606156349182129, + "step": 1113 + }, + { + "epoch": 0.6107456140350878, + "grad_norm": 0.47604596614837646, + "learning_rate": 7.207998808307628e-06, + "loss": 0.4694164991378784, + "step": 1114 + }, + { + "epoch": 0.6112938596491229, + "grad_norm": 0.4813266396522522, + "learning_rate": 7.190586200964041e-06, + "loss": 0.4651000499725342, + "step": 1115 + }, + { + "epoch": 0.6118421052631579, + "grad_norm": 0.4579448401927948, + "learning_rate": 7.173182836761121e-06, + "loss": 0.4731736183166504, + "step": 1116 + }, + { + "epoch": 0.612390350877193, + "grad_norm": 0.4748271107673645, + "learning_rate": 7.155788772956978e-06, + "loss": 0.4703238010406494, + "step": 1117 + }, + { + "epoch": 0.6129385964912281, + "grad_norm": 0.4719952940940857, + "learning_rate": 7.13840406677913e-06, + "loss": 0.45160242915153503, + "step": 1118 + }, + { + "epoch": 0.6134868421052632, + "grad_norm": 0.4611450731754303, + "learning_rate": 7.121028775424313e-06, + "loss": 0.47287964820861816, + "step": 1119 + }, + { + "epoch": 0.6140350877192983, + "grad_norm": 0.5155686736106873, + "learning_rate": 7.103662956058277e-06, + "loss": 0.476226806640625, + "step": 1120 + }, + { + "epoch": 0.6145833333333334, + "grad_norm": 0.46889781951904297, + "learning_rate": 7.0863066658156124e-06, + "loss": 0.455394446849823, + "step": 1121 + }, + { + "epoch": 0.6151315789473685, + "grad_norm": 0.49264058470726013, + "learning_rate": 7.068959961799569e-06, + "loss": 0.4657437801361084, + "step": 1122 + }, + { + "epoch": 0.6156798245614035, + "grad_norm": 0.454782634973526, + "learning_rate": 7.051622901081838e-06, + "loss": 0.45418664813041687, + "step": 1123 + }, + { + "epoch": 0.6162280701754386, + "grad_norm": 0.487419992685318, + "learning_rate": 7.034295540702398e-06, + "loss": 0.45389851927757263, + "step": 1124 + }, + { + "epoch": 0.6167763157894737, + "grad_norm": 0.46922385692596436, + "learning_rate": 7.016977937669312e-06, + "loss": 0.45923101902008057, + "step": 1125 + }, + { + "epoch": 0.6173245614035088, + "grad_norm": 0.4770352840423584, + "learning_rate": 6.999670148958533e-06, + "loss": 0.4591761827468872, + "step": 1126 + }, + { + "epoch": 0.6178728070175439, + "grad_norm": 0.4865526258945465, + "learning_rate": 6.9823722315137255e-06, + "loss": 0.4650563597679138, + "step": 1127 + }, + { + "epoch": 0.618421052631579, + "grad_norm": 0.47865018248558044, + "learning_rate": 6.9650842422460875e-06, + "loss": 0.4477848410606384, + "step": 1128 + }, + { + "epoch": 0.6189692982456141, + "grad_norm": 0.4948062300682068, + "learning_rate": 6.947806238034143e-06, + "loss": 0.450751394033432, + "step": 1129 + }, + { + "epoch": 0.6195175438596491, + "grad_norm": 0.4848669767379761, + "learning_rate": 6.930538275723562e-06, + "loss": 0.45120009779930115, + "step": 1130 + }, + { + "epoch": 0.6200657894736842, + "grad_norm": 0.5030041933059692, + "learning_rate": 6.9132804121269855e-06, + "loss": 0.468425989151001, + "step": 1131 + }, + { + "epoch": 0.6206140350877193, + "grad_norm": 0.4994588792324066, + "learning_rate": 6.896032704023828e-06, + "loss": 0.44643884897232056, + "step": 1132 + }, + { + "epoch": 0.6211622807017544, + "grad_norm": 0.4888147711753845, + "learning_rate": 6.878795208160077e-06, + "loss": 0.4606155753135681, + "step": 1133 + }, + { + "epoch": 0.6217105263157895, + "grad_norm": 0.49748578667640686, + "learning_rate": 6.861567981248143e-06, + "loss": 0.4452589452266693, + "step": 1134 + }, + { + "epoch": 0.6222587719298246, + "grad_norm": 0.5071046352386475, + "learning_rate": 6.8443510799666375e-06, + "loss": 0.4486559331417084, + "step": 1135 + }, + { + "epoch": 0.6228070175438597, + "grad_norm": 0.4967880845069885, + "learning_rate": 6.827144560960201e-06, + "loss": 0.45914992690086365, + "step": 1136 + }, + { + "epoch": 0.6233552631578947, + "grad_norm": 0.47261181473731995, + "learning_rate": 6.809948480839319e-06, + "loss": 0.4559020400047302, + "step": 1137 + }, + { + "epoch": 0.6239035087719298, + "grad_norm": 0.4834285080432892, + "learning_rate": 6.792762896180133e-06, + "loss": 0.44998395442962646, + "step": 1138 + }, + { + "epoch": 0.6244517543859649, + "grad_norm": 0.4783952236175537, + "learning_rate": 6.7755878635242445e-06, + "loss": 0.45917055010795593, + "step": 1139 + }, + { + "epoch": 0.625, + "grad_norm": 0.4811253249645233, + "learning_rate": 6.758423439378556e-06, + "loss": 0.4707831144332886, + "step": 1140 + }, + { + "epoch": 0.6255482456140351, + "grad_norm": 0.458852082490921, + "learning_rate": 6.741269680215053e-06, + "loss": 0.4482444226741791, + "step": 1141 + }, + { + "epoch": 0.6260964912280702, + "grad_norm": 0.47917571663856506, + "learning_rate": 6.7241266424706365e-06, + "loss": 0.45533668994903564, + "step": 1142 + }, + { + "epoch": 0.6266447368421053, + "grad_norm": 0.4818931221961975, + "learning_rate": 6.706994382546927e-06, + "loss": 0.4754176139831543, + "step": 1143 + }, + { + "epoch": 0.6271929824561403, + "grad_norm": 0.48275595903396606, + "learning_rate": 6.689872956810103e-06, + "loss": 0.4723743200302124, + "step": 1144 + }, + { + "epoch": 0.6277412280701754, + "grad_norm": 0.4792291224002838, + "learning_rate": 6.672762421590682e-06, + "loss": 0.4706377685070038, + "step": 1145 + }, + { + "epoch": 0.6282894736842105, + "grad_norm": 0.49965038895606995, + "learning_rate": 6.655662833183352e-06, + "loss": 0.46659570932388306, + "step": 1146 + }, + { + "epoch": 0.6288377192982456, + "grad_norm": 0.5105276703834534, + "learning_rate": 6.638574247846793e-06, + "loss": 0.46983322501182556, + "step": 1147 + }, + { + "epoch": 0.6293859649122807, + "grad_norm": 0.4503490924835205, + "learning_rate": 6.621496721803482e-06, + "loss": 0.4630698561668396, + "step": 1148 + }, + { + "epoch": 0.6299342105263158, + "grad_norm": 0.4773010313510895, + "learning_rate": 6.604430311239504e-06, + "loss": 0.45521196722984314, + "step": 1149 + }, + { + "epoch": 0.6304824561403509, + "grad_norm": 0.4769289791584015, + "learning_rate": 6.5873750723043805e-06, + "loss": 0.4638155698776245, + "step": 1150 + }, + { + "epoch": 0.6310307017543859, + "grad_norm": 0.4745919108390808, + "learning_rate": 6.570331061110877e-06, + "loss": 0.45071423053741455, + "step": 1151 + }, + { + "epoch": 0.631578947368421, + "grad_norm": 0.470969557762146, + "learning_rate": 6.553298333734812e-06, + "loss": 0.4583868980407715, + "step": 1152 + }, + { + "epoch": 0.6321271929824561, + "grad_norm": 0.4607287645339966, + "learning_rate": 6.53627694621489e-06, + "loss": 0.4590507745742798, + "step": 1153 + }, + { + "epoch": 0.6326754385964912, + "grad_norm": 0.4686935842037201, + "learning_rate": 6.519266954552501e-06, + "loss": 0.4589221477508545, + "step": 1154 + }, + { + "epoch": 0.6332236842105263, + "grad_norm": 0.4886135160923004, + "learning_rate": 6.502268414711534e-06, + "loss": 0.4689658582210541, + "step": 1155 + }, + { + "epoch": 0.6337719298245614, + "grad_norm": 0.48805180191993713, + "learning_rate": 6.485281382618222e-06, + "loss": 0.4538452625274658, + "step": 1156 + }, + { + "epoch": 0.6343201754385965, + "grad_norm": 0.47901463508605957, + "learning_rate": 6.468305914160917e-06, + "loss": 0.4631307125091553, + "step": 1157 + }, + { + "epoch": 0.6348684210526315, + "grad_norm": 0.4804033637046814, + "learning_rate": 6.451342065189932e-06, + "loss": 0.47523754835128784, + "step": 1158 + }, + { + "epoch": 0.6354166666666666, + "grad_norm": 0.4747447073459625, + "learning_rate": 6.434389891517356e-06, + "loss": 0.4520912766456604, + "step": 1159 + }, + { + "epoch": 0.6359649122807017, + "grad_norm": 0.46864253282546997, + "learning_rate": 6.41744944891686e-06, + "loss": 0.44075965881347656, + "step": 1160 + }, + { + "epoch": 0.6365131578947368, + "grad_norm": 0.46973252296447754, + "learning_rate": 6.400520793123519e-06, + "loss": 0.47107774019241333, + "step": 1161 + }, + { + "epoch": 0.6370614035087719, + "grad_norm": 0.4863990843296051, + "learning_rate": 6.383603979833635e-06, + "loss": 0.46418333053588867, + "step": 1162 + }, + { + "epoch": 0.637609649122807, + "grad_norm": 0.45822757482528687, + "learning_rate": 6.366699064704538e-06, + "loss": 0.45333075523376465, + "step": 1163 + }, + { + "epoch": 0.6381578947368421, + "grad_norm": 0.47059157490730286, + "learning_rate": 6.349806103354417e-06, + "loss": 0.4626568555831909, + "step": 1164 + }, + { + "epoch": 0.6387061403508771, + "grad_norm": 0.48919686675071716, + "learning_rate": 6.332925151362139e-06, + "loss": 0.4648444652557373, + "step": 1165 + }, + { + "epoch": 0.6392543859649122, + "grad_norm": 0.4936853349208832, + "learning_rate": 6.316056264267048e-06, + "loss": 0.45927223563194275, + "step": 1166 + }, + { + "epoch": 0.6398026315789473, + "grad_norm": 0.49010518193244934, + "learning_rate": 6.299199497568798e-06, + "loss": 0.4588416516780853, + "step": 1167 + }, + { + "epoch": 0.6403508771929824, + "grad_norm": 0.48247334361076355, + "learning_rate": 6.282354906727171e-06, + "loss": 0.44723185896873474, + "step": 1168 + }, + { + "epoch": 0.6408991228070176, + "grad_norm": 0.49713844060897827, + "learning_rate": 6.265522547161884e-06, + "loss": 0.4614097476005554, + "step": 1169 + }, + { + "epoch": 0.6414473684210527, + "grad_norm": 0.4781050682067871, + "learning_rate": 6.24870247425241e-06, + "loss": 0.45371532440185547, + "step": 1170 + }, + { + "epoch": 0.6419956140350878, + "grad_norm": 0.5242354869842529, + "learning_rate": 6.23189474333781e-06, + "loss": 0.4608764350414276, + "step": 1171 + }, + { + "epoch": 0.6425438596491229, + "grad_norm": 0.5145214200019836, + "learning_rate": 6.215099409716527e-06, + "loss": 0.4503445625305176, + "step": 1172 + }, + { + "epoch": 0.6430921052631579, + "grad_norm": 0.47920140624046326, + "learning_rate": 6.19831652864622e-06, + "loss": 0.44728970527648926, + "step": 1173 + }, + { + "epoch": 0.643640350877193, + "grad_norm": 0.49056777358055115, + "learning_rate": 6.1815461553435784e-06, + "loss": 0.46014583110809326, + "step": 1174 + }, + { + "epoch": 0.6441885964912281, + "grad_norm": 0.46811583638191223, + "learning_rate": 6.164788344984147e-06, + "loss": 0.45799893140792847, + "step": 1175 + }, + { + "epoch": 0.6447368421052632, + "grad_norm": 0.4585595726966858, + "learning_rate": 6.1480431527021235e-06, + "loss": 0.4535917043685913, + "step": 1176 + }, + { + "epoch": 0.6452850877192983, + "grad_norm": 0.4631287753582001, + "learning_rate": 6.1313106335902e-06, + "loss": 0.4646044671535492, + "step": 1177 + }, + { + "epoch": 0.6458333333333334, + "grad_norm": 0.46791666746139526, + "learning_rate": 6.114590842699379e-06, + "loss": 0.45151329040527344, + "step": 1178 + }, + { + "epoch": 0.6463815789473685, + "grad_norm": 0.5028589963912964, + "learning_rate": 6.097883835038773e-06, + "loss": 0.4618037939071655, + "step": 1179 + }, + { + "epoch": 0.6469298245614035, + "grad_norm": 0.46903207898139954, + "learning_rate": 6.081189665575447e-06, + "loss": 0.4541777968406677, + "step": 1180 + }, + { + "epoch": 0.6474780701754386, + "grad_norm": 0.4830973744392395, + "learning_rate": 6.0645083892342274e-06, + "loss": 0.45828375220298767, + "step": 1181 + }, + { + "epoch": 0.6480263157894737, + "grad_norm": 0.46263155341148376, + "learning_rate": 6.047840060897512e-06, + "loss": 0.44502386450767517, + "step": 1182 + }, + { + "epoch": 0.6485745614035088, + "grad_norm": 0.4844227731227875, + "learning_rate": 6.031184735405106e-06, + "loss": 0.4488610625267029, + "step": 1183 + }, + { + "epoch": 0.6491228070175439, + "grad_norm": 1.6031123399734497, + "learning_rate": 6.01454246755404e-06, + "loss": 0.4552503824234009, + "step": 1184 + }, + { + "epoch": 0.649671052631579, + "grad_norm": 0.5204753875732422, + "learning_rate": 5.997913312098374e-06, + "loss": 0.4529512822628021, + "step": 1185 + }, + { + "epoch": 0.6502192982456141, + "grad_norm": 0.46702829003334045, + "learning_rate": 5.981297323749028e-06, + "loss": 0.45574164390563965, + "step": 1186 + }, + { + "epoch": 0.6507675438596491, + "grad_norm": 0.4901115894317627, + "learning_rate": 5.964694557173609e-06, + "loss": 0.46220001578330994, + "step": 1187 + }, + { + "epoch": 0.6513157894736842, + "grad_norm": 0.48896780610084534, + "learning_rate": 5.948105066996221e-06, + "loss": 0.45643150806427, + "step": 1188 + }, + { + "epoch": 0.6518640350877193, + "grad_norm": 0.4706425964832306, + "learning_rate": 5.931528907797281e-06, + "loss": 0.4490327537059784, + "step": 1189 + }, + { + "epoch": 0.6524122807017544, + "grad_norm": 0.46755537390708923, + "learning_rate": 5.914966134113354e-06, + "loss": 0.46012648940086365, + "step": 1190 + }, + { + "epoch": 0.6529605263157895, + "grad_norm": 0.471127986907959, + "learning_rate": 5.898416800436966e-06, + "loss": 0.4527897834777832, + "step": 1191 + }, + { + "epoch": 0.6535087719298246, + "grad_norm": 0.48551979660987854, + "learning_rate": 5.8818809612164155e-06, + "loss": 0.4556393623352051, + "step": 1192 + }, + { + "epoch": 0.6540570175438597, + "grad_norm": 0.47003045678138733, + "learning_rate": 5.865358670855614e-06, + "loss": 0.4481228291988373, + "step": 1193 + }, + { + "epoch": 0.6546052631578947, + "grad_norm": 0.4837598204612732, + "learning_rate": 5.848849983713894e-06, + "loss": 0.44623738527297974, + "step": 1194 + }, + { + "epoch": 0.6551535087719298, + "grad_norm": 0.5016481280326843, + "learning_rate": 5.832354954105826e-06, + "loss": 0.4614674746990204, + "step": 1195 + }, + { + "epoch": 0.6557017543859649, + "grad_norm": 0.46373236179351807, + "learning_rate": 5.815873636301053e-06, + "loss": 0.45254772901535034, + "step": 1196 + }, + { + "epoch": 0.65625, + "grad_norm": 0.48568040132522583, + "learning_rate": 5.799406084524106e-06, + "loss": 0.4651646018028259, + "step": 1197 + }, + { + "epoch": 0.6567982456140351, + "grad_norm": 0.48499056696891785, + "learning_rate": 5.7829523529542185e-06, + "loss": 0.4580027759075165, + "step": 1198 + }, + { + "epoch": 0.6573464912280702, + "grad_norm": 0.48886817693710327, + "learning_rate": 5.766512495725165e-06, + "loss": 0.4550062119960785, + "step": 1199 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 0.4718495011329651, + "learning_rate": 5.7500865669250625e-06, + "loss": 0.4428224563598633, + "step": 1200 + }, + { + "epoch": 0.6584429824561403, + "grad_norm": 0.9899610280990601, + "learning_rate": 5.733674620596202e-06, + "loss": 0.46023988723754883, + "step": 1201 + }, + { + "epoch": 0.6589912280701754, + "grad_norm": 0.4938037395477295, + "learning_rate": 5.717276710734879e-06, + "loss": 0.4411492943763733, + "step": 1202 + }, + { + "epoch": 0.6595394736842105, + "grad_norm": 0.49084752798080444, + "learning_rate": 5.7008928912912096e-06, + "loss": 0.4550079107284546, + "step": 1203 + }, + { + "epoch": 0.6600877192982456, + "grad_norm": 0.48547691106796265, + "learning_rate": 5.68452321616894e-06, + "loss": 0.4425356388092041, + "step": 1204 + }, + { + "epoch": 0.6606359649122807, + "grad_norm": 0.7199835777282715, + "learning_rate": 5.668167739225294e-06, + "loss": 0.4713376462459564, + "step": 1205 + }, + { + "epoch": 0.6611842105263158, + "grad_norm": 0.47304996848106384, + "learning_rate": 5.651826514270773e-06, + "loss": 0.45630842447280884, + "step": 1206 + }, + { + "epoch": 0.6617324561403509, + "grad_norm": 0.45576387643814087, + "learning_rate": 5.635499595068986e-06, + "loss": 0.454340398311615, + "step": 1207 + }, + { + "epoch": 0.6622807017543859, + "grad_norm": 0.47770845890045166, + "learning_rate": 5.619187035336486e-06, + "loss": 0.4564405679702759, + "step": 1208 + }, + { + "epoch": 0.662828947368421, + "grad_norm": 0.4835106134414673, + "learning_rate": 5.602888888742583e-06, + "loss": 0.46910983324050903, + "step": 1209 + }, + { + "epoch": 0.6633771929824561, + "grad_norm": 0.500394344329834, + "learning_rate": 5.586605208909155e-06, + "loss": 0.4477820098400116, + "step": 1210 + }, + { + "epoch": 0.6639254385964912, + "grad_norm": 0.4716354012489319, + "learning_rate": 5.570336049410487e-06, + "loss": 0.4602228105068207, + "step": 1211 + }, + { + "epoch": 0.6644736842105263, + "grad_norm": 0.4881833791732788, + "learning_rate": 5.554081463773098e-06, + "loss": 0.43738436698913574, + "step": 1212 + }, + { + "epoch": 0.6650219298245614, + "grad_norm": 0.6254478693008423, + "learning_rate": 5.537841505475556e-06, + "loss": 0.44146454334259033, + "step": 1213 + }, + { + "epoch": 0.6655701754385965, + "grad_norm": 0.48677903413772583, + "learning_rate": 5.521616227948297e-06, + "loss": 0.44918176531791687, + "step": 1214 + }, + { + "epoch": 0.6661184210526315, + "grad_norm": 0.4938996434211731, + "learning_rate": 5.505405684573466e-06, + "loss": 0.45282015204429626, + "step": 1215 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.47285711765289307, + "learning_rate": 5.489209928684728e-06, + "loss": 0.4627668857574463, + "step": 1216 + }, + { + "epoch": 0.6672149122807017, + "grad_norm": 0.46757328510284424, + "learning_rate": 5.473029013567089e-06, + "loss": 0.4532211422920227, + "step": 1217 + }, + { + "epoch": 0.6677631578947368, + "grad_norm": 0.49316030740737915, + "learning_rate": 5.456862992456738e-06, + "loss": 0.4442199170589447, + "step": 1218 + }, + { + "epoch": 0.6683114035087719, + "grad_norm": 0.4487299919128418, + "learning_rate": 5.440711918540863e-06, + "loss": 0.4583362936973572, + "step": 1219 + }, + { + "epoch": 0.668859649122807, + "grad_norm": 0.47031185030937195, + "learning_rate": 5.424575844957463e-06, + "loss": 0.4489290714263916, + "step": 1220 + }, + { + "epoch": 0.6694078947368421, + "grad_norm": 0.4853135645389557, + "learning_rate": 5.408454824795199e-06, + "loss": 0.4479921758174896, + "step": 1221 + }, + { + "epoch": 0.6699561403508771, + "grad_norm": 0.4838583171367645, + "learning_rate": 5.392348911093198e-06, + "loss": 0.4525047242641449, + "step": 1222 + }, + { + "epoch": 0.6705043859649122, + "grad_norm": 0.485013484954834, + "learning_rate": 5.376258156840874e-06, + "loss": 0.45637011528015137, + "step": 1223 + }, + { + "epoch": 0.6710526315789473, + "grad_norm": 0.4787314832210541, + "learning_rate": 5.360182614977797e-06, + "loss": 0.457485169172287, + "step": 1224 + }, + { + "epoch": 0.6716008771929824, + "grad_norm": 0.46060094237327576, + "learning_rate": 5.344122338393458e-06, + "loss": 0.4558663070201874, + "step": 1225 + }, + { + "epoch": 0.6721491228070176, + "grad_norm": 0.49685534834861755, + "learning_rate": 5.328077379927133e-06, + "loss": 0.46039676666259766, + "step": 1226 + }, + { + "epoch": 0.6726973684210527, + "grad_norm": 0.4821500778198242, + "learning_rate": 5.312047792367707e-06, + "loss": 0.46990689635276794, + "step": 1227 + }, + { + "epoch": 0.6732456140350878, + "grad_norm": 0.49922671914100647, + "learning_rate": 5.296033628453484e-06, + "loss": 0.45971810817718506, + "step": 1228 + }, + { + "epoch": 0.6737938596491229, + "grad_norm": 0.48737815022468567, + "learning_rate": 5.28003494087203e-06, + "loss": 0.4628313481807709, + "step": 1229 + }, + { + "epoch": 0.6743421052631579, + "grad_norm": 0.4753425419330597, + "learning_rate": 5.264051782259996e-06, + "loss": 0.44580182433128357, + "step": 1230 + }, + { + "epoch": 0.674890350877193, + "grad_norm": 0.4703112244606018, + "learning_rate": 5.248084205202932e-06, + "loss": 0.4599781036376953, + "step": 1231 + }, + { + "epoch": 0.6754385964912281, + "grad_norm": 0.5177963376045227, + "learning_rate": 5.232132262235126e-06, + "loss": 0.4688262641429901, + "step": 1232 + }, + { + "epoch": 0.6759868421052632, + "grad_norm": 0.5090923309326172, + "learning_rate": 5.2161960058394375e-06, + "loss": 0.45433613657951355, + "step": 1233 + }, + { + "epoch": 0.6765350877192983, + "grad_norm": 0.481566458940506, + "learning_rate": 5.200275488447104e-06, + "loss": 0.44734376668930054, + "step": 1234 + }, + { + "epoch": 0.6770833333333334, + "grad_norm": 0.4732418954372406, + "learning_rate": 5.184370762437589e-06, + "loss": 0.458044171333313, + "step": 1235 + }, + { + "epoch": 0.6776315789473685, + "grad_norm": 0.4842263162136078, + "learning_rate": 5.168481880138405e-06, + "loss": 0.4429929256439209, + "step": 1236 + }, + { + "epoch": 0.6781798245614035, + "grad_norm": 0.46749964356422424, + "learning_rate": 5.1526088938249275e-06, + "loss": 0.4532344341278076, + "step": 1237 + }, + { + "epoch": 0.6787280701754386, + "grad_norm": 0.48561593890190125, + "learning_rate": 5.136751855720235e-06, + "loss": 0.45579272508621216, + "step": 1238 + }, + { + "epoch": 0.6792763157894737, + "grad_norm": 0.5013870596885681, + "learning_rate": 5.120910817994939e-06, + "loss": 0.45137521624565125, + "step": 1239 + }, + { + "epoch": 0.6798245614035088, + "grad_norm": 0.4608660638332367, + "learning_rate": 5.105085832767014e-06, + "loss": 0.45345020294189453, + "step": 1240 + }, + { + "epoch": 0.6803728070175439, + "grad_norm": 0.47580307722091675, + "learning_rate": 5.0892769521016075e-06, + "loss": 0.46015557646751404, + "step": 1241 + }, + { + "epoch": 0.680921052631579, + "grad_norm": 0.4806741774082184, + "learning_rate": 5.073484228010897e-06, + "loss": 0.436834454536438, + "step": 1242 + }, + { + "epoch": 0.6814692982456141, + "grad_norm": 0.5022061467170715, + "learning_rate": 5.057707712453891e-06, + "loss": 0.45409783720970154, + "step": 1243 + }, + { + "epoch": 0.6820175438596491, + "grad_norm": 0.4873558282852173, + "learning_rate": 5.041947457336273e-06, + "loss": 0.4772658944129944, + "step": 1244 + }, + { + "epoch": 0.6825657894736842, + "grad_norm": 0.46109822392463684, + "learning_rate": 5.0262035145102375e-06, + "loss": 0.4520254135131836, + "step": 1245 + }, + { + "epoch": 0.6831140350877193, + "grad_norm": 0.49353766441345215, + "learning_rate": 5.010475935774306e-06, + "loss": 0.4518451392650604, + "step": 1246 + }, + { + "epoch": 0.6836622807017544, + "grad_norm": 0.483317494392395, + "learning_rate": 4.994764772873158e-06, + "loss": 0.45198535919189453, + "step": 1247 + }, + { + "epoch": 0.6842105263157895, + "grad_norm": 0.4909769594669342, + "learning_rate": 4.979070077497461e-06, + "loss": 0.4568019509315491, + "step": 1248 + }, + { + "epoch": 0.6847587719298246, + "grad_norm": 0.4987059235572815, + "learning_rate": 4.963391901283717e-06, + "loss": 0.4692850708961487, + "step": 1249 + }, + { + "epoch": 0.6853070175438597, + "grad_norm": 0.49539315700531006, + "learning_rate": 4.947730295814063e-06, + "loss": 0.4367455244064331, + "step": 1250 + }, + { + "epoch": 0.6858552631578947, + "grad_norm": 0.4935382604598999, + "learning_rate": 4.932085312616127e-06, + "loss": 0.476955384016037, + "step": 1251 + }, + { + "epoch": 0.6864035087719298, + "grad_norm": 0.4814411997795105, + "learning_rate": 4.9164570031628525e-06, + "loss": 0.459894597530365, + "step": 1252 + }, + { + "epoch": 0.6869517543859649, + "grad_norm": 0.49503499269485474, + "learning_rate": 4.900845418872312e-06, + "loss": 0.44997191429138184, + "step": 1253 + }, + { + "epoch": 0.6875, + "grad_norm": 0.503138542175293, + "learning_rate": 4.885250611107558e-06, + "loss": 0.4328072667121887, + "step": 1254 + }, + { + "epoch": 0.6880482456140351, + "grad_norm": 0.4843796491622925, + "learning_rate": 4.8696726311764485e-06, + "loss": 0.4629124104976654, + "step": 1255 + }, + { + "epoch": 0.6885964912280702, + "grad_norm": 0.531264066696167, + "learning_rate": 4.854111530331482e-06, + "loss": 0.45157432556152344, + "step": 1256 + }, + { + "epoch": 0.6891447368421053, + "grad_norm": 0.4729582667350769, + "learning_rate": 4.8385673597696095e-06, + "loss": 0.4563821256160736, + "step": 1257 + }, + { + "epoch": 0.6896929824561403, + "grad_norm": 0.497354120016098, + "learning_rate": 4.823040170632095e-06, + "loss": 0.45511579513549805, + "step": 1258 + }, + { + "epoch": 0.6902412280701754, + "grad_norm": 0.4853935241699219, + "learning_rate": 4.807530014004325e-06, + "loss": 0.45746946334838867, + "step": 1259 + }, + { + "epoch": 0.6907894736842105, + "grad_norm": 0.48168012499809265, + "learning_rate": 4.792036940915642e-06, + "loss": 0.4588317573070526, + "step": 1260 + }, + { + "epoch": 0.6913377192982456, + "grad_norm": 0.47273921966552734, + "learning_rate": 4.776561002339198e-06, + "loss": 0.4494730234146118, + "step": 1261 + }, + { + "epoch": 0.6918859649122807, + "grad_norm": 0.4824991524219513, + "learning_rate": 4.761102249191763e-06, + "loss": 0.45408129692077637, + "step": 1262 + }, + { + "epoch": 0.6924342105263158, + "grad_norm": 0.494086891412735, + "learning_rate": 4.745660732333561e-06, + "loss": 0.4487881064414978, + "step": 1263 + }, + { + "epoch": 0.6929824561403509, + "grad_norm": 0.4966714680194855, + "learning_rate": 4.730236502568121e-06, + "loss": 0.4485003650188446, + "step": 1264 + }, + { + "epoch": 0.6935307017543859, + "grad_norm": 0.49037015438079834, + "learning_rate": 4.714829610642082e-06, + "loss": 0.45986589789390564, + "step": 1265 + }, + { + "epoch": 0.694078947368421, + "grad_norm": 0.4807148873806, + "learning_rate": 4.699440107245041e-06, + "loss": 0.4428585469722748, + "step": 1266 + }, + { + "epoch": 0.6946271929824561, + "grad_norm": 0.48849642276763916, + "learning_rate": 4.684068043009406e-06, + "loss": 0.4557689130306244, + "step": 1267 + }, + { + "epoch": 0.6951754385964912, + "grad_norm": 0.4828411042690277, + "learning_rate": 4.668713468510187e-06, + "loss": 0.4486156404018402, + "step": 1268 + }, + { + "epoch": 0.6957236842105263, + "grad_norm": 0.4808543026447296, + "learning_rate": 4.6533764342648546e-06, + "loss": 0.4532542824745178, + "step": 1269 + }, + { + "epoch": 0.6962719298245614, + "grad_norm": 0.46090418100357056, + "learning_rate": 4.638056990733184e-06, + "loss": 0.466025710105896, + "step": 1270 + }, + { + "epoch": 0.6968201754385965, + "grad_norm": 0.46584606170654297, + "learning_rate": 4.622755188317059e-06, + "loss": 0.4666459262371063, + "step": 1271 + }, + { + "epoch": 0.6973684210526315, + "grad_norm": 0.48791778087615967, + "learning_rate": 4.607471077360337e-06, + "loss": 0.4741817116737366, + "step": 1272 + }, + { + "epoch": 0.6979166666666666, + "grad_norm": 0.4724322259426117, + "learning_rate": 4.5922047081486665e-06, + "loss": 0.4573628306388855, + "step": 1273 + }, + { + "epoch": 0.6984649122807017, + "grad_norm": 0.48471513390541077, + "learning_rate": 4.576956130909317e-06, + "loss": 0.4618206024169922, + "step": 1274 + }, + { + "epoch": 0.6990131578947368, + "grad_norm": 0.47347649931907654, + "learning_rate": 4.561725395811027e-06, + "loss": 0.44821199774742126, + "step": 1275 + }, + { + "epoch": 0.6995614035087719, + "grad_norm": 0.4988267421722412, + "learning_rate": 4.546512552963831e-06, + "loss": 0.4621202349662781, + "step": 1276 + }, + { + "epoch": 0.700109649122807, + "grad_norm": 0.511737585067749, + "learning_rate": 4.5313176524188995e-06, + "loss": 0.4463235139846802, + "step": 1277 + }, + { + "epoch": 0.7006578947368421, + "grad_norm": 0.46017760038375854, + "learning_rate": 4.516140744168376e-06, + "loss": 0.45576271414756775, + "step": 1278 + }, + { + "epoch": 0.7012061403508771, + "grad_norm": 0.472093790769577, + "learning_rate": 4.500981878145195e-06, + "loss": 0.45104914903640747, + "step": 1279 + }, + { + "epoch": 0.7017543859649122, + "grad_norm": 0.4837212562561035, + "learning_rate": 4.485841104222946e-06, + "loss": 0.45531660318374634, + "step": 1280 + }, + { + "epoch": 0.7023026315789473, + "grad_norm": 0.5072401165962219, + "learning_rate": 4.470718472215683e-06, + "loss": 0.4535329341888428, + "step": 1281 + }, + { + "epoch": 0.7028508771929824, + "grad_norm": 0.5329047441482544, + "learning_rate": 4.455614031877775e-06, + "loss": 0.4547671973705292, + "step": 1282 + }, + { + "epoch": 0.7033991228070176, + "grad_norm": 0.4614371061325073, + "learning_rate": 4.440527832903748e-06, + "loss": 0.4641307294368744, + "step": 1283 + }, + { + "epoch": 0.7039473684210527, + "grad_norm": 0.4439223110675812, + "learning_rate": 4.425459924928102e-06, + "loss": 0.44951772689819336, + "step": 1284 + }, + { + "epoch": 0.7044956140350878, + "grad_norm": 0.4774690568447113, + "learning_rate": 4.4104103575251565e-06, + "loss": 0.4569922685623169, + "step": 1285 + }, + { + "epoch": 0.7050438596491229, + "grad_norm": 0.4845373332500458, + "learning_rate": 4.395379180208904e-06, + "loss": 0.45955735445022583, + "step": 1286 + }, + { + "epoch": 0.7055921052631579, + "grad_norm": 0.45962685346603394, + "learning_rate": 4.380366442432814e-06, + "loss": 0.44685596227645874, + "step": 1287 + }, + { + "epoch": 0.706140350877193, + "grad_norm": 0.46158796548843384, + "learning_rate": 4.365372193589704e-06, + "loss": 0.462146520614624, + "step": 1288 + }, + { + "epoch": 0.7066885964912281, + "grad_norm": 0.49494469165802, + "learning_rate": 4.350396483011559e-06, + "loss": 0.4598894715309143, + "step": 1289 + }, + { + "epoch": 0.7072368421052632, + "grad_norm": 0.47989487648010254, + "learning_rate": 4.335439359969367e-06, + "loss": 0.4486043155193329, + "step": 1290 + }, + { + "epoch": 0.7077850877192983, + "grad_norm": 0.46652752161026, + "learning_rate": 4.320500873672957e-06, + "loss": 0.4489085078239441, + "step": 1291 + }, + { + "epoch": 0.7083333333333334, + "grad_norm": 0.46897727251052856, + "learning_rate": 4.3055810732708584e-06, + "loss": 0.4552903175354004, + "step": 1292 + }, + { + "epoch": 0.7088815789473685, + "grad_norm": 0.461935430765152, + "learning_rate": 4.290680007850105e-06, + "loss": 0.4531635046005249, + "step": 1293 + }, + { + "epoch": 0.7094298245614035, + "grad_norm": 0.4682982861995697, + "learning_rate": 4.275797726436105e-06, + "loss": 0.4542514979839325, + "step": 1294 + }, + { + "epoch": 0.7099780701754386, + "grad_norm": 0.4622385501861572, + "learning_rate": 4.260934277992463e-06, + "loss": 0.45592039823532104, + "step": 1295 + }, + { + "epoch": 0.7105263157894737, + "grad_norm": 0.46560776233673096, + "learning_rate": 4.246089711420817e-06, + "loss": 0.4482199549674988, + "step": 1296 + }, + { + "epoch": 0.7110745614035088, + "grad_norm": 0.4624495208263397, + "learning_rate": 4.231264075560684e-06, + "loss": 0.44374164938926697, + "step": 1297 + }, + { + "epoch": 0.7116228070175439, + "grad_norm": 0.4659866690635681, + "learning_rate": 4.216457419189298e-06, + "loss": 0.4579788148403168, + "step": 1298 + }, + { + "epoch": 0.712171052631579, + "grad_norm": 0.45782938599586487, + "learning_rate": 4.201669791021458e-06, + "loss": 0.4533209204673767, + "step": 1299 + }, + { + "epoch": 0.7127192982456141, + "grad_norm": 0.4743126332759857, + "learning_rate": 4.186901239709342e-06, + "loss": 0.4503573775291443, + "step": 1300 + }, + { + "epoch": 0.7132675438596491, + "grad_norm": 0.4711255729198456, + "learning_rate": 4.172151813842382e-06, + "loss": 0.43793973326683044, + "step": 1301 + }, + { + "epoch": 0.7138157894736842, + "grad_norm": 0.4743192791938782, + "learning_rate": 4.157421561947075e-06, + "loss": 0.4586021304130554, + "step": 1302 + }, + { + "epoch": 0.7143640350877193, + "grad_norm": 0.47850310802459717, + "learning_rate": 4.142710532486831e-06, + "loss": 0.45045897364616394, + "step": 1303 + }, + { + "epoch": 0.7149122807017544, + "grad_norm": 0.45966672897338867, + "learning_rate": 4.12801877386183e-06, + "loss": 0.46221137046813965, + "step": 1304 + }, + { + "epoch": 0.7154605263157895, + "grad_norm": 0.48182302713394165, + "learning_rate": 4.113346334408844e-06, + "loss": 0.4529826045036316, + "step": 1305 + }, + { + "epoch": 0.7160087719298246, + "grad_norm": 0.4576120376586914, + "learning_rate": 4.0986932624010775e-06, + "loss": 0.4573781490325928, + "step": 1306 + }, + { + "epoch": 0.7165570175438597, + "grad_norm": 0.48668473958969116, + "learning_rate": 4.0840596060480266e-06, + "loss": 0.425226628780365, + "step": 1307 + }, + { + "epoch": 0.7171052631578947, + "grad_norm": 0.5012518763542175, + "learning_rate": 4.0694454134952956e-06, + "loss": 0.45593565702438354, + "step": 1308 + }, + { + "epoch": 0.7176535087719298, + "grad_norm": 0.4993784725666046, + "learning_rate": 4.054850732824463e-06, + "loss": 0.4585998058319092, + "step": 1309 + }, + { + "epoch": 0.7182017543859649, + "grad_norm": 0.482063889503479, + "learning_rate": 4.040275612052905e-06, + "loss": 0.44233378767967224, + "step": 1310 + }, + { + "epoch": 0.71875, + "grad_norm": 0.4934483468532562, + "learning_rate": 4.025720099133651e-06, + "loss": 0.44768810272216797, + "step": 1311 + }, + { + "epoch": 0.7192982456140351, + "grad_norm": 0.48552730679512024, + "learning_rate": 4.011184241955212e-06, + "loss": 0.45926231145858765, + "step": 1312 + }, + { + "epoch": 0.7198464912280702, + "grad_norm": 0.5012895464897156, + "learning_rate": 3.996668088341432e-06, + "loss": 0.45643967390060425, + "step": 1313 + }, + { + "epoch": 0.7203947368421053, + "grad_norm": 0.4595252275466919, + "learning_rate": 3.982171686051333e-06, + "loss": 0.4594549834728241, + "step": 1314 + }, + { + "epoch": 0.7209429824561403, + "grad_norm": 0.47561824321746826, + "learning_rate": 3.967695082778958e-06, + "loss": 0.45136356353759766, + "step": 1315 + }, + { + "epoch": 0.7214912280701754, + "grad_norm": 0.46955716609954834, + "learning_rate": 3.953238326153193e-06, + "loss": 0.45608794689178467, + "step": 1316 + }, + { + "epoch": 0.7220394736842105, + "grad_norm": 0.4859420359134674, + "learning_rate": 3.938801463737647e-06, + "loss": 0.4654580056667328, + "step": 1317 + }, + { + "epoch": 0.7225877192982456, + "grad_norm": 0.47013059258461, + "learning_rate": 3.924384543030464e-06, + "loss": 0.4543027877807617, + "step": 1318 + }, + { + "epoch": 0.7231359649122807, + "grad_norm": 0.47090038657188416, + "learning_rate": 3.909987611464179e-06, + "loss": 0.4615466594696045, + "step": 1319 + }, + { + "epoch": 0.7236842105263158, + "grad_norm": 0.4833797216415405, + "learning_rate": 3.895610716405565e-06, + "loss": 0.4522587060928345, + "step": 1320 + }, + { + "epoch": 0.7242324561403509, + "grad_norm": 0.48517337441444397, + "learning_rate": 3.881253905155481e-06, + "loss": 0.44629907608032227, + "step": 1321 + }, + { + "epoch": 0.7247807017543859, + "grad_norm": 0.48679855465888977, + "learning_rate": 3.86691722494869e-06, + "loss": 0.4545023441314697, + "step": 1322 + }, + { + "epoch": 0.725328947368421, + "grad_norm": 0.5102160573005676, + "learning_rate": 3.852600722953741e-06, + "loss": 0.4341837763786316, + "step": 1323 + }, + { + "epoch": 0.7258771929824561, + "grad_norm": 0.4852652847766876, + "learning_rate": 3.838304446272782e-06, + "loss": 0.4571802616119385, + "step": 1324 + }, + { + "epoch": 0.7264254385964912, + "grad_norm": 0.462271511554718, + "learning_rate": 3.82402844194143e-06, + "loss": 0.44814571738243103, + "step": 1325 + }, + { + "epoch": 0.7269736842105263, + "grad_norm": 0.4705876111984253, + "learning_rate": 3.8097727569286003e-06, + "loss": 0.457721084356308, + "step": 1326 + }, + { + "epoch": 0.7275219298245614, + "grad_norm": 0.4641494154930115, + "learning_rate": 3.795537438136352e-06, + "loss": 0.4574616849422455, + "step": 1327 + }, + { + "epoch": 0.7280701754385965, + "grad_norm": 0.4461621046066284, + "learning_rate": 3.7813225323997395e-06, + "loss": 0.45125675201416016, + "step": 1328 + }, + { + "epoch": 0.7286184210526315, + "grad_norm": 0.4990176260471344, + "learning_rate": 3.7671280864866644e-06, + "loss": 0.446943461894989, + "step": 1329 + }, + { + "epoch": 0.7291666666666666, + "grad_norm": 0.4785372018814087, + "learning_rate": 3.7529541470977037e-06, + "loss": 0.4530734419822693, + "step": 1330 + }, + { + "epoch": 0.7297149122807017, + "grad_norm": 0.47964751720428467, + "learning_rate": 3.738800760865975e-06, + "loss": 0.4506794810295105, + "step": 1331 + }, + { + "epoch": 0.7302631578947368, + "grad_norm": 0.4757719039916992, + "learning_rate": 3.7246679743569737e-06, + "loss": 0.4483231008052826, + "step": 1332 + }, + { + "epoch": 0.7308114035087719, + "grad_norm": 0.4769383370876312, + "learning_rate": 3.710555834068418e-06, + "loss": 0.4365041255950928, + "step": 1333 + }, + { + "epoch": 0.731359649122807, + "grad_norm": 0.49054670333862305, + "learning_rate": 3.696464386430093e-06, + "loss": 0.4484644830226898, + "step": 1334 + }, + { + "epoch": 0.7319078947368421, + "grad_norm": 0.4718336760997772, + "learning_rate": 3.6823936778037173e-06, + "loss": 0.4425055980682373, + "step": 1335 + }, + { + "epoch": 0.7324561403508771, + "grad_norm": 0.548764169216156, + "learning_rate": 3.6683437544827704e-06, + "loss": 0.45093807578086853, + "step": 1336 + }, + { + "epoch": 0.7330043859649122, + "grad_norm": 0.47863689064979553, + "learning_rate": 3.6543146626923422e-06, + "loss": 0.4338614344596863, + "step": 1337 + }, + { + "epoch": 0.7335526315789473, + "grad_norm": 0.4642828702926636, + "learning_rate": 3.640306448588996e-06, + "loss": 0.46210381388664246, + "step": 1338 + }, + { + "epoch": 0.7341008771929824, + "grad_norm": 0.47700735926628113, + "learning_rate": 3.626319158260595e-06, + "loss": 0.45029470324516296, + "step": 1339 + }, + { + "epoch": 0.7346491228070176, + "grad_norm": 0.46553367376327515, + "learning_rate": 3.6123528377261663e-06, + "loss": 0.44773992896080017, + "step": 1340 + }, + { + "epoch": 0.7351973684210527, + "grad_norm": 0.47391775250434875, + "learning_rate": 3.598407532935748e-06, + "loss": 0.4471183717250824, + "step": 1341 + }, + { + "epoch": 0.7357456140350878, + "grad_norm": 0.4671850800514221, + "learning_rate": 3.5844832897702363e-06, + "loss": 0.454129695892334, + "step": 1342 + }, + { + "epoch": 0.7362938596491229, + "grad_norm": 0.47181615233421326, + "learning_rate": 3.5705801540412268e-06, + "loss": 0.44526728987693787, + "step": 1343 + }, + { + "epoch": 0.7368421052631579, + "grad_norm": 0.4609008729457855, + "learning_rate": 3.556698171490871e-06, + "loss": 0.45439064502716064, + "step": 1344 + }, + { + "epoch": 0.737390350877193, + "grad_norm": 0.4642679691314697, + "learning_rate": 3.542837387791733e-06, + "loss": 0.4484332501888275, + "step": 1345 + }, + { + "epoch": 0.7379385964912281, + "grad_norm": 0.47382134199142456, + "learning_rate": 3.5289978485466224e-06, + "loss": 0.44568178057670593, + "step": 1346 + }, + { + "epoch": 0.7384868421052632, + "grad_norm": 0.5014953017234802, + "learning_rate": 3.5151795992884575e-06, + "loss": 0.4595361649990082, + "step": 1347 + }, + { + "epoch": 0.7390350877192983, + "grad_norm": 0.4758802354335785, + "learning_rate": 3.501382685480116e-06, + "loss": 0.4526955485343933, + "step": 1348 + }, + { + "epoch": 0.7395833333333334, + "grad_norm": 0.47585156559944153, + "learning_rate": 3.487607152514272e-06, + "loss": 0.44488176703453064, + "step": 1349 + }, + { + "epoch": 0.7401315789473685, + "grad_norm": 0.5094396471977234, + "learning_rate": 3.473853045713255e-06, + "loss": 0.44131070375442505, + "step": 1350 + }, + { + "epoch": 0.7406798245614035, + "grad_norm": 0.47419655323028564, + "learning_rate": 3.460120410328908e-06, + "loss": 0.44449982047080994, + "step": 1351 + }, + { + "epoch": 0.7412280701754386, + "grad_norm": 0.47309622168540955, + "learning_rate": 3.4464092915424328e-06, + "loss": 0.4419247508049011, + "step": 1352 + }, + { + "epoch": 0.7417763157894737, + "grad_norm": 0.46527430415153503, + "learning_rate": 3.4327197344642304e-06, + "loss": 0.4458807408809662, + "step": 1353 + }, + { + "epoch": 0.7423245614035088, + "grad_norm": 0.47822335362434387, + "learning_rate": 3.419051784133773e-06, + "loss": 0.4569171369075775, + "step": 1354 + }, + { + "epoch": 0.7428728070175439, + "grad_norm": 0.49528107047080994, + "learning_rate": 3.4054054855194395e-06, + "loss": 0.44940003752708435, + "step": 1355 + }, + { + "epoch": 0.743421052631579, + "grad_norm": 0.463140070438385, + "learning_rate": 3.3917808835183707e-06, + "loss": 0.45283064246177673, + "step": 1356 + }, + { + "epoch": 0.7439692982456141, + "grad_norm": 0.46392157673835754, + "learning_rate": 3.37817802295633e-06, + "loss": 0.45993560552597046, + "step": 1357 + }, + { + "epoch": 0.7445175438596491, + "grad_norm": 0.6057497262954712, + "learning_rate": 3.3645969485875528e-06, + "loss": 0.4528937041759491, + "step": 1358 + }, + { + "epoch": 0.7450657894736842, + "grad_norm": 0.49526041746139526, + "learning_rate": 3.3510377050945853e-06, + "loss": 0.443331241607666, + "step": 1359 + }, + { + "epoch": 0.7456140350877193, + "grad_norm": 0.5202312469482422, + "learning_rate": 3.337500337088162e-06, + "loss": 0.44786232709884644, + "step": 1360 + }, + { + "epoch": 0.7461622807017544, + "grad_norm": 0.4547555148601532, + "learning_rate": 3.323984889107035e-06, + "loss": 0.44628724455833435, + "step": 1361 + }, + { + "epoch": 0.7467105263157895, + "grad_norm": 0.4743717312812805, + "learning_rate": 3.3104914056178406e-06, + "loss": 0.46816253662109375, + "step": 1362 + }, + { + "epoch": 0.7472587719298246, + "grad_norm": 0.46588048338890076, + "learning_rate": 3.2970199310149543e-06, + "loss": 0.4552823007106781, + "step": 1363 + }, + { + "epoch": 0.7478070175438597, + "grad_norm": 0.4879978895187378, + "learning_rate": 3.283570509620344e-06, + "loss": 0.4521896243095398, + "step": 1364 + }, + { + "epoch": 0.7483552631578947, + "grad_norm": 0.4443570077419281, + "learning_rate": 3.2701431856834087e-06, + "loss": 0.45289620757102966, + "step": 1365 + }, + { + "epoch": 0.7489035087719298, + "grad_norm": 0.4682408571243286, + "learning_rate": 3.2567380033808603e-06, + "loss": 0.4340660572052002, + "step": 1366 + }, + { + "epoch": 0.7494517543859649, + "grad_norm": 0.5005453824996948, + "learning_rate": 3.2433550068165497e-06, + "loss": 0.45293864607810974, + "step": 1367 + }, + { + "epoch": 0.75, + "grad_norm": 0.4666679799556732, + "learning_rate": 3.2299942400213447e-06, + "loss": 0.44803494215011597, + "step": 1368 + }, + { + "epoch": 0.7505482456140351, + "grad_norm": 0.46357494592666626, + "learning_rate": 3.216655746952976e-06, + "loss": 0.44369348883628845, + "step": 1369 + }, + { + "epoch": 0.7510964912280702, + "grad_norm": 0.44632741808891296, + "learning_rate": 3.203339571495887e-06, + "loss": 0.4500495195388794, + "step": 1370 + }, + { + "epoch": 0.7516447368421053, + "grad_norm": 0.4712839126586914, + "learning_rate": 3.190045757461093e-06, + "loss": 0.45239824056625366, + "step": 1371 + }, + { + "epoch": 0.7521929824561403, + "grad_norm": 0.49454811215400696, + "learning_rate": 3.1767743485860514e-06, + "loss": 0.4392992854118347, + "step": 1372 + }, + { + "epoch": 0.7527412280701754, + "grad_norm": 0.4636279046535492, + "learning_rate": 3.1635253885344884e-06, + "loss": 0.4610537886619568, + "step": 1373 + }, + { + "epoch": 0.7532894736842105, + "grad_norm": 0.49633049964904785, + "learning_rate": 3.1502989208962854e-06, + "loss": 0.44744524359703064, + "step": 1374 + }, + { + "epoch": 0.7538377192982456, + "grad_norm": 0.5043641924858093, + "learning_rate": 3.1370949891873213e-06, + "loss": 0.4373456835746765, + "step": 1375 + }, + { + "epoch": 0.7543859649122807, + "grad_norm": 0.4852113425731659, + "learning_rate": 3.123913636849322e-06, + "loss": 0.45185375213623047, + "step": 1376 + }, + { + "epoch": 0.7549342105263158, + "grad_norm": 0.48131412267684937, + "learning_rate": 3.1107549072497324e-06, + "loss": 0.4426053464412689, + "step": 1377 + }, + { + "epoch": 0.7554824561403509, + "grad_norm": 0.4737791121006012, + "learning_rate": 3.097618843681558e-06, + "loss": 0.44092023372650146, + "step": 1378 + }, + { + "epoch": 0.7560307017543859, + "grad_norm": 0.4885827302932739, + "learning_rate": 3.084505489363254e-06, + "loss": 0.4620380401611328, + "step": 1379 + }, + { + "epoch": 0.756578947368421, + "grad_norm": 0.47208940982818604, + "learning_rate": 3.0714148874385376e-06, + "loss": 0.4508278965950012, + "step": 1380 + }, + { + "epoch": 0.7571271929824561, + "grad_norm": 0.4611866772174835, + "learning_rate": 3.058347080976276e-06, + "loss": 0.4432736337184906, + "step": 1381 + }, + { + "epoch": 0.7576754385964912, + "grad_norm": 0.46872127056121826, + "learning_rate": 3.0453021129703465e-06, + "loss": 0.4664454460144043, + "step": 1382 + }, + { + "epoch": 0.7582236842105263, + "grad_norm": 0.48894163966178894, + "learning_rate": 3.0322800263394725e-06, + "loss": 0.437408447265625, + "step": 1383 + }, + { + "epoch": 0.7587719298245614, + "grad_norm": 0.4994237422943115, + "learning_rate": 3.0192808639271065e-06, + "loss": 0.4648517966270447, + "step": 1384 + }, + { + "epoch": 0.7593201754385965, + "grad_norm": 0.47740432620048523, + "learning_rate": 3.0063046685012808e-06, + "loss": 0.4363187551498413, + "step": 1385 + }, + { + "epoch": 0.7598684210526315, + "grad_norm": 0.480413019657135, + "learning_rate": 2.993351482754455e-06, + "loss": 0.44096410274505615, + "step": 1386 + }, + { + "epoch": 0.7604166666666666, + "grad_norm": 0.4761090874671936, + "learning_rate": 2.9804213493033883e-06, + "loss": 0.45182913541793823, + "step": 1387 + }, + { + "epoch": 0.7609649122807017, + "grad_norm": 0.469472736120224, + "learning_rate": 2.967514310689006e-06, + "loss": 0.45764875411987305, + "step": 1388 + }, + { + "epoch": 0.7615131578947368, + "grad_norm": 0.4679282307624817, + "learning_rate": 2.9546304093762357e-06, + "loss": 0.4410996437072754, + "step": 1389 + }, + { + "epoch": 0.7620614035087719, + "grad_norm": 0.47340869903564453, + "learning_rate": 2.9417696877538913e-06, + "loss": 0.43983063101768494, + "step": 1390 + }, + { + "epoch": 0.762609649122807, + "grad_norm": 0.47689974308013916, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.4503350853919983, + "step": 1391 + }, + { + "epoch": 0.7631578947368421, + "grad_norm": 0.5016065835952759, + "learning_rate": 2.9161179527542827e-06, + "loss": 0.44059401750564575, + "step": 1392 + }, + { + "epoch": 0.7637061403508771, + "grad_norm": 0.4677375257015228, + "learning_rate": 2.903327023772766e-06, + "loss": 0.4515905976295471, + "step": 1393 + }, + { + "epoch": 0.7642543859649122, + "grad_norm": 0.47397127747535706, + "learning_rate": 2.890559443272906e-06, + "loss": 0.44616538286209106, + "step": 1394 + }, + { + "epoch": 0.7648026315789473, + "grad_norm": 0.5041709542274475, + "learning_rate": 2.877815253260814e-06, + "loss": 0.42689597606658936, + "step": 1395 + }, + { + "epoch": 0.7653508771929824, + "grad_norm": 0.45834705233573914, + "learning_rate": 2.865094495665638e-06, + "loss": 0.45505937933921814, + "step": 1396 + }, + { + "epoch": 0.7658991228070176, + "grad_norm": 0.460654079914093, + "learning_rate": 2.8523972123394405e-06, + "loss": 0.444200724363327, + "step": 1397 + }, + { + "epoch": 0.7664473684210527, + "grad_norm": 0.4719720780849457, + "learning_rate": 2.839723445057049e-06, + "loss": 0.4539354741573334, + "step": 1398 + }, + { + "epoch": 0.7669956140350878, + "grad_norm": 0.47663259506225586, + "learning_rate": 2.827073235515916e-06, + "loss": 0.4420117735862732, + "step": 1399 + }, + { + "epoch": 0.7675438596491229, + "grad_norm": 0.540690541267395, + "learning_rate": 2.814446625335997e-06, + "loss": 0.46264857053756714, + "step": 1400 + }, + { + "epoch": 0.7680921052631579, + "grad_norm": 0.48664623498916626, + "learning_rate": 2.801843656059602e-06, + "loss": 0.44607076048851013, + "step": 1401 + }, + { + "epoch": 0.768640350877193, + "grad_norm": 0.46868008375167847, + "learning_rate": 2.7892643691512555e-06, + "loss": 0.4463055729866028, + "step": 1402 + }, + { + "epoch": 0.7691885964912281, + "grad_norm": 0.7354031205177307, + "learning_rate": 2.7767088059975732e-06, + "loss": 0.44075483083724976, + "step": 1403 + }, + { + "epoch": 0.7697368421052632, + "grad_norm": 0.45407670736312866, + "learning_rate": 2.764177007907113e-06, + "loss": 0.4547209143638611, + "step": 1404 + }, + { + "epoch": 0.7702850877192983, + "grad_norm": 0.4852718412876129, + "learning_rate": 2.7516690161102376e-06, + "loss": 0.45378145575523376, + "step": 1405 + }, + { + "epoch": 0.7708333333333334, + "grad_norm": 0.4868534505367279, + "learning_rate": 2.7391848717590074e-06, + "loss": 0.43858057260513306, + "step": 1406 + }, + { + "epoch": 0.7713815789473685, + "grad_norm": 0.4446372389793396, + "learning_rate": 2.726724615927003e-06, + "loss": 0.4574624001979828, + "step": 1407 + }, + { + "epoch": 0.7719298245614035, + "grad_norm": 0.4679673910140991, + "learning_rate": 2.714288289609217e-06, + "loss": 0.44254884123802185, + "step": 1408 + }, + { + "epoch": 0.7724780701754386, + "grad_norm": 0.4617109000682831, + "learning_rate": 2.701875933721909e-06, + "loss": 0.44806107878685, + "step": 1409 + }, + { + "epoch": 0.7730263157894737, + "grad_norm": 0.45996034145355225, + "learning_rate": 2.6894875891024796e-06, + "loss": 0.4340808391571045, + "step": 1410 + }, + { + "epoch": 0.7735745614035088, + "grad_norm": 0.4849942624568939, + "learning_rate": 2.677123296509334e-06, + "loss": 0.453971266746521, + "step": 1411 + }, + { + "epoch": 0.7741228070175439, + "grad_norm": 0.4761378765106201, + "learning_rate": 2.6647830966217323e-06, + "loss": 0.4519736170768738, + "step": 1412 + }, + { + "epoch": 0.774671052631579, + "grad_norm": 0.4653758406639099, + "learning_rate": 2.6524670300396827e-06, + "loss": 0.4523582458496094, + "step": 1413 + }, + { + "epoch": 0.7752192982456141, + "grad_norm": 0.489674836397171, + "learning_rate": 2.6401751372837815e-06, + "loss": 0.4477754235267639, + "step": 1414 + }, + { + "epoch": 0.7757675438596491, + "grad_norm": 0.4749486446380615, + "learning_rate": 2.627907458795096e-06, + "loss": 0.43215465545654297, + "step": 1415 + }, + { + "epoch": 0.7763157894736842, + "grad_norm": 0.4954405426979065, + "learning_rate": 2.6156640349350282e-06, + "loss": 0.44484657049179077, + "step": 1416 + }, + { + "epoch": 0.7768640350877193, + "grad_norm": 0.4541327953338623, + "learning_rate": 2.603444905985184e-06, + "loss": 0.4564289450645447, + "step": 1417 + }, + { + "epoch": 0.7774122807017544, + "grad_norm": 0.46660396456718445, + "learning_rate": 2.5912501121472287e-06, + "loss": 0.4459143877029419, + "step": 1418 + }, + { + "epoch": 0.7779605263157895, + "grad_norm": 0.4727344512939453, + "learning_rate": 2.5790796935427744e-06, + "loss": 0.4507994055747986, + "step": 1419 + }, + { + "epoch": 0.7785087719298246, + "grad_norm": 0.48547977209091187, + "learning_rate": 2.5669336902132236e-06, + "loss": 0.45748457312583923, + "step": 1420 + }, + { + "epoch": 0.7790570175438597, + "grad_norm": 0.4882795214653015, + "learning_rate": 2.554812142119665e-06, + "loss": 0.4455465078353882, + "step": 1421 + }, + { + "epoch": 0.7796052631578947, + "grad_norm": 0.47181499004364014, + "learning_rate": 2.542715089142723e-06, + "loss": 0.45293498039245605, + "step": 1422 + }, + { + "epoch": 0.7801535087719298, + "grad_norm": 0.46868982911109924, + "learning_rate": 2.53064257108243e-06, + "loss": 0.4384647607803345, + "step": 1423 + }, + { + "epoch": 0.7807017543859649, + "grad_norm": 0.5356707572937012, + "learning_rate": 2.518594627658092e-06, + "loss": 0.45708268880844116, + "step": 1424 + }, + { + "epoch": 0.78125, + "grad_norm": 0.4938961863517761, + "learning_rate": 2.506571298508176e-06, + "loss": 0.45950478315353394, + "step": 1425 + }, + { + "epoch": 0.7817982456140351, + "grad_norm": 0.46023717522621155, + "learning_rate": 2.4945726231901535e-06, + "loss": 0.45347079634666443, + "step": 1426 + }, + { + "epoch": 0.7823464912280702, + "grad_norm": 0.4642326235771179, + "learning_rate": 2.482598641180393e-06, + "loss": 0.44673532247543335, + "step": 1427 + }, + { + "epoch": 0.7828947368421053, + "grad_norm": 0.49191659688949585, + "learning_rate": 2.470649391874017e-06, + "loss": 0.45227012038230896, + "step": 1428 + }, + { + "epoch": 0.7834429824561403, + "grad_norm": 0.48225194215774536, + "learning_rate": 2.4587249145847757e-06, + "loss": 0.4629409909248352, + "step": 1429 + }, + { + "epoch": 0.7839912280701754, + "grad_norm": 0.4709365665912628, + "learning_rate": 2.446825248544913e-06, + "loss": 0.44721364974975586, + "step": 1430 + }, + { + "epoch": 0.7845394736842105, + "grad_norm": 0.4582255780696869, + "learning_rate": 2.434950432905053e-06, + "loss": 0.444847047328949, + "step": 1431 + }, + { + "epoch": 0.7850877192982456, + "grad_norm": 0.47319498658180237, + "learning_rate": 2.4231005067340507e-06, + "loss": 0.4396522045135498, + "step": 1432 + }, + { + "epoch": 0.7856359649122807, + "grad_norm": 0.47419747710227966, + "learning_rate": 2.411275509018878e-06, + "loss": 0.4477517008781433, + "step": 1433 + }, + { + "epoch": 0.7861842105263158, + "grad_norm": 0.485272616147995, + "learning_rate": 2.3994754786644925e-06, + "loss": 0.43748196959495544, + "step": 1434 + }, + { + "epoch": 0.7867324561403509, + "grad_norm": 0.48166120052337646, + "learning_rate": 2.387700454493703e-06, + "loss": 0.45969265699386597, + "step": 1435 + }, + { + "epoch": 0.7872807017543859, + "grad_norm": 0.48745638132095337, + "learning_rate": 2.3759504752470463e-06, + "loss": 0.4459717273712158, + "step": 1436 + }, + { + "epoch": 0.787828947368421, + "grad_norm": 0.4823463261127472, + "learning_rate": 2.3642255795826654e-06, + "loss": 0.4502348303794861, + "step": 1437 + }, + { + "epoch": 0.7883771929824561, + "grad_norm": 0.4792110025882721, + "learning_rate": 2.3525258060761734e-06, + "loss": 0.46335679292678833, + "step": 1438 + }, + { + "epoch": 0.7889254385964912, + "grad_norm": 0.487899512052536, + "learning_rate": 2.3408511932205256e-06, + "loss": 0.46185988187789917, + "step": 1439 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.45009350776672363, + "learning_rate": 2.329201779425909e-06, + "loss": 0.4611009359359741, + "step": 1440 + }, + { + "epoch": 0.7900219298245614, + "grad_norm": 0.4828180968761444, + "learning_rate": 2.3175776030195917e-06, + "loss": 0.45473694801330566, + "step": 1441 + }, + { + "epoch": 0.7905701754385965, + "grad_norm": 0.4800177812576294, + "learning_rate": 2.3059787022458137e-06, + "loss": 0.43865078687667847, + "step": 1442 + }, + { + "epoch": 0.7911184210526315, + "grad_norm": 0.45250406861305237, + "learning_rate": 2.2944051152656588e-06, + "loss": 0.458981454372406, + "step": 1443 + }, + { + "epoch": 0.7916666666666666, + "grad_norm": 0.4827856421470642, + "learning_rate": 2.2828568801569286e-06, + "loss": 0.4438856840133667, + "step": 1444 + }, + { + "epoch": 0.7922149122807017, + "grad_norm": 0.4579552710056305, + "learning_rate": 2.271334034914009e-06, + "loss": 0.45474669337272644, + "step": 1445 + }, + { + "epoch": 0.7927631578947368, + "grad_norm": 0.4525872468948364, + "learning_rate": 2.2598366174477527e-06, + "loss": 0.44240206480026245, + "step": 1446 + }, + { + "epoch": 0.7933114035087719, + "grad_norm": 0.4654140770435333, + "learning_rate": 2.248364665585362e-06, + "loss": 0.4465499818325043, + "step": 1447 + }, + { + "epoch": 0.793859649122807, + "grad_norm": 0.4569734036922455, + "learning_rate": 2.236918217070244e-06, + "loss": 0.43812352418899536, + "step": 1448 + }, + { + "epoch": 0.7944078947368421, + "grad_norm": 0.4839644730091095, + "learning_rate": 2.225497309561907e-06, + "loss": 0.4401845335960388, + "step": 1449 + }, + { + "epoch": 0.7949561403508771, + "grad_norm": 0.49678677320480347, + "learning_rate": 2.2141019806358254e-06, + "loss": 0.4653699994087219, + "step": 1450 + }, + { + "epoch": 0.7955043859649122, + "grad_norm": 0.48518845438957214, + "learning_rate": 2.2027322677833186e-06, + "loss": 0.44742369651794434, + "step": 1451 + }, + { + "epoch": 0.7960526315789473, + "grad_norm": 0.46840372681617737, + "learning_rate": 2.191388208411421e-06, + "loss": 0.4364306628704071, + "step": 1452 + }, + { + "epoch": 0.7966008771929824, + "grad_norm": 0.45687833428382874, + "learning_rate": 2.1800698398427754e-06, + "loss": 0.46097299456596375, + "step": 1453 + }, + { + "epoch": 0.7971491228070176, + "grad_norm": 0.49113285541534424, + "learning_rate": 2.1687771993155006e-06, + "loss": 0.43846258521080017, + "step": 1454 + }, + { + "epoch": 0.7976973684210527, + "grad_norm": 0.4738796651363373, + "learning_rate": 2.1575103239830554e-06, + "loss": 0.44409045577049255, + "step": 1455 + }, + { + "epoch": 0.7982456140350878, + "grad_norm": 0.457188218832016, + "learning_rate": 2.146269250914147e-06, + "loss": 0.4441969394683838, + "step": 1456 + }, + { + "epoch": 0.7987938596491229, + "grad_norm": 0.4704115092754364, + "learning_rate": 2.1350540170925795e-06, + "loss": 0.4427245557308197, + "step": 1457 + }, + { + "epoch": 0.7993421052631579, + "grad_norm": 0.4717205762863159, + "learning_rate": 2.123864659417144e-06, + "loss": 0.42821723222732544, + "step": 1458 + }, + { + "epoch": 0.799890350877193, + "grad_norm": 0.46827948093414307, + "learning_rate": 2.1127012147015047e-06, + "loss": 0.4556030035018921, + "step": 1459 + }, + { + "epoch": 0.8004385964912281, + "grad_norm": 0.49792206287384033, + "learning_rate": 2.101563719674071e-06, + "loss": 0.4553588330745697, + "step": 1460 + }, + { + "epoch": 0.8009868421052632, + "grad_norm": 0.46498462557792664, + "learning_rate": 2.0904522109778667e-06, + "loss": 0.45313581824302673, + "step": 1461 + }, + { + "epoch": 0.8015350877192983, + "grad_norm": 0.4481178820133209, + "learning_rate": 2.0793667251704296e-06, + "loss": 0.4471014738082886, + "step": 1462 + }, + { + "epoch": 0.8020833333333334, + "grad_norm": 0.46041181683540344, + "learning_rate": 2.068307298723671e-06, + "loss": 0.45470839738845825, + "step": 1463 + }, + { + "epoch": 0.8026315789473685, + "grad_norm": 0.4539787471294403, + "learning_rate": 2.0572739680237718e-06, + "loss": 0.45658087730407715, + "step": 1464 + }, + { + "epoch": 0.8031798245614035, + "grad_norm": 0.458933562040329, + "learning_rate": 2.046266769371059e-06, + "loss": 0.460864782333374, + "step": 1465 + }, + { + "epoch": 0.8037280701754386, + "grad_norm": 0.47983378171920776, + "learning_rate": 2.0352857389798765e-06, + "loss": 0.44127601385116577, + "step": 1466 + }, + { + "epoch": 0.8042763157894737, + "grad_norm": 0.4826447665691376, + "learning_rate": 2.0243309129784727e-06, + "loss": 0.4633498787879944, + "step": 1467 + }, + { + "epoch": 0.8048245614035088, + "grad_norm": 0.588762640953064, + "learning_rate": 2.01340232740889e-06, + "loss": 0.4519672989845276, + "step": 1468 + }, + { + "epoch": 0.8053728070175439, + "grad_norm": 0.4594285786151886, + "learning_rate": 2.0025000182268297e-06, + "loss": 0.4531112313270569, + "step": 1469 + }, + { + "epoch": 0.805921052631579, + "grad_norm": 0.4814896285533905, + "learning_rate": 1.9916240213015458e-06, + "loss": 0.44493603706359863, + "step": 1470 + }, + { + "epoch": 0.8064692982456141, + "grad_norm": 0.48193618655204773, + "learning_rate": 1.9807743724157292e-06, + "loss": 0.42968878149986267, + "step": 1471 + }, + { + "epoch": 0.8070175438596491, + "grad_norm": 0.4833047688007355, + "learning_rate": 1.9699511072653733e-06, + "loss": 0.43403005599975586, + "step": 1472 + }, + { + "epoch": 0.8075657894736842, + "grad_norm": 0.46741804480552673, + "learning_rate": 1.9591542614596715e-06, + "loss": 0.4390289783477783, + "step": 1473 + }, + { + "epoch": 0.8081140350877193, + "grad_norm": 0.47882014513015747, + "learning_rate": 1.948383870520901e-06, + "loss": 0.43885254859924316, + "step": 1474 + }, + { + "epoch": 0.8086622807017544, + "grad_norm": 0.4652293622493744, + "learning_rate": 1.937639969884293e-06, + "loss": 0.45536062121391296, + "step": 1475 + }, + { + "epoch": 0.8092105263157895, + "grad_norm": 0.48102349042892456, + "learning_rate": 1.926922594897932e-06, + "loss": 0.4474731385707855, + "step": 1476 + }, + { + "epoch": 0.8097587719298246, + "grad_norm": 0.4696721136569977, + "learning_rate": 1.9162317808226228e-06, + "loss": 0.4380807876586914, + "step": 1477 + }, + { + "epoch": 0.8103070175438597, + "grad_norm": 0.45643603801727295, + "learning_rate": 1.9055675628317926e-06, + "loss": 0.4397779405117035, + "step": 1478 + }, + { + "epoch": 0.8108552631578947, + "grad_norm": 0.45802298188209534, + "learning_rate": 1.894929976011356e-06, + "loss": 0.4323575496673584, + "step": 1479 + }, + { + "epoch": 0.8114035087719298, + "grad_norm": 0.4565486013889313, + "learning_rate": 1.8843190553596168e-06, + "loss": 0.42069071531295776, + "step": 1480 + }, + { + "epoch": 0.8119517543859649, + "grad_norm": 0.49252307415008545, + "learning_rate": 1.8737348357871477e-06, + "loss": 0.4390181005001068, + "step": 1481 + }, + { + "epoch": 0.8125, + "grad_norm": 0.4725061058998108, + "learning_rate": 1.863177352116664e-06, + "loss": 0.4410583972930908, + "step": 1482 + }, + { + "epoch": 0.8130482456140351, + "grad_norm": 0.4714313745498657, + "learning_rate": 1.8526466390829213e-06, + "loss": 0.43576177954673767, + "step": 1483 + }, + { + "epoch": 0.8135964912280702, + "grad_norm": 0.4661250412464142, + "learning_rate": 1.8421427313326046e-06, + "loss": 0.4356091618537903, + "step": 1484 + }, + { + "epoch": 0.8141447368421053, + "grad_norm": 0.48059454560279846, + "learning_rate": 1.831665663424198e-06, + "loss": 0.44501131772994995, + "step": 1485 + }, + { + "epoch": 0.8146929824561403, + "grad_norm": 0.46940878033638, + "learning_rate": 1.8212154698278906e-06, + "loss": 0.4514904022216797, + "step": 1486 + }, + { + "epoch": 0.8152412280701754, + "grad_norm": 0.5343154668807983, + "learning_rate": 1.8107921849254495e-06, + "loss": 0.4419550895690918, + "step": 1487 + }, + { + "epoch": 0.8157894736842105, + "grad_norm": 0.4681127071380615, + "learning_rate": 1.8003958430101087e-06, + "loss": 0.46082767844200134, + "step": 1488 + }, + { + "epoch": 0.8163377192982456, + "grad_norm": 0.47236642241477966, + "learning_rate": 1.7900264782864552e-06, + "loss": 0.44913601875305176, + "step": 1489 + }, + { + "epoch": 0.8168859649122807, + "grad_norm": 0.4529493451118469, + "learning_rate": 1.7796841248703277e-06, + "loss": 0.45329952239990234, + "step": 1490 + }, + { + "epoch": 0.8174342105263158, + "grad_norm": 0.46615317463874817, + "learning_rate": 1.7693688167886947e-06, + "loss": 0.4398888945579529, + "step": 1491 + }, + { + "epoch": 0.8179824561403509, + "grad_norm": 0.4682173430919647, + "learning_rate": 1.7590805879795358e-06, + "loss": 0.43605130910873413, + "step": 1492 + }, + { + "epoch": 0.8185307017543859, + "grad_norm": 0.44849905371665955, + "learning_rate": 1.7488194722917484e-06, + "loss": 0.44999951124191284, + "step": 1493 + }, + { + "epoch": 0.819078947368421, + "grad_norm": 0.49639979004859924, + "learning_rate": 1.7385855034850185e-06, + "loss": 0.4317818582057953, + "step": 1494 + }, + { + "epoch": 0.8196271929824561, + "grad_norm": 0.48981213569641113, + "learning_rate": 1.7283787152297194e-06, + "loss": 0.43808650970458984, + "step": 1495 + }, + { + "epoch": 0.8201754385964912, + "grad_norm": 0.4760289788246155, + "learning_rate": 1.7181991411067989e-06, + "loss": 0.4499431550502777, + "step": 1496 + }, + { + "epoch": 0.8207236842105263, + "grad_norm": 0.4522970914840698, + "learning_rate": 1.7080468146076745e-06, + "loss": 0.45054715871810913, + "step": 1497 + }, + { + "epoch": 0.8212719298245614, + "grad_norm": 0.4766053855419159, + "learning_rate": 1.6979217691341054e-06, + "loss": 0.45946937799453735, + "step": 1498 + }, + { + "epoch": 0.8218201754385965, + "grad_norm": 0.485436350107193, + "learning_rate": 1.687824037998107e-06, + "loss": 0.44628486037254333, + "step": 1499 + }, + { + "epoch": 0.8223684210526315, + "grad_norm": 0.47168204188346863, + "learning_rate": 1.677753654421821e-06, + "loss": 0.4408082365989685, + "step": 1500 + }, + { + "epoch": 0.8229166666666666, + "grad_norm": 0.478395015001297, + "learning_rate": 1.667710651537412e-06, + "loss": 0.4404899477958679, + "step": 1501 + }, + { + "epoch": 0.8234649122807017, + "grad_norm": 0.4617457687854767, + "learning_rate": 1.6576950623869682e-06, + "loss": 0.4409525394439697, + "step": 1502 + }, + { + "epoch": 0.8240131578947368, + "grad_norm": 0.48547491431236267, + "learning_rate": 1.6477069199223839e-06, + "loss": 0.45365771651268005, + "step": 1503 + }, + { + "epoch": 0.8245614035087719, + "grad_norm": 0.46786943078041077, + "learning_rate": 1.6377462570052438e-06, + "loss": 0.44676321744918823, + "step": 1504 + }, + { + "epoch": 0.825109649122807, + "grad_norm": 0.4777480959892273, + "learning_rate": 1.6278131064067349e-06, + "loss": 0.44479823112487793, + "step": 1505 + }, + { + "epoch": 0.8256578947368421, + "grad_norm": 0.4573366940021515, + "learning_rate": 1.6179075008075162e-06, + "loss": 0.440067857503891, + "step": 1506 + }, + { + "epoch": 0.8262061403508771, + "grad_norm": 0.44294530153274536, + "learning_rate": 1.6080294727976288e-06, + "loss": 0.42653411626815796, + "step": 1507 + }, + { + "epoch": 0.8267543859649122, + "grad_norm": 0.4719429016113281, + "learning_rate": 1.598179054876382e-06, + "loss": 0.46551793813705444, + "step": 1508 + }, + { + "epoch": 0.8273026315789473, + "grad_norm": 0.47400152683258057, + "learning_rate": 1.5883562794522423e-06, + "loss": 0.4564603269100189, + "step": 1509 + }, + { + "epoch": 0.8278508771929824, + "grad_norm": 0.4437645971775055, + "learning_rate": 1.5785611788427336e-06, + "loss": 0.4511438310146332, + "step": 1510 + }, + { + "epoch": 0.8283991228070176, + "grad_norm": 0.47115546464920044, + "learning_rate": 1.568793785274323e-06, + "loss": 0.4462664723396301, + "step": 1511 + }, + { + "epoch": 0.8289473684210527, + "grad_norm": 0.4500909149646759, + "learning_rate": 1.559054130882327e-06, + "loss": 0.44786590337753296, + "step": 1512 + }, + { + "epoch": 0.8294956140350878, + "grad_norm": 0.4785374701023102, + "learning_rate": 1.549342247710799e-06, + "loss": 0.4540751874446869, + "step": 1513 + }, + { + "epoch": 0.8300438596491229, + "grad_norm": 0.4943836033344269, + "learning_rate": 1.5396581677124123e-06, + "loss": 0.4656469523906708, + "step": 1514 + }, + { + "epoch": 0.8305921052631579, + "grad_norm": 0.4699077010154724, + "learning_rate": 1.5300019227483809e-06, + "loss": 0.4323770999908447, + "step": 1515 + }, + { + "epoch": 0.831140350877193, + "grad_norm": 0.48153141140937805, + "learning_rate": 1.5203735445883284e-06, + "loss": 0.4336276054382324, + "step": 1516 + }, + { + "epoch": 0.8316885964912281, + "grad_norm": 0.49159079790115356, + "learning_rate": 1.5107730649101948e-06, + "loss": 0.45548614859580994, + "step": 1517 + }, + { + "epoch": 0.8322368421052632, + "grad_norm": 0.49155518412590027, + "learning_rate": 1.5012005153001463e-06, + "loss": 0.4504159390926361, + "step": 1518 + }, + { + "epoch": 0.8327850877192983, + "grad_norm": 0.4667416214942932, + "learning_rate": 1.4916559272524422e-06, + "loss": 0.4470210075378418, + "step": 1519 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.46270886063575745, + "learning_rate": 1.4821393321693523e-06, + "loss": 0.4300989508628845, + "step": 1520 + }, + { + "epoch": 0.8338815789473685, + "grad_norm": 0.4680396616458893, + "learning_rate": 1.4726507613610496e-06, + "loss": 0.44302886724472046, + "step": 1521 + }, + { + "epoch": 0.8344298245614035, + "grad_norm": 0.48521971702575684, + "learning_rate": 1.4631902460455005e-06, + "loss": 0.43804043531417847, + "step": 1522 + }, + { + "epoch": 0.8349780701754386, + "grad_norm": 0.4627128839492798, + "learning_rate": 1.4537578173483736e-06, + "loss": 0.4490930438041687, + "step": 1523 + }, + { + "epoch": 0.8355263157894737, + "grad_norm": 0.4822083115577698, + "learning_rate": 1.444353506302928e-06, + "loss": 0.4367152452468872, + "step": 1524 + }, + { + "epoch": 0.8360745614035088, + "grad_norm": 0.5016523599624634, + "learning_rate": 1.4349773438499115e-06, + "loss": 0.4512786865234375, + "step": 1525 + }, + { + "epoch": 0.8366228070175439, + "grad_norm": 0.4675818681716919, + "learning_rate": 1.4256293608374627e-06, + "loss": 0.43894022703170776, + "step": 1526 + }, + { + "epoch": 0.837171052631579, + "grad_norm": 0.4565467834472656, + "learning_rate": 1.4163095880210131e-06, + "loss": 0.43741410970687866, + "step": 1527 + }, + { + "epoch": 0.8377192982456141, + "grad_norm": 0.45361560583114624, + "learning_rate": 1.4070180560631708e-06, + "loss": 0.45772549510002136, + "step": 1528 + }, + { + "epoch": 0.8382675438596491, + "grad_norm": 0.4735342860221863, + "learning_rate": 1.397754795533639e-06, + "loss": 0.44077205657958984, + "step": 1529 + }, + { + "epoch": 0.8388157894736842, + "grad_norm": 0.48649099469184875, + "learning_rate": 1.3885198369091057e-06, + "loss": 0.4449783265590668, + "step": 1530 + }, + { + "epoch": 0.8393640350877193, + "grad_norm": 0.4635789096355438, + "learning_rate": 1.3793132105731388e-06, + "loss": 0.4570218324661255, + "step": 1531 + }, + { + "epoch": 0.8399122807017544, + "grad_norm": 0.49008285999298096, + "learning_rate": 1.3701349468160908e-06, + "loss": 0.435431569814682, + "step": 1532 + }, + { + "epoch": 0.8404605263157895, + "grad_norm": 0.4439835846424103, + "learning_rate": 1.360985075835004e-06, + "loss": 0.44734299182891846, + "step": 1533 + }, + { + "epoch": 0.8410087719298246, + "grad_norm": 0.46893739700317383, + "learning_rate": 1.3518636277335085e-06, + "loss": 0.45212846994400024, + "step": 1534 + }, + { + "epoch": 0.8415570175438597, + "grad_norm": 0.4498082995414734, + "learning_rate": 1.3427706325217137e-06, + "loss": 0.4477176070213318, + "step": 1535 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.4555201828479767, + "learning_rate": 1.333706120116126e-06, + "loss": 0.44618985056877136, + "step": 1536 + }, + { + "epoch": 0.8426535087719298, + "grad_norm": 0.47258657217025757, + "learning_rate": 1.324670120339535e-06, + "loss": 0.45410165190696716, + "step": 1537 + }, + { + "epoch": 0.8432017543859649, + "grad_norm": 0.457660973072052, + "learning_rate": 1.3156626629209224e-06, + "loss": 0.45003390312194824, + "step": 1538 + }, + { + "epoch": 0.84375, + "grad_norm": 0.46227186918258667, + "learning_rate": 1.3066837774953667e-06, + "loss": 0.4546055495738983, + "step": 1539 + }, + { + "epoch": 0.8442982456140351, + "grad_norm": 0.4478152394294739, + "learning_rate": 1.2977334936039454e-06, + "loss": 0.43452781438827515, + "step": 1540 + }, + { + "epoch": 0.8448464912280702, + "grad_norm": 0.4613502621650696, + "learning_rate": 1.28881184069363e-06, + "loss": 0.44267815351486206, + "step": 1541 + }, + { + "epoch": 0.8453947368421053, + "grad_norm": 0.5708100199699402, + "learning_rate": 1.2799188481171931e-06, + "loss": 0.4500243365764618, + "step": 1542 + }, + { + "epoch": 0.8459429824561403, + "grad_norm": 0.48053842782974243, + "learning_rate": 1.2710545451331203e-06, + "loss": 0.42664283514022827, + "step": 1543 + }, + { + "epoch": 0.8464912280701754, + "grad_norm": 0.45715469121932983, + "learning_rate": 1.262218960905498e-06, + "loss": 0.4444156587123871, + "step": 1544 + }, + { + "epoch": 0.8470394736842105, + "grad_norm": 0.4761993885040283, + "learning_rate": 1.2534121245039343e-06, + "loss": 0.4384285807609558, + "step": 1545 + }, + { + "epoch": 0.8475877192982456, + "grad_norm": 0.4799306094646454, + "learning_rate": 1.2446340649034517e-06, + "loss": 0.4414188265800476, + "step": 1546 + }, + { + "epoch": 0.8481359649122807, + "grad_norm": 0.4641325771808624, + "learning_rate": 1.2358848109843936e-06, + "loss": 0.45236217975616455, + "step": 1547 + }, + { + "epoch": 0.8486842105263158, + "grad_norm": 0.4714052677154541, + "learning_rate": 1.2271643915323316e-06, + "loss": 0.44093751907348633, + "step": 1548 + }, + { + "epoch": 0.8492324561403509, + "grad_norm": 0.4963100850582123, + "learning_rate": 1.2184728352379715e-06, + "loss": 0.44910988211631775, + "step": 1549 + }, + { + "epoch": 0.8497807017543859, + "grad_norm": 0.4910122752189636, + "learning_rate": 1.209810170697061e-06, + "loss": 0.4461553692817688, + "step": 1550 + }, + { + "epoch": 0.850328947368421, + "grad_norm": 0.474320650100708, + "learning_rate": 1.201176426410282e-06, + "loss": 0.4358775019645691, + "step": 1551 + }, + { + "epoch": 0.8508771929824561, + "grad_norm": 0.4582522511482239, + "learning_rate": 1.1925716307831792e-06, + "loss": 0.45600447058677673, + "step": 1552 + }, + { + "epoch": 0.8514254385964912, + "grad_norm": 0.45246538519859314, + "learning_rate": 1.1839958121260464e-06, + "loss": 0.44419237971305847, + "step": 1553 + }, + { + "epoch": 0.8519736842105263, + "grad_norm": 0.4695712625980377, + "learning_rate": 1.175448998653842e-06, + "loss": 0.4544498324394226, + "step": 1554 + }, + { + "epoch": 0.8525219298245614, + "grad_norm": 0.46356385946273804, + "learning_rate": 1.1669312184860982e-06, + "loss": 0.45395684242248535, + "step": 1555 + }, + { + "epoch": 0.8530701754385965, + "grad_norm": 0.4739828109741211, + "learning_rate": 1.1584424996468269e-06, + "loss": 0.4454335570335388, + "step": 1556 + }, + { + "epoch": 0.8536184210526315, + "grad_norm": 0.4642820358276367, + "learning_rate": 1.1499828700644212e-06, + "loss": 0.445566862821579, + "step": 1557 + }, + { + "epoch": 0.8541666666666666, + "grad_norm": 0.5376269817352295, + "learning_rate": 1.1415523575715758e-06, + "loss": 0.44570326805114746, + "step": 1558 + }, + { + "epoch": 0.8547149122807017, + "grad_norm": 0.47028419375419617, + "learning_rate": 1.1331509899051828e-06, + "loss": 0.4468782842159271, + "step": 1559 + }, + { + "epoch": 0.8552631578947368, + "grad_norm": 0.4601708650588989, + "learning_rate": 1.124778794706245e-06, + "loss": 0.4463876485824585, + "step": 1560 + }, + { + "epoch": 0.8558114035087719, + "grad_norm": 0.4608907699584961, + "learning_rate": 1.116435799519795e-06, + "loss": 0.4424273669719696, + "step": 1561 + }, + { + "epoch": 0.856359649122807, + "grad_norm": 0.4768998622894287, + "learning_rate": 1.1081220317947871e-06, + "loss": 0.4271972179412842, + "step": 1562 + }, + { + "epoch": 0.8569078947368421, + "grad_norm": 0.44866839051246643, + "learning_rate": 1.0998375188840148e-06, + "loss": 0.4346953332424164, + "step": 1563 + }, + { + "epoch": 0.8574561403508771, + "grad_norm": 0.48162323236465454, + "learning_rate": 1.0915822880440309e-06, + "loss": 0.4489680528640747, + "step": 1564 + }, + { + "epoch": 0.8580043859649122, + "grad_norm": 0.45125117897987366, + "learning_rate": 1.0833563664350355e-06, + "loss": 0.4487599730491638, + "step": 1565 + }, + { + "epoch": 0.8585526315789473, + "grad_norm": 0.47349071502685547, + "learning_rate": 1.0751597811208104e-06, + "loss": 0.4338189363479614, + "step": 1566 + }, + { + "epoch": 0.8591008771929824, + "grad_norm": 0.4895882308483124, + "learning_rate": 1.066992559068616e-06, + "loss": 0.44717928767204285, + "step": 1567 + }, + { + "epoch": 0.8596491228070176, + "grad_norm": 0.4759470224380493, + "learning_rate": 1.0588547271491033e-06, + "loss": 0.42937859892845154, + "step": 1568 + }, + { + "epoch": 0.8601973684210527, + "grad_norm": 0.4551708400249481, + "learning_rate": 1.0507463121362283e-06, + "loss": 0.43980643153190613, + "step": 1569 + }, + { + "epoch": 0.8607456140350878, + "grad_norm": 0.4665743112564087, + "learning_rate": 1.0426673407071674e-06, + "loss": 0.4547693729400635, + "step": 1570 + }, + { + "epoch": 0.8612938596491229, + "grad_norm": 0.46718016266822815, + "learning_rate": 1.0346178394422203e-06, + "loss": 0.42919105291366577, + "step": 1571 + }, + { + "epoch": 0.8618421052631579, + "grad_norm": 0.48040589690208435, + "learning_rate": 1.0265978348247319e-06, + "loss": 0.4436418414115906, + "step": 1572 + }, + { + "epoch": 0.862390350877193, + "grad_norm": 0.46300461888313293, + "learning_rate": 1.0186073532410046e-06, + "loss": 0.4462057054042816, + "step": 1573 + }, + { + "epoch": 0.8629385964912281, + "grad_norm": 0.46362096071243286, + "learning_rate": 1.0106464209802013e-06, + "loss": 0.44162052869796753, + "step": 1574 + }, + { + "epoch": 0.8634868421052632, + "grad_norm": 0.4580335319042206, + "learning_rate": 1.0027150642342664e-06, + "loss": 0.4479283094406128, + "step": 1575 + }, + { + "epoch": 0.8640350877192983, + "grad_norm": 0.48166683316230774, + "learning_rate": 9.94813309097844e-07, + "loss": 0.44494980573654175, + "step": 1576 + }, + { + "epoch": 0.8645833333333334, + "grad_norm": 0.4515690803527832, + "learning_rate": 9.869411815681861e-07, + "loss": 0.4551948308944702, + "step": 1577 + }, + { + "epoch": 0.8651315789473685, + "grad_norm": 0.4830446243286133, + "learning_rate": 9.790987075450652e-07, + "loss": 0.4476730227470398, + "step": 1578 + }, + { + "epoch": 0.8656798245614035, + "grad_norm": 0.4780424237251282, + "learning_rate": 9.712859128306906e-07, + "loss": 0.4410339295864105, + "step": 1579 + }, + { + "epoch": 0.8662280701754386, + "grad_norm": 0.46548083424568176, + "learning_rate": 9.635028231296328e-07, + "loss": 0.4511341452598572, + "step": 1580 + }, + { + "epoch": 0.8667763157894737, + "grad_norm": 0.47102057933807373, + "learning_rate": 9.557494640487197e-07, + "loss": 0.4316350221633911, + "step": 1581 + }, + { + "epoch": 0.8673245614035088, + "grad_norm": 0.4755796194076538, + "learning_rate": 9.480258610969739e-07, + "loss": 0.43706297874450684, + "step": 1582 + }, + { + "epoch": 0.8678728070175439, + "grad_norm": 0.46922212839126587, + "learning_rate": 9.403320396855153e-07, + "loss": 0.44665640592575073, + "step": 1583 + }, + { + "epoch": 0.868421052631579, + "grad_norm": 0.44689786434173584, + "learning_rate": 9.326680251274778e-07, + "loss": 0.43797099590301514, + "step": 1584 + }, + { + "epoch": 0.8689692982456141, + "grad_norm": 0.4606018364429474, + "learning_rate": 9.250338426379301e-07, + "loss": 0.4488024115562439, + "step": 1585 + }, + { + "epoch": 0.8695175438596491, + "grad_norm": 0.46374639868736267, + "learning_rate": 9.174295173337965e-07, + "loss": 0.44695234298706055, + "step": 1586 + }, + { + "epoch": 0.8700657894736842, + "grad_norm": 0.45890888571739197, + "learning_rate": 9.098550742337598e-07, + "loss": 0.4423515200614929, + "step": 1587 + }, + { + "epoch": 0.8706140350877193, + "grad_norm": 0.472668319940567, + "learning_rate": 9.023105382581976e-07, + "loss": 0.4320164620876312, + "step": 1588 + }, + { + "epoch": 0.8711622807017544, + "grad_norm": 0.4817354679107666, + "learning_rate": 8.94795934229089e-07, + "loss": 0.43965157866477966, + "step": 1589 + }, + { + "epoch": 0.8717105263157895, + "grad_norm": 0.4685409963130951, + "learning_rate": 8.873112868699329e-07, + "loss": 0.4492000341415405, + "step": 1590 + }, + { + "epoch": 0.8722587719298246, + "grad_norm": 0.46073803305625916, + "learning_rate": 8.798566208056669e-07, + "loss": 0.4389263987541199, + "step": 1591 + }, + { + "epoch": 0.8728070175438597, + "grad_norm": 0.536866307258606, + "learning_rate": 8.724319605625942e-07, + "loss": 0.4503793716430664, + "step": 1592 + }, + { + "epoch": 0.8733552631578947, + "grad_norm": 0.46781212091445923, + "learning_rate": 8.650373305682968e-07, + "loss": 0.4543306827545166, + "step": 1593 + }, + { + "epoch": 0.8739035087719298, + "grad_norm": 0.45546168088912964, + "learning_rate": 8.576727551515473e-07, + "loss": 0.45232880115509033, + "step": 1594 + }, + { + "epoch": 0.8744517543859649, + "grad_norm": 0.46312063932418823, + "learning_rate": 8.503382585422482e-07, + "loss": 0.4270923137664795, + "step": 1595 + }, + { + "epoch": 0.875, + "grad_norm": 0.4830167889595032, + "learning_rate": 8.430338648713332e-07, + "loss": 0.4483802914619446, + "step": 1596 + }, + { + "epoch": 0.8755482456140351, + "grad_norm": 0.4733735918998718, + "learning_rate": 8.357595981706934e-07, + "loss": 0.4472507834434509, + "step": 1597 + }, + { + "epoch": 0.8760964912280702, + "grad_norm": 0.46488088369369507, + "learning_rate": 8.285154823731101e-07, + "loss": 0.45538365840911865, + "step": 1598 + }, + { + "epoch": 0.8766447368421053, + "grad_norm": 0.4749467074871063, + "learning_rate": 8.213015413121584e-07, + "loss": 0.44120535254478455, + "step": 1599 + }, + { + "epoch": 0.8771929824561403, + "grad_norm": 0.47514861822128296, + "learning_rate": 8.141177987221394e-07, + "loss": 0.4361691176891327, + "step": 1600 + }, + { + "epoch": 0.8777412280701754, + "grad_norm": 0.47822439670562744, + "learning_rate": 8.069642782379994e-07, + "loss": 0.4516603648662567, + "step": 1601 + }, + { + "epoch": 0.8782894736842105, + "grad_norm": 0.4655228853225708, + "learning_rate": 7.998410033952497e-07, + "loss": 0.43014663457870483, + "step": 1602 + }, + { + "epoch": 0.8788377192982456, + "grad_norm": 0.4764757752418518, + "learning_rate": 7.927479976298957e-07, + "loss": 0.4478931427001953, + "step": 1603 + }, + { + "epoch": 0.8793859649122807, + "grad_norm": 0.46074727177619934, + "learning_rate": 7.856852842783546e-07, + "loss": 0.44595688581466675, + "step": 1604 + }, + { + "epoch": 0.8799342105263158, + "grad_norm": 0.4583260416984558, + "learning_rate": 7.786528865773779e-07, + "loss": 0.4490140676498413, + "step": 1605 + }, + { + "epoch": 0.8804824561403509, + "grad_norm": 0.4848959445953369, + "learning_rate": 7.716508276639756e-07, + "loss": 0.44253939390182495, + "step": 1606 + }, + { + "epoch": 0.8810307017543859, + "grad_norm": 0.4748147130012512, + "learning_rate": 7.646791305753476e-07, + "loss": 0.45834192633628845, + "step": 1607 + }, + { + "epoch": 0.881578947368421, + "grad_norm": 0.4740658402442932, + "learning_rate": 7.577378182487927e-07, + "loss": 0.43871140480041504, + "step": 1608 + }, + { + "epoch": 0.8821271929824561, + "grad_norm": 0.4656429886817932, + "learning_rate": 7.508269135216495e-07, + "loss": 0.43657320737838745, + "step": 1609 + }, + { + "epoch": 0.8826754385964912, + "grad_norm": 0.5353845357894897, + "learning_rate": 7.439464391312102e-07, + "loss": 0.45518967509269714, + "step": 1610 + }, + { + "epoch": 0.8832236842105263, + "grad_norm": 0.4724985659122467, + "learning_rate": 7.370964177146511e-07, + "loss": 0.43806204199790955, + "step": 1611 + }, + { + "epoch": 0.8837719298245614, + "grad_norm": 0.44743263721466064, + "learning_rate": 7.302768718089548e-07, + "loss": 0.4636176526546478, + "step": 1612 + }, + { + "epoch": 0.8843201754385965, + "grad_norm": 0.45549145340919495, + "learning_rate": 7.234878238508358e-07, + "loss": 0.4512561559677124, + "step": 1613 + }, + { + "epoch": 0.8848684210526315, + "grad_norm": 0.4739094078540802, + "learning_rate": 7.167292961766726e-07, + "loss": 0.44614070653915405, + "step": 1614 + }, + { + "epoch": 0.8854166666666666, + "grad_norm": 0.5969752669334412, + "learning_rate": 7.100013110224313e-07, + "loss": 0.44239649176597595, + "step": 1615 + }, + { + "epoch": 0.8859649122807017, + "grad_norm": 0.46936947107315063, + "learning_rate": 7.033038905235845e-07, + "loss": 0.44706910848617554, + "step": 1616 + }, + { + "epoch": 0.8865131578947368, + "grad_norm": 0.4682731032371521, + "learning_rate": 6.966370567150515e-07, + "loss": 0.4575401544570923, + "step": 1617 + }, + { + "epoch": 0.8870614035087719, + "grad_norm": 0.4480043351650238, + "learning_rate": 6.900008315311147e-07, + "loss": 0.4444824159145355, + "step": 1618 + }, + { + "epoch": 0.887609649122807, + "grad_norm": 0.4583923816680908, + "learning_rate": 6.833952368053565e-07, + "loss": 0.44810950756073, + "step": 1619 + }, + { + "epoch": 0.8881578947368421, + "grad_norm": 0.4623442590236664, + "learning_rate": 6.768202942705837e-07, + "loss": 0.4399760663509369, + "step": 1620 + }, + { + "epoch": 0.8887061403508771, + "grad_norm": 0.4465370774269104, + "learning_rate": 6.702760255587504e-07, + "loss": 0.45121389627456665, + "step": 1621 + }, + { + "epoch": 0.8892543859649122, + "grad_norm": 0.4720335006713867, + "learning_rate": 6.637624522008934e-07, + "loss": 0.4361550211906433, + "step": 1622 + }, + { + "epoch": 0.8898026315789473, + "grad_norm": 0.4659505784511566, + "learning_rate": 6.572795956270661e-07, + "loss": 0.45230960845947266, + "step": 1623 + }, + { + "epoch": 0.8903508771929824, + "grad_norm": 0.4665406346321106, + "learning_rate": 6.508274771662526e-07, + "loss": 0.43126940727233887, + "step": 1624 + }, + { + "epoch": 0.8908991228070176, + "grad_norm": 0.46323761343955994, + "learning_rate": 6.444061180463135e-07, + "loss": 0.43909797072410583, + "step": 1625 + }, + { + "epoch": 0.8914473684210527, + "grad_norm": 0.44538217782974243, + "learning_rate": 6.380155393939092e-07, + "loss": 0.4441220760345459, + "step": 1626 + }, + { + "epoch": 0.8919956140350878, + "grad_norm": 0.46254977583885193, + "learning_rate": 6.316557622344266e-07, + "loss": 0.4325595796108246, + "step": 1627 + }, + { + "epoch": 0.8925438596491229, + "grad_norm": 0.434124618768692, + "learning_rate": 6.253268074919139e-07, + "loss": 0.4580111503601074, + "step": 1628 + }, + { + "epoch": 0.8930921052631579, + "grad_norm": 0.4571579396724701, + "learning_rate": 6.190286959890157e-07, + "loss": 0.45825982093811035, + "step": 1629 + }, + { + "epoch": 0.893640350877193, + "grad_norm": 0.46914803981781006, + "learning_rate": 6.127614484468991e-07, + "loss": 0.4454300105571747, + "step": 1630 + }, + { + "epoch": 0.8941885964912281, + "grad_norm": 0.4706801474094391, + "learning_rate": 6.065250854851834e-07, + "loss": 0.4399760663509369, + "step": 1631 + }, + { + "epoch": 0.8947368421052632, + "grad_norm": 0.45345693826675415, + "learning_rate": 6.003196276218815e-07, + "loss": 0.42961764335632324, + "step": 1632 + }, + { + "epoch": 0.8952850877192983, + "grad_norm": 0.4495810270309448, + "learning_rate": 5.941450952733219e-07, + "loss": 0.43326735496520996, + "step": 1633 + }, + { + "epoch": 0.8958333333333334, + "grad_norm": 0.45630401372909546, + "learning_rate": 5.880015087540858e-07, + "loss": 0.4345873296260834, + "step": 1634 + }, + { + "epoch": 0.8963815789473685, + "grad_norm": 0.4689438045024872, + "learning_rate": 5.818888882769469e-07, + "loss": 0.4507316052913666, + "step": 1635 + }, + { + "epoch": 0.8969298245614035, + "grad_norm": 0.46279099583625793, + "learning_rate": 5.758072539527937e-07, + "loss": 0.4242330491542816, + "step": 1636 + }, + { + "epoch": 0.8974780701754386, + "grad_norm": 0.4933059513568878, + "learning_rate": 5.697566257905685e-07, + "loss": 0.4421001374721527, + "step": 1637 + }, + { + "epoch": 0.8980263157894737, + "grad_norm": 0.4722570776939392, + "learning_rate": 5.63737023697205e-07, + "loss": 0.43669748306274414, + "step": 1638 + }, + { + "epoch": 0.8985745614035088, + "grad_norm": 0.4798142611980438, + "learning_rate": 5.577484674775535e-07, + "loss": 0.4351981580257416, + "step": 1639 + }, + { + "epoch": 0.8991228070175439, + "grad_norm": 0.4653114080429077, + "learning_rate": 5.517909768343255e-07, + "loss": 0.44025319814682007, + "step": 1640 + }, + { + "epoch": 0.899671052631579, + "grad_norm": 0.4510413408279419, + "learning_rate": 5.458645713680255e-07, + "loss": 0.44666802883148193, + "step": 1641 + }, + { + "epoch": 0.9002192982456141, + "grad_norm": 0.4883483946323395, + "learning_rate": 5.399692705768844e-07, + "loss": 0.43601131439208984, + "step": 1642 + }, + { + "epoch": 0.9007675438596491, + "grad_norm": 0.501358151435852, + "learning_rate": 5.341050938567971e-07, + "loss": 0.44397610425949097, + "step": 1643 + }, + { + "epoch": 0.9013157894736842, + "grad_norm": 0.4486638605594635, + "learning_rate": 5.28272060501256e-07, + "loss": 0.43776631355285645, + "step": 1644 + }, + { + "epoch": 0.9018640350877193, + "grad_norm": 0.4545532166957855, + "learning_rate": 5.224701897012941e-07, + "loss": 0.44379138946533203, + "step": 1645 + }, + { + "epoch": 0.9024122807017544, + "grad_norm": 0.46450698375701904, + "learning_rate": 5.166995005454167e-07, + "loss": 0.4377807080745697, + "step": 1646 + }, + { + "epoch": 0.9029605263157895, + "grad_norm": 0.46801090240478516, + "learning_rate": 5.109600120195368e-07, + "loss": 0.4503188133239746, + "step": 1647 + }, + { + "epoch": 0.9035087719298246, + "grad_norm": 0.479351282119751, + "learning_rate": 5.052517430069204e-07, + "loss": 0.44086822867393494, + "step": 1648 + }, + { + "epoch": 0.9040570175438597, + "grad_norm": 0.46688783168792725, + "learning_rate": 4.995747122881134e-07, + "loss": 0.4453203082084656, + "step": 1649 + }, + { + "epoch": 0.9046052631578947, + "grad_norm": 0.4640738368034363, + "learning_rate": 4.939289385408896e-07, + "loss": 0.4435104727745056, + "step": 1650 + }, + { + "epoch": 0.9051535087719298, + "grad_norm": 0.490331768989563, + "learning_rate": 4.883144403401852e-07, + "loss": 0.44754600524902344, + "step": 1651 + }, + { + "epoch": 0.9057017543859649, + "grad_norm": 0.4848006069660187, + "learning_rate": 4.827312361580383e-07, + "loss": 0.44139939546585083, + "step": 1652 + }, + { + "epoch": 0.90625, + "grad_norm": 0.48309126496315, + "learning_rate": 4.771793443635254e-07, + "loss": 0.4521448016166687, + "step": 1653 + }, + { + "epoch": 0.9067982456140351, + "grad_norm": 0.46978119015693665, + "learning_rate": 4.716587832227071e-07, + "loss": 0.43720659613609314, + "step": 1654 + }, + { + "epoch": 0.9073464912280702, + "grad_norm": 0.47674959897994995, + "learning_rate": 4.6616957089856143e-07, + "loss": 0.4457699954509735, + "step": 1655 + }, + { + "epoch": 0.9078947368421053, + "grad_norm": 0.4758906960487366, + "learning_rate": 4.6071172545092414e-07, + "loss": 0.4408598840236664, + "step": 1656 + }, + { + "epoch": 0.9084429824561403, + "grad_norm": 0.4686860740184784, + "learning_rate": 4.552852648364414e-07, + "loss": 0.4513329267501831, + "step": 1657 + }, + { + "epoch": 0.9089912280701754, + "grad_norm": 0.4459623396396637, + "learning_rate": 4.4989020690849315e-07, + "loss": 0.443990558385849, + "step": 1658 + }, + { + "epoch": 0.9095394736842105, + "grad_norm": 0.46643540263175964, + "learning_rate": 4.44526569417143e-07, + "loss": 0.443215548992157, + "step": 1659 + }, + { + "epoch": 0.9100877192982456, + "grad_norm": 0.47309648990631104, + "learning_rate": 4.391943700090839e-07, + "loss": 0.4382048547267914, + "step": 1660 + }, + { + "epoch": 0.9106359649122807, + "grad_norm": 0.4757229685783386, + "learning_rate": 4.338936262275717e-07, + "loss": 0.443150132894516, + "step": 1661 + }, + { + "epoch": 0.9111842105263158, + "grad_norm": 0.48588597774505615, + "learning_rate": 4.286243555123737e-07, + "loss": 0.43010038137435913, + "step": 1662 + }, + { + "epoch": 0.9117324561403509, + "grad_norm": 0.4803837239742279, + "learning_rate": 4.2338657519970903e-07, + "loss": 0.44262608885765076, + "step": 1663 + }, + { + "epoch": 0.9122807017543859, + "grad_norm": 0.45542028546333313, + "learning_rate": 4.1818030252218976e-07, + "loss": 0.45058876276016235, + "step": 1664 + }, + { + "epoch": 0.912828947368421, + "grad_norm": 0.46372881531715393, + "learning_rate": 4.1300555460876415e-07, + "loss": 0.4500831365585327, + "step": 1665 + }, + { + "epoch": 0.9133771929824561, + "grad_norm": 0.4932945668697357, + "learning_rate": 4.0786234848466775e-07, + "loss": 0.43038302659988403, + "step": 1666 + }, + { + "epoch": 0.9139254385964912, + "grad_norm": 0.4446896016597748, + "learning_rate": 4.0275070107135605e-07, + "loss": 0.4467035233974457, + "step": 1667 + }, + { + "epoch": 0.9144736842105263, + "grad_norm": 0.47738441824913025, + "learning_rate": 3.976706291864596e-07, + "loss": 0.4430491328239441, + "step": 1668 + }, + { + "epoch": 0.9150219298245614, + "grad_norm": 0.4746912121772766, + "learning_rate": 3.926221495437199e-07, + "loss": 0.4459044337272644, + "step": 1669 + }, + { + "epoch": 0.9155701754385965, + "grad_norm": 0.4573158919811249, + "learning_rate": 3.8760527875293943e-07, + "loss": 0.4443172216415405, + "step": 1670 + }, + { + "epoch": 0.9161184210526315, + "grad_norm": 0.46117162704467773, + "learning_rate": 3.826200333199237e-07, + "loss": 0.4394051432609558, + "step": 1671 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.463273823261261, + "learning_rate": 3.7766642964643165e-07, + "loss": 0.44520771503448486, + "step": 1672 + }, + { + "epoch": 0.9172149122807017, + "grad_norm": 0.47583577036857605, + "learning_rate": 3.7274448403011975e-07, + "loss": 0.4473021924495697, + "step": 1673 + }, + { + "epoch": 0.9177631578947368, + "grad_norm": 0.47658321261405945, + "learning_rate": 3.678542126644813e-07, + "loss": 0.43678218126296997, + "step": 1674 + }, + { + "epoch": 0.9183114035087719, + "grad_norm": 0.4604431688785553, + "learning_rate": 3.6299563163880836e-07, + "loss": 0.45016664266586304, + "step": 1675 + }, + { + "epoch": 0.918859649122807, + "grad_norm": 0.4587794542312622, + "learning_rate": 3.5816875693812316e-07, + "loss": 0.4506627917289734, + "step": 1676 + }, + { + "epoch": 0.9194078947368421, + "grad_norm": 0.4732038080692291, + "learning_rate": 3.5337360444313354e-07, + "loss": 0.4495949149131775, + "step": 1677 + }, + { + "epoch": 0.9199561403508771, + "grad_norm": 0.4442287087440491, + "learning_rate": 3.486101899301797e-07, + "loss": 0.4385918974876404, + "step": 1678 + }, + { + "epoch": 0.9205043859649122, + "grad_norm": 0.46998754143714905, + "learning_rate": 3.438785290711855e-07, + "loss": 0.44074422121047974, + "step": 1679 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.5042349696159363, + "learning_rate": 3.3917863743359815e-07, + "loss": 0.45951956510543823, + "step": 1680 + }, + { + "epoch": 0.9216008771929824, + "grad_norm": 0.4709184467792511, + "learning_rate": 3.345105304803431e-07, + "loss": 0.4513796269893646, + "step": 1681 + }, + { + "epoch": 0.9221491228070176, + "grad_norm": 0.48918354511260986, + "learning_rate": 3.298742235697749e-07, + "loss": 0.43227851390838623, + "step": 1682 + }, + { + "epoch": 0.9226973684210527, + "grad_norm": 0.5025581121444702, + "learning_rate": 3.252697319556231e-07, + "loss": 0.43624621629714966, + "step": 1683 + }, + { + "epoch": 0.9232456140350878, + "grad_norm": 0.4808434844017029, + "learning_rate": 3.206970707869406e-07, + "loss": 0.4348883628845215, + "step": 1684 + }, + { + "epoch": 0.9237938596491229, + "grad_norm": 0.4720389246940613, + "learning_rate": 3.1615625510806104e-07, + "loss": 0.4509449601173401, + "step": 1685 + }, + { + "epoch": 0.9243421052631579, + "grad_norm": 0.4458601474761963, + "learning_rate": 3.116472998585396e-07, + "loss": 0.4543895423412323, + "step": 1686 + }, + { + "epoch": 0.924890350877193, + "grad_norm": 0.4926338493824005, + "learning_rate": 3.0717021987310967e-07, + "loss": 0.4404997229576111, + "step": 1687 + }, + { + "epoch": 0.9254385964912281, + "grad_norm": 0.4627749025821686, + "learning_rate": 3.0272502988163645e-07, + "loss": 0.4535558223724365, + "step": 1688 + }, + { + "epoch": 0.9259868421052632, + "grad_norm": 0.4702151119709015, + "learning_rate": 2.983117445090622e-07, + "loss": 0.44108468294143677, + "step": 1689 + }, + { + "epoch": 0.9265350877192983, + "grad_norm": 0.46197929978370667, + "learning_rate": 2.939303782753611e-07, + "loss": 0.4439297616481781, + "step": 1690 + }, + { + "epoch": 0.9270833333333334, + "grad_norm": 0.45058155059814453, + "learning_rate": 2.8958094559549345e-07, + "loss": 0.4373398721218109, + "step": 1691 + }, + { + "epoch": 0.9276315789473685, + "grad_norm": 0.45918381214141846, + "learning_rate": 2.8526346077935253e-07, + "loss": 0.447954386472702, + "step": 1692 + }, + { + "epoch": 0.9281798245614035, + "grad_norm": 0.4705754518508911, + "learning_rate": 2.8097793803172456e-07, + "loss": 0.43893513083457947, + "step": 1693 + }, + { + "epoch": 0.9287280701754386, + "grad_norm": 0.4503750205039978, + "learning_rate": 2.767243914522377e-07, + "loss": 0.44709891080856323, + "step": 1694 + }, + { + "epoch": 0.9292763157894737, + "grad_norm": 0.44616571068763733, + "learning_rate": 2.725028350353176e-07, + "loss": 0.4397805631160736, + "step": 1695 + }, + { + "epoch": 0.9298245614035088, + "grad_norm": 0.49138715863227844, + "learning_rate": 2.6831328267013625e-07, + "loss": 0.44462379813194275, + "step": 1696 + }, + { + "epoch": 0.9303728070175439, + "grad_norm": 0.46928104758262634, + "learning_rate": 2.6415574814057765e-07, + "loss": 0.44445496797561646, + "step": 1697 + }, + { + "epoch": 0.930921052631579, + "grad_norm": 0.4646775424480438, + "learning_rate": 2.6003024512517683e-07, + "loss": 0.4469691216945648, + "step": 1698 + }, + { + "epoch": 0.9314692982456141, + "grad_norm": 0.44269227981567383, + "learning_rate": 2.5593678719708746e-07, + "loss": 0.4480845630168915, + "step": 1699 + }, + { + "epoch": 0.9320175438596491, + "grad_norm": 0.458169549703598, + "learning_rate": 2.5187538782403653e-07, + "loss": 0.4372929036617279, + "step": 1700 + }, + { + "epoch": 0.9325657894736842, + "grad_norm": 0.4619954526424408, + "learning_rate": 2.4784606036826974e-07, + "loss": 0.45119795203208923, + "step": 1701 + }, + { + "epoch": 0.9331140350877193, + "grad_norm": 0.45698124170303345, + "learning_rate": 2.4384881808651727e-07, + "loss": 0.4426797032356262, + "step": 1702 + }, + { + "epoch": 0.9336622807017544, + "grad_norm": 0.4546036720275879, + "learning_rate": 2.398836741299482e-07, + "loss": 0.43738052248954773, + "step": 1703 + }, + { + "epoch": 0.9342105263157895, + "grad_norm": 0.44701454043388367, + "learning_rate": 2.3595064154412374e-07, + "loss": 0.44920939207077026, + "step": 1704 + }, + { + "epoch": 0.9347587719298246, + "grad_norm": 0.46189531683921814, + "learning_rate": 2.3204973326895753e-07, + "loss": 0.43537482619285583, + "step": 1705 + }, + { + "epoch": 0.9353070175438597, + "grad_norm": 0.44235292077064514, + "learning_rate": 2.2818096213867657e-07, + "loss": 0.4314463138580322, + "step": 1706 + }, + { + "epoch": 0.9358552631578947, + "grad_norm": 0.44796082377433777, + "learning_rate": 2.2434434088176694e-07, + "loss": 0.4354679584503174, + "step": 1707 + }, + { + "epoch": 0.9364035087719298, + "grad_norm": 0.4733355641365051, + "learning_rate": 2.205398821209459e-07, + "loss": 0.4397640824317932, + "step": 1708 + }, + { + "epoch": 0.9369517543859649, + "grad_norm": 0.4604198634624481, + "learning_rate": 2.167675983731099e-07, + "loss": 0.43632054328918457, + "step": 1709 + }, + { + "epoch": 0.9375, + "grad_norm": 0.4596974849700928, + "learning_rate": 2.1302750204930112e-07, + "loss": 0.43812295794487, + "step": 1710 + }, + { + "epoch": 0.9380482456140351, + "grad_norm": 0.44256773591041565, + "learning_rate": 2.0931960545466311e-07, + "loss": 0.4327971339225769, + "step": 1711 + }, + { + "epoch": 0.9385964912280702, + "grad_norm": 0.4798187017440796, + "learning_rate": 2.0564392078839647e-07, + "loss": 0.4406653344631195, + "step": 1712 + }, + { + "epoch": 0.9391447368421053, + "grad_norm": 0.486609548330307, + "learning_rate": 2.0200046014372644e-07, + "loss": 0.43356356024742126, + "step": 1713 + }, + { + "epoch": 0.9396929824561403, + "grad_norm": 0.6000531911849976, + "learning_rate": 1.9838923550785872e-07, + "loss": 0.4366445541381836, + "step": 1714 + }, + { + "epoch": 0.9402412280701754, + "grad_norm": 0.4627332389354706, + "learning_rate": 1.9481025876193826e-07, + "loss": 0.44243139028549194, + "step": 1715 + }, + { + "epoch": 0.9407894736842105, + "grad_norm": 0.7296271324157715, + "learning_rate": 1.91263541681016e-07, + "loss": 0.439893901348114, + "step": 1716 + }, + { + "epoch": 0.9413377192982456, + "grad_norm": 0.4577433466911316, + "learning_rate": 1.877490959340045e-07, + "loss": 0.43834924697875977, + "step": 1717 + }, + { + "epoch": 0.9418859649122807, + "grad_norm": 0.4576733112335205, + "learning_rate": 1.8426693308364108e-07, + "loss": 0.43985801935195923, + "step": 1718 + }, + { + "epoch": 0.9424342105263158, + "grad_norm": 0.4688190817832947, + "learning_rate": 1.8081706458645154e-07, + "loss": 0.4420456886291504, + "step": 1719 + }, + { + "epoch": 0.9429824561403509, + "grad_norm": 0.45228761434555054, + "learning_rate": 1.7739950179271103e-07, + "loss": 0.4464713931083679, + "step": 1720 + }, + { + "epoch": 0.9435307017543859, + "grad_norm": 0.4589843451976776, + "learning_rate": 1.7401425594640753e-07, + "loss": 0.4457239508628845, + "step": 1721 + }, + { + "epoch": 0.944078947368421, + "grad_norm": 0.45316648483276367, + "learning_rate": 1.7066133818520402e-07, + "loss": 0.45121461153030396, + "step": 1722 + }, + { + "epoch": 0.9446271929824561, + "grad_norm": 0.4696495532989502, + "learning_rate": 1.6734075954040195e-07, + "loss": 0.4448394179344177, + "step": 1723 + }, + { + "epoch": 0.9451754385964912, + "grad_norm": 0.4616459906101227, + "learning_rate": 1.6405253093690344e-07, + "loss": 0.4420175552368164, + "step": 1724 + }, + { + "epoch": 0.9457236842105263, + "grad_norm": 0.4860699474811554, + "learning_rate": 1.6079666319318122e-07, + "loss": 0.4343988299369812, + "step": 1725 + }, + { + "epoch": 0.9462719298245614, + "grad_norm": 0.4608551561832428, + "learning_rate": 1.5757316702123326e-07, + "loss": 0.4510036110877991, + "step": 1726 + }, + { + "epoch": 0.9468201754385965, + "grad_norm": 0.4521878957748413, + "learning_rate": 1.543820530265594e-07, + "loss": 0.4449629783630371, + "step": 1727 + }, + { + "epoch": 0.9473684210526315, + "grad_norm": 0.4613654315471649, + "learning_rate": 1.5122333170811577e-07, + "loss": 0.4393472671508789, + "step": 1728 + }, + { + "epoch": 0.9479166666666666, + "grad_norm": 0.4632744789123535, + "learning_rate": 1.4809701345828487e-07, + "loss": 0.4483092725276947, + "step": 1729 + }, + { + "epoch": 0.9484649122807017, + "grad_norm": 0.4722960293292999, + "learning_rate": 1.4500310856284337e-07, + "loss": 0.4436339735984802, + "step": 1730 + }, + { + "epoch": 0.9490131578947368, + "grad_norm": 0.4626271426677704, + "learning_rate": 1.419416272009233e-07, + "loss": 0.4514046609401703, + "step": 1731 + }, + { + "epoch": 0.9495614035087719, + "grad_norm": 0.46035781502723694, + "learning_rate": 1.3891257944498416e-07, + "loss": 0.44555479288101196, + "step": 1732 + }, + { + "epoch": 0.950109649122807, + "grad_norm": 0.4788576066493988, + "learning_rate": 1.3591597526077527e-07, + "loss": 0.43546605110168457, + "step": 1733 + }, + { + "epoch": 0.9506578947368421, + "grad_norm": 0.4717090129852295, + "learning_rate": 1.3295182450730472e-07, + "loss": 0.4481337070465088, + "step": 1734 + }, + { + "epoch": 0.9512061403508771, + "grad_norm": 0.4677014648914337, + "learning_rate": 1.3002013693680816e-07, + "loss": 0.44089481234550476, + "step": 1735 + }, + { + "epoch": 0.9517543859649122, + "grad_norm": 0.5226771831512451, + "learning_rate": 1.2712092219471228e-07, + "loss": 0.4421740174293518, + "step": 1736 + }, + { + "epoch": 0.9523026315789473, + "grad_norm": 0.46488237380981445, + "learning_rate": 1.2425418981960813e-07, + "loss": 0.4298960566520691, + "step": 1737 + }, + { + "epoch": 0.9528508771929824, + "grad_norm": 0.473090797662735, + "learning_rate": 1.214199492432211e-07, + "loss": 0.4425022006034851, + "step": 1738 + }, + { + "epoch": 0.9533991228070176, + "grad_norm": 0.4689166843891144, + "learning_rate": 1.1861820979036986e-07, + "loss": 0.44378551840782166, + "step": 1739 + }, + { + "epoch": 0.9539473684210527, + "grad_norm": 0.47417959570884705, + "learning_rate": 1.1584898067894867e-07, + "loss": 0.4403073787689209, + "step": 1740 + }, + { + "epoch": 0.9544956140350878, + "grad_norm": 0.4821755886077881, + "learning_rate": 1.131122710198873e-07, + "loss": 0.4455302655696869, + "step": 1741 + }, + { + "epoch": 0.9550438596491229, + "grad_norm": 0.4931686520576477, + "learning_rate": 1.1040808981712448e-07, + "loss": 0.43022045493125916, + "step": 1742 + }, + { + "epoch": 0.9555921052631579, + "grad_norm": 0.4758283197879791, + "learning_rate": 1.0773644596758003e-07, + "loss": 0.4449695348739624, + "step": 1743 + }, + { + "epoch": 0.956140350877193, + "grad_norm": 0.4499591588973999, + "learning_rate": 1.0509734826112395e-07, + "loss": 0.44729599356651306, + "step": 1744 + }, + { + "epoch": 0.9566885964912281, + "grad_norm": 0.4610680937767029, + "learning_rate": 1.0249080538054512e-07, + "loss": 0.4448246359825134, + "step": 1745 + }, + { + "epoch": 0.9572368421052632, + "grad_norm": 0.4604075849056244, + "learning_rate": 9.991682590152707e-08, + "loss": 0.4426057040691376, + "step": 1746 + }, + { + "epoch": 0.9577850877192983, + "grad_norm": 0.5067770481109619, + "learning_rate": 9.737541829261787e-08, + "loss": 0.4603898525238037, + "step": 1747 + }, + { + "epoch": 0.9583333333333334, + "grad_norm": 0.4828222692012787, + "learning_rate": 9.486659091520244e-08, + "loss": 0.44565194845199585, + "step": 1748 + }, + { + "epoch": 0.9588815789473685, + "grad_norm": 0.46997013688087463, + "learning_rate": 9.239035202347147e-08, + "loss": 0.44362831115722656, + "step": 1749 + }, + { + "epoch": 0.9594298245614035, + "grad_norm": 0.5518832802772522, + "learning_rate": 8.994670976440357e-08, + "loss": 0.44277042150497437, + "step": 1750 + }, + { + "epoch": 0.9599780701754386, + "grad_norm": 0.4646471440792084, + "learning_rate": 8.753567217772652e-08, + "loss": 0.44326919317245483, + "step": 1751 + }, + { + "epoch": 0.9605263157894737, + "grad_norm": 0.46489813923835754, + "learning_rate": 8.515724719589835e-08, + "loss": 0.4298398196697235, + "step": 1752 + }, + { + "epoch": 0.9610745614035088, + "grad_norm": 0.9936460852622986, + "learning_rate": 8.281144264408291e-08, + "loss": 0.44022315740585327, + "step": 1753 + }, + { + "epoch": 0.9616228070175439, + "grad_norm": 0.462695449590683, + "learning_rate": 8.04982662401188e-08, + "loss": 0.442985862493515, + "step": 1754 + }, + { + "epoch": 0.962171052631579, + "grad_norm": 0.4724031984806061, + "learning_rate": 7.821772559449381e-08, + "loss": 0.4336352050304413, + "step": 1755 + }, + { + "epoch": 0.9627192982456141, + "grad_norm": 0.4420538544654846, + "learning_rate": 7.596982821032495e-08, + "loss": 0.4361079931259155, + "step": 1756 + }, + { + "epoch": 0.9632675438596491, + "grad_norm": 0.4517374336719513, + "learning_rate": 7.375458148332959e-08, + "loss": 0.4529159963130951, + "step": 1757 + }, + { + "epoch": 0.9638157894736842, + "grad_norm": 0.4620855450630188, + "learning_rate": 7.157199270180326e-08, + "loss": 0.4500415027141571, + "step": 1758 + }, + { + "epoch": 0.9643640350877193, + "grad_norm": 0.47779715061187744, + "learning_rate": 6.942206904659521e-08, + "loss": 0.44449353218078613, + "step": 1759 + }, + { + "epoch": 0.9649122807017544, + "grad_norm": 0.47931793332099915, + "learning_rate": 6.730481759108287e-08, + "loss": 0.4509713053703308, + "step": 1760 + }, + { + "epoch": 0.9654605263157895, + "grad_norm": 0.464688777923584, + "learning_rate": 6.522024530115079e-08, + "loss": 0.4385107755661011, + "step": 1761 + }, + { + "epoch": 0.9660087719298246, + "grad_norm": 0.4776732325553894, + "learning_rate": 6.316835903516949e-08, + "loss": 0.45002496242523193, + "step": 1762 + }, + { + "epoch": 0.9665570175438597, + "grad_norm": 0.4563760757446289, + "learning_rate": 6.114916554396555e-08, + "loss": 0.4582784175872803, + "step": 1763 + }, + { + "epoch": 0.9671052631578947, + "grad_norm": 0.46798089146614075, + "learning_rate": 5.9162671470809343e-08, + "loss": 0.43207693099975586, + "step": 1764 + }, + { + "epoch": 0.9676535087719298, + "grad_norm": 0.5528680086135864, + "learning_rate": 5.720888335138508e-08, + "loss": 0.43807539343833923, + "step": 1765 + }, + { + "epoch": 0.9682017543859649, + "grad_norm": 0.5016975402832031, + "learning_rate": 5.528780761377306e-08, + "loss": 0.46113985776901245, + "step": 1766 + }, + { + "epoch": 0.96875, + "grad_norm": 0.4595453143119812, + "learning_rate": 5.339945057842966e-08, + "loss": 0.4475017488002777, + "step": 1767 + }, + { + "epoch": 0.9692982456140351, + "grad_norm": 0.49467697739601135, + "learning_rate": 5.15438184581607e-08, + "loss": 0.4496666193008423, + "step": 1768 + }, + { + "epoch": 0.9698464912280702, + "grad_norm": 0.4644295573234558, + "learning_rate": 4.9720917358111464e-08, + "loss": 0.44429510831832886, + "step": 1769 + }, + { + "epoch": 0.9703947368421053, + "grad_norm": 0.4744158089160919, + "learning_rate": 4.7930753275733376e-08, + "loss": 0.4378962516784668, + "step": 1770 + }, + { + "epoch": 0.9709429824561403, + "grad_norm": 0.4634189009666443, + "learning_rate": 4.617333210077513e-08, + "loss": 0.4459246098995209, + "step": 1771 + }, + { + "epoch": 0.9714912280701754, + "grad_norm": 0.4679155647754669, + "learning_rate": 4.444865961525824e-08, + "loss": 0.4505774974822998, + "step": 1772 + }, + { + "epoch": 0.9720394736842105, + "grad_norm": 0.488528311252594, + "learning_rate": 4.275674149345821e-08, + "loss": 0.4513344466686249, + "step": 1773 + }, + { + "epoch": 0.9725877192982456, + "grad_norm": 0.4505665898323059, + "learning_rate": 4.109758330188895e-08, + "loss": 0.44093236327171326, + "step": 1774 + }, + { + "epoch": 0.9731359649122807, + "grad_norm": 0.4865361750125885, + "learning_rate": 3.94711904992795e-08, + "loss": 0.4391603171825409, + "step": 1775 + }, + { + "epoch": 0.9736842105263158, + "grad_norm": 0.46886420249938965, + "learning_rate": 3.787756843656287e-08, + "loss": 0.4503663182258606, + "step": 1776 + }, + { + "epoch": 0.9742324561403509, + "grad_norm": 0.46422988176345825, + "learning_rate": 3.6316722356848354e-08, + "loss": 0.4432747960090637, + "step": 1777 + }, + { + "epoch": 0.9747807017543859, + "grad_norm": 0.4637066423892975, + "learning_rate": 3.4788657395414813e-08, + "loss": 0.4446980953216553, + "step": 1778 + }, + { + "epoch": 0.975328947368421, + "grad_norm": 0.48918765783309937, + "learning_rate": 3.3293378579688505e-08, + "loss": 0.43319782614707947, + "step": 1779 + }, + { + "epoch": 0.9758771929824561, + "grad_norm": 0.4590325951576233, + "learning_rate": 3.1830890829226416e-08, + "loss": 0.44327086210250854, + "step": 1780 + }, + { + "epoch": 0.9764254385964912, + "grad_norm": 0.450764536857605, + "learning_rate": 3.0401198955702926e-08, + "loss": 0.44975000619888306, + "step": 1781 + }, + { + "epoch": 0.9769736842105263, + "grad_norm": 0.45482075214385986, + "learning_rate": 2.9004307662887642e-08, + "loss": 0.44603753089904785, + "step": 1782 + }, + { + "epoch": 0.9775219298245614, + "grad_norm": 0.45557013154029846, + "learning_rate": 2.76402215466387e-08, + "loss": 0.4406141936779022, + "step": 1783 + }, + { + "epoch": 0.9780701754385965, + "grad_norm": 0.48736274242401123, + "learning_rate": 2.6308945094880577e-08, + "loss": 0.4546322524547577, + "step": 1784 + }, + { + "epoch": 0.9786184210526315, + "grad_norm": 0.4766358435153961, + "learning_rate": 2.5010482687595206e-08, + "loss": 0.43386587500572205, + "step": 1785 + }, + { + "epoch": 0.9791666666666666, + "grad_norm": 0.5122347474098206, + "learning_rate": 2.3744838596799767e-08, + "loss": 0.42556485533714294, + "step": 1786 + }, + { + "epoch": 0.9797149122807017, + "grad_norm": 0.4523909389972687, + "learning_rate": 2.251201698654115e-08, + "loss": 0.4424014985561371, + "step": 1787 + }, + { + "epoch": 0.9802631578947368, + "grad_norm": 0.48024672269821167, + "learning_rate": 2.1312021912875957e-08, + "loss": 0.44508880376815796, + "step": 1788 + }, + { + "epoch": 0.9808114035087719, + "grad_norm": 0.47597408294677734, + "learning_rate": 2.01448573238594e-08, + "loss": 0.4251779317855835, + "step": 1789 + }, + { + "epoch": 0.981359649122807, + "grad_norm": 0.49238336086273193, + "learning_rate": 1.901052705953532e-08, + "loss": 0.4298170208930969, + "step": 1790 + }, + { + "epoch": 0.9819078947368421, + "grad_norm": 0.4651866853237152, + "learning_rate": 1.7909034851917306e-08, + "loss": 0.42286503314971924, + "step": 1791 + }, + { + "epoch": 0.9824561403508771, + "grad_norm": 0.4722592830657959, + "learning_rate": 1.684038432498092e-08, + "loss": 0.428172767162323, + "step": 1792 + }, + { + "epoch": 0.9830043859649122, + "grad_norm": 0.4582705795764923, + "learning_rate": 1.5804578994652598e-08, + "loss": 0.4320406913757324, + "step": 1793 + }, + { + "epoch": 0.9835526315789473, + "grad_norm": 0.46447935700416565, + "learning_rate": 1.4801622268791893e-08, + "loss": 0.44131776690483093, + "step": 1794 + }, + { + "epoch": 0.9841008771929824, + "grad_norm": 0.4600572884082794, + "learning_rate": 1.3831517447188137e-08, + "loss": 0.46617740392684937, + "step": 1795 + }, + { + "epoch": 0.9846491228070176, + "grad_norm": 0.46110910177230835, + "learning_rate": 1.2894267721543786e-08, + "loss": 0.4432889223098755, + "step": 1796 + }, + { + "epoch": 0.9851973684210527, + "grad_norm": 0.45410728454589844, + "learning_rate": 1.1989876175468872e-08, + "loss": 0.42493683099746704, + "step": 1797 + }, + { + "epoch": 0.9857456140350878, + "grad_norm": 0.47796985507011414, + "learning_rate": 1.1118345784463247e-08, + "loss": 0.42538389563560486, + "step": 1798 + }, + { + "epoch": 0.9862938596491229, + "grad_norm": 0.4609275460243225, + "learning_rate": 1.0279679415916565e-08, + "loss": 0.45832300186157227, + "step": 1799 + }, + { + "epoch": 0.9868421052631579, + "grad_norm": 0.4755944013595581, + "learning_rate": 9.473879829091648e-09, + "loss": 0.4499424397945404, + "step": 1800 + }, + { + "epoch": 0.987390350877193, + "grad_norm": 0.4787357449531555, + "learning_rate": 8.700949675115588e-09, + "loss": 0.44248414039611816, + "step": 1801 + }, + { + "epoch": 0.9879385964912281, + "grad_norm": 0.45411446690559387, + "learning_rate": 7.960891496977541e-09, + "loss": 0.4389973282814026, + "step": 1802 + }, + { + "epoch": 0.9884868421052632, + "grad_norm": 0.4596308469772339, + "learning_rate": 7.253707729513171e-09, + "loss": 0.4519648253917694, + "step": 1803 + }, + { + "epoch": 0.9890350877192983, + "grad_norm": 0.4609762728214264, + "learning_rate": 6.579400699397998e-09, + "loss": 0.44419950246810913, + "step": 1804 + }, + { + "epoch": 0.9895833333333334, + "grad_norm": 0.4751090705394745, + "learning_rate": 5.937972625145172e-09, + "loss": 0.46066397428512573, + "step": 1805 + }, + { + "epoch": 0.9901315789473685, + "grad_norm": 0.4669820964336395, + "learning_rate": 5.329425617088824e-09, + "loss": 0.43845662474632263, + "step": 1806 + }, + { + "epoch": 0.9906798245614035, + "grad_norm": 0.46809566020965576, + "learning_rate": 4.753761677388502e-09, + "loss": 0.4518412947654724, + "step": 1807 + }, + { + "epoch": 0.9912280701754386, + "grad_norm": 0.4599871039390564, + "learning_rate": 4.2109827000103024e-09, + "loss": 0.4281280040740967, + "step": 1808 + }, + { + "epoch": 0.9917763157894737, + "grad_norm": 0.4735415279865265, + "learning_rate": 3.7010904707313057e-09, + "loss": 0.44415637850761414, + "step": 1809 + }, + { + "epoch": 0.9923245614035088, + "grad_norm": 0.4610520005226135, + "learning_rate": 3.224086667128479e-09, + "loss": 0.44374585151672363, + "step": 1810 + }, + { + "epoch": 0.9928728070175439, + "grad_norm": 0.4630661606788635, + "learning_rate": 2.7799728585709005e-09, + "loss": 0.43956732749938965, + "step": 1811 + }, + { + "epoch": 0.993421052631579, + "grad_norm": 0.46847841143608093, + "learning_rate": 2.3687505062208737e-09, + "loss": 0.4338926672935486, + "step": 1812 + }, + { + "epoch": 0.9939692982456141, + "grad_norm": 0.4724494218826294, + "learning_rate": 1.9904209630250417e-09, + "loss": 0.4389767050743103, + "step": 1813 + }, + { + "epoch": 0.9945175438596491, + "grad_norm": 0.47051912546157837, + "learning_rate": 1.644985473709948e-09, + "loss": 0.4498752951622009, + "step": 1814 + }, + { + "epoch": 0.9950657894736842, + "grad_norm": 0.44187289476394653, + "learning_rate": 1.3324451747798173e-09, + "loss": 0.44881471991539, + "step": 1815 + }, + { + "epoch": 0.9956140350877193, + "grad_norm": 0.4773845076560974, + "learning_rate": 1.0528010945098921e-09, + "loss": 0.44216442108154297, + "step": 1816 + }, + { + "epoch": 0.9961622807017544, + "grad_norm": 0.44168299436569214, + "learning_rate": 8.060541529464339e-10, + "loss": 0.45296916365623474, + "step": 1817 + }, + { + "epoch": 0.9967105263157895, + "grad_norm": 0.4654878079891205, + "learning_rate": 5.922051619033919e-10, + "loss": 0.44899439811706543, + "step": 1818 + }, + { + "epoch": 0.9972587719298246, + "grad_norm": 0.47827789187431335, + "learning_rate": 4.112548249546322e-10, + "loss": 0.44498586654663086, + "step": 1819 + }, + { + "epoch": 0.9978070175438597, + "grad_norm": 0.48996180295944214, + "learning_rate": 2.632037374383778e-10, + "loss": 0.4426143169403076, + "step": 1820 + }, + { + "epoch": 0.9983552631578947, + "grad_norm": 0.4770708680152893, + "learning_rate": 1.480523864516581e-10, + "loss": 0.44537267088890076, + "step": 1821 + }, + { + "epoch": 0.9989035087719298, + "grad_norm": 0.5157729387283325, + "learning_rate": 6.580115084919847e-11, + "loss": 0.4398488402366638, + "step": 1822 + }, + { + "epoch": 0.9994517543859649, + "grad_norm": 0.48881420493125916, + "learning_rate": 1.6450301243420285e-11, + "loss": 0.437910258769989, + "step": 1823 + }, + { + "epoch": 1.0, + "grad_norm": 0.4549266993999481, + "learning_rate": 0.0, + "loss": 0.4407366216182709, + "step": 1824 + } + ], + "logging_steps": 1, + "max_steps": 1824, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.1659437929638186e+20, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}