{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.309685764109434, "eval_steps": 500, "global_step": 12500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00018477670889584369, "grad_norm": 0.24088887870311737, "learning_rate": 0.0, "loss": 2.3987152576446533, "step": 1 }, { "epoch": 0.00036955341779168737, "grad_norm": 0.26547858119010925, "learning_rate": 4.098360655737705e-08, "loss": 2.774164915084839, "step": 2 }, { "epoch": 0.000554330126687531, "grad_norm": 0.24729517102241516, "learning_rate": 8.19672131147541e-08, "loss": 2.3681344985961914, "step": 3 }, { "epoch": 0.0007391068355833747, "grad_norm": 0.2252364456653595, "learning_rate": 1.2295081967213116e-07, "loss": 2.347365617752075, "step": 4 }, { "epoch": 0.0009238835444792183, "grad_norm": 0.24495361745357513, "learning_rate": 1.639344262295082e-07, "loss": 2.149761199951172, "step": 5 }, { "epoch": 0.001108660253375062, "grad_norm": 0.27319467067718506, "learning_rate": 2.0491803278688524e-07, "loss": 2.494878053665161, "step": 6 }, { "epoch": 0.0012934369622709058, "grad_norm": 0.2361554354429245, "learning_rate": 2.459016393442623e-07, "loss": 2.5770766735076904, "step": 7 }, { "epoch": 0.0014782136711667495, "grad_norm": 0.22133229672908783, "learning_rate": 2.8688524590163937e-07, "loss": 2.042956829071045, "step": 8 }, { "epoch": 0.0016629903800625932, "grad_norm": 0.22019384801387787, "learning_rate": 3.278688524590164e-07, "loss": 2.4631993770599365, "step": 9 }, { "epoch": 0.0018477670889584367, "grad_norm": 0.2294536978006363, "learning_rate": 3.6885245901639347e-07, "loss": 2.508697986602783, "step": 10 }, { "epoch": 0.0020325437978542804, "grad_norm": 0.31911879777908325, "learning_rate": 4.0983606557377047e-07, "loss": 3.188476085662842, "step": 11 }, { "epoch": 0.002217320506750124, "grad_norm": 0.2536254823207855, "learning_rate": 4.508196721311476e-07, "loss": 2.5951364040374756, "step": 12 }, { "epoch": 0.002402097215645968, "grad_norm": 0.2997930347919464, "learning_rate": 4.918032786885246e-07, "loss": 2.9771904945373535, "step": 13 }, { "epoch": 0.0025868739245418115, "grad_norm": 0.21778813004493713, "learning_rate": 5.327868852459017e-07, "loss": 2.44155216217041, "step": 14 }, { "epoch": 0.0027716506334376553, "grad_norm": 0.3283197283744812, "learning_rate": 5.737704918032787e-07, "loss": 2.663536787033081, "step": 15 }, { "epoch": 0.002956427342333499, "grad_norm": 0.25381243228912354, "learning_rate": 6.147540983606558e-07, "loss": 2.3754961490631104, "step": 16 }, { "epoch": 0.0031412040512293427, "grad_norm": 0.33151787519454956, "learning_rate": 6.557377049180328e-07, "loss": 3.3958041667938232, "step": 17 }, { "epoch": 0.0033259807601251864, "grad_norm": 0.3099863827228546, "learning_rate": 6.967213114754098e-07, "loss": 2.711893320083618, "step": 18 }, { "epoch": 0.00351075746902103, "grad_norm": 0.2153325229883194, "learning_rate": 7.377049180327869e-07, "loss": 2.202052116394043, "step": 19 }, { "epoch": 0.0036955341779168734, "grad_norm": 0.24825315177440643, "learning_rate": 7.78688524590164e-07, "loss": 2.1632490158081055, "step": 20 }, { "epoch": 0.003880310886812717, "grad_norm": 0.27885496616363525, "learning_rate": 8.196721311475409e-07, "loss": 2.8247857093811035, "step": 21 }, { "epoch": 0.004065087595708561, "grad_norm": 0.3095180094242096, "learning_rate": 8.606557377049181e-07, "loss": 2.9461660385131836, "step": 22 }, { "epoch": 0.004249864304604405, "grad_norm": 0.2433432936668396, "learning_rate": 9.016393442622952e-07, "loss": 2.6200363636016846, "step": 23 }, { "epoch": 0.004434641013500248, "grad_norm": 0.29434219002723694, "learning_rate": 9.426229508196721e-07, "loss": 2.5439352989196777, "step": 24 }, { "epoch": 0.004619417722396092, "grad_norm": 0.2591114342212677, "learning_rate": 9.836065573770493e-07, "loss": 2.057875394821167, "step": 25 }, { "epoch": 0.004804194431291936, "grad_norm": 0.2714301645755768, "learning_rate": 1.0245901639344263e-06, "loss": 2.739349365234375, "step": 26 }, { "epoch": 0.004988971140187779, "grad_norm": 0.24641598761081696, "learning_rate": 1.0655737704918034e-06, "loss": 2.5039243698120117, "step": 27 }, { "epoch": 0.005173747849083623, "grad_norm": 0.32515525817871094, "learning_rate": 1.1065573770491804e-06, "loss": 2.549954652786255, "step": 28 }, { "epoch": 0.005358524557979466, "grad_norm": 0.3026675879955292, "learning_rate": 1.1475409836065575e-06, "loss": 2.81745982170105, "step": 29 }, { "epoch": 0.0055433012668753105, "grad_norm": 0.2552269995212555, "learning_rate": 1.1885245901639345e-06, "loss": 2.684337615966797, "step": 30 }, { "epoch": 0.005728077975771154, "grad_norm": 0.26018139719963074, "learning_rate": 1.2295081967213116e-06, "loss": 2.580073833465576, "step": 31 }, { "epoch": 0.005912854684666998, "grad_norm": 0.25323620438575745, "learning_rate": 1.2704918032786886e-06, "loss": 2.737957000732422, "step": 32 }, { "epoch": 0.006097631393562841, "grad_norm": 0.2035834640264511, "learning_rate": 1.3114754098360657e-06, "loss": 2.101224422454834, "step": 33 }, { "epoch": 0.006282408102458685, "grad_norm": 0.2545967102050781, "learning_rate": 1.352459016393443e-06, "loss": 2.7195825576782227, "step": 34 }, { "epoch": 0.006467184811354529, "grad_norm": 0.22219978272914886, "learning_rate": 1.3934426229508196e-06, "loss": 2.2762818336486816, "step": 35 }, { "epoch": 0.006651961520250373, "grad_norm": 0.21565257012844086, "learning_rate": 1.4344262295081968e-06, "loss": 2.105567216873169, "step": 36 }, { "epoch": 0.006836738229146216, "grad_norm": 0.21315789222717285, "learning_rate": 1.4754098360655739e-06, "loss": 2.218881607055664, "step": 37 }, { "epoch": 0.00702151493804206, "grad_norm": 0.23275326192378998, "learning_rate": 1.516393442622951e-06, "loss": 2.6363465785980225, "step": 38 }, { "epoch": 0.0072062916469379035, "grad_norm": 0.20184484124183655, "learning_rate": 1.557377049180328e-06, "loss": 2.000232458114624, "step": 39 }, { "epoch": 0.007391068355833747, "grad_norm": 0.18502052128314972, "learning_rate": 1.5983606557377053e-06, "loss": 2.02783465385437, "step": 40 }, { "epoch": 0.007575845064729591, "grad_norm": 0.23034077882766724, "learning_rate": 1.6393442622950819e-06, "loss": 2.4156503677368164, "step": 41 }, { "epoch": 0.007760621773625434, "grad_norm": 0.1947338730096817, "learning_rate": 1.6803278688524592e-06, "loss": 2.017721652984619, "step": 42 }, { "epoch": 0.007945398482521277, "grad_norm": 0.2304108589887619, "learning_rate": 1.7213114754098362e-06, "loss": 2.479938268661499, "step": 43 }, { "epoch": 0.008130175191417122, "grad_norm": 0.20530247688293457, "learning_rate": 1.7622950819672133e-06, "loss": 2.1011605262756348, "step": 44 }, { "epoch": 0.008314951900312966, "grad_norm": 0.2010030895471573, "learning_rate": 1.8032786885245903e-06, "loss": 2.2112722396850586, "step": 45 }, { "epoch": 0.00849972860920881, "grad_norm": 0.23062516748905182, "learning_rate": 1.8442622950819674e-06, "loss": 2.625047445297241, "step": 46 }, { "epoch": 0.008684505318104652, "grad_norm": 0.1795029640197754, "learning_rate": 1.8852459016393442e-06, "loss": 2.0575997829437256, "step": 47 }, { "epoch": 0.008869282027000496, "grad_norm": 0.19284705817699432, "learning_rate": 1.9262295081967215e-06, "loss": 2.186389923095703, "step": 48 }, { "epoch": 0.00905405873589634, "grad_norm": 0.18980354070663452, "learning_rate": 1.9672131147540985e-06, "loss": 2.1369142532348633, "step": 49 }, { "epoch": 0.009238835444792185, "grad_norm": 0.20028269290924072, "learning_rate": 2.0081967213114756e-06, "loss": 2.149282217025757, "step": 50 }, { "epoch": 0.009423612153688027, "grad_norm": 0.1844315528869629, "learning_rate": 2.0491803278688526e-06, "loss": 2.1714837551116943, "step": 51 }, { "epoch": 0.009608388862583871, "grad_norm": 0.19009949266910553, "learning_rate": 2.0901639344262297e-06, "loss": 2.0276408195495605, "step": 52 }, { "epoch": 0.009793165571479715, "grad_norm": 0.1857149451971054, "learning_rate": 2.1311475409836067e-06, "loss": 2.414680004119873, "step": 53 }, { "epoch": 0.009977942280375558, "grad_norm": 0.14500652253627777, "learning_rate": 2.1721311475409838e-06, "loss": 1.8302123546600342, "step": 54 }, { "epoch": 0.010162718989271402, "grad_norm": 0.1605122685432434, "learning_rate": 2.213114754098361e-06, "loss": 2.0301051139831543, "step": 55 }, { "epoch": 0.010347495698167246, "grad_norm": 0.13292397558689117, "learning_rate": 2.254098360655738e-06, "loss": 1.6478054523468018, "step": 56 }, { "epoch": 0.01053227240706309, "grad_norm": 0.19711221754550934, "learning_rate": 2.295081967213115e-06, "loss": 2.2585537433624268, "step": 57 }, { "epoch": 0.010717049115958933, "grad_norm": 0.1681194305419922, "learning_rate": 2.336065573770492e-06, "loss": 1.8417537212371826, "step": 58 }, { "epoch": 0.010901825824854777, "grad_norm": 0.15966543555259705, "learning_rate": 2.377049180327869e-06, "loss": 1.9968725442886353, "step": 59 }, { "epoch": 0.011086602533750621, "grad_norm": 0.15611667931079865, "learning_rate": 2.418032786885246e-06, "loss": 1.7798278331756592, "step": 60 }, { "epoch": 0.011271379242646465, "grad_norm": 0.1719726324081421, "learning_rate": 2.459016393442623e-06, "loss": 2.1844446659088135, "step": 61 }, { "epoch": 0.011456155951542308, "grad_norm": 0.13560648262500763, "learning_rate": 2.5e-06, "loss": 1.4112403392791748, "step": 62 }, { "epoch": 0.011640932660438152, "grad_norm": 0.10778555274009705, "learning_rate": 2.5409836065573773e-06, "loss": 1.4861712455749512, "step": 63 }, { "epoch": 0.011825709369333996, "grad_norm": 0.12593482434749603, "learning_rate": 2.5819672131147543e-06, "loss": 1.7662166357040405, "step": 64 }, { "epoch": 0.012010486078229838, "grad_norm": 0.0996285229921341, "learning_rate": 2.6229508196721314e-06, "loss": 1.5829087495803833, "step": 65 }, { "epoch": 0.012195262787125682, "grad_norm": 0.12871770560741425, "learning_rate": 2.6639344262295084e-06, "loss": 1.5366804599761963, "step": 66 }, { "epoch": 0.012380039496021527, "grad_norm": 0.11756619065999985, "learning_rate": 2.704918032786886e-06, "loss": 1.3947206735610962, "step": 67 }, { "epoch": 0.01256481620491737, "grad_norm": 0.15036584436893463, "learning_rate": 2.745901639344263e-06, "loss": 1.607935905456543, "step": 68 }, { "epoch": 0.012749592913813213, "grad_norm": 0.09538834542036057, "learning_rate": 2.786885245901639e-06, "loss": 1.2980877161026, "step": 69 }, { "epoch": 0.012934369622709057, "grad_norm": 0.12562216818332672, "learning_rate": 2.8278688524590166e-06, "loss": 1.7359843254089355, "step": 70 }, { "epoch": 0.013119146331604901, "grad_norm": 0.09023502469062805, "learning_rate": 2.8688524590163937e-06, "loss": 1.2565981149673462, "step": 71 }, { "epoch": 0.013303923040500746, "grad_norm": 0.11920010298490524, "learning_rate": 2.9098360655737707e-06, "loss": 1.6668967008590698, "step": 72 }, { "epoch": 0.013488699749396588, "grad_norm": 0.10211700946092606, "learning_rate": 2.9508196721311478e-06, "loss": 1.5361316204071045, "step": 73 }, { "epoch": 0.013673476458292432, "grad_norm": 0.1129099503159523, "learning_rate": 2.991803278688525e-06, "loss": 1.5233663320541382, "step": 74 }, { "epoch": 0.013858253167188276, "grad_norm": 0.09841630607843399, "learning_rate": 3.032786885245902e-06, "loss": 1.3280178308486938, "step": 75 }, { "epoch": 0.01404302987608412, "grad_norm": 0.1030346006155014, "learning_rate": 3.073770491803279e-06, "loss": 1.3855637311935425, "step": 76 }, { "epoch": 0.014227806584979963, "grad_norm": 0.1166791021823883, "learning_rate": 3.114754098360656e-06, "loss": 1.692766547203064, "step": 77 }, { "epoch": 0.014412583293875807, "grad_norm": 0.1157117560505867, "learning_rate": 3.155737704918033e-06, "loss": 1.5444836616516113, "step": 78 }, { "epoch": 0.014597360002771651, "grad_norm": 0.1203937903046608, "learning_rate": 3.1967213114754105e-06, "loss": 1.7095307111740112, "step": 79 }, { "epoch": 0.014782136711667494, "grad_norm": 0.09563681483268738, "learning_rate": 3.2377049180327876e-06, "loss": 1.3926582336425781, "step": 80 }, { "epoch": 0.014966913420563338, "grad_norm": 0.13222035765647888, "learning_rate": 3.2786885245901638e-06, "loss": 1.6481196880340576, "step": 81 }, { "epoch": 0.015151690129459182, "grad_norm": 0.12444092333316803, "learning_rate": 3.3196721311475413e-06, "loss": 1.6188586950302124, "step": 82 }, { "epoch": 0.015336466838355026, "grad_norm": 0.1225326806306839, "learning_rate": 3.3606557377049183e-06, "loss": 1.6079766750335693, "step": 83 }, { "epoch": 0.015521243547250868, "grad_norm": 0.07850661873817444, "learning_rate": 3.4016393442622954e-06, "loss": 1.0474170446395874, "step": 84 }, { "epoch": 0.015706020256146713, "grad_norm": 0.10204868763685226, "learning_rate": 3.4426229508196724e-06, "loss": 1.6754392385482788, "step": 85 }, { "epoch": 0.015890796965042555, "grad_norm": 0.12254143506288528, "learning_rate": 3.4836065573770495e-06, "loss": 1.6609466075897217, "step": 86 }, { "epoch": 0.0160755736739384, "grad_norm": 0.10189526528120041, "learning_rate": 3.5245901639344265e-06, "loss": 1.5539140701293945, "step": 87 }, { "epoch": 0.016260350382834243, "grad_norm": 0.09892205893993378, "learning_rate": 3.5655737704918036e-06, "loss": 1.3632971048355103, "step": 88 }, { "epoch": 0.016445127091730086, "grad_norm": 0.11935164034366608, "learning_rate": 3.6065573770491806e-06, "loss": 1.5281620025634766, "step": 89 }, { "epoch": 0.01662990380062593, "grad_norm": 0.10507043451070786, "learning_rate": 3.6475409836065577e-06, "loss": 1.451995849609375, "step": 90 }, { "epoch": 0.016814680509521774, "grad_norm": 0.12459581345319748, "learning_rate": 3.6885245901639347e-06, "loss": 1.6090208292007446, "step": 91 }, { "epoch": 0.01699945721841762, "grad_norm": 0.1009344607591629, "learning_rate": 3.729508196721312e-06, "loss": 1.2886296510696411, "step": 92 }, { "epoch": 0.017184233927313462, "grad_norm": 0.12125498056411743, "learning_rate": 3.7704918032786884e-06, "loss": 1.5595990419387817, "step": 93 }, { "epoch": 0.017369010636209305, "grad_norm": 0.08220856636762619, "learning_rate": 3.811475409836066e-06, "loss": 1.2803837060928345, "step": 94 }, { "epoch": 0.01755378734510515, "grad_norm": 0.09604591131210327, "learning_rate": 3.852459016393443e-06, "loss": 1.0359008312225342, "step": 95 }, { "epoch": 0.017738564054000993, "grad_norm": 0.0831838995218277, "learning_rate": 3.8934426229508196e-06, "loss": 1.4928193092346191, "step": 96 }, { "epoch": 0.017923340762896835, "grad_norm": 0.10466645658016205, "learning_rate": 3.934426229508197e-06, "loss": 1.3826165199279785, "step": 97 }, { "epoch": 0.01810811747179268, "grad_norm": 0.0696515217423439, "learning_rate": 3.975409836065574e-06, "loss": 1.0134645700454712, "step": 98 }, { "epoch": 0.018292894180688524, "grad_norm": 0.11665759235620499, "learning_rate": 4.016393442622951e-06, "loss": 1.7774734497070312, "step": 99 }, { "epoch": 0.01847767088958437, "grad_norm": 0.09961281716823578, "learning_rate": 4.057377049180329e-06, "loss": 1.40458345413208, "step": 100 }, { "epoch": 0.018662447598480212, "grad_norm": 0.12503457069396973, "learning_rate": 4.098360655737705e-06, "loss": 1.4400181770324707, "step": 101 }, { "epoch": 0.018847224307376054, "grad_norm": 0.0715487003326416, "learning_rate": 4.139344262295083e-06, "loss": 1.164671778678894, "step": 102 }, { "epoch": 0.0190320010162719, "grad_norm": 0.11996988952159882, "learning_rate": 4.180327868852459e-06, "loss": 1.4990315437316895, "step": 103 }, { "epoch": 0.019216777725167743, "grad_norm": 0.10172359645366669, "learning_rate": 4.221311475409837e-06, "loss": 1.3712670803070068, "step": 104 }, { "epoch": 0.019401554434063585, "grad_norm": 0.11445247381925583, "learning_rate": 4.2622950819672135e-06, "loss": 1.4764546155929565, "step": 105 }, { "epoch": 0.01958633114295943, "grad_norm": 0.14201143383979797, "learning_rate": 4.30327868852459e-06, "loss": 1.818518042564392, "step": 106 }, { "epoch": 0.019771107851855273, "grad_norm": 0.09214181452989578, "learning_rate": 4.3442622950819676e-06, "loss": 1.3146964311599731, "step": 107 }, { "epoch": 0.019955884560751116, "grad_norm": 0.1036999523639679, "learning_rate": 4.385245901639344e-06, "loss": 1.4357385635375977, "step": 108 }, { "epoch": 0.02014066126964696, "grad_norm": 0.10278178751468658, "learning_rate": 4.426229508196722e-06, "loss": 1.4627935886383057, "step": 109 }, { "epoch": 0.020325437978542804, "grad_norm": 0.08426320552825928, "learning_rate": 4.467213114754098e-06, "loss": 1.5069490671157837, "step": 110 }, { "epoch": 0.02051021468743865, "grad_norm": 0.07356536388397217, "learning_rate": 4.508196721311476e-06, "loss": 0.9656306505203247, "step": 111 }, { "epoch": 0.020694991396334492, "grad_norm": 0.08690712600946426, "learning_rate": 4.549180327868853e-06, "loss": 1.2329204082489014, "step": 112 }, { "epoch": 0.020879768105230335, "grad_norm": 0.08951961249113083, "learning_rate": 4.59016393442623e-06, "loss": 1.258447289466858, "step": 113 }, { "epoch": 0.02106454481412618, "grad_norm": 0.09533683955669403, "learning_rate": 4.631147540983607e-06, "loss": 1.3724256753921509, "step": 114 }, { "epoch": 0.021249321523022023, "grad_norm": 0.09054508060216904, "learning_rate": 4.672131147540984e-06, "loss": 1.235846996307373, "step": 115 }, { "epoch": 0.021434098231917866, "grad_norm": 0.09722615778446198, "learning_rate": 4.7131147540983615e-06, "loss": 1.3113802671432495, "step": 116 }, { "epoch": 0.02161887494081371, "grad_norm": 0.08708129078149796, "learning_rate": 4.754098360655738e-06, "loss": 1.0961401462554932, "step": 117 }, { "epoch": 0.021803651649709554, "grad_norm": 0.08968407660722733, "learning_rate": 4.795081967213115e-06, "loss": 1.1457524299621582, "step": 118 }, { "epoch": 0.021988428358605396, "grad_norm": 0.09171068668365479, "learning_rate": 4.836065573770492e-06, "loss": 1.076693058013916, "step": 119 }, { "epoch": 0.022173205067501242, "grad_norm": 0.09295544773340225, "learning_rate": 4.877049180327869e-06, "loss": 1.3223167657852173, "step": 120 }, { "epoch": 0.022357981776397085, "grad_norm": 0.0667370930314064, "learning_rate": 4.918032786885246e-06, "loss": 0.8422390222549438, "step": 121 }, { "epoch": 0.02254275848529293, "grad_norm": 0.08461110293865204, "learning_rate": 4.959016393442623e-06, "loss": 1.0604937076568604, "step": 122 }, { "epoch": 0.022727535194188773, "grad_norm": 0.11348158866167068, "learning_rate": 5e-06, "loss": 1.3220497369766235, "step": 123 }, { "epoch": 0.022912311903084615, "grad_norm": 0.08554375171661377, "learning_rate": 5.040983606557377e-06, "loss": 1.2320774793624878, "step": 124 }, { "epoch": 0.02309708861198046, "grad_norm": 0.09483008086681366, "learning_rate": 5.0819672131147545e-06, "loss": 1.144835352897644, "step": 125 }, { "epoch": 0.023281865320876304, "grad_norm": 0.12047784775495529, "learning_rate": 5.122950819672131e-06, "loss": 1.6951477527618408, "step": 126 }, { "epoch": 0.023466642029772146, "grad_norm": 0.10329974442720413, "learning_rate": 5.163934426229509e-06, "loss": 1.4667521715164185, "step": 127 }, { "epoch": 0.023651418738667992, "grad_norm": 0.09008090943098068, "learning_rate": 5.204918032786885e-06, "loss": 1.2631990909576416, "step": 128 }, { "epoch": 0.023836195447563834, "grad_norm": 0.08008470386266708, "learning_rate": 5.245901639344263e-06, "loss": 1.1619493961334229, "step": 129 }, { "epoch": 0.024020972156459677, "grad_norm": 0.1004214957356453, "learning_rate": 5.286885245901639e-06, "loss": 1.3743982315063477, "step": 130 }, { "epoch": 0.024205748865355523, "grad_norm": 0.08813057094812393, "learning_rate": 5.327868852459017e-06, "loss": 0.9727039337158203, "step": 131 }, { "epoch": 0.024390525574251365, "grad_norm": 0.095633365213871, "learning_rate": 5.3688524590163935e-06, "loss": 1.322685718536377, "step": 132 }, { "epoch": 0.02457530228314721, "grad_norm": 0.0931473970413208, "learning_rate": 5.409836065573772e-06, "loss": 1.4433943033218384, "step": 133 }, { "epoch": 0.024760078992043053, "grad_norm": 0.08528053015470505, "learning_rate": 5.4508196721311476e-06, "loss": 1.325724482536316, "step": 134 }, { "epoch": 0.024944855700938896, "grad_norm": 0.07085525244474411, "learning_rate": 5.491803278688526e-06, "loss": 0.9489492177963257, "step": 135 }, { "epoch": 0.02512963240983474, "grad_norm": 0.08872570842504501, "learning_rate": 5.5327868852459025e-06, "loss": 1.2950146198272705, "step": 136 }, { "epoch": 0.025314409118730584, "grad_norm": 0.07972602546215057, "learning_rate": 5.573770491803278e-06, "loss": 1.0334540605545044, "step": 137 }, { "epoch": 0.025499185827626426, "grad_norm": 0.08055080473423004, "learning_rate": 5.614754098360657e-06, "loss": 0.9743032455444336, "step": 138 }, { "epoch": 0.025683962536522272, "grad_norm": 0.07908285409212112, "learning_rate": 5.655737704918033e-06, "loss": 1.0162171125411987, "step": 139 }, { "epoch": 0.025868739245418115, "grad_norm": 0.10497123003005981, "learning_rate": 5.696721311475411e-06, "loss": 1.3711857795715332, "step": 140 }, { "epoch": 0.026053515954313957, "grad_norm": 0.09259134531021118, "learning_rate": 5.737704918032787e-06, "loss": 1.1905478239059448, "step": 141 }, { "epoch": 0.026238292663209803, "grad_norm": 0.09423007816076279, "learning_rate": 5.778688524590165e-06, "loss": 1.2282381057739258, "step": 142 }, { "epoch": 0.026423069372105645, "grad_norm": 0.08443727344274521, "learning_rate": 5.8196721311475415e-06, "loss": 1.3001261949539185, "step": 143 }, { "epoch": 0.02660784608100149, "grad_norm": 0.0933670923113823, "learning_rate": 5.860655737704919e-06, "loss": 1.2991963624954224, "step": 144 }, { "epoch": 0.026792622789897334, "grad_norm": 0.09587407857179642, "learning_rate": 5.9016393442622956e-06, "loss": 1.064162015914917, "step": 145 }, { "epoch": 0.026977399498793176, "grad_norm": 0.08887308090925217, "learning_rate": 5.942622950819673e-06, "loss": 1.1303232908248901, "step": 146 }, { "epoch": 0.027162176207689022, "grad_norm": 0.0891450047492981, "learning_rate": 5.98360655737705e-06, "loss": 1.1807233095169067, "step": 147 }, { "epoch": 0.027346952916584864, "grad_norm": 0.09064659476280212, "learning_rate": 6.024590163934426e-06, "loss": 1.0990560054779053, "step": 148 }, { "epoch": 0.027531729625480707, "grad_norm": 0.10074954479932785, "learning_rate": 6.065573770491804e-06, "loss": 1.231015920639038, "step": 149 }, { "epoch": 0.027716506334376553, "grad_norm": 0.07731477916240692, "learning_rate": 6.10655737704918e-06, "loss": 1.0035730600357056, "step": 150 }, { "epoch": 0.027901283043272395, "grad_norm": 0.08103454858064651, "learning_rate": 6.147540983606558e-06, "loss": 1.036755919456482, "step": 151 }, { "epoch": 0.02808605975216824, "grad_norm": 0.10394132882356644, "learning_rate": 6.1885245901639345e-06, "loss": 1.3168418407440186, "step": 152 }, { "epoch": 0.028270836461064083, "grad_norm": 0.1187523603439331, "learning_rate": 6.229508196721312e-06, "loss": 1.3754808902740479, "step": 153 }, { "epoch": 0.028455613169959926, "grad_norm": 0.09438347071409225, "learning_rate": 6.270491803278689e-06, "loss": 1.065092921257019, "step": 154 }, { "epoch": 0.02864038987885577, "grad_norm": 0.08473348617553711, "learning_rate": 6.311475409836066e-06, "loss": 1.0317045450210571, "step": 155 }, { "epoch": 0.028825166587751614, "grad_norm": 0.10343526303768158, "learning_rate": 6.352459016393443e-06, "loss": 1.0830442905426025, "step": 156 }, { "epoch": 0.029009943296647456, "grad_norm": 0.11212706565856934, "learning_rate": 6.393442622950821e-06, "loss": 1.4527738094329834, "step": 157 }, { "epoch": 0.029194720005543302, "grad_norm": 0.08572731912136078, "learning_rate": 6.434426229508197e-06, "loss": 1.1567916870117188, "step": 158 }, { "epoch": 0.029379496714439145, "grad_norm": 0.05398239567875862, "learning_rate": 6.475409836065575e-06, "loss": 0.6827751398086548, "step": 159 }, { "epoch": 0.029564273423334987, "grad_norm": 0.11699340492486954, "learning_rate": 6.516393442622952e-06, "loss": 1.2860791683197021, "step": 160 }, { "epoch": 0.029749050132230833, "grad_norm": 0.07042679190635681, "learning_rate": 6.5573770491803276e-06, "loss": 0.7918217182159424, "step": 161 }, { "epoch": 0.029933826841126675, "grad_norm": 0.08320360630750656, "learning_rate": 6.598360655737706e-06, "loss": 1.0839710235595703, "step": 162 }, { "epoch": 0.03011860355002252, "grad_norm": 0.0856919214129448, "learning_rate": 6.6393442622950825e-06, "loss": 0.9883152842521667, "step": 163 }, { "epoch": 0.030303380258918364, "grad_norm": 0.08802906423807144, "learning_rate": 6.68032786885246e-06, "loss": 1.0608097314834595, "step": 164 }, { "epoch": 0.030488156967814206, "grad_norm": 0.08420740067958832, "learning_rate": 6.721311475409837e-06, "loss": 0.9704433083534241, "step": 165 }, { "epoch": 0.030672933676710052, "grad_norm": 0.08119208365678787, "learning_rate": 6.762295081967214e-06, "loss": 0.8476821184158325, "step": 166 }, { "epoch": 0.030857710385605894, "grad_norm": 0.08200537413358688, "learning_rate": 6.803278688524591e-06, "loss": 0.9476600289344788, "step": 167 }, { "epoch": 0.031042487094501737, "grad_norm": 0.08968322724103928, "learning_rate": 6.844262295081968e-06, "loss": 1.2163679599761963, "step": 168 }, { "epoch": 0.031227263803397583, "grad_norm": 0.08660093694925308, "learning_rate": 6.885245901639345e-06, "loss": 1.174856424331665, "step": 169 }, { "epoch": 0.031412040512293425, "grad_norm": 0.09091655910015106, "learning_rate": 6.926229508196722e-06, "loss": 0.9161083102226257, "step": 170 }, { "epoch": 0.03159681722118927, "grad_norm": 0.10828382521867752, "learning_rate": 6.967213114754099e-06, "loss": 1.1203913688659668, "step": 171 }, { "epoch": 0.03178159393008511, "grad_norm": 0.07517366856336594, "learning_rate": 7.0081967213114756e-06, "loss": 1.1747320890426636, "step": 172 }, { "epoch": 0.03196637063898096, "grad_norm": 0.07769443094730377, "learning_rate": 7.049180327868853e-06, "loss": 0.934011697769165, "step": 173 }, { "epoch": 0.0321511473478768, "grad_norm": 0.104559026658535, "learning_rate": 7.09016393442623e-06, "loss": 1.1921355724334717, "step": 174 }, { "epoch": 0.032335924056772644, "grad_norm": 0.08976680040359497, "learning_rate": 7.131147540983607e-06, "loss": 1.234793782234192, "step": 175 }, { "epoch": 0.03252070076566849, "grad_norm": 0.09020117670297623, "learning_rate": 7.172131147540984e-06, "loss": 1.1587923765182495, "step": 176 }, { "epoch": 0.03270547747456433, "grad_norm": 0.08177059888839722, "learning_rate": 7.213114754098361e-06, "loss": 1.0700960159301758, "step": 177 }, { "epoch": 0.03289025418346017, "grad_norm": 0.0836954414844513, "learning_rate": 7.254098360655738e-06, "loss": 0.9924188852310181, "step": 178 }, { "epoch": 0.03307503089235602, "grad_norm": 0.10337381064891815, "learning_rate": 7.295081967213115e-06, "loss": 1.2393605709075928, "step": 179 }, { "epoch": 0.03325980760125186, "grad_norm": 0.08602118492126465, "learning_rate": 7.336065573770492e-06, "loss": 1.0827878713607788, "step": 180 }, { "epoch": 0.033444584310147706, "grad_norm": 0.09046392887830734, "learning_rate": 7.3770491803278695e-06, "loss": 1.1079829931259155, "step": 181 }, { "epoch": 0.03362936101904355, "grad_norm": 0.10092524439096451, "learning_rate": 7.418032786885246e-06, "loss": 1.2582985162734985, "step": 182 }, { "epoch": 0.03381413772793939, "grad_norm": 0.0789152979850769, "learning_rate": 7.459016393442624e-06, "loss": 0.8313584923744202, "step": 183 }, { "epoch": 0.03399891443683524, "grad_norm": 0.09583103656768799, "learning_rate": 7.500000000000001e-06, "loss": 1.1705000400543213, "step": 184 }, { "epoch": 0.03418369114573108, "grad_norm": 0.09844133257865906, "learning_rate": 7.540983606557377e-06, "loss": 1.3607006072998047, "step": 185 }, { "epoch": 0.034368467854626925, "grad_norm": 0.08308165520429611, "learning_rate": 7.581967213114755e-06, "loss": 1.0205916166305542, "step": 186 }, { "epoch": 0.03455324456352277, "grad_norm": 0.1008264496922493, "learning_rate": 7.622950819672132e-06, "loss": 1.3429181575775146, "step": 187 }, { "epoch": 0.03473802127241861, "grad_norm": 0.09739381819963455, "learning_rate": 7.66393442622951e-06, "loss": 1.2232327461242676, "step": 188 }, { "epoch": 0.03492279798131446, "grad_norm": 0.0783327966928482, "learning_rate": 7.704918032786886e-06, "loss": 0.9063292741775513, "step": 189 }, { "epoch": 0.0351075746902103, "grad_norm": 0.07952532172203064, "learning_rate": 7.745901639344263e-06, "loss": 0.9290217161178589, "step": 190 }, { "epoch": 0.035292351399106144, "grad_norm": 0.1293192207813263, "learning_rate": 7.786885245901639e-06, "loss": 1.3529701232910156, "step": 191 }, { "epoch": 0.035477128108001986, "grad_norm": 0.09185554087162018, "learning_rate": 7.827868852459017e-06, "loss": 1.1313979625701904, "step": 192 }, { "epoch": 0.03566190481689783, "grad_norm": 0.10453785955905914, "learning_rate": 7.868852459016394e-06, "loss": 1.2551578283309937, "step": 193 }, { "epoch": 0.03584668152579367, "grad_norm": 0.11565221101045609, "learning_rate": 7.909836065573772e-06, "loss": 1.2458629608154297, "step": 194 }, { "epoch": 0.03603145823468952, "grad_norm": 0.07788601517677307, "learning_rate": 7.950819672131147e-06, "loss": 0.9804476499557495, "step": 195 }, { "epoch": 0.03621623494358536, "grad_norm": 0.10067404806613922, "learning_rate": 7.991803278688526e-06, "loss": 1.265161156654358, "step": 196 }, { "epoch": 0.036401011652481205, "grad_norm": 0.09044502675533295, "learning_rate": 8.032786885245902e-06, "loss": 1.0315583944320679, "step": 197 }, { "epoch": 0.03658578836137705, "grad_norm": 0.11177962273359299, "learning_rate": 8.073770491803279e-06, "loss": 1.254334807395935, "step": 198 }, { "epoch": 0.03677056507027289, "grad_norm": 0.0948624461889267, "learning_rate": 8.114754098360657e-06, "loss": 1.1573119163513184, "step": 199 }, { "epoch": 0.03695534177916874, "grad_norm": 0.09116200357675552, "learning_rate": 8.155737704918034e-06, "loss": 1.2316014766693115, "step": 200 }, { "epoch": 0.03714011848806458, "grad_norm": 0.09280700981616974, "learning_rate": 8.19672131147541e-06, "loss": 1.0697439908981323, "step": 201 }, { "epoch": 0.037324895196960424, "grad_norm": 0.1117481216788292, "learning_rate": 8.237704918032787e-06, "loss": 1.4836028814315796, "step": 202 }, { "epoch": 0.037509671905856266, "grad_norm": 0.0852770209312439, "learning_rate": 8.278688524590165e-06, "loss": 0.7765272855758667, "step": 203 }, { "epoch": 0.03769444861475211, "grad_norm": 0.10650324076414108, "learning_rate": 8.319672131147542e-06, "loss": 1.1163196563720703, "step": 204 }, { "epoch": 0.03787922532364795, "grad_norm": 0.08654922991991043, "learning_rate": 8.360655737704919e-06, "loss": 1.166925311088562, "step": 205 }, { "epoch": 0.0380640020325438, "grad_norm": 0.10339689999818802, "learning_rate": 8.401639344262295e-06, "loss": 1.0431673526763916, "step": 206 }, { "epoch": 0.03824877874143964, "grad_norm": 0.076447993516922, "learning_rate": 8.442622950819674e-06, "loss": 1.0487338304519653, "step": 207 }, { "epoch": 0.038433555450335485, "grad_norm": 0.09971610456705093, "learning_rate": 8.48360655737705e-06, "loss": 1.163187861442566, "step": 208 }, { "epoch": 0.03861833215923133, "grad_norm": 0.07643406838178635, "learning_rate": 8.524590163934427e-06, "loss": 1.0120989084243774, "step": 209 }, { "epoch": 0.03880310886812717, "grad_norm": 0.1098799780011177, "learning_rate": 8.565573770491804e-06, "loss": 1.110826849937439, "step": 210 }, { "epoch": 0.03898788557702302, "grad_norm": 0.08974554389715195, "learning_rate": 8.60655737704918e-06, "loss": 0.9144413471221924, "step": 211 }, { "epoch": 0.03917266228591886, "grad_norm": 0.09564737975597382, "learning_rate": 8.647540983606559e-06, "loss": 0.9425402283668518, "step": 212 }, { "epoch": 0.039357438994814704, "grad_norm": 0.11915887147188187, "learning_rate": 8.688524590163935e-06, "loss": 0.995085597038269, "step": 213 }, { "epoch": 0.03954221570371055, "grad_norm": 0.09983936697244644, "learning_rate": 8.729508196721312e-06, "loss": 1.0563819408416748, "step": 214 }, { "epoch": 0.03972699241260639, "grad_norm": 0.08968392759561539, "learning_rate": 8.770491803278688e-06, "loss": 1.0094102621078491, "step": 215 }, { "epoch": 0.03991176912150223, "grad_norm": 0.07635963708162308, "learning_rate": 8.811475409836067e-06, "loss": 0.7993847727775574, "step": 216 }, { "epoch": 0.04009654583039808, "grad_norm": 0.10572884231805801, "learning_rate": 8.852459016393443e-06, "loss": 1.058998465538025, "step": 217 }, { "epoch": 0.04028132253929392, "grad_norm": 0.09346319735050201, "learning_rate": 8.893442622950822e-06, "loss": 1.100991129875183, "step": 218 }, { "epoch": 0.040466099248189766, "grad_norm": 0.10200077295303345, "learning_rate": 8.934426229508197e-06, "loss": 0.9690842628479004, "step": 219 }, { "epoch": 0.04065087595708561, "grad_norm": 0.0913805440068245, "learning_rate": 8.975409836065575e-06, "loss": 0.8409448862075806, "step": 220 }, { "epoch": 0.04083565266598145, "grad_norm": 0.0992816761136055, "learning_rate": 9.016393442622952e-06, "loss": 1.1589744091033936, "step": 221 }, { "epoch": 0.0410204293748773, "grad_norm": 0.07575351744890213, "learning_rate": 9.057377049180328e-06, "loss": 0.8102637529373169, "step": 222 }, { "epoch": 0.04120520608377314, "grad_norm": 0.11256787180900574, "learning_rate": 9.098360655737707e-06, "loss": 1.1053478717803955, "step": 223 }, { "epoch": 0.041389982792668985, "grad_norm": 0.08245761692523956, "learning_rate": 9.139344262295083e-06, "loss": 0.6602897644042969, "step": 224 }, { "epoch": 0.04157475950156483, "grad_norm": 0.08066370338201523, "learning_rate": 9.18032786885246e-06, "loss": 0.7633033990859985, "step": 225 }, { "epoch": 0.04175953621046067, "grad_norm": 0.08782163262367249, "learning_rate": 9.221311475409836e-06, "loss": 0.929959237575531, "step": 226 }, { "epoch": 0.04194431291935651, "grad_norm": 0.09314026683568954, "learning_rate": 9.262295081967215e-06, "loss": 1.0352487564086914, "step": 227 }, { "epoch": 0.04212908962825236, "grad_norm": 0.10643976181745529, "learning_rate": 9.303278688524591e-06, "loss": 1.229055404663086, "step": 228 }, { "epoch": 0.042313866337148204, "grad_norm": 0.07235404849052429, "learning_rate": 9.344262295081968e-06, "loss": 0.8788920640945435, "step": 229 }, { "epoch": 0.042498643046044046, "grad_norm": 0.09235186874866486, "learning_rate": 9.385245901639345e-06, "loss": 1.0391452312469482, "step": 230 }, { "epoch": 0.04268341975493989, "grad_norm": 0.13859771192073822, "learning_rate": 9.426229508196723e-06, "loss": 1.4094034433364868, "step": 231 }, { "epoch": 0.04286819646383573, "grad_norm": 0.07954560965299606, "learning_rate": 9.4672131147541e-06, "loss": 1.129581093788147, "step": 232 }, { "epoch": 0.04305297317273158, "grad_norm": 0.10096707195043564, "learning_rate": 9.508196721311476e-06, "loss": 1.051155924797058, "step": 233 }, { "epoch": 0.04323774988162742, "grad_norm": 0.08780834078788757, "learning_rate": 9.549180327868853e-06, "loss": 1.0105705261230469, "step": 234 }, { "epoch": 0.043422526590523265, "grad_norm": 0.07152073830366135, "learning_rate": 9.59016393442623e-06, "loss": 0.9030419588088989, "step": 235 }, { "epoch": 0.04360730329941911, "grad_norm": 0.11264440417289734, "learning_rate": 9.631147540983608e-06, "loss": 1.0321972370147705, "step": 236 }, { "epoch": 0.04379208000831495, "grad_norm": 0.10329741984605789, "learning_rate": 9.672131147540984e-06, "loss": 1.207244634628296, "step": 237 }, { "epoch": 0.04397685671721079, "grad_norm": 0.0821845754981041, "learning_rate": 9.713114754098361e-06, "loss": 0.9656891822814941, "step": 238 }, { "epoch": 0.04416163342610664, "grad_norm": 0.08063480257987976, "learning_rate": 9.754098360655738e-06, "loss": 0.986838161945343, "step": 239 }, { "epoch": 0.044346410135002484, "grad_norm": 0.08364249020814896, "learning_rate": 9.795081967213116e-06, "loss": 0.9730472564697266, "step": 240 }, { "epoch": 0.04453118684389833, "grad_norm": 0.09396041929721832, "learning_rate": 9.836065573770493e-06, "loss": 1.0492497682571411, "step": 241 }, { "epoch": 0.04471596355279417, "grad_norm": 0.09696823358535767, "learning_rate": 9.87704918032787e-06, "loss": 1.0710127353668213, "step": 242 }, { "epoch": 0.04490074026169001, "grad_norm": 0.0735907331109047, "learning_rate": 9.918032786885246e-06, "loss": 0.7238403558731079, "step": 243 }, { "epoch": 0.04508551697058586, "grad_norm": 0.09278254956007004, "learning_rate": 9.959016393442624e-06, "loss": 1.0645323991775513, "step": 244 }, { "epoch": 0.0452702936794817, "grad_norm": 0.10433805733919144, "learning_rate": 1e-05, "loss": 1.2725396156311035, "step": 245 }, { "epoch": 0.045455070388377546, "grad_norm": 0.10844039171934128, "learning_rate": 1.0040983606557377e-05, "loss": 1.019277811050415, "step": 246 }, { "epoch": 0.04563984709727339, "grad_norm": 0.09526235610246658, "learning_rate": 1.0081967213114754e-05, "loss": 1.0928999185562134, "step": 247 }, { "epoch": 0.04582462380616923, "grad_norm": 0.08871227502822876, "learning_rate": 1.0122950819672132e-05, "loss": 0.94351726770401, "step": 248 }, { "epoch": 0.04600940051506507, "grad_norm": 0.0853639468550682, "learning_rate": 1.0163934426229509e-05, "loss": 0.9936844706535339, "step": 249 }, { "epoch": 0.04619417722396092, "grad_norm": 0.11534149944782257, "learning_rate": 1.0204918032786886e-05, "loss": 1.150606393814087, "step": 250 }, { "epoch": 0.046378953932856765, "grad_norm": 0.0720873549580574, "learning_rate": 1.0245901639344262e-05, "loss": 0.6555285453796387, "step": 251 }, { "epoch": 0.04656373064175261, "grad_norm": 0.10066215693950653, "learning_rate": 1.028688524590164e-05, "loss": 0.9825627207756042, "step": 252 }, { "epoch": 0.04674850735064845, "grad_norm": 0.09775615483522415, "learning_rate": 1.0327868852459017e-05, "loss": 0.9665984511375427, "step": 253 }, { "epoch": 0.04693328405954429, "grad_norm": 0.10551930218935013, "learning_rate": 1.0368852459016394e-05, "loss": 1.1738957166671753, "step": 254 }, { "epoch": 0.04711806076844014, "grad_norm": 0.1370166391134262, "learning_rate": 1.040983606557377e-05, "loss": 1.2341063022613525, "step": 255 }, { "epoch": 0.047302837477335984, "grad_norm": 0.11037618666887283, "learning_rate": 1.0450819672131149e-05, "loss": 1.14421546459198, "step": 256 }, { "epoch": 0.047487614186231826, "grad_norm": 0.08367924392223358, "learning_rate": 1.0491803278688525e-05, "loss": 1.0374960899353027, "step": 257 }, { "epoch": 0.04767239089512767, "grad_norm": 0.08623643964529037, "learning_rate": 1.0532786885245902e-05, "loss": 0.9188487529754639, "step": 258 }, { "epoch": 0.04785716760402351, "grad_norm": 0.09963801503181458, "learning_rate": 1.0573770491803279e-05, "loss": 1.2390835285186768, "step": 259 }, { "epoch": 0.04804194431291935, "grad_norm": 0.08543514460325241, "learning_rate": 1.0614754098360655e-05, "loss": 1.0762650966644287, "step": 260 }, { "epoch": 0.0482267210218152, "grad_norm": 0.09559163451194763, "learning_rate": 1.0655737704918034e-05, "loss": 0.8684812784194946, "step": 261 }, { "epoch": 0.048411497730711045, "grad_norm": 0.10531015694141388, "learning_rate": 1.069672131147541e-05, "loss": 1.0792323350906372, "step": 262 }, { "epoch": 0.04859627443960689, "grad_norm": 0.09568508714437485, "learning_rate": 1.0737704918032787e-05, "loss": 1.1282249689102173, "step": 263 }, { "epoch": 0.04878105114850273, "grad_norm": 0.09946364909410477, "learning_rate": 1.0778688524590164e-05, "loss": 1.0437533855438232, "step": 264 }, { "epoch": 0.04896582785739857, "grad_norm": 0.12145375460386276, "learning_rate": 1.0819672131147544e-05, "loss": 1.2372792959213257, "step": 265 }, { "epoch": 0.04915060456629442, "grad_norm": 0.09334026277065277, "learning_rate": 1.0860655737704918e-05, "loss": 1.0724656581878662, "step": 266 }, { "epoch": 0.049335381275190264, "grad_norm": 0.09644033759832382, "learning_rate": 1.0901639344262295e-05, "loss": 1.2204649448394775, "step": 267 }, { "epoch": 0.049520157984086106, "grad_norm": 0.10595440119504929, "learning_rate": 1.0942622950819672e-05, "loss": 1.0160824060440063, "step": 268 }, { "epoch": 0.04970493469298195, "grad_norm": 0.09714332222938538, "learning_rate": 1.0983606557377052e-05, "loss": 0.906075656414032, "step": 269 }, { "epoch": 0.04988971140187779, "grad_norm": 0.09888976812362671, "learning_rate": 1.1024590163934428e-05, "loss": 1.0426760911941528, "step": 270 }, { "epoch": 0.050074488110773634, "grad_norm": 0.09720077365636826, "learning_rate": 1.1065573770491805e-05, "loss": 1.1333513259887695, "step": 271 }, { "epoch": 0.05025926481966948, "grad_norm": 0.09125541895627975, "learning_rate": 1.110655737704918e-05, "loss": 1.1542598009109497, "step": 272 }, { "epoch": 0.050444041528565325, "grad_norm": 0.08100289106369019, "learning_rate": 1.1147540983606557e-05, "loss": 0.8822868466377258, "step": 273 }, { "epoch": 0.05062881823746117, "grad_norm": 0.08017222583293915, "learning_rate": 1.1188524590163937e-05, "loss": 0.8326126337051392, "step": 274 }, { "epoch": 0.05081359494635701, "grad_norm": 0.09766320139169693, "learning_rate": 1.1229508196721313e-05, "loss": 1.1107693910598755, "step": 275 }, { "epoch": 0.05099837165525285, "grad_norm": 0.09009626507759094, "learning_rate": 1.127049180327869e-05, "loss": 0.8500745296478271, "step": 276 }, { "epoch": 0.0511831483641487, "grad_norm": 0.10108991712331772, "learning_rate": 1.1311475409836066e-05, "loss": 0.9838616847991943, "step": 277 }, { "epoch": 0.051367925073044544, "grad_norm": 0.08185333013534546, "learning_rate": 1.1352459016393445e-05, "loss": 0.8202808499336243, "step": 278 }, { "epoch": 0.05155270178194039, "grad_norm": 0.11285862326622009, "learning_rate": 1.1393442622950821e-05, "loss": 1.48050856590271, "step": 279 }, { "epoch": 0.05173747849083623, "grad_norm": 0.09318527579307556, "learning_rate": 1.1434426229508198e-05, "loss": 0.8346209526062012, "step": 280 }, { "epoch": 0.05192225519973207, "grad_norm": 0.09572022408246994, "learning_rate": 1.1475409836065575e-05, "loss": 1.0783284902572632, "step": 281 }, { "epoch": 0.052107031908627914, "grad_norm": 0.11212638020515442, "learning_rate": 1.1516393442622951e-05, "loss": 1.0902286767959595, "step": 282 }, { "epoch": 0.05229180861752376, "grad_norm": 0.10433069616556168, "learning_rate": 1.155737704918033e-05, "loss": 1.058199167251587, "step": 283 }, { "epoch": 0.052476585326419606, "grad_norm": 0.10224845260381699, "learning_rate": 1.1598360655737706e-05, "loss": 1.081488013267517, "step": 284 }, { "epoch": 0.05266136203531545, "grad_norm": 0.10507450252771378, "learning_rate": 1.1639344262295083e-05, "loss": 0.9051607847213745, "step": 285 }, { "epoch": 0.05284613874421129, "grad_norm": 0.0947372242808342, "learning_rate": 1.168032786885246e-05, "loss": 1.0995267629623413, "step": 286 }, { "epoch": 0.05303091545310713, "grad_norm": 0.09245672821998596, "learning_rate": 1.1721311475409838e-05, "loss": 1.012171745300293, "step": 287 }, { "epoch": 0.05321569216200298, "grad_norm": 0.12127245962619781, "learning_rate": 1.1762295081967215e-05, "loss": 1.309299349784851, "step": 288 }, { "epoch": 0.053400468870898825, "grad_norm": 0.1081591546535492, "learning_rate": 1.1803278688524591e-05, "loss": 1.0298875570297241, "step": 289 }, { "epoch": 0.05358524557979467, "grad_norm": 0.11109253019094467, "learning_rate": 1.1844262295081968e-05, "loss": 1.058588981628418, "step": 290 }, { "epoch": 0.05377002228869051, "grad_norm": 0.08653238415718079, "learning_rate": 1.1885245901639346e-05, "loss": 0.898544430732727, "step": 291 }, { "epoch": 0.05395479899758635, "grad_norm": 0.0926504135131836, "learning_rate": 1.1926229508196723e-05, "loss": 0.7896566987037659, "step": 292 }, { "epoch": 0.054139575706482194, "grad_norm": 0.08921337872743607, "learning_rate": 1.19672131147541e-05, "loss": 0.9710261225700378, "step": 293 }, { "epoch": 0.054324352415378044, "grad_norm": 0.07718883454799652, "learning_rate": 1.2008196721311476e-05, "loss": 0.8338017463684082, "step": 294 }, { "epoch": 0.054509129124273886, "grad_norm": 0.0848095566034317, "learning_rate": 1.2049180327868853e-05, "loss": 0.6920614838600159, "step": 295 }, { "epoch": 0.05469390583316973, "grad_norm": 0.08996455371379852, "learning_rate": 1.2090163934426231e-05, "loss": 0.7501096129417419, "step": 296 }, { "epoch": 0.05487868254206557, "grad_norm": 0.07516958564519882, "learning_rate": 1.2131147540983608e-05, "loss": 0.8861922025680542, "step": 297 }, { "epoch": 0.055063459250961413, "grad_norm": 0.10512320697307587, "learning_rate": 1.2172131147540984e-05, "loss": 1.018561840057373, "step": 298 }, { "epoch": 0.05524823595985726, "grad_norm": 0.0865112841129303, "learning_rate": 1.221311475409836e-05, "loss": 0.7376396059989929, "step": 299 }, { "epoch": 0.055433012668753105, "grad_norm": 0.12389017641544342, "learning_rate": 1.2254098360655739e-05, "loss": 1.332137107849121, "step": 300 }, { "epoch": 0.05561778937764895, "grad_norm": 0.11509191244840622, "learning_rate": 1.2295081967213116e-05, "loss": 1.004392385482788, "step": 301 }, { "epoch": 0.05580256608654479, "grad_norm": 0.13411563634872437, "learning_rate": 1.2336065573770492e-05, "loss": 1.276947021484375, "step": 302 }, { "epoch": 0.05598734279544063, "grad_norm": 0.09792132675647736, "learning_rate": 1.2377049180327869e-05, "loss": 0.8487666249275208, "step": 303 }, { "epoch": 0.05617211950433648, "grad_norm": 0.09597836434841156, "learning_rate": 1.2418032786885247e-05, "loss": 0.9482805728912354, "step": 304 }, { "epoch": 0.056356896213232324, "grad_norm": 0.11644294112920761, "learning_rate": 1.2459016393442624e-05, "loss": 0.9525696039199829, "step": 305 }, { "epoch": 0.05654167292212817, "grad_norm": 0.10617950558662415, "learning_rate": 1.25e-05, "loss": 1.0335408449172974, "step": 306 }, { "epoch": 0.05672644963102401, "grad_norm": 0.09923581779003143, "learning_rate": 1.2540983606557377e-05, "loss": 1.0804893970489502, "step": 307 }, { "epoch": 0.05691122633991985, "grad_norm": 0.10484209656715393, "learning_rate": 1.2581967213114754e-05, "loss": 0.920568585395813, "step": 308 }, { "epoch": 0.057096003048815694, "grad_norm": 0.09422818571329117, "learning_rate": 1.2622950819672132e-05, "loss": 1.0048713684082031, "step": 309 }, { "epoch": 0.05728077975771154, "grad_norm": 0.09801265597343445, "learning_rate": 1.2663934426229509e-05, "loss": 0.9483872652053833, "step": 310 }, { "epoch": 0.057465556466607386, "grad_norm": 0.10625987499952316, "learning_rate": 1.2704918032786885e-05, "loss": 1.1098862886428833, "step": 311 }, { "epoch": 0.05765033317550323, "grad_norm": 0.10191453993320465, "learning_rate": 1.2745901639344262e-05, "loss": 0.9445469975471497, "step": 312 }, { "epoch": 0.05783510988439907, "grad_norm": 0.09489865601062775, "learning_rate": 1.2786885245901642e-05, "loss": 0.9424755573272705, "step": 313 }, { "epoch": 0.05801988659329491, "grad_norm": 0.07801543921232224, "learning_rate": 1.2827868852459017e-05, "loss": 0.9401955008506775, "step": 314 }, { "epoch": 0.05820466330219076, "grad_norm": 0.09698277711868286, "learning_rate": 1.2868852459016394e-05, "loss": 0.9096693396568298, "step": 315 }, { "epoch": 0.058389440011086605, "grad_norm": 0.07716673612594604, "learning_rate": 1.290983606557377e-05, "loss": 0.7786237597465515, "step": 316 }, { "epoch": 0.05857421671998245, "grad_norm": 0.09627088904380798, "learning_rate": 1.295081967213115e-05, "loss": 0.9308719635009766, "step": 317 }, { "epoch": 0.05875899342887829, "grad_norm": 0.09469778835773468, "learning_rate": 1.2991803278688527e-05, "loss": 1.0299229621887207, "step": 318 }, { "epoch": 0.05894377013777413, "grad_norm": 0.08667682856321335, "learning_rate": 1.3032786885245904e-05, "loss": 0.785006582736969, "step": 319 }, { "epoch": 0.059128546846669974, "grad_norm": 0.1045043021440506, "learning_rate": 1.3073770491803278e-05, "loss": 0.8180931806564331, "step": 320 }, { "epoch": 0.059313323555565824, "grad_norm": 0.1012943759560585, "learning_rate": 1.3114754098360655e-05, "loss": 1.0428085327148438, "step": 321 }, { "epoch": 0.059498100264461666, "grad_norm": 0.08716757595539093, "learning_rate": 1.3155737704918035e-05, "loss": 1.0261026620864868, "step": 322 }, { "epoch": 0.05968287697335751, "grad_norm": 0.08290330320596695, "learning_rate": 1.3196721311475412e-05, "loss": 0.8196086287498474, "step": 323 }, { "epoch": 0.05986765368225335, "grad_norm": 0.07775544375181198, "learning_rate": 1.3237704918032788e-05, "loss": 0.7242082357406616, "step": 324 }, { "epoch": 0.06005243039114919, "grad_norm": 0.05980847030878067, "learning_rate": 1.3278688524590165e-05, "loss": 0.7295750975608826, "step": 325 }, { "epoch": 0.06023720710004504, "grad_norm": 0.0982968881726265, "learning_rate": 1.3319672131147543e-05, "loss": 0.873098611831665, "step": 326 }, { "epoch": 0.060421983808940885, "grad_norm": 0.0912838950753212, "learning_rate": 1.336065573770492e-05, "loss": 1.3055800199508667, "step": 327 }, { "epoch": 0.06060676051783673, "grad_norm": 0.09126565605401993, "learning_rate": 1.3401639344262297e-05, "loss": 0.836475670337677, "step": 328 }, { "epoch": 0.06079153722673257, "grad_norm": 0.1270003318786621, "learning_rate": 1.3442622950819673e-05, "loss": 1.2076267004013062, "step": 329 }, { "epoch": 0.06097631393562841, "grad_norm": 0.09601800888776779, "learning_rate": 1.3483606557377052e-05, "loss": 1.0199816226959229, "step": 330 }, { "epoch": 0.061161090644524255, "grad_norm": 0.1017669290304184, "learning_rate": 1.3524590163934428e-05, "loss": 1.0680336952209473, "step": 331 }, { "epoch": 0.061345867353420104, "grad_norm": 0.11380743980407715, "learning_rate": 1.3565573770491805e-05, "loss": 1.0062092542648315, "step": 332 }, { "epoch": 0.061530644062315946, "grad_norm": 0.10115383565425873, "learning_rate": 1.3606557377049181e-05, "loss": 1.1867700815200806, "step": 333 }, { "epoch": 0.06171542077121179, "grad_norm": 0.09494657814502716, "learning_rate": 1.3647540983606558e-05, "loss": 0.7801775336265564, "step": 334 }, { "epoch": 0.06190019748010763, "grad_norm": 0.12234895676374435, "learning_rate": 1.3688524590163936e-05, "loss": 1.0539796352386475, "step": 335 }, { "epoch": 0.062084974189003474, "grad_norm": 0.1199754998087883, "learning_rate": 1.3729508196721313e-05, "loss": 1.2007956504821777, "step": 336 }, { "epoch": 0.06226975089789932, "grad_norm": 0.10975956916809082, "learning_rate": 1.377049180327869e-05, "loss": 1.0453523397445679, "step": 337 }, { "epoch": 0.062454527606795165, "grad_norm": 0.09918422996997833, "learning_rate": 1.3811475409836066e-05, "loss": 0.8010137677192688, "step": 338 }, { "epoch": 0.06263930431569101, "grad_norm": 0.0994090810418129, "learning_rate": 1.3852459016393445e-05, "loss": 1.0801807641983032, "step": 339 }, { "epoch": 0.06282408102458685, "grad_norm": 0.10293493419885635, "learning_rate": 1.3893442622950821e-05, "loss": 0.9230149388313293, "step": 340 }, { "epoch": 0.06300885773348269, "grad_norm": 0.10307420045137405, "learning_rate": 1.3934426229508198e-05, "loss": 1.0578651428222656, "step": 341 }, { "epoch": 0.06319363444237854, "grad_norm": 0.1045430526137352, "learning_rate": 1.3975409836065574e-05, "loss": 0.790934681892395, "step": 342 }, { "epoch": 0.06337841115127438, "grad_norm": 0.10155732929706573, "learning_rate": 1.4016393442622951e-05, "loss": 1.1352696418762207, "step": 343 }, { "epoch": 0.06356318786017022, "grad_norm": 0.09721534699201584, "learning_rate": 1.405737704918033e-05, "loss": 0.9279108047485352, "step": 344 }, { "epoch": 0.06374796456906606, "grad_norm": 0.11629458516836166, "learning_rate": 1.4098360655737706e-05, "loss": 1.0785913467407227, "step": 345 }, { "epoch": 0.06393274127796192, "grad_norm": 0.10066460072994232, "learning_rate": 1.4139344262295083e-05, "loss": 1.1896709203720093, "step": 346 }, { "epoch": 0.06411751798685776, "grad_norm": 0.09405656158924103, "learning_rate": 1.418032786885246e-05, "loss": 0.8420297503471375, "step": 347 }, { "epoch": 0.0643022946957536, "grad_norm": 0.08271870762109756, "learning_rate": 1.4221311475409838e-05, "loss": 0.7494404315948486, "step": 348 }, { "epoch": 0.06448707140464945, "grad_norm": 0.1078755334019661, "learning_rate": 1.4262295081967214e-05, "loss": 0.9479129314422607, "step": 349 }, { "epoch": 0.06467184811354529, "grad_norm": 0.0918903723359108, "learning_rate": 1.4303278688524591e-05, "loss": 0.9387383460998535, "step": 350 }, { "epoch": 0.06485662482244113, "grad_norm": 0.11810861527919769, "learning_rate": 1.4344262295081968e-05, "loss": 0.9701591730117798, "step": 351 }, { "epoch": 0.06504140153133697, "grad_norm": 0.09874974936246872, "learning_rate": 1.4385245901639346e-05, "loss": 0.9196026921272278, "step": 352 }, { "epoch": 0.06522617824023282, "grad_norm": 0.0841999500989914, "learning_rate": 1.4426229508196722e-05, "loss": 0.6813482642173767, "step": 353 }, { "epoch": 0.06541095494912866, "grad_norm": 0.09054264426231384, "learning_rate": 1.4467213114754099e-05, "loss": 1.0393775701522827, "step": 354 }, { "epoch": 0.0655957316580245, "grad_norm": 0.09485882520675659, "learning_rate": 1.4508196721311476e-05, "loss": 1.008347988128662, "step": 355 }, { "epoch": 0.06578050836692034, "grad_norm": 0.09545883536338806, "learning_rate": 1.4549180327868852e-05, "loss": 0.8867791295051575, "step": 356 }, { "epoch": 0.0659652850758162, "grad_norm": 0.10870155692100525, "learning_rate": 1.459016393442623e-05, "loss": 1.0850865840911865, "step": 357 }, { "epoch": 0.06615006178471204, "grad_norm": 0.0992053747177124, "learning_rate": 1.4631147540983607e-05, "loss": 1.008838415145874, "step": 358 }, { "epoch": 0.06633483849360788, "grad_norm": 0.08941731601953506, "learning_rate": 1.4672131147540984e-05, "loss": 0.9069108963012695, "step": 359 }, { "epoch": 0.06651961520250373, "grad_norm": 0.11634092032909393, "learning_rate": 1.471311475409836e-05, "loss": 1.202878713607788, "step": 360 }, { "epoch": 0.06670439191139957, "grad_norm": 0.10910794138908386, "learning_rate": 1.4754098360655739e-05, "loss": 1.082908034324646, "step": 361 }, { "epoch": 0.06688916862029541, "grad_norm": 0.0834878534078598, "learning_rate": 1.4795081967213116e-05, "loss": 0.7075738906860352, "step": 362 }, { "epoch": 0.06707394532919125, "grad_norm": 0.10349691659212112, "learning_rate": 1.4836065573770492e-05, "loss": 0.9434449672698975, "step": 363 }, { "epoch": 0.0672587220380871, "grad_norm": 0.11296708881855011, "learning_rate": 1.4877049180327869e-05, "loss": 0.933619499206543, "step": 364 }, { "epoch": 0.06744349874698294, "grad_norm": 0.10705320537090302, "learning_rate": 1.4918032786885249e-05, "loss": 1.2247308492660522, "step": 365 }, { "epoch": 0.06762827545587878, "grad_norm": 0.09186027944087982, "learning_rate": 1.4959016393442625e-05, "loss": 0.9711430072784424, "step": 366 }, { "epoch": 0.06781305216477464, "grad_norm": 0.1063535287976265, "learning_rate": 1.5000000000000002e-05, "loss": 0.7582070231437683, "step": 367 }, { "epoch": 0.06799782887367048, "grad_norm": 0.12384825944900513, "learning_rate": 1.5040983606557377e-05, "loss": 0.9878946542739868, "step": 368 }, { "epoch": 0.06818260558256632, "grad_norm": 0.11601697653532028, "learning_rate": 1.5081967213114754e-05, "loss": 1.1344435214996338, "step": 369 }, { "epoch": 0.06836738229146216, "grad_norm": 0.10092558711767197, "learning_rate": 1.5122950819672134e-05, "loss": 0.8472614288330078, "step": 370 }, { "epoch": 0.068552159000358, "grad_norm": 0.08755794912576675, "learning_rate": 1.516393442622951e-05, "loss": 0.9642510414123535, "step": 371 }, { "epoch": 0.06873693570925385, "grad_norm": 0.11546262353658676, "learning_rate": 1.5204918032786887e-05, "loss": 0.9715655446052551, "step": 372 }, { "epoch": 0.06892171241814969, "grad_norm": 0.10162407904863358, "learning_rate": 1.5245901639344264e-05, "loss": 0.8278041481971741, "step": 373 }, { "epoch": 0.06910648912704553, "grad_norm": 0.12836207449436188, "learning_rate": 1.528688524590164e-05, "loss": 1.1721863746643066, "step": 374 }, { "epoch": 0.06929126583594138, "grad_norm": 0.08836661279201508, "learning_rate": 1.532786885245902e-05, "loss": 0.8702605962753296, "step": 375 }, { "epoch": 0.06947604254483722, "grad_norm": 0.09650567173957825, "learning_rate": 1.5368852459016393e-05, "loss": 0.8430268168449402, "step": 376 }, { "epoch": 0.06966081925373306, "grad_norm": 0.12295356392860413, "learning_rate": 1.5409836065573772e-05, "loss": 1.495065689086914, "step": 377 }, { "epoch": 0.06984559596262892, "grad_norm": 0.09706033766269684, "learning_rate": 1.545081967213115e-05, "loss": 0.7906158566474915, "step": 378 }, { "epoch": 0.07003037267152476, "grad_norm": 0.11473716050386429, "learning_rate": 1.5491803278688525e-05, "loss": 0.9313616752624512, "step": 379 }, { "epoch": 0.0702151493804206, "grad_norm": 0.11317595094442368, "learning_rate": 1.5532786885245903e-05, "loss": 0.9224214553833008, "step": 380 }, { "epoch": 0.07039992608931644, "grad_norm": 0.08347756415605545, "learning_rate": 1.5573770491803278e-05, "loss": 0.9982295632362366, "step": 381 }, { "epoch": 0.07058470279821229, "grad_norm": 0.09798528254032135, "learning_rate": 1.5614754098360657e-05, "loss": 1.0946242809295654, "step": 382 }, { "epoch": 0.07076947950710813, "grad_norm": 0.10663064569234848, "learning_rate": 1.5655737704918035e-05, "loss": 1.1035512685775757, "step": 383 }, { "epoch": 0.07095425621600397, "grad_norm": 0.0784713551402092, "learning_rate": 1.569672131147541e-05, "loss": 0.8944029808044434, "step": 384 }, { "epoch": 0.07113903292489981, "grad_norm": 0.11879897117614746, "learning_rate": 1.5737704918032788e-05, "loss": 0.9249335527420044, "step": 385 }, { "epoch": 0.07132380963379566, "grad_norm": 0.11596395075321198, "learning_rate": 1.5778688524590163e-05, "loss": 1.006790041923523, "step": 386 }, { "epoch": 0.0715085863426915, "grad_norm": 0.10372721403837204, "learning_rate": 1.5819672131147545e-05, "loss": 0.950377881526947, "step": 387 }, { "epoch": 0.07169336305158734, "grad_norm": 0.10973094403743744, "learning_rate": 1.586065573770492e-05, "loss": 0.9215264916419983, "step": 388 }, { "epoch": 0.0718781397604832, "grad_norm": 0.08272566646337509, "learning_rate": 1.5901639344262295e-05, "loss": 0.7860650420188904, "step": 389 }, { "epoch": 0.07206291646937904, "grad_norm": 0.09537763893604279, "learning_rate": 1.5942622950819673e-05, "loss": 0.8267325758934021, "step": 390 }, { "epoch": 0.07224769317827488, "grad_norm": 0.1512487381696701, "learning_rate": 1.598360655737705e-05, "loss": 1.1345775127410889, "step": 391 }, { "epoch": 0.07243246988717073, "grad_norm": 0.07760065793991089, "learning_rate": 1.602459016393443e-05, "loss": 0.6036292314529419, "step": 392 }, { "epoch": 0.07261724659606657, "grad_norm": 0.08124257624149323, "learning_rate": 1.6065573770491805e-05, "loss": 0.7405913472175598, "step": 393 }, { "epoch": 0.07280202330496241, "grad_norm": 0.10059768706560135, "learning_rate": 1.610655737704918e-05, "loss": 0.9785237908363342, "step": 394 }, { "epoch": 0.07298680001385825, "grad_norm": 0.0916321724653244, "learning_rate": 1.6147540983606558e-05, "loss": 0.7635613679885864, "step": 395 }, { "epoch": 0.0731715767227541, "grad_norm": 0.08234703540802002, "learning_rate": 1.6188524590163936e-05, "loss": 0.6982128620147705, "step": 396 }, { "epoch": 0.07335635343164994, "grad_norm": 0.10206922143697739, "learning_rate": 1.6229508196721314e-05, "loss": 0.7990150451660156, "step": 397 }, { "epoch": 0.07354113014054578, "grad_norm": 0.08356337994337082, "learning_rate": 1.627049180327869e-05, "loss": 0.7303667068481445, "step": 398 }, { "epoch": 0.07372590684944162, "grad_norm": 0.11013100296258926, "learning_rate": 1.6311475409836068e-05, "loss": 1.070306658744812, "step": 399 }, { "epoch": 0.07391068355833748, "grad_norm": 0.09661299735307693, "learning_rate": 1.6352459016393446e-05, "loss": 0.8590985536575317, "step": 400 }, { "epoch": 0.07409546026723332, "grad_norm": 0.090263731777668, "learning_rate": 1.639344262295082e-05, "loss": 0.674292266368866, "step": 401 }, { "epoch": 0.07428023697612916, "grad_norm": 0.08033633232116699, "learning_rate": 1.64344262295082e-05, "loss": 0.6512205600738525, "step": 402 }, { "epoch": 0.074465013685025, "grad_norm": 0.10023355484008789, "learning_rate": 1.6475409836065574e-05, "loss": 0.8848332762718201, "step": 403 }, { "epoch": 0.07464979039392085, "grad_norm": 0.09321942925453186, "learning_rate": 1.6516393442622953e-05, "loss": 0.8359254598617554, "step": 404 }, { "epoch": 0.07483456710281669, "grad_norm": 0.07962282747030258, "learning_rate": 1.655737704918033e-05, "loss": 0.9046615362167358, "step": 405 }, { "epoch": 0.07501934381171253, "grad_norm": 0.10793166607618332, "learning_rate": 1.6598360655737706e-05, "loss": 0.9817122220993042, "step": 406 }, { "epoch": 0.07520412052060838, "grad_norm": 0.10675039142370224, "learning_rate": 1.6639344262295084e-05, "loss": 1.2107510566711426, "step": 407 }, { "epoch": 0.07538889722950422, "grad_norm": 0.0928160548210144, "learning_rate": 1.668032786885246e-05, "loss": 0.8065657019615173, "step": 408 }, { "epoch": 0.07557367393840006, "grad_norm": 0.09683706611394882, "learning_rate": 1.6721311475409837e-05, "loss": 1.064256191253662, "step": 409 }, { "epoch": 0.0757584506472959, "grad_norm": 0.09184283018112183, "learning_rate": 1.6762295081967216e-05, "loss": 0.8598893284797668, "step": 410 }, { "epoch": 0.07594322735619176, "grad_norm": 0.09558923542499542, "learning_rate": 1.680327868852459e-05, "loss": 0.8903302550315857, "step": 411 }, { "epoch": 0.0761280040650876, "grad_norm": 0.09565886855125427, "learning_rate": 1.684426229508197e-05, "loss": 0.9265789985656738, "step": 412 }, { "epoch": 0.07631278077398344, "grad_norm": 0.10494910180568695, "learning_rate": 1.6885245901639347e-05, "loss": 0.8625717163085938, "step": 413 }, { "epoch": 0.07649755748287929, "grad_norm": 0.10598883777856827, "learning_rate": 1.6926229508196722e-05, "loss": 1.1451170444488525, "step": 414 }, { "epoch": 0.07668233419177513, "grad_norm": 0.11120012402534485, "learning_rate": 1.69672131147541e-05, "loss": 0.9379380345344543, "step": 415 }, { "epoch": 0.07686711090067097, "grad_norm": 0.09714861214160919, "learning_rate": 1.7008196721311476e-05, "loss": 0.8064665794372559, "step": 416 }, { "epoch": 0.07705188760956681, "grad_norm": 0.11614526808261871, "learning_rate": 1.7049180327868854e-05, "loss": 1.0065346956253052, "step": 417 }, { "epoch": 0.07723666431846266, "grad_norm": 0.09795574098825455, "learning_rate": 1.7090163934426232e-05, "loss": 0.8045728206634521, "step": 418 }, { "epoch": 0.0774214410273585, "grad_norm": 0.08873660117387772, "learning_rate": 1.7131147540983607e-05, "loss": 0.8565447330474854, "step": 419 }, { "epoch": 0.07760621773625434, "grad_norm": 0.09851158410310745, "learning_rate": 1.7172131147540985e-05, "loss": 0.7614431977272034, "step": 420 }, { "epoch": 0.07779099444515018, "grad_norm": 0.09649550169706345, "learning_rate": 1.721311475409836e-05, "loss": 0.9361590147018433, "step": 421 }, { "epoch": 0.07797577115404604, "grad_norm": 0.09242745488882065, "learning_rate": 1.725409836065574e-05, "loss": 0.8737956881523132, "step": 422 }, { "epoch": 0.07816054786294188, "grad_norm": 0.09545590728521347, "learning_rate": 1.7295081967213117e-05, "loss": 0.8091181516647339, "step": 423 }, { "epoch": 0.07834532457183772, "grad_norm": 0.10765133798122406, "learning_rate": 1.7336065573770492e-05, "loss": 0.9091676473617554, "step": 424 }, { "epoch": 0.07853010128073357, "grad_norm": 0.09113151580095291, "learning_rate": 1.737704918032787e-05, "loss": 0.6701531410217285, "step": 425 }, { "epoch": 0.07871487798962941, "grad_norm": 0.11152313649654388, "learning_rate": 1.741803278688525e-05, "loss": 1.0032432079315186, "step": 426 }, { "epoch": 0.07889965469852525, "grad_norm": 0.0958375558257103, "learning_rate": 1.7459016393442624e-05, "loss": 0.7796164155006409, "step": 427 }, { "epoch": 0.0790844314074211, "grad_norm": 0.071872279047966, "learning_rate": 1.7500000000000002e-05, "loss": 0.6129750609397888, "step": 428 }, { "epoch": 0.07926920811631694, "grad_norm": 0.08849525451660156, "learning_rate": 1.7540983606557377e-05, "loss": 0.619393527507782, "step": 429 }, { "epoch": 0.07945398482521278, "grad_norm": 0.07459349185228348, "learning_rate": 1.7581967213114755e-05, "loss": 0.6974170207977295, "step": 430 }, { "epoch": 0.07963876153410862, "grad_norm": 0.0954682007431984, "learning_rate": 1.7622950819672133e-05, "loss": 0.860471785068512, "step": 431 }, { "epoch": 0.07982353824300446, "grad_norm": 0.10465925931930542, "learning_rate": 1.766393442622951e-05, "loss": 1.0079864263534546, "step": 432 }, { "epoch": 0.08000831495190032, "grad_norm": 0.09779639542102814, "learning_rate": 1.7704918032786887e-05, "loss": 0.7794288396835327, "step": 433 }, { "epoch": 0.08019309166079616, "grad_norm": 0.0917540118098259, "learning_rate": 1.774590163934426e-05, "loss": 0.8439017534255981, "step": 434 }, { "epoch": 0.080377868369692, "grad_norm": 0.10367224365472794, "learning_rate": 1.7786885245901643e-05, "loss": 0.9272810816764832, "step": 435 }, { "epoch": 0.08056264507858785, "grad_norm": 0.09312202036380768, "learning_rate": 1.7827868852459018e-05, "loss": 0.9261062145233154, "step": 436 }, { "epoch": 0.08074742178748369, "grad_norm": 0.0932997316122055, "learning_rate": 1.7868852459016393e-05, "loss": 0.7352979183197021, "step": 437 }, { "epoch": 0.08093219849637953, "grad_norm": 0.10954531282186508, "learning_rate": 1.790983606557377e-05, "loss": 1.0063031911849976, "step": 438 }, { "epoch": 0.08111697520527537, "grad_norm": 0.09387508779764175, "learning_rate": 1.795081967213115e-05, "loss": 0.7922593355178833, "step": 439 }, { "epoch": 0.08130175191417122, "grad_norm": 0.09354770928621292, "learning_rate": 1.7991803278688528e-05, "loss": 0.9488433003425598, "step": 440 }, { "epoch": 0.08148652862306706, "grad_norm": 0.13175411522388458, "learning_rate": 1.8032786885245903e-05, "loss": 1.0685322284698486, "step": 441 }, { "epoch": 0.0816713053319629, "grad_norm": 0.09272784739732742, "learning_rate": 1.8073770491803278e-05, "loss": 0.8308841586112976, "step": 442 }, { "epoch": 0.08185608204085874, "grad_norm": 0.09840014576911926, "learning_rate": 1.8114754098360656e-05, "loss": 0.7484462261199951, "step": 443 }, { "epoch": 0.0820408587497546, "grad_norm": 0.10059419274330139, "learning_rate": 1.8155737704918035e-05, "loss": 1.1569838523864746, "step": 444 }, { "epoch": 0.08222563545865044, "grad_norm": 0.09600666910409927, "learning_rate": 1.8196721311475413e-05, "loss": 0.8261542320251465, "step": 445 }, { "epoch": 0.08241041216754628, "grad_norm": 0.09666614979505539, "learning_rate": 1.8237704918032788e-05, "loss": 0.8323838710784912, "step": 446 }, { "epoch": 0.08259518887644213, "grad_norm": 0.09458190947771072, "learning_rate": 1.8278688524590166e-05, "loss": 0.9338691234588623, "step": 447 }, { "epoch": 0.08277996558533797, "grad_norm": 0.09368739277124405, "learning_rate": 1.8319672131147545e-05, "loss": 0.9827497005462646, "step": 448 }, { "epoch": 0.08296474229423381, "grad_norm": 0.08596470952033997, "learning_rate": 1.836065573770492e-05, "loss": 0.9741113185882568, "step": 449 }, { "epoch": 0.08314951900312965, "grad_norm": 0.08872201293706894, "learning_rate": 1.8401639344262298e-05, "loss": 0.9058973789215088, "step": 450 }, { "epoch": 0.0833342957120255, "grad_norm": 0.09660264849662781, "learning_rate": 1.8442622950819673e-05, "loss": 0.9430114030838013, "step": 451 }, { "epoch": 0.08351907242092134, "grad_norm": 0.11228955537080765, "learning_rate": 1.848360655737705e-05, "loss": 0.8239207863807678, "step": 452 }, { "epoch": 0.08370384912981718, "grad_norm": 0.10609623044729233, "learning_rate": 1.852459016393443e-05, "loss": 0.9952499270439148, "step": 453 }, { "epoch": 0.08388862583871302, "grad_norm": 0.11035417765378952, "learning_rate": 1.8565573770491804e-05, "loss": 1.1380780935287476, "step": 454 }, { "epoch": 0.08407340254760888, "grad_norm": 0.11164045333862305, "learning_rate": 1.8606557377049183e-05, "loss": 1.0310348272323608, "step": 455 }, { "epoch": 0.08425817925650472, "grad_norm": 0.11842917650938034, "learning_rate": 1.8647540983606558e-05, "loss": 0.9357023239135742, "step": 456 }, { "epoch": 0.08444295596540057, "grad_norm": 0.09763655066490173, "learning_rate": 1.8688524590163936e-05, "loss": 0.9334539175033569, "step": 457 }, { "epoch": 0.08462773267429641, "grad_norm": 0.11949948966503143, "learning_rate": 1.8729508196721314e-05, "loss": 0.9747587442398071, "step": 458 }, { "epoch": 0.08481250938319225, "grad_norm": 0.10351543128490448, "learning_rate": 1.877049180327869e-05, "loss": 1.0134328603744507, "step": 459 }, { "epoch": 0.08499728609208809, "grad_norm": 0.08718964457511902, "learning_rate": 1.8811475409836068e-05, "loss": 0.6342071890830994, "step": 460 }, { "epoch": 0.08518206280098393, "grad_norm": 0.09610489755868912, "learning_rate": 1.8852459016393446e-05, "loss": 0.9932312369346619, "step": 461 }, { "epoch": 0.08536683950987978, "grad_norm": 0.08185546100139618, "learning_rate": 1.889344262295082e-05, "loss": 0.7839753031730652, "step": 462 }, { "epoch": 0.08555161621877562, "grad_norm": 0.11200262606143951, "learning_rate": 1.89344262295082e-05, "loss": 1.0270265340805054, "step": 463 }, { "epoch": 0.08573639292767146, "grad_norm": 0.11818201094865799, "learning_rate": 1.8975409836065574e-05, "loss": 1.0113353729248047, "step": 464 }, { "epoch": 0.0859211696365673, "grad_norm": 0.10922921448945999, "learning_rate": 1.9016393442622952e-05, "loss": 0.9178203344345093, "step": 465 }, { "epoch": 0.08610594634546316, "grad_norm": 0.09575849026441574, "learning_rate": 1.905737704918033e-05, "loss": 0.7755801677703857, "step": 466 }, { "epoch": 0.086290723054359, "grad_norm": 0.11132930964231491, "learning_rate": 1.9098360655737706e-05, "loss": 1.046008586883545, "step": 467 }, { "epoch": 0.08647549976325485, "grad_norm": 0.07992382347583771, "learning_rate": 1.9139344262295084e-05, "loss": 0.8655605912208557, "step": 468 }, { "epoch": 0.08666027647215069, "grad_norm": 0.09868017584085464, "learning_rate": 1.918032786885246e-05, "loss": 0.8974450826644897, "step": 469 }, { "epoch": 0.08684505318104653, "grad_norm": 0.09519588947296143, "learning_rate": 1.9221311475409837e-05, "loss": 0.8987129926681519, "step": 470 }, { "epoch": 0.08702982988994237, "grad_norm": 0.111813485622406, "learning_rate": 1.9262295081967216e-05, "loss": 0.9828987717628479, "step": 471 }, { "epoch": 0.08721460659883822, "grad_norm": 0.08673518896102905, "learning_rate": 1.930327868852459e-05, "loss": 0.8034600615501404, "step": 472 }, { "epoch": 0.08739938330773406, "grad_norm": 0.0696590393781662, "learning_rate": 1.934426229508197e-05, "loss": 0.6808267831802368, "step": 473 }, { "epoch": 0.0875841600166299, "grad_norm": 0.08259183168411255, "learning_rate": 1.9385245901639347e-05, "loss": 0.8313323855400085, "step": 474 }, { "epoch": 0.08776893672552574, "grad_norm": 0.10918860882520676, "learning_rate": 1.9426229508196722e-05, "loss": 1.0069092512130737, "step": 475 }, { "epoch": 0.08795371343442158, "grad_norm": 0.09052850306034088, "learning_rate": 1.94672131147541e-05, "loss": 0.7818104028701782, "step": 476 }, { "epoch": 0.08813849014331744, "grad_norm": 0.10562017560005188, "learning_rate": 1.9508196721311475e-05, "loss": 0.8447665572166443, "step": 477 }, { "epoch": 0.08832326685221328, "grad_norm": 0.10511931777000427, "learning_rate": 1.9549180327868854e-05, "loss": 1.037174940109253, "step": 478 }, { "epoch": 0.08850804356110913, "grad_norm": 0.11579558998346329, "learning_rate": 1.9590163934426232e-05, "loss": 0.9202028512954712, "step": 479 }, { "epoch": 0.08869282027000497, "grad_norm": 0.10364022850990295, "learning_rate": 1.9631147540983607e-05, "loss": 0.9401568174362183, "step": 480 }, { "epoch": 0.08887759697890081, "grad_norm": 0.08251364529132843, "learning_rate": 1.9672131147540985e-05, "loss": 0.5896936655044556, "step": 481 }, { "epoch": 0.08906237368779665, "grad_norm": 0.10294211655855179, "learning_rate": 1.971311475409836e-05, "loss": 0.7742730379104614, "step": 482 }, { "epoch": 0.0892471503966925, "grad_norm": 0.11383319646120071, "learning_rate": 1.975409836065574e-05, "loss": 1.1207690238952637, "step": 483 }, { "epoch": 0.08943192710558834, "grad_norm": 0.09466154128313065, "learning_rate": 1.9795081967213117e-05, "loss": 0.7100986838340759, "step": 484 }, { "epoch": 0.08961670381448418, "grad_norm": 0.10002315789461136, "learning_rate": 1.9836065573770492e-05, "loss": 0.967641294002533, "step": 485 }, { "epoch": 0.08980148052338002, "grad_norm": 0.1255611926317215, "learning_rate": 1.987704918032787e-05, "loss": 0.9620874524116516, "step": 486 }, { "epoch": 0.08998625723227587, "grad_norm": 0.09591948240995407, "learning_rate": 1.991803278688525e-05, "loss": 0.9446707367897034, "step": 487 }, { "epoch": 0.09017103394117172, "grad_norm": 0.11456209421157837, "learning_rate": 1.9959016393442627e-05, "loss": 1.0979769229888916, "step": 488 }, { "epoch": 0.09035581065006756, "grad_norm": 0.10334254056215286, "learning_rate": 2e-05, "loss": 0.9240891337394714, "step": 489 }, { "epoch": 0.0905405873589634, "grad_norm": 0.09890230000019073, "learning_rate": 1.9999999801015645e-05, "loss": 0.8223688006401062, "step": 490 }, { "epoch": 0.09072536406785925, "grad_norm": 0.10173583775758743, "learning_rate": 1.9999999204062582e-05, "loss": 0.8781738877296448, "step": 491 }, { "epoch": 0.09091014077675509, "grad_norm": 0.09750431776046753, "learning_rate": 1.9999998209140837e-05, "loss": 0.8199716806411743, "step": 492 }, { "epoch": 0.09109491748565093, "grad_norm": 0.09942717105150223, "learning_rate": 1.999999681625045e-05, "loss": 0.7402327656745911, "step": 493 }, { "epoch": 0.09127969419454678, "grad_norm": 0.09399393945932388, "learning_rate": 1.999999502539147e-05, "loss": 0.9684867262840271, "step": 494 }, { "epoch": 0.09146447090344262, "grad_norm": 0.08668413758277893, "learning_rate": 1.999999283656398e-05, "loss": 0.6834291219711304, "step": 495 }, { "epoch": 0.09164924761233846, "grad_norm": 0.10017100721597672, "learning_rate": 1.999999024976806e-05, "loss": 0.9483546018600464, "step": 496 }, { "epoch": 0.0918340243212343, "grad_norm": 0.0832497626543045, "learning_rate": 1.9999987265003815e-05, "loss": 0.5636377930641174, "step": 497 }, { "epoch": 0.09201880103013015, "grad_norm": 0.10710781812667847, "learning_rate": 1.999998388227136e-05, "loss": 0.8883796334266663, "step": 498 }, { "epoch": 0.092203577739026, "grad_norm": 0.10211720317602158, "learning_rate": 1.9999980101570835e-05, "loss": 0.7937446236610413, "step": 499 }, { "epoch": 0.09238835444792184, "grad_norm": 0.08369703590869904, "learning_rate": 1.9999975922902386e-05, "loss": 0.7668294310569763, "step": 500 }, { "epoch": 0.09238835444792184, "eval_loss": 0.9341711401939392, "eval_runtime": 170.5358, "eval_samples_per_second": 106.893, "eval_steps_per_second": 13.364, "step": 500 }, { "epoch": 0.09257313115681769, "grad_norm": 0.10618746280670166, "learning_rate": 1.999997134626618e-05, "loss": 0.7023143768310547, "step": 501 }, { "epoch": 0.09275790786571353, "grad_norm": 0.09422959387302399, "learning_rate": 1.9999966371662403e-05, "loss": 0.9204212427139282, "step": 502 }, { "epoch": 0.09294268457460937, "grad_norm": 0.09210536628961563, "learning_rate": 1.999996099909125e-05, "loss": 0.8752090334892273, "step": 503 }, { "epoch": 0.09312746128350521, "grad_norm": 0.10762644559144974, "learning_rate": 1.9999955228552934e-05, "loss": 0.8156858682632446, "step": 504 }, { "epoch": 0.09331223799240106, "grad_norm": 0.12836356461048126, "learning_rate": 1.999994906004769e-05, "loss": 1.0994911193847656, "step": 505 }, { "epoch": 0.0934970147012969, "grad_norm": 0.12129420787096024, "learning_rate": 1.9999942493575754e-05, "loss": 0.8530201315879822, "step": 506 }, { "epoch": 0.09368179141019274, "grad_norm": 0.0979141965508461, "learning_rate": 1.9999935529137393e-05, "loss": 0.7191640138626099, "step": 507 }, { "epoch": 0.09386656811908858, "grad_norm": 0.08052355796098709, "learning_rate": 1.9999928166732884e-05, "loss": 0.9695467948913574, "step": 508 }, { "epoch": 0.09405134482798443, "grad_norm": 0.09615960717201233, "learning_rate": 1.999992040636252e-05, "loss": 0.7721450328826904, "step": 509 }, { "epoch": 0.09423612153688028, "grad_norm": 0.12196368724107742, "learning_rate": 1.9999912248026613e-05, "loss": 0.9920752644538879, "step": 510 }, { "epoch": 0.09442089824577612, "grad_norm": 0.08245757967233658, "learning_rate": 1.9999903691725478e-05, "loss": 0.8075022101402283, "step": 511 }, { "epoch": 0.09460567495467197, "grad_norm": 0.11245324462652206, "learning_rate": 1.9999894737459466e-05, "loss": 0.8487539291381836, "step": 512 }, { "epoch": 0.09479045166356781, "grad_norm": 0.08481521904468536, "learning_rate": 1.9999885385228928e-05, "loss": 0.6565818786621094, "step": 513 }, { "epoch": 0.09497522837246365, "grad_norm": 0.11278005689382553, "learning_rate": 1.9999875635034237e-05, "loss": 0.8271149396896362, "step": 514 }, { "epoch": 0.0951600050813595, "grad_norm": 0.09750741720199585, "learning_rate": 1.9999865486875784e-05, "loss": 0.8503895401954651, "step": 515 }, { "epoch": 0.09534478179025534, "grad_norm": 0.11001479625701904, "learning_rate": 1.9999854940753964e-05, "loss": 0.9437558054924011, "step": 516 }, { "epoch": 0.09552955849915118, "grad_norm": 0.12122728675603867, "learning_rate": 1.999984399666921e-05, "loss": 0.9913941025733948, "step": 517 }, { "epoch": 0.09571433520804702, "grad_norm": 0.11051741242408752, "learning_rate": 1.9999832654621945e-05, "loss": 0.9222496747970581, "step": 518 }, { "epoch": 0.09589911191694286, "grad_norm": 0.08907787501811981, "learning_rate": 1.999982091461263e-05, "loss": 0.6398655772209167, "step": 519 }, { "epoch": 0.0960838886258387, "grad_norm": 0.10980657488107681, "learning_rate": 1.9999808776641724e-05, "loss": 1.0253318548202515, "step": 520 }, { "epoch": 0.09626866533473456, "grad_norm": 0.09272947907447815, "learning_rate": 1.9999796240709718e-05, "loss": 0.8119353652000427, "step": 521 }, { "epoch": 0.0964534420436304, "grad_norm": 0.07884709537029266, "learning_rate": 1.9999783306817104e-05, "loss": 0.6862602829933167, "step": 522 }, { "epoch": 0.09663821875252625, "grad_norm": 0.08118434995412827, "learning_rate": 1.99997699749644e-05, "loss": 0.580864429473877, "step": 523 }, { "epoch": 0.09682299546142209, "grad_norm": 0.09141118079423904, "learning_rate": 1.999975624515214e-05, "loss": 0.6712226271629333, "step": 524 }, { "epoch": 0.09700777217031793, "grad_norm": 0.11130113899707794, "learning_rate": 1.9999742117380863e-05, "loss": 0.8245023488998413, "step": 525 }, { "epoch": 0.09719254887921377, "grad_norm": 0.08998782932758331, "learning_rate": 1.9999727591651136e-05, "loss": 0.7681167125701904, "step": 526 }, { "epoch": 0.09737732558810962, "grad_norm": 0.10016212612390518, "learning_rate": 1.9999712667963535e-05, "loss": 0.8970088958740234, "step": 527 }, { "epoch": 0.09756210229700546, "grad_norm": 0.0988573282957077, "learning_rate": 1.9999697346318653e-05, "loss": 1.0120849609375, "step": 528 }, { "epoch": 0.0977468790059013, "grad_norm": 0.0811084657907486, "learning_rate": 1.9999681626717105e-05, "loss": 0.6222402453422546, "step": 529 }, { "epoch": 0.09793165571479714, "grad_norm": 0.11052072793245316, "learning_rate": 1.9999665509159513e-05, "loss": 0.8718100786209106, "step": 530 }, { "epoch": 0.09811643242369299, "grad_norm": 0.10513053089380264, "learning_rate": 1.999964899364652e-05, "loss": 0.8752185702323914, "step": 531 }, { "epoch": 0.09830120913258884, "grad_norm": 0.09590096026659012, "learning_rate": 1.999963208017878e-05, "loss": 0.9732754230499268, "step": 532 }, { "epoch": 0.09848598584148469, "grad_norm": 0.07648955285549164, "learning_rate": 1.9999614768756968e-05, "loss": 0.5848802924156189, "step": 533 }, { "epoch": 0.09867076255038053, "grad_norm": 0.09366374462842941, "learning_rate": 1.9999597059381773e-05, "loss": 0.7161018252372742, "step": 534 }, { "epoch": 0.09885553925927637, "grad_norm": 0.10417844355106354, "learning_rate": 1.9999578952053896e-05, "loss": 0.7374823689460754, "step": 535 }, { "epoch": 0.09904031596817221, "grad_norm": 0.07650736719369888, "learning_rate": 1.999956044677407e-05, "loss": 0.678923487663269, "step": 536 }, { "epoch": 0.09922509267706806, "grad_norm": 0.07849035412073135, "learning_rate": 1.9999541543543017e-05, "loss": 0.73483806848526, "step": 537 }, { "epoch": 0.0994098693859639, "grad_norm": 0.09365969151258469, "learning_rate": 1.9999522242361494e-05, "loss": 0.6670161485671997, "step": 538 }, { "epoch": 0.09959464609485974, "grad_norm": 0.08298808336257935, "learning_rate": 1.9999502543230272e-05, "loss": 0.7895097136497498, "step": 539 }, { "epoch": 0.09977942280375558, "grad_norm": 0.09531105309724808, "learning_rate": 1.9999482446150137e-05, "loss": 0.7203987240791321, "step": 540 }, { "epoch": 0.09996419951265142, "grad_norm": 0.10739698261022568, "learning_rate": 1.999946195112188e-05, "loss": 0.8619695901870728, "step": 541 }, { "epoch": 0.10014897622154727, "grad_norm": 0.10465580970048904, "learning_rate": 1.9999441058146324e-05, "loss": 1.1077197790145874, "step": 542 }, { "epoch": 0.10033375293044312, "grad_norm": 0.11346356570720673, "learning_rate": 1.9999419767224296e-05, "loss": 1.1599786281585693, "step": 543 }, { "epoch": 0.10051852963933897, "grad_norm": 0.09236248582601547, "learning_rate": 1.9999398078356648e-05, "loss": 0.8175798654556274, "step": 544 }, { "epoch": 0.10070330634823481, "grad_norm": 0.07506538927555084, "learning_rate": 1.9999375991544237e-05, "loss": 0.7683030962944031, "step": 545 }, { "epoch": 0.10088808305713065, "grad_norm": 0.08747687190771103, "learning_rate": 1.999935350678795e-05, "loss": 0.7463568449020386, "step": 546 }, { "epoch": 0.1010728597660265, "grad_norm": 0.11351469159126282, "learning_rate": 1.9999330624088677e-05, "loss": 0.9395483136177063, "step": 547 }, { "epoch": 0.10125763647492234, "grad_norm": 0.09075150638818741, "learning_rate": 1.9999307343447326e-05, "loss": 0.854822039604187, "step": 548 }, { "epoch": 0.10144241318381818, "grad_norm": 0.08667241036891937, "learning_rate": 1.9999283664864828e-05, "loss": 0.698087215423584, "step": 549 }, { "epoch": 0.10162718989271402, "grad_norm": 0.07823988050222397, "learning_rate": 1.9999259588342124e-05, "loss": 0.7376258969306946, "step": 550 }, { "epoch": 0.10181196660160986, "grad_norm": 0.10926726460456848, "learning_rate": 1.999923511388017e-05, "loss": 1.0011591911315918, "step": 551 }, { "epoch": 0.1019967433105057, "grad_norm": 0.1054510846734047, "learning_rate": 1.9999210241479946e-05, "loss": 1.009000301361084, "step": 552 }, { "epoch": 0.10218152001940155, "grad_norm": 0.08677765727043152, "learning_rate": 1.9999184971142433e-05, "loss": 0.6511209011077881, "step": 553 }, { "epoch": 0.1023662967282974, "grad_norm": 0.09379676729440689, "learning_rate": 1.9999159302868646e-05, "loss": 0.9610671997070312, "step": 554 }, { "epoch": 0.10255107343719325, "grad_norm": 0.09060139954090118, "learning_rate": 1.99991332366596e-05, "loss": 0.883156955242157, "step": 555 }, { "epoch": 0.10273585014608909, "grad_norm": 0.10240011662244797, "learning_rate": 1.9999106772516334e-05, "loss": 1.0079379081726074, "step": 556 }, { "epoch": 0.10292062685498493, "grad_norm": 0.09460246562957764, "learning_rate": 1.9999079910439905e-05, "loss": 0.8006080985069275, "step": 557 }, { "epoch": 0.10310540356388077, "grad_norm": 0.09683685004711151, "learning_rate": 1.9999052650431374e-05, "loss": 0.8120759129524231, "step": 558 }, { "epoch": 0.10329018027277662, "grad_norm": 0.1001666709780693, "learning_rate": 1.9999024992491837e-05, "loss": 0.951696515083313, "step": 559 }, { "epoch": 0.10347495698167246, "grad_norm": 0.08484689146280289, "learning_rate": 1.999899693662238e-05, "loss": 0.6899352073669434, "step": 560 }, { "epoch": 0.1036597336905683, "grad_norm": 0.10029439628124237, "learning_rate": 1.9998968482824134e-05, "loss": 0.9168360829353333, "step": 561 }, { "epoch": 0.10384451039946414, "grad_norm": 0.09970587491989136, "learning_rate": 1.999893963109822e-05, "loss": 0.9419733285903931, "step": 562 }, { "epoch": 0.10402928710835999, "grad_norm": 0.0891055092215538, "learning_rate": 1.9998910381445794e-05, "loss": 0.762864351272583, "step": 563 }, { "epoch": 0.10421406381725583, "grad_norm": 0.0846485048532486, "learning_rate": 1.999888073386802e-05, "loss": 0.7737445831298828, "step": 564 }, { "epoch": 0.10439884052615168, "grad_norm": 0.08326343446969986, "learning_rate": 1.999885068836607e-05, "loss": 0.7202712893486023, "step": 565 }, { "epoch": 0.10458361723504753, "grad_norm": 0.08514825999736786, "learning_rate": 1.999882024494115e-05, "loss": 0.8478882312774658, "step": 566 }, { "epoch": 0.10476839394394337, "grad_norm": 0.07370934635400772, "learning_rate": 1.9998789403594464e-05, "loss": 0.680758535861969, "step": 567 }, { "epoch": 0.10495317065283921, "grad_norm": 0.09160866588354111, "learning_rate": 1.9998758164327242e-05, "loss": 0.8851611018180847, "step": 568 }, { "epoch": 0.10513794736173505, "grad_norm": 0.08800017088651657, "learning_rate": 1.999872652714073e-05, "loss": 0.7214294075965881, "step": 569 }, { "epoch": 0.1053227240706309, "grad_norm": 0.07336652278900146, "learning_rate": 1.999869449203618e-05, "loss": 0.6500799655914307, "step": 570 }, { "epoch": 0.10550750077952674, "grad_norm": 0.09790507704019547, "learning_rate": 1.9998662059014874e-05, "loss": 0.9606900811195374, "step": 571 }, { "epoch": 0.10569227748842258, "grad_norm": 0.08473934978246689, "learning_rate": 1.99986292280781e-05, "loss": 0.7611656785011292, "step": 572 }, { "epoch": 0.10587705419731842, "grad_norm": 0.08579279482364655, "learning_rate": 1.999859599922716e-05, "loss": 0.6954261660575867, "step": 573 }, { "epoch": 0.10606183090621427, "grad_norm": 0.08276309818029404, "learning_rate": 1.9998562372463387e-05, "loss": 0.6657392978668213, "step": 574 }, { "epoch": 0.10624660761511011, "grad_norm": 0.07595104724168777, "learning_rate": 1.9998528347788108e-05, "loss": 0.7012154459953308, "step": 575 }, { "epoch": 0.10643138432400596, "grad_norm": 0.09763680398464203, "learning_rate": 1.9998493925202686e-05, "loss": 0.8681825399398804, "step": 576 }, { "epoch": 0.10661616103290181, "grad_norm": 0.09107891470193863, "learning_rate": 1.9998459104708485e-05, "loss": 0.8562954664230347, "step": 577 }, { "epoch": 0.10680093774179765, "grad_norm": 0.10249120742082596, "learning_rate": 1.999842388630689e-05, "loss": 0.8481846451759338, "step": 578 }, { "epoch": 0.10698571445069349, "grad_norm": 0.09755781292915344, "learning_rate": 1.999838826999931e-05, "loss": 0.7242237329483032, "step": 579 }, { "epoch": 0.10717049115958933, "grad_norm": 0.06296950578689575, "learning_rate": 1.9998352255787155e-05, "loss": 0.6511966586112976, "step": 580 }, { "epoch": 0.10735526786848518, "grad_norm": 0.08668170869350433, "learning_rate": 1.9998315843671862e-05, "loss": 0.8875470161437988, "step": 581 }, { "epoch": 0.10754004457738102, "grad_norm": 0.10093273967504501, "learning_rate": 1.9998279033654883e-05, "loss": 0.8163881301879883, "step": 582 }, { "epoch": 0.10772482128627686, "grad_norm": 0.08768948167562485, "learning_rate": 1.9998241825737675e-05, "loss": 0.7841170430183411, "step": 583 }, { "epoch": 0.1079095979951727, "grad_norm": 0.10083579272031784, "learning_rate": 1.9998204219921722e-05, "loss": 0.8083724975585938, "step": 584 }, { "epoch": 0.10809437470406855, "grad_norm": 0.0848889872431755, "learning_rate": 1.9998166216208522e-05, "loss": 0.6563320159912109, "step": 585 }, { "epoch": 0.10827915141296439, "grad_norm": 0.08785545080900192, "learning_rate": 1.999812781459959e-05, "loss": 0.9830499291419983, "step": 586 }, { "epoch": 0.10846392812186025, "grad_norm": 0.11273134499788284, "learning_rate": 1.9998089015096445e-05, "loss": 1.1327345371246338, "step": 587 }, { "epoch": 0.10864870483075609, "grad_norm": 0.10496936738491058, "learning_rate": 1.999804981770064e-05, "loss": 0.9502683281898499, "step": 588 }, { "epoch": 0.10883348153965193, "grad_norm": 0.07698635756969452, "learning_rate": 1.9998010222413736e-05, "loss": 0.5978910326957703, "step": 589 }, { "epoch": 0.10901825824854777, "grad_norm": 0.11628872156143188, "learning_rate": 1.9997970229237302e-05, "loss": 0.9122211337089539, "step": 590 }, { "epoch": 0.10920303495744361, "grad_norm": 0.10752473771572113, "learning_rate": 1.9997929838172935e-05, "loss": 1.009599208831787, "step": 591 }, { "epoch": 0.10938781166633946, "grad_norm": 0.10253801196813583, "learning_rate": 1.9997889049222233e-05, "loss": 0.8574459552764893, "step": 592 }, { "epoch": 0.1095725883752353, "grad_norm": 0.09937529265880585, "learning_rate": 1.999784786238683e-05, "loss": 0.8006525039672852, "step": 593 }, { "epoch": 0.10975736508413114, "grad_norm": 0.10417422652244568, "learning_rate": 1.9997806277668364e-05, "loss": 1.0326251983642578, "step": 594 }, { "epoch": 0.10994214179302698, "grad_norm": 0.08065392822027206, "learning_rate": 1.9997764295068486e-05, "loss": 0.7957016825675964, "step": 595 }, { "epoch": 0.11012691850192283, "grad_norm": 0.12417735159397125, "learning_rate": 1.9997721914588867e-05, "loss": 1.0323731899261475, "step": 596 }, { "epoch": 0.11031169521081868, "grad_norm": 0.08042848110198975, "learning_rate": 1.9997679136231195e-05, "loss": 0.8618655204772949, "step": 597 }, { "epoch": 0.11049647191971453, "grad_norm": 0.12250402569770813, "learning_rate": 1.999763595999717e-05, "loss": 1.1520699262619019, "step": 598 }, { "epoch": 0.11068124862861037, "grad_norm": 0.08132418990135193, "learning_rate": 1.9997592385888517e-05, "loss": 0.7422507405281067, "step": 599 }, { "epoch": 0.11086602533750621, "grad_norm": 0.10508530586957932, "learning_rate": 1.9997548413906964e-05, "loss": 0.7894071340560913, "step": 600 }, { "epoch": 0.11105080204640205, "grad_norm": 0.10218667984008789, "learning_rate": 1.999750404405426e-05, "loss": 0.8501331210136414, "step": 601 }, { "epoch": 0.1112355787552979, "grad_norm": 0.10211113095283508, "learning_rate": 1.9997459276332174e-05, "loss": 0.9509809017181396, "step": 602 }, { "epoch": 0.11142035546419374, "grad_norm": 0.10265897959470749, "learning_rate": 1.9997414110742488e-05, "loss": 1.0141488313674927, "step": 603 }, { "epoch": 0.11160513217308958, "grad_norm": 0.10459209978580475, "learning_rate": 1.9997368547286996e-05, "loss": 0.9121615886688232, "step": 604 }, { "epoch": 0.11178990888198542, "grad_norm": 0.09054961055517197, "learning_rate": 1.9997322585967516e-05, "loss": 0.808859646320343, "step": 605 }, { "epoch": 0.11197468559088126, "grad_norm": 0.09328276664018631, "learning_rate": 1.9997276226785872e-05, "loss": 0.8714438080787659, "step": 606 }, { "epoch": 0.11215946229977711, "grad_norm": 0.10548422485589981, "learning_rate": 1.9997229469743915e-05, "loss": 0.9935821294784546, "step": 607 }, { "epoch": 0.11234423900867296, "grad_norm": 0.09083138406276703, "learning_rate": 1.99971823148435e-05, "loss": 0.7940509915351868, "step": 608 }, { "epoch": 0.1125290157175688, "grad_norm": 0.11800063401460648, "learning_rate": 1.999713476208651e-05, "loss": 1.1958283185958862, "step": 609 }, { "epoch": 0.11271379242646465, "grad_norm": 0.10174321383237839, "learning_rate": 1.999708681147483e-05, "loss": 0.7750235199928284, "step": 610 }, { "epoch": 0.11289856913536049, "grad_norm": 0.07584797590970993, "learning_rate": 1.9997038463010373e-05, "loss": 0.632562518119812, "step": 611 }, { "epoch": 0.11308334584425633, "grad_norm": 0.08101744204759598, "learning_rate": 1.999698971669506e-05, "loss": 0.5663021206855774, "step": 612 }, { "epoch": 0.11326812255315218, "grad_norm": 0.09835753589868546, "learning_rate": 1.999694057253083e-05, "loss": 0.905450165271759, "step": 613 }, { "epoch": 0.11345289926204802, "grad_norm": 0.09858648478984833, "learning_rate": 1.999689103051965e-05, "loss": 0.92292320728302, "step": 614 }, { "epoch": 0.11363767597094386, "grad_norm": 0.06762873381376266, "learning_rate": 1.9996841090663476e-05, "loss": 0.5619035363197327, "step": 615 }, { "epoch": 0.1138224526798397, "grad_norm": 0.09497879445552826, "learning_rate": 1.9996790752964305e-05, "loss": 0.7997409701347351, "step": 616 }, { "epoch": 0.11400722938873555, "grad_norm": 0.09728559106588364, "learning_rate": 1.9996740017424143e-05, "loss": 1.0197670459747314, "step": 617 }, { "epoch": 0.11419200609763139, "grad_norm": 0.09417632222175598, "learning_rate": 1.9996688884044995e-05, "loss": 0.8190154433250427, "step": 618 }, { "epoch": 0.11437678280652724, "grad_norm": 0.08123410493135452, "learning_rate": 1.999663735282891e-05, "loss": 0.7777723670005798, "step": 619 }, { "epoch": 0.11456155951542309, "grad_norm": 0.08742178231477737, "learning_rate": 1.9996585423777936e-05, "loss": 0.7551705837249756, "step": 620 }, { "epoch": 0.11474633622431893, "grad_norm": 0.07352913916110992, "learning_rate": 1.9996533096894133e-05, "loss": 0.6663444638252258, "step": 621 }, { "epoch": 0.11493111293321477, "grad_norm": 0.07812623679637909, "learning_rate": 1.999648037217959e-05, "loss": 0.7678573131561279, "step": 622 }, { "epoch": 0.11511588964211061, "grad_norm": 0.0896257683634758, "learning_rate": 1.9996427249636403e-05, "loss": 0.7776379585266113, "step": 623 }, { "epoch": 0.11530066635100646, "grad_norm": 0.09168458729982376, "learning_rate": 1.9996373729266687e-05, "loss": 0.828106701374054, "step": 624 }, { "epoch": 0.1154854430599023, "grad_norm": 0.09804865717887878, "learning_rate": 1.999631981107257e-05, "loss": 0.8594550490379333, "step": 625 }, { "epoch": 0.11567021976879814, "grad_norm": 0.11177453398704529, "learning_rate": 1.99962654950562e-05, "loss": 0.9018521904945374, "step": 626 }, { "epoch": 0.11585499647769398, "grad_norm": 0.10924191772937775, "learning_rate": 1.9996210781219738e-05, "loss": 0.9099810719490051, "step": 627 }, { "epoch": 0.11603977318658983, "grad_norm": 0.08882026374340057, "learning_rate": 1.999615566956536e-05, "loss": 0.7561041116714478, "step": 628 }, { "epoch": 0.11622454989548567, "grad_norm": 0.1086808294057846, "learning_rate": 1.999610016009526e-05, "loss": 1.036689043045044, "step": 629 }, { "epoch": 0.11640932660438152, "grad_norm": 0.08189629763364792, "learning_rate": 1.9996044252811647e-05, "loss": 0.7450594305992126, "step": 630 }, { "epoch": 0.11659410331327737, "grad_norm": 0.08234315365552902, "learning_rate": 1.9995987947716746e-05, "loss": 0.6694872379302979, "step": 631 }, { "epoch": 0.11677888002217321, "grad_norm": 0.07986690104007721, "learning_rate": 1.99959312448128e-05, "loss": 0.6215049028396606, "step": 632 }, { "epoch": 0.11696365673106905, "grad_norm": 0.08140669018030167, "learning_rate": 1.9995874144102065e-05, "loss": 0.6296094655990601, "step": 633 }, { "epoch": 0.1171484334399649, "grad_norm": 0.07857182621955872, "learning_rate": 1.9995816645586808e-05, "loss": 0.7098954916000366, "step": 634 }, { "epoch": 0.11733321014886074, "grad_norm": 0.10621404647827148, "learning_rate": 1.9995758749269324e-05, "loss": 1.011143445968628, "step": 635 }, { "epoch": 0.11751798685775658, "grad_norm": 0.09262366592884064, "learning_rate": 1.9995700455151913e-05, "loss": 0.7535731792449951, "step": 636 }, { "epoch": 0.11770276356665242, "grad_norm": 0.07685394585132599, "learning_rate": 1.99956417632369e-05, "loss": 0.7163308262825012, "step": 637 }, { "epoch": 0.11788754027554826, "grad_norm": 0.10388977080583572, "learning_rate": 1.9995582673526613e-05, "loss": 0.8714351654052734, "step": 638 }, { "epoch": 0.1180723169844441, "grad_norm": 0.1180369183421135, "learning_rate": 1.999552318602341e-05, "loss": 1.1385008096694946, "step": 639 }, { "epoch": 0.11825709369333995, "grad_norm": 0.08246862888336182, "learning_rate": 1.9995463300729653e-05, "loss": 0.6757149696350098, "step": 640 }, { "epoch": 0.1184418704022358, "grad_norm": 0.116151362657547, "learning_rate": 1.999540301764773e-05, "loss": 1.0220239162445068, "step": 641 }, { "epoch": 0.11862664711113165, "grad_norm": 0.09925840049982071, "learning_rate": 1.9995342336780042e-05, "loss": 0.8448605537414551, "step": 642 }, { "epoch": 0.11881142382002749, "grad_norm": 0.08795686811208725, "learning_rate": 1.9995281258128994e-05, "loss": 0.9142518043518066, "step": 643 }, { "epoch": 0.11899620052892333, "grad_norm": 0.07408778369426727, "learning_rate": 1.999521978169703e-05, "loss": 0.8021790385246277, "step": 644 }, { "epoch": 0.11918097723781917, "grad_norm": 0.08842435479164124, "learning_rate": 1.9995157907486587e-05, "loss": 0.994310736656189, "step": 645 }, { "epoch": 0.11936575394671502, "grad_norm": 0.10104726999998093, "learning_rate": 1.999509563550013e-05, "loss": 1.126815915107727, "step": 646 }, { "epoch": 0.11955053065561086, "grad_norm": 0.1172916516661644, "learning_rate": 1.9995032965740137e-05, "loss": 1.0974340438842773, "step": 647 }, { "epoch": 0.1197353073645067, "grad_norm": 0.08660288900136948, "learning_rate": 1.9994969898209102e-05, "loss": 0.7137119770050049, "step": 648 }, { "epoch": 0.11992008407340254, "grad_norm": 0.09413591772317886, "learning_rate": 1.9994906432909537e-05, "loss": 0.8359218239784241, "step": 649 }, { "epoch": 0.12010486078229839, "grad_norm": 0.07111218571662903, "learning_rate": 1.9994842569843965e-05, "loss": 0.6207714676856995, "step": 650 }, { "epoch": 0.12028963749119423, "grad_norm": 0.07730361819267273, "learning_rate": 1.999477830901493e-05, "loss": 0.6430050134658813, "step": 651 }, { "epoch": 0.12047441420009009, "grad_norm": 0.09931548684835434, "learning_rate": 1.9994713650424985e-05, "loss": 0.8501662611961365, "step": 652 }, { "epoch": 0.12065919090898593, "grad_norm": 0.12040199339389801, "learning_rate": 1.9994648594076706e-05, "loss": 0.983227014541626, "step": 653 }, { "epoch": 0.12084396761788177, "grad_norm": 0.10506908595561981, "learning_rate": 1.9994583139972686e-05, "loss": 0.8017892241477966, "step": 654 }, { "epoch": 0.12102874432677761, "grad_norm": 0.0854715034365654, "learning_rate": 1.9994517288115522e-05, "loss": 0.7763564586639404, "step": 655 }, { "epoch": 0.12121352103567345, "grad_norm": 0.10104908049106598, "learning_rate": 1.999445103850784e-05, "loss": 0.8201025128364563, "step": 656 }, { "epoch": 0.1213982977445693, "grad_norm": 0.1063561663031578, "learning_rate": 1.9994384391152276e-05, "loss": 0.9748373031616211, "step": 657 }, { "epoch": 0.12158307445346514, "grad_norm": 0.08973393589258194, "learning_rate": 1.999431734605148e-05, "loss": 0.7709097862243652, "step": 658 }, { "epoch": 0.12176785116236098, "grad_norm": 0.09718851745128632, "learning_rate": 1.9994249903208125e-05, "loss": 0.8377055525779724, "step": 659 }, { "epoch": 0.12195262787125682, "grad_norm": 0.10766126215457916, "learning_rate": 1.999418206262489e-05, "loss": 0.9289681315422058, "step": 660 }, { "epoch": 0.12213740458015267, "grad_norm": 0.09482572227716446, "learning_rate": 1.9994113824304476e-05, "loss": 0.8877840638160706, "step": 661 }, { "epoch": 0.12232218128904851, "grad_norm": 0.06976597756147385, "learning_rate": 1.99940451882496e-05, "loss": 0.5023065209388733, "step": 662 }, { "epoch": 0.12250695799794437, "grad_norm": 0.07548683881759644, "learning_rate": 1.9993976154462997e-05, "loss": 0.7584120631217957, "step": 663 }, { "epoch": 0.12269173470684021, "grad_norm": 0.09401997923851013, "learning_rate": 1.9993906722947406e-05, "loss": 0.8495917320251465, "step": 664 }, { "epoch": 0.12287651141573605, "grad_norm": 0.0940614566206932, "learning_rate": 1.9993836893705594e-05, "loss": 0.8613463640213013, "step": 665 }, { "epoch": 0.12306128812463189, "grad_norm": 0.0905088260769844, "learning_rate": 1.999376666674034e-05, "loss": 0.7595023512840271, "step": 666 }, { "epoch": 0.12324606483352774, "grad_norm": 0.08580617606639862, "learning_rate": 1.9993696042054437e-05, "loss": 0.9611132144927979, "step": 667 }, { "epoch": 0.12343084154242358, "grad_norm": 0.09921999275684357, "learning_rate": 1.9993625019650703e-05, "loss": 0.7061740756034851, "step": 668 }, { "epoch": 0.12361561825131942, "grad_norm": 0.09604513645172119, "learning_rate": 1.999355359953196e-05, "loss": 0.8366731405258179, "step": 669 }, { "epoch": 0.12380039496021526, "grad_norm": 0.08084668219089508, "learning_rate": 1.9993481781701044e-05, "loss": 0.6480671167373657, "step": 670 }, { "epoch": 0.1239851716691111, "grad_norm": 0.07487687468528748, "learning_rate": 1.9993409566160822e-05, "loss": 0.6911617517471313, "step": 671 }, { "epoch": 0.12416994837800695, "grad_norm": 0.09577896445989609, "learning_rate": 1.9993336952914165e-05, "loss": 0.9893839359283447, "step": 672 }, { "epoch": 0.12435472508690279, "grad_norm": 0.10856965184211731, "learning_rate": 1.999326394196396e-05, "loss": 0.9420531392097473, "step": 673 }, { "epoch": 0.12453950179579865, "grad_norm": 0.1150006502866745, "learning_rate": 1.9993190533313116e-05, "loss": 0.9741981029510498, "step": 674 }, { "epoch": 0.12472427850469449, "grad_norm": 0.10523250699043274, "learning_rate": 1.9993116726964554e-05, "loss": 0.9235258102416992, "step": 675 }, { "epoch": 0.12490905521359033, "grad_norm": 0.07580921053886414, "learning_rate": 1.9993042522921212e-05, "loss": 0.635983943939209, "step": 676 }, { "epoch": 0.12509383192248616, "grad_norm": 0.0988108292222023, "learning_rate": 1.999296792118604e-05, "loss": 0.740143358707428, "step": 677 }, { "epoch": 0.12527860863138202, "grad_norm": 0.10186348855495453, "learning_rate": 1.9992892921762007e-05, "loss": 1.0258514881134033, "step": 678 }, { "epoch": 0.12546338534027784, "grad_norm": 0.09436734020709991, "learning_rate": 1.9992817524652102e-05, "loss": 0.8242838978767395, "step": 679 }, { "epoch": 0.1256481620491737, "grad_norm": 0.08851488679647446, "learning_rate": 1.9992741729859323e-05, "loss": 0.8003968000411987, "step": 680 }, { "epoch": 0.12583293875806956, "grad_norm": 0.10243381559848785, "learning_rate": 1.9992665537386687e-05, "loss": 0.7095505595207214, "step": 681 }, { "epoch": 0.12601771546696539, "grad_norm": 0.08683772385120392, "learning_rate": 1.9992588947237226e-05, "loss": 0.7240039110183716, "step": 682 }, { "epoch": 0.12620249217586124, "grad_norm": 0.07817398011684418, "learning_rate": 1.9992511959413984e-05, "loss": 0.8571678996086121, "step": 683 }, { "epoch": 0.12638726888475707, "grad_norm": 0.0839422419667244, "learning_rate": 1.9992434573920033e-05, "loss": 0.757088840007782, "step": 684 }, { "epoch": 0.12657204559365293, "grad_norm": 0.09364355355501175, "learning_rate": 1.9992356790758445e-05, "loss": 0.8351529240608215, "step": 685 }, { "epoch": 0.12675682230254876, "grad_norm": 0.10530338436365128, "learning_rate": 1.999227860993232e-05, "loss": 0.9548841118812561, "step": 686 }, { "epoch": 0.1269415990114446, "grad_norm": 0.1037643700838089, "learning_rate": 1.9992200031444768e-05, "loss": 0.7979400753974915, "step": 687 }, { "epoch": 0.12712637572034044, "grad_norm": 0.10997983813285828, "learning_rate": 1.9992121055298917e-05, "loss": 1.0404939651489258, "step": 688 }, { "epoch": 0.1273111524292363, "grad_norm": 0.09578070789575577, "learning_rate": 1.9992041681497908e-05, "loss": 0.8999356031417847, "step": 689 }, { "epoch": 0.12749592913813212, "grad_norm": 0.08665809035301208, "learning_rate": 1.99919619100449e-05, "loss": 0.7991875410079956, "step": 690 }, { "epoch": 0.12768070584702798, "grad_norm": 0.1182815432548523, "learning_rate": 1.9991881740943072e-05, "loss": 1.1467589139938354, "step": 691 }, { "epoch": 0.12786548255592384, "grad_norm": 0.08033980429172516, "learning_rate": 1.9991801174195612e-05, "loss": 0.7506182193756104, "step": 692 }, { "epoch": 0.12805025926481967, "grad_norm": 0.0881318673491478, "learning_rate": 1.9991720209805723e-05, "loss": 0.7702957987785339, "step": 693 }, { "epoch": 0.12823503597371552, "grad_norm": 0.09789463877677917, "learning_rate": 1.999163884777663e-05, "loss": 0.9726911187171936, "step": 694 }, { "epoch": 0.12841981268261135, "grad_norm": 0.08305973559617996, "learning_rate": 1.999155708811157e-05, "loss": 0.6882860064506531, "step": 695 }, { "epoch": 0.1286045893915072, "grad_norm": 0.10566031187772751, "learning_rate": 1.99914749308138e-05, "loss": 0.9091855883598328, "step": 696 }, { "epoch": 0.12878936610040304, "grad_norm": 0.11838914453983307, "learning_rate": 1.9991392375886586e-05, "loss": 0.9343292713165283, "step": 697 }, { "epoch": 0.1289741428092989, "grad_norm": 0.10054321587085724, "learning_rate": 1.9991309423333214e-05, "loss": 0.8488895297050476, "step": 698 }, { "epoch": 0.12915891951819472, "grad_norm": 0.0989433005452156, "learning_rate": 1.9991226073156986e-05, "loss": 0.9436942934989929, "step": 699 }, { "epoch": 0.12934369622709058, "grad_norm": 0.07221207022666931, "learning_rate": 1.999114232536122e-05, "loss": 0.6067855358123779, "step": 700 }, { "epoch": 0.1295284729359864, "grad_norm": 0.07408981025218964, "learning_rate": 1.9991058179949247e-05, "loss": 0.7271761894226074, "step": 701 }, { "epoch": 0.12971324964488226, "grad_norm": 0.08090857416391373, "learning_rate": 1.9990973636924417e-05, "loss": 0.7468402981758118, "step": 702 }, { "epoch": 0.12989802635377812, "grad_norm": 0.10384667664766312, "learning_rate": 1.999088869629009e-05, "loss": 0.8260988593101501, "step": 703 }, { "epoch": 0.13008280306267395, "grad_norm": 0.06383131444454193, "learning_rate": 1.9990803358049653e-05, "loss": 0.6380683183670044, "step": 704 }, { "epoch": 0.1302675797715698, "grad_norm": 0.08272639662027359, "learning_rate": 1.99907176222065e-05, "loss": 0.761116623878479, "step": 705 }, { "epoch": 0.13045235648046563, "grad_norm": 0.10247909277677536, "learning_rate": 1.9990631488764044e-05, "loss": 0.8150011897087097, "step": 706 }, { "epoch": 0.1306371331893615, "grad_norm": 0.09713143110275269, "learning_rate": 1.9990544957725708e-05, "loss": 0.6454431414604187, "step": 707 }, { "epoch": 0.13082190989825732, "grad_norm": 0.10591935366392136, "learning_rate": 1.999045802909494e-05, "loss": 1.0341289043426514, "step": 708 }, { "epoch": 0.13100668660715317, "grad_norm": 0.10145984590053558, "learning_rate": 1.9990370702875203e-05, "loss": 0.7613344192504883, "step": 709 }, { "epoch": 0.131191463316049, "grad_norm": 0.08258189260959625, "learning_rate": 1.9990282979069962e-05, "loss": 0.701318085193634, "step": 710 }, { "epoch": 0.13137624002494486, "grad_norm": 0.07917267829179764, "learning_rate": 1.9990194857682717e-05, "loss": 0.7271844744682312, "step": 711 }, { "epoch": 0.13156101673384069, "grad_norm": 0.07668077200651169, "learning_rate": 1.9990106338716973e-05, "loss": 0.7700219750404358, "step": 712 }, { "epoch": 0.13174579344273654, "grad_norm": 0.06670837849378586, "learning_rate": 1.9990017422176247e-05, "loss": 0.6176334619522095, "step": 713 }, { "epoch": 0.1319305701516324, "grad_norm": 0.08646373450756073, "learning_rate": 1.9989928108064087e-05, "loss": 0.7595674395561218, "step": 714 }, { "epoch": 0.13211534686052823, "grad_norm": 0.10018078237771988, "learning_rate": 1.998983839638404e-05, "loss": 1.067365288734436, "step": 715 }, { "epoch": 0.13230012356942408, "grad_norm": 0.06973787397146225, "learning_rate": 1.998974828713968e-05, "loss": 0.7161954641342163, "step": 716 }, { "epoch": 0.1324849002783199, "grad_norm": 0.08027666807174683, "learning_rate": 1.9989657780334593e-05, "loss": 0.696490466594696, "step": 717 }, { "epoch": 0.13266967698721577, "grad_norm": 0.0808442011475563, "learning_rate": 1.998956687597238e-05, "loss": 0.8137856721878052, "step": 718 }, { "epoch": 0.1328544536961116, "grad_norm": 0.10365045815706253, "learning_rate": 1.9989475574056655e-05, "loss": 0.9946999549865723, "step": 719 }, { "epoch": 0.13303923040500745, "grad_norm": 0.09060733020305634, "learning_rate": 1.998938387459106e-05, "loss": 0.9365242719650269, "step": 720 }, { "epoch": 0.13322400711390328, "grad_norm": 0.09630218148231506, "learning_rate": 1.9989291777579238e-05, "loss": 1.0065200328826904, "step": 721 }, { "epoch": 0.13340878382279914, "grad_norm": 0.08935556560754776, "learning_rate": 1.9989199283024857e-05, "loss": 0.8397243618965149, "step": 722 }, { "epoch": 0.133593560531695, "grad_norm": 0.08932442218065262, "learning_rate": 1.9989106390931595e-05, "loss": 0.8106871843338013, "step": 723 }, { "epoch": 0.13377833724059082, "grad_norm": 0.10723939538002014, "learning_rate": 1.998901310130315e-05, "loss": 0.9284294247627258, "step": 724 }, { "epoch": 0.13396311394948668, "grad_norm": 0.09080146253108978, "learning_rate": 1.9988919414143234e-05, "loss": 0.8117637634277344, "step": 725 }, { "epoch": 0.1341478906583825, "grad_norm": 0.08735419064760208, "learning_rate": 1.998882532945558e-05, "loss": 0.6929842829704285, "step": 726 }, { "epoch": 0.13433266736727836, "grad_norm": 0.09440413117408752, "learning_rate": 1.9988730847243926e-05, "loss": 0.7941557765007019, "step": 727 }, { "epoch": 0.1345174440761742, "grad_norm": 0.08626824617385864, "learning_rate": 1.9988635967512037e-05, "loss": 0.7169809937477112, "step": 728 }, { "epoch": 0.13470222078507005, "grad_norm": 0.07833532989025116, "learning_rate": 1.998854069026369e-05, "loss": 0.6908170580863953, "step": 729 }, { "epoch": 0.13488699749396588, "grad_norm": 0.08950688689947128, "learning_rate": 1.9988445015502668e-05, "loss": 0.8975874185562134, "step": 730 }, { "epoch": 0.13507177420286173, "grad_norm": 0.09731578081846237, "learning_rate": 1.9988348943232787e-05, "loss": 0.8549961447715759, "step": 731 }, { "epoch": 0.13525655091175756, "grad_norm": 0.0867985412478447, "learning_rate": 1.9988252473457867e-05, "loss": 0.7790268659591675, "step": 732 }, { "epoch": 0.13544132762065342, "grad_norm": 0.08507269620895386, "learning_rate": 1.9988155606181747e-05, "loss": 0.7599582672119141, "step": 733 }, { "epoch": 0.13562610432954927, "grad_norm": 0.07925692200660706, "learning_rate": 1.9988058341408282e-05, "loss": 0.6674377918243408, "step": 734 }, { "epoch": 0.1358108810384451, "grad_norm": 0.10991337895393372, "learning_rate": 1.9987960679141344e-05, "loss": 0.9170887470245361, "step": 735 }, { "epoch": 0.13599565774734096, "grad_norm": 0.07741792500019073, "learning_rate": 1.998786261938482e-05, "loss": 0.7736765146255493, "step": 736 }, { "epoch": 0.1361804344562368, "grad_norm": 0.07393886893987656, "learning_rate": 1.9987764162142615e-05, "loss": 0.6246984004974365, "step": 737 }, { "epoch": 0.13636521116513264, "grad_norm": 0.0793522372841835, "learning_rate": 1.998766530741864e-05, "loss": 0.9042505621910095, "step": 738 }, { "epoch": 0.13654998787402847, "grad_norm": 0.08039207756519318, "learning_rate": 1.9987566055216833e-05, "loss": 0.6424958109855652, "step": 739 }, { "epoch": 0.13673476458292433, "grad_norm": 0.08686236292123795, "learning_rate": 1.998746640554115e-05, "loss": 0.8035511374473572, "step": 740 }, { "epoch": 0.13691954129182016, "grad_norm": 0.08280868828296661, "learning_rate": 1.9987366358395542e-05, "loss": 0.7081146240234375, "step": 741 }, { "epoch": 0.137104318000716, "grad_norm": 0.08056240528821945, "learning_rate": 1.9987265913784007e-05, "loss": 0.7108740210533142, "step": 742 }, { "epoch": 0.13728909470961184, "grad_norm": 0.08866320550441742, "learning_rate": 1.998716507171053e-05, "loss": 0.9253249764442444, "step": 743 }, { "epoch": 0.1374738714185077, "grad_norm": 0.09651792794466019, "learning_rate": 1.998706383217913e-05, "loss": 0.9384243488311768, "step": 744 }, { "epoch": 0.13765864812740355, "grad_norm": 0.0859675332903862, "learning_rate": 1.9986962195193836e-05, "loss": 0.9224919676780701, "step": 745 }, { "epoch": 0.13784342483629938, "grad_norm": 0.09163744002580643, "learning_rate": 1.998686016075869e-05, "loss": 0.6864213347434998, "step": 746 }, { "epoch": 0.13802820154519524, "grad_norm": 0.09888351708650589, "learning_rate": 1.9986757728877755e-05, "loss": 0.9802375435829163, "step": 747 }, { "epoch": 0.13821297825409107, "grad_norm": 0.0809028223156929, "learning_rate": 1.998665489955511e-05, "loss": 0.8272873759269714, "step": 748 }, { "epoch": 0.13839775496298692, "grad_norm": 0.08693183958530426, "learning_rate": 1.998655167279484e-05, "loss": 0.7850380539894104, "step": 749 }, { "epoch": 0.13858253167188275, "grad_norm": 0.10577927529811859, "learning_rate": 1.998644804860106e-05, "loss": 1.0953820943832397, "step": 750 }, { "epoch": 0.1387673083807786, "grad_norm": 0.08052216470241547, "learning_rate": 1.998634402697789e-05, "loss": 0.7538807988166809, "step": 751 }, { "epoch": 0.13895208508967444, "grad_norm": 0.08013401925563812, "learning_rate": 1.9986239607929473e-05, "loss": 0.6019425988197327, "step": 752 }, { "epoch": 0.1391368617985703, "grad_norm": 0.08231046795845032, "learning_rate": 1.998613479145996e-05, "loss": 0.7123339772224426, "step": 753 }, { "epoch": 0.13932163850746612, "grad_norm": 0.10204365849494934, "learning_rate": 1.9986029577573526e-05, "loss": 0.8958173394203186, "step": 754 }, { "epoch": 0.13950641521636198, "grad_norm": 0.08089638501405716, "learning_rate": 1.9985923966274357e-05, "loss": 0.9935266375541687, "step": 755 }, { "epoch": 0.13969119192525783, "grad_norm": 0.07984032481908798, "learning_rate": 1.9985817957566655e-05, "loss": 0.6228787899017334, "step": 756 }, { "epoch": 0.13987596863415366, "grad_norm": 0.08934484422206879, "learning_rate": 1.998571155145464e-05, "loss": 0.7877615690231323, "step": 757 }, { "epoch": 0.14006074534304952, "grad_norm": 0.08587730675935745, "learning_rate": 1.9985604747942552e-05, "loss": 0.7033259272575378, "step": 758 }, { "epoch": 0.14024552205194535, "grad_norm": 0.0903143361210823, "learning_rate": 1.998549754703463e-05, "loss": 0.7459292411804199, "step": 759 }, { "epoch": 0.1404302987608412, "grad_norm": 0.08646641671657562, "learning_rate": 1.9985389948735146e-05, "loss": 0.8067314624786377, "step": 760 }, { "epoch": 0.14061507546973703, "grad_norm": 0.10522181540727615, "learning_rate": 1.9985281953048385e-05, "loss": 1.0474854707717896, "step": 761 }, { "epoch": 0.1407998521786329, "grad_norm": 0.07542385160923004, "learning_rate": 1.9985173559978637e-05, "loss": 0.6246185898780823, "step": 762 }, { "epoch": 0.14098462888752872, "grad_norm": 0.09156143665313721, "learning_rate": 1.998506476953023e-05, "loss": 1.0200754404067993, "step": 763 }, { "epoch": 0.14116940559642457, "grad_norm": 0.08457624912261963, "learning_rate": 1.9984955581707476e-05, "loss": 0.6566670536994934, "step": 764 }, { "epoch": 0.1413541823053204, "grad_norm": 0.08702805638313293, "learning_rate": 1.9984845996514735e-05, "loss": 0.7731756567955017, "step": 765 }, { "epoch": 0.14153895901421626, "grad_norm": 0.0885612964630127, "learning_rate": 1.998473601395636e-05, "loss": 0.7161411046981812, "step": 766 }, { "epoch": 0.14172373572311212, "grad_norm": 0.08405031263828278, "learning_rate": 1.9984625634036728e-05, "loss": 0.6718830466270447, "step": 767 }, { "epoch": 0.14190851243200794, "grad_norm": 0.09604194760322571, "learning_rate": 1.9984514856760233e-05, "loss": 0.8609301447868347, "step": 768 }, { "epoch": 0.1420932891409038, "grad_norm": 0.08894237875938416, "learning_rate": 1.998440368213129e-05, "loss": 0.6825015544891357, "step": 769 }, { "epoch": 0.14227806584979963, "grad_norm": 0.0985831692814827, "learning_rate": 1.998429211015431e-05, "loss": 0.8550534248352051, "step": 770 }, { "epoch": 0.14246284255869548, "grad_norm": 0.07364708185195923, "learning_rate": 1.9984180140833745e-05, "loss": 0.58270263671875, "step": 771 }, { "epoch": 0.1426476192675913, "grad_norm": 0.09174038469791412, "learning_rate": 1.9984067774174047e-05, "loss": 0.9174992442131042, "step": 772 }, { "epoch": 0.14283239597648717, "grad_norm": 0.08195028454065323, "learning_rate": 1.9983955010179687e-05, "loss": 0.8114473223686218, "step": 773 }, { "epoch": 0.143017172685383, "grad_norm": 0.09834912419319153, "learning_rate": 1.998384184885515e-05, "loss": 0.7728997468948364, "step": 774 }, { "epoch": 0.14320194939427885, "grad_norm": 0.09782921522855759, "learning_rate": 1.998372829020495e-05, "loss": 0.8147768378257751, "step": 775 }, { "epoch": 0.14338672610317468, "grad_norm": 0.0865607038140297, "learning_rate": 1.9983614334233595e-05, "loss": 0.7355035543441772, "step": 776 }, { "epoch": 0.14357150281207054, "grad_norm": 0.10707329958677292, "learning_rate": 1.9983499980945624e-05, "loss": 0.9970985651016235, "step": 777 }, { "epoch": 0.1437562795209664, "grad_norm": 0.11169784516096115, "learning_rate": 1.998338523034559e-05, "loss": 0.9548088908195496, "step": 778 }, { "epoch": 0.14394105622986222, "grad_norm": 0.089739590883255, "learning_rate": 1.9983270082438054e-05, "loss": 0.8539899587631226, "step": 779 }, { "epoch": 0.14412583293875808, "grad_norm": 0.11042316257953644, "learning_rate": 1.9983154537227607e-05, "loss": 1.1233160495758057, "step": 780 }, { "epoch": 0.1443106096476539, "grad_norm": 0.09637823700904846, "learning_rate": 1.998303859471884e-05, "loss": 1.0442636013031006, "step": 781 }, { "epoch": 0.14449538635654977, "grad_norm": 0.09303180873394012, "learning_rate": 1.9982922254916373e-05, "loss": 0.850719153881073, "step": 782 }, { "epoch": 0.1446801630654456, "grad_norm": 0.10702165216207504, "learning_rate": 1.998280551782483e-05, "loss": 0.9073002338409424, "step": 783 }, { "epoch": 0.14486493977434145, "grad_norm": 0.10565286874771118, "learning_rate": 1.998268838344886e-05, "loss": 0.9817302823066711, "step": 784 }, { "epoch": 0.14504971648323728, "grad_norm": 0.10369947552680969, "learning_rate": 1.9982570851793125e-05, "loss": 0.865040123462677, "step": 785 }, { "epoch": 0.14523449319213314, "grad_norm": 0.09121627360582352, "learning_rate": 1.9982452922862297e-05, "loss": 0.7408377528190613, "step": 786 }, { "epoch": 0.14541926990102896, "grad_norm": 0.09499744325876236, "learning_rate": 1.998233459666108e-05, "loss": 0.903713047504425, "step": 787 }, { "epoch": 0.14560404660992482, "grad_norm": 0.08999454230070114, "learning_rate": 1.9982215873194174e-05, "loss": 0.9278289079666138, "step": 788 }, { "epoch": 0.14578882331882068, "grad_norm": 0.09898703545331955, "learning_rate": 1.998209675246631e-05, "loss": 0.8522279262542725, "step": 789 }, { "epoch": 0.1459736000277165, "grad_norm": 0.08116642385721207, "learning_rate": 1.9981977234482216e-05, "loss": 0.7823371887207031, "step": 790 }, { "epoch": 0.14615837673661236, "grad_norm": 0.09841936081647873, "learning_rate": 1.998185731924667e-05, "loss": 0.6668987274169922, "step": 791 }, { "epoch": 0.1463431534455082, "grad_norm": 0.08227173984050751, "learning_rate": 1.9981737006764422e-05, "loss": 0.7551202178001404, "step": 792 }, { "epoch": 0.14652793015440405, "grad_norm": 0.09930627793073654, "learning_rate": 1.9981616297040274e-05, "loss": 1.0937387943267822, "step": 793 }, { "epoch": 0.14671270686329987, "grad_norm": 0.09635364264249802, "learning_rate": 1.9981495190079023e-05, "loss": 1.0252056121826172, "step": 794 }, { "epoch": 0.14689748357219573, "grad_norm": 0.07074606418609619, "learning_rate": 1.9981373685885496e-05, "loss": 0.6450613141059875, "step": 795 }, { "epoch": 0.14708226028109156, "grad_norm": 0.10457006096839905, "learning_rate": 1.998125178446452e-05, "loss": 0.872199535369873, "step": 796 }, { "epoch": 0.14726703698998742, "grad_norm": 0.10501401871442795, "learning_rate": 1.9981129485820955e-05, "loss": 0.9675683379173279, "step": 797 }, { "epoch": 0.14745181369888324, "grad_norm": 0.08505741506814957, "learning_rate": 1.9981006789959658e-05, "loss": 0.7211896181106567, "step": 798 }, { "epoch": 0.1476365904077791, "grad_norm": 0.08668606728315353, "learning_rate": 1.998088369688552e-05, "loss": 0.7685105204582214, "step": 799 }, { "epoch": 0.14782136711667496, "grad_norm": 0.08549778908491135, "learning_rate": 1.998076020660344e-05, "loss": 0.7868371605873108, "step": 800 }, { "epoch": 0.14800614382557079, "grad_norm": 0.10425841063261032, "learning_rate": 1.9980636319118326e-05, "loss": 0.8326320648193359, "step": 801 }, { "epoch": 0.14819092053446664, "grad_norm": 0.08408709615468979, "learning_rate": 1.998051203443511e-05, "loss": 0.7171183824539185, "step": 802 }, { "epoch": 0.14837569724336247, "grad_norm": 0.09104572981595993, "learning_rate": 1.9980387352558742e-05, "loss": 0.843257486820221, "step": 803 }, { "epoch": 0.14856047395225833, "grad_norm": 0.08411456644535065, "learning_rate": 1.9980262273494184e-05, "loss": 0.6456289887428284, "step": 804 }, { "epoch": 0.14874525066115415, "grad_norm": 0.08256927132606506, "learning_rate": 1.998013679724641e-05, "loss": 0.7349569201469421, "step": 805 }, { "epoch": 0.14893002737005, "grad_norm": 0.07136604189872742, "learning_rate": 1.9980010923820417e-05, "loss": 0.7609940767288208, "step": 806 }, { "epoch": 0.14911480407894584, "grad_norm": 0.09291189163923264, "learning_rate": 1.997988465322121e-05, "loss": 0.7991480231285095, "step": 807 }, { "epoch": 0.1492995807878417, "grad_norm": 0.08440721780061722, "learning_rate": 1.9979757985453818e-05, "loss": 0.7707111835479736, "step": 808 }, { "epoch": 0.14948435749673752, "grad_norm": 0.0729835107922554, "learning_rate": 1.997963092052328e-05, "loss": 0.622437059879303, "step": 809 }, { "epoch": 0.14966913420563338, "grad_norm": 0.08329617232084274, "learning_rate": 1.9979503458434654e-05, "loss": 0.8455030918121338, "step": 810 }, { "epoch": 0.14985391091452924, "grad_norm": 0.07677850127220154, "learning_rate": 1.9979375599193013e-05, "loss": 0.6293231844902039, "step": 811 }, { "epoch": 0.15003868762342507, "grad_norm": 0.10512515902519226, "learning_rate": 1.9979247342803445e-05, "loss": 0.9506365656852722, "step": 812 }, { "epoch": 0.15022346433232092, "grad_norm": 0.0830494686961174, "learning_rate": 1.9979118689271054e-05, "loss": 0.8180676698684692, "step": 813 }, { "epoch": 0.15040824104121675, "grad_norm": 0.09016073495149612, "learning_rate": 1.9978989638600958e-05, "loss": 0.9344583749771118, "step": 814 }, { "epoch": 0.1505930177501126, "grad_norm": 0.09691261500120163, "learning_rate": 1.9978860190798298e-05, "loss": 1.0068974494934082, "step": 815 }, { "epoch": 0.15077779445900844, "grad_norm": 0.08613571524620056, "learning_rate": 1.997873034586822e-05, "loss": 0.7329363226890564, "step": 816 }, { "epoch": 0.1509625711679043, "grad_norm": 0.08310612291097641, "learning_rate": 1.9978600103815894e-05, "loss": 0.7454736232757568, "step": 817 }, { "epoch": 0.15114734787680012, "grad_norm": 0.07982125133275986, "learning_rate": 1.99784694646465e-05, "loss": 0.6723255515098572, "step": 818 }, { "epoch": 0.15133212458569598, "grad_norm": 0.10597433149814606, "learning_rate": 1.997833842836524e-05, "loss": 0.8811939358711243, "step": 819 }, { "epoch": 0.1515169012945918, "grad_norm": 0.09027368575334549, "learning_rate": 1.9978206994977332e-05, "loss": 1.095210313796997, "step": 820 }, { "epoch": 0.15170167800348766, "grad_norm": 0.0823710560798645, "learning_rate": 1.9978075164488004e-05, "loss": 0.6737566590309143, "step": 821 }, { "epoch": 0.15188645471238352, "grad_norm": 0.0979158878326416, "learning_rate": 1.99779429369025e-05, "loss": 0.9108914732933044, "step": 822 }, { "epoch": 0.15207123142127935, "grad_norm": 0.09988109767436981, "learning_rate": 1.9977810312226086e-05, "loss": 0.911587655544281, "step": 823 }, { "epoch": 0.1522560081301752, "grad_norm": 0.08708114922046661, "learning_rate": 1.9977677290464034e-05, "loss": 0.6712893843650818, "step": 824 }, { "epoch": 0.15244078483907103, "grad_norm": 0.07156742364168167, "learning_rate": 1.9977543871621647e-05, "loss": 0.5517778396606445, "step": 825 }, { "epoch": 0.1526255615479669, "grad_norm": 0.08961985260248184, "learning_rate": 1.9977410055704228e-05, "loss": 0.7994911670684814, "step": 826 }, { "epoch": 0.15281033825686272, "grad_norm": 0.08305425941944122, "learning_rate": 1.9977275842717102e-05, "loss": 0.6371324062347412, "step": 827 }, { "epoch": 0.15299511496575857, "grad_norm": 0.08506402373313904, "learning_rate": 1.9977141232665613e-05, "loss": 0.8960465788841248, "step": 828 }, { "epoch": 0.1531798916746544, "grad_norm": 0.0903603583574295, "learning_rate": 1.9977006225555118e-05, "loss": 0.7395355105400085, "step": 829 }, { "epoch": 0.15336466838355026, "grad_norm": 0.10823410004377365, "learning_rate": 1.997687082139099e-05, "loss": 1.0822867155075073, "step": 830 }, { "epoch": 0.15354944509244609, "grad_norm": 0.11097127199172974, "learning_rate": 1.9976735020178616e-05, "loss": 0.9120482206344604, "step": 831 }, { "epoch": 0.15373422180134194, "grad_norm": 0.07994866371154785, "learning_rate": 1.9976598821923403e-05, "loss": 0.775039553642273, "step": 832 }, { "epoch": 0.1539189985102378, "grad_norm": 0.11645234376192093, "learning_rate": 1.9976462226630767e-05, "loss": 1.08674955368042, "step": 833 }, { "epoch": 0.15410377521913363, "grad_norm": 0.09166614711284637, "learning_rate": 1.9976325234306148e-05, "loss": 0.9463396072387695, "step": 834 }, { "epoch": 0.15428855192802948, "grad_norm": 0.078475721180439, "learning_rate": 1.9976187844954997e-05, "loss": 0.7562021613121033, "step": 835 }, { "epoch": 0.1544733286369253, "grad_norm": 0.0811050683259964, "learning_rate": 1.997605005858278e-05, "loss": 0.9685558080673218, "step": 836 }, { "epoch": 0.15465810534582117, "grad_norm": 0.07300540059804916, "learning_rate": 1.9975911875194983e-05, "loss": 0.6694080829620361, "step": 837 }, { "epoch": 0.154842882054717, "grad_norm": 0.09105053544044495, "learning_rate": 1.9975773294797104e-05, "loss": 0.771217405796051, "step": 838 }, { "epoch": 0.15502765876361285, "grad_norm": 0.09635568410158157, "learning_rate": 1.9975634317394655e-05, "loss": 0.9028376340866089, "step": 839 }, { "epoch": 0.15521243547250868, "grad_norm": 0.07771394401788712, "learning_rate": 1.9975494942993173e-05, "loss": 0.9490246176719666, "step": 840 }, { "epoch": 0.15539721218140454, "grad_norm": 0.08951511979103088, "learning_rate": 1.9975355171598205e-05, "loss": 0.7538551092147827, "step": 841 }, { "epoch": 0.15558198889030037, "grad_norm": 0.10040956735610962, "learning_rate": 1.9975215003215306e-05, "loss": 0.8540251851081848, "step": 842 }, { "epoch": 0.15576676559919622, "grad_norm": 0.08941560238599777, "learning_rate": 1.9975074437850057e-05, "loss": 0.8490448594093323, "step": 843 }, { "epoch": 0.15595154230809208, "grad_norm": 0.0840606689453125, "learning_rate": 1.9974933475508055e-05, "loss": 0.8591848611831665, "step": 844 }, { "epoch": 0.1561363190169879, "grad_norm": 0.09096793085336685, "learning_rate": 1.9974792116194908e-05, "loss": 0.8079205751419067, "step": 845 }, { "epoch": 0.15632109572588376, "grad_norm": 0.10489457100629807, "learning_rate": 1.9974650359916243e-05, "loss": 0.9832424521446228, "step": 846 }, { "epoch": 0.1565058724347796, "grad_norm": 0.08821387588977814, "learning_rate": 1.99745082066777e-05, "loss": 0.7868931293487549, "step": 847 }, { "epoch": 0.15669064914367545, "grad_norm": 0.07660951465368271, "learning_rate": 1.9974365656484934e-05, "loss": 0.8489773273468018, "step": 848 }, { "epoch": 0.15687542585257128, "grad_norm": 0.09838790446519852, "learning_rate": 1.9974222709343625e-05, "loss": 0.8385222554206848, "step": 849 }, { "epoch": 0.15706020256146713, "grad_norm": 0.08230140805244446, "learning_rate": 1.9974079365259453e-05, "loss": 0.791317343711853, "step": 850 }, { "epoch": 0.15724497927036296, "grad_norm": 0.08971503376960754, "learning_rate": 1.997393562423813e-05, "loss": 0.8988771438598633, "step": 851 }, { "epoch": 0.15742975597925882, "grad_norm": 0.07249333709478378, "learning_rate": 1.9973791486285373e-05, "loss": 0.639558732509613, "step": 852 }, { "epoch": 0.15761453268815465, "grad_norm": 0.08529103547334671, "learning_rate": 1.997364695140692e-05, "loss": 0.7391307353973389, "step": 853 }, { "epoch": 0.1577993093970505, "grad_norm": 0.06635551154613495, "learning_rate": 1.9973502019608518e-05, "loss": 0.5126902461051941, "step": 854 }, { "epoch": 0.15798408610594636, "grad_norm": 0.07860208302736282, "learning_rate": 1.9973356690895943e-05, "loss": 0.7455308437347412, "step": 855 }, { "epoch": 0.1581688628148422, "grad_norm": 0.0684715062379837, "learning_rate": 1.997321096527497e-05, "loss": 0.5100698471069336, "step": 856 }, { "epoch": 0.15835363952373804, "grad_norm": 0.07221705466508865, "learning_rate": 1.9973064842751408e-05, "loss": 0.6271705031394958, "step": 857 }, { "epoch": 0.15853841623263387, "grad_norm": 0.09156036376953125, "learning_rate": 1.9972918323331062e-05, "loss": 0.872652530670166, "step": 858 }, { "epoch": 0.15872319294152973, "grad_norm": 0.0856616422533989, "learning_rate": 1.9972771407019772e-05, "loss": 0.6149677038192749, "step": 859 }, { "epoch": 0.15890796965042556, "grad_norm": 0.10809344798326492, "learning_rate": 1.997262409382338e-05, "loss": 0.968105673789978, "step": 860 }, { "epoch": 0.1590927463593214, "grad_norm": 0.09980929642915726, "learning_rate": 1.9972476383747748e-05, "loss": 0.8662238717079163, "step": 861 }, { "epoch": 0.15927752306821724, "grad_norm": 0.07973527908325195, "learning_rate": 1.9972328276798758e-05, "loss": 0.6740208864212036, "step": 862 }, { "epoch": 0.1594622997771131, "grad_norm": 0.09533673524856567, "learning_rate": 1.99721797729823e-05, "loss": 0.964790940284729, "step": 863 }, { "epoch": 0.15964707648600893, "grad_norm": 0.08445615321397781, "learning_rate": 1.9972030872304287e-05, "loss": 0.6526170969009399, "step": 864 }, { "epoch": 0.15983185319490478, "grad_norm": 0.09407380223274231, "learning_rate": 1.997188157477064e-05, "loss": 0.820464551448822, "step": 865 }, { "epoch": 0.16001662990380064, "grad_norm": 0.08107048273086548, "learning_rate": 1.997173188038731e-05, "loss": 0.8346737623214722, "step": 866 }, { "epoch": 0.16020140661269647, "grad_norm": 0.10146121680736542, "learning_rate": 1.9971581789160246e-05, "loss": 0.831204354763031, "step": 867 }, { "epoch": 0.16038618332159232, "grad_norm": 0.09238161146640778, "learning_rate": 1.9971431301095423e-05, "loss": 1.003567099571228, "step": 868 }, { "epoch": 0.16057096003048815, "grad_norm": 0.10330084711313248, "learning_rate": 1.9971280416198832e-05, "loss": 0.8877651691436768, "step": 869 }, { "epoch": 0.160755736739384, "grad_norm": 0.08259479701519012, "learning_rate": 1.9971129134476474e-05, "loss": 0.6704717874526978, "step": 870 }, { "epoch": 0.16094051344827984, "grad_norm": 0.07004322111606598, "learning_rate": 1.9970977455934376e-05, "loss": 0.526638388633728, "step": 871 }, { "epoch": 0.1611252901571757, "grad_norm": 0.07713456451892853, "learning_rate": 1.9970825380578568e-05, "loss": 0.5882245898246765, "step": 872 }, { "epoch": 0.16131006686607152, "grad_norm": 0.06343498080968857, "learning_rate": 1.9970672908415106e-05, "loss": 0.6268212199211121, "step": 873 }, { "epoch": 0.16149484357496738, "grad_norm": 0.08529190719127655, "learning_rate": 1.9970520039450057e-05, "loss": 0.8552349209785461, "step": 874 }, { "epoch": 0.1616796202838632, "grad_norm": 0.07656506448984146, "learning_rate": 1.9970366773689504e-05, "loss": 0.685907781124115, "step": 875 }, { "epoch": 0.16186439699275906, "grad_norm": 0.09757979214191437, "learning_rate": 1.9970213111139545e-05, "loss": 0.9193124771118164, "step": 876 }, { "epoch": 0.16204917370165492, "grad_norm": 0.0972999557852745, "learning_rate": 1.99700590518063e-05, "loss": 0.8101493120193481, "step": 877 }, { "epoch": 0.16223395041055075, "grad_norm": 0.08974293619394302, "learning_rate": 1.9969904595695894e-05, "loss": 0.7346316576004028, "step": 878 }, { "epoch": 0.1624187271194466, "grad_norm": 0.10032976418733597, "learning_rate": 1.9969749742814474e-05, "loss": 0.8234612345695496, "step": 879 }, { "epoch": 0.16260350382834243, "grad_norm": 0.10190098732709885, "learning_rate": 1.9969594493168213e-05, "loss": 0.8721520900726318, "step": 880 }, { "epoch": 0.1627882805372383, "grad_norm": 0.10501670837402344, "learning_rate": 1.996943884676328e-05, "loss": 1.0961534976959229, "step": 881 }, { "epoch": 0.16297305724613412, "grad_norm": 0.07484227418899536, "learning_rate": 1.9969282803605866e-05, "loss": 0.571873128414154, "step": 882 }, { "epoch": 0.16315783395502997, "grad_norm": 0.09870273619890213, "learning_rate": 1.9969126363702188e-05, "loss": 0.8259404301643372, "step": 883 }, { "epoch": 0.1633426106639258, "grad_norm": 0.09730734676122665, "learning_rate": 1.9968969527058476e-05, "loss": 0.8662014007568359, "step": 884 }, { "epoch": 0.16352738737282166, "grad_norm": 0.09543541073799133, "learning_rate": 1.996881229368096e-05, "loss": 0.8927724957466125, "step": 885 }, { "epoch": 0.1637121640817175, "grad_norm": 0.0955570712685585, "learning_rate": 1.9968654663575906e-05, "loss": 0.8467931151390076, "step": 886 }, { "epoch": 0.16389694079061334, "grad_norm": 0.10068543255329132, "learning_rate": 1.9968496636749584e-05, "loss": 0.9493212699890137, "step": 887 }, { "epoch": 0.1640817174995092, "grad_norm": 0.08250802755355835, "learning_rate": 1.996833821320828e-05, "loss": 0.625404417514801, "step": 888 }, { "epoch": 0.16426649420840503, "grad_norm": 0.09056300669908524, "learning_rate": 1.9968179392958305e-05, "loss": 0.9785575270652771, "step": 889 }, { "epoch": 0.16445127091730088, "grad_norm": 0.09318284690380096, "learning_rate": 1.9968020176005976e-05, "loss": 0.8643733859062195, "step": 890 }, { "epoch": 0.1646360476261967, "grad_norm": 0.0878918394446373, "learning_rate": 1.996786056235763e-05, "loss": 0.8458439111709595, "step": 891 }, { "epoch": 0.16482082433509257, "grad_norm": 0.07812018692493439, "learning_rate": 1.996770055201962e-05, "loss": 0.7509803175926208, "step": 892 }, { "epoch": 0.1650056010439884, "grad_norm": 0.051492493599653244, "learning_rate": 1.9967540144998313e-05, "loss": 0.3646559417247772, "step": 893 }, { "epoch": 0.16519037775288425, "grad_norm": 0.08322598040103912, "learning_rate": 1.9967379341300092e-05, "loss": 0.7500286102294922, "step": 894 }, { "epoch": 0.16537515446178008, "grad_norm": 0.1015838161110878, "learning_rate": 1.9967218140931358e-05, "loss": 0.8413242697715759, "step": 895 }, { "epoch": 0.16555993117067594, "grad_norm": 0.08555304259061813, "learning_rate": 1.996705654389852e-05, "loss": 0.7918609380722046, "step": 896 }, { "epoch": 0.16574470787957177, "grad_norm": 0.08763028681278229, "learning_rate": 1.996689455020802e-05, "loss": 0.8354541063308716, "step": 897 }, { "epoch": 0.16592948458846762, "grad_norm": 0.09521177411079407, "learning_rate": 1.99667321598663e-05, "loss": 0.9694790244102478, "step": 898 }, { "epoch": 0.16611426129736348, "grad_norm": 0.09489469975233078, "learning_rate": 1.996656937287982e-05, "loss": 0.6468320488929749, "step": 899 }, { "epoch": 0.1662990380062593, "grad_norm": 0.09139697998762131, "learning_rate": 1.9966406189255057e-05, "loss": 0.8143627643585205, "step": 900 }, { "epoch": 0.16648381471515517, "grad_norm": 0.08560251444578171, "learning_rate": 1.9966242608998515e-05, "loss": 0.5542747974395752, "step": 901 }, { "epoch": 0.166668591424051, "grad_norm": 0.07455048710107803, "learning_rate": 1.9966078632116695e-05, "loss": 0.7415421605110168, "step": 902 }, { "epoch": 0.16685336813294685, "grad_norm": 0.06992721557617188, "learning_rate": 1.9965914258616123e-05, "loss": 0.5292272567749023, "step": 903 }, { "epoch": 0.16703814484184268, "grad_norm": 0.08455928415060043, "learning_rate": 1.9965749488503343e-05, "loss": 0.6594451665878296, "step": 904 }, { "epoch": 0.16722292155073853, "grad_norm": 0.09297055006027222, "learning_rate": 1.9965584321784917e-05, "loss": 1.0128505229949951, "step": 905 }, { "epoch": 0.16740769825963436, "grad_norm": 0.07890793681144714, "learning_rate": 1.996541875846741e-05, "loss": 0.5768525004386902, "step": 906 }, { "epoch": 0.16759247496853022, "grad_norm": 0.09260081499814987, "learning_rate": 1.9965252798557413e-05, "loss": 0.7528817057609558, "step": 907 }, { "epoch": 0.16777725167742605, "grad_norm": 0.09680453687906265, "learning_rate": 1.9965086442061533e-05, "loss": 0.7937643527984619, "step": 908 }, { "epoch": 0.1679620283863219, "grad_norm": 0.09023380279541016, "learning_rate": 1.9964919688986392e-05, "loss": 0.7089190483093262, "step": 909 }, { "epoch": 0.16814680509521776, "grad_norm": 0.09799093753099442, "learning_rate": 1.9964752539338618e-05, "loss": 0.8649925589561462, "step": 910 }, { "epoch": 0.1683315818041136, "grad_norm": 0.10270999372005463, "learning_rate": 1.996458499312487e-05, "loss": 0.8653252720832825, "step": 911 }, { "epoch": 0.16851635851300945, "grad_norm": 0.09714650362730026, "learning_rate": 1.9964417050351817e-05, "loss": 0.8030729293823242, "step": 912 }, { "epoch": 0.16870113522190527, "grad_norm": 0.08536910265684128, "learning_rate": 1.996424871102614e-05, "loss": 0.6893182992935181, "step": 913 }, { "epoch": 0.16888591193080113, "grad_norm": 0.08386935293674469, "learning_rate": 1.996407997515454e-05, "loss": 0.7112575173377991, "step": 914 }, { "epoch": 0.16907068863969696, "grad_norm": 0.10440012812614441, "learning_rate": 1.9963910842743726e-05, "loss": 0.8684526681900024, "step": 915 }, { "epoch": 0.16925546534859282, "grad_norm": 0.09515652805566788, "learning_rate": 1.9963741313800437e-05, "loss": 0.7283082008361816, "step": 916 }, { "epoch": 0.16944024205748864, "grad_norm": 0.09991753101348877, "learning_rate": 1.9963571388331417e-05, "loss": 0.8834851384162903, "step": 917 }, { "epoch": 0.1696250187663845, "grad_norm": 0.11581186950206757, "learning_rate": 1.9963401066343424e-05, "loss": 1.0167608261108398, "step": 918 }, { "epoch": 0.16980979547528033, "grad_norm": 0.06342756003141403, "learning_rate": 1.9963230347843242e-05, "loss": 0.6235218048095703, "step": 919 }, { "epoch": 0.16999457218417618, "grad_norm": 0.09533628076314926, "learning_rate": 1.996305923283766e-05, "loss": 0.7711799144744873, "step": 920 }, { "epoch": 0.17017934889307204, "grad_norm": 0.0671616718173027, "learning_rate": 1.9962887721333498e-05, "loss": 0.5659441947937012, "step": 921 }, { "epoch": 0.17036412560196787, "grad_norm": 0.07848536968231201, "learning_rate": 1.996271581333757e-05, "loss": 0.8277614116668701, "step": 922 }, { "epoch": 0.17054890231086373, "grad_norm": 0.09858408570289612, "learning_rate": 1.9962543508856722e-05, "loss": 0.943729817867279, "step": 923 }, { "epoch": 0.17073367901975955, "grad_norm": 0.10257252305746078, "learning_rate": 1.996237080789781e-05, "loss": 0.9582602977752686, "step": 924 }, { "epoch": 0.1709184557286554, "grad_norm": 0.09687380492687225, "learning_rate": 1.996219771046771e-05, "loss": 0.8321186900138855, "step": 925 }, { "epoch": 0.17110323243755124, "grad_norm": 0.0945492833852768, "learning_rate": 1.996202421657331e-05, "loss": 1.0206397771835327, "step": 926 }, { "epoch": 0.1712880091464471, "grad_norm": 0.10808293521404266, "learning_rate": 1.996185032622151e-05, "loss": 1.0324662923812866, "step": 927 }, { "epoch": 0.17147278585534292, "grad_norm": 0.09793483465909958, "learning_rate": 1.9961676039419236e-05, "loss": 0.8650661110877991, "step": 928 }, { "epoch": 0.17165756256423878, "grad_norm": 0.08973977714776993, "learning_rate": 1.9961501356173422e-05, "loss": 0.8832427859306335, "step": 929 }, { "epoch": 0.1718423392731346, "grad_norm": 0.07093001157045364, "learning_rate": 1.996132627649102e-05, "loss": 0.6482492685317993, "step": 930 }, { "epoch": 0.17202711598203047, "grad_norm": 0.07376673072576523, "learning_rate": 1.9961150800378997e-05, "loss": 0.5817755460739136, "step": 931 }, { "epoch": 0.17221189269092632, "grad_norm": 0.09312041848897934, "learning_rate": 1.9960974927844332e-05, "loss": 0.7770845890045166, "step": 932 }, { "epoch": 0.17239666939982215, "grad_norm": 0.08325351029634476, "learning_rate": 1.9960798658894033e-05, "loss": 0.6577749848365784, "step": 933 }, { "epoch": 0.172581446108718, "grad_norm": 0.10464484989643097, "learning_rate": 1.996062199353511e-05, "loss": 1.0084058046340942, "step": 934 }, { "epoch": 0.17276622281761383, "grad_norm": 0.09137419611215591, "learning_rate": 1.9960444931774596e-05, "loss": 0.9034083485603333, "step": 935 }, { "epoch": 0.1729509995265097, "grad_norm": 0.0853210985660553, "learning_rate": 1.9960267473619535e-05, "loss": 0.6832973957061768, "step": 936 }, { "epoch": 0.17313577623540552, "grad_norm": 0.06843896955251694, "learning_rate": 1.9960089619076986e-05, "loss": 0.49686911702156067, "step": 937 }, { "epoch": 0.17332055294430138, "grad_norm": 0.08093714714050293, "learning_rate": 1.9959911368154036e-05, "loss": 0.7712295055389404, "step": 938 }, { "epoch": 0.1735053296531972, "grad_norm": 0.08600125461816788, "learning_rate": 1.9959732720857773e-05, "loss": 0.7024419903755188, "step": 939 }, { "epoch": 0.17369010636209306, "grad_norm": 0.08981095999479294, "learning_rate": 1.995955367719531e-05, "loss": 0.739615261554718, "step": 940 }, { "epoch": 0.1738748830709889, "grad_norm": 0.09242704510688782, "learning_rate": 1.9959374237173768e-05, "loss": 0.867473304271698, "step": 941 }, { "epoch": 0.17405965977988475, "grad_norm": 0.09307640045881271, "learning_rate": 1.995919440080029e-05, "loss": 0.8205432295799255, "step": 942 }, { "epoch": 0.1742444364887806, "grad_norm": 0.09234365820884705, "learning_rate": 1.995901416808203e-05, "loss": 0.6995620727539062, "step": 943 }, { "epoch": 0.17442921319767643, "grad_norm": 0.0816473513841629, "learning_rate": 1.995883353902617e-05, "loss": 0.7264127731323242, "step": 944 }, { "epoch": 0.1746139899065723, "grad_norm": 0.09935896098613739, "learning_rate": 1.9958652513639893e-05, "loss": 0.8626465201377869, "step": 945 }, { "epoch": 0.17479876661546812, "grad_norm": 0.08586036413908005, "learning_rate": 1.9958471091930396e-05, "loss": 0.8120777010917664, "step": 946 }, { "epoch": 0.17498354332436397, "grad_norm": 0.0693439394235611, "learning_rate": 1.9958289273904907e-05, "loss": 0.6296210289001465, "step": 947 }, { "epoch": 0.1751683200332598, "grad_norm": 0.09171350300312042, "learning_rate": 1.9958107059570665e-05, "loss": 0.7480406165122986, "step": 948 }, { "epoch": 0.17535309674215566, "grad_norm": 0.09481046348810196, "learning_rate": 1.9957924448934912e-05, "loss": 0.931344211101532, "step": 949 }, { "epoch": 0.17553787345105148, "grad_norm": 0.09654638916254044, "learning_rate": 1.9957741442004922e-05, "loss": 0.8118683695793152, "step": 950 }, { "epoch": 0.17572265015994734, "grad_norm": 0.07205606251955032, "learning_rate": 1.995755803878798e-05, "loss": 0.46986880898475647, "step": 951 }, { "epoch": 0.17590742686884317, "grad_norm": 0.09873572736978531, "learning_rate": 1.9957374239291373e-05, "loss": 0.8671227693557739, "step": 952 }, { "epoch": 0.17609220357773903, "grad_norm": 0.0714307501912117, "learning_rate": 1.995719004352243e-05, "loss": 0.6845585703849792, "step": 953 }, { "epoch": 0.17627698028663488, "grad_norm": 0.09012128412723541, "learning_rate": 1.9957005451488476e-05, "loss": 0.8618735671043396, "step": 954 }, { "epoch": 0.1764617569955307, "grad_norm": 0.07841545343399048, "learning_rate": 1.9956820463196857e-05, "loss": 0.7780154943466187, "step": 955 }, { "epoch": 0.17664653370442657, "grad_norm": 0.08329493552446365, "learning_rate": 1.9956635078654928e-05, "loss": 0.7111560106277466, "step": 956 }, { "epoch": 0.1768313104133224, "grad_norm": 0.07888112962245941, "learning_rate": 1.995644929787008e-05, "loss": 0.6423460841178894, "step": 957 }, { "epoch": 0.17701608712221825, "grad_norm": 0.08623760938644409, "learning_rate": 1.9956263120849697e-05, "loss": 0.7237339615821838, "step": 958 }, { "epoch": 0.17720086383111408, "grad_norm": 0.09325414150953293, "learning_rate": 1.9956076547601188e-05, "loss": 0.584494948387146, "step": 959 }, { "epoch": 0.17738564054000994, "grad_norm": 0.07906820625066757, "learning_rate": 1.9955889578131984e-05, "loss": 0.6621897220611572, "step": 960 }, { "epoch": 0.17757041724890577, "grad_norm": 0.1097625195980072, "learning_rate": 1.9955702212449522e-05, "loss": 0.8541625142097473, "step": 961 }, { "epoch": 0.17775519395780162, "grad_norm": 0.10267429798841476, "learning_rate": 1.995551445056126e-05, "loss": 0.9353813529014587, "step": 962 }, { "epoch": 0.17793997066669745, "grad_norm": 0.0709155723452568, "learning_rate": 1.995532629247467e-05, "loss": 0.56894850730896, "step": 963 }, { "epoch": 0.1781247473755933, "grad_norm": 0.08225613087415695, "learning_rate": 1.9955137738197243e-05, "loss": 0.7326880693435669, "step": 964 }, { "epoch": 0.17830952408448916, "grad_norm": 0.08972861617803574, "learning_rate": 1.9954948787736476e-05, "loss": 0.6604665517807007, "step": 965 }, { "epoch": 0.178494300793385, "grad_norm": 0.08534478396177292, "learning_rate": 1.995475944109989e-05, "loss": 0.648709774017334, "step": 966 }, { "epoch": 0.17867907750228085, "grad_norm": 0.1054343581199646, "learning_rate": 1.9954569698295024e-05, "loss": 0.9679998159408569, "step": 967 }, { "epoch": 0.17886385421117668, "grad_norm": 0.10303540527820587, "learning_rate": 1.995437955932943e-05, "loss": 0.9443056583404541, "step": 968 }, { "epoch": 0.17904863092007253, "grad_norm": 0.0820276066660881, "learning_rate": 1.9954189024210674e-05, "loss": 0.6332961916923523, "step": 969 }, { "epoch": 0.17923340762896836, "grad_norm": 0.07548929005861282, "learning_rate": 1.995399809294633e-05, "loss": 0.6359805464744568, "step": 970 }, { "epoch": 0.17941818433786422, "grad_norm": 0.09777320176362991, "learning_rate": 1.9953806765544012e-05, "loss": 0.9533307552337646, "step": 971 }, { "epoch": 0.17960296104676005, "grad_norm": 0.097019262611866, "learning_rate": 1.9953615042011326e-05, "loss": 1.0277851819992065, "step": 972 }, { "epoch": 0.1797877377556559, "grad_norm": 0.07848110049962997, "learning_rate": 1.9953422922355895e-05, "loss": 0.5935329794883728, "step": 973 }, { "epoch": 0.17997251446455173, "grad_norm": 0.12135414034128189, "learning_rate": 1.995323040658538e-05, "loss": 1.195263147354126, "step": 974 }, { "epoch": 0.1801572911734476, "grad_norm": 0.11152930557727814, "learning_rate": 1.995303749470743e-05, "loss": 0.8123299479484558, "step": 975 }, { "epoch": 0.18034206788234344, "grad_norm": 0.1028800904750824, "learning_rate": 1.9952844186729728e-05, "loss": 0.8166159391403198, "step": 976 }, { "epoch": 0.18052684459123927, "grad_norm": 0.09579476714134216, "learning_rate": 1.9952650482659966e-05, "loss": 0.936848521232605, "step": 977 }, { "epoch": 0.18071162130013513, "grad_norm": 0.09917067736387253, "learning_rate": 1.995245638250585e-05, "loss": 0.8639838099479675, "step": 978 }, { "epoch": 0.18089639800903096, "grad_norm": 0.0719900131225586, "learning_rate": 1.995226188627511e-05, "loss": 0.7680509686470032, "step": 979 }, { "epoch": 0.1810811747179268, "grad_norm": 0.08661803603172302, "learning_rate": 1.9952066993975486e-05, "loss": 0.7090547680854797, "step": 980 }, { "epoch": 0.18126595142682264, "grad_norm": 0.08709144592285156, "learning_rate": 1.9951871705614727e-05, "loss": 0.7234505414962769, "step": 981 }, { "epoch": 0.1814507281357185, "grad_norm": 0.08482590317726135, "learning_rate": 1.995167602120061e-05, "loss": 0.7665219306945801, "step": 982 }, { "epoch": 0.18163550484461433, "grad_norm": 0.08609596639871597, "learning_rate": 1.9951479940740923e-05, "loss": 0.7475874423980713, "step": 983 }, { "epoch": 0.18182028155351018, "grad_norm": 0.07693618535995483, "learning_rate": 1.9951283464243468e-05, "loss": 0.7607953548431396, "step": 984 }, { "epoch": 0.182005058262406, "grad_norm": 0.10553596913814545, "learning_rate": 1.995108659171607e-05, "loss": 0.9523313045501709, "step": 985 }, { "epoch": 0.18218983497130187, "grad_norm": 0.10550583153963089, "learning_rate": 1.995088932316655e-05, "loss": 0.8637052178382874, "step": 986 }, { "epoch": 0.18237461168019772, "grad_norm": 0.0804867073893547, "learning_rate": 1.995069165860277e-05, "loss": 0.6479719877243042, "step": 987 }, { "epoch": 0.18255938838909355, "grad_norm": 0.07370677590370178, "learning_rate": 1.99504935980326e-05, "loss": 0.49580657482147217, "step": 988 }, { "epoch": 0.1827441650979894, "grad_norm": 0.09654896706342697, "learning_rate": 1.995029514146391e-05, "loss": 0.7312811017036438, "step": 989 }, { "epoch": 0.18292894180688524, "grad_norm": 0.09291725605726242, "learning_rate": 1.9950096288904605e-05, "loss": 0.9061948657035828, "step": 990 }, { "epoch": 0.1831137185157811, "grad_norm": 0.08796926587820053, "learning_rate": 1.9949897040362596e-05, "loss": 0.6325365304946899, "step": 991 }, { "epoch": 0.18329849522467692, "grad_norm": 0.1115405261516571, "learning_rate": 1.9949697395845816e-05, "loss": 1.12819242477417, "step": 992 }, { "epoch": 0.18348327193357278, "grad_norm": 0.09811253100633621, "learning_rate": 1.9949497355362205e-05, "loss": 0.9766569137573242, "step": 993 }, { "epoch": 0.1836680486424686, "grad_norm": 0.07640735805034637, "learning_rate": 1.994929691891973e-05, "loss": 0.7131111025810242, "step": 994 }, { "epoch": 0.18385282535136446, "grad_norm": 0.0764615386724472, "learning_rate": 1.9949096086526368e-05, "loss": 0.6240501403808594, "step": 995 }, { "epoch": 0.1840376020602603, "grad_norm": 0.07657036185264587, "learning_rate": 1.9948894858190108e-05, "loss": 0.8184218406677246, "step": 996 }, { "epoch": 0.18422237876915615, "grad_norm": 0.09979189932346344, "learning_rate": 1.994869323391895e-05, "loss": 0.7589877247810364, "step": 997 }, { "epoch": 0.184407155478052, "grad_norm": 0.0920339897274971, "learning_rate": 1.9948491213720937e-05, "loss": 0.8375788927078247, "step": 998 }, { "epoch": 0.18459193218694783, "grad_norm": 0.09007628262042999, "learning_rate": 1.9948288797604093e-05, "loss": 0.8215615749359131, "step": 999 }, { "epoch": 0.1847767088958437, "grad_norm": 0.0851169154047966, "learning_rate": 1.994808598557648e-05, "loss": 0.7080106139183044, "step": 1000 }, { "epoch": 0.1847767088958437, "eval_loss": 0.8302240371704102, "eval_runtime": 157.4667, "eval_samples_per_second": 115.764, "eval_steps_per_second": 14.473, "step": 1000 }, { "epoch": 0.18496148560473952, "grad_norm": 0.09749092161655426, "learning_rate": 1.994788277764617e-05, "loss": 0.7249320149421692, "step": 1001 }, { "epoch": 0.18514626231363537, "grad_norm": 0.07457061111927032, "learning_rate": 1.9947679173821245e-05, "loss": 0.6512501835823059, "step": 1002 }, { "epoch": 0.1853310390225312, "grad_norm": 0.11633574962615967, "learning_rate": 1.9947475174109814e-05, "loss": 0.9869779348373413, "step": 1003 }, { "epoch": 0.18551581573142706, "grad_norm": 0.08418001234531403, "learning_rate": 1.9947270778519995e-05, "loss": 0.8138560056686401, "step": 1004 }, { "epoch": 0.1857005924403229, "grad_norm": 0.10566221177577972, "learning_rate": 1.9947065987059916e-05, "loss": 0.6288236975669861, "step": 1005 }, { "epoch": 0.18588536914921874, "grad_norm": 0.09031148254871368, "learning_rate": 1.9946860799737732e-05, "loss": 0.7053165435791016, "step": 1006 }, { "epoch": 0.18607014585811457, "grad_norm": 0.09639808535575867, "learning_rate": 1.994665521656161e-05, "loss": 0.9204103350639343, "step": 1007 }, { "epoch": 0.18625492256701043, "grad_norm": 0.08621752262115479, "learning_rate": 1.9946449237539728e-05, "loss": 0.8180477619171143, "step": 1008 }, { "epoch": 0.18643969927590628, "grad_norm": 0.07304370403289795, "learning_rate": 1.9946242862680282e-05, "loss": 0.5348410606384277, "step": 1009 }, { "epoch": 0.1866244759848021, "grad_norm": 0.07953565567731857, "learning_rate": 1.994603609199149e-05, "loss": 0.7202300429344177, "step": 1010 }, { "epoch": 0.18680925269369797, "grad_norm": 0.08353704959154129, "learning_rate": 1.994582892548158e-05, "loss": 0.6676353216171265, "step": 1011 }, { "epoch": 0.1869940294025938, "grad_norm": 0.08431074023246765, "learning_rate": 1.9945621363158795e-05, "loss": 0.7750869393348694, "step": 1012 }, { "epoch": 0.18717880611148965, "grad_norm": 0.10428814589977264, "learning_rate": 1.9945413405031392e-05, "loss": 0.9942725896835327, "step": 1013 }, { "epoch": 0.18736358282038548, "grad_norm": 0.0732867419719696, "learning_rate": 1.9945205051107654e-05, "loss": 0.6770198345184326, "step": 1014 }, { "epoch": 0.18754835952928134, "grad_norm": 0.09832040220499039, "learning_rate": 1.994499630139587e-05, "loss": 0.9014858603477478, "step": 1015 }, { "epoch": 0.18773313623817717, "grad_norm": 0.06784018129110336, "learning_rate": 1.9944787155904346e-05, "loss": 0.5552687048912048, "step": 1016 }, { "epoch": 0.18791791294707302, "grad_norm": 0.06917358934879303, "learning_rate": 1.9944577614641404e-05, "loss": 0.7413119673728943, "step": 1017 }, { "epoch": 0.18810268965596885, "grad_norm": 0.09127135574817657, "learning_rate": 1.9944367677615392e-05, "loss": 0.9209888577461243, "step": 1018 }, { "epoch": 0.1882874663648647, "grad_norm": 0.09378290921449661, "learning_rate": 1.9944157344834655e-05, "loss": 0.7302212715148926, "step": 1019 }, { "epoch": 0.18847224307376056, "grad_norm": 0.08918961882591248, "learning_rate": 1.9943946616307562e-05, "loss": 0.9352380633354187, "step": 1020 }, { "epoch": 0.1886570197826564, "grad_norm": 0.0958271324634552, "learning_rate": 1.9943735492042512e-05, "loss": 0.6923654675483704, "step": 1021 }, { "epoch": 0.18884179649155225, "grad_norm": 0.08914941549301147, "learning_rate": 1.9943523972047894e-05, "loss": 0.701963484287262, "step": 1022 }, { "epoch": 0.18902657320044808, "grad_norm": 0.10291727632284164, "learning_rate": 1.9943312056332135e-05, "loss": 0.9576240181922913, "step": 1023 }, { "epoch": 0.18921134990934393, "grad_norm": 0.09569457173347473, "learning_rate": 1.9943099744903663e-05, "loss": 0.9132493734359741, "step": 1024 }, { "epoch": 0.18939612661823976, "grad_norm": 0.08394474536180496, "learning_rate": 1.9942887037770927e-05, "loss": 0.7519565224647522, "step": 1025 }, { "epoch": 0.18958090332713562, "grad_norm": 0.08643844723701477, "learning_rate": 1.9942673934942398e-05, "loss": 0.8622266054153442, "step": 1026 }, { "epoch": 0.18976568003603145, "grad_norm": 0.08506625890731812, "learning_rate": 1.994246043642655e-05, "loss": 0.7522600889205933, "step": 1027 }, { "epoch": 0.1899504567449273, "grad_norm": 0.0753428116440773, "learning_rate": 1.9942246542231888e-05, "loss": 0.6824458837509155, "step": 1028 }, { "epoch": 0.19013523345382313, "grad_norm": 0.09744856506586075, "learning_rate": 1.994203225236691e-05, "loss": 0.7293679714202881, "step": 1029 }, { "epoch": 0.190320010162719, "grad_norm": 0.08699744939804077, "learning_rate": 1.994181756684016e-05, "loss": 0.8544273972511292, "step": 1030 }, { "epoch": 0.19050478687161485, "grad_norm": 0.08195915818214417, "learning_rate": 1.9941602485660172e-05, "loss": 0.6181380748748779, "step": 1031 }, { "epoch": 0.19068956358051067, "grad_norm": 0.09370932728052139, "learning_rate": 1.994138700883551e-05, "loss": 0.7301265597343445, "step": 1032 }, { "epoch": 0.19087434028940653, "grad_norm": 0.08087928593158722, "learning_rate": 1.9941171136374746e-05, "loss": 0.6871247291564941, "step": 1033 }, { "epoch": 0.19105911699830236, "grad_norm": 0.07140730321407318, "learning_rate": 1.9940954868286476e-05, "loss": 0.576732873916626, "step": 1034 }, { "epoch": 0.19124389370719821, "grad_norm": 0.08278313279151917, "learning_rate": 1.9940738204579298e-05, "loss": 0.7809289693832397, "step": 1035 }, { "epoch": 0.19142867041609404, "grad_norm": 0.09478002786636353, "learning_rate": 1.9940521145261845e-05, "loss": 0.7151646018028259, "step": 1036 }, { "epoch": 0.1916134471249899, "grad_norm": 0.08096028864383698, "learning_rate": 1.994030369034275e-05, "loss": 0.8550668954849243, "step": 1037 }, { "epoch": 0.19179822383388573, "grad_norm": 0.08005808293819427, "learning_rate": 1.994008583983066e-05, "loss": 0.8426173329353333, "step": 1038 }, { "epoch": 0.19198300054278158, "grad_norm": 0.08567973971366882, "learning_rate": 1.993986759373426e-05, "loss": 0.8091012239456177, "step": 1039 }, { "epoch": 0.1921677772516774, "grad_norm": 0.06659490615129471, "learning_rate": 1.9939648952062227e-05, "loss": 0.5663833618164062, "step": 1040 }, { "epoch": 0.19235255396057327, "grad_norm": 0.11582151055335999, "learning_rate": 1.9939429914823258e-05, "loss": 1.0861891508102417, "step": 1041 }, { "epoch": 0.19253733066946913, "grad_norm": 0.07196705043315887, "learning_rate": 1.9939210482026082e-05, "loss": 0.6811832785606384, "step": 1042 }, { "epoch": 0.19272210737836495, "grad_norm": 0.08960779756307602, "learning_rate": 1.9938990653679418e-05, "loss": 0.7905018329620361, "step": 1043 }, { "epoch": 0.1929068840872608, "grad_norm": 0.07718110829591751, "learning_rate": 1.9938770429792026e-05, "loss": 0.753892183303833, "step": 1044 }, { "epoch": 0.19309166079615664, "grad_norm": 0.08389970660209656, "learning_rate": 1.993854981037266e-05, "loss": 0.8010688424110413, "step": 1045 }, { "epoch": 0.1932764375050525, "grad_norm": 0.0901460349559784, "learning_rate": 1.993832879543011e-05, "loss": 0.7262868881225586, "step": 1046 }, { "epoch": 0.19346121421394832, "grad_norm": 0.06715024262666702, "learning_rate": 1.9938107384973165e-05, "loss": 0.6049861907958984, "step": 1047 }, { "epoch": 0.19364599092284418, "grad_norm": 0.0782829076051712, "learning_rate": 1.993788557901064e-05, "loss": 0.7502110004425049, "step": 1048 }, { "epoch": 0.19383076763174, "grad_norm": 0.09568478912115097, "learning_rate": 1.993766337755136e-05, "loss": 0.8624519109725952, "step": 1049 }, { "epoch": 0.19401554434063586, "grad_norm": 0.07993607223033905, "learning_rate": 1.9937440780604164e-05, "loss": 0.7470861077308655, "step": 1050 }, { "epoch": 0.1942003210495317, "grad_norm": 0.08586437255144119, "learning_rate": 1.993721778817792e-05, "loss": 0.7969268560409546, "step": 1051 }, { "epoch": 0.19438509775842755, "grad_norm": 0.07363275438547134, "learning_rate": 1.99369944002815e-05, "loss": 0.5416469573974609, "step": 1052 }, { "epoch": 0.1945698744673234, "grad_norm": 0.0870773121714592, "learning_rate": 1.9936770616923786e-05, "loss": 0.6686387658119202, "step": 1053 }, { "epoch": 0.19475465117621923, "grad_norm": 0.08845312148332596, "learning_rate": 1.9936546438113694e-05, "loss": 0.7606138586997986, "step": 1054 }, { "epoch": 0.1949394278851151, "grad_norm": 0.0819728672504425, "learning_rate": 1.9936321863860136e-05, "loss": 0.6012722849845886, "step": 1055 }, { "epoch": 0.19512420459401092, "grad_norm": 0.10822150856256485, "learning_rate": 1.9936096894172058e-05, "loss": 0.9842246770858765, "step": 1056 }, { "epoch": 0.19530898130290678, "grad_norm": 0.07973014563322067, "learning_rate": 1.9935871529058413e-05, "loss": 0.6289697289466858, "step": 1057 }, { "epoch": 0.1954937580118026, "grad_norm": 0.0844748392701149, "learning_rate": 1.993564576852816e-05, "loss": 0.7360936403274536, "step": 1058 }, { "epoch": 0.19567853472069846, "grad_norm": 0.08219747990369797, "learning_rate": 1.9935419612590295e-05, "loss": 0.6736936569213867, "step": 1059 }, { "epoch": 0.1958633114295943, "grad_norm": 0.07414291799068451, "learning_rate": 1.993519306125381e-05, "loss": 0.7290785312652588, "step": 1060 }, { "epoch": 0.19604808813849015, "grad_norm": 0.08525729924440384, "learning_rate": 1.9934966114527726e-05, "loss": 0.6029162406921387, "step": 1061 }, { "epoch": 0.19623286484738597, "grad_norm": 0.09205019474029541, "learning_rate": 1.9934738772421072e-05, "loss": 0.9196493029594421, "step": 1062 }, { "epoch": 0.19641764155628183, "grad_norm": 0.10401707142591476, "learning_rate": 1.99345110349429e-05, "loss": 0.8627897500991821, "step": 1063 }, { "epoch": 0.1966024182651777, "grad_norm": 0.08633103966712952, "learning_rate": 1.9934282902102266e-05, "loss": 0.7805629372596741, "step": 1064 }, { "epoch": 0.19678719497407351, "grad_norm": 0.08859745413064957, "learning_rate": 1.9934054373908255e-05, "loss": 0.7532527446746826, "step": 1065 }, { "epoch": 0.19697197168296937, "grad_norm": 0.08352278918027878, "learning_rate": 1.993382545036996e-05, "loss": 0.6543923020362854, "step": 1066 }, { "epoch": 0.1971567483918652, "grad_norm": 0.1249319389462471, "learning_rate": 1.993359613149649e-05, "loss": 1.1770665645599365, "step": 1067 }, { "epoch": 0.19734152510076106, "grad_norm": 0.07279996573925018, "learning_rate": 1.993336641729697e-05, "loss": 0.5936124324798584, "step": 1068 }, { "epoch": 0.19752630180965688, "grad_norm": 0.08983556926250458, "learning_rate": 1.9933136307780547e-05, "loss": 0.7313957214355469, "step": 1069 }, { "epoch": 0.19771107851855274, "grad_norm": 0.08589496463537216, "learning_rate": 1.9932905802956375e-05, "loss": 0.7616296410560608, "step": 1070 }, { "epoch": 0.19789585522744857, "grad_norm": 0.0885387435555458, "learning_rate": 1.993267490283363e-05, "loss": 0.7941570281982422, "step": 1071 }, { "epoch": 0.19808063193634443, "grad_norm": 0.09348790347576141, "learning_rate": 1.9932443607421496e-05, "loss": 0.8178659677505493, "step": 1072 }, { "epoch": 0.19826540864524025, "grad_norm": 0.07949327677488327, "learning_rate": 1.9932211916729182e-05, "loss": 0.7278982400894165, "step": 1073 }, { "epoch": 0.1984501853541361, "grad_norm": 0.07637959718704224, "learning_rate": 1.9931979830765907e-05, "loss": 0.6828699707984924, "step": 1074 }, { "epoch": 0.19863496206303197, "grad_norm": 0.08116207271814346, "learning_rate": 1.993174734954091e-05, "loss": 0.641601026058197, "step": 1075 }, { "epoch": 0.1988197387719278, "grad_norm": 0.08993934839963913, "learning_rate": 1.993151447306344e-05, "loss": 0.9200141429901123, "step": 1076 }, { "epoch": 0.19900451548082365, "grad_norm": 0.0704822912812233, "learning_rate": 1.9931281201342765e-05, "loss": 0.5704006552696228, "step": 1077 }, { "epoch": 0.19918929218971948, "grad_norm": 0.08818235993385315, "learning_rate": 1.993104753438817e-05, "loss": 0.8348625302314758, "step": 1078 }, { "epoch": 0.19937406889861534, "grad_norm": 0.09258076548576355, "learning_rate": 1.9930813472208953e-05, "loss": 0.8007818460464478, "step": 1079 }, { "epoch": 0.19955884560751116, "grad_norm": 0.08599089831113815, "learning_rate": 1.993057901481443e-05, "loss": 0.7286479473114014, "step": 1080 }, { "epoch": 0.19974362231640702, "grad_norm": 0.07952600717544556, "learning_rate": 1.9930344162213933e-05, "loss": 0.7580991387367249, "step": 1081 }, { "epoch": 0.19992839902530285, "grad_norm": 0.07922924309968948, "learning_rate": 1.9930108914416803e-05, "loss": 0.6698630452156067, "step": 1082 }, { "epoch": 0.2001131757341987, "grad_norm": 0.10020700097084045, "learning_rate": 1.9929873271432406e-05, "loss": 0.821386456489563, "step": 1083 }, { "epoch": 0.20029795244309453, "grad_norm": 0.103725366294384, "learning_rate": 1.9929637233270117e-05, "loss": 1.2106066942214966, "step": 1084 }, { "epoch": 0.2004827291519904, "grad_norm": 0.08130776882171631, "learning_rate": 1.9929400799939338e-05, "loss": 0.8309373259544373, "step": 1085 }, { "epoch": 0.20066750586088625, "grad_norm": 0.09061729907989502, "learning_rate": 1.992916397144947e-05, "loss": 0.6868097186088562, "step": 1086 }, { "epoch": 0.20085228256978208, "grad_norm": 0.08802130818367004, "learning_rate": 1.992892674780994e-05, "loss": 0.754677414894104, "step": 1087 }, { "epoch": 0.20103705927867793, "grad_norm": 0.09083602577447891, "learning_rate": 1.9928689129030187e-05, "loss": 0.8705667853355408, "step": 1088 }, { "epoch": 0.20122183598757376, "grad_norm": 0.0779944658279419, "learning_rate": 1.992845111511967e-05, "loss": 0.8392444849014282, "step": 1089 }, { "epoch": 0.20140661269646962, "grad_norm": 0.07750675082206726, "learning_rate": 1.9928212706087864e-05, "loss": 0.6429112553596497, "step": 1090 }, { "epoch": 0.20159138940536545, "grad_norm": 0.09703640639781952, "learning_rate": 1.992797390194425e-05, "loss": 0.6411582827568054, "step": 1091 }, { "epoch": 0.2017761661142613, "grad_norm": 0.07213166356086731, "learning_rate": 1.9927734702698335e-05, "loss": 0.5872079133987427, "step": 1092 }, { "epoch": 0.20196094282315713, "grad_norm": 0.07832180708646774, "learning_rate": 1.9927495108359642e-05, "loss": 0.6529865860939026, "step": 1093 }, { "epoch": 0.202145719532053, "grad_norm": 0.08114868402481079, "learning_rate": 1.9927255118937702e-05, "loss": 0.6691939830780029, "step": 1094 }, { "epoch": 0.20233049624094882, "grad_norm": 0.09954454749822617, "learning_rate": 1.9927014734442064e-05, "loss": 0.8835906982421875, "step": 1095 }, { "epoch": 0.20251527294984467, "grad_norm": 0.06392431259155273, "learning_rate": 1.9926773954882298e-05, "loss": 0.501159131526947, "step": 1096 }, { "epoch": 0.20270004965874053, "grad_norm": 0.07308457046747208, "learning_rate": 1.9926532780267988e-05, "loss": 0.5708874464035034, "step": 1097 }, { "epoch": 0.20288482636763636, "grad_norm": 0.07481972128152847, "learning_rate": 1.992629121060873e-05, "loss": 0.5472757816314697, "step": 1098 }, { "epoch": 0.2030696030765322, "grad_norm": 0.08071167767047882, "learning_rate": 1.9926049245914135e-05, "loss": 0.77153080701828, "step": 1099 }, { "epoch": 0.20325437978542804, "grad_norm": 0.0896330252289772, "learning_rate": 1.9925806886193836e-05, "loss": 0.8954538702964783, "step": 1100 }, { "epoch": 0.2034391564943239, "grad_norm": 0.069943867623806, "learning_rate": 1.9925564131457476e-05, "loss": 0.6015023589134216, "step": 1101 }, { "epoch": 0.20362393320321973, "grad_norm": 0.11448749899864197, "learning_rate": 1.9925320981714715e-05, "loss": 0.9205179214477539, "step": 1102 }, { "epoch": 0.20380870991211558, "grad_norm": 0.09403878450393677, "learning_rate": 1.9925077436975235e-05, "loss": 0.7833645343780518, "step": 1103 }, { "epoch": 0.2039934866210114, "grad_norm": 0.08931637555360794, "learning_rate": 1.992483349724872e-05, "loss": 0.7928817868232727, "step": 1104 }, { "epoch": 0.20417826332990727, "grad_norm": 0.07081054151058197, "learning_rate": 1.9924589162544886e-05, "loss": 0.5713711977005005, "step": 1105 }, { "epoch": 0.2043630400388031, "grad_norm": 0.11387964338064194, "learning_rate": 1.992434443287345e-05, "loss": 1.0558032989501953, "step": 1106 }, { "epoch": 0.20454781674769895, "grad_norm": 0.09888028353452682, "learning_rate": 1.9924099308244158e-05, "loss": 0.9759566783905029, "step": 1107 }, { "epoch": 0.2047325934565948, "grad_norm": 0.08901600539684296, "learning_rate": 1.9923853788666762e-05, "loss": 0.7663286924362183, "step": 1108 }, { "epoch": 0.20491737016549064, "grad_norm": 0.07079759985208511, "learning_rate": 1.992360787415103e-05, "loss": 0.679031252861023, "step": 1109 }, { "epoch": 0.2051021468743865, "grad_norm": 0.07839877158403397, "learning_rate": 1.9923361564706755e-05, "loss": 0.7949055433273315, "step": 1110 }, { "epoch": 0.20528692358328232, "grad_norm": 0.09051861613988876, "learning_rate": 1.9923114860343735e-05, "loss": 0.7728940844535828, "step": 1111 }, { "epoch": 0.20547170029217818, "grad_norm": 0.06782103329896927, "learning_rate": 1.992286776107179e-05, "loss": 0.8745223879814148, "step": 1112 }, { "epoch": 0.205656477001074, "grad_norm": 0.08042112737894058, "learning_rate": 1.992262026690075e-05, "loss": 0.823594868183136, "step": 1113 }, { "epoch": 0.20584125370996986, "grad_norm": 0.09936164319515228, "learning_rate": 1.992237237784047e-05, "loss": 0.8101600408554077, "step": 1114 }, { "epoch": 0.2060260304188657, "grad_norm": 0.09016161412000656, "learning_rate": 1.992212409390081e-05, "loss": 0.7377327680587769, "step": 1115 }, { "epoch": 0.20621080712776155, "grad_norm": 0.08577019721269608, "learning_rate": 1.9921875415091655e-05, "loss": 0.6838337779045105, "step": 1116 }, { "epoch": 0.20639558383665738, "grad_norm": 0.07404208183288574, "learning_rate": 1.9921626341422898e-05, "loss": 0.6607319712638855, "step": 1117 }, { "epoch": 0.20658036054555323, "grad_norm": 0.06708002835512161, "learning_rate": 1.9921376872904457e-05, "loss": 0.5383575558662415, "step": 1118 }, { "epoch": 0.2067651372544491, "grad_norm": 0.07707026600837708, "learning_rate": 1.9921127009546256e-05, "loss": 0.6438384056091309, "step": 1119 }, { "epoch": 0.20694991396334492, "grad_norm": 0.09694302827119827, "learning_rate": 1.9920876751358237e-05, "loss": 0.8988054990768433, "step": 1120 }, { "epoch": 0.20713469067224077, "grad_norm": 0.07617320120334625, "learning_rate": 1.9920626098350362e-05, "loss": 0.5821647644042969, "step": 1121 }, { "epoch": 0.2073194673811366, "grad_norm": 0.07221636176109314, "learning_rate": 1.9920375050532605e-05, "loss": 0.7280129790306091, "step": 1122 }, { "epoch": 0.20750424409003246, "grad_norm": 0.0678236186504364, "learning_rate": 1.9920123607914962e-05, "loss": 0.5645017027854919, "step": 1123 }, { "epoch": 0.2076890207989283, "grad_norm": 0.06640082597732544, "learning_rate": 1.991987177050743e-05, "loss": 0.6989132165908813, "step": 1124 }, { "epoch": 0.20787379750782414, "grad_norm": 0.08389496803283691, "learning_rate": 1.991961953832004e-05, "loss": 0.7181524634361267, "step": 1125 }, { "epoch": 0.20805857421671997, "grad_norm": 0.0670672282576561, "learning_rate": 1.9919366911362828e-05, "loss": 0.661719799041748, "step": 1126 }, { "epoch": 0.20824335092561583, "grad_norm": 0.06571565568447113, "learning_rate": 1.9919113889645846e-05, "loss": 0.48099377751350403, "step": 1127 }, { "epoch": 0.20842812763451166, "grad_norm": 0.08513090759515762, "learning_rate": 1.991886047317916e-05, "loss": 0.7070115208625793, "step": 1128 }, { "epoch": 0.2086129043434075, "grad_norm": 0.08598559349775314, "learning_rate": 1.9918606661972863e-05, "loss": 0.9339227676391602, "step": 1129 }, { "epoch": 0.20879768105230337, "grad_norm": 0.0697142630815506, "learning_rate": 1.9918352456037054e-05, "loss": 0.5696187019348145, "step": 1130 }, { "epoch": 0.2089824577611992, "grad_norm": 0.09361305087804794, "learning_rate": 1.9918097855381843e-05, "loss": 0.8069785833358765, "step": 1131 }, { "epoch": 0.20916723447009505, "grad_norm": 0.10314858704805374, "learning_rate": 1.9917842860017372e-05, "loss": 0.9178709387779236, "step": 1132 }, { "epoch": 0.20935201117899088, "grad_norm": 0.0890069231390953, "learning_rate": 1.991758746995378e-05, "loss": 0.7417526245117188, "step": 1133 }, { "epoch": 0.20953678788788674, "grad_norm": 0.07984063774347305, "learning_rate": 1.991733168520124e-05, "loss": 0.6990677118301392, "step": 1134 }, { "epoch": 0.20972156459678257, "grad_norm": 0.0774579867720604, "learning_rate": 1.991707550576992e-05, "loss": 0.7703189253807068, "step": 1135 }, { "epoch": 0.20990634130567842, "grad_norm": 0.07212287187576294, "learning_rate": 1.9916818931670026e-05, "loss": 0.6072457432746887, "step": 1136 }, { "epoch": 0.21009111801457425, "grad_norm": 0.06705975532531738, "learning_rate": 1.9916561962911762e-05, "loss": 0.5408006310462952, "step": 1137 }, { "epoch": 0.2102758947234701, "grad_norm": 0.0907805860042572, "learning_rate": 1.9916304599505358e-05, "loss": 0.8637043833732605, "step": 1138 }, { "epoch": 0.21046067143236594, "grad_norm": 0.0981602668762207, "learning_rate": 1.9916046841461056e-05, "loss": 0.7083920240402222, "step": 1139 }, { "epoch": 0.2106454481412618, "grad_norm": 0.0859125480055809, "learning_rate": 1.9915788688789107e-05, "loss": 0.754231870174408, "step": 1140 }, { "epoch": 0.21083022485015765, "grad_norm": 0.06962268799543381, "learning_rate": 1.9915530141499796e-05, "loss": 0.7630135416984558, "step": 1141 }, { "epoch": 0.21101500155905348, "grad_norm": 0.09127385914325714, "learning_rate": 1.991527119960341e-05, "loss": 1.0107409954071045, "step": 1142 }, { "epoch": 0.21119977826794933, "grad_norm": 0.0875440314412117, "learning_rate": 1.9915011863110244e-05, "loss": 0.6957036256790161, "step": 1143 }, { "epoch": 0.21138455497684516, "grad_norm": 0.0832233801484108, "learning_rate": 1.9914752132030634e-05, "loss": 0.8316786289215088, "step": 1144 }, { "epoch": 0.21156933168574102, "grad_norm": 0.09219091385602951, "learning_rate": 1.99144920063749e-05, "loss": 0.9146633148193359, "step": 1145 }, { "epoch": 0.21175410839463685, "grad_norm": 0.06797627359628677, "learning_rate": 1.991423148615341e-05, "loss": 0.5604722499847412, "step": 1146 }, { "epoch": 0.2119388851035327, "grad_norm": 0.09596192091703415, "learning_rate": 1.9913970571376517e-05, "loss": 0.9451921582221985, "step": 1147 }, { "epoch": 0.21212366181242853, "grad_norm": 0.08437191694974899, "learning_rate": 1.9913709262054616e-05, "loss": 0.8336598873138428, "step": 1148 }, { "epoch": 0.2123084385213244, "grad_norm": 0.07678723335266113, "learning_rate": 1.9913447558198104e-05, "loss": 0.7418199181556702, "step": 1149 }, { "epoch": 0.21249321523022022, "grad_norm": 0.08348348736763, "learning_rate": 1.9913185459817392e-05, "loss": 0.7916473150253296, "step": 1150 }, { "epoch": 0.21267799193911607, "grad_norm": 0.08356853574514389, "learning_rate": 1.9912922966922913e-05, "loss": 0.716545045375824, "step": 1151 }, { "epoch": 0.21286276864801193, "grad_norm": 0.055216237902641296, "learning_rate": 1.9912660079525115e-05, "loss": 0.5315919518470764, "step": 1152 }, { "epoch": 0.21304754535690776, "grad_norm": 0.09753018617630005, "learning_rate": 1.991239679763446e-05, "loss": 1.0574451684951782, "step": 1153 }, { "epoch": 0.21323232206580361, "grad_norm": 0.08376552909612656, "learning_rate": 1.9912133121261422e-05, "loss": 0.9052149057388306, "step": 1154 }, { "epoch": 0.21341709877469944, "grad_norm": 0.09207002073526382, "learning_rate": 1.9911869050416495e-05, "loss": 0.9631213545799255, "step": 1155 }, { "epoch": 0.2136018754835953, "grad_norm": 0.10031726956367493, "learning_rate": 1.9911604585110192e-05, "loss": 0.6098602414131165, "step": 1156 }, { "epoch": 0.21378665219249113, "grad_norm": 0.07222917675971985, "learning_rate": 1.9911339725353036e-05, "loss": 0.5439106225967407, "step": 1157 }, { "epoch": 0.21397142890138698, "grad_norm": 0.08820630609989166, "learning_rate": 1.991107447115557e-05, "loss": 0.6557695269584656, "step": 1158 }, { "epoch": 0.2141562056102828, "grad_norm": 0.09099892526865005, "learning_rate": 1.9910808822528346e-05, "loss": 0.7740167379379272, "step": 1159 }, { "epoch": 0.21434098231917867, "grad_norm": 0.08491285890340805, "learning_rate": 1.9910542779481938e-05, "loss": 0.7722455859184265, "step": 1160 }, { "epoch": 0.2145257590280745, "grad_norm": 0.09214689582586288, "learning_rate": 1.991027634202693e-05, "loss": 0.9850207567214966, "step": 1161 }, { "epoch": 0.21471053573697035, "grad_norm": 0.08727172762155533, "learning_rate": 1.991000951017393e-05, "loss": 0.7745745778083801, "step": 1162 }, { "epoch": 0.2148953124458662, "grad_norm": 0.07708032429218292, "learning_rate": 1.990974228393356e-05, "loss": 0.6668936610221863, "step": 1163 }, { "epoch": 0.21508008915476204, "grad_norm": 0.08658210188150406, "learning_rate": 1.990947466331645e-05, "loss": 0.8688307404518127, "step": 1164 }, { "epoch": 0.2152648658636579, "grad_norm": 0.07693911343812943, "learning_rate": 1.9909206648333247e-05, "loss": 0.7023448944091797, "step": 1165 }, { "epoch": 0.21544964257255372, "grad_norm": 0.0952480137348175, "learning_rate": 1.9908938238994624e-05, "loss": 0.5236302614212036, "step": 1166 }, { "epoch": 0.21563441928144958, "grad_norm": 0.09324135631322861, "learning_rate": 1.9908669435311258e-05, "loss": 0.7592065334320068, "step": 1167 }, { "epoch": 0.2158191959903454, "grad_norm": 0.06865952908992767, "learning_rate": 1.990840023729385e-05, "loss": 0.6358137130737305, "step": 1168 }, { "epoch": 0.21600397269924126, "grad_norm": 0.09976091235876083, "learning_rate": 1.9908130644953118e-05, "loss": 0.8514857888221741, "step": 1169 }, { "epoch": 0.2161887494081371, "grad_norm": 0.07364201545715332, "learning_rate": 1.9907860658299777e-05, "loss": 0.6873587965965271, "step": 1170 }, { "epoch": 0.21637352611703295, "grad_norm": 0.0919695496559143, "learning_rate": 1.9907590277344582e-05, "loss": 0.8332082033157349, "step": 1171 }, { "epoch": 0.21655830282592878, "grad_norm": 0.06787417829036713, "learning_rate": 1.990731950209829e-05, "loss": 0.6495833992958069, "step": 1172 }, { "epoch": 0.21674307953482463, "grad_norm": 0.09035515785217285, "learning_rate": 1.990704833257168e-05, "loss": 0.8606963157653809, "step": 1173 }, { "epoch": 0.2169278562437205, "grad_norm": 0.08666759729385376, "learning_rate": 1.990677676877554e-05, "loss": 0.9564822316169739, "step": 1174 }, { "epoch": 0.21711263295261632, "grad_norm": 0.10453042387962341, "learning_rate": 1.9906504810720676e-05, "loss": 0.8247703909873962, "step": 1175 }, { "epoch": 0.21729740966151218, "grad_norm": 0.08272422850131989, "learning_rate": 1.990623245841792e-05, "loss": 0.8222265839576721, "step": 1176 }, { "epoch": 0.217482186370408, "grad_norm": 0.07431092113256454, "learning_rate": 1.9905959711878104e-05, "loss": 0.644834041595459, "step": 1177 }, { "epoch": 0.21766696307930386, "grad_norm": 0.07192020118236542, "learning_rate": 1.990568657111208e-05, "loss": 0.6560988426208496, "step": 1178 }, { "epoch": 0.2178517397881997, "grad_norm": 0.09463713318109512, "learning_rate": 1.9905413036130722e-05, "loss": 0.6308455467224121, "step": 1179 }, { "epoch": 0.21803651649709554, "grad_norm": 0.07202714681625366, "learning_rate": 1.9905139106944916e-05, "loss": 0.6653580665588379, "step": 1180 }, { "epoch": 0.21822129320599137, "grad_norm": 0.08728264272212982, "learning_rate": 1.990486478356556e-05, "loss": 1.002651572227478, "step": 1181 }, { "epoch": 0.21840606991488723, "grad_norm": 0.06476552039384842, "learning_rate": 1.9904590066003577e-05, "loss": 0.5150159001350403, "step": 1182 }, { "epoch": 0.21859084662378306, "grad_norm": 0.09379825741052628, "learning_rate": 1.99043149542699e-05, "loss": 0.8470251560211182, "step": 1183 }, { "epoch": 0.21877562333267891, "grad_norm": 0.08273567259311676, "learning_rate": 1.990403944837547e-05, "loss": 0.7435149550437927, "step": 1184 }, { "epoch": 0.21896040004157477, "grad_norm": 0.09383412450551987, "learning_rate": 1.9903763548331257e-05, "loss": 0.8953707218170166, "step": 1185 }, { "epoch": 0.2191451767504706, "grad_norm": 0.07706382870674133, "learning_rate": 1.9903487254148236e-05, "loss": 0.7261871099472046, "step": 1186 }, { "epoch": 0.21932995345936646, "grad_norm": 0.08325329422950745, "learning_rate": 1.9903210565837413e-05, "loss": 0.7809126377105713, "step": 1187 }, { "epoch": 0.21951473016826228, "grad_norm": 0.08461137115955353, "learning_rate": 1.9902933483409786e-05, "loss": 0.7430332899093628, "step": 1188 }, { "epoch": 0.21969950687715814, "grad_norm": 0.08576197177171707, "learning_rate": 1.9902656006876394e-05, "loss": 0.9017677307128906, "step": 1189 }, { "epoch": 0.21988428358605397, "grad_norm": 0.08884377777576447, "learning_rate": 1.990237813624827e-05, "loss": 0.8662840127944946, "step": 1190 }, { "epoch": 0.22006906029494983, "grad_norm": 0.0905468612909317, "learning_rate": 1.990209987153648e-05, "loss": 0.7434231042861938, "step": 1191 }, { "epoch": 0.22025383700384565, "grad_norm": 0.09583505243062973, "learning_rate": 1.990182121275209e-05, "loss": 0.8754289150238037, "step": 1192 }, { "epoch": 0.2204386137127415, "grad_norm": 0.08617255091667175, "learning_rate": 1.99015421599062e-05, "loss": 0.8593115210533142, "step": 1193 }, { "epoch": 0.22062339042163737, "grad_norm": 0.07539169490337372, "learning_rate": 1.990126271300991e-05, "loss": 0.7073256969451904, "step": 1194 }, { "epoch": 0.2208081671305332, "grad_norm": 0.07064154744148254, "learning_rate": 1.990098287207434e-05, "loss": 0.7071460485458374, "step": 1195 }, { "epoch": 0.22099294383942905, "grad_norm": 0.09236112982034683, "learning_rate": 1.9900702637110627e-05, "loss": 0.9311996102333069, "step": 1196 }, { "epoch": 0.22117772054832488, "grad_norm": 0.09491878002882004, "learning_rate": 1.9900422008129924e-05, "loss": 0.7254270315170288, "step": 1197 }, { "epoch": 0.22136249725722074, "grad_norm": 0.10193610191345215, "learning_rate": 1.99001409851434e-05, "loss": 0.7491152286529541, "step": 1198 }, { "epoch": 0.22154727396611656, "grad_norm": 0.08911536633968353, "learning_rate": 1.9899859568162237e-05, "loss": 0.7793245315551758, "step": 1199 }, { "epoch": 0.22173205067501242, "grad_norm": 0.08352331072092056, "learning_rate": 1.9899577757197638e-05, "loss": 0.6871762871742249, "step": 1200 }, { "epoch": 0.22191682738390825, "grad_norm": 0.09086652100086212, "learning_rate": 1.9899295552260817e-05, "loss": 0.8809306621551514, "step": 1201 }, { "epoch": 0.2221016040928041, "grad_norm": 0.10989069938659668, "learning_rate": 1.9899012953363002e-05, "loss": 1.0556919574737549, "step": 1202 }, { "epoch": 0.22228638080169993, "grad_norm": 0.0815066546201706, "learning_rate": 1.9898729960515442e-05, "loss": 0.5446941256523132, "step": 1203 }, { "epoch": 0.2224711575105958, "grad_norm": 0.07634031027555466, "learning_rate": 1.98984465737294e-05, "loss": 0.5923195481300354, "step": 1204 }, { "epoch": 0.22265593421949165, "grad_norm": 0.08047515153884888, "learning_rate": 1.989816279301615e-05, "loss": 0.8424436450004578, "step": 1205 }, { "epoch": 0.22284071092838748, "grad_norm": 0.08787591010332108, "learning_rate": 1.989787861838699e-05, "loss": 0.7214845418930054, "step": 1206 }, { "epoch": 0.22302548763728333, "grad_norm": 0.05372117832303047, "learning_rate": 1.9897594049853226e-05, "loss": 0.4613834619522095, "step": 1207 }, { "epoch": 0.22321026434617916, "grad_norm": 0.07950277626514435, "learning_rate": 1.9897309087426185e-05, "loss": 0.7548415064811707, "step": 1208 }, { "epoch": 0.22339504105507502, "grad_norm": 0.06759131699800491, "learning_rate": 1.9897023731117206e-05, "loss": 0.43362218141555786, "step": 1209 }, { "epoch": 0.22357981776397085, "grad_norm": 0.071576789021492, "learning_rate": 1.9896737980937648e-05, "loss": 0.7462440729141235, "step": 1210 }, { "epoch": 0.2237645944728667, "grad_norm": 0.07904816418886185, "learning_rate": 1.9896451836898883e-05, "loss": 0.7481318116188049, "step": 1211 }, { "epoch": 0.22394937118176253, "grad_norm": 0.08989887684583664, "learning_rate": 1.9896165299012292e-05, "loss": 0.7053769826889038, "step": 1212 }, { "epoch": 0.2241341478906584, "grad_norm": 0.09380374103784561, "learning_rate": 1.989587836728929e-05, "loss": 0.8070808053016663, "step": 1213 }, { "epoch": 0.22431892459955421, "grad_norm": 0.09103138744831085, "learning_rate": 1.9895591041741284e-05, "loss": 0.6746754050254822, "step": 1214 }, { "epoch": 0.22450370130845007, "grad_norm": 0.06641924381256104, "learning_rate": 1.9895303322379714e-05, "loss": 0.6270115971565247, "step": 1215 }, { "epoch": 0.22468847801734593, "grad_norm": 0.08196838200092316, "learning_rate": 1.989501520921603e-05, "loss": 0.6588491797447205, "step": 1216 }, { "epoch": 0.22487325472624176, "grad_norm": 0.07861845195293427, "learning_rate": 1.98947267022617e-05, "loss": 0.6215094327926636, "step": 1217 }, { "epoch": 0.2250580314351376, "grad_norm": 0.09896759688854218, "learning_rate": 1.9894437801528205e-05, "loss": 0.903312087059021, "step": 1218 }, { "epoch": 0.22524280814403344, "grad_norm": 0.07655292004346848, "learning_rate": 1.989414850702704e-05, "loss": 0.7730190753936768, "step": 1219 }, { "epoch": 0.2254275848529293, "grad_norm": 0.09886181354522705, "learning_rate": 1.989385881876972e-05, "loss": 0.6895635724067688, "step": 1220 }, { "epoch": 0.22561236156182513, "grad_norm": 0.08377520740032196, "learning_rate": 1.989356873676777e-05, "loss": 0.780552089214325, "step": 1221 }, { "epoch": 0.22579713827072098, "grad_norm": 0.08539623767137527, "learning_rate": 1.9893278261032737e-05, "loss": 0.7185485363006592, "step": 1222 }, { "epoch": 0.2259819149796168, "grad_norm": 0.09074624627828598, "learning_rate": 1.9892987391576188e-05, "loss": 0.7275176048278809, "step": 1223 }, { "epoch": 0.22616669168851267, "grad_norm": 0.08140923827886581, "learning_rate": 1.9892696128409686e-05, "loss": 0.6939892172813416, "step": 1224 }, { "epoch": 0.2263514683974085, "grad_norm": 0.07564108073711395, "learning_rate": 1.9892404471544828e-05, "loss": 0.6892415285110474, "step": 1225 }, { "epoch": 0.22653624510630435, "grad_norm": 0.0782395526766777, "learning_rate": 1.9892112420993225e-05, "loss": 0.7155357599258423, "step": 1226 }, { "epoch": 0.2267210218152002, "grad_norm": 0.08511853218078613, "learning_rate": 1.9891819976766492e-05, "loss": 0.832256019115448, "step": 1227 }, { "epoch": 0.22690579852409604, "grad_norm": 0.08317041397094727, "learning_rate": 1.9891527138876274e-05, "loss": 0.7731190919876099, "step": 1228 }, { "epoch": 0.2270905752329919, "grad_norm": 0.10014408081769943, "learning_rate": 1.9891233907334223e-05, "loss": 0.8443605899810791, "step": 1229 }, { "epoch": 0.22727535194188772, "grad_norm": 0.098563052713871, "learning_rate": 1.9890940282152007e-05, "loss": 0.8996736407279968, "step": 1230 }, { "epoch": 0.22746012865078358, "grad_norm": 0.07454513013362885, "learning_rate": 1.9890646263341315e-05, "loss": 0.5073458552360535, "step": 1231 }, { "epoch": 0.2276449053596794, "grad_norm": 0.07696285098791122, "learning_rate": 1.989035185091384e-05, "loss": 0.5916829705238342, "step": 1232 }, { "epoch": 0.22782968206857526, "grad_norm": 0.0965103730559349, "learning_rate": 1.9890057044881308e-05, "loss": 0.8592946529388428, "step": 1233 }, { "epoch": 0.2280144587774711, "grad_norm": 0.08271915465593338, "learning_rate": 1.988976184525545e-05, "loss": 0.7643226981163025, "step": 1234 }, { "epoch": 0.22819923548636695, "grad_norm": 0.09926889091730118, "learning_rate": 1.988946625204801e-05, "loss": 0.9657240509986877, "step": 1235 }, { "epoch": 0.22838401219526278, "grad_norm": 0.06959125399589539, "learning_rate": 1.988917026527075e-05, "loss": 0.6401278376579285, "step": 1236 }, { "epoch": 0.22856878890415863, "grad_norm": 0.06192828342318535, "learning_rate": 1.9888873884935457e-05, "loss": 0.46463993191719055, "step": 1237 }, { "epoch": 0.2287535656130545, "grad_norm": 0.07837715744972229, "learning_rate": 1.988857711105392e-05, "loss": 0.6322407722473145, "step": 1238 }, { "epoch": 0.22893834232195032, "grad_norm": 0.10930605977773666, "learning_rate": 1.9888279943637946e-05, "loss": 0.9277044534683228, "step": 1239 }, { "epoch": 0.22912311903084617, "grad_norm": 0.09522504359483719, "learning_rate": 1.9887982382699373e-05, "loss": 0.8586084246635437, "step": 1240 }, { "epoch": 0.229307895739742, "grad_norm": 0.07182016968727112, "learning_rate": 1.9887684428250038e-05, "loss": 0.6435913443565369, "step": 1241 }, { "epoch": 0.22949267244863786, "grad_norm": 0.07749020308256149, "learning_rate": 1.9887386080301793e-05, "loss": 0.983945369720459, "step": 1242 }, { "epoch": 0.2296774491575337, "grad_norm": 0.08700563758611679, "learning_rate": 1.988708733886652e-05, "loss": 0.8135854005813599, "step": 1243 }, { "epoch": 0.22986222586642954, "grad_norm": 0.08946280181407928, "learning_rate": 1.98867882039561e-05, "loss": 0.8678815364837646, "step": 1244 }, { "epoch": 0.23004700257532537, "grad_norm": 0.07762499898672104, "learning_rate": 1.988648867558244e-05, "loss": 0.7090401649475098, "step": 1245 }, { "epoch": 0.23023177928422123, "grad_norm": 0.09532805532217026, "learning_rate": 1.9886188753757466e-05, "loss": 0.9154309034347534, "step": 1246 }, { "epoch": 0.23041655599311706, "grad_norm": 0.0796472430229187, "learning_rate": 1.9885888438493106e-05, "loss": 0.6643787026405334, "step": 1247 }, { "epoch": 0.2306013327020129, "grad_norm": 0.07897227257490158, "learning_rate": 1.988558772980132e-05, "loss": 0.6329892873764038, "step": 1248 }, { "epoch": 0.23078610941090877, "grad_norm": 0.09313155710697174, "learning_rate": 1.9885286627694066e-05, "loss": 0.832964301109314, "step": 1249 }, { "epoch": 0.2309708861198046, "grad_norm": 0.06968510150909424, "learning_rate": 1.9884985132183333e-05, "loss": 0.679764449596405, "step": 1250 }, { "epoch": 0.23115566282870045, "grad_norm": 0.08463939279317856, "learning_rate": 1.9884683243281117e-05, "loss": 0.8374265432357788, "step": 1251 }, { "epoch": 0.23134043953759628, "grad_norm": 0.08091641962528229, "learning_rate": 1.9884380960999432e-05, "loss": 0.7396032214164734, "step": 1252 }, { "epoch": 0.23152521624649214, "grad_norm": 0.12453664094209671, "learning_rate": 1.988407828535031e-05, "loss": 1.1132965087890625, "step": 1253 }, { "epoch": 0.23170999295538797, "grad_norm": 0.10481418669223785, "learning_rate": 1.9883775216345797e-05, "loss": 1.0461211204528809, "step": 1254 }, { "epoch": 0.23189476966428382, "grad_norm": 0.09625512361526489, "learning_rate": 1.988347175399795e-05, "loss": 0.8646210432052612, "step": 1255 }, { "epoch": 0.23207954637317965, "grad_norm": 0.0808907076716423, "learning_rate": 1.988316789831885e-05, "loss": 0.8495672345161438, "step": 1256 }, { "epoch": 0.2322643230820755, "grad_norm": 0.06943518668413162, "learning_rate": 1.9882863649320588e-05, "loss": 0.7596601247787476, "step": 1257 }, { "epoch": 0.23244909979097134, "grad_norm": 0.069698266685009, "learning_rate": 1.9882559007015275e-05, "loss": 0.5961692929267883, "step": 1258 }, { "epoch": 0.2326338764998672, "grad_norm": 0.07678410410881042, "learning_rate": 1.988225397141503e-05, "loss": 0.6730903387069702, "step": 1259 }, { "epoch": 0.23281865320876305, "grad_norm": 0.09135778993368149, "learning_rate": 1.9881948542531994e-05, "loss": 0.7469479441642761, "step": 1260 }, { "epoch": 0.23300342991765888, "grad_norm": 0.08337856829166412, "learning_rate": 1.988164272037832e-05, "loss": 0.6452245116233826, "step": 1261 }, { "epoch": 0.23318820662655473, "grad_norm": 0.06979101896286011, "learning_rate": 1.9881336504966187e-05, "loss": 0.6644605398178101, "step": 1262 }, { "epoch": 0.23337298333545056, "grad_norm": 0.07771389931440353, "learning_rate": 1.9881029896307772e-05, "loss": 0.7592771053314209, "step": 1263 }, { "epoch": 0.23355776004434642, "grad_norm": 0.0938957929611206, "learning_rate": 1.9880722894415284e-05, "loss": 0.8080681562423706, "step": 1264 }, { "epoch": 0.23374253675324225, "grad_norm": 0.07659591734409332, "learning_rate": 1.9880415499300936e-05, "loss": 0.6537554264068604, "step": 1265 }, { "epoch": 0.2339273134621381, "grad_norm": 0.07603617012500763, "learning_rate": 1.988010771097696e-05, "loss": 0.688729465007782, "step": 1266 }, { "epoch": 0.23411209017103393, "grad_norm": 0.10170614719390869, "learning_rate": 1.987979952945561e-05, "loss": 0.8643448948860168, "step": 1267 }, { "epoch": 0.2342968668799298, "grad_norm": 0.07562986016273499, "learning_rate": 1.9879490954749152e-05, "loss": 0.8214960098266602, "step": 1268 }, { "epoch": 0.23448164358882562, "grad_norm": 0.09902802109718323, "learning_rate": 1.9879181986869856e-05, "loss": 0.8390169143676758, "step": 1269 }, { "epoch": 0.23466642029772147, "grad_norm": 0.07118549942970276, "learning_rate": 1.9878872625830033e-05, "loss": 0.6914301514625549, "step": 1270 }, { "epoch": 0.23485119700661733, "grad_norm": 0.10551498085260391, "learning_rate": 1.987856287164198e-05, "loss": 1.0575722455978394, "step": 1271 }, { "epoch": 0.23503597371551316, "grad_norm": 0.08981330692768097, "learning_rate": 1.9878252724318034e-05, "loss": 0.704413890838623, "step": 1272 }, { "epoch": 0.23522075042440901, "grad_norm": 0.09239045530557632, "learning_rate": 1.9877942183870534e-05, "loss": 0.8672037124633789, "step": 1273 }, { "epoch": 0.23540552713330484, "grad_norm": 0.07538927346467972, "learning_rate": 1.9877631250311838e-05, "loss": 0.6462569832801819, "step": 1274 }, { "epoch": 0.2355903038422007, "grad_norm": 0.07566707581281662, "learning_rate": 1.9877319923654327e-05, "loss": 0.650894284248352, "step": 1275 }, { "epoch": 0.23577508055109653, "grad_norm": 0.08834036439657211, "learning_rate": 1.987700820391038e-05, "loss": 0.8693649768829346, "step": 1276 }, { "epoch": 0.23595985725999238, "grad_norm": 0.06067048758268356, "learning_rate": 1.9876696091092408e-05, "loss": 0.6727550625801086, "step": 1277 }, { "epoch": 0.2361446339688882, "grad_norm": 0.09425334632396698, "learning_rate": 1.9876383585212832e-05, "loss": 0.7923711538314819, "step": 1278 }, { "epoch": 0.23632941067778407, "grad_norm": 0.09646166861057281, "learning_rate": 1.987607068628409e-05, "loss": 0.8077743053436279, "step": 1279 }, { "epoch": 0.2365141873866799, "grad_norm": 0.06434794515371323, "learning_rate": 1.987575739431863e-05, "loss": 0.6180709004402161, "step": 1280 }, { "epoch": 0.23669896409557575, "grad_norm": 0.0861670970916748, "learning_rate": 1.9875443709328928e-05, "loss": 0.830324649810791, "step": 1281 }, { "epoch": 0.2368837408044716, "grad_norm": 0.07235396653413773, "learning_rate": 1.987512963132746e-05, "loss": 0.6347554922103882, "step": 1282 }, { "epoch": 0.23706851751336744, "grad_norm": 0.08272948861122131, "learning_rate": 1.9874815160326728e-05, "loss": 0.6697772741317749, "step": 1283 }, { "epoch": 0.2372532942222633, "grad_norm": 0.11427222192287445, "learning_rate": 1.9874500296339245e-05, "loss": 0.8781434893608093, "step": 1284 }, { "epoch": 0.23743807093115912, "grad_norm": 0.08316230773925781, "learning_rate": 1.987418503937754e-05, "loss": 0.8479765057563782, "step": 1285 }, { "epoch": 0.23762284764005498, "grad_norm": 0.07936954498291016, "learning_rate": 1.987386938945417e-05, "loss": 0.7314356565475464, "step": 1286 }, { "epoch": 0.2378076243489508, "grad_norm": 0.0801529586315155, "learning_rate": 1.9873553346581688e-05, "loss": 0.5724437832832336, "step": 1287 }, { "epoch": 0.23799240105784666, "grad_norm": 0.09593310952186584, "learning_rate": 1.9873236910772674e-05, "loss": 0.8072952628135681, "step": 1288 }, { "epoch": 0.2381771777667425, "grad_norm": 0.1059904620051384, "learning_rate": 1.987292008203972e-05, "loss": 1.0050170421600342, "step": 1289 }, { "epoch": 0.23836195447563835, "grad_norm": 0.09045623242855072, "learning_rate": 1.9872602860395433e-05, "loss": 0.779002845287323, "step": 1290 }, { "epoch": 0.23854673118453418, "grad_norm": 0.09053946286439896, "learning_rate": 1.987228524585244e-05, "loss": 0.7273655533790588, "step": 1291 }, { "epoch": 0.23873150789343003, "grad_norm": 0.0855286493897438, "learning_rate": 1.987196723842338e-05, "loss": 0.7778965830802917, "step": 1292 }, { "epoch": 0.2389162846023259, "grad_norm": 0.09738358855247498, "learning_rate": 1.9871648838120913e-05, "loss": 0.9179601073265076, "step": 1293 }, { "epoch": 0.23910106131122172, "grad_norm": 0.07933405041694641, "learning_rate": 1.9871330044957703e-05, "loss": 0.7287842035293579, "step": 1294 }, { "epoch": 0.23928583802011757, "grad_norm": 0.09340131282806396, "learning_rate": 1.9871010858946443e-05, "loss": 0.8566576242446899, "step": 1295 }, { "epoch": 0.2394706147290134, "grad_norm": 0.09495611488819122, "learning_rate": 1.987069128009983e-05, "loss": 0.9072137475013733, "step": 1296 }, { "epoch": 0.23965539143790926, "grad_norm": 0.09656231105327606, "learning_rate": 1.987037130843059e-05, "loss": 0.7251742482185364, "step": 1297 }, { "epoch": 0.2398401681468051, "grad_norm": 0.08660931140184402, "learning_rate": 1.987005094395145e-05, "loss": 0.5973469614982605, "step": 1298 }, { "epoch": 0.24002494485570094, "grad_norm": 0.10160975158214569, "learning_rate": 1.986973018667516e-05, "loss": 0.779219925403595, "step": 1299 }, { "epoch": 0.24020972156459677, "grad_norm": 0.07078233361244202, "learning_rate": 1.986940903661449e-05, "loss": 0.69890958070755, "step": 1300 }, { "epoch": 0.24039449827349263, "grad_norm": 0.07332238554954529, "learning_rate": 1.9869087493782217e-05, "loss": 0.7630764245986938, "step": 1301 }, { "epoch": 0.24057927498238846, "grad_norm": 0.07510337233543396, "learning_rate": 1.9868765558191137e-05, "loss": 0.638870894908905, "step": 1302 }, { "epoch": 0.24076405169128431, "grad_norm": 0.08256179094314575, "learning_rate": 1.9868443229854068e-05, "loss": 0.7185980081558228, "step": 1303 }, { "epoch": 0.24094882840018017, "grad_norm": 0.10575845092535019, "learning_rate": 1.9868120508783826e-05, "loss": 0.9325575828552246, "step": 1304 }, { "epoch": 0.241133605109076, "grad_norm": 0.07245877385139465, "learning_rate": 1.986779739499326e-05, "loss": 0.6992596387863159, "step": 1305 }, { "epoch": 0.24131838181797186, "grad_norm": 0.08188942819833755, "learning_rate": 1.9867473888495236e-05, "loss": 0.8546136617660522, "step": 1306 }, { "epoch": 0.24150315852686768, "grad_norm": 0.0791037529706955, "learning_rate": 1.9867149989302623e-05, "loss": 0.6521901488304138, "step": 1307 }, { "epoch": 0.24168793523576354, "grad_norm": 0.08614641427993774, "learning_rate": 1.986682569742831e-05, "loss": 0.7859422564506531, "step": 1308 }, { "epoch": 0.24187271194465937, "grad_norm": 0.08477243781089783, "learning_rate": 1.98665010128852e-05, "loss": 0.8153690695762634, "step": 1309 }, { "epoch": 0.24205748865355523, "grad_norm": 0.0749654546380043, "learning_rate": 1.986617593568622e-05, "loss": 0.6461166739463806, "step": 1310 }, { "epoch": 0.24224226536245105, "grad_norm": 0.07793160527944565, "learning_rate": 1.9865850465844305e-05, "loss": 0.6098483800888062, "step": 1311 }, { "epoch": 0.2424270420713469, "grad_norm": 0.07243506610393524, "learning_rate": 1.9865524603372408e-05, "loss": 0.5794659852981567, "step": 1312 }, { "epoch": 0.24261181878024274, "grad_norm": 0.08559340983629227, "learning_rate": 1.9865198348283497e-05, "loss": 0.6404768824577332, "step": 1313 }, { "epoch": 0.2427965954891386, "grad_norm": 0.10084764659404755, "learning_rate": 1.9864871700590557e-05, "loss": 0.7237789630889893, "step": 1314 }, { "epoch": 0.24298137219803445, "grad_norm": 0.0957445353269577, "learning_rate": 1.9864544660306584e-05, "loss": 0.9082088470458984, "step": 1315 }, { "epoch": 0.24316614890693028, "grad_norm": 0.08300480246543884, "learning_rate": 1.9864217227444594e-05, "loss": 0.8019734025001526, "step": 1316 }, { "epoch": 0.24335092561582614, "grad_norm": 0.09435844421386719, "learning_rate": 1.9863889402017627e-05, "loss": 0.8119986057281494, "step": 1317 }, { "epoch": 0.24353570232472196, "grad_norm": 0.08238641917705536, "learning_rate": 1.9863561184038715e-05, "loss": 0.7308359146118164, "step": 1318 }, { "epoch": 0.24372047903361782, "grad_norm": 0.11390416324138641, "learning_rate": 1.986323257352093e-05, "loss": 0.9338391423225403, "step": 1319 }, { "epoch": 0.24390525574251365, "grad_norm": 0.07044551521539688, "learning_rate": 1.9862903570477345e-05, "loss": 0.5287854671478271, "step": 1320 }, { "epoch": 0.2440900324514095, "grad_norm": 0.06873449683189392, "learning_rate": 1.9862574174921056e-05, "loss": 0.6060889959335327, "step": 1321 }, { "epoch": 0.24427480916030533, "grad_norm": 0.07628993690013885, "learning_rate": 1.9862244386865173e-05, "loss": 0.834665060043335, "step": 1322 }, { "epoch": 0.2444595858692012, "grad_norm": 0.09126506000757217, "learning_rate": 1.9861914206322815e-05, "loss": 1.0127897262573242, "step": 1323 }, { "epoch": 0.24464436257809702, "grad_norm": 0.07502689212560654, "learning_rate": 1.9861583633307127e-05, "loss": 0.7499599456787109, "step": 1324 }, { "epoch": 0.24482913928699288, "grad_norm": 0.08039402216672897, "learning_rate": 1.9861252667831263e-05, "loss": 0.6770073175430298, "step": 1325 }, { "epoch": 0.24501391599588873, "grad_norm": 0.07558752596378326, "learning_rate": 1.9860921309908395e-05, "loss": 0.720396876335144, "step": 1326 }, { "epoch": 0.24519869270478456, "grad_norm": 0.07513535767793655, "learning_rate": 1.986058955955171e-05, "loss": 0.6042259931564331, "step": 1327 }, { "epoch": 0.24538346941368042, "grad_norm": 0.08912438899278641, "learning_rate": 1.9860257416774413e-05, "loss": 0.8705518245697021, "step": 1328 }, { "epoch": 0.24556824612257624, "grad_norm": 0.09717576205730438, "learning_rate": 1.9859924881589715e-05, "loss": 1.0236485004425049, "step": 1329 }, { "epoch": 0.2457530228314721, "grad_norm": 0.07261338829994202, "learning_rate": 1.9859591954010855e-05, "loss": 0.5481549501419067, "step": 1330 }, { "epoch": 0.24593779954036793, "grad_norm": 0.06435700505971909, "learning_rate": 1.9859258634051083e-05, "loss": 0.5956790447235107, "step": 1331 }, { "epoch": 0.24612257624926379, "grad_norm": 0.07993557304143906, "learning_rate": 1.9858924921723665e-05, "loss": 0.5512259006500244, "step": 1332 }, { "epoch": 0.24630735295815961, "grad_norm": 0.1006544828414917, "learning_rate": 1.9858590817041875e-05, "loss": 0.8863869309425354, "step": 1333 }, { "epoch": 0.24649212966705547, "grad_norm": 0.0793013721704483, "learning_rate": 1.9858256320019018e-05, "loss": 0.7920618057250977, "step": 1334 }, { "epoch": 0.2466769063759513, "grad_norm": 0.0840287134051323, "learning_rate": 1.9857921430668402e-05, "loss": 0.7028228044509888, "step": 1335 }, { "epoch": 0.24686168308484716, "grad_norm": 0.09260048717260361, "learning_rate": 1.9857586149003354e-05, "loss": 0.7949702739715576, "step": 1336 }, { "epoch": 0.247046459793743, "grad_norm": 0.08278703689575195, "learning_rate": 1.985725047503722e-05, "loss": 0.8024004697799683, "step": 1337 }, { "epoch": 0.24723123650263884, "grad_norm": 0.07588304579257965, "learning_rate": 1.985691440878335e-05, "loss": 0.648167610168457, "step": 1338 }, { "epoch": 0.2474160132115347, "grad_norm": 0.08276861160993576, "learning_rate": 1.985657795025513e-05, "loss": 0.7005817294120789, "step": 1339 }, { "epoch": 0.24760078992043053, "grad_norm": 0.08604889363050461, "learning_rate": 1.9856241099465944e-05, "loss": 0.8027825951576233, "step": 1340 }, { "epoch": 0.24778556662932638, "grad_norm": 0.08195369690656662, "learning_rate": 1.9855903856429198e-05, "loss": 0.7094834446907043, "step": 1341 }, { "epoch": 0.2479703433382222, "grad_norm": 0.07793563604354858, "learning_rate": 1.9855566221158314e-05, "loss": 0.6649020314216614, "step": 1342 }, { "epoch": 0.24815512004711807, "grad_norm": 0.08001888543367386, "learning_rate": 1.9855228193666724e-05, "loss": 0.7470601797103882, "step": 1343 }, { "epoch": 0.2483398967560139, "grad_norm": 0.0751085951924324, "learning_rate": 1.985488977396789e-05, "loss": 0.646827220916748, "step": 1344 }, { "epoch": 0.24852467346490975, "grad_norm": 0.08412483334541321, "learning_rate": 1.9854550962075273e-05, "loss": 0.7207925319671631, "step": 1345 }, { "epoch": 0.24870945017380558, "grad_norm": 0.09168536216020584, "learning_rate": 1.985421175800236e-05, "loss": 0.8863273859024048, "step": 1346 }, { "epoch": 0.24889422688270144, "grad_norm": 0.09173490107059479, "learning_rate": 1.985387216176265e-05, "loss": 0.7285597324371338, "step": 1347 }, { "epoch": 0.2490790035915973, "grad_norm": 0.0815114825963974, "learning_rate": 1.9853532173369653e-05, "loss": 0.8123331665992737, "step": 1348 }, { "epoch": 0.24926378030049312, "grad_norm": 0.07043108344078064, "learning_rate": 1.9853191792836906e-05, "loss": 0.5145629644393921, "step": 1349 }, { "epoch": 0.24944855700938898, "grad_norm": 0.0833626538515091, "learning_rate": 1.9852851020177955e-05, "loss": 0.7493253946304321, "step": 1350 }, { "epoch": 0.2496333337182848, "grad_norm": 0.07964198291301727, "learning_rate": 1.9852509855406354e-05, "loss": 0.7904506325721741, "step": 1351 }, { "epoch": 0.24981811042718066, "grad_norm": 0.08142036944627762, "learning_rate": 1.9852168298535687e-05, "loss": 0.6080458164215088, "step": 1352 }, { "epoch": 0.2500028871360765, "grad_norm": 0.0822858139872551, "learning_rate": 1.9851826349579547e-05, "loss": 0.7370697259902954, "step": 1353 }, { "epoch": 0.2501876638449723, "grad_norm": 0.0878128632903099, "learning_rate": 1.9851484008551537e-05, "loss": 0.8177471160888672, "step": 1354 }, { "epoch": 0.2503724405538682, "grad_norm": 0.08483421057462692, "learning_rate": 1.9851141275465288e-05, "loss": 0.7663596272468567, "step": 1355 }, { "epoch": 0.25055721726276403, "grad_norm": 0.08922464400529861, "learning_rate": 1.9850798150334434e-05, "loss": 0.70176100730896, "step": 1356 }, { "epoch": 0.25074199397165986, "grad_norm": 0.08818759769201279, "learning_rate": 1.9850454633172632e-05, "loss": 0.8136799335479736, "step": 1357 }, { "epoch": 0.2509267706805557, "grad_norm": 0.09864350408315659, "learning_rate": 1.985011072399356e-05, "loss": 1.0431067943572998, "step": 1358 }, { "epoch": 0.2511115473894516, "grad_norm": 0.06932670623064041, "learning_rate": 1.9849766422810893e-05, "loss": 0.5578855276107788, "step": 1359 }, { "epoch": 0.2512963240983474, "grad_norm": 0.09292439371347427, "learning_rate": 1.984942172963834e-05, "loss": 0.7040930986404419, "step": 1360 }, { "epoch": 0.25148110080724323, "grad_norm": 0.07903002947568893, "learning_rate": 1.9849076644489616e-05, "loss": 0.6327306032180786, "step": 1361 }, { "epoch": 0.2516658775161391, "grad_norm": 0.07349206507205963, "learning_rate": 1.9848731167378457e-05, "loss": 0.6429340243339539, "step": 1362 }, { "epoch": 0.25185065422503494, "grad_norm": 0.08471519500017166, "learning_rate": 1.9848385298318607e-05, "loss": 0.754509449005127, "step": 1363 }, { "epoch": 0.25203543093393077, "grad_norm": 0.07947010546922684, "learning_rate": 1.9848039037323836e-05, "loss": 0.5998097658157349, "step": 1364 }, { "epoch": 0.2522202076428266, "grad_norm": 0.07180643081665039, "learning_rate": 1.984769238440792e-05, "loss": 0.6718336343765259, "step": 1365 }, { "epoch": 0.2524049843517225, "grad_norm": 0.08686842024326324, "learning_rate": 1.9847345339584662e-05, "loss": 0.9123610258102417, "step": 1366 }, { "epoch": 0.2525897610606183, "grad_norm": 0.09164551645517349, "learning_rate": 1.984699790286786e-05, "loss": 0.9346916079521179, "step": 1367 }, { "epoch": 0.25277453776951414, "grad_norm": 0.06970732659101486, "learning_rate": 1.9846650074271356e-05, "loss": 0.758202075958252, "step": 1368 }, { "epoch": 0.25295931447840997, "grad_norm": 0.08139359951019287, "learning_rate": 1.984630185380898e-05, "loss": 0.7334830164909363, "step": 1369 }, { "epoch": 0.25314409118730585, "grad_norm": 0.08255112171173096, "learning_rate": 1.98459532414946e-05, "loss": 0.7164485454559326, "step": 1370 }, { "epoch": 0.2533288678962017, "grad_norm": 0.09030817449092865, "learning_rate": 1.984560423734208e-05, "loss": 0.7118257880210876, "step": 1371 }, { "epoch": 0.2535136446050975, "grad_norm": 0.09159182012081146, "learning_rate": 1.9845254841365316e-05, "loss": 0.8373132944107056, "step": 1372 }, { "epoch": 0.2536984213139934, "grad_norm": 0.08677585422992706, "learning_rate": 1.9844905053578213e-05, "loss": 0.7780888676643372, "step": 1373 }, { "epoch": 0.2538831980228892, "grad_norm": 0.06799543648958206, "learning_rate": 1.984455487399469e-05, "loss": 0.546689510345459, "step": 1374 }, { "epoch": 0.25406797473178505, "grad_norm": 0.06946510821580887, "learning_rate": 1.9844204302628683e-05, "loss": 0.5932323932647705, "step": 1375 }, { "epoch": 0.2542527514406809, "grad_norm": 0.09083466231822968, "learning_rate": 1.984385333949414e-05, "loss": 0.6616789698600769, "step": 1376 }, { "epoch": 0.25443752814957676, "grad_norm": 0.08521077036857605, "learning_rate": 1.9843501984605033e-05, "loss": 0.8017097115516663, "step": 1377 }, { "epoch": 0.2546223048584726, "grad_norm": 0.08863421529531479, "learning_rate": 1.9843150237975343e-05, "loss": 0.8838331699371338, "step": 1378 }, { "epoch": 0.2548070815673684, "grad_norm": 0.1019650399684906, "learning_rate": 1.984279809961907e-05, "loss": 0.7910183072090149, "step": 1379 }, { "epoch": 0.25499185827626425, "grad_norm": 0.0719834640622139, "learning_rate": 1.9842445569550227e-05, "loss": 0.682668149471283, "step": 1380 }, { "epoch": 0.25517663498516013, "grad_norm": 0.08507004380226135, "learning_rate": 1.984209264778284e-05, "loss": 0.8330849409103394, "step": 1381 }, { "epoch": 0.25536141169405596, "grad_norm": 0.10418731719255447, "learning_rate": 1.9841739334330962e-05, "loss": 1.0728949308395386, "step": 1382 }, { "epoch": 0.2555461884029518, "grad_norm": 0.0746791809797287, "learning_rate": 1.984138562920865e-05, "loss": 0.6407696604728699, "step": 1383 }, { "epoch": 0.2557309651118477, "grad_norm": 0.09626784175634384, "learning_rate": 1.9841031532429972e-05, "loss": 0.9932002425193787, "step": 1384 }, { "epoch": 0.2559157418207435, "grad_norm": 0.09764228016138077, "learning_rate": 1.9840677044009035e-05, "loss": 0.7491921782493591, "step": 1385 }, { "epoch": 0.25610051852963933, "grad_norm": 0.08643902093172073, "learning_rate": 1.9840322163959938e-05, "loss": 0.7618559002876282, "step": 1386 }, { "epoch": 0.25628529523853516, "grad_norm": 0.08111730217933655, "learning_rate": 1.9839966892296802e-05, "loss": 0.6650518178939819, "step": 1387 }, { "epoch": 0.25647007194743104, "grad_norm": 0.08415523916482925, "learning_rate": 1.9839611229033774e-05, "loss": 0.8092013001441956, "step": 1388 }, { "epoch": 0.2566548486563269, "grad_norm": 0.07144782692193985, "learning_rate": 1.9839255174185e-05, "loss": 0.6734938621520996, "step": 1389 }, { "epoch": 0.2568396253652227, "grad_norm": 0.09223801642656326, "learning_rate": 1.983889872776465e-05, "loss": 0.7030454277992249, "step": 1390 }, { "epoch": 0.25702440207411853, "grad_norm": 0.09314022213220596, "learning_rate": 1.983854188978692e-05, "loss": 0.8970528841018677, "step": 1391 }, { "epoch": 0.2572091787830144, "grad_norm": 0.0720200166106224, "learning_rate": 1.9838184660265996e-05, "loss": 0.6767599582672119, "step": 1392 }, { "epoch": 0.25739395549191024, "grad_norm": 0.08036263287067413, "learning_rate": 1.983782703921611e-05, "loss": 0.8033877611160278, "step": 1393 }, { "epoch": 0.25757873220080607, "grad_norm": 0.07800677418708801, "learning_rate": 1.983746902665148e-05, "loss": 0.7212039232254028, "step": 1394 }, { "epoch": 0.25776350890970195, "grad_norm": 0.08257672935724258, "learning_rate": 1.9837110622586364e-05, "loss": 0.7464303374290466, "step": 1395 }, { "epoch": 0.2579482856185978, "grad_norm": 0.07256592065095901, "learning_rate": 1.983675182703502e-05, "loss": 0.7287595868110657, "step": 1396 }, { "epoch": 0.2581330623274936, "grad_norm": 0.06713775545358658, "learning_rate": 1.983639264001173e-05, "loss": 0.5811136960983276, "step": 1397 }, { "epoch": 0.25831783903638944, "grad_norm": 0.08436959981918335, "learning_rate": 1.9836033061530785e-05, "loss": 0.6900782585144043, "step": 1398 }, { "epoch": 0.2585026157452853, "grad_norm": 0.08242448419332504, "learning_rate": 1.9835673091606498e-05, "loss": 0.7158823609352112, "step": 1399 }, { "epoch": 0.25868739245418115, "grad_norm": 0.07009007781744003, "learning_rate": 1.9835312730253195e-05, "loss": 0.5768419504165649, "step": 1400 }, { "epoch": 0.258872169163077, "grad_norm": 0.07849988341331482, "learning_rate": 1.983495197748521e-05, "loss": 0.6357064247131348, "step": 1401 }, { "epoch": 0.2590569458719728, "grad_norm": 0.10085313767194748, "learning_rate": 1.9834590833316913e-05, "loss": 0.817133903503418, "step": 1402 }, { "epoch": 0.2592417225808687, "grad_norm": 0.10113980621099472, "learning_rate": 1.983422929776267e-05, "loss": 0.7684735655784607, "step": 1403 }, { "epoch": 0.2594264992897645, "grad_norm": 0.0733640268445015, "learning_rate": 1.9833867370836865e-05, "loss": 0.6575866937637329, "step": 1404 }, { "epoch": 0.25961127599866035, "grad_norm": 0.10067889094352722, "learning_rate": 1.9833505052553905e-05, "loss": 0.8692967891693115, "step": 1405 }, { "epoch": 0.25979605270755624, "grad_norm": 0.07616506516933441, "learning_rate": 1.983314234292821e-05, "loss": 0.5759550333023071, "step": 1406 }, { "epoch": 0.25998082941645206, "grad_norm": 0.13058625161647797, "learning_rate": 1.9832779241974213e-05, "loss": 1.1444944143295288, "step": 1407 }, { "epoch": 0.2601656061253479, "grad_norm": 0.09796320647001266, "learning_rate": 1.9832415749706366e-05, "loss": 0.8220914006233215, "step": 1408 }, { "epoch": 0.2603503828342437, "grad_norm": 0.06864874809980392, "learning_rate": 1.9832051866139133e-05, "loss": 0.6185899972915649, "step": 1409 }, { "epoch": 0.2605351595431396, "grad_norm": 0.06889761239290237, "learning_rate": 1.9831687591286995e-05, "loss": 0.607519805431366, "step": 1410 }, { "epoch": 0.26071993625203543, "grad_norm": 0.07183163613080978, "learning_rate": 1.983132292516445e-05, "loss": 0.6053565740585327, "step": 1411 }, { "epoch": 0.26090471296093126, "grad_norm": 0.07045409828424454, "learning_rate": 1.9830957867786013e-05, "loss": 0.5963341593742371, "step": 1412 }, { "epoch": 0.2610894896698271, "grad_norm": 0.0631919577717781, "learning_rate": 1.983059241916621e-05, "loss": 0.5690242052078247, "step": 1413 }, { "epoch": 0.261274266378723, "grad_norm": 0.08845905214548111, "learning_rate": 1.9830226579319585e-05, "loss": 0.7385035157203674, "step": 1414 }, { "epoch": 0.2614590430876188, "grad_norm": 0.08544179052114487, "learning_rate": 1.9829860348260695e-05, "loss": 0.8613137006759644, "step": 1415 }, { "epoch": 0.26164381979651463, "grad_norm": 0.06891070306301117, "learning_rate": 1.9829493726004117e-05, "loss": 0.5957195162773132, "step": 1416 }, { "epoch": 0.2618285965054105, "grad_norm": 0.07848822325468063, "learning_rate": 1.982912671256444e-05, "loss": 0.6115220189094543, "step": 1417 }, { "epoch": 0.26201337321430634, "grad_norm": 0.10504502058029175, "learning_rate": 1.9828759307956277e-05, "loss": 0.9463704824447632, "step": 1418 }, { "epoch": 0.2621981499232022, "grad_norm": 0.08608116209506989, "learning_rate": 1.982839151219424e-05, "loss": 0.7963468432426453, "step": 1419 }, { "epoch": 0.262382926632098, "grad_norm": 0.09126728773117065, "learning_rate": 1.9828023325292968e-05, "loss": 0.7408127784729004, "step": 1420 }, { "epoch": 0.2625677033409939, "grad_norm": 0.0852249264717102, "learning_rate": 1.982765474726712e-05, "loss": 0.6558241844177246, "step": 1421 }, { "epoch": 0.2627524800498897, "grad_norm": 0.08563613146543503, "learning_rate": 1.9827285778131355e-05, "loss": 0.8080082535743713, "step": 1422 }, { "epoch": 0.26293725675878554, "grad_norm": 0.08746327459812164, "learning_rate": 1.9826916417900363e-05, "loss": 0.7995231747627258, "step": 1423 }, { "epoch": 0.26312203346768137, "grad_norm": 0.08268547058105469, "learning_rate": 1.9826546666588844e-05, "loss": 0.7918660640716553, "step": 1424 }, { "epoch": 0.26330681017657726, "grad_norm": 0.0886000469326973, "learning_rate": 1.982617652421151e-05, "loss": 0.782829225063324, "step": 1425 }, { "epoch": 0.2634915868854731, "grad_norm": 0.0858738124370575, "learning_rate": 1.9825805990783095e-05, "loss": 0.8857966065406799, "step": 1426 }, { "epoch": 0.2636763635943689, "grad_norm": 0.07344409823417664, "learning_rate": 1.982543506631834e-05, "loss": 0.5548909902572632, "step": 1427 }, { "epoch": 0.2638611403032648, "grad_norm": 0.09202000498771667, "learning_rate": 1.9825063750832007e-05, "loss": 0.8485000729560852, "step": 1428 }, { "epoch": 0.2640459170121606, "grad_norm": 0.10929467529058456, "learning_rate": 1.9824692044338876e-05, "loss": 0.9773752093315125, "step": 1429 }, { "epoch": 0.26423069372105645, "grad_norm": 0.08134205639362335, "learning_rate": 1.982431994685374e-05, "loss": 0.710753321647644, "step": 1430 }, { "epoch": 0.2644154704299523, "grad_norm": 0.07787948101758957, "learning_rate": 1.982394745839141e-05, "loss": 0.6895082592964172, "step": 1431 }, { "epoch": 0.26460024713884817, "grad_norm": 0.08764622360467911, "learning_rate": 1.9823574578966704e-05, "loss": 0.7310826182365417, "step": 1432 }, { "epoch": 0.264785023847744, "grad_norm": 0.08674835413694382, "learning_rate": 1.9823201308594465e-05, "loss": 0.7989583015441895, "step": 1433 }, { "epoch": 0.2649698005566398, "grad_norm": 0.08755107969045639, "learning_rate": 1.9822827647289544e-05, "loss": 0.7573221325874329, "step": 1434 }, { "epoch": 0.2651545772655357, "grad_norm": 0.08561009168624878, "learning_rate": 1.982245359506682e-05, "loss": 0.7378658652305603, "step": 1435 }, { "epoch": 0.26533935397443154, "grad_norm": 0.08592981845140457, "learning_rate": 1.9822079151941163e-05, "loss": 0.6005727648735046, "step": 1436 }, { "epoch": 0.26552413068332736, "grad_norm": 0.0772705152630806, "learning_rate": 1.9821704317927492e-05, "loss": 0.5923016667366028, "step": 1437 }, { "epoch": 0.2657089073922232, "grad_norm": 0.06057063862681389, "learning_rate": 1.9821329093040717e-05, "loss": 0.6054449081420898, "step": 1438 }, { "epoch": 0.2658936841011191, "grad_norm": 0.08752863854169846, "learning_rate": 1.982095347729577e-05, "loss": 0.7829182147979736, "step": 1439 }, { "epoch": 0.2660784608100149, "grad_norm": 0.06969458609819412, "learning_rate": 1.98205774707076e-05, "loss": 0.6038533449172974, "step": 1440 }, { "epoch": 0.26626323751891073, "grad_norm": 0.0858176127076149, "learning_rate": 1.9820201073291176e-05, "loss": 0.7014481425285339, "step": 1441 }, { "epoch": 0.26644801422780656, "grad_norm": 0.0728302150964737, "learning_rate": 1.9819824285061466e-05, "loss": 0.636441171169281, "step": 1442 }, { "epoch": 0.26663279093670245, "grad_norm": 0.08914124220609665, "learning_rate": 1.9819447106033476e-05, "loss": 0.7722464799880981, "step": 1443 }, { "epoch": 0.2668175676455983, "grad_norm": 0.07109539955854416, "learning_rate": 1.981906953622221e-05, "loss": 0.6806744337081909, "step": 1444 }, { "epoch": 0.2670023443544941, "grad_norm": 0.07968464493751526, "learning_rate": 1.98186915756427e-05, "loss": 0.7673649191856384, "step": 1445 }, { "epoch": 0.26718712106339, "grad_norm": 0.08722911030054092, "learning_rate": 1.981831322430998e-05, "loss": 0.6988089084625244, "step": 1446 }, { "epoch": 0.2673718977722858, "grad_norm": 0.08498848229646683, "learning_rate": 1.981793448223911e-05, "loss": 0.7942920923233032, "step": 1447 }, { "epoch": 0.26755667448118164, "grad_norm": 0.08321723341941833, "learning_rate": 1.981755534944517e-05, "loss": 0.8039382696151733, "step": 1448 }, { "epoch": 0.2677414511900775, "grad_norm": 0.061859726905822754, "learning_rate": 1.981717582594324e-05, "loss": 0.5708411335945129, "step": 1449 }, { "epoch": 0.26792622789897336, "grad_norm": 0.08954576402902603, "learning_rate": 1.9816795911748422e-05, "loss": 0.621968686580658, "step": 1450 }, { "epoch": 0.2681110046078692, "grad_norm": 0.08501655608415604, "learning_rate": 1.9816415606875844e-05, "loss": 0.8544004559516907, "step": 1451 }, { "epoch": 0.268295781316765, "grad_norm": 0.06574973464012146, "learning_rate": 1.9816034911340635e-05, "loss": 0.6134265065193176, "step": 1452 }, { "epoch": 0.26848055802566084, "grad_norm": 0.04690899699926376, "learning_rate": 1.9815653825157944e-05, "loss": 0.4340095818042755, "step": 1453 }, { "epoch": 0.2686653347345567, "grad_norm": 0.09002692997455597, "learning_rate": 1.9815272348342947e-05, "loss": 0.758969783782959, "step": 1454 }, { "epoch": 0.26885011144345256, "grad_norm": 0.08969945460557938, "learning_rate": 1.9814890480910815e-05, "loss": 0.7402480840682983, "step": 1455 }, { "epoch": 0.2690348881523484, "grad_norm": 0.08519411087036133, "learning_rate": 1.9814508222876747e-05, "loss": 0.6333010792732239, "step": 1456 }, { "epoch": 0.26921966486124427, "grad_norm": 0.08576565235853195, "learning_rate": 1.9814125574255957e-05, "loss": 0.6507075428962708, "step": 1457 }, { "epoch": 0.2694044415701401, "grad_norm": 0.07842403650283813, "learning_rate": 1.9813742535063677e-05, "loss": 0.7630261182785034, "step": 1458 }, { "epoch": 0.2695892182790359, "grad_norm": 0.09197118133306503, "learning_rate": 1.9813359105315144e-05, "loss": 0.8912684321403503, "step": 1459 }, { "epoch": 0.26977399498793175, "grad_norm": 0.07204412668943405, "learning_rate": 1.9812975285025624e-05, "loss": 0.6835379600524902, "step": 1460 }, { "epoch": 0.26995877169682764, "grad_norm": 0.08579453825950623, "learning_rate": 1.9812591074210385e-05, "loss": 0.6865627765655518, "step": 1461 }, { "epoch": 0.27014354840572347, "grad_norm": 0.07697734236717224, "learning_rate": 1.9812206472884725e-05, "loss": 0.8023110628128052, "step": 1462 }, { "epoch": 0.2703283251146193, "grad_norm": 0.08861850947141647, "learning_rate": 1.9811821481063943e-05, "loss": 0.7018006443977356, "step": 1463 }, { "epoch": 0.2705131018235151, "grad_norm": 0.08721664547920227, "learning_rate": 1.981143609876336e-05, "loss": 0.6645216345787048, "step": 1464 }, { "epoch": 0.270697878532411, "grad_norm": 0.08017013221979141, "learning_rate": 1.9811050325998323e-05, "loss": 0.4490172564983368, "step": 1465 }, { "epoch": 0.27088265524130684, "grad_norm": 0.07376329600811005, "learning_rate": 1.9810664162784176e-05, "loss": 0.6234690546989441, "step": 1466 }, { "epoch": 0.27106743195020266, "grad_norm": 0.07300654798746109, "learning_rate": 1.981027760913629e-05, "loss": 0.6264599561691284, "step": 1467 }, { "epoch": 0.27125220865909855, "grad_norm": 0.09759058058261871, "learning_rate": 1.980989066507004e-05, "loss": 0.9196485280990601, "step": 1468 }, { "epoch": 0.2714369853679944, "grad_norm": 0.0837840735912323, "learning_rate": 1.980950333060084e-05, "loss": 0.530738890171051, "step": 1469 }, { "epoch": 0.2716217620768902, "grad_norm": 0.07600148022174835, "learning_rate": 1.9809115605744097e-05, "loss": 0.5040048360824585, "step": 1470 }, { "epoch": 0.27180653878578603, "grad_norm": 0.07696036994457245, "learning_rate": 1.9808727490515238e-05, "loss": 0.7642129063606262, "step": 1471 }, { "epoch": 0.2719913154946819, "grad_norm": 0.08447156101465225, "learning_rate": 1.9808338984929717e-05, "loss": 0.7772355675697327, "step": 1472 }, { "epoch": 0.27217609220357775, "grad_norm": 0.09380346536636353, "learning_rate": 1.980795008900299e-05, "loss": 0.6633673906326294, "step": 1473 }, { "epoch": 0.2723608689124736, "grad_norm": 0.08995403349399567, "learning_rate": 1.9807560802750533e-05, "loss": 0.7711108922958374, "step": 1474 }, { "epoch": 0.2725456456213694, "grad_norm": 0.07373813539743423, "learning_rate": 1.9807171126187838e-05, "loss": 0.5689606666564941, "step": 1475 }, { "epoch": 0.2727304223302653, "grad_norm": 0.07707108557224274, "learning_rate": 1.980678105933042e-05, "loss": 0.5939176678657532, "step": 1476 }, { "epoch": 0.2729151990391611, "grad_norm": 0.05576924607157707, "learning_rate": 1.980639060219379e-05, "loss": 0.5002748370170593, "step": 1477 }, { "epoch": 0.27309997574805694, "grad_norm": 0.09329205006361008, "learning_rate": 1.9805999754793503e-05, "loss": 0.9085617065429688, "step": 1478 }, { "epoch": 0.27328475245695283, "grad_norm": 0.0745263397693634, "learning_rate": 1.9805608517145098e-05, "loss": 0.7259835600852966, "step": 1479 }, { "epoch": 0.27346952916584866, "grad_norm": 0.08092562109231949, "learning_rate": 1.9805216889264155e-05, "loss": 0.7348419427871704, "step": 1480 }, { "epoch": 0.2736543058747445, "grad_norm": 0.08193597197532654, "learning_rate": 1.9804824871166254e-05, "loss": 0.6958684325218201, "step": 1481 }, { "epoch": 0.2738390825836403, "grad_norm": 0.07525932043790817, "learning_rate": 1.9804432462867002e-05, "loss": 0.8857287764549255, "step": 1482 }, { "epoch": 0.2740238592925362, "grad_norm": 0.09673633426427841, "learning_rate": 1.9804039664382007e-05, "loss": 0.9581448435783386, "step": 1483 }, { "epoch": 0.274208636001432, "grad_norm": 0.09613839536905289, "learning_rate": 1.980364647572691e-05, "loss": 0.956735372543335, "step": 1484 }, { "epoch": 0.27439341271032786, "grad_norm": 0.07544142007827759, "learning_rate": 1.9803252896917356e-05, "loss": 0.7517312169075012, "step": 1485 }, { "epoch": 0.2745781894192237, "grad_norm": 0.07626946270465851, "learning_rate": 1.9802858927969004e-05, "loss": 0.6393277645111084, "step": 1486 }, { "epoch": 0.27476296612811957, "grad_norm": 0.07800044864416122, "learning_rate": 1.980246456889754e-05, "loss": 0.6568846106529236, "step": 1487 }, { "epoch": 0.2749477428370154, "grad_norm": 0.07493487000465393, "learning_rate": 1.9802069819718652e-05, "loss": 0.6556905508041382, "step": 1488 }, { "epoch": 0.2751325195459112, "grad_norm": 0.07483842968940735, "learning_rate": 1.980167468044805e-05, "loss": 0.8307350873947144, "step": 1489 }, { "epoch": 0.2753172962548071, "grad_norm": 0.09129715710878372, "learning_rate": 1.9801279151101464e-05, "loss": 1.0061156749725342, "step": 1490 }, { "epoch": 0.27550207296370294, "grad_norm": 0.08590186387300491, "learning_rate": 1.980088323169463e-05, "loss": 0.8525320887565613, "step": 1491 }, { "epoch": 0.27568684967259877, "grad_norm": 0.06311880052089691, "learning_rate": 1.9800486922243306e-05, "loss": 0.5494281053543091, "step": 1492 }, { "epoch": 0.2758716263814946, "grad_norm": 0.07645311206579208, "learning_rate": 1.9800090222763265e-05, "loss": 0.8318729996681213, "step": 1493 }, { "epoch": 0.2760564030903905, "grad_norm": 0.0779624953866005, "learning_rate": 1.9799693133270294e-05, "loss": 0.7272801399230957, "step": 1494 }, { "epoch": 0.2762411797992863, "grad_norm": 0.07608146965503693, "learning_rate": 1.9799295653780197e-05, "loss": 0.6888601183891296, "step": 1495 }, { "epoch": 0.27642595650818214, "grad_norm": 0.07562585175037384, "learning_rate": 1.979889778430879e-05, "loss": 0.6577703952789307, "step": 1496 }, { "epoch": 0.27661073321707796, "grad_norm": 0.07444547116756439, "learning_rate": 1.979849952487191e-05, "loss": 0.7057503461837769, "step": 1497 }, { "epoch": 0.27679550992597385, "grad_norm": 0.12092580646276474, "learning_rate": 1.97981008754854e-05, "loss": 0.4695255756378174, "step": 1498 }, { "epoch": 0.2769802866348697, "grad_norm": 0.06313570588827133, "learning_rate": 1.979770183616513e-05, "loss": 0.5444561839103699, "step": 1499 }, { "epoch": 0.2771650633437655, "grad_norm": 0.08924681693315506, "learning_rate": 1.9797302406926984e-05, "loss": 0.7740501165390015, "step": 1500 }, { "epoch": 0.2771650633437655, "eval_loss": 0.7722002267837524, "eval_runtime": 157.6041, "eval_samples_per_second": 115.663, "eval_steps_per_second": 14.46, "step": 1500 }, { "epoch": 0.2773498400526614, "grad_norm": 0.0823410302400589, "learning_rate": 1.979690258778685e-05, "loss": 0.7805835604667664, "step": 1501 }, { "epoch": 0.2775346167615572, "grad_norm": 0.08543116599321365, "learning_rate": 1.9796502378760647e-05, "loss": 0.7953980565071106, "step": 1502 }, { "epoch": 0.27771939347045305, "grad_norm": 0.07848764955997467, "learning_rate": 1.9796101779864296e-05, "loss": 0.7930999994277954, "step": 1503 }, { "epoch": 0.2779041701793489, "grad_norm": 0.06313935667276382, "learning_rate": 1.9795700791113744e-05, "loss": 0.5610463619232178, "step": 1504 }, { "epoch": 0.27808894688824476, "grad_norm": 0.08681435137987137, "learning_rate": 1.9795299412524948e-05, "loss": 0.7095720767974854, "step": 1505 }, { "epoch": 0.2782737235971406, "grad_norm": 0.08366895467042923, "learning_rate": 1.9794897644113876e-05, "loss": 0.7479497194290161, "step": 1506 }, { "epoch": 0.2784585003060364, "grad_norm": 0.1002199575304985, "learning_rate": 1.9794495485896528e-05, "loss": 0.7468428611755371, "step": 1507 }, { "epoch": 0.27864327701493224, "grad_norm": 0.047667015343904495, "learning_rate": 1.9794092937888902e-05, "loss": 0.4573146402835846, "step": 1508 }, { "epoch": 0.27882805372382813, "grad_norm": 0.05945112183690071, "learning_rate": 1.9793690000107017e-05, "loss": 0.639329731464386, "step": 1509 }, { "epoch": 0.27901283043272396, "grad_norm": 0.06146444007754326, "learning_rate": 1.9793286672566905e-05, "loss": 0.5720182657241821, "step": 1510 }, { "epoch": 0.2791976071416198, "grad_norm": 0.08685909956693649, "learning_rate": 1.979288295528463e-05, "loss": 0.7960376739501953, "step": 1511 }, { "epoch": 0.27938238385051567, "grad_norm": 0.05757363140583038, "learning_rate": 1.979247884827625e-05, "loss": 0.5274953842163086, "step": 1512 }, { "epoch": 0.2795671605594115, "grad_norm": 0.05990239977836609, "learning_rate": 1.9792074351557852e-05, "loss": 0.4350784420967102, "step": 1513 }, { "epoch": 0.2797519372683073, "grad_norm": 0.09007871150970459, "learning_rate": 1.9791669465145525e-05, "loss": 0.6337308287620544, "step": 1514 }, { "epoch": 0.27993671397720316, "grad_norm": 0.07639160007238388, "learning_rate": 1.979126418905539e-05, "loss": 0.6822291016578674, "step": 1515 }, { "epoch": 0.28012149068609904, "grad_norm": 0.0802912637591362, "learning_rate": 1.979085852330357e-05, "loss": 0.747983455657959, "step": 1516 }, { "epoch": 0.28030626739499487, "grad_norm": 0.0773995965719223, "learning_rate": 1.9790452467906216e-05, "loss": 0.7091318368911743, "step": 1517 }, { "epoch": 0.2804910441038907, "grad_norm": 0.0977666899561882, "learning_rate": 1.9790046022879482e-05, "loss": 0.8170154690742493, "step": 1518 }, { "epoch": 0.2806758208127865, "grad_norm": 0.06563309580087662, "learning_rate": 1.9789639188239548e-05, "loss": 0.68996262550354, "step": 1519 }, { "epoch": 0.2808605975216824, "grad_norm": 0.07695797830820084, "learning_rate": 1.97892319640026e-05, "loss": 0.7316287755966187, "step": 1520 }, { "epoch": 0.28104537423057824, "grad_norm": 0.0802626758813858, "learning_rate": 1.9788824350184845e-05, "loss": 0.8406566977500916, "step": 1521 }, { "epoch": 0.28123015093947407, "grad_norm": 0.08003909885883331, "learning_rate": 1.9788416346802508e-05, "loss": 0.7299249768257141, "step": 1522 }, { "epoch": 0.28141492764836995, "grad_norm": 0.09115131944417953, "learning_rate": 1.9788007953871825e-05, "loss": 1.0389716625213623, "step": 1523 }, { "epoch": 0.2815997043572658, "grad_norm": 0.08853866904973984, "learning_rate": 1.9787599171409047e-05, "loss": 0.9766581058502197, "step": 1524 }, { "epoch": 0.2817844810661616, "grad_norm": 0.09646085649728775, "learning_rate": 1.9787189999430443e-05, "loss": 0.8425481915473938, "step": 1525 }, { "epoch": 0.28196925777505744, "grad_norm": 0.07125802338123322, "learning_rate": 1.97867804379523e-05, "loss": 0.4887961149215698, "step": 1526 }, { "epoch": 0.2821540344839533, "grad_norm": 0.07870358228683472, "learning_rate": 1.9786370486990912e-05, "loss": 0.772543728351593, "step": 1527 }, { "epoch": 0.28233881119284915, "grad_norm": 0.09730803221464157, "learning_rate": 1.97859601465626e-05, "loss": 0.7736526727676392, "step": 1528 }, { "epoch": 0.282523587901745, "grad_norm": 0.0814560204744339, "learning_rate": 1.9785549416683685e-05, "loss": 0.8670487403869629, "step": 1529 }, { "epoch": 0.2827083646106408, "grad_norm": 0.09567133337259293, "learning_rate": 1.9785138297370522e-05, "loss": 0.920782208442688, "step": 1530 }, { "epoch": 0.2828931413195367, "grad_norm": 0.09724999219179153, "learning_rate": 1.9784726788639467e-05, "loss": 0.7418619990348816, "step": 1531 }, { "epoch": 0.2830779180284325, "grad_norm": 0.08737719058990479, "learning_rate": 1.97843148905069e-05, "loss": 0.6391053199768066, "step": 1532 }, { "epoch": 0.28326269473732835, "grad_norm": 0.07193499058485031, "learning_rate": 1.978390260298921e-05, "loss": 0.6396481394767761, "step": 1533 }, { "epoch": 0.28344747144622423, "grad_norm": 0.08356016874313354, "learning_rate": 1.9783489926102803e-05, "loss": 0.9004808068275452, "step": 1534 }, { "epoch": 0.28363224815512006, "grad_norm": 0.07595232129096985, "learning_rate": 1.978307685986411e-05, "loss": 0.6460784077644348, "step": 1535 }, { "epoch": 0.2838170248640159, "grad_norm": 0.1030508354306221, "learning_rate": 1.9782663404289563e-05, "loss": 0.9966877102851868, "step": 1536 }, { "epoch": 0.2840018015729117, "grad_norm": 0.07105007767677307, "learning_rate": 1.978224955939562e-05, "loss": 0.8565782904624939, "step": 1537 }, { "epoch": 0.2841865782818076, "grad_norm": 0.08711280673742294, "learning_rate": 1.978183532519875e-05, "loss": 0.7412756085395813, "step": 1538 }, { "epoch": 0.28437135499070343, "grad_norm": 0.08922568708658218, "learning_rate": 1.9781420701715438e-05, "loss": 0.7539790272712708, "step": 1539 }, { "epoch": 0.28455613169959926, "grad_norm": 0.06241748109459877, "learning_rate": 1.9781005688962182e-05, "loss": 0.5449891090393066, "step": 1540 }, { "epoch": 0.2847409084084951, "grad_norm": 0.0753631517291069, "learning_rate": 1.9780590286955502e-05, "loss": 0.7677814364433289, "step": 1541 }, { "epoch": 0.28492568511739097, "grad_norm": 0.09720226377248764, "learning_rate": 1.9780174495711927e-05, "loss": 0.8704606294631958, "step": 1542 }, { "epoch": 0.2851104618262868, "grad_norm": 0.06104160472750664, "learning_rate": 1.9779758315248006e-05, "loss": 0.6052356362342834, "step": 1543 }, { "epoch": 0.2852952385351826, "grad_norm": 0.0829816535115242, "learning_rate": 1.97793417455803e-05, "loss": 0.9357346892356873, "step": 1544 }, { "epoch": 0.2854800152440785, "grad_norm": 0.07320324331521988, "learning_rate": 1.9778924786725388e-05, "loss": 0.6604475378990173, "step": 1545 }, { "epoch": 0.28566479195297434, "grad_norm": 0.06625904887914658, "learning_rate": 1.9778507438699864e-05, "loss": 0.503479540348053, "step": 1546 }, { "epoch": 0.28584956866187017, "grad_norm": 0.08408404886722565, "learning_rate": 1.977808970152034e-05, "loss": 0.7940577268600464, "step": 1547 }, { "epoch": 0.286034345370766, "grad_norm": 0.08889124542474747, "learning_rate": 1.977767157520343e-05, "loss": 0.6014382243156433, "step": 1548 }, { "epoch": 0.2862191220796619, "grad_norm": 0.07590803503990173, "learning_rate": 1.977725305976579e-05, "loss": 0.7175227403640747, "step": 1549 }, { "epoch": 0.2864038987885577, "grad_norm": 0.08566652983427048, "learning_rate": 1.9776834155224066e-05, "loss": 0.652517557144165, "step": 1550 }, { "epoch": 0.28658867549745354, "grad_norm": 0.0753151997923851, "learning_rate": 1.9776414861594925e-05, "loss": 0.5912181735038757, "step": 1551 }, { "epoch": 0.28677345220634937, "grad_norm": 0.08349766582250595, "learning_rate": 1.9775995178895064e-05, "loss": 0.8511011600494385, "step": 1552 }, { "epoch": 0.28695822891524525, "grad_norm": 0.08739432692527771, "learning_rate": 1.977557510714118e-05, "loss": 0.7831947207450867, "step": 1553 }, { "epoch": 0.2871430056241411, "grad_norm": 0.10048934072256088, "learning_rate": 1.977515464634999e-05, "loss": 0.769275963306427, "step": 1554 }, { "epoch": 0.2873277823330369, "grad_norm": 0.06515824794769287, "learning_rate": 1.9774733796538226e-05, "loss": 0.5589236617088318, "step": 1555 }, { "epoch": 0.2875125590419328, "grad_norm": 0.09635279327630997, "learning_rate": 1.9774312557722638e-05, "loss": 0.8554244637489319, "step": 1556 }, { "epoch": 0.2876973357508286, "grad_norm": 0.071099191904068, "learning_rate": 1.9773890929919993e-05, "loss": 0.9100094437599182, "step": 1557 }, { "epoch": 0.28788211245972445, "grad_norm": 0.07915978878736496, "learning_rate": 1.9773468913147066e-05, "loss": 0.708828866481781, "step": 1558 }, { "epoch": 0.2880668891686203, "grad_norm": 0.0634954422712326, "learning_rate": 1.977304650742065e-05, "loss": 0.6176474094390869, "step": 1559 }, { "epoch": 0.28825166587751616, "grad_norm": 0.08128131926059723, "learning_rate": 1.977262371275756e-05, "loss": 0.7375856041908264, "step": 1560 }, { "epoch": 0.288436442586412, "grad_norm": 0.07046988606452942, "learning_rate": 1.9772200529174625e-05, "loss": 0.5920605659484863, "step": 1561 }, { "epoch": 0.2886212192953078, "grad_norm": 0.081866554915905, "learning_rate": 1.977177695668868e-05, "loss": 0.7324924468994141, "step": 1562 }, { "epoch": 0.28880599600420365, "grad_norm": 0.06897556036710739, "learning_rate": 1.9771352995316585e-05, "loss": 0.46013137698173523, "step": 1563 }, { "epoch": 0.28899077271309953, "grad_norm": 0.04982367157936096, "learning_rate": 1.977092864507521e-05, "loss": 0.4955293536186218, "step": 1564 }, { "epoch": 0.28917554942199536, "grad_norm": 0.057923175394535065, "learning_rate": 1.9770503905981444e-05, "loss": 0.48194465041160583, "step": 1565 }, { "epoch": 0.2893603261308912, "grad_norm": 0.08413425087928772, "learning_rate": 1.9770078778052192e-05, "loss": 0.6813338994979858, "step": 1566 }, { "epoch": 0.28954510283978707, "grad_norm": 0.07808249443769455, "learning_rate": 1.976965326130437e-05, "loss": 0.6345247030258179, "step": 1567 }, { "epoch": 0.2897298795486829, "grad_norm": 0.07906540483236313, "learning_rate": 1.9769227355754913e-05, "loss": 0.693480372428894, "step": 1568 }, { "epoch": 0.28991465625757873, "grad_norm": 0.12125765532255173, "learning_rate": 1.9768801061420774e-05, "loss": 1.1367865800857544, "step": 1569 }, { "epoch": 0.29009943296647456, "grad_norm": 0.0932023897767067, "learning_rate": 1.9768374378318915e-05, "loss": 0.7568378448486328, "step": 1570 }, { "epoch": 0.29028420967537044, "grad_norm": 0.09901861846446991, "learning_rate": 1.9767947306466318e-05, "loss": 0.687638521194458, "step": 1571 }, { "epoch": 0.29046898638426627, "grad_norm": 0.08295369148254395, "learning_rate": 1.9767519845879975e-05, "loss": 0.6895972490310669, "step": 1572 }, { "epoch": 0.2906537630931621, "grad_norm": 0.08799974620342255, "learning_rate": 1.97670919965769e-05, "loss": 0.6818599104881287, "step": 1573 }, { "epoch": 0.2908385398020579, "grad_norm": 0.09332655370235443, "learning_rate": 1.9766663758574122e-05, "loss": 0.9478365778923035, "step": 1574 }, { "epoch": 0.2910233165109538, "grad_norm": 0.06791740655899048, "learning_rate": 1.9766235131888684e-05, "loss": 0.5521538257598877, "step": 1575 }, { "epoch": 0.29120809321984964, "grad_norm": 0.07543913275003433, "learning_rate": 1.9765806116537642e-05, "loss": 0.8613795042037964, "step": 1576 }, { "epoch": 0.29139286992874547, "grad_norm": 0.08647683262825012, "learning_rate": 1.9765376712538067e-05, "loss": 0.7674567699432373, "step": 1577 }, { "epoch": 0.29157764663764135, "grad_norm": 0.07757703959941864, "learning_rate": 1.9764946919907054e-05, "loss": 0.6646807193756104, "step": 1578 }, { "epoch": 0.2917624233465372, "grad_norm": 0.061070188879966736, "learning_rate": 1.9764516738661706e-05, "loss": 0.4340011775493622, "step": 1579 }, { "epoch": 0.291947200055433, "grad_norm": 0.07494571805000305, "learning_rate": 1.9764086168819136e-05, "loss": 0.6426244974136353, "step": 1580 }, { "epoch": 0.29213197676432884, "grad_norm": 0.08665431290864944, "learning_rate": 1.9763655210396488e-05, "loss": 0.7494552731513977, "step": 1581 }, { "epoch": 0.2923167534732247, "grad_norm": 0.08033335208892822, "learning_rate": 1.976322386341091e-05, "loss": 0.6891940832138062, "step": 1582 }, { "epoch": 0.29250153018212055, "grad_norm": 0.07510875910520554, "learning_rate": 1.976279212787956e-05, "loss": 0.6621313095092773, "step": 1583 }, { "epoch": 0.2926863068910164, "grad_norm": 0.10144571214914322, "learning_rate": 1.9762360003819637e-05, "loss": 1.0374672412872314, "step": 1584 }, { "epoch": 0.2928710835999122, "grad_norm": 0.07121730595827103, "learning_rate": 1.9761927491248323e-05, "loss": 0.7357268929481506, "step": 1585 }, { "epoch": 0.2930558603088081, "grad_norm": 0.073287732899189, "learning_rate": 1.9761494590182834e-05, "loss": 0.6488692164421082, "step": 1586 }, { "epoch": 0.2932406370177039, "grad_norm": 0.062283504754304886, "learning_rate": 1.9761061300640405e-05, "loss": 0.5477706789970398, "step": 1587 }, { "epoch": 0.29342541372659975, "grad_norm": 0.07306306064128876, "learning_rate": 1.9760627622638272e-05, "loss": 0.7222064137458801, "step": 1588 }, { "epoch": 0.29361019043549563, "grad_norm": 0.09012686461210251, "learning_rate": 1.9760193556193697e-05, "loss": 0.7955993413925171, "step": 1589 }, { "epoch": 0.29379496714439146, "grad_norm": 0.09550463408231735, "learning_rate": 1.9759759101323953e-05, "loss": 0.9095275402069092, "step": 1590 }, { "epoch": 0.2939797438532873, "grad_norm": 0.08157458901405334, "learning_rate": 1.9759324258046336e-05, "loss": 0.7226804494857788, "step": 1591 }, { "epoch": 0.2941645205621831, "grad_norm": 0.0810418352484703, "learning_rate": 1.9758889026378142e-05, "loss": 0.7138922214508057, "step": 1592 }, { "epoch": 0.294349297271079, "grad_norm": 0.08949844539165497, "learning_rate": 1.97584534063367e-05, "loss": 0.8396944999694824, "step": 1593 }, { "epoch": 0.29453407397997483, "grad_norm": 0.07083319872617722, "learning_rate": 1.975801739793934e-05, "loss": 0.6845517158508301, "step": 1594 }, { "epoch": 0.29471885068887066, "grad_norm": 0.08076249808073044, "learning_rate": 1.9757581001203414e-05, "loss": 0.7636187672615051, "step": 1595 }, { "epoch": 0.2949036273977665, "grad_norm": 0.073576420545578, "learning_rate": 1.9757144216146296e-05, "loss": 0.6097922921180725, "step": 1596 }, { "epoch": 0.29508840410666237, "grad_norm": 0.07440821081399918, "learning_rate": 1.975670704278536e-05, "loss": 0.6421791911125183, "step": 1597 }, { "epoch": 0.2952731808155582, "grad_norm": 0.09239313751459122, "learning_rate": 1.9756269481138015e-05, "loss": 0.9637513756752014, "step": 1598 }, { "epoch": 0.29545795752445403, "grad_norm": 0.07679717242717743, "learning_rate": 1.9755831531221663e-05, "loss": 0.6917498111724854, "step": 1599 }, { "epoch": 0.2956427342333499, "grad_norm": 0.07973774522542953, "learning_rate": 1.975539319305374e-05, "loss": 0.7043184041976929, "step": 1600 }, { "epoch": 0.29582751094224574, "grad_norm": 0.08145955204963684, "learning_rate": 1.9754954466651688e-05, "loss": 0.9160181283950806, "step": 1601 }, { "epoch": 0.29601228765114157, "grad_norm": 0.07598098367452621, "learning_rate": 1.9754515352032967e-05, "loss": 0.7016775608062744, "step": 1602 }, { "epoch": 0.2961970643600374, "grad_norm": 0.07639916241168976, "learning_rate": 1.9754075849215056e-05, "loss": 0.617500364780426, "step": 1603 }, { "epoch": 0.2963818410689333, "grad_norm": 0.08500184863805771, "learning_rate": 1.975363595821544e-05, "loss": 0.6746414303779602, "step": 1604 }, { "epoch": 0.2965666177778291, "grad_norm": 0.088084377348423, "learning_rate": 1.975319567905163e-05, "loss": 0.6506028771400452, "step": 1605 }, { "epoch": 0.29675139448672494, "grad_norm": 0.06195947155356407, "learning_rate": 1.9752755011741142e-05, "loss": 0.5611916780471802, "step": 1606 }, { "epoch": 0.29693617119562077, "grad_norm": 0.07300426065921783, "learning_rate": 1.9752313956301518e-05, "loss": 0.724642276763916, "step": 1607 }, { "epoch": 0.29712094790451665, "grad_norm": 0.06841021031141281, "learning_rate": 1.9751872512750314e-05, "loss": 0.6004761457443237, "step": 1608 }, { "epoch": 0.2973057246134125, "grad_norm": 0.07275708764791489, "learning_rate": 1.975143068110509e-05, "loss": 0.8002687692642212, "step": 1609 }, { "epoch": 0.2974905013223083, "grad_norm": 0.06573881953954697, "learning_rate": 1.9750988461383432e-05, "loss": 0.5710200071334839, "step": 1610 }, { "epoch": 0.2976752780312042, "grad_norm": 0.09672277420759201, "learning_rate": 1.975054585360294e-05, "loss": 0.8065577149391174, "step": 1611 }, { "epoch": 0.2978600547401, "grad_norm": 0.05756446346640587, "learning_rate": 1.9750102857781234e-05, "loss": 0.5338827967643738, "step": 1612 }, { "epoch": 0.29804483144899585, "grad_norm": 0.0928102657198906, "learning_rate": 1.9749659473935937e-05, "loss": 0.882835865020752, "step": 1613 }, { "epoch": 0.2982296081578917, "grad_norm": 0.08123216778039932, "learning_rate": 1.9749215702084693e-05, "loss": 0.743979275226593, "step": 1614 }, { "epoch": 0.29841438486678756, "grad_norm": 0.08603404462337494, "learning_rate": 1.9748771542245167e-05, "loss": 0.5993881821632385, "step": 1615 }, { "epoch": 0.2985991615756834, "grad_norm": 0.08198370784521103, "learning_rate": 1.974832699443503e-05, "loss": 0.811705470085144, "step": 1616 }, { "epoch": 0.2987839382845792, "grad_norm": 0.061221882700920105, "learning_rate": 1.9747882058671982e-05, "loss": 0.5289203524589539, "step": 1617 }, { "epoch": 0.29896871499347505, "grad_norm": 0.0895276740193367, "learning_rate": 1.9747436734973722e-05, "loss": 0.8075624108314514, "step": 1618 }, { "epoch": 0.29915349170237093, "grad_norm": 0.10405202955007553, "learning_rate": 1.9746991023357978e-05, "loss": 0.8427689075469971, "step": 1619 }, { "epoch": 0.29933826841126676, "grad_norm": 0.05864064767956734, "learning_rate": 1.9746544923842483e-05, "loss": 0.4327649474143982, "step": 1620 }, { "epoch": 0.2995230451201626, "grad_norm": 0.08024963736534119, "learning_rate": 1.9746098436444997e-05, "loss": 0.7758969068527222, "step": 1621 }, { "epoch": 0.2997078218290585, "grad_norm": 0.09117995947599411, "learning_rate": 1.974565156118328e-05, "loss": 0.8343737125396729, "step": 1622 }, { "epoch": 0.2998925985379543, "grad_norm": 0.08025994151830673, "learning_rate": 1.9745204298075125e-05, "loss": 0.7524757981300354, "step": 1623 }, { "epoch": 0.30007737524685013, "grad_norm": 0.07531878352165222, "learning_rate": 1.9744756647138326e-05, "loss": 0.7118515372276306, "step": 1624 }, { "epoch": 0.30026215195574596, "grad_norm": 0.06404253095388412, "learning_rate": 1.97443086083907e-05, "loss": 0.5755617618560791, "step": 1625 }, { "epoch": 0.30044692866464184, "grad_norm": 0.06982641667127609, "learning_rate": 1.974386018185008e-05, "loss": 0.6257184147834778, "step": 1626 }, { "epoch": 0.30063170537353767, "grad_norm": 0.07712169736623764, "learning_rate": 1.9743411367534306e-05, "loss": 0.7018937468528748, "step": 1627 }, { "epoch": 0.3008164820824335, "grad_norm": 0.07761988788843155, "learning_rate": 1.9742962165461245e-05, "loss": 0.5967124700546265, "step": 1628 }, { "epoch": 0.30100125879132933, "grad_norm": 0.07737915962934494, "learning_rate": 1.974251257564877e-05, "loss": 0.7350765466690063, "step": 1629 }, { "epoch": 0.3011860355002252, "grad_norm": 0.0847436934709549, "learning_rate": 1.9742062598114777e-05, "loss": 0.7208263874053955, "step": 1630 }, { "epoch": 0.30137081220912104, "grad_norm": 0.10813397169113159, "learning_rate": 1.974161223287717e-05, "loss": 0.9324489235877991, "step": 1631 }, { "epoch": 0.30155558891801687, "grad_norm": 0.08521011471748352, "learning_rate": 1.9741161479953872e-05, "loss": 0.7783839106559753, "step": 1632 }, { "epoch": 0.30174036562691275, "grad_norm": 0.08909272402524948, "learning_rate": 1.9740710339362825e-05, "loss": 1.0380836725234985, "step": 1633 }, { "epoch": 0.3019251423358086, "grad_norm": 0.09834171086549759, "learning_rate": 1.9740258811121982e-05, "loss": 0.8330088257789612, "step": 1634 }, { "epoch": 0.3021099190447044, "grad_norm": 0.06591691821813583, "learning_rate": 1.9739806895249312e-05, "loss": 0.6034543514251709, "step": 1635 }, { "epoch": 0.30229469575360024, "grad_norm": 0.0819563940167427, "learning_rate": 1.9739354591762798e-05, "loss": 0.7539654970169067, "step": 1636 }, { "epoch": 0.3024794724624961, "grad_norm": 0.05253589153289795, "learning_rate": 1.973890190068044e-05, "loss": 0.49244508147239685, "step": 1637 }, { "epoch": 0.30266424917139195, "grad_norm": 0.08107242733240128, "learning_rate": 1.973844882202026e-05, "loss": 0.7167231440544128, "step": 1638 }, { "epoch": 0.3028490258802878, "grad_norm": 0.08478766679763794, "learning_rate": 1.9737995355800282e-05, "loss": 0.8549013137817383, "step": 1639 }, { "epoch": 0.3030338025891836, "grad_norm": 0.08669696748256683, "learning_rate": 1.9737541502038556e-05, "loss": 0.9710851907730103, "step": 1640 }, { "epoch": 0.3032185792980795, "grad_norm": 0.0721355527639389, "learning_rate": 1.9737087260753142e-05, "loss": 0.8099871873855591, "step": 1641 }, { "epoch": 0.3034033560069753, "grad_norm": 0.06791551411151886, "learning_rate": 1.9736632631962118e-05, "loss": 0.5181037187576294, "step": 1642 }, { "epoch": 0.30358813271587115, "grad_norm": 0.0887824222445488, "learning_rate": 1.9736177615683577e-05, "loss": 0.9183768033981323, "step": 1643 }, { "epoch": 0.30377290942476703, "grad_norm": 0.06414256989955902, "learning_rate": 1.973572221193563e-05, "loss": 0.6185742020606995, "step": 1644 }, { "epoch": 0.30395768613366286, "grad_norm": 0.0856219157576561, "learning_rate": 1.9735266420736397e-05, "loss": 0.776534914970398, "step": 1645 }, { "epoch": 0.3041424628425587, "grad_norm": 0.08579172939062119, "learning_rate": 1.973481024210402e-05, "loss": 0.7569576501846313, "step": 1646 }, { "epoch": 0.3043272395514545, "grad_norm": 0.07078281790018082, "learning_rate": 1.973435367605665e-05, "loss": 0.7340545654296875, "step": 1647 }, { "epoch": 0.3045120162603504, "grad_norm": 0.06933678686618805, "learning_rate": 1.9733896722612457e-05, "loss": 0.4218800365924835, "step": 1648 }, { "epoch": 0.30469679296924623, "grad_norm": 0.08380237966775894, "learning_rate": 1.9733439381789628e-05, "loss": 0.7405287027359009, "step": 1649 }, { "epoch": 0.30488156967814206, "grad_norm": 0.07812915742397308, "learning_rate": 1.9732981653606367e-05, "loss": 0.9389246702194214, "step": 1650 }, { "epoch": 0.3050663463870379, "grad_norm": 0.08571448922157288, "learning_rate": 1.9732523538080882e-05, "loss": 0.6957086324691772, "step": 1651 }, { "epoch": 0.3052511230959338, "grad_norm": 0.07718904316425323, "learning_rate": 1.9732065035231415e-05, "loss": 0.7227778434753418, "step": 1652 }, { "epoch": 0.3054358998048296, "grad_norm": 0.056292854249477386, "learning_rate": 1.9731606145076204e-05, "loss": 0.4814959168434143, "step": 1653 }, { "epoch": 0.30562067651372543, "grad_norm": 0.06578993052244186, "learning_rate": 1.9731146867633514e-05, "loss": 0.6709810495376587, "step": 1654 }, { "epoch": 0.3058054532226213, "grad_norm": 0.0599241629242897, "learning_rate": 1.9730687202921625e-05, "loss": 0.5246766209602356, "step": 1655 }, { "epoch": 0.30599022993151714, "grad_norm": 0.07661686092615128, "learning_rate": 1.973022715095883e-05, "loss": 0.6775780916213989, "step": 1656 }, { "epoch": 0.30617500664041297, "grad_norm": 0.09185819327831268, "learning_rate": 1.972976671176343e-05, "loss": 0.9404529929161072, "step": 1657 }, { "epoch": 0.3063597833493088, "grad_norm": 0.0755624771118164, "learning_rate": 1.9729305885353766e-05, "loss": 0.7290748357772827, "step": 1658 }, { "epoch": 0.3065445600582047, "grad_norm": 0.06746925413608551, "learning_rate": 1.9728844671748156e-05, "loss": 0.6544076800346375, "step": 1659 }, { "epoch": 0.3067293367671005, "grad_norm": 0.07987582683563232, "learning_rate": 1.9728383070964972e-05, "loss": 0.7994759678840637, "step": 1660 }, { "epoch": 0.30691411347599634, "grad_norm": 0.10454621911048889, "learning_rate": 1.9727921083022577e-05, "loss": 0.8565311431884766, "step": 1661 }, { "epoch": 0.30709889018489217, "grad_norm": 0.08259614557027817, "learning_rate": 1.972745870793936e-05, "loss": 0.8979726433753967, "step": 1662 }, { "epoch": 0.30728366689378805, "grad_norm": 0.07282306253910065, "learning_rate": 1.9726995945733715e-05, "loss": 0.7132416367530823, "step": 1663 }, { "epoch": 0.3074684436026839, "grad_norm": 0.07935132086277008, "learning_rate": 1.9726532796424066e-05, "loss": 0.6348034739494324, "step": 1664 }, { "epoch": 0.3076532203115797, "grad_norm": 0.08050986379384995, "learning_rate": 1.9726069260028838e-05, "loss": 0.6966174244880676, "step": 1665 }, { "epoch": 0.3078379970204756, "grad_norm": 0.07889799773693085, "learning_rate": 1.972560533656649e-05, "loss": 0.7163848280906677, "step": 1666 }, { "epoch": 0.3080227737293714, "grad_norm": 0.11050339043140411, "learning_rate": 1.9725141026055473e-05, "loss": 0.8334489464759827, "step": 1667 }, { "epoch": 0.30820755043826725, "grad_norm": 0.07277112454175949, "learning_rate": 1.9724676328514267e-05, "loss": 0.6129276752471924, "step": 1668 }, { "epoch": 0.3083923271471631, "grad_norm": 0.06715867668390274, "learning_rate": 1.972421124396137e-05, "loss": 0.5154484510421753, "step": 1669 }, { "epoch": 0.30857710385605897, "grad_norm": 0.06250986456871033, "learning_rate": 1.972374577241529e-05, "loss": 0.4796812832355499, "step": 1670 }, { "epoch": 0.3087618805649548, "grad_norm": 0.06873840093612671, "learning_rate": 1.9723279913894547e-05, "loss": 0.6062132120132446, "step": 1671 }, { "epoch": 0.3089466572738506, "grad_norm": 0.06897095590829849, "learning_rate": 1.9722813668417682e-05, "loss": 0.6902108788490295, "step": 1672 }, { "epoch": 0.30913143398274645, "grad_norm": 0.07618202269077301, "learning_rate": 1.972234703600326e-05, "loss": 0.5897256731987, "step": 1673 }, { "epoch": 0.30931621069164233, "grad_norm": 0.08293811231851578, "learning_rate": 1.9721880016669836e-05, "loss": 0.6409721970558167, "step": 1674 }, { "epoch": 0.30950098740053816, "grad_norm": 0.08881959319114685, "learning_rate": 1.9721412610436005e-05, "loss": 0.8430206179618835, "step": 1675 }, { "epoch": 0.309685764109434, "grad_norm": 0.062191516160964966, "learning_rate": 1.9720944817320366e-05, "loss": 0.5813213586807251, "step": 1676 }, { "epoch": 0.3098705408183299, "grad_norm": 0.07826868444681168, "learning_rate": 1.9720476637341538e-05, "loss": 0.5941954255104065, "step": 1677 }, { "epoch": 0.3100553175272257, "grad_norm": 0.07638590037822723, "learning_rate": 1.972000807051815e-05, "loss": 0.512177586555481, "step": 1678 }, { "epoch": 0.31024009423612153, "grad_norm": 0.08276208490133286, "learning_rate": 1.9719539116868852e-05, "loss": 0.6290682554244995, "step": 1679 }, { "epoch": 0.31042487094501736, "grad_norm": 0.08368974179029465, "learning_rate": 1.9719069776412305e-05, "loss": 0.8070504069328308, "step": 1680 }, { "epoch": 0.31060964765391325, "grad_norm": 0.06858662515878677, "learning_rate": 1.9718600049167187e-05, "loss": 0.674410879611969, "step": 1681 }, { "epoch": 0.3107944243628091, "grad_norm": 0.08916907757520676, "learning_rate": 1.9718129935152193e-05, "loss": 0.8041301369667053, "step": 1682 }, { "epoch": 0.3109792010717049, "grad_norm": 0.07412692904472351, "learning_rate": 1.971765943438603e-05, "loss": 0.8276036381721497, "step": 1683 }, { "epoch": 0.31116397778060073, "grad_norm": 0.08200951665639877, "learning_rate": 1.9717188546887428e-05, "loss": 0.6787245273590088, "step": 1684 }, { "epoch": 0.3113487544894966, "grad_norm": 0.0751579999923706, "learning_rate": 1.9716717272675122e-05, "loss": 0.7200093865394592, "step": 1685 }, { "epoch": 0.31153353119839244, "grad_norm": 0.06252330541610718, "learning_rate": 1.9716245611767868e-05, "loss": 0.5830127596855164, "step": 1686 }, { "epoch": 0.3117183079072883, "grad_norm": 0.08460020273923874, "learning_rate": 1.9715773564184436e-05, "loss": 0.9173708558082581, "step": 1687 }, { "epoch": 0.31190308461618416, "grad_norm": 0.07136960327625275, "learning_rate": 1.9715301129943613e-05, "loss": 0.5702857971191406, "step": 1688 }, { "epoch": 0.31208786132508, "grad_norm": 0.08351925015449524, "learning_rate": 1.9714828309064202e-05, "loss": 0.7518531084060669, "step": 1689 }, { "epoch": 0.3122726380339758, "grad_norm": 0.07065819203853607, "learning_rate": 1.9714355101565016e-05, "loss": 0.7276631593704224, "step": 1690 }, { "epoch": 0.31245741474287164, "grad_norm": 0.06705459207296371, "learning_rate": 1.9713881507464888e-05, "loss": 0.6821085810661316, "step": 1691 }, { "epoch": 0.3126421914517675, "grad_norm": 0.09088122099637985, "learning_rate": 1.971340752678267e-05, "loss": 0.8744920492172241, "step": 1692 }, { "epoch": 0.31282696816066335, "grad_norm": 0.08711528778076172, "learning_rate": 1.971293315953722e-05, "loss": 0.7038059830665588, "step": 1693 }, { "epoch": 0.3130117448695592, "grad_norm": 0.07936561852693558, "learning_rate": 1.971245840574742e-05, "loss": 0.7303805947303772, "step": 1694 }, { "epoch": 0.313196521578455, "grad_norm": 0.0750492587685585, "learning_rate": 1.971198326543216e-05, "loss": 0.5951574444770813, "step": 1695 }, { "epoch": 0.3133812982873509, "grad_norm": 0.07795064896345139, "learning_rate": 1.971150773861035e-05, "loss": 0.6603903770446777, "step": 1696 }, { "epoch": 0.3135660749962467, "grad_norm": 0.07857033610343933, "learning_rate": 1.971103182530092e-05, "loss": 0.7110896706581116, "step": 1697 }, { "epoch": 0.31375085170514255, "grad_norm": 0.07372584193944931, "learning_rate": 1.9710555525522802e-05, "loss": 0.5947229862213135, "step": 1698 }, { "epoch": 0.31393562841403844, "grad_norm": 0.06809160858392715, "learning_rate": 1.971007883929495e-05, "loss": 0.6651185750961304, "step": 1699 }, { "epoch": 0.31412040512293427, "grad_norm": 0.05984443426132202, "learning_rate": 1.970960176663635e-05, "loss": 0.40316981077194214, "step": 1700 }, { "epoch": 0.3143051818318301, "grad_norm": 0.11108367145061493, "learning_rate": 1.970912430756597e-05, "loss": 1.125005841255188, "step": 1701 }, { "epoch": 0.3144899585407259, "grad_norm": 0.09628898650407791, "learning_rate": 1.9708646462102818e-05, "loss": 0.708713948726654, "step": 1702 }, { "epoch": 0.3146747352496218, "grad_norm": 0.07885843515396118, "learning_rate": 1.970816823026591e-05, "loss": 0.5283644795417786, "step": 1703 }, { "epoch": 0.31485951195851763, "grad_norm": 0.0594625324010849, "learning_rate": 1.9707689612074286e-05, "loss": 0.4736645221710205, "step": 1704 }, { "epoch": 0.31504428866741346, "grad_norm": 0.09794385731220245, "learning_rate": 1.970721060754698e-05, "loss": 0.9850810170173645, "step": 1705 }, { "epoch": 0.3152290653763093, "grad_norm": 0.07433894276618958, "learning_rate": 1.9706731216703066e-05, "loss": 0.6193894147872925, "step": 1706 }, { "epoch": 0.3154138420852052, "grad_norm": 0.0941387340426445, "learning_rate": 1.970625143956162e-05, "loss": 0.9859358072280884, "step": 1707 }, { "epoch": 0.315598618794101, "grad_norm": 0.06878665834665298, "learning_rate": 1.9705771276141727e-05, "loss": 0.5013079047203064, "step": 1708 }, { "epoch": 0.31578339550299683, "grad_norm": 0.08492394536733627, "learning_rate": 1.970529072646251e-05, "loss": 0.7924782037734985, "step": 1709 }, { "epoch": 0.3159681722118927, "grad_norm": 0.07478411495685577, "learning_rate": 1.970480979054308e-05, "loss": 0.6038049459457397, "step": 1710 }, { "epoch": 0.31615294892078855, "grad_norm": 0.07372638583183289, "learning_rate": 1.9704328468402586e-05, "loss": 0.6259400844573975, "step": 1711 }, { "epoch": 0.3163377256296844, "grad_norm": 0.07732758671045303, "learning_rate": 1.9703846760060175e-05, "loss": 0.7466741800308228, "step": 1712 }, { "epoch": 0.3165225023385802, "grad_norm": 0.07305384427309036, "learning_rate": 1.9703364665535027e-05, "loss": 0.7214388847351074, "step": 1713 }, { "epoch": 0.3167072790474761, "grad_norm": 0.07644770294427872, "learning_rate": 1.9702882184846324e-05, "loss": 0.7326534390449524, "step": 1714 }, { "epoch": 0.3168920557563719, "grad_norm": 0.08744385093450546, "learning_rate": 1.9702399318013265e-05, "loss": 0.8739240169525146, "step": 1715 }, { "epoch": 0.31707683246526774, "grad_norm": 0.06662850826978683, "learning_rate": 1.970191606505507e-05, "loss": 0.6399668455123901, "step": 1716 }, { "epoch": 0.3172616091741636, "grad_norm": 0.08732137829065323, "learning_rate": 1.9701432425990963e-05, "loss": 0.7474459409713745, "step": 1717 }, { "epoch": 0.31744638588305946, "grad_norm": 0.08268634974956512, "learning_rate": 1.9700948400840203e-05, "loss": 0.8560296893119812, "step": 1718 }, { "epoch": 0.3176311625919553, "grad_norm": 0.09292731434106827, "learning_rate": 1.9700463989622048e-05, "loss": 0.905507504940033, "step": 1719 }, { "epoch": 0.3178159393008511, "grad_norm": 0.07785472273826599, "learning_rate": 1.969997919235577e-05, "loss": 0.6575971245765686, "step": 1720 }, { "epoch": 0.318000716009747, "grad_norm": 0.07556314021348953, "learning_rate": 1.969949400906067e-05, "loss": 0.5453295707702637, "step": 1721 }, { "epoch": 0.3181854927186428, "grad_norm": 0.08012852072715759, "learning_rate": 1.9699008439756054e-05, "loss": 0.7174992561340332, "step": 1722 }, { "epoch": 0.31837026942753865, "grad_norm": 0.09103730320930481, "learning_rate": 1.9698522484461248e-05, "loss": 0.6318363547325134, "step": 1723 }, { "epoch": 0.3185550461364345, "grad_norm": 0.08566120266914368, "learning_rate": 1.9698036143195587e-05, "loss": 0.8285524845123291, "step": 1724 }, { "epoch": 0.31873982284533037, "grad_norm": 0.08602138608694077, "learning_rate": 1.9697549415978432e-05, "loss": 0.8359253406524658, "step": 1725 }, { "epoch": 0.3189245995542262, "grad_norm": 0.070762500166893, "learning_rate": 1.9697062302829147e-05, "loss": 0.5441091060638428, "step": 1726 }, { "epoch": 0.319109376263122, "grad_norm": 0.067392498254776, "learning_rate": 1.9696574803767124e-05, "loss": 0.5648691654205322, "step": 1727 }, { "epoch": 0.31929415297201785, "grad_norm": 0.07054133713245392, "learning_rate": 1.9696086918811757e-05, "loss": 0.6661797761917114, "step": 1728 }, { "epoch": 0.31947892968091374, "grad_norm": 0.07675296068191528, "learning_rate": 1.9695598647982467e-05, "loss": 0.5624269843101501, "step": 1729 }, { "epoch": 0.31966370638980957, "grad_norm": 0.08781984448432922, "learning_rate": 1.9695109991298686e-05, "loss": 0.7983731031417847, "step": 1730 }, { "epoch": 0.3198484830987054, "grad_norm": 0.0866129994392395, "learning_rate": 1.9694620948779857e-05, "loss": 0.700450599193573, "step": 1731 }, { "epoch": 0.3200332598076013, "grad_norm": 0.07832545787096024, "learning_rate": 1.9694131520445445e-05, "loss": 0.7811502814292908, "step": 1732 }, { "epoch": 0.3202180365164971, "grad_norm": 0.09444748610258102, "learning_rate": 1.9693641706314926e-05, "loss": 0.8800259828567505, "step": 1733 }, { "epoch": 0.32040281322539294, "grad_norm": 0.08850855380296707, "learning_rate": 1.9693151506407798e-05, "loss": 0.8175960183143616, "step": 1734 }, { "epoch": 0.32058758993428876, "grad_norm": 0.07670994848012924, "learning_rate": 1.9692660920743566e-05, "loss": 0.7358960509300232, "step": 1735 }, { "epoch": 0.32077236664318465, "grad_norm": 0.09213895350694656, "learning_rate": 1.9692169949341753e-05, "loss": 0.9158545732498169, "step": 1736 }, { "epoch": 0.3209571433520805, "grad_norm": 0.07196273654699326, "learning_rate": 1.9691678592221902e-05, "loss": 0.6552197933197021, "step": 1737 }, { "epoch": 0.3211419200609763, "grad_norm": 0.10566510260105133, "learning_rate": 1.969118684940356e-05, "loss": 0.727062463760376, "step": 1738 }, { "epoch": 0.32132669676987213, "grad_norm": 0.09496032446622849, "learning_rate": 1.96906947209063e-05, "loss": 0.9609939455986023, "step": 1739 }, { "epoch": 0.321511473478768, "grad_norm": 0.09011634439229965, "learning_rate": 1.9690202206749713e-05, "loss": 0.864891767501831, "step": 1740 }, { "epoch": 0.32169625018766385, "grad_norm": 0.09794040024280548, "learning_rate": 1.9689709306953393e-05, "loss": 0.9009979963302612, "step": 1741 }, { "epoch": 0.3218810268965597, "grad_norm": 0.07013952732086182, "learning_rate": 1.9689216021536956e-05, "loss": 0.5138965249061584, "step": 1742 }, { "epoch": 0.32206580360545556, "grad_norm": 0.07850988954305649, "learning_rate": 1.9688722350520035e-05, "loss": 0.7609305381774902, "step": 1743 }, { "epoch": 0.3222505803143514, "grad_norm": 0.09505235403776169, "learning_rate": 1.968822829392228e-05, "loss": 0.9003138542175293, "step": 1744 }, { "epoch": 0.3224353570232472, "grad_norm": 0.08867599815130234, "learning_rate": 1.9687733851763347e-05, "loss": 0.7564271092414856, "step": 1745 }, { "epoch": 0.32262013373214304, "grad_norm": 0.06701550632715225, "learning_rate": 1.9687239024062915e-05, "loss": 0.5547588467597961, "step": 1746 }, { "epoch": 0.32280491044103893, "grad_norm": 0.08799927681684494, "learning_rate": 1.968674381084068e-05, "loss": 0.6986576914787292, "step": 1747 }, { "epoch": 0.32298968714993476, "grad_norm": 0.06552515923976898, "learning_rate": 1.9686248212116345e-05, "loss": 0.4841706454753876, "step": 1748 }, { "epoch": 0.3231744638588306, "grad_norm": 0.06485949456691742, "learning_rate": 1.9685752227909636e-05, "loss": 0.5086471438407898, "step": 1749 }, { "epoch": 0.3233592405677264, "grad_norm": 0.07854238152503967, "learning_rate": 1.9685255858240294e-05, "loss": 0.752724289894104, "step": 1750 }, { "epoch": 0.3235440172766223, "grad_norm": 0.07595296949148178, "learning_rate": 1.9684759103128067e-05, "loss": 0.83503657579422, "step": 1751 }, { "epoch": 0.3237287939855181, "grad_norm": 0.06905639916658401, "learning_rate": 1.9684261962592728e-05, "loss": 0.6583921313285828, "step": 1752 }, { "epoch": 0.32391357069441395, "grad_norm": 0.07187511771917343, "learning_rate": 1.968376443665406e-05, "loss": 0.5287723541259766, "step": 1753 }, { "epoch": 0.32409834740330984, "grad_norm": 0.08390723913908005, "learning_rate": 1.9683266525331865e-05, "loss": 0.8683786392211914, "step": 1754 }, { "epoch": 0.32428312411220567, "grad_norm": 0.056276608258485794, "learning_rate": 1.968276822864596e-05, "loss": 0.4335346817970276, "step": 1755 }, { "epoch": 0.3244679008211015, "grad_norm": 0.0724165067076683, "learning_rate": 1.968226954661617e-05, "loss": 0.5859235525131226, "step": 1756 }, { "epoch": 0.3246526775299973, "grad_norm": 0.07898622006177902, "learning_rate": 1.9681770479262344e-05, "loss": 0.7415960431098938, "step": 1757 }, { "epoch": 0.3248374542388932, "grad_norm": 0.07540061324834824, "learning_rate": 1.9681271026604344e-05, "loss": 0.6611322164535522, "step": 1758 }, { "epoch": 0.32502223094778904, "grad_norm": 0.07527685165405273, "learning_rate": 1.9680771188662044e-05, "loss": 0.6283073425292969, "step": 1759 }, { "epoch": 0.32520700765668487, "grad_norm": 0.052247464656829834, "learning_rate": 1.9680270965455343e-05, "loss": 0.42355582118034363, "step": 1760 }, { "epoch": 0.3253917843655807, "grad_norm": 0.08852987736463547, "learning_rate": 1.967977035700414e-05, "loss": 0.8240786790847778, "step": 1761 }, { "epoch": 0.3255765610744766, "grad_norm": 0.0704052597284317, "learning_rate": 1.9679269363328357e-05, "loss": 0.6152225732803345, "step": 1762 }, { "epoch": 0.3257613377833724, "grad_norm": 0.08233390003442764, "learning_rate": 1.967876798444794e-05, "loss": 0.769257128238678, "step": 1763 }, { "epoch": 0.32594611449226824, "grad_norm": 0.06887035071849823, "learning_rate": 1.967826622038284e-05, "loss": 0.6390390992164612, "step": 1764 }, { "epoch": 0.3261308912011641, "grad_norm": 0.06841545552015305, "learning_rate": 1.9677764071153022e-05, "loss": 0.6136705279350281, "step": 1765 }, { "epoch": 0.32631566791005995, "grad_norm": 0.0852772668004036, "learning_rate": 1.967726153677847e-05, "loss": 0.937171995639801, "step": 1766 }, { "epoch": 0.3265004446189558, "grad_norm": 0.07882437855005264, "learning_rate": 1.9676758617279187e-05, "loss": 0.7007542252540588, "step": 1767 }, { "epoch": 0.3266852213278516, "grad_norm": 0.07214733958244324, "learning_rate": 1.9676255312675186e-05, "loss": 0.5207557082176208, "step": 1768 }, { "epoch": 0.3268699980367475, "grad_norm": 0.07498107105493546, "learning_rate": 1.9675751622986493e-05, "loss": 0.683100700378418, "step": 1769 }, { "epoch": 0.3270547747456433, "grad_norm": 0.07111642509698868, "learning_rate": 1.967524754823316e-05, "loss": 0.7055477499961853, "step": 1770 }, { "epoch": 0.32723955145453915, "grad_norm": 0.07640910148620605, "learning_rate": 1.9674743088435245e-05, "loss": 0.7240890264511108, "step": 1771 }, { "epoch": 0.327424328163435, "grad_norm": 0.0713282972574234, "learning_rate": 1.9674238243612824e-05, "loss": 0.6240283846855164, "step": 1772 }, { "epoch": 0.32760910487233086, "grad_norm": 0.08497446030378342, "learning_rate": 1.9673733013785988e-05, "loss": 0.7595280408859253, "step": 1773 }, { "epoch": 0.3277938815812267, "grad_norm": 0.06732115894556046, "learning_rate": 1.967322739897484e-05, "loss": 0.563498318195343, "step": 1774 }, { "epoch": 0.3279786582901225, "grad_norm": 0.07231878489255905, "learning_rate": 1.967272139919951e-05, "loss": 0.663462221622467, "step": 1775 }, { "epoch": 0.3281634349990184, "grad_norm": 0.06210903078317642, "learning_rate": 1.9672215014480125e-05, "loss": 0.5513061881065369, "step": 1776 }, { "epoch": 0.32834821170791423, "grad_norm": 0.0835055485367775, "learning_rate": 1.9671708244836844e-05, "loss": 0.7600329518318176, "step": 1777 }, { "epoch": 0.32853298841681006, "grad_norm": 0.0779385045170784, "learning_rate": 1.9671201090289838e-05, "loss": 0.7166795134544373, "step": 1778 }, { "epoch": 0.3287177651257059, "grad_norm": 0.0785970613360405, "learning_rate": 1.9670693550859284e-05, "loss": 0.5702319741249084, "step": 1779 }, { "epoch": 0.32890254183460177, "grad_norm": 0.08954007923603058, "learning_rate": 1.9670185626565378e-05, "loss": 0.7708001136779785, "step": 1780 }, { "epoch": 0.3290873185434976, "grad_norm": 0.09481219947338104, "learning_rate": 1.9669677317428344e-05, "loss": 0.9120945334434509, "step": 1781 }, { "epoch": 0.3292720952523934, "grad_norm": 0.07935404777526855, "learning_rate": 1.9669168623468403e-05, "loss": 0.8653592467308044, "step": 1782 }, { "epoch": 0.32945687196128925, "grad_norm": 0.08438052982091904, "learning_rate": 1.9668659544705802e-05, "loss": 0.7041996121406555, "step": 1783 }, { "epoch": 0.32964164867018514, "grad_norm": 0.08412235230207443, "learning_rate": 1.96681500811608e-05, "loss": 0.7921789288520813, "step": 1784 }, { "epoch": 0.32982642537908097, "grad_norm": 0.0814802497625351, "learning_rate": 1.966764023285367e-05, "loss": 0.6965544819831848, "step": 1785 }, { "epoch": 0.3300112020879768, "grad_norm": 0.08825701475143433, "learning_rate": 1.9667129999804707e-05, "loss": 0.9298684000968933, "step": 1786 }, { "epoch": 0.3301959787968727, "grad_norm": 0.06260937452316284, "learning_rate": 1.966661938203422e-05, "loss": 0.5783395171165466, "step": 1787 }, { "epoch": 0.3303807555057685, "grad_norm": 0.08222264796495438, "learning_rate": 1.9666108379562518e-05, "loss": 0.7140374183654785, "step": 1788 }, { "epoch": 0.33056553221466434, "grad_norm": 0.08222371339797974, "learning_rate": 1.9665596992409943e-05, "loss": 0.6748720407485962, "step": 1789 }, { "epoch": 0.33075030892356017, "grad_norm": 0.07889696955680847, "learning_rate": 1.966508522059685e-05, "loss": 0.6850640773773193, "step": 1790 }, { "epoch": 0.33093508563245605, "grad_norm": 0.07319721579551697, "learning_rate": 1.9664573064143604e-05, "loss": 0.7206241488456726, "step": 1791 }, { "epoch": 0.3311198623413519, "grad_norm": 0.07060479372739792, "learning_rate": 1.9664060523070588e-05, "loss": 0.8104270696640015, "step": 1792 }, { "epoch": 0.3313046390502477, "grad_norm": 0.08124864101409912, "learning_rate": 1.9663547597398197e-05, "loss": 0.6488017439842224, "step": 1793 }, { "epoch": 0.33148941575914354, "grad_norm": 0.07783747464418411, "learning_rate": 1.9663034287146843e-05, "loss": 0.6115512251853943, "step": 1794 }, { "epoch": 0.3316741924680394, "grad_norm": 0.08700265735387802, "learning_rate": 1.966252059233696e-05, "loss": 0.9234606623649597, "step": 1795 }, { "epoch": 0.33185896917693525, "grad_norm": 0.06759519129991531, "learning_rate": 1.9662006512988983e-05, "loss": 0.6828320622444153, "step": 1796 }, { "epoch": 0.3320437458858311, "grad_norm": 0.06715090572834015, "learning_rate": 1.9661492049123377e-05, "loss": 0.5963985323905945, "step": 1797 }, { "epoch": 0.33222852259472696, "grad_norm": 0.056464727967977524, "learning_rate": 1.9660977200760612e-05, "loss": 0.5596158504486084, "step": 1798 }, { "epoch": 0.3324132993036228, "grad_norm": 0.06462464481592178, "learning_rate": 1.9660461967921184e-05, "loss": 0.37965625524520874, "step": 1799 }, { "epoch": 0.3325980760125186, "grad_norm": 0.0943351536989212, "learning_rate": 1.9659946350625593e-05, "loss": 0.859682559967041, "step": 1800 }, { "epoch": 0.33278285272141445, "grad_norm": 0.06761356443166733, "learning_rate": 1.9659430348894357e-05, "loss": 0.615112841129303, "step": 1801 }, { "epoch": 0.33296762943031033, "grad_norm": 0.10105802863836288, "learning_rate": 1.9658913962748014e-05, "loss": 0.8561904430389404, "step": 1802 }, { "epoch": 0.33315240613920616, "grad_norm": 0.07876117527484894, "learning_rate": 1.9658397192207114e-05, "loss": 0.5849995613098145, "step": 1803 }, { "epoch": 0.333337182848102, "grad_norm": 0.06803808361291885, "learning_rate": 1.9657880037292224e-05, "loss": 0.5378023386001587, "step": 1804 }, { "epoch": 0.3335219595569978, "grad_norm": 0.08828198164701462, "learning_rate": 1.9657362498023923e-05, "loss": 0.7301193475723267, "step": 1805 }, { "epoch": 0.3337067362658937, "grad_norm": 0.07563069462776184, "learning_rate": 1.965684457442281e-05, "loss": 0.594975471496582, "step": 1806 }, { "epoch": 0.33389151297478953, "grad_norm": 0.0690205916762352, "learning_rate": 1.9656326266509496e-05, "loss": 0.6536938548088074, "step": 1807 }, { "epoch": 0.33407628968368536, "grad_norm": 0.052560463547706604, "learning_rate": 1.9655807574304606e-05, "loss": 0.5595214366912842, "step": 1808 }, { "epoch": 0.33426106639258124, "grad_norm": 0.07476627081632614, "learning_rate": 1.9655288497828783e-05, "loss": 0.6869099140167236, "step": 1809 }, { "epoch": 0.33444584310147707, "grad_norm": 0.09601838886737823, "learning_rate": 1.9654769037102688e-05, "loss": 0.9548770785331726, "step": 1810 }, { "epoch": 0.3346306198103729, "grad_norm": 0.07889758050441742, "learning_rate": 1.9654249192146988e-05, "loss": 0.6000795960426331, "step": 1811 }, { "epoch": 0.3348153965192687, "grad_norm": 0.07617029547691345, "learning_rate": 1.9653728962982377e-05, "loss": 0.5528815388679504, "step": 1812 }, { "epoch": 0.3350001732281646, "grad_norm": 0.07510527968406677, "learning_rate": 1.9653208349629555e-05, "loss": 0.6761285066604614, "step": 1813 }, { "epoch": 0.33518494993706044, "grad_norm": 0.07648994773626328, "learning_rate": 1.965268735210924e-05, "loss": 0.6213935613632202, "step": 1814 }, { "epoch": 0.33536972664595627, "grad_norm": 0.08265082538127899, "learning_rate": 1.965216597044217e-05, "loss": 0.6013681292533875, "step": 1815 }, { "epoch": 0.3355545033548521, "grad_norm": 0.07502146810293198, "learning_rate": 1.965164420464909e-05, "loss": 0.6983845829963684, "step": 1816 }, { "epoch": 0.335739280063748, "grad_norm": 0.10032079368829727, "learning_rate": 1.965112205475077e-05, "loss": 0.9250543117523193, "step": 1817 }, { "epoch": 0.3359240567726438, "grad_norm": 0.09584251046180725, "learning_rate": 1.9650599520767984e-05, "loss": 0.743982195854187, "step": 1818 }, { "epoch": 0.33610883348153964, "grad_norm": 0.10235143452882767, "learning_rate": 1.965007660272153e-05, "loss": 0.7937692999839783, "step": 1819 }, { "epoch": 0.3362936101904355, "grad_norm": 0.0825798287987709, "learning_rate": 1.964955330063222e-05, "loss": 0.6732792854309082, "step": 1820 }, { "epoch": 0.33647838689933135, "grad_norm": 0.07754667103290558, "learning_rate": 1.964902961452088e-05, "loss": 0.6148709058761597, "step": 1821 }, { "epoch": 0.3366631636082272, "grad_norm": 0.08688148111104965, "learning_rate": 1.9648505544408343e-05, "loss": 0.7113993167877197, "step": 1822 }, { "epoch": 0.336847940317123, "grad_norm": 0.06463677436113358, "learning_rate": 1.9647981090315474e-05, "loss": 0.6490875482559204, "step": 1823 }, { "epoch": 0.3370327170260189, "grad_norm": 0.07091058790683746, "learning_rate": 1.9647456252263147e-05, "loss": 0.5048199892044067, "step": 1824 }, { "epoch": 0.3372174937349147, "grad_norm": 0.07602108269929886, "learning_rate": 1.9646931030272237e-05, "loss": 0.6208643317222595, "step": 1825 }, { "epoch": 0.33740227044381055, "grad_norm": 0.0957789197564125, "learning_rate": 1.9646405424363658e-05, "loss": 0.8971869945526123, "step": 1826 }, { "epoch": 0.3375870471527064, "grad_norm": 0.09101379662752151, "learning_rate": 1.964587943455832e-05, "loss": 0.6973580121994019, "step": 1827 }, { "epoch": 0.33777182386160226, "grad_norm": 0.06714178621768951, "learning_rate": 1.9645353060877164e-05, "loss": 0.6755441427230835, "step": 1828 }, { "epoch": 0.3379566005704981, "grad_norm": 0.07767485827207565, "learning_rate": 1.964482630334113e-05, "loss": 0.739201545715332, "step": 1829 }, { "epoch": 0.3381413772793939, "grad_norm": 0.08593825250864029, "learning_rate": 1.9644299161971183e-05, "loss": 0.6533412337303162, "step": 1830 }, { "epoch": 0.3383261539882898, "grad_norm": 0.07094033807516098, "learning_rate": 1.96437716367883e-05, "loss": 0.6397164463996887, "step": 1831 }, { "epoch": 0.33851093069718563, "grad_norm": 0.08640284091234207, "learning_rate": 1.9643243727813483e-05, "loss": 0.7347773313522339, "step": 1832 }, { "epoch": 0.33869570740608146, "grad_norm": 0.08087334036827087, "learning_rate": 1.964271543506773e-05, "loss": 0.8569307327270508, "step": 1833 }, { "epoch": 0.3388804841149773, "grad_norm": 0.06818056106567383, "learning_rate": 1.9642186758572074e-05, "loss": 0.484335720539093, "step": 1834 }, { "epoch": 0.33906526082387317, "grad_norm": 0.08313988894224167, "learning_rate": 1.9641657698347553e-05, "loss": 0.7267013192176819, "step": 1835 }, { "epoch": 0.339250037532769, "grad_norm": 0.07647810131311417, "learning_rate": 1.9641128254415216e-05, "loss": 0.6098029613494873, "step": 1836 }, { "epoch": 0.33943481424166483, "grad_norm": 0.07837104797363281, "learning_rate": 1.964059842679614e-05, "loss": 0.7910488843917847, "step": 1837 }, { "epoch": 0.33961959095056066, "grad_norm": 0.07894197106361389, "learning_rate": 1.9640068215511407e-05, "loss": 0.5041911602020264, "step": 1838 }, { "epoch": 0.33980436765945654, "grad_norm": 0.08094620704650879, "learning_rate": 1.963953762058212e-05, "loss": 0.7487735152244568, "step": 1839 }, { "epoch": 0.33998914436835237, "grad_norm": 0.07290375232696533, "learning_rate": 1.9639006642029394e-05, "loss": 0.6246356964111328, "step": 1840 }, { "epoch": 0.3401739210772482, "grad_norm": 0.07809294760227203, "learning_rate": 1.963847527987436e-05, "loss": 0.669266939163208, "step": 1841 }, { "epoch": 0.3403586977861441, "grad_norm": 0.07507922500371933, "learning_rate": 1.9637943534138165e-05, "loss": 0.8690791726112366, "step": 1842 }, { "epoch": 0.3405434744950399, "grad_norm": 0.09490001946687698, "learning_rate": 1.963741140484197e-05, "loss": 0.8107730746269226, "step": 1843 }, { "epoch": 0.34072825120393574, "grad_norm": 0.08989793062210083, "learning_rate": 1.9636878892006953e-05, "loss": 0.9223145246505737, "step": 1844 }, { "epoch": 0.34091302791283157, "grad_norm": 0.07013858109712601, "learning_rate": 1.9636345995654307e-05, "loss": 0.5746279358863831, "step": 1845 }, { "epoch": 0.34109780462172745, "grad_norm": 0.07494598627090454, "learning_rate": 1.963581271580524e-05, "loss": 0.6848379373550415, "step": 1846 }, { "epoch": 0.3412825813306233, "grad_norm": 0.07319419831037521, "learning_rate": 1.963527905248097e-05, "loss": 0.7409180402755737, "step": 1847 }, { "epoch": 0.3414673580395191, "grad_norm": 0.08100725710391998, "learning_rate": 1.963474500570274e-05, "loss": 0.7143396139144897, "step": 1848 }, { "epoch": 0.34165213474841494, "grad_norm": 0.08466223627328873, "learning_rate": 1.9634210575491802e-05, "loss": 0.6691813468933105, "step": 1849 }, { "epoch": 0.3418369114573108, "grad_norm": 0.0932893231511116, "learning_rate": 1.9633675761869425e-05, "loss": 0.9059786200523376, "step": 1850 }, { "epoch": 0.34202168816620665, "grad_norm": 0.10014156252145767, "learning_rate": 1.963314056485689e-05, "loss": 0.982785165309906, "step": 1851 }, { "epoch": 0.3422064648751025, "grad_norm": 0.07843346893787384, "learning_rate": 1.96326049844755e-05, "loss": 0.7437697649002075, "step": 1852 }, { "epoch": 0.34239124158399836, "grad_norm": 0.08212711662054062, "learning_rate": 1.9632069020746574e-05, "loss": 0.5837621688842773, "step": 1853 }, { "epoch": 0.3425760182928942, "grad_norm": 0.07777490466833115, "learning_rate": 1.963153267369143e-05, "loss": 0.5319809317588806, "step": 1854 }, { "epoch": 0.34276079500179, "grad_norm": 0.06932126730680466, "learning_rate": 1.963099594333142e-05, "loss": 0.5694265365600586, "step": 1855 }, { "epoch": 0.34294557171068585, "grad_norm": 0.09377758204936981, "learning_rate": 1.96304588296879e-05, "loss": 0.7562369108200073, "step": 1856 }, { "epoch": 0.34313034841958173, "grad_norm": 0.07521593570709229, "learning_rate": 1.9629921332782254e-05, "loss": 0.7064640522003174, "step": 1857 }, { "epoch": 0.34331512512847756, "grad_norm": 0.0694519504904747, "learning_rate": 1.9629383452635863e-05, "loss": 0.5606677532196045, "step": 1858 }, { "epoch": 0.3434999018373734, "grad_norm": 0.06669106334447861, "learning_rate": 1.962884518927014e-05, "loss": 0.5357717275619507, "step": 1859 }, { "epoch": 0.3436846785462692, "grad_norm": 0.08788613229990005, "learning_rate": 1.96283065427065e-05, "loss": 0.734246551990509, "step": 1860 }, { "epoch": 0.3438694552551651, "grad_norm": 0.0990750715136528, "learning_rate": 1.9627767512966384e-05, "loss": 0.6421576142311096, "step": 1861 }, { "epoch": 0.34405423196406093, "grad_norm": 0.08061659336090088, "learning_rate": 1.9627228100071245e-05, "loss": 0.7515416741371155, "step": 1862 }, { "epoch": 0.34423900867295676, "grad_norm": 0.08036473393440247, "learning_rate": 1.9626688304042544e-05, "loss": 0.6768893599510193, "step": 1863 }, { "epoch": 0.34442378538185264, "grad_norm": 0.07160155475139618, "learning_rate": 1.9626148124901767e-05, "loss": 0.5494096279144287, "step": 1864 }, { "epoch": 0.34460856209074847, "grad_norm": 0.08275075256824493, "learning_rate": 1.9625607562670414e-05, "loss": 0.8985153436660767, "step": 1865 }, { "epoch": 0.3447933387996443, "grad_norm": 0.07015492767095566, "learning_rate": 1.962506661736999e-05, "loss": 0.6336041688919067, "step": 1866 }, { "epoch": 0.34497811550854013, "grad_norm": 0.08545436710119247, "learning_rate": 1.962452528902203e-05, "loss": 0.6721879243850708, "step": 1867 }, { "epoch": 0.345162892217436, "grad_norm": 0.08779406547546387, "learning_rate": 1.9623983577648075e-05, "loss": 0.8231399059295654, "step": 1868 }, { "epoch": 0.34534766892633184, "grad_norm": 0.09778361767530441, "learning_rate": 1.9623441483269682e-05, "loss": 0.924184262752533, "step": 1869 }, { "epoch": 0.34553244563522767, "grad_norm": 0.07136828452348709, "learning_rate": 1.9622899005908426e-05, "loss": 0.6420773863792419, "step": 1870 }, { "epoch": 0.3457172223441235, "grad_norm": 0.08129216730594635, "learning_rate": 1.9622356145585895e-05, "loss": 0.9055580496788025, "step": 1871 }, { "epoch": 0.3459019990530194, "grad_norm": 0.06902094185352325, "learning_rate": 1.96218129023237e-05, "loss": 0.624248743057251, "step": 1872 }, { "epoch": 0.3460867757619152, "grad_norm": 0.07671067863702774, "learning_rate": 1.9621269276143447e-05, "loss": 0.9902414083480835, "step": 1873 }, { "epoch": 0.34627155247081104, "grad_norm": 0.06990405917167664, "learning_rate": 1.962072526706678e-05, "loss": 0.6081162691116333, "step": 1874 }, { "epoch": 0.3464563291797069, "grad_norm": 0.06580513715744019, "learning_rate": 1.9620180875115346e-05, "loss": 0.5052412748336792, "step": 1875 }, { "epoch": 0.34664110588860275, "grad_norm": 0.07508678734302521, "learning_rate": 1.9619636100310815e-05, "loss": 0.573227047920227, "step": 1876 }, { "epoch": 0.3468258825974986, "grad_norm": 0.07413018494844437, "learning_rate": 1.961909094267486e-05, "loss": 0.7761869430541992, "step": 1877 }, { "epoch": 0.3470106593063944, "grad_norm": 0.05134841799736023, "learning_rate": 1.961854540222918e-05, "loss": 0.47000938653945923, "step": 1878 }, { "epoch": 0.3471954360152903, "grad_norm": 0.06442036479711533, "learning_rate": 1.9617999478995483e-05, "loss": 0.6043466329574585, "step": 1879 }, { "epoch": 0.3473802127241861, "grad_norm": 0.07378768920898438, "learning_rate": 1.9617453172995503e-05, "loss": 0.7331511378288269, "step": 1880 }, { "epoch": 0.34756498943308195, "grad_norm": 0.08647879958152771, "learning_rate": 1.9616906484250974e-05, "loss": 0.7170236706733704, "step": 1881 }, { "epoch": 0.3477497661419778, "grad_norm": 0.06821449846029282, "learning_rate": 1.9616359412783653e-05, "loss": 0.6115304231643677, "step": 1882 }, { "epoch": 0.34793454285087366, "grad_norm": 0.058674536645412445, "learning_rate": 1.9615811958615314e-05, "loss": 0.537799060344696, "step": 1883 }, { "epoch": 0.3481193195597695, "grad_norm": 0.07722529768943787, "learning_rate": 1.9615264121767742e-05, "loss": 0.44881102442741394, "step": 1884 }, { "epoch": 0.3483040962686653, "grad_norm": 0.07700357586145401, "learning_rate": 1.961471590226274e-05, "loss": 0.6720776557922363, "step": 1885 }, { "epoch": 0.3484888729775612, "grad_norm": 0.053852230310440063, "learning_rate": 1.9614167300122126e-05, "loss": 0.41168519854545593, "step": 1886 }, { "epoch": 0.34867364968645703, "grad_norm": 0.09329484403133392, "learning_rate": 1.9613618315367734e-05, "loss": 0.8707481622695923, "step": 1887 }, { "epoch": 0.34885842639535286, "grad_norm": 0.10070925951004028, "learning_rate": 1.961306894802141e-05, "loss": 0.7687546610832214, "step": 1888 }, { "epoch": 0.3490432031042487, "grad_norm": 0.08316970616579056, "learning_rate": 1.9612519198105015e-05, "loss": 0.6259077787399292, "step": 1889 }, { "epoch": 0.3492279798131446, "grad_norm": 0.09927278757095337, "learning_rate": 1.9611969065640432e-05, "loss": 0.9406327605247498, "step": 1890 }, { "epoch": 0.3494127565220404, "grad_norm": 0.08534473180770874, "learning_rate": 1.961141855064955e-05, "loss": 0.7000691294670105, "step": 1891 }, { "epoch": 0.34959753323093623, "grad_norm": 0.05822371318936348, "learning_rate": 1.961086765315428e-05, "loss": 0.42065221071243286, "step": 1892 }, { "epoch": 0.34978230993983206, "grad_norm": 0.07493947446346283, "learning_rate": 1.9610316373176548e-05, "loss": 0.6167550086975098, "step": 1893 }, { "epoch": 0.34996708664872794, "grad_norm": 0.08850574493408203, "learning_rate": 1.960976471073829e-05, "loss": 0.6938539147377014, "step": 1894 }, { "epoch": 0.35015186335762377, "grad_norm": 0.08702442049980164, "learning_rate": 1.960921266586146e-05, "loss": 0.6874613165855408, "step": 1895 }, { "epoch": 0.3503366400665196, "grad_norm": 0.07820771634578705, "learning_rate": 1.9608660238568034e-05, "loss": 0.634290337562561, "step": 1896 }, { "epoch": 0.3505214167754155, "grad_norm": 0.0897473394870758, "learning_rate": 1.9608107428879987e-05, "loss": 0.7815918326377869, "step": 1897 }, { "epoch": 0.3507061934843113, "grad_norm": 0.08086293190717697, "learning_rate": 1.9607554236819325e-05, "loss": 0.6558753848075867, "step": 1898 }, { "epoch": 0.35089097019320714, "grad_norm": 0.07397107779979706, "learning_rate": 1.9607000662408066e-05, "loss": 0.7135198712348938, "step": 1899 }, { "epoch": 0.35107574690210297, "grad_norm": 0.08879372477531433, "learning_rate": 1.9606446705668236e-05, "loss": 0.9617974758148193, "step": 1900 }, { "epoch": 0.35126052361099885, "grad_norm": 0.08342552185058594, "learning_rate": 1.960589236662188e-05, "loss": 0.6310023665428162, "step": 1901 }, { "epoch": 0.3514453003198947, "grad_norm": 0.08977493643760681, "learning_rate": 1.9605337645291063e-05, "loss": 0.8947171568870544, "step": 1902 }, { "epoch": 0.3516300770287905, "grad_norm": 0.06647540628910065, "learning_rate": 1.9604782541697858e-05, "loss": 0.5766458511352539, "step": 1903 }, { "epoch": 0.35181485373768634, "grad_norm": 0.09166788309812546, "learning_rate": 1.9604227055864355e-05, "loss": 0.995534360408783, "step": 1904 }, { "epoch": 0.3519996304465822, "grad_norm": 0.05475666746497154, "learning_rate": 1.9603671187812664e-05, "loss": 0.44240716099739075, "step": 1905 }, { "epoch": 0.35218440715547805, "grad_norm": 0.06649932265281677, "learning_rate": 1.960311493756491e-05, "loss": 0.5629976987838745, "step": 1906 }, { "epoch": 0.3523691838643739, "grad_norm": 0.09560313820838928, "learning_rate": 1.960255830514322e-05, "loss": 0.8332951664924622, "step": 1907 }, { "epoch": 0.35255396057326976, "grad_norm": 0.06212356314063072, "learning_rate": 1.9602001290569756e-05, "loss": 0.6503223180770874, "step": 1908 }, { "epoch": 0.3527387372821656, "grad_norm": 0.0768064633011818, "learning_rate": 1.9601443893866682e-05, "loss": 0.682904839515686, "step": 1909 }, { "epoch": 0.3529235139910614, "grad_norm": 0.08543505519628525, "learning_rate": 1.9600886115056177e-05, "loss": 0.6108998656272888, "step": 1910 }, { "epoch": 0.35310829069995725, "grad_norm": 0.11749269813299179, "learning_rate": 1.9600327954160443e-05, "loss": 1.2383038997650146, "step": 1911 }, { "epoch": 0.35329306740885313, "grad_norm": 0.08560445159673691, "learning_rate": 1.9599769411201692e-05, "loss": 0.7582423090934753, "step": 1912 }, { "epoch": 0.35347784411774896, "grad_norm": 0.06951037049293518, "learning_rate": 1.959921048620215e-05, "loss": 0.5835634469985962, "step": 1913 }, { "epoch": 0.3536626208266448, "grad_norm": 0.08175399899482727, "learning_rate": 1.9598651179184065e-05, "loss": 0.6157529950141907, "step": 1914 }, { "epoch": 0.3538473975355406, "grad_norm": 0.09142636507749557, "learning_rate": 1.9598091490169696e-05, "loss": 0.8940083980560303, "step": 1915 }, { "epoch": 0.3540321742444365, "grad_norm": 0.08478675037622452, "learning_rate": 1.959753141918131e-05, "loss": 0.790504515171051, "step": 1916 }, { "epoch": 0.35421695095333233, "grad_norm": 0.0646057203412056, "learning_rate": 1.9596970966241203e-05, "loss": 0.5042975544929504, "step": 1917 }, { "epoch": 0.35440172766222816, "grad_norm": 0.08247191458940506, "learning_rate": 1.9596410131371674e-05, "loss": 0.6063563823699951, "step": 1918 }, { "epoch": 0.35458650437112404, "grad_norm": 0.08805165439844131, "learning_rate": 1.9595848914595047e-05, "loss": 0.7084945440292358, "step": 1919 }, { "epoch": 0.3547712810800199, "grad_norm": 0.06988709419965744, "learning_rate": 1.9595287315933653e-05, "loss": 0.5524942874908447, "step": 1920 }, { "epoch": 0.3549560577889157, "grad_norm": 0.06375003606081009, "learning_rate": 1.9594725335409847e-05, "loss": 0.4416433870792389, "step": 1921 }, { "epoch": 0.35514083449781153, "grad_norm": 0.1005588248372078, "learning_rate": 1.959416297304599e-05, "loss": 0.8840219378471375, "step": 1922 }, { "epoch": 0.3553256112067074, "grad_norm": 0.07297534495592117, "learning_rate": 1.959360022886446e-05, "loss": 0.6581389307975769, "step": 1923 }, { "epoch": 0.35551038791560324, "grad_norm": 0.09779398888349533, "learning_rate": 1.9593037102887657e-05, "loss": 0.8495759963989258, "step": 1924 }, { "epoch": 0.35569516462449907, "grad_norm": 0.09082182496786118, "learning_rate": 1.959247359513799e-05, "loss": 0.8239253759384155, "step": 1925 }, { "epoch": 0.3558799413333949, "grad_norm": 0.07424180954694748, "learning_rate": 1.9591909705637886e-05, "loss": 0.5720775127410889, "step": 1926 }, { "epoch": 0.3560647180422908, "grad_norm": 0.0850430577993393, "learning_rate": 1.9591345434409785e-05, "loss": 0.6905296444892883, "step": 1927 }, { "epoch": 0.3562494947511866, "grad_norm": 0.07360535860061646, "learning_rate": 1.959078078147614e-05, "loss": 0.594716489315033, "step": 1928 }, { "epoch": 0.35643427146008244, "grad_norm": 0.0795220360159874, "learning_rate": 1.9590215746859428e-05, "loss": 0.656869649887085, "step": 1929 }, { "epoch": 0.3566190481689783, "grad_norm": 0.07492407411336899, "learning_rate": 1.9589650330582133e-05, "loss": 0.5760904550552368, "step": 1930 }, { "epoch": 0.35680382487787415, "grad_norm": 0.08463925123214722, "learning_rate": 1.9589084532666757e-05, "loss": 0.7245805263519287, "step": 1931 }, { "epoch": 0.35698860158677, "grad_norm": 0.07536065578460693, "learning_rate": 1.9588518353135818e-05, "loss": 0.623936116695404, "step": 1932 }, { "epoch": 0.3571733782956658, "grad_norm": 0.07619050145149231, "learning_rate": 1.9587951792011844e-05, "loss": 0.6012312769889832, "step": 1933 }, { "epoch": 0.3573581550045617, "grad_norm": 0.07146383076906204, "learning_rate": 1.958738484931739e-05, "loss": 0.6659769415855408, "step": 1934 }, { "epoch": 0.3575429317134575, "grad_norm": 0.07134726643562317, "learning_rate": 1.958681752507501e-05, "loss": 0.6113250851631165, "step": 1935 }, { "epoch": 0.35772770842235335, "grad_norm": 0.08301021158695221, "learning_rate": 1.958624981930729e-05, "loss": 0.7705444693565369, "step": 1936 }, { "epoch": 0.3579124851312492, "grad_norm": 0.08296855539083481, "learning_rate": 1.9585681732036813e-05, "loss": 0.9636433720588684, "step": 1937 }, { "epoch": 0.35809726184014506, "grad_norm": 0.06294649094343185, "learning_rate": 1.9585113263286197e-05, "loss": 0.46373164653778076, "step": 1938 }, { "epoch": 0.3582820385490409, "grad_norm": 0.06871624290943146, "learning_rate": 1.9584544413078062e-05, "loss": 0.5198218822479248, "step": 1939 }, { "epoch": 0.3584668152579367, "grad_norm": 0.08555329591035843, "learning_rate": 1.9583975181435043e-05, "loss": 0.8196623921394348, "step": 1940 }, { "epoch": 0.3586515919668326, "grad_norm": 0.08057794719934464, "learning_rate": 1.9583405568379795e-05, "loss": 0.6855925917625427, "step": 1941 }, { "epoch": 0.35883636867572843, "grad_norm": 0.06789886951446533, "learning_rate": 1.9582835573934994e-05, "loss": 0.6530704498291016, "step": 1942 }, { "epoch": 0.35902114538462426, "grad_norm": 0.08298030495643616, "learning_rate": 1.9582265198123312e-05, "loss": 0.8214961290359497, "step": 1943 }, { "epoch": 0.3592059220935201, "grad_norm": 0.058385469019412994, "learning_rate": 1.9581694440967456e-05, "loss": 0.4939122796058655, "step": 1944 }, { "epoch": 0.359390698802416, "grad_norm": 0.0891612321138382, "learning_rate": 1.958112330249014e-05, "loss": 0.7617546916007996, "step": 1945 }, { "epoch": 0.3595754755113118, "grad_norm": 0.07534582912921906, "learning_rate": 1.958055178271409e-05, "loss": 0.48294517397880554, "step": 1946 }, { "epoch": 0.35976025222020763, "grad_norm": 0.08808505535125732, "learning_rate": 1.957997988166205e-05, "loss": 0.7680104374885559, "step": 1947 }, { "epoch": 0.35994502892910346, "grad_norm": 0.07987001538276672, "learning_rate": 1.9579407599356787e-05, "loss": 0.6420309543609619, "step": 1948 }, { "epoch": 0.36012980563799935, "grad_norm": 0.07023387402296066, "learning_rate": 1.957883493582107e-05, "loss": 0.5578839182853699, "step": 1949 }, { "epoch": 0.3603145823468952, "grad_norm": 0.08079687505960464, "learning_rate": 1.9578261891077693e-05, "loss": 0.8681395053863525, "step": 1950 }, { "epoch": 0.360499359055791, "grad_norm": 0.05796205252408981, "learning_rate": 1.957768846514946e-05, "loss": 0.4505980610847473, "step": 1951 }, { "epoch": 0.3606841357646869, "grad_norm": 0.07316447049379349, "learning_rate": 1.9577114658059186e-05, "loss": 0.6213290095329285, "step": 1952 }, { "epoch": 0.3608689124735827, "grad_norm": 0.0825081542134285, "learning_rate": 1.9576540469829715e-05, "loss": 0.7580359578132629, "step": 1953 }, { "epoch": 0.36105368918247854, "grad_norm": 0.06691969186067581, "learning_rate": 1.9575965900483895e-05, "loss": 0.6928013563156128, "step": 1954 }, { "epoch": 0.36123846589137437, "grad_norm": 0.10172438621520996, "learning_rate": 1.957539095004459e-05, "loss": 0.841858983039856, "step": 1955 }, { "epoch": 0.36142324260027026, "grad_norm": 0.07854878902435303, "learning_rate": 1.9574815618534682e-05, "loss": 0.5641253590583801, "step": 1956 }, { "epoch": 0.3616080193091661, "grad_norm": 0.09107008576393127, "learning_rate": 1.9574239905977072e-05, "loss": 0.8147022724151611, "step": 1957 }, { "epoch": 0.3617927960180619, "grad_norm": 0.08986808359622955, "learning_rate": 1.9573663812394664e-05, "loss": 1.079319953918457, "step": 1958 }, { "epoch": 0.36197757272695774, "grad_norm": 0.08058770000934601, "learning_rate": 1.9573087337810385e-05, "loss": 0.7533799409866333, "step": 1959 }, { "epoch": 0.3621623494358536, "grad_norm": 0.06563723832368851, "learning_rate": 1.9572510482247187e-05, "loss": 0.5782539248466492, "step": 1960 }, { "epoch": 0.36234712614474945, "grad_norm": 0.06482648104429245, "learning_rate": 1.9571933245728017e-05, "loss": 0.6582553386688232, "step": 1961 }, { "epoch": 0.3625319028536453, "grad_norm": 0.08319249749183655, "learning_rate": 1.957135562827585e-05, "loss": 0.6696375012397766, "step": 1962 }, { "epoch": 0.36271667956254117, "grad_norm": 0.0764584019780159, "learning_rate": 1.9570777629913676e-05, "loss": 0.6904196739196777, "step": 1963 }, { "epoch": 0.362901456271437, "grad_norm": 0.08399965614080429, "learning_rate": 1.9570199250664498e-05, "loss": 0.7262436151504517, "step": 1964 }, { "epoch": 0.3630862329803328, "grad_norm": 0.07018699496984482, "learning_rate": 1.9569620490551323e-05, "loss": 0.6780123114585876, "step": 1965 }, { "epoch": 0.36327100968922865, "grad_norm": 0.08127652108669281, "learning_rate": 1.95690413495972e-05, "loss": 0.7128887176513672, "step": 1966 }, { "epoch": 0.36345578639812454, "grad_norm": 0.07078352570533752, "learning_rate": 1.9568461827825165e-05, "loss": 0.8290748596191406, "step": 1967 }, { "epoch": 0.36364056310702036, "grad_norm": 0.07198803126811981, "learning_rate": 1.9567881925258287e-05, "loss": 0.6036486029624939, "step": 1968 }, { "epoch": 0.3638253398159162, "grad_norm": 0.084732785820961, "learning_rate": 1.956730164191964e-05, "loss": 0.9472017288208008, "step": 1969 }, { "epoch": 0.364010116524812, "grad_norm": 0.07745788991451263, "learning_rate": 1.9566720977832322e-05, "loss": 0.6785796284675598, "step": 1970 }, { "epoch": 0.3641948932337079, "grad_norm": 0.0648859366774559, "learning_rate": 1.9566139933019438e-05, "loss": 0.9074662327766418, "step": 1971 }, { "epoch": 0.36437966994260373, "grad_norm": 0.07930831611156464, "learning_rate": 1.9565558507504113e-05, "loss": 0.6451144814491272, "step": 1972 }, { "epoch": 0.36456444665149956, "grad_norm": 0.08495119959115982, "learning_rate": 1.9564976701309488e-05, "loss": 0.7120051383972168, "step": 1973 }, { "epoch": 0.36474922336039545, "grad_norm": 0.0755208432674408, "learning_rate": 1.9564394514458717e-05, "loss": 0.6715715527534485, "step": 1974 }, { "epoch": 0.3649340000692913, "grad_norm": 0.07266179472208023, "learning_rate": 1.9563811946974965e-05, "loss": 0.6780380010604858, "step": 1975 }, { "epoch": 0.3651187767781871, "grad_norm": 0.09038018435239792, "learning_rate": 1.9563228998881417e-05, "loss": 0.8212044835090637, "step": 1976 }, { "epoch": 0.36530355348708293, "grad_norm": 0.0702347606420517, "learning_rate": 1.9562645670201278e-05, "loss": 0.6917005181312561, "step": 1977 }, { "epoch": 0.3654883301959788, "grad_norm": 0.08551733940839767, "learning_rate": 1.9562061960957757e-05, "loss": 0.889920175075531, "step": 1978 }, { "epoch": 0.36567310690487465, "grad_norm": 0.07262120395898819, "learning_rate": 1.9561477871174084e-05, "loss": 0.6123881340026855, "step": 1979 }, { "epoch": 0.3658578836137705, "grad_norm": 0.09774454683065414, "learning_rate": 1.956089340087351e-05, "loss": 0.9917032718658447, "step": 1980 }, { "epoch": 0.3660426603226663, "grad_norm": 0.07489411532878876, "learning_rate": 1.9560308550079288e-05, "loss": 0.6934548020362854, "step": 1981 }, { "epoch": 0.3662274370315622, "grad_norm": 0.08260970562696457, "learning_rate": 1.9559723318814695e-05, "loss": 0.6225256323814392, "step": 1982 }, { "epoch": 0.366412213740458, "grad_norm": 0.0768555998802185, "learning_rate": 1.9559137707103025e-05, "loss": 0.6416477560997009, "step": 1983 }, { "epoch": 0.36659699044935384, "grad_norm": 0.06510823965072632, "learning_rate": 1.955855171496758e-05, "loss": 0.6328122019767761, "step": 1984 }, { "epoch": 0.3667817671582497, "grad_norm": 0.08435893803834915, "learning_rate": 1.9557965342431682e-05, "loss": 0.9326714873313904, "step": 1985 }, { "epoch": 0.36696654386714556, "grad_norm": 0.0724826380610466, "learning_rate": 1.9557378589518665e-05, "loss": 0.6915863156318665, "step": 1986 }, { "epoch": 0.3671513205760414, "grad_norm": 0.08972358703613281, "learning_rate": 1.9556791456251886e-05, "loss": 0.7526139616966248, "step": 1987 }, { "epoch": 0.3673360972849372, "grad_norm": 0.09427791088819504, "learning_rate": 1.95562039426547e-05, "loss": 0.753598153591156, "step": 1988 }, { "epoch": 0.3675208739938331, "grad_norm": 0.0812675729393959, "learning_rate": 1.9555616048750497e-05, "loss": 0.7490280270576477, "step": 1989 }, { "epoch": 0.3677056507027289, "grad_norm": 0.06773082166910172, "learning_rate": 1.955502777456267e-05, "loss": 0.6412544846534729, "step": 1990 }, { "epoch": 0.36789042741162475, "grad_norm": 0.09109006822109222, "learning_rate": 1.9554439120114636e-05, "loss": 0.9695414900779724, "step": 1991 }, { "epoch": 0.3680752041205206, "grad_norm": 0.08510816097259521, "learning_rate": 1.9553850085429814e-05, "loss": 0.7511518597602844, "step": 1992 }, { "epoch": 0.36825998082941647, "grad_norm": 0.08156833797693253, "learning_rate": 1.955326067053165e-05, "loss": 0.6092409491539001, "step": 1993 }, { "epoch": 0.3684447575383123, "grad_norm": 0.0850474014878273, "learning_rate": 1.9552670875443596e-05, "loss": 0.9125552773475647, "step": 1994 }, { "epoch": 0.3686295342472081, "grad_norm": 0.07290153205394745, "learning_rate": 1.9552080700189127e-05, "loss": 0.6870401501655579, "step": 1995 }, { "epoch": 0.368814310956104, "grad_norm": 0.08091038465499878, "learning_rate": 1.9551490144791738e-05, "loss": 0.7886521220207214, "step": 1996 }, { "epoch": 0.36899908766499984, "grad_norm": 0.07019095867872238, "learning_rate": 1.955089920927492e-05, "loss": 0.9075028300285339, "step": 1997 }, { "epoch": 0.36918386437389566, "grad_norm": 0.07463059574365616, "learning_rate": 1.955030789366219e-05, "loss": 0.7937076687812805, "step": 1998 }, { "epoch": 0.3693686410827915, "grad_norm": 0.07808171212673187, "learning_rate": 1.954971619797709e-05, "loss": 0.7428622841835022, "step": 1999 }, { "epoch": 0.3695534177916874, "grad_norm": 0.0843687504529953, "learning_rate": 1.9549124122243163e-05, "loss": 0.6670342683792114, "step": 2000 }, { "epoch": 0.3695534177916874, "eval_loss": 0.7397631406784058, "eval_runtime": 158.3211, "eval_samples_per_second": 115.139, "eval_steps_per_second": 14.395, "step": 2000 }, { "epoch": 0.3697381945005832, "grad_norm": 0.06598306447267532, "learning_rate": 1.954853166648397e-05, "loss": 0.5824833512306213, "step": 2001 }, { "epoch": 0.36992297120947903, "grad_norm": 0.09141170978546143, "learning_rate": 1.9547938830723088e-05, "loss": 0.7796555757522583, "step": 2002 }, { "epoch": 0.37010774791837486, "grad_norm": 0.06458456814289093, "learning_rate": 1.9547345614984116e-05, "loss": 0.817378580570221, "step": 2003 }, { "epoch": 0.37029252462727075, "grad_norm": 0.09466605633497238, "learning_rate": 1.9546752019290656e-05, "loss": 0.9092118740081787, "step": 2004 }, { "epoch": 0.3704773013361666, "grad_norm": 0.07988221198320389, "learning_rate": 1.9546158043666335e-05, "loss": 0.7625096440315247, "step": 2005 }, { "epoch": 0.3706620780450624, "grad_norm": 0.06539753079414368, "learning_rate": 1.9545563688134788e-05, "loss": 0.508465588092804, "step": 2006 }, { "epoch": 0.3708468547539583, "grad_norm": 0.10145644098520279, "learning_rate": 1.9544968952719673e-05, "loss": 0.9701846837997437, "step": 2007 }, { "epoch": 0.3710316314628541, "grad_norm": 0.06246112659573555, "learning_rate": 1.954437383744465e-05, "loss": 0.6103070974349976, "step": 2008 }, { "epoch": 0.37121640817174995, "grad_norm": 0.08505327999591827, "learning_rate": 1.9543778342333415e-05, "loss": 0.8365639448165894, "step": 2009 }, { "epoch": 0.3714011848806458, "grad_norm": 0.09498727321624756, "learning_rate": 1.9543182467409657e-05, "loss": 0.8169976472854614, "step": 2010 }, { "epoch": 0.37158596158954166, "grad_norm": 0.07790561765432358, "learning_rate": 1.9542586212697098e-05, "loss": 0.6823660135269165, "step": 2011 }, { "epoch": 0.3717707382984375, "grad_norm": 0.061434078961610794, "learning_rate": 1.954198957821946e-05, "loss": 0.592761218547821, "step": 2012 }, { "epoch": 0.3719555150073333, "grad_norm": 0.07613083720207214, "learning_rate": 1.954139256400049e-05, "loss": 0.6575087308883667, "step": 2013 }, { "epoch": 0.37214029171622914, "grad_norm": 0.07182037830352783, "learning_rate": 1.954079517006395e-05, "loss": 0.6541277170181274, "step": 2014 }, { "epoch": 0.372325068425125, "grad_norm": 0.08298812061548233, "learning_rate": 1.9540197396433606e-05, "loss": 0.6347714066505432, "step": 2015 }, { "epoch": 0.37250984513402086, "grad_norm": 0.08322648704051971, "learning_rate": 1.9539599243133254e-05, "loss": 0.6312686204910278, "step": 2016 }, { "epoch": 0.3726946218429167, "grad_norm": 0.07473642379045486, "learning_rate": 1.95390007101867e-05, "loss": 0.5637624859809875, "step": 2017 }, { "epoch": 0.37287939855181257, "grad_norm": 0.08620911091566086, "learning_rate": 1.9538401797617762e-05, "loss": 0.8354276418685913, "step": 2018 }, { "epoch": 0.3730641752607084, "grad_norm": 0.10492628812789917, "learning_rate": 1.9537802505450272e-05, "loss": 0.8417186141014099, "step": 2019 }, { "epoch": 0.3732489519696042, "grad_norm": 0.07237657159566879, "learning_rate": 1.9537202833708084e-05, "loss": 0.5563523173332214, "step": 2020 }, { "epoch": 0.37343372867850005, "grad_norm": 0.07768519967794418, "learning_rate": 1.953660278241506e-05, "loss": 0.7059952616691589, "step": 2021 }, { "epoch": 0.37361850538739594, "grad_norm": 0.09444581717252731, "learning_rate": 1.9536002351595082e-05, "loss": 0.8551285266876221, "step": 2022 }, { "epoch": 0.37380328209629177, "grad_norm": 0.1082654669880867, "learning_rate": 1.9535401541272046e-05, "loss": 0.7788101434707642, "step": 2023 }, { "epoch": 0.3739880588051876, "grad_norm": 0.07339019328355789, "learning_rate": 1.9534800351469862e-05, "loss": 0.7254128456115723, "step": 2024 }, { "epoch": 0.3741728355140834, "grad_norm": 0.08173136413097382, "learning_rate": 1.953419878221245e-05, "loss": 0.6819193363189697, "step": 2025 }, { "epoch": 0.3743576122229793, "grad_norm": 0.0670306533575058, "learning_rate": 1.953359683352376e-05, "loss": 0.48552295565605164, "step": 2026 }, { "epoch": 0.37454238893187514, "grad_norm": 0.08190646767616272, "learning_rate": 1.953299450542774e-05, "loss": 0.8560343384742737, "step": 2027 }, { "epoch": 0.37472716564077097, "grad_norm": 0.05877630412578583, "learning_rate": 1.9532391797948365e-05, "loss": 0.4112713634967804, "step": 2028 }, { "epoch": 0.37491194234966685, "grad_norm": 0.09025271981954575, "learning_rate": 1.9531788711109616e-05, "loss": 0.7425708770751953, "step": 2029 }, { "epoch": 0.3750967190585627, "grad_norm": 0.08883294463157654, "learning_rate": 1.95311852449355e-05, "loss": 0.7375306487083435, "step": 2030 }, { "epoch": 0.3752814957674585, "grad_norm": 0.08991797268390656, "learning_rate": 1.9530581399450032e-05, "loss": 0.7483659386634827, "step": 2031 }, { "epoch": 0.37546627247635433, "grad_norm": 0.07757057994604111, "learning_rate": 1.952997717467724e-05, "loss": 0.6233549118041992, "step": 2032 }, { "epoch": 0.3756510491852502, "grad_norm": 0.07741144299507141, "learning_rate": 1.9529372570641173e-05, "loss": 0.5848559141159058, "step": 2033 }, { "epoch": 0.37583582589414605, "grad_norm": 0.07271159440279007, "learning_rate": 1.952876758736589e-05, "loss": 0.6167519092559814, "step": 2034 }, { "epoch": 0.3760206026030419, "grad_norm": 0.07572707533836365, "learning_rate": 1.952816222487547e-05, "loss": 0.5975688099861145, "step": 2035 }, { "epoch": 0.3762053793119377, "grad_norm": 0.09476777911186218, "learning_rate": 1.9527556483194003e-05, "loss": 0.9203355312347412, "step": 2036 }, { "epoch": 0.3763901560208336, "grad_norm": 0.07740332186222076, "learning_rate": 1.9526950362345595e-05, "loss": 0.6362847685813904, "step": 2037 }, { "epoch": 0.3765749327297294, "grad_norm": 0.08949487656354904, "learning_rate": 1.9526343862354368e-05, "loss": 0.8080466985702515, "step": 2038 }, { "epoch": 0.37675970943862525, "grad_norm": 0.0746837854385376, "learning_rate": 1.9525736983244458e-05, "loss": 0.6293449401855469, "step": 2039 }, { "epoch": 0.37694448614752113, "grad_norm": 0.07914283871650696, "learning_rate": 1.9525129725040023e-05, "loss": 0.8751811981201172, "step": 2040 }, { "epoch": 0.37712926285641696, "grad_norm": 0.07535235583782196, "learning_rate": 1.952452208776522e-05, "loss": 0.5902524590492249, "step": 2041 }, { "epoch": 0.3773140395653128, "grad_norm": 0.08348150551319122, "learning_rate": 1.952391407144424e-05, "loss": 0.6699376106262207, "step": 2042 }, { "epoch": 0.3774988162742086, "grad_norm": 0.0957750678062439, "learning_rate": 1.9523305676101275e-05, "loss": 0.8382880687713623, "step": 2043 }, { "epoch": 0.3776835929831045, "grad_norm": 0.07746924459934235, "learning_rate": 1.952269690176054e-05, "loss": 0.5988008379936218, "step": 2044 }, { "epoch": 0.3778683696920003, "grad_norm": 0.06604867428541183, "learning_rate": 1.9522087748446263e-05, "loss": 0.5286591649055481, "step": 2045 }, { "epoch": 0.37805314640089616, "grad_norm": 0.07804200798273087, "learning_rate": 1.952147821618268e-05, "loss": 0.5273786783218384, "step": 2046 }, { "epoch": 0.378237923109792, "grad_norm": 0.07519040256738663, "learning_rate": 1.9520868304994054e-05, "loss": 0.5678566098213196, "step": 2047 }, { "epoch": 0.37842269981868787, "grad_norm": 0.07195018231868744, "learning_rate": 1.9520258014904655e-05, "loss": 0.6135433912277222, "step": 2048 }, { "epoch": 0.3786074765275837, "grad_norm": 0.06383226066827774, "learning_rate": 1.9519647345938776e-05, "loss": 0.5484453439712524, "step": 2049 }, { "epoch": 0.3787922532364795, "grad_norm": 0.07442542165517807, "learning_rate": 1.9519036298120712e-05, "loss": 0.7261379957199097, "step": 2050 }, { "epoch": 0.3789770299453754, "grad_norm": 0.07282903045415878, "learning_rate": 1.9518424871474786e-05, "loss": 0.6312949657440186, "step": 2051 }, { "epoch": 0.37916180665427124, "grad_norm": 0.08609618246555328, "learning_rate": 1.951781306602533e-05, "loss": 0.7831339240074158, "step": 2052 }, { "epoch": 0.37934658336316707, "grad_norm": 0.09370411932468414, "learning_rate": 1.951720088179669e-05, "loss": 0.8748478293418884, "step": 2053 }, { "epoch": 0.3795313600720629, "grad_norm": 0.07409726083278656, "learning_rate": 1.9516588318813233e-05, "loss": 0.7795743942260742, "step": 2054 }, { "epoch": 0.3797161367809588, "grad_norm": 0.08550463616847992, "learning_rate": 1.951597537709933e-05, "loss": 0.6052160859107971, "step": 2055 }, { "epoch": 0.3799009134898546, "grad_norm": 0.08676903694868088, "learning_rate": 1.9515362056679385e-05, "loss": 0.7950112819671631, "step": 2056 }, { "epoch": 0.38008569019875044, "grad_norm": 0.07116511464118958, "learning_rate": 1.9514748357577797e-05, "loss": 0.5086311101913452, "step": 2057 }, { "epoch": 0.38027046690764627, "grad_norm": 0.08856448531150818, "learning_rate": 1.951413427981899e-05, "loss": 0.7990341186523438, "step": 2058 }, { "epoch": 0.38045524361654215, "grad_norm": 0.08384235948324203, "learning_rate": 1.951351982342741e-05, "loss": 0.7706287503242493, "step": 2059 }, { "epoch": 0.380640020325438, "grad_norm": 0.0836147740483284, "learning_rate": 1.9512904988427498e-05, "loss": 0.7029248476028442, "step": 2060 }, { "epoch": 0.3808247970343338, "grad_norm": 0.08189492672681808, "learning_rate": 1.9512289774843737e-05, "loss": 0.7562029361724854, "step": 2061 }, { "epoch": 0.3810095737432297, "grad_norm": 0.08952762931585312, "learning_rate": 1.9511674182700596e-05, "loss": 0.5929439663887024, "step": 2062 }, { "epoch": 0.3811943504521255, "grad_norm": 0.06707214564085007, "learning_rate": 1.9511058212022584e-05, "loss": 0.5128438472747803, "step": 2063 }, { "epoch": 0.38137912716102135, "grad_norm": 0.08093805611133575, "learning_rate": 1.9510441862834212e-05, "loss": 0.6935490369796753, "step": 2064 }, { "epoch": 0.3815639038699172, "grad_norm": 0.08729652315378189, "learning_rate": 1.9509825135160006e-05, "loss": 0.8165642619132996, "step": 2065 }, { "epoch": 0.38174868057881306, "grad_norm": 0.08318503201007843, "learning_rate": 1.9509208029024514e-05, "loss": 0.7330926060676575, "step": 2066 }, { "epoch": 0.3819334572877089, "grad_norm": 0.08431824296712875, "learning_rate": 1.950859054445229e-05, "loss": 0.7111803293228149, "step": 2067 }, { "epoch": 0.3821182339966047, "grad_norm": 0.07840250432491302, "learning_rate": 1.9507972681467913e-05, "loss": 0.5990965962409973, "step": 2068 }, { "epoch": 0.38230301070550055, "grad_norm": 0.08080720901489258, "learning_rate": 1.950735444009597e-05, "loss": 0.7232369780540466, "step": 2069 }, { "epoch": 0.38248778741439643, "grad_norm": 0.07445263117551804, "learning_rate": 1.9506735820361065e-05, "loss": 0.7037575840950012, "step": 2070 }, { "epoch": 0.38267256412329226, "grad_norm": 0.07133829593658447, "learning_rate": 1.9506116822287818e-05, "loss": 0.7455885410308838, "step": 2071 }, { "epoch": 0.3828573408321881, "grad_norm": 0.07049156725406647, "learning_rate": 1.9505497445900864e-05, "loss": 0.5760311484336853, "step": 2072 }, { "epoch": 0.38304211754108397, "grad_norm": 0.06675473600625992, "learning_rate": 1.950487769122485e-05, "loss": 0.5767952799797058, "step": 2073 }, { "epoch": 0.3832268942499798, "grad_norm": 0.09521736949682236, "learning_rate": 1.9504257558284435e-05, "loss": 0.8993252515792847, "step": 2074 }, { "epoch": 0.38341167095887563, "grad_norm": 0.078230120241642, "learning_rate": 1.950363704710431e-05, "loss": 0.8286687135696411, "step": 2075 }, { "epoch": 0.38359644766777146, "grad_norm": 0.0930788516998291, "learning_rate": 1.9503016157709163e-05, "loss": 0.9682884812355042, "step": 2076 }, { "epoch": 0.38378122437666734, "grad_norm": 0.07385998964309692, "learning_rate": 1.95023948901237e-05, "loss": 0.7313173413276672, "step": 2077 }, { "epoch": 0.38396600108556317, "grad_norm": 0.07340817898511887, "learning_rate": 1.9501773244372654e-05, "loss": 0.6114804744720459, "step": 2078 }, { "epoch": 0.384150777794459, "grad_norm": 0.0526450015604496, "learning_rate": 1.9501151220480757e-05, "loss": 0.4882235825061798, "step": 2079 }, { "epoch": 0.3843355545033548, "grad_norm": 0.0750875324010849, "learning_rate": 1.950052881847277e-05, "loss": 0.64805668592453, "step": 2080 }, { "epoch": 0.3845203312122507, "grad_norm": 0.08840855956077576, "learning_rate": 1.9499906038373458e-05, "loss": 0.7312134504318237, "step": 2081 }, { "epoch": 0.38470510792114654, "grad_norm": 0.08124563843011856, "learning_rate": 1.9499282880207605e-05, "loss": 0.6080642342567444, "step": 2082 }, { "epoch": 0.38488988463004237, "grad_norm": 0.0850062444806099, "learning_rate": 1.9498659344000014e-05, "loss": 0.7977728247642517, "step": 2083 }, { "epoch": 0.38507466133893825, "grad_norm": 0.09203612804412842, "learning_rate": 1.94980354297755e-05, "loss": 0.9024616479873657, "step": 2084 }, { "epoch": 0.3852594380478341, "grad_norm": 0.08661805838346481, "learning_rate": 1.9497411137558887e-05, "loss": 0.7511764764785767, "step": 2085 }, { "epoch": 0.3854442147567299, "grad_norm": 0.08258519321680069, "learning_rate": 1.9496786467375028e-05, "loss": 0.6556458473205566, "step": 2086 }, { "epoch": 0.38562899146562574, "grad_norm": 0.0830867737531662, "learning_rate": 1.949616141924878e-05, "loss": 0.6187492609024048, "step": 2087 }, { "epoch": 0.3858137681745216, "grad_norm": 0.057450730353593826, "learning_rate": 1.9495535993205015e-05, "loss": 0.5264581441879272, "step": 2088 }, { "epoch": 0.38599854488341745, "grad_norm": 0.07087472826242447, "learning_rate": 1.9494910189268627e-05, "loss": 0.5897881984710693, "step": 2089 }, { "epoch": 0.3861833215923133, "grad_norm": 0.074063740670681, "learning_rate": 1.949428400746452e-05, "loss": 0.41536253690719604, "step": 2090 }, { "epoch": 0.3863680983012091, "grad_norm": 0.0563860647380352, "learning_rate": 1.949365744781761e-05, "loss": 0.40170884132385254, "step": 2091 }, { "epoch": 0.386552875010105, "grad_norm": 0.06514260172843933, "learning_rate": 1.9493030510352838e-05, "loss": 0.49167242646217346, "step": 2092 }, { "epoch": 0.3867376517190008, "grad_norm": 0.07091984152793884, "learning_rate": 1.9492403195095152e-05, "loss": 0.615524411201477, "step": 2093 }, { "epoch": 0.38692242842789665, "grad_norm": 0.07864012569189072, "learning_rate": 1.9491775502069513e-05, "loss": 0.6243424415588379, "step": 2094 }, { "epoch": 0.38710720513679253, "grad_norm": 0.05738931894302368, "learning_rate": 1.949114743130091e-05, "loss": 0.42757418751716614, "step": 2095 }, { "epoch": 0.38729198184568836, "grad_norm": 0.0666792094707489, "learning_rate": 1.949051898281433e-05, "loss": 0.7218016386032104, "step": 2096 }, { "epoch": 0.3874767585545842, "grad_norm": 0.07960478216409683, "learning_rate": 1.9489890156634787e-05, "loss": 0.7451041340827942, "step": 2097 }, { "epoch": 0.38766153526348, "grad_norm": 0.06650031358003616, "learning_rate": 1.9489260952787305e-05, "loss": 0.5651580691337585, "step": 2098 }, { "epoch": 0.3878463119723759, "grad_norm": 0.0677560567855835, "learning_rate": 1.948863137129693e-05, "loss": 0.6015886664390564, "step": 2099 }, { "epoch": 0.38803108868127173, "grad_norm": 0.07975464314222336, "learning_rate": 1.9488001412188705e-05, "loss": 0.7732228636741638, "step": 2100 }, { "epoch": 0.38821586539016756, "grad_norm": 0.07170242816209793, "learning_rate": 1.948737107548771e-05, "loss": 0.6001203656196594, "step": 2101 }, { "epoch": 0.3884006420990634, "grad_norm": 0.07086360454559326, "learning_rate": 1.9486740361219034e-05, "loss": 0.6004417538642883, "step": 2102 }, { "epoch": 0.38858541880795927, "grad_norm": 0.09102153033018112, "learning_rate": 1.9486109269407768e-05, "loss": 0.864840030670166, "step": 2103 }, { "epoch": 0.3887701955168551, "grad_norm": 0.07844554632902145, "learning_rate": 1.948547780007903e-05, "loss": 0.6806324124336243, "step": 2104 }, { "epoch": 0.38895497222575093, "grad_norm": 0.0875028744339943, "learning_rate": 1.948484595325795e-05, "loss": 0.7554509043693542, "step": 2105 }, { "epoch": 0.3891397489346468, "grad_norm": 0.07255323976278305, "learning_rate": 1.9484213728969685e-05, "loss": 0.8027604818344116, "step": 2106 }, { "epoch": 0.38932452564354264, "grad_norm": 0.07519349455833435, "learning_rate": 1.9483581127239377e-05, "loss": 0.5965269804000854, "step": 2107 }, { "epoch": 0.38950930235243847, "grad_norm": 0.09502169489860535, "learning_rate": 1.9482948148092212e-05, "loss": 1.1054532527923584, "step": 2108 }, { "epoch": 0.3896940790613343, "grad_norm": 0.08516938239336014, "learning_rate": 1.948231479155338e-05, "loss": 0.7914494872093201, "step": 2109 }, { "epoch": 0.3898788557702302, "grad_norm": 0.06948333978652954, "learning_rate": 1.9481681057648085e-05, "loss": 0.6895288825035095, "step": 2110 }, { "epoch": 0.390063632479126, "grad_norm": 0.08115935325622559, "learning_rate": 1.9481046946401548e-05, "loss": 0.6990054845809937, "step": 2111 }, { "epoch": 0.39024840918802184, "grad_norm": 0.07175900787115097, "learning_rate": 1.9480412457839004e-05, "loss": 0.5589213371276855, "step": 2112 }, { "epoch": 0.39043318589691767, "grad_norm": 0.06155014783143997, "learning_rate": 1.9479777591985706e-05, "loss": 0.5068143606185913, "step": 2113 }, { "epoch": 0.39061796260581355, "grad_norm": 0.08584966510534286, "learning_rate": 1.947914234886692e-05, "loss": 0.8504391312599182, "step": 2114 }, { "epoch": 0.3908027393147094, "grad_norm": 0.08797426521778107, "learning_rate": 1.9478506728507925e-05, "loss": 0.7745741009712219, "step": 2115 }, { "epoch": 0.3909875160236052, "grad_norm": 0.08473818749189377, "learning_rate": 1.9477870730934014e-05, "loss": 0.7680900692939758, "step": 2116 }, { "epoch": 0.3911722927325011, "grad_norm": 0.06665950268507004, "learning_rate": 1.94772343561705e-05, "loss": 0.5885246992111206, "step": 2117 }, { "epoch": 0.3913570694413969, "grad_norm": 0.07183478772640228, "learning_rate": 1.947659760424271e-05, "loss": 0.533574104309082, "step": 2118 }, { "epoch": 0.39154184615029275, "grad_norm": 0.07430543750524521, "learning_rate": 1.9475960475175985e-05, "loss": 0.6689956188201904, "step": 2119 }, { "epoch": 0.3917266228591886, "grad_norm": 0.07013824582099915, "learning_rate": 1.947532296899568e-05, "loss": 0.6077504754066467, "step": 2120 }, { "epoch": 0.39191139956808446, "grad_norm": 0.06455551832914352, "learning_rate": 1.9474685085727162e-05, "loss": 0.40655583143234253, "step": 2121 }, { "epoch": 0.3920961762769803, "grad_norm": 0.08743591606616974, "learning_rate": 1.9474046825395825e-05, "loss": 0.9286163449287415, "step": 2122 }, { "epoch": 0.3922809529858761, "grad_norm": 0.08486149460077286, "learning_rate": 1.947340818802706e-05, "loss": 0.8797714114189148, "step": 2123 }, { "epoch": 0.39246572969477195, "grad_norm": 0.06798680871725082, "learning_rate": 1.947276917364629e-05, "loss": 0.52814781665802, "step": 2124 }, { "epoch": 0.39265050640366783, "grad_norm": 0.08360527455806732, "learning_rate": 1.9472129782278944e-05, "loss": 0.7399903535842896, "step": 2125 }, { "epoch": 0.39283528311256366, "grad_norm": 0.06659283488988876, "learning_rate": 1.9471490013950464e-05, "loss": 0.5814614295959473, "step": 2126 }, { "epoch": 0.3930200598214595, "grad_norm": 0.08121927827596664, "learning_rate": 1.9470849868686315e-05, "loss": 0.7577459216117859, "step": 2127 }, { "epoch": 0.3932048365303554, "grad_norm": 0.07345825433731079, "learning_rate": 1.9470209346511977e-05, "loss": 0.6281236410140991, "step": 2128 }, { "epoch": 0.3933896132392512, "grad_norm": 0.06721615046262741, "learning_rate": 1.946956844745293e-05, "loss": 0.5942303538322449, "step": 2129 }, { "epoch": 0.39357438994814703, "grad_norm": 0.07023970037698746, "learning_rate": 1.9468927171534685e-05, "loss": 0.6943172216415405, "step": 2130 }, { "epoch": 0.39375916665704286, "grad_norm": 0.0984039232134819, "learning_rate": 1.9468285518782764e-05, "loss": 0.88523268699646, "step": 2131 }, { "epoch": 0.39394394336593874, "grad_norm": 0.08739271014928818, "learning_rate": 1.9467643489222704e-05, "loss": 0.5721750855445862, "step": 2132 }, { "epoch": 0.39412872007483457, "grad_norm": 0.06664688140153885, "learning_rate": 1.9467001082880054e-05, "loss": 0.6401693224906921, "step": 2133 }, { "epoch": 0.3943134967837304, "grad_norm": 0.07537417113780975, "learning_rate": 1.946635829978038e-05, "loss": 0.6778192520141602, "step": 2134 }, { "epoch": 0.39449827349262623, "grad_norm": 0.0772600993514061, "learning_rate": 1.9465715139949254e-05, "loss": 0.6469069719314575, "step": 2135 }, { "epoch": 0.3946830502015221, "grad_norm": 0.06720546633005142, "learning_rate": 1.9465071603412287e-05, "loss": 0.5592214465141296, "step": 2136 }, { "epoch": 0.39486782691041794, "grad_norm": 0.062190715223550797, "learning_rate": 1.946442769019508e-05, "loss": 0.5607873201370239, "step": 2137 }, { "epoch": 0.39505260361931377, "grad_norm": 0.07531455159187317, "learning_rate": 1.9463783400323263e-05, "loss": 0.7235403656959534, "step": 2138 }, { "epoch": 0.39523738032820965, "grad_norm": 0.0764726921916008, "learning_rate": 1.9463138733822475e-05, "loss": 0.6067501306533813, "step": 2139 }, { "epoch": 0.3954221570371055, "grad_norm": 0.06785880029201508, "learning_rate": 1.9462493690718373e-05, "loss": 0.6442311406135559, "step": 2140 }, { "epoch": 0.3956069337460013, "grad_norm": 0.08522092550992966, "learning_rate": 1.9461848271036623e-05, "loss": 0.7445117235183716, "step": 2141 }, { "epoch": 0.39579171045489714, "grad_norm": 0.09842310845851898, "learning_rate": 1.9461202474802914e-05, "loss": 1.112312912940979, "step": 2142 }, { "epoch": 0.395976487163793, "grad_norm": 0.0805714949965477, "learning_rate": 1.946055630204295e-05, "loss": 0.7900038361549377, "step": 2143 }, { "epoch": 0.39616126387268885, "grad_norm": 0.06138754263520241, "learning_rate": 1.9459909752782444e-05, "loss": 0.6165653467178345, "step": 2144 }, { "epoch": 0.3963460405815847, "grad_norm": 0.07790310680866241, "learning_rate": 1.945926282704712e-05, "loss": 0.7886402010917664, "step": 2145 }, { "epoch": 0.3965308172904805, "grad_norm": 0.07908566296100616, "learning_rate": 1.9458615524862734e-05, "loss": 0.786962628364563, "step": 2146 }, { "epoch": 0.3967155939993764, "grad_norm": 0.0615328773856163, "learning_rate": 1.9457967846255045e-05, "loss": 0.6100361943244934, "step": 2147 }, { "epoch": 0.3969003707082722, "grad_norm": 0.07457233220338821, "learning_rate": 1.945731979124982e-05, "loss": 0.6171791553497314, "step": 2148 }, { "epoch": 0.39708514741716805, "grad_norm": 0.08400756865739822, "learning_rate": 1.9456671359872858e-05, "loss": 0.6951082348823547, "step": 2149 }, { "epoch": 0.39726992412606393, "grad_norm": 0.07961725443601608, "learning_rate": 1.9456022552149965e-05, "loss": 0.8372227549552917, "step": 2150 }, { "epoch": 0.39745470083495976, "grad_norm": 0.0670628771185875, "learning_rate": 1.9455373368106952e-05, "loss": 0.6422262191772461, "step": 2151 }, { "epoch": 0.3976394775438556, "grad_norm": 0.06386101990938187, "learning_rate": 1.9454723807769665e-05, "loss": 0.5038297176361084, "step": 2152 }, { "epoch": 0.3978242542527514, "grad_norm": 0.06022537499666214, "learning_rate": 1.945407387116395e-05, "loss": 0.5005338788032532, "step": 2153 }, { "epoch": 0.3980090309616473, "grad_norm": 0.08071392774581909, "learning_rate": 1.9453423558315677e-05, "loss": 0.6604102253913879, "step": 2154 }, { "epoch": 0.39819380767054313, "grad_norm": 0.06811917573213577, "learning_rate": 1.9452772869250718e-05, "loss": 0.5015507936477661, "step": 2155 }, { "epoch": 0.39837858437943896, "grad_norm": 0.06803597509860992, "learning_rate": 1.9452121803994973e-05, "loss": 0.4728114902973175, "step": 2156 }, { "epoch": 0.3985633610883348, "grad_norm": 0.06741619855165482, "learning_rate": 1.945147036257435e-05, "loss": 0.498307466506958, "step": 2157 }, { "epoch": 0.3987481377972307, "grad_norm": 0.07278750091791153, "learning_rate": 1.945081854501478e-05, "loss": 0.6582752466201782, "step": 2158 }, { "epoch": 0.3989329145061265, "grad_norm": 0.07266269624233246, "learning_rate": 1.9450166351342198e-05, "loss": 0.630315363407135, "step": 2159 }, { "epoch": 0.39911769121502233, "grad_norm": 0.07647449523210526, "learning_rate": 1.9449513781582562e-05, "loss": 0.7801526188850403, "step": 2160 }, { "epoch": 0.3993024679239182, "grad_norm": 0.08277647942304611, "learning_rate": 1.9448860835761842e-05, "loss": 0.7490278482437134, "step": 2161 }, { "epoch": 0.39948724463281404, "grad_norm": 0.060939326882362366, "learning_rate": 1.944820751390602e-05, "loss": 0.5510382652282715, "step": 2162 }, { "epoch": 0.39967202134170987, "grad_norm": 0.06312974542379379, "learning_rate": 1.94475538160411e-05, "loss": 0.6411457061767578, "step": 2163 }, { "epoch": 0.3998567980506057, "grad_norm": 0.06613600999116898, "learning_rate": 1.9446899742193097e-05, "loss": 0.5749844908714294, "step": 2164 }, { "epoch": 0.4000415747595016, "grad_norm": 0.07607866078615189, "learning_rate": 1.944624529238804e-05, "loss": 0.6170638203620911, "step": 2165 }, { "epoch": 0.4002263514683974, "grad_norm": 0.08194948732852936, "learning_rate": 1.944559046665197e-05, "loss": 0.7000471353530884, "step": 2166 }, { "epoch": 0.40041112817729324, "grad_norm": 0.08070991188287735, "learning_rate": 1.9444935265010954e-05, "loss": 0.7806636691093445, "step": 2167 }, { "epoch": 0.40059590488618907, "grad_norm": 0.0809662863612175, "learning_rate": 1.9444279687491065e-05, "loss": 0.7248894572257996, "step": 2168 }, { "epoch": 0.40078068159508495, "grad_norm": 0.07183996587991714, "learning_rate": 1.9443623734118388e-05, "loss": 0.7464427947998047, "step": 2169 }, { "epoch": 0.4009654583039808, "grad_norm": 0.0695219561457634, "learning_rate": 1.944296740491903e-05, "loss": 0.5609503984451294, "step": 2170 }, { "epoch": 0.4011502350128766, "grad_norm": 0.053213201463222504, "learning_rate": 1.944231069991912e-05, "loss": 0.49357420206069946, "step": 2171 }, { "epoch": 0.4013350117217725, "grad_norm": 0.06042713671922684, "learning_rate": 1.944165361914478e-05, "loss": 0.7338321805000305, "step": 2172 }, { "epoch": 0.4015197884306683, "grad_norm": 0.08467214554548264, "learning_rate": 1.944099616262217e-05, "loss": 0.7917910814285278, "step": 2173 }, { "epoch": 0.40170456513956415, "grad_norm": 0.07415515929460526, "learning_rate": 1.9440338330377444e-05, "loss": 0.6823109984397888, "step": 2174 }, { "epoch": 0.40188934184846, "grad_norm": 0.07411254197359085, "learning_rate": 1.943968012243679e-05, "loss": 0.7468394041061401, "step": 2175 }, { "epoch": 0.40207411855735586, "grad_norm": 0.08624184131622314, "learning_rate": 1.9439021538826398e-05, "loss": 0.7924304008483887, "step": 2176 }, { "epoch": 0.4022588952662517, "grad_norm": 0.05622793734073639, "learning_rate": 1.943836257957248e-05, "loss": 0.4555974304676056, "step": 2177 }, { "epoch": 0.4024436719751475, "grad_norm": 0.08712554723024368, "learning_rate": 1.9437703244701266e-05, "loss": 0.6834219098091125, "step": 2178 }, { "epoch": 0.40262844868404335, "grad_norm": 0.07262444496154785, "learning_rate": 1.9437043534238985e-05, "loss": 0.6461427807807922, "step": 2179 }, { "epoch": 0.40281322539293923, "grad_norm": 0.07889416068792343, "learning_rate": 1.9436383448211895e-05, "loss": 0.6368575692176819, "step": 2180 }, { "epoch": 0.40299800210183506, "grad_norm": 0.06394167989492416, "learning_rate": 1.943572298664627e-05, "loss": 0.5541520714759827, "step": 2181 }, { "epoch": 0.4031827788107309, "grad_norm": 0.0851161926984787, "learning_rate": 1.9435062149568386e-05, "loss": 0.9377284646034241, "step": 2182 }, { "epoch": 0.4033675555196268, "grad_norm": 0.07163845002651215, "learning_rate": 1.9434400937004548e-05, "loss": 0.6254559755325317, "step": 2183 }, { "epoch": 0.4035523322285226, "grad_norm": 0.08405304700136185, "learning_rate": 1.9433739348981074e-05, "loss": 0.7649668455123901, "step": 2184 }, { "epoch": 0.40373710893741843, "grad_norm": 0.07005638629198074, "learning_rate": 1.9433077385524284e-05, "loss": 0.6673843860626221, "step": 2185 }, { "epoch": 0.40392188564631426, "grad_norm": 0.08381688594818115, "learning_rate": 1.9432415046660526e-05, "loss": 0.7347395420074463, "step": 2186 }, { "epoch": 0.40410666235521014, "grad_norm": 0.0781802386045456, "learning_rate": 1.943175233241616e-05, "loss": 0.7603856325149536, "step": 2187 }, { "epoch": 0.404291439064106, "grad_norm": 0.07804013043642044, "learning_rate": 1.9431089242817564e-05, "loss": 0.7772961258888245, "step": 2188 }, { "epoch": 0.4044762157730018, "grad_norm": 0.0823303833603859, "learning_rate": 1.9430425777891116e-05, "loss": 0.7311716079711914, "step": 2189 }, { "epoch": 0.40466099248189763, "grad_norm": 0.06019553914666176, "learning_rate": 1.942976193766323e-05, "loss": 0.5009331703186035, "step": 2190 }, { "epoch": 0.4048457691907935, "grad_norm": 0.07683239877223969, "learning_rate": 1.942909772216032e-05, "loss": 0.7285566926002502, "step": 2191 }, { "epoch": 0.40503054589968934, "grad_norm": 0.09352260082960129, "learning_rate": 1.9428433131408816e-05, "loss": 0.8195133805274963, "step": 2192 }, { "epoch": 0.40521532260858517, "grad_norm": 0.09269582480192184, "learning_rate": 1.9427768165435177e-05, "loss": 0.843238890171051, "step": 2193 }, { "epoch": 0.40540009931748106, "grad_norm": 0.08898035436868668, "learning_rate": 1.9427102824265858e-05, "loss": 0.8769370913505554, "step": 2194 }, { "epoch": 0.4055848760263769, "grad_norm": 0.08849231898784637, "learning_rate": 1.942643710792734e-05, "loss": 0.5764329433441162, "step": 2195 }, { "epoch": 0.4057696527352727, "grad_norm": 0.06048697978258133, "learning_rate": 1.942577101644612e-05, "loss": 0.38896650075912476, "step": 2196 }, { "epoch": 0.40595442944416854, "grad_norm": 0.07495073974132538, "learning_rate": 1.94251045498487e-05, "loss": 0.5774204730987549, "step": 2197 }, { "epoch": 0.4061392061530644, "grad_norm": 0.07874375581741333, "learning_rate": 1.9424437708161605e-05, "loss": 0.6599615216255188, "step": 2198 }, { "epoch": 0.40632398286196025, "grad_norm": 0.08063821494579315, "learning_rate": 1.9423770491411375e-05, "loss": 0.5673590302467346, "step": 2199 }, { "epoch": 0.4065087595708561, "grad_norm": 0.08884714543819427, "learning_rate": 1.9423102899624565e-05, "loss": 0.8434959053993225, "step": 2200 }, { "epoch": 0.4066935362797519, "grad_norm": 0.0742383748292923, "learning_rate": 1.9422434932827737e-05, "loss": 0.6055968999862671, "step": 2201 }, { "epoch": 0.4068783129886478, "grad_norm": 0.08328180760145187, "learning_rate": 1.9421766591047483e-05, "loss": 0.9570925831794739, "step": 2202 }, { "epoch": 0.4070630896975436, "grad_norm": 0.0653611570596695, "learning_rate": 1.9421097874310394e-05, "loss": 0.6053483486175537, "step": 2203 }, { "epoch": 0.40724786640643945, "grad_norm": 0.07700639218091965, "learning_rate": 1.9420428782643083e-05, "loss": 0.6471602916717529, "step": 2204 }, { "epoch": 0.40743264311533534, "grad_norm": 0.07719118893146515, "learning_rate": 1.941975931607218e-05, "loss": 0.6258816719055176, "step": 2205 }, { "epoch": 0.40761741982423116, "grad_norm": 0.067261703312397, "learning_rate": 1.9419089474624326e-05, "loss": 0.5652295351028442, "step": 2206 }, { "epoch": 0.407802196533127, "grad_norm": 0.06438491493463516, "learning_rate": 1.941841925832618e-05, "loss": 0.49911659955978394, "step": 2207 }, { "epoch": 0.4079869732420228, "grad_norm": 0.07922912389039993, "learning_rate": 1.9417748667204414e-05, "loss": 0.5777056813240051, "step": 2208 }, { "epoch": 0.4081717499509187, "grad_norm": 0.08038914203643799, "learning_rate": 1.9417077701285714e-05, "loss": 0.7142505645751953, "step": 2209 }, { "epoch": 0.40835652665981453, "grad_norm": 0.059858158230781555, "learning_rate": 1.9416406360596785e-05, "loss": 0.38203585147857666, "step": 2210 }, { "epoch": 0.40854130336871036, "grad_norm": 0.07125923037528992, "learning_rate": 1.9415734645164343e-05, "loss": 0.6019932627677917, "step": 2211 }, { "epoch": 0.4087260800776062, "grad_norm": 0.08031252771615982, "learning_rate": 1.941506255501512e-05, "loss": 0.718646764755249, "step": 2212 }, { "epoch": 0.4089108567865021, "grad_norm": 0.06486880779266357, "learning_rate": 1.9414390090175864e-05, "loss": 0.5900680422782898, "step": 2213 }, { "epoch": 0.4090956334953979, "grad_norm": 0.06934762746095657, "learning_rate": 1.9413717250673333e-05, "loss": 0.49603864550590515, "step": 2214 }, { "epoch": 0.40928041020429373, "grad_norm": 0.052057359367609024, "learning_rate": 1.941304403653431e-05, "loss": 0.4228403866291046, "step": 2215 }, { "epoch": 0.4094651869131896, "grad_norm": 0.06075945496559143, "learning_rate": 1.9412370447785586e-05, "loss": 0.6137130856513977, "step": 2216 }, { "epoch": 0.40964996362208544, "grad_norm": 0.06219998747110367, "learning_rate": 1.941169648445396e-05, "loss": 0.547929584980011, "step": 2217 }, { "epoch": 0.4098347403309813, "grad_norm": 0.09154465049505234, "learning_rate": 1.9411022146566266e-05, "loss": 0.8030654788017273, "step": 2218 }, { "epoch": 0.4100195170398771, "grad_norm": 0.08464067429304123, "learning_rate": 1.941034743414933e-05, "loss": 0.8160238265991211, "step": 2219 }, { "epoch": 0.410204293748773, "grad_norm": 0.07993149757385254, "learning_rate": 1.9409672347230008e-05, "loss": 0.6873763203620911, "step": 2220 }, { "epoch": 0.4103890704576688, "grad_norm": 0.07135781645774841, "learning_rate": 1.9408996885835166e-05, "loss": 0.5866267681121826, "step": 2221 }, { "epoch": 0.41057384716656464, "grad_norm": 0.08876077830791473, "learning_rate": 1.9408321049991684e-05, "loss": 0.8736963272094727, "step": 2222 }, { "epoch": 0.41075862387546047, "grad_norm": 0.07346765697002411, "learning_rate": 1.940764483972646e-05, "loss": 0.7503250241279602, "step": 2223 }, { "epoch": 0.41094340058435636, "grad_norm": 0.07542257755994797, "learning_rate": 1.9406968255066403e-05, "loss": 0.6756606698036194, "step": 2224 }, { "epoch": 0.4111281772932522, "grad_norm": 0.06280212849378586, "learning_rate": 1.940629129603844e-05, "loss": 0.5291039347648621, "step": 2225 }, { "epoch": 0.411312954002148, "grad_norm": 0.0706767737865448, "learning_rate": 1.940561396266951e-05, "loss": 0.6159385442733765, "step": 2226 }, { "epoch": 0.4114977307110439, "grad_norm": 0.07819431275129318, "learning_rate": 1.9404936254986576e-05, "loss": 0.7118359208106995, "step": 2227 }, { "epoch": 0.4116825074199397, "grad_norm": 0.06865965574979782, "learning_rate": 1.94042581730166e-05, "loss": 0.7549982070922852, "step": 2228 }, { "epoch": 0.41186728412883555, "grad_norm": 0.07349448651075363, "learning_rate": 1.9403579716786572e-05, "loss": 0.6773434281349182, "step": 2229 }, { "epoch": 0.4120520608377314, "grad_norm": 0.07281513512134552, "learning_rate": 1.9402900886323492e-05, "loss": 0.6251955032348633, "step": 2230 }, { "epoch": 0.41223683754662727, "grad_norm": 0.07288835942745209, "learning_rate": 1.940222168165437e-05, "loss": 0.6025035381317139, "step": 2231 }, { "epoch": 0.4124216142555231, "grad_norm": 0.08412369340658188, "learning_rate": 1.9401542102806248e-05, "loss": 0.6773068308830261, "step": 2232 }, { "epoch": 0.4126063909644189, "grad_norm": 0.10129547864198685, "learning_rate": 1.940086214980616e-05, "loss": 0.8324509263038635, "step": 2233 }, { "epoch": 0.41279116767331475, "grad_norm": 0.091436967253685, "learning_rate": 1.940018182268117e-05, "loss": 0.8332235813140869, "step": 2234 }, { "epoch": 0.41297594438221064, "grad_norm": 0.0675014853477478, "learning_rate": 1.9399501121458354e-05, "loss": 0.46324366331100464, "step": 2235 }, { "epoch": 0.41316072109110646, "grad_norm": 0.07934007793664932, "learning_rate": 1.93988200461648e-05, "loss": 0.6471525430679321, "step": 2236 }, { "epoch": 0.4133454978000023, "grad_norm": 0.07486380636692047, "learning_rate": 1.939813859682761e-05, "loss": 0.572313129901886, "step": 2237 }, { "epoch": 0.4135302745088982, "grad_norm": 0.09455690532922745, "learning_rate": 1.939745677347391e-05, "loss": 0.9386391043663025, "step": 2238 }, { "epoch": 0.413715051217794, "grad_norm": 0.07782839238643646, "learning_rate": 1.9396774576130834e-05, "loss": 0.8280326128005981, "step": 2239 }, { "epoch": 0.41389982792668983, "grad_norm": 0.08085377514362335, "learning_rate": 1.9396092004825523e-05, "loss": 0.7216889262199402, "step": 2240 }, { "epoch": 0.41408460463558566, "grad_norm": 0.06378732621669769, "learning_rate": 1.939540905958515e-05, "loss": 0.5238621830940247, "step": 2241 }, { "epoch": 0.41426938134448155, "grad_norm": 0.08064355701208115, "learning_rate": 1.939472574043689e-05, "loss": 0.6881387233734131, "step": 2242 }, { "epoch": 0.4144541580533774, "grad_norm": 0.08286409080028534, "learning_rate": 1.939404204740794e-05, "loss": 0.760225772857666, "step": 2243 }, { "epoch": 0.4146389347622732, "grad_norm": 0.08433771133422852, "learning_rate": 1.9393357980525504e-05, "loss": 0.7714629173278809, "step": 2244 }, { "epoch": 0.41482371147116903, "grad_norm": 0.07636398077011108, "learning_rate": 1.9392673539816812e-05, "loss": 0.5675088763237, "step": 2245 }, { "epoch": 0.4150084881800649, "grad_norm": 0.09437257796525955, "learning_rate": 1.9391988725309096e-05, "loss": 0.6651148200035095, "step": 2246 }, { "epoch": 0.41519326488896074, "grad_norm": 0.06223485618829727, "learning_rate": 1.9391303537029612e-05, "loss": 0.66727215051651, "step": 2247 }, { "epoch": 0.4153780415978566, "grad_norm": 0.06689508259296417, "learning_rate": 1.939061797500563e-05, "loss": 0.5697513818740845, "step": 2248 }, { "epoch": 0.41556281830675246, "grad_norm": 0.07865443825721741, "learning_rate": 1.938993203926443e-05, "loss": 0.7371279001235962, "step": 2249 }, { "epoch": 0.4157475950156483, "grad_norm": 0.09136402606964111, "learning_rate": 1.9389245729833315e-05, "loss": 0.7764683961868286, "step": 2250 }, { "epoch": 0.4159323717245441, "grad_norm": 0.06448964774608612, "learning_rate": 1.9388559046739594e-05, "loss": 0.7599173188209534, "step": 2251 }, { "epoch": 0.41611714843343994, "grad_norm": 0.07628330588340759, "learning_rate": 1.9387871990010596e-05, "loss": 0.6795546412467957, "step": 2252 }, { "epoch": 0.4163019251423358, "grad_norm": 0.06867800652980804, "learning_rate": 1.9387184559673665e-05, "loss": 0.523367166519165, "step": 2253 }, { "epoch": 0.41648670185123166, "grad_norm": 0.06983886659145355, "learning_rate": 1.9386496755756156e-05, "loss": 0.6407635807991028, "step": 2254 }, { "epoch": 0.4166714785601275, "grad_norm": 0.07012315839529037, "learning_rate": 1.9385808578285445e-05, "loss": 0.5064341425895691, "step": 2255 }, { "epoch": 0.4168562552690233, "grad_norm": 0.08256127685308456, "learning_rate": 1.9385120027288914e-05, "loss": 0.7292260527610779, "step": 2256 }, { "epoch": 0.4170410319779192, "grad_norm": 0.09244728088378906, "learning_rate": 1.938443110279397e-05, "loss": 0.9887353181838989, "step": 2257 }, { "epoch": 0.417225808686815, "grad_norm": 0.07802069187164307, "learning_rate": 1.9383741804828024e-05, "loss": 0.7937911748886108, "step": 2258 }, { "epoch": 0.41741058539571085, "grad_norm": 0.07037319988012314, "learning_rate": 1.9383052133418517e-05, "loss": 0.6395033001899719, "step": 2259 }, { "epoch": 0.41759536210460674, "grad_norm": 0.0810866579413414, "learning_rate": 1.938236208859289e-05, "loss": 0.7780205011367798, "step": 2260 }, { "epoch": 0.41778013881350257, "grad_norm": 0.07843460887670517, "learning_rate": 1.93816716703786e-05, "loss": 0.7478843927383423, "step": 2261 }, { "epoch": 0.4179649155223984, "grad_norm": 0.07771844416856766, "learning_rate": 1.9380980878803135e-05, "loss": 0.7685114741325378, "step": 2262 }, { "epoch": 0.4181496922312942, "grad_norm": 0.07894376665353775, "learning_rate": 1.938028971389398e-05, "loss": 0.5902146697044373, "step": 2263 }, { "epoch": 0.4183344689401901, "grad_norm": 0.07636061310768127, "learning_rate": 1.937959817567864e-05, "loss": 0.6044233441352844, "step": 2264 }, { "epoch": 0.41851924564908594, "grad_norm": 0.08153094351291656, "learning_rate": 1.9378906264184638e-05, "loss": 0.6065340042114258, "step": 2265 }, { "epoch": 0.41870402235798176, "grad_norm": 0.07207811623811722, "learning_rate": 1.937821397943951e-05, "loss": 0.6241428852081299, "step": 2266 }, { "epoch": 0.4188887990668776, "grad_norm": 0.07843715697526932, "learning_rate": 1.9377521321470806e-05, "loss": 0.5644761919975281, "step": 2267 }, { "epoch": 0.4190735757757735, "grad_norm": 0.08663053065538406, "learning_rate": 1.9376828290306093e-05, "loss": 0.9684317708015442, "step": 2268 }, { "epoch": 0.4192583524846693, "grad_norm": 0.0744810476899147, "learning_rate": 1.9376134885972948e-05, "loss": 0.6165178418159485, "step": 2269 }, { "epoch": 0.41944312919356513, "grad_norm": 0.07540447264909744, "learning_rate": 1.937544110849897e-05, "loss": 0.5572347044944763, "step": 2270 }, { "epoch": 0.419627905902461, "grad_norm": 0.06462650746107101, "learning_rate": 1.9374746957911768e-05, "loss": 0.6695753335952759, "step": 2271 }, { "epoch": 0.41981268261135685, "grad_norm": 0.06049533188343048, "learning_rate": 1.937405243423897e-05, "loss": 0.5189021825790405, "step": 2272 }, { "epoch": 0.4199974593202527, "grad_norm": 0.0741419643163681, "learning_rate": 1.9373357537508205e-05, "loss": 0.7069633603096008, "step": 2273 }, { "epoch": 0.4201822360291485, "grad_norm": 0.062040168792009354, "learning_rate": 1.937266226774714e-05, "loss": 0.5386475920677185, "step": 2274 }, { "epoch": 0.4203670127380444, "grad_norm": 0.08488454669713974, "learning_rate": 1.937196662498344e-05, "loss": 0.685762345790863, "step": 2275 }, { "epoch": 0.4205517894469402, "grad_norm": 0.07908879220485687, "learning_rate": 1.937127060924479e-05, "loss": 0.7075814008712769, "step": 2276 }, { "epoch": 0.42073656615583604, "grad_norm": 0.09031480550765991, "learning_rate": 1.9370574220558888e-05, "loss": 0.6897242665290833, "step": 2277 }, { "epoch": 0.4209213428647319, "grad_norm": 0.07690101861953735, "learning_rate": 1.936987745895345e-05, "loss": 0.5031424760818481, "step": 2278 }, { "epoch": 0.42110611957362776, "grad_norm": 0.06790696084499359, "learning_rate": 1.9369180324456204e-05, "loss": 0.6615981459617615, "step": 2279 }, { "epoch": 0.4212908962825236, "grad_norm": 0.08125462383031845, "learning_rate": 1.936848281709489e-05, "loss": 0.8710426092147827, "step": 2280 }, { "epoch": 0.4214756729914194, "grad_norm": 0.0631629079580307, "learning_rate": 1.9367784936897272e-05, "loss": 0.6070328950881958, "step": 2281 }, { "epoch": 0.4216604497003153, "grad_norm": 0.0831427350640297, "learning_rate": 1.9367086683891123e-05, "loss": 0.703594446182251, "step": 2282 }, { "epoch": 0.4218452264092111, "grad_norm": 0.0841081514954567, "learning_rate": 1.936638805810423e-05, "loss": 0.6820002198219299, "step": 2283 }, { "epoch": 0.42203000311810696, "grad_norm": 0.07842420041561127, "learning_rate": 1.9365689059564393e-05, "loss": 0.6011175513267517, "step": 2284 }, { "epoch": 0.4222147798270028, "grad_norm": 0.08695252239704132, "learning_rate": 1.9364989688299432e-05, "loss": 0.8149831295013428, "step": 2285 }, { "epoch": 0.42239955653589867, "grad_norm": 0.08886296302080154, "learning_rate": 1.9364289944337185e-05, "loss": 0.6890162229537964, "step": 2286 }, { "epoch": 0.4225843332447945, "grad_norm": 0.0779884085059166, "learning_rate": 1.9363589827705494e-05, "loss": 0.8665019273757935, "step": 2287 }, { "epoch": 0.4227691099536903, "grad_norm": 0.08771474659442902, "learning_rate": 1.936288933843222e-05, "loss": 0.7957674860954285, "step": 2288 }, { "epoch": 0.42295388666258615, "grad_norm": 0.08377643674612045, "learning_rate": 1.936218847654525e-05, "loss": 0.7866319417953491, "step": 2289 }, { "epoch": 0.42313866337148204, "grad_norm": 0.06442653387784958, "learning_rate": 1.936148724207246e-05, "loss": 0.569203794002533, "step": 2290 }, { "epoch": 0.42332344008037787, "grad_norm": 0.07990846037864685, "learning_rate": 1.936078563504177e-05, "loss": 0.6724473834037781, "step": 2291 }, { "epoch": 0.4235082167892737, "grad_norm": 0.09859101474285126, "learning_rate": 1.93600836554811e-05, "loss": 0.7977859973907471, "step": 2292 }, { "epoch": 0.4236929934981696, "grad_norm": 0.05321364849805832, "learning_rate": 1.9359381303418384e-05, "loss": 0.45695260167121887, "step": 2293 }, { "epoch": 0.4238777702070654, "grad_norm": 0.07542157173156738, "learning_rate": 1.9358678578881572e-05, "loss": 0.6335441470146179, "step": 2294 }, { "epoch": 0.42406254691596124, "grad_norm": 0.08657735586166382, "learning_rate": 1.9357975481898634e-05, "loss": 0.8366142511367798, "step": 2295 }, { "epoch": 0.42424732362485706, "grad_norm": 0.05203680694103241, "learning_rate": 1.9357272012497546e-05, "loss": 0.38978737592697144, "step": 2296 }, { "epoch": 0.42443210033375295, "grad_norm": 0.07493631541728973, "learning_rate": 1.935656817070631e-05, "loss": 0.5956997275352478, "step": 2297 }, { "epoch": 0.4246168770426488, "grad_norm": 0.07352018356323242, "learning_rate": 1.9355863956552933e-05, "loss": 0.6605044603347778, "step": 2298 }, { "epoch": 0.4248016537515446, "grad_norm": 0.06980215758085251, "learning_rate": 1.935515937006544e-05, "loss": 0.5341028571128845, "step": 2299 }, { "epoch": 0.42498643046044043, "grad_norm": 0.07171224057674408, "learning_rate": 1.9354454411271874e-05, "loss": 0.6301398277282715, "step": 2300 }, { "epoch": 0.4251712071693363, "grad_norm": 0.0617557093501091, "learning_rate": 1.935374908020029e-05, "loss": 0.40994107723236084, "step": 2301 }, { "epoch": 0.42535598387823215, "grad_norm": 0.07506370544433594, "learning_rate": 1.9353043376878755e-05, "loss": 0.7342982292175293, "step": 2302 }, { "epoch": 0.425540760587128, "grad_norm": 0.06689701974391937, "learning_rate": 1.9352337301335355e-05, "loss": 0.6360087394714355, "step": 2303 }, { "epoch": 0.42572553729602386, "grad_norm": 0.08720137923955917, "learning_rate": 1.9351630853598193e-05, "loss": 0.8357270359992981, "step": 2304 }, { "epoch": 0.4259103140049197, "grad_norm": 0.0771905854344368, "learning_rate": 1.9350924033695378e-05, "loss": 0.7509700655937195, "step": 2305 }, { "epoch": 0.4260950907138155, "grad_norm": 0.06878393888473511, "learning_rate": 1.935021684165504e-05, "loss": 0.5679242610931396, "step": 2306 }, { "epoch": 0.42627986742271134, "grad_norm": 0.08172270655632019, "learning_rate": 1.9349509277505327e-05, "loss": 0.6561329960823059, "step": 2307 }, { "epoch": 0.42646464413160723, "grad_norm": 0.07850314676761627, "learning_rate": 1.9348801341274395e-05, "loss": 0.5804690718650818, "step": 2308 }, { "epoch": 0.42664942084050306, "grad_norm": 0.07448265701532364, "learning_rate": 1.934809303299042e-05, "loss": 0.7353978753089905, "step": 2309 }, { "epoch": 0.4268341975493989, "grad_norm": 0.0733490064740181, "learning_rate": 1.934738435268159e-05, "loss": 0.6897704005241394, "step": 2310 }, { "epoch": 0.4270189742582947, "grad_norm": 0.08506204932928085, "learning_rate": 1.93466753003761e-05, "loss": 0.7046520709991455, "step": 2311 }, { "epoch": 0.4272037509671906, "grad_norm": 0.06439055502414703, "learning_rate": 1.934596587610218e-05, "loss": 0.7001133561134338, "step": 2312 }, { "epoch": 0.4273885276760864, "grad_norm": 0.07183575630187988, "learning_rate": 1.934525607988806e-05, "loss": 0.6664788722991943, "step": 2313 }, { "epoch": 0.42757330438498226, "grad_norm": 0.07120703905820847, "learning_rate": 1.934454591176198e-05, "loss": 0.5988874435424805, "step": 2314 }, { "epoch": 0.42775808109387814, "grad_norm": 0.0718512088060379, "learning_rate": 1.9343835371752212e-05, "loss": 0.8172309398651123, "step": 2315 }, { "epoch": 0.42794285780277397, "grad_norm": 0.054339561611413956, "learning_rate": 1.934312445988703e-05, "loss": 0.4375886023044586, "step": 2316 }, { "epoch": 0.4281276345116698, "grad_norm": 0.054011277854442596, "learning_rate": 1.9342413176194724e-05, "loss": 0.5350235104560852, "step": 2317 }, { "epoch": 0.4283124112205656, "grad_norm": 0.05996915325522423, "learning_rate": 1.93417015207036e-05, "loss": 0.7294011116027832, "step": 2318 }, { "epoch": 0.4284971879294615, "grad_norm": 0.06260991841554642, "learning_rate": 1.9340989493441988e-05, "loss": 0.6489644646644592, "step": 2319 }, { "epoch": 0.42868196463835734, "grad_norm": 0.07388906925916672, "learning_rate": 1.9340277094438213e-05, "loss": 0.8232399225234985, "step": 2320 }, { "epoch": 0.42886674134725317, "grad_norm": 0.06844634562730789, "learning_rate": 1.933956432372063e-05, "loss": 0.5962044596672058, "step": 2321 }, { "epoch": 0.429051518056149, "grad_norm": 0.07142406702041626, "learning_rate": 1.933885118131761e-05, "loss": 0.5791578888893127, "step": 2322 }, { "epoch": 0.4292362947650449, "grad_norm": 0.07448078691959381, "learning_rate": 1.933813766725753e-05, "loss": 0.6933003664016724, "step": 2323 }, { "epoch": 0.4294210714739407, "grad_norm": 0.061442915350198746, "learning_rate": 1.9337423781568788e-05, "loss": 0.5783045291900635, "step": 2324 }, { "epoch": 0.42960584818283654, "grad_norm": 0.07764607667922974, "learning_rate": 1.933670952427979e-05, "loss": 0.6919342279434204, "step": 2325 }, { "epoch": 0.4297906248917324, "grad_norm": 0.05729961022734642, "learning_rate": 1.9335994895418965e-05, "loss": 0.45752331614494324, "step": 2326 }, { "epoch": 0.42997540160062825, "grad_norm": 0.06795137375593185, "learning_rate": 1.933527989501475e-05, "loss": 0.6426984071731567, "step": 2327 }, { "epoch": 0.4301601783095241, "grad_norm": 0.0695880651473999, "learning_rate": 1.9334564523095603e-05, "loss": 0.5382835268974304, "step": 2328 }, { "epoch": 0.4303449550184199, "grad_norm": 0.08700678497552872, "learning_rate": 1.933384877968999e-05, "loss": 0.8012987375259399, "step": 2329 }, { "epoch": 0.4305297317273158, "grad_norm": 0.08838114142417908, "learning_rate": 1.9333132664826403e-05, "loss": 0.8843043446540833, "step": 2330 }, { "epoch": 0.4307145084362116, "grad_norm": 0.07206934690475464, "learning_rate": 1.9332416178533327e-05, "loss": 0.6823926568031311, "step": 2331 }, { "epoch": 0.43089928514510745, "grad_norm": 0.061117593199014664, "learning_rate": 1.9331699320839293e-05, "loss": 0.5759217143058777, "step": 2332 }, { "epoch": 0.4310840618540033, "grad_norm": 0.08535294234752655, "learning_rate": 1.9330982091772817e-05, "loss": 0.7633690237998962, "step": 2333 }, { "epoch": 0.43126883856289916, "grad_norm": 0.07020317763090134, "learning_rate": 1.9330264491362446e-05, "loss": 0.5276690125465393, "step": 2334 }, { "epoch": 0.431453615271795, "grad_norm": 0.060124464333057404, "learning_rate": 1.932954651963674e-05, "loss": 0.5451531410217285, "step": 2335 }, { "epoch": 0.4316383919806908, "grad_norm": 0.06954493373632431, "learning_rate": 1.932882817662427e-05, "loss": 0.5817059874534607, "step": 2336 }, { "epoch": 0.4318231686895867, "grad_norm": 0.0750768631696701, "learning_rate": 1.9328109462353626e-05, "loss": 0.5557680726051331, "step": 2337 }, { "epoch": 0.43200794539848253, "grad_norm": 0.07473158091306686, "learning_rate": 1.932739037685341e-05, "loss": 0.646592378616333, "step": 2338 }, { "epoch": 0.43219272210737836, "grad_norm": 0.061547454446554184, "learning_rate": 1.9326670920152237e-05, "loss": 0.6056718826293945, "step": 2339 }, { "epoch": 0.4323774988162742, "grad_norm": 0.056639768183231354, "learning_rate": 1.932595109227874e-05, "loss": 0.4849189519882202, "step": 2340 }, { "epoch": 0.43256227552517007, "grad_norm": 0.07089490443468094, "learning_rate": 1.932523089326157e-05, "loss": 0.6140329241752625, "step": 2341 }, { "epoch": 0.4327470522340659, "grad_norm": 0.08315933495759964, "learning_rate": 1.9324510323129383e-05, "loss": 0.6899304389953613, "step": 2342 }, { "epoch": 0.4329318289429617, "grad_norm": 0.07349039614200592, "learning_rate": 1.932378938191086e-05, "loss": 0.4938865005970001, "step": 2343 }, { "epoch": 0.43311660565185756, "grad_norm": 0.06191008538007736, "learning_rate": 1.9323068069634688e-05, "loss": 0.44645974040031433, "step": 2344 }, { "epoch": 0.43330138236075344, "grad_norm": 0.1034642830491066, "learning_rate": 1.9322346386329575e-05, "loss": 1.1280803680419922, "step": 2345 }, { "epoch": 0.43348615906964927, "grad_norm": 0.09495116770267487, "learning_rate": 1.932162433202424e-05, "loss": 0.806296169757843, "step": 2346 }, { "epoch": 0.4336709357785451, "grad_norm": 0.079786017537117, "learning_rate": 1.932090190674742e-05, "loss": 0.7048410773277283, "step": 2347 }, { "epoch": 0.433855712487441, "grad_norm": 0.07984772324562073, "learning_rate": 1.9320179110527867e-05, "loss": 0.6196243166923523, "step": 2348 }, { "epoch": 0.4340404891963368, "grad_norm": 0.07886778563261032, "learning_rate": 1.9319455943394347e-05, "loss": 0.65333491563797, "step": 2349 }, { "epoch": 0.43422526590523264, "grad_norm": 0.07592529058456421, "learning_rate": 1.9318732405375636e-05, "loss": 0.7202848196029663, "step": 2350 }, { "epoch": 0.43441004261412847, "grad_norm": 0.0713939517736435, "learning_rate": 1.9318008496500528e-05, "loss": 0.7466500997543335, "step": 2351 }, { "epoch": 0.43459481932302435, "grad_norm": 0.05905028060078621, "learning_rate": 1.9317284216797837e-05, "loss": 0.5592542290687561, "step": 2352 }, { "epoch": 0.4347795960319202, "grad_norm": 0.09187117964029312, "learning_rate": 1.931655956629638e-05, "loss": 0.7937484979629517, "step": 2353 }, { "epoch": 0.434964372740816, "grad_norm": 0.07646917551755905, "learning_rate": 1.9315834545025005e-05, "loss": 0.6700643301010132, "step": 2354 }, { "epoch": 0.43514914944971184, "grad_norm": 0.08851625770330429, "learning_rate": 1.9315109153012557e-05, "loss": 0.8922794461250305, "step": 2355 }, { "epoch": 0.4353339261586077, "grad_norm": 0.08113475888967514, "learning_rate": 1.931438339028791e-05, "loss": 0.8182970285415649, "step": 2356 }, { "epoch": 0.43551870286750355, "grad_norm": 0.08301424980163574, "learning_rate": 1.9313657256879943e-05, "loss": 0.7397076487541199, "step": 2357 }, { "epoch": 0.4357034795763994, "grad_norm": 0.06106698885560036, "learning_rate": 1.931293075281756e-05, "loss": 0.4387102723121643, "step": 2358 }, { "epoch": 0.43588825628529526, "grad_norm": 0.07855153828859329, "learning_rate": 1.9312203878129664e-05, "loss": 0.7661651968955994, "step": 2359 }, { "epoch": 0.4360730329941911, "grad_norm": 0.0937490165233612, "learning_rate": 1.931147663284519e-05, "loss": 0.8062765002250671, "step": 2360 }, { "epoch": 0.4362578097030869, "grad_norm": 0.0676034688949585, "learning_rate": 1.931074901699308e-05, "loss": 0.5919547080993652, "step": 2361 }, { "epoch": 0.43644258641198275, "grad_norm": 0.07248328626155853, "learning_rate": 1.9310021030602285e-05, "loss": 0.6367123126983643, "step": 2362 }, { "epoch": 0.43662736312087863, "grad_norm": 0.08319476991891861, "learning_rate": 1.930929267370178e-05, "loss": 0.778451681137085, "step": 2363 }, { "epoch": 0.43681213982977446, "grad_norm": 0.06528496742248535, "learning_rate": 1.9308563946320556e-05, "loss": 0.66303551197052, "step": 2364 }, { "epoch": 0.4369969165386703, "grad_norm": 0.07230069488286972, "learning_rate": 1.930783484848761e-05, "loss": 0.550811231136322, "step": 2365 }, { "epoch": 0.4371816932475661, "grad_norm": 0.08370313793420792, "learning_rate": 1.9307105380231952e-05, "loss": 0.6972153186798096, "step": 2366 }, { "epoch": 0.437366469956462, "grad_norm": 0.07506057620048523, "learning_rate": 1.930637554158262e-05, "loss": 0.5916268825531006, "step": 2367 }, { "epoch": 0.43755124666535783, "grad_norm": 0.07940205186605453, "learning_rate": 1.930564533256866e-05, "loss": 0.7237229943275452, "step": 2368 }, { "epoch": 0.43773602337425366, "grad_norm": 0.08555081486701965, "learning_rate": 1.9304914753219126e-05, "loss": 0.9042553901672363, "step": 2369 }, { "epoch": 0.43792080008314954, "grad_norm": 0.06261549890041351, "learning_rate": 1.93041838035631e-05, "loss": 0.5432640314102173, "step": 2370 }, { "epoch": 0.43810557679204537, "grad_norm": 0.08132058382034302, "learning_rate": 1.9303452483629664e-05, "loss": 0.7692998051643372, "step": 2371 }, { "epoch": 0.4382903535009412, "grad_norm": 0.08701673150062561, "learning_rate": 1.9302720793447927e-05, "loss": 0.7004591822624207, "step": 2372 }, { "epoch": 0.438475130209837, "grad_norm": 0.052568644285202026, "learning_rate": 1.930198873304701e-05, "loss": 0.4833621680736542, "step": 2373 }, { "epoch": 0.4386599069187329, "grad_norm": 0.0768493190407753, "learning_rate": 1.930125630245604e-05, "loss": 0.5789093971252441, "step": 2374 }, { "epoch": 0.43884468362762874, "grad_norm": 0.06829556077718735, "learning_rate": 1.930052350170417e-05, "loss": 0.6952472925186157, "step": 2375 }, { "epoch": 0.43902946033652457, "grad_norm": 0.0838492289185524, "learning_rate": 1.9299790330820563e-05, "loss": 0.6650012731552124, "step": 2376 }, { "epoch": 0.4392142370454204, "grad_norm": 0.06579825282096863, "learning_rate": 1.9299056789834394e-05, "loss": 0.6096588969230652, "step": 2377 }, { "epoch": 0.4393990137543163, "grad_norm": 0.0709729790687561, "learning_rate": 1.929832287877486e-05, "loss": 0.508579432964325, "step": 2378 }, { "epoch": 0.4395837904632121, "grad_norm": 0.06732675433158875, "learning_rate": 1.9297588597671164e-05, "loss": 0.5655678510665894, "step": 2379 }, { "epoch": 0.43976856717210794, "grad_norm": 0.05913890525698662, "learning_rate": 1.9296853946552532e-05, "loss": 0.5239315629005432, "step": 2380 }, { "epoch": 0.4399533438810038, "grad_norm": 0.06641931086778641, "learning_rate": 1.92961189254482e-05, "loss": 0.5648493766784668, "step": 2381 }, { "epoch": 0.44013812058989965, "grad_norm": 0.0653366819024086, "learning_rate": 1.9295383534387416e-05, "loss": 0.6413757801055908, "step": 2382 }, { "epoch": 0.4403228972987955, "grad_norm": 0.06733138859272003, "learning_rate": 1.929464777339945e-05, "loss": 0.4827044904232025, "step": 2383 }, { "epoch": 0.4405076740076913, "grad_norm": 0.08121521770954132, "learning_rate": 1.9293911642513585e-05, "loss": 0.6802595257759094, "step": 2384 }, { "epoch": 0.4406924507165872, "grad_norm": 0.06312023103237152, "learning_rate": 1.9293175141759107e-05, "loss": 0.5452331900596619, "step": 2385 }, { "epoch": 0.440877227425483, "grad_norm": 0.08019667118787766, "learning_rate": 1.9292438271165335e-05, "loss": 0.7767255902290344, "step": 2386 }, { "epoch": 0.44106200413437885, "grad_norm": 0.08663544803857803, "learning_rate": 1.9291701030761597e-05, "loss": 0.9065479636192322, "step": 2387 }, { "epoch": 0.44124678084327473, "grad_norm": 0.07809998840093613, "learning_rate": 1.9290963420577223e-05, "loss": 0.7812683582305908, "step": 2388 }, { "epoch": 0.44143155755217056, "grad_norm": 0.08127256482839584, "learning_rate": 1.9290225440641574e-05, "loss": 0.7572963237762451, "step": 2389 }, { "epoch": 0.4416163342610664, "grad_norm": 0.06240149214863777, "learning_rate": 1.9289487090984017e-05, "loss": 0.5791239142417908, "step": 2390 }, { "epoch": 0.4418011109699622, "grad_norm": 0.07807501405477524, "learning_rate": 1.928874837163394e-05, "loss": 0.6849839091300964, "step": 2391 }, { "epoch": 0.4419858876788581, "grad_norm": 0.07311359792947769, "learning_rate": 1.9288009282620736e-05, "loss": 0.708427369594574, "step": 2392 }, { "epoch": 0.44217066438775393, "grad_norm": 0.05785594880580902, "learning_rate": 1.928726982397382e-05, "loss": 0.5417725443840027, "step": 2393 }, { "epoch": 0.44235544109664976, "grad_norm": 0.06097231060266495, "learning_rate": 1.9286529995722624e-05, "loss": 0.5638762712478638, "step": 2394 }, { "epoch": 0.4425402178055456, "grad_norm": 0.07345853000879288, "learning_rate": 1.9285789797896587e-05, "loss": 0.5357245802879333, "step": 2395 }, { "epoch": 0.44272499451444147, "grad_norm": 0.09665371477603912, "learning_rate": 1.9285049230525166e-05, "loss": 0.8695791959762573, "step": 2396 }, { "epoch": 0.4429097712233373, "grad_norm": 0.06887251883745193, "learning_rate": 1.928430829363784e-05, "loss": 0.7601022124290466, "step": 2397 }, { "epoch": 0.44309454793223313, "grad_norm": 0.09401489794254303, "learning_rate": 1.9283566987264083e-05, "loss": 0.9063498377799988, "step": 2398 }, { "epoch": 0.443279324641129, "grad_norm": 0.08337516337633133, "learning_rate": 1.9282825311433408e-05, "loss": 0.6751862168312073, "step": 2399 }, { "epoch": 0.44346410135002484, "grad_norm": 0.08896543830633163, "learning_rate": 1.928208326617533e-05, "loss": 0.6873741149902344, "step": 2400 }, { "epoch": 0.44364887805892067, "grad_norm": 0.0696672722697258, "learning_rate": 1.9281340851519373e-05, "loss": 0.6347296237945557, "step": 2401 }, { "epoch": 0.4438336547678165, "grad_norm": 0.05829437077045441, "learning_rate": 1.928059806749509e-05, "loss": 0.63972008228302, "step": 2402 }, { "epoch": 0.4440184314767124, "grad_norm": 0.07010830193758011, "learning_rate": 1.927985491413204e-05, "loss": 0.6212910413742065, "step": 2403 }, { "epoch": 0.4442032081856082, "grad_norm": 0.07966028898954391, "learning_rate": 1.9279111391459797e-05, "loss": 0.6606336236000061, "step": 2404 }, { "epoch": 0.44438798489450404, "grad_norm": 0.0820903331041336, "learning_rate": 1.927836749950795e-05, "loss": 0.6804654002189636, "step": 2405 }, { "epoch": 0.44457276160339987, "grad_norm": 0.08157593756914139, "learning_rate": 1.927762323830611e-05, "loss": 0.6042102575302124, "step": 2406 }, { "epoch": 0.44475753831229575, "grad_norm": 0.07228659093379974, "learning_rate": 1.9276878607883886e-05, "loss": 0.6968677043914795, "step": 2407 }, { "epoch": 0.4449423150211916, "grad_norm": 0.059456437826156616, "learning_rate": 1.927613360827092e-05, "loss": 0.5671892166137695, "step": 2408 }, { "epoch": 0.4451270917300874, "grad_norm": 0.07476317882537842, "learning_rate": 1.9275388239496854e-05, "loss": 0.6846385598182678, "step": 2409 }, { "epoch": 0.4453118684389833, "grad_norm": 0.06802140921354294, "learning_rate": 1.9274642501591358e-05, "loss": 0.42480891942977905, "step": 2410 }, { "epoch": 0.4454966451478791, "grad_norm": 0.07595323771238327, "learning_rate": 1.9273896394584103e-05, "loss": 0.7496429681777954, "step": 2411 }, { "epoch": 0.44568142185677495, "grad_norm": 0.06730681657791138, "learning_rate": 1.927314991850479e-05, "loss": 0.5631354451179504, "step": 2412 }, { "epoch": 0.4458661985656708, "grad_norm": 0.09018740803003311, "learning_rate": 1.927240307338312e-05, "loss": 0.6622211933135986, "step": 2413 }, { "epoch": 0.44605097527456666, "grad_norm": 0.05780932679772377, "learning_rate": 1.927165585924882e-05, "loss": 0.47575706243515015, "step": 2414 }, { "epoch": 0.4462357519834625, "grad_norm": 0.07823736220598221, "learning_rate": 1.9270908276131623e-05, "loss": 0.7001676559448242, "step": 2415 }, { "epoch": 0.4464205286923583, "grad_norm": 0.07646199315786362, "learning_rate": 1.927016032406128e-05, "loss": 0.7157690525054932, "step": 2416 }, { "epoch": 0.44660530540125415, "grad_norm": 0.09840088337659836, "learning_rate": 1.926941200306756e-05, "loss": 0.8731129169464111, "step": 2417 }, { "epoch": 0.44679008211015003, "grad_norm": 0.07829701900482178, "learning_rate": 1.9268663313180244e-05, "loss": 0.6255185008049011, "step": 2418 }, { "epoch": 0.44697485881904586, "grad_norm": 0.08753079921007156, "learning_rate": 1.9267914254429125e-05, "loss": 0.8361132144927979, "step": 2419 }, { "epoch": 0.4471596355279417, "grad_norm": 0.057597216218709946, "learning_rate": 1.926716482684401e-05, "loss": 0.5651022791862488, "step": 2420 }, { "epoch": 0.4473444122368376, "grad_norm": 0.07024961709976196, "learning_rate": 1.9266415030454734e-05, "loss": 0.695307195186615, "step": 2421 }, { "epoch": 0.4475291889457334, "grad_norm": 0.0742584764957428, "learning_rate": 1.9265664865291128e-05, "loss": 0.6812407374382019, "step": 2422 }, { "epoch": 0.44771396565462923, "grad_norm": 0.06642390042543411, "learning_rate": 1.9264914331383047e-05, "loss": 0.5371487140655518, "step": 2423 }, { "epoch": 0.44789874236352506, "grad_norm": 0.0593610443174839, "learning_rate": 1.9264163428760366e-05, "loss": 0.4845843017101288, "step": 2424 }, { "epoch": 0.44808351907242094, "grad_norm": 0.08533383905887604, "learning_rate": 1.9263412157452964e-05, "loss": 0.6560807228088379, "step": 2425 }, { "epoch": 0.4482682957813168, "grad_norm": 0.06368084996938705, "learning_rate": 1.9262660517490735e-05, "loss": 0.3997742831707001, "step": 2426 }, { "epoch": 0.4484530724902126, "grad_norm": 0.06924302130937576, "learning_rate": 1.9261908508903603e-05, "loss": 0.4483805298805237, "step": 2427 }, { "epoch": 0.44863784919910843, "grad_norm": 0.07959199696779251, "learning_rate": 1.9261156131721485e-05, "loss": 0.792576253414154, "step": 2428 }, { "epoch": 0.4488226259080043, "grad_norm": 0.059455327689647675, "learning_rate": 1.9260403385974328e-05, "loss": 0.44581228494644165, "step": 2429 }, { "epoch": 0.44900740261690014, "grad_norm": 0.07160536199808121, "learning_rate": 1.9259650271692084e-05, "loss": 0.745840311050415, "step": 2430 }, { "epoch": 0.44919217932579597, "grad_norm": 0.07705884426832199, "learning_rate": 1.9258896788904734e-05, "loss": 0.8486708402633667, "step": 2431 }, { "epoch": 0.44937695603469185, "grad_norm": 0.069585882127285, "learning_rate": 1.925814293764226e-05, "loss": 0.5814905762672424, "step": 2432 }, { "epoch": 0.4495617327435877, "grad_norm": 0.07240708917379379, "learning_rate": 1.925738871793466e-05, "loss": 0.5621956586837769, "step": 2433 }, { "epoch": 0.4497465094524835, "grad_norm": 0.0654572919011116, "learning_rate": 1.9256634129811954e-05, "loss": 0.5260647535324097, "step": 2434 }, { "epoch": 0.44993128616137934, "grad_norm": 0.0821259617805481, "learning_rate": 1.925587917330417e-05, "loss": 0.5811529755592346, "step": 2435 }, { "epoch": 0.4501160628702752, "grad_norm": 0.08183261007070541, "learning_rate": 1.9255123848441347e-05, "loss": 0.7321382164955139, "step": 2436 }, { "epoch": 0.45030083957917105, "grad_norm": 0.0739772692322731, "learning_rate": 1.925436815525356e-05, "loss": 0.5492620468139648, "step": 2437 }, { "epoch": 0.4504856162880669, "grad_norm": 0.08293473720550537, "learning_rate": 1.9253612093770865e-05, "loss": 0.7169588208198547, "step": 2438 }, { "epoch": 0.4506703929969627, "grad_norm": 0.06674166768789291, "learning_rate": 1.925285566402336e-05, "loss": 0.693142294883728, "step": 2439 }, { "epoch": 0.4508551697058586, "grad_norm": 0.07635101675987244, "learning_rate": 1.9252098866041152e-05, "loss": 0.5711791515350342, "step": 2440 }, { "epoch": 0.4510399464147544, "grad_norm": 0.0930107980966568, "learning_rate": 1.9251341699854354e-05, "loss": 0.7992475032806396, "step": 2441 }, { "epoch": 0.45122472312365025, "grad_norm": 0.08799722790718079, "learning_rate": 1.9250584165493102e-05, "loss": 0.8901659250259399, "step": 2442 }, { "epoch": 0.45140949983254613, "grad_norm": 0.074408158659935, "learning_rate": 1.924982626298754e-05, "loss": 0.7303105592727661, "step": 2443 }, { "epoch": 0.45159427654144196, "grad_norm": 0.08466403186321259, "learning_rate": 1.924906799236783e-05, "loss": 0.7405203580856323, "step": 2444 }, { "epoch": 0.4517790532503378, "grad_norm": 0.06970347464084625, "learning_rate": 1.924830935366415e-05, "loss": 0.6607108116149902, "step": 2445 }, { "epoch": 0.4519638299592336, "grad_norm": 0.0749702900648117, "learning_rate": 1.9247550346906692e-05, "loss": 0.7693953514099121, "step": 2446 }, { "epoch": 0.4521486066681295, "grad_norm": 0.09294398874044418, "learning_rate": 1.924679097212567e-05, "loss": 0.8226107954978943, "step": 2447 }, { "epoch": 0.45233338337702533, "grad_norm": 0.06877472251653671, "learning_rate": 1.9246031229351287e-05, "loss": 0.6727323532104492, "step": 2448 }, { "epoch": 0.45251816008592116, "grad_norm": 0.06864230334758759, "learning_rate": 1.9245271118613792e-05, "loss": 0.6496172547340393, "step": 2449 }, { "epoch": 0.452702936794817, "grad_norm": 0.06905477494001389, "learning_rate": 1.9244510639943433e-05, "loss": 0.46779608726501465, "step": 2450 }, { "epoch": 0.4528877135037129, "grad_norm": 0.06438469886779785, "learning_rate": 1.924374979337047e-05, "loss": 0.5262128710746765, "step": 2451 }, { "epoch": 0.4530724902126087, "grad_norm": 0.06250059604644775, "learning_rate": 1.9242988578925185e-05, "loss": 0.4987810552120209, "step": 2452 }, { "epoch": 0.45325726692150453, "grad_norm": 0.06168653443455696, "learning_rate": 1.9242226996637873e-05, "loss": 0.5915943384170532, "step": 2453 }, { "epoch": 0.4534420436304004, "grad_norm": 0.07355090975761414, "learning_rate": 1.9241465046538843e-05, "loss": 0.7472153902053833, "step": 2454 }, { "epoch": 0.45362682033929624, "grad_norm": 0.06379469484090805, "learning_rate": 1.9240702728658415e-05, "loss": 0.5391486287117004, "step": 2455 }, { "epoch": 0.4538115970481921, "grad_norm": 0.06752083450555801, "learning_rate": 1.923994004302693e-05, "loss": 0.6245964765548706, "step": 2456 }, { "epoch": 0.4539963737570879, "grad_norm": 0.07804492115974426, "learning_rate": 1.923917698967474e-05, "loss": 0.6221499443054199, "step": 2457 }, { "epoch": 0.4541811504659838, "grad_norm": 0.07160136848688126, "learning_rate": 1.923841356863221e-05, "loss": 0.8083291053771973, "step": 2458 }, { "epoch": 0.4543659271748796, "grad_norm": 0.08072566241025925, "learning_rate": 1.9237649779929724e-05, "loss": 0.9119392037391663, "step": 2459 }, { "epoch": 0.45455070388377544, "grad_norm": 0.07465825974941254, "learning_rate": 1.9236885623597678e-05, "loss": 0.6454877853393555, "step": 2460 }, { "epoch": 0.45473548059267127, "grad_norm": 0.07917649298906326, "learning_rate": 1.9236121099666482e-05, "loss": 0.7718840837478638, "step": 2461 }, { "epoch": 0.45492025730156715, "grad_norm": 0.07406383752822876, "learning_rate": 1.923535620816656e-05, "loss": 0.633132815361023, "step": 2462 }, { "epoch": 0.455105034010463, "grad_norm": 0.08593975752592087, "learning_rate": 1.923459094912836e-05, "loss": 0.7576243877410889, "step": 2463 }, { "epoch": 0.4552898107193588, "grad_norm": 0.07461173832416534, "learning_rate": 1.923382532258233e-05, "loss": 0.6283621191978455, "step": 2464 }, { "epoch": 0.4554745874282547, "grad_norm": 0.0630280002951622, "learning_rate": 1.9233059328558942e-05, "loss": 0.4881044328212738, "step": 2465 }, { "epoch": 0.4556593641371505, "grad_norm": 0.06719866394996643, "learning_rate": 1.9232292967088673e-05, "loss": 0.6233997941017151, "step": 2466 }, { "epoch": 0.45584414084604635, "grad_norm": 0.07608038932085037, "learning_rate": 1.9231526238202034e-05, "loss": 0.7199850082397461, "step": 2467 }, { "epoch": 0.4560289175549422, "grad_norm": 0.07195864617824554, "learning_rate": 1.923075914192953e-05, "loss": 0.6179254055023193, "step": 2468 }, { "epoch": 0.45621369426383807, "grad_norm": 0.07848989963531494, "learning_rate": 1.9229991678301693e-05, "loss": 0.8123495578765869, "step": 2469 }, { "epoch": 0.4563984709727339, "grad_norm": 0.06677235662937164, "learning_rate": 1.9229223847349062e-05, "loss": 0.7239199280738831, "step": 2470 }, { "epoch": 0.4565832476816297, "grad_norm": 0.06927766650915146, "learning_rate": 1.92284556491022e-05, "loss": 0.664320707321167, "step": 2471 }, { "epoch": 0.45676802439052555, "grad_norm": 0.0737115889787674, "learning_rate": 1.922768708359167e-05, "loss": 0.6365139484405518, "step": 2472 }, { "epoch": 0.45695280109942144, "grad_norm": 0.09078367799520493, "learning_rate": 1.9226918150848067e-05, "loss": 0.7377526164054871, "step": 2473 }, { "epoch": 0.45713757780831726, "grad_norm": 0.06729824095964432, "learning_rate": 1.922614885090199e-05, "loss": 0.6383175253868103, "step": 2474 }, { "epoch": 0.4573223545172131, "grad_norm": 0.08023769408464432, "learning_rate": 1.9225379183784052e-05, "loss": 0.6124799251556396, "step": 2475 }, { "epoch": 0.457507131226109, "grad_norm": 0.08397477865219116, "learning_rate": 1.9224609149524887e-05, "loss": 0.9499881267547607, "step": 2476 }, { "epoch": 0.4576919079350048, "grad_norm": 0.08595887571573257, "learning_rate": 1.9223838748155133e-05, "loss": 0.6891126036643982, "step": 2477 }, { "epoch": 0.45787668464390063, "grad_norm": 0.0752493292093277, "learning_rate": 1.922306797970546e-05, "loss": 0.6955389380455017, "step": 2478 }, { "epoch": 0.45806146135279646, "grad_norm": 0.07774579524993896, "learning_rate": 1.9222296844206533e-05, "loss": 0.7269667983055115, "step": 2479 }, { "epoch": 0.45824623806169235, "grad_norm": 0.07733689248561859, "learning_rate": 1.922152534168905e-05, "loss": 0.9509046077728271, "step": 2480 }, { "epoch": 0.4584310147705882, "grad_norm": 0.060073934495449066, "learning_rate": 1.9220753472183702e-05, "loss": 0.5735282897949219, "step": 2481 }, { "epoch": 0.458615791479484, "grad_norm": 0.08871357142925262, "learning_rate": 1.9219981235721216e-05, "loss": 0.7650564908981323, "step": 2482 }, { "epoch": 0.45880056818837983, "grad_norm": 0.06762327253818512, "learning_rate": 1.9219208632332324e-05, "loss": 0.6746568083763123, "step": 2483 }, { "epoch": 0.4589853448972757, "grad_norm": 0.0836707055568695, "learning_rate": 1.921843566204777e-05, "loss": 0.6644420027732849, "step": 2484 }, { "epoch": 0.45917012160617154, "grad_norm": 0.05917372927069664, "learning_rate": 1.9217662324898318e-05, "loss": 0.49475622177124023, "step": 2485 }, { "epoch": 0.4593548983150674, "grad_norm": 0.06416773051023483, "learning_rate": 1.9216888620914743e-05, "loss": 0.6015645265579224, "step": 2486 }, { "epoch": 0.45953967502396326, "grad_norm": 0.08696015924215317, "learning_rate": 1.921611455012784e-05, "loss": 0.9279436469078064, "step": 2487 }, { "epoch": 0.4597244517328591, "grad_norm": 0.08332741260528564, "learning_rate": 1.9215340112568407e-05, "loss": 0.7063831090927124, "step": 2488 }, { "epoch": 0.4599092284417549, "grad_norm": 0.07548101246356964, "learning_rate": 1.921456530826727e-05, "loss": 0.674170196056366, "step": 2489 }, { "epoch": 0.46009400515065074, "grad_norm": 0.06416136771440506, "learning_rate": 1.9213790137255267e-05, "loss": 0.5054815411567688, "step": 2490 }, { "epoch": 0.4602787818595466, "grad_norm": 0.08905654400587082, "learning_rate": 1.9213014599563238e-05, "loss": 0.8949898481369019, "step": 2491 }, { "epoch": 0.46046355856844245, "grad_norm": 0.08505464345216751, "learning_rate": 1.9212238695222054e-05, "loss": 0.7196219563484192, "step": 2492 }, { "epoch": 0.4606483352773383, "grad_norm": 0.0721084251999855, "learning_rate": 1.921146242426259e-05, "loss": 0.567509651184082, "step": 2493 }, { "epoch": 0.4608331119862341, "grad_norm": 0.08344163745641708, "learning_rate": 1.921068578671574e-05, "loss": 0.7773234844207764, "step": 2494 }, { "epoch": 0.46101788869513, "grad_norm": 0.07499945163726807, "learning_rate": 1.9209908782612415e-05, "loss": 0.726072371006012, "step": 2495 }, { "epoch": 0.4612026654040258, "grad_norm": 0.07331238687038422, "learning_rate": 1.920913141198353e-05, "loss": 0.6004643440246582, "step": 2496 }, { "epoch": 0.46138744211292165, "grad_norm": 0.0997093915939331, "learning_rate": 1.9208353674860028e-05, "loss": 1.0000945329666138, "step": 2497 }, { "epoch": 0.46157221882181754, "grad_norm": 0.09368643164634705, "learning_rate": 1.920757557127286e-05, "loss": 0.8160755038261414, "step": 2498 }, { "epoch": 0.46175699553071337, "grad_norm": 0.07633251696825027, "learning_rate": 1.9206797101252993e-05, "loss": 0.7055872678756714, "step": 2499 }, { "epoch": 0.4619417722396092, "grad_norm": 0.06902278959751129, "learning_rate": 1.92060182648314e-05, "loss": 0.667657196521759, "step": 2500 }, { "epoch": 0.4619417722396092, "eval_loss": 0.7116170525550842, "eval_runtime": 157.2484, "eval_samples_per_second": 115.925, "eval_steps_per_second": 14.493, "step": 2500 }, { "epoch": 0.462126548948505, "grad_norm": 0.06343262642621994, "learning_rate": 1.920523906203909e-05, "loss": 0.5842658281326294, "step": 2501 }, { "epoch": 0.4623113256574009, "grad_norm": 0.06518920511007309, "learning_rate": 1.920445949290706e-05, "loss": 0.6732863187789917, "step": 2502 }, { "epoch": 0.46249610236629674, "grad_norm": 0.06654220074415207, "learning_rate": 1.9203679557466338e-05, "loss": 0.6887181997299194, "step": 2503 }, { "epoch": 0.46268087907519256, "grad_norm": 0.07807012647390366, "learning_rate": 1.9202899255747967e-05, "loss": 0.6344214677810669, "step": 2504 }, { "epoch": 0.4628656557840884, "grad_norm": 0.06473081558942795, "learning_rate": 1.9202118587782996e-05, "loss": 0.734042763710022, "step": 2505 }, { "epoch": 0.4630504324929843, "grad_norm": 0.08167917281389236, "learning_rate": 1.9201337553602496e-05, "loss": 0.867456316947937, "step": 2506 }, { "epoch": 0.4632352092018801, "grad_norm": 0.0646093562245369, "learning_rate": 1.9200556153237547e-05, "loss": 0.46739864349365234, "step": 2507 }, { "epoch": 0.46341998591077593, "grad_norm": 0.06381334364414215, "learning_rate": 1.919977438671925e-05, "loss": 0.5306061506271362, "step": 2508 }, { "epoch": 0.4636047626196718, "grad_norm": 0.07060441374778748, "learning_rate": 1.9198992254078715e-05, "loss": 0.7806938290596008, "step": 2509 }, { "epoch": 0.46378953932856765, "grad_norm": 0.07286828011274338, "learning_rate": 1.9198209755347065e-05, "loss": 0.7457721829414368, "step": 2510 }, { "epoch": 0.4639743160374635, "grad_norm": 0.0571209117770195, "learning_rate": 1.919742689055545e-05, "loss": 0.5006831884384155, "step": 2511 }, { "epoch": 0.4641590927463593, "grad_norm": 0.07706863433122635, "learning_rate": 1.9196643659735016e-05, "loss": 0.6784906983375549, "step": 2512 }, { "epoch": 0.4643438694552552, "grad_norm": 0.06896209716796875, "learning_rate": 1.9195860062916937e-05, "loss": 0.721760094165802, "step": 2513 }, { "epoch": 0.464528646164151, "grad_norm": 0.07125797867774963, "learning_rate": 1.91950761001324e-05, "loss": 0.6727914810180664, "step": 2514 }, { "epoch": 0.46471342287304684, "grad_norm": 0.08965711295604706, "learning_rate": 1.9194291771412596e-05, "loss": 0.8182405233383179, "step": 2515 }, { "epoch": 0.4648981995819427, "grad_norm": 0.046130772680044174, "learning_rate": 1.919350707678875e-05, "loss": 0.4636189341545105, "step": 2516 }, { "epoch": 0.46508297629083856, "grad_norm": 0.09742356091737747, "learning_rate": 1.9192722016292086e-05, "loss": 0.7898565530776978, "step": 2517 }, { "epoch": 0.4652677529997344, "grad_norm": 0.08445709943771362, "learning_rate": 1.919193658995384e-05, "loss": 0.7934099435806274, "step": 2518 }, { "epoch": 0.4654525297086302, "grad_norm": 0.07222909480333328, "learning_rate": 1.9191150797805283e-05, "loss": 0.6173120737075806, "step": 2519 }, { "epoch": 0.4656373064175261, "grad_norm": 0.08419971913099289, "learning_rate": 1.9190364639877674e-05, "loss": 0.6999402046203613, "step": 2520 }, { "epoch": 0.4658220831264219, "grad_norm": 0.09709232300519943, "learning_rate": 1.918957811620231e-05, "loss": 0.7624440789222717, "step": 2521 }, { "epoch": 0.46600685983531775, "grad_norm": 0.07338286936283112, "learning_rate": 1.918879122681048e-05, "loss": 0.6639955639839172, "step": 2522 }, { "epoch": 0.4661916365442136, "grad_norm": 0.07687751948833466, "learning_rate": 1.9188003971733515e-05, "loss": 0.8092284798622131, "step": 2523 }, { "epoch": 0.46637641325310947, "grad_norm": 0.07229873538017273, "learning_rate": 1.9187216351002734e-05, "loss": 0.5076743960380554, "step": 2524 }, { "epoch": 0.4665611899620053, "grad_norm": 0.06916353851556778, "learning_rate": 1.9186428364649486e-05, "loss": 0.732584536075592, "step": 2525 }, { "epoch": 0.4667459666709011, "grad_norm": 0.0701894760131836, "learning_rate": 1.9185640012705133e-05, "loss": 0.724190354347229, "step": 2526 }, { "epoch": 0.46693074337979695, "grad_norm": 0.07493194937705994, "learning_rate": 1.918485129520104e-05, "loss": 0.69194495677948, "step": 2527 }, { "epoch": 0.46711552008869284, "grad_norm": 0.08086828887462616, "learning_rate": 1.9184062212168605e-05, "loss": 0.7124866843223572, "step": 2528 }, { "epoch": 0.46730029679758867, "grad_norm": 0.07317820936441422, "learning_rate": 1.9183272763639223e-05, "loss": 0.695803165435791, "step": 2529 }, { "epoch": 0.4674850735064845, "grad_norm": 0.07890819758176804, "learning_rate": 1.918248294964432e-05, "loss": 0.5504632592201233, "step": 2530 }, { "epoch": 0.4676698502153804, "grad_norm": 0.07478110492229462, "learning_rate": 1.9181692770215324e-05, "loss": 0.7427833080291748, "step": 2531 }, { "epoch": 0.4678546269242762, "grad_norm": 0.0800105631351471, "learning_rate": 1.9180902225383677e-05, "loss": 0.755503237247467, "step": 2532 }, { "epoch": 0.46803940363317204, "grad_norm": 0.06926467269659042, "learning_rate": 1.918011131518085e-05, "loss": 0.5626682043075562, "step": 2533 }, { "epoch": 0.46822418034206786, "grad_norm": 0.0702790766954422, "learning_rate": 1.917932003963831e-05, "loss": 0.7690215110778809, "step": 2534 }, { "epoch": 0.46840895705096375, "grad_norm": 0.08173204958438873, "learning_rate": 1.9178528398787553e-05, "loss": 0.7654154896736145, "step": 2535 }, { "epoch": 0.4685937337598596, "grad_norm": 0.08061196655035019, "learning_rate": 1.917773639266008e-05, "loss": 0.7449684739112854, "step": 2536 }, { "epoch": 0.4687785104687554, "grad_norm": 0.06667713820934296, "learning_rate": 1.917694402128741e-05, "loss": 0.5902036428451538, "step": 2537 }, { "epoch": 0.46896328717765123, "grad_norm": 0.08062142133712769, "learning_rate": 1.9176151284701086e-05, "loss": 0.7364373207092285, "step": 2538 }, { "epoch": 0.4691480638865471, "grad_norm": 0.06273316591978073, "learning_rate": 1.9175358182932643e-05, "loss": 0.522224485874176, "step": 2539 }, { "epoch": 0.46933284059544295, "grad_norm": 0.06959912925958633, "learning_rate": 1.9174564716013653e-05, "loss": 0.5129040479660034, "step": 2540 }, { "epoch": 0.4695176173043388, "grad_norm": 0.07042738795280457, "learning_rate": 1.917377088397569e-05, "loss": 0.7187867164611816, "step": 2541 }, { "epoch": 0.46970239401323466, "grad_norm": 0.08257179707288742, "learning_rate": 1.9172976686850345e-05, "loss": 0.8150436878204346, "step": 2542 }, { "epoch": 0.4698871707221305, "grad_norm": 0.07191328704357147, "learning_rate": 1.917218212466923e-05, "loss": 0.5697478652000427, "step": 2543 }, { "epoch": 0.4700719474310263, "grad_norm": 0.05890669673681259, "learning_rate": 1.917138719746396e-05, "loss": 0.4980667531490326, "step": 2544 }, { "epoch": 0.47025672413992214, "grad_norm": 0.08073609322309494, "learning_rate": 1.9170591905266176e-05, "loss": 0.6568699479103088, "step": 2545 }, { "epoch": 0.47044150084881803, "grad_norm": 0.06748133897781372, "learning_rate": 1.9169796248107524e-05, "loss": 0.6333909034729004, "step": 2546 }, { "epoch": 0.47062627755771386, "grad_norm": 0.07974343746900558, "learning_rate": 1.916900022601967e-05, "loss": 0.7419365048408508, "step": 2547 }, { "epoch": 0.4708110542666097, "grad_norm": 0.08520668745040894, "learning_rate": 1.9168203839034292e-05, "loss": 0.6401491761207581, "step": 2548 }, { "epoch": 0.4709958309755055, "grad_norm": 0.061686594039201736, "learning_rate": 1.9167407087183087e-05, "loss": 0.573703944683075, "step": 2549 }, { "epoch": 0.4711806076844014, "grad_norm": 0.09480550140142441, "learning_rate": 1.9166609970497755e-05, "loss": 0.8099426031112671, "step": 2550 }, { "epoch": 0.4713653843932972, "grad_norm": 0.07680436223745346, "learning_rate": 1.916581248901003e-05, "loss": 0.6837723851203918, "step": 2551 }, { "epoch": 0.47155016110219306, "grad_norm": 0.08173999935388565, "learning_rate": 1.9165014642751645e-05, "loss": 0.8117296695709229, "step": 2552 }, { "epoch": 0.47173493781108894, "grad_norm": 0.0909331738948822, "learning_rate": 1.916421643175435e-05, "loss": 0.8888765573501587, "step": 2553 }, { "epoch": 0.47191971451998477, "grad_norm": 0.08059228211641312, "learning_rate": 1.916341785604991e-05, "loss": 0.7117865681648254, "step": 2554 }, { "epoch": 0.4721044912288806, "grad_norm": 0.07878051698207855, "learning_rate": 1.9162618915670112e-05, "loss": 0.7551975846290588, "step": 2555 }, { "epoch": 0.4722892679377764, "grad_norm": 0.06273041665554047, "learning_rate": 1.916181961064674e-05, "loss": 0.4910038113594055, "step": 2556 }, { "epoch": 0.4724740446466723, "grad_norm": 0.07543109357357025, "learning_rate": 1.916101994101162e-05, "loss": 0.6243723630905151, "step": 2557 }, { "epoch": 0.47265882135556814, "grad_norm": 0.0721454918384552, "learning_rate": 1.916021990679656e-05, "loss": 0.5600568056106567, "step": 2558 }, { "epoch": 0.47284359806446397, "grad_norm": 0.08377529680728912, "learning_rate": 1.915941950803341e-05, "loss": 0.7743291258811951, "step": 2559 }, { "epoch": 0.4730283747733598, "grad_norm": 0.08089539408683777, "learning_rate": 1.915861874475402e-05, "loss": 0.5809829235076904, "step": 2560 }, { "epoch": 0.4732131514822557, "grad_norm": 0.07542770355939865, "learning_rate": 1.915781761699026e-05, "loss": 0.6139705777168274, "step": 2561 }, { "epoch": 0.4733979281911515, "grad_norm": 0.09735579043626785, "learning_rate": 1.9157016124774004e-05, "loss": 0.5442507266998291, "step": 2562 }, { "epoch": 0.47358270490004734, "grad_norm": 0.06749841570854187, "learning_rate": 1.915621426813716e-05, "loss": 0.6063088774681091, "step": 2563 }, { "epoch": 0.4737674816089432, "grad_norm": 0.07153279334306717, "learning_rate": 1.915541204711163e-05, "loss": 0.6011493802070618, "step": 2564 }, { "epoch": 0.47395225831783905, "grad_norm": 0.07286897301673889, "learning_rate": 1.9154609461729348e-05, "loss": 0.7002130150794983, "step": 2565 }, { "epoch": 0.4741370350267349, "grad_norm": 0.0901065319776535, "learning_rate": 1.9153806512022248e-05, "loss": 0.8844193816184998, "step": 2566 }, { "epoch": 0.4743218117356307, "grad_norm": 0.06940233707427979, "learning_rate": 1.9153003198022286e-05, "loss": 0.6113611459732056, "step": 2567 }, { "epoch": 0.4745065884445266, "grad_norm": 0.07977228611707687, "learning_rate": 1.9152199519761436e-05, "loss": 0.7797371745109558, "step": 2568 }, { "epoch": 0.4746913651534224, "grad_norm": 0.0750238448381424, "learning_rate": 1.9151395477271675e-05, "loss": 0.5979514718055725, "step": 2569 }, { "epoch": 0.47487614186231825, "grad_norm": 0.08806634694337845, "learning_rate": 1.9150591070585012e-05, "loss": 0.7285597324371338, "step": 2570 }, { "epoch": 0.4750609185712141, "grad_norm": 0.09475602209568024, "learning_rate": 1.914978629973345e-05, "loss": 0.7155942916870117, "step": 2571 }, { "epoch": 0.47524569528010996, "grad_norm": 0.07977049797773361, "learning_rate": 1.9148981164749013e-05, "loss": 0.6524484753608704, "step": 2572 }, { "epoch": 0.4754304719890058, "grad_norm": 0.09101420640945435, "learning_rate": 1.9148175665663755e-05, "loss": 1.1290339231491089, "step": 2573 }, { "epoch": 0.4756152486979016, "grad_norm": 0.09440110623836517, "learning_rate": 1.9147369802509725e-05, "loss": 0.9461732506752014, "step": 2574 }, { "epoch": 0.4758000254067975, "grad_norm": 0.08689321577548981, "learning_rate": 1.9146563575318997e-05, "loss": 0.7111562490463257, "step": 2575 }, { "epoch": 0.47598480211569333, "grad_norm": 0.07283070683479309, "learning_rate": 1.9145756984123653e-05, "loss": 0.6966633796691895, "step": 2576 }, { "epoch": 0.47616957882458916, "grad_norm": 0.07687104493379593, "learning_rate": 1.9144950028955795e-05, "loss": 0.7655548453330994, "step": 2577 }, { "epoch": 0.476354355533485, "grad_norm": 0.07238690555095673, "learning_rate": 1.914414270984754e-05, "loss": 0.7400521636009216, "step": 2578 }, { "epoch": 0.47653913224238087, "grad_norm": 0.0622221976518631, "learning_rate": 1.9143335026831008e-05, "loss": 0.4354769289493561, "step": 2579 }, { "epoch": 0.4767239089512767, "grad_norm": 0.06557375937700272, "learning_rate": 1.914252697993835e-05, "loss": 0.505308210849762, "step": 2580 }, { "epoch": 0.4769086856601725, "grad_norm": 0.08358443528413773, "learning_rate": 1.914171856920172e-05, "loss": 0.716081440448761, "step": 2581 }, { "epoch": 0.47709346236906836, "grad_norm": 0.060606375336647034, "learning_rate": 1.9140909794653293e-05, "loss": 0.4815409481525421, "step": 2582 }, { "epoch": 0.47727823907796424, "grad_norm": 0.06997659057378769, "learning_rate": 1.9140100656325254e-05, "loss": 0.6048935055732727, "step": 2583 }, { "epoch": 0.47746301578686007, "grad_norm": 0.07896342128515244, "learning_rate": 1.9139291154249802e-05, "loss": 0.5597572922706604, "step": 2584 }, { "epoch": 0.4776477924957559, "grad_norm": 0.05853699892759323, "learning_rate": 1.9138481288459162e-05, "loss": 0.4828832447528839, "step": 2585 }, { "epoch": 0.4778325692046518, "grad_norm": 0.05594884976744652, "learning_rate": 1.9137671058985554e-05, "loss": 0.4123653173446655, "step": 2586 }, { "epoch": 0.4780173459135476, "grad_norm": 0.06755481660366058, "learning_rate": 1.9136860465861223e-05, "loss": 0.4792831838130951, "step": 2587 }, { "epoch": 0.47820212262244344, "grad_norm": 0.07026038318872452, "learning_rate": 1.9136049509118435e-05, "loss": 0.6120736598968506, "step": 2588 }, { "epoch": 0.47838689933133927, "grad_norm": 0.06810068339109421, "learning_rate": 1.9135238188789458e-05, "loss": 0.5771077275276184, "step": 2589 }, { "epoch": 0.47857167604023515, "grad_norm": 0.06867280602455139, "learning_rate": 1.9134426504906584e-05, "loss": 0.6913584470748901, "step": 2590 }, { "epoch": 0.478756452749131, "grad_norm": 0.08671563118696213, "learning_rate": 1.9133614457502106e-05, "loss": 0.7088780999183655, "step": 2591 }, { "epoch": 0.4789412294580268, "grad_norm": 0.08995692431926727, "learning_rate": 1.9132802046608353e-05, "loss": 0.716721773147583, "step": 2592 }, { "epoch": 0.47912600616692264, "grad_norm": 0.08103762567043304, "learning_rate": 1.9131989272257652e-05, "loss": 0.594050407409668, "step": 2593 }, { "epoch": 0.4793107828758185, "grad_norm": 0.08107825368642807, "learning_rate": 1.913117613448235e-05, "loss": 0.7606329321861267, "step": 2594 }, { "epoch": 0.47949555958471435, "grad_norm": 0.08666590601205826, "learning_rate": 1.9130362633314803e-05, "loss": 0.7732304930686951, "step": 2595 }, { "epoch": 0.4796803362936102, "grad_norm": 0.09467215836048126, "learning_rate": 1.9129548768787386e-05, "loss": 0.789035439491272, "step": 2596 }, { "epoch": 0.47986511300250606, "grad_norm": 0.09766748547554016, "learning_rate": 1.9128734540932494e-05, "loss": 0.8102937340736389, "step": 2597 }, { "epoch": 0.4800498897114019, "grad_norm": 0.08985164016485214, "learning_rate": 1.912791994978253e-05, "loss": 0.8242364525794983, "step": 2598 }, { "epoch": 0.4802346664202977, "grad_norm": 0.08733810484409332, "learning_rate": 1.9127104995369903e-05, "loss": 0.6473926901817322, "step": 2599 }, { "epoch": 0.48041944312919355, "grad_norm": 0.06384208053350449, "learning_rate": 1.9126289677727053e-05, "loss": 0.5705603957176208, "step": 2600 }, { "epoch": 0.48060421983808943, "grad_norm": 0.08345800638198853, "learning_rate": 1.9125473996886433e-05, "loss": 0.7921107411384583, "step": 2601 }, { "epoch": 0.48078899654698526, "grad_norm": 0.09258704632520676, "learning_rate": 1.912465795288049e-05, "loss": 0.8663824200630188, "step": 2602 }, { "epoch": 0.4809737732558811, "grad_norm": 0.08237463235855103, "learning_rate": 1.9123841545741712e-05, "loss": 0.6780328750610352, "step": 2603 }, { "epoch": 0.4811585499647769, "grad_norm": 0.09698188304901123, "learning_rate": 1.9123024775502586e-05, "loss": 0.9325119853019714, "step": 2604 }, { "epoch": 0.4813433266736728, "grad_norm": 0.057871270924806595, "learning_rate": 1.9122207642195617e-05, "loss": 0.46808913350105286, "step": 2605 }, { "epoch": 0.48152810338256863, "grad_norm": 0.06541048735380173, "learning_rate": 1.912139014585332e-05, "loss": 0.5878641605377197, "step": 2606 }, { "epoch": 0.48171288009146446, "grad_norm": 0.07617659121751785, "learning_rate": 1.912057228650823e-05, "loss": 0.7120945453643799, "step": 2607 }, { "epoch": 0.48189765680036034, "grad_norm": 0.08291994035243988, "learning_rate": 1.9119754064192904e-05, "loss": 0.4768596291542053, "step": 2608 }, { "epoch": 0.48208243350925617, "grad_norm": 0.0723223090171814, "learning_rate": 1.9118935478939896e-05, "loss": 0.7024025321006775, "step": 2609 }, { "epoch": 0.482267210218152, "grad_norm": 0.06848274171352386, "learning_rate": 1.9118116530781785e-05, "loss": 0.5369501709938049, "step": 2610 }, { "epoch": 0.4824519869270478, "grad_norm": 0.08580145984888077, "learning_rate": 1.9117297219751164e-05, "loss": 0.7307426929473877, "step": 2611 }, { "epoch": 0.4826367636359437, "grad_norm": 0.06089922785758972, "learning_rate": 1.9116477545880638e-05, "loss": 0.6530369520187378, "step": 2612 }, { "epoch": 0.48282154034483954, "grad_norm": 0.080387182533741, "learning_rate": 1.9115657509202824e-05, "loss": 0.7199724912643433, "step": 2613 }, { "epoch": 0.48300631705373537, "grad_norm": 0.07296282052993774, "learning_rate": 1.9114837109750367e-05, "loss": 0.5948304533958435, "step": 2614 }, { "epoch": 0.4831910937626312, "grad_norm": 0.0720793828368187, "learning_rate": 1.9114016347555905e-05, "loss": 0.5981985926628113, "step": 2615 }, { "epoch": 0.4833758704715271, "grad_norm": 0.07115527987480164, "learning_rate": 1.9113195222652105e-05, "loss": 0.6019927859306335, "step": 2616 }, { "epoch": 0.4835606471804229, "grad_norm": 0.08490081131458282, "learning_rate": 1.911237373507165e-05, "loss": 0.5547221899032593, "step": 2617 }, { "epoch": 0.48374542388931874, "grad_norm": 0.08143550902605057, "learning_rate": 1.911155188484723e-05, "loss": 0.6641083359718323, "step": 2618 }, { "epoch": 0.4839302005982146, "grad_norm": 0.07218817621469498, "learning_rate": 1.911072967201155e-05, "loss": 0.6021130084991455, "step": 2619 }, { "epoch": 0.48411497730711045, "grad_norm": 0.08438766002655029, "learning_rate": 1.9109907096597332e-05, "loss": 0.6461876034736633, "step": 2620 }, { "epoch": 0.4842997540160063, "grad_norm": 0.077565997838974, "learning_rate": 1.9109084158637314e-05, "loss": 0.6658491492271423, "step": 2621 }, { "epoch": 0.4844845307249021, "grad_norm": 0.07408081740140915, "learning_rate": 1.9108260858164243e-05, "loss": 0.8159285187721252, "step": 2622 }, { "epoch": 0.484669307433798, "grad_norm": 0.09126505255699158, "learning_rate": 1.9107437195210886e-05, "loss": 0.8331578373908997, "step": 2623 }, { "epoch": 0.4848540841426938, "grad_norm": 0.10413938015699387, "learning_rate": 1.9106613169810024e-05, "loss": 0.8409904837608337, "step": 2624 }, { "epoch": 0.48503886085158965, "grad_norm": 0.07460866123437881, "learning_rate": 1.910578878199445e-05, "loss": 0.6452231407165527, "step": 2625 }, { "epoch": 0.4852236375604855, "grad_norm": 0.0811716690659523, "learning_rate": 1.9104964031796965e-05, "loss": 0.7975817918777466, "step": 2626 }, { "epoch": 0.48540841426938136, "grad_norm": 0.07679463922977448, "learning_rate": 1.9104138919250403e-05, "loss": 0.6129171848297119, "step": 2627 }, { "epoch": 0.4855931909782772, "grad_norm": 0.06981959193944931, "learning_rate": 1.9103313444387595e-05, "loss": 0.7612093687057495, "step": 2628 }, { "epoch": 0.485777967687173, "grad_norm": 0.07798459380865097, "learning_rate": 1.9102487607241393e-05, "loss": 0.6142989993095398, "step": 2629 }, { "epoch": 0.4859627443960689, "grad_norm": 0.08926472812891006, "learning_rate": 1.9101661407844657e-05, "loss": 0.8828129172325134, "step": 2630 }, { "epoch": 0.48614752110496473, "grad_norm": 0.08611288666725159, "learning_rate": 1.910083484623028e-05, "loss": 0.6943038702011108, "step": 2631 }, { "epoch": 0.48633229781386056, "grad_norm": 0.06649988144636154, "learning_rate": 1.910000792243114e-05, "loss": 0.42508068680763245, "step": 2632 }, { "epoch": 0.4865170745227564, "grad_norm": 0.07903112471103668, "learning_rate": 1.909918063648016e-05, "loss": 0.9131513237953186, "step": 2633 }, { "epoch": 0.48670185123165227, "grad_norm": 0.07621757686138153, "learning_rate": 1.909835298841026e-05, "loss": 0.5481411218643188, "step": 2634 }, { "epoch": 0.4868866279405481, "grad_norm": 0.06687542051076889, "learning_rate": 1.9097524978254377e-05, "loss": 0.5872876048088074, "step": 2635 }, { "epoch": 0.48707140464944393, "grad_norm": 0.06902679055929184, "learning_rate": 1.909669660604546e-05, "loss": 0.6621937155723572, "step": 2636 }, { "epoch": 0.48725618135833976, "grad_norm": 0.06544358283281326, "learning_rate": 1.9095867871816475e-05, "loss": 0.4884672462940216, "step": 2637 }, { "epoch": 0.48744095806723564, "grad_norm": 0.07585190236568451, "learning_rate": 1.909503877560041e-05, "loss": 0.7296018004417419, "step": 2638 }, { "epoch": 0.48762573477613147, "grad_norm": 0.07426097989082336, "learning_rate": 1.9094209317430255e-05, "loss": 0.567711353302002, "step": 2639 }, { "epoch": 0.4878105114850273, "grad_norm": 0.06902457028627396, "learning_rate": 1.909337949733902e-05, "loss": 0.37785089015960693, "step": 2640 }, { "epoch": 0.4879952881939232, "grad_norm": 0.09323791414499283, "learning_rate": 1.9092549315359732e-05, "loss": 0.8677100539207458, "step": 2641 }, { "epoch": 0.488180064902819, "grad_norm": 0.07936318963766098, "learning_rate": 1.909171877152543e-05, "loss": 0.5746418237686157, "step": 2642 }, { "epoch": 0.48836484161171484, "grad_norm": 0.0856119766831398, "learning_rate": 1.9090887865869162e-05, "loss": 0.6803861260414124, "step": 2643 }, { "epoch": 0.48854961832061067, "grad_norm": 0.07777687907218933, "learning_rate": 1.9090056598424002e-05, "loss": 0.8061811923980713, "step": 2644 }, { "epoch": 0.48873439502950655, "grad_norm": 0.07252785563468933, "learning_rate": 1.9089224969223022e-05, "loss": 0.5486541390419006, "step": 2645 }, { "epoch": 0.4889191717384024, "grad_norm": 0.07425657659769058, "learning_rate": 1.9088392978299334e-05, "loss": 0.6537051200866699, "step": 2646 }, { "epoch": 0.4891039484472982, "grad_norm": 0.06550854444503784, "learning_rate": 1.9087560625686034e-05, "loss": 0.591060996055603, "step": 2647 }, { "epoch": 0.48928872515619404, "grad_norm": 0.06094660237431526, "learning_rate": 1.908672791141625e-05, "loss": 0.5385389924049377, "step": 2648 }, { "epoch": 0.4894735018650899, "grad_norm": 0.06148020550608635, "learning_rate": 1.9085894835523128e-05, "loss": 0.5736526846885681, "step": 2649 }, { "epoch": 0.48965827857398575, "grad_norm": 0.0693485289812088, "learning_rate": 1.9085061398039814e-05, "loss": 0.47210174798965454, "step": 2650 }, { "epoch": 0.4898430552828816, "grad_norm": 0.07060623168945312, "learning_rate": 1.9084227598999484e-05, "loss": 0.6329917907714844, "step": 2651 }, { "epoch": 0.49002783199177746, "grad_norm": 0.07458177953958511, "learning_rate": 1.9083393438435318e-05, "loss": 0.6945242881774902, "step": 2652 }, { "epoch": 0.4902126087006733, "grad_norm": 0.07435189187526703, "learning_rate": 1.9082558916380508e-05, "loss": 0.678519606590271, "step": 2653 }, { "epoch": 0.4903973854095691, "grad_norm": 0.07789159566164017, "learning_rate": 1.9081724032868266e-05, "loss": 0.716600775718689, "step": 2654 }, { "epoch": 0.49058216211846495, "grad_norm": 0.08185722678899765, "learning_rate": 1.9080888787931826e-05, "loss": 0.64130699634552, "step": 2655 }, { "epoch": 0.49076693882736083, "grad_norm": 0.09033802151679993, "learning_rate": 1.9080053181604418e-05, "loss": 0.8799751400947571, "step": 2656 }, { "epoch": 0.49095171553625666, "grad_norm": 0.07720265537500381, "learning_rate": 1.9079217213919304e-05, "loss": 0.6216465830802917, "step": 2657 }, { "epoch": 0.4911364922451525, "grad_norm": 0.07524937391281128, "learning_rate": 1.9078380884909752e-05, "loss": 0.7585391402244568, "step": 2658 }, { "epoch": 0.4913212689540483, "grad_norm": 0.0857534259557724, "learning_rate": 1.907754419460904e-05, "loss": 0.8452739715576172, "step": 2659 }, { "epoch": 0.4915060456629442, "grad_norm": 0.08280126750469208, "learning_rate": 1.907670714305047e-05, "loss": 0.8687022924423218, "step": 2660 }, { "epoch": 0.49169082237184003, "grad_norm": 0.07255477458238602, "learning_rate": 1.9075869730267355e-05, "loss": 0.7363627552986145, "step": 2661 }, { "epoch": 0.49187559908073586, "grad_norm": 0.09360745549201965, "learning_rate": 1.9075031956293016e-05, "loss": 0.7569074034690857, "step": 2662 }, { "epoch": 0.49206037578963174, "grad_norm": 0.06543853878974915, "learning_rate": 1.90741938211608e-05, "loss": 0.5689033269882202, "step": 2663 }, { "epoch": 0.49224515249852757, "grad_norm": 0.06809094548225403, "learning_rate": 1.907335532490406e-05, "loss": 0.6460486650466919, "step": 2664 }, { "epoch": 0.4924299292074234, "grad_norm": 0.07789400964975357, "learning_rate": 1.907251646755616e-05, "loss": 0.7170886397361755, "step": 2665 }, { "epoch": 0.49261470591631923, "grad_norm": 0.10317979753017426, "learning_rate": 1.9071677249150492e-05, "loss": 1.0603386163711548, "step": 2666 }, { "epoch": 0.4927994826252151, "grad_norm": 0.0847686231136322, "learning_rate": 1.9070837669720452e-05, "loss": 0.7669317722320557, "step": 2667 }, { "epoch": 0.49298425933411094, "grad_norm": 0.08344128727912903, "learning_rate": 1.906999772929945e-05, "loss": 0.7641721367835999, "step": 2668 }, { "epoch": 0.49316903604300677, "grad_norm": 0.06451429426670074, "learning_rate": 1.9069157427920916e-05, "loss": 0.6100636720657349, "step": 2669 }, { "epoch": 0.4933538127519026, "grad_norm": 0.08458209782838821, "learning_rate": 1.9068316765618294e-05, "loss": 0.6070970892906189, "step": 2670 }, { "epoch": 0.4935385894607985, "grad_norm": 0.058811575174331665, "learning_rate": 1.906747574242503e-05, "loss": 0.44909724593162537, "step": 2671 }, { "epoch": 0.4937233661696943, "grad_norm": 0.07956483960151672, "learning_rate": 1.90666343583746e-05, "loss": 0.7305945158004761, "step": 2672 }, { "epoch": 0.49390814287859014, "grad_norm": 0.06249157711863518, "learning_rate": 1.906579261350049e-05, "loss": 0.474531888961792, "step": 2673 }, { "epoch": 0.494092919587486, "grad_norm": 0.058922164142131805, "learning_rate": 1.90649505078362e-05, "loss": 0.6985012888908386, "step": 2674 }, { "epoch": 0.49427769629638185, "grad_norm": 0.07136017829179764, "learning_rate": 1.9064108041415237e-05, "loss": 0.6611461639404297, "step": 2675 }, { "epoch": 0.4944624730052777, "grad_norm": 0.06852423399686813, "learning_rate": 1.906326521427113e-05, "loss": 0.6475507616996765, "step": 2676 }, { "epoch": 0.4946472497141735, "grad_norm": 0.09397808462381363, "learning_rate": 1.906242202643743e-05, "loss": 0.8768760561943054, "step": 2677 }, { "epoch": 0.4948320264230694, "grad_norm": 0.08374115079641342, "learning_rate": 1.906157847794768e-05, "loss": 0.6318570375442505, "step": 2678 }, { "epoch": 0.4950168031319652, "grad_norm": 0.056238822638988495, "learning_rate": 1.9060734568835457e-05, "loss": 0.5789932608604431, "step": 2679 }, { "epoch": 0.49520157984086105, "grad_norm": 0.06980939954519272, "learning_rate": 1.905989029913435e-05, "loss": 0.6135944724082947, "step": 2680 }, { "epoch": 0.4953863565497569, "grad_norm": 0.0664597600698471, "learning_rate": 1.9059045668877945e-05, "loss": 0.605728805065155, "step": 2681 }, { "epoch": 0.49557113325865276, "grad_norm": 0.07725506275892258, "learning_rate": 1.9058200678099873e-05, "loss": 0.7923427224159241, "step": 2682 }, { "epoch": 0.4957559099675486, "grad_norm": 0.07151152193546295, "learning_rate": 1.905735532683375e-05, "loss": 0.7274478673934937, "step": 2683 }, { "epoch": 0.4959406866764444, "grad_norm": 0.07864515483379364, "learning_rate": 1.9056509615113223e-05, "loss": 0.6789471507072449, "step": 2684 }, { "epoch": 0.4961254633853403, "grad_norm": 0.06356114894151688, "learning_rate": 1.9055663542971948e-05, "loss": 0.6436051726341248, "step": 2685 }, { "epoch": 0.49631024009423613, "grad_norm": 0.07219547033309937, "learning_rate": 1.905481711044359e-05, "loss": 0.6921743154525757, "step": 2686 }, { "epoch": 0.49649501680313196, "grad_norm": 0.08134686946868896, "learning_rate": 1.9053970317561846e-05, "loss": 0.6954108476638794, "step": 2687 }, { "epoch": 0.4966797935120278, "grad_norm": 0.09121778607368469, "learning_rate": 1.905312316436041e-05, "loss": 0.8187353610992432, "step": 2688 }, { "epoch": 0.4968645702209237, "grad_norm": 0.057139378041028976, "learning_rate": 1.9052275650872994e-05, "loss": 0.5716656446456909, "step": 2689 }, { "epoch": 0.4970493469298195, "grad_norm": 0.08653534203767776, "learning_rate": 1.9051427777133328e-05, "loss": 0.7700663208961487, "step": 2690 }, { "epoch": 0.49723412363871533, "grad_norm": 0.09053795039653778, "learning_rate": 1.905057954317515e-05, "loss": 0.8304776549339294, "step": 2691 }, { "epoch": 0.49741890034761116, "grad_norm": 0.061494387686252594, "learning_rate": 1.9049730949032228e-05, "loss": 0.4574540853500366, "step": 2692 }, { "epoch": 0.49760367705650704, "grad_norm": 0.07644534856081009, "learning_rate": 1.9048881994738323e-05, "loss": 0.685924768447876, "step": 2693 }, { "epoch": 0.49778845376540287, "grad_norm": 0.06368592381477356, "learning_rate": 1.904803268032723e-05, "loss": 0.5423883199691772, "step": 2694 }, { "epoch": 0.4979732304742987, "grad_norm": 0.07776868343353271, "learning_rate": 1.904718300583274e-05, "loss": 0.6837641000747681, "step": 2695 }, { "epoch": 0.4981580071831946, "grad_norm": 0.07274238020181656, "learning_rate": 1.9046332971288674e-05, "loss": 0.5656123757362366, "step": 2696 }, { "epoch": 0.4983427838920904, "grad_norm": 0.06574950367212296, "learning_rate": 1.9045482576728857e-05, "loss": 0.520660400390625, "step": 2697 }, { "epoch": 0.49852756060098624, "grad_norm": 0.08441847562789917, "learning_rate": 1.9044631822187132e-05, "loss": 0.7153335213661194, "step": 2698 }, { "epoch": 0.49871233730988207, "grad_norm": 0.07338439673185349, "learning_rate": 1.904378070769736e-05, "loss": 0.7561910152435303, "step": 2699 }, { "epoch": 0.49889711401877795, "grad_norm": 0.08297833055257797, "learning_rate": 1.9042929233293405e-05, "loss": 0.6804150938987732, "step": 2700 }, { "epoch": 0.4990818907276738, "grad_norm": 0.07729196548461914, "learning_rate": 1.9042077399009163e-05, "loss": 0.8838931322097778, "step": 2701 }, { "epoch": 0.4992666674365696, "grad_norm": 0.08958151936531067, "learning_rate": 1.904122520487853e-05, "loss": 0.9126286506652832, "step": 2702 }, { "epoch": 0.49945144414546544, "grad_norm": 0.07667583972215652, "learning_rate": 1.9040372650935416e-05, "loss": 0.5196372866630554, "step": 2703 }, { "epoch": 0.4996362208543613, "grad_norm": 0.06985201686620712, "learning_rate": 1.903951973721376e-05, "loss": 0.6988966464996338, "step": 2704 }, { "epoch": 0.49982099756325715, "grad_norm": 0.06755391508340836, "learning_rate": 1.9038666463747494e-05, "loss": 0.5743021965026855, "step": 2705 }, { "epoch": 0.500005774272153, "grad_norm": 0.06953644752502441, "learning_rate": 1.9037812830570583e-05, "loss": 0.6474657654762268, "step": 2706 }, { "epoch": 0.5001905509810488, "grad_norm": 0.07906162738800049, "learning_rate": 1.9036958837717e-05, "loss": 0.6331603527069092, "step": 2707 }, { "epoch": 0.5003753276899446, "grad_norm": 0.0696539580821991, "learning_rate": 1.9036104485220723e-05, "loss": 0.5354222059249878, "step": 2708 }, { "epoch": 0.5005601043988406, "grad_norm": 0.08513978868722916, "learning_rate": 1.903524977311576e-05, "loss": 0.7808301448822021, "step": 2709 }, { "epoch": 0.5007448811077364, "grad_norm": 0.08418749272823334, "learning_rate": 1.9034394701436124e-05, "loss": 0.905589759349823, "step": 2710 }, { "epoch": 0.5009296578166322, "grad_norm": 0.07378925383090973, "learning_rate": 1.9033539270215843e-05, "loss": 0.6261293292045593, "step": 2711 }, { "epoch": 0.5011144345255281, "grad_norm": 0.05783259868621826, "learning_rate": 1.903268347948896e-05, "loss": 0.5617566108703613, "step": 2712 }, { "epoch": 0.5012992112344239, "grad_norm": 0.07201316952705383, "learning_rate": 1.903182732928954e-05, "loss": 0.5230177044868469, "step": 2713 }, { "epoch": 0.5014839879433197, "grad_norm": 0.08267080038785934, "learning_rate": 1.9030970819651644e-05, "loss": 0.7685118913650513, "step": 2714 }, { "epoch": 0.5016687646522155, "grad_norm": 0.06884673237800598, "learning_rate": 1.9030113950609367e-05, "loss": 0.5821294784545898, "step": 2715 }, { "epoch": 0.5018535413611114, "grad_norm": 0.07333012670278549, "learning_rate": 1.9029256722196805e-05, "loss": 0.6633030772209167, "step": 2716 }, { "epoch": 0.5020383180700073, "grad_norm": 0.06039682775735855, "learning_rate": 1.9028399134448072e-05, "loss": 0.49971288442611694, "step": 2717 }, { "epoch": 0.5022230947789031, "grad_norm": 0.06957642734050751, "learning_rate": 1.9027541187397304e-05, "loss": 0.5987708568572998, "step": 2718 }, { "epoch": 0.502407871487799, "grad_norm": 0.07104673236608505, "learning_rate": 1.9026682881078636e-05, "loss": 0.6045911908149719, "step": 2719 }, { "epoch": 0.5025926481966948, "grad_norm": 0.0739755779504776, "learning_rate": 1.9025824215526235e-05, "loss": 0.6368657350540161, "step": 2720 }, { "epoch": 0.5027774249055906, "grad_norm": 0.07629484683275223, "learning_rate": 1.9024965190774262e-05, "loss": 0.9732115864753723, "step": 2721 }, { "epoch": 0.5029622016144865, "grad_norm": 0.06285340338945389, "learning_rate": 1.9024105806856918e-05, "loss": 0.5596101880073547, "step": 2722 }, { "epoch": 0.5031469783233823, "grad_norm": 0.08465234935283661, "learning_rate": 1.9023246063808388e-05, "loss": 0.714224100112915, "step": 2723 }, { "epoch": 0.5033317550322782, "grad_norm": 0.07528623193502426, "learning_rate": 1.90223859616629e-05, "loss": 0.7736612558364868, "step": 2724 }, { "epoch": 0.5035165317411741, "grad_norm": 0.08436401933431625, "learning_rate": 1.9021525500454678e-05, "loss": 0.6686825752258301, "step": 2725 }, { "epoch": 0.5037013084500699, "grad_norm": 0.09414120018482208, "learning_rate": 1.902066468021796e-05, "loss": 0.9490776658058167, "step": 2726 }, { "epoch": 0.5038860851589657, "grad_norm": 0.08042771369218826, "learning_rate": 1.9019803500987014e-05, "loss": 0.6027342677116394, "step": 2727 }, { "epoch": 0.5040708618678615, "grad_norm": 0.06140635535120964, "learning_rate": 1.901894196279611e-05, "loss": 0.5515220761299133, "step": 2728 }, { "epoch": 0.5042556385767574, "grad_norm": 0.07926372438669205, "learning_rate": 1.901808006567953e-05, "loss": 0.8234108090400696, "step": 2729 }, { "epoch": 0.5044404152856532, "grad_norm": 0.09257242828607559, "learning_rate": 1.9017217809671575e-05, "loss": 0.99014812707901, "step": 2730 }, { "epoch": 0.5046251919945491, "grad_norm": 0.06899430602788925, "learning_rate": 1.9016355194806566e-05, "loss": 0.6572807431221008, "step": 2731 }, { "epoch": 0.504809968703445, "grad_norm": 0.053285859525203705, "learning_rate": 1.901549222111883e-05, "loss": 0.5255534648895264, "step": 2732 }, { "epoch": 0.5049947454123408, "grad_norm": 0.08632458001375198, "learning_rate": 1.9014628888642705e-05, "loss": 0.6835488677024841, "step": 2733 }, { "epoch": 0.5051795221212366, "grad_norm": 0.08509247750043869, "learning_rate": 1.9013765197412553e-05, "loss": 0.7912634015083313, "step": 2734 }, { "epoch": 0.5053642988301325, "grad_norm": 0.06132848560810089, "learning_rate": 1.9012901147462752e-05, "loss": 0.3929689824581146, "step": 2735 }, { "epoch": 0.5055490755390283, "grad_norm": 0.08009739220142365, "learning_rate": 1.9012036738827682e-05, "loss": 0.7682035565376282, "step": 2736 }, { "epoch": 0.5057338522479241, "grad_norm": 0.06091945245862007, "learning_rate": 1.901117197154174e-05, "loss": 0.5390786528587341, "step": 2737 }, { "epoch": 0.5059186289568199, "grad_norm": 0.07729063183069229, "learning_rate": 1.901030684563935e-05, "loss": 0.7572644352912903, "step": 2738 }, { "epoch": 0.5061034056657159, "grad_norm": 0.08812917768955231, "learning_rate": 1.9009441361154937e-05, "loss": 0.6920105814933777, "step": 2739 }, { "epoch": 0.5062881823746117, "grad_norm": 0.06975310295820236, "learning_rate": 1.9008575518122943e-05, "loss": 0.517683207988739, "step": 2740 }, { "epoch": 0.5064729590835075, "grad_norm": 0.07183025032281876, "learning_rate": 1.900770931657783e-05, "loss": 0.7221886515617371, "step": 2741 }, { "epoch": 0.5066577357924034, "grad_norm": 0.07859528064727783, "learning_rate": 1.9006842756554067e-05, "loss": 0.6803582310676575, "step": 2742 }, { "epoch": 0.5068425125012992, "grad_norm": 0.0603187195956707, "learning_rate": 1.900597583808614e-05, "loss": 0.46500974893569946, "step": 2743 }, { "epoch": 0.507027289210195, "grad_norm": 0.08773189038038254, "learning_rate": 1.900510856120855e-05, "loss": 0.7085680365562439, "step": 2744 }, { "epoch": 0.5072120659190908, "grad_norm": 0.08600956946611404, "learning_rate": 1.9004240925955814e-05, "loss": 0.7808557152748108, "step": 2745 }, { "epoch": 0.5073968426279868, "grad_norm": 0.07820644229650497, "learning_rate": 1.9003372932362462e-05, "loss": 0.6414129137992859, "step": 2746 }, { "epoch": 0.5075816193368826, "grad_norm": 0.0755482167005539, "learning_rate": 1.900250458046303e-05, "loss": 0.5816751718521118, "step": 2747 }, { "epoch": 0.5077663960457784, "grad_norm": 0.07866478711366653, "learning_rate": 1.9001635870292086e-05, "loss": 0.6917564272880554, "step": 2748 }, { "epoch": 0.5079511727546743, "grad_norm": 0.08214591443538666, "learning_rate": 1.9000766801884194e-05, "loss": 0.6834210753440857, "step": 2749 }, { "epoch": 0.5081359494635701, "grad_norm": 0.06740007549524307, "learning_rate": 1.8999897375273942e-05, "loss": 0.6274173259735107, "step": 2750 }, { "epoch": 0.5083207261724659, "grad_norm": 0.07330995053052902, "learning_rate": 1.8999027590495934e-05, "loss": 0.6141194105148315, "step": 2751 }, { "epoch": 0.5085055028813618, "grad_norm": 0.07362136989831924, "learning_rate": 1.899815744758478e-05, "loss": 0.6480097770690918, "step": 2752 }, { "epoch": 0.5086902795902577, "grad_norm": 0.09079992771148682, "learning_rate": 1.8997286946575114e-05, "loss": 0.820427417755127, "step": 2753 }, { "epoch": 0.5088750562991535, "grad_norm": 0.06545901298522949, "learning_rate": 1.8996416087501573e-05, "loss": 0.478307843208313, "step": 2754 }, { "epoch": 0.5090598330080494, "grad_norm": 0.09047531336545944, "learning_rate": 1.899554487039882e-05, "loss": 0.8143502473831177, "step": 2755 }, { "epoch": 0.5092446097169452, "grad_norm": 0.08255776762962341, "learning_rate": 1.8994673295301526e-05, "loss": 0.5235763788223267, "step": 2756 }, { "epoch": 0.509429386425841, "grad_norm": 0.08837346732616425, "learning_rate": 1.8993801362244374e-05, "loss": 0.6911593079566956, "step": 2757 }, { "epoch": 0.5096141631347368, "grad_norm": 0.08509925752878189, "learning_rate": 1.8992929071262066e-05, "loss": 0.8627534508705139, "step": 2758 }, { "epoch": 0.5097989398436327, "grad_norm": 0.07599958032369614, "learning_rate": 1.8992056422389317e-05, "loss": 0.49385976791381836, "step": 2759 }, { "epoch": 0.5099837165525285, "grad_norm": 0.07793485373258591, "learning_rate": 1.8991183415660855e-05, "loss": 0.748390793800354, "step": 2760 }, { "epoch": 0.5101684932614244, "grad_norm": 0.09587264060974121, "learning_rate": 1.899031005111142e-05, "loss": 0.9381311535835266, "step": 2761 }, { "epoch": 0.5103532699703203, "grad_norm": 0.09079372882843018, "learning_rate": 1.898943632877577e-05, "loss": 0.6771690249443054, "step": 2762 }, { "epoch": 0.5105380466792161, "grad_norm": 0.07188550382852554, "learning_rate": 1.8988562248688686e-05, "loss": 0.6365196704864502, "step": 2763 }, { "epoch": 0.5107228233881119, "grad_norm": 0.08427656441926956, "learning_rate": 1.8987687810884944e-05, "loss": 0.7004556655883789, "step": 2764 }, { "epoch": 0.5109076000970078, "grad_norm": 0.06205383315682411, "learning_rate": 1.8986813015399345e-05, "loss": 0.6061794757843018, "step": 2765 }, { "epoch": 0.5110923768059036, "grad_norm": 0.08489475399255753, "learning_rate": 1.89859378622667e-05, "loss": 0.9061506986618042, "step": 2766 }, { "epoch": 0.5112771535147994, "grad_norm": 0.06409691274166107, "learning_rate": 1.898506235152185e-05, "loss": 0.6327704787254333, "step": 2767 }, { "epoch": 0.5114619302236953, "grad_norm": 0.06629861146211624, "learning_rate": 1.898418648319962e-05, "loss": 0.5824447274208069, "step": 2768 }, { "epoch": 0.5116467069325912, "grad_norm": 0.0706552192568779, "learning_rate": 1.8983310257334883e-05, "loss": 0.7207609415054321, "step": 2769 }, { "epoch": 0.511831483641487, "grad_norm": 0.07945195585489273, "learning_rate": 1.8982433673962496e-05, "loss": 0.6402596831321716, "step": 2770 }, { "epoch": 0.5120162603503828, "grad_norm": 0.06763187795877457, "learning_rate": 1.8981556733117357e-05, "loss": 0.5913063287734985, "step": 2771 }, { "epoch": 0.5122010370592787, "grad_norm": 0.06970265507698059, "learning_rate": 1.8980679434834357e-05, "loss": 0.5915688276290894, "step": 2772 }, { "epoch": 0.5123858137681745, "grad_norm": 0.06420344859361649, "learning_rate": 1.8979801779148413e-05, "loss": 0.5847893357276917, "step": 2773 }, { "epoch": 0.5125705904770703, "grad_norm": 0.07548118382692337, "learning_rate": 1.897892376609445e-05, "loss": 0.8099266886711121, "step": 2774 }, { "epoch": 0.5127553671859663, "grad_norm": 0.06970521062612534, "learning_rate": 1.897804539570742e-05, "loss": 0.5355647206306458, "step": 2775 }, { "epoch": 0.5129401438948621, "grad_norm": 0.06311215460300446, "learning_rate": 1.8977166668022263e-05, "loss": 0.6434404850006104, "step": 2776 }, { "epoch": 0.5131249206037579, "grad_norm": 0.06808818131685257, "learning_rate": 1.8976287583073965e-05, "loss": 0.6484166383743286, "step": 2777 }, { "epoch": 0.5133096973126537, "grad_norm": 0.08092934638261795, "learning_rate": 1.8975408140897503e-05, "loss": 0.8626954555511475, "step": 2778 }, { "epoch": 0.5134944740215496, "grad_norm": 0.07606197148561478, "learning_rate": 1.8974528341527875e-05, "loss": 0.5682605504989624, "step": 2779 }, { "epoch": 0.5136792507304454, "grad_norm": 0.07515977323055267, "learning_rate": 1.89736481850001e-05, "loss": 0.6957910060882568, "step": 2780 }, { "epoch": 0.5138640274393412, "grad_norm": 0.07134959846735, "learning_rate": 1.89727676713492e-05, "loss": 0.5610045194625854, "step": 2781 }, { "epoch": 0.5140488041482371, "grad_norm": 0.07571757584810257, "learning_rate": 1.8971886800610218e-05, "loss": 0.6205386519432068, "step": 2782 }, { "epoch": 0.514233580857133, "grad_norm": 0.0926576629281044, "learning_rate": 1.8971005572818213e-05, "loss": 0.865348756313324, "step": 2783 }, { "epoch": 0.5144183575660288, "grad_norm": 0.06826504319906235, "learning_rate": 1.8970123988008252e-05, "loss": 0.6385295391082764, "step": 2784 }, { "epoch": 0.5146031342749247, "grad_norm": 0.0901683047413826, "learning_rate": 1.8969242046215418e-05, "loss": 0.9333497881889343, "step": 2785 }, { "epoch": 0.5147879109838205, "grad_norm": 0.08303900808095932, "learning_rate": 1.8968359747474813e-05, "loss": 0.6660789847373962, "step": 2786 }, { "epoch": 0.5149726876927163, "grad_norm": 0.06373574584722519, "learning_rate": 1.896747709182155e-05, "loss": 0.5466078519821167, "step": 2787 }, { "epoch": 0.5151574644016121, "grad_norm": 0.07366237789392471, "learning_rate": 1.8966594079290757e-05, "loss": 0.6122941970825195, "step": 2788 }, { "epoch": 0.515342241110508, "grad_norm": 0.07785239070653915, "learning_rate": 1.896571070991757e-05, "loss": 0.729058563709259, "step": 2789 }, { "epoch": 0.5155270178194039, "grad_norm": 0.05069505050778389, "learning_rate": 1.8964826983737143e-05, "loss": 0.44589418172836304, "step": 2790 }, { "epoch": 0.5157117945282997, "grad_norm": 0.07521497458219528, "learning_rate": 1.8963942900784653e-05, "loss": 0.6502828001976013, "step": 2791 }, { "epoch": 0.5158965712371956, "grad_norm": 0.061132580041885376, "learning_rate": 1.896305846109528e-05, "loss": 0.46217507123947144, "step": 2792 }, { "epoch": 0.5160813479460914, "grad_norm": 0.08042273670434952, "learning_rate": 1.8962173664704222e-05, "loss": 0.7091048955917358, "step": 2793 }, { "epoch": 0.5162661246549872, "grad_norm": 0.07649550586938858, "learning_rate": 1.896128851164669e-05, "loss": 0.7375391125679016, "step": 2794 }, { "epoch": 0.516450901363883, "grad_norm": 0.0892588421702385, "learning_rate": 1.8960403001957914e-05, "loss": 0.7438496351242065, "step": 2795 }, { "epoch": 0.5166356780727789, "grad_norm": 0.06633324921131134, "learning_rate": 1.8959517135673126e-05, "loss": 0.5604415535926819, "step": 2796 }, { "epoch": 0.5168204547816748, "grad_norm": 0.07909633964300156, "learning_rate": 1.895863091282759e-05, "loss": 0.6037436723709106, "step": 2797 }, { "epoch": 0.5170052314905706, "grad_norm": 0.09118392318487167, "learning_rate": 1.8957744333456577e-05, "loss": 0.7270393371582031, "step": 2798 }, { "epoch": 0.5171900081994665, "grad_norm": 0.07374778389930725, "learning_rate": 1.895685739759536e-05, "loss": 0.5679983496665955, "step": 2799 }, { "epoch": 0.5173747849083623, "grad_norm": 0.07633034884929657, "learning_rate": 1.895597010527924e-05, "loss": 0.6480544805526733, "step": 2800 }, { "epoch": 0.5175595616172581, "grad_norm": 0.06983176618814468, "learning_rate": 1.895508245654353e-05, "loss": 0.6487261652946472, "step": 2801 }, { "epoch": 0.517744338326154, "grad_norm": 0.07402893155813217, "learning_rate": 1.8954194451423555e-05, "loss": 0.8582574725151062, "step": 2802 }, { "epoch": 0.5179291150350498, "grad_norm": 0.0776456966996193, "learning_rate": 1.895330608995466e-05, "loss": 0.6201335787773132, "step": 2803 }, { "epoch": 0.5181138917439456, "grad_norm": 0.0688788965344429, "learning_rate": 1.8952417372172187e-05, "loss": 0.6365094780921936, "step": 2804 }, { "epoch": 0.5182986684528416, "grad_norm": 0.06985542923212051, "learning_rate": 1.8951528298111514e-05, "loss": 0.5172978043556213, "step": 2805 }, { "epoch": 0.5184834451617374, "grad_norm": 0.07759788632392883, "learning_rate": 1.895063886780802e-05, "loss": 0.6132344603538513, "step": 2806 }, { "epoch": 0.5186682218706332, "grad_norm": 0.08312520384788513, "learning_rate": 1.89497490812971e-05, "loss": 0.7646285891532898, "step": 2807 }, { "epoch": 0.518852998579529, "grad_norm": 0.08181486278772354, "learning_rate": 1.8948858938614172e-05, "loss": 0.7406775951385498, "step": 2808 }, { "epoch": 0.5190377752884249, "grad_norm": 0.0924254059791565, "learning_rate": 1.8947968439794653e-05, "loss": 0.6629146337509155, "step": 2809 }, { "epoch": 0.5192225519973207, "grad_norm": 0.08145590871572495, "learning_rate": 1.8947077584873984e-05, "loss": 0.7635599374771118, "step": 2810 }, { "epoch": 0.5194073287062165, "grad_norm": 0.07248992472887039, "learning_rate": 1.8946186373887617e-05, "loss": 0.620116114616394, "step": 2811 }, { "epoch": 0.5195921054151125, "grad_norm": 0.06582232564687729, "learning_rate": 1.8945294806871026e-05, "loss": 0.5889583826065063, "step": 2812 }, { "epoch": 0.5197768821240083, "grad_norm": 0.08726590871810913, "learning_rate": 1.8944402883859687e-05, "loss": 0.7653307914733887, "step": 2813 }, { "epoch": 0.5199616588329041, "grad_norm": 0.053412389010190964, "learning_rate": 1.8943510604889094e-05, "loss": 0.4532836973667145, "step": 2814 }, { "epoch": 0.5201464355418, "grad_norm": 0.09321916848421097, "learning_rate": 1.8942617969994762e-05, "loss": 0.752572238445282, "step": 2815 }, { "epoch": 0.5203312122506958, "grad_norm": 0.06985002756118774, "learning_rate": 1.894172497921221e-05, "loss": 0.45620113611221313, "step": 2816 }, { "epoch": 0.5205159889595916, "grad_norm": 0.08312235027551651, "learning_rate": 1.894083163257698e-05, "loss": 0.7842530608177185, "step": 2817 }, { "epoch": 0.5207007656684874, "grad_norm": 0.06230049580335617, "learning_rate": 1.8939937930124622e-05, "loss": 0.49488046765327454, "step": 2818 }, { "epoch": 0.5208855423773834, "grad_norm": 0.07006420195102692, "learning_rate": 1.893904387189071e-05, "loss": 0.5373267531394958, "step": 2819 }, { "epoch": 0.5210703190862792, "grad_norm": 0.08325011283159256, "learning_rate": 1.893814945791081e-05, "loss": 0.8509864807128906, "step": 2820 }, { "epoch": 0.521255095795175, "grad_norm": 0.06438539177179337, "learning_rate": 1.893725468822053e-05, "loss": 0.701816737651825, "step": 2821 }, { "epoch": 0.5214398725040709, "grad_norm": 0.09208257496356964, "learning_rate": 1.8936359562855475e-05, "loss": 0.774588406085968, "step": 2822 }, { "epoch": 0.5216246492129667, "grad_norm": 0.07522048056125641, "learning_rate": 1.8935464081851267e-05, "loss": 0.674850583076477, "step": 2823 }, { "epoch": 0.5218094259218625, "grad_norm": 0.07833349704742432, "learning_rate": 1.8934568245243542e-05, "loss": 0.6751000285148621, "step": 2824 }, { "epoch": 0.5219942026307584, "grad_norm": 0.08105552941560745, "learning_rate": 1.8933672053067957e-05, "loss": 0.6712080836296082, "step": 2825 }, { "epoch": 0.5221789793396542, "grad_norm": 0.07644625008106232, "learning_rate": 1.8932775505360173e-05, "loss": 0.5222357511520386, "step": 2826 }, { "epoch": 0.5223637560485501, "grad_norm": 0.05553733929991722, "learning_rate": 1.8931878602155872e-05, "loss": 0.5329239368438721, "step": 2827 }, { "epoch": 0.522548532757446, "grad_norm": 0.05887029320001602, "learning_rate": 1.8930981343490742e-05, "loss": 0.5770387649536133, "step": 2828 }, { "epoch": 0.5227333094663418, "grad_norm": 0.07055466622114182, "learning_rate": 1.8930083729400502e-05, "loss": 0.6148492693901062, "step": 2829 }, { "epoch": 0.5229180861752376, "grad_norm": 0.08709493279457092, "learning_rate": 1.8929185759920864e-05, "loss": 0.8767763376235962, "step": 2830 }, { "epoch": 0.5231028628841334, "grad_norm": 0.0684783011674881, "learning_rate": 1.8928287435087568e-05, "loss": 0.5474317073822021, "step": 2831 }, { "epoch": 0.5232876395930293, "grad_norm": 0.07589972019195557, "learning_rate": 1.8927388754936368e-05, "loss": 0.710189938545227, "step": 2832 }, { "epoch": 0.5234724163019251, "grad_norm": 0.08322039991617203, "learning_rate": 1.8926489719503025e-05, "loss": 0.7560164928436279, "step": 2833 }, { "epoch": 0.523657193010821, "grad_norm": 0.08088655769824982, "learning_rate": 1.892559032882332e-05, "loss": 0.7506522536277771, "step": 2834 }, { "epoch": 0.5238419697197169, "grad_norm": 0.083648681640625, "learning_rate": 1.8924690582933043e-05, "loss": 0.8052482008934021, "step": 2835 }, { "epoch": 0.5240267464286127, "grad_norm": 0.05375261977314949, "learning_rate": 1.8923790481868e-05, "loss": 0.5737895369529724, "step": 2836 }, { "epoch": 0.5242115231375085, "grad_norm": 0.07327893376350403, "learning_rate": 1.8922890025664018e-05, "loss": 0.6785560846328735, "step": 2837 }, { "epoch": 0.5243962998464043, "grad_norm": 0.07243496924638748, "learning_rate": 1.892198921435693e-05, "loss": 0.6918960809707642, "step": 2838 }, { "epoch": 0.5245810765553002, "grad_norm": 0.07730116695165634, "learning_rate": 1.8921088047982585e-05, "loss": 0.6704845428466797, "step": 2839 }, { "epoch": 0.524765853264196, "grad_norm": 0.06954976171255112, "learning_rate": 1.8920186526576843e-05, "loss": 0.6522843241691589, "step": 2840 }, { "epoch": 0.5249506299730919, "grad_norm": 0.09051687270402908, "learning_rate": 1.8919284650175585e-05, "loss": 0.8548020720481873, "step": 2841 }, { "epoch": 0.5251354066819878, "grad_norm": 0.055443525314331055, "learning_rate": 1.8918382418814705e-05, "loss": 0.5138656497001648, "step": 2842 }, { "epoch": 0.5253201833908836, "grad_norm": 0.08884968608617783, "learning_rate": 1.8917479832530102e-05, "loss": 0.8936039805412292, "step": 2843 }, { "epoch": 0.5255049600997794, "grad_norm": 0.066892109811306, "learning_rate": 1.8916576891357706e-05, "loss": 0.6073099374771118, "step": 2844 }, { "epoch": 0.5256897368086753, "grad_norm": 0.06852417439222336, "learning_rate": 1.8915673595333443e-05, "loss": 0.6002171039581299, "step": 2845 }, { "epoch": 0.5258745135175711, "grad_norm": 0.06942515820264816, "learning_rate": 1.891476994449327e-05, "loss": 0.6562768220901489, "step": 2846 }, { "epoch": 0.5260592902264669, "grad_norm": 0.08007383346557617, "learning_rate": 1.8913865938873138e-05, "loss": 0.63433438539505, "step": 2847 }, { "epoch": 0.5262440669353627, "grad_norm": 0.07730159908533096, "learning_rate": 1.8912961578509032e-05, "loss": 0.6335527896881104, "step": 2848 }, { "epoch": 0.5264288436442587, "grad_norm": 0.09433772414922714, "learning_rate": 1.891205686343694e-05, "loss": 1.0060772895812988, "step": 2849 }, { "epoch": 0.5266136203531545, "grad_norm": 0.07724204659461975, "learning_rate": 1.891115179369287e-05, "loss": 0.6215003728866577, "step": 2850 }, { "epoch": 0.5267983970620503, "grad_norm": 0.07483768463134766, "learning_rate": 1.8910246369312833e-05, "loss": 0.6356710195541382, "step": 2851 }, { "epoch": 0.5269831737709462, "grad_norm": 0.05760635435581207, "learning_rate": 1.8909340590332868e-05, "loss": 0.42942285537719727, "step": 2852 }, { "epoch": 0.527167950479842, "grad_norm": 0.0790332779288292, "learning_rate": 1.8908434456789022e-05, "loss": 0.5897369980812073, "step": 2853 }, { "epoch": 0.5273527271887378, "grad_norm": 0.06736014038324356, "learning_rate": 1.8907527968717357e-05, "loss": 0.5854964852333069, "step": 2854 }, { "epoch": 0.5275375038976337, "grad_norm": 0.07692205905914307, "learning_rate": 1.8906621126153947e-05, "loss": 0.7592504024505615, "step": 2855 }, { "epoch": 0.5277222806065296, "grad_norm": 0.08579183369874954, "learning_rate": 1.8905713929134878e-05, "loss": 0.6285127401351929, "step": 2856 }, { "epoch": 0.5279070573154254, "grad_norm": 0.062469176948070526, "learning_rate": 1.890480637769626e-05, "loss": 0.5253337025642395, "step": 2857 }, { "epoch": 0.5280918340243212, "grad_norm": 0.05412564054131508, "learning_rate": 1.8903898471874206e-05, "loss": 0.4102505147457123, "step": 2858 }, { "epoch": 0.5282766107332171, "grad_norm": 0.06502963602542877, "learning_rate": 1.890299021170485e-05, "loss": 0.5365229845046997, "step": 2859 }, { "epoch": 0.5284613874421129, "grad_norm": 0.0952506884932518, "learning_rate": 1.8902081597224338e-05, "loss": 0.8815677165985107, "step": 2860 }, { "epoch": 0.5286461641510087, "grad_norm": 0.06655575335025787, "learning_rate": 1.8901172628468833e-05, "loss": 0.569294273853302, "step": 2861 }, { "epoch": 0.5288309408599046, "grad_norm": 0.06547219306230545, "learning_rate": 1.89002633054745e-05, "loss": 0.477306604385376, "step": 2862 }, { "epoch": 0.5290157175688005, "grad_norm": 0.06068910285830498, "learning_rate": 1.8899353628277536e-05, "loss": 0.6271467208862305, "step": 2863 }, { "epoch": 0.5292004942776963, "grad_norm": 0.07076477259397507, "learning_rate": 1.8898443596914136e-05, "loss": 0.605965793132782, "step": 2864 }, { "epoch": 0.5293852709865922, "grad_norm": 0.07341812551021576, "learning_rate": 1.8897533211420525e-05, "loss": 0.9426294565200806, "step": 2865 }, { "epoch": 0.529570047695488, "grad_norm": 0.07296188175678253, "learning_rate": 1.8896622471832925e-05, "loss": 0.612720787525177, "step": 2866 }, { "epoch": 0.5297548244043838, "grad_norm": 0.07263094931840897, "learning_rate": 1.889571137818759e-05, "loss": 0.5333542823791504, "step": 2867 }, { "epoch": 0.5299396011132796, "grad_norm": 0.08414128422737122, "learning_rate": 1.8894799930520768e-05, "loss": 0.7564873695373535, "step": 2868 }, { "epoch": 0.5301243778221755, "grad_norm": 0.12541791796684265, "learning_rate": 1.889388812886874e-05, "loss": 0.8672096729278564, "step": 2869 }, { "epoch": 0.5303091545310714, "grad_norm": 0.07895677536725998, "learning_rate": 1.8892975973267787e-05, "loss": 0.6737049221992493, "step": 2870 }, { "epoch": 0.5304939312399672, "grad_norm": 0.06124429777264595, "learning_rate": 1.8892063463754215e-05, "loss": 0.5795494318008423, "step": 2871 }, { "epoch": 0.5306787079488631, "grad_norm": 0.07113495469093323, "learning_rate": 1.8891150600364342e-05, "loss": 0.6831026077270508, "step": 2872 }, { "epoch": 0.5308634846577589, "grad_norm": 0.08000877499580383, "learning_rate": 1.8890237383134485e-05, "loss": 0.7559480667114258, "step": 2873 }, { "epoch": 0.5310482613666547, "grad_norm": 0.08119085431098938, "learning_rate": 1.8889323812100995e-05, "loss": 0.8790738582611084, "step": 2874 }, { "epoch": 0.5312330380755506, "grad_norm": 0.06821675598621368, "learning_rate": 1.888840988730023e-05, "loss": 0.5029259920120239, "step": 2875 }, { "epoch": 0.5314178147844464, "grad_norm": 0.06950125098228455, "learning_rate": 1.8887495608768557e-05, "loss": 0.5709627866744995, "step": 2876 }, { "epoch": 0.5316025914933422, "grad_norm": 0.05820643901824951, "learning_rate": 1.888658097654237e-05, "loss": 0.4662620723247528, "step": 2877 }, { "epoch": 0.5317873682022382, "grad_norm": 0.09406157582998276, "learning_rate": 1.8885665990658055e-05, "loss": 0.7106308341026306, "step": 2878 }, { "epoch": 0.531972144911134, "grad_norm": 0.06230054423213005, "learning_rate": 1.8884750651152037e-05, "loss": 0.5774460434913635, "step": 2879 }, { "epoch": 0.5321569216200298, "grad_norm": 0.0671701729297638, "learning_rate": 1.8883834958060742e-05, "loss": 0.5192086100578308, "step": 2880 }, { "epoch": 0.5323416983289256, "grad_norm": 0.0866723582148552, "learning_rate": 1.888291891142061e-05, "loss": 0.6210560202598572, "step": 2881 }, { "epoch": 0.5325264750378215, "grad_norm": 0.06697280704975128, "learning_rate": 1.8882002511268093e-05, "loss": 0.667514443397522, "step": 2882 }, { "epoch": 0.5327112517467173, "grad_norm": 0.047818925231695175, "learning_rate": 1.8881085757639662e-05, "loss": 0.4015243351459503, "step": 2883 }, { "epoch": 0.5328960284556131, "grad_norm": 0.06965786963701248, "learning_rate": 1.8880168650571805e-05, "loss": 0.6664662957191467, "step": 2884 }, { "epoch": 0.5330808051645091, "grad_norm": 0.07071227580308914, "learning_rate": 1.8879251190101024e-05, "loss": 0.6189071536064148, "step": 2885 }, { "epoch": 0.5332655818734049, "grad_norm": 0.07223183661699295, "learning_rate": 1.8878333376263818e-05, "loss": 0.6490969657897949, "step": 2886 }, { "epoch": 0.5334503585823007, "grad_norm": 0.06491070985794067, "learning_rate": 1.8877415209096725e-05, "loss": 0.7136419415473938, "step": 2887 }, { "epoch": 0.5336351352911965, "grad_norm": 0.06153097003698349, "learning_rate": 1.887649668863628e-05, "loss": 0.7171862125396729, "step": 2888 }, { "epoch": 0.5338199120000924, "grad_norm": 0.08631009608507156, "learning_rate": 1.8875577814919035e-05, "loss": 0.7024433612823486, "step": 2889 }, { "epoch": 0.5340046887089882, "grad_norm": 0.06136897951364517, "learning_rate": 1.8874658587981563e-05, "loss": 0.542658805847168, "step": 2890 }, { "epoch": 0.534189465417884, "grad_norm": 0.059265486896038055, "learning_rate": 1.8873739007860444e-05, "loss": 0.42815086245536804, "step": 2891 }, { "epoch": 0.53437424212678, "grad_norm": 0.06907325237989426, "learning_rate": 1.8872819074592275e-05, "loss": 0.5861951112747192, "step": 2892 }, { "epoch": 0.5345590188356758, "grad_norm": 0.08019591122865677, "learning_rate": 1.8871898788213667e-05, "loss": 0.7540650963783264, "step": 2893 }, { "epoch": 0.5347437955445716, "grad_norm": 0.06720145046710968, "learning_rate": 1.8870978148761245e-05, "loss": 0.6117087602615356, "step": 2894 }, { "epoch": 0.5349285722534675, "grad_norm": 0.06779257208108902, "learning_rate": 1.8870057156271643e-05, "loss": 0.504643976688385, "step": 2895 }, { "epoch": 0.5351133489623633, "grad_norm": 0.06835329532623291, "learning_rate": 1.8869135810781517e-05, "loss": 0.48108264803886414, "step": 2896 }, { "epoch": 0.5352981256712591, "grad_norm": 0.08525102585554123, "learning_rate": 1.8868214112327538e-05, "loss": 0.7447647452354431, "step": 2897 }, { "epoch": 0.535482902380155, "grad_norm": 0.06674760580062866, "learning_rate": 1.8867292060946378e-05, "loss": 0.499419629573822, "step": 2898 }, { "epoch": 0.5356676790890508, "grad_norm": 0.055783241987228394, "learning_rate": 1.886636965667474e-05, "loss": 0.49741414189338684, "step": 2899 }, { "epoch": 0.5358524557979467, "grad_norm": 0.08357007801532745, "learning_rate": 1.8865446899549322e-05, "loss": 0.7474427223205566, "step": 2900 }, { "epoch": 0.5360372325068425, "grad_norm": 0.10168084502220154, "learning_rate": 1.886452378960686e-05, "loss": 0.876984715461731, "step": 2901 }, { "epoch": 0.5362220092157384, "grad_norm": 0.07848334312438965, "learning_rate": 1.8863600326884085e-05, "loss": 0.7350624203681946, "step": 2902 }, { "epoch": 0.5364067859246342, "grad_norm": 0.06975662708282471, "learning_rate": 1.8862676511417747e-05, "loss": 0.6788052320480347, "step": 2903 }, { "epoch": 0.53659156263353, "grad_norm": 0.09710551053285599, "learning_rate": 1.886175234324461e-05, "loss": 0.826421320438385, "step": 2904 }, { "epoch": 0.5367763393424259, "grad_norm": 0.08636506646871567, "learning_rate": 1.8860827822401454e-05, "loss": 0.8522170186042786, "step": 2905 }, { "epoch": 0.5369611160513217, "grad_norm": 0.061069127172231674, "learning_rate": 1.8859902948925076e-05, "loss": 0.6208251714706421, "step": 2906 }, { "epoch": 0.5371458927602176, "grad_norm": 0.07857941836118698, "learning_rate": 1.8858977722852273e-05, "loss": 0.6387959718704224, "step": 2907 }, { "epoch": 0.5373306694691135, "grad_norm": 0.07378064841032028, "learning_rate": 1.885805214421988e-05, "loss": 0.7660172581672668, "step": 2908 }, { "epoch": 0.5375154461780093, "grad_norm": 0.050828199833631516, "learning_rate": 1.885712621306472e-05, "loss": 0.3771149814128876, "step": 2909 }, { "epoch": 0.5377002228869051, "grad_norm": 0.07281278073787689, "learning_rate": 1.885619992942365e-05, "loss": 0.5961358547210693, "step": 2910 }, { "epoch": 0.5378849995958009, "grad_norm": 0.08137860894203186, "learning_rate": 1.8855273293333532e-05, "loss": 0.7484938502311707, "step": 2911 }, { "epoch": 0.5380697763046968, "grad_norm": 0.0633959174156189, "learning_rate": 1.8854346304831236e-05, "loss": 0.553483784198761, "step": 2912 }, { "epoch": 0.5382545530135926, "grad_norm": 0.08014381676912308, "learning_rate": 1.8853418963953666e-05, "loss": 0.6085349917411804, "step": 2913 }, { "epoch": 0.5384393297224885, "grad_norm": 0.05916145443916321, "learning_rate": 1.8852491270737715e-05, "loss": 0.5331775546073914, "step": 2914 }, { "epoch": 0.5386241064313844, "grad_norm": 0.07928591966629028, "learning_rate": 1.8851563225220307e-05, "loss": 0.7232125997543335, "step": 2915 }, { "epoch": 0.5388088831402802, "grad_norm": 0.08045277744531631, "learning_rate": 1.8850634827438377e-05, "loss": 0.7127057313919067, "step": 2916 }, { "epoch": 0.538993659849176, "grad_norm": 0.06988881528377533, "learning_rate": 1.8849706077428874e-05, "loss": 0.6256963014602661, "step": 2917 }, { "epoch": 0.5391784365580718, "grad_norm": 0.06709430366754532, "learning_rate": 1.884877697522875e-05, "loss": 0.5660346746444702, "step": 2918 }, { "epoch": 0.5393632132669677, "grad_norm": 0.07249153405427933, "learning_rate": 1.884784752087499e-05, "loss": 0.5950549244880676, "step": 2919 }, { "epoch": 0.5395479899758635, "grad_norm": 0.07752339541912079, "learning_rate": 1.884691771440458e-05, "loss": 0.6727233529090881, "step": 2920 }, { "epoch": 0.5397327666847593, "grad_norm": 0.05704038217663765, "learning_rate": 1.8845987555854526e-05, "loss": 0.49041783809661865, "step": 2921 }, { "epoch": 0.5399175433936553, "grad_norm": 0.08285940438508987, "learning_rate": 1.884505704526184e-05, "loss": 0.7150740623474121, "step": 2922 }, { "epoch": 0.5401023201025511, "grad_norm": 0.07266629487276077, "learning_rate": 1.8844126182663552e-05, "loss": 0.6662338972091675, "step": 2923 }, { "epoch": 0.5402870968114469, "grad_norm": 0.07145297527313232, "learning_rate": 1.884319496809672e-05, "loss": 0.6038743257522583, "step": 2924 }, { "epoch": 0.5404718735203428, "grad_norm": 0.0714673399925232, "learning_rate": 1.884226340159839e-05, "loss": 0.7645812034606934, "step": 2925 }, { "epoch": 0.5406566502292386, "grad_norm": 0.06393370777368546, "learning_rate": 1.8841331483205642e-05, "loss": 0.4089282751083374, "step": 2926 }, { "epoch": 0.5408414269381344, "grad_norm": 0.09627486765384674, "learning_rate": 1.884039921295556e-05, "loss": 0.7667011618614197, "step": 2927 }, { "epoch": 0.5410262036470302, "grad_norm": 0.07943038642406464, "learning_rate": 1.8839466590885253e-05, "loss": 0.7740654349327087, "step": 2928 }, { "epoch": 0.5412109803559262, "grad_norm": 0.0655045360326767, "learning_rate": 1.8838533617031826e-05, "loss": 0.6008351445198059, "step": 2929 }, { "epoch": 0.541395757064822, "grad_norm": 0.06013663485646248, "learning_rate": 1.8837600291432413e-05, "loss": 0.4785308539867401, "step": 2930 }, { "epoch": 0.5415805337737178, "grad_norm": 0.05999566987156868, "learning_rate": 1.8836666614124158e-05, "loss": 0.4873706102371216, "step": 2931 }, { "epoch": 0.5417653104826137, "grad_norm": 0.09239016473293304, "learning_rate": 1.8835732585144218e-05, "loss": 0.9298089146614075, "step": 2932 }, { "epoch": 0.5419500871915095, "grad_norm": 0.06117738410830498, "learning_rate": 1.883479820452977e-05, "loss": 0.5474444031715393, "step": 2933 }, { "epoch": 0.5421348639004053, "grad_norm": 0.08043549954891205, "learning_rate": 1.8833863472317984e-05, "loss": 0.7750460505485535, "step": 2934 }, { "epoch": 0.5423196406093012, "grad_norm": 0.09016053378582001, "learning_rate": 1.8832928388546075e-05, "loss": 0.7897309064865112, "step": 2935 }, { "epoch": 0.5425044173181971, "grad_norm": 0.06423919647932053, "learning_rate": 1.883199295325125e-05, "loss": 0.4962039887905121, "step": 2936 }, { "epoch": 0.5426891940270929, "grad_norm": 0.09071557223796844, "learning_rate": 1.883105716647074e-05, "loss": 0.9806622862815857, "step": 2937 }, { "epoch": 0.5428739707359888, "grad_norm": 0.07148994505405426, "learning_rate": 1.883012102824178e-05, "loss": 0.7550164461135864, "step": 2938 }, { "epoch": 0.5430587474448846, "grad_norm": 0.08958619832992554, "learning_rate": 1.882918453860163e-05, "loss": 0.7683021426200867, "step": 2939 }, { "epoch": 0.5432435241537804, "grad_norm": 0.0824618861079216, "learning_rate": 1.882824769758756e-05, "loss": 0.6786718368530273, "step": 2940 }, { "epoch": 0.5434283008626762, "grad_norm": 0.05214720591902733, "learning_rate": 1.882731050523685e-05, "loss": 0.4915755093097687, "step": 2941 }, { "epoch": 0.5436130775715721, "grad_norm": 0.06492382287979126, "learning_rate": 1.88263729615868e-05, "loss": 0.6302931308746338, "step": 2942 }, { "epoch": 0.5437978542804679, "grad_norm": 0.0823633000254631, "learning_rate": 1.882543506667472e-05, "loss": 0.6546368598937988, "step": 2943 }, { "epoch": 0.5439826309893638, "grad_norm": 0.0699317455291748, "learning_rate": 1.8824496820537934e-05, "loss": 0.5268377661705017, "step": 2944 }, { "epoch": 0.5441674076982597, "grad_norm": 0.05576154962182045, "learning_rate": 1.8823558223213787e-05, "loss": 0.4686092734336853, "step": 2945 }, { "epoch": 0.5443521844071555, "grad_norm": 0.06533041596412659, "learning_rate": 1.8822619274739623e-05, "loss": 0.4361056089401245, "step": 2946 }, { "epoch": 0.5445369611160513, "grad_norm": 0.08646456152200699, "learning_rate": 1.882167997515282e-05, "loss": 0.8023089170455933, "step": 2947 }, { "epoch": 0.5447217378249471, "grad_norm": 0.0743655264377594, "learning_rate": 1.8820740324490747e-05, "loss": 0.645487368106842, "step": 2948 }, { "epoch": 0.544906514533843, "grad_norm": 0.0705256536602974, "learning_rate": 1.8819800322790808e-05, "loss": 0.6143032312393188, "step": 2949 }, { "epoch": 0.5450912912427388, "grad_norm": 0.0705714076757431, "learning_rate": 1.8818859970090414e-05, "loss": 0.6757660508155823, "step": 2950 }, { "epoch": 0.5452760679516347, "grad_norm": 0.07375648617744446, "learning_rate": 1.8817919266426977e-05, "loss": 0.6835747361183167, "step": 2951 }, { "epoch": 0.5454608446605306, "grad_norm": 0.07369264960289001, "learning_rate": 1.8816978211837945e-05, "loss": 0.6894022226333618, "step": 2952 }, { "epoch": 0.5456456213694264, "grad_norm": 0.08359493315219879, "learning_rate": 1.8816036806360766e-05, "loss": 0.6959936618804932, "step": 2953 }, { "epoch": 0.5458303980783222, "grad_norm": 0.08166102319955826, "learning_rate": 1.88150950500329e-05, "loss": 0.7394427061080933, "step": 2954 }, { "epoch": 0.5460151747872181, "grad_norm": 0.06949790567159653, "learning_rate": 1.881415294289183e-05, "loss": 0.6202840209007263, "step": 2955 }, { "epoch": 0.5461999514961139, "grad_norm": 0.07749205827713013, "learning_rate": 1.8813210484975055e-05, "loss": 0.5997181534767151, "step": 2956 }, { "epoch": 0.5463847282050097, "grad_norm": 0.06218162178993225, "learning_rate": 1.881226767632007e-05, "loss": 0.611036479473114, "step": 2957 }, { "epoch": 0.5465695049139057, "grad_norm": 0.09652518481016159, "learning_rate": 1.8811324516964404e-05, "loss": 0.8997894525527954, "step": 2958 }, { "epoch": 0.5467542816228015, "grad_norm": 0.07064218819141388, "learning_rate": 1.881038100694559e-05, "loss": 0.5235415101051331, "step": 2959 }, { "epoch": 0.5469390583316973, "grad_norm": 0.09446271508932114, "learning_rate": 1.880943714630117e-05, "loss": 0.8649404644966125, "step": 2960 }, { "epoch": 0.5471238350405931, "grad_norm": 0.06781157851219177, "learning_rate": 1.880849293506872e-05, "loss": 0.6357306241989136, "step": 2961 }, { "epoch": 0.547308611749489, "grad_norm": 0.06145769730210304, "learning_rate": 1.8807548373285808e-05, "loss": 0.5617401003837585, "step": 2962 }, { "epoch": 0.5474933884583848, "grad_norm": 0.05609019100666046, "learning_rate": 1.8806603460990023e-05, "loss": 0.48635318875312805, "step": 2963 }, { "epoch": 0.5476781651672806, "grad_norm": 0.0804615467786789, "learning_rate": 1.8805658198218975e-05, "loss": 0.714273989200592, "step": 2964 }, { "epoch": 0.5478629418761765, "grad_norm": 0.062169257551431656, "learning_rate": 1.8804712585010277e-05, "loss": 0.4738171100616455, "step": 2965 }, { "epoch": 0.5480477185850724, "grad_norm": 0.0890984758734703, "learning_rate": 1.880376662140157e-05, "loss": 1.014730453491211, "step": 2966 }, { "epoch": 0.5482324952939682, "grad_norm": 0.08369938284158707, "learning_rate": 1.880282030743049e-05, "loss": 0.6934597492218018, "step": 2967 }, { "epoch": 0.548417272002864, "grad_norm": 0.05199027433991432, "learning_rate": 1.8801873643134705e-05, "loss": 0.3649899959564209, "step": 2968 }, { "epoch": 0.5486020487117599, "grad_norm": 0.07019580900669098, "learning_rate": 1.8800926628551884e-05, "loss": 0.6902180910110474, "step": 2969 }, { "epoch": 0.5487868254206557, "grad_norm": 0.051402270793914795, "learning_rate": 1.8799979263719722e-05, "loss": 0.396835595369339, "step": 2970 }, { "epoch": 0.5489716021295515, "grad_norm": 0.08352036774158478, "learning_rate": 1.879903154867591e-05, "loss": 0.7998559474945068, "step": 2971 }, { "epoch": 0.5491563788384474, "grad_norm": 0.07527686655521393, "learning_rate": 1.879808348345818e-05, "loss": 0.5918179154396057, "step": 2972 }, { "epoch": 0.5493411555473433, "grad_norm": 0.10220187157392502, "learning_rate": 1.8797135068104247e-05, "loss": 0.9950894713401794, "step": 2973 }, { "epoch": 0.5495259322562391, "grad_norm": 0.09731113165616989, "learning_rate": 1.879618630265186e-05, "loss": 0.7071390151977539, "step": 2974 }, { "epoch": 0.549710708965135, "grad_norm": 0.06815184652805328, "learning_rate": 1.879523718713878e-05, "loss": 0.5961635112762451, "step": 2975 }, { "epoch": 0.5498954856740308, "grad_norm": 0.08032702654600143, "learning_rate": 1.879428772160278e-05, "loss": 0.7274848818778992, "step": 2976 }, { "epoch": 0.5500802623829266, "grad_norm": 0.06666399538516998, "learning_rate": 1.879333790608164e-05, "loss": 0.5938247442245483, "step": 2977 }, { "epoch": 0.5502650390918224, "grad_norm": 0.07029084116220474, "learning_rate": 1.8792387740613162e-05, "loss": 0.6550891995429993, "step": 2978 }, { "epoch": 0.5504498158007183, "grad_norm": 0.07481728494167328, "learning_rate": 1.8791437225235157e-05, "loss": 0.6303069591522217, "step": 2979 }, { "epoch": 0.5506345925096142, "grad_norm": 0.07247333973646164, "learning_rate": 1.8790486359985456e-05, "loss": 0.5467405915260315, "step": 2980 }, { "epoch": 0.55081936921851, "grad_norm": 0.06596650183200836, "learning_rate": 1.8789535144901902e-05, "loss": 0.7341605424880981, "step": 2981 }, { "epoch": 0.5510041459274059, "grad_norm": 0.06992919743061066, "learning_rate": 1.8788583580022347e-05, "loss": 0.6399144530296326, "step": 2982 }, { "epoch": 0.5511889226363017, "grad_norm": 0.08813784271478653, "learning_rate": 1.8787631665384666e-05, "loss": 0.7428203225135803, "step": 2983 }, { "epoch": 0.5513736993451975, "grad_norm": 0.06808775663375854, "learning_rate": 1.878667940102673e-05, "loss": 0.595568835735321, "step": 2984 }, { "epoch": 0.5515584760540934, "grad_norm": 0.08083754032850266, "learning_rate": 1.8785726786986446e-05, "loss": 0.7247161269187927, "step": 2985 }, { "epoch": 0.5517432527629892, "grad_norm": 0.07417766749858856, "learning_rate": 1.8784773823301726e-05, "loss": 0.6404522061347961, "step": 2986 }, { "epoch": 0.551928029471885, "grad_norm": 0.06823204457759857, "learning_rate": 1.878382051001049e-05, "loss": 0.6214050650596619, "step": 2987 }, { "epoch": 0.552112806180781, "grad_norm": 0.0789300948381424, "learning_rate": 1.878286684715068e-05, "loss": 0.7323463559150696, "step": 2988 }, { "epoch": 0.5522975828896768, "grad_norm": 0.09078711271286011, "learning_rate": 1.8781912834760246e-05, "loss": 0.6080383062362671, "step": 2989 }, { "epoch": 0.5524823595985726, "grad_norm": 0.06681746989488602, "learning_rate": 1.8780958472877156e-05, "loss": 0.5171971917152405, "step": 2990 }, { "epoch": 0.5526671363074684, "grad_norm": 0.055142708122730255, "learning_rate": 1.8780003761539392e-05, "loss": 0.4934493601322174, "step": 2991 }, { "epoch": 0.5528519130163643, "grad_norm": 0.0705462172627449, "learning_rate": 1.877904870078495e-05, "loss": 0.6238037347793579, "step": 2992 }, { "epoch": 0.5530366897252601, "grad_norm": 0.06349740922451019, "learning_rate": 1.877809329065183e-05, "loss": 0.6783521175384521, "step": 2993 }, { "epoch": 0.5532214664341559, "grad_norm": 0.06308883428573608, "learning_rate": 1.8777137531178066e-05, "loss": 0.4816989004611969, "step": 2994 }, { "epoch": 0.5534062431430519, "grad_norm": 0.08100791275501251, "learning_rate": 1.8776181422401683e-05, "loss": 0.739810585975647, "step": 2995 }, { "epoch": 0.5535910198519477, "grad_norm": 0.07574406266212463, "learning_rate": 1.8775224964360738e-05, "loss": 0.5386341214179993, "step": 2996 }, { "epoch": 0.5537757965608435, "grad_norm": 0.08601616322994232, "learning_rate": 1.8774268157093295e-05, "loss": 0.6581635475158691, "step": 2997 }, { "epoch": 0.5539605732697394, "grad_norm": 0.07239647209644318, "learning_rate": 1.877331100063743e-05, "loss": 0.6122103929519653, "step": 2998 }, { "epoch": 0.5541453499786352, "grad_norm": 0.07645408809185028, "learning_rate": 1.8772353495031236e-05, "loss": 0.6776065826416016, "step": 2999 }, { "epoch": 0.554330126687531, "grad_norm": 0.08867352455854416, "learning_rate": 1.877139564031282e-05, "loss": 0.7638838291168213, "step": 3000 }, { "epoch": 0.554330126687531, "eval_loss": 0.6894482374191284, "eval_runtime": 157.2981, "eval_samples_per_second": 115.888, "eval_steps_per_second": 14.488, "step": 3000 }, { "epoch": 0.5545149033964268, "grad_norm": 0.09155572205781937, "learning_rate": 1.8770437436520293e-05, "loss": 0.9210847616195679, "step": 3001 }, { "epoch": 0.5546996801053228, "grad_norm": 0.07019831240177155, "learning_rate": 1.87694788836918e-05, "loss": 0.6158140897750854, "step": 3002 }, { "epoch": 0.5548844568142186, "grad_norm": 0.055636219680309296, "learning_rate": 1.8768519981865485e-05, "loss": 0.5288290977478027, "step": 3003 }, { "epoch": 0.5550692335231144, "grad_norm": 0.07038795202970505, "learning_rate": 1.8767560731079504e-05, "loss": 0.5546280145645142, "step": 3004 }, { "epoch": 0.5552540102320103, "grad_norm": 0.07437584549188614, "learning_rate": 1.876660113137204e-05, "loss": 0.7176721096038818, "step": 3005 }, { "epoch": 0.5554387869409061, "grad_norm": 0.06974802166223526, "learning_rate": 1.8765641182781274e-05, "loss": 0.8900144100189209, "step": 3006 }, { "epoch": 0.5556235636498019, "grad_norm": 0.07461828738451004, "learning_rate": 1.8764680885345415e-05, "loss": 0.6833454966545105, "step": 3007 }, { "epoch": 0.5558083403586978, "grad_norm": 0.09632608294487, "learning_rate": 1.8763720239102682e-05, "loss": 0.8168778419494629, "step": 3008 }, { "epoch": 0.5559931170675936, "grad_norm": 0.06430435180664062, "learning_rate": 1.8762759244091294e-05, "loss": 0.48340433835983276, "step": 3009 }, { "epoch": 0.5561778937764895, "grad_norm": 0.06506127864122391, "learning_rate": 1.876179790034951e-05, "loss": 0.6659327149391174, "step": 3010 }, { "epoch": 0.5563626704853853, "grad_norm": 0.0728253498673439, "learning_rate": 1.8760836207915577e-05, "loss": 0.6238194704055786, "step": 3011 }, { "epoch": 0.5565474471942812, "grad_norm": 0.0691649317741394, "learning_rate": 1.8759874166827773e-05, "loss": 0.699463963508606, "step": 3012 }, { "epoch": 0.556732223903177, "grad_norm": 0.07036525011062622, "learning_rate": 1.8758911777124385e-05, "loss": 0.6164835691452026, "step": 3013 }, { "epoch": 0.5569170006120728, "grad_norm": 0.07765907049179077, "learning_rate": 1.875794903884371e-05, "loss": 0.5402286648750305, "step": 3014 }, { "epoch": 0.5571017773209687, "grad_norm": 0.06866981834173203, "learning_rate": 1.8756985952024066e-05, "loss": 0.5985514521598816, "step": 3015 }, { "epoch": 0.5572865540298645, "grad_norm": 0.0924587994813919, "learning_rate": 1.8756022516703774e-05, "loss": 0.6655313968658447, "step": 3016 }, { "epoch": 0.5574713307387604, "grad_norm": 0.06619324535131454, "learning_rate": 1.875505873292118e-05, "loss": 0.6115932464599609, "step": 3017 }, { "epoch": 0.5576561074476563, "grad_norm": 0.06452200561761856, "learning_rate": 1.8754094600714646e-05, "loss": 0.5450788140296936, "step": 3018 }, { "epoch": 0.5578408841565521, "grad_norm": 0.08582423627376556, "learning_rate": 1.875313012012253e-05, "loss": 0.7833226919174194, "step": 3019 }, { "epoch": 0.5580256608654479, "grad_norm": 0.09632756561040878, "learning_rate": 1.8752165291183216e-05, "loss": 0.9585537314414978, "step": 3020 }, { "epoch": 0.5582104375743437, "grad_norm": 0.08630585670471191, "learning_rate": 1.8751200113935114e-05, "loss": 0.8109465837478638, "step": 3021 }, { "epoch": 0.5583952142832396, "grad_norm": 0.07621660083532333, "learning_rate": 1.8750234588416623e-05, "loss": 0.6150826811790466, "step": 3022 }, { "epoch": 0.5585799909921354, "grad_norm": 0.08415526896715164, "learning_rate": 1.874926871466617e-05, "loss": 0.7688988447189331, "step": 3023 }, { "epoch": 0.5587647677010313, "grad_norm": 0.07290530949831009, "learning_rate": 1.8748302492722196e-05, "loss": 0.8483543395996094, "step": 3024 }, { "epoch": 0.5589495444099272, "grad_norm": 0.08862084150314331, "learning_rate": 1.874733592262315e-05, "loss": 0.6412017941474915, "step": 3025 }, { "epoch": 0.559134321118823, "grad_norm": 0.07729244977235794, "learning_rate": 1.8746369004407505e-05, "loss": 0.6772671937942505, "step": 3026 }, { "epoch": 0.5593190978277188, "grad_norm": 0.0640832781791687, "learning_rate": 1.8745401738113737e-05, "loss": 0.6276355385780334, "step": 3027 }, { "epoch": 0.5595038745366147, "grad_norm": 0.0748014822602272, "learning_rate": 1.874443412378034e-05, "loss": 0.629840612411499, "step": 3028 }, { "epoch": 0.5596886512455105, "grad_norm": 0.07689131051301956, "learning_rate": 1.8743466161445823e-05, "loss": 0.7019093632698059, "step": 3029 }, { "epoch": 0.5598734279544063, "grad_norm": 0.0724974200129509, "learning_rate": 1.8742497851148708e-05, "loss": 0.6934340596199036, "step": 3030 }, { "epoch": 0.5600582046633021, "grad_norm": 0.09675077348947525, "learning_rate": 1.8741529192927528e-05, "loss": 0.7328585982322693, "step": 3031 }, { "epoch": 0.5602429813721981, "grad_norm": 0.06351233273744583, "learning_rate": 1.8740560186820837e-05, "loss": 0.5489311814308167, "step": 3032 }, { "epoch": 0.5604277580810939, "grad_norm": 0.08618681132793427, "learning_rate": 1.8739590832867197e-05, "loss": 0.7736677527427673, "step": 3033 }, { "epoch": 0.5606125347899897, "grad_norm": 0.0654274970293045, "learning_rate": 1.873862113110518e-05, "loss": 0.5988054275512695, "step": 3034 }, { "epoch": 0.5607973114988856, "grad_norm": 0.08028063178062439, "learning_rate": 1.8737651081573387e-05, "loss": 0.8057206273078918, "step": 3035 }, { "epoch": 0.5609820882077814, "grad_norm": 0.07563837617635727, "learning_rate": 1.8736680684310415e-05, "loss": 0.6490789651870728, "step": 3036 }, { "epoch": 0.5611668649166772, "grad_norm": 0.09664560109376907, "learning_rate": 1.8735709939354885e-05, "loss": 0.9325444102287292, "step": 3037 }, { "epoch": 0.561351641625573, "grad_norm": 0.05492691323161125, "learning_rate": 1.8734738846745433e-05, "loss": 0.45082294940948486, "step": 3038 }, { "epoch": 0.561536418334469, "grad_norm": 0.0741322934627533, "learning_rate": 1.87337674065207e-05, "loss": 0.7102787494659424, "step": 3039 }, { "epoch": 0.5617211950433648, "grad_norm": 0.07162413746118546, "learning_rate": 1.8732795618719347e-05, "loss": 0.6887521147727966, "step": 3040 }, { "epoch": 0.5619059717522606, "grad_norm": 0.08063210546970367, "learning_rate": 1.873182348338005e-05, "loss": 0.7454641461372375, "step": 3041 }, { "epoch": 0.5620907484611565, "grad_norm": 0.0753689780831337, "learning_rate": 1.87308510005415e-05, "loss": 0.637800931930542, "step": 3042 }, { "epoch": 0.5622755251700523, "grad_norm": 0.07068897038698196, "learning_rate": 1.8729878170242392e-05, "loss": 0.594787061214447, "step": 3043 }, { "epoch": 0.5624603018789481, "grad_norm": 0.07820609211921692, "learning_rate": 1.8728904992521448e-05, "loss": 0.8760103583335876, "step": 3044 }, { "epoch": 0.562645078587844, "grad_norm": 0.06646838784217834, "learning_rate": 1.8727931467417394e-05, "loss": 0.46639692783355713, "step": 3045 }, { "epoch": 0.5628298552967399, "grad_norm": 0.08322024345397949, "learning_rate": 1.8726957594968974e-05, "loss": 0.6693019866943359, "step": 3046 }, { "epoch": 0.5630146320056357, "grad_norm": 0.07052835077047348, "learning_rate": 1.8725983375214945e-05, "loss": 0.5950911045074463, "step": 3047 }, { "epoch": 0.5631994087145316, "grad_norm": 0.0648900717496872, "learning_rate": 1.8725008808194074e-05, "loss": 0.5393177270889282, "step": 3048 }, { "epoch": 0.5633841854234274, "grad_norm": 0.0670960322022438, "learning_rate": 1.872403389394515e-05, "loss": 0.5252442955970764, "step": 3049 }, { "epoch": 0.5635689621323232, "grad_norm": 0.07087778300046921, "learning_rate": 1.8723058632506975e-05, "loss": 0.66823810338974, "step": 3050 }, { "epoch": 0.563753738841219, "grad_norm": 0.08115480095148087, "learning_rate": 1.872208302391836e-05, "loss": 0.6442320346832275, "step": 3051 }, { "epoch": 0.5639385155501149, "grad_norm": 0.06653082370758057, "learning_rate": 1.872110706821812e-05, "loss": 0.6443053483963013, "step": 3052 }, { "epoch": 0.5641232922590107, "grad_norm": 0.09376993775367737, "learning_rate": 1.8720130765445107e-05, "loss": 0.8187600374221802, "step": 3053 }, { "epoch": 0.5643080689679066, "grad_norm": 0.07879412919282913, "learning_rate": 1.8719154115638174e-05, "loss": 0.7443960905075073, "step": 3054 }, { "epoch": 0.5644928456768025, "grad_norm": 0.08549877256155014, "learning_rate": 1.8718177118836185e-05, "loss": 0.6854761242866516, "step": 3055 }, { "epoch": 0.5646776223856983, "grad_norm": 0.0675218477845192, "learning_rate": 1.8717199775078022e-05, "loss": 0.7379408478736877, "step": 3056 }, { "epoch": 0.5648623990945941, "grad_norm": 0.07826707512140274, "learning_rate": 1.871622208440258e-05, "loss": 0.7605068683624268, "step": 3057 }, { "epoch": 0.56504717580349, "grad_norm": 0.07787346839904785, "learning_rate": 1.871524404684877e-05, "loss": 0.6818773746490479, "step": 3058 }, { "epoch": 0.5652319525123858, "grad_norm": 0.06998469680547714, "learning_rate": 1.871426566245551e-05, "loss": 0.7140995860099792, "step": 3059 }, { "epoch": 0.5654167292212816, "grad_norm": 0.07448789477348328, "learning_rate": 1.8713286931261742e-05, "loss": 0.6892114281654358, "step": 3060 }, { "epoch": 0.5656015059301776, "grad_norm": 0.07089152187108994, "learning_rate": 1.871230785330641e-05, "loss": 0.5666224360466003, "step": 3061 }, { "epoch": 0.5657862826390734, "grad_norm": 0.09205210208892822, "learning_rate": 1.8711328428628492e-05, "loss": 0.728224515914917, "step": 3062 }, { "epoch": 0.5659710593479692, "grad_norm": 0.05926433950662613, "learning_rate": 1.8710348657266953e-05, "loss": 0.5452472567558289, "step": 3063 }, { "epoch": 0.566155836056865, "grad_norm": 0.07631290704011917, "learning_rate": 1.8709368539260785e-05, "loss": 0.6477214694023132, "step": 3064 }, { "epoch": 0.5663406127657609, "grad_norm": 0.07008951157331467, "learning_rate": 1.8708388074649e-05, "loss": 0.6564326882362366, "step": 3065 }, { "epoch": 0.5665253894746567, "grad_norm": 0.07543662935495377, "learning_rate": 1.8707407263470614e-05, "loss": 0.6354846358299255, "step": 3066 }, { "epoch": 0.5667101661835525, "grad_norm": 0.06850934028625488, "learning_rate": 1.8706426105764663e-05, "loss": 0.6425608396530151, "step": 3067 }, { "epoch": 0.5668949428924485, "grad_norm": 0.08726583421230316, "learning_rate": 1.870544460157019e-05, "loss": 0.8744035363197327, "step": 3068 }, { "epoch": 0.5670797196013443, "grad_norm": 0.064764603972435, "learning_rate": 1.8704462750926258e-05, "loss": 0.7021517753601074, "step": 3069 }, { "epoch": 0.5672644963102401, "grad_norm": 0.06063363328576088, "learning_rate": 1.870348055387194e-05, "loss": 0.5126523971557617, "step": 3070 }, { "epoch": 0.567449273019136, "grad_norm": 0.0791720300912857, "learning_rate": 1.870249801044633e-05, "loss": 0.6843903064727783, "step": 3071 }, { "epoch": 0.5676340497280318, "grad_norm": 0.07378975301980972, "learning_rate": 1.8701515120688522e-05, "loss": 0.5955958366394043, "step": 3072 }, { "epoch": 0.5678188264369276, "grad_norm": 0.06286358088254929, "learning_rate": 1.8700531884637635e-05, "loss": 0.511538565158844, "step": 3073 }, { "epoch": 0.5680036031458234, "grad_norm": 0.04755840823054314, "learning_rate": 1.8699548302332802e-05, "loss": 0.3661784529685974, "step": 3074 }, { "epoch": 0.5681883798547193, "grad_norm": 0.056572191417217255, "learning_rate": 1.8698564373813162e-05, "loss": 0.5282621383666992, "step": 3075 }, { "epoch": 0.5683731565636152, "grad_norm": 0.08536586165428162, "learning_rate": 1.8697580099117875e-05, "loss": 0.6903574466705322, "step": 3076 }, { "epoch": 0.568557933272511, "grad_norm": 0.0768958032131195, "learning_rate": 1.869659547828611e-05, "loss": 0.8080697655677795, "step": 3077 }, { "epoch": 0.5687427099814069, "grad_norm": 0.0779389813542366, "learning_rate": 1.8695610511357055e-05, "loss": 0.6167462468147278, "step": 3078 }, { "epoch": 0.5689274866903027, "grad_norm": 0.07720815390348434, "learning_rate": 1.869462519836991e-05, "loss": 0.5635837316513062, "step": 3079 }, { "epoch": 0.5691122633991985, "grad_norm": 0.06597666442394257, "learning_rate": 1.869363953936388e-05, "loss": 0.5614623427391052, "step": 3080 }, { "epoch": 0.5692970401080943, "grad_norm": 0.0675392746925354, "learning_rate": 1.8692653534378195e-05, "loss": 0.4289820194244385, "step": 3081 }, { "epoch": 0.5694818168169902, "grad_norm": 0.10091688483953476, "learning_rate": 1.8691667183452096e-05, "loss": 0.8583399653434753, "step": 3082 }, { "epoch": 0.5696665935258861, "grad_norm": 0.046852800995111465, "learning_rate": 1.8690680486624835e-05, "loss": 0.46401602029800415, "step": 3083 }, { "epoch": 0.5698513702347819, "grad_norm": 0.07693001627922058, "learning_rate": 1.8689693443935683e-05, "loss": 0.6485803723335266, "step": 3084 }, { "epoch": 0.5700361469436778, "grad_norm": 0.07034293562173843, "learning_rate": 1.8688706055423916e-05, "loss": 0.6597393751144409, "step": 3085 }, { "epoch": 0.5702209236525736, "grad_norm": 0.08732926100492477, "learning_rate": 1.8687718321128832e-05, "loss": 0.7701346278190613, "step": 3086 }, { "epoch": 0.5704057003614694, "grad_norm": 0.07584357261657715, "learning_rate": 1.8686730241089738e-05, "loss": 0.7044535875320435, "step": 3087 }, { "epoch": 0.5705904770703653, "grad_norm": 0.0704139694571495, "learning_rate": 1.8685741815345958e-05, "loss": 0.7031932473182678, "step": 3088 }, { "epoch": 0.5707752537792611, "grad_norm": 0.08140061050653458, "learning_rate": 1.8684753043936828e-05, "loss": 0.6103025078773499, "step": 3089 }, { "epoch": 0.570960030488157, "grad_norm": 0.06319376826286316, "learning_rate": 1.8683763926901697e-05, "loss": 0.5372883677482605, "step": 3090 }, { "epoch": 0.5711448071970529, "grad_norm": 0.07563546299934387, "learning_rate": 1.8682774464279933e-05, "loss": 0.6491016745567322, "step": 3091 }, { "epoch": 0.5713295839059487, "grad_norm": 0.08996942639350891, "learning_rate": 1.8681784656110912e-05, "loss": 0.7877377271652222, "step": 3092 }, { "epoch": 0.5715143606148445, "grad_norm": 0.07759862393140793, "learning_rate": 1.8680794502434018e-05, "loss": 0.8040440678596497, "step": 3093 }, { "epoch": 0.5716991373237403, "grad_norm": 0.06511888653039932, "learning_rate": 1.8679804003288664e-05, "loss": 0.5631576776504517, "step": 3094 }, { "epoch": 0.5718839140326362, "grad_norm": 0.0843639001250267, "learning_rate": 1.8678813158714266e-05, "loss": 0.6956359148025513, "step": 3095 }, { "epoch": 0.572068690741532, "grad_norm": 0.05912714824080467, "learning_rate": 1.8677821968750257e-05, "loss": 0.4489075541496277, "step": 3096 }, { "epoch": 0.5722534674504278, "grad_norm": 0.06777922064065933, "learning_rate": 1.8676830433436082e-05, "loss": 0.7072759866714478, "step": 3097 }, { "epoch": 0.5724382441593238, "grad_norm": 0.07907670736312866, "learning_rate": 1.8675838552811204e-05, "loss": 0.7033583521842957, "step": 3098 }, { "epoch": 0.5726230208682196, "grad_norm": 0.06337752193212509, "learning_rate": 1.8674846326915092e-05, "loss": 0.47836634516716003, "step": 3099 }, { "epoch": 0.5728077975771154, "grad_norm": 0.06873060017824173, "learning_rate": 1.867385375578724e-05, "loss": 0.6064472794532776, "step": 3100 }, { "epoch": 0.5729925742860112, "grad_norm": 0.06846865266561508, "learning_rate": 1.8672860839467143e-05, "loss": 0.569277822971344, "step": 3101 }, { "epoch": 0.5731773509949071, "grad_norm": 0.07393836230039597, "learning_rate": 1.867186757799432e-05, "loss": 0.6339954137802124, "step": 3102 }, { "epoch": 0.5733621277038029, "grad_norm": 0.08977872878313065, "learning_rate": 1.8670873971408298e-05, "loss": 0.7323946356773376, "step": 3103 }, { "epoch": 0.5735469044126987, "grad_norm": 0.07141544669866562, "learning_rate": 1.8669880019748618e-05, "loss": 0.6208509206771851, "step": 3104 }, { "epoch": 0.5737316811215947, "grad_norm": 0.07710936665534973, "learning_rate": 1.8668885723054838e-05, "loss": 0.651936411857605, "step": 3105 }, { "epoch": 0.5739164578304905, "grad_norm": 0.06347020715475082, "learning_rate": 1.866789108136653e-05, "loss": 0.6423757672309875, "step": 3106 }, { "epoch": 0.5741012345393863, "grad_norm": 0.06215847283601761, "learning_rate": 1.866689609472327e-05, "loss": 0.5604876279830933, "step": 3107 }, { "epoch": 0.5742860112482822, "grad_norm": 0.07825514674186707, "learning_rate": 1.8665900763164665e-05, "loss": 0.6292716264724731, "step": 3108 }, { "epoch": 0.574470787957178, "grad_norm": 0.08346331119537354, "learning_rate": 1.8664905086730324e-05, "loss": 0.6163325309753418, "step": 3109 }, { "epoch": 0.5746555646660738, "grad_norm": 0.0739307776093483, "learning_rate": 1.8663909065459866e-05, "loss": 0.5332595705986023, "step": 3110 }, { "epoch": 0.5748403413749696, "grad_norm": 0.0750046968460083, "learning_rate": 1.8662912699392933e-05, "loss": 0.6106127500534058, "step": 3111 }, { "epoch": 0.5750251180838656, "grad_norm": 0.08194831013679504, "learning_rate": 1.8661915988569177e-05, "loss": 0.7189168930053711, "step": 3112 }, { "epoch": 0.5752098947927614, "grad_norm": 0.06915279477834702, "learning_rate": 1.8660918933028267e-05, "loss": 0.5820999145507812, "step": 3113 }, { "epoch": 0.5753946715016572, "grad_norm": 0.0856345072388649, "learning_rate": 1.8659921532809878e-05, "loss": 0.8091294765472412, "step": 3114 }, { "epoch": 0.5755794482105531, "grad_norm": 0.07053239643573761, "learning_rate": 1.8658923787953705e-05, "loss": 0.7515913844108582, "step": 3115 }, { "epoch": 0.5757642249194489, "grad_norm": 0.07171469181776047, "learning_rate": 1.8657925698499457e-05, "loss": 0.6084983348846436, "step": 3116 }, { "epoch": 0.5759490016283447, "grad_norm": 0.0760403648018837, "learning_rate": 1.865692726448685e-05, "loss": 0.5334648489952087, "step": 3117 }, { "epoch": 0.5761337783372406, "grad_norm": 0.06720221787691116, "learning_rate": 1.8655928485955628e-05, "loss": 0.5867680311203003, "step": 3118 }, { "epoch": 0.5763185550461364, "grad_norm": 0.077094167470932, "learning_rate": 1.865492936294553e-05, "loss": 0.7712277770042419, "step": 3119 }, { "epoch": 0.5765033317550323, "grad_norm": 0.08528798073530197, "learning_rate": 1.865392989549632e-05, "loss": 0.7741163969039917, "step": 3120 }, { "epoch": 0.5766881084639282, "grad_norm": 0.0766475647687912, "learning_rate": 1.8652930083647774e-05, "loss": 0.7091198563575745, "step": 3121 }, { "epoch": 0.576872885172824, "grad_norm": 0.06112033501267433, "learning_rate": 1.8651929927439684e-05, "loss": 0.44175073504447937, "step": 3122 }, { "epoch": 0.5770576618817198, "grad_norm": 0.0839383453130722, "learning_rate": 1.8650929426911853e-05, "loss": 0.8505361080169678, "step": 3123 }, { "epoch": 0.5772424385906156, "grad_norm": 0.07571355998516083, "learning_rate": 1.8649928582104097e-05, "loss": 0.6146236658096313, "step": 3124 }, { "epoch": 0.5774272152995115, "grad_norm": 0.07697071135044098, "learning_rate": 1.864892739305624e-05, "loss": 0.6030546426773071, "step": 3125 }, { "epoch": 0.5776119920084073, "grad_norm": 0.06890033930540085, "learning_rate": 1.8647925859808135e-05, "loss": 0.6683984994888306, "step": 3126 }, { "epoch": 0.5777967687173032, "grad_norm": 0.06484845280647278, "learning_rate": 1.8646923982399636e-05, "loss": 0.5480669140815735, "step": 3127 }, { "epoch": 0.5779815454261991, "grad_norm": 0.08451871573925018, "learning_rate": 1.8645921760870616e-05, "loss": 0.6453613638877869, "step": 3128 }, { "epoch": 0.5781663221350949, "grad_norm": 0.06641737371683121, "learning_rate": 1.864491919526096e-05, "loss": 0.47079309821128845, "step": 3129 }, { "epoch": 0.5783510988439907, "grad_norm": 0.06663155555725098, "learning_rate": 1.8643916285610565e-05, "loss": 0.6069074273109436, "step": 3130 }, { "epoch": 0.5785358755528865, "grad_norm": 0.06879729777574539, "learning_rate": 1.8642913031959345e-05, "loss": 0.5753926634788513, "step": 3131 }, { "epoch": 0.5787206522617824, "grad_norm": 0.08380347490310669, "learning_rate": 1.8641909434347226e-05, "loss": 0.6205527782440186, "step": 3132 }, { "epoch": 0.5789054289706782, "grad_norm": 0.08263260871171951, "learning_rate": 1.8640905492814153e-05, "loss": 0.6053383350372314, "step": 3133 }, { "epoch": 0.5790902056795741, "grad_norm": 0.09215070307254791, "learning_rate": 1.863990120740007e-05, "loss": 0.7877563238143921, "step": 3134 }, { "epoch": 0.57927498238847, "grad_norm": 0.06187755614519119, "learning_rate": 1.8638896578144955e-05, "loss": 0.43906375765800476, "step": 3135 }, { "epoch": 0.5794597590973658, "grad_norm": 0.07065249234437943, "learning_rate": 1.863789160508878e-05, "loss": 0.5221515893936157, "step": 3136 }, { "epoch": 0.5796445358062616, "grad_norm": 0.07332364469766617, "learning_rate": 1.8636886288271542e-05, "loss": 0.6718504428863525, "step": 3137 }, { "epoch": 0.5798293125151575, "grad_norm": 0.08444932848215103, "learning_rate": 1.8635880627733255e-05, "loss": 0.7537721991539001, "step": 3138 }, { "epoch": 0.5800140892240533, "grad_norm": 0.08700313419103622, "learning_rate": 1.8634874623513938e-05, "loss": 0.6897266507148743, "step": 3139 }, { "epoch": 0.5801988659329491, "grad_norm": 0.07157497107982635, "learning_rate": 1.8633868275653622e-05, "loss": 0.521382749080658, "step": 3140 }, { "epoch": 0.5803836426418449, "grad_norm": 0.06812208890914917, "learning_rate": 1.863286158419236e-05, "loss": 0.5932210683822632, "step": 3141 }, { "epoch": 0.5805684193507409, "grad_norm": 0.07344353944063187, "learning_rate": 1.863185454917022e-05, "loss": 0.6644601821899414, "step": 3142 }, { "epoch": 0.5807531960596367, "grad_norm": 0.07347593456506729, "learning_rate": 1.8630847170627272e-05, "loss": 0.6279237270355225, "step": 3143 }, { "epoch": 0.5809379727685325, "grad_norm": 0.07865365594625473, "learning_rate": 1.862983944860361e-05, "loss": 0.8507033586502075, "step": 3144 }, { "epoch": 0.5811227494774284, "grad_norm": 0.06068730726838112, "learning_rate": 1.8628831383139336e-05, "loss": 0.4948467016220093, "step": 3145 }, { "epoch": 0.5813075261863242, "grad_norm": 0.07076761871576309, "learning_rate": 1.8627822974274574e-05, "loss": 0.7468083500862122, "step": 3146 }, { "epoch": 0.58149230289522, "grad_norm": 0.08117230981588364, "learning_rate": 1.8626814222049444e-05, "loss": 0.5720183849334717, "step": 3147 }, { "epoch": 0.5816770796041159, "grad_norm": 0.06479369103908539, "learning_rate": 1.86258051265041e-05, "loss": 0.652022659778595, "step": 3148 }, { "epoch": 0.5818618563130118, "grad_norm": 0.06874661147594452, "learning_rate": 1.86247956876787e-05, "loss": 0.6422584056854248, "step": 3149 }, { "epoch": 0.5820466330219076, "grad_norm": 0.06053101271390915, "learning_rate": 1.8623785905613416e-05, "loss": 0.6103196144104004, "step": 3150 }, { "epoch": 0.5822314097308035, "grad_norm": 0.056345485150814056, "learning_rate": 1.862277578034843e-05, "loss": 0.45207127928733826, "step": 3151 }, { "epoch": 0.5824161864396993, "grad_norm": 0.07697878777980804, "learning_rate": 1.8621765311923945e-05, "loss": 0.7261789441108704, "step": 3152 }, { "epoch": 0.5826009631485951, "grad_norm": 0.07872536778450012, "learning_rate": 1.8620754500380177e-05, "loss": 0.7119261622428894, "step": 3153 }, { "epoch": 0.5827857398574909, "grad_norm": 0.06227670609951019, "learning_rate": 1.861974334575735e-05, "loss": 0.47681817412376404, "step": 3154 }, { "epoch": 0.5829705165663868, "grad_norm": 0.056329309940338135, "learning_rate": 1.8618731848095706e-05, "loss": 0.46987074613571167, "step": 3155 }, { "epoch": 0.5831552932752827, "grad_norm": 0.06145971640944481, "learning_rate": 1.8617720007435497e-05, "loss": 0.6057943105697632, "step": 3156 }, { "epoch": 0.5833400699841785, "grad_norm": 0.08054883033037186, "learning_rate": 1.8616707823816994e-05, "loss": 0.6347730159759521, "step": 3157 }, { "epoch": 0.5835248466930744, "grad_norm": 0.08149060606956482, "learning_rate": 1.8615695297280482e-05, "loss": 0.9930283427238464, "step": 3158 }, { "epoch": 0.5837096234019702, "grad_norm": 0.08788027614355087, "learning_rate": 1.8614682427866246e-05, "loss": 0.8771606087684631, "step": 3159 }, { "epoch": 0.583894400110866, "grad_norm": 0.08429764211177826, "learning_rate": 1.8613669215614605e-05, "loss": 0.7415722012519836, "step": 3160 }, { "epoch": 0.5840791768197618, "grad_norm": 0.07797253876924515, "learning_rate": 1.8612655660565877e-05, "loss": 0.6256560683250427, "step": 3161 }, { "epoch": 0.5842639535286577, "grad_norm": 0.0795632153749466, "learning_rate": 1.8611641762760398e-05, "loss": 0.7124666571617126, "step": 3162 }, { "epoch": 0.5844487302375535, "grad_norm": 0.07458134740591049, "learning_rate": 1.861062752223852e-05, "loss": 0.6469964981079102, "step": 3163 }, { "epoch": 0.5846335069464494, "grad_norm": 0.0760737806558609, "learning_rate": 1.860961293904061e-05, "loss": 0.7084463238716125, "step": 3164 }, { "epoch": 0.5848182836553453, "grad_norm": 0.09203182905912399, "learning_rate": 1.8608598013207034e-05, "loss": 0.74711674451828, "step": 3165 }, { "epoch": 0.5850030603642411, "grad_norm": 0.06944387406110764, "learning_rate": 1.8607582744778193e-05, "loss": 0.522801399230957, "step": 3166 }, { "epoch": 0.5851878370731369, "grad_norm": 0.060882631689310074, "learning_rate": 1.860656713379449e-05, "loss": 0.5053161382675171, "step": 3167 }, { "epoch": 0.5853726137820328, "grad_norm": 0.0728553980588913, "learning_rate": 1.860555118029634e-05, "loss": 0.5969119668006897, "step": 3168 }, { "epoch": 0.5855573904909286, "grad_norm": 0.07058120518922806, "learning_rate": 1.8604534884324173e-05, "loss": 0.596505343914032, "step": 3169 }, { "epoch": 0.5857421671998244, "grad_norm": 0.08228509873151779, "learning_rate": 1.8603518245918444e-05, "loss": 0.7489085793495178, "step": 3170 }, { "epoch": 0.5859269439087204, "grad_norm": 0.08472972363233566, "learning_rate": 1.8602501265119604e-05, "loss": 0.7827440500259399, "step": 3171 }, { "epoch": 0.5861117206176162, "grad_norm": 0.06148391589522362, "learning_rate": 1.8601483941968127e-05, "loss": 0.48352840542793274, "step": 3172 }, { "epoch": 0.586296497326512, "grad_norm": 0.07577600330114365, "learning_rate": 1.8600466276504496e-05, "loss": 0.6786936521530151, "step": 3173 }, { "epoch": 0.5864812740354078, "grad_norm": 0.07122032344341278, "learning_rate": 1.859944826876922e-05, "loss": 0.6640927791595459, "step": 3174 }, { "epoch": 0.5866660507443037, "grad_norm": 0.05961944907903671, "learning_rate": 1.8598429918802802e-05, "loss": 0.5764791965484619, "step": 3175 }, { "epoch": 0.5868508274531995, "grad_norm": 0.0938916951417923, "learning_rate": 1.859741122664578e-05, "loss": 0.8614349961280823, "step": 3176 }, { "epoch": 0.5870356041620953, "grad_norm": 0.06286361813545227, "learning_rate": 1.8596392192338687e-05, "loss": 0.6520477533340454, "step": 3177 }, { "epoch": 0.5872203808709913, "grad_norm": 0.07921268045902252, "learning_rate": 1.8595372815922076e-05, "loss": 0.6980974674224854, "step": 3178 }, { "epoch": 0.5874051575798871, "grad_norm": 0.07156159728765488, "learning_rate": 1.859435309743652e-05, "loss": 0.6495121121406555, "step": 3179 }, { "epoch": 0.5875899342887829, "grad_norm": 0.06057432293891907, "learning_rate": 1.8593333036922604e-05, "loss": 0.5264384150505066, "step": 3180 }, { "epoch": 0.5877747109976788, "grad_norm": 0.05280206352472305, "learning_rate": 1.8592312634420912e-05, "loss": 0.3305424451828003, "step": 3181 }, { "epoch": 0.5879594877065746, "grad_norm": 0.0661168247461319, "learning_rate": 1.859129188997206e-05, "loss": 0.5634755492210388, "step": 3182 }, { "epoch": 0.5881442644154704, "grad_norm": 0.06081791967153549, "learning_rate": 1.8590270803616673e-05, "loss": 0.6199816465377808, "step": 3183 }, { "epoch": 0.5883290411243662, "grad_norm": 0.07526887953281403, "learning_rate": 1.8589249375395382e-05, "loss": 0.6815241575241089, "step": 3184 }, { "epoch": 0.5885138178332621, "grad_norm": 0.07210526615381241, "learning_rate": 1.8588227605348836e-05, "loss": 0.5609758496284485, "step": 3185 }, { "epoch": 0.588698594542158, "grad_norm": 0.07922544330358505, "learning_rate": 1.8587205493517703e-05, "loss": 0.7402794361114502, "step": 3186 }, { "epoch": 0.5888833712510538, "grad_norm": 0.08084230870008469, "learning_rate": 1.8586183039942654e-05, "loss": 0.8662818670272827, "step": 3187 }, { "epoch": 0.5890681479599497, "grad_norm": 0.06553030014038086, "learning_rate": 1.8585160244664386e-05, "loss": 0.5434334874153137, "step": 3188 }, { "epoch": 0.5892529246688455, "grad_norm": 0.07362144440412521, "learning_rate": 1.85841371077236e-05, "loss": 0.73642897605896, "step": 3189 }, { "epoch": 0.5894377013777413, "grad_norm": 0.07301516830921173, "learning_rate": 1.858311362916101e-05, "loss": 0.618234395980835, "step": 3190 }, { "epoch": 0.5896224780866371, "grad_norm": 0.07383144646883011, "learning_rate": 1.8582089809017352e-05, "loss": 0.6209704875946045, "step": 3191 }, { "epoch": 0.589807254795533, "grad_norm": 0.07683329284191132, "learning_rate": 1.8581065647333368e-05, "loss": 0.6593388319015503, "step": 3192 }, { "epoch": 0.5899920315044289, "grad_norm": 0.07412169873714447, "learning_rate": 1.8580041144149822e-05, "loss": 0.5662711262702942, "step": 3193 }, { "epoch": 0.5901768082133247, "grad_norm": 0.07404091209173203, "learning_rate": 1.8579016299507482e-05, "loss": 0.6805081367492676, "step": 3194 }, { "epoch": 0.5903615849222206, "grad_norm": 0.08029306679964066, "learning_rate": 1.857799111344713e-05, "loss": 0.6811212301254272, "step": 3195 }, { "epoch": 0.5905463616311164, "grad_norm": 0.07096394896507263, "learning_rate": 1.857696558600957e-05, "loss": 0.684027373790741, "step": 3196 }, { "epoch": 0.5907311383400122, "grad_norm": 0.07632946223020554, "learning_rate": 1.8575939717235614e-05, "loss": 0.7099227905273438, "step": 3197 }, { "epoch": 0.5909159150489081, "grad_norm": 0.06812963634729385, "learning_rate": 1.857491350716609e-05, "loss": 0.7205207943916321, "step": 3198 }, { "epoch": 0.5911006917578039, "grad_norm": 0.07323703914880753, "learning_rate": 1.857388695584183e-05, "loss": 0.6930529475212097, "step": 3199 }, { "epoch": 0.5912854684666998, "grad_norm": 0.0693032518029213, "learning_rate": 1.85728600633037e-05, "loss": 0.508209228515625, "step": 3200 }, { "epoch": 0.5914702451755957, "grad_norm": 0.07588290423154831, "learning_rate": 1.8571832829592557e-05, "loss": 0.6380006074905396, "step": 3201 }, { "epoch": 0.5916550218844915, "grad_norm": 0.06168055534362793, "learning_rate": 1.8570805254749288e-05, "loss": 0.5089638829231262, "step": 3202 }, { "epoch": 0.5918397985933873, "grad_norm": 0.07789887487888336, "learning_rate": 1.856977733881478e-05, "loss": 0.6378219127655029, "step": 3203 }, { "epoch": 0.5920245753022831, "grad_norm": 0.054984163492918015, "learning_rate": 1.856874908182995e-05, "loss": 0.3924255073070526, "step": 3204 }, { "epoch": 0.592209352011179, "grad_norm": 0.08939534425735474, "learning_rate": 1.856772048383571e-05, "loss": 0.583304762840271, "step": 3205 }, { "epoch": 0.5923941287200748, "grad_norm": 0.09006770700216293, "learning_rate": 1.8566691544873003e-05, "loss": 0.8556082844734192, "step": 3206 }, { "epoch": 0.5925789054289706, "grad_norm": 0.07439000904560089, "learning_rate": 1.8565662264982772e-05, "loss": 0.5220317244529724, "step": 3207 }, { "epoch": 0.5927636821378666, "grad_norm": 0.08255641162395477, "learning_rate": 1.8564632644205984e-05, "loss": 0.72211092710495, "step": 3208 }, { "epoch": 0.5929484588467624, "grad_norm": 0.07791288942098618, "learning_rate": 1.856360268258361e-05, "loss": 0.6217178106307983, "step": 3209 }, { "epoch": 0.5931332355556582, "grad_norm": 0.0743233859539032, "learning_rate": 1.856257238015664e-05, "loss": 0.6467982530593872, "step": 3210 }, { "epoch": 0.593318012264554, "grad_norm": 0.08191141486167908, "learning_rate": 1.8561541736966085e-05, "loss": 0.8423718214035034, "step": 3211 }, { "epoch": 0.5935027889734499, "grad_norm": 0.06875808537006378, "learning_rate": 1.8560510753052948e-05, "loss": 0.6221856474876404, "step": 3212 }, { "epoch": 0.5936875656823457, "grad_norm": 0.07226449251174927, "learning_rate": 1.8559479428458267e-05, "loss": 0.5470890402793884, "step": 3213 }, { "epoch": 0.5938723423912415, "grad_norm": 0.06766360253095627, "learning_rate": 1.8558447763223083e-05, "loss": 0.6161670684814453, "step": 3214 }, { "epoch": 0.5940571191001375, "grad_norm": 0.08009753376245499, "learning_rate": 1.8557415757388456e-05, "loss": 0.6970452070236206, "step": 3215 }, { "epoch": 0.5942418958090333, "grad_norm": 0.06258440017700195, "learning_rate": 1.8556383410995454e-05, "loss": 0.45787256956100464, "step": 3216 }, { "epoch": 0.5944266725179291, "grad_norm": 0.06200527027249336, "learning_rate": 1.855535072408516e-05, "loss": 0.5589002370834351, "step": 3217 }, { "epoch": 0.594611449226825, "grad_norm": 0.06871971487998962, "learning_rate": 1.8554317696698676e-05, "loss": 0.4686943590641022, "step": 3218 }, { "epoch": 0.5947962259357208, "grad_norm": 0.07597262412309647, "learning_rate": 1.855328432887711e-05, "loss": 0.6524820923805237, "step": 3219 }, { "epoch": 0.5949810026446166, "grad_norm": 0.07778976857662201, "learning_rate": 1.8552250620661585e-05, "loss": 0.5683805346488953, "step": 3220 }, { "epoch": 0.5951657793535124, "grad_norm": 0.07574175298213959, "learning_rate": 1.8551216572093246e-05, "loss": 0.6568016409873962, "step": 3221 }, { "epoch": 0.5953505560624084, "grad_norm": 0.09157819300889969, "learning_rate": 1.8550182183213238e-05, "loss": 0.6208380460739136, "step": 3222 }, { "epoch": 0.5955353327713042, "grad_norm": 0.07280399650335312, "learning_rate": 1.8549147454062728e-05, "loss": 0.6524849534034729, "step": 3223 }, { "epoch": 0.5957201094802, "grad_norm": 0.0947246253490448, "learning_rate": 1.85481123846829e-05, "loss": 0.7721500396728516, "step": 3224 }, { "epoch": 0.5959048861890959, "grad_norm": 0.0865185558795929, "learning_rate": 1.854707697511494e-05, "loss": 0.7139161825180054, "step": 3225 }, { "epoch": 0.5960896628979917, "grad_norm": 0.06716441363096237, "learning_rate": 1.854604122540006e-05, "loss": 0.5239250659942627, "step": 3226 }, { "epoch": 0.5962744396068875, "grad_norm": 0.059036269783973694, "learning_rate": 1.854500513557947e-05, "loss": 0.47485873103141785, "step": 3227 }, { "epoch": 0.5964592163157834, "grad_norm": 0.07232226431369781, "learning_rate": 1.8543968705694414e-05, "loss": 0.6586685180664062, "step": 3228 }, { "epoch": 0.5966439930246792, "grad_norm": 0.07339149713516235, "learning_rate": 1.8542931935786133e-05, "loss": 0.7041042447090149, "step": 3229 }, { "epoch": 0.5968287697335751, "grad_norm": 0.08636587113142014, "learning_rate": 1.854189482589589e-05, "loss": 0.6246604323387146, "step": 3230 }, { "epoch": 0.597013546442471, "grad_norm": 0.08113526552915573, "learning_rate": 1.8540857376064956e-05, "loss": 0.7897917628288269, "step": 3231 }, { "epoch": 0.5971983231513668, "grad_norm": 0.06901397556066513, "learning_rate": 1.8539819586334617e-05, "loss": 0.5459996461868286, "step": 3232 }, { "epoch": 0.5973830998602626, "grad_norm": 0.07561596482992172, "learning_rate": 1.8538781456746183e-05, "loss": 0.5742215514183044, "step": 3233 }, { "epoch": 0.5975678765691584, "grad_norm": 0.07877890765666962, "learning_rate": 1.8537742987340955e-05, "loss": 0.6900753974914551, "step": 3234 }, { "epoch": 0.5977526532780543, "grad_norm": 0.09065587818622589, "learning_rate": 1.853670417816027e-05, "loss": 0.7630480527877808, "step": 3235 }, { "epoch": 0.5979374299869501, "grad_norm": 0.08781592547893524, "learning_rate": 1.8535665029245463e-05, "loss": 0.7213505506515503, "step": 3236 }, { "epoch": 0.598122206695846, "grad_norm": 0.07441502809524536, "learning_rate": 1.8534625540637897e-05, "loss": 0.537497878074646, "step": 3237 }, { "epoch": 0.5983069834047419, "grad_norm": 0.08751673996448517, "learning_rate": 1.853358571237893e-05, "loss": 0.8879984021186829, "step": 3238 }, { "epoch": 0.5984917601136377, "grad_norm": 0.08641214668750763, "learning_rate": 1.8532545544509955e-05, "loss": 0.772212028503418, "step": 3239 }, { "epoch": 0.5986765368225335, "grad_norm": 0.06951478868722916, "learning_rate": 1.8531505037072363e-05, "loss": 0.6568793654441833, "step": 3240 }, { "epoch": 0.5988613135314294, "grad_norm": 0.08141907304525375, "learning_rate": 1.853046419010756e-05, "loss": 0.6164892911911011, "step": 3241 }, { "epoch": 0.5990460902403252, "grad_norm": 0.05399297922849655, "learning_rate": 1.852942300365697e-05, "loss": 0.43579041957855225, "step": 3242 }, { "epoch": 0.599230866949221, "grad_norm": 0.0674586072564125, "learning_rate": 1.852838147776203e-05, "loss": 0.6669760942459106, "step": 3243 }, { "epoch": 0.599415643658117, "grad_norm": 0.05470862612128258, "learning_rate": 1.8527339612464192e-05, "loss": 0.37778860330581665, "step": 3244 }, { "epoch": 0.5996004203670128, "grad_norm": 0.07815508544445038, "learning_rate": 1.8526297407804915e-05, "loss": 0.6272417306900024, "step": 3245 }, { "epoch": 0.5997851970759086, "grad_norm": 0.0828259065747261, "learning_rate": 1.852525486382567e-05, "loss": 0.8422073721885681, "step": 3246 }, { "epoch": 0.5999699737848044, "grad_norm": 0.07688503712415695, "learning_rate": 1.852421198056796e-05, "loss": 0.6805994510650635, "step": 3247 }, { "epoch": 0.6001547504937003, "grad_norm": 0.0888686254620552, "learning_rate": 1.8523168758073283e-05, "loss": 0.7215426564216614, "step": 3248 }, { "epoch": 0.6003395272025961, "grad_norm": 0.06879184395074844, "learning_rate": 1.8522125196383154e-05, "loss": 0.6762117147445679, "step": 3249 }, { "epoch": 0.6005243039114919, "grad_norm": 0.07216864079236984, "learning_rate": 1.8521081295539102e-05, "loss": 0.6251771450042725, "step": 3250 }, { "epoch": 0.6007090806203877, "grad_norm": 0.07336558401584625, "learning_rate": 1.8520037055582675e-05, "loss": 0.6005572080612183, "step": 3251 }, { "epoch": 0.6008938573292837, "grad_norm": 0.08599632233381271, "learning_rate": 1.851899247655543e-05, "loss": 0.7398759126663208, "step": 3252 }, { "epoch": 0.6010786340381795, "grad_norm": 0.057038020342588425, "learning_rate": 1.8517947558498936e-05, "loss": 0.44321659207344055, "step": 3253 }, { "epoch": 0.6012634107470753, "grad_norm": 0.06850314885377884, "learning_rate": 1.8516902301454775e-05, "loss": 0.5966536402702332, "step": 3254 }, { "epoch": 0.6014481874559712, "grad_norm": 0.0940929725766182, "learning_rate": 1.8515856705464553e-05, "loss": 0.8563060164451599, "step": 3255 }, { "epoch": 0.601632964164867, "grad_norm": 0.07415281236171722, "learning_rate": 1.8514810770569872e-05, "loss": 0.7482355237007141, "step": 3256 }, { "epoch": 0.6018177408737628, "grad_norm": 0.09791791439056396, "learning_rate": 1.8513764496812366e-05, "loss": 0.8284050822257996, "step": 3257 }, { "epoch": 0.6020025175826587, "grad_norm": 0.05737827345728874, "learning_rate": 1.851271788423367e-05, "loss": 0.43601882457733154, "step": 3258 }, { "epoch": 0.6021872942915546, "grad_norm": 0.07074494659900665, "learning_rate": 1.8511670932875432e-05, "loss": 0.6547046899795532, "step": 3259 }, { "epoch": 0.6023720710004504, "grad_norm": 0.08046936243772507, "learning_rate": 1.8510623642779322e-05, "loss": 0.5685117840766907, "step": 3260 }, { "epoch": 0.6025568477093463, "grad_norm": 0.0642126202583313, "learning_rate": 1.8509576013987015e-05, "loss": 0.5987184643745422, "step": 3261 }, { "epoch": 0.6027416244182421, "grad_norm": 0.06426668912172318, "learning_rate": 1.850852804654021e-05, "loss": 0.5323985815048218, "step": 3262 }, { "epoch": 0.6029264011271379, "grad_norm": 0.07552376389503479, "learning_rate": 1.8507479740480608e-05, "loss": 0.6078667044639587, "step": 3263 }, { "epoch": 0.6031111778360337, "grad_norm": 0.07132647186517715, "learning_rate": 1.8506431095849927e-05, "loss": 0.5994269251823425, "step": 3264 }, { "epoch": 0.6032959545449296, "grad_norm": 0.07693766802549362, "learning_rate": 1.85053821126899e-05, "loss": 0.6154434680938721, "step": 3265 }, { "epoch": 0.6034807312538255, "grad_norm": 0.0722254142165184, "learning_rate": 1.8504332791042276e-05, "loss": 0.586520791053772, "step": 3266 }, { "epoch": 0.6036655079627213, "grad_norm": 0.0783991739153862, "learning_rate": 1.8503283130948813e-05, "loss": 0.5686954259872437, "step": 3267 }, { "epoch": 0.6038502846716172, "grad_norm": 0.08890886604785919, "learning_rate": 1.8502233132451285e-05, "loss": 0.6486931443214417, "step": 3268 }, { "epoch": 0.604035061380513, "grad_norm": 0.05799055099487305, "learning_rate": 1.850118279559148e-05, "loss": 0.5202708840370178, "step": 3269 }, { "epoch": 0.6042198380894088, "grad_norm": 0.07490106672048569, "learning_rate": 1.8500132120411195e-05, "loss": 0.7785816788673401, "step": 3270 }, { "epoch": 0.6044046147983047, "grad_norm": 0.07757486402988434, "learning_rate": 1.8499081106952247e-05, "loss": 0.8517777323722839, "step": 3271 }, { "epoch": 0.6045893915072005, "grad_norm": 0.06630745530128479, "learning_rate": 1.849802975525646e-05, "loss": 0.5189116597175598, "step": 3272 }, { "epoch": 0.6047741682160963, "grad_norm": 0.0747363269329071, "learning_rate": 1.8496978065365677e-05, "loss": 0.7185724973678589, "step": 3273 }, { "epoch": 0.6049589449249922, "grad_norm": 0.05487100034952164, "learning_rate": 1.8495926037321747e-05, "loss": 0.45234984159469604, "step": 3274 }, { "epoch": 0.6051437216338881, "grad_norm": 0.09173326194286346, "learning_rate": 1.8494873671166543e-05, "loss": 0.8588842749595642, "step": 3275 }, { "epoch": 0.6053284983427839, "grad_norm": 0.0814746618270874, "learning_rate": 1.8493820966941944e-05, "loss": 0.619792103767395, "step": 3276 }, { "epoch": 0.6055132750516797, "grad_norm": 0.06092033535242081, "learning_rate": 1.8492767924689846e-05, "loss": 0.4326985478401184, "step": 3277 }, { "epoch": 0.6056980517605756, "grad_norm": 0.08345521986484528, "learning_rate": 1.8491714544452154e-05, "loss": 0.7388136386871338, "step": 3278 }, { "epoch": 0.6058828284694714, "grad_norm": 0.06334060430526733, "learning_rate": 1.849066082627079e-05, "loss": 0.5493499040603638, "step": 3279 }, { "epoch": 0.6060676051783672, "grad_norm": 0.08863652497529984, "learning_rate": 1.8489606770187685e-05, "loss": 0.847687840461731, "step": 3280 }, { "epoch": 0.6062523818872632, "grad_norm": 0.05811255797743797, "learning_rate": 1.8488552376244798e-05, "loss": 0.48114174604415894, "step": 3281 }, { "epoch": 0.606437158596159, "grad_norm": 0.07179747521877289, "learning_rate": 1.848749764448408e-05, "loss": 0.6397152543067932, "step": 3282 }, { "epoch": 0.6066219353050548, "grad_norm": 0.08426441997289658, "learning_rate": 1.848644257494751e-05, "loss": 0.5927404165267944, "step": 3283 }, { "epoch": 0.6068067120139506, "grad_norm": 0.06691189855337143, "learning_rate": 1.848538716767708e-05, "loss": 0.47045934200286865, "step": 3284 }, { "epoch": 0.6069914887228465, "grad_norm": 0.056987274438142776, "learning_rate": 1.8484331422714784e-05, "loss": 0.4347154200077057, "step": 3285 }, { "epoch": 0.6071762654317423, "grad_norm": 0.07422585040330887, "learning_rate": 1.848327534010264e-05, "loss": 0.6322421431541443, "step": 3286 }, { "epoch": 0.6073610421406381, "grad_norm": 0.06570249050855637, "learning_rate": 1.848221891988268e-05, "loss": 0.5806671977043152, "step": 3287 }, { "epoch": 0.6075458188495341, "grad_norm": 0.08185900747776031, "learning_rate": 1.8481162162096944e-05, "loss": 0.602039098739624, "step": 3288 }, { "epoch": 0.6077305955584299, "grad_norm": 0.07493102550506592, "learning_rate": 1.848010506678749e-05, "loss": 0.5471720695495605, "step": 3289 }, { "epoch": 0.6079153722673257, "grad_norm": 0.06624553352594376, "learning_rate": 1.8479047633996384e-05, "loss": 0.5783137083053589, "step": 3290 }, { "epoch": 0.6081001489762216, "grad_norm": 0.08775525540113449, "learning_rate": 1.8477989863765712e-05, "loss": 0.6186946630477905, "step": 3291 }, { "epoch": 0.6082849256851174, "grad_norm": 0.08846092969179153, "learning_rate": 1.8476931756137565e-05, "loss": 0.7376492619514465, "step": 3292 }, { "epoch": 0.6084697023940132, "grad_norm": 0.08089148998260498, "learning_rate": 1.8475873311154053e-05, "loss": 0.5945156812667847, "step": 3293 }, { "epoch": 0.608654479102909, "grad_norm": 0.08092156797647476, "learning_rate": 1.8474814528857306e-05, "loss": 0.7160748839378357, "step": 3294 }, { "epoch": 0.6088392558118049, "grad_norm": 0.07545629888772964, "learning_rate": 1.847375540928945e-05, "loss": 0.7555568814277649, "step": 3295 }, { "epoch": 0.6090240325207008, "grad_norm": 0.07335377484560013, "learning_rate": 1.8472695952492642e-05, "loss": 0.7228835225105286, "step": 3296 }, { "epoch": 0.6092088092295966, "grad_norm": 0.0645759105682373, "learning_rate": 1.8471636158509043e-05, "loss": 0.5339785814285278, "step": 3297 }, { "epoch": 0.6093935859384925, "grad_norm": 0.07800009101629257, "learning_rate": 1.8470576027380828e-05, "loss": 0.7866642475128174, "step": 3298 }, { "epoch": 0.6095783626473883, "grad_norm": 0.06919465214014053, "learning_rate": 1.846951555915019e-05, "loss": 0.6962285041809082, "step": 3299 }, { "epoch": 0.6097631393562841, "grad_norm": 0.08536528050899506, "learning_rate": 1.8468454753859332e-05, "loss": 0.7147235870361328, "step": 3300 }, { "epoch": 0.60994791606518, "grad_norm": 0.08068986982107162, "learning_rate": 1.8467393611550462e-05, "loss": 0.6971014738082886, "step": 3301 }, { "epoch": 0.6101326927740758, "grad_norm": 0.06266585737466812, "learning_rate": 1.8466332132265825e-05, "loss": 0.5209909677505493, "step": 3302 }, { "epoch": 0.6103174694829717, "grad_norm": 0.07673177868127823, "learning_rate": 1.8465270316047653e-05, "loss": 0.6880748271942139, "step": 3303 }, { "epoch": 0.6105022461918675, "grad_norm": 0.070456363260746, "learning_rate": 1.846420816293821e-05, "loss": 0.6609374284744263, "step": 3304 }, { "epoch": 0.6106870229007634, "grad_norm": 0.07725219428539276, "learning_rate": 1.8463145672979758e-05, "loss": 0.7507308721542358, "step": 3305 }, { "epoch": 0.6108717996096592, "grad_norm": 0.06939958781003952, "learning_rate": 1.846208284621459e-05, "loss": 0.6096147894859314, "step": 3306 }, { "epoch": 0.611056576318555, "grad_norm": 0.0722557008266449, "learning_rate": 1.8461019682684998e-05, "loss": 0.8005504012107849, "step": 3307 }, { "epoch": 0.6112413530274509, "grad_norm": 0.06575772166252136, "learning_rate": 1.8459956182433295e-05, "loss": 0.47339561581611633, "step": 3308 }, { "epoch": 0.6114261297363467, "grad_norm": 0.08038409054279327, "learning_rate": 1.8458892345501804e-05, "loss": 0.650078535079956, "step": 3309 }, { "epoch": 0.6116109064452426, "grad_norm": 0.0885750949382782, "learning_rate": 1.845782817193286e-05, "loss": 0.8320760726928711, "step": 3310 }, { "epoch": 0.6117956831541385, "grad_norm": 0.07795794308185577, "learning_rate": 1.8456763661768815e-05, "loss": 0.6257549524307251, "step": 3311 }, { "epoch": 0.6119804598630343, "grad_norm": 0.07041628658771515, "learning_rate": 1.8455698815052037e-05, "loss": 0.580536425113678, "step": 3312 }, { "epoch": 0.6121652365719301, "grad_norm": 0.06542914360761642, "learning_rate": 1.84546336318249e-05, "loss": 0.44415077567100525, "step": 3313 }, { "epoch": 0.6123500132808259, "grad_norm": 0.06586247682571411, "learning_rate": 1.8453568112129793e-05, "loss": 0.5182660222053528, "step": 3314 }, { "epoch": 0.6125347899897218, "grad_norm": 0.06454501301050186, "learning_rate": 1.8452502256009127e-05, "loss": 0.5770164132118225, "step": 3315 }, { "epoch": 0.6127195666986176, "grad_norm": 0.0784081220626831, "learning_rate": 1.8451436063505312e-05, "loss": 0.7196560502052307, "step": 3316 }, { "epoch": 0.6129043434075134, "grad_norm": 0.06240353360772133, "learning_rate": 1.8450369534660787e-05, "loss": 0.4739285111427307, "step": 3317 }, { "epoch": 0.6130891201164094, "grad_norm": 0.06942515820264816, "learning_rate": 1.8449302669517988e-05, "loss": 0.6828460097312927, "step": 3318 }, { "epoch": 0.6132738968253052, "grad_norm": 0.07242225855588913, "learning_rate": 1.844823546811938e-05, "loss": 0.5765320658683777, "step": 3319 }, { "epoch": 0.613458673534201, "grad_norm": 0.07123544812202454, "learning_rate": 1.844716793050743e-05, "loss": 0.6075055599212646, "step": 3320 }, { "epoch": 0.6136434502430969, "grad_norm": 0.0873766541481018, "learning_rate": 1.8446100056724624e-05, "loss": 0.7996965050697327, "step": 3321 }, { "epoch": 0.6138282269519927, "grad_norm": 0.06624772399663925, "learning_rate": 1.8445031846813463e-05, "loss": 0.6351407766342163, "step": 3322 }, { "epoch": 0.6140130036608885, "grad_norm": 0.0864158496260643, "learning_rate": 1.8443963300816454e-05, "loss": 0.9147331714630127, "step": 3323 }, { "epoch": 0.6141977803697843, "grad_norm": 0.07096041738986969, "learning_rate": 1.844289441877612e-05, "loss": 0.5557135939598083, "step": 3324 }, { "epoch": 0.6143825570786803, "grad_norm": 0.068775475025177, "learning_rate": 1.844182520073501e-05, "loss": 0.4887027144432068, "step": 3325 }, { "epoch": 0.6145673337875761, "grad_norm": 0.05247717723250389, "learning_rate": 1.844075564673566e-05, "loss": 0.31910794973373413, "step": 3326 }, { "epoch": 0.6147521104964719, "grad_norm": 0.07341188192367554, "learning_rate": 1.8439685756820646e-05, "loss": 0.7932683229446411, "step": 3327 }, { "epoch": 0.6149368872053678, "grad_norm": 0.06132347509264946, "learning_rate": 1.8438615531032545e-05, "loss": 0.5565425157546997, "step": 3328 }, { "epoch": 0.6151216639142636, "grad_norm": 0.06424925476312637, "learning_rate": 1.8437544969413946e-05, "loss": 0.46804794669151306, "step": 3329 }, { "epoch": 0.6153064406231594, "grad_norm": 0.07150422781705856, "learning_rate": 1.8436474072007454e-05, "loss": 0.6623454093933105, "step": 3330 }, { "epoch": 0.6154912173320553, "grad_norm": 0.0803765058517456, "learning_rate": 1.843540283885569e-05, "loss": 0.7229027152061462, "step": 3331 }, { "epoch": 0.6156759940409512, "grad_norm": 0.08057615906000137, "learning_rate": 1.843433127000128e-05, "loss": 0.6208106279373169, "step": 3332 }, { "epoch": 0.615860770749847, "grad_norm": 0.08613825589418411, "learning_rate": 1.8433259365486876e-05, "loss": 0.7484444379806519, "step": 3333 }, { "epoch": 0.6160455474587428, "grad_norm": 0.07063552737236023, "learning_rate": 1.8432187125355137e-05, "loss": 0.5618511438369751, "step": 3334 }, { "epoch": 0.6162303241676387, "grad_norm": 0.07601696252822876, "learning_rate": 1.8431114549648728e-05, "loss": 0.5722567439079285, "step": 3335 }, { "epoch": 0.6164151008765345, "grad_norm": 0.08163177222013474, "learning_rate": 1.8430041638410335e-05, "loss": 0.7786068320274353, "step": 3336 }, { "epoch": 0.6165998775854303, "grad_norm": 0.08218467980623245, "learning_rate": 1.8428968391682663e-05, "loss": 0.7029726505279541, "step": 3337 }, { "epoch": 0.6167846542943262, "grad_norm": 0.06208677962422371, "learning_rate": 1.842789480950842e-05, "loss": 0.652988851070404, "step": 3338 }, { "epoch": 0.616969431003222, "grad_norm": 0.06685537099838257, "learning_rate": 1.8426820891930328e-05, "loss": 0.6263839602470398, "step": 3339 }, { "epoch": 0.6171542077121179, "grad_norm": 0.07050909101963043, "learning_rate": 1.842574663899113e-05, "loss": 0.6059039235115051, "step": 3340 }, { "epoch": 0.6173389844210138, "grad_norm": 0.04347433149814606, "learning_rate": 1.8424672050733577e-05, "loss": 0.4000491201877594, "step": 3341 }, { "epoch": 0.6175237611299096, "grad_norm": 0.07061401009559631, "learning_rate": 1.842359712720043e-05, "loss": 0.5760173201560974, "step": 3342 }, { "epoch": 0.6177085378388054, "grad_norm": 0.09141604602336884, "learning_rate": 1.8422521868434477e-05, "loss": 0.8013163805007935, "step": 3343 }, { "epoch": 0.6178933145477012, "grad_norm": 0.07007431983947754, "learning_rate": 1.8421446274478504e-05, "loss": 0.5493162870407104, "step": 3344 }, { "epoch": 0.6180780912565971, "grad_norm": 0.057326290756464005, "learning_rate": 1.8420370345375315e-05, "loss": 0.41068577766418457, "step": 3345 }, { "epoch": 0.6182628679654929, "grad_norm": 0.06700431555509567, "learning_rate": 1.841929408116773e-05, "loss": 0.5786662101745605, "step": 3346 }, { "epoch": 0.6184476446743888, "grad_norm": 0.07303698360919952, "learning_rate": 1.8418217481898578e-05, "loss": 0.632289707660675, "step": 3347 }, { "epoch": 0.6186324213832847, "grad_norm": 0.06894835084676743, "learning_rate": 1.841714054761071e-05, "loss": 0.5914403796195984, "step": 3348 }, { "epoch": 0.6188171980921805, "grad_norm": 0.061302755028009415, "learning_rate": 1.8416063278346983e-05, "loss": 0.5831184387207031, "step": 3349 }, { "epoch": 0.6190019748010763, "grad_norm": 0.0628557801246643, "learning_rate": 1.8414985674150268e-05, "loss": 0.53914874792099, "step": 3350 }, { "epoch": 0.6191867515099722, "grad_norm": 0.07403040677309036, "learning_rate": 1.841390773506345e-05, "loss": 0.5408547520637512, "step": 3351 }, { "epoch": 0.619371528218868, "grad_norm": 0.09489922225475311, "learning_rate": 1.841282946112943e-05, "loss": 0.6683562994003296, "step": 3352 }, { "epoch": 0.6195563049277638, "grad_norm": 0.06954994052648544, "learning_rate": 1.8411750852391114e-05, "loss": 0.6364865303039551, "step": 3353 }, { "epoch": 0.6197410816366598, "grad_norm": 0.07777140289545059, "learning_rate": 1.8410671908891432e-05, "loss": 0.7604506015777588, "step": 3354 }, { "epoch": 0.6199258583455556, "grad_norm": 0.07304697483778, "learning_rate": 1.840959263067332e-05, "loss": 0.4944457709789276, "step": 3355 }, { "epoch": 0.6201106350544514, "grad_norm": 0.06757856905460358, "learning_rate": 1.8408513017779737e-05, "loss": 0.6350497007369995, "step": 3356 }, { "epoch": 0.6202954117633472, "grad_norm": 0.08216935396194458, "learning_rate": 1.8407433070253637e-05, "loss": 0.6694963574409485, "step": 3357 }, { "epoch": 0.6204801884722431, "grad_norm": 0.07288269698619843, "learning_rate": 1.840635278813801e-05, "loss": 0.7217207551002502, "step": 3358 }, { "epoch": 0.6206649651811389, "grad_norm": 0.0807751789689064, "learning_rate": 1.8405272171475838e-05, "loss": 0.6694827675819397, "step": 3359 }, { "epoch": 0.6208497418900347, "grad_norm": 0.06977913528680801, "learning_rate": 1.840419122031013e-05, "loss": 0.6728543639183044, "step": 3360 }, { "epoch": 0.6210345185989306, "grad_norm": 0.06334500014781952, "learning_rate": 1.8403109934683908e-05, "loss": 0.5174497365951538, "step": 3361 }, { "epoch": 0.6212192953078265, "grad_norm": 0.08478124439716339, "learning_rate": 1.8402028314640198e-05, "loss": 0.7299423813819885, "step": 3362 }, { "epoch": 0.6214040720167223, "grad_norm": 0.0655829980969429, "learning_rate": 1.8400946360222046e-05, "loss": 0.5044342875480652, "step": 3363 }, { "epoch": 0.6215888487256181, "grad_norm": 0.061931535601615906, "learning_rate": 1.8399864071472516e-05, "loss": 0.548366367816925, "step": 3364 }, { "epoch": 0.621773625434514, "grad_norm": 0.0731566846370697, "learning_rate": 1.8398781448434674e-05, "loss": 0.6606848239898682, "step": 3365 }, { "epoch": 0.6219584021434098, "grad_norm": 0.0684320405125618, "learning_rate": 1.8397698491151607e-05, "loss": 0.5689799189567566, "step": 3366 }, { "epoch": 0.6221431788523056, "grad_norm": 0.07679462432861328, "learning_rate": 1.839661519966641e-05, "loss": 0.6740244030952454, "step": 3367 }, { "epoch": 0.6223279555612015, "grad_norm": 0.0783647820353508, "learning_rate": 1.8395531574022202e-05, "loss": 0.796058177947998, "step": 3368 }, { "epoch": 0.6225127322700974, "grad_norm": 0.06929682195186615, "learning_rate": 1.8394447614262103e-05, "loss": 0.6750960350036621, "step": 3369 }, { "epoch": 0.6226975089789932, "grad_norm": 0.07672516256570816, "learning_rate": 1.839336332042925e-05, "loss": 0.7037633657455444, "step": 3370 }, { "epoch": 0.6228822856878891, "grad_norm": 0.07461292296648026, "learning_rate": 1.83922786925668e-05, "loss": 0.5920368432998657, "step": 3371 }, { "epoch": 0.6230670623967849, "grad_norm": 0.061919037252664566, "learning_rate": 1.839119373071791e-05, "loss": 0.4925020933151245, "step": 3372 }, { "epoch": 0.6232518391056807, "grad_norm": 0.08758971095085144, "learning_rate": 1.8390108434925764e-05, "loss": 0.7388421893119812, "step": 3373 }, { "epoch": 0.6234366158145765, "grad_norm": 0.07638192921876907, "learning_rate": 1.8389022805233548e-05, "loss": 0.7971750497817993, "step": 3374 }, { "epoch": 0.6236213925234724, "grad_norm": 0.1041233167052269, "learning_rate": 1.838793684168448e-05, "loss": 0.8845223784446716, "step": 3375 }, { "epoch": 0.6238061692323683, "grad_norm": 0.07788344472646713, "learning_rate": 1.8386850544321758e-05, "loss": 0.7362569570541382, "step": 3376 }, { "epoch": 0.6239909459412641, "grad_norm": 0.07352737337350845, "learning_rate": 1.8385763913188624e-05, "loss": 0.6514081358909607, "step": 3377 }, { "epoch": 0.62417572265016, "grad_norm": 0.07312724739313126, "learning_rate": 1.8384676948328328e-05, "loss": 0.5283224582672119, "step": 3378 }, { "epoch": 0.6243604993590558, "grad_norm": 0.06424093246459961, "learning_rate": 1.838358964978412e-05, "loss": 0.5747008919715881, "step": 3379 }, { "epoch": 0.6245452760679516, "grad_norm": 0.07459218800067902, "learning_rate": 1.8382502017599272e-05, "loss": 0.6848477721214294, "step": 3380 }, { "epoch": 0.6247300527768475, "grad_norm": 0.07399237900972366, "learning_rate": 1.8381414051817066e-05, "loss": 0.541366457939148, "step": 3381 }, { "epoch": 0.6249148294857433, "grad_norm": 0.07620833069086075, "learning_rate": 1.8380325752480807e-05, "loss": 0.638131320476532, "step": 3382 }, { "epoch": 0.6250996061946391, "grad_norm": 0.06079495698213577, "learning_rate": 1.8379237119633798e-05, "loss": 0.6371907591819763, "step": 3383 }, { "epoch": 0.625284382903535, "grad_norm": 0.06467308104038239, "learning_rate": 1.837814815331937e-05, "loss": 0.5650978088378906, "step": 3384 }, { "epoch": 0.6254691596124309, "grad_norm": 0.06969483196735382, "learning_rate": 1.8377058853580857e-05, "loss": 0.6680585741996765, "step": 3385 }, { "epoch": 0.6256539363213267, "grad_norm": 0.08084537833929062, "learning_rate": 1.837596922046161e-05, "loss": 0.7017515301704407, "step": 3386 }, { "epoch": 0.6258387130302225, "grad_norm": 0.07957891374826431, "learning_rate": 1.8374879254004993e-05, "loss": 0.7071402072906494, "step": 3387 }, { "epoch": 0.6260234897391184, "grad_norm": 0.07972588390111923, "learning_rate": 1.837378895425438e-05, "loss": 0.8170965909957886, "step": 3388 }, { "epoch": 0.6262082664480142, "grad_norm": 0.09192267060279846, "learning_rate": 1.837269832125317e-05, "loss": 0.7305018901824951, "step": 3389 }, { "epoch": 0.62639304315691, "grad_norm": 0.07038337737321854, "learning_rate": 1.8371607355044757e-05, "loss": 0.5975012183189392, "step": 3390 }, { "epoch": 0.626577819865806, "grad_norm": 0.07235181331634521, "learning_rate": 1.8370516055672565e-05, "loss": 0.6369036436080933, "step": 3391 }, { "epoch": 0.6267625965747018, "grad_norm": 0.07539573311805725, "learning_rate": 1.8369424423180025e-05, "loss": 0.6021792888641357, "step": 3392 }, { "epoch": 0.6269473732835976, "grad_norm": 0.07311061024665833, "learning_rate": 1.8368332457610572e-05, "loss": 0.6630080938339233, "step": 3393 }, { "epoch": 0.6271321499924934, "grad_norm": 0.06958476454019547, "learning_rate": 1.8367240159007673e-05, "loss": 0.6373473405838013, "step": 3394 }, { "epoch": 0.6273169267013893, "grad_norm": 0.0566987618803978, "learning_rate": 1.836614752741479e-05, "loss": 0.5827599763870239, "step": 3395 }, { "epoch": 0.6275017034102851, "grad_norm": 0.08414741605520248, "learning_rate": 1.8365054562875412e-05, "loss": 0.7651265263557434, "step": 3396 }, { "epoch": 0.6276864801191809, "grad_norm": 0.05634079501032829, "learning_rate": 1.8363961265433033e-05, "loss": 0.4115261733531952, "step": 3397 }, { "epoch": 0.6278712568280769, "grad_norm": 0.08532488346099854, "learning_rate": 1.8362867635131162e-05, "loss": 0.7812259793281555, "step": 3398 }, { "epoch": 0.6280560335369727, "grad_norm": 0.07976091653108597, "learning_rate": 1.836177367201332e-05, "loss": 0.7045861482620239, "step": 3399 }, { "epoch": 0.6282408102458685, "grad_norm": 0.05625972896814346, "learning_rate": 1.836067937612305e-05, "loss": 0.5176336765289307, "step": 3400 }, { "epoch": 0.6284255869547644, "grad_norm": 0.0938640907406807, "learning_rate": 1.8359584747503902e-05, "loss": 0.7844260334968567, "step": 3401 }, { "epoch": 0.6286103636636602, "grad_norm": 0.06254132837057114, "learning_rate": 1.835848978619943e-05, "loss": 0.6245369911193848, "step": 3402 }, { "epoch": 0.628795140372556, "grad_norm": 0.07394769787788391, "learning_rate": 1.8357394492253216e-05, "loss": 0.5009530782699585, "step": 3403 }, { "epoch": 0.6289799170814518, "grad_norm": 0.06485763937234879, "learning_rate": 1.835629886570885e-05, "loss": 0.5574130415916443, "step": 3404 }, { "epoch": 0.6291646937903477, "grad_norm": 0.07892514020204544, "learning_rate": 1.8355202906609927e-05, "loss": 0.6365463137626648, "step": 3405 }, { "epoch": 0.6293494704992436, "grad_norm": 0.10166321694850922, "learning_rate": 1.8354106615000073e-05, "loss": 0.9062671065330505, "step": 3406 }, { "epoch": 0.6295342472081394, "grad_norm": 0.07256297767162323, "learning_rate": 1.8353009990922913e-05, "loss": 0.7056639194488525, "step": 3407 }, { "epoch": 0.6297190239170353, "grad_norm": 0.06842894852161407, "learning_rate": 1.8351913034422083e-05, "loss": 0.45249462127685547, "step": 3408 }, { "epoch": 0.6299038006259311, "grad_norm": 0.08753480762243271, "learning_rate": 1.835081574554125e-05, "loss": 0.7865512371063232, "step": 3409 }, { "epoch": 0.6300885773348269, "grad_norm": 0.07850634306669235, "learning_rate": 1.8349718124324075e-05, "loss": 0.580086350440979, "step": 3410 }, { "epoch": 0.6302733540437228, "grad_norm": 0.10032707452774048, "learning_rate": 1.8348620170814244e-05, "loss": 0.7837745547294617, "step": 3411 }, { "epoch": 0.6304581307526186, "grad_norm": 0.08185004442930222, "learning_rate": 1.8347521885055447e-05, "loss": 0.7753044366836548, "step": 3412 }, { "epoch": 0.6306429074615145, "grad_norm": 0.06751414388418198, "learning_rate": 1.834642326709139e-05, "loss": 0.540576159954071, "step": 3413 }, { "epoch": 0.6308276841704104, "grad_norm": 0.08356916904449463, "learning_rate": 1.8345324316965808e-05, "loss": 0.6384175419807434, "step": 3414 }, { "epoch": 0.6310124608793062, "grad_norm": 0.07579503953456879, "learning_rate": 1.8344225034722423e-05, "loss": 0.5331666469573975, "step": 3415 }, { "epoch": 0.631197237588202, "grad_norm": 0.0715034231543541, "learning_rate": 1.834312542040499e-05, "loss": 0.6281111836433411, "step": 3416 }, { "epoch": 0.6313820142970978, "grad_norm": 0.07418831437826157, "learning_rate": 1.8342025474057263e-05, "loss": 0.739043653011322, "step": 3417 }, { "epoch": 0.6315667910059937, "grad_norm": 0.07007978111505508, "learning_rate": 1.8340925195723023e-05, "loss": 0.5546165704727173, "step": 3418 }, { "epoch": 0.6317515677148895, "grad_norm": 0.07733705639839172, "learning_rate": 1.833982458544606e-05, "loss": 0.6315229535102844, "step": 3419 }, { "epoch": 0.6319363444237854, "grad_norm": 0.09028880298137665, "learning_rate": 1.8338723643270163e-05, "loss": 0.8068675398826599, "step": 3420 }, { "epoch": 0.6321211211326813, "grad_norm": 0.0775882750749588, "learning_rate": 1.833762236923916e-05, "loss": 0.598249077796936, "step": 3421 }, { "epoch": 0.6323058978415771, "grad_norm": 0.07598394900560379, "learning_rate": 1.8336520763396868e-05, "loss": 0.7596868276596069, "step": 3422 }, { "epoch": 0.6324906745504729, "grad_norm": 0.07083048671483994, "learning_rate": 1.833541882578713e-05, "loss": 0.6752363443374634, "step": 3423 }, { "epoch": 0.6326754512593687, "grad_norm": 0.07214643061161041, "learning_rate": 1.8334316556453808e-05, "loss": 0.5167519450187683, "step": 3424 }, { "epoch": 0.6328602279682646, "grad_norm": 0.0798087939620018, "learning_rate": 1.8333213955440755e-05, "loss": 0.5881116390228271, "step": 3425 }, { "epoch": 0.6330450046771604, "grad_norm": 0.0739937275648117, "learning_rate": 1.833211102279186e-05, "loss": 0.6433027386665344, "step": 3426 }, { "epoch": 0.6332297813860562, "grad_norm": 0.06404921412467957, "learning_rate": 1.8331007758551015e-05, "loss": 0.4750000834465027, "step": 3427 }, { "epoch": 0.6334145580949522, "grad_norm": 0.08710794895887375, "learning_rate": 1.8329904162762124e-05, "loss": 0.7732343673706055, "step": 3428 }, { "epoch": 0.633599334803848, "grad_norm": 0.08195815980434418, "learning_rate": 1.8328800235469108e-05, "loss": 0.5903754830360413, "step": 3429 }, { "epoch": 0.6337841115127438, "grad_norm": 0.07433349639177322, "learning_rate": 1.83276959767159e-05, "loss": 0.42970842123031616, "step": 3430 }, { "epoch": 0.6339688882216397, "grad_norm": 0.060573991388082504, "learning_rate": 1.8326591386546447e-05, "loss": 0.4808928370475769, "step": 3431 }, { "epoch": 0.6341536649305355, "grad_norm": 0.08820538222789764, "learning_rate": 1.8325486465004707e-05, "loss": 0.7748810648918152, "step": 3432 }, { "epoch": 0.6343384416394313, "grad_norm": 0.07573293894529343, "learning_rate": 1.8324381212134653e-05, "loss": 0.7678648233413696, "step": 3433 }, { "epoch": 0.6345232183483271, "grad_norm": 0.10143709182739258, "learning_rate": 1.8323275627980272e-05, "loss": 0.751928448677063, "step": 3434 }, { "epoch": 0.6347079950572231, "grad_norm": 0.10220063477754593, "learning_rate": 1.832216971258556e-05, "loss": 0.9340986013412476, "step": 3435 }, { "epoch": 0.6348927717661189, "grad_norm": 0.07440445572137833, "learning_rate": 1.8321063465994527e-05, "loss": 0.7372763752937317, "step": 3436 }, { "epoch": 0.6350775484750147, "grad_norm": 0.07867652922868729, "learning_rate": 1.8319956888251207e-05, "loss": 0.7990411520004272, "step": 3437 }, { "epoch": 0.6352623251839106, "grad_norm": 0.07615192979574203, "learning_rate": 1.831884997939963e-05, "loss": 0.7793571949005127, "step": 3438 }, { "epoch": 0.6354471018928064, "grad_norm": 0.06848933547735214, "learning_rate": 1.831774273948385e-05, "loss": 0.5564258694648743, "step": 3439 }, { "epoch": 0.6356318786017022, "grad_norm": 0.06217201426625252, "learning_rate": 1.831663516854793e-05, "loss": 0.52495938539505, "step": 3440 }, { "epoch": 0.6358166553105981, "grad_norm": 0.08522749692201614, "learning_rate": 1.8315527266635955e-05, "loss": 0.60796719789505, "step": 3441 }, { "epoch": 0.636001432019494, "grad_norm": 0.09203702211380005, "learning_rate": 1.8314419033792007e-05, "loss": 0.5910808444023132, "step": 3442 }, { "epoch": 0.6361862087283898, "grad_norm": 0.07956217974424362, "learning_rate": 1.8313310470060192e-05, "loss": 0.7164194583892822, "step": 3443 }, { "epoch": 0.6363709854372857, "grad_norm": 0.08320367336273193, "learning_rate": 1.8312201575484633e-05, "loss": 0.8383685350418091, "step": 3444 }, { "epoch": 0.6365557621461815, "grad_norm": 0.053374577313661575, "learning_rate": 1.8311092350109457e-05, "loss": 0.3978824019432068, "step": 3445 }, { "epoch": 0.6367405388550773, "grad_norm": 0.05999819189310074, "learning_rate": 1.8309982793978808e-05, "loss": 0.5727517604827881, "step": 3446 }, { "epoch": 0.6369253155639731, "grad_norm": 0.059646982699632645, "learning_rate": 1.8308872907136837e-05, "loss": 0.45642557740211487, "step": 3447 }, { "epoch": 0.637110092272869, "grad_norm": 0.07357610017061234, "learning_rate": 1.8307762689627724e-05, "loss": 0.7836448550224304, "step": 3448 }, { "epoch": 0.6372948689817648, "grad_norm": 0.06309615075588226, "learning_rate": 1.8306652141495645e-05, "loss": 0.5483134984970093, "step": 3449 }, { "epoch": 0.6374796456906607, "grad_norm": 0.07365509867668152, "learning_rate": 1.83055412627848e-05, "loss": 0.5442796349525452, "step": 3450 }, { "epoch": 0.6376644223995566, "grad_norm": 0.07378187030553818, "learning_rate": 1.83044300535394e-05, "loss": 0.7219253182411194, "step": 3451 }, { "epoch": 0.6378491991084524, "grad_norm": 0.08257278054952621, "learning_rate": 1.8303318513803664e-05, "loss": 0.8525739908218384, "step": 3452 }, { "epoch": 0.6380339758173482, "grad_norm": 0.08498632162809372, "learning_rate": 1.8302206643621826e-05, "loss": 0.7980212569236755, "step": 3453 }, { "epoch": 0.638218752526244, "grad_norm": 0.07984080910682678, "learning_rate": 1.830109444303814e-05, "loss": 0.575787365436554, "step": 3454 }, { "epoch": 0.6384035292351399, "grad_norm": 0.07604240626096725, "learning_rate": 1.8299981912096867e-05, "loss": 0.6391733288764954, "step": 3455 }, { "epoch": 0.6385883059440357, "grad_norm": 0.07938699424266815, "learning_rate": 1.829886905084228e-05, "loss": 0.7856255769729614, "step": 3456 }, { "epoch": 0.6387730826529316, "grad_norm": 0.0769173875451088, "learning_rate": 1.8297755859318665e-05, "loss": 0.7210874557495117, "step": 3457 }, { "epoch": 0.6389578593618275, "grad_norm": 0.07991595566272736, "learning_rate": 1.8296642337570333e-05, "loss": 0.7814546823501587, "step": 3458 }, { "epoch": 0.6391426360707233, "grad_norm": 0.08006802946329117, "learning_rate": 1.8295528485641588e-05, "loss": 0.7504193186759949, "step": 3459 }, { "epoch": 0.6393274127796191, "grad_norm": 0.07210196554660797, "learning_rate": 1.8294414303576768e-05, "loss": 0.45494532585144043, "step": 3460 }, { "epoch": 0.639512189488515, "grad_norm": 0.06325925886631012, "learning_rate": 1.8293299791420203e-05, "loss": 0.5189319849014282, "step": 3461 }, { "epoch": 0.6396969661974108, "grad_norm": 0.0802246481180191, "learning_rate": 1.8292184949216255e-05, "loss": 0.658757746219635, "step": 3462 }, { "epoch": 0.6398817429063066, "grad_norm": 0.07019998133182526, "learning_rate": 1.8291069777009293e-05, "loss": 0.5831966996192932, "step": 3463 }, { "epoch": 0.6400665196152026, "grad_norm": 0.07150223106145859, "learning_rate": 1.828995427484369e-05, "loss": 0.629213273525238, "step": 3464 }, { "epoch": 0.6402512963240984, "grad_norm": 0.06425396353006363, "learning_rate": 1.8288838442763838e-05, "loss": 0.6516048908233643, "step": 3465 }, { "epoch": 0.6404360730329942, "grad_norm": 0.07360067218542099, "learning_rate": 1.8287722280814154e-05, "loss": 0.7233392596244812, "step": 3466 }, { "epoch": 0.64062084974189, "grad_norm": 0.0641668289899826, "learning_rate": 1.828660578903905e-05, "loss": 0.5412912964820862, "step": 3467 }, { "epoch": 0.6408056264507859, "grad_norm": 0.0622696615755558, "learning_rate": 1.8285488967482964e-05, "loss": 0.48052069544792175, "step": 3468 }, { "epoch": 0.6409904031596817, "grad_norm": 0.06292513012886047, "learning_rate": 1.8284371816190338e-05, "loss": 0.619753897190094, "step": 3469 }, { "epoch": 0.6411751798685775, "grad_norm": 0.06942099332809448, "learning_rate": 1.828325433520563e-05, "loss": 0.5545070767402649, "step": 3470 }, { "epoch": 0.6413599565774734, "grad_norm": 0.0720704197883606, "learning_rate": 1.8282136524573316e-05, "loss": 0.7403892278671265, "step": 3471 }, { "epoch": 0.6415447332863693, "grad_norm": 0.08801698684692383, "learning_rate": 1.8281018384337882e-05, "loss": 0.7320387363433838, "step": 3472 }, { "epoch": 0.6417295099952651, "grad_norm": 0.06612563133239746, "learning_rate": 1.827989991454382e-05, "loss": 0.50302654504776, "step": 3473 }, { "epoch": 0.641914286704161, "grad_norm": 0.07397046685218811, "learning_rate": 1.8278781115235648e-05, "loss": 0.7002925276756287, "step": 3474 }, { "epoch": 0.6420990634130568, "grad_norm": 0.08516622334718704, "learning_rate": 1.827766198645789e-05, "loss": 0.6786304712295532, "step": 3475 }, { "epoch": 0.6422838401219526, "grad_norm": 0.058225326240062714, "learning_rate": 1.8276542528255078e-05, "loss": 0.4930121600627899, "step": 3476 }, { "epoch": 0.6424686168308484, "grad_norm": 0.08818955719470978, "learning_rate": 1.827542274067177e-05, "loss": 0.8350800275802612, "step": 3477 }, { "epoch": 0.6426533935397443, "grad_norm": 0.0774618610739708, "learning_rate": 1.827430262375253e-05, "loss": 0.6766074895858765, "step": 3478 }, { "epoch": 0.6428381702486402, "grad_norm": 0.06310693174600601, "learning_rate": 1.8273182177541928e-05, "loss": 0.5774590969085693, "step": 3479 }, { "epoch": 0.643022946957536, "grad_norm": 0.06140054389834404, "learning_rate": 1.827206140208456e-05, "loss": 0.48896872997283936, "step": 3480 }, { "epoch": 0.6432077236664319, "grad_norm": 0.06771141290664673, "learning_rate": 1.827094029742503e-05, "loss": 0.6462276577949524, "step": 3481 }, { "epoch": 0.6433925003753277, "grad_norm": 0.07365892082452774, "learning_rate": 1.826981886360795e-05, "loss": 0.7363516688346863, "step": 3482 }, { "epoch": 0.6435772770842235, "grad_norm": 0.07005984336137772, "learning_rate": 1.8268697100677955e-05, "loss": 0.5591853857040405, "step": 3483 }, { "epoch": 0.6437620537931193, "grad_norm": 0.07851269841194153, "learning_rate": 1.8267575008679685e-05, "loss": 0.7781826257705688, "step": 3484 }, { "epoch": 0.6439468305020152, "grad_norm": 0.0728900283575058, "learning_rate": 1.8266452587657792e-05, "loss": 0.7133674621582031, "step": 3485 }, { "epoch": 0.6441316072109111, "grad_norm": 0.08376836031675339, "learning_rate": 1.8265329837656952e-05, "loss": 0.7407172918319702, "step": 3486 }, { "epoch": 0.644316383919807, "grad_norm": 0.07879316061735153, "learning_rate": 1.8264206758721845e-05, "loss": 0.7643840909004211, "step": 3487 }, { "epoch": 0.6445011606287028, "grad_norm": 0.0975750982761383, "learning_rate": 1.8263083350897156e-05, "loss": 0.7902787923812866, "step": 3488 }, { "epoch": 0.6446859373375986, "grad_norm": 0.07233584672212601, "learning_rate": 1.826195961422761e-05, "loss": 0.6836885809898376, "step": 3489 }, { "epoch": 0.6448707140464944, "grad_norm": 0.06888895481824875, "learning_rate": 1.8260835548757917e-05, "loss": 0.6279575824737549, "step": 3490 }, { "epoch": 0.6450554907553903, "grad_norm": 0.07138030976057053, "learning_rate": 1.8259711154532814e-05, "loss": 0.63683021068573, "step": 3491 }, { "epoch": 0.6452402674642861, "grad_norm": 0.06463315337896347, "learning_rate": 1.8258586431597046e-05, "loss": 0.6189367771148682, "step": 3492 }, { "epoch": 0.6454250441731819, "grad_norm": 0.0695798248052597, "learning_rate": 1.8257461379995378e-05, "loss": 0.6790233850479126, "step": 3493 }, { "epoch": 0.6456098208820779, "grad_norm": 0.07280732691287994, "learning_rate": 1.825633599977258e-05, "loss": 0.5111994743347168, "step": 3494 }, { "epoch": 0.6457945975909737, "grad_norm": 0.08698681741952896, "learning_rate": 1.825521029097344e-05, "loss": 0.8289183378219604, "step": 3495 }, { "epoch": 0.6459793742998695, "grad_norm": 0.06710066646337509, "learning_rate": 1.825408425364276e-05, "loss": 0.5631644129753113, "step": 3496 }, { "epoch": 0.6461641510087653, "grad_norm": 0.07575134932994843, "learning_rate": 1.825295788782535e-05, "loss": 0.8791454434394836, "step": 3497 }, { "epoch": 0.6463489277176612, "grad_norm": 0.07547102123498917, "learning_rate": 1.8251831193566038e-05, "loss": 0.5951558947563171, "step": 3498 }, { "epoch": 0.646533704426557, "grad_norm": 0.062191519886255264, "learning_rate": 1.8250704170909655e-05, "loss": 0.6984429359436035, "step": 3499 }, { "epoch": 0.6467184811354528, "grad_norm": 0.07333795726299286, "learning_rate": 1.8249576819901062e-05, "loss": 0.6207255125045776, "step": 3500 }, { "epoch": 0.6467184811354528, "eval_loss": 0.6716294884681702, "eval_runtime": 157.9746, "eval_samples_per_second": 115.392, "eval_steps_per_second": 14.426, "step": 3500 }, { "epoch": 0.6469032578443488, "grad_norm": 0.07832157611846924, "learning_rate": 1.824844914058512e-05, "loss": 0.6614238619804382, "step": 3501 }, { "epoch": 0.6470880345532446, "grad_norm": 0.06644383817911148, "learning_rate": 1.8247321133006715e-05, "loss": 0.5758317708969116, "step": 3502 }, { "epoch": 0.6472728112621404, "grad_norm": 0.06412899494171143, "learning_rate": 1.8246192797210723e-05, "loss": 0.47316062450408936, "step": 3503 }, { "epoch": 0.6474575879710363, "grad_norm": 0.08234144747257233, "learning_rate": 1.8245064133242064e-05, "loss": 0.7362156510353088, "step": 3504 }, { "epoch": 0.6476423646799321, "grad_norm": 0.07396001368761063, "learning_rate": 1.8243935141145642e-05, "loss": 0.7157362103462219, "step": 3505 }, { "epoch": 0.6478271413888279, "grad_norm": 0.08159304410219193, "learning_rate": 1.8242805820966397e-05, "loss": 0.7625038027763367, "step": 3506 }, { "epoch": 0.6480119180977237, "grad_norm": 0.07692290842533112, "learning_rate": 1.824167617274927e-05, "loss": 0.6218598484992981, "step": 3507 }, { "epoch": 0.6481966948066197, "grad_norm": 0.07371748983860016, "learning_rate": 1.824054619653921e-05, "loss": 0.6727126836776733, "step": 3508 }, { "epoch": 0.6483814715155155, "grad_norm": 0.07481326907873154, "learning_rate": 1.82394158923812e-05, "loss": 0.6585962772369385, "step": 3509 }, { "epoch": 0.6485662482244113, "grad_norm": 0.07488329708576202, "learning_rate": 1.823828526032021e-05, "loss": 0.8188884258270264, "step": 3510 }, { "epoch": 0.6487510249333072, "grad_norm": 0.07071026414632797, "learning_rate": 1.823715430040124e-05, "loss": 0.6719712018966675, "step": 3511 }, { "epoch": 0.648935801642203, "grad_norm": 0.050728604197502136, "learning_rate": 1.8236023012669305e-05, "loss": 0.4805065393447876, "step": 3512 }, { "epoch": 0.6491205783510988, "grad_norm": 0.06074635684490204, "learning_rate": 1.8234891397169415e-05, "loss": 0.4907507598400116, "step": 3513 }, { "epoch": 0.6493053550599946, "grad_norm": 0.0737895593047142, "learning_rate": 1.823375945394662e-05, "loss": 0.6974319815635681, "step": 3514 }, { "epoch": 0.6494901317688905, "grad_norm": 0.06137435510754585, "learning_rate": 1.8232627183045954e-05, "loss": 0.5445990562438965, "step": 3515 }, { "epoch": 0.6496749084777864, "grad_norm": 0.06566373258829117, "learning_rate": 1.8231494584512477e-05, "loss": 0.5697711110115051, "step": 3516 }, { "epoch": 0.6498596851866822, "grad_norm": 0.0688183605670929, "learning_rate": 1.8230361658391277e-05, "loss": 0.634209930896759, "step": 3517 }, { "epoch": 0.6500444618955781, "grad_norm": 0.07534511387348175, "learning_rate": 1.8229228404727428e-05, "loss": 0.7855115532875061, "step": 3518 }, { "epoch": 0.6502292386044739, "grad_norm": 0.07258817553520203, "learning_rate": 1.8228094823566034e-05, "loss": 0.6711122989654541, "step": 3519 }, { "epoch": 0.6504140153133697, "grad_norm": 0.08547286689281464, "learning_rate": 1.822696091495221e-05, "loss": 0.76134192943573, "step": 3520 }, { "epoch": 0.6505987920222656, "grad_norm": 0.062105804681777954, "learning_rate": 1.8225826678931082e-05, "loss": 0.5121784210205078, "step": 3521 }, { "epoch": 0.6507835687311614, "grad_norm": 0.07176516205072403, "learning_rate": 1.8224692115547786e-05, "loss": 0.5259842276573181, "step": 3522 }, { "epoch": 0.6509683454400573, "grad_norm": 0.06844273209571838, "learning_rate": 1.8223557224847476e-05, "loss": 0.5787487030029297, "step": 3523 }, { "epoch": 0.6511531221489532, "grad_norm": 0.10131745785474777, "learning_rate": 1.8222422006875316e-05, "loss": 0.9592319130897522, "step": 3524 }, { "epoch": 0.651337898857849, "grad_norm": 0.06959455460309982, "learning_rate": 1.8221286461676487e-05, "loss": 0.5962274670600891, "step": 3525 }, { "epoch": 0.6515226755667448, "grad_norm": 0.0840156152844429, "learning_rate": 1.8220150589296172e-05, "loss": 0.7271184921264648, "step": 3526 }, { "epoch": 0.6517074522756406, "grad_norm": 0.0845293253660202, "learning_rate": 1.8219014389779586e-05, "loss": 0.7290505766868591, "step": 3527 }, { "epoch": 0.6518922289845365, "grad_norm": 0.08639811724424362, "learning_rate": 1.821787786317194e-05, "loss": 0.8814318776130676, "step": 3528 }, { "epoch": 0.6520770056934323, "grad_norm": 0.07506068795919418, "learning_rate": 1.8216741009518465e-05, "loss": 0.6959407329559326, "step": 3529 }, { "epoch": 0.6522617824023282, "grad_norm": 0.06726741790771484, "learning_rate": 1.8215603828864406e-05, "loss": 0.5602008104324341, "step": 3530 }, { "epoch": 0.6524465591112241, "grad_norm": 0.0724782720208168, "learning_rate": 1.8214466321255016e-05, "loss": 0.6988791823387146, "step": 3531 }, { "epoch": 0.6526313358201199, "grad_norm": 0.08787646889686584, "learning_rate": 1.8213328486735568e-05, "loss": 0.7606102228164673, "step": 3532 }, { "epoch": 0.6528161125290157, "grad_norm": 0.06051532179117203, "learning_rate": 1.8212190325351343e-05, "loss": 0.5349277257919312, "step": 3533 }, { "epoch": 0.6530008892379116, "grad_norm": 0.06168433651328087, "learning_rate": 1.8211051837147638e-05, "loss": 0.49732333421707153, "step": 3534 }, { "epoch": 0.6531856659468074, "grad_norm": 0.0682239979505539, "learning_rate": 1.8209913022169755e-05, "loss": 0.555517315864563, "step": 3535 }, { "epoch": 0.6533704426557032, "grad_norm": 0.07696058601140976, "learning_rate": 1.8208773880463017e-05, "loss": 0.7068920731544495, "step": 3536 }, { "epoch": 0.653555219364599, "grad_norm": 0.08367285877466202, "learning_rate": 1.8207634412072765e-05, "loss": 0.799506664276123, "step": 3537 }, { "epoch": 0.653739996073495, "grad_norm": 0.08666908740997314, "learning_rate": 1.8206494617044338e-05, "loss": 0.8445583581924438, "step": 3538 }, { "epoch": 0.6539247727823908, "grad_norm": 0.0719844400882721, "learning_rate": 1.8205354495423104e-05, "loss": 0.5322660803794861, "step": 3539 }, { "epoch": 0.6541095494912866, "grad_norm": 0.08823683857917786, "learning_rate": 1.8204214047254433e-05, "loss": 0.745097279548645, "step": 3540 }, { "epoch": 0.6542943262001825, "grad_norm": 0.06618287414312363, "learning_rate": 1.8203073272583705e-05, "loss": 0.6083573698997498, "step": 3541 }, { "epoch": 0.6544791029090783, "grad_norm": 0.0668790340423584, "learning_rate": 1.8201932171456328e-05, "loss": 0.6096829771995544, "step": 3542 }, { "epoch": 0.6546638796179741, "grad_norm": 0.06935631483793259, "learning_rate": 1.8200790743917714e-05, "loss": 0.5925930738449097, "step": 3543 }, { "epoch": 0.65484865632687, "grad_norm": 0.058166343718767166, "learning_rate": 1.819964899001328e-05, "loss": 0.46284744143486023, "step": 3544 }, { "epoch": 0.6550334330357659, "grad_norm": 0.08119804412126541, "learning_rate": 1.8198506909788475e-05, "loss": 0.7212343811988831, "step": 3545 }, { "epoch": 0.6552182097446617, "grad_norm": 0.08805336058139801, "learning_rate": 1.819736450328874e-05, "loss": 0.7497941255569458, "step": 3546 }, { "epoch": 0.6554029864535575, "grad_norm": 0.07108543813228607, "learning_rate": 1.8196221770559548e-05, "loss": 0.5629989504814148, "step": 3547 }, { "epoch": 0.6555877631624534, "grad_norm": 0.06211220473051071, "learning_rate": 1.8195078711646367e-05, "loss": 0.6136374473571777, "step": 3548 }, { "epoch": 0.6557725398713492, "grad_norm": 0.07083010673522949, "learning_rate": 1.81939353265947e-05, "loss": 0.5266101360321045, "step": 3549 }, { "epoch": 0.655957316580245, "grad_norm": 0.070891834795475, "learning_rate": 1.8192791615450035e-05, "loss": 0.6135351061820984, "step": 3550 }, { "epoch": 0.6561420932891409, "grad_norm": 0.06508482247591019, "learning_rate": 1.81916475782579e-05, "loss": 0.4885174334049225, "step": 3551 }, { "epoch": 0.6563268699980368, "grad_norm": 0.07081446051597595, "learning_rate": 1.819050321506382e-05, "loss": 0.48294541239738464, "step": 3552 }, { "epoch": 0.6565116467069326, "grad_norm": 0.08830460160970688, "learning_rate": 1.8189358525913335e-05, "loss": 0.6990798115730286, "step": 3553 }, { "epoch": 0.6566964234158285, "grad_norm": 0.07055427134037018, "learning_rate": 1.8188213510852003e-05, "loss": 0.5924009084701538, "step": 3554 }, { "epoch": 0.6568812001247243, "grad_norm": 0.06699106097221375, "learning_rate": 1.8187068169925387e-05, "loss": 0.5916507840156555, "step": 3555 }, { "epoch": 0.6570659768336201, "grad_norm": 0.07054045796394348, "learning_rate": 1.8185922503179077e-05, "loss": 0.48589131236076355, "step": 3556 }, { "epoch": 0.6572507535425159, "grad_norm": 0.07716887444257736, "learning_rate": 1.818477651065866e-05, "loss": 0.6579416990280151, "step": 3557 }, { "epoch": 0.6574355302514118, "grad_norm": 0.0649254322052002, "learning_rate": 1.8183630192409746e-05, "loss": 0.5554823279380798, "step": 3558 }, { "epoch": 0.6576203069603076, "grad_norm": 0.07145379483699799, "learning_rate": 1.818248354847795e-05, "loss": 0.4613144099712372, "step": 3559 }, { "epoch": 0.6578050836692035, "grad_norm": 0.07647396624088287, "learning_rate": 1.8181336578908913e-05, "loss": 0.6117957830429077, "step": 3560 }, { "epoch": 0.6579898603780994, "grad_norm": 0.078497014939785, "learning_rate": 1.8180189283748274e-05, "loss": 0.756879448890686, "step": 3561 }, { "epoch": 0.6581746370869952, "grad_norm": 0.07156045734882355, "learning_rate": 1.8179041663041693e-05, "loss": 0.583175778388977, "step": 3562 }, { "epoch": 0.658359413795891, "grad_norm": 0.054694127291440964, "learning_rate": 1.8177893716834844e-05, "loss": 0.4489407241344452, "step": 3563 }, { "epoch": 0.6585441905047869, "grad_norm": 0.05911286920309067, "learning_rate": 1.817674544517341e-05, "loss": 0.5961577892303467, "step": 3564 }, { "epoch": 0.6587289672136827, "grad_norm": 0.08541987836360931, "learning_rate": 1.817559684810309e-05, "loss": 0.7918174266815186, "step": 3565 }, { "epoch": 0.6589137439225785, "grad_norm": 0.07582125067710876, "learning_rate": 1.8174447925669594e-05, "loss": 0.5886412858963013, "step": 3566 }, { "epoch": 0.6590985206314744, "grad_norm": 0.09455190598964691, "learning_rate": 1.8173298677918644e-05, "loss": 0.7425788640975952, "step": 3567 }, { "epoch": 0.6592832973403703, "grad_norm": 0.07785138487815857, "learning_rate": 1.8172149104895976e-05, "loss": 0.711449146270752, "step": 3568 }, { "epoch": 0.6594680740492661, "grad_norm": 0.06602492183446884, "learning_rate": 1.817099920664734e-05, "loss": 0.5020294189453125, "step": 3569 }, { "epoch": 0.6596528507581619, "grad_norm": 0.07863244414329529, "learning_rate": 1.8169848983218506e-05, "loss": 0.6526080369949341, "step": 3570 }, { "epoch": 0.6598376274670578, "grad_norm": 0.08201786875724792, "learning_rate": 1.8168698434655237e-05, "loss": 0.6860029697418213, "step": 3571 }, { "epoch": 0.6600224041759536, "grad_norm": 0.06559526920318604, "learning_rate": 1.8167547561003328e-05, "loss": 0.6369378566741943, "step": 3572 }, { "epoch": 0.6602071808848494, "grad_norm": 0.07632806897163391, "learning_rate": 1.8166396362308584e-05, "loss": 0.6945319175720215, "step": 3573 }, { "epoch": 0.6603919575937454, "grad_norm": 0.08810561150312424, "learning_rate": 1.8165244838616808e-05, "loss": 0.8644512891769409, "step": 3574 }, { "epoch": 0.6605767343026412, "grad_norm": 0.06652560085058212, "learning_rate": 1.8164092989973832e-05, "loss": 0.626679539680481, "step": 3575 }, { "epoch": 0.660761511011537, "grad_norm": 0.06613638997077942, "learning_rate": 1.81629408164255e-05, "loss": 0.522729754447937, "step": 3576 }, { "epoch": 0.6609462877204328, "grad_norm": 0.07767696678638458, "learning_rate": 1.8161788318017663e-05, "loss": 0.5926226377487183, "step": 3577 }, { "epoch": 0.6611310644293287, "grad_norm": 0.07696923613548279, "learning_rate": 1.8160635494796186e-05, "loss": 0.6262413263320923, "step": 3578 }, { "epoch": 0.6613158411382245, "grad_norm": 0.07074544578790665, "learning_rate": 1.8159482346806946e-05, "loss": 0.5292245149612427, "step": 3579 }, { "epoch": 0.6615006178471203, "grad_norm": 0.06287173181772232, "learning_rate": 1.8158328874095835e-05, "loss": 0.5215235352516174, "step": 3580 }, { "epoch": 0.6616853945560162, "grad_norm": 0.065033458173275, "learning_rate": 1.8157175076708765e-05, "loss": 0.5890915393829346, "step": 3581 }, { "epoch": 0.6618701712649121, "grad_norm": 0.07116048783063889, "learning_rate": 1.8156020954691643e-05, "loss": 0.6286876201629639, "step": 3582 }, { "epoch": 0.6620549479738079, "grad_norm": 0.08613475412130356, "learning_rate": 1.8154866508090404e-05, "loss": 0.6727132201194763, "step": 3583 }, { "epoch": 0.6622397246827038, "grad_norm": 0.0765560194849968, "learning_rate": 1.8153711736950992e-05, "loss": 0.7270196080207825, "step": 3584 }, { "epoch": 0.6624245013915996, "grad_norm": 0.08965720236301422, "learning_rate": 1.8152556641319364e-05, "loss": 0.8607513904571533, "step": 3585 }, { "epoch": 0.6626092781004954, "grad_norm": 0.0839729979634285, "learning_rate": 1.8151401221241482e-05, "loss": 0.6847149729728699, "step": 3586 }, { "epoch": 0.6627940548093912, "grad_norm": 0.07854394614696503, "learning_rate": 1.815024547676334e-05, "loss": 0.7056261301040649, "step": 3587 }, { "epoch": 0.6629788315182871, "grad_norm": 0.06578505784273148, "learning_rate": 1.8149089407930924e-05, "loss": 0.4915512502193451, "step": 3588 }, { "epoch": 0.663163608227183, "grad_norm": 0.07030507922172546, "learning_rate": 1.8147933014790245e-05, "loss": 0.5319059491157532, "step": 3589 }, { "epoch": 0.6633483849360788, "grad_norm": 0.062074411660432816, "learning_rate": 1.8146776297387327e-05, "loss": 0.4846246540546417, "step": 3590 }, { "epoch": 0.6635331616449747, "grad_norm": 0.06728599220514297, "learning_rate": 1.8145619255768195e-05, "loss": 0.6230316758155823, "step": 3591 }, { "epoch": 0.6637179383538705, "grad_norm": 0.08350211381912231, "learning_rate": 1.8144461889978902e-05, "loss": 0.7138779163360596, "step": 3592 }, { "epoch": 0.6639027150627663, "grad_norm": 0.07312264293432236, "learning_rate": 1.814330420006551e-05, "loss": 0.5450552701950073, "step": 3593 }, { "epoch": 0.6640874917716622, "grad_norm": 0.09821341931819916, "learning_rate": 1.8142146186074087e-05, "loss": 0.5662215948104858, "step": 3594 }, { "epoch": 0.664272268480558, "grad_norm": 0.06497794389724731, "learning_rate": 1.8140987848050717e-05, "loss": 0.43087509274482727, "step": 3595 }, { "epoch": 0.6644570451894539, "grad_norm": 0.09049307554960251, "learning_rate": 1.81398291860415e-05, "loss": 0.6527272462844849, "step": 3596 }, { "epoch": 0.6646418218983497, "grad_norm": 0.07319524884223938, "learning_rate": 1.813867020009255e-05, "loss": 0.6607534885406494, "step": 3597 }, { "epoch": 0.6648265986072456, "grad_norm": 0.10025575011968613, "learning_rate": 1.813751089024999e-05, "loss": 0.7384710311889648, "step": 3598 }, { "epoch": 0.6650113753161414, "grad_norm": 0.07169393450021744, "learning_rate": 1.813635125655995e-05, "loss": 0.5975751280784607, "step": 3599 }, { "epoch": 0.6651961520250372, "grad_norm": 0.08455440402030945, "learning_rate": 1.813519129906859e-05, "loss": 0.5890780687332153, "step": 3600 }, { "epoch": 0.6653809287339331, "grad_norm": 0.07142335176467896, "learning_rate": 1.813403101782207e-05, "loss": 0.6866121292114258, "step": 3601 }, { "epoch": 0.6655657054428289, "grad_norm": 0.06693186610937119, "learning_rate": 1.8132870412866557e-05, "loss": 0.5880172848701477, "step": 3602 }, { "epoch": 0.6657504821517247, "grad_norm": 0.06023497134447098, "learning_rate": 1.8131709484248254e-05, "loss": 0.5216284394264221, "step": 3603 }, { "epoch": 0.6659352588606207, "grad_norm": 0.05582762137055397, "learning_rate": 1.813054823201335e-05, "loss": 0.47629982233047485, "step": 3604 }, { "epoch": 0.6661200355695165, "grad_norm": 0.05890011042356491, "learning_rate": 1.812938665620806e-05, "loss": 0.5389571785926819, "step": 3605 }, { "epoch": 0.6663048122784123, "grad_norm": 0.08104158937931061, "learning_rate": 1.8128224756878622e-05, "loss": 0.7427095174789429, "step": 3606 }, { "epoch": 0.6664895889873081, "grad_norm": 0.060655295848846436, "learning_rate": 1.8127062534071265e-05, "loss": 0.47118300199508667, "step": 3607 }, { "epoch": 0.666674365696204, "grad_norm": 0.07759436219930649, "learning_rate": 1.8125899987832245e-05, "loss": 0.5543125867843628, "step": 3608 }, { "epoch": 0.6668591424050998, "grad_norm": 0.07310891151428223, "learning_rate": 1.812473711820783e-05, "loss": 0.5512562990188599, "step": 3609 }, { "epoch": 0.6670439191139956, "grad_norm": 0.07100726664066315, "learning_rate": 1.8123573925244293e-05, "loss": 0.5451439023017883, "step": 3610 }, { "epoch": 0.6672286958228916, "grad_norm": 0.0782528966665268, "learning_rate": 1.8122410408987933e-05, "loss": 0.6551583409309387, "step": 3611 }, { "epoch": 0.6674134725317874, "grad_norm": 0.0719519704580307, "learning_rate": 1.812124656948505e-05, "loss": 0.6590461134910583, "step": 3612 }, { "epoch": 0.6675982492406832, "grad_norm": 0.06585057824850082, "learning_rate": 1.812008240678196e-05, "loss": 0.5170865058898926, "step": 3613 }, { "epoch": 0.6677830259495791, "grad_norm": 0.08960982412099838, "learning_rate": 1.8118917920924995e-05, "loss": 0.9026049375534058, "step": 3614 }, { "epoch": 0.6679678026584749, "grad_norm": 0.07508343458175659, "learning_rate": 1.8117753111960496e-05, "loss": 0.5850327014923096, "step": 3615 }, { "epoch": 0.6681525793673707, "grad_norm": 0.06610523909330368, "learning_rate": 1.8116587979934825e-05, "loss": 0.4828251004219055, "step": 3616 }, { "epoch": 0.6683373560762665, "grad_norm": 0.08065943419933319, "learning_rate": 1.811542252489434e-05, "loss": 0.6314260959625244, "step": 3617 }, { "epoch": 0.6685221327851625, "grad_norm": 0.08027535676956177, "learning_rate": 1.8114256746885433e-05, "loss": 0.7746886610984802, "step": 3618 }, { "epoch": 0.6687069094940583, "grad_norm": 0.05435701459646225, "learning_rate": 1.8113090645954492e-05, "loss": 0.4653548002243042, "step": 3619 }, { "epoch": 0.6688916862029541, "grad_norm": 0.07187805324792862, "learning_rate": 1.8111924222147927e-05, "loss": 0.588527500629425, "step": 3620 }, { "epoch": 0.66907646291185, "grad_norm": 0.07427569478750229, "learning_rate": 1.811075747551216e-05, "loss": 0.710505485534668, "step": 3621 }, { "epoch": 0.6692612396207458, "grad_norm": 0.09005344659090042, "learning_rate": 1.8109590406093612e-05, "loss": 0.834923505783081, "step": 3622 }, { "epoch": 0.6694460163296416, "grad_norm": 0.06310055404901505, "learning_rate": 1.8108423013938744e-05, "loss": 0.521003246307373, "step": 3623 }, { "epoch": 0.6696307930385375, "grad_norm": 0.06574016809463501, "learning_rate": 1.8107255299094007e-05, "loss": 0.6228640079498291, "step": 3624 }, { "epoch": 0.6698155697474333, "grad_norm": 0.0749313235282898, "learning_rate": 1.8106087261605872e-05, "loss": 0.3983880579471588, "step": 3625 }, { "epoch": 0.6700003464563292, "grad_norm": 0.08887924253940582, "learning_rate": 1.8104918901520828e-05, "loss": 0.6355776786804199, "step": 3626 }, { "epoch": 0.670185123165225, "grad_norm": 0.06345250457525253, "learning_rate": 1.8103750218885366e-05, "loss": 0.49787241220474243, "step": 3627 }, { "epoch": 0.6703698998741209, "grad_norm": 0.07932516932487488, "learning_rate": 1.8102581213745996e-05, "loss": 0.8111638426780701, "step": 3628 }, { "epoch": 0.6705546765830167, "grad_norm": 0.05282360315322876, "learning_rate": 1.810141188614925e-05, "loss": 0.46341559290885925, "step": 3629 }, { "epoch": 0.6707394532919125, "grad_norm": 0.0626249611377716, "learning_rate": 1.810024223614165e-05, "loss": 0.6896385550498962, "step": 3630 }, { "epoch": 0.6709242300008084, "grad_norm": 0.07346781343221664, "learning_rate": 1.8099072263769754e-05, "loss": 0.5050565004348755, "step": 3631 }, { "epoch": 0.6711090067097042, "grad_norm": 0.07342267781496048, "learning_rate": 1.8097901969080123e-05, "loss": 0.6833550930023193, "step": 3632 }, { "epoch": 0.6712937834186001, "grad_norm": 0.07485052198171616, "learning_rate": 1.809673135211933e-05, "loss": 0.6823423504829407, "step": 3633 }, { "epoch": 0.671478560127496, "grad_norm": 0.06795880943536758, "learning_rate": 1.8095560412933956e-05, "loss": 0.5953877568244934, "step": 3634 }, { "epoch": 0.6716633368363918, "grad_norm": 0.07932394742965698, "learning_rate": 1.8094389151570607e-05, "loss": 0.7074558734893799, "step": 3635 }, { "epoch": 0.6718481135452876, "grad_norm": 0.08381252735853195, "learning_rate": 1.8093217568075895e-05, "loss": 0.601979672908783, "step": 3636 }, { "epoch": 0.6720328902541834, "grad_norm": 0.07244662940502167, "learning_rate": 1.809204566249644e-05, "loss": 0.6082628965377808, "step": 3637 }, { "epoch": 0.6722176669630793, "grad_norm": 0.08388855308294296, "learning_rate": 1.8090873434878888e-05, "loss": 0.7804757952690125, "step": 3638 }, { "epoch": 0.6724024436719751, "grad_norm": 0.06491474062204361, "learning_rate": 1.808970088526989e-05, "loss": 0.5486597418785095, "step": 3639 }, { "epoch": 0.672587220380871, "grad_norm": 0.07179899513721466, "learning_rate": 1.80885280137161e-05, "loss": 0.6362982988357544, "step": 3640 }, { "epoch": 0.6727719970897669, "grad_norm": 0.07112115621566772, "learning_rate": 1.8087354820264202e-05, "loss": 0.6815124750137329, "step": 3641 }, { "epoch": 0.6729567737986627, "grad_norm": 0.07267649471759796, "learning_rate": 1.8086181304960885e-05, "loss": 0.6394959688186646, "step": 3642 }, { "epoch": 0.6731415505075585, "grad_norm": 0.06543809920549393, "learning_rate": 1.808500746785285e-05, "loss": 0.53403639793396, "step": 3643 }, { "epoch": 0.6733263272164544, "grad_norm": 0.06312116980552673, "learning_rate": 1.8083833308986816e-05, "loss": 0.5903558135032654, "step": 3644 }, { "epoch": 0.6735111039253502, "grad_norm": 0.06180790811777115, "learning_rate": 1.8082658828409502e-05, "loss": 0.5831218361854553, "step": 3645 }, { "epoch": 0.673695880634246, "grad_norm": 0.07454147934913635, "learning_rate": 1.808148402616766e-05, "loss": 0.56765216588974, "step": 3646 }, { "epoch": 0.6738806573431418, "grad_norm": 0.07599983364343643, "learning_rate": 1.808030890230803e-05, "loss": 0.6603242754936218, "step": 3647 }, { "epoch": 0.6740654340520378, "grad_norm": 0.0702044740319252, "learning_rate": 1.8079133456877393e-05, "loss": 0.541609525680542, "step": 3648 }, { "epoch": 0.6742502107609336, "grad_norm": 0.06919199228286743, "learning_rate": 1.8077957689922516e-05, "loss": 0.6654222011566162, "step": 3649 }, { "epoch": 0.6744349874698294, "grad_norm": 0.06848164647817612, "learning_rate": 1.8076781601490196e-05, "loss": 0.6674426198005676, "step": 3650 }, { "epoch": 0.6746197641787253, "grad_norm": 0.08180391043424606, "learning_rate": 1.8075605191627242e-05, "loss": 0.7538283467292786, "step": 3651 }, { "epoch": 0.6748045408876211, "grad_norm": 0.0738702192902565, "learning_rate": 1.8074428460380463e-05, "loss": 0.6896567940711975, "step": 3652 }, { "epoch": 0.6749893175965169, "grad_norm": 0.07367371767759323, "learning_rate": 1.8073251407796692e-05, "loss": 0.5425678491592407, "step": 3653 }, { "epoch": 0.6751740943054128, "grad_norm": 0.0782298818230629, "learning_rate": 1.8072074033922773e-05, "loss": 0.6600009202957153, "step": 3654 }, { "epoch": 0.6753588710143087, "grad_norm": 0.06548859924077988, "learning_rate": 1.8070896338805565e-05, "loss": 0.7054764032363892, "step": 3655 }, { "epoch": 0.6755436477232045, "grad_norm": 0.08852683752775192, "learning_rate": 1.8069718322491928e-05, "loss": 0.7335495948791504, "step": 3656 }, { "epoch": 0.6757284244321003, "grad_norm": 0.07670899480581284, "learning_rate": 1.8068539985028755e-05, "loss": 0.5692101716995239, "step": 3657 }, { "epoch": 0.6759132011409962, "grad_norm": 0.07531186193227768, "learning_rate": 1.806736132646293e-05, "loss": 0.594825029373169, "step": 3658 }, { "epoch": 0.676097977849892, "grad_norm": 0.09214551001787186, "learning_rate": 1.8066182346841365e-05, "loss": 0.9483715891838074, "step": 3659 }, { "epoch": 0.6762827545587878, "grad_norm": 0.06421562284231186, "learning_rate": 1.8065003046210976e-05, "loss": 0.5851206183433533, "step": 3660 }, { "epoch": 0.6764675312676837, "grad_norm": 0.1005164384841919, "learning_rate": 1.8063823424618698e-05, "loss": 0.8281410932540894, "step": 3661 }, { "epoch": 0.6766523079765796, "grad_norm": 0.06132997199892998, "learning_rate": 1.806264348211148e-05, "loss": 0.4958787262439728, "step": 3662 }, { "epoch": 0.6768370846854754, "grad_norm": 0.07368472963571548, "learning_rate": 1.8061463218736272e-05, "loss": 0.7197514176368713, "step": 3663 }, { "epoch": 0.6770218613943713, "grad_norm": 0.06913574039936066, "learning_rate": 1.8060282634540053e-05, "loss": 0.6327351331710815, "step": 3664 }, { "epoch": 0.6772066381032671, "grad_norm": 0.07094834744930267, "learning_rate": 1.80591017295698e-05, "loss": 0.7247785925865173, "step": 3665 }, { "epoch": 0.6773914148121629, "grad_norm": 0.07524259388446808, "learning_rate": 1.8057920503872514e-05, "loss": 0.7688266634941101, "step": 3666 }, { "epoch": 0.6775761915210587, "grad_norm": 0.060354214161634445, "learning_rate": 1.80567389574952e-05, "loss": 0.5938658714294434, "step": 3667 }, { "epoch": 0.6777609682299546, "grad_norm": 0.06695031374692917, "learning_rate": 1.805555709048488e-05, "loss": 0.6668074131011963, "step": 3668 }, { "epoch": 0.6779457449388504, "grad_norm": 0.06782311946153641, "learning_rate": 1.8054374902888594e-05, "loss": 0.5017516613006592, "step": 3669 }, { "epoch": 0.6781305216477463, "grad_norm": 0.08580289781093597, "learning_rate": 1.8053192394753383e-05, "loss": 0.814161479473114, "step": 3670 }, { "epoch": 0.6783152983566422, "grad_norm": 0.07032842934131622, "learning_rate": 1.8052009566126312e-05, "loss": 0.6677312254905701, "step": 3671 }, { "epoch": 0.678500075065538, "grad_norm": 0.07815011590719223, "learning_rate": 1.805082641705445e-05, "loss": 0.6751715540885925, "step": 3672 }, { "epoch": 0.6786848517744338, "grad_norm": 0.06954637169837952, "learning_rate": 1.8049642947584885e-05, "loss": 0.5944041609764099, "step": 3673 }, { "epoch": 0.6788696284833297, "grad_norm": 0.06862206757068634, "learning_rate": 1.8048459157764714e-05, "loss": 0.5541815757751465, "step": 3674 }, { "epoch": 0.6790544051922255, "grad_norm": 0.06570684164762497, "learning_rate": 1.804727504764105e-05, "loss": 0.5496829152107239, "step": 3675 }, { "epoch": 0.6792391819011213, "grad_norm": 0.0733305811882019, "learning_rate": 1.804609061726102e-05, "loss": 0.604638397693634, "step": 3676 }, { "epoch": 0.6794239586100173, "grad_norm": 0.06596026569604874, "learning_rate": 1.804490586667175e-05, "loss": 0.5498355031013489, "step": 3677 }, { "epoch": 0.6796087353189131, "grad_norm": 0.060254111886024475, "learning_rate": 1.8043720795920397e-05, "loss": 0.489950031042099, "step": 3678 }, { "epoch": 0.6797935120278089, "grad_norm": 0.07105099409818649, "learning_rate": 1.8042535405054125e-05, "loss": 0.5928880572319031, "step": 3679 }, { "epoch": 0.6799782887367047, "grad_norm": 0.08792119473218918, "learning_rate": 1.8041349694120102e-05, "loss": 0.628506064414978, "step": 3680 }, { "epoch": 0.6801630654456006, "grad_norm": 0.08043865859508514, "learning_rate": 1.8040163663165523e-05, "loss": 0.7550156712532043, "step": 3681 }, { "epoch": 0.6803478421544964, "grad_norm": 0.07714878022670746, "learning_rate": 1.8038977312237583e-05, "loss": 0.6829538941383362, "step": 3682 }, { "epoch": 0.6805326188633922, "grad_norm": 0.06187025457620621, "learning_rate": 1.8037790641383493e-05, "loss": 0.4977126717567444, "step": 3683 }, { "epoch": 0.6807173955722882, "grad_norm": 0.11160174012184143, "learning_rate": 1.8036603650650487e-05, "loss": 0.634476363658905, "step": 3684 }, { "epoch": 0.680902172281184, "grad_norm": 0.07590920478105545, "learning_rate": 1.80354163400858e-05, "loss": 0.6065258383750916, "step": 3685 }, { "epoch": 0.6810869489900798, "grad_norm": 0.08354957401752472, "learning_rate": 1.803422870973668e-05, "loss": 0.7981310486793518, "step": 3686 }, { "epoch": 0.6812717256989756, "grad_norm": 0.07451831549406052, "learning_rate": 1.803304075965039e-05, "loss": 0.5937463641166687, "step": 3687 }, { "epoch": 0.6814565024078715, "grad_norm": 0.07504851371049881, "learning_rate": 1.8031852489874215e-05, "loss": 0.6576966643333435, "step": 3688 }, { "epoch": 0.6816412791167673, "grad_norm": 0.08937250822782516, "learning_rate": 1.803066390045544e-05, "loss": 1.0074913501739502, "step": 3689 }, { "epoch": 0.6818260558256631, "grad_norm": 0.07768749445676804, "learning_rate": 1.802947499144136e-05, "loss": 0.7405135035514832, "step": 3690 }, { "epoch": 0.682010832534559, "grad_norm": 0.0934264287352562, "learning_rate": 1.8028285762879303e-05, "loss": 0.7602882981300354, "step": 3691 }, { "epoch": 0.6821956092434549, "grad_norm": 0.0645613968372345, "learning_rate": 1.802709621481659e-05, "loss": 0.6215939521789551, "step": 3692 }, { "epoch": 0.6823803859523507, "grad_norm": 0.07587705552577972, "learning_rate": 1.8025906347300557e-05, "loss": 0.7235476970672607, "step": 3693 }, { "epoch": 0.6825651626612466, "grad_norm": 0.06425310671329498, "learning_rate": 1.802471616037856e-05, "loss": 0.548570454120636, "step": 3694 }, { "epoch": 0.6827499393701424, "grad_norm": 0.07246150821447372, "learning_rate": 1.8023525654097967e-05, "loss": 0.6220743656158447, "step": 3695 }, { "epoch": 0.6829347160790382, "grad_norm": 0.06483404338359833, "learning_rate": 1.802233482850616e-05, "loss": 0.6329941153526306, "step": 3696 }, { "epoch": 0.683119492787934, "grad_norm": 0.07063949108123779, "learning_rate": 1.8021143683650524e-05, "loss": 0.6304616332054138, "step": 3697 }, { "epoch": 0.6833042694968299, "grad_norm": 0.06189752742648125, "learning_rate": 1.8019952219578464e-05, "loss": 0.49720051884651184, "step": 3698 }, { "epoch": 0.6834890462057258, "grad_norm": 0.0959625318646431, "learning_rate": 1.8018760436337396e-05, "loss": 0.9130963087081909, "step": 3699 }, { "epoch": 0.6836738229146216, "grad_norm": 0.06473139673471451, "learning_rate": 1.8017568333974748e-05, "loss": 0.5368582010269165, "step": 3700 }, { "epoch": 0.6838585996235175, "grad_norm": 0.0729895830154419, "learning_rate": 1.8016375912537963e-05, "loss": 0.6586579084396362, "step": 3701 }, { "epoch": 0.6840433763324133, "grad_norm": 0.06475422531366348, "learning_rate": 1.8015183172074503e-05, "loss": 0.4308708608150482, "step": 3702 }, { "epoch": 0.6842281530413091, "grad_norm": 0.0831836611032486, "learning_rate": 1.8013990112631824e-05, "loss": 0.738739550113678, "step": 3703 }, { "epoch": 0.684412929750205, "grad_norm": 0.06140682101249695, "learning_rate": 1.8012796734257412e-05, "loss": 0.4796154499053955, "step": 3704 }, { "epoch": 0.6845977064591008, "grad_norm": 0.06863638758659363, "learning_rate": 1.8011603036998762e-05, "loss": 0.5414847135543823, "step": 3705 }, { "epoch": 0.6847824831679967, "grad_norm": 0.08135831356048584, "learning_rate": 1.801040902090337e-05, "loss": 0.8075755834579468, "step": 3706 }, { "epoch": 0.6849672598768926, "grad_norm": 0.05040891095995903, "learning_rate": 1.800921468601877e-05, "loss": 0.45555415749549866, "step": 3707 }, { "epoch": 0.6851520365857884, "grad_norm": 0.0659366175532341, "learning_rate": 1.8008020032392474e-05, "loss": 0.5273883938789368, "step": 3708 }, { "epoch": 0.6853368132946842, "grad_norm": 0.06233956292271614, "learning_rate": 1.8006825060072038e-05, "loss": 0.6255277991294861, "step": 3709 }, { "epoch": 0.68552159000358, "grad_norm": 0.08703937381505966, "learning_rate": 1.8005629769105013e-05, "loss": 0.851870596408844, "step": 3710 }, { "epoch": 0.6857063667124759, "grad_norm": 0.06627961993217468, "learning_rate": 1.8004434159538974e-05, "loss": 0.512995183467865, "step": 3711 }, { "epoch": 0.6858911434213717, "grad_norm": 0.0790930762887001, "learning_rate": 1.8003238231421495e-05, "loss": 0.5680521726608276, "step": 3712 }, { "epoch": 0.6860759201302675, "grad_norm": 0.07907992601394653, "learning_rate": 1.8002041984800173e-05, "loss": 0.6068723201751709, "step": 3713 }, { "epoch": 0.6862606968391635, "grad_norm": 0.08863968402147293, "learning_rate": 1.8000845419722615e-05, "loss": 0.7572266459465027, "step": 3714 }, { "epoch": 0.6864454735480593, "grad_norm": 0.06970055401325226, "learning_rate": 1.799964853623644e-05, "loss": 0.5449360609054565, "step": 3715 }, { "epoch": 0.6866302502569551, "grad_norm": 0.08415934443473816, "learning_rate": 1.7998451334389285e-05, "loss": 0.6751998662948608, "step": 3716 }, { "epoch": 0.686815026965851, "grad_norm": 0.060137297958135605, "learning_rate": 1.7997253814228787e-05, "loss": 0.5908098220825195, "step": 3717 }, { "epoch": 0.6869998036747468, "grad_norm": 0.058346912264823914, "learning_rate": 1.7996055975802608e-05, "loss": 0.4068335294723511, "step": 3718 }, { "epoch": 0.6871845803836426, "grad_norm": 0.07939445227384567, "learning_rate": 1.7994857819158416e-05, "loss": 0.6595229506492615, "step": 3719 }, { "epoch": 0.6873693570925384, "grad_norm": 0.086093969643116, "learning_rate": 1.7993659344343902e-05, "loss": 0.664191484451294, "step": 3720 }, { "epoch": 0.6875541338014344, "grad_norm": 0.0652756467461586, "learning_rate": 1.799246055140675e-05, "loss": 0.5476946234703064, "step": 3721 }, { "epoch": 0.6877389105103302, "grad_norm": 0.06591864675283432, "learning_rate": 1.7991261440394674e-05, "loss": 0.5816909074783325, "step": 3722 }, { "epoch": 0.687923687219226, "grad_norm": 0.08605477958917618, "learning_rate": 1.7990062011355393e-05, "loss": 0.7622525691986084, "step": 3723 }, { "epoch": 0.6881084639281219, "grad_norm": 0.07969419658184052, "learning_rate": 1.7988862264336644e-05, "loss": 0.7628303170204163, "step": 3724 }, { "epoch": 0.6882932406370177, "grad_norm": 0.06664358079433441, "learning_rate": 1.798766219938617e-05, "loss": 0.6266488432884216, "step": 3725 }, { "epoch": 0.6884780173459135, "grad_norm": 0.11209172755479813, "learning_rate": 1.798646181655173e-05, "loss": 0.9541575908660889, "step": 3726 }, { "epoch": 0.6886627940548093, "grad_norm": 0.0730072557926178, "learning_rate": 1.7985261115881096e-05, "loss": 0.6682151556015015, "step": 3727 }, { "epoch": 0.6888475707637053, "grad_norm": 0.07961619645357132, "learning_rate": 1.7984060097422054e-05, "loss": 0.7633053660392761, "step": 3728 }, { "epoch": 0.6890323474726011, "grad_norm": 0.06646641343832016, "learning_rate": 1.7982858761222396e-05, "loss": 0.5505644679069519, "step": 3729 }, { "epoch": 0.6892171241814969, "grad_norm": 0.07400480657815933, "learning_rate": 1.7981657107329933e-05, "loss": 0.6487265229225159, "step": 3730 }, { "epoch": 0.6894019008903928, "grad_norm": 0.07567115128040314, "learning_rate": 1.7980455135792495e-05, "loss": 0.6150903105735779, "step": 3731 }, { "epoch": 0.6895866775992886, "grad_norm": 0.08317635953426361, "learning_rate": 1.7979252846657906e-05, "loss": 0.7369841933250427, "step": 3732 }, { "epoch": 0.6897714543081844, "grad_norm": 0.07989586144685745, "learning_rate": 1.797805023997402e-05, "loss": 0.6162236928939819, "step": 3733 }, { "epoch": 0.6899562310170803, "grad_norm": 0.08052266389131546, "learning_rate": 1.797684731578869e-05, "loss": 0.66728276014328, "step": 3734 }, { "epoch": 0.6901410077259761, "grad_norm": 0.06295419484376907, "learning_rate": 1.7975644074149798e-05, "loss": 0.6573705077171326, "step": 3735 }, { "epoch": 0.690325784434872, "grad_norm": 0.06569328904151917, "learning_rate": 1.7974440515105223e-05, "loss": 0.4756871461868286, "step": 3736 }, { "epoch": 0.6905105611437679, "grad_norm": 0.08963353931903839, "learning_rate": 1.7973236638702864e-05, "loss": 0.764071524143219, "step": 3737 }, { "epoch": 0.6906953378526637, "grad_norm": 0.08409163355827332, "learning_rate": 1.7972032444990633e-05, "loss": 0.7622500061988831, "step": 3738 }, { "epoch": 0.6908801145615595, "grad_norm": 0.07594644278287888, "learning_rate": 1.797082793401645e-05, "loss": 0.568515956401825, "step": 3739 }, { "epoch": 0.6910648912704553, "grad_norm": 0.08169730007648468, "learning_rate": 1.7969623105828254e-05, "loss": 0.6786748766899109, "step": 3740 }, { "epoch": 0.6912496679793512, "grad_norm": 0.07881565392017365, "learning_rate": 1.7968417960473992e-05, "loss": 0.6388014554977417, "step": 3741 }, { "epoch": 0.691434444688247, "grad_norm": 0.06085721775889397, "learning_rate": 1.7967212498001623e-05, "loss": 0.5034889578819275, "step": 3742 }, { "epoch": 0.6916192213971429, "grad_norm": 0.07765382528305054, "learning_rate": 1.7966006718459126e-05, "loss": 0.4946385622024536, "step": 3743 }, { "epoch": 0.6918039981060388, "grad_norm": 0.07568485289812088, "learning_rate": 1.796480062189448e-05, "loss": 0.525029182434082, "step": 3744 }, { "epoch": 0.6919887748149346, "grad_norm": 0.05768405646085739, "learning_rate": 1.7963594208355694e-05, "loss": 0.4105791449546814, "step": 3745 }, { "epoch": 0.6921735515238304, "grad_norm": 0.08348099887371063, "learning_rate": 1.7962387477890768e-05, "loss": 0.876397967338562, "step": 3746 }, { "epoch": 0.6923583282327263, "grad_norm": 0.06559912860393524, "learning_rate": 1.7961180430547737e-05, "loss": 0.5905696153640747, "step": 3747 }, { "epoch": 0.6925431049416221, "grad_norm": 0.07305724173784256, "learning_rate": 1.7959973066374627e-05, "loss": 0.6378846168518066, "step": 3748 }, { "epoch": 0.6927278816505179, "grad_norm": 0.06761205941438675, "learning_rate": 1.7958765385419492e-05, "loss": 0.5716357827186584, "step": 3749 }, { "epoch": 0.6929126583594138, "grad_norm": 0.07637447118759155, "learning_rate": 1.7957557387730397e-05, "loss": 0.6930877566337585, "step": 3750 }, { "epoch": 0.6930974350683097, "grad_norm": 0.08084310591220856, "learning_rate": 1.7956349073355415e-05, "loss": 0.6851915121078491, "step": 3751 }, { "epoch": 0.6932822117772055, "grad_norm": 0.07316136360168457, "learning_rate": 1.7955140442342628e-05, "loss": 0.6507387757301331, "step": 3752 }, { "epoch": 0.6934669884861013, "grad_norm": 0.06407404690980911, "learning_rate": 1.7953931494740143e-05, "loss": 0.5251136422157288, "step": 3753 }, { "epoch": 0.6936517651949972, "grad_norm": 0.06200653687119484, "learning_rate": 1.7952722230596072e-05, "loss": 0.5305708050727844, "step": 3754 }, { "epoch": 0.693836541903893, "grad_norm": 0.08334333449602127, "learning_rate": 1.795151264995853e-05, "loss": 0.6771195530891418, "step": 3755 }, { "epoch": 0.6940213186127888, "grad_norm": 0.09637106955051422, "learning_rate": 1.795030275287567e-05, "loss": 0.7789075374603271, "step": 3756 }, { "epoch": 0.6942060953216846, "grad_norm": 0.07496035844087601, "learning_rate": 1.7949092539395624e-05, "loss": 0.7156970500946045, "step": 3757 }, { "epoch": 0.6943908720305806, "grad_norm": 0.07671833038330078, "learning_rate": 1.7947882009566572e-05, "loss": 0.8442854285240173, "step": 3758 }, { "epoch": 0.6945756487394764, "grad_norm": 0.0862608402967453, "learning_rate": 1.794667116343668e-05, "loss": 0.624031662940979, "step": 3759 }, { "epoch": 0.6947604254483722, "grad_norm": 0.06570187211036682, "learning_rate": 1.7945460001054136e-05, "loss": 0.4962296485900879, "step": 3760 }, { "epoch": 0.6949452021572681, "grad_norm": 0.06686298549175262, "learning_rate": 1.7944248522467145e-05, "loss": 0.5098669528961182, "step": 3761 }, { "epoch": 0.6951299788661639, "grad_norm": 0.0717516541481018, "learning_rate": 1.7943036727723914e-05, "loss": 0.6537037491798401, "step": 3762 }, { "epoch": 0.6953147555750597, "grad_norm": 0.08012343943119049, "learning_rate": 1.7941824616872673e-05, "loss": 0.49236705899238586, "step": 3763 }, { "epoch": 0.6954995322839556, "grad_norm": 0.06467476487159729, "learning_rate": 1.794061218996166e-05, "loss": 0.5756182670593262, "step": 3764 }, { "epoch": 0.6956843089928515, "grad_norm": 0.09816405922174454, "learning_rate": 1.7939399447039124e-05, "loss": 0.7457982301712036, "step": 3765 }, { "epoch": 0.6958690857017473, "grad_norm": 0.08083008974790573, "learning_rate": 1.7938186388153328e-05, "loss": 0.7266609072685242, "step": 3766 }, { "epoch": 0.6960538624106432, "grad_norm": 0.056356221437454224, "learning_rate": 1.793697301335255e-05, "loss": 0.3506295084953308, "step": 3767 }, { "epoch": 0.696238639119539, "grad_norm": 0.07170756161212921, "learning_rate": 1.793575932268508e-05, "loss": 0.7279176115989685, "step": 3768 }, { "epoch": 0.6964234158284348, "grad_norm": 0.0925443023443222, "learning_rate": 1.793454531619921e-05, "loss": 0.8251651525497437, "step": 3769 }, { "epoch": 0.6966081925373306, "grad_norm": 0.09287851303815842, "learning_rate": 1.793333099394327e-05, "loss": 0.7615050673484802, "step": 3770 }, { "epoch": 0.6967929692462265, "grad_norm": 0.07109637558460236, "learning_rate": 1.7932116355965573e-05, "loss": 0.7140378355979919, "step": 3771 }, { "epoch": 0.6969777459551224, "grad_norm": 0.0803835317492485, "learning_rate": 1.7930901402314457e-05, "loss": 0.6896501779556274, "step": 3772 }, { "epoch": 0.6971625226640182, "grad_norm": 0.08121559768915176, "learning_rate": 1.792968613303828e-05, "loss": 0.7581483721733093, "step": 3773 }, { "epoch": 0.6973472993729141, "grad_norm": 0.07262784987688065, "learning_rate": 1.7928470548185406e-05, "loss": 0.5780869722366333, "step": 3774 }, { "epoch": 0.6975320760818099, "grad_norm": 0.06558236479759216, "learning_rate": 1.792725464780421e-05, "loss": 0.6090644598007202, "step": 3775 }, { "epoch": 0.6977168527907057, "grad_norm": 0.08067404478788376, "learning_rate": 1.7926038431943077e-05, "loss": 0.6883424520492554, "step": 3776 }, { "epoch": 0.6979016294996016, "grad_norm": 0.0694260448217392, "learning_rate": 1.7924821900650413e-05, "loss": 0.5911904573440552, "step": 3777 }, { "epoch": 0.6980864062084974, "grad_norm": 0.07945213466882706, "learning_rate": 1.792360505397463e-05, "loss": 0.7563620209693909, "step": 3778 }, { "epoch": 0.6982711829173932, "grad_norm": 0.0901646539568901, "learning_rate": 1.7922387891964156e-05, "loss": 0.8546429872512817, "step": 3779 }, { "epoch": 0.6984559596262891, "grad_norm": 0.06837425380945206, "learning_rate": 1.7921170414667434e-05, "loss": 0.5491940379142761, "step": 3780 }, { "epoch": 0.698640736335185, "grad_norm": 0.05254460498690605, "learning_rate": 1.7919952622132906e-05, "loss": 0.41425883769989014, "step": 3781 }, { "epoch": 0.6988255130440808, "grad_norm": 0.09623497724533081, "learning_rate": 1.7918734514409043e-05, "loss": 0.7828701734542847, "step": 3782 }, { "epoch": 0.6990102897529766, "grad_norm": 0.07763856649398804, "learning_rate": 1.7917516091544322e-05, "loss": 0.6677383780479431, "step": 3783 }, { "epoch": 0.6991950664618725, "grad_norm": 0.08122856169939041, "learning_rate": 1.791629735358723e-05, "loss": 0.6720624566078186, "step": 3784 }, { "epoch": 0.6993798431707683, "grad_norm": 0.08240723609924316, "learning_rate": 1.7915078300586274e-05, "loss": 0.6644288301467896, "step": 3785 }, { "epoch": 0.6995646198796641, "grad_norm": 0.0688764750957489, "learning_rate": 1.791385893258996e-05, "loss": 0.576237142086029, "step": 3786 }, { "epoch": 0.6997493965885601, "grad_norm": 0.07055231928825378, "learning_rate": 1.7912639249646822e-05, "loss": 0.5224719643592834, "step": 3787 }, { "epoch": 0.6999341732974559, "grad_norm": 0.057666126638650894, "learning_rate": 1.79114192518054e-05, "loss": 0.47353747487068176, "step": 3788 }, { "epoch": 0.7001189500063517, "grad_norm": 0.08372517675161362, "learning_rate": 1.791019893911424e-05, "loss": 0.7721982598304749, "step": 3789 }, { "epoch": 0.7003037267152475, "grad_norm": 0.06702134013175964, "learning_rate": 1.790897831162191e-05, "loss": 0.7016957998275757, "step": 3790 }, { "epoch": 0.7004885034241434, "grad_norm": 0.06706986576318741, "learning_rate": 1.7907757369376984e-05, "loss": 0.6234015226364136, "step": 3791 }, { "epoch": 0.7006732801330392, "grad_norm": 0.05316302180290222, "learning_rate": 1.7906536112428063e-05, "loss": 0.4383869171142578, "step": 3792 }, { "epoch": 0.700858056841935, "grad_norm": 0.07911352813243866, "learning_rate": 1.7905314540823738e-05, "loss": 0.6943994164466858, "step": 3793 }, { "epoch": 0.701042833550831, "grad_norm": 0.08364924788475037, "learning_rate": 1.7904092654612623e-05, "loss": 0.8795351982116699, "step": 3794 }, { "epoch": 0.7012276102597268, "grad_norm": 0.08151282370090485, "learning_rate": 1.7902870453843352e-05, "loss": 0.8242526054382324, "step": 3795 }, { "epoch": 0.7014123869686226, "grad_norm": 0.05963482707738876, "learning_rate": 1.790164793856456e-05, "loss": 0.4913819134235382, "step": 3796 }, { "epoch": 0.7015971636775185, "grad_norm": 0.07553902268409729, "learning_rate": 1.7900425108824907e-05, "loss": 0.5874858498573303, "step": 3797 }, { "epoch": 0.7017819403864143, "grad_norm": 0.06418804824352264, "learning_rate": 1.7899201964673046e-05, "loss": 0.6610527634620667, "step": 3798 }, { "epoch": 0.7019667170953101, "grad_norm": 0.05755390599370003, "learning_rate": 1.7897978506157663e-05, "loss": 0.4823980927467346, "step": 3799 }, { "epoch": 0.7021514938042059, "grad_norm": 0.05311084911227226, "learning_rate": 1.7896754733327443e-05, "loss": 0.44125896692276, "step": 3800 }, { "epoch": 0.7023362705131018, "grad_norm": 0.07113431394100189, "learning_rate": 1.7895530646231092e-05, "loss": 0.733215868473053, "step": 3801 }, { "epoch": 0.7025210472219977, "grad_norm": 0.0652877688407898, "learning_rate": 1.7894306244917322e-05, "loss": 0.6344561576843262, "step": 3802 }, { "epoch": 0.7027058239308935, "grad_norm": 0.051007334142923355, "learning_rate": 1.7893081529434862e-05, "loss": 0.4225222170352936, "step": 3803 }, { "epoch": 0.7028906006397894, "grad_norm": 0.07267943769693375, "learning_rate": 1.7891856499832455e-05, "loss": 0.6625338196754456, "step": 3804 }, { "epoch": 0.7030753773486852, "grad_norm": 0.07777641713619232, "learning_rate": 1.789063115615884e-05, "loss": 0.5510960221290588, "step": 3805 }, { "epoch": 0.703260154057581, "grad_norm": 0.06831711530685425, "learning_rate": 1.78894054984628e-05, "loss": 0.6913149356842041, "step": 3806 }, { "epoch": 0.7034449307664769, "grad_norm": 0.05947549268603325, "learning_rate": 1.7888179526793102e-05, "loss": 0.5916891694068909, "step": 3807 }, { "epoch": 0.7036297074753727, "grad_norm": 0.06820245832204819, "learning_rate": 1.788695324119854e-05, "loss": 0.6104221940040588, "step": 3808 }, { "epoch": 0.7038144841842686, "grad_norm": 0.08294255286455154, "learning_rate": 1.788572664172791e-05, "loss": 0.6323229670524597, "step": 3809 }, { "epoch": 0.7039992608931644, "grad_norm": 0.0709289014339447, "learning_rate": 1.7884499728430034e-05, "loss": 0.6614767909049988, "step": 3810 }, { "epoch": 0.7041840376020603, "grad_norm": 0.07165282219648361, "learning_rate": 1.788327250135374e-05, "loss": 0.5701707601547241, "step": 3811 }, { "epoch": 0.7043688143109561, "grad_norm": 0.07202035188674927, "learning_rate": 1.7882044960547854e-05, "loss": 0.5524270534515381, "step": 3812 }, { "epoch": 0.7045535910198519, "grad_norm": 0.06056154891848564, "learning_rate": 1.7880817106061244e-05, "loss": 0.6596307158470154, "step": 3813 }, { "epoch": 0.7047383677287478, "grad_norm": 0.06145206466317177, "learning_rate": 1.7879588937942765e-05, "loss": 0.549569845199585, "step": 3814 }, { "epoch": 0.7049231444376436, "grad_norm": 0.07081493735313416, "learning_rate": 1.7878360456241302e-05, "loss": 0.5861347913742065, "step": 3815 }, { "epoch": 0.7051079211465395, "grad_norm": 0.06296757608652115, "learning_rate": 1.7877131661005745e-05, "loss": 0.5720245838165283, "step": 3816 }, { "epoch": 0.7052926978554354, "grad_norm": 0.07370515912771225, "learning_rate": 1.787590255228499e-05, "loss": 0.6370769143104553, "step": 3817 }, { "epoch": 0.7054774745643312, "grad_norm": 0.08321458101272583, "learning_rate": 1.787467313012795e-05, "loss": 0.7091321349143982, "step": 3818 }, { "epoch": 0.705662251273227, "grad_norm": 0.06314190477132797, "learning_rate": 1.7873443394583558e-05, "loss": 0.6903059482574463, "step": 3819 }, { "epoch": 0.7058470279821228, "grad_norm": 0.06438859552145004, "learning_rate": 1.787221334570075e-05, "loss": 0.47248029708862305, "step": 3820 }, { "epoch": 0.7060318046910187, "grad_norm": 0.07569018006324768, "learning_rate": 1.787098298352848e-05, "loss": 0.684784471988678, "step": 3821 }, { "epoch": 0.7062165813999145, "grad_norm": 0.06873763352632523, "learning_rate": 1.7869752308115717e-05, "loss": 0.5052890777587891, "step": 3822 }, { "epoch": 0.7064013581088104, "grad_norm": 0.07325165718793869, "learning_rate": 1.786852131951143e-05, "loss": 0.6105488538742065, "step": 3823 }, { "epoch": 0.7065861348177063, "grad_norm": 0.07279790937900543, "learning_rate": 1.7867290017764612e-05, "loss": 0.6297613978385925, "step": 3824 }, { "epoch": 0.7067709115266021, "grad_norm": 0.06293085962533951, "learning_rate": 1.7866058402924266e-05, "loss": 0.4462578594684601, "step": 3825 }, { "epoch": 0.7069556882354979, "grad_norm": 0.09141212701797485, "learning_rate": 1.7864826475039404e-05, "loss": 0.7812224626541138, "step": 3826 }, { "epoch": 0.7071404649443938, "grad_norm": 0.0632830262184143, "learning_rate": 1.7863594234159056e-05, "loss": 0.5227277278900146, "step": 3827 }, { "epoch": 0.7073252416532896, "grad_norm": 0.07047037035226822, "learning_rate": 1.786236168033226e-05, "loss": 0.590502142906189, "step": 3828 }, { "epoch": 0.7075100183621854, "grad_norm": 0.07778255641460419, "learning_rate": 1.7861128813608066e-05, "loss": 0.7174902558326721, "step": 3829 }, { "epoch": 0.7076947950710812, "grad_norm": 0.0773974359035492, "learning_rate": 1.7859895634035536e-05, "loss": 0.5292126536369324, "step": 3830 }, { "epoch": 0.7078795717799772, "grad_norm": 0.06591079384088516, "learning_rate": 1.7858662141663755e-05, "loss": 0.651407778263092, "step": 3831 }, { "epoch": 0.708064348488873, "grad_norm": 0.06398523598909378, "learning_rate": 1.7857428336541805e-05, "loss": 0.5415223836898804, "step": 3832 }, { "epoch": 0.7082491251977688, "grad_norm": 0.0722912922501564, "learning_rate": 1.7856194218718788e-05, "loss": 0.6524251103401184, "step": 3833 }, { "epoch": 0.7084339019066647, "grad_norm": 0.06724940985441208, "learning_rate": 1.7854959788243825e-05, "loss": 0.5232310891151428, "step": 3834 }, { "epoch": 0.7086186786155605, "grad_norm": 0.08662715554237366, "learning_rate": 1.7853725045166036e-05, "loss": 0.7855296730995178, "step": 3835 }, { "epoch": 0.7088034553244563, "grad_norm": 0.06664146482944489, "learning_rate": 1.785248998953456e-05, "loss": 0.47468939423561096, "step": 3836 }, { "epoch": 0.7089882320333522, "grad_norm": 0.08050867915153503, "learning_rate": 1.785125462139855e-05, "loss": 0.8734326362609863, "step": 3837 }, { "epoch": 0.7091730087422481, "grad_norm": 0.07380545884370804, "learning_rate": 1.785001894080717e-05, "loss": 0.6531058549880981, "step": 3838 }, { "epoch": 0.7093577854511439, "grad_norm": 0.07150714844465256, "learning_rate": 1.7848782947809595e-05, "loss": 0.6065245270729065, "step": 3839 }, { "epoch": 0.7095425621600397, "grad_norm": 0.06883050501346588, "learning_rate": 1.7847546642455016e-05, "loss": 0.6219596266746521, "step": 3840 }, { "epoch": 0.7097273388689356, "grad_norm": 0.0754944309592247, "learning_rate": 1.7846310024792634e-05, "loss": 0.6314144134521484, "step": 3841 }, { "epoch": 0.7099121155778314, "grad_norm": 0.07237895578145981, "learning_rate": 1.7845073094871653e-05, "loss": 0.6415191888809204, "step": 3842 }, { "epoch": 0.7100968922867272, "grad_norm": 0.07527688145637512, "learning_rate": 1.7843835852741315e-05, "loss": 0.6790621280670166, "step": 3843 }, { "epoch": 0.7102816689956231, "grad_norm": 0.06775790452957153, "learning_rate": 1.7842598298450845e-05, "loss": 0.6450338363647461, "step": 3844 }, { "epoch": 0.710466445704519, "grad_norm": 0.0949234738945961, "learning_rate": 1.7841360432049503e-05, "loss": 0.7092158198356628, "step": 3845 }, { "epoch": 0.7106512224134148, "grad_norm": 0.06313542276620865, "learning_rate": 1.7840122253586546e-05, "loss": 0.502371609210968, "step": 3846 }, { "epoch": 0.7108359991223107, "grad_norm": 0.07874614000320435, "learning_rate": 1.7838883763111254e-05, "loss": 0.6510919332504272, "step": 3847 }, { "epoch": 0.7110207758312065, "grad_norm": 0.07000324130058289, "learning_rate": 1.783764496067291e-05, "loss": 0.6148480176925659, "step": 3848 }, { "epoch": 0.7112055525401023, "grad_norm": 0.07263125479221344, "learning_rate": 1.783640584632082e-05, "loss": 0.6642104983329773, "step": 3849 }, { "epoch": 0.7113903292489981, "grad_norm": 0.08290033042430878, "learning_rate": 1.783516642010429e-05, "loss": 0.6861293315887451, "step": 3850 }, { "epoch": 0.711575105957894, "grad_norm": 0.0689179077744484, "learning_rate": 1.7833926682072657e-05, "loss": 0.5201602578163147, "step": 3851 }, { "epoch": 0.7117598826667898, "grad_norm": 0.08319676667451859, "learning_rate": 1.7832686632275246e-05, "loss": 0.682870090007782, "step": 3852 }, { "epoch": 0.7119446593756857, "grad_norm": 0.06869909167289734, "learning_rate": 1.7831446270761416e-05, "loss": 0.6011759042739868, "step": 3853 }, { "epoch": 0.7121294360845816, "grad_norm": 0.0635833740234375, "learning_rate": 1.7830205597580522e-05, "loss": 0.5728490352630615, "step": 3854 }, { "epoch": 0.7123142127934774, "grad_norm": 0.07619897276163101, "learning_rate": 1.7828964612781943e-05, "loss": 0.5542641282081604, "step": 3855 }, { "epoch": 0.7124989895023732, "grad_norm": 0.06108187139034271, "learning_rate": 1.7827723316415068e-05, "loss": 0.4776553511619568, "step": 3856 }, { "epoch": 0.712683766211269, "grad_norm": 0.09078365564346313, "learning_rate": 1.7826481708529292e-05, "loss": 0.8141420483589172, "step": 3857 }, { "epoch": 0.7128685429201649, "grad_norm": 0.06589607894420624, "learning_rate": 1.782523978917403e-05, "loss": 0.5259532332420349, "step": 3858 }, { "epoch": 0.7130533196290607, "grad_norm": 0.07421430945396423, "learning_rate": 1.782399755839871e-05, "loss": 0.6141277551651001, "step": 3859 }, { "epoch": 0.7132380963379567, "grad_norm": 0.07042445987462997, "learning_rate": 1.7822755016252765e-05, "loss": 0.637400209903717, "step": 3860 }, { "epoch": 0.7134228730468525, "grad_norm": 0.07997411489486694, "learning_rate": 1.7821512162785643e-05, "loss": 0.6571568846702576, "step": 3861 }, { "epoch": 0.7136076497557483, "grad_norm": 0.08841746300458908, "learning_rate": 1.7820268998046808e-05, "loss": 0.7182778716087341, "step": 3862 }, { "epoch": 0.7137924264646441, "grad_norm": 0.06398257613182068, "learning_rate": 1.7819025522085733e-05, "loss": 0.5413829684257507, "step": 3863 }, { "epoch": 0.71397720317354, "grad_norm": 0.07014451175928116, "learning_rate": 1.7817781734951903e-05, "loss": 0.5261696577072144, "step": 3864 }, { "epoch": 0.7141619798824358, "grad_norm": 0.0701596662402153, "learning_rate": 1.781653763669482e-05, "loss": 0.675118625164032, "step": 3865 }, { "epoch": 0.7143467565913316, "grad_norm": 0.07133375108242035, "learning_rate": 1.7815293227363995e-05, "loss": 0.5645657777786255, "step": 3866 }, { "epoch": 0.7145315333002276, "grad_norm": 0.06540185958147049, "learning_rate": 1.781404850700895e-05, "loss": 0.6554360389709473, "step": 3867 }, { "epoch": 0.7147163100091234, "grad_norm": 0.09399189800024033, "learning_rate": 1.7812803475679224e-05, "loss": 0.8199816942214966, "step": 3868 }, { "epoch": 0.7149010867180192, "grad_norm": 0.06917788833379745, "learning_rate": 1.7811558133424358e-05, "loss": 0.5742474794387817, "step": 3869 }, { "epoch": 0.715085863426915, "grad_norm": 0.0679846853017807, "learning_rate": 1.781031248029392e-05, "loss": 0.5272837281227112, "step": 3870 }, { "epoch": 0.7152706401358109, "grad_norm": 0.09259763360023499, "learning_rate": 1.780906651633748e-05, "loss": 0.7504703402519226, "step": 3871 }, { "epoch": 0.7154554168447067, "grad_norm": 0.06919008493423462, "learning_rate": 1.7807820241604626e-05, "loss": 0.6918947100639343, "step": 3872 }, { "epoch": 0.7156401935536025, "grad_norm": 0.053969644010066986, "learning_rate": 1.780657365614495e-05, "loss": 0.41444283723831177, "step": 3873 }, { "epoch": 0.7158249702624984, "grad_norm": 0.06332223862409592, "learning_rate": 1.780532676000807e-05, "loss": 0.5879743695259094, "step": 3874 }, { "epoch": 0.7160097469713943, "grad_norm": 0.0728738009929657, "learning_rate": 1.7804079553243602e-05, "loss": 0.6047773361206055, "step": 3875 }, { "epoch": 0.7161945236802901, "grad_norm": 0.06611176580190659, "learning_rate": 1.7802832035901186e-05, "loss": 0.5424304604530334, "step": 3876 }, { "epoch": 0.716379300389186, "grad_norm": 0.0637843981385231, "learning_rate": 1.7801584208030464e-05, "loss": 0.5460506677627563, "step": 3877 }, { "epoch": 0.7165640770980818, "grad_norm": 0.06435133516788483, "learning_rate": 1.78003360696811e-05, "loss": 0.5191080570220947, "step": 3878 }, { "epoch": 0.7167488538069776, "grad_norm": 0.0822996124625206, "learning_rate": 1.7799087620902765e-05, "loss": 0.6032326221466064, "step": 3879 }, { "epoch": 0.7169336305158734, "grad_norm": 0.07257374376058578, "learning_rate": 1.779783886174514e-05, "loss": 0.7256016731262207, "step": 3880 }, { "epoch": 0.7171184072247693, "grad_norm": 0.070966936647892, "learning_rate": 1.7796589792257927e-05, "loss": 0.5935529470443726, "step": 3881 }, { "epoch": 0.7173031839336652, "grad_norm": 0.07597895711660385, "learning_rate": 1.7795340412490834e-05, "loss": 0.6139712333679199, "step": 3882 }, { "epoch": 0.717487960642561, "grad_norm": 0.06419572979211807, "learning_rate": 1.779409072249358e-05, "loss": 0.5486287474632263, "step": 3883 }, { "epoch": 0.7176727373514569, "grad_norm": 0.08788354694843292, "learning_rate": 1.7792840722315897e-05, "loss": 0.8828362226486206, "step": 3884 }, { "epoch": 0.7178575140603527, "grad_norm": 0.06075332686305046, "learning_rate": 1.779159041200754e-05, "loss": 0.43247318267822266, "step": 3885 }, { "epoch": 0.7180422907692485, "grad_norm": 0.08744195848703384, "learning_rate": 1.7790339791618258e-05, "loss": 0.5396788716316223, "step": 3886 }, { "epoch": 0.7182270674781444, "grad_norm": 0.07079479843378067, "learning_rate": 1.7789088861197824e-05, "loss": 0.5855565667152405, "step": 3887 }, { "epoch": 0.7184118441870402, "grad_norm": 0.09693542867898941, "learning_rate": 1.778783762079602e-05, "loss": 0.7175348401069641, "step": 3888 }, { "epoch": 0.7185966208959361, "grad_norm": 0.07204362750053406, "learning_rate": 1.778658607046265e-05, "loss": 0.6128134727478027, "step": 3889 }, { "epoch": 0.718781397604832, "grad_norm": 0.06511104851961136, "learning_rate": 1.778533421024751e-05, "loss": 0.6019546389579773, "step": 3890 }, { "epoch": 0.7189661743137278, "grad_norm": 0.07765532284975052, "learning_rate": 1.778408204020043e-05, "loss": 0.6474810838699341, "step": 3891 }, { "epoch": 0.7191509510226236, "grad_norm": 0.09051033109426498, "learning_rate": 1.778282956037124e-05, "loss": 0.7019118070602417, "step": 3892 }, { "epoch": 0.7193357277315194, "grad_norm": 0.07164207100868225, "learning_rate": 1.7781576770809774e-05, "loss": 0.5620560646057129, "step": 3893 }, { "epoch": 0.7195205044404153, "grad_norm": 0.0695822462439537, "learning_rate": 1.7780323671565904e-05, "loss": 0.6145302653312683, "step": 3894 }, { "epoch": 0.7197052811493111, "grad_norm": 0.10266381502151489, "learning_rate": 1.7779070262689493e-05, "loss": 0.8040466904640198, "step": 3895 }, { "epoch": 0.7198900578582069, "grad_norm": 0.06830579787492752, "learning_rate": 1.777781654423042e-05, "loss": 0.5737794637680054, "step": 3896 }, { "epoch": 0.7200748345671029, "grad_norm": 0.08501392602920532, "learning_rate": 1.7776562516238586e-05, "loss": 0.6017941832542419, "step": 3897 }, { "epoch": 0.7202596112759987, "grad_norm": 0.08942998200654984, "learning_rate": 1.7775308178763892e-05, "loss": 0.8681851029396057, "step": 3898 }, { "epoch": 0.7204443879848945, "grad_norm": 0.07187824696302414, "learning_rate": 1.7774053531856258e-05, "loss": 0.6001152396202087, "step": 3899 }, { "epoch": 0.7206291646937903, "grad_norm": 0.07265845686197281, "learning_rate": 1.7772798575565618e-05, "loss": 0.6501057147979736, "step": 3900 }, { "epoch": 0.7208139414026862, "grad_norm": 0.06733600050210953, "learning_rate": 1.777154330994191e-05, "loss": 0.5438884496688843, "step": 3901 }, { "epoch": 0.720998718111582, "grad_norm": 0.07749243080615997, "learning_rate": 1.7770287735035093e-05, "loss": 0.6194239258766174, "step": 3902 }, { "epoch": 0.7211834948204778, "grad_norm": 0.07621680945158005, "learning_rate": 1.7769031850895133e-05, "loss": 0.5735338926315308, "step": 3903 }, { "epoch": 0.7213682715293738, "grad_norm": 0.06601224094629288, "learning_rate": 1.7767775657572014e-05, "loss": 0.5219985842704773, "step": 3904 }, { "epoch": 0.7215530482382696, "grad_norm": 0.08114080131053925, "learning_rate": 1.7766519155115726e-05, "loss": 0.8340771794319153, "step": 3905 }, { "epoch": 0.7217378249471654, "grad_norm": 0.07835365831851959, "learning_rate": 1.776526234357627e-05, "loss": 0.5177896618843079, "step": 3906 }, { "epoch": 0.7219226016560613, "grad_norm": 0.06266766041517258, "learning_rate": 1.7764005223003668e-05, "loss": 0.4608587324619293, "step": 3907 }, { "epoch": 0.7221073783649571, "grad_norm": 0.06603166460990906, "learning_rate": 1.7762747793447953e-05, "loss": 0.6084400415420532, "step": 3908 }, { "epoch": 0.7222921550738529, "grad_norm": 0.07372553646564484, "learning_rate": 1.7761490054959162e-05, "loss": 0.6600576043128967, "step": 3909 }, { "epoch": 0.7224769317827487, "grad_norm": 0.058667220175266266, "learning_rate": 1.7760232007587346e-05, "loss": 0.43612024188041687, "step": 3910 }, { "epoch": 0.7226617084916447, "grad_norm": 0.07343069463968277, "learning_rate": 1.7758973651382573e-05, "loss": 0.6958191394805908, "step": 3911 }, { "epoch": 0.7228464852005405, "grad_norm": 0.0658382996916771, "learning_rate": 1.775771498639493e-05, "loss": 0.6587331295013428, "step": 3912 }, { "epoch": 0.7230312619094363, "grad_norm": 0.05375619977712631, "learning_rate": 1.7756456012674494e-05, "loss": 0.44945162534713745, "step": 3913 }, { "epoch": 0.7232160386183322, "grad_norm": 0.07939761877059937, "learning_rate": 1.775519673027138e-05, "loss": 0.6397998929023743, "step": 3914 }, { "epoch": 0.723400815327228, "grad_norm": 0.06388827413320541, "learning_rate": 1.77539371392357e-05, "loss": 0.4501069486141205, "step": 3915 }, { "epoch": 0.7235855920361238, "grad_norm": 0.08833901584148407, "learning_rate": 1.7752677239617578e-05, "loss": 0.7420762777328491, "step": 3916 }, { "epoch": 0.7237703687450197, "grad_norm": 0.06442126631736755, "learning_rate": 1.7751417031467156e-05, "loss": 0.5256044268608093, "step": 3917 }, { "epoch": 0.7239551454539155, "grad_norm": 0.06660003215074539, "learning_rate": 1.775015651483459e-05, "loss": 0.5712388157844543, "step": 3918 }, { "epoch": 0.7241399221628114, "grad_norm": 0.07039026916027069, "learning_rate": 1.774889568977004e-05, "loss": 0.7009496092796326, "step": 3919 }, { "epoch": 0.7243246988717073, "grad_norm": 0.06107284873723984, "learning_rate": 1.7747634556323687e-05, "loss": 0.6473613977432251, "step": 3920 }, { "epoch": 0.7245094755806031, "grad_norm": 0.11032213270664215, "learning_rate": 1.7746373114545715e-05, "loss": 0.5946314930915833, "step": 3921 }, { "epoch": 0.7246942522894989, "grad_norm": 0.07178107649087906, "learning_rate": 1.7745111364486328e-05, "loss": 0.6469153165817261, "step": 3922 }, { "epoch": 0.7248790289983947, "grad_norm": 0.06041441857814789, "learning_rate": 1.7743849306195744e-05, "loss": 0.4419972598552704, "step": 3923 }, { "epoch": 0.7250638057072906, "grad_norm": 0.08421284705400467, "learning_rate": 1.7742586939724183e-05, "loss": 0.6808865070343018, "step": 3924 }, { "epoch": 0.7252485824161864, "grad_norm": 0.07748471945524216, "learning_rate": 1.7741324265121883e-05, "loss": 0.6303725242614746, "step": 3925 }, { "epoch": 0.7254333591250823, "grad_norm": 0.09201602637767792, "learning_rate": 1.7740061282439097e-05, "loss": 0.7040421366691589, "step": 3926 }, { "epoch": 0.7256181358339782, "grad_norm": 0.07980838418006897, "learning_rate": 1.7738797991726092e-05, "loss": 0.6566523313522339, "step": 3927 }, { "epoch": 0.725802912542874, "grad_norm": 0.06741447001695633, "learning_rate": 1.7737534393033134e-05, "loss": 0.6093820929527283, "step": 3928 }, { "epoch": 0.7259876892517698, "grad_norm": 0.05772562697529793, "learning_rate": 1.773627048641052e-05, "loss": 0.5092942118644714, "step": 3929 }, { "epoch": 0.7261724659606656, "grad_norm": 0.08732646703720093, "learning_rate": 1.773500627190854e-05, "loss": 0.721347451210022, "step": 3930 }, { "epoch": 0.7263572426695615, "grad_norm": 0.07607407867908478, "learning_rate": 1.7733741749577512e-05, "loss": 0.6305980682373047, "step": 3931 }, { "epoch": 0.7265420193784573, "grad_norm": 0.08443769067525864, "learning_rate": 1.7732476919467757e-05, "loss": 0.8407239317893982, "step": 3932 }, { "epoch": 0.7267267960873532, "grad_norm": 0.07998590916395187, "learning_rate": 1.773121178162961e-05, "loss": 0.5579816102981567, "step": 3933 }, { "epoch": 0.7269115727962491, "grad_norm": 0.06412210315465927, "learning_rate": 1.7729946336113428e-05, "loss": 0.5187691450119019, "step": 3934 }, { "epoch": 0.7270963495051449, "grad_norm": 0.06768874824047089, "learning_rate": 1.7728680582969562e-05, "loss": 0.5259274840354919, "step": 3935 }, { "epoch": 0.7272811262140407, "grad_norm": 0.07999744266271591, "learning_rate": 1.7727414522248386e-05, "loss": 0.61149001121521, "step": 3936 }, { "epoch": 0.7274659029229366, "grad_norm": 0.05589217320084572, "learning_rate": 1.7726148154000294e-05, "loss": 0.45100581645965576, "step": 3937 }, { "epoch": 0.7276506796318324, "grad_norm": 0.069762222468853, "learning_rate": 1.772488147827567e-05, "loss": 0.48234400153160095, "step": 3938 }, { "epoch": 0.7278354563407282, "grad_norm": 0.07306203246116638, "learning_rate": 1.772361449512494e-05, "loss": 0.4789789319038391, "step": 3939 }, { "epoch": 0.728020233049624, "grad_norm": 0.07036879658699036, "learning_rate": 1.772234720459851e-05, "loss": 0.7625179886817932, "step": 3940 }, { "epoch": 0.72820500975852, "grad_norm": 0.08836708217859268, "learning_rate": 1.772107960674683e-05, "loss": 0.7655839323997498, "step": 3941 }, { "epoch": 0.7283897864674158, "grad_norm": 0.05918063595890999, "learning_rate": 1.7719811701620327e-05, "loss": 0.49179041385650635, "step": 3942 }, { "epoch": 0.7285745631763116, "grad_norm": 0.08613593131303787, "learning_rate": 1.7718543489269477e-05, "loss": 0.6651601791381836, "step": 3943 }, { "epoch": 0.7287593398852075, "grad_norm": 0.06794585287570953, "learning_rate": 1.771727496974474e-05, "loss": 0.6362688541412354, "step": 3944 }, { "epoch": 0.7289441165941033, "grad_norm": 0.08600226044654846, "learning_rate": 1.771600614309661e-05, "loss": 0.763226330280304, "step": 3945 }, { "epoch": 0.7291288933029991, "grad_norm": 0.08081506937742233, "learning_rate": 1.771473700937557e-05, "loss": 0.6456172466278076, "step": 3946 }, { "epoch": 0.729313670011895, "grad_norm": 0.05206015706062317, "learning_rate": 1.7713467568632136e-05, "loss": 0.47310560941696167, "step": 3947 }, { "epoch": 0.7294984467207909, "grad_norm": 0.07662763446569443, "learning_rate": 1.7712197820916826e-05, "loss": 0.7199443578720093, "step": 3948 }, { "epoch": 0.7296832234296867, "grad_norm": 0.08438249677419662, "learning_rate": 1.7710927766280167e-05, "loss": 0.5828445553779602, "step": 3949 }, { "epoch": 0.7298680001385826, "grad_norm": 0.05719861388206482, "learning_rate": 1.7709657404772712e-05, "loss": 0.48395925760269165, "step": 3950 }, { "epoch": 0.7300527768474784, "grad_norm": 0.0705437883734703, "learning_rate": 1.770838673644501e-05, "loss": 0.5369189977645874, "step": 3951 }, { "epoch": 0.7302375535563742, "grad_norm": 0.07795518636703491, "learning_rate": 1.7707115761347633e-05, "loss": 0.6306718587875366, "step": 3952 }, { "epoch": 0.73042233026527, "grad_norm": 0.06175463646650314, "learning_rate": 1.7705844479531162e-05, "loss": 0.5458230972290039, "step": 3953 }, { "epoch": 0.7306071069741659, "grad_norm": 0.0780644491314888, "learning_rate": 1.770457289104619e-05, "loss": 0.6508029699325562, "step": 3954 }, { "epoch": 0.7307918836830618, "grad_norm": 0.07498148083686829, "learning_rate": 1.770330099594332e-05, "loss": 0.5971976518630981, "step": 3955 }, { "epoch": 0.7309766603919576, "grad_norm": 0.06846655160188675, "learning_rate": 1.7702028794273167e-05, "loss": 0.6009979248046875, "step": 3956 }, { "epoch": 0.7311614371008535, "grad_norm": 0.05756732448935509, "learning_rate": 1.7700756286086372e-05, "loss": 0.5455886125564575, "step": 3957 }, { "epoch": 0.7313462138097493, "grad_norm": 0.08418041467666626, "learning_rate": 1.7699483471433564e-05, "loss": 0.73096764087677, "step": 3958 }, { "epoch": 0.7315309905186451, "grad_norm": 0.07023555040359497, "learning_rate": 1.7698210350365404e-05, "loss": 0.5592522025108337, "step": 3959 }, { "epoch": 0.731715767227541, "grad_norm": 0.0518750362098217, "learning_rate": 1.7696936922932556e-05, "loss": 0.4030129611492157, "step": 3960 }, { "epoch": 0.7319005439364368, "grad_norm": 0.0892823114991188, "learning_rate": 1.7695663189185703e-05, "loss": 0.691656231880188, "step": 3961 }, { "epoch": 0.7320853206453326, "grad_norm": 0.0701933354139328, "learning_rate": 1.7694389149175527e-05, "loss": 0.5611509680747986, "step": 3962 }, { "epoch": 0.7322700973542285, "grad_norm": 0.07196095585823059, "learning_rate": 1.7693114802952736e-05, "loss": 0.5962368249893188, "step": 3963 }, { "epoch": 0.7324548740631244, "grad_norm": 0.06125257909297943, "learning_rate": 1.7691840150568046e-05, "loss": 0.5681639909744263, "step": 3964 }, { "epoch": 0.7326396507720202, "grad_norm": 0.06818356364965439, "learning_rate": 1.7690565192072182e-05, "loss": 0.5990875959396362, "step": 3965 }, { "epoch": 0.732824427480916, "grad_norm": 0.06957458704710007, "learning_rate": 1.7689289927515883e-05, "loss": 0.6563817858695984, "step": 3966 }, { "epoch": 0.7330092041898119, "grad_norm": 0.07935319095849991, "learning_rate": 1.76880143569499e-05, "loss": 0.7299106121063232, "step": 3967 }, { "epoch": 0.7331939808987077, "grad_norm": 0.09141634404659271, "learning_rate": 1.7686738480425004e-05, "loss": 0.8262278437614441, "step": 3968 }, { "epoch": 0.7333787576076035, "grad_norm": 0.06430824100971222, "learning_rate": 1.7685462297991966e-05, "loss": 0.4728889763355255, "step": 3969 }, { "epoch": 0.7335635343164995, "grad_norm": 0.06557998061180115, "learning_rate": 1.7684185809701567e-05, "loss": 0.563641369342804, "step": 3970 }, { "epoch": 0.7337483110253953, "grad_norm": 0.0910504013299942, "learning_rate": 1.7682909015604615e-05, "loss": 0.9313309192657471, "step": 3971 }, { "epoch": 0.7339330877342911, "grad_norm": 0.08911722898483276, "learning_rate": 1.7681631915751922e-05, "loss": 0.6804410815238953, "step": 3972 }, { "epoch": 0.7341178644431869, "grad_norm": 0.07896842062473297, "learning_rate": 1.7680354510194312e-05, "loss": 0.6099547743797302, "step": 3973 }, { "epoch": 0.7343026411520828, "grad_norm": 0.06189596280455589, "learning_rate": 1.767907679898262e-05, "loss": 0.47814786434173584, "step": 3974 }, { "epoch": 0.7344874178609786, "grad_norm": 0.06864321231842041, "learning_rate": 1.76777987821677e-05, "loss": 0.6335147619247437, "step": 3975 }, { "epoch": 0.7346721945698744, "grad_norm": 0.0641375333070755, "learning_rate": 1.7676520459800404e-05, "loss": 0.5671116709709167, "step": 3976 }, { "epoch": 0.7348569712787704, "grad_norm": 0.07548077404499054, "learning_rate": 1.7675241831931612e-05, "loss": 0.6503629684448242, "step": 3977 }, { "epoch": 0.7350417479876662, "grad_norm": 0.07392218708992004, "learning_rate": 1.7673962898612212e-05, "loss": 0.5626558065414429, "step": 3978 }, { "epoch": 0.735226524696562, "grad_norm": 0.08736756443977356, "learning_rate": 1.7672683659893094e-05, "loss": 0.6278281807899475, "step": 3979 }, { "epoch": 0.7354113014054579, "grad_norm": 0.064041368663311, "learning_rate": 1.767140411582517e-05, "loss": 0.5311237573623657, "step": 3980 }, { "epoch": 0.7355960781143537, "grad_norm": 0.07592851668596268, "learning_rate": 1.767012426645936e-05, "loss": 0.6173804402351379, "step": 3981 }, { "epoch": 0.7357808548232495, "grad_norm": 0.07629850506782532, "learning_rate": 1.7668844111846607e-05, "loss": 0.603466808795929, "step": 3982 }, { "epoch": 0.7359656315321453, "grad_norm": 0.07655462622642517, "learning_rate": 1.766756365203785e-05, "loss": 0.6817041039466858, "step": 3983 }, { "epoch": 0.7361504082410412, "grad_norm": 0.07560434937477112, "learning_rate": 1.7666282887084048e-05, "loss": 0.6514676213264465, "step": 3984 }, { "epoch": 0.7363351849499371, "grad_norm": 0.06375480443239212, "learning_rate": 1.766500181703617e-05, "loss": 0.460000604391098, "step": 3985 }, { "epoch": 0.7365199616588329, "grad_norm": 0.07798736542463303, "learning_rate": 1.7663720441945203e-05, "loss": 0.5900716781616211, "step": 3986 }, { "epoch": 0.7367047383677288, "grad_norm": 0.05762708559632301, "learning_rate": 1.7662438761862137e-05, "loss": 0.48273253440856934, "step": 3987 }, { "epoch": 0.7368895150766246, "grad_norm": 0.06853564083576202, "learning_rate": 1.766115677683798e-05, "loss": 0.5477281212806702, "step": 3988 }, { "epoch": 0.7370742917855204, "grad_norm": 0.0676281601190567, "learning_rate": 1.7659874486923753e-05, "loss": 0.682117223739624, "step": 3989 }, { "epoch": 0.7372590684944162, "grad_norm": 0.0906582772731781, "learning_rate": 1.7658591892170485e-05, "loss": 0.7386360168457031, "step": 3990 }, { "epoch": 0.7374438452033121, "grad_norm": 0.08889324963092804, "learning_rate": 1.7657308992629227e-05, "loss": 0.6794676780700684, "step": 3991 }, { "epoch": 0.737628621912208, "grad_norm": 0.056633397936820984, "learning_rate": 1.765602578835102e-05, "loss": 0.4404573440551758, "step": 3992 }, { "epoch": 0.7378133986211038, "grad_norm": 0.08712149411439896, "learning_rate": 1.765474227938694e-05, "loss": 0.6856634616851807, "step": 3993 }, { "epoch": 0.7379981753299997, "grad_norm": 0.05891774967312813, "learning_rate": 1.765345846578807e-05, "loss": 0.39748650789260864, "step": 3994 }, { "epoch": 0.7381829520388955, "grad_norm": 0.09329812973737717, "learning_rate": 1.7652174347605495e-05, "loss": 0.8040505647659302, "step": 3995 }, { "epoch": 0.7383677287477913, "grad_norm": 0.07120615243911743, "learning_rate": 1.7650889924890322e-05, "loss": 0.5254811644554138, "step": 3996 }, { "epoch": 0.7385525054566872, "grad_norm": 0.07867088913917542, "learning_rate": 1.7649605197693666e-05, "loss": 0.6115449666976929, "step": 3997 }, { "epoch": 0.738737282165583, "grad_norm": 0.08392847329378128, "learning_rate": 1.7648320166066657e-05, "loss": 0.7248279452323914, "step": 3998 }, { "epoch": 0.7389220588744789, "grad_norm": 0.07307955622673035, "learning_rate": 1.764703483006043e-05, "loss": 0.5999466776847839, "step": 3999 }, { "epoch": 0.7391068355833748, "grad_norm": 0.08082327246665955, "learning_rate": 1.7645749189726148e-05, "loss": 0.6654618978500366, "step": 4000 }, { "epoch": 0.7391068355833748, "eval_loss": 0.6549291610717773, "eval_runtime": 158.519, "eval_samples_per_second": 114.996, "eval_steps_per_second": 14.377, "step": 4000 }, { "epoch": 0.7392916122922706, "grad_norm": 0.07467007637023926, "learning_rate": 1.7644463245114966e-05, "loss": 0.48813697695732117, "step": 4001 }, { "epoch": 0.7394763890011664, "grad_norm": 0.06314804404973984, "learning_rate": 1.764317699627806e-05, "loss": 0.5541012287139893, "step": 4002 }, { "epoch": 0.7396611657100622, "grad_norm": 0.06194116547703743, "learning_rate": 1.7641890443266626e-05, "loss": 0.611510157585144, "step": 4003 }, { "epoch": 0.7398459424189581, "grad_norm": 0.07432377338409424, "learning_rate": 1.7640603586131858e-05, "loss": 0.5475557446479797, "step": 4004 }, { "epoch": 0.7400307191278539, "grad_norm": 0.08566770702600479, "learning_rate": 1.7639316424924974e-05, "loss": 0.7035894393920898, "step": 4005 }, { "epoch": 0.7402154958367497, "grad_norm": 0.0692681223154068, "learning_rate": 1.7638028959697195e-05, "loss": 0.6889891028404236, "step": 4006 }, { "epoch": 0.7404002725456457, "grad_norm": 0.06803371757268906, "learning_rate": 1.7636741190499762e-05, "loss": 0.5378096103668213, "step": 4007 }, { "epoch": 0.7405850492545415, "grad_norm": 0.06590144336223602, "learning_rate": 1.763545311738392e-05, "loss": 0.6045305728912354, "step": 4008 }, { "epoch": 0.7407698259634373, "grad_norm": 0.07703419029712677, "learning_rate": 1.763416474040093e-05, "loss": 0.6117264032363892, "step": 4009 }, { "epoch": 0.7409546026723332, "grad_norm": 0.0843396931886673, "learning_rate": 1.7632876059602073e-05, "loss": 0.8733839988708496, "step": 4010 }, { "epoch": 0.741139379381229, "grad_norm": 0.07758200168609619, "learning_rate": 1.7631587075038625e-05, "loss": 0.620842695236206, "step": 4011 }, { "epoch": 0.7413241560901248, "grad_norm": 0.07516031712293625, "learning_rate": 1.763029778676189e-05, "loss": 0.6462552547454834, "step": 4012 }, { "epoch": 0.7415089327990206, "grad_norm": 0.06614000350236893, "learning_rate": 1.762900819482317e-05, "loss": 0.5343265533447266, "step": 4013 }, { "epoch": 0.7416937095079166, "grad_norm": 0.0890808179974556, "learning_rate": 1.7627718299273796e-05, "loss": 0.7041956186294556, "step": 4014 }, { "epoch": 0.7418784862168124, "grad_norm": 0.06308693438768387, "learning_rate": 1.76264281001651e-05, "loss": 0.44210219383239746, "step": 4015 }, { "epoch": 0.7420632629257082, "grad_norm": 0.06638413667678833, "learning_rate": 1.762513759754842e-05, "loss": 0.5798364877700806, "step": 4016 }, { "epoch": 0.7422480396346041, "grad_norm": 0.06280551105737686, "learning_rate": 1.7623846791475126e-05, "loss": 0.5701326727867126, "step": 4017 }, { "epoch": 0.7424328163434999, "grad_norm": 0.07918789237737656, "learning_rate": 1.7622555681996577e-05, "loss": 0.6177583336830139, "step": 4018 }, { "epoch": 0.7426175930523957, "grad_norm": 0.06910663098096848, "learning_rate": 1.762126426916416e-05, "loss": 0.5345649123191833, "step": 4019 }, { "epoch": 0.7428023697612915, "grad_norm": 0.0932488888502121, "learning_rate": 1.761997255302927e-05, "loss": 0.6094528436660767, "step": 4020 }, { "epoch": 0.7429871464701875, "grad_norm": 0.0931335985660553, "learning_rate": 1.7618680533643316e-05, "loss": 0.6541128754615784, "step": 4021 }, { "epoch": 0.7431719231790833, "grad_norm": 0.071692556142807, "learning_rate": 1.7617388211057706e-05, "loss": 0.5665780305862427, "step": 4022 }, { "epoch": 0.7433566998879791, "grad_norm": 0.07735349237918854, "learning_rate": 1.7616095585323882e-05, "loss": 0.6871585249900818, "step": 4023 }, { "epoch": 0.743541476596875, "grad_norm": 0.08922318369150162, "learning_rate": 1.7614802656493277e-05, "loss": 0.7715819478034973, "step": 4024 }, { "epoch": 0.7437262533057708, "grad_norm": 0.06489979475736618, "learning_rate": 1.7613509424617353e-05, "loss": 0.568026065826416, "step": 4025 }, { "epoch": 0.7439110300146666, "grad_norm": 0.0566529706120491, "learning_rate": 1.7612215889747574e-05, "loss": 0.36088061332702637, "step": 4026 }, { "epoch": 0.7440958067235625, "grad_norm": 0.0665200874209404, "learning_rate": 1.7610922051935416e-05, "loss": 0.6098876595497131, "step": 4027 }, { "epoch": 0.7442805834324583, "grad_norm": 0.07974401116371155, "learning_rate": 1.760962791123237e-05, "loss": 0.6779783964157104, "step": 4028 }, { "epoch": 0.7444653601413542, "grad_norm": 0.062424369156360626, "learning_rate": 1.7608333467689946e-05, "loss": 0.5455675721168518, "step": 4029 }, { "epoch": 0.74465013685025, "grad_norm": 0.07580641657114029, "learning_rate": 1.7607038721359648e-05, "loss": 0.6508584022521973, "step": 4030 }, { "epoch": 0.7448349135591459, "grad_norm": 0.06872642040252686, "learning_rate": 1.7605743672293016e-05, "loss": 0.6221672296524048, "step": 4031 }, { "epoch": 0.7450196902680417, "grad_norm": 0.07672706991434097, "learning_rate": 1.7604448320541575e-05, "loss": 0.6213904023170471, "step": 4032 }, { "epoch": 0.7452044669769375, "grad_norm": 0.08280938118696213, "learning_rate": 1.760315266615688e-05, "loss": 0.7548631429672241, "step": 4033 }, { "epoch": 0.7453892436858334, "grad_norm": 0.06836125999689102, "learning_rate": 1.76018567091905e-05, "loss": 0.5210031270980835, "step": 4034 }, { "epoch": 0.7455740203947292, "grad_norm": 0.06920409947633743, "learning_rate": 1.7600560449694006e-05, "loss": 0.6825928688049316, "step": 4035 }, { "epoch": 0.7457587971036251, "grad_norm": 0.05258931964635849, "learning_rate": 1.7599263887718984e-05, "loss": 0.44466057419776917, "step": 4036 }, { "epoch": 0.745943573812521, "grad_norm": 0.0675012394785881, "learning_rate": 1.7597967023317035e-05, "loss": 0.5741130113601685, "step": 4037 }, { "epoch": 0.7461283505214168, "grad_norm": 0.07422135025262833, "learning_rate": 1.759666985653977e-05, "loss": 0.7687113285064697, "step": 4038 }, { "epoch": 0.7463131272303126, "grad_norm": 0.06264545023441315, "learning_rate": 1.759537238743881e-05, "loss": 0.5124937891960144, "step": 4039 }, { "epoch": 0.7464979039392085, "grad_norm": 0.04960038885474205, "learning_rate": 1.7594074616065793e-05, "loss": 0.45736101269721985, "step": 4040 }, { "epoch": 0.7466826806481043, "grad_norm": 0.08585156500339508, "learning_rate": 1.7592776542472364e-05, "loss": 0.8170877695083618, "step": 4041 }, { "epoch": 0.7468674573570001, "grad_norm": 0.0781988576054573, "learning_rate": 1.7591478166710184e-05, "loss": 0.6889027953147888, "step": 4042 }, { "epoch": 0.747052234065896, "grad_norm": 0.07258368283510208, "learning_rate": 1.759017948883093e-05, "loss": 0.5686144232749939, "step": 4043 }, { "epoch": 0.7472370107747919, "grad_norm": 0.09401769191026688, "learning_rate": 1.758888050888627e-05, "loss": 0.6536602973937988, "step": 4044 }, { "epoch": 0.7474217874836877, "grad_norm": 0.07565975189208984, "learning_rate": 1.758758122692791e-05, "loss": 0.5427644848823547, "step": 4045 }, { "epoch": 0.7476065641925835, "grad_norm": 0.09071590006351471, "learning_rate": 1.7586281643007558e-05, "loss": 0.8327360153198242, "step": 4046 }, { "epoch": 0.7477913409014794, "grad_norm": 0.05182671546936035, "learning_rate": 1.7584981757176927e-05, "loss": 0.44840091466903687, "step": 4047 }, { "epoch": 0.7479761176103752, "grad_norm": 0.05731528252363205, "learning_rate": 1.758368156948776e-05, "loss": 0.5093470811843872, "step": 4048 }, { "epoch": 0.748160894319271, "grad_norm": 0.07756191492080688, "learning_rate": 1.7582381079991787e-05, "loss": 0.6823296546936035, "step": 4049 }, { "epoch": 0.7483456710281668, "grad_norm": 0.08770789951086044, "learning_rate": 1.758108028874077e-05, "loss": 0.684930145740509, "step": 4050 }, { "epoch": 0.7485304477370628, "grad_norm": 0.07347483932971954, "learning_rate": 1.7579779195786475e-05, "loss": 0.5228387713432312, "step": 4051 }, { "epoch": 0.7487152244459586, "grad_norm": 0.08817595988512039, "learning_rate": 1.7578477801180684e-05, "loss": 0.6824162006378174, "step": 4052 }, { "epoch": 0.7489000011548544, "grad_norm": 0.07642996311187744, "learning_rate": 1.7577176104975188e-05, "loss": 0.5606251358985901, "step": 4053 }, { "epoch": 0.7490847778637503, "grad_norm": 0.06220955774188042, "learning_rate": 1.7575874107221785e-05, "loss": 0.5040315389633179, "step": 4054 }, { "epoch": 0.7492695545726461, "grad_norm": 0.08808313310146332, "learning_rate": 1.7574571807972297e-05, "loss": 1.009043574333191, "step": 4055 }, { "epoch": 0.7494543312815419, "grad_norm": 0.06635929644107819, "learning_rate": 1.7573269207278546e-05, "loss": 0.4607446789741516, "step": 4056 }, { "epoch": 0.7496391079904378, "grad_norm": 0.08293266594409943, "learning_rate": 1.757196630519238e-05, "loss": 0.723304808139801, "step": 4057 }, { "epoch": 0.7498238846993337, "grad_norm": 0.06487040966749191, "learning_rate": 1.7570663101765638e-05, "loss": 0.5701680183410645, "step": 4058 }, { "epoch": 0.7500086614082295, "grad_norm": 0.06392481178045273, "learning_rate": 1.7569359597050193e-05, "loss": 0.5060821175575256, "step": 4059 }, { "epoch": 0.7501934381171254, "grad_norm": 0.0927731916308403, "learning_rate": 1.756805579109792e-05, "loss": 0.7945534586906433, "step": 4060 }, { "epoch": 0.7503782148260212, "grad_norm": 0.075643390417099, "learning_rate": 1.75667516839607e-05, "loss": 0.6051799654960632, "step": 4061 }, { "epoch": 0.750562991534917, "grad_norm": 0.06552645564079285, "learning_rate": 1.756544727569044e-05, "loss": 0.519436240196228, "step": 4062 }, { "epoch": 0.7507477682438128, "grad_norm": 0.053866442292928696, "learning_rate": 1.756414256633904e-05, "loss": 0.44880855083465576, "step": 4063 }, { "epoch": 0.7509325449527087, "grad_norm": 0.06403449922800064, "learning_rate": 1.756283755595844e-05, "loss": 0.5271477103233337, "step": 4064 }, { "epoch": 0.7511173216616046, "grad_norm": 0.07670261710882187, "learning_rate": 1.7561532244600562e-05, "loss": 0.7232034802436829, "step": 4065 }, { "epoch": 0.7513020983705004, "grad_norm": 0.07809510827064514, "learning_rate": 1.7560226632317355e-05, "loss": 0.6339353919029236, "step": 4066 }, { "epoch": 0.7514868750793963, "grad_norm": 0.07474415004253387, "learning_rate": 1.7558920719160788e-05, "loss": 0.515523374080658, "step": 4067 }, { "epoch": 0.7516716517882921, "grad_norm": 0.07664012908935547, "learning_rate": 1.755761450518282e-05, "loss": 0.6550431847572327, "step": 4068 }, { "epoch": 0.7518564284971879, "grad_norm": 0.07359275966882706, "learning_rate": 1.7556307990435445e-05, "loss": 0.6118465662002563, "step": 4069 }, { "epoch": 0.7520412052060838, "grad_norm": 0.048818353563547134, "learning_rate": 1.7555001174970647e-05, "loss": 0.3596686124801636, "step": 4070 }, { "epoch": 0.7522259819149796, "grad_norm": 0.07568147033452988, "learning_rate": 1.755369405884044e-05, "loss": 0.5742887258529663, "step": 4071 }, { "epoch": 0.7524107586238754, "grad_norm": 0.0754503533244133, "learning_rate": 1.7552386642096842e-05, "loss": 0.7414309978485107, "step": 4072 }, { "epoch": 0.7525955353327713, "grad_norm": 0.07180456072092056, "learning_rate": 1.755107892479188e-05, "loss": 0.6643378138542175, "step": 4073 }, { "epoch": 0.7527803120416672, "grad_norm": 0.06921889632940292, "learning_rate": 1.7549770906977612e-05, "loss": 0.6333237886428833, "step": 4074 }, { "epoch": 0.752965088750563, "grad_norm": 0.072540782392025, "learning_rate": 1.7548462588706075e-05, "loss": 0.5915318727493286, "step": 4075 }, { "epoch": 0.7531498654594588, "grad_norm": 0.07446306198835373, "learning_rate": 1.7547153970029343e-05, "loss": 0.5665271878242493, "step": 4076 }, { "epoch": 0.7533346421683547, "grad_norm": 0.07440496236085892, "learning_rate": 1.7545845050999495e-05, "loss": 0.8101900219917297, "step": 4077 }, { "epoch": 0.7535194188772505, "grad_norm": 0.0785202607512474, "learning_rate": 1.7544535831668624e-05, "loss": 0.6516543626785278, "step": 4078 }, { "epoch": 0.7537041955861463, "grad_norm": 0.07105366140604019, "learning_rate": 1.7543226312088828e-05, "loss": 0.5027989745140076, "step": 4079 }, { "epoch": 0.7538889722950423, "grad_norm": 0.06337833404541016, "learning_rate": 1.7541916492312225e-05, "loss": 0.5374016761779785, "step": 4080 }, { "epoch": 0.7540737490039381, "grad_norm": 0.0845162644982338, "learning_rate": 1.7540606372390946e-05, "loss": 0.5916616916656494, "step": 4081 }, { "epoch": 0.7542585257128339, "grad_norm": 0.08202160894870758, "learning_rate": 1.7539295952377117e-05, "loss": 0.5300063490867615, "step": 4082 }, { "epoch": 0.7544433024217297, "grad_norm": 0.06920167058706284, "learning_rate": 1.7537985232322902e-05, "loss": 0.5863104462623596, "step": 4083 }, { "epoch": 0.7546280791306256, "grad_norm": 0.061373304575681686, "learning_rate": 1.7536674212280456e-05, "loss": 0.42102378606796265, "step": 4084 }, { "epoch": 0.7548128558395214, "grad_norm": 0.08496609330177307, "learning_rate": 1.7535362892301953e-05, "loss": 0.6914458870887756, "step": 4085 }, { "epoch": 0.7549976325484172, "grad_norm": 0.08487819880247116, "learning_rate": 1.753405127243959e-05, "loss": 0.6755183935165405, "step": 4086 }, { "epoch": 0.7551824092573132, "grad_norm": 0.08387638628482819, "learning_rate": 1.7532739352745552e-05, "loss": 0.7153843641281128, "step": 4087 }, { "epoch": 0.755367185966209, "grad_norm": 0.07073529064655304, "learning_rate": 1.7531427133272056e-05, "loss": 0.44453296065330505, "step": 4088 }, { "epoch": 0.7555519626751048, "grad_norm": 0.08027850091457367, "learning_rate": 1.753011461407132e-05, "loss": 0.7097273468971252, "step": 4089 }, { "epoch": 0.7557367393840007, "grad_norm": 0.06549596786499023, "learning_rate": 1.752880179519558e-05, "loss": 0.5342280864715576, "step": 4090 }, { "epoch": 0.7559215160928965, "grad_norm": 0.09357139468193054, "learning_rate": 1.752748867669709e-05, "loss": 0.6749565005302429, "step": 4091 }, { "epoch": 0.7561062928017923, "grad_norm": 0.09218194335699081, "learning_rate": 1.7526175258628097e-05, "loss": 0.6712514162063599, "step": 4092 }, { "epoch": 0.7562910695106881, "grad_norm": 0.11231732368469238, "learning_rate": 1.7524861541040878e-05, "loss": 0.705974817276001, "step": 4093 }, { "epoch": 0.756475846219584, "grad_norm": 0.06249980628490448, "learning_rate": 1.7523547523987708e-05, "loss": 0.514185905456543, "step": 4094 }, { "epoch": 0.7566606229284799, "grad_norm": 0.08503606915473938, "learning_rate": 1.7522233207520887e-05, "loss": 0.790868878364563, "step": 4095 }, { "epoch": 0.7568453996373757, "grad_norm": 0.08546585589647293, "learning_rate": 1.7520918591692713e-05, "loss": 0.6644178032875061, "step": 4096 }, { "epoch": 0.7570301763462716, "grad_norm": 0.07207859307527542, "learning_rate": 1.7519603676555517e-05, "loss": 0.5520370006561279, "step": 4097 }, { "epoch": 0.7572149530551674, "grad_norm": 0.07846345752477646, "learning_rate": 1.751828846216162e-05, "loss": 0.6240181922912598, "step": 4098 }, { "epoch": 0.7573997297640632, "grad_norm": 0.07634284347295761, "learning_rate": 1.751697294856336e-05, "loss": 0.6089432239532471, "step": 4099 }, { "epoch": 0.757584506472959, "grad_norm": 0.06646178662776947, "learning_rate": 1.7515657135813095e-05, "loss": 0.5072463154792786, "step": 4100 }, { "epoch": 0.7577692831818549, "grad_norm": 0.05174202099442482, "learning_rate": 1.7514341023963187e-05, "loss": 0.38996005058288574, "step": 4101 }, { "epoch": 0.7579540598907508, "grad_norm": 0.0655844584107399, "learning_rate": 1.7513024613066017e-05, "loss": 0.5721313953399658, "step": 4102 }, { "epoch": 0.7581388365996466, "grad_norm": 0.07744917273521423, "learning_rate": 1.7511707903173975e-05, "loss": 0.6617624163627625, "step": 4103 }, { "epoch": 0.7583236133085425, "grad_norm": 0.07809010148048401, "learning_rate": 1.7510390894339463e-05, "loss": 0.6835892200469971, "step": 4104 }, { "epoch": 0.7585083900174383, "grad_norm": 0.07202986627817154, "learning_rate": 1.7509073586614884e-05, "loss": 0.7185051441192627, "step": 4105 }, { "epoch": 0.7586931667263341, "grad_norm": 0.06993846595287323, "learning_rate": 1.750775598005267e-05, "loss": 0.536699652671814, "step": 4106 }, { "epoch": 0.75887794343523, "grad_norm": 0.08079773187637329, "learning_rate": 1.750643807470526e-05, "loss": 0.5461199879646301, "step": 4107 }, { "epoch": 0.7590627201441258, "grad_norm": 0.0769638791680336, "learning_rate": 1.7505119870625097e-05, "loss": 0.6999402642250061, "step": 4108 }, { "epoch": 0.7592474968530217, "grad_norm": 0.07775267958641052, "learning_rate": 1.7503801367864643e-05, "loss": 0.6964604258537292, "step": 4109 }, { "epoch": 0.7594322735619176, "grad_norm": 0.0595572330057621, "learning_rate": 1.750248256647637e-05, "loss": 0.4696602523326874, "step": 4110 }, { "epoch": 0.7596170502708134, "grad_norm": 0.06406479328870773, "learning_rate": 1.7501163466512764e-05, "loss": 0.4890046715736389, "step": 4111 }, { "epoch": 0.7598018269797092, "grad_norm": 0.06237300857901573, "learning_rate": 1.7499844068026322e-05, "loss": 0.4907143712043762, "step": 4112 }, { "epoch": 0.759986603688605, "grad_norm": 0.05173421651124954, "learning_rate": 1.749852437106955e-05, "loss": 0.45887884497642517, "step": 4113 }, { "epoch": 0.7601713803975009, "grad_norm": 0.07699117809534073, "learning_rate": 1.749720437569497e-05, "loss": 0.659019410610199, "step": 4114 }, { "epoch": 0.7603561571063967, "grad_norm": 0.05027296394109726, "learning_rate": 1.7495884081955106e-05, "loss": 0.3970877528190613, "step": 4115 }, { "epoch": 0.7605409338152925, "grad_norm": 0.06710248440504074, "learning_rate": 1.749456348990251e-05, "loss": 0.596959114074707, "step": 4116 }, { "epoch": 0.7607257105241885, "grad_norm": 0.06048336625099182, "learning_rate": 1.7493242599589733e-05, "loss": 0.4490605592727661, "step": 4117 }, { "epoch": 0.7609104872330843, "grad_norm": 0.07077085226774216, "learning_rate": 1.7491921411069347e-05, "loss": 0.6158462762832642, "step": 4118 }, { "epoch": 0.7610952639419801, "grad_norm": 0.08598323911428452, "learning_rate": 1.7490599924393925e-05, "loss": 0.826266884803772, "step": 4119 }, { "epoch": 0.761280040650876, "grad_norm": 0.07506779581308365, "learning_rate": 1.7489278139616063e-05, "loss": 0.6603870391845703, "step": 4120 }, { "epoch": 0.7614648173597718, "grad_norm": 0.07160505652427673, "learning_rate": 1.748795605678836e-05, "loss": 0.6138166189193726, "step": 4121 }, { "epoch": 0.7616495940686676, "grad_norm": 0.08130978792905807, "learning_rate": 1.7486633675963432e-05, "loss": 0.7139620184898376, "step": 4122 }, { "epoch": 0.7618343707775634, "grad_norm": 0.07921002805233002, "learning_rate": 1.748531099719391e-05, "loss": 0.7038363814353943, "step": 4123 }, { "epoch": 0.7620191474864594, "grad_norm": 0.06836456805467606, "learning_rate": 1.748398802053243e-05, "loss": 0.5928983092308044, "step": 4124 }, { "epoch": 0.7622039241953552, "grad_norm": 0.07586997747421265, "learning_rate": 1.7482664746031637e-05, "loss": 0.6390272974967957, "step": 4125 }, { "epoch": 0.762388700904251, "grad_norm": 0.0682540088891983, "learning_rate": 1.7481341173744198e-05, "loss": 0.6863719820976257, "step": 4126 }, { "epoch": 0.7625734776131469, "grad_norm": 0.07922250777482986, "learning_rate": 1.7480017303722788e-05, "loss": 0.5649644136428833, "step": 4127 }, { "epoch": 0.7627582543220427, "grad_norm": 0.08836416155099869, "learning_rate": 1.747869313602009e-05, "loss": 0.6617914438247681, "step": 4128 }, { "epoch": 0.7629430310309385, "grad_norm": 0.06390947103500366, "learning_rate": 1.74773686706888e-05, "loss": 0.584636390209198, "step": 4129 }, { "epoch": 0.7631278077398344, "grad_norm": 0.0760967954993248, "learning_rate": 1.7476043907781636e-05, "loss": 0.6412030458450317, "step": 4130 }, { "epoch": 0.7633125844487303, "grad_norm": 0.05150337144732475, "learning_rate": 1.747471884735131e-05, "loss": 0.3768945038318634, "step": 4131 }, { "epoch": 0.7634973611576261, "grad_norm": 0.07066693156957626, "learning_rate": 1.7473393489450564e-05, "loss": 0.6143984794616699, "step": 4132 }, { "epoch": 0.763682137866522, "grad_norm": 0.0610533282160759, "learning_rate": 1.7472067834132135e-05, "loss": 0.45592719316482544, "step": 4133 }, { "epoch": 0.7638669145754178, "grad_norm": 0.08170929551124573, "learning_rate": 1.7470741881448784e-05, "loss": 0.6536384224891663, "step": 4134 }, { "epoch": 0.7640516912843136, "grad_norm": 0.071434386074543, "learning_rate": 1.746941563145328e-05, "loss": 0.6187744140625, "step": 4135 }, { "epoch": 0.7642364679932094, "grad_norm": 0.1042235866189003, "learning_rate": 1.74680890841984e-05, "loss": 0.7490211129188538, "step": 4136 }, { "epoch": 0.7644212447021053, "grad_norm": 0.0673421323299408, "learning_rate": 1.7466762239736944e-05, "loss": 0.5491911172866821, "step": 4137 }, { "epoch": 0.7646060214110011, "grad_norm": 0.07412095367908478, "learning_rate": 1.746543509812171e-05, "loss": 0.6123514175415039, "step": 4138 }, { "epoch": 0.764790798119897, "grad_norm": 0.08058208227157593, "learning_rate": 1.746410765940551e-05, "loss": 0.8483732342720032, "step": 4139 }, { "epoch": 0.7649755748287929, "grad_norm": 0.07618124783039093, "learning_rate": 1.7462779923641183e-05, "loss": 0.6350252032279968, "step": 4140 }, { "epoch": 0.7651603515376887, "grad_norm": 0.0847897008061409, "learning_rate": 1.746145189088156e-05, "loss": 0.9960081577301025, "step": 4141 }, { "epoch": 0.7653451282465845, "grad_norm": 0.06631160527467728, "learning_rate": 1.7460123561179496e-05, "loss": 0.6314607262611389, "step": 4142 }, { "epoch": 0.7655299049554803, "grad_norm": 0.07614582031965256, "learning_rate": 1.7458794934587856e-05, "loss": 0.6961138248443604, "step": 4143 }, { "epoch": 0.7657146816643762, "grad_norm": 0.07594437897205353, "learning_rate": 1.745746601115951e-05, "loss": 0.5030292868614197, "step": 4144 }, { "epoch": 0.765899458373272, "grad_norm": 0.08059264719486237, "learning_rate": 1.7456136790947347e-05, "loss": 0.6420890092849731, "step": 4145 }, { "epoch": 0.7660842350821679, "grad_norm": 0.11633842438459396, "learning_rate": 1.7454807274004273e-05, "loss": 0.8226794004440308, "step": 4146 }, { "epoch": 0.7662690117910638, "grad_norm": 0.05782187730073929, "learning_rate": 1.745347746038319e-05, "loss": 0.5326021313667297, "step": 4147 }, { "epoch": 0.7664537884999596, "grad_norm": 0.07117144018411636, "learning_rate": 1.7452147350137024e-05, "loss": 0.6022680401802063, "step": 4148 }, { "epoch": 0.7666385652088554, "grad_norm": 0.0824783444404602, "learning_rate": 1.7450816943318705e-05, "loss": 0.7285429835319519, "step": 4149 }, { "epoch": 0.7668233419177513, "grad_norm": 0.07009123265743256, "learning_rate": 1.7449486239981186e-05, "loss": 0.5529493093490601, "step": 4150 }, { "epoch": 0.7670081186266471, "grad_norm": 0.06966505944728851, "learning_rate": 1.744815524017742e-05, "loss": 0.6751921772956848, "step": 4151 }, { "epoch": 0.7671928953355429, "grad_norm": 0.06169646605849266, "learning_rate": 1.7446823943960374e-05, "loss": 0.5230487585067749, "step": 4152 }, { "epoch": 0.7673776720444389, "grad_norm": 0.06246807053685188, "learning_rate": 1.744549235138304e-05, "loss": 0.4462629556655884, "step": 4153 }, { "epoch": 0.7675624487533347, "grad_norm": 0.09959576278924942, "learning_rate": 1.74441604624984e-05, "loss": 0.9976585507392883, "step": 4154 }, { "epoch": 0.7677472254622305, "grad_norm": 0.09119909256696701, "learning_rate": 1.7442828277359463e-05, "loss": 0.7003543376922607, "step": 4155 }, { "epoch": 0.7679320021711263, "grad_norm": 0.08542408049106598, "learning_rate": 1.7441495796019245e-05, "loss": 0.7313602566719055, "step": 4156 }, { "epoch": 0.7681167788800222, "grad_norm": 0.06858178228139877, "learning_rate": 1.744016301853078e-05, "loss": 0.5413244962692261, "step": 4157 }, { "epoch": 0.768301555588918, "grad_norm": 0.05746445804834366, "learning_rate": 1.74388299449471e-05, "loss": 0.4712941646575928, "step": 4158 }, { "epoch": 0.7684863322978138, "grad_norm": 0.07297435402870178, "learning_rate": 1.7437496575321264e-05, "loss": 0.7246826887130737, "step": 4159 }, { "epoch": 0.7686711090067097, "grad_norm": 0.08480487018823624, "learning_rate": 1.7436162909706335e-05, "loss": 0.9000516533851624, "step": 4160 }, { "epoch": 0.7688558857156056, "grad_norm": 0.07197446376085281, "learning_rate": 1.743482894815538e-05, "loss": 0.6599001288414001, "step": 4161 }, { "epoch": 0.7690406624245014, "grad_norm": 0.10154495388269424, "learning_rate": 1.74334946907215e-05, "loss": 0.8901887536048889, "step": 4162 }, { "epoch": 0.7692254391333972, "grad_norm": 0.0685698539018631, "learning_rate": 1.7432160137457787e-05, "loss": 0.6849889159202576, "step": 4163 }, { "epoch": 0.7694102158422931, "grad_norm": 0.07454440742731094, "learning_rate": 1.743082528841735e-05, "loss": 0.6368915438652039, "step": 4164 }, { "epoch": 0.7695949925511889, "grad_norm": 0.08280269056558609, "learning_rate": 1.7429490143653317e-05, "loss": 0.765697181224823, "step": 4165 }, { "epoch": 0.7697797692600847, "grad_norm": 0.08227710425853729, "learning_rate": 1.742815470321882e-05, "loss": 0.7411954998970032, "step": 4166 }, { "epoch": 0.7699645459689806, "grad_norm": 0.07122933119535446, "learning_rate": 1.7426818967167003e-05, "loss": 0.595898449420929, "step": 4167 }, { "epoch": 0.7701493226778765, "grad_norm": 0.08117000758647919, "learning_rate": 1.742548293555103e-05, "loss": 0.7033901810646057, "step": 4168 }, { "epoch": 0.7703340993867723, "grad_norm": 0.06495106965303421, "learning_rate": 1.7424146608424065e-05, "loss": 0.5481119751930237, "step": 4169 }, { "epoch": 0.7705188760956682, "grad_norm": 0.06746747344732285, "learning_rate": 1.7422809985839292e-05, "loss": 0.49438005685806274, "step": 4170 }, { "epoch": 0.770703652804564, "grad_norm": 0.08098278194665909, "learning_rate": 1.7421473067849906e-05, "loss": 0.6253182291984558, "step": 4171 }, { "epoch": 0.7708884295134598, "grad_norm": 0.07808305323123932, "learning_rate": 1.742013585450911e-05, "loss": 0.6477851867675781, "step": 4172 }, { "epoch": 0.7710732062223556, "grad_norm": 0.08123036473989487, "learning_rate": 1.741879834587012e-05, "loss": 0.6954832673072815, "step": 4173 }, { "epoch": 0.7712579829312515, "grad_norm": 0.08123773336410522, "learning_rate": 1.741746054198617e-05, "loss": 0.8441729545593262, "step": 4174 }, { "epoch": 0.7714427596401474, "grad_norm": 0.05783558264374733, "learning_rate": 1.7416122442910493e-05, "loss": 0.44951534271240234, "step": 4175 }, { "epoch": 0.7716275363490432, "grad_norm": 0.07396768778562546, "learning_rate": 1.741478404869635e-05, "loss": 0.6257613301277161, "step": 4176 }, { "epoch": 0.7718123130579391, "grad_norm": 0.05979736149311066, "learning_rate": 1.7413445359396996e-05, "loss": 0.5418450832366943, "step": 4177 }, { "epoch": 0.7719970897668349, "grad_norm": 0.07243388146162033, "learning_rate": 1.741210637506571e-05, "loss": 0.4934229254722595, "step": 4178 }, { "epoch": 0.7721818664757307, "grad_norm": 0.07115595042705536, "learning_rate": 1.741076709575578e-05, "loss": 0.572291910648346, "step": 4179 }, { "epoch": 0.7723666431846266, "grad_norm": 0.0746389627456665, "learning_rate": 1.7409427521520507e-05, "loss": 0.49469316005706787, "step": 4180 }, { "epoch": 0.7725514198935224, "grad_norm": 0.09076672792434692, "learning_rate": 1.7408087652413197e-05, "loss": 0.7113850116729736, "step": 4181 }, { "epoch": 0.7727361966024182, "grad_norm": 0.08029329776763916, "learning_rate": 1.7406747488487176e-05, "loss": 0.7337382435798645, "step": 4182 }, { "epoch": 0.7729209733113142, "grad_norm": 0.0713610053062439, "learning_rate": 1.740540702979578e-05, "loss": 0.6376339793205261, "step": 4183 }, { "epoch": 0.77310575002021, "grad_norm": 0.07699505239725113, "learning_rate": 1.740406627639235e-05, "loss": 0.714060366153717, "step": 4184 }, { "epoch": 0.7732905267291058, "grad_norm": 0.054786115884780884, "learning_rate": 1.7402725228330247e-05, "loss": 0.4561084508895874, "step": 4185 }, { "epoch": 0.7734753034380016, "grad_norm": 0.08547017723321915, "learning_rate": 1.7401383885662843e-05, "loss": 0.7607282400131226, "step": 4186 }, { "epoch": 0.7736600801468975, "grad_norm": 0.055155374109745026, "learning_rate": 1.7400042248443513e-05, "loss": 0.4208586513996124, "step": 4187 }, { "epoch": 0.7738448568557933, "grad_norm": 0.08057721704244614, "learning_rate": 1.7398700316725653e-05, "loss": 0.7284572124481201, "step": 4188 }, { "epoch": 0.7740296335646891, "grad_norm": 0.07119718194007874, "learning_rate": 1.739735809056267e-05, "loss": 0.5601897835731506, "step": 4189 }, { "epoch": 0.7742144102735851, "grad_norm": 0.0902874544262886, "learning_rate": 1.7396015570007978e-05, "loss": 0.7009224891662598, "step": 4190 }, { "epoch": 0.7743991869824809, "grad_norm": 0.07109608501195908, "learning_rate": 1.7394672755115003e-05, "loss": 0.6656509637832642, "step": 4191 }, { "epoch": 0.7745839636913767, "grad_norm": 0.07586213946342468, "learning_rate": 1.739332964593719e-05, "loss": 0.6752843260765076, "step": 4192 }, { "epoch": 0.7747687404002725, "grad_norm": 0.0882461667060852, "learning_rate": 1.739198624252799e-05, "loss": 0.7218917608261108, "step": 4193 }, { "epoch": 0.7749535171091684, "grad_norm": 0.07135339826345444, "learning_rate": 1.739064254494086e-05, "loss": 0.7736853361129761, "step": 4194 }, { "epoch": 0.7751382938180642, "grad_norm": 0.07494081556797028, "learning_rate": 1.738929855322928e-05, "loss": 0.7577793002128601, "step": 4195 }, { "epoch": 0.77532307052696, "grad_norm": 0.0696171447634697, "learning_rate": 1.7387954267446737e-05, "loss": 0.5817857980728149, "step": 4196 }, { "epoch": 0.775507847235856, "grad_norm": 0.059537775814533234, "learning_rate": 1.7386609687646726e-05, "loss": 0.5929745435714722, "step": 4197 }, { "epoch": 0.7756926239447518, "grad_norm": 0.06773867458105087, "learning_rate": 1.738526481388276e-05, "loss": 0.5150438547134399, "step": 4198 }, { "epoch": 0.7758774006536476, "grad_norm": 0.07512631267309189, "learning_rate": 1.7383919646208364e-05, "loss": 0.6949543356895447, "step": 4199 }, { "epoch": 0.7760621773625435, "grad_norm": 0.0708075538277626, "learning_rate": 1.7382574184677063e-05, "loss": 0.5188945531845093, "step": 4200 }, { "epoch": 0.7762469540714393, "grad_norm": 0.06188984587788582, "learning_rate": 1.7381228429342406e-05, "loss": 0.5236637592315674, "step": 4201 }, { "epoch": 0.7764317307803351, "grad_norm": 0.06864194571971893, "learning_rate": 1.7379882380257952e-05, "loss": 0.5545336008071899, "step": 4202 }, { "epoch": 0.776616507489231, "grad_norm": 0.08061398565769196, "learning_rate": 1.7378536037477266e-05, "loss": 0.6850088238716125, "step": 4203 }, { "epoch": 0.7768012841981268, "grad_norm": 0.09009570628404617, "learning_rate": 1.7377189401053933e-05, "loss": 0.6856690049171448, "step": 4204 }, { "epoch": 0.7769860609070227, "grad_norm": 0.07767023891210556, "learning_rate": 1.7375842471041543e-05, "loss": 0.7038486003875732, "step": 4205 }, { "epoch": 0.7771708376159185, "grad_norm": 0.07596709579229355, "learning_rate": 1.7374495247493694e-05, "loss": 0.7392452359199524, "step": 4206 }, { "epoch": 0.7773556143248144, "grad_norm": 0.06344443559646606, "learning_rate": 1.737314773046401e-05, "loss": 0.4634053409099579, "step": 4207 }, { "epoch": 0.7775403910337102, "grad_norm": 0.06472158432006836, "learning_rate": 1.737179992000611e-05, "loss": 0.5714576244354248, "step": 4208 }, { "epoch": 0.777725167742606, "grad_norm": 0.07439829409122467, "learning_rate": 1.737045181617364e-05, "loss": 0.6813909411430359, "step": 4209 }, { "epoch": 0.7779099444515019, "grad_norm": 0.08288738131523132, "learning_rate": 1.7369103419020244e-05, "loss": 0.6286230683326721, "step": 4210 }, { "epoch": 0.7780947211603977, "grad_norm": 0.08833064138889313, "learning_rate": 1.7367754728599592e-05, "loss": 0.7476876378059387, "step": 4211 }, { "epoch": 0.7782794978692936, "grad_norm": 0.07594143599271774, "learning_rate": 1.736640574496535e-05, "loss": 0.6591677665710449, "step": 4212 }, { "epoch": 0.7784642745781895, "grad_norm": 0.08570119738578796, "learning_rate": 1.7365056468171204e-05, "loss": 0.6744270920753479, "step": 4213 }, { "epoch": 0.7786490512870853, "grad_norm": 0.08634412288665771, "learning_rate": 1.7363706898270852e-05, "loss": 0.6466952562332153, "step": 4214 }, { "epoch": 0.7788338279959811, "grad_norm": 0.07181413471698761, "learning_rate": 1.736235703531801e-05, "loss": 0.6132568120956421, "step": 4215 }, { "epoch": 0.7790186047048769, "grad_norm": 0.072494275867939, "learning_rate": 1.7361006879366385e-05, "loss": 0.6853365302085876, "step": 4216 }, { "epoch": 0.7792033814137728, "grad_norm": 0.07777240872383118, "learning_rate": 1.7359656430469722e-05, "loss": 0.5434789657592773, "step": 4217 }, { "epoch": 0.7793881581226686, "grad_norm": 0.07259754091501236, "learning_rate": 1.7358305688681754e-05, "loss": 0.6910667419433594, "step": 4218 }, { "epoch": 0.7795729348315645, "grad_norm": 0.06869952380657196, "learning_rate": 1.735695465405624e-05, "loss": 0.643589973449707, "step": 4219 }, { "epoch": 0.7797577115404604, "grad_norm": 0.06629443913698196, "learning_rate": 1.7355603326646952e-05, "loss": 0.557184100151062, "step": 4220 }, { "epoch": 0.7799424882493562, "grad_norm": 0.0654207244515419, "learning_rate": 1.7354251706507657e-05, "loss": 0.6412302255630493, "step": 4221 }, { "epoch": 0.780127264958252, "grad_norm": 0.08161873370409012, "learning_rate": 1.735289979369216e-05, "loss": 0.7459937334060669, "step": 4222 }, { "epoch": 0.7803120416671478, "grad_norm": 0.07101137936115265, "learning_rate": 1.7351547588254255e-05, "loss": 0.5612960457801819, "step": 4223 }, { "epoch": 0.7804968183760437, "grad_norm": 0.07463975250720978, "learning_rate": 1.7350195090247754e-05, "loss": 0.6693721413612366, "step": 4224 }, { "epoch": 0.7806815950849395, "grad_norm": 0.08101318031549454, "learning_rate": 1.734884229972648e-05, "loss": 0.6169843077659607, "step": 4225 }, { "epoch": 0.7808663717938353, "grad_norm": 0.08010120689868927, "learning_rate": 1.734748921674428e-05, "loss": 0.6786614656448364, "step": 4226 }, { "epoch": 0.7810511485027313, "grad_norm": 0.06889232993125916, "learning_rate": 1.7346135841354993e-05, "loss": 0.5372576117515564, "step": 4227 }, { "epoch": 0.7812359252116271, "grad_norm": 0.07085257768630981, "learning_rate": 1.7344782173612485e-05, "loss": 0.5229369401931763, "step": 4228 }, { "epoch": 0.7814207019205229, "grad_norm": 0.08060770481824875, "learning_rate": 1.7343428213570624e-05, "loss": 0.7314550280570984, "step": 4229 }, { "epoch": 0.7816054786294188, "grad_norm": 0.08209926635026932, "learning_rate": 1.7342073961283293e-05, "loss": 0.7790261507034302, "step": 4230 }, { "epoch": 0.7817902553383146, "grad_norm": 0.08321072161197662, "learning_rate": 1.7340719416804395e-05, "loss": 0.7635213136672974, "step": 4231 }, { "epoch": 0.7819750320472104, "grad_norm": 0.08907254040241241, "learning_rate": 1.7339364580187825e-05, "loss": 0.7458104491233826, "step": 4232 }, { "epoch": 0.7821598087561062, "grad_norm": 0.0697903037071228, "learning_rate": 1.733800945148751e-05, "loss": 0.6363630890846252, "step": 4233 }, { "epoch": 0.7823445854650022, "grad_norm": 0.06094703823328018, "learning_rate": 1.7336654030757373e-05, "loss": 0.5128779411315918, "step": 4234 }, { "epoch": 0.782529362173898, "grad_norm": 0.07650782912969589, "learning_rate": 1.7335298318051362e-05, "loss": 0.6506317853927612, "step": 4235 }, { "epoch": 0.7827141388827938, "grad_norm": 0.057419292628765106, "learning_rate": 1.7333942313423426e-05, "loss": 0.3510390520095825, "step": 4236 }, { "epoch": 0.7828989155916897, "grad_norm": 0.06217406317591667, "learning_rate": 1.733258601692753e-05, "loss": 0.4840368330478668, "step": 4237 }, { "epoch": 0.7830836923005855, "grad_norm": 0.08508466929197311, "learning_rate": 1.7331229428617652e-05, "loss": 0.7261524200439453, "step": 4238 }, { "epoch": 0.7832684690094813, "grad_norm": 0.06267179548740387, "learning_rate": 1.7329872548547778e-05, "loss": 0.591058075428009, "step": 4239 }, { "epoch": 0.7834532457183772, "grad_norm": 0.06621445715427399, "learning_rate": 1.732851537677191e-05, "loss": 0.5970615744590759, "step": 4240 }, { "epoch": 0.7836380224272731, "grad_norm": 0.05005696043372154, "learning_rate": 1.7327157913344058e-05, "loss": 0.3451262414455414, "step": 4241 }, { "epoch": 0.7838227991361689, "grad_norm": 0.0736042931675911, "learning_rate": 1.7325800158318243e-05, "loss": 0.6042583584785461, "step": 4242 }, { "epoch": 0.7840075758450648, "grad_norm": 0.05432210862636566, "learning_rate": 1.7324442111748506e-05, "loss": 0.38885262608528137, "step": 4243 }, { "epoch": 0.7841923525539606, "grad_norm": 0.08645527064800262, "learning_rate": 1.7323083773688883e-05, "loss": 0.771323561668396, "step": 4244 }, { "epoch": 0.7843771292628564, "grad_norm": 0.05179852247238159, "learning_rate": 1.732172514419344e-05, "loss": 0.4171346127986908, "step": 4245 }, { "epoch": 0.7845619059717522, "grad_norm": 0.07013101130723953, "learning_rate": 1.732036622331624e-05, "loss": 0.6128926873207092, "step": 4246 }, { "epoch": 0.7847466826806481, "grad_norm": 0.06737525761127472, "learning_rate": 1.7319007011111372e-05, "loss": 0.7678019404411316, "step": 4247 }, { "epoch": 0.7849314593895439, "grad_norm": 0.05731862410902977, "learning_rate": 1.7317647507632917e-05, "loss": 0.4066280424594879, "step": 4248 }, { "epoch": 0.7851162360984398, "grad_norm": 0.048538047820329666, "learning_rate": 1.7316287712934987e-05, "loss": 0.4312340021133423, "step": 4249 }, { "epoch": 0.7853010128073357, "grad_norm": 0.06966068595647812, "learning_rate": 1.7314927627071697e-05, "loss": 0.5768951773643494, "step": 4250 }, { "epoch": 0.7854857895162315, "grad_norm": 0.07969526946544647, "learning_rate": 1.7313567250097173e-05, "loss": 0.6721000671386719, "step": 4251 }, { "epoch": 0.7856705662251273, "grad_norm": 0.06059734523296356, "learning_rate": 1.7312206582065557e-05, "loss": 0.5634155869483948, "step": 4252 }, { "epoch": 0.7858553429340231, "grad_norm": 0.07579533755779266, "learning_rate": 1.7310845623030988e-05, "loss": 0.6615477204322815, "step": 4253 }, { "epoch": 0.786040119642919, "grad_norm": 0.08253277838230133, "learning_rate": 1.7309484373047642e-05, "loss": 0.7589923739433289, "step": 4254 }, { "epoch": 0.7862248963518148, "grad_norm": 0.07874087989330292, "learning_rate": 1.7308122832169685e-05, "loss": 0.58607417345047, "step": 4255 }, { "epoch": 0.7864096730607107, "grad_norm": 0.06461337208747864, "learning_rate": 1.7306761000451304e-05, "loss": 0.49482282996177673, "step": 4256 }, { "epoch": 0.7865944497696066, "grad_norm": 0.07468587160110474, "learning_rate": 1.7305398877946692e-05, "loss": 0.5755637288093567, "step": 4257 }, { "epoch": 0.7867792264785024, "grad_norm": 0.06519705057144165, "learning_rate": 1.7304036464710065e-05, "loss": 0.5765430927276611, "step": 4258 }, { "epoch": 0.7869640031873982, "grad_norm": 0.07528979331254959, "learning_rate": 1.7302673760795638e-05, "loss": 0.6647313833236694, "step": 4259 }, { "epoch": 0.7871487798962941, "grad_norm": 0.06806044280529022, "learning_rate": 1.7301310766257636e-05, "loss": 0.6607141494750977, "step": 4260 }, { "epoch": 0.7873335566051899, "grad_norm": 0.06762052327394485, "learning_rate": 1.7299947481150315e-05, "loss": 0.5975756049156189, "step": 4261 }, { "epoch": 0.7875183333140857, "grad_norm": 0.062473200261592865, "learning_rate": 1.729858390552792e-05, "loss": 0.5687182545661926, "step": 4262 }, { "epoch": 0.7877031100229817, "grad_norm": 0.06199536472558975, "learning_rate": 1.7297220039444717e-05, "loss": 0.4761790335178375, "step": 4263 }, { "epoch": 0.7878878867318775, "grad_norm": 0.09022694826126099, "learning_rate": 1.7295855882954993e-05, "loss": 0.7738853693008423, "step": 4264 }, { "epoch": 0.7880726634407733, "grad_norm": 0.10750069469213486, "learning_rate": 1.7294491436113026e-05, "loss": 0.875851035118103, "step": 4265 }, { "epoch": 0.7882574401496691, "grad_norm": 0.07180424779653549, "learning_rate": 1.7293126698973123e-05, "loss": 0.6548527479171753, "step": 4266 }, { "epoch": 0.788442216858565, "grad_norm": 0.06557504087686539, "learning_rate": 1.7291761671589594e-05, "loss": 0.588817298412323, "step": 4267 }, { "epoch": 0.7886269935674608, "grad_norm": 0.09686272591352463, "learning_rate": 1.7290396354016762e-05, "loss": 0.9058108925819397, "step": 4268 }, { "epoch": 0.7888117702763566, "grad_norm": 0.06769488006830215, "learning_rate": 1.7289030746308965e-05, "loss": 0.6059497594833374, "step": 4269 }, { "epoch": 0.7889965469852525, "grad_norm": 0.07028723508119583, "learning_rate": 1.7287664848520553e-05, "loss": 0.562899649143219, "step": 4270 }, { "epoch": 0.7891813236941484, "grad_norm": 0.07883322983980179, "learning_rate": 1.7286298660705877e-05, "loss": 0.6428919434547424, "step": 4271 }, { "epoch": 0.7893661004030442, "grad_norm": 0.06248297542333603, "learning_rate": 1.7284932182919308e-05, "loss": 0.5069523453712463, "step": 4272 }, { "epoch": 0.78955087711194, "grad_norm": 0.07720385491847992, "learning_rate": 1.728356541521523e-05, "loss": 0.6113148927688599, "step": 4273 }, { "epoch": 0.7897356538208359, "grad_norm": 0.08412636816501617, "learning_rate": 1.728219835764804e-05, "loss": 0.6969552636146545, "step": 4274 }, { "epoch": 0.7899204305297317, "grad_norm": 0.06477232277393341, "learning_rate": 1.7280831010272135e-05, "loss": 0.5412000417709351, "step": 4275 }, { "epoch": 0.7901052072386275, "grad_norm": 0.07632836699485779, "learning_rate": 1.7279463373141935e-05, "loss": 0.5482943058013916, "step": 4276 }, { "epoch": 0.7902899839475234, "grad_norm": 0.0716867446899414, "learning_rate": 1.7278095446311868e-05, "loss": 0.5588719844818115, "step": 4277 }, { "epoch": 0.7904747606564193, "grad_norm": 0.09096336364746094, "learning_rate": 1.7276727229836374e-05, "loss": 0.7104210257530212, "step": 4278 }, { "epoch": 0.7906595373653151, "grad_norm": 0.07804781198501587, "learning_rate": 1.72753587237699e-05, "loss": 0.717307448387146, "step": 4279 }, { "epoch": 0.790844314074211, "grad_norm": 0.056012026965618134, "learning_rate": 1.7273989928166907e-05, "loss": 0.5214253664016724, "step": 4280 }, { "epoch": 0.7910290907831068, "grad_norm": 0.07080499827861786, "learning_rate": 1.7272620843081877e-05, "loss": 0.5698209404945374, "step": 4281 }, { "epoch": 0.7912138674920026, "grad_norm": 0.0885167047381401, "learning_rate": 1.727125146856929e-05, "loss": 0.729579508304596, "step": 4282 }, { "epoch": 0.7913986442008984, "grad_norm": 0.06951869279146194, "learning_rate": 1.7269881804683645e-05, "loss": 0.4667820334434509, "step": 4283 }, { "epoch": 0.7915834209097943, "grad_norm": 0.06312818080186844, "learning_rate": 1.7268511851479446e-05, "loss": 0.5823342204093933, "step": 4284 }, { "epoch": 0.7917681976186902, "grad_norm": 0.057776253670454025, "learning_rate": 1.7267141609011215e-05, "loss": 0.5697093605995178, "step": 4285 }, { "epoch": 0.791952974327586, "grad_norm": 0.08048953860998154, "learning_rate": 1.7265771077333485e-05, "loss": 0.7026470899581909, "step": 4286 }, { "epoch": 0.7921377510364819, "grad_norm": 0.06322105973958969, "learning_rate": 1.72644002565008e-05, "loss": 0.5684886574745178, "step": 4287 }, { "epoch": 0.7923225277453777, "grad_norm": 0.07667342573404312, "learning_rate": 1.7263029146567708e-05, "loss": 0.6792047619819641, "step": 4288 }, { "epoch": 0.7925073044542735, "grad_norm": 0.06281965970993042, "learning_rate": 1.7261657747588782e-05, "loss": 0.49382346868515015, "step": 4289 }, { "epoch": 0.7926920811631694, "grad_norm": 0.05980212613940239, "learning_rate": 1.7260286059618597e-05, "loss": 0.5538467764854431, "step": 4290 }, { "epoch": 0.7928768578720652, "grad_norm": 0.0731351226568222, "learning_rate": 1.725891408271174e-05, "loss": 0.4768342673778534, "step": 4291 }, { "epoch": 0.793061634580961, "grad_norm": 0.05807039886713028, "learning_rate": 1.725754181692281e-05, "loss": 0.43910741806030273, "step": 4292 }, { "epoch": 0.793246411289857, "grad_norm": 0.0727703794836998, "learning_rate": 1.7256169262306427e-05, "loss": 0.6913385391235352, "step": 4293 }, { "epoch": 0.7934311879987528, "grad_norm": 0.0621478408575058, "learning_rate": 1.725479641891721e-05, "loss": 0.5831829309463501, "step": 4294 }, { "epoch": 0.7936159647076486, "grad_norm": 0.09093275666236877, "learning_rate": 1.7253423286809784e-05, "loss": 0.7480353713035583, "step": 4295 }, { "epoch": 0.7938007414165444, "grad_norm": 0.07408110052347183, "learning_rate": 1.7252049866038812e-05, "loss": 0.7685124278068542, "step": 4296 }, { "epoch": 0.7939855181254403, "grad_norm": 0.08359529078006744, "learning_rate": 1.7250676156658942e-05, "loss": 0.8137569427490234, "step": 4297 }, { "epoch": 0.7941702948343361, "grad_norm": 0.07526369392871857, "learning_rate": 1.7249302158724843e-05, "loss": 0.6307947039604187, "step": 4298 }, { "epoch": 0.7943550715432319, "grad_norm": 0.08648904412984848, "learning_rate": 1.72479278722912e-05, "loss": 0.6008473038673401, "step": 4299 }, { "epoch": 0.7945398482521279, "grad_norm": 0.07652433216571808, "learning_rate": 1.7246553297412705e-05, "loss": 0.6652103662490845, "step": 4300 }, { "epoch": 0.7947246249610237, "grad_norm": 0.06447356194257736, "learning_rate": 1.7245178434144063e-05, "loss": 0.5354271531105042, "step": 4301 }, { "epoch": 0.7949094016699195, "grad_norm": 0.08572579175233841, "learning_rate": 1.724380328253998e-05, "loss": 0.5951279997825623, "step": 4302 }, { "epoch": 0.7950941783788154, "grad_norm": 0.06310103088617325, "learning_rate": 1.7242427842655193e-05, "loss": 0.49456357955932617, "step": 4303 }, { "epoch": 0.7952789550877112, "grad_norm": 0.07046617567539215, "learning_rate": 1.7241052114544434e-05, "loss": 0.6634379625320435, "step": 4304 }, { "epoch": 0.795463731796607, "grad_norm": 0.06549013406038284, "learning_rate": 1.7239676098262457e-05, "loss": 0.5416505336761475, "step": 4305 }, { "epoch": 0.7956485085055028, "grad_norm": 0.06831902265548706, "learning_rate": 1.7238299793864023e-05, "loss": 0.5576737523078918, "step": 4306 }, { "epoch": 0.7958332852143988, "grad_norm": 0.06174025312066078, "learning_rate": 1.72369232014039e-05, "loss": 0.5236870050430298, "step": 4307 }, { "epoch": 0.7960180619232946, "grad_norm": 0.0703766718506813, "learning_rate": 1.7235546320936874e-05, "loss": 0.5725675821304321, "step": 4308 }, { "epoch": 0.7962028386321904, "grad_norm": 0.06023186445236206, "learning_rate": 1.7234169152517742e-05, "loss": 0.4951670169830322, "step": 4309 }, { "epoch": 0.7963876153410863, "grad_norm": 0.06425395607948303, "learning_rate": 1.7232791696201313e-05, "loss": 0.5032260417938232, "step": 4310 }, { "epoch": 0.7965723920499821, "grad_norm": 0.06461165845394135, "learning_rate": 1.72314139520424e-05, "loss": 0.4951946437358856, "step": 4311 }, { "epoch": 0.7967571687588779, "grad_norm": 0.07528678327798843, "learning_rate": 1.723003592009584e-05, "loss": 0.6738261580467224, "step": 4312 }, { "epoch": 0.7969419454677737, "grad_norm": 0.08563785254955292, "learning_rate": 1.7228657600416468e-05, "loss": 0.8475391864776611, "step": 4313 }, { "epoch": 0.7971267221766696, "grad_norm": 0.07527659833431244, "learning_rate": 1.722727899305914e-05, "loss": 0.571058988571167, "step": 4314 }, { "epoch": 0.7973114988855655, "grad_norm": 0.06086825951933861, "learning_rate": 1.7225900098078718e-05, "loss": 0.5177538990974426, "step": 4315 }, { "epoch": 0.7974962755944613, "grad_norm": 0.09399010986089706, "learning_rate": 1.722452091553008e-05, "loss": 0.6054516434669495, "step": 4316 }, { "epoch": 0.7976810523033572, "grad_norm": 0.07653100788593292, "learning_rate": 1.7223141445468112e-05, "loss": 0.5676854252815247, "step": 4317 }, { "epoch": 0.797865829012253, "grad_norm": 0.06869052350521088, "learning_rate": 1.7221761687947713e-05, "loss": 0.6330968141555786, "step": 4318 }, { "epoch": 0.7980506057211488, "grad_norm": 0.0903267040848732, "learning_rate": 1.7220381643023795e-05, "loss": 0.8275489807128906, "step": 4319 }, { "epoch": 0.7982353824300447, "grad_norm": 0.06249494478106499, "learning_rate": 1.721900131075127e-05, "loss": 0.5433062314987183, "step": 4320 }, { "epoch": 0.7984201591389405, "grad_norm": 0.0794859305024147, "learning_rate": 1.7217620691185083e-05, "loss": 0.5860161781311035, "step": 4321 }, { "epoch": 0.7986049358478364, "grad_norm": 0.05575048550963402, "learning_rate": 1.7216239784380176e-05, "loss": 0.47690248489379883, "step": 4322 }, { "epoch": 0.7987897125567323, "grad_norm": 0.07652156054973602, "learning_rate": 1.72148585903915e-05, "loss": 0.6837499737739563, "step": 4323 }, { "epoch": 0.7989744892656281, "grad_norm": 0.06289152801036835, "learning_rate": 1.7213477109274024e-05, "loss": 0.5508039593696594, "step": 4324 }, { "epoch": 0.7991592659745239, "grad_norm": 0.0741892084479332, "learning_rate": 1.721209534108273e-05, "loss": 0.6964645385742188, "step": 4325 }, { "epoch": 0.7993440426834197, "grad_norm": 0.07051388919353485, "learning_rate": 1.72107132858726e-05, "loss": 0.6231642961502075, "step": 4326 }, { "epoch": 0.7995288193923156, "grad_norm": 0.07337725907564163, "learning_rate": 1.7209330943698644e-05, "loss": 0.5993421673774719, "step": 4327 }, { "epoch": 0.7997135961012114, "grad_norm": 0.07059329003095627, "learning_rate": 1.720794831461587e-05, "loss": 0.6256791949272156, "step": 4328 }, { "epoch": 0.7998983728101073, "grad_norm": 0.07519291341304779, "learning_rate": 1.7206565398679306e-05, "loss": 0.5259360074996948, "step": 4329 }, { "epoch": 0.8000831495190032, "grad_norm": 0.06302302330732346, "learning_rate": 1.7205182195943983e-05, "loss": 0.4448622763156891, "step": 4330 }, { "epoch": 0.800267926227899, "grad_norm": 0.08149128407239914, "learning_rate": 1.720379870646495e-05, "loss": 0.47039729356765747, "step": 4331 }, { "epoch": 0.8004527029367948, "grad_norm": 0.0848081186413765, "learning_rate": 1.720241493029727e-05, "loss": 0.7555239200592041, "step": 4332 }, { "epoch": 0.8006374796456907, "grad_norm": 0.05285144969820976, "learning_rate": 1.7201030867496005e-05, "loss": 0.41368257999420166, "step": 4333 }, { "epoch": 0.8008222563545865, "grad_norm": 0.07442737370729446, "learning_rate": 1.7199646518116243e-05, "loss": 0.7890750765800476, "step": 4334 }, { "epoch": 0.8010070330634823, "grad_norm": 0.06920800358057022, "learning_rate": 1.7198261882213073e-05, "loss": 0.5713163018226624, "step": 4335 }, { "epoch": 0.8011918097723781, "grad_norm": 0.07392875850200653, "learning_rate": 1.7196876959841607e-05, "loss": 0.5265414118766785, "step": 4336 }, { "epoch": 0.8013765864812741, "grad_norm": 0.07111220806837082, "learning_rate": 1.719549175105695e-05, "loss": 0.6268044710159302, "step": 4337 }, { "epoch": 0.8015613631901699, "grad_norm": 0.06094978004693985, "learning_rate": 1.719410625591423e-05, "loss": 0.5070362091064453, "step": 4338 }, { "epoch": 0.8017461398990657, "grad_norm": 0.06945740431547165, "learning_rate": 1.7192720474468592e-05, "loss": 0.47703972458839417, "step": 4339 }, { "epoch": 0.8019309166079616, "grad_norm": 0.08109176903963089, "learning_rate": 1.719133440677518e-05, "loss": 1.0706208944320679, "step": 4340 }, { "epoch": 0.8021156933168574, "grad_norm": 0.0658700242638588, "learning_rate": 1.7189948052889155e-05, "loss": 0.5788075923919678, "step": 4341 }, { "epoch": 0.8023004700257532, "grad_norm": 0.08159137517213821, "learning_rate": 1.71885614128657e-05, "loss": 0.742228627204895, "step": 4342 }, { "epoch": 0.802485246734649, "grad_norm": 0.08924289792776108, "learning_rate": 1.7187174486759985e-05, "loss": 0.7699382305145264, "step": 4343 }, { "epoch": 0.802670023443545, "grad_norm": 0.07061409950256348, "learning_rate": 1.7185787274627213e-05, "loss": 0.47217515110969543, "step": 4344 }, { "epoch": 0.8028548001524408, "grad_norm": 0.07241703569889069, "learning_rate": 1.7184399776522586e-05, "loss": 0.6893797516822815, "step": 4345 }, { "epoch": 0.8030395768613366, "grad_norm": 0.05688874423503876, "learning_rate": 1.718301199250133e-05, "loss": 0.43385496735572815, "step": 4346 }, { "epoch": 0.8032243535702325, "grad_norm": 0.0645657479763031, "learning_rate": 1.7181623922618665e-05, "loss": 0.6052931547164917, "step": 4347 }, { "epoch": 0.8034091302791283, "grad_norm": 0.07200708985328674, "learning_rate": 1.7180235566929835e-05, "loss": 0.6070054769515991, "step": 4348 }, { "epoch": 0.8035939069880241, "grad_norm": 0.08863953500986099, "learning_rate": 1.71788469254901e-05, "loss": 0.8176451325416565, "step": 4349 }, { "epoch": 0.80377868369692, "grad_norm": 0.07187061756849289, "learning_rate": 1.717745799835471e-05, "loss": 0.6840246319770813, "step": 4350 }, { "epoch": 0.8039634604058159, "grad_norm": 0.06401456892490387, "learning_rate": 1.7176068785578954e-05, "loss": 0.42584607005119324, "step": 4351 }, { "epoch": 0.8041482371147117, "grad_norm": 0.068192258477211, "learning_rate": 1.7174679287218108e-05, "loss": 0.5894313454627991, "step": 4352 }, { "epoch": 0.8043330138236076, "grad_norm": 0.07762222737073898, "learning_rate": 1.7173289503327472e-05, "loss": 0.608502209186554, "step": 4353 }, { "epoch": 0.8045177905325034, "grad_norm": 0.07506255060434341, "learning_rate": 1.7171899433962356e-05, "loss": 0.6749979257583618, "step": 4354 }, { "epoch": 0.8047025672413992, "grad_norm": 0.07582836598157883, "learning_rate": 1.7170509079178084e-05, "loss": 0.7523781061172485, "step": 4355 }, { "epoch": 0.804887343950295, "grad_norm": 0.06109226122498512, "learning_rate": 1.716911843902998e-05, "loss": 0.591269314289093, "step": 4356 }, { "epoch": 0.8050721206591909, "grad_norm": 0.0685863196849823, "learning_rate": 1.7167727513573395e-05, "loss": 0.5930875539779663, "step": 4357 }, { "epoch": 0.8052568973680867, "grad_norm": 0.09155690670013428, "learning_rate": 1.716633630286368e-05, "loss": 0.8301301598548889, "step": 4358 }, { "epoch": 0.8054416740769826, "grad_norm": 0.06751079857349396, "learning_rate": 1.71649448069562e-05, "loss": 0.47847267985343933, "step": 4359 }, { "epoch": 0.8056264507858785, "grad_norm": 0.06919397413730621, "learning_rate": 1.716355302590633e-05, "loss": 0.547621488571167, "step": 4360 }, { "epoch": 0.8058112274947743, "grad_norm": 0.08696835488080978, "learning_rate": 1.7162160959769462e-05, "loss": 0.7215811610221863, "step": 4361 }, { "epoch": 0.8059960042036701, "grad_norm": 0.07177529484033585, "learning_rate": 1.7160768608601e-05, "loss": 0.5491442680358887, "step": 4362 }, { "epoch": 0.806180780912566, "grad_norm": 0.07802259176969528, "learning_rate": 1.7159375972456343e-05, "loss": 0.8298656344413757, "step": 4363 }, { "epoch": 0.8063655576214618, "grad_norm": 0.0680144727230072, "learning_rate": 1.7157983051390926e-05, "loss": 0.5937074422836304, "step": 4364 }, { "epoch": 0.8065503343303576, "grad_norm": 0.08034884929656982, "learning_rate": 1.7156589845460177e-05, "loss": 0.6708976030349731, "step": 4365 }, { "epoch": 0.8067351110392535, "grad_norm": 0.0692923367023468, "learning_rate": 1.7155196354719543e-05, "loss": 0.4966511130332947, "step": 4366 }, { "epoch": 0.8069198877481494, "grad_norm": 0.06150248274207115, "learning_rate": 1.715380257922448e-05, "loss": 0.5387994647026062, "step": 4367 }, { "epoch": 0.8071046644570452, "grad_norm": 0.06653249263763428, "learning_rate": 1.7152408519030457e-05, "loss": 0.49013054370880127, "step": 4368 }, { "epoch": 0.807289441165941, "grad_norm": 0.06323239952325821, "learning_rate": 1.715101417419295e-05, "loss": 0.4928307831287384, "step": 4369 }, { "epoch": 0.8074742178748369, "grad_norm": 0.07590825110673904, "learning_rate": 1.7149619544767452e-05, "loss": 0.7191093564033508, "step": 4370 }, { "epoch": 0.8076589945837327, "grad_norm": 0.07575014978647232, "learning_rate": 1.7148224630809463e-05, "loss": 0.6842823624610901, "step": 4371 }, { "epoch": 0.8078437712926285, "grad_norm": 0.058443885296583176, "learning_rate": 1.71468294323745e-05, "loss": 0.43093398213386536, "step": 4372 }, { "epoch": 0.8080285480015245, "grad_norm": 0.06890040636062622, "learning_rate": 1.7145433949518083e-05, "loss": 0.7078091502189636, "step": 4373 }, { "epoch": 0.8082133247104203, "grad_norm": 0.056887783110141754, "learning_rate": 1.7144038182295752e-05, "loss": 0.39555490016937256, "step": 4374 }, { "epoch": 0.8083981014193161, "grad_norm": 0.08662290126085281, "learning_rate": 1.7142642130763048e-05, "loss": 0.7448327541351318, "step": 4375 }, { "epoch": 0.808582878128212, "grad_norm": 0.07885993272066116, "learning_rate": 1.714124579497554e-05, "loss": 0.6442118287086487, "step": 4376 }, { "epoch": 0.8087676548371078, "grad_norm": 0.06210273504257202, "learning_rate": 1.7139849174988786e-05, "loss": 0.5477867126464844, "step": 4377 }, { "epoch": 0.8089524315460036, "grad_norm": 0.06308577209711075, "learning_rate": 1.7138452270858376e-05, "loss": 0.5083187222480774, "step": 4378 }, { "epoch": 0.8091372082548994, "grad_norm": 0.0746660828590393, "learning_rate": 1.7137055082639898e-05, "loss": 0.5498301982879639, "step": 4379 }, { "epoch": 0.8093219849637953, "grad_norm": 0.08194633573293686, "learning_rate": 1.7135657610388955e-05, "loss": 0.7917114496231079, "step": 4380 }, { "epoch": 0.8095067616726912, "grad_norm": 0.0716506838798523, "learning_rate": 1.7134259854161164e-05, "loss": 0.5995321869850159, "step": 4381 }, { "epoch": 0.809691538381587, "grad_norm": 0.06110651046037674, "learning_rate": 1.7132861814012154e-05, "loss": 0.5288779735565186, "step": 4382 }, { "epoch": 0.8098763150904829, "grad_norm": 0.07181107252836227, "learning_rate": 1.7131463489997558e-05, "loss": 0.6976112127304077, "step": 4383 }, { "epoch": 0.8100610917993787, "grad_norm": 0.07864098995923996, "learning_rate": 1.7130064882173026e-05, "loss": 0.6208000779151917, "step": 4384 }, { "epoch": 0.8102458685082745, "grad_norm": 0.07953057438135147, "learning_rate": 1.712866599059422e-05, "loss": 0.7169456481933594, "step": 4385 }, { "epoch": 0.8104306452171703, "grad_norm": 0.0593542642891407, "learning_rate": 1.712726681531681e-05, "loss": 0.4753279685974121, "step": 4386 }, { "epoch": 0.8106154219260662, "grad_norm": 0.07157573103904724, "learning_rate": 1.7125867356396476e-05, "loss": 0.5962673425674438, "step": 4387 }, { "epoch": 0.8108001986349621, "grad_norm": 0.08001889288425446, "learning_rate": 1.712446761388892e-05, "loss": 0.6433578729629517, "step": 4388 }, { "epoch": 0.8109849753438579, "grad_norm": 0.07539349794387817, "learning_rate": 1.712306758784984e-05, "loss": 0.6595147848129272, "step": 4389 }, { "epoch": 0.8111697520527538, "grad_norm": 0.0677788108587265, "learning_rate": 1.7121667278334954e-05, "loss": 0.5246864557266235, "step": 4390 }, { "epoch": 0.8113545287616496, "grad_norm": 0.07340111583471298, "learning_rate": 1.7120266685399992e-05, "loss": 0.6696658730506897, "step": 4391 }, { "epoch": 0.8115393054705454, "grad_norm": 0.08444202691316605, "learning_rate": 1.7118865809100695e-05, "loss": 0.649530291557312, "step": 4392 }, { "epoch": 0.8117240821794413, "grad_norm": 0.06394477933645248, "learning_rate": 1.7117464649492807e-05, "loss": 0.5001380443572998, "step": 4393 }, { "epoch": 0.8119088588883371, "grad_norm": 0.058860085904598236, "learning_rate": 1.7116063206632098e-05, "loss": 0.4476807415485382, "step": 4394 }, { "epoch": 0.812093635597233, "grad_norm": 0.06653957813978195, "learning_rate": 1.711466148057433e-05, "loss": 0.6362738609313965, "step": 4395 }, { "epoch": 0.8122784123061288, "grad_norm": 0.08320585638284683, "learning_rate": 1.7113259471375304e-05, "loss": 0.829651951789856, "step": 4396 }, { "epoch": 0.8124631890150247, "grad_norm": 0.05259089916944504, "learning_rate": 1.71118571790908e-05, "loss": 0.46883735060691833, "step": 4397 }, { "epoch": 0.8126479657239205, "grad_norm": 0.07715387642383575, "learning_rate": 1.7110454603776627e-05, "loss": 0.6277435421943665, "step": 4398 }, { "epoch": 0.8128327424328163, "grad_norm": 0.0638882964849472, "learning_rate": 1.7109051745488613e-05, "loss": 0.4864892363548279, "step": 4399 }, { "epoch": 0.8130175191417122, "grad_norm": 0.07390380650758743, "learning_rate": 1.7107648604282574e-05, "loss": 0.5733029842376709, "step": 4400 }, { "epoch": 0.813202295850608, "grad_norm": 0.07391858845949173, "learning_rate": 1.7106245180214363e-05, "loss": 0.6999667882919312, "step": 4401 }, { "epoch": 0.8133870725595038, "grad_norm": 0.06986992061138153, "learning_rate": 1.7104841473339827e-05, "loss": 0.5703665614128113, "step": 4402 }, { "epoch": 0.8135718492683998, "grad_norm": 0.0572233721613884, "learning_rate": 1.710343748371482e-05, "loss": 0.4385322332382202, "step": 4403 }, { "epoch": 0.8137566259772956, "grad_norm": 0.07142923772335052, "learning_rate": 1.7102033211395236e-05, "loss": 0.6870672106742859, "step": 4404 }, { "epoch": 0.8139414026861914, "grad_norm": 0.07860967516899109, "learning_rate": 1.7100628656436944e-05, "loss": 0.7508385181427002, "step": 4405 }, { "epoch": 0.8141261793950872, "grad_norm": 0.084261454641819, "learning_rate": 1.7099223818895848e-05, "loss": 0.5924925804138184, "step": 4406 }, { "epoch": 0.8143109561039831, "grad_norm": 0.05773601308465004, "learning_rate": 1.7097818698827853e-05, "loss": 0.5639837980270386, "step": 4407 }, { "epoch": 0.8144957328128789, "grad_norm": 0.06550680100917816, "learning_rate": 1.709641329628888e-05, "loss": 0.5971671342849731, "step": 4408 }, { "epoch": 0.8146805095217747, "grad_norm": 0.058647219091653824, "learning_rate": 1.709500761133486e-05, "loss": 0.5427902936935425, "step": 4409 }, { "epoch": 0.8148652862306707, "grad_norm": 0.0689467191696167, "learning_rate": 1.7093601644021736e-05, "loss": 0.7049939036369324, "step": 4410 }, { "epoch": 0.8150500629395665, "grad_norm": 0.08845806121826172, "learning_rate": 1.7092195394405457e-05, "loss": 0.7434052228927612, "step": 4411 }, { "epoch": 0.8152348396484623, "grad_norm": 0.06892676651477814, "learning_rate": 1.7090788862541995e-05, "loss": 0.5471463203430176, "step": 4412 }, { "epoch": 0.8154196163573582, "grad_norm": 0.053055547177791595, "learning_rate": 1.7089382048487314e-05, "loss": 0.4262996315956116, "step": 4413 }, { "epoch": 0.815604393066254, "grad_norm": 0.07068893313407898, "learning_rate": 1.708797495229741e-05, "loss": 0.6667650938034058, "step": 4414 }, { "epoch": 0.8157891697751498, "grad_norm": 0.0889437347650528, "learning_rate": 1.7086567574028282e-05, "loss": 0.7486896514892578, "step": 4415 }, { "epoch": 0.8159739464840456, "grad_norm": 0.06847400963306427, "learning_rate": 1.708515991373593e-05, "loss": 0.5367305874824524, "step": 4416 }, { "epoch": 0.8161587231929416, "grad_norm": 0.08452947437763214, "learning_rate": 1.708375197147638e-05, "loss": 0.827254593372345, "step": 4417 }, { "epoch": 0.8163434999018374, "grad_norm": 0.06215844675898552, "learning_rate": 1.7082343747305668e-05, "loss": 0.5081216096878052, "step": 4418 }, { "epoch": 0.8165282766107332, "grad_norm": 0.08681292086839676, "learning_rate": 1.7080935241279832e-05, "loss": 0.8006658554077148, "step": 4419 }, { "epoch": 0.8167130533196291, "grad_norm": 0.05513704940676689, "learning_rate": 1.7079526453454923e-05, "loss": 0.55133455991745, "step": 4420 }, { "epoch": 0.8168978300285249, "grad_norm": 0.08391734212636948, "learning_rate": 1.7078117383887016e-05, "loss": 0.7561026811599731, "step": 4421 }, { "epoch": 0.8170826067374207, "grad_norm": 0.06949673593044281, "learning_rate": 1.7076708032632175e-05, "loss": 0.5683756470680237, "step": 4422 }, { "epoch": 0.8172673834463166, "grad_norm": 0.06849777698516846, "learning_rate": 1.70752983997465e-05, "loss": 0.5504254102706909, "step": 4423 }, { "epoch": 0.8174521601552124, "grad_norm": 0.08198640495538712, "learning_rate": 1.7073888485286083e-05, "loss": 0.7536360025405884, "step": 4424 }, { "epoch": 0.8176369368641083, "grad_norm": 0.07069296389818192, "learning_rate": 1.7072478289307037e-05, "loss": 0.607262372970581, "step": 4425 }, { "epoch": 0.8178217135730041, "grad_norm": 0.08360360562801361, "learning_rate": 1.7071067811865477e-05, "loss": 0.7798020243644714, "step": 4426 }, { "epoch": 0.8180064902819, "grad_norm": 0.077246755361557, "learning_rate": 1.7069657053017543e-05, "loss": 0.7054858207702637, "step": 4427 }, { "epoch": 0.8181912669907958, "grad_norm": 0.0620933473110199, "learning_rate": 1.706824601281938e-05, "loss": 0.45810335874557495, "step": 4428 }, { "epoch": 0.8183760436996916, "grad_norm": 0.06633087247610092, "learning_rate": 1.7066834691327133e-05, "loss": 0.6133204698562622, "step": 4429 }, { "epoch": 0.8185608204085875, "grad_norm": 0.06926491856575012, "learning_rate": 1.706542308859698e-05, "loss": 0.4294852018356323, "step": 4430 }, { "epoch": 0.8187455971174833, "grad_norm": 0.06534317135810852, "learning_rate": 1.7064011204685093e-05, "loss": 0.5917895436286926, "step": 4431 }, { "epoch": 0.8189303738263792, "grad_norm": 0.06398814916610718, "learning_rate": 1.7062599039647656e-05, "loss": 0.5340343713760376, "step": 4432 }, { "epoch": 0.8191151505352751, "grad_norm": 0.06289204210042953, "learning_rate": 1.7061186593540876e-05, "loss": 0.5285554528236389, "step": 4433 }, { "epoch": 0.8192999272441709, "grad_norm": 0.0706745833158493, "learning_rate": 1.705977386642096e-05, "loss": 0.6111737489700317, "step": 4434 }, { "epoch": 0.8194847039530667, "grad_norm": 0.07645706087350845, "learning_rate": 1.705836085834413e-05, "loss": 0.7045507431030273, "step": 4435 }, { "epoch": 0.8196694806619625, "grad_norm": 0.08695833384990692, "learning_rate": 1.7056947569366624e-05, "loss": 0.614539384841919, "step": 4436 }, { "epoch": 0.8198542573708584, "grad_norm": 0.07618943601846695, "learning_rate": 1.7055533999544682e-05, "loss": 0.6795693635940552, "step": 4437 }, { "epoch": 0.8200390340797542, "grad_norm": 0.09049010276794434, "learning_rate": 1.705412014893456e-05, "loss": 0.7961665391921997, "step": 4438 }, { "epoch": 0.8202238107886501, "grad_norm": 0.06798869371414185, "learning_rate": 1.705270601759253e-05, "loss": 0.6422942280769348, "step": 4439 }, { "epoch": 0.820408587497546, "grad_norm": 0.08138899505138397, "learning_rate": 1.705129160557486e-05, "loss": 0.6776303052902222, "step": 4440 }, { "epoch": 0.8205933642064418, "grad_norm": 0.07137871533632278, "learning_rate": 1.7049876912937845e-05, "loss": 0.5417454838752747, "step": 4441 }, { "epoch": 0.8207781409153376, "grad_norm": 0.08167102932929993, "learning_rate": 1.7048461939737787e-05, "loss": 0.7033447027206421, "step": 4442 }, { "epoch": 0.8209629176242335, "grad_norm": 0.0888083353638649, "learning_rate": 1.7047046686030995e-05, "loss": 0.6298259496688843, "step": 4443 }, { "epoch": 0.8211476943331293, "grad_norm": 0.07381979376077652, "learning_rate": 1.7045631151873793e-05, "loss": 0.6119664907455444, "step": 4444 }, { "epoch": 0.8213324710420251, "grad_norm": 0.08775873482227325, "learning_rate": 1.7044215337322512e-05, "loss": 0.697877824306488, "step": 4445 }, { "epoch": 0.8215172477509209, "grad_norm": 0.08506224304437637, "learning_rate": 1.70427992424335e-05, "loss": 0.7121521234512329, "step": 4446 }, { "epoch": 0.8217020244598169, "grad_norm": 0.06776197999715805, "learning_rate": 1.7041382867263116e-05, "loss": 0.587286114692688, "step": 4447 }, { "epoch": 0.8218868011687127, "grad_norm": 0.05220887064933777, "learning_rate": 1.703996621186772e-05, "loss": 0.5562371015548706, "step": 4448 }, { "epoch": 0.8220715778776085, "grad_norm": 0.07543464750051498, "learning_rate": 1.7038549276303693e-05, "loss": 0.7780980467796326, "step": 4449 }, { "epoch": 0.8222563545865044, "grad_norm": 0.07507284730672836, "learning_rate": 1.7037132060627428e-05, "loss": 0.4695199728012085, "step": 4450 }, { "epoch": 0.8224411312954002, "grad_norm": 0.04930936545133591, "learning_rate": 1.7035714564895324e-05, "loss": 0.4211044907569885, "step": 4451 }, { "epoch": 0.822625908004296, "grad_norm": 0.06219407171010971, "learning_rate": 1.7034296789163788e-05, "loss": 0.4526977241039276, "step": 4452 }, { "epoch": 0.8228106847131919, "grad_norm": 0.09974870830774307, "learning_rate": 1.7032878733489252e-05, "loss": 0.8299565315246582, "step": 4453 }, { "epoch": 0.8229954614220878, "grad_norm": 0.0561041384935379, "learning_rate": 1.7031460397928142e-05, "loss": 0.427130788564682, "step": 4454 }, { "epoch": 0.8231802381309836, "grad_norm": 0.0706462636590004, "learning_rate": 1.703004178253691e-05, "loss": 0.654176652431488, "step": 4455 }, { "epoch": 0.8233650148398794, "grad_norm": 0.07966897636651993, "learning_rate": 1.7028622887372005e-05, "loss": 0.5644944310188293, "step": 4456 }, { "epoch": 0.8235497915487753, "grad_norm": 0.061665527522563934, "learning_rate": 1.7027203712489902e-05, "loss": 0.5428031086921692, "step": 4457 }, { "epoch": 0.8237345682576711, "grad_norm": 0.05557411164045334, "learning_rate": 1.7025784257947077e-05, "loss": 0.3972078561782837, "step": 4458 }, { "epoch": 0.8239193449665669, "grad_norm": 0.0654132142663002, "learning_rate": 1.7024364523800015e-05, "loss": 0.6011388897895813, "step": 4459 }, { "epoch": 0.8241041216754628, "grad_norm": 0.07930979877710342, "learning_rate": 1.7022944510105226e-05, "loss": 0.6692883968353271, "step": 4460 }, { "epoch": 0.8242888983843587, "grad_norm": 0.0695558562874794, "learning_rate": 1.7021524216919217e-05, "loss": 0.6444147825241089, "step": 4461 }, { "epoch": 0.8244736750932545, "grad_norm": 0.08044788986444473, "learning_rate": 1.702010364429851e-05, "loss": 0.6611325740814209, "step": 4462 }, { "epoch": 0.8246584518021504, "grad_norm": 0.06428727507591248, "learning_rate": 1.7018682792299644e-05, "loss": 0.5038701891899109, "step": 4463 }, { "epoch": 0.8248432285110462, "grad_norm": 0.08757028728723526, "learning_rate": 1.701726166097916e-05, "loss": 0.7276875972747803, "step": 4464 }, { "epoch": 0.825028005219942, "grad_norm": 0.07670968770980835, "learning_rate": 1.7015840250393615e-05, "loss": 0.6314493417739868, "step": 4465 }, { "epoch": 0.8252127819288378, "grad_norm": 0.08930184692144394, "learning_rate": 1.7014418560599578e-05, "loss": 0.6691626310348511, "step": 4466 }, { "epoch": 0.8253975586377337, "grad_norm": 0.08691312372684479, "learning_rate": 1.7012996591653625e-05, "loss": 0.8360946774482727, "step": 4467 }, { "epoch": 0.8255823353466295, "grad_norm": 0.0800599530339241, "learning_rate": 1.7011574343612353e-05, "loss": 0.754214346408844, "step": 4468 }, { "epoch": 0.8257671120555254, "grad_norm": 0.0611540786921978, "learning_rate": 1.7010151816532356e-05, "loss": 0.5543545484542847, "step": 4469 }, { "epoch": 0.8259518887644213, "grad_norm": 0.06635784357786179, "learning_rate": 1.700872901047025e-05, "loss": 0.5271308422088623, "step": 4470 }, { "epoch": 0.8261366654733171, "grad_norm": 0.061269983649253845, "learning_rate": 1.7007305925482658e-05, "loss": 0.4760308563709259, "step": 4471 }, { "epoch": 0.8263214421822129, "grad_norm": 0.06325298547744751, "learning_rate": 1.700588256162621e-05, "loss": 0.5194886326789856, "step": 4472 }, { "epoch": 0.8265062188911088, "grad_norm": 0.0797019973397255, "learning_rate": 1.7004458918957555e-05, "loss": 0.5213291645050049, "step": 4473 }, { "epoch": 0.8266909956000046, "grad_norm": 0.06251664459705353, "learning_rate": 1.700303499753335e-05, "loss": 0.4912989139556885, "step": 4474 }, { "epoch": 0.8268757723089004, "grad_norm": 0.06516954302787781, "learning_rate": 1.7001610797410265e-05, "loss": 0.4361167252063751, "step": 4475 }, { "epoch": 0.8270605490177964, "grad_norm": 0.07414435595273972, "learning_rate": 1.700018631864497e-05, "loss": 0.615641176700592, "step": 4476 }, { "epoch": 0.8272453257266922, "grad_norm": 0.09901707619428635, "learning_rate": 1.6998761561294162e-05, "loss": 0.8456782102584839, "step": 4477 }, { "epoch": 0.827430102435588, "grad_norm": 0.060959070920944214, "learning_rate": 1.6997336525414543e-05, "loss": 0.5491607189178467, "step": 4478 }, { "epoch": 0.8276148791444838, "grad_norm": 0.061658356338739395, "learning_rate": 1.699591121106282e-05, "loss": 0.43196821212768555, "step": 4479 }, { "epoch": 0.8277996558533797, "grad_norm": 0.07034077495336533, "learning_rate": 1.699448561829572e-05, "loss": 0.6002528667449951, "step": 4480 }, { "epoch": 0.8279844325622755, "grad_norm": 0.07465361058712006, "learning_rate": 1.6993059747169975e-05, "loss": 0.6691159605979919, "step": 4481 }, { "epoch": 0.8281692092711713, "grad_norm": 0.0629972591996193, "learning_rate": 1.6991633597742327e-05, "loss": 0.6098475456237793, "step": 4482 }, { "epoch": 0.8283539859800673, "grad_norm": 0.08248724043369293, "learning_rate": 1.6990207170069536e-05, "loss": 0.7085081934928894, "step": 4483 }, { "epoch": 0.8285387626889631, "grad_norm": 0.073788121342659, "learning_rate": 1.698878046420837e-05, "loss": 0.5459364652633667, "step": 4484 }, { "epoch": 0.8287235393978589, "grad_norm": 0.07505763322114944, "learning_rate": 1.6987353480215613e-05, "loss": 0.7524222135543823, "step": 4485 }, { "epoch": 0.8289083161067548, "grad_norm": 0.07289927452802658, "learning_rate": 1.698592621814804e-05, "loss": 0.683181643486023, "step": 4486 }, { "epoch": 0.8290930928156506, "grad_norm": 0.052059952169656754, "learning_rate": 1.6984498678062462e-05, "loss": 0.4803551137447357, "step": 4487 }, { "epoch": 0.8292778695245464, "grad_norm": 0.09347348660230637, "learning_rate": 1.698307086001569e-05, "loss": 0.7751423716545105, "step": 4488 }, { "epoch": 0.8294626462334422, "grad_norm": 0.05178924649953842, "learning_rate": 1.6981642764064544e-05, "loss": 0.4005620777606964, "step": 4489 }, { "epoch": 0.8296474229423381, "grad_norm": 0.0625031441450119, "learning_rate": 1.698021439026586e-05, "loss": 0.472150593996048, "step": 4490 }, { "epoch": 0.829832199651234, "grad_norm": 0.0583854503929615, "learning_rate": 1.697878573867648e-05, "loss": 0.43358004093170166, "step": 4491 }, { "epoch": 0.8300169763601298, "grad_norm": 0.05779613181948662, "learning_rate": 1.6977356809353263e-05, "loss": 0.5184218883514404, "step": 4492 }, { "epoch": 0.8302017530690257, "grad_norm": 0.07386235892772675, "learning_rate": 1.697592760235307e-05, "loss": 0.5868077874183655, "step": 4493 }, { "epoch": 0.8303865297779215, "grad_norm": 0.06986936181783676, "learning_rate": 1.6974498117732788e-05, "loss": 0.49657654762268066, "step": 4494 }, { "epoch": 0.8305713064868173, "grad_norm": 0.07015872746706009, "learning_rate": 1.69730683555493e-05, "loss": 0.6315212249755859, "step": 4495 }, { "epoch": 0.8307560831957131, "grad_norm": 0.06291679292917252, "learning_rate": 1.6971638315859507e-05, "loss": 0.4786362648010254, "step": 4496 }, { "epoch": 0.830940859904609, "grad_norm": 0.07277002930641174, "learning_rate": 1.697020799872032e-05, "loss": 0.6787610650062561, "step": 4497 }, { "epoch": 0.8311256366135049, "grad_norm": 0.08893369883298874, "learning_rate": 1.6968777404188662e-05, "loss": 0.5605788230895996, "step": 4498 }, { "epoch": 0.8313104133224007, "grad_norm": 0.06755659729242325, "learning_rate": 1.6967346532321466e-05, "loss": 0.5297644138336182, "step": 4499 }, { "epoch": 0.8314951900312966, "grad_norm": 0.05591427907347679, "learning_rate": 1.6965915383175676e-05, "loss": 0.3980846703052521, "step": 4500 }, { "epoch": 0.8314951900312966, "eval_loss": 0.6395584940910339, "eval_runtime": 158.8241, "eval_samples_per_second": 114.775, "eval_steps_per_second": 14.349, "step": 4500 }, { "epoch": 0.8316799667401924, "grad_norm": 0.06723398715257645, "learning_rate": 1.6964483956808248e-05, "loss": 0.537386953830719, "step": 4501 }, { "epoch": 0.8318647434490882, "grad_norm": 0.058650754392147064, "learning_rate": 1.6963052253276145e-05, "loss": 0.5143908262252808, "step": 4502 }, { "epoch": 0.8320495201579841, "grad_norm": 0.09709373861551285, "learning_rate": 1.6961620272636346e-05, "loss": 0.8258609771728516, "step": 4503 }, { "epoch": 0.8322342968668799, "grad_norm": 0.0741024762392044, "learning_rate": 1.6960188014945847e-05, "loss": 0.6264018416404724, "step": 4504 }, { "epoch": 0.8324190735757758, "grad_norm": 0.06891115009784698, "learning_rate": 1.6958755480261638e-05, "loss": 0.6287631988525391, "step": 4505 }, { "epoch": 0.8326038502846717, "grad_norm": 0.06442379951477051, "learning_rate": 1.695732266864073e-05, "loss": 0.5368323922157288, "step": 4506 }, { "epoch": 0.8327886269935675, "grad_norm": 0.08387748897075653, "learning_rate": 1.6955889580140145e-05, "loss": 0.8382667899131775, "step": 4507 }, { "epoch": 0.8329734037024633, "grad_norm": 0.07572843134403229, "learning_rate": 1.6954456214816918e-05, "loss": 0.6271460652351379, "step": 4508 }, { "epoch": 0.8331581804113591, "grad_norm": 0.06325176358222961, "learning_rate": 1.6953022572728095e-05, "loss": 0.6080461740493774, "step": 4509 }, { "epoch": 0.833342957120255, "grad_norm": 0.07202797383069992, "learning_rate": 1.6951588653930722e-05, "loss": 0.5612244009971619, "step": 4510 }, { "epoch": 0.8335277338291508, "grad_norm": 0.07825497537851334, "learning_rate": 1.6950154458481875e-05, "loss": 0.615082859992981, "step": 4511 }, { "epoch": 0.8337125105380466, "grad_norm": 0.09370280802249908, "learning_rate": 1.694871998643862e-05, "loss": 0.7465894222259521, "step": 4512 }, { "epoch": 0.8338972872469426, "grad_norm": 0.07400539517402649, "learning_rate": 1.694728523785805e-05, "loss": 0.6992408037185669, "step": 4513 }, { "epoch": 0.8340820639558384, "grad_norm": 0.05352245643734932, "learning_rate": 1.6945850212797265e-05, "loss": 0.4538097381591797, "step": 4514 }, { "epoch": 0.8342668406647342, "grad_norm": 0.057059090584516525, "learning_rate": 1.6944414911313368e-05, "loss": 0.4463070333003998, "step": 4515 }, { "epoch": 0.83445161737363, "grad_norm": 0.06638690829277039, "learning_rate": 1.694297933346349e-05, "loss": 0.4994492530822754, "step": 4516 }, { "epoch": 0.8346363940825259, "grad_norm": 0.054007966071367264, "learning_rate": 1.6941543479304748e-05, "loss": 0.3897664248943329, "step": 4517 }, { "epoch": 0.8348211707914217, "grad_norm": 0.07031978666782379, "learning_rate": 1.69401073488943e-05, "loss": 0.541957437992096, "step": 4518 }, { "epoch": 0.8350059475003175, "grad_norm": 0.06689532101154327, "learning_rate": 1.6938670942289292e-05, "loss": 0.6915385723114014, "step": 4519 }, { "epoch": 0.8351907242092135, "grad_norm": 0.0767417848110199, "learning_rate": 1.6937234259546888e-05, "loss": 0.614142119884491, "step": 4520 }, { "epoch": 0.8353755009181093, "grad_norm": 0.06671950221061707, "learning_rate": 1.693579730072426e-05, "loss": 0.5127571225166321, "step": 4521 }, { "epoch": 0.8355602776270051, "grad_norm": 0.07585155218839645, "learning_rate": 1.6934360065878603e-05, "loss": 0.5769263505935669, "step": 4522 }, { "epoch": 0.835745054335901, "grad_norm": 0.07272087782621384, "learning_rate": 1.693292255506711e-05, "loss": 0.5349190831184387, "step": 4523 }, { "epoch": 0.8359298310447968, "grad_norm": 0.062750905752182, "learning_rate": 1.693148476834699e-05, "loss": 0.47064346075057983, "step": 4524 }, { "epoch": 0.8361146077536926, "grad_norm": 0.0642617717385292, "learning_rate": 1.693004670577546e-05, "loss": 0.5044402480125427, "step": 4525 }, { "epoch": 0.8362993844625884, "grad_norm": 0.0815463587641716, "learning_rate": 1.692860836740975e-05, "loss": 0.7980261445045471, "step": 4526 }, { "epoch": 0.8364841611714844, "grad_norm": 0.06477741152048111, "learning_rate": 1.692716975330711e-05, "loss": 0.6120722889900208, "step": 4527 }, { "epoch": 0.8366689378803802, "grad_norm": 0.0836218073964119, "learning_rate": 1.6925730863524782e-05, "loss": 0.720973789691925, "step": 4528 }, { "epoch": 0.836853714589276, "grad_norm": 0.07507210969924927, "learning_rate": 1.6924291698120034e-05, "loss": 0.5757606625556946, "step": 4529 }, { "epoch": 0.8370384912981719, "grad_norm": 0.07866434752941132, "learning_rate": 1.6922852257150142e-05, "loss": 0.579961359500885, "step": 4530 }, { "epoch": 0.8372232680070677, "grad_norm": 0.07555273920297623, "learning_rate": 1.6921412540672385e-05, "loss": 0.636628270149231, "step": 4531 }, { "epoch": 0.8374080447159635, "grad_norm": 0.054431378841400146, "learning_rate": 1.6919972548744068e-05, "loss": 0.42866355180740356, "step": 4532 }, { "epoch": 0.8375928214248594, "grad_norm": 0.07677029073238373, "learning_rate": 1.691853228142249e-05, "loss": 0.655987024307251, "step": 4533 }, { "epoch": 0.8377775981337552, "grad_norm": 0.08130481839179993, "learning_rate": 1.6917091738764972e-05, "loss": 0.7719659805297852, "step": 4534 }, { "epoch": 0.8379623748426511, "grad_norm": 0.06564033031463623, "learning_rate": 1.6915650920828848e-05, "loss": 0.6413950324058533, "step": 4535 }, { "epoch": 0.838147151551547, "grad_norm": 0.07372447848320007, "learning_rate": 1.691420982767145e-05, "loss": 0.5301737189292908, "step": 4536 }, { "epoch": 0.8383319282604428, "grad_norm": 0.0624161921441555, "learning_rate": 1.6912768459350132e-05, "loss": 0.5359398722648621, "step": 4537 }, { "epoch": 0.8385167049693386, "grad_norm": 0.07300139218568802, "learning_rate": 1.6911326815922257e-05, "loss": 0.6078569293022156, "step": 4538 }, { "epoch": 0.8387014816782344, "grad_norm": 0.07421409338712692, "learning_rate": 1.6909884897445202e-05, "loss": 0.5118612051010132, "step": 4539 }, { "epoch": 0.8388862583871303, "grad_norm": 0.06582600623369217, "learning_rate": 1.690844270397634e-05, "loss": 0.5924390554428101, "step": 4540 }, { "epoch": 0.8390710350960261, "grad_norm": 0.07796216011047363, "learning_rate": 1.6907000235573076e-05, "loss": 0.5968838930130005, "step": 4541 }, { "epoch": 0.839255811804922, "grad_norm": 0.06646906584501266, "learning_rate": 1.690555749229281e-05, "loss": 0.5918703079223633, "step": 4542 }, { "epoch": 0.8394405885138179, "grad_norm": 0.07779476046562195, "learning_rate": 1.6904114474192962e-05, "loss": 0.7303890585899353, "step": 4543 }, { "epoch": 0.8396253652227137, "grad_norm": 0.07487162947654724, "learning_rate": 1.6902671181330957e-05, "loss": 0.6921830177307129, "step": 4544 }, { "epoch": 0.8398101419316095, "grad_norm": 0.05664646998047829, "learning_rate": 1.6901227613764235e-05, "loss": 0.42495113611221313, "step": 4545 }, { "epoch": 0.8399949186405054, "grad_norm": 0.06526501476764679, "learning_rate": 1.6899783771550247e-05, "loss": 0.504043459892273, "step": 4546 }, { "epoch": 0.8401796953494012, "grad_norm": 0.05720466375350952, "learning_rate": 1.689833965474645e-05, "loss": 0.4433921277523041, "step": 4547 }, { "epoch": 0.840364472058297, "grad_norm": 0.07096187770366669, "learning_rate": 1.6896895263410313e-05, "loss": 0.5995004773139954, "step": 4548 }, { "epoch": 0.840549248767193, "grad_norm": 0.0845320001244545, "learning_rate": 1.6895450597599326e-05, "loss": 0.764687716960907, "step": 4549 }, { "epoch": 0.8407340254760888, "grad_norm": 0.06377948075532913, "learning_rate": 1.689400565737098e-05, "loss": 0.488741010427475, "step": 4550 }, { "epoch": 0.8409188021849846, "grad_norm": 0.0828799456357956, "learning_rate": 1.6892560442782775e-05, "loss": 0.6057949662208557, "step": 4551 }, { "epoch": 0.8411035788938804, "grad_norm": 0.08121705800294876, "learning_rate": 1.689111495389223e-05, "loss": 0.7909074425697327, "step": 4552 }, { "epoch": 0.8412883556027763, "grad_norm": 0.07321442663669586, "learning_rate": 1.688966919075687e-05, "loss": 0.6617448329925537, "step": 4553 }, { "epoch": 0.8414731323116721, "grad_norm": 0.07743881642818451, "learning_rate": 1.6888223153434226e-05, "loss": 0.5645763278007507, "step": 4554 }, { "epoch": 0.8416579090205679, "grad_norm": 0.07393790036439896, "learning_rate": 1.6886776841981856e-05, "loss": 0.6077877283096313, "step": 4555 }, { "epoch": 0.8418426857294637, "grad_norm": 0.06216173246502876, "learning_rate": 1.6885330256457312e-05, "loss": 0.5201695561408997, "step": 4556 }, { "epoch": 0.8420274624383597, "grad_norm": 0.06612975895404816, "learning_rate": 1.6883883396918165e-05, "loss": 0.541587233543396, "step": 4557 }, { "epoch": 0.8422122391472555, "grad_norm": 0.07147186994552612, "learning_rate": 1.6882436263421996e-05, "loss": 0.5515293478965759, "step": 4558 }, { "epoch": 0.8423970158561513, "grad_norm": 0.07946402579545975, "learning_rate": 1.6880988856026394e-05, "loss": 0.7292967438697815, "step": 4559 }, { "epoch": 0.8425817925650472, "grad_norm": 0.07769119739532471, "learning_rate": 1.687954117478897e-05, "loss": 0.6431043148040771, "step": 4560 }, { "epoch": 0.842766569273943, "grad_norm": 0.06636208295822144, "learning_rate": 1.6878093219767324e-05, "loss": 0.4926970303058624, "step": 4561 }, { "epoch": 0.8429513459828388, "grad_norm": 0.0701172798871994, "learning_rate": 1.6876644991019086e-05, "loss": 0.6440094709396362, "step": 4562 }, { "epoch": 0.8431361226917347, "grad_norm": 0.08018402010202408, "learning_rate": 1.6875196488601895e-05, "loss": 0.6849448084831238, "step": 4563 }, { "epoch": 0.8433208994006306, "grad_norm": 0.07953750342130661, "learning_rate": 1.6873747712573395e-05, "loss": 0.6987465023994446, "step": 4564 }, { "epoch": 0.8435056761095264, "grad_norm": 0.08021809160709381, "learning_rate": 1.6872298662991237e-05, "loss": 0.7143018841743469, "step": 4565 }, { "epoch": 0.8436904528184223, "grad_norm": 0.05858346447348595, "learning_rate": 1.6870849339913097e-05, "loss": 0.4057566523551941, "step": 4566 }, { "epoch": 0.8438752295273181, "grad_norm": 0.05692509189248085, "learning_rate": 1.686939974339665e-05, "loss": 0.4452887773513794, "step": 4567 }, { "epoch": 0.8440600062362139, "grad_norm": 0.09390457719564438, "learning_rate": 1.6867949873499578e-05, "loss": 0.7715519666671753, "step": 4568 }, { "epoch": 0.8442447829451097, "grad_norm": 0.08112477511167526, "learning_rate": 1.6866499730279592e-05, "loss": 0.7168828845024109, "step": 4569 }, { "epoch": 0.8444295596540056, "grad_norm": 0.07794813811779022, "learning_rate": 1.68650493137944e-05, "loss": 0.7800693511962891, "step": 4570 }, { "epoch": 0.8446143363629015, "grad_norm": 0.06772364675998688, "learning_rate": 1.6863598624101723e-05, "loss": 0.5149499773979187, "step": 4571 }, { "epoch": 0.8447991130717973, "grad_norm": 0.08500882238149643, "learning_rate": 1.6862147661259297e-05, "loss": 0.7767760157585144, "step": 4572 }, { "epoch": 0.8449838897806932, "grad_norm": 0.0779474526643753, "learning_rate": 1.6860696425324857e-05, "loss": 0.6483212113380432, "step": 4573 }, { "epoch": 0.845168666489589, "grad_norm": 0.08208338171243668, "learning_rate": 1.6859244916356167e-05, "loss": 0.6114625930786133, "step": 4574 }, { "epoch": 0.8453534431984848, "grad_norm": 0.052994124591350555, "learning_rate": 1.6857793134410987e-05, "loss": 0.4159746468067169, "step": 4575 }, { "epoch": 0.8455382199073807, "grad_norm": 0.06645326316356659, "learning_rate": 1.68563410795471e-05, "loss": 0.3910166621208191, "step": 4576 }, { "epoch": 0.8457229966162765, "grad_norm": 0.07535551488399506, "learning_rate": 1.6854888751822284e-05, "loss": 0.698360025882721, "step": 4577 }, { "epoch": 0.8459077733251723, "grad_norm": 0.06745228171348572, "learning_rate": 1.6853436151294347e-05, "loss": 0.45721209049224854, "step": 4578 }, { "epoch": 0.8460925500340682, "grad_norm": 0.06511545181274414, "learning_rate": 1.685198327802109e-05, "loss": 0.4750843942165375, "step": 4579 }, { "epoch": 0.8462773267429641, "grad_norm": 0.06708203256130219, "learning_rate": 1.6850530132060334e-05, "loss": 0.5141110420227051, "step": 4580 }, { "epoch": 0.8464621034518599, "grad_norm": 0.07462667673826218, "learning_rate": 1.6849076713469914e-05, "loss": 0.6454291343688965, "step": 4581 }, { "epoch": 0.8466468801607557, "grad_norm": 0.09495879709720612, "learning_rate": 1.6847623022307664e-05, "loss": 0.9474629759788513, "step": 4582 }, { "epoch": 0.8468316568696516, "grad_norm": 0.06724563241004944, "learning_rate": 1.6846169058631448e-05, "loss": 0.6943251490592957, "step": 4583 }, { "epoch": 0.8470164335785474, "grad_norm": 0.06822342425584793, "learning_rate": 1.684471482249912e-05, "loss": 0.46047502756118774, "step": 4584 }, { "epoch": 0.8472012102874432, "grad_norm": 0.07819680124521255, "learning_rate": 1.6843260313968553e-05, "loss": 0.6768338680267334, "step": 4585 }, { "epoch": 0.8473859869963392, "grad_norm": 0.07070040702819824, "learning_rate": 1.6841805533097637e-05, "loss": 0.5495981574058533, "step": 4586 }, { "epoch": 0.847570763705235, "grad_norm": 0.0862039178609848, "learning_rate": 1.684035047994427e-05, "loss": 0.6363370418548584, "step": 4587 }, { "epoch": 0.8477555404141308, "grad_norm": 0.077043317258358, "learning_rate": 1.683889515456635e-05, "loss": 0.7591915130615234, "step": 4588 }, { "epoch": 0.8479403171230266, "grad_norm": 0.09326723963022232, "learning_rate": 1.68374395570218e-05, "loss": 0.7203951478004456, "step": 4589 }, { "epoch": 0.8481250938319225, "grad_norm": 0.06428168714046478, "learning_rate": 1.6835983687368547e-05, "loss": 0.5105397701263428, "step": 4590 }, { "epoch": 0.8483098705408183, "grad_norm": 0.06252838671207428, "learning_rate": 1.683452754566453e-05, "loss": 0.6851661205291748, "step": 4591 }, { "epoch": 0.8484946472497141, "grad_norm": 0.06116794794797897, "learning_rate": 1.68330711319677e-05, "loss": 0.4769090712070465, "step": 4592 }, { "epoch": 0.8486794239586101, "grad_norm": 0.07975073158740997, "learning_rate": 1.6831614446336017e-05, "loss": 0.756597101688385, "step": 4593 }, { "epoch": 0.8488642006675059, "grad_norm": 0.07053950428962708, "learning_rate": 1.6830157488827453e-05, "loss": 0.7950809597969055, "step": 4594 }, { "epoch": 0.8490489773764017, "grad_norm": 0.049777209758758545, "learning_rate": 1.682870025949999e-05, "loss": 0.38451868295669556, "step": 4595 }, { "epoch": 0.8492337540852976, "grad_norm": 0.07076207548379898, "learning_rate": 1.682724275841162e-05, "loss": 0.6204366683959961, "step": 4596 }, { "epoch": 0.8494185307941934, "grad_norm": 0.06023591011762619, "learning_rate": 1.6825784985620348e-05, "loss": 0.4931405484676361, "step": 4597 }, { "epoch": 0.8496033075030892, "grad_norm": 0.06567323207855225, "learning_rate": 1.6824326941184192e-05, "loss": 0.6039255857467651, "step": 4598 }, { "epoch": 0.849788084211985, "grad_norm": 0.06637229770421982, "learning_rate": 1.6822868625161168e-05, "loss": 0.5879940390586853, "step": 4599 }, { "epoch": 0.8499728609208809, "grad_norm": 0.06800197809934616, "learning_rate": 1.6821410037609322e-05, "loss": 0.5560672283172607, "step": 4600 }, { "epoch": 0.8501576376297768, "grad_norm": 0.056423820555210114, "learning_rate": 1.6819951178586696e-05, "loss": 0.44926315546035767, "step": 4601 }, { "epoch": 0.8503424143386726, "grad_norm": 0.06409399956464767, "learning_rate": 1.6818492048151353e-05, "loss": 0.5209833979606628, "step": 4602 }, { "epoch": 0.8505271910475685, "grad_norm": 0.0760723277926445, "learning_rate": 1.681703264636136e-05, "loss": 0.5303493738174438, "step": 4603 }, { "epoch": 0.8507119677564643, "grad_norm": 0.0445675253868103, "learning_rate": 1.681557297327479e-05, "loss": 0.3546523153781891, "step": 4604 }, { "epoch": 0.8508967444653601, "grad_norm": 0.07749450206756592, "learning_rate": 1.6814113028949744e-05, "loss": 0.6340559124946594, "step": 4605 }, { "epoch": 0.851081521174256, "grad_norm": 0.08493492752313614, "learning_rate": 1.6812652813444318e-05, "loss": 0.6882403492927551, "step": 4606 }, { "epoch": 0.8512662978831518, "grad_norm": 0.0660935565829277, "learning_rate": 1.6811192326816618e-05, "loss": 0.5860942602157593, "step": 4607 }, { "epoch": 0.8514510745920477, "grad_norm": 0.07247716933488846, "learning_rate": 1.6809731569124782e-05, "loss": 0.5857391953468323, "step": 4608 }, { "epoch": 0.8516358513009435, "grad_norm": 0.06126458942890167, "learning_rate": 1.6808270540426927e-05, "loss": 0.48983433842658997, "step": 4609 }, { "epoch": 0.8518206280098394, "grad_norm": 0.09214968234300613, "learning_rate": 1.6806809240781205e-05, "loss": 1.0490683317184448, "step": 4610 }, { "epoch": 0.8520054047187352, "grad_norm": 0.06320153176784515, "learning_rate": 1.6805347670245775e-05, "loss": 0.6951673626899719, "step": 4611 }, { "epoch": 0.852190181427631, "grad_norm": 0.08076881617307663, "learning_rate": 1.6803885828878798e-05, "loss": 0.7401670217514038, "step": 4612 }, { "epoch": 0.8523749581365269, "grad_norm": 0.054451070725917816, "learning_rate": 1.680242371673845e-05, "loss": 0.5073938369750977, "step": 4613 }, { "epoch": 0.8525597348454227, "grad_norm": 0.06178588420152664, "learning_rate": 1.680096133388292e-05, "loss": 0.44078755378723145, "step": 4614 }, { "epoch": 0.8527445115543186, "grad_norm": 0.06964290887117386, "learning_rate": 1.6799498680370408e-05, "loss": 0.455206960439682, "step": 4615 }, { "epoch": 0.8529292882632145, "grad_norm": 0.08222243934869766, "learning_rate": 1.679803575625912e-05, "loss": 0.6792607307434082, "step": 4616 }, { "epoch": 0.8531140649721103, "grad_norm": 0.08779963850975037, "learning_rate": 1.6796572561607277e-05, "loss": 0.7054410576820374, "step": 4617 }, { "epoch": 0.8532988416810061, "grad_norm": 0.06393636018037796, "learning_rate": 1.679510909647311e-05, "loss": 0.558661699295044, "step": 4618 }, { "epoch": 0.8534836183899019, "grad_norm": 0.07679730653762817, "learning_rate": 1.679364536091486e-05, "loss": 0.6361806988716125, "step": 4619 }, { "epoch": 0.8536683950987978, "grad_norm": 0.052144911140203476, "learning_rate": 1.6792181354990783e-05, "loss": 0.3589189350605011, "step": 4620 }, { "epoch": 0.8538531718076936, "grad_norm": 0.0488000325858593, "learning_rate": 1.6790717078759134e-05, "loss": 0.3850957155227661, "step": 4621 }, { "epoch": 0.8540379485165894, "grad_norm": 0.08071233332157135, "learning_rate": 1.678925253227819e-05, "loss": 0.5997284650802612, "step": 4622 }, { "epoch": 0.8542227252254854, "grad_norm": 0.07301712781190872, "learning_rate": 1.678778771560624e-05, "loss": 0.6283912062644958, "step": 4623 }, { "epoch": 0.8544075019343812, "grad_norm": 0.07521724700927734, "learning_rate": 1.6786322628801572e-05, "loss": 0.676716685295105, "step": 4624 }, { "epoch": 0.854592278643277, "grad_norm": 0.08184777945280075, "learning_rate": 1.6784857271922497e-05, "loss": 0.8131962418556213, "step": 4625 }, { "epoch": 0.8547770553521729, "grad_norm": 0.0697786808013916, "learning_rate": 1.678339164502733e-05, "loss": 0.6574063301086426, "step": 4626 }, { "epoch": 0.8549618320610687, "grad_norm": 0.05795733258128166, "learning_rate": 1.6781925748174398e-05, "loss": 0.43882185220718384, "step": 4627 }, { "epoch": 0.8551466087699645, "grad_norm": 0.06942173093557358, "learning_rate": 1.6780459581422037e-05, "loss": 0.5867627859115601, "step": 4628 }, { "epoch": 0.8553313854788603, "grad_norm": 0.05828528851270676, "learning_rate": 1.6778993144828598e-05, "loss": 0.42724406719207764, "step": 4629 }, { "epoch": 0.8555161621877563, "grad_norm": 0.0868777185678482, "learning_rate": 1.6777526438452444e-05, "loss": 0.7190034985542297, "step": 4630 }, { "epoch": 0.8557009388966521, "grad_norm": 0.07899882644414902, "learning_rate": 1.6776059462351936e-05, "loss": 0.6514811515808105, "step": 4631 }, { "epoch": 0.8558857156055479, "grad_norm": 0.08145968616008759, "learning_rate": 1.6774592216585466e-05, "loss": 0.7642940282821655, "step": 4632 }, { "epoch": 0.8560704923144438, "grad_norm": 0.06042362004518509, "learning_rate": 1.6773124701211417e-05, "loss": 0.44519340991973877, "step": 4633 }, { "epoch": 0.8562552690233396, "grad_norm": 0.07294254004955292, "learning_rate": 1.6771656916288198e-05, "loss": 0.5128921866416931, "step": 4634 }, { "epoch": 0.8564400457322354, "grad_norm": 0.06889355182647705, "learning_rate": 1.6770188861874223e-05, "loss": 0.5176600217819214, "step": 4635 }, { "epoch": 0.8566248224411313, "grad_norm": 0.05914265289902687, "learning_rate": 1.6768720538027907e-05, "loss": 0.5071340799331665, "step": 4636 }, { "epoch": 0.8568095991500272, "grad_norm": 0.08248062431812286, "learning_rate": 1.6767251944807692e-05, "loss": 0.6715055704116821, "step": 4637 }, { "epoch": 0.856994375858923, "grad_norm": 0.07607916742563248, "learning_rate": 1.676578308227202e-05, "loss": 0.6736531853675842, "step": 4638 }, { "epoch": 0.8571791525678188, "grad_norm": 0.09606055170297623, "learning_rate": 1.676431395047935e-05, "loss": 0.9217409491539001, "step": 4639 }, { "epoch": 0.8573639292767147, "grad_norm": 0.09148918092250824, "learning_rate": 1.6762844549488147e-05, "loss": 0.7480746507644653, "step": 4640 }, { "epoch": 0.8575487059856105, "grad_norm": 0.0659724771976471, "learning_rate": 1.676137487935689e-05, "loss": 0.6606292128562927, "step": 4641 }, { "epoch": 0.8577334826945063, "grad_norm": 0.07820889353752136, "learning_rate": 1.6759904940144067e-05, "loss": 0.6565632224082947, "step": 4642 }, { "epoch": 0.8579182594034022, "grad_norm": 0.07718625664710999, "learning_rate": 1.6758434731908178e-05, "loss": 0.7136979699134827, "step": 4643 }, { "epoch": 0.858103036112298, "grad_norm": 0.07513435930013657, "learning_rate": 1.6756964254707727e-05, "loss": 0.7594067454338074, "step": 4644 }, { "epoch": 0.8582878128211939, "grad_norm": 0.05214472487568855, "learning_rate": 1.6755493508601238e-05, "loss": 0.44269320368766785, "step": 4645 }, { "epoch": 0.8584725895300898, "grad_norm": 0.0848061591386795, "learning_rate": 1.6754022493647247e-05, "loss": 0.6620956659317017, "step": 4646 }, { "epoch": 0.8586573662389856, "grad_norm": 0.08480852842330933, "learning_rate": 1.6752551209904287e-05, "loss": 0.646623969078064, "step": 4647 }, { "epoch": 0.8588421429478814, "grad_norm": 0.06943809986114502, "learning_rate": 1.675107965743092e-05, "loss": 0.45650023221969604, "step": 4648 }, { "epoch": 0.8590269196567772, "grad_norm": 0.07890260219573975, "learning_rate": 1.67496078362857e-05, "loss": 0.5708764791488647, "step": 4649 }, { "epoch": 0.8592116963656731, "grad_norm": 0.07639990001916885, "learning_rate": 1.6748135746527205e-05, "loss": 0.5387254953384399, "step": 4650 }, { "epoch": 0.8593964730745689, "grad_norm": 0.0655929297208786, "learning_rate": 1.674666338821402e-05, "loss": 0.5580186247825623, "step": 4651 }, { "epoch": 0.8595812497834648, "grad_norm": 0.08575616776943207, "learning_rate": 1.6745190761404743e-05, "loss": 0.6784237623214722, "step": 4652 }, { "epoch": 0.8597660264923607, "grad_norm": 0.06969567388296127, "learning_rate": 1.6743717866157972e-05, "loss": 0.5400366187095642, "step": 4653 }, { "epoch": 0.8599508032012565, "grad_norm": 0.04362644627690315, "learning_rate": 1.6742244702532332e-05, "loss": 0.4012106657028198, "step": 4654 }, { "epoch": 0.8601355799101523, "grad_norm": 0.06540507078170776, "learning_rate": 1.6740771270586445e-05, "loss": 0.4933326542377472, "step": 4655 }, { "epoch": 0.8603203566190482, "grad_norm": 0.08104443550109863, "learning_rate": 1.673929757037895e-05, "loss": 0.8567174673080444, "step": 4656 }, { "epoch": 0.860505133327944, "grad_norm": 0.06365179270505905, "learning_rate": 1.6737823601968495e-05, "loss": 0.6120196580886841, "step": 4657 }, { "epoch": 0.8606899100368398, "grad_norm": 0.06998001039028168, "learning_rate": 1.6736349365413742e-05, "loss": 0.7329786419868469, "step": 4658 }, { "epoch": 0.8608746867457358, "grad_norm": 0.05753127485513687, "learning_rate": 1.673487486077336e-05, "loss": 0.5385212898254395, "step": 4659 }, { "epoch": 0.8610594634546316, "grad_norm": 0.07382532209157944, "learning_rate": 1.673340008810603e-05, "loss": 0.5739226937294006, "step": 4660 }, { "epoch": 0.8612442401635274, "grad_norm": 0.06449062377214432, "learning_rate": 1.673192504747044e-05, "loss": 0.5096633434295654, "step": 4661 }, { "epoch": 0.8614290168724232, "grad_norm": 0.07269871234893799, "learning_rate": 1.6730449738925298e-05, "loss": 0.6623801589012146, "step": 4662 }, { "epoch": 0.8616137935813191, "grad_norm": 0.08698683232069016, "learning_rate": 1.6728974162529313e-05, "loss": 0.7171550393104553, "step": 4663 }, { "epoch": 0.8617985702902149, "grad_norm": 0.07263854891061783, "learning_rate": 1.6727498318341206e-05, "loss": 0.6417308449745178, "step": 4664 }, { "epoch": 0.8619833469991107, "grad_norm": 0.04888635128736496, "learning_rate": 1.6726022206419716e-05, "loss": 0.33261534571647644, "step": 4665 }, { "epoch": 0.8621681237080066, "grad_norm": 0.09420102089643478, "learning_rate": 1.6724545826823583e-05, "loss": 0.8491489291191101, "step": 4666 }, { "epoch": 0.8623529004169025, "grad_norm": 0.07827732712030411, "learning_rate": 1.6723069179611566e-05, "loss": 0.6525130867958069, "step": 4667 }, { "epoch": 0.8625376771257983, "grad_norm": 0.07107626646757126, "learning_rate": 1.672159226484243e-05, "loss": 0.506168782711029, "step": 4668 }, { "epoch": 0.8627224538346941, "grad_norm": 0.062264494597911835, "learning_rate": 1.672011508257495e-05, "loss": 0.4457719624042511, "step": 4669 }, { "epoch": 0.86290723054359, "grad_norm": 0.06501355767250061, "learning_rate": 1.6718637632867914e-05, "loss": 0.539233386516571, "step": 4670 }, { "epoch": 0.8630920072524858, "grad_norm": 0.07183745503425598, "learning_rate": 1.6717159915780118e-05, "loss": 0.5258473753929138, "step": 4671 }, { "epoch": 0.8632767839613816, "grad_norm": 0.07959087193012238, "learning_rate": 1.6715681931370376e-05, "loss": 0.6688862442970276, "step": 4672 }, { "epoch": 0.8634615606702775, "grad_norm": 0.05958564579486847, "learning_rate": 1.6714203679697504e-05, "loss": 0.4352157711982727, "step": 4673 }, { "epoch": 0.8636463373791734, "grad_norm": 0.07748246192932129, "learning_rate": 1.671272516082033e-05, "loss": 0.6799011826515198, "step": 4674 }, { "epoch": 0.8638311140880692, "grad_norm": 0.09600286185741425, "learning_rate": 1.6711246374797696e-05, "loss": 0.7128432393074036, "step": 4675 }, { "epoch": 0.8640158907969651, "grad_norm": 0.06360357999801636, "learning_rate": 1.6709767321688453e-05, "loss": 0.614168643951416, "step": 4676 }, { "epoch": 0.8642006675058609, "grad_norm": 0.07200276106595993, "learning_rate": 1.6708288001551464e-05, "loss": 0.44147413969039917, "step": 4677 }, { "epoch": 0.8643854442147567, "grad_norm": 0.06741948425769806, "learning_rate": 1.67068084144456e-05, "loss": 0.4448484480381012, "step": 4678 }, { "epoch": 0.8645702209236525, "grad_norm": 0.061067428439855576, "learning_rate": 1.670532856042974e-05, "loss": 0.43858760595321655, "step": 4679 }, { "epoch": 0.8647549976325484, "grad_norm": 0.0807904303073883, "learning_rate": 1.6703848439562787e-05, "loss": 0.8016616106033325, "step": 4680 }, { "epoch": 0.8649397743414443, "grad_norm": 0.07437105476856232, "learning_rate": 1.6702368051903638e-05, "loss": 0.43584221601486206, "step": 4681 }, { "epoch": 0.8651245510503401, "grad_norm": 0.07812274992465973, "learning_rate": 1.6700887397511206e-05, "loss": 0.7267005443572998, "step": 4682 }, { "epoch": 0.865309327759236, "grad_norm": 0.08380448073148727, "learning_rate": 1.6699406476444426e-05, "loss": 0.7394942045211792, "step": 4683 }, { "epoch": 0.8654941044681318, "grad_norm": 0.06715326011180878, "learning_rate": 1.6697925288762226e-05, "loss": 0.5929086804389954, "step": 4684 }, { "epoch": 0.8656788811770276, "grad_norm": 0.06208386272192001, "learning_rate": 1.6696443834523554e-05, "loss": 0.4253726601600647, "step": 4685 }, { "epoch": 0.8658636578859235, "grad_norm": 0.07226450741291046, "learning_rate": 1.6694962113787365e-05, "loss": 0.6204879283905029, "step": 4686 }, { "epoch": 0.8660484345948193, "grad_norm": 0.05629701539874077, "learning_rate": 1.6693480126612636e-05, "loss": 0.4896463453769684, "step": 4687 }, { "epoch": 0.8662332113037151, "grad_norm": 0.06906317919492722, "learning_rate": 1.6691997873058333e-05, "loss": 0.5383235216140747, "step": 4688 }, { "epoch": 0.866417988012611, "grad_norm": 0.06155070289969444, "learning_rate": 1.6690515353183455e-05, "loss": 0.5179824233055115, "step": 4689 }, { "epoch": 0.8666027647215069, "grad_norm": 0.07300128042697906, "learning_rate": 1.6689032567046996e-05, "loss": 0.586868166923523, "step": 4690 }, { "epoch": 0.8667875414304027, "grad_norm": 0.07631676644086838, "learning_rate": 1.668754951470797e-05, "loss": 0.5804420113563538, "step": 4691 }, { "epoch": 0.8669723181392985, "grad_norm": 0.05737726390361786, "learning_rate": 1.6686066196225394e-05, "loss": 0.5011819005012512, "step": 4692 }, { "epoch": 0.8671570948481944, "grad_norm": 0.08360470086336136, "learning_rate": 1.66845826116583e-05, "loss": 0.6318942308425903, "step": 4693 }, { "epoch": 0.8673418715570902, "grad_norm": 0.060124389827251434, "learning_rate": 1.6683098761065734e-05, "loss": 0.5247859358787537, "step": 4694 }, { "epoch": 0.867526648265986, "grad_norm": 0.0769641250371933, "learning_rate": 1.6681614644506747e-05, "loss": 0.5290091037750244, "step": 4695 }, { "epoch": 0.867711424974882, "grad_norm": 0.0742589458823204, "learning_rate": 1.6680130262040398e-05, "loss": 0.7358421087265015, "step": 4696 }, { "epoch": 0.8678962016837778, "grad_norm": 0.0717698410153389, "learning_rate": 1.667864561372577e-05, "loss": 0.5507757663726807, "step": 4697 }, { "epoch": 0.8680809783926736, "grad_norm": 0.060020409524440765, "learning_rate": 1.6677160699621934e-05, "loss": 0.5884896516799927, "step": 4698 }, { "epoch": 0.8682657551015694, "grad_norm": 0.08041785657405853, "learning_rate": 1.6675675519787997e-05, "loss": 0.6492130756378174, "step": 4699 }, { "epoch": 0.8684505318104653, "grad_norm": 0.07929142564535141, "learning_rate": 1.6674190074283058e-05, "loss": 0.638306736946106, "step": 4700 }, { "epoch": 0.8686353085193611, "grad_norm": 0.06563692539930344, "learning_rate": 1.6672704363166238e-05, "loss": 0.5677976608276367, "step": 4701 }, { "epoch": 0.8688200852282569, "grad_norm": 0.07268399000167847, "learning_rate": 1.6671218386496655e-05, "loss": 0.6142613887786865, "step": 4702 }, { "epoch": 0.8690048619371529, "grad_norm": 0.059030793607234955, "learning_rate": 1.6669732144333454e-05, "loss": 0.49718761444091797, "step": 4703 }, { "epoch": 0.8691896386460487, "grad_norm": 0.0930427759885788, "learning_rate": 1.6668245636735782e-05, "loss": 0.765467643737793, "step": 4704 }, { "epoch": 0.8693744153549445, "grad_norm": 0.07027864456176758, "learning_rate": 1.6666758863762796e-05, "loss": 0.468212366104126, "step": 4705 }, { "epoch": 0.8695591920638404, "grad_norm": 0.07914797216653824, "learning_rate": 1.6665271825473663e-05, "loss": 0.5796051025390625, "step": 4706 }, { "epoch": 0.8697439687727362, "grad_norm": 0.07291863113641739, "learning_rate": 1.6663784521927568e-05, "loss": 0.5344331860542297, "step": 4707 }, { "epoch": 0.869928745481632, "grad_norm": 0.05999236926436424, "learning_rate": 1.6662296953183693e-05, "loss": 0.45997464656829834, "step": 4708 }, { "epoch": 0.8701135221905278, "grad_norm": 0.0643937885761261, "learning_rate": 1.6660809119301246e-05, "loss": 0.5652839541435242, "step": 4709 }, { "epoch": 0.8702982988994237, "grad_norm": 0.06123213842511177, "learning_rate": 1.665932102033943e-05, "loss": 0.5423359274864197, "step": 4710 }, { "epoch": 0.8704830756083196, "grad_norm": 0.06579851359128952, "learning_rate": 1.6657832656357475e-05, "loss": 0.565425455570221, "step": 4711 }, { "epoch": 0.8706678523172154, "grad_norm": 0.07380993664264679, "learning_rate": 1.665634402741461e-05, "loss": 0.5821062922477722, "step": 4712 }, { "epoch": 0.8708526290261113, "grad_norm": 0.07134454697370529, "learning_rate": 1.665485513357008e-05, "loss": 0.5998745560646057, "step": 4713 }, { "epoch": 0.8710374057350071, "grad_norm": 0.05814999341964722, "learning_rate": 1.6653365974883134e-05, "loss": 0.4527260363101959, "step": 4714 }, { "epoch": 0.8712221824439029, "grad_norm": 0.09124371409416199, "learning_rate": 1.6651876551413038e-05, "loss": 0.9598178863525391, "step": 4715 }, { "epoch": 0.8714069591527988, "grad_norm": 0.05583808571100235, "learning_rate": 1.6650386863219067e-05, "loss": 0.44631731510162354, "step": 4716 }, { "epoch": 0.8715917358616946, "grad_norm": 0.06759850680828094, "learning_rate": 1.6648896910360503e-05, "loss": 0.5839464664459229, "step": 4717 }, { "epoch": 0.8717765125705905, "grad_norm": 0.057879723608493805, "learning_rate": 1.6647406692896646e-05, "loss": 0.5076382756233215, "step": 4718 }, { "epoch": 0.8719612892794864, "grad_norm": 0.07822850346565247, "learning_rate": 1.66459162108868e-05, "loss": 0.5981899499893188, "step": 4719 }, { "epoch": 0.8721460659883822, "grad_norm": 0.05845734477043152, "learning_rate": 1.6644425464390277e-05, "loss": 0.473381370306015, "step": 4720 }, { "epoch": 0.872330842697278, "grad_norm": 0.06890823692083359, "learning_rate": 1.6642934453466413e-05, "loss": 0.5807965993881226, "step": 4721 }, { "epoch": 0.8725156194061738, "grad_norm": 0.07697651535272598, "learning_rate": 1.6641443178174536e-05, "loss": 0.6806358695030212, "step": 4722 }, { "epoch": 0.8727003961150697, "grad_norm": 0.0753675252199173, "learning_rate": 1.6639951638574005e-05, "loss": 0.5100016593933105, "step": 4723 }, { "epoch": 0.8728851728239655, "grad_norm": 0.06294082850217819, "learning_rate": 1.663845983472417e-05, "loss": 0.5712581872940063, "step": 4724 }, { "epoch": 0.8730699495328614, "grad_norm": 0.057558026164770126, "learning_rate": 1.66369677666844e-05, "loss": 0.4887886643409729, "step": 4725 }, { "epoch": 0.8732547262417573, "grad_norm": 0.05099833011627197, "learning_rate": 1.663547543451408e-05, "loss": 0.49933555722236633, "step": 4726 }, { "epoch": 0.8734395029506531, "grad_norm": 0.09851501882076263, "learning_rate": 1.6633982838272598e-05, "loss": 0.7819404006004333, "step": 4727 }, { "epoch": 0.8736242796595489, "grad_norm": 0.05875126272439957, "learning_rate": 1.6632489978019352e-05, "loss": 0.39106565713882446, "step": 4728 }, { "epoch": 0.8738090563684447, "grad_norm": 0.07598012685775757, "learning_rate": 1.6630996853813757e-05, "loss": 0.6143175959587097, "step": 4729 }, { "epoch": 0.8739938330773406, "grad_norm": 0.05999317392706871, "learning_rate": 1.662950346571523e-05, "loss": 0.5516825914382935, "step": 4730 }, { "epoch": 0.8741786097862364, "grad_norm": 0.08445960283279419, "learning_rate": 1.6628009813783213e-05, "loss": 0.6581443548202515, "step": 4731 }, { "epoch": 0.8743633864951322, "grad_norm": 0.07585146278142929, "learning_rate": 1.6626515898077137e-05, "loss": 0.6074404716491699, "step": 4732 }, { "epoch": 0.8745481632040282, "grad_norm": 0.06283219903707504, "learning_rate": 1.662502171865646e-05, "loss": 0.6039258241653442, "step": 4733 }, { "epoch": 0.874732939912924, "grad_norm": 0.05872878059744835, "learning_rate": 1.6623527275580643e-05, "loss": 0.4644799530506134, "step": 4734 }, { "epoch": 0.8749177166218198, "grad_norm": 0.055183276534080505, "learning_rate": 1.662203256890917e-05, "loss": 0.512610137462616, "step": 4735 }, { "epoch": 0.8751024933307157, "grad_norm": 0.06261883676052094, "learning_rate": 1.662053759870151e-05, "loss": 0.580086350440979, "step": 4736 }, { "epoch": 0.8752872700396115, "grad_norm": 0.07283858954906464, "learning_rate": 1.6619042365017173e-05, "loss": 0.6381665468215942, "step": 4737 }, { "epoch": 0.8754720467485073, "grad_norm": 0.07218164205551147, "learning_rate": 1.6617546867915654e-05, "loss": 0.5770898461341858, "step": 4738 }, { "epoch": 0.8756568234574031, "grad_norm": 0.085800901055336, "learning_rate": 1.6616051107456478e-05, "loss": 0.7564215064048767, "step": 4739 }, { "epoch": 0.8758416001662991, "grad_norm": 0.0664244145154953, "learning_rate": 1.6614555083699163e-05, "loss": 0.593537449836731, "step": 4740 }, { "epoch": 0.8760263768751949, "grad_norm": 0.08380355685949326, "learning_rate": 1.661305879670325e-05, "loss": 0.7730197310447693, "step": 4741 }, { "epoch": 0.8762111535840907, "grad_norm": 0.06761428713798523, "learning_rate": 1.6611562246528287e-05, "loss": 0.5476041436195374, "step": 4742 }, { "epoch": 0.8763959302929866, "grad_norm": 0.09186141937971115, "learning_rate": 1.6610065433233832e-05, "loss": 0.7884858250617981, "step": 4743 }, { "epoch": 0.8765807070018824, "grad_norm": 0.060746416449546814, "learning_rate": 1.6608568356879454e-05, "loss": 0.44659048318862915, "step": 4744 }, { "epoch": 0.8767654837107782, "grad_norm": 0.07710873335599899, "learning_rate": 1.660707101752473e-05, "loss": 0.7485287189483643, "step": 4745 }, { "epoch": 0.876950260419674, "grad_norm": 0.07253643125295639, "learning_rate": 1.660557341522925e-05, "loss": 0.5462217330932617, "step": 4746 }, { "epoch": 0.87713503712857, "grad_norm": 0.07171177864074707, "learning_rate": 1.6604075550052616e-05, "loss": 0.6452522873878479, "step": 4747 }, { "epoch": 0.8773198138374658, "grad_norm": 0.08357395231723785, "learning_rate": 1.6602577422054433e-05, "loss": 0.7898638844490051, "step": 4748 }, { "epoch": 0.8775045905463617, "grad_norm": 0.07633797824382782, "learning_rate": 1.660107903129433e-05, "loss": 0.5493875741958618, "step": 4749 }, { "epoch": 0.8776893672552575, "grad_norm": 0.07141376286745071, "learning_rate": 1.659958037783193e-05, "loss": 0.6093866229057312, "step": 4750 }, { "epoch": 0.8778741439641533, "grad_norm": 0.09596671909093857, "learning_rate": 1.659808146172688e-05, "loss": 0.8044968843460083, "step": 4751 }, { "epoch": 0.8780589206730491, "grad_norm": 0.06962507218122482, "learning_rate": 1.6596582283038828e-05, "loss": 0.4918615221977234, "step": 4752 }, { "epoch": 0.878243697381945, "grad_norm": 0.07077303528785706, "learning_rate": 1.6595082841827442e-05, "loss": 0.5758873224258423, "step": 4753 }, { "epoch": 0.8784284740908408, "grad_norm": 0.063609778881073, "learning_rate": 1.659358313815239e-05, "loss": 0.6233231425285339, "step": 4754 }, { "epoch": 0.8786132507997367, "grad_norm": 0.07234170287847519, "learning_rate": 1.659208317207336e-05, "loss": 0.6821828484535217, "step": 4755 }, { "epoch": 0.8787980275086326, "grad_norm": 0.08028619736433029, "learning_rate": 1.6590582943650046e-05, "loss": 0.840358555316925, "step": 4756 }, { "epoch": 0.8789828042175284, "grad_norm": 0.05547550693154335, "learning_rate": 1.658908245294215e-05, "loss": 0.4089423418045044, "step": 4757 }, { "epoch": 0.8791675809264242, "grad_norm": 0.08615767955780029, "learning_rate": 1.658758170000938e-05, "loss": 0.6610065698623657, "step": 4758 }, { "epoch": 0.87935235763532, "grad_norm": 0.05322204530239105, "learning_rate": 1.658608068491147e-05, "loss": 0.39673668146133423, "step": 4759 }, { "epoch": 0.8795371343442159, "grad_norm": 0.07618802785873413, "learning_rate": 1.658457940770816e-05, "loss": 0.5760771632194519, "step": 4760 }, { "epoch": 0.8797219110531117, "grad_norm": 0.06550726294517517, "learning_rate": 1.6583077868459185e-05, "loss": 0.518328845500946, "step": 4761 }, { "epoch": 0.8799066877620076, "grad_norm": 0.06240643188357353, "learning_rate": 1.658157606722431e-05, "loss": 0.43588632345199585, "step": 4762 }, { "epoch": 0.8800914644709035, "grad_norm": 0.07559328526258469, "learning_rate": 1.6580074004063295e-05, "loss": 0.6650227308273315, "step": 4763 }, { "epoch": 0.8802762411797993, "grad_norm": 0.08168413490056992, "learning_rate": 1.6578571679035924e-05, "loss": 0.6146327257156372, "step": 4764 }, { "epoch": 0.8804610178886951, "grad_norm": 0.05272490531206131, "learning_rate": 1.6577069092201982e-05, "loss": 0.4760764241218567, "step": 4765 }, { "epoch": 0.880645794597591, "grad_norm": 0.07334236055612564, "learning_rate": 1.6575566243621263e-05, "loss": 0.6713932156562805, "step": 4766 }, { "epoch": 0.8808305713064868, "grad_norm": 0.08298858255147934, "learning_rate": 1.657406313335358e-05, "loss": 0.6796658635139465, "step": 4767 }, { "epoch": 0.8810153480153826, "grad_norm": 0.09175878763198853, "learning_rate": 1.657255976145876e-05, "loss": 0.6366848349571228, "step": 4768 }, { "epoch": 0.8812001247242786, "grad_norm": 0.0788515955209732, "learning_rate": 1.657105612799662e-05, "loss": 0.6692134737968445, "step": 4769 }, { "epoch": 0.8813849014331744, "grad_norm": 0.04891791194677353, "learning_rate": 1.6569552233027e-05, "loss": 0.3151491582393646, "step": 4770 }, { "epoch": 0.8815696781420702, "grad_norm": 0.06327511370182037, "learning_rate": 1.656804807660976e-05, "loss": 0.6637892127037048, "step": 4771 }, { "epoch": 0.881754454850966, "grad_norm": 0.08450198918581009, "learning_rate": 1.6566543658804753e-05, "loss": 0.7439243197441101, "step": 4772 }, { "epoch": 0.8819392315598619, "grad_norm": 0.08025246858596802, "learning_rate": 1.656503897967185e-05, "loss": 0.6441263556480408, "step": 4773 }, { "epoch": 0.8821240082687577, "grad_norm": 0.07658755034208298, "learning_rate": 1.656353403927094e-05, "loss": 0.6907283067703247, "step": 4774 }, { "epoch": 0.8823087849776535, "grad_norm": 0.07724691182374954, "learning_rate": 1.6562028837661905e-05, "loss": 0.6994632482528687, "step": 4775 }, { "epoch": 0.8824935616865495, "grad_norm": 0.07235722243785858, "learning_rate": 1.6560523374904658e-05, "loss": 0.5693969130516052, "step": 4776 }, { "epoch": 0.8826783383954453, "grad_norm": 0.07386350631713867, "learning_rate": 1.65590176510591e-05, "loss": 0.5384517312049866, "step": 4777 }, { "epoch": 0.8828631151043411, "grad_norm": 0.06712142378091812, "learning_rate": 1.6557511666185164e-05, "loss": 0.5699175000190735, "step": 4778 }, { "epoch": 0.883047891813237, "grad_norm": 0.07460469007492065, "learning_rate": 1.655600542034278e-05, "loss": 0.523343563079834, "step": 4779 }, { "epoch": 0.8832326685221328, "grad_norm": 0.06637348234653473, "learning_rate": 1.6554498913591888e-05, "loss": 0.4786106050014496, "step": 4780 }, { "epoch": 0.8834174452310286, "grad_norm": 0.06271538138389587, "learning_rate": 1.6552992145992444e-05, "loss": 0.492175817489624, "step": 4781 }, { "epoch": 0.8836022219399244, "grad_norm": 0.07379522174596786, "learning_rate": 1.655148511760442e-05, "loss": 0.5145571231842041, "step": 4782 }, { "epoch": 0.8837869986488203, "grad_norm": 0.07079476863145828, "learning_rate": 1.6549977828487784e-05, "loss": 0.5578653216362, "step": 4783 }, { "epoch": 0.8839717753577162, "grad_norm": 0.05883871391415596, "learning_rate": 1.654847027870252e-05, "loss": 0.36676284670829773, "step": 4784 }, { "epoch": 0.884156552066612, "grad_norm": 0.07372432202100754, "learning_rate": 1.6546962468308628e-05, "loss": 0.6349548101425171, "step": 4785 }, { "epoch": 0.8843413287755079, "grad_norm": 0.08021949231624603, "learning_rate": 1.6545454397366114e-05, "loss": 0.5997925996780396, "step": 4786 }, { "epoch": 0.8845261054844037, "grad_norm": 0.08539916574954987, "learning_rate": 1.654394606593499e-05, "loss": 0.7283767461776733, "step": 4787 }, { "epoch": 0.8847108821932995, "grad_norm": 0.06747384369373322, "learning_rate": 1.6542437474075288e-05, "loss": 0.5858517289161682, "step": 4788 }, { "epoch": 0.8848956589021953, "grad_norm": 0.08050844818353653, "learning_rate": 1.6540928621847042e-05, "loss": 0.6546807885169983, "step": 4789 }, { "epoch": 0.8850804356110912, "grad_norm": 0.05787456035614014, "learning_rate": 1.65394195093103e-05, "loss": 0.4163222908973694, "step": 4790 }, { "epoch": 0.8852652123199871, "grad_norm": 0.05363563075661659, "learning_rate": 1.6537910136525123e-05, "loss": 0.4085903465747833, "step": 4791 }, { "epoch": 0.8854499890288829, "grad_norm": 0.08327876031398773, "learning_rate": 1.6536400503551576e-05, "loss": 0.5789997577667236, "step": 4792 }, { "epoch": 0.8856347657377788, "grad_norm": 0.06537973135709763, "learning_rate": 1.653489061044974e-05, "loss": 0.6246554851531982, "step": 4793 }, { "epoch": 0.8858195424466746, "grad_norm": 0.062478817999362946, "learning_rate": 1.6533380457279704e-05, "loss": 0.4444185793399811, "step": 4794 }, { "epoch": 0.8860043191555704, "grad_norm": 0.06802353262901306, "learning_rate": 1.6531870044101565e-05, "loss": 0.6060287952423096, "step": 4795 }, { "epoch": 0.8861890958644663, "grad_norm": 0.06890678405761719, "learning_rate": 1.653035937097543e-05, "loss": 0.48558810353279114, "step": 4796 }, { "epoch": 0.8863738725733621, "grad_norm": 0.04655135050415993, "learning_rate": 1.6528848437961426e-05, "loss": 0.38830509781837463, "step": 4797 }, { "epoch": 0.886558649282258, "grad_norm": 0.06424148380756378, "learning_rate": 1.6527337245119678e-05, "loss": 0.4444373548030853, "step": 4798 }, { "epoch": 0.8867434259911539, "grad_norm": 0.07430419325828552, "learning_rate": 1.6525825792510333e-05, "loss": 0.7238464951515198, "step": 4799 }, { "epoch": 0.8869282027000497, "grad_norm": 0.0749935656785965, "learning_rate": 1.6524314080193534e-05, "loss": 0.7441866993904114, "step": 4800 }, { "epoch": 0.8871129794089455, "grad_norm": 0.08086378127336502, "learning_rate": 1.652280210822945e-05, "loss": 0.7391872406005859, "step": 4801 }, { "epoch": 0.8872977561178413, "grad_norm": 0.07954630255699158, "learning_rate": 1.6521289876678247e-05, "loss": 0.6579791903495789, "step": 4802 }, { "epoch": 0.8874825328267372, "grad_norm": 0.07959762960672379, "learning_rate": 1.6519777385600112e-05, "loss": 0.7004535794258118, "step": 4803 }, { "epoch": 0.887667309535633, "grad_norm": 0.05923865735530853, "learning_rate": 1.6518264635055233e-05, "loss": 0.3998711109161377, "step": 4804 }, { "epoch": 0.8878520862445288, "grad_norm": 0.08902712911367416, "learning_rate": 1.6516751625103817e-05, "loss": 0.6480748057365417, "step": 4805 }, { "epoch": 0.8880368629534248, "grad_norm": 0.06671420484781265, "learning_rate": 1.651523835580607e-05, "loss": 0.6713802218437195, "step": 4806 }, { "epoch": 0.8882216396623206, "grad_norm": 0.0831875428557396, "learning_rate": 1.6513724827222225e-05, "loss": 0.6743994355201721, "step": 4807 }, { "epoch": 0.8884064163712164, "grad_norm": 0.07316502928733826, "learning_rate": 1.651221103941251e-05, "loss": 0.6043171882629395, "step": 4808 }, { "epoch": 0.8885911930801123, "grad_norm": 0.05832867696881294, "learning_rate": 1.6510696992437164e-05, "loss": 0.5134963989257812, "step": 4809 }, { "epoch": 0.8887759697890081, "grad_norm": 0.07833995670080185, "learning_rate": 1.6509182686356454e-05, "loss": 0.5890991687774658, "step": 4810 }, { "epoch": 0.8889607464979039, "grad_norm": 0.07378154247999191, "learning_rate": 1.6507668121230632e-05, "loss": 0.40639033913612366, "step": 4811 }, { "epoch": 0.8891455232067997, "grad_norm": 0.06623880565166473, "learning_rate": 1.6506153297119984e-05, "loss": 0.5028772354125977, "step": 4812 }, { "epoch": 0.8893302999156957, "grad_norm": 0.07434017211198807, "learning_rate": 1.6504638214084784e-05, "loss": 0.7079054117202759, "step": 4813 }, { "epoch": 0.8895150766245915, "grad_norm": 0.07623261958360672, "learning_rate": 1.6503122872185338e-05, "loss": 0.5926937460899353, "step": 4814 }, { "epoch": 0.8896998533334873, "grad_norm": 0.06846163421869278, "learning_rate": 1.6501607271481944e-05, "loss": 0.5976287126541138, "step": 4815 }, { "epoch": 0.8898846300423832, "grad_norm": 0.0537530779838562, "learning_rate": 1.6500091412034925e-05, "loss": 0.40996718406677246, "step": 4816 }, { "epoch": 0.890069406751279, "grad_norm": 0.07969720661640167, "learning_rate": 1.6498575293904603e-05, "loss": 0.6000237464904785, "step": 4817 }, { "epoch": 0.8902541834601748, "grad_norm": 0.07043500989675522, "learning_rate": 1.6497058917151314e-05, "loss": 0.5773909091949463, "step": 4818 }, { "epoch": 0.8904389601690706, "grad_norm": 0.07048541307449341, "learning_rate": 1.6495542281835407e-05, "loss": 0.705425500869751, "step": 4819 }, { "epoch": 0.8906237368779666, "grad_norm": 0.06701841205358505, "learning_rate": 1.649402538801724e-05, "loss": 0.722366213798523, "step": 4820 }, { "epoch": 0.8908085135868624, "grad_norm": 0.08606120198965073, "learning_rate": 1.6492508235757184e-05, "loss": 0.7456164956092834, "step": 4821 }, { "epoch": 0.8909932902957582, "grad_norm": 0.05852803960442543, "learning_rate": 1.649099082511561e-05, "loss": 0.4330172836780548, "step": 4822 }, { "epoch": 0.8911780670046541, "grad_norm": 0.06518279016017914, "learning_rate": 1.6489473156152904e-05, "loss": 0.5213345885276794, "step": 4823 }, { "epoch": 0.8913628437135499, "grad_norm": 0.0874481275677681, "learning_rate": 1.6487955228929474e-05, "loss": 0.7169750332832336, "step": 4824 }, { "epoch": 0.8915476204224457, "grad_norm": 0.060117240995168686, "learning_rate": 1.648643704350572e-05, "loss": 0.5598580241203308, "step": 4825 }, { "epoch": 0.8917323971313416, "grad_norm": 0.06978029757738113, "learning_rate": 1.648491859994207e-05, "loss": 0.648662805557251, "step": 4826 }, { "epoch": 0.8919171738402374, "grad_norm": 0.07007380574941635, "learning_rate": 1.6483399898298945e-05, "loss": 0.7349458336830139, "step": 4827 }, { "epoch": 0.8921019505491333, "grad_norm": 0.06880953907966614, "learning_rate": 1.648188093863679e-05, "loss": 0.6454513072967529, "step": 4828 }, { "epoch": 0.8922867272580292, "grad_norm": 0.05998198315501213, "learning_rate": 1.6480361721016053e-05, "loss": 0.3911832571029663, "step": 4829 }, { "epoch": 0.892471503966925, "grad_norm": 0.08584605157375336, "learning_rate": 1.6478842245497193e-05, "loss": 0.6219820380210876, "step": 4830 }, { "epoch": 0.8926562806758208, "grad_norm": 0.06927721202373505, "learning_rate": 1.6477322512140683e-05, "loss": 0.5584146976470947, "step": 4831 }, { "epoch": 0.8928410573847166, "grad_norm": 0.06739287823438644, "learning_rate": 1.6475802521007e-05, "loss": 0.6995735764503479, "step": 4832 }, { "epoch": 0.8930258340936125, "grad_norm": 0.08070404082536697, "learning_rate": 1.647428227215664e-05, "loss": 0.6666145324707031, "step": 4833 }, { "epoch": 0.8932106108025083, "grad_norm": 0.05215371027588844, "learning_rate": 1.6472761765650095e-05, "loss": 0.40434175729751587, "step": 4834 }, { "epoch": 0.8933953875114042, "grad_norm": 0.085506372153759, "learning_rate": 1.6471241001547886e-05, "loss": 0.7858412265777588, "step": 4835 }, { "epoch": 0.8935801642203001, "grad_norm": 0.08535362035036087, "learning_rate": 1.6469719979910534e-05, "loss": 0.5532266497612, "step": 4836 }, { "epoch": 0.8937649409291959, "grad_norm": 0.07354865223169327, "learning_rate": 1.646819870079856e-05, "loss": 0.6461263298988342, "step": 4837 }, { "epoch": 0.8939497176380917, "grad_norm": 0.06490924954414368, "learning_rate": 1.6466677164272523e-05, "loss": 0.48238641023635864, "step": 4838 }, { "epoch": 0.8941344943469876, "grad_norm": 0.0884164422750473, "learning_rate": 1.646515537039296e-05, "loss": 0.7125632166862488, "step": 4839 }, { "epoch": 0.8943192710558834, "grad_norm": 0.06169293075799942, "learning_rate": 1.6463633319220443e-05, "loss": 0.39989128708839417, "step": 4840 }, { "epoch": 0.8945040477647792, "grad_norm": 0.08390737324953079, "learning_rate": 1.6462111010815543e-05, "loss": 0.6275712251663208, "step": 4841 }, { "epoch": 0.8946888244736751, "grad_norm": 0.05772528424859047, "learning_rate": 1.646058844523884e-05, "loss": 0.4604227542877197, "step": 4842 }, { "epoch": 0.894873601182571, "grad_norm": 0.047833140939474106, "learning_rate": 1.6459065622550928e-05, "loss": 0.36416640877723694, "step": 4843 }, { "epoch": 0.8950583778914668, "grad_norm": 0.07665587961673737, "learning_rate": 1.6457542542812415e-05, "loss": 0.5700147747993469, "step": 4844 }, { "epoch": 0.8952431546003626, "grad_norm": 0.06834596395492554, "learning_rate": 1.645601920608391e-05, "loss": 0.5849366188049316, "step": 4845 }, { "epoch": 0.8954279313092585, "grad_norm": 0.05772769823670387, "learning_rate": 1.6454495612426044e-05, "loss": 0.481160968542099, "step": 4846 }, { "epoch": 0.8956127080181543, "grad_norm": 0.08508264273405075, "learning_rate": 1.6452971761899438e-05, "loss": 0.6891061067581177, "step": 4847 }, { "epoch": 0.8957974847270501, "grad_norm": 0.07678437978029251, "learning_rate": 1.645144765456475e-05, "loss": 0.6270875930786133, "step": 4848 }, { "epoch": 0.895982261435946, "grad_norm": 0.07674133032560349, "learning_rate": 1.6449923290482627e-05, "loss": 0.5793139338493347, "step": 4849 }, { "epoch": 0.8961670381448419, "grad_norm": 0.06758452206850052, "learning_rate": 1.644839866971374e-05, "loss": 0.5169587135314941, "step": 4850 }, { "epoch": 0.8963518148537377, "grad_norm": 0.08784767240285873, "learning_rate": 1.6446873792318755e-05, "loss": 0.6400648951530457, "step": 4851 }, { "epoch": 0.8965365915626335, "grad_norm": 0.0625050738453865, "learning_rate": 1.6445348658358365e-05, "loss": 0.539431095123291, "step": 4852 }, { "epoch": 0.8967213682715294, "grad_norm": 0.07906383275985718, "learning_rate": 1.6443823267893265e-05, "loss": 0.5679528713226318, "step": 4853 }, { "epoch": 0.8969061449804252, "grad_norm": 0.07062255591154099, "learning_rate": 1.644229762098416e-05, "loss": 0.5080562829971313, "step": 4854 }, { "epoch": 0.897090921689321, "grad_norm": 0.09191711992025375, "learning_rate": 1.6440771717691762e-05, "loss": 0.8223711848258972, "step": 4855 }, { "epoch": 0.8972756983982169, "grad_norm": 0.09057801216840744, "learning_rate": 1.64392455580768e-05, "loss": 0.7925399541854858, "step": 4856 }, { "epoch": 0.8974604751071128, "grad_norm": 0.0694667398929596, "learning_rate": 1.6437719142200012e-05, "loss": 0.46083754301071167, "step": 4857 }, { "epoch": 0.8976452518160086, "grad_norm": 0.057980529963970184, "learning_rate": 1.6436192470122142e-05, "loss": 0.5017035603523254, "step": 4858 }, { "epoch": 0.8978300285249045, "grad_norm": 0.07309112697839737, "learning_rate": 1.643466554190395e-05, "loss": 0.6440054178237915, "step": 4859 }, { "epoch": 0.8980148052338003, "grad_norm": 0.09001088887453079, "learning_rate": 1.6433138357606198e-05, "loss": 0.7718321681022644, "step": 4860 }, { "epoch": 0.8981995819426961, "grad_norm": 0.0937967598438263, "learning_rate": 1.643161091728967e-05, "loss": 0.7171733379364014, "step": 4861 }, { "epoch": 0.8983843586515919, "grad_norm": 0.09350735694169998, "learning_rate": 1.6430083221015145e-05, "loss": 0.7647481560707092, "step": 4862 }, { "epoch": 0.8985691353604878, "grad_norm": 0.0811251699924469, "learning_rate": 1.642855526884343e-05, "loss": 0.7368043065071106, "step": 4863 }, { "epoch": 0.8987539120693837, "grad_norm": 0.056616462767124176, "learning_rate": 1.6427027060835323e-05, "loss": 0.46778184175491333, "step": 4864 }, { "epoch": 0.8989386887782795, "grad_norm": 0.08717775344848633, "learning_rate": 1.642549859705165e-05, "loss": 0.8994236588478088, "step": 4865 }, { "epoch": 0.8991234654871754, "grad_norm": 0.07994919270277023, "learning_rate": 1.642396987755323e-05, "loss": 0.5550265908241272, "step": 4866 }, { "epoch": 0.8993082421960712, "grad_norm": 0.08052093535661697, "learning_rate": 1.6422440902400913e-05, "loss": 0.7112287878990173, "step": 4867 }, { "epoch": 0.899493018904967, "grad_norm": 0.06995108723640442, "learning_rate": 1.6420911671655542e-05, "loss": 0.6041479110717773, "step": 4868 }, { "epoch": 0.8996777956138629, "grad_norm": 0.07692621648311615, "learning_rate": 1.641938218537797e-05, "loss": 0.6449621915817261, "step": 4869 }, { "epoch": 0.8998625723227587, "grad_norm": 0.05623577535152435, "learning_rate": 1.6417852443629074e-05, "loss": 0.4509305953979492, "step": 4870 }, { "epoch": 0.9000473490316545, "grad_norm": 0.06790454685688019, "learning_rate": 1.641632244646973e-05, "loss": 0.6181674003601074, "step": 4871 }, { "epoch": 0.9002321257405504, "grad_norm": 0.06258635967969894, "learning_rate": 1.6414792193960823e-05, "loss": 0.50604248046875, "step": 4872 }, { "epoch": 0.9004169024494463, "grad_norm": 0.0780041292309761, "learning_rate": 1.6413261686163258e-05, "loss": 0.6751472353935242, "step": 4873 }, { "epoch": 0.9006016791583421, "grad_norm": 0.08517557382583618, "learning_rate": 1.6411730923137942e-05, "loss": 0.656969428062439, "step": 4874 }, { "epoch": 0.9007864558672379, "grad_norm": 0.07794827222824097, "learning_rate": 1.6410199904945798e-05, "loss": 0.6910105347633362, "step": 4875 }, { "epoch": 0.9009712325761338, "grad_norm": 0.06862415373325348, "learning_rate": 1.640866863164775e-05, "loss": 0.5876045227050781, "step": 4876 }, { "epoch": 0.9011560092850296, "grad_norm": 0.06553607434034348, "learning_rate": 1.640713710330474e-05, "loss": 0.5301469564437866, "step": 4877 }, { "epoch": 0.9013407859939254, "grad_norm": 0.06396906822919846, "learning_rate": 1.640560531997772e-05, "loss": 0.43047258257865906, "step": 4878 }, { "epoch": 0.9015255627028214, "grad_norm": 0.06935402005910873, "learning_rate": 1.6404073281727648e-05, "loss": 0.6457479596138, "step": 4879 }, { "epoch": 0.9017103394117172, "grad_norm": 0.07602575421333313, "learning_rate": 1.6402540988615494e-05, "loss": 0.5612639784812927, "step": 4880 }, { "epoch": 0.901895116120613, "grad_norm": 0.06645981222391129, "learning_rate": 1.6401008440702243e-05, "loss": 0.575152575969696, "step": 4881 }, { "epoch": 0.9020798928295088, "grad_norm": 0.07158118486404419, "learning_rate": 1.639947563804888e-05, "loss": 0.39184918999671936, "step": 4882 }, { "epoch": 0.9022646695384047, "grad_norm": 0.07773970067501068, "learning_rate": 1.639794258071641e-05, "loss": 0.6216570138931274, "step": 4883 }, { "epoch": 0.9024494462473005, "grad_norm": 0.07514741271734238, "learning_rate": 1.6396409268765837e-05, "loss": 0.5021771788597107, "step": 4884 }, { "epoch": 0.9026342229561963, "grad_norm": 0.07661321014165878, "learning_rate": 1.639487570225819e-05, "loss": 0.6844589710235596, "step": 4885 }, { "epoch": 0.9028189996650923, "grad_norm": 0.0743803083896637, "learning_rate": 1.6393341881254498e-05, "loss": 0.655817449092865, "step": 4886 }, { "epoch": 0.9030037763739881, "grad_norm": 0.05988422408699989, "learning_rate": 1.63918078058158e-05, "loss": 0.565988302230835, "step": 4887 }, { "epoch": 0.9031885530828839, "grad_norm": 0.06683322787284851, "learning_rate": 1.6390273476003152e-05, "loss": 0.5435384511947632, "step": 4888 }, { "epoch": 0.9033733297917798, "grad_norm": 0.07705941051244736, "learning_rate": 1.6388738891877607e-05, "loss": 0.5629130005836487, "step": 4889 }, { "epoch": 0.9035581065006756, "grad_norm": 0.09960421919822693, "learning_rate": 1.6387204053500246e-05, "loss": 0.8571576476097107, "step": 4890 }, { "epoch": 0.9037428832095714, "grad_norm": 0.06615423411130905, "learning_rate": 1.6385668960932143e-05, "loss": 0.6547509431838989, "step": 4891 }, { "epoch": 0.9039276599184672, "grad_norm": 0.0818847268819809, "learning_rate": 1.63841336142344e-05, "loss": 0.724168062210083, "step": 4892 }, { "epoch": 0.9041124366273631, "grad_norm": 0.0793035700917244, "learning_rate": 1.6382598013468104e-05, "loss": 0.7631268501281738, "step": 4893 }, { "epoch": 0.904297213336259, "grad_norm": 0.06299310177564621, "learning_rate": 1.638106215869438e-05, "loss": 0.39549270272254944, "step": 4894 }, { "epoch": 0.9044819900451548, "grad_norm": 0.06919150054454803, "learning_rate": 1.6379526049974347e-05, "loss": 0.5981887578964233, "step": 4895 }, { "epoch": 0.9046667667540507, "grad_norm": 0.07601941376924515, "learning_rate": 1.6377989687369135e-05, "loss": 0.5894408822059631, "step": 4896 }, { "epoch": 0.9048515434629465, "grad_norm": 0.0833205133676529, "learning_rate": 1.637645307093989e-05, "loss": 0.6436570286750793, "step": 4897 }, { "epoch": 0.9050363201718423, "grad_norm": 0.06943897157907486, "learning_rate": 1.637491620074776e-05, "loss": 0.6639769673347473, "step": 4898 }, { "epoch": 0.9052210968807382, "grad_norm": 0.05080414563417435, "learning_rate": 1.637337907685391e-05, "loss": 0.3694339096546173, "step": 4899 }, { "epoch": 0.905405873589634, "grad_norm": 0.07731123268604279, "learning_rate": 1.637184169931951e-05, "loss": 0.6865700483322144, "step": 4900 }, { "epoch": 0.9055906502985299, "grad_norm": 0.07063229382038116, "learning_rate": 1.6370304068205748e-05, "loss": 0.7400882244110107, "step": 4901 }, { "epoch": 0.9057754270074257, "grad_norm": 0.06370572000741959, "learning_rate": 1.6368766183573814e-05, "loss": 0.500558614730835, "step": 4902 }, { "epoch": 0.9059602037163216, "grad_norm": 0.06759117543697357, "learning_rate": 1.636722804548491e-05, "loss": 0.5578844547271729, "step": 4903 }, { "epoch": 0.9061449804252174, "grad_norm": 0.07463599741458893, "learning_rate": 1.636568965400025e-05, "loss": 0.5760760307312012, "step": 4904 }, { "epoch": 0.9063297571341132, "grad_norm": 0.06752346456050873, "learning_rate": 1.636415100918106e-05, "loss": 0.6381351947784424, "step": 4905 }, { "epoch": 0.9065145338430091, "grad_norm": 0.06896194815635681, "learning_rate": 1.636261211108857e-05, "loss": 0.6848575472831726, "step": 4906 }, { "epoch": 0.9066993105519049, "grad_norm": 0.07377118617296219, "learning_rate": 1.636107295978402e-05, "loss": 0.599323570728302, "step": 4907 }, { "epoch": 0.9068840872608008, "grad_norm": 0.06920889765024185, "learning_rate": 1.635953355532867e-05, "loss": 0.5673415660858154, "step": 4908 }, { "epoch": 0.9070688639696967, "grad_norm": 0.06042078137397766, "learning_rate": 1.6357993897783783e-05, "loss": 0.43269699811935425, "step": 4909 }, { "epoch": 0.9072536406785925, "grad_norm": 0.07586175203323364, "learning_rate": 1.635645398721063e-05, "loss": 0.6240195631980896, "step": 4910 }, { "epoch": 0.9074384173874883, "grad_norm": 0.08614122122526169, "learning_rate": 1.635491382367049e-05, "loss": 0.5619620084762573, "step": 4911 }, { "epoch": 0.9076231940963841, "grad_norm": 0.06778106093406677, "learning_rate": 1.635337340722467e-05, "loss": 0.5462279319763184, "step": 4912 }, { "epoch": 0.90780797080528, "grad_norm": 0.07216772437095642, "learning_rate": 1.635183273793446e-05, "loss": 0.5313920378684998, "step": 4913 }, { "epoch": 0.9079927475141758, "grad_norm": 0.05886697396636009, "learning_rate": 1.635029181586118e-05, "loss": 0.46407654881477356, "step": 4914 }, { "epoch": 0.9081775242230716, "grad_norm": 0.06765791028738022, "learning_rate": 1.6348750641066154e-05, "loss": 0.6518081426620483, "step": 4915 }, { "epoch": 0.9083623009319676, "grad_norm": 0.06862279027700424, "learning_rate": 1.6347209213610718e-05, "loss": 0.5767778158187866, "step": 4916 }, { "epoch": 0.9085470776408634, "grad_norm": 0.058729130774736404, "learning_rate": 1.6345667533556206e-05, "loss": 0.4166557490825653, "step": 4917 }, { "epoch": 0.9087318543497592, "grad_norm": 0.061408836394548416, "learning_rate": 1.6344125600963984e-05, "loss": 0.4724981188774109, "step": 4918 }, { "epoch": 0.9089166310586551, "grad_norm": 0.06421653181314468, "learning_rate": 1.6342583415895412e-05, "loss": 0.4762861728668213, "step": 4919 }, { "epoch": 0.9091014077675509, "grad_norm": 0.08160565793514252, "learning_rate": 1.6341040978411865e-05, "loss": 0.8044451475143433, "step": 4920 }, { "epoch": 0.9092861844764467, "grad_norm": 0.07565393298864365, "learning_rate": 1.633949828857472e-05, "loss": 0.7070301175117493, "step": 4921 }, { "epoch": 0.9094709611853425, "grad_norm": 0.07401212304830551, "learning_rate": 1.633795534644538e-05, "loss": 0.7780914902687073, "step": 4922 }, { "epoch": 0.9096557378942385, "grad_norm": 0.0788111537694931, "learning_rate": 1.6336412152085248e-05, "loss": 0.7249211072921753, "step": 4923 }, { "epoch": 0.9098405146031343, "grad_norm": 0.04927157983183861, "learning_rate": 1.6334868705555732e-05, "loss": 0.35182616114616394, "step": 4924 }, { "epoch": 0.9100252913120301, "grad_norm": 0.07681036740541458, "learning_rate": 1.6333325006918267e-05, "loss": 0.7379781603813171, "step": 4925 }, { "epoch": 0.910210068020926, "grad_norm": 0.07329613715410233, "learning_rate": 1.6331781056234277e-05, "loss": 0.5301315188407898, "step": 4926 }, { "epoch": 0.9103948447298218, "grad_norm": 0.07360527664422989, "learning_rate": 1.633023685356521e-05, "loss": 0.5996437668800354, "step": 4927 }, { "epoch": 0.9105796214387176, "grad_norm": 0.06598999351263046, "learning_rate": 1.632869239897252e-05, "loss": 0.4719834327697754, "step": 4928 }, { "epoch": 0.9107643981476135, "grad_norm": 0.0765930563211441, "learning_rate": 1.6327147692517675e-05, "loss": 0.565828800201416, "step": 4929 }, { "epoch": 0.9109491748565094, "grad_norm": 0.07155296951532364, "learning_rate": 1.6325602734262146e-05, "loss": 0.7482982873916626, "step": 4930 }, { "epoch": 0.9111339515654052, "grad_norm": 0.06430218368768692, "learning_rate": 1.6324057524267418e-05, "loss": 0.4136260449886322, "step": 4931 }, { "epoch": 0.911318728274301, "grad_norm": 0.07828143984079361, "learning_rate": 1.6322512062594987e-05, "loss": 0.7757020592689514, "step": 4932 }, { "epoch": 0.9115035049831969, "grad_norm": 0.08137954771518707, "learning_rate": 1.6320966349306357e-05, "loss": 0.7074471116065979, "step": 4933 }, { "epoch": 0.9116882816920927, "grad_norm": 0.05411933362483978, "learning_rate": 1.631942038446304e-05, "loss": 0.39557674527168274, "step": 4934 }, { "epoch": 0.9118730584009885, "grad_norm": 0.08513291925191879, "learning_rate": 1.6317874168126567e-05, "loss": 0.5569822788238525, "step": 4935 }, { "epoch": 0.9120578351098844, "grad_norm": 0.08622615039348602, "learning_rate": 1.6316327700358464e-05, "loss": 0.5976544618606567, "step": 4936 }, { "epoch": 0.9122426118187802, "grad_norm": 0.0760151743888855, "learning_rate": 1.631478098122028e-05, "loss": 0.6827889084815979, "step": 4937 }, { "epoch": 0.9124273885276761, "grad_norm": 0.07461119443178177, "learning_rate": 1.6313234010773573e-05, "loss": 0.647698700428009, "step": 4938 }, { "epoch": 0.912612165236572, "grad_norm": 0.0759672299027443, "learning_rate": 1.63116867890799e-05, "loss": 0.5611090064048767, "step": 4939 }, { "epoch": 0.9127969419454678, "grad_norm": 0.08994041383266449, "learning_rate": 1.631013931620084e-05, "loss": 0.6958615779876709, "step": 4940 }, { "epoch": 0.9129817186543636, "grad_norm": 0.062235210090875626, "learning_rate": 1.6308591592197976e-05, "loss": 0.5414263010025024, "step": 4941 }, { "epoch": 0.9131664953632594, "grad_norm": 0.06273581087589264, "learning_rate": 1.6307043617132907e-05, "loss": 0.5574995279312134, "step": 4942 }, { "epoch": 0.9133512720721553, "grad_norm": 0.08604917675256729, "learning_rate": 1.6305495391067232e-05, "loss": 0.7597737312316895, "step": 4943 }, { "epoch": 0.9135360487810511, "grad_norm": 0.09719632565975189, "learning_rate": 1.630394691406257e-05, "loss": 0.9429509043693542, "step": 4944 }, { "epoch": 0.913720825489947, "grad_norm": 0.06911340355873108, "learning_rate": 1.6302398186180538e-05, "loss": 0.47666671872138977, "step": 4945 }, { "epoch": 0.9139056021988429, "grad_norm": 0.07012543827295303, "learning_rate": 1.6300849207482783e-05, "loss": 0.4357103705406189, "step": 4946 }, { "epoch": 0.9140903789077387, "grad_norm": 0.06667513400316238, "learning_rate": 1.629929997803094e-05, "loss": 0.44440895318984985, "step": 4947 }, { "epoch": 0.9142751556166345, "grad_norm": 0.07876349240541458, "learning_rate": 1.6297750497886663e-05, "loss": 0.6954589486122131, "step": 4948 }, { "epoch": 0.9144599323255304, "grad_norm": 0.08191389590501785, "learning_rate": 1.629620076711162e-05, "loss": 0.6731683611869812, "step": 4949 }, { "epoch": 0.9146447090344262, "grad_norm": 0.07889698445796967, "learning_rate": 1.629465078576749e-05, "loss": 0.6995881199836731, "step": 4950 }, { "epoch": 0.914829485743322, "grad_norm": 0.0822673887014389, "learning_rate": 1.6293100553915947e-05, "loss": 0.6512246131896973, "step": 4951 }, { "epoch": 0.915014262452218, "grad_norm": 0.06273180991411209, "learning_rate": 1.629155007161869e-05, "loss": 0.4994935989379883, "step": 4952 }, { "epoch": 0.9151990391611138, "grad_norm": 0.055114369839429855, "learning_rate": 1.6289999338937427e-05, "loss": 0.37916818261146545, "step": 4953 }, { "epoch": 0.9153838158700096, "grad_norm": 0.0721181109547615, "learning_rate": 1.628844835593387e-05, "loss": 0.6371509432792664, "step": 4954 }, { "epoch": 0.9155685925789054, "grad_norm": 0.06248628720641136, "learning_rate": 1.6286897122669737e-05, "loss": 0.502285897731781, "step": 4955 }, { "epoch": 0.9157533692878013, "grad_norm": 0.07796809077262878, "learning_rate": 1.628534563920677e-05, "loss": 0.7872824668884277, "step": 4956 }, { "epoch": 0.9159381459966971, "grad_norm": 0.06692524999380112, "learning_rate": 1.6283793905606715e-05, "loss": 0.7130103707313538, "step": 4957 }, { "epoch": 0.9161229227055929, "grad_norm": 0.07191796600818634, "learning_rate": 1.6282241921931317e-05, "loss": 0.708375096321106, "step": 4958 }, { "epoch": 0.9163076994144888, "grad_norm": 0.07857691496610641, "learning_rate": 1.6280689688242345e-05, "loss": 0.6777384281158447, "step": 4959 }, { "epoch": 0.9164924761233847, "grad_norm": 0.0750550627708435, "learning_rate": 1.6279137204601577e-05, "loss": 0.5284432768821716, "step": 4960 }, { "epoch": 0.9166772528322805, "grad_norm": 0.06321000307798386, "learning_rate": 1.627758447107079e-05, "loss": 0.4843653738498688, "step": 4961 }, { "epoch": 0.9168620295411763, "grad_norm": 0.07654401659965515, "learning_rate": 1.6276031487711782e-05, "loss": 0.6463335156440735, "step": 4962 }, { "epoch": 0.9170468062500722, "grad_norm": 0.074707992374897, "learning_rate": 1.627447825458636e-05, "loss": 0.6347569227218628, "step": 4963 }, { "epoch": 0.917231582958968, "grad_norm": 0.06458912789821625, "learning_rate": 1.627292477175633e-05, "loss": 0.5805029273033142, "step": 4964 }, { "epoch": 0.9174163596678638, "grad_norm": 0.07568664848804474, "learning_rate": 1.6271371039283517e-05, "loss": 0.6437733173370361, "step": 4965 }, { "epoch": 0.9176011363767597, "grad_norm": 0.05623985081911087, "learning_rate": 1.6269817057229762e-05, "loss": 0.41585254669189453, "step": 4966 }, { "epoch": 0.9177859130856556, "grad_norm": 0.08120466023683548, "learning_rate": 1.6268262825656903e-05, "loss": 0.6529135704040527, "step": 4967 }, { "epoch": 0.9179706897945514, "grad_norm": 0.08207594603300095, "learning_rate": 1.6266708344626793e-05, "loss": 0.7336923480033875, "step": 4968 }, { "epoch": 0.9181554665034473, "grad_norm": 0.09322047978639603, "learning_rate": 1.6265153614201296e-05, "loss": 0.6285800337791443, "step": 4969 }, { "epoch": 0.9183402432123431, "grad_norm": 0.08115655928850174, "learning_rate": 1.6263598634442286e-05, "loss": 0.7460980415344238, "step": 4970 }, { "epoch": 0.9185250199212389, "grad_norm": 0.09479428082704544, "learning_rate": 1.6262043405411648e-05, "loss": 0.784386396408081, "step": 4971 }, { "epoch": 0.9187097966301347, "grad_norm": 0.07809218764305115, "learning_rate": 1.6260487927171276e-05, "loss": 0.7730811834335327, "step": 4972 }, { "epoch": 0.9188945733390306, "grad_norm": 0.07673768699169159, "learning_rate": 1.625893219978307e-05, "loss": 0.5420278906822205, "step": 4973 }, { "epoch": 0.9190793500479265, "grad_norm": 0.08296892046928406, "learning_rate": 1.625737622330894e-05, "loss": 0.7337756752967834, "step": 4974 }, { "epoch": 0.9192641267568223, "grad_norm": 0.07037664204835892, "learning_rate": 1.6255819997810815e-05, "loss": 0.545322835445404, "step": 4975 }, { "epoch": 0.9194489034657182, "grad_norm": 0.07493076473474503, "learning_rate": 1.625426352335063e-05, "loss": 0.70954829454422, "step": 4976 }, { "epoch": 0.919633680174614, "grad_norm": 0.07487804442644119, "learning_rate": 1.625270679999032e-05, "loss": 0.574744462966919, "step": 4977 }, { "epoch": 0.9198184568835098, "grad_norm": 0.0659298375248909, "learning_rate": 1.6251149827791843e-05, "loss": 0.3916098475456238, "step": 4978 }, { "epoch": 0.9200032335924057, "grad_norm": 0.07134214043617249, "learning_rate": 1.624959260681716e-05, "loss": 0.5763881206512451, "step": 4979 }, { "epoch": 0.9201880103013015, "grad_norm": 0.06730277836322784, "learning_rate": 1.6248035137128244e-05, "loss": 0.5897141695022583, "step": 4980 }, { "epoch": 0.9203727870101973, "grad_norm": 0.06790062040090561, "learning_rate": 1.6246477418787077e-05, "loss": 0.566528856754303, "step": 4981 }, { "epoch": 0.9205575637190933, "grad_norm": 0.07552764564752579, "learning_rate": 1.624491945185565e-05, "loss": 0.5551050305366516, "step": 4982 }, { "epoch": 0.9207423404279891, "grad_norm": 0.050118498504161835, "learning_rate": 1.624336123639597e-05, "loss": 0.3442586064338684, "step": 4983 }, { "epoch": 0.9209271171368849, "grad_norm": 0.09067343175411224, "learning_rate": 1.6241802772470043e-05, "loss": 0.6247709393501282, "step": 4984 }, { "epoch": 0.9211118938457807, "grad_norm": 0.08580661565065384, "learning_rate": 1.6240244060139896e-05, "loss": 0.7336589694023132, "step": 4985 }, { "epoch": 0.9212966705546766, "grad_norm": 0.06558381766080856, "learning_rate": 1.6238685099467557e-05, "loss": 0.5565704107284546, "step": 4986 }, { "epoch": 0.9214814472635724, "grad_norm": 0.07769566029310226, "learning_rate": 1.6237125890515068e-05, "loss": 0.5454164743423462, "step": 4987 }, { "epoch": 0.9216662239724682, "grad_norm": 0.06431884318590164, "learning_rate": 1.6235566433344483e-05, "loss": 0.47945302724838257, "step": 4988 }, { "epoch": 0.9218510006813642, "grad_norm": 0.0770520567893982, "learning_rate": 1.6234006728017863e-05, "loss": 0.6441652178764343, "step": 4989 }, { "epoch": 0.92203577739026, "grad_norm": 0.07232240587472916, "learning_rate": 1.6232446774597278e-05, "loss": 0.6215723156929016, "step": 4990 }, { "epoch": 0.9222205540991558, "grad_norm": 0.0755547434091568, "learning_rate": 1.6230886573144812e-05, "loss": 0.6493498086929321, "step": 4991 }, { "epoch": 0.9224053308080516, "grad_norm": 0.06353598833084106, "learning_rate": 1.6229326123722554e-05, "loss": 0.47383472323417664, "step": 4992 }, { "epoch": 0.9225901075169475, "grad_norm": 0.06302771717309952, "learning_rate": 1.6227765426392603e-05, "loss": 0.5499935746192932, "step": 4993 }, { "epoch": 0.9227748842258433, "grad_norm": 0.0657678171992302, "learning_rate": 1.6226204481217074e-05, "loss": 0.4634450078010559, "step": 4994 }, { "epoch": 0.9229596609347391, "grad_norm": 0.07355940341949463, "learning_rate": 1.622464328825809e-05, "loss": 0.4975418746471405, "step": 4995 }, { "epoch": 0.9231444376436351, "grad_norm": 0.05299646034836769, "learning_rate": 1.622308184757777e-05, "loss": 0.4634086787700653, "step": 4996 }, { "epoch": 0.9233292143525309, "grad_norm": 0.05826892331242561, "learning_rate": 1.6221520159238266e-05, "loss": 0.5520720481872559, "step": 4997 }, { "epoch": 0.9235139910614267, "grad_norm": 0.08895806223154068, "learning_rate": 1.6219958223301723e-05, "loss": 0.710663378238678, "step": 4998 }, { "epoch": 0.9236987677703226, "grad_norm": 0.05497613176703453, "learning_rate": 1.6218396039830304e-05, "loss": 0.3812207877635956, "step": 4999 }, { "epoch": 0.9238835444792184, "grad_norm": 0.06913777440786362, "learning_rate": 1.6216833608886175e-05, "loss": 0.5585352778434753, "step": 5000 }, { "epoch": 0.9238835444792184, "eval_loss": 0.6308066844940186, "eval_runtime": 157.2644, "eval_samples_per_second": 115.913, "eval_steps_per_second": 14.492, "step": 5000 }, { "epoch": 0.9240683211881142, "grad_norm": 0.06755195558071136, "learning_rate": 1.621527093053152e-05, "loss": 0.5701950192451477, "step": 5001 }, { "epoch": 0.92425309789701, "grad_norm": 0.08724575489759445, "learning_rate": 1.6213708004828527e-05, "loss": 0.7150865793228149, "step": 5002 }, { "epoch": 0.9244378746059059, "grad_norm": 0.08624764531850815, "learning_rate": 1.6212144831839396e-05, "loss": 0.6906049847602844, "step": 5003 }, { "epoch": 0.9246226513148018, "grad_norm": 0.06757311522960663, "learning_rate": 1.6210581411626335e-05, "loss": 0.5546698570251465, "step": 5004 }, { "epoch": 0.9248074280236976, "grad_norm": 0.0782892256975174, "learning_rate": 1.6209017744251564e-05, "loss": 0.8180869221687317, "step": 5005 }, { "epoch": 0.9249922047325935, "grad_norm": 0.06884290277957916, "learning_rate": 1.6207453829777312e-05, "loss": 0.6495415568351746, "step": 5006 }, { "epoch": 0.9251769814414893, "grad_norm": 0.06144087016582489, "learning_rate": 1.620588966826582e-05, "loss": 0.5278459787368774, "step": 5007 }, { "epoch": 0.9253617581503851, "grad_norm": 0.07224945724010468, "learning_rate": 1.6204325259779335e-05, "loss": 0.6224374771118164, "step": 5008 }, { "epoch": 0.925546534859281, "grad_norm": 0.08506694436073303, "learning_rate": 1.6202760604380116e-05, "loss": 0.8050345182418823, "step": 5009 }, { "epoch": 0.9257313115681768, "grad_norm": 0.08770319819450378, "learning_rate": 1.620119570213043e-05, "loss": 0.7414484620094299, "step": 5010 }, { "epoch": 0.9259160882770727, "grad_norm": 0.09567960351705551, "learning_rate": 1.6199630553092557e-05, "loss": 0.6663733720779419, "step": 5011 }, { "epoch": 0.9261008649859686, "grad_norm": 0.06947190314531326, "learning_rate": 1.6198065157328785e-05, "loss": 0.5598324537277222, "step": 5012 }, { "epoch": 0.9262856416948644, "grad_norm": 0.04812987893819809, "learning_rate": 1.6196499514901405e-05, "loss": 0.37595170736312866, "step": 5013 }, { "epoch": 0.9264704184037602, "grad_norm": 0.07149752974510193, "learning_rate": 1.6194933625872736e-05, "loss": 0.6893596649169922, "step": 5014 }, { "epoch": 0.926655195112656, "grad_norm": 0.06001589819788933, "learning_rate": 1.619336749030509e-05, "loss": 0.3789342939853668, "step": 5015 }, { "epoch": 0.9268399718215519, "grad_norm": 0.07331600040197372, "learning_rate": 1.619180110826079e-05, "loss": 0.7051414251327515, "step": 5016 }, { "epoch": 0.9270247485304477, "grad_norm": 0.06992633640766144, "learning_rate": 1.619023447980218e-05, "loss": 0.5805511474609375, "step": 5017 }, { "epoch": 0.9272095252393436, "grad_norm": 0.08719083666801453, "learning_rate": 1.6188667604991608e-05, "loss": 0.7659269571304321, "step": 5018 }, { "epoch": 0.9273943019482395, "grad_norm": 0.04258688911795616, "learning_rate": 1.6187100483891423e-05, "loss": 0.2700992822647095, "step": 5019 }, { "epoch": 0.9275790786571353, "grad_norm": 0.0832752212882042, "learning_rate": 1.6185533116563998e-05, "loss": 0.7233130931854248, "step": 5020 }, { "epoch": 0.9277638553660311, "grad_norm": 0.06890590488910675, "learning_rate": 1.6183965503071706e-05, "loss": 0.5845312476158142, "step": 5021 }, { "epoch": 0.927948632074927, "grad_norm": 0.062875896692276, "learning_rate": 1.6182397643476935e-05, "loss": 0.529586672782898, "step": 5022 }, { "epoch": 0.9281334087838228, "grad_norm": 0.05886458605527878, "learning_rate": 1.6180829537842078e-05, "loss": 0.4220605492591858, "step": 5023 }, { "epoch": 0.9283181854927186, "grad_norm": 0.0718119889497757, "learning_rate": 1.6179261186229544e-05, "loss": 0.6670949459075928, "step": 5024 }, { "epoch": 0.9285029622016144, "grad_norm": 0.07199763506650925, "learning_rate": 1.6177692588701746e-05, "loss": 0.5530394911766052, "step": 5025 }, { "epoch": 0.9286877389105104, "grad_norm": 0.0965399518609047, "learning_rate": 1.6176123745321114e-05, "loss": 0.8364000916481018, "step": 5026 }, { "epoch": 0.9288725156194062, "grad_norm": 0.05655921995639801, "learning_rate": 1.6174554656150078e-05, "loss": 0.4526393711566925, "step": 5027 }, { "epoch": 0.929057292328302, "grad_norm": 0.07491301745176315, "learning_rate": 1.6172985321251084e-05, "loss": 0.5258397459983826, "step": 5028 }, { "epoch": 0.9292420690371979, "grad_norm": 0.08294566720724106, "learning_rate": 1.6171415740686585e-05, "loss": 0.5293903350830078, "step": 5029 }, { "epoch": 0.9294268457460937, "grad_norm": 0.08152367919683456, "learning_rate": 1.616984591451905e-05, "loss": 0.7470456957817078, "step": 5030 }, { "epoch": 0.9296116224549895, "grad_norm": 0.06945068389177322, "learning_rate": 1.6168275842810946e-05, "loss": 0.48202794790267944, "step": 5031 }, { "epoch": 0.9297963991638853, "grad_norm": 0.06779973953962326, "learning_rate": 1.616670552562477e-05, "loss": 0.552852988243103, "step": 5032 }, { "epoch": 0.9299811758727813, "grad_norm": 0.06413020193576813, "learning_rate": 1.6165134963023e-05, "loss": 0.6048864722251892, "step": 5033 }, { "epoch": 0.9301659525816771, "grad_norm": 0.09240268170833588, "learning_rate": 1.6163564155068148e-05, "loss": 0.9151281714439392, "step": 5034 }, { "epoch": 0.9303507292905729, "grad_norm": 0.08496936410665512, "learning_rate": 1.6161993101822728e-05, "loss": 0.7461206316947937, "step": 5035 }, { "epoch": 0.9305355059994688, "grad_norm": 0.06811504065990448, "learning_rate": 1.616042180334926e-05, "loss": 0.6777445077896118, "step": 5036 }, { "epoch": 0.9307202827083646, "grad_norm": 0.07164547592401505, "learning_rate": 1.6158850259710278e-05, "loss": 0.4562653601169586, "step": 5037 }, { "epoch": 0.9309050594172604, "grad_norm": 0.07087064534425735, "learning_rate": 1.615727847096832e-05, "loss": 0.5231823921203613, "step": 5038 }, { "epoch": 0.9310898361261563, "grad_norm": 0.07362006604671478, "learning_rate": 1.615570643718595e-05, "loss": 0.5995796918869019, "step": 5039 }, { "epoch": 0.9312746128350522, "grad_norm": 0.08397943526506424, "learning_rate": 1.6154134158425717e-05, "loss": 0.735202431678772, "step": 5040 }, { "epoch": 0.931459389543948, "grad_norm": 0.09518817067146301, "learning_rate": 1.6152561634750202e-05, "loss": 0.7966129183769226, "step": 5041 }, { "epoch": 0.9316441662528439, "grad_norm": 0.07340359687805176, "learning_rate": 1.6150988866221983e-05, "loss": 0.5222837924957275, "step": 5042 }, { "epoch": 0.9318289429617397, "grad_norm": 0.07132686674594879, "learning_rate": 1.6149415852903647e-05, "loss": 0.589817225933075, "step": 5043 }, { "epoch": 0.9320137196706355, "grad_norm": 0.07675815373659134, "learning_rate": 1.61478425948578e-05, "loss": 0.6767191290855408, "step": 5044 }, { "epoch": 0.9321984963795313, "grad_norm": 0.05632895603775978, "learning_rate": 1.6146269092147054e-05, "loss": 0.4868313670158386, "step": 5045 }, { "epoch": 0.9323832730884272, "grad_norm": 0.08111368119716644, "learning_rate": 1.6144695344834026e-05, "loss": 0.6972765922546387, "step": 5046 }, { "epoch": 0.932568049797323, "grad_norm": 0.09418383985757828, "learning_rate": 1.614312135298135e-05, "loss": 0.786720871925354, "step": 5047 }, { "epoch": 0.9327528265062189, "grad_norm": 0.07354355603456497, "learning_rate": 1.6141547116651663e-05, "loss": 0.9190953969955444, "step": 5048 }, { "epoch": 0.9329376032151148, "grad_norm": 0.06468519568443298, "learning_rate": 1.613997263590761e-05, "loss": 0.6413533091545105, "step": 5049 }, { "epoch": 0.9331223799240106, "grad_norm": 0.06637117266654968, "learning_rate": 1.613839791081186e-05, "loss": 0.5141046047210693, "step": 5050 }, { "epoch": 0.9333071566329064, "grad_norm": 0.08465000241994858, "learning_rate": 1.6136822941427076e-05, "loss": 0.7368367910385132, "step": 5051 }, { "epoch": 0.9334919333418022, "grad_norm": 0.05120861530303955, "learning_rate": 1.6135247727815943e-05, "loss": 0.5021374225616455, "step": 5052 }, { "epoch": 0.9336767100506981, "grad_norm": 0.06843441724777222, "learning_rate": 1.6133672270041142e-05, "loss": 0.47936877608299255, "step": 5053 }, { "epoch": 0.9338614867595939, "grad_norm": 0.07375901937484741, "learning_rate": 1.613209656816537e-05, "loss": 0.6750283241271973, "step": 5054 }, { "epoch": 0.9340462634684898, "grad_norm": 0.057786088436841965, "learning_rate": 1.6130520622251347e-05, "loss": 0.5178053379058838, "step": 5055 }, { "epoch": 0.9342310401773857, "grad_norm": 0.04772485792636871, "learning_rate": 1.612894443236178e-05, "loss": 0.38083186745643616, "step": 5056 }, { "epoch": 0.9344158168862815, "grad_norm": 0.08076415956020355, "learning_rate": 1.6127367998559397e-05, "loss": 0.6789405345916748, "step": 5057 }, { "epoch": 0.9346005935951773, "grad_norm": 0.06267094612121582, "learning_rate": 1.612579132090694e-05, "loss": 0.6396245360374451, "step": 5058 }, { "epoch": 0.9347853703040732, "grad_norm": 0.06518642604351044, "learning_rate": 1.6124214399467154e-05, "loss": 0.5198378562927246, "step": 5059 }, { "epoch": 0.934970147012969, "grad_norm": 0.07412681728601456, "learning_rate": 1.61226372343028e-05, "loss": 0.6395267248153687, "step": 5060 }, { "epoch": 0.9351549237218648, "grad_norm": 0.07623349875211716, "learning_rate": 1.612105982547663e-05, "loss": 0.6735665202140808, "step": 5061 }, { "epoch": 0.9353397004307608, "grad_norm": 0.08154106885194778, "learning_rate": 1.6119482173051434e-05, "loss": 0.6256721019744873, "step": 5062 }, { "epoch": 0.9355244771396566, "grad_norm": 0.06194007769227028, "learning_rate": 1.6117904277089994e-05, "loss": 0.6189951300621033, "step": 5063 }, { "epoch": 0.9357092538485524, "grad_norm": 0.05380154773592949, "learning_rate": 1.61163261376551e-05, "loss": 0.3931328058242798, "step": 5064 }, { "epoch": 0.9358940305574482, "grad_norm": 0.11030847579240799, "learning_rate": 1.6114747754809564e-05, "loss": 0.8911649584770203, "step": 5065 }, { "epoch": 0.9360788072663441, "grad_norm": 0.06107322871685028, "learning_rate": 1.61131691286162e-05, "loss": 0.5231435894966125, "step": 5066 }, { "epoch": 0.9362635839752399, "grad_norm": 0.05898779630661011, "learning_rate": 1.6111590259137827e-05, "loss": 0.478934109210968, "step": 5067 }, { "epoch": 0.9364483606841357, "grad_norm": 0.06787285208702087, "learning_rate": 1.6110011146437282e-05, "loss": 0.5066587924957275, "step": 5068 }, { "epoch": 0.9366331373930316, "grad_norm": 0.08234155178070068, "learning_rate": 1.6108431790577413e-05, "loss": 0.6809791922569275, "step": 5069 }, { "epoch": 0.9368179141019275, "grad_norm": 0.07514581084251404, "learning_rate": 1.6106852191621067e-05, "loss": 0.7342582941055298, "step": 5070 }, { "epoch": 0.9370026908108233, "grad_norm": 0.06335484981536865, "learning_rate": 1.6105272349631107e-05, "loss": 0.5712248682975769, "step": 5071 }, { "epoch": 0.9371874675197192, "grad_norm": 0.0602981261909008, "learning_rate": 1.6103692264670414e-05, "loss": 0.5489089488983154, "step": 5072 }, { "epoch": 0.937372244228615, "grad_norm": 0.08747103810310364, "learning_rate": 1.6102111936801865e-05, "loss": 0.6118226647377014, "step": 5073 }, { "epoch": 0.9375570209375108, "grad_norm": 0.08617452532052994, "learning_rate": 1.610053136608835e-05, "loss": 0.6262293457984924, "step": 5074 }, { "epoch": 0.9377417976464066, "grad_norm": 0.07134665548801422, "learning_rate": 1.6098950552592768e-05, "loss": 0.4300174117088318, "step": 5075 }, { "epoch": 0.9379265743553025, "grad_norm": 0.0613059476017952, "learning_rate": 1.609736949637804e-05, "loss": 0.5369535088539124, "step": 5076 }, { "epoch": 0.9381113510641984, "grad_norm": 0.06910678744316101, "learning_rate": 1.609578819750708e-05, "loss": 0.5396366119384766, "step": 5077 }, { "epoch": 0.9382961277730942, "grad_norm": 0.07462180405855179, "learning_rate": 1.6094206656042822e-05, "loss": 0.6210793852806091, "step": 5078 }, { "epoch": 0.9384809044819901, "grad_norm": 0.07486458867788315, "learning_rate": 1.6092624872048207e-05, "loss": 0.5217027068138123, "step": 5079 }, { "epoch": 0.9386656811908859, "grad_norm": 0.08017706125974655, "learning_rate": 1.609104284558618e-05, "loss": 0.7342339754104614, "step": 5080 }, { "epoch": 0.9388504578997817, "grad_norm": 0.08369428664445877, "learning_rate": 1.608946057671971e-05, "loss": 0.7358474731445312, "step": 5081 }, { "epoch": 0.9390352346086775, "grad_norm": 0.11979498714208603, "learning_rate": 1.6087878065511756e-05, "loss": 0.5417165160179138, "step": 5082 }, { "epoch": 0.9392200113175734, "grad_norm": 0.06309329718351364, "learning_rate": 1.6086295312025303e-05, "loss": 0.546745777130127, "step": 5083 }, { "epoch": 0.9394047880264693, "grad_norm": 0.0681857243180275, "learning_rate": 1.6084712316323338e-05, "loss": 0.5794031023979187, "step": 5084 }, { "epoch": 0.9395895647353651, "grad_norm": 0.07119515538215637, "learning_rate": 1.608312907846886e-05, "loss": 0.5442270636558533, "step": 5085 }, { "epoch": 0.939774341444261, "grad_norm": 0.05828424543142319, "learning_rate": 1.6081545598524873e-05, "loss": 0.3991624414920807, "step": 5086 }, { "epoch": 0.9399591181531568, "grad_norm": 0.0791083499789238, "learning_rate": 1.6079961876554402e-05, "loss": 0.7310355305671692, "step": 5087 }, { "epoch": 0.9401438948620526, "grad_norm": 0.07687226682901382, "learning_rate": 1.6078377912620466e-05, "loss": 0.6118609309196472, "step": 5088 }, { "epoch": 0.9403286715709485, "grad_norm": 0.06193707883358002, "learning_rate": 1.607679370678611e-05, "loss": 0.4281879663467407, "step": 5089 }, { "epoch": 0.9405134482798443, "grad_norm": 0.08414844423532486, "learning_rate": 1.6075209259114375e-05, "loss": 0.693047285079956, "step": 5090 }, { "epoch": 0.9406982249887401, "grad_norm": 0.06623263657093048, "learning_rate": 1.607362456966832e-05, "loss": 0.5509737133979797, "step": 5091 }, { "epoch": 0.9408830016976361, "grad_norm": 0.07156947255134583, "learning_rate": 1.6072039638511004e-05, "loss": 0.6185341477394104, "step": 5092 }, { "epoch": 0.9410677784065319, "grad_norm": 0.07968199253082275, "learning_rate": 1.6070454465705513e-05, "loss": 0.5912548899650574, "step": 5093 }, { "epoch": 0.9412525551154277, "grad_norm": 0.06254375725984573, "learning_rate": 1.6068869051314923e-05, "loss": 0.5338126420974731, "step": 5094 }, { "epoch": 0.9414373318243235, "grad_norm": 0.07212621718645096, "learning_rate": 1.606728339540233e-05, "loss": 0.6360775232315063, "step": 5095 }, { "epoch": 0.9416221085332194, "grad_norm": 0.05787743255496025, "learning_rate": 1.6065697498030842e-05, "loss": 0.4220399558544159, "step": 5096 }, { "epoch": 0.9418068852421152, "grad_norm": 0.06521794199943542, "learning_rate": 1.606411135926357e-05, "loss": 0.5160315036773682, "step": 5097 }, { "epoch": 0.941991661951011, "grad_norm": 0.08517172932624817, "learning_rate": 1.6062524979163643e-05, "loss": 0.705226719379425, "step": 5098 }, { "epoch": 0.942176438659907, "grad_norm": 0.09427408128976822, "learning_rate": 1.6060938357794182e-05, "loss": 0.6183182001113892, "step": 5099 }, { "epoch": 0.9423612153688028, "grad_norm": 0.07305286824703217, "learning_rate": 1.605935149521834e-05, "loss": 0.5855145454406738, "step": 5100 }, { "epoch": 0.9425459920776986, "grad_norm": 0.07183791697025299, "learning_rate": 1.6057764391499267e-05, "loss": 0.6883285045623779, "step": 5101 }, { "epoch": 0.9427307687865945, "grad_norm": 0.08010408282279968, "learning_rate": 1.6056177046700122e-05, "loss": 0.6888131499290466, "step": 5102 }, { "epoch": 0.9429155454954903, "grad_norm": 0.06611838191747665, "learning_rate": 1.6054589460884078e-05, "loss": 0.5510590672492981, "step": 5103 }, { "epoch": 0.9431003222043861, "grad_norm": 0.08526670187711716, "learning_rate": 1.6053001634114316e-05, "loss": 0.6659255623817444, "step": 5104 }, { "epoch": 0.9432850989132819, "grad_norm": 0.08515694737434387, "learning_rate": 1.6051413566454025e-05, "loss": 0.8655163645744324, "step": 5105 }, { "epoch": 0.9434698756221779, "grad_norm": 0.0811673104763031, "learning_rate": 1.6049825257966407e-05, "loss": 0.7180903553962708, "step": 5106 }, { "epoch": 0.9436546523310737, "grad_norm": 0.07173803448677063, "learning_rate": 1.6048236708714674e-05, "loss": 0.7913423776626587, "step": 5107 }, { "epoch": 0.9438394290399695, "grad_norm": 0.05145305395126343, "learning_rate": 1.604664791876204e-05, "loss": 0.42830294370651245, "step": 5108 }, { "epoch": 0.9440242057488654, "grad_norm": 0.07102995365858078, "learning_rate": 1.6045058888171737e-05, "loss": 0.5304608941078186, "step": 5109 }, { "epoch": 0.9442089824577612, "grad_norm": 0.07459255307912827, "learning_rate": 1.6043469617007e-05, "loss": 0.6177938580513, "step": 5110 }, { "epoch": 0.944393759166657, "grad_norm": 0.06430578976869583, "learning_rate": 1.6041880105331083e-05, "loss": 0.40000367164611816, "step": 5111 }, { "epoch": 0.9445785358755528, "grad_norm": 0.054752789437770844, "learning_rate": 1.604029035320724e-05, "loss": 0.4391731917858124, "step": 5112 }, { "epoch": 0.9447633125844487, "grad_norm": 0.06837918609380722, "learning_rate": 1.603870036069874e-05, "loss": 0.5762681365013123, "step": 5113 }, { "epoch": 0.9449480892933446, "grad_norm": 0.07113198935985565, "learning_rate": 1.603711012786886e-05, "loss": 0.5563413500785828, "step": 5114 }, { "epoch": 0.9451328660022404, "grad_norm": 0.07704070955514908, "learning_rate": 1.6035519654780878e-05, "loss": 0.7374782562255859, "step": 5115 }, { "epoch": 0.9453176427111363, "grad_norm": 0.08055262267589569, "learning_rate": 1.60339289414981e-05, "loss": 0.6849924325942993, "step": 5116 }, { "epoch": 0.9455024194200321, "grad_norm": 0.06334561854600906, "learning_rate": 1.6032337988083828e-05, "loss": 0.3979511857032776, "step": 5117 }, { "epoch": 0.9456871961289279, "grad_norm": 0.057456646114587784, "learning_rate": 1.603074679460138e-05, "loss": 0.45759090781211853, "step": 5118 }, { "epoch": 0.9458719728378238, "grad_norm": 0.057219650596380234, "learning_rate": 1.6029155361114068e-05, "loss": 0.4451177418231964, "step": 5119 }, { "epoch": 0.9460567495467196, "grad_norm": 0.08612550050020218, "learning_rate": 1.6027563687685244e-05, "loss": 0.6650874614715576, "step": 5120 }, { "epoch": 0.9462415262556155, "grad_norm": 0.0630384311079979, "learning_rate": 1.6025971774378238e-05, "loss": 0.46417829394340515, "step": 5121 }, { "epoch": 0.9464263029645114, "grad_norm": 0.09216060489416122, "learning_rate": 1.6024379621256415e-05, "loss": 0.7036070227622986, "step": 5122 }, { "epoch": 0.9466110796734072, "grad_norm": 0.08361869305372238, "learning_rate": 1.6022787228383125e-05, "loss": 0.7870634198188782, "step": 5123 }, { "epoch": 0.946795856382303, "grad_norm": 0.06421814113855362, "learning_rate": 1.6021194595821747e-05, "loss": 0.4977126717567444, "step": 5124 }, { "epoch": 0.9469806330911988, "grad_norm": 0.07801469415426254, "learning_rate": 1.6019601723635664e-05, "loss": 0.6157658100128174, "step": 5125 }, { "epoch": 0.9471654098000947, "grad_norm": 0.06259127706289291, "learning_rate": 1.6018008611888263e-05, "loss": 0.5550975799560547, "step": 5126 }, { "epoch": 0.9473501865089905, "grad_norm": 0.06924456357955933, "learning_rate": 1.6016415260642947e-05, "loss": 0.5265330672264099, "step": 5127 }, { "epoch": 0.9475349632178864, "grad_norm": 0.0692691057920456, "learning_rate": 1.601482166996313e-05, "loss": 0.5791558027267456, "step": 5128 }, { "epoch": 0.9477197399267823, "grad_norm": 0.08452518284320831, "learning_rate": 1.6013227839912224e-05, "loss": 0.7562264800071716, "step": 5129 }, { "epoch": 0.9479045166356781, "grad_norm": 0.07634246349334717, "learning_rate": 1.6011633770553666e-05, "loss": 0.7103491425514221, "step": 5130 }, { "epoch": 0.9480892933445739, "grad_norm": 0.08022932708263397, "learning_rate": 1.6010039461950893e-05, "loss": 0.5910216569900513, "step": 5131 }, { "epoch": 0.9482740700534698, "grad_norm": 0.0745372623205185, "learning_rate": 1.600844491416735e-05, "loss": 0.5447332859039307, "step": 5132 }, { "epoch": 0.9484588467623656, "grad_norm": 0.07081407308578491, "learning_rate": 1.6006850127266498e-05, "loss": 0.4733290374279022, "step": 5133 }, { "epoch": 0.9486436234712614, "grad_norm": 0.06008889898657799, "learning_rate": 1.6005255101311803e-05, "loss": 0.4657738208770752, "step": 5134 }, { "epoch": 0.9488284001801572, "grad_norm": 0.06958460807800293, "learning_rate": 1.6003659836366744e-05, "loss": 0.5668250322341919, "step": 5135 }, { "epoch": 0.9490131768890532, "grad_norm": 0.05939478427171707, "learning_rate": 1.6002064332494806e-05, "loss": 0.5383512377738953, "step": 5136 }, { "epoch": 0.949197953597949, "grad_norm": 0.0792398750782013, "learning_rate": 1.6000468589759486e-05, "loss": 0.5559828877449036, "step": 5137 }, { "epoch": 0.9493827303068448, "grad_norm": 0.07488033920526505, "learning_rate": 1.599887260822429e-05, "loss": 0.5714380145072937, "step": 5138 }, { "epoch": 0.9495675070157407, "grad_norm": 0.08198749274015427, "learning_rate": 1.5997276387952733e-05, "loss": 0.6661936640739441, "step": 5139 }, { "epoch": 0.9497522837246365, "grad_norm": 0.06090731918811798, "learning_rate": 1.5995679929008338e-05, "loss": 0.5148775577545166, "step": 5140 }, { "epoch": 0.9499370604335323, "grad_norm": 0.0652225986123085, "learning_rate": 1.599408323145464e-05, "loss": 0.4911291301250458, "step": 5141 }, { "epoch": 0.9501218371424281, "grad_norm": 0.07245299220085144, "learning_rate": 1.599248629535518e-05, "loss": 0.5561530590057373, "step": 5142 }, { "epoch": 0.9503066138513241, "grad_norm": 0.06982336938381195, "learning_rate": 1.5990889120773515e-05, "loss": 0.5131959915161133, "step": 5143 }, { "epoch": 0.9504913905602199, "grad_norm": 0.059342216700315475, "learning_rate": 1.5989291707773204e-05, "loss": 0.5701693296432495, "step": 5144 }, { "epoch": 0.9506761672691157, "grad_norm": 0.0620679147541523, "learning_rate": 1.5987694056417825e-05, "loss": 0.46893739700317383, "step": 5145 }, { "epoch": 0.9508609439780116, "grad_norm": 0.08065487444400787, "learning_rate": 1.5986096166770953e-05, "loss": 0.610145628452301, "step": 5146 }, { "epoch": 0.9510457206869074, "grad_norm": 0.0846034586429596, "learning_rate": 1.5984498038896184e-05, "loss": 0.7594650983810425, "step": 5147 }, { "epoch": 0.9512304973958032, "grad_norm": 0.05852271243929863, "learning_rate": 1.5982899672857115e-05, "loss": 0.4893558621406555, "step": 5148 }, { "epoch": 0.9514152741046991, "grad_norm": 0.06891465187072754, "learning_rate": 1.598130106871736e-05, "loss": 0.6074330806732178, "step": 5149 }, { "epoch": 0.951600050813595, "grad_norm": 0.05314657464623451, "learning_rate": 1.5979702226540528e-05, "loss": 0.4676016569137573, "step": 5150 }, { "epoch": 0.9517848275224908, "grad_norm": 0.0809001624584198, "learning_rate": 1.597810314639026e-05, "loss": 0.5725110173225403, "step": 5151 }, { "epoch": 0.9519696042313867, "grad_norm": 0.0798337310552597, "learning_rate": 1.5976503828330192e-05, "loss": 0.5859540700912476, "step": 5152 }, { "epoch": 0.9521543809402825, "grad_norm": 0.07935647666454315, "learning_rate": 1.597490427242397e-05, "loss": 0.6600157022476196, "step": 5153 }, { "epoch": 0.9523391576491783, "grad_norm": 0.06016004458069801, "learning_rate": 1.5973304478735245e-05, "loss": 0.42700162529945374, "step": 5154 }, { "epoch": 0.9525239343580741, "grad_norm": 0.08889743685722351, "learning_rate": 1.5971704447327697e-05, "loss": 0.7476317882537842, "step": 5155 }, { "epoch": 0.95270871106697, "grad_norm": 0.06792720407247543, "learning_rate": 1.5970104178264988e-05, "loss": 0.5295884013175964, "step": 5156 }, { "epoch": 0.9528934877758658, "grad_norm": 0.05554249882698059, "learning_rate": 1.5968503671610814e-05, "loss": 0.3461635708808899, "step": 5157 }, { "epoch": 0.9530782644847617, "grad_norm": 0.05836229398846626, "learning_rate": 1.596690292742887e-05, "loss": 0.49273398518562317, "step": 5158 }, { "epoch": 0.9532630411936576, "grad_norm": 0.06659424304962158, "learning_rate": 1.5965301945782854e-05, "loss": 0.5845066905021667, "step": 5159 }, { "epoch": 0.9534478179025534, "grad_norm": 0.07461929321289062, "learning_rate": 1.5963700726736485e-05, "loss": 0.7000290155410767, "step": 5160 }, { "epoch": 0.9536325946114492, "grad_norm": 0.08027936518192291, "learning_rate": 1.5962099270353484e-05, "loss": 0.734795331954956, "step": 5161 }, { "epoch": 0.953817371320345, "grad_norm": 0.05215161293745041, "learning_rate": 1.5960497576697584e-05, "loss": 0.36611029505729675, "step": 5162 }, { "epoch": 0.9540021480292409, "grad_norm": 0.06992398202419281, "learning_rate": 1.5958895645832533e-05, "loss": 0.5141623020172119, "step": 5163 }, { "epoch": 0.9541869247381367, "grad_norm": 0.06411082297563553, "learning_rate": 1.5957293477822075e-05, "loss": 0.5430804491043091, "step": 5164 }, { "epoch": 0.9543717014470326, "grad_norm": 0.0489242747426033, "learning_rate": 1.595569107272997e-05, "loss": 0.3583165407180786, "step": 5165 }, { "epoch": 0.9545564781559285, "grad_norm": 0.08241157233715057, "learning_rate": 1.5954088430620004e-05, "loss": 0.6570907831192017, "step": 5166 }, { "epoch": 0.9547412548648243, "grad_norm": 0.07930099219083786, "learning_rate": 1.5952485551555938e-05, "loss": 0.6064406037330627, "step": 5167 }, { "epoch": 0.9549260315737201, "grad_norm": 0.07888708263635635, "learning_rate": 1.595088243560157e-05, "loss": 0.6349717378616333, "step": 5168 }, { "epoch": 0.955110808282616, "grad_norm": 0.0772731751203537, "learning_rate": 1.5949279082820702e-05, "loss": 0.7229316234588623, "step": 5169 }, { "epoch": 0.9552955849915118, "grad_norm": 0.08178115636110306, "learning_rate": 1.594767549327714e-05, "loss": 0.6186074614524841, "step": 5170 }, { "epoch": 0.9554803617004076, "grad_norm": 0.07245965301990509, "learning_rate": 1.5946071667034702e-05, "loss": 0.5710280537605286, "step": 5171 }, { "epoch": 0.9556651384093036, "grad_norm": 0.06430346518754959, "learning_rate": 1.5944467604157213e-05, "loss": 0.6199550032615662, "step": 5172 }, { "epoch": 0.9558499151181994, "grad_norm": 0.07227133214473724, "learning_rate": 1.594286330470851e-05, "loss": 0.5163151025772095, "step": 5173 }, { "epoch": 0.9560346918270952, "grad_norm": 0.05971883237361908, "learning_rate": 1.594125876875244e-05, "loss": 0.5595969557762146, "step": 5174 }, { "epoch": 0.956219468535991, "grad_norm": 0.07065737247467041, "learning_rate": 1.593965399635286e-05, "loss": 0.48677173256874084, "step": 5175 }, { "epoch": 0.9564042452448869, "grad_norm": 0.0731082633137703, "learning_rate": 1.5938048987573633e-05, "loss": 0.7291607856750488, "step": 5176 }, { "epoch": 0.9565890219537827, "grad_norm": 0.08345812559127808, "learning_rate": 1.5936443742478632e-05, "loss": 0.5648295283317566, "step": 5177 }, { "epoch": 0.9567737986626785, "grad_norm": 0.07417561113834381, "learning_rate": 1.593483826113175e-05, "loss": 0.7409591674804688, "step": 5178 }, { "epoch": 0.9569585753715744, "grad_norm": 0.06974013149738312, "learning_rate": 1.5933232543596868e-05, "loss": 0.6175902485847473, "step": 5179 }, { "epoch": 0.9571433520804703, "grad_norm": 0.07038947194814682, "learning_rate": 1.5931626589937895e-05, "loss": 0.5422641634941101, "step": 5180 }, { "epoch": 0.9573281287893661, "grad_norm": 0.06995180249214172, "learning_rate": 1.5930020400218737e-05, "loss": 0.5708144903182983, "step": 5181 }, { "epoch": 0.957512905498262, "grad_norm": 0.05506231263279915, "learning_rate": 1.5928413974503326e-05, "loss": 0.4746338725090027, "step": 5182 }, { "epoch": 0.9576976822071578, "grad_norm": 0.05807039141654968, "learning_rate": 1.5926807312855584e-05, "loss": 0.3851551115512848, "step": 5183 }, { "epoch": 0.9578824589160536, "grad_norm": 0.07046632468700409, "learning_rate": 1.5925200415339453e-05, "loss": 0.5573902130126953, "step": 5184 }, { "epoch": 0.9580672356249494, "grad_norm": 0.07509570568799973, "learning_rate": 1.592359328201888e-05, "loss": 0.563309371471405, "step": 5185 }, { "epoch": 0.9582520123338453, "grad_norm": 0.0638134554028511, "learning_rate": 1.592198591295783e-05, "loss": 0.5711766481399536, "step": 5186 }, { "epoch": 0.9584367890427412, "grad_norm": 0.06136501207947731, "learning_rate": 1.592037830822027e-05, "loss": 0.4811912775039673, "step": 5187 }, { "epoch": 0.958621565751637, "grad_norm": 0.0601169727742672, "learning_rate": 1.5918770467870174e-05, "loss": 0.40823274850845337, "step": 5188 }, { "epoch": 0.9588063424605329, "grad_norm": 0.08154735714197159, "learning_rate": 1.5917162391971534e-05, "loss": 0.5557390451431274, "step": 5189 }, { "epoch": 0.9589911191694287, "grad_norm": 0.07775022834539413, "learning_rate": 1.591555408058834e-05, "loss": 0.5641697645187378, "step": 5190 }, { "epoch": 0.9591758958783245, "grad_norm": 0.08029249310493469, "learning_rate": 1.5913945533784598e-05, "loss": 0.6794726252555847, "step": 5191 }, { "epoch": 0.9593606725872204, "grad_norm": 0.07469724863767624, "learning_rate": 1.591233675162433e-05, "loss": 0.5435570478439331, "step": 5192 }, { "epoch": 0.9595454492961162, "grad_norm": 0.07713647931814194, "learning_rate": 1.5910727734171554e-05, "loss": 0.6686422228813171, "step": 5193 }, { "epoch": 0.9597302260050121, "grad_norm": 0.07689940184354782, "learning_rate": 1.5909118481490308e-05, "loss": 0.5657721757888794, "step": 5194 }, { "epoch": 0.959915002713908, "grad_norm": 0.06951393187046051, "learning_rate": 1.5907508993644635e-05, "loss": 0.4880034923553467, "step": 5195 }, { "epoch": 0.9600997794228038, "grad_norm": 0.07189091295003891, "learning_rate": 1.5905899270698588e-05, "loss": 0.5114649534225464, "step": 5196 }, { "epoch": 0.9602845561316996, "grad_norm": 0.05564050376415253, "learning_rate": 1.5904289312716222e-05, "loss": 0.5179625153541565, "step": 5197 }, { "epoch": 0.9604693328405954, "grad_norm": 0.07559002190828323, "learning_rate": 1.5902679119761618e-05, "loss": 0.7051935195922852, "step": 5198 }, { "epoch": 0.9606541095494913, "grad_norm": 0.0721646100282669, "learning_rate": 1.5901068691898848e-05, "loss": 0.5809262990951538, "step": 5199 }, { "epoch": 0.9608388862583871, "grad_norm": 0.07101470977067947, "learning_rate": 1.589945802919201e-05, "loss": 0.49693232774734497, "step": 5200 }, { "epoch": 0.9610236629672829, "grad_norm": 0.09033049643039703, "learning_rate": 1.5897847131705194e-05, "loss": 0.6932334899902344, "step": 5201 }, { "epoch": 0.9612084396761789, "grad_norm": 0.06097559258341789, "learning_rate": 1.589623599950252e-05, "loss": 0.38020753860473633, "step": 5202 }, { "epoch": 0.9613932163850747, "grad_norm": 0.05993859842419624, "learning_rate": 1.58946246326481e-05, "loss": 0.405231773853302, "step": 5203 }, { "epoch": 0.9615779930939705, "grad_norm": 0.07280710339546204, "learning_rate": 1.589301303120606e-05, "loss": 0.6655395030975342, "step": 5204 }, { "epoch": 0.9617627698028663, "grad_norm": 0.0598413310945034, "learning_rate": 1.5891401195240533e-05, "loss": 0.563666045665741, "step": 5205 }, { "epoch": 0.9619475465117622, "grad_norm": 0.08729203790426254, "learning_rate": 1.5889789124815676e-05, "loss": 0.5672181844711304, "step": 5206 }, { "epoch": 0.962132323220658, "grad_norm": 0.05892879143357277, "learning_rate": 1.588817681999564e-05, "loss": 0.5308200716972351, "step": 5207 }, { "epoch": 0.9623170999295538, "grad_norm": 0.07471846789121628, "learning_rate": 1.5886564280844585e-05, "loss": 0.7177116274833679, "step": 5208 }, { "epoch": 0.9625018766384498, "grad_norm": 0.07476504147052765, "learning_rate": 1.5884951507426692e-05, "loss": 0.6094974875450134, "step": 5209 }, { "epoch": 0.9626866533473456, "grad_norm": 0.06729447841644287, "learning_rate": 1.5883338499806137e-05, "loss": 0.5416808724403381, "step": 5210 }, { "epoch": 0.9628714300562414, "grad_norm": 0.07437737286090851, "learning_rate": 1.5881725258047116e-05, "loss": 0.5953792333602905, "step": 5211 }, { "epoch": 0.9630562067651373, "grad_norm": 0.06470108032226562, "learning_rate": 1.5880111782213833e-05, "loss": 0.5616847276687622, "step": 5212 }, { "epoch": 0.9632409834740331, "grad_norm": 0.08143790066242218, "learning_rate": 1.5878498072370497e-05, "loss": 0.5888870358467102, "step": 5213 }, { "epoch": 0.9634257601829289, "grad_norm": 0.07331033051013947, "learning_rate": 1.587688412858133e-05, "loss": 0.7387831211090088, "step": 5214 }, { "epoch": 0.9636105368918247, "grad_norm": 0.06353524327278137, "learning_rate": 1.587526995091056e-05, "loss": 0.675960123538971, "step": 5215 }, { "epoch": 0.9637953136007207, "grad_norm": 0.07013055682182312, "learning_rate": 1.5873655539422426e-05, "loss": 0.5157957673072815, "step": 5216 }, { "epoch": 0.9639800903096165, "grad_norm": 0.0558655709028244, "learning_rate": 1.5872040894181182e-05, "loss": 0.45955491065979004, "step": 5217 }, { "epoch": 0.9641648670185123, "grad_norm": 0.07099532335996628, "learning_rate": 1.5870426015251076e-05, "loss": 0.5657529234886169, "step": 5218 }, { "epoch": 0.9643496437274082, "grad_norm": 0.061591215431690216, "learning_rate": 1.5868810902696385e-05, "loss": 0.4769912362098694, "step": 5219 }, { "epoch": 0.964534420436304, "grad_norm": 0.08955328166484833, "learning_rate": 1.586719555658138e-05, "loss": 0.7824416160583496, "step": 5220 }, { "epoch": 0.9647191971451998, "grad_norm": 0.0744243934750557, "learning_rate": 1.5865579976970348e-05, "loss": 0.49541524052619934, "step": 5221 }, { "epoch": 0.9649039738540957, "grad_norm": 0.06643404066562653, "learning_rate": 1.5863964163927585e-05, "loss": 0.4621345102787018, "step": 5222 }, { "epoch": 0.9650887505629915, "grad_norm": 0.05877881869673729, "learning_rate": 1.5862348117517395e-05, "loss": 0.48775753378868103, "step": 5223 }, { "epoch": 0.9652735272718874, "grad_norm": 0.07754340022802353, "learning_rate": 1.586073183780409e-05, "loss": 0.5634762644767761, "step": 5224 }, { "epoch": 0.9654583039807833, "grad_norm": 0.058851584792137146, "learning_rate": 1.5859115324851992e-05, "loss": 0.5044225454330444, "step": 5225 }, { "epoch": 0.9656430806896791, "grad_norm": 0.06915190070867538, "learning_rate": 1.5857498578725435e-05, "loss": 0.5596942901611328, "step": 5226 }, { "epoch": 0.9658278573985749, "grad_norm": 0.06534717231988907, "learning_rate": 1.5855881599488764e-05, "loss": 0.6266942620277405, "step": 5227 }, { "epoch": 0.9660126341074707, "grad_norm": 0.1051904484629631, "learning_rate": 1.5854264387206322e-05, "loss": 0.6451123356819153, "step": 5228 }, { "epoch": 0.9661974108163666, "grad_norm": 0.07486904412508011, "learning_rate": 1.5852646941942474e-05, "loss": 0.5404165387153625, "step": 5229 }, { "epoch": 0.9663821875252624, "grad_norm": 0.06894651800394058, "learning_rate": 1.5851029263761588e-05, "loss": 0.6602181196212769, "step": 5230 }, { "epoch": 0.9665669642341583, "grad_norm": 0.07317795604467392, "learning_rate": 1.5849411352728044e-05, "loss": 0.5347000360488892, "step": 5231 }, { "epoch": 0.9667517409430542, "grad_norm": 0.07264052331447601, "learning_rate": 1.5847793208906228e-05, "loss": 0.6339982748031616, "step": 5232 }, { "epoch": 0.96693651765195, "grad_norm": 0.06145188957452774, "learning_rate": 1.5846174832360537e-05, "loss": 0.4953806698322296, "step": 5233 }, { "epoch": 0.9671212943608458, "grad_norm": 0.08297760039567947, "learning_rate": 1.5844556223155377e-05, "loss": 0.5764387249946594, "step": 5234 }, { "epoch": 0.9673060710697416, "grad_norm": 0.06188672408461571, "learning_rate": 1.5842937381355166e-05, "loss": 0.5386495590209961, "step": 5235 }, { "epoch": 0.9674908477786375, "grad_norm": 0.06407991051673889, "learning_rate": 1.584131830702433e-05, "loss": 0.6187621355056763, "step": 5236 }, { "epoch": 0.9676756244875333, "grad_norm": 0.07902231812477112, "learning_rate": 1.58396990002273e-05, "loss": 0.5892176032066345, "step": 5237 }, { "epoch": 0.9678604011964292, "grad_norm": 0.07633795589208603, "learning_rate": 1.5838079461028516e-05, "loss": 0.5621336102485657, "step": 5238 }, { "epoch": 0.9680451779053251, "grad_norm": 0.06466647982597351, "learning_rate": 1.5836459689492437e-05, "loss": 0.5642315745353699, "step": 5239 }, { "epoch": 0.9682299546142209, "grad_norm": 0.06404679268598557, "learning_rate": 1.583483968568352e-05, "loss": 0.5862606167793274, "step": 5240 }, { "epoch": 0.9684147313231167, "grad_norm": 0.08490888774394989, "learning_rate": 1.5833219449666242e-05, "loss": 0.6298123002052307, "step": 5241 }, { "epoch": 0.9685995080320126, "grad_norm": 0.06971051543951035, "learning_rate": 1.5831598981505076e-05, "loss": 0.7906245589256287, "step": 5242 }, { "epoch": 0.9687842847409084, "grad_norm": 0.05880312621593475, "learning_rate": 1.5829978281264516e-05, "loss": 0.5124143362045288, "step": 5243 }, { "epoch": 0.9689690614498042, "grad_norm": 0.07851839065551758, "learning_rate": 1.5828357349009065e-05, "loss": 0.7467482686042786, "step": 5244 }, { "epoch": 0.9691538381587, "grad_norm": 0.09485996514558792, "learning_rate": 1.5826736184803223e-05, "loss": 0.8176977038383484, "step": 5245 }, { "epoch": 0.969338614867596, "grad_norm": 0.08205217123031616, "learning_rate": 1.5825114788711507e-05, "loss": 0.6752728223800659, "step": 5246 }, { "epoch": 0.9695233915764918, "grad_norm": 0.07848254591226578, "learning_rate": 1.582349316079845e-05, "loss": 0.6415618658065796, "step": 5247 }, { "epoch": 0.9697081682853876, "grad_norm": 0.0754413902759552, "learning_rate": 1.5821871301128587e-05, "loss": 0.7687599658966064, "step": 5248 }, { "epoch": 0.9698929449942835, "grad_norm": 0.060738738626241684, "learning_rate": 1.5820249209766455e-05, "loss": 0.4032150208950043, "step": 5249 }, { "epoch": 0.9700777217031793, "grad_norm": 0.06582488119602203, "learning_rate": 1.5818626886776617e-05, "loss": 0.5358696579933167, "step": 5250 }, { "epoch": 0.9702624984120751, "grad_norm": 0.07538236677646637, "learning_rate": 1.5817004332223634e-05, "loss": 0.6872379183769226, "step": 5251 }, { "epoch": 0.970447275120971, "grad_norm": 0.0801084116101265, "learning_rate": 1.5815381546172075e-05, "loss": 0.6306943893432617, "step": 5252 }, { "epoch": 0.9706320518298669, "grad_norm": 0.06021309643983841, "learning_rate": 1.5813758528686523e-05, "loss": 0.4739924967288971, "step": 5253 }, { "epoch": 0.9708168285387627, "grad_norm": 0.05175168812274933, "learning_rate": 1.5812135279831574e-05, "loss": 0.43031033873558044, "step": 5254 }, { "epoch": 0.9710016052476586, "grad_norm": 0.07822158187627792, "learning_rate": 1.5810511799671826e-05, "loss": 0.46244823932647705, "step": 5255 }, { "epoch": 0.9711863819565544, "grad_norm": 0.08453062176704407, "learning_rate": 1.5808888088271883e-05, "loss": 0.6953146457672119, "step": 5256 }, { "epoch": 0.9713711586654502, "grad_norm": 0.07755032181739807, "learning_rate": 1.580726414569637e-05, "loss": 0.7229933738708496, "step": 5257 }, { "epoch": 0.971555935374346, "grad_norm": 0.07005933672189713, "learning_rate": 1.5805639972009914e-05, "loss": 0.5575450658798218, "step": 5258 }, { "epoch": 0.9717407120832419, "grad_norm": 0.06289315223693848, "learning_rate": 1.580401556727715e-05, "loss": 0.4901737570762634, "step": 5259 }, { "epoch": 0.9719254887921378, "grad_norm": 0.07762367278337479, "learning_rate": 1.580239093156272e-05, "loss": 0.7002114653587341, "step": 5260 }, { "epoch": 0.9721102655010336, "grad_norm": 0.08419755101203918, "learning_rate": 1.580076606493129e-05, "loss": 0.6662076115608215, "step": 5261 }, { "epoch": 0.9722950422099295, "grad_norm": 0.0712367370724678, "learning_rate": 1.5799140967447516e-05, "loss": 0.5575383305549622, "step": 5262 }, { "epoch": 0.9724798189188253, "grad_norm": 0.05275033041834831, "learning_rate": 1.5797515639176077e-05, "loss": 0.4794505536556244, "step": 5263 }, { "epoch": 0.9726645956277211, "grad_norm": 0.08217772841453552, "learning_rate": 1.579589008018165e-05, "loss": 0.8188614845275879, "step": 5264 }, { "epoch": 0.972849372336617, "grad_norm": 0.07426926493644714, "learning_rate": 1.5794264290528937e-05, "loss": 0.6695041060447693, "step": 5265 }, { "epoch": 0.9730341490455128, "grad_norm": 0.07670366019010544, "learning_rate": 1.5792638270282626e-05, "loss": 0.6676644682884216, "step": 5266 }, { "epoch": 0.9732189257544086, "grad_norm": 0.07113322615623474, "learning_rate": 1.5791012019507438e-05, "loss": 0.590463399887085, "step": 5267 }, { "epoch": 0.9734037024633045, "grad_norm": 0.0710613802075386, "learning_rate": 1.578938553826809e-05, "loss": 0.6342708468437195, "step": 5268 }, { "epoch": 0.9735884791722004, "grad_norm": 0.06318343430757523, "learning_rate": 1.578775882662931e-05, "loss": 0.5334287285804749, "step": 5269 }, { "epoch": 0.9737732558810962, "grad_norm": 0.06320284307003021, "learning_rate": 1.5786131884655836e-05, "loss": 0.6215629577636719, "step": 5270 }, { "epoch": 0.973958032589992, "grad_norm": 0.08957011252641678, "learning_rate": 1.5784504712412414e-05, "loss": 0.670143723487854, "step": 5271 }, { "epoch": 0.9741428092988879, "grad_norm": 0.05370910465717316, "learning_rate": 1.57828773099638e-05, "loss": 0.5103188753128052, "step": 5272 }, { "epoch": 0.9743275860077837, "grad_norm": 0.05635027587413788, "learning_rate": 1.5781249677374767e-05, "loss": 0.43409958481788635, "step": 5273 }, { "epoch": 0.9745123627166795, "grad_norm": 0.07601001858711243, "learning_rate": 1.577962181471008e-05, "loss": 0.7073262333869934, "step": 5274 }, { "epoch": 0.9746971394255755, "grad_norm": 0.06489431858062744, "learning_rate": 1.577799372203453e-05, "loss": 0.3917011320590973, "step": 5275 }, { "epoch": 0.9748819161344713, "grad_norm": 0.06366218626499176, "learning_rate": 1.5776365399412905e-05, "loss": 0.5069113969802856, "step": 5276 }, { "epoch": 0.9750666928433671, "grad_norm": 0.07721502333879471, "learning_rate": 1.5774736846910007e-05, "loss": 0.6327058672904968, "step": 5277 }, { "epoch": 0.9752514695522629, "grad_norm": 0.05550169572234154, "learning_rate": 1.5773108064590655e-05, "loss": 0.4321601688861847, "step": 5278 }, { "epoch": 0.9754362462611588, "grad_norm": 0.0736430287361145, "learning_rate": 1.577147905251966e-05, "loss": 0.5526038408279419, "step": 5279 }, { "epoch": 0.9756210229700546, "grad_norm": 0.08940877765417099, "learning_rate": 1.5769849810761855e-05, "loss": 0.6926903128623962, "step": 5280 }, { "epoch": 0.9758057996789504, "grad_norm": 0.07702791690826416, "learning_rate": 1.5768220339382077e-05, "loss": 0.5691621899604797, "step": 5281 }, { "epoch": 0.9759905763878464, "grad_norm": 0.055418889969587326, "learning_rate": 1.576659063844518e-05, "loss": 0.48040205240249634, "step": 5282 }, { "epoch": 0.9761753530967422, "grad_norm": 0.05237689986824989, "learning_rate": 1.5764960708016016e-05, "loss": 0.4535737633705139, "step": 5283 }, { "epoch": 0.976360129805638, "grad_norm": 0.05730956047773361, "learning_rate": 1.5763330548159453e-05, "loss": 0.4882669150829315, "step": 5284 }, { "epoch": 0.9765449065145339, "grad_norm": 0.0691274106502533, "learning_rate": 1.5761700158940364e-05, "loss": 0.6178051829338074, "step": 5285 }, { "epoch": 0.9767296832234297, "grad_norm": 0.08765674382448196, "learning_rate": 1.5760069540423636e-05, "loss": 0.8575605154037476, "step": 5286 }, { "epoch": 0.9769144599323255, "grad_norm": 0.06924710422754288, "learning_rate": 1.5758438692674158e-05, "loss": 0.6817244291305542, "step": 5287 }, { "epoch": 0.9770992366412213, "grad_norm": 0.0700920894742012, "learning_rate": 1.5756807615756836e-05, "loss": 0.7932645678520203, "step": 5288 }, { "epoch": 0.9772840133501172, "grad_norm": 0.05774247646331787, "learning_rate": 1.5755176309736586e-05, "loss": 0.49710604548454285, "step": 5289 }, { "epoch": 0.9774687900590131, "grad_norm": 0.056986741721630096, "learning_rate": 1.575354477467832e-05, "loss": 0.45592036843299866, "step": 5290 }, { "epoch": 0.9776535667679089, "grad_norm": 0.059322137385606766, "learning_rate": 1.5751913010646977e-05, "loss": 0.47350993752479553, "step": 5291 }, { "epoch": 0.9778383434768048, "grad_norm": 0.07256398350000381, "learning_rate": 1.575028101770749e-05, "loss": 0.5942186713218689, "step": 5292 }, { "epoch": 0.9780231201857006, "grad_norm": 0.05885408818721771, "learning_rate": 1.5748648795924807e-05, "loss": 0.40352359414100647, "step": 5293 }, { "epoch": 0.9782078968945964, "grad_norm": 0.06275617331266403, "learning_rate": 1.5747016345363885e-05, "loss": 0.38128185272216797, "step": 5294 }, { "epoch": 0.9783926736034922, "grad_norm": 0.06964803487062454, "learning_rate": 1.5745383666089698e-05, "loss": 0.5841737389564514, "step": 5295 }, { "epoch": 0.9785774503123881, "grad_norm": 0.06731443107128143, "learning_rate": 1.5743750758167214e-05, "loss": 0.6095026731491089, "step": 5296 }, { "epoch": 0.978762227021284, "grad_norm": 0.0793696939945221, "learning_rate": 1.574211762166142e-05, "loss": 0.6659227609634399, "step": 5297 }, { "epoch": 0.9789470037301798, "grad_norm": 0.07954433560371399, "learning_rate": 1.574048425663731e-05, "loss": 0.5637004971504211, "step": 5298 }, { "epoch": 0.9791317804390757, "grad_norm": 0.07504207640886307, "learning_rate": 1.5738850663159885e-05, "loss": 0.7107203006744385, "step": 5299 }, { "epoch": 0.9793165571479715, "grad_norm": 0.06294222921133041, "learning_rate": 1.5737216841294156e-05, "loss": 0.5561524033546448, "step": 5300 }, { "epoch": 0.9795013338568673, "grad_norm": 0.06990914791822433, "learning_rate": 1.5735582791105147e-05, "loss": 0.5332736968994141, "step": 5301 }, { "epoch": 0.9796861105657632, "grad_norm": 0.06765109300613403, "learning_rate": 1.5733948512657892e-05, "loss": 0.7284939885139465, "step": 5302 }, { "epoch": 0.979870887274659, "grad_norm": 0.08294203877449036, "learning_rate": 1.573231400601742e-05, "loss": 0.7408113479614258, "step": 5303 }, { "epoch": 0.9800556639835549, "grad_norm": 0.07409501075744629, "learning_rate": 1.5730679271248787e-05, "loss": 0.5788695812225342, "step": 5304 }, { "epoch": 0.9802404406924508, "grad_norm": 0.06979045271873474, "learning_rate": 1.572904430841705e-05, "loss": 0.4978390038013458, "step": 5305 }, { "epoch": 0.9804252174013466, "grad_norm": 0.09059244394302368, "learning_rate": 1.5727409117587268e-05, "loss": 0.7129696011543274, "step": 5306 }, { "epoch": 0.9806099941102424, "grad_norm": 0.07869086414575577, "learning_rate": 1.5725773698824527e-05, "loss": 0.6792417764663696, "step": 5307 }, { "epoch": 0.9807947708191382, "grad_norm": 0.06648946553468704, "learning_rate": 1.5724138052193904e-05, "loss": 0.5888637900352478, "step": 5308 }, { "epoch": 0.9809795475280341, "grad_norm": 0.071448914706707, "learning_rate": 1.5722502177760495e-05, "loss": 0.6279944181442261, "step": 5309 }, { "epoch": 0.9811643242369299, "grad_norm": 0.09720901399850845, "learning_rate": 1.5720866075589404e-05, "loss": 0.9879162907600403, "step": 5310 }, { "epoch": 0.9813491009458257, "grad_norm": 0.06968852877616882, "learning_rate": 1.571922974574574e-05, "loss": 0.5217567086219788, "step": 5311 }, { "epoch": 0.9815338776547217, "grad_norm": 0.0745844841003418, "learning_rate": 1.5717593188294626e-05, "loss": 0.5189414024353027, "step": 5312 }, { "epoch": 0.9817186543636175, "grad_norm": 0.0729883685708046, "learning_rate": 1.571595640330119e-05, "loss": 0.5511730909347534, "step": 5313 }, { "epoch": 0.9819034310725133, "grad_norm": 0.06353732198476791, "learning_rate": 1.5714319390830575e-05, "loss": 0.5453438758850098, "step": 5314 }, { "epoch": 0.9820882077814092, "grad_norm": 0.06904557347297668, "learning_rate": 1.5712682150947926e-05, "loss": 0.5570688247680664, "step": 5315 }, { "epoch": 0.982272984490305, "grad_norm": 0.07444656640291214, "learning_rate": 1.5711044683718398e-05, "loss": 0.7203077673912048, "step": 5316 }, { "epoch": 0.9824577611992008, "grad_norm": 0.07032838463783264, "learning_rate": 1.570940698920716e-05, "loss": 0.5388743877410889, "step": 5317 }, { "epoch": 0.9826425379080966, "grad_norm": 0.06798477470874786, "learning_rate": 1.5707769067479382e-05, "loss": 0.5358221530914307, "step": 5318 }, { "epoch": 0.9828273146169926, "grad_norm": 0.0741899311542511, "learning_rate": 1.570613091860026e-05, "loss": 0.5113516449928284, "step": 5319 }, { "epoch": 0.9830120913258884, "grad_norm": 0.07862939685583115, "learning_rate": 1.5704492542634974e-05, "loss": 0.7012460827827454, "step": 5320 }, { "epoch": 0.9831968680347842, "grad_norm": 0.07793296128511429, "learning_rate": 1.5702853939648736e-05, "loss": 0.6566935777664185, "step": 5321 }, { "epoch": 0.9833816447436801, "grad_norm": 0.0800136998295784, "learning_rate": 1.5701215109706748e-05, "loss": 0.6474668979644775, "step": 5322 }, { "epoch": 0.9835664214525759, "grad_norm": 0.0727175772190094, "learning_rate": 1.569957605287424e-05, "loss": 0.5037731528282166, "step": 5323 }, { "epoch": 0.9837511981614717, "grad_norm": 0.07204470783472061, "learning_rate": 1.5697936769216436e-05, "loss": 0.4809809625148773, "step": 5324 }, { "epoch": 0.9839359748703675, "grad_norm": 0.09261243045330048, "learning_rate": 1.5696297258798573e-05, "loss": 0.6194671988487244, "step": 5325 }, { "epoch": 0.9841207515792635, "grad_norm": 0.07927238941192627, "learning_rate": 1.5694657521685905e-05, "loss": 0.4942941963672638, "step": 5326 }, { "epoch": 0.9843055282881593, "grad_norm": 0.06556130945682526, "learning_rate": 1.569301755794368e-05, "loss": 0.45880836248397827, "step": 5327 }, { "epoch": 0.9844903049970551, "grad_norm": 0.07203684747219086, "learning_rate": 1.5691377367637165e-05, "loss": 0.5204325914382935, "step": 5328 }, { "epoch": 0.984675081705951, "grad_norm": 0.07471609115600586, "learning_rate": 1.5689736950831643e-05, "loss": 0.5105763673782349, "step": 5329 }, { "epoch": 0.9848598584148468, "grad_norm": 0.09363534301519394, "learning_rate": 1.5688096307592387e-05, "loss": 0.7678961753845215, "step": 5330 }, { "epoch": 0.9850446351237426, "grad_norm": 0.05547983944416046, "learning_rate": 1.5686455437984694e-05, "loss": 0.42751187086105347, "step": 5331 }, { "epoch": 0.9852294118326385, "grad_norm": 0.078705795109272, "learning_rate": 1.5684814342073864e-05, "loss": 0.4591256380081177, "step": 5332 }, { "epoch": 0.9854141885415343, "grad_norm": 0.058858368545770645, "learning_rate": 1.5683173019925212e-05, "loss": 0.430483877658844, "step": 5333 }, { "epoch": 0.9855989652504302, "grad_norm": 0.06559912860393524, "learning_rate": 1.5681531471604056e-05, "loss": 0.477867066860199, "step": 5334 }, { "epoch": 0.985783741959326, "grad_norm": 0.07676411420106888, "learning_rate": 1.5679889697175718e-05, "loss": 0.5628607869148254, "step": 5335 }, { "epoch": 0.9859685186682219, "grad_norm": 0.06487436592578888, "learning_rate": 1.5678247696705538e-05, "loss": 0.5461073517799377, "step": 5336 }, { "epoch": 0.9861532953771177, "grad_norm": 0.07578388601541519, "learning_rate": 1.567660547025887e-05, "loss": 0.6844896078109741, "step": 5337 }, { "epoch": 0.9863380720860135, "grad_norm": 0.07774979621171951, "learning_rate": 1.5674963017901062e-05, "loss": 0.6670076251029968, "step": 5338 }, { "epoch": 0.9865228487949094, "grad_norm": 0.06051953136920929, "learning_rate": 1.5673320339697484e-05, "loss": 0.47586143016815186, "step": 5339 }, { "epoch": 0.9867076255038052, "grad_norm": 0.0646534264087677, "learning_rate": 1.5671677435713502e-05, "loss": 0.4309314787387848, "step": 5340 }, { "epoch": 0.9868924022127011, "grad_norm": 0.07801027595996857, "learning_rate": 1.56700343060145e-05, "loss": 0.5589619874954224, "step": 5341 }, { "epoch": 0.987077178921597, "grad_norm": 0.08354455232620239, "learning_rate": 1.566839095066588e-05, "loss": 0.621575117111206, "step": 5342 }, { "epoch": 0.9872619556304928, "grad_norm": 0.061820484697818756, "learning_rate": 1.5666747369733028e-05, "loss": 0.48609113693237305, "step": 5343 }, { "epoch": 0.9874467323393886, "grad_norm": 0.07441884279251099, "learning_rate": 1.5665103563281364e-05, "loss": 0.6886221766471863, "step": 5344 }, { "epoch": 0.9876315090482845, "grad_norm": 0.06706920266151428, "learning_rate": 1.56634595313763e-05, "loss": 0.4719564914703369, "step": 5345 }, { "epoch": 0.9878162857571803, "grad_norm": 0.061861515045166016, "learning_rate": 1.5661815274083264e-05, "loss": 0.5134391188621521, "step": 5346 }, { "epoch": 0.9880010624660761, "grad_norm": 0.08870477229356766, "learning_rate": 1.5660170791467692e-05, "loss": 0.7590252757072449, "step": 5347 }, { "epoch": 0.988185839174972, "grad_norm": 0.08870309591293335, "learning_rate": 1.5658526083595034e-05, "loss": 0.8938247561454773, "step": 5348 }, { "epoch": 0.9883706158838679, "grad_norm": 0.07829594612121582, "learning_rate": 1.5656881150530742e-05, "loss": 0.6156164407730103, "step": 5349 }, { "epoch": 0.9885553925927637, "grad_norm": 0.06284963339567184, "learning_rate": 1.5655235992340275e-05, "loss": 0.5012479424476624, "step": 5350 }, { "epoch": 0.9887401693016595, "grad_norm": 0.06273221224546432, "learning_rate": 1.5653590609089112e-05, "loss": 0.4839748442173004, "step": 5351 }, { "epoch": 0.9889249460105554, "grad_norm": 0.09538552910089493, "learning_rate": 1.5651945000842728e-05, "loss": 0.8040647506713867, "step": 5352 }, { "epoch": 0.9891097227194512, "grad_norm": 0.06506232172250748, "learning_rate": 1.5650299167666617e-05, "loss": 0.5908097624778748, "step": 5353 }, { "epoch": 0.989294499428347, "grad_norm": 0.0688038244843483, "learning_rate": 1.5648653109626277e-05, "loss": 0.6032392978668213, "step": 5354 }, { "epoch": 0.9894792761372428, "grad_norm": 0.08789508044719696, "learning_rate": 1.564700682678721e-05, "loss": 0.6738143563270569, "step": 5355 }, { "epoch": 0.9896640528461388, "grad_norm": 0.07509738206863403, "learning_rate": 1.5645360319214946e-05, "loss": 0.5347864627838135, "step": 5356 }, { "epoch": 0.9898488295550346, "grad_norm": 0.07636866718530655, "learning_rate": 1.5643713586975e-05, "loss": 0.6815749406814575, "step": 5357 }, { "epoch": 0.9900336062639304, "grad_norm": 0.06707502901554108, "learning_rate": 1.564206663013291e-05, "loss": 0.7252213954925537, "step": 5358 }, { "epoch": 0.9902183829728263, "grad_norm": 0.05714981257915497, "learning_rate": 1.564041944875422e-05, "loss": 0.4083198606967926, "step": 5359 }, { "epoch": 0.9904031596817221, "grad_norm": 0.06341986358165741, "learning_rate": 1.5638772042904486e-05, "loss": 0.577389121055603, "step": 5360 }, { "epoch": 0.9905879363906179, "grad_norm": 0.07032588869333267, "learning_rate": 1.5637124412649263e-05, "loss": 0.5840871930122375, "step": 5361 }, { "epoch": 0.9907727130995138, "grad_norm": 0.09448905289173126, "learning_rate": 1.5635476558054122e-05, "loss": 0.7708903551101685, "step": 5362 }, { "epoch": 0.9909574898084097, "grad_norm": 0.051332227885723114, "learning_rate": 1.563382847918465e-05, "loss": 0.42049360275268555, "step": 5363 }, { "epoch": 0.9911422665173055, "grad_norm": 0.07908229529857635, "learning_rate": 1.5632180176106428e-05, "loss": 0.6879507303237915, "step": 5364 }, { "epoch": 0.9913270432262014, "grad_norm": 0.08213875442743301, "learning_rate": 1.5630531648885056e-05, "loss": 0.7663908004760742, "step": 5365 }, { "epoch": 0.9915118199350972, "grad_norm": 0.07541374117136002, "learning_rate": 1.5628882897586143e-05, "loss": 0.5930274724960327, "step": 5366 }, { "epoch": 0.991696596643993, "grad_norm": 0.07516644895076752, "learning_rate": 1.56272339222753e-05, "loss": 0.7060964107513428, "step": 5367 }, { "epoch": 0.9918813733528888, "grad_norm": 0.07756774127483368, "learning_rate": 1.5625584723018147e-05, "loss": 0.5873556733131409, "step": 5368 }, { "epoch": 0.9920661500617847, "grad_norm": 0.08249982446432114, "learning_rate": 1.5623935299880323e-05, "loss": 0.7442416548728943, "step": 5369 }, { "epoch": 0.9922509267706806, "grad_norm": 0.0567890889942646, "learning_rate": 1.5622285652927477e-05, "loss": 0.4587095379829407, "step": 5370 }, { "epoch": 0.9924357034795764, "grad_norm": 0.08920449018478394, "learning_rate": 1.5620635782225247e-05, "loss": 0.7942427396774292, "step": 5371 }, { "epoch": 0.9926204801884723, "grad_norm": 0.07286550104618073, "learning_rate": 1.5618985687839298e-05, "loss": 0.6068935394287109, "step": 5372 }, { "epoch": 0.9928052568973681, "grad_norm": 0.08293316513299942, "learning_rate": 1.5617335369835296e-05, "loss": 0.740690290927887, "step": 5373 }, { "epoch": 0.9929900336062639, "grad_norm": 0.06495033204555511, "learning_rate": 1.5615684828278923e-05, "loss": 0.5658296942710876, "step": 5374 }, { "epoch": 0.9931748103151598, "grad_norm": 0.0681936964392662, "learning_rate": 1.5614034063235864e-05, "loss": 0.4906209707260132, "step": 5375 }, { "epoch": 0.9933595870240556, "grad_norm": 0.07649872452020645, "learning_rate": 1.561238307477181e-05, "loss": 0.6106581687927246, "step": 5376 }, { "epoch": 0.9935443637329514, "grad_norm": 0.06747819483280182, "learning_rate": 1.5610731862952468e-05, "loss": 0.5255357027053833, "step": 5377 }, { "epoch": 0.9937291404418473, "grad_norm": 0.05354423075914383, "learning_rate": 1.5609080427843556e-05, "loss": 0.3053976595401764, "step": 5378 }, { "epoch": 0.9939139171507432, "grad_norm": 0.0726679265499115, "learning_rate": 1.560742876951079e-05, "loss": 0.598291277885437, "step": 5379 }, { "epoch": 0.994098693859639, "grad_norm": 0.06871083378791809, "learning_rate": 1.5605776888019902e-05, "loss": 0.5416322350502014, "step": 5380 }, { "epoch": 0.9942834705685348, "grad_norm": 0.062419094145298004, "learning_rate": 1.560412478343663e-05, "loss": 0.4946483075618744, "step": 5381 }, { "epoch": 0.9944682472774307, "grad_norm": 0.0625448226928711, "learning_rate": 1.560247245582673e-05, "loss": 0.4920055866241455, "step": 5382 }, { "epoch": 0.9946530239863265, "grad_norm": 0.0630967766046524, "learning_rate": 1.560081990525595e-05, "loss": 0.5786566138267517, "step": 5383 }, { "epoch": 0.9948378006952223, "grad_norm": 0.06180526688694954, "learning_rate": 1.559916713179006e-05, "loss": 0.43185970187187195, "step": 5384 }, { "epoch": 0.9950225774041183, "grad_norm": 0.07326313853263855, "learning_rate": 1.559751413549484e-05, "loss": 0.6359105110168457, "step": 5385 }, { "epoch": 0.9952073541130141, "grad_norm": 0.06609123200178146, "learning_rate": 1.5595860916436064e-05, "loss": 0.4288696348667145, "step": 5386 }, { "epoch": 0.9953921308219099, "grad_norm": 0.07263244688510895, "learning_rate": 1.5594207474679533e-05, "loss": 0.5994312167167664, "step": 5387 }, { "epoch": 0.9955769075308057, "grad_norm": 0.08110374957323074, "learning_rate": 1.5592553810291045e-05, "loss": 0.5752834677696228, "step": 5388 }, { "epoch": 0.9957616842397016, "grad_norm": 0.07594095915555954, "learning_rate": 1.5590899923336417e-05, "loss": 0.6776776909828186, "step": 5389 }, { "epoch": 0.9959464609485974, "grad_norm": 0.08966058492660522, "learning_rate": 1.558924581388146e-05, "loss": 0.6651446223258972, "step": 5390 }, { "epoch": 0.9961312376574932, "grad_norm": 0.06374116241931915, "learning_rate": 1.5587591481992008e-05, "loss": 0.4789484143257141, "step": 5391 }, { "epoch": 0.9963160143663892, "grad_norm": 0.0633915439248085, "learning_rate": 1.5585936927733897e-05, "loss": 0.5667264461517334, "step": 5392 }, { "epoch": 0.996500791075285, "grad_norm": 0.05590339004993439, "learning_rate": 1.558428215117297e-05, "loss": 0.4168759882450104, "step": 5393 }, { "epoch": 0.9966855677841808, "grad_norm": 0.08429696410894394, "learning_rate": 1.5582627152375084e-05, "loss": 0.6339855790138245, "step": 5394 }, { "epoch": 0.9968703444930767, "grad_norm": 0.05786977708339691, "learning_rate": 1.5580971931406105e-05, "loss": 0.46310970187187195, "step": 5395 }, { "epoch": 0.9970551212019725, "grad_norm": 0.0657820925116539, "learning_rate": 1.55793164883319e-05, "loss": 0.4870114028453827, "step": 5396 }, { "epoch": 0.9972398979108683, "grad_norm": 0.0635991096496582, "learning_rate": 1.557766082321836e-05, "loss": 0.4639195501804352, "step": 5397 }, { "epoch": 0.9974246746197641, "grad_norm": 0.0636812224984169, "learning_rate": 1.5576004936131366e-05, "loss": 0.5039251446723938, "step": 5398 }, { "epoch": 0.99760945132866, "grad_norm": 0.07310467213392258, "learning_rate": 1.5574348827136823e-05, "loss": 0.5871421694755554, "step": 5399 }, { "epoch": 0.9977942280375559, "grad_norm": 0.05908821150660515, "learning_rate": 1.557269249630063e-05, "loss": 0.5073022246360779, "step": 5400 }, { "epoch": 0.9979790047464517, "grad_norm": 0.06619273126125336, "learning_rate": 1.557103594368872e-05, "loss": 0.5582687854766846, "step": 5401 }, { "epoch": 0.9981637814553476, "grad_norm": 0.09524130076169968, "learning_rate": 1.5569379169367005e-05, "loss": 0.9023342132568359, "step": 5402 }, { "epoch": 0.9983485581642434, "grad_norm": 0.06774596124887466, "learning_rate": 1.556772217340142e-05, "loss": 0.5595493912696838, "step": 5403 }, { "epoch": 0.9985333348731392, "grad_norm": 0.07581521570682526, "learning_rate": 1.5566064955857916e-05, "loss": 0.6437134146690369, "step": 5404 }, { "epoch": 0.998718111582035, "grad_norm": 0.08497077226638794, "learning_rate": 1.5564407516802437e-05, "loss": 0.72376549243927, "step": 5405 }, { "epoch": 0.9989028882909309, "grad_norm": 0.07422885298728943, "learning_rate": 1.556274985630095e-05, "loss": 0.7248567342758179, "step": 5406 }, { "epoch": 0.9990876649998268, "grad_norm": 0.08695416897535324, "learning_rate": 1.5561091974419423e-05, "loss": 0.7146385312080383, "step": 5407 }, { "epoch": 0.9992724417087226, "grad_norm": 0.08638768643140793, "learning_rate": 1.555943387122383e-05, "loss": 0.8375434279441833, "step": 5408 }, { "epoch": 0.9994572184176185, "grad_norm": 0.068270742893219, "learning_rate": 1.555777554678017e-05, "loss": 0.6461607813835144, "step": 5409 }, { "epoch": 0.9996419951265143, "grad_norm": 0.07097753137350082, "learning_rate": 1.5556117001154426e-05, "loss": 0.5239364504814148, "step": 5410 }, { "epoch": 0.9998267718354101, "grad_norm": 0.055535804480314255, "learning_rate": 1.5554458234412607e-05, "loss": 0.44697487354278564, "step": 5411 }, { "epoch": 1.0, "grad_norm": 0.07980701327323914, "learning_rate": 1.555279924662073e-05, "loss": 0.6596300601959229, "step": 5412 }, { "epoch": 1.0001847767088958, "grad_norm": 0.06499882787466049, "learning_rate": 1.5551140037844816e-05, "loss": 0.5656397938728333, "step": 5413 }, { "epoch": 1.0003695534177917, "grad_norm": 0.0532810240983963, "learning_rate": 1.5549480608150892e-05, "loss": 0.4539932906627655, "step": 5414 }, { "epoch": 1.0005543301266875, "grad_norm": 0.07145704329013824, "learning_rate": 1.5547820957605006e-05, "loss": 0.6365221738815308, "step": 5415 }, { "epoch": 1.0007391068355833, "grad_norm": 0.06368197500705719, "learning_rate": 1.5546161086273203e-05, "loss": 0.5140100717544556, "step": 5416 }, { "epoch": 1.0009238835444791, "grad_norm": 0.0678260400891304, "learning_rate": 1.5544500994221537e-05, "loss": 0.6245336532592773, "step": 5417 }, { "epoch": 1.001108660253375, "grad_norm": 0.07416384667158127, "learning_rate": 1.554284068151608e-05, "loss": 0.5578438639640808, "step": 5418 }, { "epoch": 1.0012934369622708, "grad_norm": 0.07240698486566544, "learning_rate": 1.5541180148222907e-05, "loss": 0.46354150772094727, "step": 5419 }, { "epoch": 1.0014782136711669, "grad_norm": 0.07387363910675049, "learning_rate": 1.5539519394408096e-05, "loss": 0.513384997844696, "step": 5420 }, { "epoch": 1.0016629903800627, "grad_norm": 0.060762811452150345, "learning_rate": 1.5537858420137748e-05, "loss": 0.4742019474506378, "step": 5421 }, { "epoch": 1.0018477670889585, "grad_norm": 0.0828094333410263, "learning_rate": 1.553619722547796e-05, "loss": 0.6738830208778381, "step": 5422 }, { "epoch": 1.0020325437978543, "grad_norm": 0.06672517955303192, "learning_rate": 1.553453581049484e-05, "loss": 0.37967976927757263, "step": 5423 }, { "epoch": 1.0022173205067502, "grad_norm": 0.07504408061504364, "learning_rate": 1.5532874175254512e-05, "loss": 0.48869621753692627, "step": 5424 }, { "epoch": 1.002402097215646, "grad_norm": 0.07129882276058197, "learning_rate": 1.5531212319823104e-05, "loss": 0.5685672163963318, "step": 5425 }, { "epoch": 1.0025868739245418, "grad_norm": 0.06440310180187225, "learning_rate": 1.5529550244266746e-05, "loss": 0.46098026633262634, "step": 5426 }, { "epoch": 1.0027716506334377, "grad_norm": 0.07999856770038605, "learning_rate": 1.5527887948651594e-05, "loss": 0.6286073923110962, "step": 5427 }, { "epoch": 1.0029564273423335, "grad_norm": 0.06721231341362, "learning_rate": 1.5526225433043787e-05, "loss": 0.45072394609451294, "step": 5428 }, { "epoch": 1.0031412040512293, "grad_norm": 0.09263379871845245, "learning_rate": 1.5524562697509507e-05, "loss": 0.5145958065986633, "step": 5429 }, { "epoch": 1.0033259807601251, "grad_norm": 0.08839530497789383, "learning_rate": 1.552289974211491e-05, "loss": 0.6489437818527222, "step": 5430 }, { "epoch": 1.003510757469021, "grad_norm": 0.08482103794813156, "learning_rate": 1.5521236566926187e-05, "loss": 0.5917055606842041, "step": 5431 }, { "epoch": 1.0036955341779168, "grad_norm": 0.06561548262834549, "learning_rate": 1.5519573172009517e-05, "loss": 0.45737460255622864, "step": 5432 }, { "epoch": 1.0038803108868126, "grad_norm": 0.07934774458408356, "learning_rate": 1.5517909557431106e-05, "loss": 0.50255286693573, "step": 5433 }, { "epoch": 1.0040650875957087, "grad_norm": 0.08711782097816467, "learning_rate": 1.5516245723257156e-05, "loss": 0.7246228456497192, "step": 5434 }, { "epoch": 1.0042498643046045, "grad_norm": 0.08274222910404205, "learning_rate": 1.5514581669553887e-05, "loss": 0.6145168542861938, "step": 5435 }, { "epoch": 1.0044346410135003, "grad_norm": 0.0734354630112648, "learning_rate": 1.5512917396387518e-05, "loss": 0.6290450692176819, "step": 5436 }, { "epoch": 1.0046194177223962, "grad_norm": 0.0731857642531395, "learning_rate": 1.5511252903824284e-05, "loss": 0.5591952204704285, "step": 5437 }, { "epoch": 1.004804194431292, "grad_norm": 0.0669926181435585, "learning_rate": 1.550958819193043e-05, "loss": 0.4976261556148529, "step": 5438 }, { "epoch": 1.0049889711401878, "grad_norm": 0.06242423877120018, "learning_rate": 1.5507923260772206e-05, "loss": 0.4081506133079529, "step": 5439 }, { "epoch": 1.0051737478490836, "grad_norm": 0.06533505767583847, "learning_rate": 1.550625811041586e-05, "loss": 0.5145694613456726, "step": 5440 }, { "epoch": 1.0053585245579795, "grad_norm": 0.09347716718912125, "learning_rate": 1.5504592740927673e-05, "loss": 0.7778919339179993, "step": 5441 }, { "epoch": 1.0055433012668753, "grad_norm": 0.060112446546554565, "learning_rate": 1.5502927152373913e-05, "loss": 0.4014849364757538, "step": 5442 }, { "epoch": 1.0057280779757711, "grad_norm": 0.06632987409830093, "learning_rate": 1.5501261344820875e-05, "loss": 0.5001177191734314, "step": 5443 }, { "epoch": 1.005912854684667, "grad_norm": 0.06259676814079285, "learning_rate": 1.5499595318334842e-05, "loss": 0.502464771270752, "step": 5444 }, { "epoch": 1.0060976313935628, "grad_norm": 0.06666150689125061, "learning_rate": 1.5497929072982123e-05, "loss": 0.4545712471008301, "step": 5445 }, { "epoch": 1.0062824081024586, "grad_norm": 0.07456757128238678, "learning_rate": 1.5496262608829026e-05, "loss": 0.6057341694831848, "step": 5446 }, { "epoch": 1.0064671848113544, "grad_norm": 0.0757986307144165, "learning_rate": 1.5494595925941875e-05, "loss": 0.6850146055221558, "step": 5447 }, { "epoch": 1.0066519615202503, "grad_norm": 0.07049146294593811, "learning_rate": 1.5492929024386995e-05, "loss": 0.5972450971603394, "step": 5448 }, { "epoch": 1.0068367382291463, "grad_norm": 0.06518596410751343, "learning_rate": 1.549126190423073e-05, "loss": 0.5264145135879517, "step": 5449 }, { "epoch": 1.0070215149380422, "grad_norm": 0.08410429954528809, "learning_rate": 1.5489594565539415e-05, "loss": 0.8311405181884766, "step": 5450 }, { "epoch": 1.007206291646938, "grad_norm": 0.08313155174255371, "learning_rate": 1.5487927008379407e-05, "loss": 0.6686275601387024, "step": 5451 }, { "epoch": 1.0073910683558338, "grad_norm": 0.07137402147054672, "learning_rate": 1.548625923281708e-05, "loss": 0.5926514267921448, "step": 5452 }, { "epoch": 1.0075758450647296, "grad_norm": 0.06430884450674057, "learning_rate": 1.5484591238918802e-05, "loss": 0.3721638023853302, "step": 5453 }, { "epoch": 1.0077606217736255, "grad_norm": 0.07286176085472107, "learning_rate": 1.548292302675095e-05, "loss": 0.6848993301391602, "step": 5454 }, { "epoch": 1.0079453984825213, "grad_norm": 0.0833728238940239, "learning_rate": 1.5481254596379914e-05, "loss": 0.6791229248046875, "step": 5455 }, { "epoch": 1.0081301751914171, "grad_norm": 0.07102257013320923, "learning_rate": 1.5479585947872093e-05, "loss": 0.41884228587150574, "step": 5456 }, { "epoch": 1.008314951900313, "grad_norm": 0.07091695815324783, "learning_rate": 1.5477917081293895e-05, "loss": 0.5533478260040283, "step": 5457 }, { "epoch": 1.0084997286092088, "grad_norm": 0.04651034250855446, "learning_rate": 1.5476247996711737e-05, "loss": 0.3375348448753357, "step": 5458 }, { "epoch": 1.0086845053181046, "grad_norm": 0.0720076709985733, "learning_rate": 1.547457869419204e-05, "loss": 0.5965259671211243, "step": 5459 }, { "epoch": 1.0088692820270004, "grad_norm": 0.05651504918932915, "learning_rate": 1.547290917380124e-05, "loss": 0.3244526982307434, "step": 5460 }, { "epoch": 1.0090540587358963, "grad_norm": 0.06690341979265213, "learning_rate": 1.5471239435605777e-05, "loss": 0.455815851688385, "step": 5461 }, { "epoch": 1.009238835444792, "grad_norm": 0.0782356858253479, "learning_rate": 1.5469569479672102e-05, "loss": 0.6343777775764465, "step": 5462 }, { "epoch": 1.009423612153688, "grad_norm": 0.06311261653900146, "learning_rate": 1.5467899306066674e-05, "loss": 0.44077301025390625, "step": 5463 }, { "epoch": 1.009608388862584, "grad_norm": 0.08928355574607849, "learning_rate": 1.5466228914855957e-05, "loss": 0.6142847537994385, "step": 5464 }, { "epoch": 1.0097931655714798, "grad_norm": 0.07347680628299713, "learning_rate": 1.5464558306106438e-05, "loss": 0.5666141510009766, "step": 5465 }, { "epoch": 1.0099779422803756, "grad_norm": 0.08045663684606552, "learning_rate": 1.546288747988459e-05, "loss": 0.6125535368919373, "step": 5466 }, { "epoch": 1.0101627189892715, "grad_norm": 0.07437922060489655, "learning_rate": 1.546121643625691e-05, "loss": 0.6528797745704651, "step": 5467 }, { "epoch": 1.0103474956981673, "grad_norm": 0.058625247329473495, "learning_rate": 1.5459545175289904e-05, "loss": 0.4213486909866333, "step": 5468 }, { "epoch": 1.0105322724070631, "grad_norm": 0.07732836157083511, "learning_rate": 1.5457873697050083e-05, "loss": 0.6027098298072815, "step": 5469 }, { "epoch": 1.010717049115959, "grad_norm": 0.07866568118333817, "learning_rate": 1.545620200160396e-05, "loss": 0.6570467948913574, "step": 5470 }, { "epoch": 1.0109018258248548, "grad_norm": 0.07446414977312088, "learning_rate": 1.5454530089018068e-05, "loss": 0.5937471389770508, "step": 5471 }, { "epoch": 1.0110866025337506, "grad_norm": 0.06321976333856583, "learning_rate": 1.545285795935895e-05, "loss": 0.43261998891830444, "step": 5472 }, { "epoch": 1.0112713792426464, "grad_norm": 0.0733419880270958, "learning_rate": 1.545118561269314e-05, "loss": 0.5204454064369202, "step": 5473 }, { "epoch": 1.0114561559515423, "grad_norm": 0.07306050509214401, "learning_rate": 1.5449513049087196e-05, "loss": 0.5122585296630859, "step": 5474 }, { "epoch": 1.011640932660438, "grad_norm": 0.07231169193983078, "learning_rate": 1.5447840268607684e-05, "loss": 0.5543947219848633, "step": 5475 }, { "epoch": 1.011825709369334, "grad_norm": 0.06687439233064651, "learning_rate": 1.544616727132117e-05, "loss": 0.5254085063934326, "step": 5476 }, { "epoch": 1.0120104860782297, "grad_norm": 0.06679672747850418, "learning_rate": 1.544449405729424e-05, "loss": 0.5370311737060547, "step": 5477 }, { "epoch": 1.0121952627871256, "grad_norm": 0.06086615473031998, "learning_rate": 1.544282062659348e-05, "loss": 0.5779776573181152, "step": 5478 }, { "epoch": 1.0123800394960216, "grad_norm": 0.07743128389120102, "learning_rate": 1.544114697928549e-05, "loss": 0.601859450340271, "step": 5479 }, { "epoch": 1.0125648162049175, "grad_norm": 0.0775611400604248, "learning_rate": 1.5439473115436872e-05, "loss": 0.5330616235733032, "step": 5480 }, { "epoch": 1.0127495929138133, "grad_norm": 0.07540713995695114, "learning_rate": 1.543779903511424e-05, "loss": 0.5143282413482666, "step": 5481 }, { "epoch": 1.012934369622709, "grad_norm": 0.06653149425983429, "learning_rate": 1.543612473838422e-05, "loss": 0.5389485955238342, "step": 5482 }, { "epoch": 1.013119146331605, "grad_norm": 0.06639883667230606, "learning_rate": 1.5434450225313443e-05, "loss": 0.486784428358078, "step": 5483 }, { "epoch": 1.0133039230405008, "grad_norm": 0.07527770102024078, "learning_rate": 1.5432775495968552e-05, "loss": 0.5490717887878418, "step": 5484 }, { "epoch": 1.0134886997493966, "grad_norm": 0.09229796379804611, "learning_rate": 1.5431100550416187e-05, "loss": 0.7034298777580261, "step": 5485 }, { "epoch": 1.0136734764582924, "grad_norm": 0.0757904052734375, "learning_rate": 1.5429425388723017e-05, "loss": 0.5530351996421814, "step": 5486 }, { "epoch": 1.0138582531671883, "grad_norm": 0.08032525330781937, "learning_rate": 1.5427750010955702e-05, "loss": 0.5245693325996399, "step": 5487 }, { "epoch": 1.014043029876084, "grad_norm": 0.07433024793863297, "learning_rate": 1.5426074417180918e-05, "loss": 0.5217931270599365, "step": 5488 }, { "epoch": 1.01422780658498, "grad_norm": 0.07630334794521332, "learning_rate": 1.5424398607465344e-05, "loss": 0.46245628595352173, "step": 5489 }, { "epoch": 1.0144125832938757, "grad_norm": 0.06917870044708252, "learning_rate": 1.542272258187568e-05, "loss": 0.5663442015647888, "step": 5490 }, { "epoch": 1.0145973600027716, "grad_norm": 0.06446558982133865, "learning_rate": 1.5421046340478625e-05, "loss": 0.49829304218292236, "step": 5491 }, { "epoch": 1.0147821367116674, "grad_norm": 0.06780035048723221, "learning_rate": 1.541936988334088e-05, "loss": 0.5756086707115173, "step": 5492 }, { "epoch": 1.0149669134205634, "grad_norm": 0.09589719772338867, "learning_rate": 1.5417693210529172e-05, "loss": 0.7299249768257141, "step": 5493 }, { "epoch": 1.0151516901294593, "grad_norm": 0.07468991726636887, "learning_rate": 1.5416016322110224e-05, "loss": 0.37915417551994324, "step": 5494 }, { "epoch": 1.015336466838355, "grad_norm": 0.08760546892881393, "learning_rate": 1.5414339218150773e-05, "loss": 0.661727786064148, "step": 5495 }, { "epoch": 1.015521243547251, "grad_norm": 0.07419498264789581, "learning_rate": 1.541266189871756e-05, "loss": 0.5948960185050964, "step": 5496 }, { "epoch": 1.0157060202561468, "grad_norm": 0.0739092081785202, "learning_rate": 1.5410984363877336e-05, "loss": 0.5626744031906128, "step": 5497 }, { "epoch": 1.0158907969650426, "grad_norm": 0.07224955409765244, "learning_rate": 1.5409306613696863e-05, "loss": 0.576987087726593, "step": 5498 }, { "epoch": 1.0160755736739384, "grad_norm": 0.0674276202917099, "learning_rate": 1.5407628648242908e-05, "loss": 0.5401109457015991, "step": 5499 }, { "epoch": 1.0162603503828342, "grad_norm": 0.06937623769044876, "learning_rate": 1.5405950467582253e-05, "loss": 0.5262582302093506, "step": 5500 }, { "epoch": 1.0162603503828342, "eval_loss": 0.6191006898880005, "eval_runtime": 158.2235, "eval_samples_per_second": 115.21, "eval_steps_per_second": 14.404, "step": 5500 }, { "epoch": 1.01644512709173, "grad_norm": 0.078186996281147, "learning_rate": 1.5404272071781683e-05, "loss": 0.605469822883606, "step": 5501 }, { "epoch": 1.016629903800626, "grad_norm": 0.06780393421649933, "learning_rate": 1.5402593460907992e-05, "loss": 0.4818827211856842, "step": 5502 }, { "epoch": 1.0168146805095217, "grad_norm": 0.07865656167268753, "learning_rate": 1.5400914635027985e-05, "loss": 0.5803611874580383, "step": 5503 }, { "epoch": 1.0169994572184176, "grad_norm": 0.05911247432231903, "learning_rate": 1.5399235594208472e-05, "loss": 0.38554736971855164, "step": 5504 }, { "epoch": 1.0171842339273134, "grad_norm": 0.07047073543071747, "learning_rate": 1.5397556338516273e-05, "loss": 0.5108228325843811, "step": 5505 }, { "epoch": 1.0173690106362092, "grad_norm": 0.0866054967045784, "learning_rate": 1.539587686801822e-05, "loss": 0.597832202911377, "step": 5506 }, { "epoch": 1.017553787345105, "grad_norm": 0.08333642035722733, "learning_rate": 1.539419718278115e-05, "loss": 0.550136387348175, "step": 5507 }, { "epoch": 1.017738564054001, "grad_norm": 0.09204985201358795, "learning_rate": 1.5392517282871906e-05, "loss": 0.6344136595726013, "step": 5508 }, { "epoch": 1.017923340762897, "grad_norm": 0.05923591926693916, "learning_rate": 1.5390837168357346e-05, "loss": 0.4173552393913269, "step": 5509 }, { "epoch": 1.0181081174717928, "grad_norm": 0.08533397316932678, "learning_rate": 1.5389156839304332e-05, "loss": 0.6466034054756165, "step": 5510 }, { "epoch": 1.0182928941806886, "grad_norm": 0.07171865552663803, "learning_rate": 1.5387476295779737e-05, "loss": 0.6014915108680725, "step": 5511 }, { "epoch": 1.0184776708895844, "grad_norm": 0.07948136329650879, "learning_rate": 1.5385795537850442e-05, "loss": 0.588539183139801, "step": 5512 }, { "epoch": 1.0186624475984802, "grad_norm": 0.07882174849510193, "learning_rate": 1.5384114565583332e-05, "loss": 0.47921741008758545, "step": 5513 }, { "epoch": 1.018847224307376, "grad_norm": 0.07933034002780914, "learning_rate": 1.538243337904531e-05, "loss": 0.5907740592956543, "step": 5514 }, { "epoch": 1.019032001016272, "grad_norm": 0.06324376910924911, "learning_rate": 1.538075197830328e-05, "loss": 0.5618665218353271, "step": 5515 }, { "epoch": 1.0192167777251677, "grad_norm": 0.06595669686794281, "learning_rate": 1.5379070363424153e-05, "loss": 0.4812150299549103, "step": 5516 }, { "epoch": 1.0194015544340636, "grad_norm": 0.05870814248919487, "learning_rate": 1.5377388534474852e-05, "loss": 0.40500393509864807, "step": 5517 }, { "epoch": 1.0195863311429594, "grad_norm": 0.07318040728569031, "learning_rate": 1.5375706491522312e-05, "loss": 0.4814295172691345, "step": 5518 }, { "epoch": 1.0197711078518552, "grad_norm": 0.07311618328094482, "learning_rate": 1.5374024234633474e-05, "loss": 0.6199733018875122, "step": 5519 }, { "epoch": 1.019955884560751, "grad_norm": 0.07848178595304489, "learning_rate": 1.5372341763875285e-05, "loss": 0.6515681147575378, "step": 5520 }, { "epoch": 1.0201406612696469, "grad_norm": 0.08523409068584442, "learning_rate": 1.53706590793147e-05, "loss": 0.5154955983161926, "step": 5521 }, { "epoch": 1.020325437978543, "grad_norm": 0.07961570471525192, "learning_rate": 1.536897618101869e-05, "loss": 0.5723915100097656, "step": 5522 }, { "epoch": 1.0205102146874387, "grad_norm": 0.06371118128299713, "learning_rate": 1.5367293069054217e-05, "loss": 0.49540960788726807, "step": 5523 }, { "epoch": 1.0206949913963346, "grad_norm": 0.06904740631580353, "learning_rate": 1.536560974348828e-05, "loss": 0.5264773368835449, "step": 5524 }, { "epoch": 1.0208797681052304, "grad_norm": 0.055011942982673645, "learning_rate": 1.536392620438786e-05, "loss": 0.430140882730484, "step": 5525 }, { "epoch": 1.0210645448141262, "grad_norm": 0.07497008144855499, "learning_rate": 1.536224245181996e-05, "loss": 0.5237014889717102, "step": 5526 }, { "epoch": 1.021249321523022, "grad_norm": 0.07027363777160645, "learning_rate": 1.536055848585158e-05, "loss": 0.5692754983901978, "step": 5527 }, { "epoch": 1.0214340982319179, "grad_norm": 0.11043912917375565, "learning_rate": 1.535887430654975e-05, "loss": 0.46968314051628113, "step": 5528 }, { "epoch": 1.0216188749408137, "grad_norm": 0.08006355911493301, "learning_rate": 1.535718991398149e-05, "loss": 0.49496397376060486, "step": 5529 }, { "epoch": 1.0218036516497095, "grad_norm": 0.06432589888572693, "learning_rate": 1.535550530821382e-05, "loss": 0.3639981746673584, "step": 5530 }, { "epoch": 1.0219884283586054, "grad_norm": 0.10602528601884842, "learning_rate": 1.5353820489313807e-05, "loss": 0.7828198075294495, "step": 5531 }, { "epoch": 1.0221732050675012, "grad_norm": 0.07818809896707535, "learning_rate": 1.5352135457348488e-05, "loss": 0.6441103219985962, "step": 5532 }, { "epoch": 1.022357981776397, "grad_norm": 0.08831508457660675, "learning_rate": 1.5350450212384914e-05, "loss": 0.5698842406272888, "step": 5533 }, { "epoch": 1.0225427584852929, "grad_norm": 0.06406545639038086, "learning_rate": 1.5348764754490165e-05, "loss": 0.37343931198120117, "step": 5534 }, { "epoch": 1.0227275351941887, "grad_norm": 0.08259700238704681, "learning_rate": 1.5347079083731314e-05, "loss": 0.5247676968574524, "step": 5535 }, { "epoch": 1.0229123119030845, "grad_norm": 0.08037258684635162, "learning_rate": 1.5345393200175442e-05, "loss": 0.6214337348937988, "step": 5536 }, { "epoch": 1.0230970886119806, "grad_norm": 0.0930820181965828, "learning_rate": 1.5343707103889647e-05, "loss": 0.6681873202323914, "step": 5537 }, { "epoch": 1.0232818653208764, "grad_norm": 0.08174339681863785, "learning_rate": 1.5342020794941025e-05, "loss": 0.6081960797309875, "step": 5538 }, { "epoch": 1.0234666420297722, "grad_norm": 0.0695425271987915, "learning_rate": 1.5340334273396695e-05, "loss": 0.4818345606327057, "step": 5539 }, { "epoch": 1.023651418738668, "grad_norm": 0.06923358887434006, "learning_rate": 1.5338647539323758e-05, "loss": 0.43072810769081116, "step": 5540 }, { "epoch": 1.0238361954475639, "grad_norm": 0.09394175559282303, "learning_rate": 1.5336960592789357e-05, "loss": 0.6028846502304077, "step": 5541 }, { "epoch": 1.0240209721564597, "grad_norm": 0.0752132385969162, "learning_rate": 1.533527343386062e-05, "loss": 0.4695470333099365, "step": 5542 }, { "epoch": 1.0242057488653555, "grad_norm": 0.07551165670156479, "learning_rate": 1.5333586062604696e-05, "loss": 0.5507363080978394, "step": 5543 }, { "epoch": 1.0243905255742514, "grad_norm": 0.07440776377916336, "learning_rate": 1.5331898479088732e-05, "loss": 0.45515015721321106, "step": 5544 }, { "epoch": 1.0245753022831472, "grad_norm": 0.08244110643863678, "learning_rate": 1.533021068337989e-05, "loss": 0.6030721664428711, "step": 5545 }, { "epoch": 1.024760078992043, "grad_norm": 0.07014291733503342, "learning_rate": 1.5328522675545334e-05, "loss": 0.5677472949028015, "step": 5546 }, { "epoch": 1.0249448557009389, "grad_norm": 0.07949929684400558, "learning_rate": 1.532683445565225e-05, "loss": 0.6872313618659973, "step": 5547 }, { "epoch": 1.0251296324098347, "grad_norm": 0.07746239751577377, "learning_rate": 1.532514602376782e-05, "loss": 0.6091598868370056, "step": 5548 }, { "epoch": 1.0253144091187305, "grad_norm": 0.0781155452132225, "learning_rate": 1.532345737995924e-05, "loss": 0.5775952339172363, "step": 5549 }, { "epoch": 1.0254991858276263, "grad_norm": 0.07638484984636307, "learning_rate": 1.532176852429371e-05, "loss": 0.4975985288619995, "step": 5550 }, { "epoch": 1.0256839625365222, "grad_norm": 0.0655810683965683, "learning_rate": 1.5320079456838443e-05, "loss": 0.4833434522151947, "step": 5551 }, { "epoch": 1.0258687392454182, "grad_norm": 0.07090182602405548, "learning_rate": 1.531839017766066e-05, "loss": 0.4814104437828064, "step": 5552 }, { "epoch": 1.026053515954314, "grad_norm": 0.08101111650466919, "learning_rate": 1.5316700686827584e-05, "loss": 0.5643125772476196, "step": 5553 }, { "epoch": 1.0262382926632099, "grad_norm": 0.07077205181121826, "learning_rate": 1.5315010984406454e-05, "loss": 0.4897610545158386, "step": 5554 }, { "epoch": 1.0264230693721057, "grad_norm": 0.0607805959880352, "learning_rate": 1.5313321070464517e-05, "loss": 0.4320688843727112, "step": 5555 }, { "epoch": 1.0266078460810015, "grad_norm": 0.08007220178842545, "learning_rate": 1.531163094506902e-05, "loss": 0.6080632209777832, "step": 5556 }, { "epoch": 1.0267926227898974, "grad_norm": 0.0696863904595375, "learning_rate": 1.5309940608287234e-05, "loss": 0.47654882073402405, "step": 5557 }, { "epoch": 1.0269773994987932, "grad_norm": 0.08483091741800308, "learning_rate": 1.5308250060186428e-05, "loss": 0.7323111295700073, "step": 5558 }, { "epoch": 1.027162176207689, "grad_norm": 0.07609230279922485, "learning_rate": 1.530655930083387e-05, "loss": 0.5510598421096802, "step": 5559 }, { "epoch": 1.0273469529165848, "grad_norm": 0.05846942961215973, "learning_rate": 1.5304868330296854e-05, "loss": 0.4501841068267822, "step": 5560 }, { "epoch": 1.0275317296254807, "grad_norm": 0.0615713931620121, "learning_rate": 1.530317714864268e-05, "loss": 0.3922766149044037, "step": 5561 }, { "epoch": 1.0277165063343765, "grad_norm": 0.08426828682422638, "learning_rate": 1.5301485755938648e-05, "loss": 0.803465723991394, "step": 5562 }, { "epoch": 1.0279012830432723, "grad_norm": 0.08208931237459183, "learning_rate": 1.5299794152252064e-05, "loss": 0.6569002866744995, "step": 5563 }, { "epoch": 1.0280860597521682, "grad_norm": 0.0686066746711731, "learning_rate": 1.5298102337650254e-05, "loss": 0.4910159707069397, "step": 5564 }, { "epoch": 1.028270836461064, "grad_norm": 0.08395460247993469, "learning_rate": 1.529641031220055e-05, "loss": 0.610248327255249, "step": 5565 }, { "epoch": 1.0284556131699598, "grad_norm": 0.05675387382507324, "learning_rate": 1.5294718075970284e-05, "loss": 0.40480339527130127, "step": 5566 }, { "epoch": 1.0286403898788559, "grad_norm": 0.06499865651130676, "learning_rate": 1.5293025629026805e-05, "loss": 0.47910645604133606, "step": 5567 }, { "epoch": 1.0288251665877517, "grad_norm": 0.0667455643415451, "learning_rate": 1.5291332971437464e-05, "loss": 0.35491806268692017, "step": 5568 }, { "epoch": 1.0290099432966475, "grad_norm": 0.0756828561425209, "learning_rate": 1.5289640103269626e-05, "loss": 0.4958469867706299, "step": 5569 }, { "epoch": 1.0291947200055434, "grad_norm": 0.06689758598804474, "learning_rate": 1.5287947024590662e-05, "loss": 0.4663046896457672, "step": 5570 }, { "epoch": 1.0293794967144392, "grad_norm": 0.08043645322322845, "learning_rate": 1.528625373546795e-05, "loss": 0.5557774305343628, "step": 5571 }, { "epoch": 1.029564273423335, "grad_norm": 0.07677663862705231, "learning_rate": 1.5284560235968874e-05, "loss": 0.6274809837341309, "step": 5572 }, { "epoch": 1.0297490501322308, "grad_norm": 0.07894661277532578, "learning_rate": 1.5282866526160837e-05, "loss": 0.6272726655006409, "step": 5573 }, { "epoch": 1.0299338268411267, "grad_norm": 0.07600554823875427, "learning_rate": 1.528117260611124e-05, "loss": 0.547213077545166, "step": 5574 }, { "epoch": 1.0301186035500225, "grad_norm": 0.07473323494195938, "learning_rate": 1.5279478475887496e-05, "loss": 0.4859544634819031, "step": 5575 }, { "epoch": 1.0303033802589183, "grad_norm": 0.06868378818035126, "learning_rate": 1.527778413555703e-05, "loss": 0.5427464246749878, "step": 5576 }, { "epoch": 1.0304881569678142, "grad_norm": 0.08214934915304184, "learning_rate": 1.5276089585187258e-05, "loss": 0.6326417326927185, "step": 5577 }, { "epoch": 1.03067293367671, "grad_norm": 0.06721127033233643, "learning_rate": 1.5274394824845635e-05, "loss": 0.4642300307750702, "step": 5578 }, { "epoch": 1.0308577103856058, "grad_norm": 0.07905059307813644, "learning_rate": 1.52726998545996e-05, "loss": 0.5799643993377686, "step": 5579 }, { "epoch": 1.0310424870945016, "grad_norm": 0.08044886589050293, "learning_rate": 1.5271004674516603e-05, "loss": 0.521228015422821, "step": 5580 }, { "epoch": 1.0312272638033977, "grad_norm": 0.06395208090543747, "learning_rate": 1.5269309284664112e-05, "loss": 0.5170220136642456, "step": 5581 }, { "epoch": 1.0314120405122935, "grad_norm": 0.0650828406214714, "learning_rate": 1.5267613685109597e-05, "loss": 0.5077491998672485, "step": 5582 }, { "epoch": 1.0315968172211893, "grad_norm": 0.05765007808804512, "learning_rate": 1.5265917875920537e-05, "loss": 0.39931830763816833, "step": 5583 }, { "epoch": 1.0317815939300852, "grad_norm": 0.07269348949193954, "learning_rate": 1.526422185716442e-05, "loss": 0.4732840955257416, "step": 5584 }, { "epoch": 1.031966370638981, "grad_norm": 0.07275344431400299, "learning_rate": 1.5262525628908743e-05, "loss": 0.4784200191497803, "step": 5585 }, { "epoch": 1.0321511473478768, "grad_norm": 0.07855315506458282, "learning_rate": 1.5260829191221012e-05, "loss": 0.5925523638725281, "step": 5586 }, { "epoch": 1.0323359240567727, "grad_norm": 0.06200972571969032, "learning_rate": 1.525913254416874e-05, "loss": 0.4778120517730713, "step": 5587 }, { "epoch": 1.0325207007656685, "grad_norm": 0.08109522610902786, "learning_rate": 1.5257435687819442e-05, "loss": 0.5588696002960205, "step": 5588 }, { "epoch": 1.0327054774745643, "grad_norm": 0.06934543699026108, "learning_rate": 1.5255738622240653e-05, "loss": 0.5938635468482971, "step": 5589 }, { "epoch": 1.0328902541834601, "grad_norm": 0.07456963509321213, "learning_rate": 1.5254041347499912e-05, "loss": 0.6001103520393372, "step": 5590 }, { "epoch": 1.033075030892356, "grad_norm": 0.07474758476018906, "learning_rate": 1.525234386366476e-05, "loss": 0.5206001996994019, "step": 5591 }, { "epoch": 1.0332598076012518, "grad_norm": 0.07479168474674225, "learning_rate": 1.5250646170802759e-05, "loss": 0.5422087907791138, "step": 5592 }, { "epoch": 1.0334445843101476, "grad_norm": 0.07698295265436172, "learning_rate": 1.5248948268981462e-05, "loss": 0.6600850224494934, "step": 5593 }, { "epoch": 1.0336293610190435, "grad_norm": 0.09354109317064285, "learning_rate": 1.5247250158268452e-05, "loss": 0.6611118912696838, "step": 5594 }, { "epoch": 1.0338141377279393, "grad_norm": 0.08147210627794266, "learning_rate": 1.5245551838731299e-05, "loss": 0.5379778146743774, "step": 5595 }, { "epoch": 1.0339989144368353, "grad_norm": 0.06687918305397034, "learning_rate": 1.5243853310437593e-05, "loss": 0.6023465991020203, "step": 5596 }, { "epoch": 1.0341836911457312, "grad_norm": 0.09458979964256287, "learning_rate": 1.5242154573454934e-05, "loss": 0.7716991901397705, "step": 5597 }, { "epoch": 1.034368467854627, "grad_norm": 0.0748145803809166, "learning_rate": 1.524045562785092e-05, "loss": 0.5194568634033203, "step": 5598 }, { "epoch": 1.0345532445635228, "grad_norm": 0.08319809287786484, "learning_rate": 1.5238756473693167e-05, "loss": 0.6805424094200134, "step": 5599 }, { "epoch": 1.0347380212724187, "grad_norm": 0.06190166249871254, "learning_rate": 1.5237057111049303e-05, "loss": 0.5145440101623535, "step": 5600 }, { "epoch": 1.0349227979813145, "grad_norm": 0.08432719111442566, "learning_rate": 1.5235357539986945e-05, "loss": 0.6638540029525757, "step": 5601 }, { "epoch": 1.0351075746902103, "grad_norm": 0.07394157350063324, "learning_rate": 1.5233657760573737e-05, "loss": 0.6293306946754456, "step": 5602 }, { "epoch": 1.0352923513991061, "grad_norm": 0.08452186733484268, "learning_rate": 1.5231957772877323e-05, "loss": 0.6883015632629395, "step": 5603 }, { "epoch": 1.035477128108002, "grad_norm": 0.0699499100446701, "learning_rate": 1.5230257576965363e-05, "loss": 0.5550675392150879, "step": 5604 }, { "epoch": 1.0356619048168978, "grad_norm": 0.05052657797932625, "learning_rate": 1.5228557172905509e-05, "loss": 0.3413355350494385, "step": 5605 }, { "epoch": 1.0358466815257936, "grad_norm": 0.08890295773744583, "learning_rate": 1.522685656076544e-05, "loss": 0.6631185412406921, "step": 5606 }, { "epoch": 1.0360314582346895, "grad_norm": 0.06723684817552567, "learning_rate": 1.5225155740612834e-05, "loss": 0.44392311573028564, "step": 5607 }, { "epoch": 1.0362162349435853, "grad_norm": 0.07482519745826721, "learning_rate": 1.5223454712515376e-05, "loss": 0.6936173439025879, "step": 5608 }, { "epoch": 1.036401011652481, "grad_norm": 0.06911630183458328, "learning_rate": 1.5221753476540762e-05, "loss": 0.4736194610595703, "step": 5609 }, { "epoch": 1.0365857883613772, "grad_norm": 0.07511420547962189, "learning_rate": 1.5220052032756698e-05, "loss": 0.583503007888794, "step": 5610 }, { "epoch": 1.036770565070273, "grad_norm": 0.07482054084539413, "learning_rate": 1.5218350381230895e-05, "loss": 0.5486778020858765, "step": 5611 }, { "epoch": 1.0369553417791688, "grad_norm": 0.08281565457582474, "learning_rate": 1.521664852203107e-05, "loss": 0.5580335855484009, "step": 5612 }, { "epoch": 1.0371401184880646, "grad_norm": 0.06626997888088226, "learning_rate": 1.5214946455224955e-05, "loss": 0.5215938091278076, "step": 5613 }, { "epoch": 1.0373248951969605, "grad_norm": 0.07665207982063293, "learning_rate": 1.5213244180880287e-05, "loss": 0.5431420803070068, "step": 5614 }, { "epoch": 1.0375096719058563, "grad_norm": 0.06467811018228531, "learning_rate": 1.5211541699064811e-05, "loss": 0.3931715786457062, "step": 5615 }, { "epoch": 1.0376944486147521, "grad_norm": 0.06558360904455185, "learning_rate": 1.5209839009846282e-05, "loss": 0.42572009563446045, "step": 5616 }, { "epoch": 1.037879225323648, "grad_norm": 0.058751028031110764, "learning_rate": 1.5208136113292457e-05, "loss": 0.4192984104156494, "step": 5617 }, { "epoch": 1.0380640020325438, "grad_norm": 0.07840841263532639, "learning_rate": 1.5206433009471112e-05, "loss": 0.5992063879966736, "step": 5618 }, { "epoch": 1.0382487787414396, "grad_norm": 0.0799863412976265, "learning_rate": 1.5204729698450015e-05, "loss": 0.5317435264587402, "step": 5619 }, { "epoch": 1.0384335554503354, "grad_norm": 0.09214732050895691, "learning_rate": 1.520302618029697e-05, "loss": 0.5794035792350769, "step": 5620 }, { "epoch": 1.0386183321592313, "grad_norm": 0.06093394011259079, "learning_rate": 1.5201322455079757e-05, "loss": 0.5225287675857544, "step": 5621 }, { "epoch": 1.038803108868127, "grad_norm": 0.0768059566617012, "learning_rate": 1.519961852286618e-05, "loss": 0.4805833697319031, "step": 5622 }, { "epoch": 1.038987885577023, "grad_norm": 0.0913882628083229, "learning_rate": 1.5197914383724052e-05, "loss": 0.7455976009368896, "step": 5623 }, { "epoch": 1.0391726622859188, "grad_norm": 0.08133476972579956, "learning_rate": 1.51962100377212e-05, "loss": 0.6164051294326782, "step": 5624 }, { "epoch": 1.0393574389948148, "grad_norm": 0.06969386339187622, "learning_rate": 1.5194505484925444e-05, "loss": 0.5532299280166626, "step": 5625 }, { "epoch": 1.0395422157037106, "grad_norm": 0.0681932345032692, "learning_rate": 1.5192800725404618e-05, "loss": 0.4927964508533478, "step": 5626 }, { "epoch": 1.0397269924126065, "grad_norm": 0.07505299150943756, "learning_rate": 1.519109575922657e-05, "loss": 0.6235383152961731, "step": 5627 }, { "epoch": 1.0399117691215023, "grad_norm": 0.06280749291181564, "learning_rate": 1.5189390586459155e-05, "loss": 0.4233841001987457, "step": 5628 }, { "epoch": 1.0400965458303981, "grad_norm": 0.06342928856611252, "learning_rate": 1.5187685207170226e-05, "loss": 0.48731809854507446, "step": 5629 }, { "epoch": 1.040281322539294, "grad_norm": 0.06959110498428345, "learning_rate": 1.5185979621427657e-05, "loss": 0.5357281565666199, "step": 5630 }, { "epoch": 1.0404660992481898, "grad_norm": 0.07965416461229324, "learning_rate": 1.5184273829299327e-05, "loss": 0.6262179613113403, "step": 5631 }, { "epoch": 1.0406508759570856, "grad_norm": 0.06511030346155167, "learning_rate": 1.5182567830853114e-05, "loss": 0.37972134351730347, "step": 5632 }, { "epoch": 1.0408356526659814, "grad_norm": 0.10105421394109726, "learning_rate": 1.5180861626156915e-05, "loss": 0.6640194654464722, "step": 5633 }, { "epoch": 1.0410204293748773, "grad_norm": 0.07888025045394897, "learning_rate": 1.5179155215278637e-05, "loss": 0.5475636124610901, "step": 5634 }, { "epoch": 1.041205206083773, "grad_norm": 0.07195180654525757, "learning_rate": 1.5177448598286182e-05, "loss": 0.5586546063423157, "step": 5635 }, { "epoch": 1.041389982792669, "grad_norm": 0.07774486392736435, "learning_rate": 1.517574177524747e-05, "loss": 0.5166386961936951, "step": 5636 }, { "epoch": 1.0415747595015648, "grad_norm": 0.06525541096925735, "learning_rate": 1.517403474623043e-05, "loss": 0.46400272846221924, "step": 5637 }, { "epoch": 1.0417595362104606, "grad_norm": 0.06237497553229332, "learning_rate": 1.5172327511302996e-05, "loss": 0.4679761528968811, "step": 5638 }, { "epoch": 1.0419443129193564, "grad_norm": 0.08574408292770386, "learning_rate": 1.5170620070533104e-05, "loss": 0.5731038451194763, "step": 5639 }, { "epoch": 1.0421290896282525, "grad_norm": 0.0831538513302803, "learning_rate": 1.5168912423988716e-05, "loss": 0.5945683717727661, "step": 5640 }, { "epoch": 1.0423138663371483, "grad_norm": 0.06644801050424576, "learning_rate": 1.5167204571737782e-05, "loss": 0.5209828019142151, "step": 5641 }, { "epoch": 1.0424986430460441, "grad_norm": 0.07284197211265564, "learning_rate": 1.516549651384827e-05, "loss": 0.5813314318656921, "step": 5642 }, { "epoch": 1.04268341975494, "grad_norm": 0.07253412902355194, "learning_rate": 1.5163788250388161e-05, "loss": 0.6203429698944092, "step": 5643 }, { "epoch": 1.0428681964638358, "grad_norm": 0.06365064531564713, "learning_rate": 1.5162079781425434e-05, "loss": 0.4796571731567383, "step": 5644 }, { "epoch": 1.0430529731727316, "grad_norm": 0.06871246546506882, "learning_rate": 1.5160371107028082e-05, "loss": 0.4531722366809845, "step": 5645 }, { "epoch": 1.0432377498816274, "grad_norm": 0.08286961913108826, "learning_rate": 1.5158662227264102e-05, "loss": 0.6958336234092712, "step": 5646 }, { "epoch": 1.0434225265905233, "grad_norm": 0.05920685827732086, "learning_rate": 1.515695314220151e-05, "loss": 0.46679872274398804, "step": 5647 }, { "epoch": 1.043607303299419, "grad_norm": 0.08717560023069382, "learning_rate": 1.5155243851908314e-05, "loss": 0.6550828814506531, "step": 5648 }, { "epoch": 1.043792080008315, "grad_norm": 0.06699342280626297, "learning_rate": 1.515353435645254e-05, "loss": 0.5816624164581299, "step": 5649 }, { "epoch": 1.0439768567172107, "grad_norm": 0.07312697917222977, "learning_rate": 1.5151824655902223e-05, "loss": 0.656792938709259, "step": 5650 }, { "epoch": 1.0441616334261066, "grad_norm": 0.07946156710386276, "learning_rate": 1.5150114750325404e-05, "loss": 0.5273401737213135, "step": 5651 }, { "epoch": 1.0443464101350024, "grad_norm": 0.08179052174091339, "learning_rate": 1.514840463979013e-05, "loss": 0.568706214427948, "step": 5652 }, { "epoch": 1.0445311868438982, "grad_norm": 0.08967617899179459, "learning_rate": 1.5146694324364454e-05, "loss": 0.6211278438568115, "step": 5653 }, { "epoch": 1.044715963552794, "grad_norm": 0.07301918417215347, "learning_rate": 1.5144983804116453e-05, "loss": 0.541821300983429, "step": 5654 }, { "epoch": 1.04490074026169, "grad_norm": 0.07512032240629196, "learning_rate": 1.5143273079114189e-05, "loss": 0.38737353682518005, "step": 5655 }, { "epoch": 1.045085516970586, "grad_norm": 0.07602082192897797, "learning_rate": 1.5141562149425748e-05, "loss": 0.5600385665893555, "step": 5656 }, { "epoch": 1.0452702936794818, "grad_norm": 0.05643437057733536, "learning_rate": 1.5139851015119223e-05, "loss": 0.4082464575767517, "step": 5657 }, { "epoch": 1.0454550703883776, "grad_norm": 0.06733223795890808, "learning_rate": 1.5138139676262706e-05, "loss": 0.5769407749176025, "step": 5658 }, { "epoch": 1.0456398470972734, "grad_norm": 0.0798359215259552, "learning_rate": 1.5136428132924304e-05, "loss": 0.500033438205719, "step": 5659 }, { "epoch": 1.0458246238061693, "grad_norm": 0.06644061207771301, "learning_rate": 1.513471638517213e-05, "loss": 0.39873623847961426, "step": 5660 }, { "epoch": 1.046009400515065, "grad_norm": 0.06640005856752396, "learning_rate": 1.5133004433074314e-05, "loss": 0.44805052876472473, "step": 5661 }, { "epoch": 1.046194177223961, "grad_norm": 0.08006201684474945, "learning_rate": 1.5131292276698977e-05, "loss": 0.6086596846580505, "step": 5662 }, { "epoch": 1.0463789539328567, "grad_norm": 0.08362042158842087, "learning_rate": 1.5129579916114262e-05, "loss": 0.664318323135376, "step": 5663 }, { "epoch": 1.0465637306417526, "grad_norm": 0.0611673966050148, "learning_rate": 1.5127867351388314e-05, "loss": 0.45818454027175903, "step": 5664 }, { "epoch": 1.0467485073506484, "grad_norm": 0.07986735552549362, "learning_rate": 1.5126154582589287e-05, "loss": 0.6387860178947449, "step": 5665 }, { "epoch": 1.0469332840595442, "grad_norm": 0.08133723586797714, "learning_rate": 1.5124441609785347e-05, "loss": 0.5613438487052917, "step": 5666 }, { "epoch": 1.04711806076844, "grad_norm": 0.06914244592189789, "learning_rate": 1.5122728433044664e-05, "loss": 0.5158078670501709, "step": 5667 }, { "epoch": 1.0473028374773359, "grad_norm": 0.08781937509775162, "learning_rate": 1.5121015052435418e-05, "loss": 0.6469088792800903, "step": 5668 }, { "epoch": 1.047487614186232, "grad_norm": 0.06933942437171936, "learning_rate": 1.511930146802579e-05, "loss": 0.4561266303062439, "step": 5669 }, { "epoch": 1.0476723908951278, "grad_norm": 0.08494089543819427, "learning_rate": 1.5117587679883982e-05, "loss": 0.630203902721405, "step": 5670 }, { "epoch": 1.0478571676040236, "grad_norm": 0.060044411569833755, "learning_rate": 1.5115873688078197e-05, "loss": 0.427950382232666, "step": 5671 }, { "epoch": 1.0480419443129194, "grad_norm": 0.07982151210308075, "learning_rate": 1.511415949267664e-05, "loss": 0.5333434343338013, "step": 5672 }, { "epoch": 1.0482267210218152, "grad_norm": 0.08310159295797348, "learning_rate": 1.511244509374754e-05, "loss": 0.5588505268096924, "step": 5673 }, { "epoch": 1.048411497730711, "grad_norm": 0.07040046900510788, "learning_rate": 1.511073049135912e-05, "loss": 0.46905678510665894, "step": 5674 }, { "epoch": 1.048596274439607, "grad_norm": 0.0707654356956482, "learning_rate": 1.5109015685579613e-05, "loss": 0.5168400406837463, "step": 5675 }, { "epoch": 1.0487810511485027, "grad_norm": 0.07354991137981415, "learning_rate": 1.5107300676477268e-05, "loss": 0.5358753800392151, "step": 5676 }, { "epoch": 1.0489658278573986, "grad_norm": 0.06685210019350052, "learning_rate": 1.5105585464120333e-05, "loss": 0.587394654750824, "step": 5677 }, { "epoch": 1.0491506045662944, "grad_norm": 0.09235299378633499, "learning_rate": 1.5103870048577071e-05, "loss": 0.6667936444282532, "step": 5678 }, { "epoch": 1.0493353812751902, "grad_norm": 0.0625777542591095, "learning_rate": 1.510215442991575e-05, "loss": 0.5211875438690186, "step": 5679 }, { "epoch": 1.049520157984086, "grad_norm": 0.0792139396071434, "learning_rate": 1.5100438608204645e-05, "loss": 0.5901344418525696, "step": 5680 }, { "epoch": 1.0497049346929819, "grad_norm": 0.07802049815654755, "learning_rate": 1.5098722583512038e-05, "loss": 0.6435686349868774, "step": 5681 }, { "epoch": 1.0498897114018777, "grad_norm": 0.07689131051301956, "learning_rate": 1.5097006355906225e-05, "loss": 0.5567794442176819, "step": 5682 }, { "epoch": 1.0500744881107735, "grad_norm": 0.07338476926088333, "learning_rate": 1.5095289925455507e-05, "loss": 0.542125940322876, "step": 5683 }, { "epoch": 1.0502592648196696, "grad_norm": 0.06186862662434578, "learning_rate": 1.509357329222819e-05, "loss": 0.46599534153938293, "step": 5684 }, { "epoch": 1.0504440415285654, "grad_norm": 0.08224906027317047, "learning_rate": 1.5091856456292591e-05, "loss": 0.6175563931465149, "step": 5685 }, { "epoch": 1.0506288182374612, "grad_norm": 0.08578823506832123, "learning_rate": 1.5090139417717039e-05, "loss": 0.6648751497268677, "step": 5686 }, { "epoch": 1.050813594946357, "grad_norm": 0.06645061075687408, "learning_rate": 1.5088422176569859e-05, "loss": 0.5244321823120117, "step": 5687 }, { "epoch": 1.050998371655253, "grad_norm": 0.0692540779709816, "learning_rate": 1.50867047329194e-05, "loss": 0.4675929546356201, "step": 5688 }, { "epoch": 1.0511831483641487, "grad_norm": 0.0704752653837204, "learning_rate": 1.5084987086834003e-05, "loss": 0.5662005543708801, "step": 5689 }, { "epoch": 1.0513679250730446, "grad_norm": 0.06947939097881317, "learning_rate": 1.5083269238382028e-05, "loss": 0.460791677236557, "step": 5690 }, { "epoch": 1.0515527017819404, "grad_norm": 0.06659407913684845, "learning_rate": 1.5081551187631844e-05, "loss": 0.49134254455566406, "step": 5691 }, { "epoch": 1.0517374784908362, "grad_norm": 0.0819624662399292, "learning_rate": 1.507983293465182e-05, "loss": 0.5807049870491028, "step": 5692 }, { "epoch": 1.051922255199732, "grad_norm": 0.07566314190626144, "learning_rate": 1.507811447951034e-05, "loss": 0.5705682635307312, "step": 5693 }, { "epoch": 1.0521070319086279, "grad_norm": 0.07033289223909378, "learning_rate": 1.5076395822275787e-05, "loss": 0.49281132221221924, "step": 5694 }, { "epoch": 1.0522918086175237, "grad_norm": 0.09457696974277496, "learning_rate": 1.5074676963016563e-05, "loss": 0.6702513098716736, "step": 5695 }, { "epoch": 1.0524765853264195, "grad_norm": 0.08114906400442123, "learning_rate": 1.5072957901801075e-05, "loss": 0.6118483543395996, "step": 5696 }, { "epoch": 1.0526613620353154, "grad_norm": 0.07982999831438065, "learning_rate": 1.5071238638697731e-05, "loss": 0.562346339225769, "step": 5697 }, { "epoch": 1.0528461387442114, "grad_norm": 0.07520321756601334, "learning_rate": 1.5069519173774958e-05, "loss": 0.4821903109550476, "step": 5698 }, { "epoch": 1.0530309154531072, "grad_norm": 0.07969825714826584, "learning_rate": 1.5067799507101182e-05, "loss": 0.5231217741966248, "step": 5699 }, { "epoch": 1.053215692162003, "grad_norm": 0.08513433486223221, "learning_rate": 1.5066079638744839e-05, "loss": 0.6409650444984436, "step": 5700 }, { "epoch": 1.0534004688708989, "grad_norm": 0.06774573773145676, "learning_rate": 1.5064359568774376e-05, "loss": 0.44569161534309387, "step": 5701 }, { "epoch": 1.0535852455797947, "grad_norm": 0.06494417041540146, "learning_rate": 1.5062639297258246e-05, "loss": 0.4762156307697296, "step": 5702 }, { "epoch": 1.0537700222886905, "grad_norm": 0.07973779737949371, "learning_rate": 1.5060918824264916e-05, "loss": 0.5730336308479309, "step": 5703 }, { "epoch": 1.0539547989975864, "grad_norm": 0.05715023726224899, "learning_rate": 1.5059198149862843e-05, "loss": 0.47526082396507263, "step": 5704 }, { "epoch": 1.0541395757064822, "grad_norm": 0.0791873037815094, "learning_rate": 1.5057477274120516e-05, "loss": 0.556162416934967, "step": 5705 }, { "epoch": 1.054324352415378, "grad_norm": 0.056715238839387894, "learning_rate": 1.5055756197106417e-05, "loss": 0.42971712350845337, "step": 5706 }, { "epoch": 1.0545091291242739, "grad_norm": 0.06378652155399323, "learning_rate": 1.5054034918889037e-05, "loss": 0.4626498222351074, "step": 5707 }, { "epoch": 1.0546939058331697, "grad_norm": 0.07502374798059464, "learning_rate": 1.505231343953688e-05, "loss": 0.531059741973877, "step": 5708 }, { "epoch": 1.0548786825420655, "grad_norm": 0.09165331721305847, "learning_rate": 1.5050591759118454e-05, "loss": 0.7677619457244873, "step": 5709 }, { "epoch": 1.0550634592509613, "grad_norm": 0.06765854358673096, "learning_rate": 1.5048869877702278e-05, "loss": 0.5271489024162292, "step": 5710 }, { "epoch": 1.0552482359598572, "grad_norm": 0.0819627195596695, "learning_rate": 1.5047147795356877e-05, "loss": 0.5126342177391052, "step": 5711 }, { "epoch": 1.055433012668753, "grad_norm": 0.06692739576101303, "learning_rate": 1.5045425512150784e-05, "loss": 0.5844963192939758, "step": 5712 }, { "epoch": 1.055617789377649, "grad_norm": 0.0615801103413105, "learning_rate": 1.504370302815254e-05, "loss": 0.4775800108909607, "step": 5713 }, { "epoch": 1.0558025660865449, "grad_norm": 0.06606190651655197, "learning_rate": 1.5041980343430696e-05, "loss": 0.4907645583152771, "step": 5714 }, { "epoch": 1.0559873427954407, "grad_norm": 0.060940444469451904, "learning_rate": 1.5040257458053806e-05, "loss": 0.37098509073257446, "step": 5715 }, { "epoch": 1.0561721195043365, "grad_norm": 0.07180286198854446, "learning_rate": 1.5038534372090443e-05, "loss": 0.5962932705879211, "step": 5716 }, { "epoch": 1.0563568962132324, "grad_norm": 0.07350271940231323, "learning_rate": 1.5036811085609176e-05, "loss": 0.5578521490097046, "step": 5717 }, { "epoch": 1.0565416729221282, "grad_norm": 0.06898215413093567, "learning_rate": 1.5035087598678581e-05, "loss": 0.6065282821655273, "step": 5718 }, { "epoch": 1.056726449631024, "grad_norm": 0.08027186244726181, "learning_rate": 1.5033363911367254e-05, "loss": 0.49961212277412415, "step": 5719 }, { "epoch": 1.0569112263399199, "grad_norm": 0.06551721692085266, "learning_rate": 1.5031640023743792e-05, "loss": 0.3818071186542511, "step": 5720 }, { "epoch": 1.0570960030488157, "grad_norm": 0.06600042432546616, "learning_rate": 1.5029915935876797e-05, "loss": 0.4476735293865204, "step": 5721 }, { "epoch": 1.0572807797577115, "grad_norm": 0.05965065583586693, "learning_rate": 1.5028191647834884e-05, "loss": 0.5809292197227478, "step": 5722 }, { "epoch": 1.0574655564666073, "grad_norm": 0.06527971476316452, "learning_rate": 1.502646715968668e-05, "loss": 0.4536020755767822, "step": 5723 }, { "epoch": 1.0576503331755032, "grad_norm": 0.07186955213546753, "learning_rate": 1.5024742471500804e-05, "loss": 0.5548267960548401, "step": 5724 }, { "epoch": 1.057835109884399, "grad_norm": 0.07215177267789841, "learning_rate": 1.5023017583345895e-05, "loss": 0.49939098954200745, "step": 5725 }, { "epoch": 1.0580198865932948, "grad_norm": 0.06784142553806305, "learning_rate": 1.5021292495290608e-05, "loss": 0.4960416555404663, "step": 5726 }, { "epoch": 1.0582046633021909, "grad_norm": 0.05334806442260742, "learning_rate": 1.5019567207403587e-05, "loss": 0.46388018131256104, "step": 5727 }, { "epoch": 1.0583894400110867, "grad_norm": 0.08525721728801727, "learning_rate": 1.5017841719753495e-05, "loss": 0.5141664147377014, "step": 5728 }, { "epoch": 1.0585742167199825, "grad_norm": 0.07427357137203217, "learning_rate": 1.5016116032409e-05, "loss": 0.48649123311042786, "step": 5729 }, { "epoch": 1.0587589934288784, "grad_norm": 0.07980693876743317, "learning_rate": 1.5014390145438782e-05, "loss": 0.59165358543396, "step": 5730 }, { "epoch": 1.0589437701377742, "grad_norm": 0.07934518903493881, "learning_rate": 1.5012664058911522e-05, "loss": 0.5416390299797058, "step": 5731 }, { "epoch": 1.05912854684667, "grad_norm": 0.08083886653184891, "learning_rate": 1.5010937772895918e-05, "loss": 0.45603734254837036, "step": 5732 }, { "epoch": 1.0593133235555658, "grad_norm": 0.09596753865480423, "learning_rate": 1.5009211287460668e-05, "loss": 0.6592952609062195, "step": 5733 }, { "epoch": 1.0594981002644617, "grad_norm": 0.07743064314126968, "learning_rate": 1.500748460267448e-05, "loss": 0.502553403377533, "step": 5734 }, { "epoch": 1.0596828769733575, "grad_norm": 0.07169241458177567, "learning_rate": 1.5005757718606066e-05, "loss": 0.558634877204895, "step": 5735 }, { "epoch": 1.0598676536822533, "grad_norm": 0.07375902682542801, "learning_rate": 1.5004030635324163e-05, "loss": 0.45072486996650696, "step": 5736 }, { "epoch": 1.0600524303911492, "grad_norm": 0.0858403667807579, "learning_rate": 1.5002303352897494e-05, "loss": 0.6073910593986511, "step": 5737 }, { "epoch": 1.060237207100045, "grad_norm": 0.06966253370046616, "learning_rate": 1.50005758713948e-05, "loss": 0.5068526268005371, "step": 5738 }, { "epoch": 1.0604219838089408, "grad_norm": 0.0676216408610344, "learning_rate": 1.4998848190884832e-05, "loss": 0.37021389603614807, "step": 5739 }, { "epoch": 1.0606067605178366, "grad_norm": 0.07576752454042435, "learning_rate": 1.4997120311436346e-05, "loss": 0.5430153608322144, "step": 5740 }, { "epoch": 1.0607915372267325, "grad_norm": 0.07603286951780319, "learning_rate": 1.4995392233118104e-05, "loss": 0.5482485294342041, "step": 5741 }, { "epoch": 1.0609763139356283, "grad_norm": 0.08099351823329926, "learning_rate": 1.4993663955998883e-05, "loss": 0.5044888854026794, "step": 5742 }, { "epoch": 1.0611610906445244, "grad_norm": 0.062198203057050705, "learning_rate": 1.4991935480147457e-05, "loss": 0.4543401300907135, "step": 5743 }, { "epoch": 1.0613458673534202, "grad_norm": 0.07020148634910583, "learning_rate": 1.4990206805632618e-05, "loss": 0.49687787890434265, "step": 5744 }, { "epoch": 1.061530644062316, "grad_norm": 0.08795180171728134, "learning_rate": 1.4988477932523157e-05, "loss": 0.6046985387802124, "step": 5745 }, { "epoch": 1.0617154207712118, "grad_norm": 0.07090011239051819, "learning_rate": 1.4986748860887885e-05, "loss": 0.3979244828224182, "step": 5746 }, { "epoch": 1.0619001974801077, "grad_norm": 0.07279148697853088, "learning_rate": 1.4985019590795611e-05, "loss": 0.6921658515930176, "step": 5747 }, { "epoch": 1.0620849741890035, "grad_norm": 0.06524580717086792, "learning_rate": 1.4983290122315151e-05, "loss": 0.51961350440979, "step": 5748 }, { "epoch": 1.0622697508978993, "grad_norm": 0.07926417142152786, "learning_rate": 1.4981560455515337e-05, "loss": 0.6424283385276794, "step": 5749 }, { "epoch": 1.0624545276067952, "grad_norm": 0.05497293919324875, "learning_rate": 1.4979830590465e-05, "loss": 0.3074781596660614, "step": 5750 }, { "epoch": 1.062639304315691, "grad_norm": 0.09386415034532547, "learning_rate": 1.4978100527232985e-05, "loss": 0.6136388182640076, "step": 5751 }, { "epoch": 1.0628240810245868, "grad_norm": 0.06570550799369812, "learning_rate": 1.4976370265888142e-05, "loss": 0.5354533791542053, "step": 5752 }, { "epoch": 1.0630088577334826, "grad_norm": 0.07388859987258911, "learning_rate": 1.4974639806499336e-05, "loss": 0.4849873185157776, "step": 5753 }, { "epoch": 1.0631936344423785, "grad_norm": 0.09081213921308517, "learning_rate": 1.4972909149135429e-05, "loss": 0.5858061909675598, "step": 5754 }, { "epoch": 1.0633784111512743, "grad_norm": 0.06316659599542618, "learning_rate": 1.4971178293865292e-05, "loss": 0.4371189773082733, "step": 5755 }, { "epoch": 1.0635631878601701, "grad_norm": 0.0723111629486084, "learning_rate": 1.4969447240757812e-05, "loss": 0.4745716154575348, "step": 5756 }, { "epoch": 1.0637479645690662, "grad_norm": 0.05701501667499542, "learning_rate": 1.4967715989881884e-05, "loss": 0.4331725239753723, "step": 5757 }, { "epoch": 1.063932741277962, "grad_norm": 0.05743462219834328, "learning_rate": 1.4965984541306398e-05, "loss": 0.31881242990493774, "step": 5758 }, { "epoch": 1.0641175179868578, "grad_norm": 0.05291770398616791, "learning_rate": 1.4964252895100265e-05, "loss": 0.3698563873767853, "step": 5759 }, { "epoch": 1.0643022946957537, "grad_norm": 0.06304258853197098, "learning_rate": 1.4962521051332397e-05, "loss": 0.39415884017944336, "step": 5760 }, { "epoch": 1.0644870714046495, "grad_norm": 0.054569315165281296, "learning_rate": 1.4960789010071717e-05, "loss": 0.3881533443927765, "step": 5761 }, { "epoch": 1.0646718481135453, "grad_norm": 0.06850560009479523, "learning_rate": 1.4959056771387156e-05, "loss": 0.44263342022895813, "step": 5762 }, { "epoch": 1.0648566248224411, "grad_norm": 0.08048789948225021, "learning_rate": 1.495732433534765e-05, "loss": 0.5522597432136536, "step": 5763 }, { "epoch": 1.065041401531337, "grad_norm": 0.0781223401427269, "learning_rate": 1.4955591702022145e-05, "loss": 0.6733675003051758, "step": 5764 }, { "epoch": 1.0652261782402328, "grad_norm": 0.07135794311761856, "learning_rate": 1.4953858871479595e-05, "loss": 0.5192707777023315, "step": 5765 }, { "epoch": 1.0654109549491286, "grad_norm": 0.05873025581240654, "learning_rate": 1.4952125843788955e-05, "loss": 0.36700373888015747, "step": 5766 }, { "epoch": 1.0655957316580245, "grad_norm": 0.07478354871273041, "learning_rate": 1.4950392619019208e-05, "loss": 0.5370163321495056, "step": 5767 }, { "epoch": 1.0657805083669203, "grad_norm": 0.08937849849462509, "learning_rate": 1.4948659197239317e-05, "loss": 0.6988489031791687, "step": 5768 }, { "epoch": 1.0659652850758161, "grad_norm": 0.08429345488548279, "learning_rate": 1.4946925578518273e-05, "loss": 0.636594295501709, "step": 5769 }, { "epoch": 1.066150061784712, "grad_norm": 0.07610718160867691, "learning_rate": 1.4945191762925068e-05, "loss": 0.5621874928474426, "step": 5770 }, { "epoch": 1.0663348384936078, "grad_norm": 0.10598469525575638, "learning_rate": 1.4943457750528706e-05, "loss": 0.685886025428772, "step": 5771 }, { "epoch": 1.0665196152025038, "grad_norm": 0.07759395241737366, "learning_rate": 1.4941723541398185e-05, "loss": 0.4770432710647583, "step": 5772 }, { "epoch": 1.0667043919113997, "grad_norm": 0.077302485704422, "learning_rate": 1.4939989135602532e-05, "loss": 0.50245600938797, "step": 5773 }, { "epoch": 1.0668891686202955, "grad_norm": 0.06550062447786331, "learning_rate": 1.493825453321077e-05, "loss": 0.49462637305259705, "step": 5774 }, { "epoch": 1.0670739453291913, "grad_norm": 0.08421865105628967, "learning_rate": 1.493651973429192e-05, "loss": 0.5801661014556885, "step": 5775 }, { "epoch": 1.0672587220380871, "grad_norm": 0.08015292137861252, "learning_rate": 1.4934784738915034e-05, "loss": 0.5254921913146973, "step": 5776 }, { "epoch": 1.067443498746983, "grad_norm": 0.05905531346797943, "learning_rate": 1.4933049547149155e-05, "loss": 0.38395801186561584, "step": 5777 }, { "epoch": 1.0676282754558788, "grad_norm": 0.06965994834899902, "learning_rate": 1.4931314159063333e-05, "loss": 0.46900445222854614, "step": 5778 }, { "epoch": 1.0678130521647746, "grad_norm": 0.07749634981155396, "learning_rate": 1.4929578574726637e-05, "loss": 0.5108754634857178, "step": 5779 }, { "epoch": 1.0679978288736705, "grad_norm": 0.07539774477481842, "learning_rate": 1.4927842794208138e-05, "loss": 0.5456463694572449, "step": 5780 }, { "epoch": 1.0681826055825663, "grad_norm": 0.08375140279531479, "learning_rate": 1.4926106817576913e-05, "loss": 0.592858076095581, "step": 5781 }, { "epoch": 1.068367382291462, "grad_norm": 0.07365249842405319, "learning_rate": 1.4924370644902048e-05, "loss": 0.5774492025375366, "step": 5782 }, { "epoch": 1.068552159000358, "grad_norm": 0.08658870309591293, "learning_rate": 1.4922634276252636e-05, "loss": 0.5666930079460144, "step": 5783 }, { "epoch": 1.0687369357092538, "grad_norm": 0.06969824433326721, "learning_rate": 1.4920897711697784e-05, "loss": 0.4051949679851532, "step": 5784 }, { "epoch": 1.0689217124181496, "grad_norm": 0.094402015209198, "learning_rate": 1.4919160951306598e-05, "loss": 0.6602669358253479, "step": 5785 }, { "epoch": 1.0691064891270456, "grad_norm": 0.06941288709640503, "learning_rate": 1.4917423995148193e-05, "loss": 0.5111122131347656, "step": 5786 }, { "epoch": 1.0692912658359415, "grad_norm": 0.06397631764411926, "learning_rate": 1.4915686843291701e-05, "loss": 0.421415239572525, "step": 5787 }, { "epoch": 1.0694760425448373, "grad_norm": 0.09350269287824631, "learning_rate": 1.491394949580625e-05, "loss": 0.7124272584915161, "step": 5788 }, { "epoch": 1.0696608192537331, "grad_norm": 0.07877876609563828, "learning_rate": 1.4912211952760985e-05, "loss": 0.5305052399635315, "step": 5789 }, { "epoch": 1.069845595962629, "grad_norm": 0.06914859265089035, "learning_rate": 1.491047421422505e-05, "loss": 0.47308892011642456, "step": 5790 }, { "epoch": 1.0700303726715248, "grad_norm": 0.07343301177024841, "learning_rate": 1.4908736280267604e-05, "loss": 0.5048878788948059, "step": 5791 }, { "epoch": 1.0702151493804206, "grad_norm": 0.07299642264842987, "learning_rate": 1.4906998150957815e-05, "loss": 0.43826934695243835, "step": 5792 }, { "epoch": 1.0703999260893164, "grad_norm": 0.07881126552820206, "learning_rate": 1.4905259826364846e-05, "loss": 0.49007317423820496, "step": 5793 }, { "epoch": 1.0705847027982123, "grad_norm": 0.06472620368003845, "learning_rate": 1.490352130655789e-05, "loss": 0.4609697461128235, "step": 5794 }, { "epoch": 1.070769479507108, "grad_norm": 0.07513286918401718, "learning_rate": 1.490178259160612e-05, "loss": 0.6415743827819824, "step": 5795 }, { "epoch": 1.070954256216004, "grad_norm": 0.08034192025661469, "learning_rate": 1.4900043681578741e-05, "loss": 0.6771365404129028, "step": 5796 }, { "epoch": 1.0711390329248998, "grad_norm": 0.08471996337175369, "learning_rate": 1.4898304576544955e-05, "loss": 0.5288458466529846, "step": 5797 }, { "epoch": 1.0713238096337956, "grad_norm": 0.08926571905612946, "learning_rate": 1.489656527657397e-05, "loss": 0.7090543508529663, "step": 5798 }, { "epoch": 1.0715085863426914, "grad_norm": 0.06450363248586655, "learning_rate": 1.489482578173501e-05, "loss": 0.49964165687561035, "step": 5799 }, { "epoch": 1.0716933630515872, "grad_norm": 0.07753396779298782, "learning_rate": 1.4893086092097292e-05, "loss": 0.5874074101448059, "step": 5800 }, { "epoch": 1.0718781397604833, "grad_norm": 0.06905125826597214, "learning_rate": 1.489134620773006e-05, "loss": 0.5268652439117432, "step": 5801 }, { "epoch": 1.0720629164693791, "grad_norm": 0.08922120928764343, "learning_rate": 1.4889606128702552e-05, "loss": 0.6464628577232361, "step": 5802 }, { "epoch": 1.072247693178275, "grad_norm": 0.07564452290534973, "learning_rate": 1.4887865855084015e-05, "loss": 0.5073250532150269, "step": 5803 }, { "epoch": 1.0724324698871708, "grad_norm": 0.07366842031478882, "learning_rate": 1.4886125386943713e-05, "loss": 0.6223083734512329, "step": 5804 }, { "epoch": 1.0726172465960666, "grad_norm": 0.06662733852863312, "learning_rate": 1.4884384724350907e-05, "loss": 0.47135940194129944, "step": 5805 }, { "epoch": 1.0728020233049624, "grad_norm": 0.07442791759967804, "learning_rate": 1.4882643867374868e-05, "loss": 0.46855729818344116, "step": 5806 }, { "epoch": 1.0729868000138583, "grad_norm": 0.06059639900922775, "learning_rate": 1.488090281608488e-05, "loss": 0.40140438079833984, "step": 5807 }, { "epoch": 1.073171576722754, "grad_norm": 0.08974947035312653, "learning_rate": 1.4879161570550227e-05, "loss": 0.6785848736763, "step": 5808 }, { "epoch": 1.07335635343165, "grad_norm": 0.08336343616247177, "learning_rate": 1.4877420130840214e-05, "loss": 0.539770781993866, "step": 5809 }, { "epoch": 1.0735411301405458, "grad_norm": 0.07375794649124146, "learning_rate": 1.4875678497024134e-05, "loss": 0.5181423425674438, "step": 5810 }, { "epoch": 1.0737259068494416, "grad_norm": 0.08090443909168243, "learning_rate": 1.4873936669171307e-05, "loss": 0.6372614502906799, "step": 5811 }, { "epoch": 1.0739106835583374, "grad_norm": 0.08555450290441513, "learning_rate": 1.4872194647351049e-05, "loss": 0.540556788444519, "step": 5812 }, { "epoch": 1.0740954602672332, "grad_norm": 0.06083214282989502, "learning_rate": 1.4870452431632689e-05, "loss": 0.3508705198764801, "step": 5813 }, { "epoch": 1.074280236976129, "grad_norm": 0.062058694660663605, "learning_rate": 1.4868710022085555e-05, "loss": 0.4283808767795563, "step": 5814 }, { "epoch": 1.0744650136850251, "grad_norm": 0.04995314031839371, "learning_rate": 1.4866967418778996e-05, "loss": 0.33063793182373047, "step": 5815 }, { "epoch": 1.074649790393921, "grad_norm": 0.07562576979398727, "learning_rate": 1.4865224621782364e-05, "loss": 0.5480437278747559, "step": 5816 }, { "epoch": 1.0748345671028168, "grad_norm": 0.06574447453022003, "learning_rate": 1.486348163116501e-05, "loss": 0.3196644186973572, "step": 5817 }, { "epoch": 1.0750193438117126, "grad_norm": 0.06903432309627533, "learning_rate": 1.4861738446996304e-05, "loss": 0.5272139310836792, "step": 5818 }, { "epoch": 1.0752041205206084, "grad_norm": 0.06662653386592865, "learning_rate": 1.4859995069345618e-05, "loss": 0.48803701996803284, "step": 5819 }, { "epoch": 1.0753888972295043, "grad_norm": 0.0712452158331871, "learning_rate": 1.4858251498282333e-05, "loss": 0.46093958616256714, "step": 5820 }, { "epoch": 1.0755736739384, "grad_norm": 0.07670149952173233, "learning_rate": 1.4856507733875837e-05, "loss": 0.5727660655975342, "step": 5821 }, { "epoch": 1.075758450647296, "grad_norm": 0.08891289681196213, "learning_rate": 1.485476377619553e-05, "loss": 0.6572068929672241, "step": 5822 }, { "epoch": 1.0759432273561917, "grad_norm": 0.06130025535821915, "learning_rate": 1.4853019625310813e-05, "loss": 0.4660205543041229, "step": 5823 }, { "epoch": 1.0761280040650876, "grad_norm": 0.06196140870451927, "learning_rate": 1.4851275281291095e-05, "loss": 0.44341546297073364, "step": 5824 }, { "epoch": 1.0763127807739834, "grad_norm": 0.08439138531684875, "learning_rate": 1.48495307442058e-05, "loss": 0.5173313021659851, "step": 5825 }, { "epoch": 1.0764975574828792, "grad_norm": 0.06659369170665741, "learning_rate": 1.4847786014124354e-05, "loss": 0.4271799325942993, "step": 5826 }, { "epoch": 1.076682334191775, "grad_norm": 0.06720487028360367, "learning_rate": 1.4846041091116192e-05, "loss": 0.47105199098587036, "step": 5827 }, { "epoch": 1.0768671109006709, "grad_norm": 0.09249649196863174, "learning_rate": 1.4844295975250755e-05, "loss": 0.6819266080856323, "step": 5828 }, { "epoch": 1.0770518876095667, "grad_norm": 0.08595030009746552, "learning_rate": 1.4842550666597495e-05, "loss": 0.5509486794471741, "step": 5829 }, { "epoch": 1.0772366643184625, "grad_norm": 0.06970569491386414, "learning_rate": 1.484080516522587e-05, "loss": 0.39800581336021423, "step": 5830 }, { "epoch": 1.0774214410273586, "grad_norm": 0.07932216674089432, "learning_rate": 1.483905947120534e-05, "loss": 0.540603518486023, "step": 5831 }, { "epoch": 1.0776062177362544, "grad_norm": 0.07762263715267181, "learning_rate": 1.4837313584605386e-05, "loss": 0.656118631362915, "step": 5832 }, { "epoch": 1.0777909944451503, "grad_norm": 0.05933019518852234, "learning_rate": 1.4835567505495486e-05, "loss": 0.3998440206050873, "step": 5833 }, { "epoch": 1.077975771154046, "grad_norm": 0.06421727687120438, "learning_rate": 1.4833821233945122e-05, "loss": 0.3742504119873047, "step": 5834 }, { "epoch": 1.078160547862942, "grad_norm": 0.0962388664484024, "learning_rate": 1.48320747700238e-05, "loss": 0.8221604228019714, "step": 5835 }, { "epoch": 1.0783453245718377, "grad_norm": 0.07165448367595673, "learning_rate": 1.4830328113801022e-05, "loss": 0.4744814336299896, "step": 5836 }, { "epoch": 1.0785301012807336, "grad_norm": 0.08229894191026688, "learning_rate": 1.4828581265346295e-05, "loss": 0.602943480014801, "step": 5837 }, { "epoch": 1.0787148779896294, "grad_norm": 0.06584686785936356, "learning_rate": 1.482683422472914e-05, "loss": 0.5143553614616394, "step": 5838 }, { "epoch": 1.0788996546985252, "grad_norm": 0.0741025060415268, "learning_rate": 1.4825086992019087e-05, "loss": 0.45243039727211, "step": 5839 }, { "epoch": 1.079084431407421, "grad_norm": 0.08383125066757202, "learning_rate": 1.4823339567285664e-05, "loss": 0.651415228843689, "step": 5840 }, { "epoch": 1.0792692081163169, "grad_norm": 0.06560477614402771, "learning_rate": 1.4821591950598415e-05, "loss": 0.5027257204055786, "step": 5841 }, { "epoch": 1.0794539848252127, "grad_norm": 0.06961048394441605, "learning_rate": 1.4819844142026895e-05, "loss": 0.4787517488002777, "step": 5842 }, { "epoch": 1.0796387615341085, "grad_norm": 0.0803142711520195, "learning_rate": 1.4818096141640658e-05, "loss": 0.6678456664085388, "step": 5843 }, { "epoch": 1.0798235382430044, "grad_norm": 0.06719299405813217, "learning_rate": 1.4816347949509264e-05, "loss": 0.5077908039093018, "step": 5844 }, { "epoch": 1.0800083149519004, "grad_norm": 0.07734595239162445, "learning_rate": 1.4814599565702295e-05, "loss": 0.6761911511421204, "step": 5845 }, { "epoch": 1.0801930916607962, "grad_norm": 0.07329533249139786, "learning_rate": 1.4812850990289324e-05, "loss": 0.5296775698661804, "step": 5846 }, { "epoch": 1.080377868369692, "grad_norm": 0.06282027065753937, "learning_rate": 1.4811102223339942e-05, "loss": 0.5500996708869934, "step": 5847 }, { "epoch": 1.080562645078588, "grad_norm": 0.06660337001085281, "learning_rate": 1.4809353264923741e-05, "loss": 0.3677659332752228, "step": 5848 }, { "epoch": 1.0807474217874837, "grad_norm": 0.08197630196809769, "learning_rate": 1.480760411511033e-05, "loss": 0.5850522518157959, "step": 5849 }, { "epoch": 1.0809321984963796, "grad_norm": 0.08148553967475891, "learning_rate": 1.4805854773969314e-05, "loss": 0.556190013885498, "step": 5850 }, { "epoch": 1.0811169752052754, "grad_norm": 0.089113749563694, "learning_rate": 1.4804105241570312e-05, "loss": 0.6048898696899414, "step": 5851 }, { "epoch": 1.0813017519141712, "grad_norm": 0.06875333935022354, "learning_rate": 1.4802355517982956e-05, "loss": 0.5353031754493713, "step": 5852 }, { "epoch": 1.081486528623067, "grad_norm": 0.06257513910531998, "learning_rate": 1.4800605603276873e-05, "loss": 0.5051626563072205, "step": 5853 }, { "epoch": 1.0816713053319629, "grad_norm": 0.07647594064474106, "learning_rate": 1.4798855497521705e-05, "loss": 0.4592037796974182, "step": 5854 }, { "epoch": 1.0818560820408587, "grad_norm": 0.07302510738372803, "learning_rate": 1.4797105200787102e-05, "loss": 0.4558219611644745, "step": 5855 }, { "epoch": 1.0820408587497545, "grad_norm": 0.07816193252801895, "learning_rate": 1.479535471314272e-05, "loss": 0.47629034519195557, "step": 5856 }, { "epoch": 1.0822256354586504, "grad_norm": 0.06925367563962936, "learning_rate": 1.4793604034658224e-05, "loss": 0.4966387450695038, "step": 5857 }, { "epoch": 1.0824104121675462, "grad_norm": 0.0733964666724205, "learning_rate": 1.4791853165403284e-05, "loss": 0.4893820881843567, "step": 5858 }, { "epoch": 1.082595188876442, "grad_norm": 0.06309165805578232, "learning_rate": 1.4790102105447582e-05, "loss": 0.4359748661518097, "step": 5859 }, { "epoch": 1.082779965585338, "grad_norm": 0.08443053811788559, "learning_rate": 1.4788350854860803e-05, "loss": 0.5850485563278198, "step": 5860 }, { "epoch": 1.082964742294234, "grad_norm": 0.067757748067379, "learning_rate": 1.4786599413712634e-05, "loss": 0.4967274069786072, "step": 5861 }, { "epoch": 1.0831495190031297, "grad_norm": 0.06535761803388596, "learning_rate": 1.478484778207279e-05, "loss": 0.546383261680603, "step": 5862 }, { "epoch": 1.0833342957120256, "grad_norm": 0.07297582179307938, "learning_rate": 1.4783095960010973e-05, "loss": 0.6665569543838501, "step": 5863 }, { "epoch": 1.0835190724209214, "grad_norm": 0.08054815232753754, "learning_rate": 1.4781343947596903e-05, "loss": 0.5554509162902832, "step": 5864 }, { "epoch": 1.0837038491298172, "grad_norm": 0.06834076344966888, "learning_rate": 1.4779591744900298e-05, "loss": 0.47249260544776917, "step": 5865 }, { "epoch": 1.083888625838713, "grad_norm": 0.07324875891208649, "learning_rate": 1.4777839351990898e-05, "loss": 0.546453058719635, "step": 5866 }, { "epoch": 1.0840734025476089, "grad_norm": 0.07995382696390152, "learning_rate": 1.4776086768938438e-05, "loss": 0.5270389318466187, "step": 5867 }, { "epoch": 1.0842581792565047, "grad_norm": 0.054281000047922134, "learning_rate": 1.4774333995812669e-05, "loss": 0.3481077551841736, "step": 5868 }, { "epoch": 1.0844429559654005, "grad_norm": 0.0845366045832634, "learning_rate": 1.4772581032683343e-05, "loss": 0.6981854438781738, "step": 5869 }, { "epoch": 1.0846277326742964, "grad_norm": 0.07454873621463776, "learning_rate": 1.4770827879620227e-05, "loss": 0.5273903608322144, "step": 5870 }, { "epoch": 1.0848125093831922, "grad_norm": 0.0768999382853508, "learning_rate": 1.4769074536693082e-05, "loss": 0.500775933265686, "step": 5871 }, { "epoch": 1.084997286092088, "grad_norm": 0.06956993043422699, "learning_rate": 1.4767321003971693e-05, "loss": 0.3707350492477417, "step": 5872 }, { "epoch": 1.0851820628009838, "grad_norm": 0.074930340051651, "learning_rate": 1.4765567281525843e-05, "loss": 0.5382987856864929, "step": 5873 }, { "epoch": 1.0853668395098799, "grad_norm": 0.07988627254962921, "learning_rate": 1.4763813369425325e-05, "loss": 0.6682331562042236, "step": 5874 }, { "epoch": 1.0855516162187757, "grad_norm": 0.07061360031366348, "learning_rate": 1.4762059267739939e-05, "loss": 0.40944498777389526, "step": 5875 }, { "epoch": 1.0857363929276715, "grad_norm": 0.07396155595779419, "learning_rate": 1.4760304976539492e-05, "loss": 0.47943374514579773, "step": 5876 }, { "epoch": 1.0859211696365674, "grad_norm": 0.08270414173603058, "learning_rate": 1.47585504958938e-05, "loss": 0.4946344494819641, "step": 5877 }, { "epoch": 1.0861059463454632, "grad_norm": 0.08222021907567978, "learning_rate": 1.4756795825872687e-05, "loss": 0.5393399596214294, "step": 5878 }, { "epoch": 1.086290723054359, "grad_norm": 0.07117902487516403, "learning_rate": 1.4755040966545982e-05, "loss": 0.5953274369239807, "step": 5879 }, { "epoch": 1.0864754997632549, "grad_norm": 0.06577019393444061, "learning_rate": 1.4753285917983522e-05, "loss": 0.39420297741889954, "step": 5880 }, { "epoch": 1.0866602764721507, "grad_norm": 0.06357182562351227, "learning_rate": 1.4751530680255155e-05, "loss": 0.38492023944854736, "step": 5881 }, { "epoch": 1.0868450531810465, "grad_norm": 0.06568669527769089, "learning_rate": 1.4749775253430732e-05, "loss": 0.47780489921569824, "step": 5882 }, { "epoch": 1.0870298298899423, "grad_norm": 0.05697185918688774, "learning_rate": 1.4748019637580116e-05, "loss": 0.4504018723964691, "step": 5883 }, { "epoch": 1.0872146065988382, "grad_norm": 0.06915974617004395, "learning_rate": 1.4746263832773168e-05, "loss": 0.5515559315681458, "step": 5884 }, { "epoch": 1.087399383307734, "grad_norm": 0.06538233160972595, "learning_rate": 1.4744507839079772e-05, "loss": 0.572848379611969, "step": 5885 }, { "epoch": 1.0875841600166298, "grad_norm": 0.07176683843135834, "learning_rate": 1.4742751656569806e-05, "loss": 0.4935082495212555, "step": 5886 }, { "epoch": 1.0877689367255257, "grad_norm": 0.07255774736404419, "learning_rate": 1.4740995285313165e-05, "loss": 0.5060724020004272, "step": 5887 }, { "epoch": 1.0879537134344215, "grad_norm": 0.058834806084632874, "learning_rate": 1.4739238725379743e-05, "loss": 0.5066661238670349, "step": 5888 }, { "epoch": 1.0881384901433175, "grad_norm": 0.07082164287567139, "learning_rate": 1.4737481976839448e-05, "loss": 0.4143075942993164, "step": 5889 }, { "epoch": 1.0883232668522134, "grad_norm": 0.09433870762586594, "learning_rate": 1.473572503976219e-05, "loss": 0.7209493517875671, "step": 5890 }, { "epoch": 1.0885080435611092, "grad_norm": 0.06608996540307999, "learning_rate": 1.4733967914217893e-05, "loss": 0.502673864364624, "step": 5891 }, { "epoch": 1.088692820270005, "grad_norm": 0.07874344289302826, "learning_rate": 1.4732210600276481e-05, "loss": 0.5536506175994873, "step": 5892 }, { "epoch": 1.0888775969789009, "grad_norm": 0.07617444545030594, "learning_rate": 1.4730453098007896e-05, "loss": 0.5813974738121033, "step": 5893 }, { "epoch": 1.0890623736877967, "grad_norm": 0.05752871558070183, "learning_rate": 1.4728695407482074e-05, "loss": 0.33988383412361145, "step": 5894 }, { "epoch": 1.0892471503966925, "grad_norm": 0.08659808337688446, "learning_rate": 1.4726937528768971e-05, "loss": 0.6173219680786133, "step": 5895 }, { "epoch": 1.0894319271055883, "grad_norm": 0.06591004878282547, "learning_rate": 1.4725179461938544e-05, "loss": 0.515119731426239, "step": 5896 }, { "epoch": 1.0896167038144842, "grad_norm": 0.08528603613376617, "learning_rate": 1.4723421207060756e-05, "loss": 0.6740525960922241, "step": 5897 }, { "epoch": 1.08980148052338, "grad_norm": 0.0652061402797699, "learning_rate": 1.4721662764205583e-05, "loss": 0.483233243227005, "step": 5898 }, { "epoch": 1.0899862572322758, "grad_norm": 0.08202153444290161, "learning_rate": 1.4719904133443e-05, "loss": 0.5837781429290771, "step": 5899 }, { "epoch": 1.0901710339411717, "grad_norm": 0.09289800375699997, "learning_rate": 1.4718145314843004e-05, "loss": 0.6462467908859253, "step": 5900 }, { "epoch": 1.0903558106500675, "grad_norm": 0.07023860514163971, "learning_rate": 1.4716386308475583e-05, "loss": 0.5342769622802734, "step": 5901 }, { "epoch": 1.0905405873589633, "grad_norm": 0.07424052804708481, "learning_rate": 1.4714627114410744e-05, "loss": 0.6305635571479797, "step": 5902 }, { "epoch": 1.0907253640678594, "grad_norm": 0.09783070534467697, "learning_rate": 1.4712867732718496e-05, "loss": 0.7436492443084717, "step": 5903 }, { "epoch": 1.0909101407767552, "grad_norm": 0.09632259607315063, "learning_rate": 1.4711108163468857e-05, "loss": 0.7273596525192261, "step": 5904 }, { "epoch": 1.091094917485651, "grad_norm": 0.06209570914506912, "learning_rate": 1.4709348406731851e-05, "loss": 0.4110969305038452, "step": 5905 }, { "epoch": 1.0912796941945468, "grad_norm": 0.06332927197217941, "learning_rate": 1.4707588462577513e-05, "loss": 0.42797258496284485, "step": 5906 }, { "epoch": 1.0914644709034427, "grad_norm": 0.09194277226924896, "learning_rate": 1.4705828331075883e-05, "loss": 0.7485794425010681, "step": 5907 }, { "epoch": 1.0916492476123385, "grad_norm": 0.07481253892183304, "learning_rate": 1.4704068012297009e-05, "loss": 0.4699377417564392, "step": 5908 }, { "epoch": 1.0918340243212343, "grad_norm": 0.0622544102370739, "learning_rate": 1.4702307506310943e-05, "loss": 0.45635709166526794, "step": 5909 }, { "epoch": 1.0920188010301302, "grad_norm": 0.07138415426015854, "learning_rate": 1.4700546813187749e-05, "loss": 0.5593393445014954, "step": 5910 }, { "epoch": 1.092203577739026, "grad_norm": 0.0714128315448761, "learning_rate": 1.4698785932997499e-05, "loss": 0.5067830681800842, "step": 5911 }, { "epoch": 1.0923883544479218, "grad_norm": 0.07260000705718994, "learning_rate": 1.4697024865810269e-05, "loss": 0.5159979462623596, "step": 5912 }, { "epoch": 1.0925731311568176, "grad_norm": 0.07511389255523682, "learning_rate": 1.4695263611696146e-05, "loss": 0.5461058020591736, "step": 5913 }, { "epoch": 1.0927579078657135, "grad_norm": 0.06673587113618851, "learning_rate": 1.469350217072522e-05, "loss": 0.38038143515586853, "step": 5914 }, { "epoch": 1.0929426845746093, "grad_norm": 0.08167888224124908, "learning_rate": 1.4691740542967594e-05, "loss": 0.5731920599937439, "step": 5915 }, { "epoch": 1.0931274612835051, "grad_norm": 0.07429999113082886, "learning_rate": 1.4689978728493368e-05, "loss": 0.5720574259757996, "step": 5916 }, { "epoch": 1.093312237992401, "grad_norm": 0.0745207667350769, "learning_rate": 1.4688216727372664e-05, "loss": 0.47233352065086365, "step": 5917 }, { "epoch": 1.0934970147012968, "grad_norm": 0.08932074159383774, "learning_rate": 1.4686454539675606e-05, "loss": 0.6862395405769348, "step": 5918 }, { "epoch": 1.0936817914101928, "grad_norm": 0.07582224160432816, "learning_rate": 1.4684692165472316e-05, "loss": 0.6259335875511169, "step": 5919 }, { "epoch": 1.0938665681190887, "grad_norm": 0.07972133904695511, "learning_rate": 1.468292960483293e-05, "loss": 0.4001395106315613, "step": 5920 }, { "epoch": 1.0940513448279845, "grad_norm": 0.06810474395751953, "learning_rate": 1.4681166857827603e-05, "loss": 0.567063570022583, "step": 5921 }, { "epoch": 1.0942361215368803, "grad_norm": 0.06280171126127243, "learning_rate": 1.4679403924526479e-05, "loss": 0.41762790083885193, "step": 5922 }, { "epoch": 1.0944208982457762, "grad_norm": 0.06112830713391304, "learning_rate": 1.4677640804999716e-05, "loss": 0.462650328874588, "step": 5923 }, { "epoch": 1.094605674954672, "grad_norm": 0.0655970349907875, "learning_rate": 1.4675877499317486e-05, "loss": 0.39639896154403687, "step": 5924 }, { "epoch": 1.0947904516635678, "grad_norm": 0.08832792192697525, "learning_rate": 1.467411400754996e-05, "loss": 0.6320725679397583, "step": 5925 }, { "epoch": 1.0949752283724636, "grad_norm": 0.06304946541786194, "learning_rate": 1.467235032976732e-05, "loss": 0.44484469294548035, "step": 5926 }, { "epoch": 1.0951600050813595, "grad_norm": 0.07686931639909744, "learning_rate": 1.4670586466039753e-05, "loss": 0.5905748009681702, "step": 5927 }, { "epoch": 1.0953447817902553, "grad_norm": 0.08200360089540482, "learning_rate": 1.4668822416437461e-05, "loss": 0.5519489645957947, "step": 5928 }, { "epoch": 1.0955295584991511, "grad_norm": 0.0726717934012413, "learning_rate": 1.4667058181030642e-05, "loss": 0.594283938407898, "step": 5929 }, { "epoch": 1.095714335208047, "grad_norm": 0.07586503028869629, "learning_rate": 1.4665293759889506e-05, "loss": 0.5935530662536621, "step": 5930 }, { "epoch": 1.0958991119169428, "grad_norm": 0.06125348433852196, "learning_rate": 1.4663529153084275e-05, "loss": 0.4546845853328705, "step": 5931 }, { "epoch": 1.0960838886258386, "grad_norm": 0.060897890478372574, "learning_rate": 1.4661764360685178e-05, "loss": 0.4350041151046753, "step": 5932 }, { "epoch": 1.0962686653347347, "grad_norm": 0.059175021946430206, "learning_rate": 1.465999938276244e-05, "loss": 0.4221407175064087, "step": 5933 }, { "epoch": 1.0964534420436305, "grad_norm": 0.06250432133674622, "learning_rate": 1.4658234219386307e-05, "loss": 0.46823054552078247, "step": 5934 }, { "epoch": 1.0966382187525263, "grad_norm": 0.06827010214328766, "learning_rate": 1.4656468870627028e-05, "loss": 0.4594465494155884, "step": 5935 }, { "epoch": 1.0968229954614221, "grad_norm": 0.08258457481861115, "learning_rate": 1.4654703336554852e-05, "loss": 0.6088943481445312, "step": 5936 }, { "epoch": 1.097007772170318, "grad_norm": 0.07347897440195084, "learning_rate": 1.4652937617240049e-05, "loss": 0.529721736907959, "step": 5937 }, { "epoch": 1.0971925488792138, "grad_norm": 0.06577084958553314, "learning_rate": 1.4651171712752886e-05, "loss": 0.4842659533023834, "step": 5938 }, { "epoch": 1.0973773255881096, "grad_norm": 0.06812813133001328, "learning_rate": 1.464940562316364e-05, "loss": 0.5418099164962769, "step": 5939 }, { "epoch": 1.0975621022970055, "grad_norm": 0.06750774383544922, "learning_rate": 1.4647639348542593e-05, "loss": 0.43857085704803467, "step": 5940 }, { "epoch": 1.0977468790059013, "grad_norm": 0.0675678625702858, "learning_rate": 1.4645872888960045e-05, "loss": 0.39154133200645447, "step": 5941 }, { "epoch": 1.0979316557147971, "grad_norm": 0.0663144662976265, "learning_rate": 1.4644106244486291e-05, "loss": 0.5225595831871033, "step": 5942 }, { "epoch": 1.098116432423693, "grad_norm": 0.062324948608875275, "learning_rate": 1.4642339415191636e-05, "loss": 0.496875137090683, "step": 5943 }, { "epoch": 1.0983012091325888, "grad_norm": 0.07069569081068039, "learning_rate": 1.4640572401146396e-05, "loss": 0.513957679271698, "step": 5944 }, { "epoch": 1.0984859858414846, "grad_norm": 0.08459258824586868, "learning_rate": 1.4638805202420896e-05, "loss": 0.5632613897323608, "step": 5945 }, { "epoch": 1.0986707625503804, "grad_norm": 0.0628662258386612, "learning_rate": 1.4637037819085458e-05, "loss": 0.48443713784217834, "step": 5946 }, { "epoch": 1.0988555392592763, "grad_norm": 0.06784513592720032, "learning_rate": 1.4635270251210423e-05, "loss": 0.47913438081741333, "step": 5947 }, { "epoch": 1.0990403159681723, "grad_norm": 0.0707409605383873, "learning_rate": 1.4633502498866136e-05, "loss": 0.4980694353580475, "step": 5948 }, { "epoch": 1.0992250926770681, "grad_norm": 0.05797749012708664, "learning_rate": 1.4631734562122945e-05, "loss": 0.36766472458839417, "step": 5949 }, { "epoch": 1.099409869385964, "grad_norm": 0.095884308218956, "learning_rate": 1.4629966441051208e-05, "loss": 0.6991642713546753, "step": 5950 }, { "epoch": 1.0995946460948598, "grad_norm": 0.06615202128887177, "learning_rate": 1.4628198135721295e-05, "loss": 0.5221999883651733, "step": 5951 }, { "epoch": 1.0997794228037556, "grad_norm": 0.05633965879678726, "learning_rate": 1.4626429646203575e-05, "loss": 0.4555867314338684, "step": 5952 }, { "epoch": 1.0999641995126515, "grad_norm": 0.07581228017807007, "learning_rate": 1.4624660972568427e-05, "loss": 0.5978538990020752, "step": 5953 }, { "epoch": 1.1001489762215473, "grad_norm": 0.08281077444553375, "learning_rate": 1.4622892114886243e-05, "loss": 0.618635892868042, "step": 5954 }, { "epoch": 1.100333752930443, "grad_norm": 0.0922408252954483, "learning_rate": 1.4621123073227414e-05, "loss": 0.5132061243057251, "step": 5955 }, { "epoch": 1.100518529639339, "grad_norm": 0.0687292069196701, "learning_rate": 1.4619353847662346e-05, "loss": 0.43602538108825684, "step": 5956 }, { "epoch": 1.1007033063482348, "grad_norm": 0.061973683536052704, "learning_rate": 1.4617584438261445e-05, "loss": 0.3978612720966339, "step": 5957 }, { "epoch": 1.1008880830571306, "grad_norm": 0.07644131034612656, "learning_rate": 1.4615814845095134e-05, "loss": 0.558164656162262, "step": 5958 }, { "epoch": 1.1010728597660264, "grad_norm": 0.0646175965666771, "learning_rate": 1.461404506823383e-05, "loss": 0.5489871501922607, "step": 5959 }, { "epoch": 1.1012576364749223, "grad_norm": 0.08470311760902405, "learning_rate": 1.4612275107747968e-05, "loss": 0.5196349024772644, "step": 5960 }, { "epoch": 1.101442413183818, "grad_norm": 0.07912751287221909, "learning_rate": 1.4610504963707988e-05, "loss": 0.659913957118988, "step": 5961 }, { "epoch": 1.1016271898927141, "grad_norm": 0.07972804456949234, "learning_rate": 1.4608734636184333e-05, "loss": 0.5457559823989868, "step": 5962 }, { "epoch": 1.10181196660161, "grad_norm": 0.0745624229311943, "learning_rate": 1.4606964125247461e-05, "loss": 0.46467500925064087, "step": 5963 }, { "epoch": 1.1019967433105058, "grad_norm": 0.07970108091831207, "learning_rate": 1.4605193430967827e-05, "loss": 0.7072663903236389, "step": 5964 }, { "epoch": 1.1021815200194016, "grad_norm": 0.10347652435302734, "learning_rate": 1.4603422553415905e-05, "loss": 0.759666919708252, "step": 5965 }, { "epoch": 1.1023662967282974, "grad_norm": 0.06851720064878464, "learning_rate": 1.4601651492662166e-05, "loss": 0.420702189207077, "step": 5966 }, { "epoch": 1.1025510734371933, "grad_norm": 0.051265593618154526, "learning_rate": 1.4599880248777094e-05, "loss": 0.3847259283065796, "step": 5967 }, { "epoch": 1.102735850146089, "grad_norm": 0.08739349246025085, "learning_rate": 1.4598108821831181e-05, "loss": 0.5255281329154968, "step": 5968 }, { "epoch": 1.102920626854985, "grad_norm": 0.08335155993700027, "learning_rate": 1.4596337211894922e-05, "loss": 0.529498279094696, "step": 5969 }, { "epoch": 1.1031054035638808, "grad_norm": 0.08187707513570786, "learning_rate": 1.4594565419038822e-05, "loss": 0.5741780996322632, "step": 5970 }, { "epoch": 1.1032901802727766, "grad_norm": 0.08028721064329147, "learning_rate": 1.459279344333339e-05, "loss": 0.5540239810943604, "step": 5971 }, { "epoch": 1.1034749569816724, "grad_norm": 0.07067148387432098, "learning_rate": 1.4591021284849152e-05, "loss": 0.43878358602523804, "step": 5972 }, { "epoch": 1.1036597336905682, "grad_norm": 0.06293197721242905, "learning_rate": 1.4589248943656629e-05, "loss": 0.3931485414505005, "step": 5973 }, { "epoch": 1.103844510399464, "grad_norm": 0.07255510240793228, "learning_rate": 1.4587476419826354e-05, "loss": 0.5277048349380493, "step": 5974 }, { "epoch": 1.10402928710836, "grad_norm": 0.06751251220703125, "learning_rate": 1.4585703713428873e-05, "loss": 0.5238932371139526, "step": 5975 }, { "epoch": 1.1042140638172557, "grad_norm": 0.06556138396263123, "learning_rate": 1.4583930824534729e-05, "loss": 0.41263192892074585, "step": 5976 }, { "epoch": 1.1043988405261518, "grad_norm": 0.07805372029542923, "learning_rate": 1.4582157753214482e-05, "loss": 0.6438567638397217, "step": 5977 }, { "epoch": 1.1045836172350476, "grad_norm": 0.07314425706863403, "learning_rate": 1.4580384499538688e-05, "loss": 0.49132171273231506, "step": 5978 }, { "epoch": 1.1047683939439434, "grad_norm": 0.08974375575780869, "learning_rate": 1.4578611063577925e-05, "loss": 0.7477900981903076, "step": 5979 }, { "epoch": 1.1049531706528393, "grad_norm": 0.08745142817497253, "learning_rate": 1.4576837445402765e-05, "loss": 0.5474734902381897, "step": 5980 }, { "epoch": 1.105137947361735, "grad_norm": 0.08209695667028427, "learning_rate": 1.4575063645083792e-05, "loss": 0.5888841152191162, "step": 5981 }, { "epoch": 1.105322724070631, "grad_norm": 0.0720118060708046, "learning_rate": 1.4573289662691601e-05, "loss": 0.5148651599884033, "step": 5982 }, { "epoch": 1.1055075007795268, "grad_norm": 0.07805000245571136, "learning_rate": 1.457151549829679e-05, "loss": 0.6746365427970886, "step": 5983 }, { "epoch": 1.1056922774884226, "grad_norm": 0.08017796277999878, "learning_rate": 1.4569741151969963e-05, "loss": 0.5076448321342468, "step": 5984 }, { "epoch": 1.1058770541973184, "grad_norm": 0.08330678194761276, "learning_rate": 1.4567966623781736e-05, "loss": 0.5435523986816406, "step": 5985 }, { "epoch": 1.1060618309062142, "grad_norm": 0.07538160681724548, "learning_rate": 1.4566191913802728e-05, "loss": 0.5347604155540466, "step": 5986 }, { "epoch": 1.10624660761511, "grad_norm": 0.06279928982257843, "learning_rate": 1.456441702210357e-05, "loss": 0.38746389746665955, "step": 5987 }, { "epoch": 1.106431384324006, "grad_norm": 0.09230431169271469, "learning_rate": 1.4562641948754891e-05, "loss": 0.6264443397521973, "step": 5988 }, { "epoch": 1.1066161610329017, "grad_norm": 0.065264031291008, "learning_rate": 1.456086669382734e-05, "loss": 0.5556307435035706, "step": 5989 }, { "epoch": 1.1068009377417976, "grad_norm": 0.07666502147912979, "learning_rate": 1.4559091257391562e-05, "loss": 0.4763059914112091, "step": 5990 }, { "epoch": 1.1069857144506936, "grad_norm": 0.07860376685857773, "learning_rate": 1.4557315639518216e-05, "loss": 0.5905876159667969, "step": 5991 }, { "epoch": 1.1071704911595894, "grad_norm": 0.07574237138032913, "learning_rate": 1.4555539840277968e-05, "loss": 0.5085563063621521, "step": 5992 }, { "epoch": 1.1073552678684853, "grad_norm": 0.06739070266485214, "learning_rate": 1.4553763859741484e-05, "loss": 0.5284940004348755, "step": 5993 }, { "epoch": 1.107540044577381, "grad_norm": 0.06255804002285004, "learning_rate": 1.4551987697979447e-05, "loss": 0.3962157964706421, "step": 5994 }, { "epoch": 1.107724821286277, "grad_norm": 0.07420317083597183, "learning_rate": 1.4550211355062537e-05, "loss": 0.6526479125022888, "step": 5995 }, { "epoch": 1.1079095979951727, "grad_norm": 0.08326204866170883, "learning_rate": 1.4548434831061456e-05, "loss": 0.5966176390647888, "step": 5996 }, { "epoch": 1.1080943747040686, "grad_norm": 0.06730221211910248, "learning_rate": 1.4546658126046898e-05, "loss": 0.5358225703239441, "step": 5997 }, { "epoch": 1.1082791514129644, "grad_norm": 0.06927043199539185, "learning_rate": 1.4544881240089568e-05, "loss": 0.5677108764648438, "step": 5998 }, { "epoch": 1.1084639281218602, "grad_norm": 0.05860039219260216, "learning_rate": 1.4543104173260187e-05, "loss": 0.4075140058994293, "step": 5999 }, { "epoch": 1.108648704830756, "grad_norm": 0.08386365324258804, "learning_rate": 1.454132692562947e-05, "loss": 0.5389184951782227, "step": 6000 }, { "epoch": 1.108648704830756, "eval_loss": 0.6118724346160889, "eval_runtime": 159.1496, "eval_samples_per_second": 114.54, "eval_steps_per_second": 14.32, "step": 6000 }, { "epoch": 1.1088334815396519, "grad_norm": 0.08243799954652786, "learning_rate": 1.4539549497268155e-05, "loss": 0.534057080745697, "step": 6001 }, { "epoch": 1.1090182582485477, "grad_norm": 0.07323864102363586, "learning_rate": 1.4537771888246967e-05, "loss": 0.526421070098877, "step": 6002 }, { "epoch": 1.1092030349574435, "grad_norm": 0.07760884612798691, "learning_rate": 1.4535994098636656e-05, "loss": 0.5368732810020447, "step": 6003 }, { "epoch": 1.1093878116663394, "grad_norm": 0.08383920788764954, "learning_rate": 1.4534216128507974e-05, "loss": 0.7088597416877747, "step": 6004 }, { "epoch": 1.1095725883752352, "grad_norm": 0.0841090977191925, "learning_rate": 1.4532437977931672e-05, "loss": 0.6554641127586365, "step": 6005 }, { "epoch": 1.109757365084131, "grad_norm": 0.09048908203840256, "learning_rate": 1.453065964697852e-05, "loss": 0.6777673363685608, "step": 6006 }, { "epoch": 1.109942141793027, "grad_norm": 0.07771904021501541, "learning_rate": 1.452888113571929e-05, "loss": 0.5503280758857727, "step": 6007 }, { "epoch": 1.110126918501923, "grad_norm": 0.06885528564453125, "learning_rate": 1.452710244422476e-05, "loss": 0.43942975997924805, "step": 6008 }, { "epoch": 1.1103116952108187, "grad_norm": 0.07003390043973923, "learning_rate": 1.4525323572565713e-05, "loss": 0.48695823550224304, "step": 6009 }, { "epoch": 1.1104964719197146, "grad_norm": 0.08813440054655075, "learning_rate": 1.4523544520812949e-05, "loss": 0.7120080590248108, "step": 6010 }, { "epoch": 1.1106812486286104, "grad_norm": 0.0674251914024353, "learning_rate": 1.4521765289037264e-05, "loss": 0.48009634017944336, "step": 6011 }, { "epoch": 1.1108660253375062, "grad_norm": 0.06363063305616379, "learning_rate": 1.4519985877309468e-05, "loss": 0.4351237714290619, "step": 6012 }, { "epoch": 1.111050802046402, "grad_norm": 0.06190628930926323, "learning_rate": 1.4518206285700373e-05, "loss": 0.43532848358154297, "step": 6013 }, { "epoch": 1.1112355787552979, "grad_norm": 0.06464696675539017, "learning_rate": 1.4516426514280806e-05, "loss": 0.3579815924167633, "step": 6014 }, { "epoch": 1.1114203554641937, "grad_norm": 0.06103749945759773, "learning_rate": 1.4514646563121592e-05, "loss": 0.37997567653656006, "step": 6015 }, { "epoch": 1.1116051321730895, "grad_norm": 0.07939308881759644, "learning_rate": 1.4512866432293571e-05, "loss": 0.5923547148704529, "step": 6016 }, { "epoch": 1.1117899088819854, "grad_norm": 0.07335177063941956, "learning_rate": 1.4511086121867584e-05, "loss": 0.5131193399429321, "step": 6017 }, { "epoch": 1.1119746855908812, "grad_norm": 0.0859297588467598, "learning_rate": 1.4509305631914485e-05, "loss": 0.6909281611442566, "step": 6018 }, { "epoch": 1.112159462299777, "grad_norm": 0.08923984318971634, "learning_rate": 1.4507524962505129e-05, "loss": 0.6461963653564453, "step": 6019 }, { "epoch": 1.112344239008673, "grad_norm": 0.044790174812078476, "learning_rate": 1.4505744113710378e-05, "loss": 0.25615447759628296, "step": 6020 }, { "epoch": 1.112529015717569, "grad_norm": 0.06457506865262985, "learning_rate": 1.4503963085601112e-05, "loss": 0.368161141872406, "step": 6021 }, { "epoch": 1.1127137924264647, "grad_norm": 0.07680926471948624, "learning_rate": 1.4502181878248203e-05, "loss": 0.48869234323501587, "step": 6022 }, { "epoch": 1.1128985691353606, "grad_norm": 0.05992692708969116, "learning_rate": 1.4500400491722542e-05, "loss": 0.3915933072566986, "step": 6023 }, { "epoch": 1.1130833458442564, "grad_norm": 0.08236189186573029, "learning_rate": 1.4498618926095023e-05, "loss": 0.7077838182449341, "step": 6024 }, { "epoch": 1.1132681225531522, "grad_norm": 0.07403172552585602, "learning_rate": 1.4496837181436545e-05, "loss": 0.5123920440673828, "step": 6025 }, { "epoch": 1.113452899262048, "grad_norm": 0.08864319324493408, "learning_rate": 1.4495055257818011e-05, "loss": 0.9052874445915222, "step": 6026 }, { "epoch": 1.1136376759709439, "grad_norm": 0.0638665109872818, "learning_rate": 1.4493273155310349e-05, "loss": 0.5355097055435181, "step": 6027 }, { "epoch": 1.1138224526798397, "grad_norm": 0.07314230501651764, "learning_rate": 1.4491490873984468e-05, "loss": 0.5114896893501282, "step": 6028 }, { "epoch": 1.1140072293887355, "grad_norm": 0.06396160274744034, "learning_rate": 1.4489708413911303e-05, "loss": 0.5015410780906677, "step": 6029 }, { "epoch": 1.1141920060976314, "grad_norm": 0.07526998221874237, "learning_rate": 1.4487925775161789e-05, "loss": 0.6208897829055786, "step": 6030 }, { "epoch": 1.1143767828065272, "grad_norm": 0.06430046260356903, "learning_rate": 1.4486142957806873e-05, "loss": 0.5269572734832764, "step": 6031 }, { "epoch": 1.114561559515423, "grad_norm": 0.06466152518987656, "learning_rate": 1.4484359961917497e-05, "loss": 0.4159741997718811, "step": 6032 }, { "epoch": 1.1147463362243188, "grad_norm": 0.06660066545009613, "learning_rate": 1.4482576787564628e-05, "loss": 0.5687733292579651, "step": 6033 }, { "epoch": 1.1149311129332147, "grad_norm": 0.07294777780771255, "learning_rate": 1.448079343481923e-05, "loss": 0.5561234951019287, "step": 6034 }, { "epoch": 1.1151158896421105, "grad_norm": 0.07686596363782883, "learning_rate": 1.4479009903752268e-05, "loss": 0.5526153445243835, "step": 6035 }, { "epoch": 1.1153006663510066, "grad_norm": 0.0635218620300293, "learning_rate": 1.4477226194434724e-05, "loss": 0.48672422766685486, "step": 6036 }, { "epoch": 1.1154854430599024, "grad_norm": 0.06493838876485825, "learning_rate": 1.4475442306937586e-05, "loss": 0.4204823076725006, "step": 6037 }, { "epoch": 1.1156702197687982, "grad_norm": 0.0830598846077919, "learning_rate": 1.447365824133185e-05, "loss": 0.6063317060470581, "step": 6038 }, { "epoch": 1.115854996477694, "grad_norm": 0.07490938156843185, "learning_rate": 1.4471873997688506e-05, "loss": 0.5406398177146912, "step": 6039 }, { "epoch": 1.1160397731865899, "grad_norm": 0.07630017399787903, "learning_rate": 1.447008957607857e-05, "loss": 0.5382445454597473, "step": 6040 }, { "epoch": 1.1162245498954857, "grad_norm": 0.06891347467899323, "learning_rate": 1.4468304976573056e-05, "loss": 0.4742557406425476, "step": 6041 }, { "epoch": 1.1164093266043815, "grad_norm": 0.06572108715772629, "learning_rate": 1.4466520199242982e-05, "loss": 0.499061644077301, "step": 6042 }, { "epoch": 1.1165941033132774, "grad_norm": 0.09027126431465149, "learning_rate": 1.4464735244159376e-05, "loss": 0.6984020471572876, "step": 6043 }, { "epoch": 1.1167788800221732, "grad_norm": 0.061515845358371735, "learning_rate": 1.446295011139328e-05, "loss": 0.36859866976737976, "step": 6044 }, { "epoch": 1.116963656731069, "grad_norm": 0.06884491443634033, "learning_rate": 1.446116480101573e-05, "loss": 0.42635002732276917, "step": 6045 }, { "epoch": 1.1171484334399648, "grad_norm": 0.07418891042470932, "learning_rate": 1.4459379313097777e-05, "loss": 0.6263567805290222, "step": 6046 }, { "epoch": 1.1173332101488607, "grad_norm": 0.06352093070745468, "learning_rate": 1.445759364771048e-05, "loss": 0.5206384062767029, "step": 6047 }, { "epoch": 1.1175179868577565, "grad_norm": 0.08683977276086807, "learning_rate": 1.4455807804924902e-05, "loss": 0.7265470623970032, "step": 6048 }, { "epoch": 1.1177027635666523, "grad_norm": 0.06020265817642212, "learning_rate": 1.4454021784812113e-05, "loss": 0.3667547404766083, "step": 6049 }, { "epoch": 1.1178875402755484, "grad_norm": 0.06311333179473877, "learning_rate": 1.4452235587443193e-05, "loss": 0.3707239329814911, "step": 6050 }, { "epoch": 1.1180723169844442, "grad_norm": 0.06688230484724045, "learning_rate": 1.4450449212889226e-05, "loss": 0.5023731589317322, "step": 6051 }, { "epoch": 1.11825709369334, "grad_norm": 0.06343791633844376, "learning_rate": 1.4448662661221302e-05, "loss": 0.433843195438385, "step": 6052 }, { "epoch": 1.1184418704022359, "grad_norm": 0.0687842145562172, "learning_rate": 1.4446875932510522e-05, "loss": 0.47582367062568665, "step": 6053 }, { "epoch": 1.1186266471111317, "grad_norm": 0.09454551339149475, "learning_rate": 1.4445089026827997e-05, "loss": 0.6751272678375244, "step": 6054 }, { "epoch": 1.1188114238200275, "grad_norm": 0.07350743561983109, "learning_rate": 1.4443301944244832e-05, "loss": 0.5118169784545898, "step": 6055 }, { "epoch": 1.1189962005289233, "grad_norm": 0.07297584414482117, "learning_rate": 1.4441514684832147e-05, "loss": 0.5605136156082153, "step": 6056 }, { "epoch": 1.1191809772378192, "grad_norm": 0.07259882241487503, "learning_rate": 1.443972724866108e-05, "loss": 0.3932836055755615, "step": 6057 }, { "epoch": 1.119365753946715, "grad_norm": 0.076852947473526, "learning_rate": 1.4437939635802759e-05, "loss": 0.5650920271873474, "step": 6058 }, { "epoch": 1.1195505306556108, "grad_norm": 0.06762345135211945, "learning_rate": 1.4436151846328321e-05, "loss": 0.5524746179580688, "step": 6059 }, { "epoch": 1.1197353073645067, "grad_norm": 0.06991656869649887, "learning_rate": 1.4434363880308917e-05, "loss": 0.5342642664909363, "step": 6060 }, { "epoch": 1.1199200840734025, "grad_norm": 0.07431400567293167, "learning_rate": 1.4432575737815709e-05, "loss": 0.5111920833587646, "step": 6061 }, { "epoch": 1.1201048607822983, "grad_norm": 0.06559804826974869, "learning_rate": 1.4430787418919851e-05, "loss": 0.4708617031574249, "step": 6062 }, { "epoch": 1.1202896374911941, "grad_norm": 0.08211726695299149, "learning_rate": 1.4428998923692517e-05, "loss": 0.5830162763595581, "step": 6063 }, { "epoch": 1.12047441420009, "grad_norm": 0.06784723699092865, "learning_rate": 1.4427210252204882e-05, "loss": 0.6211737990379333, "step": 6064 }, { "epoch": 1.120659190908986, "grad_norm": 0.07016149163246155, "learning_rate": 1.4425421404528133e-05, "loss": 0.5034101605415344, "step": 6065 }, { "epoch": 1.1208439676178819, "grad_norm": 0.06789090484380722, "learning_rate": 1.4423632380733452e-05, "loss": 0.5031054019927979, "step": 6066 }, { "epoch": 1.1210287443267777, "grad_norm": 0.07210063934326172, "learning_rate": 1.4421843180892045e-05, "loss": 0.5558570027351379, "step": 6067 }, { "epoch": 1.1212135210356735, "grad_norm": 0.10158746689558029, "learning_rate": 1.4420053805075113e-05, "loss": 0.7106173038482666, "step": 6068 }, { "epoch": 1.1213982977445693, "grad_norm": 0.05270679295063019, "learning_rate": 1.4418264253353869e-05, "loss": 0.4389966130256653, "step": 6069 }, { "epoch": 1.1215830744534652, "grad_norm": 0.06519922614097595, "learning_rate": 1.441647452579953e-05, "loss": 0.42029258608818054, "step": 6070 }, { "epoch": 1.121767851162361, "grad_norm": 0.06086088716983795, "learning_rate": 1.4414684622483321e-05, "loss": 0.4866493046283722, "step": 6071 }, { "epoch": 1.1219526278712568, "grad_norm": 0.07039511948823929, "learning_rate": 1.4412894543476479e-05, "loss": 0.47862088680267334, "step": 6072 }, { "epoch": 1.1221374045801527, "grad_norm": 0.09172705560922623, "learning_rate": 1.4411104288850237e-05, "loss": 0.6564626693725586, "step": 6073 }, { "epoch": 1.1223221812890485, "grad_norm": 0.06046932563185692, "learning_rate": 1.4409313858675847e-05, "loss": 0.3803384006023407, "step": 6074 }, { "epoch": 1.1225069579979443, "grad_norm": 0.09191624075174332, "learning_rate": 1.440752325302456e-05, "loss": 0.6493774652481079, "step": 6075 }, { "epoch": 1.1226917347068401, "grad_norm": 0.0672389566898346, "learning_rate": 1.4405732471967637e-05, "loss": 0.43903395533561707, "step": 6076 }, { "epoch": 1.122876511415736, "grad_norm": 0.0659782737493515, "learning_rate": 1.4403941515576344e-05, "loss": 0.49504348635673523, "step": 6077 }, { "epoch": 1.1230612881246318, "grad_norm": 0.08274340629577637, "learning_rate": 1.440215038392196e-05, "loss": 0.5620540380477905, "step": 6078 }, { "epoch": 1.1232460648335278, "grad_norm": 0.06952735781669617, "learning_rate": 1.4400359077075758e-05, "loss": 0.5920997858047485, "step": 6079 }, { "epoch": 1.1234308415424237, "grad_norm": 0.06475818902254105, "learning_rate": 1.4398567595109034e-05, "loss": 0.4705714285373688, "step": 6080 }, { "epoch": 1.1236156182513195, "grad_norm": 0.0710345134139061, "learning_rate": 1.4396775938093084e-05, "loss": 0.47257503867149353, "step": 6081 }, { "epoch": 1.1238003949602153, "grad_norm": 0.07058537006378174, "learning_rate": 1.4394984106099206e-05, "loss": 0.5050140619277954, "step": 6082 }, { "epoch": 1.1239851716691112, "grad_norm": 0.06465966254472733, "learning_rate": 1.4393192099198711e-05, "loss": 0.4918656349182129, "step": 6083 }, { "epoch": 1.124169948378007, "grad_norm": 0.07355429232120514, "learning_rate": 1.4391399917462913e-05, "loss": 0.5089967250823975, "step": 6084 }, { "epoch": 1.1243547250869028, "grad_norm": 0.07652002573013306, "learning_rate": 1.4389607560963139e-05, "loss": 0.47434723377227783, "step": 6085 }, { "epoch": 1.1245395017957986, "grad_norm": 0.08014384657144547, "learning_rate": 1.4387815029770715e-05, "loss": 0.6197258234024048, "step": 6086 }, { "epoch": 1.1247242785046945, "grad_norm": 0.06608502566814423, "learning_rate": 1.4386022323956983e-05, "loss": 0.5037209987640381, "step": 6087 }, { "epoch": 1.1249090552135903, "grad_norm": 0.07929182052612305, "learning_rate": 1.4384229443593285e-05, "loss": 0.6429815888404846, "step": 6088 }, { "epoch": 1.1250938319224861, "grad_norm": 0.06564852595329285, "learning_rate": 1.4382436388750968e-05, "loss": 0.4675427973270416, "step": 6089 }, { "epoch": 1.125278608631382, "grad_norm": 0.06928091496229172, "learning_rate": 1.4380643159501398e-05, "loss": 0.46712902188301086, "step": 6090 }, { "epoch": 1.1254633853402778, "grad_norm": 0.08031101524829865, "learning_rate": 1.4378849755915934e-05, "loss": 0.6043239235877991, "step": 6091 }, { "epoch": 1.1256481620491736, "grad_norm": 0.09885122627019882, "learning_rate": 1.4377056178065947e-05, "loss": 0.6168032288551331, "step": 6092 }, { "epoch": 1.1258329387580694, "grad_norm": 0.060262907296419144, "learning_rate": 1.4375262426022821e-05, "loss": 0.39597856998443604, "step": 6093 }, { "epoch": 1.1260177154669653, "grad_norm": 0.05512017384171486, "learning_rate": 1.4373468499857937e-05, "loss": 0.3198016285896301, "step": 6094 }, { "epoch": 1.1262024921758613, "grad_norm": 0.08341045677661896, "learning_rate": 1.4371674399642693e-05, "loss": 0.5664653778076172, "step": 6095 }, { "epoch": 1.1263872688847572, "grad_norm": 0.07183565199375153, "learning_rate": 1.4369880125448481e-05, "loss": 0.47869887948036194, "step": 6096 }, { "epoch": 1.126572045593653, "grad_norm": 0.07197265326976776, "learning_rate": 1.4368085677346713e-05, "loss": 0.42871785163879395, "step": 6097 }, { "epoch": 1.1267568223025488, "grad_norm": 0.08379499614238739, "learning_rate": 1.4366291055408801e-05, "loss": 0.5623905062675476, "step": 6098 }, { "epoch": 1.1269415990114446, "grad_norm": 0.08065547049045563, "learning_rate": 1.4364496259706165e-05, "loss": 0.5996270179748535, "step": 6099 }, { "epoch": 1.1271263757203405, "grad_norm": 0.07119474560022354, "learning_rate": 1.4362701290310234e-05, "loss": 0.47350260615348816, "step": 6100 }, { "epoch": 1.1273111524292363, "grad_norm": 0.06404874473810196, "learning_rate": 1.436090614729244e-05, "loss": 0.475182443857193, "step": 6101 }, { "epoch": 1.1274959291381321, "grad_norm": 0.06995225697755814, "learning_rate": 1.4359110830724222e-05, "loss": 0.5138711929321289, "step": 6102 }, { "epoch": 1.127680705847028, "grad_norm": 0.06132664531469345, "learning_rate": 1.4357315340677036e-05, "loss": 0.43427854776382446, "step": 6103 }, { "epoch": 1.1278654825559238, "grad_norm": 0.08065839856863022, "learning_rate": 1.4355519677222329e-05, "loss": 0.48715531826019287, "step": 6104 }, { "epoch": 1.1280502592648196, "grad_norm": 0.08922401815652847, "learning_rate": 1.4353723840431568e-05, "loss": 0.6146382689476013, "step": 6105 }, { "epoch": 1.1282350359737154, "grad_norm": 0.08260243386030197, "learning_rate": 1.4351927830376215e-05, "loss": 0.6335489749908447, "step": 6106 }, { "epoch": 1.1284198126826113, "grad_norm": 0.07442322373390198, "learning_rate": 1.4350131647127754e-05, "loss": 0.49106425046920776, "step": 6107 }, { "epoch": 1.1286045893915073, "grad_norm": 0.05707094073295593, "learning_rate": 1.434833529075766e-05, "loss": 0.4567444920539856, "step": 6108 }, { "epoch": 1.1287893661004031, "grad_norm": 0.09413350373506546, "learning_rate": 1.4346538761337428e-05, "loss": 0.6615262627601624, "step": 6109 }, { "epoch": 1.128974142809299, "grad_norm": 0.07104384899139404, "learning_rate": 1.434474205893855e-05, "loss": 0.47242528200149536, "step": 6110 }, { "epoch": 1.1291589195181948, "grad_norm": 0.06719780713319778, "learning_rate": 1.434294518363253e-05, "loss": 0.49545174837112427, "step": 6111 }, { "epoch": 1.1293436962270906, "grad_norm": 0.06883067637681961, "learning_rate": 1.434114813549088e-05, "loss": 0.40031710267066956, "step": 6112 }, { "epoch": 1.1295284729359865, "grad_norm": 0.07639193534851074, "learning_rate": 1.433935091458512e-05, "loss": 0.6572412252426147, "step": 6113 }, { "epoch": 1.1297132496448823, "grad_norm": 0.08633438497781754, "learning_rate": 1.4337553520986767e-05, "loss": 0.55412757396698, "step": 6114 }, { "epoch": 1.1298980263537781, "grad_norm": 0.05746854469180107, "learning_rate": 1.4335755954767352e-05, "loss": 0.36753028631210327, "step": 6115 }, { "epoch": 1.130082803062674, "grad_norm": 0.06715678423643112, "learning_rate": 1.4333958215998416e-05, "loss": 0.31338319182395935, "step": 6116 }, { "epoch": 1.1302675797715698, "grad_norm": 0.06040837615728378, "learning_rate": 1.4332160304751503e-05, "loss": 0.4689605236053467, "step": 6117 }, { "epoch": 1.1304523564804656, "grad_norm": 0.0614374615252018, "learning_rate": 1.4330362221098164e-05, "loss": 0.43147027492523193, "step": 6118 }, { "epoch": 1.1306371331893614, "grad_norm": 0.08722779154777527, "learning_rate": 1.4328563965109954e-05, "loss": 0.8032390475273132, "step": 6119 }, { "epoch": 1.1308219098982573, "grad_norm": 0.0980684831738472, "learning_rate": 1.4326765536858444e-05, "loss": 0.6666969060897827, "step": 6120 }, { "epoch": 1.131006686607153, "grad_norm": 0.09519989043474197, "learning_rate": 1.4324966936415199e-05, "loss": 0.7287571430206299, "step": 6121 }, { "epoch": 1.131191463316049, "grad_norm": 0.06062401831150055, "learning_rate": 1.4323168163851801e-05, "loss": 0.4222762882709503, "step": 6122 }, { "epoch": 1.1313762400249447, "grad_norm": 0.07143185287714005, "learning_rate": 1.432136921923984e-05, "loss": 0.49105358123779297, "step": 6123 }, { "epoch": 1.1315610167338406, "grad_norm": 0.050204817205667496, "learning_rate": 1.4319570102650902e-05, "loss": 0.33912599086761475, "step": 6124 }, { "epoch": 1.1317457934427366, "grad_norm": 0.06106993183493614, "learning_rate": 1.4317770814156586e-05, "loss": 0.38527730107307434, "step": 6125 }, { "epoch": 1.1319305701516325, "grad_norm": 0.06932901591062546, "learning_rate": 1.4315971353828502e-05, "loss": 0.5505931973457336, "step": 6126 }, { "epoch": 1.1321153468605283, "grad_norm": 0.08598202466964722, "learning_rate": 1.4314171721738262e-05, "loss": 0.7473421096801758, "step": 6127 }, { "epoch": 1.132300123569424, "grad_norm": 0.06986215710639954, "learning_rate": 1.4312371917957482e-05, "loss": 0.48098546266555786, "step": 6128 }, { "epoch": 1.13248490027832, "grad_norm": 0.08303413540124893, "learning_rate": 1.4310571942557791e-05, "loss": 0.6047471165657043, "step": 6129 }, { "epoch": 1.1326696769872158, "grad_norm": 0.08076757192611694, "learning_rate": 1.4308771795610826e-05, "loss": 0.5525906085968018, "step": 6130 }, { "epoch": 1.1328544536961116, "grad_norm": 0.0855245515704155, "learning_rate": 1.4306971477188223e-05, "loss": 0.7699131965637207, "step": 6131 }, { "epoch": 1.1330392304050074, "grad_norm": 0.07134143263101578, "learning_rate": 1.4305170987361625e-05, "loss": 0.4692050814628601, "step": 6132 }, { "epoch": 1.1332240071139033, "grad_norm": 0.08296187222003937, "learning_rate": 1.4303370326202697e-05, "loss": 0.5955659747123718, "step": 6133 }, { "epoch": 1.133408783822799, "grad_norm": 0.0706106424331665, "learning_rate": 1.4301569493783094e-05, "loss": 0.5117326974868774, "step": 6134 }, { "epoch": 1.133593560531695, "grad_norm": 0.08032231777906418, "learning_rate": 1.429976849017448e-05, "loss": 0.6375417709350586, "step": 6135 }, { "epoch": 1.1337783372405907, "grad_norm": 0.06780054420232773, "learning_rate": 1.4297967315448531e-05, "loss": 0.4244823157787323, "step": 6136 }, { "epoch": 1.1339631139494868, "grad_norm": 0.07901187241077423, "learning_rate": 1.4296165969676934e-05, "loss": 0.7523353695869446, "step": 6137 }, { "epoch": 1.1341478906583826, "grad_norm": 0.07902489602565765, "learning_rate": 1.4294364452931368e-05, "loss": 0.6589572429656982, "step": 6138 }, { "epoch": 1.1343326673672784, "grad_norm": 0.08424142748117447, "learning_rate": 1.4292562765283533e-05, "loss": 0.6515619158744812, "step": 6139 }, { "epoch": 1.1345174440761743, "grad_norm": 0.07620836049318314, "learning_rate": 1.4290760906805133e-05, "loss": 0.583553671836853, "step": 6140 }, { "epoch": 1.13470222078507, "grad_norm": 0.06322798877954483, "learning_rate": 1.4288958877567872e-05, "loss": 0.5742557048797607, "step": 6141 }, { "epoch": 1.134886997493966, "grad_norm": 0.07936611771583557, "learning_rate": 1.4287156677643462e-05, "loss": 0.5712964534759521, "step": 6142 }, { "epoch": 1.1350717742028618, "grad_norm": 0.07948508113622665, "learning_rate": 1.4285354307103631e-05, "loss": 0.5610211491584778, "step": 6143 }, { "epoch": 1.1352565509117576, "grad_norm": 0.08358441293239594, "learning_rate": 1.4283551766020107e-05, "loss": 0.5952147841453552, "step": 6144 }, { "epoch": 1.1354413276206534, "grad_norm": 0.06404483318328857, "learning_rate": 1.4281749054464625e-05, "loss": 0.42181798815727234, "step": 6145 }, { "epoch": 1.1356261043295492, "grad_norm": 0.05818679928779602, "learning_rate": 1.4279946172508923e-05, "loss": 0.4475404620170593, "step": 6146 }, { "epoch": 1.135810881038445, "grad_norm": 0.07719261199235916, "learning_rate": 1.4278143120224757e-05, "loss": 0.7213553786277771, "step": 6147 }, { "epoch": 1.135995657747341, "grad_norm": 0.060571376234292984, "learning_rate": 1.4276339897683877e-05, "loss": 0.36024120450019836, "step": 6148 }, { "epoch": 1.1361804344562367, "grad_norm": 0.06773599237203598, "learning_rate": 1.4274536504958048e-05, "loss": 0.44352224469184875, "step": 6149 }, { "epoch": 1.1363652111651326, "grad_norm": 0.07060518860816956, "learning_rate": 1.4272732942119044e-05, "loss": 0.5673344731330872, "step": 6150 }, { "epoch": 1.1365499878740284, "grad_norm": 0.08268341422080994, "learning_rate": 1.4270929209238632e-05, "loss": 0.5837506651878357, "step": 6151 }, { "epoch": 1.1367347645829242, "grad_norm": 0.11173179000616074, "learning_rate": 1.4269125306388599e-05, "loss": 0.8468867540359497, "step": 6152 }, { "epoch": 1.13691954129182, "grad_norm": 0.06694246083498001, "learning_rate": 1.4267321233640736e-05, "loss": 0.40071406960487366, "step": 6153 }, { "epoch": 1.137104318000716, "grad_norm": 0.05182839557528496, "learning_rate": 1.426551699106684e-05, "loss": 0.36508703231811523, "step": 6154 }, { "epoch": 1.137289094709612, "grad_norm": 0.1034051924943924, "learning_rate": 1.4263712578738714e-05, "loss": 0.7002763152122498, "step": 6155 }, { "epoch": 1.1374738714185078, "grad_norm": 0.06058888137340546, "learning_rate": 1.4261907996728164e-05, "loss": 0.45546698570251465, "step": 6156 }, { "epoch": 1.1376586481274036, "grad_norm": 0.08832358568906784, "learning_rate": 1.426010324510701e-05, "loss": 0.5462371706962585, "step": 6157 }, { "epoch": 1.1378434248362994, "grad_norm": 0.06523042917251587, "learning_rate": 1.4258298323947078e-05, "loss": 0.46396517753601074, "step": 6158 }, { "epoch": 1.1380282015451952, "grad_norm": 0.08396708220243454, "learning_rate": 1.425649323332019e-05, "loss": 0.6781487464904785, "step": 6159 }, { "epoch": 1.138212978254091, "grad_norm": 0.07320381700992584, "learning_rate": 1.425468797329819e-05, "loss": 0.4183204174041748, "step": 6160 }, { "epoch": 1.138397754962987, "grad_norm": 0.062182389199733734, "learning_rate": 1.4252882543952923e-05, "loss": 0.4624471962451935, "step": 6161 }, { "epoch": 1.1385825316718827, "grad_norm": 0.06833712011575699, "learning_rate": 1.4251076945356233e-05, "loss": 0.4128556251525879, "step": 6162 }, { "epoch": 1.1387673083807786, "grad_norm": 0.08125222474336624, "learning_rate": 1.4249271177579985e-05, "loss": 0.6006513833999634, "step": 6163 }, { "epoch": 1.1389520850896744, "grad_norm": 0.07562055438756943, "learning_rate": 1.4247465240696035e-05, "loss": 0.6590495109558105, "step": 6164 }, { "epoch": 1.1391368617985702, "grad_norm": 0.0725562646985054, "learning_rate": 1.4245659134776255e-05, "loss": 0.5063749551773071, "step": 6165 }, { "epoch": 1.139321638507466, "grad_norm": 0.06678872555494308, "learning_rate": 1.4243852859892527e-05, "loss": 0.5019094347953796, "step": 6166 }, { "epoch": 1.139506415216362, "grad_norm": 0.0874992311000824, "learning_rate": 1.4242046416116732e-05, "loss": 0.6056196689605713, "step": 6167 }, { "epoch": 1.139691191925258, "grad_norm": 0.06671545654535294, "learning_rate": 1.4240239803520761e-05, "loss": 0.5596376061439514, "step": 6168 }, { "epoch": 1.1398759686341537, "grad_norm": 0.06795782595872879, "learning_rate": 1.4238433022176513e-05, "loss": 0.4091525375843048, "step": 6169 }, { "epoch": 1.1400607453430496, "grad_norm": 0.08453847467899323, "learning_rate": 1.423662607215589e-05, "loss": 0.5426561236381531, "step": 6170 }, { "epoch": 1.1402455220519454, "grad_norm": 0.0707712396979332, "learning_rate": 1.4234818953530805e-05, "loss": 0.4745196998119354, "step": 6171 }, { "epoch": 1.1404302987608412, "grad_norm": 0.06672391295433044, "learning_rate": 1.4233011666373174e-05, "loss": 0.4232742488384247, "step": 6172 }, { "epoch": 1.140615075469737, "grad_norm": 0.06453859061002731, "learning_rate": 1.423120421075492e-05, "loss": 0.45850545167922974, "step": 6173 }, { "epoch": 1.140799852178633, "grad_norm": 0.09031231701374054, "learning_rate": 1.4229396586747978e-05, "loss": 0.6560296416282654, "step": 6174 }, { "epoch": 1.1409846288875287, "grad_norm": 0.07331298291683197, "learning_rate": 1.4227588794424284e-05, "loss": 0.5255630612373352, "step": 6175 }, { "epoch": 1.1411694055964245, "grad_norm": 0.0803334191441536, "learning_rate": 1.4225780833855782e-05, "loss": 0.6760626435279846, "step": 6176 }, { "epoch": 1.1413541823053204, "grad_norm": 0.09699446707963943, "learning_rate": 1.4223972705114427e-05, "loss": 0.5984539985656738, "step": 6177 }, { "epoch": 1.1415389590142162, "grad_norm": 0.0784691572189331, "learning_rate": 1.4222164408272168e-05, "loss": 0.5706794857978821, "step": 6178 }, { "epoch": 1.141723735723112, "grad_norm": 0.069001205265522, "learning_rate": 1.4220355943400979e-05, "loss": 0.48837417364120483, "step": 6179 }, { "epoch": 1.1419085124320079, "grad_norm": 0.07547164708375931, "learning_rate": 1.4218547310572826e-05, "loss": 0.45486870408058167, "step": 6180 }, { "epoch": 1.1420932891409037, "grad_norm": 0.07717430591583252, "learning_rate": 1.4216738509859688e-05, "loss": 0.5167067646980286, "step": 6181 }, { "epoch": 1.1422780658497995, "grad_norm": 0.0627954751253128, "learning_rate": 1.4214929541333548e-05, "loss": 0.3714037239551544, "step": 6182 }, { "epoch": 1.1424628425586956, "grad_norm": 0.07511921972036362, "learning_rate": 1.4213120405066401e-05, "loss": 0.5616947412490845, "step": 6183 }, { "epoch": 1.1426476192675914, "grad_norm": 0.07544286549091339, "learning_rate": 1.4211311101130246e-05, "loss": 0.5095857381820679, "step": 6184 }, { "epoch": 1.1428323959764872, "grad_norm": 0.0724918395280838, "learning_rate": 1.420950162959708e-05, "loss": 0.4429417848587036, "step": 6185 }, { "epoch": 1.143017172685383, "grad_norm": 0.05164062976837158, "learning_rate": 1.4207691990538919e-05, "loss": 0.3569068908691406, "step": 6186 }, { "epoch": 1.1432019493942789, "grad_norm": 0.05280061066150665, "learning_rate": 1.4205882184027784e-05, "loss": 0.3826059401035309, "step": 6187 }, { "epoch": 1.1433867261031747, "grad_norm": 0.07177378982305527, "learning_rate": 1.4204072210135693e-05, "loss": 0.5403726100921631, "step": 6188 }, { "epoch": 1.1435715028120705, "grad_norm": 0.06955300271511078, "learning_rate": 1.4202262068934684e-05, "loss": 0.451949805021286, "step": 6189 }, { "epoch": 1.1437562795209664, "grad_norm": 0.07127396017313004, "learning_rate": 1.420045176049679e-05, "loss": 0.42342081665992737, "step": 6190 }, { "epoch": 1.1439410562298622, "grad_norm": 0.06536135077476501, "learning_rate": 1.4198641284894059e-05, "loss": 0.44511792063713074, "step": 6191 }, { "epoch": 1.144125832938758, "grad_norm": 0.07281245291233063, "learning_rate": 1.419683064219854e-05, "loss": 0.4214109182357788, "step": 6192 }, { "epoch": 1.1443106096476539, "grad_norm": 0.08532760292291641, "learning_rate": 1.419501983248229e-05, "loss": 0.7385908961296082, "step": 6193 }, { "epoch": 1.1444953863565497, "grad_norm": 0.06349904835224152, "learning_rate": 1.419320885581738e-05, "loss": 0.4616239666938782, "step": 6194 }, { "epoch": 1.1446801630654455, "grad_norm": 0.10051140189170837, "learning_rate": 1.4191397712275871e-05, "loss": 0.7320863008499146, "step": 6195 }, { "epoch": 1.1448649397743416, "grad_norm": 0.09361196309328079, "learning_rate": 1.418958640192985e-05, "loss": 0.6548205018043518, "step": 6196 }, { "epoch": 1.1450497164832374, "grad_norm": 0.06741033494472504, "learning_rate": 1.4187774924851394e-05, "loss": 0.5115160346031189, "step": 6197 }, { "epoch": 1.1452344931921332, "grad_norm": 0.07313167303800583, "learning_rate": 1.41859632811126e-05, "loss": 0.500661313533783, "step": 6198 }, { "epoch": 1.145419269901029, "grad_norm": 0.08618351817131042, "learning_rate": 1.4184151470785565e-05, "loss": 0.6208575367927551, "step": 6199 }, { "epoch": 1.1456040466099249, "grad_norm": 0.052863236516714096, "learning_rate": 1.4182339493942389e-05, "loss": 0.4039441645145416, "step": 6200 }, { "epoch": 1.1457888233188207, "grad_norm": 0.07417035847902298, "learning_rate": 1.418052735065519e-05, "loss": 0.41991057991981506, "step": 6201 }, { "epoch": 1.1459736000277165, "grad_norm": 0.0847744345664978, "learning_rate": 1.4178715040996078e-05, "loss": 0.49262678623199463, "step": 6202 }, { "epoch": 1.1461583767366124, "grad_norm": 0.06870493292808533, "learning_rate": 1.4176902565037184e-05, "loss": 0.5651652216911316, "step": 6203 }, { "epoch": 1.1463431534455082, "grad_norm": 0.0765766054391861, "learning_rate": 1.4175089922850633e-05, "loss": 0.5696491003036499, "step": 6204 }, { "epoch": 1.146527930154404, "grad_norm": 0.07318069785833359, "learning_rate": 1.4173277114508565e-05, "loss": 0.49148693680763245, "step": 6205 }, { "epoch": 1.1467127068632998, "grad_norm": 0.06859167665243149, "learning_rate": 1.4171464140083127e-05, "loss": 0.514062762260437, "step": 6206 }, { "epoch": 1.1468974835721957, "grad_norm": 0.06501016020774841, "learning_rate": 1.4169650999646466e-05, "loss": 0.7300497889518738, "step": 6207 }, { "epoch": 1.1470822602810915, "grad_norm": 0.07674582302570343, "learning_rate": 1.416783769327074e-05, "loss": 0.47462305426597595, "step": 6208 }, { "epoch": 1.1472670369899873, "grad_norm": 0.08116015046834946, "learning_rate": 1.4166024221028111e-05, "loss": 0.6110057234764099, "step": 6209 }, { "epoch": 1.1474518136988832, "grad_norm": 0.06770819425582886, "learning_rate": 1.4164210582990756e-05, "loss": 0.5061403512954712, "step": 6210 }, { "epoch": 1.147636590407779, "grad_norm": 0.06339634209871292, "learning_rate": 1.4162396779230844e-05, "loss": 0.4284840226173401, "step": 6211 }, { "epoch": 1.147821367116675, "grad_norm": 0.0632321760058403, "learning_rate": 1.4160582809820566e-05, "loss": 0.3946978449821472, "step": 6212 }, { "epoch": 1.1480061438255709, "grad_norm": 0.06572254002094269, "learning_rate": 1.4158768674832108e-05, "loss": 0.5568139553070068, "step": 6213 }, { "epoch": 1.1481909205344667, "grad_norm": 0.09508955478668213, "learning_rate": 1.4156954374337669e-05, "loss": 0.7293902635574341, "step": 6214 }, { "epoch": 1.1483756972433625, "grad_norm": 0.08176790177822113, "learning_rate": 1.4155139908409447e-05, "loss": 0.5837011933326721, "step": 6215 }, { "epoch": 1.1485604739522584, "grad_norm": 0.07929205894470215, "learning_rate": 1.415332527711966e-05, "loss": 0.612383246421814, "step": 6216 }, { "epoch": 1.1487452506611542, "grad_norm": 0.06473682820796967, "learning_rate": 1.415151048054052e-05, "loss": 0.6698353886604309, "step": 6217 }, { "epoch": 1.14893002737005, "grad_norm": 0.08424325287342072, "learning_rate": 1.414969551874425e-05, "loss": 0.6709126830101013, "step": 6218 }, { "epoch": 1.1491148040789458, "grad_norm": 0.0723126232624054, "learning_rate": 1.4147880391803087e-05, "loss": 0.6147063970565796, "step": 6219 }, { "epoch": 1.1492995807878417, "grad_norm": 0.07604363560676575, "learning_rate": 1.4146065099789257e-05, "loss": 0.5668408870697021, "step": 6220 }, { "epoch": 1.1494843574967375, "grad_norm": 0.06804443150758743, "learning_rate": 1.4144249642775006e-05, "loss": 0.503356397151947, "step": 6221 }, { "epoch": 1.1496691342056333, "grad_norm": 0.08500366657972336, "learning_rate": 1.4142434020832587e-05, "loss": 0.5444658398628235, "step": 6222 }, { "epoch": 1.1498539109145292, "grad_norm": 0.06647278368473053, "learning_rate": 1.4140618234034254e-05, "loss": 0.4239024221897125, "step": 6223 }, { "epoch": 1.150038687623425, "grad_norm": 0.06973589956760406, "learning_rate": 1.4138802282452269e-05, "loss": 0.48607513308525085, "step": 6224 }, { "epoch": 1.150223464332321, "grad_norm": 0.08751551061868668, "learning_rate": 1.4136986166158901e-05, "loss": 0.7162054777145386, "step": 6225 }, { "epoch": 1.1504082410412169, "grad_norm": 0.07949352264404297, "learning_rate": 1.4135169885226427e-05, "loss": 0.563558042049408, "step": 6226 }, { "epoch": 1.1505930177501127, "grad_norm": 0.07515808939933777, "learning_rate": 1.4133353439727128e-05, "loss": 0.5348332524299622, "step": 6227 }, { "epoch": 1.1507777944590085, "grad_norm": 0.07966458052396774, "learning_rate": 1.4131536829733294e-05, "loss": 0.6537339091300964, "step": 6228 }, { "epoch": 1.1509625711679043, "grad_norm": 0.06919048726558685, "learning_rate": 1.4129720055317224e-05, "loss": 0.4522291421890259, "step": 6229 }, { "epoch": 1.1511473478768002, "grad_norm": 0.06940469890832901, "learning_rate": 1.4127903116551214e-05, "loss": 0.5043797492980957, "step": 6230 }, { "epoch": 1.151332124585696, "grad_norm": 0.0794387236237526, "learning_rate": 1.412608601350757e-05, "loss": 0.5534088015556335, "step": 6231 }, { "epoch": 1.1515169012945918, "grad_norm": 0.07416708767414093, "learning_rate": 1.4124268746258616e-05, "loss": 0.3723811209201813, "step": 6232 }, { "epoch": 1.1517016780034877, "grad_norm": 0.08943319320678711, "learning_rate": 1.412245131487667e-05, "loss": 0.6742863059043884, "step": 6233 }, { "epoch": 1.1518864547123835, "grad_norm": 0.10318424552679062, "learning_rate": 1.4120633719434058e-05, "loss": 0.7732130289077759, "step": 6234 }, { "epoch": 1.1520712314212793, "grad_norm": 0.07700960338115692, "learning_rate": 1.4118815960003114e-05, "loss": 0.5543152689933777, "step": 6235 }, { "epoch": 1.1522560081301751, "grad_norm": 0.06386572122573853, "learning_rate": 1.4116998036656183e-05, "loss": 0.44846782088279724, "step": 6236 }, { "epoch": 1.152440784839071, "grad_norm": 0.084290511906147, "learning_rate": 1.4115179949465611e-05, "loss": 0.6337743401527405, "step": 6237 }, { "epoch": 1.1526255615479668, "grad_norm": 0.07609273493289948, "learning_rate": 1.4113361698503747e-05, "loss": 0.4199293255805969, "step": 6238 }, { "epoch": 1.1528103382568626, "grad_norm": 0.06647109985351562, "learning_rate": 1.4111543283842961e-05, "loss": 0.42023780941963196, "step": 6239 }, { "epoch": 1.1529951149657585, "grad_norm": 0.07131869345903397, "learning_rate": 1.4109724705555616e-05, "loss": 0.5998886823654175, "step": 6240 }, { "epoch": 1.1531798916746543, "grad_norm": 0.06336542963981628, "learning_rate": 1.4107905963714082e-05, "loss": 0.4682580530643463, "step": 6241 }, { "epoch": 1.1533646683835503, "grad_norm": 0.08245175331830978, "learning_rate": 1.4106087058390745e-05, "loss": 0.5720283389091492, "step": 6242 }, { "epoch": 1.1535494450924462, "grad_norm": 0.09518399089574814, "learning_rate": 1.4104267989657991e-05, "loss": 0.7204375267028809, "step": 6243 }, { "epoch": 1.153734221801342, "grad_norm": 0.07756468653678894, "learning_rate": 1.4102448757588208e-05, "loss": 0.4689267873764038, "step": 6244 }, { "epoch": 1.1539189985102378, "grad_norm": 0.07330365478992462, "learning_rate": 1.4100629362253799e-05, "loss": 0.48148608207702637, "step": 6245 }, { "epoch": 1.1541037752191337, "grad_norm": 0.08221703767776489, "learning_rate": 1.4098809803727176e-05, "loss": 0.7633028030395508, "step": 6246 }, { "epoch": 1.1542885519280295, "grad_norm": 0.08841904997825623, "learning_rate": 1.4096990082080742e-05, "loss": 0.6865911483764648, "step": 6247 }, { "epoch": 1.1544733286369253, "grad_norm": 0.07374248653650284, "learning_rate": 1.4095170197386918e-05, "loss": 0.5459367632865906, "step": 6248 }, { "epoch": 1.1546581053458211, "grad_norm": 0.06705913692712784, "learning_rate": 1.409335014971814e-05, "loss": 0.3521028161048889, "step": 6249 }, { "epoch": 1.154842882054717, "grad_norm": 0.08836095035076141, "learning_rate": 1.4091529939146828e-05, "loss": 0.6287204623222351, "step": 6250 }, { "epoch": 1.1550276587636128, "grad_norm": 0.0838085412979126, "learning_rate": 1.4089709565745423e-05, "loss": 0.6495293378829956, "step": 6251 }, { "epoch": 1.1552124354725086, "grad_norm": 0.07038464397192001, "learning_rate": 1.4087889029586374e-05, "loss": 0.46939921379089355, "step": 6252 }, { "epoch": 1.1553972121814045, "grad_norm": 0.061671674251556396, "learning_rate": 1.4086068330742135e-05, "loss": 0.4827491044998169, "step": 6253 }, { "epoch": 1.1555819888903003, "grad_norm": 0.0781252309679985, "learning_rate": 1.4084247469285155e-05, "loss": 0.546686589717865, "step": 6254 }, { "epoch": 1.1557667655991963, "grad_norm": 0.11145616322755814, "learning_rate": 1.4082426445287904e-05, "loss": 0.6957457661628723, "step": 6255 }, { "epoch": 1.1559515423080922, "grad_norm": 0.059735823422670364, "learning_rate": 1.4080605258822857e-05, "loss": 0.39741113781929016, "step": 6256 }, { "epoch": 1.156136319016988, "grad_norm": 0.06381309777498245, "learning_rate": 1.4078783909962484e-05, "loss": 0.4249134957790375, "step": 6257 }, { "epoch": 1.1563210957258838, "grad_norm": 0.06865990906953812, "learning_rate": 1.407696239877927e-05, "loss": 0.5038773417472839, "step": 6258 }, { "epoch": 1.1565058724347796, "grad_norm": 0.07598444819450378, "learning_rate": 1.4075140725345713e-05, "loss": 0.5336639285087585, "step": 6259 }, { "epoch": 1.1566906491436755, "grad_norm": 0.091720350086689, "learning_rate": 1.4073318889734303e-05, "loss": 0.7236504554748535, "step": 6260 }, { "epoch": 1.1568754258525713, "grad_norm": 0.07371455430984497, "learning_rate": 1.4071496892017544e-05, "loss": 0.5036258697509766, "step": 6261 }, { "epoch": 1.1570602025614671, "grad_norm": 0.10541260242462158, "learning_rate": 1.4069674732267946e-05, "loss": 0.7018181085586548, "step": 6262 }, { "epoch": 1.157244979270363, "grad_norm": 0.06498979777097702, "learning_rate": 1.4067852410558027e-05, "loss": 0.5900062322616577, "step": 6263 }, { "epoch": 1.1574297559792588, "grad_norm": 0.07714717835187912, "learning_rate": 1.4066029926960308e-05, "loss": 0.5157914161682129, "step": 6264 }, { "epoch": 1.1576145326881546, "grad_norm": 0.059344302862882614, "learning_rate": 1.406420728154732e-05, "loss": 0.35241222381591797, "step": 6265 }, { "epoch": 1.1577993093970504, "grad_norm": 0.07745634019374847, "learning_rate": 1.4062384474391597e-05, "loss": 0.5979710817337036, "step": 6266 }, { "epoch": 1.1579840861059463, "grad_norm": 0.0804426372051239, "learning_rate": 1.4060561505565683e-05, "loss": 0.5755396485328674, "step": 6267 }, { "epoch": 1.158168862814842, "grad_norm": 0.08442779630422592, "learning_rate": 1.405873837514212e-05, "loss": 0.7087805271148682, "step": 6268 }, { "epoch": 1.158353639523738, "grad_norm": 0.05821115896105766, "learning_rate": 1.4056915083193472e-05, "loss": 0.37225276231765747, "step": 6269 }, { "epoch": 1.1585384162326338, "grad_norm": 0.07283907383680344, "learning_rate": 1.4055091629792297e-05, "loss": 0.5235207080841064, "step": 6270 }, { "epoch": 1.1587231929415298, "grad_norm": 0.063742995262146, "learning_rate": 1.4053268015011159e-05, "loss": 0.40237030386924744, "step": 6271 }, { "epoch": 1.1589079696504256, "grad_norm": 0.07434480637311935, "learning_rate": 1.4051444238922635e-05, "loss": 0.5207359194755554, "step": 6272 }, { "epoch": 1.1590927463593215, "grad_norm": 0.0759037435054779, "learning_rate": 1.404962030159931e-05, "loss": 0.4360727071762085, "step": 6273 }, { "epoch": 1.1592775230682173, "grad_norm": 0.07971856743097305, "learning_rate": 1.4047796203113761e-05, "loss": 0.580155074596405, "step": 6274 }, { "epoch": 1.1594622997771131, "grad_norm": 0.0639769434928894, "learning_rate": 1.404597194353859e-05, "loss": 0.4158743619918823, "step": 6275 }, { "epoch": 1.159647076486009, "grad_norm": 0.07871117442846298, "learning_rate": 1.4044147522946393e-05, "loss": 0.46466371417045593, "step": 6276 }, { "epoch": 1.1598318531949048, "grad_norm": 0.05663604661822319, "learning_rate": 1.4042322941409778e-05, "loss": 0.5010232329368591, "step": 6277 }, { "epoch": 1.1600166299038006, "grad_norm": 0.07029540091753006, "learning_rate": 1.4040498199001358e-05, "loss": 0.45440173149108887, "step": 6278 }, { "epoch": 1.1602014066126964, "grad_norm": 0.08714763820171356, "learning_rate": 1.4038673295793747e-05, "loss": 0.5474016666412354, "step": 6279 }, { "epoch": 1.1603861833215923, "grad_norm": 0.06908667832612991, "learning_rate": 1.403684823185958e-05, "loss": 0.45617908239364624, "step": 6280 }, { "epoch": 1.160570960030488, "grad_norm": 0.06304468214511871, "learning_rate": 1.4035023007271478e-05, "loss": 0.37683427333831787, "step": 6281 }, { "epoch": 1.160755736739384, "grad_norm": 0.06093023344874382, "learning_rate": 1.4033197622102084e-05, "loss": 0.40980395674705505, "step": 6282 }, { "epoch": 1.1609405134482798, "grad_norm": 0.056043967604637146, "learning_rate": 1.4031372076424045e-05, "loss": 0.38577473163604736, "step": 6283 }, { "epoch": 1.1611252901571758, "grad_norm": 0.06836773455142975, "learning_rate": 1.402954637031001e-05, "loss": 0.506973922252655, "step": 6284 }, { "epoch": 1.1613100668660716, "grad_norm": 0.07191284000873566, "learning_rate": 1.4027720503832637e-05, "loss": 0.47932422161102295, "step": 6285 }, { "epoch": 1.1614948435749675, "grad_norm": 0.07273761183023453, "learning_rate": 1.4025894477064586e-05, "loss": 0.5129173398017883, "step": 6286 }, { "epoch": 1.1616796202838633, "grad_norm": 0.07798660546541214, "learning_rate": 1.4024068290078531e-05, "loss": 0.6354030966758728, "step": 6287 }, { "epoch": 1.1618643969927591, "grad_norm": 0.08020185679197311, "learning_rate": 1.402224194294715e-05, "loss": 0.5300725698471069, "step": 6288 }, { "epoch": 1.162049173701655, "grad_norm": 0.08034148812294006, "learning_rate": 1.4020415435743121e-05, "loss": 0.5370227098464966, "step": 6289 }, { "epoch": 1.1622339504105508, "grad_norm": 0.09765902161598206, "learning_rate": 1.4018588768539141e-05, "loss": 0.7135058641433716, "step": 6290 }, { "epoch": 1.1624187271194466, "grad_norm": 0.06447859853506088, "learning_rate": 1.4016761941407896e-05, "loss": 0.45083102583885193, "step": 6291 }, { "epoch": 1.1626035038283424, "grad_norm": 0.08970855921506882, "learning_rate": 1.4014934954422093e-05, "loss": 0.6802244782447815, "step": 6292 }, { "epoch": 1.1627882805372383, "grad_norm": 0.0892602801322937, "learning_rate": 1.401310780765444e-05, "loss": 0.607311487197876, "step": 6293 }, { "epoch": 1.162973057246134, "grad_norm": 0.07422207295894623, "learning_rate": 1.4011280501177651e-05, "loss": 0.47422242164611816, "step": 6294 }, { "epoch": 1.16315783395503, "grad_norm": 0.07564323395490646, "learning_rate": 1.400945303506445e-05, "loss": 0.5515903234481812, "step": 6295 }, { "epoch": 1.1633426106639257, "grad_norm": 0.06369898468255997, "learning_rate": 1.400762540938756e-05, "loss": 0.4745848774909973, "step": 6296 }, { "epoch": 1.1635273873728216, "grad_norm": 0.07312898337841034, "learning_rate": 1.4005797624219718e-05, "loss": 0.5416665077209473, "step": 6297 }, { "epoch": 1.1637121640817174, "grad_norm": 0.08416175097227097, "learning_rate": 1.4003969679633664e-05, "loss": 0.5158531665802002, "step": 6298 }, { "epoch": 1.1638969407906132, "grad_norm": 0.082229383289814, "learning_rate": 1.4002141575702141e-05, "loss": 0.5584162473678589, "step": 6299 }, { "epoch": 1.1640817174995093, "grad_norm": 0.06435306370258331, "learning_rate": 1.400031331249791e-05, "loss": 0.4342013895511627, "step": 6300 }, { "epoch": 1.1642664942084051, "grad_norm": 0.06409449130296707, "learning_rate": 1.3998484890093718e-05, "loss": 0.540926992893219, "step": 6301 }, { "epoch": 1.164451270917301, "grad_norm": 0.07323973625898361, "learning_rate": 1.3996656308562341e-05, "loss": 0.45245036482810974, "step": 6302 }, { "epoch": 1.1646360476261968, "grad_norm": 0.06927787512540817, "learning_rate": 1.3994827567976543e-05, "loss": 0.4733739197254181, "step": 6303 }, { "epoch": 1.1648208243350926, "grad_norm": 0.07471513748168945, "learning_rate": 1.3992998668409107e-05, "loss": 0.5356478095054626, "step": 6304 }, { "epoch": 1.1650056010439884, "grad_norm": 0.07582755386829376, "learning_rate": 1.3991169609932822e-05, "loss": 0.5152899026870728, "step": 6305 }, { "epoch": 1.1651903777528843, "grad_norm": 0.08348732441663742, "learning_rate": 1.3989340392620467e-05, "loss": 0.6595236659049988, "step": 6306 }, { "epoch": 1.16537515446178, "grad_norm": 0.08826853334903717, "learning_rate": 1.3987511016544848e-05, "loss": 0.748777449131012, "step": 6307 }, { "epoch": 1.165559931170676, "grad_norm": 0.09934278577566147, "learning_rate": 1.3985681481778766e-05, "loss": 0.6863504648208618, "step": 6308 }, { "epoch": 1.1657447078795717, "grad_norm": 0.06777570396661758, "learning_rate": 1.3983851788395029e-05, "loss": 0.4134644865989685, "step": 6309 }, { "epoch": 1.1659294845884676, "grad_norm": 0.07621151953935623, "learning_rate": 1.3982021936466457e-05, "loss": 0.5733403563499451, "step": 6310 }, { "epoch": 1.1661142612973634, "grad_norm": 0.08632997423410416, "learning_rate": 1.3980191926065869e-05, "loss": 0.5422164797782898, "step": 6311 }, { "epoch": 1.1662990380062592, "grad_norm": 0.09219911694526672, "learning_rate": 1.3978361757266094e-05, "loss": 0.6320534944534302, "step": 6312 }, { "epoch": 1.1664838147151553, "grad_norm": 0.07941587269306183, "learning_rate": 1.3976531430139969e-05, "loss": 0.5548836588859558, "step": 6313 }, { "epoch": 1.166668591424051, "grad_norm": 0.056338176131248474, "learning_rate": 1.3974700944760331e-05, "loss": 0.34804630279541016, "step": 6314 }, { "epoch": 1.166853368132947, "grad_norm": 0.07848487794399261, "learning_rate": 1.3972870301200035e-05, "loss": 0.6259524822235107, "step": 6315 }, { "epoch": 1.1670381448418428, "grad_norm": 0.061018411070108414, "learning_rate": 1.3971039499531926e-05, "loss": 0.44650569558143616, "step": 6316 }, { "epoch": 1.1672229215507386, "grad_norm": 0.07319525629281998, "learning_rate": 1.3969208539828873e-05, "loss": 0.4377747178077698, "step": 6317 }, { "epoch": 1.1674076982596344, "grad_norm": 0.08733868598937988, "learning_rate": 1.3967377422163736e-05, "loss": 0.745111346244812, "step": 6318 }, { "epoch": 1.1675924749685302, "grad_norm": 0.05967577174305916, "learning_rate": 1.3965546146609392e-05, "loss": 0.4343259632587433, "step": 6319 }, { "epoch": 1.167777251677426, "grad_norm": 0.07628301531076431, "learning_rate": 1.3963714713238716e-05, "loss": 0.6616719961166382, "step": 6320 }, { "epoch": 1.167962028386322, "grad_norm": 0.05657539516687393, "learning_rate": 1.3961883122124595e-05, "loss": 0.47218891978263855, "step": 6321 }, { "epoch": 1.1681468050952177, "grad_norm": 0.10010165721178055, "learning_rate": 1.3960051373339922e-05, "loss": 0.695989191532135, "step": 6322 }, { "epoch": 1.1683315818041136, "grad_norm": 0.09093029797077179, "learning_rate": 1.3958219466957595e-05, "loss": 0.6357280611991882, "step": 6323 }, { "epoch": 1.1685163585130094, "grad_norm": 0.07471542060375214, "learning_rate": 1.3956387403050513e-05, "loss": 0.4901159405708313, "step": 6324 }, { "epoch": 1.1687011352219052, "grad_norm": 0.08277177810668945, "learning_rate": 1.3954555181691593e-05, "loss": 0.7276491522789001, "step": 6325 }, { "epoch": 1.168885911930801, "grad_norm": 0.07894638180732727, "learning_rate": 1.3952722802953749e-05, "loss": 0.49660179018974304, "step": 6326 }, { "epoch": 1.1690706886396969, "grad_norm": 0.07644003629684448, "learning_rate": 1.3950890266909902e-05, "loss": 0.497342050075531, "step": 6327 }, { "epoch": 1.1692554653485927, "grad_norm": 0.06979218870401382, "learning_rate": 1.3949057573632984e-05, "loss": 0.5278323292732239, "step": 6328 }, { "epoch": 1.1694402420574885, "grad_norm": 0.08275625109672546, "learning_rate": 1.394722472319593e-05, "loss": 0.6355860829353333, "step": 6329 }, { "epoch": 1.1696250187663846, "grad_norm": 0.08698844164609909, "learning_rate": 1.3945391715671684e-05, "loss": 0.6144800186157227, "step": 6330 }, { "epoch": 1.1698097954752804, "grad_norm": 0.05441611260175705, "learning_rate": 1.3943558551133186e-05, "loss": 0.35613131523132324, "step": 6331 }, { "epoch": 1.1699945721841762, "grad_norm": 0.07020799815654755, "learning_rate": 1.39417252296534e-05, "loss": 0.4802166521549225, "step": 6332 }, { "epoch": 1.170179348893072, "grad_norm": 0.05347013846039772, "learning_rate": 1.3939891751305279e-05, "loss": 0.4263867139816284, "step": 6333 }, { "epoch": 1.170364125601968, "grad_norm": 0.07271816581487656, "learning_rate": 1.3938058116161791e-05, "loss": 0.5226719379425049, "step": 6334 }, { "epoch": 1.1705489023108637, "grad_norm": 0.06094420701265335, "learning_rate": 1.3936224324295918e-05, "loss": 0.37615513801574707, "step": 6335 }, { "epoch": 1.1707336790197596, "grad_norm": 0.07450886070728302, "learning_rate": 1.3934390375780627e-05, "loss": 0.5362197756767273, "step": 6336 }, { "epoch": 1.1709184557286554, "grad_norm": 0.04974592104554176, "learning_rate": 1.3932556270688907e-05, "loss": 0.3268347680568695, "step": 6337 }, { "epoch": 1.1711032324375512, "grad_norm": 0.07232057303190231, "learning_rate": 1.3930722009093751e-05, "loss": 0.4910028576850891, "step": 6338 }, { "epoch": 1.171288009146447, "grad_norm": 0.09514714032411575, "learning_rate": 1.3928887591068158e-05, "loss": 0.594700276851654, "step": 6339 }, { "epoch": 1.1714727858553429, "grad_norm": 0.09708668291568756, "learning_rate": 1.3927053016685132e-05, "loss": 0.6765315532684326, "step": 6340 }, { "epoch": 1.1716575625642387, "grad_norm": 0.06906653195619583, "learning_rate": 1.3925218286017679e-05, "loss": 0.4455786943435669, "step": 6341 }, { "epoch": 1.1718423392731345, "grad_norm": 0.0670214593410492, "learning_rate": 1.3923383399138821e-05, "loss": 0.33591046929359436, "step": 6342 }, { "epoch": 1.1720271159820306, "grad_norm": 0.04764154553413391, "learning_rate": 1.3921548356121577e-05, "loss": 0.36550554633140564, "step": 6343 }, { "epoch": 1.1722118926909264, "grad_norm": 0.06641551852226257, "learning_rate": 1.3919713157038977e-05, "loss": 0.4700031876564026, "step": 6344 }, { "epoch": 1.1723966693998222, "grad_norm": 0.0555669404566288, "learning_rate": 1.3917877801964059e-05, "loss": 0.39579448103904724, "step": 6345 }, { "epoch": 1.172581446108718, "grad_norm": 0.07697714865207672, "learning_rate": 1.3916042290969863e-05, "loss": 0.5403766632080078, "step": 6346 }, { "epoch": 1.172766222817614, "grad_norm": 0.0931277722120285, "learning_rate": 1.391420662412943e-05, "loss": 0.6140669584274292, "step": 6347 }, { "epoch": 1.1729509995265097, "grad_norm": 0.07469156384468079, "learning_rate": 1.3912370801515821e-05, "loss": 0.47297799587249756, "step": 6348 }, { "epoch": 1.1731357762354055, "grad_norm": 0.0785590186715126, "learning_rate": 1.39105348232021e-05, "loss": 0.6158168911933899, "step": 6349 }, { "epoch": 1.1733205529443014, "grad_norm": 0.07559805363416672, "learning_rate": 1.3908698689261322e-05, "loss": 0.6002750396728516, "step": 6350 }, { "epoch": 1.1735053296531972, "grad_norm": 0.08132782578468323, "learning_rate": 1.3906862399766566e-05, "loss": 0.5227487683296204, "step": 6351 }, { "epoch": 1.173690106362093, "grad_norm": 0.08013526350259781, "learning_rate": 1.390502595479091e-05, "loss": 0.5677516460418701, "step": 6352 }, { "epoch": 1.1738748830709889, "grad_norm": 0.059739675372838974, "learning_rate": 1.3903189354407438e-05, "loss": 0.49403083324432373, "step": 6353 }, { "epoch": 1.1740596597798847, "grad_norm": 0.07766123861074448, "learning_rate": 1.3901352598689239e-05, "loss": 0.5616611242294312, "step": 6354 }, { "epoch": 1.1742444364887805, "grad_norm": 0.08889677375555038, "learning_rate": 1.3899515687709415e-05, "loss": 0.6060358285903931, "step": 6355 }, { "epoch": 1.1744292131976763, "grad_norm": 0.08227483183145523, "learning_rate": 1.3897678621541068e-05, "loss": 0.5569694638252258, "step": 6356 }, { "epoch": 1.1746139899065722, "grad_norm": 0.062437236309051514, "learning_rate": 1.3895841400257302e-05, "loss": 0.4091362953186035, "step": 6357 }, { "epoch": 1.174798766615468, "grad_norm": 0.06542489677667618, "learning_rate": 1.3894004023931241e-05, "loss": 0.3848593235015869, "step": 6358 }, { "epoch": 1.174983543324364, "grad_norm": 0.07575369626283646, "learning_rate": 1.3892166492636001e-05, "loss": 0.6310125589370728, "step": 6359 }, { "epoch": 1.1751683200332599, "grad_norm": 0.0725938156247139, "learning_rate": 1.389032880644471e-05, "loss": 0.41989418864250183, "step": 6360 }, { "epoch": 1.1753530967421557, "grad_norm": 0.05446556583046913, "learning_rate": 1.3888490965430505e-05, "loss": 0.31907030940055847, "step": 6361 }, { "epoch": 1.1755378734510515, "grad_norm": 0.08280963450670242, "learning_rate": 1.3886652969666525e-05, "loss": 0.5975019335746765, "step": 6362 }, { "epoch": 1.1757226501599474, "grad_norm": 0.09536924213171005, "learning_rate": 1.3884814819225917e-05, "loss": 0.7690222263336182, "step": 6363 }, { "epoch": 1.1759074268688432, "grad_norm": 0.06217603012919426, "learning_rate": 1.3882976514181832e-05, "loss": 0.41438350081443787, "step": 6364 }, { "epoch": 1.176092203577739, "grad_norm": 0.06440955400466919, "learning_rate": 1.3881138054607432e-05, "loss": 0.37238502502441406, "step": 6365 }, { "epoch": 1.1762769802866349, "grad_norm": 0.09802161902189255, "learning_rate": 1.387929944057588e-05, "loss": 0.5374606847763062, "step": 6366 }, { "epoch": 1.1764617569955307, "grad_norm": 0.06957017630338669, "learning_rate": 1.3877460672160345e-05, "loss": 0.4594106376171112, "step": 6367 }, { "epoch": 1.1766465337044265, "grad_norm": 0.07976226508617401, "learning_rate": 1.3875621749434007e-05, "loss": 0.45554158091545105, "step": 6368 }, { "epoch": 1.1768313104133223, "grad_norm": 0.08345699310302734, "learning_rate": 1.3873782672470051e-05, "loss": 0.6796742677688599, "step": 6369 }, { "epoch": 1.1770160871222182, "grad_norm": 0.07811243087053299, "learning_rate": 1.387194344134166e-05, "loss": 0.4833604395389557, "step": 6370 }, { "epoch": 1.177200863831114, "grad_norm": 0.08850207924842834, "learning_rate": 1.3870104056122035e-05, "loss": 0.657397985458374, "step": 6371 }, { "epoch": 1.17738564054001, "grad_norm": 0.10178535431623459, "learning_rate": 1.386826451688438e-05, "loss": 0.6059274077415466, "step": 6372 }, { "epoch": 1.1775704172489059, "grad_norm": 0.06804938614368439, "learning_rate": 1.3866424823701895e-05, "loss": 0.5423354506492615, "step": 6373 }, { "epoch": 1.1777551939578017, "grad_norm": 0.06546375900506973, "learning_rate": 1.38645849766478e-05, "loss": 0.4411933422088623, "step": 6374 }, { "epoch": 1.1779399706666975, "grad_norm": 0.12013743817806244, "learning_rate": 1.3862744975795315e-05, "loss": 0.8030775189399719, "step": 6375 }, { "epoch": 1.1781247473755934, "grad_norm": 0.06903454661369324, "learning_rate": 1.3860904821217664e-05, "loss": 0.4790995717048645, "step": 6376 }, { "epoch": 1.1783095240844892, "grad_norm": 0.09132442623376846, "learning_rate": 1.385906451298808e-05, "loss": 0.5514641404151917, "step": 6377 }, { "epoch": 1.178494300793385, "grad_norm": 0.06411786377429962, "learning_rate": 1.3857224051179803e-05, "loss": 0.45666056871414185, "step": 6378 }, { "epoch": 1.1786790775022808, "grad_norm": 0.07586948573589325, "learning_rate": 1.3855383435866076e-05, "loss": 0.5549495220184326, "step": 6379 }, { "epoch": 1.1788638542111767, "grad_norm": 0.06605006754398346, "learning_rate": 1.3853542667120148e-05, "loss": 0.51011723279953, "step": 6380 }, { "epoch": 1.1790486309200725, "grad_norm": 0.08502263575792313, "learning_rate": 1.385170174501528e-05, "loss": 0.7431524395942688, "step": 6381 }, { "epoch": 1.1792334076289683, "grad_norm": 0.06498820334672928, "learning_rate": 1.3849860669624736e-05, "loss": 0.4389677047729492, "step": 6382 }, { "epoch": 1.1794181843378642, "grad_norm": 0.07702399045228958, "learning_rate": 1.3848019441021775e-05, "loss": 0.5601319074630737, "step": 6383 }, { "epoch": 1.17960296104676, "grad_norm": 0.08520006388425827, "learning_rate": 1.3846178059279685e-05, "loss": 0.7258798480033875, "step": 6384 }, { "epoch": 1.1797877377556558, "grad_norm": 0.0709204450249672, "learning_rate": 1.3844336524471738e-05, "loss": 0.451447069644928, "step": 6385 }, { "epoch": 1.1799725144645516, "grad_norm": 0.06795123964548111, "learning_rate": 1.3842494836671227e-05, "loss": 0.4453830122947693, "step": 6386 }, { "epoch": 1.1801572911734475, "grad_norm": 0.067714162170887, "learning_rate": 1.3840652995951443e-05, "loss": 0.4020233452320099, "step": 6387 }, { "epoch": 1.1803420678823435, "grad_norm": 0.0798187404870987, "learning_rate": 1.3838811002385684e-05, "loss": 0.5517411828041077, "step": 6388 }, { "epoch": 1.1805268445912394, "grad_norm": 0.08496396243572235, "learning_rate": 1.3836968856047259e-05, "loss": 0.5635435581207275, "step": 6389 }, { "epoch": 1.1807116213001352, "grad_norm": 0.07240577042102814, "learning_rate": 1.3835126557009474e-05, "loss": 0.4667511284351349, "step": 6390 }, { "epoch": 1.180896398009031, "grad_norm": 0.06981519609689713, "learning_rate": 1.3833284105345657e-05, "loss": 0.5124683380126953, "step": 6391 }, { "epoch": 1.1810811747179268, "grad_norm": 0.06850245594978333, "learning_rate": 1.3831441501129122e-05, "loss": 0.4211080074310303, "step": 6392 }, { "epoch": 1.1812659514268227, "grad_norm": 0.09302947670221329, "learning_rate": 1.3829598744433202e-05, "loss": 0.6746656894683838, "step": 6393 }, { "epoch": 1.1814507281357185, "grad_norm": 0.07104253768920898, "learning_rate": 1.3827755835331233e-05, "loss": 0.4650229811668396, "step": 6394 }, { "epoch": 1.1816355048446143, "grad_norm": 0.07435964047908783, "learning_rate": 1.3825912773896557e-05, "loss": 0.47008681297302246, "step": 6395 }, { "epoch": 1.1818202815535102, "grad_norm": 0.09043249487876892, "learning_rate": 1.3824069560202525e-05, "loss": 0.5703387260437012, "step": 6396 }, { "epoch": 1.182005058262406, "grad_norm": 0.06266016513109207, "learning_rate": 1.3822226194322486e-05, "loss": 0.500885546207428, "step": 6397 }, { "epoch": 1.1821898349713018, "grad_norm": 0.07461611926555634, "learning_rate": 1.3820382676329803e-05, "loss": 0.5794101357460022, "step": 6398 }, { "epoch": 1.1823746116801976, "grad_norm": 0.05493578687310219, "learning_rate": 1.3818539006297842e-05, "loss": 0.3821215331554413, "step": 6399 }, { "epoch": 1.1825593883890935, "grad_norm": 0.06709956377744675, "learning_rate": 1.3816695184299976e-05, "loss": 0.45987358689308167, "step": 6400 }, { "epoch": 1.1827441650979895, "grad_norm": 0.06329990178346634, "learning_rate": 1.381485121040958e-05, "loss": 0.44406333565711975, "step": 6401 }, { "epoch": 1.1829289418068853, "grad_norm": 0.07872890681028366, "learning_rate": 1.3813007084700043e-05, "loss": 0.5449680089950562, "step": 6402 }, { "epoch": 1.1831137185157812, "grad_norm": 0.07600017637014389, "learning_rate": 1.3811162807244754e-05, "loss": 0.5501166582107544, "step": 6403 }, { "epoch": 1.183298495224677, "grad_norm": 0.09407585114240646, "learning_rate": 1.380931837811711e-05, "loss": 0.7043866515159607, "step": 6404 }, { "epoch": 1.1834832719335728, "grad_norm": 0.06222414970397949, "learning_rate": 1.3807473797390509e-05, "loss": 0.35799136757850647, "step": 6405 }, { "epoch": 1.1836680486424687, "grad_norm": 0.06174860894680023, "learning_rate": 1.3805629065138365e-05, "loss": 0.510539174079895, "step": 6406 }, { "epoch": 1.1838528253513645, "grad_norm": 0.07312752306461334, "learning_rate": 1.380378418143409e-05, "loss": 0.5282796621322632, "step": 6407 }, { "epoch": 1.1840376020602603, "grad_norm": 0.08893660455942154, "learning_rate": 1.3801939146351107e-05, "loss": 0.5515533685684204, "step": 6408 }, { "epoch": 1.1842223787691561, "grad_norm": 0.0776859000325203, "learning_rate": 1.3800093959962837e-05, "loss": 0.6523489952087402, "step": 6409 }, { "epoch": 1.184407155478052, "grad_norm": 0.055580753833055496, "learning_rate": 1.3798248622342719e-05, "loss": 0.38817736506462097, "step": 6410 }, { "epoch": 1.1845919321869478, "grad_norm": 0.09317143261432648, "learning_rate": 1.3796403133564187e-05, "loss": 0.6648978590965271, "step": 6411 }, { "epoch": 1.1847767088958436, "grad_norm": 0.07249646633863449, "learning_rate": 1.379455749370069e-05, "loss": 0.4550534784793854, "step": 6412 }, { "epoch": 1.1849614856047395, "grad_norm": 0.08754712343215942, "learning_rate": 1.3792711702825674e-05, "loss": 0.7931678295135498, "step": 6413 }, { "epoch": 1.1851462623136353, "grad_norm": 0.09442269057035446, "learning_rate": 1.3790865761012599e-05, "loss": 0.6443036198616028, "step": 6414 }, { "epoch": 1.1853310390225311, "grad_norm": 0.06372959166765213, "learning_rate": 1.3789019668334928e-05, "loss": 0.4782651960849762, "step": 6415 }, { "epoch": 1.185515815731427, "grad_norm": 0.06049873307347298, "learning_rate": 1.3787173424866128e-05, "loss": 0.4206707179546356, "step": 6416 }, { "epoch": 1.1857005924403228, "grad_norm": 0.06669747084379196, "learning_rate": 1.3785327030679674e-05, "loss": 0.47748395800590515, "step": 6417 }, { "epoch": 1.1858853691492188, "grad_norm": 0.07769818603992462, "learning_rate": 1.3783480485849049e-05, "loss": 0.44561567902565, "step": 6418 }, { "epoch": 1.1860701458581147, "grad_norm": 0.07723916321992874, "learning_rate": 1.3781633790447733e-05, "loss": 0.44810453057289124, "step": 6419 }, { "epoch": 1.1862549225670105, "grad_norm": 0.09011591970920563, "learning_rate": 1.3779786944549224e-05, "loss": 0.5518773198127747, "step": 6420 }, { "epoch": 1.1864396992759063, "grad_norm": 0.06609180569648743, "learning_rate": 1.3777939948227024e-05, "loss": 0.48049649596214294, "step": 6421 }, { "epoch": 1.1866244759848021, "grad_norm": 0.08514054864645004, "learning_rate": 1.377609280155463e-05, "loss": 0.6176885366439819, "step": 6422 }, { "epoch": 1.186809252693698, "grad_norm": 0.08028165996074677, "learning_rate": 1.377424550460556e-05, "loss": 0.5609257221221924, "step": 6423 }, { "epoch": 1.1869940294025938, "grad_norm": 0.08063492923974991, "learning_rate": 1.3772398057453325e-05, "loss": 0.6729696989059448, "step": 6424 }, { "epoch": 1.1871788061114896, "grad_norm": 0.06459198147058487, "learning_rate": 1.377055046017145e-05, "loss": 0.540610134601593, "step": 6425 }, { "epoch": 1.1873635828203855, "grad_norm": 0.07490245252847672, "learning_rate": 1.3768702712833461e-05, "loss": 0.5625375509262085, "step": 6426 }, { "epoch": 1.1875483595292813, "grad_norm": 0.07173041999340057, "learning_rate": 1.3766854815512897e-05, "loss": 0.4706256091594696, "step": 6427 }, { "epoch": 1.1877331362381771, "grad_norm": 0.0744413509964943, "learning_rate": 1.3765006768283297e-05, "loss": 0.5037400126457214, "step": 6428 }, { "epoch": 1.187917912947073, "grad_norm": 0.08372724056243896, "learning_rate": 1.3763158571218205e-05, "loss": 0.6533783674240112, "step": 6429 }, { "epoch": 1.1881026896559688, "grad_norm": 0.07452093809843063, "learning_rate": 1.3761310224391176e-05, "loss": 0.45203134417533875, "step": 6430 }, { "epoch": 1.1882874663648648, "grad_norm": 0.06394612044095993, "learning_rate": 1.3759461727875768e-05, "loss": 0.3614550232887268, "step": 6431 }, { "epoch": 1.1884722430737606, "grad_norm": 0.06935583800077438, "learning_rate": 1.3757613081745546e-05, "loss": 0.4750102758407593, "step": 6432 }, { "epoch": 1.1886570197826565, "grad_norm": 0.07817787677049637, "learning_rate": 1.3755764286074076e-05, "loss": 0.5745018124580383, "step": 6433 }, { "epoch": 1.1888417964915523, "grad_norm": 0.0575583279132843, "learning_rate": 1.375391534093494e-05, "loss": 0.3608076274394989, "step": 6434 }, { "epoch": 1.1890265732004481, "grad_norm": 0.06657610833644867, "learning_rate": 1.375206624640172e-05, "loss": 0.5197997689247131, "step": 6435 }, { "epoch": 1.189211349909344, "grad_norm": 0.05878225713968277, "learning_rate": 1.3750217002547998e-05, "loss": 0.44958725571632385, "step": 6436 }, { "epoch": 1.1893961266182398, "grad_norm": 0.086529441177845, "learning_rate": 1.3748367609447375e-05, "loss": 0.5028765201568604, "step": 6437 }, { "epoch": 1.1895809033271356, "grad_norm": 0.07351250946521759, "learning_rate": 1.3746518067173449e-05, "loss": 0.5214920043945312, "step": 6438 }, { "epoch": 1.1897656800360314, "grad_norm": 0.06650044023990631, "learning_rate": 1.3744668375799823e-05, "loss": 0.4056544899940491, "step": 6439 }, { "epoch": 1.1899504567449273, "grad_norm": 0.0780612975358963, "learning_rate": 1.3742818535400111e-05, "loss": 0.5858096480369568, "step": 6440 }, { "epoch": 1.190135233453823, "grad_norm": 0.09381809085607529, "learning_rate": 1.3740968546047935e-05, "loss": 0.7084423899650574, "step": 6441 }, { "epoch": 1.190320010162719, "grad_norm": 0.08907200396060944, "learning_rate": 1.3739118407816912e-05, "loss": 0.6315073370933533, "step": 6442 }, { "epoch": 1.1905047868716148, "grad_norm": 0.07788009196519852, "learning_rate": 1.3737268120780671e-05, "loss": 0.5950636863708496, "step": 6443 }, { "epoch": 1.1906895635805106, "grad_norm": 0.08506182581186295, "learning_rate": 1.3735417685012857e-05, "loss": 0.6485891342163086, "step": 6444 }, { "epoch": 1.1908743402894064, "grad_norm": 0.06447650492191315, "learning_rate": 1.3733567100587104e-05, "loss": 0.4180641174316406, "step": 6445 }, { "epoch": 1.1910591169983022, "grad_norm": 0.08556390553712845, "learning_rate": 1.3731716367577059e-05, "loss": 0.5860201716423035, "step": 6446 }, { "epoch": 1.1912438937071983, "grad_norm": 0.07690877467393875, "learning_rate": 1.372986548605638e-05, "loss": 0.5545225143432617, "step": 6447 }, { "epoch": 1.1914286704160941, "grad_norm": 0.08372151106595993, "learning_rate": 1.3728014456098724e-05, "loss": 0.5921500325202942, "step": 6448 }, { "epoch": 1.19161344712499, "grad_norm": 0.06034170463681221, "learning_rate": 1.3726163277777755e-05, "loss": 0.5056084990501404, "step": 6449 }, { "epoch": 1.1917982238338858, "grad_norm": 0.06315089762210846, "learning_rate": 1.3724311951167144e-05, "loss": 0.5258921384811401, "step": 6450 }, { "epoch": 1.1919830005427816, "grad_norm": 0.07752560079097748, "learning_rate": 1.372246047634057e-05, "loss": 0.5884163975715637, "step": 6451 }, { "epoch": 1.1921677772516774, "grad_norm": 0.08313777297735214, "learning_rate": 1.3720608853371719e-05, "loss": 0.5612613558769226, "step": 6452 }, { "epoch": 1.1923525539605733, "grad_norm": 0.08458501100540161, "learning_rate": 1.3718757082334268e-05, "loss": 0.640018880367279, "step": 6453 }, { "epoch": 1.192537330669469, "grad_norm": 0.06649594753980637, "learning_rate": 1.3716905163301928e-05, "loss": 0.4684979319572449, "step": 6454 }, { "epoch": 1.192722107378365, "grad_norm": 0.07032130658626556, "learning_rate": 1.3715053096348387e-05, "loss": 0.47029954195022583, "step": 6455 }, { "epoch": 1.1929068840872608, "grad_norm": 0.07311615347862244, "learning_rate": 1.3713200881547357e-05, "loss": 0.43086791038513184, "step": 6456 }, { "epoch": 1.1930916607961566, "grad_norm": 0.07677126675844193, "learning_rate": 1.3711348518972547e-05, "loss": 0.542203962802887, "step": 6457 }, { "epoch": 1.1932764375050524, "grad_norm": 0.06928981095552444, "learning_rate": 1.370949600869768e-05, "loss": 0.48314666748046875, "step": 6458 }, { "epoch": 1.1934612142139482, "grad_norm": 0.08782751858234406, "learning_rate": 1.3707643350796476e-05, "loss": 0.6548619270324707, "step": 6459 }, { "epoch": 1.1936459909228443, "grad_norm": 0.0846259817481041, "learning_rate": 1.3705790545342664e-05, "loss": 0.5844980478286743, "step": 6460 }, { "epoch": 1.1938307676317401, "grad_norm": 0.07081461697816849, "learning_rate": 1.3703937592409985e-05, "loss": 0.544660210609436, "step": 6461 }, { "epoch": 1.194015544340636, "grad_norm": 0.07676222175359726, "learning_rate": 1.3702084492072181e-05, "loss": 0.5665822625160217, "step": 6462 }, { "epoch": 1.1942003210495318, "grad_norm": 0.07171925157308578, "learning_rate": 1.3700231244402988e-05, "loss": 0.564756453037262, "step": 6463 }, { "epoch": 1.1943850977584276, "grad_norm": 0.06867315620183945, "learning_rate": 1.3698377849476176e-05, "loss": 0.4594693183898926, "step": 6464 }, { "epoch": 1.1945698744673234, "grad_norm": 0.0693492516875267, "learning_rate": 1.3696524307365496e-05, "loss": 0.5229794979095459, "step": 6465 }, { "epoch": 1.1947546511762193, "grad_norm": 0.08138994872570038, "learning_rate": 1.3694670618144708e-05, "loss": 0.7047170400619507, "step": 6466 }, { "epoch": 1.194939427885115, "grad_norm": 0.07865667343139648, "learning_rate": 1.369281678188759e-05, "loss": 0.5625156164169312, "step": 6467 }, { "epoch": 1.195124204594011, "grad_norm": 0.06311140954494476, "learning_rate": 1.3690962798667921e-05, "loss": 0.45640701055526733, "step": 6468 }, { "epoch": 1.1953089813029067, "grad_norm": 0.0694119855761528, "learning_rate": 1.3689108668559476e-05, "loss": 0.4804447293281555, "step": 6469 }, { "epoch": 1.1954937580118026, "grad_norm": 0.08370231091976166, "learning_rate": 1.3687254391636052e-05, "loss": 0.4979509115219116, "step": 6470 }, { "epoch": 1.1956785347206984, "grad_norm": 0.07539594173431396, "learning_rate": 1.3685399967971436e-05, "loss": 0.5329857468605042, "step": 6471 }, { "epoch": 1.1958633114295942, "grad_norm": 0.06413303315639496, "learning_rate": 1.3683545397639433e-05, "loss": 0.3933248519897461, "step": 6472 }, { "epoch": 1.19604808813849, "grad_norm": 0.08125191926956177, "learning_rate": 1.3681690680713846e-05, "loss": 0.47538602352142334, "step": 6473 }, { "epoch": 1.196232864847386, "grad_norm": 0.0841967985033989, "learning_rate": 1.367983581726849e-05, "loss": 0.5319108366966248, "step": 6474 }, { "epoch": 1.1964176415562817, "grad_norm": 0.07825710624456406, "learning_rate": 1.3677980807377181e-05, "loss": 0.5691261887550354, "step": 6475 }, { "epoch": 1.1966024182651778, "grad_norm": 0.06753269582986832, "learning_rate": 1.3676125651113741e-05, "loss": 0.48817798495292664, "step": 6476 }, { "epoch": 1.1967871949740736, "grad_norm": 0.07528755813837051, "learning_rate": 1.3674270348552001e-05, "loss": 0.5107649564743042, "step": 6477 }, { "epoch": 1.1969719716829694, "grad_norm": 0.07238613069057465, "learning_rate": 1.36724148997658e-05, "loss": 0.5391866564750671, "step": 6478 }, { "epoch": 1.1971567483918653, "grad_norm": 0.061458688229322433, "learning_rate": 1.3670559304828972e-05, "loss": 0.4981677532196045, "step": 6479 }, { "epoch": 1.197341525100761, "grad_norm": 0.08438178896903992, "learning_rate": 1.366870356381537e-05, "loss": 0.6701396703720093, "step": 6480 }, { "epoch": 1.197526301809657, "grad_norm": 0.06598415970802307, "learning_rate": 1.3666847676798842e-05, "loss": 0.5153096914291382, "step": 6481 }, { "epoch": 1.1977110785185527, "grad_norm": 0.0857049971818924, "learning_rate": 1.3664991643853251e-05, "loss": 0.5699812173843384, "step": 6482 }, { "epoch": 1.1978958552274486, "grad_norm": 0.05555878207087517, "learning_rate": 1.3663135465052457e-05, "loss": 0.45247071981430054, "step": 6483 }, { "epoch": 1.1980806319363444, "grad_norm": 0.10718713700771332, "learning_rate": 1.3661279140470331e-05, "loss": 0.6991795897483826, "step": 6484 }, { "epoch": 1.1982654086452402, "grad_norm": 0.08743496984243393, "learning_rate": 1.3659422670180754e-05, "loss": 0.4683980345726013, "step": 6485 }, { "epoch": 1.198450185354136, "grad_norm": 0.06674148142337799, "learning_rate": 1.3657566054257598e-05, "loss": 0.4011494517326355, "step": 6486 }, { "epoch": 1.1986349620630319, "grad_norm": 0.06291612982749939, "learning_rate": 1.365570929277476e-05, "loss": 0.41058915853500366, "step": 6487 }, { "epoch": 1.1988197387719277, "grad_norm": 0.08688245713710785, "learning_rate": 1.3653852385806128e-05, "loss": 0.5843929648399353, "step": 6488 }, { "epoch": 1.1990045154808238, "grad_norm": 0.07869403064250946, "learning_rate": 1.36519953334256e-05, "loss": 0.5506920218467712, "step": 6489 }, { "epoch": 1.1991892921897196, "grad_norm": 0.06990286707878113, "learning_rate": 1.365013813570709e-05, "loss": 0.4719213545322418, "step": 6490 }, { "epoch": 1.1993740688986154, "grad_norm": 0.07826852798461914, "learning_rate": 1.3648280792724496e-05, "loss": 0.5762194991111755, "step": 6491 }, { "epoch": 1.1995588456075112, "grad_norm": 0.058163248002529144, "learning_rate": 1.3646423304551743e-05, "loss": 0.38399115204811096, "step": 6492 }, { "epoch": 1.199743622316407, "grad_norm": 0.08161134272813797, "learning_rate": 1.364456567126275e-05, "loss": 0.7120457887649536, "step": 6493 }, { "epoch": 1.199928399025303, "grad_norm": 0.07929819822311401, "learning_rate": 1.3642707892931447e-05, "loss": 0.5719782710075378, "step": 6494 }, { "epoch": 1.2001131757341987, "grad_norm": 0.08201904594898224, "learning_rate": 1.3640849969631765e-05, "loss": 0.5495536923408508, "step": 6495 }, { "epoch": 1.2002979524430946, "grad_norm": 0.07408961653709412, "learning_rate": 1.3638991901437644e-05, "loss": 0.43150046467781067, "step": 6496 }, { "epoch": 1.2004827291519904, "grad_norm": 0.08079312741756439, "learning_rate": 1.3637133688423032e-05, "loss": 0.5508987903594971, "step": 6497 }, { "epoch": 1.2006675058608862, "grad_norm": 0.0907614529132843, "learning_rate": 1.3635275330661877e-05, "loss": 0.8658058643341064, "step": 6498 }, { "epoch": 1.200852282569782, "grad_norm": 0.08234362304210663, "learning_rate": 1.3633416828228136e-05, "loss": 0.5761150121688843, "step": 6499 }, { "epoch": 1.2010370592786779, "grad_norm": 0.06498465687036514, "learning_rate": 1.3631558181195779e-05, "loss": 0.4183274805545807, "step": 6500 }, { "epoch": 1.2010370592786779, "eval_loss": 0.6012307405471802, "eval_runtime": 157.8572, "eval_samples_per_second": 115.478, "eval_steps_per_second": 14.437, "step": 6500 }, { "epoch": 1.2012218359875737, "grad_norm": 0.05880355462431908, "learning_rate": 1.3629699389638762e-05, "loss": 0.41759729385375977, "step": 6501 }, { "epoch": 1.2014066126964695, "grad_norm": 0.09420034289360046, "learning_rate": 1.3627840453631068e-05, "loss": 0.5877329707145691, "step": 6502 }, { "epoch": 1.2015913894053654, "grad_norm": 0.05163704231381416, "learning_rate": 1.362598137324667e-05, "loss": 0.3486137390136719, "step": 6503 }, { "epoch": 1.2017761661142612, "grad_norm": 0.08603861182928085, "learning_rate": 1.3624122148559563e-05, "loss": 0.5939251780509949, "step": 6504 }, { "epoch": 1.201960942823157, "grad_norm": 0.07779145240783691, "learning_rate": 1.362226277964373e-05, "loss": 0.5520979762077332, "step": 6505 }, { "epoch": 1.202145719532053, "grad_norm": 0.07623668015003204, "learning_rate": 1.362040326657317e-05, "loss": 0.6112082004547119, "step": 6506 }, { "epoch": 1.202330496240949, "grad_norm": 0.06870964169502258, "learning_rate": 1.361854360942189e-05, "loss": 0.4941728115081787, "step": 6507 }, { "epoch": 1.2025152729498447, "grad_norm": 0.08155199885368347, "learning_rate": 1.3616683808263893e-05, "loss": 0.6787418723106384, "step": 6508 }, { "epoch": 1.2027000496587406, "grad_norm": 0.06495072692632675, "learning_rate": 1.3614823863173194e-05, "loss": 0.474793404340744, "step": 6509 }, { "epoch": 1.2028848263676364, "grad_norm": 0.07195496559143066, "learning_rate": 1.361296377422382e-05, "loss": 0.5775393843650818, "step": 6510 }, { "epoch": 1.2030696030765322, "grad_norm": 0.08239276707172394, "learning_rate": 1.3611103541489787e-05, "loss": 0.47624650597572327, "step": 6511 }, { "epoch": 1.203254379785428, "grad_norm": 0.060199178755283356, "learning_rate": 1.3609243165045131e-05, "loss": 0.39695146679878235, "step": 6512 }, { "epoch": 1.2034391564943239, "grad_norm": 0.0860724076628685, "learning_rate": 1.3607382644963888e-05, "loss": 0.5685034990310669, "step": 6513 }, { "epoch": 1.2036239332032197, "grad_norm": 0.07247007638216019, "learning_rate": 1.3605521981320107e-05, "loss": 0.4510326683521271, "step": 6514 }, { "epoch": 1.2038087099121155, "grad_norm": 0.10170851647853851, "learning_rate": 1.3603661174187828e-05, "loss": 0.8736907243728638, "step": 6515 }, { "epoch": 1.2039934866210114, "grad_norm": 0.07688196003437042, "learning_rate": 1.3601800223641107e-05, "loss": 0.5343433022499084, "step": 6516 }, { "epoch": 1.2041782633299072, "grad_norm": 0.0692083016037941, "learning_rate": 1.3599939129754008e-05, "loss": 0.44495779275894165, "step": 6517 }, { "epoch": 1.204363040038803, "grad_norm": 0.10107952356338501, "learning_rate": 1.3598077892600592e-05, "loss": 0.720582902431488, "step": 6518 }, { "epoch": 1.204547816747699, "grad_norm": 0.06074807047843933, "learning_rate": 1.3596216512254934e-05, "loss": 0.46574512124061584, "step": 6519 }, { "epoch": 1.204732593456595, "grad_norm": 0.06732956320047379, "learning_rate": 1.3594354988791111e-05, "loss": 0.5194476246833801, "step": 6520 }, { "epoch": 1.2049173701654907, "grad_norm": 0.07385317981243134, "learning_rate": 1.3592493322283207e-05, "loss": 0.5754207968711853, "step": 6521 }, { "epoch": 1.2051021468743865, "grad_norm": 0.08071117848157883, "learning_rate": 1.3590631512805303e-05, "loss": 0.5599603652954102, "step": 6522 }, { "epoch": 1.2052869235832824, "grad_norm": 0.0781562328338623, "learning_rate": 1.35887695604315e-05, "loss": 0.48199522495269775, "step": 6523 }, { "epoch": 1.2054717002921782, "grad_norm": 0.044577743858098984, "learning_rate": 1.3586907465235898e-05, "loss": 0.2852509319782257, "step": 6524 }, { "epoch": 1.205656477001074, "grad_norm": 0.08220332860946655, "learning_rate": 1.3585045227292598e-05, "loss": 0.5416831374168396, "step": 6525 }, { "epoch": 1.2058412537099699, "grad_norm": 0.07076594978570938, "learning_rate": 1.3583182846675716e-05, "loss": 0.5500925779342651, "step": 6526 }, { "epoch": 1.2060260304188657, "grad_norm": 0.07169757783412933, "learning_rate": 1.3581320323459368e-05, "loss": 0.45242324471473694, "step": 6527 }, { "epoch": 1.2062108071277615, "grad_norm": 0.06065558269619942, "learning_rate": 1.3579457657717673e-05, "loss": 0.41949501633644104, "step": 6528 }, { "epoch": 1.2063955838366573, "grad_norm": 0.07661020010709763, "learning_rate": 1.3577594849524765e-05, "loss": 0.4991658627986908, "step": 6529 }, { "epoch": 1.2065803605455532, "grad_norm": 0.08028572052717209, "learning_rate": 1.3575731898954774e-05, "loss": 0.5469301342964172, "step": 6530 }, { "epoch": 1.206765137254449, "grad_norm": 0.07623415440320969, "learning_rate": 1.357386880608184e-05, "loss": 0.6443749666213989, "step": 6531 }, { "epoch": 1.2069499139633448, "grad_norm": 0.07754608243703842, "learning_rate": 1.3572005570980109e-05, "loss": 0.5259369015693665, "step": 6532 }, { "epoch": 1.2071346906722407, "grad_norm": 0.06529566645622253, "learning_rate": 1.357014219372373e-05, "loss": 0.4533982574939728, "step": 6533 }, { "epoch": 1.2073194673811365, "grad_norm": 0.051948487758636475, "learning_rate": 1.3568278674386863e-05, "loss": 0.3756308853626251, "step": 6534 }, { "epoch": 1.2075042440900325, "grad_norm": 0.06701385974884033, "learning_rate": 1.3566415013043667e-05, "loss": 0.4647313952445984, "step": 6535 }, { "epoch": 1.2076890207989284, "grad_norm": 0.05771351978182793, "learning_rate": 1.3564551209768312e-05, "loss": 0.4375614821910858, "step": 6536 }, { "epoch": 1.2078737975078242, "grad_norm": 0.07962189614772797, "learning_rate": 1.3562687264634972e-05, "loss": 0.5191725492477417, "step": 6537 }, { "epoch": 1.20805857421672, "grad_norm": 0.08615492284297943, "learning_rate": 1.3560823177717826e-05, "loss": 0.651888906955719, "step": 6538 }, { "epoch": 1.2082433509256159, "grad_norm": 0.07054128497838974, "learning_rate": 1.3558958949091055e-05, "loss": 0.48627346754074097, "step": 6539 }, { "epoch": 1.2084281276345117, "grad_norm": 0.07507564127445221, "learning_rate": 1.3557094578828853e-05, "loss": 0.6051132082939148, "step": 6540 }, { "epoch": 1.2086129043434075, "grad_norm": 0.07555394619703293, "learning_rate": 1.3555230067005418e-05, "loss": 0.6303532123565674, "step": 6541 }, { "epoch": 1.2087976810523033, "grad_norm": 0.06445108354091644, "learning_rate": 1.3553365413694946e-05, "loss": 0.4361160099506378, "step": 6542 }, { "epoch": 1.2089824577611992, "grad_norm": 0.058598194271326065, "learning_rate": 1.355150061897165e-05, "loss": 0.43637320399284363, "step": 6543 }, { "epoch": 1.209167234470095, "grad_norm": 0.08800679445266724, "learning_rate": 1.3549635682909738e-05, "loss": 0.6950803995132446, "step": 6544 }, { "epoch": 1.2093520111789908, "grad_norm": 0.07335197180509567, "learning_rate": 1.3547770605583433e-05, "loss": 0.5119472742080688, "step": 6545 }, { "epoch": 1.2095367878878867, "grad_norm": 0.07864607125520706, "learning_rate": 1.3545905387066956e-05, "loss": 0.6012009382247925, "step": 6546 }, { "epoch": 1.2097215645967825, "grad_norm": 0.07670775055885315, "learning_rate": 1.3544040027434542e-05, "loss": 0.5006359815597534, "step": 6547 }, { "epoch": 1.2099063413056785, "grad_norm": 0.05005637928843498, "learning_rate": 1.3542174526760421e-05, "loss": 0.30397164821624756, "step": 6548 }, { "epoch": 1.2100911180145744, "grad_norm": 0.06848768889904022, "learning_rate": 1.3540308885118832e-05, "loss": 0.36815324425697327, "step": 6549 }, { "epoch": 1.2102758947234702, "grad_norm": 0.07376129180192947, "learning_rate": 1.353844310258403e-05, "loss": 0.5528689622879028, "step": 6550 }, { "epoch": 1.210460671432366, "grad_norm": 0.07270894199609756, "learning_rate": 1.3536577179230261e-05, "loss": 0.5203938484191895, "step": 6551 }, { "epoch": 1.2106454481412618, "grad_norm": 0.09340828657150269, "learning_rate": 1.3534711115131784e-05, "loss": 0.8169743418693542, "step": 6552 }, { "epoch": 1.2108302248501577, "grad_norm": 0.08001939207315445, "learning_rate": 1.3532844910362865e-05, "loss": 0.5689393877983093, "step": 6553 }, { "epoch": 1.2110150015590535, "grad_norm": 0.07906338572502136, "learning_rate": 1.3530978564997774e-05, "loss": 0.47733554244041443, "step": 6554 }, { "epoch": 1.2111997782679493, "grad_norm": 0.0676964595913887, "learning_rate": 1.3529112079110778e-05, "loss": 0.4821913242340088, "step": 6555 }, { "epoch": 1.2113845549768452, "grad_norm": 0.06866760551929474, "learning_rate": 1.3527245452776163e-05, "loss": 0.5118091702461243, "step": 6556 }, { "epoch": 1.211569331685741, "grad_norm": 0.07368195801973343, "learning_rate": 1.3525378686068218e-05, "loss": 0.36613550782203674, "step": 6557 }, { "epoch": 1.2117541083946368, "grad_norm": 0.09328657388687134, "learning_rate": 1.3523511779061228e-05, "loss": 0.5485423803329468, "step": 6558 }, { "epoch": 1.2119388851035326, "grad_norm": 0.09039495885372162, "learning_rate": 1.3521644731829493e-05, "loss": 0.6272793412208557, "step": 6559 }, { "epoch": 1.2121236618124285, "grad_norm": 0.08554688841104507, "learning_rate": 1.3519777544447316e-05, "loss": 0.625359296798706, "step": 6560 }, { "epoch": 1.2123084385213243, "grad_norm": 0.09835842251777649, "learning_rate": 1.3517910216989008e-05, "loss": 0.7249084115028381, "step": 6561 }, { "epoch": 1.2124932152302201, "grad_norm": 0.07324441522359848, "learning_rate": 1.3516042749528874e-05, "loss": 0.47122693061828613, "step": 6562 }, { "epoch": 1.212677991939116, "grad_norm": 0.07746856659650803, "learning_rate": 1.3514175142141241e-05, "loss": 0.5781249403953552, "step": 6563 }, { "epoch": 1.212862768648012, "grad_norm": 0.08498020470142365, "learning_rate": 1.3512307394900433e-05, "loss": 0.493082731962204, "step": 6564 }, { "epoch": 1.2130475453569078, "grad_norm": 0.06842965632677078, "learning_rate": 1.3510439507880778e-05, "loss": 0.5504364371299744, "step": 6565 }, { "epoch": 1.2132323220658037, "grad_norm": 0.09442943334579468, "learning_rate": 1.3508571481156612e-05, "loss": 0.6746259927749634, "step": 6566 }, { "epoch": 1.2134170987746995, "grad_norm": 0.08421729505062103, "learning_rate": 1.350670331480228e-05, "loss": 0.6475216746330261, "step": 6567 }, { "epoch": 1.2136018754835953, "grad_norm": 0.07111159712076187, "learning_rate": 1.350483500889213e-05, "loss": 0.43587779998779297, "step": 6568 }, { "epoch": 1.2137866521924912, "grad_norm": 0.06216439977288246, "learning_rate": 1.3502966563500504e-05, "loss": 0.512531042098999, "step": 6569 }, { "epoch": 1.213971428901387, "grad_norm": 0.05999573692679405, "learning_rate": 1.3501097978701773e-05, "loss": 0.4109059274196625, "step": 6570 }, { "epoch": 1.2141562056102828, "grad_norm": 0.06874170154333115, "learning_rate": 1.3499229254570298e-05, "loss": 0.43019795417785645, "step": 6571 }, { "epoch": 1.2143409823191786, "grad_norm": 0.08103114366531372, "learning_rate": 1.3497360391180443e-05, "loss": 0.5568566918373108, "step": 6572 }, { "epoch": 1.2145257590280745, "grad_norm": 0.0737990066409111, "learning_rate": 1.3495491388606587e-05, "loss": 0.4694644808769226, "step": 6573 }, { "epoch": 1.2147105357369703, "grad_norm": 0.07458315044641495, "learning_rate": 1.349362224692311e-05, "loss": 0.6392929553985596, "step": 6574 }, { "epoch": 1.2148953124458661, "grad_norm": 0.0786605253815651, "learning_rate": 1.3491752966204397e-05, "loss": 0.7331372499465942, "step": 6575 }, { "epoch": 1.215080089154762, "grad_norm": 0.08692830055952072, "learning_rate": 1.348988354652484e-05, "loss": 0.6495953798294067, "step": 6576 }, { "epoch": 1.215264865863658, "grad_norm": 0.07171203196048737, "learning_rate": 1.3488013987958839e-05, "loss": 0.5462040305137634, "step": 6577 }, { "epoch": 1.2154496425725538, "grad_norm": 0.07235080003738403, "learning_rate": 1.3486144290580793e-05, "loss": 0.5880376696586609, "step": 6578 }, { "epoch": 1.2156344192814497, "grad_norm": 0.06372851878404617, "learning_rate": 1.3484274454465109e-05, "loss": 0.4510330855846405, "step": 6579 }, { "epoch": 1.2158191959903455, "grad_norm": 0.054737433791160583, "learning_rate": 1.3482404479686204e-05, "loss": 0.34662091732025146, "step": 6580 }, { "epoch": 1.2160039726992413, "grad_norm": 0.08225202560424805, "learning_rate": 1.3480534366318496e-05, "loss": 0.615791380405426, "step": 6581 }, { "epoch": 1.2161887494081371, "grad_norm": 0.0670294538140297, "learning_rate": 1.3478664114436408e-05, "loss": 0.3681284487247467, "step": 6582 }, { "epoch": 1.216373526117033, "grad_norm": 0.06846636533737183, "learning_rate": 1.3476793724114372e-05, "loss": 0.48841947317123413, "step": 6583 }, { "epoch": 1.2165583028259288, "grad_norm": 0.10067915171384811, "learning_rate": 1.3474923195426825e-05, "loss": 0.672841489315033, "step": 6584 }, { "epoch": 1.2167430795348246, "grad_norm": 0.090632364153862, "learning_rate": 1.3473052528448203e-05, "loss": 0.6621450185775757, "step": 6585 }, { "epoch": 1.2169278562437205, "grad_norm": 0.0676613301038742, "learning_rate": 1.347118172325296e-05, "loss": 0.5203623175621033, "step": 6586 }, { "epoch": 1.2171126329526163, "grad_norm": 0.07406887412071228, "learning_rate": 1.3469310779915543e-05, "loss": 0.6254865527153015, "step": 6587 }, { "epoch": 1.2172974096615121, "grad_norm": 0.07997486740350723, "learning_rate": 1.346743969851041e-05, "loss": 0.6252841353416443, "step": 6588 }, { "epoch": 1.217482186370408, "grad_norm": 0.06023580953478813, "learning_rate": 1.3465568479112026e-05, "loss": 0.40626582503318787, "step": 6589 }, { "epoch": 1.2176669630793038, "grad_norm": 0.08407054096460342, "learning_rate": 1.3463697121794859e-05, "loss": 0.7044686079025269, "step": 6590 }, { "epoch": 1.2178517397881996, "grad_norm": 0.0657910481095314, "learning_rate": 1.3461825626633384e-05, "loss": 0.3766320049762726, "step": 6591 }, { "epoch": 1.2180365164970954, "grad_norm": 0.05869833379983902, "learning_rate": 1.3459953993702077e-05, "loss": 0.37285542488098145, "step": 6592 }, { "epoch": 1.2182212932059913, "grad_norm": 0.08053995668888092, "learning_rate": 1.3458082223075425e-05, "loss": 0.5446112751960754, "step": 6593 }, { "epoch": 1.2184060699148873, "grad_norm": 0.07651842385530472, "learning_rate": 1.3456210314827924e-05, "loss": 0.6633077263832092, "step": 6594 }, { "epoch": 1.2185908466237831, "grad_norm": 0.08434221893548965, "learning_rate": 1.3454338269034064e-05, "loss": 0.47503310441970825, "step": 6595 }, { "epoch": 1.218775623332679, "grad_norm": 0.08381889015436172, "learning_rate": 1.3452466085768348e-05, "loss": 0.6799910068511963, "step": 6596 }, { "epoch": 1.2189604000415748, "grad_norm": 0.08820167928934097, "learning_rate": 1.3450593765105282e-05, "loss": 0.5599125027656555, "step": 6597 }, { "epoch": 1.2191451767504706, "grad_norm": 0.0874655693769455, "learning_rate": 1.3448721307119379e-05, "loss": 0.6755706071853638, "step": 6598 }, { "epoch": 1.2193299534593665, "grad_norm": 0.05301399528980255, "learning_rate": 1.344684871188516e-05, "loss": 0.36308756470680237, "step": 6599 }, { "epoch": 1.2195147301682623, "grad_norm": 0.07892420142889023, "learning_rate": 1.3444975979477146e-05, "loss": 0.5817967057228088, "step": 6600 }, { "epoch": 1.2196995068771581, "grad_norm": 0.07648614048957825, "learning_rate": 1.3443103109969866e-05, "loss": 0.5099329352378845, "step": 6601 }, { "epoch": 1.219884283586054, "grad_norm": 0.0788240060210228, "learning_rate": 1.3441230103437852e-05, "loss": 0.6097611784934998, "step": 6602 }, { "epoch": 1.2200690602949498, "grad_norm": 0.06284268945455551, "learning_rate": 1.343935695995565e-05, "loss": 0.43738633394241333, "step": 6603 }, { "epoch": 1.2202538370038456, "grad_norm": 0.07347282022237778, "learning_rate": 1.3437483679597798e-05, "loss": 0.5626323819160461, "step": 6604 }, { "epoch": 1.2204386137127414, "grad_norm": 0.09133567661046982, "learning_rate": 1.343561026243885e-05, "loss": 0.6883614659309387, "step": 6605 }, { "epoch": 1.2206233904216375, "grad_norm": 0.06906570494174957, "learning_rate": 1.3433736708553364e-05, "loss": 0.48239877820014954, "step": 6606 }, { "epoch": 1.2208081671305333, "grad_norm": 0.0667714849114418, "learning_rate": 1.3431863018015898e-05, "loss": 0.46174854040145874, "step": 6607 }, { "epoch": 1.2209929438394291, "grad_norm": 0.08062037825584412, "learning_rate": 1.3429989190901021e-05, "loss": 0.5113742351531982, "step": 6608 }, { "epoch": 1.221177720548325, "grad_norm": 0.08745943754911423, "learning_rate": 1.3428115227283308e-05, "loss": 0.6630105972290039, "step": 6609 }, { "epoch": 1.2213624972572208, "grad_norm": 0.06435438990592957, "learning_rate": 1.3426241127237331e-05, "loss": 0.4193195104598999, "step": 6610 }, { "epoch": 1.2215472739661166, "grad_norm": 0.06775399297475815, "learning_rate": 1.342436689083768e-05, "loss": 0.49177947640419006, "step": 6611 }, { "epoch": 1.2217320506750124, "grad_norm": 0.07548322528600693, "learning_rate": 1.3422492518158936e-05, "loss": 0.5043904781341553, "step": 6612 }, { "epoch": 1.2219168273839083, "grad_norm": 0.0687546357512474, "learning_rate": 1.3420618009275701e-05, "loss": 0.4975576400756836, "step": 6613 }, { "epoch": 1.222101604092804, "grad_norm": 0.06727460771799088, "learning_rate": 1.3418743364262567e-05, "loss": 0.4292713403701782, "step": 6614 }, { "epoch": 1.2222863808017, "grad_norm": 0.08708701282739639, "learning_rate": 1.3416868583194145e-05, "loss": 0.6526595950126648, "step": 6615 }, { "epoch": 1.2224711575105958, "grad_norm": 0.07395205646753311, "learning_rate": 1.3414993666145043e-05, "loss": 0.6405277848243713, "step": 6616 }, { "epoch": 1.2226559342194916, "grad_norm": 0.05583598464727402, "learning_rate": 1.341311861318988e-05, "loss": 0.411292165517807, "step": 6617 }, { "epoch": 1.2228407109283874, "grad_norm": 0.07856931537389755, "learning_rate": 1.341124342440327e-05, "loss": 0.5631988048553467, "step": 6618 }, { "epoch": 1.2230254876372832, "grad_norm": 0.06796279549598694, "learning_rate": 1.3409368099859848e-05, "loss": 0.39263972640037537, "step": 6619 }, { "epoch": 1.223210264346179, "grad_norm": 0.08865714818239212, "learning_rate": 1.3407492639634243e-05, "loss": 0.6106154918670654, "step": 6620 }, { "epoch": 1.223395041055075, "grad_norm": 0.07416286319494247, "learning_rate": 1.3405617043801087e-05, "loss": 0.5677984356880188, "step": 6621 }, { "epoch": 1.2235798177639707, "grad_norm": 0.06661788374185562, "learning_rate": 1.340374131243503e-05, "loss": 0.5082527995109558, "step": 6622 }, { "epoch": 1.2237645944728668, "grad_norm": 0.09100113809108734, "learning_rate": 1.3401865445610717e-05, "loss": 0.6676788926124573, "step": 6623 }, { "epoch": 1.2239493711817626, "grad_norm": 0.0856882631778717, "learning_rate": 1.3399989443402804e-05, "loss": 0.5832250714302063, "step": 6624 }, { "epoch": 1.2241341478906584, "grad_norm": 0.0818934291601181, "learning_rate": 1.3398113305885944e-05, "loss": 0.6237608790397644, "step": 6625 }, { "epoch": 1.2243189245995543, "grad_norm": 0.07755490392446518, "learning_rate": 1.3396237033134811e-05, "loss": 0.4891819655895233, "step": 6626 }, { "epoch": 1.22450370130845, "grad_norm": 0.08020766079425812, "learning_rate": 1.3394360625224067e-05, "loss": 0.561857283115387, "step": 6627 }, { "epoch": 1.224688478017346, "grad_norm": 0.07287803292274475, "learning_rate": 1.3392484082228387e-05, "loss": 0.5870558023452759, "step": 6628 }, { "epoch": 1.2248732547262418, "grad_norm": 0.06975536793470383, "learning_rate": 1.339060740422246e-05, "loss": 0.5530955791473389, "step": 6629 }, { "epoch": 1.2250580314351376, "grad_norm": 0.09488081187009811, "learning_rate": 1.3388730591280966e-05, "loss": 0.6036531925201416, "step": 6630 }, { "epoch": 1.2252428081440334, "grad_norm": 0.0829424187541008, "learning_rate": 1.3386853643478592e-05, "loss": 0.595618486404419, "step": 6631 }, { "epoch": 1.2254275848529292, "grad_norm": 0.09147574752569199, "learning_rate": 1.338497656089004e-05, "loss": 0.6551826000213623, "step": 6632 }, { "epoch": 1.225612361561825, "grad_norm": 0.0640762522816658, "learning_rate": 1.3383099343590014e-05, "loss": 0.46834108233451843, "step": 6633 }, { "epoch": 1.225797138270721, "grad_norm": 0.07283955067396164, "learning_rate": 1.3381221991653215e-05, "loss": 0.5464327335357666, "step": 6634 }, { "epoch": 1.2259819149796167, "grad_norm": 0.06744116544723511, "learning_rate": 1.3379344505154359e-05, "loss": 0.4603147804737091, "step": 6635 }, { "epoch": 1.2261666916885128, "grad_norm": 0.08637522161006927, "learning_rate": 1.3377466884168168e-05, "loss": 0.6327239871025085, "step": 6636 }, { "epoch": 1.2263514683974086, "grad_norm": 0.07645534723997116, "learning_rate": 1.3375589128769362e-05, "loss": 0.620918333530426, "step": 6637 }, { "epoch": 1.2265362451063044, "grad_norm": 0.06302107870578766, "learning_rate": 1.3373711239032664e-05, "loss": 0.38208499550819397, "step": 6638 }, { "epoch": 1.2267210218152003, "grad_norm": 0.059298522770404816, "learning_rate": 1.3371833215032819e-05, "loss": 0.3346162438392639, "step": 6639 }, { "epoch": 1.226905798524096, "grad_norm": 0.06452131271362305, "learning_rate": 1.3369955056844562e-05, "loss": 0.535065233707428, "step": 6640 }, { "epoch": 1.227090575232992, "grad_norm": 0.07480758428573608, "learning_rate": 1.3368076764542632e-05, "loss": 0.6556054949760437, "step": 6641 }, { "epoch": 1.2272753519418877, "grad_norm": 0.060900650918483734, "learning_rate": 1.3366198338201786e-05, "loss": 0.3786194622516632, "step": 6642 }, { "epoch": 1.2274601286507836, "grad_norm": 0.06550586223602295, "learning_rate": 1.3364319777896779e-05, "loss": 0.5116629600524902, "step": 6643 }, { "epoch": 1.2276449053596794, "grad_norm": 0.07762665301561356, "learning_rate": 1.3362441083702366e-05, "loss": 0.5098814964294434, "step": 6644 }, { "epoch": 1.2278296820685752, "grad_norm": 0.0742892399430275, "learning_rate": 1.336056225569332e-05, "loss": 0.4967721700668335, "step": 6645 }, { "epoch": 1.228014458777471, "grad_norm": 0.07690393924713135, "learning_rate": 1.3358683293944414e-05, "loss": 0.5976964235305786, "step": 6646 }, { "epoch": 1.228199235486367, "grad_norm": 0.08312725275754929, "learning_rate": 1.335680419853042e-05, "loss": 0.6433517932891846, "step": 6647 }, { "epoch": 1.2283840121952627, "grad_norm": 0.07433654367923737, "learning_rate": 1.3354924969526116e-05, "loss": 0.4797278940677643, "step": 6648 }, { "epoch": 1.2285687889041585, "grad_norm": 0.06998168677091599, "learning_rate": 1.33530456070063e-05, "loss": 0.46945229172706604, "step": 6649 }, { "epoch": 1.2287535656130544, "grad_norm": 0.06637603789567947, "learning_rate": 1.3351166111045757e-05, "loss": 0.42683297395706177, "step": 6650 }, { "epoch": 1.2289383423219502, "grad_norm": 0.07931186258792877, "learning_rate": 1.3349286481719283e-05, "loss": 0.6133133769035339, "step": 6651 }, { "epoch": 1.2291231190308463, "grad_norm": 0.07115180045366287, "learning_rate": 1.334740671910169e-05, "loss": 0.6747742891311646, "step": 6652 }, { "epoch": 1.229307895739742, "grad_norm": 0.07112821936607361, "learning_rate": 1.3345526823267782e-05, "loss": 0.4979085326194763, "step": 6653 }, { "epoch": 1.229492672448638, "grad_norm": 0.06507711112499237, "learning_rate": 1.3343646794292373e-05, "loss": 0.3852187693119049, "step": 6654 }, { "epoch": 1.2296774491575337, "grad_norm": 0.07518497854471207, "learning_rate": 1.3341766632250281e-05, "loss": 0.47556841373443604, "step": 6655 }, { "epoch": 1.2298622258664296, "grad_norm": 0.07489053905010223, "learning_rate": 1.3339886337216336e-05, "loss": 0.4678479731082916, "step": 6656 }, { "epoch": 1.2300470025753254, "grad_norm": 0.07171334326267242, "learning_rate": 1.3338005909265363e-05, "loss": 0.46340975165367126, "step": 6657 }, { "epoch": 1.2302317792842212, "grad_norm": 0.0713566243648529, "learning_rate": 1.3336125348472193e-05, "loss": 0.49299490451812744, "step": 6658 }, { "epoch": 1.230416555993117, "grad_norm": 0.09151481091976166, "learning_rate": 1.3334244654911677e-05, "loss": 0.601913332939148, "step": 6659 }, { "epoch": 1.2306013327020129, "grad_norm": 0.07149849832057953, "learning_rate": 1.3332363828658655e-05, "loss": 0.44596779346466064, "step": 6660 }, { "epoch": 1.2307861094109087, "grad_norm": 0.10163896530866623, "learning_rate": 1.3330482869787975e-05, "loss": 0.7691639065742493, "step": 6661 }, { "epoch": 1.2309708861198045, "grad_norm": 0.08098061382770538, "learning_rate": 1.3328601778374497e-05, "loss": 0.5822969675064087, "step": 6662 }, { "epoch": 1.2311556628287004, "grad_norm": 0.08098798990249634, "learning_rate": 1.3326720554493084e-05, "loss": 0.6189523935317993, "step": 6663 }, { "epoch": 1.2313404395375962, "grad_norm": 0.06337269395589828, "learning_rate": 1.33248391982186e-05, "loss": 0.32708972692489624, "step": 6664 }, { "epoch": 1.2315252162464922, "grad_norm": 0.0693390890955925, "learning_rate": 1.3322957709625916e-05, "loss": 0.4504762887954712, "step": 6665 }, { "epoch": 1.231709992955388, "grad_norm": 0.08594589680433273, "learning_rate": 1.3321076088789915e-05, "loss": 0.601592481136322, "step": 6666 }, { "epoch": 1.231894769664284, "grad_norm": 0.10171791166067123, "learning_rate": 1.3319194335785475e-05, "loss": 0.7838782668113708, "step": 6667 }, { "epoch": 1.2320795463731797, "grad_norm": 0.06850502640008926, "learning_rate": 1.3317312450687485e-05, "loss": 0.4453665614128113, "step": 6668 }, { "epoch": 1.2322643230820756, "grad_norm": 0.0664345771074295, "learning_rate": 1.3315430433570834e-05, "loss": 0.4064602553844452, "step": 6669 }, { "epoch": 1.2324490997909714, "grad_norm": 0.07736603915691376, "learning_rate": 1.3313548284510432e-05, "loss": 0.5926414728164673, "step": 6670 }, { "epoch": 1.2326338764998672, "grad_norm": 0.05535926669836044, "learning_rate": 1.3311666003581168e-05, "loss": 0.4185660481452942, "step": 6671 }, { "epoch": 1.232818653208763, "grad_norm": 0.06340336799621582, "learning_rate": 1.330978359085796e-05, "loss": 0.4234006702899933, "step": 6672 }, { "epoch": 1.2330034299176589, "grad_norm": 0.07930430769920349, "learning_rate": 1.3307901046415723e-05, "loss": 0.5866430401802063, "step": 6673 }, { "epoch": 1.2331882066265547, "grad_norm": 0.06652922928333282, "learning_rate": 1.330601837032937e-05, "loss": 0.4335622191429138, "step": 6674 }, { "epoch": 1.2333729833354505, "grad_norm": 0.07018651813268661, "learning_rate": 1.330413556267383e-05, "loss": 0.5484957098960876, "step": 6675 }, { "epoch": 1.2335577600443464, "grad_norm": 0.0835627093911171, "learning_rate": 1.3302252623524032e-05, "loss": 0.6306493878364563, "step": 6676 }, { "epoch": 1.2337425367532422, "grad_norm": 0.07812164723873138, "learning_rate": 1.3300369552954913e-05, "loss": 0.5238651633262634, "step": 6677 }, { "epoch": 1.233927313462138, "grad_norm": 0.07560842484235764, "learning_rate": 1.3298486351041409e-05, "loss": 0.5358222126960754, "step": 6678 }, { "epoch": 1.2341120901710338, "grad_norm": 0.07144767791032791, "learning_rate": 1.3296603017858467e-05, "loss": 0.5098992586135864, "step": 6679 }, { "epoch": 1.2342968668799297, "grad_norm": 0.0746634379029274, "learning_rate": 1.3294719553481042e-05, "loss": 0.4734437167644501, "step": 6680 }, { "epoch": 1.2344816435888255, "grad_norm": 0.0832318589091301, "learning_rate": 1.3292835957984082e-05, "loss": 0.592925488948822, "step": 6681 }, { "epoch": 1.2346664202977216, "grad_norm": 0.08652772754430771, "learning_rate": 1.3290952231442555e-05, "loss": 0.6108662486076355, "step": 6682 }, { "epoch": 1.2348511970066174, "grad_norm": 0.06642487645149231, "learning_rate": 1.3289068373931426e-05, "loss": 0.4984731376171112, "step": 6683 }, { "epoch": 1.2350359737155132, "grad_norm": 0.09597799926996231, "learning_rate": 1.3287184385525667e-05, "loss": 0.5345010757446289, "step": 6684 }, { "epoch": 1.235220750424409, "grad_norm": 0.08704934269189835, "learning_rate": 1.3285300266300252e-05, "loss": 0.6655499935150146, "step": 6685 }, { "epoch": 1.2354055271333049, "grad_norm": 0.08564785122871399, "learning_rate": 1.3283416016330164e-05, "loss": 0.601367175579071, "step": 6686 }, { "epoch": 1.2355903038422007, "grad_norm": 0.07977317273616791, "learning_rate": 1.3281531635690391e-05, "loss": 0.43817946314811707, "step": 6687 }, { "epoch": 1.2357750805510965, "grad_norm": 0.08475592732429504, "learning_rate": 1.3279647124455927e-05, "loss": 0.5711531043052673, "step": 6688 }, { "epoch": 1.2359598572599924, "grad_norm": 0.09372538328170776, "learning_rate": 1.3277762482701769e-05, "loss": 0.5565690398216248, "step": 6689 }, { "epoch": 1.2361446339688882, "grad_norm": 0.08273839950561523, "learning_rate": 1.3275877710502918e-05, "loss": 0.6361427307128906, "step": 6690 }, { "epoch": 1.236329410677784, "grad_norm": 0.06452780216932297, "learning_rate": 1.327399280793438e-05, "loss": 0.3863506019115448, "step": 6691 }, { "epoch": 1.2365141873866798, "grad_norm": 0.07726220041513443, "learning_rate": 1.3272107775071176e-05, "loss": 0.6117289662361145, "step": 6692 }, { "epoch": 1.2366989640955757, "grad_norm": 0.06845694035291672, "learning_rate": 1.3270222611988318e-05, "loss": 0.4468269944190979, "step": 6693 }, { "epoch": 1.2368837408044717, "grad_norm": 0.09642685204744339, "learning_rate": 1.3268337318760832e-05, "loss": 0.6524372100830078, "step": 6694 }, { "epoch": 1.2370685175133675, "grad_norm": 0.09860479086637497, "learning_rate": 1.3266451895463743e-05, "loss": 0.6463438272476196, "step": 6695 }, { "epoch": 1.2372532942222634, "grad_norm": 0.07172819972038269, "learning_rate": 1.326456634217209e-05, "loss": 0.5616058707237244, "step": 6696 }, { "epoch": 1.2374380709311592, "grad_norm": 0.08177749067544937, "learning_rate": 1.326268065896091e-05, "loss": 0.4797169864177704, "step": 6697 }, { "epoch": 1.237622847640055, "grad_norm": 0.06497815996408463, "learning_rate": 1.3260794845905249e-05, "loss": 0.45328235626220703, "step": 6698 }, { "epoch": 1.2378076243489509, "grad_norm": 0.07354207336902618, "learning_rate": 1.3258908903080151e-05, "loss": 0.531097948551178, "step": 6699 }, { "epoch": 1.2379924010578467, "grad_norm": 0.06290023028850555, "learning_rate": 1.325702283056068e-05, "loss": 0.4945487380027771, "step": 6700 }, { "epoch": 1.2381771777667425, "grad_norm": 0.07945331186056137, "learning_rate": 1.3255136628421885e-05, "loss": 0.511954128742218, "step": 6701 }, { "epoch": 1.2383619544756383, "grad_norm": 0.06767923384904861, "learning_rate": 1.325325029673884e-05, "loss": 0.440019428730011, "step": 6702 }, { "epoch": 1.2385467311845342, "grad_norm": 0.06857583671808243, "learning_rate": 1.3251363835586609e-05, "loss": 0.49880003929138184, "step": 6703 }, { "epoch": 1.23873150789343, "grad_norm": 0.08058851212263107, "learning_rate": 1.324947724504027e-05, "loss": 0.5239635109901428, "step": 6704 }, { "epoch": 1.2389162846023258, "grad_norm": 0.0902724415063858, "learning_rate": 1.3247590525174902e-05, "loss": 0.6282240748405457, "step": 6705 }, { "epoch": 1.2391010613112217, "grad_norm": 0.07073131948709488, "learning_rate": 1.3245703676065594e-05, "loss": 0.4652053713798523, "step": 6706 }, { "epoch": 1.2392858380201175, "grad_norm": 0.08671863377094269, "learning_rate": 1.3243816697787433e-05, "loss": 0.6942138671875, "step": 6707 }, { "epoch": 1.2394706147290133, "grad_norm": 0.07620774209499359, "learning_rate": 1.3241929590415514e-05, "loss": 0.5614061951637268, "step": 6708 }, { "epoch": 1.2396553914379091, "grad_norm": 0.06715484708547592, "learning_rate": 1.3240042354024944e-05, "loss": 0.4390954077243805, "step": 6709 }, { "epoch": 1.239840168146805, "grad_norm": 0.05757160112261772, "learning_rate": 1.3238154988690821e-05, "loss": 0.41786855459213257, "step": 6710 }, { "epoch": 1.240024944855701, "grad_norm": 0.07303710281848907, "learning_rate": 1.3236267494488262e-05, "loss": 0.5968295335769653, "step": 6711 }, { "epoch": 1.2402097215645969, "grad_norm": 0.09075836092233658, "learning_rate": 1.3234379871492381e-05, "loss": 0.704807698726654, "step": 6712 }, { "epoch": 1.2403944982734927, "grad_norm": 0.06804577261209488, "learning_rate": 1.3232492119778301e-05, "loss": 0.4883617162704468, "step": 6713 }, { "epoch": 1.2405792749823885, "grad_norm": 0.0695258155465126, "learning_rate": 1.3230604239421148e-05, "loss": 0.5278023481369019, "step": 6714 }, { "epoch": 1.2407640516912843, "grad_norm": 0.07082250714302063, "learning_rate": 1.3228716230496055e-05, "loss": 0.4770531952381134, "step": 6715 }, { "epoch": 1.2409488284001802, "grad_norm": 0.0864938423037529, "learning_rate": 1.3226828093078157e-05, "loss": 0.6933788657188416, "step": 6716 }, { "epoch": 1.241133605109076, "grad_norm": 0.08195040374994278, "learning_rate": 1.3224939827242596e-05, "loss": 0.5969827175140381, "step": 6717 }, { "epoch": 1.2413183818179718, "grad_norm": 0.07748837023973465, "learning_rate": 1.3223051433064515e-05, "loss": 0.6048961281776428, "step": 6718 }, { "epoch": 1.2415031585268677, "grad_norm": 0.07402081042528152, "learning_rate": 1.3221162910619076e-05, "loss": 0.42801129817962646, "step": 6719 }, { "epoch": 1.2416879352357635, "grad_norm": 0.08601423352956772, "learning_rate": 1.321927425998143e-05, "loss": 0.6223377585411072, "step": 6720 }, { "epoch": 1.2418727119446593, "grad_norm": 0.0846116915345192, "learning_rate": 1.3217385481226736e-05, "loss": 0.5239998698234558, "step": 6721 }, { "epoch": 1.2420574886535551, "grad_norm": 0.07802564650774002, "learning_rate": 1.321549657443017e-05, "loss": 0.4778412878513336, "step": 6722 }, { "epoch": 1.242242265362451, "grad_norm": 0.09074050933122635, "learning_rate": 1.3213607539666899e-05, "loss": 0.6743155717849731, "step": 6723 }, { "epoch": 1.242427042071347, "grad_norm": 0.06890838593244553, "learning_rate": 1.3211718377012103e-05, "loss": 0.4325670003890991, "step": 6724 }, { "epoch": 1.2426118187802428, "grad_norm": 0.08446649461984634, "learning_rate": 1.3209829086540964e-05, "loss": 0.6355634331703186, "step": 6725 }, { "epoch": 1.2427965954891387, "grad_norm": 0.07344458997249603, "learning_rate": 1.3207939668328671e-05, "loss": 0.5197588801383972, "step": 6726 }, { "epoch": 1.2429813721980345, "grad_norm": 0.06373180449008942, "learning_rate": 1.320605012245041e-05, "loss": 0.44659337401390076, "step": 6727 }, { "epoch": 1.2431661489069303, "grad_norm": 0.07958760857582092, "learning_rate": 1.320416044898139e-05, "loss": 0.6337906718254089, "step": 6728 }, { "epoch": 1.2433509256158262, "grad_norm": 0.08500469475984573, "learning_rate": 1.3202270647996807e-05, "loss": 0.6289973258972168, "step": 6729 }, { "epoch": 1.243535702324722, "grad_norm": 0.08085852116346359, "learning_rate": 1.3200380719571868e-05, "loss": 0.5270951986312866, "step": 6730 }, { "epoch": 1.2437204790336178, "grad_norm": 0.08276652544736862, "learning_rate": 1.319849066378179e-05, "loss": 0.7250403165817261, "step": 6731 }, { "epoch": 1.2439052557425136, "grad_norm": 0.08534059673547745, "learning_rate": 1.3196600480701796e-05, "loss": 0.5166981220245361, "step": 6732 }, { "epoch": 1.2440900324514095, "grad_norm": 0.07980574667453766, "learning_rate": 1.31947101704071e-05, "loss": 0.5468252301216125, "step": 6733 }, { "epoch": 1.2442748091603053, "grad_norm": 0.06491481512784958, "learning_rate": 1.3192819732972931e-05, "loss": 0.48686864972114563, "step": 6734 }, { "epoch": 1.2444595858692011, "grad_norm": 0.0737755075097084, "learning_rate": 1.3190929168474528e-05, "loss": 0.5102857947349548, "step": 6735 }, { "epoch": 1.244644362578097, "grad_norm": 0.07817957550287247, "learning_rate": 1.318903847698713e-05, "loss": 0.4815283715724945, "step": 6736 }, { "epoch": 1.2448291392869928, "grad_norm": 0.06640022993087769, "learning_rate": 1.3187147658585975e-05, "loss": 0.47119250893592834, "step": 6737 }, { "epoch": 1.2450139159958886, "grad_norm": 0.08190456032752991, "learning_rate": 1.3185256713346315e-05, "loss": 0.5948231220245361, "step": 6738 }, { "epoch": 1.2451986927047844, "grad_norm": 0.06790990382432938, "learning_rate": 1.3183365641343404e-05, "loss": 0.42641395330429077, "step": 6739 }, { "epoch": 1.2453834694136805, "grad_norm": 0.09158918261528015, "learning_rate": 1.3181474442652498e-05, "loss": 0.5970664024353027, "step": 6740 }, { "epoch": 1.2455682461225763, "grad_norm": 0.07614406943321228, "learning_rate": 1.3179583117348865e-05, "loss": 0.5861613154411316, "step": 6741 }, { "epoch": 1.2457530228314722, "grad_norm": 0.08723059296607971, "learning_rate": 1.317769166550777e-05, "loss": 0.6448711156845093, "step": 6742 }, { "epoch": 1.245937799540368, "grad_norm": 0.08985978364944458, "learning_rate": 1.3175800087204488e-05, "loss": 0.5434716939926147, "step": 6743 }, { "epoch": 1.2461225762492638, "grad_norm": 0.08016160130500793, "learning_rate": 1.3173908382514298e-05, "loss": 0.49246692657470703, "step": 6744 }, { "epoch": 1.2463073529581596, "grad_norm": 0.07651840895414352, "learning_rate": 1.3172016551512487e-05, "loss": 0.5844117403030396, "step": 6745 }, { "epoch": 1.2464921296670555, "grad_norm": 0.08612750470638275, "learning_rate": 1.317012459427434e-05, "loss": 0.49526458978652954, "step": 6746 }, { "epoch": 1.2466769063759513, "grad_norm": 0.06630276143550873, "learning_rate": 1.3168232510875152e-05, "loss": 0.4218505024909973, "step": 6747 }, { "epoch": 1.2468616830848471, "grad_norm": 0.06982891261577606, "learning_rate": 1.3166340301390222e-05, "loss": 0.43704402446746826, "step": 6748 }, { "epoch": 1.247046459793743, "grad_norm": 0.10117150843143463, "learning_rate": 1.3164447965894856e-05, "loss": 0.618638277053833, "step": 6749 }, { "epoch": 1.2472312365026388, "grad_norm": 0.07406169176101685, "learning_rate": 1.3162555504464358e-05, "loss": 0.47164854407310486, "step": 6750 }, { "epoch": 1.2474160132115346, "grad_norm": 0.10184917598962784, "learning_rate": 1.3160662917174045e-05, "loss": 0.6744089722633362, "step": 6751 }, { "epoch": 1.2476007899204304, "grad_norm": 0.06890112161636353, "learning_rate": 1.3158770204099241e-05, "loss": 0.4289524257183075, "step": 6752 }, { "epoch": 1.2477855666293265, "grad_norm": 0.06529653072357178, "learning_rate": 1.3156877365315264e-05, "loss": 0.39639273285865784, "step": 6753 }, { "epoch": 1.2479703433382223, "grad_norm": 0.0774889588356018, "learning_rate": 1.315498440089744e-05, "loss": 0.641357958316803, "step": 6754 }, { "epoch": 1.2481551200471181, "grad_norm": 0.05716458335518837, "learning_rate": 1.315309131092111e-05, "loss": 0.3876572549343109, "step": 6755 }, { "epoch": 1.248339896756014, "grad_norm": 0.0856190174818039, "learning_rate": 1.3151198095461614e-05, "loss": 0.6331546306610107, "step": 6756 }, { "epoch": 1.2485246734649098, "grad_norm": 0.07629573345184326, "learning_rate": 1.3149304754594287e-05, "loss": 0.6202632784843445, "step": 6757 }, { "epoch": 1.2487094501738056, "grad_norm": 0.09377996623516083, "learning_rate": 1.3147411288394487e-05, "loss": 0.733933687210083, "step": 6758 }, { "epoch": 1.2488942268827015, "grad_norm": 0.07805003225803375, "learning_rate": 1.3145517696937567e-05, "loss": 0.5285007953643799, "step": 6759 }, { "epoch": 1.2490790035915973, "grad_norm": 0.07465283572673798, "learning_rate": 1.3143623980298879e-05, "loss": 0.5317094326019287, "step": 6760 }, { "epoch": 1.2492637803004931, "grad_norm": 0.08603695034980774, "learning_rate": 1.3141730138553792e-05, "loss": 0.7554119825363159, "step": 6761 }, { "epoch": 1.249448557009389, "grad_norm": 0.06203283742070198, "learning_rate": 1.3139836171777678e-05, "loss": 0.5262657403945923, "step": 6762 }, { "epoch": 1.2496333337182848, "grad_norm": 0.09563897550106049, "learning_rate": 1.3137942080045906e-05, "loss": 0.6859795451164246, "step": 6763 }, { "epoch": 1.2498181104271806, "grad_norm": 0.07398170232772827, "learning_rate": 1.3136047863433854e-05, "loss": 0.5132628083229065, "step": 6764 }, { "epoch": 1.2500028871360764, "grad_norm": 0.08611463010311127, "learning_rate": 1.3134153522016912e-05, "loss": 0.6254287958145142, "step": 6765 }, { "epoch": 1.2501876638449723, "grad_norm": 0.08766376227140427, "learning_rate": 1.3132259055870467e-05, "loss": 0.5664530992507935, "step": 6766 }, { "epoch": 1.250372440553868, "grad_norm": 0.08339742571115494, "learning_rate": 1.3130364465069906e-05, "loss": 0.5552389025688171, "step": 6767 }, { "epoch": 1.250557217262764, "grad_norm": 0.0711752325296402, "learning_rate": 1.3128469749690635e-05, "loss": 0.5233983993530273, "step": 6768 }, { "epoch": 1.2507419939716597, "grad_norm": 0.07766459137201309, "learning_rate": 1.312657490980806e-05, "loss": 0.6995660066604614, "step": 6769 }, { "epoch": 1.2509267706805556, "grad_norm": 0.06316307187080383, "learning_rate": 1.312467994549758e-05, "loss": 0.4747420847415924, "step": 6770 }, { "epoch": 1.2511115473894516, "grad_norm": 0.0716712549328804, "learning_rate": 1.3122784856834615e-05, "loss": 0.5163635611534119, "step": 6771 }, { "epoch": 1.2512963240983475, "grad_norm": 0.07563406974077225, "learning_rate": 1.3120889643894584e-05, "loss": 0.5496046543121338, "step": 6772 }, { "epoch": 1.2514811008072433, "grad_norm": 0.09463442116975784, "learning_rate": 1.311899430675291e-05, "loss": 0.625213086605072, "step": 6773 }, { "epoch": 1.2516658775161391, "grad_norm": 0.06193559616804123, "learning_rate": 1.3117098845485016e-05, "loss": 0.404417484998703, "step": 6774 }, { "epoch": 1.251850654225035, "grad_norm": 0.06744953989982605, "learning_rate": 1.3115203260166345e-05, "loss": 0.5050620436668396, "step": 6775 }, { "epoch": 1.2520354309339308, "grad_norm": 0.06575006991624832, "learning_rate": 1.3113307550872327e-05, "loss": 0.4802807569503784, "step": 6776 }, { "epoch": 1.2522202076428266, "grad_norm": 0.06637375056743622, "learning_rate": 1.311141171767841e-05, "loss": 0.45567330718040466, "step": 6777 }, { "epoch": 1.2524049843517224, "grad_norm": 0.07214214652776718, "learning_rate": 1.310951576066004e-05, "loss": 0.5020220279693604, "step": 6778 }, { "epoch": 1.2525897610606183, "grad_norm": 0.06326988339424133, "learning_rate": 1.3107619679892676e-05, "loss": 0.40720850229263306, "step": 6779 }, { "epoch": 1.252774537769514, "grad_norm": 0.09027061611413956, "learning_rate": 1.3105723475451765e-05, "loss": 0.5622882843017578, "step": 6780 }, { "epoch": 1.25295931447841, "grad_norm": 0.07883419841527939, "learning_rate": 1.3103827147412781e-05, "loss": 0.600059986114502, "step": 6781 }, { "epoch": 1.253144091187306, "grad_norm": 0.07642436027526855, "learning_rate": 1.3101930695851186e-05, "loss": 0.4871520698070526, "step": 6782 }, { "epoch": 1.2533288678962018, "grad_norm": 0.07144514471292496, "learning_rate": 1.3100034120842453e-05, "loss": 0.3252977132797241, "step": 6783 }, { "epoch": 1.2535136446050976, "grad_norm": 0.07730413973331451, "learning_rate": 1.309813742246206e-05, "loss": 0.5172085762023926, "step": 6784 }, { "epoch": 1.2536984213139934, "grad_norm": 0.0767231211066246, "learning_rate": 1.309624060078549e-05, "loss": 0.551487147808075, "step": 6785 }, { "epoch": 1.2538831980228893, "grad_norm": 0.05568498373031616, "learning_rate": 1.3094343655888233e-05, "loss": 0.3281753957271576, "step": 6786 }, { "epoch": 1.254067974731785, "grad_norm": 0.08305204659700394, "learning_rate": 1.309244658784578e-05, "loss": 0.6095369458198547, "step": 6787 }, { "epoch": 1.254252751440681, "grad_norm": 0.06953762471675873, "learning_rate": 1.3090549396733626e-05, "loss": 0.5702585577964783, "step": 6788 }, { "epoch": 1.2544375281495768, "grad_norm": 0.06709403544664383, "learning_rate": 1.3088652082627276e-05, "loss": 0.5290798544883728, "step": 6789 }, { "epoch": 1.2546223048584726, "grad_norm": 0.0815216526389122, "learning_rate": 1.3086754645602235e-05, "loss": 0.5775642395019531, "step": 6790 }, { "epoch": 1.2548070815673684, "grad_norm": 0.07735057175159454, "learning_rate": 1.308485708573402e-05, "loss": 0.5133861899375916, "step": 6791 }, { "epoch": 1.2549918582762642, "grad_norm": 0.07463232427835464, "learning_rate": 1.3082959403098139e-05, "loss": 0.4989258050918579, "step": 6792 }, { "epoch": 1.25517663498516, "grad_norm": 0.07572955638170242, "learning_rate": 1.3081061597770124e-05, "loss": 0.5668498277664185, "step": 6793 }, { "epoch": 1.255361411694056, "grad_norm": 0.0815187320113182, "learning_rate": 1.3079163669825495e-05, "loss": 0.5903770327568054, "step": 6794 }, { "epoch": 1.2555461884029517, "grad_norm": 0.08574850112199783, "learning_rate": 1.3077265619339783e-05, "loss": 0.7418274283409119, "step": 6795 }, { "epoch": 1.2557309651118476, "grad_norm": 0.068257175385952, "learning_rate": 1.307536744638853e-05, "loss": 0.47835734486579895, "step": 6796 }, { "epoch": 1.2559157418207434, "grad_norm": 0.0685720145702362, "learning_rate": 1.3073469151047272e-05, "loss": 0.415475994348526, "step": 6797 }, { "epoch": 1.2561005185296392, "grad_norm": 0.06705527752637863, "learning_rate": 1.3071570733391558e-05, "loss": 0.5454516410827637, "step": 6798 }, { "epoch": 1.256285295238535, "grad_norm": 0.06844533234834671, "learning_rate": 1.3069672193496938e-05, "loss": 0.4502028822898865, "step": 6799 }, { "epoch": 1.256470071947431, "grad_norm": 0.06797377020120621, "learning_rate": 1.306777353143897e-05, "loss": 0.4507002532482147, "step": 6800 }, { "epoch": 1.256654848656327, "grad_norm": 0.07801464200019836, "learning_rate": 1.3065874747293212e-05, "loss": 0.490688294172287, "step": 6801 }, { "epoch": 1.2568396253652228, "grad_norm": 0.09060056507587433, "learning_rate": 1.3063975841135232e-05, "loss": 0.6962713003158569, "step": 6802 }, { "epoch": 1.2570244020741186, "grad_norm": 0.09092527627944946, "learning_rate": 1.3062076813040601e-05, "loss": 0.617733359336853, "step": 6803 }, { "epoch": 1.2572091787830144, "grad_norm": 0.0821225643157959, "learning_rate": 1.306017766308489e-05, "loss": 0.6366010904312134, "step": 6804 }, { "epoch": 1.2573939554919102, "grad_norm": 0.0752747431397438, "learning_rate": 1.3058278391343682e-05, "loss": 0.6357575058937073, "step": 6805 }, { "epoch": 1.257578732200806, "grad_norm": 0.06302738189697266, "learning_rate": 1.3056378997892565e-05, "loss": 0.43746256828308105, "step": 6806 }, { "epoch": 1.257763508909702, "grad_norm": 0.07448502629995346, "learning_rate": 1.3054479482807122e-05, "loss": 0.5271299481391907, "step": 6807 }, { "epoch": 1.2579482856185977, "grad_norm": 0.07659098505973816, "learning_rate": 1.3052579846162957e-05, "loss": 0.580751359462738, "step": 6808 }, { "epoch": 1.2581330623274936, "grad_norm": 0.08074422180652618, "learning_rate": 1.3050680088035658e-05, "loss": 0.4916374087333679, "step": 6809 }, { "epoch": 1.2583178390363894, "grad_norm": 0.05657816305756569, "learning_rate": 1.304878020850084e-05, "loss": 0.40220433473587036, "step": 6810 }, { "epoch": 1.2585026157452854, "grad_norm": 0.0831356793642044, "learning_rate": 1.3046880207634109e-05, "loss": 0.6043918132781982, "step": 6811 }, { "epoch": 1.2586873924541813, "grad_norm": 0.09046194702386856, "learning_rate": 1.3044980085511076e-05, "loss": 0.6078789830207825, "step": 6812 }, { "epoch": 1.258872169163077, "grad_norm": 0.06054454669356346, "learning_rate": 1.3043079842207363e-05, "loss": 0.40452149510383606, "step": 6813 }, { "epoch": 1.259056945871973, "grad_norm": 0.06690578907728195, "learning_rate": 1.3041179477798593e-05, "loss": 0.5067480206489563, "step": 6814 }, { "epoch": 1.2592417225808687, "grad_norm": 0.079596608877182, "learning_rate": 1.3039278992360393e-05, "loss": 0.5522646903991699, "step": 6815 }, { "epoch": 1.2594264992897646, "grad_norm": 0.0601852685213089, "learning_rate": 1.30373783859684e-05, "loss": 0.41679537296295166, "step": 6816 }, { "epoch": 1.2596112759986604, "grad_norm": 0.0934712216258049, "learning_rate": 1.3035477658698247e-05, "loss": 0.6759485602378845, "step": 6817 }, { "epoch": 1.2597960527075562, "grad_norm": 0.09387373924255371, "learning_rate": 1.3033576810625583e-05, "loss": 0.7017277479171753, "step": 6818 }, { "epoch": 1.259980829416452, "grad_norm": 0.10304391384124756, "learning_rate": 1.3031675841826052e-05, "loss": 0.7227799892425537, "step": 6819 }, { "epoch": 1.260165606125348, "grad_norm": 0.051855817437171936, "learning_rate": 1.3029774752375307e-05, "loss": 0.32178065180778503, "step": 6820 }, { "epoch": 1.2603503828342437, "grad_norm": 0.08134342730045319, "learning_rate": 1.3027873542349005e-05, "loss": 0.5557973980903625, "step": 6821 }, { "epoch": 1.2605351595431395, "grad_norm": 0.07952242344617844, "learning_rate": 1.302597221182281e-05, "loss": 0.6073988080024719, "step": 6822 }, { "epoch": 1.2607199362520354, "grad_norm": 0.0611259751021862, "learning_rate": 1.3024070760872389e-05, "loss": 0.42082422971725464, "step": 6823 }, { "epoch": 1.2609047129609312, "grad_norm": 0.07557714730501175, "learning_rate": 1.3022169189573411e-05, "loss": 0.5456272959709167, "step": 6824 }, { "epoch": 1.261089489669827, "grad_norm": 0.08217813819646835, "learning_rate": 1.3020267498001555e-05, "loss": 0.536934494972229, "step": 6825 }, { "epoch": 1.2612742663787229, "grad_norm": 0.06710366904735565, "learning_rate": 1.3018365686232502e-05, "loss": 0.5903006792068481, "step": 6826 }, { "epoch": 1.2614590430876187, "grad_norm": 0.06867175549268723, "learning_rate": 1.3016463754341936e-05, "loss": 0.45960867404937744, "step": 6827 }, { "epoch": 1.2616438197965145, "grad_norm": 0.09854653477668762, "learning_rate": 1.3014561702405552e-05, "loss": 0.62856525182724, "step": 6828 }, { "epoch": 1.2618285965054106, "grad_norm": 0.08738111704587936, "learning_rate": 1.3012659530499043e-05, "loss": 0.5031391382217407, "step": 6829 }, { "epoch": 1.2620133732143064, "grad_norm": 0.06079642102122307, "learning_rate": 1.3010757238698108e-05, "loss": 0.38979560136795044, "step": 6830 }, { "epoch": 1.2621981499232022, "grad_norm": 0.06816396117210388, "learning_rate": 1.3008854827078458e-05, "loss": 0.4951815605163574, "step": 6831 }, { "epoch": 1.262382926632098, "grad_norm": 0.07417619228363037, "learning_rate": 1.3006952295715802e-05, "loss": 0.4765762686729431, "step": 6832 }, { "epoch": 1.2625677033409939, "grad_norm": 0.06951801478862762, "learning_rate": 1.3005049644685847e-05, "loss": 0.4217744469642639, "step": 6833 }, { "epoch": 1.2627524800498897, "grad_norm": 0.08772267401218414, "learning_rate": 1.3003146874064317e-05, "loss": 0.637980043888092, "step": 6834 }, { "epoch": 1.2629372567587855, "grad_norm": 0.07551013678312302, "learning_rate": 1.300124398392694e-05, "loss": 0.529155433177948, "step": 6835 }, { "epoch": 1.2631220334676814, "grad_norm": 0.08979663252830505, "learning_rate": 1.2999340974349442e-05, "loss": 0.6354886889457703, "step": 6836 }, { "epoch": 1.2633068101765772, "grad_norm": 0.06971348822116852, "learning_rate": 1.2997437845407555e-05, "loss": 0.5855482816696167, "step": 6837 }, { "epoch": 1.263491586885473, "grad_norm": 0.07374582439661026, "learning_rate": 1.2995534597177023e-05, "loss": 0.4582844376564026, "step": 6838 }, { "epoch": 1.2636763635943689, "grad_norm": 0.09206511825323105, "learning_rate": 1.2993631229733584e-05, "loss": 0.6397450566291809, "step": 6839 }, { "epoch": 1.263861140303265, "grad_norm": 0.0784001424908638, "learning_rate": 1.2991727743152983e-05, "loss": 0.5534389019012451, "step": 6840 }, { "epoch": 1.2640459170121607, "grad_norm": 0.0904441550374031, "learning_rate": 1.2989824137510984e-05, "loss": 0.8288828134536743, "step": 6841 }, { "epoch": 1.2642306937210566, "grad_norm": 0.09138955175876617, "learning_rate": 1.2987920412883336e-05, "loss": 0.6156802177429199, "step": 6842 }, { "epoch": 1.2644154704299524, "grad_norm": 0.09530110657215118, "learning_rate": 1.2986016569345806e-05, "loss": 0.6880958080291748, "step": 6843 }, { "epoch": 1.2646002471388482, "grad_norm": 0.08604827523231506, "learning_rate": 1.2984112606974155e-05, "loss": 0.6318317651748657, "step": 6844 }, { "epoch": 1.264785023847744, "grad_norm": 0.07958484441041946, "learning_rate": 1.298220852584416e-05, "loss": 0.4509182870388031, "step": 6845 }, { "epoch": 1.2649698005566399, "grad_norm": 0.07328151166439056, "learning_rate": 1.2980304326031593e-05, "loss": 0.5359677672386169, "step": 6846 }, { "epoch": 1.2651545772655357, "grad_norm": 0.06363991647958755, "learning_rate": 1.2978400007612242e-05, "loss": 0.46096542477607727, "step": 6847 }, { "epoch": 1.2653393539744315, "grad_norm": 0.08119732141494751, "learning_rate": 1.2976495570661888e-05, "loss": 0.49483710527420044, "step": 6848 }, { "epoch": 1.2655241306833274, "grad_norm": 0.05809759348630905, "learning_rate": 1.2974591015256324e-05, "loss": 0.41567057371139526, "step": 6849 }, { "epoch": 1.2657089073922232, "grad_norm": 0.059295497834682465, "learning_rate": 1.2972686341471338e-05, "loss": 0.419766366481781, "step": 6850 }, { "epoch": 1.265893684101119, "grad_norm": 0.07198849320411682, "learning_rate": 1.2970781549382743e-05, "loss": 0.46864405274391174, "step": 6851 }, { "epoch": 1.2660784608100148, "grad_norm": 0.06495899707078934, "learning_rate": 1.2968876639066335e-05, "loss": 0.36471015214920044, "step": 6852 }, { "epoch": 1.2662632375189107, "grad_norm": 0.07884179800748825, "learning_rate": 1.2966971610597922e-05, "loss": 0.4284929037094116, "step": 6853 }, { "epoch": 1.2664480142278065, "grad_norm": 0.06695716083049774, "learning_rate": 1.2965066464053323e-05, "loss": 0.42951127886772156, "step": 6854 }, { "epoch": 1.2666327909367023, "grad_norm": 0.10632362216711044, "learning_rate": 1.2963161199508356e-05, "loss": 0.6579766273498535, "step": 6855 }, { "epoch": 1.2668175676455982, "grad_norm": 0.05297665670514107, "learning_rate": 1.2961255817038842e-05, "loss": 0.3616389334201813, "step": 6856 }, { "epoch": 1.267002344354494, "grad_norm": 0.08395391702651978, "learning_rate": 1.2959350316720613e-05, "loss": 0.6917855143547058, "step": 6857 }, { "epoch": 1.26718712106339, "grad_norm": 0.09088056534528732, "learning_rate": 1.29574446986295e-05, "loss": 0.6455844640731812, "step": 6858 }, { "epoch": 1.2673718977722859, "grad_norm": 0.10937219858169556, "learning_rate": 1.295553896284134e-05, "loss": 0.7937737703323364, "step": 6859 }, { "epoch": 1.2675566744811817, "grad_norm": 0.09938866645097733, "learning_rate": 1.2953633109431975e-05, "loss": 0.7208584547042847, "step": 6860 }, { "epoch": 1.2677414511900775, "grad_norm": 0.06810204684734344, "learning_rate": 1.2951727138477255e-05, "loss": 0.3876679539680481, "step": 6861 }, { "epoch": 1.2679262278989734, "grad_norm": 0.07531758397817612, "learning_rate": 1.294982105005303e-05, "loss": 0.5154255628585815, "step": 6862 }, { "epoch": 1.2681110046078692, "grad_norm": 0.08114465326070786, "learning_rate": 1.2947914844235154e-05, "loss": 0.4871000349521637, "step": 6863 }, { "epoch": 1.268295781316765, "grad_norm": 0.09290967136621475, "learning_rate": 1.2946008521099488e-05, "loss": 0.678450882434845, "step": 6864 }, { "epoch": 1.2684805580256608, "grad_norm": 0.05580367147922516, "learning_rate": 1.2944102080721905e-05, "loss": 0.36511459946632385, "step": 6865 }, { "epoch": 1.2686653347345567, "grad_norm": 0.0826374813914299, "learning_rate": 1.2942195523178268e-05, "loss": 0.6721389293670654, "step": 6866 }, { "epoch": 1.2688501114434525, "grad_norm": 0.08435200899839401, "learning_rate": 1.2940288848544451e-05, "loss": 0.5716503262519836, "step": 6867 }, { "epoch": 1.2690348881523483, "grad_norm": 0.08023706823587418, "learning_rate": 1.2938382056896342e-05, "loss": 0.5046827793121338, "step": 6868 }, { "epoch": 1.2692196648612444, "grad_norm": 0.06476599723100662, "learning_rate": 1.293647514830982e-05, "loss": 0.47997936606407166, "step": 6869 }, { "epoch": 1.2694044415701402, "grad_norm": 0.06006301939487457, "learning_rate": 1.2934568122860766e-05, "loss": 0.44025835394859314, "step": 6870 }, { "epoch": 1.269589218279036, "grad_norm": 0.0865289717912674, "learning_rate": 1.293266098062509e-05, "loss": 0.6664592623710632, "step": 6871 }, { "epoch": 1.2697739949879319, "grad_norm": 0.0914430171251297, "learning_rate": 1.2930753721678681e-05, "loss": 0.6752035021781921, "step": 6872 }, { "epoch": 1.2699587716968277, "grad_norm": 0.07003988325595856, "learning_rate": 1.2928846346097442e-05, "loss": 0.5116729140281677, "step": 6873 }, { "epoch": 1.2701435484057235, "grad_norm": 0.0638568103313446, "learning_rate": 1.2926938853957278e-05, "loss": 0.38800185918807983, "step": 6874 }, { "epoch": 1.2703283251146193, "grad_norm": 0.0729961097240448, "learning_rate": 1.2925031245334112e-05, "loss": 0.38727983832359314, "step": 6875 }, { "epoch": 1.2705131018235152, "grad_norm": 0.08079040795564651, "learning_rate": 1.2923123520303848e-05, "loss": 0.5495513677597046, "step": 6876 }, { "epoch": 1.270697878532411, "grad_norm": 0.06676933914422989, "learning_rate": 1.2921215678942413e-05, "loss": 0.30596923828125, "step": 6877 }, { "epoch": 1.2708826552413068, "grad_norm": 0.0731934905052185, "learning_rate": 1.2919307721325737e-05, "loss": 0.504347562789917, "step": 6878 }, { "epoch": 1.2710674319502027, "grad_norm": 0.08271405100822449, "learning_rate": 1.2917399647529747e-05, "loss": 0.6242300868034363, "step": 6879 }, { "epoch": 1.2712522086590985, "grad_norm": 0.08044064044952393, "learning_rate": 1.2915491457630376e-05, "loss": 0.6209437847137451, "step": 6880 }, { "epoch": 1.2714369853679943, "grad_norm": 0.06818465143442154, "learning_rate": 1.2913583151703567e-05, "loss": 0.5452882051467896, "step": 6881 }, { "epoch": 1.2716217620768901, "grad_norm": 0.06986252218484879, "learning_rate": 1.2911674729825264e-05, "loss": 0.4770204722881317, "step": 6882 }, { "epoch": 1.271806538785786, "grad_norm": 0.07381994277238846, "learning_rate": 1.2909766192071416e-05, "loss": 0.49883171916007996, "step": 6883 }, { "epoch": 1.2719913154946818, "grad_norm": 0.09339702874422073, "learning_rate": 1.2907857538517976e-05, "loss": 0.7252719402313232, "step": 6884 }, { "epoch": 1.2721760922035776, "grad_norm": 0.08802812546491623, "learning_rate": 1.2905948769240905e-05, "loss": 0.5160189867019653, "step": 6885 }, { "epoch": 1.2723608689124735, "grad_norm": 0.11236675083637238, "learning_rate": 1.2904039884316163e-05, "loss": 0.7686521410942078, "step": 6886 }, { "epoch": 1.2725456456213693, "grad_norm": 0.07604110985994339, "learning_rate": 1.2902130883819724e-05, "loss": 0.5760014653205872, "step": 6887 }, { "epoch": 1.2727304223302653, "grad_norm": 0.08620353788137436, "learning_rate": 1.290022176782755e-05, "loss": 0.5739667415618896, "step": 6888 }, { "epoch": 1.2729151990391612, "grad_norm": 0.061291273683309555, "learning_rate": 1.2898312536415628e-05, "loss": 0.4189784824848175, "step": 6889 }, { "epoch": 1.273099975748057, "grad_norm": 0.0792623907327652, "learning_rate": 1.2896403189659929e-05, "loss": 0.5867186188697815, "step": 6890 }, { "epoch": 1.2732847524569528, "grad_norm": 0.06291884183883667, "learning_rate": 1.2894493727636448e-05, "loss": 0.3795328140258789, "step": 6891 }, { "epoch": 1.2734695291658487, "grad_norm": 0.0688159242272377, "learning_rate": 1.2892584150421175e-05, "loss": 0.39740902185440063, "step": 6892 }, { "epoch": 1.2736543058747445, "grad_norm": 0.08040191978216171, "learning_rate": 1.2890674458090098e-05, "loss": 0.6047346591949463, "step": 6893 }, { "epoch": 1.2738390825836403, "grad_norm": 0.0698571503162384, "learning_rate": 1.288876465071922e-05, "loss": 0.4499545097351074, "step": 6894 }, { "epoch": 1.2740238592925361, "grad_norm": 0.07487684488296509, "learning_rate": 1.2886854728384552e-05, "loss": 0.565583348274231, "step": 6895 }, { "epoch": 1.274208636001432, "grad_norm": 0.06601119041442871, "learning_rate": 1.2884944691162096e-05, "loss": 0.44833654165267944, "step": 6896 }, { "epoch": 1.2743934127103278, "grad_norm": 0.08068963885307312, "learning_rate": 1.2883034539127865e-05, "loss": 0.6059198975563049, "step": 6897 }, { "epoch": 1.2745781894192236, "grad_norm": 0.0770842581987381, "learning_rate": 1.2881124272357881e-05, "loss": 0.44424009323120117, "step": 6898 }, { "epoch": 1.2747629661281197, "grad_norm": 0.0816042497754097, "learning_rate": 1.2879213890928166e-05, "loss": 0.5663117170333862, "step": 6899 }, { "epoch": 1.2749477428370155, "grad_norm": 0.05652960389852524, "learning_rate": 1.2877303394914744e-05, "loss": 0.47078806161880493, "step": 6900 }, { "epoch": 1.2751325195459113, "grad_norm": 0.08161277323961258, "learning_rate": 1.2875392784393648e-05, "loss": 0.5621728301048279, "step": 6901 }, { "epoch": 1.2753172962548072, "grad_norm": 0.08607976138591766, "learning_rate": 1.2873482059440915e-05, "loss": 0.7239657640457153, "step": 6902 }, { "epoch": 1.275502072963703, "grad_norm": 0.0837235152721405, "learning_rate": 1.2871571220132589e-05, "loss": 0.5190312266349792, "step": 6903 }, { "epoch": 1.2756868496725988, "grad_norm": 0.09978122264146805, "learning_rate": 1.2869660266544713e-05, "loss": 0.555137574672699, "step": 6904 }, { "epoch": 1.2758716263814947, "grad_norm": 0.06831305474042892, "learning_rate": 1.2867749198753333e-05, "loss": 0.41242703795433044, "step": 6905 }, { "epoch": 1.2760564030903905, "grad_norm": 0.07195712625980377, "learning_rate": 1.2865838016834506e-05, "loss": 0.557858407497406, "step": 6906 }, { "epoch": 1.2762411797992863, "grad_norm": 0.08062034100294113, "learning_rate": 1.2863926720864295e-05, "loss": 0.45327404141426086, "step": 6907 }, { "epoch": 1.2764259565081821, "grad_norm": 0.07121732085943222, "learning_rate": 1.2862015310918759e-05, "loss": 0.5176864862442017, "step": 6908 }, { "epoch": 1.276610733217078, "grad_norm": 0.06773148477077484, "learning_rate": 1.2860103787073969e-05, "loss": 0.5205627083778381, "step": 6909 }, { "epoch": 1.2767955099259738, "grad_norm": 0.05887208133935928, "learning_rate": 1.2858192149405997e-05, "loss": 0.3680979013442993, "step": 6910 }, { "epoch": 1.2769802866348696, "grad_norm": 0.08620485663414001, "learning_rate": 1.2856280397990917e-05, "loss": 0.6715238690376282, "step": 6911 }, { "epoch": 1.2771650633437654, "grad_norm": 0.058001063764095306, "learning_rate": 1.2854368532904815e-05, "loss": 0.3583117723464966, "step": 6912 }, { "epoch": 1.2773498400526613, "grad_norm": 0.09623677283525467, "learning_rate": 1.2852456554223775e-05, "loss": 0.5942349433898926, "step": 6913 }, { "epoch": 1.277534616761557, "grad_norm": 0.06850364059209824, "learning_rate": 1.2850544462023891e-05, "loss": 0.428526371717453, "step": 6914 }, { "epoch": 1.277719393470453, "grad_norm": 0.06925079971551895, "learning_rate": 1.284863225638125e-05, "loss": 0.4980008006095886, "step": 6915 }, { "epoch": 1.2779041701793488, "grad_norm": 0.05816519632935524, "learning_rate": 1.2846719937371961e-05, "loss": 0.3704940676689148, "step": 6916 }, { "epoch": 1.2780889468882448, "grad_norm": 0.06680703908205032, "learning_rate": 1.2844807505072125e-05, "loss": 0.43866413831710815, "step": 6917 }, { "epoch": 1.2782737235971406, "grad_norm": 0.058567631989717484, "learning_rate": 1.284289495955785e-05, "loss": 0.37732750177383423, "step": 6918 }, { "epoch": 1.2784585003060365, "grad_norm": 0.07167914509773254, "learning_rate": 1.2840982300905246e-05, "loss": 0.44376444816589355, "step": 6919 }, { "epoch": 1.2786432770149323, "grad_norm": 0.08945298194885254, "learning_rate": 1.2839069529190441e-05, "loss": 0.5941975712776184, "step": 6920 }, { "epoch": 1.2788280537238281, "grad_norm": 0.08231666684150696, "learning_rate": 1.283715664448955e-05, "loss": 0.4978278875350952, "step": 6921 }, { "epoch": 1.279012830432724, "grad_norm": 0.08088124543428421, "learning_rate": 1.2835243646878699e-05, "loss": 0.6616016030311584, "step": 6922 }, { "epoch": 1.2791976071416198, "grad_norm": 0.07822462916374207, "learning_rate": 1.283333053643402e-05, "loss": 0.5347031950950623, "step": 6923 }, { "epoch": 1.2793823838505156, "grad_norm": 0.05929892882704735, "learning_rate": 1.2831417313231653e-05, "loss": 0.38059520721435547, "step": 6924 }, { "epoch": 1.2795671605594114, "grad_norm": 0.09595592319965363, "learning_rate": 1.2829503977347734e-05, "loss": 0.5843982696533203, "step": 6925 }, { "epoch": 1.2797519372683073, "grad_norm": 0.08812601119279861, "learning_rate": 1.2827590528858409e-05, "loss": 0.5967447757720947, "step": 6926 }, { "epoch": 1.279936713977203, "grad_norm": 0.06501595675945282, "learning_rate": 1.2825676967839828e-05, "loss": 0.4090256989002228, "step": 6927 }, { "epoch": 1.2801214906860992, "grad_norm": 0.059129539877176285, "learning_rate": 1.2823763294368145e-05, "loss": 0.4044893980026245, "step": 6928 }, { "epoch": 1.280306267394995, "grad_norm": 0.09271105378866196, "learning_rate": 1.2821849508519513e-05, "loss": 0.6913366317749023, "step": 6929 }, { "epoch": 1.2804910441038908, "grad_norm": 0.07255643606185913, "learning_rate": 1.2819935610370102e-05, "loss": 0.4509837031364441, "step": 6930 }, { "epoch": 1.2806758208127866, "grad_norm": 0.0755506157875061, "learning_rate": 1.2818021599996079e-05, "loss": 0.5539512038230896, "step": 6931 }, { "epoch": 1.2808605975216825, "grad_norm": 0.051255643367767334, "learning_rate": 1.2816107477473607e-05, "loss": 0.3722156882286072, "step": 6932 }, { "epoch": 1.2810453742305783, "grad_norm": 0.06447292864322662, "learning_rate": 1.281419324287887e-05, "loss": 0.46363601088523865, "step": 6933 }, { "epoch": 1.2812301509394741, "grad_norm": 0.09020563215017319, "learning_rate": 1.2812278896288048e-05, "loss": 0.692448616027832, "step": 6934 }, { "epoch": 1.28141492764837, "grad_norm": 0.06983469426631927, "learning_rate": 1.2810364437777324e-05, "loss": 0.40606072545051575, "step": 6935 }, { "epoch": 1.2815997043572658, "grad_norm": 0.0692342072725296, "learning_rate": 1.2808449867422885e-05, "loss": 0.5313026905059814, "step": 6936 }, { "epoch": 1.2817844810661616, "grad_norm": 0.06339572370052338, "learning_rate": 1.2806535185300931e-05, "loss": 0.4123586118221283, "step": 6937 }, { "epoch": 1.2819692577750574, "grad_norm": 0.08292673528194427, "learning_rate": 1.2804620391487658e-05, "loss": 0.4507513642311096, "step": 6938 }, { "epoch": 1.2821540344839533, "grad_norm": 0.05868062749505043, "learning_rate": 1.2802705486059264e-05, "loss": 0.4681969881057739, "step": 6939 }, { "epoch": 1.282338811192849, "grad_norm": 0.07713848352432251, "learning_rate": 1.2800790469091964e-05, "loss": 0.5836921334266663, "step": 6940 }, { "epoch": 1.282523587901745, "grad_norm": 0.06986226886510849, "learning_rate": 1.2798875340661964e-05, "loss": 0.5518940687179565, "step": 6941 }, { "epoch": 1.2827083646106407, "grad_norm": 0.08023731410503387, "learning_rate": 1.2796960100845483e-05, "loss": 0.5069800615310669, "step": 6942 }, { "epoch": 1.2828931413195366, "grad_norm": 0.08042874932289124, "learning_rate": 1.2795044749718737e-05, "loss": 0.5868576169013977, "step": 6943 }, { "epoch": 1.2830779180284324, "grad_norm": 0.07451844215393066, "learning_rate": 1.2793129287357959e-05, "loss": 0.5001909732818604, "step": 6944 }, { "epoch": 1.2832626947373282, "grad_norm": 0.05197039991617203, "learning_rate": 1.2791213713839374e-05, "loss": 0.3045160472393036, "step": 6945 }, { "epoch": 1.2834474714462243, "grad_norm": 0.07311443239450455, "learning_rate": 1.2789298029239212e-05, "loss": 0.5387001633644104, "step": 6946 }, { "epoch": 1.2836322481551201, "grad_norm": 0.07509905844926834, "learning_rate": 1.2787382233633718e-05, "loss": 0.5643014311790466, "step": 6947 }, { "epoch": 1.283817024864016, "grad_norm": 0.060981862246990204, "learning_rate": 1.2785466327099132e-05, "loss": 0.4061810076236725, "step": 6948 }, { "epoch": 1.2840018015729118, "grad_norm": 0.07428246736526489, "learning_rate": 1.2783550309711696e-05, "loss": 0.6092492341995239, "step": 6949 }, { "epoch": 1.2841865782818076, "grad_norm": 0.08736100047826767, "learning_rate": 1.2781634181547671e-05, "loss": 0.6303153038024902, "step": 6950 }, { "epoch": 1.2843713549907034, "grad_norm": 0.07647480070590973, "learning_rate": 1.277971794268331e-05, "loss": 0.6002855896949768, "step": 6951 }, { "epoch": 1.2845561316995993, "grad_norm": 0.06566685438156128, "learning_rate": 1.2777801593194865e-05, "loss": 0.432452529668808, "step": 6952 }, { "epoch": 1.284740908408495, "grad_norm": 0.05242609977722168, "learning_rate": 1.2775885133158612e-05, "loss": 0.35408815741539, "step": 6953 }, { "epoch": 1.284925685117391, "grad_norm": 0.06851579248905182, "learning_rate": 1.2773968562650816e-05, "loss": 0.45086902379989624, "step": 6954 }, { "epoch": 1.2851104618262867, "grad_norm": 0.05527650564908981, "learning_rate": 1.277205188174775e-05, "loss": 0.3405957520008087, "step": 6955 }, { "epoch": 1.2852952385351826, "grad_norm": 0.06148513779044151, "learning_rate": 1.2770135090525683e-05, "loss": 0.44460922479629517, "step": 6956 }, { "epoch": 1.2854800152440786, "grad_norm": 0.06990844756364822, "learning_rate": 1.2768218189060915e-05, "loss": 0.4537244737148285, "step": 6957 }, { "epoch": 1.2856647919529745, "grad_norm": 0.07241354137659073, "learning_rate": 1.2766301177429722e-05, "loss": 0.6087204813957214, "step": 6958 }, { "epoch": 1.2858495686618703, "grad_norm": 0.06654617190361023, "learning_rate": 1.2764384055708394e-05, "loss": 0.44286319613456726, "step": 6959 }, { "epoch": 1.286034345370766, "grad_norm": 0.056730449199676514, "learning_rate": 1.2762466823973231e-05, "loss": 0.4290338456630707, "step": 6960 }, { "epoch": 1.286219122079662, "grad_norm": 0.07880526781082153, "learning_rate": 1.2760549482300535e-05, "loss": 0.5071108341217041, "step": 6961 }, { "epoch": 1.2864038987885578, "grad_norm": 0.06961959600448608, "learning_rate": 1.2758632030766603e-05, "loss": 0.5442366003990173, "step": 6962 }, { "epoch": 1.2865886754974536, "grad_norm": 0.06874290853738785, "learning_rate": 1.2756714469447744e-05, "loss": 0.4268724024295807, "step": 6963 }, { "epoch": 1.2867734522063494, "grad_norm": 0.08130981028079987, "learning_rate": 1.2754796798420279e-05, "loss": 0.5325379967689514, "step": 6964 }, { "epoch": 1.2869582289152453, "grad_norm": 0.05133212357759476, "learning_rate": 1.2752879017760516e-05, "loss": 0.34395477175712585, "step": 6965 }, { "epoch": 1.287143005624141, "grad_norm": 0.0714641585946083, "learning_rate": 1.2750961127544782e-05, "loss": 0.5277564525604248, "step": 6966 }, { "epoch": 1.287327782333037, "grad_norm": 0.067436084151268, "learning_rate": 1.27490431278494e-05, "loss": 0.38900041580200195, "step": 6967 }, { "epoch": 1.2875125590419327, "grad_norm": 0.07422590255737305, "learning_rate": 1.2747125018750708e-05, "loss": 0.45272085070610046, "step": 6968 }, { "epoch": 1.2876973357508286, "grad_norm": 0.07180160284042358, "learning_rate": 1.2745206800325029e-05, "loss": 0.5089162588119507, "step": 6969 }, { "epoch": 1.2878821124597244, "grad_norm": 0.0845794528722763, "learning_rate": 1.2743288472648709e-05, "loss": 0.626602292060852, "step": 6970 }, { "epoch": 1.2880668891686202, "grad_norm": 0.07468029856681824, "learning_rate": 1.2741370035798093e-05, "loss": 0.490026593208313, "step": 6971 }, { "epoch": 1.288251665877516, "grad_norm": 0.07268189638853073, "learning_rate": 1.2739451489849524e-05, "loss": 0.45294222235679626, "step": 6972 }, { "epoch": 1.2884364425864119, "grad_norm": 0.07588563859462738, "learning_rate": 1.2737532834879356e-05, "loss": 0.5010835528373718, "step": 6973 }, { "epoch": 1.2886212192953077, "grad_norm": 0.08507157117128372, "learning_rate": 1.2735614070963948e-05, "loss": 0.5612196922302246, "step": 6974 }, { "epoch": 1.2888059960042035, "grad_norm": 0.0746978223323822, "learning_rate": 1.273369519817966e-05, "loss": 0.5309698581695557, "step": 6975 }, { "epoch": 1.2889907727130996, "grad_norm": 0.07962056994438171, "learning_rate": 1.2731776216602849e-05, "loss": 0.5472279787063599, "step": 6976 }, { "epoch": 1.2891755494219954, "grad_norm": 0.06945842504501343, "learning_rate": 1.2729857126309898e-05, "loss": 0.4632667303085327, "step": 6977 }, { "epoch": 1.2893603261308912, "grad_norm": 0.06587348133325577, "learning_rate": 1.2727937927377172e-05, "loss": 0.3979934751987457, "step": 6978 }, { "epoch": 1.289545102839787, "grad_norm": 0.06981249898672104, "learning_rate": 1.272601861988105e-05, "loss": 0.5129185318946838, "step": 6979 }, { "epoch": 1.289729879548683, "grad_norm": 0.06999025493860245, "learning_rate": 1.2724099203897915e-05, "loss": 0.4692162573337555, "step": 6980 }, { "epoch": 1.2899146562575787, "grad_norm": 0.08427304029464722, "learning_rate": 1.2722179679504156e-05, "loss": 0.5699877738952637, "step": 6981 }, { "epoch": 1.2900994329664746, "grad_norm": 0.08691181987524033, "learning_rate": 1.2720260046776161e-05, "loss": 0.5394098162651062, "step": 6982 }, { "epoch": 1.2902842096753704, "grad_norm": 0.08646968007087708, "learning_rate": 1.2718340305790326e-05, "loss": 0.6010709404945374, "step": 6983 }, { "epoch": 1.2904689863842662, "grad_norm": 0.07945973426103592, "learning_rate": 1.2716420456623055e-05, "loss": 0.6645624041557312, "step": 6984 }, { "epoch": 1.290653763093162, "grad_norm": 0.07346010953187943, "learning_rate": 1.2714500499350746e-05, "loss": 0.46226420998573303, "step": 6985 }, { "epoch": 1.2908385398020579, "grad_norm": 0.08596549928188324, "learning_rate": 1.271258043404981e-05, "loss": 0.7146180272102356, "step": 6986 }, { "epoch": 1.291023316510954, "grad_norm": 0.07501712441444397, "learning_rate": 1.271066026079666e-05, "loss": 0.4995262622833252, "step": 6987 }, { "epoch": 1.2912080932198498, "grad_norm": 0.061273232102394104, "learning_rate": 1.2708739979667713e-05, "loss": 0.3552984595298767, "step": 6988 }, { "epoch": 1.2913928699287456, "grad_norm": 0.09160473197698593, "learning_rate": 1.2706819590739385e-05, "loss": 0.6788548827171326, "step": 6989 }, { "epoch": 1.2915776466376414, "grad_norm": 0.08938539773225784, "learning_rate": 1.2704899094088108e-05, "loss": 0.5572099089622498, "step": 6990 }, { "epoch": 1.2917624233465372, "grad_norm": 0.07936986535787582, "learning_rate": 1.2702978489790312e-05, "loss": 0.6609634160995483, "step": 6991 }, { "epoch": 1.291947200055433, "grad_norm": 0.07230542600154877, "learning_rate": 1.2701057777922428e-05, "loss": 0.5651715397834778, "step": 6992 }, { "epoch": 1.292131976764329, "grad_norm": 0.06436077505350113, "learning_rate": 1.2699136958560893e-05, "loss": 0.3970441520214081, "step": 6993 }, { "epoch": 1.2923167534732247, "grad_norm": 0.06204066798090935, "learning_rate": 1.2697216031782151e-05, "loss": 0.49848824739456177, "step": 6994 }, { "epoch": 1.2925015301821206, "grad_norm": 0.0783504843711853, "learning_rate": 1.269529499766265e-05, "loss": 0.5778496265411377, "step": 6995 }, { "epoch": 1.2926863068910164, "grad_norm": 0.06484290212392807, "learning_rate": 1.2693373856278843e-05, "loss": 0.4126026928424835, "step": 6996 }, { "epoch": 1.2928710835999122, "grad_norm": 0.08233322203159332, "learning_rate": 1.2691452607707182e-05, "loss": 0.6408974528312683, "step": 6997 }, { "epoch": 1.293055860308808, "grad_norm": 0.07067559659481049, "learning_rate": 1.2689531252024127e-05, "loss": 0.44265973567962646, "step": 6998 }, { "epoch": 1.2932406370177039, "grad_norm": 0.08672047406435013, "learning_rate": 1.2687609789306144e-05, "loss": 0.7062324285507202, "step": 6999 }, { "epoch": 1.2934254137265997, "grad_norm": 0.0679650530219078, "learning_rate": 1.2685688219629697e-05, "loss": 0.5295279622077942, "step": 7000 }, { "epoch": 1.2934254137265997, "eval_loss": 0.5948106646537781, "eval_runtime": 243.2682, "eval_samples_per_second": 74.934, "eval_steps_per_second": 9.368, "step": 7000 }, { "epoch": 1.2936101904354955, "grad_norm": 0.07759220898151398, "learning_rate": 1.2683766543071263e-05, "loss": 0.570826530456543, "step": 7001 }, { "epoch": 1.2937949671443914, "grad_norm": 0.07804378122091293, "learning_rate": 1.2681844759707316e-05, "loss": 0.47277188301086426, "step": 7002 }, { "epoch": 1.2939797438532872, "grad_norm": 0.07858088612556458, "learning_rate": 1.2679922869614341e-05, "loss": 0.5211628079414368, "step": 7003 }, { "epoch": 1.294164520562183, "grad_norm": 0.06452016532421112, "learning_rate": 1.2678000872868817e-05, "loss": 0.4292004108428955, "step": 7004 }, { "epoch": 1.294349297271079, "grad_norm": 0.08996476233005524, "learning_rate": 1.2676078769547238e-05, "loss": 0.6236442923545837, "step": 7005 }, { "epoch": 1.2945340739799749, "grad_norm": 0.0958971306681633, "learning_rate": 1.2674156559726096e-05, "loss": 0.6066073775291443, "step": 7006 }, { "epoch": 1.2947188506888707, "grad_norm": 0.06967156380414963, "learning_rate": 1.2672234243481889e-05, "loss": 0.4723014533519745, "step": 7007 }, { "epoch": 1.2949036273977665, "grad_norm": 0.09893563389778137, "learning_rate": 1.2670311820891122e-05, "loss": 0.7446179389953613, "step": 7008 }, { "epoch": 1.2950884041066624, "grad_norm": 0.10600033402442932, "learning_rate": 1.2668389292030296e-05, "loss": 0.7669276595115662, "step": 7009 }, { "epoch": 1.2952731808155582, "grad_norm": 0.06642530858516693, "learning_rate": 1.2666466656975927e-05, "loss": 0.41287514567375183, "step": 7010 }, { "epoch": 1.295457957524454, "grad_norm": 0.09201806038618088, "learning_rate": 1.2664543915804524e-05, "loss": 0.4747542142868042, "step": 7011 }, { "epoch": 1.2956427342333499, "grad_norm": 0.07357966899871826, "learning_rate": 1.2662621068592608e-05, "loss": 0.5911757946014404, "step": 7012 }, { "epoch": 1.2958275109422457, "grad_norm": 0.08051083981990814, "learning_rate": 1.266069811541671e-05, "loss": 0.6123976111412048, "step": 7013 }, { "epoch": 1.2960122876511415, "grad_norm": 0.07193154096603394, "learning_rate": 1.2658775056353347e-05, "loss": 0.5236746072769165, "step": 7014 }, { "epoch": 1.2961970643600373, "grad_norm": 0.06868410855531693, "learning_rate": 1.2656851891479055e-05, "loss": 0.467146635055542, "step": 7015 }, { "epoch": 1.2963818410689334, "grad_norm": 0.06204485893249512, "learning_rate": 1.2654928620870373e-05, "loss": 0.4814089238643646, "step": 7016 }, { "epoch": 1.2965666177778292, "grad_norm": 0.07624665647745132, "learning_rate": 1.2653005244603836e-05, "loss": 0.5406903624534607, "step": 7017 }, { "epoch": 1.296751394486725, "grad_norm": 0.07900479435920715, "learning_rate": 1.2651081762755991e-05, "loss": 0.6114708185195923, "step": 7018 }, { "epoch": 1.2969361711956209, "grad_norm": 0.06851568073034286, "learning_rate": 1.2649158175403384e-05, "loss": 0.41163602471351624, "step": 7019 }, { "epoch": 1.2971209479045167, "grad_norm": 0.08029831945896149, "learning_rate": 1.2647234482622573e-05, "loss": 0.5897607207298279, "step": 7020 }, { "epoch": 1.2973057246134125, "grad_norm": 0.05459596589207649, "learning_rate": 1.2645310684490108e-05, "loss": 0.37470948696136475, "step": 7021 }, { "epoch": 1.2974905013223084, "grad_norm": 0.09061688184738159, "learning_rate": 1.2643386781082555e-05, "loss": 0.5633164048194885, "step": 7022 }, { "epoch": 1.2976752780312042, "grad_norm": 0.08323931694030762, "learning_rate": 1.264146277247648e-05, "loss": 0.5104799866676331, "step": 7023 }, { "epoch": 1.2978600547401, "grad_norm": 0.07911505550146103, "learning_rate": 1.2639538658748449e-05, "loss": 0.6013384461402893, "step": 7024 }, { "epoch": 1.2980448314489959, "grad_norm": 0.09018902480602264, "learning_rate": 1.263761443997504e-05, "loss": 0.5095791220664978, "step": 7025 }, { "epoch": 1.2982296081578917, "grad_norm": 0.07577616721391678, "learning_rate": 1.2635690116232827e-05, "loss": 0.587360143661499, "step": 7026 }, { "epoch": 1.2984143848667875, "grad_norm": 0.07888596504926682, "learning_rate": 1.2633765687598394e-05, "loss": 0.6399943232536316, "step": 7027 }, { "epoch": 1.2985991615756833, "grad_norm": 0.09185665100812912, "learning_rate": 1.2631841154148323e-05, "loss": 0.7375509142875671, "step": 7028 }, { "epoch": 1.2987839382845792, "grad_norm": 0.07888969779014587, "learning_rate": 1.2629916515959211e-05, "loss": 0.4613966643810272, "step": 7029 }, { "epoch": 1.298968714993475, "grad_norm": 0.07805892080068588, "learning_rate": 1.2627991773107651e-05, "loss": 0.482734352350235, "step": 7030 }, { "epoch": 1.2991534917023708, "grad_norm": 0.05919409543275833, "learning_rate": 1.2626066925670237e-05, "loss": 0.35460159182548523, "step": 7031 }, { "epoch": 1.2993382684112667, "grad_norm": 0.07844141125679016, "learning_rate": 1.2624141973723576e-05, "loss": 0.5155153274536133, "step": 7032 }, { "epoch": 1.2995230451201625, "grad_norm": 0.0717112123966217, "learning_rate": 1.2622216917344276e-05, "loss": 0.38670143485069275, "step": 7033 }, { "epoch": 1.2997078218290585, "grad_norm": 0.07971569150686264, "learning_rate": 1.2620291756608948e-05, "loss": 0.5672218799591064, "step": 7034 }, { "epoch": 1.2998925985379544, "grad_norm": 0.07753638923168182, "learning_rate": 1.26183664915942e-05, "loss": 0.5556024312973022, "step": 7035 }, { "epoch": 1.3000773752468502, "grad_norm": 0.06535354256629944, "learning_rate": 1.2616441122376664e-05, "loss": 0.519101083278656, "step": 7036 }, { "epoch": 1.300262151955746, "grad_norm": 0.09425181895494461, "learning_rate": 1.2614515649032955e-05, "loss": 0.7004117369651794, "step": 7037 }, { "epoch": 1.3004469286646418, "grad_norm": 0.08313268423080444, "learning_rate": 1.2612590071639702e-05, "loss": 0.5423683524131775, "step": 7038 }, { "epoch": 1.3006317053735377, "grad_norm": 0.08191510289907455, "learning_rate": 1.2610664390273537e-05, "loss": 0.6264122128486633, "step": 7039 }, { "epoch": 1.3008164820824335, "grad_norm": 0.0811074897646904, "learning_rate": 1.26087386050111e-05, "loss": 0.6357519030570984, "step": 7040 }, { "epoch": 1.3010012587913293, "grad_norm": 0.07255978882312775, "learning_rate": 1.2606812715929024e-05, "loss": 0.43697232007980347, "step": 7041 }, { "epoch": 1.3011860355002252, "grad_norm": 0.07301337271928787, "learning_rate": 1.260488672310396e-05, "loss": 0.4118881821632385, "step": 7042 }, { "epoch": 1.301370812209121, "grad_norm": 0.09116517752408981, "learning_rate": 1.2602960626612555e-05, "loss": 0.7510353922843933, "step": 7043 }, { "epoch": 1.3015555889180168, "grad_norm": 0.08381814509630203, "learning_rate": 1.260103442653146e-05, "loss": 0.47766539454460144, "step": 7044 }, { "epoch": 1.3017403656269129, "grad_norm": 0.08243940770626068, "learning_rate": 1.259910812293733e-05, "loss": 0.7016247510910034, "step": 7045 }, { "epoch": 1.3019251423358087, "grad_norm": 0.06611216813325882, "learning_rate": 1.259718171590683e-05, "loss": 0.4857097566127777, "step": 7046 }, { "epoch": 1.3021099190447045, "grad_norm": 0.07947178184986115, "learning_rate": 1.2595255205516625e-05, "loss": 0.558712899684906, "step": 7047 }, { "epoch": 1.3022946957536004, "grad_norm": 0.09471435844898224, "learning_rate": 1.259332859184338e-05, "loss": 0.7513187527656555, "step": 7048 }, { "epoch": 1.3024794724624962, "grad_norm": 0.06267374753952026, "learning_rate": 1.2591401874963771e-05, "loss": 0.45277971029281616, "step": 7049 }, { "epoch": 1.302664249171392, "grad_norm": 0.06743251532316208, "learning_rate": 1.2589475054954476e-05, "loss": 0.4124307334423065, "step": 7050 }, { "epoch": 1.3028490258802878, "grad_norm": 0.086782306432724, "learning_rate": 1.2587548131892175e-05, "loss": 0.6340530514717102, "step": 7051 }, { "epoch": 1.3030338025891837, "grad_norm": 0.08213392645120621, "learning_rate": 1.2585621105853551e-05, "loss": 0.558947741985321, "step": 7052 }, { "epoch": 1.3032185792980795, "grad_norm": 0.0744403526186943, "learning_rate": 1.2583693976915301e-05, "loss": 0.5660160779953003, "step": 7053 }, { "epoch": 1.3034033560069753, "grad_norm": 0.07753294706344604, "learning_rate": 1.2581766745154114e-05, "loss": 0.5865622758865356, "step": 7054 }, { "epoch": 1.3035881327158712, "grad_norm": 0.054875295609235764, "learning_rate": 1.2579839410646682e-05, "loss": 0.40499284863471985, "step": 7055 }, { "epoch": 1.303772909424767, "grad_norm": 0.07428351789712906, "learning_rate": 1.2577911973469717e-05, "loss": 0.6527612805366516, "step": 7056 }, { "epoch": 1.3039576861336628, "grad_norm": 0.09750670939683914, "learning_rate": 1.2575984433699921e-05, "loss": 0.6454939246177673, "step": 7057 }, { "epoch": 1.3041424628425586, "grad_norm": 0.06648306548595428, "learning_rate": 1.2574056791414003e-05, "loss": 0.46796974539756775, "step": 7058 }, { "epoch": 1.3043272395514545, "grad_norm": 0.08090738207101822, "learning_rate": 1.2572129046688675e-05, "loss": 0.5984477400779724, "step": 7059 }, { "epoch": 1.3045120162603503, "grad_norm": 0.06808750331401825, "learning_rate": 1.2570201199600663e-05, "loss": 0.42916426062583923, "step": 7060 }, { "epoch": 1.3046967929692461, "grad_norm": 0.08575625717639923, "learning_rate": 1.2568273250226681e-05, "loss": 0.6604623198509216, "step": 7061 }, { "epoch": 1.304881569678142, "grad_norm": 0.08025676012039185, "learning_rate": 1.256634519864346e-05, "loss": 0.7682700753211975, "step": 7062 }, { "epoch": 1.3050663463870378, "grad_norm": 0.06096980720758438, "learning_rate": 1.2564417044927728e-05, "loss": 0.41423553228378296, "step": 7063 }, { "epoch": 1.3052511230959338, "grad_norm": 0.06407662481069565, "learning_rate": 1.2562488789156224e-05, "loss": 0.43208426237106323, "step": 7064 }, { "epoch": 1.3054358998048297, "grad_norm": 0.06232353299856186, "learning_rate": 1.2560560431405678e-05, "loss": 0.3925876319408417, "step": 7065 }, { "epoch": 1.3056206765137255, "grad_norm": 0.07849103957414627, "learning_rate": 1.2558631971752842e-05, "loss": 0.7047256231307983, "step": 7066 }, { "epoch": 1.3058054532226213, "grad_norm": 0.09758805483579636, "learning_rate": 1.2556703410274458e-05, "loss": 0.6756837964057922, "step": 7067 }, { "epoch": 1.3059902299315171, "grad_norm": 0.08068440109491348, "learning_rate": 1.2554774747047275e-05, "loss": 0.654943585395813, "step": 7068 }, { "epoch": 1.306175006640413, "grad_norm": 0.08406716585159302, "learning_rate": 1.2552845982148049e-05, "loss": 0.6230036616325378, "step": 7069 }, { "epoch": 1.3063597833493088, "grad_norm": 0.06222580000758171, "learning_rate": 1.2550917115653545e-05, "loss": 0.4030933976173401, "step": 7070 }, { "epoch": 1.3065445600582046, "grad_norm": 0.08229056000709534, "learning_rate": 1.2548988147640518e-05, "loss": 0.5132369995117188, "step": 7071 }, { "epoch": 1.3067293367671005, "grad_norm": 0.08437987416982651, "learning_rate": 1.2547059078185735e-05, "loss": 0.4772125482559204, "step": 7072 }, { "epoch": 1.3069141134759963, "grad_norm": 0.062136292457580566, "learning_rate": 1.2545129907365973e-05, "loss": 0.4129381775856018, "step": 7073 }, { "epoch": 1.3070988901848921, "grad_norm": 0.05574364960193634, "learning_rate": 1.2543200635258002e-05, "loss": 0.38210901618003845, "step": 7074 }, { "epoch": 1.3072836668937882, "grad_norm": 0.06634816527366638, "learning_rate": 1.2541271261938603e-05, "loss": 0.4745114743709564, "step": 7075 }, { "epoch": 1.307468443602684, "grad_norm": 0.06524790078401566, "learning_rate": 1.2539341787484555e-05, "loss": 0.46080282330513, "step": 7076 }, { "epoch": 1.3076532203115798, "grad_norm": 0.06377245485782623, "learning_rate": 1.2537412211972652e-05, "loss": 0.44798263907432556, "step": 7077 }, { "epoch": 1.3078379970204757, "grad_norm": 0.07370581477880478, "learning_rate": 1.253548253547968e-05, "loss": 0.46238815784454346, "step": 7078 }, { "epoch": 1.3080227737293715, "grad_norm": 0.06885351985692978, "learning_rate": 1.2533552758082435e-05, "loss": 0.4642230272293091, "step": 7079 }, { "epoch": 1.3082075504382673, "grad_norm": 0.08454688638448715, "learning_rate": 1.253162287985772e-05, "loss": 0.6509603261947632, "step": 7080 }, { "epoch": 1.3083923271471631, "grad_norm": 0.0632704496383667, "learning_rate": 1.2529692900882331e-05, "loss": 0.43099644780158997, "step": 7081 }, { "epoch": 1.308577103856059, "grad_norm": 0.08630534261465073, "learning_rate": 1.2527762821233083e-05, "loss": 0.7119595408439636, "step": 7082 }, { "epoch": 1.3087618805649548, "grad_norm": 0.07517912983894348, "learning_rate": 1.252583264098678e-05, "loss": 0.47247129678726196, "step": 7083 }, { "epoch": 1.3089466572738506, "grad_norm": 0.07358289510011673, "learning_rate": 1.2523902360220242e-05, "loss": 0.5356598496437073, "step": 7084 }, { "epoch": 1.3091314339827465, "grad_norm": 0.07740814238786697, "learning_rate": 1.2521971979010283e-05, "loss": 0.5637345314025879, "step": 7085 }, { "epoch": 1.3093162106916423, "grad_norm": 0.07030104100704193, "learning_rate": 1.2520041497433733e-05, "loss": 0.5204507112503052, "step": 7086 }, { "epoch": 1.309500987400538, "grad_norm": 0.08233232796192169, "learning_rate": 1.2518110915567413e-05, "loss": 0.5168277621269226, "step": 7087 }, { "epoch": 1.309685764109434, "grad_norm": 0.0762810930609703, "learning_rate": 1.2516180233488158e-05, "loss": 0.5626890063285828, "step": 7088 }, { "epoch": 1.3098705408183298, "grad_norm": 0.08059023320674896, "learning_rate": 1.2514249451272802e-05, "loss": 0.65793377161026, "step": 7089 }, { "epoch": 1.3100553175272256, "grad_norm": 0.06713052093982697, "learning_rate": 1.2512318568998185e-05, "loss": 0.5770498514175415, "step": 7090 }, { "epoch": 1.3102400942361214, "grad_norm": 0.059266820549964905, "learning_rate": 1.2510387586741146e-05, "loss": 0.503598690032959, "step": 7091 }, { "epoch": 1.3104248709450173, "grad_norm": 0.07634708285331726, "learning_rate": 1.2508456504578538e-05, "loss": 0.5503666996955872, "step": 7092 }, { "epoch": 1.3106096476539133, "grad_norm": 0.05500860884785652, "learning_rate": 1.2506525322587207e-05, "loss": 0.3018694818019867, "step": 7093 }, { "epoch": 1.3107944243628091, "grad_norm": 0.0848320797085762, "learning_rate": 1.250459404084401e-05, "loss": 0.5146724581718445, "step": 7094 }, { "epoch": 1.310979201071705, "grad_norm": 0.06465902179479599, "learning_rate": 1.2502662659425808e-05, "loss": 0.3964228928089142, "step": 7095 }, { "epoch": 1.3111639777806008, "grad_norm": 0.08403968065977097, "learning_rate": 1.250073117840946e-05, "loss": 0.5929785966873169, "step": 7096 }, { "epoch": 1.3113487544894966, "grad_norm": 0.06558596342802048, "learning_rate": 1.2498799597871836e-05, "loss": 0.3913648724555969, "step": 7097 }, { "epoch": 1.3115335311983924, "grad_norm": 0.07862965017557144, "learning_rate": 1.2496867917889805e-05, "loss": 0.5325332283973694, "step": 7098 }, { "epoch": 1.3117183079072883, "grad_norm": 0.06948400288820267, "learning_rate": 1.2494936138540246e-05, "loss": 0.34856298565864563, "step": 7099 }, { "epoch": 1.311903084616184, "grad_norm": 0.06603899598121643, "learning_rate": 1.249300425990003e-05, "loss": 0.3758459985256195, "step": 7100 }, { "epoch": 1.31208786132508, "grad_norm": 0.08364688605070114, "learning_rate": 1.2491072282046044e-05, "loss": 0.6271767616271973, "step": 7101 }, { "epoch": 1.3122726380339758, "grad_norm": 0.08045737445354462, "learning_rate": 1.2489140205055177e-05, "loss": 0.541724443435669, "step": 7102 }, { "epoch": 1.3124574147428716, "grad_norm": 0.05574335902929306, "learning_rate": 1.2487208029004315e-05, "loss": 0.30128908157348633, "step": 7103 }, { "epoch": 1.3126421914517676, "grad_norm": 0.07106778025627136, "learning_rate": 1.2485275753970358e-05, "loss": 0.5022606253623962, "step": 7104 }, { "epoch": 1.3128269681606635, "grad_norm": 0.07627978175878525, "learning_rate": 1.2483343380030199e-05, "loss": 0.6493616700172424, "step": 7105 }, { "epoch": 1.3130117448695593, "grad_norm": 0.08818231523036957, "learning_rate": 1.2481410907260745e-05, "loss": 0.6240003705024719, "step": 7106 }, { "epoch": 1.3131965215784551, "grad_norm": 0.08052614331245422, "learning_rate": 1.24794783357389e-05, "loss": 0.5767407417297363, "step": 7107 }, { "epoch": 1.313381298287351, "grad_norm": 0.08588764816522598, "learning_rate": 1.2477545665541573e-05, "loss": 0.6063689589500427, "step": 7108 }, { "epoch": 1.3135660749962468, "grad_norm": 0.05303497985005379, "learning_rate": 1.2475612896745681e-05, "loss": 0.32240840792655945, "step": 7109 }, { "epoch": 1.3137508517051426, "grad_norm": 0.07196793705224991, "learning_rate": 1.247368002942814e-05, "loss": 0.4601721465587616, "step": 7110 }, { "epoch": 1.3139356284140384, "grad_norm": 0.08057314157485962, "learning_rate": 1.2471747063665871e-05, "loss": 0.6230965852737427, "step": 7111 }, { "epoch": 1.3141204051229343, "grad_norm": 0.08216112852096558, "learning_rate": 1.2469813999535804e-05, "loss": 0.5606250762939453, "step": 7112 }, { "epoch": 1.31430518183183, "grad_norm": 0.06786283105611801, "learning_rate": 1.2467880837114867e-05, "loss": 0.35154303908348083, "step": 7113 }, { "epoch": 1.314489958540726, "grad_norm": 0.06885476410388947, "learning_rate": 1.2465947576479996e-05, "loss": 0.504109263420105, "step": 7114 }, { "epoch": 1.3146747352496218, "grad_norm": 0.07210491597652435, "learning_rate": 1.2464014217708123e-05, "loss": 0.4459759294986725, "step": 7115 }, { "epoch": 1.3148595119585176, "grad_norm": 0.08211330324411392, "learning_rate": 1.2462080760876196e-05, "loss": 0.602215051651001, "step": 7116 }, { "epoch": 1.3150442886674134, "grad_norm": 0.07126942276954651, "learning_rate": 1.2460147206061156e-05, "loss": 0.4325295090675354, "step": 7117 }, { "epoch": 1.3152290653763092, "grad_norm": 0.07953279465436935, "learning_rate": 1.245821355333995e-05, "loss": 0.5518671870231628, "step": 7118 }, { "epoch": 1.315413842085205, "grad_norm": 0.08978675305843353, "learning_rate": 1.2456279802789542e-05, "loss": 0.58292555809021, "step": 7119 }, { "epoch": 1.315598618794101, "grad_norm": 0.08348673582077026, "learning_rate": 1.2454345954486878e-05, "loss": 0.5909175276756287, "step": 7120 }, { "epoch": 1.3157833955029967, "grad_norm": 0.08878988772630692, "learning_rate": 1.2452412008508924e-05, "loss": 0.6513380408287048, "step": 7121 }, { "epoch": 1.3159681722118928, "grad_norm": 0.06557399779558182, "learning_rate": 1.2450477964932648e-05, "loss": 0.4970392882823944, "step": 7122 }, { "epoch": 1.3161529489207886, "grad_norm": 0.08615729212760925, "learning_rate": 1.2448543823835016e-05, "loss": 0.7483137845993042, "step": 7123 }, { "epoch": 1.3163377256296844, "grad_norm": 0.0829634815454483, "learning_rate": 1.2446609585292997e-05, "loss": 0.6574590802192688, "step": 7124 }, { "epoch": 1.3165225023385803, "grad_norm": 0.07868000119924545, "learning_rate": 1.244467524938357e-05, "loss": 0.5516237020492554, "step": 7125 }, { "epoch": 1.316707279047476, "grad_norm": 0.07573801279067993, "learning_rate": 1.244274081618372e-05, "loss": 0.5354952216148376, "step": 7126 }, { "epoch": 1.316892055756372, "grad_norm": 0.06890484690666199, "learning_rate": 1.2440806285770427e-05, "loss": 0.5610262155532837, "step": 7127 }, { "epoch": 1.3170768324652677, "grad_norm": 0.08226225525140762, "learning_rate": 1.2438871658220677e-05, "loss": 0.5836856961250305, "step": 7128 }, { "epoch": 1.3172616091741636, "grad_norm": 0.09046713262796402, "learning_rate": 1.2436936933611467e-05, "loss": 0.5806461572647095, "step": 7129 }, { "epoch": 1.3174463858830594, "grad_norm": 0.08166642487049103, "learning_rate": 1.2435002112019791e-05, "loss": 0.48745235800743103, "step": 7130 }, { "epoch": 1.3176311625919552, "grad_norm": 0.06724494695663452, "learning_rate": 1.243306719352265e-05, "loss": 0.38586583733558655, "step": 7131 }, { "epoch": 1.317815939300851, "grad_norm": 0.08288092166185379, "learning_rate": 1.2431132178197048e-05, "loss": 0.5428813099861145, "step": 7132 }, { "epoch": 1.318000716009747, "grad_norm": 0.07065463811159134, "learning_rate": 1.2429197066119991e-05, "loss": 0.4732227027416229, "step": 7133 }, { "epoch": 1.318185492718643, "grad_norm": 0.0860670879483223, "learning_rate": 1.242726185736849e-05, "loss": 0.7225802540779114, "step": 7134 }, { "epoch": 1.3183702694275388, "grad_norm": 0.10098420083522797, "learning_rate": 1.2425326552019558e-05, "loss": 0.735201358795166, "step": 7135 }, { "epoch": 1.3185550461364346, "grad_norm": 0.06383946537971497, "learning_rate": 1.2423391150150223e-05, "loss": 0.4172554016113281, "step": 7136 }, { "epoch": 1.3187398228453304, "grad_norm": 0.07630904018878937, "learning_rate": 1.2421455651837498e-05, "loss": 0.5034219622612, "step": 7137 }, { "epoch": 1.3189245995542263, "grad_norm": 0.0586567223072052, "learning_rate": 1.2419520057158413e-05, "loss": 0.3733353912830353, "step": 7138 }, { "epoch": 1.319109376263122, "grad_norm": 0.0758417621254921, "learning_rate": 1.2417584366190003e-05, "loss": 0.5236166715621948, "step": 7139 }, { "epoch": 1.319294152972018, "grad_norm": 0.09308257699012756, "learning_rate": 1.2415648579009298e-05, "loss": 0.7132977247238159, "step": 7140 }, { "epoch": 1.3194789296809137, "grad_norm": 0.06762132048606873, "learning_rate": 1.2413712695693334e-05, "loss": 0.4622902274131775, "step": 7141 }, { "epoch": 1.3196637063898096, "grad_norm": 0.07987305521965027, "learning_rate": 1.241177671631916e-05, "loss": 0.531873345375061, "step": 7142 }, { "epoch": 1.3198484830987054, "grad_norm": 0.07383070886135101, "learning_rate": 1.2409840640963818e-05, "loss": 0.46143603324890137, "step": 7143 }, { "epoch": 1.3200332598076012, "grad_norm": 0.08451993018388748, "learning_rate": 1.2407904469704355e-05, "loss": 0.4642788767814636, "step": 7144 }, { "epoch": 1.320218036516497, "grad_norm": 0.07359858602285385, "learning_rate": 1.2405968202617828e-05, "loss": 0.4435389041900635, "step": 7145 }, { "epoch": 1.3204028132253929, "grad_norm": 0.08350193500518799, "learning_rate": 1.2404031839781297e-05, "loss": 0.5778533816337585, "step": 7146 }, { "epoch": 1.3205875899342887, "grad_norm": 0.09533848613500595, "learning_rate": 1.2402095381271817e-05, "loss": 0.7368243336677551, "step": 7147 }, { "epoch": 1.3207723666431845, "grad_norm": 0.08333290368318558, "learning_rate": 1.2400158827166456e-05, "loss": 0.4749222695827484, "step": 7148 }, { "epoch": 1.3209571433520804, "grad_norm": 0.07425980269908905, "learning_rate": 1.2398222177542284e-05, "loss": 0.4216611087322235, "step": 7149 }, { "epoch": 1.3211419200609762, "grad_norm": 0.07611807435750961, "learning_rate": 1.2396285432476374e-05, "loss": 0.472459077835083, "step": 7150 }, { "epoch": 1.321326696769872, "grad_norm": 0.07201111316680908, "learning_rate": 1.2394348592045797e-05, "loss": 0.37915852665901184, "step": 7151 }, { "epoch": 1.321511473478768, "grad_norm": 0.0910440981388092, "learning_rate": 1.2392411656327638e-05, "loss": 0.6684134602546692, "step": 7152 }, { "epoch": 1.321696250187664, "grad_norm": 0.08137714117765427, "learning_rate": 1.2390474625398982e-05, "loss": 0.5427489876747131, "step": 7153 }, { "epoch": 1.3218810268965597, "grad_norm": 0.07152608782052994, "learning_rate": 1.2388537499336915e-05, "loss": 0.49878379702568054, "step": 7154 }, { "epoch": 1.3220658036054556, "grad_norm": 0.07559207826852798, "learning_rate": 1.2386600278218527e-05, "loss": 0.5141193866729736, "step": 7155 }, { "epoch": 1.3222505803143514, "grad_norm": 0.06035945564508438, "learning_rate": 1.2384662962120914e-05, "loss": 0.34767431020736694, "step": 7156 }, { "epoch": 1.3224353570232472, "grad_norm": 0.08908118307590485, "learning_rate": 1.2382725551121175e-05, "loss": 0.5584812164306641, "step": 7157 }, { "epoch": 1.322620133732143, "grad_norm": 0.0691198781132698, "learning_rate": 1.2380788045296414e-05, "loss": 0.36187484860420227, "step": 7158 }, { "epoch": 1.3228049104410389, "grad_norm": 0.07853727042675018, "learning_rate": 1.237885044472374e-05, "loss": 0.4802594780921936, "step": 7159 }, { "epoch": 1.3229896871499347, "grad_norm": 0.08320342749357224, "learning_rate": 1.237691274948026e-05, "loss": 0.5174792408943176, "step": 7160 }, { "epoch": 1.3231744638588305, "grad_norm": 0.07984607666730881, "learning_rate": 1.2374974959643087e-05, "loss": 0.65461665391922, "step": 7161 }, { "epoch": 1.3233592405677264, "grad_norm": 0.09089548885822296, "learning_rate": 1.2373037075289343e-05, "loss": 0.6292493343353271, "step": 7162 }, { "epoch": 1.3235440172766224, "grad_norm": 0.08720018714666367, "learning_rate": 1.2371099096496146e-05, "loss": 0.6128824353218079, "step": 7163 }, { "epoch": 1.3237287939855182, "grad_norm": 0.07171821594238281, "learning_rate": 1.2369161023340623e-05, "loss": 0.5503191351890564, "step": 7164 }, { "epoch": 1.323913570694414, "grad_norm": 0.08616384863853455, "learning_rate": 1.2367222855899906e-05, "loss": 0.6394333243370056, "step": 7165 }, { "epoch": 1.32409834740331, "grad_norm": 0.0766521617770195, "learning_rate": 1.2365284594251124e-05, "loss": 0.4956338703632355, "step": 7166 }, { "epoch": 1.3242831241122057, "grad_norm": 0.0780312716960907, "learning_rate": 1.2363346238471415e-05, "loss": 0.48946505784988403, "step": 7167 }, { "epoch": 1.3244679008211016, "grad_norm": 0.07764456421136856, "learning_rate": 1.2361407788637917e-05, "loss": 0.5881205797195435, "step": 7168 }, { "epoch": 1.3246526775299974, "grad_norm": 0.07844637334346771, "learning_rate": 1.2359469244827781e-05, "loss": 0.5487515926361084, "step": 7169 }, { "epoch": 1.3248374542388932, "grad_norm": 0.08375510573387146, "learning_rate": 1.2357530607118151e-05, "loss": 0.5522754788398743, "step": 7170 }, { "epoch": 1.325022230947789, "grad_norm": 0.0740007609128952, "learning_rate": 1.2355591875586175e-05, "loss": 0.46284234523773193, "step": 7171 }, { "epoch": 1.3252070076566849, "grad_norm": 0.08384130150079727, "learning_rate": 1.2353653050309013e-05, "loss": 0.48505058884620667, "step": 7172 }, { "epoch": 1.3253917843655807, "grad_norm": 0.07120472937822342, "learning_rate": 1.2351714131363828e-05, "loss": 0.6011337637901306, "step": 7173 }, { "epoch": 1.3255765610744765, "grad_norm": 0.06549742817878723, "learning_rate": 1.2349775118827772e-05, "loss": 0.43408912420272827, "step": 7174 }, { "epoch": 1.3257613377833724, "grad_norm": 0.07499620318412781, "learning_rate": 1.2347836012778021e-05, "loss": 0.4794289767742157, "step": 7175 }, { "epoch": 1.3259461144922682, "grad_norm": 0.06897576153278351, "learning_rate": 1.2345896813291743e-05, "loss": 0.4830062687397003, "step": 7176 }, { "epoch": 1.326130891201164, "grad_norm": 0.062312051653862, "learning_rate": 1.2343957520446106e-05, "loss": 0.3550172746181488, "step": 7177 }, { "epoch": 1.3263156679100598, "grad_norm": 0.05161038041114807, "learning_rate": 1.2342018134318296e-05, "loss": 0.33226051926612854, "step": 7178 }, { "epoch": 1.3265004446189557, "grad_norm": 0.08036305010318756, "learning_rate": 1.2340078654985495e-05, "loss": 0.5838134288787842, "step": 7179 }, { "epoch": 1.3266852213278515, "grad_norm": 0.05553985759615898, "learning_rate": 1.2338139082524883e-05, "loss": 0.3583305776119232, "step": 7180 }, { "epoch": 1.3268699980367475, "grad_norm": 0.0874638631939888, "learning_rate": 1.2336199417013649e-05, "loss": 0.624904990196228, "step": 7181 }, { "epoch": 1.3270547747456434, "grad_norm": 0.10083182156085968, "learning_rate": 1.2334259658528985e-05, "loss": 0.6989080905914307, "step": 7182 }, { "epoch": 1.3272395514545392, "grad_norm": 0.0915534570813179, "learning_rate": 1.2332319807148094e-05, "loss": 0.7326936721801758, "step": 7183 }, { "epoch": 1.327424328163435, "grad_norm": 0.06375181674957275, "learning_rate": 1.2330379862948167e-05, "loss": 0.47836050391197205, "step": 7184 }, { "epoch": 1.3276091048723309, "grad_norm": 0.06903169304132462, "learning_rate": 1.2328439826006415e-05, "loss": 0.5004631876945496, "step": 7185 }, { "epoch": 1.3277938815812267, "grad_norm": 0.07927877455949783, "learning_rate": 1.2326499696400042e-05, "loss": 0.6567851901054382, "step": 7186 }, { "epoch": 1.3279786582901225, "grad_norm": 0.08236057311296463, "learning_rate": 1.2324559474206261e-05, "loss": 0.549164354801178, "step": 7187 }, { "epoch": 1.3281634349990183, "grad_norm": 0.1732921451330185, "learning_rate": 1.2322619159502287e-05, "loss": 0.6361644864082336, "step": 7188 }, { "epoch": 1.3283482117079142, "grad_norm": 0.08105883747339249, "learning_rate": 1.2320678752365333e-05, "loss": 0.5528790950775146, "step": 7189 }, { "epoch": 1.32853298841681, "grad_norm": 0.09747636318206787, "learning_rate": 1.2318738252872628e-05, "loss": 0.7589352130889893, "step": 7190 }, { "epoch": 1.3287177651257058, "grad_norm": 0.0843818187713623, "learning_rate": 1.2316797661101394e-05, "loss": 0.6057540774345398, "step": 7191 }, { "epoch": 1.3289025418346019, "grad_norm": 0.07819300144910812, "learning_rate": 1.2314856977128859e-05, "loss": 0.440432608127594, "step": 7192 }, { "epoch": 1.3290873185434977, "grad_norm": 0.0857505276799202, "learning_rate": 1.2312916201032263e-05, "loss": 0.5792015194892883, "step": 7193 }, { "epoch": 1.3292720952523935, "grad_norm": 0.07800312340259552, "learning_rate": 1.2310975332888837e-05, "loss": 0.4814336597919464, "step": 7194 }, { "epoch": 1.3294568719612894, "grad_norm": 0.05761153995990753, "learning_rate": 1.230903437277582e-05, "loss": 0.3713530898094177, "step": 7195 }, { "epoch": 1.3296416486701852, "grad_norm": 0.0807412788271904, "learning_rate": 1.2307093320770463e-05, "loss": 0.5634688138961792, "step": 7196 }, { "epoch": 1.329826425379081, "grad_norm": 0.0661948099732399, "learning_rate": 1.2305152176950008e-05, "loss": 0.5103600025177002, "step": 7197 }, { "epoch": 1.3300112020879769, "grad_norm": 0.08528309315443039, "learning_rate": 1.2303210941391708e-05, "loss": 0.6124686598777771, "step": 7198 }, { "epoch": 1.3301959787968727, "grad_norm": 0.07087121158838272, "learning_rate": 1.230126961417282e-05, "loss": 0.6103671789169312, "step": 7199 }, { "epoch": 1.3303807555057685, "grad_norm": 0.07148048281669617, "learning_rate": 1.22993281953706e-05, "loss": 0.47916725277900696, "step": 7200 }, { "epoch": 1.3305655322146643, "grad_norm": 0.06045059859752655, "learning_rate": 1.229738668506231e-05, "loss": 0.3602880537509918, "step": 7201 }, { "epoch": 1.3307503089235602, "grad_norm": 0.08225797116756439, "learning_rate": 1.2295445083325217e-05, "loss": 0.5574508309364319, "step": 7202 }, { "epoch": 1.330935085632456, "grad_norm": 0.0703793317079544, "learning_rate": 1.2293503390236595e-05, "loss": 0.46736153960227966, "step": 7203 }, { "epoch": 1.3311198623413518, "grad_norm": 0.06916027516126633, "learning_rate": 1.229156160587371e-05, "loss": 0.5249672532081604, "step": 7204 }, { "epoch": 1.3313046390502477, "grad_norm": 0.06406823545694351, "learning_rate": 1.2289619730313847e-05, "loss": 0.49783429503440857, "step": 7205 }, { "epoch": 1.3314894157591435, "grad_norm": 0.07925626635551453, "learning_rate": 1.2287677763634278e-05, "loss": 0.594709038734436, "step": 7206 }, { "epoch": 1.3316741924680393, "grad_norm": 0.08192488551139832, "learning_rate": 1.228573570591229e-05, "loss": 0.540943443775177, "step": 7207 }, { "epoch": 1.3318589691769351, "grad_norm": 0.06959114968776703, "learning_rate": 1.2283793557225176e-05, "loss": 0.42747682332992554, "step": 7208 }, { "epoch": 1.332043745885831, "grad_norm": 0.062199659645557404, "learning_rate": 1.228185131765022e-05, "loss": 0.44615626335144043, "step": 7209 }, { "epoch": 1.332228522594727, "grad_norm": 0.06088728830218315, "learning_rate": 1.2279908987264725e-05, "loss": 0.3549799919128418, "step": 7210 }, { "epoch": 1.3324132993036228, "grad_norm": 0.0674210712313652, "learning_rate": 1.227796656614598e-05, "loss": 0.38973909616470337, "step": 7211 }, { "epoch": 1.3325980760125187, "grad_norm": 0.055883023887872696, "learning_rate": 1.2276024054371299e-05, "loss": 0.3431173861026764, "step": 7212 }, { "epoch": 1.3327828527214145, "grad_norm": 0.07490044087171555, "learning_rate": 1.2274081452017975e-05, "loss": 0.550331175327301, "step": 7213 }, { "epoch": 1.3329676294303103, "grad_norm": 0.0761045292019844, "learning_rate": 1.2272138759163326e-05, "loss": 0.5015029311180115, "step": 7214 }, { "epoch": 1.3331524061392062, "grad_norm": 0.06385071575641632, "learning_rate": 1.2270195975884664e-05, "loss": 0.43876397609710693, "step": 7215 }, { "epoch": 1.333337182848102, "grad_norm": 0.07857915759086609, "learning_rate": 1.2268253102259302e-05, "loss": 0.5978013873100281, "step": 7216 }, { "epoch": 1.3335219595569978, "grad_norm": 0.06934911012649536, "learning_rate": 1.2266310138364565e-05, "loss": 0.4774762988090515, "step": 7217 }, { "epoch": 1.3337067362658936, "grad_norm": 0.07244524359703064, "learning_rate": 1.2264367084277778e-05, "loss": 0.4618462324142456, "step": 7218 }, { "epoch": 1.3338915129747895, "grad_norm": 0.09606807678937912, "learning_rate": 1.226242394007626e-05, "loss": 0.6047474145889282, "step": 7219 }, { "epoch": 1.3340762896836853, "grad_norm": 0.08191752433776855, "learning_rate": 1.226048070583735e-05, "loss": 0.5253011584281921, "step": 7220 }, { "epoch": 1.3342610663925814, "grad_norm": 0.08500118553638458, "learning_rate": 1.225853738163838e-05, "loss": 0.5972819328308105, "step": 7221 }, { "epoch": 1.3344458431014772, "grad_norm": 0.08752238750457764, "learning_rate": 1.2256593967556689e-05, "loss": 0.5581551790237427, "step": 7222 }, { "epoch": 1.334630619810373, "grad_norm": 0.0636662021279335, "learning_rate": 1.2254650463669614e-05, "loss": 0.4321964681148529, "step": 7223 }, { "epoch": 1.3348153965192688, "grad_norm": 0.07094656676054001, "learning_rate": 1.225270687005451e-05, "loss": 0.4596783518791199, "step": 7224 }, { "epoch": 1.3350001732281647, "grad_norm": 0.07906907796859741, "learning_rate": 1.2250763186788717e-05, "loss": 0.6005190014839172, "step": 7225 }, { "epoch": 1.3351849499370605, "grad_norm": 0.06554142385721207, "learning_rate": 1.2248819413949591e-05, "loss": 0.4663126468658447, "step": 7226 }, { "epoch": 1.3353697266459563, "grad_norm": 0.06856255233287811, "learning_rate": 1.2246875551614487e-05, "loss": 0.5557185411453247, "step": 7227 }, { "epoch": 1.3355545033548522, "grad_norm": 0.06476157903671265, "learning_rate": 1.224493159986077e-05, "loss": 0.5419876575469971, "step": 7228 }, { "epoch": 1.335739280063748, "grad_norm": 0.07515493035316467, "learning_rate": 1.2242987558765798e-05, "loss": 0.5306958556175232, "step": 7229 }, { "epoch": 1.3359240567726438, "grad_norm": 0.07471494376659393, "learning_rate": 1.2241043428406936e-05, "loss": 0.4942961037158966, "step": 7230 }, { "epoch": 1.3361088334815396, "grad_norm": 0.06926632672548294, "learning_rate": 1.2239099208861555e-05, "loss": 0.4492073655128479, "step": 7231 }, { "epoch": 1.3362936101904355, "grad_norm": 0.07723340392112732, "learning_rate": 1.2237154900207036e-05, "loss": 0.576379120349884, "step": 7232 }, { "epoch": 1.3364783868993313, "grad_norm": 0.08155592530965805, "learning_rate": 1.2235210502520746e-05, "loss": 0.688007116317749, "step": 7233 }, { "epoch": 1.3366631636082271, "grad_norm": 0.07088128477334976, "learning_rate": 1.2233266015880074e-05, "loss": 0.48353421688079834, "step": 7234 }, { "epoch": 1.336847940317123, "grad_norm": 0.07835712283849716, "learning_rate": 1.2231321440362402e-05, "loss": 0.6117390990257263, "step": 7235 }, { "epoch": 1.3370327170260188, "grad_norm": 0.08962835371494293, "learning_rate": 1.2229376776045116e-05, "loss": 0.5430634617805481, "step": 7236 }, { "epoch": 1.3372174937349146, "grad_norm": 0.08353981375694275, "learning_rate": 1.2227432023005608e-05, "loss": 0.5032727122306824, "step": 7237 }, { "epoch": 1.3374022704438104, "grad_norm": 0.06118007004261017, "learning_rate": 1.2225487181321278e-05, "loss": 0.458617627620697, "step": 7238 }, { "epoch": 1.3375870471527063, "grad_norm": 0.08352229744195938, "learning_rate": 1.222354225106952e-05, "loss": 0.6291279196739197, "step": 7239 }, { "epoch": 1.3377718238616023, "grad_norm": 0.08776495605707169, "learning_rate": 1.2221597232327736e-05, "loss": 0.617847204208374, "step": 7240 }, { "epoch": 1.3379566005704981, "grad_norm": 0.08763931691646576, "learning_rate": 1.221965212517333e-05, "loss": 0.6111961603164673, "step": 7241 }, { "epoch": 1.338141377279394, "grad_norm": 0.0853385329246521, "learning_rate": 1.221770692968372e-05, "loss": 0.626089334487915, "step": 7242 }, { "epoch": 1.3383261539882898, "grad_norm": 0.059536758810281754, "learning_rate": 1.2215761645936307e-05, "loss": 0.3988431990146637, "step": 7243 }, { "epoch": 1.3385109306971856, "grad_norm": 0.05607954412698746, "learning_rate": 1.2213816274008515e-05, "loss": 0.33263882994651794, "step": 7244 }, { "epoch": 1.3386957074060815, "grad_norm": 0.06977252662181854, "learning_rate": 1.221187081397776e-05, "loss": 0.4728195071220398, "step": 7245 }, { "epoch": 1.3388804841149773, "grad_norm": 0.08638498932123184, "learning_rate": 1.2209925265921469e-05, "loss": 0.5551614761352539, "step": 7246 }, { "epoch": 1.3390652608238731, "grad_norm": 0.07965132594108582, "learning_rate": 1.2207979629917061e-05, "loss": 0.6850483417510986, "step": 7247 }, { "epoch": 1.339250037532769, "grad_norm": 0.11833593249320984, "learning_rate": 1.2206033906041979e-05, "loss": 0.7881156802177429, "step": 7248 }, { "epoch": 1.3394348142416648, "grad_norm": 0.08182307332754135, "learning_rate": 1.2204088094373647e-05, "loss": 0.518099844455719, "step": 7249 }, { "epoch": 1.3396195909505606, "grad_norm": 0.09841877222061157, "learning_rate": 1.22021421949895e-05, "loss": 0.5551977157592773, "step": 7250 }, { "epoch": 1.3398043676594567, "grad_norm": 0.07546274363994598, "learning_rate": 1.2200196207966988e-05, "loss": 0.5618906021118164, "step": 7251 }, { "epoch": 1.3399891443683525, "grad_norm": 0.09216009825468063, "learning_rate": 1.2198250133383552e-05, "loss": 0.7444961071014404, "step": 7252 }, { "epoch": 1.3401739210772483, "grad_norm": 0.06232066452503204, "learning_rate": 1.2196303971316632e-05, "loss": 0.3908202350139618, "step": 7253 }, { "epoch": 1.3403586977861441, "grad_norm": 0.07654942572116852, "learning_rate": 1.2194357721843689e-05, "loss": 0.5528795123100281, "step": 7254 }, { "epoch": 1.34054347449504, "grad_norm": 0.07393674552440643, "learning_rate": 1.2192411385042176e-05, "loss": 0.45518913865089417, "step": 7255 }, { "epoch": 1.3407282512039358, "grad_norm": 0.07973166555166245, "learning_rate": 1.2190464960989546e-05, "loss": 0.5710092186927795, "step": 7256 }, { "epoch": 1.3409130279128316, "grad_norm": 0.09130148589611053, "learning_rate": 1.2188518449763263e-05, "loss": 0.59661865234375, "step": 7257 }, { "epoch": 1.3410978046217275, "grad_norm": 0.07427915185689926, "learning_rate": 1.2186571851440793e-05, "loss": 0.4620944857597351, "step": 7258 }, { "epoch": 1.3412825813306233, "grad_norm": 0.08230415731668472, "learning_rate": 1.2184625166099609e-05, "loss": 0.5412794947624207, "step": 7259 }, { "epoch": 1.341467358039519, "grad_norm": 0.08303827047348022, "learning_rate": 1.2182678393817168e-05, "loss": 0.5233322978019714, "step": 7260 }, { "epoch": 1.341652134748415, "grad_norm": 0.07536231726408005, "learning_rate": 1.2180731534670964e-05, "loss": 0.5114122033119202, "step": 7261 }, { "epoch": 1.3418369114573108, "grad_norm": 0.07248736172914505, "learning_rate": 1.2178784588738468e-05, "loss": 0.5382394194602966, "step": 7262 }, { "epoch": 1.3420216881662066, "grad_norm": 0.07889589667320251, "learning_rate": 1.2176837556097158e-05, "loss": 0.5099925994873047, "step": 7263 }, { "epoch": 1.3422064648751024, "grad_norm": 0.05722340941429138, "learning_rate": 1.2174890436824525e-05, "loss": 0.3894376754760742, "step": 7264 }, { "epoch": 1.3423912415839983, "grad_norm": 0.04962385445833206, "learning_rate": 1.2172943230998058e-05, "loss": 0.3173448145389557, "step": 7265 }, { "epoch": 1.342576018292894, "grad_norm": 0.07014395296573639, "learning_rate": 1.217099593869525e-05, "loss": 0.4455048441886902, "step": 7266 }, { "epoch": 1.34276079500179, "grad_norm": 0.07519932091236115, "learning_rate": 1.2169048559993591e-05, "loss": 0.5243011713027954, "step": 7267 }, { "epoch": 1.3429455717106857, "grad_norm": 0.07758233696222305, "learning_rate": 1.2167101094970588e-05, "loss": 0.5891153216362, "step": 7268 }, { "epoch": 1.3431303484195818, "grad_norm": 0.08783500641584396, "learning_rate": 1.2165153543703744e-05, "loss": 0.6760488152503967, "step": 7269 }, { "epoch": 1.3433151251284776, "grad_norm": 0.05743502825498581, "learning_rate": 1.2163205906270558e-05, "loss": 0.45588070154190063, "step": 7270 }, { "epoch": 1.3434999018373734, "grad_norm": 0.07715628296136856, "learning_rate": 1.2161258182748548e-05, "loss": 0.5763320922851562, "step": 7271 }, { "epoch": 1.3436846785462693, "grad_norm": 0.0772564709186554, "learning_rate": 1.2159310373215223e-05, "loss": 0.6143149733543396, "step": 7272 }, { "epoch": 1.343869455255165, "grad_norm": 0.07961688190698624, "learning_rate": 1.21573624777481e-05, "loss": 0.49570122361183167, "step": 7273 }, { "epoch": 1.344054231964061, "grad_norm": 0.06768622994422913, "learning_rate": 1.21554144964247e-05, "loss": 0.4177214503288269, "step": 7274 }, { "epoch": 1.3442390086729568, "grad_norm": 0.07863906025886536, "learning_rate": 1.215346642932255e-05, "loss": 0.5487481355667114, "step": 7275 }, { "epoch": 1.3444237853818526, "grad_norm": 0.07579664140939713, "learning_rate": 1.215151827651917e-05, "loss": 0.7003390789031982, "step": 7276 }, { "epoch": 1.3446085620907484, "grad_norm": 0.07688242942094803, "learning_rate": 1.214957003809209e-05, "loss": 0.48274096846580505, "step": 7277 }, { "epoch": 1.3447933387996442, "grad_norm": 0.09185263514518738, "learning_rate": 1.2147621714118856e-05, "loss": 0.7358651161193848, "step": 7278 }, { "epoch": 1.34497811550854, "grad_norm": 0.07346237450838089, "learning_rate": 1.2145673304676995e-05, "loss": 0.5424431562423706, "step": 7279 }, { "epoch": 1.3451628922174361, "grad_norm": 0.08200640231370926, "learning_rate": 1.2143724809844046e-05, "loss": 0.6213791370391846, "step": 7280 }, { "epoch": 1.345347668926332, "grad_norm": 0.07538006454706192, "learning_rate": 1.2141776229697557e-05, "loss": 0.4409875273704529, "step": 7281 }, { "epoch": 1.3455324456352278, "grad_norm": 0.07102343440055847, "learning_rate": 1.2139827564315077e-05, "loss": 0.5566208958625793, "step": 7282 }, { "epoch": 1.3457172223441236, "grad_norm": 0.07195903360843658, "learning_rate": 1.213787881377415e-05, "loss": 0.5001834034919739, "step": 7283 }, { "epoch": 1.3459019990530194, "grad_norm": 0.0780748501420021, "learning_rate": 1.2135929978152339e-05, "loss": 0.48966068029403687, "step": 7284 }, { "epoch": 1.3460867757619153, "grad_norm": 0.06550266593694687, "learning_rate": 1.2133981057527197e-05, "loss": 0.5034133195877075, "step": 7285 }, { "epoch": 1.346271552470811, "grad_norm": 0.09186110645532608, "learning_rate": 1.2132032051976285e-05, "loss": 0.5809505581855774, "step": 7286 }, { "epoch": 1.346456329179707, "grad_norm": 0.0906052365899086, "learning_rate": 1.2130082961577167e-05, "loss": 0.6895391345024109, "step": 7287 }, { "epoch": 1.3466411058886028, "grad_norm": 0.07443145662546158, "learning_rate": 1.2128133786407413e-05, "loss": 0.538640022277832, "step": 7288 }, { "epoch": 1.3468258825974986, "grad_norm": 0.057054303586483, "learning_rate": 1.2126184526544591e-05, "loss": 0.402135968208313, "step": 7289 }, { "epoch": 1.3470106593063944, "grad_norm": 0.08881109952926636, "learning_rate": 1.2124235182066275e-05, "loss": 0.6084185838699341, "step": 7290 }, { "epoch": 1.3471954360152902, "grad_norm": 0.06212284043431282, "learning_rate": 1.2122285753050047e-05, "loss": 0.3448404371738434, "step": 7291 }, { "epoch": 1.347380212724186, "grad_norm": 0.0877557173371315, "learning_rate": 1.2120336239573484e-05, "loss": 0.6045507192611694, "step": 7292 }, { "epoch": 1.347564989433082, "grad_norm": 0.07777206599712372, "learning_rate": 1.2118386641714174e-05, "loss": 0.6168590188026428, "step": 7293 }, { "epoch": 1.3477497661419777, "grad_norm": 0.08838807791471481, "learning_rate": 1.2116436959549704e-05, "loss": 0.607566773891449, "step": 7294 }, { "epoch": 1.3479345428508736, "grad_norm": 0.07736007124185562, "learning_rate": 1.2114487193157663e-05, "loss": 0.5946328639984131, "step": 7295 }, { "epoch": 1.3481193195597694, "grad_norm": 0.07302294671535492, "learning_rate": 1.2112537342615646e-05, "loss": 0.45372599363327026, "step": 7296 }, { "epoch": 1.3483040962686652, "grad_norm": 0.10016177594661713, "learning_rate": 1.2110587408001256e-05, "loss": 0.5879311561584473, "step": 7297 }, { "epoch": 1.3484888729775613, "grad_norm": 0.08851506561040878, "learning_rate": 1.2108637389392087e-05, "loss": 0.6472713947296143, "step": 7298 }, { "epoch": 1.348673649686457, "grad_norm": 0.06454683095216751, "learning_rate": 1.2106687286865748e-05, "loss": 0.4215412437915802, "step": 7299 }, { "epoch": 1.348858426395353, "grad_norm": 0.07148120552301407, "learning_rate": 1.2104737100499843e-05, "loss": 0.6257513761520386, "step": 7300 }, { "epoch": 1.3490432031042487, "grad_norm": 0.06810782104730606, "learning_rate": 1.210278683037199e-05, "loss": 0.31949469447135925, "step": 7301 }, { "epoch": 1.3492279798131446, "grad_norm": 0.07105056196451187, "learning_rate": 1.2100836476559799e-05, "loss": 0.4729495942592621, "step": 7302 }, { "epoch": 1.3494127565220404, "grad_norm": 0.06415511667728424, "learning_rate": 1.209888603914089e-05, "loss": 0.41198405623435974, "step": 7303 }, { "epoch": 1.3495975332309362, "grad_norm": 0.05516704544425011, "learning_rate": 1.2096935518192883e-05, "loss": 0.35060352087020874, "step": 7304 }, { "epoch": 1.349782309939832, "grad_norm": 0.08884477615356445, "learning_rate": 1.2094984913793399e-05, "loss": 0.6026931405067444, "step": 7305 }, { "epoch": 1.3499670866487279, "grad_norm": 0.05907578393816948, "learning_rate": 1.2093034226020073e-05, "loss": 0.4430491328239441, "step": 7306 }, { "epoch": 1.3501518633576237, "grad_norm": 0.08960863947868347, "learning_rate": 1.2091083454950534e-05, "loss": 0.660787045955658, "step": 7307 }, { "epoch": 1.3503366400665195, "grad_norm": 0.08193490654230118, "learning_rate": 1.2089132600662412e-05, "loss": 0.62440025806427, "step": 7308 }, { "epoch": 1.3505214167754156, "grad_norm": 0.06207743287086487, "learning_rate": 1.2087181663233354e-05, "loss": 0.3066102862358093, "step": 7309 }, { "epoch": 1.3507061934843114, "grad_norm": 0.07043956220149994, "learning_rate": 1.208523064274099e-05, "loss": 0.4655773341655731, "step": 7310 }, { "epoch": 1.3508909701932073, "grad_norm": 0.07935937494039536, "learning_rate": 1.2083279539262976e-05, "loss": 0.5760067105293274, "step": 7311 }, { "epoch": 1.351075746902103, "grad_norm": 0.06078021600842476, "learning_rate": 1.2081328352876949e-05, "loss": 0.45882537961006165, "step": 7312 }, { "epoch": 1.351260523610999, "grad_norm": 0.08058591932058334, "learning_rate": 1.2079377083660565e-05, "loss": 0.5471338629722595, "step": 7313 }, { "epoch": 1.3514453003198947, "grad_norm": 0.07964106649160385, "learning_rate": 1.2077425731691484e-05, "loss": 0.5816575884819031, "step": 7314 }, { "epoch": 1.3516300770287906, "grad_norm": 0.07928101718425751, "learning_rate": 1.2075474297047353e-05, "loss": 0.5275821089744568, "step": 7315 }, { "epoch": 1.3518148537376864, "grad_norm": 0.07923033088445663, "learning_rate": 1.207352277980584e-05, "loss": 0.5912119150161743, "step": 7316 }, { "epoch": 1.3519996304465822, "grad_norm": 0.07181008905172348, "learning_rate": 1.207157118004461e-05, "loss": 0.4410319924354553, "step": 7317 }, { "epoch": 1.352184407155478, "grad_norm": 0.06785266846418381, "learning_rate": 1.2069619497841327e-05, "loss": 0.4814789593219757, "step": 7318 }, { "epoch": 1.3523691838643739, "grad_norm": 0.0755409523844719, "learning_rate": 1.2067667733273662e-05, "loss": 0.5103818774223328, "step": 7319 }, { "epoch": 1.3525539605732697, "grad_norm": 0.08887257426977158, "learning_rate": 1.206571588641929e-05, "loss": 0.6387781500816345, "step": 7320 }, { "epoch": 1.3527387372821655, "grad_norm": 0.0847201719880104, "learning_rate": 1.206376395735589e-05, "loss": 0.5143601894378662, "step": 7321 }, { "epoch": 1.3529235139910614, "grad_norm": 0.062031347304582596, "learning_rate": 1.2061811946161137e-05, "loss": 0.3705389201641083, "step": 7322 }, { "epoch": 1.3531082906999572, "grad_norm": 0.07319609075784683, "learning_rate": 1.2059859852912724e-05, "loss": 0.546544075012207, "step": 7323 }, { "epoch": 1.353293067408853, "grad_norm": 0.08957704901695251, "learning_rate": 1.205790767768833e-05, "loss": 0.607306182384491, "step": 7324 }, { "epoch": 1.3534778441177489, "grad_norm": 0.08340125530958176, "learning_rate": 1.2055955420565651e-05, "loss": 0.5507020950317383, "step": 7325 }, { "epoch": 1.3536626208266447, "grad_norm": 0.07087838649749756, "learning_rate": 1.2054003081622377e-05, "loss": 0.5279867649078369, "step": 7326 }, { "epoch": 1.3538473975355405, "grad_norm": 0.048319268971681595, "learning_rate": 1.2052050660936208e-05, "loss": 0.30317071080207825, "step": 7327 }, { "epoch": 1.3540321742444366, "grad_norm": 0.08193907141685486, "learning_rate": 1.2050098158584842e-05, "loss": 0.6641421318054199, "step": 7328 }, { "epoch": 1.3542169509533324, "grad_norm": 0.0873681902885437, "learning_rate": 1.2048145574645985e-05, "loss": 0.5591307878494263, "step": 7329 }, { "epoch": 1.3544017276622282, "grad_norm": 0.0727614164352417, "learning_rate": 1.2046192909197339e-05, "loss": 0.5091972351074219, "step": 7330 }, { "epoch": 1.354586504371124, "grad_norm": 0.08346467465162277, "learning_rate": 1.2044240162316619e-05, "loss": 0.47075793147087097, "step": 7331 }, { "epoch": 1.3547712810800199, "grad_norm": 0.06700126826763153, "learning_rate": 1.2042287334081532e-05, "loss": 0.5636169910430908, "step": 7332 }, { "epoch": 1.3549560577889157, "grad_norm": 0.05685332417488098, "learning_rate": 1.2040334424569802e-05, "loss": 0.34867823123931885, "step": 7333 }, { "epoch": 1.3551408344978115, "grad_norm": 0.06659932434558868, "learning_rate": 1.2038381433859145e-05, "loss": 0.42646080255508423, "step": 7334 }, { "epoch": 1.3553256112067074, "grad_norm": 0.08106103539466858, "learning_rate": 1.2036428362027288e-05, "loss": 0.5651917457580566, "step": 7335 }, { "epoch": 1.3555103879156032, "grad_norm": 0.08953486382961273, "learning_rate": 1.2034475209151945e-05, "loss": 0.6413703560829163, "step": 7336 }, { "epoch": 1.355695164624499, "grad_norm": 0.09362461417913437, "learning_rate": 1.203252197531086e-05, "loss": 0.7097888588905334, "step": 7337 }, { "epoch": 1.3558799413333948, "grad_norm": 0.09100347012281418, "learning_rate": 1.203056866058176e-05, "loss": 0.5439901351928711, "step": 7338 }, { "epoch": 1.356064718042291, "grad_norm": 0.07210123538970947, "learning_rate": 1.2028615265042375e-05, "loss": 0.5213744044303894, "step": 7339 }, { "epoch": 1.3562494947511867, "grad_norm": 0.0927305668592453, "learning_rate": 1.2026661788770453e-05, "loss": 0.5957270264625549, "step": 7340 }, { "epoch": 1.3564342714600826, "grad_norm": 0.06996115297079086, "learning_rate": 1.2024708231843731e-05, "loss": 0.4633581042289734, "step": 7341 }, { "epoch": 1.3566190481689784, "grad_norm": 0.09780874103307724, "learning_rate": 1.2022754594339956e-05, "loss": 0.6641948819160461, "step": 7342 }, { "epoch": 1.3568038248778742, "grad_norm": 0.07791966199874878, "learning_rate": 1.2020800876336877e-05, "loss": 0.5625864863395691, "step": 7343 }, { "epoch": 1.35698860158677, "grad_norm": 0.09549916535615921, "learning_rate": 1.2018847077912246e-05, "loss": 0.7063486576080322, "step": 7344 }, { "epoch": 1.3571733782956659, "grad_norm": 0.07231148332357407, "learning_rate": 1.2016893199143818e-05, "loss": 0.48578956723213196, "step": 7345 }, { "epoch": 1.3573581550045617, "grad_norm": 0.0838562548160553, "learning_rate": 1.2014939240109347e-05, "loss": 0.5130182504653931, "step": 7346 }, { "epoch": 1.3575429317134575, "grad_norm": 0.09551063925027847, "learning_rate": 1.2012985200886602e-05, "loss": 0.6210164427757263, "step": 7347 }, { "epoch": 1.3577277084223534, "grad_norm": 0.08454853296279907, "learning_rate": 1.2011031081553344e-05, "loss": 0.7053744196891785, "step": 7348 }, { "epoch": 1.3579124851312492, "grad_norm": 0.07290687412023544, "learning_rate": 1.2009076882187338e-05, "loss": 0.5396966338157654, "step": 7349 }, { "epoch": 1.358097261840145, "grad_norm": 0.05749298259615898, "learning_rate": 1.2007122602866357e-05, "loss": 0.3489977717399597, "step": 7350 }, { "epoch": 1.3582820385490408, "grad_norm": 0.06704192608594894, "learning_rate": 1.200516824366818e-05, "loss": 0.3745604157447815, "step": 7351 }, { "epoch": 1.3584668152579367, "grad_norm": 0.08380575478076935, "learning_rate": 1.2003213804670578e-05, "loss": 0.4638144373893738, "step": 7352 }, { "epoch": 1.3586515919668325, "grad_norm": 0.07318996638059616, "learning_rate": 1.2001259285951333e-05, "loss": 0.500698983669281, "step": 7353 }, { "epoch": 1.3588363686757283, "grad_norm": 0.0672593042254448, "learning_rate": 1.199930468758823e-05, "loss": 0.45993903279304504, "step": 7354 }, { "epoch": 1.3590211453846242, "grad_norm": 0.06959978491067886, "learning_rate": 1.1997350009659057e-05, "loss": 0.48005440831184387, "step": 7355 }, { "epoch": 1.35920592209352, "grad_norm": 0.06343521922826767, "learning_rate": 1.1995395252241599e-05, "loss": 0.38496604561805725, "step": 7356 }, { "epoch": 1.359390698802416, "grad_norm": 0.07417965680360794, "learning_rate": 1.1993440415413655e-05, "loss": 0.46091029047966003, "step": 7357 }, { "epoch": 1.3595754755113119, "grad_norm": 0.06469357013702393, "learning_rate": 1.1991485499253021e-05, "loss": 0.46245938539505005, "step": 7358 }, { "epoch": 1.3597602522202077, "grad_norm": 0.08116722851991653, "learning_rate": 1.1989530503837492e-05, "loss": 0.5595096349716187, "step": 7359 }, { "epoch": 1.3599450289291035, "grad_norm": 0.058397118002176285, "learning_rate": 1.1987575429244873e-05, "loss": 0.37003904581069946, "step": 7360 }, { "epoch": 1.3601298056379993, "grad_norm": 0.0854644626379013, "learning_rate": 1.1985620275552974e-05, "loss": 0.6662513613700867, "step": 7361 }, { "epoch": 1.3603145823468952, "grad_norm": 0.0821366086602211, "learning_rate": 1.1983665042839597e-05, "loss": 0.4664044678211212, "step": 7362 }, { "epoch": 1.360499359055791, "grad_norm": 0.06795128434896469, "learning_rate": 1.1981709731182557e-05, "loss": 0.5029423832893372, "step": 7363 }, { "epoch": 1.3606841357646868, "grad_norm": 0.06815291196107864, "learning_rate": 1.1979754340659673e-05, "loss": 0.4743029475212097, "step": 7364 }, { "epoch": 1.3608689124735827, "grad_norm": 0.05607020482420921, "learning_rate": 1.1977798871348758e-05, "loss": 0.3464440107345581, "step": 7365 }, { "epoch": 1.3610536891824785, "grad_norm": 0.07072314620018005, "learning_rate": 1.1975843323327634e-05, "loss": 0.5013730525970459, "step": 7366 }, { "epoch": 1.3612384658913743, "grad_norm": 0.07554870843887329, "learning_rate": 1.197388769667413e-05, "loss": 0.5226201415061951, "step": 7367 }, { "epoch": 1.3614232426002704, "grad_norm": 0.07110065221786499, "learning_rate": 1.197193199146607e-05, "loss": 0.445374071598053, "step": 7368 }, { "epoch": 1.3616080193091662, "grad_norm": 0.0752422958612442, "learning_rate": 1.1969976207781287e-05, "loss": 0.49761611223220825, "step": 7369 }, { "epoch": 1.361792796018062, "grad_norm": 0.07382792234420776, "learning_rate": 1.1968020345697613e-05, "loss": 0.5077685713768005, "step": 7370 }, { "epoch": 1.3619775727269579, "grad_norm": 0.06980964541435242, "learning_rate": 1.1966064405292887e-05, "loss": 0.44122546911239624, "step": 7371 }, { "epoch": 1.3621623494358537, "grad_norm": 0.08900794386863708, "learning_rate": 1.1964108386644948e-05, "loss": 0.6478540301322937, "step": 7372 }, { "epoch": 1.3623471261447495, "grad_norm": 0.08322156220674515, "learning_rate": 1.196215228983164e-05, "loss": 0.645605206489563, "step": 7373 }, { "epoch": 1.3625319028536453, "grad_norm": 0.06156010180711746, "learning_rate": 1.196019611493081e-05, "loss": 0.4276708960533142, "step": 7374 }, { "epoch": 1.3627166795625412, "grad_norm": 0.08435390144586563, "learning_rate": 1.1958239862020311e-05, "loss": 0.5966938734054565, "step": 7375 }, { "epoch": 1.362901456271437, "grad_norm": 0.05743694305419922, "learning_rate": 1.1956283531177986e-05, "loss": 0.36740046739578247, "step": 7376 }, { "epoch": 1.3630862329803328, "grad_norm": 0.0730554461479187, "learning_rate": 1.19543271224817e-05, "loss": 0.5683557987213135, "step": 7377 }, { "epoch": 1.3632710096892287, "grad_norm": 0.055050771683454514, "learning_rate": 1.1952370636009309e-05, "loss": 0.3631197512149811, "step": 7378 }, { "epoch": 1.3634557863981245, "grad_norm": 0.047246962785720825, "learning_rate": 1.1950414071838673e-05, "loss": 0.34128281474113464, "step": 7379 }, { "epoch": 1.3636405631070203, "grad_norm": 0.06924453377723694, "learning_rate": 1.194845743004766e-05, "loss": 0.38558104634284973, "step": 7380 }, { "epoch": 1.3638253398159161, "grad_norm": 0.06759911775588989, "learning_rate": 1.1946500710714138e-05, "loss": 0.4166731536388397, "step": 7381 }, { "epoch": 1.364010116524812, "grad_norm": 0.08717620372772217, "learning_rate": 1.1944543913915976e-05, "loss": 0.6282873153686523, "step": 7382 }, { "epoch": 1.3641948932337078, "grad_norm": 0.07526937872171402, "learning_rate": 1.1942587039731052e-05, "loss": 0.470471054315567, "step": 7383 }, { "epoch": 1.3643796699426036, "grad_norm": 0.09385870397090912, "learning_rate": 1.1940630088237239e-05, "loss": 0.6459283232688904, "step": 7384 }, { "epoch": 1.3645644466514995, "grad_norm": 0.05255207419395447, "learning_rate": 1.1938673059512422e-05, "loss": 0.317013144493103, "step": 7385 }, { "epoch": 1.3647492233603955, "grad_norm": 0.07780256122350693, "learning_rate": 1.1936715953634481e-05, "loss": 0.5451396107673645, "step": 7386 }, { "epoch": 1.3649340000692913, "grad_norm": 0.0779600441455841, "learning_rate": 1.1934758770681306e-05, "loss": 0.5497245192527771, "step": 7387 }, { "epoch": 1.3651187767781872, "grad_norm": 0.08268772065639496, "learning_rate": 1.1932801510730785e-05, "loss": 0.5683483481407166, "step": 7388 }, { "epoch": 1.365303553487083, "grad_norm": 0.09884671121835709, "learning_rate": 1.193084417386081e-05, "loss": 0.5287616848945618, "step": 7389 }, { "epoch": 1.3654883301959788, "grad_norm": 0.08031193912029266, "learning_rate": 1.1928886760149277e-05, "loss": 0.49728962779045105, "step": 7390 }, { "epoch": 1.3656731069048746, "grad_norm": 0.07396135479211807, "learning_rate": 1.1926929269674086e-05, "loss": 0.4677727520465851, "step": 7391 }, { "epoch": 1.3658578836137705, "grad_norm": 0.06157707795500755, "learning_rate": 1.1924971702513137e-05, "loss": 0.3914041817188263, "step": 7392 }, { "epoch": 1.3660426603226663, "grad_norm": 0.0752425342798233, "learning_rate": 1.1923014058744343e-05, "loss": 0.5061258673667908, "step": 7393 }, { "epoch": 1.3662274370315621, "grad_norm": 0.08719919621944427, "learning_rate": 1.1921056338445599e-05, "loss": 0.5727595686912537, "step": 7394 }, { "epoch": 1.366412213740458, "grad_norm": 0.08396576344966888, "learning_rate": 1.1919098541694828e-05, "loss": 0.6433411836624146, "step": 7395 }, { "epoch": 1.3665969904493538, "grad_norm": 0.07268782705068588, "learning_rate": 1.1917140668569933e-05, "loss": 0.4731309413909912, "step": 7396 }, { "epoch": 1.3667817671582498, "grad_norm": 0.059807468205690384, "learning_rate": 1.1915182719148841e-05, "loss": 0.333195298910141, "step": 7397 }, { "epoch": 1.3669665438671457, "grad_norm": 0.07601609826087952, "learning_rate": 1.191322469350947e-05, "loss": 0.43810439109802246, "step": 7398 }, { "epoch": 1.3671513205760415, "grad_norm": 0.061796288937330246, "learning_rate": 1.191126659172974e-05, "loss": 0.43337252736091614, "step": 7399 }, { "epoch": 1.3673360972849373, "grad_norm": 0.07230713963508606, "learning_rate": 1.1909308413887579e-05, "loss": 0.4834917485713959, "step": 7400 }, { "epoch": 1.3675208739938332, "grad_norm": 0.08036571741104126, "learning_rate": 1.1907350160060918e-05, "loss": 0.4740179777145386, "step": 7401 }, { "epoch": 1.367705650702729, "grad_norm": 0.061237361282110214, "learning_rate": 1.1905391830327685e-05, "loss": 0.39236539602279663, "step": 7402 }, { "epoch": 1.3678904274116248, "grad_norm": 0.09754402190446854, "learning_rate": 1.1903433424765822e-05, "loss": 0.6992279291152954, "step": 7403 }, { "epoch": 1.3680752041205206, "grad_norm": 0.07044808566570282, "learning_rate": 1.1901474943453262e-05, "loss": 0.6163637638092041, "step": 7404 }, { "epoch": 1.3682599808294165, "grad_norm": 0.07580169290304184, "learning_rate": 1.189951638646795e-05, "loss": 0.5042867660522461, "step": 7405 }, { "epoch": 1.3684447575383123, "grad_norm": 0.07228293269872665, "learning_rate": 1.1897557753887826e-05, "loss": 0.6257280111312866, "step": 7406 }, { "epoch": 1.3686295342472081, "grad_norm": 0.0661831870675087, "learning_rate": 1.189559904579084e-05, "loss": 0.4398761987686157, "step": 7407 }, { "epoch": 1.368814310956104, "grad_norm": 0.08952990919351578, "learning_rate": 1.1893640262254946e-05, "loss": 0.6264430284500122, "step": 7408 }, { "epoch": 1.3689990876649998, "grad_norm": 0.0661187618970871, "learning_rate": 1.189168140335809e-05, "loss": 0.37947767972946167, "step": 7409 }, { "epoch": 1.3691838643738956, "grad_norm": 0.09342033416032791, "learning_rate": 1.1889722469178235e-05, "loss": 0.6840120553970337, "step": 7410 }, { "epoch": 1.3693686410827914, "grad_norm": 0.06493902951478958, "learning_rate": 1.1887763459793335e-05, "loss": 0.4494801163673401, "step": 7411 }, { "epoch": 1.3695534177916873, "grad_norm": 0.07095891237258911, "learning_rate": 1.1885804375281357e-05, "loss": 0.39049607515335083, "step": 7412 }, { "epoch": 1.369738194500583, "grad_norm": 0.08437249809503555, "learning_rate": 1.1883845215720267e-05, "loss": 0.5024149417877197, "step": 7413 }, { "epoch": 1.369922971209479, "grad_norm": 0.08498772233724594, "learning_rate": 1.1881885981188029e-05, "loss": 0.5656739473342896, "step": 7414 }, { "epoch": 1.3701077479183748, "grad_norm": 0.07123856991529465, "learning_rate": 1.1879926671762619e-05, "loss": 0.4553503692150116, "step": 7415 }, { "epoch": 1.3702925246272708, "grad_norm": 0.09772376716136932, "learning_rate": 1.1877967287522005e-05, "loss": 0.725156307220459, "step": 7416 }, { "epoch": 1.3704773013361666, "grad_norm": 0.07861115038394928, "learning_rate": 1.1876007828544172e-05, "loss": 0.5879685878753662, "step": 7417 }, { "epoch": 1.3706620780450625, "grad_norm": 0.07092233002185822, "learning_rate": 1.1874048294907094e-05, "loss": 0.4736587405204773, "step": 7418 }, { "epoch": 1.3708468547539583, "grad_norm": 0.049529027193784714, "learning_rate": 1.1872088686688758e-05, "loss": 0.3412547707557678, "step": 7419 }, { "epoch": 1.3710316314628541, "grad_norm": 0.06565441191196442, "learning_rate": 1.187012900396715e-05, "loss": 0.4283704459667206, "step": 7420 }, { "epoch": 1.37121640817175, "grad_norm": 0.06818858534097672, "learning_rate": 1.1868169246820259e-05, "loss": 0.5220703482627869, "step": 7421 }, { "epoch": 1.3714011848806458, "grad_norm": 0.07601748406887054, "learning_rate": 1.1866209415326073e-05, "loss": 0.5260892510414124, "step": 7422 }, { "epoch": 1.3715859615895416, "grad_norm": 0.07375529408454895, "learning_rate": 1.1864249509562595e-05, "loss": 0.4407345950603485, "step": 7423 }, { "epoch": 1.3717707382984374, "grad_norm": 0.06595490127801895, "learning_rate": 1.186228952960782e-05, "loss": 0.3844723701477051, "step": 7424 }, { "epoch": 1.3719555150073333, "grad_norm": 0.06893607974052429, "learning_rate": 1.1860329475539745e-05, "loss": 0.48252663016319275, "step": 7425 }, { "epoch": 1.372140291716229, "grad_norm": 0.06890595704317093, "learning_rate": 1.1858369347436376e-05, "loss": 0.4432915151119232, "step": 7426 }, { "epoch": 1.3723250684251251, "grad_norm": 0.061962999403476715, "learning_rate": 1.1856409145375724e-05, "loss": 0.38078954815864563, "step": 7427 }, { "epoch": 1.372509845134021, "grad_norm": 0.060137588530778885, "learning_rate": 1.1854448869435796e-05, "loss": 0.39205625653266907, "step": 7428 }, { "epoch": 1.3726946218429168, "grad_norm": 0.08822615444660187, "learning_rate": 1.1852488519694601e-05, "loss": 0.6523511409759521, "step": 7429 }, { "epoch": 1.3728793985518126, "grad_norm": 0.07423841953277588, "learning_rate": 1.1850528096230162e-05, "loss": 0.5378643274307251, "step": 7430 }, { "epoch": 1.3730641752607085, "grad_norm": 0.08107765018939972, "learning_rate": 1.1848567599120493e-05, "loss": 0.511623203754425, "step": 7431 }, { "epoch": 1.3732489519696043, "grad_norm": 0.0851687341928482, "learning_rate": 1.1846607028443617e-05, "loss": 0.5813806056976318, "step": 7432 }, { "epoch": 1.3734337286785, "grad_norm": 0.07867684960365295, "learning_rate": 1.184464638427756e-05, "loss": 0.7225265502929688, "step": 7433 }, { "epoch": 1.373618505387396, "grad_norm": 0.07499439269304276, "learning_rate": 1.184268566670035e-05, "loss": 0.4679722785949707, "step": 7434 }, { "epoch": 1.3738032820962918, "grad_norm": 0.08448562771081924, "learning_rate": 1.1840724875790011e-05, "loss": 0.662882924079895, "step": 7435 }, { "epoch": 1.3739880588051876, "grad_norm": 0.05104814097285271, "learning_rate": 1.1838764011624581e-05, "loss": 0.320466011762619, "step": 7436 }, { "epoch": 1.3741728355140834, "grad_norm": 0.07123497128486633, "learning_rate": 1.1836803074282099e-05, "loss": 0.5260931253433228, "step": 7437 }, { "epoch": 1.3743576122229793, "grad_norm": 0.07810332626104355, "learning_rate": 1.18348420638406e-05, "loss": 0.5477177500724792, "step": 7438 }, { "epoch": 1.374542388931875, "grad_norm": 0.07277029752731323, "learning_rate": 1.1832880980378126e-05, "loss": 0.5091302394866943, "step": 7439 }, { "epoch": 1.374727165640771, "grad_norm": 0.06402041763067245, "learning_rate": 1.1830919823972727e-05, "loss": 0.3213406205177307, "step": 7440 }, { "epoch": 1.3749119423496667, "grad_norm": 0.06970551609992981, "learning_rate": 1.1828958594702444e-05, "loss": 0.4358813464641571, "step": 7441 }, { "epoch": 1.3750967190585626, "grad_norm": 0.0727848932147026, "learning_rate": 1.1826997292645328e-05, "loss": 0.4688945412635803, "step": 7442 }, { "epoch": 1.3752814957674584, "grad_norm": 0.0911780372262001, "learning_rate": 1.1825035917879442e-05, "loss": 0.7580387592315674, "step": 7443 }, { "epoch": 1.3754662724763542, "grad_norm": 0.08887773007154465, "learning_rate": 1.1823074470482835e-05, "loss": 0.5618323087692261, "step": 7444 }, { "epoch": 1.3756510491852503, "grad_norm": 0.06911130249500275, "learning_rate": 1.1821112950533564e-05, "loss": 0.36385273933410645, "step": 7445 }, { "epoch": 1.375835825894146, "grad_norm": 0.07710594683885574, "learning_rate": 1.1819151358109697e-05, "loss": 0.5176555514335632, "step": 7446 }, { "epoch": 1.376020602603042, "grad_norm": 0.07961034774780273, "learning_rate": 1.1817189693289299e-05, "loss": 0.48502466082572937, "step": 7447 }, { "epoch": 1.3762053793119378, "grad_norm": 0.08120684325695038, "learning_rate": 1.1815227956150434e-05, "loss": 0.5393785834312439, "step": 7448 }, { "epoch": 1.3763901560208336, "grad_norm": 0.09085174649953842, "learning_rate": 1.1813266146771178e-05, "loss": 0.6135912537574768, "step": 7449 }, { "epoch": 1.3765749327297294, "grad_norm": 0.07598740607500076, "learning_rate": 1.1811304265229601e-05, "loss": 0.5072231292724609, "step": 7450 }, { "epoch": 1.3767597094386252, "grad_norm": 0.09174876660108566, "learning_rate": 1.1809342311603784e-05, "loss": 0.6139599084854126, "step": 7451 }, { "epoch": 1.376944486147521, "grad_norm": 0.07367060333490372, "learning_rate": 1.1807380285971796e-05, "loss": 0.4490894675254822, "step": 7452 }, { "epoch": 1.377129262856417, "grad_norm": 0.07609044760465622, "learning_rate": 1.1805418188411735e-05, "loss": 0.5755985379219055, "step": 7453 }, { "epoch": 1.3773140395653127, "grad_norm": 0.06809990108013153, "learning_rate": 1.1803456019001678e-05, "loss": 0.4108802378177643, "step": 7454 }, { "epoch": 1.3774988162742086, "grad_norm": 0.07754744589328766, "learning_rate": 1.180149377781971e-05, "loss": 0.5679728388786316, "step": 7455 }, { "epoch": 1.3776835929831046, "grad_norm": 0.07412713766098022, "learning_rate": 1.1799531464943926e-05, "loss": 0.5606153011322021, "step": 7456 }, { "epoch": 1.3778683696920004, "grad_norm": 0.05983395874500275, "learning_rate": 1.1797569080452423e-05, "loss": 0.4484902620315552, "step": 7457 }, { "epoch": 1.3780531464008963, "grad_norm": 0.06855115294456482, "learning_rate": 1.179560662442329e-05, "loss": 0.3876204192638397, "step": 7458 }, { "epoch": 1.378237923109792, "grad_norm": 0.08249981701374054, "learning_rate": 1.1793644096934634e-05, "loss": 0.5886198878288269, "step": 7459 }, { "epoch": 1.378422699818688, "grad_norm": 0.09763695299625397, "learning_rate": 1.1791681498064554e-05, "loss": 0.5948026776313782, "step": 7460 }, { "epoch": 1.3786074765275838, "grad_norm": 0.06549713760614395, "learning_rate": 1.1789718827891157e-05, "loss": 0.3910665214061737, "step": 7461 }, { "epoch": 1.3787922532364796, "grad_norm": 0.07282456755638123, "learning_rate": 1.1787756086492546e-05, "loss": 0.5207515358924866, "step": 7462 }, { "epoch": 1.3789770299453754, "grad_norm": 0.08371993154287338, "learning_rate": 1.178579327394684e-05, "loss": 0.507247269153595, "step": 7463 }, { "epoch": 1.3791618066542712, "grad_norm": 0.0790616124868393, "learning_rate": 1.178383039033215e-05, "loss": 0.5724804401397705, "step": 7464 }, { "epoch": 1.379346583363167, "grad_norm": 0.07022110372781754, "learning_rate": 1.1781867435726587e-05, "loss": 0.4332163631916046, "step": 7465 }, { "epoch": 1.379531360072063, "grad_norm": 0.07322531193494797, "learning_rate": 1.1779904410208276e-05, "loss": 0.48209553956985474, "step": 7466 }, { "epoch": 1.3797161367809587, "grad_norm": 0.07616209983825684, "learning_rate": 1.177794131385534e-05, "loss": 0.5288639068603516, "step": 7467 }, { "epoch": 1.3799009134898546, "grad_norm": 0.08086004108190536, "learning_rate": 1.1775978146745899e-05, "loss": 0.47018125653266907, "step": 7468 }, { "epoch": 1.3800856901987504, "grad_norm": 0.08034881204366684, "learning_rate": 1.1774014908958085e-05, "loss": 0.48584097623825073, "step": 7469 }, { "epoch": 1.3802704669076462, "grad_norm": 0.07475937157869339, "learning_rate": 1.1772051600570032e-05, "loss": 0.5507193207740784, "step": 7470 }, { "epoch": 1.380455243616542, "grad_norm": 0.0697658360004425, "learning_rate": 1.1770088221659865e-05, "loss": 0.57442706823349, "step": 7471 }, { "epoch": 1.3806400203254379, "grad_norm": 0.10161164402961731, "learning_rate": 1.1768124772305724e-05, "loss": 0.8037537336349487, "step": 7472 }, { "epoch": 1.3808247970343337, "grad_norm": 0.07438277453184128, "learning_rate": 1.1766161252585751e-05, "loss": 0.45627665519714355, "step": 7473 }, { "epoch": 1.3810095737432297, "grad_norm": 0.07278723269701004, "learning_rate": 1.1764197662578087e-05, "loss": 0.543790340423584, "step": 7474 }, { "epoch": 1.3811943504521256, "grad_norm": 0.08033698797225952, "learning_rate": 1.1762234002360873e-05, "loss": 0.5407695174217224, "step": 7475 }, { "epoch": 1.3813791271610214, "grad_norm": 0.08345130831003189, "learning_rate": 1.176027027201226e-05, "loss": 0.6017276644706726, "step": 7476 }, { "epoch": 1.3815639038699172, "grad_norm": 0.06852320581674576, "learning_rate": 1.1758306471610397e-05, "loss": 0.4384315609931946, "step": 7477 }, { "epoch": 1.381748680578813, "grad_norm": 0.04410892724990845, "learning_rate": 1.1756342601233437e-05, "loss": 0.27874159812927246, "step": 7478 }, { "epoch": 1.3819334572877089, "grad_norm": 0.08524870127439499, "learning_rate": 1.1754378660959536e-05, "loss": 0.6035138964653015, "step": 7479 }, { "epoch": 1.3821182339966047, "grad_norm": 0.06554487347602844, "learning_rate": 1.1752414650866855e-05, "loss": 0.45440593361854553, "step": 7480 }, { "epoch": 1.3823030107055005, "grad_norm": 0.06884905695915222, "learning_rate": 1.1750450571033553e-05, "loss": 0.4647334814071655, "step": 7481 }, { "epoch": 1.3824877874143964, "grad_norm": 0.07676929980516434, "learning_rate": 1.1748486421537794e-05, "loss": 0.5768230557441711, "step": 7482 }, { "epoch": 1.3826725641232922, "grad_norm": 0.08470456302165985, "learning_rate": 1.1746522202457746e-05, "loss": 0.5833501815795898, "step": 7483 }, { "epoch": 1.382857340832188, "grad_norm": 0.09254126250743866, "learning_rate": 1.1744557913871579e-05, "loss": 0.4988571107387543, "step": 7484 }, { "epoch": 1.383042117541084, "grad_norm": 0.08187223970890045, "learning_rate": 1.1742593555857465e-05, "loss": 0.6211193203926086, "step": 7485 }, { "epoch": 1.38322689424998, "grad_norm": 0.07119544595479965, "learning_rate": 1.1740629128493577e-05, "loss": 0.41468870639801025, "step": 7486 }, { "epoch": 1.3834116709588757, "grad_norm": 0.0553731806576252, "learning_rate": 1.17386646318581e-05, "loss": 0.32613423466682434, "step": 7487 }, { "epoch": 1.3835964476677716, "grad_norm": 0.07978975772857666, "learning_rate": 1.1736700066029206e-05, "loss": 0.496436208486557, "step": 7488 }, { "epoch": 1.3837812243766674, "grad_norm": 0.07208065688610077, "learning_rate": 1.1734735431085084e-05, "loss": 0.5078883767127991, "step": 7489 }, { "epoch": 1.3839660010855632, "grad_norm": 0.06337263435125351, "learning_rate": 1.1732770727103919e-05, "loss": 0.3940962255001068, "step": 7490 }, { "epoch": 1.384150777794459, "grad_norm": 0.07468237727880478, "learning_rate": 1.1730805954163902e-05, "loss": 0.5392362475395203, "step": 7491 }, { "epoch": 1.3843355545033549, "grad_norm": 0.0637771487236023, "learning_rate": 1.172884111234322e-05, "loss": 0.3994047939777374, "step": 7492 }, { "epoch": 1.3845203312122507, "grad_norm": 0.10853901505470276, "learning_rate": 1.1726876201720074e-05, "loss": 0.8128345012664795, "step": 7493 }, { "epoch": 1.3847051079211465, "grad_norm": 0.09867019951343536, "learning_rate": 1.1724911222372658e-05, "loss": 0.7452707290649414, "step": 7494 }, { "epoch": 1.3848898846300424, "grad_norm": 0.08588841557502747, "learning_rate": 1.1722946174379168e-05, "loss": 0.6157535910606384, "step": 7495 }, { "epoch": 1.3850746613389382, "grad_norm": 0.08550074696540833, "learning_rate": 1.1720981057817813e-05, "loss": 0.7322298288345337, "step": 7496 }, { "epoch": 1.385259438047834, "grad_norm": 0.07837128639221191, "learning_rate": 1.1719015872766798e-05, "loss": 0.6208504438400269, "step": 7497 }, { "epoch": 1.3854442147567299, "grad_norm": 0.08587179332971573, "learning_rate": 1.1717050619304324e-05, "loss": 0.5662384629249573, "step": 7498 }, { "epoch": 1.3856289914656257, "grad_norm": 0.06542603671550751, "learning_rate": 1.1715085297508613e-05, "loss": 0.40593597292900085, "step": 7499 }, { "epoch": 1.3858137681745215, "grad_norm": 0.06688559800386429, "learning_rate": 1.1713119907457869e-05, "loss": 0.4476969540119171, "step": 7500 }, { "epoch": 1.3858137681745215, "eval_loss": 0.5861050486564636, "eval_runtime": 256.6455, "eval_samples_per_second": 71.028, "eval_steps_per_second": 8.88, "step": 7500 }, { "epoch": 1.3859985448834173, "grad_norm": 0.07808694988489151, "learning_rate": 1.1711154449230315e-05, "loss": 0.5939205884933472, "step": 7501 }, { "epoch": 1.3861833215923132, "grad_norm": 0.09479350596666336, "learning_rate": 1.1709188922904167e-05, "loss": 0.6449679732322693, "step": 7502 }, { "epoch": 1.386368098301209, "grad_norm": 0.06300199031829834, "learning_rate": 1.1707223328557644e-05, "loss": 0.46869057416915894, "step": 7503 }, { "epoch": 1.386552875010105, "grad_norm": 0.05616572126746178, "learning_rate": 1.170525766626898e-05, "loss": 0.365093857049942, "step": 7504 }, { "epoch": 1.3867376517190009, "grad_norm": 0.0733765959739685, "learning_rate": 1.170329193611639e-05, "loss": 0.45358970761299133, "step": 7505 }, { "epoch": 1.3869224284278967, "grad_norm": 0.06689228862524033, "learning_rate": 1.1701326138178113e-05, "loss": 0.5470128059387207, "step": 7506 }, { "epoch": 1.3871072051367925, "grad_norm": 0.0790744200348854, "learning_rate": 1.1699360272532376e-05, "loss": 0.6203442811965942, "step": 7507 }, { "epoch": 1.3872919818456884, "grad_norm": 0.08600825816392899, "learning_rate": 1.1697394339257417e-05, "loss": 0.5041205883026123, "step": 7508 }, { "epoch": 1.3874767585545842, "grad_norm": 0.07286131381988525, "learning_rate": 1.1695428338431479e-05, "loss": 0.5549904704093933, "step": 7509 }, { "epoch": 1.38766153526348, "grad_norm": 0.07373219728469849, "learning_rate": 1.1693462270132792e-05, "loss": 0.42410120368003845, "step": 7510 }, { "epoch": 1.3878463119723758, "grad_norm": 0.06848616898059845, "learning_rate": 1.1691496134439606e-05, "loss": 0.4917903244495392, "step": 7511 }, { "epoch": 1.3880310886812717, "grad_norm": 0.06176409870386124, "learning_rate": 1.1689529931430166e-05, "loss": 0.4779815673828125, "step": 7512 }, { "epoch": 1.3882158653901675, "grad_norm": 0.07737766951322556, "learning_rate": 1.1687563661182724e-05, "loss": 0.4695681929588318, "step": 7513 }, { "epoch": 1.3884006420990633, "grad_norm": 0.08544651418924332, "learning_rate": 1.1685597323775522e-05, "loss": 0.8225537538528442, "step": 7514 }, { "epoch": 1.3885854188079594, "grad_norm": 0.07991264760494232, "learning_rate": 1.1683630919286824e-05, "loss": 0.5340745449066162, "step": 7515 }, { "epoch": 1.3887701955168552, "grad_norm": 0.07726863771677017, "learning_rate": 1.1681664447794883e-05, "loss": 0.456493079662323, "step": 7516 }, { "epoch": 1.388954972225751, "grad_norm": 0.08772004395723343, "learning_rate": 1.1679697909377955e-05, "loss": 0.5773293375968933, "step": 7517 }, { "epoch": 1.3891397489346469, "grad_norm": 0.06700170040130615, "learning_rate": 1.1677731304114306e-05, "loss": 0.4972071945667267, "step": 7518 }, { "epoch": 1.3893245256435427, "grad_norm": 0.05684570223093033, "learning_rate": 1.1675764632082203e-05, "loss": 0.4496079087257385, "step": 7519 }, { "epoch": 1.3895093023524385, "grad_norm": 0.0730443224310875, "learning_rate": 1.1673797893359908e-05, "loss": 0.5882956385612488, "step": 7520 }, { "epoch": 1.3896940790613344, "grad_norm": 0.07208561897277832, "learning_rate": 1.1671831088025695e-05, "loss": 0.4346911311149597, "step": 7521 }, { "epoch": 1.3898788557702302, "grad_norm": 0.059102918952703476, "learning_rate": 1.1669864216157834e-05, "loss": 0.4118361175060272, "step": 7522 }, { "epoch": 1.390063632479126, "grad_norm": 0.08321856707334518, "learning_rate": 1.1667897277834603e-05, "loss": 0.5240601897239685, "step": 7523 }, { "epoch": 1.3902484091880218, "grad_norm": 0.07270022481679916, "learning_rate": 1.1665930273134276e-05, "loss": 0.5131801962852478, "step": 7524 }, { "epoch": 1.3904331858969177, "grad_norm": 0.06680957973003387, "learning_rate": 1.1663963202135137e-05, "loss": 0.44497692584991455, "step": 7525 }, { "epoch": 1.3906179626058135, "grad_norm": 0.06281045824289322, "learning_rate": 1.166199606491547e-05, "loss": 0.44331836700439453, "step": 7526 }, { "epoch": 1.3908027393147093, "grad_norm": 0.07803647220134735, "learning_rate": 1.1660028861553559e-05, "loss": 0.5607433319091797, "step": 7527 }, { "epoch": 1.3909875160236052, "grad_norm": 0.07634811103343964, "learning_rate": 1.165806159212769e-05, "loss": 0.5576267242431641, "step": 7528 }, { "epoch": 1.391172292732501, "grad_norm": 0.09128347784280777, "learning_rate": 1.1656094256716161e-05, "loss": 0.7086127400398254, "step": 7529 }, { "epoch": 1.3913570694413968, "grad_norm": 0.07963167876005173, "learning_rate": 1.165412685539726e-05, "loss": 0.615551233291626, "step": 7530 }, { "epoch": 1.3915418461502926, "grad_norm": 0.07789136469364166, "learning_rate": 1.1652159388249287e-05, "loss": 0.5044615864753723, "step": 7531 }, { "epoch": 1.3917266228591885, "grad_norm": 0.06654134392738342, "learning_rate": 1.1650191855350537e-05, "loss": 0.4450850486755371, "step": 7532 }, { "epoch": 1.3919113995680845, "grad_norm": 0.08933846652507782, "learning_rate": 1.1648224256779314e-05, "loss": 0.5686116218566895, "step": 7533 }, { "epoch": 1.3920961762769803, "grad_norm": 0.0678136795759201, "learning_rate": 1.1646256592613923e-05, "loss": 0.414152592420578, "step": 7534 }, { "epoch": 1.3922809529858762, "grad_norm": 0.07052744925022125, "learning_rate": 1.1644288862932669e-05, "loss": 0.45875686407089233, "step": 7535 }, { "epoch": 1.392465729694772, "grad_norm": 0.07333561033010483, "learning_rate": 1.1642321067813864e-05, "loss": 0.5426931977272034, "step": 7536 }, { "epoch": 1.3926505064036678, "grad_norm": 0.08019225299358368, "learning_rate": 1.1640353207335818e-05, "loss": 0.5834104418754578, "step": 7537 }, { "epoch": 1.3928352831125637, "grad_norm": 0.05879241228103638, "learning_rate": 1.1638385281576844e-05, "loss": 0.33529654145240784, "step": 7538 }, { "epoch": 1.3930200598214595, "grad_norm": 0.058719415217638016, "learning_rate": 1.1636417290615267e-05, "loss": 0.3612607717514038, "step": 7539 }, { "epoch": 1.3932048365303553, "grad_norm": 0.05941032990813255, "learning_rate": 1.1634449234529399e-05, "loss": 0.3875240385532379, "step": 7540 }, { "epoch": 1.3933896132392511, "grad_norm": 0.07174340635538101, "learning_rate": 1.1632481113397565e-05, "loss": 0.4807130992412567, "step": 7541 }, { "epoch": 1.393574389948147, "grad_norm": 0.07699482887983322, "learning_rate": 1.1630512927298087e-05, "loss": 0.49087509512901306, "step": 7542 }, { "epoch": 1.3937591666570428, "grad_norm": 0.07787071168422699, "learning_rate": 1.1628544676309302e-05, "loss": 0.499368280172348, "step": 7543 }, { "epoch": 1.3939439433659389, "grad_norm": 0.06933631747961044, "learning_rate": 1.1626576360509529e-05, "loss": 0.5152738094329834, "step": 7544 }, { "epoch": 1.3941287200748347, "grad_norm": 0.07767070084810257, "learning_rate": 1.1624607979977106e-05, "loss": 0.5594071745872498, "step": 7545 }, { "epoch": 1.3943134967837305, "grad_norm": 0.06435758620500565, "learning_rate": 1.162263953479037e-05, "loss": 0.39479848742485046, "step": 7546 }, { "epoch": 1.3944982734926263, "grad_norm": 0.07449889183044434, "learning_rate": 1.162067102502766e-05, "loss": 0.3971273601055145, "step": 7547 }, { "epoch": 1.3946830502015222, "grad_norm": 0.060775335878133774, "learning_rate": 1.1618702450767306e-05, "loss": 0.3650251030921936, "step": 7548 }, { "epoch": 1.394867826910418, "grad_norm": 0.0665210410952568, "learning_rate": 1.1616733812087663e-05, "loss": 0.37720543146133423, "step": 7549 }, { "epoch": 1.3950526036193138, "grad_norm": 0.091295525431633, "learning_rate": 1.1614765109067075e-05, "loss": 0.7030790448188782, "step": 7550 }, { "epoch": 1.3952373803282097, "grad_norm": 0.07124901562929153, "learning_rate": 1.1612796341783883e-05, "loss": 0.46749329566955566, "step": 7551 }, { "epoch": 1.3954221570371055, "grad_norm": 0.07841958850622177, "learning_rate": 1.1610827510316442e-05, "loss": 0.5060739517211914, "step": 7552 }, { "epoch": 1.3956069337460013, "grad_norm": 0.07065027207136154, "learning_rate": 1.160885861474311e-05, "loss": 0.4663480818271637, "step": 7553 }, { "epoch": 1.3957917104548971, "grad_norm": 0.05885373055934906, "learning_rate": 1.1606889655142236e-05, "loss": 0.468976765871048, "step": 7554 }, { "epoch": 1.395976487163793, "grad_norm": 0.08445914834737778, "learning_rate": 1.160492063159218e-05, "loss": 0.6371353268623352, "step": 7555 }, { "epoch": 1.3961612638726888, "grad_norm": 0.08427069336175919, "learning_rate": 1.1602951544171307e-05, "loss": 0.6276706457138062, "step": 7556 }, { "epoch": 1.3963460405815846, "grad_norm": 0.07150643318891525, "learning_rate": 1.1600982392957978e-05, "loss": 0.4658666253089905, "step": 7557 }, { "epoch": 1.3965308172904805, "grad_norm": 0.08658826351165771, "learning_rate": 1.1599013178030553e-05, "loss": 0.6273196339607239, "step": 7558 }, { "epoch": 1.3967155939993763, "grad_norm": 0.07841559499502182, "learning_rate": 1.1597043899467412e-05, "loss": 0.5224420428276062, "step": 7559 }, { "epoch": 1.396900370708272, "grad_norm": 0.0666312649846077, "learning_rate": 1.159507455734692e-05, "loss": 0.40652960538864136, "step": 7560 }, { "epoch": 1.397085147417168, "grad_norm": 0.0653591901063919, "learning_rate": 1.1593105151747448e-05, "loss": 0.3719636797904968, "step": 7561 }, { "epoch": 1.397269924126064, "grad_norm": 0.07236526161432266, "learning_rate": 1.1591135682747374e-05, "loss": 0.5152609944343567, "step": 7562 }, { "epoch": 1.3974547008349598, "grad_norm": 0.07473872601985931, "learning_rate": 1.1589166150425082e-05, "loss": 0.5459132790565491, "step": 7563 }, { "epoch": 1.3976394775438556, "grad_norm": 0.0817592591047287, "learning_rate": 1.1587196554858946e-05, "loss": 0.5027362704277039, "step": 7564 }, { "epoch": 1.3978242542527515, "grad_norm": 0.08243642747402191, "learning_rate": 1.1585226896127353e-05, "loss": 0.5912965536117554, "step": 7565 }, { "epoch": 1.3980090309616473, "grad_norm": 0.09193118661642075, "learning_rate": 1.1583257174308693e-05, "loss": 0.6074056029319763, "step": 7566 }, { "epoch": 1.3981938076705431, "grad_norm": 0.07539702951908112, "learning_rate": 1.1581287389481348e-05, "loss": 0.5826414823532104, "step": 7567 }, { "epoch": 1.398378584379439, "grad_norm": 0.07248812168836594, "learning_rate": 1.1579317541723709e-05, "loss": 0.4714256227016449, "step": 7568 }, { "epoch": 1.3985633610883348, "grad_norm": 0.059154167771339417, "learning_rate": 1.1577347631114178e-05, "loss": 0.45838063955307007, "step": 7569 }, { "epoch": 1.3987481377972306, "grad_norm": 0.08536428958177567, "learning_rate": 1.1575377657731144e-05, "loss": 0.6104124784469604, "step": 7570 }, { "epoch": 1.3989329145061264, "grad_norm": 0.08255096524953842, "learning_rate": 1.1573407621653007e-05, "loss": 0.48993632197380066, "step": 7571 }, { "epoch": 1.3991176912150223, "grad_norm": 0.08401396870613098, "learning_rate": 1.157143752295817e-05, "loss": 0.5046585202217102, "step": 7572 }, { "epoch": 1.3993024679239183, "grad_norm": 0.1049598827958107, "learning_rate": 1.1569467361725037e-05, "loss": 0.7219524383544922, "step": 7573 }, { "epoch": 1.3994872446328142, "grad_norm": 0.08518721163272858, "learning_rate": 1.1567497138032014e-05, "loss": 0.693490207195282, "step": 7574 }, { "epoch": 1.39967202134171, "grad_norm": 0.07102842628955841, "learning_rate": 1.1565526851957504e-05, "loss": 0.4655759930610657, "step": 7575 }, { "epoch": 1.3998567980506058, "grad_norm": 0.09537466615438461, "learning_rate": 1.1563556503579929e-05, "loss": 0.7377628087997437, "step": 7576 }, { "epoch": 1.4000415747595016, "grad_norm": 0.06323997676372528, "learning_rate": 1.1561586092977697e-05, "loss": 0.43483224511146545, "step": 7577 }, { "epoch": 1.4002263514683975, "grad_norm": 0.06556528061628342, "learning_rate": 1.1559615620229216e-05, "loss": 0.4366847276687622, "step": 7578 }, { "epoch": 1.4004111281772933, "grad_norm": 0.07493555545806885, "learning_rate": 1.1557645085412921e-05, "loss": 0.436001181602478, "step": 7579 }, { "epoch": 1.4005959048861891, "grad_norm": 0.07397256046533585, "learning_rate": 1.1555674488607224e-05, "loss": 0.5384519696235657, "step": 7580 }, { "epoch": 1.400780681595085, "grad_norm": 0.09817645698785782, "learning_rate": 1.1553703829890546e-05, "loss": 0.7104623317718506, "step": 7581 }, { "epoch": 1.4009654583039808, "grad_norm": 0.07672224938869476, "learning_rate": 1.1551733109341318e-05, "loss": 0.4858318269252777, "step": 7582 }, { "epoch": 1.4011502350128766, "grad_norm": 0.08330681920051575, "learning_rate": 1.1549762327037968e-05, "loss": 0.6663315296173096, "step": 7583 }, { "epoch": 1.4013350117217724, "grad_norm": 0.08952096104621887, "learning_rate": 1.1547791483058926e-05, "loss": 0.6832013130187988, "step": 7584 }, { "epoch": 1.4015197884306683, "grad_norm": 0.06818807870149612, "learning_rate": 1.1545820577482623e-05, "loss": 0.47754937410354614, "step": 7585 }, { "epoch": 1.401704565139564, "grad_norm": 0.06141260638833046, "learning_rate": 1.1543849610387499e-05, "loss": 0.3404228985309601, "step": 7586 }, { "epoch": 1.40188934184846, "grad_norm": 0.08560726791620255, "learning_rate": 1.1541878581851994e-05, "loss": 0.5630397796630859, "step": 7587 }, { "epoch": 1.4020741185573558, "grad_norm": 0.07210613787174225, "learning_rate": 1.1539907491954539e-05, "loss": 0.5454407334327698, "step": 7588 }, { "epoch": 1.4022588952662516, "grad_norm": 0.06783408671617508, "learning_rate": 1.1537936340773586e-05, "loss": 0.46421727538108826, "step": 7589 }, { "epoch": 1.4024436719751474, "grad_norm": 0.07097148150205612, "learning_rate": 1.1535965128387578e-05, "loss": 0.5893171429634094, "step": 7590 }, { "epoch": 1.4026284486840432, "grad_norm": 0.07714993506669998, "learning_rate": 1.1533993854874964e-05, "loss": 0.5079788565635681, "step": 7591 }, { "epoch": 1.4028132253929393, "grad_norm": 0.1183595284819603, "learning_rate": 1.1532022520314192e-05, "loss": 0.6799882054328918, "step": 7592 }, { "epoch": 1.4029980021018351, "grad_norm": 0.07514689117670059, "learning_rate": 1.153005112478372e-05, "loss": 0.48914864659309387, "step": 7593 }, { "epoch": 1.403182778810731, "grad_norm": 0.07823141664266586, "learning_rate": 1.1528079668361997e-05, "loss": 0.5553666353225708, "step": 7594 }, { "epoch": 1.4033675555196268, "grad_norm": 0.0717146173119545, "learning_rate": 1.1526108151127488e-05, "loss": 0.5046097636222839, "step": 7595 }, { "epoch": 1.4035523322285226, "grad_norm": 0.07187351584434509, "learning_rate": 1.1524136573158646e-05, "loss": 0.4185374081134796, "step": 7596 }, { "epoch": 1.4037371089374184, "grad_norm": 0.07105645537376404, "learning_rate": 1.1522164934533939e-05, "loss": 0.4948467016220093, "step": 7597 }, { "epoch": 1.4039218856463143, "grad_norm": 0.06178764998912811, "learning_rate": 1.1520193235331827e-05, "loss": 0.4289746582508087, "step": 7598 }, { "epoch": 1.40410666235521, "grad_norm": 0.0821404680609703, "learning_rate": 1.151822147563078e-05, "loss": 0.4482502341270447, "step": 7599 }, { "epoch": 1.404291439064106, "grad_norm": 0.05330733582377434, "learning_rate": 1.1516249655509271e-05, "loss": 0.30158668756484985, "step": 7600 }, { "epoch": 1.4044762157730017, "grad_norm": 0.0885644257068634, "learning_rate": 1.1514277775045768e-05, "loss": 0.456144779920578, "step": 7601 }, { "epoch": 1.4046609924818976, "grad_norm": 0.08371008932590485, "learning_rate": 1.1512305834318746e-05, "loss": 0.5358234643936157, "step": 7602 }, { "epoch": 1.4048457691907936, "grad_norm": 0.06220470368862152, "learning_rate": 1.1510333833406687e-05, "loss": 0.37693867087364197, "step": 7603 }, { "epoch": 1.4050305458996895, "grad_norm": 0.08973923325538635, "learning_rate": 1.1508361772388064e-05, "loss": 0.626574695110321, "step": 7604 }, { "epoch": 1.4052153226085853, "grad_norm": 0.07953702658414841, "learning_rate": 1.150638965134136e-05, "loss": 0.5673231482505798, "step": 7605 }, { "epoch": 1.405400099317481, "grad_norm": 0.06657926738262177, "learning_rate": 1.1504417470345064e-05, "loss": 0.48152756690979004, "step": 7606 }, { "epoch": 1.405584876026377, "grad_norm": 0.09543479979038239, "learning_rate": 1.150244522947766e-05, "loss": 0.5430436134338379, "step": 7607 }, { "epoch": 1.4057696527352728, "grad_norm": 0.08441811054944992, "learning_rate": 1.1500472928817632e-05, "loss": 0.5210660696029663, "step": 7608 }, { "epoch": 1.4059544294441686, "grad_norm": 0.05825930833816528, "learning_rate": 1.1498500568443477e-05, "loss": 0.3573313057422638, "step": 7609 }, { "epoch": 1.4061392061530644, "grad_norm": 0.07727733254432678, "learning_rate": 1.149652814843369e-05, "loss": 0.585524320602417, "step": 7610 }, { "epoch": 1.4063239828619603, "grad_norm": 0.07217800617218018, "learning_rate": 1.1494555668866762e-05, "loss": 0.4551432728767395, "step": 7611 }, { "epoch": 1.406508759570856, "grad_norm": 0.06366957724094391, "learning_rate": 1.1492583129821198e-05, "loss": 0.3803600072860718, "step": 7612 }, { "epoch": 1.406693536279752, "grad_norm": 0.07053103297948837, "learning_rate": 1.1490610531375493e-05, "loss": 0.406423419713974, "step": 7613 }, { "epoch": 1.4068783129886477, "grad_norm": 0.08406764268875122, "learning_rate": 1.148863787360815e-05, "loss": 0.5798128247261047, "step": 7614 }, { "epoch": 1.4070630896975436, "grad_norm": 0.07802645862102509, "learning_rate": 1.148666515659768e-05, "loss": 0.49858537316322327, "step": 7615 }, { "epoch": 1.4072478664064394, "grad_norm": 0.07138711959123611, "learning_rate": 1.1484692380422587e-05, "loss": 0.4957258999347687, "step": 7616 }, { "epoch": 1.4074326431153352, "grad_norm": 0.06466956436634064, "learning_rate": 1.1482719545161382e-05, "loss": 0.4023810923099518, "step": 7617 }, { "epoch": 1.407617419824231, "grad_norm": 0.08849228173494339, "learning_rate": 1.1480746650892578e-05, "loss": 0.5417582392692566, "step": 7618 }, { "epoch": 1.4078021965331269, "grad_norm": 0.1008807122707367, "learning_rate": 1.1478773697694691e-05, "loss": 0.7184629440307617, "step": 7619 }, { "epoch": 1.4079869732420227, "grad_norm": 0.09179085493087769, "learning_rate": 1.1476800685646237e-05, "loss": 0.678741455078125, "step": 7620 }, { "epoch": 1.4081717499509188, "grad_norm": 0.08802378922700882, "learning_rate": 1.1474827614825734e-05, "loss": 0.5173779129981995, "step": 7621 }, { "epoch": 1.4083565266598146, "grad_norm": 0.10323859006166458, "learning_rate": 1.147285448531171e-05, "loss": 0.5072680711746216, "step": 7622 }, { "epoch": 1.4085413033687104, "grad_norm": 0.07147370278835297, "learning_rate": 1.147088129718268e-05, "loss": 0.4114370346069336, "step": 7623 }, { "epoch": 1.4087260800776062, "grad_norm": 0.08556059002876282, "learning_rate": 1.146890805051718e-05, "loss": 0.6060706973075867, "step": 7624 }, { "epoch": 1.408910856786502, "grad_norm": 0.06883016228675842, "learning_rate": 1.1466934745393737e-05, "loss": 0.41225066781044006, "step": 7625 }, { "epoch": 1.409095633495398, "grad_norm": 0.09106704592704773, "learning_rate": 1.1464961381890875e-05, "loss": 0.6388334631919861, "step": 7626 }, { "epoch": 1.4092804102042937, "grad_norm": 0.0875760018825531, "learning_rate": 1.1462987960087139e-05, "loss": 0.7768730521202087, "step": 7627 }, { "epoch": 1.4094651869131896, "grad_norm": 0.06366395950317383, "learning_rate": 1.1461014480061057e-05, "loss": 0.4491613507270813, "step": 7628 }, { "epoch": 1.4096499636220854, "grad_norm": 0.08622222393751144, "learning_rate": 1.1459040941891169e-05, "loss": 0.6296977400779724, "step": 7629 }, { "epoch": 1.4098347403309812, "grad_norm": 0.06523562967777252, "learning_rate": 1.1457067345656016e-05, "loss": 0.4500395953655243, "step": 7630 }, { "epoch": 1.410019517039877, "grad_norm": 0.08612232655286789, "learning_rate": 1.145509369143414e-05, "loss": 0.5739388465881348, "step": 7631 }, { "epoch": 1.410204293748773, "grad_norm": 0.08802293241024017, "learning_rate": 1.145311997930409e-05, "loss": 0.6413158178329468, "step": 7632 }, { "epoch": 1.410389070457669, "grad_norm": 0.06629212200641632, "learning_rate": 1.145114620934441e-05, "loss": 0.5219778418540955, "step": 7633 }, { "epoch": 1.4105738471665648, "grad_norm": 0.07021202892065048, "learning_rate": 1.1449172381633651e-05, "loss": 0.5135833621025085, "step": 7634 }, { "epoch": 1.4107586238754606, "grad_norm": 0.05778151750564575, "learning_rate": 1.1447198496250367e-05, "loss": 0.4304315149784088, "step": 7635 }, { "epoch": 1.4109434005843564, "grad_norm": 0.06428337097167969, "learning_rate": 1.1445224553273111e-05, "loss": 0.454351544380188, "step": 7636 }, { "epoch": 1.4111281772932522, "grad_norm": 0.07603947818279266, "learning_rate": 1.1443250552780435e-05, "loss": 0.4918436110019684, "step": 7637 }, { "epoch": 1.411312954002148, "grad_norm": 0.07855551689863205, "learning_rate": 1.1441276494850904e-05, "loss": 0.5450300574302673, "step": 7638 }, { "epoch": 1.411497730711044, "grad_norm": 0.06913916021585464, "learning_rate": 1.143930237956308e-05, "loss": 0.48682063817977905, "step": 7639 }, { "epoch": 1.4116825074199397, "grad_norm": 0.0858033075928688, "learning_rate": 1.1437328206995521e-05, "loss": 0.457550972700119, "step": 7640 }, { "epoch": 1.4118672841288356, "grad_norm": 0.05459976568818092, "learning_rate": 1.1435353977226797e-05, "loss": 0.3086320757865906, "step": 7641 }, { "epoch": 1.4120520608377314, "grad_norm": 0.05813084542751312, "learning_rate": 1.1433379690335478e-05, "loss": 0.3826307952404022, "step": 7642 }, { "epoch": 1.4122368375466272, "grad_norm": 0.08360633254051208, "learning_rate": 1.1431405346400128e-05, "loss": 0.64500892162323, "step": 7643 }, { "epoch": 1.412421614255523, "grad_norm": 0.07159120589494705, "learning_rate": 1.1429430945499324e-05, "loss": 0.39974844455718994, "step": 7644 }, { "epoch": 1.4126063909644189, "grad_norm": 0.07743488997220993, "learning_rate": 1.1427456487711644e-05, "loss": 0.6075916290283203, "step": 7645 }, { "epoch": 1.4127911676733147, "grad_norm": 0.07663056254386902, "learning_rate": 1.1425481973115659e-05, "loss": 0.5331258177757263, "step": 7646 }, { "epoch": 1.4129759443822105, "grad_norm": 0.07387567311525345, "learning_rate": 1.142350740178995e-05, "loss": 0.4058663845062256, "step": 7647 }, { "epoch": 1.4131607210911064, "grad_norm": 0.08707687258720398, "learning_rate": 1.1421532773813105e-05, "loss": 0.6303785443305969, "step": 7648 }, { "epoch": 1.4133454978000022, "grad_norm": 0.05880158394575119, "learning_rate": 1.14195580892637e-05, "loss": 0.4565977454185486, "step": 7649 }, { "epoch": 1.4135302745088982, "grad_norm": 0.0803513377904892, "learning_rate": 1.1417583348220322e-05, "loss": 0.6228235960006714, "step": 7650 }, { "epoch": 1.413715051217794, "grad_norm": 0.07094324380159378, "learning_rate": 1.1415608550761563e-05, "loss": 0.4482516944408417, "step": 7651 }, { "epoch": 1.41389982792669, "grad_norm": 0.06476152688264847, "learning_rate": 1.1413633696966016e-05, "loss": 0.4385446012020111, "step": 7652 }, { "epoch": 1.4140846046355857, "grad_norm": 0.07475713640451431, "learning_rate": 1.141165878691227e-05, "loss": 0.5316185355186462, "step": 7653 }, { "epoch": 1.4142693813444815, "grad_norm": 0.0754825696349144, "learning_rate": 1.1409683820678913e-05, "loss": 0.5440897345542908, "step": 7654 }, { "epoch": 1.4144541580533774, "grad_norm": 0.07446648925542831, "learning_rate": 1.140770879834456e-05, "loss": 0.47146567702293396, "step": 7655 }, { "epoch": 1.4146389347622732, "grad_norm": 0.06318148970603943, "learning_rate": 1.1405733719987797e-05, "loss": 0.41523852944374084, "step": 7656 }, { "epoch": 1.414823711471169, "grad_norm": 0.07789860665798187, "learning_rate": 1.1403758585687226e-05, "loss": 0.45916494727134705, "step": 7657 }, { "epoch": 1.4150084881800649, "grad_norm": 0.07964061200618744, "learning_rate": 1.1401783395521462e-05, "loss": 0.5693597793579102, "step": 7658 }, { "epoch": 1.4151932648889607, "grad_norm": 0.0904047042131424, "learning_rate": 1.1399808149569102e-05, "loss": 0.6129499077796936, "step": 7659 }, { "epoch": 1.4153780415978565, "grad_norm": 0.06054529547691345, "learning_rate": 1.1397832847908756e-05, "loss": 0.35986170172691345, "step": 7660 }, { "epoch": 1.4155628183067526, "grad_norm": 0.06948984414339066, "learning_rate": 1.1395857490619035e-05, "loss": 0.5107991099357605, "step": 7661 }, { "epoch": 1.4157475950156484, "grad_norm": 0.07513797283172607, "learning_rate": 1.1393882077778555e-05, "loss": 0.5044052004814148, "step": 7662 }, { "epoch": 1.4159323717245442, "grad_norm": 0.07386617362499237, "learning_rate": 1.139190660946593e-05, "loss": 0.5760729908943176, "step": 7663 }, { "epoch": 1.41611714843344, "grad_norm": 0.08695702254772186, "learning_rate": 1.1389931085759774e-05, "loss": 0.6008087992668152, "step": 7664 }, { "epoch": 1.4163019251423359, "grad_norm": 0.08835457265377045, "learning_rate": 1.1387955506738711e-05, "loss": 0.6213218569755554, "step": 7665 }, { "epoch": 1.4164867018512317, "grad_norm": 0.06474512815475464, "learning_rate": 1.1385979872481363e-05, "loss": 0.3683367371559143, "step": 7666 }, { "epoch": 1.4166714785601275, "grad_norm": 0.0808560773730278, "learning_rate": 1.1384004183066347e-05, "loss": 0.5479312539100647, "step": 7667 }, { "epoch": 1.4168562552690234, "grad_norm": 0.061818208545446396, "learning_rate": 1.1382028438572297e-05, "loss": 0.39471688866615295, "step": 7668 }, { "epoch": 1.4170410319779192, "grad_norm": 0.07842092216014862, "learning_rate": 1.1380052639077841e-05, "loss": 0.5562286972999573, "step": 7669 }, { "epoch": 1.417225808686815, "grad_norm": 0.05551144480705261, "learning_rate": 1.1378076784661606e-05, "loss": 0.34070223569869995, "step": 7670 }, { "epoch": 1.4174105853957109, "grad_norm": 0.07446765899658203, "learning_rate": 1.1376100875402225e-05, "loss": 0.5623782277107239, "step": 7671 }, { "epoch": 1.4175953621046067, "grad_norm": 0.06815563142299652, "learning_rate": 1.1374124911378338e-05, "loss": 0.46452295780181885, "step": 7672 }, { "epoch": 1.4177801388135025, "grad_norm": 0.06641937047243118, "learning_rate": 1.1372148892668577e-05, "loss": 0.475123792886734, "step": 7673 }, { "epoch": 1.4179649155223983, "grad_norm": 0.09159291535615921, "learning_rate": 1.1370172819351582e-05, "loss": 0.7458155155181885, "step": 7674 }, { "epoch": 1.4181496922312942, "grad_norm": 0.0605030432343483, "learning_rate": 1.1368196691505995e-05, "loss": 0.4790557622909546, "step": 7675 }, { "epoch": 1.41833446894019, "grad_norm": 0.08951747417449951, "learning_rate": 1.1366220509210464e-05, "loss": 0.5695453882217407, "step": 7676 }, { "epoch": 1.4185192456490858, "grad_norm": 0.06571762263774872, "learning_rate": 1.1364244272543627e-05, "loss": 0.44038400053977966, "step": 7677 }, { "epoch": 1.4187040223579817, "grad_norm": 0.08783255517482758, "learning_rate": 1.1362267981584137e-05, "loss": 0.5953701734542847, "step": 7678 }, { "epoch": 1.4188887990668775, "grad_norm": 0.07799126952886581, "learning_rate": 1.1360291636410645e-05, "loss": 0.4716167151927948, "step": 7679 }, { "epoch": 1.4190735757757735, "grad_norm": 0.07493774592876434, "learning_rate": 1.1358315237101798e-05, "loss": 0.4993197023868561, "step": 7680 }, { "epoch": 1.4192583524846694, "grad_norm": 0.06788554042577744, "learning_rate": 1.1356338783736256e-05, "loss": 0.4875277578830719, "step": 7681 }, { "epoch": 1.4194431291935652, "grad_norm": 0.06015022099018097, "learning_rate": 1.1354362276392677e-05, "loss": 0.34643423557281494, "step": 7682 }, { "epoch": 1.419627905902461, "grad_norm": 0.09699197113513947, "learning_rate": 1.1352385715149711e-05, "loss": 0.5874773859977722, "step": 7683 }, { "epoch": 1.4198126826113568, "grad_norm": 0.08294727653265, "learning_rate": 1.135040910008603e-05, "loss": 0.6303514242172241, "step": 7684 }, { "epoch": 1.4199974593202527, "grad_norm": 0.07292766124010086, "learning_rate": 1.134843243128029e-05, "loss": 0.5303201079368591, "step": 7685 }, { "epoch": 1.4201822360291485, "grad_norm": 0.07239757478237152, "learning_rate": 1.1346455708811157e-05, "loss": 0.6103678941726685, "step": 7686 }, { "epoch": 1.4203670127380443, "grad_norm": 0.08574247360229492, "learning_rate": 1.13444789327573e-05, "loss": 0.547768235206604, "step": 7687 }, { "epoch": 1.4205517894469402, "grad_norm": 0.07449698448181152, "learning_rate": 1.1342502103197386e-05, "loss": 0.4052199721336365, "step": 7688 }, { "epoch": 1.420736566155836, "grad_norm": 0.08636648952960968, "learning_rate": 1.1340525220210092e-05, "loss": 0.6392164826393127, "step": 7689 }, { "epoch": 1.4209213428647318, "grad_norm": 0.07430004328489304, "learning_rate": 1.1338548283874085e-05, "loss": 0.5255374312400818, "step": 7690 }, { "epoch": 1.4211061195736279, "grad_norm": 0.06150273606181145, "learning_rate": 1.1336571294268045e-05, "loss": 0.3642953634262085, "step": 7691 }, { "epoch": 1.4212908962825237, "grad_norm": 0.06689255684614182, "learning_rate": 1.133459425147065e-05, "loss": 0.4309183359146118, "step": 7692 }, { "epoch": 1.4214756729914195, "grad_norm": 0.06311143189668655, "learning_rate": 1.1332617155560578e-05, "loss": 0.3785903751850128, "step": 7693 }, { "epoch": 1.4216604497003154, "grad_norm": 0.06366381794214249, "learning_rate": 1.1330640006616514e-05, "loss": 0.42182084918022156, "step": 7694 }, { "epoch": 1.4218452264092112, "grad_norm": 0.073325976729393, "learning_rate": 1.132866280471714e-05, "loss": 0.4598470628261566, "step": 7695 }, { "epoch": 1.422030003118107, "grad_norm": 0.08542947471141815, "learning_rate": 1.1326685549941144e-05, "loss": 0.503866970539093, "step": 7696 }, { "epoch": 1.4222147798270028, "grad_norm": 0.08375982195138931, "learning_rate": 1.1324708242367211e-05, "loss": 0.5992326140403748, "step": 7697 }, { "epoch": 1.4223995565358987, "grad_norm": 0.06164781376719475, "learning_rate": 1.1322730882074036e-05, "loss": 0.4079485833644867, "step": 7698 }, { "epoch": 1.4225843332447945, "grad_norm": 0.05721287056803703, "learning_rate": 1.132075346914031e-05, "loss": 0.33973169326782227, "step": 7699 }, { "epoch": 1.4227691099536903, "grad_norm": 0.06620140373706818, "learning_rate": 1.1318776003644729e-05, "loss": 0.4641711115837097, "step": 7700 }, { "epoch": 1.4229538866625862, "grad_norm": 0.08204913884401321, "learning_rate": 1.1316798485665989e-05, "loss": 0.4957462251186371, "step": 7701 }, { "epoch": 1.423138663371482, "grad_norm": 0.07838209718465805, "learning_rate": 1.131482091528279e-05, "loss": 0.6666757464408875, "step": 7702 }, { "epoch": 1.4233234400803778, "grad_norm": 0.08237612992525101, "learning_rate": 1.1312843292573827e-05, "loss": 0.568879246711731, "step": 7703 }, { "epoch": 1.4235082167892736, "grad_norm": 0.0746271014213562, "learning_rate": 1.1310865617617814e-05, "loss": 0.47434210777282715, "step": 7704 }, { "epoch": 1.4236929934981695, "grad_norm": 0.07584428042173386, "learning_rate": 1.1308887890493448e-05, "loss": 0.45992404222488403, "step": 7705 }, { "epoch": 1.4238777702070653, "grad_norm": 0.06510897725820541, "learning_rate": 1.130691011127944e-05, "loss": 0.3984183967113495, "step": 7706 }, { "epoch": 1.4240625469159611, "grad_norm": 0.08189408481121063, "learning_rate": 1.1304932280054497e-05, "loss": 0.5754963755607605, "step": 7707 }, { "epoch": 1.424247323624857, "grad_norm": 0.0759274810552597, "learning_rate": 1.1302954396897333e-05, "loss": 0.6165765523910522, "step": 7708 }, { "epoch": 1.424432100333753, "grad_norm": 0.09124520421028137, "learning_rate": 1.130097646188666e-05, "loss": 0.6912825703620911, "step": 7709 }, { "epoch": 1.4246168770426488, "grad_norm": 0.07441210001707077, "learning_rate": 1.1298998475101193e-05, "loss": 0.5612799525260925, "step": 7710 }, { "epoch": 1.4248016537515447, "grad_norm": 0.05193053185939789, "learning_rate": 1.1297020436619652e-05, "loss": 0.31158825755119324, "step": 7711 }, { "epoch": 1.4249864304604405, "grad_norm": 0.08454303443431854, "learning_rate": 1.1295042346520755e-05, "loss": 0.5870208740234375, "step": 7712 }, { "epoch": 1.4251712071693363, "grad_norm": 0.08547506481409073, "learning_rate": 1.1293064204883225e-05, "loss": 0.5300642848014832, "step": 7713 }, { "epoch": 1.4253559838782321, "grad_norm": 0.06898822635412216, "learning_rate": 1.1291086011785785e-05, "loss": 0.41161859035491943, "step": 7714 }, { "epoch": 1.425540760587128, "grad_norm": 0.06064549833536148, "learning_rate": 1.128910776730716e-05, "loss": 0.4096584916114807, "step": 7715 }, { "epoch": 1.4257255372960238, "grad_norm": 0.09677895158529282, "learning_rate": 1.1287129471526081e-05, "loss": 0.6471278667449951, "step": 7716 }, { "epoch": 1.4259103140049196, "grad_norm": 0.07851700484752655, "learning_rate": 1.1285151124521274e-05, "loss": 0.5760736465454102, "step": 7717 }, { "epoch": 1.4260950907138155, "grad_norm": 0.0756153017282486, "learning_rate": 1.1283172726371473e-05, "loss": 0.5310165286064148, "step": 7718 }, { "epoch": 1.4262798674227113, "grad_norm": 0.0864722952246666, "learning_rate": 1.1281194277155414e-05, "loss": 0.5797637104988098, "step": 7719 }, { "epoch": 1.4264646441316073, "grad_norm": 0.06263674050569534, "learning_rate": 1.1279215776951828e-05, "loss": 0.49331384897232056, "step": 7720 }, { "epoch": 1.4266494208405032, "grad_norm": 0.08064206689596176, "learning_rate": 1.1277237225839459e-05, "loss": 0.5718737840652466, "step": 7721 }, { "epoch": 1.426834197549399, "grad_norm": 0.04651773348450661, "learning_rate": 1.1275258623897042e-05, "loss": 0.33713024854660034, "step": 7722 }, { "epoch": 1.4270189742582948, "grad_norm": 0.07942657172679901, "learning_rate": 1.1273279971203324e-05, "loss": 0.48411282896995544, "step": 7723 }, { "epoch": 1.4272037509671907, "grad_norm": 0.059395305812358856, "learning_rate": 1.1271301267837045e-05, "loss": 0.3102441728115082, "step": 7724 }, { "epoch": 1.4273885276760865, "grad_norm": 0.07383601367473602, "learning_rate": 1.1269322513876955e-05, "loss": 0.5118647813796997, "step": 7725 }, { "epoch": 1.4275733043849823, "grad_norm": 0.06309115141630173, "learning_rate": 1.1267343709401798e-05, "loss": 0.4012720584869385, "step": 7726 }, { "epoch": 1.4277580810938781, "grad_norm": 0.073763407766819, "learning_rate": 1.1265364854490326e-05, "loss": 0.4894561171531677, "step": 7727 }, { "epoch": 1.427942857802774, "grad_norm": 0.08136720210313797, "learning_rate": 1.1263385949221294e-05, "loss": 0.6185980439186096, "step": 7728 }, { "epoch": 1.4281276345116698, "grad_norm": 0.07367828488349915, "learning_rate": 1.1261406993673451e-05, "loss": 0.4043392837047577, "step": 7729 }, { "epoch": 1.4283124112205656, "grad_norm": 0.07976184040307999, "learning_rate": 1.1259427987925558e-05, "loss": 0.6872898936271667, "step": 7730 }, { "epoch": 1.4284971879294615, "grad_norm": 0.06573823094367981, "learning_rate": 1.1257448932056373e-05, "loss": 0.43768876791000366, "step": 7731 }, { "epoch": 1.4286819646383573, "grad_norm": 0.059378981590270996, "learning_rate": 1.125546982614465e-05, "loss": 0.36808469891548157, "step": 7732 }, { "epoch": 1.428866741347253, "grad_norm": 0.08485148102045059, "learning_rate": 1.1253490670269158e-05, "loss": 0.4989584982395172, "step": 7733 }, { "epoch": 1.429051518056149, "grad_norm": 0.07902685552835464, "learning_rate": 1.125151146450866e-05, "loss": 0.562540590763092, "step": 7734 }, { "epoch": 1.4292362947650448, "grad_norm": 0.07742798328399658, "learning_rate": 1.1249532208941922e-05, "loss": 0.47228848934173584, "step": 7735 }, { "epoch": 1.4294210714739406, "grad_norm": 0.08142773061990738, "learning_rate": 1.1247552903647709e-05, "loss": 0.5640918612480164, "step": 7736 }, { "epoch": 1.4296058481828364, "grad_norm": 0.07114843279123306, "learning_rate": 1.1245573548704793e-05, "loss": 0.4797847867012024, "step": 7737 }, { "epoch": 1.4297906248917325, "grad_norm": 0.07043295353651047, "learning_rate": 1.1243594144191949e-05, "loss": 0.45127490162849426, "step": 7738 }, { "epoch": 1.4299754016006283, "grad_norm": 0.08176163583993912, "learning_rate": 1.1241614690187947e-05, "loss": 0.565257728099823, "step": 7739 }, { "epoch": 1.4301601783095241, "grad_norm": 0.06472037732601166, "learning_rate": 1.1239635186771565e-05, "loss": 0.3978027403354645, "step": 7740 }, { "epoch": 1.43034495501842, "grad_norm": 0.06999760866165161, "learning_rate": 1.1237655634021582e-05, "loss": 0.4746716022491455, "step": 7741 }, { "epoch": 1.4305297317273158, "grad_norm": 0.05453413724899292, "learning_rate": 1.1235676032016777e-05, "loss": 0.32790878415107727, "step": 7742 }, { "epoch": 1.4307145084362116, "grad_norm": 0.06192592531442642, "learning_rate": 1.123369638083593e-05, "loss": 0.38390231132507324, "step": 7743 }, { "epoch": 1.4308992851451074, "grad_norm": 0.07822002470493317, "learning_rate": 1.1231716680557829e-05, "loss": 0.46052321791648865, "step": 7744 }, { "epoch": 1.4310840618540033, "grad_norm": 0.06971774250268936, "learning_rate": 1.1229736931261258e-05, "loss": 0.4210222065448761, "step": 7745 }, { "epoch": 1.431268838562899, "grad_norm": 0.07256211340427399, "learning_rate": 1.1227757133025002e-05, "loss": 0.558273434638977, "step": 7746 }, { "epoch": 1.431453615271795, "grad_norm": 0.05814126878976822, "learning_rate": 1.1225777285927854e-05, "loss": 0.34946370124816895, "step": 7747 }, { "epoch": 1.4316383919806908, "grad_norm": 0.06424988806247711, "learning_rate": 1.1223797390048607e-05, "loss": 0.4597241282463074, "step": 7748 }, { "epoch": 1.4318231686895868, "grad_norm": 0.08351857215166092, "learning_rate": 1.122181744546605e-05, "loss": 0.5707598924636841, "step": 7749 }, { "epoch": 1.4320079453984826, "grad_norm": 0.06891462951898575, "learning_rate": 1.1219837452258982e-05, "loss": 0.49332642555236816, "step": 7750 }, { "epoch": 1.4321927221073785, "grad_norm": 0.080210842192173, "learning_rate": 1.12178574105062e-05, "loss": 0.530530571937561, "step": 7751 }, { "epoch": 1.4323774988162743, "grad_norm": 0.07507038861513138, "learning_rate": 1.1215877320286506e-05, "loss": 0.5159768462181091, "step": 7752 }, { "epoch": 1.4325622755251701, "grad_norm": 0.06869399547576904, "learning_rate": 1.1213897181678692e-05, "loss": 0.4092599153518677, "step": 7753 }, { "epoch": 1.432747052234066, "grad_norm": 0.059218842536211014, "learning_rate": 1.1211916994761574e-05, "loss": 0.4262658953666687, "step": 7754 }, { "epoch": 1.4329318289429618, "grad_norm": 0.07015969604253769, "learning_rate": 1.120993675961395e-05, "loss": 0.4874521493911743, "step": 7755 }, { "epoch": 1.4331166056518576, "grad_norm": 0.08915772289037704, "learning_rate": 1.1207956476314625e-05, "loss": 0.6265583038330078, "step": 7756 }, { "epoch": 1.4333013823607534, "grad_norm": 0.08509238064289093, "learning_rate": 1.1205976144942415e-05, "loss": 0.5946600437164307, "step": 7757 }, { "epoch": 1.4334861590696493, "grad_norm": 0.06658284366130829, "learning_rate": 1.1203995765576128e-05, "loss": 0.4934423565864563, "step": 7758 }, { "epoch": 1.433670935778545, "grad_norm": 0.08315644413232803, "learning_rate": 1.1202015338294574e-05, "loss": 0.6432234644889832, "step": 7759 }, { "epoch": 1.433855712487441, "grad_norm": 0.09150480479001999, "learning_rate": 1.120003486317657e-05, "loss": 0.7058236002922058, "step": 7760 }, { "epoch": 1.4340404891963368, "grad_norm": 0.08542300760746002, "learning_rate": 1.1198054340300934e-05, "loss": 0.5598074197769165, "step": 7761 }, { "epoch": 1.4342252659052326, "grad_norm": 0.07881592959165573, "learning_rate": 1.1196073769746485e-05, "loss": 0.49808305501937866, "step": 7762 }, { "epoch": 1.4344100426141284, "grad_norm": 0.07317841798067093, "learning_rate": 1.1194093151592037e-05, "loss": 0.476825475692749, "step": 7763 }, { "epoch": 1.4345948193230242, "grad_norm": 0.08663074672222137, "learning_rate": 1.1192112485916422e-05, "loss": 0.6702808141708374, "step": 7764 }, { "epoch": 1.43477959603192, "grad_norm": 0.06999069452285767, "learning_rate": 1.1190131772798461e-05, "loss": 0.42619284987449646, "step": 7765 }, { "epoch": 1.434964372740816, "grad_norm": 0.06526099145412445, "learning_rate": 1.1188151012316974e-05, "loss": 0.46033430099487305, "step": 7766 }, { "epoch": 1.4351491494497117, "grad_norm": 0.05214836075901985, "learning_rate": 1.1186170204550796e-05, "loss": 0.31312480568885803, "step": 7767 }, { "epoch": 1.4353339261586078, "grad_norm": 0.06493684649467468, "learning_rate": 1.1184189349578756e-05, "loss": 0.4746323525905609, "step": 7768 }, { "epoch": 1.4355187028675036, "grad_norm": 0.0669315978884697, "learning_rate": 1.1182208447479682e-05, "loss": 0.485589861869812, "step": 7769 }, { "epoch": 1.4357034795763994, "grad_norm": 0.08709082007408142, "learning_rate": 1.1180227498332413e-05, "loss": 0.6753013730049133, "step": 7770 }, { "epoch": 1.4358882562852953, "grad_norm": 0.07773923128843307, "learning_rate": 1.1178246502215782e-05, "loss": 0.5319858193397522, "step": 7771 }, { "epoch": 1.436073032994191, "grad_norm": 0.07116381824016571, "learning_rate": 1.1176265459208629e-05, "loss": 0.4657229781150818, "step": 7772 }, { "epoch": 1.436257809703087, "grad_norm": 0.06802497059106827, "learning_rate": 1.1174284369389783e-05, "loss": 0.4718885123729706, "step": 7773 }, { "epoch": 1.4364425864119827, "grad_norm": 0.08443068712949753, "learning_rate": 1.11723032328381e-05, "loss": 0.573927104473114, "step": 7774 }, { "epoch": 1.4366273631208786, "grad_norm": 0.06734126806259155, "learning_rate": 1.1170322049632415e-05, "loss": 0.38770437240600586, "step": 7775 }, { "epoch": 1.4368121398297744, "grad_norm": 0.08613882213830948, "learning_rate": 1.116834081985157e-05, "loss": 0.5945224761962891, "step": 7776 }, { "epoch": 1.4369969165386702, "grad_norm": 0.07484793663024902, "learning_rate": 1.1166359543574417e-05, "loss": 0.5455222725868225, "step": 7777 }, { "epoch": 1.437181693247566, "grad_norm": 0.07464735954999924, "learning_rate": 1.1164378220879805e-05, "loss": 0.513776957988739, "step": 7778 }, { "epoch": 1.4373664699564621, "grad_norm": 0.08073262125253677, "learning_rate": 1.1162396851846582e-05, "loss": 0.7173177003860474, "step": 7779 }, { "epoch": 1.437551246665358, "grad_norm": 0.07891233265399933, "learning_rate": 1.11604154365536e-05, "loss": 0.6460133790969849, "step": 7780 }, { "epoch": 1.4377360233742538, "grad_norm": 0.06489276140928268, "learning_rate": 1.1158433975079716e-05, "loss": 0.4140462279319763, "step": 7781 }, { "epoch": 1.4379208000831496, "grad_norm": 0.0625050961971283, "learning_rate": 1.1156452467503785e-05, "loss": 0.36107245087623596, "step": 7782 }, { "epoch": 1.4381055767920454, "grad_norm": 0.06572655588388443, "learning_rate": 1.1154470913904663e-05, "loss": 0.3627634346485138, "step": 7783 }, { "epoch": 1.4382903535009413, "grad_norm": 0.06454043090343475, "learning_rate": 1.1152489314361208e-05, "loss": 0.38726598024368286, "step": 7784 }, { "epoch": 1.438475130209837, "grad_norm": 0.08084996044635773, "learning_rate": 1.1150507668952287e-05, "loss": 0.6202616691589355, "step": 7785 }, { "epoch": 1.438659906918733, "grad_norm": 0.06921318173408508, "learning_rate": 1.1148525977756757e-05, "loss": 0.6285109519958496, "step": 7786 }, { "epoch": 1.4388446836276287, "grad_norm": 0.08460685610771179, "learning_rate": 1.1146544240853488e-05, "loss": 0.546532928943634, "step": 7787 }, { "epoch": 1.4390294603365246, "grad_norm": 0.0826251357793808, "learning_rate": 1.1144562458321346e-05, "loss": 0.5390278100967407, "step": 7788 }, { "epoch": 1.4392142370454204, "grad_norm": 0.09981584548950195, "learning_rate": 1.1142580630239197e-05, "loss": 0.5163625478744507, "step": 7789 }, { "epoch": 1.4393990137543162, "grad_norm": 0.08871352672576904, "learning_rate": 1.1140598756685917e-05, "loss": 0.5099503397941589, "step": 7790 }, { "epoch": 1.439583790463212, "grad_norm": 0.06853802502155304, "learning_rate": 1.1138616837740373e-05, "loss": 0.34996840357780457, "step": 7791 }, { "epoch": 1.4397685671721079, "grad_norm": 0.07858351618051529, "learning_rate": 1.1136634873481442e-05, "loss": 0.5394749045372009, "step": 7792 }, { "epoch": 1.4399533438810037, "grad_norm": 0.07858268171548843, "learning_rate": 1.1134652863987996e-05, "loss": 0.5309223532676697, "step": 7793 }, { "epoch": 1.4401381205898995, "grad_norm": 0.06469916552305222, "learning_rate": 1.1132670809338916e-05, "loss": 0.42184239625930786, "step": 7794 }, { "epoch": 1.4403228972987954, "grad_norm": 0.09995020925998688, "learning_rate": 1.1130688709613087e-05, "loss": 0.58648282289505, "step": 7795 }, { "epoch": 1.4405076740076912, "grad_norm": 0.0675228163599968, "learning_rate": 1.112870656488938e-05, "loss": 0.41711270809173584, "step": 7796 }, { "epoch": 1.4406924507165872, "grad_norm": 0.06286881119012833, "learning_rate": 1.1126724375246685e-05, "loss": 0.47400546073913574, "step": 7797 }, { "epoch": 1.440877227425483, "grad_norm": 0.07993468642234802, "learning_rate": 1.1124742140763884e-05, "loss": 0.5307193994522095, "step": 7798 }, { "epoch": 1.441062004134379, "grad_norm": 0.06807641685009003, "learning_rate": 1.1122759861519864e-05, "loss": 0.5481207370758057, "step": 7799 }, { "epoch": 1.4412467808432747, "grad_norm": 0.07621538639068604, "learning_rate": 1.1120777537593516e-05, "loss": 0.4112272262573242, "step": 7800 }, { "epoch": 1.4414315575521706, "grad_norm": 0.06728537380695343, "learning_rate": 1.1118795169063728e-05, "loss": 0.45030948519706726, "step": 7801 }, { "epoch": 1.4416163342610664, "grad_norm": 0.08467714488506317, "learning_rate": 1.1116812756009394e-05, "loss": 0.5709100365638733, "step": 7802 }, { "epoch": 1.4418011109699622, "grad_norm": 0.07138433307409286, "learning_rate": 1.1114830298509403e-05, "loss": 0.5521631240844727, "step": 7803 }, { "epoch": 1.441985887678858, "grad_norm": 0.08049609512090683, "learning_rate": 1.1112847796642654e-05, "loss": 0.632592499256134, "step": 7804 }, { "epoch": 1.4421706643877539, "grad_norm": 0.07343754172325134, "learning_rate": 1.1110865250488047e-05, "loss": 0.4223109483718872, "step": 7805 }, { "epoch": 1.4423554410966497, "grad_norm": 0.0751941129565239, "learning_rate": 1.1108882660124479e-05, "loss": 0.5047143697738647, "step": 7806 }, { "epoch": 1.4425402178055455, "grad_norm": 0.07815185189247131, "learning_rate": 1.110690002563085e-05, "loss": 0.4017481207847595, "step": 7807 }, { "epoch": 1.4427249945144416, "grad_norm": 0.07059190422296524, "learning_rate": 1.1104917347086059e-05, "loss": 0.44762492179870605, "step": 7808 }, { "epoch": 1.4429097712233374, "grad_norm": 0.08881764858961105, "learning_rate": 1.1102934624569017e-05, "loss": 0.7014904618263245, "step": 7809 }, { "epoch": 1.4430945479322332, "grad_norm": 0.07527472078800201, "learning_rate": 1.1100951858158629e-05, "loss": 0.4444360136985779, "step": 7810 }, { "epoch": 1.443279324641129, "grad_norm": 0.08891136944293976, "learning_rate": 1.1098969047933798e-05, "loss": 0.6279330849647522, "step": 7811 }, { "epoch": 1.443464101350025, "grad_norm": 0.08965428918600082, "learning_rate": 1.109698619397344e-05, "loss": 0.5877953767776489, "step": 7812 }, { "epoch": 1.4436488780589207, "grad_norm": 0.06243619695305824, "learning_rate": 1.1095003296356463e-05, "loss": 0.4526597857475281, "step": 7813 }, { "epoch": 1.4438336547678166, "grad_norm": 0.07425308972597122, "learning_rate": 1.109302035516178e-05, "loss": 0.4679838716983795, "step": 7814 }, { "epoch": 1.4440184314767124, "grad_norm": 0.06625751405954361, "learning_rate": 1.1091037370468307e-05, "loss": 0.49603068828582764, "step": 7815 }, { "epoch": 1.4442032081856082, "grad_norm": 0.07407105714082718, "learning_rate": 1.1089054342354962e-05, "loss": 0.4844321608543396, "step": 7816 }, { "epoch": 1.444387984894504, "grad_norm": 0.08134344965219498, "learning_rate": 1.108707127090066e-05, "loss": 0.6283643245697021, "step": 7817 }, { "epoch": 1.4445727616033999, "grad_norm": 0.08917857706546783, "learning_rate": 1.1085088156184321e-05, "loss": 0.686138927936554, "step": 7818 }, { "epoch": 1.4447575383122957, "grad_norm": 0.06348041445016861, "learning_rate": 1.1083104998284868e-05, "loss": 0.39687955379486084, "step": 7819 }, { "epoch": 1.4449423150211915, "grad_norm": 0.0743764117360115, "learning_rate": 1.1081121797281227e-05, "loss": 0.4445897042751312, "step": 7820 }, { "epoch": 1.4451270917300874, "grad_norm": 0.06222608685493469, "learning_rate": 1.1079138553252318e-05, "loss": 0.4394078552722931, "step": 7821 }, { "epoch": 1.4453118684389832, "grad_norm": 0.0755404457449913, "learning_rate": 1.1077155266277074e-05, "loss": 0.6143812537193298, "step": 7822 }, { "epoch": 1.445496645147879, "grad_norm": 0.07101725041866302, "learning_rate": 1.1075171936434416e-05, "loss": 0.47755134105682373, "step": 7823 }, { "epoch": 1.4456814218567748, "grad_norm": 0.07291561365127563, "learning_rate": 1.1073188563803283e-05, "loss": 0.4806116223335266, "step": 7824 }, { "epoch": 1.4458661985656707, "grad_norm": 0.10162265598773956, "learning_rate": 1.1071205148462602e-05, "loss": 0.696403980255127, "step": 7825 }, { "epoch": 1.4460509752745667, "grad_norm": 0.07536455243825912, "learning_rate": 1.1069221690491306e-05, "loss": 0.539941132068634, "step": 7826 }, { "epoch": 1.4462357519834625, "grad_norm": 0.060847558081150055, "learning_rate": 1.1067238189968331e-05, "loss": 0.338792085647583, "step": 7827 }, { "epoch": 1.4464205286923584, "grad_norm": 0.08490575104951859, "learning_rate": 1.1065254646972618e-05, "loss": 0.7636604905128479, "step": 7828 }, { "epoch": 1.4466053054012542, "grad_norm": 0.08725559711456299, "learning_rate": 1.10632710615831e-05, "loss": 0.6760256886482239, "step": 7829 }, { "epoch": 1.44679008211015, "grad_norm": 0.06068837270140648, "learning_rate": 1.1061287433878722e-05, "loss": 0.4545172154903412, "step": 7830 }, { "epoch": 1.4469748588190459, "grad_norm": 0.06264138966798782, "learning_rate": 1.1059303763938426e-05, "loss": 0.3595879077911377, "step": 7831 }, { "epoch": 1.4471596355279417, "grad_norm": 0.09896326810121536, "learning_rate": 1.1057320051841152e-05, "loss": 0.5653212070465088, "step": 7832 }, { "epoch": 1.4473444122368375, "grad_norm": 0.07048387080430984, "learning_rate": 1.1055336297665849e-05, "loss": 0.4967333972454071, "step": 7833 }, { "epoch": 1.4475291889457333, "grad_norm": 0.08791311085224152, "learning_rate": 1.1053352501491464e-05, "loss": 0.5783764719963074, "step": 7834 }, { "epoch": 1.4477139656546292, "grad_norm": 0.07617121189832687, "learning_rate": 1.1051368663396943e-05, "loss": 0.4915910065174103, "step": 7835 }, { "epoch": 1.447898742363525, "grad_norm": 0.06257335841655731, "learning_rate": 1.1049384783461237e-05, "loss": 0.4560215473175049, "step": 7836 }, { "epoch": 1.448083519072421, "grad_norm": 0.07558248937129974, "learning_rate": 1.1047400861763303e-05, "loss": 0.5505630970001221, "step": 7837 }, { "epoch": 1.4482682957813169, "grad_norm": 0.0665748342871666, "learning_rate": 1.1045416898382088e-05, "loss": 0.47031649947166443, "step": 7838 }, { "epoch": 1.4484530724902127, "grad_norm": 0.0672403872013092, "learning_rate": 1.1043432893396554e-05, "loss": 0.44376689195632935, "step": 7839 }, { "epoch": 1.4486378491991085, "grad_norm": 0.06862741708755493, "learning_rate": 1.1041448846885654e-05, "loss": 0.4826169013977051, "step": 7840 }, { "epoch": 1.4488226259080044, "grad_norm": 0.08931844681501389, "learning_rate": 1.1039464758928351e-05, "loss": 0.6299209594726562, "step": 7841 }, { "epoch": 1.4490074026169002, "grad_norm": 0.06782756000757217, "learning_rate": 1.1037480629603599e-05, "loss": 0.3971423804759979, "step": 7842 }, { "epoch": 1.449192179325796, "grad_norm": 0.07321605831384659, "learning_rate": 1.1035496458990365e-05, "loss": 0.5970366597175598, "step": 7843 }, { "epoch": 1.4493769560346919, "grad_norm": 0.07662840187549591, "learning_rate": 1.1033512247167612e-05, "loss": 0.5170060992240906, "step": 7844 }, { "epoch": 1.4495617327435877, "grad_norm": 0.08513103425502777, "learning_rate": 1.1031527994214303e-05, "loss": 0.6284051537513733, "step": 7845 }, { "epoch": 1.4497465094524835, "grad_norm": 0.0876043513417244, "learning_rate": 1.102954370020941e-05, "loss": 0.658281683921814, "step": 7846 }, { "epoch": 1.4499312861613793, "grad_norm": 0.07371240109205246, "learning_rate": 1.10275593652319e-05, "loss": 0.5149878859519958, "step": 7847 }, { "epoch": 1.4501160628702752, "grad_norm": 0.08449136465787888, "learning_rate": 1.102557498936074e-05, "loss": 0.5654085874557495, "step": 7848 }, { "epoch": 1.450300839579171, "grad_norm": 0.08550971746444702, "learning_rate": 1.10235905726749e-05, "loss": 0.5113701820373535, "step": 7849 }, { "epoch": 1.4504856162880668, "grad_norm": 0.07894014567136765, "learning_rate": 1.1021606115253362e-05, "loss": 0.5105152726173401, "step": 7850 }, { "epoch": 1.4506703929969627, "grad_norm": 0.06562106311321259, "learning_rate": 1.1019621617175098e-05, "loss": 0.48219433426856995, "step": 7851 }, { "epoch": 1.4508551697058585, "grad_norm": 0.06805983185768127, "learning_rate": 1.101763707851908e-05, "loss": 0.3967460095882416, "step": 7852 }, { "epoch": 1.4510399464147543, "grad_norm": 0.08153540641069412, "learning_rate": 1.1015652499364294e-05, "loss": 0.5121418833732605, "step": 7853 }, { "epoch": 1.4512247231236501, "grad_norm": 0.0606747642159462, "learning_rate": 1.1013667879789713e-05, "loss": 0.3817494511604309, "step": 7854 }, { "epoch": 1.4514094998325462, "grad_norm": 0.07964327931404114, "learning_rate": 1.1011683219874324e-05, "loss": 0.6836763024330139, "step": 7855 }, { "epoch": 1.451594276541442, "grad_norm": 0.07234257459640503, "learning_rate": 1.1009698519697106e-05, "loss": 0.5018043518066406, "step": 7856 }, { "epoch": 1.4517790532503378, "grad_norm": 0.06623294204473495, "learning_rate": 1.1007713779337046e-05, "loss": 0.5385190844535828, "step": 7857 }, { "epoch": 1.4519638299592337, "grad_norm": 0.0786714255809784, "learning_rate": 1.1005728998873132e-05, "loss": 0.5198732614517212, "step": 7858 }, { "epoch": 1.4521486066681295, "grad_norm": 0.08908972889184952, "learning_rate": 1.1003744178384347e-05, "loss": 0.6613372564315796, "step": 7859 }, { "epoch": 1.4523333833770253, "grad_norm": 0.07145047187805176, "learning_rate": 1.1001759317949687e-05, "loss": 0.509187638759613, "step": 7860 }, { "epoch": 1.4525181600859212, "grad_norm": 0.08758586645126343, "learning_rate": 1.0999774417648141e-05, "loss": 0.5257436633110046, "step": 7861 }, { "epoch": 1.452702936794817, "grad_norm": 0.08765916526317596, "learning_rate": 1.09977894775587e-05, "loss": 0.6729578375816345, "step": 7862 }, { "epoch": 1.4528877135037128, "grad_norm": 0.08089493960142136, "learning_rate": 1.0995804497760358e-05, "loss": 0.6460222005844116, "step": 7863 }, { "epoch": 1.4530724902126086, "grad_norm": 0.07192704826593399, "learning_rate": 1.0993819478332114e-05, "loss": 0.46706151962280273, "step": 7864 }, { "epoch": 1.4532572669215045, "grad_norm": 0.07399758696556091, "learning_rate": 1.0991834419352963e-05, "loss": 0.4869329333305359, "step": 7865 }, { "epoch": 1.4534420436304005, "grad_norm": 0.09239742904901505, "learning_rate": 1.0989849320901905e-05, "loss": 0.6180732250213623, "step": 7866 }, { "epoch": 1.4536268203392964, "grad_norm": 0.07837023586034775, "learning_rate": 1.0987864183057943e-05, "loss": 0.41628366708755493, "step": 7867 }, { "epoch": 1.4538115970481922, "grad_norm": 0.07429380714893341, "learning_rate": 1.0985879005900078e-05, "loss": 0.5442554354667664, "step": 7868 }, { "epoch": 1.453996373757088, "grad_norm": 0.08155282586812973, "learning_rate": 1.0983893789507307e-05, "loss": 0.5126806497573853, "step": 7869 }, { "epoch": 1.4541811504659838, "grad_norm": 0.0866590216755867, "learning_rate": 1.0981908533958646e-05, "loss": 0.517244815826416, "step": 7870 }, { "epoch": 1.4543659271748797, "grad_norm": 0.07580895721912384, "learning_rate": 1.0979923239333099e-05, "loss": 0.4432229995727539, "step": 7871 }, { "epoch": 1.4545507038837755, "grad_norm": 0.08365512639284134, "learning_rate": 1.0977937905709667e-05, "loss": 0.5921037197113037, "step": 7872 }, { "epoch": 1.4547354805926713, "grad_norm": 0.08475431054830551, "learning_rate": 1.0975952533167369e-05, "loss": 0.5824610590934753, "step": 7873 }, { "epoch": 1.4549202573015672, "grad_norm": 0.070068359375, "learning_rate": 1.0973967121785216e-05, "loss": 0.48140326142311096, "step": 7874 }, { "epoch": 1.455105034010463, "grad_norm": 0.08091516047716141, "learning_rate": 1.0971981671642216e-05, "loss": 0.5462980270385742, "step": 7875 }, { "epoch": 1.4552898107193588, "grad_norm": 0.09640698879957199, "learning_rate": 1.0969996182817387e-05, "loss": 0.5758302807807922, "step": 7876 }, { "epoch": 1.4554745874282546, "grad_norm": 0.06986507028341293, "learning_rate": 1.0968010655389745e-05, "loss": 0.49991628527641296, "step": 7877 }, { "epoch": 1.4556593641371505, "grad_norm": 0.06571005284786224, "learning_rate": 1.0966025089438309e-05, "loss": 0.4069470167160034, "step": 7878 }, { "epoch": 1.4558441408460463, "grad_norm": 0.0801084116101265, "learning_rate": 1.0964039485042091e-05, "loss": 0.663557767868042, "step": 7879 }, { "epoch": 1.4560289175549421, "grad_norm": 0.08647681772708893, "learning_rate": 1.0962053842280123e-05, "loss": 0.6868690848350525, "step": 7880 }, { "epoch": 1.456213694263838, "grad_norm": 0.06825637072324753, "learning_rate": 1.0960068161231422e-05, "loss": 0.537468433380127, "step": 7881 }, { "epoch": 1.4563984709727338, "grad_norm": 0.08584998548030853, "learning_rate": 1.0958082441975009e-05, "loss": 0.5897773504257202, "step": 7882 }, { "epoch": 1.4565832476816296, "grad_norm": 0.08932146430015564, "learning_rate": 1.0956096684589911e-05, "loss": 0.6655458211898804, "step": 7883 }, { "epoch": 1.4567680243905254, "grad_norm": 0.07608090341091156, "learning_rate": 1.095411088915516e-05, "loss": 0.5363531708717346, "step": 7884 }, { "epoch": 1.4569528010994215, "grad_norm": 0.07527932524681091, "learning_rate": 1.0952125055749779e-05, "loss": 0.5014275908470154, "step": 7885 }, { "epoch": 1.4571375778083173, "grad_norm": 0.06645578145980835, "learning_rate": 1.0950139184452799e-05, "loss": 0.46748968958854675, "step": 7886 }, { "epoch": 1.4573223545172131, "grad_norm": 0.07744214683771133, "learning_rate": 1.0948153275343255e-05, "loss": 0.6590765714645386, "step": 7887 }, { "epoch": 1.457507131226109, "grad_norm": 0.061628442257642746, "learning_rate": 1.0946167328500175e-05, "loss": 0.36661460995674133, "step": 7888 }, { "epoch": 1.4576919079350048, "grad_norm": 0.08004427701234818, "learning_rate": 1.0944181344002596e-05, "loss": 0.5551319718360901, "step": 7889 }, { "epoch": 1.4578766846439006, "grad_norm": 0.06822110712528229, "learning_rate": 1.094219532192955e-05, "loss": 0.4428638517856598, "step": 7890 }, { "epoch": 1.4580614613527965, "grad_norm": 0.08248184621334076, "learning_rate": 1.0940209262360082e-05, "loss": 0.5554961562156677, "step": 7891 }, { "epoch": 1.4582462380616923, "grad_norm": 0.09095754474401474, "learning_rate": 1.0938223165373225e-05, "loss": 0.4856225848197937, "step": 7892 }, { "epoch": 1.4584310147705881, "grad_norm": 0.0733831375837326, "learning_rate": 1.0936237031048023e-05, "loss": 0.5460132956504822, "step": 7893 }, { "epoch": 1.458615791479484, "grad_norm": 0.0810202956199646, "learning_rate": 1.0934250859463516e-05, "loss": 0.5689954161643982, "step": 7894 }, { "epoch": 1.4588005681883798, "grad_norm": 0.09141723066568375, "learning_rate": 1.0932264650698745e-05, "loss": 0.5393565893173218, "step": 7895 }, { "epoch": 1.4589853448972758, "grad_norm": 0.08879061788320541, "learning_rate": 1.093027840483276e-05, "loss": 0.4498291015625, "step": 7896 }, { "epoch": 1.4591701216061717, "grad_norm": 0.06332932412624359, "learning_rate": 1.0928292121944606e-05, "loss": 0.3498190939426422, "step": 7897 }, { "epoch": 1.4593548983150675, "grad_norm": 0.07837338000535965, "learning_rate": 1.092630580211333e-05, "loss": 0.5701397657394409, "step": 7898 }, { "epoch": 1.4595396750239633, "grad_norm": 0.08210621774196625, "learning_rate": 1.0924319445417978e-05, "loss": 0.5923003554344177, "step": 7899 }, { "epoch": 1.4597244517328591, "grad_norm": 0.09843314439058304, "learning_rate": 1.0922333051937603e-05, "loss": 0.6211321353912354, "step": 7900 }, { "epoch": 1.459909228441755, "grad_norm": 0.07027072459459305, "learning_rate": 1.0920346621751264e-05, "loss": 0.3532789945602417, "step": 7901 }, { "epoch": 1.4600940051506508, "grad_norm": 0.07323717325925827, "learning_rate": 1.0918360154938004e-05, "loss": 0.4459230303764343, "step": 7902 }, { "epoch": 1.4602787818595466, "grad_norm": 0.0616673082113266, "learning_rate": 1.0916373651576883e-05, "loss": 0.43505892157554626, "step": 7903 }, { "epoch": 1.4604635585684425, "grad_norm": 0.0709221363067627, "learning_rate": 1.091438711174696e-05, "loss": 0.5405328869819641, "step": 7904 }, { "epoch": 1.4606483352773383, "grad_norm": 0.07395005226135254, "learning_rate": 1.091240053552729e-05, "loss": 0.3639757037162781, "step": 7905 }, { "epoch": 1.4608331119862341, "grad_norm": 0.0811300203204155, "learning_rate": 1.0910413922996934e-05, "loss": 0.42535293102264404, "step": 7906 }, { "epoch": 1.46101788869513, "grad_norm": 0.05116962641477585, "learning_rate": 1.090842727423495e-05, "loss": 0.2789819538593292, "step": 7907 }, { "epoch": 1.4612026654040258, "grad_norm": 0.07930802553892136, "learning_rate": 1.0906440589320404e-05, "loss": 0.5680393576622009, "step": 7908 }, { "epoch": 1.4613874421129216, "grad_norm": 0.09925345331430435, "learning_rate": 1.0904453868332358e-05, "loss": 0.705038845539093, "step": 7909 }, { "epoch": 1.4615722188218174, "grad_norm": 0.0764572024345398, "learning_rate": 1.0902467111349876e-05, "loss": 0.4529610574245453, "step": 7910 }, { "epoch": 1.4617569955307133, "grad_norm": 0.086216039955616, "learning_rate": 1.0900480318452032e-05, "loss": 0.5941925644874573, "step": 7911 }, { "epoch": 1.461941772239609, "grad_norm": 0.06900719553232193, "learning_rate": 1.0898493489717884e-05, "loss": 0.40601980686187744, "step": 7912 }, { "epoch": 1.462126548948505, "grad_norm": 0.08022689074277878, "learning_rate": 1.0896506625226505e-05, "loss": 0.604983389377594, "step": 7913 }, { "epoch": 1.462311325657401, "grad_norm": 0.0808817595243454, "learning_rate": 1.0894519725056971e-05, "loss": 0.5770231485366821, "step": 7914 }, { "epoch": 1.4624961023662968, "grad_norm": 0.06129438057541847, "learning_rate": 1.0892532789288347e-05, "loss": 0.356117308139801, "step": 7915 }, { "epoch": 1.4626808790751926, "grad_norm": 0.09225615859031677, "learning_rate": 1.0890545817999714e-05, "loss": 0.6368810534477234, "step": 7916 }, { "epoch": 1.4628656557840884, "grad_norm": 0.07198144495487213, "learning_rate": 1.088855881127014e-05, "loss": 0.4250560998916626, "step": 7917 }, { "epoch": 1.4630504324929843, "grad_norm": 0.07320264726877213, "learning_rate": 1.088657176917871e-05, "loss": 0.5142825245857239, "step": 7918 }, { "epoch": 1.46323520920188, "grad_norm": 0.0757870227098465, "learning_rate": 1.0884584691804492e-05, "loss": 0.49360334873199463, "step": 7919 }, { "epoch": 1.463419985910776, "grad_norm": 0.09541133046150208, "learning_rate": 1.0882597579226574e-05, "loss": 0.717289388179779, "step": 7920 }, { "epoch": 1.4636047626196718, "grad_norm": 0.07429690659046173, "learning_rate": 1.0880610431524033e-05, "loss": 0.4696374535560608, "step": 7921 }, { "epoch": 1.4637895393285676, "grad_norm": 0.051724813878536224, "learning_rate": 1.087862324877595e-05, "loss": 0.3552211821079254, "step": 7922 }, { "epoch": 1.4639743160374634, "grad_norm": 0.0671362653374672, "learning_rate": 1.0876636031061412e-05, "loss": 0.4889882206916809, "step": 7923 }, { "epoch": 1.4641590927463592, "grad_norm": 0.07439544796943665, "learning_rate": 1.0874648778459502e-05, "loss": 0.5556880831718445, "step": 7924 }, { "epoch": 1.4643438694552553, "grad_norm": 0.0624934583902359, "learning_rate": 1.0872661491049308e-05, "loss": 0.3543209135532379, "step": 7925 }, { "epoch": 1.4645286461641511, "grad_norm": 0.0852447897195816, "learning_rate": 1.0870674168909918e-05, "loss": 0.49739766120910645, "step": 7926 }, { "epoch": 1.464713422873047, "grad_norm": 0.08161558210849762, "learning_rate": 1.0868686812120417e-05, "loss": 0.6055195927619934, "step": 7927 }, { "epoch": 1.4648981995819428, "grad_norm": 0.09370871633291245, "learning_rate": 1.0866699420759901e-05, "loss": 0.6243895292282104, "step": 7928 }, { "epoch": 1.4650829762908386, "grad_norm": 0.08233344554901123, "learning_rate": 1.0864711994907457e-05, "loss": 0.544588029384613, "step": 7929 }, { "epoch": 1.4652677529997344, "grad_norm": 0.082923524081707, "learning_rate": 1.0862724534642186e-05, "loss": 0.584108293056488, "step": 7930 }, { "epoch": 1.4654525297086303, "grad_norm": 0.11480110138654709, "learning_rate": 1.0860737040043175e-05, "loss": 0.793763279914856, "step": 7931 }, { "epoch": 1.465637306417526, "grad_norm": 0.08902088552713394, "learning_rate": 1.0858749511189519e-05, "loss": 0.6228944659233093, "step": 7932 }, { "epoch": 1.465822083126422, "grad_norm": 0.08220183104276657, "learning_rate": 1.0856761948160323e-05, "loss": 0.5731696486473083, "step": 7933 }, { "epoch": 1.4660068598353178, "grad_norm": 0.07819283753633499, "learning_rate": 1.0854774351034682e-05, "loss": 0.5738114714622498, "step": 7934 }, { "epoch": 1.4661916365442136, "grad_norm": 0.07486468553543091, "learning_rate": 1.0852786719891695e-05, "loss": 0.557564914226532, "step": 7935 }, { "epoch": 1.4663764132531094, "grad_norm": 0.06882079690694809, "learning_rate": 1.0850799054810465e-05, "loss": 0.4335728585720062, "step": 7936 }, { "epoch": 1.4665611899620052, "grad_norm": 0.07702423632144928, "learning_rate": 1.0848811355870097e-05, "loss": 0.47517985105514526, "step": 7937 }, { "epoch": 1.466745966670901, "grad_norm": 0.07775147259235382, "learning_rate": 1.0846823623149687e-05, "loss": 0.5218677520751953, "step": 7938 }, { "epoch": 1.466930743379797, "grad_norm": 0.08731850981712341, "learning_rate": 1.0844835856728348e-05, "loss": 0.6244857311248779, "step": 7939 }, { "epoch": 1.4671155200886927, "grad_norm": 0.09213139861822128, "learning_rate": 1.0842848056685188e-05, "loss": 0.553689181804657, "step": 7940 }, { "epoch": 1.4673002967975886, "grad_norm": 0.07810238003730774, "learning_rate": 1.084086022309931e-05, "loss": 0.46642959117889404, "step": 7941 }, { "epoch": 1.4674850735064844, "grad_norm": 0.07946629077196121, "learning_rate": 1.0838872356049826e-05, "loss": 0.4211277365684509, "step": 7942 }, { "epoch": 1.4676698502153804, "grad_norm": 0.07928035408258438, "learning_rate": 1.0836884455615848e-05, "loss": 0.5612075924873352, "step": 7943 }, { "epoch": 1.4678546269242763, "grad_norm": 0.08762326091527939, "learning_rate": 1.0834896521876485e-05, "loss": 0.5942943096160889, "step": 7944 }, { "epoch": 1.468039403633172, "grad_norm": 0.09318449348211288, "learning_rate": 1.0832908554910853e-05, "loss": 0.7384404540061951, "step": 7945 }, { "epoch": 1.468224180342068, "grad_norm": 0.06970518082380295, "learning_rate": 1.0830920554798067e-05, "loss": 0.4766594469547272, "step": 7946 }, { "epoch": 1.4684089570509637, "grad_norm": 0.07656311243772507, "learning_rate": 1.0828932521617244e-05, "loss": 0.4877740740776062, "step": 7947 }, { "epoch": 1.4685937337598596, "grad_norm": 0.0660754069685936, "learning_rate": 1.0826944455447498e-05, "loss": 0.48413288593292236, "step": 7948 }, { "epoch": 1.4687785104687554, "grad_norm": 0.09300076216459274, "learning_rate": 1.082495635636795e-05, "loss": 0.6690374612808228, "step": 7949 }, { "epoch": 1.4689632871776512, "grad_norm": 0.07548022270202637, "learning_rate": 1.082296822445772e-05, "loss": 0.5127116441726685, "step": 7950 }, { "epoch": 1.469148063886547, "grad_norm": 0.10325155407190323, "learning_rate": 1.0820980059795929e-05, "loss": 0.699739933013916, "step": 7951 }, { "epoch": 1.469332840595443, "grad_norm": 0.08389373123645782, "learning_rate": 1.0818991862461701e-05, "loss": 0.41915738582611084, "step": 7952 }, { "epoch": 1.4695176173043387, "grad_norm": 0.07074711471796036, "learning_rate": 1.081700363253416e-05, "loss": 0.3849329948425293, "step": 7953 }, { "epoch": 1.4697023940132348, "grad_norm": 0.08760944753885269, "learning_rate": 1.081501537009243e-05, "loss": 0.5913623571395874, "step": 7954 }, { "epoch": 1.4698871707221306, "grad_norm": 0.08129290491342545, "learning_rate": 1.0813027075215635e-05, "loss": 0.6076329350471497, "step": 7955 }, { "epoch": 1.4700719474310264, "grad_norm": 0.09393065422773361, "learning_rate": 1.081103874798291e-05, "loss": 0.7325844168663025, "step": 7956 }, { "epoch": 1.4702567241399223, "grad_norm": 0.07572071999311447, "learning_rate": 1.0809050388473382e-05, "loss": 0.5757875442504883, "step": 7957 }, { "epoch": 1.470441500848818, "grad_norm": 0.08286506682634354, "learning_rate": 1.0807061996766174e-05, "loss": 0.49454647302627563, "step": 7958 }, { "epoch": 1.470626277557714, "grad_norm": 0.0690111443400383, "learning_rate": 1.0805073572940425e-05, "loss": 0.5452752113342285, "step": 7959 }, { "epoch": 1.4708110542666097, "grad_norm": 0.0932869166135788, "learning_rate": 1.080308511707527e-05, "loss": 0.6137073040008545, "step": 7960 }, { "epoch": 1.4709958309755056, "grad_norm": 0.0877099260687828, "learning_rate": 1.0801096629249836e-05, "loss": 0.5865559577941895, "step": 7961 }, { "epoch": 1.4711806076844014, "grad_norm": 0.07646284252405167, "learning_rate": 1.079910810954326e-05, "loss": 0.5363489985466003, "step": 7962 }, { "epoch": 1.4713653843932972, "grad_norm": 0.09914388507604599, "learning_rate": 1.079711955803469e-05, "loss": 0.6129844784736633, "step": 7963 }, { "epoch": 1.471550161102193, "grad_norm": 0.08948146551847458, "learning_rate": 1.0795130974803252e-05, "loss": 0.5178477764129639, "step": 7964 }, { "epoch": 1.4717349378110889, "grad_norm": 0.08743748068809509, "learning_rate": 1.0793142359928084e-05, "loss": 0.5934823751449585, "step": 7965 }, { "epoch": 1.4719197145199847, "grad_norm": 0.05671244114637375, "learning_rate": 1.0791153713488336e-05, "loss": 0.29773834347724915, "step": 7966 }, { "epoch": 1.4721044912288805, "grad_norm": 0.0830715000629425, "learning_rate": 1.0789165035563145e-05, "loss": 0.615138053894043, "step": 7967 }, { "epoch": 1.4722892679377764, "grad_norm": 0.06543489545583725, "learning_rate": 1.0787176326231651e-05, "loss": 0.407809853553772, "step": 7968 }, { "epoch": 1.4724740446466722, "grad_norm": 0.08552611619234085, "learning_rate": 1.0785187585573007e-05, "loss": 0.6576621532440186, "step": 7969 }, { "epoch": 1.472658821355568, "grad_norm": 0.061488475650548935, "learning_rate": 1.0783198813666354e-05, "loss": 0.3980618715286255, "step": 7970 }, { "epoch": 1.4728435980644639, "grad_norm": 0.07736705988645554, "learning_rate": 1.0781210010590834e-05, "loss": 0.5964474678039551, "step": 7971 }, { "epoch": 1.4730283747733597, "grad_norm": 0.06732498109340668, "learning_rate": 1.07792211764256e-05, "loss": 0.3793677091598511, "step": 7972 }, { "epoch": 1.4732131514822557, "grad_norm": 0.0850326418876648, "learning_rate": 1.0777232311249805e-05, "loss": 0.3835803270339966, "step": 7973 }, { "epoch": 1.4733979281911516, "grad_norm": 0.07928745448589325, "learning_rate": 1.0775243415142595e-05, "loss": 0.559565544128418, "step": 7974 }, { "epoch": 1.4735827049000474, "grad_norm": 0.0691276267170906, "learning_rate": 1.0773254488183117e-05, "loss": 0.37865975499153137, "step": 7975 }, { "epoch": 1.4737674816089432, "grad_norm": 0.09156709909439087, "learning_rate": 1.0771265530450537e-05, "loss": 0.6320781111717224, "step": 7976 }, { "epoch": 1.473952258317839, "grad_norm": 0.07213588804006577, "learning_rate": 1.0769276542024e-05, "loss": 0.6258044838905334, "step": 7977 }, { "epoch": 1.4741370350267349, "grad_norm": 0.07551666349172592, "learning_rate": 1.0767287522982662e-05, "loss": 0.5296053290367126, "step": 7978 }, { "epoch": 1.4743218117356307, "grad_norm": 0.08219839632511139, "learning_rate": 1.0765298473405679e-05, "loss": 0.5902359485626221, "step": 7979 }, { "epoch": 1.4745065884445265, "grad_norm": 0.06733104586601257, "learning_rate": 1.0763309393372215e-05, "loss": 0.38392987847328186, "step": 7980 }, { "epoch": 1.4746913651534224, "grad_norm": 0.08572923392057419, "learning_rate": 1.076132028296142e-05, "loss": 0.5022323131561279, "step": 7981 }, { "epoch": 1.4748761418623182, "grad_norm": 0.06923422962427139, "learning_rate": 1.0759331142252463e-05, "loss": 0.4584689438343048, "step": 7982 }, { "epoch": 1.475060918571214, "grad_norm": 0.09197697043418884, "learning_rate": 1.0757341971324504e-05, "loss": 0.6473321914672852, "step": 7983 }, { "epoch": 1.47524569528011, "grad_norm": 0.06988525390625, "learning_rate": 1.0755352770256704e-05, "loss": 0.439445436000824, "step": 7984 }, { "epoch": 1.475430471989006, "grad_norm": 0.05474436655640602, "learning_rate": 1.0753363539128222e-05, "loss": 0.2108364701271057, "step": 7985 }, { "epoch": 1.4756152486979017, "grad_norm": 0.04617056995630264, "learning_rate": 1.0751374278018232e-05, "loss": 0.2944631576538086, "step": 7986 }, { "epoch": 1.4758000254067976, "grad_norm": 0.08965945243835449, "learning_rate": 1.0749384987005896e-05, "loss": 0.4396054744720459, "step": 7987 }, { "epoch": 1.4759848021156934, "grad_norm": 0.09829042851924896, "learning_rate": 1.0747395666170382e-05, "loss": 0.6436299085617065, "step": 7988 }, { "epoch": 1.4761695788245892, "grad_norm": 0.09775824844837189, "learning_rate": 1.0745406315590856e-05, "loss": 0.626186728477478, "step": 7989 }, { "epoch": 1.476354355533485, "grad_norm": 0.08036770671606064, "learning_rate": 1.0743416935346496e-05, "loss": 0.5929142236709595, "step": 7990 }, { "epoch": 1.4765391322423809, "grad_norm": 0.07372663170099258, "learning_rate": 1.0741427525516463e-05, "loss": 0.5616413950920105, "step": 7991 }, { "epoch": 1.4767239089512767, "grad_norm": 0.07719265669584274, "learning_rate": 1.0739438086179934e-05, "loss": 0.6793864369392395, "step": 7992 }, { "epoch": 1.4769086856601725, "grad_norm": 0.09435825049877167, "learning_rate": 1.0737448617416086e-05, "loss": 0.6805324554443359, "step": 7993 }, { "epoch": 1.4770934623690684, "grad_norm": 0.07104142010211945, "learning_rate": 1.0735459119304093e-05, "loss": 0.5474250912666321, "step": 7994 }, { "epoch": 1.4772782390779642, "grad_norm": 0.08389271795749664, "learning_rate": 1.0733469591923122e-05, "loss": 0.5377687215805054, "step": 7995 }, { "epoch": 1.47746301578686, "grad_norm": 0.0856410413980484, "learning_rate": 1.0731480035352356e-05, "loss": 0.47562843561172485, "step": 7996 }, { "epoch": 1.4776477924957558, "grad_norm": 0.06180229038000107, "learning_rate": 1.0729490449670976e-05, "loss": 0.3866513669490814, "step": 7997 }, { "epoch": 1.4778325692046517, "grad_norm": 0.06881602108478546, "learning_rate": 1.0727500834958157e-05, "loss": 0.43669071793556213, "step": 7998 }, { "epoch": 1.4780173459135475, "grad_norm": 0.0712369903922081, "learning_rate": 1.0725511191293082e-05, "loss": 0.39811253547668457, "step": 7999 }, { "epoch": 1.4782021226224433, "grad_norm": 0.10602513700723648, "learning_rate": 1.0723521518754931e-05, "loss": 0.7491739988327026, "step": 8000 }, { "epoch": 1.4782021226224433, "eval_loss": 0.579645574092865, "eval_runtime": 288.6369, "eval_samples_per_second": 63.155, "eval_steps_per_second": 7.896, "step": 8000 }, { "epoch": 1.4783868993313392, "grad_norm": 0.08168964087963104, "learning_rate": 1.0721531817422885e-05, "loss": 0.6392906308174133, "step": 8001 }, { "epoch": 1.4785716760402352, "grad_norm": 0.0581355020403862, "learning_rate": 1.0719542087376134e-05, "loss": 0.4122292995452881, "step": 8002 }, { "epoch": 1.478756452749131, "grad_norm": 0.06892077624797821, "learning_rate": 1.0717552328693855e-05, "loss": 0.4517199695110321, "step": 8003 }, { "epoch": 1.4789412294580269, "grad_norm": 0.05392163619399071, "learning_rate": 1.0715562541455243e-05, "loss": 0.3663863241672516, "step": 8004 }, { "epoch": 1.4791260061669227, "grad_norm": 0.07395106554031372, "learning_rate": 1.0713572725739476e-05, "loss": 0.5130017995834351, "step": 8005 }, { "epoch": 1.4793107828758185, "grad_norm": 0.08342242240905762, "learning_rate": 1.0711582881625746e-05, "loss": 0.6381667256355286, "step": 8006 }, { "epoch": 1.4794955595847143, "grad_norm": 0.068038709461689, "learning_rate": 1.070959300919325e-05, "loss": 0.391664057970047, "step": 8007 }, { "epoch": 1.4796803362936102, "grad_norm": 0.08663652092218399, "learning_rate": 1.0707603108521165e-05, "loss": 0.6319605112075806, "step": 8008 }, { "epoch": 1.479865113002506, "grad_norm": 0.08702877908945084, "learning_rate": 1.0705613179688694e-05, "loss": 0.7374585866928101, "step": 8009 }, { "epoch": 1.4800498897114018, "grad_norm": 0.09197530895471573, "learning_rate": 1.0703623222775028e-05, "loss": 0.589191734790802, "step": 8010 }, { "epoch": 1.4802346664202977, "grad_norm": 0.08512798696756363, "learning_rate": 1.0701633237859355e-05, "loss": 0.5790627598762512, "step": 8011 }, { "epoch": 1.4804194431291935, "grad_norm": 0.06841887533664703, "learning_rate": 1.0699643225020876e-05, "loss": 0.47389841079711914, "step": 8012 }, { "epoch": 1.4806042198380895, "grad_norm": 0.0750150978565216, "learning_rate": 1.0697653184338785e-05, "loss": 0.5332591533660889, "step": 8013 }, { "epoch": 1.4807889965469854, "grad_norm": 0.06956504285335541, "learning_rate": 1.0695663115892282e-05, "loss": 0.4910212755203247, "step": 8014 }, { "epoch": 1.4809737732558812, "grad_norm": 0.0774892121553421, "learning_rate": 1.0693673019760562e-05, "loss": 0.46022897958755493, "step": 8015 }, { "epoch": 1.481158549964777, "grad_norm": 0.07349559664726257, "learning_rate": 1.0691682896022824e-05, "loss": 0.5611245632171631, "step": 8016 }, { "epoch": 1.4813433266736729, "grad_norm": 0.09003234654664993, "learning_rate": 1.0689692744758276e-05, "loss": 0.5233736038208008, "step": 8017 }, { "epoch": 1.4815281033825687, "grad_norm": 0.05826477333903313, "learning_rate": 1.068770256604611e-05, "loss": 0.36907002329826355, "step": 8018 }, { "epoch": 1.4817128800914645, "grad_norm": 0.07595459371805191, "learning_rate": 1.0685712359965534e-05, "loss": 0.5646535754203796, "step": 8019 }, { "epoch": 1.4818976568003603, "grad_norm": 0.06655114889144897, "learning_rate": 1.0683722126595753e-05, "loss": 0.4589296877384186, "step": 8020 }, { "epoch": 1.4820824335092562, "grad_norm": 0.08873427659273148, "learning_rate": 1.0681731866015968e-05, "loss": 0.77529376745224, "step": 8021 }, { "epoch": 1.482267210218152, "grad_norm": 0.1050659716129303, "learning_rate": 1.067974157830539e-05, "loss": 0.714748203754425, "step": 8022 }, { "epoch": 1.4824519869270478, "grad_norm": 0.10710117220878601, "learning_rate": 1.0677751263543221e-05, "loss": 0.7326914668083191, "step": 8023 }, { "epoch": 1.4826367636359437, "grad_norm": 0.07048782706260681, "learning_rate": 1.0675760921808673e-05, "loss": 0.46614688634872437, "step": 8024 }, { "epoch": 1.4828215403448395, "grad_norm": 0.07618443667888641, "learning_rate": 1.0673770553180957e-05, "loss": 0.45787060260772705, "step": 8025 }, { "epoch": 1.4830063170537353, "grad_norm": 0.0792887732386589, "learning_rate": 1.0671780157739282e-05, "loss": 0.4927024245262146, "step": 8026 }, { "epoch": 1.4831910937626311, "grad_norm": 0.10205483436584473, "learning_rate": 1.0669789735562855e-05, "loss": 0.6825256943702698, "step": 8027 }, { "epoch": 1.483375870471527, "grad_norm": 0.08562511950731277, "learning_rate": 1.066779928673089e-05, "loss": 0.5680001974105835, "step": 8028 }, { "epoch": 1.4835606471804228, "grad_norm": 0.07572870701551437, "learning_rate": 1.0665808811322608e-05, "loss": 0.5484418272972107, "step": 8029 }, { "epoch": 1.4837454238893186, "grad_norm": 0.05491387099027634, "learning_rate": 1.0663818309417216e-05, "loss": 0.39442920684814453, "step": 8030 }, { "epoch": 1.4839302005982147, "grad_norm": 0.101420558989048, "learning_rate": 1.066182778109393e-05, "loss": 0.5014135837554932, "step": 8031 }, { "epoch": 1.4841149773071105, "grad_norm": 0.07395175844430923, "learning_rate": 1.0659837226431973e-05, "loss": 0.5414116978645325, "step": 8032 }, { "epoch": 1.4842997540160063, "grad_norm": 0.07887198030948639, "learning_rate": 1.0657846645510557e-05, "loss": 0.5636579394340515, "step": 8033 }, { "epoch": 1.4844845307249022, "grad_norm": 0.073245108127594, "learning_rate": 1.06558560384089e-05, "loss": 0.5003830790519714, "step": 8034 }, { "epoch": 1.484669307433798, "grad_norm": 0.08296787738800049, "learning_rate": 1.0653865405206227e-05, "loss": 0.4997517168521881, "step": 8035 }, { "epoch": 1.4848540841426938, "grad_norm": 0.07751402258872986, "learning_rate": 1.0651874745981758e-05, "loss": 0.5620325207710266, "step": 8036 }, { "epoch": 1.4850388608515896, "grad_norm": 0.060117579996585846, "learning_rate": 1.0649884060814713e-05, "loss": 0.41544926166534424, "step": 8037 }, { "epoch": 1.4852236375604855, "grad_norm": 0.08661402761936188, "learning_rate": 1.0647893349784313e-05, "loss": 0.5965865254402161, "step": 8038 }, { "epoch": 1.4854084142693813, "grad_norm": 0.09608050435781479, "learning_rate": 1.0645902612969788e-05, "loss": 0.7004936337471008, "step": 8039 }, { "epoch": 1.4855931909782771, "grad_norm": 0.07092927396297455, "learning_rate": 1.0643911850450358e-05, "loss": 0.5260869860649109, "step": 8040 }, { "epoch": 1.485777967687173, "grad_norm": 0.07654356956481934, "learning_rate": 1.064192106230525e-05, "loss": 0.43520107865333557, "step": 8041 }, { "epoch": 1.485962744396069, "grad_norm": 0.08207488805055618, "learning_rate": 1.0639930248613694e-05, "loss": 0.49081167578697205, "step": 8042 }, { "epoch": 1.4861475211049648, "grad_norm": 0.07600727677345276, "learning_rate": 1.0637939409454916e-05, "loss": 0.588967502117157, "step": 8043 }, { "epoch": 1.4863322978138607, "grad_norm": 0.08325869590044022, "learning_rate": 1.063594854490814e-05, "loss": 0.6097344160079956, "step": 8044 }, { "epoch": 1.4865170745227565, "grad_norm": 0.06231686845421791, "learning_rate": 1.0633957655052609e-05, "loss": 0.4860547184944153, "step": 8045 }, { "epoch": 1.4867018512316523, "grad_norm": 0.0686325877904892, "learning_rate": 1.0631966739967545e-05, "loss": 0.3996231257915497, "step": 8046 }, { "epoch": 1.4868866279405482, "grad_norm": 0.06175853684544563, "learning_rate": 1.062997579973218e-05, "loss": 0.5008290410041809, "step": 8047 }, { "epoch": 1.487071404649444, "grad_norm": 0.06243591755628586, "learning_rate": 1.0627984834425748e-05, "loss": 0.3662768602371216, "step": 8048 }, { "epoch": 1.4872561813583398, "grad_norm": 0.08133753389120102, "learning_rate": 1.062599384412749e-05, "loss": 0.6068007349967957, "step": 8049 }, { "epoch": 1.4874409580672356, "grad_norm": 0.07411174476146698, "learning_rate": 1.0624002828916631e-05, "loss": 0.48948463797569275, "step": 8050 }, { "epoch": 1.4876257347761315, "grad_norm": 0.08100121468305588, "learning_rate": 1.062201178887241e-05, "loss": 0.5255101323127747, "step": 8051 }, { "epoch": 1.4878105114850273, "grad_norm": 0.07053257524967194, "learning_rate": 1.062002072407407e-05, "loss": 0.5262172818183899, "step": 8052 }, { "epoch": 1.4879952881939231, "grad_norm": 0.08912979066371918, "learning_rate": 1.0618029634600843e-05, "loss": 0.7172182202339172, "step": 8053 }, { "epoch": 1.488180064902819, "grad_norm": 0.08581690490245819, "learning_rate": 1.0616038520531969e-05, "loss": 0.4855179190635681, "step": 8054 }, { "epoch": 1.4883648416117148, "grad_norm": 0.06741776317358017, "learning_rate": 1.061404738194669e-05, "loss": 0.44996926188468933, "step": 8055 }, { "epoch": 1.4885496183206106, "grad_norm": 0.07213829457759857, "learning_rate": 1.061205621892425e-05, "loss": 0.460223913192749, "step": 8056 }, { "epoch": 1.4887343950295064, "grad_norm": 0.07456837594509125, "learning_rate": 1.0610065031543881e-05, "loss": 0.511305034160614, "step": 8057 }, { "epoch": 1.4889191717384023, "grad_norm": 0.07812952995300293, "learning_rate": 1.0608073819884837e-05, "loss": 0.42069342732429504, "step": 8058 }, { "epoch": 1.489103948447298, "grad_norm": 0.09203128516674042, "learning_rate": 1.0606082584026357e-05, "loss": 0.5720989108085632, "step": 8059 }, { "epoch": 1.489288725156194, "grad_norm": 0.057817284017801285, "learning_rate": 1.0604091324047683e-05, "loss": 0.37874454259872437, "step": 8060 }, { "epoch": 1.48947350186509, "grad_norm": 0.08087790012359619, "learning_rate": 1.0602100040028068e-05, "loss": 0.6484622359275818, "step": 8061 }, { "epoch": 1.4896582785739858, "grad_norm": 0.08493216335773468, "learning_rate": 1.0600108732046751e-05, "loss": 0.5885729193687439, "step": 8062 }, { "epoch": 1.4898430552828816, "grad_norm": 0.0692172572016716, "learning_rate": 1.059811740018299e-05, "loss": 0.46173328161239624, "step": 8063 }, { "epoch": 1.4900278319917775, "grad_norm": 0.06761965155601501, "learning_rate": 1.0596126044516021e-05, "loss": 0.45651736855506897, "step": 8064 }, { "epoch": 1.4902126087006733, "grad_norm": 0.10760365426540375, "learning_rate": 1.0594134665125106e-05, "loss": 0.7754571437835693, "step": 8065 }, { "epoch": 1.4903973854095691, "grad_norm": 0.050112102180719376, "learning_rate": 1.059214326208949e-05, "loss": 0.30902236700057983, "step": 8066 }, { "epoch": 1.490582162118465, "grad_norm": 0.06518737971782684, "learning_rate": 1.059015183548842e-05, "loss": 0.4446237087249756, "step": 8067 }, { "epoch": 1.4907669388273608, "grad_norm": 0.0689021572470665, "learning_rate": 1.0588160385401157e-05, "loss": 0.421636700630188, "step": 8068 }, { "epoch": 1.4909517155362566, "grad_norm": 0.07490003854036331, "learning_rate": 1.0586168911906951e-05, "loss": 0.4064212143421173, "step": 8069 }, { "epoch": 1.4911364922451524, "grad_norm": 0.09233926236629486, "learning_rate": 1.0584177415085053e-05, "loss": 0.5941533446311951, "step": 8070 }, { "epoch": 1.4913212689540483, "grad_norm": 0.10283198207616806, "learning_rate": 1.0582185895014723e-05, "loss": 0.5642294883728027, "step": 8071 }, { "epoch": 1.4915060456629443, "grad_norm": 0.08836923539638519, "learning_rate": 1.0580194351775217e-05, "loss": 0.7438347935676575, "step": 8072 }, { "epoch": 1.4916908223718401, "grad_norm": 0.0638003796339035, "learning_rate": 1.0578202785445792e-05, "loss": 0.4012036621570587, "step": 8073 }, { "epoch": 1.491875599080736, "grad_norm": 0.09235439449548721, "learning_rate": 1.05762111961057e-05, "loss": 0.5444309711456299, "step": 8074 }, { "epoch": 1.4920603757896318, "grad_norm": 0.09472963213920593, "learning_rate": 1.0574219583834211e-05, "loss": 0.6380682587623596, "step": 8075 }, { "epoch": 1.4922451524985276, "grad_norm": 0.0787310004234314, "learning_rate": 1.0572227948710578e-05, "loss": 0.5088316202163696, "step": 8076 }, { "epoch": 1.4924299292074235, "grad_norm": 0.0733528658747673, "learning_rate": 1.057023629081406e-05, "loss": 0.40022554993629456, "step": 8077 }, { "epoch": 1.4926147059163193, "grad_norm": 0.0707663968205452, "learning_rate": 1.0568244610223921e-05, "loss": 0.4129393696784973, "step": 8078 }, { "epoch": 1.4927994826252151, "grad_norm": 0.07245533168315887, "learning_rate": 1.056625290701943e-05, "loss": 0.48481282591819763, "step": 8079 }, { "epoch": 1.492984259334111, "grad_norm": 0.08001293241977692, "learning_rate": 1.056426118127984e-05, "loss": 0.5082979798316956, "step": 8080 }, { "epoch": 1.4931690360430068, "grad_norm": 0.05666988343000412, "learning_rate": 1.056226943308442e-05, "loss": 0.3453954756259918, "step": 8081 }, { "epoch": 1.4933538127519026, "grad_norm": 0.06098370626568794, "learning_rate": 1.0560277662512439e-05, "loss": 0.4135182201862335, "step": 8082 }, { "epoch": 1.4935385894607984, "grad_norm": 0.08647754788398743, "learning_rate": 1.055828586964316e-05, "loss": 0.564022421836853, "step": 8083 }, { "epoch": 1.4937233661696943, "grad_norm": 0.07890351861715317, "learning_rate": 1.0556294054555847e-05, "loss": 0.6017817854881287, "step": 8084 }, { "epoch": 1.49390814287859, "grad_norm": 0.07599826902151108, "learning_rate": 1.0554302217329773e-05, "loss": 0.5751614570617676, "step": 8085 }, { "epoch": 1.494092919587486, "grad_norm": 0.10049251466989517, "learning_rate": 1.0552310358044204e-05, "loss": 0.554628312587738, "step": 8086 }, { "epoch": 1.4942776962963817, "grad_norm": 0.06830247491598129, "learning_rate": 1.0550318476778412e-05, "loss": 0.5074917078018188, "step": 8087 }, { "epoch": 1.4944624730052776, "grad_norm": 0.07232210040092468, "learning_rate": 1.0548326573611662e-05, "loss": 0.4642643928527832, "step": 8088 }, { "epoch": 1.4946472497141734, "grad_norm": 0.0779031440615654, "learning_rate": 1.0546334648623235e-05, "loss": 0.6109477281570435, "step": 8089 }, { "epoch": 1.4948320264230694, "grad_norm": 0.08378469198942184, "learning_rate": 1.0544342701892396e-05, "loss": 0.5547432899475098, "step": 8090 }, { "epoch": 1.4950168031319653, "grad_norm": 0.07529163360595703, "learning_rate": 1.0542350733498424e-05, "loss": 0.5577362775802612, "step": 8091 }, { "epoch": 1.495201579840861, "grad_norm": 0.08380243182182312, "learning_rate": 1.0540358743520585e-05, "loss": 0.5255307555198669, "step": 8092 }, { "epoch": 1.495386356549757, "grad_norm": 0.08864055573940277, "learning_rate": 1.0538366732038161e-05, "loss": 0.7255281805992126, "step": 8093 }, { "epoch": 1.4955711332586528, "grad_norm": 0.09007778763771057, "learning_rate": 1.0536374699130422e-05, "loss": 0.6297560930252075, "step": 8094 }, { "epoch": 1.4957559099675486, "grad_norm": 0.07595361769199371, "learning_rate": 1.053438264487665e-05, "loss": 0.5389895439147949, "step": 8095 }, { "epoch": 1.4959406866764444, "grad_norm": 0.07773599028587341, "learning_rate": 1.0532390569356123e-05, "loss": 0.489621639251709, "step": 8096 }, { "epoch": 1.4961254633853402, "grad_norm": 0.07256493717432022, "learning_rate": 1.0530398472648116e-05, "loss": 0.4765538275241852, "step": 8097 }, { "epoch": 1.496310240094236, "grad_norm": 0.06853928416967392, "learning_rate": 1.0528406354831909e-05, "loss": 0.5104422569274902, "step": 8098 }, { "epoch": 1.496495016803132, "grad_norm": 0.0818312019109726, "learning_rate": 1.0526414215986783e-05, "loss": 0.4994242787361145, "step": 8099 }, { "epoch": 1.4966797935120277, "grad_norm": 0.0743444487452507, "learning_rate": 1.0524422056192014e-05, "loss": 0.5327197909355164, "step": 8100 }, { "epoch": 1.4968645702209238, "grad_norm": 0.06243869662284851, "learning_rate": 1.0522429875526892e-05, "loss": 0.3634476661682129, "step": 8101 }, { "epoch": 1.4970493469298196, "grad_norm": 0.09468526393175125, "learning_rate": 1.0520437674070694e-05, "loss": 0.6794441342353821, "step": 8102 }, { "epoch": 1.4972341236387154, "grad_norm": 0.06301013380289078, "learning_rate": 1.0518445451902706e-05, "loss": 0.5078780055046082, "step": 8103 }, { "epoch": 1.4974189003476113, "grad_norm": 0.07378236949443817, "learning_rate": 1.0516453209102209e-05, "loss": 0.535196840763092, "step": 8104 }, { "epoch": 1.497603677056507, "grad_norm": 0.09048765897750854, "learning_rate": 1.051446094574849e-05, "loss": 0.6304789185523987, "step": 8105 }, { "epoch": 1.497788453765403, "grad_norm": 0.08033865690231323, "learning_rate": 1.0512468661920836e-05, "loss": 0.553955078125, "step": 8106 }, { "epoch": 1.4979732304742988, "grad_norm": 0.060053229331970215, "learning_rate": 1.0510476357698534e-05, "loss": 0.40581458806991577, "step": 8107 }, { "epoch": 1.4981580071831946, "grad_norm": 0.07334432750940323, "learning_rate": 1.0508484033160868e-05, "loss": 0.5493158102035522, "step": 8108 }, { "epoch": 1.4983427838920904, "grad_norm": 0.08855029195547104, "learning_rate": 1.0506491688387128e-05, "loss": 0.565083920955658, "step": 8109 }, { "epoch": 1.4985275606009862, "grad_norm": 0.08073693513870239, "learning_rate": 1.0504499323456603e-05, "loss": 0.567347526550293, "step": 8110 }, { "epoch": 1.498712337309882, "grad_norm": 0.07002782821655273, "learning_rate": 1.0502506938448586e-05, "loss": 0.4169497489929199, "step": 8111 }, { "epoch": 1.498897114018778, "grad_norm": 0.05655227601528168, "learning_rate": 1.0500514533442364e-05, "loss": 0.41766270995140076, "step": 8112 }, { "epoch": 1.4990818907276737, "grad_norm": 0.09233249723911285, "learning_rate": 1.0498522108517231e-05, "loss": 0.607703685760498, "step": 8113 }, { "epoch": 1.4992666674365696, "grad_norm": 0.0869932696223259, "learning_rate": 1.0496529663752473e-05, "loss": 0.5394681096076965, "step": 8114 }, { "epoch": 1.4994514441454654, "grad_norm": 0.0950428918004036, "learning_rate": 1.0494537199227393e-05, "loss": 0.5990388989448547, "step": 8115 }, { "epoch": 1.4996362208543612, "grad_norm": 0.07539371401071548, "learning_rate": 1.0492544715021275e-05, "loss": 0.4656071364879608, "step": 8116 }, { "epoch": 1.499820997563257, "grad_norm": 0.0667133778333664, "learning_rate": 1.0490552211213421e-05, "loss": 0.39385464787483215, "step": 8117 }, { "epoch": 1.5000057742721529, "grad_norm": 0.07178043574094772, "learning_rate": 1.0488559687883125e-05, "loss": 0.44700050354003906, "step": 8118 }, { "epoch": 1.5001905509810487, "grad_norm": 0.07774877548217773, "learning_rate": 1.0486567145109678e-05, "loss": 0.575194239616394, "step": 8119 }, { "epoch": 1.5003753276899445, "grad_norm": 0.054962847381830215, "learning_rate": 1.0484574582972383e-05, "loss": 0.32182666659355164, "step": 8120 }, { "epoch": 1.5005601043988406, "grad_norm": 0.08489862084388733, "learning_rate": 1.0482582001550537e-05, "loss": 0.5646397471427917, "step": 8121 }, { "epoch": 1.5007448811077364, "grad_norm": 0.059859659522771835, "learning_rate": 1.0480589400923436e-05, "loss": 0.367511510848999, "step": 8122 }, { "epoch": 1.5009296578166322, "grad_norm": 0.07323503494262695, "learning_rate": 1.047859678117038e-05, "loss": 0.4996204078197479, "step": 8123 }, { "epoch": 1.501114434525528, "grad_norm": 0.06804613769054413, "learning_rate": 1.047660414237067e-05, "loss": 0.38346338272094727, "step": 8124 }, { "epoch": 1.501299211234424, "grad_norm": 0.07917284965515137, "learning_rate": 1.0474611484603607e-05, "loss": 0.6195372343063354, "step": 8125 }, { "epoch": 1.5014839879433197, "grad_norm": 0.0733339861035347, "learning_rate": 1.0472618807948488e-05, "loss": 0.6930992603302002, "step": 8126 }, { "epoch": 1.5016687646522155, "grad_norm": 0.0960981622338295, "learning_rate": 1.0470626112484622e-05, "loss": 0.6226276159286499, "step": 8127 }, { "epoch": 1.5018535413611114, "grad_norm": 0.07643352448940277, "learning_rate": 1.0468633398291313e-05, "loss": 0.6525744199752808, "step": 8128 }, { "epoch": 1.5020383180700074, "grad_norm": 0.09541403502225876, "learning_rate": 1.0466640665447854e-05, "loss": 0.6560640335083008, "step": 8129 }, { "epoch": 1.5022230947789033, "grad_norm": 0.06832639873027802, "learning_rate": 1.0464647914033558e-05, "loss": 0.35440102219581604, "step": 8130 }, { "epoch": 1.502407871487799, "grad_norm": 0.08282726258039474, "learning_rate": 1.0462655144127734e-05, "loss": 0.45437178015708923, "step": 8131 }, { "epoch": 1.502592648196695, "grad_norm": 0.06447652727365494, "learning_rate": 1.0460662355809678e-05, "loss": 0.3525720238685608, "step": 8132 }, { "epoch": 1.5027774249055907, "grad_norm": 0.06565721333026886, "learning_rate": 1.0458669549158703e-05, "loss": 0.4773813486099243, "step": 8133 }, { "epoch": 1.5029622016144866, "grad_norm": 0.07156088203191757, "learning_rate": 1.0456676724254114e-05, "loss": 0.5528655648231506, "step": 8134 }, { "epoch": 1.5031469783233824, "grad_norm": 0.07021511346101761, "learning_rate": 1.0454683881175221e-05, "loss": 0.4314422011375427, "step": 8135 }, { "epoch": 1.5033317550322782, "grad_norm": 0.09521748125553131, "learning_rate": 1.0452691020001329e-05, "loss": 0.7305403351783752, "step": 8136 }, { "epoch": 1.503516531741174, "grad_norm": 0.06114820018410683, "learning_rate": 1.0450698140811753e-05, "loss": 0.40088844299316406, "step": 8137 }, { "epoch": 1.5037013084500699, "grad_norm": 0.07667379826307297, "learning_rate": 1.0448705243685801e-05, "loss": 0.5117903351783752, "step": 8138 }, { "epoch": 1.5038860851589657, "grad_norm": 0.058828432112932205, "learning_rate": 1.0446712328702784e-05, "loss": 0.38778555393218994, "step": 8139 }, { "epoch": 1.5040708618678615, "grad_norm": 0.08744698017835617, "learning_rate": 1.0444719395942013e-05, "loss": 0.6395158767700195, "step": 8140 }, { "epoch": 1.5042556385767574, "grad_norm": 0.07198171317577362, "learning_rate": 1.0442726445482805e-05, "loss": 0.46748894453048706, "step": 8141 }, { "epoch": 1.5044404152856532, "grad_norm": 0.07280024141073227, "learning_rate": 1.0440733477404468e-05, "loss": 0.4413139820098877, "step": 8142 }, { "epoch": 1.504625191994549, "grad_norm": 0.07300464808940887, "learning_rate": 1.0438740491786316e-05, "loss": 0.5154467225074768, "step": 8143 }, { "epoch": 1.5048099687034449, "grad_norm": 0.06128469109535217, "learning_rate": 1.0436747488707666e-05, "loss": 0.40261396765708923, "step": 8144 }, { "epoch": 1.5049947454123407, "grad_norm": 0.08224528282880783, "learning_rate": 1.0434754468247833e-05, "loss": 0.47346919775009155, "step": 8145 }, { "epoch": 1.5051795221212365, "grad_norm": 0.06956780701875687, "learning_rate": 1.043276143048613e-05, "loss": 0.4050731360912323, "step": 8146 }, { "epoch": 1.5053642988301323, "grad_norm": 0.06966309994459152, "learning_rate": 1.0430768375501877e-05, "loss": 0.5770171880722046, "step": 8147 }, { "epoch": 1.5055490755390282, "grad_norm": 0.07297446578741074, "learning_rate": 1.0428775303374392e-05, "loss": 0.4828217327594757, "step": 8148 }, { "epoch": 1.505733852247924, "grad_norm": 0.06459491699934006, "learning_rate": 1.0426782214182991e-05, "loss": 0.3506518006324768, "step": 8149 }, { "epoch": 1.5059186289568198, "grad_norm": 0.10174031555652618, "learning_rate": 1.042478910800699e-05, "loss": 0.6605497002601624, "step": 8150 }, { "epoch": 1.5061034056657159, "grad_norm": 0.0809532031416893, "learning_rate": 1.0422795984925712e-05, "loss": 0.5446367859840393, "step": 8151 }, { "epoch": 1.5062881823746117, "grad_norm": 0.06470794975757599, "learning_rate": 1.0420802845018483e-05, "loss": 0.41524538397789, "step": 8152 }, { "epoch": 1.5064729590835075, "grad_norm": 0.07831034809350967, "learning_rate": 1.041880968836461e-05, "loss": 0.5452517867088318, "step": 8153 }, { "epoch": 1.5066577357924034, "grad_norm": 0.07205667346715927, "learning_rate": 1.0416816515043424e-05, "loss": 0.5258175134658813, "step": 8154 }, { "epoch": 1.5068425125012992, "grad_norm": 0.08268946409225464, "learning_rate": 1.0414823325134248e-05, "loss": 0.5627372860908508, "step": 8155 }, { "epoch": 1.507027289210195, "grad_norm": 0.09006335586309433, "learning_rate": 1.0412830118716396e-05, "loss": 0.5282909870147705, "step": 8156 }, { "epoch": 1.5072120659190908, "grad_norm": 0.06793666630983353, "learning_rate": 1.0410836895869198e-05, "loss": 0.4173637926578522, "step": 8157 }, { "epoch": 1.507396842627987, "grad_norm": 0.0935467779636383, "learning_rate": 1.0408843656671981e-05, "loss": 0.6189107894897461, "step": 8158 }, { "epoch": 1.5075816193368827, "grad_norm": 0.07903085649013519, "learning_rate": 1.0406850401204062e-05, "loss": 0.49702292680740356, "step": 8159 }, { "epoch": 1.5077663960457786, "grad_norm": 0.08739448338747025, "learning_rate": 1.040485712954477e-05, "loss": 0.5867978930473328, "step": 8160 }, { "epoch": 1.5079511727546744, "grad_norm": 0.09268154948949814, "learning_rate": 1.0402863841773432e-05, "loss": 0.648639440536499, "step": 8161 }, { "epoch": 1.5081359494635702, "grad_norm": 0.07363422214984894, "learning_rate": 1.0400870537969375e-05, "loss": 0.5425917506217957, "step": 8162 }, { "epoch": 1.508320726172466, "grad_norm": 0.07788573205471039, "learning_rate": 1.039887721821192e-05, "loss": 0.6117817163467407, "step": 8163 }, { "epoch": 1.5085055028813619, "grad_norm": 0.07192922383546829, "learning_rate": 1.0396883882580401e-05, "loss": 0.5482012033462524, "step": 8164 }, { "epoch": 1.5086902795902577, "grad_norm": 0.079349584877491, "learning_rate": 1.039489053115415e-05, "loss": 0.47276076674461365, "step": 8165 }, { "epoch": 1.5088750562991535, "grad_norm": 0.06868570297956467, "learning_rate": 1.0392897164012487e-05, "loss": 0.5130391716957092, "step": 8166 }, { "epoch": 1.5090598330080494, "grad_norm": 0.061139173805713654, "learning_rate": 1.0390903781234748e-05, "loss": 0.3642079830169678, "step": 8167 }, { "epoch": 1.5092446097169452, "grad_norm": 0.0688120424747467, "learning_rate": 1.0388910382900258e-05, "loss": 0.3949092924594879, "step": 8168 }, { "epoch": 1.509429386425841, "grad_norm": 0.07270647585391998, "learning_rate": 1.0386916969088356e-05, "loss": 0.4588416516780853, "step": 8169 }, { "epoch": 1.5096141631347368, "grad_norm": 0.06708833575248718, "learning_rate": 1.0384923539878366e-05, "loss": 0.514162003993988, "step": 8170 }, { "epoch": 1.5097989398436327, "grad_norm": 0.08766133338212967, "learning_rate": 1.0382930095349625e-05, "loss": 0.5897177457809448, "step": 8171 }, { "epoch": 1.5099837165525285, "grad_norm": 0.08636727929115295, "learning_rate": 1.0380936635581464e-05, "loss": 0.6378280520439148, "step": 8172 }, { "epoch": 1.5101684932614243, "grad_norm": 0.09457448869943619, "learning_rate": 1.0378943160653216e-05, "loss": 0.6356536746025085, "step": 8173 }, { "epoch": 1.5103532699703202, "grad_norm": 0.07422550022602081, "learning_rate": 1.0376949670644216e-05, "loss": 0.50555419921875, "step": 8174 }, { "epoch": 1.510538046679216, "grad_norm": 0.09057539701461792, "learning_rate": 1.03749561656338e-05, "loss": 0.650535523891449, "step": 8175 }, { "epoch": 1.5107228233881118, "grad_norm": 0.0948851928114891, "learning_rate": 1.0372962645701301e-05, "loss": 0.6718326210975647, "step": 8176 }, { "epoch": 1.5109076000970076, "grad_norm": 0.07239285856485367, "learning_rate": 1.0370969110926052e-05, "loss": 0.4960486888885498, "step": 8177 }, { "epoch": 1.5110923768059035, "grad_norm": 0.0766497477889061, "learning_rate": 1.0368975561387398e-05, "loss": 0.552095890045166, "step": 8178 }, { "epoch": 1.5112771535147993, "grad_norm": 0.06806263327598572, "learning_rate": 1.036698199716467e-05, "loss": 0.5912288427352905, "step": 8179 }, { "epoch": 1.5114619302236953, "grad_norm": 0.0985720306634903, "learning_rate": 1.0364988418337205e-05, "loss": 0.6411997079849243, "step": 8180 }, { "epoch": 1.5116467069325912, "grad_norm": 0.0719585195183754, "learning_rate": 1.0362994824984343e-05, "loss": 0.5217729806900024, "step": 8181 }, { "epoch": 1.511831483641487, "grad_norm": 0.0763324648141861, "learning_rate": 1.0361001217185425e-05, "loss": 0.47890594601631165, "step": 8182 }, { "epoch": 1.5120162603503828, "grad_norm": 0.05779948830604553, "learning_rate": 1.0359007595019786e-05, "loss": 0.3642115890979767, "step": 8183 }, { "epoch": 1.5122010370592787, "grad_norm": 0.08898462355136871, "learning_rate": 1.0357013958566766e-05, "loss": 0.5947683453559875, "step": 8184 }, { "epoch": 1.5123858137681745, "grad_norm": 0.06462650001049042, "learning_rate": 1.035502030790571e-05, "loss": 0.5002122521400452, "step": 8185 }, { "epoch": 1.5125705904770703, "grad_norm": 0.06829912215471268, "learning_rate": 1.0353026643115955e-05, "loss": 0.4085816442966461, "step": 8186 }, { "epoch": 1.5127553671859664, "grad_norm": 0.0703347772359848, "learning_rate": 1.0351032964276846e-05, "loss": 0.5114795565605164, "step": 8187 }, { "epoch": 1.5129401438948622, "grad_norm": 0.05297807231545448, "learning_rate": 1.0349039271467722e-05, "loss": 0.3110232651233673, "step": 8188 }, { "epoch": 1.513124920603758, "grad_norm": 0.07348600029945374, "learning_rate": 1.0347045564767928e-05, "loss": 0.5948768258094788, "step": 8189 }, { "epoch": 1.5133096973126539, "grad_norm": 0.07313638925552368, "learning_rate": 1.0345051844256806e-05, "loss": 0.5299927592277527, "step": 8190 }, { "epoch": 1.5134944740215497, "grad_norm": 0.0786438137292862, "learning_rate": 1.0343058110013699e-05, "loss": 0.5249634385108948, "step": 8191 }, { "epoch": 1.5136792507304455, "grad_norm": 0.07947122305631638, "learning_rate": 1.0341064362117954e-05, "loss": 0.49638667702674866, "step": 8192 }, { "epoch": 1.5138640274393413, "grad_norm": 0.0993708148598671, "learning_rate": 1.0339070600648914e-05, "loss": 0.6481083035469055, "step": 8193 }, { "epoch": 1.5140488041482372, "grad_norm": 0.07443512976169586, "learning_rate": 1.0337076825685924e-05, "loss": 0.4163435995578766, "step": 8194 }, { "epoch": 1.514233580857133, "grad_norm": 0.09199873358011246, "learning_rate": 1.033508303730833e-05, "loss": 0.5561436414718628, "step": 8195 }, { "epoch": 1.5144183575660288, "grad_norm": 0.09095046669244766, "learning_rate": 1.0333089235595481e-05, "loss": 0.759151041507721, "step": 8196 }, { "epoch": 1.5146031342749247, "grad_norm": 0.09198595583438873, "learning_rate": 1.0331095420626724e-05, "loss": 0.5493916273117065, "step": 8197 }, { "epoch": 1.5147879109838205, "grad_norm": 0.07630749046802521, "learning_rate": 1.0329101592481403e-05, "loss": 0.5057935118675232, "step": 8198 }, { "epoch": 1.5149726876927163, "grad_norm": 0.06232639402151108, "learning_rate": 1.032710775123887e-05, "loss": 0.4678415358066559, "step": 8199 }, { "epoch": 1.5151574644016121, "grad_norm": 0.05888809263706207, "learning_rate": 1.032511389697847e-05, "loss": 0.3224062919616699, "step": 8200 }, { "epoch": 1.515342241110508, "grad_norm": 0.06751586496829987, "learning_rate": 1.0323120029779555e-05, "loss": 0.47412821650505066, "step": 8201 }, { "epoch": 1.5155270178194038, "grad_norm": 0.06171039491891861, "learning_rate": 1.0321126149721472e-05, "loss": 0.3479996621608734, "step": 8202 }, { "epoch": 1.5157117945282996, "grad_norm": 0.06947099417448044, "learning_rate": 1.0319132256883575e-05, "loss": 0.43905627727508545, "step": 8203 }, { "epoch": 1.5158965712371955, "grad_norm": 0.07921262085437775, "learning_rate": 1.0317138351345211e-05, "loss": 0.6788212656974792, "step": 8204 }, { "epoch": 1.5160813479460913, "grad_norm": 0.06468327343463898, "learning_rate": 1.0315144433185735e-05, "loss": 0.37811005115509033, "step": 8205 }, { "epoch": 1.5162661246549871, "grad_norm": 0.07263485342264175, "learning_rate": 1.0313150502484494e-05, "loss": 0.3825418949127197, "step": 8206 }, { "epoch": 1.516450901363883, "grad_norm": 0.08249958604574203, "learning_rate": 1.0311156559320844e-05, "loss": 0.5926398038864136, "step": 8207 }, { "epoch": 1.5166356780727788, "grad_norm": 0.0563255250453949, "learning_rate": 1.0309162603774137e-05, "loss": 0.36614182591438293, "step": 8208 }, { "epoch": 1.5168204547816748, "grad_norm": 0.08559220284223557, "learning_rate": 1.0307168635923725e-05, "loss": 0.6239089369773865, "step": 8209 }, { "epoch": 1.5170052314905706, "grad_norm": 0.06880374252796173, "learning_rate": 1.0305174655848964e-05, "loss": 0.39663127064704895, "step": 8210 }, { "epoch": 1.5171900081994665, "grad_norm": 0.0709710642695427, "learning_rate": 1.0303180663629201e-05, "loss": 0.47326236963272095, "step": 8211 }, { "epoch": 1.5173747849083623, "grad_norm": 0.07974464446306229, "learning_rate": 1.0301186659343803e-05, "loss": 0.6245824098587036, "step": 8212 }, { "epoch": 1.5175595616172581, "grad_norm": 0.07741749286651611, "learning_rate": 1.0299192643072116e-05, "loss": 0.41176801919937134, "step": 8213 }, { "epoch": 1.517744338326154, "grad_norm": 0.06590697914361954, "learning_rate": 1.0297198614893498e-05, "loss": 0.4024606943130493, "step": 8214 }, { "epoch": 1.5179291150350498, "grad_norm": 0.07015514373779297, "learning_rate": 1.0295204574887303e-05, "loss": 0.5053236484527588, "step": 8215 }, { "epoch": 1.5181138917439456, "grad_norm": 0.061859481036663055, "learning_rate": 1.0293210523132889e-05, "loss": 0.4355672001838684, "step": 8216 }, { "epoch": 1.5182986684528417, "grad_norm": 0.08922475576400757, "learning_rate": 1.0291216459709617e-05, "loss": 0.5943321585655212, "step": 8217 }, { "epoch": 1.5184834451617375, "grad_norm": 0.07734528183937073, "learning_rate": 1.0289222384696838e-05, "loss": 0.47709402441978455, "step": 8218 }, { "epoch": 1.5186682218706333, "grad_norm": 0.07556109875440598, "learning_rate": 1.0287228298173914e-05, "loss": 0.6052689552307129, "step": 8219 }, { "epoch": 1.5188529985795292, "grad_norm": 0.07528205215930939, "learning_rate": 1.0285234200220202e-05, "loss": 0.4916656017303467, "step": 8220 }, { "epoch": 1.519037775288425, "grad_norm": 0.08439075946807861, "learning_rate": 1.0283240090915063e-05, "loss": 0.5587414503097534, "step": 8221 }, { "epoch": 1.5192225519973208, "grad_norm": 0.07659289240837097, "learning_rate": 1.0281245970337851e-05, "loss": 0.4934830963611603, "step": 8222 }, { "epoch": 1.5194073287062166, "grad_norm": 0.06956089287996292, "learning_rate": 1.0279251838567931e-05, "loss": 0.3786989152431488, "step": 8223 }, { "epoch": 1.5195921054151125, "grad_norm": 0.07814884185791016, "learning_rate": 1.0277257695684663e-05, "loss": 0.4549594223499298, "step": 8224 }, { "epoch": 1.5197768821240083, "grad_norm": 0.07259442657232285, "learning_rate": 1.0275263541767405e-05, "loss": 0.407787024974823, "step": 8225 }, { "epoch": 1.5199616588329041, "grad_norm": 0.06543047726154327, "learning_rate": 1.0273269376895518e-05, "loss": 0.4028457701206207, "step": 8226 }, { "epoch": 1.5201464355418, "grad_norm": 0.09713669121265411, "learning_rate": 1.0271275201148368e-05, "loss": 0.7962900996208191, "step": 8227 }, { "epoch": 1.5203312122506958, "grad_norm": 0.08467891812324524, "learning_rate": 1.0269281014605311e-05, "loss": 0.6027758717536926, "step": 8228 }, { "epoch": 1.5205159889595916, "grad_norm": 0.07185697555541992, "learning_rate": 1.0267286817345714e-05, "loss": 0.43385347723960876, "step": 8229 }, { "epoch": 1.5207007656684874, "grad_norm": 0.09062127768993378, "learning_rate": 1.0265292609448936e-05, "loss": 0.684395968914032, "step": 8230 }, { "epoch": 1.5208855423773833, "grad_norm": 0.0966351181268692, "learning_rate": 1.0263298390994342e-05, "loss": 0.7614896297454834, "step": 8231 }, { "epoch": 1.521070319086279, "grad_norm": 0.05065008997917175, "learning_rate": 1.0261304162061296e-05, "loss": 0.3162464201450348, "step": 8232 }, { "epoch": 1.521255095795175, "grad_norm": 0.06607464700937271, "learning_rate": 1.0259309922729161e-05, "loss": 0.4301515817642212, "step": 8233 }, { "epoch": 1.5214398725040708, "grad_norm": 0.08169597387313843, "learning_rate": 1.0257315673077307e-05, "loss": 0.615820050239563, "step": 8234 }, { "epoch": 1.5216246492129666, "grad_norm": 0.0687340497970581, "learning_rate": 1.0255321413185091e-05, "loss": 0.4618292450904846, "step": 8235 }, { "epoch": 1.5218094259218624, "grad_norm": 0.09001611918210983, "learning_rate": 1.025332714313188e-05, "loss": 0.6406571269035339, "step": 8236 }, { "epoch": 1.5219942026307582, "grad_norm": 0.10997528582811356, "learning_rate": 1.0251332862997044e-05, "loss": 0.749646008014679, "step": 8237 }, { "epoch": 1.522178979339654, "grad_norm": 0.07221218198537827, "learning_rate": 1.0249338572859945e-05, "loss": 0.47017595171928406, "step": 8238 }, { "epoch": 1.5223637560485501, "grad_norm": 0.09576410800218582, "learning_rate": 1.024734427279995e-05, "loss": 0.6915577054023743, "step": 8239 }, { "epoch": 1.522548532757446, "grad_norm": 0.06706764549016953, "learning_rate": 1.024534996289643e-05, "loss": 0.3903615474700928, "step": 8240 }, { "epoch": 1.5227333094663418, "grad_norm": 0.09002497047185898, "learning_rate": 1.0243355643228747e-05, "loss": 0.589722752571106, "step": 8241 }, { "epoch": 1.5229180861752376, "grad_norm": 0.05878360942006111, "learning_rate": 1.024136131387627e-05, "loss": 0.3750290870666504, "step": 8242 }, { "epoch": 1.5231028628841334, "grad_norm": 0.08013518899679184, "learning_rate": 1.0239366974918367e-05, "loss": 0.5195515155792236, "step": 8243 }, { "epoch": 1.5232876395930293, "grad_norm": 0.07429202646017075, "learning_rate": 1.023737262643441e-05, "loss": 0.4740116596221924, "step": 8244 }, { "epoch": 1.523472416301925, "grad_norm": 0.07139978557825089, "learning_rate": 1.0235378268503764e-05, "loss": 0.4189651310443878, "step": 8245 }, { "epoch": 1.5236571930108211, "grad_norm": 0.06468570977449417, "learning_rate": 1.0233383901205798e-05, "loss": 0.443634569644928, "step": 8246 }, { "epoch": 1.523841969719717, "grad_norm": 0.0754278302192688, "learning_rate": 1.0231389524619886e-05, "loss": 0.532339334487915, "step": 8247 }, { "epoch": 1.5240267464286128, "grad_norm": 0.05140746012330055, "learning_rate": 1.0229395138825394e-05, "loss": 0.33748894929885864, "step": 8248 }, { "epoch": 1.5242115231375086, "grad_norm": 0.0828530490398407, "learning_rate": 1.0227400743901692e-05, "loss": 0.5280756950378418, "step": 8249 }, { "epoch": 1.5243962998464045, "grad_norm": 0.0824611708521843, "learning_rate": 1.022540633992815e-05, "loss": 0.5182260274887085, "step": 8250 }, { "epoch": 1.5245810765553003, "grad_norm": 0.07414865493774414, "learning_rate": 1.0223411926984146e-05, "loss": 0.4884706437587738, "step": 8251 }, { "epoch": 1.5247658532641961, "grad_norm": 0.09248776733875275, "learning_rate": 1.022141750514904e-05, "loss": 0.7126184701919556, "step": 8252 }, { "epoch": 1.524950629973092, "grad_norm": 0.062182433903217316, "learning_rate": 1.0219423074502213e-05, "loss": 0.3596520721912384, "step": 8253 }, { "epoch": 1.5251354066819878, "grad_norm": 0.08619441092014313, "learning_rate": 1.0217428635123037e-05, "loss": 0.6424943208694458, "step": 8254 }, { "epoch": 1.5253201833908836, "grad_norm": 0.0970609188079834, "learning_rate": 1.021543418709088e-05, "loss": 0.6262872815132141, "step": 8255 }, { "epoch": 1.5255049600997794, "grad_norm": 0.07370869815349579, "learning_rate": 1.0213439730485111e-05, "loss": 0.569189190864563, "step": 8256 }, { "epoch": 1.5256897368086753, "grad_norm": 0.06459003686904907, "learning_rate": 1.0211445265385114e-05, "loss": 0.478630930185318, "step": 8257 }, { "epoch": 1.525874513517571, "grad_norm": 0.0974598228931427, "learning_rate": 1.0209450791870256e-05, "loss": 0.6201545000076294, "step": 8258 }, { "epoch": 1.526059290226467, "grad_norm": 0.0489286407828331, "learning_rate": 1.0207456310019911e-05, "loss": 0.2776273787021637, "step": 8259 }, { "epoch": 1.5262440669353627, "grad_norm": 0.07265926152467728, "learning_rate": 1.0205461819913454e-05, "loss": 0.5331336259841919, "step": 8260 }, { "epoch": 1.5264288436442586, "grad_norm": 0.047001227736473083, "learning_rate": 1.0203467321630263e-05, "loss": 0.26337340474128723, "step": 8261 }, { "epoch": 1.5266136203531544, "grad_norm": 0.07281414419412613, "learning_rate": 1.0201472815249705e-05, "loss": 0.5219126343727112, "step": 8262 }, { "epoch": 1.5267983970620502, "grad_norm": 0.08007118850946426, "learning_rate": 1.0199478300851157e-05, "loss": 0.6058944463729858, "step": 8263 }, { "epoch": 1.526983173770946, "grad_norm": 0.05615640804171562, "learning_rate": 1.0197483778514003e-05, "loss": 0.3353560268878937, "step": 8264 }, { "epoch": 1.5271679504798419, "grad_norm": 0.10725299268960953, "learning_rate": 1.019548924831761e-05, "loss": 0.7764474153518677, "step": 8265 }, { "epoch": 1.5273527271887377, "grad_norm": 0.08144401758909225, "learning_rate": 1.0193494710341354e-05, "loss": 0.4821716547012329, "step": 8266 }, { "epoch": 1.5275375038976335, "grad_norm": 0.07186413556337357, "learning_rate": 1.0191500164664617e-05, "loss": 0.4231451749801636, "step": 8267 }, { "epoch": 1.5277222806065296, "grad_norm": 0.07108518481254578, "learning_rate": 1.0189505611366772e-05, "loss": 0.5909963846206665, "step": 8268 }, { "epoch": 1.5279070573154254, "grad_norm": 0.08292071521282196, "learning_rate": 1.0187511050527195e-05, "loss": 0.5322451591491699, "step": 8269 }, { "epoch": 1.5280918340243212, "grad_norm": 0.08781729638576508, "learning_rate": 1.0185516482225264e-05, "loss": 0.5437963008880615, "step": 8270 }, { "epoch": 1.528276610733217, "grad_norm": 0.06114795804023743, "learning_rate": 1.0183521906540362e-05, "loss": 0.33090609312057495, "step": 8271 }, { "epoch": 1.528461387442113, "grad_norm": 0.09468130022287369, "learning_rate": 1.0181527323551859e-05, "loss": 0.7327249646186829, "step": 8272 }, { "epoch": 1.5286461641510087, "grad_norm": 0.07862450927495956, "learning_rate": 1.0179532733339134e-05, "loss": 0.6347699761390686, "step": 8273 }, { "epoch": 1.5288309408599046, "grad_norm": 0.07262321561574936, "learning_rate": 1.0177538135981573e-05, "loss": 0.5719349980354309, "step": 8274 }, { "epoch": 1.5290157175688006, "grad_norm": 0.0708065778017044, "learning_rate": 1.0175543531558549e-05, "loss": 0.48646262288093567, "step": 8275 }, { "epoch": 1.5292004942776964, "grad_norm": 0.073647640645504, "learning_rate": 1.0173548920149436e-05, "loss": 0.42778047919273376, "step": 8276 }, { "epoch": 1.5293852709865923, "grad_norm": 0.06990744173526764, "learning_rate": 1.0171554301833626e-05, "loss": 0.5315378904342651, "step": 8277 }, { "epoch": 1.529570047695488, "grad_norm": 0.06842117756605148, "learning_rate": 1.0169559676690491e-05, "loss": 0.4153575301170349, "step": 8278 }, { "epoch": 1.529754824404384, "grad_norm": 0.0740189403295517, "learning_rate": 1.0167565044799405e-05, "loss": 0.5298333764076233, "step": 8279 }, { "epoch": 1.5299396011132798, "grad_norm": 0.08885012567043304, "learning_rate": 1.016557040623976e-05, "loss": 0.5935700535774231, "step": 8280 }, { "epoch": 1.5301243778221756, "grad_norm": 0.0665549486875534, "learning_rate": 1.016357576109093e-05, "loss": 0.35256704688072205, "step": 8281 }, { "epoch": 1.5303091545310714, "grad_norm": 0.08737396448850632, "learning_rate": 1.0161581109432295e-05, "loss": 0.6718435287475586, "step": 8282 }, { "epoch": 1.5304939312399672, "grad_norm": 0.0853419080376625, "learning_rate": 1.0159586451343236e-05, "loss": 0.7269653081893921, "step": 8283 }, { "epoch": 1.530678707948863, "grad_norm": 0.09545714408159256, "learning_rate": 1.0157591786903138e-05, "loss": 0.6577144861221313, "step": 8284 }, { "epoch": 1.530863484657759, "grad_norm": 0.08123432099819183, "learning_rate": 1.0155597116191382e-05, "loss": 0.5123420357704163, "step": 8285 }, { "epoch": 1.5310482613666547, "grad_norm": 0.07155416905879974, "learning_rate": 1.0153602439287344e-05, "loss": 0.58453768491745, "step": 8286 }, { "epoch": 1.5312330380755506, "grad_norm": 0.06902708858251572, "learning_rate": 1.015160775627041e-05, "loss": 0.549127459526062, "step": 8287 }, { "epoch": 1.5314178147844464, "grad_norm": 0.08658330887556076, "learning_rate": 1.0149613067219963e-05, "loss": 0.5340646505355835, "step": 8288 }, { "epoch": 1.5316025914933422, "grad_norm": 0.06111351028084755, "learning_rate": 1.0147618372215381e-05, "loss": 0.3370615243911743, "step": 8289 }, { "epoch": 1.531787368202238, "grad_norm": 0.06116218864917755, "learning_rate": 1.0145623671336053e-05, "loss": 0.305520623922348, "step": 8290 }, { "epoch": 1.5319721449111339, "grad_norm": 0.08590128272771835, "learning_rate": 1.0143628964661358e-05, "loss": 0.58865886926651, "step": 8291 }, { "epoch": 1.5321569216200297, "grad_norm": 0.09435473382472992, "learning_rate": 1.0141634252270678e-05, "loss": 0.5893629193305969, "step": 8292 }, { "epoch": 1.5323416983289255, "grad_norm": 0.09070255607366562, "learning_rate": 1.0139639534243397e-05, "loss": 0.5695667862892151, "step": 8293 }, { "epoch": 1.5325264750378214, "grad_norm": 0.0686533972620964, "learning_rate": 1.0137644810658904e-05, "loss": 0.4923887550830841, "step": 8294 }, { "epoch": 1.5327112517467172, "grad_norm": 0.0996929332613945, "learning_rate": 1.0135650081596574e-05, "loss": 0.646103024482727, "step": 8295 }, { "epoch": 1.532896028455613, "grad_norm": 0.07553873211145401, "learning_rate": 1.0133655347135797e-05, "loss": 0.4094032943248749, "step": 8296 }, { "epoch": 1.533080805164509, "grad_norm": 0.0861702561378479, "learning_rate": 1.0131660607355956e-05, "loss": 0.7193480730056763, "step": 8297 }, { "epoch": 1.533265581873405, "grad_norm": 0.07377294450998306, "learning_rate": 1.0129665862336434e-05, "loss": 0.41736268997192383, "step": 8298 }, { "epoch": 1.5334503585823007, "grad_norm": 0.0883931890130043, "learning_rate": 1.0127671112156614e-05, "loss": 0.7603240609169006, "step": 8299 }, { "epoch": 1.5336351352911965, "grad_norm": 0.0921977087855339, "learning_rate": 1.0125676356895884e-05, "loss": 0.67726731300354, "step": 8300 }, { "epoch": 1.5338199120000924, "grad_norm": 0.09125876426696777, "learning_rate": 1.012368159663363e-05, "loss": 0.6322854161262512, "step": 8301 }, { "epoch": 1.5340046887089882, "grad_norm": 0.08249793201684952, "learning_rate": 1.0121686831449235e-05, "loss": 0.590074360370636, "step": 8302 }, { "epoch": 1.534189465417884, "grad_norm": 0.07465846091508865, "learning_rate": 1.0119692061422086e-05, "loss": 0.41635817289352417, "step": 8303 }, { "epoch": 1.53437424212678, "grad_norm": 0.09888509660959244, "learning_rate": 1.0117697286631565e-05, "loss": 0.5953425168991089, "step": 8304 }, { "epoch": 1.534559018835676, "grad_norm": 0.07236199080944061, "learning_rate": 1.0115702507157061e-05, "loss": 0.5066321492195129, "step": 8305 }, { "epoch": 1.5347437955445717, "grad_norm": 0.09026050567626953, "learning_rate": 1.011370772307796e-05, "loss": 0.6313549280166626, "step": 8306 }, { "epoch": 1.5349285722534676, "grad_norm": 0.06680828332901001, "learning_rate": 1.0111712934473645e-05, "loss": 0.5088815689086914, "step": 8307 }, { "epoch": 1.5351133489623634, "grad_norm": 0.07624626904726028, "learning_rate": 1.0109718141423508e-05, "loss": 0.5378090739250183, "step": 8308 }, { "epoch": 1.5352981256712592, "grad_norm": 0.06227405369281769, "learning_rate": 1.010772334400693e-05, "loss": 0.316373735666275, "step": 8309 }, { "epoch": 1.535482902380155, "grad_norm": 0.08028050512075424, "learning_rate": 1.0105728542303299e-05, "loss": 0.5072022676467896, "step": 8310 }, { "epoch": 1.5356676790890509, "grad_norm": 0.08129177987575531, "learning_rate": 1.0103733736392006e-05, "loss": 0.5504554510116577, "step": 8311 }, { "epoch": 1.5358524557979467, "grad_norm": 0.06616882979869843, "learning_rate": 1.0101738926352432e-05, "loss": 0.5026891231536865, "step": 8312 }, { "epoch": 1.5360372325068425, "grad_norm": 0.08874485641717911, "learning_rate": 1.009974411226397e-05, "loss": 0.6369149684906006, "step": 8313 }, { "epoch": 1.5362220092157384, "grad_norm": 0.0581573061645031, "learning_rate": 1.0097749294206e-05, "loss": 0.3060384690761566, "step": 8314 }, { "epoch": 1.5364067859246342, "grad_norm": 0.07999205589294434, "learning_rate": 1.0095754472257919e-05, "loss": 0.5771450996398926, "step": 8315 }, { "epoch": 1.53659156263353, "grad_norm": 0.07648079842329025, "learning_rate": 1.0093759646499106e-05, "loss": 0.6316032409667969, "step": 8316 }, { "epoch": 1.5367763393424259, "grad_norm": 0.09479255229234695, "learning_rate": 1.0091764817008953e-05, "loss": 0.5895540118217468, "step": 8317 }, { "epoch": 1.5369611160513217, "grad_norm": 0.0748986080288887, "learning_rate": 1.0089769983866849e-05, "loss": 0.4719810485839844, "step": 8318 }, { "epoch": 1.5371458927602175, "grad_norm": 0.08592166006565094, "learning_rate": 1.008777514715218e-05, "loss": 0.6072490811347961, "step": 8319 }, { "epoch": 1.5373306694691133, "grad_norm": 0.06951703131198883, "learning_rate": 1.0085780306944335e-05, "loss": 0.49982306361198425, "step": 8320 }, { "epoch": 1.5375154461780092, "grad_norm": 0.09663759917020798, "learning_rate": 1.00837854633227e-05, "loss": 0.6411300301551819, "step": 8321 }, { "epoch": 1.537700222886905, "grad_norm": 0.05903920531272888, "learning_rate": 1.0081790616366665e-05, "loss": 0.4022294878959656, "step": 8322 }, { "epoch": 1.5378849995958008, "grad_norm": 0.08403241634368896, "learning_rate": 1.0079795766155622e-05, "loss": 0.5444654226303101, "step": 8323 }, { "epoch": 1.5380697763046967, "grad_norm": 0.08010748028755188, "learning_rate": 1.0077800912768955e-05, "loss": 0.4678199887275696, "step": 8324 }, { "epoch": 1.5382545530135925, "grad_norm": 0.09412200003862381, "learning_rate": 1.007580605628606e-05, "loss": 0.5801590085029602, "step": 8325 }, { "epoch": 1.5384393297224885, "grad_norm": 0.07934234291315079, "learning_rate": 1.0073811196786316e-05, "loss": 0.4976900517940521, "step": 8326 }, { "epoch": 1.5386241064313844, "grad_norm": 0.07337716966867447, "learning_rate": 1.007181633434912e-05, "loss": 0.576170027256012, "step": 8327 }, { "epoch": 1.5388088831402802, "grad_norm": 0.0709516704082489, "learning_rate": 1.0069821469053858e-05, "loss": 0.6091797351837158, "step": 8328 }, { "epoch": 1.538993659849176, "grad_norm": 0.07814224809408188, "learning_rate": 1.0067826600979917e-05, "loss": 0.6235194206237793, "step": 8329 }, { "epoch": 1.5391784365580718, "grad_norm": 0.06715002655982971, "learning_rate": 1.0065831730206695e-05, "loss": 0.4767167568206787, "step": 8330 }, { "epoch": 1.5393632132669677, "grad_norm": 0.09266090393066406, "learning_rate": 1.0063836856813571e-05, "loss": 0.5710633993148804, "step": 8331 }, { "epoch": 1.5395479899758635, "grad_norm": 0.06542815268039703, "learning_rate": 1.0061841980879941e-05, "loss": 0.4574289917945862, "step": 8332 }, { "epoch": 1.5397327666847593, "grad_norm": 0.06793387979269028, "learning_rate": 1.0059847102485196e-05, "loss": 0.4224914312362671, "step": 8333 }, { "epoch": 1.5399175433936554, "grad_norm": 0.07768009603023529, "learning_rate": 1.0057852221708722e-05, "loss": 0.4772679805755615, "step": 8334 }, { "epoch": 1.5401023201025512, "grad_norm": 0.08414895832538605, "learning_rate": 1.005585733862991e-05, "loss": 0.5030648112297058, "step": 8335 }, { "epoch": 1.540287096811447, "grad_norm": 0.07794249802827835, "learning_rate": 1.0053862453328152e-05, "loss": 0.434922456741333, "step": 8336 }, { "epoch": 1.5404718735203429, "grad_norm": 0.09836164861917496, "learning_rate": 1.0051867565882838e-05, "loss": 0.8413593173027039, "step": 8337 }, { "epoch": 1.5406566502292387, "grad_norm": 0.08989561349153519, "learning_rate": 1.0049872676373354e-05, "loss": 0.6303113698959351, "step": 8338 }, { "epoch": 1.5408414269381345, "grad_norm": 0.07046696543693542, "learning_rate": 1.0047877784879094e-05, "loss": 0.4028065502643585, "step": 8339 }, { "epoch": 1.5410262036470304, "grad_norm": 0.08524170517921448, "learning_rate": 1.004588289147945e-05, "loss": 0.569879949092865, "step": 8340 }, { "epoch": 1.5412109803559262, "grad_norm": 0.0819799154996872, "learning_rate": 1.004388799625381e-05, "loss": 0.6062819957733154, "step": 8341 }, { "epoch": 1.541395757064822, "grad_norm": 0.06758811324834824, "learning_rate": 1.0041893099281564e-05, "loss": 0.5327195525169373, "step": 8342 }, { "epoch": 1.5415805337737178, "grad_norm": 0.057109855115413666, "learning_rate": 1.0039898200642105e-05, "loss": 0.38353052735328674, "step": 8343 }, { "epoch": 1.5417653104826137, "grad_norm": 0.07154912501573563, "learning_rate": 1.0037903300414821e-05, "loss": 0.37415507435798645, "step": 8344 }, { "epoch": 1.5419500871915095, "grad_norm": 0.055001989006996155, "learning_rate": 1.0035908398679101e-05, "loss": 0.27712109684944153, "step": 8345 }, { "epoch": 1.5421348639004053, "grad_norm": 0.07522137463092804, "learning_rate": 1.0033913495514346e-05, "loss": 0.5713220238685608, "step": 8346 }, { "epoch": 1.5423196406093012, "grad_norm": 0.07752378284931183, "learning_rate": 1.0031918590999938e-05, "loss": 0.49891120195388794, "step": 8347 }, { "epoch": 1.542504417318197, "grad_norm": 0.0699925497174263, "learning_rate": 1.0029923685215268e-05, "loss": 0.461686372756958, "step": 8348 }, { "epoch": 1.5426891940270928, "grad_norm": 0.07942511141300201, "learning_rate": 1.0027928778239729e-05, "loss": 0.5484244227409363, "step": 8349 }, { "epoch": 1.5428739707359886, "grad_norm": 0.06200651451945305, "learning_rate": 1.0025933870152714e-05, "loss": 0.3867597281932831, "step": 8350 }, { "epoch": 1.5430587474448845, "grad_norm": 0.0850205346941948, "learning_rate": 1.0023938961033612e-05, "loss": 0.6178774833679199, "step": 8351 }, { "epoch": 1.5432435241537803, "grad_norm": 0.06608953326940536, "learning_rate": 1.0021944050961809e-05, "loss": 0.4367190897464752, "step": 8352 }, { "epoch": 1.5434283008626761, "grad_norm": 0.06947171688079834, "learning_rate": 1.0019949140016707e-05, "loss": 0.4598299264907837, "step": 8353 }, { "epoch": 1.543613077571572, "grad_norm": 0.08985291421413422, "learning_rate": 1.0017954228277694e-05, "loss": 0.7923848628997803, "step": 8354 }, { "epoch": 1.5437978542804678, "grad_norm": 0.07726897299289703, "learning_rate": 1.001595931582415e-05, "loss": 0.42478588223457336, "step": 8355 }, { "epoch": 1.5439826309893638, "grad_norm": 0.07254164665937424, "learning_rate": 1.0013964402735482e-05, "loss": 0.5146327018737793, "step": 8356 }, { "epoch": 1.5441674076982597, "grad_norm": 0.09229913353919983, "learning_rate": 1.0011969489091073e-05, "loss": 0.6449254751205444, "step": 8357 }, { "epoch": 1.5443521844071555, "grad_norm": 0.0819351077079773, "learning_rate": 1.0009974574970316e-05, "loss": 0.634602427482605, "step": 8358 }, { "epoch": 1.5445369611160513, "grad_norm": 0.09047498553991318, "learning_rate": 1.0007979660452601e-05, "loss": 0.67640221118927, "step": 8359 }, { "epoch": 1.5447217378249471, "grad_norm": 0.06298615038394928, "learning_rate": 1.0005984745617321e-05, "loss": 0.39689722657203674, "step": 8360 }, { "epoch": 1.544906514533843, "grad_norm": 0.08086885511875153, "learning_rate": 1.0003989830543868e-05, "loss": 0.3936644196510315, "step": 8361 }, { "epoch": 1.5450912912427388, "grad_norm": 0.08219406753778458, "learning_rate": 1.0001994915311628e-05, "loss": 0.6133519411087036, "step": 8362 }, { "epoch": 1.5452760679516349, "grad_norm": 0.08173570781946182, "learning_rate": 1e-05, "loss": 0.5591699481010437, "step": 8363 }, { "epoch": 1.5454608446605307, "grad_norm": 0.07632359862327576, "learning_rate": 9.998005084688372e-06, "loss": 0.5126186609268188, "step": 8364 }, { "epoch": 1.5456456213694265, "grad_norm": 0.07817716896533966, "learning_rate": 9.996010169456137e-06, "loss": 0.5563318133354187, "step": 8365 }, { "epoch": 1.5458303980783223, "grad_norm": 0.060379207134246826, "learning_rate": 9.994015254382682e-06, "loss": 0.36338597536087036, "step": 8366 }, { "epoch": 1.5460151747872182, "grad_norm": 0.06967508792877197, "learning_rate": 9.9920203395474e-06, "loss": 0.4792593717575073, "step": 8367 }, { "epoch": 1.546199951496114, "grad_norm": 0.08439387381076813, "learning_rate": 9.990025425029689e-06, "loss": 0.5181248784065247, "step": 8368 }, { "epoch": 1.5463847282050098, "grad_norm": 0.0615164078772068, "learning_rate": 9.988030510908929e-06, "loss": 0.43316343426704407, "step": 8369 }, { "epoch": 1.5465695049139057, "grad_norm": 0.07169511914253235, "learning_rate": 9.98603559726452e-06, "loss": 0.4729943871498108, "step": 8370 }, { "epoch": 1.5467542816228015, "grad_norm": 0.09175395220518112, "learning_rate": 9.984040684175853e-06, "loss": 0.6354399919509888, "step": 8371 }, { "epoch": 1.5469390583316973, "grad_norm": 0.0714784562587738, "learning_rate": 9.982045771722311e-06, "loss": 0.4669618010520935, "step": 8372 }, { "epoch": 1.5471238350405931, "grad_norm": 0.0871136412024498, "learning_rate": 9.980050859983296e-06, "loss": 0.5754742622375488, "step": 8373 }, { "epoch": 1.547308611749489, "grad_norm": 0.09413383156061172, "learning_rate": 9.978055949038193e-06, "loss": 0.7470415830612183, "step": 8374 }, { "epoch": 1.5474933884583848, "grad_norm": 0.07848978042602539, "learning_rate": 9.976061038966391e-06, "loss": 0.5846307277679443, "step": 8375 }, { "epoch": 1.5476781651672806, "grad_norm": 0.07685250043869019, "learning_rate": 9.974066129847291e-06, "loss": 0.5765482187271118, "step": 8376 }, { "epoch": 1.5478629418761765, "grad_norm": 0.06022125110030174, "learning_rate": 9.972071221760274e-06, "loss": 0.367461621761322, "step": 8377 }, { "epoch": 1.5480477185850723, "grad_norm": 0.09071533381938934, "learning_rate": 9.970076314784735e-06, "loss": 0.6036496758460999, "step": 8378 }, { "epoch": 1.5482324952939681, "grad_norm": 0.06718786805868149, "learning_rate": 9.968081409000067e-06, "loss": 0.4123038649559021, "step": 8379 }, { "epoch": 1.548417272002864, "grad_norm": 0.08263219892978668, "learning_rate": 9.966086504485657e-06, "loss": 0.5069796442985535, "step": 8380 }, { "epoch": 1.5486020487117598, "grad_norm": 0.09405094385147095, "learning_rate": 9.964091601320897e-06, "loss": 0.6533642411231995, "step": 8381 }, { "epoch": 1.5487868254206556, "grad_norm": 0.0682285875082016, "learning_rate": 9.962096699585184e-06, "loss": 0.45217806100845337, "step": 8382 }, { "epoch": 1.5489716021295514, "grad_norm": 0.08995096385478973, "learning_rate": 9.960101799357899e-06, "loss": 0.6177809834480286, "step": 8383 }, { "epoch": 1.5491563788384473, "grad_norm": 0.09581541270017624, "learning_rate": 9.958106900718438e-06, "loss": 0.6463266015052795, "step": 8384 }, { "epoch": 1.5493411555473433, "grad_norm": 0.09332716464996338, "learning_rate": 9.956112003746194e-06, "loss": 0.6386651396751404, "step": 8385 }, { "epoch": 1.5495259322562391, "grad_norm": 0.08254636079072952, "learning_rate": 9.954117108520552e-06, "loss": 0.5312403440475464, "step": 8386 }, { "epoch": 1.549710708965135, "grad_norm": 0.10466068983078003, "learning_rate": 9.952122215120906e-06, "loss": 0.5892578959465027, "step": 8387 }, { "epoch": 1.5498954856740308, "grad_norm": 0.08869829773902893, "learning_rate": 9.950127323626648e-06, "loss": 0.5440428853034973, "step": 8388 }, { "epoch": 1.5500802623829266, "grad_norm": 0.0550822913646698, "learning_rate": 9.948132434117165e-06, "loss": 0.29246920347213745, "step": 8389 }, { "epoch": 1.5502650390918224, "grad_norm": 0.07488343119621277, "learning_rate": 9.946137546671853e-06, "loss": 0.5593857169151306, "step": 8390 }, { "epoch": 1.5504498158007183, "grad_norm": 0.07491283863782883, "learning_rate": 9.944142661370091e-06, "loss": 0.4983839690685272, "step": 8391 }, { "epoch": 1.5506345925096143, "grad_norm": 0.08424947410821915, "learning_rate": 9.94214777829128e-06, "loss": 0.5868728160858154, "step": 8392 }, { "epoch": 1.5508193692185102, "grad_norm": 0.08198340982198715, "learning_rate": 9.940152897514809e-06, "loss": 0.4823242723941803, "step": 8393 }, { "epoch": 1.551004145927406, "grad_norm": 0.08373264223337173, "learning_rate": 9.93815801912006e-06, "loss": 0.5581148862838745, "step": 8394 }, { "epoch": 1.5511889226363018, "grad_norm": 0.06925924867391586, "learning_rate": 9.936163143186429e-06, "loss": 0.5806288123130798, "step": 8395 }, { "epoch": 1.5513736993451976, "grad_norm": 0.06480623781681061, "learning_rate": 9.93416826979331e-06, "loss": 0.4784233272075653, "step": 8396 }, { "epoch": 1.5515584760540935, "grad_norm": 0.07471255213022232, "learning_rate": 9.932173399020085e-06, "loss": 0.6061058640480042, "step": 8397 }, { "epoch": 1.5517432527629893, "grad_norm": 0.06772679090499878, "learning_rate": 9.930178530946145e-06, "loss": 0.4416591227054596, "step": 8398 }, { "epoch": 1.5519280294718851, "grad_norm": 0.07152847200632095, "learning_rate": 9.928183665650885e-06, "loss": 0.38082125782966614, "step": 8399 }, { "epoch": 1.552112806180781, "grad_norm": 0.06756367534399033, "learning_rate": 9.926188803213687e-06, "loss": 0.43275314569473267, "step": 8400 }, { "epoch": 1.5522975828896768, "grad_norm": 0.07801774889230728, "learning_rate": 9.924193943713943e-06, "loss": 0.5323340892791748, "step": 8401 }, { "epoch": 1.5524823595985726, "grad_norm": 0.06771323084831238, "learning_rate": 9.922199087231046e-06, "loss": 0.481885701417923, "step": 8402 }, { "epoch": 1.5526671363074684, "grad_norm": 0.06427159905433655, "learning_rate": 9.92020423384438e-06, "loss": 0.46014758944511414, "step": 8403 }, { "epoch": 1.5528519130163643, "grad_norm": 0.07718753814697266, "learning_rate": 9.918209383633337e-06, "loss": 0.6056550145149231, "step": 8404 }, { "epoch": 1.55303668972526, "grad_norm": 0.06274012476205826, "learning_rate": 9.916214536677304e-06, "loss": 0.4475315511226654, "step": 8405 }, { "epoch": 1.553221466434156, "grad_norm": 0.0780898854136467, "learning_rate": 9.914219693055669e-06, "loss": 0.5073195695877075, "step": 8406 }, { "epoch": 1.5534062431430518, "grad_norm": 0.07583803683519363, "learning_rate": 9.912224852847825e-06, "loss": 0.5635548233985901, "step": 8407 }, { "epoch": 1.5535910198519476, "grad_norm": 0.06344740837812424, "learning_rate": 9.910230016133153e-06, "loss": 0.379596471786499, "step": 8408 }, { "epoch": 1.5537757965608434, "grad_norm": 0.06110159680247307, "learning_rate": 9.908235182991047e-06, "loss": 0.3941616714000702, "step": 8409 }, { "epoch": 1.5539605732697392, "grad_norm": 0.07348445057868958, "learning_rate": 9.906240353500899e-06, "loss": 0.472446471452713, "step": 8410 }, { "epoch": 1.554145349978635, "grad_norm": 0.08863788843154907, "learning_rate": 9.904245527742083e-06, "loss": 0.5516306161880493, "step": 8411 }, { "epoch": 1.554330126687531, "grad_norm": 0.10298895090818405, "learning_rate": 9.902250705794e-06, "loss": 0.5250436067581177, "step": 8412 }, { "epoch": 1.5545149033964267, "grad_norm": 0.07118190824985504, "learning_rate": 9.900255887736036e-06, "loss": 0.47028887271881104, "step": 8413 }, { "epoch": 1.5546996801053228, "grad_norm": 0.08338896185159683, "learning_rate": 9.89826107364757e-06, "loss": 0.4545728862285614, "step": 8414 }, { "epoch": 1.5548844568142186, "grad_norm": 0.09131994098424911, "learning_rate": 9.896266263607996e-06, "loss": 0.5806403160095215, "step": 8415 }, { "epoch": 1.5550692335231144, "grad_norm": 0.07578743249177933, "learning_rate": 9.894271457696703e-06, "loss": 0.5110961198806763, "step": 8416 }, { "epoch": 1.5552540102320103, "grad_norm": 0.12886503338813782, "learning_rate": 9.892276655993073e-06, "loss": 0.5968006253242493, "step": 8417 }, { "epoch": 1.555438786940906, "grad_norm": 0.07970307767391205, "learning_rate": 9.890281858576494e-06, "loss": 0.5507674217224121, "step": 8418 }, { "epoch": 1.555623563649802, "grad_norm": 0.07072608172893524, "learning_rate": 9.888287065526358e-06, "loss": 0.4310251772403717, "step": 8419 }, { "epoch": 1.5558083403586978, "grad_norm": 0.061415500938892365, "learning_rate": 9.886292276922044e-06, "loss": 0.36818867921829224, "step": 8420 }, { "epoch": 1.5559931170675936, "grad_norm": 0.090281181037426, "learning_rate": 9.884297492842944e-06, "loss": 0.49106040596961975, "step": 8421 }, { "epoch": 1.5561778937764896, "grad_norm": 0.06825218349695206, "learning_rate": 9.882302713368438e-06, "loss": 0.5302210450172424, "step": 8422 }, { "epoch": 1.5563626704853855, "grad_norm": 0.07292143255472183, "learning_rate": 9.880307938577917e-06, "loss": 0.4451478123664856, "step": 8423 }, { "epoch": 1.5565474471942813, "grad_norm": 0.07285430282354355, "learning_rate": 9.878313168550768e-06, "loss": 0.4047282338142395, "step": 8424 }, { "epoch": 1.5567322239031771, "grad_norm": 0.07194212079048157, "learning_rate": 9.876318403366371e-06, "loss": 0.37185072898864746, "step": 8425 }, { "epoch": 1.556917000612073, "grad_norm": 0.06978459656238556, "learning_rate": 9.874323643104116e-06, "loss": 0.46612977981567383, "step": 8426 }, { "epoch": 1.5571017773209688, "grad_norm": 0.076767697930336, "learning_rate": 9.872328887843391e-06, "loss": 0.49881860613822937, "step": 8427 }, { "epoch": 1.5572865540298646, "grad_norm": 0.06601028889417648, "learning_rate": 9.87033413766357e-06, "loss": 0.4231835901737213, "step": 8428 }, { "epoch": 1.5574713307387604, "grad_norm": 0.07401569187641144, "learning_rate": 9.868339392644046e-06, "loss": 0.5855440497398376, "step": 8429 }, { "epoch": 1.5576561074476563, "grad_norm": 0.0813252180814743, "learning_rate": 9.866344652864208e-06, "loss": 0.593075692653656, "step": 8430 }, { "epoch": 1.557840884156552, "grad_norm": 0.07243362069129944, "learning_rate": 9.864349918403427e-06, "loss": 0.5803065896034241, "step": 8431 }, { "epoch": 1.558025660865448, "grad_norm": 0.11118949949741364, "learning_rate": 9.862355189341097e-06, "loss": 0.6703124642372131, "step": 8432 }, { "epoch": 1.5582104375743437, "grad_norm": 0.07939611375331879, "learning_rate": 9.860360465756606e-06, "loss": 0.6099793314933777, "step": 8433 }, { "epoch": 1.5583952142832396, "grad_norm": 0.08516814559698105, "learning_rate": 9.858365747729325e-06, "loss": 0.6696739196777344, "step": 8434 }, { "epoch": 1.5585799909921354, "grad_norm": 0.07504752278327942, "learning_rate": 9.856371035338641e-06, "loss": 0.5141589045524597, "step": 8435 }, { "epoch": 1.5587647677010312, "grad_norm": 0.07196090370416641, "learning_rate": 9.85437632866395e-06, "loss": 0.540097177028656, "step": 8436 }, { "epoch": 1.558949544409927, "grad_norm": 0.06923947483301163, "learning_rate": 9.85238162778462e-06, "loss": 0.40927737951278687, "step": 8437 }, { "epoch": 1.5591343211188229, "grad_norm": 0.07277125865221024, "learning_rate": 9.850386932780042e-06, "loss": 0.45931127667427063, "step": 8438 }, { "epoch": 1.5593190978277187, "grad_norm": 0.09042638540267944, "learning_rate": 9.848392243729594e-06, "loss": 0.6475035548210144, "step": 8439 }, { "epoch": 1.5595038745366145, "grad_norm": 0.07516621053218842, "learning_rate": 9.846397560712658e-06, "loss": 0.47815361618995667, "step": 8440 }, { "epoch": 1.5596886512455104, "grad_norm": 0.06490602344274521, "learning_rate": 9.844402883808623e-06, "loss": 0.40921348333358765, "step": 8441 }, { "epoch": 1.5598734279544062, "grad_norm": 0.08869045972824097, "learning_rate": 9.842408213096863e-06, "loss": 0.6063772439956665, "step": 8442 }, { "epoch": 1.560058204663302, "grad_norm": 0.08136522024869919, "learning_rate": 9.840413548656764e-06, "loss": 0.5187150239944458, "step": 8443 }, { "epoch": 1.560242981372198, "grad_norm": 0.07281823456287384, "learning_rate": 9.83841889056771e-06, "loss": 0.580685019493103, "step": 8444 }, { "epoch": 1.560427758081094, "grad_norm": 0.06910470873117447, "learning_rate": 9.836424238909073e-06, "loss": 0.4532964825630188, "step": 8445 }, { "epoch": 1.5606125347899897, "grad_norm": 0.06782284379005432, "learning_rate": 9.834429593760241e-06, "loss": 0.47977548837661743, "step": 8446 }, { "epoch": 1.5607973114988856, "grad_norm": 0.0937873125076294, "learning_rate": 9.832434955200597e-06, "loss": 0.6324499249458313, "step": 8447 }, { "epoch": 1.5609820882077814, "grad_norm": 0.08188667893409729, "learning_rate": 9.830440323309514e-06, "loss": 0.5060011148452759, "step": 8448 }, { "epoch": 1.5611668649166772, "grad_norm": 0.09644730389118195, "learning_rate": 9.828445698166375e-06, "loss": 0.6682599186897278, "step": 8449 }, { "epoch": 1.561351641625573, "grad_norm": 0.0819346234202385, "learning_rate": 9.826451079850566e-06, "loss": 0.48520559072494507, "step": 8450 }, { "epoch": 1.561536418334469, "grad_norm": 0.07754965871572495, "learning_rate": 9.824456468441455e-06, "loss": 0.3911689817905426, "step": 8451 }, { "epoch": 1.561721195043365, "grad_norm": 0.07915280759334564, "learning_rate": 9.822461864018427e-06, "loss": 0.5869355797767639, "step": 8452 }, { "epoch": 1.5619059717522608, "grad_norm": 0.09538932144641876, "learning_rate": 9.820467266660868e-06, "loss": 0.7775084376335144, "step": 8453 }, { "epoch": 1.5620907484611566, "grad_norm": 0.06357314437627792, "learning_rate": 9.818472676448144e-06, "loss": 0.42435356974601746, "step": 8454 }, { "epoch": 1.5622755251700524, "grad_norm": 0.06316125392913818, "learning_rate": 9.816478093459643e-06, "loss": 0.3821038007736206, "step": 8455 }, { "epoch": 1.5624603018789482, "grad_norm": 0.07353564351797104, "learning_rate": 9.814483517774738e-06, "loss": 0.5012965202331543, "step": 8456 }, { "epoch": 1.562645078587844, "grad_norm": 0.07389863580465317, "learning_rate": 9.812488949472809e-06, "loss": 0.4186929762363434, "step": 8457 }, { "epoch": 1.56282985529674, "grad_norm": 0.06745057553052902, "learning_rate": 9.810494388633233e-06, "loss": 0.44096508622169495, "step": 8458 }, { "epoch": 1.5630146320056357, "grad_norm": 0.07269903272390366, "learning_rate": 9.808499835335387e-06, "loss": 0.5246237516403198, "step": 8459 }, { "epoch": 1.5631994087145316, "grad_norm": 0.08972185850143433, "learning_rate": 9.806505289658648e-06, "loss": 0.5000263452529907, "step": 8460 }, { "epoch": 1.5633841854234274, "grad_norm": 0.1067720502614975, "learning_rate": 9.804510751682394e-06, "loss": 0.7220219373703003, "step": 8461 }, { "epoch": 1.5635689621323232, "grad_norm": 0.06829439848661423, "learning_rate": 9.802516221486e-06, "loss": 0.3715435266494751, "step": 8462 }, { "epoch": 1.563753738841219, "grad_norm": 0.09388057887554169, "learning_rate": 9.800521699148843e-06, "loss": 0.63585364818573, "step": 8463 }, { "epoch": 1.5639385155501149, "grad_norm": 0.07907140254974365, "learning_rate": 9.7985271847503e-06, "loss": 0.5614437460899353, "step": 8464 }, { "epoch": 1.5641232922590107, "grad_norm": 0.07540346682071686, "learning_rate": 9.796532678369742e-06, "loss": 0.5172854661941528, "step": 8465 }, { "epoch": 1.5643080689679065, "grad_norm": 0.05784687027335167, "learning_rate": 9.794538180086546e-06, "loss": 0.3279561698436737, "step": 8466 }, { "epoch": 1.5644928456768024, "grad_norm": 0.08259300887584686, "learning_rate": 9.79254368998009e-06, "loss": 0.6335062384605408, "step": 8467 }, { "epoch": 1.5646776223856982, "grad_norm": 0.08102997392416, "learning_rate": 9.790549208129745e-06, "loss": 0.45309337973594666, "step": 8468 }, { "epoch": 1.564862399094594, "grad_norm": 0.09200865030288696, "learning_rate": 9.788554734614891e-06, "loss": 0.6316246390342712, "step": 8469 }, { "epoch": 1.5650471758034898, "grad_norm": 0.0958419144153595, "learning_rate": 9.78656026951489e-06, "loss": 0.7225764393806458, "step": 8470 }, { "epoch": 1.5652319525123857, "grad_norm": 0.06633394211530685, "learning_rate": 9.784565812909124e-06, "loss": 0.5134775042533875, "step": 8471 }, { "epoch": 1.5654167292212815, "grad_norm": 0.08685419708490372, "learning_rate": 9.78257136487697e-06, "loss": 0.46518373489379883, "step": 8472 }, { "epoch": 1.5656015059301776, "grad_norm": 0.06596294045448303, "learning_rate": 9.780576925497789e-06, "loss": 0.3888777196407318, "step": 8473 }, { "epoch": 1.5657862826390734, "grad_norm": 0.07480762898921967, "learning_rate": 9.778582494850962e-06, "loss": 0.4374832808971405, "step": 8474 }, { "epoch": 1.5659710593479692, "grad_norm": 0.07130126655101776, "learning_rate": 9.77658807301586e-06, "loss": 0.4217027425765991, "step": 8475 }, { "epoch": 1.566155836056865, "grad_norm": 0.07441869378089905, "learning_rate": 9.774593660071853e-06, "loss": 0.5987619161605835, "step": 8476 }, { "epoch": 1.5663406127657609, "grad_norm": 0.08373252302408218, "learning_rate": 9.772599256098312e-06, "loss": 0.6301749348640442, "step": 8477 }, { "epoch": 1.5665253894746567, "grad_norm": 0.06343624740839005, "learning_rate": 9.770604861174611e-06, "loss": 0.3775671422481537, "step": 8478 }, { "epoch": 1.5667101661835525, "grad_norm": 0.06141326576471329, "learning_rate": 9.768610475380117e-06, "loss": 0.3434855341911316, "step": 8479 }, { "epoch": 1.5668949428924486, "grad_norm": 0.08183632045984268, "learning_rate": 9.766616098794202e-06, "loss": 0.48533132672309875, "step": 8480 }, { "epoch": 1.5670797196013444, "grad_norm": 0.0791330561041832, "learning_rate": 9.764621731496239e-06, "loss": 0.4955023229122162, "step": 8481 }, { "epoch": 1.5672644963102402, "grad_norm": 0.07928652316331863, "learning_rate": 9.762627373565591e-06, "loss": 0.49078071117401123, "step": 8482 }, { "epoch": 1.567449273019136, "grad_norm": 0.0764070600271225, "learning_rate": 9.760633025081633e-06, "loss": 0.5474598407745361, "step": 8483 }, { "epoch": 1.5676340497280319, "grad_norm": 0.08049654215574265, "learning_rate": 9.758638686123732e-06, "loss": 0.5398207902908325, "step": 8484 }, { "epoch": 1.5678188264369277, "grad_norm": 0.061328765004873276, "learning_rate": 9.756644356771256e-06, "loss": 0.3806958794593811, "step": 8485 }, { "epoch": 1.5680036031458235, "grad_norm": 0.08577550202608109, "learning_rate": 9.754650037103577e-06, "loss": 0.6493745446205139, "step": 8486 }, { "epoch": 1.5681883798547194, "grad_norm": 0.07504302263259888, "learning_rate": 9.752655727200051e-06, "loss": 0.5318300127983093, "step": 8487 }, { "epoch": 1.5683731565636152, "grad_norm": 0.08011891692876816, "learning_rate": 9.750661427140057e-06, "loss": 0.5619838237762451, "step": 8488 }, { "epoch": 1.568557933272511, "grad_norm": 0.06651551276445389, "learning_rate": 9.748667137002961e-06, "loss": 0.4472563564777374, "step": 8489 }, { "epoch": 1.5687427099814069, "grad_norm": 0.06522566080093384, "learning_rate": 9.746672856868124e-06, "loss": 0.3749229311943054, "step": 8490 }, { "epoch": 1.5689274866903027, "grad_norm": 0.06759969890117645, "learning_rate": 9.74467858681491e-06, "loss": 0.45194461941719055, "step": 8491 }, { "epoch": 1.5691122633991985, "grad_norm": 0.08250371366739273, "learning_rate": 9.742684326922698e-06, "loss": 0.5971841216087341, "step": 8492 }, { "epoch": 1.5692970401080943, "grad_norm": 0.06295836716890335, "learning_rate": 9.74069007727084e-06, "loss": 0.5253193974494934, "step": 8493 }, { "epoch": 1.5694818168169902, "grad_norm": 0.07202901691198349, "learning_rate": 9.738695837938707e-06, "loss": 0.3833845555782318, "step": 8494 }, { "epoch": 1.569666593525886, "grad_norm": 0.06376456469297409, "learning_rate": 9.736701609005661e-06, "loss": 0.42644819617271423, "step": 8495 }, { "epoch": 1.5698513702347818, "grad_norm": 0.09234484285116196, "learning_rate": 9.734707390551069e-06, "loss": 0.6424220204353333, "step": 8496 }, { "epoch": 1.5700361469436777, "grad_norm": 0.08507819473743439, "learning_rate": 9.73271318265429e-06, "loss": 0.5291893482208252, "step": 8497 }, { "epoch": 1.5702209236525735, "grad_norm": 0.07942351698875427, "learning_rate": 9.730718985394692e-06, "loss": 0.4431830942630768, "step": 8498 }, { "epoch": 1.5704057003614693, "grad_norm": 0.06535894423723221, "learning_rate": 9.728724798851636e-06, "loss": 0.3336215615272522, "step": 8499 }, { "epoch": 1.5705904770703651, "grad_norm": 0.061320461332798004, "learning_rate": 9.726730623104482e-06, "loss": 0.36527639627456665, "step": 8500 }, { "epoch": 1.5705904770703651, "eval_loss": 0.574783444404602, "eval_runtime": 277.0199, "eval_samples_per_second": 65.804, "eval_steps_per_second": 8.227, "step": 8500 }, { "epoch": 1.570775253779261, "grad_norm": 0.095549575984478, "learning_rate": 9.724736458232597e-06, "loss": 0.6905973553657532, "step": 8501 }, { "epoch": 1.570960030488157, "grad_norm": 0.08161711692810059, "learning_rate": 9.722742304315339e-06, "loss": 0.49852216243743896, "step": 8502 }, { "epoch": 1.5711448071970529, "grad_norm": 0.08168318122625351, "learning_rate": 9.72074816143207e-06, "loss": 0.5491087436676025, "step": 8503 }, { "epoch": 1.5713295839059487, "grad_norm": 0.061127275228500366, "learning_rate": 9.71875402966215e-06, "loss": 0.401092529296875, "step": 8504 }, { "epoch": 1.5715143606148445, "grad_norm": 0.08369404822587967, "learning_rate": 9.716759909084939e-06, "loss": 0.5485149025917053, "step": 8505 }, { "epoch": 1.5716991373237403, "grad_norm": 0.09047067910432816, "learning_rate": 9.714765799779803e-06, "loss": 0.6157129406929016, "step": 8506 }, { "epoch": 1.5718839140326362, "grad_norm": 0.07468681037425995, "learning_rate": 9.712771701826088e-06, "loss": 0.4496452212333679, "step": 8507 }, { "epoch": 1.572068690741532, "grad_norm": 0.06309043616056442, "learning_rate": 9.710777615303163e-06, "loss": 0.33331549167633057, "step": 8508 }, { "epoch": 1.5722534674504278, "grad_norm": 0.08015831559896469, "learning_rate": 9.708783540290388e-06, "loss": 0.5962845683097839, "step": 8509 }, { "epoch": 1.5724382441593239, "grad_norm": 0.06803075969219208, "learning_rate": 9.706789476867115e-06, "loss": 0.5292885303497314, "step": 8510 }, { "epoch": 1.5726230208682197, "grad_norm": 0.08438073098659515, "learning_rate": 9.704795425112699e-06, "loss": 0.5161520838737488, "step": 8511 }, { "epoch": 1.5728077975771155, "grad_norm": 0.0854136198759079, "learning_rate": 9.702801385106508e-06, "loss": 0.5688244700431824, "step": 8512 }, { "epoch": 1.5729925742860114, "grad_norm": 0.0847952589392662, "learning_rate": 9.700807356927888e-06, "loss": 0.4528445601463318, "step": 8513 }, { "epoch": 1.5731773509949072, "grad_norm": 0.06744641810655594, "learning_rate": 9.698813340656199e-06, "loss": 0.3817410171031952, "step": 8514 }, { "epoch": 1.573362127703803, "grad_norm": 0.08746075630187988, "learning_rate": 9.6968193363708e-06, "loss": 0.5919474363327026, "step": 8515 }, { "epoch": 1.5735469044126988, "grad_norm": 0.08473276346921921, "learning_rate": 9.694825344151039e-06, "loss": 0.6052566766738892, "step": 8516 }, { "epoch": 1.5737316811215947, "grad_norm": 0.07219505310058594, "learning_rate": 9.692831364076277e-06, "loss": 0.40844255685806274, "step": 8517 }, { "epoch": 1.5739164578304905, "grad_norm": 0.0649571493268013, "learning_rate": 9.690837396225867e-06, "loss": 0.5000103712081909, "step": 8518 }, { "epoch": 1.5741012345393863, "grad_norm": 0.061946313828229904, "learning_rate": 9.688843440679157e-06, "loss": 0.39478957653045654, "step": 8519 }, { "epoch": 1.5742860112482822, "grad_norm": 0.07370550185441971, "learning_rate": 9.686849497515509e-06, "loss": 0.4202343225479126, "step": 8520 }, { "epoch": 1.574470787957178, "grad_norm": 0.07990618795156479, "learning_rate": 9.684855566814268e-06, "loss": 0.594482421875, "step": 8521 }, { "epoch": 1.5746555646660738, "grad_norm": 0.07135336846113205, "learning_rate": 9.68286164865479e-06, "loss": 0.387400358915329, "step": 8522 }, { "epoch": 1.5748403413749696, "grad_norm": 0.06443370878696442, "learning_rate": 9.680867743116428e-06, "loss": 0.3974674642086029, "step": 8523 }, { "epoch": 1.5750251180838655, "grad_norm": 0.07257106900215149, "learning_rate": 9.67887385027853e-06, "loss": 0.3580196797847748, "step": 8524 }, { "epoch": 1.5752098947927613, "grad_norm": 0.08325569331645966, "learning_rate": 9.676879970220447e-06, "loss": 0.5409526228904724, "step": 8525 }, { "epoch": 1.5753946715016571, "grad_norm": 0.06914255023002625, "learning_rate": 9.674886103021535e-06, "loss": 0.4544118642807007, "step": 8526 }, { "epoch": 1.575579448210553, "grad_norm": 0.08010513335466385, "learning_rate": 9.672892248761134e-06, "loss": 0.5300863981246948, "step": 8527 }, { "epoch": 1.5757642249194488, "grad_norm": 0.0959707498550415, "learning_rate": 9.670898407518598e-06, "loss": 0.5134875178337097, "step": 8528 }, { "epoch": 1.5759490016283446, "grad_norm": 0.07733378559350967, "learning_rate": 9.668904579373281e-06, "loss": 0.42300963401794434, "step": 8529 }, { "epoch": 1.5761337783372404, "grad_norm": 0.06547591835260391, "learning_rate": 9.666910764404522e-06, "loss": 0.3899762034416199, "step": 8530 }, { "epoch": 1.5763185550461363, "grad_norm": 0.0907437801361084, "learning_rate": 9.66491696269167e-06, "loss": 0.6079534888267517, "step": 8531 }, { "epoch": 1.5765033317550323, "grad_norm": 0.08068032562732697, "learning_rate": 9.662923174314081e-06, "loss": 0.5906936526298523, "step": 8532 }, { "epoch": 1.5766881084639282, "grad_norm": 0.07635143399238586, "learning_rate": 9.66092939935109e-06, "loss": 0.47133001685142517, "step": 8533 }, { "epoch": 1.576872885172824, "grad_norm": 0.08321890980005264, "learning_rate": 9.658935637882051e-06, "loss": 0.43831631541252136, "step": 8534 }, { "epoch": 1.5770576618817198, "grad_norm": 0.08589746803045273, "learning_rate": 9.656941889986304e-06, "loss": 0.5517308712005615, "step": 8535 }, { "epoch": 1.5772424385906156, "grad_norm": 0.08684679120779037, "learning_rate": 9.654948155743197e-06, "loss": 0.5462250113487244, "step": 8536 }, { "epoch": 1.5774272152995115, "grad_norm": 0.07754339277744293, "learning_rate": 9.652954435232076e-06, "loss": 0.47365671396255493, "step": 8537 }, { "epoch": 1.5776119920084073, "grad_norm": 0.0754380151629448, "learning_rate": 9.650960728532281e-06, "loss": 0.5568907856941223, "step": 8538 }, { "epoch": 1.5777967687173033, "grad_norm": 0.0809510350227356, "learning_rate": 9.648967035723155e-06, "loss": 0.6057025790214539, "step": 8539 }, { "epoch": 1.5779815454261992, "grad_norm": 0.06684159487485886, "learning_rate": 9.646973356884048e-06, "loss": 0.45625466108322144, "step": 8540 }, { "epoch": 1.578166322135095, "grad_norm": 0.08466535061597824, "learning_rate": 9.644979692094291e-06, "loss": 0.5078467726707458, "step": 8541 }, { "epoch": 1.5783510988439908, "grad_norm": 0.07972890883684158, "learning_rate": 9.642986041433234e-06, "loss": 0.534195065498352, "step": 8542 }, { "epoch": 1.5785358755528867, "grad_norm": 0.06992193311452866, "learning_rate": 9.64099240498022e-06, "loss": 0.3801504671573639, "step": 8543 }, { "epoch": 1.5787206522617825, "grad_norm": 0.09189796447753906, "learning_rate": 9.638998782814578e-06, "loss": 0.8108822107315063, "step": 8544 }, { "epoch": 1.5789054289706783, "grad_norm": 0.08371283113956451, "learning_rate": 9.637005175015658e-06, "loss": 0.606974184513092, "step": 8545 }, { "epoch": 1.5790902056795741, "grad_norm": 0.08414312452077866, "learning_rate": 9.6350115816628e-06, "loss": 0.5521185398101807, "step": 8546 }, { "epoch": 1.57927498238847, "grad_norm": 0.0676870197057724, "learning_rate": 9.633018002835331e-06, "loss": 0.45219096541404724, "step": 8547 }, { "epoch": 1.5794597590973658, "grad_norm": 0.07192827016115189, "learning_rate": 9.631024438612602e-06, "loss": 0.5052711963653564, "step": 8548 }, { "epoch": 1.5796445358062616, "grad_norm": 0.05874146893620491, "learning_rate": 9.62903088907395e-06, "loss": 0.4122426211833954, "step": 8549 }, { "epoch": 1.5798293125151575, "grad_norm": 0.07684757560491562, "learning_rate": 9.627037354298702e-06, "loss": 0.4595337212085724, "step": 8550 }, { "epoch": 1.5800140892240533, "grad_norm": 0.08987244218587875, "learning_rate": 9.625043834366204e-06, "loss": 0.7718538641929626, "step": 8551 }, { "epoch": 1.5801988659329491, "grad_norm": 0.054467007517814636, "learning_rate": 9.623050329355786e-06, "loss": 0.3730161488056183, "step": 8552 }, { "epoch": 1.580383642641845, "grad_norm": 0.09580789506435394, "learning_rate": 9.621056839346785e-06, "loss": 0.6520174741744995, "step": 8553 }, { "epoch": 1.5805684193507408, "grad_norm": 0.07195496559143066, "learning_rate": 9.619063364418539e-06, "loss": 0.5536537170410156, "step": 8554 }, { "epoch": 1.5807531960596366, "grad_norm": 0.07392167299985886, "learning_rate": 9.617069904650378e-06, "loss": 0.5084531903266907, "step": 8555 }, { "epoch": 1.5809379727685324, "grad_norm": 0.07539352029561996, "learning_rate": 9.615076460121636e-06, "loss": 0.5134398341178894, "step": 8556 }, { "epoch": 1.5811227494774283, "grad_norm": 0.0947013720870018, "learning_rate": 9.613083030911647e-06, "loss": 0.6003947854042053, "step": 8557 }, { "epoch": 1.581307526186324, "grad_norm": 0.08073306828737259, "learning_rate": 9.611089617099743e-06, "loss": 0.5335020422935486, "step": 8558 }, { "epoch": 1.58149230289522, "grad_norm": 0.06273633986711502, "learning_rate": 9.609096218765254e-06, "loss": 0.4417981207370758, "step": 8559 }, { "epoch": 1.5816770796041157, "grad_norm": 0.08009282499551773, "learning_rate": 9.607102835987516e-06, "loss": 0.6260141134262085, "step": 8560 }, { "epoch": 1.5818618563130118, "grad_norm": 0.07559967786073685, "learning_rate": 9.605109468845854e-06, "loss": 0.43849584460258484, "step": 8561 }, { "epoch": 1.5820466330219076, "grad_norm": 0.0853399932384491, "learning_rate": 9.603116117419597e-06, "loss": 0.4622129797935486, "step": 8562 }, { "epoch": 1.5822314097308035, "grad_norm": 0.05605161562561989, "learning_rate": 9.601122781788082e-06, "loss": 0.31195148825645447, "step": 8563 }, { "epoch": 1.5824161864396993, "grad_norm": 0.05759282410144806, "learning_rate": 9.599129462030628e-06, "loss": 0.4043887257575989, "step": 8564 }, { "epoch": 1.582600963148595, "grad_norm": 0.08678488433361053, "learning_rate": 9.59713615822657e-06, "loss": 0.5891516804695129, "step": 8565 }, { "epoch": 1.582785739857491, "grad_norm": 0.06864611059427261, "learning_rate": 9.595142870455233e-06, "loss": 0.48909685015678406, "step": 8566 }, { "epoch": 1.5829705165663868, "grad_norm": 0.063519187271595, "learning_rate": 9.59314959879594e-06, "loss": 0.42133191227912903, "step": 8567 }, { "epoch": 1.5831552932752828, "grad_norm": 0.07372412085533142, "learning_rate": 9.591156343328026e-06, "loss": 0.49911460280418396, "step": 8568 }, { "epoch": 1.5833400699841786, "grad_norm": 0.08856049925088882, "learning_rate": 9.589163104130804e-06, "loss": 0.5521577596664429, "step": 8569 }, { "epoch": 1.5835248466930745, "grad_norm": 0.1006108745932579, "learning_rate": 9.587169881283606e-06, "loss": 0.6770339012145996, "step": 8570 }, { "epoch": 1.5837096234019703, "grad_norm": 0.07106732577085495, "learning_rate": 9.58517667486576e-06, "loss": 0.49186861515045166, "step": 8571 }, { "epoch": 1.5838944001108661, "grad_norm": 0.06963815540075302, "learning_rate": 9.583183484956578e-06, "loss": 0.4227297008037567, "step": 8572 }, { "epoch": 1.584079176819762, "grad_norm": 0.11049696803092957, "learning_rate": 9.581190311635392e-06, "loss": 0.7336570024490356, "step": 8573 }, { "epoch": 1.5842639535286578, "grad_norm": 0.060407184064388275, "learning_rate": 9.579197154981523e-06, "loss": 0.4391293525695801, "step": 8574 }, { "epoch": 1.5844487302375536, "grad_norm": 0.06626936793327332, "learning_rate": 9.57720401507429e-06, "loss": 0.4679132401943207, "step": 8575 }, { "epoch": 1.5846335069464494, "grad_norm": 0.0940513014793396, "learning_rate": 9.575210891993012e-06, "loss": 0.6576391458511353, "step": 8576 }, { "epoch": 1.5848182836553453, "grad_norm": 0.08608409017324448, "learning_rate": 9.573217785817014e-06, "loss": 0.7329196929931641, "step": 8577 }, { "epoch": 1.585003060364241, "grad_norm": 0.05827013775706291, "learning_rate": 9.571224696625612e-06, "loss": 0.3933447301387787, "step": 8578 }, { "epoch": 1.585187837073137, "grad_norm": 0.07204660028219223, "learning_rate": 9.569231624498125e-06, "loss": 0.47026368975639343, "step": 8579 }, { "epoch": 1.5853726137820328, "grad_norm": 0.07245006412267685, "learning_rate": 9.567238569513872e-06, "loss": 0.4619942605495453, "step": 8580 }, { "epoch": 1.5855573904909286, "grad_norm": 0.08662433177232742, "learning_rate": 9.56524553175217e-06, "loss": 0.6254631280899048, "step": 8581 }, { "epoch": 1.5857421671998244, "grad_norm": 0.07138058543205261, "learning_rate": 9.563252511292335e-06, "loss": 0.5061289072036743, "step": 8582 }, { "epoch": 1.5859269439087202, "grad_norm": 0.0875978097319603, "learning_rate": 9.561259508213687e-06, "loss": 0.6061347723007202, "step": 8583 }, { "epoch": 1.586111720617616, "grad_norm": 0.07914415001869202, "learning_rate": 9.559266522595534e-06, "loss": 0.5068250894546509, "step": 8584 }, { "epoch": 1.586296497326512, "grad_norm": 0.07915471494197845, "learning_rate": 9.5572735545172e-06, "loss": 0.5223965048789978, "step": 8585 }, { "epoch": 1.5864812740354077, "grad_norm": 0.0746086835861206, "learning_rate": 9.555280604057989e-06, "loss": 0.4711228311061859, "step": 8586 }, { "epoch": 1.5866660507443036, "grad_norm": 0.07825423032045364, "learning_rate": 9.553287671297216e-06, "loss": 0.5604469776153564, "step": 8587 }, { "epoch": 1.5868508274531994, "grad_norm": 0.08418682962656021, "learning_rate": 9.551294756314202e-06, "loss": 0.6489458680152893, "step": 8588 }, { "epoch": 1.5870356041620952, "grad_norm": 0.0813329666852951, "learning_rate": 9.54930185918825e-06, "loss": 0.5974125266075134, "step": 8589 }, { "epoch": 1.5872203808709913, "grad_norm": 0.08046593517065048, "learning_rate": 9.547308979998673e-06, "loss": 0.5678706169128418, "step": 8590 }, { "epoch": 1.587405157579887, "grad_norm": 0.06909194588661194, "learning_rate": 9.545316118824784e-06, "loss": 0.46457499265670776, "step": 8591 }, { "epoch": 1.587589934288783, "grad_norm": 0.09298303723335266, "learning_rate": 9.543323275745891e-06, "loss": 0.6481351852416992, "step": 8592 }, { "epoch": 1.5877747109976788, "grad_norm": 0.09103282541036606, "learning_rate": 9.5413304508413e-06, "loss": 0.5711163282394409, "step": 8593 }, { "epoch": 1.5879594877065746, "grad_norm": 0.08446116000413895, "learning_rate": 9.539337644190327e-06, "loss": 0.6023228168487549, "step": 8594 }, { "epoch": 1.5881442644154704, "grad_norm": 0.07669594138860703, "learning_rate": 9.537344855872271e-06, "loss": 0.5727745890617371, "step": 8595 }, { "epoch": 1.5883290411243662, "grad_norm": 0.05845790356397629, "learning_rate": 9.535352085966442e-06, "loss": 0.3232874274253845, "step": 8596 }, { "epoch": 1.588513817833262, "grad_norm": 0.06394659727811813, "learning_rate": 9.533359334552148e-06, "loss": 0.42282405495643616, "step": 8597 }, { "epoch": 1.5886985945421581, "grad_norm": 0.08589275926351547, "learning_rate": 9.53136660170869e-06, "loss": 0.5387320518493652, "step": 8598 }, { "epoch": 1.588883371251054, "grad_norm": 0.058079712092876434, "learning_rate": 9.52937388751538e-06, "loss": 0.3864925801753998, "step": 8599 }, { "epoch": 1.5890681479599498, "grad_norm": 0.0662282183766365, "learning_rate": 9.527381192051513e-06, "loss": 0.3943120539188385, "step": 8600 }, { "epoch": 1.5892529246688456, "grad_norm": 0.07138784229755402, "learning_rate": 9.525388515396395e-06, "loss": 0.546523928642273, "step": 8601 }, { "epoch": 1.5894377013777414, "grad_norm": 0.0852469652891159, "learning_rate": 9.523395857629335e-06, "loss": 0.587976336479187, "step": 8602 }, { "epoch": 1.5896224780866373, "grad_norm": 0.08037744462490082, "learning_rate": 9.521403218829622e-06, "loss": 0.5629453659057617, "step": 8603 }, { "epoch": 1.589807254795533, "grad_norm": 0.09018945693969727, "learning_rate": 9.519410599076566e-06, "loss": 0.602557897567749, "step": 8604 }, { "epoch": 1.589992031504429, "grad_norm": 0.08428955078125, "learning_rate": 9.517417998449468e-06, "loss": 0.4888676106929779, "step": 8605 }, { "epoch": 1.5901768082133247, "grad_norm": 0.07636924088001251, "learning_rate": 9.515425417027619e-06, "loss": 0.4932645261287689, "step": 8606 }, { "epoch": 1.5903615849222206, "grad_norm": 0.06713202595710754, "learning_rate": 9.513432854890322e-06, "loss": 0.4275995194911957, "step": 8607 }, { "epoch": 1.5905463616311164, "grad_norm": 0.07278622686862946, "learning_rate": 9.51144031211688e-06, "loss": 0.45914706587791443, "step": 8608 }, { "epoch": 1.5907311383400122, "grad_norm": 0.07968050241470337, "learning_rate": 9.509447788786582e-06, "loss": 0.5035563111305237, "step": 8609 }, { "epoch": 1.590915915048908, "grad_norm": 0.09256451576948166, "learning_rate": 9.507455284978728e-06, "loss": 0.6316693425178528, "step": 8610 }, { "epoch": 1.5911006917578039, "grad_norm": 0.07208509743213654, "learning_rate": 9.505462800772612e-06, "loss": 0.46981462836265564, "step": 8611 }, { "epoch": 1.5912854684666997, "grad_norm": 0.09786707162857056, "learning_rate": 9.503470336247529e-06, "loss": 0.6726077198982239, "step": 8612 }, { "epoch": 1.5914702451755955, "grad_norm": 0.053754352033138275, "learning_rate": 9.501477891482774e-06, "loss": 0.3265490233898163, "step": 8613 }, { "epoch": 1.5916550218844914, "grad_norm": 0.09386641532182693, "learning_rate": 9.49948546655764e-06, "loss": 0.5951492786407471, "step": 8614 }, { "epoch": 1.5918397985933872, "grad_norm": 0.0635455846786499, "learning_rate": 9.497493061551415e-06, "loss": 0.3247123062610626, "step": 8615 }, { "epoch": 1.592024575302283, "grad_norm": 0.09346818923950195, "learning_rate": 9.495500676543398e-06, "loss": 0.7429558634757996, "step": 8616 }, { "epoch": 1.5922093520111789, "grad_norm": 0.06018964573740959, "learning_rate": 9.493508311612874e-06, "loss": 0.3677816092967987, "step": 8617 }, { "epoch": 1.5923941287200747, "grad_norm": 0.06864192336797714, "learning_rate": 9.491515966839134e-06, "loss": 0.45008015632629395, "step": 8618 }, { "epoch": 1.5925789054289705, "grad_norm": 0.06789940595626831, "learning_rate": 9.48952364230147e-06, "loss": 0.43501466512680054, "step": 8619 }, { "epoch": 1.5927636821378666, "grad_norm": 0.08567140996456146, "learning_rate": 9.487531338079166e-06, "loss": 0.5239092707633972, "step": 8620 }, { "epoch": 1.5929484588467624, "grad_norm": 0.09160125255584717, "learning_rate": 9.48553905425151e-06, "loss": 0.6530731320381165, "step": 8621 }, { "epoch": 1.5931332355556582, "grad_norm": 0.07891631871461868, "learning_rate": 9.483546790897796e-06, "loss": 0.5528213977813721, "step": 8622 }, { "epoch": 1.593318012264554, "grad_norm": 0.056103698909282684, "learning_rate": 9.481554548097297e-06, "loss": 0.2905942499637604, "step": 8623 }, { "epoch": 1.5935027889734499, "grad_norm": 0.08046253770589828, "learning_rate": 9.479562325929307e-06, "loss": 0.49072909355163574, "step": 8624 }, { "epoch": 1.5936875656823457, "grad_norm": 0.07400275021791458, "learning_rate": 9.477570124473113e-06, "loss": 0.511428952217102, "step": 8625 }, { "epoch": 1.5938723423912415, "grad_norm": 0.06470328569412231, "learning_rate": 9.47557794380799e-06, "loss": 0.48424461483955383, "step": 8626 }, { "epoch": 1.5940571191001376, "grad_norm": 0.0823003426194191, "learning_rate": 9.473585784013219e-06, "loss": 0.4720412790775299, "step": 8627 }, { "epoch": 1.5942418958090334, "grad_norm": 0.0525951124727726, "learning_rate": 9.471593645168096e-06, "loss": 0.3639041483402252, "step": 8628 }, { "epoch": 1.5944266725179292, "grad_norm": 0.05389215424656868, "learning_rate": 9.469601527351887e-06, "loss": 0.31378528475761414, "step": 8629 }, { "epoch": 1.594611449226825, "grad_norm": 0.06935250759124756, "learning_rate": 9.467609430643877e-06, "loss": 0.5664317011833191, "step": 8630 }, { "epoch": 1.594796225935721, "grad_norm": 0.07487247884273529, "learning_rate": 9.465617355123352e-06, "loss": 0.48654481768608093, "step": 8631 }, { "epoch": 1.5949810026446167, "grad_norm": 0.07327792793512344, "learning_rate": 9.46362530086958e-06, "loss": 0.6170271039009094, "step": 8632 }, { "epoch": 1.5951657793535126, "grad_norm": 0.08417121320962906, "learning_rate": 9.461633267961844e-06, "loss": 0.6322805285453796, "step": 8633 }, { "epoch": 1.5953505560624084, "grad_norm": 0.08535663783550262, "learning_rate": 9.459641256479419e-06, "loss": 0.4844543933868408, "step": 8634 }, { "epoch": 1.5955353327713042, "grad_norm": 0.07370703667402267, "learning_rate": 9.45764926650158e-06, "loss": 0.45001500844955444, "step": 8635 }, { "epoch": 1.5957201094802, "grad_norm": 0.05562310293316841, "learning_rate": 9.455657298107607e-06, "loss": 0.3464704751968384, "step": 8636 }, { "epoch": 1.5959048861890959, "grad_norm": 0.06573774665594101, "learning_rate": 9.453665351376768e-06, "loss": 0.4138845205307007, "step": 8637 }, { "epoch": 1.5960896628979917, "grad_norm": 0.08080170303583145, "learning_rate": 9.451673426388336e-06, "loss": 0.49387991428375244, "step": 8638 }, { "epoch": 1.5962744396068875, "grad_norm": 0.09069648385047913, "learning_rate": 9.449681523221593e-06, "loss": 0.6068739891052246, "step": 8639 }, { "epoch": 1.5964592163157834, "grad_norm": 0.08178447932004929, "learning_rate": 9.447689641955799e-06, "loss": 0.581017792224884, "step": 8640 }, { "epoch": 1.5966439930246792, "grad_norm": 0.0756615549325943, "learning_rate": 9.445697782670229e-06, "loss": 0.4431816637516022, "step": 8641 }, { "epoch": 1.596828769733575, "grad_norm": 0.09431053698062897, "learning_rate": 9.443705945444158e-06, "loss": 0.7131768465042114, "step": 8642 }, { "epoch": 1.5970135464424708, "grad_norm": 0.07183665037155151, "learning_rate": 9.441714130356842e-06, "loss": 0.41819319128990173, "step": 8643 }, { "epoch": 1.5971983231513667, "grad_norm": 0.059549588710069656, "learning_rate": 9.439722337487561e-06, "loss": 0.40329742431640625, "step": 8644 }, { "epoch": 1.5973830998602625, "grad_norm": 0.07357224822044373, "learning_rate": 9.437730566915582e-06, "loss": 0.49433422088623047, "step": 8645 }, { "epoch": 1.5975678765691583, "grad_norm": 0.06763920187950134, "learning_rate": 9.435738818720164e-06, "loss": 0.37012627720832825, "step": 8646 }, { "epoch": 1.5977526532780542, "grad_norm": 0.07158965617418289, "learning_rate": 9.433747092980571e-06, "loss": 0.49394795298576355, "step": 8647 }, { "epoch": 1.59793742998695, "grad_norm": 0.08695575594902039, "learning_rate": 9.43175538977608e-06, "loss": 0.5455334782600403, "step": 8648 }, { "epoch": 1.598122206695846, "grad_norm": 0.06903868168592453, "learning_rate": 9.429763709185943e-06, "loss": 0.4207424819469452, "step": 8649 }, { "epoch": 1.5983069834047419, "grad_norm": 0.07839375734329224, "learning_rate": 9.427772051289427e-06, "loss": 0.5606071949005127, "step": 8650 }, { "epoch": 1.5984917601136377, "grad_norm": 0.07867002487182617, "learning_rate": 9.425780416165794e-06, "loss": 0.4959498345851898, "step": 8651 }, { "epoch": 1.5986765368225335, "grad_norm": 0.08320920169353485, "learning_rate": 9.423788803894301e-06, "loss": 0.5476346015930176, "step": 8652 }, { "epoch": 1.5988613135314294, "grad_norm": 0.07733888924121857, "learning_rate": 9.421797214554213e-06, "loss": 0.4884255826473236, "step": 8653 }, { "epoch": 1.5990460902403252, "grad_norm": 0.0582268163561821, "learning_rate": 9.419805648224785e-06, "loss": 0.3183627426624298, "step": 8654 }, { "epoch": 1.599230866949221, "grad_norm": 0.06752382963895798, "learning_rate": 9.417814104985278e-06, "loss": 0.3429985046386719, "step": 8655 }, { "epoch": 1.599415643658117, "grad_norm": 0.06709057837724686, "learning_rate": 9.41582258491495e-06, "loss": 0.4029124677181244, "step": 8656 }, { "epoch": 1.5996004203670129, "grad_norm": 0.06716939806938171, "learning_rate": 9.413831088093052e-06, "loss": 0.47434157133102417, "step": 8657 }, { "epoch": 1.5997851970759087, "grad_norm": 0.0751735270023346, "learning_rate": 9.411839614598845e-06, "loss": 0.4409109055995941, "step": 8658 }, { "epoch": 1.5999699737848045, "grad_norm": 0.08015467971563339, "learning_rate": 9.409848164511583e-06, "loss": 0.43245774507522583, "step": 8659 }, { "epoch": 1.6001547504937004, "grad_norm": 0.059320174157619476, "learning_rate": 9.407856737910514e-06, "loss": 0.3780234754085541, "step": 8660 }, { "epoch": 1.6003395272025962, "grad_norm": 0.080257847905159, "learning_rate": 9.405865334874896e-06, "loss": 0.5547727346420288, "step": 8661 }, { "epoch": 1.600524303911492, "grad_norm": 0.07040159404277802, "learning_rate": 9.403873955483982e-06, "loss": 0.4538348615169525, "step": 8662 }, { "epoch": 1.6007090806203879, "grad_norm": 0.04806624725461006, "learning_rate": 9.401882599817013e-06, "loss": 0.33367177844047546, "step": 8663 }, { "epoch": 1.6008938573292837, "grad_norm": 0.08615703880786896, "learning_rate": 9.399891267953252e-06, "loss": 0.6941844820976257, "step": 8664 }, { "epoch": 1.6010786340381795, "grad_norm": 0.07073012739419937, "learning_rate": 9.397899959971937e-06, "loss": 0.4684998393058777, "step": 8665 }, { "epoch": 1.6012634107470753, "grad_norm": 0.07388255000114441, "learning_rate": 9.395908675952319e-06, "loss": 0.45641809701919556, "step": 8666 }, { "epoch": 1.6014481874559712, "grad_norm": 0.08160973340272903, "learning_rate": 9.393917415973648e-06, "loss": 0.5580446720123291, "step": 8667 }, { "epoch": 1.601632964164867, "grad_norm": 0.07806427776813507, "learning_rate": 9.391926180115168e-06, "loss": 0.5362488627433777, "step": 8668 }, { "epoch": 1.6018177408737628, "grad_norm": 0.07841908931732178, "learning_rate": 9.38993496845612e-06, "loss": 0.5302690267562866, "step": 8669 }, { "epoch": 1.6020025175826587, "grad_norm": 0.08175771683454514, "learning_rate": 9.387943781075755e-06, "loss": 0.4767942428588867, "step": 8670 }, { "epoch": 1.6021872942915545, "grad_norm": 0.07378681004047394, "learning_rate": 9.385952618053313e-06, "loss": 0.5257818698883057, "step": 8671 }, { "epoch": 1.6023720710004503, "grad_norm": 0.10777638107538223, "learning_rate": 9.383961479468031e-06, "loss": 0.654915988445282, "step": 8672 }, { "epoch": 1.6025568477093461, "grad_norm": 0.09170585870742798, "learning_rate": 9.381970365399162e-06, "loss": 0.6202282905578613, "step": 8673 }, { "epoch": 1.602741624418242, "grad_norm": 0.07236034423112869, "learning_rate": 9.379979275925934e-06, "loss": 0.5273255109786987, "step": 8674 }, { "epoch": 1.6029264011271378, "grad_norm": 0.06548592448234558, "learning_rate": 9.377988211127591e-06, "loss": 0.4312899112701416, "step": 8675 }, { "epoch": 1.6031111778360336, "grad_norm": 0.07652442157268524, "learning_rate": 9.375997171083372e-06, "loss": 0.5723881125450134, "step": 8676 }, { "epoch": 1.6032959545449295, "grad_norm": 0.08384647965431213, "learning_rate": 9.374006155872514e-06, "loss": 0.5194376111030579, "step": 8677 }, { "epoch": 1.6034807312538255, "grad_norm": 0.08439996093511581, "learning_rate": 9.37201516557425e-06, "loss": 0.5405910015106201, "step": 8678 }, { "epoch": 1.6036655079627213, "grad_norm": 0.05453351140022278, "learning_rate": 9.370024200267822e-06, "loss": 0.3144639730453491, "step": 8679 }, { "epoch": 1.6038502846716172, "grad_norm": 0.08673521876335144, "learning_rate": 9.368033260032458e-06, "loss": 0.5571047067642212, "step": 8680 }, { "epoch": 1.604035061380513, "grad_norm": 0.0772957131266594, "learning_rate": 9.366042344947396e-06, "loss": 0.41941192746162415, "step": 8681 }, { "epoch": 1.6042198380894088, "grad_norm": 0.07876481115818024, "learning_rate": 9.364051455091861e-06, "loss": 0.5334144234657288, "step": 8682 }, { "epoch": 1.6044046147983047, "grad_norm": 0.0865866169333458, "learning_rate": 9.362060590545086e-06, "loss": 0.5806391835212708, "step": 8683 }, { "epoch": 1.6045893915072005, "grad_norm": 0.06845936924219131, "learning_rate": 9.360069751386311e-06, "loss": 0.4660915732383728, "step": 8684 }, { "epoch": 1.6047741682160963, "grad_norm": 0.08829370141029358, "learning_rate": 9.358078937694754e-06, "loss": 0.5103304386138916, "step": 8685 }, { "epoch": 1.6049589449249924, "grad_norm": 0.06883946061134338, "learning_rate": 9.356088149549644e-06, "loss": 0.44140616059303284, "step": 8686 }, { "epoch": 1.6051437216338882, "grad_norm": 0.0737924799323082, "learning_rate": 9.354097387030217e-06, "loss": 0.44015246629714966, "step": 8687 }, { "epoch": 1.605328498342784, "grad_norm": 0.08518895506858826, "learning_rate": 9.35210665021569e-06, "loss": 0.5649626851081848, "step": 8688 }, { "epoch": 1.6055132750516798, "grad_norm": 0.06098601967096329, "learning_rate": 9.35011593918529e-06, "loss": 0.3328516483306885, "step": 8689 }, { "epoch": 1.6056980517605757, "grad_norm": 0.06565692275762558, "learning_rate": 9.348125254018245e-06, "loss": 0.5171113610267639, "step": 8690 }, { "epoch": 1.6058828284694715, "grad_norm": 0.08036236464977264, "learning_rate": 9.346134594793774e-06, "loss": 0.44804441928863525, "step": 8691 }, { "epoch": 1.6060676051783673, "grad_norm": 0.06878243386745453, "learning_rate": 9.3441439615911e-06, "loss": 0.3826935589313507, "step": 8692 }, { "epoch": 1.6062523818872632, "grad_norm": 0.05000479891896248, "learning_rate": 9.342153354489448e-06, "loss": 0.2692321240901947, "step": 8693 }, { "epoch": 1.606437158596159, "grad_norm": 0.08034251630306244, "learning_rate": 9.34016277356803e-06, "loss": 0.5494377017021179, "step": 8694 }, { "epoch": 1.6066219353050548, "grad_norm": 0.06501448899507523, "learning_rate": 9.33817221890607e-06, "loss": 0.40996047854423523, "step": 8695 }, { "epoch": 1.6068067120139506, "grad_norm": 0.06616383045911789, "learning_rate": 9.336181690582787e-06, "loss": 0.5146843791007996, "step": 8696 }, { "epoch": 1.6069914887228465, "grad_norm": 0.06285757571458817, "learning_rate": 9.334191188677394e-06, "loss": 0.4098890423774719, "step": 8697 }, { "epoch": 1.6071762654317423, "grad_norm": 0.06963429600000381, "learning_rate": 9.332200713269113e-06, "loss": 0.47420984506607056, "step": 8698 }, { "epoch": 1.6073610421406381, "grad_norm": 0.09927757829427719, "learning_rate": 9.330210264437149e-06, "loss": 0.5142176747322083, "step": 8699 }, { "epoch": 1.607545818849534, "grad_norm": 0.0826658084988594, "learning_rate": 9.328219842260721e-06, "loss": 0.42444026470184326, "step": 8700 }, { "epoch": 1.6077305955584298, "grad_norm": 0.07246047258377075, "learning_rate": 9.326229446819048e-06, "loss": 0.3660123944282532, "step": 8701 }, { "epoch": 1.6079153722673256, "grad_norm": 0.10228099673986435, "learning_rate": 9.324239078191329e-06, "loss": 0.6338186860084534, "step": 8702 }, { "epoch": 1.6081001489762214, "grad_norm": 0.06835248321294785, "learning_rate": 9.322248736456779e-06, "loss": 0.40147051215171814, "step": 8703 }, { "epoch": 1.6082849256851173, "grad_norm": 0.06488882750272751, "learning_rate": 9.320258421694615e-06, "loss": 0.4587598145008087, "step": 8704 }, { "epoch": 1.608469702394013, "grad_norm": 0.07219914346933365, "learning_rate": 9.318268133984035e-06, "loss": 0.5292624831199646, "step": 8705 }, { "epoch": 1.608654479102909, "grad_norm": 0.09015645831823349, "learning_rate": 9.316277873404249e-06, "loss": 0.49059614539146423, "step": 8706 }, { "epoch": 1.6088392558118048, "grad_norm": 0.12085429579019547, "learning_rate": 9.31428764003447e-06, "loss": 0.7310295104980469, "step": 8707 }, { "epoch": 1.6090240325207008, "grad_norm": 0.05734136700630188, "learning_rate": 9.312297433953894e-06, "loss": 0.28329697251319885, "step": 8708 }, { "epoch": 1.6092088092295966, "grad_norm": 0.07954109460115433, "learning_rate": 9.310307255241729e-06, "loss": 0.5011992454528809, "step": 8709 }, { "epoch": 1.6093935859384925, "grad_norm": 0.055546123534440994, "learning_rate": 9.308317103977177e-06, "loss": 0.35209885239601135, "step": 8710 }, { "epoch": 1.6095783626473883, "grad_norm": 0.07863243669271469, "learning_rate": 9.306326980239441e-06, "loss": 0.44597235321998596, "step": 8711 }, { "epoch": 1.6097631393562841, "grad_norm": 0.05761402100324631, "learning_rate": 9.30433688410772e-06, "loss": 0.3015648424625397, "step": 8712 }, { "epoch": 1.60994791606518, "grad_norm": 0.07875753194093704, "learning_rate": 9.302346815661217e-06, "loss": 0.451083242893219, "step": 8713 }, { "epoch": 1.6101326927740758, "grad_norm": 0.06157178059220314, "learning_rate": 9.300356774979125e-06, "loss": 0.35838833451271057, "step": 8714 }, { "epoch": 1.6103174694829718, "grad_norm": 0.06755080819129944, "learning_rate": 9.298366762140648e-06, "loss": 0.4344403147697449, "step": 8715 }, { "epoch": 1.6105022461918677, "grad_norm": 0.07680190354585648, "learning_rate": 9.296376777224977e-06, "loss": 0.4962613880634308, "step": 8716 }, { "epoch": 1.6106870229007635, "grad_norm": 0.06090494990348816, "learning_rate": 9.294386820311306e-06, "loss": 0.3367466926574707, "step": 8717 }, { "epoch": 1.6108717996096593, "grad_norm": 0.09674331545829773, "learning_rate": 9.292396891478838e-06, "loss": 0.6231864094734192, "step": 8718 }, { "epoch": 1.6110565763185551, "grad_norm": 0.07336708903312683, "learning_rate": 9.290406990806754e-06, "loss": 0.44256263971328735, "step": 8719 }, { "epoch": 1.611241353027451, "grad_norm": 0.0809880793094635, "learning_rate": 9.288417118374253e-06, "loss": 0.5246638059616089, "step": 8720 }, { "epoch": 1.6114261297363468, "grad_norm": 0.0665491595864296, "learning_rate": 9.28642727426053e-06, "loss": 0.48506948351860046, "step": 8721 }, { "epoch": 1.6116109064452426, "grad_norm": 0.0818161740899086, "learning_rate": 9.284437458544762e-06, "loss": 0.49265772104263306, "step": 8722 }, { "epoch": 1.6117956831541385, "grad_norm": 0.07088849693536758, "learning_rate": 9.282447671306145e-06, "loss": 0.574463963508606, "step": 8723 }, { "epoch": 1.6119804598630343, "grad_norm": 0.07852593809366226, "learning_rate": 9.280457912623873e-06, "loss": 0.6700880527496338, "step": 8724 }, { "epoch": 1.6121652365719301, "grad_norm": 0.07950948923826218, "learning_rate": 9.278468182577118e-06, "loss": 0.46207764744758606, "step": 8725 }, { "epoch": 1.612350013280826, "grad_norm": 0.061707984656095505, "learning_rate": 9.27647848124507e-06, "loss": 0.4924660921096802, "step": 8726 }, { "epoch": 1.6125347899897218, "grad_norm": 0.0966852679848671, "learning_rate": 9.274488808706923e-06, "loss": 0.7439047694206238, "step": 8727 }, { "epoch": 1.6127195666986176, "grad_norm": 0.06226487085223198, "learning_rate": 9.272499165041846e-06, "loss": 0.4389248490333557, "step": 8728 }, { "epoch": 1.6129043434075134, "grad_norm": 0.0721212774515152, "learning_rate": 9.270509550329027e-06, "loss": 0.45488476753234863, "step": 8729 }, { "epoch": 1.6130891201164093, "grad_norm": 0.07964397221803665, "learning_rate": 9.268519964647646e-06, "loss": 0.566719651222229, "step": 8730 }, { "epoch": 1.613273896825305, "grad_norm": 0.08104722201824188, "learning_rate": 9.266530408076881e-06, "loss": 0.5494105219841003, "step": 8731 }, { "epoch": 1.613458673534201, "grad_norm": 0.07702388614416122, "learning_rate": 9.264540880695914e-06, "loss": 0.5685914158821106, "step": 8732 }, { "epoch": 1.6136434502430967, "grad_norm": 0.07252875715494156, "learning_rate": 9.262551382583916e-06, "loss": 0.3761477470397949, "step": 8733 }, { "epoch": 1.6138282269519926, "grad_norm": 0.06461136043071747, "learning_rate": 9.260561913820066e-06, "loss": 0.41747915744781494, "step": 8734 }, { "epoch": 1.6140130036608884, "grad_norm": 0.07709618657827377, "learning_rate": 9.25857247448354e-06, "loss": 0.4577575623989105, "step": 8735 }, { "epoch": 1.6141977803697842, "grad_norm": 0.07607993483543396, "learning_rate": 9.256583064653509e-06, "loss": 0.4133278429508209, "step": 8736 }, { "epoch": 1.6143825570786803, "grad_norm": 0.08119706809520721, "learning_rate": 9.254593684409144e-06, "loss": 0.5895794034004211, "step": 8737 }, { "epoch": 1.614567333787576, "grad_norm": 0.0602099783718586, "learning_rate": 9.252604333829624e-06, "loss": 0.37203460931777954, "step": 8738 }, { "epoch": 1.614752110496472, "grad_norm": 0.08186354488134384, "learning_rate": 9.250615012994106e-06, "loss": 0.5753328800201416, "step": 8739 }, { "epoch": 1.6149368872053678, "grad_norm": 0.06444091349840164, "learning_rate": 9.24862572198177e-06, "loss": 0.3414130210876465, "step": 8740 }, { "epoch": 1.6151216639142636, "grad_norm": 0.06698300689458847, "learning_rate": 9.246636460871781e-06, "loss": 0.45006752014160156, "step": 8741 }, { "epoch": 1.6153064406231594, "grad_norm": 0.058944471180438995, "learning_rate": 9.244647229743299e-06, "loss": 0.37072205543518066, "step": 8742 }, { "epoch": 1.6154912173320553, "grad_norm": 0.07849530130624771, "learning_rate": 9.242658028675498e-06, "loss": 0.4830027222633362, "step": 8743 }, { "epoch": 1.6156759940409513, "grad_norm": 0.08128982037305832, "learning_rate": 9.24066885774754e-06, "loss": 0.4712119400501251, "step": 8744 }, { "epoch": 1.6158607707498471, "grad_norm": 0.07775290310382843, "learning_rate": 9.238679717038582e-06, "loss": 0.534960925579071, "step": 8745 }, { "epoch": 1.616045547458743, "grad_norm": 0.08364227414131165, "learning_rate": 9.236690606627792e-06, "loss": 0.5130738019943237, "step": 8746 }, { "epoch": 1.6162303241676388, "grad_norm": 0.06736582517623901, "learning_rate": 9.234701526594325e-06, "loss": 0.4213935136795044, "step": 8747 }, { "epoch": 1.6164151008765346, "grad_norm": 0.05899931117892265, "learning_rate": 9.232712477017343e-06, "loss": 0.4013676047325134, "step": 8748 }, { "epoch": 1.6165998775854304, "grad_norm": 0.09669655561447144, "learning_rate": 9.230723457976006e-06, "loss": 0.6942662596702576, "step": 8749 }, { "epoch": 1.6167846542943263, "grad_norm": 0.07031256705522537, "learning_rate": 9.228734469549467e-06, "loss": 0.5576603412628174, "step": 8750 }, { "epoch": 1.616969431003222, "grad_norm": 0.08005089312791824, "learning_rate": 9.226745511816883e-06, "loss": 0.4651269316673279, "step": 8751 }, { "epoch": 1.617154207712118, "grad_norm": 0.09014077484607697, "learning_rate": 9.22475658485741e-06, "loss": 0.7220168113708496, "step": 8752 }, { "epoch": 1.6173389844210138, "grad_norm": 0.0727510005235672, "learning_rate": 9.222767688750196e-06, "loss": 0.5171751379966736, "step": 8753 }, { "epoch": 1.6175237611299096, "grad_norm": 0.0730699971318245, "learning_rate": 9.220778823574398e-06, "loss": 0.5313491225242615, "step": 8754 }, { "epoch": 1.6177085378388054, "grad_norm": 0.07589662820100784, "learning_rate": 9.218789989409167e-06, "loss": 0.5469446778297424, "step": 8755 }, { "epoch": 1.6178933145477012, "grad_norm": 0.07687672972679138, "learning_rate": 9.21680118633365e-06, "loss": 0.46767929196357727, "step": 8756 }, { "epoch": 1.618078091256597, "grad_norm": 0.06446769088506699, "learning_rate": 9.214812414426993e-06, "loss": 0.39684683084487915, "step": 8757 }, { "epoch": 1.618262867965493, "grad_norm": 0.07347891479730606, "learning_rate": 9.21282367376835e-06, "loss": 0.4656224250793457, "step": 8758 }, { "epoch": 1.6184476446743887, "grad_norm": 0.08664803206920624, "learning_rate": 9.210834964436857e-06, "loss": 0.54388028383255, "step": 8759 }, { "epoch": 1.6186324213832846, "grad_norm": 0.08450117707252502, "learning_rate": 9.208846286511664e-06, "loss": 0.560814619064331, "step": 8760 }, { "epoch": 1.6188171980921804, "grad_norm": 0.0759967714548111, "learning_rate": 9.20685764007192e-06, "loss": 0.44394931197166443, "step": 8761 }, { "epoch": 1.6190019748010762, "grad_norm": 0.0643659308552742, "learning_rate": 9.204869025196753e-06, "loss": 0.4153701663017273, "step": 8762 }, { "epoch": 1.619186751509972, "grad_norm": 0.0887654572725296, "learning_rate": 9.202880441965317e-06, "loss": 0.5017815828323364, "step": 8763 }, { "epoch": 1.6193715282188679, "grad_norm": 0.06874753534793854, "learning_rate": 9.20089189045674e-06, "loss": 0.48350366950035095, "step": 8764 }, { "epoch": 1.6195563049277637, "grad_norm": 0.06923694163560867, "learning_rate": 9.198903370750167e-06, "loss": 0.43473052978515625, "step": 8765 }, { "epoch": 1.6197410816366598, "grad_norm": 0.07469451427459717, "learning_rate": 9.196914882924737e-06, "loss": 0.4775337874889374, "step": 8766 }, { "epoch": 1.6199258583455556, "grad_norm": 0.10432615131139755, "learning_rate": 9.194926427059579e-06, "loss": 0.7286350131034851, "step": 8767 }, { "epoch": 1.6201106350544514, "grad_norm": 0.06797898560762405, "learning_rate": 9.192938003233828e-06, "loss": 0.4760822355747223, "step": 8768 }, { "epoch": 1.6202954117633472, "grad_norm": 0.08036093413829803, "learning_rate": 9.190949611526625e-06, "loss": 0.4900142252445221, "step": 8769 }, { "epoch": 1.620480188472243, "grad_norm": 0.07063406705856323, "learning_rate": 9.188961252017094e-06, "loss": 0.4609625041484833, "step": 8770 }, { "epoch": 1.620664965181139, "grad_norm": 0.08164394646883011, "learning_rate": 9.186972924784365e-06, "loss": 0.617030143737793, "step": 8771 }, { "epoch": 1.6208497418900347, "grad_norm": 0.08667688816785812, "learning_rate": 9.184984629907575e-06, "loss": 0.6391412615776062, "step": 8772 }, { "epoch": 1.6210345185989306, "grad_norm": 0.07805630564689636, "learning_rate": 9.182996367465843e-06, "loss": 0.47581109404563904, "step": 8773 }, { "epoch": 1.6212192953078266, "grad_norm": 0.07001031190156937, "learning_rate": 9.1810081375383e-06, "loss": 0.4111134707927704, "step": 8774 }, { "epoch": 1.6214040720167224, "grad_norm": 0.09256626665592194, "learning_rate": 9.179019940204073e-06, "loss": 0.5408514738082886, "step": 8775 }, { "epoch": 1.6215888487256183, "grad_norm": 0.09123337268829346, "learning_rate": 9.177031775542282e-06, "loss": 0.6441941857337952, "step": 8776 }, { "epoch": 1.621773625434514, "grad_norm": 0.07255522906780243, "learning_rate": 9.175043643632051e-06, "loss": 0.5214483737945557, "step": 8777 }, { "epoch": 1.62195840214341, "grad_norm": 0.09604248404502869, "learning_rate": 9.173055544552505e-06, "loss": 0.725089967250824, "step": 8778 }, { "epoch": 1.6221431788523057, "grad_norm": 0.08073451370000839, "learning_rate": 9.171067478382757e-06, "loss": 0.4169258773326874, "step": 8779 }, { "epoch": 1.6223279555612016, "grad_norm": 0.0782463401556015, "learning_rate": 9.169079445201938e-06, "loss": 0.5479577779769897, "step": 8780 }, { "epoch": 1.6225127322700974, "grad_norm": 0.10017386823892593, "learning_rate": 9.16709144508915e-06, "loss": 0.6875959634780884, "step": 8781 }, { "epoch": 1.6226975089789932, "grad_norm": 0.09250274300575256, "learning_rate": 9.165103478123515e-06, "loss": 0.5965291857719421, "step": 8782 }, { "epoch": 1.622882285687889, "grad_norm": 0.08742630481719971, "learning_rate": 9.163115544384157e-06, "loss": 0.4917841851711273, "step": 8783 }, { "epoch": 1.6230670623967849, "grad_norm": 0.07041438668966293, "learning_rate": 9.161127643950178e-06, "loss": 0.5060359835624695, "step": 8784 }, { "epoch": 1.6232518391056807, "grad_norm": 0.07074175029993057, "learning_rate": 9.159139776900691e-06, "loss": 0.48050737380981445, "step": 8785 }, { "epoch": 1.6234366158145765, "grad_norm": 0.06361597776412964, "learning_rate": 9.157151943314817e-06, "loss": 0.42081218957901, "step": 8786 }, { "epoch": 1.6236213925234724, "grad_norm": 0.06792426109313965, "learning_rate": 9.155164143271654e-06, "loss": 0.4286332130432129, "step": 8787 }, { "epoch": 1.6238061692323682, "grad_norm": 0.07698680460453033, "learning_rate": 9.153176376850315e-06, "loss": 0.49443984031677246, "step": 8788 }, { "epoch": 1.623990945941264, "grad_norm": 0.08953724801540375, "learning_rate": 9.15118864412991e-06, "loss": 0.6965150237083435, "step": 8789 }, { "epoch": 1.6241757226501599, "grad_norm": 0.0866585448384285, "learning_rate": 9.149200945189536e-06, "loss": 0.5574845671653748, "step": 8790 }, { "epoch": 1.6243604993590557, "grad_norm": 0.09219162911176682, "learning_rate": 9.147213280108307e-06, "loss": 0.8432692289352417, "step": 8791 }, { "epoch": 1.6245452760679515, "grad_norm": 0.08029278367757797, "learning_rate": 9.145225648965321e-06, "loss": 0.6044814586639404, "step": 8792 }, { "epoch": 1.6247300527768473, "grad_norm": 0.07284297794103622, "learning_rate": 9.143238051839678e-06, "loss": 0.5288881659507751, "step": 8793 }, { "epoch": 1.6249148294857432, "grad_norm": 0.08836342394351959, "learning_rate": 9.141250488810481e-06, "loss": 0.6295874118804932, "step": 8794 }, { "epoch": 1.625099606194639, "grad_norm": 0.0754440426826477, "learning_rate": 9.139262959956829e-06, "loss": 0.5248824954032898, "step": 8795 }, { "epoch": 1.625284382903535, "grad_norm": 0.0965680330991745, "learning_rate": 9.137275465357817e-06, "loss": 0.6318686008453369, "step": 8796 }, { "epoch": 1.6254691596124309, "grad_norm": 0.0794186145067215, "learning_rate": 9.135288005092546e-06, "loss": 0.435815691947937, "step": 8797 }, { "epoch": 1.6256539363213267, "grad_norm": 0.09976905584335327, "learning_rate": 9.1333005792401e-06, "loss": 0.6863988041877747, "step": 8798 }, { "epoch": 1.6258387130302225, "grad_norm": 0.07866614311933517, "learning_rate": 9.131313187879584e-06, "loss": 0.5332317352294922, "step": 8799 }, { "epoch": 1.6260234897391184, "grad_norm": 0.07597986608743668, "learning_rate": 9.129325831090087e-06, "loss": 0.5171675682067871, "step": 8800 }, { "epoch": 1.6262082664480142, "grad_norm": 0.07138339430093765, "learning_rate": 9.127338508950696e-06, "loss": 0.3930073082447052, "step": 8801 }, { "epoch": 1.62639304315691, "grad_norm": 0.08232542872428894, "learning_rate": 9.125351221540498e-06, "loss": 0.5828226804733276, "step": 8802 }, { "epoch": 1.626577819865806, "grad_norm": 0.0767393633723259, "learning_rate": 9.123363968938592e-06, "loss": 0.4624963700771332, "step": 8803 }, { "epoch": 1.626762596574702, "grad_norm": 0.05044902116060257, "learning_rate": 9.121376751224054e-06, "loss": 0.28808170557022095, "step": 8804 }, { "epoch": 1.6269473732835977, "grad_norm": 0.06592875719070435, "learning_rate": 9.119389568475972e-06, "loss": 0.39005059003829956, "step": 8805 }, { "epoch": 1.6271321499924936, "grad_norm": 0.08422244340181351, "learning_rate": 9.117402420773431e-06, "loss": 0.5206887722015381, "step": 8806 }, { "epoch": 1.6273169267013894, "grad_norm": 0.07252081483602524, "learning_rate": 9.115415308195511e-06, "loss": 0.45373600721359253, "step": 8807 }, { "epoch": 1.6275017034102852, "grad_norm": 0.06747157871723175, "learning_rate": 9.113428230821296e-06, "loss": 0.46685582399368286, "step": 8808 }, { "epoch": 1.627686480119181, "grad_norm": 0.08551542460918427, "learning_rate": 9.111441188729863e-06, "loss": 0.5203744769096375, "step": 8809 }, { "epoch": 1.6278712568280769, "grad_norm": 0.0564129538834095, "learning_rate": 9.10945418200029e-06, "loss": 0.35253602266311646, "step": 8810 }, { "epoch": 1.6280560335369727, "grad_norm": 0.06635194271802902, "learning_rate": 9.107467210711655e-06, "loss": 0.4317592978477478, "step": 8811 }, { "epoch": 1.6282408102458685, "grad_norm": 0.06984547525644302, "learning_rate": 9.105480274943032e-06, "loss": 0.5081874132156372, "step": 8812 }, { "epoch": 1.6284255869547644, "grad_norm": 0.07593455910682678, "learning_rate": 9.103493374773496e-06, "loss": 0.4170948266983032, "step": 8813 }, { "epoch": 1.6286103636636602, "grad_norm": 0.07477694749832153, "learning_rate": 9.10150651028212e-06, "loss": 0.5138434767723083, "step": 8814 }, { "epoch": 1.628795140372556, "grad_norm": 0.09288772940635681, "learning_rate": 9.099519681547973e-06, "loss": 0.6115694642066956, "step": 8815 }, { "epoch": 1.6289799170814518, "grad_norm": 0.05931880325078964, "learning_rate": 9.097532888650124e-06, "loss": 0.4423096776008606, "step": 8816 }, { "epoch": 1.6291646937903477, "grad_norm": 0.06403397023677826, "learning_rate": 9.095546131667647e-06, "loss": 0.33560627698898315, "step": 8817 }, { "epoch": 1.6293494704992435, "grad_norm": 0.07780005782842636, "learning_rate": 9.093559410679598e-06, "loss": 0.4819314181804657, "step": 8818 }, { "epoch": 1.6295342472081393, "grad_norm": 0.08891147375106812, "learning_rate": 9.09157272576505e-06, "loss": 0.5211207270622253, "step": 8819 }, { "epoch": 1.6297190239170352, "grad_norm": 0.05928242579102516, "learning_rate": 9.089586077003073e-06, "loss": 0.44235554337501526, "step": 8820 }, { "epoch": 1.629903800625931, "grad_norm": 0.06842076033353806, "learning_rate": 9.087599464472714e-06, "loss": 0.42068982124328613, "step": 8821 }, { "epoch": 1.6300885773348268, "grad_norm": 0.060262531042099, "learning_rate": 9.085612888253041e-06, "loss": 0.3564003109931946, "step": 8822 }, { "epoch": 1.6302733540437226, "grad_norm": 0.07870537787675858, "learning_rate": 9.08362634842312e-06, "loss": 0.5124083757400513, "step": 8823 }, { "epoch": 1.6304581307526185, "grad_norm": 0.08522040396928787, "learning_rate": 9.081639845062e-06, "loss": 0.621161937713623, "step": 8824 }, { "epoch": 1.6306429074615145, "grad_norm": 0.059914588928222656, "learning_rate": 9.07965337824874e-06, "loss": 0.39740699529647827, "step": 8825 }, { "epoch": 1.6308276841704104, "grad_norm": 0.08846762031316757, "learning_rate": 9.077666948062399e-06, "loss": 0.5534099340438843, "step": 8826 }, { "epoch": 1.6310124608793062, "grad_norm": 0.07665707916021347, "learning_rate": 9.075680554582024e-06, "loss": 0.501498818397522, "step": 8827 }, { "epoch": 1.631197237588202, "grad_norm": 0.07973427325487137, "learning_rate": 9.073694197886676e-06, "loss": 0.4741021692752838, "step": 8828 }, { "epoch": 1.6313820142970978, "grad_norm": 0.07348043471574783, "learning_rate": 9.071707878055398e-06, "loss": 0.48767468333244324, "step": 8829 }, { "epoch": 1.6315667910059937, "grad_norm": 0.0816214382648468, "learning_rate": 9.069721595167241e-06, "loss": 0.48633819818496704, "step": 8830 }, { "epoch": 1.6317515677148895, "grad_norm": 0.055964767932891846, "learning_rate": 9.067735349301258e-06, "loss": 0.33711522817611694, "step": 8831 }, { "epoch": 1.6319363444237855, "grad_norm": 0.08837044984102249, "learning_rate": 9.065749140536487e-06, "loss": 0.6438451409339905, "step": 8832 }, { "epoch": 1.6321211211326814, "grad_norm": 0.09076151251792908, "learning_rate": 9.063762968951978e-06, "loss": 0.5306956171989441, "step": 8833 }, { "epoch": 1.6323058978415772, "grad_norm": 0.07748226076364517, "learning_rate": 9.061776834626777e-06, "loss": 0.5394060015678406, "step": 8834 }, { "epoch": 1.632490674550473, "grad_norm": 0.06562750786542892, "learning_rate": 9.05979073763992e-06, "loss": 0.4323406517505646, "step": 8835 }, { "epoch": 1.6326754512593689, "grad_norm": 0.06664278358221054, "learning_rate": 9.05780467807045e-06, "loss": 0.4158181846141815, "step": 8836 }, { "epoch": 1.6328602279682647, "grad_norm": 0.06045094504952431, "learning_rate": 9.05581865599741e-06, "loss": 0.3473166525363922, "step": 8837 }, { "epoch": 1.6330450046771605, "grad_norm": 0.07034634053707123, "learning_rate": 9.053832671499828e-06, "loss": 0.38992130756378174, "step": 8838 }, { "epoch": 1.6332297813860563, "grad_norm": 0.061902426183223724, "learning_rate": 9.051846724656747e-06, "loss": 0.3445275127887726, "step": 8839 }, { "epoch": 1.6334145580949522, "grad_norm": 0.09002410620450974, "learning_rate": 9.049860815547205e-06, "loss": 0.5587263107299805, "step": 8840 }, { "epoch": 1.633599334803848, "grad_norm": 0.08259917795658112, "learning_rate": 9.047874944250225e-06, "loss": 0.5446577668190002, "step": 8841 }, { "epoch": 1.6337841115127438, "grad_norm": 0.07812831550836563, "learning_rate": 9.04588911084484e-06, "loss": 0.4547961950302124, "step": 8842 }, { "epoch": 1.6339688882216397, "grad_norm": 0.07952714711427689, "learning_rate": 9.043903315410092e-06, "loss": 0.5320074558258057, "step": 8843 }, { "epoch": 1.6341536649305355, "grad_norm": 0.08245772123336792, "learning_rate": 9.041917558024994e-06, "loss": 0.4858451187610626, "step": 8844 }, { "epoch": 1.6343384416394313, "grad_norm": 0.07378239929676056, "learning_rate": 9.039931838768583e-06, "loss": 0.5240111351013184, "step": 8845 }, { "epoch": 1.6345232183483271, "grad_norm": 0.06492896378040314, "learning_rate": 9.03794615771988e-06, "loss": 0.3219175636768341, "step": 8846 }, { "epoch": 1.634707995057223, "grad_norm": 0.08643704652786255, "learning_rate": 9.03596051495791e-06, "loss": 0.5498473048210144, "step": 8847 }, { "epoch": 1.6348927717661188, "grad_norm": 0.07704591006040573, "learning_rate": 9.033974910561696e-06, "loss": 0.5322506427764893, "step": 8848 }, { "epoch": 1.6350775484750146, "grad_norm": 0.08054648339748383, "learning_rate": 9.031989344610258e-06, "loss": 0.5502832531929016, "step": 8849 }, { "epoch": 1.6352623251839105, "grad_norm": 0.08075813949108124, "learning_rate": 9.030003817182615e-06, "loss": 0.5375747680664062, "step": 8850 }, { "epoch": 1.6354471018928063, "grad_norm": 0.06502457708120346, "learning_rate": 9.028018328357787e-06, "loss": 0.36693182587623596, "step": 8851 }, { "epoch": 1.6356318786017021, "grad_norm": 0.08187992125749588, "learning_rate": 9.026032878214787e-06, "loss": 0.5035943984985352, "step": 8852 }, { "epoch": 1.635816655310598, "grad_norm": 0.08244334906339645, "learning_rate": 9.024047466832631e-06, "loss": 0.5270260572433472, "step": 8853 }, { "epoch": 1.636001432019494, "grad_norm": 0.08298245817422867, "learning_rate": 9.022062094290334e-06, "loss": 0.5583405494689941, "step": 8854 }, { "epoch": 1.6361862087283898, "grad_norm": 0.06815101951360703, "learning_rate": 9.020076760666904e-06, "loss": 0.47471892833709717, "step": 8855 }, { "epoch": 1.6363709854372857, "grad_norm": 0.08780615776777267, "learning_rate": 9.018091466041354e-06, "loss": 0.6067984104156494, "step": 8856 }, { "epoch": 1.6365557621461815, "grad_norm": 0.0770939365029335, "learning_rate": 9.016106210492696e-06, "loss": 0.6024122834205627, "step": 8857 }, { "epoch": 1.6367405388550773, "grad_norm": 0.06842092424631119, "learning_rate": 9.014120994099926e-06, "loss": 0.46925783157348633, "step": 8858 }, { "epoch": 1.6369253155639731, "grad_norm": 0.06141021475195885, "learning_rate": 9.012135816942058e-06, "loss": 0.3543952405452728, "step": 8859 }, { "epoch": 1.637110092272869, "grad_norm": 0.06749419867992401, "learning_rate": 9.010150679098097e-06, "loss": 0.4042138159275055, "step": 8860 }, { "epoch": 1.6372948689817648, "grad_norm": 0.06526118516921997, "learning_rate": 9.008165580647039e-06, "loss": 0.42056259512901306, "step": 8861 }, { "epoch": 1.6374796456906608, "grad_norm": 0.06441036611795425, "learning_rate": 9.00618052166789e-06, "loss": 0.41664764285087585, "step": 8862 }, { "epoch": 1.6376644223995567, "grad_norm": 0.07588757574558258, "learning_rate": 9.004195502239645e-06, "loss": 0.422836035490036, "step": 8863 }, { "epoch": 1.6378491991084525, "grad_norm": 0.07412891089916229, "learning_rate": 9.002210522441303e-06, "loss": 0.6133260130882263, "step": 8864 }, { "epoch": 1.6380339758173483, "grad_norm": 0.07033253461122513, "learning_rate": 9.000225582351864e-06, "loss": 0.4490993916988373, "step": 8865 }, { "epoch": 1.6382187525262442, "grad_norm": 0.08125453442335129, "learning_rate": 8.998240682050315e-06, "loss": 0.5367612838745117, "step": 8866 }, { "epoch": 1.63840352923514, "grad_norm": 0.09599972516298294, "learning_rate": 8.996255821615654e-06, "loss": 0.6475223302841187, "step": 8867 }, { "epoch": 1.6385883059440358, "grad_norm": 0.07406838983297348, "learning_rate": 8.994271001126873e-06, "loss": 0.4529930055141449, "step": 8868 }, { "epoch": 1.6387730826529316, "grad_norm": 0.08143652975559235, "learning_rate": 8.992286220662956e-06, "loss": 0.5256994366645813, "step": 8869 }, { "epoch": 1.6389578593618275, "grad_norm": 0.07117968052625656, "learning_rate": 8.990301480302896e-06, "loss": 0.4198594093322754, "step": 8870 }, { "epoch": 1.6391426360707233, "grad_norm": 0.062106333673000336, "learning_rate": 8.98831678012568e-06, "loss": 0.3492191731929779, "step": 8871 }, { "epoch": 1.6393274127796191, "grad_norm": 0.056991275399923325, "learning_rate": 8.986332120210289e-06, "loss": 0.40724286437034607, "step": 8872 }, { "epoch": 1.639512189488515, "grad_norm": 0.07889767736196518, "learning_rate": 8.984347500635708e-06, "loss": 0.5376442074775696, "step": 8873 }, { "epoch": 1.6396969661974108, "grad_norm": 0.07714872062206268, "learning_rate": 8.982362921480921e-06, "loss": 0.5542723536491394, "step": 8874 }, { "epoch": 1.6398817429063066, "grad_norm": 0.07410655170679092, "learning_rate": 8.980378382824904e-06, "loss": 0.4115622341632843, "step": 8875 }, { "epoch": 1.6400665196152024, "grad_norm": 0.055506497621536255, "learning_rate": 8.978393884746641e-06, "loss": 0.3900960683822632, "step": 8876 }, { "epoch": 1.6402512963240983, "grad_norm": 0.06810830533504486, "learning_rate": 8.976409427325102e-06, "loss": 0.41855576634407043, "step": 8877 }, { "epoch": 1.640436073032994, "grad_norm": 0.07559581100940704, "learning_rate": 8.974425010639262e-06, "loss": 0.5396881103515625, "step": 8878 }, { "epoch": 1.64062084974189, "grad_norm": 0.094178207218647, "learning_rate": 8.972440634768105e-06, "loss": 0.640783429145813, "step": 8879 }, { "epoch": 1.6408056264507858, "grad_norm": 0.08872411400079727, "learning_rate": 8.970456299790592e-06, "loss": 0.5301050543785095, "step": 8880 }, { "epoch": 1.6409904031596816, "grad_norm": 0.09028414636850357, "learning_rate": 8.968472005785698e-06, "loss": 0.5819946527481079, "step": 8881 }, { "epoch": 1.6411751798685774, "grad_norm": 0.07763354480266571, "learning_rate": 8.966487752832393e-06, "loss": 0.5989773273468018, "step": 8882 }, { "epoch": 1.6413599565774732, "grad_norm": 0.06810028851032257, "learning_rate": 8.964503541009639e-06, "loss": 0.5160920023918152, "step": 8883 }, { "epoch": 1.6415447332863693, "grad_norm": 0.0847913920879364, "learning_rate": 8.962519370396403e-06, "loss": 0.5464459657669067, "step": 8884 }, { "epoch": 1.6417295099952651, "grad_norm": 0.07933083921670914, "learning_rate": 8.960535241071654e-06, "loss": 0.5264043211936951, "step": 8885 }, { "epoch": 1.641914286704161, "grad_norm": 0.06766915321350098, "learning_rate": 8.958551153114348e-06, "loss": 0.449647456407547, "step": 8886 }, { "epoch": 1.6420990634130568, "grad_norm": 0.07304760068655014, "learning_rate": 8.956567106603448e-06, "loss": 0.4253140687942505, "step": 8887 }, { "epoch": 1.6422838401219526, "grad_norm": 0.07899277657270432, "learning_rate": 8.954583101617915e-06, "loss": 0.4085846245288849, "step": 8888 }, { "epoch": 1.6424686168308484, "grad_norm": 0.08426927775144577, "learning_rate": 8.9525991382367e-06, "loss": 0.5908872485160828, "step": 8889 }, { "epoch": 1.6426533935397443, "grad_norm": 0.08341715484857559, "learning_rate": 8.950615216538765e-06, "loss": 0.5195132493972778, "step": 8890 }, { "epoch": 1.6428381702486403, "grad_norm": 0.08649944514036179, "learning_rate": 8.948631336603062e-06, "loss": 0.5062446594238281, "step": 8891 }, { "epoch": 1.6430229469575361, "grad_norm": 0.06944700330495834, "learning_rate": 8.946647498508541e-06, "loss": 0.3602409362792969, "step": 8892 }, { "epoch": 1.643207723666432, "grad_norm": 0.0651615634560585, "learning_rate": 8.944663702334158e-06, "loss": 0.3831489682197571, "step": 8893 }, { "epoch": 1.6433925003753278, "grad_norm": 0.06624651700258255, "learning_rate": 8.94267994815885e-06, "loss": 0.423088401556015, "step": 8894 }, { "epoch": 1.6435772770842236, "grad_norm": 0.0795547142624855, "learning_rate": 8.940696236061575e-06, "loss": 0.5180891752243042, "step": 8895 }, { "epoch": 1.6437620537931195, "grad_norm": 0.06199095770716667, "learning_rate": 8.938712566121281e-06, "loss": 0.3613620400428772, "step": 8896 }, { "epoch": 1.6439468305020153, "grad_norm": 0.06290160119533539, "learning_rate": 8.936728938416901e-06, "loss": 0.48867276310920715, "step": 8897 }, { "epoch": 1.6441316072109111, "grad_norm": 0.09374285489320755, "learning_rate": 8.934745353027384e-06, "loss": 0.6991412043571472, "step": 8898 }, { "epoch": 1.644316383919807, "grad_norm": 0.06297103315591812, "learning_rate": 8.932761810031672e-06, "loss": 0.3790844976902008, "step": 8899 }, { "epoch": 1.6445011606287028, "grad_norm": 0.08258585631847382, "learning_rate": 8.930778309508698e-06, "loss": 0.5071451663970947, "step": 8900 }, { "epoch": 1.6446859373375986, "grad_norm": 0.07014893740415573, "learning_rate": 8.928794851537401e-06, "loss": 0.5357246994972229, "step": 8901 }, { "epoch": 1.6448707140464944, "grad_norm": 0.05861230567097664, "learning_rate": 8.92681143619672e-06, "loss": 0.3450230360031128, "step": 8902 }, { "epoch": 1.6450554907553903, "grad_norm": 0.08785264939069748, "learning_rate": 8.924828063565585e-06, "loss": 0.7066122889518738, "step": 8903 }, { "epoch": 1.645240267464286, "grad_norm": 0.08250744640827179, "learning_rate": 8.92284473372293e-06, "loss": 0.6925334930419922, "step": 8904 }, { "epoch": 1.645425044173182, "grad_norm": 0.06766656786203384, "learning_rate": 8.920861446747685e-06, "loss": 0.2983948886394501, "step": 8905 }, { "epoch": 1.6456098208820777, "grad_norm": 0.07855965942144394, "learning_rate": 8.918878202718778e-06, "loss": 0.4852737486362457, "step": 8906 }, { "epoch": 1.6457945975909736, "grad_norm": 0.08639141917228699, "learning_rate": 8.916895001715134e-06, "loss": 0.614033043384552, "step": 8907 }, { "epoch": 1.6459793742998694, "grad_norm": 0.07663006335496902, "learning_rate": 8.914911843815682e-06, "loss": 0.43437522649765015, "step": 8908 }, { "epoch": 1.6461641510087652, "grad_norm": 0.07699207961559296, "learning_rate": 8.912928729099344e-06, "loss": 0.5516226887702942, "step": 8909 }, { "epoch": 1.646348927717661, "grad_norm": 0.07173573970794678, "learning_rate": 8.910945657645043e-06, "loss": 0.4079380929470062, "step": 8910 }, { "epoch": 1.6465337044265569, "grad_norm": 0.08935403823852539, "learning_rate": 8.908962629531695e-06, "loss": 0.5903975963592529, "step": 8911 }, { "epoch": 1.6467184811354527, "grad_norm": 0.07332302629947662, "learning_rate": 8.906979644838221e-06, "loss": 0.4380277097225189, "step": 8912 }, { "epoch": 1.6469032578443488, "grad_norm": 0.0666826069355011, "learning_rate": 8.904996703643542e-06, "loss": 0.33464354276657104, "step": 8913 }, { "epoch": 1.6470880345532446, "grad_norm": 0.06597012281417847, "learning_rate": 8.903013806026561e-06, "loss": 0.500480055809021, "step": 8914 }, { "epoch": 1.6472728112621404, "grad_norm": 0.1073843464255333, "learning_rate": 8.901030952066202e-06, "loss": 0.6993005871772766, "step": 8915 }, { "epoch": 1.6474575879710363, "grad_norm": 0.07667973637580872, "learning_rate": 8.899048141841376e-06, "loss": 0.5299189686775208, "step": 8916 }, { "epoch": 1.647642364679932, "grad_norm": 0.07186425477266312, "learning_rate": 8.897065375430987e-06, "loss": 0.41190212965011597, "step": 8917 }, { "epoch": 1.647827141388828, "grad_norm": 0.06618180125951767, "learning_rate": 8.895082652913943e-06, "loss": 0.37877678871154785, "step": 8918 }, { "epoch": 1.6480119180977237, "grad_norm": 0.07953804731369019, "learning_rate": 8.893099974369157e-06, "loss": 0.5013386011123657, "step": 8919 }, { "epoch": 1.6481966948066198, "grad_norm": 0.07290409505367279, "learning_rate": 8.891117339875526e-06, "loss": 0.49663516879081726, "step": 8920 }, { "epoch": 1.6483814715155156, "grad_norm": 0.09176815301179886, "learning_rate": 8.889134749511956e-06, "loss": 0.631584107875824, "step": 8921 }, { "epoch": 1.6485662482244114, "grad_norm": 0.0840664729475975, "learning_rate": 8.88715220335735e-06, "loss": 0.5021892189979553, "step": 8922 }, { "epoch": 1.6487510249333073, "grad_norm": 0.08249734342098236, "learning_rate": 8.8851697014906e-06, "loss": 0.5705278515815735, "step": 8923 }, { "epoch": 1.648935801642203, "grad_norm": 0.06764619797468185, "learning_rate": 8.88318724399061e-06, "loss": 0.392068475484848, "step": 8924 }, { "epoch": 1.649120578351099, "grad_norm": 0.09124281257390976, "learning_rate": 8.881204830936275e-06, "loss": 0.6002790927886963, "step": 8925 }, { "epoch": 1.6493053550599948, "grad_norm": 0.06591031700372696, "learning_rate": 8.879222462406485e-06, "loss": 0.4309958517551422, "step": 8926 }, { "epoch": 1.6494901317688906, "grad_norm": 0.0704890787601471, "learning_rate": 8.877240138480139e-06, "loss": 0.42929908633232117, "step": 8927 }, { "epoch": 1.6496749084777864, "grad_norm": 0.060363683849573135, "learning_rate": 8.875257859236119e-06, "loss": 0.4333549737930298, "step": 8928 }, { "epoch": 1.6498596851866822, "grad_norm": 0.08272654563188553, "learning_rate": 8.873275624753316e-06, "loss": 0.49419546127319336, "step": 8929 }, { "epoch": 1.650044461895578, "grad_norm": 0.07461053133010864, "learning_rate": 8.871293435110623e-06, "loss": 0.5192112922668457, "step": 8930 }, { "epoch": 1.650229238604474, "grad_norm": 0.0803760215640068, "learning_rate": 8.869311290386916e-06, "loss": 0.49166378378868103, "step": 8931 }, { "epoch": 1.6504140153133697, "grad_norm": 0.06983582675457001, "learning_rate": 8.867329190661082e-06, "loss": 0.4300093352794647, "step": 8932 }, { "epoch": 1.6505987920222656, "grad_norm": 0.05998978391289711, "learning_rate": 8.865347136012009e-06, "loss": 0.41076162457466125, "step": 8933 }, { "epoch": 1.6507835687311614, "grad_norm": 0.07758557796478271, "learning_rate": 8.863365126518562e-06, "loss": 0.488598495721817, "step": 8934 }, { "epoch": 1.6509683454400572, "grad_norm": 0.06825264543294907, "learning_rate": 8.861383162259628e-06, "loss": 0.43793922662734985, "step": 8935 }, { "epoch": 1.651153122148953, "grad_norm": 0.07090115547180176, "learning_rate": 8.859401243314088e-06, "loss": 0.4335429072380066, "step": 8936 }, { "epoch": 1.6513378988578489, "grad_norm": 0.07850585877895355, "learning_rate": 8.857419369760806e-06, "loss": 0.47743913531303406, "step": 8937 }, { "epoch": 1.6515226755667447, "grad_norm": 0.07450441271066666, "learning_rate": 8.855437541678655e-06, "loss": 0.4250149130821228, "step": 8938 }, { "epoch": 1.6517074522756405, "grad_norm": 0.08737654983997345, "learning_rate": 8.853455759146516e-06, "loss": 0.6291829347610474, "step": 8939 }, { "epoch": 1.6518922289845364, "grad_norm": 0.058018364012241364, "learning_rate": 8.851474022243247e-06, "loss": 0.3619850277900696, "step": 8940 }, { "epoch": 1.6520770056934322, "grad_norm": 0.07415791600942612, "learning_rate": 8.849492331047718e-06, "loss": 0.5303621888160706, "step": 8941 }, { "epoch": 1.6522617824023282, "grad_norm": 0.06933600455522537, "learning_rate": 8.847510685638797e-06, "loss": 0.4177916646003723, "step": 8942 }, { "epoch": 1.652446559111224, "grad_norm": 0.08505621552467346, "learning_rate": 8.845529086095342e-06, "loss": 0.5294852256774902, "step": 8943 }, { "epoch": 1.65263133582012, "grad_norm": 0.10572239756584167, "learning_rate": 8.84354753249622e-06, "loss": 0.7183526158332825, "step": 8944 }, { "epoch": 1.6528161125290157, "grad_norm": 0.06564278900623322, "learning_rate": 8.841566024920286e-06, "loss": 0.34837156534194946, "step": 8945 }, { "epoch": 1.6530008892379116, "grad_norm": 0.09466851502656937, "learning_rate": 8.8395845634464e-06, "loss": 0.5224567651748657, "step": 8946 }, { "epoch": 1.6531856659468074, "grad_norm": 0.08879232406616211, "learning_rate": 8.837603148153421e-06, "loss": 0.6343720555305481, "step": 8947 }, { "epoch": 1.6533704426557032, "grad_norm": 0.0756058394908905, "learning_rate": 8.835621779120197e-06, "loss": 0.48447737097740173, "step": 8948 }, { "epoch": 1.653555219364599, "grad_norm": 0.0774824246764183, "learning_rate": 8.833640456425583e-06, "loss": 0.5902532339096069, "step": 8949 }, { "epoch": 1.653739996073495, "grad_norm": 0.07572329789400101, "learning_rate": 8.831659180148433e-06, "loss": 0.5073521137237549, "step": 8950 }, { "epoch": 1.653924772782391, "grad_norm": 0.0775173082947731, "learning_rate": 8.829677950367589e-06, "loss": 0.4409608244895935, "step": 8951 }, { "epoch": 1.6541095494912867, "grad_norm": 0.09347783029079437, "learning_rate": 8.827696767161902e-06, "loss": 0.5821473598480225, "step": 8952 }, { "epoch": 1.6542943262001826, "grad_norm": 0.08198609203100204, "learning_rate": 8.82571563061022e-06, "loss": 0.5458663702011108, "step": 8953 }, { "epoch": 1.6544791029090784, "grad_norm": 0.08065678924322128, "learning_rate": 8.823734540791375e-06, "loss": 0.451998233795166, "step": 8954 }, { "epoch": 1.6546638796179742, "grad_norm": 0.08799226582050323, "learning_rate": 8.821753497784218e-06, "loss": 0.5489275455474854, "step": 8955 }, { "epoch": 1.65484865632687, "grad_norm": 0.07855572551488876, "learning_rate": 8.81977250166759e-06, "loss": 0.5080621838569641, "step": 8956 }, { "epoch": 1.6550334330357659, "grad_norm": 0.07618331909179688, "learning_rate": 8.817791552520319e-06, "loss": 0.7386259436607361, "step": 8957 }, { "epoch": 1.6552182097446617, "grad_norm": 0.07988817989826202, "learning_rate": 8.815810650421249e-06, "loss": 0.5634803175926208, "step": 8958 }, { "epoch": 1.6554029864535575, "grad_norm": 0.0562705397605896, "learning_rate": 8.813829795449206e-06, "loss": 0.3609471917152405, "step": 8959 }, { "epoch": 1.6555877631624534, "grad_norm": 0.09556792676448822, "learning_rate": 8.811848987683028e-06, "loss": 0.6133294701576233, "step": 8960 }, { "epoch": 1.6557725398713492, "grad_norm": 0.09546475857496262, "learning_rate": 8.809868227201546e-06, "loss": 0.7730337977409363, "step": 8961 }, { "epoch": 1.655957316580245, "grad_norm": 0.06718320399522781, "learning_rate": 8.807887514083581e-06, "loss": 0.34548744559288025, "step": 8962 }, { "epoch": 1.6561420932891409, "grad_norm": 0.07118339091539383, "learning_rate": 8.805906848407964e-06, "loss": 0.45795369148254395, "step": 8963 }, { "epoch": 1.6563268699980367, "grad_norm": 0.06916230171918869, "learning_rate": 8.80392623025352e-06, "loss": 0.45204415917396545, "step": 8964 }, { "epoch": 1.6565116467069325, "grad_norm": 0.07920881360769272, "learning_rate": 8.801945659699067e-06, "loss": 0.43370485305786133, "step": 8965 }, { "epoch": 1.6566964234158283, "grad_norm": 0.08380249887704849, "learning_rate": 8.799965136823432e-06, "loss": 0.5766981244087219, "step": 8966 }, { "epoch": 1.6568812001247242, "grad_norm": 0.09630127996206284, "learning_rate": 8.79798466170543e-06, "loss": 0.7253932952880859, "step": 8967 }, { "epoch": 1.65706597683362, "grad_norm": 0.07304397970438004, "learning_rate": 8.796004234423876e-06, "loss": 0.5263710021972656, "step": 8968 }, { "epoch": 1.6572507535425158, "grad_norm": 0.0875425785779953, "learning_rate": 8.794023855057587e-06, "loss": 0.5486422777175903, "step": 8969 }, { "epoch": 1.6574355302514117, "grad_norm": 0.05754028633236885, "learning_rate": 8.792043523685376e-06, "loss": 0.35505831241607666, "step": 8970 }, { "epoch": 1.6576203069603075, "grad_norm": 0.09025000035762787, "learning_rate": 8.790063240386053e-06, "loss": 0.7122763395309448, "step": 8971 }, { "epoch": 1.6578050836692035, "grad_norm": 0.08361772447824478, "learning_rate": 8.788083005238428e-06, "loss": 0.5144809484481812, "step": 8972 }, { "epoch": 1.6579898603780994, "grad_norm": 0.07230621576309204, "learning_rate": 8.786102818321311e-06, "loss": 0.47324246168136597, "step": 8973 }, { "epoch": 1.6581746370869952, "grad_norm": 0.07307907938957214, "learning_rate": 8.784122679713497e-06, "loss": 0.4546387493610382, "step": 8974 }, { "epoch": 1.658359413795891, "grad_norm": 0.07837999612092972, "learning_rate": 8.782142589493805e-06, "loss": 0.5022808313369751, "step": 8975 }, { "epoch": 1.6585441905047869, "grad_norm": 0.09872845560312271, "learning_rate": 8.780162547741022e-06, "loss": 0.5782057046890259, "step": 8976 }, { "epoch": 1.6587289672136827, "grad_norm": 0.09069226682186127, "learning_rate": 8.778182554533952e-06, "loss": 0.6260485053062439, "step": 8977 }, { "epoch": 1.6589137439225785, "grad_norm": 0.08357825875282288, "learning_rate": 8.776202609951398e-06, "loss": 0.6425731778144836, "step": 8978 }, { "epoch": 1.6590985206314746, "grad_norm": 0.07038763910531998, "learning_rate": 8.77422271407215e-06, "loss": 0.48733213543891907, "step": 8979 }, { "epoch": 1.6592832973403704, "grad_norm": 0.0719500184059143, "learning_rate": 8.772242866975e-06, "loss": 0.44783756136894226, "step": 8980 }, { "epoch": 1.6594680740492662, "grad_norm": 0.09439882636070251, "learning_rate": 8.770263068738747e-06, "loss": 0.595038652420044, "step": 8981 }, { "epoch": 1.659652850758162, "grad_norm": 0.06088631972670555, "learning_rate": 8.768283319442173e-06, "loss": 0.4202379882335663, "step": 8982 }, { "epoch": 1.6598376274670579, "grad_norm": 0.09072203934192657, "learning_rate": 8.76630361916407e-06, "loss": 0.5772908926010132, "step": 8983 }, { "epoch": 1.6600224041759537, "grad_norm": 0.0732671394944191, "learning_rate": 8.764323967983226e-06, "loss": 0.4386714696884155, "step": 8984 }, { "epoch": 1.6602071808848495, "grad_norm": 0.08566321432590485, "learning_rate": 8.76234436597842e-06, "loss": 0.5181201696395874, "step": 8985 }, { "epoch": 1.6603919575937454, "grad_norm": 0.07344254106283188, "learning_rate": 8.760364813228436e-06, "loss": 0.5022342205047607, "step": 8986 }, { "epoch": 1.6605767343026412, "grad_norm": 0.08056008815765381, "learning_rate": 8.758385309812055e-06, "loss": 0.6142107248306274, "step": 8987 }, { "epoch": 1.660761511011537, "grad_norm": 0.08112746477127075, "learning_rate": 8.756405855808053e-06, "loss": 0.5439735651016235, "step": 8988 }, { "epoch": 1.6609462877204328, "grad_norm": 0.08961106836795807, "learning_rate": 8.754426451295207e-06, "loss": 0.6286073327064514, "step": 8989 }, { "epoch": 1.6611310644293287, "grad_norm": 0.07029113173484802, "learning_rate": 8.752447096352295e-06, "loss": 0.36603888869285583, "step": 8990 }, { "epoch": 1.6613158411382245, "grad_norm": 0.10147112607955933, "learning_rate": 8.750467791058081e-06, "loss": 0.743262767791748, "step": 8991 }, { "epoch": 1.6615006178471203, "grad_norm": 0.06966464221477509, "learning_rate": 8.748488535491346e-06, "loss": 0.48661547899246216, "step": 8992 }, { "epoch": 1.6616853945560162, "grad_norm": 0.06567555665969849, "learning_rate": 8.746509329730846e-06, "loss": 0.43307769298553467, "step": 8993 }, { "epoch": 1.661870171264912, "grad_norm": 0.08893869817256927, "learning_rate": 8.74453017385535e-06, "loss": 0.4946390688419342, "step": 8994 }, { "epoch": 1.6620549479738078, "grad_norm": 0.07407506555318832, "learning_rate": 8.742551067943632e-06, "loss": 0.5503557324409485, "step": 8995 }, { "epoch": 1.6622397246827036, "grad_norm": 0.10296279937028885, "learning_rate": 8.740572012074445e-06, "loss": 0.7889887094497681, "step": 8996 }, { "epoch": 1.6624245013915995, "grad_norm": 0.08157563209533691, "learning_rate": 8.73859300632655e-06, "loss": 0.5192692875862122, "step": 8997 }, { "epoch": 1.6626092781004953, "grad_norm": 0.08204268664121628, "learning_rate": 8.73661405077871e-06, "loss": 0.5423938035964966, "step": 8998 }, { "epoch": 1.6627940548093911, "grad_norm": 0.07997702807188034, "learning_rate": 8.734635145509676e-06, "loss": 0.4689732789993286, "step": 8999 }, { "epoch": 1.662978831518287, "grad_norm": 0.0857803151011467, "learning_rate": 8.732656290598205e-06, "loss": 0.3920707404613495, "step": 9000 }, { "epoch": 1.662978831518287, "eval_loss": 0.5690487623214722, "eval_runtime": 172.3601, "eval_samples_per_second": 105.761, "eval_steps_per_second": 13.222, "step": 9000 }, { "epoch": 1.663163608227183, "grad_norm": 0.08445625007152557, "learning_rate": 8.730677486123048e-06, "loss": 0.4855717718601227, "step": 9001 }, { "epoch": 1.6633483849360788, "grad_norm": 0.08252844959497452, "learning_rate": 8.728698732162956e-06, "loss": 0.5647783875465393, "step": 9002 }, { "epoch": 1.6635331616449747, "grad_norm": 0.08127132058143616, "learning_rate": 8.726720028796678e-06, "loss": 0.5227984189987183, "step": 9003 }, { "epoch": 1.6637179383538705, "grad_norm": 0.0665494054555893, "learning_rate": 8.72474137610296e-06, "loss": 0.41443580389022827, "step": 9004 }, { "epoch": 1.6639027150627663, "grad_norm": 0.05049487203359604, "learning_rate": 8.722762774160543e-06, "loss": 0.3106989562511444, "step": 9005 }, { "epoch": 1.6640874917716622, "grad_norm": 0.07193901389837265, "learning_rate": 8.720784223048175e-06, "loss": 0.43464213609695435, "step": 9006 }, { "epoch": 1.664272268480558, "grad_norm": 0.09168123453855515, "learning_rate": 8.71880572284459e-06, "loss": 0.6453408002853394, "step": 9007 }, { "epoch": 1.664457045189454, "grad_norm": 0.08098369836807251, "learning_rate": 8.716827273628528e-06, "loss": 0.5169015526771545, "step": 9008 }, { "epoch": 1.6646418218983499, "grad_norm": 0.08499372750520706, "learning_rate": 8.714848875478732e-06, "loss": 0.5493677258491516, "step": 9009 }, { "epoch": 1.6648265986072457, "grad_norm": 0.06951579451560974, "learning_rate": 8.712870528473922e-06, "loss": 0.46776798367500305, "step": 9010 }, { "epoch": 1.6650113753161415, "grad_norm": 0.10221091657876968, "learning_rate": 8.71089223269284e-06, "loss": 0.7205289602279663, "step": 9011 }, { "epoch": 1.6651961520250373, "grad_norm": 0.07168225944042206, "learning_rate": 8.70891398821422e-06, "loss": 0.44184955954551697, "step": 9012 }, { "epoch": 1.6653809287339332, "grad_norm": 0.06919576227664948, "learning_rate": 8.706935795116779e-06, "loss": 0.47983217239379883, "step": 9013 }, { "epoch": 1.665565705442829, "grad_norm": 0.08107378333806992, "learning_rate": 8.704957653479245e-06, "loss": 0.5134053230285645, "step": 9014 }, { "epoch": 1.6657504821517248, "grad_norm": 0.06921125948429108, "learning_rate": 8.702979563380352e-06, "loss": 0.37715357542037964, "step": 9015 }, { "epoch": 1.6659352588606207, "grad_norm": 0.0681595578789711, "learning_rate": 8.70100152489881e-06, "loss": 0.4451044797897339, "step": 9016 }, { "epoch": 1.6661200355695165, "grad_norm": 0.07533681392669678, "learning_rate": 8.69902353811334e-06, "loss": 0.5605936646461487, "step": 9017 }, { "epoch": 1.6663048122784123, "grad_norm": 0.08548444509506226, "learning_rate": 8.697045603102673e-06, "loss": 0.5757061839103699, "step": 9018 }, { "epoch": 1.6664895889873081, "grad_norm": 0.0741836205124855, "learning_rate": 8.695067719945505e-06, "loss": 0.49671831727027893, "step": 9019 }, { "epoch": 1.666674365696204, "grad_norm": 0.06897836923599243, "learning_rate": 8.693089888720563e-06, "loss": 0.39301538467407227, "step": 9020 }, { "epoch": 1.6668591424050998, "grad_norm": 0.07714451104402542, "learning_rate": 8.691112109506556e-06, "loss": 0.4285025894641876, "step": 9021 }, { "epoch": 1.6670439191139956, "grad_norm": 0.06746784597635269, "learning_rate": 8.68913438238219e-06, "loss": 0.37637728452682495, "step": 9022 }, { "epoch": 1.6672286958228915, "grad_norm": 0.07533597201108932, "learning_rate": 8.687156707426175e-06, "loss": 0.5479862093925476, "step": 9023 }, { "epoch": 1.6674134725317873, "grad_norm": 0.10356532037258148, "learning_rate": 8.685179084717215e-06, "loss": 0.754237949848175, "step": 9024 }, { "epoch": 1.6675982492406831, "grad_norm": 0.08410041034221649, "learning_rate": 8.683201514334013e-06, "loss": 0.5808772444725037, "step": 9025 }, { "epoch": 1.667783025949579, "grad_norm": 0.08972388505935669, "learning_rate": 8.681223996355275e-06, "loss": 0.6561397910118103, "step": 9026 }, { "epoch": 1.6679678026584748, "grad_norm": 0.08572930097579956, "learning_rate": 8.679246530859693e-06, "loss": 0.7714314460754395, "step": 9027 }, { "epoch": 1.6681525793673706, "grad_norm": 0.07372952252626419, "learning_rate": 8.677269117925964e-06, "loss": 0.45184555649757385, "step": 9028 }, { "epoch": 1.6683373560762664, "grad_norm": 0.059556107968091965, "learning_rate": 8.675291757632794e-06, "loss": 0.32072627544403076, "step": 9029 }, { "epoch": 1.6685221327851625, "grad_norm": 0.0691826194524765, "learning_rate": 8.67331445005886e-06, "loss": 0.42070019245147705, "step": 9030 }, { "epoch": 1.6687069094940583, "grad_norm": 0.0846749022603035, "learning_rate": 8.671337195282862e-06, "loss": 0.5196872353553772, "step": 9031 }, { "epoch": 1.6688916862029541, "grad_norm": 0.0776742622256279, "learning_rate": 8.669359993383491e-06, "loss": 0.6171072721481323, "step": 9032 }, { "epoch": 1.66907646291185, "grad_norm": 0.09349127113819122, "learning_rate": 8.667382844439424e-06, "loss": 0.709235668182373, "step": 9033 }, { "epoch": 1.6692612396207458, "grad_norm": 0.0716007873415947, "learning_rate": 8.66540574852935e-06, "loss": 0.45914164185523987, "step": 9034 }, { "epoch": 1.6694460163296416, "grad_norm": 0.05829813703894615, "learning_rate": 8.663428705731957e-06, "loss": 0.36543506383895874, "step": 9035 }, { "epoch": 1.6696307930385375, "grad_norm": 0.061826031655073166, "learning_rate": 8.661451716125917e-06, "loss": 0.35469627380371094, "step": 9036 }, { "epoch": 1.6698155697474333, "grad_norm": 0.07189781963825226, "learning_rate": 8.65947477978991e-06, "loss": 0.38797852396965027, "step": 9037 }, { "epoch": 1.6700003464563293, "grad_norm": 0.07448961585760117, "learning_rate": 8.657497896802616e-06, "loss": 0.49736249446868896, "step": 9038 }, { "epoch": 1.6701851231652252, "grad_norm": 0.0732865110039711, "learning_rate": 8.655521067242703e-06, "loss": 0.41947612166404724, "step": 9039 }, { "epoch": 1.670369899874121, "grad_norm": 0.08115722984075546, "learning_rate": 8.653544291188846e-06, "loss": 0.5365369319915771, "step": 9040 }, { "epoch": 1.6705546765830168, "grad_norm": 0.07986792922019958, "learning_rate": 8.651567568719713e-06, "loss": 0.4790862500667572, "step": 9041 }, { "epoch": 1.6707394532919126, "grad_norm": 0.08238638937473297, "learning_rate": 8.649590899913972e-06, "loss": 0.5070688128471375, "step": 9042 }, { "epoch": 1.6709242300008085, "grad_norm": 0.08489910513162613, "learning_rate": 8.64761428485029e-06, "loss": 0.5985916256904602, "step": 9043 }, { "epoch": 1.6711090067097043, "grad_norm": 0.08574743568897247, "learning_rate": 8.645637723607326e-06, "loss": 0.45888751745224, "step": 9044 }, { "epoch": 1.6712937834186001, "grad_norm": 0.0878172218799591, "learning_rate": 8.643661216263744e-06, "loss": 0.5856870412826538, "step": 9045 }, { "epoch": 1.671478560127496, "grad_norm": 0.07768607884645462, "learning_rate": 8.641684762898203e-06, "loss": 0.5130709409713745, "step": 9046 }, { "epoch": 1.6716633368363918, "grad_norm": 0.07102810591459274, "learning_rate": 8.639708363589358e-06, "loss": 0.49262791872024536, "step": 9047 }, { "epoch": 1.6718481135452876, "grad_norm": 0.0737321674823761, "learning_rate": 8.637732018415865e-06, "loss": 0.5094331502914429, "step": 9048 }, { "epoch": 1.6720328902541834, "grad_norm": 0.070593923330307, "learning_rate": 8.63575572745638e-06, "loss": 0.45176151394844055, "step": 9049 }, { "epoch": 1.6722176669630793, "grad_norm": 0.08042187243700027, "learning_rate": 8.63377949078954e-06, "loss": 0.4876258671283722, "step": 9050 }, { "epoch": 1.672402443671975, "grad_norm": 0.06371616572141647, "learning_rate": 8.631803308494005e-06, "loss": 0.39658114314079285, "step": 9051 }, { "epoch": 1.672587220380871, "grad_norm": 0.07558796554803848, "learning_rate": 8.629827180648423e-06, "loss": 0.5124465227127075, "step": 9052 }, { "epoch": 1.6727719970897668, "grad_norm": 0.08541289716959, "learning_rate": 8.627851107331426e-06, "loss": 0.4782090485095978, "step": 9053 }, { "epoch": 1.6729567737986626, "grad_norm": 0.0727166086435318, "learning_rate": 8.625875088621662e-06, "loss": 0.5052157640457153, "step": 9054 }, { "epoch": 1.6731415505075584, "grad_norm": 0.09100605547428131, "learning_rate": 8.623899124597777e-06, "loss": 0.48215600848197937, "step": 9055 }, { "epoch": 1.6733263272164542, "grad_norm": 0.10287399590015411, "learning_rate": 8.621923215338397e-06, "loss": 0.6642863154411316, "step": 9056 }, { "epoch": 1.67351110392535, "grad_norm": 0.08276277035474777, "learning_rate": 8.619947360922162e-06, "loss": 0.5098517537117004, "step": 9057 }, { "epoch": 1.673695880634246, "grad_norm": 0.0931604877114296, "learning_rate": 8.617971561427705e-06, "loss": 0.5521177053451538, "step": 9058 }, { "epoch": 1.6738806573431417, "grad_norm": 0.0651213601231575, "learning_rate": 8.615995816933655e-06, "loss": 0.3899434506893158, "step": 9059 }, { "epoch": 1.6740654340520378, "grad_norm": 0.07249141484498978, "learning_rate": 8.614020127518642e-06, "loss": 0.4754531979560852, "step": 9060 }, { "epoch": 1.6742502107609336, "grad_norm": 0.10817329585552216, "learning_rate": 8.61204449326129e-06, "loss": 0.7253162264823914, "step": 9061 }, { "epoch": 1.6744349874698294, "grad_norm": 0.09194529801607132, "learning_rate": 8.610068914240227e-06, "loss": 0.6241350769996643, "step": 9062 }, { "epoch": 1.6746197641787253, "grad_norm": 0.09278565645217896, "learning_rate": 8.608093390534074e-06, "loss": 0.6508000493049622, "step": 9063 }, { "epoch": 1.674804540887621, "grad_norm": 0.08632796257734299, "learning_rate": 8.606117922221447e-06, "loss": 0.5592050552368164, "step": 9064 }, { "epoch": 1.674989317596517, "grad_norm": 0.07858537137508392, "learning_rate": 8.604142509380967e-06, "loss": 0.6045482754707336, "step": 9065 }, { "epoch": 1.6751740943054128, "grad_norm": 0.07718654721975327, "learning_rate": 8.602167152091247e-06, "loss": 0.4939536154270172, "step": 9066 }, { "epoch": 1.6753588710143088, "grad_norm": 0.0820227712392807, "learning_rate": 8.600191850430901e-06, "loss": 0.4896371364593506, "step": 9067 }, { "epoch": 1.6755436477232046, "grad_norm": 0.08000557869672775, "learning_rate": 8.59821660447854e-06, "loss": 0.48228392004966736, "step": 9068 }, { "epoch": 1.6757284244321005, "grad_norm": 0.07113906741142273, "learning_rate": 8.596241414312776e-06, "loss": 0.5333797931671143, "step": 9069 }, { "epoch": 1.6759132011409963, "grad_norm": 0.08210809528827667, "learning_rate": 8.594266280012206e-06, "loss": 0.6667137145996094, "step": 9070 }, { "epoch": 1.6760979778498921, "grad_norm": 0.07185039669275284, "learning_rate": 8.592291201655446e-06, "loss": 0.5223159193992615, "step": 9071 }, { "epoch": 1.676282754558788, "grad_norm": 0.06653635203838348, "learning_rate": 8.590316179321088e-06, "loss": 0.38789844512939453, "step": 9072 }, { "epoch": 1.6764675312676838, "grad_norm": 0.06960821896791458, "learning_rate": 8.588341213087734e-06, "loss": 0.5253861546516418, "step": 9073 }, { "epoch": 1.6766523079765796, "grad_norm": 0.05516147240996361, "learning_rate": 8.586366303033989e-06, "loss": 0.3092251121997833, "step": 9074 }, { "epoch": 1.6768370846854754, "grad_norm": 0.10345713794231415, "learning_rate": 8.584391449238439e-06, "loss": 0.6506808996200562, "step": 9075 }, { "epoch": 1.6770218613943713, "grad_norm": 0.06929323077201843, "learning_rate": 8.58241665177968e-06, "loss": 0.4382984936237335, "step": 9076 }, { "epoch": 1.677206638103267, "grad_norm": 0.0668279156088829, "learning_rate": 8.580441910736305e-06, "loss": 0.4805983901023865, "step": 9077 }, { "epoch": 1.677391414812163, "grad_norm": 0.079178586602211, "learning_rate": 8.5784672261869e-06, "loss": 0.5188418626785278, "step": 9078 }, { "epoch": 1.6775761915210587, "grad_norm": 0.07111608237028122, "learning_rate": 8.57649259821005e-06, "loss": 0.46800345182418823, "step": 9079 }, { "epoch": 1.6777609682299546, "grad_norm": 0.07276073843240738, "learning_rate": 8.574518026884345e-06, "loss": 0.36915266513824463, "step": 9080 }, { "epoch": 1.6779457449388504, "grad_norm": 0.07534977793693542, "learning_rate": 8.57254351228836e-06, "loss": 0.4622310400009155, "step": 9081 }, { "epoch": 1.6781305216477462, "grad_norm": 0.06693067401647568, "learning_rate": 8.570569054500676e-06, "loss": 0.4623390734195709, "step": 9082 }, { "epoch": 1.678315298356642, "grad_norm": 0.07008402049541473, "learning_rate": 8.568594653599875e-06, "loss": 0.3596046268939972, "step": 9083 }, { "epoch": 1.6785000750655379, "grad_norm": 0.06442760676145554, "learning_rate": 8.566620309664525e-06, "loss": 0.4336586594581604, "step": 9084 }, { "epoch": 1.6786848517744337, "grad_norm": 0.0926290825009346, "learning_rate": 8.564646022773204e-06, "loss": 0.6721182465553284, "step": 9085 }, { "epoch": 1.6788696284833295, "grad_norm": 0.08583901077508926, "learning_rate": 8.562671793004482e-06, "loss": 0.4880591630935669, "step": 9086 }, { "epoch": 1.6790544051922254, "grad_norm": 0.08112744241952896, "learning_rate": 8.560697620436924e-06, "loss": 0.559378445148468, "step": 9087 }, { "epoch": 1.6792391819011212, "grad_norm": 0.06255873292684555, "learning_rate": 8.5587235051491e-06, "loss": 0.4209061861038208, "step": 9088 }, { "epoch": 1.6794239586100173, "grad_norm": 0.07908415049314499, "learning_rate": 8.55674944721957e-06, "loss": 0.48501768708229065, "step": 9089 }, { "epoch": 1.679608735318913, "grad_norm": 0.09763891249895096, "learning_rate": 8.554775446726892e-06, "loss": 0.6048413515090942, "step": 9090 }, { "epoch": 1.679793512027809, "grad_norm": 0.08248091489076614, "learning_rate": 8.552801503749638e-06, "loss": 0.5712785720825195, "step": 9091 }, { "epoch": 1.6799782887367047, "grad_norm": 0.09981803596019745, "learning_rate": 8.550827618366352e-06, "loss": 0.5898700952529907, "step": 9092 }, { "epoch": 1.6801630654456006, "grad_norm": 0.07063844799995422, "learning_rate": 8.54885379065559e-06, "loss": 0.4967755675315857, "step": 9093 }, { "epoch": 1.6803478421544964, "grad_norm": 0.05967190861701965, "learning_rate": 8.546880020695913e-06, "loss": 0.481548935174942, "step": 9094 }, { "epoch": 1.6805326188633922, "grad_norm": 0.07140645384788513, "learning_rate": 8.544906308565861e-06, "loss": 0.5011531710624695, "step": 9095 }, { "epoch": 1.6807173955722883, "grad_norm": 0.055560152977705, "learning_rate": 8.542932654343987e-06, "loss": 0.3030966520309448, "step": 9096 }, { "epoch": 1.680902172281184, "grad_norm": 0.0700981393456459, "learning_rate": 8.540959058108835e-06, "loss": 0.47643208503723145, "step": 9097 }, { "epoch": 1.68108694899008, "grad_norm": 0.0505668930709362, "learning_rate": 8.538985519938947e-06, "loss": 0.30652886629104614, "step": 9098 }, { "epoch": 1.6812717256989758, "grad_norm": 0.07859101891517639, "learning_rate": 8.537012039912864e-06, "loss": 0.450094074010849, "step": 9099 }, { "epoch": 1.6814565024078716, "grad_norm": 0.06869371980428696, "learning_rate": 8.535038618109126e-06, "loss": 0.36125314235687256, "step": 9100 }, { "epoch": 1.6816412791167674, "grad_norm": 0.08045898377895355, "learning_rate": 8.533065254606266e-06, "loss": 0.5351243615150452, "step": 9101 }, { "epoch": 1.6818260558256632, "grad_norm": 0.0728968009352684, "learning_rate": 8.53109194948282e-06, "loss": 0.4623984098434448, "step": 9102 }, { "epoch": 1.682010832534559, "grad_norm": 0.06779941916465759, "learning_rate": 8.529118702817321e-06, "loss": 0.47451716661453247, "step": 9103 }, { "epoch": 1.682195609243455, "grad_norm": 0.06614118814468384, "learning_rate": 8.527145514688293e-06, "loss": 0.4593086242675781, "step": 9104 }, { "epoch": 1.6823803859523507, "grad_norm": 0.06442172080278397, "learning_rate": 8.525172385174271e-06, "loss": 0.31958237290382385, "step": 9105 }, { "epoch": 1.6825651626612466, "grad_norm": 0.08403867483139038, "learning_rate": 8.523199314353767e-06, "loss": 0.5540264844894409, "step": 9106 }, { "epoch": 1.6827499393701424, "grad_norm": 0.08076582103967667, "learning_rate": 8.52122630230531e-06, "loss": 0.6045275330543518, "step": 9107 }, { "epoch": 1.6829347160790382, "grad_norm": 0.0629241019487381, "learning_rate": 8.519253349107426e-06, "loss": 0.43549901247024536, "step": 9108 }, { "epoch": 1.683119492787934, "grad_norm": 0.06979610025882721, "learning_rate": 8.51728045483862e-06, "loss": 0.46166253089904785, "step": 9109 }, { "epoch": 1.6833042694968299, "grad_norm": 0.09503830224275589, "learning_rate": 8.515307619577415e-06, "loss": 0.5249154567718506, "step": 9110 }, { "epoch": 1.6834890462057257, "grad_norm": 0.10128334164619446, "learning_rate": 8.513334843402325e-06, "loss": 0.6709297299385071, "step": 9111 }, { "epoch": 1.6836738229146215, "grad_norm": 0.08861662447452545, "learning_rate": 8.511362126391853e-06, "loss": 0.5401449203491211, "step": 9112 }, { "epoch": 1.6838585996235174, "grad_norm": 0.06866878271102905, "learning_rate": 8.509389468624509e-06, "loss": 0.50966477394104, "step": 9113 }, { "epoch": 1.6840433763324132, "grad_norm": 0.06207570061087608, "learning_rate": 8.507416870178807e-06, "loss": 0.4012282192707062, "step": 9114 }, { "epoch": 1.684228153041309, "grad_norm": 0.0821419358253479, "learning_rate": 8.50544433113324e-06, "loss": 0.47681760787963867, "step": 9115 }, { "epoch": 1.6844129297502048, "grad_norm": 0.0714946985244751, "learning_rate": 8.503471851566313e-06, "loss": 0.5043104887008667, "step": 9116 }, { "epoch": 1.6845977064591007, "grad_norm": 0.0466858446598053, "learning_rate": 8.501499431556526e-06, "loss": 0.30423155426979065, "step": 9117 }, { "epoch": 1.6847824831679967, "grad_norm": 0.07633737474679947, "learning_rate": 8.499527071182371e-06, "loss": 0.5213798880577087, "step": 9118 }, { "epoch": 1.6849672598768926, "grad_norm": 0.07348579913377762, "learning_rate": 8.497554770522346e-06, "loss": 0.43600961565971375, "step": 9119 }, { "epoch": 1.6851520365857884, "grad_norm": 0.07686792314052582, "learning_rate": 8.49558252965494e-06, "loss": 0.4652104377746582, "step": 9120 }, { "epoch": 1.6853368132946842, "grad_norm": 0.07115445286035538, "learning_rate": 8.493610348658641e-06, "loss": 0.44471052289009094, "step": 9121 }, { "epoch": 1.68552159000358, "grad_norm": 0.08396821469068527, "learning_rate": 8.49163822761194e-06, "loss": 0.5738164782524109, "step": 9122 }, { "epoch": 1.6857063667124759, "grad_norm": 0.07715192437171936, "learning_rate": 8.489666166593317e-06, "loss": 0.4296110272407532, "step": 9123 }, { "epoch": 1.6858911434213717, "grad_norm": 0.09075894951820374, "learning_rate": 8.487694165681254e-06, "loss": 0.6163529753684998, "step": 9124 }, { "epoch": 1.6860759201302675, "grad_norm": 0.08199284225702286, "learning_rate": 8.485722224954237e-06, "loss": 0.5180081129074097, "step": 9125 }, { "epoch": 1.6862606968391636, "grad_norm": 0.06685285270214081, "learning_rate": 8.48375034449073e-06, "loss": 0.36368563771247864, "step": 9126 }, { "epoch": 1.6864454735480594, "grad_norm": 0.0648450031876564, "learning_rate": 8.48177852436922e-06, "loss": 0.34765690565109253, "step": 9127 }, { "epoch": 1.6866302502569552, "grad_norm": 0.06646620482206345, "learning_rate": 8.479806764668178e-06, "loss": 0.4463668763637543, "step": 9128 }, { "epoch": 1.686815026965851, "grad_norm": 0.06785228103399277, "learning_rate": 8.477835065466065e-06, "loss": 0.42032456398010254, "step": 9129 }, { "epoch": 1.686999803674747, "grad_norm": 0.08265707641839981, "learning_rate": 8.475863426841356e-06, "loss": 0.6559526920318604, "step": 9130 }, { "epoch": 1.6871845803836427, "grad_norm": 0.0722656324505806, "learning_rate": 8.473891848872517e-06, "loss": 0.4738600254058838, "step": 9131 }, { "epoch": 1.6873693570925385, "grad_norm": 0.07115411013364792, "learning_rate": 8.471920331638004e-06, "loss": 0.3999943137168884, "step": 9132 }, { "epoch": 1.6875541338014344, "grad_norm": 0.06939396262168884, "learning_rate": 8.469948875216281e-06, "loss": 0.42091649770736694, "step": 9133 }, { "epoch": 1.6877389105103302, "grad_norm": 0.07250919193029404, "learning_rate": 8.46797747968581e-06, "loss": 0.4605046808719635, "step": 9134 }, { "epoch": 1.687923687219226, "grad_norm": 0.07542398571968079, "learning_rate": 8.466006145125038e-06, "loss": 0.42565053701400757, "step": 9135 }, { "epoch": 1.6881084639281219, "grad_norm": 0.08695194125175476, "learning_rate": 8.464034871612426e-06, "loss": 0.6231099367141724, "step": 9136 }, { "epoch": 1.6882932406370177, "grad_norm": 0.08473557233810425, "learning_rate": 8.462063659226419e-06, "loss": 0.5846316814422607, "step": 9137 }, { "epoch": 1.6884780173459135, "grad_norm": 0.07632940262556076, "learning_rate": 8.460092508045465e-06, "loss": 0.4694315493106842, "step": 9138 }, { "epoch": 1.6886627940548093, "grad_norm": 0.07590238004922867, "learning_rate": 8.458121418148013e-06, "loss": 0.48024001717567444, "step": 9139 }, { "epoch": 1.6888475707637052, "grad_norm": 0.07789980620145798, "learning_rate": 8.456150389612503e-06, "loss": 0.5048647522926331, "step": 9140 }, { "epoch": 1.689032347472601, "grad_norm": 0.08727554976940155, "learning_rate": 8.454179422517378e-06, "loss": 0.5586506724357605, "step": 9141 }, { "epoch": 1.6892171241814968, "grad_norm": 0.06976776570081711, "learning_rate": 8.452208516941079e-06, "loss": 0.3690648078918457, "step": 9142 }, { "epoch": 1.6894019008903927, "grad_norm": 0.07651933282613754, "learning_rate": 8.450237672962034e-06, "loss": 0.580231785774231, "step": 9143 }, { "epoch": 1.6895866775992885, "grad_norm": 0.0674920603632927, "learning_rate": 8.448266890658683e-06, "loss": 0.368477463722229, "step": 9144 }, { "epoch": 1.6897714543081843, "grad_norm": 0.06983933597803116, "learning_rate": 8.446296170109456e-06, "loss": 0.4283688962459564, "step": 9145 }, { "epoch": 1.6899562310170801, "grad_norm": 0.08610700815916061, "learning_rate": 8.444325511392779e-06, "loss": 0.5489993095397949, "step": 9146 }, { "epoch": 1.690141007725976, "grad_norm": 0.08241380751132965, "learning_rate": 8.442354914587079e-06, "loss": 0.5156794190406799, "step": 9147 }, { "epoch": 1.690325784434872, "grad_norm": 0.09464959055185318, "learning_rate": 8.440384379770785e-06, "loss": 0.7287381887435913, "step": 9148 }, { "epoch": 1.6905105611437679, "grad_norm": 0.10081733018159866, "learning_rate": 8.438413907022307e-06, "loss": 0.624424397945404, "step": 9149 }, { "epoch": 1.6906953378526637, "grad_norm": 0.07631852477788925, "learning_rate": 8.436443496420071e-06, "loss": 0.5279861092567444, "step": 9150 }, { "epoch": 1.6908801145615595, "grad_norm": 0.07864688336849213, "learning_rate": 8.434473148042497e-06, "loss": 0.5660857558250427, "step": 9151 }, { "epoch": 1.6910648912704553, "grad_norm": 0.06365436315536499, "learning_rate": 8.432502861967991e-06, "loss": 0.40238404273986816, "step": 9152 }, { "epoch": 1.6912496679793512, "grad_norm": 0.06593908369541168, "learning_rate": 8.430532638274966e-06, "loss": 0.4057481586933136, "step": 9153 }, { "epoch": 1.691434444688247, "grad_norm": 0.0649256780743599, "learning_rate": 8.428562477041833e-06, "loss": 0.3830692768096924, "step": 9154 }, { "epoch": 1.691619221397143, "grad_norm": 0.07413194328546524, "learning_rate": 8.426592378346995e-06, "loss": 0.5539273023605347, "step": 9155 }, { "epoch": 1.6918039981060389, "grad_norm": 0.07905510812997818, "learning_rate": 8.42462234226886e-06, "loss": 0.6951673626899719, "step": 9156 }, { "epoch": 1.6919887748149347, "grad_norm": 0.08952132612466812, "learning_rate": 8.422652368885825e-06, "loss": 0.5128817558288574, "step": 9157 }, { "epoch": 1.6921735515238305, "grad_norm": 0.07787581533193588, "learning_rate": 8.420682458276291e-06, "loss": 0.43147289752960205, "step": 9158 }, { "epoch": 1.6923583282327264, "grad_norm": 0.07305676490068436, "learning_rate": 8.418712610518657e-06, "loss": 0.4102160334587097, "step": 9159 }, { "epoch": 1.6925431049416222, "grad_norm": 0.06288999319076538, "learning_rate": 8.41674282569131e-06, "loss": 0.4454600214958191, "step": 9160 }, { "epoch": 1.692727881650518, "grad_norm": 0.07740910351276398, "learning_rate": 8.414773103872647e-06, "loss": 0.5403621196746826, "step": 9161 }, { "epoch": 1.6929126583594138, "grad_norm": 0.08491584658622742, "learning_rate": 8.412803445141055e-06, "loss": 0.5626220703125, "step": 9162 }, { "epoch": 1.6930974350683097, "grad_norm": 0.07782825082540512, "learning_rate": 8.410833849574921e-06, "loss": 0.5752198696136475, "step": 9163 }, { "epoch": 1.6932822117772055, "grad_norm": 0.07523659616708755, "learning_rate": 8.408864317252626e-06, "loss": 0.5045156478881836, "step": 9164 }, { "epoch": 1.6934669884861013, "grad_norm": 0.0657854899764061, "learning_rate": 8.406894848252555e-06, "loss": 0.4462105333805084, "step": 9165 }, { "epoch": 1.6936517651949972, "grad_norm": 0.06609152257442474, "learning_rate": 8.404925442653084e-06, "loss": 0.4460630714893341, "step": 9166 }, { "epoch": 1.693836541903893, "grad_norm": 0.06646375358104706, "learning_rate": 8.40295610053259e-06, "loss": 0.3748570382595062, "step": 9167 }, { "epoch": 1.6940213186127888, "grad_norm": 0.08455774188041687, "learning_rate": 8.400986821969449e-06, "loss": 0.5032517910003662, "step": 9168 }, { "epoch": 1.6942060953216846, "grad_norm": 0.06838744878768921, "learning_rate": 8.399017607042025e-06, "loss": 0.42218753695487976, "step": 9169 }, { "epoch": 1.6943908720305805, "grad_norm": 0.09815676510334015, "learning_rate": 8.397048455828698e-06, "loss": 0.6644664406776428, "step": 9170 }, { "epoch": 1.6945756487394763, "grad_norm": 0.06904610246419907, "learning_rate": 8.395079368407822e-06, "loss": 0.4369781017303467, "step": 9171 }, { "epoch": 1.6947604254483721, "grad_norm": 0.07216240465641022, "learning_rate": 8.393110344857767e-06, "loss": 0.5592622756958008, "step": 9172 }, { "epoch": 1.694945202157268, "grad_norm": 0.08995957672595978, "learning_rate": 8.391141385256894e-06, "loss": 0.5972262024879456, "step": 9173 }, { "epoch": 1.6951299788661638, "grad_norm": 0.06630527228116989, "learning_rate": 8.38917248968356e-06, "loss": 0.34516778588294983, "step": 9174 }, { "epoch": 1.6953147555750596, "grad_norm": 0.07681643217802048, "learning_rate": 8.38720365821612e-06, "loss": 0.4759213924407959, "step": 9175 }, { "epoch": 1.6954995322839554, "grad_norm": 0.08156821131706238, "learning_rate": 8.38523489093293e-06, "loss": 0.42252761125564575, "step": 9176 }, { "epoch": 1.6956843089928515, "grad_norm": 0.11992143839597702, "learning_rate": 8.383266187912338e-06, "loss": 0.8242896795272827, "step": 9177 }, { "epoch": 1.6958690857017473, "grad_norm": 0.07057183980941772, "learning_rate": 8.381297549232696e-06, "loss": 0.4482508897781372, "step": 9178 }, { "epoch": 1.6960538624106432, "grad_norm": 0.06995099782943726, "learning_rate": 8.379328974972347e-06, "loss": 0.3609004616737366, "step": 9179 }, { "epoch": 1.696238639119539, "grad_norm": 0.0819055363535881, "learning_rate": 8.377360465209632e-06, "loss": 0.5192528367042542, "step": 9180 }, { "epoch": 1.6964234158284348, "grad_norm": 0.06376343965530396, "learning_rate": 8.375392020022894e-06, "loss": 0.3223073482513428, "step": 9181 }, { "epoch": 1.6966081925373306, "grad_norm": 0.09627628326416016, "learning_rate": 8.373423639490474e-06, "loss": 0.6265350580215454, "step": 9182 }, { "epoch": 1.6967929692462265, "grad_norm": 0.08493668586015701, "learning_rate": 8.371455323690701e-06, "loss": 0.6294567584991455, "step": 9183 }, { "epoch": 1.6969777459551225, "grad_norm": 0.07084629684686661, "learning_rate": 8.369487072701911e-06, "loss": 0.4158245623111725, "step": 9184 }, { "epoch": 1.6971625226640183, "grad_norm": 0.1010499894618988, "learning_rate": 8.367518886602439e-06, "loss": 0.6740158200263977, "step": 9185 }, { "epoch": 1.6973472993729142, "grad_norm": 0.08781524747610092, "learning_rate": 8.365550765470603e-06, "loss": 0.5869169235229492, "step": 9186 }, { "epoch": 1.69753207608181, "grad_norm": 0.08365757763385773, "learning_rate": 8.363582709384738e-06, "loss": 0.5274476408958435, "step": 9187 }, { "epoch": 1.6977168527907058, "grad_norm": 0.0663229301571846, "learning_rate": 8.361614718423157e-06, "loss": 0.3971540629863739, "step": 9188 }, { "epoch": 1.6979016294996017, "grad_norm": 0.0671120434999466, "learning_rate": 8.359646792664182e-06, "loss": 0.5282134413719177, "step": 9189 }, { "epoch": 1.6980864062084975, "grad_norm": 0.08804799616336823, "learning_rate": 8.357678932186141e-06, "loss": 0.5921247005462646, "step": 9190 }, { "epoch": 1.6982711829173933, "grad_norm": 0.08442988246679306, "learning_rate": 8.355711137067334e-06, "loss": 0.6660032272338867, "step": 9191 }, { "epoch": 1.6984559596262891, "grad_norm": 0.08538249880075455, "learning_rate": 8.35374340738608e-06, "loss": 0.49038660526275635, "step": 9192 }, { "epoch": 1.698640736335185, "grad_norm": 0.06832028180360794, "learning_rate": 8.351775743220691e-06, "loss": 0.4274335205554962, "step": 9193 }, { "epoch": 1.6988255130440808, "grad_norm": 0.07933923602104187, "learning_rate": 8.349808144649468e-06, "loss": 0.517008364200592, "step": 9194 }, { "epoch": 1.6990102897529766, "grad_norm": 0.06587305665016174, "learning_rate": 8.347840611750718e-06, "loss": 0.4429994821548462, "step": 9195 }, { "epoch": 1.6991950664618725, "grad_norm": 0.0778161957859993, "learning_rate": 8.345873144602743e-06, "loss": 0.4726182818412781, "step": 9196 }, { "epoch": 1.6993798431707683, "grad_norm": 0.07775843143463135, "learning_rate": 8.343905743283842e-06, "loss": 0.5512153506278992, "step": 9197 }, { "epoch": 1.6995646198796641, "grad_norm": 0.08814883232116699, "learning_rate": 8.34193840787231e-06, "loss": 0.5667763352394104, "step": 9198 }, { "epoch": 1.69974939658856, "grad_norm": 0.09019384533166885, "learning_rate": 8.339971138446445e-06, "loss": 0.5292229056358337, "step": 9199 }, { "epoch": 1.6999341732974558, "grad_norm": 0.06382526457309723, "learning_rate": 8.338003935084531e-06, "loss": 0.40799641609191895, "step": 9200 }, { "epoch": 1.7001189500063516, "grad_norm": 0.08602697402238846, "learning_rate": 8.336036797864866e-06, "loss": 0.6295297145843506, "step": 9201 }, { "epoch": 1.7003037267152474, "grad_norm": 0.059395682066679, "learning_rate": 8.334069726865727e-06, "loss": 0.3257017433643341, "step": 9202 }, { "epoch": 1.7004885034241433, "grad_norm": 0.0641443058848381, "learning_rate": 8.3321027221654e-06, "loss": 0.3689802885055542, "step": 9203 }, { "epoch": 1.700673280133039, "grad_norm": 0.07442375272512436, "learning_rate": 8.330135783842171e-06, "loss": 0.4756847321987152, "step": 9204 }, { "epoch": 1.700858056841935, "grad_norm": 0.09624522924423218, "learning_rate": 8.328168911974308e-06, "loss": 0.684943437576294, "step": 9205 }, { "epoch": 1.701042833550831, "grad_norm": 0.06564196199178696, "learning_rate": 8.326202106640093e-06, "loss": 0.4028245806694031, "step": 9206 }, { "epoch": 1.7012276102597268, "grad_norm": 0.07290355116128922, "learning_rate": 8.324235367917802e-06, "loss": 0.4737251400947571, "step": 9207 }, { "epoch": 1.7014123869686226, "grad_norm": 0.09687570482492447, "learning_rate": 8.322268695885697e-06, "loss": 0.6028020977973938, "step": 9208 }, { "epoch": 1.7015971636775185, "grad_norm": 0.06902943551540375, "learning_rate": 8.320302090622045e-06, "loss": 0.40670400857925415, "step": 9209 }, { "epoch": 1.7017819403864143, "grad_norm": 0.07954740524291992, "learning_rate": 8.318335552205124e-06, "loss": 0.5146956443786621, "step": 9210 }, { "epoch": 1.70196671709531, "grad_norm": 0.07294806092977524, "learning_rate": 8.31636908071318e-06, "loss": 0.4044404923915863, "step": 9211 }, { "epoch": 1.702151493804206, "grad_norm": 0.0694422796368599, "learning_rate": 8.31440267622448e-06, "loss": 0.48178523778915405, "step": 9212 }, { "epoch": 1.7023362705131018, "grad_norm": 0.08079212158918381, "learning_rate": 8.312436338817282e-06, "loss": 0.4246774911880493, "step": 9213 }, { "epoch": 1.7025210472219978, "grad_norm": 0.08284983783960342, "learning_rate": 8.310470068569835e-06, "loss": 0.6177600026130676, "step": 9214 }, { "epoch": 1.7027058239308936, "grad_norm": 0.08616185933351517, "learning_rate": 8.308503865560395e-06, "loss": 0.5661207437515259, "step": 9215 }, { "epoch": 1.7028906006397895, "grad_norm": 0.06548822671175003, "learning_rate": 8.306537729867212e-06, "loss": 0.5266295671463013, "step": 9216 }, { "epoch": 1.7030753773486853, "grad_norm": 0.05171108990907669, "learning_rate": 8.304571661568526e-06, "loss": 0.3543473482131958, "step": 9217 }, { "epoch": 1.7032601540575811, "grad_norm": 0.07920978218317032, "learning_rate": 8.302605660742585e-06, "loss": 0.5950316786766052, "step": 9218 }, { "epoch": 1.703444930766477, "grad_norm": 0.07471512258052826, "learning_rate": 8.300639727467626e-06, "loss": 0.4885013699531555, "step": 9219 }, { "epoch": 1.7036297074753728, "grad_norm": 0.08602702617645264, "learning_rate": 8.29867386182189e-06, "loss": 0.6973574757575989, "step": 9220 }, { "epoch": 1.7038144841842686, "grad_norm": 0.07249139994382858, "learning_rate": 8.296708063883614e-06, "loss": 0.46478256583213806, "step": 9221 }, { "epoch": 1.7039992608931644, "grad_norm": 0.09790276736021042, "learning_rate": 8.294742333731026e-06, "loss": 0.6673256158828735, "step": 9222 }, { "epoch": 1.7041840376020603, "grad_norm": 0.08677719533443451, "learning_rate": 8.292776671442356e-06, "loss": 0.6140323281288147, "step": 9223 }, { "epoch": 1.704368814310956, "grad_norm": 0.07667604833841324, "learning_rate": 8.29081107709584e-06, "loss": 0.4562872648239136, "step": 9224 }, { "epoch": 1.704553591019852, "grad_norm": 0.09175780415534973, "learning_rate": 8.288845550769688e-06, "loss": 0.5294307470321655, "step": 9225 }, { "epoch": 1.7047383677287478, "grad_norm": 0.07180283963680267, "learning_rate": 8.286880092542131e-06, "loss": 0.4086364209651947, "step": 9226 }, { "epoch": 1.7049231444376436, "grad_norm": 0.06642324477434158, "learning_rate": 8.284914702491392e-06, "loss": 0.33012962341308594, "step": 9227 }, { "epoch": 1.7051079211465394, "grad_norm": 0.0625172108411789, "learning_rate": 8.282949380695679e-06, "loss": 0.3410041332244873, "step": 9228 }, { "epoch": 1.7052926978554352, "grad_norm": 0.09009237587451935, "learning_rate": 8.280984127233204e-06, "loss": 0.5375388264656067, "step": 9229 }, { "epoch": 1.705477474564331, "grad_norm": 0.08445901423692703, "learning_rate": 8.27901894218219e-06, "loss": 0.42762553691864014, "step": 9230 }, { "epoch": 1.705662251273227, "grad_norm": 0.0737801045179367, "learning_rate": 8.277053825620836e-06, "loss": 0.4096711575984955, "step": 9231 }, { "epoch": 1.7058470279821227, "grad_norm": 0.08339770138263702, "learning_rate": 8.275088777627346e-06, "loss": 0.44599124789237976, "step": 9232 }, { "epoch": 1.7060318046910186, "grad_norm": 0.06168259680271149, "learning_rate": 8.27312379827993e-06, "loss": 0.43915095925331116, "step": 9233 }, { "epoch": 1.7062165813999144, "grad_norm": 0.06330515444278717, "learning_rate": 8.271158887656781e-06, "loss": 0.4240843951702118, "step": 9234 }, { "epoch": 1.7064013581088104, "grad_norm": 0.07995455712080002, "learning_rate": 8.269194045836103e-06, "loss": 0.5356221199035645, "step": 9235 }, { "epoch": 1.7065861348177063, "grad_norm": 0.07991659641265869, "learning_rate": 8.267229272896083e-06, "loss": 0.47236770391464233, "step": 9236 }, { "epoch": 1.706770911526602, "grad_norm": 0.07582999765872955, "learning_rate": 8.265264568914917e-06, "loss": 0.43814337253570557, "step": 9237 }, { "epoch": 1.706955688235498, "grad_norm": 0.08043278008699417, "learning_rate": 8.263299933970798e-06, "loss": 0.4860098659992218, "step": 9238 }, { "epoch": 1.7071404649443938, "grad_norm": 0.0776016041636467, "learning_rate": 8.261335368141904e-06, "loss": 0.419701486825943, "step": 9239 }, { "epoch": 1.7073252416532896, "grad_norm": 0.08165136724710464, "learning_rate": 8.259370871506423e-06, "loss": 0.5630422830581665, "step": 9240 }, { "epoch": 1.7075100183621854, "grad_norm": 0.09332025796175003, "learning_rate": 8.257406444142539e-06, "loss": 0.548369824886322, "step": 9241 }, { "epoch": 1.7076947950710812, "grad_norm": 0.06935670971870422, "learning_rate": 8.255442086128423e-06, "loss": 0.3939349949359894, "step": 9242 }, { "epoch": 1.7078795717799773, "grad_norm": 0.06551147997379303, "learning_rate": 8.253477797542256e-06, "loss": 0.41584062576293945, "step": 9243 }, { "epoch": 1.7080643484888731, "grad_norm": 0.07558514177799225, "learning_rate": 8.251513578462211e-06, "loss": 0.4720218777656555, "step": 9244 }, { "epoch": 1.708249125197769, "grad_norm": 0.08111558854579926, "learning_rate": 8.249549428966448e-06, "loss": 0.5089553594589233, "step": 9245 }, { "epoch": 1.7084339019066648, "grad_norm": 0.08173785358667374, "learning_rate": 8.247585349133145e-06, "loss": 0.5933139324188232, "step": 9246 }, { "epoch": 1.7086186786155606, "grad_norm": 0.08025600761175156, "learning_rate": 8.245621339040467e-06, "loss": 0.4453078806400299, "step": 9247 }, { "epoch": 1.7088034553244564, "grad_norm": 0.09482229501008987, "learning_rate": 8.243657398766565e-06, "loss": 0.5188925862312317, "step": 9248 }, { "epoch": 1.7089882320333523, "grad_norm": 0.10804635286331177, "learning_rate": 8.241693528389603e-06, "loss": 0.682178258895874, "step": 9249 }, { "epoch": 1.709173008742248, "grad_norm": 0.09234584122896194, "learning_rate": 8.239729727987745e-06, "loss": 0.5333462953567505, "step": 9250 }, { "epoch": 1.709357785451144, "grad_norm": 0.0879039317369461, "learning_rate": 8.23776599763913e-06, "loss": 0.49046385288238525, "step": 9251 }, { "epoch": 1.7095425621600397, "grad_norm": 0.08815449476242065, "learning_rate": 8.23580233742192e-06, "loss": 0.5991628766059875, "step": 9252 }, { "epoch": 1.7097273388689356, "grad_norm": 0.07602617889642715, "learning_rate": 8.23383874741425e-06, "loss": 0.48946478962898254, "step": 9253 }, { "epoch": 1.7099121155778314, "grad_norm": 0.07960982620716095, "learning_rate": 8.231875227694277e-06, "loss": 0.4935239255428314, "step": 9254 }, { "epoch": 1.7100968922867272, "grad_norm": 0.07796081900596619, "learning_rate": 8.22991177834014e-06, "loss": 0.5628326535224915, "step": 9255 }, { "epoch": 1.710281668995623, "grad_norm": 0.08486931771039963, "learning_rate": 8.227948399429973e-06, "loss": 0.5364896655082703, "step": 9256 }, { "epoch": 1.710466445704519, "grad_norm": 0.0651976615190506, "learning_rate": 8.225985091041914e-06, "loss": 0.41608723998069763, "step": 9257 }, { "epoch": 1.7106512224134147, "grad_norm": 0.09099406003952026, "learning_rate": 8.224021853254103e-06, "loss": 0.5208678841590881, "step": 9258 }, { "epoch": 1.7108359991223105, "grad_norm": 0.06338699907064438, "learning_rate": 8.222058686144664e-06, "loss": 0.36156728863716125, "step": 9259 }, { "epoch": 1.7110207758312064, "grad_norm": 0.08680493384599686, "learning_rate": 8.220095589791725e-06, "loss": 0.5534539818763733, "step": 9260 }, { "epoch": 1.7112055525401022, "grad_norm": 0.07716375589370728, "learning_rate": 8.218132564273415e-06, "loss": 0.564572811126709, "step": 9261 }, { "epoch": 1.711390329248998, "grad_norm": 0.06359605491161346, "learning_rate": 8.216169609667854e-06, "loss": 0.4242609739303589, "step": 9262 }, { "epoch": 1.7115751059578939, "grad_norm": 0.08185631781816483, "learning_rate": 8.21420672605316e-06, "loss": 0.48531660437583923, "step": 9263 }, { "epoch": 1.7117598826667897, "grad_norm": 0.06824252009391785, "learning_rate": 8.212243913507456e-06, "loss": 0.49250736832618713, "step": 9264 }, { "epoch": 1.7119446593756857, "grad_norm": 0.06700660288333893, "learning_rate": 8.210281172108844e-06, "loss": 0.414394348859787, "step": 9265 }, { "epoch": 1.7121294360845816, "grad_norm": 0.085497185587883, "learning_rate": 8.208318501935451e-06, "loss": 0.6617801189422607, "step": 9266 }, { "epoch": 1.7123142127934774, "grad_norm": 0.07121191173791885, "learning_rate": 8.20635590306537e-06, "loss": 0.4714220464229584, "step": 9267 }, { "epoch": 1.7124989895023732, "grad_norm": 0.0877823531627655, "learning_rate": 8.204393375576713e-06, "loss": 0.5986184477806091, "step": 9268 }, { "epoch": 1.712683766211269, "grad_norm": 0.07633073627948761, "learning_rate": 8.202430919547584e-06, "loss": 0.48237112164497375, "step": 9269 }, { "epoch": 1.7128685429201649, "grad_norm": 0.0636589303612709, "learning_rate": 8.200468535056076e-06, "loss": 0.42352762818336487, "step": 9270 }, { "epoch": 1.7130533196290607, "grad_norm": 0.08576313406229019, "learning_rate": 8.198506222180294e-06, "loss": 0.6194055676460266, "step": 9271 }, { "epoch": 1.7132380963379568, "grad_norm": 0.07260756194591522, "learning_rate": 8.196543980998328e-06, "loss": 0.4961593747138977, "step": 9272 }, { "epoch": 1.7134228730468526, "grad_norm": 0.07460932433605194, "learning_rate": 8.194581811588268e-06, "loss": 0.6499295234680176, "step": 9273 }, { "epoch": 1.7136076497557484, "grad_norm": 0.07087811827659607, "learning_rate": 8.192619714028202e-06, "loss": 0.5215992331504822, "step": 9274 }, { "epoch": 1.7137924264646442, "grad_norm": 0.06718888133764267, "learning_rate": 8.190657688396223e-06, "loss": 0.36170563101768494, "step": 9275 }, { "epoch": 1.71397720317354, "grad_norm": 0.09156475961208344, "learning_rate": 8.1886957347704e-06, "loss": 0.5560495853424072, "step": 9276 }, { "epoch": 1.714161979882436, "grad_norm": 0.07081842422485352, "learning_rate": 8.186733853228823e-06, "loss": 0.47347983717918396, "step": 9277 }, { "epoch": 1.7143467565913317, "grad_norm": 0.06800918281078339, "learning_rate": 8.184772043849568e-06, "loss": 0.418511301279068, "step": 9278 }, { "epoch": 1.7145315333002276, "grad_norm": 0.08380264043807983, "learning_rate": 8.182810306710703e-06, "loss": 0.5594571828842163, "step": 9279 }, { "epoch": 1.7147163100091234, "grad_norm": 0.104112409055233, "learning_rate": 8.180848641890301e-06, "loss": 0.7429915070533752, "step": 9280 }, { "epoch": 1.7149010867180192, "grad_norm": 0.08604252338409424, "learning_rate": 8.178887049466438e-06, "loss": 0.5117092132568359, "step": 9281 }, { "epoch": 1.715085863426915, "grad_norm": 0.07407425343990326, "learning_rate": 8.176925529517168e-06, "loss": 0.4131082594394684, "step": 9282 }, { "epoch": 1.7152706401358109, "grad_norm": 0.09201198071241379, "learning_rate": 8.174964082120563e-06, "loss": 0.4691229462623596, "step": 9283 }, { "epoch": 1.7154554168447067, "grad_norm": 0.07233555614948273, "learning_rate": 8.173002707354673e-06, "loss": 0.47029054164886475, "step": 9284 }, { "epoch": 1.7156401935536025, "grad_norm": 0.08242167532444, "learning_rate": 8.171041405297558e-06, "loss": 0.4827267527580261, "step": 9285 }, { "epoch": 1.7158249702624984, "grad_norm": 0.10752084106206894, "learning_rate": 8.16908017602728e-06, "loss": 0.7165725827217102, "step": 9286 }, { "epoch": 1.7160097469713942, "grad_norm": 0.06484485417604446, "learning_rate": 8.167119019621878e-06, "loss": 0.3245079517364502, "step": 9287 }, { "epoch": 1.71619452368029, "grad_norm": 0.07973135262727737, "learning_rate": 8.165157936159404e-06, "loss": 0.5481160879135132, "step": 9288 }, { "epoch": 1.7163793003891858, "grad_norm": 0.08740073442459106, "learning_rate": 8.163196925717906e-06, "loss": 0.46844935417175293, "step": 9289 }, { "epoch": 1.7165640770980817, "grad_norm": 0.07873176783323288, "learning_rate": 8.16123598837542e-06, "loss": 0.551338791847229, "step": 9290 }, { "epoch": 1.7167488538069775, "grad_norm": 0.06615818291902542, "learning_rate": 8.159275124209992e-06, "loss": 0.3472258448600769, "step": 9291 }, { "epoch": 1.7169336305158733, "grad_norm": 0.06066036969423294, "learning_rate": 8.157314333299656e-06, "loss": 0.376941978931427, "step": 9292 }, { "epoch": 1.7171184072247692, "grad_norm": 0.06927074491977692, "learning_rate": 8.155353615722442e-06, "loss": 0.4731561541557312, "step": 9293 }, { "epoch": 1.7173031839336652, "grad_norm": 0.0620710514485836, "learning_rate": 8.153392971556384e-06, "loss": 0.4183216392993927, "step": 9294 }, { "epoch": 1.717487960642561, "grad_norm": 0.08235717564821243, "learning_rate": 8.151432400879508e-06, "loss": 0.6418178677558899, "step": 9295 }, { "epoch": 1.7176727373514569, "grad_norm": 0.0678219273686409, "learning_rate": 8.14947190376984e-06, "loss": 0.43770119547843933, "step": 9296 }, { "epoch": 1.7178575140603527, "grad_norm": 0.04814592003822327, "learning_rate": 8.147511480305399e-06, "loss": 0.2407323718070984, "step": 9297 }, { "epoch": 1.7180422907692485, "grad_norm": 0.09880015254020691, "learning_rate": 8.14555113056421e-06, "loss": 0.5883893966674805, "step": 9298 }, { "epoch": 1.7182270674781444, "grad_norm": 0.08084731549024582, "learning_rate": 8.143590854624279e-06, "loss": 0.5978436470031738, "step": 9299 }, { "epoch": 1.7184118441870402, "grad_norm": 0.07525096833705902, "learning_rate": 8.141630652563627e-06, "loss": 0.5851767063140869, "step": 9300 }, { "epoch": 1.7185966208959362, "grad_norm": 0.08009577542543411, "learning_rate": 8.139670524460259e-06, "loss": 0.661676824092865, "step": 9301 }, { "epoch": 1.718781397604832, "grad_norm": 0.06729946285486221, "learning_rate": 8.137710470392182e-06, "loss": 0.41329067945480347, "step": 9302 }, { "epoch": 1.718966174313728, "grad_norm": 0.06843673437833786, "learning_rate": 8.135750490437409e-06, "loss": 0.41551798582077026, "step": 9303 }, { "epoch": 1.7191509510226237, "grad_norm": 0.08220253139734268, "learning_rate": 8.133790584673929e-06, "loss": 0.5747065544128418, "step": 9304 }, { "epoch": 1.7193357277315195, "grad_norm": 0.1003865972161293, "learning_rate": 8.131830753179743e-06, "loss": 0.6246880292892456, "step": 9305 }, { "epoch": 1.7195205044404154, "grad_norm": 0.09194488078355789, "learning_rate": 8.129870996032854e-06, "loss": 0.5484771728515625, "step": 9306 }, { "epoch": 1.7197052811493112, "grad_norm": 0.08729182183742523, "learning_rate": 8.127911313311244e-06, "loss": 0.5819669961929321, "step": 9307 }, { "epoch": 1.719890057858207, "grad_norm": 0.07935697585344315, "learning_rate": 8.125951705092908e-06, "loss": 0.6015247106552124, "step": 9308 }, { "epoch": 1.7200748345671029, "grad_norm": 0.0668381005525589, "learning_rate": 8.123992171455832e-06, "loss": 0.4277127981185913, "step": 9309 }, { "epoch": 1.7202596112759987, "grad_norm": 0.09297236800193787, "learning_rate": 8.122032712477996e-06, "loss": 0.6343827247619629, "step": 9310 }, { "epoch": 1.7204443879848945, "grad_norm": 0.08086070418357849, "learning_rate": 8.120073328237383e-06, "loss": 0.6234132647514343, "step": 9311 }, { "epoch": 1.7206291646937903, "grad_norm": 0.07798231393098831, "learning_rate": 8.118114018811973e-06, "loss": 0.5367053151130676, "step": 9312 }, { "epoch": 1.7208139414026862, "grad_norm": 0.08083062618970871, "learning_rate": 8.116154784279735e-06, "loss": 0.7250441908836365, "step": 9313 }, { "epoch": 1.720998718111582, "grad_norm": 0.07413654029369354, "learning_rate": 8.114195624718643e-06, "loss": 0.48422250151634216, "step": 9314 }, { "epoch": 1.7211834948204778, "grad_norm": 0.07940345257520676, "learning_rate": 8.112236540206667e-06, "loss": 0.5368062853813171, "step": 9315 }, { "epoch": 1.7213682715293737, "grad_norm": 0.07169221341609955, "learning_rate": 8.110277530821768e-06, "loss": 0.501593828201294, "step": 9316 }, { "epoch": 1.7215530482382695, "grad_norm": 0.07917898893356323, "learning_rate": 8.108318596641913e-06, "loss": 0.5751444697380066, "step": 9317 }, { "epoch": 1.7217378249471653, "grad_norm": 0.0815306231379509, "learning_rate": 8.106359737745057e-06, "loss": 0.5254802703857422, "step": 9318 }, { "epoch": 1.7219226016560611, "grad_norm": 0.10219857096672058, "learning_rate": 8.104400954209161e-06, "loss": 0.6476911306381226, "step": 9319 }, { "epoch": 1.722107378364957, "grad_norm": 0.06414046138525009, "learning_rate": 8.10244224611218e-06, "loss": 0.39360371232032776, "step": 9320 }, { "epoch": 1.7222921550738528, "grad_norm": 0.07243400812149048, "learning_rate": 8.100483613532052e-06, "loss": 0.584331750869751, "step": 9321 }, { "epoch": 1.7224769317827486, "grad_norm": 0.07788241654634476, "learning_rate": 8.09852505654674e-06, "loss": 0.49636155366897583, "step": 9322 }, { "epoch": 1.7226617084916447, "grad_norm": 0.07601357251405716, "learning_rate": 8.096566575234183e-06, "loss": 0.5032370090484619, "step": 9323 }, { "epoch": 1.7228464852005405, "grad_norm": 0.07387128472328186, "learning_rate": 8.094608169672318e-06, "loss": 0.6311846971511841, "step": 9324 }, { "epoch": 1.7230312619094363, "grad_norm": 0.0795421153306961, "learning_rate": 8.092649839939084e-06, "loss": 0.5139166712760925, "step": 9325 }, { "epoch": 1.7232160386183322, "grad_norm": 0.08422742038965225, "learning_rate": 8.090691586112424e-06, "loss": 0.4988109767436981, "step": 9326 }, { "epoch": 1.723400815327228, "grad_norm": 0.05840161815285683, "learning_rate": 8.088733408270265e-06, "loss": 0.3771735429763794, "step": 9327 }, { "epoch": 1.7235855920361238, "grad_norm": 0.050449687987565994, "learning_rate": 8.086775306490532e-06, "loss": 0.39605778455734253, "step": 9328 }, { "epoch": 1.7237703687450197, "grad_norm": 0.07807933539152145, "learning_rate": 8.084817280851162e-06, "loss": 0.4537414312362671, "step": 9329 }, { "epoch": 1.7239551454539155, "grad_norm": 0.07585584372282028, "learning_rate": 8.082859331430068e-06, "loss": 0.48708009719848633, "step": 9330 }, { "epoch": 1.7241399221628115, "grad_norm": 0.07332560420036316, "learning_rate": 8.08090145830518e-06, "loss": 0.4790911376476288, "step": 9331 }, { "epoch": 1.7243246988717074, "grad_norm": 0.07301469892263412, "learning_rate": 8.078943661554403e-06, "loss": 0.4665886461734772, "step": 9332 }, { "epoch": 1.7245094755806032, "grad_norm": 0.05806805193424225, "learning_rate": 8.076985941255662e-06, "loss": 0.3560118079185486, "step": 9333 }, { "epoch": 1.724694252289499, "grad_norm": 0.07970564812421799, "learning_rate": 8.075028297486865e-06, "loss": 0.5235270261764526, "step": 9334 }, { "epoch": 1.7248790289983948, "grad_norm": 0.07301004976034164, "learning_rate": 8.073070730325917e-06, "loss": 0.4996209740638733, "step": 9335 }, { "epoch": 1.7250638057072907, "grad_norm": 0.06933730840682983, "learning_rate": 8.071113239850725e-06, "loss": 0.43340930342674255, "step": 9336 }, { "epoch": 1.7252485824161865, "grad_norm": 0.06640269607305527, "learning_rate": 8.069155826139195e-06, "loss": 0.37686920166015625, "step": 9337 }, { "epoch": 1.7254333591250823, "grad_norm": 0.08653712272644043, "learning_rate": 8.067198489269218e-06, "loss": 0.6015225648880005, "step": 9338 }, { "epoch": 1.7256181358339782, "grad_norm": 0.06931693851947784, "learning_rate": 8.065241229318696e-06, "loss": 0.3466190695762634, "step": 9339 }, { "epoch": 1.725802912542874, "grad_norm": 0.08971694856882095, "learning_rate": 8.063284046365522e-06, "loss": 0.5727711915969849, "step": 9340 }, { "epoch": 1.7259876892517698, "grad_norm": 0.08386840671300888, "learning_rate": 8.06132694048758e-06, "loss": 0.6206101179122925, "step": 9341 }, { "epoch": 1.7261724659606656, "grad_norm": 0.06562499701976776, "learning_rate": 8.059369911762761e-06, "loss": 0.42439258098602295, "step": 9342 }, { "epoch": 1.7263572426695615, "grad_norm": 0.07445552945137024, "learning_rate": 8.057412960268951e-06, "loss": 0.5168936848640442, "step": 9343 }, { "epoch": 1.7265420193784573, "grad_norm": 0.07908100634813309, "learning_rate": 8.055456086084025e-06, "loss": 0.647506833076477, "step": 9344 }, { "epoch": 1.7267267960873531, "grad_norm": 0.07324788719415665, "learning_rate": 8.053499289285862e-06, "loss": 0.5335649847984314, "step": 9345 }, { "epoch": 1.726911572796249, "grad_norm": 0.0806252658367157, "learning_rate": 8.051542569952343e-06, "loss": 0.4795989692211151, "step": 9346 }, { "epoch": 1.7270963495051448, "grad_norm": 0.10036157071590424, "learning_rate": 8.04958592816133e-06, "loss": 0.6414546370506287, "step": 9347 }, { "epoch": 1.7272811262140406, "grad_norm": 0.09356236457824707, "learning_rate": 8.047629363990696e-06, "loss": 0.6074750423431396, "step": 9348 }, { "epoch": 1.7274659029229364, "grad_norm": 0.058355070650577545, "learning_rate": 8.045672877518303e-06, "loss": 0.40787652134895325, "step": 9349 }, { "epoch": 1.7276506796318323, "grad_norm": 0.07903466373682022, "learning_rate": 8.043716468822016e-06, "loss": 0.438971608877182, "step": 9350 }, { "epoch": 1.727835456340728, "grad_norm": 0.0694471225142479, "learning_rate": 8.041760137979696e-06, "loss": 0.365113765001297, "step": 9351 }, { "epoch": 1.728020233049624, "grad_norm": 0.08171175420284271, "learning_rate": 8.039803885069193e-06, "loss": 0.5658257603645325, "step": 9352 }, { "epoch": 1.72820500975852, "grad_norm": 0.08628968894481659, "learning_rate": 8.037847710168362e-06, "loss": 0.44679346680641174, "step": 9353 }, { "epoch": 1.7283897864674158, "grad_norm": 0.08869720995426178, "learning_rate": 8.035891613355055e-06, "loss": 0.569314181804657, "step": 9354 }, { "epoch": 1.7285745631763116, "grad_norm": 0.0922289565205574, "learning_rate": 8.033935594707116e-06, "loss": 0.6781488656997681, "step": 9355 }, { "epoch": 1.7287593398852075, "grad_norm": 0.07456088811159134, "learning_rate": 8.031979654302389e-06, "loss": 0.5413568019866943, "step": 9356 }, { "epoch": 1.7289441165941033, "grad_norm": 0.09318801015615463, "learning_rate": 8.030023792218717e-06, "loss": 0.722627580165863, "step": 9357 }, { "epoch": 1.7291288933029991, "grad_norm": 0.07570246607065201, "learning_rate": 8.028068008533931e-06, "loss": 0.41319751739501953, "step": 9358 }, { "epoch": 1.729313670011895, "grad_norm": 0.07300356030464172, "learning_rate": 8.026112303325872e-06, "loss": 0.427320659160614, "step": 9359 }, { "epoch": 1.729498446720791, "grad_norm": 0.09192816913127899, "learning_rate": 8.02415667667237e-06, "loss": 0.48963406682014465, "step": 9360 }, { "epoch": 1.7296832234296868, "grad_norm": 0.07422766089439392, "learning_rate": 8.022201128651244e-06, "loss": 0.41928577423095703, "step": 9361 }, { "epoch": 1.7298680001385827, "grad_norm": 0.09352274239063263, "learning_rate": 8.020245659340329e-06, "loss": 0.5814811587333679, "step": 9362 }, { "epoch": 1.7300527768474785, "grad_norm": 0.05729234963655472, "learning_rate": 8.018290268817446e-06, "loss": 0.3619372844696045, "step": 9363 }, { "epoch": 1.7302375535563743, "grad_norm": 0.07728464156389236, "learning_rate": 8.016334957160405e-06, "loss": 0.5245939493179321, "step": 9364 }, { "epoch": 1.7304223302652701, "grad_norm": 0.06916660815477371, "learning_rate": 8.01437972444703e-06, "loss": 0.5502699613571167, "step": 9365 }, { "epoch": 1.730607106974166, "grad_norm": 0.08366513252258301, "learning_rate": 8.012424570755129e-06, "loss": 0.5558500289916992, "step": 9366 }, { "epoch": 1.7307918836830618, "grad_norm": 0.08778365701436996, "learning_rate": 8.01046949616251e-06, "loss": 0.588188886642456, "step": 9367 }, { "epoch": 1.7309766603919576, "grad_norm": 0.06705878674983978, "learning_rate": 8.008514500746984e-06, "loss": 0.47494634985923767, "step": 9368 }, { "epoch": 1.7311614371008535, "grad_norm": 0.08472789078950882, "learning_rate": 8.006559584586346e-06, "loss": 0.6099481582641602, "step": 9369 }, { "epoch": 1.7313462138097493, "grad_norm": 0.08809303492307663, "learning_rate": 8.004604747758403e-06, "loss": 0.5671088695526123, "step": 9370 }, { "epoch": 1.7315309905186451, "grad_norm": 0.06466368585824966, "learning_rate": 8.002649990340947e-06, "loss": 0.3539324104785919, "step": 9371 }, { "epoch": 1.731715767227541, "grad_norm": 0.06473246961832047, "learning_rate": 8.000695312411773e-06, "loss": 0.389993280172348, "step": 9372 }, { "epoch": 1.7319005439364368, "grad_norm": 0.09119462966918945, "learning_rate": 7.998740714048669e-06, "loss": 0.5501751899719238, "step": 9373 }, { "epoch": 1.7320853206453326, "grad_norm": 0.08342806994915009, "learning_rate": 7.996786195329426e-06, "loss": 0.504432737827301, "step": 9374 }, { "epoch": 1.7322700973542284, "grad_norm": 0.07765016704797745, "learning_rate": 7.994831756331822e-06, "loss": 0.42915263772010803, "step": 9375 }, { "epoch": 1.7324548740631243, "grad_norm": 0.09550423920154572, "learning_rate": 7.992877397133643e-06, "loss": 0.6175699234008789, "step": 9376 }, { "epoch": 1.73263965077202, "grad_norm": 0.09117421507835388, "learning_rate": 7.990923117812665e-06, "loss": 0.604438841342926, "step": 9377 }, { "epoch": 1.732824427480916, "grad_norm": 0.06244116649031639, "learning_rate": 7.98896891844666e-06, "loss": 0.3006291389465332, "step": 9378 }, { "epoch": 1.7330092041898117, "grad_norm": 0.09400566667318344, "learning_rate": 7.987014799113398e-06, "loss": 0.6906332969665527, "step": 9379 }, { "epoch": 1.7331939808987076, "grad_norm": 0.07890813797712326, "learning_rate": 7.985060759890656e-06, "loss": 0.6461838483810425, "step": 9380 }, { "epoch": 1.7333787576076034, "grad_norm": 0.08000336587429047, "learning_rate": 7.983106800856183e-06, "loss": 0.6111680865287781, "step": 9381 }, { "epoch": 1.7335635343164995, "grad_norm": 0.07113444805145264, "learning_rate": 7.981152922087759e-06, "loss": 0.48228105902671814, "step": 9382 }, { "epoch": 1.7337483110253953, "grad_norm": 0.07770003378391266, "learning_rate": 7.979199123663126e-06, "loss": 0.47291597723960876, "step": 9383 }, { "epoch": 1.7339330877342911, "grad_norm": 0.08316867053508759, "learning_rate": 7.977245405660045e-06, "loss": 0.5423946976661682, "step": 9384 }, { "epoch": 1.734117864443187, "grad_norm": 0.06833017617464066, "learning_rate": 7.975291768156272e-06, "loss": 0.4052598178386688, "step": 9385 }, { "epoch": 1.7343026411520828, "grad_norm": 0.09429217129945755, "learning_rate": 7.97333821122955e-06, "loss": 0.6468340754508972, "step": 9386 }, { "epoch": 1.7344874178609786, "grad_norm": 0.09072468429803848, "learning_rate": 7.971384734957626e-06, "loss": 0.5985592603683472, "step": 9387 }, { "epoch": 1.7346721945698744, "grad_norm": 0.06963697820901871, "learning_rate": 7.969431339418245e-06, "loss": 0.3786318600177765, "step": 9388 }, { "epoch": 1.7348569712787705, "grad_norm": 0.08657761663198471, "learning_rate": 7.967478024689143e-06, "loss": 0.5320836305618286, "step": 9389 }, { "epoch": 1.7350417479876663, "grad_norm": 0.08098902553319931, "learning_rate": 7.965524790848055e-06, "loss": 0.6015971302986145, "step": 9390 }, { "epoch": 1.7352265246965621, "grad_norm": 0.07428756356239319, "learning_rate": 7.963571637972717e-06, "loss": 0.49490147829055786, "step": 9391 }, { "epoch": 1.735411301405458, "grad_norm": 0.08095613121986389, "learning_rate": 7.961618566140856e-06, "loss": 0.525471031665802, "step": 9392 }, { "epoch": 1.7355960781143538, "grad_norm": 0.06748519837856293, "learning_rate": 7.959665575430198e-06, "loss": 0.4298991858959198, "step": 9393 }, { "epoch": 1.7357808548232496, "grad_norm": 0.08516919612884521, "learning_rate": 7.95771266591847e-06, "loss": 0.5407508611679077, "step": 9394 }, { "epoch": 1.7359656315321454, "grad_norm": 0.07032306492328644, "learning_rate": 7.955759837683386e-06, "loss": 0.3875845968723297, "step": 9395 }, { "epoch": 1.7361504082410413, "grad_norm": 0.07609395682811737, "learning_rate": 7.953807090802663e-06, "loss": 0.5288292765617371, "step": 9396 }, { "epoch": 1.736335184949937, "grad_norm": 0.10851506143808365, "learning_rate": 7.95185442535402e-06, "loss": 0.7212783098220825, "step": 9397 }, { "epoch": 1.736519961658833, "grad_norm": 0.07989294826984406, "learning_rate": 7.94990184141516e-06, "loss": 0.48487424850463867, "step": 9398 }, { "epoch": 1.7367047383677288, "grad_norm": 0.0886591300368309, "learning_rate": 7.947949339063797e-06, "loss": 0.6012457609176636, "step": 9399 }, { "epoch": 1.7368895150766246, "grad_norm": 0.07172589749097824, "learning_rate": 7.945996918377627e-06, "loss": 0.5031811594963074, "step": 9400 }, { "epoch": 1.7370742917855204, "grad_norm": 0.0771087184548378, "learning_rate": 7.94404457943435e-06, "loss": 0.4250201880931854, "step": 9401 }, { "epoch": 1.7372590684944162, "grad_norm": 0.06980002671480179, "learning_rate": 7.942092322311674e-06, "loss": 0.47895023226737976, "step": 9402 }, { "epoch": 1.737443845203312, "grad_norm": 0.08409155905246735, "learning_rate": 7.940140147087281e-06, "loss": 0.5779047012329102, "step": 9403 }, { "epoch": 1.737628621912208, "grad_norm": 0.08319824188947678, "learning_rate": 7.938188053838863e-06, "loss": 0.585312008857727, "step": 9404 }, { "epoch": 1.7378133986211037, "grad_norm": 0.06984545290470123, "learning_rate": 7.936236042644116e-06, "loss": 0.3505295515060425, "step": 9405 }, { "epoch": 1.7379981753299996, "grad_norm": 0.09396980702877045, "learning_rate": 7.934284113580715e-06, "loss": 0.664936900138855, "step": 9406 }, { "epoch": 1.7381829520388954, "grad_norm": 0.07843194901943207, "learning_rate": 7.932332266726341e-06, "loss": 0.4490804374217987, "step": 9407 }, { "epoch": 1.7383677287477912, "grad_norm": 0.08212018013000488, "learning_rate": 7.930380502158678e-06, "loss": 0.492334246635437, "step": 9408 }, { "epoch": 1.738552505456687, "grad_norm": 0.07664764672517776, "learning_rate": 7.928428819955395e-06, "loss": 0.44178497791290283, "step": 9409 }, { "epoch": 1.7387372821655829, "grad_norm": 0.07601568102836609, "learning_rate": 7.92647722019416e-06, "loss": 0.5334704518318176, "step": 9410 }, { "epoch": 1.738922058874479, "grad_norm": 0.06846748292446136, "learning_rate": 7.924525702952648e-06, "loss": 0.4268120527267456, "step": 9411 }, { "epoch": 1.7391068355833748, "grad_norm": 0.07301660627126694, "learning_rate": 7.92257426830852e-06, "loss": 0.43639346957206726, "step": 9412 }, { "epoch": 1.7392916122922706, "grad_norm": 0.09251867234706879, "learning_rate": 7.920622916339436e-06, "loss": 0.6594666838645935, "step": 9413 }, { "epoch": 1.7394763890011664, "grad_norm": 0.06635914742946625, "learning_rate": 7.918671647123055e-06, "loss": 0.4128287732601166, "step": 9414 }, { "epoch": 1.7396611657100622, "grad_norm": 0.07639806717634201, "learning_rate": 7.916720460737029e-06, "loss": 0.46493440866470337, "step": 9415 }, { "epoch": 1.739845942418958, "grad_norm": 0.061943717300891876, "learning_rate": 7.914769357259015e-06, "loss": 0.3976179361343384, "step": 9416 }, { "epoch": 1.740030719127854, "grad_norm": 0.07549803704023361, "learning_rate": 7.91281833676665e-06, "loss": 0.5620946884155273, "step": 9417 }, { "epoch": 1.7402154958367497, "grad_norm": 0.0732521340250969, "learning_rate": 7.910867399337587e-06, "loss": 0.3955199122428894, "step": 9418 }, { "epoch": 1.7404002725456458, "grad_norm": 0.09458731859922409, "learning_rate": 7.908916545049473e-06, "loss": 0.5515162944793701, "step": 9419 }, { "epoch": 1.7405850492545416, "grad_norm": 0.08107059448957443, "learning_rate": 7.906965773979932e-06, "loss": 0.4785090684890747, "step": 9420 }, { "epoch": 1.7407698259634374, "grad_norm": 0.07096207141876221, "learning_rate": 7.905015086206601e-06, "loss": 0.5611074566841125, "step": 9421 }, { "epoch": 1.7409546026723333, "grad_norm": 0.07923568785190582, "learning_rate": 7.903064481807123e-06, "loss": 0.631668210029602, "step": 9422 }, { "epoch": 1.741139379381229, "grad_norm": 0.09152689576148987, "learning_rate": 7.901113960859115e-06, "loss": 0.6958367824554443, "step": 9423 }, { "epoch": 1.741324156090125, "grad_norm": 0.07087397575378418, "learning_rate": 7.899163523440201e-06, "loss": 0.3661838173866272, "step": 9424 }, { "epoch": 1.7415089327990207, "grad_norm": 0.07663542032241821, "learning_rate": 7.897213169628012e-06, "loss": 0.5703363418579102, "step": 9425 }, { "epoch": 1.7416937095079166, "grad_norm": 0.05955135449767113, "learning_rate": 7.895262899500158e-06, "loss": 0.3244597911834717, "step": 9426 }, { "epoch": 1.7418784862168124, "grad_norm": 0.07609820365905762, "learning_rate": 7.893312713134256e-06, "loss": 0.5215273499488831, "step": 9427 }, { "epoch": 1.7420632629257082, "grad_norm": 0.06051214411854744, "learning_rate": 7.891362610607918e-06, "loss": 0.3890979588031769, "step": 9428 }, { "epoch": 1.742248039634604, "grad_norm": 0.07205689698457718, "learning_rate": 7.889412591998749e-06, "loss": 0.4643065631389618, "step": 9429 }, { "epoch": 1.7424328163435, "grad_norm": 0.09298235923051834, "learning_rate": 7.887462657384357e-06, "loss": 0.5200021266937256, "step": 9430 }, { "epoch": 1.7426175930523957, "grad_norm": 0.07720060646533966, "learning_rate": 7.88551280684234e-06, "loss": 0.49047455191612244, "step": 9431 }, { "epoch": 1.7428023697612915, "grad_norm": 0.09403630346059799, "learning_rate": 7.8835630404503e-06, "loss": 0.6260966658592224, "step": 9432 }, { "epoch": 1.7429871464701874, "grad_norm": 0.07295014709234238, "learning_rate": 7.88161335828583e-06, "loss": 0.4665113389492035, "step": 9433 }, { "epoch": 1.7431719231790832, "grad_norm": 0.08470214158296585, "learning_rate": 7.879663760426517e-06, "loss": 0.5773360729217529, "step": 9434 }, { "epoch": 1.743356699887979, "grad_norm": 0.08234825730323792, "learning_rate": 7.877714246949954e-06, "loss": 0.558610737323761, "step": 9435 }, { "epoch": 1.7435414765968749, "grad_norm": 0.07873562723398209, "learning_rate": 7.87576481793373e-06, "loss": 0.5046735405921936, "step": 9436 }, { "epoch": 1.7437262533057707, "grad_norm": 0.05728263407945633, "learning_rate": 7.873815473455414e-06, "loss": 0.2989661693572998, "step": 9437 }, { "epoch": 1.7439110300146665, "grad_norm": 0.07303888350725174, "learning_rate": 7.871866213592589e-06, "loss": 0.4734708070755005, "step": 9438 }, { "epoch": 1.7440958067235623, "grad_norm": 0.08110443502664566, "learning_rate": 7.869917038422838e-06, "loss": 0.5412731766700745, "step": 9439 }, { "epoch": 1.7442805834324582, "grad_norm": 0.08457206934690475, "learning_rate": 7.867967948023716e-06, "loss": 0.59527987241745, "step": 9440 }, { "epoch": 1.7444653601413542, "grad_norm": 0.09340102970600128, "learning_rate": 7.866018942472803e-06, "loss": 0.5167881846427917, "step": 9441 }, { "epoch": 1.74465013685025, "grad_norm": 0.06627646088600159, "learning_rate": 7.864070021847664e-06, "loss": 0.39817744493484497, "step": 9442 }, { "epoch": 1.7448349135591459, "grad_norm": 0.08489912748336792, "learning_rate": 7.862121186225851e-06, "loss": 0.5790280103683472, "step": 9443 }, { "epoch": 1.7450196902680417, "grad_norm": 0.06530047208070755, "learning_rate": 7.860172435684923e-06, "loss": 0.39563748240470886, "step": 9444 }, { "epoch": 1.7452044669769375, "grad_norm": 0.0687323808670044, "learning_rate": 7.858223770302447e-06, "loss": 0.5383394956588745, "step": 9445 }, { "epoch": 1.7453892436858334, "grad_norm": 0.10695014894008636, "learning_rate": 7.856275190155957e-06, "loss": 0.6556435823440552, "step": 9446 }, { "epoch": 1.7455740203947292, "grad_norm": 0.08394554257392883, "learning_rate": 7.854326695323012e-06, "loss": 0.6104413270950317, "step": 9447 }, { "epoch": 1.7457587971036252, "grad_norm": 0.06306581944227219, "learning_rate": 7.852378285881148e-06, "loss": 0.4853476881980896, "step": 9448 }, { "epoch": 1.745943573812521, "grad_norm": 0.08481727540493011, "learning_rate": 7.850429961907908e-06, "loss": 0.5893101096153259, "step": 9449 }, { "epoch": 1.746128350521417, "grad_norm": 0.08066444098949432, "learning_rate": 7.848481723480835e-06, "loss": 0.516244113445282, "step": 9450 }, { "epoch": 1.7463131272303127, "grad_norm": 0.07358620315790176, "learning_rate": 7.846533570677454e-06, "loss": 0.4533986449241638, "step": 9451 }, { "epoch": 1.7464979039392086, "grad_norm": 0.06455602496862411, "learning_rate": 7.8445855035753e-06, "loss": 0.504241943359375, "step": 9452 }, { "epoch": 1.7466826806481044, "grad_norm": 0.08291526883840561, "learning_rate": 7.842637522251902e-06, "loss": 0.5514028668403625, "step": 9453 }, { "epoch": 1.7468674573570002, "grad_norm": 0.07108502089977264, "learning_rate": 7.84068962678478e-06, "loss": 0.37282323837280273, "step": 9454 }, { "epoch": 1.747052234065896, "grad_norm": 0.08243813365697861, "learning_rate": 7.838741817251454e-06, "loss": 0.5866271257400513, "step": 9455 }, { "epoch": 1.7472370107747919, "grad_norm": 0.06616461277008057, "learning_rate": 7.836794093729447e-06, "loss": 0.4198001027107239, "step": 9456 }, { "epoch": 1.7474217874836877, "grad_norm": 0.07621096819639206, "learning_rate": 7.834846456296258e-06, "loss": 0.419243723154068, "step": 9457 }, { "epoch": 1.7476065641925835, "grad_norm": 0.08235196769237518, "learning_rate": 7.832898905029412e-06, "loss": 0.5241071581840515, "step": 9458 }, { "epoch": 1.7477913409014794, "grad_norm": 0.09907345473766327, "learning_rate": 7.830951440006412e-06, "loss": 0.6666129231452942, "step": 9459 }, { "epoch": 1.7479761176103752, "grad_norm": 0.0671384260058403, "learning_rate": 7.829004061304753e-06, "loss": 0.3617722988128662, "step": 9460 }, { "epoch": 1.748160894319271, "grad_norm": 0.07696773111820221, "learning_rate": 7.827056769001942e-06, "loss": 0.4717048406600952, "step": 9461 }, { "epoch": 1.7483456710281668, "grad_norm": 0.07589234411716461, "learning_rate": 7.825109563175478e-06, "loss": 0.5012123584747314, "step": 9462 }, { "epoch": 1.7485304477370627, "grad_norm": 0.08382602035999298, "learning_rate": 7.823162443902845e-06, "loss": 0.5069903135299683, "step": 9463 }, { "epoch": 1.7487152244459585, "grad_norm": 0.08438605815172195, "learning_rate": 7.821215411261537e-06, "loss": 0.6001019477844238, "step": 9464 }, { "epoch": 1.7489000011548543, "grad_norm": 0.07784610241651535, "learning_rate": 7.819268465329038e-06, "loss": 0.494361013174057, "step": 9465 }, { "epoch": 1.7490847778637502, "grad_norm": 0.0861455500125885, "learning_rate": 7.81732160618283e-06, "loss": 0.7007192969322205, "step": 9466 }, { "epoch": 1.749269554572646, "grad_norm": 0.07989050447940826, "learning_rate": 7.815374833900398e-06, "loss": 0.46829497814178467, "step": 9467 }, { "epoch": 1.7494543312815418, "grad_norm": 0.09458218514919281, "learning_rate": 7.813428148559208e-06, "loss": 0.5205943584442139, "step": 9468 }, { "epoch": 1.7496391079904376, "grad_norm": 0.06660239398479462, "learning_rate": 7.811481550236739e-06, "loss": 0.41754552721977234, "step": 9469 }, { "epoch": 1.7498238846993337, "grad_norm": 0.07794316858053207, "learning_rate": 7.80953503901046e-06, "loss": 0.46037155389785767, "step": 9470 }, { "epoch": 1.7500086614082295, "grad_norm": 0.07195150852203369, "learning_rate": 7.807588614957829e-06, "loss": 0.4594876766204834, "step": 9471 }, { "epoch": 1.7501934381171254, "grad_norm": 0.08619675040245056, "learning_rate": 7.805642278156313e-06, "loss": 0.527175784111023, "step": 9472 }, { "epoch": 1.7503782148260212, "grad_norm": 0.08233191072940826, "learning_rate": 7.80369602868337e-06, "loss": 0.6458790898323059, "step": 9473 }, { "epoch": 1.750562991534917, "grad_norm": 0.07300320267677307, "learning_rate": 7.801749866616453e-06, "loss": 0.39369773864746094, "step": 9474 }, { "epoch": 1.7507477682438128, "grad_norm": 0.09571012854576111, "learning_rate": 7.799803792033014e-06, "loss": 0.5348305106163025, "step": 9475 }, { "epoch": 1.7509325449527087, "grad_norm": 0.08784550428390503, "learning_rate": 7.797857805010502e-06, "loss": 0.5967904329299927, "step": 9476 }, { "epoch": 1.7511173216616047, "grad_norm": 0.0804838165640831, "learning_rate": 7.795911905626356e-06, "loss": 0.4976516366004944, "step": 9477 }, { "epoch": 1.7513020983705005, "grad_norm": 0.08038236200809479, "learning_rate": 7.793966093958028e-06, "loss": 0.5232406854629517, "step": 9478 }, { "epoch": 1.7514868750793964, "grad_norm": 0.08380588889122009, "learning_rate": 7.79202037008294e-06, "loss": 0.552866518497467, "step": 9479 }, { "epoch": 1.7516716517882922, "grad_norm": 0.07746408134698868, "learning_rate": 7.790074734078533e-06, "loss": 0.41058093309402466, "step": 9480 }, { "epoch": 1.751856428497188, "grad_norm": 0.0629132091999054, "learning_rate": 7.788129186022244e-06, "loss": 0.42107585072517395, "step": 9481 }, { "epoch": 1.7520412052060839, "grad_norm": 0.061843644827604294, "learning_rate": 7.78618372599149e-06, "loss": 0.34514370560646057, "step": 9482 }, { "epoch": 1.7522259819149797, "grad_norm": 0.09421967715024948, "learning_rate": 7.784238354063697e-06, "loss": 0.5947861075401306, "step": 9483 }, { "epoch": 1.7524107586238755, "grad_norm": 0.06692761182785034, "learning_rate": 7.782293070316287e-06, "loss": 0.2929871380329132, "step": 9484 }, { "epoch": 1.7525955353327713, "grad_norm": 0.07493630051612854, "learning_rate": 7.780347874826672e-06, "loss": 0.47335997223854065, "step": 9485 }, { "epoch": 1.7527803120416672, "grad_norm": 0.08465097099542618, "learning_rate": 7.778402767672268e-06, "loss": 0.5007327795028687, "step": 9486 }, { "epoch": 1.752965088750563, "grad_norm": 0.0741303339600563, "learning_rate": 7.776457748930486e-06, "loss": 0.41544151306152344, "step": 9487 }, { "epoch": 1.7531498654594588, "grad_norm": 0.05960865691304207, "learning_rate": 7.774512818678724e-06, "loss": 0.3089883625507355, "step": 9488 }, { "epoch": 1.7533346421683547, "grad_norm": 0.08837155252695084, "learning_rate": 7.772567976994392e-06, "loss": 0.5421566367149353, "step": 9489 }, { "epoch": 1.7535194188772505, "grad_norm": 0.08413968235254288, "learning_rate": 7.770623223954887e-06, "loss": 0.6096692681312561, "step": 9490 }, { "epoch": 1.7537041955861463, "grad_norm": 0.06976504623889923, "learning_rate": 7.768678559637601e-06, "loss": 0.4283706545829773, "step": 9491 }, { "epoch": 1.7538889722950421, "grad_norm": 0.07454965263605118, "learning_rate": 7.766733984119927e-06, "loss": 0.5498755574226379, "step": 9492 }, { "epoch": 1.754073749003938, "grad_norm": 0.07341553270816803, "learning_rate": 7.764789497479256e-06, "loss": 0.45700550079345703, "step": 9493 }, { "epoch": 1.7542585257128338, "grad_norm": 0.07001147419214249, "learning_rate": 7.762845099792968e-06, "loss": 0.4273150861263275, "step": 9494 }, { "epoch": 1.7544433024217296, "grad_norm": 0.05497468262910843, "learning_rate": 7.76090079113845e-06, "loss": 0.3325614035129547, "step": 9495 }, { "epoch": 1.7546280791306255, "grad_norm": 0.05593828856945038, "learning_rate": 7.758956571593069e-06, "loss": 0.3491348326206207, "step": 9496 }, { "epoch": 1.7548128558395213, "grad_norm": 0.10387798398733139, "learning_rate": 7.757012441234206e-06, "loss": 0.5724706053733826, "step": 9497 }, { "epoch": 1.7549976325484171, "grad_norm": 0.07001633197069168, "learning_rate": 7.755068400139236e-06, "loss": 0.3945654034614563, "step": 9498 }, { "epoch": 1.7551824092573132, "grad_norm": 0.09581859409809113, "learning_rate": 7.753124448385514e-06, "loss": 0.7347464561462402, "step": 9499 }, { "epoch": 1.755367185966209, "grad_norm": 0.0977296233177185, "learning_rate": 7.751180586050409e-06, "loss": 0.734951376914978, "step": 9500 }, { "epoch": 1.755367185966209, "eval_loss": 0.5634395480155945, "eval_runtime": 156.2077, "eval_samples_per_second": 116.697, "eval_steps_per_second": 14.59, "step": 9500 }, { "epoch": 1.7555519626751048, "grad_norm": 0.07938997447490692, "learning_rate": 7.749236813211288e-06, "loss": 0.47607746720314026, "step": 9501 }, { "epoch": 1.7557367393840007, "grad_norm": 0.0752115324139595, "learning_rate": 7.747293129945495e-06, "loss": 0.7275061011314392, "step": 9502 }, { "epoch": 1.7559215160928965, "grad_norm": 0.06737703084945679, "learning_rate": 7.745349536330387e-06, "loss": 0.4250943958759308, "step": 9503 }, { "epoch": 1.7561062928017923, "grad_norm": 0.07570453733205795, "learning_rate": 7.743406032443318e-06, "loss": 0.5231233835220337, "step": 9504 }, { "epoch": 1.7562910695106881, "grad_norm": 0.07378605753183365, "learning_rate": 7.741462618361624e-06, "loss": 0.4782612919807434, "step": 9505 }, { "epoch": 1.756475846219584, "grad_norm": 0.07631854712963104, "learning_rate": 7.739519294162652e-06, "loss": 0.4818604588508606, "step": 9506 }, { "epoch": 1.75666062292848, "grad_norm": 0.08436261117458344, "learning_rate": 7.737576059923742e-06, "loss": 0.5321990251541138, "step": 9507 }, { "epoch": 1.7568453996373758, "grad_norm": 0.07916104048490524, "learning_rate": 7.735632915722227e-06, "loss": 0.5069300532341003, "step": 9508 }, { "epoch": 1.7570301763462717, "grad_norm": 0.08644711226224899, "learning_rate": 7.733689861635435e-06, "loss": 0.511889636516571, "step": 9509 }, { "epoch": 1.7572149530551675, "grad_norm": 0.08409576117992401, "learning_rate": 7.7317468977407e-06, "loss": 0.4757402241230011, "step": 9510 }, { "epoch": 1.7573997297640633, "grad_norm": 0.0835278183221817, "learning_rate": 7.729804024115339e-06, "loss": 0.5135257244110107, "step": 9511 }, { "epoch": 1.7575845064729592, "grad_norm": 0.08325167745351791, "learning_rate": 7.727861240836679e-06, "loss": 0.5767073035240173, "step": 9512 }, { "epoch": 1.757769283181855, "grad_norm": 0.06520769000053406, "learning_rate": 7.725918547982027e-06, "loss": 0.3839736878871918, "step": 9513 }, { "epoch": 1.7579540598907508, "grad_norm": 0.07241534441709518, "learning_rate": 7.723975945628706e-06, "loss": 0.45690375566482544, "step": 9514 }, { "epoch": 1.7581388365996466, "grad_norm": 0.09301277995109558, "learning_rate": 7.722033433854023e-06, "loss": 0.6211903691291809, "step": 9515 }, { "epoch": 1.7583236133085425, "grad_norm": 0.05278944596648216, "learning_rate": 7.720091012735277e-06, "loss": 0.34502747654914856, "step": 9516 }, { "epoch": 1.7585083900174383, "grad_norm": 0.07892879843711853, "learning_rate": 7.71814868234978e-06, "loss": 0.4249326288700104, "step": 9517 }, { "epoch": 1.7586931667263341, "grad_norm": 0.09665438532829285, "learning_rate": 7.71620644277483e-06, "loss": 0.5845499634742737, "step": 9518 }, { "epoch": 1.75887794343523, "grad_norm": 0.06883665919303894, "learning_rate": 7.714264294087711e-06, "loss": 0.513221025466919, "step": 9519 }, { "epoch": 1.7590627201441258, "grad_norm": 0.0666922777891159, "learning_rate": 7.712322236365724e-06, "loss": 0.5049949884414673, "step": 9520 }, { "epoch": 1.7592474968530216, "grad_norm": 0.07511871308088303, "learning_rate": 7.71038026968616e-06, "loss": 0.4871228337287903, "step": 9521 }, { "epoch": 1.7594322735619174, "grad_norm": 0.06378930807113647, "learning_rate": 7.708438394126292e-06, "loss": 0.3740116059780121, "step": 9522 }, { "epoch": 1.7596170502708133, "grad_norm": 0.0661415383219719, "learning_rate": 7.706496609763407e-06, "loss": 0.49298548698425293, "step": 9523 }, { "epoch": 1.759801826979709, "grad_norm": 0.07151772826910019, "learning_rate": 7.704554916674785e-06, "loss": 0.4438452422618866, "step": 9524 }, { "epoch": 1.759986603688605, "grad_norm": 0.06835362315177917, "learning_rate": 7.702613314937692e-06, "loss": 0.48267531394958496, "step": 9525 }, { "epoch": 1.7601713803975008, "grad_norm": 0.07833728194236755, "learning_rate": 7.700671804629402e-06, "loss": 0.4843129515647888, "step": 9526 }, { "epoch": 1.7603561571063966, "grad_norm": 0.08553629368543625, "learning_rate": 7.698730385827184e-06, "loss": 0.5773013234138489, "step": 9527 }, { "epoch": 1.7605409338152924, "grad_norm": 0.11041746288537979, "learning_rate": 7.696789058608294e-06, "loss": 0.79600989818573, "step": 9528 }, { "epoch": 1.7607257105241885, "grad_norm": 0.09957388788461685, "learning_rate": 7.694847823049995e-06, "loss": 0.6443513035774231, "step": 9529 }, { "epoch": 1.7609104872330843, "grad_norm": 0.07423588633537292, "learning_rate": 7.692906679229539e-06, "loss": 0.4746418297290802, "step": 9530 }, { "epoch": 1.7610952639419801, "grad_norm": 0.09016186743974686, "learning_rate": 7.690965627224181e-06, "loss": 0.45739561319351196, "step": 9531 }, { "epoch": 1.761280040650876, "grad_norm": 0.07074403762817383, "learning_rate": 7.689024667111167e-06, "loss": 0.5079753994941711, "step": 9532 }, { "epoch": 1.7614648173597718, "grad_norm": 0.07867482304573059, "learning_rate": 7.687083798967739e-06, "loss": 0.5321707129478455, "step": 9533 }, { "epoch": 1.7616495940686676, "grad_norm": 0.08339225500822067, "learning_rate": 7.68514302287114e-06, "loss": 0.6382849812507629, "step": 9534 }, { "epoch": 1.7618343707775634, "grad_norm": 0.06861494481563568, "learning_rate": 7.68320233889861e-06, "loss": 0.5510586500167847, "step": 9535 }, { "epoch": 1.7620191474864595, "grad_norm": 0.08419942855834961, "learning_rate": 7.681261747127375e-06, "loss": 0.5666383504867554, "step": 9536 }, { "epoch": 1.7622039241953553, "grad_norm": 0.07097092270851135, "learning_rate": 7.679321247634667e-06, "loss": 0.47546514868736267, "step": 9537 }, { "epoch": 1.7623887009042511, "grad_norm": 0.0674498975276947, "learning_rate": 7.67738084049772e-06, "loss": 0.4514087736606598, "step": 9538 }, { "epoch": 1.762573477613147, "grad_norm": 0.08018751442432404, "learning_rate": 7.67544052579374e-06, "loss": 0.4930320382118225, "step": 9539 }, { "epoch": 1.7627582543220428, "grad_norm": 0.05618758499622345, "learning_rate": 7.673500303599956e-06, "loss": 0.3329872786998749, "step": 9540 }, { "epoch": 1.7629430310309386, "grad_norm": 0.10689683258533478, "learning_rate": 7.671560173993588e-06, "loss": 0.677473783493042, "step": 9541 }, { "epoch": 1.7631278077398345, "grad_norm": 0.08824285119771957, "learning_rate": 7.669620137051835e-06, "loss": 0.6107396483421326, "step": 9542 }, { "epoch": 1.7633125844487303, "grad_norm": 0.08850574493408203, "learning_rate": 7.667680192851912e-06, "loss": 0.5533414483070374, "step": 9543 }, { "epoch": 1.7634973611576261, "grad_norm": 0.07688694447278976, "learning_rate": 7.665740341471017e-06, "loss": 0.46593785285949707, "step": 9544 }, { "epoch": 1.763682137866522, "grad_norm": 0.0715436115860939, "learning_rate": 7.663800582986356e-06, "loss": 0.3948075771331787, "step": 9545 }, { "epoch": 1.7638669145754178, "grad_norm": 0.057816267013549805, "learning_rate": 7.661860917475124e-06, "loss": 0.368903785943985, "step": 9546 }, { "epoch": 1.7640516912843136, "grad_norm": 0.08684322983026505, "learning_rate": 7.659921345014509e-06, "loss": 0.5822094082832336, "step": 9547 }, { "epoch": 1.7642364679932094, "grad_norm": 0.09293843060731888, "learning_rate": 7.657981865681704e-06, "loss": 0.6093835234642029, "step": 9548 }, { "epoch": 1.7644212447021053, "grad_norm": 0.07811232656240463, "learning_rate": 7.656042479553896e-06, "loss": 0.463060587644577, "step": 9549 }, { "epoch": 1.764606021411001, "grad_norm": 0.07818163931369781, "learning_rate": 7.654103186708262e-06, "loss": 0.5459082126617432, "step": 9550 }, { "epoch": 1.764790798119897, "grad_norm": 0.07132180780172348, "learning_rate": 7.65216398722198e-06, "loss": 0.4241693615913391, "step": 9551 }, { "epoch": 1.7649755748287927, "grad_norm": 0.07287544012069702, "learning_rate": 7.65022488117223e-06, "loss": 0.49057701230049133, "step": 9552 }, { "epoch": 1.7651603515376886, "grad_norm": 0.06782901287078857, "learning_rate": 7.648285868636177e-06, "loss": 0.4755297303199768, "step": 9553 }, { "epoch": 1.7653451282465844, "grad_norm": 0.06399807333946228, "learning_rate": 7.646346949690987e-06, "loss": 0.35742703080177307, "step": 9554 }, { "epoch": 1.7655299049554802, "grad_norm": 0.07754683494567871, "learning_rate": 7.644408124413828e-06, "loss": 0.6166073083877563, "step": 9555 }, { "epoch": 1.765714681664376, "grad_norm": 0.0765303298830986, "learning_rate": 7.642469392881852e-06, "loss": 0.4563341736793518, "step": 9556 }, { "epoch": 1.765899458373272, "grad_norm": 0.07518597692251205, "learning_rate": 7.64053075517222e-06, "loss": 0.49914613366127014, "step": 9557 }, { "epoch": 1.766084235082168, "grad_norm": 0.07142817974090576, "learning_rate": 7.638592211362086e-06, "loss": 0.45610418915748596, "step": 9558 }, { "epoch": 1.7662690117910638, "grad_norm": 0.07195606827735901, "learning_rate": 7.63665376152859e-06, "loss": 0.4759080410003662, "step": 9559 }, { "epoch": 1.7664537884999596, "grad_norm": 0.07048597931861877, "learning_rate": 7.634715405748881e-06, "loss": 0.4257037341594696, "step": 9560 }, { "epoch": 1.7666385652088554, "grad_norm": 0.08749891072511673, "learning_rate": 7.632777144100099e-06, "loss": 0.46232447028160095, "step": 9561 }, { "epoch": 1.7668233419177513, "grad_norm": 0.06787905097007751, "learning_rate": 7.630838976659379e-06, "loss": 0.371409147977829, "step": 9562 }, { "epoch": 1.767008118626647, "grad_norm": 0.07177285850048065, "learning_rate": 7.628900903503858e-06, "loss": 0.414307177066803, "step": 9563 }, { "epoch": 1.767192895335543, "grad_norm": 0.0932188481092453, "learning_rate": 7.62696292471066e-06, "loss": 0.6285747289657593, "step": 9564 }, { "epoch": 1.767377672044439, "grad_norm": 0.08144089579582214, "learning_rate": 7.625025040356915e-06, "loss": 0.4960605800151825, "step": 9565 }, { "epoch": 1.7675624487533348, "grad_norm": 0.07047521322965622, "learning_rate": 7.623087250519744e-06, "loss": 0.35761797428131104, "step": 9566 }, { "epoch": 1.7677472254622306, "grad_norm": 0.08805403113365173, "learning_rate": 7.621149555276262e-06, "loss": 0.5082883834838867, "step": 9567 }, { "epoch": 1.7679320021711264, "grad_norm": 0.09301161766052246, "learning_rate": 7.619211954703586e-06, "loss": 0.6269007325172424, "step": 9568 }, { "epoch": 1.7681167788800223, "grad_norm": 0.08129319548606873, "learning_rate": 7.6172744488788276e-06, "loss": 0.5256569981575012, "step": 9569 }, { "epoch": 1.768301555588918, "grad_norm": 0.05803351476788521, "learning_rate": 7.615337037879089e-06, "loss": 0.37849074602127075, "step": 9570 }, { "epoch": 1.768486332297814, "grad_norm": 0.08406578749418259, "learning_rate": 7.613399721781476e-06, "loss": 0.5408613681793213, "step": 9571 }, { "epoch": 1.7686711090067098, "grad_norm": 0.0749550610780716, "learning_rate": 7.6114625006630885e-06, "loss": 0.5040973424911499, "step": 9572 }, { "epoch": 1.7688558857156056, "grad_norm": 0.06905291229486465, "learning_rate": 7.609525374601019e-06, "loss": 0.462920606136322, "step": 9573 }, { "epoch": 1.7690406624245014, "grad_norm": 0.09600643068552017, "learning_rate": 7.607588343672361e-06, "loss": 0.7481486201286316, "step": 9574 }, { "epoch": 1.7692254391333972, "grad_norm": 0.1076044961810112, "learning_rate": 7.605651407954207e-06, "loss": 0.713120698928833, "step": 9575 }, { "epoch": 1.769410215842293, "grad_norm": 0.08154827356338501, "learning_rate": 7.603714567523629e-06, "loss": 0.5379562973976135, "step": 9576 }, { "epoch": 1.769594992551189, "grad_norm": 0.07339855283498764, "learning_rate": 7.60177782245772e-06, "loss": 0.4116850197315216, "step": 9577 }, { "epoch": 1.7697797692600847, "grad_norm": 0.07978306710720062, "learning_rate": 7.599841172833548e-06, "loss": 0.49824514985084534, "step": 9578 }, { "epoch": 1.7699645459689806, "grad_norm": 0.07998265326023102, "learning_rate": 7.597904618728187e-06, "loss": 0.5987973809242249, "step": 9579 }, { "epoch": 1.7701493226778764, "grad_norm": 0.07898522913455963, "learning_rate": 7.5959681602187085e-06, "loss": 0.47599172592163086, "step": 9580 }, { "epoch": 1.7703340993867722, "grad_norm": 0.08431115746498108, "learning_rate": 7.594031797382174e-06, "loss": 0.5221173763275146, "step": 9581 }, { "epoch": 1.770518876095668, "grad_norm": 0.07711915671825409, "learning_rate": 7.592095530295648e-06, "loss": 0.5558249950408936, "step": 9582 }, { "epoch": 1.7707036528045639, "grad_norm": 0.09012097120285034, "learning_rate": 7.590159359036188e-06, "loss": 0.7073286771774292, "step": 9583 }, { "epoch": 1.7708884295134597, "grad_norm": 0.07047846913337708, "learning_rate": 7.588223283680844e-06, "loss": 0.5091392993927002, "step": 9584 }, { "epoch": 1.7710732062223555, "grad_norm": 0.07714288681745529, "learning_rate": 7.586287304306667e-06, "loss": 0.5538867115974426, "step": 9585 }, { "epoch": 1.7712579829312514, "grad_norm": 0.0795782133936882, "learning_rate": 7.584351420990707e-06, "loss": 0.4915255606174469, "step": 9586 }, { "epoch": 1.7714427596401474, "grad_norm": 0.08537991344928741, "learning_rate": 7.58241563381e-06, "loss": 0.48552417755126953, "step": 9587 }, { "epoch": 1.7716275363490432, "grad_norm": 0.10023138672113419, "learning_rate": 7.5804799428415865e-06, "loss": 0.6737853288650513, "step": 9588 }, { "epoch": 1.771812313057939, "grad_norm": 0.07969482243061066, "learning_rate": 7.578544348162504e-06, "loss": 0.5007420182228088, "step": 9589 }, { "epoch": 1.771997089766835, "grad_norm": 0.09847646951675415, "learning_rate": 7.5766088498497805e-06, "loss": 0.6564161777496338, "step": 9590 }, { "epoch": 1.7721818664757307, "grad_norm": 0.07532694935798645, "learning_rate": 7.574673447980441e-06, "loss": 0.5126252174377441, "step": 9591 }, { "epoch": 1.7723666431846266, "grad_norm": 0.09312979876995087, "learning_rate": 7.572738142631513e-06, "loss": 0.6322164535522461, "step": 9592 }, { "epoch": 1.7725514198935224, "grad_norm": 0.06892203539609909, "learning_rate": 7.5708029338800104e-06, "loss": 0.3911009132862091, "step": 9593 }, { "epoch": 1.7727361966024182, "grad_norm": 0.07569395750761032, "learning_rate": 7.5688678218029564e-06, "loss": 0.4439738094806671, "step": 9594 }, { "epoch": 1.7729209733113143, "grad_norm": 0.07597517222166061, "learning_rate": 7.5669328064773515e-06, "loss": 0.43144547939300537, "step": 9595 }, { "epoch": 1.77310575002021, "grad_norm": 0.0796041339635849, "learning_rate": 7.564997887980208e-06, "loss": 0.5145467519760132, "step": 9596 }, { "epoch": 1.773290526729106, "grad_norm": 0.06158768758177757, "learning_rate": 7.563063066388537e-06, "loss": 0.4237648546695709, "step": 9597 }, { "epoch": 1.7734753034380017, "grad_norm": 0.06502994149923325, "learning_rate": 7.561128341779327e-06, "loss": 0.41174840927124023, "step": 9598 }, { "epoch": 1.7736600801468976, "grad_norm": 0.069393090903759, "learning_rate": 7.5591937142295775e-06, "loss": 0.4664877951145172, "step": 9599 }, { "epoch": 1.7738448568557934, "grad_norm": 0.06445951014757156, "learning_rate": 7.557259183816286e-06, "loss": 0.45197027921676636, "step": 9600 }, { "epoch": 1.7740296335646892, "grad_norm": 0.09038061648607254, "learning_rate": 7.555324750616433e-06, "loss": 0.5763864517211914, "step": 9601 }, { "epoch": 1.774214410273585, "grad_norm": 0.0962783694267273, "learning_rate": 7.553390414707007e-06, "loss": 0.7027230262756348, "step": 9602 }, { "epoch": 1.774399186982481, "grad_norm": 0.08621051907539368, "learning_rate": 7.551456176164989e-06, "loss": 0.5542473793029785, "step": 9603 }, { "epoch": 1.7745839636913767, "grad_norm": 0.07505802810192108, "learning_rate": 7.549522035067355e-06, "loss": 0.4667530953884125, "step": 9604 }, { "epoch": 1.7747687404002725, "grad_norm": 0.09039128571748734, "learning_rate": 7.5475879914910755e-06, "loss": 0.6583998799324036, "step": 9605 }, { "epoch": 1.7749535171091684, "grad_norm": 0.07745156437158585, "learning_rate": 7.545654045513125e-06, "loss": 0.5156211853027344, "step": 9606 }, { "epoch": 1.7751382938180642, "grad_norm": 0.08875171840190887, "learning_rate": 7.543720197210461e-06, "loss": 0.5398255586624146, "step": 9607 }, { "epoch": 1.77532307052696, "grad_norm": 0.08882952481508255, "learning_rate": 7.541786446660051e-06, "loss": 0.5149835348129272, "step": 9608 }, { "epoch": 1.7755078472358559, "grad_norm": 0.06775467097759247, "learning_rate": 7.5398527939388485e-06, "loss": 0.45543473958969116, "step": 9609 }, { "epoch": 1.7756926239447517, "grad_norm": 0.07815680652856827, "learning_rate": 7.537919239123808e-06, "loss": 0.46514320373535156, "step": 9610 }, { "epoch": 1.7758774006536475, "grad_norm": 0.07627175003290176, "learning_rate": 7.5359857822918814e-06, "loss": 0.46797317266464233, "step": 9611 }, { "epoch": 1.7760621773625433, "grad_norm": 0.07827355712652206, "learning_rate": 7.534052423520007e-06, "loss": 0.5280380249023438, "step": 9612 }, { "epoch": 1.7762469540714392, "grad_norm": 0.07887908071279526, "learning_rate": 7.5321191628851335e-06, "loss": 0.5931577682495117, "step": 9613 }, { "epoch": 1.776431730780335, "grad_norm": 0.067986860871315, "learning_rate": 7.530186000464199e-06, "loss": 0.45303210616111755, "step": 9614 }, { "epoch": 1.7766165074892308, "grad_norm": 0.0722842738032341, "learning_rate": 7.5282529363341316e-06, "loss": 0.4417860507965088, "step": 9615 }, { "epoch": 1.7768012841981267, "grad_norm": 0.0893688052892685, "learning_rate": 7.526319970571861e-06, "loss": 0.5766925811767578, "step": 9616 }, { "epoch": 1.7769860609070227, "grad_norm": 0.08695358783006668, "learning_rate": 7.524387103254325e-06, "loss": 0.5963427424430847, "step": 9617 }, { "epoch": 1.7771708376159185, "grad_norm": 0.0718984454870224, "learning_rate": 7.522454334458431e-06, "loss": 0.5042772889137268, "step": 9618 }, { "epoch": 1.7773556143248144, "grad_norm": 0.08929789811372757, "learning_rate": 7.520521664261103e-06, "loss": 0.4746643602848053, "step": 9619 }, { "epoch": 1.7775403910337102, "grad_norm": 0.09637338668107986, "learning_rate": 7.518589092739259e-06, "loss": 0.6487293243408203, "step": 9620 }, { "epoch": 1.777725167742606, "grad_norm": 0.08786317706108093, "learning_rate": 7.516656619969802e-06, "loss": 0.43691861629486084, "step": 9621 }, { "epoch": 1.7779099444515019, "grad_norm": 0.0733262300491333, "learning_rate": 7.514724246029643e-06, "loss": 0.45732972025871277, "step": 9622 }, { "epoch": 1.7780947211603977, "grad_norm": 0.05736264958977699, "learning_rate": 7.512791970995686e-06, "loss": 0.35856419801712036, "step": 9623 }, { "epoch": 1.7782794978692937, "grad_norm": 0.08865299820899963, "learning_rate": 7.510859794944825e-06, "loss": 0.5417463183403015, "step": 9624 }, { "epoch": 1.7784642745781896, "grad_norm": 0.07106424868106842, "learning_rate": 7.508927717953959e-06, "loss": 0.4163905680179596, "step": 9625 }, { "epoch": 1.7786490512870854, "grad_norm": 0.07050973176956177, "learning_rate": 7.506995740099974e-06, "loss": 0.376446932554245, "step": 9626 }, { "epoch": 1.7788338279959812, "grad_norm": 0.07406870275735855, "learning_rate": 7.505063861459758e-06, "loss": 0.5682767033576965, "step": 9627 }, { "epoch": 1.779018604704877, "grad_norm": 0.08051006495952606, "learning_rate": 7.503132082110197e-06, "loss": 0.5184383392333984, "step": 9628 }, { "epoch": 1.7792033814137729, "grad_norm": 0.07349243015050888, "learning_rate": 7.501200402128166e-06, "loss": 0.4858967661857605, "step": 9629 }, { "epoch": 1.7793881581226687, "grad_norm": 0.08572787046432495, "learning_rate": 7.499268821590541e-06, "loss": 0.5259782075881958, "step": 9630 }, { "epoch": 1.7795729348315645, "grad_norm": 0.08060348778963089, "learning_rate": 7.497337340574197e-06, "loss": 0.6207387447357178, "step": 9631 }, { "epoch": 1.7797577115404604, "grad_norm": 0.0894874706864357, "learning_rate": 7.495405959155992e-06, "loss": 0.6868194937705994, "step": 9632 }, { "epoch": 1.7799424882493562, "grad_norm": 0.06732596457004547, "learning_rate": 7.493474677412795e-06, "loss": 0.4337766766548157, "step": 9633 }, { "epoch": 1.780127264958252, "grad_norm": 0.0839940533041954, "learning_rate": 7.491543495421468e-06, "loss": 0.5468871593475342, "step": 9634 }, { "epoch": 1.7803120416671478, "grad_norm": 0.07228980213403702, "learning_rate": 7.489612413258858e-06, "loss": 0.4850339889526367, "step": 9635 }, { "epoch": 1.7804968183760437, "grad_norm": 0.08797910809516907, "learning_rate": 7.4876814310018164e-06, "loss": 0.5503541827201843, "step": 9636 }, { "epoch": 1.7806815950849395, "grad_norm": 0.06826333701610565, "learning_rate": 7.485750548727202e-06, "loss": 0.4150521457195282, "step": 9637 }, { "epoch": 1.7808663717938353, "grad_norm": 0.05818479508161545, "learning_rate": 7.483819766511845e-06, "loss": 0.3105936050415039, "step": 9638 }, { "epoch": 1.7810511485027312, "grad_norm": 0.07861250638961792, "learning_rate": 7.481889084432588e-06, "loss": 0.5360723733901978, "step": 9639 }, { "epoch": 1.781235925211627, "grad_norm": 0.08440329134464264, "learning_rate": 7.479958502566271e-06, "loss": 0.5286691188812256, "step": 9640 }, { "epoch": 1.7814207019205228, "grad_norm": 0.0839398205280304, "learning_rate": 7.47802802098972e-06, "loss": 0.6164692640304565, "step": 9641 }, { "epoch": 1.7816054786294186, "grad_norm": 0.07791807502508163, "learning_rate": 7.476097639779763e-06, "loss": 0.4462954103946686, "step": 9642 }, { "epoch": 1.7817902553383145, "grad_norm": 0.09706820547580719, "learning_rate": 7.474167359013223e-06, "loss": 0.7078897356987, "step": 9643 }, { "epoch": 1.7819750320472103, "grad_norm": 0.08299513906240463, "learning_rate": 7.47223717876692e-06, "loss": 0.6251681447029114, "step": 9644 }, { "epoch": 1.7821598087561061, "grad_norm": 0.06504345685243607, "learning_rate": 7.4703070991176706e-06, "loss": 0.3207865357398987, "step": 9645 }, { "epoch": 1.7823445854650022, "grad_norm": 0.07922597974538803, "learning_rate": 7.468377120142282e-06, "loss": 0.5469554662704468, "step": 9646 }, { "epoch": 1.782529362173898, "grad_norm": 0.07841379940509796, "learning_rate": 7.4664472419175645e-06, "loss": 0.4845714569091797, "step": 9647 }, { "epoch": 1.7827141388827938, "grad_norm": 0.061545539647340775, "learning_rate": 7.464517464520322e-06, "loss": 0.4248470664024353, "step": 9648 }, { "epoch": 1.7828989155916897, "grad_norm": 0.088067926466465, "learning_rate": 7.46258778802735e-06, "loss": 0.6738609075546265, "step": 9649 }, { "epoch": 1.7830836923005855, "grad_norm": 0.0731796994805336, "learning_rate": 7.460658212515445e-06, "loss": 0.4305770695209503, "step": 9650 }, { "epoch": 1.7832684690094813, "grad_norm": 0.07577754557132721, "learning_rate": 7.458728738061402e-06, "loss": 0.5598363876342773, "step": 9651 }, { "epoch": 1.7834532457183772, "grad_norm": 0.08362109959125519, "learning_rate": 7.456799364742e-06, "loss": 0.514435887336731, "step": 9652 }, { "epoch": 1.7836380224272732, "grad_norm": 0.07791508734226227, "learning_rate": 7.454870092634028e-06, "loss": 0.48999202251434326, "step": 9653 }, { "epoch": 1.783822799136169, "grad_norm": 0.07102424651384354, "learning_rate": 7.452940921814268e-06, "loss": 0.46250784397125244, "step": 9654 }, { "epoch": 1.7840075758450649, "grad_norm": 0.07442230731248856, "learning_rate": 7.451011852359486e-06, "loss": 0.5393667817115784, "step": 9655 }, { "epoch": 1.7841923525539607, "grad_norm": 0.0678488239645958, "learning_rate": 7.449082884346455e-06, "loss": 0.48200443387031555, "step": 9656 }, { "epoch": 1.7843771292628565, "grad_norm": 0.0755557268857956, "learning_rate": 7.447154017851952e-06, "loss": 0.534457266330719, "step": 9657 }, { "epoch": 1.7845619059717523, "grad_norm": 0.08040129393339157, "learning_rate": 7.4452252529527266e-06, "loss": 0.4577656090259552, "step": 9658 }, { "epoch": 1.7847466826806482, "grad_norm": 0.08037807047367096, "learning_rate": 7.443296589725546e-06, "loss": 0.5453135371208191, "step": 9659 }, { "epoch": 1.784931459389544, "grad_norm": 0.0812598168849945, "learning_rate": 7.44136802824716e-06, "loss": 0.6238100528717041, "step": 9660 }, { "epoch": 1.7851162360984398, "grad_norm": 0.07747691124677658, "learning_rate": 7.439439568594322e-06, "loss": 0.4280628561973572, "step": 9661 }, { "epoch": 1.7853010128073357, "grad_norm": 0.09302282333374023, "learning_rate": 7.4375112108437805e-06, "loss": 0.6061352491378784, "step": 9662 }, { "epoch": 1.7854857895162315, "grad_norm": 0.09619981795549393, "learning_rate": 7.435582955072274e-06, "loss": 0.6168282628059387, "step": 9663 }, { "epoch": 1.7856705662251273, "grad_norm": 0.1117103174328804, "learning_rate": 7.433654801356543e-06, "loss": 0.7298385500907898, "step": 9664 }, { "epoch": 1.7858553429340231, "grad_norm": 0.07348813861608505, "learning_rate": 7.431726749773322e-06, "loss": 0.4037703275680542, "step": 9665 }, { "epoch": 1.786040119642919, "grad_norm": 0.09630374610424042, "learning_rate": 7.429798800399339e-06, "loss": 0.5390593409538269, "step": 9666 }, { "epoch": 1.7862248963518148, "grad_norm": 0.08244305104017258, "learning_rate": 7.427870953311325e-06, "loss": 0.5543193221092224, "step": 9667 }, { "epoch": 1.7864096730607106, "grad_norm": 0.06571978330612183, "learning_rate": 7.425943208586001e-06, "loss": 0.40200668573379517, "step": 9668 }, { "epoch": 1.7865944497696065, "grad_norm": 0.10474611818790436, "learning_rate": 7.424015566300082e-06, "loss": 0.597493052482605, "step": 9669 }, { "epoch": 1.7867792264785023, "grad_norm": 0.07248591631650925, "learning_rate": 7.422088026530283e-06, "loss": 0.45960649847984314, "step": 9670 }, { "epoch": 1.7869640031873981, "grad_norm": 0.08366648852825165, "learning_rate": 7.420160589353321e-06, "loss": 0.5246530771255493, "step": 9671 }, { "epoch": 1.787148779896294, "grad_norm": 0.06871534883975983, "learning_rate": 7.41823325484589e-06, "loss": 0.4420692026615143, "step": 9672 }, { "epoch": 1.7873335566051898, "grad_norm": 0.06331802904605865, "learning_rate": 7.416306023084704e-06, "loss": 0.3460390865802765, "step": 9673 }, { "epoch": 1.7875183333140856, "grad_norm": 0.09252341091632843, "learning_rate": 7.41437889414645e-06, "loss": 0.5817192792892456, "step": 9674 }, { "epoch": 1.7877031100229817, "grad_norm": 0.09684668481349945, "learning_rate": 7.412451868107828e-06, "loss": 0.5826550722122192, "step": 9675 }, { "epoch": 1.7878878867318775, "grad_norm": 0.08396687358617783, "learning_rate": 7.410524945045528e-06, "loss": 0.6268729567527771, "step": 9676 }, { "epoch": 1.7880726634407733, "grad_norm": 0.049349989742040634, "learning_rate": 7.408598125036231e-06, "loss": 0.2656620442867279, "step": 9677 }, { "epoch": 1.7882574401496691, "grad_norm": 0.07575535774230957, "learning_rate": 7.4066714081566225e-06, "loss": 0.409138560295105, "step": 9678 }, { "epoch": 1.788442216858565, "grad_norm": 0.08824644237756729, "learning_rate": 7.404744794483378e-06, "loss": 0.5588775873184204, "step": 9679 }, { "epoch": 1.7886269935674608, "grad_norm": 0.06662043184041977, "learning_rate": 7.4028182840931714e-06, "loss": 0.3633524477481842, "step": 9680 }, { "epoch": 1.7888117702763566, "grad_norm": 0.09043443202972412, "learning_rate": 7.400891877062672e-06, "loss": 0.623502254486084, "step": 9681 }, { "epoch": 1.7889965469852525, "grad_norm": 0.07488062232732773, "learning_rate": 7.398965573468544e-06, "loss": 0.4273262619972229, "step": 9682 }, { "epoch": 1.7891813236941485, "grad_norm": 0.07975243777036667, "learning_rate": 7.397039373387449e-06, "loss": 0.47635743021965027, "step": 9683 }, { "epoch": 1.7893661004030443, "grad_norm": 0.08129294961690903, "learning_rate": 7.395113276896042e-06, "loss": 0.5076517462730408, "step": 9684 }, { "epoch": 1.7895508771119402, "grad_norm": 0.07039433717727661, "learning_rate": 7.393187284070979e-06, "loss": 0.45592862367630005, "step": 9685 }, { "epoch": 1.789735653820836, "grad_norm": 0.07881192862987518, "learning_rate": 7.391261394988904e-06, "loss": 0.4758132994174957, "step": 9686 }, { "epoch": 1.7899204305297318, "grad_norm": 0.0708584189414978, "learning_rate": 7.389335609726464e-06, "loss": 0.4376681447029114, "step": 9687 }, { "epoch": 1.7901052072386276, "grad_norm": 0.08312217891216278, "learning_rate": 7.387409928360302e-06, "loss": 0.5108171105384827, "step": 9688 }, { "epoch": 1.7902899839475235, "grad_norm": 0.06353427469730377, "learning_rate": 7.385484350967048e-06, "loss": 0.3415788412094116, "step": 9689 }, { "epoch": 1.7904747606564193, "grad_norm": 0.08577631413936615, "learning_rate": 7.383558877623342e-06, "loss": 0.5608941316604614, "step": 9690 }, { "epoch": 1.7906595373653151, "grad_norm": 0.07521551102399826, "learning_rate": 7.381633508405802e-06, "loss": 0.4245568513870239, "step": 9691 }, { "epoch": 1.790844314074211, "grad_norm": 0.09310653805732727, "learning_rate": 7.379708243391055e-06, "loss": 0.602554440498352, "step": 9692 }, { "epoch": 1.7910290907831068, "grad_norm": 0.09033379703760147, "learning_rate": 7.377783082655727e-06, "loss": 0.5871620178222656, "step": 9693 }, { "epoch": 1.7912138674920026, "grad_norm": 0.06903165578842163, "learning_rate": 7.375858026276426e-06, "loss": 0.3480731248855591, "step": 9694 }, { "epoch": 1.7913986442008984, "grad_norm": 0.1019633412361145, "learning_rate": 7.373933074329765e-06, "loss": 0.6741067171096802, "step": 9695 }, { "epoch": 1.7915834209097943, "grad_norm": 0.07089679688215256, "learning_rate": 7.372008226892354e-06, "loss": 0.40935018658638, "step": 9696 }, { "epoch": 1.79176819761869, "grad_norm": 0.07195250689983368, "learning_rate": 7.370083484040792e-06, "loss": 0.3602537214756012, "step": 9697 }, { "epoch": 1.791952974327586, "grad_norm": 0.06774020940065384, "learning_rate": 7.368158845851679e-06, "loss": 0.35686254501342773, "step": 9698 }, { "epoch": 1.7921377510364818, "grad_norm": 0.0797189399600029, "learning_rate": 7.366234312401611e-06, "loss": 0.5029522776603699, "step": 9699 }, { "epoch": 1.7923225277453776, "grad_norm": 0.07402968406677246, "learning_rate": 7.364309883767177e-06, "loss": 0.36364853382110596, "step": 9700 }, { "epoch": 1.7925073044542734, "grad_norm": 0.06966808438301086, "learning_rate": 7.362385560024963e-06, "loss": 0.3557095229625702, "step": 9701 }, { "epoch": 1.7926920811631692, "grad_norm": 0.08454054594039917, "learning_rate": 7.360461341251552e-06, "loss": 0.46431073546409607, "step": 9702 }, { "epoch": 1.792876857872065, "grad_norm": 0.07569453120231628, "learning_rate": 7.358537227523521e-06, "loss": 0.4756021201610565, "step": 9703 }, { "epoch": 1.793061634580961, "grad_norm": 0.09158246964216232, "learning_rate": 7.356613218917445e-06, "loss": 0.4759051501750946, "step": 9704 }, { "epoch": 1.793246411289857, "grad_norm": 0.06996876746416092, "learning_rate": 7.354689315509894e-06, "loss": 0.40846434235572815, "step": 9705 }, { "epoch": 1.7934311879987528, "grad_norm": 0.09398932754993439, "learning_rate": 7.3527655173774306e-06, "loss": 0.5916178822517395, "step": 9706 }, { "epoch": 1.7936159647076486, "grad_norm": 0.08205067366361618, "learning_rate": 7.350841824596622e-06, "loss": 0.4599655568599701, "step": 9707 }, { "epoch": 1.7938007414165444, "grad_norm": 0.0769825279712677, "learning_rate": 7.3489182372440124e-06, "loss": 0.37565580010414124, "step": 9708 }, { "epoch": 1.7939855181254403, "grad_norm": 0.0755021944642067, "learning_rate": 7.3469947553961665e-06, "loss": 0.44360727071762085, "step": 9709 }, { "epoch": 1.794170294834336, "grad_norm": 0.12141609936952591, "learning_rate": 7.345071379129632e-06, "loss": 0.6779170036315918, "step": 9710 }, { "epoch": 1.794355071543232, "grad_norm": 0.06998870521783829, "learning_rate": 7.343148108520948e-06, "loss": 0.5503925681114197, "step": 9711 }, { "epoch": 1.794539848252128, "grad_norm": 0.07839400321245193, "learning_rate": 7.341224943646654e-06, "loss": 0.616855263710022, "step": 9712 }, { "epoch": 1.7947246249610238, "grad_norm": 0.07722536474466324, "learning_rate": 7.3393018845832955e-06, "loss": 0.5017062425613403, "step": 9713 }, { "epoch": 1.7949094016699196, "grad_norm": 0.06142596900463104, "learning_rate": 7.3373789314073925e-06, "loss": 0.35877326130867004, "step": 9714 }, { "epoch": 1.7950941783788155, "grad_norm": 0.0714077427983284, "learning_rate": 7.335456084195479e-06, "loss": 0.3872145414352417, "step": 9715 }, { "epoch": 1.7952789550877113, "grad_norm": 0.0684652104973793, "learning_rate": 7.33353334302408e-06, "loss": 0.40973180532455444, "step": 9716 }, { "epoch": 1.7954637317966071, "grad_norm": 0.07915206253528595, "learning_rate": 7.331610707969707e-06, "loss": 0.5760145783424377, "step": 9717 }, { "epoch": 1.795648508505503, "grad_norm": 0.08069485425949097, "learning_rate": 7.329688179108882e-06, "loss": 0.5191196799278259, "step": 9718 }, { "epoch": 1.7958332852143988, "grad_norm": 0.06689658761024475, "learning_rate": 7.327765756518113e-06, "loss": 0.37917643785476685, "step": 9719 }, { "epoch": 1.7960180619232946, "grad_norm": 0.06615526229143143, "learning_rate": 7.325843440273905e-06, "loss": 0.3649718761444092, "step": 9720 }, { "epoch": 1.7962028386321904, "grad_norm": 0.07950661331415176, "learning_rate": 7.323921230452764e-06, "loss": 0.4523574113845825, "step": 9721 }, { "epoch": 1.7963876153410863, "grad_norm": 0.0933559387922287, "learning_rate": 7.321999127131185e-06, "loss": 0.5563049912452698, "step": 9722 }, { "epoch": 1.796572392049982, "grad_norm": 0.0657040923833847, "learning_rate": 7.320077130385661e-06, "loss": 0.4413251578807831, "step": 9723 }, { "epoch": 1.796757168758878, "grad_norm": 0.086649589240551, "learning_rate": 7.318155240292686e-06, "loss": 0.5430909991264343, "step": 9724 }, { "epoch": 1.7969419454677737, "grad_norm": 0.0868072658777237, "learning_rate": 7.316233456928738e-06, "loss": 0.4656325876712799, "step": 9725 }, { "epoch": 1.7971267221766696, "grad_norm": 0.09229880571365356, "learning_rate": 7.3143117803703046e-06, "loss": 0.5382356643676758, "step": 9726 }, { "epoch": 1.7973114988855654, "grad_norm": 0.08575570583343506, "learning_rate": 7.312390210693863e-06, "loss": 0.5168148875236511, "step": 9727 }, { "epoch": 1.7974962755944612, "grad_norm": 0.08345237374305725, "learning_rate": 7.310468747975875e-06, "loss": 0.5733045339584351, "step": 9728 }, { "epoch": 1.797681052303357, "grad_norm": 0.07230813056230545, "learning_rate": 7.30854739229282e-06, "loss": 0.3298667371273041, "step": 9729 }, { "epoch": 1.797865829012253, "grad_norm": 0.11275004595518112, "learning_rate": 7.306626143721161e-06, "loss": 0.6309958100318909, "step": 9730 }, { "epoch": 1.7980506057211487, "grad_norm": 0.07427927106618881, "learning_rate": 7.304705002337351e-06, "loss": 0.45528972148895264, "step": 9731 }, { "epoch": 1.7982353824300445, "grad_norm": 0.07842516154050827, "learning_rate": 7.3027839682178485e-06, "loss": 0.5899828672409058, "step": 9732 }, { "epoch": 1.7984201591389404, "grad_norm": 0.06140856072306633, "learning_rate": 7.300863041439113e-06, "loss": 0.42100775241851807, "step": 9733 }, { "epoch": 1.7986049358478364, "grad_norm": 0.09284207969903946, "learning_rate": 7.298942222077576e-06, "loss": 0.5992365479469299, "step": 9734 }, { "epoch": 1.7987897125567323, "grad_norm": 0.074347585439682, "learning_rate": 7.297021510209689e-06, "loss": 0.3827507197856903, "step": 9735 }, { "epoch": 1.798974489265628, "grad_norm": 0.08603885769844055, "learning_rate": 7.295100905911894e-06, "loss": 0.5358006954193115, "step": 9736 }, { "epoch": 1.799159265974524, "grad_norm": 0.07486552000045776, "learning_rate": 7.293180409260617e-06, "loss": 0.6258729100227356, "step": 9737 }, { "epoch": 1.7993440426834197, "grad_norm": 0.09046154469251633, "learning_rate": 7.291260020332294e-06, "loss": 0.5852406620979309, "step": 9738 }, { "epoch": 1.7995288193923156, "grad_norm": 0.07911182194948196, "learning_rate": 7.289339739203344e-06, "loss": 0.4991699755191803, "step": 9739 }, { "epoch": 1.7997135961012114, "grad_norm": 0.0798080712556839, "learning_rate": 7.287419565950193e-06, "loss": 0.4510963559150696, "step": 9740 }, { "epoch": 1.7998983728101074, "grad_norm": 0.07900462299585342, "learning_rate": 7.285499500649258e-06, "loss": 0.5157014727592468, "step": 9741 }, { "epoch": 1.8000831495190033, "grad_norm": 0.06722088903188705, "learning_rate": 7.283579543376948e-06, "loss": 0.3551257848739624, "step": 9742 }, { "epoch": 1.800267926227899, "grad_norm": 0.06953731924295425, "learning_rate": 7.281659694209674e-06, "loss": 0.44554603099823, "step": 9743 }, { "epoch": 1.800452702936795, "grad_norm": 0.07420971989631653, "learning_rate": 7.279739953223841e-06, "loss": 0.46750375628471375, "step": 9744 }, { "epoch": 1.8006374796456908, "grad_norm": 0.06312204152345657, "learning_rate": 7.277820320495846e-06, "loss": 0.33511245250701904, "step": 9745 }, { "epoch": 1.8008222563545866, "grad_norm": 0.06883435696363449, "learning_rate": 7.275900796102087e-06, "loss": 0.4391895532608032, "step": 9746 }, { "epoch": 1.8010070330634824, "grad_norm": 0.06549646705389023, "learning_rate": 7.2739813801189556e-06, "loss": 0.3687029778957367, "step": 9747 }, { "epoch": 1.8011918097723782, "grad_norm": 0.0845990851521492, "learning_rate": 7.272062072622831e-06, "loss": 0.5504123568534851, "step": 9748 }, { "epoch": 1.801376586481274, "grad_norm": 0.07709339261054993, "learning_rate": 7.270142873690103e-06, "loss": 0.4302341938018799, "step": 9749 }, { "epoch": 1.80156136319017, "grad_norm": 0.09314499795436859, "learning_rate": 7.268223783397152e-06, "loss": 0.5776973962783813, "step": 9750 }, { "epoch": 1.8017461398990657, "grad_norm": 0.07090350985527039, "learning_rate": 7.266304801820346e-06, "loss": 0.41678741574287415, "step": 9751 }, { "epoch": 1.8019309166079616, "grad_norm": 0.054357532411813736, "learning_rate": 7.264385929036052e-06, "loss": 0.30052199959754944, "step": 9752 }, { "epoch": 1.8021156933168574, "grad_norm": 0.09315387904644012, "learning_rate": 7.262467165120646e-06, "loss": 0.5855849981307983, "step": 9753 }, { "epoch": 1.8023004700257532, "grad_norm": 0.06688593327999115, "learning_rate": 7.260548510150478e-06, "loss": 0.4450649619102478, "step": 9754 }, { "epoch": 1.802485246734649, "grad_norm": 0.07729198038578033, "learning_rate": 7.258629964201911e-06, "loss": 0.5191230177879333, "step": 9755 }, { "epoch": 1.8026700234435449, "grad_norm": 0.08478415757417679, "learning_rate": 7.256711527351292e-06, "loss": 0.5620251893997192, "step": 9756 }, { "epoch": 1.8028548001524407, "grad_norm": 0.06856848299503326, "learning_rate": 7.2547931996749734e-06, "loss": 0.382502019405365, "step": 9757 }, { "epoch": 1.8030395768613365, "grad_norm": 0.09224586188793182, "learning_rate": 7.252874981249297e-06, "loss": 0.5560954809188843, "step": 9758 }, { "epoch": 1.8032243535702324, "grad_norm": 0.07440272718667984, "learning_rate": 7.250956872150601e-06, "loss": 0.4763925075531006, "step": 9759 }, { "epoch": 1.8034091302791282, "grad_norm": 0.06670232862234116, "learning_rate": 7.24903887245522e-06, "loss": 0.5068976879119873, "step": 9760 }, { "epoch": 1.803593906988024, "grad_norm": 0.08348406851291656, "learning_rate": 7.247120982239487e-06, "loss": 0.5030762553215027, "step": 9761 }, { "epoch": 1.8037786836969198, "grad_norm": 0.05656994879245758, "learning_rate": 7.245203201579724e-06, "loss": 0.277743935585022, "step": 9762 }, { "epoch": 1.803963460405816, "grad_norm": 0.05856098234653473, "learning_rate": 7.243285530552256e-06, "loss": 0.35996246337890625, "step": 9763 }, { "epoch": 1.8041482371147117, "grad_norm": 0.05998080223798752, "learning_rate": 7.241367969233402e-06, "loss": 0.3923867344856262, "step": 9764 }, { "epoch": 1.8043330138236076, "grad_norm": 0.054737962782382965, "learning_rate": 7.239450517699468e-06, "loss": 0.3953229486942291, "step": 9765 }, { "epoch": 1.8045177905325034, "grad_norm": 0.08780429512262344, "learning_rate": 7.237533176026768e-06, "loss": 0.6455278992652893, "step": 9766 }, { "epoch": 1.8047025672413992, "grad_norm": 0.09323868900537491, "learning_rate": 7.235615944291609e-06, "loss": 0.6022985577583313, "step": 9767 }, { "epoch": 1.804887343950295, "grad_norm": 0.08070196956396103, "learning_rate": 7.233698822570279e-06, "loss": 0.49842768907546997, "step": 9768 }, { "epoch": 1.8050721206591909, "grad_norm": 0.0877484530210495, "learning_rate": 7.231781810939085e-06, "loss": 0.585066556930542, "step": 9769 }, { "epoch": 1.8052568973680867, "grad_norm": 0.08354081213474274, "learning_rate": 7.229864909474318e-06, "loss": 0.6162436008453369, "step": 9770 }, { "epoch": 1.8054416740769827, "grad_norm": 0.07809589803218842, "learning_rate": 7.227948118252255e-06, "loss": 0.5466450452804565, "step": 9771 }, { "epoch": 1.8056264507858786, "grad_norm": 0.07099008560180664, "learning_rate": 7.2260314373491905e-06, "loss": 0.5045560002326965, "step": 9772 }, { "epoch": 1.8058112274947744, "grad_norm": 0.0817057341337204, "learning_rate": 7.224114866841392e-06, "loss": 0.4768495261669159, "step": 9773 }, { "epoch": 1.8059960042036702, "grad_norm": 0.04109758511185646, "learning_rate": 7.222198406805137e-06, "loss": 0.26770099997520447, "step": 9774 }, { "epoch": 1.806180780912566, "grad_norm": 0.07153109461069107, "learning_rate": 7.220282057316697e-06, "loss": 0.41642487049102783, "step": 9775 }, { "epoch": 1.806365557621462, "grad_norm": 0.09041387587785721, "learning_rate": 7.2183658184523305e-06, "loss": 0.47185105085372925, "step": 9776 }, { "epoch": 1.8065503343303577, "grad_norm": 0.07475770264863968, "learning_rate": 7.216449690288304e-06, "loss": 0.47243043780326843, "step": 9777 }, { "epoch": 1.8067351110392535, "grad_norm": 0.08335085958242416, "learning_rate": 7.214533672900873e-06, "loss": 0.5192751884460449, "step": 9778 }, { "epoch": 1.8069198877481494, "grad_norm": 0.08981821686029434, "learning_rate": 7.2126177663662855e-06, "loss": 0.5628516674041748, "step": 9779 }, { "epoch": 1.8071046644570452, "grad_norm": 0.06832242757081985, "learning_rate": 7.210701970760789e-06, "loss": 0.41456109285354614, "step": 9780 }, { "epoch": 1.807289441165941, "grad_norm": 0.06621191650629044, "learning_rate": 7.20878628616063e-06, "loss": 0.484127402305603, "step": 9781 }, { "epoch": 1.8074742178748369, "grad_norm": 0.07209111005067825, "learning_rate": 7.2068707126420425e-06, "loss": 0.5045483708381653, "step": 9782 }, { "epoch": 1.8076589945837327, "grad_norm": 0.05990150570869446, "learning_rate": 7.204955250281263e-06, "loss": 0.44331952929496765, "step": 9783 }, { "epoch": 1.8078437712926285, "grad_norm": 0.07342742383480072, "learning_rate": 7.20303989915452e-06, "loss": 0.4118276536464691, "step": 9784 }, { "epoch": 1.8080285480015243, "grad_norm": 0.0759073868393898, "learning_rate": 7.201124659338038e-06, "loss": 0.45692363381385803, "step": 9785 }, { "epoch": 1.8082133247104202, "grad_norm": 0.0632883831858635, "learning_rate": 7.199209530908038e-06, "loss": 0.3301604986190796, "step": 9786 }, { "epoch": 1.808398101419316, "grad_norm": 0.06672390550374985, "learning_rate": 7.197294513940739e-06, "loss": 0.4440694749355316, "step": 9787 }, { "epoch": 1.8085828781282118, "grad_norm": 0.07054869830608368, "learning_rate": 7.195379608512344e-06, "loss": 0.5055124759674072, "step": 9788 }, { "epoch": 1.8087676548371077, "grad_norm": 0.08731034398078918, "learning_rate": 7.193464814699073e-06, "loss": 0.5469434261322021, "step": 9789 }, { "epoch": 1.8089524315460035, "grad_norm": 0.06051642820239067, "learning_rate": 7.191550132577116e-06, "loss": 0.4700313210487366, "step": 9790 }, { "epoch": 1.8091372082548993, "grad_norm": 0.07084860652685165, "learning_rate": 7.189635562222676e-06, "loss": 0.4844103455543518, "step": 9791 }, { "epoch": 1.8093219849637951, "grad_norm": 0.0787714347243309, "learning_rate": 7.1877211037119556e-06, "loss": 0.5192242860794067, "step": 9792 }, { "epoch": 1.8095067616726912, "grad_norm": 0.07096560299396515, "learning_rate": 7.185806757121132e-06, "loss": 0.47444817423820496, "step": 9793 }, { "epoch": 1.809691538381587, "grad_norm": 0.08192567527294159, "learning_rate": 7.183892522526394e-06, "loss": 0.5131165981292725, "step": 9794 }, { "epoch": 1.8098763150904829, "grad_norm": 0.06697290390729904, "learning_rate": 7.1819784000039264e-06, "loss": 0.45709118247032166, "step": 9795 }, { "epoch": 1.8100610917993787, "grad_norm": 0.07221035659313202, "learning_rate": 7.180064389629899e-06, "loss": 0.4476308226585388, "step": 9796 }, { "epoch": 1.8102458685082745, "grad_norm": 0.09427494555711746, "learning_rate": 7.178150491480488e-06, "loss": 0.6278370022773743, "step": 9797 }, { "epoch": 1.8104306452171703, "grad_norm": 0.06774226576089859, "learning_rate": 7.176236705631861e-06, "loss": 0.36737802624702454, "step": 9798 }, { "epoch": 1.8106154219260662, "grad_norm": 0.06325653940439224, "learning_rate": 7.174323032160175e-06, "loss": 0.42155921459198, "step": 9799 }, { "epoch": 1.8108001986349622, "grad_norm": 0.06160309165716171, "learning_rate": 7.172409471141593e-06, "loss": 0.3376319706439972, "step": 9800 }, { "epoch": 1.810984975343858, "grad_norm": 0.0902712419629097, "learning_rate": 7.170496022652269e-06, "loss": 0.6608295440673828, "step": 9801 }, { "epoch": 1.8111697520527539, "grad_norm": 0.07389810681343079, "learning_rate": 7.168582686768348e-06, "loss": 0.41427645087242126, "step": 9802 }, { "epoch": 1.8113545287616497, "grad_norm": 0.08707796782255173, "learning_rate": 7.1666694635659826e-06, "loss": 0.7434542179107666, "step": 9803 }, { "epoch": 1.8115393054705455, "grad_norm": 0.07786441594362259, "learning_rate": 7.164756353121303e-06, "loss": 0.5525217652320862, "step": 9804 }, { "epoch": 1.8117240821794414, "grad_norm": 0.0723809227347374, "learning_rate": 7.162843355510452e-06, "loss": 0.39500388503074646, "step": 9805 }, { "epoch": 1.8119088588883372, "grad_norm": 0.08595466613769531, "learning_rate": 7.160930470809563e-06, "loss": 0.5993435382843018, "step": 9806 }, { "epoch": 1.812093635597233, "grad_norm": 0.07043426483869553, "learning_rate": 7.1590176990947545e-06, "loss": 0.38586297631263733, "step": 9807 }, { "epoch": 1.8122784123061288, "grad_norm": 0.08636261522769928, "learning_rate": 7.157105040442151e-06, "loss": 0.4882827401161194, "step": 9808 }, { "epoch": 1.8124631890150247, "grad_norm": 0.08507783710956573, "learning_rate": 7.1551924949278795e-06, "loss": 0.509990930557251, "step": 9809 }, { "epoch": 1.8126479657239205, "grad_norm": 0.0886579379439354, "learning_rate": 7.153280062628043e-06, "loss": 0.6217797994613647, "step": 9810 }, { "epoch": 1.8128327424328163, "grad_norm": 0.0848434716463089, "learning_rate": 7.15136774361875e-06, "loss": 0.7512550950050354, "step": 9811 }, { "epoch": 1.8130175191417122, "grad_norm": 0.09012161195278168, "learning_rate": 7.1494555379761156e-06, "loss": 0.49214449524879456, "step": 9812 }, { "epoch": 1.813202295850608, "grad_norm": 0.06810518354177475, "learning_rate": 7.147543445776228e-06, "loss": 0.40887418389320374, "step": 9813 }, { "epoch": 1.8133870725595038, "grad_norm": 0.0676533579826355, "learning_rate": 7.145631467095188e-06, "loss": 0.33318957686424255, "step": 9814 }, { "epoch": 1.8135718492683996, "grad_norm": 0.0729999765753746, "learning_rate": 7.1437196020090875e-06, "loss": 0.5460203886032104, "step": 9815 }, { "epoch": 1.8137566259772955, "grad_norm": 0.09123071283102036, "learning_rate": 7.141807850594007e-06, "loss": 0.7088720798492432, "step": 9816 }, { "epoch": 1.8139414026861913, "grad_norm": 0.06890129297971725, "learning_rate": 7.139896212926033e-06, "loss": 0.4780724048614502, "step": 9817 }, { "epoch": 1.8141261793950871, "grad_norm": 0.07324981689453125, "learning_rate": 7.137984689081243e-06, "loss": 0.5160731673240662, "step": 9818 }, { "epoch": 1.814310956103983, "grad_norm": 0.07690378278493881, "learning_rate": 7.136073279135707e-06, "loss": 0.44267570972442627, "step": 9819 }, { "epoch": 1.8144957328128788, "grad_norm": 0.0685325637459755, "learning_rate": 7.134161983165498e-06, "loss": 0.5272607803344727, "step": 9820 }, { "epoch": 1.8146805095217746, "grad_norm": 0.06789597123861313, "learning_rate": 7.132250801246672e-06, "loss": 0.492597758769989, "step": 9821 }, { "epoch": 1.8148652862306707, "grad_norm": 0.0947544276714325, "learning_rate": 7.130339733455291e-06, "loss": 0.6372851729393005, "step": 9822 }, { "epoch": 1.8150500629395665, "grad_norm": 0.06775210797786713, "learning_rate": 7.1284287798674165e-06, "loss": 0.39347755908966064, "step": 9823 }, { "epoch": 1.8152348396484623, "grad_norm": 0.07759533077478409, "learning_rate": 7.1265179405590855e-06, "loss": 0.5344981551170349, "step": 9824 }, { "epoch": 1.8154196163573582, "grad_norm": 0.0798647329211235, "learning_rate": 7.1246072156063536e-06, "loss": 0.4983478784561157, "step": 9825 }, { "epoch": 1.815604393066254, "grad_norm": 0.06834018975496292, "learning_rate": 7.122696605085262e-06, "loss": 0.434520423412323, "step": 9826 }, { "epoch": 1.8157891697751498, "grad_norm": 0.06779895722866058, "learning_rate": 7.120786109071838e-06, "loss": 0.5057935118675232, "step": 9827 }, { "epoch": 1.8159739464840456, "grad_norm": 0.0671994760632515, "learning_rate": 7.118875727642121e-06, "loss": 0.4207999110221863, "step": 9828 }, { "epoch": 1.8161587231929417, "grad_norm": 0.08160896599292755, "learning_rate": 7.1169654608721384e-06, "loss": 0.5233028531074524, "step": 9829 }, { "epoch": 1.8163434999018375, "grad_norm": 0.07913441210985184, "learning_rate": 7.115055308837908e-06, "loss": 0.5464879274368286, "step": 9830 }, { "epoch": 1.8165282766107334, "grad_norm": 0.07577405124902725, "learning_rate": 7.113145271615449e-06, "loss": 0.4584244191646576, "step": 9831 }, { "epoch": 1.8167130533196292, "grad_norm": 0.07947442680597305, "learning_rate": 7.111235349280782e-06, "loss": 0.4469531774520874, "step": 9832 }, { "epoch": 1.816897830028525, "grad_norm": 0.08003919571638107, "learning_rate": 7.109325541909906e-06, "loss": 0.5821112990379333, "step": 9833 }, { "epoch": 1.8170826067374208, "grad_norm": 0.08050795644521713, "learning_rate": 7.107415849578829e-06, "loss": 0.399353951215744, "step": 9834 }, { "epoch": 1.8172673834463167, "grad_norm": 0.07931918650865555, "learning_rate": 7.105506272363555e-06, "loss": 0.4955158531665802, "step": 9835 }, { "epoch": 1.8174521601552125, "grad_norm": 0.08377605676651001, "learning_rate": 7.103596810340072e-06, "loss": 0.5487820506095886, "step": 9836 }, { "epoch": 1.8176369368641083, "grad_norm": 0.07905194163322449, "learning_rate": 7.101687463584378e-06, "loss": 0.5085796117782593, "step": 9837 }, { "epoch": 1.8178217135730041, "grad_norm": 0.09129060804843903, "learning_rate": 7.099778232172452e-06, "loss": 0.6609033346176147, "step": 9838 }, { "epoch": 1.8180064902819, "grad_norm": 0.0898594856262207, "learning_rate": 7.0978691161802796e-06, "loss": 0.6358002424240112, "step": 9839 }, { "epoch": 1.8181912669907958, "grad_norm": 0.09380216896533966, "learning_rate": 7.095960115683837e-06, "loss": 0.589912474155426, "step": 9840 }, { "epoch": 1.8183760436996916, "grad_norm": 0.06584444642066956, "learning_rate": 7.0940512307590956e-06, "loss": 0.44311562180519104, "step": 9841 }, { "epoch": 1.8185608204085875, "grad_norm": 0.065439872443676, "learning_rate": 7.0921424614820244e-06, "loss": 0.4536382853984833, "step": 9842 }, { "epoch": 1.8187455971174833, "grad_norm": 0.08510088175535202, "learning_rate": 7.090233807928589e-06, "loss": 0.612328827381134, "step": 9843 }, { "epoch": 1.8189303738263791, "grad_norm": 0.07813709229230881, "learning_rate": 7.088325270174739e-06, "loss": 0.5523490309715271, "step": 9844 }, { "epoch": 1.819115150535275, "grad_norm": 0.057043276727199554, "learning_rate": 7.086416848296435e-06, "loss": 0.4329480230808258, "step": 9845 }, { "epoch": 1.8192999272441708, "grad_norm": 0.08415151387453079, "learning_rate": 7.0845085423696295e-06, "loss": 0.5207234621047974, "step": 9846 }, { "epoch": 1.8194847039530666, "grad_norm": 0.06661385297775269, "learning_rate": 7.082600352470256e-06, "loss": 0.35886847972869873, "step": 9847 }, { "epoch": 1.8196694806619624, "grad_norm": 0.08949161320924759, "learning_rate": 7.080692278674264e-06, "loss": 0.5949727296829224, "step": 9848 }, { "epoch": 1.8198542573708583, "grad_norm": 0.07575736194849014, "learning_rate": 7.078784321057589e-06, "loss": 0.5442951321601868, "step": 9849 }, { "epoch": 1.820039034079754, "grad_norm": 0.05346906557679176, "learning_rate": 7.076876479696155e-06, "loss": 0.32708707451820374, "step": 9850 }, { "epoch": 1.8202238107886501, "grad_norm": 0.08726691454648972, "learning_rate": 7.07496875466589e-06, "loss": 0.5074048638343811, "step": 9851 }, { "epoch": 1.820408587497546, "grad_norm": 0.08543438464403152, "learning_rate": 7.073061146042723e-06, "loss": 0.5461840629577637, "step": 9852 }, { "epoch": 1.8205933642064418, "grad_norm": 0.06283988803625107, "learning_rate": 7.071153653902562e-06, "loss": 0.4173544943332672, "step": 9853 }, { "epoch": 1.8207781409153376, "grad_norm": 0.06358476728200912, "learning_rate": 7.069246278321325e-06, "loss": 0.3695000112056732, "step": 9854 }, { "epoch": 1.8209629176242335, "grad_norm": 0.08632160723209381, "learning_rate": 7.067339019374912e-06, "loss": 0.5667553544044495, "step": 9855 }, { "epoch": 1.8211476943331293, "grad_norm": 0.06680776923894882, "learning_rate": 7.065431877139232e-06, "loss": 0.36568430066108704, "step": 9856 }, { "epoch": 1.8213324710420251, "grad_norm": 0.08722712099552155, "learning_rate": 7.063524851690187e-06, "loss": 0.5414561629295349, "step": 9857 }, { "epoch": 1.821517247750921, "grad_norm": 0.04953658580780029, "learning_rate": 7.061617943103661e-06, "loss": 0.3062533140182495, "step": 9858 }, { "epoch": 1.821702024459817, "grad_norm": 0.06840323656797409, "learning_rate": 7.0597111514555486e-06, "loss": 0.3991606831550598, "step": 9859 }, { "epoch": 1.8218868011687128, "grad_norm": 0.08398078382015228, "learning_rate": 7.057804476821736e-06, "loss": 0.5020613670349121, "step": 9860 }, { "epoch": 1.8220715778776087, "grad_norm": 0.0917510837316513, "learning_rate": 7.055897919278097e-06, "loss": 0.6168114542961121, "step": 9861 }, { "epoch": 1.8222563545865045, "grad_norm": 0.08755534142255783, "learning_rate": 7.053991478900511e-06, "loss": 0.6405090689659119, "step": 9862 }, { "epoch": 1.8224411312954003, "grad_norm": 0.07648077607154846, "learning_rate": 7.05208515576485e-06, "loss": 0.5264204144477844, "step": 9863 }, { "epoch": 1.8226259080042961, "grad_norm": 0.10150912404060364, "learning_rate": 7.050178949946973e-06, "loss": 0.5225452780723572, "step": 9864 }, { "epoch": 1.822810684713192, "grad_norm": 0.10042621940374374, "learning_rate": 7.048272861522746e-06, "loss": 0.6024169921875, "step": 9865 }, { "epoch": 1.8229954614220878, "grad_norm": 0.07107646018266678, "learning_rate": 7.046366890568028e-06, "loss": 0.445407509803772, "step": 9866 }, { "epoch": 1.8231802381309836, "grad_norm": 0.0642337054014206, "learning_rate": 7.044461037158661e-06, "loss": 0.2864264249801636, "step": 9867 }, { "epoch": 1.8233650148398794, "grad_norm": 0.08605214953422546, "learning_rate": 7.042555301370504e-06, "loss": 0.5610020160675049, "step": 9868 }, { "epoch": 1.8235497915487753, "grad_norm": 0.06902395188808441, "learning_rate": 7.04064968327939e-06, "loss": 0.41148707270622253, "step": 9869 }, { "epoch": 1.823734568257671, "grad_norm": 0.08039598912000656, "learning_rate": 7.038744182961159e-06, "loss": 0.4248596131801605, "step": 9870 }, { "epoch": 1.823919344966567, "grad_norm": 0.059146229177713394, "learning_rate": 7.036838800491648e-06, "loss": 0.4018034040927887, "step": 9871 }, { "epoch": 1.8241041216754628, "grad_norm": 0.08252823352813721, "learning_rate": 7.03493353594668e-06, "loss": 0.46294206380844116, "step": 9872 }, { "epoch": 1.8242888983843586, "grad_norm": 0.07533363997936249, "learning_rate": 7.03302838940208e-06, "loss": 0.5676658153533936, "step": 9873 }, { "epoch": 1.8244736750932544, "grad_norm": 0.06901130080223083, "learning_rate": 7.03112336093367e-06, "loss": 0.44045159220695496, "step": 9874 }, { "epoch": 1.8246584518021502, "grad_norm": 0.05934334546327591, "learning_rate": 7.02921845061726e-06, "loss": 0.3102717697620392, "step": 9875 }, { "epoch": 1.824843228511046, "grad_norm": 0.08287785947322845, "learning_rate": 7.02731365852866e-06, "loss": 0.517486035823822, "step": 9876 }, { "epoch": 1.825028005219942, "grad_norm": 0.08595351129770279, "learning_rate": 7.02540898474368e-06, "loss": 0.5475971698760986, "step": 9877 }, { "epoch": 1.8252127819288377, "grad_norm": 0.0753556564450264, "learning_rate": 7.023504429338114e-06, "loss": 0.4455927610397339, "step": 9878 }, { "epoch": 1.8253975586377336, "grad_norm": 0.06898584961891174, "learning_rate": 7.021599992387759e-06, "loss": 0.5426642894744873, "step": 9879 }, { "epoch": 1.8255823353466294, "grad_norm": 0.08508019149303436, "learning_rate": 7.0196956739684074e-06, "loss": 0.5580354928970337, "step": 9880 }, { "epoch": 1.8257671120555254, "grad_norm": 0.07500471919775009, "learning_rate": 7.0177914741558415e-06, "loss": 0.41870805621147156, "step": 9881 }, { "epoch": 1.8259518887644213, "grad_norm": 0.08029566705226898, "learning_rate": 7.015887393025847e-06, "loss": 0.49106553196907043, "step": 9882 }, { "epoch": 1.826136665473317, "grad_norm": 0.07958827912807465, "learning_rate": 7.013983430654199e-06, "loss": 0.3987799286842346, "step": 9883 }, { "epoch": 1.826321442182213, "grad_norm": 0.05812789127230644, "learning_rate": 7.012079587116666e-06, "loss": 0.3668726086616516, "step": 9884 }, { "epoch": 1.8265062188911088, "grad_norm": 0.09355711191892624, "learning_rate": 7.010175862489022e-06, "loss": 0.4632348418235779, "step": 9885 }, { "epoch": 1.8266909956000046, "grad_norm": 0.08564065396785736, "learning_rate": 7.00827225684702e-06, "loss": 0.48215222358703613, "step": 9886 }, { "epoch": 1.8268757723089004, "grad_norm": 0.0790952816605568, "learning_rate": 7.006368770266421e-06, "loss": 0.5680748224258423, "step": 9887 }, { "epoch": 1.8270605490177965, "grad_norm": 0.08761174976825714, "learning_rate": 7.004465402822984e-06, "loss": 0.5640802979469299, "step": 9888 }, { "epoch": 1.8272453257266923, "grad_norm": 0.0727882906794548, "learning_rate": 7.002562154592449e-06, "loss": 0.43162351846694946, "step": 9889 }, { "epoch": 1.8274301024355881, "grad_norm": 0.09398949891328812, "learning_rate": 7.0006590256505625e-06, "loss": 0.5356913805007935, "step": 9890 }, { "epoch": 1.827614879144484, "grad_norm": 0.08576202392578125, "learning_rate": 6.998756016073065e-06, "loss": 0.507265031337738, "step": 9891 }, { "epoch": 1.8277996558533798, "grad_norm": 0.09658868610858917, "learning_rate": 6.996853125935685e-06, "loss": 0.5329380631446838, "step": 9892 }, { "epoch": 1.8279844325622756, "grad_norm": 0.055821459740400314, "learning_rate": 6.9949503553141564e-06, "loss": 0.27001625299453735, "step": 9893 }, { "epoch": 1.8281692092711714, "grad_norm": 0.061614297330379486, "learning_rate": 6.993047704284204e-06, "loss": 0.29331445693969727, "step": 9894 }, { "epoch": 1.8283539859800673, "grad_norm": 0.09182018041610718, "learning_rate": 6.991145172921543e-06, "loss": 0.5280823707580566, "step": 9895 }, { "epoch": 1.828538762688963, "grad_norm": 0.0736556202173233, "learning_rate": 6.9892427613018905e-06, "loss": 0.4377639889717102, "step": 9896 }, { "epoch": 1.828723539397859, "grad_norm": 0.08335860818624496, "learning_rate": 6.987340469500959e-06, "loss": 0.540762722492218, "step": 9897 }, { "epoch": 1.8289083161067548, "grad_norm": 0.0884045958518982, "learning_rate": 6.985438297594449e-06, "loss": 0.6356663107872009, "step": 9898 }, { "epoch": 1.8290930928156506, "grad_norm": 0.08124842494726181, "learning_rate": 6.983536245658064e-06, "loss": 0.5272491574287415, "step": 9899 }, { "epoch": 1.8292778695245464, "grad_norm": 0.06727912276983261, "learning_rate": 6.981634313767501e-06, "loss": 0.49043992161750793, "step": 9900 }, { "epoch": 1.8294626462334422, "grad_norm": 0.0834495797753334, "learning_rate": 6.979732501998447e-06, "loss": 0.4925161600112915, "step": 9901 }, { "epoch": 1.829647422942338, "grad_norm": 0.08709096163511276, "learning_rate": 6.9778308104265955e-06, "loss": 0.5015747547149658, "step": 9902 }, { "epoch": 1.829832199651234, "grad_norm": 0.0766579806804657, "learning_rate": 6.975929239127614e-06, "loss": 0.5223663449287415, "step": 9903 }, { "epoch": 1.8300169763601297, "grad_norm": 0.06918003410100937, "learning_rate": 6.974027788177191e-06, "loss": 0.45638301968574524, "step": 9904 }, { "epoch": 1.8302017530690255, "grad_norm": 0.072813019156456, "learning_rate": 6.972126457650999e-06, "loss": 0.459013968706131, "step": 9905 }, { "epoch": 1.8303865297779214, "grad_norm": 0.09804744273424149, "learning_rate": 6.970225247624698e-06, "loss": 0.7139219641685486, "step": 9906 }, { "epoch": 1.8305713064868172, "grad_norm": 0.0948563814163208, "learning_rate": 6.968324158173949e-06, "loss": 0.663381040096283, "step": 9907 }, { "epoch": 1.830756083195713, "grad_norm": 0.09061747044324875, "learning_rate": 6.966423189374422e-06, "loss": 0.6212316155433655, "step": 9908 }, { "epoch": 1.8309408599046089, "grad_norm": 0.06985548883676529, "learning_rate": 6.964522341301756e-06, "loss": 0.5033018589019775, "step": 9909 }, { "epoch": 1.831125636613505, "grad_norm": 0.05785486102104187, "learning_rate": 6.9626216140316035e-06, "loss": 0.31822776794433594, "step": 9910 }, { "epoch": 1.8313104133224007, "grad_norm": 0.07244248688220978, "learning_rate": 6.9607210076396104e-06, "loss": 0.46352434158325195, "step": 9911 }, { "epoch": 1.8314951900312966, "grad_norm": 0.07493920624256134, "learning_rate": 6.958820522201411e-06, "loss": 0.5265142321586609, "step": 9912 }, { "epoch": 1.8316799667401924, "grad_norm": 0.08248679339885712, "learning_rate": 6.9569201577926395e-06, "loss": 0.5041503310203552, "step": 9913 }, { "epoch": 1.8318647434490882, "grad_norm": 0.08329528570175171, "learning_rate": 6.955019914488927e-06, "loss": 0.573448657989502, "step": 9914 }, { "epoch": 1.832049520157984, "grad_norm": 0.09237170219421387, "learning_rate": 6.953119792365895e-06, "loss": 0.5240632891654968, "step": 9915 }, { "epoch": 1.8322342968668799, "grad_norm": 0.09821862727403641, "learning_rate": 6.951219791499161e-06, "loss": 0.6255685091018677, "step": 9916 }, { "epoch": 1.832419073575776, "grad_norm": 0.08152344822883606, "learning_rate": 6.949319911964343e-06, "loss": 0.6066064238548279, "step": 9917 }, { "epoch": 1.8326038502846718, "grad_norm": 0.08967302739620209, "learning_rate": 6.947420153837047e-06, "loss": 0.5588473677635193, "step": 9918 }, { "epoch": 1.8327886269935676, "grad_norm": 0.06246509402990341, "learning_rate": 6.945520517192881e-06, "loss": 0.3397400379180908, "step": 9919 }, { "epoch": 1.8329734037024634, "grad_norm": 0.07850545644760132, "learning_rate": 6.943621002107439e-06, "loss": 0.4385947585105896, "step": 9920 }, { "epoch": 1.8331581804113593, "grad_norm": 0.08367501199245453, "learning_rate": 6.941721608656319e-06, "loss": 0.47891995310783386, "step": 9921 }, { "epoch": 1.833342957120255, "grad_norm": 0.07400240004062653, "learning_rate": 6.9398223369151155e-06, "loss": 0.41493046283721924, "step": 9922 }, { "epoch": 1.833527733829151, "grad_norm": 0.085567906498909, "learning_rate": 6.937923186959402e-06, "loss": 0.5399067401885986, "step": 9923 }, { "epoch": 1.8337125105380467, "grad_norm": 0.0904676541686058, "learning_rate": 6.936024158864769e-06, "loss": 0.7474747896194458, "step": 9924 }, { "epoch": 1.8338972872469426, "grad_norm": 0.06639987975358963, "learning_rate": 6.934125252706791e-06, "loss": 0.33274802565574646, "step": 9925 }, { "epoch": 1.8340820639558384, "grad_norm": 0.09406183660030365, "learning_rate": 6.932226468561034e-06, "loss": 0.66783607006073, "step": 9926 }, { "epoch": 1.8342668406647342, "grad_norm": 0.08418642729520798, "learning_rate": 6.930327806503061e-06, "loss": 0.47889599204063416, "step": 9927 }, { "epoch": 1.83445161737363, "grad_norm": 0.0629533976316452, "learning_rate": 6.928429266608446e-06, "loss": 0.3412812352180481, "step": 9928 }, { "epoch": 1.8346363940825259, "grad_norm": 0.0737699642777443, "learning_rate": 6.926530848952731e-06, "loss": 0.37405097484588623, "step": 9929 }, { "epoch": 1.8348211707914217, "grad_norm": 0.07190654426813126, "learning_rate": 6.924632553611474e-06, "loss": 0.4059646427631378, "step": 9930 }, { "epoch": 1.8350059475003175, "grad_norm": 0.07069795578718185, "learning_rate": 6.922734380660221e-06, "loss": 0.4506323039531708, "step": 9931 }, { "epoch": 1.8351907242092134, "grad_norm": 0.07418803125619888, "learning_rate": 6.920836330174509e-06, "loss": 0.3802109956741333, "step": 9932 }, { "epoch": 1.8353755009181092, "grad_norm": 0.08620359003543854, "learning_rate": 6.918938402229882e-06, "loss": 0.4469900131225586, "step": 9933 }, { "epoch": 1.835560277627005, "grad_norm": 0.07176492363214493, "learning_rate": 6.9170405969018626e-06, "loss": 0.475315660238266, "step": 9934 }, { "epoch": 1.8357450543359008, "grad_norm": 0.09536311775445938, "learning_rate": 6.915142914265984e-06, "loss": 0.7110531330108643, "step": 9935 }, { "epoch": 1.8359298310447967, "grad_norm": 0.07972738891839981, "learning_rate": 6.913245354397768e-06, "loss": 0.5264150500297546, "step": 9936 }, { "epoch": 1.8361146077536925, "grad_norm": 0.09280319511890411, "learning_rate": 6.911347917372726e-06, "loss": 0.5452582836151123, "step": 9937 }, { "epoch": 1.8362993844625883, "grad_norm": 0.06958615034818649, "learning_rate": 6.909450603266376e-06, "loss": 0.476435124874115, "step": 9938 }, { "epoch": 1.8364841611714844, "grad_norm": 0.08039085566997528, "learning_rate": 6.907553412154223e-06, "loss": 0.5513376593589783, "step": 9939 }, { "epoch": 1.8366689378803802, "grad_norm": 0.0806492269039154, "learning_rate": 6.905656344111768e-06, "loss": 0.48370277881622314, "step": 9940 }, { "epoch": 1.836853714589276, "grad_norm": 0.07488995045423508, "learning_rate": 6.903759399214509e-06, "loss": 0.41942453384399414, "step": 9941 }, { "epoch": 1.8370384912981719, "grad_norm": 0.08002443611621857, "learning_rate": 6.901862577537945e-06, "loss": 0.4895157217979431, "step": 9942 }, { "epoch": 1.8372232680070677, "grad_norm": 0.09190994501113892, "learning_rate": 6.899965879157549e-06, "loss": 0.590259850025177, "step": 9943 }, { "epoch": 1.8374080447159635, "grad_norm": 0.09339113533496857, "learning_rate": 6.898069304148816e-06, "loss": 0.5751887559890747, "step": 9944 }, { "epoch": 1.8375928214248594, "grad_norm": 0.08202441781759262, "learning_rate": 6.896172852587224e-06, "loss": 0.607812225818634, "step": 9945 }, { "epoch": 1.8377775981337552, "grad_norm": 0.05778725445270538, "learning_rate": 6.8942765245482355e-06, "loss": 0.36201390624046326, "step": 9946 }, { "epoch": 1.8379623748426512, "grad_norm": 0.05295009911060333, "learning_rate": 6.892380320107326e-06, "loss": 0.2442961186170578, "step": 9947 }, { "epoch": 1.838147151551547, "grad_norm": 0.0946890339255333, "learning_rate": 6.8904842393399605e-06, "loss": 0.6545706987380981, "step": 9948 }, { "epoch": 1.838331928260443, "grad_norm": 0.06986173987388611, "learning_rate": 6.888588282321591e-06, "loss": 0.4297598898410797, "step": 9949 }, { "epoch": 1.8385167049693387, "grad_norm": 0.06517749279737473, "learning_rate": 6.886692449127676e-06, "loss": 0.382990300655365, "step": 9950 }, { "epoch": 1.8387014816782346, "grad_norm": 0.08499684184789658, "learning_rate": 6.884796739833659e-06, "loss": 0.512717068195343, "step": 9951 }, { "epoch": 1.8388862583871304, "grad_norm": 0.08333538472652435, "learning_rate": 6.8829011545149845e-06, "loss": 0.5726484060287476, "step": 9952 }, { "epoch": 1.8390710350960262, "grad_norm": 0.07835826277732849, "learning_rate": 6.881005693247096e-06, "loss": 0.49940311908721924, "step": 9953 }, { "epoch": 1.839255811804922, "grad_norm": 0.07669610530138016, "learning_rate": 6.87911035610542e-06, "loss": 0.4895862638950348, "step": 9954 }, { "epoch": 1.8394405885138179, "grad_norm": 0.07560370862483978, "learning_rate": 6.877215143165387e-06, "loss": 0.5197358131408691, "step": 9955 }, { "epoch": 1.8396253652227137, "grad_norm": 0.09632917493581772, "learning_rate": 6.875320054502424e-06, "loss": 0.6280316114425659, "step": 9956 }, { "epoch": 1.8398101419316095, "grad_norm": 0.08223257213830948, "learning_rate": 6.873425090191944e-06, "loss": 0.5331270098686218, "step": 9957 }, { "epoch": 1.8399949186405054, "grad_norm": 0.07054473459720612, "learning_rate": 6.871530250309364e-06, "loss": 0.46475088596343994, "step": 9958 }, { "epoch": 1.8401796953494012, "grad_norm": 0.08357888460159302, "learning_rate": 6.8696355349300945e-06, "loss": 0.4834239184856415, "step": 9959 }, { "epoch": 1.840364472058297, "grad_norm": 0.06820651888847351, "learning_rate": 6.867740944129535e-06, "loss": 0.45789211988449097, "step": 9960 }, { "epoch": 1.8405492487671928, "grad_norm": 0.08674849569797516, "learning_rate": 6.865846477983087e-06, "loss": 0.5982378125190735, "step": 9961 }, { "epoch": 1.8407340254760887, "grad_norm": 0.060728199779987335, "learning_rate": 6.863952136566147e-06, "loss": 0.34441497921943665, "step": 9962 }, { "epoch": 1.8409188021849845, "grad_norm": 0.0902811661362648, "learning_rate": 6.862057919954095e-06, "loss": 0.5118451714515686, "step": 9963 }, { "epoch": 1.8411035788938803, "grad_norm": 0.08659269660711288, "learning_rate": 6.860163828222323e-06, "loss": 0.7537838220596313, "step": 9964 }, { "epoch": 1.8412883556027762, "grad_norm": 0.08275507390499115, "learning_rate": 6.858269861446209e-06, "loss": 0.5685074329376221, "step": 9965 }, { "epoch": 1.841473132311672, "grad_norm": 0.06703290343284607, "learning_rate": 6.856376019701124e-06, "loss": 0.3911975622177124, "step": 9966 }, { "epoch": 1.8416579090205678, "grad_norm": 0.07106104493141174, "learning_rate": 6.85448230306244e-06, "loss": 0.4847944378852844, "step": 9967 }, { "epoch": 1.8418426857294636, "grad_norm": 0.06882786750793457, "learning_rate": 6.8525887116055155e-06, "loss": 0.4596867561340332, "step": 9968 }, { "epoch": 1.8420274624383597, "grad_norm": 0.07429055124521255, "learning_rate": 6.850695245405714e-06, "loss": 0.5174936056137085, "step": 9969 }, { "epoch": 1.8422122391472555, "grad_norm": 0.08973759412765503, "learning_rate": 6.848801904538392e-06, "loss": 0.6093193888664246, "step": 9970 }, { "epoch": 1.8423970158561513, "grad_norm": 0.06997554004192352, "learning_rate": 6.846908689078892e-06, "loss": 0.39199551939964294, "step": 9971 }, { "epoch": 1.8425817925650472, "grad_norm": 0.06855449080467224, "learning_rate": 6.845015599102561e-06, "loss": 0.37640291452407837, "step": 9972 }, { "epoch": 1.842766569273943, "grad_norm": 0.08905099332332611, "learning_rate": 6.843122634684743e-06, "loss": 0.59391188621521, "step": 9973 }, { "epoch": 1.8429513459828388, "grad_norm": 0.09431247413158417, "learning_rate": 6.841229795900762e-06, "loss": 0.6495637893676758, "step": 9974 }, { "epoch": 1.8431361226917347, "grad_norm": 0.07599987089633942, "learning_rate": 6.839337082825954e-06, "loss": 0.5637093782424927, "step": 9975 }, { "epoch": 1.8433208994006307, "grad_norm": 0.07558701187372208, "learning_rate": 6.837444495535646e-06, "loss": 0.4508266746997833, "step": 9976 }, { "epoch": 1.8435056761095265, "grad_norm": 0.08141207695007324, "learning_rate": 6.835552034105147e-06, "loss": 0.5105732083320618, "step": 9977 }, { "epoch": 1.8436904528184224, "grad_norm": 0.07613011449575424, "learning_rate": 6.8336596986097795e-06, "loss": 0.4700372815132141, "step": 9978 }, { "epoch": 1.8438752295273182, "grad_norm": 0.09200368821620941, "learning_rate": 6.83176748912485e-06, "loss": 0.5477043390274048, "step": 9979 }, { "epoch": 1.844060006236214, "grad_norm": 0.07809238880872726, "learning_rate": 6.829875405725661e-06, "loss": 0.461892694234848, "step": 9980 }, { "epoch": 1.8442447829451099, "grad_norm": 0.09412237256765366, "learning_rate": 6.827983448487514e-06, "loss": 0.5485309362411499, "step": 9981 }, { "epoch": 1.8444295596540057, "grad_norm": 0.0677417665719986, "learning_rate": 6.826091617485704e-06, "loss": 0.41071319580078125, "step": 9982 }, { "epoch": 1.8446143363629015, "grad_norm": 0.09405647963285446, "learning_rate": 6.8241999127955125e-06, "loss": 0.6819890141487122, "step": 9983 }, { "epoch": 1.8447991130717973, "grad_norm": 0.06098010018467903, "learning_rate": 6.822308334492234e-06, "loss": 0.33615392446517944, "step": 9984 }, { "epoch": 1.8449838897806932, "grad_norm": 0.0719384029507637, "learning_rate": 6.82041688265114e-06, "loss": 0.3856649398803711, "step": 9985 }, { "epoch": 1.845168666489589, "grad_norm": 0.07117690145969391, "learning_rate": 6.818525557347504e-06, "loss": 0.42119550704956055, "step": 9986 }, { "epoch": 1.8453534431984848, "grad_norm": 0.07510238140821457, "learning_rate": 6.816634358656601e-06, "loss": 0.4040794372558594, "step": 9987 }, { "epoch": 1.8455382199073807, "grad_norm": 0.06964851915836334, "learning_rate": 6.814743286653689e-06, "loss": 0.43530791997909546, "step": 9988 }, { "epoch": 1.8457229966162765, "grad_norm": 0.09477655589580536, "learning_rate": 6.8128523414140266e-06, "loss": 0.7339559197425842, "step": 9989 }, { "epoch": 1.8459077733251723, "grad_norm": 0.06167411431670189, "learning_rate": 6.810961523012875e-06, "loss": 0.3423909544944763, "step": 9990 }, { "epoch": 1.8460925500340681, "grad_norm": 0.08428163081407547, "learning_rate": 6.8090708315254725e-06, "loss": 0.5697319507598877, "step": 9991 }, { "epoch": 1.846277326742964, "grad_norm": 0.10079807043075562, "learning_rate": 6.807180267027069e-06, "loss": 0.6265227794647217, "step": 9992 }, { "epoch": 1.8464621034518598, "grad_norm": 0.08388925343751907, "learning_rate": 6.8052898295929046e-06, "loss": 0.4699854552745819, "step": 9993 }, { "epoch": 1.8466468801607556, "grad_norm": 0.06862840056419373, "learning_rate": 6.8033995192982085e-06, "loss": 0.4226209223270416, "step": 9994 }, { "epoch": 1.8468316568696515, "grad_norm": 0.0832512304186821, "learning_rate": 6.801509336218208e-06, "loss": 0.5740237236022949, "step": 9995 }, { "epoch": 1.8470164335785473, "grad_norm": 0.07339507341384888, "learning_rate": 6.799619280428133e-06, "loss": 0.3959071934223175, "step": 9996 }, { "epoch": 1.847201210287443, "grad_norm": 0.08081066608428955, "learning_rate": 6.797729352003196e-06, "loss": 0.5652762055397034, "step": 9997 }, { "epoch": 1.8473859869963392, "grad_norm": 0.07260546088218689, "learning_rate": 6.795839551018616e-06, "loss": 0.45198237895965576, "step": 9998 }, { "epoch": 1.847570763705235, "grad_norm": 0.07437772303819656, "learning_rate": 6.79394987754959e-06, "loss": 0.41886457800865173, "step": 9999 }, { "epoch": 1.8477555404141308, "grad_norm": 0.07442960888147354, "learning_rate": 6.792060331671333e-06, "loss": 0.4797389507293701, "step": 10000 }, { "epoch": 1.8477555404141308, "eval_loss": 0.5581634044647217, "eval_runtime": 156.285, "eval_samples_per_second": 116.64, "eval_steps_per_second": 14.582, "step": 10000 }, { "epoch": 1.8479403171230266, "grad_norm": 0.06269600987434387, "learning_rate": 6.79017091345904e-06, "loss": 0.37403443455696106, "step": 10001 }, { "epoch": 1.8481250938319225, "grad_norm": 0.06929280608892441, "learning_rate": 6.7882816229879e-06, "loss": 0.46427372097969055, "step": 10002 }, { "epoch": 1.8483098705408183, "grad_norm": 0.08431359380483627, "learning_rate": 6.7863924603331e-06, "loss": 0.6038509607315063, "step": 10003 }, { "epoch": 1.8484946472497141, "grad_norm": 0.11570043861865997, "learning_rate": 6.784503425569833e-06, "loss": 0.7939857244491577, "step": 10004 }, { "epoch": 1.8486794239586102, "grad_norm": 0.07394532114267349, "learning_rate": 6.782614518773265e-06, "loss": 0.4864119291305542, "step": 10005 }, { "epoch": 1.848864200667506, "grad_norm": 0.06036113202571869, "learning_rate": 6.7807257400185745e-06, "loss": 0.29472512006759644, "step": 10006 }, { "epoch": 1.8490489773764018, "grad_norm": 0.08272892236709595, "learning_rate": 6.778837089380927e-06, "loss": 0.4968661069869995, "step": 10007 }, { "epoch": 1.8492337540852977, "grad_norm": 0.07526808977127075, "learning_rate": 6.7769485669354865e-06, "loss": 0.46775147318840027, "step": 10008 }, { "epoch": 1.8494185307941935, "grad_norm": 0.0638841912150383, "learning_rate": 6.775060172757408e-06, "loss": 0.36736905574798584, "step": 10009 }, { "epoch": 1.8496033075030893, "grad_norm": 0.07938042283058167, "learning_rate": 6.773171906921847e-06, "loss": 0.4630228579044342, "step": 10010 }, { "epoch": 1.8497880842119852, "grad_norm": 0.06860628724098206, "learning_rate": 6.771283769503948e-06, "loss": 0.42078661918640137, "step": 10011 }, { "epoch": 1.849972860920881, "grad_norm": 0.07782910764217377, "learning_rate": 6.769395760578852e-06, "loss": 0.5826637744903564, "step": 10012 }, { "epoch": 1.8501576376297768, "grad_norm": 0.07134412974119186, "learning_rate": 6.7675078802217e-06, "loss": 0.45725923776626587, "step": 10013 }, { "epoch": 1.8503424143386726, "grad_norm": 0.09386952221393585, "learning_rate": 6.7656201285076195e-06, "loss": 0.583639919757843, "step": 10014 }, { "epoch": 1.8505271910475685, "grad_norm": 0.07963576167821884, "learning_rate": 6.763732505511741e-06, "loss": 0.5077404975891113, "step": 10015 }, { "epoch": 1.8507119677564643, "grad_norm": 0.05521143600344658, "learning_rate": 6.761845011309181e-06, "loss": 0.3559388518333435, "step": 10016 }, { "epoch": 1.8508967444653601, "grad_norm": 0.07247708737850189, "learning_rate": 6.7599576459750595e-06, "loss": 0.4644903838634491, "step": 10017 }, { "epoch": 1.851081521174256, "grad_norm": 0.07846806943416595, "learning_rate": 6.7580704095844894e-06, "loss": 0.588983416557312, "step": 10018 }, { "epoch": 1.8512662978831518, "grad_norm": 0.08637133240699768, "learning_rate": 6.75618330221257e-06, "loss": 0.5109580159187317, "step": 10019 }, { "epoch": 1.8514510745920476, "grad_norm": 0.08097885549068451, "learning_rate": 6.754296323934408e-06, "loss": 0.47517940402030945, "step": 10020 }, { "epoch": 1.8516358513009434, "grad_norm": 0.04997225105762482, "learning_rate": 6.752409474825101e-06, "loss": 0.26674655079841614, "step": 10021 }, { "epoch": 1.8518206280098393, "grad_norm": 0.057102106511592865, "learning_rate": 6.750522754959734e-06, "loss": 0.27486395835876465, "step": 10022 }, { "epoch": 1.852005404718735, "grad_norm": 0.08661918342113495, "learning_rate": 6.748636164413392e-06, "loss": 0.5088651776313782, "step": 10023 }, { "epoch": 1.852190181427631, "grad_norm": 0.0762132778763771, "learning_rate": 6.746749703261165e-06, "loss": 0.4059827923774719, "step": 10024 }, { "epoch": 1.8523749581365268, "grad_norm": 0.08523812144994736, "learning_rate": 6.7448633715781176e-06, "loss": 0.48798370361328125, "step": 10025 }, { "epoch": 1.8525597348454226, "grad_norm": 0.075056292116642, "learning_rate": 6.742977169439324e-06, "loss": 0.4803304076194763, "step": 10026 }, { "epoch": 1.8527445115543186, "grad_norm": 0.09188424795866013, "learning_rate": 6.74109109691985e-06, "loss": 0.5603108406066895, "step": 10027 }, { "epoch": 1.8529292882632145, "grad_norm": 0.06817316263914108, "learning_rate": 6.739205154094755e-06, "loss": 0.40446001291275024, "step": 10028 }, { "epoch": 1.8531140649721103, "grad_norm": 0.08375546336174011, "learning_rate": 6.73731934103909e-06, "loss": 0.5641688704490662, "step": 10029 }, { "epoch": 1.8532988416810061, "grad_norm": 0.057858940213918686, "learning_rate": 6.735433657827912e-06, "loss": 0.3124054968357086, "step": 10030 }, { "epoch": 1.853483618389902, "grad_norm": 0.08280590176582336, "learning_rate": 6.733548104536258e-06, "loss": 0.4873959422111511, "step": 10031 }, { "epoch": 1.8536683950987978, "grad_norm": 0.07169628143310547, "learning_rate": 6.731662681239172e-06, "loss": 0.388360857963562, "step": 10032 }, { "epoch": 1.8538531718076936, "grad_norm": 0.07017473876476288, "learning_rate": 6.729777388011685e-06, "loss": 0.47163650393486023, "step": 10033 }, { "epoch": 1.8540379485165894, "grad_norm": 0.08672241121530533, "learning_rate": 6.727892224928825e-06, "loss": 0.44719213247299194, "step": 10034 }, { "epoch": 1.8542227252254855, "grad_norm": 0.09848632663488388, "learning_rate": 6.72600719206562e-06, "loss": 0.6737756729125977, "step": 10035 }, { "epoch": 1.8544075019343813, "grad_norm": 0.08251882344484329, "learning_rate": 6.724122289497083e-06, "loss": 0.5112337470054626, "step": 10036 }, { "epoch": 1.8545922786432771, "grad_norm": 0.07042793184518814, "learning_rate": 6.722237517298232e-06, "loss": 0.4941219985485077, "step": 10037 }, { "epoch": 1.854777055352173, "grad_norm": 0.09096898138523102, "learning_rate": 6.720352875544076e-06, "loss": 0.4680633842945099, "step": 10038 }, { "epoch": 1.8549618320610688, "grad_norm": 0.08553384989500046, "learning_rate": 6.718468364309609e-06, "loss": 0.620337724685669, "step": 10039 }, { "epoch": 1.8551466087699646, "grad_norm": 0.06293967366218567, "learning_rate": 6.7165839836698364e-06, "loss": 0.3222028911113739, "step": 10040 }, { "epoch": 1.8553313854788605, "grad_norm": 0.06140477955341339, "learning_rate": 6.714699733699752e-06, "loss": 0.4061179459095001, "step": 10041 }, { "epoch": 1.8555161621877563, "grad_norm": 0.06999810039997101, "learning_rate": 6.712815614474337e-06, "loss": 0.5046561360359192, "step": 10042 }, { "epoch": 1.855700938896652, "grad_norm": 0.08148129284381866, "learning_rate": 6.710931626068573e-06, "loss": 0.516724169254303, "step": 10043 }, { "epoch": 1.855885715605548, "grad_norm": 0.0839998796582222, "learning_rate": 6.709047768557449e-06, "loss": 0.42863452434539795, "step": 10044 }, { "epoch": 1.8560704923144438, "grad_norm": 0.08073319494724274, "learning_rate": 6.707164042015921e-06, "loss": 0.47118139266967773, "step": 10045 }, { "epoch": 1.8562552690233396, "grad_norm": 0.07812882214784622, "learning_rate": 6.705280446518962e-06, "loss": 0.4859619736671448, "step": 10046 }, { "epoch": 1.8564400457322354, "grad_norm": 0.0773615762591362, "learning_rate": 6.703396982141537e-06, "loss": 0.39667513966560364, "step": 10047 }, { "epoch": 1.8566248224411313, "grad_norm": 0.07865162938833237, "learning_rate": 6.701513648958595e-06, "loss": 0.56267911195755, "step": 10048 }, { "epoch": 1.856809599150027, "grad_norm": 0.0917169600725174, "learning_rate": 6.699630447045092e-06, "loss": 0.6247175335884094, "step": 10049 }, { "epoch": 1.856994375858923, "grad_norm": 0.07467745244503021, "learning_rate": 6.697747376475972e-06, "loss": 0.36883577704429626, "step": 10050 }, { "epoch": 1.8571791525678187, "grad_norm": 0.071692556142807, "learning_rate": 6.695864437326171e-06, "loss": 0.4360310435295105, "step": 10051 }, { "epoch": 1.8573639292767146, "grad_norm": 0.08954844623804092, "learning_rate": 6.693981629670634e-06, "loss": 0.544607400894165, "step": 10052 }, { "epoch": 1.8575487059856104, "grad_norm": 0.06607211381196976, "learning_rate": 6.6920989535842805e-06, "loss": 0.41852515935897827, "step": 10053 }, { "epoch": 1.8577334826945062, "grad_norm": 0.09537258744239807, "learning_rate": 6.690216409142041e-06, "loss": 0.6173677444458008, "step": 10054 }, { "epoch": 1.857918259403402, "grad_norm": 0.0744006410241127, "learning_rate": 6.688333996418834e-06, "loss": 0.4436899721622467, "step": 10055 }, { "epoch": 1.8581030361122979, "grad_norm": 0.10246053338050842, "learning_rate": 6.686451715489573e-06, "loss": 0.6799331903457642, "step": 10056 }, { "epoch": 1.858287812821194, "grad_norm": 0.0734836682677269, "learning_rate": 6.684569566429164e-06, "loss": 0.5199905633926392, "step": 10057 }, { "epoch": 1.8584725895300898, "grad_norm": 0.07924776524305344, "learning_rate": 6.682687549312521e-06, "loss": 0.4931090176105499, "step": 10058 }, { "epoch": 1.8586573662389856, "grad_norm": 0.060191184282302856, "learning_rate": 6.680805664214527e-06, "loss": 0.4219018518924713, "step": 10059 }, { "epoch": 1.8588421429478814, "grad_norm": 0.08619184046983719, "learning_rate": 6.678923911210086e-06, "loss": 0.6257535815238953, "step": 10060 }, { "epoch": 1.8590269196567772, "grad_norm": 0.080179862678051, "learning_rate": 6.677042290374086e-06, "loss": 0.5102938413619995, "step": 10061 }, { "epoch": 1.859211696365673, "grad_norm": 0.0652344822883606, "learning_rate": 6.675160801781404e-06, "loss": 0.4639420211315155, "step": 10062 }, { "epoch": 1.859396473074569, "grad_norm": 0.08999873697757721, "learning_rate": 6.673279445506917e-06, "loss": 0.7283390164375305, "step": 10063 }, { "epoch": 1.859581249783465, "grad_norm": 0.07395680248737335, "learning_rate": 6.671398221625507e-06, "loss": 0.46372368931770325, "step": 10064 }, { "epoch": 1.8597660264923608, "grad_norm": 0.09114869683980942, "learning_rate": 6.669517130212029e-06, "loss": 0.5683853626251221, "step": 10065 }, { "epoch": 1.8599508032012566, "grad_norm": 0.07969726622104645, "learning_rate": 6.667636171341352e-06, "loss": 0.5142582654953003, "step": 10066 }, { "epoch": 1.8601355799101524, "grad_norm": 0.09282614290714264, "learning_rate": 6.665755345088328e-06, "loss": 0.5591207146644592, "step": 10067 }, { "epoch": 1.8603203566190483, "grad_norm": 0.06714857369661331, "learning_rate": 6.6638746515278086e-06, "loss": 0.39201489090919495, "step": 10068 }, { "epoch": 1.860505133327944, "grad_norm": 0.055099327117204666, "learning_rate": 6.661994090734642e-06, "loss": 0.3117598593235016, "step": 10069 }, { "epoch": 1.86068991003684, "grad_norm": 0.09940553456544876, "learning_rate": 6.660113662783667e-06, "loss": 0.611347496509552, "step": 10070 }, { "epoch": 1.8608746867457358, "grad_norm": 0.08205731213092804, "learning_rate": 6.658233367749719e-06, "loss": 0.47489482164382935, "step": 10071 }, { "epoch": 1.8610594634546316, "grad_norm": 0.06458830088376999, "learning_rate": 6.65635320570763e-06, "loss": 0.40867653489112854, "step": 10072 }, { "epoch": 1.8612442401635274, "grad_norm": 0.07972311973571777, "learning_rate": 6.654473176732219e-06, "loss": 0.5350630879402161, "step": 10073 }, { "epoch": 1.8614290168724232, "grad_norm": 0.08452945202589035, "learning_rate": 6.65259328089831e-06, "loss": 0.599851667881012, "step": 10074 }, { "epoch": 1.861613793581319, "grad_norm": 0.07066328078508377, "learning_rate": 6.650713518280718e-06, "loss": 0.44157370924949646, "step": 10075 }, { "epoch": 1.861798570290215, "grad_norm": 0.08250157535076141, "learning_rate": 6.648833888954247e-06, "loss": 0.5383029580116272, "step": 10076 }, { "epoch": 1.8619833469991107, "grad_norm": 0.08089201897382736, "learning_rate": 6.646954392993703e-06, "loss": 0.5467923283576965, "step": 10077 }, { "epoch": 1.8621681237080066, "grad_norm": 0.07823770493268967, "learning_rate": 6.645075030473887e-06, "loss": 0.5012009143829346, "step": 10078 }, { "epoch": 1.8623529004169024, "grad_norm": 0.06355928629636765, "learning_rate": 6.643195801469584e-06, "loss": 0.30434858798980713, "step": 10079 }, { "epoch": 1.8625376771257982, "grad_norm": 0.08099062740802765, "learning_rate": 6.6413167060555904e-06, "loss": 0.5842652320861816, "step": 10080 }, { "epoch": 1.862722453834694, "grad_norm": 0.06439043581485748, "learning_rate": 6.63943774430668e-06, "loss": 0.41050440073013306, "step": 10081 }, { "epoch": 1.8629072305435899, "grad_norm": 0.08004370331764221, "learning_rate": 6.637558916297635e-06, "loss": 0.5602760314941406, "step": 10082 }, { "epoch": 1.8630920072524857, "grad_norm": 0.07466733455657959, "learning_rate": 6.6356802221032265e-06, "loss": 0.4141661524772644, "step": 10083 }, { "epoch": 1.8632767839613815, "grad_norm": 0.07983822375535965, "learning_rate": 6.633801661798218e-06, "loss": 0.523760199546814, "step": 10084 }, { "epoch": 1.8634615606702774, "grad_norm": 0.10091492533683777, "learning_rate": 6.631923235457371e-06, "loss": 0.3581887185573578, "step": 10085 }, { "epoch": 1.8636463373791734, "grad_norm": 0.0627380982041359, "learning_rate": 6.630044943155445e-06, "loss": 0.33766254782676697, "step": 10086 }, { "epoch": 1.8638311140880692, "grad_norm": 0.0785946249961853, "learning_rate": 6.628166784967185e-06, "loss": 0.584014356136322, "step": 10087 }, { "epoch": 1.864015890796965, "grad_norm": 0.09274392575025558, "learning_rate": 6.6262887609673365e-06, "loss": 0.5695738792419434, "step": 10088 }, { "epoch": 1.8642006675058609, "grad_norm": 0.07663106918334961, "learning_rate": 6.6244108712306435e-06, "loss": 0.4880254566669464, "step": 10089 }, { "epoch": 1.8643854442147567, "grad_norm": 0.07401253283023834, "learning_rate": 6.622533115831834e-06, "loss": 0.4619203805923462, "step": 10090 }, { "epoch": 1.8645702209236525, "grad_norm": 0.08102946728467941, "learning_rate": 6.62065549484564e-06, "loss": 0.4943026900291443, "step": 10091 }, { "epoch": 1.8647549976325484, "grad_norm": 0.07213003933429718, "learning_rate": 6.618778008346787e-06, "loss": 0.41184893250465393, "step": 10092 }, { "epoch": 1.8649397743414444, "grad_norm": 0.0901002511382103, "learning_rate": 6.616900656409989e-06, "loss": 0.4627062976360321, "step": 10093 }, { "epoch": 1.8651245510503403, "grad_norm": 0.06586375832557678, "learning_rate": 6.61502343910996e-06, "loss": 0.4276363253593445, "step": 10094 }, { "epoch": 1.865309327759236, "grad_norm": 0.07709570974111557, "learning_rate": 6.613146356521412e-06, "loss": 0.46082285046577454, "step": 10095 }, { "epoch": 1.865494104468132, "grad_norm": 0.07655750960111618, "learning_rate": 6.6112694087190375e-06, "loss": 0.4414830803871155, "step": 10096 }, { "epoch": 1.8656788811770277, "grad_norm": 0.06628584116697311, "learning_rate": 6.609392595777544e-06, "loss": 0.4861391484737396, "step": 10097 }, { "epoch": 1.8658636578859236, "grad_norm": 0.11271800100803375, "learning_rate": 6.607515917771614e-06, "loss": 0.5347905158996582, "step": 10098 }, { "epoch": 1.8660484345948194, "grad_norm": 0.09717021882534027, "learning_rate": 6.605639374775934e-06, "loss": 0.5996444225311279, "step": 10099 }, { "epoch": 1.8662332113037152, "grad_norm": 0.0778956487774849, "learning_rate": 6.603762966865195e-06, "loss": 0.4967779219150543, "step": 10100 }, { "epoch": 1.866417988012611, "grad_norm": 0.0822027325630188, "learning_rate": 6.601886694114058e-06, "loss": 0.49735188484191895, "step": 10101 }, { "epoch": 1.8666027647215069, "grad_norm": 0.08405333012342453, "learning_rate": 6.600010556597198e-06, "loss": 0.4059186577796936, "step": 10102 }, { "epoch": 1.8667875414304027, "grad_norm": 0.077104352414608, "learning_rate": 6.598134554389287e-06, "loss": 0.45331335067749023, "step": 10103 }, { "epoch": 1.8669723181392985, "grad_norm": 0.10179364681243896, "learning_rate": 6.596258687564974e-06, "loss": 0.6316981315612793, "step": 10104 }, { "epoch": 1.8671570948481944, "grad_norm": 0.08048411458730698, "learning_rate": 6.594382956198915e-06, "loss": 0.5279122591018677, "step": 10105 }, { "epoch": 1.8673418715570902, "grad_norm": 0.08929668366909027, "learning_rate": 6.592507360365763e-06, "loss": 0.49193674325942993, "step": 10106 }, { "epoch": 1.867526648265986, "grad_norm": 0.06527028977870941, "learning_rate": 6.5906319001401545e-06, "loss": 0.40781906247138977, "step": 10107 }, { "epoch": 1.8677114249748819, "grad_norm": 0.06002240255475044, "learning_rate": 6.5887565755967305e-06, "loss": 0.4073244333267212, "step": 10108 }, { "epoch": 1.8678962016837777, "grad_norm": 0.0968843549489975, "learning_rate": 6.586881386810125e-06, "loss": 0.584854245185852, "step": 10109 }, { "epoch": 1.8680809783926735, "grad_norm": 0.07757356762886047, "learning_rate": 6.585006333854957e-06, "loss": 0.49007049202919006, "step": 10110 }, { "epoch": 1.8682657551015693, "grad_norm": 0.0810554251074791, "learning_rate": 6.5831314168058554e-06, "loss": 0.5097789764404297, "step": 10111 }, { "epoch": 1.8684505318104652, "grad_norm": 0.08307379484176636, "learning_rate": 6.581256635737435e-06, "loss": 0.5431102514266968, "step": 10112 }, { "epoch": 1.868635308519361, "grad_norm": 0.08331498503684998, "learning_rate": 6.579381990724303e-06, "loss": 0.5673648118972778, "step": 10113 }, { "epoch": 1.8688200852282568, "grad_norm": 0.086639903485775, "learning_rate": 6.577507481841069e-06, "loss": 0.5178836584091187, "step": 10114 }, { "epoch": 1.8690048619371529, "grad_norm": 0.06253974884748459, "learning_rate": 6.575633109162324e-06, "loss": 0.37117743492126465, "step": 10115 }, { "epoch": 1.8691896386460487, "grad_norm": 0.07451470196247101, "learning_rate": 6.57375887276267e-06, "loss": 0.5134936571121216, "step": 10116 }, { "epoch": 1.8693744153549445, "grad_norm": 0.06519269198179245, "learning_rate": 6.5718847727166965e-06, "loss": 0.40584704279899597, "step": 10117 }, { "epoch": 1.8695591920638404, "grad_norm": 0.05505971610546112, "learning_rate": 6.570010809098981e-06, "loss": 0.23253515362739563, "step": 10118 }, { "epoch": 1.8697439687727362, "grad_norm": 0.07418741285800934, "learning_rate": 6.568136981984102e-06, "loss": 0.46305274963378906, "step": 10119 }, { "epoch": 1.869928745481632, "grad_norm": 0.07298068702220917, "learning_rate": 6.5662632914466405e-06, "loss": 0.38585028052330017, "step": 10120 }, { "epoch": 1.8701135221905278, "grad_norm": 0.06870625913143158, "learning_rate": 6.564389737561153e-06, "loss": 0.4443548619747162, "step": 10121 }, { "epoch": 1.8702982988994237, "grad_norm": 0.07343483716249466, "learning_rate": 6.562516320402204e-06, "loss": 0.43571585416793823, "step": 10122 }, { "epoch": 1.8704830756083197, "grad_norm": 0.09324276447296143, "learning_rate": 6.5606430400443555e-06, "loss": 0.5833722352981567, "step": 10123 }, { "epoch": 1.8706678523172156, "grad_norm": 0.08678468316793442, "learning_rate": 6.558769896562149e-06, "loss": 0.6048173308372498, "step": 10124 }, { "epoch": 1.8708526290261114, "grad_norm": 0.07251591235399246, "learning_rate": 6.556896890030137e-06, "loss": 0.4826517105102539, "step": 10125 }, { "epoch": 1.8710374057350072, "grad_norm": 0.06635832786560059, "learning_rate": 6.555024020522858e-06, "loss": 0.4049977660179138, "step": 10126 }, { "epoch": 1.871222182443903, "grad_norm": 0.0675082802772522, "learning_rate": 6.55315128811484e-06, "loss": 0.40836289525032043, "step": 10127 }, { "epoch": 1.8714069591527989, "grad_norm": 0.07173550873994827, "learning_rate": 6.551278692880621e-06, "loss": 0.45877552032470703, "step": 10128 }, { "epoch": 1.8715917358616947, "grad_norm": 0.08252470940351486, "learning_rate": 6.54940623489472e-06, "loss": 0.5686274766921997, "step": 10129 }, { "epoch": 1.8717765125705905, "grad_norm": 0.06638361513614655, "learning_rate": 6.547533914231654e-06, "loss": 0.3055427670478821, "step": 10130 }, { "epoch": 1.8719612892794864, "grad_norm": 0.09710148721933365, "learning_rate": 6.54566173096594e-06, "loss": 0.6220736503601074, "step": 10131 }, { "epoch": 1.8721460659883822, "grad_norm": 0.08042354881763458, "learning_rate": 6.543789685172077e-06, "loss": 0.5249215364456177, "step": 10132 }, { "epoch": 1.872330842697278, "grad_norm": 0.0686904564499855, "learning_rate": 6.541917776924574e-06, "loss": 0.38781800866127014, "step": 10133 }, { "epoch": 1.8725156194061738, "grad_norm": 0.06881073862314224, "learning_rate": 6.540046006297928e-06, "loss": 0.4447661340236664, "step": 10134 }, { "epoch": 1.8727003961150697, "grad_norm": 0.06407603621482849, "learning_rate": 6.538174373366619e-06, "loss": 0.32195812463760376, "step": 10135 }, { "epoch": 1.8728851728239655, "grad_norm": 0.07901185750961304, "learning_rate": 6.536302878205143e-06, "loss": 0.4765346348285675, "step": 10136 }, { "epoch": 1.8730699495328613, "grad_norm": 0.08572155982255936, "learning_rate": 6.534431520887978e-06, "loss": 0.5873922109603882, "step": 10137 }, { "epoch": 1.8732547262417572, "grad_norm": 0.09743129462003708, "learning_rate": 6.532560301489594e-06, "loss": 0.5956275463104248, "step": 10138 }, { "epoch": 1.873439502950653, "grad_norm": 0.07195150852203369, "learning_rate": 6.530689220084459e-06, "loss": 0.4158497452735901, "step": 10139 }, { "epoch": 1.8736242796595488, "grad_norm": 0.06035377085208893, "learning_rate": 6.528818276747044e-06, "loss": 0.3526591956615448, "step": 10140 }, { "epoch": 1.8738090563684446, "grad_norm": 0.09304926544427872, "learning_rate": 6.526947471551799e-06, "loss": 0.563854992389679, "step": 10141 }, { "epoch": 1.8739938330773405, "grad_norm": 0.10085073858499527, "learning_rate": 6.525076804573176e-06, "loss": 0.7046477794647217, "step": 10142 }, { "epoch": 1.8741786097862363, "grad_norm": 0.055469196289777756, "learning_rate": 6.523206275885632e-06, "loss": 0.26885268092155457, "step": 10143 }, { "epoch": 1.8743633864951321, "grad_norm": 0.07149965316057205, "learning_rate": 6.521335885563595e-06, "loss": 0.5555588006973267, "step": 10144 }, { "epoch": 1.8745481632040282, "grad_norm": 0.07556617259979248, "learning_rate": 6.5194656336815085e-06, "loss": 0.5391772985458374, "step": 10145 }, { "epoch": 1.874732939912924, "grad_norm": 0.09786257892847061, "learning_rate": 6.517595520313799e-06, "loss": 0.6267918944358826, "step": 10146 }, { "epoch": 1.8749177166218198, "grad_norm": 0.07329027354717255, "learning_rate": 6.515725545534894e-06, "loss": 0.5676301717758179, "step": 10147 }, { "epoch": 1.8751024933307157, "grad_norm": 0.09947676956653595, "learning_rate": 6.513855709419212e-06, "loss": 0.6026296019554138, "step": 10148 }, { "epoch": 1.8752872700396115, "grad_norm": 0.07943307608366013, "learning_rate": 6.511986012041163e-06, "loss": 0.4566938281059265, "step": 10149 }, { "epoch": 1.8754720467485073, "grad_norm": 0.10105638206005096, "learning_rate": 6.510116453475159e-06, "loss": 0.7011048197746277, "step": 10150 }, { "epoch": 1.8756568234574031, "grad_norm": 0.08896586298942566, "learning_rate": 6.508247033795605e-06, "loss": 0.5455494523048401, "step": 10151 }, { "epoch": 1.8758416001662992, "grad_norm": 0.0694127157330513, "learning_rate": 6.506377753076891e-06, "loss": 0.3472730219364166, "step": 10152 }, { "epoch": 1.876026376875195, "grad_norm": 0.0655612051486969, "learning_rate": 6.504508611393414e-06, "loss": 0.3974023163318634, "step": 10153 }, { "epoch": 1.8762111535840909, "grad_norm": 0.08311789482831955, "learning_rate": 6.502639608819561e-06, "loss": 0.6139525175094604, "step": 10154 }, { "epoch": 1.8763959302929867, "grad_norm": 0.10273124277591705, "learning_rate": 6.5007707454297055e-06, "loss": 0.5326289534568787, "step": 10155 }, { "epoch": 1.8765807070018825, "grad_norm": 0.08947394788265228, "learning_rate": 6.498902021298227e-06, "loss": 0.5882757902145386, "step": 10156 }, { "epoch": 1.8767654837107783, "grad_norm": 0.06457432359457016, "learning_rate": 6.497033436499498e-06, "loss": 0.38947319984436035, "step": 10157 }, { "epoch": 1.8769502604196742, "grad_norm": 0.05972028151154518, "learning_rate": 6.495164991107874e-06, "loss": 0.3036060035228729, "step": 10158 }, { "epoch": 1.87713503712857, "grad_norm": 0.06575573980808258, "learning_rate": 6.493296685197719e-06, "loss": 0.41468435525894165, "step": 10159 }, { "epoch": 1.8773198138374658, "grad_norm": 0.07296153903007507, "learning_rate": 6.49142851884339e-06, "loss": 0.4320918917655945, "step": 10160 }, { "epoch": 1.8775045905463617, "grad_norm": 0.07599961757659912, "learning_rate": 6.489560492119225e-06, "loss": 0.4930938482284546, "step": 10161 }, { "epoch": 1.8776893672552575, "grad_norm": 0.08170460909605026, "learning_rate": 6.487692605099571e-06, "loss": 0.5137930512428284, "step": 10162 }, { "epoch": 1.8778741439641533, "grad_norm": 0.06708872318267822, "learning_rate": 6.485824857858762e-06, "loss": 0.4063275158405304, "step": 10163 }, { "epoch": 1.8780589206730491, "grad_norm": 0.08488880097866058, "learning_rate": 6.483957250471128e-06, "loss": 0.47834888100624084, "step": 10164 }, { "epoch": 1.878243697381945, "grad_norm": 0.07035119831562042, "learning_rate": 6.482089783010997e-06, "loss": 0.45612436532974243, "step": 10165 }, { "epoch": 1.8784284740908408, "grad_norm": 0.07598753273487091, "learning_rate": 6.480222455552685e-06, "loss": 0.4267653226852417, "step": 10166 }, { "epoch": 1.8786132507997366, "grad_norm": 0.07850490510463715, "learning_rate": 6.478355268170507e-06, "loss": 0.5753515362739563, "step": 10167 }, { "epoch": 1.8787980275086325, "grad_norm": 0.06916812807321548, "learning_rate": 6.476488220938775e-06, "loss": 0.43277326226234436, "step": 10168 }, { "epoch": 1.8789828042175283, "grad_norm": 0.08787582814693451, "learning_rate": 6.474621313931784e-06, "loss": 0.4945637881755829, "step": 10169 }, { "epoch": 1.879167580926424, "grad_norm": 0.095709890127182, "learning_rate": 6.4727545472238366e-06, "loss": 0.5912142395973206, "step": 10170 }, { "epoch": 1.87935235763532, "grad_norm": 0.07700788974761963, "learning_rate": 6.470887920889224e-06, "loss": 0.3847402334213257, "step": 10171 }, { "epoch": 1.8795371343442158, "grad_norm": 0.09229899942874908, "learning_rate": 6.4690214350022296e-06, "loss": 0.546176552772522, "step": 10172 }, { "epoch": 1.8797219110531116, "grad_norm": 0.06881638616323471, "learning_rate": 6.4671550896371345e-06, "loss": 0.43623825907707214, "step": 10173 }, { "epoch": 1.8799066877620076, "grad_norm": 0.06641674786806107, "learning_rate": 6.4652888848682194e-06, "loss": 0.5173807144165039, "step": 10174 }, { "epoch": 1.8800914644709035, "grad_norm": 0.07400946319103241, "learning_rate": 6.46342282076974e-06, "loss": 0.4365104138851166, "step": 10175 }, { "epoch": 1.8802762411797993, "grad_norm": 0.07894344627857208, "learning_rate": 6.461556897415972e-06, "loss": 0.496677041053772, "step": 10176 }, { "epoch": 1.8804610178886951, "grad_norm": 0.08894534409046173, "learning_rate": 6.459691114881172e-06, "loss": 0.6015393137931824, "step": 10177 }, { "epoch": 1.880645794597591, "grad_norm": 0.08057379722595215, "learning_rate": 6.457825473239583e-06, "loss": 0.4932987689971924, "step": 10178 }, { "epoch": 1.8808305713064868, "grad_norm": 0.08173685520887375, "learning_rate": 6.4559599725654645e-06, "loss": 0.5451905727386475, "step": 10179 }, { "epoch": 1.8810153480153826, "grad_norm": 0.07516109943389893, "learning_rate": 6.454094612933046e-06, "loss": 0.37563127279281616, "step": 10180 }, { "epoch": 1.8812001247242787, "grad_norm": 0.054435789585113525, "learning_rate": 6.4522293944165695e-06, "loss": 0.39257878065109253, "step": 10181 }, { "epoch": 1.8813849014331745, "grad_norm": 0.0689535066485405, "learning_rate": 6.450364317090265e-06, "loss": 0.45522540807724, "step": 10182 }, { "epoch": 1.8815696781420703, "grad_norm": 0.10657844692468643, "learning_rate": 6.448499381028355e-06, "loss": 0.7042019963264465, "step": 10183 }, { "epoch": 1.8817544548509662, "grad_norm": 0.09257829934358597, "learning_rate": 6.4466345863050565e-06, "loss": 0.5799952745437622, "step": 10184 }, { "epoch": 1.881939231559862, "grad_norm": 0.06281529366970062, "learning_rate": 6.444769932994586e-06, "loss": 0.3686409294605255, "step": 10185 }, { "epoch": 1.8821240082687578, "grad_norm": 0.07586891204118729, "learning_rate": 6.44290542117115e-06, "loss": 0.4478495121002197, "step": 10186 }, { "epoch": 1.8823087849776536, "grad_norm": 0.08726614713668823, "learning_rate": 6.441041050908947e-06, "loss": 0.5721866488456726, "step": 10187 }, { "epoch": 1.8824935616865495, "grad_norm": 0.07208137959241867, "learning_rate": 6.439176822282178e-06, "loss": 0.49009835720062256, "step": 10188 }, { "epoch": 1.8826783383954453, "grad_norm": 0.0700579583644867, "learning_rate": 6.43731273536503e-06, "loss": 0.4596255123615265, "step": 10189 }, { "epoch": 1.8828631151043411, "grad_norm": 0.07105761021375656, "learning_rate": 6.4354487902316885e-06, "loss": 0.4763326644897461, "step": 10190 }, { "epoch": 1.883047891813237, "grad_norm": 0.05622998997569084, "learning_rate": 6.433584986956335e-06, "loss": 0.31442946195602417, "step": 10191 }, { "epoch": 1.8832326685221328, "grad_norm": 0.07619105279445648, "learning_rate": 6.431721325613138e-06, "loss": 0.47837021946907043, "step": 10192 }, { "epoch": 1.8834174452310286, "grad_norm": 0.073966383934021, "learning_rate": 6.4298578062762705e-06, "loss": 0.4140937924385071, "step": 10193 }, { "epoch": 1.8836022219399244, "grad_norm": 0.08703131228685379, "learning_rate": 6.427994429019894e-06, "loss": 0.4924027919769287, "step": 10194 }, { "epoch": 1.8837869986488203, "grad_norm": 0.07850147038698196, "learning_rate": 6.426131193918162e-06, "loss": 0.3722976744174957, "step": 10195 }, { "epoch": 1.883971775357716, "grad_norm": 0.06796495616436005, "learning_rate": 6.424268101045231e-06, "loss": 0.4188387095928192, "step": 10196 }, { "epoch": 1.884156552066612, "grad_norm": 0.08321154117584229, "learning_rate": 6.42240515047524e-06, "loss": 0.47672274708747864, "step": 10197 }, { "epoch": 1.8843413287755078, "grad_norm": 0.0876714438199997, "learning_rate": 6.4205423422823265e-06, "loss": 0.5653049349784851, "step": 10198 }, { "epoch": 1.8845261054844036, "grad_norm": 0.08870537579059601, "learning_rate": 6.418679676540635e-06, "loss": 0.5262870192527771, "step": 10199 }, { "epoch": 1.8847108821932994, "grad_norm": 0.0855855643749237, "learning_rate": 6.4168171533242865e-06, "loss": 0.4589315950870514, "step": 10200 }, { "epoch": 1.8848956589021952, "grad_norm": 0.06956803053617477, "learning_rate": 6.414954772707403e-06, "loss": 0.44253531098365784, "step": 10201 }, { "epoch": 1.885080435611091, "grad_norm": 0.08228602260351181, "learning_rate": 6.4130925347641074e-06, "loss": 0.5241605043411255, "step": 10202 }, { "epoch": 1.8852652123199871, "grad_norm": 0.0796494111418724, "learning_rate": 6.411230439568504e-06, "loss": 0.4705255627632141, "step": 10203 }, { "epoch": 1.885449989028883, "grad_norm": 0.08287597447633743, "learning_rate": 6.4093684871947e-06, "loss": 0.5432465076446533, "step": 10204 }, { "epoch": 1.8856347657377788, "grad_norm": 0.08077621459960938, "learning_rate": 6.4075066777167996e-06, "loss": 0.5836336016654968, "step": 10205 }, { "epoch": 1.8858195424466746, "grad_norm": 0.08096127957105637, "learning_rate": 6.405645011208892e-06, "loss": 0.4534474313259125, "step": 10206 }, { "epoch": 1.8860043191555704, "grad_norm": 0.05567679926753044, "learning_rate": 6.4037834877450675e-06, "loss": 0.37423694133758545, "step": 10207 }, { "epoch": 1.8861890958644663, "grad_norm": 0.061636678874492645, "learning_rate": 6.401922107399411e-06, "loss": 0.3531928062438965, "step": 10208 }, { "epoch": 1.886373872573362, "grad_norm": 0.07669080793857574, "learning_rate": 6.400060870245996e-06, "loss": 0.40901196002960205, "step": 10209 }, { "epoch": 1.8865586492822581, "grad_norm": 0.0789622887969017, "learning_rate": 6.398199776358899e-06, "loss": 0.47715774178504944, "step": 10210 }, { "epoch": 1.886743425991154, "grad_norm": 0.08765920251607895, "learning_rate": 6.3963388258121765e-06, "loss": 0.46810436248779297, "step": 10211 }, { "epoch": 1.8869282027000498, "grad_norm": 0.07397231459617615, "learning_rate": 6.3944780186798964e-06, "loss": 0.5212484002113342, "step": 10212 }, { "epoch": 1.8871129794089456, "grad_norm": 0.08069170266389847, "learning_rate": 6.392617355036115e-06, "loss": 0.5141767263412476, "step": 10213 }, { "epoch": 1.8872977561178415, "grad_norm": 0.09006567299365997, "learning_rate": 6.390756834954871e-06, "loss": 0.47084227204322815, "step": 10214 }, { "epoch": 1.8874825328267373, "grad_norm": 0.07339008897542953, "learning_rate": 6.388896458510214e-06, "loss": 0.4445154368877411, "step": 10215 }, { "epoch": 1.887667309535633, "grad_norm": 0.09226617962121964, "learning_rate": 6.387036225776187e-06, "loss": 0.7287762761116028, "step": 10216 }, { "epoch": 1.887852086244529, "grad_norm": 0.07907861471176147, "learning_rate": 6.385176136826808e-06, "loss": 0.46330586075782776, "step": 10217 }, { "epoch": 1.8880368629534248, "grad_norm": 0.07914993166923523, "learning_rate": 6.383316191736108e-06, "loss": 0.5281094312667847, "step": 10218 }, { "epoch": 1.8882216396623206, "grad_norm": 0.08094493299722672, "learning_rate": 6.381456390578115e-06, "loss": 0.4696466326713562, "step": 10219 }, { "epoch": 1.8884064163712164, "grad_norm": 0.09042327105998993, "learning_rate": 6.3795967334268315e-06, "loss": 0.5085193514823914, "step": 10220 }, { "epoch": 1.8885911930801123, "grad_norm": 0.06535433977842331, "learning_rate": 6.377737220356273e-06, "loss": 0.36800816655158997, "step": 10221 }, { "epoch": 1.888775969789008, "grad_norm": 0.08334439992904663, "learning_rate": 6.3758778514404415e-06, "loss": 0.4639260470867157, "step": 10222 }, { "epoch": 1.888960746497904, "grad_norm": 0.06565045565366745, "learning_rate": 6.374018626753331e-06, "loss": 0.37057387828826904, "step": 10223 }, { "epoch": 1.8891455232067997, "grad_norm": 0.06614066660404205, "learning_rate": 6.372159546368935e-06, "loss": 0.3984827995300293, "step": 10224 }, { "epoch": 1.8893302999156956, "grad_norm": 0.06254734098911285, "learning_rate": 6.370300610361242e-06, "loss": 0.3221375048160553, "step": 10225 }, { "epoch": 1.8895150766245914, "grad_norm": 0.09444409608840942, "learning_rate": 6.368441818804225e-06, "loss": 0.633273184299469, "step": 10226 }, { "epoch": 1.8896998533334872, "grad_norm": 0.07186122983694077, "learning_rate": 6.366583171771865e-06, "loss": 0.5475074052810669, "step": 10227 }, { "epoch": 1.889884630042383, "grad_norm": 0.07813169807195663, "learning_rate": 6.364724669338125e-06, "loss": 0.43461698293685913, "step": 10228 }, { "epoch": 1.8900694067512789, "grad_norm": 0.08743274956941605, "learning_rate": 6.362866311576971e-06, "loss": 0.5691038966178894, "step": 10229 }, { "epoch": 1.8902541834601747, "grad_norm": 0.07686718553304672, "learning_rate": 6.361008098562361e-06, "loss": 0.45107215642929077, "step": 10230 }, { "epoch": 1.8904389601690705, "grad_norm": 0.0921458750963211, "learning_rate": 6.3591500303682385e-06, "loss": 0.518035888671875, "step": 10231 }, { "epoch": 1.8906237368779666, "grad_norm": 0.06643166393041611, "learning_rate": 6.357292107068556e-06, "loss": 0.46066051721572876, "step": 10232 }, { "epoch": 1.8908085135868624, "grad_norm": 0.08394398540258408, "learning_rate": 6.355434328737255e-06, "loss": 0.5157032608985901, "step": 10233 }, { "epoch": 1.8909932902957582, "grad_norm": 0.09607004374265671, "learning_rate": 6.3535766954482595e-06, "loss": 0.6931172609329224, "step": 10234 }, { "epoch": 1.891178067004654, "grad_norm": 0.09028512239456177, "learning_rate": 6.3517192072755055e-06, "loss": 0.560213565826416, "step": 10235 }, { "epoch": 1.89136284371355, "grad_norm": 0.08541875332593918, "learning_rate": 6.349861864292916e-06, "loss": 0.6247984170913696, "step": 10236 }, { "epoch": 1.8915476204224457, "grad_norm": 0.0664595291018486, "learning_rate": 6.348004666574401e-06, "loss": 0.4602713882923126, "step": 10237 }, { "epoch": 1.8917323971313416, "grad_norm": 0.07790371775627136, "learning_rate": 6.346147614193874e-06, "loss": 0.4913193881511688, "step": 10238 }, { "epoch": 1.8919171738402374, "grad_norm": 0.07915055751800537, "learning_rate": 6.3442907072252445e-06, "loss": 0.49132734537124634, "step": 10239 }, { "epoch": 1.8921019505491334, "grad_norm": 0.05303800106048584, "learning_rate": 6.342433945742405e-06, "loss": 0.3594009280204773, "step": 10240 }, { "epoch": 1.8922867272580293, "grad_norm": 0.06748857349157333, "learning_rate": 6.34057732981925e-06, "loss": 0.40470314025878906, "step": 10241 }, { "epoch": 1.892471503966925, "grad_norm": 0.07024706900119781, "learning_rate": 6.338720859529672e-06, "loss": 0.43911558389663696, "step": 10242 }, { "epoch": 1.892656280675821, "grad_norm": 0.08209729939699173, "learning_rate": 6.336864534947547e-06, "loss": 0.5497640371322632, "step": 10243 }, { "epoch": 1.8928410573847168, "grad_norm": 0.07350103557109833, "learning_rate": 6.335008356146755e-06, "loss": 0.45002931356430054, "step": 10244 }, { "epoch": 1.8930258340936126, "grad_norm": 0.06537279486656189, "learning_rate": 6.333152323201161e-06, "loss": 0.4208161532878876, "step": 10245 }, { "epoch": 1.8932106108025084, "grad_norm": 0.08805809915065765, "learning_rate": 6.331296436184633e-06, "loss": 0.6379576325416565, "step": 10246 }, { "epoch": 1.8933953875114042, "grad_norm": 0.07174934446811676, "learning_rate": 6.329440695171029e-06, "loss": 0.44198155403137207, "step": 10247 }, { "epoch": 1.8935801642203, "grad_norm": 0.09321418404579163, "learning_rate": 6.327585100234204e-06, "loss": 0.5571639537811279, "step": 10248 }, { "epoch": 1.893764940929196, "grad_norm": 0.08630082756280899, "learning_rate": 6.325729651447999e-06, "loss": 0.5520722270011902, "step": 10249 }, { "epoch": 1.8939497176380917, "grad_norm": 0.07743130624294281, "learning_rate": 6.323874348886261e-06, "loss": 0.4844270348548889, "step": 10250 }, { "epoch": 1.8941344943469876, "grad_norm": 0.0724787712097168, "learning_rate": 6.3220191926228216e-06, "loss": 0.38706961274147034, "step": 10251 }, { "epoch": 1.8943192710558834, "grad_norm": 0.07613388448953629, "learning_rate": 6.320164182731512e-06, "loss": 0.5574577450752258, "step": 10252 }, { "epoch": 1.8945040477647792, "grad_norm": 0.07308190315961838, "learning_rate": 6.318309319286158e-06, "loss": 0.472690612077713, "step": 10253 }, { "epoch": 1.894688824473675, "grad_norm": 0.07388962805271149, "learning_rate": 6.316454602360569e-06, "loss": 0.3549666106700897, "step": 10254 }, { "epoch": 1.8948736011825709, "grad_norm": 0.07017778605222702, "learning_rate": 6.314600032028564e-06, "loss": 0.405681848526001, "step": 10255 }, { "epoch": 1.8950583778914667, "grad_norm": 0.09609831124544144, "learning_rate": 6.312745608363952e-06, "loss": 0.5824297666549683, "step": 10256 }, { "epoch": 1.8952431546003625, "grad_norm": 0.10361137241125107, "learning_rate": 6.310891331440525e-06, "loss": 0.6679677367210388, "step": 10257 }, { "epoch": 1.8954279313092584, "grad_norm": 0.07523815333843231, "learning_rate": 6.30903720133208e-06, "loss": 0.3741815984249115, "step": 10258 }, { "epoch": 1.8956127080181542, "grad_norm": 0.07877101749181747, "learning_rate": 6.307183218112412e-06, "loss": 0.43796366453170776, "step": 10259 }, { "epoch": 1.89579748472705, "grad_norm": 0.07526076585054398, "learning_rate": 6.305329381855294e-06, "loss": 0.5309394001960754, "step": 10260 }, { "epoch": 1.8959822614359458, "grad_norm": 0.08595466613769531, "learning_rate": 6.303475692634511e-06, "loss": 0.5667848587036133, "step": 10261 }, { "epoch": 1.8961670381448419, "grad_norm": 0.07923302054405212, "learning_rate": 6.301622150523827e-06, "loss": 0.5488939881324768, "step": 10262 }, { "epoch": 1.8963518148537377, "grad_norm": 0.11377062648534775, "learning_rate": 6.299768755597011e-06, "loss": 0.6961948275566101, "step": 10263 }, { "epoch": 1.8965365915626335, "grad_norm": 0.08949556201696396, "learning_rate": 6.297915507927825e-06, "loss": 0.5056279897689819, "step": 10264 }, { "epoch": 1.8967213682715294, "grad_norm": 0.059668708592653275, "learning_rate": 6.296062407590017e-06, "loss": 0.32332471013069153, "step": 10265 }, { "epoch": 1.8969061449804252, "grad_norm": 0.07532314211130142, "learning_rate": 6.294209454657336e-06, "loss": 0.4130173325538635, "step": 10266 }, { "epoch": 1.897090921689321, "grad_norm": 0.07130767405033112, "learning_rate": 6.292356649203528e-06, "loss": 0.3886146545410156, "step": 10267 }, { "epoch": 1.8972756983982169, "grad_norm": 0.07246781140565872, "learning_rate": 6.290503991302324e-06, "loss": 0.4462714195251465, "step": 10268 }, { "epoch": 1.897460475107113, "grad_norm": 0.0651962012052536, "learning_rate": 6.288651481027453e-06, "loss": 0.5322108268737793, "step": 10269 }, { "epoch": 1.8976452518160087, "grad_norm": 0.08073778450489044, "learning_rate": 6.286799118452647e-06, "loss": 0.534866452217102, "step": 10270 }, { "epoch": 1.8978300285249046, "grad_norm": 0.06440076977014542, "learning_rate": 6.284946903651614e-06, "loss": 0.4517211616039276, "step": 10271 }, { "epoch": 1.8980148052338004, "grad_norm": 0.07085192203521729, "learning_rate": 6.283094836698074e-06, "loss": 0.40669822692871094, "step": 10272 }, { "epoch": 1.8981995819426962, "grad_norm": 0.09401829540729523, "learning_rate": 6.281242917665733e-06, "loss": 0.5307532548904419, "step": 10273 }, { "epoch": 1.898384358651592, "grad_norm": 0.09388338774442673, "learning_rate": 6.279391146628284e-06, "loss": 0.6148204207420349, "step": 10274 }, { "epoch": 1.8985691353604879, "grad_norm": 0.08041196316480637, "learning_rate": 6.277539523659433e-06, "loss": 0.5400888919830322, "step": 10275 }, { "epoch": 1.8987539120693837, "grad_norm": 0.06962060928344727, "learning_rate": 6.27568804883286e-06, "loss": 0.30403396487236023, "step": 10276 }, { "epoch": 1.8989386887782795, "grad_norm": 0.09105116128921509, "learning_rate": 6.273836722222249e-06, "loss": 0.526177167892456, "step": 10277 }, { "epoch": 1.8991234654871754, "grad_norm": 0.08550825715065002, "learning_rate": 6.271985543901281e-06, "loss": 0.45866915583610535, "step": 10278 }, { "epoch": 1.8993082421960712, "grad_norm": 0.07376360893249512, "learning_rate": 6.270134513943624e-06, "loss": 0.46657779812812805, "step": 10279 }, { "epoch": 1.899493018904967, "grad_norm": 0.08404447138309479, "learning_rate": 6.268283632422943e-06, "loss": 0.5317788124084473, "step": 10280 }, { "epoch": 1.8996777956138629, "grad_norm": 0.07737813889980316, "learning_rate": 6.266432899412901e-06, "loss": 0.43988361954689026, "step": 10281 }, { "epoch": 1.8998625723227587, "grad_norm": 0.06922601908445358, "learning_rate": 6.264582314987147e-06, "loss": 0.3500733971595764, "step": 10282 }, { "epoch": 1.9000473490316545, "grad_norm": 0.07693938165903091, "learning_rate": 6.262731879219329e-06, "loss": 0.508966863155365, "step": 10283 }, { "epoch": 1.9002321257405503, "grad_norm": 0.0762089341878891, "learning_rate": 6.2608815921830936e-06, "loss": 0.5069466829299927, "step": 10284 }, { "epoch": 1.9004169024494462, "grad_norm": 0.06816435605287552, "learning_rate": 6.2590314539520695e-06, "loss": 0.4386304020881653, "step": 10285 }, { "epoch": 1.900601679158342, "grad_norm": 0.07959213852882385, "learning_rate": 6.25718146459989e-06, "loss": 0.47748059034347534, "step": 10286 }, { "epoch": 1.9007864558672378, "grad_norm": 0.07564561069011688, "learning_rate": 6.2553316242001806e-06, "loss": 0.4964078366756439, "step": 10287 }, { "epoch": 1.9009712325761337, "grad_norm": 0.07462283223867416, "learning_rate": 6.253481932826554e-06, "loss": 0.5468541383743286, "step": 10288 }, { "epoch": 1.9011560092850295, "grad_norm": 0.0738021656870842, "learning_rate": 6.251632390552626e-06, "loss": 0.46736302971839905, "step": 10289 }, { "epoch": 1.9013407859939253, "grad_norm": 0.06180545315146446, "learning_rate": 6.249782997452003e-06, "loss": 0.41027358174324036, "step": 10290 }, { "epoch": 1.9015255627028214, "grad_norm": 0.0797877311706543, "learning_rate": 6.247933753598282e-06, "loss": 0.5366336107254028, "step": 10291 }, { "epoch": 1.9017103394117172, "grad_norm": 0.07238733768463135, "learning_rate": 6.246084659065064e-06, "loss": 0.49080348014831543, "step": 10292 }, { "epoch": 1.901895116120613, "grad_norm": 0.07169201970100403, "learning_rate": 6.244235713925926e-06, "loss": 0.47226986289024353, "step": 10293 }, { "epoch": 1.9020798928295088, "grad_norm": 0.08983547240495682, "learning_rate": 6.242386918254456e-06, "loss": 0.4665200710296631, "step": 10294 }, { "epoch": 1.9022646695384047, "grad_norm": 0.07715783268213272, "learning_rate": 6.240538272124236e-06, "loss": 0.5719449520111084, "step": 10295 }, { "epoch": 1.9024494462473005, "grad_norm": 0.07715067267417908, "learning_rate": 6.238689775608827e-06, "loss": 0.5054954886436462, "step": 10296 }, { "epoch": 1.9026342229561963, "grad_norm": 0.08391452580690384, "learning_rate": 6.236841428781797e-06, "loss": 0.4786243736743927, "step": 10297 }, { "epoch": 1.9028189996650924, "grad_norm": 0.08295056223869324, "learning_rate": 6.234993231716707e-06, "loss": 0.6718369126319885, "step": 10298 }, { "epoch": 1.9030037763739882, "grad_norm": 0.07840042561292648, "learning_rate": 6.233145184487106e-06, "loss": 0.5167830586433411, "step": 10299 }, { "epoch": 1.903188553082884, "grad_norm": 0.09705166518688202, "learning_rate": 6.23129728716654e-06, "loss": 0.5749402046203613, "step": 10300 }, { "epoch": 1.9033733297917799, "grad_norm": 0.07300394028425217, "learning_rate": 6.2294495398285535e-06, "loss": 0.4947923719882965, "step": 10301 }, { "epoch": 1.9035581065006757, "grad_norm": 0.08533252030611038, "learning_rate": 6.227601942546678e-06, "loss": 0.5547330379486084, "step": 10302 }, { "epoch": 1.9037428832095715, "grad_norm": 0.05910911411046982, "learning_rate": 6.2257544953944425e-06, "loss": 0.36130478978157043, "step": 10303 }, { "epoch": 1.9039276599184674, "grad_norm": 0.047769539058208466, "learning_rate": 6.2239071984453715e-06, "loss": 0.35184383392333984, "step": 10304 }, { "epoch": 1.9041124366273632, "grad_norm": 0.08566884696483612, "learning_rate": 6.222060051772978e-06, "loss": 0.5417880415916443, "step": 10305 }, { "epoch": 1.904297213336259, "grad_norm": 0.07999825477600098, "learning_rate": 6.2202130554507755e-06, "loss": 0.5098516941070557, "step": 10306 }, { "epoch": 1.9044819900451548, "grad_norm": 0.06672988831996918, "learning_rate": 6.21836620955227e-06, "loss": 0.45859119296073914, "step": 10307 }, { "epoch": 1.9046667667540507, "grad_norm": 0.08266816288232803, "learning_rate": 6.216519514150956e-06, "loss": 0.5112531185150146, "step": 10308 }, { "epoch": 1.9048515434629465, "grad_norm": 0.07994687557220459, "learning_rate": 6.214672969320332e-06, "loss": 0.48196014761924744, "step": 10309 }, { "epoch": 1.9050363201718423, "grad_norm": 0.08007802069187164, "learning_rate": 6.212826575133875e-06, "loss": 0.43633612990379333, "step": 10310 }, { "epoch": 1.9052210968807382, "grad_norm": 0.06801573932170868, "learning_rate": 6.210980331665074e-06, "loss": 0.3640526235103607, "step": 10311 }, { "epoch": 1.905405873589634, "grad_norm": 0.10344184935092926, "learning_rate": 6.209134238987405e-06, "loss": 0.7233929634094238, "step": 10312 }, { "epoch": 1.9055906502985298, "grad_norm": 0.08550361543893814, "learning_rate": 6.20728829717433e-06, "loss": 0.5223844647407532, "step": 10313 }, { "epoch": 1.9057754270074256, "grad_norm": 0.10022129863500595, "learning_rate": 6.205442506299313e-06, "loss": 0.8361809253692627, "step": 10314 }, { "epoch": 1.9059602037163215, "grad_norm": 0.05882253497838974, "learning_rate": 6.203596866435817e-06, "loss": 0.32034730911254883, "step": 10315 }, { "epoch": 1.9061449804252173, "grad_norm": 0.0817984938621521, "learning_rate": 6.2017513776572855e-06, "loss": 0.5322924256324768, "step": 10316 }, { "epoch": 1.9063297571341131, "grad_norm": 0.07522819936275482, "learning_rate": 6.1999060400371666e-06, "loss": 0.4112682342529297, "step": 10317 }, { "epoch": 1.906514533843009, "grad_norm": 0.09052328765392303, "learning_rate": 6.1980608536488994e-06, "loss": 0.5937305688858032, "step": 10318 }, { "epoch": 1.9066993105519048, "grad_norm": 0.10432654619216919, "learning_rate": 6.196215818565914e-06, "loss": 0.532381534576416, "step": 10319 }, { "epoch": 1.9068840872608008, "grad_norm": 0.10870999097824097, "learning_rate": 6.194370934861638e-06, "loss": 0.7528077960014343, "step": 10320 }, { "epoch": 1.9070688639696967, "grad_norm": 0.06587886810302734, "learning_rate": 6.192526202609495e-06, "loss": 0.3334970474243164, "step": 10321 }, { "epoch": 1.9072536406785925, "grad_norm": 0.08507116138935089, "learning_rate": 6.190681621882895e-06, "loss": 0.43544384837150574, "step": 10322 }, { "epoch": 1.9074384173874883, "grad_norm": 0.05950252711772919, "learning_rate": 6.188837192755248e-06, "loss": 0.35488152503967285, "step": 10323 }, { "epoch": 1.9076231940963841, "grad_norm": 0.07645530998706818, "learning_rate": 6.186992915299959e-06, "loss": 0.4979608952999115, "step": 10324 }, { "epoch": 1.90780797080528, "grad_norm": 0.11194054037332535, "learning_rate": 6.18514878959042e-06, "loss": 0.6795698404312134, "step": 10325 }, { "epoch": 1.9079927475141758, "grad_norm": 0.08848891407251358, "learning_rate": 6.183304815700029e-06, "loss": 0.5858112573623657, "step": 10326 }, { "epoch": 1.9081775242230716, "grad_norm": 0.08644504100084305, "learning_rate": 6.181460993702161e-06, "loss": 0.5262491703033447, "step": 10327 }, { "epoch": 1.9083623009319677, "grad_norm": 0.07708645612001419, "learning_rate": 6.1796173236702e-06, "loss": 0.5004913806915283, "step": 10328 }, { "epoch": 1.9085470776408635, "grad_norm": 0.0642990693449974, "learning_rate": 6.177773805677521e-06, "loss": 0.37086158990859985, "step": 10329 }, { "epoch": 1.9087318543497593, "grad_norm": 0.06983623653650284, "learning_rate": 6.1759304397974786e-06, "loss": 0.4389687776565552, "step": 10330 }, { "epoch": 1.9089166310586552, "grad_norm": 0.07059077173471451, "learning_rate": 6.174087226103444e-06, "loss": 0.42408299446105957, "step": 10331 }, { "epoch": 1.909101407767551, "grad_norm": 0.04802374541759491, "learning_rate": 6.172244164668773e-06, "loss": 0.25084447860717773, "step": 10332 }, { "epoch": 1.9092861844764468, "grad_norm": 0.0758056640625, "learning_rate": 6.1704012555668025e-06, "loss": 0.5087399482727051, "step": 10333 }, { "epoch": 1.9094709611853427, "grad_norm": 0.10258832573890686, "learning_rate": 6.16855849887088e-06, "loss": 0.7423362731933594, "step": 10334 }, { "epoch": 1.9096557378942385, "grad_norm": 0.06941121816635132, "learning_rate": 6.166715894654348e-06, "loss": 0.48833563923835754, "step": 10335 }, { "epoch": 1.9098405146031343, "grad_norm": 0.08097316324710846, "learning_rate": 6.164873442990526e-06, "loss": 0.4520004093647003, "step": 10336 }, { "epoch": 1.9100252913120301, "grad_norm": 0.08406997472047806, "learning_rate": 6.1630311439527445e-06, "loss": 0.5130544900894165, "step": 10337 }, { "epoch": 1.910210068020926, "grad_norm": 0.07970693707466125, "learning_rate": 6.161188997614319e-06, "loss": 0.53461754322052, "step": 10338 }, { "epoch": 1.9103948447298218, "grad_norm": 0.09099024534225464, "learning_rate": 6.159347004048561e-06, "loss": 0.5752238631248474, "step": 10339 }, { "epoch": 1.9105796214387176, "grad_norm": 0.055456504225730896, "learning_rate": 6.157505163328776e-06, "loss": 0.3347260653972626, "step": 10340 }, { "epoch": 1.9107643981476135, "grad_norm": 0.08471523225307465, "learning_rate": 6.155663475528264e-06, "loss": 0.57076096534729, "step": 10341 }, { "epoch": 1.9109491748565093, "grad_norm": 0.05895467475056648, "learning_rate": 6.153821940720317e-06, "loss": 0.3902541995048523, "step": 10342 }, { "epoch": 1.911133951565405, "grad_norm": 0.07855869084596634, "learning_rate": 6.151980558978227e-06, "loss": 0.5438547730445862, "step": 10343 }, { "epoch": 1.911318728274301, "grad_norm": 0.08026958256959915, "learning_rate": 6.1501393303752686e-06, "loss": 0.565049409866333, "step": 10344 }, { "epoch": 1.9115035049831968, "grad_norm": 0.08474355936050415, "learning_rate": 6.148298254984721e-06, "loss": 0.507989764213562, "step": 10345 }, { "epoch": 1.9116882816920926, "grad_norm": 0.09837860614061356, "learning_rate": 6.146457332879854e-06, "loss": 0.6434282064437866, "step": 10346 }, { "epoch": 1.9118730584009884, "grad_norm": 0.10607185959815979, "learning_rate": 6.144616564133927e-06, "loss": 0.7941292524337769, "step": 10347 }, { "epoch": 1.9120578351098843, "grad_norm": 0.07790163159370422, "learning_rate": 6.142775948820198e-06, "loss": 0.5895043611526489, "step": 10348 }, { "epoch": 1.91224261181878, "grad_norm": 0.06479763239622116, "learning_rate": 6.140935487011924e-06, "loss": 0.3058730363845825, "step": 10349 }, { "epoch": 1.9124273885276761, "grad_norm": 0.07747827470302582, "learning_rate": 6.139095178782337e-06, "loss": 0.5066514015197754, "step": 10350 }, { "epoch": 1.912612165236572, "grad_norm": 0.0633983463048935, "learning_rate": 6.1372550242046855e-06, "loss": 0.39069458842277527, "step": 10351 }, { "epoch": 1.9127969419454678, "grad_norm": 0.09156794100999832, "learning_rate": 6.135415023352203e-06, "loss": 0.5516767501831055, "step": 10352 }, { "epoch": 1.9129817186543636, "grad_norm": 0.07638484984636307, "learning_rate": 6.133575176298108e-06, "loss": 0.4340505003929138, "step": 10353 }, { "epoch": 1.9131664953632594, "grad_norm": 0.07331151515245438, "learning_rate": 6.131735483115622e-06, "loss": 0.4559667408466339, "step": 10354 }, { "epoch": 1.9133512720721553, "grad_norm": 0.1106058731675148, "learning_rate": 6.129895943877967e-06, "loss": 0.7445495128631592, "step": 10355 }, { "epoch": 1.913536048781051, "grad_norm": 0.08542877435684204, "learning_rate": 6.128056558658342e-06, "loss": 0.47054705023765564, "step": 10356 }, { "epoch": 1.9137208254899472, "grad_norm": 0.07826549559831619, "learning_rate": 6.126217327529955e-06, "loss": 0.51534104347229, "step": 10357 }, { "epoch": 1.913905602198843, "grad_norm": 0.06726085394620895, "learning_rate": 6.124378250565996e-06, "loss": 0.4334796667098999, "step": 10358 }, { "epoch": 1.9140903789077388, "grad_norm": 0.07645734399557114, "learning_rate": 6.122539327839657e-06, "loss": 0.48238635063171387, "step": 10359 }, { "epoch": 1.9142751556166346, "grad_norm": 0.07240963727235794, "learning_rate": 6.120700559424124e-06, "loss": 0.4427392780780792, "step": 10360 }, { "epoch": 1.9144599323255305, "grad_norm": 0.07713354378938675, "learning_rate": 6.1188619453925704e-06, "loss": 0.38899877667427063, "step": 10361 }, { "epoch": 1.9146447090344263, "grad_norm": 0.0955655425786972, "learning_rate": 6.117023485818169e-06, "loss": 0.581228494644165, "step": 10362 }, { "epoch": 1.9148294857433221, "grad_norm": 0.08136298507452011, "learning_rate": 6.115185180774086e-06, "loss": 0.5755365490913391, "step": 10363 }, { "epoch": 1.915014262452218, "grad_norm": 0.09747554361820221, "learning_rate": 6.113347030333476e-06, "loss": 0.5765631198883057, "step": 10364 }, { "epoch": 1.9151990391611138, "grad_norm": 0.06636013835668564, "learning_rate": 6.111509034569496e-06, "loss": 0.4715322256088257, "step": 10365 }, { "epoch": 1.9153838158700096, "grad_norm": 0.10040026158094406, "learning_rate": 6.109671193555292e-06, "loss": 0.5345032811164856, "step": 10366 }, { "epoch": 1.9155685925789054, "grad_norm": 0.0752132385969162, "learning_rate": 6.107833507364001e-06, "loss": 0.50083988904953, "step": 10367 }, { "epoch": 1.9157533692878013, "grad_norm": 0.08617395907640457, "learning_rate": 6.105995976068762e-06, "loss": 0.5246359705924988, "step": 10368 }, { "epoch": 1.915938145996697, "grad_norm": 0.07722268253564835, "learning_rate": 6.104158599742701e-06, "loss": 0.5290027260780334, "step": 10369 }, { "epoch": 1.916122922705593, "grad_norm": 0.07662174850702286, "learning_rate": 6.102321378458935e-06, "loss": 0.4751085937023163, "step": 10370 }, { "epoch": 1.9163076994144888, "grad_norm": 0.09481582790613174, "learning_rate": 6.100484312290584e-06, "loss": 0.5759310722351074, "step": 10371 }, { "epoch": 1.9164924761233846, "grad_norm": 0.06988898664712906, "learning_rate": 6.098647401310764e-06, "loss": 0.40731358528137207, "step": 10372 }, { "epoch": 1.9166772528322804, "grad_norm": 0.0789935514330864, "learning_rate": 6.096810645592566e-06, "loss": 0.45298251509666443, "step": 10373 }, { "epoch": 1.9168620295411762, "grad_norm": 0.08178699761629105, "learning_rate": 6.094974045209094e-06, "loss": 0.6943897604942322, "step": 10374 }, { "epoch": 1.917046806250072, "grad_norm": 0.05865257978439331, "learning_rate": 6.093137600233438e-06, "loss": 0.43970754742622375, "step": 10375 }, { "epoch": 1.917231582958968, "grad_norm": 0.07162103056907654, "learning_rate": 6.091301310738682e-06, "loss": 0.3854061961174011, "step": 10376 }, { "epoch": 1.9174163596678637, "grad_norm": 0.07564530521631241, "learning_rate": 6.0894651767979065e-06, "loss": 0.4976777732372284, "step": 10377 }, { "epoch": 1.9176011363767596, "grad_norm": 0.06435713917016983, "learning_rate": 6.0876291984841795e-06, "loss": 0.3928331434726715, "step": 10378 }, { "epoch": 1.9177859130856556, "grad_norm": 0.07462138682603836, "learning_rate": 6.085793375870571e-06, "loss": 0.4585098922252655, "step": 10379 }, { "epoch": 1.9179706897945514, "grad_norm": 0.08818219602108002, "learning_rate": 6.083957709030143e-06, "loss": 0.5107340812683105, "step": 10380 }, { "epoch": 1.9181554665034473, "grad_norm": 0.08969642221927643, "learning_rate": 6.082122198035944e-06, "loss": 0.39215609431266785, "step": 10381 }, { "epoch": 1.918340243212343, "grad_norm": 0.0698242038488388, "learning_rate": 6.080286842961023e-06, "loss": 0.5694100260734558, "step": 10382 }, { "epoch": 1.918525019921239, "grad_norm": 0.0948592871427536, "learning_rate": 6.078451643878424e-06, "loss": 0.5535092949867249, "step": 10383 }, { "epoch": 1.9187097966301347, "grad_norm": 0.08598551899194717, "learning_rate": 6.076616600861181e-06, "loss": 0.4676663875579834, "step": 10384 }, { "epoch": 1.9188945733390306, "grad_norm": 0.09912725538015366, "learning_rate": 6.074781713982322e-06, "loss": 0.6111690402030945, "step": 10385 }, { "epoch": 1.9190793500479266, "grad_norm": 0.06452207267284393, "learning_rate": 6.0729469833148716e-06, "loss": 0.3722819685935974, "step": 10386 }, { "epoch": 1.9192641267568225, "grad_norm": 0.06180416792631149, "learning_rate": 6.071112408931843e-06, "loss": 0.3394249975681305, "step": 10387 }, { "epoch": 1.9194489034657183, "grad_norm": 0.09019298851490021, "learning_rate": 6.0692779909062495e-06, "loss": 0.6170430183410645, "step": 10388 }, { "epoch": 1.919633680174614, "grad_norm": 0.08395922929048538, "learning_rate": 6.067443729311098e-06, "loss": 0.5684940218925476, "step": 10389 }, { "epoch": 1.91981845688351, "grad_norm": 0.08177103102207184, "learning_rate": 6.065609624219375e-06, "loss": 0.5323196053504944, "step": 10390 }, { "epoch": 1.9200032335924058, "grad_norm": 0.08991898596286774, "learning_rate": 6.063775675704088e-06, "loss": 0.6166906356811523, "step": 10391 }, { "epoch": 1.9201880103013016, "grad_norm": 0.07399672269821167, "learning_rate": 6.061941883838209e-06, "loss": 0.5431609749794006, "step": 10392 }, { "epoch": 1.9203727870101974, "grad_norm": 0.08427660912275314, "learning_rate": 6.060108248694723e-06, "loss": 0.441610723733902, "step": 10393 }, { "epoch": 1.9205575637190933, "grad_norm": 0.09697717428207397, "learning_rate": 6.058274770346604e-06, "loss": 0.5912427306175232, "step": 10394 }, { "epoch": 1.920742340427989, "grad_norm": 0.07520875334739685, "learning_rate": 6.056441448866817e-06, "loss": 0.5347209572792053, "step": 10395 }, { "epoch": 1.920927117136885, "grad_norm": 0.07598564773797989, "learning_rate": 6.0546082843283206e-06, "loss": 0.5276317000389099, "step": 10396 }, { "epoch": 1.9211118938457807, "grad_norm": 0.07036890089511871, "learning_rate": 6.052775276804073e-06, "loss": 0.35731253027915955, "step": 10397 }, { "epoch": 1.9212966705546766, "grad_norm": 0.08115974813699722, "learning_rate": 6.050942426367017e-06, "loss": 0.5783150792121887, "step": 10398 }, { "epoch": 1.9214814472635724, "grad_norm": 0.07887168973684311, "learning_rate": 6.0491097330901e-06, "loss": 0.5009549856185913, "step": 10399 }, { "epoch": 1.9216662239724682, "grad_norm": 0.08980367332696915, "learning_rate": 6.0472771970462555e-06, "loss": 0.5049043297767639, "step": 10400 }, { "epoch": 1.921851000681364, "grad_norm": 0.08237417787313461, "learning_rate": 6.04544481830841e-06, "loss": 0.5043971538543701, "step": 10401 }, { "epoch": 1.9220357773902599, "grad_norm": 0.07935957610607147, "learning_rate": 6.043612596949489e-06, "loss": 0.4859786033630371, "step": 10402 }, { "epoch": 1.9222205540991557, "grad_norm": 0.07649687677621841, "learning_rate": 6.041780533042409e-06, "loss": 0.5247918963432312, "step": 10403 }, { "epoch": 1.9224053308080515, "grad_norm": 0.08480358868837357, "learning_rate": 6.03994862666008e-06, "loss": 0.5415157079696655, "step": 10404 }, { "epoch": 1.9225901075169474, "grad_norm": 0.06939677149057388, "learning_rate": 6.038116877875409e-06, "loss": 0.4288090765476227, "step": 10405 }, { "epoch": 1.9227748842258432, "grad_norm": 0.06798798590898514, "learning_rate": 6.0362852867612864e-06, "loss": 0.35469070076942444, "step": 10406 }, { "epoch": 1.922959660934739, "grad_norm": 0.07632050663232803, "learning_rate": 6.034453853390609e-06, "loss": 0.40644606947898865, "step": 10407 }, { "epoch": 1.923144437643635, "grad_norm": 0.07538872212171555, "learning_rate": 6.032622577836268e-06, "loss": 0.4695570468902588, "step": 10408 }, { "epoch": 1.923329214352531, "grad_norm": 0.08649013936519623, "learning_rate": 6.03079146017113e-06, "loss": 0.5374628901481628, "step": 10409 }, { "epoch": 1.9235139910614267, "grad_norm": 0.05631586164236069, "learning_rate": 6.028960500468073e-06, "loss": 0.33060532808303833, "step": 10410 }, { "epoch": 1.9236987677703226, "grad_norm": 0.0786207765340805, "learning_rate": 6.0271296987999695e-06, "loss": 0.5497831702232361, "step": 10411 }, { "epoch": 1.9238835444792184, "grad_norm": 0.09200528264045715, "learning_rate": 6.025299055239671e-06, "loss": 0.6450908780097961, "step": 10412 }, { "epoch": 1.9240683211881142, "grad_norm": 0.07723037153482437, "learning_rate": 6.023468569860034e-06, "loss": 0.49986669421195984, "step": 10413 }, { "epoch": 1.92425309789701, "grad_norm": 0.08802322298288345, "learning_rate": 6.02163824273391e-06, "loss": 0.4276263415813446, "step": 10414 }, { "epoch": 1.9244378746059059, "grad_norm": 0.08626127243041992, "learning_rate": 6.0198080739341345e-06, "loss": 0.4679073095321655, "step": 10415 }, { "epoch": 1.924622651314802, "grad_norm": 0.09432826936244965, "learning_rate": 6.0179780635335464e-06, "loss": 0.5690611004829407, "step": 10416 }, { "epoch": 1.9248074280236978, "grad_norm": 0.07629045099020004, "learning_rate": 6.016148211604974e-06, "loss": 0.5528715252876282, "step": 10417 }, { "epoch": 1.9249922047325936, "grad_norm": 0.06245577335357666, "learning_rate": 6.014318518221237e-06, "loss": 0.4268755316734314, "step": 10418 }, { "epoch": 1.9251769814414894, "grad_norm": 0.10878819972276688, "learning_rate": 6.012488983455154e-06, "loss": 0.5934674739837646, "step": 10419 }, { "epoch": 1.9253617581503852, "grad_norm": 0.07463783025741577, "learning_rate": 6.0106596073795356e-06, "loss": 0.41747212409973145, "step": 10420 }, { "epoch": 1.925546534859281, "grad_norm": 0.09554737061262131, "learning_rate": 6.008830390067182e-06, "loss": 0.503079891204834, "step": 10421 }, { "epoch": 1.925731311568177, "grad_norm": 0.06815576553344727, "learning_rate": 6.007001331590894e-06, "loss": 0.4974030554294586, "step": 10422 }, { "epoch": 1.9259160882770727, "grad_norm": 0.061820026487112045, "learning_rate": 6.005172432023458e-06, "loss": 0.31762582063674927, "step": 10423 }, { "epoch": 1.9261008649859686, "grad_norm": 0.0639713779091835, "learning_rate": 6.003343691437662e-06, "loss": 0.3796601891517639, "step": 10424 }, { "epoch": 1.9262856416948644, "grad_norm": 0.06414221227169037, "learning_rate": 6.001515109906286e-06, "loss": 0.32676807045936584, "step": 10425 }, { "epoch": 1.9264704184037602, "grad_norm": 0.08235526829957962, "learning_rate": 5.999686687502096e-06, "loss": 0.4994039237499237, "step": 10426 }, { "epoch": 1.926655195112656, "grad_norm": 0.0586988627910614, "learning_rate": 5.997858424297859e-06, "loss": 0.38159024715423584, "step": 10427 }, { "epoch": 1.9268399718215519, "grad_norm": 0.09036070853471756, "learning_rate": 5.996030320366341e-06, "loss": 0.6151449680328369, "step": 10428 }, { "epoch": 1.9270247485304477, "grad_norm": 0.08748619258403778, "learning_rate": 5.994202375780285e-06, "loss": 0.585174560546875, "step": 10429 }, { "epoch": 1.9272095252393435, "grad_norm": 0.10161363333463669, "learning_rate": 5.9923745906124395e-06, "loss": 0.5904964208602905, "step": 10430 }, { "epoch": 1.9273943019482394, "grad_norm": 0.07477103918790817, "learning_rate": 5.990546964935554e-06, "loss": 0.4831435978412628, "step": 10431 }, { "epoch": 1.9275790786571352, "grad_norm": 0.07682488858699799, "learning_rate": 5.9887194988223506e-06, "loss": 0.4539066255092621, "step": 10432 }, { "epoch": 1.927763855366031, "grad_norm": 0.0927143543958664, "learning_rate": 5.9868921923455605e-06, "loss": 0.4629073441028595, "step": 10433 }, { "epoch": 1.9279486320749268, "grad_norm": 0.0746363177895546, "learning_rate": 5.985065045577912e-06, "loss": 0.3715229630470276, "step": 10434 }, { "epoch": 1.9281334087838227, "grad_norm": 0.07532990723848343, "learning_rate": 5.983238058592107e-06, "loss": 0.5250852108001709, "step": 10435 }, { "epoch": 1.9283181854927185, "grad_norm": 0.09127747267484665, "learning_rate": 5.981411231460863e-06, "loss": 0.6466876268386841, "step": 10436 }, { "epoch": 1.9285029622016143, "grad_norm": 0.10759834200143814, "learning_rate": 5.9795845642568795e-06, "loss": 0.7927917242050171, "step": 10437 }, { "epoch": 1.9286877389105104, "grad_norm": 0.07551736384630203, "learning_rate": 5.977758057052852e-06, "loss": 0.3971814513206482, "step": 10438 }, { "epoch": 1.9288725156194062, "grad_norm": 0.08489684015512466, "learning_rate": 5.975931709921471e-06, "loss": 0.5904057621955872, "step": 10439 }, { "epoch": 1.929057292328302, "grad_norm": 0.07266104966402054, "learning_rate": 5.974105522935416e-06, "loss": 0.5374613404273987, "step": 10440 }, { "epoch": 1.9292420690371979, "grad_norm": 0.07506699115037918, "learning_rate": 5.972279496167366e-06, "loss": 0.47188636660575867, "step": 10441 }, { "epoch": 1.9294268457460937, "grad_norm": 0.06014501675963402, "learning_rate": 5.970453629689993e-06, "loss": 0.3415280878543854, "step": 10442 }, { "epoch": 1.9296116224549895, "grad_norm": 0.09418486803770065, "learning_rate": 5.968627923575956e-06, "loss": 0.6142709851264954, "step": 10443 }, { "epoch": 1.9297963991638853, "grad_norm": 0.06263016909360886, "learning_rate": 5.966802377897916e-06, "loss": 0.35985496640205383, "step": 10444 }, { "epoch": 1.9299811758727814, "grad_norm": 0.09261230379343033, "learning_rate": 5.964976992728527e-06, "loss": 0.5534440875053406, "step": 10445 }, { "epoch": 1.9301659525816772, "grad_norm": 0.08280779421329498, "learning_rate": 5.963151768140424e-06, "loss": 0.38883811235427856, "step": 10446 }, { "epoch": 1.930350729290573, "grad_norm": 0.07502814382314682, "learning_rate": 5.961326704206252e-06, "loss": 0.46978461742401123, "step": 10447 }, { "epoch": 1.9305355059994689, "grad_norm": 0.08471072465181351, "learning_rate": 5.959501800998646e-06, "loss": 0.6152291893959045, "step": 10448 }, { "epoch": 1.9307202827083647, "grad_norm": 0.06850223988294601, "learning_rate": 5.9576770585902246e-06, "loss": 0.42707252502441406, "step": 10449 }, { "epoch": 1.9309050594172605, "grad_norm": 0.05500276759266853, "learning_rate": 5.955852477053606e-06, "loss": 0.37856414914131165, "step": 10450 }, { "epoch": 1.9310898361261564, "grad_norm": 0.09683556854724884, "learning_rate": 5.954028056461413e-06, "loss": 0.6295728087425232, "step": 10451 }, { "epoch": 1.9312746128350522, "grad_norm": 0.07046552002429962, "learning_rate": 5.95220379688624e-06, "loss": 0.502396821975708, "step": 10452 }, { "epoch": 1.931459389543948, "grad_norm": 0.08507654815912247, "learning_rate": 5.950379698400691e-06, "loss": 0.5647777915000916, "step": 10453 }, { "epoch": 1.9316441662528439, "grad_norm": 0.08452076464891434, "learning_rate": 5.9485557610773655e-06, "loss": 0.5014138221740723, "step": 10454 }, { "epoch": 1.9318289429617397, "grad_norm": 0.07229094952344894, "learning_rate": 5.9467319849888425e-06, "loss": 0.5160284638404846, "step": 10455 }, { "epoch": 1.9320137196706355, "grad_norm": 0.07167952507734299, "learning_rate": 5.944908370207707e-06, "loss": 0.48818081617355347, "step": 10456 }, { "epoch": 1.9321984963795313, "grad_norm": 0.08753294497728348, "learning_rate": 5.943084916806529e-06, "loss": 0.6104143261909485, "step": 10457 }, { "epoch": 1.9323832730884272, "grad_norm": 0.07173937559127808, "learning_rate": 5.94126162485788e-06, "loss": 0.4641045033931732, "step": 10458 }, { "epoch": 1.932568049797323, "grad_norm": 0.06941964477300644, "learning_rate": 5.9394384944343216e-06, "loss": 0.3881646990776062, "step": 10459 }, { "epoch": 1.9327528265062188, "grad_norm": 0.08919605612754822, "learning_rate": 5.937615525608406e-06, "loss": 0.5695154070854187, "step": 10460 }, { "epoch": 1.9329376032151147, "grad_norm": 0.06986981630325317, "learning_rate": 5.935792718452682e-06, "loss": 0.44560667872428894, "step": 10461 }, { "epoch": 1.9331223799240105, "grad_norm": 0.0688788965344429, "learning_rate": 5.933970073039694e-06, "loss": 0.3941502273082733, "step": 10462 }, { "epoch": 1.9333071566329063, "grad_norm": 0.06806276738643646, "learning_rate": 5.932147589441976e-06, "loss": 0.42018571496009827, "step": 10463 }, { "epoch": 1.9334919333418021, "grad_norm": 0.06949281692504883, "learning_rate": 5.930325267732056e-06, "loss": 0.40737178921699524, "step": 10464 }, { "epoch": 1.933676710050698, "grad_norm": 0.06800030916929245, "learning_rate": 5.928503107982462e-06, "loss": 0.5011507868766785, "step": 10465 }, { "epoch": 1.9338614867595938, "grad_norm": 0.06391538679599762, "learning_rate": 5.9266811102657e-06, "loss": 0.35602039098739624, "step": 10466 }, { "epoch": 1.9340462634684898, "grad_norm": 0.07929027080535889, "learning_rate": 5.924859274654289e-06, "loss": 0.45571425557136536, "step": 10467 }, { "epoch": 1.9342310401773857, "grad_norm": 0.08948726207017899, "learning_rate": 5.923037601220731e-06, "loss": 0.5991467237472534, "step": 10468 }, { "epoch": 1.9344158168862815, "grad_norm": 0.09263361245393753, "learning_rate": 5.92121609003752e-06, "loss": 0.4827702045440674, "step": 10469 }, { "epoch": 1.9346005935951773, "grad_norm": 0.06380041688680649, "learning_rate": 5.919394741177149e-06, "loss": 0.41103777289390564, "step": 10470 }, { "epoch": 1.9347853703040732, "grad_norm": 0.08030567318201065, "learning_rate": 5.9175735547120975e-06, "loss": 0.5373403429985046, "step": 10471 }, { "epoch": 1.934970147012969, "grad_norm": 0.0762866884469986, "learning_rate": 5.915752530714848e-06, "loss": 0.4612323045730591, "step": 10472 }, { "epoch": 1.9351549237218648, "grad_norm": 0.0838063657283783, "learning_rate": 5.9139316692578705e-06, "loss": 0.472083181142807, "step": 10473 }, { "epoch": 1.9353397004307609, "grad_norm": 0.09463243186473846, "learning_rate": 5.912110970413627e-06, "loss": 0.6421471238136292, "step": 10474 }, { "epoch": 1.9355244771396567, "grad_norm": 0.07890670001506805, "learning_rate": 5.910290434254579e-06, "loss": 0.4910679757595062, "step": 10475 }, { "epoch": 1.9357092538485525, "grad_norm": 0.07321932166814804, "learning_rate": 5.908470060853178e-06, "loss": 0.317918062210083, "step": 10476 }, { "epoch": 1.9358940305574484, "grad_norm": 0.08434881269931793, "learning_rate": 5.906649850281865e-06, "loss": 0.5010132789611816, "step": 10477 }, { "epoch": 1.9360788072663442, "grad_norm": 0.06752678006887436, "learning_rate": 5.904829802613081e-06, "loss": 0.379227876663208, "step": 10478 }, { "epoch": 1.93626358397524, "grad_norm": 0.08105649054050446, "learning_rate": 5.903009917919262e-06, "loss": 0.5523325204849243, "step": 10479 }, { "epoch": 1.9364483606841358, "grad_norm": 0.07709749788045883, "learning_rate": 5.9011901962728276e-06, "loss": 0.49589478969573975, "step": 10480 }, { "epoch": 1.9366331373930317, "grad_norm": 0.07420652359724045, "learning_rate": 5.8993706377462e-06, "loss": 0.4581543505191803, "step": 10481 }, { "epoch": 1.9368179141019275, "grad_norm": 0.07852959632873535, "learning_rate": 5.897551242411794e-06, "loss": 0.42142826318740845, "step": 10482 }, { "epoch": 1.9370026908108233, "grad_norm": 0.08900757133960724, "learning_rate": 5.8957320103420124e-06, "loss": 0.5019552707672119, "step": 10483 }, { "epoch": 1.9371874675197192, "grad_norm": 0.07102083414793015, "learning_rate": 5.893912941609255e-06, "loss": 0.42338263988494873, "step": 10484 }, { "epoch": 1.937372244228615, "grad_norm": 0.08349662274122238, "learning_rate": 5.892094036285922e-06, "loss": 0.4304684102535248, "step": 10485 }, { "epoch": 1.9375570209375108, "grad_norm": 0.08405330032110214, "learning_rate": 5.890275294444386e-06, "loss": 0.4575079083442688, "step": 10486 }, { "epoch": 1.9377417976464066, "grad_norm": 0.06834299117326736, "learning_rate": 5.888456716157043e-06, "loss": 0.35891595482826233, "step": 10487 }, { "epoch": 1.9379265743553025, "grad_norm": 0.08467060327529907, "learning_rate": 5.886638301496255e-06, "loss": 0.5952319502830505, "step": 10488 }, { "epoch": 1.9381113510641983, "grad_norm": 0.08719463646411896, "learning_rate": 5.884820050534392e-06, "loss": 0.5042622685432434, "step": 10489 }, { "epoch": 1.9382961277730941, "grad_norm": 0.07109535485506058, "learning_rate": 5.8830019633438215e-06, "loss": 0.4510539472103119, "step": 10490 }, { "epoch": 1.93848090448199, "grad_norm": 0.09614437073469162, "learning_rate": 5.881184039996889e-06, "loss": 0.6957901120185852, "step": 10491 }, { "epoch": 1.9386656811908858, "grad_norm": 0.06927401572465897, "learning_rate": 5.8793662805659455e-06, "loss": 0.4295322597026825, "step": 10492 }, { "epoch": 1.9388504578997816, "grad_norm": 0.09393969178199768, "learning_rate": 5.877548685123334e-06, "loss": 0.5344082117080688, "step": 10493 }, { "epoch": 1.9390352346086774, "grad_norm": 0.09615222364664078, "learning_rate": 5.875731253741386e-06, "loss": 0.550508975982666, "step": 10494 }, { "epoch": 1.9392200113175733, "grad_norm": 0.09041385352611542, "learning_rate": 5.87391398649243e-06, "loss": 0.5991291999816895, "step": 10495 }, { "epoch": 1.9394047880264693, "grad_norm": 0.09496104717254639, "learning_rate": 5.87209688344879e-06, "loss": 0.673936128616333, "step": 10496 }, { "epoch": 1.9395895647353651, "grad_norm": 0.07641255855560303, "learning_rate": 5.870279944682779e-06, "loss": 0.5525445938110352, "step": 10497 }, { "epoch": 1.939774341444261, "grad_norm": 0.09686005860567093, "learning_rate": 5.868463170266705e-06, "loss": 0.47172901034355164, "step": 10498 }, { "epoch": 1.9399591181531568, "grad_norm": 0.06705188751220703, "learning_rate": 5.866646560272873e-06, "loss": 0.4666908085346222, "step": 10499 }, { "epoch": 1.9401438948620526, "grad_norm": 0.0796026661992073, "learning_rate": 5.864830114773574e-06, "loss": 0.4845815896987915, "step": 10500 }, { "epoch": 1.9401438948620526, "eval_loss": 0.5543113946914673, "eval_runtime": 155.6506, "eval_samples_per_second": 117.115, "eval_steps_per_second": 14.642, "step": 10500 }, { "epoch": 1.9403286715709485, "grad_norm": 0.08364847302436829, "learning_rate": 5.8630138338411005e-06, "loss": 0.5841268301010132, "step": 10501 }, { "epoch": 1.9405134482798443, "grad_norm": 0.06963703781366348, "learning_rate": 5.8611977175477355e-06, "loss": 0.37664470076560974, "step": 10502 }, { "epoch": 1.9406982249887401, "grad_norm": 0.08510028570890427, "learning_rate": 5.859381765965748e-06, "loss": 0.5224068760871887, "step": 10503 }, { "epoch": 1.9408830016976362, "grad_norm": 0.06919050216674805, "learning_rate": 5.857565979167419e-06, "loss": 0.49947017431259155, "step": 10504 }, { "epoch": 1.941067778406532, "grad_norm": 0.07207190990447998, "learning_rate": 5.855750357224998e-06, "loss": 0.4688243567943573, "step": 10505 }, { "epoch": 1.9412525551154278, "grad_norm": 0.1118570864200592, "learning_rate": 5.853934900210746e-06, "loss": 0.5335554480552673, "step": 10506 }, { "epoch": 1.9414373318243237, "grad_norm": 0.07921619713306427, "learning_rate": 5.85211960819692e-06, "loss": 0.47763168811798096, "step": 10507 }, { "epoch": 1.9416221085332195, "grad_norm": 0.08585667610168457, "learning_rate": 5.850304481255751e-06, "loss": 0.669448733329773, "step": 10508 }, { "epoch": 1.9418068852421153, "grad_norm": 0.09483261406421661, "learning_rate": 5.8484895194594796e-06, "loss": 0.6821432709693909, "step": 10509 }, { "epoch": 1.9419916619510111, "grad_norm": 0.07414834201335907, "learning_rate": 5.846674722880343e-06, "loss": 0.4566863179206848, "step": 10510 }, { "epoch": 1.942176438659907, "grad_norm": 0.07801329344511032, "learning_rate": 5.8448600915905555e-06, "loss": 0.48371413350105286, "step": 10511 }, { "epoch": 1.9423612153688028, "grad_norm": 0.09932101517915726, "learning_rate": 5.8430456256623345e-06, "loss": 0.5337772965431213, "step": 10512 }, { "epoch": 1.9425459920776986, "grad_norm": 0.08487209677696228, "learning_rate": 5.841231325167896e-06, "loss": 0.5186351537704468, "step": 10513 }, { "epoch": 1.9427307687865945, "grad_norm": 0.07193045318126678, "learning_rate": 5.839417190179437e-06, "loss": 0.4164048433303833, "step": 10514 }, { "epoch": 1.9429155454954903, "grad_norm": 0.07649257779121399, "learning_rate": 5.837603220769157e-06, "loss": 0.5069774389266968, "step": 10515 }, { "epoch": 1.943100322204386, "grad_norm": 0.07070234417915344, "learning_rate": 5.83578941700925e-06, "loss": 0.34997639060020447, "step": 10516 }, { "epoch": 1.943285098913282, "grad_norm": 0.0739535391330719, "learning_rate": 5.833975778971888e-06, "loss": 0.41146987676620483, "step": 10517 }, { "epoch": 1.9434698756221778, "grad_norm": 0.08722543716430664, "learning_rate": 5.832162306729261e-06, "loss": 0.5551208257675171, "step": 10518 }, { "epoch": 1.9436546523310736, "grad_norm": 0.08653347194194794, "learning_rate": 5.830349000353537e-06, "loss": 0.5518236756324768, "step": 10519 }, { "epoch": 1.9438394290399694, "grad_norm": 0.0890008807182312, "learning_rate": 5.828535859916875e-06, "loss": 0.6357223987579346, "step": 10520 }, { "epoch": 1.9440242057488653, "grad_norm": 0.07691646367311478, "learning_rate": 5.8267228854914396e-06, "loss": 0.4500856101512909, "step": 10521 }, { "epoch": 1.944208982457761, "grad_norm": 0.06855422258377075, "learning_rate": 5.824910077149372e-06, "loss": 0.31975024938583374, "step": 10522 }, { "epoch": 1.944393759166657, "grad_norm": 0.10461317747831345, "learning_rate": 5.8230974349628215e-06, "loss": 0.6291202902793884, "step": 10523 }, { "epoch": 1.9445785358755527, "grad_norm": 0.08101405203342438, "learning_rate": 5.821284959003923e-06, "loss": 0.5928642749786377, "step": 10524 }, { "epoch": 1.9447633125844486, "grad_norm": 0.06952399015426636, "learning_rate": 5.819472649344813e-06, "loss": 0.49268776178359985, "step": 10525 }, { "epoch": 1.9449480892933446, "grad_norm": 0.0785820409655571, "learning_rate": 5.81766050605761e-06, "loss": 0.4932481348514557, "step": 10526 }, { "epoch": 1.9451328660022404, "grad_norm": 0.0837739109992981, "learning_rate": 5.815848529214439e-06, "loss": 0.5106244087219238, "step": 10527 }, { "epoch": 1.9453176427111363, "grad_norm": 0.08298195898532867, "learning_rate": 5.814036718887401e-06, "loss": 0.6543368101119995, "step": 10528 }, { "epoch": 1.945502419420032, "grad_norm": 0.0707169771194458, "learning_rate": 5.812225075148607e-06, "loss": 0.46865272521972656, "step": 10529 }, { "epoch": 1.945687196128928, "grad_norm": 0.08626644313335419, "learning_rate": 5.810413598070153e-06, "loss": 0.4652383327484131, "step": 10530 }, { "epoch": 1.9458719728378238, "grad_norm": 0.07402166724205017, "learning_rate": 5.80860228772413e-06, "loss": 0.4375515878200531, "step": 10531 }, { "epoch": 1.9460567495467196, "grad_norm": 0.0773448720574379, "learning_rate": 5.806791144182622e-06, "loss": 0.49111664295196533, "step": 10532 }, { "epoch": 1.9462415262556156, "grad_norm": 0.06117716431617737, "learning_rate": 5.804980167517712e-06, "loss": 0.4226382076740265, "step": 10533 }, { "epoch": 1.9464263029645115, "grad_norm": 0.068551205098629, "learning_rate": 5.803169357801463e-06, "loss": 0.45732957124710083, "step": 10534 }, { "epoch": 1.9466110796734073, "grad_norm": 0.07132992148399353, "learning_rate": 5.801358715105947e-06, "loss": 0.5618545413017273, "step": 10535 }, { "epoch": 1.9467958563823031, "grad_norm": 0.06258437782526016, "learning_rate": 5.799548239503214e-06, "loss": 0.30846071243286133, "step": 10536 }, { "epoch": 1.946980633091199, "grad_norm": 0.08688078820705414, "learning_rate": 5.797737931065316e-06, "loss": 0.6651712656021118, "step": 10537 }, { "epoch": 1.9471654098000948, "grad_norm": 0.08604100346565247, "learning_rate": 5.79592778986431e-06, "loss": 0.5880390405654907, "step": 10538 }, { "epoch": 1.9473501865089906, "grad_norm": 0.08469399809837341, "learning_rate": 5.79411781597222e-06, "loss": 0.544089674949646, "step": 10539 }, { "epoch": 1.9475349632178864, "grad_norm": 0.08143501728773117, "learning_rate": 5.7923080094610825e-06, "loss": 0.4275970160961151, "step": 10540 }, { "epoch": 1.9477197399267823, "grad_norm": 0.06817419826984406, "learning_rate": 5.7904983704029265e-06, "loss": 0.4366365075111389, "step": 10541 }, { "epoch": 1.947904516635678, "grad_norm": 0.06847912818193436, "learning_rate": 5.788688898869761e-06, "loss": 0.3891089856624603, "step": 10542 }, { "epoch": 1.948089293344574, "grad_norm": 0.060372162610292435, "learning_rate": 5.786879594933601e-06, "loss": 0.30533790588378906, "step": 10543 }, { "epoch": 1.9482740700534698, "grad_norm": 0.07052935659885406, "learning_rate": 5.785070458666453e-06, "loss": 0.4420267641544342, "step": 10544 }, { "epoch": 1.9484588467623656, "grad_norm": 0.0873042568564415, "learning_rate": 5.783261490140315e-06, "loss": 0.5908474326133728, "step": 10545 }, { "epoch": 1.9486436234712614, "grad_norm": 0.0672554299235344, "learning_rate": 5.781452689427176e-06, "loss": 0.30557793378829956, "step": 10546 }, { "epoch": 1.9488284001801572, "grad_norm": 0.07209660857915878, "learning_rate": 5.779644056599025e-06, "loss": 0.4309651255607605, "step": 10547 }, { "epoch": 1.949013176889053, "grad_norm": 0.07991938292980194, "learning_rate": 5.777835591727834e-06, "loss": 0.46604594588279724, "step": 10548 }, { "epoch": 1.949197953597949, "grad_norm": 0.08834175020456314, "learning_rate": 5.7760272948855776e-06, "loss": 0.6423034071922302, "step": 10549 }, { "epoch": 1.9493827303068447, "grad_norm": 0.09046275168657303, "learning_rate": 5.774219166144218e-06, "loss": 0.5118681192398071, "step": 10550 }, { "epoch": 1.9495675070157406, "grad_norm": 0.07558931410312653, "learning_rate": 5.772411205575716e-06, "loss": 0.40838319063186646, "step": 10551 }, { "epoch": 1.9497522837246364, "grad_norm": 0.08017290383577347, "learning_rate": 5.770603413252025e-06, "loss": 0.554165244102478, "step": 10552 }, { "epoch": 1.9499370604335322, "grad_norm": 0.08638852834701538, "learning_rate": 5.768795789245083e-06, "loss": 0.5371276140213013, "step": 10553 }, { "epoch": 1.950121837142428, "grad_norm": 0.09567637741565704, "learning_rate": 5.76698833362683e-06, "loss": 0.6154451370239258, "step": 10554 }, { "epoch": 1.950306613851324, "grad_norm": 0.07529415935277939, "learning_rate": 5.7651810464692016e-06, "loss": 0.41964831948280334, "step": 10555 }, { "epoch": 1.95049139056022, "grad_norm": 0.09110301733016968, "learning_rate": 5.7633739278441155e-06, "loss": 0.6057930588722229, "step": 10556 }, { "epoch": 1.9506761672691157, "grad_norm": 0.07765660434961319, "learning_rate": 5.761566977823487e-06, "loss": 0.41644635796546936, "step": 10557 }, { "epoch": 1.9508609439780116, "grad_norm": 0.08114904910326004, "learning_rate": 5.759760196479242e-06, "loss": 0.49219024181365967, "step": 10558 }, { "epoch": 1.9510457206869074, "grad_norm": 0.06297268718481064, "learning_rate": 5.757953583883271e-06, "loss": 0.3770653307437897, "step": 10559 }, { "epoch": 1.9512304973958032, "grad_norm": 0.06261013448238373, "learning_rate": 5.756147140107475e-06, "loss": 0.38064223527908325, "step": 10560 }, { "epoch": 1.951415274104699, "grad_norm": 0.08856848627328873, "learning_rate": 5.7543408652237484e-06, "loss": 0.6331533789634705, "step": 10561 }, { "epoch": 1.951600050813595, "grad_norm": 0.07619134336709976, "learning_rate": 5.7525347593039704e-06, "loss": 0.5557543635368347, "step": 10562 }, { "epoch": 1.951784827522491, "grad_norm": 0.06637249141931534, "learning_rate": 5.7507288224200195e-06, "loss": 0.39819884300231934, "step": 10563 }, { "epoch": 1.9519696042313868, "grad_norm": 0.09656571596860886, "learning_rate": 5.748923054643767e-06, "loss": 0.7308982014656067, "step": 10564 }, { "epoch": 1.9521543809402826, "grad_norm": 0.10094495117664337, "learning_rate": 5.7471174560470775e-06, "loss": 0.5335283875465393, "step": 10565 }, { "epoch": 1.9523391576491784, "grad_norm": 0.06061745807528496, "learning_rate": 5.745312026701808e-06, "loss": 0.388337105512619, "step": 10566 }, { "epoch": 1.9525239343580743, "grad_norm": 0.07064970582723618, "learning_rate": 5.743506766679812e-06, "loss": 0.4752836227416992, "step": 10567 }, { "epoch": 1.95270871106697, "grad_norm": 0.07706630975008011, "learning_rate": 5.741701676052926e-06, "loss": 0.45462024211883545, "step": 10568 }, { "epoch": 1.952893487775866, "grad_norm": 0.0834769457578659, "learning_rate": 5.739896754892995e-06, "loss": 0.46920305490493774, "step": 10569 }, { "epoch": 1.9530782644847617, "grad_norm": 0.08264000713825226, "learning_rate": 5.738092003271837e-06, "loss": 0.5234748125076294, "step": 10570 }, { "epoch": 1.9532630411936576, "grad_norm": 0.1118183583021164, "learning_rate": 5.736287421261287e-06, "loss": 0.8297079205513, "step": 10571 }, { "epoch": 1.9534478179025534, "grad_norm": 0.07726228982210159, "learning_rate": 5.734483008933163e-06, "loss": 0.5071477890014648, "step": 10572 }, { "epoch": 1.9536325946114492, "grad_norm": 0.05474299192428589, "learning_rate": 5.732678766359265e-06, "loss": 0.2890155613422394, "step": 10573 }, { "epoch": 1.953817371320345, "grad_norm": 0.0789993479847908, "learning_rate": 5.730874693611402e-06, "loss": 0.4219309687614441, "step": 10574 }, { "epoch": 1.9540021480292409, "grad_norm": 0.09682317823171616, "learning_rate": 5.729070790761374e-06, "loss": 0.4167204797267914, "step": 10575 }, { "epoch": 1.9541869247381367, "grad_norm": 0.06620560586452484, "learning_rate": 5.727267057880963e-06, "loss": 0.36565524339675903, "step": 10576 }, { "epoch": 1.9543717014470325, "grad_norm": 0.06693736463785172, "learning_rate": 5.72546349504195e-06, "loss": 0.30560731887817383, "step": 10577 }, { "epoch": 1.9545564781559284, "grad_norm": 0.06256041675806046, "learning_rate": 5.723660102316126e-06, "loss": 0.354993999004364, "step": 10578 }, { "epoch": 1.9547412548648242, "grad_norm": 0.07756361365318298, "learning_rate": 5.7218568797752445e-06, "loss": 0.587867796421051, "step": 10579 }, { "epoch": 1.95492603157372, "grad_norm": 0.09032636880874634, "learning_rate": 5.7200538274910775e-06, "loss": 0.4667337238788605, "step": 10580 }, { "epoch": 1.9551108082826159, "grad_norm": 0.08582337945699692, "learning_rate": 5.718250945535382e-06, "loss": 0.4862954616546631, "step": 10581 }, { "epoch": 1.9552955849915117, "grad_norm": 0.07556741684675217, "learning_rate": 5.716448233979897e-06, "loss": 0.4102937877178192, "step": 10582 }, { "epoch": 1.9554803617004075, "grad_norm": 0.09605452418327332, "learning_rate": 5.714645692896372e-06, "loss": 0.5979748964309692, "step": 10583 }, { "epoch": 1.9556651384093036, "grad_norm": 0.068499855697155, "learning_rate": 5.712843322356541e-06, "loss": 0.384988933801651, "step": 10584 }, { "epoch": 1.9558499151181994, "grad_norm": 0.07488887757062912, "learning_rate": 5.711041122432132e-06, "loss": 0.4321574568748474, "step": 10585 }, { "epoch": 1.9560346918270952, "grad_norm": 0.08099797368049622, "learning_rate": 5.709239093194872e-06, "loss": 0.5244421362876892, "step": 10586 }, { "epoch": 1.956219468535991, "grad_norm": 0.07003786414861679, "learning_rate": 5.7074372347164695e-06, "loss": 0.3788778781890869, "step": 10587 }, { "epoch": 1.9564042452448869, "grad_norm": 0.0769960880279541, "learning_rate": 5.7056355470686354e-06, "loss": 0.4467634856700897, "step": 10588 }, { "epoch": 1.9565890219537827, "grad_norm": 0.08135896921157837, "learning_rate": 5.703834030323074e-06, "loss": 0.46010300517082214, "step": 10589 }, { "epoch": 1.9567737986626785, "grad_norm": 0.08489905297756195, "learning_rate": 5.7020326845514695e-06, "loss": 0.5769398212432861, "step": 10590 }, { "epoch": 1.9569585753715744, "grad_norm": 0.0838000625371933, "learning_rate": 5.7002315098255225e-06, "loss": 0.5246551632881165, "step": 10591 }, { "epoch": 1.9571433520804704, "grad_norm": 0.08578898012638092, "learning_rate": 5.698430506216912e-06, "loss": 0.4350026249885559, "step": 10592 }, { "epoch": 1.9573281287893662, "grad_norm": 0.0876152515411377, "learning_rate": 5.696629673797305e-06, "loss": 0.6735652089118958, "step": 10593 }, { "epoch": 1.957512905498262, "grad_norm": 0.0778566524386406, "learning_rate": 5.694829012638374e-06, "loss": 0.49129819869995117, "step": 10594 }, { "epoch": 1.957697682207158, "grad_norm": 0.07125280797481537, "learning_rate": 5.693028522811783e-06, "loss": 0.38843223452568054, "step": 10595 }, { "epoch": 1.9578824589160537, "grad_norm": 0.08513450622558594, "learning_rate": 5.691228204389179e-06, "loss": 0.44833922386169434, "step": 10596 }, { "epoch": 1.9580672356249496, "grad_norm": 0.10721521079540253, "learning_rate": 5.689428057442208e-06, "loss": 0.6927924156188965, "step": 10597 }, { "epoch": 1.9582520123338454, "grad_norm": 0.07172559946775436, "learning_rate": 5.687628082042522e-06, "loss": 0.42140018939971924, "step": 10598 }, { "epoch": 1.9584367890427412, "grad_norm": 0.04447796568274498, "learning_rate": 5.685828278261743e-06, "loss": 0.27840760350227356, "step": 10599 }, { "epoch": 1.958621565751637, "grad_norm": 0.06525732576847076, "learning_rate": 5.684028646171505e-06, "loss": 0.3571588695049286, "step": 10600 }, { "epoch": 1.9588063424605329, "grad_norm": 0.06040734425187111, "learning_rate": 5.682229185843418e-06, "loss": 0.3269086182117462, "step": 10601 }, { "epoch": 1.9589911191694287, "grad_norm": 0.08246826380491257, "learning_rate": 5.680429897349102e-06, "loss": 0.48208680748939514, "step": 10602 }, { "epoch": 1.9591758958783245, "grad_norm": 0.10077651590108871, "learning_rate": 5.6786307807601625e-06, "loss": 0.7280152440071106, "step": 10603 }, { "epoch": 1.9593606725872204, "grad_norm": 0.07238568365573883, "learning_rate": 5.676831836148198e-06, "loss": 0.42356643080711365, "step": 10604 }, { "epoch": 1.9595454492961162, "grad_norm": 0.07383681833744049, "learning_rate": 5.675033063584801e-06, "loss": 0.49863195419311523, "step": 10605 }, { "epoch": 1.959730226005012, "grad_norm": 0.07692846655845642, "learning_rate": 5.6732344631415616e-06, "loss": 0.4908929765224457, "step": 10606 }, { "epoch": 1.9599150027139078, "grad_norm": 0.08402621746063232, "learning_rate": 5.6714360348900475e-06, "loss": 0.4820367693901062, "step": 10607 }, { "epoch": 1.9600997794228037, "grad_norm": 0.05579538643360138, "learning_rate": 5.66963777890184e-06, "loss": 0.29534614086151123, "step": 10608 }, { "epoch": 1.9602845561316995, "grad_norm": 0.07454511523246765, "learning_rate": 5.667839695248498e-06, "loss": 0.36852237582206726, "step": 10609 }, { "epoch": 1.9604693328405953, "grad_norm": 0.07275497168302536, "learning_rate": 5.666041784001584e-06, "loss": 0.49770715832710266, "step": 10610 }, { "epoch": 1.9606541095494912, "grad_norm": 0.07775789499282837, "learning_rate": 5.664244045232647e-06, "loss": 0.49964237213134766, "step": 10611 }, { "epoch": 1.960838886258387, "grad_norm": 0.0660817101597786, "learning_rate": 5.662446479013238e-06, "loss": 0.4232540726661682, "step": 10612 }, { "epoch": 1.9610236629672828, "grad_norm": 0.07350712269544601, "learning_rate": 5.660649085414884e-06, "loss": 0.41187983751296997, "step": 10613 }, { "epoch": 1.9612084396761789, "grad_norm": 0.06443696469068527, "learning_rate": 5.658851864509119e-06, "loss": 0.3623831272125244, "step": 10614 }, { "epoch": 1.9613932163850747, "grad_norm": 0.07406435161828995, "learning_rate": 5.657054816367473e-06, "loss": 0.5087496638298035, "step": 10615 }, { "epoch": 1.9615779930939705, "grad_norm": 0.08812040090560913, "learning_rate": 5.655257941061454e-06, "loss": 0.6455977559089661, "step": 10616 }, { "epoch": 1.9617627698028663, "grad_norm": 0.07852617651224136, "learning_rate": 5.653461238662577e-06, "loss": 0.5253956913948059, "step": 10617 }, { "epoch": 1.9619475465117622, "grad_norm": 0.08296500146389008, "learning_rate": 5.6516647092423414e-06, "loss": 0.49200141429901123, "step": 10618 }, { "epoch": 1.962132323220658, "grad_norm": 0.07371774315834045, "learning_rate": 5.6498683528722486e-06, "loss": 0.5164257287979126, "step": 10619 }, { "epoch": 1.9623170999295538, "grad_norm": 0.07330730557441711, "learning_rate": 5.6480721696237884e-06, "loss": 0.3856649696826935, "step": 10620 }, { "epoch": 1.9625018766384499, "grad_norm": 0.08983331173658371, "learning_rate": 5.646276159568437e-06, "loss": 0.5152300000190735, "step": 10621 }, { "epoch": 1.9626866533473457, "grad_norm": 0.07849666476249695, "learning_rate": 5.644480322777673e-06, "loss": 0.5518277883529663, "step": 10622 }, { "epoch": 1.9628714300562415, "grad_norm": 0.07726949453353882, "learning_rate": 5.642684659322966e-06, "loss": 0.4965454936027527, "step": 10623 }, { "epoch": 1.9630562067651374, "grad_norm": 0.10425007343292236, "learning_rate": 5.640889169275776e-06, "loss": 0.647617757320404, "step": 10624 }, { "epoch": 1.9632409834740332, "grad_norm": 0.08119388669729233, "learning_rate": 5.63909385270756e-06, "loss": 0.5539153814315796, "step": 10625 }, { "epoch": 1.963425760182929, "grad_norm": 0.07241155952215195, "learning_rate": 5.63729870968977e-06, "loss": 0.4532361924648285, "step": 10626 }, { "epoch": 1.9636105368918249, "grad_norm": 0.08779602497816086, "learning_rate": 5.6355037402938375e-06, "loss": 0.5239005088806152, "step": 10627 }, { "epoch": 1.9637953136007207, "grad_norm": 0.07113391160964966, "learning_rate": 5.6337089445912e-06, "loss": 0.4499783217906952, "step": 10628 }, { "epoch": 1.9639800903096165, "grad_norm": 0.09953673183917999, "learning_rate": 5.631914322653289e-06, "loss": 0.6576701402664185, "step": 10629 }, { "epoch": 1.9641648670185123, "grad_norm": 0.07178323715925217, "learning_rate": 5.6301198745515205e-06, "loss": 0.487251877784729, "step": 10630 }, { "epoch": 1.9643496437274082, "grad_norm": 0.0825979933142662, "learning_rate": 5.6283256003573095e-06, "loss": 0.5146287083625793, "step": 10631 }, { "epoch": 1.964534420436304, "grad_norm": 0.07387817651033401, "learning_rate": 5.626531500142065e-06, "loss": 0.4900865852832794, "step": 10632 }, { "epoch": 1.9647191971451998, "grad_norm": 0.07697905600070953, "learning_rate": 5.624737573977182e-06, "loss": 0.4235023260116577, "step": 10633 }, { "epoch": 1.9649039738540957, "grad_norm": 0.09078847616910934, "learning_rate": 5.622943821934058e-06, "loss": 0.47874563932418823, "step": 10634 }, { "epoch": 1.9650887505629915, "grad_norm": 0.07096932083368301, "learning_rate": 5.621150244084072e-06, "loss": 0.33702322840690613, "step": 10635 }, { "epoch": 1.9652735272718873, "grad_norm": 0.07716767489910126, "learning_rate": 5.619356840498607e-06, "loss": 0.45271533727645874, "step": 10636 }, { "epoch": 1.9654583039807831, "grad_norm": 0.08461139351129532, "learning_rate": 5.617563611249034e-06, "loss": 0.6253509521484375, "step": 10637 }, { "epoch": 1.965643080689679, "grad_norm": 0.08762659132480621, "learning_rate": 5.615770556406719e-06, "loss": 0.7164840698242188, "step": 10638 }, { "epoch": 1.9658278573985748, "grad_norm": 0.09939246624708176, "learning_rate": 5.613977676043019e-06, "loss": 0.6778596639633179, "step": 10639 }, { "epoch": 1.9660126341074706, "grad_norm": 0.0584709607064724, "learning_rate": 5.612184970229288e-06, "loss": 0.3437689542770386, "step": 10640 }, { "epoch": 1.9661974108163665, "grad_norm": 0.08932600915431976, "learning_rate": 5.610392439036866e-06, "loss": 0.4983118772506714, "step": 10641 }, { "epoch": 1.9663821875252623, "grad_norm": 0.06830000877380371, "learning_rate": 5.60860008253709e-06, "loss": 0.3114332854747772, "step": 10642 }, { "epoch": 1.9665669642341583, "grad_norm": 0.08273927122354507, "learning_rate": 5.606807900801292e-06, "loss": 0.4588139057159424, "step": 10643 }, { "epoch": 1.9667517409430542, "grad_norm": 0.08656501770019531, "learning_rate": 5.605015893900796e-06, "loss": 0.4701785147190094, "step": 10644 }, { "epoch": 1.96693651765195, "grad_norm": 0.07206618040800095, "learning_rate": 5.6032240619069156e-06, "loss": 0.45341089367866516, "step": 10645 }, { "epoch": 1.9671212943608458, "grad_norm": 0.08199794590473175, "learning_rate": 5.601432404890967e-06, "loss": 0.44720548391342163, "step": 10646 }, { "epoch": 1.9673060710697416, "grad_norm": 0.10419245809316635, "learning_rate": 5.599640922924243e-06, "loss": 0.6424961090087891, "step": 10647 }, { "epoch": 1.9674908477786375, "grad_norm": 0.09122408926486969, "learning_rate": 5.5978496160780435e-06, "loss": 0.5698374509811401, "step": 10648 }, { "epoch": 1.9676756244875333, "grad_norm": 0.08791826665401459, "learning_rate": 5.5960584844236565e-06, "loss": 0.6349215507507324, "step": 10649 }, { "epoch": 1.9678604011964294, "grad_norm": 0.07481888681650162, "learning_rate": 5.594267528032364e-06, "loss": 0.39072588086128235, "step": 10650 }, { "epoch": 1.9680451779053252, "grad_norm": 0.0983092337846756, "learning_rate": 5.5924767469754435e-06, "loss": 0.4008905291557312, "step": 10651 }, { "epoch": 1.968229954614221, "grad_norm": 0.08656061440706253, "learning_rate": 5.590686141324155e-06, "loss": 0.3958432078361511, "step": 10652 }, { "epoch": 1.9684147313231168, "grad_norm": 0.0860547348856926, "learning_rate": 5.588895711149764e-06, "loss": 0.6702467203140259, "step": 10653 }, { "epoch": 1.9685995080320127, "grad_norm": 0.09186636656522751, "learning_rate": 5.587105456523527e-06, "loss": 0.6488978266716003, "step": 10654 }, { "epoch": 1.9687842847409085, "grad_norm": 0.08431357890367508, "learning_rate": 5.585315377516682e-06, "loss": 0.5484136343002319, "step": 10655 }, { "epoch": 1.9689690614498043, "grad_norm": 0.06792140752077103, "learning_rate": 5.583525474200473e-06, "loss": 0.35278281569480896, "step": 10656 }, { "epoch": 1.9691538381587002, "grad_norm": 0.07110414654016495, "learning_rate": 5.581735746646134e-06, "loss": 0.4158727824687958, "step": 10657 }, { "epoch": 1.969338614867596, "grad_norm": 0.08212693780660629, "learning_rate": 5.579946194924888e-06, "loss": 0.5473571419715881, "step": 10658 }, { "epoch": 1.9695233915764918, "grad_norm": 0.07613690942525864, "learning_rate": 5.5781568191079564e-06, "loss": 0.5071023106575012, "step": 10659 }, { "epoch": 1.9697081682853876, "grad_norm": 0.06780392676591873, "learning_rate": 5.576367619266552e-06, "loss": 0.3396739661693573, "step": 10660 }, { "epoch": 1.9698929449942835, "grad_norm": 0.06602289527654648, "learning_rate": 5.574578595471873e-06, "loss": 0.37511688470840454, "step": 10661 }, { "epoch": 1.9700777217031793, "grad_norm": 0.08096983283758163, "learning_rate": 5.5727897477951196e-06, "loss": 0.48158252239227295, "step": 10662 }, { "epoch": 1.9702624984120751, "grad_norm": 0.10609427094459534, "learning_rate": 5.5710010763074854e-06, "loss": 0.5912395119667053, "step": 10663 }, { "epoch": 1.970447275120971, "grad_norm": 0.07183204591274261, "learning_rate": 5.56921258108015e-06, "loss": 0.435452401638031, "step": 10664 }, { "epoch": 1.9706320518298668, "grad_norm": 0.06339947134256363, "learning_rate": 5.567424262184292e-06, "loss": 0.4167206585407257, "step": 10665 }, { "epoch": 1.9708168285387626, "grad_norm": 0.07174595445394516, "learning_rate": 5.565636119691085e-06, "loss": 0.4783751368522644, "step": 10666 }, { "epoch": 1.9710016052476584, "grad_norm": 0.05023857578635216, "learning_rate": 5.563848153671682e-06, "loss": 0.25515496730804443, "step": 10667 }, { "epoch": 1.9711863819565543, "grad_norm": 0.09054163098335266, "learning_rate": 5.562060364197249e-06, "loss": 0.597802460193634, "step": 10668 }, { "epoch": 1.97137115866545, "grad_norm": 0.09694191068410873, "learning_rate": 5.56027275133892e-06, "loss": 0.4279719889163971, "step": 10669 }, { "epoch": 1.971555935374346, "grad_norm": 0.0726257711648941, "learning_rate": 5.558485315167849e-06, "loss": 0.38296180963516235, "step": 10670 }, { "epoch": 1.9717407120832418, "grad_norm": 0.06664053350687027, "learning_rate": 5.556698055755173e-06, "loss": 0.3466009497642517, "step": 10671 }, { "epoch": 1.9719254887921378, "grad_norm": 0.07835600525140762, "learning_rate": 5.554910973172008e-06, "loss": 0.4621722400188446, "step": 10672 }, { "epoch": 1.9721102655010336, "grad_norm": 0.09859440475702286, "learning_rate": 5.5531240674894796e-06, "loss": 0.5644648671150208, "step": 10673 }, { "epoch": 1.9722950422099295, "grad_norm": 0.08445590734481812, "learning_rate": 5.551337338778703e-06, "loss": 0.45129987597465515, "step": 10674 }, { "epoch": 1.9724798189188253, "grad_norm": 0.10042787343263626, "learning_rate": 5.54955078711078e-06, "loss": 0.5884333848953247, "step": 10675 }, { "epoch": 1.9726645956277211, "grad_norm": 0.06397289037704468, "learning_rate": 5.547764412556811e-06, "loss": 0.39160019159317017, "step": 10676 }, { "epoch": 1.972849372336617, "grad_norm": 0.07438860833644867, "learning_rate": 5.545978215187889e-06, "loss": 0.4273079037666321, "step": 10677 }, { "epoch": 1.9730341490455128, "grad_norm": 0.09187070280313492, "learning_rate": 5.5441921950751e-06, "loss": 0.6627777814865112, "step": 10678 }, { "epoch": 1.9732189257544086, "grad_norm": 0.0796092301607132, "learning_rate": 5.542406352289521e-06, "loss": 0.3833546042442322, "step": 10679 }, { "epoch": 1.9734037024633047, "grad_norm": 0.07409553974866867, "learning_rate": 5.540620686902227e-06, "loss": 0.40941688418388367, "step": 10680 }, { "epoch": 1.9735884791722005, "grad_norm": 0.07712230086326599, "learning_rate": 5.5388351989842745e-06, "loss": 0.49133092164993286, "step": 10681 }, { "epoch": 1.9737732558810963, "grad_norm": 0.08963990211486816, "learning_rate": 5.537049888606724e-06, "loss": 0.4761016368865967, "step": 10682 }, { "epoch": 1.9739580325899921, "grad_norm": 0.07985302060842514, "learning_rate": 5.535264755840624e-06, "loss": 0.5194723606109619, "step": 10683 }, { "epoch": 1.974142809298888, "grad_norm": 0.06895793974399567, "learning_rate": 5.5334798007570205e-06, "loss": 0.3801591396331787, "step": 10684 }, { "epoch": 1.9743275860077838, "grad_norm": 0.07750911265611649, "learning_rate": 5.531695023426949e-06, "loss": 0.48247331380844116, "step": 10685 }, { "epoch": 1.9745123627166796, "grad_norm": 0.07938727736473083, "learning_rate": 5.529910423921432e-06, "loss": 0.55830979347229, "step": 10686 }, { "epoch": 1.9746971394255755, "grad_norm": 0.09892601519823074, "learning_rate": 5.528126002311496e-06, "loss": 0.6771171689033508, "step": 10687 }, { "epoch": 1.9748819161344713, "grad_norm": 0.06546211242675781, "learning_rate": 5.526341758668158e-06, "loss": 0.37425461411476135, "step": 10688 }, { "epoch": 1.975066692843367, "grad_norm": 0.0693388432264328, "learning_rate": 5.524557693062414e-06, "loss": 0.41841715574264526, "step": 10689 }, { "epoch": 1.975251469552263, "grad_norm": 0.08304619044065475, "learning_rate": 5.5227738055652755e-06, "loss": 0.5554640889167786, "step": 10690 }, { "epoch": 1.9754362462611588, "grad_norm": 0.06779686361551285, "learning_rate": 5.520990096247736e-06, "loss": 0.48768579959869385, "step": 10691 }, { "epoch": 1.9756210229700546, "grad_norm": 0.09251510351896286, "learning_rate": 5.519206565180775e-06, "loss": 0.6752281785011292, "step": 10692 }, { "epoch": 1.9758057996789504, "grad_norm": 0.08035050332546234, "learning_rate": 5.517423212435372e-06, "loss": 0.5057024955749512, "step": 10693 }, { "epoch": 1.9759905763878463, "grad_norm": 0.07251998037099838, "learning_rate": 5.515640038082506e-06, "loss": 0.46995460987091064, "step": 10694 }, { "epoch": 1.976175353096742, "grad_norm": 0.06584444642066956, "learning_rate": 5.5138570421931325e-06, "loss": 0.3751562237739563, "step": 10695 }, { "epoch": 1.976360129805638, "grad_norm": 0.09301778674125671, "learning_rate": 5.5120742248382134e-06, "loss": 0.626192033290863, "step": 10696 }, { "epoch": 1.9765449065145337, "grad_norm": 0.07395187765359879, "learning_rate": 5.5102915860887e-06, "loss": 0.5168426632881165, "step": 10697 }, { "epoch": 1.9767296832234296, "grad_norm": 0.08475355803966522, "learning_rate": 5.508509126015535e-06, "loss": 0.5751456022262573, "step": 10698 }, { "epoch": 1.9769144599323254, "grad_norm": 0.08303186297416687, "learning_rate": 5.506726844689658e-06, "loss": 0.5156502723693848, "step": 10699 }, { "epoch": 1.9770992366412212, "grad_norm": 0.06387243419885635, "learning_rate": 5.5049447421819904e-06, "loss": 0.533256471157074, "step": 10700 }, { "epoch": 1.977284013350117, "grad_norm": 0.08207794278860092, "learning_rate": 5.503162818563459e-06, "loss": 0.5266383290290833, "step": 10701 }, { "epoch": 1.977468790059013, "grad_norm": 0.09505506604909897, "learning_rate": 5.50138107390498e-06, "loss": 0.690740168094635, "step": 10702 }, { "epoch": 1.977653566767909, "grad_norm": 0.07784173637628555, "learning_rate": 5.4995995082774585e-06, "loss": 0.513063907623291, "step": 10703 }, { "epoch": 1.9778383434768048, "grad_norm": 0.08312927931547165, "learning_rate": 5.497818121751797e-06, "loss": 0.5379572510719299, "step": 10704 }, { "epoch": 1.9780231201857006, "grad_norm": 0.062268663197755814, "learning_rate": 5.4960369143988935e-06, "loss": 0.38305482268333435, "step": 10705 }, { "epoch": 1.9782078968945964, "grad_norm": 0.0820695236325264, "learning_rate": 5.4942558862896255e-06, "loss": 0.4770805537700653, "step": 10706 }, { "epoch": 1.9783926736034922, "grad_norm": 0.07518643140792847, "learning_rate": 5.492475037494875e-06, "loss": 0.5276569724082947, "step": 10707 }, { "epoch": 1.978577450312388, "grad_norm": 0.07038936764001846, "learning_rate": 5.49069436808552e-06, "loss": 0.48559287190437317, "step": 10708 }, { "epoch": 1.9787622270212841, "grad_norm": 0.08067972213029861, "learning_rate": 5.488913878132416e-06, "loss": 0.5168792605400085, "step": 10709 }, { "epoch": 1.97894700373018, "grad_norm": 0.07291867583990097, "learning_rate": 5.487133567706429e-06, "loss": 0.4612545967102051, "step": 10710 }, { "epoch": 1.9791317804390758, "grad_norm": 0.0964045450091362, "learning_rate": 5.48535343687841e-06, "loss": 0.6943390369415283, "step": 10711 }, { "epoch": 1.9793165571479716, "grad_norm": 0.06853216141462326, "learning_rate": 5.483573485719196e-06, "loss": 0.439113974571228, "step": 10712 }, { "epoch": 1.9795013338568674, "grad_norm": 0.07594563066959381, "learning_rate": 5.481793714299628e-06, "loss": 0.49463391304016113, "step": 10713 }, { "epoch": 1.9796861105657633, "grad_norm": 0.06695324182510376, "learning_rate": 5.480014122690538e-06, "loss": 0.4390440285205841, "step": 10714 }, { "epoch": 1.979870887274659, "grad_norm": 0.07316230237483978, "learning_rate": 5.47823471096274e-06, "loss": 0.510161280632019, "step": 10715 }, { "epoch": 1.980055663983555, "grad_norm": 0.05946960672736168, "learning_rate": 5.476455479187055e-06, "loss": 0.3409574031829834, "step": 10716 }, { "epoch": 1.9802404406924508, "grad_norm": 0.07861226797103882, "learning_rate": 5.474676427434289e-06, "loss": 0.4796425700187683, "step": 10717 }, { "epoch": 1.9804252174013466, "grad_norm": 0.0815582200884819, "learning_rate": 5.472897555775243e-06, "loss": 0.6053028106689453, "step": 10718 }, { "epoch": 1.9806099941102424, "grad_norm": 0.06498534232378006, "learning_rate": 5.471118864280716e-06, "loss": 0.3518756031990051, "step": 10719 }, { "epoch": 1.9807947708191382, "grad_norm": 0.07267723232507706, "learning_rate": 5.469340353021484e-06, "loss": 0.38878706097602844, "step": 10720 }, { "epoch": 1.980979547528034, "grad_norm": 0.07959043234586716, "learning_rate": 5.4675620220683315e-06, "loss": 0.4758075177669525, "step": 10721 }, { "epoch": 1.98116432423693, "grad_norm": 0.08470583707094193, "learning_rate": 5.4657838714920295e-06, "loss": 0.5368279814720154, "step": 10722 }, { "epoch": 1.9813491009458257, "grad_norm": 0.07101590186357498, "learning_rate": 5.464005901363345e-06, "loss": 0.4188085198402405, "step": 10723 }, { "epoch": 1.9815338776547216, "grad_norm": 0.06812476366758347, "learning_rate": 5.462228111753034e-06, "loss": 0.39685487747192383, "step": 10724 }, { "epoch": 1.9817186543636174, "grad_norm": 0.0752476379275322, "learning_rate": 5.460450502731851e-06, "loss": 0.4410916268825531, "step": 10725 }, { "epoch": 1.9819034310725132, "grad_norm": 0.06209181249141693, "learning_rate": 5.4586730743705315e-06, "loss": 0.400260329246521, "step": 10726 }, { "epoch": 1.982088207781409, "grad_norm": 0.07415378093719482, "learning_rate": 5.4568958267398165e-06, "loss": 0.4660295844078064, "step": 10727 }, { "epoch": 1.9822729844903049, "grad_norm": 0.07002268731594086, "learning_rate": 5.455118759910437e-06, "loss": 0.46611306071281433, "step": 10728 }, { "epoch": 1.9824577611992007, "grad_norm": 0.08125322312116623, "learning_rate": 5.453341873953104e-06, "loss": 0.4385994076728821, "step": 10729 }, { "epoch": 1.9826425379080965, "grad_norm": 0.07736670225858688, "learning_rate": 5.451565168938544e-06, "loss": 0.4350145161151886, "step": 10730 }, { "epoch": 1.9828273146169926, "grad_norm": 0.08490642160177231, "learning_rate": 5.449788644937464e-06, "loss": 0.4928675889968872, "step": 10731 }, { "epoch": 1.9830120913258884, "grad_norm": 0.06990993022918701, "learning_rate": 5.448012302020556e-06, "loss": 0.36245250701904297, "step": 10732 }, { "epoch": 1.9831968680347842, "grad_norm": 0.0806194469332695, "learning_rate": 5.44623614025852e-06, "loss": 0.5019152164459229, "step": 10733 }, { "epoch": 1.98338164474368, "grad_norm": 0.09441230446100235, "learning_rate": 5.444460159722037e-06, "loss": 0.6116966009140015, "step": 10734 }, { "epoch": 1.983566421452576, "grad_norm": 0.07513176649808884, "learning_rate": 5.442684360481787e-06, "loss": 0.4708969295024872, "step": 10735 }, { "epoch": 1.9837511981614717, "grad_norm": 0.08305010944604874, "learning_rate": 5.4409087426084395e-06, "loss": 0.5403691530227661, "step": 10736 }, { "epoch": 1.9839359748703675, "grad_norm": 0.06144353374838829, "learning_rate": 5.439133306172661e-06, "loss": 0.38456642627716064, "step": 10737 }, { "epoch": 1.9841207515792636, "grad_norm": 0.06547252833843231, "learning_rate": 5.4373580512451095e-06, "loss": 0.3211238980293274, "step": 10738 }, { "epoch": 1.9843055282881594, "grad_norm": 0.09167303889989853, "learning_rate": 5.435582977896435e-06, "loss": 0.6328478455543518, "step": 10739 }, { "epoch": 1.9844903049970553, "grad_norm": 0.09654346853494644, "learning_rate": 5.433808086197274e-06, "loss": 0.5210638642311096, "step": 10740 }, { "epoch": 1.984675081705951, "grad_norm": 0.08878304809331894, "learning_rate": 5.432033376218267e-06, "loss": 0.5295599699020386, "step": 10741 }, { "epoch": 1.984859858414847, "grad_norm": 0.06716418266296387, "learning_rate": 5.4302588480300385e-06, "loss": 0.4261035621166229, "step": 10742 }, { "epoch": 1.9850446351237427, "grad_norm": 0.07184049487113953, "learning_rate": 5.428484501703212e-06, "loss": 0.49542099237442017, "step": 10743 }, { "epoch": 1.9852294118326386, "grad_norm": 0.07388246059417725, "learning_rate": 5.4267103373083985e-06, "loss": 0.480552077293396, "step": 10744 }, { "epoch": 1.9854141885415344, "grad_norm": 0.08299454301595688, "learning_rate": 5.424936354916212e-06, "loss": 0.43938887119293213, "step": 10745 }, { "epoch": 1.9855989652504302, "grad_norm": 0.09323057532310486, "learning_rate": 5.423162554597239e-06, "loss": 0.5631217360496521, "step": 10746 }, { "epoch": 1.985783741959326, "grad_norm": 0.07826078683137894, "learning_rate": 5.421388936422082e-06, "loss": 0.47334596514701843, "step": 10747 }, { "epoch": 1.9859685186682219, "grad_norm": 0.0843670666217804, "learning_rate": 5.419615500461316e-06, "loss": 0.6462908387184143, "step": 10748 }, { "epoch": 1.9861532953771177, "grad_norm": 0.09763434529304504, "learning_rate": 5.41784224678552e-06, "loss": 0.5311712026596069, "step": 10749 }, { "epoch": 1.9863380720860135, "grad_norm": 0.06457974761724472, "learning_rate": 5.416069175465274e-06, "loss": 0.3622543513774872, "step": 10750 }, { "epoch": 1.9865228487949094, "grad_norm": 0.06540407985448837, "learning_rate": 5.41429628657113e-06, "loss": 0.4064207673072815, "step": 10751 }, { "epoch": 1.9867076255038052, "grad_norm": 0.09722238779067993, "learning_rate": 5.412523580173647e-06, "loss": 0.6162816286087036, "step": 10752 }, { "epoch": 1.986892402212701, "grad_norm": 0.0980430468916893, "learning_rate": 5.410751056343376e-06, "loss": 0.6720651984214783, "step": 10753 }, { "epoch": 1.9870771789215969, "grad_norm": 0.10663004219532013, "learning_rate": 5.4089787151508525e-06, "loss": 0.7572103142738342, "step": 10754 }, { "epoch": 1.9872619556304927, "grad_norm": 0.08628568798303604, "learning_rate": 5.407206556666612e-06, "loss": 0.5272665619850159, "step": 10755 }, { "epoch": 1.9874467323393885, "grad_norm": 0.07148962467908859, "learning_rate": 5.405434580961182e-06, "loss": 0.433988094329834, "step": 10756 }, { "epoch": 1.9876315090482843, "grad_norm": 0.07028964906930923, "learning_rate": 5.403662788105081e-06, "loss": 0.44745177030563354, "step": 10757 }, { "epoch": 1.9878162857571802, "grad_norm": 0.0810186043381691, "learning_rate": 5.401891178168821e-06, "loss": 0.5359044075012207, "step": 10758 }, { "epoch": 1.988001062466076, "grad_norm": 0.09937306493520737, "learning_rate": 5.40011975122291e-06, "loss": 0.7215362191200256, "step": 10759 }, { "epoch": 1.988185839174972, "grad_norm": 0.08598115295171738, "learning_rate": 5.398348507337839e-06, "loss": 0.5261132121086121, "step": 10760 }, { "epoch": 1.9883706158838679, "grad_norm": 0.08206876367330551, "learning_rate": 5.3965774465840985e-06, "loss": 0.4480670094490051, "step": 10761 }, { "epoch": 1.9885553925927637, "grad_norm": 0.0705202966928482, "learning_rate": 5.394806569032174e-06, "loss": 0.3446158170700073, "step": 10762 }, { "epoch": 1.9887401693016595, "grad_norm": 0.08046627044677734, "learning_rate": 5.3930358747525415e-06, "loss": 0.47605058550834656, "step": 10763 }, { "epoch": 1.9889249460105554, "grad_norm": 0.08263932168483734, "learning_rate": 5.39126536381567e-06, "loss": 0.45277732610702515, "step": 10764 }, { "epoch": 1.9891097227194512, "grad_norm": 0.07077358663082123, "learning_rate": 5.389495036292016e-06, "loss": 0.48961758613586426, "step": 10765 }, { "epoch": 1.989294499428347, "grad_norm": 0.06984628736972809, "learning_rate": 5.387724892252034e-06, "loss": 0.3571658432483673, "step": 10766 }, { "epoch": 1.9894792761372428, "grad_norm": 0.0704636201262474, "learning_rate": 5.385954931766175e-06, "loss": 0.43523016571998596, "step": 10767 }, { "epoch": 1.989664052846139, "grad_norm": 0.08782478421926498, "learning_rate": 5.384185154904872e-06, "loss": 0.6328921914100647, "step": 10768 }, { "epoch": 1.9898488295550347, "grad_norm": 0.0787673145532608, "learning_rate": 5.382415561738555e-06, "loss": 0.4771063029766083, "step": 10769 }, { "epoch": 1.9900336062639306, "grad_norm": 0.06567374616861343, "learning_rate": 5.380646152337657e-06, "loss": 0.39580053091049194, "step": 10770 }, { "epoch": 1.9902183829728264, "grad_norm": 0.08275172114372253, "learning_rate": 5.378876926772588e-06, "loss": 0.5587243437767029, "step": 10771 }, { "epoch": 1.9904031596817222, "grad_norm": 0.08253266662359238, "learning_rate": 5.377107885113759e-06, "loss": 0.5211490392684937, "step": 10772 }, { "epoch": 1.990587936390618, "grad_norm": 0.08791507035493851, "learning_rate": 5.375339027431579e-06, "loss": 0.5906206369400024, "step": 10773 }, { "epoch": 1.9907727130995139, "grad_norm": 0.06841687858104706, "learning_rate": 5.373570353796431e-06, "loss": 0.37706512212753296, "step": 10774 }, { "epoch": 1.9909574898084097, "grad_norm": 0.062276240438222885, "learning_rate": 5.371801864278709e-06, "loss": 0.4059755504131317, "step": 10775 }, { "epoch": 1.9911422665173055, "grad_norm": 0.0774695873260498, "learning_rate": 5.370033558948793e-06, "loss": 0.5846366286277771, "step": 10776 }, { "epoch": 1.9913270432262014, "grad_norm": 0.0987345427274704, "learning_rate": 5.368265437877056e-06, "loss": 0.6710880398750305, "step": 10777 }, { "epoch": 1.9915118199350972, "grad_norm": 0.06954395771026611, "learning_rate": 5.366497501133865e-06, "loss": 0.36767855286598206, "step": 10778 }, { "epoch": 1.991696596643993, "grad_norm": 0.08661821484565735, "learning_rate": 5.364729748789579e-06, "loss": 0.6453872323036194, "step": 10779 }, { "epoch": 1.9918813733528888, "grad_norm": 0.07232673466205597, "learning_rate": 5.362962180914545e-06, "loss": 0.37234097719192505, "step": 10780 }, { "epoch": 1.9920661500617847, "grad_norm": 0.06527242809534073, "learning_rate": 5.361194797579108e-06, "loss": 0.45118600130081177, "step": 10781 }, { "epoch": 1.9922509267706805, "grad_norm": 0.09439390897750854, "learning_rate": 5.3594275988536045e-06, "loss": 0.6040094494819641, "step": 10782 }, { "epoch": 1.9924357034795763, "grad_norm": 0.061494261026382446, "learning_rate": 5.357660584808364e-06, "loss": 0.348712295293808, "step": 10783 }, { "epoch": 1.9926204801884722, "grad_norm": 0.07200989127159119, "learning_rate": 5.355893755513714e-06, "loss": 0.38869237899780273, "step": 10784 }, { "epoch": 1.992805256897368, "grad_norm": 0.08273415267467499, "learning_rate": 5.354127111039957e-06, "loss": 0.6209052801132202, "step": 10785 }, { "epoch": 1.9929900336062638, "grad_norm": 0.07028911262750626, "learning_rate": 5.3523606514574066e-06, "loss": 0.47335246205329895, "step": 10786 }, { "epoch": 1.9931748103151596, "grad_norm": 0.0690479502081871, "learning_rate": 5.350594376836366e-06, "loss": 0.4047301411628723, "step": 10787 }, { "epoch": 1.9933595870240555, "grad_norm": 0.0809905007481575, "learning_rate": 5.348828287247119e-06, "loss": 0.5023267269134521, "step": 10788 }, { "epoch": 1.9935443637329513, "grad_norm": 0.07330295443534851, "learning_rate": 5.347062382759951e-06, "loss": 0.4426124691963196, "step": 10789 }, { "epoch": 1.9937291404418473, "grad_norm": 0.07849891483783722, "learning_rate": 5.3452966634451494e-06, "loss": 0.46866852045059204, "step": 10790 }, { "epoch": 1.9939139171507432, "grad_norm": 0.0910225436091423, "learning_rate": 5.343531129372976e-06, "loss": 0.6327340006828308, "step": 10791 }, { "epoch": 1.994098693859639, "grad_norm": 0.09518351405858994, "learning_rate": 5.341765780613695e-06, "loss": 0.5966469645500183, "step": 10792 }, { "epoch": 1.9942834705685348, "grad_norm": 0.08670774847269058, "learning_rate": 5.340000617237564e-06, "loss": 0.6537419557571411, "step": 10793 }, { "epoch": 1.9944682472774307, "grad_norm": 0.07579492032527924, "learning_rate": 5.338235639314827e-06, "loss": 0.5172703862190247, "step": 10794 }, { "epoch": 1.9946530239863265, "grad_norm": 0.06951619684696198, "learning_rate": 5.3364708469157265e-06, "loss": 0.27607613801956177, "step": 10795 }, { "epoch": 1.9948378006952223, "grad_norm": 0.07942194491624832, "learning_rate": 5.334706240110497e-06, "loss": 0.47532564401626587, "step": 10796 }, { "epoch": 1.9950225774041184, "grad_norm": 0.07299544662237167, "learning_rate": 5.3329418189693615e-06, "loss": 0.4442344009876251, "step": 10797 }, { "epoch": 1.9952073541130142, "grad_norm": 0.082549549639225, "learning_rate": 5.3311775835625455e-06, "loss": 0.45624038577079773, "step": 10798 }, { "epoch": 1.99539213082191, "grad_norm": 0.08699746429920197, "learning_rate": 5.329413533960251e-06, "loss": 0.5316396951675415, "step": 10799 }, { "epoch": 1.9955769075308059, "grad_norm": 0.08323982357978821, "learning_rate": 5.327649670232684e-06, "loss": 0.48319756984710693, "step": 10800 }, { "epoch": 1.9957616842397017, "grad_norm": 0.0729801207780838, "learning_rate": 5.325885992450043e-06, "loss": 0.4878920614719391, "step": 10801 }, { "epoch": 1.9959464609485975, "grad_norm": 0.07294317334890366, "learning_rate": 5.324122500682516e-06, "loss": 0.42991456389427185, "step": 10802 }, { "epoch": 1.9961312376574933, "grad_norm": 0.0744498074054718, "learning_rate": 5.322359195000284e-06, "loss": 0.43306389451026917, "step": 10803 }, { "epoch": 1.9963160143663892, "grad_norm": 0.07690717279911041, "learning_rate": 5.320596075473527e-06, "loss": 0.43909937143325806, "step": 10804 }, { "epoch": 1.996500791075285, "grad_norm": 0.10090182721614838, "learning_rate": 5.318833142172402e-06, "loss": 0.6324017643928528, "step": 10805 }, { "epoch": 1.9966855677841808, "grad_norm": 0.07912295311689377, "learning_rate": 5.31707039516707e-06, "loss": 0.48258641362190247, "step": 10806 }, { "epoch": 1.9968703444930767, "grad_norm": 0.0740799680352211, "learning_rate": 5.315307834527692e-06, "loss": 0.4834136664867401, "step": 10807 }, { "epoch": 1.9970551212019725, "grad_norm": 0.0663553848862648, "learning_rate": 5.313545460324401e-06, "loss": 0.4489022195339203, "step": 10808 }, { "epoch": 1.9972398979108683, "grad_norm": 0.07279397547245026, "learning_rate": 5.311783272627333e-06, "loss": 0.40262991189956665, "step": 10809 }, { "epoch": 1.9974246746197641, "grad_norm": 0.07630464434623718, "learning_rate": 5.310021271506634e-06, "loss": 0.44888296723365784, "step": 10810 }, { "epoch": 1.99760945132866, "grad_norm": 0.07023772597312927, "learning_rate": 5.3082594570324094e-06, "loss": 0.4138171970844269, "step": 10811 }, { "epoch": 1.9977942280375558, "grad_norm": 0.0694822445511818, "learning_rate": 5.306497829274785e-06, "loss": 0.38712769746780396, "step": 10812 }, { "epoch": 1.9979790047464516, "grad_norm": 0.06312718242406845, "learning_rate": 5.3047363883038575e-06, "loss": 0.3996821939945221, "step": 10813 }, { "epoch": 1.9981637814553475, "grad_norm": 0.08352584391832352, "learning_rate": 5.302975134189734e-06, "loss": 0.46897953748703003, "step": 10814 }, { "epoch": 1.9983485581642433, "grad_norm": 0.07533421367406845, "learning_rate": 5.3012140670025035e-06, "loss": 0.3818869888782501, "step": 10815 }, { "epoch": 1.9985333348731391, "grad_norm": 0.07463730871677399, "learning_rate": 5.299453186812253e-06, "loss": 0.5138566493988037, "step": 10816 }, { "epoch": 1.998718111582035, "grad_norm": 0.08611175417900085, "learning_rate": 5.2976924936890595e-06, "loss": 0.5639271140098572, "step": 10817 }, { "epoch": 1.9989028882909308, "grad_norm": 0.08599690347909927, "learning_rate": 5.295931987702998e-06, "loss": 0.43813201785087585, "step": 10818 }, { "epoch": 1.9990876649998268, "grad_norm": 0.06455601006746292, "learning_rate": 5.294171668924121e-06, "loss": 0.38193124532699585, "step": 10819 }, { "epoch": 1.9992724417087226, "grad_norm": 0.0643867626786232, "learning_rate": 5.292411537422489e-06, "loss": 0.32054176926612854, "step": 10820 }, { "epoch": 1.9994572184176185, "grad_norm": 0.09307566285133362, "learning_rate": 5.29065159326815e-06, "loss": 0.5503920912742615, "step": 10821 }, { "epoch": 1.9996419951265143, "grad_norm": 0.09054537862539291, "learning_rate": 5.288891836531145e-06, "loss": 0.4443438947200775, "step": 10822 }, { "epoch": 1.9998267718354101, "grad_norm": 0.07253723591566086, "learning_rate": 5.287132267281504e-06, "loss": 0.3412075340747833, "step": 10823 }, { "epoch": 2.0, "grad_norm": 0.10073523968458176, "learning_rate": 5.28537288558926e-06, "loss": 0.5651444792747498, "step": 10824 }, { "epoch": 2.000184776708896, "grad_norm": 0.06520046293735504, "learning_rate": 5.283613691524419e-06, "loss": 0.45347392559051514, "step": 10825 }, { "epoch": 2.0003695534177917, "grad_norm": 0.07755034416913986, "learning_rate": 5.281854685156998e-06, "loss": 0.44497644901275635, "step": 10826 }, { "epoch": 2.0005543301266875, "grad_norm": 0.08444507420063019, "learning_rate": 5.280095866557003e-06, "loss": 0.5389115214347839, "step": 10827 }, { "epoch": 2.0007391068355833, "grad_norm": 0.08076513558626175, "learning_rate": 5.278337235794422e-06, "loss": 0.4192272126674652, "step": 10828 }, { "epoch": 2.000923883544479, "grad_norm": 0.08427664637565613, "learning_rate": 5.2765787929392475e-06, "loss": 0.5673325061798096, "step": 10829 }, { "epoch": 2.001108660253375, "grad_norm": 0.07211994379758835, "learning_rate": 5.2748205380614595e-06, "loss": 0.4486318528652191, "step": 10830 }, { "epoch": 2.001293436962271, "grad_norm": 0.06385567784309387, "learning_rate": 5.273062471231029e-06, "loss": 0.43328577280044556, "step": 10831 }, { "epoch": 2.0014782136711666, "grad_norm": 0.04617303982377052, "learning_rate": 5.27130459251793e-06, "loss": 0.25220465660095215, "step": 10832 }, { "epoch": 2.0016629903800625, "grad_norm": 0.07331185042858124, "learning_rate": 5.269546901992108e-06, "loss": 0.3785974085330963, "step": 10833 }, { "epoch": 2.0018477670889583, "grad_norm": 0.0816309005022049, "learning_rate": 5.267789399723522e-06, "loss": 0.5629788637161255, "step": 10834 }, { "epoch": 2.002032543797854, "grad_norm": 0.07381216436624527, "learning_rate": 5.2660320857821116e-06, "loss": 0.4770171344280243, "step": 10835 }, { "epoch": 2.00221732050675, "grad_norm": 0.0860087126493454, "learning_rate": 5.264274960237812e-06, "loss": 0.6274966597557068, "step": 10836 }, { "epoch": 2.0024020972156458, "grad_norm": 0.06753948330879211, "learning_rate": 5.262518023160554e-06, "loss": 0.35875818133354187, "step": 10837 }, { "epoch": 2.0025868739245416, "grad_norm": 0.07698596268892288, "learning_rate": 5.260761274620261e-06, "loss": 0.49586576223373413, "step": 10838 }, { "epoch": 2.002771650633438, "grad_norm": 0.09860380738973618, "learning_rate": 5.259004714686839e-06, "loss": 0.5277653932571411, "step": 10839 }, { "epoch": 2.0029564273423337, "grad_norm": 0.07372115552425385, "learning_rate": 5.2572483434301944e-06, "loss": 0.4336004853248596, "step": 10840 }, { "epoch": 2.0031412040512295, "grad_norm": 0.06894727051258087, "learning_rate": 5.2554921609202296e-06, "loss": 0.3536815047264099, "step": 10841 }, { "epoch": 2.0033259807601254, "grad_norm": 0.07026606053113937, "learning_rate": 5.253736167226833e-06, "loss": 0.468704491853714, "step": 10842 }, { "epoch": 2.003510757469021, "grad_norm": 0.07699372619390488, "learning_rate": 5.2519803624198865e-06, "loss": 0.448326051235199, "step": 10843 }, { "epoch": 2.003695534177917, "grad_norm": 0.08106248825788498, "learning_rate": 5.250224746569271e-06, "loss": 0.4945138692855835, "step": 10844 }, { "epoch": 2.003880310886813, "grad_norm": 0.08255085349082947, "learning_rate": 5.248469319744848e-06, "loss": 0.5569407939910889, "step": 10845 }, { "epoch": 2.0040650875957087, "grad_norm": 0.06983159482479095, "learning_rate": 5.246714082016483e-06, "loss": 0.3689742088317871, "step": 10846 }, { "epoch": 2.0042498643046045, "grad_norm": 0.0922977477312088, "learning_rate": 5.244959033454022e-06, "loss": 0.49106916785240173, "step": 10847 }, { "epoch": 2.0044346410135003, "grad_norm": 0.05711909383535385, "learning_rate": 5.2432041741273134e-06, "loss": 0.3055451512336731, "step": 10848 }, { "epoch": 2.004619417722396, "grad_norm": 0.07882208377122879, "learning_rate": 5.241449504106202e-06, "loss": 0.4588870704174042, "step": 10849 }, { "epoch": 2.004804194431292, "grad_norm": 0.0810166671872139, "learning_rate": 5.23969502346051e-06, "loss": 0.5236678123474121, "step": 10850 }, { "epoch": 2.004988971140188, "grad_norm": 0.06356259435415268, "learning_rate": 5.237940732260063e-06, "loss": 0.346655011177063, "step": 10851 }, { "epoch": 2.0051737478490836, "grad_norm": 0.09319958090782166, "learning_rate": 5.23618663057468e-06, "loss": 0.46527671813964844, "step": 10852 }, { "epoch": 2.0053585245579795, "grad_norm": 0.07804631441831589, "learning_rate": 5.23443271847416e-06, "loss": 0.37992700934410095, "step": 10853 }, { "epoch": 2.0055433012668753, "grad_norm": 0.08168787509202957, "learning_rate": 5.232678996028311e-06, "loss": 0.46410107612609863, "step": 10854 }, { "epoch": 2.005728077975771, "grad_norm": 0.06901519745588303, "learning_rate": 5.230925463306921e-06, "loss": 0.3564493656158447, "step": 10855 }, { "epoch": 2.005912854684667, "grad_norm": 0.07116210460662842, "learning_rate": 5.229172120379778e-06, "loss": 0.3951399624347687, "step": 10856 }, { "epoch": 2.006097631393563, "grad_norm": 0.05574139207601547, "learning_rate": 5.2274189673166565e-06, "loss": 0.2359476536512375, "step": 10857 }, { "epoch": 2.0062824081024586, "grad_norm": 0.08123526722192764, "learning_rate": 5.225666004187334e-06, "loss": 0.3718389868736267, "step": 10858 }, { "epoch": 2.0064671848113544, "grad_norm": 0.06732258200645447, "learning_rate": 5.2239132310615635e-06, "loss": 0.4056186378002167, "step": 10859 }, { "epoch": 2.0066519615202503, "grad_norm": 0.09004206210374832, "learning_rate": 5.222160648009105e-06, "loss": 0.48015791177749634, "step": 10860 }, { "epoch": 2.006836738229146, "grad_norm": 0.07492027431726456, "learning_rate": 5.2204082550997026e-06, "loss": 0.38669294118881226, "step": 10861 }, { "epoch": 2.007021514938042, "grad_norm": 0.07910634577274323, "learning_rate": 5.2186560524030995e-06, "loss": 0.5493848919868469, "step": 10862 }, { "epoch": 2.0072062916469378, "grad_norm": 0.06139914691448212, "learning_rate": 5.21690403998903e-06, "loss": 0.35784342885017395, "step": 10863 }, { "epoch": 2.0073910683558336, "grad_norm": 0.06231553852558136, "learning_rate": 5.215152217927213e-06, "loss": 0.2554203271865845, "step": 10864 }, { "epoch": 2.0075758450647294, "grad_norm": 0.07526741176843643, "learning_rate": 5.213400586287366e-06, "loss": 0.41515955328941345, "step": 10865 }, { "epoch": 2.0077606217736252, "grad_norm": 0.07477451860904694, "learning_rate": 5.211649145139205e-06, "loss": 0.36199766397476196, "step": 10866 }, { "epoch": 2.007945398482521, "grad_norm": 0.10428471118211746, "learning_rate": 5.209897894552422e-06, "loss": 0.588308572769165, "step": 10867 }, { "epoch": 2.0081301751914173, "grad_norm": 0.06379430741071701, "learning_rate": 5.208146834596715e-06, "loss": 0.3397676944732666, "step": 10868 }, { "epoch": 2.008314951900313, "grad_norm": 0.07628770172595978, "learning_rate": 5.206395965341778e-06, "loss": 0.3742277920246124, "step": 10869 }, { "epoch": 2.008499728609209, "grad_norm": 0.08512669056653976, "learning_rate": 5.2046452868572815e-06, "loss": 0.42715978622436523, "step": 10870 }, { "epoch": 2.008684505318105, "grad_norm": 0.07170064002275467, "learning_rate": 5.2028947992129e-06, "loss": 0.3647734522819519, "step": 10871 }, { "epoch": 2.0088692820270007, "grad_norm": 0.0802338495850563, "learning_rate": 5.2011445024783e-06, "loss": 0.43790730834007263, "step": 10872 }, { "epoch": 2.0090540587358965, "grad_norm": 0.09971331059932709, "learning_rate": 5.199394396723132e-06, "loss": 0.4873550534248352, "step": 10873 }, { "epoch": 2.0092388354447923, "grad_norm": 0.08621958643198013, "learning_rate": 5.197644482017048e-06, "loss": 0.461483359336853, "step": 10874 }, { "epoch": 2.009423612153688, "grad_norm": 0.0824543908238411, "learning_rate": 5.195894758429689e-06, "loss": 0.4282684326171875, "step": 10875 }, { "epoch": 2.009608388862584, "grad_norm": 0.07176830619573593, "learning_rate": 5.194145226030688e-06, "loss": 0.3931328058242798, "step": 10876 }, { "epoch": 2.00979316557148, "grad_norm": 0.07633424550294876, "learning_rate": 5.192395884889676e-06, "loss": 0.45451030135154724, "step": 10877 }, { "epoch": 2.0099779422803756, "grad_norm": 0.08542587608098984, "learning_rate": 5.190646735076262e-06, "loss": 0.4537266492843628, "step": 10878 }, { "epoch": 2.0101627189892715, "grad_norm": 0.09187118709087372, "learning_rate": 5.188897776660062e-06, "loss": 0.5085985064506531, "step": 10879 }, { "epoch": 2.0103474956981673, "grad_norm": 0.08106537163257599, "learning_rate": 5.187149009710681e-06, "loss": 0.5278698205947876, "step": 10880 }, { "epoch": 2.010532272407063, "grad_norm": 0.06416518241167068, "learning_rate": 5.185400434297707e-06, "loss": 0.31291911005973816, "step": 10881 }, { "epoch": 2.010717049115959, "grad_norm": 0.05878720059990883, "learning_rate": 5.183652050490735e-06, "loss": 0.2681567370891571, "step": 10882 }, { "epoch": 2.0109018258248548, "grad_norm": 0.07457895576953888, "learning_rate": 5.181903858359346e-06, "loss": 0.3523574769496918, "step": 10883 }, { "epoch": 2.0110866025337506, "grad_norm": 0.0683598443865776, "learning_rate": 5.180155857973106e-06, "loss": 0.3450174629688263, "step": 10884 }, { "epoch": 2.0112713792426464, "grad_norm": 0.07570520788431168, "learning_rate": 5.178408049401584e-06, "loss": 0.4106927216053009, "step": 10885 }, { "epoch": 2.0114561559515423, "grad_norm": 0.05980301275849342, "learning_rate": 5.176660432714342e-06, "loss": 0.28846660256385803, "step": 10886 }, { "epoch": 2.011640932660438, "grad_norm": 0.08202092349529266, "learning_rate": 5.174913007980919e-06, "loss": 0.4278833866119385, "step": 10887 }, { "epoch": 2.011825709369334, "grad_norm": 0.07988191395998001, "learning_rate": 5.173165775270859e-06, "loss": 0.4565815031528473, "step": 10888 }, { "epoch": 2.0120104860782297, "grad_norm": 0.07050027698278427, "learning_rate": 5.171418734653707e-06, "loss": 0.3265901207923889, "step": 10889 }, { "epoch": 2.0121952627871256, "grad_norm": 0.09780313819646835, "learning_rate": 5.16967188619898e-06, "loss": 0.6623883843421936, "step": 10890 }, { "epoch": 2.0123800394960214, "grad_norm": 0.07266128063201904, "learning_rate": 5.167925229976199e-06, "loss": 0.33542200922966003, "step": 10891 }, { "epoch": 2.0125648162049172, "grad_norm": 0.06911231577396393, "learning_rate": 5.16617876605488e-06, "loss": 0.28636428713798523, "step": 10892 }, { "epoch": 2.012749592913813, "grad_norm": 0.09343921393156052, "learning_rate": 5.164432494504519e-06, "loss": 0.5033407211303711, "step": 10893 }, { "epoch": 2.012934369622709, "grad_norm": 0.0805625319480896, "learning_rate": 5.1626864153946175e-06, "loss": 0.4543337821960449, "step": 10894 }, { "epoch": 2.0131191463316047, "grad_norm": 0.07634482532739639, "learning_rate": 5.160940528794661e-06, "loss": 0.48865413665771484, "step": 10895 }, { "epoch": 2.0133039230405005, "grad_norm": 0.11683768779039383, "learning_rate": 5.159194834774132e-06, "loss": 0.420229971408844, "step": 10896 }, { "epoch": 2.0134886997493964, "grad_norm": 0.07559465616941452, "learning_rate": 5.1574493334025084e-06, "loss": 0.4204034209251404, "step": 10897 }, { "epoch": 2.0136734764582926, "grad_norm": 0.08906613290309906, "learning_rate": 5.155704024749249e-06, "loss": 0.49783578515052795, "step": 10898 }, { "epoch": 2.0138582531671885, "grad_norm": 0.08135172724723816, "learning_rate": 5.153958908883811e-06, "loss": 0.427848219871521, "step": 10899 }, { "epoch": 2.0140430298760843, "grad_norm": 0.08237715810537338, "learning_rate": 5.1522139858756514e-06, "loss": 0.4681159257888794, "step": 10900 }, { "epoch": 2.01422780658498, "grad_norm": 0.05532315745949745, "learning_rate": 5.150469255794199e-06, "loss": 0.25912758708000183, "step": 10901 }, { "epoch": 2.014412583293876, "grad_norm": 0.07079538702964783, "learning_rate": 5.148724718708904e-06, "loss": 0.4080043435096741, "step": 10902 }, { "epoch": 2.014597360002772, "grad_norm": 0.06199576333165169, "learning_rate": 5.146980374689192e-06, "loss": 0.3021320700645447, "step": 10903 }, { "epoch": 2.0147821367116676, "grad_norm": 0.0703669860959053, "learning_rate": 5.145236223804473e-06, "loss": 0.4637066721916199, "step": 10904 }, { "epoch": 2.0149669134205634, "grad_norm": 0.08532287925481796, "learning_rate": 5.143492266124164e-06, "loss": 0.5112899541854858, "step": 10905 }, { "epoch": 2.0151516901294593, "grad_norm": 0.0936102494597435, "learning_rate": 5.1417485017176714e-06, "loss": 0.47748786211013794, "step": 10906 }, { "epoch": 2.015336466838355, "grad_norm": 0.08761214464902878, "learning_rate": 5.140004930654385e-06, "loss": 0.6001975536346436, "step": 10907 }, { "epoch": 2.015521243547251, "grad_norm": 0.09377985447645187, "learning_rate": 5.138261553003696e-06, "loss": 0.5890383124351501, "step": 10908 }, { "epoch": 2.0157060202561468, "grad_norm": 0.08416535705327988, "learning_rate": 5.136518368834993e-06, "loss": 0.47278961539268494, "step": 10909 }, { "epoch": 2.0158907969650426, "grad_norm": 0.06976797431707382, "learning_rate": 5.134775378217638e-06, "loss": 0.3145328164100647, "step": 10910 }, { "epoch": 2.0160755736739384, "grad_norm": 0.09272748231887817, "learning_rate": 5.133032581221007e-06, "loss": 0.6585069298744202, "step": 10911 }, { "epoch": 2.0162603503828342, "grad_norm": 0.07258772850036621, "learning_rate": 5.131289977914449e-06, "loss": 0.3980956971645355, "step": 10912 }, { "epoch": 2.01644512709173, "grad_norm": 0.07333315908908844, "learning_rate": 5.129547568367317e-06, "loss": 0.3659195303916931, "step": 10913 }, { "epoch": 2.016629903800626, "grad_norm": 0.06620004773139954, "learning_rate": 5.127805352648954e-06, "loss": 0.3466736376285553, "step": 10914 }, { "epoch": 2.0168146805095217, "grad_norm": 0.08554978668689728, "learning_rate": 5.126063330828694e-06, "loss": 0.41909193992614746, "step": 10915 }, { "epoch": 2.0169994572184176, "grad_norm": 0.08530129492282867, "learning_rate": 5.124321502975866e-06, "loss": 0.4803772270679474, "step": 10916 }, { "epoch": 2.0171842339273134, "grad_norm": 0.08911938220262527, "learning_rate": 5.1225798691597915e-06, "loss": 0.45174241065979004, "step": 10917 }, { "epoch": 2.017369010636209, "grad_norm": 0.06977102160453796, "learning_rate": 5.120838429449775e-06, "loss": 0.340168297290802, "step": 10918 }, { "epoch": 2.017553787345105, "grad_norm": 0.0680844634771347, "learning_rate": 5.119097183915124e-06, "loss": 0.34571701288223267, "step": 10919 }, { "epoch": 2.017738564054001, "grad_norm": 0.0721874088048935, "learning_rate": 5.117356132625138e-06, "loss": 0.4670308232307434, "step": 10920 }, { "epoch": 2.0179233407628967, "grad_norm": 0.0811346173286438, "learning_rate": 5.115615275649095e-06, "loss": 0.38460540771484375, "step": 10921 }, { "epoch": 2.0181081174717925, "grad_norm": 0.08082343637943268, "learning_rate": 5.113874613056287e-06, "loss": 0.43244844675064087, "step": 10922 }, { "epoch": 2.0182928941806884, "grad_norm": 0.06001897156238556, "learning_rate": 5.112134144915986e-06, "loss": 0.2741605341434479, "step": 10923 }, { "epoch": 2.018477670889584, "grad_norm": 0.05844532698392868, "learning_rate": 5.11039387129745e-06, "loss": 0.36012426018714905, "step": 10924 }, { "epoch": 2.01866244759848, "grad_norm": 0.1003597304224968, "learning_rate": 5.108653792269941e-06, "loss": 0.5373878479003906, "step": 10925 }, { "epoch": 2.018847224307376, "grad_norm": 0.07224072515964508, "learning_rate": 5.106913907902711e-06, "loss": 0.3647426664829254, "step": 10926 }, { "epoch": 2.019032001016272, "grad_norm": 0.06438612192869186, "learning_rate": 5.105174218264995e-06, "loss": 0.3541163206100464, "step": 10927 }, { "epoch": 2.019216777725168, "grad_norm": 0.07255525141954422, "learning_rate": 5.103434723426032e-06, "loss": 0.4254012703895569, "step": 10928 }, { "epoch": 2.0194015544340638, "grad_norm": 0.09289377927780151, "learning_rate": 5.101695423455046e-06, "loss": 0.39545321464538574, "step": 10929 }, { "epoch": 2.0195863311429596, "grad_norm": 0.08766859769821167, "learning_rate": 5.09995631842126e-06, "loss": 0.4713236689567566, "step": 10930 }, { "epoch": 2.0197711078518554, "grad_norm": 0.08483327925205231, "learning_rate": 5.098217408393884e-06, "loss": 0.4761367738246918, "step": 10931 }, { "epoch": 2.0199558845607513, "grad_norm": 0.06505721807479858, "learning_rate": 5.096478693442117e-06, "loss": 0.3717211186885834, "step": 10932 }, { "epoch": 2.020140661269647, "grad_norm": 0.09126225113868713, "learning_rate": 5.094740173635156e-06, "loss": 0.43229779601097107, "step": 10933 }, { "epoch": 2.020325437978543, "grad_norm": 0.07946305721998215, "learning_rate": 5.0930018490421895e-06, "loss": 0.4669344127178192, "step": 10934 }, { "epoch": 2.0205102146874387, "grad_norm": 0.09561219811439514, "learning_rate": 5.091263719732398e-06, "loss": 0.4787537753582001, "step": 10935 }, { "epoch": 2.0206949913963346, "grad_norm": 0.09669752418994904, "learning_rate": 5.089525785774951e-06, "loss": 0.5886106491088867, "step": 10936 }, { "epoch": 2.0208797681052304, "grad_norm": 0.10397924482822418, "learning_rate": 5.087788047239021e-06, "loss": 0.5654873251914978, "step": 10937 }, { "epoch": 2.0210645448141262, "grad_norm": 0.06124212220311165, "learning_rate": 5.086050504193753e-06, "loss": 0.4133400321006775, "step": 10938 }, { "epoch": 2.021249321523022, "grad_norm": 0.08833230286836624, "learning_rate": 5.084313156708303e-06, "loss": 0.5162288546562195, "step": 10939 }, { "epoch": 2.021434098231918, "grad_norm": 0.09045036882162094, "learning_rate": 5.082576004851808e-06, "loss": 0.5982534885406494, "step": 10940 }, { "epoch": 2.0216188749408137, "grad_norm": 0.0780353844165802, "learning_rate": 5.080839048693405e-06, "loss": 0.519133985042572, "step": 10941 }, { "epoch": 2.0218036516497095, "grad_norm": 0.0687236413359642, "learning_rate": 5.07910228830222e-06, "loss": 0.3705589771270752, "step": 10942 }, { "epoch": 2.0219884283586054, "grad_norm": 0.07354672998189926, "learning_rate": 5.077365723747366e-06, "loss": 0.41299891471862793, "step": 10943 }, { "epoch": 2.022173205067501, "grad_norm": 0.08132113516330719, "learning_rate": 5.075629355097955e-06, "loss": 0.4075068533420563, "step": 10944 }, { "epoch": 2.022357981776397, "grad_norm": 0.06150183826684952, "learning_rate": 5.073893182423093e-06, "loss": 0.37654855847358704, "step": 10945 }, { "epoch": 2.022542758485293, "grad_norm": 0.10539187490940094, "learning_rate": 5.072157205791866e-06, "loss": 0.4971143901348114, "step": 10946 }, { "epoch": 2.0227275351941887, "grad_norm": 0.05700913444161415, "learning_rate": 5.070421425273366e-06, "loss": 0.3304547071456909, "step": 10947 }, { "epoch": 2.0229123119030845, "grad_norm": 0.08524139970541, "learning_rate": 5.06868584093667e-06, "loss": 0.5176447629928589, "step": 10948 }, { "epoch": 2.0230970886119803, "grad_norm": 0.08728422969579697, "learning_rate": 5.06695045285085e-06, "loss": 0.408867746591568, "step": 10949 }, { "epoch": 2.023281865320876, "grad_norm": 0.09003953635692596, "learning_rate": 5.065215261084968e-06, "loss": 0.4363194704055786, "step": 10950 }, { "epoch": 2.023466642029772, "grad_norm": 0.0999642014503479, "learning_rate": 5.063480265708083e-06, "loss": 0.5393664240837097, "step": 10951 }, { "epoch": 2.023651418738668, "grad_norm": 0.06618688255548477, "learning_rate": 5.061745466789236e-06, "loss": 0.3658475875854492, "step": 10952 }, { "epoch": 2.0238361954475637, "grad_norm": 0.08020365238189697, "learning_rate": 5.060010864397469e-06, "loss": 0.5106897354125977, "step": 10953 }, { "epoch": 2.0240209721564595, "grad_norm": 0.07106049358844757, "learning_rate": 5.058276458601814e-06, "loss": 0.30423349142074585, "step": 10954 }, { "epoch": 2.0242057488653553, "grad_norm": 0.0817643254995346, "learning_rate": 5.056542249471297e-06, "loss": 0.4568421542644501, "step": 10955 }, { "epoch": 2.024390525574251, "grad_norm": 0.06630393117666245, "learning_rate": 5.054808237074931e-06, "loss": 0.4356433153152466, "step": 10956 }, { "epoch": 2.0245753022831474, "grad_norm": 0.10030622035264969, "learning_rate": 5.05307442148173e-06, "loss": 0.5095507502555847, "step": 10957 }, { "epoch": 2.0247600789920432, "grad_norm": 0.07793160527944565, "learning_rate": 5.051340802760686e-06, "loss": 0.35566869378089905, "step": 10958 }, { "epoch": 2.024944855700939, "grad_norm": 0.07248266786336899, "learning_rate": 5.049607380980799e-06, "loss": 0.3697780668735504, "step": 10959 }, { "epoch": 2.025129632409835, "grad_norm": 0.09467242658138275, "learning_rate": 5.047874156211044e-06, "loss": 0.44589683413505554, "step": 10960 }, { "epoch": 2.0253144091187307, "grad_norm": 0.0702122300863266, "learning_rate": 5.046141128520408e-06, "loss": 0.35430559515953064, "step": 10961 }, { "epoch": 2.0254991858276266, "grad_norm": 0.07696325331926346, "learning_rate": 5.04440829797786e-06, "loss": 0.3792334794998169, "step": 10962 }, { "epoch": 2.0256839625365224, "grad_norm": 0.07569151371717453, "learning_rate": 5.042675664652353e-06, "loss": 0.3286278247833252, "step": 10963 }, { "epoch": 2.025868739245418, "grad_norm": 0.0927334651350975, "learning_rate": 5.040943228612845e-06, "loss": 0.4318874478340149, "step": 10964 }, { "epoch": 2.026053515954314, "grad_norm": 0.08600287139415741, "learning_rate": 5.039210989928287e-06, "loss": 0.48451122641563416, "step": 10965 }, { "epoch": 2.02623829266321, "grad_norm": 0.07625958323478699, "learning_rate": 5.037478948667607e-06, "loss": 0.3884626626968384, "step": 10966 }, { "epoch": 2.0264230693721057, "grad_norm": 0.061320796608924866, "learning_rate": 5.035747104899738e-06, "loss": 0.36891666054725647, "step": 10967 }, { "epoch": 2.0266078460810015, "grad_norm": 0.08422596752643585, "learning_rate": 5.034015458693604e-06, "loss": 0.43773600459098816, "step": 10968 }, { "epoch": 2.0267926227898974, "grad_norm": 0.11182351410388947, "learning_rate": 5.032284010118118e-06, "loss": 0.604387640953064, "step": 10969 }, { "epoch": 2.026977399498793, "grad_norm": 0.0635233074426651, "learning_rate": 5.030552759242186e-06, "loss": 0.28664737939834595, "step": 10970 }, { "epoch": 2.027162176207689, "grad_norm": 0.09212157130241394, "learning_rate": 5.028821706134712e-06, "loss": 0.4764632284641266, "step": 10971 }, { "epoch": 2.027346952916585, "grad_norm": 0.08039910346269608, "learning_rate": 5.027090850864577e-06, "loss": 0.4236772358417511, "step": 10972 }, { "epoch": 2.0275317296254807, "grad_norm": 0.06990626454353333, "learning_rate": 5.025360193500667e-06, "loss": 0.381586492061615, "step": 10973 }, { "epoch": 2.0277165063343765, "grad_norm": 0.06147979572415352, "learning_rate": 5.023629734111858e-06, "loss": 0.34560880064964294, "step": 10974 }, { "epoch": 2.0279012830432723, "grad_norm": 0.06412357836961746, "learning_rate": 5.021899472767015e-06, "loss": 0.36367711424827576, "step": 10975 }, { "epoch": 2.028086059752168, "grad_norm": 0.07729597389698029, "learning_rate": 5.020169409535005e-06, "loss": 0.34711959958076477, "step": 10976 }, { "epoch": 2.028270836461064, "grad_norm": 0.0709250196814537, "learning_rate": 5.0184395444846676e-06, "loss": 0.360468327999115, "step": 10977 }, { "epoch": 2.02845561316996, "grad_norm": 0.05430329591035843, "learning_rate": 5.0167098776848515e-06, "loss": 0.24139876663684845, "step": 10978 }, { "epoch": 2.0286403898788556, "grad_norm": 0.08792615681886673, "learning_rate": 5.014980409204395e-06, "loss": 0.48062777519226074, "step": 10979 }, { "epoch": 2.0288251665877515, "grad_norm": 0.10243596881628036, "learning_rate": 5.013251139112114e-06, "loss": 0.6778828501701355, "step": 10980 }, { "epoch": 2.0290099432966473, "grad_norm": 0.07227854430675507, "learning_rate": 5.0115220674768405e-06, "loss": 0.3224528133869171, "step": 10981 }, { "epoch": 2.029194720005543, "grad_norm": 0.08872295916080475, "learning_rate": 5.009793194367385e-06, "loss": 0.51368647813797, "step": 10982 }, { "epoch": 2.029379496714439, "grad_norm": 0.07591360807418823, "learning_rate": 5.008064519852545e-06, "loss": 0.3432996869087219, "step": 10983 }, { "epoch": 2.029564273423335, "grad_norm": 0.061702460050582886, "learning_rate": 5.006336044001119e-06, "loss": 0.30575910210609436, "step": 10984 }, { "epoch": 2.0297490501322306, "grad_norm": 0.07337259501218796, "learning_rate": 5.004607766881899e-06, "loss": 0.44703561067581177, "step": 10985 }, { "epoch": 2.029933826841127, "grad_norm": 0.09474797546863556, "learning_rate": 5.002879688563658e-06, "loss": 0.6129400134086609, "step": 10986 }, { "epoch": 2.0301186035500227, "grad_norm": 0.07356349378824234, "learning_rate": 5.0011518091151716e-06, "loss": 0.4627012014389038, "step": 10987 }, { "epoch": 2.0303033802589185, "grad_norm": 0.05867588892579079, "learning_rate": 4.999424128605203e-06, "loss": 0.2929174304008484, "step": 10988 }, { "epoch": 2.0304881569678144, "grad_norm": 0.07791939377784729, "learning_rate": 4.997696647102509e-06, "loss": 0.40584731101989746, "step": 10989 }, { "epoch": 2.03067293367671, "grad_norm": 0.06808242946863174, "learning_rate": 4.995969364675839e-06, "loss": 0.4458301067352295, "step": 10990 }, { "epoch": 2.030857710385606, "grad_norm": 0.08416265994310379, "learning_rate": 4.994242281393936e-06, "loss": 0.43289077281951904, "step": 10991 }, { "epoch": 2.031042487094502, "grad_norm": 0.08570495992898941, "learning_rate": 4.992515397325526e-06, "loss": 0.5073711276054382, "step": 10992 }, { "epoch": 2.0312272638033977, "grad_norm": 0.09325053542852402, "learning_rate": 4.990788712539336e-06, "loss": 0.5359335541725159, "step": 10993 }, { "epoch": 2.0314120405122935, "grad_norm": 0.05882570520043373, "learning_rate": 4.989062227104083e-06, "loss": 0.29013773798942566, "step": 10994 }, { "epoch": 2.0315968172211893, "grad_norm": 0.07409077882766724, "learning_rate": 4.987335941088478e-06, "loss": 0.42141684889793396, "step": 10995 }, { "epoch": 2.031781593930085, "grad_norm": 0.06654199212789536, "learning_rate": 4.985609854561223e-06, "loss": 0.37594857811927795, "step": 10996 }, { "epoch": 2.031966370638981, "grad_norm": 0.06049758940935135, "learning_rate": 4.9838839675910035e-06, "loss": 0.31000638008117676, "step": 10997 }, { "epoch": 2.032151147347877, "grad_norm": 0.09586911648511887, "learning_rate": 4.982158280246508e-06, "loss": 0.5141433477401733, "step": 10998 }, { "epoch": 2.0323359240567727, "grad_norm": 0.07909748703241348, "learning_rate": 4.980432792596419e-06, "loss": 0.4322912096977234, "step": 10999 }, { "epoch": 2.0325207007656685, "grad_norm": 0.06540367752313614, "learning_rate": 4.978707504709394e-06, "loss": 0.3917093276977539, "step": 11000 }, { "epoch": 2.0325207007656685, "eval_loss": 0.5544933080673218, "eval_runtime": 156.4967, "eval_samples_per_second": 116.482, "eval_steps_per_second": 14.563, "step": 11000 }, { "epoch": 2.0327054774745643, "grad_norm": 0.09441975504159927, "learning_rate": 4.976982416654102e-06, "loss": 0.5133340358734131, "step": 11001 }, { "epoch": 2.03289025418346, "grad_norm": 0.054924800992012024, "learning_rate": 4.975257528499201e-06, "loss": 0.2618792951107025, "step": 11002 }, { "epoch": 2.033075030892356, "grad_norm": 0.07110217213630676, "learning_rate": 4.973532840313325e-06, "loss": 0.447940468788147, "step": 11003 }, { "epoch": 2.033259807601252, "grad_norm": 0.08391235768795013, "learning_rate": 4.971808352165116e-06, "loss": 0.38242557644844055, "step": 11004 }, { "epoch": 2.0334445843101476, "grad_norm": 0.08753757923841476, "learning_rate": 4.970084064123208e-06, "loss": 0.5258985757827759, "step": 11005 }, { "epoch": 2.0336293610190435, "grad_norm": 0.07218511402606964, "learning_rate": 4.968359976256213e-06, "loss": 0.4401981234550476, "step": 11006 }, { "epoch": 2.0338141377279393, "grad_norm": 0.08419043570756912, "learning_rate": 4.966636088632749e-06, "loss": 0.4633215665817261, "step": 11007 }, { "epoch": 2.033998914436835, "grad_norm": 0.10938230901956558, "learning_rate": 4.964912401321421e-06, "loss": 0.5555753111839294, "step": 11008 }, { "epoch": 2.034183691145731, "grad_norm": 0.0766386166214943, "learning_rate": 4.963188914390827e-06, "loss": 0.3662005066871643, "step": 11009 }, { "epoch": 2.0343684678546268, "grad_norm": 0.08304692059755325, "learning_rate": 4.961465627909561e-06, "loss": 0.48914024233818054, "step": 11010 }, { "epoch": 2.0345532445635226, "grad_norm": 0.07511365413665771, "learning_rate": 4.959742541946195e-06, "loss": 0.4251895546913147, "step": 11011 }, { "epoch": 2.0347380212724184, "grad_norm": 0.09212878346443176, "learning_rate": 4.958019656569306e-06, "loss": 0.4835797846317291, "step": 11012 }, { "epoch": 2.0349227979813143, "grad_norm": 0.07405047863721848, "learning_rate": 4.956296971847462e-06, "loss": 0.38855189085006714, "step": 11013 }, { "epoch": 2.03510757469021, "grad_norm": 0.07276478409767151, "learning_rate": 4.954574487849218e-06, "loss": 0.3583826422691345, "step": 11014 }, { "epoch": 2.0352923513991064, "grad_norm": 0.05918056517839432, "learning_rate": 4.952852204643124e-06, "loss": 0.32278576493263245, "step": 11015 }, { "epoch": 2.035477128108002, "grad_norm": 0.08996355533599854, "learning_rate": 4.951130122297725e-06, "loss": 0.5059264898300171, "step": 11016 }, { "epoch": 2.035661904816898, "grad_norm": 0.07419190555810928, "learning_rate": 4.949408240881548e-06, "loss": 0.3496807813644409, "step": 11017 }, { "epoch": 2.035846681525794, "grad_norm": 0.08473897725343704, "learning_rate": 4.947686560463122e-06, "loss": 0.42983487248420715, "step": 11018 }, { "epoch": 2.0360314582346897, "grad_norm": 0.08575071394443512, "learning_rate": 4.945965081110967e-06, "loss": 0.4554291367530823, "step": 11019 }, { "epoch": 2.0362162349435855, "grad_norm": 0.08277926594018936, "learning_rate": 4.944243802893584e-06, "loss": 0.4762677550315857, "step": 11020 }, { "epoch": 2.0364010116524813, "grad_norm": 0.07830332964658737, "learning_rate": 4.942522725879483e-06, "loss": 0.4043978750705719, "step": 11021 }, { "epoch": 2.036585788361377, "grad_norm": 0.08838173002004623, "learning_rate": 4.940801850137158e-06, "loss": 0.4758176803588867, "step": 11022 }, { "epoch": 2.036770565070273, "grad_norm": 0.06347142904996872, "learning_rate": 4.939081175735087e-06, "loss": 0.31101086735725403, "step": 11023 }, { "epoch": 2.036955341779169, "grad_norm": 0.07915815711021423, "learning_rate": 4.937360702741757e-06, "loss": 0.45772865414619446, "step": 11024 }, { "epoch": 2.0371401184880646, "grad_norm": 0.07311692088842392, "learning_rate": 4.935640431225628e-06, "loss": 0.3552285134792328, "step": 11025 }, { "epoch": 2.0373248951969605, "grad_norm": 0.0813852846622467, "learning_rate": 4.933920361255164e-06, "loss": 0.42151376605033875, "step": 11026 }, { "epoch": 2.0375096719058563, "grad_norm": 0.06344735622406006, "learning_rate": 4.932200492898822e-06, "loss": 0.3813314437866211, "step": 11027 }, { "epoch": 2.037694448614752, "grad_norm": 0.06767319142818451, "learning_rate": 4.930480826225043e-06, "loss": 0.350860595703125, "step": 11028 }, { "epoch": 2.037879225323648, "grad_norm": 0.07548850029706955, "learning_rate": 4.928761361302269e-06, "loss": 0.4137412905693054, "step": 11029 }, { "epoch": 2.038064002032544, "grad_norm": 0.08888956904411316, "learning_rate": 4.9270420981989295e-06, "loss": 0.4689168930053711, "step": 11030 }, { "epoch": 2.0382487787414396, "grad_norm": 0.080434650182724, "learning_rate": 4.925323036983439e-06, "loss": 0.4242910146713257, "step": 11031 }, { "epoch": 2.0384335554503354, "grad_norm": 0.07344117015600204, "learning_rate": 4.923604177724216e-06, "loss": 0.39162227511405945, "step": 11032 }, { "epoch": 2.0386183321592313, "grad_norm": 0.11162707954645157, "learning_rate": 4.921885520489664e-06, "loss": 0.6516607403755188, "step": 11033 }, { "epoch": 2.038803108868127, "grad_norm": 0.09089545905590057, "learning_rate": 4.9201670653481816e-06, "loss": 0.5949812531471252, "step": 11034 }, { "epoch": 2.038987885577023, "grad_norm": 0.09269700199365616, "learning_rate": 4.918448812368156e-06, "loss": 0.4980557858943939, "step": 11035 }, { "epoch": 2.0391726622859188, "grad_norm": 0.05755534768104553, "learning_rate": 4.916730761617975e-06, "loss": 0.2817052900791168, "step": 11036 }, { "epoch": 2.0393574389948146, "grad_norm": 0.0832810029387474, "learning_rate": 4.915012913166001e-06, "loss": 0.4219299554824829, "step": 11037 }, { "epoch": 2.0395422157037104, "grad_norm": 0.08774325251579285, "learning_rate": 4.913295267080604e-06, "loss": 0.5183811783790588, "step": 11038 }, { "epoch": 2.0397269924126062, "grad_norm": 0.09232005476951599, "learning_rate": 4.911577823430146e-06, "loss": 0.39076298475265503, "step": 11039 }, { "epoch": 2.039911769121502, "grad_norm": 0.06912195682525635, "learning_rate": 4.909860582282964e-06, "loss": 0.36591362953186035, "step": 11040 }, { "epoch": 2.040096545830398, "grad_norm": 0.07138343155384064, "learning_rate": 4.908143543707412e-06, "loss": 0.3453134298324585, "step": 11041 }, { "epoch": 2.0402813225392937, "grad_norm": 0.06172487884759903, "learning_rate": 4.906426707771813e-06, "loss": 0.2931046783924103, "step": 11042 }, { "epoch": 2.0404660992481896, "grad_norm": 0.08806567639112473, "learning_rate": 4.904710074544495e-06, "loss": 0.3510777950286865, "step": 11043 }, { "epoch": 2.040650875957086, "grad_norm": 0.06607240438461304, "learning_rate": 4.90299364409378e-06, "loss": 0.39713090658187866, "step": 11044 }, { "epoch": 2.0408356526659817, "grad_norm": 0.07734241336584091, "learning_rate": 4.901277416487967e-06, "loss": 0.37471258640289307, "step": 11045 }, { "epoch": 2.0410204293748775, "grad_norm": 0.08430340886116028, "learning_rate": 4.89956139179536e-06, "loss": 0.46838080883026123, "step": 11046 }, { "epoch": 2.0412052060837733, "grad_norm": 0.048094622790813446, "learning_rate": 4.897845570084253e-06, "loss": 0.22031986713409424, "step": 11047 }, { "epoch": 2.041389982792669, "grad_norm": 0.09382618963718414, "learning_rate": 4.896129951422931e-06, "loss": 0.44207218289375305, "step": 11048 }, { "epoch": 2.041574759501565, "grad_norm": 0.09609103947877884, "learning_rate": 4.894414535879668e-06, "loss": 0.693742573261261, "step": 11049 }, { "epoch": 2.041759536210461, "grad_norm": 0.08293814957141876, "learning_rate": 4.892699323522736e-06, "loss": 0.41994351148605347, "step": 11050 }, { "epoch": 2.0419443129193566, "grad_norm": 0.06315720081329346, "learning_rate": 4.890984314420389e-06, "loss": 0.30417191982269287, "step": 11051 }, { "epoch": 2.0421290896282525, "grad_norm": 0.0869455635547638, "learning_rate": 4.889269508640884e-06, "loss": 0.48857417702674866, "step": 11052 }, { "epoch": 2.0423138663371483, "grad_norm": 0.06293217092752457, "learning_rate": 4.887554906252461e-06, "loss": 0.3305082321166992, "step": 11053 }, { "epoch": 2.042498643046044, "grad_norm": 0.0635986402630806, "learning_rate": 4.885840507323359e-06, "loss": 0.2795847952365875, "step": 11054 }, { "epoch": 2.04268341975494, "grad_norm": 0.061714962124824524, "learning_rate": 4.884126311921804e-06, "loss": 0.3719215393066406, "step": 11055 }, { "epoch": 2.0428681964638358, "grad_norm": 0.0834885984659195, "learning_rate": 4.8824123201160205e-06, "loss": 0.4365217983722687, "step": 11056 }, { "epoch": 2.0430529731727316, "grad_norm": 0.07816538214683533, "learning_rate": 4.880698531974212e-06, "loss": 0.40753647685050964, "step": 11057 }, { "epoch": 2.0432377498816274, "grad_norm": 0.0627506822347641, "learning_rate": 4.878984947564589e-06, "loss": 0.338919073343277, "step": 11058 }, { "epoch": 2.0434225265905233, "grad_norm": 0.05984465032815933, "learning_rate": 4.877271566955339e-06, "loss": 0.3647313117980957, "step": 11059 }, { "epoch": 2.043607303299419, "grad_norm": 0.0724530890583992, "learning_rate": 4.875558390214652e-06, "loss": 0.42080751061439514, "step": 11060 }, { "epoch": 2.043792080008315, "grad_norm": 0.0803777351975441, "learning_rate": 4.873845417410714e-06, "loss": 0.5013444423675537, "step": 11061 }, { "epoch": 2.0439768567172107, "grad_norm": 0.10302601754665375, "learning_rate": 4.872132648611689e-06, "loss": 0.6300573348999023, "step": 11062 }, { "epoch": 2.0441616334261066, "grad_norm": 0.062125064432621, "learning_rate": 4.870420083885741e-06, "loss": 0.3633728623390198, "step": 11063 }, { "epoch": 2.0443464101350024, "grad_norm": 0.07537313550710678, "learning_rate": 4.8687077233010286e-06, "loss": 0.36421942710876465, "step": 11064 }, { "epoch": 2.0445311868438982, "grad_norm": 0.07662282884120941, "learning_rate": 4.8669955669256905e-06, "loss": 0.3631492257118225, "step": 11065 }, { "epoch": 2.044715963552794, "grad_norm": 0.06461525708436966, "learning_rate": 4.865283614827871e-06, "loss": 0.3085329830646515, "step": 11066 }, { "epoch": 2.04490074026169, "grad_norm": 0.08214090019464493, "learning_rate": 4.863571867075699e-06, "loss": 0.4510284960269928, "step": 11067 }, { "epoch": 2.0450855169705857, "grad_norm": 0.05756901204586029, "learning_rate": 4.861860323737297e-06, "loss": 0.3061563968658447, "step": 11068 }, { "epoch": 2.0452702936794815, "grad_norm": 0.08127584308385849, "learning_rate": 4.860148984880778e-06, "loss": 0.5320109128952026, "step": 11069 }, { "epoch": 2.0454550703883774, "grad_norm": 0.07574288547039032, "learning_rate": 4.858437850574253e-06, "loss": 0.39699089527130127, "step": 11070 }, { "epoch": 2.045639847097273, "grad_norm": 0.1033301055431366, "learning_rate": 4.8567269208858125e-06, "loss": 0.5131538510322571, "step": 11071 }, { "epoch": 2.045824623806169, "grad_norm": 0.09955132752656937, "learning_rate": 4.85501619588355e-06, "loss": 0.5548917055130005, "step": 11072 }, { "epoch": 2.046009400515065, "grad_norm": 0.05739162489771843, "learning_rate": 4.853305675635544e-06, "loss": 0.23811882734298706, "step": 11073 }, { "epoch": 2.046194177223961, "grad_norm": 0.06883546710014343, "learning_rate": 4.851595360209872e-06, "loss": 0.26252683997154236, "step": 11074 }, { "epoch": 2.046378953932857, "grad_norm": 0.07779659330844879, "learning_rate": 4.849885249674601e-06, "loss": 0.4272381067276001, "step": 11075 }, { "epoch": 2.046563730641753, "grad_norm": 0.07720985263586044, "learning_rate": 4.84817534409778e-06, "loss": 0.48315441608428955, "step": 11076 }, { "epoch": 2.0467485073506486, "grad_norm": 0.0919061005115509, "learning_rate": 4.846465643547462e-06, "loss": 0.5336605906486511, "step": 11077 }, { "epoch": 2.0469332840595444, "grad_norm": 0.08197201788425446, "learning_rate": 4.8447561480916925e-06, "loss": 0.45011091232299805, "step": 11078 }, { "epoch": 2.0471180607684403, "grad_norm": 0.0737820491194725, "learning_rate": 4.8430468577984955e-06, "loss": 0.4351140558719635, "step": 11079 }, { "epoch": 2.047302837477336, "grad_norm": 0.08648515492677689, "learning_rate": 4.841337772735897e-06, "loss": 0.46810683608055115, "step": 11080 }, { "epoch": 2.047487614186232, "grad_norm": 0.0811564177274704, "learning_rate": 4.839628892971922e-06, "loss": 0.37373584508895874, "step": 11081 }, { "epoch": 2.0476723908951278, "grad_norm": 0.08425736427307129, "learning_rate": 4.837920218574569e-06, "loss": 0.4559929668903351, "step": 11082 }, { "epoch": 2.0478571676040236, "grad_norm": 0.09902206808328629, "learning_rate": 4.8362117496118395e-06, "loss": 0.4782213270664215, "step": 11083 }, { "epoch": 2.0480419443129194, "grad_norm": 0.08472497761249542, "learning_rate": 4.8345034861517334e-06, "loss": 0.4149915874004364, "step": 11084 }, { "epoch": 2.0482267210218152, "grad_norm": 0.07617567479610443, "learning_rate": 4.832795428262222e-06, "loss": 0.3213249146938324, "step": 11085 }, { "epoch": 2.048411497730711, "grad_norm": 0.09814009070396423, "learning_rate": 4.8310875760112884e-06, "loss": 0.4558155834674835, "step": 11086 }, { "epoch": 2.048596274439607, "grad_norm": 0.10031770914793015, "learning_rate": 4.829379929466897e-06, "loss": 0.5350594520568848, "step": 11087 }, { "epoch": 2.0487810511485027, "grad_norm": 0.0721861869096756, "learning_rate": 4.827672488697007e-06, "loss": 0.37536805868148804, "step": 11088 }, { "epoch": 2.0489658278573986, "grad_norm": 0.09258493781089783, "learning_rate": 4.825965253769574e-06, "loss": 0.43440642952919006, "step": 11089 }, { "epoch": 2.0491506045662944, "grad_norm": 0.08985716104507446, "learning_rate": 4.824258224752533e-06, "loss": 0.5369082093238831, "step": 11090 }, { "epoch": 2.04933538127519, "grad_norm": 0.08058978617191315, "learning_rate": 4.8225514017138205e-06, "loss": 0.39088350534439087, "step": 11091 }, { "epoch": 2.049520157984086, "grad_norm": 0.07473517954349518, "learning_rate": 4.820844784721366e-06, "loss": 0.312513530254364, "step": 11092 }, { "epoch": 2.049704934692982, "grad_norm": 0.0834210067987442, "learning_rate": 4.819138373843084e-06, "loss": 0.44025424122810364, "step": 11093 }, { "epoch": 2.0498897114018777, "grad_norm": 0.08357541263103485, "learning_rate": 4.8174321691468865e-06, "loss": 0.36911502480506897, "step": 11094 }, { "epoch": 2.0500744881107735, "grad_norm": 0.08149900287389755, "learning_rate": 4.8157261707006785e-06, "loss": 0.47982460260391235, "step": 11095 }, { "epoch": 2.0502592648196694, "grad_norm": 0.0683201402425766, "learning_rate": 4.814020378572345e-06, "loss": 0.32079848647117615, "step": 11096 }, { "epoch": 2.050444041528565, "grad_norm": 0.08017799258232117, "learning_rate": 4.812314792829776e-06, "loss": 0.3812326490879059, "step": 11097 }, { "epoch": 2.050628818237461, "grad_norm": 0.08644194155931473, "learning_rate": 4.810609413540852e-06, "loss": 0.47296470403671265, "step": 11098 }, { "epoch": 2.050813594946357, "grad_norm": 0.07601633667945862, "learning_rate": 4.808904240773433e-06, "loss": 0.34887439012527466, "step": 11099 }, { "epoch": 2.0509983716552527, "grad_norm": 0.08265700191259384, "learning_rate": 4.807199274595382e-06, "loss": 0.39728039503097534, "step": 11100 }, { "epoch": 2.0511831483641485, "grad_norm": 0.07100403308868408, "learning_rate": 4.805494515074561e-06, "loss": 0.4029456675052643, "step": 11101 }, { "epoch": 2.0513679250730443, "grad_norm": 0.10822330415248871, "learning_rate": 4.803789962278802e-06, "loss": 0.6121471524238586, "step": 11102 }, { "epoch": 2.0515527017819406, "grad_norm": 0.07543343305587769, "learning_rate": 4.802085616275947e-06, "loss": 0.34128338098526, "step": 11103 }, { "epoch": 2.0517374784908364, "grad_norm": 0.08622822910547256, "learning_rate": 4.8003814771338256e-06, "loss": 0.47281673550605774, "step": 11104 }, { "epoch": 2.0519222551997323, "grad_norm": 0.13187064230442047, "learning_rate": 4.79867754492025e-06, "loss": 0.8056926727294922, "step": 11105 }, { "epoch": 2.052107031908628, "grad_norm": 0.1005135178565979, "learning_rate": 4.796973819703035e-06, "loss": 0.5747947096824646, "step": 11106 }, { "epoch": 2.052291808617524, "grad_norm": 0.09979305416345596, "learning_rate": 4.795270301549984e-06, "loss": 0.4201939105987549, "step": 11107 }, { "epoch": 2.0524765853264197, "grad_norm": 0.08658093214035034, "learning_rate": 4.793566990528891e-06, "loss": 0.5614806413650513, "step": 11108 }, { "epoch": 2.0526613620353156, "grad_norm": 0.08579076081514359, "learning_rate": 4.791863886707547e-06, "loss": 0.3762155771255493, "step": 11109 }, { "epoch": 2.0528461387442114, "grad_norm": 0.11106253415346146, "learning_rate": 4.7901609901537226e-06, "loss": 0.5277875065803528, "step": 11110 }, { "epoch": 2.0530309154531072, "grad_norm": 0.08382830023765564, "learning_rate": 4.788458300935191e-06, "loss": 0.48897239565849304, "step": 11111 }, { "epoch": 2.053215692162003, "grad_norm": 0.08559130132198334, "learning_rate": 4.786755819119715e-06, "loss": 0.48729780316352844, "step": 11112 }, { "epoch": 2.053400468870899, "grad_norm": 0.08569462597370148, "learning_rate": 4.7850535447750455e-06, "loss": 0.48792409896850586, "step": 11113 }, { "epoch": 2.0535852455797947, "grad_norm": 0.11856204271316528, "learning_rate": 4.783351477968932e-06, "loss": 0.6652650237083435, "step": 11114 }, { "epoch": 2.0537700222886905, "grad_norm": 0.08174629509449005, "learning_rate": 4.7816496187691105e-06, "loss": 0.5053659081459045, "step": 11115 }, { "epoch": 2.0539547989975864, "grad_norm": 0.10635677725076675, "learning_rate": 4.779947967243305e-06, "loss": 0.5447197556495667, "step": 11116 }, { "epoch": 2.054139575706482, "grad_norm": 0.07048063725233078, "learning_rate": 4.778246523459239e-06, "loss": 0.4420374631881714, "step": 11117 }, { "epoch": 2.054324352415378, "grad_norm": 0.0684606060385704, "learning_rate": 4.776545287484629e-06, "loss": 0.3754832148551941, "step": 11118 }, { "epoch": 2.054509129124274, "grad_norm": 0.07237427681684494, "learning_rate": 4.774844259387169e-06, "loss": 0.4265793561935425, "step": 11119 }, { "epoch": 2.0546939058331697, "grad_norm": 0.08699125796556473, "learning_rate": 4.773143439234558e-06, "loss": 0.45075085759162903, "step": 11120 }, { "epoch": 2.0548786825420655, "grad_norm": 0.08007710427045822, "learning_rate": 4.771442827094493e-06, "loss": 0.4470076262950897, "step": 11121 }, { "epoch": 2.0550634592509613, "grad_norm": 0.07942335307598114, "learning_rate": 4.76974242303464e-06, "loss": 0.45654499530792236, "step": 11122 }, { "epoch": 2.055248235959857, "grad_norm": 0.08444330096244812, "learning_rate": 4.76804222712268e-06, "loss": 0.4239978790283203, "step": 11123 }, { "epoch": 2.055433012668753, "grad_norm": 0.08909330517053604, "learning_rate": 4.766342239426267e-06, "loss": 0.47700098156929016, "step": 11124 }, { "epoch": 2.055617789377649, "grad_norm": 0.08406031131744385, "learning_rate": 4.764642460013058e-06, "loss": 0.40069150924682617, "step": 11125 }, { "epoch": 2.0558025660865447, "grad_norm": 0.09387758374214172, "learning_rate": 4.762942888950702e-06, "loss": 0.5573445558547974, "step": 11126 }, { "epoch": 2.0559873427954405, "grad_norm": 0.09293120354413986, "learning_rate": 4.761243526306831e-06, "loss": 0.5203923583030701, "step": 11127 }, { "epoch": 2.0561721195043363, "grad_norm": 0.09802858531475067, "learning_rate": 4.75954437214908e-06, "loss": 0.5696846842765808, "step": 11128 }, { "epoch": 2.056356896213232, "grad_norm": 0.07466862350702286, "learning_rate": 4.7578454265450715e-06, "loss": 0.41942253708839417, "step": 11129 }, { "epoch": 2.056541672922128, "grad_norm": 0.09071548283100128, "learning_rate": 4.75614668956241e-06, "loss": 0.4312906265258789, "step": 11130 }, { "epoch": 2.056726449631024, "grad_norm": 0.059735409915447235, "learning_rate": 4.7544481612687045e-06, "loss": 0.3316957652568817, "step": 11131 }, { "epoch": 2.0569112263399196, "grad_norm": 0.08869393914937973, "learning_rate": 4.75274984173155e-06, "loss": 0.44609615206718445, "step": 11132 }, { "epoch": 2.057096003048816, "grad_norm": 0.08205130696296692, "learning_rate": 4.751051731018537e-06, "loss": 0.4383608400821686, "step": 11133 }, { "epoch": 2.0572807797577117, "grad_norm": 0.07823719084262848, "learning_rate": 4.749353829197242e-06, "loss": 0.4239680767059326, "step": 11134 }, { "epoch": 2.0574655564666076, "grad_norm": 0.10449334979057312, "learning_rate": 4.747656136335242e-06, "loss": 0.5839539766311646, "step": 11135 }, { "epoch": 2.0576503331755034, "grad_norm": 0.09385858476161957, "learning_rate": 4.745958652500091e-06, "loss": 0.713076651096344, "step": 11136 }, { "epoch": 2.057835109884399, "grad_norm": 0.08174755424261093, "learning_rate": 4.744261377759352e-06, "loss": 0.4505770206451416, "step": 11137 }, { "epoch": 2.058019886593295, "grad_norm": 0.07801038026809692, "learning_rate": 4.742564312180562e-06, "loss": 0.3908689618110657, "step": 11138 }, { "epoch": 2.058204663302191, "grad_norm": 0.07908947765827179, "learning_rate": 4.7408674558312654e-06, "loss": 0.48987993597984314, "step": 11139 }, { "epoch": 2.0583894400110867, "grad_norm": 0.10476920753717422, "learning_rate": 4.73917080877899e-06, "loss": 0.5112239122390747, "step": 11140 }, { "epoch": 2.0585742167199825, "grad_norm": 0.07934331148862839, "learning_rate": 4.737474371091257e-06, "loss": 0.41086068749427795, "step": 11141 }, { "epoch": 2.0587589934288784, "grad_norm": 0.07572036236524582, "learning_rate": 4.735778142835581e-06, "loss": 0.4580512046813965, "step": 11142 }, { "epoch": 2.058943770137774, "grad_norm": 0.1033131554722786, "learning_rate": 4.734082124079468e-06, "loss": 0.6239036321640015, "step": 11143 }, { "epoch": 2.05912854684667, "grad_norm": 0.061327748000621796, "learning_rate": 4.732386314890408e-06, "loss": 0.2922775149345398, "step": 11144 }, { "epoch": 2.059313323555566, "grad_norm": 0.07396014779806137, "learning_rate": 4.7306907153358915e-06, "loss": 0.33885297179222107, "step": 11145 }, { "epoch": 2.0594981002644617, "grad_norm": 0.07981745153665543, "learning_rate": 4.7289953254834e-06, "loss": 0.41183799505233765, "step": 11146 }, { "epoch": 2.0596828769733575, "grad_norm": 0.09812553226947784, "learning_rate": 4.727300145400403e-06, "loss": 0.508681058883667, "step": 11147 }, { "epoch": 2.0598676536822533, "grad_norm": 0.07962342351675034, "learning_rate": 4.725605175154365e-06, "loss": 0.5423054099082947, "step": 11148 }, { "epoch": 2.060052430391149, "grad_norm": 0.07644487917423248, "learning_rate": 4.723910414812742e-06, "loss": 0.3208584189414978, "step": 11149 }, { "epoch": 2.060237207100045, "grad_norm": 0.09190234541893005, "learning_rate": 4.7222158644429764e-06, "loss": 0.6065411567687988, "step": 11150 }, { "epoch": 2.060421983808941, "grad_norm": 0.08584508299827576, "learning_rate": 4.720521524112504e-06, "loss": 0.4849585294723511, "step": 11151 }, { "epoch": 2.0606067605178366, "grad_norm": 0.08093219250440598, "learning_rate": 4.7188273938887605e-06, "loss": 0.4830683469772339, "step": 11152 }, { "epoch": 2.0607915372267325, "grad_norm": 0.10254708677530289, "learning_rate": 4.717133473839163e-06, "loss": 0.6699663400650024, "step": 11153 }, { "epoch": 2.0609763139356283, "grad_norm": 0.09748372435569763, "learning_rate": 4.715439764031129e-06, "loss": 0.5278805494308472, "step": 11154 }, { "epoch": 2.061161090644524, "grad_norm": 0.09834560006856918, "learning_rate": 4.713746264532056e-06, "loss": 0.4821929931640625, "step": 11155 }, { "epoch": 2.06134586735342, "grad_norm": 0.10058598965406418, "learning_rate": 4.712052975409342e-06, "loss": 0.5771047472953796, "step": 11156 }, { "epoch": 2.061530644062316, "grad_norm": 0.07111495733261108, "learning_rate": 4.710359896730379e-06, "loss": 0.3995524048805237, "step": 11157 }, { "epoch": 2.0617154207712116, "grad_norm": 0.09514980763196945, "learning_rate": 4.7086670285625406e-06, "loss": 0.47377482056617737, "step": 11158 }, { "epoch": 2.0619001974801074, "grad_norm": 0.07909617573022842, "learning_rate": 4.7069743709731985e-06, "loss": 0.41966110467910767, "step": 11159 }, { "epoch": 2.0620849741890033, "grad_norm": 0.08692745119333267, "learning_rate": 4.705281924029718e-06, "loss": 0.45984774827957153, "step": 11160 }, { "epoch": 2.062269750897899, "grad_norm": 0.070327527821064, "learning_rate": 4.7035896877994514e-06, "loss": 0.3887335956096649, "step": 11161 }, { "epoch": 2.0624545276067954, "grad_norm": 0.07713624089956284, "learning_rate": 4.701897662349745e-06, "loss": 0.37153637409210205, "step": 11162 }, { "epoch": 2.062639304315691, "grad_norm": 0.06490417569875717, "learning_rate": 4.7002058477479395e-06, "loss": 0.35753506422042847, "step": 11163 }, { "epoch": 2.062824081024587, "grad_norm": 0.06854233890771866, "learning_rate": 4.698514244061357e-06, "loss": 0.37862610816955566, "step": 11164 }, { "epoch": 2.063008857733483, "grad_norm": 0.07123073190450668, "learning_rate": 4.696822851357321e-06, "loss": 0.34985968470573425, "step": 11165 }, { "epoch": 2.0631936344423787, "grad_norm": 0.06531429290771484, "learning_rate": 4.695131669703145e-06, "loss": 0.37143802642822266, "step": 11166 }, { "epoch": 2.0633784111512745, "grad_norm": 0.08906910568475723, "learning_rate": 4.6934406991661304e-06, "loss": 0.38228845596313477, "step": 11167 }, { "epoch": 2.0635631878601703, "grad_norm": 0.06802390515804291, "learning_rate": 4.691749939813575e-06, "loss": 0.2796257436275482, "step": 11168 }, { "epoch": 2.063747964569066, "grad_norm": 0.09548873454332352, "learning_rate": 4.690059391712767e-06, "loss": 0.5309671759605408, "step": 11169 }, { "epoch": 2.063932741277962, "grad_norm": 0.07820142060518265, "learning_rate": 4.68836905493098e-06, "loss": 0.4591372311115265, "step": 11170 }, { "epoch": 2.064117517986858, "grad_norm": 0.08913306146860123, "learning_rate": 4.68667892953549e-06, "loss": 0.6002703905105591, "step": 11171 }, { "epoch": 2.0643022946957537, "grad_norm": 0.10198698937892914, "learning_rate": 4.684989015593547e-06, "loss": 0.6035742163658142, "step": 11172 }, { "epoch": 2.0644870714046495, "grad_norm": 0.08727562427520752, "learning_rate": 4.683299313172418e-06, "loss": 0.406217098236084, "step": 11173 }, { "epoch": 2.0646718481135453, "grad_norm": 0.10919637978076935, "learning_rate": 4.681609822339346e-06, "loss": 0.501842200756073, "step": 11174 }, { "epoch": 2.064856624822441, "grad_norm": 0.09159188717603683, "learning_rate": 4.679920543161559e-06, "loss": 0.5147289633750916, "step": 11175 }, { "epoch": 2.065041401531337, "grad_norm": 0.07231157273054123, "learning_rate": 4.678231475706291e-06, "loss": 0.3952037990093231, "step": 11176 }, { "epoch": 2.065226178240233, "grad_norm": 0.06625118851661682, "learning_rate": 4.6765426200407635e-06, "loss": 0.22810131311416626, "step": 11177 }, { "epoch": 2.0654109549491286, "grad_norm": 0.08682150393724442, "learning_rate": 4.674853976232182e-06, "loss": 0.473644495010376, "step": 11178 }, { "epoch": 2.0655957316580245, "grad_norm": 0.10142802447080612, "learning_rate": 4.673165544347748e-06, "loss": 0.5257569551467896, "step": 11179 }, { "epoch": 2.0657805083669203, "grad_norm": 0.06687464565038681, "learning_rate": 4.6714773244546665e-06, "loss": 0.3753644824028015, "step": 11180 }, { "epoch": 2.065965285075816, "grad_norm": 0.06486748158931732, "learning_rate": 4.669789316620113e-06, "loss": 0.32193541526794434, "step": 11181 }, { "epoch": 2.066150061784712, "grad_norm": 0.08548235893249512, "learning_rate": 4.66810152091127e-06, "loss": 0.47026437520980835, "step": 11182 }, { "epoch": 2.0663348384936078, "grad_norm": 0.11571351438760757, "learning_rate": 4.666413937395308e-06, "loss": 0.5890333652496338, "step": 11183 }, { "epoch": 2.0665196152025036, "grad_norm": 0.10707179456949234, "learning_rate": 4.6647265661393806e-06, "loss": 0.5810878276824951, "step": 11184 }, { "epoch": 2.0667043919113994, "grad_norm": 0.08522901684045792, "learning_rate": 4.663039407210645e-06, "loss": 0.42667514085769653, "step": 11185 }, { "epoch": 2.0668891686202953, "grad_norm": 0.09314467012882233, "learning_rate": 4.661352460676243e-06, "loss": 0.4527299702167511, "step": 11186 }, { "epoch": 2.067073945329191, "grad_norm": 0.07868710905313492, "learning_rate": 4.659665726603309e-06, "loss": 0.42437943816185, "step": 11187 }, { "epoch": 2.067258722038087, "grad_norm": 0.07289140671491623, "learning_rate": 4.657979205058978e-06, "loss": 0.44353288412094116, "step": 11188 }, { "epoch": 2.0674434987469827, "grad_norm": 0.09959884732961655, "learning_rate": 4.656292896110356e-06, "loss": 0.5056084990501404, "step": 11189 }, { "epoch": 2.0676282754558786, "grad_norm": 0.11057322472333908, "learning_rate": 4.6546067998245595e-06, "loss": 0.6372722387313843, "step": 11190 }, { "epoch": 2.067813052164775, "grad_norm": 0.09406181424856186, "learning_rate": 4.6529209162686916e-06, "loss": 0.5327972769737244, "step": 11191 }, { "epoch": 2.0679978288736707, "grad_norm": 0.09712541103363037, "learning_rate": 4.651235245509835e-06, "loss": 0.5264273285865784, "step": 11192 }, { "epoch": 2.0681826055825665, "grad_norm": 0.07588005065917969, "learning_rate": 4.649549787615086e-06, "loss": 0.4727795124053955, "step": 11193 }, { "epoch": 2.0683673822914623, "grad_norm": 0.07036327570676804, "learning_rate": 4.647864542651519e-06, "loss": 0.31954243779182434, "step": 11194 }, { "epoch": 2.068552159000358, "grad_norm": 0.07767093181610107, "learning_rate": 4.646179510686195e-06, "loss": 0.364122211933136, "step": 11195 }, { "epoch": 2.068736935709254, "grad_norm": 0.08550487458705902, "learning_rate": 4.644494691786177e-06, "loss": 0.5416126847267151, "step": 11196 }, { "epoch": 2.06892171241815, "grad_norm": 0.08919231593608856, "learning_rate": 4.642810086018518e-06, "loss": 0.512860894203186, "step": 11197 }, { "epoch": 2.0691064891270456, "grad_norm": 0.08846841007471085, "learning_rate": 4.641125693450253e-06, "loss": 0.4849124550819397, "step": 11198 }, { "epoch": 2.0692912658359415, "grad_norm": 0.07089854776859283, "learning_rate": 4.6394415141484176e-06, "loss": 0.3652840256690979, "step": 11199 }, { "epoch": 2.0694760425448373, "grad_norm": 0.06689655035734177, "learning_rate": 4.637757548180045e-06, "loss": 0.37974056601524353, "step": 11200 }, { "epoch": 2.069660819253733, "grad_norm": 0.05894790217280388, "learning_rate": 4.6360737956121425e-06, "loss": 0.3253614604473114, "step": 11201 }, { "epoch": 2.069845595962629, "grad_norm": 0.1028907522559166, "learning_rate": 4.634390256511725e-06, "loss": 0.5811859369277954, "step": 11202 }, { "epoch": 2.070030372671525, "grad_norm": 0.08756034821271896, "learning_rate": 4.632706930945784e-06, "loss": 0.49308815598487854, "step": 11203 }, { "epoch": 2.0702151493804206, "grad_norm": 0.07366007566452026, "learning_rate": 4.6310238189813165e-06, "loss": 0.43656429648399353, "step": 11204 }, { "epoch": 2.0703999260893164, "grad_norm": 0.09568152576684952, "learning_rate": 4.629340920685302e-06, "loss": 0.5442222356796265, "step": 11205 }, { "epoch": 2.0705847027982123, "grad_norm": 0.08735237270593643, "learning_rate": 4.627658236124717e-06, "loss": 0.5923407673835754, "step": 11206 }, { "epoch": 2.070769479507108, "grad_norm": 0.06391552090644836, "learning_rate": 4.625975765366527e-06, "loss": 0.3378419280052185, "step": 11207 }, { "epoch": 2.070954256216004, "grad_norm": 0.0819556936621666, "learning_rate": 4.624293508477691e-06, "loss": 0.3597787022590637, "step": 11208 }, { "epoch": 2.0711390329248998, "grad_norm": 0.10062183439731598, "learning_rate": 4.622611465525152e-06, "loss": 0.5810568332672119, "step": 11209 }, { "epoch": 2.0713238096337956, "grad_norm": 0.0777035653591156, "learning_rate": 4.620929636575852e-06, "loss": 0.38935208320617676, "step": 11210 }, { "epoch": 2.0715085863426914, "grad_norm": 0.09280351549386978, "learning_rate": 4.619248021696728e-06, "loss": 0.5045894980430603, "step": 11211 }, { "epoch": 2.0716933630515872, "grad_norm": 0.0894412100315094, "learning_rate": 4.617566620954691e-06, "loss": 0.39157426357269287, "step": 11212 }, { "epoch": 2.071878139760483, "grad_norm": 0.056693993508815765, "learning_rate": 4.615885434416667e-06, "loss": 0.3080008625984192, "step": 11213 }, { "epoch": 2.072062916469379, "grad_norm": 0.07515745609998703, "learning_rate": 4.614204462149561e-06, "loss": 0.5135540962219238, "step": 11214 }, { "epoch": 2.0722476931782747, "grad_norm": 0.09220051020383835, "learning_rate": 4.612523704220264e-06, "loss": 0.4865436851978302, "step": 11215 }, { "epoch": 2.0724324698871706, "grad_norm": 0.10826101899147034, "learning_rate": 4.610843160695668e-06, "loss": 0.5282402634620667, "step": 11216 }, { "epoch": 2.0726172465960664, "grad_norm": 0.08101743459701538, "learning_rate": 4.6091628316426585e-06, "loss": 0.39346298575401306, "step": 11217 }, { "epoch": 2.072802023304962, "grad_norm": 0.07107812911272049, "learning_rate": 4.607482717128098e-06, "loss": 0.4143473207950592, "step": 11218 }, { "epoch": 2.072986800013858, "grad_norm": 0.07952581346035004, "learning_rate": 4.605802817218855e-06, "loss": 0.439717561006546, "step": 11219 }, { "epoch": 2.0731715767227543, "grad_norm": 0.06694263964891434, "learning_rate": 4.604123131981782e-06, "loss": 0.3764767050743103, "step": 11220 }, { "epoch": 2.07335635343165, "grad_norm": 0.05754847824573517, "learning_rate": 4.602443661483729e-06, "loss": 0.32524630427360535, "step": 11221 }, { "epoch": 2.073541130140546, "grad_norm": 0.07798845320940018, "learning_rate": 4.600764405791533e-06, "loss": 0.39575040340423584, "step": 11222 }, { "epoch": 2.073725906849442, "grad_norm": 0.06815237551927567, "learning_rate": 4.59908536497202e-06, "loss": 0.37505558133125305, "step": 11223 }, { "epoch": 2.0739106835583376, "grad_norm": 0.08333446830511093, "learning_rate": 4.597406539092011e-06, "loss": 0.41966623067855835, "step": 11224 }, { "epoch": 2.0740954602672335, "grad_norm": 0.08505209535360336, "learning_rate": 4.595727928218319e-06, "loss": 0.4656205475330353, "step": 11225 }, { "epoch": 2.0742802369761293, "grad_norm": 0.08556347340345383, "learning_rate": 4.594049532417748e-06, "loss": 0.3957214951515198, "step": 11226 }, { "epoch": 2.074465013685025, "grad_norm": 0.06823302060365677, "learning_rate": 4.592371351757093e-06, "loss": 0.3491239845752716, "step": 11227 }, { "epoch": 2.074649790393921, "grad_norm": 0.07873357087373734, "learning_rate": 4.590693386303143e-06, "loss": 0.45640286803245544, "step": 11228 }, { "epoch": 2.0748345671028168, "grad_norm": 0.09663490951061249, "learning_rate": 4.589015636122669e-06, "loss": 0.5493663549423218, "step": 11229 }, { "epoch": 2.0750193438117126, "grad_norm": 0.06493667513132095, "learning_rate": 4.587338101282443e-06, "loss": 0.41553476452827454, "step": 11230 }, { "epoch": 2.0752041205206084, "grad_norm": 0.08128707110881805, "learning_rate": 4.585660781849233e-06, "loss": 0.45945361256599426, "step": 11231 }, { "epoch": 2.0753888972295043, "grad_norm": 0.05496186390519142, "learning_rate": 4.583983677889775e-06, "loss": 0.30561724305152893, "step": 11232 }, { "epoch": 2.0755736739384, "grad_norm": 0.05655686557292938, "learning_rate": 4.582306789470826e-06, "loss": 0.341400146484375, "step": 11233 }, { "epoch": 2.075758450647296, "grad_norm": 0.083131805062294, "learning_rate": 4.5806301166591214e-06, "loss": 0.6002212762832642, "step": 11234 }, { "epoch": 2.0759432273561917, "grad_norm": 0.07641672343015671, "learning_rate": 4.578953659521379e-06, "loss": 0.5141034722328186, "step": 11235 }, { "epoch": 2.0761280040650876, "grad_norm": 0.07359210401773453, "learning_rate": 4.577277418124324e-06, "loss": 0.40362802147865295, "step": 11236 }, { "epoch": 2.0763127807739834, "grad_norm": 0.09323938935995102, "learning_rate": 4.575601392534659e-06, "loss": 0.5022433400154114, "step": 11237 }, { "epoch": 2.0764975574828792, "grad_norm": 0.06990652531385422, "learning_rate": 4.573925582819088e-06, "loss": 0.4661812484264374, "step": 11238 }, { "epoch": 2.076682334191775, "grad_norm": 0.09031682461500168, "learning_rate": 4.5722499890443015e-06, "loss": 0.46780532598495483, "step": 11239 }, { "epoch": 2.076867110900671, "grad_norm": 0.06735450774431229, "learning_rate": 4.570574611276986e-06, "loss": 0.34275004267692566, "step": 11240 }, { "epoch": 2.0770518876095667, "grad_norm": 0.07152236998081207, "learning_rate": 4.568899449583814e-06, "loss": 0.3821204602718353, "step": 11241 }, { "epoch": 2.0772366643184625, "grad_norm": 0.0831417590379715, "learning_rate": 4.567224504031455e-06, "loss": 0.47654974460601807, "step": 11242 }, { "epoch": 2.0774214410273584, "grad_norm": 0.08847978711128235, "learning_rate": 4.565549774686561e-06, "loss": 0.4934597611427307, "step": 11243 }, { "epoch": 2.077606217736254, "grad_norm": 0.09545759856700897, "learning_rate": 4.563875261615782e-06, "loss": 0.4834637939929962, "step": 11244 }, { "epoch": 2.07779099444515, "grad_norm": 0.08540671318769455, "learning_rate": 4.5622009648857625e-06, "loss": 0.4346141815185547, "step": 11245 }, { "epoch": 2.077975771154046, "grad_norm": 0.07556750625371933, "learning_rate": 4.56052688456313e-06, "loss": 0.38500696420669556, "step": 11246 }, { "epoch": 2.0781605478629417, "grad_norm": 0.08311685919761658, "learning_rate": 4.558853020714511e-06, "loss": 0.5165578126907349, "step": 11247 }, { "epoch": 2.0783453245718375, "grad_norm": 0.09188653528690338, "learning_rate": 4.557179373406521e-06, "loss": 0.5210410356521606, "step": 11248 }, { "epoch": 2.078530101280734, "grad_norm": 0.07977807521820068, "learning_rate": 4.555505942705761e-06, "loss": 0.43148764967918396, "step": 11249 }, { "epoch": 2.0787148779896296, "grad_norm": 0.09601611644029617, "learning_rate": 4.5538327286788305e-06, "loss": 0.5566371083259583, "step": 11250 }, { "epoch": 2.0788996546985254, "grad_norm": 0.08546191453933716, "learning_rate": 4.552159731392323e-06, "loss": 0.5322090983390808, "step": 11251 }, { "epoch": 2.0790844314074213, "grad_norm": 0.07977309823036194, "learning_rate": 4.550486950912806e-06, "loss": 0.3500477373600006, "step": 11252 }, { "epoch": 2.079269208116317, "grad_norm": 0.07240080088376999, "learning_rate": 4.548814387306866e-06, "loss": 0.36769571900367737, "step": 11253 }, { "epoch": 2.079453984825213, "grad_norm": 0.06968306005001068, "learning_rate": 4.547142040641055e-06, "loss": 0.3258252739906311, "step": 11254 }, { "epoch": 2.0796387615341088, "grad_norm": 0.060997962951660156, "learning_rate": 4.545469910981931e-06, "loss": 0.34021857380867004, "step": 11255 }, { "epoch": 2.0798235382430046, "grad_norm": 0.06318764388561249, "learning_rate": 4.543797998396045e-06, "loss": 0.3376246690750122, "step": 11256 }, { "epoch": 2.0800083149519004, "grad_norm": 0.06657318770885468, "learning_rate": 4.542126302949922e-06, "loss": 0.2888146638870239, "step": 11257 }, { "epoch": 2.0801930916607962, "grad_norm": 0.09087678045034409, "learning_rate": 4.540454824710099e-06, "loss": 0.4406352639198303, "step": 11258 }, { "epoch": 2.080377868369692, "grad_norm": 0.07760107517242432, "learning_rate": 4.538783563743091e-06, "loss": 0.35341158509254456, "step": 11259 }, { "epoch": 2.080562645078588, "grad_norm": 0.06082098186016083, "learning_rate": 4.537112520115413e-06, "loss": 0.30027127265930176, "step": 11260 }, { "epoch": 2.0807474217874837, "grad_norm": 0.06878552585840225, "learning_rate": 4.535441693893565e-06, "loss": 0.36671727895736694, "step": 11261 }, { "epoch": 2.0809321984963796, "grad_norm": 0.07365518808364868, "learning_rate": 4.5337710851440445e-06, "loss": 0.3336552381515503, "step": 11262 }, { "epoch": 2.0811169752052754, "grad_norm": 0.08688491582870483, "learning_rate": 4.53210069393333e-06, "loss": 0.48835572600364685, "step": 11263 }, { "epoch": 2.081301751914171, "grad_norm": 0.09472530335187912, "learning_rate": 4.5304305203279005e-06, "loss": 0.48959553241729736, "step": 11264 }, { "epoch": 2.081486528623067, "grad_norm": 0.0924774780869484, "learning_rate": 4.528760564394225e-06, "loss": 0.46644827723503113, "step": 11265 }, { "epoch": 2.081671305331963, "grad_norm": 0.07498573511838913, "learning_rate": 4.527090826198761e-06, "loss": 0.3633112609386444, "step": 11266 }, { "epoch": 2.0818560820408587, "grad_norm": 0.10801418870687485, "learning_rate": 4.525421305807963e-06, "loss": 0.48465314507484436, "step": 11267 }, { "epoch": 2.0820408587497545, "grad_norm": 0.08142693340778351, "learning_rate": 4.523752003288267e-06, "loss": 0.49605080485343933, "step": 11268 }, { "epoch": 2.0822256354586504, "grad_norm": 0.08006380498409271, "learning_rate": 4.5220829187061065e-06, "loss": 0.45951616764068604, "step": 11269 }, { "epoch": 2.082410412167546, "grad_norm": 0.09330236911773682, "learning_rate": 4.520414052127913e-06, "loss": 0.48875072598457336, "step": 11270 }, { "epoch": 2.082595188876442, "grad_norm": 0.09738299995660782, "learning_rate": 4.518745403620088e-06, "loss": 0.5769286751747131, "step": 11271 }, { "epoch": 2.082779965585338, "grad_norm": 0.08468339592218399, "learning_rate": 4.517076973249052e-06, "loss": 0.5612066388130188, "step": 11272 }, { "epoch": 2.0829647422942337, "grad_norm": 0.08240416646003723, "learning_rate": 4.5154087610812016e-06, "loss": 0.513605535030365, "step": 11273 }, { "epoch": 2.0831495190031295, "grad_norm": 0.06436806917190552, "learning_rate": 4.51374076718292e-06, "loss": 0.3433886170387268, "step": 11274 }, { "epoch": 2.0833342957120253, "grad_norm": 0.08961392194032669, "learning_rate": 4.512072991620592e-06, "loss": 0.5268140435218811, "step": 11275 }, { "epoch": 2.083519072420921, "grad_norm": 0.1014905646443367, "learning_rate": 4.510405434460592e-06, "loss": 0.5389054417610168, "step": 11276 }, { "epoch": 2.083703849129817, "grad_norm": 0.07252752780914307, "learning_rate": 4.508738095769278e-06, "loss": 0.3551664650440216, "step": 11277 }, { "epoch": 2.083888625838713, "grad_norm": 0.07098570466041565, "learning_rate": 4.507070975613009e-06, "loss": 0.3488197922706604, "step": 11278 }, { "epoch": 2.084073402547609, "grad_norm": 0.06824567168951035, "learning_rate": 4.505404074058127e-06, "loss": 0.3708655834197998, "step": 11279 }, { "epoch": 2.084258179256505, "grad_norm": 0.08788527548313141, "learning_rate": 4.503737391170975e-06, "loss": 0.481997549533844, "step": 11280 }, { "epoch": 2.0844429559654007, "grad_norm": 0.07096780091524124, "learning_rate": 4.502070927017879e-06, "loss": 0.4012865424156189, "step": 11281 }, { "epoch": 2.0846277326742966, "grad_norm": 0.10750937461853027, "learning_rate": 4.500404681665161e-06, "loss": 0.5957963466644287, "step": 11282 }, { "epoch": 2.0848125093831924, "grad_norm": 0.08033590018749237, "learning_rate": 4.498738655179129e-06, "loss": 0.3472810685634613, "step": 11283 }, { "epoch": 2.0849972860920882, "grad_norm": 0.06028683856129646, "learning_rate": 4.497072847626087e-06, "loss": 0.3033576011657715, "step": 11284 }, { "epoch": 2.085182062800984, "grad_norm": 0.09236586093902588, "learning_rate": 4.4954072590723285e-06, "loss": 0.4759647846221924, "step": 11285 }, { "epoch": 2.08536683950988, "grad_norm": 0.09992455691099167, "learning_rate": 4.49374188958414e-06, "loss": 0.5862817764282227, "step": 11286 }, { "epoch": 2.0855516162187757, "grad_norm": 0.09145543724298477, "learning_rate": 4.492076739227802e-06, "loss": 0.5127483606338501, "step": 11287 }, { "epoch": 2.0857363929276715, "grad_norm": 0.07276683300733566, "learning_rate": 4.490411808069573e-06, "loss": 0.3518741726875305, "step": 11288 }, { "epoch": 2.0859211696365674, "grad_norm": 0.09228905290365219, "learning_rate": 4.488747096175717e-06, "loss": 0.568116307258606, "step": 11289 }, { "epoch": 2.086105946345463, "grad_norm": 0.08240782469511032, "learning_rate": 4.487082603612487e-06, "loss": 0.3835344910621643, "step": 11290 }, { "epoch": 2.086290723054359, "grad_norm": 0.09818808734416962, "learning_rate": 4.485418330446114e-06, "loss": 0.4284479022026062, "step": 11291 }, { "epoch": 2.086475499763255, "grad_norm": 0.07579676806926727, "learning_rate": 4.4837542767428436e-06, "loss": 0.3927929103374481, "step": 11292 }, { "epoch": 2.0866602764721507, "grad_norm": 0.07215126603841782, "learning_rate": 4.482090442568898e-06, "loss": 0.4338514506816864, "step": 11293 }, { "epoch": 2.0868450531810465, "grad_norm": 0.07936978340148926, "learning_rate": 4.480426827990486e-06, "loss": 0.42621442675590515, "step": 11294 }, { "epoch": 2.0870298298899423, "grad_norm": 0.08208302408456802, "learning_rate": 4.478763433073817e-06, "loss": 0.42807069420814514, "step": 11295 }, { "epoch": 2.087214606598838, "grad_norm": 0.10644751042127609, "learning_rate": 4.477100257885094e-06, "loss": 0.6804676055908203, "step": 11296 }, { "epoch": 2.087399383307734, "grad_norm": 0.08382293581962585, "learning_rate": 4.475437302490498e-06, "loss": 0.4962066411972046, "step": 11297 }, { "epoch": 2.08758416001663, "grad_norm": 0.09304467588663101, "learning_rate": 4.473774566956213e-06, "loss": 0.46378782391548157, "step": 11298 }, { "epoch": 2.0877689367255257, "grad_norm": 0.10059910267591476, "learning_rate": 4.472112051348411e-06, "loss": 0.5793818235397339, "step": 11299 }, { "epoch": 2.0879537134344215, "grad_norm": 0.06869319081306458, "learning_rate": 4.470449755733255e-06, "loss": 0.29588621854782104, "step": 11300 }, { "epoch": 2.0881384901433173, "grad_norm": 0.0781109407544136, "learning_rate": 4.4687876801769025e-06, "loss": 0.4064006507396698, "step": 11301 }, { "epoch": 2.088323266852213, "grad_norm": 0.08586934953927994, "learning_rate": 4.467125824745492e-06, "loss": 0.5395097136497498, "step": 11302 }, { "epoch": 2.088508043561109, "grad_norm": 0.07244936376810074, "learning_rate": 4.465464189505163e-06, "loss": 0.3408164978027344, "step": 11303 }, { "epoch": 2.088692820270005, "grad_norm": 0.07981320470571518, "learning_rate": 4.463802774522044e-06, "loss": 0.3848876357078552, "step": 11304 }, { "epoch": 2.0888775969789006, "grad_norm": 0.07874210178852081, "learning_rate": 4.462141579862254e-06, "loss": 0.46407175064086914, "step": 11305 }, { "epoch": 2.0890623736877965, "grad_norm": 0.06232302635908127, "learning_rate": 4.460480605591904e-06, "loss": 0.2524704933166504, "step": 11306 }, { "epoch": 2.0892471503966923, "grad_norm": 0.08226414024829865, "learning_rate": 4.458819851777097e-06, "loss": 0.35842904448509216, "step": 11307 }, { "epoch": 2.089431927105588, "grad_norm": 0.0843888595700264, "learning_rate": 4.457159318483922e-06, "loss": 0.44134512543678284, "step": 11308 }, { "epoch": 2.0896167038144844, "grad_norm": 0.07801353186368942, "learning_rate": 4.455499005778464e-06, "loss": 0.3788571357727051, "step": 11309 }, { "epoch": 2.08980148052338, "grad_norm": 0.07737331092357635, "learning_rate": 4.453838913726803e-06, "loss": 0.40529578924179077, "step": 11310 }, { "epoch": 2.089986257232276, "grad_norm": 0.0766071155667305, "learning_rate": 4.452179042394994e-06, "loss": 0.4201054871082306, "step": 11311 }, { "epoch": 2.090171033941172, "grad_norm": 0.07338808476924896, "learning_rate": 4.450519391849106e-06, "loss": 0.3696323037147522, "step": 11312 }, { "epoch": 2.0903558106500677, "grad_norm": 0.0812407061457634, "learning_rate": 4.4488599621551876e-06, "loss": 0.5269801020622253, "step": 11313 }, { "epoch": 2.0905405873589635, "grad_norm": 0.07736600190401077, "learning_rate": 4.447200753379273e-06, "loss": 0.4681062400341034, "step": 11314 }, { "epoch": 2.0907253640678594, "grad_norm": 0.07834678888320923, "learning_rate": 4.445541765587394e-06, "loss": 0.40067198872566223, "step": 11315 }, { "epoch": 2.090910140776755, "grad_norm": 0.0707080066204071, "learning_rate": 4.44388299884558e-06, "loss": 0.38395214080810547, "step": 11316 }, { "epoch": 2.091094917485651, "grad_norm": 0.10281843692064285, "learning_rate": 4.442224453219836e-06, "loss": 0.5071035027503967, "step": 11317 }, { "epoch": 2.091279694194547, "grad_norm": 0.1025250256061554, "learning_rate": 4.44056612877617e-06, "loss": 0.5465942025184631, "step": 11318 }, { "epoch": 2.0914644709034427, "grad_norm": 0.09008805453777313, "learning_rate": 4.438908025580578e-06, "loss": 0.5358907580375671, "step": 11319 }, { "epoch": 2.0916492476123385, "grad_norm": 0.06519326567649841, "learning_rate": 4.43725014369905e-06, "loss": 0.3326597511768341, "step": 11320 }, { "epoch": 2.0918340243212343, "grad_norm": 0.09936525672674179, "learning_rate": 4.4355924831975665e-06, "loss": 0.4908013343811035, "step": 11321 }, { "epoch": 2.09201880103013, "grad_norm": 0.08073471486568451, "learning_rate": 4.433935044142088e-06, "loss": 0.3885650932788849, "step": 11322 }, { "epoch": 2.092203577739026, "grad_norm": 0.09077001363039017, "learning_rate": 4.432277826598582e-06, "loss": 0.4323769807815552, "step": 11323 }, { "epoch": 2.092388354447922, "grad_norm": 0.11247825622558594, "learning_rate": 4.430620830632999e-06, "loss": 0.6296273469924927, "step": 11324 }, { "epoch": 2.0925731311568176, "grad_norm": 0.08233305811882019, "learning_rate": 4.428964056311282e-06, "loss": 0.4658873975276947, "step": 11325 }, { "epoch": 2.0927579078657135, "grad_norm": 0.07252024859189987, "learning_rate": 4.427307503699368e-06, "loss": 0.41115713119506836, "step": 11326 }, { "epoch": 2.0929426845746093, "grad_norm": 0.10351111739873886, "learning_rate": 4.425651172863181e-06, "loss": 0.35353049635887146, "step": 11327 }, { "epoch": 2.093127461283505, "grad_norm": 0.08197803050279617, "learning_rate": 4.423995063868637e-06, "loss": 0.38247957825660706, "step": 11328 }, { "epoch": 2.093312237992401, "grad_norm": 0.09849874675273895, "learning_rate": 4.422339176781643e-06, "loss": 0.4812338054180145, "step": 11329 }, { "epoch": 2.093497014701297, "grad_norm": 0.06511776149272919, "learning_rate": 4.420683511668102e-06, "loss": 0.29902955889701843, "step": 11330 }, { "epoch": 2.0936817914101926, "grad_norm": 0.05650252476334572, "learning_rate": 4.419028068593896e-06, "loss": 0.23297971487045288, "step": 11331 }, { "epoch": 2.0938665681190884, "grad_norm": 0.09680482000112534, "learning_rate": 4.417372847624915e-06, "loss": 0.5279828906059265, "step": 11332 }, { "epoch": 2.0940513448279843, "grad_norm": 0.05814513936638832, "learning_rate": 4.415717848827034e-06, "loss": 0.26471182703971863, "step": 11333 }, { "epoch": 2.09423612153688, "grad_norm": 0.07361941784620285, "learning_rate": 4.414063072266107e-06, "loss": 0.3733961880207062, "step": 11334 }, { "epoch": 2.094420898245776, "grad_norm": 0.07856351882219315, "learning_rate": 4.412408518007998e-06, "loss": 0.44502511620521545, "step": 11335 }, { "epoch": 2.0946056749546718, "grad_norm": 0.09728807955980301, "learning_rate": 4.410754186118543e-06, "loss": 0.5463005304336548, "step": 11336 }, { "epoch": 2.0947904516635676, "grad_norm": 0.08082041144371033, "learning_rate": 4.409100076663587e-06, "loss": 0.44808241724967957, "step": 11337 }, { "epoch": 2.094975228372464, "grad_norm": 0.0760640949010849, "learning_rate": 4.407446189708955e-06, "loss": 0.31200912594795227, "step": 11338 }, { "epoch": 2.0951600050813597, "grad_norm": 0.10331191122531891, "learning_rate": 4.405792525320469e-06, "loss": 0.565214991569519, "step": 11339 }, { "epoch": 2.0953447817902555, "grad_norm": 0.07570210099220276, "learning_rate": 4.404139083563937e-06, "loss": 0.4376543164253235, "step": 11340 }, { "epoch": 2.0955295584991513, "grad_norm": 0.07459602504968643, "learning_rate": 4.402485864505167e-06, "loss": 0.3753867447376251, "step": 11341 }, { "epoch": 2.095714335208047, "grad_norm": 0.09447022527456284, "learning_rate": 4.4008328682099436e-06, "loss": 0.4516494572162628, "step": 11342 }, { "epoch": 2.095899111916943, "grad_norm": 0.07076124846935272, "learning_rate": 4.399180094744053e-06, "loss": 0.45155102014541626, "step": 11343 }, { "epoch": 2.096083888625839, "grad_norm": 0.08209472894668579, "learning_rate": 4.397527544173273e-06, "loss": 0.43991395831108093, "step": 11344 }, { "epoch": 2.0962686653347347, "grad_norm": 0.10229241102933884, "learning_rate": 4.39587521656337e-06, "loss": 0.5501560568809509, "step": 11345 }, { "epoch": 2.0964534420436305, "grad_norm": 0.08802369236946106, "learning_rate": 4.394223111980099e-06, "loss": 0.4686582088470459, "step": 11346 }, { "epoch": 2.0966382187525263, "grad_norm": 0.08246442675590515, "learning_rate": 4.392571230489214e-06, "loss": 0.4176312983036041, "step": 11347 }, { "epoch": 2.096822995461422, "grad_norm": 0.07992678135633469, "learning_rate": 4.390919572156447e-06, "loss": 0.4794798791408539, "step": 11348 }, { "epoch": 2.097007772170318, "grad_norm": 0.09510378539562225, "learning_rate": 4.389268137047535e-06, "loss": 0.6322532892227173, "step": 11349 }, { "epoch": 2.097192548879214, "grad_norm": 0.10204022377729416, "learning_rate": 4.387616925228195e-06, "loss": 0.5359669923782349, "step": 11350 }, { "epoch": 2.0973773255881096, "grad_norm": 0.07107967138290405, "learning_rate": 4.385965936764138e-06, "loss": 0.34690505266189575, "step": 11351 }, { "epoch": 2.0975621022970055, "grad_norm": 0.07835035026073456, "learning_rate": 4.384315171721081e-06, "loss": 0.4288128614425659, "step": 11352 }, { "epoch": 2.0977468790059013, "grad_norm": 0.08641502261161804, "learning_rate": 4.382664630164707e-06, "loss": 0.4066121578216553, "step": 11353 }, { "epoch": 2.097931655714797, "grad_norm": 0.11038441210985184, "learning_rate": 4.381014312160706e-06, "loss": 0.6246651411056519, "step": 11354 }, { "epoch": 2.098116432423693, "grad_norm": 0.07031238079071045, "learning_rate": 4.37936421777476e-06, "loss": 0.3774273991584778, "step": 11355 }, { "epoch": 2.0983012091325888, "grad_norm": 0.09349191188812256, "learning_rate": 4.377714347072529e-06, "loss": 0.4103989005088806, "step": 11356 }, { "epoch": 2.0984859858414846, "grad_norm": 0.07701905071735382, "learning_rate": 4.376064700119678e-06, "loss": 0.47127047181129456, "step": 11357 }, { "epoch": 2.0986707625503804, "grad_norm": 0.09151667356491089, "learning_rate": 4.374415276981856e-06, "loss": 0.5917862057685852, "step": 11358 }, { "epoch": 2.0988555392592763, "grad_norm": 0.09142620861530304, "learning_rate": 4.372766077724706e-06, "loss": 0.5134718418121338, "step": 11359 }, { "epoch": 2.099040315968172, "grad_norm": 0.0978093072772026, "learning_rate": 4.371117102413861e-06, "loss": 0.4793660044670105, "step": 11360 }, { "epoch": 2.099225092677068, "grad_norm": 0.07756274193525314, "learning_rate": 4.369468351114949e-06, "loss": 0.4334990382194519, "step": 11361 }, { "epoch": 2.0994098693859637, "grad_norm": 0.0866687074303627, "learning_rate": 4.367819823893575e-06, "loss": 0.6182705163955688, "step": 11362 }, { "epoch": 2.0995946460948596, "grad_norm": 0.0961247980594635, "learning_rate": 4.366171520815353e-06, "loss": 0.5028599500656128, "step": 11363 }, { "epoch": 2.0997794228037554, "grad_norm": 0.08079350739717484, "learning_rate": 4.364523441945878e-06, "loss": 0.41719940304756165, "step": 11364 }, { "epoch": 2.0999641995126512, "grad_norm": 0.074763722717762, "learning_rate": 4.36287558735074e-06, "loss": 0.40286093950271606, "step": 11365 }, { "epoch": 2.100148976221547, "grad_norm": 0.08648432046175003, "learning_rate": 4.361227957095519e-06, "loss": 0.5090119242668152, "step": 11366 }, { "epoch": 2.1003337529304433, "grad_norm": 0.07295078039169312, "learning_rate": 4.359580551245782e-06, "loss": 0.35367390513420105, "step": 11367 }, { "epoch": 2.100518529639339, "grad_norm": 0.10807738453149796, "learning_rate": 4.357933369867092e-06, "loss": 0.5633283257484436, "step": 11368 }, { "epoch": 2.100703306348235, "grad_norm": 0.08042987436056137, "learning_rate": 4.356286413025006e-06, "loss": 0.43587690591812134, "step": 11369 }, { "epoch": 2.100888083057131, "grad_norm": 0.07895466685295105, "learning_rate": 4.354639680785059e-06, "loss": 0.46001380681991577, "step": 11370 }, { "epoch": 2.1010728597660266, "grad_norm": 0.08027620613574982, "learning_rate": 4.3529931732127884e-06, "loss": 0.41186389327049255, "step": 11371 }, { "epoch": 2.1012576364749225, "grad_norm": 0.06099643185734749, "learning_rate": 4.3513468903737285e-06, "loss": 0.2797749936580658, "step": 11372 }, { "epoch": 2.1014424131838183, "grad_norm": 0.08282571285963058, "learning_rate": 4.349700832333387e-06, "loss": 0.46589988470077515, "step": 11373 }, { "epoch": 2.101627189892714, "grad_norm": 0.08938560634851456, "learning_rate": 4.348054999157274e-06, "loss": 0.37464770674705505, "step": 11374 }, { "epoch": 2.10181196660161, "grad_norm": 0.06825102865695953, "learning_rate": 4.346409390910894e-06, "loss": 0.2708218991756439, "step": 11375 }, { "epoch": 2.101996743310506, "grad_norm": 0.06989553570747375, "learning_rate": 4.3447640076597284e-06, "loss": 0.45829659700393677, "step": 11376 }, { "epoch": 2.1021815200194016, "grad_norm": 0.07006486505270004, "learning_rate": 4.343118849469262e-06, "loss": 0.3486316204071045, "step": 11377 }, { "epoch": 2.1023662967282974, "grad_norm": 0.08205582946538925, "learning_rate": 4.341473916404968e-06, "loss": 0.4590587615966797, "step": 11378 }, { "epoch": 2.1025510734371933, "grad_norm": 0.08832045644521713, "learning_rate": 4.339829208532309e-06, "loss": 0.4677901566028595, "step": 11379 }, { "epoch": 2.102735850146089, "grad_norm": 0.10020288825035095, "learning_rate": 4.3381847259167385e-06, "loss": 0.6569520831108093, "step": 11380 }, { "epoch": 2.102920626854985, "grad_norm": 0.1064857766032219, "learning_rate": 4.336540468623706e-06, "loss": 0.5721414685249329, "step": 11381 }, { "epoch": 2.1031054035638808, "grad_norm": 0.08472268283367157, "learning_rate": 4.3348964367186405e-06, "loss": 0.4144759774208069, "step": 11382 }, { "epoch": 2.1032901802727766, "grad_norm": 0.08525696396827698, "learning_rate": 4.333252630266973e-06, "loss": 0.4409826397895813, "step": 11383 }, { "epoch": 2.1034749569816724, "grad_norm": 0.06475947797298431, "learning_rate": 4.331609049334123e-06, "loss": 0.22162888944149017, "step": 11384 }, { "epoch": 2.1036597336905682, "grad_norm": 0.07369149476289749, "learning_rate": 4.3299656939854974e-06, "loss": 0.3384570777416229, "step": 11385 }, { "epoch": 2.103844510399464, "grad_norm": 0.07061440497636795, "learning_rate": 4.328322564286501e-06, "loss": 0.386101633310318, "step": 11386 }, { "epoch": 2.10402928710836, "grad_norm": 0.0843283012509346, "learning_rate": 4.3266796603025194e-06, "loss": 0.4622347354888916, "step": 11387 }, { "epoch": 2.1042140638172557, "grad_norm": 0.0684337466955185, "learning_rate": 4.325036982098938e-06, "loss": 0.40077027678489685, "step": 11388 }, { "epoch": 2.1043988405261516, "grad_norm": 0.09900067746639252, "learning_rate": 4.323394529741134e-06, "loss": 0.5515775084495544, "step": 11389 }, { "epoch": 2.1045836172350474, "grad_norm": 0.09377042949199677, "learning_rate": 4.321752303294463e-06, "loss": 0.43071335554122925, "step": 11390 }, { "epoch": 2.104768393943943, "grad_norm": 0.09065300226211548, "learning_rate": 4.320110302824283e-06, "loss": 0.3990553319454193, "step": 11391 }, { "epoch": 2.104953170652839, "grad_norm": 0.09074597805738449, "learning_rate": 4.318468528395949e-06, "loss": 0.48185208439826965, "step": 11392 }, { "epoch": 2.105137947361735, "grad_norm": 0.07206054031848907, "learning_rate": 4.3168269800747896e-06, "loss": 0.4884740710258484, "step": 11393 }, { "epoch": 2.1053227240706307, "grad_norm": 0.08428829163312912, "learning_rate": 4.315185657926135e-06, "loss": 0.40511807799339294, "step": 11394 }, { "epoch": 2.1055075007795265, "grad_norm": 0.10548090189695358, "learning_rate": 4.31354456201531e-06, "loss": 0.6051211357116699, "step": 11395 }, { "epoch": 2.105692277488423, "grad_norm": 0.0688234344124794, "learning_rate": 4.311903692407617e-06, "loss": 0.3551594614982605, "step": 11396 }, { "epoch": 2.1058770541973186, "grad_norm": 0.09894660115242004, "learning_rate": 4.3102630491683615e-06, "loss": 0.49841412901878357, "step": 11397 }, { "epoch": 2.1060618309062145, "grad_norm": 0.08894941210746765, "learning_rate": 4.308622632362836e-06, "loss": 0.5526653528213501, "step": 11398 }, { "epoch": 2.1062466076151103, "grad_norm": 0.08873556554317474, "learning_rate": 4.3069824420563235e-06, "loss": 0.4127436876296997, "step": 11399 }, { "epoch": 2.106431384324006, "grad_norm": 0.07422003149986267, "learning_rate": 4.305342478314102e-06, "loss": 0.5035715699195862, "step": 11400 }, { "epoch": 2.106616161032902, "grad_norm": 0.07391268759965897, "learning_rate": 4.303702741201431e-06, "loss": 0.44224563241004944, "step": 11401 }, { "epoch": 2.1068009377417978, "grad_norm": 0.0754476934671402, "learning_rate": 4.302063230783568e-06, "loss": 0.421756386756897, "step": 11402 }, { "epoch": 2.1069857144506936, "grad_norm": 0.07438241690397263, "learning_rate": 4.3004239471257625e-06, "loss": 0.4200321137905121, "step": 11403 }, { "epoch": 2.1071704911595894, "grad_norm": 0.0940864086151123, "learning_rate": 4.2987848902932516e-06, "loss": 0.4505451023578644, "step": 11404 }, { "epoch": 2.1073552678684853, "grad_norm": 0.0756436213850975, "learning_rate": 4.297146060351266e-06, "loss": 0.2705419361591339, "step": 11405 }, { "epoch": 2.107540044577381, "grad_norm": 0.10930308699607849, "learning_rate": 4.295507457365029e-06, "loss": 0.4992304742336273, "step": 11406 }, { "epoch": 2.107724821286277, "grad_norm": 0.0841393992304802, "learning_rate": 4.293869081399744e-06, "loss": 0.4608651101589203, "step": 11407 }, { "epoch": 2.1079095979951727, "grad_norm": 0.07042815536260605, "learning_rate": 4.292230932520618e-06, "loss": 0.4713101089000702, "step": 11408 }, { "epoch": 2.1080943747040686, "grad_norm": 0.08705121278762817, "learning_rate": 4.2905930107928465e-06, "loss": 0.4724443554878235, "step": 11409 }, { "epoch": 2.1082791514129644, "grad_norm": 0.06809006631374359, "learning_rate": 4.288955316281608e-06, "loss": 0.35661178827285767, "step": 11410 }, { "epoch": 2.1084639281218602, "grad_norm": 0.06043905392289162, "learning_rate": 4.287317849052075e-06, "loss": 0.3042537569999695, "step": 11411 }, { "epoch": 2.108648704830756, "grad_norm": 0.09773826599121094, "learning_rate": 4.285680609169428e-06, "loss": 0.5207033157348633, "step": 11412 }, { "epoch": 2.108833481539652, "grad_norm": 0.08604561537504196, "learning_rate": 4.28404359669881e-06, "loss": 0.5575354695320129, "step": 11413 }, { "epoch": 2.1090182582485477, "grad_norm": 0.06699854880571365, "learning_rate": 4.282406811705379e-06, "loss": 0.40921831130981445, "step": 11414 }, { "epoch": 2.1092030349574435, "grad_norm": 0.07288289070129395, "learning_rate": 4.280770254254264e-06, "loss": 0.40244776010513306, "step": 11415 }, { "epoch": 2.1093878116663394, "grad_norm": 0.052294157445430756, "learning_rate": 4.2791339244106e-06, "loss": 0.2464476376771927, "step": 11416 }, { "epoch": 2.109572588375235, "grad_norm": 0.074588343501091, "learning_rate": 4.277497822239507e-06, "loss": 0.36213380098342896, "step": 11417 }, { "epoch": 2.109757365084131, "grad_norm": 0.07086427509784698, "learning_rate": 4.275861947806098e-06, "loss": 0.4147190749645233, "step": 11418 }, { "epoch": 2.109942141793027, "grad_norm": 0.08113404363393784, "learning_rate": 4.274226301175475e-06, "loss": 0.4108676314353943, "step": 11419 }, { "epoch": 2.1101269185019227, "grad_norm": 0.0712827816605568, "learning_rate": 4.272590882412735e-06, "loss": 0.39405569434165955, "step": 11420 }, { "epoch": 2.1103116952108185, "grad_norm": 0.0880788117647171, "learning_rate": 4.270955691582955e-06, "loss": 0.47541195154190063, "step": 11421 }, { "epoch": 2.1104964719197143, "grad_norm": 0.08298428356647491, "learning_rate": 4.2693207287512155e-06, "loss": 0.4658651053905487, "step": 11422 }, { "epoch": 2.11068124862861, "grad_norm": 0.08433230221271515, "learning_rate": 4.267685993982581e-06, "loss": 0.5950572490692139, "step": 11423 }, { "epoch": 2.110866025337506, "grad_norm": 0.07129106670618057, "learning_rate": 4.266051487342111e-06, "loss": 0.3497973382472992, "step": 11424 }, { "epoch": 2.1110508020464023, "grad_norm": 0.08530896157026291, "learning_rate": 4.264417208894851e-06, "loss": 0.4574190378189087, "step": 11425 }, { "epoch": 2.111235578755298, "grad_norm": 0.08308255672454834, "learning_rate": 4.262783158705846e-06, "loss": 0.40405362844467163, "step": 11426 }, { "epoch": 2.111420355464194, "grad_norm": 0.07241486757993698, "learning_rate": 4.2611493368401194e-06, "loss": 0.2930608093738556, "step": 11427 }, { "epoch": 2.1116051321730898, "grad_norm": 0.09505489468574524, "learning_rate": 4.259515743362694e-06, "loss": 0.5223978757858276, "step": 11428 }, { "epoch": 2.1117899088819856, "grad_norm": 0.06957757472991943, "learning_rate": 4.257882378338586e-06, "loss": 0.38343876600265503, "step": 11429 }, { "epoch": 2.1119746855908814, "grad_norm": 0.08739648014307022, "learning_rate": 4.25624924183279e-06, "loss": 0.47917744517326355, "step": 11430 }, { "epoch": 2.1121594622997772, "grad_norm": 0.08945481479167938, "learning_rate": 4.254616333910305e-06, "loss": 0.5491586923599243, "step": 11431 }, { "epoch": 2.112344239008673, "grad_norm": 0.08414768427610397, "learning_rate": 4.252983654636115e-06, "loss": 0.45746487379074097, "step": 11432 }, { "epoch": 2.112529015717569, "grad_norm": 0.08652835339307785, "learning_rate": 4.2513512040751954e-06, "loss": 0.506133496761322, "step": 11433 }, { "epoch": 2.1127137924264647, "grad_norm": 0.09679882228374481, "learning_rate": 4.249718982292517e-06, "loss": 0.5231440663337708, "step": 11434 }, { "epoch": 2.1128985691353606, "grad_norm": 0.10607045888900757, "learning_rate": 4.248086989353027e-06, "loss": 0.46721115708351135, "step": 11435 }, { "epoch": 2.1130833458442564, "grad_norm": 0.09369184076786041, "learning_rate": 4.2464552253216815e-06, "loss": 0.4600922465324402, "step": 11436 }, { "epoch": 2.113268122553152, "grad_norm": 0.09084629267454147, "learning_rate": 4.244823690263417e-06, "loss": 0.4539335072040558, "step": 11437 }, { "epoch": 2.113452899262048, "grad_norm": 0.09121471643447876, "learning_rate": 4.2431923842431635e-06, "loss": 0.4551478326320648, "step": 11438 }, { "epoch": 2.113637675970944, "grad_norm": 0.09681767970323563, "learning_rate": 4.241561307325842e-06, "loss": 0.5657509565353394, "step": 11439 }, { "epoch": 2.1138224526798397, "grad_norm": 0.07663063704967499, "learning_rate": 4.239930459576369e-06, "loss": 0.41186854243278503, "step": 11440 }, { "epoch": 2.1140072293887355, "grad_norm": 0.09556585550308228, "learning_rate": 4.23829984105964e-06, "loss": 0.5589721202850342, "step": 11441 }, { "epoch": 2.1141920060976314, "grad_norm": 0.10073244571685791, "learning_rate": 4.23666945184055e-06, "loss": 0.5649580359458923, "step": 11442 }, { "epoch": 2.114376782806527, "grad_norm": 0.09522378444671631, "learning_rate": 4.235039291983984e-06, "loss": 0.4742183983325958, "step": 11443 }, { "epoch": 2.114561559515423, "grad_norm": 0.10242415219545364, "learning_rate": 4.23340936155482e-06, "loss": 0.549902617931366, "step": 11444 }, { "epoch": 2.114746336224319, "grad_norm": 0.08314938098192215, "learning_rate": 4.231779660617921e-06, "loss": 0.478944331407547, "step": 11445 }, { "epoch": 2.1149311129332147, "grad_norm": 0.07818952202796936, "learning_rate": 4.230150189238148e-06, "loss": 0.4710225760936737, "step": 11446 }, { "epoch": 2.1151158896421105, "grad_norm": 0.09127730876207352, "learning_rate": 4.228520947480343e-06, "loss": 0.4920573830604553, "step": 11447 }, { "epoch": 2.1153006663510063, "grad_norm": 0.07601220905780792, "learning_rate": 4.226891935409352e-06, "loss": 0.3525579273700714, "step": 11448 }, { "epoch": 2.115485443059902, "grad_norm": 0.10441645234823227, "learning_rate": 4.225263153089996e-06, "loss": 0.5868218541145325, "step": 11449 }, { "epoch": 2.115670219768798, "grad_norm": 0.09655965119600296, "learning_rate": 4.223634600587099e-06, "loss": 0.49087628722190857, "step": 11450 }, { "epoch": 2.115854996477694, "grad_norm": 0.09375781565904617, "learning_rate": 4.222006277965474e-06, "loss": 0.4645474851131439, "step": 11451 }, { "epoch": 2.1160397731865896, "grad_norm": 0.07621949911117554, "learning_rate": 4.220378185289921e-06, "loss": 0.34230300784111023, "step": 11452 }, { "epoch": 2.1162245498954855, "grad_norm": 0.07253853231668472, "learning_rate": 4.218750322625235e-06, "loss": 0.3808154761791229, "step": 11453 }, { "epoch": 2.1164093266043817, "grad_norm": 0.08523720502853394, "learning_rate": 4.217122690036202e-06, "loss": 0.44787776470184326, "step": 11454 }, { "epoch": 2.1165941033132776, "grad_norm": 0.09397739917039871, "learning_rate": 4.21549528758759e-06, "loss": 0.46045711636543274, "step": 11455 }, { "epoch": 2.1167788800221734, "grad_norm": 0.10516354441642761, "learning_rate": 4.213868115344168e-06, "loss": 0.6142834424972534, "step": 11456 }, { "epoch": 2.1169636567310692, "grad_norm": 0.07183492183685303, "learning_rate": 4.212241173370693e-06, "loss": 0.36418893933296204, "step": 11457 }, { "epoch": 2.117148433439965, "grad_norm": 0.10707825422286987, "learning_rate": 4.2106144617319125e-06, "loss": 0.5130747556686401, "step": 11458 }, { "epoch": 2.117333210148861, "grad_norm": 0.09258676320314407, "learning_rate": 4.208987980492562e-06, "loss": 0.5682313442230225, "step": 11459 }, { "epoch": 2.1175179868577567, "grad_norm": 0.10493189841508865, "learning_rate": 4.207361729717378e-06, "loss": 0.6336565613746643, "step": 11460 }, { "epoch": 2.1177027635666525, "grad_norm": 0.09559211134910583, "learning_rate": 4.205735709471069e-06, "loss": 0.4078772962093353, "step": 11461 }, { "epoch": 2.1178875402755484, "grad_norm": 0.08572104573249817, "learning_rate": 4.204109919818351e-06, "loss": 0.48710963129997253, "step": 11462 }, { "epoch": 2.118072316984444, "grad_norm": 0.06969450414180756, "learning_rate": 4.202484360823926e-06, "loss": 0.3506666421890259, "step": 11463 }, { "epoch": 2.11825709369334, "grad_norm": 0.08649852126836777, "learning_rate": 4.200859032552484e-06, "loss": 0.38269883394241333, "step": 11464 }, { "epoch": 2.118441870402236, "grad_norm": 0.08273562788963318, "learning_rate": 4.199233935068714e-06, "loss": 0.4608445167541504, "step": 11465 }, { "epoch": 2.1186266471111317, "grad_norm": 0.07988139986991882, "learning_rate": 4.197609068437281e-06, "loss": 0.445500910282135, "step": 11466 }, { "epoch": 2.1188114238200275, "grad_norm": 0.08333956450223923, "learning_rate": 4.195984432722855e-06, "loss": 0.43747177720069885, "step": 11467 }, { "epoch": 2.1189962005289233, "grad_norm": 0.07530811429023743, "learning_rate": 4.194360027990092e-06, "loss": 0.38888633251190186, "step": 11468 }, { "epoch": 2.119180977237819, "grad_norm": 0.08294999599456787, "learning_rate": 4.192735854303634e-06, "loss": 0.36828699707984924, "step": 11469 }, { "epoch": 2.119365753946715, "grad_norm": 0.08670689165592194, "learning_rate": 4.19111191172812e-06, "loss": 0.45033150911331177, "step": 11470 }, { "epoch": 2.119550530655611, "grad_norm": 0.10300054401159286, "learning_rate": 4.189488200328178e-06, "loss": 0.5954097509384155, "step": 11471 }, { "epoch": 2.1197353073645067, "grad_norm": 0.08131957799196243, "learning_rate": 4.187864720168427e-06, "loss": 0.45312342047691345, "step": 11472 }, { "epoch": 2.1199200840734025, "grad_norm": 0.09454979747533798, "learning_rate": 4.186241471313476e-06, "loss": 0.4882694482803345, "step": 11473 }, { "epoch": 2.1201048607822983, "grad_norm": 0.09436237812042236, "learning_rate": 4.184618453827929e-06, "loss": 0.5106803178787231, "step": 11474 }, { "epoch": 2.120289637491194, "grad_norm": 0.08325103670358658, "learning_rate": 4.18299566777637e-06, "loss": 0.47879454493522644, "step": 11475 }, { "epoch": 2.12047441420009, "grad_norm": 0.10389212518930435, "learning_rate": 4.181373113223385e-06, "loss": 0.5199220180511475, "step": 11476 }, { "epoch": 2.120659190908986, "grad_norm": 0.08224156498908997, "learning_rate": 4.179750790233545e-06, "loss": 0.5292742848396301, "step": 11477 }, { "epoch": 2.1208439676178816, "grad_norm": 0.08779798448085785, "learning_rate": 4.178128698871415e-06, "loss": 0.49805256724357605, "step": 11478 }, { "epoch": 2.1210287443267775, "grad_norm": 0.10667131841182709, "learning_rate": 4.176506839201553e-06, "loss": 0.6199412941932678, "step": 11479 }, { "epoch": 2.1212135210356733, "grad_norm": 0.08206725120544434, "learning_rate": 4.1748852112884955e-06, "loss": 0.4308219850063324, "step": 11480 }, { "epoch": 2.121398297744569, "grad_norm": 0.08746568858623505, "learning_rate": 4.173263815196781e-06, "loss": 0.4784696698188782, "step": 11481 }, { "epoch": 2.121583074453465, "grad_norm": 0.09873547405004501, "learning_rate": 4.171642650990942e-06, "loss": 0.48659324645996094, "step": 11482 }, { "epoch": 2.1217678511623608, "grad_norm": 0.0774245634675026, "learning_rate": 4.170021718735482e-06, "loss": 0.41697925329208374, "step": 11483 }, { "epoch": 2.1219526278712566, "grad_norm": 0.07203105092048645, "learning_rate": 4.168401018494923e-06, "loss": 0.3487274944782257, "step": 11484 }, { "epoch": 2.122137404580153, "grad_norm": 0.07573983818292618, "learning_rate": 4.166780550333761e-06, "loss": 0.3803150951862335, "step": 11485 }, { "epoch": 2.1223221812890487, "grad_norm": 0.08086445927619934, "learning_rate": 4.165160314316481e-06, "loss": 0.42372170090675354, "step": 11486 }, { "epoch": 2.1225069579979445, "grad_norm": 0.07548331469297409, "learning_rate": 4.163540310507566e-06, "loss": 0.4192907512187958, "step": 11487 }, { "epoch": 2.1226917347068404, "grad_norm": 0.07853441685438156, "learning_rate": 4.161920538971489e-06, "loss": 0.3423905074596405, "step": 11488 }, { "epoch": 2.122876511415736, "grad_norm": 0.0724734291434288, "learning_rate": 4.160300999772706e-06, "loss": 0.3461209535598755, "step": 11489 }, { "epoch": 2.123061288124632, "grad_norm": 0.09682875126600266, "learning_rate": 4.158681692975673e-06, "loss": 0.5491484999656677, "step": 11490 }, { "epoch": 2.123246064833528, "grad_norm": 0.08606716990470886, "learning_rate": 4.1570626186448344e-06, "loss": 0.4194796681404114, "step": 11491 }, { "epoch": 2.1234308415424237, "grad_norm": 0.08166229724884033, "learning_rate": 4.155443776844624e-06, "loss": 0.4911487400531769, "step": 11492 }, { "epoch": 2.1236156182513195, "grad_norm": 0.08884919434785843, "learning_rate": 4.1538251676394636e-06, "loss": 0.4438813030719757, "step": 11493 }, { "epoch": 2.1238003949602153, "grad_norm": 0.07676146924495697, "learning_rate": 4.152206791093777e-06, "loss": 0.37591931223869324, "step": 11494 }, { "epoch": 2.123985171669111, "grad_norm": 0.0794263407588005, "learning_rate": 4.15058864727196e-06, "loss": 0.3733331561088562, "step": 11495 }, { "epoch": 2.124169948378007, "grad_norm": 0.07943850010633469, "learning_rate": 4.1489707362384145e-06, "loss": 0.3863421380519867, "step": 11496 }, { "epoch": 2.124354725086903, "grad_norm": 0.0789160281419754, "learning_rate": 4.147353058057528e-06, "loss": 0.39705219864845276, "step": 11497 }, { "epoch": 2.1245395017957986, "grad_norm": 0.07900829613208771, "learning_rate": 4.14573561279368e-06, "loss": 0.3503285348415375, "step": 11498 }, { "epoch": 2.1247242785046945, "grad_norm": 0.08270596712827682, "learning_rate": 4.1441184005112425e-06, "loss": 0.447653204202652, "step": 11499 }, { "epoch": 2.1249090552135903, "grad_norm": 0.06858129799365997, "learning_rate": 4.142501421274567e-06, "loss": 0.3982352316379547, "step": 11500 }, { "epoch": 2.1249090552135903, "eval_loss": 0.5545409321784973, "eval_runtime": 158.3669, "eval_samples_per_second": 115.106, "eval_steps_per_second": 14.391, "step": 11500 }, { "epoch": 2.125093831922486, "grad_norm": 0.10040252655744553, "learning_rate": 4.140884675148011e-06, "loss": 0.6206842660903931, "step": 11501 }, { "epoch": 2.125278608631382, "grad_norm": 0.1062774583697319, "learning_rate": 4.139268162195916e-06, "loss": 0.5820229649543762, "step": 11502 }, { "epoch": 2.125463385340278, "grad_norm": 0.06915470212697983, "learning_rate": 4.137651882482607e-06, "loss": 0.39213237166404724, "step": 11503 }, { "epoch": 2.1256481620491736, "grad_norm": 0.07784624397754669, "learning_rate": 4.136035836072414e-06, "loss": 0.37819647789001465, "step": 11504 }, { "epoch": 2.1258329387580694, "grad_norm": 0.10554467141628265, "learning_rate": 4.134420023029654e-06, "loss": 0.6587731838226318, "step": 11505 }, { "epoch": 2.1260177154669653, "grad_norm": 0.09531303495168686, "learning_rate": 4.132804443418621e-06, "loss": 0.4118732511997223, "step": 11506 }, { "epoch": 2.126202492175861, "grad_norm": 0.07894685864448547, "learning_rate": 4.131189097303615e-06, "loss": 0.39554134011268616, "step": 11507 }, { "epoch": 2.126387268884757, "grad_norm": 0.08437313884496689, "learning_rate": 4.129573984748927e-06, "loss": 0.38657623529434204, "step": 11508 }, { "epoch": 2.1265720455936528, "grad_norm": 0.09306003898382187, "learning_rate": 4.127959105818823e-06, "loss": 0.4309487044811249, "step": 11509 }, { "epoch": 2.1267568223025486, "grad_norm": 0.08010219037532806, "learning_rate": 4.126344460577573e-06, "loss": 0.4455183446407318, "step": 11510 }, { "epoch": 2.1269415990114444, "grad_norm": 0.07695150375366211, "learning_rate": 4.124730049089443e-06, "loss": 0.40218260884284973, "step": 11511 }, { "epoch": 2.1271263757203402, "grad_norm": 0.09618864953517914, "learning_rate": 4.123115871418672e-06, "loss": 0.4410863518714905, "step": 11512 }, { "epoch": 2.127311152429236, "grad_norm": 0.08329358696937561, "learning_rate": 4.1215019276295074e-06, "loss": 0.3536233603954315, "step": 11513 }, { "epoch": 2.1274959291381323, "grad_norm": 0.07927120476961136, "learning_rate": 4.119888217786171e-06, "loss": 0.33560553193092346, "step": 11514 }, { "epoch": 2.127680705847028, "grad_norm": 0.08909047394990921, "learning_rate": 4.1182747419528864e-06, "loss": 0.41112515330314636, "step": 11515 }, { "epoch": 2.127865482555924, "grad_norm": 0.08944455534219742, "learning_rate": 4.116661500193867e-06, "loss": 0.44694074988365173, "step": 11516 }, { "epoch": 2.12805025926482, "grad_norm": 0.09210187941789627, "learning_rate": 4.115048492573311e-06, "loss": 0.520761251449585, "step": 11517 }, { "epoch": 2.1282350359737157, "grad_norm": 0.09095914661884308, "learning_rate": 4.113435719155415e-06, "loss": 0.5401807427406311, "step": 11518 }, { "epoch": 2.1284198126826115, "grad_norm": 0.06786547601222992, "learning_rate": 4.111823180004364e-06, "loss": 0.3559070825576782, "step": 11519 }, { "epoch": 2.1286045893915073, "grad_norm": 0.08700638264417648, "learning_rate": 4.110210875184325e-06, "loss": 0.4307456910610199, "step": 11520 }, { "epoch": 2.128789366100403, "grad_norm": 0.09621629863977432, "learning_rate": 4.108598804759466e-06, "loss": 0.5875341296195984, "step": 11521 }, { "epoch": 2.128974142809299, "grad_norm": 0.09203772991895676, "learning_rate": 4.106986968793947e-06, "loss": 0.45047008991241455, "step": 11522 }, { "epoch": 2.129158919518195, "grad_norm": 0.08629295974969864, "learning_rate": 4.105375367351902e-06, "loss": 0.376220166683197, "step": 11523 }, { "epoch": 2.1293436962270906, "grad_norm": 0.07493583858013153, "learning_rate": 4.10376400049748e-06, "loss": 0.3935466706752777, "step": 11524 }, { "epoch": 2.1295284729359865, "grad_norm": 0.09711001068353653, "learning_rate": 4.1021528682948064e-06, "loss": 0.7404162287712097, "step": 11525 }, { "epoch": 2.1297132496448823, "grad_norm": 0.07738396525382996, "learning_rate": 4.100541970807993e-06, "loss": 0.3708794414997101, "step": 11526 }, { "epoch": 2.129898026353778, "grad_norm": 0.06657982617616653, "learning_rate": 4.098931308101153e-06, "loss": 0.33644160628318787, "step": 11527 }, { "epoch": 2.130082803062674, "grad_norm": 0.07026130706071854, "learning_rate": 4.097320880238388e-06, "loss": 0.46239909529685974, "step": 11528 }, { "epoch": 2.1302675797715698, "grad_norm": 0.10004347562789917, "learning_rate": 4.095710687283781e-06, "loss": 0.5297632217407227, "step": 11529 }, { "epoch": 2.1304523564804656, "grad_norm": 0.09007824957370758, "learning_rate": 4.0941007293014166e-06, "loss": 0.43050703406333923, "step": 11530 }, { "epoch": 2.1306371331893614, "grad_norm": 0.09030665457248688, "learning_rate": 4.092491006355367e-06, "loss": 0.4323001801967621, "step": 11531 }, { "epoch": 2.1308219098982573, "grad_norm": 0.06192596256732941, "learning_rate": 4.090881518509692e-06, "loss": 0.32067015767097473, "step": 11532 }, { "epoch": 2.131006686607153, "grad_norm": 0.08256769180297852, "learning_rate": 4.089272265828449e-06, "loss": 0.46855875849723816, "step": 11533 }, { "epoch": 2.131191463316049, "grad_norm": 0.10042224824428558, "learning_rate": 4.087663248375674e-06, "loss": 0.5218668580055237, "step": 11534 }, { "epoch": 2.1313762400249447, "grad_norm": 0.08853939175605774, "learning_rate": 4.086054466215404e-06, "loss": 0.4142692983150482, "step": 11535 }, { "epoch": 2.1315610167338406, "grad_norm": 0.080595463514328, "learning_rate": 4.084445919411664e-06, "loss": 0.41706037521362305, "step": 11536 }, { "epoch": 2.1317457934427364, "grad_norm": 0.08282291889190674, "learning_rate": 4.082837608028471e-06, "loss": 0.42229169607162476, "step": 11537 }, { "epoch": 2.1319305701516322, "grad_norm": 0.08266448974609375, "learning_rate": 4.081229532129826e-06, "loss": 0.48666149377822876, "step": 11538 }, { "epoch": 2.132115346860528, "grad_norm": 0.08011366426944733, "learning_rate": 4.0796216917797335e-06, "loss": 0.36033910512924194, "step": 11539 }, { "epoch": 2.132300123569424, "grad_norm": 0.11487693339586258, "learning_rate": 4.07801408704217e-06, "loss": 0.6611344218254089, "step": 11540 }, { "epoch": 2.1324849002783197, "grad_norm": 0.09254970401525497, "learning_rate": 4.076406717981121e-06, "loss": 0.4138562083244324, "step": 11541 }, { "epoch": 2.1326696769872155, "grad_norm": 0.06875742226839066, "learning_rate": 4.074799584660552e-06, "loss": 0.2918391823768616, "step": 11542 }, { "epoch": 2.132854453696112, "grad_norm": 0.08584243059158325, "learning_rate": 4.073192687144418e-06, "loss": 0.5013090372085571, "step": 11543 }, { "epoch": 2.1330392304050076, "grad_norm": 0.09431134909391403, "learning_rate": 4.071586025496679e-06, "loss": 0.6037891507148743, "step": 11544 }, { "epoch": 2.1332240071139035, "grad_norm": 0.10009553283452988, "learning_rate": 4.0699795997812644e-06, "loss": 0.46707063913345337, "step": 11545 }, { "epoch": 2.1334087838227993, "grad_norm": 0.07532192021608353, "learning_rate": 4.0683734100621085e-06, "loss": 0.3813382387161255, "step": 11546 }, { "epoch": 2.133593560531695, "grad_norm": 0.0831272080540657, "learning_rate": 4.066767456403137e-06, "loss": 0.49544063210487366, "step": 11547 }, { "epoch": 2.133778337240591, "grad_norm": 0.07652248442173004, "learning_rate": 4.065161738868255e-06, "loss": 0.4752427041530609, "step": 11548 }, { "epoch": 2.133963113949487, "grad_norm": 0.08891411125659943, "learning_rate": 4.063556257521369e-06, "loss": 0.4780322015285492, "step": 11549 }, { "epoch": 2.1341478906583826, "grad_norm": 0.08017689734697342, "learning_rate": 4.0619510124263684e-06, "loss": 0.3103610873222351, "step": 11550 }, { "epoch": 2.1343326673672784, "grad_norm": 0.06672009825706482, "learning_rate": 4.060346003647142e-06, "loss": 0.29543837904930115, "step": 11551 }, { "epoch": 2.1345174440761743, "grad_norm": 0.0958172008395195, "learning_rate": 4.0587412312475614e-06, "loss": 0.5142347812652588, "step": 11552 }, { "epoch": 2.13470222078507, "grad_norm": 0.07005161046981812, "learning_rate": 4.057136695291495e-06, "loss": 0.43864113092422485, "step": 11553 }, { "epoch": 2.134886997493966, "grad_norm": 0.09195329993963242, "learning_rate": 4.055532395842791e-06, "loss": 0.4625697731971741, "step": 11554 }, { "epoch": 2.1350717742028618, "grad_norm": 0.0720549076795578, "learning_rate": 4.053928332965303e-06, "loss": 0.35446739196777344, "step": 11555 }, { "epoch": 2.1352565509117576, "grad_norm": 0.0694388598203659, "learning_rate": 4.052324506722861e-06, "loss": 0.3407813012599945, "step": 11556 }, { "epoch": 2.1354413276206534, "grad_norm": 0.04996746778488159, "learning_rate": 4.050720917179297e-06, "loss": 0.23797312378883362, "step": 11557 }, { "epoch": 2.1356261043295492, "grad_norm": 0.09368681162595749, "learning_rate": 4.049117564398428e-06, "loss": 0.4741019010543823, "step": 11558 }, { "epoch": 2.135810881038445, "grad_norm": 0.08431044965982437, "learning_rate": 4.047514448444065e-06, "loss": 0.44846999645233154, "step": 11559 }, { "epoch": 2.135995657747341, "grad_norm": 0.10597667098045349, "learning_rate": 4.0459115693800015e-06, "loss": 0.5060697197914124, "step": 11560 }, { "epoch": 2.1361804344562367, "grad_norm": 0.08452227711677551, "learning_rate": 4.044308927270032e-06, "loss": 0.47105276584625244, "step": 11561 }, { "epoch": 2.1363652111651326, "grad_norm": 0.08384208381175995, "learning_rate": 4.042706522177932e-06, "loss": 0.5375339984893799, "step": 11562 }, { "epoch": 2.1365499878740284, "grad_norm": 0.10141726583242416, "learning_rate": 4.04110435416747e-06, "loss": 0.690080463886261, "step": 11563 }, { "epoch": 2.136734764582924, "grad_norm": 0.07506753504276276, "learning_rate": 4.039502423302418e-06, "loss": 0.3889322876930237, "step": 11564 }, { "epoch": 2.13691954129182, "grad_norm": 0.08942927420139313, "learning_rate": 4.0379007296465186e-06, "loss": 0.48192328214645386, "step": 11565 }, { "epoch": 2.137104318000716, "grad_norm": 0.08105836808681488, "learning_rate": 4.036299273263518e-06, "loss": 0.40929368138313293, "step": 11566 }, { "epoch": 2.1372890947096117, "grad_norm": 0.10397261381149292, "learning_rate": 4.034698054217151e-06, "loss": 0.5696961283683777, "step": 11567 }, { "epoch": 2.1374738714185075, "grad_norm": 0.10482069104909897, "learning_rate": 4.033097072571135e-06, "loss": 0.5859604477882385, "step": 11568 }, { "epoch": 2.1376586481274034, "grad_norm": 0.08016854524612427, "learning_rate": 4.031496328389188e-06, "loss": 0.42372801899909973, "step": 11569 }, { "epoch": 2.137843424836299, "grad_norm": 0.0814351961016655, "learning_rate": 4.029895821735013e-06, "loss": 0.408677339553833, "step": 11570 }, { "epoch": 2.138028201545195, "grad_norm": 0.08894795179367065, "learning_rate": 4.028295552672307e-06, "loss": 0.4432206153869629, "step": 11571 }, { "epoch": 2.1382129782540913, "grad_norm": 0.09582308679819107, "learning_rate": 4.0266955212647555e-06, "loss": 0.5286830067634583, "step": 11572 }, { "epoch": 2.138397754962987, "grad_norm": 0.07396747916936874, "learning_rate": 4.025095727576036e-06, "loss": 0.3959920406341553, "step": 11573 }, { "epoch": 2.138582531671883, "grad_norm": 0.08140245079994202, "learning_rate": 4.023496171669811e-06, "loss": 0.3716605603694916, "step": 11574 }, { "epoch": 2.1387673083807788, "grad_norm": 0.09383906424045563, "learning_rate": 4.02189685360974e-06, "loss": 0.5051342844963074, "step": 11575 }, { "epoch": 2.1389520850896746, "grad_norm": 0.11029773950576782, "learning_rate": 4.020297773459472e-06, "loss": 0.6979007720947266, "step": 11576 }, { "epoch": 2.1391368617985704, "grad_norm": 0.10944915562868118, "learning_rate": 4.0186989312826445e-06, "loss": 0.5362704396247864, "step": 11577 }, { "epoch": 2.1393216385074663, "grad_norm": 0.07990576326847076, "learning_rate": 4.017100327142889e-06, "loss": 0.4585151970386505, "step": 11578 }, { "epoch": 2.139506415216362, "grad_norm": 0.07353705167770386, "learning_rate": 4.015501961103818e-06, "loss": 0.3769177496433258, "step": 11579 }, { "epoch": 2.139691191925258, "grad_norm": 0.09751986712217331, "learning_rate": 4.013903833229048e-06, "loss": 0.48764896392822266, "step": 11580 }, { "epoch": 2.1398759686341537, "grad_norm": 0.10674029588699341, "learning_rate": 4.01230594358218e-06, "loss": 0.48834967613220215, "step": 11581 }, { "epoch": 2.1400607453430496, "grad_norm": 0.09530480206012726, "learning_rate": 4.010708292226798e-06, "loss": 0.5856447815895081, "step": 11582 }, { "epoch": 2.1402455220519454, "grad_norm": 0.07518292963504791, "learning_rate": 4.009110879226485e-06, "loss": 0.343195378780365, "step": 11583 }, { "epoch": 2.1404302987608412, "grad_norm": 0.10637890547513962, "learning_rate": 4.007513704644823e-06, "loss": 0.48493310809135437, "step": 11584 }, { "epoch": 2.140615075469737, "grad_norm": 0.10789762437343597, "learning_rate": 4.005916768545365e-06, "loss": 0.5495380759239197, "step": 11585 }, { "epoch": 2.140799852178633, "grad_norm": 0.08924262225627899, "learning_rate": 4.004320070991666e-06, "loss": 0.5379125475883484, "step": 11586 }, { "epoch": 2.1409846288875287, "grad_norm": 0.06434209644794464, "learning_rate": 4.002723612047272e-06, "loss": 0.3162035048007965, "step": 11587 }, { "epoch": 2.1411694055964245, "grad_norm": 0.08127405494451523, "learning_rate": 4.001127391775713e-06, "loss": 0.393638551235199, "step": 11588 }, { "epoch": 2.1413541823053204, "grad_norm": 0.09264098852872849, "learning_rate": 3.999531410240516e-06, "loss": 0.3852292001247406, "step": 11589 }, { "epoch": 2.141538959014216, "grad_norm": 0.08991791307926178, "learning_rate": 3.997935667505195e-06, "loss": 0.4149450957775116, "step": 11590 }, { "epoch": 2.141723735723112, "grad_norm": 0.08795911073684692, "learning_rate": 3.996340163633257e-06, "loss": 0.4311430752277374, "step": 11591 }, { "epoch": 2.141908512432008, "grad_norm": 0.08005280047655106, "learning_rate": 3.994744898688197e-06, "loss": 0.40000590682029724, "step": 11592 }, { "epoch": 2.1420932891409037, "grad_norm": 0.0808529183268547, "learning_rate": 3.993149872733506e-06, "loss": 0.47213149070739746, "step": 11593 }, { "epoch": 2.1422780658497995, "grad_norm": 0.09862861782312393, "learning_rate": 3.991555085832653e-06, "loss": 0.6171346306800842, "step": 11594 }, { "epoch": 2.1424628425586953, "grad_norm": 0.11860477179288864, "learning_rate": 3.98996053804911e-06, "loss": 0.6687604188919067, "step": 11595 }, { "epoch": 2.142647619267591, "grad_norm": 0.09474646300077438, "learning_rate": 3.988366229446335e-06, "loss": 0.45556727051734924, "step": 11596 }, { "epoch": 2.142832395976487, "grad_norm": 0.09376123547554016, "learning_rate": 3.986772160087775e-06, "loss": 0.5133540034294128, "step": 11597 }, { "epoch": 2.143017172685383, "grad_norm": 0.09000354260206223, "learning_rate": 3.985178330036874e-06, "loss": 0.48997581005096436, "step": 11598 }, { "epoch": 2.1432019493942787, "grad_norm": 0.07603364437818527, "learning_rate": 3.983584739357055e-06, "loss": 0.324707955121994, "step": 11599 }, { "epoch": 2.1433867261031745, "grad_norm": 0.08514192700386047, "learning_rate": 3.981991388111739e-06, "loss": 0.4303866922855377, "step": 11600 }, { "epoch": 2.1435715028120708, "grad_norm": 0.11421073228120804, "learning_rate": 3.980398276364342e-06, "loss": 0.5772789716720581, "step": 11601 }, { "epoch": 2.1437562795209666, "grad_norm": 0.08341315388679504, "learning_rate": 3.9788054041782535e-06, "loss": 0.4874880611896515, "step": 11602 }, { "epoch": 2.1439410562298624, "grad_norm": 0.07558920979499817, "learning_rate": 3.977212771616875e-06, "loss": 0.3493494689464569, "step": 11603 }, { "epoch": 2.1441258329387582, "grad_norm": 0.06613948196172714, "learning_rate": 3.97562037874359e-06, "loss": 0.3161487281322479, "step": 11604 }, { "epoch": 2.144310609647654, "grad_norm": 0.09245660156011581, "learning_rate": 3.974028225621762e-06, "loss": 0.45485496520996094, "step": 11605 }, { "epoch": 2.14449538635655, "grad_norm": 0.06644047796726227, "learning_rate": 3.972436312314758e-06, "loss": 0.3771281838417053, "step": 11606 }, { "epoch": 2.1446801630654457, "grad_norm": 0.06990991532802582, "learning_rate": 3.9708446388859335e-06, "loss": 0.3933688998222351, "step": 11607 }, { "epoch": 2.1448649397743416, "grad_norm": 0.06094184145331383, "learning_rate": 3.969253205398626e-06, "loss": 0.26781320571899414, "step": 11608 }, { "epoch": 2.1450497164832374, "grad_norm": 0.10649731755256653, "learning_rate": 3.967662011916174e-06, "loss": 0.5235942006111145, "step": 11609 }, { "epoch": 2.145234493192133, "grad_norm": 0.09553772211074829, "learning_rate": 3.966071058501902e-06, "loss": 0.4927021265029907, "step": 11610 }, { "epoch": 2.145419269901029, "grad_norm": 0.07307995110750198, "learning_rate": 3.964480345219122e-06, "loss": 0.39524558186531067, "step": 11611 }, { "epoch": 2.145604046609925, "grad_norm": 0.07455287873744965, "learning_rate": 3.962889872131147e-06, "loss": 0.35658198595046997, "step": 11612 }, { "epoch": 2.1457888233188207, "grad_norm": 0.07614074647426605, "learning_rate": 3.961299639301262e-06, "loss": 0.33883386850357056, "step": 11613 }, { "epoch": 2.1459736000277165, "grad_norm": 0.08309691399335861, "learning_rate": 3.959709646792761e-06, "loss": 0.39179253578186035, "step": 11614 }, { "epoch": 2.1461583767366124, "grad_norm": 0.12092384696006775, "learning_rate": 3.958119894668917e-06, "loss": 0.6273046135902405, "step": 11615 }, { "epoch": 2.146343153445508, "grad_norm": 0.10526594519615173, "learning_rate": 3.956530382992999e-06, "loss": 0.5224920511245728, "step": 11616 }, { "epoch": 2.146527930154404, "grad_norm": 0.07732649892568588, "learning_rate": 3.954941111828263e-06, "loss": 0.38463592529296875, "step": 11617 }, { "epoch": 2.1467127068633, "grad_norm": 0.07383439689874649, "learning_rate": 3.953352081237963e-06, "loss": 0.4427803158760071, "step": 11618 }, { "epoch": 2.1468974835721957, "grad_norm": 0.06280277669429779, "learning_rate": 3.951763291285329e-06, "loss": 0.26544296741485596, "step": 11619 }, { "epoch": 2.1470822602810915, "grad_norm": 0.05331779643893242, "learning_rate": 3.950174742033593e-06, "loss": 0.2512390911579132, "step": 11620 }, { "epoch": 2.1472670369899873, "grad_norm": 0.06967869400978088, "learning_rate": 3.948586433545979e-06, "loss": 0.39704135060310364, "step": 11621 }, { "epoch": 2.147451813698883, "grad_norm": 0.07695640623569489, "learning_rate": 3.946998365885685e-06, "loss": 0.46992355585098267, "step": 11622 }, { "epoch": 2.147636590407779, "grad_norm": 0.0801285058259964, "learning_rate": 3.945410539115921e-06, "loss": 0.4090726971626282, "step": 11623 }, { "epoch": 2.147821367116675, "grad_norm": 0.07338245213031769, "learning_rate": 3.943822953299881e-06, "loss": 0.39517590403556824, "step": 11624 }, { "epoch": 2.1480061438255706, "grad_norm": 0.09227310866117477, "learning_rate": 3.942235608500735e-06, "loss": 0.5156955122947693, "step": 11625 }, { "epoch": 2.1481909205344665, "grad_norm": 0.08540918678045273, "learning_rate": 3.940648504781664e-06, "loss": 0.3832940459251404, "step": 11626 }, { "epoch": 2.1483756972433623, "grad_norm": 0.09108877927064896, "learning_rate": 3.93906164220582e-06, "loss": 0.44129088521003723, "step": 11627 }, { "epoch": 2.148560473952258, "grad_norm": 0.0894840881228447, "learning_rate": 3.9374750208363625e-06, "loss": 0.49256956577301025, "step": 11628 }, { "epoch": 2.148745250661154, "grad_norm": 0.07960314303636551, "learning_rate": 3.93588864073643e-06, "loss": 0.4049118161201477, "step": 11629 }, { "epoch": 2.1489300273700502, "grad_norm": 0.06465893238782883, "learning_rate": 3.934302501969159e-06, "loss": 0.28798285126686096, "step": 11630 }, { "epoch": 2.1491148040789456, "grad_norm": 0.09121961891651154, "learning_rate": 3.932716604597671e-06, "loss": 0.47333887219429016, "step": 11631 }, { "epoch": 2.149299580787842, "grad_norm": 0.0883626863360405, "learning_rate": 3.931130948685083e-06, "loss": 0.5514802932739258, "step": 11632 }, { "epoch": 2.1494843574967377, "grad_norm": 0.08931796997785568, "learning_rate": 3.9295455342944935e-06, "loss": 0.40167751908302307, "step": 11633 }, { "epoch": 2.1496691342056335, "grad_norm": 0.06782089173793793, "learning_rate": 3.927960361489e-06, "loss": 0.39248910546302795, "step": 11634 }, { "epoch": 2.1498539109145294, "grad_norm": 0.06785066425800323, "learning_rate": 3.926375430331685e-06, "loss": 0.3341401517391205, "step": 11635 }, { "epoch": 2.150038687623425, "grad_norm": 0.0803646445274353, "learning_rate": 3.924790740885628e-06, "loss": 0.43350958824157715, "step": 11636 }, { "epoch": 2.150223464332321, "grad_norm": 0.08781901746988297, "learning_rate": 3.923206293213892e-06, "loss": 0.38169121742248535, "step": 11637 }, { "epoch": 2.150408241041217, "grad_norm": 0.07651541382074356, "learning_rate": 3.921622087379536e-06, "loss": 0.4201239347457886, "step": 11638 }, { "epoch": 2.1505930177501127, "grad_norm": 0.0754622220993042, "learning_rate": 3.920038123445602e-06, "loss": 0.3791052997112274, "step": 11639 }, { "epoch": 2.1507777944590085, "grad_norm": 0.09134471416473389, "learning_rate": 3.9184544014751295e-06, "loss": 0.5088651180267334, "step": 11640 }, { "epoch": 2.1509625711679043, "grad_norm": 0.08660312741994858, "learning_rate": 3.916870921531148e-06, "loss": 0.5049603581428528, "step": 11641 }, { "epoch": 2.1511473478768, "grad_norm": 0.07500050216913223, "learning_rate": 3.915287683676664e-06, "loss": 0.32820960879325867, "step": 11642 }, { "epoch": 2.151332124585696, "grad_norm": 0.09916878491640091, "learning_rate": 3.913704687974701e-06, "loss": 0.5198667049407959, "step": 11643 }, { "epoch": 2.151516901294592, "grad_norm": 0.08865980803966522, "learning_rate": 3.912121934488246e-06, "loss": 0.5391780734062195, "step": 11644 }, { "epoch": 2.1517016780034877, "grad_norm": 0.07601450383663177, "learning_rate": 3.910539423280293e-06, "loss": 0.470345675945282, "step": 11645 }, { "epoch": 2.1518864547123835, "grad_norm": 0.08250866830348969, "learning_rate": 3.908957154413823e-06, "loss": 0.38118302822113037, "step": 11646 }, { "epoch": 2.1520712314212793, "grad_norm": 0.09184881299734116, "learning_rate": 3.907375127951797e-06, "loss": 0.5007253289222717, "step": 11647 }, { "epoch": 2.152256008130175, "grad_norm": 0.07770222425460815, "learning_rate": 3.90579334395718e-06, "loss": 0.39987003803253174, "step": 11648 }, { "epoch": 2.152440784839071, "grad_norm": 0.09182540327310562, "learning_rate": 3.904211802492922e-06, "loss": 0.48013049364089966, "step": 11649 }, { "epoch": 2.152625561547967, "grad_norm": 0.08273793756961823, "learning_rate": 3.902630503621963e-06, "loss": 0.5183853507041931, "step": 11650 }, { "epoch": 2.1528103382568626, "grad_norm": 0.09289605170488358, "learning_rate": 3.901049447407234e-06, "loss": 0.39718329906463623, "step": 11651 }, { "epoch": 2.1529951149657585, "grad_norm": 0.06910242140293121, "learning_rate": 3.899468633911658e-06, "loss": 0.3946877717971802, "step": 11652 }, { "epoch": 2.1531798916746543, "grad_norm": 0.08967945724725723, "learning_rate": 3.897888063198142e-06, "loss": 0.4960063695907593, "step": 11653 }, { "epoch": 2.15336466838355, "grad_norm": 0.07799212634563446, "learning_rate": 3.896307735329588e-06, "loss": 0.4674167037010193, "step": 11654 }, { "epoch": 2.153549445092446, "grad_norm": 0.08065201342105865, "learning_rate": 3.8947276503688925e-06, "loss": 0.3883388936519623, "step": 11655 }, { "epoch": 2.1537342218013418, "grad_norm": 0.055760860443115234, "learning_rate": 3.893147808378935e-06, "loss": 0.21677128970623016, "step": 11656 }, { "epoch": 2.1539189985102376, "grad_norm": 0.09226616472005844, "learning_rate": 3.8915682094225885e-06, "loss": 0.4941445589065552, "step": 11657 }, { "epoch": 2.1541037752191334, "grad_norm": 0.07622168213129044, "learning_rate": 3.88998885356272e-06, "loss": 0.4406668543815613, "step": 11658 }, { "epoch": 2.1542885519280297, "grad_norm": 0.08101237565279007, "learning_rate": 3.8884097408621754e-06, "loss": 0.47072938084602356, "step": 11659 }, { "epoch": 2.154473328636925, "grad_norm": 0.08489350974559784, "learning_rate": 3.886830871383806e-06, "loss": 0.3665739595890045, "step": 11660 }, { "epoch": 2.1546581053458214, "grad_norm": 0.12182774394750595, "learning_rate": 3.8852522451904395e-06, "loss": 0.5753546357154846, "step": 11661 }, { "epoch": 2.154842882054717, "grad_norm": 0.10022709518671036, "learning_rate": 3.8836738623449e-06, "loss": 0.5788068771362305, "step": 11662 }, { "epoch": 2.155027658763613, "grad_norm": 0.08397048711776733, "learning_rate": 3.882095722910011e-06, "loss": 0.43908387422561646, "step": 11663 }, { "epoch": 2.155212435472509, "grad_norm": 0.09467579424381256, "learning_rate": 3.880517826948569e-06, "loss": 0.4748491048812866, "step": 11664 }, { "epoch": 2.1553972121814047, "grad_norm": 0.08129502087831497, "learning_rate": 3.878940174523371e-06, "loss": 0.5089216828346252, "step": 11665 }, { "epoch": 2.1555819888903005, "grad_norm": 0.08315569162368774, "learning_rate": 3.877362765697209e-06, "loss": 0.3821069300174713, "step": 11666 }, { "epoch": 2.1557667655991963, "grad_norm": 0.08684141933917999, "learning_rate": 3.875785600532849e-06, "loss": 0.3733517527580261, "step": 11667 }, { "epoch": 2.155951542308092, "grad_norm": 0.09421560913324356, "learning_rate": 3.874208679093063e-06, "loss": 0.5246985554695129, "step": 11668 }, { "epoch": 2.156136319016988, "grad_norm": 0.06547679752111435, "learning_rate": 3.872632001440604e-06, "loss": 0.33231088519096375, "step": 11669 }, { "epoch": 2.156321095725884, "grad_norm": 0.08077463507652283, "learning_rate": 3.871055567638224e-06, "loss": 0.4162328839302063, "step": 11670 }, { "epoch": 2.1565058724347796, "grad_norm": 0.08313507586717606, "learning_rate": 3.869479377748655e-06, "loss": 0.41471588611602783, "step": 11671 }, { "epoch": 2.1566906491436755, "grad_norm": 0.06435898691415787, "learning_rate": 3.867903431834632e-06, "loss": 0.30664384365081787, "step": 11672 }, { "epoch": 2.1568754258525713, "grad_norm": 0.08906294405460358, "learning_rate": 3.866327729958863e-06, "loss": 0.4741312563419342, "step": 11673 }, { "epoch": 2.157060202561467, "grad_norm": 0.10594379901885986, "learning_rate": 3.864752272184065e-06, "loss": 0.7573657631874084, "step": 11674 }, { "epoch": 2.157244979270363, "grad_norm": 0.07337291538715363, "learning_rate": 3.863177058572925e-06, "loss": 0.3911682963371277, "step": 11675 }, { "epoch": 2.157429755979259, "grad_norm": 0.08629732578992844, "learning_rate": 3.86160208918814e-06, "loss": 0.44535213708877563, "step": 11676 }, { "epoch": 2.1576145326881546, "grad_norm": 0.06811978667974472, "learning_rate": 3.860027364092393e-06, "loss": 0.37970200181007385, "step": 11677 }, { "epoch": 2.1577993093970504, "grad_norm": 0.09466637670993805, "learning_rate": 3.858452883348342e-06, "loss": 0.42708879709243774, "step": 11678 }, { "epoch": 2.1579840861059463, "grad_norm": 0.07300035655498505, "learning_rate": 3.856878647018654e-06, "loss": 0.46120768785476685, "step": 11679 }, { "epoch": 2.158168862814842, "grad_norm": 0.09889834374189377, "learning_rate": 3.855304655165978e-06, "loss": 0.5782213807106018, "step": 11680 }, { "epoch": 2.158353639523738, "grad_norm": 0.08899401128292084, "learning_rate": 3.853730907852949e-06, "loss": 0.4296332001686096, "step": 11681 }, { "epoch": 2.1585384162326338, "grad_norm": 0.07711257040500641, "learning_rate": 3.852157405142199e-06, "loss": 0.49526482820510864, "step": 11682 }, { "epoch": 2.1587231929415296, "grad_norm": 0.08905673772096634, "learning_rate": 3.850584147096355e-06, "loss": 0.456241637468338, "step": 11683 }, { "epoch": 2.1589079696504254, "grad_norm": 0.10414526611566544, "learning_rate": 3.849011133778021e-06, "loss": 0.5894927382469177, "step": 11684 }, { "epoch": 2.1590927463593212, "grad_norm": 0.08418159186840057, "learning_rate": 3.847438365249799e-06, "loss": 0.40886250138282776, "step": 11685 }, { "epoch": 2.159277523068217, "grad_norm": 0.06604897230863571, "learning_rate": 3.845865841574286e-06, "loss": 0.3613528907299042, "step": 11686 }, { "epoch": 2.159462299777113, "grad_norm": 0.098621666431427, "learning_rate": 3.8442935628140545e-06, "loss": 0.5726948380470276, "step": 11687 }, { "epoch": 2.1596470764860087, "grad_norm": 0.07698316872119904, "learning_rate": 3.84272152903168e-06, "loss": 0.4723285138607025, "step": 11688 }, { "epoch": 2.1598318531949046, "grad_norm": 0.0858742967247963, "learning_rate": 3.841149740289725e-06, "loss": 0.4319852292537689, "step": 11689 }, { "epoch": 2.160016629903801, "grad_norm": 0.08265368640422821, "learning_rate": 3.839578196650742e-06, "loss": 0.4376170337200165, "step": 11690 }, { "epoch": 2.1602014066126967, "grad_norm": 0.08331847935914993, "learning_rate": 3.838006898177277e-06, "loss": 0.5049540400505066, "step": 11691 }, { "epoch": 2.1603861833215925, "grad_norm": 0.08013642579317093, "learning_rate": 3.836435844931855e-06, "loss": 0.40282782912254333, "step": 11692 }, { "epoch": 2.1605709600304883, "grad_norm": 0.08243634551763535, "learning_rate": 3.834865036977003e-06, "loss": 0.5801693797111511, "step": 11693 }, { "epoch": 2.160755736739384, "grad_norm": 0.08161672204732895, "learning_rate": 3.833294474375234e-06, "loss": 0.40056663751602173, "step": 11694 }, { "epoch": 2.16094051344828, "grad_norm": 0.08250513672828674, "learning_rate": 3.831724157189053e-06, "loss": 0.433946430683136, "step": 11695 }, { "epoch": 2.161125290157176, "grad_norm": 0.08036414533853531, "learning_rate": 3.830154085480952e-06, "loss": 0.4758602976799011, "step": 11696 }, { "epoch": 2.1613100668660716, "grad_norm": 0.07142708450555801, "learning_rate": 3.828584259313418e-06, "loss": 0.34024667739868164, "step": 11697 }, { "epoch": 2.1614948435749675, "grad_norm": 0.11011051386594772, "learning_rate": 3.82701467874892e-06, "loss": 0.5866760611534119, "step": 11698 }, { "epoch": 2.1616796202838633, "grad_norm": 0.08226508647203445, "learning_rate": 3.825445343849925e-06, "loss": 0.5511084794998169, "step": 11699 }, { "epoch": 2.161864396992759, "grad_norm": 0.0740421712398529, "learning_rate": 3.823876254678891e-06, "loss": 0.3521818220615387, "step": 11700 }, { "epoch": 2.162049173701655, "grad_norm": 0.058426644653081894, "learning_rate": 3.822307411298256e-06, "loss": 0.21237623691558838, "step": 11701 }, { "epoch": 2.1622339504105508, "grad_norm": 0.08686983585357666, "learning_rate": 3.820738813770455e-06, "loss": 0.4280414283275604, "step": 11702 }, { "epoch": 2.1624187271194466, "grad_norm": 0.06894486397504807, "learning_rate": 3.819170462157924e-06, "loss": 0.35992392897605896, "step": 11703 }, { "epoch": 2.1626035038283424, "grad_norm": 0.11185557395219803, "learning_rate": 3.8176023565230676e-06, "loss": 0.5863062739372253, "step": 11704 }, { "epoch": 2.1627882805372383, "grad_norm": 0.08624433726072311, "learning_rate": 3.816034496928295e-06, "loss": 0.44079217314720154, "step": 11705 }, { "epoch": 2.162973057246134, "grad_norm": 0.09721717238426208, "learning_rate": 3.8144668834360067e-06, "loss": 0.502701461315155, "step": 11706 }, { "epoch": 2.16315783395503, "grad_norm": 0.06353076547384262, "learning_rate": 3.812899516108579e-06, "loss": 0.32220011949539185, "step": 11707 }, { "epoch": 2.1633426106639257, "grad_norm": 0.0796390026807785, "learning_rate": 3.8113323950083947e-06, "loss": 0.4178166687488556, "step": 11708 }, { "epoch": 2.1635273873728216, "grad_norm": 0.0978512093424797, "learning_rate": 3.80976552019782e-06, "loss": 0.5056663751602173, "step": 11709 }, { "epoch": 2.1637121640817174, "grad_norm": 0.07964329421520233, "learning_rate": 3.80819889173921e-06, "loss": 0.3986744284629822, "step": 11710 }, { "epoch": 2.1638969407906132, "grad_norm": 0.08560479432344437, "learning_rate": 3.8066325096949153e-06, "loss": 0.3445097804069519, "step": 11711 }, { "epoch": 2.164081717499509, "grad_norm": 0.0772845521569252, "learning_rate": 3.8050663741272675e-06, "loss": 0.3878020644187927, "step": 11712 }, { "epoch": 2.164266494208405, "grad_norm": 0.07131746411323547, "learning_rate": 3.803500485098597e-06, "loss": 0.3322165012359619, "step": 11713 }, { "epoch": 2.1644512709173007, "grad_norm": 0.07706581801176071, "learning_rate": 3.8019348426712198e-06, "loss": 0.41919857263565063, "step": 11714 }, { "epoch": 2.1646360476261965, "grad_norm": 0.08948921412229538, "learning_rate": 3.8003694469074446e-06, "loss": 0.411518394947052, "step": 11715 }, { "epoch": 2.1648208243350924, "grad_norm": 0.07101071625947952, "learning_rate": 3.7988042978695706e-06, "loss": 0.3982822597026825, "step": 11716 }, { "epoch": 2.165005601043988, "grad_norm": 0.09112266451120377, "learning_rate": 3.797239395619887e-06, "loss": 0.41766148805618286, "step": 11717 }, { "epoch": 2.165190377752884, "grad_norm": 0.07062102854251862, "learning_rate": 3.7956747402206663e-06, "loss": 0.38626226782798767, "step": 11718 }, { "epoch": 2.1653751544617803, "grad_norm": 0.09081759303808212, "learning_rate": 3.79411033173418e-06, "loss": 0.40110763907432556, "step": 11719 }, { "epoch": 2.165559931170676, "grad_norm": 0.09068002551794052, "learning_rate": 3.7925461702226897e-06, "loss": 0.4494416117668152, "step": 11720 }, { "epoch": 2.165744707879572, "grad_norm": 0.06869544833898544, "learning_rate": 3.7909822557484378e-06, "loss": 0.3673214912414551, "step": 11721 }, { "epoch": 2.165929484588468, "grad_norm": 0.07023914158344269, "learning_rate": 3.7894185883736633e-06, "loss": 0.4389057755470276, "step": 11722 }, { "epoch": 2.1661142612973636, "grad_norm": 0.0772065743803978, "learning_rate": 3.7878551681606057e-06, "loss": 0.4547201097011566, "step": 11723 }, { "epoch": 2.1662990380062594, "grad_norm": 0.08422631770372391, "learning_rate": 3.7862919951714737e-06, "loss": 0.3660799562931061, "step": 11724 }, { "epoch": 2.1664838147151553, "grad_norm": 0.08606277406215668, "learning_rate": 3.7847290694684836e-06, "loss": 0.4506904184818268, "step": 11725 }, { "epoch": 2.166668591424051, "grad_norm": 0.07380979508161545, "learning_rate": 3.7831663911138283e-06, "loss": 0.30309197306632996, "step": 11726 }, { "epoch": 2.166853368132947, "grad_norm": 0.08433002978563309, "learning_rate": 3.7816039601696996e-06, "loss": 0.6037353873252869, "step": 11727 }, { "epoch": 2.1670381448418428, "grad_norm": 0.0858033299446106, "learning_rate": 3.78004177669828e-06, "loss": 0.30435726046562195, "step": 11728 }, { "epoch": 2.1672229215507386, "grad_norm": 0.10412168502807617, "learning_rate": 3.7784798407617364e-06, "loss": 0.5917178988456726, "step": 11729 }, { "epoch": 2.1674076982596344, "grad_norm": 0.08749578148126602, "learning_rate": 3.776918152422231e-06, "loss": 0.49174991250038147, "step": 11730 }, { "epoch": 2.1675924749685302, "grad_norm": 0.07869294285774231, "learning_rate": 3.7753567117419175e-06, "loss": 0.2797946631908417, "step": 11731 }, { "epoch": 2.167777251677426, "grad_norm": 0.10534682124853134, "learning_rate": 3.773795518782929e-06, "loss": 0.5504571795463562, "step": 11732 }, { "epoch": 2.167962028386322, "grad_norm": 0.08306889235973358, "learning_rate": 3.7722345736073984e-06, "loss": 0.44501793384552, "step": 11733 }, { "epoch": 2.1681468050952177, "grad_norm": 0.09396319091320038, "learning_rate": 3.7706738762774485e-06, "loss": 0.43054285645484924, "step": 11734 }, { "epoch": 2.1683315818041136, "grad_norm": 0.062297649681568146, "learning_rate": 3.7691134268551897e-06, "loss": 0.2745853364467621, "step": 11735 }, { "epoch": 2.1685163585130094, "grad_norm": 0.0928124189376831, "learning_rate": 3.7675532254027216e-06, "loss": 0.5709888935089111, "step": 11736 }, { "epoch": 2.168701135221905, "grad_norm": 0.08623574674129486, "learning_rate": 3.76599327198214e-06, "loss": 0.5272578597068787, "step": 11737 }, { "epoch": 2.168885911930801, "grad_norm": 0.0718969851732254, "learning_rate": 3.76443356665552e-06, "loss": 0.3616103529930115, "step": 11738 }, { "epoch": 2.169070688639697, "grad_norm": 0.0819055438041687, "learning_rate": 3.7628741094849374e-06, "loss": 0.46330684423446655, "step": 11739 }, { "epoch": 2.1692554653485927, "grad_norm": 0.09148211777210236, "learning_rate": 3.761314900532449e-06, "loss": 0.5191062688827515, "step": 11740 }, { "epoch": 2.1694402420574885, "grad_norm": 0.0735086053609848, "learning_rate": 3.7597559398601102e-06, "loss": 0.38592714071273804, "step": 11741 }, { "epoch": 2.1696250187663844, "grad_norm": 0.07115492224693298, "learning_rate": 3.7581972275299606e-06, "loss": 0.3410407304763794, "step": 11742 }, { "epoch": 2.16980979547528, "grad_norm": 0.09364961087703705, "learning_rate": 3.7566387636040334e-06, "loss": 0.5007708668708801, "step": 11743 }, { "epoch": 2.169994572184176, "grad_norm": 0.07597985863685608, "learning_rate": 3.755080548144351e-06, "loss": 0.49047887325286865, "step": 11744 }, { "epoch": 2.170179348893072, "grad_norm": 0.0871448889374733, "learning_rate": 3.7535225812129274e-06, "loss": 0.37562286853790283, "step": 11745 }, { "epoch": 2.1703641256019677, "grad_norm": 0.0897744670510292, "learning_rate": 3.7519648628717596e-06, "loss": 0.47237977385520935, "step": 11746 }, { "epoch": 2.1705489023108635, "grad_norm": 0.11996883153915405, "learning_rate": 3.7504073931828424e-06, "loss": 0.6920106410980225, "step": 11747 }, { "epoch": 2.1707336790197598, "grad_norm": 0.09214407950639725, "learning_rate": 3.7488501722081582e-06, "loss": 0.5268396735191345, "step": 11748 }, { "epoch": 2.1709184557286556, "grad_norm": 0.08599527925252914, "learning_rate": 3.7472932000096807e-06, "loss": 0.39189955592155457, "step": 11749 }, { "epoch": 2.1711032324375514, "grad_norm": 0.09038657695055008, "learning_rate": 3.7457364766493708e-06, "loss": 0.43005287647247314, "step": 11750 }, { "epoch": 2.1712880091464473, "grad_norm": 0.07823736220598221, "learning_rate": 3.7441800021891863e-06, "loss": 0.4083983898162842, "step": 11751 }, { "epoch": 2.171472785855343, "grad_norm": 0.0693674385547638, "learning_rate": 3.742623776691061e-06, "loss": 0.42666032910346985, "step": 11752 }, { "epoch": 2.171657562564239, "grad_norm": 0.07568097859621048, "learning_rate": 3.741067800216934e-06, "loss": 0.39862126111984253, "step": 11753 }, { "epoch": 2.1718423392731347, "grad_norm": 0.08159180730581284, "learning_rate": 3.739512072828726e-06, "loss": 0.4888143539428711, "step": 11754 }, { "epoch": 2.1720271159820306, "grad_norm": 0.07871854305267334, "learning_rate": 3.737956594588351e-06, "loss": 0.3868490755558014, "step": 11755 }, { "epoch": 2.1722118926909264, "grad_norm": 0.10375802218914032, "learning_rate": 3.736401365557716e-06, "loss": 0.6778278350830078, "step": 11756 }, { "epoch": 2.1723966693998222, "grad_norm": 0.08540346473455429, "learning_rate": 3.734846385798707e-06, "loss": 0.4792468845844269, "step": 11757 }, { "epoch": 2.172581446108718, "grad_norm": 0.07805228233337402, "learning_rate": 3.73329165537321e-06, "loss": 0.3809247314929962, "step": 11758 }, { "epoch": 2.172766222817614, "grad_norm": 0.08918479830026627, "learning_rate": 3.731737174343103e-06, "loss": 0.44978439807891846, "step": 11759 }, { "epoch": 2.1729509995265097, "grad_norm": 0.07604454457759857, "learning_rate": 3.730182942770243e-06, "loss": 0.3504337668418884, "step": 11760 }, { "epoch": 2.1731357762354055, "grad_norm": 0.07437156140804291, "learning_rate": 3.728628960716485e-06, "loss": 0.3848017454147339, "step": 11761 }, { "epoch": 2.1733205529443014, "grad_norm": 0.08974701166152954, "learning_rate": 3.727075228243674e-06, "loss": 0.4856947064399719, "step": 11762 }, { "epoch": 2.173505329653197, "grad_norm": 0.09555413573980331, "learning_rate": 3.7255217454136428e-06, "loss": 0.4570215046405792, "step": 11763 }, { "epoch": 2.173690106362093, "grad_norm": 0.06414473801851273, "learning_rate": 3.7239685122882173e-06, "loss": 0.37356042861938477, "step": 11764 }, { "epoch": 2.173874883070989, "grad_norm": 0.0668707937002182, "learning_rate": 3.722415528929212e-06, "loss": 0.35844603180885315, "step": 11765 }, { "epoch": 2.1740596597798847, "grad_norm": 0.0816434994339943, "learning_rate": 3.7208627953984257e-06, "loss": 0.4278120696544647, "step": 11766 }, { "epoch": 2.1742444364887805, "grad_norm": 0.09556222707033157, "learning_rate": 3.7193103117576557e-06, "loss": 0.4568500220775604, "step": 11767 }, { "epoch": 2.1744292131976763, "grad_norm": 0.09306345134973526, "learning_rate": 3.7177580780686838e-06, "loss": 0.4913712441921234, "step": 11768 }, { "epoch": 2.174613989906572, "grad_norm": 0.07091156393289566, "learning_rate": 3.7162060943932875e-06, "loss": 0.3560081720352173, "step": 11769 }, { "epoch": 2.174798766615468, "grad_norm": 0.07228139787912369, "learning_rate": 3.7146543607932284e-06, "loss": 0.3397913873195648, "step": 11770 }, { "epoch": 2.174983543324364, "grad_norm": 0.057565197348594666, "learning_rate": 3.7131028773302656e-06, "loss": 0.3120599687099457, "step": 11771 }, { "epoch": 2.1751683200332597, "grad_norm": 0.07905125617980957, "learning_rate": 3.7115516440661347e-06, "loss": 0.4089307487010956, "step": 11772 }, { "epoch": 2.1753530967421555, "grad_norm": 0.08861220628023148, "learning_rate": 3.710000661062578e-06, "loss": 0.4595871567726135, "step": 11773 }, { "epoch": 2.1755378734510513, "grad_norm": 0.07304118573665619, "learning_rate": 3.7084499283813103e-06, "loss": 0.30586081743240356, "step": 11774 }, { "epoch": 2.175722650159947, "grad_norm": 0.0830596312880516, "learning_rate": 3.706899446084055e-06, "loss": 0.3427889943122864, "step": 11775 }, { "epoch": 2.175907426868843, "grad_norm": 0.09748268127441406, "learning_rate": 3.7053492142325156e-06, "loss": 0.5185943841934204, "step": 11776 }, { "epoch": 2.1760922035777392, "grad_norm": 0.08044993877410889, "learning_rate": 3.703799232888381e-06, "loss": 0.38030368089675903, "step": 11777 }, { "epoch": 2.176276980286635, "grad_norm": 0.09990067034959793, "learning_rate": 3.7022495021133378e-06, "loss": 0.5028130412101746, "step": 11778 }, { "epoch": 2.176461756995531, "grad_norm": 0.09113533794879913, "learning_rate": 3.700700021969066e-06, "loss": 0.3763606548309326, "step": 11779 }, { "epoch": 2.1766465337044267, "grad_norm": 0.0950152724981308, "learning_rate": 3.699150792517221e-06, "loss": 0.5273303985595703, "step": 11780 }, { "epoch": 2.1768313104133226, "grad_norm": 0.0905051901936531, "learning_rate": 3.6976018138194625e-06, "loss": 0.6240034699440002, "step": 11781 }, { "epoch": 2.1770160871222184, "grad_norm": 0.11675798892974854, "learning_rate": 3.6960530859374334e-06, "loss": 0.7055141925811768, "step": 11782 }, { "epoch": 2.177200863831114, "grad_norm": 0.08601131290197372, "learning_rate": 3.6945046089327698e-06, "loss": 0.47330668568611145, "step": 11783 }, { "epoch": 2.17738564054001, "grad_norm": 0.06150782108306885, "learning_rate": 3.6929563828670945e-06, "loss": 0.27529773116111755, "step": 11784 }, { "epoch": 2.177570417248906, "grad_norm": 0.07958129793405533, "learning_rate": 3.6914084078020263e-06, "loss": 0.42535436153411865, "step": 11785 }, { "epoch": 2.1777551939578017, "grad_norm": 0.07603038847446442, "learning_rate": 3.6898606837991635e-06, "loss": 0.35221901535987854, "step": 11786 }, { "epoch": 2.1779399706666975, "grad_norm": 0.06935341656208038, "learning_rate": 3.6883132109201037e-06, "loss": 0.34105798602104187, "step": 11787 }, { "epoch": 2.1781247473755934, "grad_norm": 0.07835391908884048, "learning_rate": 3.6867659892264307e-06, "loss": 0.44978412985801697, "step": 11788 }, { "epoch": 2.178309524084489, "grad_norm": 0.08281917124986649, "learning_rate": 3.685219018779721e-06, "loss": 0.37326085567474365, "step": 11789 }, { "epoch": 2.178494300793385, "grad_norm": 0.08823174983263016, "learning_rate": 3.683672299641541e-06, "loss": 0.45342573523521423, "step": 11790 }, { "epoch": 2.178679077502281, "grad_norm": 0.06711402535438538, "learning_rate": 3.6821258318734376e-06, "loss": 0.2581331431865692, "step": 11791 }, { "epoch": 2.1788638542111767, "grad_norm": 0.10414480417966843, "learning_rate": 3.680579615536961e-06, "loss": 0.6712921857833862, "step": 11792 }, { "epoch": 2.1790486309200725, "grad_norm": 0.078679159283638, "learning_rate": 3.6790336506936473e-06, "loss": 0.4711715281009674, "step": 11793 }, { "epoch": 2.1792334076289683, "grad_norm": 0.0918063074350357, "learning_rate": 3.677487937405013e-06, "loss": 0.4823344647884369, "step": 11794 }, { "epoch": 2.179418184337864, "grad_norm": 0.08719798922538757, "learning_rate": 3.6759424757325813e-06, "loss": 0.43217337131500244, "step": 11795 }, { "epoch": 2.17960296104676, "grad_norm": 0.08216572552919388, "learning_rate": 3.674397265737857e-06, "loss": 0.4107217490673065, "step": 11796 }, { "epoch": 2.179787737755656, "grad_norm": 0.10715235769748688, "learning_rate": 3.6728523074823276e-06, "loss": 0.5502179265022278, "step": 11797 }, { "epoch": 2.1799725144645516, "grad_norm": 0.06882361322641373, "learning_rate": 3.6713076010274806e-06, "loss": 0.37534210085868835, "step": 11798 }, { "epoch": 2.1801572911734475, "grad_norm": 0.08649523556232452, "learning_rate": 3.669763146434795e-06, "loss": 0.4610140323638916, "step": 11799 }, { "epoch": 2.1803420678823433, "grad_norm": 0.07134760916233063, "learning_rate": 3.668218943765729e-06, "loss": 0.28405073285102844, "step": 11800 }, { "epoch": 2.180526844591239, "grad_norm": 0.07951401174068451, "learning_rate": 3.666674993081738e-06, "loss": 0.44334912300109863, "step": 11801 }, { "epoch": 2.180711621300135, "grad_norm": 0.07624167203903198, "learning_rate": 3.6651312944442684e-06, "loss": 0.4165308475494385, "step": 11802 }, { "epoch": 2.180896398009031, "grad_norm": 0.07992348074913025, "learning_rate": 3.6635878479147545e-06, "loss": 0.38696640729904175, "step": 11803 }, { "epoch": 2.1810811747179266, "grad_norm": 0.09303223341703415, "learning_rate": 3.6620446535546227e-06, "loss": 0.48980575799942017, "step": 11804 }, { "epoch": 2.1812659514268224, "grad_norm": 0.08045366406440735, "learning_rate": 3.6605017114252816e-06, "loss": 0.3471381664276123, "step": 11805 }, { "epoch": 2.1814507281357187, "grad_norm": 0.08809412270784378, "learning_rate": 3.65895902158814e-06, "loss": 0.42216527462005615, "step": 11806 }, { "epoch": 2.181635504844614, "grad_norm": 0.07181458920240402, "learning_rate": 3.6574165841045894e-06, "loss": 0.30205219984054565, "step": 11807 }, { "epoch": 2.1818202815535104, "grad_norm": 0.0813143402338028, "learning_rate": 3.655874399036016e-06, "loss": 0.49499523639678955, "step": 11808 }, { "epoch": 2.182005058262406, "grad_norm": 0.08100121468305588, "learning_rate": 3.6543324664437916e-06, "loss": 0.37999892234802246, "step": 11809 }, { "epoch": 2.182189834971302, "grad_norm": 0.0968104675412178, "learning_rate": 3.6527907863892877e-06, "loss": 0.42118382453918457, "step": 11810 }, { "epoch": 2.182374611680198, "grad_norm": 0.10659095644950867, "learning_rate": 3.6512493589338483e-06, "loss": 0.5945422649383545, "step": 11811 }, { "epoch": 2.1825593883890937, "grad_norm": 0.07730945199728012, "learning_rate": 3.6497081841388215e-06, "loss": 0.42148011922836304, "step": 11812 }, { "epoch": 2.1827441650979895, "grad_norm": 0.1045936793088913, "learning_rate": 3.6481672620655452e-06, "loss": 0.5791264772415161, "step": 11813 }, { "epoch": 2.1829289418068853, "grad_norm": 0.08466159552335739, "learning_rate": 3.646626592775332e-06, "loss": 0.4451292157173157, "step": 11814 }, { "epoch": 2.183113718515781, "grad_norm": 0.07520155608654022, "learning_rate": 3.6450861763295076e-06, "loss": 0.3622979521751404, "step": 11815 }, { "epoch": 2.183298495224677, "grad_norm": 0.0929901972413063, "learning_rate": 3.643546012789374e-06, "loss": 0.5606027841567993, "step": 11816 }, { "epoch": 2.183483271933573, "grad_norm": 0.09161688387393951, "learning_rate": 3.642006102216219e-06, "loss": 0.44947391748428345, "step": 11817 }, { "epoch": 2.1836680486424687, "grad_norm": 0.09159534424543381, "learning_rate": 3.64046644467133e-06, "loss": 0.5428000092506409, "step": 11818 }, { "epoch": 2.1838528253513645, "grad_norm": 0.08401782065629959, "learning_rate": 3.638927040215984e-06, "loss": 0.5569350123405457, "step": 11819 }, { "epoch": 2.1840376020602603, "grad_norm": 0.07696730643510818, "learning_rate": 3.6373878889114356e-06, "loss": 0.4635745882987976, "step": 11820 }, { "epoch": 2.184222378769156, "grad_norm": 0.08972986042499542, "learning_rate": 3.635848990818944e-06, "loss": 0.5002453327178955, "step": 11821 }, { "epoch": 2.184407155478052, "grad_norm": 0.08567506819963455, "learning_rate": 3.634310345999752e-06, "loss": 0.46873846650123596, "step": 11822 }, { "epoch": 2.184591932186948, "grad_norm": 0.09065090119838715, "learning_rate": 3.632771954515092e-06, "loss": 0.5605395436286926, "step": 11823 }, { "epoch": 2.1847767088958436, "grad_norm": 0.07702086865901947, "learning_rate": 3.6312338164261917e-06, "loss": 0.38053658604621887, "step": 11824 }, { "epoch": 2.1849614856047395, "grad_norm": 0.09548892825841904, "learning_rate": 3.629695931794257e-06, "loss": 0.568294882774353, "step": 11825 }, { "epoch": 2.1851462623136353, "grad_norm": 0.09351617842912674, "learning_rate": 3.6281583006804933e-06, "loss": 0.5914403200149536, "step": 11826 }, { "epoch": 2.185331039022531, "grad_norm": 0.09066955745220184, "learning_rate": 3.626620923146095e-06, "loss": 0.44789034128189087, "step": 11827 }, { "epoch": 2.185515815731427, "grad_norm": 0.09279196709394455, "learning_rate": 3.6250837992522435e-06, "loss": 0.5210738182067871, "step": 11828 }, { "epoch": 2.1857005924403228, "grad_norm": 0.06666386127471924, "learning_rate": 3.6235469290601122e-06, "loss": 0.2853530943393707, "step": 11829 }, { "epoch": 2.1858853691492186, "grad_norm": 0.07821212708950043, "learning_rate": 3.622010312630867e-06, "loss": 0.3277926445007324, "step": 11830 }, { "epoch": 2.1860701458581144, "grad_norm": 0.09027982503175735, "learning_rate": 3.6204739500256546e-06, "loss": 0.4053875207901001, "step": 11831 }, { "epoch": 2.1862549225670103, "grad_norm": 0.08265797793865204, "learning_rate": 3.61893784130562e-06, "loss": 0.4514121413230896, "step": 11832 }, { "epoch": 2.186439699275906, "grad_norm": 0.09165399521589279, "learning_rate": 3.6174019865318987e-06, "loss": 0.5018165111541748, "step": 11833 }, { "epoch": 2.186624475984802, "grad_norm": 0.07767356932163239, "learning_rate": 3.615866385765603e-06, "loss": 0.43387266993522644, "step": 11834 }, { "epoch": 2.186809252693698, "grad_norm": 0.08318489789962769, "learning_rate": 3.6143310390678544e-06, "loss": 0.45476630330085754, "step": 11835 }, { "epoch": 2.1869940294025936, "grad_norm": 0.08616919815540314, "learning_rate": 3.6127959464997565e-06, "loss": 0.39537912607192993, "step": 11836 }, { "epoch": 2.18717880611149, "grad_norm": 0.09167175740003586, "learning_rate": 3.6112611081223937e-06, "loss": 0.38645097613334656, "step": 11837 }, { "epoch": 2.1873635828203857, "grad_norm": 0.08732689172029495, "learning_rate": 3.6097265239968537e-06, "loss": 0.46446800231933594, "step": 11838 }, { "epoch": 2.1875483595292815, "grad_norm": 0.07376278191804886, "learning_rate": 3.6081921941842024e-06, "loss": 0.3687284588813782, "step": 11839 }, { "epoch": 2.1877331362381773, "grad_norm": 0.08316045999526978, "learning_rate": 3.6066581187455042e-06, "loss": 0.39745235443115234, "step": 11840 }, { "epoch": 2.187917912947073, "grad_norm": 0.09907536953687668, "learning_rate": 3.605124297741811e-06, "loss": 0.5286823511123657, "step": 11841 }, { "epoch": 2.188102689655969, "grad_norm": 0.07469500601291656, "learning_rate": 3.603590731234163e-06, "loss": 0.36436185240745544, "step": 11842 }, { "epoch": 2.188287466364865, "grad_norm": 0.0899069607257843, "learning_rate": 3.6020574192835934e-06, "loss": 0.371680349111557, "step": 11843 }, { "epoch": 2.1884722430737606, "grad_norm": 0.0726056843996048, "learning_rate": 3.6005243619511242e-06, "loss": 0.37702593207359314, "step": 11844 }, { "epoch": 2.1886570197826565, "grad_norm": 0.08643928915262222, "learning_rate": 3.598991559297761e-06, "loss": 0.45487257838249207, "step": 11845 }, { "epoch": 2.1888417964915523, "grad_norm": 0.08755392581224442, "learning_rate": 3.5974590113845076e-06, "loss": 0.3754900395870209, "step": 11846 }, { "epoch": 2.189026573200448, "grad_norm": 0.10274093598127365, "learning_rate": 3.5959267182723544e-06, "loss": 0.6082805395126343, "step": 11847 }, { "epoch": 2.189211349909344, "grad_norm": 0.0784970372915268, "learning_rate": 3.5943946800222816e-06, "loss": 0.4100046455860138, "step": 11848 }, { "epoch": 2.18939612661824, "grad_norm": 0.05849752947688103, "learning_rate": 3.5928628966952608e-06, "loss": 0.28361162543296814, "step": 11849 }, { "epoch": 2.1895809033271356, "grad_norm": 0.07341820001602173, "learning_rate": 3.5913313683522544e-06, "loss": 0.3717271685600281, "step": 11850 }, { "epoch": 2.1897656800360314, "grad_norm": 0.09280548244714737, "learning_rate": 3.5898000950542067e-06, "loss": 0.39923617243766785, "step": 11851 }, { "epoch": 2.1899504567449273, "grad_norm": 0.10625611990690231, "learning_rate": 3.58826907686206e-06, "loss": 0.5870501399040222, "step": 11852 }, { "epoch": 2.190135233453823, "grad_norm": 0.10157416760921478, "learning_rate": 3.586738313836746e-06, "loss": 0.5157890319824219, "step": 11853 }, { "epoch": 2.190320010162719, "grad_norm": 0.08418985456228256, "learning_rate": 3.585207806039178e-06, "loss": 0.4399016201496124, "step": 11854 }, { "epoch": 2.1905047868716148, "grad_norm": 0.08287651836872101, "learning_rate": 3.583677553530276e-06, "loss": 0.4425356388092041, "step": 11855 }, { "epoch": 2.1906895635805106, "grad_norm": 0.08783163130283356, "learning_rate": 3.5821475563709294e-06, "loss": 0.4846430718898773, "step": 11856 }, { "epoch": 2.1908743402894064, "grad_norm": 0.10092713683843613, "learning_rate": 3.5806178146220315e-06, "loss": 0.6803297400474548, "step": 11857 }, { "epoch": 2.1910591169983022, "grad_norm": 0.09470190852880478, "learning_rate": 3.5790883283444643e-06, "loss": 0.5392593741416931, "step": 11858 }, { "epoch": 2.191243893707198, "grad_norm": 0.08710703253746033, "learning_rate": 3.5775590975990903e-06, "loss": 0.548549234867096, "step": 11859 }, { "epoch": 2.191428670416094, "grad_norm": 0.07717004418373108, "learning_rate": 3.576030122446771e-06, "loss": 0.4283442497253418, "step": 11860 }, { "epoch": 2.1916134471249897, "grad_norm": 0.09224745631217957, "learning_rate": 3.574501402948354e-06, "loss": 0.44451847672462463, "step": 11861 }, { "epoch": 2.1917982238338856, "grad_norm": 0.09668068587779999, "learning_rate": 3.572972939164678e-06, "loss": 0.4325692653656006, "step": 11862 }, { "epoch": 2.1919830005427814, "grad_norm": 0.07948118448257446, "learning_rate": 3.5714447311565727e-06, "loss": 0.37542101740837097, "step": 11863 }, { "epoch": 2.192167777251677, "grad_norm": 0.1118590459227562, "learning_rate": 3.5699167789848575e-06, "loss": 0.49960219860076904, "step": 11864 }, { "epoch": 2.192352553960573, "grad_norm": 0.0815550684928894, "learning_rate": 3.568389082710334e-06, "loss": 0.4175896942615509, "step": 11865 }, { "epoch": 2.1925373306694693, "grad_norm": 0.08199605345726013, "learning_rate": 3.566861642393803e-06, "loss": 0.3794676661491394, "step": 11866 }, { "epoch": 2.192722107378365, "grad_norm": 0.06854138523340225, "learning_rate": 3.5653344580960525e-06, "loss": 0.3001802861690521, "step": 11867 }, { "epoch": 2.192906884087261, "grad_norm": 0.0924617350101471, "learning_rate": 3.5638075298778584e-06, "loss": 0.4806053936481476, "step": 11868 }, { "epoch": 2.193091660796157, "grad_norm": 0.0795179083943367, "learning_rate": 3.5622808577999922e-06, "loss": 0.45049938559532166, "step": 11869 }, { "epoch": 2.1932764375050526, "grad_norm": 0.07543812692165375, "learning_rate": 3.5607544419232033e-06, "loss": 0.39572301506996155, "step": 11870 }, { "epoch": 2.1934612142139485, "grad_norm": 0.06583981215953827, "learning_rate": 3.5592282823082413e-06, "loss": 0.368479460477829, "step": 11871 }, { "epoch": 2.1936459909228443, "grad_norm": 0.10285375267267227, "learning_rate": 3.5577023790158472e-06, "loss": 0.7166904807090759, "step": 11872 }, { "epoch": 2.19383076763174, "grad_norm": 0.08366060256958008, "learning_rate": 3.5561767321067387e-06, "loss": 0.4107072055339813, "step": 11873 }, { "epoch": 2.194015544340636, "grad_norm": 0.08970008790493011, "learning_rate": 3.554651341641634e-06, "loss": 0.45388728380203247, "step": 11874 }, { "epoch": 2.1942003210495318, "grad_norm": 0.08710724115371704, "learning_rate": 3.553126207681247e-06, "loss": 0.49304187297821045, "step": 11875 }, { "epoch": 2.1943850977584276, "grad_norm": 0.07061879336833954, "learning_rate": 3.551601330286264e-06, "loss": 0.36744633316993713, "step": 11876 }, { "epoch": 2.1945698744673234, "grad_norm": 0.0940442904829979, "learning_rate": 3.550076709517374e-06, "loss": 0.4634474813938141, "step": 11877 }, { "epoch": 2.1947546511762193, "grad_norm": 0.08538489043712616, "learning_rate": 3.5485523454352543e-06, "loss": 0.44748833775520325, "step": 11878 }, { "epoch": 2.194939427885115, "grad_norm": 0.1014489233493805, "learning_rate": 3.547028238100564e-06, "loss": 0.5843960642814636, "step": 11879 }, { "epoch": 2.195124204594011, "grad_norm": 0.10388089716434479, "learning_rate": 3.5455043875739615e-06, "loss": 0.4797080457210541, "step": 11880 }, { "epoch": 2.1953089813029067, "grad_norm": 0.08865340054035187, "learning_rate": 3.5439807939160907e-06, "loss": 0.4965810179710388, "step": 11881 }, { "epoch": 2.1954937580118026, "grad_norm": 0.07938023656606674, "learning_rate": 3.5424574571875857e-06, "loss": 0.41988202929496765, "step": 11882 }, { "epoch": 2.1956785347206984, "grad_norm": 0.08724203705787659, "learning_rate": 3.5409343774490714e-06, "loss": 0.5557771921157837, "step": 11883 }, { "epoch": 2.1958633114295942, "grad_norm": 0.08031976968050003, "learning_rate": 3.539411554761164e-06, "loss": 0.4043499231338501, "step": 11884 }, { "epoch": 2.19604808813849, "grad_norm": 0.08532105386257172, "learning_rate": 3.5378889891844616e-06, "loss": 0.6659044027328491, "step": 11885 }, { "epoch": 2.196232864847386, "grad_norm": 0.06489470601081848, "learning_rate": 3.5363666807795595e-06, "loss": 0.3123665452003479, "step": 11886 }, { "epoch": 2.1964176415562817, "grad_norm": 0.10015974193811417, "learning_rate": 3.5348446296070414e-06, "loss": 0.4849621653556824, "step": 11887 }, { "epoch": 2.1966024182651775, "grad_norm": 0.07387948781251907, "learning_rate": 3.5333228357274794e-06, "loss": 0.36443015933036804, "step": 11888 }, { "epoch": 2.1967871949740734, "grad_norm": 0.08133751153945923, "learning_rate": 3.5318012992014418e-06, "loss": 0.4533928632736206, "step": 11889 }, { "epoch": 2.196971971682969, "grad_norm": 0.08863720297813416, "learning_rate": 3.5302800200894715e-06, "loss": 0.40780341625213623, "step": 11890 }, { "epoch": 2.197156748391865, "grad_norm": 0.08395319432020187, "learning_rate": 3.5287589984521154e-06, "loss": 0.48462802171707153, "step": 11891 }, { "epoch": 2.197341525100761, "grad_norm": 0.09309794008731842, "learning_rate": 3.527238234349909e-06, "loss": 0.4617069363594055, "step": 11892 }, { "epoch": 2.1975263018096567, "grad_norm": 0.09020397067070007, "learning_rate": 3.5257177278433674e-06, "loss": 0.45796695351600647, "step": 11893 }, { "epoch": 2.1977110785185525, "grad_norm": 0.07047610729932785, "learning_rate": 3.524197478993e-06, "loss": 0.3196723461151123, "step": 11894 }, { "epoch": 2.197895855227449, "grad_norm": 0.08661914616823196, "learning_rate": 3.5226774878593208e-06, "loss": 0.4376320242881775, "step": 11895 }, { "epoch": 2.1980806319363446, "grad_norm": 0.08178701251745224, "learning_rate": 3.5211577545028086e-06, "loss": 0.42635318636894226, "step": 11896 }, { "epoch": 2.1982654086452404, "grad_norm": 0.07078079879283905, "learning_rate": 3.5196382789839477e-06, "loss": 0.402277410030365, "step": 11897 }, { "epoch": 2.1984501853541363, "grad_norm": 0.07917147874832153, "learning_rate": 3.518119061363213e-06, "loss": 0.4432550072669983, "step": 11898 }, { "epoch": 2.198634962063032, "grad_norm": 0.08097616583108902, "learning_rate": 3.5166001017010563e-06, "loss": 0.4853762090206146, "step": 11899 }, { "epoch": 2.198819738771928, "grad_norm": 0.08459150791168213, "learning_rate": 3.5150814000579327e-06, "loss": 0.45333394408226013, "step": 11900 }, { "epoch": 2.1990045154808238, "grad_norm": 0.09444347023963928, "learning_rate": 3.5135629564942797e-06, "loss": 0.46410757303237915, "step": 11901 }, { "epoch": 2.1991892921897196, "grad_norm": 0.08272453397512436, "learning_rate": 3.5120447710705285e-06, "loss": 0.33521467447280884, "step": 11902 }, { "epoch": 2.1993740688986154, "grad_norm": 0.09044880419969559, "learning_rate": 3.5105268438470996e-06, "loss": 0.4096111059188843, "step": 11903 }, { "epoch": 2.1995588456075112, "grad_norm": 0.11137299984693527, "learning_rate": 3.5090091748843967e-06, "loss": 0.5373837351799011, "step": 11904 }, { "epoch": 2.199743622316407, "grad_norm": 0.09911485016345978, "learning_rate": 3.5074917642428207e-06, "loss": 0.5808022618293762, "step": 11905 }, { "epoch": 2.199928399025303, "grad_norm": 0.10136883705854416, "learning_rate": 3.5059746119827597e-06, "loss": 0.4865693151950836, "step": 11906 }, { "epoch": 2.2001131757341987, "grad_norm": 0.1080540269613266, "learning_rate": 3.5044577181645923e-06, "loss": 0.48078617453575134, "step": 11907 }, { "epoch": 2.2002979524430946, "grad_norm": 0.07321763783693314, "learning_rate": 3.502941082848685e-06, "loss": 0.3743250370025635, "step": 11908 }, { "epoch": 2.2004827291519904, "grad_norm": 0.08438288420438766, "learning_rate": 3.501424706095401e-06, "loss": 0.4794260561466217, "step": 11909 }, { "epoch": 2.200667505860886, "grad_norm": 0.08540996164083481, "learning_rate": 3.4999085879650772e-06, "loss": 0.4936371147632599, "step": 11910 }, { "epoch": 2.200852282569782, "grad_norm": 0.08107694983482361, "learning_rate": 3.4983927285180565e-06, "loss": 0.35202932357788086, "step": 11911 }, { "epoch": 2.201037059278678, "grad_norm": 0.0972508043050766, "learning_rate": 3.4968771278146675e-06, "loss": 0.4687209725379944, "step": 11912 }, { "epoch": 2.2012218359875737, "grad_norm": 0.0832279622554779, "learning_rate": 3.495361785915219e-06, "loss": 0.4170093238353729, "step": 11913 }, { "epoch": 2.2014066126964695, "grad_norm": 0.0792597085237503, "learning_rate": 3.4938467028800182e-06, "loss": 0.2837228775024414, "step": 11914 }, { "epoch": 2.2015913894053654, "grad_norm": 0.08879082649946213, "learning_rate": 3.4923318787693704e-06, "loss": 0.440814733505249, "step": 11915 }, { "epoch": 2.201776166114261, "grad_norm": 0.08689576387405396, "learning_rate": 3.4908173136435508e-06, "loss": 0.3709763288497925, "step": 11916 }, { "epoch": 2.201960942823157, "grad_norm": 0.06214141845703125, "learning_rate": 3.4893030075628367e-06, "loss": 0.2839714586734772, "step": 11917 }, { "epoch": 2.202145719532053, "grad_norm": 0.0947304293513298, "learning_rate": 3.487788960587497e-06, "loss": 0.43734437227249146, "step": 11918 }, { "epoch": 2.2023304962409487, "grad_norm": 0.07839353382587433, "learning_rate": 3.48627517277778e-06, "loss": 0.33791032433509827, "step": 11919 }, { "epoch": 2.2025152729498445, "grad_norm": 0.09421993046998978, "learning_rate": 3.4847616441939314e-06, "loss": 0.4476447105407715, "step": 11920 }, { "epoch": 2.2027000496587403, "grad_norm": 0.08122393488883972, "learning_rate": 3.4832483748961866e-06, "loss": 0.36605972051620483, "step": 11921 }, { "epoch": 2.202884826367636, "grad_norm": 0.08852571249008179, "learning_rate": 3.481735364944767e-06, "loss": 0.39648857712745667, "step": 11922 }, { "epoch": 2.203069603076532, "grad_norm": 0.07064167410135269, "learning_rate": 3.4802226143998917e-06, "loss": 0.2931559085845947, "step": 11923 }, { "epoch": 2.2032543797854283, "grad_norm": 0.08540599793195724, "learning_rate": 3.4787101233217546e-06, "loss": 0.42261940240859985, "step": 11924 }, { "epoch": 2.203439156494324, "grad_norm": 0.11070524901151657, "learning_rate": 3.477197891770552e-06, "loss": 0.5631526112556458, "step": 11925 }, { "epoch": 2.20362393320322, "grad_norm": 0.11584214866161346, "learning_rate": 3.475685919806465e-06, "loss": 0.6900765895843506, "step": 11926 }, { "epoch": 2.2038087099121157, "grad_norm": 0.09533005952835083, "learning_rate": 3.474174207489668e-06, "loss": 0.42649832367897034, "step": 11927 }, { "epoch": 2.2039934866210116, "grad_norm": 0.0895998403429985, "learning_rate": 3.4726627548803205e-06, "loss": 0.4245060384273529, "step": 11928 }, { "epoch": 2.2041782633299074, "grad_norm": 0.082939513027668, "learning_rate": 3.471151562038577e-06, "loss": 0.4422062039375305, "step": 11929 }, { "epoch": 2.2043630400388032, "grad_norm": 0.0672890916466713, "learning_rate": 3.469640629024572e-06, "loss": 0.30697351694107056, "step": 11930 }, { "epoch": 2.204547816747699, "grad_norm": 0.09478650987148285, "learning_rate": 3.468129955898439e-06, "loss": 0.4834575653076172, "step": 11931 }, { "epoch": 2.204732593456595, "grad_norm": 0.07390895485877991, "learning_rate": 3.466619542720302e-06, "loss": 0.36322087049484253, "step": 11932 }, { "epoch": 2.2049173701654907, "grad_norm": 0.08637066185474396, "learning_rate": 3.46510938955026e-06, "loss": 0.49460700154304504, "step": 11933 }, { "epoch": 2.2051021468743865, "grad_norm": 0.09042581170797348, "learning_rate": 3.4635994964484252e-06, "loss": 0.40781348943710327, "step": 11934 }, { "epoch": 2.2052869235832824, "grad_norm": 0.09745407849550247, "learning_rate": 3.462089863474878e-06, "loss": 0.4389975666999817, "step": 11935 }, { "epoch": 2.205471700292178, "grad_norm": 0.08554673939943314, "learning_rate": 3.4605804906897e-06, "loss": 0.4696071147918701, "step": 11936 }, { "epoch": 2.205656477001074, "grad_norm": 0.060610331594944, "learning_rate": 3.4590713781529616e-06, "loss": 0.2960943877696991, "step": 11937 }, { "epoch": 2.20584125370997, "grad_norm": 0.07446163147687912, "learning_rate": 3.4575625259247157e-06, "loss": 0.393887996673584, "step": 11938 }, { "epoch": 2.2060260304188657, "grad_norm": 0.07066913694143295, "learning_rate": 3.456053934065012e-06, "loss": 0.3027123212814331, "step": 11939 }, { "epoch": 2.2062108071277615, "grad_norm": 0.07689270377159119, "learning_rate": 3.4545456026338896e-06, "loss": 0.3906818628311157, "step": 11940 }, { "epoch": 2.2063955838366573, "grad_norm": 0.05763699486851692, "learning_rate": 3.4530375316913734e-06, "loss": 0.27099940180778503, "step": 11941 }, { "epoch": 2.206580360545553, "grad_norm": 0.0828971266746521, "learning_rate": 3.451529721297481e-06, "loss": 0.44155043363571167, "step": 11942 }, { "epoch": 2.206765137254449, "grad_norm": 0.0719708651304245, "learning_rate": 3.450022171512221e-06, "loss": 0.35504668951034546, "step": 11943 }, { "epoch": 2.206949913963345, "grad_norm": 0.10324752330780029, "learning_rate": 3.4485148823955827e-06, "loss": 0.4461381435394287, "step": 11944 }, { "epoch": 2.2071346906722407, "grad_norm": 0.09716669470071793, "learning_rate": 3.4470078540075556e-06, "loss": 0.5391970872879028, "step": 11945 }, { "epoch": 2.2073194673811365, "grad_norm": 0.07661837339401245, "learning_rate": 3.445501086408114e-06, "loss": 0.38608694076538086, "step": 11946 }, { "epoch": 2.2075042440900323, "grad_norm": 0.08236774057149887, "learning_rate": 3.443994579657223e-06, "loss": 0.48675239086151123, "step": 11947 }, { "epoch": 2.207689020798928, "grad_norm": 0.10291637480258942, "learning_rate": 3.4424883338148364e-06, "loss": 0.5458536148071289, "step": 11948 }, { "epoch": 2.207873797507824, "grad_norm": 0.07039511948823929, "learning_rate": 3.440982348940902e-06, "loss": 0.3777443766593933, "step": 11949 }, { "epoch": 2.20805857421672, "grad_norm": 0.08171594887971878, "learning_rate": 3.439476625095346e-06, "loss": 0.4108811914920807, "step": 11950 }, { "epoch": 2.2082433509256156, "grad_norm": 0.09483866393566132, "learning_rate": 3.4379711623380984e-06, "loss": 0.41626477241516113, "step": 11951 }, { "epoch": 2.2084281276345115, "grad_norm": 0.0926767885684967, "learning_rate": 3.436465960729065e-06, "loss": 0.37870514392852783, "step": 11952 }, { "epoch": 2.2086129043434077, "grad_norm": 0.08968275785446167, "learning_rate": 3.4349610203281492e-06, "loss": 0.4915672540664673, "step": 11953 }, { "epoch": 2.2087976810523036, "grad_norm": 0.08231180906295776, "learning_rate": 3.4334563411952514e-06, "loss": 0.39932894706726074, "step": 11954 }, { "epoch": 2.2089824577611994, "grad_norm": 0.08135201781988144, "learning_rate": 3.4319519233902443e-06, "loss": 0.34979644417762756, "step": 11955 }, { "epoch": 2.209167234470095, "grad_norm": 0.07436149567365646, "learning_rate": 3.4304477669730008e-06, "loss": 0.35989686846733093, "step": 11956 }, { "epoch": 2.209352011178991, "grad_norm": 0.09048670530319214, "learning_rate": 3.4289438720033875e-06, "loss": 0.4885326027870178, "step": 11957 }, { "epoch": 2.209536787887887, "grad_norm": 0.13973698019981384, "learning_rate": 3.4274402385412452e-06, "loss": 0.7130243182182312, "step": 11958 }, { "epoch": 2.2097215645967827, "grad_norm": 0.07120823860168457, "learning_rate": 3.425936866646419e-06, "loss": 0.32339027523994446, "step": 11959 }, { "epoch": 2.2099063413056785, "grad_norm": 0.08559058606624603, "learning_rate": 3.424433756378738e-06, "loss": 0.3834274709224701, "step": 11960 }, { "epoch": 2.2100911180145744, "grad_norm": 0.0591914989054203, "learning_rate": 3.422930907798021e-06, "loss": 0.3140208125114441, "step": 11961 }, { "epoch": 2.21027589472347, "grad_norm": 0.06373035162687302, "learning_rate": 3.4214283209640774e-06, "loss": 0.33546945452690125, "step": 11962 }, { "epoch": 2.210460671432366, "grad_norm": 0.08530183136463165, "learning_rate": 3.4199259959367084e-06, "loss": 0.42840614914894104, "step": 11963 }, { "epoch": 2.210645448141262, "grad_norm": 0.09318653494119644, "learning_rate": 3.418423932775694e-06, "loss": 0.5658191442489624, "step": 11964 }, { "epoch": 2.2108302248501577, "grad_norm": 0.09994968771934509, "learning_rate": 3.4169221315408163e-06, "loss": 0.5752363204956055, "step": 11965 }, { "epoch": 2.2110150015590535, "grad_norm": 0.10247165709733963, "learning_rate": 3.4154205922918428e-06, "loss": 0.44143274426460266, "step": 11966 }, { "epoch": 2.2111997782679493, "grad_norm": 0.0819198489189148, "learning_rate": 3.4139193150885284e-06, "loss": 0.41120263934135437, "step": 11967 }, { "epoch": 2.211384554976845, "grad_norm": 0.09771253913640976, "learning_rate": 3.412418299990623e-06, "loss": 0.47400107979774475, "step": 11968 }, { "epoch": 2.211569331685741, "grad_norm": 0.11114433407783508, "learning_rate": 3.410917547057857e-06, "loss": 0.6460572481155396, "step": 11969 }, { "epoch": 2.211754108394637, "grad_norm": 0.09645184874534607, "learning_rate": 3.4094170563499575e-06, "loss": 0.485895037651062, "step": 11970 }, { "epoch": 2.2119388851035326, "grad_norm": 0.08106567710638046, "learning_rate": 3.407916827926644e-06, "loss": 0.37728479504585266, "step": 11971 }, { "epoch": 2.2121236618124285, "grad_norm": 0.07832597196102142, "learning_rate": 3.4064168618476125e-06, "loss": 0.36626359820365906, "step": 11972 }, { "epoch": 2.2123084385213243, "grad_norm": 0.07612678408622742, "learning_rate": 3.4049171581725584e-06, "loss": 0.3591512441635132, "step": 11973 }, { "epoch": 2.21249321523022, "grad_norm": 0.09806806594133377, "learning_rate": 3.403417716961174e-06, "loss": 0.5178738832473755, "step": 11974 }, { "epoch": 2.212677991939116, "grad_norm": 0.07646838575601578, "learning_rate": 3.4019185382731233e-06, "loss": 0.31318169832229614, "step": 11975 }, { "epoch": 2.212862768648012, "grad_norm": 0.0919872522354126, "learning_rate": 3.400419622168073e-06, "loss": 0.43961310386657715, "step": 11976 }, { "epoch": 2.2130475453569076, "grad_norm": 0.08175146579742432, "learning_rate": 3.3989209687056767e-06, "loss": 0.38150280714035034, "step": 11977 }, { "epoch": 2.2132323220658034, "grad_norm": 0.0916038379073143, "learning_rate": 3.3974225779455703e-06, "loss": 0.4013831913471222, "step": 11978 }, { "epoch": 2.2134170987746993, "grad_norm": 0.08610547333955765, "learning_rate": 3.3959244499473886e-06, "loss": 0.4272667467594147, "step": 11979 }, { "epoch": 2.213601875483595, "grad_norm": 0.08782272040843964, "learning_rate": 3.3944265847707525e-06, "loss": 0.5055347084999084, "step": 11980 }, { "epoch": 2.213786652192491, "grad_norm": 0.06641381978988647, "learning_rate": 3.392928982475272e-06, "loss": 0.30324873328208923, "step": 11981 }, { "epoch": 2.213971428901387, "grad_norm": 0.11009874194860458, "learning_rate": 3.3914316431205476e-06, "loss": 0.6258784532546997, "step": 11982 }, { "epoch": 2.2141562056102826, "grad_norm": 0.08467631787061691, "learning_rate": 3.389934566766171e-06, "loss": 0.4102543294429779, "step": 11983 }, { "epoch": 2.214340982319179, "grad_norm": 0.06726336479187012, "learning_rate": 3.388437753471715e-06, "loss": 0.2789519429206848, "step": 11984 }, { "epoch": 2.2145257590280747, "grad_norm": 0.08241001516580582, "learning_rate": 3.3869412032967552e-06, "loss": 0.46530020236968994, "step": 11985 }, { "epoch": 2.2147105357369705, "grad_norm": 0.06900808215141296, "learning_rate": 3.3854449163008395e-06, "loss": 0.3533879816532135, "step": 11986 }, { "epoch": 2.2148953124458663, "grad_norm": 0.08180130273103714, "learning_rate": 3.3839488925435248e-06, "loss": 0.4307593107223511, "step": 11987 }, { "epoch": 2.215080089154762, "grad_norm": 0.08925405889749527, "learning_rate": 3.382453132084349e-06, "loss": 0.41310012340545654, "step": 11988 }, { "epoch": 2.215264865863658, "grad_norm": 0.1149209663271904, "learning_rate": 3.380957634982831e-06, "loss": 0.49383172392845154, "step": 11989 }, { "epoch": 2.215449642572554, "grad_norm": 0.08416301012039185, "learning_rate": 3.3794624012984913e-06, "loss": 0.4587768316268921, "step": 11990 }, { "epoch": 2.2156344192814497, "grad_norm": 0.08828668296337128, "learning_rate": 3.3779674310908373e-06, "loss": 0.4280339479446411, "step": 11991 }, { "epoch": 2.2158191959903455, "grad_norm": 0.08794192224740982, "learning_rate": 3.3764727244193596e-06, "loss": 0.40835249423980713, "step": 11992 }, { "epoch": 2.2160039726992413, "grad_norm": 0.08347532898187637, "learning_rate": 3.3749782813435415e-06, "loss": 0.41212543845176697, "step": 11993 }, { "epoch": 2.216188749408137, "grad_norm": 0.11080392450094223, "learning_rate": 3.373484101922867e-06, "loss": 0.6224543452262878, "step": 11994 }, { "epoch": 2.216373526117033, "grad_norm": 0.06352946907281876, "learning_rate": 3.3719901862167903e-06, "loss": 0.31230124831199646, "step": 11995 }, { "epoch": 2.216558302825929, "grad_norm": 0.07902231812477112, "learning_rate": 3.3704965342847683e-06, "loss": 0.34405481815338135, "step": 11996 }, { "epoch": 2.2167430795348246, "grad_norm": 0.07277875393629074, "learning_rate": 3.369003146186246e-06, "loss": 0.2731490731239319, "step": 11997 }, { "epoch": 2.2169278562437205, "grad_norm": 0.07820237427949905, "learning_rate": 3.36751002198065e-06, "loss": 0.34150072932243347, "step": 11998 }, { "epoch": 2.2171126329526163, "grad_norm": 0.08240468800067902, "learning_rate": 3.366017161727404e-06, "loss": 0.42970985174179077, "step": 11999 }, { "epoch": 2.217297409661512, "grad_norm": 0.08826398104429245, "learning_rate": 3.3645245654859206e-06, "loss": 0.3769395053386688, "step": 12000 }, { "epoch": 2.217297409661512, "eval_loss": 0.5515779852867126, "eval_runtime": 155.3367, "eval_samples_per_second": 117.352, "eval_steps_per_second": 14.671, "step": 12000 }, { "epoch": 2.217482186370408, "grad_norm": 0.08796269446611404, "learning_rate": 3.3630322333155996e-06, "loss": 0.49599650502204895, "step": 12001 }, { "epoch": 2.2176669630793038, "grad_norm": 0.10347124934196472, "learning_rate": 3.3615401652758353e-06, "loss": 0.49439701437950134, "step": 12002 }, { "epoch": 2.2178517397881996, "grad_norm": 0.1028018519282341, "learning_rate": 3.3600483614259983e-06, "loss": 0.4486571252346039, "step": 12003 }, { "epoch": 2.2180365164970954, "grad_norm": 0.10097562521696091, "learning_rate": 3.358556821825464e-06, "loss": 0.5498074293136597, "step": 12004 }, { "epoch": 2.2182212932059913, "grad_norm": 0.07746328413486481, "learning_rate": 3.357065546533592e-06, "loss": 0.3791002929210663, "step": 12005 }, { "epoch": 2.218406069914887, "grad_norm": 0.08741196244955063, "learning_rate": 3.3555745356097224e-06, "loss": 0.41606611013412476, "step": 12006 }, { "epoch": 2.218590846623783, "grad_norm": 0.09901390224695206, "learning_rate": 3.3540837891132027e-06, "loss": 0.5478041768074036, "step": 12007 }, { "epoch": 2.2187756233326787, "grad_norm": 0.0802740678191185, "learning_rate": 3.3525933071033578e-06, "loss": 0.3526931405067444, "step": 12008 }, { "epoch": 2.2189604000415746, "grad_norm": 0.09909913688898087, "learning_rate": 3.3511030896394994e-06, "loss": 0.5463982224464417, "step": 12009 }, { "epoch": 2.2191451767504704, "grad_norm": 0.07805711776018143, "learning_rate": 3.349613136780936e-06, "loss": 0.4081910252571106, "step": 12010 }, { "epoch": 2.2193299534593667, "grad_norm": 0.07684759795665741, "learning_rate": 3.3481234485869673e-06, "loss": 0.4387986361980438, "step": 12011 }, { "epoch": 2.219514730168262, "grad_norm": 0.07478204369544983, "learning_rate": 3.3466340251168706e-06, "loss": 0.38535648584365845, "step": 12012 }, { "epoch": 2.2196995068771583, "grad_norm": 0.06635311245918274, "learning_rate": 3.3451448664299203e-06, "loss": 0.2572517395019531, "step": 12013 }, { "epoch": 2.219884283586054, "grad_norm": 0.08421587198972702, "learning_rate": 3.343655972585391e-06, "loss": 0.5203030109405518, "step": 12014 }, { "epoch": 2.22006906029495, "grad_norm": 0.10073671489953995, "learning_rate": 3.3421673436425263e-06, "loss": 0.5088030695915222, "step": 12015 }, { "epoch": 2.220253837003846, "grad_norm": 0.10075697302818298, "learning_rate": 3.340678979660573e-06, "loss": 0.4893205463886261, "step": 12016 }, { "epoch": 2.2204386137127416, "grad_norm": 0.07593869417905807, "learning_rate": 3.3391908806987604e-06, "loss": 0.3319643437862396, "step": 12017 }, { "epoch": 2.2206233904216375, "grad_norm": 0.11687902361154556, "learning_rate": 3.3377030468163107e-06, "loss": 0.5747287273406982, "step": 12018 }, { "epoch": 2.2208081671305333, "grad_norm": 0.08582670241594315, "learning_rate": 3.3362154780724378e-06, "loss": 0.369323194026947, "step": 12019 }, { "epoch": 2.220992943839429, "grad_norm": 0.0650225281715393, "learning_rate": 3.3347281745263394e-06, "loss": 0.256856769323349, "step": 12020 }, { "epoch": 2.221177720548325, "grad_norm": 0.08185169100761414, "learning_rate": 3.3332411362372063e-06, "loss": 0.4835938811302185, "step": 12021 }, { "epoch": 2.221362497257221, "grad_norm": 0.07414474338293076, "learning_rate": 3.3317543632642215e-06, "loss": 0.4327687919139862, "step": 12022 }, { "epoch": 2.2215472739661166, "grad_norm": 0.08091580122709274, "learning_rate": 3.330267855666548e-06, "loss": 0.428693562746048, "step": 12023 }, { "epoch": 2.2217320506750124, "grad_norm": 0.10111634433269501, "learning_rate": 3.3287816135033467e-06, "loss": 0.4685934782028198, "step": 12024 }, { "epoch": 2.2219168273839083, "grad_norm": 0.06814808398485184, "learning_rate": 3.327295636833766e-06, "loss": 0.3026934862136841, "step": 12025 }, { "epoch": 2.222101604092804, "grad_norm": 0.10446780920028687, "learning_rate": 3.325809925716943e-06, "loss": 0.6697503924369812, "step": 12026 }, { "epoch": 2.2222863808017, "grad_norm": 0.12640899419784546, "learning_rate": 3.3243244802120034e-06, "loss": 0.6471139788627625, "step": 12027 }, { "epoch": 2.2224711575105958, "grad_norm": 0.09218905866146088, "learning_rate": 3.322839300378068e-06, "loss": 0.4591101109981537, "step": 12028 }, { "epoch": 2.2226559342194916, "grad_norm": 0.11036652326583862, "learning_rate": 3.321354386274235e-06, "loss": 0.6440871357917786, "step": 12029 }, { "epoch": 2.2228407109283874, "grad_norm": 0.10076822340488434, "learning_rate": 3.3198697379596023e-06, "loss": 0.553157389163971, "step": 12030 }, { "epoch": 2.2230254876372832, "grad_norm": 0.08576725423336029, "learning_rate": 3.3183853554932576e-06, "loss": 0.45056310296058655, "step": 12031 }, { "epoch": 2.223210264346179, "grad_norm": 0.10493794828653336, "learning_rate": 3.316901238934268e-06, "loss": 0.550243079662323, "step": 12032 }, { "epoch": 2.223395041055075, "grad_norm": 0.11206745356321335, "learning_rate": 3.3154173883417016e-06, "loss": 0.6716387867927551, "step": 12033 }, { "epoch": 2.2235798177639707, "grad_norm": 0.08630875498056412, "learning_rate": 3.3139338037746083e-06, "loss": 0.38989031314849854, "step": 12034 }, { "epoch": 2.2237645944728666, "grad_norm": 0.1089872494339943, "learning_rate": 3.3124504852920323e-06, "loss": 0.5918889045715332, "step": 12035 }, { "epoch": 2.2239493711817624, "grad_norm": 0.09236101806163788, "learning_rate": 3.3109674329530084e-06, "loss": 0.4525047838687897, "step": 12036 }, { "epoch": 2.224134147890658, "grad_norm": 0.06594641506671906, "learning_rate": 3.3094846468165497e-06, "loss": 0.33796876668930054, "step": 12037 }, { "epoch": 2.224318924599554, "grad_norm": 0.07526916265487671, "learning_rate": 3.3080021269416696e-06, "loss": 0.36459073424339294, "step": 12038 }, { "epoch": 2.22450370130845, "grad_norm": 0.10719329863786697, "learning_rate": 3.306519873387368e-06, "loss": 0.5638750791549683, "step": 12039 }, { "epoch": 2.224688478017346, "grad_norm": 0.05023134872317314, "learning_rate": 3.3050378862126355e-06, "loss": 0.2469005435705185, "step": 12040 }, { "epoch": 2.2248732547262415, "grad_norm": 0.10357912629842758, "learning_rate": 3.303556165476448e-06, "loss": 0.49883297085762024, "step": 12041 }, { "epoch": 2.225058031435138, "grad_norm": 0.09542550891637802, "learning_rate": 3.302074711237778e-06, "loss": 0.502481997013092, "step": 12042 }, { "epoch": 2.2252428081440336, "grad_norm": 0.08671344816684723, "learning_rate": 3.3005935235555762e-06, "loss": 0.3964404761791229, "step": 12043 }, { "epoch": 2.2254275848529295, "grad_norm": 0.11842737346887589, "learning_rate": 3.299112602488793e-06, "loss": 0.6013658046722412, "step": 12044 }, { "epoch": 2.2256123615618253, "grad_norm": 0.0860036090016365, "learning_rate": 3.297631948096364e-06, "loss": 0.4228939414024353, "step": 12045 }, { "epoch": 2.225797138270721, "grad_norm": 0.08696401119232178, "learning_rate": 3.296151560437214e-06, "loss": 0.47006645798683167, "step": 12046 }, { "epoch": 2.225981914979617, "grad_norm": 0.0872708186507225, "learning_rate": 3.2946714395702584e-06, "loss": 0.40168893337249756, "step": 12047 }, { "epoch": 2.2261666916885128, "grad_norm": 0.0834670215845108, "learning_rate": 3.2931915855544038e-06, "loss": 0.44790807366371155, "step": 12048 }, { "epoch": 2.2263514683974086, "grad_norm": 0.07953152805566788, "learning_rate": 3.291711998448539e-06, "loss": 0.4631030559539795, "step": 12049 }, { "epoch": 2.2265362451063044, "grad_norm": 0.09107434004545212, "learning_rate": 3.2902326783115514e-06, "loss": 0.5299221277236938, "step": 12050 }, { "epoch": 2.2267210218152003, "grad_norm": 0.08279658854007721, "learning_rate": 3.2887536252023087e-06, "loss": 0.41639232635498047, "step": 12051 }, { "epoch": 2.226905798524096, "grad_norm": 0.08936301618814468, "learning_rate": 3.2872748391796736e-06, "loss": 0.45119351148605347, "step": 12052 }, { "epoch": 2.227090575232992, "grad_norm": 0.07799794524908066, "learning_rate": 3.2857963203025e-06, "loss": 0.39014753699302673, "step": 12053 }, { "epoch": 2.2272753519418877, "grad_norm": 0.08831122517585754, "learning_rate": 3.2843180686296262e-06, "loss": 0.4471457302570343, "step": 12054 }, { "epoch": 2.2274601286507836, "grad_norm": 0.09028404951095581, "learning_rate": 3.282840084219883e-06, "loss": 0.45349228382110596, "step": 12055 }, { "epoch": 2.2276449053596794, "grad_norm": 0.08866817504167557, "learning_rate": 3.2813623671320914e-06, "loss": 0.43158385157585144, "step": 12056 }, { "epoch": 2.2278296820685752, "grad_norm": 0.08073228597640991, "learning_rate": 3.2798849174250546e-06, "loss": 0.40182361006736755, "step": 12057 }, { "epoch": 2.228014458777471, "grad_norm": 0.09359989315271378, "learning_rate": 3.278407735157574e-06, "loss": 0.4055033028125763, "step": 12058 }, { "epoch": 2.228199235486367, "grad_norm": 0.07239627838134766, "learning_rate": 3.2769308203884365e-06, "loss": 0.43219462037086487, "step": 12059 }, { "epoch": 2.2283840121952627, "grad_norm": 0.11576760560274124, "learning_rate": 3.275454173176418e-06, "loss": 0.5686132907867432, "step": 12060 }, { "epoch": 2.2285687889041585, "grad_norm": 0.10355863720178604, "learning_rate": 3.273977793580285e-06, "loss": 0.5342440009117126, "step": 12061 }, { "epoch": 2.2287535656130544, "grad_norm": 0.10653654485940933, "learning_rate": 3.2725016816587973e-06, "loss": 0.5543602705001831, "step": 12062 }, { "epoch": 2.22893834232195, "grad_norm": 0.12046940624713898, "learning_rate": 3.2710258374706904e-06, "loss": 0.6986100077629089, "step": 12063 }, { "epoch": 2.229123119030846, "grad_norm": 0.09040313959121704, "learning_rate": 3.269550261074703e-06, "loss": 0.37894928455352783, "step": 12064 }, { "epoch": 2.229307895739742, "grad_norm": 0.06946759670972824, "learning_rate": 3.26807495252956e-06, "loss": 0.34007248282432556, "step": 12065 }, { "epoch": 2.2294926724486377, "grad_norm": 0.07851733267307281, "learning_rate": 3.266599911893971e-06, "loss": 0.3887780010700226, "step": 12066 }, { "epoch": 2.2296774491575335, "grad_norm": 0.08522792905569077, "learning_rate": 3.2651251392266424e-06, "loss": 0.43342339992523193, "step": 12067 }, { "epoch": 2.2298622258664293, "grad_norm": 0.0925016775727272, "learning_rate": 3.2636506345862595e-06, "loss": 0.4961267113685608, "step": 12068 }, { "epoch": 2.230047002575325, "grad_norm": 0.0975349023938179, "learning_rate": 3.262176398031506e-06, "loss": 0.4908147156238556, "step": 12069 }, { "epoch": 2.230231779284221, "grad_norm": 0.09131171554327011, "learning_rate": 3.2607024296210553e-06, "loss": 0.4999105632305145, "step": 12070 }, { "epoch": 2.2304165559931173, "grad_norm": 0.10944220423698425, "learning_rate": 3.2592287294135604e-06, "loss": 0.5443828701972961, "step": 12071 }, { "epoch": 2.230601332702013, "grad_norm": 0.08160214871168137, "learning_rate": 3.2577552974676718e-06, "loss": 0.38871800899505615, "step": 12072 }, { "epoch": 2.230786109410909, "grad_norm": 0.073506660759449, "learning_rate": 3.2562821338420303e-06, "loss": 0.37624433636665344, "step": 12073 }, { "epoch": 2.2309708861198048, "grad_norm": 0.07640836387872696, "learning_rate": 3.25480923859526e-06, "loss": 0.3682493567466736, "step": 12074 }, { "epoch": 2.2311556628287006, "grad_norm": 0.06404562294483185, "learning_rate": 3.25333661178598e-06, "loss": 0.3405209183692932, "step": 12075 }, { "epoch": 2.2313404395375964, "grad_norm": 0.09222139418125153, "learning_rate": 3.2518642534727985e-06, "loss": 0.5926950573921204, "step": 12076 }, { "epoch": 2.2315252162464922, "grad_norm": 0.0780331939458847, "learning_rate": 3.250392163714303e-06, "loss": 0.4412083029747009, "step": 12077 }, { "epoch": 2.231709992955388, "grad_norm": 0.07865004241466522, "learning_rate": 3.248920342569084e-06, "loss": 0.43166717886924744, "step": 12078 }, { "epoch": 2.231894769664284, "grad_norm": 0.07837144285440445, "learning_rate": 3.247448790095713e-06, "loss": 0.4415147304534912, "step": 12079 }, { "epoch": 2.2320795463731797, "grad_norm": 0.07582159340381622, "learning_rate": 3.2459775063527543e-06, "loss": 0.4302468001842499, "step": 12080 }, { "epoch": 2.2322643230820756, "grad_norm": 0.0823829248547554, "learning_rate": 3.2445064913987644e-06, "loss": 0.48967957496643066, "step": 12081 }, { "epoch": 2.2324490997909714, "grad_norm": 0.07873893529176712, "learning_rate": 3.243035745292277e-06, "loss": 0.3432171642780304, "step": 12082 }, { "epoch": 2.232633876499867, "grad_norm": 0.1042979285120964, "learning_rate": 3.2415652680918262e-06, "loss": 0.5159332752227783, "step": 12083 }, { "epoch": 2.232818653208763, "grad_norm": 0.08104976266622543, "learning_rate": 3.240095059855938e-06, "loss": 0.42197322845458984, "step": 12084 }, { "epoch": 2.233003429917659, "grad_norm": 0.09248243272304535, "learning_rate": 3.238625120643111e-06, "loss": 0.526531994342804, "step": 12085 }, { "epoch": 2.2331882066265547, "grad_norm": 0.0943179726600647, "learning_rate": 3.237155450511852e-06, "loss": 0.5523107647895813, "step": 12086 }, { "epoch": 2.2333729833354505, "grad_norm": 0.08297137916088104, "learning_rate": 3.235686049520652e-06, "loss": 0.4356424808502197, "step": 12087 }, { "epoch": 2.2335577600443464, "grad_norm": 0.06990028917789459, "learning_rate": 3.2342169177279826e-06, "loss": 0.3431797921657562, "step": 12088 }, { "epoch": 2.233742536753242, "grad_norm": 0.09813868999481201, "learning_rate": 3.2327480551923107e-06, "loss": 0.4996069371700287, "step": 12089 }, { "epoch": 2.233927313462138, "grad_norm": 0.08070428669452667, "learning_rate": 3.2312794619720976e-06, "loss": 0.3879484236240387, "step": 12090 }, { "epoch": 2.234112090171034, "grad_norm": 0.0751897394657135, "learning_rate": 3.229811138125782e-06, "loss": 0.4100872576236725, "step": 12091 }, { "epoch": 2.2342968668799297, "grad_norm": 0.07286939024925232, "learning_rate": 3.2283430837118035e-06, "loss": 0.32202818989753723, "step": 12092 }, { "epoch": 2.2344816435888255, "grad_norm": 0.08649271726608276, "learning_rate": 3.2268752987885834e-06, "loss": 0.5581140518188477, "step": 12093 }, { "epoch": 2.2346664202977213, "grad_norm": 0.09588231891393661, "learning_rate": 3.225407783414536e-06, "loss": 0.4919707775115967, "step": 12094 }, { "epoch": 2.234851197006617, "grad_norm": 0.10235357284545898, "learning_rate": 3.2239405376480638e-06, "loss": 0.4288097620010376, "step": 12095 }, { "epoch": 2.235035973715513, "grad_norm": 0.07489422708749771, "learning_rate": 3.2224735615475612e-06, "loss": 0.3515622913837433, "step": 12096 }, { "epoch": 2.235220750424409, "grad_norm": 0.08253826200962067, "learning_rate": 3.2210068551714045e-06, "loss": 0.4348796010017395, "step": 12097 }, { "epoch": 2.2354055271333046, "grad_norm": 0.07540115714073181, "learning_rate": 3.2195404185779654e-06, "loss": 0.3401135504245758, "step": 12098 }, { "epoch": 2.2355903038422005, "grad_norm": 0.06745466589927673, "learning_rate": 3.2180742518256047e-06, "loss": 0.3664204180240631, "step": 12099 }, { "epoch": 2.2357750805510967, "grad_norm": 0.07638771086931229, "learning_rate": 3.216608354972671e-06, "loss": 0.43941521644592285, "step": 12100 }, { "epoch": 2.2359598572599926, "grad_norm": 0.08894451707601547, "learning_rate": 3.215142728077505e-06, "loss": 0.46996885538101196, "step": 12101 }, { "epoch": 2.2361446339688884, "grad_norm": 0.07854200154542923, "learning_rate": 3.2136773711984293e-06, "loss": 0.36215487122535706, "step": 12102 }, { "epoch": 2.2363294106777842, "grad_norm": 0.09711235761642456, "learning_rate": 3.212212284393761e-06, "loss": 0.41373053193092346, "step": 12103 }, { "epoch": 2.23651418738668, "grad_norm": 0.07770132273435593, "learning_rate": 3.210747467721812e-06, "loss": 0.3723510503768921, "step": 12104 }, { "epoch": 2.236698964095576, "grad_norm": 0.09046914428472519, "learning_rate": 3.2092829212408662e-06, "loss": 0.4817093312740326, "step": 12105 }, { "epoch": 2.2368837408044717, "grad_norm": 0.060229867696762085, "learning_rate": 3.2078186450092176e-06, "loss": 0.23560619354248047, "step": 12106 }, { "epoch": 2.2370685175133675, "grad_norm": 0.09669230878353119, "learning_rate": 3.2063546390851397e-06, "loss": 0.4499640464782715, "step": 12107 }, { "epoch": 2.2372532942222634, "grad_norm": 0.08333880454301834, "learning_rate": 3.2048909035268906e-06, "loss": 0.4146597385406494, "step": 12108 }, { "epoch": 2.237438070931159, "grad_norm": 0.0770716592669487, "learning_rate": 3.2034274383927233e-06, "loss": 0.39594510197639465, "step": 12109 }, { "epoch": 2.237622847640055, "grad_norm": 0.07659077644348145, "learning_rate": 3.2019642437408836e-06, "loss": 0.32030820846557617, "step": 12110 }, { "epoch": 2.237807624348951, "grad_norm": 0.09327710419893265, "learning_rate": 3.2005013196295953e-06, "loss": 0.47533729672431946, "step": 12111 }, { "epoch": 2.2379924010578467, "grad_norm": 0.08465027809143066, "learning_rate": 3.1990386661170825e-06, "loss": 0.40699127316474915, "step": 12112 }, { "epoch": 2.2381771777667425, "grad_norm": 0.0673907920718193, "learning_rate": 3.197576283261553e-06, "loss": 0.30016353726387024, "step": 12113 }, { "epoch": 2.2383619544756383, "grad_norm": 0.08417432755231857, "learning_rate": 3.196114171121205e-06, "loss": 0.42807674407958984, "step": 12114 }, { "epoch": 2.238546731184534, "grad_norm": 0.08571884036064148, "learning_rate": 3.1946523297542298e-06, "loss": 0.4084596633911133, "step": 12115 }, { "epoch": 2.23873150789343, "grad_norm": 0.068547323346138, "learning_rate": 3.1931907592187973e-06, "loss": 0.2529278099536896, "step": 12116 }, { "epoch": 2.238916284602326, "grad_norm": 0.06548000872135162, "learning_rate": 3.1917294595730763e-06, "loss": 0.27962929010391235, "step": 12117 }, { "epoch": 2.2391010613112217, "grad_norm": 0.10712318867444992, "learning_rate": 3.190268430875223e-06, "loss": 0.48425424098968506, "step": 12118 }, { "epoch": 2.2392858380201175, "grad_norm": 0.10012485086917877, "learning_rate": 3.188807673183382e-06, "loss": 0.4494583308696747, "step": 12119 }, { "epoch": 2.2394706147290133, "grad_norm": 0.09713777899742126, "learning_rate": 3.1873471865556848e-06, "loss": 0.544177770614624, "step": 12120 }, { "epoch": 2.239655391437909, "grad_norm": 0.09713928401470184, "learning_rate": 3.1858869710502593e-06, "loss": 0.5080793499946594, "step": 12121 }, { "epoch": 2.239840168146805, "grad_norm": 0.10769485682249069, "learning_rate": 3.184427026725211e-06, "loss": 0.5964470505714417, "step": 12122 }, { "epoch": 2.240024944855701, "grad_norm": 0.08020690083503723, "learning_rate": 3.182967353638643e-06, "loss": 0.3854624032974243, "step": 12123 }, { "epoch": 2.2402097215645966, "grad_norm": 0.10620839148759842, "learning_rate": 3.1815079518486505e-06, "loss": 0.6212494373321533, "step": 12124 }, { "epoch": 2.2403944982734925, "grad_norm": 0.06393173336982727, "learning_rate": 3.1800488214133017e-06, "loss": 0.31893590092658997, "step": 12125 }, { "epoch": 2.2405792749823883, "grad_norm": 0.08972685784101486, "learning_rate": 3.1785899623906767e-06, "loss": 0.397165983915329, "step": 12126 }, { "epoch": 2.240764051691284, "grad_norm": 0.0904398113489151, "learning_rate": 3.1771313748388334e-06, "loss": 0.4341369867324829, "step": 12127 }, { "epoch": 2.24094882840018, "grad_norm": 0.06866602599620819, "learning_rate": 3.1756730588158124e-06, "loss": 0.2974797487258911, "step": 12128 }, { "epoch": 2.241133605109076, "grad_norm": 0.10474536567926407, "learning_rate": 3.1742150143796525e-06, "loss": 0.5112119317054749, "step": 12129 }, { "epoch": 2.241318381817972, "grad_norm": 0.08430393785238266, "learning_rate": 3.1727572415883835e-06, "loss": 0.4515208899974823, "step": 12130 }, { "epoch": 2.241503158526868, "grad_norm": 0.08922908455133438, "learning_rate": 3.1712997405000124e-06, "loss": 0.4198884963989258, "step": 12131 }, { "epoch": 2.2416879352357637, "grad_norm": 0.0893121287226677, "learning_rate": 3.1698425111725485e-06, "loss": 0.45976704359054565, "step": 12132 }, { "epoch": 2.2418727119446595, "grad_norm": 0.07319130748510361, "learning_rate": 3.168385553663983e-06, "loss": 0.4156619608402252, "step": 12133 }, { "epoch": 2.2420574886535554, "grad_norm": 0.09958360344171524, "learning_rate": 3.1669288680322997e-06, "loss": 0.6084612011909485, "step": 12134 }, { "epoch": 2.242242265362451, "grad_norm": 0.08181202411651611, "learning_rate": 3.165472454335472e-06, "loss": 0.4880278408527374, "step": 12135 }, { "epoch": 2.242427042071347, "grad_norm": 0.07543037086725235, "learning_rate": 3.164016312631456e-06, "loss": 0.43885689973831177, "step": 12136 }, { "epoch": 2.242611818780243, "grad_norm": 0.08452077209949493, "learning_rate": 3.162560442978203e-06, "loss": 0.37461787462234497, "step": 12137 }, { "epoch": 2.2427965954891387, "grad_norm": 0.08403962850570679, "learning_rate": 3.1611048454336523e-06, "loss": 0.3464776873588562, "step": 12138 }, { "epoch": 2.2429813721980345, "grad_norm": 0.06161332130432129, "learning_rate": 3.159649520055733e-06, "loss": 0.3631473779678345, "step": 12139 }, { "epoch": 2.2431661489069303, "grad_norm": 0.06809493154287338, "learning_rate": 3.158194466902362e-06, "loss": 0.3071410059928894, "step": 12140 }, { "epoch": 2.243350925615826, "grad_norm": 0.08933389186859131, "learning_rate": 3.1567396860314503e-06, "loss": 0.4868786633014679, "step": 12141 }, { "epoch": 2.243535702324722, "grad_norm": 0.12151765823364258, "learning_rate": 3.1552851775008853e-06, "loss": 0.6342805027961731, "step": 12142 }, { "epoch": 2.243720479033618, "grad_norm": 0.0988689661026001, "learning_rate": 3.153830941368555e-06, "loss": 0.4726806879043579, "step": 12143 }, { "epoch": 2.2439052557425136, "grad_norm": 0.0977170541882515, "learning_rate": 3.1523769776923384e-06, "loss": 0.46017003059387207, "step": 12144 }, { "epoch": 2.2440900324514095, "grad_norm": 0.07871796190738678, "learning_rate": 3.1509232865300886e-06, "loss": 0.343954861164093, "step": 12145 }, { "epoch": 2.2442748091603053, "grad_norm": 0.09883001446723938, "learning_rate": 3.1494698679396697e-06, "loss": 0.7568049430847168, "step": 12146 }, { "epoch": 2.244459585869201, "grad_norm": 0.08142291754484177, "learning_rate": 3.1480167219789136e-06, "loss": 0.4528813362121582, "step": 12147 }, { "epoch": 2.244644362578097, "grad_norm": 0.07283273339271545, "learning_rate": 3.146563848705656e-06, "loss": 0.3055526316165924, "step": 12148 }, { "epoch": 2.244829139286993, "grad_norm": 0.10215907543897629, "learning_rate": 3.1451112481777193e-06, "loss": 0.5181620121002197, "step": 12149 }, { "epoch": 2.2450139159958886, "grad_norm": 0.0920044481754303, "learning_rate": 3.1436589204529044e-06, "loss": 0.5287151336669922, "step": 12150 }, { "epoch": 2.2451986927047844, "grad_norm": 0.1108456403017044, "learning_rate": 3.1422068655890136e-06, "loss": 0.5236777663230896, "step": 12151 }, { "epoch": 2.2453834694136803, "grad_norm": 0.08828677237033844, "learning_rate": 3.140755083643835e-06, "loss": 0.49629533290863037, "step": 12152 }, { "epoch": 2.245568246122576, "grad_norm": 0.08154132962226868, "learning_rate": 3.1393035746751443e-06, "loss": 0.41256043314933777, "step": 12153 }, { "epoch": 2.245753022831472, "grad_norm": 0.06214584410190582, "learning_rate": 3.1378523387407068e-06, "loss": 0.3293093144893646, "step": 12154 }, { "epoch": 2.2459377995403678, "grad_norm": 0.0842163935303688, "learning_rate": 3.1364013758982803e-06, "loss": 0.4421621859073639, "step": 12155 }, { "epoch": 2.2461225762492636, "grad_norm": 0.08379825204610825, "learning_rate": 3.134950686205602e-06, "loss": 0.41820988059043884, "step": 12156 }, { "epoch": 2.2463073529581594, "grad_norm": 0.11791109293699265, "learning_rate": 3.1335002697204085e-06, "loss": 0.8276405930519104, "step": 12157 }, { "epoch": 2.2464921296670557, "grad_norm": 0.07951841503381729, "learning_rate": 3.132050126500422e-06, "loss": 0.3736032545566559, "step": 12158 }, { "epoch": 2.246676906375951, "grad_norm": 0.0968988686800003, "learning_rate": 3.1306002566033545e-06, "loss": 0.40144771337509155, "step": 12159 }, { "epoch": 2.2468616830848473, "grad_norm": 0.12055794149637222, "learning_rate": 3.1291506600869037e-06, "loss": 0.5013450384140015, "step": 12160 }, { "epoch": 2.247046459793743, "grad_norm": 0.08677809685468674, "learning_rate": 3.127701337008764e-06, "loss": 0.41387808322906494, "step": 12161 }, { "epoch": 2.247231236502639, "grad_norm": 0.07982303202152252, "learning_rate": 3.1262522874266076e-06, "loss": 0.40415987372398376, "step": 12162 }, { "epoch": 2.247416013211535, "grad_norm": 0.10855205357074738, "learning_rate": 3.124803511398108e-06, "loss": 0.506013810634613, "step": 12163 }, { "epoch": 2.2476007899204307, "grad_norm": 0.0775640606880188, "learning_rate": 3.123355008980916e-06, "loss": 0.41776102781295776, "step": 12164 }, { "epoch": 2.2477855666293265, "grad_norm": 0.09691471606492996, "learning_rate": 3.1219067802326763e-06, "loss": 0.6090372800827026, "step": 12165 }, { "epoch": 2.2479703433382223, "grad_norm": 0.08877398073673248, "learning_rate": 3.1204588252110358e-06, "loss": 0.49977755546569824, "step": 12166 }, { "epoch": 2.248155120047118, "grad_norm": 0.11311852186918259, "learning_rate": 3.119011143973606e-06, "loss": 0.6844763159751892, "step": 12167 }, { "epoch": 2.248339896756014, "grad_norm": 0.09220191091299057, "learning_rate": 3.1175637365780053e-06, "loss": 0.36833304166793823, "step": 12168 }, { "epoch": 2.24852467346491, "grad_norm": 0.10116321593523026, "learning_rate": 3.116116603081839e-06, "loss": 0.5867173671722412, "step": 12169 }, { "epoch": 2.2487094501738056, "grad_norm": 0.06711015850305557, "learning_rate": 3.114669743542692e-06, "loss": 0.3416115343570709, "step": 12170 }, { "epoch": 2.2488942268827015, "grad_norm": 0.08700167387723923, "learning_rate": 3.113223158018148e-06, "loss": 0.41278156638145447, "step": 12171 }, { "epoch": 2.2490790035915973, "grad_norm": 0.07968169450759888, "learning_rate": 3.111776846565776e-06, "loss": 0.35030606389045715, "step": 12172 }, { "epoch": 2.249263780300493, "grad_norm": 0.07849443703889847, "learning_rate": 3.110330809243134e-06, "loss": 0.3592144250869751, "step": 12173 }, { "epoch": 2.249448557009389, "grad_norm": 0.0856466069817543, "learning_rate": 3.1088850461077724e-06, "loss": 0.4331666827201843, "step": 12174 }, { "epoch": 2.2496333337182848, "grad_norm": 0.08074059337377548, "learning_rate": 3.1074395572172287e-06, "loss": 0.4187830984592438, "step": 12175 }, { "epoch": 2.2498181104271806, "grad_norm": 0.08852949738502502, "learning_rate": 3.1059943426290228e-06, "loss": 0.3407609760761261, "step": 12176 }, { "epoch": 2.2500028871360764, "grad_norm": 0.08934972435235977, "learning_rate": 3.104549402400675e-06, "loss": 0.4727935791015625, "step": 12177 }, { "epoch": 2.2501876638449723, "grad_norm": 0.0799507200717926, "learning_rate": 3.103104736589687e-06, "loss": 0.45224735140800476, "step": 12178 }, { "epoch": 2.250372440553868, "grad_norm": 0.08885331451892853, "learning_rate": 3.1016603452535533e-06, "loss": 0.4831817150115967, "step": 12179 }, { "epoch": 2.250557217262764, "grad_norm": 0.08326054364442825, "learning_rate": 3.1002162284497584e-06, "loss": 0.42966553568840027, "step": 12180 }, { "epoch": 2.2507419939716597, "grad_norm": 0.08480405062437057, "learning_rate": 3.0987723862357677e-06, "loss": 0.33884021639823914, "step": 12181 }, { "epoch": 2.2509267706805556, "grad_norm": 0.06686267256736755, "learning_rate": 3.097328818669045e-06, "loss": 0.3047025799751282, "step": 12182 }, { "epoch": 2.2511115473894514, "grad_norm": 0.10958808660507202, "learning_rate": 3.095885525807043e-06, "loss": 0.5943275094032288, "step": 12183 }, { "epoch": 2.2512963240983472, "grad_norm": 0.07362347841262817, "learning_rate": 3.094442507707194e-06, "loss": 0.3816607594490051, "step": 12184 }, { "epoch": 2.251481100807243, "grad_norm": 0.08515738695859909, "learning_rate": 3.092999764426925e-06, "loss": 0.4283255636692047, "step": 12185 }, { "epoch": 2.251665877516139, "grad_norm": 0.08227543532848358, "learning_rate": 3.0915572960236617e-06, "loss": 0.3923949599266052, "step": 12186 }, { "epoch": 2.251850654225035, "grad_norm": 0.07537674158811569, "learning_rate": 3.0901151025548026e-06, "loss": 0.3772265911102295, "step": 12187 }, { "epoch": 2.2520354309339305, "grad_norm": 0.08898736536502838, "learning_rate": 3.0886731840777427e-06, "loss": 0.42761051654815674, "step": 12188 }, { "epoch": 2.252220207642827, "grad_norm": 0.06679991632699966, "learning_rate": 3.087231540649872e-06, "loss": 0.31185150146484375, "step": 12189 }, { "epoch": 2.2524049843517226, "grad_norm": 0.09417211264371872, "learning_rate": 3.0857901723285544e-06, "loss": 0.5708805918693542, "step": 12190 }, { "epoch": 2.2525897610606185, "grad_norm": 0.07637672126293182, "learning_rate": 3.0843490791711562e-06, "loss": 0.34745824337005615, "step": 12191 }, { "epoch": 2.2527745377695143, "grad_norm": 0.0839376151561737, "learning_rate": 3.082908261235029e-06, "loss": 0.4435441195964813, "step": 12192 }, { "epoch": 2.25295931447841, "grad_norm": 0.10060539841651917, "learning_rate": 3.081467718577512e-06, "loss": 0.6545190811157227, "step": 12193 }, { "epoch": 2.253144091187306, "grad_norm": 0.07129131257534027, "learning_rate": 3.0800274512559334e-06, "loss": 0.414524108171463, "step": 12194 }, { "epoch": 2.253328867896202, "grad_norm": 0.10510852932929993, "learning_rate": 3.0785874593276167e-06, "loss": 0.6114096641540527, "step": 12195 }, { "epoch": 2.2535136446050976, "grad_norm": 0.08640439063310623, "learning_rate": 3.077147742849862e-06, "loss": 0.41606390476226807, "step": 12196 }, { "epoch": 2.2536984213139934, "grad_norm": 0.09171494096517563, "learning_rate": 3.0757083018799673e-06, "loss": 0.51341313123703, "step": 12197 }, { "epoch": 2.2538831980228893, "grad_norm": 0.08453557640314102, "learning_rate": 3.0742691364752196e-06, "loss": 0.43702369928359985, "step": 12198 }, { "epoch": 2.254067974731785, "grad_norm": 0.0747491717338562, "learning_rate": 3.0728302466928914e-06, "loss": 0.37024620175361633, "step": 12199 }, { "epoch": 2.254252751440681, "grad_norm": 0.0880352258682251, "learning_rate": 3.0713916325902516e-06, "loss": 0.44032952189445496, "step": 12200 }, { "epoch": 2.2544375281495768, "grad_norm": 0.08446033298969269, "learning_rate": 3.0699532942245446e-06, "loss": 0.5010290741920471, "step": 12201 }, { "epoch": 2.2546223048584726, "grad_norm": 0.07818498462438583, "learning_rate": 3.0685152316530143e-06, "loss": 0.44643253087997437, "step": 12202 }, { "epoch": 2.2548070815673684, "grad_norm": 0.0621052086353302, "learning_rate": 3.0670774449328956e-06, "loss": 0.34929224848747253, "step": 12203 }, { "epoch": 2.2549918582762642, "grad_norm": 0.09159861505031586, "learning_rate": 3.0656399341214016e-06, "loss": 0.49612244963645935, "step": 12204 }, { "epoch": 2.25517663498516, "grad_norm": 0.07836683839559555, "learning_rate": 3.064202699275739e-06, "loss": 0.47169002890586853, "step": 12205 }, { "epoch": 2.255361411694056, "grad_norm": 0.08987939357757568, "learning_rate": 3.0627657404531164e-06, "loss": 0.4715505838394165, "step": 12206 }, { "epoch": 2.2555461884029517, "grad_norm": 0.08097187429666519, "learning_rate": 3.061329057710711e-06, "loss": 0.3911837637424469, "step": 12207 }, { "epoch": 2.2557309651118476, "grad_norm": 0.0881069153547287, "learning_rate": 3.0598926511057002e-06, "loss": 0.4455852806568146, "step": 12208 }, { "epoch": 2.2559157418207434, "grad_norm": 0.10333002358675003, "learning_rate": 3.0584565206952534e-06, "loss": 0.4860212802886963, "step": 12209 }, { "epoch": 2.256100518529639, "grad_norm": 0.08561402559280396, "learning_rate": 3.0570206665365152e-06, "loss": 0.45404475927352905, "step": 12210 }, { "epoch": 2.256285295238535, "grad_norm": 0.06861907243728638, "learning_rate": 3.0555850886866334e-06, "loss": 0.33333149552345276, "step": 12211 }, { "epoch": 2.256470071947431, "grad_norm": 0.11791132390499115, "learning_rate": 3.054149787202738e-06, "loss": 0.5899015069007874, "step": 12212 }, { "epoch": 2.2566548486563267, "grad_norm": 0.08489428460597992, "learning_rate": 3.0527147621419504e-06, "loss": 0.4829174280166626, "step": 12213 }, { "epoch": 2.2568396253652225, "grad_norm": 0.08123718947172165, "learning_rate": 3.051280013561384e-06, "loss": 0.4060458838939667, "step": 12214 }, { "epoch": 2.2570244020741184, "grad_norm": 0.08335306495428085, "learning_rate": 3.0498455415181296e-06, "loss": 0.3981687128543854, "step": 12215 }, { "epoch": 2.2572091787830146, "grad_norm": 0.08260929584503174, "learning_rate": 3.0484113460692786e-06, "loss": 0.4201548993587494, "step": 12216 }, { "epoch": 2.25739395549191, "grad_norm": 0.08963587880134583, "learning_rate": 3.0469774272719075e-06, "loss": 0.4701205790042877, "step": 12217 }, { "epoch": 2.2575787322008063, "grad_norm": 0.09164369851350784, "learning_rate": 3.0455437851830805e-06, "loss": 0.4588594436645508, "step": 12218 }, { "epoch": 2.257763508909702, "grad_norm": 0.07610947638750076, "learning_rate": 3.044110419859855e-06, "loss": 0.346407026052475, "step": 12219 }, { "epoch": 2.257948285618598, "grad_norm": 0.0824890211224556, "learning_rate": 3.042677331359274e-06, "loss": 0.4116358160972595, "step": 12220 }, { "epoch": 2.258133062327494, "grad_norm": 0.08806995302438736, "learning_rate": 3.0412445197383667e-06, "loss": 0.4943826198577881, "step": 12221 }, { "epoch": 2.2583178390363896, "grad_norm": 0.10130472481250763, "learning_rate": 3.0398119850541553e-06, "loss": 0.5649289488792419, "step": 12222 }, { "epoch": 2.2585026157452854, "grad_norm": 0.08071459829807281, "learning_rate": 3.0383797273636552e-06, "loss": 0.5119844675064087, "step": 12223 }, { "epoch": 2.2586873924541813, "grad_norm": 0.08857184648513794, "learning_rate": 3.0369477467238586e-06, "loss": 0.3872298300266266, "step": 12224 }, { "epoch": 2.258872169163077, "grad_norm": 0.0861668512225151, "learning_rate": 3.035516043191753e-06, "loss": 0.4301668405532837, "step": 12225 }, { "epoch": 2.259056945871973, "grad_norm": 0.10000453144311905, "learning_rate": 3.0340846168243265e-06, "loss": 0.45657336711883545, "step": 12226 }, { "epoch": 2.2592417225808687, "grad_norm": 0.0979422852396965, "learning_rate": 3.0326534676785357e-06, "loss": 0.4080390930175781, "step": 12227 }, { "epoch": 2.2594264992897646, "grad_norm": 0.0736035481095314, "learning_rate": 3.031222595811343e-06, "loss": 0.3957144320011139, "step": 12228 }, { "epoch": 2.2596112759986604, "grad_norm": 0.09222523123025894, "learning_rate": 3.0297920012796842e-06, "loss": 0.46320977807044983, "step": 12229 }, { "epoch": 2.2597960527075562, "grad_norm": 0.09962626546621323, "learning_rate": 3.0283616841404974e-06, "loss": 0.47465965151786804, "step": 12230 }, { "epoch": 2.259980829416452, "grad_norm": 0.07642363756895065, "learning_rate": 3.0269316444507035e-06, "loss": 0.4165627360343933, "step": 12231 }, { "epoch": 2.260165606125348, "grad_norm": 0.08377820253372192, "learning_rate": 3.0255018822672143e-06, "loss": 0.4770684242248535, "step": 12232 }, { "epoch": 2.2603503828342437, "grad_norm": 0.08431167155504227, "learning_rate": 3.02407239764693e-06, "loss": 0.4955669641494751, "step": 12233 }, { "epoch": 2.2605351595431395, "grad_norm": 0.07196210324764252, "learning_rate": 3.0226431906467425e-06, "loss": 0.4227936863899231, "step": 12234 }, { "epoch": 2.2607199362520354, "grad_norm": 0.08174673467874527, "learning_rate": 3.021214261323524e-06, "loss": 0.4703042507171631, "step": 12235 }, { "epoch": 2.260904712960931, "grad_norm": 0.08144989609718323, "learning_rate": 3.019785609734144e-06, "loss": 0.3058887720108032, "step": 12236 }, { "epoch": 2.261089489669827, "grad_norm": 0.09629102796316147, "learning_rate": 3.0183572359354574e-06, "loss": 0.5303314328193665, "step": 12237 }, { "epoch": 2.261274266378723, "grad_norm": 0.08101619780063629, "learning_rate": 3.0169291399843105e-06, "loss": 0.41484183073043823, "step": 12238 }, { "epoch": 2.2614590430876187, "grad_norm": 0.09562167525291443, "learning_rate": 3.0155013219375374e-06, "loss": 0.5148574709892273, "step": 12239 }, { "epoch": 2.2616438197965145, "grad_norm": 0.08201883733272552, "learning_rate": 3.0140737818519616e-06, "loss": 0.41835635900497437, "step": 12240 }, { "epoch": 2.2618285965054103, "grad_norm": 0.07495265454053879, "learning_rate": 3.012646519784391e-06, "loss": 0.36787310242652893, "step": 12241 }, { "epoch": 2.262013373214306, "grad_norm": 0.08998357504606247, "learning_rate": 3.0112195357916284e-06, "loss": 0.4299069941043854, "step": 12242 }, { "epoch": 2.262198149923202, "grad_norm": 0.08980880677700043, "learning_rate": 3.0097928299304666e-06, "loss": 0.3831421136856079, "step": 12243 }, { "epoch": 2.262382926632098, "grad_norm": 0.09747115522623062, "learning_rate": 3.0083664022576773e-06, "loss": 0.4732903242111206, "step": 12244 }, { "epoch": 2.262567703340994, "grad_norm": 0.07659371942281723, "learning_rate": 3.0069402528300307e-06, "loss": 0.3989792466163635, "step": 12245 }, { "epoch": 2.2627524800498895, "grad_norm": 0.08523440361022949, "learning_rate": 3.0055143817042844e-06, "loss": 0.3412840962409973, "step": 12246 }, { "epoch": 2.2629372567587858, "grad_norm": 0.08923565596342087, "learning_rate": 3.0040887889371816e-06, "loss": 0.35347479581832886, "step": 12247 }, { "epoch": 2.263122033467681, "grad_norm": 0.10447020083665848, "learning_rate": 3.002663474585461e-06, "loss": 0.5423638224601746, "step": 12248 }, { "epoch": 2.2633068101765774, "grad_norm": 0.06621216982603073, "learning_rate": 3.001238438705839e-06, "loss": 0.362056165933609, "step": 12249 }, { "epoch": 2.2634915868854733, "grad_norm": 0.05140472948551178, "learning_rate": 2.9998136813550318e-06, "loss": 0.21704205870628357, "step": 12250 }, { "epoch": 2.263676363594369, "grad_norm": 0.07546143233776093, "learning_rate": 2.9983892025897386e-06, "loss": 0.48610544204711914, "step": 12251 }, { "epoch": 2.263861140303265, "grad_norm": 0.1048959270119667, "learning_rate": 2.9969650024666497e-06, "loss": 0.6081486940383911, "step": 12252 }, { "epoch": 2.2640459170121607, "grad_norm": 0.061856064945459366, "learning_rate": 2.9955410810424446e-06, "loss": 0.29667016863822937, "step": 12253 }, { "epoch": 2.2642306937210566, "grad_norm": 0.07674991339445114, "learning_rate": 2.9941174383737937e-06, "loss": 0.3966175615787506, "step": 12254 }, { "epoch": 2.2644154704299524, "grad_norm": 0.10296843945980072, "learning_rate": 2.992694074517346e-06, "loss": 0.46330904960632324, "step": 12255 }, { "epoch": 2.264600247138848, "grad_norm": 0.09111112356185913, "learning_rate": 2.991270989529752e-06, "loss": 0.4320758283138275, "step": 12256 }, { "epoch": 2.264785023847744, "grad_norm": 0.10260162502527237, "learning_rate": 2.9898481834676453e-06, "loss": 0.5434527397155762, "step": 12257 }, { "epoch": 2.26496980055664, "grad_norm": 0.08933594822883606, "learning_rate": 2.988425656387648e-06, "loss": 0.49509263038635254, "step": 12258 }, { "epoch": 2.2651545772655357, "grad_norm": 0.07403186708688736, "learning_rate": 2.987003408346374e-06, "loss": 0.3863668441772461, "step": 12259 }, { "epoch": 2.2653393539744315, "grad_norm": 0.07996796071529388, "learning_rate": 2.9855814394004255e-06, "loss": 0.32834455370903015, "step": 12260 }, { "epoch": 2.2655241306833274, "grad_norm": 0.09327889233827591, "learning_rate": 2.984159749606388e-06, "loss": 0.4397789239883423, "step": 12261 }, { "epoch": 2.265708907392223, "grad_norm": 0.08188837766647339, "learning_rate": 2.9827383390208464e-06, "loss": 0.3994868993759155, "step": 12262 }, { "epoch": 2.265893684101119, "grad_norm": 0.09514510631561279, "learning_rate": 2.9813172077003603e-06, "loss": 0.5370783805847168, "step": 12263 }, { "epoch": 2.266078460810015, "grad_norm": 0.08097981661558151, "learning_rate": 2.9798963557014895e-06, "loss": 0.36683669686317444, "step": 12264 }, { "epoch": 2.2662632375189107, "grad_norm": 0.08686467260122299, "learning_rate": 2.9784757830807852e-06, "loss": 0.44896113872528076, "step": 12265 }, { "epoch": 2.2664480142278065, "grad_norm": 0.09172335267066956, "learning_rate": 2.9770554898947747e-06, "loss": 0.5055941343307495, "step": 12266 }, { "epoch": 2.2666327909367023, "grad_norm": 0.1009598821401596, "learning_rate": 2.975635476199984e-06, "loss": 0.6283546090126038, "step": 12267 }, { "epoch": 2.266817567645598, "grad_norm": 0.06228647753596306, "learning_rate": 2.9742157420529273e-06, "loss": 0.2549760341644287, "step": 12268 }, { "epoch": 2.267002344354494, "grad_norm": 0.07727950811386108, "learning_rate": 2.9727962875101e-06, "loss": 0.37321093678474426, "step": 12269 }, { "epoch": 2.26718712106339, "grad_norm": 0.09002166986465454, "learning_rate": 2.9713771126279958e-06, "loss": 0.4421631097793579, "step": 12270 }, { "epoch": 2.2673718977722856, "grad_norm": 0.10053392499685287, "learning_rate": 2.9699582174630927e-06, "loss": 0.534861147403717, "step": 12271 }, { "epoch": 2.2675566744811815, "grad_norm": 0.06048179417848587, "learning_rate": 2.9685396020718584e-06, "loss": 0.26993635296821594, "step": 12272 }, { "epoch": 2.2677414511900773, "grad_norm": 0.07431792467832565, "learning_rate": 2.9671212665107496e-06, "loss": 0.35286685824394226, "step": 12273 }, { "epoch": 2.2679262278989736, "grad_norm": 0.08689253777265549, "learning_rate": 2.9657032108362136e-06, "loss": 0.5525953769683838, "step": 12274 }, { "epoch": 2.268111004607869, "grad_norm": 0.0873987227678299, "learning_rate": 2.96428543510468e-06, "loss": 0.4629843831062317, "step": 12275 }, { "epoch": 2.2682957813167652, "grad_norm": 0.08665183931589127, "learning_rate": 2.9628679393725766e-06, "loss": 0.4415779113769531, "step": 12276 }, { "epoch": 2.2684805580256606, "grad_norm": 0.08151692152023315, "learning_rate": 2.9614507236963077e-06, "loss": 0.4622322618961334, "step": 12277 }, { "epoch": 2.268665334734557, "grad_norm": 0.07684565335512161, "learning_rate": 2.9600337881322805e-06, "loss": 0.39885133504867554, "step": 12278 }, { "epoch": 2.2688501114434527, "grad_norm": 0.09301837533712387, "learning_rate": 2.958617132736887e-06, "loss": 0.5050402879714966, "step": 12279 }, { "epoch": 2.2690348881523486, "grad_norm": 0.09714837372303009, "learning_rate": 2.9572007575665006e-06, "loss": 0.5582828521728516, "step": 12280 }, { "epoch": 2.2692196648612444, "grad_norm": 0.08942749351263046, "learning_rate": 2.9557846626774876e-06, "loss": 0.46698012948036194, "step": 12281 }, { "epoch": 2.26940444157014, "grad_norm": 0.09786448627710342, "learning_rate": 2.954368848126211e-06, "loss": 0.5502012372016907, "step": 12282 }, { "epoch": 2.269589218279036, "grad_norm": 0.06812259554862976, "learning_rate": 2.952953313969008e-06, "loss": 0.35303428769111633, "step": 12283 }, { "epoch": 2.269773994987932, "grad_norm": 0.0862889289855957, "learning_rate": 2.951538060262212e-06, "loss": 0.4027027487754822, "step": 12284 }, { "epoch": 2.2699587716968277, "grad_norm": 0.08643152564764023, "learning_rate": 2.9501230870621565e-06, "loss": 0.48595723509788513, "step": 12285 }, { "epoch": 2.2701435484057235, "grad_norm": 0.06484793871641159, "learning_rate": 2.9487083944251428e-06, "loss": 0.2770717442035675, "step": 12286 }, { "epoch": 2.2703283251146193, "grad_norm": 0.09154028445482254, "learning_rate": 2.9472939824074742e-06, "loss": 0.41690942645072937, "step": 12287 }, { "epoch": 2.270513101823515, "grad_norm": 0.09700242429971695, "learning_rate": 2.945879851065443e-06, "loss": 0.473203182220459, "step": 12288 }, { "epoch": 2.270697878532411, "grad_norm": 0.08067073673009872, "learning_rate": 2.9444660004553207e-06, "loss": 0.3523070514202118, "step": 12289 }, { "epoch": 2.270882655241307, "grad_norm": 0.0762988030910492, "learning_rate": 2.9430524306333785e-06, "loss": 0.40847325325012207, "step": 12290 }, { "epoch": 2.2710674319502027, "grad_norm": 0.06584256887435913, "learning_rate": 2.94163914165587e-06, "loss": 0.33420929312705994, "step": 12291 }, { "epoch": 2.2712522086590985, "grad_norm": 0.09168466925621033, "learning_rate": 2.9402261335790415e-06, "loss": 0.4268166720867157, "step": 12292 }, { "epoch": 2.2714369853679943, "grad_norm": 0.07886332273483276, "learning_rate": 2.938813406459129e-06, "loss": 0.42885634303092957, "step": 12293 }, { "epoch": 2.27162176207689, "grad_norm": 0.09260807931423187, "learning_rate": 2.937400960352348e-06, "loss": 0.5658440589904785, "step": 12294 }, { "epoch": 2.271806538785786, "grad_norm": 0.09772022813558578, "learning_rate": 2.9359887953149125e-06, "loss": 0.5705576539039612, "step": 12295 }, { "epoch": 2.271991315494682, "grad_norm": 0.11060616374015808, "learning_rate": 2.9345769114030264e-06, "loss": 0.6149977445602417, "step": 12296 }, { "epoch": 2.2721760922035776, "grad_norm": 0.0723041296005249, "learning_rate": 2.9331653086728674e-06, "loss": 0.3808667063713074, "step": 12297 }, { "epoch": 2.2723608689124735, "grad_norm": 0.06351529806852341, "learning_rate": 2.9317539871806235e-06, "loss": 0.3369500935077667, "step": 12298 }, { "epoch": 2.2725456456213693, "grad_norm": 0.05478255823254585, "learning_rate": 2.9303429469824594e-06, "loss": 0.24042272567749023, "step": 12299 }, { "epoch": 2.272730422330265, "grad_norm": 0.06112119182944298, "learning_rate": 2.9289321881345257e-06, "loss": 0.2693672180175781, "step": 12300 }, { "epoch": 2.272915199039161, "grad_norm": 0.07038292288780212, "learning_rate": 2.9275217106929675e-06, "loss": 0.3221263587474823, "step": 12301 }, { "epoch": 2.273099975748057, "grad_norm": 0.0761604756116867, "learning_rate": 2.926111514713923e-06, "loss": 0.35919904708862305, "step": 12302 }, { "epoch": 2.273284752456953, "grad_norm": 0.12052042782306671, "learning_rate": 2.9247016002535043e-06, "loss": 0.5472878813743591, "step": 12303 }, { "epoch": 2.2734695291658484, "grad_norm": 0.08945787698030472, "learning_rate": 2.923291967367823e-06, "loss": 0.43166667222976685, "step": 12304 }, { "epoch": 2.2736543058747447, "grad_norm": 0.11389131098985672, "learning_rate": 2.921882616112988e-06, "loss": 0.569865882396698, "step": 12305 }, { "epoch": 2.27383908258364, "grad_norm": 0.10434228926897049, "learning_rate": 2.9204735465450773e-06, "loss": 0.5434978008270264, "step": 12306 }, { "epoch": 2.2740238592925364, "grad_norm": 0.09066736698150635, "learning_rate": 2.9190647587201703e-06, "loss": 0.5771661996841431, "step": 12307 }, { "epoch": 2.274208636001432, "grad_norm": 0.08334506303071976, "learning_rate": 2.917656252694335e-06, "loss": 0.41939690709114075, "step": 12308 }, { "epoch": 2.274393412710328, "grad_norm": 0.05808553472161293, "learning_rate": 2.9162480285236204e-06, "loss": 0.30752813816070557, "step": 12309 }, { "epoch": 2.274578189419224, "grad_norm": 0.08325260877609253, "learning_rate": 2.9148400862640726e-06, "loss": 0.40775370597839355, "step": 12310 }, { "epoch": 2.2747629661281197, "grad_norm": 0.07577984780073166, "learning_rate": 2.913432425971722e-06, "loss": 0.2847796380519867, "step": 12311 }, { "epoch": 2.2749477428370155, "grad_norm": 0.10682272911071777, "learning_rate": 2.9120250477025903e-06, "loss": 0.4378647804260254, "step": 12312 }, { "epoch": 2.2751325195459113, "grad_norm": 0.09837109595537186, "learning_rate": 2.910617951512689e-06, "loss": 0.47766849398612976, "step": 12313 }, { "epoch": 2.275317296254807, "grad_norm": 0.08506254851818085, "learning_rate": 2.9092111374580103e-06, "loss": 0.47322845458984375, "step": 12314 }, { "epoch": 2.275502072963703, "grad_norm": 0.09625948965549469, "learning_rate": 2.9078046055945443e-06, "loss": 0.4225115180015564, "step": 12315 }, { "epoch": 2.275686849672599, "grad_norm": 0.07625984400510788, "learning_rate": 2.906398355978269e-06, "loss": 0.3932497203350067, "step": 12316 }, { "epoch": 2.2758716263814947, "grad_norm": 0.07267304509878159, "learning_rate": 2.90499238866514e-06, "loss": 0.33147087693214417, "step": 12317 }, { "epoch": 2.2760564030903905, "grad_norm": 0.0772964283823967, "learning_rate": 2.90358670371112e-06, "loss": 0.49541783332824707, "step": 12318 }, { "epoch": 2.2762411797992863, "grad_norm": 0.07828882336616516, "learning_rate": 2.90218130117215e-06, "loss": 0.45402592420578003, "step": 12319 }, { "epoch": 2.276425956508182, "grad_norm": 0.07979245483875275, "learning_rate": 2.9007761811041555e-06, "loss": 0.3754137456417084, "step": 12320 }, { "epoch": 2.276610733217078, "grad_norm": 0.09181807935237885, "learning_rate": 2.8993713435630576e-06, "loss": 0.5156495571136475, "step": 12321 }, { "epoch": 2.276795509925974, "grad_norm": 0.0858747586607933, "learning_rate": 2.897966788604769e-06, "loss": 0.46253150701522827, "step": 12322 }, { "epoch": 2.2769802866348696, "grad_norm": 0.09898536652326584, "learning_rate": 2.8965625162851794e-06, "loss": 0.43217700719833374, "step": 12323 }, { "epoch": 2.2771650633437654, "grad_norm": 0.08097942918539047, "learning_rate": 2.8951585266601757e-06, "loss": 0.3872702121734619, "step": 12324 }, { "epoch": 2.2773498400526613, "grad_norm": 0.07538451254367828, "learning_rate": 2.893754819785639e-06, "loss": 0.39061447978019714, "step": 12325 }, { "epoch": 2.277534616761557, "grad_norm": 0.07708559930324554, "learning_rate": 2.8923513957174263e-06, "loss": 0.4144393503665924, "step": 12326 }, { "epoch": 2.277719393470453, "grad_norm": 0.08210837841033936, "learning_rate": 2.8909482545113932e-06, "loss": 0.43052053451538086, "step": 12327 }, { "epoch": 2.2779041701793488, "grad_norm": 0.12307770550251007, "learning_rate": 2.8895453962233757e-06, "loss": 0.6036669015884399, "step": 12328 }, { "epoch": 2.2780889468882446, "grad_norm": 0.07950335741043091, "learning_rate": 2.8881428209092054e-06, "loss": 0.42403754591941833, "step": 12329 }, { "epoch": 2.2782737235971404, "grad_norm": 0.0881686806678772, "learning_rate": 2.8867405286247007e-06, "loss": 0.3945855498313904, "step": 12330 }, { "epoch": 2.2784585003060362, "grad_norm": 0.08936318010091782, "learning_rate": 2.8853385194256677e-06, "loss": 0.41373908519744873, "step": 12331 }, { "epoch": 2.278643277014932, "grad_norm": 0.0715850442647934, "learning_rate": 2.883936793367904e-06, "loss": 0.2821645140647888, "step": 12332 }, { "epoch": 2.278828053723828, "grad_norm": 0.07524916529655457, "learning_rate": 2.8825353505071953e-06, "loss": 0.4662242531776428, "step": 12333 }, { "epoch": 2.279012830432724, "grad_norm": 0.08967549353837967, "learning_rate": 2.8811341908993084e-06, "loss": 0.4699711501598358, "step": 12334 }, { "epoch": 2.2791976071416196, "grad_norm": 0.07801796495914459, "learning_rate": 2.8797333146000086e-06, "loss": 0.33761677145957947, "step": 12335 }, { "epoch": 2.279382383850516, "grad_norm": 0.09331436455249786, "learning_rate": 2.87833272166505e-06, "loss": 0.49292394518852234, "step": 12336 }, { "epoch": 2.2795671605594117, "grad_norm": 0.10073967278003693, "learning_rate": 2.8769324121501618e-06, "loss": 0.4239262342453003, "step": 12337 }, { "epoch": 2.2797519372683075, "grad_norm": 0.09610048681497574, "learning_rate": 2.875532386111082e-06, "loss": 0.5705044865608215, "step": 12338 }, { "epoch": 2.2799367139772033, "grad_norm": 0.10764279961585999, "learning_rate": 2.8741326436035255e-06, "loss": 0.5637108087539673, "step": 12339 }, { "epoch": 2.280121490686099, "grad_norm": 0.09255742281675339, "learning_rate": 2.872733184683194e-06, "loss": 0.5211510062217712, "step": 12340 }, { "epoch": 2.280306267394995, "grad_norm": 0.09281174838542938, "learning_rate": 2.871334009405785e-06, "loss": 0.4678809642791748, "step": 12341 }, { "epoch": 2.280491044103891, "grad_norm": 0.07860644906759262, "learning_rate": 2.8699351178269787e-06, "loss": 0.4409528076648712, "step": 12342 }, { "epoch": 2.2806758208127866, "grad_norm": 0.1086873859167099, "learning_rate": 2.868536510002445e-06, "loss": 0.5125857591629028, "step": 12343 }, { "epoch": 2.2808605975216825, "grad_norm": 0.07654060423374176, "learning_rate": 2.8671381859878488e-06, "loss": 0.4426422417163849, "step": 12344 }, { "epoch": 2.2810453742305783, "grad_norm": 0.09781482815742493, "learning_rate": 2.865740145838837e-06, "loss": 0.43132483959198, "step": 12345 }, { "epoch": 2.281230150939474, "grad_norm": 0.07767101377248764, "learning_rate": 2.8643423896110455e-06, "loss": 0.35789021849632263, "step": 12346 }, { "epoch": 2.28141492764837, "grad_norm": 0.10004222393035889, "learning_rate": 2.8629449173601067e-06, "loss": 0.5255815982818604, "step": 12347 }, { "epoch": 2.281599704357266, "grad_norm": 0.09296823292970657, "learning_rate": 2.8615477291416284e-06, "loss": 0.4425857961177826, "step": 12348 }, { "epoch": 2.2817844810661616, "grad_norm": 0.08270562440156937, "learning_rate": 2.8601508250112164e-06, "loss": 0.40904340147972107, "step": 12349 }, { "epoch": 2.2819692577750574, "grad_norm": 0.0664658322930336, "learning_rate": 2.858754205024463e-06, "loss": 0.34156301617622375, "step": 12350 }, { "epoch": 2.2821540344839533, "grad_norm": 0.07145830988883972, "learning_rate": 2.857357869236952e-06, "loss": 0.28238388895988464, "step": 12351 }, { "epoch": 2.282338811192849, "grad_norm": 0.07514777034521103, "learning_rate": 2.8559618177042504e-06, "loss": 0.37296679615974426, "step": 12352 }, { "epoch": 2.282523587901745, "grad_norm": 0.07478883117437363, "learning_rate": 2.8545660504819207e-06, "loss": 0.3149990439414978, "step": 12353 }, { "epoch": 2.2827083646106407, "grad_norm": 0.09237714856863022, "learning_rate": 2.853170567625504e-06, "loss": 0.43616247177124023, "step": 12354 }, { "epoch": 2.2828931413195366, "grad_norm": 0.07515031844377518, "learning_rate": 2.851775369190539e-06, "loss": 0.3258705139160156, "step": 12355 }, { "epoch": 2.2830779180284324, "grad_norm": 0.08433238416910172, "learning_rate": 2.8503804552325497e-06, "loss": 0.4098306894302368, "step": 12356 }, { "epoch": 2.2832626947373282, "grad_norm": 0.0852990448474884, "learning_rate": 2.848985825807051e-06, "loss": 0.4909660220146179, "step": 12357 }, { "epoch": 2.283447471446224, "grad_norm": 0.09817817807197571, "learning_rate": 2.847591480969547e-06, "loss": 0.47760963439941406, "step": 12358 }, { "epoch": 2.28363224815512, "grad_norm": 0.10420354455709457, "learning_rate": 2.8461974207755217e-06, "loss": 0.7738246917724609, "step": 12359 }, { "epoch": 2.2838170248640157, "grad_norm": 0.10038287192583084, "learning_rate": 2.844803645280457e-06, "loss": 0.44567567110061646, "step": 12360 }, { "epoch": 2.2840018015729115, "grad_norm": 0.07754106819629669, "learning_rate": 2.843410154539825e-06, "loss": 0.34756210446357727, "step": 12361 }, { "epoch": 2.2841865782818074, "grad_norm": 0.09777885675430298, "learning_rate": 2.8420169486090765e-06, "loss": 0.5114613175392151, "step": 12362 }, { "epoch": 2.2843713549907037, "grad_norm": 0.0712917372584343, "learning_rate": 2.840624027543658e-06, "loss": 0.342816025018692, "step": 12363 }, { "epoch": 2.284556131699599, "grad_norm": 0.09564562886953354, "learning_rate": 2.8392313913990054e-06, "loss": 0.4747394323348999, "step": 12364 }, { "epoch": 2.2847409084084953, "grad_norm": 0.08557190746068954, "learning_rate": 2.837839040230539e-06, "loss": 0.42063960433006287, "step": 12365 }, { "epoch": 2.284925685117391, "grad_norm": 0.09702543914318085, "learning_rate": 2.8364469740936717e-06, "loss": 0.4699907898902893, "step": 12366 }, { "epoch": 2.285110461826287, "grad_norm": 0.0841965600848198, "learning_rate": 2.8350551930438066e-06, "loss": 0.3842927813529968, "step": 12367 }, { "epoch": 2.285295238535183, "grad_norm": 0.07941141724586487, "learning_rate": 2.8336636971363253e-06, "loss": 0.44426429271698, "step": 12368 }, { "epoch": 2.2854800152440786, "grad_norm": 0.09702118486166, "learning_rate": 2.832272486426608e-06, "loss": 0.5666193962097168, "step": 12369 }, { "epoch": 2.2856647919529745, "grad_norm": 0.07746150344610214, "learning_rate": 2.8308815609700203e-06, "loss": 0.34402111172676086, "step": 12370 }, { "epoch": 2.2858495686618703, "grad_norm": 0.07326214760541916, "learning_rate": 2.829490920821918e-06, "loss": 0.40299466252326965, "step": 12371 }, { "epoch": 2.286034345370766, "grad_norm": 0.08180015534162521, "learning_rate": 2.828100566037643e-06, "loss": 0.4826923906803131, "step": 12372 }, { "epoch": 2.286219122079662, "grad_norm": 0.07712136209011078, "learning_rate": 2.826710496672531e-06, "loss": 0.3627420663833618, "step": 12373 }, { "epoch": 2.2864038987885578, "grad_norm": 0.0706978365778923, "learning_rate": 2.825320712781895e-06, "loss": 0.3895527124404907, "step": 12374 }, { "epoch": 2.2865886754974536, "grad_norm": 0.07732373476028442, "learning_rate": 2.8239312144210517e-06, "loss": 0.3908056616783142, "step": 12375 }, { "epoch": 2.2867734522063494, "grad_norm": 0.08550095558166504, "learning_rate": 2.8225420016452886e-06, "loss": 0.437029093503952, "step": 12376 }, { "epoch": 2.2869582289152453, "grad_norm": 0.10298527777194977, "learning_rate": 2.8211530745099016e-06, "loss": 0.5659658312797546, "step": 12377 }, { "epoch": 2.287143005624141, "grad_norm": 0.08626532554626465, "learning_rate": 2.819764433070166e-06, "loss": 0.46370428800582886, "step": 12378 }, { "epoch": 2.287327782333037, "grad_norm": 0.090863436460495, "learning_rate": 2.8183760773813384e-06, "loss": 0.5512515902519226, "step": 12379 }, { "epoch": 2.2875125590419327, "grad_norm": 0.07174748927354813, "learning_rate": 2.8169880074986742e-06, "loss": 0.3715180456638336, "step": 12380 }, { "epoch": 2.2876973357508286, "grad_norm": 0.0870097428560257, "learning_rate": 2.815600223477418e-06, "loss": 0.4142930805683136, "step": 12381 }, { "epoch": 2.2878821124597244, "grad_norm": 0.062451448291540146, "learning_rate": 2.8142127253727923e-06, "loss": 0.3340838849544525, "step": 12382 }, { "epoch": 2.28806688916862, "grad_norm": 0.0948941633105278, "learning_rate": 2.8128255132400196e-06, "loss": 0.4817712604999542, "step": 12383 }, { "epoch": 2.288251665877516, "grad_norm": 0.07591880857944489, "learning_rate": 2.8114385871343053e-06, "loss": 0.30848807096481323, "step": 12384 }, { "epoch": 2.288436442586412, "grad_norm": 0.07042495161294937, "learning_rate": 2.8100519471108447e-06, "loss": 0.3212406039237976, "step": 12385 }, { "epoch": 2.2886212192953077, "grad_norm": 0.0942525789141655, "learning_rate": 2.808665593224822e-06, "loss": 0.48285847902297974, "step": 12386 }, { "epoch": 2.2888059960042035, "grad_norm": 0.06566546857357025, "learning_rate": 2.807279525531413e-06, "loss": 0.3156110346317291, "step": 12387 }, { "epoch": 2.2889907727130994, "grad_norm": 0.10510123521089554, "learning_rate": 2.805893744085774e-06, "loss": 0.4791485369205475, "step": 12388 }, { "epoch": 2.289175549421995, "grad_norm": 0.08050920069217682, "learning_rate": 2.8045082489430554e-06, "loss": 0.4345119893550873, "step": 12389 }, { "epoch": 2.289360326130891, "grad_norm": 0.09542886167764664, "learning_rate": 2.8031230401583965e-06, "loss": 0.5287306308746338, "step": 12390 }, { "epoch": 2.289545102839787, "grad_norm": 0.08587858080863953, "learning_rate": 2.8017381177869253e-06, "loss": 0.4475300908088684, "step": 12391 }, { "epoch": 2.289729879548683, "grad_norm": 0.0851026400923729, "learning_rate": 2.8003534818837586e-06, "loss": 0.39573603868484497, "step": 12392 }, { "epoch": 2.2899146562575785, "grad_norm": 0.0926097109913826, "learning_rate": 2.798969132503997e-06, "loss": 0.4512878656387329, "step": 12393 }, { "epoch": 2.290099432966475, "grad_norm": 0.08771153539419174, "learning_rate": 2.797585069702733e-06, "loss": 0.4938456118106842, "step": 12394 }, { "epoch": 2.2902842096753706, "grad_norm": 0.08554097265005112, "learning_rate": 2.7962012935350537e-06, "loss": 0.3957395851612091, "step": 12395 }, { "epoch": 2.2904689863842664, "grad_norm": 0.10183302313089371, "learning_rate": 2.794817804056019e-06, "loss": 0.5511239767074585, "step": 12396 }, { "epoch": 2.2906537630931623, "grad_norm": 0.10341359674930573, "learning_rate": 2.793434601320697e-06, "loss": 0.5638948082923889, "step": 12397 }, { "epoch": 2.290838539802058, "grad_norm": 0.09127336740493774, "learning_rate": 2.792051685384134e-06, "loss": 0.43918925523757935, "step": 12398 }, { "epoch": 2.291023316510954, "grad_norm": 0.08343155682086945, "learning_rate": 2.7906690563013593e-06, "loss": 0.38254082202911377, "step": 12399 }, { "epoch": 2.2912080932198498, "grad_norm": 0.08328774571418762, "learning_rate": 2.789286714127402e-06, "loss": 0.402026504278183, "step": 12400 }, { "epoch": 2.2913928699287456, "grad_norm": 0.09894675016403198, "learning_rate": 2.7879046589172776e-06, "loss": 0.3728368878364563, "step": 12401 }, { "epoch": 2.2915776466376414, "grad_norm": 0.08401940017938614, "learning_rate": 2.7865228907259802e-06, "loss": 0.46870890259742737, "step": 12402 }, { "epoch": 2.2917624233465372, "grad_norm": 0.09789912402629852, "learning_rate": 2.785141409608504e-06, "loss": 0.5191943049430847, "step": 12403 }, { "epoch": 2.291947200055433, "grad_norm": 0.06944061815738678, "learning_rate": 2.7837602156198262e-06, "loss": 0.35012003779411316, "step": 12404 }, { "epoch": 2.292131976764329, "grad_norm": 0.07652708142995834, "learning_rate": 2.7823793088149166e-06, "loss": 0.3387015759944916, "step": 12405 }, { "epoch": 2.2923167534732247, "grad_norm": 0.10045712441205978, "learning_rate": 2.7809986892487316e-06, "loss": 0.5079493522644043, "step": 12406 }, { "epoch": 2.2925015301821206, "grad_norm": 0.07915651798248291, "learning_rate": 2.7796183569762103e-06, "loss": 0.32102376222610474, "step": 12407 }, { "epoch": 2.2926863068910164, "grad_norm": 0.11177308857440948, "learning_rate": 2.7782383120522895e-06, "loss": 0.5183802843093872, "step": 12408 }, { "epoch": 2.292871083599912, "grad_norm": 0.09553904831409454, "learning_rate": 2.7768585545318895e-06, "loss": 0.40244248509407043, "step": 12409 }, { "epoch": 2.293055860308808, "grad_norm": 0.09308487921953201, "learning_rate": 2.775479084469921e-06, "loss": 0.4925037622451782, "step": 12410 }, { "epoch": 2.293240637017704, "grad_norm": 0.07482548803091049, "learning_rate": 2.7740999019212824e-06, "loss": 0.30733537673950195, "step": 12411 }, { "epoch": 2.2934254137265997, "grad_norm": 0.0775015726685524, "learning_rate": 2.772721006940863e-06, "loss": 0.37895214557647705, "step": 12412 }, { "epoch": 2.2936101904354955, "grad_norm": 0.09507368505001068, "learning_rate": 2.7713423995835343e-06, "loss": 0.41809919476509094, "step": 12413 }, { "epoch": 2.2937949671443914, "grad_norm": 0.10335507243871689, "learning_rate": 2.7699640799041615e-06, "loss": 0.5223547220230103, "step": 12414 }, { "epoch": 2.293979743853287, "grad_norm": 0.06919983774423599, "learning_rate": 2.768586047957602e-06, "loss": 0.35734066367149353, "step": 12415 }, { "epoch": 2.294164520562183, "grad_norm": 0.09655167907476425, "learning_rate": 2.7672083037986874e-06, "loss": 0.4475431442260742, "step": 12416 }, { "epoch": 2.294349297271079, "grad_norm": 0.07486025243997574, "learning_rate": 2.765830847482257e-06, "loss": 0.39141911268234253, "step": 12417 }, { "epoch": 2.2945340739799747, "grad_norm": 0.07163551449775696, "learning_rate": 2.7644536790631283e-06, "loss": 0.4052906930446625, "step": 12418 }, { "epoch": 2.2947188506888705, "grad_norm": 0.09198658913373947, "learning_rate": 2.763076798596104e-06, "loss": 0.4742256700992584, "step": 12419 }, { "epoch": 2.2949036273977663, "grad_norm": 0.09546126425266266, "learning_rate": 2.761700206135981e-06, "loss": 0.38848382234573364, "step": 12420 }, { "epoch": 2.2950884041066626, "grad_norm": 0.09016796946525574, "learning_rate": 2.7603239017375483e-06, "loss": 0.49152642488479614, "step": 12421 }, { "epoch": 2.295273180815558, "grad_norm": 0.07008977234363556, "learning_rate": 2.7589478854555694e-06, "loss": 0.4098982512950897, "step": 12422 }, { "epoch": 2.2954579575244543, "grad_norm": 0.1004912257194519, "learning_rate": 2.757572157344812e-06, "loss": 0.45596155524253845, "step": 12423 }, { "epoch": 2.29564273423335, "grad_norm": 0.08788701146841049, "learning_rate": 2.7561967174600234e-06, "loss": 0.4334774613380432, "step": 12424 }, { "epoch": 2.295827510942246, "grad_norm": 0.07405531406402588, "learning_rate": 2.7548215658559417e-06, "loss": 0.3822348117828369, "step": 12425 }, { "epoch": 2.2960122876511417, "grad_norm": 0.10018901526927948, "learning_rate": 2.753446702587299e-06, "loss": 0.5195327997207642, "step": 12426 }, { "epoch": 2.2961970643600376, "grad_norm": 0.08698148280382156, "learning_rate": 2.7520721277088023e-06, "loss": 0.5598820447921753, "step": 12427 }, { "epoch": 2.2963818410689334, "grad_norm": 0.0688018724322319, "learning_rate": 2.7506978412751585e-06, "loss": 0.35568612813949585, "step": 12428 }, { "epoch": 2.2965666177778292, "grad_norm": 0.08764747530221939, "learning_rate": 2.7493238433410606e-06, "loss": 0.5102495551109314, "step": 12429 }, { "epoch": 2.296751394486725, "grad_norm": 0.09153677523136139, "learning_rate": 2.74795013396119e-06, "loss": 0.4793400764465332, "step": 12430 }, { "epoch": 2.296936171195621, "grad_norm": 0.09142836928367615, "learning_rate": 2.7465767131902154e-06, "loss": 0.48736387491226196, "step": 12431 }, { "epoch": 2.2971209479045167, "grad_norm": 0.06931941211223602, "learning_rate": 2.7452035810827972e-06, "loss": 0.32798847556114197, "step": 12432 }, { "epoch": 2.2973057246134125, "grad_norm": 0.10837685316801071, "learning_rate": 2.743830737693576e-06, "loss": 0.5421178340911865, "step": 12433 }, { "epoch": 2.2974905013223084, "grad_norm": 0.08300384879112244, "learning_rate": 2.742458183077189e-06, "loss": 0.3388262093067169, "step": 12434 }, { "epoch": 2.297675278031204, "grad_norm": 0.08761202543973923, "learning_rate": 2.7410859172882643e-06, "loss": 0.4011119306087494, "step": 12435 }, { "epoch": 2.2978600547401, "grad_norm": 0.08077117800712585, "learning_rate": 2.7397139403814045e-06, "loss": 0.4460929036140442, "step": 12436 }, { "epoch": 2.298044831448996, "grad_norm": 0.07666885852813721, "learning_rate": 2.7383422524112168e-06, "loss": 0.3937883973121643, "step": 12437 }, { "epoch": 2.2982296081578917, "grad_norm": 0.09943380206823349, "learning_rate": 2.7369708534322924e-06, "loss": 0.559493899345398, "step": 12438 }, { "epoch": 2.2984143848667875, "grad_norm": 0.06779221445322037, "learning_rate": 2.735599743499202e-06, "loss": 0.3402836322784424, "step": 12439 }, { "epoch": 2.2985991615756833, "grad_norm": 0.07608848065137863, "learning_rate": 2.7342289226665185e-06, "loss": 0.33676713705062866, "step": 12440 }, { "epoch": 2.298783938284579, "grad_norm": 0.07917264848947525, "learning_rate": 2.7328583909887875e-06, "loss": 0.48675215244293213, "step": 12441 }, { "epoch": 2.298968714993475, "grad_norm": 0.09452951699495316, "learning_rate": 2.731488148520557e-06, "loss": 0.4705582857131958, "step": 12442 }, { "epoch": 2.299153491702371, "grad_norm": 0.09254277497529984, "learning_rate": 2.730118195316358e-06, "loss": 0.6044603586196899, "step": 12443 }, { "epoch": 2.2993382684112667, "grad_norm": 0.08278968185186386, "learning_rate": 2.7287485314307105e-06, "loss": 0.41316351294517517, "step": 12444 }, { "epoch": 2.2995230451201625, "grad_norm": 0.09281335026025772, "learning_rate": 2.727379156918123e-06, "loss": 0.4842070937156677, "step": 12445 }, { "epoch": 2.2997078218290583, "grad_norm": 0.08693793416023254, "learning_rate": 2.7260100718330938e-06, "loss": 0.5524295568466187, "step": 12446 }, { "epoch": 2.299892598537954, "grad_norm": 0.08974786847829819, "learning_rate": 2.7246412762301045e-06, "loss": 0.5332388877868652, "step": 12447 }, { "epoch": 2.30007737524685, "grad_norm": 0.05453294888138771, "learning_rate": 2.7232727701636306e-06, "loss": 0.21315710246562958, "step": 12448 }, { "epoch": 2.300262151955746, "grad_norm": 0.08957856148481369, "learning_rate": 2.721904553688134e-06, "loss": 0.43898043036460876, "step": 12449 }, { "epoch": 2.300446928664642, "grad_norm": 0.06817204505205154, "learning_rate": 2.7205366268580657e-06, "loss": 0.30610817670822144, "step": 12450 }, { "epoch": 2.3006317053735375, "grad_norm": 0.08877559751272202, "learning_rate": 2.7191689897278662e-06, "loss": 0.40955445170402527, "step": 12451 }, { "epoch": 2.3008164820824337, "grad_norm": 0.08863110840320587, "learning_rate": 2.7178016423519637e-06, "loss": 0.5220192074775696, "step": 12452 }, { "epoch": 2.301001258791329, "grad_norm": 0.07328446954488754, "learning_rate": 2.7164345847847706e-06, "loss": 0.3678598999977112, "step": 12453 }, { "epoch": 2.3011860355002254, "grad_norm": 0.08586084097623825, "learning_rate": 2.7150678170806944e-06, "loss": 0.43170756101608276, "step": 12454 }, { "epoch": 2.301370812209121, "grad_norm": 0.08982164412736893, "learning_rate": 2.713701339294129e-06, "loss": 0.40715205669403076, "step": 12455 }, { "epoch": 2.301555588918017, "grad_norm": 0.09180708974599838, "learning_rate": 2.7123351514794494e-06, "loss": 0.4255968928337097, "step": 12456 }, { "epoch": 2.301740365626913, "grad_norm": 0.0955817773938179, "learning_rate": 2.710969253691036e-06, "loss": 0.4432634711265564, "step": 12457 }, { "epoch": 2.3019251423358087, "grad_norm": 0.06264004111289978, "learning_rate": 2.7096036459832387e-06, "loss": 0.25520288944244385, "step": 12458 }, { "epoch": 2.3021099190447045, "grad_norm": 0.06978282332420349, "learning_rate": 2.7082383284104085e-06, "loss": 0.29496538639068604, "step": 12459 }, { "epoch": 2.3022946957536004, "grad_norm": 0.07816436141729355, "learning_rate": 2.706873301026882e-06, "loss": 0.32827895879745483, "step": 12460 }, { "epoch": 2.302479472462496, "grad_norm": 0.08574339002370834, "learning_rate": 2.705508563886978e-06, "loss": 0.47787851095199585, "step": 12461 }, { "epoch": 2.302664249171392, "grad_norm": 0.08717440068721771, "learning_rate": 2.704144117045012e-06, "loss": 0.4790334105491638, "step": 12462 }, { "epoch": 2.302849025880288, "grad_norm": 0.08738885074853897, "learning_rate": 2.7027799605552842e-06, "loss": 0.4241340458393097, "step": 12463 }, { "epoch": 2.3030338025891837, "grad_norm": 0.06934608519077301, "learning_rate": 2.7014160944720835e-06, "loss": 0.34214988350868225, "step": 12464 }, { "epoch": 2.3032185792980795, "grad_norm": 0.10133513063192368, "learning_rate": 2.7000525188496885e-06, "loss": 0.5286313891410828, "step": 12465 }, { "epoch": 2.3034033560069753, "grad_norm": 0.07713404297828674, "learning_rate": 2.6986892337423675e-06, "loss": 0.4190782606601715, "step": 12466 }, { "epoch": 2.303588132715871, "grad_norm": 0.11312388628721237, "learning_rate": 2.6973262392043687e-06, "loss": 0.5498267412185669, "step": 12467 }, { "epoch": 2.303772909424767, "grad_norm": 0.07396145910024643, "learning_rate": 2.695963535289938e-06, "loss": 0.3859500288963318, "step": 12468 }, { "epoch": 2.303957686133663, "grad_norm": 0.07330184429883957, "learning_rate": 2.6946011220533085e-06, "loss": 0.44282686710357666, "step": 12469 }, { "epoch": 2.3041424628425586, "grad_norm": 0.10176707804203033, "learning_rate": 2.6932389995486986e-06, "loss": 0.49235883355140686, "step": 12470 }, { "epoch": 2.3043272395514545, "grad_norm": 0.09177861362695694, "learning_rate": 2.691877167830319e-06, "loss": 0.5612393021583557, "step": 12471 }, { "epoch": 2.3045120162603503, "grad_norm": 0.06859984993934631, "learning_rate": 2.6905156269523603e-06, "loss": 0.3102670907974243, "step": 12472 }, { "epoch": 2.304696792969246, "grad_norm": 0.10172141343355179, "learning_rate": 2.689154376969012e-06, "loss": 0.6309396028518677, "step": 12473 }, { "epoch": 2.304881569678142, "grad_norm": 0.07310739904642105, "learning_rate": 2.68779341793445e-06, "loss": 0.3750215172767639, "step": 12474 }, { "epoch": 2.305066346387038, "grad_norm": 0.0803554430603981, "learning_rate": 2.68643274990283e-06, "loss": 0.43689945340156555, "step": 12475 }, { "epoch": 2.3052511230959336, "grad_norm": 0.08919933438301086, "learning_rate": 2.685072372928301e-06, "loss": 0.5057060122489929, "step": 12476 }, { "epoch": 2.3054358998048294, "grad_norm": 0.08986406028270721, "learning_rate": 2.6837122870650136e-06, "loss": 0.4250834286212921, "step": 12477 }, { "epoch": 2.3056206765137253, "grad_norm": 0.08082538843154907, "learning_rate": 2.682352492367084e-06, "loss": 0.4560541808605194, "step": 12478 }, { "epoch": 2.3058054532226215, "grad_norm": 0.08965017646551132, "learning_rate": 2.680992988888631e-06, "loss": 0.4921261668205261, "step": 12479 }, { "epoch": 2.305990229931517, "grad_norm": 0.09341119229793549, "learning_rate": 2.679633776683762e-06, "loss": 0.5224530100822449, "step": 12480 }, { "epoch": 2.306175006640413, "grad_norm": 0.08903618156909943, "learning_rate": 2.678274855806564e-06, "loss": 0.503605306148529, "step": 12481 }, { "epoch": 2.3063597833493086, "grad_norm": 0.08572878688573837, "learning_rate": 2.6769162263111194e-06, "loss": 0.5046517252922058, "step": 12482 }, { "epoch": 2.306544560058205, "grad_norm": 0.08222978562116623, "learning_rate": 2.6755578882514976e-06, "loss": 0.49034225940704346, "step": 12483 }, { "epoch": 2.3067293367671007, "grad_norm": 0.07876164466142654, "learning_rate": 2.6741998416817572e-06, "loss": 0.42356806993484497, "step": 12484 }, { "epoch": 2.3069141134759965, "grad_norm": 0.08781693875789642, "learning_rate": 2.6728420866559424e-06, "loss": 0.47269538044929504, "step": 12485 }, { "epoch": 2.3070988901848923, "grad_norm": 0.0931205004453659, "learning_rate": 2.6714846232280932e-06, "loss": 0.49855610728263855, "step": 12486 }, { "epoch": 2.307283666893788, "grad_norm": 0.08059027045965195, "learning_rate": 2.6701274514522248e-06, "loss": 0.490103542804718, "step": 12487 }, { "epoch": 2.307468443602684, "grad_norm": 0.089565210044384, "learning_rate": 2.668770571382351e-06, "loss": 0.43446487188339233, "step": 12488 }, { "epoch": 2.30765322031158, "grad_norm": 0.08159112930297852, "learning_rate": 2.6674139830724722e-06, "loss": 0.46210017800331116, "step": 12489 }, { "epoch": 2.3078379970204757, "grad_norm": 0.06302274018526077, "learning_rate": 2.6660576865765764e-06, "loss": 0.3557790517807007, "step": 12490 }, { "epoch": 2.3080227737293715, "grad_norm": 0.06396881490945816, "learning_rate": 2.6647016819486427e-06, "loss": 0.28712761402130127, "step": 12491 }, { "epoch": 2.3082075504382673, "grad_norm": 0.09645667672157288, "learning_rate": 2.66334596924263e-06, "loss": 0.5571883916854858, "step": 12492 }, { "epoch": 2.308392327147163, "grad_norm": 0.07313007116317749, "learning_rate": 2.661990548512493e-06, "loss": 0.3786899149417877, "step": 12493 }, { "epoch": 2.308577103856059, "grad_norm": 0.07516108453273773, "learning_rate": 2.6606354198121786e-06, "loss": 0.388254851102829, "step": 12494 }, { "epoch": 2.308761880564955, "grad_norm": 0.0844891294836998, "learning_rate": 2.6592805831956105e-06, "loss": 0.466629296541214, "step": 12495 }, { "epoch": 2.3089466572738506, "grad_norm": 0.09410285204648972, "learning_rate": 2.657926038716704e-06, "loss": 0.6287585496902466, "step": 12496 }, { "epoch": 2.3091314339827465, "grad_norm": 0.09713611751794815, "learning_rate": 2.6565717864293784e-06, "loss": 0.5529349446296692, "step": 12497 }, { "epoch": 2.3093162106916423, "grad_norm": 0.09099281579256058, "learning_rate": 2.6552178263875172e-06, "loss": 0.4954475462436676, "step": 12498 }, { "epoch": 2.309500987400538, "grad_norm": 0.07115762680768967, "learning_rate": 2.6538641586450075e-06, "loss": 0.3478897213935852, "step": 12499 }, { "epoch": 2.309685764109434, "grad_norm": 0.08878853917121887, "learning_rate": 2.652510783255725e-06, "loss": 0.5340008735656738, "step": 12500 }, { "epoch": 2.309685764109434, "eval_loss": 0.5491987466812134, "eval_runtime": 156.6238, "eval_samples_per_second": 116.387, "eval_steps_per_second": 14.551, "step": 12500 } ], "logging_steps": 1, "max_steps": 16236, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.554647767097955e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }