diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,87734 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.309685764109434, + "eval_steps": 500, + "global_step": 12500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00018477670889584369, + "grad_norm": 0.24088887870311737, + "learning_rate": 0.0, + "loss": 2.3987152576446533, + "step": 1 + }, + { + "epoch": 0.00036955341779168737, + "grad_norm": 0.26547858119010925, + "learning_rate": 4.098360655737705e-08, + "loss": 2.774164915084839, + "step": 2 + }, + { + "epoch": 0.000554330126687531, + "grad_norm": 0.24729517102241516, + "learning_rate": 8.19672131147541e-08, + "loss": 2.3681344985961914, + "step": 3 + }, + { + "epoch": 0.0007391068355833747, + "grad_norm": 0.2252364456653595, + "learning_rate": 1.2295081967213116e-07, + "loss": 2.347365617752075, + "step": 4 + }, + { + "epoch": 0.0009238835444792183, + "grad_norm": 0.24495361745357513, + "learning_rate": 1.639344262295082e-07, + "loss": 2.149761199951172, + "step": 5 + }, + { + "epoch": 0.001108660253375062, + "grad_norm": 0.27319467067718506, + "learning_rate": 2.0491803278688524e-07, + "loss": 2.494878053665161, + "step": 6 + }, + { + "epoch": 0.0012934369622709058, + "grad_norm": 0.2361554354429245, + "learning_rate": 2.459016393442623e-07, + "loss": 2.5770766735076904, + "step": 7 + }, + { + "epoch": 0.0014782136711667495, + "grad_norm": 0.22133229672908783, + "learning_rate": 2.8688524590163937e-07, + "loss": 2.042956829071045, + "step": 8 + }, + { + "epoch": 0.0016629903800625932, + "grad_norm": 0.22019384801387787, + "learning_rate": 3.278688524590164e-07, + "loss": 2.4631993770599365, + "step": 9 + }, + { + "epoch": 0.0018477670889584367, + "grad_norm": 0.2294536978006363, + "learning_rate": 3.6885245901639347e-07, + "loss": 2.508697986602783, + "step": 10 + }, + { + "epoch": 0.0020325437978542804, + "grad_norm": 0.31911879777908325, + "learning_rate": 4.0983606557377047e-07, + "loss": 3.188476085662842, + "step": 11 + }, + { + "epoch": 0.002217320506750124, + "grad_norm": 0.2536254823207855, + "learning_rate": 4.508196721311476e-07, + "loss": 2.5951364040374756, + "step": 12 + }, + { + "epoch": 0.002402097215645968, + "grad_norm": 0.2997930347919464, + "learning_rate": 4.918032786885246e-07, + "loss": 2.9771904945373535, + "step": 13 + }, + { + "epoch": 0.0025868739245418115, + "grad_norm": 0.21778813004493713, + "learning_rate": 5.327868852459017e-07, + "loss": 2.44155216217041, + "step": 14 + }, + { + "epoch": 0.0027716506334376553, + "grad_norm": 0.3283197283744812, + "learning_rate": 5.737704918032787e-07, + "loss": 2.663536787033081, + "step": 15 + }, + { + "epoch": 0.002956427342333499, + "grad_norm": 0.25381243228912354, + "learning_rate": 6.147540983606558e-07, + "loss": 2.3754961490631104, + "step": 16 + }, + { + "epoch": 0.0031412040512293427, + "grad_norm": 0.33151787519454956, + "learning_rate": 6.557377049180328e-07, + "loss": 3.3958041667938232, + "step": 17 + }, + { + "epoch": 0.0033259807601251864, + "grad_norm": 0.3099863827228546, + "learning_rate": 6.967213114754098e-07, + "loss": 2.711893320083618, + "step": 18 + }, + { + "epoch": 0.00351075746902103, + "grad_norm": 0.2153325229883194, + "learning_rate": 7.377049180327869e-07, + "loss": 2.202052116394043, + "step": 19 + }, + { + "epoch": 0.0036955341779168734, + "grad_norm": 0.24825315177440643, + "learning_rate": 7.78688524590164e-07, + "loss": 2.1632490158081055, + "step": 20 + }, + { + "epoch": 0.003880310886812717, + "grad_norm": 0.27885496616363525, + "learning_rate": 8.196721311475409e-07, + "loss": 2.8247857093811035, + "step": 21 + }, + { + "epoch": 0.004065087595708561, + "grad_norm": 0.3095180094242096, + "learning_rate": 8.606557377049181e-07, + "loss": 2.9461660385131836, + "step": 22 + }, + { + "epoch": 0.004249864304604405, + "grad_norm": 0.2433432936668396, + "learning_rate": 9.016393442622952e-07, + "loss": 2.6200363636016846, + "step": 23 + }, + { + "epoch": 0.004434641013500248, + "grad_norm": 0.29434219002723694, + "learning_rate": 9.426229508196721e-07, + "loss": 2.5439352989196777, + "step": 24 + }, + { + "epoch": 0.004619417722396092, + "grad_norm": 0.2591114342212677, + "learning_rate": 9.836065573770493e-07, + "loss": 2.057875394821167, + "step": 25 + }, + { + "epoch": 0.004804194431291936, + "grad_norm": 0.2714301645755768, + "learning_rate": 1.0245901639344263e-06, + "loss": 2.739349365234375, + "step": 26 + }, + { + "epoch": 0.004988971140187779, + "grad_norm": 0.24641598761081696, + "learning_rate": 1.0655737704918034e-06, + "loss": 2.5039243698120117, + "step": 27 + }, + { + "epoch": 0.005173747849083623, + "grad_norm": 0.32515525817871094, + "learning_rate": 1.1065573770491804e-06, + "loss": 2.549954652786255, + "step": 28 + }, + { + "epoch": 0.005358524557979466, + "grad_norm": 0.3026675879955292, + "learning_rate": 1.1475409836065575e-06, + "loss": 2.81745982170105, + "step": 29 + }, + { + "epoch": 0.0055433012668753105, + "grad_norm": 0.2552269995212555, + "learning_rate": 1.1885245901639345e-06, + "loss": 2.684337615966797, + "step": 30 + }, + { + "epoch": 0.005728077975771154, + "grad_norm": 0.26018139719963074, + "learning_rate": 1.2295081967213116e-06, + "loss": 2.580073833465576, + "step": 31 + }, + { + "epoch": 0.005912854684666998, + "grad_norm": 0.25323620438575745, + "learning_rate": 1.2704918032786886e-06, + "loss": 2.737957000732422, + "step": 32 + }, + { + "epoch": 0.006097631393562841, + "grad_norm": 0.2035834640264511, + "learning_rate": 1.3114754098360657e-06, + "loss": 2.101224422454834, + "step": 33 + }, + { + "epoch": 0.006282408102458685, + "grad_norm": 0.2545967102050781, + "learning_rate": 1.352459016393443e-06, + "loss": 2.7195825576782227, + "step": 34 + }, + { + "epoch": 0.006467184811354529, + "grad_norm": 0.22219978272914886, + "learning_rate": 1.3934426229508196e-06, + "loss": 2.2762818336486816, + "step": 35 + }, + { + "epoch": 0.006651961520250373, + "grad_norm": 0.21565257012844086, + "learning_rate": 1.4344262295081968e-06, + "loss": 2.105567216873169, + "step": 36 + }, + { + "epoch": 0.006836738229146216, + "grad_norm": 0.21315789222717285, + "learning_rate": 1.4754098360655739e-06, + "loss": 2.218881607055664, + "step": 37 + }, + { + "epoch": 0.00702151493804206, + "grad_norm": 0.23275326192378998, + "learning_rate": 1.516393442622951e-06, + "loss": 2.6363465785980225, + "step": 38 + }, + { + "epoch": 0.0072062916469379035, + "grad_norm": 0.20184484124183655, + "learning_rate": 1.557377049180328e-06, + "loss": 2.000232458114624, + "step": 39 + }, + { + "epoch": 0.007391068355833747, + "grad_norm": 0.18502052128314972, + "learning_rate": 1.5983606557377053e-06, + "loss": 2.02783465385437, + "step": 40 + }, + { + "epoch": 0.007575845064729591, + "grad_norm": 0.23034077882766724, + "learning_rate": 1.6393442622950819e-06, + "loss": 2.4156503677368164, + "step": 41 + }, + { + "epoch": 0.007760621773625434, + "grad_norm": 0.1947338730096817, + "learning_rate": 1.6803278688524592e-06, + "loss": 2.017721652984619, + "step": 42 + }, + { + "epoch": 0.007945398482521277, + "grad_norm": 0.2304108589887619, + "learning_rate": 1.7213114754098362e-06, + "loss": 2.479938268661499, + "step": 43 + }, + { + "epoch": 0.008130175191417122, + "grad_norm": 0.20530247688293457, + "learning_rate": 1.7622950819672133e-06, + "loss": 2.1011605262756348, + "step": 44 + }, + { + "epoch": 0.008314951900312966, + "grad_norm": 0.2010030895471573, + "learning_rate": 1.8032786885245903e-06, + "loss": 2.2112722396850586, + "step": 45 + }, + { + "epoch": 0.00849972860920881, + "grad_norm": 0.23062516748905182, + "learning_rate": 1.8442622950819674e-06, + "loss": 2.625047445297241, + "step": 46 + }, + { + "epoch": 0.008684505318104652, + "grad_norm": 0.1795029640197754, + "learning_rate": 1.8852459016393442e-06, + "loss": 2.0575997829437256, + "step": 47 + }, + { + "epoch": 0.008869282027000496, + "grad_norm": 0.19284705817699432, + "learning_rate": 1.9262295081967215e-06, + "loss": 2.186389923095703, + "step": 48 + }, + { + "epoch": 0.00905405873589634, + "grad_norm": 0.18980354070663452, + "learning_rate": 1.9672131147540985e-06, + "loss": 2.1369142532348633, + "step": 49 + }, + { + "epoch": 0.009238835444792185, + "grad_norm": 0.20028269290924072, + "learning_rate": 2.0081967213114756e-06, + "loss": 2.149282217025757, + "step": 50 + }, + { + "epoch": 0.009423612153688027, + "grad_norm": 0.1844315528869629, + "learning_rate": 2.0491803278688526e-06, + "loss": 2.1714837551116943, + "step": 51 + }, + { + "epoch": 0.009608388862583871, + "grad_norm": 0.19009949266910553, + "learning_rate": 2.0901639344262297e-06, + "loss": 2.0276408195495605, + "step": 52 + }, + { + "epoch": 0.009793165571479715, + "grad_norm": 0.1857149451971054, + "learning_rate": 2.1311475409836067e-06, + "loss": 2.414680004119873, + "step": 53 + }, + { + "epoch": 0.009977942280375558, + "grad_norm": 0.14500652253627777, + "learning_rate": 2.1721311475409838e-06, + "loss": 1.8302123546600342, + "step": 54 + }, + { + "epoch": 0.010162718989271402, + "grad_norm": 0.1605122685432434, + "learning_rate": 2.213114754098361e-06, + "loss": 2.0301051139831543, + "step": 55 + }, + { + "epoch": 0.010347495698167246, + "grad_norm": 0.13292397558689117, + "learning_rate": 2.254098360655738e-06, + "loss": 1.6478054523468018, + "step": 56 + }, + { + "epoch": 0.01053227240706309, + "grad_norm": 0.19711221754550934, + "learning_rate": 2.295081967213115e-06, + "loss": 2.2585537433624268, + "step": 57 + }, + { + "epoch": 0.010717049115958933, + "grad_norm": 0.1681194305419922, + "learning_rate": 2.336065573770492e-06, + "loss": 1.8417537212371826, + "step": 58 + }, + { + "epoch": 0.010901825824854777, + "grad_norm": 0.15966543555259705, + "learning_rate": 2.377049180327869e-06, + "loss": 1.9968725442886353, + "step": 59 + }, + { + "epoch": 0.011086602533750621, + "grad_norm": 0.15611667931079865, + "learning_rate": 2.418032786885246e-06, + "loss": 1.7798278331756592, + "step": 60 + }, + { + "epoch": 0.011271379242646465, + "grad_norm": 0.1719726324081421, + "learning_rate": 2.459016393442623e-06, + "loss": 2.1844446659088135, + "step": 61 + }, + { + "epoch": 0.011456155951542308, + "grad_norm": 0.13560648262500763, + "learning_rate": 2.5e-06, + "loss": 1.4112403392791748, + "step": 62 + }, + { + "epoch": 0.011640932660438152, + "grad_norm": 0.10778555274009705, + "learning_rate": 2.5409836065573773e-06, + "loss": 1.4861712455749512, + "step": 63 + }, + { + "epoch": 0.011825709369333996, + "grad_norm": 0.12593482434749603, + "learning_rate": 2.5819672131147543e-06, + "loss": 1.7662166357040405, + "step": 64 + }, + { + "epoch": 0.012010486078229838, + "grad_norm": 0.0996285229921341, + "learning_rate": 2.6229508196721314e-06, + "loss": 1.5829087495803833, + "step": 65 + }, + { + "epoch": 0.012195262787125682, + "grad_norm": 0.12871770560741425, + "learning_rate": 2.6639344262295084e-06, + "loss": 1.5366804599761963, + "step": 66 + }, + { + "epoch": 0.012380039496021527, + "grad_norm": 0.11756619065999985, + "learning_rate": 2.704918032786886e-06, + "loss": 1.3947206735610962, + "step": 67 + }, + { + "epoch": 0.01256481620491737, + "grad_norm": 0.15036584436893463, + "learning_rate": 2.745901639344263e-06, + "loss": 1.607935905456543, + "step": 68 + }, + { + "epoch": 0.012749592913813213, + "grad_norm": 0.09538834542036057, + "learning_rate": 2.786885245901639e-06, + "loss": 1.2980877161026, + "step": 69 + }, + { + "epoch": 0.012934369622709057, + "grad_norm": 0.12562216818332672, + "learning_rate": 2.8278688524590166e-06, + "loss": 1.7359843254089355, + "step": 70 + }, + { + "epoch": 0.013119146331604901, + "grad_norm": 0.09023502469062805, + "learning_rate": 2.8688524590163937e-06, + "loss": 1.2565981149673462, + "step": 71 + }, + { + "epoch": 0.013303923040500746, + "grad_norm": 0.11920010298490524, + "learning_rate": 2.9098360655737707e-06, + "loss": 1.6668967008590698, + "step": 72 + }, + { + "epoch": 0.013488699749396588, + "grad_norm": 0.10211700946092606, + "learning_rate": 2.9508196721311478e-06, + "loss": 1.5361316204071045, + "step": 73 + }, + { + "epoch": 0.013673476458292432, + "grad_norm": 0.1129099503159523, + "learning_rate": 2.991803278688525e-06, + "loss": 1.5233663320541382, + "step": 74 + }, + { + "epoch": 0.013858253167188276, + "grad_norm": 0.09841630607843399, + "learning_rate": 3.032786885245902e-06, + "loss": 1.3280178308486938, + "step": 75 + }, + { + "epoch": 0.01404302987608412, + "grad_norm": 0.1030346006155014, + "learning_rate": 3.073770491803279e-06, + "loss": 1.3855637311935425, + "step": 76 + }, + { + "epoch": 0.014227806584979963, + "grad_norm": 0.1166791021823883, + "learning_rate": 3.114754098360656e-06, + "loss": 1.692766547203064, + "step": 77 + }, + { + "epoch": 0.014412583293875807, + "grad_norm": 0.1157117560505867, + "learning_rate": 3.155737704918033e-06, + "loss": 1.5444836616516113, + "step": 78 + }, + { + "epoch": 0.014597360002771651, + "grad_norm": 0.1203937903046608, + "learning_rate": 3.1967213114754105e-06, + "loss": 1.7095307111740112, + "step": 79 + }, + { + "epoch": 0.014782136711667494, + "grad_norm": 0.09563681483268738, + "learning_rate": 3.2377049180327876e-06, + "loss": 1.3926582336425781, + "step": 80 + }, + { + "epoch": 0.014966913420563338, + "grad_norm": 0.13222035765647888, + "learning_rate": 3.2786885245901638e-06, + "loss": 1.6481196880340576, + "step": 81 + }, + { + "epoch": 0.015151690129459182, + "grad_norm": 0.12444092333316803, + "learning_rate": 3.3196721311475413e-06, + "loss": 1.6188586950302124, + "step": 82 + }, + { + "epoch": 0.015336466838355026, + "grad_norm": 0.1225326806306839, + "learning_rate": 3.3606557377049183e-06, + "loss": 1.6079766750335693, + "step": 83 + }, + { + "epoch": 0.015521243547250868, + "grad_norm": 0.07850661873817444, + "learning_rate": 3.4016393442622954e-06, + "loss": 1.0474170446395874, + "step": 84 + }, + { + "epoch": 0.015706020256146713, + "grad_norm": 0.10204868763685226, + "learning_rate": 3.4426229508196724e-06, + "loss": 1.6754392385482788, + "step": 85 + }, + { + "epoch": 0.015890796965042555, + "grad_norm": 0.12254143506288528, + "learning_rate": 3.4836065573770495e-06, + "loss": 1.6609466075897217, + "step": 86 + }, + { + "epoch": 0.0160755736739384, + "grad_norm": 0.10189526528120041, + "learning_rate": 3.5245901639344265e-06, + "loss": 1.5539140701293945, + "step": 87 + }, + { + "epoch": 0.016260350382834243, + "grad_norm": 0.09892205893993378, + "learning_rate": 3.5655737704918036e-06, + "loss": 1.3632971048355103, + "step": 88 + }, + { + "epoch": 0.016445127091730086, + "grad_norm": 0.11935164034366608, + "learning_rate": 3.6065573770491806e-06, + "loss": 1.5281620025634766, + "step": 89 + }, + { + "epoch": 0.01662990380062593, + "grad_norm": 0.10507043451070786, + "learning_rate": 3.6475409836065577e-06, + "loss": 1.451995849609375, + "step": 90 + }, + { + "epoch": 0.016814680509521774, + "grad_norm": 0.12459581345319748, + "learning_rate": 3.6885245901639347e-06, + "loss": 1.6090208292007446, + "step": 91 + }, + { + "epoch": 0.01699945721841762, + "grad_norm": 0.1009344607591629, + "learning_rate": 3.729508196721312e-06, + "loss": 1.2886296510696411, + "step": 92 + }, + { + "epoch": 0.017184233927313462, + "grad_norm": 0.12125498056411743, + "learning_rate": 3.7704918032786884e-06, + "loss": 1.5595990419387817, + "step": 93 + }, + { + "epoch": 0.017369010636209305, + "grad_norm": 0.08220856636762619, + "learning_rate": 3.811475409836066e-06, + "loss": 1.2803837060928345, + "step": 94 + }, + { + "epoch": 0.01755378734510515, + "grad_norm": 0.09604591131210327, + "learning_rate": 3.852459016393443e-06, + "loss": 1.0359008312225342, + "step": 95 + }, + { + "epoch": 0.017738564054000993, + "grad_norm": 0.0831838995218277, + "learning_rate": 3.8934426229508196e-06, + "loss": 1.4928193092346191, + "step": 96 + }, + { + "epoch": 0.017923340762896835, + "grad_norm": 0.10466645658016205, + "learning_rate": 3.934426229508197e-06, + "loss": 1.3826165199279785, + "step": 97 + }, + { + "epoch": 0.01810811747179268, + "grad_norm": 0.0696515217423439, + "learning_rate": 3.975409836065574e-06, + "loss": 1.0134645700454712, + "step": 98 + }, + { + "epoch": 0.018292894180688524, + "grad_norm": 0.11665759235620499, + "learning_rate": 4.016393442622951e-06, + "loss": 1.7774734497070312, + "step": 99 + }, + { + "epoch": 0.01847767088958437, + "grad_norm": 0.09961281716823578, + "learning_rate": 4.057377049180329e-06, + "loss": 1.40458345413208, + "step": 100 + }, + { + "epoch": 0.018662447598480212, + "grad_norm": 0.12503457069396973, + "learning_rate": 4.098360655737705e-06, + "loss": 1.4400181770324707, + "step": 101 + }, + { + "epoch": 0.018847224307376054, + "grad_norm": 0.0715487003326416, + "learning_rate": 4.139344262295083e-06, + "loss": 1.164671778678894, + "step": 102 + }, + { + "epoch": 0.0190320010162719, + "grad_norm": 0.11996988952159882, + "learning_rate": 4.180327868852459e-06, + "loss": 1.4990315437316895, + "step": 103 + }, + { + "epoch": 0.019216777725167743, + "grad_norm": 0.10172359645366669, + "learning_rate": 4.221311475409837e-06, + "loss": 1.3712670803070068, + "step": 104 + }, + { + "epoch": 0.019401554434063585, + "grad_norm": 0.11445247381925583, + "learning_rate": 4.2622950819672135e-06, + "loss": 1.4764546155929565, + "step": 105 + }, + { + "epoch": 0.01958633114295943, + "grad_norm": 0.14201143383979797, + "learning_rate": 4.30327868852459e-06, + "loss": 1.818518042564392, + "step": 106 + }, + { + "epoch": 0.019771107851855273, + "grad_norm": 0.09214181452989578, + "learning_rate": 4.3442622950819676e-06, + "loss": 1.3146964311599731, + "step": 107 + }, + { + "epoch": 0.019955884560751116, + "grad_norm": 0.1036999523639679, + "learning_rate": 4.385245901639344e-06, + "loss": 1.4357385635375977, + "step": 108 + }, + { + "epoch": 0.02014066126964696, + "grad_norm": 0.10278178751468658, + "learning_rate": 4.426229508196722e-06, + "loss": 1.4627935886383057, + "step": 109 + }, + { + "epoch": 0.020325437978542804, + "grad_norm": 0.08426320552825928, + "learning_rate": 4.467213114754098e-06, + "loss": 1.5069490671157837, + "step": 110 + }, + { + "epoch": 0.02051021468743865, + "grad_norm": 0.07356536388397217, + "learning_rate": 4.508196721311476e-06, + "loss": 0.9656306505203247, + "step": 111 + }, + { + "epoch": 0.020694991396334492, + "grad_norm": 0.08690712600946426, + "learning_rate": 4.549180327868853e-06, + "loss": 1.2329204082489014, + "step": 112 + }, + { + "epoch": 0.020879768105230335, + "grad_norm": 0.08951961249113083, + "learning_rate": 4.59016393442623e-06, + "loss": 1.258447289466858, + "step": 113 + }, + { + "epoch": 0.02106454481412618, + "grad_norm": 0.09533683955669403, + "learning_rate": 4.631147540983607e-06, + "loss": 1.3724256753921509, + "step": 114 + }, + { + "epoch": 0.021249321523022023, + "grad_norm": 0.09054508060216904, + "learning_rate": 4.672131147540984e-06, + "loss": 1.235846996307373, + "step": 115 + }, + { + "epoch": 0.021434098231917866, + "grad_norm": 0.09722615778446198, + "learning_rate": 4.7131147540983615e-06, + "loss": 1.3113802671432495, + "step": 116 + }, + { + "epoch": 0.02161887494081371, + "grad_norm": 0.08708129078149796, + "learning_rate": 4.754098360655738e-06, + "loss": 1.0961401462554932, + "step": 117 + }, + { + "epoch": 0.021803651649709554, + "grad_norm": 0.08968407660722733, + "learning_rate": 4.795081967213115e-06, + "loss": 1.1457524299621582, + "step": 118 + }, + { + "epoch": 0.021988428358605396, + "grad_norm": 0.09171068668365479, + "learning_rate": 4.836065573770492e-06, + "loss": 1.076693058013916, + "step": 119 + }, + { + "epoch": 0.022173205067501242, + "grad_norm": 0.09295544773340225, + "learning_rate": 4.877049180327869e-06, + "loss": 1.3223167657852173, + "step": 120 + }, + { + "epoch": 0.022357981776397085, + "grad_norm": 0.0667370930314064, + "learning_rate": 4.918032786885246e-06, + "loss": 0.8422390222549438, + "step": 121 + }, + { + "epoch": 0.02254275848529293, + "grad_norm": 0.08461110293865204, + "learning_rate": 4.959016393442623e-06, + "loss": 1.0604937076568604, + "step": 122 + }, + { + "epoch": 0.022727535194188773, + "grad_norm": 0.11348158866167068, + "learning_rate": 5e-06, + "loss": 1.3220497369766235, + "step": 123 + }, + { + "epoch": 0.022912311903084615, + "grad_norm": 0.08554375171661377, + "learning_rate": 5.040983606557377e-06, + "loss": 1.2320774793624878, + "step": 124 + }, + { + "epoch": 0.02309708861198046, + "grad_norm": 0.09483008086681366, + "learning_rate": 5.0819672131147545e-06, + "loss": 1.144835352897644, + "step": 125 + }, + { + "epoch": 0.023281865320876304, + "grad_norm": 0.12047784775495529, + "learning_rate": 5.122950819672131e-06, + "loss": 1.6951477527618408, + "step": 126 + }, + { + "epoch": 0.023466642029772146, + "grad_norm": 0.10329974442720413, + "learning_rate": 5.163934426229509e-06, + "loss": 1.4667521715164185, + "step": 127 + }, + { + "epoch": 0.023651418738667992, + "grad_norm": 0.09008090943098068, + "learning_rate": 5.204918032786885e-06, + "loss": 1.2631990909576416, + "step": 128 + }, + { + "epoch": 0.023836195447563834, + "grad_norm": 0.08008470386266708, + "learning_rate": 5.245901639344263e-06, + "loss": 1.1619493961334229, + "step": 129 + }, + { + "epoch": 0.024020972156459677, + "grad_norm": 0.1004214957356453, + "learning_rate": 5.286885245901639e-06, + "loss": 1.3743982315063477, + "step": 130 + }, + { + "epoch": 0.024205748865355523, + "grad_norm": 0.08813057094812393, + "learning_rate": 5.327868852459017e-06, + "loss": 0.9727039337158203, + "step": 131 + }, + { + "epoch": 0.024390525574251365, + "grad_norm": 0.095633365213871, + "learning_rate": 5.3688524590163935e-06, + "loss": 1.322685718536377, + "step": 132 + }, + { + "epoch": 0.02457530228314721, + "grad_norm": 0.0931473970413208, + "learning_rate": 5.409836065573772e-06, + "loss": 1.4433943033218384, + "step": 133 + }, + { + "epoch": 0.024760078992043053, + "grad_norm": 0.08528053015470505, + "learning_rate": 5.4508196721311476e-06, + "loss": 1.325724482536316, + "step": 134 + }, + { + "epoch": 0.024944855700938896, + "grad_norm": 0.07085525244474411, + "learning_rate": 5.491803278688526e-06, + "loss": 0.9489492177963257, + "step": 135 + }, + { + "epoch": 0.02512963240983474, + "grad_norm": 0.08872570842504501, + "learning_rate": 5.5327868852459025e-06, + "loss": 1.2950146198272705, + "step": 136 + }, + { + "epoch": 0.025314409118730584, + "grad_norm": 0.07972602546215057, + "learning_rate": 5.573770491803278e-06, + "loss": 1.0334540605545044, + "step": 137 + }, + { + "epoch": 0.025499185827626426, + "grad_norm": 0.08055080473423004, + "learning_rate": 5.614754098360657e-06, + "loss": 0.9743032455444336, + "step": 138 + }, + { + "epoch": 0.025683962536522272, + "grad_norm": 0.07908285409212112, + "learning_rate": 5.655737704918033e-06, + "loss": 1.0162171125411987, + "step": 139 + }, + { + "epoch": 0.025868739245418115, + "grad_norm": 0.10497123003005981, + "learning_rate": 5.696721311475411e-06, + "loss": 1.3711857795715332, + "step": 140 + }, + { + "epoch": 0.026053515954313957, + "grad_norm": 0.09259134531021118, + "learning_rate": 5.737704918032787e-06, + "loss": 1.1905478239059448, + "step": 141 + }, + { + "epoch": 0.026238292663209803, + "grad_norm": 0.09423007816076279, + "learning_rate": 5.778688524590165e-06, + "loss": 1.2282381057739258, + "step": 142 + }, + { + "epoch": 0.026423069372105645, + "grad_norm": 0.08443727344274521, + "learning_rate": 5.8196721311475415e-06, + "loss": 1.3001261949539185, + "step": 143 + }, + { + "epoch": 0.02660784608100149, + "grad_norm": 0.0933670923113823, + "learning_rate": 5.860655737704919e-06, + "loss": 1.2991963624954224, + "step": 144 + }, + { + "epoch": 0.026792622789897334, + "grad_norm": 0.09587407857179642, + "learning_rate": 5.9016393442622956e-06, + "loss": 1.064162015914917, + "step": 145 + }, + { + "epoch": 0.026977399498793176, + "grad_norm": 0.08887308090925217, + "learning_rate": 5.942622950819673e-06, + "loss": 1.1303232908248901, + "step": 146 + }, + { + "epoch": 0.027162176207689022, + "grad_norm": 0.0891450047492981, + "learning_rate": 5.98360655737705e-06, + "loss": 1.1807233095169067, + "step": 147 + }, + { + "epoch": 0.027346952916584864, + "grad_norm": 0.09064659476280212, + "learning_rate": 6.024590163934426e-06, + "loss": 1.0990560054779053, + "step": 148 + }, + { + "epoch": 0.027531729625480707, + "grad_norm": 0.10074954479932785, + "learning_rate": 6.065573770491804e-06, + "loss": 1.231015920639038, + "step": 149 + }, + { + "epoch": 0.027716506334376553, + "grad_norm": 0.07731477916240692, + "learning_rate": 6.10655737704918e-06, + "loss": 1.0035730600357056, + "step": 150 + }, + { + "epoch": 0.027901283043272395, + "grad_norm": 0.08103454858064651, + "learning_rate": 6.147540983606558e-06, + "loss": 1.036755919456482, + "step": 151 + }, + { + "epoch": 0.02808605975216824, + "grad_norm": 0.10394132882356644, + "learning_rate": 6.1885245901639345e-06, + "loss": 1.3168418407440186, + "step": 152 + }, + { + "epoch": 0.028270836461064083, + "grad_norm": 0.1187523603439331, + "learning_rate": 6.229508196721312e-06, + "loss": 1.3754808902740479, + "step": 153 + }, + { + "epoch": 0.028455613169959926, + "grad_norm": 0.09438347071409225, + "learning_rate": 6.270491803278689e-06, + "loss": 1.065092921257019, + "step": 154 + }, + { + "epoch": 0.02864038987885577, + "grad_norm": 0.08473348617553711, + "learning_rate": 6.311475409836066e-06, + "loss": 1.0317045450210571, + "step": 155 + }, + { + "epoch": 0.028825166587751614, + "grad_norm": 0.10343526303768158, + "learning_rate": 6.352459016393443e-06, + "loss": 1.0830442905426025, + "step": 156 + }, + { + "epoch": 0.029009943296647456, + "grad_norm": 0.11212706565856934, + "learning_rate": 6.393442622950821e-06, + "loss": 1.4527738094329834, + "step": 157 + }, + { + "epoch": 0.029194720005543302, + "grad_norm": 0.08572731912136078, + "learning_rate": 6.434426229508197e-06, + "loss": 1.1567916870117188, + "step": 158 + }, + { + "epoch": 0.029379496714439145, + "grad_norm": 0.05398239567875862, + "learning_rate": 6.475409836065575e-06, + "loss": 0.6827751398086548, + "step": 159 + }, + { + "epoch": 0.029564273423334987, + "grad_norm": 0.11699340492486954, + "learning_rate": 6.516393442622952e-06, + "loss": 1.2860791683197021, + "step": 160 + }, + { + "epoch": 0.029749050132230833, + "grad_norm": 0.07042679190635681, + "learning_rate": 6.5573770491803276e-06, + "loss": 0.7918217182159424, + "step": 161 + }, + { + "epoch": 0.029933826841126675, + "grad_norm": 0.08320360630750656, + "learning_rate": 6.598360655737706e-06, + "loss": 1.0839710235595703, + "step": 162 + }, + { + "epoch": 0.03011860355002252, + "grad_norm": 0.0856919214129448, + "learning_rate": 6.6393442622950825e-06, + "loss": 0.9883152842521667, + "step": 163 + }, + { + "epoch": 0.030303380258918364, + "grad_norm": 0.08802906423807144, + "learning_rate": 6.68032786885246e-06, + "loss": 1.0608097314834595, + "step": 164 + }, + { + "epoch": 0.030488156967814206, + "grad_norm": 0.08420740067958832, + "learning_rate": 6.721311475409837e-06, + "loss": 0.9704433083534241, + "step": 165 + }, + { + "epoch": 0.030672933676710052, + "grad_norm": 0.08119208365678787, + "learning_rate": 6.762295081967214e-06, + "loss": 0.8476821184158325, + "step": 166 + }, + { + "epoch": 0.030857710385605894, + "grad_norm": 0.08200537413358688, + "learning_rate": 6.803278688524591e-06, + "loss": 0.9476600289344788, + "step": 167 + }, + { + "epoch": 0.031042487094501737, + "grad_norm": 0.08968322724103928, + "learning_rate": 6.844262295081968e-06, + "loss": 1.2163679599761963, + "step": 168 + }, + { + "epoch": 0.031227263803397583, + "grad_norm": 0.08660093694925308, + "learning_rate": 6.885245901639345e-06, + "loss": 1.174856424331665, + "step": 169 + }, + { + "epoch": 0.031412040512293425, + "grad_norm": 0.09091655910015106, + "learning_rate": 6.926229508196722e-06, + "loss": 0.9161083102226257, + "step": 170 + }, + { + "epoch": 0.03159681722118927, + "grad_norm": 0.10828382521867752, + "learning_rate": 6.967213114754099e-06, + "loss": 1.1203913688659668, + "step": 171 + }, + { + "epoch": 0.03178159393008511, + "grad_norm": 0.07517366856336594, + "learning_rate": 7.0081967213114756e-06, + "loss": 1.1747320890426636, + "step": 172 + }, + { + "epoch": 0.03196637063898096, + "grad_norm": 0.07769443094730377, + "learning_rate": 7.049180327868853e-06, + "loss": 0.934011697769165, + "step": 173 + }, + { + "epoch": 0.0321511473478768, + "grad_norm": 0.104559026658535, + "learning_rate": 7.09016393442623e-06, + "loss": 1.1921355724334717, + "step": 174 + }, + { + "epoch": 0.032335924056772644, + "grad_norm": 0.08976680040359497, + "learning_rate": 7.131147540983607e-06, + "loss": 1.234793782234192, + "step": 175 + }, + { + "epoch": 0.03252070076566849, + "grad_norm": 0.09020117670297623, + "learning_rate": 7.172131147540984e-06, + "loss": 1.1587923765182495, + "step": 176 + }, + { + "epoch": 0.03270547747456433, + "grad_norm": 0.08177059888839722, + "learning_rate": 7.213114754098361e-06, + "loss": 1.0700960159301758, + "step": 177 + }, + { + "epoch": 0.03289025418346017, + "grad_norm": 0.0836954414844513, + "learning_rate": 7.254098360655738e-06, + "loss": 0.9924188852310181, + "step": 178 + }, + { + "epoch": 0.03307503089235602, + "grad_norm": 0.10337381064891815, + "learning_rate": 7.295081967213115e-06, + "loss": 1.2393605709075928, + "step": 179 + }, + { + "epoch": 0.03325980760125186, + "grad_norm": 0.08602118492126465, + "learning_rate": 7.336065573770492e-06, + "loss": 1.0827878713607788, + "step": 180 + }, + { + "epoch": 0.033444584310147706, + "grad_norm": 0.09046392887830734, + "learning_rate": 7.3770491803278695e-06, + "loss": 1.1079829931259155, + "step": 181 + }, + { + "epoch": 0.03362936101904355, + "grad_norm": 0.10092524439096451, + "learning_rate": 7.418032786885246e-06, + "loss": 1.2582985162734985, + "step": 182 + }, + { + "epoch": 0.03381413772793939, + "grad_norm": 0.0789152979850769, + "learning_rate": 7.459016393442624e-06, + "loss": 0.8313584923744202, + "step": 183 + }, + { + "epoch": 0.03399891443683524, + "grad_norm": 0.09583103656768799, + "learning_rate": 7.500000000000001e-06, + "loss": 1.1705000400543213, + "step": 184 + }, + { + "epoch": 0.03418369114573108, + "grad_norm": 0.09844133257865906, + "learning_rate": 7.540983606557377e-06, + "loss": 1.3607006072998047, + "step": 185 + }, + { + "epoch": 0.034368467854626925, + "grad_norm": 0.08308165520429611, + "learning_rate": 7.581967213114755e-06, + "loss": 1.0205916166305542, + "step": 186 + }, + { + "epoch": 0.03455324456352277, + "grad_norm": 0.1008264496922493, + "learning_rate": 7.622950819672132e-06, + "loss": 1.3429181575775146, + "step": 187 + }, + { + "epoch": 0.03473802127241861, + "grad_norm": 0.09739381819963455, + "learning_rate": 7.66393442622951e-06, + "loss": 1.2232327461242676, + "step": 188 + }, + { + "epoch": 0.03492279798131446, + "grad_norm": 0.0783327966928482, + "learning_rate": 7.704918032786886e-06, + "loss": 0.9063292741775513, + "step": 189 + }, + { + "epoch": 0.0351075746902103, + "grad_norm": 0.07952532172203064, + "learning_rate": 7.745901639344263e-06, + "loss": 0.9290217161178589, + "step": 190 + }, + { + "epoch": 0.035292351399106144, + "grad_norm": 0.1293192207813263, + "learning_rate": 7.786885245901639e-06, + "loss": 1.3529701232910156, + "step": 191 + }, + { + "epoch": 0.035477128108001986, + "grad_norm": 0.09185554087162018, + "learning_rate": 7.827868852459017e-06, + "loss": 1.1313979625701904, + "step": 192 + }, + { + "epoch": 0.03566190481689783, + "grad_norm": 0.10453785955905914, + "learning_rate": 7.868852459016394e-06, + "loss": 1.2551578283309937, + "step": 193 + }, + { + "epoch": 0.03584668152579367, + "grad_norm": 0.11565221101045609, + "learning_rate": 7.909836065573772e-06, + "loss": 1.2458629608154297, + "step": 194 + }, + { + "epoch": 0.03603145823468952, + "grad_norm": 0.07788601517677307, + "learning_rate": 7.950819672131147e-06, + "loss": 0.9804476499557495, + "step": 195 + }, + { + "epoch": 0.03621623494358536, + "grad_norm": 0.10067404806613922, + "learning_rate": 7.991803278688526e-06, + "loss": 1.265161156654358, + "step": 196 + }, + { + "epoch": 0.036401011652481205, + "grad_norm": 0.09044502675533295, + "learning_rate": 8.032786885245902e-06, + "loss": 1.0315583944320679, + "step": 197 + }, + { + "epoch": 0.03658578836137705, + "grad_norm": 0.11177962273359299, + "learning_rate": 8.073770491803279e-06, + "loss": 1.254334807395935, + "step": 198 + }, + { + "epoch": 0.03677056507027289, + "grad_norm": 0.0948624461889267, + "learning_rate": 8.114754098360657e-06, + "loss": 1.1573119163513184, + "step": 199 + }, + { + "epoch": 0.03695534177916874, + "grad_norm": 0.09116200357675552, + "learning_rate": 8.155737704918034e-06, + "loss": 1.2316014766693115, + "step": 200 + }, + { + "epoch": 0.03714011848806458, + "grad_norm": 0.09280700981616974, + "learning_rate": 8.19672131147541e-06, + "loss": 1.0697439908981323, + "step": 201 + }, + { + "epoch": 0.037324895196960424, + "grad_norm": 0.1117481216788292, + "learning_rate": 8.237704918032787e-06, + "loss": 1.4836028814315796, + "step": 202 + }, + { + "epoch": 0.037509671905856266, + "grad_norm": 0.0852770209312439, + "learning_rate": 8.278688524590165e-06, + "loss": 0.7765272855758667, + "step": 203 + }, + { + "epoch": 0.03769444861475211, + "grad_norm": 0.10650324076414108, + "learning_rate": 8.319672131147542e-06, + "loss": 1.1163196563720703, + "step": 204 + }, + { + "epoch": 0.03787922532364795, + "grad_norm": 0.08654922991991043, + "learning_rate": 8.360655737704919e-06, + "loss": 1.166925311088562, + "step": 205 + }, + { + "epoch": 0.0380640020325438, + "grad_norm": 0.10339689999818802, + "learning_rate": 8.401639344262295e-06, + "loss": 1.0431673526763916, + "step": 206 + }, + { + "epoch": 0.03824877874143964, + "grad_norm": 0.076447993516922, + "learning_rate": 8.442622950819674e-06, + "loss": 1.0487338304519653, + "step": 207 + }, + { + "epoch": 0.038433555450335485, + "grad_norm": 0.09971610456705093, + "learning_rate": 8.48360655737705e-06, + "loss": 1.163187861442566, + "step": 208 + }, + { + "epoch": 0.03861833215923133, + "grad_norm": 0.07643406838178635, + "learning_rate": 8.524590163934427e-06, + "loss": 1.0120989084243774, + "step": 209 + }, + { + "epoch": 0.03880310886812717, + "grad_norm": 0.1098799780011177, + "learning_rate": 8.565573770491804e-06, + "loss": 1.110826849937439, + "step": 210 + }, + { + "epoch": 0.03898788557702302, + "grad_norm": 0.08974554389715195, + "learning_rate": 8.60655737704918e-06, + "loss": 0.9144413471221924, + "step": 211 + }, + { + "epoch": 0.03917266228591886, + "grad_norm": 0.09564737975597382, + "learning_rate": 8.647540983606559e-06, + "loss": 0.9425402283668518, + "step": 212 + }, + { + "epoch": 0.039357438994814704, + "grad_norm": 0.11915887147188187, + "learning_rate": 8.688524590163935e-06, + "loss": 0.995085597038269, + "step": 213 + }, + { + "epoch": 0.03954221570371055, + "grad_norm": 0.09983936697244644, + "learning_rate": 8.729508196721312e-06, + "loss": 1.0563819408416748, + "step": 214 + }, + { + "epoch": 0.03972699241260639, + "grad_norm": 0.08968392759561539, + "learning_rate": 8.770491803278688e-06, + "loss": 1.0094102621078491, + "step": 215 + }, + { + "epoch": 0.03991176912150223, + "grad_norm": 0.07635963708162308, + "learning_rate": 8.811475409836067e-06, + "loss": 0.7993847727775574, + "step": 216 + }, + { + "epoch": 0.04009654583039808, + "grad_norm": 0.10572884231805801, + "learning_rate": 8.852459016393443e-06, + "loss": 1.058998465538025, + "step": 217 + }, + { + "epoch": 0.04028132253929392, + "grad_norm": 0.09346319735050201, + "learning_rate": 8.893442622950822e-06, + "loss": 1.100991129875183, + "step": 218 + }, + { + "epoch": 0.040466099248189766, + "grad_norm": 0.10200077295303345, + "learning_rate": 8.934426229508197e-06, + "loss": 0.9690842628479004, + "step": 219 + }, + { + "epoch": 0.04065087595708561, + "grad_norm": 0.0913805440068245, + "learning_rate": 8.975409836065575e-06, + "loss": 0.8409448862075806, + "step": 220 + }, + { + "epoch": 0.04083565266598145, + "grad_norm": 0.0992816761136055, + "learning_rate": 9.016393442622952e-06, + "loss": 1.1589744091033936, + "step": 221 + }, + { + "epoch": 0.0410204293748773, + "grad_norm": 0.07575351744890213, + "learning_rate": 9.057377049180328e-06, + "loss": 0.8102637529373169, + "step": 222 + }, + { + "epoch": 0.04120520608377314, + "grad_norm": 0.11256787180900574, + "learning_rate": 9.098360655737707e-06, + "loss": 1.1053478717803955, + "step": 223 + }, + { + "epoch": 0.041389982792668985, + "grad_norm": 0.08245761692523956, + "learning_rate": 9.139344262295083e-06, + "loss": 0.6602897644042969, + "step": 224 + }, + { + "epoch": 0.04157475950156483, + "grad_norm": 0.08066370338201523, + "learning_rate": 9.18032786885246e-06, + "loss": 0.7633033990859985, + "step": 225 + }, + { + "epoch": 0.04175953621046067, + "grad_norm": 0.08782163262367249, + "learning_rate": 9.221311475409836e-06, + "loss": 0.929959237575531, + "step": 226 + }, + { + "epoch": 0.04194431291935651, + "grad_norm": 0.09314026683568954, + "learning_rate": 9.262295081967215e-06, + "loss": 1.0352487564086914, + "step": 227 + }, + { + "epoch": 0.04212908962825236, + "grad_norm": 0.10643976181745529, + "learning_rate": 9.303278688524591e-06, + "loss": 1.229055404663086, + "step": 228 + }, + { + "epoch": 0.042313866337148204, + "grad_norm": 0.07235404849052429, + "learning_rate": 9.344262295081968e-06, + "loss": 0.8788920640945435, + "step": 229 + }, + { + "epoch": 0.042498643046044046, + "grad_norm": 0.09235186874866486, + "learning_rate": 9.385245901639345e-06, + "loss": 1.0391452312469482, + "step": 230 + }, + { + "epoch": 0.04268341975493989, + "grad_norm": 0.13859771192073822, + "learning_rate": 9.426229508196723e-06, + "loss": 1.4094034433364868, + "step": 231 + }, + { + "epoch": 0.04286819646383573, + "grad_norm": 0.07954560965299606, + "learning_rate": 9.4672131147541e-06, + "loss": 1.129581093788147, + "step": 232 + }, + { + "epoch": 0.04305297317273158, + "grad_norm": 0.10096707195043564, + "learning_rate": 9.508196721311476e-06, + "loss": 1.051155924797058, + "step": 233 + }, + { + "epoch": 0.04323774988162742, + "grad_norm": 0.08780834078788757, + "learning_rate": 9.549180327868853e-06, + "loss": 1.0105705261230469, + "step": 234 + }, + { + "epoch": 0.043422526590523265, + "grad_norm": 0.07152073830366135, + "learning_rate": 9.59016393442623e-06, + "loss": 0.9030419588088989, + "step": 235 + }, + { + "epoch": 0.04360730329941911, + "grad_norm": 0.11264440417289734, + "learning_rate": 9.631147540983608e-06, + "loss": 1.0321972370147705, + "step": 236 + }, + { + "epoch": 0.04379208000831495, + "grad_norm": 0.10329741984605789, + "learning_rate": 9.672131147540984e-06, + "loss": 1.207244634628296, + "step": 237 + }, + { + "epoch": 0.04397685671721079, + "grad_norm": 0.0821845754981041, + "learning_rate": 9.713114754098361e-06, + "loss": 0.9656891822814941, + "step": 238 + }, + { + "epoch": 0.04416163342610664, + "grad_norm": 0.08063480257987976, + "learning_rate": 9.754098360655738e-06, + "loss": 0.986838161945343, + "step": 239 + }, + { + "epoch": 0.044346410135002484, + "grad_norm": 0.08364249020814896, + "learning_rate": 9.795081967213116e-06, + "loss": 0.9730472564697266, + "step": 240 + }, + { + "epoch": 0.04453118684389833, + "grad_norm": 0.09396041929721832, + "learning_rate": 9.836065573770493e-06, + "loss": 1.0492497682571411, + "step": 241 + }, + { + "epoch": 0.04471596355279417, + "grad_norm": 0.09696823358535767, + "learning_rate": 9.87704918032787e-06, + "loss": 1.0710127353668213, + "step": 242 + }, + { + "epoch": 0.04490074026169001, + "grad_norm": 0.0735907331109047, + "learning_rate": 9.918032786885246e-06, + "loss": 0.7238403558731079, + "step": 243 + }, + { + "epoch": 0.04508551697058586, + "grad_norm": 0.09278254956007004, + "learning_rate": 9.959016393442624e-06, + "loss": 1.0645323991775513, + "step": 244 + }, + { + "epoch": 0.0452702936794817, + "grad_norm": 0.10433805733919144, + "learning_rate": 1e-05, + "loss": 1.2725396156311035, + "step": 245 + }, + { + "epoch": 0.045455070388377546, + "grad_norm": 0.10844039171934128, + "learning_rate": 1.0040983606557377e-05, + "loss": 1.019277811050415, + "step": 246 + }, + { + "epoch": 0.04563984709727339, + "grad_norm": 0.09526235610246658, + "learning_rate": 1.0081967213114754e-05, + "loss": 1.0928999185562134, + "step": 247 + }, + { + "epoch": 0.04582462380616923, + "grad_norm": 0.08871227502822876, + "learning_rate": 1.0122950819672132e-05, + "loss": 0.94351726770401, + "step": 248 + }, + { + "epoch": 0.04600940051506507, + "grad_norm": 0.0853639468550682, + "learning_rate": 1.0163934426229509e-05, + "loss": 0.9936844706535339, + "step": 249 + }, + { + "epoch": 0.04619417722396092, + "grad_norm": 0.11534149944782257, + "learning_rate": 1.0204918032786886e-05, + "loss": 1.150606393814087, + "step": 250 + }, + { + "epoch": 0.046378953932856765, + "grad_norm": 0.0720873549580574, + "learning_rate": 1.0245901639344262e-05, + "loss": 0.6555285453796387, + "step": 251 + }, + { + "epoch": 0.04656373064175261, + "grad_norm": 0.10066215693950653, + "learning_rate": 1.028688524590164e-05, + "loss": 0.9825627207756042, + "step": 252 + }, + { + "epoch": 0.04674850735064845, + "grad_norm": 0.09775615483522415, + "learning_rate": 1.0327868852459017e-05, + "loss": 0.9665984511375427, + "step": 253 + }, + { + "epoch": 0.04693328405954429, + "grad_norm": 0.10551930218935013, + "learning_rate": 1.0368852459016394e-05, + "loss": 1.1738957166671753, + "step": 254 + }, + { + "epoch": 0.04711806076844014, + "grad_norm": 0.1370166391134262, + "learning_rate": 1.040983606557377e-05, + "loss": 1.2341063022613525, + "step": 255 + }, + { + "epoch": 0.047302837477335984, + "grad_norm": 0.11037618666887283, + "learning_rate": 1.0450819672131149e-05, + "loss": 1.14421546459198, + "step": 256 + }, + { + "epoch": 0.047487614186231826, + "grad_norm": 0.08367924392223358, + "learning_rate": 1.0491803278688525e-05, + "loss": 1.0374960899353027, + "step": 257 + }, + { + "epoch": 0.04767239089512767, + "grad_norm": 0.08623643964529037, + "learning_rate": 1.0532786885245902e-05, + "loss": 0.9188487529754639, + "step": 258 + }, + { + "epoch": 0.04785716760402351, + "grad_norm": 0.09963801503181458, + "learning_rate": 1.0573770491803279e-05, + "loss": 1.2390835285186768, + "step": 259 + }, + { + "epoch": 0.04804194431291935, + "grad_norm": 0.08543514460325241, + "learning_rate": 1.0614754098360655e-05, + "loss": 1.0762650966644287, + "step": 260 + }, + { + "epoch": 0.0482267210218152, + "grad_norm": 0.09559163451194763, + "learning_rate": 1.0655737704918034e-05, + "loss": 0.8684812784194946, + "step": 261 + }, + { + "epoch": 0.048411497730711045, + "grad_norm": 0.10531015694141388, + "learning_rate": 1.069672131147541e-05, + "loss": 1.0792323350906372, + "step": 262 + }, + { + "epoch": 0.04859627443960689, + "grad_norm": 0.09568508714437485, + "learning_rate": 1.0737704918032787e-05, + "loss": 1.1282249689102173, + "step": 263 + }, + { + "epoch": 0.04878105114850273, + "grad_norm": 0.09946364909410477, + "learning_rate": 1.0778688524590164e-05, + "loss": 1.0437533855438232, + "step": 264 + }, + { + "epoch": 0.04896582785739857, + "grad_norm": 0.12145375460386276, + "learning_rate": 1.0819672131147544e-05, + "loss": 1.2372792959213257, + "step": 265 + }, + { + "epoch": 0.04915060456629442, + "grad_norm": 0.09334026277065277, + "learning_rate": 1.0860655737704918e-05, + "loss": 1.0724656581878662, + "step": 266 + }, + { + "epoch": 0.049335381275190264, + "grad_norm": 0.09644033759832382, + "learning_rate": 1.0901639344262295e-05, + "loss": 1.2204649448394775, + "step": 267 + }, + { + "epoch": 0.049520157984086106, + "grad_norm": 0.10595440119504929, + "learning_rate": 1.0942622950819672e-05, + "loss": 1.0160824060440063, + "step": 268 + }, + { + "epoch": 0.04970493469298195, + "grad_norm": 0.09714332222938538, + "learning_rate": 1.0983606557377052e-05, + "loss": 0.906075656414032, + "step": 269 + }, + { + "epoch": 0.04988971140187779, + "grad_norm": 0.09888976812362671, + "learning_rate": 1.1024590163934428e-05, + "loss": 1.0426760911941528, + "step": 270 + }, + { + "epoch": 0.050074488110773634, + "grad_norm": 0.09720077365636826, + "learning_rate": 1.1065573770491805e-05, + "loss": 1.1333513259887695, + "step": 271 + }, + { + "epoch": 0.05025926481966948, + "grad_norm": 0.09125541895627975, + "learning_rate": 1.110655737704918e-05, + "loss": 1.1542598009109497, + "step": 272 + }, + { + "epoch": 0.050444041528565325, + "grad_norm": 0.08100289106369019, + "learning_rate": 1.1147540983606557e-05, + "loss": 0.8822868466377258, + "step": 273 + }, + { + "epoch": 0.05062881823746117, + "grad_norm": 0.08017222583293915, + "learning_rate": 1.1188524590163937e-05, + "loss": 0.8326126337051392, + "step": 274 + }, + { + "epoch": 0.05081359494635701, + "grad_norm": 0.09766320139169693, + "learning_rate": 1.1229508196721313e-05, + "loss": 1.1107693910598755, + "step": 275 + }, + { + "epoch": 0.05099837165525285, + "grad_norm": 0.09009626507759094, + "learning_rate": 1.127049180327869e-05, + "loss": 0.8500745296478271, + "step": 276 + }, + { + "epoch": 0.0511831483641487, + "grad_norm": 0.10108991712331772, + "learning_rate": 1.1311475409836066e-05, + "loss": 0.9838616847991943, + "step": 277 + }, + { + "epoch": 0.051367925073044544, + "grad_norm": 0.08185333013534546, + "learning_rate": 1.1352459016393445e-05, + "loss": 0.8202808499336243, + "step": 278 + }, + { + "epoch": 0.05155270178194039, + "grad_norm": 0.11285862326622009, + "learning_rate": 1.1393442622950821e-05, + "loss": 1.48050856590271, + "step": 279 + }, + { + "epoch": 0.05173747849083623, + "grad_norm": 0.09318527579307556, + "learning_rate": 1.1434426229508198e-05, + "loss": 0.8346209526062012, + "step": 280 + }, + { + "epoch": 0.05192225519973207, + "grad_norm": 0.09572022408246994, + "learning_rate": 1.1475409836065575e-05, + "loss": 1.0783284902572632, + "step": 281 + }, + { + "epoch": 0.052107031908627914, + "grad_norm": 0.11212638020515442, + "learning_rate": 1.1516393442622951e-05, + "loss": 1.0902286767959595, + "step": 282 + }, + { + "epoch": 0.05229180861752376, + "grad_norm": 0.10433069616556168, + "learning_rate": 1.155737704918033e-05, + "loss": 1.058199167251587, + "step": 283 + }, + { + "epoch": 0.052476585326419606, + "grad_norm": 0.10224845260381699, + "learning_rate": 1.1598360655737706e-05, + "loss": 1.081488013267517, + "step": 284 + }, + { + "epoch": 0.05266136203531545, + "grad_norm": 0.10507450252771378, + "learning_rate": 1.1639344262295083e-05, + "loss": 0.9051607847213745, + "step": 285 + }, + { + "epoch": 0.05284613874421129, + "grad_norm": 0.0947372242808342, + "learning_rate": 1.168032786885246e-05, + "loss": 1.0995267629623413, + "step": 286 + }, + { + "epoch": 0.05303091545310713, + "grad_norm": 0.09245672821998596, + "learning_rate": 1.1721311475409838e-05, + "loss": 1.012171745300293, + "step": 287 + }, + { + "epoch": 0.05321569216200298, + "grad_norm": 0.12127245962619781, + "learning_rate": 1.1762295081967215e-05, + "loss": 1.309299349784851, + "step": 288 + }, + { + "epoch": 0.053400468870898825, + "grad_norm": 0.1081591546535492, + "learning_rate": 1.1803278688524591e-05, + "loss": 1.0298875570297241, + "step": 289 + }, + { + "epoch": 0.05358524557979467, + "grad_norm": 0.11109253019094467, + "learning_rate": 1.1844262295081968e-05, + "loss": 1.058588981628418, + "step": 290 + }, + { + "epoch": 0.05377002228869051, + "grad_norm": 0.08653238415718079, + "learning_rate": 1.1885245901639346e-05, + "loss": 0.898544430732727, + "step": 291 + }, + { + "epoch": 0.05395479899758635, + "grad_norm": 0.0926504135131836, + "learning_rate": 1.1926229508196723e-05, + "loss": 0.7896566987037659, + "step": 292 + }, + { + "epoch": 0.054139575706482194, + "grad_norm": 0.08921337872743607, + "learning_rate": 1.19672131147541e-05, + "loss": 0.9710261225700378, + "step": 293 + }, + { + "epoch": 0.054324352415378044, + "grad_norm": 0.07718883454799652, + "learning_rate": 1.2008196721311476e-05, + "loss": 0.8338017463684082, + "step": 294 + }, + { + "epoch": 0.054509129124273886, + "grad_norm": 0.0848095566034317, + "learning_rate": 1.2049180327868853e-05, + "loss": 0.6920614838600159, + "step": 295 + }, + { + "epoch": 0.05469390583316973, + "grad_norm": 0.08996455371379852, + "learning_rate": 1.2090163934426231e-05, + "loss": 0.7501096129417419, + "step": 296 + }, + { + "epoch": 0.05487868254206557, + "grad_norm": 0.07516958564519882, + "learning_rate": 1.2131147540983608e-05, + "loss": 0.8861922025680542, + "step": 297 + }, + { + "epoch": 0.055063459250961413, + "grad_norm": 0.10512320697307587, + "learning_rate": 1.2172131147540984e-05, + "loss": 1.018561840057373, + "step": 298 + }, + { + "epoch": 0.05524823595985726, + "grad_norm": 0.0865112841129303, + "learning_rate": 1.221311475409836e-05, + "loss": 0.7376396059989929, + "step": 299 + }, + { + "epoch": 0.055433012668753105, + "grad_norm": 0.12389017641544342, + "learning_rate": 1.2254098360655739e-05, + "loss": 1.332137107849121, + "step": 300 + }, + { + "epoch": 0.05561778937764895, + "grad_norm": 0.11509191244840622, + "learning_rate": 1.2295081967213116e-05, + "loss": 1.004392385482788, + "step": 301 + }, + { + "epoch": 0.05580256608654479, + "grad_norm": 0.13411563634872437, + "learning_rate": 1.2336065573770492e-05, + "loss": 1.276947021484375, + "step": 302 + }, + { + "epoch": 0.05598734279544063, + "grad_norm": 0.09792132675647736, + "learning_rate": 1.2377049180327869e-05, + "loss": 0.8487666249275208, + "step": 303 + }, + { + "epoch": 0.05617211950433648, + "grad_norm": 0.09597836434841156, + "learning_rate": 1.2418032786885247e-05, + "loss": 0.9482805728912354, + "step": 304 + }, + { + "epoch": 0.056356896213232324, + "grad_norm": 0.11644294112920761, + "learning_rate": 1.2459016393442624e-05, + "loss": 0.9525696039199829, + "step": 305 + }, + { + "epoch": 0.05654167292212817, + "grad_norm": 0.10617950558662415, + "learning_rate": 1.25e-05, + "loss": 1.0335408449172974, + "step": 306 + }, + { + "epoch": 0.05672644963102401, + "grad_norm": 0.09923581779003143, + "learning_rate": 1.2540983606557377e-05, + "loss": 1.0804893970489502, + "step": 307 + }, + { + "epoch": 0.05691122633991985, + "grad_norm": 0.10484209656715393, + "learning_rate": 1.2581967213114754e-05, + "loss": 0.920568585395813, + "step": 308 + }, + { + "epoch": 0.057096003048815694, + "grad_norm": 0.09422818571329117, + "learning_rate": 1.2622950819672132e-05, + "loss": 1.0048713684082031, + "step": 309 + }, + { + "epoch": 0.05728077975771154, + "grad_norm": 0.09801265597343445, + "learning_rate": 1.2663934426229509e-05, + "loss": 0.9483872652053833, + "step": 310 + }, + { + "epoch": 0.057465556466607386, + "grad_norm": 0.10625987499952316, + "learning_rate": 1.2704918032786885e-05, + "loss": 1.1098862886428833, + "step": 311 + }, + { + "epoch": 0.05765033317550323, + "grad_norm": 0.10191453993320465, + "learning_rate": 1.2745901639344262e-05, + "loss": 0.9445469975471497, + "step": 312 + }, + { + "epoch": 0.05783510988439907, + "grad_norm": 0.09489865601062775, + "learning_rate": 1.2786885245901642e-05, + "loss": 0.9424755573272705, + "step": 313 + }, + { + "epoch": 0.05801988659329491, + "grad_norm": 0.07801543921232224, + "learning_rate": 1.2827868852459017e-05, + "loss": 0.9401955008506775, + "step": 314 + }, + { + "epoch": 0.05820466330219076, + "grad_norm": 0.09698277711868286, + "learning_rate": 1.2868852459016394e-05, + "loss": 0.9096693396568298, + "step": 315 + }, + { + "epoch": 0.058389440011086605, + "grad_norm": 0.07716673612594604, + "learning_rate": 1.290983606557377e-05, + "loss": 0.7786237597465515, + "step": 316 + }, + { + "epoch": 0.05857421671998245, + "grad_norm": 0.09627088904380798, + "learning_rate": 1.295081967213115e-05, + "loss": 0.9308719635009766, + "step": 317 + }, + { + "epoch": 0.05875899342887829, + "grad_norm": 0.09469778835773468, + "learning_rate": 1.2991803278688527e-05, + "loss": 1.0299229621887207, + "step": 318 + }, + { + "epoch": 0.05894377013777413, + "grad_norm": 0.08667682856321335, + "learning_rate": 1.3032786885245904e-05, + "loss": 0.785006582736969, + "step": 319 + }, + { + "epoch": 0.059128546846669974, + "grad_norm": 0.1045043021440506, + "learning_rate": 1.3073770491803278e-05, + "loss": 0.8180931806564331, + "step": 320 + }, + { + "epoch": 0.059313323555565824, + "grad_norm": 0.1012943759560585, + "learning_rate": 1.3114754098360655e-05, + "loss": 1.0428085327148438, + "step": 321 + }, + { + "epoch": 0.059498100264461666, + "grad_norm": 0.08716757595539093, + "learning_rate": 1.3155737704918035e-05, + "loss": 1.0261026620864868, + "step": 322 + }, + { + "epoch": 0.05968287697335751, + "grad_norm": 0.08290330320596695, + "learning_rate": 1.3196721311475412e-05, + "loss": 0.8196086287498474, + "step": 323 + }, + { + "epoch": 0.05986765368225335, + "grad_norm": 0.07775544375181198, + "learning_rate": 1.3237704918032788e-05, + "loss": 0.7242082357406616, + "step": 324 + }, + { + "epoch": 0.06005243039114919, + "grad_norm": 0.05980847030878067, + "learning_rate": 1.3278688524590165e-05, + "loss": 0.7295750975608826, + "step": 325 + }, + { + "epoch": 0.06023720710004504, + "grad_norm": 0.0982968881726265, + "learning_rate": 1.3319672131147543e-05, + "loss": 0.873098611831665, + "step": 326 + }, + { + "epoch": 0.060421983808940885, + "grad_norm": 0.0912838950753212, + "learning_rate": 1.336065573770492e-05, + "loss": 1.3055800199508667, + "step": 327 + }, + { + "epoch": 0.06060676051783673, + "grad_norm": 0.09126565605401993, + "learning_rate": 1.3401639344262297e-05, + "loss": 0.836475670337677, + "step": 328 + }, + { + "epoch": 0.06079153722673257, + "grad_norm": 0.1270003318786621, + "learning_rate": 1.3442622950819673e-05, + "loss": 1.2076267004013062, + "step": 329 + }, + { + "epoch": 0.06097631393562841, + "grad_norm": 0.09601800888776779, + "learning_rate": 1.3483606557377052e-05, + "loss": 1.0199816226959229, + "step": 330 + }, + { + "epoch": 0.061161090644524255, + "grad_norm": 0.1017669290304184, + "learning_rate": 1.3524590163934428e-05, + "loss": 1.0680336952209473, + "step": 331 + }, + { + "epoch": 0.061345867353420104, + "grad_norm": 0.11380743980407715, + "learning_rate": 1.3565573770491805e-05, + "loss": 1.0062092542648315, + "step": 332 + }, + { + "epoch": 0.061530644062315946, + "grad_norm": 0.10115383565425873, + "learning_rate": 1.3606557377049181e-05, + "loss": 1.1867700815200806, + "step": 333 + }, + { + "epoch": 0.06171542077121179, + "grad_norm": 0.09494657814502716, + "learning_rate": 1.3647540983606558e-05, + "loss": 0.7801775336265564, + "step": 334 + }, + { + "epoch": 0.06190019748010763, + "grad_norm": 0.12234895676374435, + "learning_rate": 1.3688524590163936e-05, + "loss": 1.0539796352386475, + "step": 335 + }, + { + "epoch": 0.062084974189003474, + "grad_norm": 0.1199754998087883, + "learning_rate": 1.3729508196721313e-05, + "loss": 1.2007956504821777, + "step": 336 + }, + { + "epoch": 0.06226975089789932, + "grad_norm": 0.10975956916809082, + "learning_rate": 1.377049180327869e-05, + "loss": 1.0453523397445679, + "step": 337 + }, + { + "epoch": 0.062454527606795165, + "grad_norm": 0.09918422996997833, + "learning_rate": 1.3811475409836066e-05, + "loss": 0.8010137677192688, + "step": 338 + }, + { + "epoch": 0.06263930431569101, + "grad_norm": 0.0994090810418129, + "learning_rate": 1.3852459016393445e-05, + "loss": 1.0801807641983032, + "step": 339 + }, + { + "epoch": 0.06282408102458685, + "grad_norm": 0.10293493419885635, + "learning_rate": 1.3893442622950821e-05, + "loss": 0.9230149388313293, + "step": 340 + }, + { + "epoch": 0.06300885773348269, + "grad_norm": 0.10307420045137405, + "learning_rate": 1.3934426229508198e-05, + "loss": 1.0578651428222656, + "step": 341 + }, + { + "epoch": 0.06319363444237854, + "grad_norm": 0.1045430526137352, + "learning_rate": 1.3975409836065574e-05, + "loss": 0.790934681892395, + "step": 342 + }, + { + "epoch": 0.06337841115127438, + "grad_norm": 0.10155732929706573, + "learning_rate": 1.4016393442622951e-05, + "loss": 1.1352696418762207, + "step": 343 + }, + { + "epoch": 0.06356318786017022, + "grad_norm": 0.09721534699201584, + "learning_rate": 1.405737704918033e-05, + "loss": 0.9279108047485352, + "step": 344 + }, + { + "epoch": 0.06374796456906606, + "grad_norm": 0.11629458516836166, + "learning_rate": 1.4098360655737706e-05, + "loss": 1.0785913467407227, + "step": 345 + }, + { + "epoch": 0.06393274127796192, + "grad_norm": 0.10066460072994232, + "learning_rate": 1.4139344262295083e-05, + "loss": 1.1896709203720093, + "step": 346 + }, + { + "epoch": 0.06411751798685776, + "grad_norm": 0.09405656158924103, + "learning_rate": 1.418032786885246e-05, + "loss": 0.8420297503471375, + "step": 347 + }, + { + "epoch": 0.0643022946957536, + "grad_norm": 0.08271870762109756, + "learning_rate": 1.4221311475409838e-05, + "loss": 0.7494404315948486, + "step": 348 + }, + { + "epoch": 0.06448707140464945, + "grad_norm": 0.1078755334019661, + "learning_rate": 1.4262295081967214e-05, + "loss": 0.9479129314422607, + "step": 349 + }, + { + "epoch": 0.06467184811354529, + "grad_norm": 0.0918903723359108, + "learning_rate": 1.4303278688524591e-05, + "loss": 0.9387383460998535, + "step": 350 + }, + { + "epoch": 0.06485662482244113, + "grad_norm": 0.11810861527919769, + "learning_rate": 1.4344262295081968e-05, + "loss": 0.9701591730117798, + "step": 351 + }, + { + "epoch": 0.06504140153133697, + "grad_norm": 0.09874974936246872, + "learning_rate": 1.4385245901639346e-05, + "loss": 0.9196026921272278, + "step": 352 + }, + { + "epoch": 0.06522617824023282, + "grad_norm": 0.0841999500989914, + "learning_rate": 1.4426229508196722e-05, + "loss": 0.6813482642173767, + "step": 353 + }, + { + "epoch": 0.06541095494912866, + "grad_norm": 0.09054264426231384, + "learning_rate": 1.4467213114754099e-05, + "loss": 1.0393775701522827, + "step": 354 + }, + { + "epoch": 0.0655957316580245, + "grad_norm": 0.09485882520675659, + "learning_rate": 1.4508196721311476e-05, + "loss": 1.008347988128662, + "step": 355 + }, + { + "epoch": 0.06578050836692034, + "grad_norm": 0.09545883536338806, + "learning_rate": 1.4549180327868852e-05, + "loss": 0.8867791295051575, + "step": 356 + }, + { + "epoch": 0.0659652850758162, + "grad_norm": 0.10870155692100525, + "learning_rate": 1.459016393442623e-05, + "loss": 1.0850865840911865, + "step": 357 + }, + { + "epoch": 0.06615006178471204, + "grad_norm": 0.0992053747177124, + "learning_rate": 1.4631147540983607e-05, + "loss": 1.008838415145874, + "step": 358 + }, + { + "epoch": 0.06633483849360788, + "grad_norm": 0.08941731601953506, + "learning_rate": 1.4672131147540984e-05, + "loss": 0.9069108963012695, + "step": 359 + }, + { + "epoch": 0.06651961520250373, + "grad_norm": 0.11634092032909393, + "learning_rate": 1.471311475409836e-05, + "loss": 1.202878713607788, + "step": 360 + }, + { + "epoch": 0.06670439191139957, + "grad_norm": 0.10910794138908386, + "learning_rate": 1.4754098360655739e-05, + "loss": 1.082908034324646, + "step": 361 + }, + { + "epoch": 0.06688916862029541, + "grad_norm": 0.0834878534078598, + "learning_rate": 1.4795081967213116e-05, + "loss": 0.7075738906860352, + "step": 362 + }, + { + "epoch": 0.06707394532919125, + "grad_norm": 0.10349691659212112, + "learning_rate": 1.4836065573770492e-05, + "loss": 0.9434449672698975, + "step": 363 + }, + { + "epoch": 0.0672587220380871, + "grad_norm": 0.11296708881855011, + "learning_rate": 1.4877049180327869e-05, + "loss": 0.933619499206543, + "step": 364 + }, + { + "epoch": 0.06744349874698294, + "grad_norm": 0.10705320537090302, + "learning_rate": 1.4918032786885249e-05, + "loss": 1.2247308492660522, + "step": 365 + }, + { + "epoch": 0.06762827545587878, + "grad_norm": 0.09186027944087982, + "learning_rate": 1.4959016393442625e-05, + "loss": 0.9711430072784424, + "step": 366 + }, + { + "epoch": 0.06781305216477464, + "grad_norm": 0.1063535287976265, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.7582070231437683, + "step": 367 + }, + { + "epoch": 0.06799782887367048, + "grad_norm": 0.12384825944900513, + "learning_rate": 1.5040983606557377e-05, + "loss": 0.9878946542739868, + "step": 368 + }, + { + "epoch": 0.06818260558256632, + "grad_norm": 0.11601697653532028, + "learning_rate": 1.5081967213114754e-05, + "loss": 1.1344435214996338, + "step": 369 + }, + { + "epoch": 0.06836738229146216, + "grad_norm": 0.10092558711767197, + "learning_rate": 1.5122950819672134e-05, + "loss": 0.8472614288330078, + "step": 370 + }, + { + "epoch": 0.068552159000358, + "grad_norm": 0.08755794912576675, + "learning_rate": 1.516393442622951e-05, + "loss": 0.9642510414123535, + "step": 371 + }, + { + "epoch": 0.06873693570925385, + "grad_norm": 0.11546262353658676, + "learning_rate": 1.5204918032786887e-05, + "loss": 0.9715655446052551, + "step": 372 + }, + { + "epoch": 0.06892171241814969, + "grad_norm": 0.10162407904863358, + "learning_rate": 1.5245901639344264e-05, + "loss": 0.8278041481971741, + "step": 373 + }, + { + "epoch": 0.06910648912704553, + "grad_norm": 0.12836207449436188, + "learning_rate": 1.528688524590164e-05, + "loss": 1.1721863746643066, + "step": 374 + }, + { + "epoch": 0.06929126583594138, + "grad_norm": 0.08836661279201508, + "learning_rate": 1.532786885245902e-05, + "loss": 0.8702605962753296, + "step": 375 + }, + { + "epoch": 0.06947604254483722, + "grad_norm": 0.09650567173957825, + "learning_rate": 1.5368852459016393e-05, + "loss": 0.8430268168449402, + "step": 376 + }, + { + "epoch": 0.06966081925373306, + "grad_norm": 0.12295356392860413, + "learning_rate": 1.5409836065573772e-05, + "loss": 1.495065689086914, + "step": 377 + }, + { + "epoch": 0.06984559596262892, + "grad_norm": 0.09706033766269684, + "learning_rate": 1.545081967213115e-05, + "loss": 0.7906158566474915, + "step": 378 + }, + { + "epoch": 0.07003037267152476, + "grad_norm": 0.11473716050386429, + "learning_rate": 1.5491803278688525e-05, + "loss": 0.9313616752624512, + "step": 379 + }, + { + "epoch": 0.0702151493804206, + "grad_norm": 0.11317595094442368, + "learning_rate": 1.5532786885245903e-05, + "loss": 0.9224214553833008, + "step": 380 + }, + { + "epoch": 0.07039992608931644, + "grad_norm": 0.08347756415605545, + "learning_rate": 1.5573770491803278e-05, + "loss": 0.9982295632362366, + "step": 381 + }, + { + "epoch": 0.07058470279821229, + "grad_norm": 0.09798528254032135, + "learning_rate": 1.5614754098360657e-05, + "loss": 1.0946242809295654, + "step": 382 + }, + { + "epoch": 0.07076947950710813, + "grad_norm": 0.10663064569234848, + "learning_rate": 1.5655737704918035e-05, + "loss": 1.1035512685775757, + "step": 383 + }, + { + "epoch": 0.07095425621600397, + "grad_norm": 0.0784713551402092, + "learning_rate": 1.569672131147541e-05, + "loss": 0.8944029808044434, + "step": 384 + }, + { + "epoch": 0.07113903292489981, + "grad_norm": 0.11879897117614746, + "learning_rate": 1.5737704918032788e-05, + "loss": 0.9249335527420044, + "step": 385 + }, + { + "epoch": 0.07132380963379566, + "grad_norm": 0.11596395075321198, + "learning_rate": 1.5778688524590163e-05, + "loss": 1.006790041923523, + "step": 386 + }, + { + "epoch": 0.0715085863426915, + "grad_norm": 0.10372721403837204, + "learning_rate": 1.5819672131147545e-05, + "loss": 0.950377881526947, + "step": 387 + }, + { + "epoch": 0.07169336305158734, + "grad_norm": 0.10973094403743744, + "learning_rate": 1.586065573770492e-05, + "loss": 0.9215264916419983, + "step": 388 + }, + { + "epoch": 0.0718781397604832, + "grad_norm": 0.08272566646337509, + "learning_rate": 1.5901639344262295e-05, + "loss": 0.7860650420188904, + "step": 389 + }, + { + "epoch": 0.07206291646937904, + "grad_norm": 0.09537763893604279, + "learning_rate": 1.5942622950819673e-05, + "loss": 0.8267325758934021, + "step": 390 + }, + { + "epoch": 0.07224769317827488, + "grad_norm": 0.1512487381696701, + "learning_rate": 1.598360655737705e-05, + "loss": 1.1345775127410889, + "step": 391 + }, + { + "epoch": 0.07243246988717073, + "grad_norm": 0.07760065793991089, + "learning_rate": 1.602459016393443e-05, + "loss": 0.6036292314529419, + "step": 392 + }, + { + "epoch": 0.07261724659606657, + "grad_norm": 0.08124257624149323, + "learning_rate": 1.6065573770491805e-05, + "loss": 0.7405913472175598, + "step": 393 + }, + { + "epoch": 0.07280202330496241, + "grad_norm": 0.10059768706560135, + "learning_rate": 1.610655737704918e-05, + "loss": 0.9785237908363342, + "step": 394 + }, + { + "epoch": 0.07298680001385825, + "grad_norm": 0.0916321724653244, + "learning_rate": 1.6147540983606558e-05, + "loss": 0.7635613679885864, + "step": 395 + }, + { + "epoch": 0.0731715767227541, + "grad_norm": 0.08234703540802002, + "learning_rate": 1.6188524590163936e-05, + "loss": 0.6982128620147705, + "step": 396 + }, + { + "epoch": 0.07335635343164994, + "grad_norm": 0.10206922143697739, + "learning_rate": 1.6229508196721314e-05, + "loss": 0.7990150451660156, + "step": 397 + }, + { + "epoch": 0.07354113014054578, + "grad_norm": 0.08356337994337082, + "learning_rate": 1.627049180327869e-05, + "loss": 0.7303667068481445, + "step": 398 + }, + { + "epoch": 0.07372590684944162, + "grad_norm": 0.11013100296258926, + "learning_rate": 1.6311475409836068e-05, + "loss": 1.070306658744812, + "step": 399 + }, + { + "epoch": 0.07391068355833748, + "grad_norm": 0.09661299735307693, + "learning_rate": 1.6352459016393446e-05, + "loss": 0.8590985536575317, + "step": 400 + }, + { + "epoch": 0.07409546026723332, + "grad_norm": 0.090263731777668, + "learning_rate": 1.639344262295082e-05, + "loss": 0.674292266368866, + "step": 401 + }, + { + "epoch": 0.07428023697612916, + "grad_norm": 0.08033633232116699, + "learning_rate": 1.64344262295082e-05, + "loss": 0.6512205600738525, + "step": 402 + }, + { + "epoch": 0.074465013685025, + "grad_norm": 0.10023355484008789, + "learning_rate": 1.6475409836065574e-05, + "loss": 0.8848332762718201, + "step": 403 + }, + { + "epoch": 0.07464979039392085, + "grad_norm": 0.09321942925453186, + "learning_rate": 1.6516393442622953e-05, + "loss": 0.8359254598617554, + "step": 404 + }, + { + "epoch": 0.07483456710281669, + "grad_norm": 0.07962282747030258, + "learning_rate": 1.655737704918033e-05, + "loss": 0.9046615362167358, + "step": 405 + }, + { + "epoch": 0.07501934381171253, + "grad_norm": 0.10793166607618332, + "learning_rate": 1.6598360655737706e-05, + "loss": 0.9817122220993042, + "step": 406 + }, + { + "epoch": 0.07520412052060838, + "grad_norm": 0.10675039142370224, + "learning_rate": 1.6639344262295084e-05, + "loss": 1.2107510566711426, + "step": 407 + }, + { + "epoch": 0.07538889722950422, + "grad_norm": 0.0928160548210144, + "learning_rate": 1.668032786885246e-05, + "loss": 0.8065657019615173, + "step": 408 + }, + { + "epoch": 0.07557367393840006, + "grad_norm": 0.09683706611394882, + "learning_rate": 1.6721311475409837e-05, + "loss": 1.064256191253662, + "step": 409 + }, + { + "epoch": 0.0757584506472959, + "grad_norm": 0.09184283018112183, + "learning_rate": 1.6762295081967216e-05, + "loss": 0.8598893284797668, + "step": 410 + }, + { + "epoch": 0.07594322735619176, + "grad_norm": 0.09558923542499542, + "learning_rate": 1.680327868852459e-05, + "loss": 0.8903302550315857, + "step": 411 + }, + { + "epoch": 0.0761280040650876, + "grad_norm": 0.09565886855125427, + "learning_rate": 1.684426229508197e-05, + "loss": 0.9265789985656738, + "step": 412 + }, + { + "epoch": 0.07631278077398344, + "grad_norm": 0.10494910180568695, + "learning_rate": 1.6885245901639347e-05, + "loss": 0.8625717163085938, + "step": 413 + }, + { + "epoch": 0.07649755748287929, + "grad_norm": 0.10598883777856827, + "learning_rate": 1.6926229508196722e-05, + "loss": 1.1451170444488525, + "step": 414 + }, + { + "epoch": 0.07668233419177513, + "grad_norm": 0.11120012402534485, + "learning_rate": 1.69672131147541e-05, + "loss": 0.9379380345344543, + "step": 415 + }, + { + "epoch": 0.07686711090067097, + "grad_norm": 0.09714861214160919, + "learning_rate": 1.7008196721311476e-05, + "loss": 0.8064665794372559, + "step": 416 + }, + { + "epoch": 0.07705188760956681, + "grad_norm": 0.11614526808261871, + "learning_rate": 1.7049180327868854e-05, + "loss": 1.0065346956253052, + "step": 417 + }, + { + "epoch": 0.07723666431846266, + "grad_norm": 0.09795574098825455, + "learning_rate": 1.7090163934426232e-05, + "loss": 0.8045728206634521, + "step": 418 + }, + { + "epoch": 0.0774214410273585, + "grad_norm": 0.08873660117387772, + "learning_rate": 1.7131147540983607e-05, + "loss": 0.8565447330474854, + "step": 419 + }, + { + "epoch": 0.07760621773625434, + "grad_norm": 0.09851158410310745, + "learning_rate": 1.7172131147540985e-05, + "loss": 0.7614431977272034, + "step": 420 + }, + { + "epoch": 0.07779099444515018, + "grad_norm": 0.09649550169706345, + "learning_rate": 1.721311475409836e-05, + "loss": 0.9361590147018433, + "step": 421 + }, + { + "epoch": 0.07797577115404604, + "grad_norm": 0.09242745488882065, + "learning_rate": 1.725409836065574e-05, + "loss": 0.8737956881523132, + "step": 422 + }, + { + "epoch": 0.07816054786294188, + "grad_norm": 0.09545590728521347, + "learning_rate": 1.7295081967213117e-05, + "loss": 0.8091181516647339, + "step": 423 + }, + { + "epoch": 0.07834532457183772, + "grad_norm": 0.10765133798122406, + "learning_rate": 1.7336065573770492e-05, + "loss": 0.9091676473617554, + "step": 424 + }, + { + "epoch": 0.07853010128073357, + "grad_norm": 0.09113151580095291, + "learning_rate": 1.737704918032787e-05, + "loss": 0.6701531410217285, + "step": 425 + }, + { + "epoch": 0.07871487798962941, + "grad_norm": 0.11152313649654388, + "learning_rate": 1.741803278688525e-05, + "loss": 1.0032432079315186, + "step": 426 + }, + { + "epoch": 0.07889965469852525, + "grad_norm": 0.0958375558257103, + "learning_rate": 1.7459016393442624e-05, + "loss": 0.7796164155006409, + "step": 427 + }, + { + "epoch": 0.0790844314074211, + "grad_norm": 0.071872279047966, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.6129750609397888, + "step": 428 + }, + { + "epoch": 0.07926920811631694, + "grad_norm": 0.08849525451660156, + "learning_rate": 1.7540983606557377e-05, + "loss": 0.619393527507782, + "step": 429 + }, + { + "epoch": 0.07945398482521278, + "grad_norm": 0.07459349185228348, + "learning_rate": 1.7581967213114755e-05, + "loss": 0.6974170207977295, + "step": 430 + }, + { + "epoch": 0.07963876153410862, + "grad_norm": 0.0954682007431984, + "learning_rate": 1.7622950819672133e-05, + "loss": 0.860471785068512, + "step": 431 + }, + { + "epoch": 0.07982353824300446, + "grad_norm": 0.10465925931930542, + "learning_rate": 1.766393442622951e-05, + "loss": 1.0079864263534546, + "step": 432 + }, + { + "epoch": 0.08000831495190032, + "grad_norm": 0.09779639542102814, + "learning_rate": 1.7704918032786887e-05, + "loss": 0.7794288396835327, + "step": 433 + }, + { + "epoch": 0.08019309166079616, + "grad_norm": 0.0917540118098259, + "learning_rate": 1.774590163934426e-05, + "loss": 0.8439017534255981, + "step": 434 + }, + { + "epoch": 0.080377868369692, + "grad_norm": 0.10367224365472794, + "learning_rate": 1.7786885245901643e-05, + "loss": 0.9272810816764832, + "step": 435 + }, + { + "epoch": 0.08056264507858785, + "grad_norm": 0.09312202036380768, + "learning_rate": 1.7827868852459018e-05, + "loss": 0.9261062145233154, + "step": 436 + }, + { + "epoch": 0.08074742178748369, + "grad_norm": 0.0932997316122055, + "learning_rate": 1.7868852459016393e-05, + "loss": 0.7352979183197021, + "step": 437 + }, + { + "epoch": 0.08093219849637953, + "grad_norm": 0.10954531282186508, + "learning_rate": 1.790983606557377e-05, + "loss": 1.0063031911849976, + "step": 438 + }, + { + "epoch": 0.08111697520527537, + "grad_norm": 0.09387508779764175, + "learning_rate": 1.795081967213115e-05, + "loss": 0.7922593355178833, + "step": 439 + }, + { + "epoch": 0.08130175191417122, + "grad_norm": 0.09354770928621292, + "learning_rate": 1.7991803278688528e-05, + "loss": 0.9488433003425598, + "step": 440 + }, + { + "epoch": 0.08148652862306706, + "grad_norm": 0.13175411522388458, + "learning_rate": 1.8032786885245903e-05, + "loss": 1.0685322284698486, + "step": 441 + }, + { + "epoch": 0.0816713053319629, + "grad_norm": 0.09272784739732742, + "learning_rate": 1.8073770491803278e-05, + "loss": 0.8308841586112976, + "step": 442 + }, + { + "epoch": 0.08185608204085874, + "grad_norm": 0.09840014576911926, + "learning_rate": 1.8114754098360656e-05, + "loss": 0.7484462261199951, + "step": 443 + }, + { + "epoch": 0.0820408587497546, + "grad_norm": 0.10059419274330139, + "learning_rate": 1.8155737704918035e-05, + "loss": 1.1569838523864746, + "step": 444 + }, + { + "epoch": 0.08222563545865044, + "grad_norm": 0.09600666910409927, + "learning_rate": 1.8196721311475413e-05, + "loss": 0.8261542320251465, + "step": 445 + }, + { + "epoch": 0.08241041216754628, + "grad_norm": 0.09666614979505539, + "learning_rate": 1.8237704918032788e-05, + "loss": 0.8323838710784912, + "step": 446 + }, + { + "epoch": 0.08259518887644213, + "grad_norm": 0.09458190947771072, + "learning_rate": 1.8278688524590166e-05, + "loss": 0.9338691234588623, + "step": 447 + }, + { + "epoch": 0.08277996558533797, + "grad_norm": 0.09368739277124405, + "learning_rate": 1.8319672131147545e-05, + "loss": 0.9827497005462646, + "step": 448 + }, + { + "epoch": 0.08296474229423381, + "grad_norm": 0.08596470952033997, + "learning_rate": 1.836065573770492e-05, + "loss": 0.9741113185882568, + "step": 449 + }, + { + "epoch": 0.08314951900312965, + "grad_norm": 0.08872201293706894, + "learning_rate": 1.8401639344262298e-05, + "loss": 0.9058973789215088, + "step": 450 + }, + { + "epoch": 0.0833342957120255, + "grad_norm": 0.09660264849662781, + "learning_rate": 1.8442622950819673e-05, + "loss": 0.9430114030838013, + "step": 451 + }, + { + "epoch": 0.08351907242092134, + "grad_norm": 0.11228955537080765, + "learning_rate": 1.848360655737705e-05, + "loss": 0.8239207863807678, + "step": 452 + }, + { + "epoch": 0.08370384912981718, + "grad_norm": 0.10609623044729233, + "learning_rate": 1.852459016393443e-05, + "loss": 0.9952499270439148, + "step": 453 + }, + { + "epoch": 0.08388862583871302, + "grad_norm": 0.11035417765378952, + "learning_rate": 1.8565573770491804e-05, + "loss": 1.1380780935287476, + "step": 454 + }, + { + "epoch": 0.08407340254760888, + "grad_norm": 0.11164045333862305, + "learning_rate": 1.8606557377049183e-05, + "loss": 1.0310348272323608, + "step": 455 + }, + { + "epoch": 0.08425817925650472, + "grad_norm": 0.11842917650938034, + "learning_rate": 1.8647540983606558e-05, + "loss": 0.9357023239135742, + "step": 456 + }, + { + "epoch": 0.08444295596540057, + "grad_norm": 0.09763655066490173, + "learning_rate": 1.8688524590163936e-05, + "loss": 0.9334539175033569, + "step": 457 + }, + { + "epoch": 0.08462773267429641, + "grad_norm": 0.11949948966503143, + "learning_rate": 1.8729508196721314e-05, + "loss": 0.9747587442398071, + "step": 458 + }, + { + "epoch": 0.08481250938319225, + "grad_norm": 0.10351543128490448, + "learning_rate": 1.877049180327869e-05, + "loss": 1.0134328603744507, + "step": 459 + }, + { + "epoch": 0.08499728609208809, + "grad_norm": 0.08718964457511902, + "learning_rate": 1.8811475409836068e-05, + "loss": 0.6342071890830994, + "step": 460 + }, + { + "epoch": 0.08518206280098393, + "grad_norm": 0.09610489755868912, + "learning_rate": 1.8852459016393446e-05, + "loss": 0.9932312369346619, + "step": 461 + }, + { + "epoch": 0.08536683950987978, + "grad_norm": 0.08185546100139618, + "learning_rate": 1.889344262295082e-05, + "loss": 0.7839753031730652, + "step": 462 + }, + { + "epoch": 0.08555161621877562, + "grad_norm": 0.11200262606143951, + "learning_rate": 1.89344262295082e-05, + "loss": 1.0270265340805054, + "step": 463 + }, + { + "epoch": 0.08573639292767146, + "grad_norm": 0.11818201094865799, + "learning_rate": 1.8975409836065574e-05, + "loss": 1.0113353729248047, + "step": 464 + }, + { + "epoch": 0.0859211696365673, + "grad_norm": 0.10922921448945999, + "learning_rate": 1.9016393442622952e-05, + "loss": 0.9178203344345093, + "step": 465 + }, + { + "epoch": 0.08610594634546316, + "grad_norm": 0.09575849026441574, + "learning_rate": 1.905737704918033e-05, + "loss": 0.7755801677703857, + "step": 466 + }, + { + "epoch": 0.086290723054359, + "grad_norm": 0.11132930964231491, + "learning_rate": 1.9098360655737706e-05, + "loss": 1.046008586883545, + "step": 467 + }, + { + "epoch": 0.08647549976325485, + "grad_norm": 0.07992382347583771, + "learning_rate": 1.9139344262295084e-05, + "loss": 0.8655605912208557, + "step": 468 + }, + { + "epoch": 0.08666027647215069, + "grad_norm": 0.09868017584085464, + "learning_rate": 1.918032786885246e-05, + "loss": 0.8974450826644897, + "step": 469 + }, + { + "epoch": 0.08684505318104653, + "grad_norm": 0.09519588947296143, + "learning_rate": 1.9221311475409837e-05, + "loss": 0.8987129926681519, + "step": 470 + }, + { + "epoch": 0.08702982988994237, + "grad_norm": 0.111813485622406, + "learning_rate": 1.9262295081967216e-05, + "loss": 0.9828987717628479, + "step": 471 + }, + { + "epoch": 0.08721460659883822, + "grad_norm": 0.08673518896102905, + "learning_rate": 1.930327868852459e-05, + "loss": 0.8034600615501404, + "step": 472 + }, + { + "epoch": 0.08739938330773406, + "grad_norm": 0.0696590393781662, + "learning_rate": 1.934426229508197e-05, + "loss": 0.6808267831802368, + "step": 473 + }, + { + "epoch": 0.0875841600166299, + "grad_norm": 0.08259183168411255, + "learning_rate": 1.9385245901639347e-05, + "loss": 0.8313323855400085, + "step": 474 + }, + { + "epoch": 0.08776893672552574, + "grad_norm": 0.10918860882520676, + "learning_rate": 1.9426229508196722e-05, + "loss": 1.0069092512130737, + "step": 475 + }, + { + "epoch": 0.08795371343442158, + "grad_norm": 0.09052850306034088, + "learning_rate": 1.94672131147541e-05, + "loss": 0.7818104028701782, + "step": 476 + }, + { + "epoch": 0.08813849014331744, + "grad_norm": 0.10562017560005188, + "learning_rate": 1.9508196721311475e-05, + "loss": 0.8447665572166443, + "step": 477 + }, + { + "epoch": 0.08832326685221328, + "grad_norm": 0.10511931777000427, + "learning_rate": 1.9549180327868854e-05, + "loss": 1.037174940109253, + "step": 478 + }, + { + "epoch": 0.08850804356110913, + "grad_norm": 0.11579558998346329, + "learning_rate": 1.9590163934426232e-05, + "loss": 0.9202028512954712, + "step": 479 + }, + { + "epoch": 0.08869282027000497, + "grad_norm": 0.10364022850990295, + "learning_rate": 1.9631147540983607e-05, + "loss": 0.9401568174362183, + "step": 480 + }, + { + "epoch": 0.08887759697890081, + "grad_norm": 0.08251364529132843, + "learning_rate": 1.9672131147540985e-05, + "loss": 0.5896936655044556, + "step": 481 + }, + { + "epoch": 0.08906237368779665, + "grad_norm": 0.10294211655855179, + "learning_rate": 1.971311475409836e-05, + "loss": 0.7742730379104614, + "step": 482 + }, + { + "epoch": 0.0892471503966925, + "grad_norm": 0.11383319646120071, + "learning_rate": 1.975409836065574e-05, + "loss": 1.1207690238952637, + "step": 483 + }, + { + "epoch": 0.08943192710558834, + "grad_norm": 0.09466154128313065, + "learning_rate": 1.9795081967213117e-05, + "loss": 0.7100986838340759, + "step": 484 + }, + { + "epoch": 0.08961670381448418, + "grad_norm": 0.10002315789461136, + "learning_rate": 1.9836065573770492e-05, + "loss": 0.967641294002533, + "step": 485 + }, + { + "epoch": 0.08980148052338002, + "grad_norm": 0.1255611926317215, + "learning_rate": 1.987704918032787e-05, + "loss": 0.9620874524116516, + "step": 486 + }, + { + "epoch": 0.08998625723227587, + "grad_norm": 0.09591948240995407, + "learning_rate": 1.991803278688525e-05, + "loss": 0.9446707367897034, + "step": 487 + }, + { + "epoch": 0.09017103394117172, + "grad_norm": 0.11456209421157837, + "learning_rate": 1.9959016393442627e-05, + "loss": 1.0979769229888916, + "step": 488 + }, + { + "epoch": 0.09035581065006756, + "grad_norm": 0.10334254056215286, + "learning_rate": 2e-05, + "loss": 0.9240891337394714, + "step": 489 + }, + { + "epoch": 0.0905405873589634, + "grad_norm": 0.09890230000019073, + "learning_rate": 1.9999999801015645e-05, + "loss": 0.8223688006401062, + "step": 490 + }, + { + "epoch": 0.09072536406785925, + "grad_norm": 0.10173583775758743, + "learning_rate": 1.9999999204062582e-05, + "loss": 0.8781738877296448, + "step": 491 + }, + { + "epoch": 0.09091014077675509, + "grad_norm": 0.09750431776046753, + "learning_rate": 1.9999998209140837e-05, + "loss": 0.8199716806411743, + "step": 492 + }, + { + "epoch": 0.09109491748565093, + "grad_norm": 0.09942717105150223, + "learning_rate": 1.999999681625045e-05, + "loss": 0.7402327656745911, + "step": 493 + }, + { + "epoch": 0.09127969419454678, + "grad_norm": 0.09399393945932388, + "learning_rate": 1.999999502539147e-05, + "loss": 0.9684867262840271, + "step": 494 + }, + { + "epoch": 0.09146447090344262, + "grad_norm": 0.08668413758277893, + "learning_rate": 1.999999283656398e-05, + "loss": 0.6834291219711304, + "step": 495 + }, + { + "epoch": 0.09164924761233846, + "grad_norm": 0.10017100721597672, + "learning_rate": 1.999999024976806e-05, + "loss": 0.9483546018600464, + "step": 496 + }, + { + "epoch": 0.0918340243212343, + "grad_norm": 0.0832497626543045, + "learning_rate": 1.9999987265003815e-05, + "loss": 0.5636377930641174, + "step": 497 + }, + { + "epoch": 0.09201880103013015, + "grad_norm": 0.10710781812667847, + "learning_rate": 1.999998388227136e-05, + "loss": 0.8883796334266663, + "step": 498 + }, + { + "epoch": 0.092203577739026, + "grad_norm": 0.10211720317602158, + "learning_rate": 1.9999980101570835e-05, + "loss": 0.7937446236610413, + "step": 499 + }, + { + "epoch": 0.09238835444792184, + "grad_norm": 0.08369703590869904, + "learning_rate": 1.9999975922902386e-05, + "loss": 0.7668294310569763, + "step": 500 + }, + { + "epoch": 0.09238835444792184, + "eval_loss": 0.9341711401939392, + "eval_runtime": 170.5358, + "eval_samples_per_second": 106.893, + "eval_steps_per_second": 13.364, + "step": 500 + }, + { + "epoch": 0.09257313115681769, + "grad_norm": 0.10618746280670166, + "learning_rate": 1.999997134626618e-05, + "loss": 0.7023143768310547, + "step": 501 + }, + { + "epoch": 0.09275790786571353, + "grad_norm": 0.09422959387302399, + "learning_rate": 1.9999966371662403e-05, + "loss": 0.9204212427139282, + "step": 502 + }, + { + "epoch": 0.09294268457460937, + "grad_norm": 0.09210536628961563, + "learning_rate": 1.999996099909125e-05, + "loss": 0.8752090334892273, + "step": 503 + }, + { + "epoch": 0.09312746128350521, + "grad_norm": 0.10762644559144974, + "learning_rate": 1.9999955228552934e-05, + "loss": 0.8156858682632446, + "step": 504 + }, + { + "epoch": 0.09331223799240106, + "grad_norm": 0.12836356461048126, + "learning_rate": 1.999994906004769e-05, + "loss": 1.0994911193847656, + "step": 505 + }, + { + "epoch": 0.0934970147012969, + "grad_norm": 0.12129420787096024, + "learning_rate": 1.9999942493575754e-05, + "loss": 0.8530201315879822, + "step": 506 + }, + { + "epoch": 0.09368179141019274, + "grad_norm": 0.0979141965508461, + "learning_rate": 1.9999935529137393e-05, + "loss": 0.7191640138626099, + "step": 507 + }, + { + "epoch": 0.09386656811908858, + "grad_norm": 0.08052355796098709, + "learning_rate": 1.9999928166732884e-05, + "loss": 0.9695467948913574, + "step": 508 + }, + { + "epoch": 0.09405134482798443, + "grad_norm": 0.09615960717201233, + "learning_rate": 1.999992040636252e-05, + "loss": 0.7721450328826904, + "step": 509 + }, + { + "epoch": 0.09423612153688028, + "grad_norm": 0.12196368724107742, + "learning_rate": 1.9999912248026613e-05, + "loss": 0.9920752644538879, + "step": 510 + }, + { + "epoch": 0.09442089824577612, + "grad_norm": 0.08245757967233658, + "learning_rate": 1.9999903691725478e-05, + "loss": 0.8075022101402283, + "step": 511 + }, + { + "epoch": 0.09460567495467197, + "grad_norm": 0.11245324462652206, + "learning_rate": 1.9999894737459466e-05, + "loss": 0.8487539291381836, + "step": 512 + }, + { + "epoch": 0.09479045166356781, + "grad_norm": 0.08481521904468536, + "learning_rate": 1.9999885385228928e-05, + "loss": 0.6565818786621094, + "step": 513 + }, + { + "epoch": 0.09497522837246365, + "grad_norm": 0.11278005689382553, + "learning_rate": 1.9999875635034237e-05, + "loss": 0.8271149396896362, + "step": 514 + }, + { + "epoch": 0.0951600050813595, + "grad_norm": 0.09750741720199585, + "learning_rate": 1.9999865486875784e-05, + "loss": 0.8503895401954651, + "step": 515 + }, + { + "epoch": 0.09534478179025534, + "grad_norm": 0.11001479625701904, + "learning_rate": 1.9999854940753964e-05, + "loss": 0.9437558054924011, + "step": 516 + }, + { + "epoch": 0.09552955849915118, + "grad_norm": 0.12122728675603867, + "learning_rate": 1.999984399666921e-05, + "loss": 0.9913941025733948, + "step": 517 + }, + { + "epoch": 0.09571433520804702, + "grad_norm": 0.11051741242408752, + "learning_rate": 1.9999832654621945e-05, + "loss": 0.9222496747970581, + "step": 518 + }, + { + "epoch": 0.09589911191694286, + "grad_norm": 0.08907787501811981, + "learning_rate": 1.999982091461263e-05, + "loss": 0.6398655772209167, + "step": 519 + }, + { + "epoch": 0.0960838886258387, + "grad_norm": 0.10980657488107681, + "learning_rate": 1.9999808776641724e-05, + "loss": 1.0253318548202515, + "step": 520 + }, + { + "epoch": 0.09626866533473456, + "grad_norm": 0.09272947907447815, + "learning_rate": 1.9999796240709718e-05, + "loss": 0.8119353652000427, + "step": 521 + }, + { + "epoch": 0.0964534420436304, + "grad_norm": 0.07884709537029266, + "learning_rate": 1.9999783306817104e-05, + "loss": 0.6862602829933167, + "step": 522 + }, + { + "epoch": 0.09663821875252625, + "grad_norm": 0.08118434995412827, + "learning_rate": 1.99997699749644e-05, + "loss": 0.580864429473877, + "step": 523 + }, + { + "epoch": 0.09682299546142209, + "grad_norm": 0.09141118079423904, + "learning_rate": 1.999975624515214e-05, + "loss": 0.6712226271629333, + "step": 524 + }, + { + "epoch": 0.09700777217031793, + "grad_norm": 0.11130113899707794, + "learning_rate": 1.9999742117380863e-05, + "loss": 0.8245023488998413, + "step": 525 + }, + { + "epoch": 0.09719254887921377, + "grad_norm": 0.08998782932758331, + "learning_rate": 1.9999727591651136e-05, + "loss": 0.7681167125701904, + "step": 526 + }, + { + "epoch": 0.09737732558810962, + "grad_norm": 0.10016212612390518, + "learning_rate": 1.9999712667963535e-05, + "loss": 0.8970088958740234, + "step": 527 + }, + { + "epoch": 0.09756210229700546, + "grad_norm": 0.0988573282957077, + "learning_rate": 1.9999697346318653e-05, + "loss": 1.0120849609375, + "step": 528 + }, + { + "epoch": 0.0977468790059013, + "grad_norm": 0.0811084657907486, + "learning_rate": 1.9999681626717105e-05, + "loss": 0.6222402453422546, + "step": 529 + }, + { + "epoch": 0.09793165571479714, + "grad_norm": 0.11052072793245316, + "learning_rate": 1.9999665509159513e-05, + "loss": 0.8718100786209106, + "step": 530 + }, + { + "epoch": 0.09811643242369299, + "grad_norm": 0.10513053089380264, + "learning_rate": 1.999964899364652e-05, + "loss": 0.8752185702323914, + "step": 531 + }, + { + "epoch": 0.09830120913258884, + "grad_norm": 0.09590096026659012, + "learning_rate": 1.999963208017878e-05, + "loss": 0.9732754230499268, + "step": 532 + }, + { + "epoch": 0.09848598584148469, + "grad_norm": 0.07648955285549164, + "learning_rate": 1.9999614768756968e-05, + "loss": 0.5848802924156189, + "step": 533 + }, + { + "epoch": 0.09867076255038053, + "grad_norm": 0.09366374462842941, + "learning_rate": 1.9999597059381773e-05, + "loss": 0.7161018252372742, + "step": 534 + }, + { + "epoch": 0.09885553925927637, + "grad_norm": 0.10417844355106354, + "learning_rate": 1.9999578952053896e-05, + "loss": 0.7374823689460754, + "step": 535 + }, + { + "epoch": 0.09904031596817221, + "grad_norm": 0.07650736719369888, + "learning_rate": 1.999956044677407e-05, + "loss": 0.678923487663269, + "step": 536 + }, + { + "epoch": 0.09922509267706806, + "grad_norm": 0.07849035412073135, + "learning_rate": 1.9999541543543017e-05, + "loss": 0.73483806848526, + "step": 537 + }, + { + "epoch": 0.0994098693859639, + "grad_norm": 0.09365969151258469, + "learning_rate": 1.9999522242361494e-05, + "loss": 0.6670161485671997, + "step": 538 + }, + { + "epoch": 0.09959464609485974, + "grad_norm": 0.08298808336257935, + "learning_rate": 1.9999502543230272e-05, + "loss": 0.7895097136497498, + "step": 539 + }, + { + "epoch": 0.09977942280375558, + "grad_norm": 0.09531105309724808, + "learning_rate": 1.9999482446150137e-05, + "loss": 0.7203987240791321, + "step": 540 + }, + { + "epoch": 0.09996419951265142, + "grad_norm": 0.10739698261022568, + "learning_rate": 1.999946195112188e-05, + "loss": 0.8619695901870728, + "step": 541 + }, + { + "epoch": 0.10014897622154727, + "grad_norm": 0.10465580970048904, + "learning_rate": 1.9999441058146324e-05, + "loss": 1.1077197790145874, + "step": 542 + }, + { + "epoch": 0.10033375293044312, + "grad_norm": 0.11346356570720673, + "learning_rate": 1.9999419767224296e-05, + "loss": 1.1599786281585693, + "step": 543 + }, + { + "epoch": 0.10051852963933897, + "grad_norm": 0.09236248582601547, + "learning_rate": 1.9999398078356648e-05, + "loss": 0.8175798654556274, + "step": 544 + }, + { + "epoch": 0.10070330634823481, + "grad_norm": 0.07506538927555084, + "learning_rate": 1.9999375991544237e-05, + "loss": 0.7683030962944031, + "step": 545 + }, + { + "epoch": 0.10088808305713065, + "grad_norm": 0.08747687190771103, + "learning_rate": 1.999935350678795e-05, + "loss": 0.7463568449020386, + "step": 546 + }, + { + "epoch": 0.1010728597660265, + "grad_norm": 0.11351469159126282, + "learning_rate": 1.9999330624088677e-05, + "loss": 0.9395483136177063, + "step": 547 + }, + { + "epoch": 0.10125763647492234, + "grad_norm": 0.09075150638818741, + "learning_rate": 1.9999307343447326e-05, + "loss": 0.854822039604187, + "step": 548 + }, + { + "epoch": 0.10144241318381818, + "grad_norm": 0.08667241036891937, + "learning_rate": 1.9999283664864828e-05, + "loss": 0.698087215423584, + "step": 549 + }, + { + "epoch": 0.10162718989271402, + "grad_norm": 0.07823988050222397, + "learning_rate": 1.9999259588342124e-05, + "loss": 0.7376258969306946, + "step": 550 + }, + { + "epoch": 0.10181196660160986, + "grad_norm": 0.10926726460456848, + "learning_rate": 1.999923511388017e-05, + "loss": 1.0011591911315918, + "step": 551 + }, + { + "epoch": 0.1019967433105057, + "grad_norm": 0.1054510846734047, + "learning_rate": 1.9999210241479946e-05, + "loss": 1.009000301361084, + "step": 552 + }, + { + "epoch": 0.10218152001940155, + "grad_norm": 0.08677765727043152, + "learning_rate": 1.9999184971142433e-05, + "loss": 0.6511209011077881, + "step": 553 + }, + { + "epoch": 0.1023662967282974, + "grad_norm": 0.09379676729440689, + "learning_rate": 1.9999159302868646e-05, + "loss": 0.9610671997070312, + "step": 554 + }, + { + "epoch": 0.10255107343719325, + "grad_norm": 0.09060139954090118, + "learning_rate": 1.99991332366596e-05, + "loss": 0.883156955242157, + "step": 555 + }, + { + "epoch": 0.10273585014608909, + "grad_norm": 0.10240011662244797, + "learning_rate": 1.9999106772516334e-05, + "loss": 1.0079379081726074, + "step": 556 + }, + { + "epoch": 0.10292062685498493, + "grad_norm": 0.09460246562957764, + "learning_rate": 1.9999079910439905e-05, + "loss": 0.8006080985069275, + "step": 557 + }, + { + "epoch": 0.10310540356388077, + "grad_norm": 0.09683685004711151, + "learning_rate": 1.9999052650431374e-05, + "loss": 0.8120759129524231, + "step": 558 + }, + { + "epoch": 0.10329018027277662, + "grad_norm": 0.1001666709780693, + "learning_rate": 1.9999024992491837e-05, + "loss": 0.951696515083313, + "step": 559 + }, + { + "epoch": 0.10347495698167246, + "grad_norm": 0.08484689146280289, + "learning_rate": 1.999899693662238e-05, + "loss": 0.6899352073669434, + "step": 560 + }, + { + "epoch": 0.1036597336905683, + "grad_norm": 0.10029439628124237, + "learning_rate": 1.9998968482824134e-05, + "loss": 0.9168360829353333, + "step": 561 + }, + { + "epoch": 0.10384451039946414, + "grad_norm": 0.09970587491989136, + "learning_rate": 1.999893963109822e-05, + "loss": 0.9419733285903931, + "step": 562 + }, + { + "epoch": 0.10402928710835999, + "grad_norm": 0.0891055092215538, + "learning_rate": 1.9998910381445794e-05, + "loss": 0.762864351272583, + "step": 563 + }, + { + "epoch": 0.10421406381725583, + "grad_norm": 0.0846485048532486, + "learning_rate": 1.999888073386802e-05, + "loss": 0.7737445831298828, + "step": 564 + }, + { + "epoch": 0.10439884052615168, + "grad_norm": 0.08326343446969986, + "learning_rate": 1.999885068836607e-05, + "loss": 0.7202712893486023, + "step": 565 + }, + { + "epoch": 0.10458361723504753, + "grad_norm": 0.08514825999736786, + "learning_rate": 1.999882024494115e-05, + "loss": 0.8478882312774658, + "step": 566 + }, + { + "epoch": 0.10476839394394337, + "grad_norm": 0.07370934635400772, + "learning_rate": 1.9998789403594464e-05, + "loss": 0.680758535861969, + "step": 567 + }, + { + "epoch": 0.10495317065283921, + "grad_norm": 0.09160866588354111, + "learning_rate": 1.9998758164327242e-05, + "loss": 0.8851611018180847, + "step": 568 + }, + { + "epoch": 0.10513794736173505, + "grad_norm": 0.08800017088651657, + "learning_rate": 1.999872652714073e-05, + "loss": 0.7214294075965881, + "step": 569 + }, + { + "epoch": 0.1053227240706309, + "grad_norm": 0.07336652278900146, + "learning_rate": 1.999869449203618e-05, + "loss": 0.6500799655914307, + "step": 570 + }, + { + "epoch": 0.10550750077952674, + "grad_norm": 0.09790507704019547, + "learning_rate": 1.9998662059014874e-05, + "loss": 0.9606900811195374, + "step": 571 + }, + { + "epoch": 0.10569227748842258, + "grad_norm": 0.08473934978246689, + "learning_rate": 1.99986292280781e-05, + "loss": 0.7611656785011292, + "step": 572 + }, + { + "epoch": 0.10587705419731842, + "grad_norm": 0.08579279482364655, + "learning_rate": 1.999859599922716e-05, + "loss": 0.6954261660575867, + "step": 573 + }, + { + "epoch": 0.10606183090621427, + "grad_norm": 0.08276309818029404, + "learning_rate": 1.9998562372463387e-05, + "loss": 0.6657392978668213, + "step": 574 + }, + { + "epoch": 0.10624660761511011, + "grad_norm": 0.07595104724168777, + "learning_rate": 1.9998528347788108e-05, + "loss": 0.7012154459953308, + "step": 575 + }, + { + "epoch": 0.10643138432400596, + "grad_norm": 0.09763680398464203, + "learning_rate": 1.9998493925202686e-05, + "loss": 0.8681825399398804, + "step": 576 + }, + { + "epoch": 0.10661616103290181, + "grad_norm": 0.09107891470193863, + "learning_rate": 1.9998459104708485e-05, + "loss": 0.8562954664230347, + "step": 577 + }, + { + "epoch": 0.10680093774179765, + "grad_norm": 0.10249120742082596, + "learning_rate": 1.999842388630689e-05, + "loss": 0.8481846451759338, + "step": 578 + }, + { + "epoch": 0.10698571445069349, + "grad_norm": 0.09755781292915344, + "learning_rate": 1.999838826999931e-05, + "loss": 0.7242237329483032, + "step": 579 + }, + { + "epoch": 0.10717049115958933, + "grad_norm": 0.06296950578689575, + "learning_rate": 1.9998352255787155e-05, + "loss": 0.6511966586112976, + "step": 580 + }, + { + "epoch": 0.10735526786848518, + "grad_norm": 0.08668170869350433, + "learning_rate": 1.9998315843671862e-05, + "loss": 0.8875470161437988, + "step": 581 + }, + { + "epoch": 0.10754004457738102, + "grad_norm": 0.10093273967504501, + "learning_rate": 1.9998279033654883e-05, + "loss": 0.8163881301879883, + "step": 582 + }, + { + "epoch": 0.10772482128627686, + "grad_norm": 0.08768948167562485, + "learning_rate": 1.9998241825737675e-05, + "loss": 0.7841170430183411, + "step": 583 + }, + { + "epoch": 0.1079095979951727, + "grad_norm": 0.10083579272031784, + "learning_rate": 1.9998204219921722e-05, + "loss": 0.8083724975585938, + "step": 584 + }, + { + "epoch": 0.10809437470406855, + "grad_norm": 0.0848889872431755, + "learning_rate": 1.9998166216208522e-05, + "loss": 0.6563320159912109, + "step": 585 + }, + { + "epoch": 0.10827915141296439, + "grad_norm": 0.08785545080900192, + "learning_rate": 1.999812781459959e-05, + "loss": 0.9830499291419983, + "step": 586 + }, + { + "epoch": 0.10846392812186025, + "grad_norm": 0.11273134499788284, + "learning_rate": 1.9998089015096445e-05, + "loss": 1.1327345371246338, + "step": 587 + }, + { + "epoch": 0.10864870483075609, + "grad_norm": 0.10496936738491058, + "learning_rate": 1.999804981770064e-05, + "loss": 0.9502683281898499, + "step": 588 + }, + { + "epoch": 0.10883348153965193, + "grad_norm": 0.07698635756969452, + "learning_rate": 1.9998010222413736e-05, + "loss": 0.5978910326957703, + "step": 589 + }, + { + "epoch": 0.10901825824854777, + "grad_norm": 0.11628872156143188, + "learning_rate": 1.9997970229237302e-05, + "loss": 0.9122211337089539, + "step": 590 + }, + { + "epoch": 0.10920303495744361, + "grad_norm": 0.10752473771572113, + "learning_rate": 1.9997929838172935e-05, + "loss": 1.009599208831787, + "step": 591 + }, + { + "epoch": 0.10938781166633946, + "grad_norm": 0.10253801196813583, + "learning_rate": 1.9997889049222233e-05, + "loss": 0.8574459552764893, + "step": 592 + }, + { + "epoch": 0.1095725883752353, + "grad_norm": 0.09937529265880585, + "learning_rate": 1.999784786238683e-05, + "loss": 0.8006525039672852, + "step": 593 + }, + { + "epoch": 0.10975736508413114, + "grad_norm": 0.10417422652244568, + "learning_rate": 1.9997806277668364e-05, + "loss": 1.0326251983642578, + "step": 594 + }, + { + "epoch": 0.10994214179302698, + "grad_norm": 0.08065392822027206, + "learning_rate": 1.9997764295068486e-05, + "loss": 0.7957016825675964, + "step": 595 + }, + { + "epoch": 0.11012691850192283, + "grad_norm": 0.12417735159397125, + "learning_rate": 1.9997721914588867e-05, + "loss": 1.0323731899261475, + "step": 596 + }, + { + "epoch": 0.11031169521081868, + "grad_norm": 0.08042848110198975, + "learning_rate": 1.9997679136231195e-05, + "loss": 0.8618655204772949, + "step": 597 + }, + { + "epoch": 0.11049647191971453, + "grad_norm": 0.12250402569770813, + "learning_rate": 1.999763595999717e-05, + "loss": 1.1520699262619019, + "step": 598 + }, + { + "epoch": 0.11068124862861037, + "grad_norm": 0.08132418990135193, + "learning_rate": 1.9997592385888517e-05, + "loss": 0.7422507405281067, + "step": 599 + }, + { + "epoch": 0.11086602533750621, + "grad_norm": 0.10508530586957932, + "learning_rate": 1.9997548413906964e-05, + "loss": 0.7894071340560913, + "step": 600 + }, + { + "epoch": 0.11105080204640205, + "grad_norm": 0.10218667984008789, + "learning_rate": 1.999750404405426e-05, + "loss": 0.8501331210136414, + "step": 601 + }, + { + "epoch": 0.1112355787552979, + "grad_norm": 0.10211113095283508, + "learning_rate": 1.9997459276332174e-05, + "loss": 0.9509809017181396, + "step": 602 + }, + { + "epoch": 0.11142035546419374, + "grad_norm": 0.10265897959470749, + "learning_rate": 1.9997414110742488e-05, + "loss": 1.0141488313674927, + "step": 603 + }, + { + "epoch": 0.11160513217308958, + "grad_norm": 0.10459209978580475, + "learning_rate": 1.9997368547286996e-05, + "loss": 0.9121615886688232, + "step": 604 + }, + { + "epoch": 0.11178990888198542, + "grad_norm": 0.09054961055517197, + "learning_rate": 1.9997322585967516e-05, + "loss": 0.808859646320343, + "step": 605 + }, + { + "epoch": 0.11197468559088126, + "grad_norm": 0.09328276664018631, + "learning_rate": 1.9997276226785872e-05, + "loss": 0.8714438080787659, + "step": 606 + }, + { + "epoch": 0.11215946229977711, + "grad_norm": 0.10548422485589981, + "learning_rate": 1.9997229469743915e-05, + "loss": 0.9935821294784546, + "step": 607 + }, + { + "epoch": 0.11234423900867296, + "grad_norm": 0.09083138406276703, + "learning_rate": 1.99971823148435e-05, + "loss": 0.7940509915351868, + "step": 608 + }, + { + "epoch": 0.1125290157175688, + "grad_norm": 0.11800063401460648, + "learning_rate": 1.999713476208651e-05, + "loss": 1.1958283185958862, + "step": 609 + }, + { + "epoch": 0.11271379242646465, + "grad_norm": 0.10174321383237839, + "learning_rate": 1.999708681147483e-05, + "loss": 0.7750235199928284, + "step": 610 + }, + { + "epoch": 0.11289856913536049, + "grad_norm": 0.07584797590970993, + "learning_rate": 1.9997038463010373e-05, + "loss": 0.632562518119812, + "step": 611 + }, + { + "epoch": 0.11308334584425633, + "grad_norm": 0.08101744204759598, + "learning_rate": 1.999698971669506e-05, + "loss": 0.5663021206855774, + "step": 612 + }, + { + "epoch": 0.11326812255315218, + "grad_norm": 0.09835753589868546, + "learning_rate": 1.999694057253083e-05, + "loss": 0.905450165271759, + "step": 613 + }, + { + "epoch": 0.11345289926204802, + "grad_norm": 0.09858648478984833, + "learning_rate": 1.999689103051965e-05, + "loss": 0.92292320728302, + "step": 614 + }, + { + "epoch": 0.11363767597094386, + "grad_norm": 0.06762873381376266, + "learning_rate": 1.9996841090663476e-05, + "loss": 0.5619035363197327, + "step": 615 + }, + { + "epoch": 0.1138224526798397, + "grad_norm": 0.09497879445552826, + "learning_rate": 1.9996790752964305e-05, + "loss": 0.7997409701347351, + "step": 616 + }, + { + "epoch": 0.11400722938873555, + "grad_norm": 0.09728559106588364, + "learning_rate": 1.9996740017424143e-05, + "loss": 1.0197670459747314, + "step": 617 + }, + { + "epoch": 0.11419200609763139, + "grad_norm": 0.09417632222175598, + "learning_rate": 1.9996688884044995e-05, + "loss": 0.8190154433250427, + "step": 618 + }, + { + "epoch": 0.11437678280652724, + "grad_norm": 0.08123410493135452, + "learning_rate": 1.999663735282891e-05, + "loss": 0.7777723670005798, + "step": 619 + }, + { + "epoch": 0.11456155951542309, + "grad_norm": 0.08742178231477737, + "learning_rate": 1.9996585423777936e-05, + "loss": 0.7551705837249756, + "step": 620 + }, + { + "epoch": 0.11474633622431893, + "grad_norm": 0.07352913916110992, + "learning_rate": 1.9996533096894133e-05, + "loss": 0.6663444638252258, + "step": 621 + }, + { + "epoch": 0.11493111293321477, + "grad_norm": 0.07812623679637909, + "learning_rate": 1.999648037217959e-05, + "loss": 0.7678573131561279, + "step": 622 + }, + { + "epoch": 0.11511588964211061, + "grad_norm": 0.0896257683634758, + "learning_rate": 1.9996427249636403e-05, + "loss": 0.7776379585266113, + "step": 623 + }, + { + "epoch": 0.11530066635100646, + "grad_norm": 0.09168458729982376, + "learning_rate": 1.9996373729266687e-05, + "loss": 0.828106701374054, + "step": 624 + }, + { + "epoch": 0.1154854430599023, + "grad_norm": 0.09804865717887878, + "learning_rate": 1.999631981107257e-05, + "loss": 0.8594550490379333, + "step": 625 + }, + { + "epoch": 0.11567021976879814, + "grad_norm": 0.11177453398704529, + "learning_rate": 1.99962654950562e-05, + "loss": 0.9018521904945374, + "step": 626 + }, + { + "epoch": 0.11585499647769398, + "grad_norm": 0.10924191772937775, + "learning_rate": 1.9996210781219738e-05, + "loss": 0.9099810719490051, + "step": 627 + }, + { + "epoch": 0.11603977318658983, + "grad_norm": 0.08882026374340057, + "learning_rate": 1.999615566956536e-05, + "loss": 0.7561041116714478, + "step": 628 + }, + { + "epoch": 0.11622454989548567, + "grad_norm": 0.1086808294057846, + "learning_rate": 1.999610016009526e-05, + "loss": 1.036689043045044, + "step": 629 + }, + { + "epoch": 0.11640932660438152, + "grad_norm": 0.08189629763364792, + "learning_rate": 1.9996044252811647e-05, + "loss": 0.7450594305992126, + "step": 630 + }, + { + "epoch": 0.11659410331327737, + "grad_norm": 0.08234315365552902, + "learning_rate": 1.9995987947716746e-05, + "loss": 0.6694872379302979, + "step": 631 + }, + { + "epoch": 0.11677888002217321, + "grad_norm": 0.07986690104007721, + "learning_rate": 1.99959312448128e-05, + "loss": 0.6215049028396606, + "step": 632 + }, + { + "epoch": 0.11696365673106905, + "grad_norm": 0.08140669018030167, + "learning_rate": 1.9995874144102065e-05, + "loss": 0.6296094655990601, + "step": 633 + }, + { + "epoch": 0.1171484334399649, + "grad_norm": 0.07857182621955872, + "learning_rate": 1.9995816645586808e-05, + "loss": 0.7098954916000366, + "step": 634 + }, + { + "epoch": 0.11733321014886074, + "grad_norm": 0.10621404647827148, + "learning_rate": 1.9995758749269324e-05, + "loss": 1.011143445968628, + "step": 635 + }, + { + "epoch": 0.11751798685775658, + "grad_norm": 0.09262366592884064, + "learning_rate": 1.9995700455151913e-05, + "loss": 0.7535731792449951, + "step": 636 + }, + { + "epoch": 0.11770276356665242, + "grad_norm": 0.07685394585132599, + "learning_rate": 1.99956417632369e-05, + "loss": 0.7163308262825012, + "step": 637 + }, + { + "epoch": 0.11788754027554826, + "grad_norm": 0.10388977080583572, + "learning_rate": 1.9995582673526613e-05, + "loss": 0.8714351654052734, + "step": 638 + }, + { + "epoch": 0.1180723169844441, + "grad_norm": 0.1180369183421135, + "learning_rate": 1.999552318602341e-05, + "loss": 1.1385008096694946, + "step": 639 + }, + { + "epoch": 0.11825709369333995, + "grad_norm": 0.08246862888336182, + "learning_rate": 1.9995463300729653e-05, + "loss": 0.6757149696350098, + "step": 640 + }, + { + "epoch": 0.1184418704022358, + "grad_norm": 0.116151362657547, + "learning_rate": 1.999540301764773e-05, + "loss": 1.0220239162445068, + "step": 641 + }, + { + "epoch": 0.11862664711113165, + "grad_norm": 0.09925840049982071, + "learning_rate": 1.9995342336780042e-05, + "loss": 0.8448605537414551, + "step": 642 + }, + { + "epoch": 0.11881142382002749, + "grad_norm": 0.08795686811208725, + "learning_rate": 1.9995281258128994e-05, + "loss": 0.9142518043518066, + "step": 643 + }, + { + "epoch": 0.11899620052892333, + "grad_norm": 0.07408778369426727, + "learning_rate": 1.999521978169703e-05, + "loss": 0.8021790385246277, + "step": 644 + }, + { + "epoch": 0.11918097723781917, + "grad_norm": 0.08842435479164124, + "learning_rate": 1.9995157907486587e-05, + "loss": 0.994310736656189, + "step": 645 + }, + { + "epoch": 0.11936575394671502, + "grad_norm": 0.10104726999998093, + "learning_rate": 1.999509563550013e-05, + "loss": 1.126815915107727, + "step": 646 + }, + { + "epoch": 0.11955053065561086, + "grad_norm": 0.1172916516661644, + "learning_rate": 1.9995032965740137e-05, + "loss": 1.0974340438842773, + "step": 647 + }, + { + "epoch": 0.1197353073645067, + "grad_norm": 0.08660288900136948, + "learning_rate": 1.9994969898209102e-05, + "loss": 0.7137119770050049, + "step": 648 + }, + { + "epoch": 0.11992008407340254, + "grad_norm": 0.09413591772317886, + "learning_rate": 1.9994906432909537e-05, + "loss": 0.8359218239784241, + "step": 649 + }, + { + "epoch": 0.12010486078229839, + "grad_norm": 0.07111218571662903, + "learning_rate": 1.9994842569843965e-05, + "loss": 0.6207714676856995, + "step": 650 + }, + { + "epoch": 0.12028963749119423, + "grad_norm": 0.07730361819267273, + "learning_rate": 1.999477830901493e-05, + "loss": 0.6430050134658813, + "step": 651 + }, + { + "epoch": 0.12047441420009009, + "grad_norm": 0.09931548684835434, + "learning_rate": 1.9994713650424985e-05, + "loss": 0.8501662611961365, + "step": 652 + }, + { + "epoch": 0.12065919090898593, + "grad_norm": 0.12040199339389801, + "learning_rate": 1.9994648594076706e-05, + "loss": 0.983227014541626, + "step": 653 + }, + { + "epoch": 0.12084396761788177, + "grad_norm": 0.10506908595561981, + "learning_rate": 1.9994583139972686e-05, + "loss": 0.8017892241477966, + "step": 654 + }, + { + "epoch": 0.12102874432677761, + "grad_norm": 0.0854715034365654, + "learning_rate": 1.9994517288115522e-05, + "loss": 0.7763564586639404, + "step": 655 + }, + { + "epoch": 0.12121352103567345, + "grad_norm": 0.10104908049106598, + "learning_rate": 1.999445103850784e-05, + "loss": 0.8201025128364563, + "step": 656 + }, + { + "epoch": 0.1213982977445693, + "grad_norm": 0.1063561663031578, + "learning_rate": 1.9994384391152276e-05, + "loss": 0.9748373031616211, + "step": 657 + }, + { + "epoch": 0.12158307445346514, + "grad_norm": 0.08973393589258194, + "learning_rate": 1.999431734605148e-05, + "loss": 0.7709097862243652, + "step": 658 + }, + { + "epoch": 0.12176785116236098, + "grad_norm": 0.09718851745128632, + "learning_rate": 1.9994249903208125e-05, + "loss": 0.8377055525779724, + "step": 659 + }, + { + "epoch": 0.12195262787125682, + "grad_norm": 0.10766126215457916, + "learning_rate": 1.999418206262489e-05, + "loss": 0.9289681315422058, + "step": 660 + }, + { + "epoch": 0.12213740458015267, + "grad_norm": 0.09482572227716446, + "learning_rate": 1.9994113824304476e-05, + "loss": 0.8877840638160706, + "step": 661 + }, + { + "epoch": 0.12232218128904851, + "grad_norm": 0.06976597756147385, + "learning_rate": 1.99940451882496e-05, + "loss": 0.5023065209388733, + "step": 662 + }, + { + "epoch": 0.12250695799794437, + "grad_norm": 0.07548683881759644, + "learning_rate": 1.9993976154462997e-05, + "loss": 0.7584120631217957, + "step": 663 + }, + { + "epoch": 0.12269173470684021, + "grad_norm": 0.09401997923851013, + "learning_rate": 1.9993906722947406e-05, + "loss": 0.8495917320251465, + "step": 664 + }, + { + "epoch": 0.12287651141573605, + "grad_norm": 0.0940614566206932, + "learning_rate": 1.9993836893705594e-05, + "loss": 0.8613463640213013, + "step": 665 + }, + { + "epoch": 0.12306128812463189, + "grad_norm": 0.0905088260769844, + "learning_rate": 1.999376666674034e-05, + "loss": 0.7595023512840271, + "step": 666 + }, + { + "epoch": 0.12324606483352774, + "grad_norm": 0.08580617606639862, + "learning_rate": 1.9993696042054437e-05, + "loss": 0.9611132144927979, + "step": 667 + }, + { + "epoch": 0.12343084154242358, + "grad_norm": 0.09921999275684357, + "learning_rate": 1.9993625019650703e-05, + "loss": 0.7061740756034851, + "step": 668 + }, + { + "epoch": 0.12361561825131942, + "grad_norm": 0.09604513645172119, + "learning_rate": 1.999355359953196e-05, + "loss": 0.8366731405258179, + "step": 669 + }, + { + "epoch": 0.12380039496021526, + "grad_norm": 0.08084668219089508, + "learning_rate": 1.9993481781701044e-05, + "loss": 0.6480671167373657, + "step": 670 + }, + { + "epoch": 0.1239851716691111, + "grad_norm": 0.07487687468528748, + "learning_rate": 1.9993409566160822e-05, + "loss": 0.6911617517471313, + "step": 671 + }, + { + "epoch": 0.12416994837800695, + "grad_norm": 0.09577896445989609, + "learning_rate": 1.9993336952914165e-05, + "loss": 0.9893839359283447, + "step": 672 + }, + { + "epoch": 0.12435472508690279, + "grad_norm": 0.10856965184211731, + "learning_rate": 1.999326394196396e-05, + "loss": 0.9420531392097473, + "step": 673 + }, + { + "epoch": 0.12453950179579865, + "grad_norm": 0.1150006502866745, + "learning_rate": 1.9993190533313116e-05, + "loss": 0.9741981029510498, + "step": 674 + }, + { + "epoch": 0.12472427850469449, + "grad_norm": 0.10523250699043274, + "learning_rate": 1.9993116726964554e-05, + "loss": 0.9235258102416992, + "step": 675 + }, + { + "epoch": 0.12490905521359033, + "grad_norm": 0.07580921053886414, + "learning_rate": 1.9993042522921212e-05, + "loss": 0.635983943939209, + "step": 676 + }, + { + "epoch": 0.12509383192248616, + "grad_norm": 0.0988108292222023, + "learning_rate": 1.999296792118604e-05, + "loss": 0.740143358707428, + "step": 677 + }, + { + "epoch": 0.12527860863138202, + "grad_norm": 0.10186348855495453, + "learning_rate": 1.9992892921762007e-05, + "loss": 1.0258514881134033, + "step": 678 + }, + { + "epoch": 0.12546338534027784, + "grad_norm": 0.09436734020709991, + "learning_rate": 1.9992817524652102e-05, + "loss": 0.8242838978767395, + "step": 679 + }, + { + "epoch": 0.1256481620491737, + "grad_norm": 0.08851488679647446, + "learning_rate": 1.9992741729859323e-05, + "loss": 0.8003968000411987, + "step": 680 + }, + { + "epoch": 0.12583293875806956, + "grad_norm": 0.10243381559848785, + "learning_rate": 1.9992665537386687e-05, + "loss": 0.7095505595207214, + "step": 681 + }, + { + "epoch": 0.12601771546696539, + "grad_norm": 0.08683772385120392, + "learning_rate": 1.9992588947237226e-05, + "loss": 0.7240039110183716, + "step": 682 + }, + { + "epoch": 0.12620249217586124, + "grad_norm": 0.07817398011684418, + "learning_rate": 1.9992511959413984e-05, + "loss": 0.8571678996086121, + "step": 683 + }, + { + "epoch": 0.12638726888475707, + "grad_norm": 0.0839422419667244, + "learning_rate": 1.9992434573920033e-05, + "loss": 0.757088840007782, + "step": 684 + }, + { + "epoch": 0.12657204559365293, + "grad_norm": 0.09364355355501175, + "learning_rate": 1.9992356790758445e-05, + "loss": 0.8351529240608215, + "step": 685 + }, + { + "epoch": 0.12675682230254876, + "grad_norm": 0.10530338436365128, + "learning_rate": 1.999227860993232e-05, + "loss": 0.9548841118812561, + "step": 686 + }, + { + "epoch": 0.1269415990114446, + "grad_norm": 0.1037643700838089, + "learning_rate": 1.9992200031444768e-05, + "loss": 0.7979400753974915, + "step": 687 + }, + { + "epoch": 0.12712637572034044, + "grad_norm": 0.10997983813285828, + "learning_rate": 1.9992121055298917e-05, + "loss": 1.0404939651489258, + "step": 688 + }, + { + "epoch": 0.1273111524292363, + "grad_norm": 0.09578070789575577, + "learning_rate": 1.9992041681497908e-05, + "loss": 0.8999356031417847, + "step": 689 + }, + { + "epoch": 0.12749592913813212, + "grad_norm": 0.08665809035301208, + "learning_rate": 1.99919619100449e-05, + "loss": 0.7991875410079956, + "step": 690 + }, + { + "epoch": 0.12768070584702798, + "grad_norm": 0.1182815432548523, + "learning_rate": 1.9991881740943072e-05, + "loss": 1.1467589139938354, + "step": 691 + }, + { + "epoch": 0.12786548255592384, + "grad_norm": 0.08033980429172516, + "learning_rate": 1.9991801174195612e-05, + "loss": 0.7506182193756104, + "step": 692 + }, + { + "epoch": 0.12805025926481967, + "grad_norm": 0.0881318673491478, + "learning_rate": 1.9991720209805723e-05, + "loss": 0.7702957987785339, + "step": 693 + }, + { + "epoch": 0.12823503597371552, + "grad_norm": 0.09789463877677917, + "learning_rate": 1.999163884777663e-05, + "loss": 0.9726911187171936, + "step": 694 + }, + { + "epoch": 0.12841981268261135, + "grad_norm": 0.08305973559617996, + "learning_rate": 1.999155708811157e-05, + "loss": 0.6882860064506531, + "step": 695 + }, + { + "epoch": 0.1286045893915072, + "grad_norm": 0.10566031187772751, + "learning_rate": 1.99914749308138e-05, + "loss": 0.9091855883598328, + "step": 696 + }, + { + "epoch": 0.12878936610040304, + "grad_norm": 0.11838914453983307, + "learning_rate": 1.9991392375886586e-05, + "loss": 0.9343292713165283, + "step": 697 + }, + { + "epoch": 0.1289741428092989, + "grad_norm": 0.10054321587085724, + "learning_rate": 1.9991309423333214e-05, + "loss": 0.8488895297050476, + "step": 698 + }, + { + "epoch": 0.12915891951819472, + "grad_norm": 0.0989433005452156, + "learning_rate": 1.9991226073156986e-05, + "loss": 0.9436942934989929, + "step": 699 + }, + { + "epoch": 0.12934369622709058, + "grad_norm": 0.07221207022666931, + "learning_rate": 1.999114232536122e-05, + "loss": 0.6067855358123779, + "step": 700 + }, + { + "epoch": 0.1295284729359864, + "grad_norm": 0.07408981025218964, + "learning_rate": 1.9991058179949247e-05, + "loss": 0.7271761894226074, + "step": 701 + }, + { + "epoch": 0.12971324964488226, + "grad_norm": 0.08090857416391373, + "learning_rate": 1.9990973636924417e-05, + "loss": 0.7468402981758118, + "step": 702 + }, + { + "epoch": 0.12989802635377812, + "grad_norm": 0.10384667664766312, + "learning_rate": 1.999088869629009e-05, + "loss": 0.8260988593101501, + "step": 703 + }, + { + "epoch": 0.13008280306267395, + "grad_norm": 0.06383131444454193, + "learning_rate": 1.9990803358049653e-05, + "loss": 0.6380683183670044, + "step": 704 + }, + { + "epoch": 0.1302675797715698, + "grad_norm": 0.08272639662027359, + "learning_rate": 1.99907176222065e-05, + "loss": 0.761116623878479, + "step": 705 + }, + { + "epoch": 0.13045235648046563, + "grad_norm": 0.10247909277677536, + "learning_rate": 1.9990631488764044e-05, + "loss": 0.8150011897087097, + "step": 706 + }, + { + "epoch": 0.1306371331893615, + "grad_norm": 0.09713143110275269, + "learning_rate": 1.9990544957725708e-05, + "loss": 0.6454431414604187, + "step": 707 + }, + { + "epoch": 0.13082190989825732, + "grad_norm": 0.10591935366392136, + "learning_rate": 1.999045802909494e-05, + "loss": 1.0341289043426514, + "step": 708 + }, + { + "epoch": 0.13100668660715317, + "grad_norm": 0.10145984590053558, + "learning_rate": 1.9990370702875203e-05, + "loss": 0.7613344192504883, + "step": 709 + }, + { + "epoch": 0.131191463316049, + "grad_norm": 0.08258189260959625, + "learning_rate": 1.9990282979069962e-05, + "loss": 0.701318085193634, + "step": 710 + }, + { + "epoch": 0.13137624002494486, + "grad_norm": 0.07917267829179764, + "learning_rate": 1.9990194857682717e-05, + "loss": 0.7271844744682312, + "step": 711 + }, + { + "epoch": 0.13156101673384069, + "grad_norm": 0.07668077200651169, + "learning_rate": 1.9990106338716973e-05, + "loss": 0.7700219750404358, + "step": 712 + }, + { + "epoch": 0.13174579344273654, + "grad_norm": 0.06670837849378586, + "learning_rate": 1.9990017422176247e-05, + "loss": 0.6176334619522095, + "step": 713 + }, + { + "epoch": 0.1319305701516324, + "grad_norm": 0.08646373450756073, + "learning_rate": 1.9989928108064087e-05, + "loss": 0.7595674395561218, + "step": 714 + }, + { + "epoch": 0.13211534686052823, + "grad_norm": 0.10018078237771988, + "learning_rate": 1.998983839638404e-05, + "loss": 1.067365288734436, + "step": 715 + }, + { + "epoch": 0.13230012356942408, + "grad_norm": 0.06973787397146225, + "learning_rate": 1.998974828713968e-05, + "loss": 0.7161954641342163, + "step": 716 + }, + { + "epoch": 0.1324849002783199, + "grad_norm": 0.08027666807174683, + "learning_rate": 1.9989657780334593e-05, + "loss": 0.696490466594696, + "step": 717 + }, + { + "epoch": 0.13266967698721577, + "grad_norm": 0.0808442011475563, + "learning_rate": 1.998956687597238e-05, + "loss": 0.8137856721878052, + "step": 718 + }, + { + "epoch": 0.1328544536961116, + "grad_norm": 0.10365045815706253, + "learning_rate": 1.9989475574056655e-05, + "loss": 0.9946999549865723, + "step": 719 + }, + { + "epoch": 0.13303923040500745, + "grad_norm": 0.09060733020305634, + "learning_rate": 1.998938387459106e-05, + "loss": 0.9365242719650269, + "step": 720 + }, + { + "epoch": 0.13322400711390328, + "grad_norm": 0.09630218148231506, + "learning_rate": 1.9989291777579238e-05, + "loss": 1.0065200328826904, + "step": 721 + }, + { + "epoch": 0.13340878382279914, + "grad_norm": 0.08935556560754776, + "learning_rate": 1.9989199283024857e-05, + "loss": 0.8397243618965149, + "step": 722 + }, + { + "epoch": 0.133593560531695, + "grad_norm": 0.08932442218065262, + "learning_rate": 1.9989106390931595e-05, + "loss": 0.8106871843338013, + "step": 723 + }, + { + "epoch": 0.13377833724059082, + "grad_norm": 0.10723939538002014, + "learning_rate": 1.998901310130315e-05, + "loss": 0.9284294247627258, + "step": 724 + }, + { + "epoch": 0.13396311394948668, + "grad_norm": 0.09080146253108978, + "learning_rate": 1.9988919414143234e-05, + "loss": 0.8117637634277344, + "step": 725 + }, + { + "epoch": 0.1341478906583825, + "grad_norm": 0.08735419064760208, + "learning_rate": 1.998882532945558e-05, + "loss": 0.6929842829704285, + "step": 726 + }, + { + "epoch": 0.13433266736727836, + "grad_norm": 0.09440413117408752, + "learning_rate": 1.9988730847243926e-05, + "loss": 0.7941557765007019, + "step": 727 + }, + { + "epoch": 0.1345174440761742, + "grad_norm": 0.08626824617385864, + "learning_rate": 1.9988635967512037e-05, + "loss": 0.7169809937477112, + "step": 728 + }, + { + "epoch": 0.13470222078507005, + "grad_norm": 0.07833532989025116, + "learning_rate": 1.998854069026369e-05, + "loss": 0.6908170580863953, + "step": 729 + }, + { + "epoch": 0.13488699749396588, + "grad_norm": 0.08950688689947128, + "learning_rate": 1.9988445015502668e-05, + "loss": 0.8975874185562134, + "step": 730 + }, + { + "epoch": 0.13507177420286173, + "grad_norm": 0.09731578081846237, + "learning_rate": 1.9988348943232787e-05, + "loss": 0.8549961447715759, + "step": 731 + }, + { + "epoch": 0.13525655091175756, + "grad_norm": 0.0867985412478447, + "learning_rate": 1.9988252473457867e-05, + "loss": 0.7790268659591675, + "step": 732 + }, + { + "epoch": 0.13544132762065342, + "grad_norm": 0.08507269620895386, + "learning_rate": 1.9988155606181747e-05, + "loss": 0.7599582672119141, + "step": 733 + }, + { + "epoch": 0.13562610432954927, + "grad_norm": 0.07925692200660706, + "learning_rate": 1.9988058341408282e-05, + "loss": 0.6674377918243408, + "step": 734 + }, + { + "epoch": 0.1358108810384451, + "grad_norm": 0.10991337895393372, + "learning_rate": 1.9987960679141344e-05, + "loss": 0.9170887470245361, + "step": 735 + }, + { + "epoch": 0.13599565774734096, + "grad_norm": 0.07741792500019073, + "learning_rate": 1.998786261938482e-05, + "loss": 0.7736765146255493, + "step": 736 + }, + { + "epoch": 0.1361804344562368, + "grad_norm": 0.07393886893987656, + "learning_rate": 1.9987764162142615e-05, + "loss": 0.6246984004974365, + "step": 737 + }, + { + "epoch": 0.13636521116513264, + "grad_norm": 0.0793522372841835, + "learning_rate": 1.998766530741864e-05, + "loss": 0.9042505621910095, + "step": 738 + }, + { + "epoch": 0.13654998787402847, + "grad_norm": 0.08039207756519318, + "learning_rate": 1.9987566055216833e-05, + "loss": 0.6424958109855652, + "step": 739 + }, + { + "epoch": 0.13673476458292433, + "grad_norm": 0.08686236292123795, + "learning_rate": 1.998746640554115e-05, + "loss": 0.8035511374473572, + "step": 740 + }, + { + "epoch": 0.13691954129182016, + "grad_norm": 0.08280868828296661, + "learning_rate": 1.9987366358395542e-05, + "loss": 0.7081146240234375, + "step": 741 + }, + { + "epoch": 0.137104318000716, + "grad_norm": 0.08056240528821945, + "learning_rate": 1.9987265913784007e-05, + "loss": 0.7108740210533142, + "step": 742 + }, + { + "epoch": 0.13728909470961184, + "grad_norm": 0.08866320550441742, + "learning_rate": 1.998716507171053e-05, + "loss": 0.9253249764442444, + "step": 743 + }, + { + "epoch": 0.1374738714185077, + "grad_norm": 0.09651792794466019, + "learning_rate": 1.998706383217913e-05, + "loss": 0.9384243488311768, + "step": 744 + }, + { + "epoch": 0.13765864812740355, + "grad_norm": 0.0859675332903862, + "learning_rate": 1.9986962195193836e-05, + "loss": 0.9224919676780701, + "step": 745 + }, + { + "epoch": 0.13784342483629938, + "grad_norm": 0.09163744002580643, + "learning_rate": 1.998686016075869e-05, + "loss": 0.6864213347434998, + "step": 746 + }, + { + "epoch": 0.13802820154519524, + "grad_norm": 0.09888351708650589, + "learning_rate": 1.9986757728877755e-05, + "loss": 0.9802375435829163, + "step": 747 + }, + { + "epoch": 0.13821297825409107, + "grad_norm": 0.0809028223156929, + "learning_rate": 1.998665489955511e-05, + "loss": 0.8272873759269714, + "step": 748 + }, + { + "epoch": 0.13839775496298692, + "grad_norm": 0.08693183958530426, + "learning_rate": 1.998655167279484e-05, + "loss": 0.7850380539894104, + "step": 749 + }, + { + "epoch": 0.13858253167188275, + "grad_norm": 0.10577927529811859, + "learning_rate": 1.998644804860106e-05, + "loss": 1.0953820943832397, + "step": 750 + }, + { + "epoch": 0.1387673083807786, + "grad_norm": 0.08052216470241547, + "learning_rate": 1.998634402697789e-05, + "loss": 0.7538807988166809, + "step": 751 + }, + { + "epoch": 0.13895208508967444, + "grad_norm": 0.08013401925563812, + "learning_rate": 1.9986239607929473e-05, + "loss": 0.6019425988197327, + "step": 752 + }, + { + "epoch": 0.1391368617985703, + "grad_norm": 0.08231046795845032, + "learning_rate": 1.998613479145996e-05, + "loss": 0.7123339772224426, + "step": 753 + }, + { + "epoch": 0.13932163850746612, + "grad_norm": 0.10204365849494934, + "learning_rate": 1.9986029577573526e-05, + "loss": 0.8958173394203186, + "step": 754 + }, + { + "epoch": 0.13950641521636198, + "grad_norm": 0.08089638501405716, + "learning_rate": 1.9985923966274357e-05, + "loss": 0.9935266375541687, + "step": 755 + }, + { + "epoch": 0.13969119192525783, + "grad_norm": 0.07984032481908798, + "learning_rate": 1.9985817957566655e-05, + "loss": 0.6228787899017334, + "step": 756 + }, + { + "epoch": 0.13987596863415366, + "grad_norm": 0.08934484422206879, + "learning_rate": 1.998571155145464e-05, + "loss": 0.7877615690231323, + "step": 757 + }, + { + "epoch": 0.14006074534304952, + "grad_norm": 0.08587730675935745, + "learning_rate": 1.9985604747942552e-05, + "loss": 0.7033259272575378, + "step": 758 + }, + { + "epoch": 0.14024552205194535, + "grad_norm": 0.0903143361210823, + "learning_rate": 1.998549754703463e-05, + "loss": 0.7459292411804199, + "step": 759 + }, + { + "epoch": 0.1404302987608412, + "grad_norm": 0.08646641671657562, + "learning_rate": 1.9985389948735146e-05, + "loss": 0.8067314624786377, + "step": 760 + }, + { + "epoch": 0.14061507546973703, + "grad_norm": 0.10522181540727615, + "learning_rate": 1.9985281953048385e-05, + "loss": 1.0474854707717896, + "step": 761 + }, + { + "epoch": 0.1407998521786329, + "grad_norm": 0.07542385160923004, + "learning_rate": 1.9985173559978637e-05, + "loss": 0.6246185898780823, + "step": 762 + }, + { + "epoch": 0.14098462888752872, + "grad_norm": 0.09156143665313721, + "learning_rate": 1.998506476953023e-05, + "loss": 1.0200754404067993, + "step": 763 + }, + { + "epoch": 0.14116940559642457, + "grad_norm": 0.08457624912261963, + "learning_rate": 1.9984955581707476e-05, + "loss": 0.6566670536994934, + "step": 764 + }, + { + "epoch": 0.1413541823053204, + "grad_norm": 0.08702805638313293, + "learning_rate": 1.9984845996514735e-05, + "loss": 0.7731756567955017, + "step": 765 + }, + { + "epoch": 0.14153895901421626, + "grad_norm": 0.0885612964630127, + "learning_rate": 1.998473601395636e-05, + "loss": 0.7161411046981812, + "step": 766 + }, + { + "epoch": 0.14172373572311212, + "grad_norm": 0.08405031263828278, + "learning_rate": 1.9984625634036728e-05, + "loss": 0.6718830466270447, + "step": 767 + }, + { + "epoch": 0.14190851243200794, + "grad_norm": 0.09604194760322571, + "learning_rate": 1.9984514856760233e-05, + "loss": 0.8609301447868347, + "step": 768 + }, + { + "epoch": 0.1420932891409038, + "grad_norm": 0.08894237875938416, + "learning_rate": 1.998440368213129e-05, + "loss": 0.6825015544891357, + "step": 769 + }, + { + "epoch": 0.14227806584979963, + "grad_norm": 0.0985831692814827, + "learning_rate": 1.998429211015431e-05, + "loss": 0.8550534248352051, + "step": 770 + }, + { + "epoch": 0.14246284255869548, + "grad_norm": 0.07364708185195923, + "learning_rate": 1.9984180140833745e-05, + "loss": 0.58270263671875, + "step": 771 + }, + { + "epoch": 0.1426476192675913, + "grad_norm": 0.09174038469791412, + "learning_rate": 1.9984067774174047e-05, + "loss": 0.9174992442131042, + "step": 772 + }, + { + "epoch": 0.14283239597648717, + "grad_norm": 0.08195028454065323, + "learning_rate": 1.9983955010179687e-05, + "loss": 0.8114473223686218, + "step": 773 + }, + { + "epoch": 0.143017172685383, + "grad_norm": 0.09834912419319153, + "learning_rate": 1.998384184885515e-05, + "loss": 0.7728997468948364, + "step": 774 + }, + { + "epoch": 0.14320194939427885, + "grad_norm": 0.09782921522855759, + "learning_rate": 1.998372829020495e-05, + "loss": 0.8147768378257751, + "step": 775 + }, + { + "epoch": 0.14338672610317468, + "grad_norm": 0.0865607038140297, + "learning_rate": 1.9983614334233595e-05, + "loss": 0.7355035543441772, + "step": 776 + }, + { + "epoch": 0.14357150281207054, + "grad_norm": 0.10707329958677292, + "learning_rate": 1.9983499980945624e-05, + "loss": 0.9970985651016235, + "step": 777 + }, + { + "epoch": 0.1437562795209664, + "grad_norm": 0.11169784516096115, + "learning_rate": 1.998338523034559e-05, + "loss": 0.9548088908195496, + "step": 778 + }, + { + "epoch": 0.14394105622986222, + "grad_norm": 0.089739590883255, + "learning_rate": 1.9983270082438054e-05, + "loss": 0.8539899587631226, + "step": 779 + }, + { + "epoch": 0.14412583293875808, + "grad_norm": 0.11042316257953644, + "learning_rate": 1.9983154537227607e-05, + "loss": 1.1233160495758057, + "step": 780 + }, + { + "epoch": 0.1443106096476539, + "grad_norm": 0.09637823700904846, + "learning_rate": 1.998303859471884e-05, + "loss": 1.0442636013031006, + "step": 781 + }, + { + "epoch": 0.14449538635654977, + "grad_norm": 0.09303180873394012, + "learning_rate": 1.9982922254916373e-05, + "loss": 0.850719153881073, + "step": 782 + }, + { + "epoch": 0.1446801630654456, + "grad_norm": 0.10702165216207504, + "learning_rate": 1.998280551782483e-05, + "loss": 0.9073002338409424, + "step": 783 + }, + { + "epoch": 0.14486493977434145, + "grad_norm": 0.10565286874771118, + "learning_rate": 1.998268838344886e-05, + "loss": 0.9817302823066711, + "step": 784 + }, + { + "epoch": 0.14504971648323728, + "grad_norm": 0.10369947552680969, + "learning_rate": 1.9982570851793125e-05, + "loss": 0.865040123462677, + "step": 785 + }, + { + "epoch": 0.14523449319213314, + "grad_norm": 0.09121627360582352, + "learning_rate": 1.9982452922862297e-05, + "loss": 0.7408377528190613, + "step": 786 + }, + { + "epoch": 0.14541926990102896, + "grad_norm": 0.09499744325876236, + "learning_rate": 1.998233459666108e-05, + "loss": 0.903713047504425, + "step": 787 + }, + { + "epoch": 0.14560404660992482, + "grad_norm": 0.08999454230070114, + "learning_rate": 1.9982215873194174e-05, + "loss": 0.9278289079666138, + "step": 788 + }, + { + "epoch": 0.14578882331882068, + "grad_norm": 0.09898703545331955, + "learning_rate": 1.998209675246631e-05, + "loss": 0.8522279262542725, + "step": 789 + }, + { + "epoch": 0.1459736000277165, + "grad_norm": 0.08116642385721207, + "learning_rate": 1.9981977234482216e-05, + "loss": 0.7823371887207031, + "step": 790 + }, + { + "epoch": 0.14615837673661236, + "grad_norm": 0.09841936081647873, + "learning_rate": 1.998185731924667e-05, + "loss": 0.6668987274169922, + "step": 791 + }, + { + "epoch": 0.1463431534455082, + "grad_norm": 0.08227173984050751, + "learning_rate": 1.9981737006764422e-05, + "loss": 0.7551202178001404, + "step": 792 + }, + { + "epoch": 0.14652793015440405, + "grad_norm": 0.09930627793073654, + "learning_rate": 1.9981616297040274e-05, + "loss": 1.0937387943267822, + "step": 793 + }, + { + "epoch": 0.14671270686329987, + "grad_norm": 0.09635364264249802, + "learning_rate": 1.9981495190079023e-05, + "loss": 1.0252056121826172, + "step": 794 + }, + { + "epoch": 0.14689748357219573, + "grad_norm": 0.07074606418609619, + "learning_rate": 1.9981373685885496e-05, + "loss": 0.6450613141059875, + "step": 795 + }, + { + "epoch": 0.14708226028109156, + "grad_norm": 0.10457006096839905, + "learning_rate": 1.998125178446452e-05, + "loss": 0.872199535369873, + "step": 796 + }, + { + "epoch": 0.14726703698998742, + "grad_norm": 0.10501401871442795, + "learning_rate": 1.9981129485820955e-05, + "loss": 0.9675683379173279, + "step": 797 + }, + { + "epoch": 0.14745181369888324, + "grad_norm": 0.08505741506814957, + "learning_rate": 1.9981006789959658e-05, + "loss": 0.7211896181106567, + "step": 798 + }, + { + "epoch": 0.1476365904077791, + "grad_norm": 0.08668606728315353, + "learning_rate": 1.998088369688552e-05, + "loss": 0.7685105204582214, + "step": 799 + }, + { + "epoch": 0.14782136711667496, + "grad_norm": 0.08549778908491135, + "learning_rate": 1.998076020660344e-05, + "loss": 0.7868371605873108, + "step": 800 + }, + { + "epoch": 0.14800614382557079, + "grad_norm": 0.10425841063261032, + "learning_rate": 1.9980636319118326e-05, + "loss": 0.8326320648193359, + "step": 801 + }, + { + "epoch": 0.14819092053446664, + "grad_norm": 0.08408709615468979, + "learning_rate": 1.998051203443511e-05, + "loss": 0.7171183824539185, + "step": 802 + }, + { + "epoch": 0.14837569724336247, + "grad_norm": 0.09104572981595993, + "learning_rate": 1.9980387352558742e-05, + "loss": 0.843257486820221, + "step": 803 + }, + { + "epoch": 0.14856047395225833, + "grad_norm": 0.08411456644535065, + "learning_rate": 1.9980262273494184e-05, + "loss": 0.6456289887428284, + "step": 804 + }, + { + "epoch": 0.14874525066115415, + "grad_norm": 0.08256927132606506, + "learning_rate": 1.998013679724641e-05, + "loss": 0.7349569201469421, + "step": 805 + }, + { + "epoch": 0.14893002737005, + "grad_norm": 0.07136604189872742, + "learning_rate": 1.9980010923820417e-05, + "loss": 0.7609940767288208, + "step": 806 + }, + { + "epoch": 0.14911480407894584, + "grad_norm": 0.09291189163923264, + "learning_rate": 1.997988465322121e-05, + "loss": 0.7991480231285095, + "step": 807 + }, + { + "epoch": 0.1492995807878417, + "grad_norm": 0.08440721780061722, + "learning_rate": 1.9979757985453818e-05, + "loss": 0.7707111835479736, + "step": 808 + }, + { + "epoch": 0.14948435749673752, + "grad_norm": 0.0729835107922554, + "learning_rate": 1.997963092052328e-05, + "loss": 0.622437059879303, + "step": 809 + }, + { + "epoch": 0.14966913420563338, + "grad_norm": 0.08329617232084274, + "learning_rate": 1.9979503458434654e-05, + "loss": 0.8455030918121338, + "step": 810 + }, + { + "epoch": 0.14985391091452924, + "grad_norm": 0.07677850127220154, + "learning_rate": 1.9979375599193013e-05, + "loss": 0.6293231844902039, + "step": 811 + }, + { + "epoch": 0.15003868762342507, + "grad_norm": 0.10512515902519226, + "learning_rate": 1.9979247342803445e-05, + "loss": 0.9506365656852722, + "step": 812 + }, + { + "epoch": 0.15022346433232092, + "grad_norm": 0.0830494686961174, + "learning_rate": 1.9979118689271054e-05, + "loss": 0.8180676698684692, + "step": 813 + }, + { + "epoch": 0.15040824104121675, + "grad_norm": 0.09016073495149612, + "learning_rate": 1.9978989638600958e-05, + "loss": 0.9344583749771118, + "step": 814 + }, + { + "epoch": 0.1505930177501126, + "grad_norm": 0.09691261500120163, + "learning_rate": 1.9978860190798298e-05, + "loss": 1.0068974494934082, + "step": 815 + }, + { + "epoch": 0.15077779445900844, + "grad_norm": 0.08613571524620056, + "learning_rate": 1.997873034586822e-05, + "loss": 0.7329363226890564, + "step": 816 + }, + { + "epoch": 0.1509625711679043, + "grad_norm": 0.08310612291097641, + "learning_rate": 1.9978600103815894e-05, + "loss": 0.7454736232757568, + "step": 817 + }, + { + "epoch": 0.15114734787680012, + "grad_norm": 0.07982125133275986, + "learning_rate": 1.99784694646465e-05, + "loss": 0.6723255515098572, + "step": 818 + }, + { + "epoch": 0.15133212458569598, + "grad_norm": 0.10597433149814606, + "learning_rate": 1.997833842836524e-05, + "loss": 0.8811939358711243, + "step": 819 + }, + { + "epoch": 0.1515169012945918, + "grad_norm": 0.09027368575334549, + "learning_rate": 1.9978206994977332e-05, + "loss": 1.095210313796997, + "step": 820 + }, + { + "epoch": 0.15170167800348766, + "grad_norm": 0.0823710560798645, + "learning_rate": 1.9978075164488004e-05, + "loss": 0.6737566590309143, + "step": 821 + }, + { + "epoch": 0.15188645471238352, + "grad_norm": 0.0979158878326416, + "learning_rate": 1.99779429369025e-05, + "loss": 0.9108914732933044, + "step": 822 + }, + { + "epoch": 0.15207123142127935, + "grad_norm": 0.09988109767436981, + "learning_rate": 1.9977810312226086e-05, + "loss": 0.911587655544281, + "step": 823 + }, + { + "epoch": 0.1522560081301752, + "grad_norm": 0.08708114922046661, + "learning_rate": 1.9977677290464034e-05, + "loss": 0.6712893843650818, + "step": 824 + }, + { + "epoch": 0.15244078483907103, + "grad_norm": 0.07156742364168167, + "learning_rate": 1.9977543871621647e-05, + "loss": 0.5517778396606445, + "step": 825 + }, + { + "epoch": 0.1526255615479669, + "grad_norm": 0.08961985260248184, + "learning_rate": 1.9977410055704228e-05, + "loss": 0.7994911670684814, + "step": 826 + }, + { + "epoch": 0.15281033825686272, + "grad_norm": 0.08305425941944122, + "learning_rate": 1.9977275842717102e-05, + "loss": 0.6371324062347412, + "step": 827 + }, + { + "epoch": 0.15299511496575857, + "grad_norm": 0.08506402373313904, + "learning_rate": 1.9977141232665613e-05, + "loss": 0.8960465788841248, + "step": 828 + }, + { + "epoch": 0.1531798916746544, + "grad_norm": 0.0903603583574295, + "learning_rate": 1.9977006225555118e-05, + "loss": 0.7395355105400085, + "step": 829 + }, + { + "epoch": 0.15336466838355026, + "grad_norm": 0.10823410004377365, + "learning_rate": 1.997687082139099e-05, + "loss": 1.0822867155075073, + "step": 830 + }, + { + "epoch": 0.15354944509244609, + "grad_norm": 0.11097127199172974, + "learning_rate": 1.9976735020178616e-05, + "loss": 0.9120482206344604, + "step": 831 + }, + { + "epoch": 0.15373422180134194, + "grad_norm": 0.07994866371154785, + "learning_rate": 1.9976598821923403e-05, + "loss": 0.775039553642273, + "step": 832 + }, + { + "epoch": 0.1539189985102378, + "grad_norm": 0.11645234376192093, + "learning_rate": 1.9976462226630767e-05, + "loss": 1.08674955368042, + "step": 833 + }, + { + "epoch": 0.15410377521913363, + "grad_norm": 0.09166614711284637, + "learning_rate": 1.9976325234306148e-05, + "loss": 0.9463396072387695, + "step": 834 + }, + { + "epoch": 0.15428855192802948, + "grad_norm": 0.078475721180439, + "learning_rate": 1.9976187844954997e-05, + "loss": 0.7562021613121033, + "step": 835 + }, + { + "epoch": 0.1544733286369253, + "grad_norm": 0.0811050683259964, + "learning_rate": 1.997605005858278e-05, + "loss": 0.9685558080673218, + "step": 836 + }, + { + "epoch": 0.15465810534582117, + "grad_norm": 0.07300540059804916, + "learning_rate": 1.9975911875194983e-05, + "loss": 0.6694080829620361, + "step": 837 + }, + { + "epoch": 0.154842882054717, + "grad_norm": 0.09105053544044495, + "learning_rate": 1.9975773294797104e-05, + "loss": 0.771217405796051, + "step": 838 + }, + { + "epoch": 0.15502765876361285, + "grad_norm": 0.09635568410158157, + "learning_rate": 1.9975634317394655e-05, + "loss": 0.9028376340866089, + "step": 839 + }, + { + "epoch": 0.15521243547250868, + "grad_norm": 0.07771394401788712, + "learning_rate": 1.9975494942993173e-05, + "loss": 0.9490246176719666, + "step": 840 + }, + { + "epoch": 0.15539721218140454, + "grad_norm": 0.08951511979103088, + "learning_rate": 1.9975355171598205e-05, + "loss": 0.7538551092147827, + "step": 841 + }, + { + "epoch": 0.15558198889030037, + "grad_norm": 0.10040956735610962, + "learning_rate": 1.9975215003215306e-05, + "loss": 0.8540251851081848, + "step": 842 + }, + { + "epoch": 0.15576676559919622, + "grad_norm": 0.08941560238599777, + "learning_rate": 1.9975074437850057e-05, + "loss": 0.8490448594093323, + "step": 843 + }, + { + "epoch": 0.15595154230809208, + "grad_norm": 0.0840606689453125, + "learning_rate": 1.9974933475508055e-05, + "loss": 0.8591848611831665, + "step": 844 + }, + { + "epoch": 0.1561363190169879, + "grad_norm": 0.09096793085336685, + "learning_rate": 1.9974792116194908e-05, + "loss": 0.8079205751419067, + "step": 845 + }, + { + "epoch": 0.15632109572588376, + "grad_norm": 0.10489457100629807, + "learning_rate": 1.9974650359916243e-05, + "loss": 0.9832424521446228, + "step": 846 + }, + { + "epoch": 0.1565058724347796, + "grad_norm": 0.08821387588977814, + "learning_rate": 1.99745082066777e-05, + "loss": 0.7868931293487549, + "step": 847 + }, + { + "epoch": 0.15669064914367545, + "grad_norm": 0.07660951465368271, + "learning_rate": 1.9974365656484934e-05, + "loss": 0.8489773273468018, + "step": 848 + }, + { + "epoch": 0.15687542585257128, + "grad_norm": 0.09838790446519852, + "learning_rate": 1.9974222709343625e-05, + "loss": 0.8385222554206848, + "step": 849 + }, + { + "epoch": 0.15706020256146713, + "grad_norm": 0.08230140805244446, + "learning_rate": 1.9974079365259453e-05, + "loss": 0.791317343711853, + "step": 850 + }, + { + "epoch": 0.15724497927036296, + "grad_norm": 0.08971503376960754, + "learning_rate": 1.997393562423813e-05, + "loss": 0.8988771438598633, + "step": 851 + }, + { + "epoch": 0.15742975597925882, + "grad_norm": 0.07249333709478378, + "learning_rate": 1.9973791486285373e-05, + "loss": 0.639558732509613, + "step": 852 + }, + { + "epoch": 0.15761453268815465, + "grad_norm": 0.08529103547334671, + "learning_rate": 1.997364695140692e-05, + "loss": 0.7391307353973389, + "step": 853 + }, + { + "epoch": 0.1577993093970505, + "grad_norm": 0.06635551154613495, + "learning_rate": 1.9973502019608518e-05, + "loss": 0.5126902461051941, + "step": 854 + }, + { + "epoch": 0.15798408610594636, + "grad_norm": 0.07860208302736282, + "learning_rate": 1.9973356690895943e-05, + "loss": 0.7455308437347412, + "step": 855 + }, + { + "epoch": 0.1581688628148422, + "grad_norm": 0.0684715062379837, + "learning_rate": 1.997321096527497e-05, + "loss": 0.5100698471069336, + "step": 856 + }, + { + "epoch": 0.15835363952373804, + "grad_norm": 0.07221705466508865, + "learning_rate": 1.9973064842751408e-05, + "loss": 0.6271705031394958, + "step": 857 + }, + { + "epoch": 0.15853841623263387, + "grad_norm": 0.09156036376953125, + "learning_rate": 1.9972918323331062e-05, + "loss": 0.872652530670166, + "step": 858 + }, + { + "epoch": 0.15872319294152973, + "grad_norm": 0.0856616422533989, + "learning_rate": 1.9972771407019772e-05, + "loss": 0.6149677038192749, + "step": 859 + }, + { + "epoch": 0.15890796965042556, + "grad_norm": 0.10809344798326492, + "learning_rate": 1.997262409382338e-05, + "loss": 0.968105673789978, + "step": 860 + }, + { + "epoch": 0.1590927463593214, + "grad_norm": 0.09980929642915726, + "learning_rate": 1.9972476383747748e-05, + "loss": 0.8662238717079163, + "step": 861 + }, + { + "epoch": 0.15927752306821724, + "grad_norm": 0.07973527908325195, + "learning_rate": 1.9972328276798758e-05, + "loss": 0.6740208864212036, + "step": 862 + }, + { + "epoch": 0.1594622997771131, + "grad_norm": 0.09533673524856567, + "learning_rate": 1.99721797729823e-05, + "loss": 0.964790940284729, + "step": 863 + }, + { + "epoch": 0.15964707648600893, + "grad_norm": 0.08445615321397781, + "learning_rate": 1.9972030872304287e-05, + "loss": 0.6526170969009399, + "step": 864 + }, + { + "epoch": 0.15983185319490478, + "grad_norm": 0.09407380223274231, + "learning_rate": 1.997188157477064e-05, + "loss": 0.820464551448822, + "step": 865 + }, + { + "epoch": 0.16001662990380064, + "grad_norm": 0.08107048273086548, + "learning_rate": 1.997173188038731e-05, + "loss": 0.8346737623214722, + "step": 866 + }, + { + "epoch": 0.16020140661269647, + "grad_norm": 0.10146121680736542, + "learning_rate": 1.9971581789160246e-05, + "loss": 0.831204354763031, + "step": 867 + }, + { + "epoch": 0.16038618332159232, + "grad_norm": 0.09238161146640778, + "learning_rate": 1.9971431301095423e-05, + "loss": 1.003567099571228, + "step": 868 + }, + { + "epoch": 0.16057096003048815, + "grad_norm": 0.10330084711313248, + "learning_rate": 1.9971280416198832e-05, + "loss": 0.8877651691436768, + "step": 869 + }, + { + "epoch": 0.160755736739384, + "grad_norm": 0.08259479701519012, + "learning_rate": 1.9971129134476474e-05, + "loss": 0.6704717874526978, + "step": 870 + }, + { + "epoch": 0.16094051344827984, + "grad_norm": 0.07004322111606598, + "learning_rate": 1.9970977455934376e-05, + "loss": 0.526638388633728, + "step": 871 + }, + { + "epoch": 0.1611252901571757, + "grad_norm": 0.07713456451892853, + "learning_rate": 1.9970825380578568e-05, + "loss": 0.5882245898246765, + "step": 872 + }, + { + "epoch": 0.16131006686607152, + "grad_norm": 0.06343498080968857, + "learning_rate": 1.9970672908415106e-05, + "loss": 0.6268212199211121, + "step": 873 + }, + { + "epoch": 0.16149484357496738, + "grad_norm": 0.08529190719127655, + "learning_rate": 1.9970520039450057e-05, + "loss": 0.8552349209785461, + "step": 874 + }, + { + "epoch": 0.1616796202838632, + "grad_norm": 0.07656506448984146, + "learning_rate": 1.9970366773689504e-05, + "loss": 0.685907781124115, + "step": 875 + }, + { + "epoch": 0.16186439699275906, + "grad_norm": 0.09757979214191437, + "learning_rate": 1.9970213111139545e-05, + "loss": 0.9193124771118164, + "step": 876 + }, + { + "epoch": 0.16204917370165492, + "grad_norm": 0.0972999557852745, + "learning_rate": 1.99700590518063e-05, + "loss": 0.8101493120193481, + "step": 877 + }, + { + "epoch": 0.16223395041055075, + "grad_norm": 0.08974293619394302, + "learning_rate": 1.9969904595695894e-05, + "loss": 0.7346316576004028, + "step": 878 + }, + { + "epoch": 0.1624187271194466, + "grad_norm": 0.10032976418733597, + "learning_rate": 1.9969749742814474e-05, + "loss": 0.8234612345695496, + "step": 879 + }, + { + "epoch": 0.16260350382834243, + "grad_norm": 0.10190098732709885, + "learning_rate": 1.9969594493168213e-05, + "loss": 0.8721520900726318, + "step": 880 + }, + { + "epoch": 0.1627882805372383, + "grad_norm": 0.10501670837402344, + "learning_rate": 1.996943884676328e-05, + "loss": 1.0961534976959229, + "step": 881 + }, + { + "epoch": 0.16297305724613412, + "grad_norm": 0.07484227418899536, + "learning_rate": 1.9969282803605866e-05, + "loss": 0.571873128414154, + "step": 882 + }, + { + "epoch": 0.16315783395502997, + "grad_norm": 0.09870273619890213, + "learning_rate": 1.9969126363702188e-05, + "loss": 0.8259404301643372, + "step": 883 + }, + { + "epoch": 0.1633426106639258, + "grad_norm": 0.09730734676122665, + "learning_rate": 1.9968969527058476e-05, + "loss": 0.8662014007568359, + "step": 884 + }, + { + "epoch": 0.16352738737282166, + "grad_norm": 0.09543541073799133, + "learning_rate": 1.996881229368096e-05, + "loss": 0.8927724957466125, + "step": 885 + }, + { + "epoch": 0.1637121640817175, + "grad_norm": 0.0955570712685585, + "learning_rate": 1.9968654663575906e-05, + "loss": 0.8467931151390076, + "step": 886 + }, + { + "epoch": 0.16389694079061334, + "grad_norm": 0.10068543255329132, + "learning_rate": 1.9968496636749584e-05, + "loss": 0.9493212699890137, + "step": 887 + }, + { + "epoch": 0.1640817174995092, + "grad_norm": 0.08250802755355835, + "learning_rate": 1.996833821320828e-05, + "loss": 0.625404417514801, + "step": 888 + }, + { + "epoch": 0.16426649420840503, + "grad_norm": 0.09056300669908524, + "learning_rate": 1.9968179392958305e-05, + "loss": 0.9785575270652771, + "step": 889 + }, + { + "epoch": 0.16445127091730088, + "grad_norm": 0.09318284690380096, + "learning_rate": 1.9968020176005976e-05, + "loss": 0.8643733859062195, + "step": 890 + }, + { + "epoch": 0.1646360476261967, + "grad_norm": 0.0878918394446373, + "learning_rate": 1.996786056235763e-05, + "loss": 0.8458439111709595, + "step": 891 + }, + { + "epoch": 0.16482082433509257, + "grad_norm": 0.07812018692493439, + "learning_rate": 1.996770055201962e-05, + "loss": 0.7509803175926208, + "step": 892 + }, + { + "epoch": 0.1650056010439884, + "grad_norm": 0.051492493599653244, + "learning_rate": 1.9967540144998313e-05, + "loss": 0.3646559417247772, + "step": 893 + }, + { + "epoch": 0.16519037775288425, + "grad_norm": 0.08322598040103912, + "learning_rate": 1.9967379341300092e-05, + "loss": 0.7500286102294922, + "step": 894 + }, + { + "epoch": 0.16537515446178008, + "grad_norm": 0.1015838161110878, + "learning_rate": 1.9967218140931358e-05, + "loss": 0.8413242697715759, + "step": 895 + }, + { + "epoch": 0.16555993117067594, + "grad_norm": 0.08555304259061813, + "learning_rate": 1.996705654389852e-05, + "loss": 0.7918609380722046, + "step": 896 + }, + { + "epoch": 0.16574470787957177, + "grad_norm": 0.08763028681278229, + "learning_rate": 1.996689455020802e-05, + "loss": 0.8354541063308716, + "step": 897 + }, + { + "epoch": 0.16592948458846762, + "grad_norm": 0.09521177411079407, + "learning_rate": 1.99667321598663e-05, + "loss": 0.9694790244102478, + "step": 898 + }, + { + "epoch": 0.16611426129736348, + "grad_norm": 0.09489469975233078, + "learning_rate": 1.996656937287982e-05, + "loss": 0.6468320488929749, + "step": 899 + }, + { + "epoch": 0.1662990380062593, + "grad_norm": 0.09139697998762131, + "learning_rate": 1.9966406189255057e-05, + "loss": 0.8143627643585205, + "step": 900 + }, + { + "epoch": 0.16648381471515517, + "grad_norm": 0.08560251444578171, + "learning_rate": 1.9966242608998515e-05, + "loss": 0.5542747974395752, + "step": 901 + }, + { + "epoch": 0.166668591424051, + "grad_norm": 0.07455048710107803, + "learning_rate": 1.9966078632116695e-05, + "loss": 0.7415421605110168, + "step": 902 + }, + { + "epoch": 0.16685336813294685, + "grad_norm": 0.06992721557617188, + "learning_rate": 1.9965914258616123e-05, + "loss": 0.5292272567749023, + "step": 903 + }, + { + "epoch": 0.16703814484184268, + "grad_norm": 0.08455928415060043, + "learning_rate": 1.9965749488503343e-05, + "loss": 0.6594451665878296, + "step": 904 + }, + { + "epoch": 0.16722292155073853, + "grad_norm": 0.09297055006027222, + "learning_rate": 1.9965584321784917e-05, + "loss": 1.0128505229949951, + "step": 905 + }, + { + "epoch": 0.16740769825963436, + "grad_norm": 0.07890793681144714, + "learning_rate": 1.996541875846741e-05, + "loss": 0.5768525004386902, + "step": 906 + }, + { + "epoch": 0.16759247496853022, + "grad_norm": 0.09260081499814987, + "learning_rate": 1.9965252798557413e-05, + "loss": 0.7528817057609558, + "step": 907 + }, + { + "epoch": 0.16777725167742605, + "grad_norm": 0.09680453687906265, + "learning_rate": 1.9965086442061533e-05, + "loss": 0.7937643527984619, + "step": 908 + }, + { + "epoch": 0.1679620283863219, + "grad_norm": 0.09023380279541016, + "learning_rate": 1.9964919688986392e-05, + "loss": 0.7089190483093262, + "step": 909 + }, + { + "epoch": 0.16814680509521776, + "grad_norm": 0.09799093753099442, + "learning_rate": 1.9964752539338618e-05, + "loss": 0.8649925589561462, + "step": 910 + }, + { + "epoch": 0.1683315818041136, + "grad_norm": 0.10270999372005463, + "learning_rate": 1.996458499312487e-05, + "loss": 0.8653252720832825, + "step": 911 + }, + { + "epoch": 0.16851635851300945, + "grad_norm": 0.09714650362730026, + "learning_rate": 1.9964417050351817e-05, + "loss": 0.8030729293823242, + "step": 912 + }, + { + "epoch": 0.16870113522190527, + "grad_norm": 0.08536910265684128, + "learning_rate": 1.996424871102614e-05, + "loss": 0.6893182992935181, + "step": 913 + }, + { + "epoch": 0.16888591193080113, + "grad_norm": 0.08386935293674469, + "learning_rate": 1.996407997515454e-05, + "loss": 0.7112575173377991, + "step": 914 + }, + { + "epoch": 0.16907068863969696, + "grad_norm": 0.10440012812614441, + "learning_rate": 1.9963910842743726e-05, + "loss": 0.8684526681900024, + "step": 915 + }, + { + "epoch": 0.16925546534859282, + "grad_norm": 0.09515652805566788, + "learning_rate": 1.9963741313800437e-05, + "loss": 0.7283082008361816, + "step": 916 + }, + { + "epoch": 0.16944024205748864, + "grad_norm": 0.09991753101348877, + "learning_rate": 1.9963571388331417e-05, + "loss": 0.8834851384162903, + "step": 917 + }, + { + "epoch": 0.1696250187663845, + "grad_norm": 0.11581186950206757, + "learning_rate": 1.9963401066343424e-05, + "loss": 1.0167608261108398, + "step": 918 + }, + { + "epoch": 0.16980979547528033, + "grad_norm": 0.06342756003141403, + "learning_rate": 1.9963230347843242e-05, + "loss": 0.6235218048095703, + "step": 919 + }, + { + "epoch": 0.16999457218417618, + "grad_norm": 0.09533628076314926, + "learning_rate": 1.996305923283766e-05, + "loss": 0.7711799144744873, + "step": 920 + }, + { + "epoch": 0.17017934889307204, + "grad_norm": 0.0671616718173027, + "learning_rate": 1.9962887721333498e-05, + "loss": 0.5659441947937012, + "step": 921 + }, + { + "epoch": 0.17036412560196787, + "grad_norm": 0.07848536968231201, + "learning_rate": 1.996271581333757e-05, + "loss": 0.8277614116668701, + "step": 922 + }, + { + "epoch": 0.17054890231086373, + "grad_norm": 0.09858408570289612, + "learning_rate": 1.9962543508856722e-05, + "loss": 0.943729817867279, + "step": 923 + }, + { + "epoch": 0.17073367901975955, + "grad_norm": 0.10257252305746078, + "learning_rate": 1.996237080789781e-05, + "loss": 0.9582602977752686, + "step": 924 + }, + { + "epoch": 0.1709184557286554, + "grad_norm": 0.09687380492687225, + "learning_rate": 1.996219771046771e-05, + "loss": 0.8321186900138855, + "step": 925 + }, + { + "epoch": 0.17110323243755124, + "grad_norm": 0.0945492833852768, + "learning_rate": 1.996202421657331e-05, + "loss": 1.0206397771835327, + "step": 926 + }, + { + "epoch": 0.1712880091464471, + "grad_norm": 0.10808293521404266, + "learning_rate": 1.996185032622151e-05, + "loss": 1.0324662923812866, + "step": 927 + }, + { + "epoch": 0.17147278585534292, + "grad_norm": 0.09793483465909958, + "learning_rate": 1.9961676039419236e-05, + "loss": 0.8650661110877991, + "step": 928 + }, + { + "epoch": 0.17165756256423878, + "grad_norm": 0.08973977714776993, + "learning_rate": 1.9961501356173422e-05, + "loss": 0.8832427859306335, + "step": 929 + }, + { + "epoch": 0.1718423392731346, + "grad_norm": 0.07093001157045364, + "learning_rate": 1.996132627649102e-05, + "loss": 0.6482492685317993, + "step": 930 + }, + { + "epoch": 0.17202711598203047, + "grad_norm": 0.07376673072576523, + "learning_rate": 1.9961150800378997e-05, + "loss": 0.5817755460739136, + "step": 931 + }, + { + "epoch": 0.17221189269092632, + "grad_norm": 0.09312041848897934, + "learning_rate": 1.9960974927844332e-05, + "loss": 0.7770845890045166, + "step": 932 + }, + { + "epoch": 0.17239666939982215, + "grad_norm": 0.08325351029634476, + "learning_rate": 1.9960798658894033e-05, + "loss": 0.6577749848365784, + "step": 933 + }, + { + "epoch": 0.172581446108718, + "grad_norm": 0.10464484989643097, + "learning_rate": 1.996062199353511e-05, + "loss": 1.0084058046340942, + "step": 934 + }, + { + "epoch": 0.17276622281761383, + "grad_norm": 0.09137419611215591, + "learning_rate": 1.9960444931774596e-05, + "loss": 0.9034083485603333, + "step": 935 + }, + { + "epoch": 0.1729509995265097, + "grad_norm": 0.0853210985660553, + "learning_rate": 1.9960267473619535e-05, + "loss": 0.6832973957061768, + "step": 936 + }, + { + "epoch": 0.17313577623540552, + "grad_norm": 0.06843896955251694, + "learning_rate": 1.9960089619076986e-05, + "loss": 0.49686911702156067, + "step": 937 + }, + { + "epoch": 0.17332055294430138, + "grad_norm": 0.08093714714050293, + "learning_rate": 1.9959911368154036e-05, + "loss": 0.7712295055389404, + "step": 938 + }, + { + "epoch": 0.1735053296531972, + "grad_norm": 0.08600125461816788, + "learning_rate": 1.9959732720857773e-05, + "loss": 0.7024419903755188, + "step": 939 + }, + { + "epoch": 0.17369010636209306, + "grad_norm": 0.08981095999479294, + "learning_rate": 1.995955367719531e-05, + "loss": 0.739615261554718, + "step": 940 + }, + { + "epoch": 0.1738748830709889, + "grad_norm": 0.09242704510688782, + "learning_rate": 1.9959374237173768e-05, + "loss": 0.867473304271698, + "step": 941 + }, + { + "epoch": 0.17405965977988475, + "grad_norm": 0.09307640045881271, + "learning_rate": 1.995919440080029e-05, + "loss": 0.8205432295799255, + "step": 942 + }, + { + "epoch": 0.1742444364887806, + "grad_norm": 0.09234365820884705, + "learning_rate": 1.995901416808203e-05, + "loss": 0.6995620727539062, + "step": 943 + }, + { + "epoch": 0.17442921319767643, + "grad_norm": 0.0816473513841629, + "learning_rate": 1.995883353902617e-05, + "loss": 0.7264127731323242, + "step": 944 + }, + { + "epoch": 0.1746139899065723, + "grad_norm": 0.09935896098613739, + "learning_rate": 1.9958652513639893e-05, + "loss": 0.8626465201377869, + "step": 945 + }, + { + "epoch": 0.17479876661546812, + "grad_norm": 0.08586036413908005, + "learning_rate": 1.9958471091930396e-05, + "loss": 0.8120777010917664, + "step": 946 + }, + { + "epoch": 0.17498354332436397, + "grad_norm": 0.0693439394235611, + "learning_rate": 1.9958289273904907e-05, + "loss": 0.6296210289001465, + "step": 947 + }, + { + "epoch": 0.1751683200332598, + "grad_norm": 0.09171350300312042, + "learning_rate": 1.9958107059570665e-05, + "loss": 0.7480406165122986, + "step": 948 + }, + { + "epoch": 0.17535309674215566, + "grad_norm": 0.09481046348810196, + "learning_rate": 1.9957924448934912e-05, + "loss": 0.931344211101532, + "step": 949 + }, + { + "epoch": 0.17553787345105148, + "grad_norm": 0.09654638916254044, + "learning_rate": 1.9957741442004922e-05, + "loss": 0.8118683695793152, + "step": 950 + }, + { + "epoch": 0.17572265015994734, + "grad_norm": 0.07205606251955032, + "learning_rate": 1.995755803878798e-05, + "loss": 0.46986880898475647, + "step": 951 + }, + { + "epoch": 0.17590742686884317, + "grad_norm": 0.09873572736978531, + "learning_rate": 1.9957374239291373e-05, + "loss": 0.8671227693557739, + "step": 952 + }, + { + "epoch": 0.17609220357773903, + "grad_norm": 0.0714307501912117, + "learning_rate": 1.995719004352243e-05, + "loss": 0.6845585703849792, + "step": 953 + }, + { + "epoch": 0.17627698028663488, + "grad_norm": 0.09012128412723541, + "learning_rate": 1.9957005451488476e-05, + "loss": 0.8618735671043396, + "step": 954 + }, + { + "epoch": 0.1764617569955307, + "grad_norm": 0.07841545343399048, + "learning_rate": 1.9956820463196857e-05, + "loss": 0.7780154943466187, + "step": 955 + }, + { + "epoch": 0.17664653370442657, + "grad_norm": 0.08329493552446365, + "learning_rate": 1.9956635078654928e-05, + "loss": 0.7111560106277466, + "step": 956 + }, + { + "epoch": 0.1768313104133224, + "grad_norm": 0.07888112962245941, + "learning_rate": 1.995644929787008e-05, + "loss": 0.6423460841178894, + "step": 957 + }, + { + "epoch": 0.17701608712221825, + "grad_norm": 0.08623760938644409, + "learning_rate": 1.9956263120849697e-05, + "loss": 0.7237339615821838, + "step": 958 + }, + { + "epoch": 0.17720086383111408, + "grad_norm": 0.09325414150953293, + "learning_rate": 1.9956076547601188e-05, + "loss": 0.584494948387146, + "step": 959 + }, + { + "epoch": 0.17738564054000994, + "grad_norm": 0.07906820625066757, + "learning_rate": 1.9955889578131984e-05, + "loss": 0.6621897220611572, + "step": 960 + }, + { + "epoch": 0.17757041724890577, + "grad_norm": 0.1097625195980072, + "learning_rate": 1.9955702212449522e-05, + "loss": 0.8541625142097473, + "step": 961 + }, + { + "epoch": 0.17775519395780162, + "grad_norm": 0.10267429798841476, + "learning_rate": 1.995551445056126e-05, + "loss": 0.9353813529014587, + "step": 962 + }, + { + "epoch": 0.17793997066669745, + "grad_norm": 0.0709155723452568, + "learning_rate": 1.995532629247467e-05, + "loss": 0.56894850730896, + "step": 963 + }, + { + "epoch": 0.1781247473755933, + "grad_norm": 0.08225613087415695, + "learning_rate": 1.9955137738197243e-05, + "loss": 0.7326880693435669, + "step": 964 + }, + { + "epoch": 0.17830952408448916, + "grad_norm": 0.08972861617803574, + "learning_rate": 1.9954948787736476e-05, + "loss": 0.6604665517807007, + "step": 965 + }, + { + "epoch": 0.178494300793385, + "grad_norm": 0.08534478396177292, + "learning_rate": 1.995475944109989e-05, + "loss": 0.648709774017334, + "step": 966 + }, + { + "epoch": 0.17867907750228085, + "grad_norm": 0.1054343581199646, + "learning_rate": 1.9954569698295024e-05, + "loss": 0.9679998159408569, + "step": 967 + }, + { + "epoch": 0.17886385421117668, + "grad_norm": 0.10303540527820587, + "learning_rate": 1.995437955932943e-05, + "loss": 0.9443056583404541, + "step": 968 + }, + { + "epoch": 0.17904863092007253, + "grad_norm": 0.0820276066660881, + "learning_rate": 1.9954189024210674e-05, + "loss": 0.6332961916923523, + "step": 969 + }, + { + "epoch": 0.17923340762896836, + "grad_norm": 0.07548929005861282, + "learning_rate": 1.995399809294633e-05, + "loss": 0.6359805464744568, + "step": 970 + }, + { + "epoch": 0.17941818433786422, + "grad_norm": 0.09777320176362991, + "learning_rate": 1.9953806765544012e-05, + "loss": 0.9533307552337646, + "step": 971 + }, + { + "epoch": 0.17960296104676005, + "grad_norm": 0.097019262611866, + "learning_rate": 1.9953615042011326e-05, + "loss": 1.0277851819992065, + "step": 972 + }, + { + "epoch": 0.1797877377556559, + "grad_norm": 0.07848110049962997, + "learning_rate": 1.9953422922355895e-05, + "loss": 0.5935329794883728, + "step": 973 + }, + { + "epoch": 0.17997251446455173, + "grad_norm": 0.12135414034128189, + "learning_rate": 1.995323040658538e-05, + "loss": 1.195263147354126, + "step": 974 + }, + { + "epoch": 0.1801572911734476, + "grad_norm": 0.11152930557727814, + "learning_rate": 1.995303749470743e-05, + "loss": 0.8123299479484558, + "step": 975 + }, + { + "epoch": 0.18034206788234344, + "grad_norm": 0.1028800904750824, + "learning_rate": 1.9952844186729728e-05, + "loss": 0.8166159391403198, + "step": 976 + }, + { + "epoch": 0.18052684459123927, + "grad_norm": 0.09579476714134216, + "learning_rate": 1.9952650482659966e-05, + "loss": 0.936848521232605, + "step": 977 + }, + { + "epoch": 0.18071162130013513, + "grad_norm": 0.09917067736387253, + "learning_rate": 1.995245638250585e-05, + "loss": 0.8639838099479675, + "step": 978 + }, + { + "epoch": 0.18089639800903096, + "grad_norm": 0.0719900131225586, + "learning_rate": 1.995226188627511e-05, + "loss": 0.7680509686470032, + "step": 979 + }, + { + "epoch": 0.1810811747179268, + "grad_norm": 0.08661803603172302, + "learning_rate": 1.9952066993975486e-05, + "loss": 0.7090547680854797, + "step": 980 + }, + { + "epoch": 0.18126595142682264, + "grad_norm": 0.08709144592285156, + "learning_rate": 1.9951871705614727e-05, + "loss": 0.7234505414962769, + "step": 981 + }, + { + "epoch": 0.1814507281357185, + "grad_norm": 0.08482590317726135, + "learning_rate": 1.995167602120061e-05, + "loss": 0.7665219306945801, + "step": 982 + }, + { + "epoch": 0.18163550484461433, + "grad_norm": 0.08609596639871597, + "learning_rate": 1.9951479940740923e-05, + "loss": 0.7475874423980713, + "step": 983 + }, + { + "epoch": 0.18182028155351018, + "grad_norm": 0.07693618535995483, + "learning_rate": 1.9951283464243468e-05, + "loss": 0.7607953548431396, + "step": 984 + }, + { + "epoch": 0.182005058262406, + "grad_norm": 0.10553596913814545, + "learning_rate": 1.995108659171607e-05, + "loss": 0.9523313045501709, + "step": 985 + }, + { + "epoch": 0.18218983497130187, + "grad_norm": 0.10550583153963089, + "learning_rate": 1.995088932316655e-05, + "loss": 0.8637052178382874, + "step": 986 + }, + { + "epoch": 0.18237461168019772, + "grad_norm": 0.0804867073893547, + "learning_rate": 1.995069165860277e-05, + "loss": 0.6479719877243042, + "step": 987 + }, + { + "epoch": 0.18255938838909355, + "grad_norm": 0.07370677590370178, + "learning_rate": 1.99504935980326e-05, + "loss": 0.49580657482147217, + "step": 988 + }, + { + "epoch": 0.1827441650979894, + "grad_norm": 0.09654896706342697, + "learning_rate": 1.995029514146391e-05, + "loss": 0.7312811017036438, + "step": 989 + }, + { + "epoch": 0.18292894180688524, + "grad_norm": 0.09291725605726242, + "learning_rate": 1.9950096288904605e-05, + "loss": 0.9061948657035828, + "step": 990 + }, + { + "epoch": 0.1831137185157811, + "grad_norm": 0.08796926587820053, + "learning_rate": 1.9949897040362596e-05, + "loss": 0.6325365304946899, + "step": 991 + }, + { + "epoch": 0.18329849522467692, + "grad_norm": 0.1115405261516571, + "learning_rate": 1.9949697395845816e-05, + "loss": 1.12819242477417, + "step": 992 + }, + { + "epoch": 0.18348327193357278, + "grad_norm": 0.09811253100633621, + "learning_rate": 1.9949497355362205e-05, + "loss": 0.9766569137573242, + "step": 993 + }, + { + "epoch": 0.1836680486424686, + "grad_norm": 0.07640735805034637, + "learning_rate": 1.994929691891973e-05, + "loss": 0.7131111025810242, + "step": 994 + }, + { + "epoch": 0.18385282535136446, + "grad_norm": 0.0764615386724472, + "learning_rate": 1.9949096086526368e-05, + "loss": 0.6240501403808594, + "step": 995 + }, + { + "epoch": 0.1840376020602603, + "grad_norm": 0.07657036185264587, + "learning_rate": 1.9948894858190108e-05, + "loss": 0.8184218406677246, + "step": 996 + }, + { + "epoch": 0.18422237876915615, + "grad_norm": 0.09979189932346344, + "learning_rate": 1.994869323391895e-05, + "loss": 0.7589877247810364, + "step": 997 + }, + { + "epoch": 0.184407155478052, + "grad_norm": 0.0920339897274971, + "learning_rate": 1.9948491213720937e-05, + "loss": 0.8375788927078247, + "step": 998 + }, + { + "epoch": 0.18459193218694783, + "grad_norm": 0.09007628262042999, + "learning_rate": 1.9948288797604093e-05, + "loss": 0.8215615749359131, + "step": 999 + }, + { + "epoch": 0.1847767088958437, + "grad_norm": 0.0851169154047966, + "learning_rate": 1.994808598557648e-05, + "loss": 0.7080106139183044, + "step": 1000 + }, + { + "epoch": 0.1847767088958437, + "eval_loss": 0.8302240371704102, + "eval_runtime": 157.4667, + "eval_samples_per_second": 115.764, + "eval_steps_per_second": 14.473, + "step": 1000 + }, + { + "epoch": 0.18496148560473952, + "grad_norm": 0.09749092161655426, + "learning_rate": 1.994788277764617e-05, + "loss": 0.7249320149421692, + "step": 1001 + }, + { + "epoch": 0.18514626231363537, + "grad_norm": 0.07457061111927032, + "learning_rate": 1.9947679173821245e-05, + "loss": 0.6512501835823059, + "step": 1002 + }, + { + "epoch": 0.1853310390225312, + "grad_norm": 0.11633574962615967, + "learning_rate": 1.9947475174109814e-05, + "loss": 0.9869779348373413, + "step": 1003 + }, + { + "epoch": 0.18551581573142706, + "grad_norm": 0.08418001234531403, + "learning_rate": 1.9947270778519995e-05, + "loss": 0.8138560056686401, + "step": 1004 + }, + { + "epoch": 0.1857005924403229, + "grad_norm": 0.10566221177577972, + "learning_rate": 1.9947065987059916e-05, + "loss": 0.6288236975669861, + "step": 1005 + }, + { + "epoch": 0.18588536914921874, + "grad_norm": 0.09031148254871368, + "learning_rate": 1.9946860799737732e-05, + "loss": 0.7053165435791016, + "step": 1006 + }, + { + "epoch": 0.18607014585811457, + "grad_norm": 0.09639808535575867, + "learning_rate": 1.994665521656161e-05, + "loss": 0.9204103350639343, + "step": 1007 + }, + { + "epoch": 0.18625492256701043, + "grad_norm": 0.08621752262115479, + "learning_rate": 1.9946449237539728e-05, + "loss": 0.8180477619171143, + "step": 1008 + }, + { + "epoch": 0.18643969927590628, + "grad_norm": 0.07304370403289795, + "learning_rate": 1.9946242862680282e-05, + "loss": 0.5348410606384277, + "step": 1009 + }, + { + "epoch": 0.1866244759848021, + "grad_norm": 0.07953565567731857, + "learning_rate": 1.994603609199149e-05, + "loss": 0.7202300429344177, + "step": 1010 + }, + { + "epoch": 0.18680925269369797, + "grad_norm": 0.08353704959154129, + "learning_rate": 1.994582892548158e-05, + "loss": 0.6676353216171265, + "step": 1011 + }, + { + "epoch": 0.1869940294025938, + "grad_norm": 0.08431074023246765, + "learning_rate": 1.9945621363158795e-05, + "loss": 0.7750869393348694, + "step": 1012 + }, + { + "epoch": 0.18717880611148965, + "grad_norm": 0.10428814589977264, + "learning_rate": 1.9945413405031392e-05, + "loss": 0.9942725896835327, + "step": 1013 + }, + { + "epoch": 0.18736358282038548, + "grad_norm": 0.0732867419719696, + "learning_rate": 1.9945205051107654e-05, + "loss": 0.6770198345184326, + "step": 1014 + }, + { + "epoch": 0.18754835952928134, + "grad_norm": 0.09832040220499039, + "learning_rate": 1.994499630139587e-05, + "loss": 0.9014858603477478, + "step": 1015 + }, + { + "epoch": 0.18773313623817717, + "grad_norm": 0.06784018129110336, + "learning_rate": 1.9944787155904346e-05, + "loss": 0.5552687048912048, + "step": 1016 + }, + { + "epoch": 0.18791791294707302, + "grad_norm": 0.06917358934879303, + "learning_rate": 1.9944577614641404e-05, + "loss": 0.7413119673728943, + "step": 1017 + }, + { + "epoch": 0.18810268965596885, + "grad_norm": 0.09127135574817657, + "learning_rate": 1.9944367677615392e-05, + "loss": 0.9209888577461243, + "step": 1018 + }, + { + "epoch": 0.1882874663648647, + "grad_norm": 0.09378290921449661, + "learning_rate": 1.9944157344834655e-05, + "loss": 0.7302212715148926, + "step": 1019 + }, + { + "epoch": 0.18847224307376056, + "grad_norm": 0.08918961882591248, + "learning_rate": 1.9943946616307562e-05, + "loss": 0.9352380633354187, + "step": 1020 + }, + { + "epoch": 0.1886570197826564, + "grad_norm": 0.0958271324634552, + "learning_rate": 1.9943735492042512e-05, + "loss": 0.6923654675483704, + "step": 1021 + }, + { + "epoch": 0.18884179649155225, + "grad_norm": 0.08914941549301147, + "learning_rate": 1.9943523972047894e-05, + "loss": 0.701963484287262, + "step": 1022 + }, + { + "epoch": 0.18902657320044808, + "grad_norm": 0.10291727632284164, + "learning_rate": 1.9943312056332135e-05, + "loss": 0.9576240181922913, + "step": 1023 + }, + { + "epoch": 0.18921134990934393, + "grad_norm": 0.09569457173347473, + "learning_rate": 1.9943099744903663e-05, + "loss": 0.9132493734359741, + "step": 1024 + }, + { + "epoch": 0.18939612661823976, + "grad_norm": 0.08394474536180496, + "learning_rate": 1.9942887037770927e-05, + "loss": 0.7519565224647522, + "step": 1025 + }, + { + "epoch": 0.18958090332713562, + "grad_norm": 0.08643844723701477, + "learning_rate": 1.9942673934942398e-05, + "loss": 0.8622266054153442, + "step": 1026 + }, + { + "epoch": 0.18976568003603145, + "grad_norm": 0.08506625890731812, + "learning_rate": 1.994246043642655e-05, + "loss": 0.7522600889205933, + "step": 1027 + }, + { + "epoch": 0.1899504567449273, + "grad_norm": 0.0753428116440773, + "learning_rate": 1.9942246542231888e-05, + "loss": 0.6824458837509155, + "step": 1028 + }, + { + "epoch": 0.19013523345382313, + "grad_norm": 0.09744856506586075, + "learning_rate": 1.994203225236691e-05, + "loss": 0.7293679714202881, + "step": 1029 + }, + { + "epoch": 0.190320010162719, + "grad_norm": 0.08699744939804077, + "learning_rate": 1.994181756684016e-05, + "loss": 0.8544273972511292, + "step": 1030 + }, + { + "epoch": 0.19050478687161485, + "grad_norm": 0.08195915818214417, + "learning_rate": 1.9941602485660172e-05, + "loss": 0.6181380748748779, + "step": 1031 + }, + { + "epoch": 0.19068956358051067, + "grad_norm": 0.09370932728052139, + "learning_rate": 1.994138700883551e-05, + "loss": 0.7301265597343445, + "step": 1032 + }, + { + "epoch": 0.19087434028940653, + "grad_norm": 0.08087928593158722, + "learning_rate": 1.9941171136374746e-05, + "loss": 0.6871247291564941, + "step": 1033 + }, + { + "epoch": 0.19105911699830236, + "grad_norm": 0.07140730321407318, + "learning_rate": 1.9940954868286476e-05, + "loss": 0.576732873916626, + "step": 1034 + }, + { + "epoch": 0.19124389370719821, + "grad_norm": 0.08278313279151917, + "learning_rate": 1.9940738204579298e-05, + "loss": 0.7809289693832397, + "step": 1035 + }, + { + "epoch": 0.19142867041609404, + "grad_norm": 0.09478002786636353, + "learning_rate": 1.9940521145261845e-05, + "loss": 0.7151646018028259, + "step": 1036 + }, + { + "epoch": 0.1916134471249899, + "grad_norm": 0.08096028864383698, + "learning_rate": 1.994030369034275e-05, + "loss": 0.8550668954849243, + "step": 1037 + }, + { + "epoch": 0.19179822383388573, + "grad_norm": 0.08005808293819427, + "learning_rate": 1.994008583983066e-05, + "loss": 0.8426173329353333, + "step": 1038 + }, + { + "epoch": 0.19198300054278158, + "grad_norm": 0.08567973971366882, + "learning_rate": 1.993986759373426e-05, + "loss": 0.8091012239456177, + "step": 1039 + }, + { + "epoch": 0.1921677772516774, + "grad_norm": 0.06659490615129471, + "learning_rate": 1.9939648952062227e-05, + "loss": 0.5663833618164062, + "step": 1040 + }, + { + "epoch": 0.19235255396057327, + "grad_norm": 0.11582151055335999, + "learning_rate": 1.9939429914823258e-05, + "loss": 1.0861891508102417, + "step": 1041 + }, + { + "epoch": 0.19253733066946913, + "grad_norm": 0.07196705043315887, + "learning_rate": 1.9939210482026082e-05, + "loss": 0.6811832785606384, + "step": 1042 + }, + { + "epoch": 0.19272210737836495, + "grad_norm": 0.08960779756307602, + "learning_rate": 1.9938990653679418e-05, + "loss": 0.7905018329620361, + "step": 1043 + }, + { + "epoch": 0.1929068840872608, + "grad_norm": 0.07718110829591751, + "learning_rate": 1.9938770429792026e-05, + "loss": 0.753892183303833, + "step": 1044 + }, + { + "epoch": 0.19309166079615664, + "grad_norm": 0.08389970660209656, + "learning_rate": 1.993854981037266e-05, + "loss": 0.8010688424110413, + "step": 1045 + }, + { + "epoch": 0.1932764375050525, + "grad_norm": 0.0901460349559784, + "learning_rate": 1.993832879543011e-05, + "loss": 0.7262868881225586, + "step": 1046 + }, + { + "epoch": 0.19346121421394832, + "grad_norm": 0.06715024262666702, + "learning_rate": 1.9938107384973165e-05, + "loss": 0.6049861907958984, + "step": 1047 + }, + { + "epoch": 0.19364599092284418, + "grad_norm": 0.0782829076051712, + "learning_rate": 1.993788557901064e-05, + "loss": 0.7502110004425049, + "step": 1048 + }, + { + "epoch": 0.19383076763174, + "grad_norm": 0.09568478912115097, + "learning_rate": 1.993766337755136e-05, + "loss": 0.8624519109725952, + "step": 1049 + }, + { + "epoch": 0.19401554434063586, + "grad_norm": 0.07993607223033905, + "learning_rate": 1.9937440780604164e-05, + "loss": 0.7470861077308655, + "step": 1050 + }, + { + "epoch": 0.1942003210495317, + "grad_norm": 0.08586437255144119, + "learning_rate": 1.993721778817792e-05, + "loss": 0.7969268560409546, + "step": 1051 + }, + { + "epoch": 0.19438509775842755, + "grad_norm": 0.07363275438547134, + "learning_rate": 1.99369944002815e-05, + "loss": 0.5416469573974609, + "step": 1052 + }, + { + "epoch": 0.1945698744673234, + "grad_norm": 0.0870773121714592, + "learning_rate": 1.9936770616923786e-05, + "loss": 0.6686387658119202, + "step": 1053 + }, + { + "epoch": 0.19475465117621923, + "grad_norm": 0.08845312148332596, + "learning_rate": 1.9936546438113694e-05, + "loss": 0.7606138586997986, + "step": 1054 + }, + { + "epoch": 0.1949394278851151, + "grad_norm": 0.0819728672504425, + "learning_rate": 1.9936321863860136e-05, + "loss": 0.6012722849845886, + "step": 1055 + }, + { + "epoch": 0.19512420459401092, + "grad_norm": 0.10822150856256485, + "learning_rate": 1.9936096894172058e-05, + "loss": 0.9842246770858765, + "step": 1056 + }, + { + "epoch": 0.19530898130290678, + "grad_norm": 0.07973014563322067, + "learning_rate": 1.9935871529058413e-05, + "loss": 0.6289697289466858, + "step": 1057 + }, + { + "epoch": 0.1954937580118026, + "grad_norm": 0.0844748392701149, + "learning_rate": 1.993564576852816e-05, + "loss": 0.7360936403274536, + "step": 1058 + }, + { + "epoch": 0.19567853472069846, + "grad_norm": 0.08219747990369797, + "learning_rate": 1.9935419612590295e-05, + "loss": 0.6736936569213867, + "step": 1059 + }, + { + "epoch": 0.1958633114295943, + "grad_norm": 0.07414291799068451, + "learning_rate": 1.993519306125381e-05, + "loss": 0.7290785312652588, + "step": 1060 + }, + { + "epoch": 0.19604808813849015, + "grad_norm": 0.08525729924440384, + "learning_rate": 1.9934966114527726e-05, + "loss": 0.6029162406921387, + "step": 1061 + }, + { + "epoch": 0.19623286484738597, + "grad_norm": 0.09205019474029541, + "learning_rate": 1.9934738772421072e-05, + "loss": 0.9196493029594421, + "step": 1062 + }, + { + "epoch": 0.19641764155628183, + "grad_norm": 0.10401707142591476, + "learning_rate": 1.99345110349429e-05, + "loss": 0.8627897500991821, + "step": 1063 + }, + { + "epoch": 0.1966024182651777, + "grad_norm": 0.08633103966712952, + "learning_rate": 1.9934282902102266e-05, + "loss": 0.7805629372596741, + "step": 1064 + }, + { + "epoch": 0.19678719497407351, + "grad_norm": 0.08859745413064957, + "learning_rate": 1.9934054373908255e-05, + "loss": 0.7532527446746826, + "step": 1065 + }, + { + "epoch": 0.19697197168296937, + "grad_norm": 0.08352278918027878, + "learning_rate": 1.993382545036996e-05, + "loss": 0.6543923020362854, + "step": 1066 + }, + { + "epoch": 0.1971567483918652, + "grad_norm": 0.1249319389462471, + "learning_rate": 1.993359613149649e-05, + "loss": 1.1770665645599365, + "step": 1067 + }, + { + "epoch": 0.19734152510076106, + "grad_norm": 0.07279996573925018, + "learning_rate": 1.993336641729697e-05, + "loss": 0.5936124324798584, + "step": 1068 + }, + { + "epoch": 0.19752630180965688, + "grad_norm": 0.08983556926250458, + "learning_rate": 1.9933136307780547e-05, + "loss": 0.7313957214355469, + "step": 1069 + }, + { + "epoch": 0.19771107851855274, + "grad_norm": 0.08589496463537216, + "learning_rate": 1.9932905802956375e-05, + "loss": 0.7616296410560608, + "step": 1070 + }, + { + "epoch": 0.19789585522744857, + "grad_norm": 0.0885387435555458, + "learning_rate": 1.993267490283363e-05, + "loss": 0.7941570281982422, + "step": 1071 + }, + { + "epoch": 0.19808063193634443, + "grad_norm": 0.09348790347576141, + "learning_rate": 1.9932443607421496e-05, + "loss": 0.8178659677505493, + "step": 1072 + }, + { + "epoch": 0.19826540864524025, + "grad_norm": 0.07949327677488327, + "learning_rate": 1.9932211916729182e-05, + "loss": 0.7278982400894165, + "step": 1073 + }, + { + "epoch": 0.1984501853541361, + "grad_norm": 0.07637959718704224, + "learning_rate": 1.9931979830765907e-05, + "loss": 0.6828699707984924, + "step": 1074 + }, + { + "epoch": 0.19863496206303197, + "grad_norm": 0.08116207271814346, + "learning_rate": 1.993174734954091e-05, + "loss": 0.641601026058197, + "step": 1075 + }, + { + "epoch": 0.1988197387719278, + "grad_norm": 0.08993934839963913, + "learning_rate": 1.993151447306344e-05, + "loss": 0.9200141429901123, + "step": 1076 + }, + { + "epoch": 0.19900451548082365, + "grad_norm": 0.0704822912812233, + "learning_rate": 1.9931281201342765e-05, + "loss": 0.5704006552696228, + "step": 1077 + }, + { + "epoch": 0.19918929218971948, + "grad_norm": 0.08818235993385315, + "learning_rate": 1.993104753438817e-05, + "loss": 0.8348625302314758, + "step": 1078 + }, + { + "epoch": 0.19937406889861534, + "grad_norm": 0.09258076548576355, + "learning_rate": 1.9930813472208953e-05, + "loss": 0.8007818460464478, + "step": 1079 + }, + { + "epoch": 0.19955884560751116, + "grad_norm": 0.08599089831113815, + "learning_rate": 1.993057901481443e-05, + "loss": 0.7286479473114014, + "step": 1080 + }, + { + "epoch": 0.19974362231640702, + "grad_norm": 0.07952600717544556, + "learning_rate": 1.9930344162213933e-05, + "loss": 0.7580991387367249, + "step": 1081 + }, + { + "epoch": 0.19992839902530285, + "grad_norm": 0.07922924309968948, + "learning_rate": 1.9930108914416803e-05, + "loss": 0.6698630452156067, + "step": 1082 + }, + { + "epoch": 0.2001131757341987, + "grad_norm": 0.10020700097084045, + "learning_rate": 1.9929873271432406e-05, + "loss": 0.821386456489563, + "step": 1083 + }, + { + "epoch": 0.20029795244309453, + "grad_norm": 0.103725366294384, + "learning_rate": 1.9929637233270117e-05, + "loss": 1.2106066942214966, + "step": 1084 + }, + { + "epoch": 0.2004827291519904, + "grad_norm": 0.08130776882171631, + "learning_rate": 1.9929400799939338e-05, + "loss": 0.8309373259544373, + "step": 1085 + }, + { + "epoch": 0.20066750586088625, + "grad_norm": 0.09061729907989502, + "learning_rate": 1.992916397144947e-05, + "loss": 0.6868097186088562, + "step": 1086 + }, + { + "epoch": 0.20085228256978208, + "grad_norm": 0.08802130818367004, + "learning_rate": 1.992892674780994e-05, + "loss": 0.754677414894104, + "step": 1087 + }, + { + "epoch": 0.20103705927867793, + "grad_norm": 0.09083602577447891, + "learning_rate": 1.9928689129030187e-05, + "loss": 0.8705667853355408, + "step": 1088 + }, + { + "epoch": 0.20122183598757376, + "grad_norm": 0.0779944658279419, + "learning_rate": 1.992845111511967e-05, + "loss": 0.8392444849014282, + "step": 1089 + }, + { + "epoch": 0.20140661269646962, + "grad_norm": 0.07750675082206726, + "learning_rate": 1.9928212706087864e-05, + "loss": 0.6429112553596497, + "step": 1090 + }, + { + "epoch": 0.20159138940536545, + "grad_norm": 0.09703640639781952, + "learning_rate": 1.992797390194425e-05, + "loss": 0.6411582827568054, + "step": 1091 + }, + { + "epoch": 0.2017761661142613, + "grad_norm": 0.07213166356086731, + "learning_rate": 1.9927734702698335e-05, + "loss": 0.5872079133987427, + "step": 1092 + }, + { + "epoch": 0.20196094282315713, + "grad_norm": 0.07832180708646774, + "learning_rate": 1.9927495108359642e-05, + "loss": 0.6529865860939026, + "step": 1093 + }, + { + "epoch": 0.202145719532053, + "grad_norm": 0.08114868402481079, + "learning_rate": 1.9927255118937702e-05, + "loss": 0.6691939830780029, + "step": 1094 + }, + { + "epoch": 0.20233049624094882, + "grad_norm": 0.09954454749822617, + "learning_rate": 1.9927014734442064e-05, + "loss": 0.8835906982421875, + "step": 1095 + }, + { + "epoch": 0.20251527294984467, + "grad_norm": 0.06392431259155273, + "learning_rate": 1.9926773954882298e-05, + "loss": 0.501159131526947, + "step": 1096 + }, + { + "epoch": 0.20270004965874053, + "grad_norm": 0.07308457046747208, + "learning_rate": 1.9926532780267988e-05, + "loss": 0.5708874464035034, + "step": 1097 + }, + { + "epoch": 0.20288482636763636, + "grad_norm": 0.07481972128152847, + "learning_rate": 1.992629121060873e-05, + "loss": 0.5472757816314697, + "step": 1098 + }, + { + "epoch": 0.2030696030765322, + "grad_norm": 0.08071167767047882, + "learning_rate": 1.9926049245914135e-05, + "loss": 0.77153080701828, + "step": 1099 + }, + { + "epoch": 0.20325437978542804, + "grad_norm": 0.0896330252289772, + "learning_rate": 1.9925806886193836e-05, + "loss": 0.8954538702964783, + "step": 1100 + }, + { + "epoch": 0.2034391564943239, + "grad_norm": 0.069943867623806, + "learning_rate": 1.9925564131457476e-05, + "loss": 0.6015023589134216, + "step": 1101 + }, + { + "epoch": 0.20362393320321973, + "grad_norm": 0.11448749899864197, + "learning_rate": 1.9925320981714715e-05, + "loss": 0.9205179214477539, + "step": 1102 + }, + { + "epoch": 0.20380870991211558, + "grad_norm": 0.09403878450393677, + "learning_rate": 1.9925077436975235e-05, + "loss": 0.7833645343780518, + "step": 1103 + }, + { + "epoch": 0.2039934866210114, + "grad_norm": 0.08931637555360794, + "learning_rate": 1.992483349724872e-05, + "loss": 0.7928817868232727, + "step": 1104 + }, + { + "epoch": 0.20417826332990727, + "grad_norm": 0.07081054151058197, + "learning_rate": 1.9924589162544886e-05, + "loss": 0.5713711977005005, + "step": 1105 + }, + { + "epoch": 0.2043630400388031, + "grad_norm": 0.11387964338064194, + "learning_rate": 1.992434443287345e-05, + "loss": 1.0558032989501953, + "step": 1106 + }, + { + "epoch": 0.20454781674769895, + "grad_norm": 0.09888028353452682, + "learning_rate": 1.9924099308244158e-05, + "loss": 0.9759566783905029, + "step": 1107 + }, + { + "epoch": 0.2047325934565948, + "grad_norm": 0.08901600539684296, + "learning_rate": 1.9923853788666762e-05, + "loss": 0.7663286924362183, + "step": 1108 + }, + { + "epoch": 0.20491737016549064, + "grad_norm": 0.07079759985208511, + "learning_rate": 1.992360787415103e-05, + "loss": 0.679031252861023, + "step": 1109 + }, + { + "epoch": 0.2051021468743865, + "grad_norm": 0.07839877158403397, + "learning_rate": 1.9923361564706755e-05, + "loss": 0.7949055433273315, + "step": 1110 + }, + { + "epoch": 0.20528692358328232, + "grad_norm": 0.09051861613988876, + "learning_rate": 1.9923114860343735e-05, + "loss": 0.7728940844535828, + "step": 1111 + }, + { + "epoch": 0.20547170029217818, + "grad_norm": 0.06782103329896927, + "learning_rate": 1.992286776107179e-05, + "loss": 0.8745223879814148, + "step": 1112 + }, + { + "epoch": 0.205656477001074, + "grad_norm": 0.08042112737894058, + "learning_rate": 1.992262026690075e-05, + "loss": 0.823594868183136, + "step": 1113 + }, + { + "epoch": 0.20584125370996986, + "grad_norm": 0.09936164319515228, + "learning_rate": 1.992237237784047e-05, + "loss": 0.8101600408554077, + "step": 1114 + }, + { + "epoch": 0.2060260304188657, + "grad_norm": 0.09016161412000656, + "learning_rate": 1.992212409390081e-05, + "loss": 0.7377327680587769, + "step": 1115 + }, + { + "epoch": 0.20621080712776155, + "grad_norm": 0.08577019721269608, + "learning_rate": 1.9921875415091655e-05, + "loss": 0.6838337779045105, + "step": 1116 + }, + { + "epoch": 0.20639558383665738, + "grad_norm": 0.07404208183288574, + "learning_rate": 1.9921626341422898e-05, + "loss": 0.6607319712638855, + "step": 1117 + }, + { + "epoch": 0.20658036054555323, + "grad_norm": 0.06708002835512161, + "learning_rate": 1.9921376872904457e-05, + "loss": 0.5383575558662415, + "step": 1118 + }, + { + "epoch": 0.2067651372544491, + "grad_norm": 0.07707026600837708, + "learning_rate": 1.9921127009546256e-05, + "loss": 0.6438384056091309, + "step": 1119 + }, + { + "epoch": 0.20694991396334492, + "grad_norm": 0.09694302827119827, + "learning_rate": 1.9920876751358237e-05, + "loss": 0.8988054990768433, + "step": 1120 + }, + { + "epoch": 0.20713469067224077, + "grad_norm": 0.07617320120334625, + "learning_rate": 1.9920626098350362e-05, + "loss": 0.5821647644042969, + "step": 1121 + }, + { + "epoch": 0.2073194673811366, + "grad_norm": 0.07221636176109314, + "learning_rate": 1.9920375050532605e-05, + "loss": 0.7280129790306091, + "step": 1122 + }, + { + "epoch": 0.20750424409003246, + "grad_norm": 0.0678236186504364, + "learning_rate": 1.9920123607914962e-05, + "loss": 0.5645017027854919, + "step": 1123 + }, + { + "epoch": 0.2076890207989283, + "grad_norm": 0.06640082597732544, + "learning_rate": 1.991987177050743e-05, + "loss": 0.6989132165908813, + "step": 1124 + }, + { + "epoch": 0.20787379750782414, + "grad_norm": 0.08389496803283691, + "learning_rate": 1.991961953832004e-05, + "loss": 0.7181524634361267, + "step": 1125 + }, + { + "epoch": 0.20805857421671997, + "grad_norm": 0.0670672282576561, + "learning_rate": 1.9919366911362828e-05, + "loss": 0.661719799041748, + "step": 1126 + }, + { + "epoch": 0.20824335092561583, + "grad_norm": 0.06571565568447113, + "learning_rate": 1.9919113889645846e-05, + "loss": 0.48099377751350403, + "step": 1127 + }, + { + "epoch": 0.20842812763451166, + "grad_norm": 0.08513090759515762, + "learning_rate": 1.991886047317916e-05, + "loss": 0.7070115208625793, + "step": 1128 + }, + { + "epoch": 0.2086129043434075, + "grad_norm": 0.08598559349775314, + "learning_rate": 1.9918606661972863e-05, + "loss": 0.9339227676391602, + "step": 1129 + }, + { + "epoch": 0.20879768105230337, + "grad_norm": 0.0697142630815506, + "learning_rate": 1.9918352456037054e-05, + "loss": 0.5696187019348145, + "step": 1130 + }, + { + "epoch": 0.2089824577611992, + "grad_norm": 0.09361305087804794, + "learning_rate": 1.9918097855381843e-05, + "loss": 0.8069785833358765, + "step": 1131 + }, + { + "epoch": 0.20916723447009505, + "grad_norm": 0.10314858704805374, + "learning_rate": 1.9917842860017372e-05, + "loss": 0.9178709387779236, + "step": 1132 + }, + { + "epoch": 0.20935201117899088, + "grad_norm": 0.0890069231390953, + "learning_rate": 1.991758746995378e-05, + "loss": 0.7417526245117188, + "step": 1133 + }, + { + "epoch": 0.20953678788788674, + "grad_norm": 0.07984063774347305, + "learning_rate": 1.991733168520124e-05, + "loss": 0.6990677118301392, + "step": 1134 + }, + { + "epoch": 0.20972156459678257, + "grad_norm": 0.0774579867720604, + "learning_rate": 1.991707550576992e-05, + "loss": 0.7703189253807068, + "step": 1135 + }, + { + "epoch": 0.20990634130567842, + "grad_norm": 0.07212287187576294, + "learning_rate": 1.9916818931670026e-05, + "loss": 0.6072457432746887, + "step": 1136 + }, + { + "epoch": 0.21009111801457425, + "grad_norm": 0.06705975532531738, + "learning_rate": 1.9916561962911762e-05, + "loss": 0.5408006310462952, + "step": 1137 + }, + { + "epoch": 0.2102758947234701, + "grad_norm": 0.0907805860042572, + "learning_rate": 1.9916304599505358e-05, + "loss": 0.8637043833732605, + "step": 1138 + }, + { + "epoch": 0.21046067143236594, + "grad_norm": 0.0981602668762207, + "learning_rate": 1.9916046841461056e-05, + "loss": 0.7083920240402222, + "step": 1139 + }, + { + "epoch": 0.2106454481412618, + "grad_norm": 0.0859125480055809, + "learning_rate": 1.9915788688789107e-05, + "loss": 0.754231870174408, + "step": 1140 + }, + { + "epoch": 0.21083022485015765, + "grad_norm": 0.06962268799543381, + "learning_rate": 1.9915530141499796e-05, + "loss": 0.7630135416984558, + "step": 1141 + }, + { + "epoch": 0.21101500155905348, + "grad_norm": 0.09127385914325714, + "learning_rate": 1.991527119960341e-05, + "loss": 1.0107409954071045, + "step": 1142 + }, + { + "epoch": 0.21119977826794933, + "grad_norm": 0.0875440314412117, + "learning_rate": 1.9915011863110244e-05, + "loss": 0.6957036256790161, + "step": 1143 + }, + { + "epoch": 0.21138455497684516, + "grad_norm": 0.0832233801484108, + "learning_rate": 1.9914752132030634e-05, + "loss": 0.8316786289215088, + "step": 1144 + }, + { + "epoch": 0.21156933168574102, + "grad_norm": 0.09219091385602951, + "learning_rate": 1.99144920063749e-05, + "loss": 0.9146633148193359, + "step": 1145 + }, + { + "epoch": 0.21175410839463685, + "grad_norm": 0.06797627359628677, + "learning_rate": 1.991423148615341e-05, + "loss": 0.5604722499847412, + "step": 1146 + }, + { + "epoch": 0.2119388851035327, + "grad_norm": 0.09596192091703415, + "learning_rate": 1.9913970571376517e-05, + "loss": 0.9451921582221985, + "step": 1147 + }, + { + "epoch": 0.21212366181242853, + "grad_norm": 0.08437191694974899, + "learning_rate": 1.9913709262054616e-05, + "loss": 0.8336598873138428, + "step": 1148 + }, + { + "epoch": 0.2123084385213244, + "grad_norm": 0.07678723335266113, + "learning_rate": 1.9913447558198104e-05, + "loss": 0.7418199181556702, + "step": 1149 + }, + { + "epoch": 0.21249321523022022, + "grad_norm": 0.08348348736763, + "learning_rate": 1.9913185459817392e-05, + "loss": 0.7916473150253296, + "step": 1150 + }, + { + "epoch": 0.21267799193911607, + "grad_norm": 0.08356853574514389, + "learning_rate": 1.9912922966922913e-05, + "loss": 0.716545045375824, + "step": 1151 + }, + { + "epoch": 0.21286276864801193, + "grad_norm": 0.055216237902641296, + "learning_rate": 1.9912660079525115e-05, + "loss": 0.5315919518470764, + "step": 1152 + }, + { + "epoch": 0.21304754535690776, + "grad_norm": 0.09753018617630005, + "learning_rate": 1.991239679763446e-05, + "loss": 1.0574451684951782, + "step": 1153 + }, + { + "epoch": 0.21323232206580361, + "grad_norm": 0.08376552909612656, + "learning_rate": 1.9912133121261422e-05, + "loss": 0.9052149057388306, + "step": 1154 + }, + { + "epoch": 0.21341709877469944, + "grad_norm": 0.09207002073526382, + "learning_rate": 1.9911869050416495e-05, + "loss": 0.9631213545799255, + "step": 1155 + }, + { + "epoch": 0.2136018754835953, + "grad_norm": 0.10031726956367493, + "learning_rate": 1.9911604585110192e-05, + "loss": 0.6098602414131165, + "step": 1156 + }, + { + "epoch": 0.21378665219249113, + "grad_norm": 0.07222917675971985, + "learning_rate": 1.9911339725353036e-05, + "loss": 0.5439106225967407, + "step": 1157 + }, + { + "epoch": 0.21397142890138698, + "grad_norm": 0.08820630609989166, + "learning_rate": 1.991107447115557e-05, + "loss": 0.6557695269584656, + "step": 1158 + }, + { + "epoch": 0.2141562056102828, + "grad_norm": 0.09099892526865005, + "learning_rate": 1.9910808822528346e-05, + "loss": 0.7740167379379272, + "step": 1159 + }, + { + "epoch": 0.21434098231917867, + "grad_norm": 0.08491285890340805, + "learning_rate": 1.9910542779481938e-05, + "loss": 0.7722455859184265, + "step": 1160 + }, + { + "epoch": 0.2145257590280745, + "grad_norm": 0.09214689582586288, + "learning_rate": 1.991027634202693e-05, + "loss": 0.9850207567214966, + "step": 1161 + }, + { + "epoch": 0.21471053573697035, + "grad_norm": 0.08727172762155533, + "learning_rate": 1.991000951017393e-05, + "loss": 0.7745745778083801, + "step": 1162 + }, + { + "epoch": 0.2148953124458662, + "grad_norm": 0.07708032429218292, + "learning_rate": 1.990974228393356e-05, + "loss": 0.6668936610221863, + "step": 1163 + }, + { + "epoch": 0.21508008915476204, + "grad_norm": 0.08658210188150406, + "learning_rate": 1.990947466331645e-05, + "loss": 0.8688307404518127, + "step": 1164 + }, + { + "epoch": 0.2152648658636579, + "grad_norm": 0.07693911343812943, + "learning_rate": 1.9909206648333247e-05, + "loss": 0.7023448944091797, + "step": 1165 + }, + { + "epoch": 0.21544964257255372, + "grad_norm": 0.0952480137348175, + "learning_rate": 1.9908938238994624e-05, + "loss": 0.5236302614212036, + "step": 1166 + }, + { + "epoch": 0.21563441928144958, + "grad_norm": 0.09324135631322861, + "learning_rate": 1.9908669435311258e-05, + "loss": 0.7592065334320068, + "step": 1167 + }, + { + "epoch": 0.2158191959903454, + "grad_norm": 0.06865952908992767, + "learning_rate": 1.990840023729385e-05, + "loss": 0.6358137130737305, + "step": 1168 + }, + { + "epoch": 0.21600397269924126, + "grad_norm": 0.09976091235876083, + "learning_rate": 1.9908130644953118e-05, + "loss": 0.8514857888221741, + "step": 1169 + }, + { + "epoch": 0.2161887494081371, + "grad_norm": 0.07364201545715332, + "learning_rate": 1.9907860658299777e-05, + "loss": 0.6873587965965271, + "step": 1170 + }, + { + "epoch": 0.21637352611703295, + "grad_norm": 0.0919695496559143, + "learning_rate": 1.9907590277344582e-05, + "loss": 0.8332082033157349, + "step": 1171 + }, + { + "epoch": 0.21655830282592878, + "grad_norm": 0.06787417829036713, + "learning_rate": 1.990731950209829e-05, + "loss": 0.6495833992958069, + "step": 1172 + }, + { + "epoch": 0.21674307953482463, + "grad_norm": 0.09035515785217285, + "learning_rate": 1.990704833257168e-05, + "loss": 0.8606963157653809, + "step": 1173 + }, + { + "epoch": 0.2169278562437205, + "grad_norm": 0.08666759729385376, + "learning_rate": 1.990677676877554e-05, + "loss": 0.9564822316169739, + "step": 1174 + }, + { + "epoch": 0.21711263295261632, + "grad_norm": 0.10453042387962341, + "learning_rate": 1.9906504810720676e-05, + "loss": 0.8247703909873962, + "step": 1175 + }, + { + "epoch": 0.21729740966151218, + "grad_norm": 0.08272422850131989, + "learning_rate": 1.990623245841792e-05, + "loss": 0.8222265839576721, + "step": 1176 + }, + { + "epoch": 0.217482186370408, + "grad_norm": 0.07431092113256454, + "learning_rate": 1.9905959711878104e-05, + "loss": 0.644834041595459, + "step": 1177 + }, + { + "epoch": 0.21766696307930386, + "grad_norm": 0.07192020118236542, + "learning_rate": 1.990568657111208e-05, + "loss": 0.6560988426208496, + "step": 1178 + }, + { + "epoch": 0.2178517397881997, + "grad_norm": 0.09463713318109512, + "learning_rate": 1.9905413036130722e-05, + "loss": 0.6308455467224121, + "step": 1179 + }, + { + "epoch": 0.21803651649709554, + "grad_norm": 0.07202714681625366, + "learning_rate": 1.9905139106944916e-05, + "loss": 0.6653580665588379, + "step": 1180 + }, + { + "epoch": 0.21822129320599137, + "grad_norm": 0.08728264272212982, + "learning_rate": 1.990486478356556e-05, + "loss": 1.002651572227478, + "step": 1181 + }, + { + "epoch": 0.21840606991488723, + "grad_norm": 0.06476552039384842, + "learning_rate": 1.9904590066003577e-05, + "loss": 0.5150159001350403, + "step": 1182 + }, + { + "epoch": 0.21859084662378306, + "grad_norm": 0.09379825741052628, + "learning_rate": 1.99043149542699e-05, + "loss": 0.8470251560211182, + "step": 1183 + }, + { + "epoch": 0.21877562333267891, + "grad_norm": 0.08273567259311676, + "learning_rate": 1.990403944837547e-05, + "loss": 0.7435149550437927, + "step": 1184 + }, + { + "epoch": 0.21896040004157477, + "grad_norm": 0.09383412450551987, + "learning_rate": 1.9903763548331257e-05, + "loss": 0.8953707218170166, + "step": 1185 + }, + { + "epoch": 0.2191451767504706, + "grad_norm": 0.07706382870674133, + "learning_rate": 1.9903487254148236e-05, + "loss": 0.7261871099472046, + "step": 1186 + }, + { + "epoch": 0.21932995345936646, + "grad_norm": 0.08325329422950745, + "learning_rate": 1.9903210565837413e-05, + "loss": 0.7809126377105713, + "step": 1187 + }, + { + "epoch": 0.21951473016826228, + "grad_norm": 0.08461137115955353, + "learning_rate": 1.9902933483409786e-05, + "loss": 0.7430332899093628, + "step": 1188 + }, + { + "epoch": 0.21969950687715814, + "grad_norm": 0.08576197177171707, + "learning_rate": 1.9902656006876394e-05, + "loss": 0.9017677307128906, + "step": 1189 + }, + { + "epoch": 0.21988428358605397, + "grad_norm": 0.08884377777576447, + "learning_rate": 1.990237813624827e-05, + "loss": 0.8662840127944946, + "step": 1190 + }, + { + "epoch": 0.22006906029494983, + "grad_norm": 0.0905468612909317, + "learning_rate": 1.990209987153648e-05, + "loss": 0.7434231042861938, + "step": 1191 + }, + { + "epoch": 0.22025383700384565, + "grad_norm": 0.09583505243062973, + "learning_rate": 1.990182121275209e-05, + "loss": 0.8754289150238037, + "step": 1192 + }, + { + "epoch": 0.2204386137127415, + "grad_norm": 0.08617255091667175, + "learning_rate": 1.99015421599062e-05, + "loss": 0.8593115210533142, + "step": 1193 + }, + { + "epoch": 0.22062339042163737, + "grad_norm": 0.07539169490337372, + "learning_rate": 1.990126271300991e-05, + "loss": 0.7073256969451904, + "step": 1194 + }, + { + "epoch": 0.2208081671305332, + "grad_norm": 0.07064154744148254, + "learning_rate": 1.990098287207434e-05, + "loss": 0.7071460485458374, + "step": 1195 + }, + { + "epoch": 0.22099294383942905, + "grad_norm": 0.09236112982034683, + "learning_rate": 1.9900702637110627e-05, + "loss": 0.9311996102333069, + "step": 1196 + }, + { + "epoch": 0.22117772054832488, + "grad_norm": 0.09491878002882004, + "learning_rate": 1.9900422008129924e-05, + "loss": 0.7254270315170288, + "step": 1197 + }, + { + "epoch": 0.22136249725722074, + "grad_norm": 0.10193610191345215, + "learning_rate": 1.99001409851434e-05, + "loss": 0.7491152286529541, + "step": 1198 + }, + { + "epoch": 0.22154727396611656, + "grad_norm": 0.08911536633968353, + "learning_rate": 1.9899859568162237e-05, + "loss": 0.7793245315551758, + "step": 1199 + }, + { + "epoch": 0.22173205067501242, + "grad_norm": 0.08352331072092056, + "learning_rate": 1.9899577757197638e-05, + "loss": 0.6871762871742249, + "step": 1200 + }, + { + "epoch": 0.22191682738390825, + "grad_norm": 0.09086652100086212, + "learning_rate": 1.9899295552260817e-05, + "loss": 0.8809306621551514, + "step": 1201 + }, + { + "epoch": 0.2221016040928041, + "grad_norm": 0.10989069938659668, + "learning_rate": 1.9899012953363002e-05, + "loss": 1.0556919574737549, + "step": 1202 + }, + { + "epoch": 0.22228638080169993, + "grad_norm": 0.0815066546201706, + "learning_rate": 1.9898729960515442e-05, + "loss": 0.5446941256523132, + "step": 1203 + }, + { + "epoch": 0.2224711575105958, + "grad_norm": 0.07634031027555466, + "learning_rate": 1.98984465737294e-05, + "loss": 0.5923195481300354, + "step": 1204 + }, + { + "epoch": 0.22265593421949165, + "grad_norm": 0.08047515153884888, + "learning_rate": 1.989816279301615e-05, + "loss": 0.8424436450004578, + "step": 1205 + }, + { + "epoch": 0.22284071092838748, + "grad_norm": 0.08787591010332108, + "learning_rate": 1.989787861838699e-05, + "loss": 0.7214845418930054, + "step": 1206 + }, + { + "epoch": 0.22302548763728333, + "grad_norm": 0.05372117832303047, + "learning_rate": 1.9897594049853226e-05, + "loss": 0.4613834619522095, + "step": 1207 + }, + { + "epoch": 0.22321026434617916, + "grad_norm": 0.07950277626514435, + "learning_rate": 1.9897309087426185e-05, + "loss": 0.7548415064811707, + "step": 1208 + }, + { + "epoch": 0.22339504105507502, + "grad_norm": 0.06759131699800491, + "learning_rate": 1.9897023731117206e-05, + "loss": 0.43362218141555786, + "step": 1209 + }, + { + "epoch": 0.22357981776397085, + "grad_norm": 0.071576789021492, + "learning_rate": 1.9896737980937648e-05, + "loss": 0.7462440729141235, + "step": 1210 + }, + { + "epoch": 0.2237645944728667, + "grad_norm": 0.07904816418886185, + "learning_rate": 1.9896451836898883e-05, + "loss": 0.7481318116188049, + "step": 1211 + }, + { + "epoch": 0.22394937118176253, + "grad_norm": 0.08989887684583664, + "learning_rate": 1.9896165299012292e-05, + "loss": 0.7053769826889038, + "step": 1212 + }, + { + "epoch": 0.2241341478906584, + "grad_norm": 0.09380374103784561, + "learning_rate": 1.989587836728929e-05, + "loss": 0.8070808053016663, + "step": 1213 + }, + { + "epoch": 0.22431892459955421, + "grad_norm": 0.09103138744831085, + "learning_rate": 1.9895591041741284e-05, + "loss": 0.6746754050254822, + "step": 1214 + }, + { + "epoch": 0.22450370130845007, + "grad_norm": 0.06641924381256104, + "learning_rate": 1.9895303322379714e-05, + "loss": 0.6270115971565247, + "step": 1215 + }, + { + "epoch": 0.22468847801734593, + "grad_norm": 0.08196838200092316, + "learning_rate": 1.989501520921603e-05, + "loss": 0.6588491797447205, + "step": 1216 + }, + { + "epoch": 0.22487325472624176, + "grad_norm": 0.07861845195293427, + "learning_rate": 1.98947267022617e-05, + "loss": 0.6215094327926636, + "step": 1217 + }, + { + "epoch": 0.2250580314351376, + "grad_norm": 0.09896759688854218, + "learning_rate": 1.9894437801528205e-05, + "loss": 0.903312087059021, + "step": 1218 + }, + { + "epoch": 0.22524280814403344, + "grad_norm": 0.07655292004346848, + "learning_rate": 1.989414850702704e-05, + "loss": 0.7730190753936768, + "step": 1219 + }, + { + "epoch": 0.2254275848529293, + "grad_norm": 0.09886181354522705, + "learning_rate": 1.989385881876972e-05, + "loss": 0.6895635724067688, + "step": 1220 + }, + { + "epoch": 0.22561236156182513, + "grad_norm": 0.08377520740032196, + "learning_rate": 1.989356873676777e-05, + "loss": 0.780552089214325, + "step": 1221 + }, + { + "epoch": 0.22579713827072098, + "grad_norm": 0.08539623767137527, + "learning_rate": 1.9893278261032737e-05, + "loss": 0.7185485363006592, + "step": 1222 + }, + { + "epoch": 0.2259819149796168, + "grad_norm": 0.09074624627828598, + "learning_rate": 1.9892987391576188e-05, + "loss": 0.7275176048278809, + "step": 1223 + }, + { + "epoch": 0.22616669168851267, + "grad_norm": 0.08140923827886581, + "learning_rate": 1.9892696128409686e-05, + "loss": 0.6939892172813416, + "step": 1224 + }, + { + "epoch": 0.2263514683974085, + "grad_norm": 0.07564108073711395, + "learning_rate": 1.9892404471544828e-05, + "loss": 0.6892415285110474, + "step": 1225 + }, + { + "epoch": 0.22653624510630435, + "grad_norm": 0.0782395526766777, + "learning_rate": 1.9892112420993225e-05, + "loss": 0.7155357599258423, + "step": 1226 + }, + { + "epoch": 0.2267210218152002, + "grad_norm": 0.08511853218078613, + "learning_rate": 1.9891819976766492e-05, + "loss": 0.832256019115448, + "step": 1227 + }, + { + "epoch": 0.22690579852409604, + "grad_norm": 0.08317041397094727, + "learning_rate": 1.9891527138876274e-05, + "loss": 0.7731190919876099, + "step": 1228 + }, + { + "epoch": 0.2270905752329919, + "grad_norm": 0.10014408081769943, + "learning_rate": 1.9891233907334223e-05, + "loss": 0.8443605899810791, + "step": 1229 + }, + { + "epoch": 0.22727535194188772, + "grad_norm": 0.098563052713871, + "learning_rate": 1.9890940282152007e-05, + "loss": 0.8996736407279968, + "step": 1230 + }, + { + "epoch": 0.22746012865078358, + "grad_norm": 0.07454513013362885, + "learning_rate": 1.9890646263341315e-05, + "loss": 0.5073458552360535, + "step": 1231 + }, + { + "epoch": 0.2276449053596794, + "grad_norm": 0.07696285098791122, + "learning_rate": 1.989035185091384e-05, + "loss": 0.5916829705238342, + "step": 1232 + }, + { + "epoch": 0.22782968206857526, + "grad_norm": 0.0965103730559349, + "learning_rate": 1.9890057044881308e-05, + "loss": 0.8592946529388428, + "step": 1233 + }, + { + "epoch": 0.2280144587774711, + "grad_norm": 0.08271915465593338, + "learning_rate": 1.988976184525545e-05, + "loss": 0.7643226981163025, + "step": 1234 + }, + { + "epoch": 0.22819923548636695, + "grad_norm": 0.09926889091730118, + "learning_rate": 1.988946625204801e-05, + "loss": 0.9657240509986877, + "step": 1235 + }, + { + "epoch": 0.22838401219526278, + "grad_norm": 0.06959125399589539, + "learning_rate": 1.988917026527075e-05, + "loss": 0.6401278376579285, + "step": 1236 + }, + { + "epoch": 0.22856878890415863, + "grad_norm": 0.06192828342318535, + "learning_rate": 1.9888873884935457e-05, + "loss": 0.46463993191719055, + "step": 1237 + }, + { + "epoch": 0.2287535656130545, + "grad_norm": 0.07837715744972229, + "learning_rate": 1.988857711105392e-05, + "loss": 0.6322407722473145, + "step": 1238 + }, + { + "epoch": 0.22893834232195032, + "grad_norm": 0.10930605977773666, + "learning_rate": 1.9888279943637946e-05, + "loss": 0.9277044534683228, + "step": 1239 + }, + { + "epoch": 0.22912311903084617, + "grad_norm": 0.09522504359483719, + "learning_rate": 1.9887982382699373e-05, + "loss": 0.8586084246635437, + "step": 1240 + }, + { + "epoch": 0.229307895739742, + "grad_norm": 0.07182016968727112, + "learning_rate": 1.9887684428250038e-05, + "loss": 0.6435913443565369, + "step": 1241 + }, + { + "epoch": 0.22949267244863786, + "grad_norm": 0.07749020308256149, + "learning_rate": 1.9887386080301793e-05, + "loss": 0.983945369720459, + "step": 1242 + }, + { + "epoch": 0.2296774491575337, + "grad_norm": 0.08700563758611679, + "learning_rate": 1.988708733886652e-05, + "loss": 0.8135854005813599, + "step": 1243 + }, + { + "epoch": 0.22986222586642954, + "grad_norm": 0.08946280181407928, + "learning_rate": 1.98867882039561e-05, + "loss": 0.8678815364837646, + "step": 1244 + }, + { + "epoch": 0.23004700257532537, + "grad_norm": 0.07762499898672104, + "learning_rate": 1.988648867558244e-05, + "loss": 0.7090401649475098, + "step": 1245 + }, + { + "epoch": 0.23023177928422123, + "grad_norm": 0.09532805532217026, + "learning_rate": 1.9886188753757466e-05, + "loss": 0.9154309034347534, + "step": 1246 + }, + { + "epoch": 0.23041655599311706, + "grad_norm": 0.0796472430229187, + "learning_rate": 1.9885888438493106e-05, + "loss": 0.6643787026405334, + "step": 1247 + }, + { + "epoch": 0.2306013327020129, + "grad_norm": 0.07897227257490158, + "learning_rate": 1.988558772980132e-05, + "loss": 0.6329892873764038, + "step": 1248 + }, + { + "epoch": 0.23078610941090877, + "grad_norm": 0.09313155710697174, + "learning_rate": 1.9885286627694066e-05, + "loss": 0.832964301109314, + "step": 1249 + }, + { + "epoch": 0.2309708861198046, + "grad_norm": 0.06968510150909424, + "learning_rate": 1.9884985132183333e-05, + "loss": 0.679764449596405, + "step": 1250 + }, + { + "epoch": 0.23115566282870045, + "grad_norm": 0.08463939279317856, + "learning_rate": 1.9884683243281117e-05, + "loss": 0.8374265432357788, + "step": 1251 + }, + { + "epoch": 0.23134043953759628, + "grad_norm": 0.08091641962528229, + "learning_rate": 1.9884380960999432e-05, + "loss": 0.7396032214164734, + "step": 1252 + }, + { + "epoch": 0.23152521624649214, + "grad_norm": 0.12453664094209671, + "learning_rate": 1.988407828535031e-05, + "loss": 1.1132965087890625, + "step": 1253 + }, + { + "epoch": 0.23170999295538797, + "grad_norm": 0.10481418669223785, + "learning_rate": 1.9883775216345797e-05, + "loss": 1.0461211204528809, + "step": 1254 + }, + { + "epoch": 0.23189476966428382, + "grad_norm": 0.09625512361526489, + "learning_rate": 1.988347175399795e-05, + "loss": 0.8646210432052612, + "step": 1255 + }, + { + "epoch": 0.23207954637317965, + "grad_norm": 0.0808907076716423, + "learning_rate": 1.988316789831885e-05, + "loss": 0.8495672345161438, + "step": 1256 + }, + { + "epoch": 0.2322643230820755, + "grad_norm": 0.06943518668413162, + "learning_rate": 1.9882863649320588e-05, + "loss": 0.7596601247787476, + "step": 1257 + }, + { + "epoch": 0.23244909979097134, + "grad_norm": 0.069698266685009, + "learning_rate": 1.9882559007015275e-05, + "loss": 0.5961692929267883, + "step": 1258 + }, + { + "epoch": 0.2326338764998672, + "grad_norm": 0.07678410410881042, + "learning_rate": 1.988225397141503e-05, + "loss": 0.6730903387069702, + "step": 1259 + }, + { + "epoch": 0.23281865320876305, + "grad_norm": 0.09135778993368149, + "learning_rate": 1.9881948542531994e-05, + "loss": 0.7469479441642761, + "step": 1260 + }, + { + "epoch": 0.23300342991765888, + "grad_norm": 0.08337856829166412, + "learning_rate": 1.988164272037832e-05, + "loss": 0.6452245116233826, + "step": 1261 + }, + { + "epoch": 0.23318820662655473, + "grad_norm": 0.06979101896286011, + "learning_rate": 1.9881336504966187e-05, + "loss": 0.6644605398178101, + "step": 1262 + }, + { + "epoch": 0.23337298333545056, + "grad_norm": 0.07771389931440353, + "learning_rate": 1.9881029896307772e-05, + "loss": 0.7592771053314209, + "step": 1263 + }, + { + "epoch": 0.23355776004434642, + "grad_norm": 0.0938957929611206, + "learning_rate": 1.9880722894415284e-05, + "loss": 0.8080681562423706, + "step": 1264 + }, + { + "epoch": 0.23374253675324225, + "grad_norm": 0.07659591734409332, + "learning_rate": 1.9880415499300936e-05, + "loss": 0.6537554264068604, + "step": 1265 + }, + { + "epoch": 0.2339273134621381, + "grad_norm": 0.07603617012500763, + "learning_rate": 1.988010771097696e-05, + "loss": 0.688729465007782, + "step": 1266 + }, + { + "epoch": 0.23411209017103393, + "grad_norm": 0.10170614719390869, + "learning_rate": 1.987979952945561e-05, + "loss": 0.8643448948860168, + "step": 1267 + }, + { + "epoch": 0.2342968668799298, + "grad_norm": 0.07562986016273499, + "learning_rate": 1.9879490954749152e-05, + "loss": 0.8214960098266602, + "step": 1268 + }, + { + "epoch": 0.23448164358882562, + "grad_norm": 0.09902802109718323, + "learning_rate": 1.9879181986869856e-05, + "loss": 0.8390169143676758, + "step": 1269 + }, + { + "epoch": 0.23466642029772147, + "grad_norm": 0.07118549942970276, + "learning_rate": 1.9878872625830033e-05, + "loss": 0.6914301514625549, + "step": 1270 + }, + { + "epoch": 0.23485119700661733, + "grad_norm": 0.10551498085260391, + "learning_rate": 1.987856287164198e-05, + "loss": 1.0575722455978394, + "step": 1271 + }, + { + "epoch": 0.23503597371551316, + "grad_norm": 0.08981330692768097, + "learning_rate": 1.9878252724318034e-05, + "loss": 0.704413890838623, + "step": 1272 + }, + { + "epoch": 0.23522075042440901, + "grad_norm": 0.09239045530557632, + "learning_rate": 1.9877942183870534e-05, + "loss": 0.8672037124633789, + "step": 1273 + }, + { + "epoch": 0.23540552713330484, + "grad_norm": 0.07538927346467972, + "learning_rate": 1.9877631250311838e-05, + "loss": 0.6462569832801819, + "step": 1274 + }, + { + "epoch": 0.2355903038422007, + "grad_norm": 0.07566707581281662, + "learning_rate": 1.9877319923654327e-05, + "loss": 0.650894284248352, + "step": 1275 + }, + { + "epoch": 0.23577508055109653, + "grad_norm": 0.08834036439657211, + "learning_rate": 1.987700820391038e-05, + "loss": 0.8693649768829346, + "step": 1276 + }, + { + "epoch": 0.23595985725999238, + "grad_norm": 0.06067048758268356, + "learning_rate": 1.9876696091092408e-05, + "loss": 0.6727550625801086, + "step": 1277 + }, + { + "epoch": 0.2361446339688882, + "grad_norm": 0.09425334632396698, + "learning_rate": 1.9876383585212832e-05, + "loss": 0.7923711538314819, + "step": 1278 + }, + { + "epoch": 0.23632941067778407, + "grad_norm": 0.09646166861057281, + "learning_rate": 1.987607068628409e-05, + "loss": 0.8077743053436279, + "step": 1279 + }, + { + "epoch": 0.2365141873866799, + "grad_norm": 0.06434794515371323, + "learning_rate": 1.987575739431863e-05, + "loss": 0.6180709004402161, + "step": 1280 + }, + { + "epoch": 0.23669896409557575, + "grad_norm": 0.0861670970916748, + "learning_rate": 1.9875443709328928e-05, + "loss": 0.830324649810791, + "step": 1281 + }, + { + "epoch": 0.2368837408044716, + "grad_norm": 0.07235396653413773, + "learning_rate": 1.987512963132746e-05, + "loss": 0.6347554922103882, + "step": 1282 + }, + { + "epoch": 0.23706851751336744, + "grad_norm": 0.08272948861122131, + "learning_rate": 1.9874815160326728e-05, + "loss": 0.6697772741317749, + "step": 1283 + }, + { + "epoch": 0.2372532942222633, + "grad_norm": 0.11427222192287445, + "learning_rate": 1.9874500296339245e-05, + "loss": 0.8781434893608093, + "step": 1284 + }, + { + "epoch": 0.23743807093115912, + "grad_norm": 0.08316230773925781, + "learning_rate": 1.987418503937754e-05, + "loss": 0.8479765057563782, + "step": 1285 + }, + { + "epoch": 0.23762284764005498, + "grad_norm": 0.07936954498291016, + "learning_rate": 1.987386938945417e-05, + "loss": 0.7314356565475464, + "step": 1286 + }, + { + "epoch": 0.2378076243489508, + "grad_norm": 0.0801529586315155, + "learning_rate": 1.9873553346581688e-05, + "loss": 0.5724437832832336, + "step": 1287 + }, + { + "epoch": 0.23799240105784666, + "grad_norm": 0.09593310952186584, + "learning_rate": 1.9873236910772674e-05, + "loss": 0.8072952628135681, + "step": 1288 + }, + { + "epoch": 0.2381771777667425, + "grad_norm": 0.1059904620051384, + "learning_rate": 1.987292008203972e-05, + "loss": 1.0050170421600342, + "step": 1289 + }, + { + "epoch": 0.23836195447563835, + "grad_norm": 0.09045623242855072, + "learning_rate": 1.9872602860395433e-05, + "loss": 0.779002845287323, + "step": 1290 + }, + { + "epoch": 0.23854673118453418, + "grad_norm": 0.09053946286439896, + "learning_rate": 1.987228524585244e-05, + "loss": 0.7273655533790588, + "step": 1291 + }, + { + "epoch": 0.23873150789343003, + "grad_norm": 0.0855286493897438, + "learning_rate": 1.987196723842338e-05, + "loss": 0.7778965830802917, + "step": 1292 + }, + { + "epoch": 0.2389162846023259, + "grad_norm": 0.09738358855247498, + "learning_rate": 1.9871648838120913e-05, + "loss": 0.9179601073265076, + "step": 1293 + }, + { + "epoch": 0.23910106131122172, + "grad_norm": 0.07933405041694641, + "learning_rate": 1.9871330044957703e-05, + "loss": 0.7287842035293579, + "step": 1294 + }, + { + "epoch": 0.23928583802011757, + "grad_norm": 0.09340131282806396, + "learning_rate": 1.9871010858946443e-05, + "loss": 0.8566576242446899, + "step": 1295 + }, + { + "epoch": 0.2394706147290134, + "grad_norm": 0.09495611488819122, + "learning_rate": 1.987069128009983e-05, + "loss": 0.9072137475013733, + "step": 1296 + }, + { + "epoch": 0.23965539143790926, + "grad_norm": 0.09656231105327606, + "learning_rate": 1.987037130843059e-05, + "loss": 0.7251742482185364, + "step": 1297 + }, + { + "epoch": 0.2398401681468051, + "grad_norm": 0.08660931140184402, + "learning_rate": 1.987005094395145e-05, + "loss": 0.5973469614982605, + "step": 1298 + }, + { + "epoch": 0.24002494485570094, + "grad_norm": 0.10160975158214569, + "learning_rate": 1.986973018667516e-05, + "loss": 0.779219925403595, + "step": 1299 + }, + { + "epoch": 0.24020972156459677, + "grad_norm": 0.07078233361244202, + "learning_rate": 1.986940903661449e-05, + "loss": 0.69890958070755, + "step": 1300 + }, + { + "epoch": 0.24039449827349263, + "grad_norm": 0.07332238554954529, + "learning_rate": 1.9869087493782217e-05, + "loss": 0.7630764245986938, + "step": 1301 + }, + { + "epoch": 0.24057927498238846, + "grad_norm": 0.07510337233543396, + "learning_rate": 1.9868765558191137e-05, + "loss": 0.638870894908905, + "step": 1302 + }, + { + "epoch": 0.24076405169128431, + "grad_norm": 0.08256179094314575, + "learning_rate": 1.9868443229854068e-05, + "loss": 0.7185980081558228, + "step": 1303 + }, + { + "epoch": 0.24094882840018017, + "grad_norm": 0.10575845092535019, + "learning_rate": 1.9868120508783826e-05, + "loss": 0.9325575828552246, + "step": 1304 + }, + { + "epoch": 0.241133605109076, + "grad_norm": 0.07245877385139465, + "learning_rate": 1.986779739499326e-05, + "loss": 0.6992596387863159, + "step": 1305 + }, + { + "epoch": 0.24131838181797186, + "grad_norm": 0.08188942819833755, + "learning_rate": 1.9867473888495236e-05, + "loss": 0.8546136617660522, + "step": 1306 + }, + { + "epoch": 0.24150315852686768, + "grad_norm": 0.0791037529706955, + "learning_rate": 1.9867149989302623e-05, + "loss": 0.6521901488304138, + "step": 1307 + }, + { + "epoch": 0.24168793523576354, + "grad_norm": 0.08614641427993774, + "learning_rate": 1.986682569742831e-05, + "loss": 0.7859422564506531, + "step": 1308 + }, + { + "epoch": 0.24187271194465937, + "grad_norm": 0.08477243781089783, + "learning_rate": 1.98665010128852e-05, + "loss": 0.8153690695762634, + "step": 1309 + }, + { + "epoch": 0.24205748865355523, + "grad_norm": 0.0749654546380043, + "learning_rate": 1.986617593568622e-05, + "loss": 0.6461166739463806, + "step": 1310 + }, + { + "epoch": 0.24224226536245105, + "grad_norm": 0.07793160527944565, + "learning_rate": 1.9865850465844305e-05, + "loss": 0.6098483800888062, + "step": 1311 + }, + { + "epoch": 0.2424270420713469, + "grad_norm": 0.07243506610393524, + "learning_rate": 1.9865524603372408e-05, + "loss": 0.5794659852981567, + "step": 1312 + }, + { + "epoch": 0.24261181878024274, + "grad_norm": 0.08559340983629227, + "learning_rate": 1.9865198348283497e-05, + "loss": 0.6404768824577332, + "step": 1313 + }, + { + "epoch": 0.2427965954891386, + "grad_norm": 0.10084764659404755, + "learning_rate": 1.9864871700590557e-05, + "loss": 0.7237789630889893, + "step": 1314 + }, + { + "epoch": 0.24298137219803445, + "grad_norm": 0.0957445353269577, + "learning_rate": 1.9864544660306584e-05, + "loss": 0.9082088470458984, + "step": 1315 + }, + { + "epoch": 0.24316614890693028, + "grad_norm": 0.08300480246543884, + "learning_rate": 1.9864217227444594e-05, + "loss": 0.8019734025001526, + "step": 1316 + }, + { + "epoch": 0.24335092561582614, + "grad_norm": 0.09435844421386719, + "learning_rate": 1.9863889402017627e-05, + "loss": 0.8119986057281494, + "step": 1317 + }, + { + "epoch": 0.24353570232472196, + "grad_norm": 0.08238641917705536, + "learning_rate": 1.9863561184038715e-05, + "loss": 0.7308359146118164, + "step": 1318 + }, + { + "epoch": 0.24372047903361782, + "grad_norm": 0.11390416324138641, + "learning_rate": 1.986323257352093e-05, + "loss": 0.9338391423225403, + "step": 1319 + }, + { + "epoch": 0.24390525574251365, + "grad_norm": 0.07044551521539688, + "learning_rate": 1.9862903570477345e-05, + "loss": 0.5287854671478271, + "step": 1320 + }, + { + "epoch": 0.2440900324514095, + "grad_norm": 0.06873449683189392, + "learning_rate": 1.9862574174921056e-05, + "loss": 0.6060889959335327, + "step": 1321 + }, + { + "epoch": 0.24427480916030533, + "grad_norm": 0.07628993690013885, + "learning_rate": 1.9862244386865173e-05, + "loss": 0.834665060043335, + "step": 1322 + }, + { + "epoch": 0.2444595858692012, + "grad_norm": 0.09126506000757217, + "learning_rate": 1.9861914206322815e-05, + "loss": 1.0127897262573242, + "step": 1323 + }, + { + "epoch": 0.24464436257809702, + "grad_norm": 0.07502689212560654, + "learning_rate": 1.9861583633307127e-05, + "loss": 0.7499599456787109, + "step": 1324 + }, + { + "epoch": 0.24482913928699288, + "grad_norm": 0.08039402216672897, + "learning_rate": 1.9861252667831263e-05, + "loss": 0.6770073175430298, + "step": 1325 + }, + { + "epoch": 0.24501391599588873, + "grad_norm": 0.07558752596378326, + "learning_rate": 1.9860921309908395e-05, + "loss": 0.720396876335144, + "step": 1326 + }, + { + "epoch": 0.24519869270478456, + "grad_norm": 0.07513535767793655, + "learning_rate": 1.986058955955171e-05, + "loss": 0.6042259931564331, + "step": 1327 + }, + { + "epoch": 0.24538346941368042, + "grad_norm": 0.08912438899278641, + "learning_rate": 1.9860257416774413e-05, + "loss": 0.8705518245697021, + "step": 1328 + }, + { + "epoch": 0.24556824612257624, + "grad_norm": 0.09717576205730438, + "learning_rate": 1.9859924881589715e-05, + "loss": 1.0236485004425049, + "step": 1329 + }, + { + "epoch": 0.2457530228314721, + "grad_norm": 0.07261338829994202, + "learning_rate": 1.9859591954010855e-05, + "loss": 0.5481549501419067, + "step": 1330 + }, + { + "epoch": 0.24593779954036793, + "grad_norm": 0.06435700505971909, + "learning_rate": 1.9859258634051083e-05, + "loss": 0.5956790447235107, + "step": 1331 + }, + { + "epoch": 0.24612257624926379, + "grad_norm": 0.07993557304143906, + "learning_rate": 1.9858924921723665e-05, + "loss": 0.5512259006500244, + "step": 1332 + }, + { + "epoch": 0.24630735295815961, + "grad_norm": 0.1006544828414917, + "learning_rate": 1.9858590817041875e-05, + "loss": 0.8863869309425354, + "step": 1333 + }, + { + "epoch": 0.24649212966705547, + "grad_norm": 0.0793013721704483, + "learning_rate": 1.9858256320019018e-05, + "loss": 0.7920618057250977, + "step": 1334 + }, + { + "epoch": 0.2466769063759513, + "grad_norm": 0.0840287134051323, + "learning_rate": 1.9857921430668402e-05, + "loss": 0.7028228044509888, + "step": 1335 + }, + { + "epoch": 0.24686168308484716, + "grad_norm": 0.09260048717260361, + "learning_rate": 1.9857586149003354e-05, + "loss": 0.7949702739715576, + "step": 1336 + }, + { + "epoch": 0.247046459793743, + "grad_norm": 0.08278703689575195, + "learning_rate": 1.985725047503722e-05, + "loss": 0.8024004697799683, + "step": 1337 + }, + { + "epoch": 0.24723123650263884, + "grad_norm": 0.07588304579257965, + "learning_rate": 1.985691440878335e-05, + "loss": 0.648167610168457, + "step": 1338 + }, + { + "epoch": 0.2474160132115347, + "grad_norm": 0.08276861160993576, + "learning_rate": 1.985657795025513e-05, + "loss": 0.7005817294120789, + "step": 1339 + }, + { + "epoch": 0.24760078992043053, + "grad_norm": 0.08604889363050461, + "learning_rate": 1.9856241099465944e-05, + "loss": 0.8027825951576233, + "step": 1340 + }, + { + "epoch": 0.24778556662932638, + "grad_norm": 0.08195369690656662, + "learning_rate": 1.9855903856429198e-05, + "loss": 0.7094834446907043, + "step": 1341 + }, + { + "epoch": 0.2479703433382222, + "grad_norm": 0.07793563604354858, + "learning_rate": 1.9855566221158314e-05, + "loss": 0.6649020314216614, + "step": 1342 + }, + { + "epoch": 0.24815512004711807, + "grad_norm": 0.08001888543367386, + "learning_rate": 1.9855228193666724e-05, + "loss": 0.7470601797103882, + "step": 1343 + }, + { + "epoch": 0.2483398967560139, + "grad_norm": 0.0751085951924324, + "learning_rate": 1.985488977396789e-05, + "loss": 0.646827220916748, + "step": 1344 + }, + { + "epoch": 0.24852467346490975, + "grad_norm": 0.08412483334541321, + "learning_rate": 1.9854550962075273e-05, + "loss": 0.7207925319671631, + "step": 1345 + }, + { + "epoch": 0.24870945017380558, + "grad_norm": 0.09168536216020584, + "learning_rate": 1.985421175800236e-05, + "loss": 0.8863273859024048, + "step": 1346 + }, + { + "epoch": 0.24889422688270144, + "grad_norm": 0.09173490107059479, + "learning_rate": 1.985387216176265e-05, + "loss": 0.7285597324371338, + "step": 1347 + }, + { + "epoch": 0.2490790035915973, + "grad_norm": 0.0815114825963974, + "learning_rate": 1.9853532173369653e-05, + "loss": 0.8123331665992737, + "step": 1348 + }, + { + "epoch": 0.24926378030049312, + "grad_norm": 0.07043108344078064, + "learning_rate": 1.9853191792836906e-05, + "loss": 0.5145629644393921, + "step": 1349 + }, + { + "epoch": 0.24944855700938898, + "grad_norm": 0.0833626538515091, + "learning_rate": 1.9852851020177955e-05, + "loss": 0.7493253946304321, + "step": 1350 + }, + { + "epoch": 0.2496333337182848, + "grad_norm": 0.07964198291301727, + "learning_rate": 1.9852509855406354e-05, + "loss": 0.7904506325721741, + "step": 1351 + }, + { + "epoch": 0.24981811042718066, + "grad_norm": 0.08142036944627762, + "learning_rate": 1.9852168298535687e-05, + "loss": 0.6080458164215088, + "step": 1352 + }, + { + "epoch": 0.2500028871360765, + "grad_norm": 0.0822858139872551, + "learning_rate": 1.9851826349579547e-05, + "loss": 0.7370697259902954, + "step": 1353 + }, + { + "epoch": 0.2501876638449723, + "grad_norm": 0.0878128632903099, + "learning_rate": 1.9851484008551537e-05, + "loss": 0.8177471160888672, + "step": 1354 + }, + { + "epoch": 0.2503724405538682, + "grad_norm": 0.08483421057462692, + "learning_rate": 1.9851141275465288e-05, + "loss": 0.7663596272468567, + "step": 1355 + }, + { + "epoch": 0.25055721726276403, + "grad_norm": 0.08922464400529861, + "learning_rate": 1.9850798150334434e-05, + "loss": 0.70176100730896, + "step": 1356 + }, + { + "epoch": 0.25074199397165986, + "grad_norm": 0.08818759769201279, + "learning_rate": 1.9850454633172632e-05, + "loss": 0.8136799335479736, + "step": 1357 + }, + { + "epoch": 0.2509267706805557, + "grad_norm": 0.09864350408315659, + "learning_rate": 1.985011072399356e-05, + "loss": 1.0431067943572998, + "step": 1358 + }, + { + "epoch": 0.2511115473894516, + "grad_norm": 0.06932670623064041, + "learning_rate": 1.9849766422810893e-05, + "loss": 0.5578855276107788, + "step": 1359 + }, + { + "epoch": 0.2512963240983474, + "grad_norm": 0.09292439371347427, + "learning_rate": 1.984942172963834e-05, + "loss": 0.7040930986404419, + "step": 1360 + }, + { + "epoch": 0.25148110080724323, + "grad_norm": 0.07903002947568893, + "learning_rate": 1.9849076644489616e-05, + "loss": 0.6327306032180786, + "step": 1361 + }, + { + "epoch": 0.2516658775161391, + "grad_norm": 0.07349206507205963, + "learning_rate": 1.9848731167378457e-05, + "loss": 0.6429340243339539, + "step": 1362 + }, + { + "epoch": 0.25185065422503494, + "grad_norm": 0.08471519500017166, + "learning_rate": 1.9848385298318607e-05, + "loss": 0.754509449005127, + "step": 1363 + }, + { + "epoch": 0.25203543093393077, + "grad_norm": 0.07947010546922684, + "learning_rate": 1.9848039037323836e-05, + "loss": 0.5998097658157349, + "step": 1364 + }, + { + "epoch": 0.2522202076428266, + "grad_norm": 0.07180643081665039, + "learning_rate": 1.984769238440792e-05, + "loss": 0.6718336343765259, + "step": 1365 + }, + { + "epoch": 0.2524049843517225, + "grad_norm": 0.08686842024326324, + "learning_rate": 1.9847345339584662e-05, + "loss": 0.9123610258102417, + "step": 1366 + }, + { + "epoch": 0.2525897610606183, + "grad_norm": 0.09164551645517349, + "learning_rate": 1.984699790286786e-05, + "loss": 0.9346916079521179, + "step": 1367 + }, + { + "epoch": 0.25277453776951414, + "grad_norm": 0.06970732659101486, + "learning_rate": 1.9846650074271356e-05, + "loss": 0.758202075958252, + "step": 1368 + }, + { + "epoch": 0.25295931447840997, + "grad_norm": 0.08139359951019287, + "learning_rate": 1.984630185380898e-05, + "loss": 0.7334830164909363, + "step": 1369 + }, + { + "epoch": 0.25314409118730585, + "grad_norm": 0.08255112171173096, + "learning_rate": 1.98459532414946e-05, + "loss": 0.7164485454559326, + "step": 1370 + }, + { + "epoch": 0.2533288678962017, + "grad_norm": 0.09030817449092865, + "learning_rate": 1.984560423734208e-05, + "loss": 0.7118257880210876, + "step": 1371 + }, + { + "epoch": 0.2535136446050975, + "grad_norm": 0.09159182012081146, + "learning_rate": 1.9845254841365316e-05, + "loss": 0.8373132944107056, + "step": 1372 + }, + { + "epoch": 0.2536984213139934, + "grad_norm": 0.08677585422992706, + "learning_rate": 1.9844905053578213e-05, + "loss": 0.7780888676643372, + "step": 1373 + }, + { + "epoch": 0.2538831980228892, + "grad_norm": 0.06799543648958206, + "learning_rate": 1.984455487399469e-05, + "loss": 0.546689510345459, + "step": 1374 + }, + { + "epoch": 0.25406797473178505, + "grad_norm": 0.06946510821580887, + "learning_rate": 1.9844204302628683e-05, + "loss": 0.5932323932647705, + "step": 1375 + }, + { + "epoch": 0.2542527514406809, + "grad_norm": 0.09083466231822968, + "learning_rate": 1.984385333949414e-05, + "loss": 0.6616789698600769, + "step": 1376 + }, + { + "epoch": 0.25443752814957676, + "grad_norm": 0.08521077036857605, + "learning_rate": 1.9843501984605033e-05, + "loss": 0.8017097115516663, + "step": 1377 + }, + { + "epoch": 0.2546223048584726, + "grad_norm": 0.08863421529531479, + "learning_rate": 1.9843150237975343e-05, + "loss": 0.8838331699371338, + "step": 1378 + }, + { + "epoch": 0.2548070815673684, + "grad_norm": 0.1019650399684906, + "learning_rate": 1.984279809961907e-05, + "loss": 0.7910183072090149, + "step": 1379 + }, + { + "epoch": 0.25499185827626425, + "grad_norm": 0.0719834640622139, + "learning_rate": 1.9842445569550227e-05, + "loss": 0.682668149471283, + "step": 1380 + }, + { + "epoch": 0.25517663498516013, + "grad_norm": 0.08507004380226135, + "learning_rate": 1.984209264778284e-05, + "loss": 0.8330849409103394, + "step": 1381 + }, + { + "epoch": 0.25536141169405596, + "grad_norm": 0.10418731719255447, + "learning_rate": 1.9841739334330962e-05, + "loss": 1.0728949308395386, + "step": 1382 + }, + { + "epoch": 0.2555461884029518, + "grad_norm": 0.0746791809797287, + "learning_rate": 1.984138562920865e-05, + "loss": 0.6407696604728699, + "step": 1383 + }, + { + "epoch": 0.2557309651118477, + "grad_norm": 0.09626784175634384, + "learning_rate": 1.9841031532429972e-05, + "loss": 0.9932002425193787, + "step": 1384 + }, + { + "epoch": 0.2559157418207435, + "grad_norm": 0.09764228016138077, + "learning_rate": 1.9840677044009035e-05, + "loss": 0.7491921782493591, + "step": 1385 + }, + { + "epoch": 0.25610051852963933, + "grad_norm": 0.08643902093172073, + "learning_rate": 1.9840322163959938e-05, + "loss": 0.7618559002876282, + "step": 1386 + }, + { + "epoch": 0.25628529523853516, + "grad_norm": 0.08111730217933655, + "learning_rate": 1.9839966892296802e-05, + "loss": 0.6650518178939819, + "step": 1387 + }, + { + "epoch": 0.25647007194743104, + "grad_norm": 0.08415523916482925, + "learning_rate": 1.9839611229033774e-05, + "loss": 0.8092013001441956, + "step": 1388 + }, + { + "epoch": 0.2566548486563269, + "grad_norm": 0.07144782692193985, + "learning_rate": 1.9839255174185e-05, + "loss": 0.6734938621520996, + "step": 1389 + }, + { + "epoch": 0.2568396253652227, + "grad_norm": 0.09223801642656326, + "learning_rate": 1.983889872776465e-05, + "loss": 0.7030454277992249, + "step": 1390 + }, + { + "epoch": 0.25702440207411853, + "grad_norm": 0.09314022213220596, + "learning_rate": 1.983854188978692e-05, + "loss": 0.8970528841018677, + "step": 1391 + }, + { + "epoch": 0.2572091787830144, + "grad_norm": 0.0720200166106224, + "learning_rate": 1.9838184660265996e-05, + "loss": 0.6767599582672119, + "step": 1392 + }, + { + "epoch": 0.25739395549191024, + "grad_norm": 0.08036263287067413, + "learning_rate": 1.983782703921611e-05, + "loss": 0.8033877611160278, + "step": 1393 + }, + { + "epoch": 0.25757873220080607, + "grad_norm": 0.07800677418708801, + "learning_rate": 1.983746902665148e-05, + "loss": 0.7212039232254028, + "step": 1394 + }, + { + "epoch": 0.25776350890970195, + "grad_norm": 0.08257672935724258, + "learning_rate": 1.9837110622586364e-05, + "loss": 0.7464303374290466, + "step": 1395 + }, + { + "epoch": 0.2579482856185978, + "grad_norm": 0.07256592065095901, + "learning_rate": 1.983675182703502e-05, + "loss": 0.7287595868110657, + "step": 1396 + }, + { + "epoch": 0.2581330623274936, + "grad_norm": 0.06713775545358658, + "learning_rate": 1.983639264001173e-05, + "loss": 0.5811136960983276, + "step": 1397 + }, + { + "epoch": 0.25831783903638944, + "grad_norm": 0.08436959981918335, + "learning_rate": 1.9836033061530785e-05, + "loss": 0.6900782585144043, + "step": 1398 + }, + { + "epoch": 0.2585026157452853, + "grad_norm": 0.08242448419332504, + "learning_rate": 1.9835673091606498e-05, + "loss": 0.7158823609352112, + "step": 1399 + }, + { + "epoch": 0.25868739245418115, + "grad_norm": 0.07009007781744003, + "learning_rate": 1.9835312730253195e-05, + "loss": 0.5768419504165649, + "step": 1400 + }, + { + "epoch": 0.258872169163077, + "grad_norm": 0.07849988341331482, + "learning_rate": 1.983495197748521e-05, + "loss": 0.6357064247131348, + "step": 1401 + }, + { + "epoch": 0.2590569458719728, + "grad_norm": 0.10085313767194748, + "learning_rate": 1.9834590833316913e-05, + "loss": 0.817133903503418, + "step": 1402 + }, + { + "epoch": 0.2592417225808687, + "grad_norm": 0.10113980621099472, + "learning_rate": 1.983422929776267e-05, + "loss": 0.7684735655784607, + "step": 1403 + }, + { + "epoch": 0.2594264992897645, + "grad_norm": 0.0733640268445015, + "learning_rate": 1.9833867370836865e-05, + "loss": 0.6575866937637329, + "step": 1404 + }, + { + "epoch": 0.25961127599866035, + "grad_norm": 0.10067889094352722, + "learning_rate": 1.9833505052553905e-05, + "loss": 0.8692967891693115, + "step": 1405 + }, + { + "epoch": 0.25979605270755624, + "grad_norm": 0.07616506516933441, + "learning_rate": 1.983314234292821e-05, + "loss": 0.5759550333023071, + "step": 1406 + }, + { + "epoch": 0.25998082941645206, + "grad_norm": 0.13058625161647797, + "learning_rate": 1.9832779241974213e-05, + "loss": 1.1444944143295288, + "step": 1407 + }, + { + "epoch": 0.2601656061253479, + "grad_norm": 0.09796320647001266, + "learning_rate": 1.9832415749706366e-05, + "loss": 0.8220914006233215, + "step": 1408 + }, + { + "epoch": 0.2603503828342437, + "grad_norm": 0.06864874809980392, + "learning_rate": 1.9832051866139133e-05, + "loss": 0.6185899972915649, + "step": 1409 + }, + { + "epoch": 0.2605351595431396, + "grad_norm": 0.06889761239290237, + "learning_rate": 1.9831687591286995e-05, + "loss": 0.607519805431366, + "step": 1410 + }, + { + "epoch": 0.26071993625203543, + "grad_norm": 0.07183163613080978, + "learning_rate": 1.983132292516445e-05, + "loss": 0.6053565740585327, + "step": 1411 + }, + { + "epoch": 0.26090471296093126, + "grad_norm": 0.07045409828424454, + "learning_rate": 1.9830957867786013e-05, + "loss": 0.5963341593742371, + "step": 1412 + }, + { + "epoch": 0.2610894896698271, + "grad_norm": 0.0631919577717781, + "learning_rate": 1.983059241916621e-05, + "loss": 0.5690242052078247, + "step": 1413 + }, + { + "epoch": 0.261274266378723, + "grad_norm": 0.08845905214548111, + "learning_rate": 1.9830226579319585e-05, + "loss": 0.7385035157203674, + "step": 1414 + }, + { + "epoch": 0.2614590430876188, + "grad_norm": 0.08544179052114487, + "learning_rate": 1.9829860348260695e-05, + "loss": 0.8613137006759644, + "step": 1415 + }, + { + "epoch": 0.26164381979651463, + "grad_norm": 0.06891070306301117, + "learning_rate": 1.9829493726004117e-05, + "loss": 0.5957195162773132, + "step": 1416 + }, + { + "epoch": 0.2618285965054105, + "grad_norm": 0.07848822325468063, + "learning_rate": 1.982912671256444e-05, + "loss": 0.6115220189094543, + "step": 1417 + }, + { + "epoch": 0.26201337321430634, + "grad_norm": 0.10504502058029175, + "learning_rate": 1.9828759307956277e-05, + "loss": 0.9463704824447632, + "step": 1418 + }, + { + "epoch": 0.2621981499232022, + "grad_norm": 0.08608116209506989, + "learning_rate": 1.982839151219424e-05, + "loss": 0.7963468432426453, + "step": 1419 + }, + { + "epoch": 0.262382926632098, + "grad_norm": 0.09126728773117065, + "learning_rate": 1.9828023325292968e-05, + "loss": 0.7408127784729004, + "step": 1420 + }, + { + "epoch": 0.2625677033409939, + "grad_norm": 0.0852249264717102, + "learning_rate": 1.982765474726712e-05, + "loss": 0.6558241844177246, + "step": 1421 + }, + { + "epoch": 0.2627524800498897, + "grad_norm": 0.08563613146543503, + "learning_rate": 1.9827285778131355e-05, + "loss": 0.8080082535743713, + "step": 1422 + }, + { + "epoch": 0.26293725675878554, + "grad_norm": 0.08746327459812164, + "learning_rate": 1.9826916417900363e-05, + "loss": 0.7995231747627258, + "step": 1423 + }, + { + "epoch": 0.26312203346768137, + "grad_norm": 0.08268547058105469, + "learning_rate": 1.9826546666588844e-05, + "loss": 0.7918660640716553, + "step": 1424 + }, + { + "epoch": 0.26330681017657726, + "grad_norm": 0.0886000469326973, + "learning_rate": 1.982617652421151e-05, + "loss": 0.782829225063324, + "step": 1425 + }, + { + "epoch": 0.2634915868854731, + "grad_norm": 0.0858738124370575, + "learning_rate": 1.9825805990783095e-05, + "loss": 0.8857966065406799, + "step": 1426 + }, + { + "epoch": 0.2636763635943689, + "grad_norm": 0.07344409823417664, + "learning_rate": 1.982543506631834e-05, + "loss": 0.5548909902572632, + "step": 1427 + }, + { + "epoch": 0.2638611403032648, + "grad_norm": 0.09202000498771667, + "learning_rate": 1.9825063750832007e-05, + "loss": 0.8485000729560852, + "step": 1428 + }, + { + "epoch": 0.2640459170121606, + "grad_norm": 0.10929467529058456, + "learning_rate": 1.9824692044338876e-05, + "loss": 0.9773752093315125, + "step": 1429 + }, + { + "epoch": 0.26423069372105645, + "grad_norm": 0.08134205639362335, + "learning_rate": 1.982431994685374e-05, + "loss": 0.710753321647644, + "step": 1430 + }, + { + "epoch": 0.2644154704299523, + "grad_norm": 0.07787948101758957, + "learning_rate": 1.982394745839141e-05, + "loss": 0.6895082592964172, + "step": 1431 + }, + { + "epoch": 0.26460024713884817, + "grad_norm": 0.08764622360467911, + "learning_rate": 1.9823574578966704e-05, + "loss": 0.7310826182365417, + "step": 1432 + }, + { + "epoch": 0.264785023847744, + "grad_norm": 0.08674835413694382, + "learning_rate": 1.9823201308594465e-05, + "loss": 0.7989583015441895, + "step": 1433 + }, + { + "epoch": 0.2649698005566398, + "grad_norm": 0.08755107969045639, + "learning_rate": 1.9822827647289544e-05, + "loss": 0.7573221325874329, + "step": 1434 + }, + { + "epoch": 0.2651545772655357, + "grad_norm": 0.08561009168624878, + "learning_rate": 1.982245359506682e-05, + "loss": 0.7378658652305603, + "step": 1435 + }, + { + "epoch": 0.26533935397443154, + "grad_norm": 0.08592981845140457, + "learning_rate": 1.9822079151941163e-05, + "loss": 0.6005727648735046, + "step": 1436 + }, + { + "epoch": 0.26552413068332736, + "grad_norm": 0.0772705152630806, + "learning_rate": 1.9821704317927492e-05, + "loss": 0.5923016667366028, + "step": 1437 + }, + { + "epoch": 0.2657089073922232, + "grad_norm": 0.06057063862681389, + "learning_rate": 1.9821329093040717e-05, + "loss": 0.6054449081420898, + "step": 1438 + }, + { + "epoch": 0.2658936841011191, + "grad_norm": 0.08752863854169846, + "learning_rate": 1.982095347729577e-05, + "loss": 0.7829182147979736, + "step": 1439 + }, + { + "epoch": 0.2660784608100149, + "grad_norm": 0.06969458609819412, + "learning_rate": 1.98205774707076e-05, + "loss": 0.6038533449172974, + "step": 1440 + }, + { + "epoch": 0.26626323751891073, + "grad_norm": 0.0858176127076149, + "learning_rate": 1.9820201073291176e-05, + "loss": 0.7014481425285339, + "step": 1441 + }, + { + "epoch": 0.26644801422780656, + "grad_norm": 0.0728302150964737, + "learning_rate": 1.9819824285061466e-05, + "loss": 0.636441171169281, + "step": 1442 + }, + { + "epoch": 0.26663279093670245, + "grad_norm": 0.08914124220609665, + "learning_rate": 1.9819447106033476e-05, + "loss": 0.7722464799880981, + "step": 1443 + }, + { + "epoch": 0.2668175676455983, + "grad_norm": 0.07109539955854416, + "learning_rate": 1.981906953622221e-05, + "loss": 0.6806744337081909, + "step": 1444 + }, + { + "epoch": 0.2670023443544941, + "grad_norm": 0.07968464493751526, + "learning_rate": 1.98186915756427e-05, + "loss": 0.7673649191856384, + "step": 1445 + }, + { + "epoch": 0.26718712106339, + "grad_norm": 0.08722911030054092, + "learning_rate": 1.981831322430998e-05, + "loss": 0.6988089084625244, + "step": 1446 + }, + { + "epoch": 0.2673718977722858, + "grad_norm": 0.08498848229646683, + "learning_rate": 1.981793448223911e-05, + "loss": 0.7942920923233032, + "step": 1447 + }, + { + "epoch": 0.26755667448118164, + "grad_norm": 0.08321723341941833, + "learning_rate": 1.981755534944517e-05, + "loss": 0.8039382696151733, + "step": 1448 + }, + { + "epoch": 0.2677414511900775, + "grad_norm": 0.061859726905822754, + "learning_rate": 1.981717582594324e-05, + "loss": 0.5708411335945129, + "step": 1449 + }, + { + "epoch": 0.26792622789897336, + "grad_norm": 0.08954576402902603, + "learning_rate": 1.9816795911748422e-05, + "loss": 0.621968686580658, + "step": 1450 + }, + { + "epoch": 0.2681110046078692, + "grad_norm": 0.08501655608415604, + "learning_rate": 1.9816415606875844e-05, + "loss": 0.8544004559516907, + "step": 1451 + }, + { + "epoch": 0.268295781316765, + "grad_norm": 0.06574973464012146, + "learning_rate": 1.9816034911340635e-05, + "loss": 0.6134265065193176, + "step": 1452 + }, + { + "epoch": 0.26848055802566084, + "grad_norm": 0.04690899699926376, + "learning_rate": 1.9815653825157944e-05, + "loss": 0.4340095818042755, + "step": 1453 + }, + { + "epoch": 0.2686653347345567, + "grad_norm": 0.09002692997455597, + "learning_rate": 1.9815272348342947e-05, + "loss": 0.758969783782959, + "step": 1454 + }, + { + "epoch": 0.26885011144345256, + "grad_norm": 0.08969945460557938, + "learning_rate": 1.9814890480910815e-05, + "loss": 0.7402480840682983, + "step": 1455 + }, + { + "epoch": 0.2690348881523484, + "grad_norm": 0.08519411087036133, + "learning_rate": 1.9814508222876747e-05, + "loss": 0.6333010792732239, + "step": 1456 + }, + { + "epoch": 0.26921966486124427, + "grad_norm": 0.08576565235853195, + "learning_rate": 1.9814125574255957e-05, + "loss": 0.6507075428962708, + "step": 1457 + }, + { + "epoch": 0.2694044415701401, + "grad_norm": 0.07842403650283813, + "learning_rate": 1.9813742535063677e-05, + "loss": 0.7630261182785034, + "step": 1458 + }, + { + "epoch": 0.2695892182790359, + "grad_norm": 0.09197118133306503, + "learning_rate": 1.9813359105315144e-05, + "loss": 0.8912684321403503, + "step": 1459 + }, + { + "epoch": 0.26977399498793175, + "grad_norm": 0.07204412668943405, + "learning_rate": 1.9812975285025624e-05, + "loss": 0.6835379600524902, + "step": 1460 + }, + { + "epoch": 0.26995877169682764, + "grad_norm": 0.08579453825950623, + "learning_rate": 1.9812591074210385e-05, + "loss": 0.6865627765655518, + "step": 1461 + }, + { + "epoch": 0.27014354840572347, + "grad_norm": 0.07697734236717224, + "learning_rate": 1.9812206472884725e-05, + "loss": 0.8023110628128052, + "step": 1462 + }, + { + "epoch": 0.2703283251146193, + "grad_norm": 0.08861850947141647, + "learning_rate": 1.9811821481063943e-05, + "loss": 0.7018006443977356, + "step": 1463 + }, + { + "epoch": 0.2705131018235151, + "grad_norm": 0.08721664547920227, + "learning_rate": 1.981143609876336e-05, + "loss": 0.6645216345787048, + "step": 1464 + }, + { + "epoch": 0.270697878532411, + "grad_norm": 0.08017013221979141, + "learning_rate": 1.9811050325998323e-05, + "loss": 0.4490172564983368, + "step": 1465 + }, + { + "epoch": 0.27088265524130684, + "grad_norm": 0.07376329600811005, + "learning_rate": 1.9810664162784176e-05, + "loss": 0.6234690546989441, + "step": 1466 + }, + { + "epoch": 0.27106743195020266, + "grad_norm": 0.07300654798746109, + "learning_rate": 1.981027760913629e-05, + "loss": 0.6264599561691284, + "step": 1467 + }, + { + "epoch": 0.27125220865909855, + "grad_norm": 0.09759058058261871, + "learning_rate": 1.980989066507004e-05, + "loss": 0.9196485280990601, + "step": 1468 + }, + { + "epoch": 0.2714369853679944, + "grad_norm": 0.0837840735912323, + "learning_rate": 1.980950333060084e-05, + "loss": 0.530738890171051, + "step": 1469 + }, + { + "epoch": 0.2716217620768902, + "grad_norm": 0.07600148022174835, + "learning_rate": 1.9809115605744097e-05, + "loss": 0.5040048360824585, + "step": 1470 + }, + { + "epoch": 0.27180653878578603, + "grad_norm": 0.07696036994457245, + "learning_rate": 1.9808727490515238e-05, + "loss": 0.7642129063606262, + "step": 1471 + }, + { + "epoch": 0.2719913154946819, + "grad_norm": 0.08447156101465225, + "learning_rate": 1.9808338984929717e-05, + "loss": 0.7772355675697327, + "step": 1472 + }, + { + "epoch": 0.27217609220357775, + "grad_norm": 0.09380346536636353, + "learning_rate": 1.980795008900299e-05, + "loss": 0.6633673906326294, + "step": 1473 + }, + { + "epoch": 0.2723608689124736, + "grad_norm": 0.08995403349399567, + "learning_rate": 1.9807560802750533e-05, + "loss": 0.7711108922958374, + "step": 1474 + }, + { + "epoch": 0.2725456456213694, + "grad_norm": 0.07373813539743423, + "learning_rate": 1.9807171126187838e-05, + "loss": 0.5689606666564941, + "step": 1475 + }, + { + "epoch": 0.2727304223302653, + "grad_norm": 0.07707108557224274, + "learning_rate": 1.980678105933042e-05, + "loss": 0.5939176678657532, + "step": 1476 + }, + { + "epoch": 0.2729151990391611, + "grad_norm": 0.05576924607157707, + "learning_rate": 1.980639060219379e-05, + "loss": 0.5002748370170593, + "step": 1477 + }, + { + "epoch": 0.27309997574805694, + "grad_norm": 0.09329205006361008, + "learning_rate": 1.9805999754793503e-05, + "loss": 0.9085617065429688, + "step": 1478 + }, + { + "epoch": 0.27328475245695283, + "grad_norm": 0.0745263397693634, + "learning_rate": 1.9805608517145098e-05, + "loss": 0.7259835600852966, + "step": 1479 + }, + { + "epoch": 0.27346952916584866, + "grad_norm": 0.08092562109231949, + "learning_rate": 1.9805216889264155e-05, + "loss": 0.7348419427871704, + "step": 1480 + }, + { + "epoch": 0.2736543058747445, + "grad_norm": 0.08193597197532654, + "learning_rate": 1.9804824871166254e-05, + "loss": 0.6958684325218201, + "step": 1481 + }, + { + "epoch": 0.2738390825836403, + "grad_norm": 0.07525932043790817, + "learning_rate": 1.9804432462867002e-05, + "loss": 0.8857287764549255, + "step": 1482 + }, + { + "epoch": 0.2740238592925362, + "grad_norm": 0.09673633426427841, + "learning_rate": 1.9804039664382007e-05, + "loss": 0.9581448435783386, + "step": 1483 + }, + { + "epoch": 0.274208636001432, + "grad_norm": 0.09613839536905289, + "learning_rate": 1.980364647572691e-05, + "loss": 0.956735372543335, + "step": 1484 + }, + { + "epoch": 0.27439341271032786, + "grad_norm": 0.07544142007827759, + "learning_rate": 1.9803252896917356e-05, + "loss": 0.7517312169075012, + "step": 1485 + }, + { + "epoch": 0.2745781894192237, + "grad_norm": 0.07626946270465851, + "learning_rate": 1.9802858927969004e-05, + "loss": 0.6393277645111084, + "step": 1486 + }, + { + "epoch": 0.27476296612811957, + "grad_norm": 0.07800044864416122, + "learning_rate": 1.980246456889754e-05, + "loss": 0.6568846106529236, + "step": 1487 + }, + { + "epoch": 0.2749477428370154, + "grad_norm": 0.07493487000465393, + "learning_rate": 1.9802069819718652e-05, + "loss": 0.6556905508041382, + "step": 1488 + }, + { + "epoch": 0.2751325195459112, + "grad_norm": 0.07483842968940735, + "learning_rate": 1.980167468044805e-05, + "loss": 0.8307350873947144, + "step": 1489 + }, + { + "epoch": 0.2753172962548071, + "grad_norm": 0.09129715710878372, + "learning_rate": 1.9801279151101464e-05, + "loss": 1.0061156749725342, + "step": 1490 + }, + { + "epoch": 0.27550207296370294, + "grad_norm": 0.08590186387300491, + "learning_rate": 1.980088323169463e-05, + "loss": 0.8525320887565613, + "step": 1491 + }, + { + "epoch": 0.27568684967259877, + "grad_norm": 0.06311880052089691, + "learning_rate": 1.9800486922243306e-05, + "loss": 0.5494281053543091, + "step": 1492 + }, + { + "epoch": 0.2758716263814946, + "grad_norm": 0.07645311206579208, + "learning_rate": 1.9800090222763265e-05, + "loss": 0.8318729996681213, + "step": 1493 + }, + { + "epoch": 0.2760564030903905, + "grad_norm": 0.0779624953866005, + "learning_rate": 1.9799693133270294e-05, + "loss": 0.7272801399230957, + "step": 1494 + }, + { + "epoch": 0.2762411797992863, + "grad_norm": 0.07608146965503693, + "learning_rate": 1.9799295653780197e-05, + "loss": 0.6888601183891296, + "step": 1495 + }, + { + "epoch": 0.27642595650818214, + "grad_norm": 0.07562585175037384, + "learning_rate": 1.979889778430879e-05, + "loss": 0.6577703952789307, + "step": 1496 + }, + { + "epoch": 0.27661073321707796, + "grad_norm": 0.07444547116756439, + "learning_rate": 1.979849952487191e-05, + "loss": 0.7057503461837769, + "step": 1497 + }, + { + "epoch": 0.27679550992597385, + "grad_norm": 0.12092580646276474, + "learning_rate": 1.97981008754854e-05, + "loss": 0.4695255756378174, + "step": 1498 + }, + { + "epoch": 0.2769802866348697, + "grad_norm": 0.06313570588827133, + "learning_rate": 1.979770183616513e-05, + "loss": 0.5444561839103699, + "step": 1499 + }, + { + "epoch": 0.2771650633437655, + "grad_norm": 0.08924681693315506, + "learning_rate": 1.9797302406926984e-05, + "loss": 0.7740501165390015, + "step": 1500 + }, + { + "epoch": 0.2771650633437655, + "eval_loss": 0.7722002267837524, + "eval_runtime": 157.6041, + "eval_samples_per_second": 115.663, + "eval_steps_per_second": 14.46, + "step": 1500 + }, + { + "epoch": 0.2773498400526614, + "grad_norm": 0.0823410302400589, + "learning_rate": 1.979690258778685e-05, + "loss": 0.7805835604667664, + "step": 1501 + }, + { + "epoch": 0.2775346167615572, + "grad_norm": 0.08543116599321365, + "learning_rate": 1.9796502378760647e-05, + "loss": 0.7953980565071106, + "step": 1502 + }, + { + "epoch": 0.27771939347045305, + "grad_norm": 0.07848764955997467, + "learning_rate": 1.9796101779864296e-05, + "loss": 0.7930999994277954, + "step": 1503 + }, + { + "epoch": 0.2779041701793489, + "grad_norm": 0.06313935667276382, + "learning_rate": 1.9795700791113744e-05, + "loss": 0.5610463619232178, + "step": 1504 + }, + { + "epoch": 0.27808894688824476, + "grad_norm": 0.08681435137987137, + "learning_rate": 1.9795299412524948e-05, + "loss": 0.7095720767974854, + "step": 1505 + }, + { + "epoch": 0.2782737235971406, + "grad_norm": 0.08366895467042923, + "learning_rate": 1.9794897644113876e-05, + "loss": 0.7479497194290161, + "step": 1506 + }, + { + "epoch": 0.2784585003060364, + "grad_norm": 0.1002199575304985, + "learning_rate": 1.9794495485896528e-05, + "loss": 0.7468428611755371, + "step": 1507 + }, + { + "epoch": 0.27864327701493224, + "grad_norm": 0.047667015343904495, + "learning_rate": 1.9794092937888902e-05, + "loss": 0.4573146402835846, + "step": 1508 + }, + { + "epoch": 0.27882805372382813, + "grad_norm": 0.05945112183690071, + "learning_rate": 1.9793690000107017e-05, + "loss": 0.639329731464386, + "step": 1509 + }, + { + "epoch": 0.27901283043272396, + "grad_norm": 0.06146444007754326, + "learning_rate": 1.9793286672566905e-05, + "loss": 0.5720182657241821, + "step": 1510 + }, + { + "epoch": 0.2791976071416198, + "grad_norm": 0.08685909956693649, + "learning_rate": 1.979288295528463e-05, + "loss": 0.7960376739501953, + "step": 1511 + }, + { + "epoch": 0.27938238385051567, + "grad_norm": 0.05757363140583038, + "learning_rate": 1.979247884827625e-05, + "loss": 0.5274953842163086, + "step": 1512 + }, + { + "epoch": 0.2795671605594115, + "grad_norm": 0.05990239977836609, + "learning_rate": 1.9792074351557852e-05, + "loss": 0.4350784420967102, + "step": 1513 + }, + { + "epoch": 0.2797519372683073, + "grad_norm": 0.09007871150970459, + "learning_rate": 1.9791669465145525e-05, + "loss": 0.6337308287620544, + "step": 1514 + }, + { + "epoch": 0.27993671397720316, + "grad_norm": 0.07639160007238388, + "learning_rate": 1.979126418905539e-05, + "loss": 0.6822291016578674, + "step": 1515 + }, + { + "epoch": 0.28012149068609904, + "grad_norm": 0.0802912637591362, + "learning_rate": 1.979085852330357e-05, + "loss": 0.747983455657959, + "step": 1516 + }, + { + "epoch": 0.28030626739499487, + "grad_norm": 0.0773995965719223, + "learning_rate": 1.9790452467906216e-05, + "loss": 0.7091318368911743, + "step": 1517 + }, + { + "epoch": 0.2804910441038907, + "grad_norm": 0.0977666899561882, + "learning_rate": 1.9790046022879482e-05, + "loss": 0.8170154690742493, + "step": 1518 + }, + { + "epoch": 0.2806758208127865, + "grad_norm": 0.06563309580087662, + "learning_rate": 1.9789639188239548e-05, + "loss": 0.68996262550354, + "step": 1519 + }, + { + "epoch": 0.2808605975216824, + "grad_norm": 0.07695797830820084, + "learning_rate": 1.97892319640026e-05, + "loss": 0.7316287755966187, + "step": 1520 + }, + { + "epoch": 0.28104537423057824, + "grad_norm": 0.0802626758813858, + "learning_rate": 1.9788824350184845e-05, + "loss": 0.8406566977500916, + "step": 1521 + }, + { + "epoch": 0.28123015093947407, + "grad_norm": 0.08003909885883331, + "learning_rate": 1.9788416346802508e-05, + "loss": 0.7299249768257141, + "step": 1522 + }, + { + "epoch": 0.28141492764836995, + "grad_norm": 0.09115131944417953, + "learning_rate": 1.9788007953871825e-05, + "loss": 1.0389716625213623, + "step": 1523 + }, + { + "epoch": 0.2815997043572658, + "grad_norm": 0.08853866904973984, + "learning_rate": 1.9787599171409047e-05, + "loss": 0.9766581058502197, + "step": 1524 + }, + { + "epoch": 0.2817844810661616, + "grad_norm": 0.09646085649728775, + "learning_rate": 1.9787189999430443e-05, + "loss": 0.8425481915473938, + "step": 1525 + }, + { + "epoch": 0.28196925777505744, + "grad_norm": 0.07125802338123322, + "learning_rate": 1.97867804379523e-05, + "loss": 0.4887961149215698, + "step": 1526 + }, + { + "epoch": 0.2821540344839533, + "grad_norm": 0.07870358228683472, + "learning_rate": 1.9786370486990912e-05, + "loss": 0.772543728351593, + "step": 1527 + }, + { + "epoch": 0.28233881119284915, + "grad_norm": 0.09730803221464157, + "learning_rate": 1.97859601465626e-05, + "loss": 0.7736526727676392, + "step": 1528 + }, + { + "epoch": 0.282523587901745, + "grad_norm": 0.0814560204744339, + "learning_rate": 1.9785549416683685e-05, + "loss": 0.8670487403869629, + "step": 1529 + }, + { + "epoch": 0.2827083646106408, + "grad_norm": 0.09567133337259293, + "learning_rate": 1.9785138297370522e-05, + "loss": 0.920782208442688, + "step": 1530 + }, + { + "epoch": 0.2828931413195367, + "grad_norm": 0.09724999219179153, + "learning_rate": 1.9784726788639467e-05, + "loss": 0.7418619990348816, + "step": 1531 + }, + { + "epoch": 0.2830779180284325, + "grad_norm": 0.08737719058990479, + "learning_rate": 1.97843148905069e-05, + "loss": 0.6391053199768066, + "step": 1532 + }, + { + "epoch": 0.28326269473732835, + "grad_norm": 0.07193499058485031, + "learning_rate": 1.978390260298921e-05, + "loss": 0.6396481394767761, + "step": 1533 + }, + { + "epoch": 0.28344747144622423, + "grad_norm": 0.08356016874313354, + "learning_rate": 1.9783489926102803e-05, + "loss": 0.9004808068275452, + "step": 1534 + }, + { + "epoch": 0.28363224815512006, + "grad_norm": 0.07595232129096985, + "learning_rate": 1.978307685986411e-05, + "loss": 0.6460784077644348, + "step": 1535 + }, + { + "epoch": 0.2838170248640159, + "grad_norm": 0.1030508354306221, + "learning_rate": 1.9782663404289563e-05, + "loss": 0.9966877102851868, + "step": 1536 + }, + { + "epoch": 0.2840018015729117, + "grad_norm": 0.07105007767677307, + "learning_rate": 1.978224955939562e-05, + "loss": 0.8565782904624939, + "step": 1537 + }, + { + "epoch": 0.2841865782818076, + "grad_norm": 0.08711280673742294, + "learning_rate": 1.978183532519875e-05, + "loss": 0.7412756085395813, + "step": 1538 + }, + { + "epoch": 0.28437135499070343, + "grad_norm": 0.08922568708658218, + "learning_rate": 1.9781420701715438e-05, + "loss": 0.7539790272712708, + "step": 1539 + }, + { + "epoch": 0.28455613169959926, + "grad_norm": 0.06241748109459877, + "learning_rate": 1.9781005688962182e-05, + "loss": 0.5449891090393066, + "step": 1540 + }, + { + "epoch": 0.2847409084084951, + "grad_norm": 0.0753631517291069, + "learning_rate": 1.9780590286955502e-05, + "loss": 0.7677814364433289, + "step": 1541 + }, + { + "epoch": 0.28492568511739097, + "grad_norm": 0.09720226377248764, + "learning_rate": 1.9780174495711927e-05, + "loss": 0.8704606294631958, + "step": 1542 + }, + { + "epoch": 0.2851104618262868, + "grad_norm": 0.06104160472750664, + "learning_rate": 1.9779758315248006e-05, + "loss": 0.6052356362342834, + "step": 1543 + }, + { + "epoch": 0.2852952385351826, + "grad_norm": 0.0829816535115242, + "learning_rate": 1.97793417455803e-05, + "loss": 0.9357346892356873, + "step": 1544 + }, + { + "epoch": 0.2854800152440785, + "grad_norm": 0.07320324331521988, + "learning_rate": 1.9778924786725388e-05, + "loss": 0.6604475378990173, + "step": 1545 + }, + { + "epoch": 0.28566479195297434, + "grad_norm": 0.06625904887914658, + "learning_rate": 1.9778507438699864e-05, + "loss": 0.503479540348053, + "step": 1546 + }, + { + "epoch": 0.28584956866187017, + "grad_norm": 0.08408404886722565, + "learning_rate": 1.977808970152034e-05, + "loss": 0.7940577268600464, + "step": 1547 + }, + { + "epoch": 0.286034345370766, + "grad_norm": 0.08889124542474747, + "learning_rate": 1.977767157520343e-05, + "loss": 0.6014382243156433, + "step": 1548 + }, + { + "epoch": 0.2862191220796619, + "grad_norm": 0.07590803503990173, + "learning_rate": 1.977725305976579e-05, + "loss": 0.7175227403640747, + "step": 1549 + }, + { + "epoch": 0.2864038987885577, + "grad_norm": 0.08566652983427048, + "learning_rate": 1.9776834155224066e-05, + "loss": 0.652517557144165, + "step": 1550 + }, + { + "epoch": 0.28658867549745354, + "grad_norm": 0.0753151997923851, + "learning_rate": 1.9776414861594925e-05, + "loss": 0.5912181735038757, + "step": 1551 + }, + { + "epoch": 0.28677345220634937, + "grad_norm": 0.08349766582250595, + "learning_rate": 1.9775995178895064e-05, + "loss": 0.8511011600494385, + "step": 1552 + }, + { + "epoch": 0.28695822891524525, + "grad_norm": 0.08739432692527771, + "learning_rate": 1.977557510714118e-05, + "loss": 0.7831947207450867, + "step": 1553 + }, + { + "epoch": 0.2871430056241411, + "grad_norm": 0.10048934072256088, + "learning_rate": 1.977515464634999e-05, + "loss": 0.769275963306427, + "step": 1554 + }, + { + "epoch": 0.2873277823330369, + "grad_norm": 0.06515824794769287, + "learning_rate": 1.9774733796538226e-05, + "loss": 0.5589236617088318, + "step": 1555 + }, + { + "epoch": 0.2875125590419328, + "grad_norm": 0.09635279327630997, + "learning_rate": 1.9774312557722638e-05, + "loss": 0.8554244637489319, + "step": 1556 + }, + { + "epoch": 0.2876973357508286, + "grad_norm": 0.071099191904068, + "learning_rate": 1.9773890929919993e-05, + "loss": 0.9100094437599182, + "step": 1557 + }, + { + "epoch": 0.28788211245972445, + "grad_norm": 0.07915978878736496, + "learning_rate": 1.9773468913147066e-05, + "loss": 0.708828866481781, + "step": 1558 + }, + { + "epoch": 0.2880668891686203, + "grad_norm": 0.0634954422712326, + "learning_rate": 1.977304650742065e-05, + "loss": 0.6176474094390869, + "step": 1559 + }, + { + "epoch": 0.28825166587751616, + "grad_norm": 0.08128131926059723, + "learning_rate": 1.977262371275756e-05, + "loss": 0.7375856041908264, + "step": 1560 + }, + { + "epoch": 0.288436442586412, + "grad_norm": 0.07046988606452942, + "learning_rate": 1.9772200529174625e-05, + "loss": 0.5920605659484863, + "step": 1561 + }, + { + "epoch": 0.2886212192953078, + "grad_norm": 0.081866554915905, + "learning_rate": 1.977177695668868e-05, + "loss": 0.7324924468994141, + "step": 1562 + }, + { + "epoch": 0.28880599600420365, + "grad_norm": 0.06897556036710739, + "learning_rate": 1.9771352995316585e-05, + "loss": 0.46013137698173523, + "step": 1563 + }, + { + "epoch": 0.28899077271309953, + "grad_norm": 0.04982367157936096, + "learning_rate": 1.977092864507521e-05, + "loss": 0.4955293536186218, + "step": 1564 + }, + { + "epoch": 0.28917554942199536, + "grad_norm": 0.057923175394535065, + "learning_rate": 1.9770503905981444e-05, + "loss": 0.48194465041160583, + "step": 1565 + }, + { + "epoch": 0.2893603261308912, + "grad_norm": 0.08413425087928772, + "learning_rate": 1.9770078778052192e-05, + "loss": 0.6813338994979858, + "step": 1566 + }, + { + "epoch": 0.28954510283978707, + "grad_norm": 0.07808249443769455, + "learning_rate": 1.976965326130437e-05, + "loss": 0.6345247030258179, + "step": 1567 + }, + { + "epoch": 0.2897298795486829, + "grad_norm": 0.07906540483236313, + "learning_rate": 1.9769227355754913e-05, + "loss": 0.693480372428894, + "step": 1568 + }, + { + "epoch": 0.28991465625757873, + "grad_norm": 0.12125765532255173, + "learning_rate": 1.9768801061420774e-05, + "loss": 1.1367865800857544, + "step": 1569 + }, + { + "epoch": 0.29009943296647456, + "grad_norm": 0.0932023897767067, + "learning_rate": 1.9768374378318915e-05, + "loss": 0.7568378448486328, + "step": 1570 + }, + { + "epoch": 0.29028420967537044, + "grad_norm": 0.09901861846446991, + "learning_rate": 1.9767947306466318e-05, + "loss": 0.687638521194458, + "step": 1571 + }, + { + "epoch": 0.29046898638426627, + "grad_norm": 0.08295369148254395, + "learning_rate": 1.9767519845879975e-05, + "loss": 0.6895972490310669, + "step": 1572 + }, + { + "epoch": 0.2906537630931621, + "grad_norm": 0.08799974620342255, + "learning_rate": 1.97670919965769e-05, + "loss": 0.6818599104881287, + "step": 1573 + }, + { + "epoch": 0.2908385398020579, + "grad_norm": 0.09332655370235443, + "learning_rate": 1.9766663758574122e-05, + "loss": 0.9478365778923035, + "step": 1574 + }, + { + "epoch": 0.2910233165109538, + "grad_norm": 0.06791740655899048, + "learning_rate": 1.9766235131888684e-05, + "loss": 0.5521538257598877, + "step": 1575 + }, + { + "epoch": 0.29120809321984964, + "grad_norm": 0.07543913275003433, + "learning_rate": 1.9765806116537642e-05, + "loss": 0.8613795042037964, + "step": 1576 + }, + { + "epoch": 0.29139286992874547, + "grad_norm": 0.08647683262825012, + "learning_rate": 1.9765376712538067e-05, + "loss": 0.7674567699432373, + "step": 1577 + }, + { + "epoch": 0.29157764663764135, + "grad_norm": 0.07757703959941864, + "learning_rate": 1.9764946919907054e-05, + "loss": 0.6646807193756104, + "step": 1578 + }, + { + "epoch": 0.2917624233465372, + "grad_norm": 0.061070188879966736, + "learning_rate": 1.9764516738661706e-05, + "loss": 0.4340011775493622, + "step": 1579 + }, + { + "epoch": 0.291947200055433, + "grad_norm": 0.07494571805000305, + "learning_rate": 1.9764086168819136e-05, + "loss": 0.6426244974136353, + "step": 1580 + }, + { + "epoch": 0.29213197676432884, + "grad_norm": 0.08665431290864944, + "learning_rate": 1.9763655210396488e-05, + "loss": 0.7494552731513977, + "step": 1581 + }, + { + "epoch": 0.2923167534732247, + "grad_norm": 0.08033335208892822, + "learning_rate": 1.976322386341091e-05, + "loss": 0.6891940832138062, + "step": 1582 + }, + { + "epoch": 0.29250153018212055, + "grad_norm": 0.07510875910520554, + "learning_rate": 1.976279212787956e-05, + "loss": 0.6621313095092773, + "step": 1583 + }, + { + "epoch": 0.2926863068910164, + "grad_norm": 0.10144571214914322, + "learning_rate": 1.9762360003819637e-05, + "loss": 1.0374672412872314, + "step": 1584 + }, + { + "epoch": 0.2928710835999122, + "grad_norm": 0.07121730595827103, + "learning_rate": 1.9761927491248323e-05, + "loss": 0.7357268929481506, + "step": 1585 + }, + { + "epoch": 0.2930558603088081, + "grad_norm": 0.073287732899189, + "learning_rate": 1.9761494590182834e-05, + "loss": 0.6488692164421082, + "step": 1586 + }, + { + "epoch": 0.2932406370177039, + "grad_norm": 0.062283504754304886, + "learning_rate": 1.9761061300640405e-05, + "loss": 0.5477706789970398, + "step": 1587 + }, + { + "epoch": 0.29342541372659975, + "grad_norm": 0.07306306064128876, + "learning_rate": 1.9760627622638272e-05, + "loss": 0.7222064137458801, + "step": 1588 + }, + { + "epoch": 0.29361019043549563, + "grad_norm": 0.09012686461210251, + "learning_rate": 1.9760193556193697e-05, + "loss": 0.7955993413925171, + "step": 1589 + }, + { + "epoch": 0.29379496714439146, + "grad_norm": 0.09550463408231735, + "learning_rate": 1.9759759101323953e-05, + "loss": 0.9095275402069092, + "step": 1590 + }, + { + "epoch": 0.2939797438532873, + "grad_norm": 0.08157458901405334, + "learning_rate": 1.9759324258046336e-05, + "loss": 0.7226804494857788, + "step": 1591 + }, + { + "epoch": 0.2941645205621831, + "grad_norm": 0.0810418352484703, + "learning_rate": 1.9758889026378142e-05, + "loss": 0.7138922214508057, + "step": 1592 + }, + { + "epoch": 0.294349297271079, + "grad_norm": 0.08949844539165497, + "learning_rate": 1.97584534063367e-05, + "loss": 0.8396944999694824, + "step": 1593 + }, + { + "epoch": 0.29453407397997483, + "grad_norm": 0.07083319872617722, + "learning_rate": 1.975801739793934e-05, + "loss": 0.6845517158508301, + "step": 1594 + }, + { + "epoch": 0.29471885068887066, + "grad_norm": 0.08076249808073044, + "learning_rate": 1.9757581001203414e-05, + "loss": 0.7636187672615051, + "step": 1595 + }, + { + "epoch": 0.2949036273977665, + "grad_norm": 0.073576420545578, + "learning_rate": 1.9757144216146296e-05, + "loss": 0.6097922921180725, + "step": 1596 + }, + { + "epoch": 0.29508840410666237, + "grad_norm": 0.07440821081399918, + "learning_rate": 1.975670704278536e-05, + "loss": 0.6421791911125183, + "step": 1597 + }, + { + "epoch": 0.2952731808155582, + "grad_norm": 0.09239313751459122, + "learning_rate": 1.9756269481138015e-05, + "loss": 0.9637513756752014, + "step": 1598 + }, + { + "epoch": 0.29545795752445403, + "grad_norm": 0.07679717242717743, + "learning_rate": 1.9755831531221663e-05, + "loss": 0.6917498111724854, + "step": 1599 + }, + { + "epoch": 0.2956427342333499, + "grad_norm": 0.07973774522542953, + "learning_rate": 1.975539319305374e-05, + "loss": 0.7043184041976929, + "step": 1600 + }, + { + "epoch": 0.29582751094224574, + "grad_norm": 0.08145955204963684, + "learning_rate": 1.9754954466651688e-05, + "loss": 0.9160181283950806, + "step": 1601 + }, + { + "epoch": 0.29601228765114157, + "grad_norm": 0.07598098367452621, + "learning_rate": 1.9754515352032967e-05, + "loss": 0.7016775608062744, + "step": 1602 + }, + { + "epoch": 0.2961970643600374, + "grad_norm": 0.07639916241168976, + "learning_rate": 1.9754075849215056e-05, + "loss": 0.617500364780426, + "step": 1603 + }, + { + "epoch": 0.2963818410689333, + "grad_norm": 0.08500184863805771, + "learning_rate": 1.975363595821544e-05, + "loss": 0.6746414303779602, + "step": 1604 + }, + { + "epoch": 0.2965666177778291, + "grad_norm": 0.088084377348423, + "learning_rate": 1.975319567905163e-05, + "loss": 0.6506028771400452, + "step": 1605 + }, + { + "epoch": 0.29675139448672494, + "grad_norm": 0.06195947155356407, + "learning_rate": 1.9752755011741142e-05, + "loss": 0.5611916780471802, + "step": 1606 + }, + { + "epoch": 0.29693617119562077, + "grad_norm": 0.07300426065921783, + "learning_rate": 1.9752313956301518e-05, + "loss": 0.724642276763916, + "step": 1607 + }, + { + "epoch": 0.29712094790451665, + "grad_norm": 0.06841021031141281, + "learning_rate": 1.9751872512750314e-05, + "loss": 0.6004761457443237, + "step": 1608 + }, + { + "epoch": 0.2973057246134125, + "grad_norm": 0.07275708764791489, + "learning_rate": 1.975143068110509e-05, + "loss": 0.8002687692642212, + "step": 1609 + }, + { + "epoch": 0.2974905013223083, + "grad_norm": 0.06573881953954697, + "learning_rate": 1.9750988461383432e-05, + "loss": 0.5710200071334839, + "step": 1610 + }, + { + "epoch": 0.2976752780312042, + "grad_norm": 0.09672277420759201, + "learning_rate": 1.975054585360294e-05, + "loss": 0.8065577149391174, + "step": 1611 + }, + { + "epoch": 0.2978600547401, + "grad_norm": 0.05756446346640587, + "learning_rate": 1.9750102857781234e-05, + "loss": 0.5338827967643738, + "step": 1612 + }, + { + "epoch": 0.29804483144899585, + "grad_norm": 0.0928102657198906, + "learning_rate": 1.9749659473935937e-05, + "loss": 0.882835865020752, + "step": 1613 + }, + { + "epoch": 0.2982296081578917, + "grad_norm": 0.08123216778039932, + "learning_rate": 1.9749215702084693e-05, + "loss": 0.743979275226593, + "step": 1614 + }, + { + "epoch": 0.29841438486678756, + "grad_norm": 0.08603404462337494, + "learning_rate": 1.9748771542245167e-05, + "loss": 0.5993881821632385, + "step": 1615 + }, + { + "epoch": 0.2985991615756834, + "grad_norm": 0.08198370784521103, + "learning_rate": 1.974832699443503e-05, + "loss": 0.811705470085144, + "step": 1616 + }, + { + "epoch": 0.2987839382845792, + "grad_norm": 0.061221882700920105, + "learning_rate": 1.9747882058671982e-05, + "loss": 0.5289203524589539, + "step": 1617 + }, + { + "epoch": 0.29896871499347505, + "grad_norm": 0.0895276740193367, + "learning_rate": 1.9747436734973722e-05, + "loss": 0.8075624108314514, + "step": 1618 + }, + { + "epoch": 0.29915349170237093, + "grad_norm": 0.10405202955007553, + "learning_rate": 1.9746991023357978e-05, + "loss": 0.8427689075469971, + "step": 1619 + }, + { + "epoch": 0.29933826841126676, + "grad_norm": 0.05864064767956734, + "learning_rate": 1.9746544923842483e-05, + "loss": 0.4327649474143982, + "step": 1620 + }, + { + "epoch": 0.2995230451201626, + "grad_norm": 0.08024963736534119, + "learning_rate": 1.9746098436444997e-05, + "loss": 0.7758969068527222, + "step": 1621 + }, + { + "epoch": 0.2997078218290585, + "grad_norm": 0.09117995947599411, + "learning_rate": 1.974565156118328e-05, + "loss": 0.8343737125396729, + "step": 1622 + }, + { + "epoch": 0.2998925985379543, + "grad_norm": 0.08025994151830673, + "learning_rate": 1.9745204298075125e-05, + "loss": 0.7524757981300354, + "step": 1623 + }, + { + "epoch": 0.30007737524685013, + "grad_norm": 0.07531878352165222, + "learning_rate": 1.9744756647138326e-05, + "loss": 0.7118515372276306, + "step": 1624 + }, + { + "epoch": 0.30026215195574596, + "grad_norm": 0.06404253095388412, + "learning_rate": 1.97443086083907e-05, + "loss": 0.5755617618560791, + "step": 1625 + }, + { + "epoch": 0.30044692866464184, + "grad_norm": 0.06982641667127609, + "learning_rate": 1.974386018185008e-05, + "loss": 0.6257184147834778, + "step": 1626 + }, + { + "epoch": 0.30063170537353767, + "grad_norm": 0.07712169736623764, + "learning_rate": 1.9743411367534306e-05, + "loss": 0.7018937468528748, + "step": 1627 + }, + { + "epoch": 0.3008164820824335, + "grad_norm": 0.07761988788843155, + "learning_rate": 1.9742962165461245e-05, + "loss": 0.5967124700546265, + "step": 1628 + }, + { + "epoch": 0.30100125879132933, + "grad_norm": 0.07737915962934494, + "learning_rate": 1.974251257564877e-05, + "loss": 0.7350765466690063, + "step": 1629 + }, + { + "epoch": 0.3011860355002252, + "grad_norm": 0.0847436934709549, + "learning_rate": 1.9742062598114777e-05, + "loss": 0.7208263874053955, + "step": 1630 + }, + { + "epoch": 0.30137081220912104, + "grad_norm": 0.10813397169113159, + "learning_rate": 1.974161223287717e-05, + "loss": 0.9324489235877991, + "step": 1631 + }, + { + "epoch": 0.30155558891801687, + "grad_norm": 0.08521011471748352, + "learning_rate": 1.9741161479953872e-05, + "loss": 0.7783839106559753, + "step": 1632 + }, + { + "epoch": 0.30174036562691275, + "grad_norm": 0.08909272402524948, + "learning_rate": 1.9740710339362825e-05, + "loss": 1.0380836725234985, + "step": 1633 + }, + { + "epoch": 0.3019251423358086, + "grad_norm": 0.09834171086549759, + "learning_rate": 1.9740258811121982e-05, + "loss": 0.8330088257789612, + "step": 1634 + }, + { + "epoch": 0.3021099190447044, + "grad_norm": 0.06591691821813583, + "learning_rate": 1.9739806895249312e-05, + "loss": 0.6034543514251709, + "step": 1635 + }, + { + "epoch": 0.30229469575360024, + "grad_norm": 0.0819563940167427, + "learning_rate": 1.9739354591762798e-05, + "loss": 0.7539654970169067, + "step": 1636 + }, + { + "epoch": 0.3024794724624961, + "grad_norm": 0.05253589153289795, + "learning_rate": 1.973890190068044e-05, + "loss": 0.49244508147239685, + "step": 1637 + }, + { + "epoch": 0.30266424917139195, + "grad_norm": 0.08107242733240128, + "learning_rate": 1.973844882202026e-05, + "loss": 0.7167231440544128, + "step": 1638 + }, + { + "epoch": 0.3028490258802878, + "grad_norm": 0.08478766679763794, + "learning_rate": 1.9737995355800282e-05, + "loss": 0.8549013137817383, + "step": 1639 + }, + { + "epoch": 0.3030338025891836, + "grad_norm": 0.08669696748256683, + "learning_rate": 1.9737541502038556e-05, + "loss": 0.9710851907730103, + "step": 1640 + }, + { + "epoch": 0.3032185792980795, + "grad_norm": 0.0721355527639389, + "learning_rate": 1.9737087260753142e-05, + "loss": 0.8099871873855591, + "step": 1641 + }, + { + "epoch": 0.3034033560069753, + "grad_norm": 0.06791551411151886, + "learning_rate": 1.9736632631962118e-05, + "loss": 0.5181037187576294, + "step": 1642 + }, + { + "epoch": 0.30358813271587115, + "grad_norm": 0.0887824222445488, + "learning_rate": 1.9736177615683577e-05, + "loss": 0.9183768033981323, + "step": 1643 + }, + { + "epoch": 0.30377290942476703, + "grad_norm": 0.06414256989955902, + "learning_rate": 1.973572221193563e-05, + "loss": 0.6185742020606995, + "step": 1644 + }, + { + "epoch": 0.30395768613366286, + "grad_norm": 0.0856219157576561, + "learning_rate": 1.9735266420736397e-05, + "loss": 0.776534914970398, + "step": 1645 + }, + { + "epoch": 0.3041424628425587, + "grad_norm": 0.08579172939062119, + "learning_rate": 1.973481024210402e-05, + "loss": 0.7569576501846313, + "step": 1646 + }, + { + "epoch": 0.3043272395514545, + "grad_norm": 0.07078281790018082, + "learning_rate": 1.973435367605665e-05, + "loss": 0.7340545654296875, + "step": 1647 + }, + { + "epoch": 0.3045120162603504, + "grad_norm": 0.06933678686618805, + "learning_rate": 1.9733896722612457e-05, + "loss": 0.4218800365924835, + "step": 1648 + }, + { + "epoch": 0.30469679296924623, + "grad_norm": 0.08380237966775894, + "learning_rate": 1.9733439381789628e-05, + "loss": 0.7405287027359009, + "step": 1649 + }, + { + "epoch": 0.30488156967814206, + "grad_norm": 0.07812915742397308, + "learning_rate": 1.9732981653606367e-05, + "loss": 0.9389246702194214, + "step": 1650 + }, + { + "epoch": 0.3050663463870379, + "grad_norm": 0.08571448922157288, + "learning_rate": 1.9732523538080882e-05, + "loss": 0.6957086324691772, + "step": 1651 + }, + { + "epoch": 0.3052511230959338, + "grad_norm": 0.07718904316425323, + "learning_rate": 1.9732065035231415e-05, + "loss": 0.7227778434753418, + "step": 1652 + }, + { + "epoch": 0.3054358998048296, + "grad_norm": 0.056292854249477386, + "learning_rate": 1.9731606145076204e-05, + "loss": 0.4814959168434143, + "step": 1653 + }, + { + "epoch": 0.30562067651372543, + "grad_norm": 0.06578993052244186, + "learning_rate": 1.9731146867633514e-05, + "loss": 0.6709810495376587, + "step": 1654 + }, + { + "epoch": 0.3058054532226213, + "grad_norm": 0.0599241629242897, + "learning_rate": 1.9730687202921625e-05, + "loss": 0.5246766209602356, + "step": 1655 + }, + { + "epoch": 0.30599022993151714, + "grad_norm": 0.07661686092615128, + "learning_rate": 1.973022715095883e-05, + "loss": 0.6775780916213989, + "step": 1656 + }, + { + "epoch": 0.30617500664041297, + "grad_norm": 0.09185819327831268, + "learning_rate": 1.972976671176343e-05, + "loss": 0.9404529929161072, + "step": 1657 + }, + { + "epoch": 0.3063597833493088, + "grad_norm": 0.0755624771118164, + "learning_rate": 1.9729305885353766e-05, + "loss": 0.7290748357772827, + "step": 1658 + }, + { + "epoch": 0.3065445600582047, + "grad_norm": 0.06746925413608551, + "learning_rate": 1.9728844671748156e-05, + "loss": 0.6544076800346375, + "step": 1659 + }, + { + "epoch": 0.3067293367671005, + "grad_norm": 0.07987582683563232, + "learning_rate": 1.9728383070964972e-05, + "loss": 0.7994759678840637, + "step": 1660 + }, + { + "epoch": 0.30691411347599634, + "grad_norm": 0.10454621911048889, + "learning_rate": 1.9727921083022577e-05, + "loss": 0.8565311431884766, + "step": 1661 + }, + { + "epoch": 0.30709889018489217, + "grad_norm": 0.08259614557027817, + "learning_rate": 1.972745870793936e-05, + "loss": 0.8979726433753967, + "step": 1662 + }, + { + "epoch": 0.30728366689378805, + "grad_norm": 0.07282306253910065, + "learning_rate": 1.9726995945733715e-05, + "loss": 0.7132416367530823, + "step": 1663 + }, + { + "epoch": 0.3074684436026839, + "grad_norm": 0.07935132086277008, + "learning_rate": 1.9726532796424066e-05, + "loss": 0.6348034739494324, + "step": 1664 + }, + { + "epoch": 0.3076532203115797, + "grad_norm": 0.08050986379384995, + "learning_rate": 1.9726069260028838e-05, + "loss": 0.6966174244880676, + "step": 1665 + }, + { + "epoch": 0.3078379970204756, + "grad_norm": 0.07889799773693085, + "learning_rate": 1.972560533656649e-05, + "loss": 0.7163848280906677, + "step": 1666 + }, + { + "epoch": 0.3080227737293714, + "grad_norm": 0.11050339043140411, + "learning_rate": 1.9725141026055473e-05, + "loss": 0.8334489464759827, + "step": 1667 + }, + { + "epoch": 0.30820755043826725, + "grad_norm": 0.07277112454175949, + "learning_rate": 1.9724676328514267e-05, + "loss": 0.6129276752471924, + "step": 1668 + }, + { + "epoch": 0.3083923271471631, + "grad_norm": 0.06715867668390274, + "learning_rate": 1.972421124396137e-05, + "loss": 0.5154484510421753, + "step": 1669 + }, + { + "epoch": 0.30857710385605897, + "grad_norm": 0.06250986456871033, + "learning_rate": 1.972374577241529e-05, + "loss": 0.4796812832355499, + "step": 1670 + }, + { + "epoch": 0.3087618805649548, + "grad_norm": 0.06873840093612671, + "learning_rate": 1.9723279913894547e-05, + "loss": 0.6062132120132446, + "step": 1671 + }, + { + "epoch": 0.3089466572738506, + "grad_norm": 0.06897095590829849, + "learning_rate": 1.9722813668417682e-05, + "loss": 0.6902108788490295, + "step": 1672 + }, + { + "epoch": 0.30913143398274645, + "grad_norm": 0.07618202269077301, + "learning_rate": 1.972234703600326e-05, + "loss": 0.5897256731987, + "step": 1673 + }, + { + "epoch": 0.30931621069164233, + "grad_norm": 0.08293811231851578, + "learning_rate": 1.9721880016669836e-05, + "loss": 0.6409721970558167, + "step": 1674 + }, + { + "epoch": 0.30950098740053816, + "grad_norm": 0.08881959319114685, + "learning_rate": 1.9721412610436005e-05, + "loss": 0.8430206179618835, + "step": 1675 + }, + { + "epoch": 0.309685764109434, + "grad_norm": 0.062191516160964966, + "learning_rate": 1.9720944817320366e-05, + "loss": 0.5813213586807251, + "step": 1676 + }, + { + "epoch": 0.3098705408183299, + "grad_norm": 0.07826868444681168, + "learning_rate": 1.9720476637341538e-05, + "loss": 0.5941954255104065, + "step": 1677 + }, + { + "epoch": 0.3100553175272257, + "grad_norm": 0.07638590037822723, + "learning_rate": 1.972000807051815e-05, + "loss": 0.512177586555481, + "step": 1678 + }, + { + "epoch": 0.31024009423612153, + "grad_norm": 0.08276208490133286, + "learning_rate": 1.9719539116868852e-05, + "loss": 0.6290682554244995, + "step": 1679 + }, + { + "epoch": 0.31042487094501736, + "grad_norm": 0.08368974179029465, + "learning_rate": 1.9719069776412305e-05, + "loss": 0.8070504069328308, + "step": 1680 + }, + { + "epoch": 0.31060964765391325, + "grad_norm": 0.06858662515878677, + "learning_rate": 1.9718600049167187e-05, + "loss": 0.674410879611969, + "step": 1681 + }, + { + "epoch": 0.3107944243628091, + "grad_norm": 0.08916907757520676, + "learning_rate": 1.9718129935152193e-05, + "loss": 0.8041301369667053, + "step": 1682 + }, + { + "epoch": 0.3109792010717049, + "grad_norm": 0.07412692904472351, + "learning_rate": 1.971765943438603e-05, + "loss": 0.8276036381721497, + "step": 1683 + }, + { + "epoch": 0.31116397778060073, + "grad_norm": 0.08200951665639877, + "learning_rate": 1.9717188546887428e-05, + "loss": 0.6787245273590088, + "step": 1684 + }, + { + "epoch": 0.3113487544894966, + "grad_norm": 0.0751579999923706, + "learning_rate": 1.9716717272675122e-05, + "loss": 0.7200093865394592, + "step": 1685 + }, + { + "epoch": 0.31153353119839244, + "grad_norm": 0.06252330541610718, + "learning_rate": 1.9716245611767868e-05, + "loss": 0.5830127596855164, + "step": 1686 + }, + { + "epoch": 0.3117183079072883, + "grad_norm": 0.08460020273923874, + "learning_rate": 1.9715773564184436e-05, + "loss": 0.9173708558082581, + "step": 1687 + }, + { + "epoch": 0.31190308461618416, + "grad_norm": 0.07136960327625275, + "learning_rate": 1.9715301129943613e-05, + "loss": 0.5702857971191406, + "step": 1688 + }, + { + "epoch": 0.31208786132508, + "grad_norm": 0.08351925015449524, + "learning_rate": 1.9714828309064202e-05, + "loss": 0.7518531084060669, + "step": 1689 + }, + { + "epoch": 0.3122726380339758, + "grad_norm": 0.07065819203853607, + "learning_rate": 1.9714355101565016e-05, + "loss": 0.7276631593704224, + "step": 1690 + }, + { + "epoch": 0.31245741474287164, + "grad_norm": 0.06705459207296371, + "learning_rate": 1.9713881507464888e-05, + "loss": 0.6821085810661316, + "step": 1691 + }, + { + "epoch": 0.3126421914517675, + "grad_norm": 0.09088122099637985, + "learning_rate": 1.971340752678267e-05, + "loss": 0.8744920492172241, + "step": 1692 + }, + { + "epoch": 0.31282696816066335, + "grad_norm": 0.08711528778076172, + "learning_rate": 1.971293315953722e-05, + "loss": 0.7038059830665588, + "step": 1693 + }, + { + "epoch": 0.3130117448695592, + "grad_norm": 0.07936561852693558, + "learning_rate": 1.971245840574742e-05, + "loss": 0.7303805947303772, + "step": 1694 + }, + { + "epoch": 0.313196521578455, + "grad_norm": 0.0750492587685585, + "learning_rate": 1.971198326543216e-05, + "loss": 0.5951574444770813, + "step": 1695 + }, + { + "epoch": 0.3133812982873509, + "grad_norm": 0.07795064896345139, + "learning_rate": 1.971150773861035e-05, + "loss": 0.6603903770446777, + "step": 1696 + }, + { + "epoch": 0.3135660749962467, + "grad_norm": 0.07857033610343933, + "learning_rate": 1.971103182530092e-05, + "loss": 0.7110896706581116, + "step": 1697 + }, + { + "epoch": 0.31375085170514255, + "grad_norm": 0.07372584193944931, + "learning_rate": 1.9710555525522802e-05, + "loss": 0.5947229862213135, + "step": 1698 + }, + { + "epoch": 0.31393562841403844, + "grad_norm": 0.06809160858392715, + "learning_rate": 1.971007883929495e-05, + "loss": 0.6651185750961304, + "step": 1699 + }, + { + "epoch": 0.31412040512293427, + "grad_norm": 0.05984443426132202, + "learning_rate": 1.970960176663635e-05, + "loss": 0.40316981077194214, + "step": 1700 + }, + { + "epoch": 0.3143051818318301, + "grad_norm": 0.11108367145061493, + "learning_rate": 1.970912430756597e-05, + "loss": 1.125005841255188, + "step": 1701 + }, + { + "epoch": 0.3144899585407259, + "grad_norm": 0.09628898650407791, + "learning_rate": 1.9708646462102818e-05, + "loss": 0.708713948726654, + "step": 1702 + }, + { + "epoch": 0.3146747352496218, + "grad_norm": 0.07885843515396118, + "learning_rate": 1.970816823026591e-05, + "loss": 0.5283644795417786, + "step": 1703 + }, + { + "epoch": 0.31485951195851763, + "grad_norm": 0.0594625324010849, + "learning_rate": 1.9707689612074286e-05, + "loss": 0.4736645221710205, + "step": 1704 + }, + { + "epoch": 0.31504428866741346, + "grad_norm": 0.09794385731220245, + "learning_rate": 1.970721060754698e-05, + "loss": 0.9850810170173645, + "step": 1705 + }, + { + "epoch": 0.3152290653763093, + "grad_norm": 0.07433894276618958, + "learning_rate": 1.9706731216703066e-05, + "loss": 0.6193894147872925, + "step": 1706 + }, + { + "epoch": 0.3154138420852052, + "grad_norm": 0.0941387340426445, + "learning_rate": 1.970625143956162e-05, + "loss": 0.9859358072280884, + "step": 1707 + }, + { + "epoch": 0.315598618794101, + "grad_norm": 0.06878665834665298, + "learning_rate": 1.9705771276141727e-05, + "loss": 0.5013079047203064, + "step": 1708 + }, + { + "epoch": 0.31578339550299683, + "grad_norm": 0.08492394536733627, + "learning_rate": 1.970529072646251e-05, + "loss": 0.7924782037734985, + "step": 1709 + }, + { + "epoch": 0.3159681722118927, + "grad_norm": 0.07478411495685577, + "learning_rate": 1.970480979054308e-05, + "loss": 0.6038049459457397, + "step": 1710 + }, + { + "epoch": 0.31615294892078855, + "grad_norm": 0.07372638583183289, + "learning_rate": 1.9704328468402586e-05, + "loss": 0.6259400844573975, + "step": 1711 + }, + { + "epoch": 0.3163377256296844, + "grad_norm": 0.07732758671045303, + "learning_rate": 1.9703846760060175e-05, + "loss": 0.7466741800308228, + "step": 1712 + }, + { + "epoch": 0.3165225023385802, + "grad_norm": 0.07305384427309036, + "learning_rate": 1.9703364665535027e-05, + "loss": 0.7214388847351074, + "step": 1713 + }, + { + "epoch": 0.3167072790474761, + "grad_norm": 0.07644770294427872, + "learning_rate": 1.9702882184846324e-05, + "loss": 0.7326534390449524, + "step": 1714 + }, + { + "epoch": 0.3168920557563719, + "grad_norm": 0.08744385093450546, + "learning_rate": 1.9702399318013265e-05, + "loss": 0.8739240169525146, + "step": 1715 + }, + { + "epoch": 0.31707683246526774, + "grad_norm": 0.06662850826978683, + "learning_rate": 1.970191606505507e-05, + "loss": 0.6399668455123901, + "step": 1716 + }, + { + "epoch": 0.3172616091741636, + "grad_norm": 0.08732137829065323, + "learning_rate": 1.9701432425990963e-05, + "loss": 0.7474459409713745, + "step": 1717 + }, + { + "epoch": 0.31744638588305946, + "grad_norm": 0.08268634974956512, + "learning_rate": 1.9700948400840203e-05, + "loss": 0.8560296893119812, + "step": 1718 + }, + { + "epoch": 0.3176311625919553, + "grad_norm": 0.09292731434106827, + "learning_rate": 1.9700463989622048e-05, + "loss": 0.905507504940033, + "step": 1719 + }, + { + "epoch": 0.3178159393008511, + "grad_norm": 0.07785472273826599, + "learning_rate": 1.969997919235577e-05, + "loss": 0.6575971245765686, + "step": 1720 + }, + { + "epoch": 0.318000716009747, + "grad_norm": 0.07556314021348953, + "learning_rate": 1.969949400906067e-05, + "loss": 0.5453295707702637, + "step": 1721 + }, + { + "epoch": 0.3181854927186428, + "grad_norm": 0.08012852072715759, + "learning_rate": 1.9699008439756054e-05, + "loss": 0.7174992561340332, + "step": 1722 + }, + { + "epoch": 0.31837026942753865, + "grad_norm": 0.09103730320930481, + "learning_rate": 1.9698522484461248e-05, + "loss": 0.6318363547325134, + "step": 1723 + }, + { + "epoch": 0.3185550461364345, + "grad_norm": 0.08566120266914368, + "learning_rate": 1.9698036143195587e-05, + "loss": 0.8285524845123291, + "step": 1724 + }, + { + "epoch": 0.31873982284533037, + "grad_norm": 0.08602138608694077, + "learning_rate": 1.9697549415978432e-05, + "loss": 0.8359253406524658, + "step": 1725 + }, + { + "epoch": 0.3189245995542262, + "grad_norm": 0.070762500166893, + "learning_rate": 1.9697062302829147e-05, + "loss": 0.5441091060638428, + "step": 1726 + }, + { + "epoch": 0.319109376263122, + "grad_norm": 0.067392498254776, + "learning_rate": 1.9696574803767124e-05, + "loss": 0.5648691654205322, + "step": 1727 + }, + { + "epoch": 0.31929415297201785, + "grad_norm": 0.07054133713245392, + "learning_rate": 1.9696086918811757e-05, + "loss": 0.6661797761917114, + "step": 1728 + }, + { + "epoch": 0.31947892968091374, + "grad_norm": 0.07675296068191528, + "learning_rate": 1.9695598647982467e-05, + "loss": 0.5624269843101501, + "step": 1729 + }, + { + "epoch": 0.31966370638980957, + "grad_norm": 0.08781984448432922, + "learning_rate": 1.9695109991298686e-05, + "loss": 0.7983731031417847, + "step": 1730 + }, + { + "epoch": 0.3198484830987054, + "grad_norm": 0.0866129994392395, + "learning_rate": 1.9694620948779857e-05, + "loss": 0.700450599193573, + "step": 1731 + }, + { + "epoch": 0.3200332598076013, + "grad_norm": 0.07832545787096024, + "learning_rate": 1.9694131520445445e-05, + "loss": 0.7811502814292908, + "step": 1732 + }, + { + "epoch": 0.3202180365164971, + "grad_norm": 0.09444748610258102, + "learning_rate": 1.9693641706314926e-05, + "loss": 0.8800259828567505, + "step": 1733 + }, + { + "epoch": 0.32040281322539294, + "grad_norm": 0.08850855380296707, + "learning_rate": 1.9693151506407798e-05, + "loss": 0.8175960183143616, + "step": 1734 + }, + { + "epoch": 0.32058758993428876, + "grad_norm": 0.07670994848012924, + "learning_rate": 1.9692660920743566e-05, + "loss": 0.7358960509300232, + "step": 1735 + }, + { + "epoch": 0.32077236664318465, + "grad_norm": 0.09213895350694656, + "learning_rate": 1.9692169949341753e-05, + "loss": 0.9158545732498169, + "step": 1736 + }, + { + "epoch": 0.3209571433520805, + "grad_norm": 0.07196273654699326, + "learning_rate": 1.9691678592221902e-05, + "loss": 0.6552197933197021, + "step": 1737 + }, + { + "epoch": 0.3211419200609763, + "grad_norm": 0.10566510260105133, + "learning_rate": 1.969118684940356e-05, + "loss": 0.727062463760376, + "step": 1738 + }, + { + "epoch": 0.32132669676987213, + "grad_norm": 0.09496032446622849, + "learning_rate": 1.96906947209063e-05, + "loss": 0.9609939455986023, + "step": 1739 + }, + { + "epoch": 0.321511473478768, + "grad_norm": 0.09011634439229965, + "learning_rate": 1.9690202206749713e-05, + "loss": 0.864891767501831, + "step": 1740 + }, + { + "epoch": 0.32169625018766385, + "grad_norm": 0.09794040024280548, + "learning_rate": 1.9689709306953393e-05, + "loss": 0.9009979963302612, + "step": 1741 + }, + { + "epoch": 0.3218810268965597, + "grad_norm": 0.07013952732086182, + "learning_rate": 1.9689216021536956e-05, + "loss": 0.5138965249061584, + "step": 1742 + }, + { + "epoch": 0.32206580360545556, + "grad_norm": 0.07850988954305649, + "learning_rate": 1.9688722350520035e-05, + "loss": 0.7609305381774902, + "step": 1743 + }, + { + "epoch": 0.3222505803143514, + "grad_norm": 0.09505235403776169, + "learning_rate": 1.968822829392228e-05, + "loss": 0.9003138542175293, + "step": 1744 + }, + { + "epoch": 0.3224353570232472, + "grad_norm": 0.08867599815130234, + "learning_rate": 1.9687733851763347e-05, + "loss": 0.7564271092414856, + "step": 1745 + }, + { + "epoch": 0.32262013373214304, + "grad_norm": 0.06701550632715225, + "learning_rate": 1.9687239024062915e-05, + "loss": 0.5547588467597961, + "step": 1746 + }, + { + "epoch": 0.32280491044103893, + "grad_norm": 0.08799927681684494, + "learning_rate": 1.968674381084068e-05, + "loss": 0.6986576914787292, + "step": 1747 + }, + { + "epoch": 0.32298968714993476, + "grad_norm": 0.06552515923976898, + "learning_rate": 1.9686248212116345e-05, + "loss": 0.4841706454753876, + "step": 1748 + }, + { + "epoch": 0.3231744638588306, + "grad_norm": 0.06485949456691742, + "learning_rate": 1.9685752227909636e-05, + "loss": 0.5086471438407898, + "step": 1749 + }, + { + "epoch": 0.3233592405677264, + "grad_norm": 0.07854238152503967, + "learning_rate": 1.9685255858240294e-05, + "loss": 0.752724289894104, + "step": 1750 + }, + { + "epoch": 0.3235440172766223, + "grad_norm": 0.07595296949148178, + "learning_rate": 1.9684759103128067e-05, + "loss": 0.83503657579422, + "step": 1751 + }, + { + "epoch": 0.3237287939855181, + "grad_norm": 0.06905639916658401, + "learning_rate": 1.9684261962592728e-05, + "loss": 0.6583921313285828, + "step": 1752 + }, + { + "epoch": 0.32391357069441395, + "grad_norm": 0.07187511771917343, + "learning_rate": 1.968376443665406e-05, + "loss": 0.5287723541259766, + "step": 1753 + }, + { + "epoch": 0.32409834740330984, + "grad_norm": 0.08390723913908005, + "learning_rate": 1.9683266525331865e-05, + "loss": 0.8683786392211914, + "step": 1754 + }, + { + "epoch": 0.32428312411220567, + "grad_norm": 0.056276608258485794, + "learning_rate": 1.968276822864596e-05, + "loss": 0.4335346817970276, + "step": 1755 + }, + { + "epoch": 0.3244679008211015, + "grad_norm": 0.0724165067076683, + "learning_rate": 1.968226954661617e-05, + "loss": 0.5859235525131226, + "step": 1756 + }, + { + "epoch": 0.3246526775299973, + "grad_norm": 0.07898622006177902, + "learning_rate": 1.9681770479262344e-05, + "loss": 0.7415960431098938, + "step": 1757 + }, + { + "epoch": 0.3248374542388932, + "grad_norm": 0.07540061324834824, + "learning_rate": 1.9681271026604344e-05, + "loss": 0.6611322164535522, + "step": 1758 + }, + { + "epoch": 0.32502223094778904, + "grad_norm": 0.07527685165405273, + "learning_rate": 1.9680771188662044e-05, + "loss": 0.6283073425292969, + "step": 1759 + }, + { + "epoch": 0.32520700765668487, + "grad_norm": 0.052247464656829834, + "learning_rate": 1.9680270965455343e-05, + "loss": 0.42355582118034363, + "step": 1760 + }, + { + "epoch": 0.3253917843655807, + "grad_norm": 0.08852987736463547, + "learning_rate": 1.967977035700414e-05, + "loss": 0.8240786790847778, + "step": 1761 + }, + { + "epoch": 0.3255765610744766, + "grad_norm": 0.0704052597284317, + "learning_rate": 1.9679269363328357e-05, + "loss": 0.6152225732803345, + "step": 1762 + }, + { + "epoch": 0.3257613377833724, + "grad_norm": 0.08233390003442764, + "learning_rate": 1.967876798444794e-05, + "loss": 0.769257128238678, + "step": 1763 + }, + { + "epoch": 0.32594611449226824, + "grad_norm": 0.06887035071849823, + "learning_rate": 1.967826622038284e-05, + "loss": 0.6390390992164612, + "step": 1764 + }, + { + "epoch": 0.3261308912011641, + "grad_norm": 0.06841545552015305, + "learning_rate": 1.9677764071153022e-05, + "loss": 0.6136705279350281, + "step": 1765 + }, + { + "epoch": 0.32631566791005995, + "grad_norm": 0.0852772668004036, + "learning_rate": 1.967726153677847e-05, + "loss": 0.937171995639801, + "step": 1766 + }, + { + "epoch": 0.3265004446189558, + "grad_norm": 0.07882437855005264, + "learning_rate": 1.9676758617279187e-05, + "loss": 0.7007542252540588, + "step": 1767 + }, + { + "epoch": 0.3266852213278516, + "grad_norm": 0.07214733958244324, + "learning_rate": 1.9676255312675186e-05, + "loss": 0.5207557082176208, + "step": 1768 + }, + { + "epoch": 0.3268699980367475, + "grad_norm": 0.07498107105493546, + "learning_rate": 1.9675751622986493e-05, + "loss": 0.683100700378418, + "step": 1769 + }, + { + "epoch": 0.3270547747456433, + "grad_norm": 0.07111642509698868, + "learning_rate": 1.967524754823316e-05, + "loss": 0.7055477499961853, + "step": 1770 + }, + { + "epoch": 0.32723955145453915, + "grad_norm": 0.07640910148620605, + "learning_rate": 1.9674743088435245e-05, + "loss": 0.7240890264511108, + "step": 1771 + }, + { + "epoch": 0.327424328163435, + "grad_norm": 0.0713282972574234, + "learning_rate": 1.9674238243612824e-05, + "loss": 0.6240283846855164, + "step": 1772 + }, + { + "epoch": 0.32760910487233086, + "grad_norm": 0.08497446030378342, + "learning_rate": 1.9673733013785988e-05, + "loss": 0.7595280408859253, + "step": 1773 + }, + { + "epoch": 0.3277938815812267, + "grad_norm": 0.06732115894556046, + "learning_rate": 1.967322739897484e-05, + "loss": 0.563498318195343, + "step": 1774 + }, + { + "epoch": 0.3279786582901225, + "grad_norm": 0.07231878489255905, + "learning_rate": 1.967272139919951e-05, + "loss": 0.663462221622467, + "step": 1775 + }, + { + "epoch": 0.3281634349990184, + "grad_norm": 0.06210903078317642, + "learning_rate": 1.9672215014480125e-05, + "loss": 0.5513061881065369, + "step": 1776 + }, + { + "epoch": 0.32834821170791423, + "grad_norm": 0.0835055485367775, + "learning_rate": 1.9671708244836844e-05, + "loss": 0.7600329518318176, + "step": 1777 + }, + { + "epoch": 0.32853298841681006, + "grad_norm": 0.0779385045170784, + "learning_rate": 1.9671201090289838e-05, + "loss": 0.7166795134544373, + "step": 1778 + }, + { + "epoch": 0.3287177651257059, + "grad_norm": 0.0785970613360405, + "learning_rate": 1.9670693550859284e-05, + "loss": 0.5702319741249084, + "step": 1779 + }, + { + "epoch": 0.32890254183460177, + "grad_norm": 0.08954007923603058, + "learning_rate": 1.9670185626565378e-05, + "loss": 0.7708001136779785, + "step": 1780 + }, + { + "epoch": 0.3290873185434976, + "grad_norm": 0.09481219947338104, + "learning_rate": 1.9669677317428344e-05, + "loss": 0.9120945334434509, + "step": 1781 + }, + { + "epoch": 0.3292720952523934, + "grad_norm": 0.07935404777526855, + "learning_rate": 1.9669168623468403e-05, + "loss": 0.8653592467308044, + "step": 1782 + }, + { + "epoch": 0.32945687196128925, + "grad_norm": 0.08438052982091904, + "learning_rate": 1.9668659544705802e-05, + "loss": 0.7041996121406555, + "step": 1783 + }, + { + "epoch": 0.32964164867018514, + "grad_norm": 0.08412235230207443, + "learning_rate": 1.96681500811608e-05, + "loss": 0.7921789288520813, + "step": 1784 + }, + { + "epoch": 0.32982642537908097, + "grad_norm": 0.0814802497625351, + "learning_rate": 1.966764023285367e-05, + "loss": 0.6965544819831848, + "step": 1785 + }, + { + "epoch": 0.3300112020879768, + "grad_norm": 0.08825701475143433, + "learning_rate": 1.9667129999804707e-05, + "loss": 0.9298684000968933, + "step": 1786 + }, + { + "epoch": 0.3301959787968727, + "grad_norm": 0.06260937452316284, + "learning_rate": 1.966661938203422e-05, + "loss": 0.5783395171165466, + "step": 1787 + }, + { + "epoch": 0.3303807555057685, + "grad_norm": 0.08222264796495438, + "learning_rate": 1.9666108379562518e-05, + "loss": 0.7140374183654785, + "step": 1788 + }, + { + "epoch": 0.33056553221466434, + "grad_norm": 0.08222371339797974, + "learning_rate": 1.9665596992409943e-05, + "loss": 0.6748720407485962, + "step": 1789 + }, + { + "epoch": 0.33075030892356017, + "grad_norm": 0.07889696955680847, + "learning_rate": 1.966508522059685e-05, + "loss": 0.6850640773773193, + "step": 1790 + }, + { + "epoch": 0.33093508563245605, + "grad_norm": 0.07319721579551697, + "learning_rate": 1.9664573064143604e-05, + "loss": 0.7206241488456726, + "step": 1791 + }, + { + "epoch": 0.3311198623413519, + "grad_norm": 0.07060479372739792, + "learning_rate": 1.9664060523070588e-05, + "loss": 0.8104270696640015, + "step": 1792 + }, + { + "epoch": 0.3313046390502477, + "grad_norm": 0.08124864101409912, + "learning_rate": 1.9663547597398197e-05, + "loss": 0.6488017439842224, + "step": 1793 + }, + { + "epoch": 0.33148941575914354, + "grad_norm": 0.07783747464418411, + "learning_rate": 1.9663034287146843e-05, + "loss": 0.6115512251853943, + "step": 1794 + }, + { + "epoch": 0.3316741924680394, + "grad_norm": 0.08700265735387802, + "learning_rate": 1.966252059233696e-05, + "loss": 0.9234606623649597, + "step": 1795 + }, + { + "epoch": 0.33185896917693525, + "grad_norm": 0.06759519129991531, + "learning_rate": 1.9662006512988983e-05, + "loss": 0.6828320622444153, + "step": 1796 + }, + { + "epoch": 0.3320437458858311, + "grad_norm": 0.06715090572834015, + "learning_rate": 1.9661492049123377e-05, + "loss": 0.5963985323905945, + "step": 1797 + }, + { + "epoch": 0.33222852259472696, + "grad_norm": 0.056464727967977524, + "learning_rate": 1.9660977200760612e-05, + "loss": 0.5596158504486084, + "step": 1798 + }, + { + "epoch": 0.3324132993036228, + "grad_norm": 0.06462464481592178, + "learning_rate": 1.9660461967921184e-05, + "loss": 0.37965625524520874, + "step": 1799 + }, + { + "epoch": 0.3325980760125186, + "grad_norm": 0.0943351536989212, + "learning_rate": 1.9659946350625593e-05, + "loss": 0.859682559967041, + "step": 1800 + }, + { + "epoch": 0.33278285272141445, + "grad_norm": 0.06761356443166733, + "learning_rate": 1.9659430348894357e-05, + "loss": 0.615112841129303, + "step": 1801 + }, + { + "epoch": 0.33296762943031033, + "grad_norm": 0.10105802863836288, + "learning_rate": 1.9658913962748014e-05, + "loss": 0.8561904430389404, + "step": 1802 + }, + { + "epoch": 0.33315240613920616, + "grad_norm": 0.07876117527484894, + "learning_rate": 1.9658397192207114e-05, + "loss": 0.5849995613098145, + "step": 1803 + }, + { + "epoch": 0.333337182848102, + "grad_norm": 0.06803808361291885, + "learning_rate": 1.9657880037292224e-05, + "loss": 0.5378023386001587, + "step": 1804 + }, + { + "epoch": 0.3335219595569978, + "grad_norm": 0.08828198164701462, + "learning_rate": 1.9657362498023923e-05, + "loss": 0.7301193475723267, + "step": 1805 + }, + { + "epoch": 0.3337067362658937, + "grad_norm": 0.07563069462776184, + "learning_rate": 1.965684457442281e-05, + "loss": 0.594975471496582, + "step": 1806 + }, + { + "epoch": 0.33389151297478953, + "grad_norm": 0.0690205916762352, + "learning_rate": 1.9656326266509496e-05, + "loss": 0.6536938548088074, + "step": 1807 + }, + { + "epoch": 0.33407628968368536, + "grad_norm": 0.052560463547706604, + "learning_rate": 1.9655807574304606e-05, + "loss": 0.5595214366912842, + "step": 1808 + }, + { + "epoch": 0.33426106639258124, + "grad_norm": 0.07476627081632614, + "learning_rate": 1.9655288497828783e-05, + "loss": 0.6869099140167236, + "step": 1809 + }, + { + "epoch": 0.33444584310147707, + "grad_norm": 0.09601838886737823, + "learning_rate": 1.9654769037102688e-05, + "loss": 0.9548770785331726, + "step": 1810 + }, + { + "epoch": 0.3346306198103729, + "grad_norm": 0.07889758050441742, + "learning_rate": 1.9654249192146988e-05, + "loss": 0.6000795960426331, + "step": 1811 + }, + { + "epoch": 0.3348153965192687, + "grad_norm": 0.07617029547691345, + "learning_rate": 1.9653728962982377e-05, + "loss": 0.5528815388679504, + "step": 1812 + }, + { + "epoch": 0.3350001732281646, + "grad_norm": 0.07510527968406677, + "learning_rate": 1.9653208349629555e-05, + "loss": 0.6761285066604614, + "step": 1813 + }, + { + "epoch": 0.33518494993706044, + "grad_norm": 0.07648994773626328, + "learning_rate": 1.965268735210924e-05, + "loss": 0.6213935613632202, + "step": 1814 + }, + { + "epoch": 0.33536972664595627, + "grad_norm": 0.08265082538127899, + "learning_rate": 1.965216597044217e-05, + "loss": 0.6013681292533875, + "step": 1815 + }, + { + "epoch": 0.3355545033548521, + "grad_norm": 0.07502146810293198, + "learning_rate": 1.965164420464909e-05, + "loss": 0.6983845829963684, + "step": 1816 + }, + { + "epoch": 0.335739280063748, + "grad_norm": 0.10032079368829727, + "learning_rate": 1.965112205475077e-05, + "loss": 0.9250543117523193, + "step": 1817 + }, + { + "epoch": 0.3359240567726438, + "grad_norm": 0.09584251046180725, + "learning_rate": 1.9650599520767984e-05, + "loss": 0.743982195854187, + "step": 1818 + }, + { + "epoch": 0.33610883348153964, + "grad_norm": 0.10235143452882767, + "learning_rate": 1.965007660272153e-05, + "loss": 0.7937692999839783, + "step": 1819 + }, + { + "epoch": 0.3362936101904355, + "grad_norm": 0.0825798287987709, + "learning_rate": 1.964955330063222e-05, + "loss": 0.6732792854309082, + "step": 1820 + }, + { + "epoch": 0.33647838689933135, + "grad_norm": 0.07754667103290558, + "learning_rate": 1.964902961452088e-05, + "loss": 0.6148709058761597, + "step": 1821 + }, + { + "epoch": 0.3366631636082272, + "grad_norm": 0.08688148111104965, + "learning_rate": 1.9648505544408343e-05, + "loss": 0.7113993167877197, + "step": 1822 + }, + { + "epoch": 0.336847940317123, + "grad_norm": 0.06463677436113358, + "learning_rate": 1.9647981090315474e-05, + "loss": 0.6490875482559204, + "step": 1823 + }, + { + "epoch": 0.3370327170260189, + "grad_norm": 0.07091058790683746, + "learning_rate": 1.9647456252263147e-05, + "loss": 0.5048199892044067, + "step": 1824 + }, + { + "epoch": 0.3372174937349147, + "grad_norm": 0.07602108269929886, + "learning_rate": 1.9646931030272237e-05, + "loss": 0.6208643317222595, + "step": 1825 + }, + { + "epoch": 0.33740227044381055, + "grad_norm": 0.0957789197564125, + "learning_rate": 1.9646405424363658e-05, + "loss": 0.8971869945526123, + "step": 1826 + }, + { + "epoch": 0.3375870471527064, + "grad_norm": 0.09101379662752151, + "learning_rate": 1.964587943455832e-05, + "loss": 0.6973580121994019, + "step": 1827 + }, + { + "epoch": 0.33777182386160226, + "grad_norm": 0.06714178621768951, + "learning_rate": 1.9645353060877164e-05, + "loss": 0.6755441427230835, + "step": 1828 + }, + { + "epoch": 0.3379566005704981, + "grad_norm": 0.07767485827207565, + "learning_rate": 1.964482630334113e-05, + "loss": 0.739201545715332, + "step": 1829 + }, + { + "epoch": 0.3381413772793939, + "grad_norm": 0.08593825250864029, + "learning_rate": 1.9644299161971183e-05, + "loss": 0.6533412337303162, + "step": 1830 + }, + { + "epoch": 0.3383261539882898, + "grad_norm": 0.07094033807516098, + "learning_rate": 1.96437716367883e-05, + "loss": 0.6397164463996887, + "step": 1831 + }, + { + "epoch": 0.33851093069718563, + "grad_norm": 0.08640284091234207, + "learning_rate": 1.9643243727813483e-05, + "loss": 0.7347773313522339, + "step": 1832 + }, + { + "epoch": 0.33869570740608146, + "grad_norm": 0.08087334036827087, + "learning_rate": 1.964271543506773e-05, + "loss": 0.8569307327270508, + "step": 1833 + }, + { + "epoch": 0.3388804841149773, + "grad_norm": 0.06818056106567383, + "learning_rate": 1.9642186758572074e-05, + "loss": 0.484335720539093, + "step": 1834 + }, + { + "epoch": 0.33906526082387317, + "grad_norm": 0.08313988894224167, + "learning_rate": 1.9641657698347553e-05, + "loss": 0.7267013192176819, + "step": 1835 + }, + { + "epoch": 0.339250037532769, + "grad_norm": 0.07647810131311417, + "learning_rate": 1.9641128254415216e-05, + "loss": 0.6098029613494873, + "step": 1836 + }, + { + "epoch": 0.33943481424166483, + "grad_norm": 0.07837104797363281, + "learning_rate": 1.964059842679614e-05, + "loss": 0.7910488843917847, + "step": 1837 + }, + { + "epoch": 0.33961959095056066, + "grad_norm": 0.07894197106361389, + "learning_rate": 1.9640068215511407e-05, + "loss": 0.5041911602020264, + "step": 1838 + }, + { + "epoch": 0.33980436765945654, + "grad_norm": 0.08094620704650879, + "learning_rate": 1.963953762058212e-05, + "loss": 0.7487735152244568, + "step": 1839 + }, + { + "epoch": 0.33998914436835237, + "grad_norm": 0.07290375232696533, + "learning_rate": 1.9639006642029394e-05, + "loss": 0.6246356964111328, + "step": 1840 + }, + { + "epoch": 0.3401739210772482, + "grad_norm": 0.07809294760227203, + "learning_rate": 1.963847527987436e-05, + "loss": 0.669266939163208, + "step": 1841 + }, + { + "epoch": 0.3403586977861441, + "grad_norm": 0.07507922500371933, + "learning_rate": 1.9637943534138165e-05, + "loss": 0.8690791726112366, + "step": 1842 + }, + { + "epoch": 0.3405434744950399, + "grad_norm": 0.09490001946687698, + "learning_rate": 1.963741140484197e-05, + "loss": 0.8107730746269226, + "step": 1843 + }, + { + "epoch": 0.34072825120393574, + "grad_norm": 0.08989793062210083, + "learning_rate": 1.9636878892006953e-05, + "loss": 0.9223145246505737, + "step": 1844 + }, + { + "epoch": 0.34091302791283157, + "grad_norm": 0.07013858109712601, + "learning_rate": 1.9636345995654307e-05, + "loss": 0.5746279358863831, + "step": 1845 + }, + { + "epoch": 0.34109780462172745, + "grad_norm": 0.07494598627090454, + "learning_rate": 1.963581271580524e-05, + "loss": 0.6848379373550415, + "step": 1846 + }, + { + "epoch": 0.3412825813306233, + "grad_norm": 0.07319419831037521, + "learning_rate": 1.963527905248097e-05, + "loss": 0.7409180402755737, + "step": 1847 + }, + { + "epoch": 0.3414673580395191, + "grad_norm": 0.08100725710391998, + "learning_rate": 1.963474500570274e-05, + "loss": 0.7143396139144897, + "step": 1848 + }, + { + "epoch": 0.34165213474841494, + "grad_norm": 0.08466223627328873, + "learning_rate": 1.9634210575491802e-05, + "loss": 0.6691813468933105, + "step": 1849 + }, + { + "epoch": 0.3418369114573108, + "grad_norm": 0.0932893231511116, + "learning_rate": 1.9633675761869425e-05, + "loss": 0.9059786200523376, + "step": 1850 + }, + { + "epoch": 0.34202168816620665, + "grad_norm": 0.10014156252145767, + "learning_rate": 1.963314056485689e-05, + "loss": 0.982785165309906, + "step": 1851 + }, + { + "epoch": 0.3422064648751025, + "grad_norm": 0.07843346893787384, + "learning_rate": 1.96326049844755e-05, + "loss": 0.7437697649002075, + "step": 1852 + }, + { + "epoch": 0.34239124158399836, + "grad_norm": 0.08212711662054062, + "learning_rate": 1.9632069020746574e-05, + "loss": 0.5837621688842773, + "step": 1853 + }, + { + "epoch": 0.3425760182928942, + "grad_norm": 0.07777490466833115, + "learning_rate": 1.963153267369143e-05, + "loss": 0.5319809317588806, + "step": 1854 + }, + { + "epoch": 0.34276079500179, + "grad_norm": 0.06932126730680466, + "learning_rate": 1.963099594333142e-05, + "loss": 0.5694265365600586, + "step": 1855 + }, + { + "epoch": 0.34294557171068585, + "grad_norm": 0.09377758204936981, + "learning_rate": 1.96304588296879e-05, + "loss": 0.7562369108200073, + "step": 1856 + }, + { + "epoch": 0.34313034841958173, + "grad_norm": 0.07521593570709229, + "learning_rate": 1.9629921332782254e-05, + "loss": 0.7064640522003174, + "step": 1857 + }, + { + "epoch": 0.34331512512847756, + "grad_norm": 0.0694519504904747, + "learning_rate": 1.9629383452635863e-05, + "loss": 0.5606677532196045, + "step": 1858 + }, + { + "epoch": 0.3434999018373734, + "grad_norm": 0.06669106334447861, + "learning_rate": 1.962884518927014e-05, + "loss": 0.5357717275619507, + "step": 1859 + }, + { + "epoch": 0.3436846785462692, + "grad_norm": 0.08788613229990005, + "learning_rate": 1.96283065427065e-05, + "loss": 0.734246551990509, + "step": 1860 + }, + { + "epoch": 0.3438694552551651, + "grad_norm": 0.0990750715136528, + "learning_rate": 1.9627767512966384e-05, + "loss": 0.6421576142311096, + "step": 1861 + }, + { + "epoch": 0.34405423196406093, + "grad_norm": 0.08061659336090088, + "learning_rate": 1.9627228100071245e-05, + "loss": 0.7515416741371155, + "step": 1862 + }, + { + "epoch": 0.34423900867295676, + "grad_norm": 0.08036473393440247, + "learning_rate": 1.9626688304042544e-05, + "loss": 0.6768893599510193, + "step": 1863 + }, + { + "epoch": 0.34442378538185264, + "grad_norm": 0.07160155475139618, + "learning_rate": 1.9626148124901767e-05, + "loss": 0.5494096279144287, + "step": 1864 + }, + { + "epoch": 0.34460856209074847, + "grad_norm": 0.08275075256824493, + "learning_rate": 1.9625607562670414e-05, + "loss": 0.8985153436660767, + "step": 1865 + }, + { + "epoch": 0.3447933387996443, + "grad_norm": 0.07015492767095566, + "learning_rate": 1.962506661736999e-05, + "loss": 0.6336041688919067, + "step": 1866 + }, + { + "epoch": 0.34497811550854013, + "grad_norm": 0.08545436710119247, + "learning_rate": 1.962452528902203e-05, + "loss": 0.6721879243850708, + "step": 1867 + }, + { + "epoch": 0.345162892217436, + "grad_norm": 0.08779406547546387, + "learning_rate": 1.9623983577648075e-05, + "loss": 0.8231399059295654, + "step": 1868 + }, + { + "epoch": 0.34534766892633184, + "grad_norm": 0.09778361767530441, + "learning_rate": 1.9623441483269682e-05, + "loss": 0.924184262752533, + "step": 1869 + }, + { + "epoch": 0.34553244563522767, + "grad_norm": 0.07136828452348709, + "learning_rate": 1.9622899005908426e-05, + "loss": 0.6420773863792419, + "step": 1870 + }, + { + "epoch": 0.3457172223441235, + "grad_norm": 0.08129216730594635, + "learning_rate": 1.9622356145585895e-05, + "loss": 0.9055580496788025, + "step": 1871 + }, + { + "epoch": 0.3459019990530194, + "grad_norm": 0.06902094185352325, + "learning_rate": 1.96218129023237e-05, + "loss": 0.624248743057251, + "step": 1872 + }, + { + "epoch": 0.3460867757619152, + "grad_norm": 0.07671067863702774, + "learning_rate": 1.9621269276143447e-05, + "loss": 0.9902414083480835, + "step": 1873 + }, + { + "epoch": 0.34627155247081104, + "grad_norm": 0.06990405917167664, + "learning_rate": 1.962072526706678e-05, + "loss": 0.6081162691116333, + "step": 1874 + }, + { + "epoch": 0.3464563291797069, + "grad_norm": 0.06580513715744019, + "learning_rate": 1.9620180875115346e-05, + "loss": 0.5052412748336792, + "step": 1875 + }, + { + "epoch": 0.34664110588860275, + "grad_norm": 0.07508678734302521, + "learning_rate": 1.9619636100310815e-05, + "loss": 0.573227047920227, + "step": 1876 + }, + { + "epoch": 0.3468258825974986, + "grad_norm": 0.07413018494844437, + "learning_rate": 1.961909094267486e-05, + "loss": 0.7761869430541992, + "step": 1877 + }, + { + "epoch": 0.3470106593063944, + "grad_norm": 0.05134841799736023, + "learning_rate": 1.961854540222918e-05, + "loss": 0.47000938653945923, + "step": 1878 + }, + { + "epoch": 0.3471954360152903, + "grad_norm": 0.06442036479711533, + "learning_rate": 1.9617999478995483e-05, + "loss": 0.6043466329574585, + "step": 1879 + }, + { + "epoch": 0.3473802127241861, + "grad_norm": 0.07378768920898438, + "learning_rate": 1.9617453172995503e-05, + "loss": 0.7331511378288269, + "step": 1880 + }, + { + "epoch": 0.34756498943308195, + "grad_norm": 0.08647879958152771, + "learning_rate": 1.9616906484250974e-05, + "loss": 0.7170236706733704, + "step": 1881 + }, + { + "epoch": 0.3477497661419778, + "grad_norm": 0.06821449846029282, + "learning_rate": 1.9616359412783653e-05, + "loss": 0.6115304231643677, + "step": 1882 + }, + { + "epoch": 0.34793454285087366, + "grad_norm": 0.058674536645412445, + "learning_rate": 1.9615811958615314e-05, + "loss": 0.537799060344696, + "step": 1883 + }, + { + "epoch": 0.3481193195597695, + "grad_norm": 0.07722529768943787, + "learning_rate": 1.9615264121767742e-05, + "loss": 0.44881102442741394, + "step": 1884 + }, + { + "epoch": 0.3483040962686653, + "grad_norm": 0.07700357586145401, + "learning_rate": 1.961471590226274e-05, + "loss": 0.6720776557922363, + "step": 1885 + }, + { + "epoch": 0.3484888729775612, + "grad_norm": 0.053852230310440063, + "learning_rate": 1.9614167300122126e-05, + "loss": 0.41168519854545593, + "step": 1886 + }, + { + "epoch": 0.34867364968645703, + "grad_norm": 0.09329484403133392, + "learning_rate": 1.9613618315367734e-05, + "loss": 0.8707481622695923, + "step": 1887 + }, + { + "epoch": 0.34885842639535286, + "grad_norm": 0.10070925951004028, + "learning_rate": 1.961306894802141e-05, + "loss": 0.7687546610832214, + "step": 1888 + }, + { + "epoch": 0.3490432031042487, + "grad_norm": 0.08316970616579056, + "learning_rate": 1.9612519198105015e-05, + "loss": 0.6259077787399292, + "step": 1889 + }, + { + "epoch": 0.3492279798131446, + "grad_norm": 0.09927278757095337, + "learning_rate": 1.9611969065640432e-05, + "loss": 0.9406327605247498, + "step": 1890 + }, + { + "epoch": 0.3494127565220404, + "grad_norm": 0.08534473180770874, + "learning_rate": 1.961141855064955e-05, + "loss": 0.7000691294670105, + "step": 1891 + }, + { + "epoch": 0.34959753323093623, + "grad_norm": 0.05822371318936348, + "learning_rate": 1.961086765315428e-05, + "loss": 0.42065221071243286, + "step": 1892 + }, + { + "epoch": 0.34978230993983206, + "grad_norm": 0.07493947446346283, + "learning_rate": 1.9610316373176548e-05, + "loss": 0.6167550086975098, + "step": 1893 + }, + { + "epoch": 0.34996708664872794, + "grad_norm": 0.08850574493408203, + "learning_rate": 1.960976471073829e-05, + "loss": 0.6938539147377014, + "step": 1894 + }, + { + "epoch": 0.35015186335762377, + "grad_norm": 0.08702442049980164, + "learning_rate": 1.960921266586146e-05, + "loss": 0.6874613165855408, + "step": 1895 + }, + { + "epoch": 0.3503366400665196, + "grad_norm": 0.07820771634578705, + "learning_rate": 1.9608660238568034e-05, + "loss": 0.634290337562561, + "step": 1896 + }, + { + "epoch": 0.3505214167754155, + "grad_norm": 0.0897473394870758, + "learning_rate": 1.9608107428879987e-05, + "loss": 0.7815918326377869, + "step": 1897 + }, + { + "epoch": 0.3507061934843113, + "grad_norm": 0.08086293190717697, + "learning_rate": 1.9607554236819325e-05, + "loss": 0.6558753848075867, + "step": 1898 + }, + { + "epoch": 0.35089097019320714, + "grad_norm": 0.07397107779979706, + "learning_rate": 1.9607000662408066e-05, + "loss": 0.7135198712348938, + "step": 1899 + }, + { + "epoch": 0.35107574690210297, + "grad_norm": 0.08879372477531433, + "learning_rate": 1.9606446705668236e-05, + "loss": 0.9617974758148193, + "step": 1900 + }, + { + "epoch": 0.35126052361099885, + "grad_norm": 0.08342552185058594, + "learning_rate": 1.960589236662188e-05, + "loss": 0.6310023665428162, + "step": 1901 + }, + { + "epoch": 0.3514453003198947, + "grad_norm": 0.08977493643760681, + "learning_rate": 1.9605337645291063e-05, + "loss": 0.8947171568870544, + "step": 1902 + }, + { + "epoch": 0.3516300770287905, + "grad_norm": 0.06647540628910065, + "learning_rate": 1.9604782541697858e-05, + "loss": 0.5766458511352539, + "step": 1903 + }, + { + "epoch": 0.35181485373768634, + "grad_norm": 0.09166788309812546, + "learning_rate": 1.9604227055864355e-05, + "loss": 0.995534360408783, + "step": 1904 + }, + { + "epoch": 0.3519996304465822, + "grad_norm": 0.05475666746497154, + "learning_rate": 1.9603671187812664e-05, + "loss": 0.44240716099739075, + "step": 1905 + }, + { + "epoch": 0.35218440715547805, + "grad_norm": 0.06649932265281677, + "learning_rate": 1.960311493756491e-05, + "loss": 0.5629976987838745, + "step": 1906 + }, + { + "epoch": 0.3523691838643739, + "grad_norm": 0.09560313820838928, + "learning_rate": 1.960255830514322e-05, + "loss": 0.8332951664924622, + "step": 1907 + }, + { + "epoch": 0.35255396057326976, + "grad_norm": 0.06212356314063072, + "learning_rate": 1.9602001290569756e-05, + "loss": 0.6503223180770874, + "step": 1908 + }, + { + "epoch": 0.3527387372821656, + "grad_norm": 0.0768064633011818, + "learning_rate": 1.9601443893866682e-05, + "loss": 0.682904839515686, + "step": 1909 + }, + { + "epoch": 0.3529235139910614, + "grad_norm": 0.08543505519628525, + "learning_rate": 1.9600886115056177e-05, + "loss": 0.6108998656272888, + "step": 1910 + }, + { + "epoch": 0.35310829069995725, + "grad_norm": 0.11749269813299179, + "learning_rate": 1.9600327954160443e-05, + "loss": 1.2383038997650146, + "step": 1911 + }, + { + "epoch": 0.35329306740885313, + "grad_norm": 0.08560445159673691, + "learning_rate": 1.9599769411201692e-05, + "loss": 0.7582423090934753, + "step": 1912 + }, + { + "epoch": 0.35347784411774896, + "grad_norm": 0.06951037049293518, + "learning_rate": 1.959921048620215e-05, + "loss": 0.5835634469985962, + "step": 1913 + }, + { + "epoch": 0.3536626208266448, + "grad_norm": 0.08175399899482727, + "learning_rate": 1.9598651179184065e-05, + "loss": 0.6157529950141907, + "step": 1914 + }, + { + "epoch": 0.3538473975355406, + "grad_norm": 0.09142636507749557, + "learning_rate": 1.9598091490169696e-05, + "loss": 0.8940083980560303, + "step": 1915 + }, + { + "epoch": 0.3540321742444365, + "grad_norm": 0.08478675037622452, + "learning_rate": 1.959753141918131e-05, + "loss": 0.790504515171051, + "step": 1916 + }, + { + "epoch": 0.35421695095333233, + "grad_norm": 0.0646057203412056, + "learning_rate": 1.9596970966241203e-05, + "loss": 0.5042975544929504, + "step": 1917 + }, + { + "epoch": 0.35440172766222816, + "grad_norm": 0.08247191458940506, + "learning_rate": 1.9596410131371674e-05, + "loss": 0.6063563823699951, + "step": 1918 + }, + { + "epoch": 0.35458650437112404, + "grad_norm": 0.08805165439844131, + "learning_rate": 1.9595848914595047e-05, + "loss": 0.7084945440292358, + "step": 1919 + }, + { + "epoch": 0.3547712810800199, + "grad_norm": 0.06988709419965744, + "learning_rate": 1.9595287315933653e-05, + "loss": 0.5524942874908447, + "step": 1920 + }, + { + "epoch": 0.3549560577889157, + "grad_norm": 0.06375003606081009, + "learning_rate": 1.9594725335409847e-05, + "loss": 0.4416433870792389, + "step": 1921 + }, + { + "epoch": 0.35514083449781153, + "grad_norm": 0.1005588248372078, + "learning_rate": 1.959416297304599e-05, + "loss": 0.8840219378471375, + "step": 1922 + }, + { + "epoch": 0.3553256112067074, + "grad_norm": 0.07297534495592117, + "learning_rate": 1.959360022886446e-05, + "loss": 0.6581389307975769, + "step": 1923 + }, + { + "epoch": 0.35551038791560324, + "grad_norm": 0.09779398888349533, + "learning_rate": 1.9593037102887657e-05, + "loss": 0.8495759963989258, + "step": 1924 + }, + { + "epoch": 0.35569516462449907, + "grad_norm": 0.09082182496786118, + "learning_rate": 1.959247359513799e-05, + "loss": 0.8239253759384155, + "step": 1925 + }, + { + "epoch": 0.3558799413333949, + "grad_norm": 0.07424180954694748, + "learning_rate": 1.9591909705637886e-05, + "loss": 0.5720775127410889, + "step": 1926 + }, + { + "epoch": 0.3560647180422908, + "grad_norm": 0.0850430577993393, + "learning_rate": 1.9591345434409785e-05, + "loss": 0.6905296444892883, + "step": 1927 + }, + { + "epoch": 0.3562494947511866, + "grad_norm": 0.07360535860061646, + "learning_rate": 1.959078078147614e-05, + "loss": 0.594716489315033, + "step": 1928 + }, + { + "epoch": 0.35643427146008244, + "grad_norm": 0.0795220360159874, + "learning_rate": 1.9590215746859428e-05, + "loss": 0.656869649887085, + "step": 1929 + }, + { + "epoch": 0.3566190481689783, + "grad_norm": 0.07492407411336899, + "learning_rate": 1.9589650330582133e-05, + "loss": 0.5760904550552368, + "step": 1930 + }, + { + "epoch": 0.35680382487787415, + "grad_norm": 0.08463925123214722, + "learning_rate": 1.9589084532666757e-05, + "loss": 0.7245805263519287, + "step": 1931 + }, + { + "epoch": 0.35698860158677, + "grad_norm": 0.07536065578460693, + "learning_rate": 1.9588518353135818e-05, + "loss": 0.623936116695404, + "step": 1932 + }, + { + "epoch": 0.3571733782956658, + "grad_norm": 0.07619050145149231, + "learning_rate": 1.9587951792011844e-05, + "loss": 0.6012312769889832, + "step": 1933 + }, + { + "epoch": 0.3573581550045617, + "grad_norm": 0.07146383076906204, + "learning_rate": 1.958738484931739e-05, + "loss": 0.6659769415855408, + "step": 1934 + }, + { + "epoch": 0.3575429317134575, + "grad_norm": 0.07134726643562317, + "learning_rate": 1.958681752507501e-05, + "loss": 0.6113250851631165, + "step": 1935 + }, + { + "epoch": 0.35772770842235335, + "grad_norm": 0.08301021158695221, + "learning_rate": 1.958624981930729e-05, + "loss": 0.7705444693565369, + "step": 1936 + }, + { + "epoch": 0.3579124851312492, + "grad_norm": 0.08296855539083481, + "learning_rate": 1.9585681732036813e-05, + "loss": 0.9636433720588684, + "step": 1937 + }, + { + "epoch": 0.35809726184014506, + "grad_norm": 0.06294649094343185, + "learning_rate": 1.9585113263286197e-05, + "loss": 0.46373164653778076, + "step": 1938 + }, + { + "epoch": 0.3582820385490409, + "grad_norm": 0.06871624290943146, + "learning_rate": 1.9584544413078062e-05, + "loss": 0.5198218822479248, + "step": 1939 + }, + { + "epoch": 0.3584668152579367, + "grad_norm": 0.08555329591035843, + "learning_rate": 1.9583975181435043e-05, + "loss": 0.8196623921394348, + "step": 1940 + }, + { + "epoch": 0.3586515919668326, + "grad_norm": 0.08057794719934464, + "learning_rate": 1.9583405568379795e-05, + "loss": 0.6855925917625427, + "step": 1941 + }, + { + "epoch": 0.35883636867572843, + "grad_norm": 0.06789886951446533, + "learning_rate": 1.9582835573934994e-05, + "loss": 0.6530704498291016, + "step": 1942 + }, + { + "epoch": 0.35902114538462426, + "grad_norm": 0.08298030495643616, + "learning_rate": 1.9582265198123312e-05, + "loss": 0.8214961290359497, + "step": 1943 + }, + { + "epoch": 0.3592059220935201, + "grad_norm": 0.058385469019412994, + "learning_rate": 1.9581694440967456e-05, + "loss": 0.4939122796058655, + "step": 1944 + }, + { + "epoch": 0.359390698802416, + "grad_norm": 0.0891612321138382, + "learning_rate": 1.958112330249014e-05, + "loss": 0.7617546916007996, + "step": 1945 + }, + { + "epoch": 0.3595754755113118, + "grad_norm": 0.07534582912921906, + "learning_rate": 1.958055178271409e-05, + "loss": 0.48294517397880554, + "step": 1946 + }, + { + "epoch": 0.35976025222020763, + "grad_norm": 0.08808505535125732, + "learning_rate": 1.957997988166205e-05, + "loss": 0.7680104374885559, + "step": 1947 + }, + { + "epoch": 0.35994502892910346, + "grad_norm": 0.07987001538276672, + "learning_rate": 1.9579407599356787e-05, + "loss": 0.6420309543609619, + "step": 1948 + }, + { + "epoch": 0.36012980563799935, + "grad_norm": 0.07023387402296066, + "learning_rate": 1.957883493582107e-05, + "loss": 0.5578839182853699, + "step": 1949 + }, + { + "epoch": 0.3603145823468952, + "grad_norm": 0.08079687505960464, + "learning_rate": 1.9578261891077693e-05, + "loss": 0.8681395053863525, + "step": 1950 + }, + { + "epoch": 0.360499359055791, + "grad_norm": 0.05796205252408981, + "learning_rate": 1.957768846514946e-05, + "loss": 0.4505980610847473, + "step": 1951 + }, + { + "epoch": 0.3606841357646869, + "grad_norm": 0.07316447049379349, + "learning_rate": 1.9577114658059186e-05, + "loss": 0.6213290095329285, + "step": 1952 + }, + { + "epoch": 0.3608689124735827, + "grad_norm": 0.0825081542134285, + "learning_rate": 1.9576540469829715e-05, + "loss": 0.7580359578132629, + "step": 1953 + }, + { + "epoch": 0.36105368918247854, + "grad_norm": 0.06691969186067581, + "learning_rate": 1.9575965900483895e-05, + "loss": 0.6928013563156128, + "step": 1954 + }, + { + "epoch": 0.36123846589137437, + "grad_norm": 0.10172438621520996, + "learning_rate": 1.957539095004459e-05, + "loss": 0.841858983039856, + "step": 1955 + }, + { + "epoch": 0.36142324260027026, + "grad_norm": 0.07854878902435303, + "learning_rate": 1.9574815618534682e-05, + "loss": 0.5641253590583801, + "step": 1956 + }, + { + "epoch": 0.3616080193091661, + "grad_norm": 0.09107008576393127, + "learning_rate": 1.9574239905977072e-05, + "loss": 0.8147022724151611, + "step": 1957 + }, + { + "epoch": 0.3617927960180619, + "grad_norm": 0.08986808359622955, + "learning_rate": 1.9573663812394664e-05, + "loss": 1.079319953918457, + "step": 1958 + }, + { + "epoch": 0.36197757272695774, + "grad_norm": 0.08058770000934601, + "learning_rate": 1.9573087337810385e-05, + "loss": 0.7533799409866333, + "step": 1959 + }, + { + "epoch": 0.3621623494358536, + "grad_norm": 0.06563723832368851, + "learning_rate": 1.9572510482247187e-05, + "loss": 0.5782539248466492, + "step": 1960 + }, + { + "epoch": 0.36234712614474945, + "grad_norm": 0.06482648104429245, + "learning_rate": 1.9571933245728017e-05, + "loss": 0.6582553386688232, + "step": 1961 + }, + { + "epoch": 0.3625319028536453, + "grad_norm": 0.08319249749183655, + "learning_rate": 1.957135562827585e-05, + "loss": 0.6696375012397766, + "step": 1962 + }, + { + "epoch": 0.36271667956254117, + "grad_norm": 0.0764584019780159, + "learning_rate": 1.9570777629913676e-05, + "loss": 0.6904196739196777, + "step": 1963 + }, + { + "epoch": 0.362901456271437, + "grad_norm": 0.08399965614080429, + "learning_rate": 1.9570199250664498e-05, + "loss": 0.7262436151504517, + "step": 1964 + }, + { + "epoch": 0.3630862329803328, + "grad_norm": 0.07018699496984482, + "learning_rate": 1.9569620490551323e-05, + "loss": 0.6780123114585876, + "step": 1965 + }, + { + "epoch": 0.36327100968922865, + "grad_norm": 0.08127652108669281, + "learning_rate": 1.95690413495972e-05, + "loss": 0.7128887176513672, + "step": 1966 + }, + { + "epoch": 0.36345578639812454, + "grad_norm": 0.07078352570533752, + "learning_rate": 1.9568461827825165e-05, + "loss": 0.8290748596191406, + "step": 1967 + }, + { + "epoch": 0.36364056310702036, + "grad_norm": 0.07198803126811981, + "learning_rate": 1.9567881925258287e-05, + "loss": 0.6036486029624939, + "step": 1968 + }, + { + "epoch": 0.3638253398159162, + "grad_norm": 0.084732785820961, + "learning_rate": 1.956730164191964e-05, + "loss": 0.9472017288208008, + "step": 1969 + }, + { + "epoch": 0.364010116524812, + "grad_norm": 0.07745788991451263, + "learning_rate": 1.9566720977832322e-05, + "loss": 0.6785796284675598, + "step": 1970 + }, + { + "epoch": 0.3641948932337079, + "grad_norm": 0.0648859366774559, + "learning_rate": 1.9566139933019438e-05, + "loss": 0.9074662327766418, + "step": 1971 + }, + { + "epoch": 0.36437966994260373, + "grad_norm": 0.07930831611156464, + "learning_rate": 1.9565558507504113e-05, + "loss": 0.6451144814491272, + "step": 1972 + }, + { + "epoch": 0.36456444665149956, + "grad_norm": 0.08495119959115982, + "learning_rate": 1.9564976701309488e-05, + "loss": 0.7120051383972168, + "step": 1973 + }, + { + "epoch": 0.36474922336039545, + "grad_norm": 0.0755208432674408, + "learning_rate": 1.9564394514458717e-05, + "loss": 0.6715715527534485, + "step": 1974 + }, + { + "epoch": 0.3649340000692913, + "grad_norm": 0.07266179472208023, + "learning_rate": 1.9563811946974965e-05, + "loss": 0.6780380010604858, + "step": 1975 + }, + { + "epoch": 0.3651187767781871, + "grad_norm": 0.09038018435239792, + "learning_rate": 1.9563228998881417e-05, + "loss": 0.8212044835090637, + "step": 1976 + }, + { + "epoch": 0.36530355348708293, + "grad_norm": 0.0702347606420517, + "learning_rate": 1.9562645670201278e-05, + "loss": 0.6917005181312561, + "step": 1977 + }, + { + "epoch": 0.3654883301959788, + "grad_norm": 0.08551733940839767, + "learning_rate": 1.9562061960957757e-05, + "loss": 0.889920175075531, + "step": 1978 + }, + { + "epoch": 0.36567310690487465, + "grad_norm": 0.07262120395898819, + "learning_rate": 1.9561477871174084e-05, + "loss": 0.6123881340026855, + "step": 1979 + }, + { + "epoch": 0.3658578836137705, + "grad_norm": 0.09774454683065414, + "learning_rate": 1.956089340087351e-05, + "loss": 0.9917032718658447, + "step": 1980 + }, + { + "epoch": 0.3660426603226663, + "grad_norm": 0.07489411532878876, + "learning_rate": 1.9560308550079288e-05, + "loss": 0.6934548020362854, + "step": 1981 + }, + { + "epoch": 0.3662274370315622, + "grad_norm": 0.08260970562696457, + "learning_rate": 1.9559723318814695e-05, + "loss": 0.6225256323814392, + "step": 1982 + }, + { + "epoch": 0.366412213740458, + "grad_norm": 0.0768555998802185, + "learning_rate": 1.9559137707103025e-05, + "loss": 0.6416477560997009, + "step": 1983 + }, + { + "epoch": 0.36659699044935384, + "grad_norm": 0.06510823965072632, + "learning_rate": 1.955855171496758e-05, + "loss": 0.6328122019767761, + "step": 1984 + }, + { + "epoch": 0.3667817671582497, + "grad_norm": 0.08435893803834915, + "learning_rate": 1.9557965342431682e-05, + "loss": 0.9326714873313904, + "step": 1985 + }, + { + "epoch": 0.36696654386714556, + "grad_norm": 0.0724826380610466, + "learning_rate": 1.9557378589518665e-05, + "loss": 0.6915863156318665, + "step": 1986 + }, + { + "epoch": 0.3671513205760414, + "grad_norm": 0.08972358703613281, + "learning_rate": 1.9556791456251886e-05, + "loss": 0.7526139616966248, + "step": 1987 + }, + { + "epoch": 0.3673360972849372, + "grad_norm": 0.09427791088819504, + "learning_rate": 1.95562039426547e-05, + "loss": 0.753598153591156, + "step": 1988 + }, + { + "epoch": 0.3675208739938331, + "grad_norm": 0.0812675729393959, + "learning_rate": 1.9555616048750497e-05, + "loss": 0.7490280270576477, + "step": 1989 + }, + { + "epoch": 0.3677056507027289, + "grad_norm": 0.06773082166910172, + "learning_rate": 1.955502777456267e-05, + "loss": 0.6412544846534729, + "step": 1990 + }, + { + "epoch": 0.36789042741162475, + "grad_norm": 0.09109006822109222, + "learning_rate": 1.9554439120114636e-05, + "loss": 0.9695414900779724, + "step": 1991 + }, + { + "epoch": 0.3680752041205206, + "grad_norm": 0.08510816097259521, + "learning_rate": 1.9553850085429814e-05, + "loss": 0.7511518597602844, + "step": 1992 + }, + { + "epoch": 0.36825998082941647, + "grad_norm": 0.08156833797693253, + "learning_rate": 1.955326067053165e-05, + "loss": 0.6092409491539001, + "step": 1993 + }, + { + "epoch": 0.3684447575383123, + "grad_norm": 0.0850474014878273, + "learning_rate": 1.9552670875443596e-05, + "loss": 0.9125552773475647, + "step": 1994 + }, + { + "epoch": 0.3686295342472081, + "grad_norm": 0.07290153205394745, + "learning_rate": 1.9552080700189127e-05, + "loss": 0.6870401501655579, + "step": 1995 + }, + { + "epoch": 0.368814310956104, + "grad_norm": 0.08091038465499878, + "learning_rate": 1.9551490144791738e-05, + "loss": 0.7886521220207214, + "step": 1996 + }, + { + "epoch": 0.36899908766499984, + "grad_norm": 0.07019095867872238, + "learning_rate": 1.955089920927492e-05, + "loss": 0.9075028300285339, + "step": 1997 + }, + { + "epoch": 0.36918386437389566, + "grad_norm": 0.07463059574365616, + "learning_rate": 1.955030789366219e-05, + "loss": 0.7937076687812805, + "step": 1998 + }, + { + "epoch": 0.3693686410827915, + "grad_norm": 0.07808171212673187, + "learning_rate": 1.954971619797709e-05, + "loss": 0.7428622841835022, + "step": 1999 + }, + { + "epoch": 0.3695534177916874, + "grad_norm": 0.0843687504529953, + "learning_rate": 1.9549124122243163e-05, + "loss": 0.6670342683792114, + "step": 2000 + }, + { + "epoch": 0.3695534177916874, + "eval_loss": 0.7397631406784058, + "eval_runtime": 158.3211, + "eval_samples_per_second": 115.139, + "eval_steps_per_second": 14.395, + "step": 2000 + }, + { + "epoch": 0.3697381945005832, + "grad_norm": 0.06598306447267532, + "learning_rate": 1.954853166648397e-05, + "loss": 0.5824833512306213, + "step": 2001 + }, + { + "epoch": 0.36992297120947903, + "grad_norm": 0.09141170978546143, + "learning_rate": 1.9547938830723088e-05, + "loss": 0.7796555757522583, + "step": 2002 + }, + { + "epoch": 0.37010774791837486, + "grad_norm": 0.06458456814289093, + "learning_rate": 1.9547345614984116e-05, + "loss": 0.817378580570221, + "step": 2003 + }, + { + "epoch": 0.37029252462727075, + "grad_norm": 0.09466605633497238, + "learning_rate": 1.9546752019290656e-05, + "loss": 0.9092118740081787, + "step": 2004 + }, + { + "epoch": 0.3704773013361666, + "grad_norm": 0.07988221198320389, + "learning_rate": 1.9546158043666335e-05, + "loss": 0.7625096440315247, + "step": 2005 + }, + { + "epoch": 0.3706620780450624, + "grad_norm": 0.06539753079414368, + "learning_rate": 1.9545563688134788e-05, + "loss": 0.508465588092804, + "step": 2006 + }, + { + "epoch": 0.3708468547539583, + "grad_norm": 0.10145644098520279, + "learning_rate": 1.9544968952719673e-05, + "loss": 0.9701846837997437, + "step": 2007 + }, + { + "epoch": 0.3710316314628541, + "grad_norm": 0.06246112659573555, + "learning_rate": 1.954437383744465e-05, + "loss": 0.6103070974349976, + "step": 2008 + }, + { + "epoch": 0.37121640817174995, + "grad_norm": 0.08505327999591827, + "learning_rate": 1.9543778342333415e-05, + "loss": 0.8365639448165894, + "step": 2009 + }, + { + "epoch": 0.3714011848806458, + "grad_norm": 0.09498727321624756, + "learning_rate": 1.9543182467409657e-05, + "loss": 0.8169976472854614, + "step": 2010 + }, + { + "epoch": 0.37158596158954166, + "grad_norm": 0.07790561765432358, + "learning_rate": 1.9542586212697098e-05, + "loss": 0.6823660135269165, + "step": 2011 + }, + { + "epoch": 0.3717707382984375, + "grad_norm": 0.061434078961610794, + "learning_rate": 1.954198957821946e-05, + "loss": 0.592761218547821, + "step": 2012 + }, + { + "epoch": 0.3719555150073333, + "grad_norm": 0.07613083720207214, + "learning_rate": 1.954139256400049e-05, + "loss": 0.6575087308883667, + "step": 2013 + }, + { + "epoch": 0.37214029171622914, + "grad_norm": 0.07182037830352783, + "learning_rate": 1.954079517006395e-05, + "loss": 0.6541277170181274, + "step": 2014 + }, + { + "epoch": 0.372325068425125, + "grad_norm": 0.08298812061548233, + "learning_rate": 1.9540197396433606e-05, + "loss": 0.6347714066505432, + "step": 2015 + }, + { + "epoch": 0.37250984513402086, + "grad_norm": 0.08322648704051971, + "learning_rate": 1.9539599243133254e-05, + "loss": 0.6312686204910278, + "step": 2016 + }, + { + "epoch": 0.3726946218429167, + "grad_norm": 0.07473642379045486, + "learning_rate": 1.95390007101867e-05, + "loss": 0.5637624859809875, + "step": 2017 + }, + { + "epoch": 0.37287939855181257, + "grad_norm": 0.08620911091566086, + "learning_rate": 1.9538401797617762e-05, + "loss": 0.8354276418685913, + "step": 2018 + }, + { + "epoch": 0.3730641752607084, + "grad_norm": 0.10492628812789917, + "learning_rate": 1.9537802505450272e-05, + "loss": 0.8417186141014099, + "step": 2019 + }, + { + "epoch": 0.3732489519696042, + "grad_norm": 0.07237657159566879, + "learning_rate": 1.9537202833708084e-05, + "loss": 0.5563523173332214, + "step": 2020 + }, + { + "epoch": 0.37343372867850005, + "grad_norm": 0.07768519967794418, + "learning_rate": 1.953660278241506e-05, + "loss": 0.7059952616691589, + "step": 2021 + }, + { + "epoch": 0.37361850538739594, + "grad_norm": 0.09444581717252731, + "learning_rate": 1.9536002351595082e-05, + "loss": 0.8551285266876221, + "step": 2022 + }, + { + "epoch": 0.37380328209629177, + "grad_norm": 0.1082654669880867, + "learning_rate": 1.9535401541272046e-05, + "loss": 0.7788101434707642, + "step": 2023 + }, + { + "epoch": 0.3739880588051876, + "grad_norm": 0.07339019328355789, + "learning_rate": 1.9534800351469862e-05, + "loss": 0.7254128456115723, + "step": 2024 + }, + { + "epoch": 0.3741728355140834, + "grad_norm": 0.08173136413097382, + "learning_rate": 1.953419878221245e-05, + "loss": 0.6819193363189697, + "step": 2025 + }, + { + "epoch": 0.3743576122229793, + "grad_norm": 0.0670306533575058, + "learning_rate": 1.953359683352376e-05, + "loss": 0.48552295565605164, + "step": 2026 + }, + { + "epoch": 0.37454238893187514, + "grad_norm": 0.08190646767616272, + "learning_rate": 1.953299450542774e-05, + "loss": 0.8560343384742737, + "step": 2027 + }, + { + "epoch": 0.37472716564077097, + "grad_norm": 0.05877630412578583, + "learning_rate": 1.9532391797948365e-05, + "loss": 0.4112713634967804, + "step": 2028 + }, + { + "epoch": 0.37491194234966685, + "grad_norm": 0.09025271981954575, + "learning_rate": 1.9531788711109616e-05, + "loss": 0.7425708770751953, + "step": 2029 + }, + { + "epoch": 0.3750967190585627, + "grad_norm": 0.08883294463157654, + "learning_rate": 1.95311852449355e-05, + "loss": 0.7375306487083435, + "step": 2030 + }, + { + "epoch": 0.3752814957674585, + "grad_norm": 0.08991797268390656, + "learning_rate": 1.9530581399450032e-05, + "loss": 0.7483659386634827, + "step": 2031 + }, + { + "epoch": 0.37546627247635433, + "grad_norm": 0.07757057994604111, + "learning_rate": 1.952997717467724e-05, + "loss": 0.6233549118041992, + "step": 2032 + }, + { + "epoch": 0.3756510491852502, + "grad_norm": 0.07741144299507141, + "learning_rate": 1.9529372570641173e-05, + "loss": 0.5848559141159058, + "step": 2033 + }, + { + "epoch": 0.37583582589414605, + "grad_norm": 0.07271159440279007, + "learning_rate": 1.952876758736589e-05, + "loss": 0.6167519092559814, + "step": 2034 + }, + { + "epoch": 0.3760206026030419, + "grad_norm": 0.07572707533836365, + "learning_rate": 1.952816222487547e-05, + "loss": 0.5975688099861145, + "step": 2035 + }, + { + "epoch": 0.3762053793119377, + "grad_norm": 0.09476777911186218, + "learning_rate": 1.9527556483194003e-05, + "loss": 0.9203355312347412, + "step": 2036 + }, + { + "epoch": 0.3763901560208336, + "grad_norm": 0.07740332186222076, + "learning_rate": 1.9526950362345595e-05, + "loss": 0.6362847685813904, + "step": 2037 + }, + { + "epoch": 0.3765749327297294, + "grad_norm": 0.08949487656354904, + "learning_rate": 1.9526343862354368e-05, + "loss": 0.8080466985702515, + "step": 2038 + }, + { + "epoch": 0.37675970943862525, + "grad_norm": 0.0746837854385376, + "learning_rate": 1.9525736983244458e-05, + "loss": 0.6293449401855469, + "step": 2039 + }, + { + "epoch": 0.37694448614752113, + "grad_norm": 0.07914283871650696, + "learning_rate": 1.9525129725040023e-05, + "loss": 0.8751811981201172, + "step": 2040 + }, + { + "epoch": 0.37712926285641696, + "grad_norm": 0.07535235583782196, + "learning_rate": 1.952452208776522e-05, + "loss": 0.5902524590492249, + "step": 2041 + }, + { + "epoch": 0.3773140395653128, + "grad_norm": 0.08348150551319122, + "learning_rate": 1.952391407144424e-05, + "loss": 0.6699376106262207, + "step": 2042 + }, + { + "epoch": 0.3774988162742086, + "grad_norm": 0.0957750678062439, + "learning_rate": 1.9523305676101275e-05, + "loss": 0.8382880687713623, + "step": 2043 + }, + { + "epoch": 0.3776835929831045, + "grad_norm": 0.07746924459934235, + "learning_rate": 1.952269690176054e-05, + "loss": 0.5988008379936218, + "step": 2044 + }, + { + "epoch": 0.3778683696920003, + "grad_norm": 0.06604867428541183, + "learning_rate": 1.9522087748446263e-05, + "loss": 0.5286591649055481, + "step": 2045 + }, + { + "epoch": 0.37805314640089616, + "grad_norm": 0.07804200798273087, + "learning_rate": 1.952147821618268e-05, + "loss": 0.5273786783218384, + "step": 2046 + }, + { + "epoch": 0.378237923109792, + "grad_norm": 0.07519040256738663, + "learning_rate": 1.9520868304994054e-05, + "loss": 0.5678566098213196, + "step": 2047 + }, + { + "epoch": 0.37842269981868787, + "grad_norm": 0.07195018231868744, + "learning_rate": 1.9520258014904655e-05, + "loss": 0.6135433912277222, + "step": 2048 + }, + { + "epoch": 0.3786074765275837, + "grad_norm": 0.06383226066827774, + "learning_rate": 1.9519647345938776e-05, + "loss": 0.5484453439712524, + "step": 2049 + }, + { + "epoch": 0.3787922532364795, + "grad_norm": 0.07442542165517807, + "learning_rate": 1.9519036298120712e-05, + "loss": 0.7261379957199097, + "step": 2050 + }, + { + "epoch": 0.3789770299453754, + "grad_norm": 0.07282903045415878, + "learning_rate": 1.9518424871474786e-05, + "loss": 0.6312949657440186, + "step": 2051 + }, + { + "epoch": 0.37916180665427124, + "grad_norm": 0.08609618246555328, + "learning_rate": 1.951781306602533e-05, + "loss": 0.7831339240074158, + "step": 2052 + }, + { + "epoch": 0.37934658336316707, + "grad_norm": 0.09370411932468414, + "learning_rate": 1.951720088179669e-05, + "loss": 0.8748478293418884, + "step": 2053 + }, + { + "epoch": 0.3795313600720629, + "grad_norm": 0.07409726083278656, + "learning_rate": 1.9516588318813233e-05, + "loss": 0.7795743942260742, + "step": 2054 + }, + { + "epoch": 0.3797161367809588, + "grad_norm": 0.08550463616847992, + "learning_rate": 1.951597537709933e-05, + "loss": 0.6052160859107971, + "step": 2055 + }, + { + "epoch": 0.3799009134898546, + "grad_norm": 0.08676903694868088, + "learning_rate": 1.9515362056679385e-05, + "loss": 0.7950112819671631, + "step": 2056 + }, + { + "epoch": 0.38008569019875044, + "grad_norm": 0.07116511464118958, + "learning_rate": 1.9514748357577797e-05, + "loss": 0.5086311101913452, + "step": 2057 + }, + { + "epoch": 0.38027046690764627, + "grad_norm": 0.08856448531150818, + "learning_rate": 1.951413427981899e-05, + "loss": 0.7990341186523438, + "step": 2058 + }, + { + "epoch": 0.38045524361654215, + "grad_norm": 0.08384235948324203, + "learning_rate": 1.951351982342741e-05, + "loss": 0.7706287503242493, + "step": 2059 + }, + { + "epoch": 0.380640020325438, + "grad_norm": 0.0836147740483284, + "learning_rate": 1.9512904988427498e-05, + "loss": 0.7029248476028442, + "step": 2060 + }, + { + "epoch": 0.3808247970343338, + "grad_norm": 0.08189492672681808, + "learning_rate": 1.9512289774843737e-05, + "loss": 0.7562029361724854, + "step": 2061 + }, + { + "epoch": 0.3810095737432297, + "grad_norm": 0.08952762931585312, + "learning_rate": 1.9511674182700596e-05, + "loss": 0.5929439663887024, + "step": 2062 + }, + { + "epoch": 0.3811943504521255, + "grad_norm": 0.06707214564085007, + "learning_rate": 1.9511058212022584e-05, + "loss": 0.5128438472747803, + "step": 2063 + }, + { + "epoch": 0.38137912716102135, + "grad_norm": 0.08093805611133575, + "learning_rate": 1.9510441862834212e-05, + "loss": 0.6935490369796753, + "step": 2064 + }, + { + "epoch": 0.3815639038699172, + "grad_norm": 0.08729652315378189, + "learning_rate": 1.9509825135160006e-05, + "loss": 0.8165642619132996, + "step": 2065 + }, + { + "epoch": 0.38174868057881306, + "grad_norm": 0.08318503201007843, + "learning_rate": 1.9509208029024514e-05, + "loss": 0.7330926060676575, + "step": 2066 + }, + { + "epoch": 0.3819334572877089, + "grad_norm": 0.08431824296712875, + "learning_rate": 1.950859054445229e-05, + "loss": 0.7111803293228149, + "step": 2067 + }, + { + "epoch": 0.3821182339966047, + "grad_norm": 0.07840250432491302, + "learning_rate": 1.9507972681467913e-05, + "loss": 0.5990965962409973, + "step": 2068 + }, + { + "epoch": 0.38230301070550055, + "grad_norm": 0.08080720901489258, + "learning_rate": 1.950735444009597e-05, + "loss": 0.7232369780540466, + "step": 2069 + }, + { + "epoch": 0.38248778741439643, + "grad_norm": 0.07445263117551804, + "learning_rate": 1.9506735820361065e-05, + "loss": 0.7037575840950012, + "step": 2070 + }, + { + "epoch": 0.38267256412329226, + "grad_norm": 0.07133829593658447, + "learning_rate": 1.9506116822287818e-05, + "loss": 0.7455885410308838, + "step": 2071 + }, + { + "epoch": 0.3828573408321881, + "grad_norm": 0.07049156725406647, + "learning_rate": 1.9505497445900864e-05, + "loss": 0.5760311484336853, + "step": 2072 + }, + { + "epoch": 0.38304211754108397, + "grad_norm": 0.06675473600625992, + "learning_rate": 1.950487769122485e-05, + "loss": 0.5767952799797058, + "step": 2073 + }, + { + "epoch": 0.3832268942499798, + "grad_norm": 0.09521736949682236, + "learning_rate": 1.9504257558284435e-05, + "loss": 0.8993252515792847, + "step": 2074 + }, + { + "epoch": 0.38341167095887563, + "grad_norm": 0.078230120241642, + "learning_rate": 1.950363704710431e-05, + "loss": 0.8286687135696411, + "step": 2075 + }, + { + "epoch": 0.38359644766777146, + "grad_norm": 0.0930788516998291, + "learning_rate": 1.9503016157709163e-05, + "loss": 0.9682884812355042, + "step": 2076 + }, + { + "epoch": 0.38378122437666734, + "grad_norm": 0.07385998964309692, + "learning_rate": 1.95023948901237e-05, + "loss": 0.7313173413276672, + "step": 2077 + }, + { + "epoch": 0.38396600108556317, + "grad_norm": 0.07340817898511887, + "learning_rate": 1.9501773244372654e-05, + "loss": 0.6114804744720459, + "step": 2078 + }, + { + "epoch": 0.384150777794459, + "grad_norm": 0.0526450015604496, + "learning_rate": 1.9501151220480757e-05, + "loss": 0.4882235825061798, + "step": 2079 + }, + { + "epoch": 0.3843355545033548, + "grad_norm": 0.0750875324010849, + "learning_rate": 1.950052881847277e-05, + "loss": 0.64805668592453, + "step": 2080 + }, + { + "epoch": 0.3845203312122507, + "grad_norm": 0.08840855956077576, + "learning_rate": 1.9499906038373458e-05, + "loss": 0.7312134504318237, + "step": 2081 + }, + { + "epoch": 0.38470510792114654, + "grad_norm": 0.08124563843011856, + "learning_rate": 1.9499282880207605e-05, + "loss": 0.6080642342567444, + "step": 2082 + }, + { + "epoch": 0.38488988463004237, + "grad_norm": 0.0850062444806099, + "learning_rate": 1.9498659344000014e-05, + "loss": 0.7977728247642517, + "step": 2083 + }, + { + "epoch": 0.38507466133893825, + "grad_norm": 0.09203612804412842, + "learning_rate": 1.94980354297755e-05, + "loss": 0.9024616479873657, + "step": 2084 + }, + { + "epoch": 0.3852594380478341, + "grad_norm": 0.08661805838346481, + "learning_rate": 1.9497411137558887e-05, + "loss": 0.7511764764785767, + "step": 2085 + }, + { + "epoch": 0.3854442147567299, + "grad_norm": 0.08258519321680069, + "learning_rate": 1.9496786467375028e-05, + "loss": 0.6556458473205566, + "step": 2086 + }, + { + "epoch": 0.38562899146562574, + "grad_norm": 0.0830867737531662, + "learning_rate": 1.949616141924878e-05, + "loss": 0.6187492609024048, + "step": 2087 + }, + { + "epoch": 0.3858137681745216, + "grad_norm": 0.057450730353593826, + "learning_rate": 1.9495535993205015e-05, + "loss": 0.5264581441879272, + "step": 2088 + }, + { + "epoch": 0.38599854488341745, + "grad_norm": 0.07087472826242447, + "learning_rate": 1.9494910189268627e-05, + "loss": 0.5897881984710693, + "step": 2089 + }, + { + "epoch": 0.3861833215923133, + "grad_norm": 0.074063740670681, + "learning_rate": 1.949428400746452e-05, + "loss": 0.41536253690719604, + "step": 2090 + }, + { + "epoch": 0.3863680983012091, + "grad_norm": 0.0563860647380352, + "learning_rate": 1.949365744781761e-05, + "loss": 0.40170884132385254, + "step": 2091 + }, + { + "epoch": 0.386552875010105, + "grad_norm": 0.06514260172843933, + "learning_rate": 1.9493030510352838e-05, + "loss": 0.49167242646217346, + "step": 2092 + }, + { + "epoch": 0.3867376517190008, + "grad_norm": 0.07091984152793884, + "learning_rate": 1.9492403195095152e-05, + "loss": 0.615524411201477, + "step": 2093 + }, + { + "epoch": 0.38692242842789665, + "grad_norm": 0.07864012569189072, + "learning_rate": 1.9491775502069513e-05, + "loss": 0.6243424415588379, + "step": 2094 + }, + { + "epoch": 0.38710720513679253, + "grad_norm": 0.05738931894302368, + "learning_rate": 1.949114743130091e-05, + "loss": 0.42757418751716614, + "step": 2095 + }, + { + "epoch": 0.38729198184568836, + "grad_norm": 0.0666792094707489, + "learning_rate": 1.949051898281433e-05, + "loss": 0.7218016386032104, + "step": 2096 + }, + { + "epoch": 0.3874767585545842, + "grad_norm": 0.07960478216409683, + "learning_rate": 1.9489890156634787e-05, + "loss": 0.7451041340827942, + "step": 2097 + }, + { + "epoch": 0.38766153526348, + "grad_norm": 0.06650031358003616, + "learning_rate": 1.9489260952787305e-05, + "loss": 0.5651580691337585, + "step": 2098 + }, + { + "epoch": 0.3878463119723759, + "grad_norm": 0.0677560567855835, + "learning_rate": 1.948863137129693e-05, + "loss": 0.6015886664390564, + "step": 2099 + }, + { + "epoch": 0.38803108868127173, + "grad_norm": 0.07975464314222336, + "learning_rate": 1.9488001412188705e-05, + "loss": 0.7732228636741638, + "step": 2100 + }, + { + "epoch": 0.38821586539016756, + "grad_norm": 0.07170242816209793, + "learning_rate": 1.948737107548771e-05, + "loss": 0.6001203656196594, + "step": 2101 + }, + { + "epoch": 0.3884006420990634, + "grad_norm": 0.07086360454559326, + "learning_rate": 1.9486740361219034e-05, + "loss": 0.6004417538642883, + "step": 2102 + }, + { + "epoch": 0.38858541880795927, + "grad_norm": 0.09102153033018112, + "learning_rate": 1.9486109269407768e-05, + "loss": 0.864840030670166, + "step": 2103 + }, + { + "epoch": 0.3887701955168551, + "grad_norm": 0.07844554632902145, + "learning_rate": 1.948547780007903e-05, + "loss": 0.6806324124336243, + "step": 2104 + }, + { + "epoch": 0.38895497222575093, + "grad_norm": 0.0875028744339943, + "learning_rate": 1.948484595325795e-05, + "loss": 0.7554509043693542, + "step": 2105 + }, + { + "epoch": 0.3891397489346468, + "grad_norm": 0.07255323976278305, + "learning_rate": 1.9484213728969685e-05, + "loss": 0.8027604818344116, + "step": 2106 + }, + { + "epoch": 0.38932452564354264, + "grad_norm": 0.07519349455833435, + "learning_rate": 1.9483581127239377e-05, + "loss": 0.5965269804000854, + "step": 2107 + }, + { + "epoch": 0.38950930235243847, + "grad_norm": 0.09502169489860535, + "learning_rate": 1.9482948148092212e-05, + "loss": 1.1054532527923584, + "step": 2108 + }, + { + "epoch": 0.3896940790613343, + "grad_norm": 0.08516938239336014, + "learning_rate": 1.948231479155338e-05, + "loss": 0.7914494872093201, + "step": 2109 + }, + { + "epoch": 0.3898788557702302, + "grad_norm": 0.06948333978652954, + "learning_rate": 1.9481681057648085e-05, + "loss": 0.6895288825035095, + "step": 2110 + }, + { + "epoch": 0.390063632479126, + "grad_norm": 0.08115935325622559, + "learning_rate": 1.9481046946401548e-05, + "loss": 0.6990054845809937, + "step": 2111 + }, + { + "epoch": 0.39024840918802184, + "grad_norm": 0.07175900787115097, + "learning_rate": 1.9480412457839004e-05, + "loss": 0.5589213371276855, + "step": 2112 + }, + { + "epoch": 0.39043318589691767, + "grad_norm": 0.06155014783143997, + "learning_rate": 1.9479777591985706e-05, + "loss": 0.5068143606185913, + "step": 2113 + }, + { + "epoch": 0.39061796260581355, + "grad_norm": 0.08584966510534286, + "learning_rate": 1.947914234886692e-05, + "loss": 0.8504391312599182, + "step": 2114 + }, + { + "epoch": 0.3908027393147094, + "grad_norm": 0.08797426521778107, + "learning_rate": 1.9478506728507925e-05, + "loss": 0.7745741009712219, + "step": 2115 + }, + { + "epoch": 0.3909875160236052, + "grad_norm": 0.08473818749189377, + "learning_rate": 1.9477870730934014e-05, + "loss": 0.7680900692939758, + "step": 2116 + }, + { + "epoch": 0.3911722927325011, + "grad_norm": 0.06665950268507004, + "learning_rate": 1.94772343561705e-05, + "loss": 0.5885246992111206, + "step": 2117 + }, + { + "epoch": 0.3913570694413969, + "grad_norm": 0.07183478772640228, + "learning_rate": 1.947659760424271e-05, + "loss": 0.533574104309082, + "step": 2118 + }, + { + "epoch": 0.39154184615029275, + "grad_norm": 0.07430543750524521, + "learning_rate": 1.9475960475175985e-05, + "loss": 0.6689956188201904, + "step": 2119 + }, + { + "epoch": 0.3917266228591886, + "grad_norm": 0.07013824582099915, + "learning_rate": 1.947532296899568e-05, + "loss": 0.6077504754066467, + "step": 2120 + }, + { + "epoch": 0.39191139956808446, + "grad_norm": 0.06455551832914352, + "learning_rate": 1.9474685085727162e-05, + "loss": 0.40655583143234253, + "step": 2121 + }, + { + "epoch": 0.3920961762769803, + "grad_norm": 0.08743591606616974, + "learning_rate": 1.9474046825395825e-05, + "loss": 0.9286163449287415, + "step": 2122 + }, + { + "epoch": 0.3922809529858761, + "grad_norm": 0.08486149460077286, + "learning_rate": 1.947340818802706e-05, + "loss": 0.8797714114189148, + "step": 2123 + }, + { + "epoch": 0.39246572969477195, + "grad_norm": 0.06798680871725082, + "learning_rate": 1.947276917364629e-05, + "loss": 0.52814781665802, + "step": 2124 + }, + { + "epoch": 0.39265050640366783, + "grad_norm": 0.08360527455806732, + "learning_rate": 1.9472129782278944e-05, + "loss": 0.7399903535842896, + "step": 2125 + }, + { + "epoch": 0.39283528311256366, + "grad_norm": 0.06659283488988876, + "learning_rate": 1.9471490013950464e-05, + "loss": 0.5814614295959473, + "step": 2126 + }, + { + "epoch": 0.3930200598214595, + "grad_norm": 0.08121927827596664, + "learning_rate": 1.9470849868686315e-05, + "loss": 0.7577459216117859, + "step": 2127 + }, + { + "epoch": 0.3932048365303554, + "grad_norm": 0.07345825433731079, + "learning_rate": 1.9470209346511977e-05, + "loss": 0.6281236410140991, + "step": 2128 + }, + { + "epoch": 0.3933896132392512, + "grad_norm": 0.06721615046262741, + "learning_rate": 1.946956844745293e-05, + "loss": 0.5942303538322449, + "step": 2129 + }, + { + "epoch": 0.39357438994814703, + "grad_norm": 0.07023970037698746, + "learning_rate": 1.9468927171534685e-05, + "loss": 0.6943172216415405, + "step": 2130 + }, + { + "epoch": 0.39375916665704286, + "grad_norm": 0.0984039232134819, + "learning_rate": 1.9468285518782764e-05, + "loss": 0.88523268699646, + "step": 2131 + }, + { + "epoch": 0.39394394336593874, + "grad_norm": 0.08739271014928818, + "learning_rate": 1.9467643489222704e-05, + "loss": 0.5721750855445862, + "step": 2132 + }, + { + "epoch": 0.39412872007483457, + "grad_norm": 0.06664688140153885, + "learning_rate": 1.9467001082880054e-05, + "loss": 0.6401693224906921, + "step": 2133 + }, + { + "epoch": 0.3943134967837304, + "grad_norm": 0.07537417113780975, + "learning_rate": 1.946635829978038e-05, + "loss": 0.6778192520141602, + "step": 2134 + }, + { + "epoch": 0.39449827349262623, + "grad_norm": 0.0772600993514061, + "learning_rate": 1.9465715139949254e-05, + "loss": 0.6469069719314575, + "step": 2135 + }, + { + "epoch": 0.3946830502015221, + "grad_norm": 0.06720546633005142, + "learning_rate": 1.9465071603412287e-05, + "loss": 0.5592214465141296, + "step": 2136 + }, + { + "epoch": 0.39486782691041794, + "grad_norm": 0.062190715223550797, + "learning_rate": 1.946442769019508e-05, + "loss": 0.5607873201370239, + "step": 2137 + }, + { + "epoch": 0.39505260361931377, + "grad_norm": 0.07531455159187317, + "learning_rate": 1.9463783400323263e-05, + "loss": 0.7235403656959534, + "step": 2138 + }, + { + "epoch": 0.39523738032820965, + "grad_norm": 0.0764726921916008, + "learning_rate": 1.9463138733822475e-05, + "loss": 0.6067501306533813, + "step": 2139 + }, + { + "epoch": 0.3954221570371055, + "grad_norm": 0.06785880029201508, + "learning_rate": 1.9462493690718373e-05, + "loss": 0.6442311406135559, + "step": 2140 + }, + { + "epoch": 0.3956069337460013, + "grad_norm": 0.08522092550992966, + "learning_rate": 1.9461848271036623e-05, + "loss": 0.7445117235183716, + "step": 2141 + }, + { + "epoch": 0.39579171045489714, + "grad_norm": 0.09842310845851898, + "learning_rate": 1.9461202474802914e-05, + "loss": 1.112312912940979, + "step": 2142 + }, + { + "epoch": 0.395976487163793, + "grad_norm": 0.0805714949965477, + "learning_rate": 1.946055630204295e-05, + "loss": 0.7900038361549377, + "step": 2143 + }, + { + "epoch": 0.39616126387268885, + "grad_norm": 0.06138754263520241, + "learning_rate": 1.9459909752782444e-05, + "loss": 0.6165653467178345, + "step": 2144 + }, + { + "epoch": 0.3963460405815847, + "grad_norm": 0.07790310680866241, + "learning_rate": 1.945926282704712e-05, + "loss": 0.7886402010917664, + "step": 2145 + }, + { + "epoch": 0.3965308172904805, + "grad_norm": 0.07908566296100616, + "learning_rate": 1.9458615524862734e-05, + "loss": 0.786962628364563, + "step": 2146 + }, + { + "epoch": 0.3967155939993764, + "grad_norm": 0.0615328773856163, + "learning_rate": 1.9457967846255045e-05, + "loss": 0.6100361943244934, + "step": 2147 + }, + { + "epoch": 0.3969003707082722, + "grad_norm": 0.07457233220338821, + "learning_rate": 1.945731979124982e-05, + "loss": 0.6171791553497314, + "step": 2148 + }, + { + "epoch": 0.39708514741716805, + "grad_norm": 0.08400756865739822, + "learning_rate": 1.9456671359872858e-05, + "loss": 0.6951082348823547, + "step": 2149 + }, + { + "epoch": 0.39726992412606393, + "grad_norm": 0.07961725443601608, + "learning_rate": 1.9456022552149965e-05, + "loss": 0.8372227549552917, + "step": 2150 + }, + { + "epoch": 0.39745470083495976, + "grad_norm": 0.0670628771185875, + "learning_rate": 1.9455373368106952e-05, + "loss": 0.6422262191772461, + "step": 2151 + }, + { + "epoch": 0.3976394775438556, + "grad_norm": 0.06386101990938187, + "learning_rate": 1.9454723807769665e-05, + "loss": 0.5038297176361084, + "step": 2152 + }, + { + "epoch": 0.3978242542527514, + "grad_norm": 0.06022537499666214, + "learning_rate": 1.945407387116395e-05, + "loss": 0.5005338788032532, + "step": 2153 + }, + { + "epoch": 0.3980090309616473, + "grad_norm": 0.08071392774581909, + "learning_rate": 1.9453423558315677e-05, + "loss": 0.6604102253913879, + "step": 2154 + }, + { + "epoch": 0.39819380767054313, + "grad_norm": 0.06811917573213577, + "learning_rate": 1.9452772869250718e-05, + "loss": 0.5015507936477661, + "step": 2155 + }, + { + "epoch": 0.39837858437943896, + "grad_norm": 0.06803597509860992, + "learning_rate": 1.9452121803994973e-05, + "loss": 0.4728114902973175, + "step": 2156 + }, + { + "epoch": 0.3985633610883348, + "grad_norm": 0.06741619855165482, + "learning_rate": 1.945147036257435e-05, + "loss": 0.498307466506958, + "step": 2157 + }, + { + "epoch": 0.3987481377972307, + "grad_norm": 0.07278750091791153, + "learning_rate": 1.945081854501478e-05, + "loss": 0.6582752466201782, + "step": 2158 + }, + { + "epoch": 0.3989329145061265, + "grad_norm": 0.07266269624233246, + "learning_rate": 1.9450166351342198e-05, + "loss": 0.630315363407135, + "step": 2159 + }, + { + "epoch": 0.39911769121502233, + "grad_norm": 0.07647449523210526, + "learning_rate": 1.9449513781582562e-05, + "loss": 0.7801526188850403, + "step": 2160 + }, + { + "epoch": 0.3993024679239182, + "grad_norm": 0.08277647942304611, + "learning_rate": 1.9448860835761842e-05, + "loss": 0.7490278482437134, + "step": 2161 + }, + { + "epoch": 0.39948724463281404, + "grad_norm": 0.060939326882362366, + "learning_rate": 1.944820751390602e-05, + "loss": 0.5510382652282715, + "step": 2162 + }, + { + "epoch": 0.39967202134170987, + "grad_norm": 0.06312974542379379, + "learning_rate": 1.94475538160411e-05, + "loss": 0.6411457061767578, + "step": 2163 + }, + { + "epoch": 0.3998567980506057, + "grad_norm": 0.06613600999116898, + "learning_rate": 1.9446899742193097e-05, + "loss": 0.5749844908714294, + "step": 2164 + }, + { + "epoch": 0.4000415747595016, + "grad_norm": 0.07607866078615189, + "learning_rate": 1.944624529238804e-05, + "loss": 0.6170638203620911, + "step": 2165 + }, + { + "epoch": 0.4002263514683974, + "grad_norm": 0.08194948732852936, + "learning_rate": 1.944559046665197e-05, + "loss": 0.7000471353530884, + "step": 2166 + }, + { + "epoch": 0.40041112817729324, + "grad_norm": 0.08070991188287735, + "learning_rate": 1.9444935265010954e-05, + "loss": 0.7806636691093445, + "step": 2167 + }, + { + "epoch": 0.40059590488618907, + "grad_norm": 0.0809662863612175, + "learning_rate": 1.9444279687491065e-05, + "loss": 0.7248894572257996, + "step": 2168 + }, + { + "epoch": 0.40078068159508495, + "grad_norm": 0.07183996587991714, + "learning_rate": 1.9443623734118388e-05, + "loss": 0.7464427947998047, + "step": 2169 + }, + { + "epoch": 0.4009654583039808, + "grad_norm": 0.0695219561457634, + "learning_rate": 1.944296740491903e-05, + "loss": 0.5609503984451294, + "step": 2170 + }, + { + "epoch": 0.4011502350128766, + "grad_norm": 0.053213201463222504, + "learning_rate": 1.944231069991912e-05, + "loss": 0.49357420206069946, + "step": 2171 + }, + { + "epoch": 0.4013350117217725, + "grad_norm": 0.06042713671922684, + "learning_rate": 1.944165361914478e-05, + "loss": 0.7338321805000305, + "step": 2172 + }, + { + "epoch": 0.4015197884306683, + "grad_norm": 0.08467214554548264, + "learning_rate": 1.944099616262217e-05, + "loss": 0.7917910814285278, + "step": 2173 + }, + { + "epoch": 0.40170456513956415, + "grad_norm": 0.07415515929460526, + "learning_rate": 1.9440338330377444e-05, + "loss": 0.6823109984397888, + "step": 2174 + }, + { + "epoch": 0.40188934184846, + "grad_norm": 0.07411254197359085, + "learning_rate": 1.943968012243679e-05, + "loss": 0.7468394041061401, + "step": 2175 + }, + { + "epoch": 0.40207411855735586, + "grad_norm": 0.08624184131622314, + "learning_rate": 1.9439021538826398e-05, + "loss": 0.7924304008483887, + "step": 2176 + }, + { + "epoch": 0.4022588952662517, + "grad_norm": 0.05622793734073639, + "learning_rate": 1.943836257957248e-05, + "loss": 0.4555974304676056, + "step": 2177 + }, + { + "epoch": 0.4024436719751475, + "grad_norm": 0.08712554723024368, + "learning_rate": 1.9437703244701266e-05, + "loss": 0.6834219098091125, + "step": 2178 + }, + { + "epoch": 0.40262844868404335, + "grad_norm": 0.07262444496154785, + "learning_rate": 1.9437043534238985e-05, + "loss": 0.6461427807807922, + "step": 2179 + }, + { + "epoch": 0.40281322539293923, + "grad_norm": 0.07889416068792343, + "learning_rate": 1.9436383448211895e-05, + "loss": 0.6368575692176819, + "step": 2180 + }, + { + "epoch": 0.40299800210183506, + "grad_norm": 0.06394167989492416, + "learning_rate": 1.943572298664627e-05, + "loss": 0.5541520714759827, + "step": 2181 + }, + { + "epoch": 0.4031827788107309, + "grad_norm": 0.0851161926984787, + "learning_rate": 1.9435062149568386e-05, + "loss": 0.9377284646034241, + "step": 2182 + }, + { + "epoch": 0.4033675555196268, + "grad_norm": 0.07163845002651215, + "learning_rate": 1.9434400937004548e-05, + "loss": 0.6254559755325317, + "step": 2183 + }, + { + "epoch": 0.4035523322285226, + "grad_norm": 0.08405304700136185, + "learning_rate": 1.9433739348981074e-05, + "loss": 0.7649668455123901, + "step": 2184 + }, + { + "epoch": 0.40373710893741843, + "grad_norm": 0.07005638629198074, + "learning_rate": 1.9433077385524284e-05, + "loss": 0.6673843860626221, + "step": 2185 + }, + { + "epoch": 0.40392188564631426, + "grad_norm": 0.08381688594818115, + "learning_rate": 1.9432415046660526e-05, + "loss": 0.7347395420074463, + "step": 2186 + }, + { + "epoch": 0.40410666235521014, + "grad_norm": 0.0781802386045456, + "learning_rate": 1.943175233241616e-05, + "loss": 0.7603856325149536, + "step": 2187 + }, + { + "epoch": 0.404291439064106, + "grad_norm": 0.07804013043642044, + "learning_rate": 1.9431089242817564e-05, + "loss": 0.7772961258888245, + "step": 2188 + }, + { + "epoch": 0.4044762157730018, + "grad_norm": 0.0823303833603859, + "learning_rate": 1.9430425777891116e-05, + "loss": 0.7311716079711914, + "step": 2189 + }, + { + "epoch": 0.40466099248189763, + "grad_norm": 0.06019553914666176, + "learning_rate": 1.942976193766323e-05, + "loss": 0.5009331703186035, + "step": 2190 + }, + { + "epoch": 0.4048457691907935, + "grad_norm": 0.07683239877223969, + "learning_rate": 1.942909772216032e-05, + "loss": 0.7285566926002502, + "step": 2191 + }, + { + "epoch": 0.40503054589968934, + "grad_norm": 0.09352260082960129, + "learning_rate": 1.9428433131408816e-05, + "loss": 0.8195133805274963, + "step": 2192 + }, + { + "epoch": 0.40521532260858517, + "grad_norm": 0.09269582480192184, + "learning_rate": 1.9427768165435177e-05, + "loss": 0.843238890171051, + "step": 2193 + }, + { + "epoch": 0.40540009931748106, + "grad_norm": 0.08898035436868668, + "learning_rate": 1.9427102824265858e-05, + "loss": 0.8769370913505554, + "step": 2194 + }, + { + "epoch": 0.4055848760263769, + "grad_norm": 0.08849231898784637, + "learning_rate": 1.942643710792734e-05, + "loss": 0.5764329433441162, + "step": 2195 + }, + { + "epoch": 0.4057696527352727, + "grad_norm": 0.06048697978258133, + "learning_rate": 1.942577101644612e-05, + "loss": 0.38896650075912476, + "step": 2196 + }, + { + "epoch": 0.40595442944416854, + "grad_norm": 0.07495073974132538, + "learning_rate": 1.94251045498487e-05, + "loss": 0.5774204730987549, + "step": 2197 + }, + { + "epoch": 0.4061392061530644, + "grad_norm": 0.07874375581741333, + "learning_rate": 1.9424437708161605e-05, + "loss": 0.6599615216255188, + "step": 2198 + }, + { + "epoch": 0.40632398286196025, + "grad_norm": 0.08063821494579315, + "learning_rate": 1.9423770491411375e-05, + "loss": 0.5673590302467346, + "step": 2199 + }, + { + "epoch": 0.4065087595708561, + "grad_norm": 0.08884714543819427, + "learning_rate": 1.9423102899624565e-05, + "loss": 0.8434959053993225, + "step": 2200 + }, + { + "epoch": 0.4066935362797519, + "grad_norm": 0.0742383748292923, + "learning_rate": 1.9422434932827737e-05, + "loss": 0.6055968999862671, + "step": 2201 + }, + { + "epoch": 0.4068783129886478, + "grad_norm": 0.08328180760145187, + "learning_rate": 1.9421766591047483e-05, + "loss": 0.9570925831794739, + "step": 2202 + }, + { + "epoch": 0.4070630896975436, + "grad_norm": 0.0653611570596695, + "learning_rate": 1.9421097874310394e-05, + "loss": 0.6053483486175537, + "step": 2203 + }, + { + "epoch": 0.40724786640643945, + "grad_norm": 0.07700639218091965, + "learning_rate": 1.9420428782643083e-05, + "loss": 0.6471602916717529, + "step": 2204 + }, + { + "epoch": 0.40743264311533534, + "grad_norm": 0.07719118893146515, + "learning_rate": 1.941975931607218e-05, + "loss": 0.6258816719055176, + "step": 2205 + }, + { + "epoch": 0.40761741982423116, + "grad_norm": 0.067261703312397, + "learning_rate": 1.9419089474624326e-05, + "loss": 0.5652295351028442, + "step": 2206 + }, + { + "epoch": 0.407802196533127, + "grad_norm": 0.06438491493463516, + "learning_rate": 1.941841925832618e-05, + "loss": 0.49911659955978394, + "step": 2207 + }, + { + "epoch": 0.4079869732420228, + "grad_norm": 0.07922912389039993, + "learning_rate": 1.9417748667204414e-05, + "loss": 0.5777056813240051, + "step": 2208 + }, + { + "epoch": 0.4081717499509187, + "grad_norm": 0.08038914203643799, + "learning_rate": 1.9417077701285714e-05, + "loss": 0.7142505645751953, + "step": 2209 + }, + { + "epoch": 0.40835652665981453, + "grad_norm": 0.059858158230781555, + "learning_rate": 1.9416406360596785e-05, + "loss": 0.38203585147857666, + "step": 2210 + }, + { + "epoch": 0.40854130336871036, + "grad_norm": 0.07125923037528992, + "learning_rate": 1.9415734645164343e-05, + "loss": 0.6019932627677917, + "step": 2211 + }, + { + "epoch": 0.4087260800776062, + "grad_norm": 0.08031252771615982, + "learning_rate": 1.941506255501512e-05, + "loss": 0.718646764755249, + "step": 2212 + }, + { + "epoch": 0.4089108567865021, + "grad_norm": 0.06486880779266357, + "learning_rate": 1.9414390090175864e-05, + "loss": 0.5900680422782898, + "step": 2213 + }, + { + "epoch": 0.4090956334953979, + "grad_norm": 0.06934762746095657, + "learning_rate": 1.9413717250673333e-05, + "loss": 0.49603864550590515, + "step": 2214 + }, + { + "epoch": 0.40928041020429373, + "grad_norm": 0.052057359367609024, + "learning_rate": 1.941304403653431e-05, + "loss": 0.4228403866291046, + "step": 2215 + }, + { + "epoch": 0.4094651869131896, + "grad_norm": 0.06075945496559143, + "learning_rate": 1.9412370447785586e-05, + "loss": 0.6137130856513977, + "step": 2216 + }, + { + "epoch": 0.40964996362208544, + "grad_norm": 0.06219998747110367, + "learning_rate": 1.941169648445396e-05, + "loss": 0.547929584980011, + "step": 2217 + }, + { + "epoch": 0.4098347403309813, + "grad_norm": 0.09154465049505234, + "learning_rate": 1.9411022146566266e-05, + "loss": 0.8030654788017273, + "step": 2218 + }, + { + "epoch": 0.4100195170398771, + "grad_norm": 0.08464067429304123, + "learning_rate": 1.941034743414933e-05, + "loss": 0.8160238265991211, + "step": 2219 + }, + { + "epoch": 0.410204293748773, + "grad_norm": 0.07993149757385254, + "learning_rate": 1.9409672347230008e-05, + "loss": 0.6873763203620911, + "step": 2220 + }, + { + "epoch": 0.4103890704576688, + "grad_norm": 0.07135781645774841, + "learning_rate": 1.9408996885835166e-05, + "loss": 0.5866267681121826, + "step": 2221 + }, + { + "epoch": 0.41057384716656464, + "grad_norm": 0.08876077830791473, + "learning_rate": 1.9408321049991684e-05, + "loss": 0.8736963272094727, + "step": 2222 + }, + { + "epoch": 0.41075862387546047, + "grad_norm": 0.07346765697002411, + "learning_rate": 1.940764483972646e-05, + "loss": 0.7503250241279602, + "step": 2223 + }, + { + "epoch": 0.41094340058435636, + "grad_norm": 0.07542257755994797, + "learning_rate": 1.9406968255066403e-05, + "loss": 0.6756606698036194, + "step": 2224 + }, + { + "epoch": 0.4111281772932522, + "grad_norm": 0.06280212849378586, + "learning_rate": 1.940629129603844e-05, + "loss": 0.5291039347648621, + "step": 2225 + }, + { + "epoch": 0.411312954002148, + "grad_norm": 0.0706767737865448, + "learning_rate": 1.940561396266951e-05, + "loss": 0.6159385442733765, + "step": 2226 + }, + { + "epoch": 0.4114977307110439, + "grad_norm": 0.07819431275129318, + "learning_rate": 1.9404936254986576e-05, + "loss": 0.7118359208106995, + "step": 2227 + }, + { + "epoch": 0.4116825074199397, + "grad_norm": 0.06865965574979782, + "learning_rate": 1.94042581730166e-05, + "loss": 0.7549982070922852, + "step": 2228 + }, + { + "epoch": 0.41186728412883555, + "grad_norm": 0.07349448651075363, + "learning_rate": 1.9403579716786572e-05, + "loss": 0.6773434281349182, + "step": 2229 + }, + { + "epoch": 0.4120520608377314, + "grad_norm": 0.07281513512134552, + "learning_rate": 1.9402900886323492e-05, + "loss": 0.6251955032348633, + "step": 2230 + }, + { + "epoch": 0.41223683754662727, + "grad_norm": 0.07288835942745209, + "learning_rate": 1.940222168165437e-05, + "loss": 0.6025035381317139, + "step": 2231 + }, + { + "epoch": 0.4124216142555231, + "grad_norm": 0.08412369340658188, + "learning_rate": 1.9401542102806248e-05, + "loss": 0.6773068308830261, + "step": 2232 + }, + { + "epoch": 0.4126063909644189, + "grad_norm": 0.10129547864198685, + "learning_rate": 1.940086214980616e-05, + "loss": 0.8324509263038635, + "step": 2233 + }, + { + "epoch": 0.41279116767331475, + "grad_norm": 0.091436967253685, + "learning_rate": 1.940018182268117e-05, + "loss": 0.8332235813140869, + "step": 2234 + }, + { + "epoch": 0.41297594438221064, + "grad_norm": 0.0675014853477478, + "learning_rate": 1.9399501121458354e-05, + "loss": 0.46324366331100464, + "step": 2235 + }, + { + "epoch": 0.41316072109110646, + "grad_norm": 0.07934007793664932, + "learning_rate": 1.93988200461648e-05, + "loss": 0.6471525430679321, + "step": 2236 + }, + { + "epoch": 0.4133454978000023, + "grad_norm": 0.07486380636692047, + "learning_rate": 1.939813859682761e-05, + "loss": 0.572313129901886, + "step": 2237 + }, + { + "epoch": 0.4135302745088982, + "grad_norm": 0.09455690532922745, + "learning_rate": 1.939745677347391e-05, + "loss": 0.9386391043663025, + "step": 2238 + }, + { + "epoch": 0.413715051217794, + "grad_norm": 0.07782839238643646, + "learning_rate": 1.9396774576130834e-05, + "loss": 0.8280326128005981, + "step": 2239 + }, + { + "epoch": 0.41389982792668983, + "grad_norm": 0.08085377514362335, + "learning_rate": 1.9396092004825523e-05, + "loss": 0.7216889262199402, + "step": 2240 + }, + { + "epoch": 0.41408460463558566, + "grad_norm": 0.06378732621669769, + "learning_rate": 1.939540905958515e-05, + "loss": 0.5238621830940247, + "step": 2241 + }, + { + "epoch": 0.41426938134448155, + "grad_norm": 0.08064355701208115, + "learning_rate": 1.939472574043689e-05, + "loss": 0.6881387233734131, + "step": 2242 + }, + { + "epoch": 0.4144541580533774, + "grad_norm": 0.08286409080028534, + "learning_rate": 1.939404204740794e-05, + "loss": 0.760225772857666, + "step": 2243 + }, + { + "epoch": 0.4146389347622732, + "grad_norm": 0.08433771133422852, + "learning_rate": 1.9393357980525504e-05, + "loss": 0.7714629173278809, + "step": 2244 + }, + { + "epoch": 0.41482371147116903, + "grad_norm": 0.07636398077011108, + "learning_rate": 1.9392673539816812e-05, + "loss": 0.5675088763237, + "step": 2245 + }, + { + "epoch": 0.4150084881800649, + "grad_norm": 0.09437257796525955, + "learning_rate": 1.9391988725309096e-05, + "loss": 0.6651148200035095, + "step": 2246 + }, + { + "epoch": 0.41519326488896074, + "grad_norm": 0.06223485618829727, + "learning_rate": 1.9391303537029612e-05, + "loss": 0.66727215051651, + "step": 2247 + }, + { + "epoch": 0.4153780415978566, + "grad_norm": 0.06689508259296417, + "learning_rate": 1.939061797500563e-05, + "loss": 0.5697513818740845, + "step": 2248 + }, + { + "epoch": 0.41556281830675246, + "grad_norm": 0.07865443825721741, + "learning_rate": 1.938993203926443e-05, + "loss": 0.7371279001235962, + "step": 2249 + }, + { + "epoch": 0.4157475950156483, + "grad_norm": 0.09136402606964111, + "learning_rate": 1.9389245729833315e-05, + "loss": 0.7764683961868286, + "step": 2250 + }, + { + "epoch": 0.4159323717245441, + "grad_norm": 0.06448964774608612, + "learning_rate": 1.9388559046739594e-05, + "loss": 0.7599173188209534, + "step": 2251 + }, + { + "epoch": 0.41611714843343994, + "grad_norm": 0.07628330588340759, + "learning_rate": 1.9387871990010596e-05, + "loss": 0.6795546412467957, + "step": 2252 + }, + { + "epoch": 0.4163019251423358, + "grad_norm": 0.06867800652980804, + "learning_rate": 1.9387184559673665e-05, + "loss": 0.523367166519165, + "step": 2253 + }, + { + "epoch": 0.41648670185123166, + "grad_norm": 0.06983886659145355, + "learning_rate": 1.9386496755756156e-05, + "loss": 0.6407635807991028, + "step": 2254 + }, + { + "epoch": 0.4166714785601275, + "grad_norm": 0.07012315839529037, + "learning_rate": 1.9385808578285445e-05, + "loss": 0.5064341425895691, + "step": 2255 + }, + { + "epoch": 0.4168562552690233, + "grad_norm": 0.08256127685308456, + "learning_rate": 1.9385120027288914e-05, + "loss": 0.7292260527610779, + "step": 2256 + }, + { + "epoch": 0.4170410319779192, + "grad_norm": 0.09244728088378906, + "learning_rate": 1.938443110279397e-05, + "loss": 0.9887353181838989, + "step": 2257 + }, + { + "epoch": 0.417225808686815, + "grad_norm": 0.07802069187164307, + "learning_rate": 1.9383741804828024e-05, + "loss": 0.7937911748886108, + "step": 2258 + }, + { + "epoch": 0.41741058539571085, + "grad_norm": 0.07037319988012314, + "learning_rate": 1.9383052133418517e-05, + "loss": 0.6395033001899719, + "step": 2259 + }, + { + "epoch": 0.41759536210460674, + "grad_norm": 0.0810866579413414, + "learning_rate": 1.938236208859289e-05, + "loss": 0.7780205011367798, + "step": 2260 + }, + { + "epoch": 0.41778013881350257, + "grad_norm": 0.07843460887670517, + "learning_rate": 1.93816716703786e-05, + "loss": 0.7478843927383423, + "step": 2261 + }, + { + "epoch": 0.4179649155223984, + "grad_norm": 0.07771844416856766, + "learning_rate": 1.9380980878803135e-05, + "loss": 0.7685114741325378, + "step": 2262 + }, + { + "epoch": 0.4181496922312942, + "grad_norm": 0.07894376665353775, + "learning_rate": 1.938028971389398e-05, + "loss": 0.5902146697044373, + "step": 2263 + }, + { + "epoch": 0.4183344689401901, + "grad_norm": 0.07636061310768127, + "learning_rate": 1.937959817567864e-05, + "loss": 0.6044233441352844, + "step": 2264 + }, + { + "epoch": 0.41851924564908594, + "grad_norm": 0.08153094351291656, + "learning_rate": 1.9378906264184638e-05, + "loss": 0.6065340042114258, + "step": 2265 + }, + { + "epoch": 0.41870402235798176, + "grad_norm": 0.07207811623811722, + "learning_rate": 1.937821397943951e-05, + "loss": 0.6241428852081299, + "step": 2266 + }, + { + "epoch": 0.4188887990668776, + "grad_norm": 0.07843715697526932, + "learning_rate": 1.9377521321470806e-05, + "loss": 0.5644761919975281, + "step": 2267 + }, + { + "epoch": 0.4190735757757735, + "grad_norm": 0.08663053065538406, + "learning_rate": 1.9376828290306093e-05, + "loss": 0.9684317708015442, + "step": 2268 + }, + { + "epoch": 0.4192583524846693, + "grad_norm": 0.0744810476899147, + "learning_rate": 1.9376134885972948e-05, + "loss": 0.6165178418159485, + "step": 2269 + }, + { + "epoch": 0.41944312919356513, + "grad_norm": 0.07540447264909744, + "learning_rate": 1.937544110849897e-05, + "loss": 0.5572347044944763, + "step": 2270 + }, + { + "epoch": 0.419627905902461, + "grad_norm": 0.06462650746107101, + "learning_rate": 1.9374746957911768e-05, + "loss": 0.6695753335952759, + "step": 2271 + }, + { + "epoch": 0.41981268261135685, + "grad_norm": 0.06049533188343048, + "learning_rate": 1.937405243423897e-05, + "loss": 0.5189021825790405, + "step": 2272 + }, + { + "epoch": 0.4199974593202527, + "grad_norm": 0.0741419643163681, + "learning_rate": 1.9373357537508205e-05, + "loss": 0.7069633603096008, + "step": 2273 + }, + { + "epoch": 0.4201822360291485, + "grad_norm": 0.062040168792009354, + "learning_rate": 1.937266226774714e-05, + "loss": 0.5386475920677185, + "step": 2274 + }, + { + "epoch": 0.4203670127380444, + "grad_norm": 0.08488454669713974, + "learning_rate": 1.937196662498344e-05, + "loss": 0.685762345790863, + "step": 2275 + }, + { + "epoch": 0.4205517894469402, + "grad_norm": 0.07908879220485687, + "learning_rate": 1.937127060924479e-05, + "loss": 0.7075814008712769, + "step": 2276 + }, + { + "epoch": 0.42073656615583604, + "grad_norm": 0.09031480550765991, + "learning_rate": 1.9370574220558888e-05, + "loss": 0.6897242665290833, + "step": 2277 + }, + { + "epoch": 0.4209213428647319, + "grad_norm": 0.07690101861953735, + "learning_rate": 1.936987745895345e-05, + "loss": 0.5031424760818481, + "step": 2278 + }, + { + "epoch": 0.42110611957362776, + "grad_norm": 0.06790696084499359, + "learning_rate": 1.9369180324456204e-05, + "loss": 0.6615981459617615, + "step": 2279 + }, + { + "epoch": 0.4212908962825236, + "grad_norm": 0.08125462383031845, + "learning_rate": 1.936848281709489e-05, + "loss": 0.8710426092147827, + "step": 2280 + }, + { + "epoch": 0.4214756729914194, + "grad_norm": 0.0631629079580307, + "learning_rate": 1.9367784936897272e-05, + "loss": 0.6070328950881958, + "step": 2281 + }, + { + "epoch": 0.4216604497003153, + "grad_norm": 0.0831427350640297, + "learning_rate": 1.9367086683891123e-05, + "loss": 0.703594446182251, + "step": 2282 + }, + { + "epoch": 0.4218452264092111, + "grad_norm": 0.0841081514954567, + "learning_rate": 1.936638805810423e-05, + "loss": 0.6820002198219299, + "step": 2283 + }, + { + "epoch": 0.42203000311810696, + "grad_norm": 0.07842420041561127, + "learning_rate": 1.9365689059564393e-05, + "loss": 0.6011175513267517, + "step": 2284 + }, + { + "epoch": 0.4222147798270028, + "grad_norm": 0.08695252239704132, + "learning_rate": 1.9364989688299432e-05, + "loss": 0.8149831295013428, + "step": 2285 + }, + { + "epoch": 0.42239955653589867, + "grad_norm": 0.08886296302080154, + "learning_rate": 1.9364289944337185e-05, + "loss": 0.6890162229537964, + "step": 2286 + }, + { + "epoch": 0.4225843332447945, + "grad_norm": 0.0779884085059166, + "learning_rate": 1.9363589827705494e-05, + "loss": 0.8665019273757935, + "step": 2287 + }, + { + "epoch": 0.4227691099536903, + "grad_norm": 0.08771474659442902, + "learning_rate": 1.936288933843222e-05, + "loss": 0.7957674860954285, + "step": 2288 + }, + { + "epoch": 0.42295388666258615, + "grad_norm": 0.08377643674612045, + "learning_rate": 1.936218847654525e-05, + "loss": 0.7866319417953491, + "step": 2289 + }, + { + "epoch": 0.42313866337148204, + "grad_norm": 0.06442653387784958, + "learning_rate": 1.936148724207246e-05, + "loss": 0.569203794002533, + "step": 2290 + }, + { + "epoch": 0.42332344008037787, + "grad_norm": 0.07990846037864685, + "learning_rate": 1.936078563504177e-05, + "loss": 0.6724473834037781, + "step": 2291 + }, + { + "epoch": 0.4235082167892737, + "grad_norm": 0.09859101474285126, + "learning_rate": 1.93600836554811e-05, + "loss": 0.7977859973907471, + "step": 2292 + }, + { + "epoch": 0.4236929934981696, + "grad_norm": 0.05321364849805832, + "learning_rate": 1.9359381303418384e-05, + "loss": 0.45695260167121887, + "step": 2293 + }, + { + "epoch": 0.4238777702070654, + "grad_norm": 0.07542157173156738, + "learning_rate": 1.9358678578881572e-05, + "loss": 0.6335441470146179, + "step": 2294 + }, + { + "epoch": 0.42406254691596124, + "grad_norm": 0.08657735586166382, + "learning_rate": 1.9357975481898634e-05, + "loss": 0.8366142511367798, + "step": 2295 + }, + { + "epoch": 0.42424732362485706, + "grad_norm": 0.05203680694103241, + "learning_rate": 1.9357272012497546e-05, + "loss": 0.38978737592697144, + "step": 2296 + }, + { + "epoch": 0.42443210033375295, + "grad_norm": 0.07493631541728973, + "learning_rate": 1.935656817070631e-05, + "loss": 0.5956997275352478, + "step": 2297 + }, + { + "epoch": 0.4246168770426488, + "grad_norm": 0.07352018356323242, + "learning_rate": 1.9355863956552933e-05, + "loss": 0.6605044603347778, + "step": 2298 + }, + { + "epoch": 0.4248016537515446, + "grad_norm": 0.06980215758085251, + "learning_rate": 1.935515937006544e-05, + "loss": 0.5341028571128845, + "step": 2299 + }, + { + "epoch": 0.42498643046044043, + "grad_norm": 0.07171224057674408, + "learning_rate": 1.9354454411271874e-05, + "loss": 0.6301398277282715, + "step": 2300 + }, + { + "epoch": 0.4251712071693363, + "grad_norm": 0.0617557093501091, + "learning_rate": 1.935374908020029e-05, + "loss": 0.40994107723236084, + "step": 2301 + }, + { + "epoch": 0.42535598387823215, + "grad_norm": 0.07506370544433594, + "learning_rate": 1.9353043376878755e-05, + "loss": 0.7342982292175293, + "step": 2302 + }, + { + "epoch": 0.425540760587128, + "grad_norm": 0.06689701974391937, + "learning_rate": 1.9352337301335355e-05, + "loss": 0.6360087394714355, + "step": 2303 + }, + { + "epoch": 0.42572553729602386, + "grad_norm": 0.08720137923955917, + "learning_rate": 1.9351630853598193e-05, + "loss": 0.8357270359992981, + "step": 2304 + }, + { + "epoch": 0.4259103140049197, + "grad_norm": 0.0771905854344368, + "learning_rate": 1.9350924033695378e-05, + "loss": 0.7509700655937195, + "step": 2305 + }, + { + "epoch": 0.4260950907138155, + "grad_norm": 0.06878393888473511, + "learning_rate": 1.935021684165504e-05, + "loss": 0.5679242610931396, + "step": 2306 + }, + { + "epoch": 0.42627986742271134, + "grad_norm": 0.08172270655632019, + "learning_rate": 1.9349509277505327e-05, + "loss": 0.6561329960823059, + "step": 2307 + }, + { + "epoch": 0.42646464413160723, + "grad_norm": 0.07850314676761627, + "learning_rate": 1.9348801341274395e-05, + "loss": 0.5804690718650818, + "step": 2308 + }, + { + "epoch": 0.42664942084050306, + "grad_norm": 0.07448265701532364, + "learning_rate": 1.934809303299042e-05, + "loss": 0.7353978753089905, + "step": 2309 + }, + { + "epoch": 0.4268341975493989, + "grad_norm": 0.0733490064740181, + "learning_rate": 1.934738435268159e-05, + "loss": 0.6897704005241394, + "step": 2310 + }, + { + "epoch": 0.4270189742582947, + "grad_norm": 0.08506204932928085, + "learning_rate": 1.93466753003761e-05, + "loss": 0.7046520709991455, + "step": 2311 + }, + { + "epoch": 0.4272037509671906, + "grad_norm": 0.06439055502414703, + "learning_rate": 1.934596587610218e-05, + "loss": 0.7001133561134338, + "step": 2312 + }, + { + "epoch": 0.4273885276760864, + "grad_norm": 0.07183575630187988, + "learning_rate": 1.934525607988806e-05, + "loss": 0.6664788722991943, + "step": 2313 + }, + { + "epoch": 0.42757330438498226, + "grad_norm": 0.07120703905820847, + "learning_rate": 1.934454591176198e-05, + "loss": 0.5988874435424805, + "step": 2314 + }, + { + "epoch": 0.42775808109387814, + "grad_norm": 0.0718512088060379, + "learning_rate": 1.9343835371752212e-05, + "loss": 0.8172309398651123, + "step": 2315 + }, + { + "epoch": 0.42794285780277397, + "grad_norm": 0.054339561611413956, + "learning_rate": 1.934312445988703e-05, + "loss": 0.4375886023044586, + "step": 2316 + }, + { + "epoch": 0.4281276345116698, + "grad_norm": 0.054011277854442596, + "learning_rate": 1.9342413176194724e-05, + "loss": 0.5350235104560852, + "step": 2317 + }, + { + "epoch": 0.4283124112205656, + "grad_norm": 0.05996915325522423, + "learning_rate": 1.93417015207036e-05, + "loss": 0.7294011116027832, + "step": 2318 + }, + { + "epoch": 0.4284971879294615, + "grad_norm": 0.06260991841554642, + "learning_rate": 1.9340989493441988e-05, + "loss": 0.6489644646644592, + "step": 2319 + }, + { + "epoch": 0.42868196463835734, + "grad_norm": 0.07388906925916672, + "learning_rate": 1.9340277094438213e-05, + "loss": 0.8232399225234985, + "step": 2320 + }, + { + "epoch": 0.42886674134725317, + "grad_norm": 0.06844634562730789, + "learning_rate": 1.933956432372063e-05, + "loss": 0.5962044596672058, + "step": 2321 + }, + { + "epoch": 0.429051518056149, + "grad_norm": 0.07142406702041626, + "learning_rate": 1.933885118131761e-05, + "loss": 0.5791578888893127, + "step": 2322 + }, + { + "epoch": 0.4292362947650449, + "grad_norm": 0.07448078691959381, + "learning_rate": 1.933813766725753e-05, + "loss": 0.6933003664016724, + "step": 2323 + }, + { + "epoch": 0.4294210714739407, + "grad_norm": 0.061442915350198746, + "learning_rate": 1.9337423781568788e-05, + "loss": 0.5783045291900635, + "step": 2324 + }, + { + "epoch": 0.42960584818283654, + "grad_norm": 0.07764607667922974, + "learning_rate": 1.933670952427979e-05, + "loss": 0.6919342279434204, + "step": 2325 + }, + { + "epoch": 0.4297906248917324, + "grad_norm": 0.05729961022734642, + "learning_rate": 1.9335994895418965e-05, + "loss": 0.45752331614494324, + "step": 2326 + }, + { + "epoch": 0.42997540160062825, + "grad_norm": 0.06795137375593185, + "learning_rate": 1.933527989501475e-05, + "loss": 0.6426984071731567, + "step": 2327 + }, + { + "epoch": 0.4301601783095241, + "grad_norm": 0.0695880651473999, + "learning_rate": 1.9334564523095603e-05, + "loss": 0.5382835268974304, + "step": 2328 + }, + { + "epoch": 0.4303449550184199, + "grad_norm": 0.08700678497552872, + "learning_rate": 1.933384877968999e-05, + "loss": 0.8012987375259399, + "step": 2329 + }, + { + "epoch": 0.4305297317273158, + "grad_norm": 0.08838114142417908, + "learning_rate": 1.9333132664826403e-05, + "loss": 0.8843043446540833, + "step": 2330 + }, + { + "epoch": 0.4307145084362116, + "grad_norm": 0.07206934690475464, + "learning_rate": 1.9332416178533327e-05, + "loss": 0.6823926568031311, + "step": 2331 + }, + { + "epoch": 0.43089928514510745, + "grad_norm": 0.061117593199014664, + "learning_rate": 1.9331699320839293e-05, + "loss": 0.5759217143058777, + "step": 2332 + }, + { + "epoch": 0.4310840618540033, + "grad_norm": 0.08535294234752655, + "learning_rate": 1.9330982091772817e-05, + "loss": 0.7633690237998962, + "step": 2333 + }, + { + "epoch": 0.43126883856289916, + "grad_norm": 0.07020317763090134, + "learning_rate": 1.9330264491362446e-05, + "loss": 0.5276690125465393, + "step": 2334 + }, + { + "epoch": 0.431453615271795, + "grad_norm": 0.060124464333057404, + "learning_rate": 1.932954651963674e-05, + "loss": 0.5451531410217285, + "step": 2335 + }, + { + "epoch": 0.4316383919806908, + "grad_norm": 0.06954493373632431, + "learning_rate": 1.932882817662427e-05, + "loss": 0.5817059874534607, + "step": 2336 + }, + { + "epoch": 0.4318231686895867, + "grad_norm": 0.0750768631696701, + "learning_rate": 1.9328109462353626e-05, + "loss": 0.5557680726051331, + "step": 2337 + }, + { + "epoch": 0.43200794539848253, + "grad_norm": 0.07473158091306686, + "learning_rate": 1.932739037685341e-05, + "loss": 0.646592378616333, + "step": 2338 + }, + { + "epoch": 0.43219272210737836, + "grad_norm": 0.061547454446554184, + "learning_rate": 1.9326670920152237e-05, + "loss": 0.6056718826293945, + "step": 2339 + }, + { + "epoch": 0.4323774988162742, + "grad_norm": 0.056639768183231354, + "learning_rate": 1.932595109227874e-05, + "loss": 0.4849189519882202, + "step": 2340 + }, + { + "epoch": 0.43256227552517007, + "grad_norm": 0.07089490443468094, + "learning_rate": 1.932523089326157e-05, + "loss": 0.6140329241752625, + "step": 2341 + }, + { + "epoch": 0.4327470522340659, + "grad_norm": 0.08315933495759964, + "learning_rate": 1.9324510323129383e-05, + "loss": 0.6899304389953613, + "step": 2342 + }, + { + "epoch": 0.4329318289429617, + "grad_norm": 0.07349039614200592, + "learning_rate": 1.932378938191086e-05, + "loss": 0.4938865005970001, + "step": 2343 + }, + { + "epoch": 0.43311660565185756, + "grad_norm": 0.06191008538007736, + "learning_rate": 1.9323068069634688e-05, + "loss": 0.44645974040031433, + "step": 2344 + }, + { + "epoch": 0.43330138236075344, + "grad_norm": 0.1034642830491066, + "learning_rate": 1.9322346386329575e-05, + "loss": 1.1280803680419922, + "step": 2345 + }, + { + "epoch": 0.43348615906964927, + "grad_norm": 0.09495116770267487, + "learning_rate": 1.932162433202424e-05, + "loss": 0.806296169757843, + "step": 2346 + }, + { + "epoch": 0.4336709357785451, + "grad_norm": 0.079786017537117, + "learning_rate": 1.932090190674742e-05, + "loss": 0.7048410773277283, + "step": 2347 + }, + { + "epoch": 0.433855712487441, + "grad_norm": 0.07984772324562073, + "learning_rate": 1.9320179110527867e-05, + "loss": 0.6196243166923523, + "step": 2348 + }, + { + "epoch": 0.4340404891963368, + "grad_norm": 0.07886778563261032, + "learning_rate": 1.9319455943394347e-05, + "loss": 0.65333491563797, + "step": 2349 + }, + { + "epoch": 0.43422526590523264, + "grad_norm": 0.07592529058456421, + "learning_rate": 1.9318732405375636e-05, + "loss": 0.7202848196029663, + "step": 2350 + }, + { + "epoch": 0.43441004261412847, + "grad_norm": 0.0713939517736435, + "learning_rate": 1.9318008496500528e-05, + "loss": 0.7466500997543335, + "step": 2351 + }, + { + "epoch": 0.43459481932302435, + "grad_norm": 0.05905028060078621, + "learning_rate": 1.9317284216797837e-05, + "loss": 0.5592542290687561, + "step": 2352 + }, + { + "epoch": 0.4347795960319202, + "grad_norm": 0.09187117964029312, + "learning_rate": 1.931655956629638e-05, + "loss": 0.7937484979629517, + "step": 2353 + }, + { + "epoch": 0.434964372740816, + "grad_norm": 0.07646917551755905, + "learning_rate": 1.9315834545025005e-05, + "loss": 0.6700643301010132, + "step": 2354 + }, + { + "epoch": 0.43514914944971184, + "grad_norm": 0.08851625770330429, + "learning_rate": 1.9315109153012557e-05, + "loss": 0.8922794461250305, + "step": 2355 + }, + { + "epoch": 0.4353339261586077, + "grad_norm": 0.08113475888967514, + "learning_rate": 1.931438339028791e-05, + "loss": 0.8182970285415649, + "step": 2356 + }, + { + "epoch": 0.43551870286750355, + "grad_norm": 0.08301424980163574, + "learning_rate": 1.9313657256879943e-05, + "loss": 0.7397076487541199, + "step": 2357 + }, + { + "epoch": 0.4357034795763994, + "grad_norm": 0.06106698885560036, + "learning_rate": 1.931293075281756e-05, + "loss": 0.4387102723121643, + "step": 2358 + }, + { + "epoch": 0.43588825628529526, + "grad_norm": 0.07855153828859329, + "learning_rate": 1.9312203878129664e-05, + "loss": 0.7661651968955994, + "step": 2359 + }, + { + "epoch": 0.4360730329941911, + "grad_norm": 0.0937490165233612, + "learning_rate": 1.931147663284519e-05, + "loss": 0.8062765002250671, + "step": 2360 + }, + { + "epoch": 0.4362578097030869, + "grad_norm": 0.0676034688949585, + "learning_rate": 1.931074901699308e-05, + "loss": 0.5919547080993652, + "step": 2361 + }, + { + "epoch": 0.43644258641198275, + "grad_norm": 0.07248328626155853, + "learning_rate": 1.9310021030602285e-05, + "loss": 0.6367123126983643, + "step": 2362 + }, + { + "epoch": 0.43662736312087863, + "grad_norm": 0.08319476991891861, + "learning_rate": 1.930929267370178e-05, + "loss": 0.778451681137085, + "step": 2363 + }, + { + "epoch": 0.43681213982977446, + "grad_norm": 0.06528496742248535, + "learning_rate": 1.9308563946320556e-05, + "loss": 0.66303551197052, + "step": 2364 + }, + { + "epoch": 0.4369969165386703, + "grad_norm": 0.07230069488286972, + "learning_rate": 1.930783484848761e-05, + "loss": 0.550811231136322, + "step": 2365 + }, + { + "epoch": 0.4371816932475661, + "grad_norm": 0.08370313793420792, + "learning_rate": 1.9307105380231952e-05, + "loss": 0.6972153186798096, + "step": 2366 + }, + { + "epoch": 0.437366469956462, + "grad_norm": 0.07506057620048523, + "learning_rate": 1.930637554158262e-05, + "loss": 0.5916268825531006, + "step": 2367 + }, + { + "epoch": 0.43755124666535783, + "grad_norm": 0.07940205186605453, + "learning_rate": 1.930564533256866e-05, + "loss": 0.7237229943275452, + "step": 2368 + }, + { + "epoch": 0.43773602337425366, + "grad_norm": 0.08555081486701965, + "learning_rate": 1.9304914753219126e-05, + "loss": 0.9042553901672363, + "step": 2369 + }, + { + "epoch": 0.43792080008314954, + "grad_norm": 0.06261549890041351, + "learning_rate": 1.93041838035631e-05, + "loss": 0.5432640314102173, + "step": 2370 + }, + { + "epoch": 0.43810557679204537, + "grad_norm": 0.08132058382034302, + "learning_rate": 1.9303452483629664e-05, + "loss": 0.7692998051643372, + "step": 2371 + }, + { + "epoch": 0.4382903535009412, + "grad_norm": 0.08701673150062561, + "learning_rate": 1.9302720793447927e-05, + "loss": 0.7004591822624207, + "step": 2372 + }, + { + "epoch": 0.438475130209837, + "grad_norm": 0.052568644285202026, + "learning_rate": 1.930198873304701e-05, + "loss": 0.4833621680736542, + "step": 2373 + }, + { + "epoch": 0.4386599069187329, + "grad_norm": 0.0768493190407753, + "learning_rate": 1.930125630245604e-05, + "loss": 0.5789093971252441, + "step": 2374 + }, + { + "epoch": 0.43884468362762874, + "grad_norm": 0.06829556077718735, + "learning_rate": 1.930052350170417e-05, + "loss": 0.6952472925186157, + "step": 2375 + }, + { + "epoch": 0.43902946033652457, + "grad_norm": 0.0838492289185524, + "learning_rate": 1.9299790330820563e-05, + "loss": 0.6650012731552124, + "step": 2376 + }, + { + "epoch": 0.4392142370454204, + "grad_norm": 0.06579825282096863, + "learning_rate": 1.9299056789834394e-05, + "loss": 0.6096588969230652, + "step": 2377 + }, + { + "epoch": 0.4393990137543163, + "grad_norm": 0.0709729790687561, + "learning_rate": 1.929832287877486e-05, + "loss": 0.508579432964325, + "step": 2378 + }, + { + "epoch": 0.4395837904632121, + "grad_norm": 0.06732675433158875, + "learning_rate": 1.9297588597671164e-05, + "loss": 0.5655678510665894, + "step": 2379 + }, + { + "epoch": 0.43976856717210794, + "grad_norm": 0.05913890525698662, + "learning_rate": 1.9296853946552532e-05, + "loss": 0.5239315629005432, + "step": 2380 + }, + { + "epoch": 0.4399533438810038, + "grad_norm": 0.06641931086778641, + "learning_rate": 1.92961189254482e-05, + "loss": 0.5648493766784668, + "step": 2381 + }, + { + "epoch": 0.44013812058989965, + "grad_norm": 0.0653366819024086, + "learning_rate": 1.9295383534387416e-05, + "loss": 0.6413757801055908, + "step": 2382 + }, + { + "epoch": 0.4403228972987955, + "grad_norm": 0.06733138859272003, + "learning_rate": 1.929464777339945e-05, + "loss": 0.4827044904232025, + "step": 2383 + }, + { + "epoch": 0.4405076740076913, + "grad_norm": 0.08121521770954132, + "learning_rate": 1.9293911642513585e-05, + "loss": 0.6802595257759094, + "step": 2384 + }, + { + "epoch": 0.4406924507165872, + "grad_norm": 0.06312023103237152, + "learning_rate": 1.9293175141759107e-05, + "loss": 0.5452331900596619, + "step": 2385 + }, + { + "epoch": 0.440877227425483, + "grad_norm": 0.08019667118787766, + "learning_rate": 1.9292438271165335e-05, + "loss": 0.7767255902290344, + "step": 2386 + }, + { + "epoch": 0.44106200413437885, + "grad_norm": 0.08663544803857803, + "learning_rate": 1.9291701030761597e-05, + "loss": 0.9065479636192322, + "step": 2387 + }, + { + "epoch": 0.44124678084327473, + "grad_norm": 0.07809998840093613, + "learning_rate": 1.9290963420577223e-05, + "loss": 0.7812683582305908, + "step": 2388 + }, + { + "epoch": 0.44143155755217056, + "grad_norm": 0.08127256482839584, + "learning_rate": 1.9290225440641574e-05, + "loss": 0.7572963237762451, + "step": 2389 + }, + { + "epoch": 0.4416163342610664, + "grad_norm": 0.06240149214863777, + "learning_rate": 1.9289487090984017e-05, + "loss": 0.5791239142417908, + "step": 2390 + }, + { + "epoch": 0.4418011109699622, + "grad_norm": 0.07807501405477524, + "learning_rate": 1.928874837163394e-05, + "loss": 0.6849839091300964, + "step": 2391 + }, + { + "epoch": 0.4419858876788581, + "grad_norm": 0.07311359792947769, + "learning_rate": 1.9288009282620736e-05, + "loss": 0.708427369594574, + "step": 2392 + }, + { + "epoch": 0.44217066438775393, + "grad_norm": 0.05785594880580902, + "learning_rate": 1.928726982397382e-05, + "loss": 0.5417725443840027, + "step": 2393 + }, + { + "epoch": 0.44235544109664976, + "grad_norm": 0.06097231060266495, + "learning_rate": 1.9286529995722624e-05, + "loss": 0.5638762712478638, + "step": 2394 + }, + { + "epoch": 0.4425402178055456, + "grad_norm": 0.07345853000879288, + "learning_rate": 1.9285789797896587e-05, + "loss": 0.5357245802879333, + "step": 2395 + }, + { + "epoch": 0.44272499451444147, + "grad_norm": 0.09665371477603912, + "learning_rate": 1.9285049230525166e-05, + "loss": 0.8695791959762573, + "step": 2396 + }, + { + "epoch": 0.4429097712233373, + "grad_norm": 0.06887251883745193, + "learning_rate": 1.928430829363784e-05, + "loss": 0.7601022124290466, + "step": 2397 + }, + { + "epoch": 0.44309454793223313, + "grad_norm": 0.09401489794254303, + "learning_rate": 1.9283566987264083e-05, + "loss": 0.9063498377799988, + "step": 2398 + }, + { + "epoch": 0.443279324641129, + "grad_norm": 0.08337516337633133, + "learning_rate": 1.9282825311433408e-05, + "loss": 0.6751862168312073, + "step": 2399 + }, + { + "epoch": 0.44346410135002484, + "grad_norm": 0.08896543830633163, + "learning_rate": 1.928208326617533e-05, + "loss": 0.6873741149902344, + "step": 2400 + }, + { + "epoch": 0.44364887805892067, + "grad_norm": 0.0696672722697258, + "learning_rate": 1.9281340851519373e-05, + "loss": 0.6347296237945557, + "step": 2401 + }, + { + "epoch": 0.4438336547678165, + "grad_norm": 0.05829437077045441, + "learning_rate": 1.928059806749509e-05, + "loss": 0.63972008228302, + "step": 2402 + }, + { + "epoch": 0.4440184314767124, + "grad_norm": 0.07010830193758011, + "learning_rate": 1.927985491413204e-05, + "loss": 0.6212910413742065, + "step": 2403 + }, + { + "epoch": 0.4442032081856082, + "grad_norm": 0.07966028898954391, + "learning_rate": 1.9279111391459797e-05, + "loss": 0.6606336236000061, + "step": 2404 + }, + { + "epoch": 0.44438798489450404, + "grad_norm": 0.0820903331041336, + "learning_rate": 1.927836749950795e-05, + "loss": 0.6804654002189636, + "step": 2405 + }, + { + "epoch": 0.44457276160339987, + "grad_norm": 0.08157593756914139, + "learning_rate": 1.927762323830611e-05, + "loss": 0.6042102575302124, + "step": 2406 + }, + { + "epoch": 0.44475753831229575, + "grad_norm": 0.07228659093379974, + "learning_rate": 1.9276878607883886e-05, + "loss": 0.6968677043914795, + "step": 2407 + }, + { + "epoch": 0.4449423150211916, + "grad_norm": 0.059456437826156616, + "learning_rate": 1.927613360827092e-05, + "loss": 0.5671892166137695, + "step": 2408 + }, + { + "epoch": 0.4451270917300874, + "grad_norm": 0.07476317882537842, + "learning_rate": 1.9275388239496854e-05, + "loss": 0.6846385598182678, + "step": 2409 + }, + { + "epoch": 0.4453118684389833, + "grad_norm": 0.06802140921354294, + "learning_rate": 1.9274642501591358e-05, + "loss": 0.42480891942977905, + "step": 2410 + }, + { + "epoch": 0.4454966451478791, + "grad_norm": 0.07595323771238327, + "learning_rate": 1.9273896394584103e-05, + "loss": 0.7496429681777954, + "step": 2411 + }, + { + "epoch": 0.44568142185677495, + "grad_norm": 0.06730681657791138, + "learning_rate": 1.927314991850479e-05, + "loss": 0.5631354451179504, + "step": 2412 + }, + { + "epoch": 0.4458661985656708, + "grad_norm": 0.09018740803003311, + "learning_rate": 1.927240307338312e-05, + "loss": 0.6622211933135986, + "step": 2413 + }, + { + "epoch": 0.44605097527456666, + "grad_norm": 0.05780932679772377, + "learning_rate": 1.927165585924882e-05, + "loss": 0.47575706243515015, + "step": 2414 + }, + { + "epoch": 0.4462357519834625, + "grad_norm": 0.07823736220598221, + "learning_rate": 1.9270908276131623e-05, + "loss": 0.7001676559448242, + "step": 2415 + }, + { + "epoch": 0.4464205286923583, + "grad_norm": 0.07646199315786362, + "learning_rate": 1.927016032406128e-05, + "loss": 0.7157690525054932, + "step": 2416 + }, + { + "epoch": 0.44660530540125415, + "grad_norm": 0.09840088337659836, + "learning_rate": 1.926941200306756e-05, + "loss": 0.8731129169464111, + "step": 2417 + }, + { + "epoch": 0.44679008211015003, + "grad_norm": 0.07829701900482178, + "learning_rate": 1.9268663313180244e-05, + "loss": 0.6255185008049011, + "step": 2418 + }, + { + "epoch": 0.44697485881904586, + "grad_norm": 0.08753079921007156, + "learning_rate": 1.9267914254429125e-05, + "loss": 0.8361132144927979, + "step": 2419 + }, + { + "epoch": 0.4471596355279417, + "grad_norm": 0.057597216218709946, + "learning_rate": 1.926716482684401e-05, + "loss": 0.5651022791862488, + "step": 2420 + }, + { + "epoch": 0.4473444122368376, + "grad_norm": 0.07024961709976196, + "learning_rate": 1.9266415030454734e-05, + "loss": 0.695307195186615, + "step": 2421 + }, + { + "epoch": 0.4475291889457334, + "grad_norm": 0.0742584764957428, + "learning_rate": 1.9265664865291128e-05, + "loss": 0.6812407374382019, + "step": 2422 + }, + { + "epoch": 0.44771396565462923, + "grad_norm": 0.06642390042543411, + "learning_rate": 1.9264914331383047e-05, + "loss": 0.5371487140655518, + "step": 2423 + }, + { + "epoch": 0.44789874236352506, + "grad_norm": 0.0593610443174839, + "learning_rate": 1.9264163428760366e-05, + "loss": 0.4845843017101288, + "step": 2424 + }, + { + "epoch": 0.44808351907242094, + "grad_norm": 0.08533383905887604, + "learning_rate": 1.9263412157452964e-05, + "loss": 0.6560807228088379, + "step": 2425 + }, + { + "epoch": 0.4482682957813168, + "grad_norm": 0.06368084996938705, + "learning_rate": 1.9262660517490735e-05, + "loss": 0.3997742831707001, + "step": 2426 + }, + { + "epoch": 0.4484530724902126, + "grad_norm": 0.06924302130937576, + "learning_rate": 1.9261908508903603e-05, + "loss": 0.4483805298805237, + "step": 2427 + }, + { + "epoch": 0.44863784919910843, + "grad_norm": 0.07959199696779251, + "learning_rate": 1.9261156131721485e-05, + "loss": 0.792576253414154, + "step": 2428 + }, + { + "epoch": 0.4488226259080043, + "grad_norm": 0.059455327689647675, + "learning_rate": 1.9260403385974328e-05, + "loss": 0.44581228494644165, + "step": 2429 + }, + { + "epoch": 0.44900740261690014, + "grad_norm": 0.07160536199808121, + "learning_rate": 1.9259650271692084e-05, + "loss": 0.745840311050415, + "step": 2430 + }, + { + "epoch": 0.44919217932579597, + "grad_norm": 0.07705884426832199, + "learning_rate": 1.9258896788904734e-05, + "loss": 0.8486708402633667, + "step": 2431 + }, + { + "epoch": 0.44937695603469185, + "grad_norm": 0.069585882127285, + "learning_rate": 1.925814293764226e-05, + "loss": 0.5814905762672424, + "step": 2432 + }, + { + "epoch": 0.4495617327435877, + "grad_norm": 0.07240708917379379, + "learning_rate": 1.925738871793466e-05, + "loss": 0.5621956586837769, + "step": 2433 + }, + { + "epoch": 0.4497465094524835, + "grad_norm": 0.0654572919011116, + "learning_rate": 1.9256634129811954e-05, + "loss": 0.5260647535324097, + "step": 2434 + }, + { + "epoch": 0.44993128616137934, + "grad_norm": 0.0821259617805481, + "learning_rate": 1.925587917330417e-05, + "loss": 0.5811529755592346, + "step": 2435 + }, + { + "epoch": 0.4501160628702752, + "grad_norm": 0.08183261007070541, + "learning_rate": 1.9255123848441347e-05, + "loss": 0.7321382164955139, + "step": 2436 + }, + { + "epoch": 0.45030083957917105, + "grad_norm": 0.0739772692322731, + "learning_rate": 1.925436815525356e-05, + "loss": 0.5492620468139648, + "step": 2437 + }, + { + "epoch": 0.4504856162880669, + "grad_norm": 0.08293473720550537, + "learning_rate": 1.9253612093770865e-05, + "loss": 0.7169588208198547, + "step": 2438 + }, + { + "epoch": 0.4506703929969627, + "grad_norm": 0.06674166768789291, + "learning_rate": 1.925285566402336e-05, + "loss": 0.693142294883728, + "step": 2439 + }, + { + "epoch": 0.4508551697058586, + "grad_norm": 0.07635101675987244, + "learning_rate": 1.9252098866041152e-05, + "loss": 0.5711791515350342, + "step": 2440 + }, + { + "epoch": 0.4510399464147544, + "grad_norm": 0.0930107980966568, + "learning_rate": 1.9251341699854354e-05, + "loss": 0.7992475032806396, + "step": 2441 + }, + { + "epoch": 0.45122472312365025, + "grad_norm": 0.08799722790718079, + "learning_rate": 1.9250584165493102e-05, + "loss": 0.8901659250259399, + "step": 2442 + }, + { + "epoch": 0.45140949983254613, + "grad_norm": 0.074408158659935, + "learning_rate": 1.924982626298754e-05, + "loss": 0.7303105592727661, + "step": 2443 + }, + { + "epoch": 0.45159427654144196, + "grad_norm": 0.08466403186321259, + "learning_rate": 1.924906799236783e-05, + "loss": 0.7405203580856323, + "step": 2444 + }, + { + "epoch": 0.4517790532503378, + "grad_norm": 0.06970347464084625, + "learning_rate": 1.924830935366415e-05, + "loss": 0.6607108116149902, + "step": 2445 + }, + { + "epoch": 0.4519638299592336, + "grad_norm": 0.0749702900648117, + "learning_rate": 1.9247550346906692e-05, + "loss": 0.7693953514099121, + "step": 2446 + }, + { + "epoch": 0.4521486066681295, + "grad_norm": 0.09294398874044418, + "learning_rate": 1.924679097212567e-05, + "loss": 0.8226107954978943, + "step": 2447 + }, + { + "epoch": 0.45233338337702533, + "grad_norm": 0.06877472251653671, + "learning_rate": 1.9246031229351287e-05, + "loss": 0.6727323532104492, + "step": 2448 + }, + { + "epoch": 0.45251816008592116, + "grad_norm": 0.06864230334758759, + "learning_rate": 1.9245271118613792e-05, + "loss": 0.6496172547340393, + "step": 2449 + }, + { + "epoch": 0.452702936794817, + "grad_norm": 0.06905477494001389, + "learning_rate": 1.9244510639943433e-05, + "loss": 0.46779608726501465, + "step": 2450 + }, + { + "epoch": 0.4528877135037129, + "grad_norm": 0.06438469886779785, + "learning_rate": 1.924374979337047e-05, + "loss": 0.5262128710746765, + "step": 2451 + }, + { + "epoch": 0.4530724902126087, + "grad_norm": 0.06250059604644775, + "learning_rate": 1.9242988578925185e-05, + "loss": 0.4987810552120209, + "step": 2452 + }, + { + "epoch": 0.45325726692150453, + "grad_norm": 0.06168653443455696, + "learning_rate": 1.9242226996637873e-05, + "loss": 0.5915943384170532, + "step": 2453 + }, + { + "epoch": 0.4534420436304004, + "grad_norm": 0.07355090975761414, + "learning_rate": 1.9241465046538843e-05, + "loss": 0.7472153902053833, + "step": 2454 + }, + { + "epoch": 0.45362682033929624, + "grad_norm": 0.06379469484090805, + "learning_rate": 1.9240702728658415e-05, + "loss": 0.5391486287117004, + "step": 2455 + }, + { + "epoch": 0.4538115970481921, + "grad_norm": 0.06752083450555801, + "learning_rate": 1.923994004302693e-05, + "loss": 0.6245964765548706, + "step": 2456 + }, + { + "epoch": 0.4539963737570879, + "grad_norm": 0.07804492115974426, + "learning_rate": 1.923917698967474e-05, + "loss": 0.6221499443054199, + "step": 2457 + }, + { + "epoch": 0.4541811504659838, + "grad_norm": 0.07160136848688126, + "learning_rate": 1.923841356863221e-05, + "loss": 0.8083291053771973, + "step": 2458 + }, + { + "epoch": 0.4543659271748796, + "grad_norm": 0.08072566241025925, + "learning_rate": 1.9237649779929724e-05, + "loss": 0.9119392037391663, + "step": 2459 + }, + { + "epoch": 0.45455070388377544, + "grad_norm": 0.07465825974941254, + "learning_rate": 1.9236885623597678e-05, + "loss": 0.6454877853393555, + "step": 2460 + }, + { + "epoch": 0.45473548059267127, + "grad_norm": 0.07917649298906326, + "learning_rate": 1.9236121099666482e-05, + "loss": 0.7718840837478638, + "step": 2461 + }, + { + "epoch": 0.45492025730156715, + "grad_norm": 0.07406383752822876, + "learning_rate": 1.923535620816656e-05, + "loss": 0.633132815361023, + "step": 2462 + }, + { + "epoch": 0.455105034010463, + "grad_norm": 0.08593975752592087, + "learning_rate": 1.923459094912836e-05, + "loss": 0.7576243877410889, + "step": 2463 + }, + { + "epoch": 0.4552898107193588, + "grad_norm": 0.07461173832416534, + "learning_rate": 1.923382532258233e-05, + "loss": 0.6283621191978455, + "step": 2464 + }, + { + "epoch": 0.4554745874282547, + "grad_norm": 0.0630280002951622, + "learning_rate": 1.9233059328558942e-05, + "loss": 0.4881044328212738, + "step": 2465 + }, + { + "epoch": 0.4556593641371505, + "grad_norm": 0.06719866394996643, + "learning_rate": 1.9232292967088673e-05, + "loss": 0.6233997941017151, + "step": 2466 + }, + { + "epoch": 0.45584414084604635, + "grad_norm": 0.07608038932085037, + "learning_rate": 1.9231526238202034e-05, + "loss": 0.7199850082397461, + "step": 2467 + }, + { + "epoch": 0.4560289175549422, + "grad_norm": 0.07195864617824554, + "learning_rate": 1.923075914192953e-05, + "loss": 0.6179254055023193, + "step": 2468 + }, + { + "epoch": 0.45621369426383807, + "grad_norm": 0.07848989963531494, + "learning_rate": 1.9229991678301693e-05, + "loss": 0.8123495578765869, + "step": 2469 + }, + { + "epoch": 0.4563984709727339, + "grad_norm": 0.06677235662937164, + "learning_rate": 1.9229223847349062e-05, + "loss": 0.7239199280738831, + "step": 2470 + }, + { + "epoch": 0.4565832476816297, + "grad_norm": 0.06927766650915146, + "learning_rate": 1.92284556491022e-05, + "loss": 0.664320707321167, + "step": 2471 + }, + { + "epoch": 0.45676802439052555, + "grad_norm": 0.0737115889787674, + "learning_rate": 1.922768708359167e-05, + "loss": 0.6365139484405518, + "step": 2472 + }, + { + "epoch": 0.45695280109942144, + "grad_norm": 0.09078367799520493, + "learning_rate": 1.9226918150848067e-05, + "loss": 0.7377526164054871, + "step": 2473 + }, + { + "epoch": 0.45713757780831726, + "grad_norm": 0.06729824095964432, + "learning_rate": 1.922614885090199e-05, + "loss": 0.6383175253868103, + "step": 2474 + }, + { + "epoch": 0.4573223545172131, + "grad_norm": 0.08023769408464432, + "learning_rate": 1.9225379183784052e-05, + "loss": 0.6124799251556396, + "step": 2475 + }, + { + "epoch": 0.457507131226109, + "grad_norm": 0.08397477865219116, + "learning_rate": 1.9224609149524887e-05, + "loss": 0.9499881267547607, + "step": 2476 + }, + { + "epoch": 0.4576919079350048, + "grad_norm": 0.08595887571573257, + "learning_rate": 1.9223838748155133e-05, + "loss": 0.6891126036643982, + "step": 2477 + }, + { + "epoch": 0.45787668464390063, + "grad_norm": 0.0752493292093277, + "learning_rate": 1.922306797970546e-05, + "loss": 0.6955389380455017, + "step": 2478 + }, + { + "epoch": 0.45806146135279646, + "grad_norm": 0.07774579524993896, + "learning_rate": 1.9222296844206533e-05, + "loss": 0.7269667983055115, + "step": 2479 + }, + { + "epoch": 0.45824623806169235, + "grad_norm": 0.07733689248561859, + "learning_rate": 1.922152534168905e-05, + "loss": 0.9509046077728271, + "step": 2480 + }, + { + "epoch": 0.4584310147705882, + "grad_norm": 0.060073934495449066, + "learning_rate": 1.9220753472183702e-05, + "loss": 0.5735282897949219, + "step": 2481 + }, + { + "epoch": 0.458615791479484, + "grad_norm": 0.08871357142925262, + "learning_rate": 1.9219981235721216e-05, + "loss": 0.7650564908981323, + "step": 2482 + }, + { + "epoch": 0.45880056818837983, + "grad_norm": 0.06762327253818512, + "learning_rate": 1.9219208632332324e-05, + "loss": 0.6746568083763123, + "step": 2483 + }, + { + "epoch": 0.4589853448972757, + "grad_norm": 0.0836707055568695, + "learning_rate": 1.921843566204777e-05, + "loss": 0.6644420027732849, + "step": 2484 + }, + { + "epoch": 0.45917012160617154, + "grad_norm": 0.05917372927069664, + "learning_rate": 1.9217662324898318e-05, + "loss": 0.49475622177124023, + "step": 2485 + }, + { + "epoch": 0.4593548983150674, + "grad_norm": 0.06416773051023483, + "learning_rate": 1.9216888620914743e-05, + "loss": 0.6015645265579224, + "step": 2486 + }, + { + "epoch": 0.45953967502396326, + "grad_norm": 0.08696015924215317, + "learning_rate": 1.921611455012784e-05, + "loss": 0.9279436469078064, + "step": 2487 + }, + { + "epoch": 0.4597244517328591, + "grad_norm": 0.08332741260528564, + "learning_rate": 1.9215340112568407e-05, + "loss": 0.7063831090927124, + "step": 2488 + }, + { + "epoch": 0.4599092284417549, + "grad_norm": 0.07548101246356964, + "learning_rate": 1.921456530826727e-05, + "loss": 0.674170196056366, + "step": 2489 + }, + { + "epoch": 0.46009400515065074, + "grad_norm": 0.06416136771440506, + "learning_rate": 1.9213790137255267e-05, + "loss": 0.5054815411567688, + "step": 2490 + }, + { + "epoch": 0.4602787818595466, + "grad_norm": 0.08905654400587082, + "learning_rate": 1.9213014599563238e-05, + "loss": 0.8949898481369019, + "step": 2491 + }, + { + "epoch": 0.46046355856844245, + "grad_norm": 0.08505464345216751, + "learning_rate": 1.9212238695222054e-05, + "loss": 0.7196219563484192, + "step": 2492 + }, + { + "epoch": 0.4606483352773383, + "grad_norm": 0.0721084251999855, + "learning_rate": 1.921146242426259e-05, + "loss": 0.567509651184082, + "step": 2493 + }, + { + "epoch": 0.4608331119862341, + "grad_norm": 0.08344163745641708, + "learning_rate": 1.921068578671574e-05, + "loss": 0.7773234844207764, + "step": 2494 + }, + { + "epoch": 0.46101788869513, + "grad_norm": 0.07499945163726807, + "learning_rate": 1.9209908782612415e-05, + "loss": 0.726072371006012, + "step": 2495 + }, + { + "epoch": 0.4612026654040258, + "grad_norm": 0.07331238687038422, + "learning_rate": 1.920913141198353e-05, + "loss": 0.6004643440246582, + "step": 2496 + }, + { + "epoch": 0.46138744211292165, + "grad_norm": 0.0997093915939331, + "learning_rate": 1.9208353674860028e-05, + "loss": 1.0000945329666138, + "step": 2497 + }, + { + "epoch": 0.46157221882181754, + "grad_norm": 0.09368643164634705, + "learning_rate": 1.920757557127286e-05, + "loss": 0.8160755038261414, + "step": 2498 + }, + { + "epoch": 0.46175699553071337, + "grad_norm": 0.07633251696825027, + "learning_rate": 1.9206797101252993e-05, + "loss": 0.7055872678756714, + "step": 2499 + }, + { + "epoch": 0.4619417722396092, + "grad_norm": 0.06902278959751129, + "learning_rate": 1.92060182648314e-05, + "loss": 0.667657196521759, + "step": 2500 + }, + { + "epoch": 0.4619417722396092, + "eval_loss": 0.7116170525550842, + "eval_runtime": 157.2484, + "eval_samples_per_second": 115.925, + "eval_steps_per_second": 14.493, + "step": 2500 + }, + { + "epoch": 0.462126548948505, + "grad_norm": 0.06343262642621994, + "learning_rate": 1.920523906203909e-05, + "loss": 0.5842658281326294, + "step": 2501 + }, + { + "epoch": 0.4623113256574009, + "grad_norm": 0.06518920511007309, + "learning_rate": 1.920445949290706e-05, + "loss": 0.6732863187789917, + "step": 2502 + }, + { + "epoch": 0.46249610236629674, + "grad_norm": 0.06654220074415207, + "learning_rate": 1.9203679557466338e-05, + "loss": 0.6887181997299194, + "step": 2503 + }, + { + "epoch": 0.46268087907519256, + "grad_norm": 0.07807012647390366, + "learning_rate": 1.9202899255747967e-05, + "loss": 0.6344214677810669, + "step": 2504 + }, + { + "epoch": 0.4628656557840884, + "grad_norm": 0.06473081558942795, + "learning_rate": 1.9202118587782996e-05, + "loss": 0.734042763710022, + "step": 2505 + }, + { + "epoch": 0.4630504324929843, + "grad_norm": 0.08167917281389236, + "learning_rate": 1.9201337553602496e-05, + "loss": 0.867456316947937, + "step": 2506 + }, + { + "epoch": 0.4632352092018801, + "grad_norm": 0.0646093562245369, + "learning_rate": 1.9200556153237547e-05, + "loss": 0.46739864349365234, + "step": 2507 + }, + { + "epoch": 0.46341998591077593, + "grad_norm": 0.06381334364414215, + "learning_rate": 1.919977438671925e-05, + "loss": 0.5306061506271362, + "step": 2508 + }, + { + "epoch": 0.4636047626196718, + "grad_norm": 0.07060441374778748, + "learning_rate": 1.9198992254078715e-05, + "loss": 0.7806938290596008, + "step": 2509 + }, + { + "epoch": 0.46378953932856765, + "grad_norm": 0.07286828011274338, + "learning_rate": 1.9198209755347065e-05, + "loss": 0.7457721829414368, + "step": 2510 + }, + { + "epoch": 0.4639743160374635, + "grad_norm": 0.0571209117770195, + "learning_rate": 1.919742689055545e-05, + "loss": 0.5006831884384155, + "step": 2511 + }, + { + "epoch": 0.4641590927463593, + "grad_norm": 0.07706863433122635, + "learning_rate": 1.9196643659735016e-05, + "loss": 0.6784906983375549, + "step": 2512 + }, + { + "epoch": 0.4643438694552552, + "grad_norm": 0.06896209716796875, + "learning_rate": 1.9195860062916937e-05, + "loss": 0.721760094165802, + "step": 2513 + }, + { + "epoch": 0.464528646164151, + "grad_norm": 0.07125797867774963, + "learning_rate": 1.91950761001324e-05, + "loss": 0.6727914810180664, + "step": 2514 + }, + { + "epoch": 0.46471342287304684, + "grad_norm": 0.08965711295604706, + "learning_rate": 1.9194291771412596e-05, + "loss": 0.8182405233383179, + "step": 2515 + }, + { + "epoch": 0.4648981995819427, + "grad_norm": 0.046130772680044174, + "learning_rate": 1.919350707678875e-05, + "loss": 0.4636189341545105, + "step": 2516 + }, + { + "epoch": 0.46508297629083856, + "grad_norm": 0.09742356091737747, + "learning_rate": 1.9192722016292086e-05, + "loss": 0.7898565530776978, + "step": 2517 + }, + { + "epoch": 0.4652677529997344, + "grad_norm": 0.08445709943771362, + "learning_rate": 1.919193658995384e-05, + "loss": 0.7934099435806274, + "step": 2518 + }, + { + "epoch": 0.4654525297086302, + "grad_norm": 0.07222909480333328, + "learning_rate": 1.9191150797805283e-05, + "loss": 0.6173120737075806, + "step": 2519 + }, + { + "epoch": 0.4656373064175261, + "grad_norm": 0.08419971913099289, + "learning_rate": 1.9190364639877674e-05, + "loss": 0.6999402046203613, + "step": 2520 + }, + { + "epoch": 0.4658220831264219, + "grad_norm": 0.09709232300519943, + "learning_rate": 1.918957811620231e-05, + "loss": 0.7624440789222717, + "step": 2521 + }, + { + "epoch": 0.46600685983531775, + "grad_norm": 0.07338286936283112, + "learning_rate": 1.918879122681048e-05, + "loss": 0.6639955639839172, + "step": 2522 + }, + { + "epoch": 0.4661916365442136, + "grad_norm": 0.07687751948833466, + "learning_rate": 1.9188003971733515e-05, + "loss": 0.8092284798622131, + "step": 2523 + }, + { + "epoch": 0.46637641325310947, + "grad_norm": 0.07229873538017273, + "learning_rate": 1.9187216351002734e-05, + "loss": 0.5076743960380554, + "step": 2524 + }, + { + "epoch": 0.4665611899620053, + "grad_norm": 0.06916353851556778, + "learning_rate": 1.9186428364649486e-05, + "loss": 0.732584536075592, + "step": 2525 + }, + { + "epoch": 0.4667459666709011, + "grad_norm": 0.0701894760131836, + "learning_rate": 1.9185640012705133e-05, + "loss": 0.724190354347229, + "step": 2526 + }, + { + "epoch": 0.46693074337979695, + "grad_norm": 0.07493194937705994, + "learning_rate": 1.918485129520104e-05, + "loss": 0.69194495677948, + "step": 2527 + }, + { + "epoch": 0.46711552008869284, + "grad_norm": 0.08086828887462616, + "learning_rate": 1.9184062212168605e-05, + "loss": 0.7124866843223572, + "step": 2528 + }, + { + "epoch": 0.46730029679758867, + "grad_norm": 0.07317820936441422, + "learning_rate": 1.9183272763639223e-05, + "loss": 0.695803165435791, + "step": 2529 + }, + { + "epoch": 0.4674850735064845, + "grad_norm": 0.07890819758176804, + "learning_rate": 1.918248294964432e-05, + "loss": 0.5504632592201233, + "step": 2530 + }, + { + "epoch": 0.4676698502153804, + "grad_norm": 0.07478110492229462, + "learning_rate": 1.9181692770215324e-05, + "loss": 0.7427833080291748, + "step": 2531 + }, + { + "epoch": 0.4678546269242762, + "grad_norm": 0.0800105631351471, + "learning_rate": 1.9180902225383677e-05, + "loss": 0.755503237247467, + "step": 2532 + }, + { + "epoch": 0.46803940363317204, + "grad_norm": 0.06926467269659042, + "learning_rate": 1.918011131518085e-05, + "loss": 0.5626682043075562, + "step": 2533 + }, + { + "epoch": 0.46822418034206786, + "grad_norm": 0.0702790766954422, + "learning_rate": 1.917932003963831e-05, + "loss": 0.7690215110778809, + "step": 2534 + }, + { + "epoch": 0.46840895705096375, + "grad_norm": 0.08173204958438873, + "learning_rate": 1.9178528398787553e-05, + "loss": 0.7654154896736145, + "step": 2535 + }, + { + "epoch": 0.4685937337598596, + "grad_norm": 0.08061196655035019, + "learning_rate": 1.917773639266008e-05, + "loss": 0.7449684739112854, + "step": 2536 + }, + { + "epoch": 0.4687785104687554, + "grad_norm": 0.06667713820934296, + "learning_rate": 1.917694402128741e-05, + "loss": 0.5902036428451538, + "step": 2537 + }, + { + "epoch": 0.46896328717765123, + "grad_norm": 0.08062142133712769, + "learning_rate": 1.9176151284701086e-05, + "loss": 0.7364373207092285, + "step": 2538 + }, + { + "epoch": 0.4691480638865471, + "grad_norm": 0.06273316591978073, + "learning_rate": 1.9175358182932643e-05, + "loss": 0.522224485874176, + "step": 2539 + }, + { + "epoch": 0.46933284059544295, + "grad_norm": 0.06959912925958633, + "learning_rate": 1.9174564716013653e-05, + "loss": 0.5129040479660034, + "step": 2540 + }, + { + "epoch": 0.4695176173043388, + "grad_norm": 0.07042738795280457, + "learning_rate": 1.917377088397569e-05, + "loss": 0.7187867164611816, + "step": 2541 + }, + { + "epoch": 0.46970239401323466, + "grad_norm": 0.08257179707288742, + "learning_rate": 1.9172976686850345e-05, + "loss": 0.8150436878204346, + "step": 2542 + }, + { + "epoch": 0.4698871707221305, + "grad_norm": 0.07191328704357147, + "learning_rate": 1.917218212466923e-05, + "loss": 0.5697478652000427, + "step": 2543 + }, + { + "epoch": 0.4700719474310263, + "grad_norm": 0.05890669673681259, + "learning_rate": 1.917138719746396e-05, + "loss": 0.4980667531490326, + "step": 2544 + }, + { + "epoch": 0.47025672413992214, + "grad_norm": 0.08073609322309494, + "learning_rate": 1.9170591905266176e-05, + "loss": 0.6568699479103088, + "step": 2545 + }, + { + "epoch": 0.47044150084881803, + "grad_norm": 0.06748133897781372, + "learning_rate": 1.9169796248107524e-05, + "loss": 0.6333909034729004, + "step": 2546 + }, + { + "epoch": 0.47062627755771386, + "grad_norm": 0.07974343746900558, + "learning_rate": 1.916900022601967e-05, + "loss": 0.7419365048408508, + "step": 2547 + }, + { + "epoch": 0.4708110542666097, + "grad_norm": 0.08520668745040894, + "learning_rate": 1.9168203839034292e-05, + "loss": 0.6401491761207581, + "step": 2548 + }, + { + "epoch": 0.4709958309755055, + "grad_norm": 0.061686594039201736, + "learning_rate": 1.9167407087183087e-05, + "loss": 0.573703944683075, + "step": 2549 + }, + { + "epoch": 0.4711806076844014, + "grad_norm": 0.09480550140142441, + "learning_rate": 1.9166609970497755e-05, + "loss": 0.8099426031112671, + "step": 2550 + }, + { + "epoch": 0.4713653843932972, + "grad_norm": 0.07680436223745346, + "learning_rate": 1.916581248901003e-05, + "loss": 0.6837723851203918, + "step": 2551 + }, + { + "epoch": 0.47155016110219306, + "grad_norm": 0.08173999935388565, + "learning_rate": 1.9165014642751645e-05, + "loss": 0.8117296695709229, + "step": 2552 + }, + { + "epoch": 0.47173493781108894, + "grad_norm": 0.0909331738948822, + "learning_rate": 1.916421643175435e-05, + "loss": 0.8888765573501587, + "step": 2553 + }, + { + "epoch": 0.47191971451998477, + "grad_norm": 0.08059228211641312, + "learning_rate": 1.916341785604991e-05, + "loss": 0.7117865681648254, + "step": 2554 + }, + { + "epoch": 0.4721044912288806, + "grad_norm": 0.07878051698207855, + "learning_rate": 1.9162618915670112e-05, + "loss": 0.7551975846290588, + "step": 2555 + }, + { + "epoch": 0.4722892679377764, + "grad_norm": 0.06273041665554047, + "learning_rate": 1.916181961064674e-05, + "loss": 0.4910038113594055, + "step": 2556 + }, + { + "epoch": 0.4724740446466723, + "grad_norm": 0.07543109357357025, + "learning_rate": 1.916101994101162e-05, + "loss": 0.6243723630905151, + "step": 2557 + }, + { + "epoch": 0.47265882135556814, + "grad_norm": 0.0721454918384552, + "learning_rate": 1.916021990679656e-05, + "loss": 0.5600568056106567, + "step": 2558 + }, + { + "epoch": 0.47284359806446397, + "grad_norm": 0.08377529680728912, + "learning_rate": 1.915941950803341e-05, + "loss": 0.7743291258811951, + "step": 2559 + }, + { + "epoch": 0.4730283747733598, + "grad_norm": 0.08089539408683777, + "learning_rate": 1.915861874475402e-05, + "loss": 0.5809829235076904, + "step": 2560 + }, + { + "epoch": 0.4732131514822557, + "grad_norm": 0.07542770355939865, + "learning_rate": 1.915781761699026e-05, + "loss": 0.6139705777168274, + "step": 2561 + }, + { + "epoch": 0.4733979281911515, + "grad_norm": 0.09735579043626785, + "learning_rate": 1.9157016124774004e-05, + "loss": 0.5442507266998291, + "step": 2562 + }, + { + "epoch": 0.47358270490004734, + "grad_norm": 0.06749841570854187, + "learning_rate": 1.915621426813716e-05, + "loss": 0.6063088774681091, + "step": 2563 + }, + { + "epoch": 0.4737674816089432, + "grad_norm": 0.07153279334306717, + "learning_rate": 1.915541204711163e-05, + "loss": 0.6011493802070618, + "step": 2564 + }, + { + "epoch": 0.47395225831783905, + "grad_norm": 0.07286897301673889, + "learning_rate": 1.9154609461729348e-05, + "loss": 0.7002130150794983, + "step": 2565 + }, + { + "epoch": 0.4741370350267349, + "grad_norm": 0.0901065319776535, + "learning_rate": 1.9153806512022248e-05, + "loss": 0.8844193816184998, + "step": 2566 + }, + { + "epoch": 0.4743218117356307, + "grad_norm": 0.06940233707427979, + "learning_rate": 1.9153003198022286e-05, + "loss": 0.6113611459732056, + "step": 2567 + }, + { + "epoch": 0.4745065884445266, + "grad_norm": 0.07977228611707687, + "learning_rate": 1.9152199519761436e-05, + "loss": 0.7797371745109558, + "step": 2568 + }, + { + "epoch": 0.4746913651534224, + "grad_norm": 0.0750238448381424, + "learning_rate": 1.9151395477271675e-05, + "loss": 0.5979514718055725, + "step": 2569 + }, + { + "epoch": 0.47487614186231825, + "grad_norm": 0.08806634694337845, + "learning_rate": 1.9150591070585012e-05, + "loss": 0.7285597324371338, + "step": 2570 + }, + { + "epoch": 0.4750609185712141, + "grad_norm": 0.09475602209568024, + "learning_rate": 1.914978629973345e-05, + "loss": 0.7155942916870117, + "step": 2571 + }, + { + "epoch": 0.47524569528010996, + "grad_norm": 0.07977049797773361, + "learning_rate": 1.9148981164749013e-05, + "loss": 0.6524484753608704, + "step": 2572 + }, + { + "epoch": 0.4754304719890058, + "grad_norm": 0.09101420640945435, + "learning_rate": 1.9148175665663755e-05, + "loss": 1.1290339231491089, + "step": 2573 + }, + { + "epoch": 0.4756152486979016, + "grad_norm": 0.09440110623836517, + "learning_rate": 1.9147369802509725e-05, + "loss": 0.9461732506752014, + "step": 2574 + }, + { + "epoch": 0.4758000254067975, + "grad_norm": 0.08689321577548981, + "learning_rate": 1.9146563575318997e-05, + "loss": 0.7111562490463257, + "step": 2575 + }, + { + "epoch": 0.47598480211569333, + "grad_norm": 0.07283070683479309, + "learning_rate": 1.9145756984123653e-05, + "loss": 0.6966633796691895, + "step": 2576 + }, + { + "epoch": 0.47616957882458916, + "grad_norm": 0.07687104493379593, + "learning_rate": 1.9144950028955795e-05, + "loss": 0.7655548453330994, + "step": 2577 + }, + { + "epoch": 0.476354355533485, + "grad_norm": 0.07238690555095673, + "learning_rate": 1.914414270984754e-05, + "loss": 0.7400521636009216, + "step": 2578 + }, + { + "epoch": 0.47653913224238087, + "grad_norm": 0.0622221976518631, + "learning_rate": 1.9143335026831008e-05, + "loss": 0.4354769289493561, + "step": 2579 + }, + { + "epoch": 0.4767239089512767, + "grad_norm": 0.06557375937700272, + "learning_rate": 1.914252697993835e-05, + "loss": 0.505308210849762, + "step": 2580 + }, + { + "epoch": 0.4769086856601725, + "grad_norm": 0.08358443528413773, + "learning_rate": 1.914171856920172e-05, + "loss": 0.716081440448761, + "step": 2581 + }, + { + "epoch": 0.47709346236906836, + "grad_norm": 0.060606375336647034, + "learning_rate": 1.9140909794653293e-05, + "loss": 0.4815409481525421, + "step": 2582 + }, + { + "epoch": 0.47727823907796424, + "grad_norm": 0.06997659057378769, + "learning_rate": 1.9140100656325254e-05, + "loss": 0.6048935055732727, + "step": 2583 + }, + { + "epoch": 0.47746301578686007, + "grad_norm": 0.07896342128515244, + "learning_rate": 1.9139291154249802e-05, + "loss": 0.5597572922706604, + "step": 2584 + }, + { + "epoch": 0.4776477924957559, + "grad_norm": 0.05853699892759323, + "learning_rate": 1.9138481288459162e-05, + "loss": 0.4828832447528839, + "step": 2585 + }, + { + "epoch": 0.4778325692046518, + "grad_norm": 0.05594884976744652, + "learning_rate": 1.9137671058985554e-05, + "loss": 0.4123653173446655, + "step": 2586 + }, + { + "epoch": 0.4780173459135476, + "grad_norm": 0.06755481660366058, + "learning_rate": 1.9136860465861223e-05, + "loss": 0.4792831838130951, + "step": 2587 + }, + { + "epoch": 0.47820212262244344, + "grad_norm": 0.07026038318872452, + "learning_rate": 1.9136049509118435e-05, + "loss": 0.6120736598968506, + "step": 2588 + }, + { + "epoch": 0.47838689933133927, + "grad_norm": 0.06810068339109421, + "learning_rate": 1.9135238188789458e-05, + "loss": 0.5771077275276184, + "step": 2589 + }, + { + "epoch": 0.47857167604023515, + "grad_norm": 0.06867280602455139, + "learning_rate": 1.9134426504906584e-05, + "loss": 0.6913584470748901, + "step": 2590 + }, + { + "epoch": 0.478756452749131, + "grad_norm": 0.08671563118696213, + "learning_rate": 1.9133614457502106e-05, + "loss": 0.7088780999183655, + "step": 2591 + }, + { + "epoch": 0.4789412294580268, + "grad_norm": 0.08995692431926727, + "learning_rate": 1.9132802046608353e-05, + "loss": 0.716721773147583, + "step": 2592 + }, + { + "epoch": 0.47912600616692264, + "grad_norm": 0.08103762567043304, + "learning_rate": 1.9131989272257652e-05, + "loss": 0.594050407409668, + "step": 2593 + }, + { + "epoch": 0.4793107828758185, + "grad_norm": 0.08107825368642807, + "learning_rate": 1.913117613448235e-05, + "loss": 0.7606329321861267, + "step": 2594 + }, + { + "epoch": 0.47949555958471435, + "grad_norm": 0.08666590601205826, + "learning_rate": 1.9130362633314803e-05, + "loss": 0.7732304930686951, + "step": 2595 + }, + { + "epoch": 0.4796803362936102, + "grad_norm": 0.09467215836048126, + "learning_rate": 1.9129548768787386e-05, + "loss": 0.789035439491272, + "step": 2596 + }, + { + "epoch": 0.47986511300250606, + "grad_norm": 0.09766748547554016, + "learning_rate": 1.9128734540932494e-05, + "loss": 0.8102937340736389, + "step": 2597 + }, + { + "epoch": 0.4800498897114019, + "grad_norm": 0.08985164016485214, + "learning_rate": 1.912791994978253e-05, + "loss": 0.8242364525794983, + "step": 2598 + }, + { + "epoch": 0.4802346664202977, + "grad_norm": 0.08733810484409332, + "learning_rate": 1.9127104995369903e-05, + "loss": 0.6473926901817322, + "step": 2599 + }, + { + "epoch": 0.48041944312919355, + "grad_norm": 0.06384208053350449, + "learning_rate": 1.9126289677727053e-05, + "loss": 0.5705603957176208, + "step": 2600 + }, + { + "epoch": 0.48060421983808943, + "grad_norm": 0.08345800638198853, + "learning_rate": 1.9125473996886433e-05, + "loss": 0.7921107411384583, + "step": 2601 + }, + { + "epoch": 0.48078899654698526, + "grad_norm": 0.09258704632520676, + "learning_rate": 1.912465795288049e-05, + "loss": 0.8663824200630188, + "step": 2602 + }, + { + "epoch": 0.4809737732558811, + "grad_norm": 0.08237463235855103, + "learning_rate": 1.9123841545741712e-05, + "loss": 0.6780328750610352, + "step": 2603 + }, + { + "epoch": 0.4811585499647769, + "grad_norm": 0.09698188304901123, + "learning_rate": 1.9123024775502586e-05, + "loss": 0.9325119853019714, + "step": 2604 + }, + { + "epoch": 0.4813433266736728, + "grad_norm": 0.057871270924806595, + "learning_rate": 1.9122207642195617e-05, + "loss": 0.46808913350105286, + "step": 2605 + }, + { + "epoch": 0.48152810338256863, + "grad_norm": 0.06541048735380173, + "learning_rate": 1.912139014585332e-05, + "loss": 0.5878641605377197, + "step": 2606 + }, + { + "epoch": 0.48171288009146446, + "grad_norm": 0.07617659121751785, + "learning_rate": 1.912057228650823e-05, + "loss": 0.7120945453643799, + "step": 2607 + }, + { + "epoch": 0.48189765680036034, + "grad_norm": 0.08291994035243988, + "learning_rate": 1.9119754064192904e-05, + "loss": 0.4768596291542053, + "step": 2608 + }, + { + "epoch": 0.48208243350925617, + "grad_norm": 0.0723223090171814, + "learning_rate": 1.9118935478939896e-05, + "loss": 0.7024025321006775, + "step": 2609 + }, + { + "epoch": 0.482267210218152, + "grad_norm": 0.06848274171352386, + "learning_rate": 1.9118116530781785e-05, + "loss": 0.5369501709938049, + "step": 2610 + }, + { + "epoch": 0.4824519869270478, + "grad_norm": 0.08580145984888077, + "learning_rate": 1.9117297219751164e-05, + "loss": 0.7307426929473877, + "step": 2611 + }, + { + "epoch": 0.4826367636359437, + "grad_norm": 0.06089922785758972, + "learning_rate": 1.9116477545880638e-05, + "loss": 0.6530369520187378, + "step": 2612 + }, + { + "epoch": 0.48282154034483954, + "grad_norm": 0.080387182533741, + "learning_rate": 1.9115657509202824e-05, + "loss": 0.7199724912643433, + "step": 2613 + }, + { + "epoch": 0.48300631705373537, + "grad_norm": 0.07296282052993774, + "learning_rate": 1.9114837109750367e-05, + "loss": 0.5948304533958435, + "step": 2614 + }, + { + "epoch": 0.4831910937626312, + "grad_norm": 0.0720793828368187, + "learning_rate": 1.9114016347555905e-05, + "loss": 0.5981985926628113, + "step": 2615 + }, + { + "epoch": 0.4833758704715271, + "grad_norm": 0.07115527987480164, + "learning_rate": 1.9113195222652105e-05, + "loss": 0.6019927859306335, + "step": 2616 + }, + { + "epoch": 0.4835606471804229, + "grad_norm": 0.08490081131458282, + "learning_rate": 1.911237373507165e-05, + "loss": 0.5547221899032593, + "step": 2617 + }, + { + "epoch": 0.48374542388931874, + "grad_norm": 0.08143550902605057, + "learning_rate": 1.911155188484723e-05, + "loss": 0.6641083359718323, + "step": 2618 + }, + { + "epoch": 0.4839302005982146, + "grad_norm": 0.07218817621469498, + "learning_rate": 1.911072967201155e-05, + "loss": 0.6021130084991455, + "step": 2619 + }, + { + "epoch": 0.48411497730711045, + "grad_norm": 0.08438766002655029, + "learning_rate": 1.9109907096597332e-05, + "loss": 0.6461876034736633, + "step": 2620 + }, + { + "epoch": 0.4842997540160063, + "grad_norm": 0.077565997838974, + "learning_rate": 1.9109084158637314e-05, + "loss": 0.6658491492271423, + "step": 2621 + }, + { + "epoch": 0.4844845307249021, + "grad_norm": 0.07408081740140915, + "learning_rate": 1.9108260858164243e-05, + "loss": 0.8159285187721252, + "step": 2622 + }, + { + "epoch": 0.484669307433798, + "grad_norm": 0.09126505255699158, + "learning_rate": 1.9107437195210886e-05, + "loss": 0.8331578373908997, + "step": 2623 + }, + { + "epoch": 0.4848540841426938, + "grad_norm": 0.10413938015699387, + "learning_rate": 1.9106613169810024e-05, + "loss": 0.8409904837608337, + "step": 2624 + }, + { + "epoch": 0.48503886085158965, + "grad_norm": 0.07460866123437881, + "learning_rate": 1.910578878199445e-05, + "loss": 0.6452231407165527, + "step": 2625 + }, + { + "epoch": 0.4852236375604855, + "grad_norm": 0.0811716690659523, + "learning_rate": 1.9104964031796965e-05, + "loss": 0.7975817918777466, + "step": 2626 + }, + { + "epoch": 0.48540841426938136, + "grad_norm": 0.07679463922977448, + "learning_rate": 1.9104138919250403e-05, + "loss": 0.6129171848297119, + "step": 2627 + }, + { + "epoch": 0.4855931909782772, + "grad_norm": 0.06981959193944931, + "learning_rate": 1.9103313444387595e-05, + "loss": 0.7612093687057495, + "step": 2628 + }, + { + "epoch": 0.485777967687173, + "grad_norm": 0.07798459380865097, + "learning_rate": 1.9102487607241393e-05, + "loss": 0.6142989993095398, + "step": 2629 + }, + { + "epoch": 0.4859627443960689, + "grad_norm": 0.08926472812891006, + "learning_rate": 1.9101661407844657e-05, + "loss": 0.8828129172325134, + "step": 2630 + }, + { + "epoch": 0.48614752110496473, + "grad_norm": 0.08611288666725159, + "learning_rate": 1.910083484623028e-05, + "loss": 0.6943038702011108, + "step": 2631 + }, + { + "epoch": 0.48633229781386056, + "grad_norm": 0.06649988144636154, + "learning_rate": 1.910000792243114e-05, + "loss": 0.42508068680763245, + "step": 2632 + }, + { + "epoch": 0.4865170745227564, + "grad_norm": 0.07903112471103668, + "learning_rate": 1.909918063648016e-05, + "loss": 0.9131513237953186, + "step": 2633 + }, + { + "epoch": 0.48670185123165227, + "grad_norm": 0.07621757686138153, + "learning_rate": 1.909835298841026e-05, + "loss": 0.5481411218643188, + "step": 2634 + }, + { + "epoch": 0.4868866279405481, + "grad_norm": 0.06687542051076889, + "learning_rate": 1.9097524978254377e-05, + "loss": 0.5872876048088074, + "step": 2635 + }, + { + "epoch": 0.48707140464944393, + "grad_norm": 0.06902679055929184, + "learning_rate": 1.909669660604546e-05, + "loss": 0.6621937155723572, + "step": 2636 + }, + { + "epoch": 0.48725618135833976, + "grad_norm": 0.06544358283281326, + "learning_rate": 1.9095867871816475e-05, + "loss": 0.4884672462940216, + "step": 2637 + }, + { + "epoch": 0.48744095806723564, + "grad_norm": 0.07585190236568451, + "learning_rate": 1.909503877560041e-05, + "loss": 0.7296018004417419, + "step": 2638 + }, + { + "epoch": 0.48762573477613147, + "grad_norm": 0.07426097989082336, + "learning_rate": 1.9094209317430255e-05, + "loss": 0.567711353302002, + "step": 2639 + }, + { + "epoch": 0.4878105114850273, + "grad_norm": 0.06902457028627396, + "learning_rate": 1.909337949733902e-05, + "loss": 0.37785089015960693, + "step": 2640 + }, + { + "epoch": 0.4879952881939232, + "grad_norm": 0.09323791414499283, + "learning_rate": 1.9092549315359732e-05, + "loss": 0.8677100539207458, + "step": 2641 + }, + { + "epoch": 0.488180064902819, + "grad_norm": 0.07936318963766098, + "learning_rate": 1.909171877152543e-05, + "loss": 0.5746418237686157, + "step": 2642 + }, + { + "epoch": 0.48836484161171484, + "grad_norm": 0.0856119766831398, + "learning_rate": 1.9090887865869162e-05, + "loss": 0.6803861260414124, + "step": 2643 + }, + { + "epoch": 0.48854961832061067, + "grad_norm": 0.07777687907218933, + "learning_rate": 1.9090056598424002e-05, + "loss": 0.8061811923980713, + "step": 2644 + }, + { + "epoch": 0.48873439502950655, + "grad_norm": 0.07252785563468933, + "learning_rate": 1.9089224969223022e-05, + "loss": 0.5486541390419006, + "step": 2645 + }, + { + "epoch": 0.4889191717384024, + "grad_norm": 0.07425657659769058, + "learning_rate": 1.9088392978299334e-05, + "loss": 0.6537051200866699, + "step": 2646 + }, + { + "epoch": 0.4891039484472982, + "grad_norm": 0.06550854444503784, + "learning_rate": 1.9087560625686034e-05, + "loss": 0.591060996055603, + "step": 2647 + }, + { + "epoch": 0.48928872515619404, + "grad_norm": 0.06094660237431526, + "learning_rate": 1.908672791141625e-05, + "loss": 0.5385389924049377, + "step": 2648 + }, + { + "epoch": 0.4894735018650899, + "grad_norm": 0.06148020550608635, + "learning_rate": 1.9085894835523128e-05, + "loss": 0.5736526846885681, + "step": 2649 + }, + { + "epoch": 0.48965827857398575, + "grad_norm": 0.0693485289812088, + "learning_rate": 1.9085061398039814e-05, + "loss": 0.47210174798965454, + "step": 2650 + }, + { + "epoch": 0.4898430552828816, + "grad_norm": 0.07060623168945312, + "learning_rate": 1.9084227598999484e-05, + "loss": 0.6329917907714844, + "step": 2651 + }, + { + "epoch": 0.49002783199177746, + "grad_norm": 0.07458177953958511, + "learning_rate": 1.9083393438435318e-05, + "loss": 0.6945242881774902, + "step": 2652 + }, + { + "epoch": 0.4902126087006733, + "grad_norm": 0.07435189187526703, + "learning_rate": 1.9082558916380508e-05, + "loss": 0.678519606590271, + "step": 2653 + }, + { + "epoch": 0.4903973854095691, + "grad_norm": 0.07789159566164017, + "learning_rate": 1.9081724032868266e-05, + "loss": 0.716600775718689, + "step": 2654 + }, + { + "epoch": 0.49058216211846495, + "grad_norm": 0.08185722678899765, + "learning_rate": 1.9080888787931826e-05, + "loss": 0.64130699634552, + "step": 2655 + }, + { + "epoch": 0.49076693882736083, + "grad_norm": 0.09033802151679993, + "learning_rate": 1.9080053181604418e-05, + "loss": 0.8799751400947571, + "step": 2656 + }, + { + "epoch": 0.49095171553625666, + "grad_norm": 0.07720265537500381, + "learning_rate": 1.9079217213919304e-05, + "loss": 0.6216465830802917, + "step": 2657 + }, + { + "epoch": 0.4911364922451525, + "grad_norm": 0.07524937391281128, + "learning_rate": 1.9078380884909752e-05, + "loss": 0.7585391402244568, + "step": 2658 + }, + { + "epoch": 0.4913212689540483, + "grad_norm": 0.0857534259557724, + "learning_rate": 1.907754419460904e-05, + "loss": 0.8452739715576172, + "step": 2659 + }, + { + "epoch": 0.4915060456629442, + "grad_norm": 0.08280126750469208, + "learning_rate": 1.907670714305047e-05, + "loss": 0.8687022924423218, + "step": 2660 + }, + { + "epoch": 0.49169082237184003, + "grad_norm": 0.07255477458238602, + "learning_rate": 1.9075869730267355e-05, + "loss": 0.7363627552986145, + "step": 2661 + }, + { + "epoch": 0.49187559908073586, + "grad_norm": 0.09360745549201965, + "learning_rate": 1.9075031956293016e-05, + "loss": 0.7569074034690857, + "step": 2662 + }, + { + "epoch": 0.49206037578963174, + "grad_norm": 0.06543853878974915, + "learning_rate": 1.90741938211608e-05, + "loss": 0.5689033269882202, + "step": 2663 + }, + { + "epoch": 0.49224515249852757, + "grad_norm": 0.06809094548225403, + "learning_rate": 1.907335532490406e-05, + "loss": 0.6460486650466919, + "step": 2664 + }, + { + "epoch": 0.4924299292074234, + "grad_norm": 0.07789400964975357, + "learning_rate": 1.907251646755616e-05, + "loss": 0.7170886397361755, + "step": 2665 + }, + { + "epoch": 0.49261470591631923, + "grad_norm": 0.10317979753017426, + "learning_rate": 1.9071677249150492e-05, + "loss": 1.0603386163711548, + "step": 2666 + }, + { + "epoch": 0.4927994826252151, + "grad_norm": 0.0847686231136322, + "learning_rate": 1.9070837669720452e-05, + "loss": 0.7669317722320557, + "step": 2667 + }, + { + "epoch": 0.49298425933411094, + "grad_norm": 0.08344128727912903, + "learning_rate": 1.906999772929945e-05, + "loss": 0.7641721367835999, + "step": 2668 + }, + { + "epoch": 0.49316903604300677, + "grad_norm": 0.06451429426670074, + "learning_rate": 1.9069157427920916e-05, + "loss": 0.6100636720657349, + "step": 2669 + }, + { + "epoch": 0.4933538127519026, + "grad_norm": 0.08458209782838821, + "learning_rate": 1.9068316765618294e-05, + "loss": 0.6070970892906189, + "step": 2670 + }, + { + "epoch": 0.4935385894607985, + "grad_norm": 0.058811575174331665, + "learning_rate": 1.906747574242503e-05, + "loss": 0.44909724593162537, + "step": 2671 + }, + { + "epoch": 0.4937233661696943, + "grad_norm": 0.07956483960151672, + "learning_rate": 1.90666343583746e-05, + "loss": 0.7305945158004761, + "step": 2672 + }, + { + "epoch": 0.49390814287859014, + "grad_norm": 0.06249157711863518, + "learning_rate": 1.906579261350049e-05, + "loss": 0.474531888961792, + "step": 2673 + }, + { + "epoch": 0.494092919587486, + "grad_norm": 0.058922164142131805, + "learning_rate": 1.90649505078362e-05, + "loss": 0.6985012888908386, + "step": 2674 + }, + { + "epoch": 0.49427769629638185, + "grad_norm": 0.07136017829179764, + "learning_rate": 1.9064108041415237e-05, + "loss": 0.6611461639404297, + "step": 2675 + }, + { + "epoch": 0.4944624730052777, + "grad_norm": 0.06852423399686813, + "learning_rate": 1.906326521427113e-05, + "loss": 0.6475507616996765, + "step": 2676 + }, + { + "epoch": 0.4946472497141735, + "grad_norm": 0.09397808462381363, + "learning_rate": 1.906242202643743e-05, + "loss": 0.8768760561943054, + "step": 2677 + }, + { + "epoch": 0.4948320264230694, + "grad_norm": 0.08374115079641342, + "learning_rate": 1.906157847794768e-05, + "loss": 0.6318570375442505, + "step": 2678 + }, + { + "epoch": 0.4950168031319652, + "grad_norm": 0.056238822638988495, + "learning_rate": 1.9060734568835457e-05, + "loss": 0.5789932608604431, + "step": 2679 + }, + { + "epoch": 0.49520157984086105, + "grad_norm": 0.06980939954519272, + "learning_rate": 1.905989029913435e-05, + "loss": 0.6135944724082947, + "step": 2680 + }, + { + "epoch": 0.4953863565497569, + "grad_norm": 0.0664597600698471, + "learning_rate": 1.9059045668877945e-05, + "loss": 0.605728805065155, + "step": 2681 + }, + { + "epoch": 0.49557113325865276, + "grad_norm": 0.07725506275892258, + "learning_rate": 1.9058200678099873e-05, + "loss": 0.7923427224159241, + "step": 2682 + }, + { + "epoch": 0.4957559099675486, + "grad_norm": 0.07151152193546295, + "learning_rate": 1.905735532683375e-05, + "loss": 0.7274478673934937, + "step": 2683 + }, + { + "epoch": 0.4959406866764444, + "grad_norm": 0.07864515483379364, + "learning_rate": 1.9056509615113223e-05, + "loss": 0.6789471507072449, + "step": 2684 + }, + { + "epoch": 0.4961254633853403, + "grad_norm": 0.06356114894151688, + "learning_rate": 1.9055663542971948e-05, + "loss": 0.6436051726341248, + "step": 2685 + }, + { + "epoch": 0.49631024009423613, + "grad_norm": 0.07219547033309937, + "learning_rate": 1.905481711044359e-05, + "loss": 0.6921743154525757, + "step": 2686 + }, + { + "epoch": 0.49649501680313196, + "grad_norm": 0.08134686946868896, + "learning_rate": 1.9053970317561846e-05, + "loss": 0.6954108476638794, + "step": 2687 + }, + { + "epoch": 0.4966797935120278, + "grad_norm": 0.09121778607368469, + "learning_rate": 1.905312316436041e-05, + "loss": 0.8187353610992432, + "step": 2688 + }, + { + "epoch": 0.4968645702209237, + "grad_norm": 0.057139378041028976, + "learning_rate": 1.9052275650872994e-05, + "loss": 0.5716656446456909, + "step": 2689 + }, + { + "epoch": 0.4970493469298195, + "grad_norm": 0.08653534203767776, + "learning_rate": 1.9051427777133328e-05, + "loss": 0.7700663208961487, + "step": 2690 + }, + { + "epoch": 0.49723412363871533, + "grad_norm": 0.09053795039653778, + "learning_rate": 1.905057954317515e-05, + "loss": 0.8304776549339294, + "step": 2691 + }, + { + "epoch": 0.49741890034761116, + "grad_norm": 0.061494387686252594, + "learning_rate": 1.9049730949032228e-05, + "loss": 0.4574540853500366, + "step": 2692 + }, + { + "epoch": 0.49760367705650704, + "grad_norm": 0.07644534856081009, + "learning_rate": 1.9048881994738323e-05, + "loss": 0.685924768447876, + "step": 2693 + }, + { + "epoch": 0.49778845376540287, + "grad_norm": 0.06368592381477356, + "learning_rate": 1.904803268032723e-05, + "loss": 0.5423883199691772, + "step": 2694 + }, + { + "epoch": 0.4979732304742987, + "grad_norm": 0.07776868343353271, + "learning_rate": 1.904718300583274e-05, + "loss": 0.6837641000747681, + "step": 2695 + }, + { + "epoch": 0.4981580071831946, + "grad_norm": 0.07274238020181656, + "learning_rate": 1.9046332971288674e-05, + "loss": 0.5656123757362366, + "step": 2696 + }, + { + "epoch": 0.4983427838920904, + "grad_norm": 0.06574950367212296, + "learning_rate": 1.9045482576728857e-05, + "loss": 0.520660400390625, + "step": 2697 + }, + { + "epoch": 0.49852756060098624, + "grad_norm": 0.08441847562789917, + "learning_rate": 1.9044631822187132e-05, + "loss": 0.7153335213661194, + "step": 2698 + }, + { + "epoch": 0.49871233730988207, + "grad_norm": 0.07338439673185349, + "learning_rate": 1.904378070769736e-05, + "loss": 0.7561910152435303, + "step": 2699 + }, + { + "epoch": 0.49889711401877795, + "grad_norm": 0.08297833055257797, + "learning_rate": 1.9042929233293405e-05, + "loss": 0.6804150938987732, + "step": 2700 + }, + { + "epoch": 0.4990818907276738, + "grad_norm": 0.07729196548461914, + "learning_rate": 1.9042077399009163e-05, + "loss": 0.8838931322097778, + "step": 2701 + }, + { + "epoch": 0.4992666674365696, + "grad_norm": 0.08958151936531067, + "learning_rate": 1.904122520487853e-05, + "loss": 0.9126286506652832, + "step": 2702 + }, + { + "epoch": 0.49945144414546544, + "grad_norm": 0.07667583972215652, + "learning_rate": 1.9040372650935416e-05, + "loss": 0.5196372866630554, + "step": 2703 + }, + { + "epoch": 0.4996362208543613, + "grad_norm": 0.06985201686620712, + "learning_rate": 1.903951973721376e-05, + "loss": 0.6988966464996338, + "step": 2704 + }, + { + "epoch": 0.49982099756325715, + "grad_norm": 0.06755391508340836, + "learning_rate": 1.9038666463747494e-05, + "loss": 0.5743021965026855, + "step": 2705 + }, + { + "epoch": 0.500005774272153, + "grad_norm": 0.06953644752502441, + "learning_rate": 1.9037812830570583e-05, + "loss": 0.6474657654762268, + "step": 2706 + }, + { + "epoch": 0.5001905509810488, + "grad_norm": 0.07906162738800049, + "learning_rate": 1.9036958837717e-05, + "loss": 0.6331603527069092, + "step": 2707 + }, + { + "epoch": 0.5003753276899446, + "grad_norm": 0.0696539580821991, + "learning_rate": 1.9036104485220723e-05, + "loss": 0.5354222059249878, + "step": 2708 + }, + { + "epoch": 0.5005601043988406, + "grad_norm": 0.08513978868722916, + "learning_rate": 1.903524977311576e-05, + "loss": 0.7808301448822021, + "step": 2709 + }, + { + "epoch": 0.5007448811077364, + "grad_norm": 0.08418749272823334, + "learning_rate": 1.9034394701436124e-05, + "loss": 0.905589759349823, + "step": 2710 + }, + { + "epoch": 0.5009296578166322, + "grad_norm": 0.07378925383090973, + "learning_rate": 1.9033539270215843e-05, + "loss": 0.6261293292045593, + "step": 2711 + }, + { + "epoch": 0.5011144345255281, + "grad_norm": 0.05783259868621826, + "learning_rate": 1.903268347948896e-05, + "loss": 0.5617566108703613, + "step": 2712 + }, + { + "epoch": 0.5012992112344239, + "grad_norm": 0.07201316952705383, + "learning_rate": 1.903182732928954e-05, + "loss": 0.5230177044868469, + "step": 2713 + }, + { + "epoch": 0.5014839879433197, + "grad_norm": 0.08267080038785934, + "learning_rate": 1.9030970819651644e-05, + "loss": 0.7685118913650513, + "step": 2714 + }, + { + "epoch": 0.5016687646522155, + "grad_norm": 0.06884673237800598, + "learning_rate": 1.9030113950609367e-05, + "loss": 0.5821294784545898, + "step": 2715 + }, + { + "epoch": 0.5018535413611114, + "grad_norm": 0.07333012670278549, + "learning_rate": 1.9029256722196805e-05, + "loss": 0.6633030772209167, + "step": 2716 + }, + { + "epoch": 0.5020383180700073, + "grad_norm": 0.06039682775735855, + "learning_rate": 1.9028399134448072e-05, + "loss": 0.49971288442611694, + "step": 2717 + }, + { + "epoch": 0.5022230947789031, + "grad_norm": 0.06957642734050751, + "learning_rate": 1.9027541187397304e-05, + "loss": 0.5987708568572998, + "step": 2718 + }, + { + "epoch": 0.502407871487799, + "grad_norm": 0.07104673236608505, + "learning_rate": 1.9026682881078636e-05, + "loss": 0.6045911908149719, + "step": 2719 + }, + { + "epoch": 0.5025926481966948, + "grad_norm": 0.0739755779504776, + "learning_rate": 1.9025824215526235e-05, + "loss": 0.6368657350540161, + "step": 2720 + }, + { + "epoch": 0.5027774249055906, + "grad_norm": 0.07629484683275223, + "learning_rate": 1.9024965190774262e-05, + "loss": 0.9732115864753723, + "step": 2721 + }, + { + "epoch": 0.5029622016144865, + "grad_norm": 0.06285340338945389, + "learning_rate": 1.9024105806856918e-05, + "loss": 0.5596101880073547, + "step": 2722 + }, + { + "epoch": 0.5031469783233823, + "grad_norm": 0.08465234935283661, + "learning_rate": 1.9023246063808388e-05, + "loss": 0.714224100112915, + "step": 2723 + }, + { + "epoch": 0.5033317550322782, + "grad_norm": 0.07528623193502426, + "learning_rate": 1.90223859616629e-05, + "loss": 0.7736612558364868, + "step": 2724 + }, + { + "epoch": 0.5035165317411741, + "grad_norm": 0.08436401933431625, + "learning_rate": 1.9021525500454678e-05, + "loss": 0.6686825752258301, + "step": 2725 + }, + { + "epoch": 0.5037013084500699, + "grad_norm": 0.09414120018482208, + "learning_rate": 1.902066468021796e-05, + "loss": 0.9490776658058167, + "step": 2726 + }, + { + "epoch": 0.5038860851589657, + "grad_norm": 0.08042771369218826, + "learning_rate": 1.9019803500987014e-05, + "loss": 0.6027342677116394, + "step": 2727 + }, + { + "epoch": 0.5040708618678615, + "grad_norm": 0.06140635535120964, + "learning_rate": 1.901894196279611e-05, + "loss": 0.5515220761299133, + "step": 2728 + }, + { + "epoch": 0.5042556385767574, + "grad_norm": 0.07926372438669205, + "learning_rate": 1.901808006567953e-05, + "loss": 0.8234108090400696, + "step": 2729 + }, + { + "epoch": 0.5044404152856532, + "grad_norm": 0.09257242828607559, + "learning_rate": 1.9017217809671575e-05, + "loss": 0.99014812707901, + "step": 2730 + }, + { + "epoch": 0.5046251919945491, + "grad_norm": 0.06899430602788925, + "learning_rate": 1.9016355194806566e-05, + "loss": 0.6572807431221008, + "step": 2731 + }, + { + "epoch": 0.504809968703445, + "grad_norm": 0.053285859525203705, + "learning_rate": 1.901549222111883e-05, + "loss": 0.5255534648895264, + "step": 2732 + }, + { + "epoch": 0.5049947454123408, + "grad_norm": 0.08632458001375198, + "learning_rate": 1.9014628888642705e-05, + "loss": 0.6835488677024841, + "step": 2733 + }, + { + "epoch": 0.5051795221212366, + "grad_norm": 0.08509247750043869, + "learning_rate": 1.9013765197412553e-05, + "loss": 0.7912634015083313, + "step": 2734 + }, + { + "epoch": 0.5053642988301325, + "grad_norm": 0.06132848560810089, + "learning_rate": 1.9012901147462752e-05, + "loss": 0.3929689824581146, + "step": 2735 + }, + { + "epoch": 0.5055490755390283, + "grad_norm": 0.08009739220142365, + "learning_rate": 1.9012036738827682e-05, + "loss": 0.7682035565376282, + "step": 2736 + }, + { + "epoch": 0.5057338522479241, + "grad_norm": 0.06091945245862007, + "learning_rate": 1.901117197154174e-05, + "loss": 0.5390786528587341, + "step": 2737 + }, + { + "epoch": 0.5059186289568199, + "grad_norm": 0.07729063183069229, + "learning_rate": 1.901030684563935e-05, + "loss": 0.7572644352912903, + "step": 2738 + }, + { + "epoch": 0.5061034056657159, + "grad_norm": 0.08812917768955231, + "learning_rate": 1.9009441361154937e-05, + "loss": 0.6920105814933777, + "step": 2739 + }, + { + "epoch": 0.5062881823746117, + "grad_norm": 0.06975310295820236, + "learning_rate": 1.9008575518122943e-05, + "loss": 0.517683207988739, + "step": 2740 + }, + { + "epoch": 0.5064729590835075, + "grad_norm": 0.07183025032281876, + "learning_rate": 1.900770931657783e-05, + "loss": 0.7221886515617371, + "step": 2741 + }, + { + "epoch": 0.5066577357924034, + "grad_norm": 0.07859528064727783, + "learning_rate": 1.9006842756554067e-05, + "loss": 0.6803582310676575, + "step": 2742 + }, + { + "epoch": 0.5068425125012992, + "grad_norm": 0.0603187195956707, + "learning_rate": 1.900597583808614e-05, + "loss": 0.46500974893569946, + "step": 2743 + }, + { + "epoch": 0.507027289210195, + "grad_norm": 0.08773189038038254, + "learning_rate": 1.900510856120855e-05, + "loss": 0.7085680365562439, + "step": 2744 + }, + { + "epoch": 0.5072120659190908, + "grad_norm": 0.08600956946611404, + "learning_rate": 1.9004240925955814e-05, + "loss": 0.7808557152748108, + "step": 2745 + }, + { + "epoch": 0.5073968426279868, + "grad_norm": 0.07820644229650497, + "learning_rate": 1.9003372932362462e-05, + "loss": 0.6414129137992859, + "step": 2746 + }, + { + "epoch": 0.5075816193368826, + "grad_norm": 0.0755482167005539, + "learning_rate": 1.900250458046303e-05, + "loss": 0.5816751718521118, + "step": 2747 + }, + { + "epoch": 0.5077663960457784, + "grad_norm": 0.07866478711366653, + "learning_rate": 1.9001635870292086e-05, + "loss": 0.6917564272880554, + "step": 2748 + }, + { + "epoch": 0.5079511727546743, + "grad_norm": 0.08214591443538666, + "learning_rate": 1.9000766801884194e-05, + "loss": 0.6834210753440857, + "step": 2749 + }, + { + "epoch": 0.5081359494635701, + "grad_norm": 0.06740007549524307, + "learning_rate": 1.8999897375273942e-05, + "loss": 0.6274173259735107, + "step": 2750 + }, + { + "epoch": 0.5083207261724659, + "grad_norm": 0.07330995053052902, + "learning_rate": 1.8999027590495934e-05, + "loss": 0.6141194105148315, + "step": 2751 + }, + { + "epoch": 0.5085055028813618, + "grad_norm": 0.07362136989831924, + "learning_rate": 1.899815744758478e-05, + "loss": 0.6480097770690918, + "step": 2752 + }, + { + "epoch": 0.5086902795902577, + "grad_norm": 0.09079992771148682, + "learning_rate": 1.8997286946575114e-05, + "loss": 0.820427417755127, + "step": 2753 + }, + { + "epoch": 0.5088750562991535, + "grad_norm": 0.06545901298522949, + "learning_rate": 1.8996416087501573e-05, + "loss": 0.478307843208313, + "step": 2754 + }, + { + "epoch": 0.5090598330080494, + "grad_norm": 0.09047531336545944, + "learning_rate": 1.899554487039882e-05, + "loss": 0.8143502473831177, + "step": 2755 + }, + { + "epoch": 0.5092446097169452, + "grad_norm": 0.08255776762962341, + "learning_rate": 1.8994673295301526e-05, + "loss": 0.5235763788223267, + "step": 2756 + }, + { + "epoch": 0.509429386425841, + "grad_norm": 0.08837346732616425, + "learning_rate": 1.8993801362244374e-05, + "loss": 0.6911593079566956, + "step": 2757 + }, + { + "epoch": 0.5096141631347368, + "grad_norm": 0.08509925752878189, + "learning_rate": 1.8992929071262066e-05, + "loss": 0.8627534508705139, + "step": 2758 + }, + { + "epoch": 0.5097989398436327, + "grad_norm": 0.07599958032369614, + "learning_rate": 1.8992056422389317e-05, + "loss": 0.49385976791381836, + "step": 2759 + }, + { + "epoch": 0.5099837165525285, + "grad_norm": 0.07793485373258591, + "learning_rate": 1.8991183415660855e-05, + "loss": 0.748390793800354, + "step": 2760 + }, + { + "epoch": 0.5101684932614244, + "grad_norm": 0.09587264060974121, + "learning_rate": 1.899031005111142e-05, + "loss": 0.9381311535835266, + "step": 2761 + }, + { + "epoch": 0.5103532699703203, + "grad_norm": 0.09079372882843018, + "learning_rate": 1.898943632877577e-05, + "loss": 0.6771690249443054, + "step": 2762 + }, + { + "epoch": 0.5105380466792161, + "grad_norm": 0.07188550382852554, + "learning_rate": 1.8988562248688686e-05, + "loss": 0.6365196704864502, + "step": 2763 + }, + { + "epoch": 0.5107228233881119, + "grad_norm": 0.08427656441926956, + "learning_rate": 1.8987687810884944e-05, + "loss": 0.7004556655883789, + "step": 2764 + }, + { + "epoch": 0.5109076000970078, + "grad_norm": 0.06205383315682411, + "learning_rate": 1.8986813015399345e-05, + "loss": 0.6061794757843018, + "step": 2765 + }, + { + "epoch": 0.5110923768059036, + "grad_norm": 0.08489475399255753, + "learning_rate": 1.89859378622667e-05, + "loss": 0.9061506986618042, + "step": 2766 + }, + { + "epoch": 0.5112771535147994, + "grad_norm": 0.06409691274166107, + "learning_rate": 1.898506235152185e-05, + "loss": 0.6327704787254333, + "step": 2767 + }, + { + "epoch": 0.5114619302236953, + "grad_norm": 0.06629861146211624, + "learning_rate": 1.898418648319962e-05, + "loss": 0.5824447274208069, + "step": 2768 + }, + { + "epoch": 0.5116467069325912, + "grad_norm": 0.0706552192568779, + "learning_rate": 1.8983310257334883e-05, + "loss": 0.7207609415054321, + "step": 2769 + }, + { + "epoch": 0.511831483641487, + "grad_norm": 0.07945195585489273, + "learning_rate": 1.8982433673962496e-05, + "loss": 0.6402596831321716, + "step": 2770 + }, + { + "epoch": 0.5120162603503828, + "grad_norm": 0.06763187795877457, + "learning_rate": 1.8981556733117357e-05, + "loss": 0.5913063287734985, + "step": 2771 + }, + { + "epoch": 0.5122010370592787, + "grad_norm": 0.06970265507698059, + "learning_rate": 1.8980679434834357e-05, + "loss": 0.5915688276290894, + "step": 2772 + }, + { + "epoch": 0.5123858137681745, + "grad_norm": 0.06420344859361649, + "learning_rate": 1.8979801779148413e-05, + "loss": 0.5847893357276917, + "step": 2773 + }, + { + "epoch": 0.5125705904770703, + "grad_norm": 0.07548118382692337, + "learning_rate": 1.897892376609445e-05, + "loss": 0.8099266886711121, + "step": 2774 + }, + { + "epoch": 0.5127553671859663, + "grad_norm": 0.06970521062612534, + "learning_rate": 1.897804539570742e-05, + "loss": 0.5355647206306458, + "step": 2775 + }, + { + "epoch": 0.5129401438948621, + "grad_norm": 0.06311215460300446, + "learning_rate": 1.8977166668022263e-05, + "loss": 0.6434404850006104, + "step": 2776 + }, + { + "epoch": 0.5131249206037579, + "grad_norm": 0.06808818131685257, + "learning_rate": 1.8976287583073965e-05, + "loss": 0.6484166383743286, + "step": 2777 + }, + { + "epoch": 0.5133096973126537, + "grad_norm": 0.08092934638261795, + "learning_rate": 1.8975408140897503e-05, + "loss": 0.8626954555511475, + "step": 2778 + }, + { + "epoch": 0.5134944740215496, + "grad_norm": 0.07606197148561478, + "learning_rate": 1.8974528341527875e-05, + "loss": 0.5682605504989624, + "step": 2779 + }, + { + "epoch": 0.5136792507304454, + "grad_norm": 0.07515977323055267, + "learning_rate": 1.89736481850001e-05, + "loss": 0.6957910060882568, + "step": 2780 + }, + { + "epoch": 0.5138640274393412, + "grad_norm": 0.07134959846735, + "learning_rate": 1.89727676713492e-05, + "loss": 0.5610045194625854, + "step": 2781 + }, + { + "epoch": 0.5140488041482371, + "grad_norm": 0.07571757584810257, + "learning_rate": 1.8971886800610218e-05, + "loss": 0.6205386519432068, + "step": 2782 + }, + { + "epoch": 0.514233580857133, + "grad_norm": 0.0926576629281044, + "learning_rate": 1.8971005572818213e-05, + "loss": 0.865348756313324, + "step": 2783 + }, + { + "epoch": 0.5144183575660288, + "grad_norm": 0.06826504319906235, + "learning_rate": 1.8970123988008252e-05, + "loss": 0.6385295391082764, + "step": 2784 + }, + { + "epoch": 0.5146031342749247, + "grad_norm": 0.0901683047413826, + "learning_rate": 1.8969242046215418e-05, + "loss": 0.9333497881889343, + "step": 2785 + }, + { + "epoch": 0.5147879109838205, + "grad_norm": 0.08303900808095932, + "learning_rate": 1.8968359747474813e-05, + "loss": 0.6660789847373962, + "step": 2786 + }, + { + "epoch": 0.5149726876927163, + "grad_norm": 0.06373574584722519, + "learning_rate": 1.896747709182155e-05, + "loss": 0.5466078519821167, + "step": 2787 + }, + { + "epoch": 0.5151574644016121, + "grad_norm": 0.07366237789392471, + "learning_rate": 1.8966594079290757e-05, + "loss": 0.6122941970825195, + "step": 2788 + }, + { + "epoch": 0.515342241110508, + "grad_norm": 0.07785239070653915, + "learning_rate": 1.896571070991757e-05, + "loss": 0.729058563709259, + "step": 2789 + }, + { + "epoch": 0.5155270178194039, + "grad_norm": 0.05069505050778389, + "learning_rate": 1.8964826983737143e-05, + "loss": 0.44589418172836304, + "step": 2790 + }, + { + "epoch": 0.5157117945282997, + "grad_norm": 0.07521497458219528, + "learning_rate": 1.8963942900784653e-05, + "loss": 0.6502828001976013, + "step": 2791 + }, + { + "epoch": 0.5158965712371956, + "grad_norm": 0.061132580041885376, + "learning_rate": 1.896305846109528e-05, + "loss": 0.46217507123947144, + "step": 2792 + }, + { + "epoch": 0.5160813479460914, + "grad_norm": 0.08042273670434952, + "learning_rate": 1.8962173664704222e-05, + "loss": 0.7091048955917358, + "step": 2793 + }, + { + "epoch": 0.5162661246549872, + "grad_norm": 0.07649550586938858, + "learning_rate": 1.896128851164669e-05, + "loss": 0.7375391125679016, + "step": 2794 + }, + { + "epoch": 0.516450901363883, + "grad_norm": 0.0892588421702385, + "learning_rate": 1.8960403001957914e-05, + "loss": 0.7438496351242065, + "step": 2795 + }, + { + "epoch": 0.5166356780727789, + "grad_norm": 0.06633324921131134, + "learning_rate": 1.8959517135673126e-05, + "loss": 0.5604415535926819, + "step": 2796 + }, + { + "epoch": 0.5168204547816748, + "grad_norm": 0.07909633964300156, + "learning_rate": 1.895863091282759e-05, + "loss": 0.6037436723709106, + "step": 2797 + }, + { + "epoch": 0.5170052314905706, + "grad_norm": 0.09118392318487167, + "learning_rate": 1.8957744333456577e-05, + "loss": 0.7270393371582031, + "step": 2798 + }, + { + "epoch": 0.5171900081994665, + "grad_norm": 0.07374778389930725, + "learning_rate": 1.895685739759536e-05, + "loss": 0.5679983496665955, + "step": 2799 + }, + { + "epoch": 0.5173747849083623, + "grad_norm": 0.07633034884929657, + "learning_rate": 1.895597010527924e-05, + "loss": 0.6480544805526733, + "step": 2800 + }, + { + "epoch": 0.5175595616172581, + "grad_norm": 0.06983176618814468, + "learning_rate": 1.895508245654353e-05, + "loss": 0.6487261652946472, + "step": 2801 + }, + { + "epoch": 0.517744338326154, + "grad_norm": 0.07402893155813217, + "learning_rate": 1.8954194451423555e-05, + "loss": 0.8582574725151062, + "step": 2802 + }, + { + "epoch": 0.5179291150350498, + "grad_norm": 0.0776456966996193, + "learning_rate": 1.895330608995466e-05, + "loss": 0.6201335787773132, + "step": 2803 + }, + { + "epoch": 0.5181138917439456, + "grad_norm": 0.0688788965344429, + "learning_rate": 1.8952417372172187e-05, + "loss": 0.6365094780921936, + "step": 2804 + }, + { + "epoch": 0.5182986684528416, + "grad_norm": 0.06985542923212051, + "learning_rate": 1.8951528298111514e-05, + "loss": 0.5172978043556213, + "step": 2805 + }, + { + "epoch": 0.5184834451617374, + "grad_norm": 0.07759788632392883, + "learning_rate": 1.895063886780802e-05, + "loss": 0.6132344603538513, + "step": 2806 + }, + { + "epoch": 0.5186682218706332, + "grad_norm": 0.08312520384788513, + "learning_rate": 1.89497490812971e-05, + "loss": 0.7646285891532898, + "step": 2807 + }, + { + "epoch": 0.518852998579529, + "grad_norm": 0.08181486278772354, + "learning_rate": 1.8948858938614172e-05, + "loss": 0.7406775951385498, + "step": 2808 + }, + { + "epoch": 0.5190377752884249, + "grad_norm": 0.0924254059791565, + "learning_rate": 1.8947968439794653e-05, + "loss": 0.6629146337509155, + "step": 2809 + }, + { + "epoch": 0.5192225519973207, + "grad_norm": 0.08145590871572495, + "learning_rate": 1.8947077584873984e-05, + "loss": 0.7635599374771118, + "step": 2810 + }, + { + "epoch": 0.5194073287062165, + "grad_norm": 0.07248992472887039, + "learning_rate": 1.8946186373887617e-05, + "loss": 0.620116114616394, + "step": 2811 + }, + { + "epoch": 0.5195921054151125, + "grad_norm": 0.06582232564687729, + "learning_rate": 1.8945294806871026e-05, + "loss": 0.5889583826065063, + "step": 2812 + }, + { + "epoch": 0.5197768821240083, + "grad_norm": 0.08726590871810913, + "learning_rate": 1.8944402883859687e-05, + "loss": 0.7653307914733887, + "step": 2813 + }, + { + "epoch": 0.5199616588329041, + "grad_norm": 0.053412389010190964, + "learning_rate": 1.8943510604889094e-05, + "loss": 0.4532836973667145, + "step": 2814 + }, + { + "epoch": 0.5201464355418, + "grad_norm": 0.09321916848421097, + "learning_rate": 1.8942617969994762e-05, + "loss": 0.752572238445282, + "step": 2815 + }, + { + "epoch": 0.5203312122506958, + "grad_norm": 0.06985002756118774, + "learning_rate": 1.894172497921221e-05, + "loss": 0.45620113611221313, + "step": 2816 + }, + { + "epoch": 0.5205159889595916, + "grad_norm": 0.08312235027551651, + "learning_rate": 1.894083163257698e-05, + "loss": 0.7842530608177185, + "step": 2817 + }, + { + "epoch": 0.5207007656684874, + "grad_norm": 0.06230049580335617, + "learning_rate": 1.8939937930124622e-05, + "loss": 0.49488046765327454, + "step": 2818 + }, + { + "epoch": 0.5208855423773834, + "grad_norm": 0.07006420195102692, + "learning_rate": 1.893904387189071e-05, + "loss": 0.5373267531394958, + "step": 2819 + }, + { + "epoch": 0.5210703190862792, + "grad_norm": 0.08325011283159256, + "learning_rate": 1.893814945791081e-05, + "loss": 0.8509864807128906, + "step": 2820 + }, + { + "epoch": 0.521255095795175, + "grad_norm": 0.06438539177179337, + "learning_rate": 1.893725468822053e-05, + "loss": 0.701816737651825, + "step": 2821 + }, + { + "epoch": 0.5214398725040709, + "grad_norm": 0.09208257496356964, + "learning_rate": 1.8936359562855475e-05, + "loss": 0.774588406085968, + "step": 2822 + }, + { + "epoch": 0.5216246492129667, + "grad_norm": 0.07522048056125641, + "learning_rate": 1.8935464081851267e-05, + "loss": 0.674850583076477, + "step": 2823 + }, + { + "epoch": 0.5218094259218625, + "grad_norm": 0.07833349704742432, + "learning_rate": 1.8934568245243542e-05, + "loss": 0.6751000285148621, + "step": 2824 + }, + { + "epoch": 0.5219942026307584, + "grad_norm": 0.08105552941560745, + "learning_rate": 1.8933672053067957e-05, + "loss": 0.6712080836296082, + "step": 2825 + }, + { + "epoch": 0.5221789793396542, + "grad_norm": 0.07644625008106232, + "learning_rate": 1.8932775505360173e-05, + "loss": 0.5222357511520386, + "step": 2826 + }, + { + "epoch": 0.5223637560485501, + "grad_norm": 0.05553733929991722, + "learning_rate": 1.8931878602155872e-05, + "loss": 0.5329239368438721, + "step": 2827 + }, + { + "epoch": 0.522548532757446, + "grad_norm": 0.05887029320001602, + "learning_rate": 1.8930981343490742e-05, + "loss": 0.5770387649536133, + "step": 2828 + }, + { + "epoch": 0.5227333094663418, + "grad_norm": 0.07055466622114182, + "learning_rate": 1.8930083729400502e-05, + "loss": 0.6148492693901062, + "step": 2829 + }, + { + "epoch": 0.5229180861752376, + "grad_norm": 0.08709493279457092, + "learning_rate": 1.8929185759920864e-05, + "loss": 0.8767763376235962, + "step": 2830 + }, + { + "epoch": 0.5231028628841334, + "grad_norm": 0.0684783011674881, + "learning_rate": 1.8928287435087568e-05, + "loss": 0.5474317073822021, + "step": 2831 + }, + { + "epoch": 0.5232876395930293, + "grad_norm": 0.07589972019195557, + "learning_rate": 1.8927388754936368e-05, + "loss": 0.710189938545227, + "step": 2832 + }, + { + "epoch": 0.5234724163019251, + "grad_norm": 0.08322039991617203, + "learning_rate": 1.8926489719503025e-05, + "loss": 0.7560164928436279, + "step": 2833 + }, + { + "epoch": 0.523657193010821, + "grad_norm": 0.08088655769824982, + "learning_rate": 1.892559032882332e-05, + "loss": 0.7506522536277771, + "step": 2834 + }, + { + "epoch": 0.5238419697197169, + "grad_norm": 0.083648681640625, + "learning_rate": 1.8924690582933043e-05, + "loss": 0.8052482008934021, + "step": 2835 + }, + { + "epoch": 0.5240267464286127, + "grad_norm": 0.05375261977314949, + "learning_rate": 1.8923790481868e-05, + "loss": 0.5737895369529724, + "step": 2836 + }, + { + "epoch": 0.5242115231375085, + "grad_norm": 0.07327893376350403, + "learning_rate": 1.8922890025664018e-05, + "loss": 0.6785560846328735, + "step": 2837 + }, + { + "epoch": 0.5243962998464043, + "grad_norm": 0.07243496924638748, + "learning_rate": 1.892198921435693e-05, + "loss": 0.6918960809707642, + "step": 2838 + }, + { + "epoch": 0.5245810765553002, + "grad_norm": 0.07730116695165634, + "learning_rate": 1.8921088047982585e-05, + "loss": 0.6704845428466797, + "step": 2839 + }, + { + "epoch": 0.524765853264196, + "grad_norm": 0.06954976171255112, + "learning_rate": 1.8920186526576843e-05, + "loss": 0.6522843241691589, + "step": 2840 + }, + { + "epoch": 0.5249506299730919, + "grad_norm": 0.09051687270402908, + "learning_rate": 1.8919284650175585e-05, + "loss": 0.8548020720481873, + "step": 2841 + }, + { + "epoch": 0.5251354066819878, + "grad_norm": 0.055443525314331055, + "learning_rate": 1.8918382418814705e-05, + "loss": 0.5138656497001648, + "step": 2842 + }, + { + "epoch": 0.5253201833908836, + "grad_norm": 0.08884968608617783, + "learning_rate": 1.8917479832530102e-05, + "loss": 0.8936039805412292, + "step": 2843 + }, + { + "epoch": 0.5255049600997794, + "grad_norm": 0.066892109811306, + "learning_rate": 1.8916576891357706e-05, + "loss": 0.6073099374771118, + "step": 2844 + }, + { + "epoch": 0.5256897368086753, + "grad_norm": 0.06852417439222336, + "learning_rate": 1.8915673595333443e-05, + "loss": 0.6002171039581299, + "step": 2845 + }, + { + "epoch": 0.5258745135175711, + "grad_norm": 0.06942515820264816, + "learning_rate": 1.891476994449327e-05, + "loss": 0.6562768220901489, + "step": 2846 + }, + { + "epoch": 0.5260592902264669, + "grad_norm": 0.08007383346557617, + "learning_rate": 1.8913865938873138e-05, + "loss": 0.63433438539505, + "step": 2847 + }, + { + "epoch": 0.5262440669353627, + "grad_norm": 0.07730159908533096, + "learning_rate": 1.8912961578509032e-05, + "loss": 0.6335527896881104, + "step": 2848 + }, + { + "epoch": 0.5264288436442587, + "grad_norm": 0.09433772414922714, + "learning_rate": 1.891205686343694e-05, + "loss": 1.0060772895812988, + "step": 2849 + }, + { + "epoch": 0.5266136203531545, + "grad_norm": 0.07724204659461975, + "learning_rate": 1.891115179369287e-05, + "loss": 0.6215003728866577, + "step": 2850 + }, + { + "epoch": 0.5267983970620503, + "grad_norm": 0.07483768463134766, + "learning_rate": 1.8910246369312833e-05, + "loss": 0.6356710195541382, + "step": 2851 + }, + { + "epoch": 0.5269831737709462, + "grad_norm": 0.05760635435581207, + "learning_rate": 1.8909340590332868e-05, + "loss": 0.42942285537719727, + "step": 2852 + }, + { + "epoch": 0.527167950479842, + "grad_norm": 0.0790332779288292, + "learning_rate": 1.8908434456789022e-05, + "loss": 0.5897369980812073, + "step": 2853 + }, + { + "epoch": 0.5273527271887378, + "grad_norm": 0.06736014038324356, + "learning_rate": 1.8907527968717357e-05, + "loss": 0.5854964852333069, + "step": 2854 + }, + { + "epoch": 0.5275375038976337, + "grad_norm": 0.07692205905914307, + "learning_rate": 1.8906621126153947e-05, + "loss": 0.7592504024505615, + "step": 2855 + }, + { + "epoch": 0.5277222806065296, + "grad_norm": 0.08579183369874954, + "learning_rate": 1.8905713929134878e-05, + "loss": 0.6285127401351929, + "step": 2856 + }, + { + "epoch": 0.5279070573154254, + "grad_norm": 0.062469176948070526, + "learning_rate": 1.890480637769626e-05, + "loss": 0.5253337025642395, + "step": 2857 + }, + { + "epoch": 0.5280918340243212, + "grad_norm": 0.05412564054131508, + "learning_rate": 1.8903898471874206e-05, + "loss": 0.4102505147457123, + "step": 2858 + }, + { + "epoch": 0.5282766107332171, + "grad_norm": 0.06502963602542877, + "learning_rate": 1.890299021170485e-05, + "loss": 0.5365229845046997, + "step": 2859 + }, + { + "epoch": 0.5284613874421129, + "grad_norm": 0.0952506884932518, + "learning_rate": 1.8902081597224338e-05, + "loss": 0.8815677165985107, + "step": 2860 + }, + { + "epoch": 0.5286461641510087, + "grad_norm": 0.06655575335025787, + "learning_rate": 1.8901172628468833e-05, + "loss": 0.569294273853302, + "step": 2861 + }, + { + "epoch": 0.5288309408599046, + "grad_norm": 0.06547219306230545, + "learning_rate": 1.89002633054745e-05, + "loss": 0.477306604385376, + "step": 2862 + }, + { + "epoch": 0.5290157175688005, + "grad_norm": 0.06068910285830498, + "learning_rate": 1.8899353628277536e-05, + "loss": 0.6271467208862305, + "step": 2863 + }, + { + "epoch": 0.5292004942776963, + "grad_norm": 0.07076477259397507, + "learning_rate": 1.8898443596914136e-05, + "loss": 0.605965793132782, + "step": 2864 + }, + { + "epoch": 0.5293852709865922, + "grad_norm": 0.07341812551021576, + "learning_rate": 1.8897533211420525e-05, + "loss": 0.9426294565200806, + "step": 2865 + }, + { + "epoch": 0.529570047695488, + "grad_norm": 0.07296188175678253, + "learning_rate": 1.8896622471832925e-05, + "loss": 0.612720787525177, + "step": 2866 + }, + { + "epoch": 0.5297548244043838, + "grad_norm": 0.07263094931840897, + "learning_rate": 1.889571137818759e-05, + "loss": 0.5333542823791504, + "step": 2867 + }, + { + "epoch": 0.5299396011132796, + "grad_norm": 0.08414128422737122, + "learning_rate": 1.8894799930520768e-05, + "loss": 0.7564873695373535, + "step": 2868 + }, + { + "epoch": 0.5301243778221755, + "grad_norm": 0.12541791796684265, + "learning_rate": 1.889388812886874e-05, + "loss": 0.8672096729278564, + "step": 2869 + }, + { + "epoch": 0.5303091545310714, + "grad_norm": 0.07895677536725998, + "learning_rate": 1.8892975973267787e-05, + "loss": 0.6737049221992493, + "step": 2870 + }, + { + "epoch": 0.5304939312399672, + "grad_norm": 0.06124429777264595, + "learning_rate": 1.8892063463754215e-05, + "loss": 0.5795494318008423, + "step": 2871 + }, + { + "epoch": 0.5306787079488631, + "grad_norm": 0.07113495469093323, + "learning_rate": 1.8891150600364342e-05, + "loss": 0.6831026077270508, + "step": 2872 + }, + { + "epoch": 0.5308634846577589, + "grad_norm": 0.08000877499580383, + "learning_rate": 1.8890237383134485e-05, + "loss": 0.7559480667114258, + "step": 2873 + }, + { + "epoch": 0.5310482613666547, + "grad_norm": 0.08119085431098938, + "learning_rate": 1.8889323812100995e-05, + "loss": 0.8790738582611084, + "step": 2874 + }, + { + "epoch": 0.5312330380755506, + "grad_norm": 0.06821675598621368, + "learning_rate": 1.888840988730023e-05, + "loss": 0.5029259920120239, + "step": 2875 + }, + { + "epoch": 0.5314178147844464, + "grad_norm": 0.06950125098228455, + "learning_rate": 1.8887495608768557e-05, + "loss": 0.5709627866744995, + "step": 2876 + }, + { + "epoch": 0.5316025914933422, + "grad_norm": 0.05820643901824951, + "learning_rate": 1.888658097654237e-05, + "loss": 0.4662620723247528, + "step": 2877 + }, + { + "epoch": 0.5317873682022382, + "grad_norm": 0.09406157582998276, + "learning_rate": 1.8885665990658055e-05, + "loss": 0.7106308341026306, + "step": 2878 + }, + { + "epoch": 0.531972144911134, + "grad_norm": 0.06230054423213005, + "learning_rate": 1.8884750651152037e-05, + "loss": 0.5774460434913635, + "step": 2879 + }, + { + "epoch": 0.5321569216200298, + "grad_norm": 0.0671701729297638, + "learning_rate": 1.8883834958060742e-05, + "loss": 0.5192086100578308, + "step": 2880 + }, + { + "epoch": 0.5323416983289256, + "grad_norm": 0.0866723582148552, + "learning_rate": 1.888291891142061e-05, + "loss": 0.6210560202598572, + "step": 2881 + }, + { + "epoch": 0.5325264750378215, + "grad_norm": 0.06697280704975128, + "learning_rate": 1.8882002511268093e-05, + "loss": 0.667514443397522, + "step": 2882 + }, + { + "epoch": 0.5327112517467173, + "grad_norm": 0.047818925231695175, + "learning_rate": 1.8881085757639662e-05, + "loss": 0.4015243351459503, + "step": 2883 + }, + { + "epoch": 0.5328960284556131, + "grad_norm": 0.06965786963701248, + "learning_rate": 1.8880168650571805e-05, + "loss": 0.6664662957191467, + "step": 2884 + }, + { + "epoch": 0.5330808051645091, + "grad_norm": 0.07071227580308914, + "learning_rate": 1.8879251190101024e-05, + "loss": 0.6189071536064148, + "step": 2885 + }, + { + "epoch": 0.5332655818734049, + "grad_norm": 0.07223183661699295, + "learning_rate": 1.8878333376263818e-05, + "loss": 0.6490969657897949, + "step": 2886 + }, + { + "epoch": 0.5334503585823007, + "grad_norm": 0.06491070985794067, + "learning_rate": 1.8877415209096725e-05, + "loss": 0.7136419415473938, + "step": 2887 + }, + { + "epoch": 0.5336351352911965, + "grad_norm": 0.06153097003698349, + "learning_rate": 1.887649668863628e-05, + "loss": 0.7171862125396729, + "step": 2888 + }, + { + "epoch": 0.5338199120000924, + "grad_norm": 0.08631009608507156, + "learning_rate": 1.8875577814919035e-05, + "loss": 0.7024433612823486, + "step": 2889 + }, + { + "epoch": 0.5340046887089882, + "grad_norm": 0.06136897951364517, + "learning_rate": 1.8874658587981563e-05, + "loss": 0.542658805847168, + "step": 2890 + }, + { + "epoch": 0.534189465417884, + "grad_norm": 0.059265486896038055, + "learning_rate": 1.8873739007860444e-05, + "loss": 0.42815086245536804, + "step": 2891 + }, + { + "epoch": 0.53437424212678, + "grad_norm": 0.06907325237989426, + "learning_rate": 1.8872819074592275e-05, + "loss": 0.5861951112747192, + "step": 2892 + }, + { + "epoch": 0.5345590188356758, + "grad_norm": 0.08019591122865677, + "learning_rate": 1.8871898788213667e-05, + "loss": 0.7540650963783264, + "step": 2893 + }, + { + "epoch": 0.5347437955445716, + "grad_norm": 0.06720145046710968, + "learning_rate": 1.8870978148761245e-05, + "loss": 0.6117087602615356, + "step": 2894 + }, + { + "epoch": 0.5349285722534675, + "grad_norm": 0.06779257208108902, + "learning_rate": 1.8870057156271643e-05, + "loss": 0.504643976688385, + "step": 2895 + }, + { + "epoch": 0.5351133489623633, + "grad_norm": 0.06835329532623291, + "learning_rate": 1.8869135810781517e-05, + "loss": 0.48108264803886414, + "step": 2896 + }, + { + "epoch": 0.5352981256712591, + "grad_norm": 0.08525102585554123, + "learning_rate": 1.8868214112327538e-05, + "loss": 0.7447647452354431, + "step": 2897 + }, + { + "epoch": 0.535482902380155, + "grad_norm": 0.06674760580062866, + "learning_rate": 1.8867292060946378e-05, + "loss": 0.499419629573822, + "step": 2898 + }, + { + "epoch": 0.5356676790890508, + "grad_norm": 0.055783241987228394, + "learning_rate": 1.886636965667474e-05, + "loss": 0.49741414189338684, + "step": 2899 + }, + { + "epoch": 0.5358524557979467, + "grad_norm": 0.08357007801532745, + "learning_rate": 1.8865446899549322e-05, + "loss": 0.7474427223205566, + "step": 2900 + }, + { + "epoch": 0.5360372325068425, + "grad_norm": 0.10168084502220154, + "learning_rate": 1.886452378960686e-05, + "loss": 0.876984715461731, + "step": 2901 + }, + { + "epoch": 0.5362220092157384, + "grad_norm": 0.07848334312438965, + "learning_rate": 1.8863600326884085e-05, + "loss": 0.7350624203681946, + "step": 2902 + }, + { + "epoch": 0.5364067859246342, + "grad_norm": 0.06975662708282471, + "learning_rate": 1.8862676511417747e-05, + "loss": 0.6788052320480347, + "step": 2903 + }, + { + "epoch": 0.53659156263353, + "grad_norm": 0.09710551053285599, + "learning_rate": 1.886175234324461e-05, + "loss": 0.826421320438385, + "step": 2904 + }, + { + "epoch": 0.5367763393424259, + "grad_norm": 0.08636506646871567, + "learning_rate": 1.8860827822401454e-05, + "loss": 0.8522170186042786, + "step": 2905 + }, + { + "epoch": 0.5369611160513217, + "grad_norm": 0.061069127172231674, + "learning_rate": 1.8859902948925076e-05, + "loss": 0.6208251714706421, + "step": 2906 + }, + { + "epoch": 0.5371458927602176, + "grad_norm": 0.07857941836118698, + "learning_rate": 1.8858977722852273e-05, + "loss": 0.6387959718704224, + "step": 2907 + }, + { + "epoch": 0.5373306694691135, + "grad_norm": 0.07378064841032028, + "learning_rate": 1.885805214421988e-05, + "loss": 0.7660172581672668, + "step": 2908 + }, + { + "epoch": 0.5375154461780093, + "grad_norm": 0.050828199833631516, + "learning_rate": 1.885712621306472e-05, + "loss": 0.3771149814128876, + "step": 2909 + }, + { + "epoch": 0.5377002228869051, + "grad_norm": 0.07281278073787689, + "learning_rate": 1.885619992942365e-05, + "loss": 0.5961358547210693, + "step": 2910 + }, + { + "epoch": 0.5378849995958009, + "grad_norm": 0.08137860894203186, + "learning_rate": 1.8855273293333532e-05, + "loss": 0.7484938502311707, + "step": 2911 + }, + { + "epoch": 0.5380697763046968, + "grad_norm": 0.0633959174156189, + "learning_rate": 1.8854346304831236e-05, + "loss": 0.553483784198761, + "step": 2912 + }, + { + "epoch": 0.5382545530135926, + "grad_norm": 0.08014381676912308, + "learning_rate": 1.8853418963953666e-05, + "loss": 0.6085349917411804, + "step": 2913 + }, + { + "epoch": 0.5384393297224885, + "grad_norm": 0.05916145443916321, + "learning_rate": 1.8852491270737715e-05, + "loss": 0.5331775546073914, + "step": 2914 + }, + { + "epoch": 0.5386241064313844, + "grad_norm": 0.07928591966629028, + "learning_rate": 1.8851563225220307e-05, + "loss": 0.7232125997543335, + "step": 2915 + }, + { + "epoch": 0.5388088831402802, + "grad_norm": 0.08045277744531631, + "learning_rate": 1.8850634827438377e-05, + "loss": 0.7127057313919067, + "step": 2916 + }, + { + "epoch": 0.538993659849176, + "grad_norm": 0.06988881528377533, + "learning_rate": 1.8849706077428874e-05, + "loss": 0.6256963014602661, + "step": 2917 + }, + { + "epoch": 0.5391784365580718, + "grad_norm": 0.06709430366754532, + "learning_rate": 1.884877697522875e-05, + "loss": 0.5660346746444702, + "step": 2918 + }, + { + "epoch": 0.5393632132669677, + "grad_norm": 0.07249153405427933, + "learning_rate": 1.884784752087499e-05, + "loss": 0.5950549244880676, + "step": 2919 + }, + { + "epoch": 0.5395479899758635, + "grad_norm": 0.07752339541912079, + "learning_rate": 1.884691771440458e-05, + "loss": 0.6727233529090881, + "step": 2920 + }, + { + "epoch": 0.5397327666847593, + "grad_norm": 0.05704038217663765, + "learning_rate": 1.8845987555854526e-05, + "loss": 0.49041783809661865, + "step": 2921 + }, + { + "epoch": 0.5399175433936553, + "grad_norm": 0.08285940438508987, + "learning_rate": 1.884505704526184e-05, + "loss": 0.7150740623474121, + "step": 2922 + }, + { + "epoch": 0.5401023201025511, + "grad_norm": 0.07266629487276077, + "learning_rate": 1.8844126182663552e-05, + "loss": 0.6662338972091675, + "step": 2923 + }, + { + "epoch": 0.5402870968114469, + "grad_norm": 0.07145297527313232, + "learning_rate": 1.884319496809672e-05, + "loss": 0.6038743257522583, + "step": 2924 + }, + { + "epoch": 0.5404718735203428, + "grad_norm": 0.0714673399925232, + "learning_rate": 1.884226340159839e-05, + "loss": 0.7645812034606934, + "step": 2925 + }, + { + "epoch": 0.5406566502292386, + "grad_norm": 0.06393370777368546, + "learning_rate": 1.8841331483205642e-05, + "loss": 0.4089282751083374, + "step": 2926 + }, + { + "epoch": 0.5408414269381344, + "grad_norm": 0.09627486765384674, + "learning_rate": 1.884039921295556e-05, + "loss": 0.7667011618614197, + "step": 2927 + }, + { + "epoch": 0.5410262036470302, + "grad_norm": 0.07943038642406464, + "learning_rate": 1.8839466590885253e-05, + "loss": 0.7740654349327087, + "step": 2928 + }, + { + "epoch": 0.5412109803559262, + "grad_norm": 0.0655045360326767, + "learning_rate": 1.8838533617031826e-05, + "loss": 0.6008351445198059, + "step": 2929 + }, + { + "epoch": 0.541395757064822, + "grad_norm": 0.06013663485646248, + "learning_rate": 1.8837600291432413e-05, + "loss": 0.4785308539867401, + "step": 2930 + }, + { + "epoch": 0.5415805337737178, + "grad_norm": 0.05999566987156868, + "learning_rate": 1.8836666614124158e-05, + "loss": 0.4873706102371216, + "step": 2931 + }, + { + "epoch": 0.5417653104826137, + "grad_norm": 0.09239016473293304, + "learning_rate": 1.8835732585144218e-05, + "loss": 0.9298089146614075, + "step": 2932 + }, + { + "epoch": 0.5419500871915095, + "grad_norm": 0.06117738410830498, + "learning_rate": 1.883479820452977e-05, + "loss": 0.5474444031715393, + "step": 2933 + }, + { + "epoch": 0.5421348639004053, + "grad_norm": 0.08043549954891205, + "learning_rate": 1.8833863472317984e-05, + "loss": 0.7750460505485535, + "step": 2934 + }, + { + "epoch": 0.5423196406093012, + "grad_norm": 0.09016053378582001, + "learning_rate": 1.8832928388546075e-05, + "loss": 0.7897309064865112, + "step": 2935 + }, + { + "epoch": 0.5425044173181971, + "grad_norm": 0.06423919647932053, + "learning_rate": 1.883199295325125e-05, + "loss": 0.4962039887905121, + "step": 2936 + }, + { + "epoch": 0.5426891940270929, + "grad_norm": 0.09071557223796844, + "learning_rate": 1.883105716647074e-05, + "loss": 0.9806622862815857, + "step": 2937 + }, + { + "epoch": 0.5428739707359888, + "grad_norm": 0.07148994505405426, + "learning_rate": 1.883012102824178e-05, + "loss": 0.7550164461135864, + "step": 2938 + }, + { + "epoch": 0.5430587474448846, + "grad_norm": 0.08958619832992554, + "learning_rate": 1.882918453860163e-05, + "loss": 0.7683021426200867, + "step": 2939 + }, + { + "epoch": 0.5432435241537804, + "grad_norm": 0.0824618861079216, + "learning_rate": 1.882824769758756e-05, + "loss": 0.6786718368530273, + "step": 2940 + }, + { + "epoch": 0.5434283008626762, + "grad_norm": 0.05214720591902733, + "learning_rate": 1.882731050523685e-05, + "loss": 0.4915755093097687, + "step": 2941 + }, + { + "epoch": 0.5436130775715721, + "grad_norm": 0.06492382287979126, + "learning_rate": 1.88263729615868e-05, + "loss": 0.6302931308746338, + "step": 2942 + }, + { + "epoch": 0.5437978542804679, + "grad_norm": 0.0823633000254631, + "learning_rate": 1.882543506667472e-05, + "loss": 0.6546368598937988, + "step": 2943 + }, + { + "epoch": 0.5439826309893638, + "grad_norm": 0.0699317455291748, + "learning_rate": 1.8824496820537934e-05, + "loss": 0.5268377661705017, + "step": 2944 + }, + { + "epoch": 0.5441674076982597, + "grad_norm": 0.05576154962182045, + "learning_rate": 1.8823558223213787e-05, + "loss": 0.4686092734336853, + "step": 2945 + }, + { + "epoch": 0.5443521844071555, + "grad_norm": 0.06533041596412659, + "learning_rate": 1.8822619274739623e-05, + "loss": 0.4361056089401245, + "step": 2946 + }, + { + "epoch": 0.5445369611160513, + "grad_norm": 0.08646456152200699, + "learning_rate": 1.882167997515282e-05, + "loss": 0.8023089170455933, + "step": 2947 + }, + { + "epoch": 0.5447217378249471, + "grad_norm": 0.0743655264377594, + "learning_rate": 1.8820740324490747e-05, + "loss": 0.645487368106842, + "step": 2948 + }, + { + "epoch": 0.544906514533843, + "grad_norm": 0.0705256536602974, + "learning_rate": 1.8819800322790808e-05, + "loss": 0.6143032312393188, + "step": 2949 + }, + { + "epoch": 0.5450912912427388, + "grad_norm": 0.0705714076757431, + "learning_rate": 1.8818859970090414e-05, + "loss": 0.6757660508155823, + "step": 2950 + }, + { + "epoch": 0.5452760679516347, + "grad_norm": 0.07375648617744446, + "learning_rate": 1.8817919266426977e-05, + "loss": 0.6835747361183167, + "step": 2951 + }, + { + "epoch": 0.5454608446605306, + "grad_norm": 0.07369264960289001, + "learning_rate": 1.8816978211837945e-05, + "loss": 0.6894022226333618, + "step": 2952 + }, + { + "epoch": 0.5456456213694264, + "grad_norm": 0.08359493315219879, + "learning_rate": 1.8816036806360766e-05, + "loss": 0.6959936618804932, + "step": 2953 + }, + { + "epoch": 0.5458303980783222, + "grad_norm": 0.08166102319955826, + "learning_rate": 1.88150950500329e-05, + "loss": 0.7394427061080933, + "step": 2954 + }, + { + "epoch": 0.5460151747872181, + "grad_norm": 0.06949790567159653, + "learning_rate": 1.881415294289183e-05, + "loss": 0.6202840209007263, + "step": 2955 + }, + { + "epoch": 0.5461999514961139, + "grad_norm": 0.07749205827713013, + "learning_rate": 1.8813210484975055e-05, + "loss": 0.5997181534767151, + "step": 2956 + }, + { + "epoch": 0.5463847282050097, + "grad_norm": 0.06218162178993225, + "learning_rate": 1.881226767632007e-05, + "loss": 0.611036479473114, + "step": 2957 + }, + { + "epoch": 0.5465695049139057, + "grad_norm": 0.09652518481016159, + "learning_rate": 1.8811324516964404e-05, + "loss": 0.8997894525527954, + "step": 2958 + }, + { + "epoch": 0.5467542816228015, + "grad_norm": 0.07064218819141388, + "learning_rate": 1.881038100694559e-05, + "loss": 0.5235415101051331, + "step": 2959 + }, + { + "epoch": 0.5469390583316973, + "grad_norm": 0.09446271508932114, + "learning_rate": 1.880943714630117e-05, + "loss": 0.8649404644966125, + "step": 2960 + }, + { + "epoch": 0.5471238350405931, + "grad_norm": 0.06781157851219177, + "learning_rate": 1.880849293506872e-05, + "loss": 0.6357306241989136, + "step": 2961 + }, + { + "epoch": 0.547308611749489, + "grad_norm": 0.06145769730210304, + "learning_rate": 1.8807548373285808e-05, + "loss": 0.5617401003837585, + "step": 2962 + }, + { + "epoch": 0.5474933884583848, + "grad_norm": 0.05609019100666046, + "learning_rate": 1.8806603460990023e-05, + "loss": 0.48635318875312805, + "step": 2963 + }, + { + "epoch": 0.5476781651672806, + "grad_norm": 0.0804615467786789, + "learning_rate": 1.8805658198218975e-05, + "loss": 0.714273989200592, + "step": 2964 + }, + { + "epoch": 0.5478629418761765, + "grad_norm": 0.062169257551431656, + "learning_rate": 1.8804712585010277e-05, + "loss": 0.4738171100616455, + "step": 2965 + }, + { + "epoch": 0.5480477185850724, + "grad_norm": 0.0890984758734703, + "learning_rate": 1.880376662140157e-05, + "loss": 1.014730453491211, + "step": 2966 + }, + { + "epoch": 0.5482324952939682, + "grad_norm": 0.08369938284158707, + "learning_rate": 1.880282030743049e-05, + "loss": 0.6934597492218018, + "step": 2967 + }, + { + "epoch": 0.548417272002864, + "grad_norm": 0.05199027433991432, + "learning_rate": 1.8801873643134705e-05, + "loss": 0.3649899959564209, + "step": 2968 + }, + { + "epoch": 0.5486020487117599, + "grad_norm": 0.07019580900669098, + "learning_rate": 1.8800926628551884e-05, + "loss": 0.6902180910110474, + "step": 2969 + }, + { + "epoch": 0.5487868254206557, + "grad_norm": 0.051402270793914795, + "learning_rate": 1.8799979263719722e-05, + "loss": 0.396835595369339, + "step": 2970 + }, + { + "epoch": 0.5489716021295515, + "grad_norm": 0.08352036774158478, + "learning_rate": 1.879903154867591e-05, + "loss": 0.7998559474945068, + "step": 2971 + }, + { + "epoch": 0.5491563788384474, + "grad_norm": 0.07527686655521393, + "learning_rate": 1.879808348345818e-05, + "loss": 0.5918179154396057, + "step": 2972 + }, + { + "epoch": 0.5493411555473433, + "grad_norm": 0.10220187157392502, + "learning_rate": 1.8797135068104247e-05, + "loss": 0.9950894713401794, + "step": 2973 + }, + { + "epoch": 0.5495259322562391, + "grad_norm": 0.09731113165616989, + "learning_rate": 1.879618630265186e-05, + "loss": 0.7071390151977539, + "step": 2974 + }, + { + "epoch": 0.549710708965135, + "grad_norm": 0.06815184652805328, + "learning_rate": 1.879523718713878e-05, + "loss": 0.5961635112762451, + "step": 2975 + }, + { + "epoch": 0.5498954856740308, + "grad_norm": 0.08032702654600143, + "learning_rate": 1.879428772160278e-05, + "loss": 0.7274848818778992, + "step": 2976 + }, + { + "epoch": 0.5500802623829266, + "grad_norm": 0.06666399538516998, + "learning_rate": 1.879333790608164e-05, + "loss": 0.5938247442245483, + "step": 2977 + }, + { + "epoch": 0.5502650390918224, + "grad_norm": 0.07029084116220474, + "learning_rate": 1.8792387740613162e-05, + "loss": 0.6550891995429993, + "step": 2978 + }, + { + "epoch": 0.5504498158007183, + "grad_norm": 0.07481728494167328, + "learning_rate": 1.8791437225235157e-05, + "loss": 0.6303069591522217, + "step": 2979 + }, + { + "epoch": 0.5506345925096142, + "grad_norm": 0.07247333973646164, + "learning_rate": 1.8790486359985456e-05, + "loss": 0.5467405915260315, + "step": 2980 + }, + { + "epoch": 0.55081936921851, + "grad_norm": 0.06596650183200836, + "learning_rate": 1.8789535144901902e-05, + "loss": 0.7341605424880981, + "step": 2981 + }, + { + "epoch": 0.5510041459274059, + "grad_norm": 0.06992919743061066, + "learning_rate": 1.8788583580022347e-05, + "loss": 0.6399144530296326, + "step": 2982 + }, + { + "epoch": 0.5511889226363017, + "grad_norm": 0.08813784271478653, + "learning_rate": 1.8787631665384666e-05, + "loss": 0.7428203225135803, + "step": 2983 + }, + { + "epoch": 0.5513736993451975, + "grad_norm": 0.06808775663375854, + "learning_rate": 1.878667940102673e-05, + "loss": 0.595568835735321, + "step": 2984 + }, + { + "epoch": 0.5515584760540934, + "grad_norm": 0.08083754032850266, + "learning_rate": 1.8785726786986446e-05, + "loss": 0.7247161269187927, + "step": 2985 + }, + { + "epoch": 0.5517432527629892, + "grad_norm": 0.07417766749858856, + "learning_rate": 1.8784773823301726e-05, + "loss": 0.6404522061347961, + "step": 2986 + }, + { + "epoch": 0.551928029471885, + "grad_norm": 0.06823204457759857, + "learning_rate": 1.878382051001049e-05, + "loss": 0.6214050650596619, + "step": 2987 + }, + { + "epoch": 0.552112806180781, + "grad_norm": 0.0789300948381424, + "learning_rate": 1.878286684715068e-05, + "loss": 0.7323463559150696, + "step": 2988 + }, + { + "epoch": 0.5522975828896768, + "grad_norm": 0.09078711271286011, + "learning_rate": 1.8781912834760246e-05, + "loss": 0.6080383062362671, + "step": 2989 + }, + { + "epoch": 0.5524823595985726, + "grad_norm": 0.06681746989488602, + "learning_rate": 1.8780958472877156e-05, + "loss": 0.5171971917152405, + "step": 2990 + }, + { + "epoch": 0.5526671363074684, + "grad_norm": 0.055142708122730255, + "learning_rate": 1.8780003761539392e-05, + "loss": 0.4934493601322174, + "step": 2991 + }, + { + "epoch": 0.5528519130163643, + "grad_norm": 0.0705462172627449, + "learning_rate": 1.877904870078495e-05, + "loss": 0.6238037347793579, + "step": 2992 + }, + { + "epoch": 0.5530366897252601, + "grad_norm": 0.06349740922451019, + "learning_rate": 1.877809329065183e-05, + "loss": 0.6783521175384521, + "step": 2993 + }, + { + "epoch": 0.5532214664341559, + "grad_norm": 0.06308883428573608, + "learning_rate": 1.8777137531178066e-05, + "loss": 0.4816989004611969, + "step": 2994 + }, + { + "epoch": 0.5534062431430519, + "grad_norm": 0.08100791275501251, + "learning_rate": 1.8776181422401683e-05, + "loss": 0.739810585975647, + "step": 2995 + }, + { + "epoch": 0.5535910198519477, + "grad_norm": 0.07574406266212463, + "learning_rate": 1.8775224964360738e-05, + "loss": 0.5386341214179993, + "step": 2996 + }, + { + "epoch": 0.5537757965608435, + "grad_norm": 0.08601616322994232, + "learning_rate": 1.8774268157093295e-05, + "loss": 0.6581635475158691, + "step": 2997 + }, + { + "epoch": 0.5539605732697394, + "grad_norm": 0.07239647209644318, + "learning_rate": 1.877331100063743e-05, + "loss": 0.6122103929519653, + "step": 2998 + }, + { + "epoch": 0.5541453499786352, + "grad_norm": 0.07645408809185028, + "learning_rate": 1.8772353495031236e-05, + "loss": 0.6776065826416016, + "step": 2999 + }, + { + "epoch": 0.554330126687531, + "grad_norm": 0.08867352455854416, + "learning_rate": 1.877139564031282e-05, + "loss": 0.7638838291168213, + "step": 3000 + }, + { + "epoch": 0.554330126687531, + "eval_loss": 0.6894482374191284, + "eval_runtime": 157.2981, + "eval_samples_per_second": 115.888, + "eval_steps_per_second": 14.488, + "step": 3000 + }, + { + "epoch": 0.5545149033964268, + "grad_norm": 0.09155572205781937, + "learning_rate": 1.8770437436520293e-05, + "loss": 0.9210847616195679, + "step": 3001 + }, + { + "epoch": 0.5546996801053228, + "grad_norm": 0.07019831240177155, + "learning_rate": 1.87694788836918e-05, + "loss": 0.6158140897750854, + "step": 3002 + }, + { + "epoch": 0.5548844568142186, + "grad_norm": 0.055636219680309296, + "learning_rate": 1.8768519981865485e-05, + "loss": 0.5288290977478027, + "step": 3003 + }, + { + "epoch": 0.5550692335231144, + "grad_norm": 0.07038795202970505, + "learning_rate": 1.8767560731079504e-05, + "loss": 0.5546280145645142, + "step": 3004 + }, + { + "epoch": 0.5552540102320103, + "grad_norm": 0.07437584549188614, + "learning_rate": 1.876660113137204e-05, + "loss": 0.7176721096038818, + "step": 3005 + }, + { + "epoch": 0.5554387869409061, + "grad_norm": 0.06974802166223526, + "learning_rate": 1.8765641182781274e-05, + "loss": 0.8900144100189209, + "step": 3006 + }, + { + "epoch": 0.5556235636498019, + "grad_norm": 0.07461828738451004, + "learning_rate": 1.8764680885345415e-05, + "loss": 0.6833454966545105, + "step": 3007 + }, + { + "epoch": 0.5558083403586978, + "grad_norm": 0.09632608294487, + "learning_rate": 1.8763720239102682e-05, + "loss": 0.8168778419494629, + "step": 3008 + }, + { + "epoch": 0.5559931170675936, + "grad_norm": 0.06430435180664062, + "learning_rate": 1.8762759244091294e-05, + "loss": 0.48340433835983276, + "step": 3009 + }, + { + "epoch": 0.5561778937764895, + "grad_norm": 0.06506127864122391, + "learning_rate": 1.876179790034951e-05, + "loss": 0.6659327149391174, + "step": 3010 + }, + { + "epoch": 0.5563626704853853, + "grad_norm": 0.0728253498673439, + "learning_rate": 1.8760836207915577e-05, + "loss": 0.6238194704055786, + "step": 3011 + }, + { + "epoch": 0.5565474471942812, + "grad_norm": 0.0691649317741394, + "learning_rate": 1.8759874166827773e-05, + "loss": 0.699463963508606, + "step": 3012 + }, + { + "epoch": 0.556732223903177, + "grad_norm": 0.07036525011062622, + "learning_rate": 1.8758911777124385e-05, + "loss": 0.6164835691452026, + "step": 3013 + }, + { + "epoch": 0.5569170006120728, + "grad_norm": 0.07765907049179077, + "learning_rate": 1.875794903884371e-05, + "loss": 0.5402286648750305, + "step": 3014 + }, + { + "epoch": 0.5571017773209687, + "grad_norm": 0.06866981834173203, + "learning_rate": 1.8756985952024066e-05, + "loss": 0.5985514521598816, + "step": 3015 + }, + { + "epoch": 0.5572865540298645, + "grad_norm": 0.0924587994813919, + "learning_rate": 1.8756022516703774e-05, + "loss": 0.6655313968658447, + "step": 3016 + }, + { + "epoch": 0.5574713307387604, + "grad_norm": 0.06619324535131454, + "learning_rate": 1.875505873292118e-05, + "loss": 0.6115932464599609, + "step": 3017 + }, + { + "epoch": 0.5576561074476563, + "grad_norm": 0.06452200561761856, + "learning_rate": 1.8754094600714646e-05, + "loss": 0.5450788140296936, + "step": 3018 + }, + { + "epoch": 0.5578408841565521, + "grad_norm": 0.08582423627376556, + "learning_rate": 1.875313012012253e-05, + "loss": 0.7833226919174194, + "step": 3019 + }, + { + "epoch": 0.5580256608654479, + "grad_norm": 0.09632756561040878, + "learning_rate": 1.8752165291183216e-05, + "loss": 0.9585537314414978, + "step": 3020 + }, + { + "epoch": 0.5582104375743437, + "grad_norm": 0.08630585670471191, + "learning_rate": 1.8751200113935114e-05, + "loss": 0.8109465837478638, + "step": 3021 + }, + { + "epoch": 0.5583952142832396, + "grad_norm": 0.07621660083532333, + "learning_rate": 1.8750234588416623e-05, + "loss": 0.6150826811790466, + "step": 3022 + }, + { + "epoch": 0.5585799909921354, + "grad_norm": 0.08415526896715164, + "learning_rate": 1.874926871466617e-05, + "loss": 0.7688988447189331, + "step": 3023 + }, + { + "epoch": 0.5587647677010313, + "grad_norm": 0.07290530949831009, + "learning_rate": 1.8748302492722196e-05, + "loss": 0.8483543395996094, + "step": 3024 + }, + { + "epoch": 0.5589495444099272, + "grad_norm": 0.08862084150314331, + "learning_rate": 1.874733592262315e-05, + "loss": 0.6412017941474915, + "step": 3025 + }, + { + "epoch": 0.559134321118823, + "grad_norm": 0.07729244977235794, + "learning_rate": 1.8746369004407505e-05, + "loss": 0.6772671937942505, + "step": 3026 + }, + { + "epoch": 0.5593190978277188, + "grad_norm": 0.0640832781791687, + "learning_rate": 1.8745401738113737e-05, + "loss": 0.6276355385780334, + "step": 3027 + }, + { + "epoch": 0.5595038745366147, + "grad_norm": 0.0748014822602272, + "learning_rate": 1.874443412378034e-05, + "loss": 0.629840612411499, + "step": 3028 + }, + { + "epoch": 0.5596886512455105, + "grad_norm": 0.07689131051301956, + "learning_rate": 1.8743466161445823e-05, + "loss": 0.7019093632698059, + "step": 3029 + }, + { + "epoch": 0.5598734279544063, + "grad_norm": 0.0724974200129509, + "learning_rate": 1.8742497851148708e-05, + "loss": 0.6934340596199036, + "step": 3030 + }, + { + "epoch": 0.5600582046633021, + "grad_norm": 0.09675077348947525, + "learning_rate": 1.8741529192927528e-05, + "loss": 0.7328585982322693, + "step": 3031 + }, + { + "epoch": 0.5602429813721981, + "grad_norm": 0.06351233273744583, + "learning_rate": 1.8740560186820837e-05, + "loss": 0.5489311814308167, + "step": 3032 + }, + { + "epoch": 0.5604277580810939, + "grad_norm": 0.08618681132793427, + "learning_rate": 1.8739590832867197e-05, + "loss": 0.7736677527427673, + "step": 3033 + }, + { + "epoch": 0.5606125347899897, + "grad_norm": 0.0654274970293045, + "learning_rate": 1.873862113110518e-05, + "loss": 0.5988054275512695, + "step": 3034 + }, + { + "epoch": 0.5607973114988856, + "grad_norm": 0.08028063178062439, + "learning_rate": 1.8737651081573387e-05, + "loss": 0.8057206273078918, + "step": 3035 + }, + { + "epoch": 0.5609820882077814, + "grad_norm": 0.07563837617635727, + "learning_rate": 1.8736680684310415e-05, + "loss": 0.6490789651870728, + "step": 3036 + }, + { + "epoch": 0.5611668649166772, + "grad_norm": 0.09664560109376907, + "learning_rate": 1.8735709939354885e-05, + "loss": 0.9325444102287292, + "step": 3037 + }, + { + "epoch": 0.561351641625573, + "grad_norm": 0.05492691323161125, + "learning_rate": 1.8734738846745433e-05, + "loss": 0.45082294940948486, + "step": 3038 + }, + { + "epoch": 0.561536418334469, + "grad_norm": 0.0741322934627533, + "learning_rate": 1.87337674065207e-05, + "loss": 0.7102787494659424, + "step": 3039 + }, + { + "epoch": 0.5617211950433648, + "grad_norm": 0.07162413746118546, + "learning_rate": 1.8732795618719347e-05, + "loss": 0.6887521147727966, + "step": 3040 + }, + { + "epoch": 0.5619059717522606, + "grad_norm": 0.08063210546970367, + "learning_rate": 1.873182348338005e-05, + "loss": 0.7454641461372375, + "step": 3041 + }, + { + "epoch": 0.5620907484611565, + "grad_norm": 0.0753689780831337, + "learning_rate": 1.87308510005415e-05, + "loss": 0.637800931930542, + "step": 3042 + }, + { + "epoch": 0.5622755251700523, + "grad_norm": 0.07068897038698196, + "learning_rate": 1.8729878170242392e-05, + "loss": 0.594787061214447, + "step": 3043 + }, + { + "epoch": 0.5624603018789481, + "grad_norm": 0.07820609211921692, + "learning_rate": 1.8728904992521448e-05, + "loss": 0.8760103583335876, + "step": 3044 + }, + { + "epoch": 0.562645078587844, + "grad_norm": 0.06646838784217834, + "learning_rate": 1.8727931467417394e-05, + "loss": 0.46639692783355713, + "step": 3045 + }, + { + "epoch": 0.5628298552967399, + "grad_norm": 0.08322024345397949, + "learning_rate": 1.8726957594968974e-05, + "loss": 0.6693019866943359, + "step": 3046 + }, + { + "epoch": 0.5630146320056357, + "grad_norm": 0.07052835077047348, + "learning_rate": 1.8725983375214945e-05, + "loss": 0.5950911045074463, + "step": 3047 + }, + { + "epoch": 0.5631994087145316, + "grad_norm": 0.0648900717496872, + "learning_rate": 1.8725008808194074e-05, + "loss": 0.5393177270889282, + "step": 3048 + }, + { + "epoch": 0.5633841854234274, + "grad_norm": 0.0670960322022438, + "learning_rate": 1.872403389394515e-05, + "loss": 0.5252442955970764, + "step": 3049 + }, + { + "epoch": 0.5635689621323232, + "grad_norm": 0.07087778300046921, + "learning_rate": 1.8723058632506975e-05, + "loss": 0.66823810338974, + "step": 3050 + }, + { + "epoch": 0.563753738841219, + "grad_norm": 0.08115480095148087, + "learning_rate": 1.872208302391836e-05, + "loss": 0.6442320346832275, + "step": 3051 + }, + { + "epoch": 0.5639385155501149, + "grad_norm": 0.06653082370758057, + "learning_rate": 1.872110706821812e-05, + "loss": 0.6443053483963013, + "step": 3052 + }, + { + "epoch": 0.5641232922590107, + "grad_norm": 0.09376993775367737, + "learning_rate": 1.8720130765445107e-05, + "loss": 0.8187600374221802, + "step": 3053 + }, + { + "epoch": 0.5643080689679066, + "grad_norm": 0.07879412919282913, + "learning_rate": 1.8719154115638174e-05, + "loss": 0.7443960905075073, + "step": 3054 + }, + { + "epoch": 0.5644928456768025, + "grad_norm": 0.08549877256155014, + "learning_rate": 1.8718177118836185e-05, + "loss": 0.6854761242866516, + "step": 3055 + }, + { + "epoch": 0.5646776223856983, + "grad_norm": 0.0675218477845192, + "learning_rate": 1.8717199775078022e-05, + "loss": 0.7379408478736877, + "step": 3056 + }, + { + "epoch": 0.5648623990945941, + "grad_norm": 0.07826707512140274, + "learning_rate": 1.871622208440258e-05, + "loss": 0.7605068683624268, + "step": 3057 + }, + { + "epoch": 0.56504717580349, + "grad_norm": 0.07787346839904785, + "learning_rate": 1.871524404684877e-05, + "loss": 0.6818773746490479, + "step": 3058 + }, + { + "epoch": 0.5652319525123858, + "grad_norm": 0.06998469680547714, + "learning_rate": 1.871426566245551e-05, + "loss": 0.7140995860099792, + "step": 3059 + }, + { + "epoch": 0.5654167292212816, + "grad_norm": 0.07448789477348328, + "learning_rate": 1.8713286931261742e-05, + "loss": 0.6892114281654358, + "step": 3060 + }, + { + "epoch": 0.5656015059301776, + "grad_norm": 0.07089152187108994, + "learning_rate": 1.871230785330641e-05, + "loss": 0.5666224360466003, + "step": 3061 + }, + { + "epoch": 0.5657862826390734, + "grad_norm": 0.09205210208892822, + "learning_rate": 1.8711328428628492e-05, + "loss": 0.728224515914917, + "step": 3062 + }, + { + "epoch": 0.5659710593479692, + "grad_norm": 0.05926433950662613, + "learning_rate": 1.8710348657266953e-05, + "loss": 0.5452472567558289, + "step": 3063 + }, + { + "epoch": 0.566155836056865, + "grad_norm": 0.07631290704011917, + "learning_rate": 1.8709368539260785e-05, + "loss": 0.6477214694023132, + "step": 3064 + }, + { + "epoch": 0.5663406127657609, + "grad_norm": 0.07008951157331467, + "learning_rate": 1.8708388074649e-05, + "loss": 0.6564326882362366, + "step": 3065 + }, + { + "epoch": 0.5665253894746567, + "grad_norm": 0.07543662935495377, + "learning_rate": 1.8707407263470614e-05, + "loss": 0.6354846358299255, + "step": 3066 + }, + { + "epoch": 0.5667101661835525, + "grad_norm": 0.06850934028625488, + "learning_rate": 1.8706426105764663e-05, + "loss": 0.6425608396530151, + "step": 3067 + }, + { + "epoch": 0.5668949428924485, + "grad_norm": 0.08726583421230316, + "learning_rate": 1.870544460157019e-05, + "loss": 0.8744035363197327, + "step": 3068 + }, + { + "epoch": 0.5670797196013443, + "grad_norm": 0.064764603972435, + "learning_rate": 1.8704462750926258e-05, + "loss": 0.7021517753601074, + "step": 3069 + }, + { + "epoch": 0.5672644963102401, + "grad_norm": 0.06063363328576088, + "learning_rate": 1.870348055387194e-05, + "loss": 0.5126523971557617, + "step": 3070 + }, + { + "epoch": 0.567449273019136, + "grad_norm": 0.0791720300912857, + "learning_rate": 1.870249801044633e-05, + "loss": 0.6843903064727783, + "step": 3071 + }, + { + "epoch": 0.5676340497280318, + "grad_norm": 0.07378975301980972, + "learning_rate": 1.8701515120688522e-05, + "loss": 0.5955958366394043, + "step": 3072 + }, + { + "epoch": 0.5678188264369276, + "grad_norm": 0.06286358088254929, + "learning_rate": 1.8700531884637635e-05, + "loss": 0.511538565158844, + "step": 3073 + }, + { + "epoch": 0.5680036031458234, + "grad_norm": 0.04755840823054314, + "learning_rate": 1.8699548302332802e-05, + "loss": 0.3661784529685974, + "step": 3074 + }, + { + "epoch": 0.5681883798547193, + "grad_norm": 0.056572191417217255, + "learning_rate": 1.8698564373813162e-05, + "loss": 0.5282621383666992, + "step": 3075 + }, + { + "epoch": 0.5683731565636152, + "grad_norm": 0.08536586165428162, + "learning_rate": 1.8697580099117875e-05, + "loss": 0.6903574466705322, + "step": 3076 + }, + { + "epoch": 0.568557933272511, + "grad_norm": 0.0768958032131195, + "learning_rate": 1.869659547828611e-05, + "loss": 0.8080697655677795, + "step": 3077 + }, + { + "epoch": 0.5687427099814069, + "grad_norm": 0.0779389813542366, + "learning_rate": 1.8695610511357055e-05, + "loss": 0.6167462468147278, + "step": 3078 + }, + { + "epoch": 0.5689274866903027, + "grad_norm": 0.07720815390348434, + "learning_rate": 1.869462519836991e-05, + "loss": 0.5635837316513062, + "step": 3079 + }, + { + "epoch": 0.5691122633991985, + "grad_norm": 0.06597666442394257, + "learning_rate": 1.869363953936388e-05, + "loss": 0.5614623427391052, + "step": 3080 + }, + { + "epoch": 0.5692970401080943, + "grad_norm": 0.0675392746925354, + "learning_rate": 1.8692653534378195e-05, + "loss": 0.4289820194244385, + "step": 3081 + }, + { + "epoch": 0.5694818168169902, + "grad_norm": 0.10091688483953476, + "learning_rate": 1.8691667183452096e-05, + "loss": 0.8583399653434753, + "step": 3082 + }, + { + "epoch": 0.5696665935258861, + "grad_norm": 0.046852800995111465, + "learning_rate": 1.8690680486624835e-05, + "loss": 0.46401602029800415, + "step": 3083 + }, + { + "epoch": 0.5698513702347819, + "grad_norm": 0.07693001627922058, + "learning_rate": 1.8689693443935683e-05, + "loss": 0.6485803723335266, + "step": 3084 + }, + { + "epoch": 0.5700361469436778, + "grad_norm": 0.07034293562173843, + "learning_rate": 1.8688706055423916e-05, + "loss": 0.6597393751144409, + "step": 3085 + }, + { + "epoch": 0.5702209236525736, + "grad_norm": 0.08732926100492477, + "learning_rate": 1.8687718321128832e-05, + "loss": 0.7701346278190613, + "step": 3086 + }, + { + "epoch": 0.5704057003614694, + "grad_norm": 0.07584357261657715, + "learning_rate": 1.8686730241089738e-05, + "loss": 0.7044535875320435, + "step": 3087 + }, + { + "epoch": 0.5705904770703653, + "grad_norm": 0.0704139694571495, + "learning_rate": 1.8685741815345958e-05, + "loss": 0.7031932473182678, + "step": 3088 + }, + { + "epoch": 0.5707752537792611, + "grad_norm": 0.08140061050653458, + "learning_rate": 1.8684753043936828e-05, + "loss": 0.6103025078773499, + "step": 3089 + }, + { + "epoch": 0.570960030488157, + "grad_norm": 0.06319376826286316, + "learning_rate": 1.8683763926901697e-05, + "loss": 0.5372883677482605, + "step": 3090 + }, + { + "epoch": 0.5711448071970529, + "grad_norm": 0.07563546299934387, + "learning_rate": 1.8682774464279933e-05, + "loss": 0.6491016745567322, + "step": 3091 + }, + { + "epoch": 0.5713295839059487, + "grad_norm": 0.08996942639350891, + "learning_rate": 1.8681784656110912e-05, + "loss": 0.7877377271652222, + "step": 3092 + }, + { + "epoch": 0.5715143606148445, + "grad_norm": 0.07759862393140793, + "learning_rate": 1.8680794502434018e-05, + "loss": 0.8040440678596497, + "step": 3093 + }, + { + "epoch": 0.5716991373237403, + "grad_norm": 0.06511888653039932, + "learning_rate": 1.8679804003288664e-05, + "loss": 0.5631576776504517, + "step": 3094 + }, + { + "epoch": 0.5718839140326362, + "grad_norm": 0.0843639001250267, + "learning_rate": 1.8678813158714266e-05, + "loss": 0.6956359148025513, + "step": 3095 + }, + { + "epoch": 0.572068690741532, + "grad_norm": 0.05912714824080467, + "learning_rate": 1.8677821968750257e-05, + "loss": 0.4489075541496277, + "step": 3096 + }, + { + "epoch": 0.5722534674504278, + "grad_norm": 0.06777922064065933, + "learning_rate": 1.8676830433436082e-05, + "loss": 0.7072759866714478, + "step": 3097 + }, + { + "epoch": 0.5724382441593238, + "grad_norm": 0.07907670736312866, + "learning_rate": 1.8675838552811204e-05, + "loss": 0.7033583521842957, + "step": 3098 + }, + { + "epoch": 0.5726230208682196, + "grad_norm": 0.06337752193212509, + "learning_rate": 1.8674846326915092e-05, + "loss": 0.47836634516716003, + "step": 3099 + }, + { + "epoch": 0.5728077975771154, + "grad_norm": 0.06873060017824173, + "learning_rate": 1.867385375578724e-05, + "loss": 0.6064472794532776, + "step": 3100 + }, + { + "epoch": 0.5729925742860112, + "grad_norm": 0.06846865266561508, + "learning_rate": 1.8672860839467143e-05, + "loss": 0.569277822971344, + "step": 3101 + }, + { + "epoch": 0.5731773509949071, + "grad_norm": 0.07393836230039597, + "learning_rate": 1.867186757799432e-05, + "loss": 0.6339954137802124, + "step": 3102 + }, + { + "epoch": 0.5733621277038029, + "grad_norm": 0.08977872878313065, + "learning_rate": 1.8670873971408298e-05, + "loss": 0.7323946356773376, + "step": 3103 + }, + { + "epoch": 0.5735469044126987, + "grad_norm": 0.07141544669866562, + "learning_rate": 1.8669880019748618e-05, + "loss": 0.6208509206771851, + "step": 3104 + }, + { + "epoch": 0.5737316811215947, + "grad_norm": 0.07710936665534973, + "learning_rate": 1.8668885723054838e-05, + "loss": 0.651936411857605, + "step": 3105 + }, + { + "epoch": 0.5739164578304905, + "grad_norm": 0.06347020715475082, + "learning_rate": 1.866789108136653e-05, + "loss": 0.6423757672309875, + "step": 3106 + }, + { + "epoch": 0.5741012345393863, + "grad_norm": 0.06215847283601761, + "learning_rate": 1.866689609472327e-05, + "loss": 0.5604876279830933, + "step": 3107 + }, + { + "epoch": 0.5742860112482822, + "grad_norm": 0.07825514674186707, + "learning_rate": 1.8665900763164665e-05, + "loss": 0.6292716264724731, + "step": 3108 + }, + { + "epoch": 0.574470787957178, + "grad_norm": 0.08346331119537354, + "learning_rate": 1.8664905086730324e-05, + "loss": 0.6163325309753418, + "step": 3109 + }, + { + "epoch": 0.5746555646660738, + "grad_norm": 0.0739307776093483, + "learning_rate": 1.8663909065459866e-05, + "loss": 0.5332595705986023, + "step": 3110 + }, + { + "epoch": 0.5748403413749696, + "grad_norm": 0.0750046968460083, + "learning_rate": 1.8662912699392933e-05, + "loss": 0.6106127500534058, + "step": 3111 + }, + { + "epoch": 0.5750251180838656, + "grad_norm": 0.08194831013679504, + "learning_rate": 1.8661915988569177e-05, + "loss": 0.7189168930053711, + "step": 3112 + }, + { + "epoch": 0.5752098947927614, + "grad_norm": 0.06915279477834702, + "learning_rate": 1.8660918933028267e-05, + "loss": 0.5820999145507812, + "step": 3113 + }, + { + "epoch": 0.5753946715016572, + "grad_norm": 0.0856345072388649, + "learning_rate": 1.8659921532809878e-05, + "loss": 0.8091294765472412, + "step": 3114 + }, + { + "epoch": 0.5755794482105531, + "grad_norm": 0.07053239643573761, + "learning_rate": 1.8658923787953705e-05, + "loss": 0.7515913844108582, + "step": 3115 + }, + { + "epoch": 0.5757642249194489, + "grad_norm": 0.07171469181776047, + "learning_rate": 1.8657925698499457e-05, + "loss": 0.6084983348846436, + "step": 3116 + }, + { + "epoch": 0.5759490016283447, + "grad_norm": 0.0760403648018837, + "learning_rate": 1.865692726448685e-05, + "loss": 0.5334648489952087, + "step": 3117 + }, + { + "epoch": 0.5761337783372406, + "grad_norm": 0.06720221787691116, + "learning_rate": 1.8655928485955628e-05, + "loss": 0.5867680311203003, + "step": 3118 + }, + { + "epoch": 0.5763185550461364, + "grad_norm": 0.077094167470932, + "learning_rate": 1.865492936294553e-05, + "loss": 0.7712277770042419, + "step": 3119 + }, + { + "epoch": 0.5765033317550323, + "grad_norm": 0.08528798073530197, + "learning_rate": 1.865392989549632e-05, + "loss": 0.7741163969039917, + "step": 3120 + }, + { + "epoch": 0.5766881084639282, + "grad_norm": 0.0766475647687912, + "learning_rate": 1.8652930083647774e-05, + "loss": 0.7091198563575745, + "step": 3121 + }, + { + "epoch": 0.576872885172824, + "grad_norm": 0.06112033501267433, + "learning_rate": 1.8651929927439684e-05, + "loss": 0.44175073504447937, + "step": 3122 + }, + { + "epoch": 0.5770576618817198, + "grad_norm": 0.0839383453130722, + "learning_rate": 1.8650929426911853e-05, + "loss": 0.8505361080169678, + "step": 3123 + }, + { + "epoch": 0.5772424385906156, + "grad_norm": 0.07571355998516083, + "learning_rate": 1.8649928582104097e-05, + "loss": 0.6146236658096313, + "step": 3124 + }, + { + "epoch": 0.5774272152995115, + "grad_norm": 0.07697071135044098, + "learning_rate": 1.864892739305624e-05, + "loss": 0.6030546426773071, + "step": 3125 + }, + { + "epoch": 0.5776119920084073, + "grad_norm": 0.06890033930540085, + "learning_rate": 1.8647925859808135e-05, + "loss": 0.6683984994888306, + "step": 3126 + }, + { + "epoch": 0.5777967687173032, + "grad_norm": 0.06484845280647278, + "learning_rate": 1.8646923982399636e-05, + "loss": 0.5480669140815735, + "step": 3127 + }, + { + "epoch": 0.5779815454261991, + "grad_norm": 0.08451871573925018, + "learning_rate": 1.8645921760870616e-05, + "loss": 0.6453613638877869, + "step": 3128 + }, + { + "epoch": 0.5781663221350949, + "grad_norm": 0.06641737371683121, + "learning_rate": 1.864491919526096e-05, + "loss": 0.47079309821128845, + "step": 3129 + }, + { + "epoch": 0.5783510988439907, + "grad_norm": 0.06663155555725098, + "learning_rate": 1.8643916285610565e-05, + "loss": 0.6069074273109436, + "step": 3130 + }, + { + "epoch": 0.5785358755528865, + "grad_norm": 0.06879729777574539, + "learning_rate": 1.8642913031959345e-05, + "loss": 0.5753926634788513, + "step": 3131 + }, + { + "epoch": 0.5787206522617824, + "grad_norm": 0.08380347490310669, + "learning_rate": 1.8641909434347226e-05, + "loss": 0.6205527782440186, + "step": 3132 + }, + { + "epoch": 0.5789054289706782, + "grad_norm": 0.08263260871171951, + "learning_rate": 1.8640905492814153e-05, + "loss": 0.6053383350372314, + "step": 3133 + }, + { + "epoch": 0.5790902056795741, + "grad_norm": 0.09215070307254791, + "learning_rate": 1.863990120740007e-05, + "loss": 0.7877563238143921, + "step": 3134 + }, + { + "epoch": 0.57927498238847, + "grad_norm": 0.06187755614519119, + "learning_rate": 1.8638896578144955e-05, + "loss": 0.43906375765800476, + "step": 3135 + }, + { + "epoch": 0.5794597590973658, + "grad_norm": 0.07065249234437943, + "learning_rate": 1.863789160508878e-05, + "loss": 0.5221515893936157, + "step": 3136 + }, + { + "epoch": 0.5796445358062616, + "grad_norm": 0.07332364469766617, + "learning_rate": 1.8636886288271542e-05, + "loss": 0.6718504428863525, + "step": 3137 + }, + { + "epoch": 0.5798293125151575, + "grad_norm": 0.08444932848215103, + "learning_rate": 1.8635880627733255e-05, + "loss": 0.7537721991539001, + "step": 3138 + }, + { + "epoch": 0.5800140892240533, + "grad_norm": 0.08700313419103622, + "learning_rate": 1.8634874623513938e-05, + "loss": 0.6897266507148743, + "step": 3139 + }, + { + "epoch": 0.5801988659329491, + "grad_norm": 0.07157497107982635, + "learning_rate": 1.8633868275653622e-05, + "loss": 0.521382749080658, + "step": 3140 + }, + { + "epoch": 0.5803836426418449, + "grad_norm": 0.06812208890914917, + "learning_rate": 1.863286158419236e-05, + "loss": 0.5932210683822632, + "step": 3141 + }, + { + "epoch": 0.5805684193507409, + "grad_norm": 0.07344353944063187, + "learning_rate": 1.863185454917022e-05, + "loss": 0.6644601821899414, + "step": 3142 + }, + { + "epoch": 0.5807531960596367, + "grad_norm": 0.07347593456506729, + "learning_rate": 1.8630847170627272e-05, + "loss": 0.6279237270355225, + "step": 3143 + }, + { + "epoch": 0.5809379727685325, + "grad_norm": 0.07865365594625473, + "learning_rate": 1.862983944860361e-05, + "loss": 0.8507033586502075, + "step": 3144 + }, + { + "epoch": 0.5811227494774284, + "grad_norm": 0.06068730726838112, + "learning_rate": 1.8628831383139336e-05, + "loss": 0.4948467016220093, + "step": 3145 + }, + { + "epoch": 0.5813075261863242, + "grad_norm": 0.07076761871576309, + "learning_rate": 1.8627822974274574e-05, + "loss": 0.7468083500862122, + "step": 3146 + }, + { + "epoch": 0.58149230289522, + "grad_norm": 0.08117230981588364, + "learning_rate": 1.8626814222049444e-05, + "loss": 0.5720183849334717, + "step": 3147 + }, + { + "epoch": 0.5816770796041159, + "grad_norm": 0.06479369103908539, + "learning_rate": 1.86258051265041e-05, + "loss": 0.652022659778595, + "step": 3148 + }, + { + "epoch": 0.5818618563130118, + "grad_norm": 0.06874661147594452, + "learning_rate": 1.86247956876787e-05, + "loss": 0.6422584056854248, + "step": 3149 + }, + { + "epoch": 0.5820466330219076, + "grad_norm": 0.06053101271390915, + "learning_rate": 1.8623785905613416e-05, + "loss": 0.6103196144104004, + "step": 3150 + }, + { + "epoch": 0.5822314097308035, + "grad_norm": 0.056345485150814056, + "learning_rate": 1.862277578034843e-05, + "loss": 0.45207127928733826, + "step": 3151 + }, + { + "epoch": 0.5824161864396993, + "grad_norm": 0.07697878777980804, + "learning_rate": 1.8621765311923945e-05, + "loss": 0.7261789441108704, + "step": 3152 + }, + { + "epoch": 0.5826009631485951, + "grad_norm": 0.07872536778450012, + "learning_rate": 1.8620754500380177e-05, + "loss": 0.7119261622428894, + "step": 3153 + }, + { + "epoch": 0.5827857398574909, + "grad_norm": 0.06227670609951019, + "learning_rate": 1.861974334575735e-05, + "loss": 0.47681817412376404, + "step": 3154 + }, + { + "epoch": 0.5829705165663868, + "grad_norm": 0.056329309940338135, + "learning_rate": 1.8618731848095706e-05, + "loss": 0.46987074613571167, + "step": 3155 + }, + { + "epoch": 0.5831552932752827, + "grad_norm": 0.06145971640944481, + "learning_rate": 1.8617720007435497e-05, + "loss": 0.6057943105697632, + "step": 3156 + }, + { + "epoch": 0.5833400699841785, + "grad_norm": 0.08054883033037186, + "learning_rate": 1.8616707823816994e-05, + "loss": 0.6347730159759521, + "step": 3157 + }, + { + "epoch": 0.5835248466930744, + "grad_norm": 0.08149060606956482, + "learning_rate": 1.8615695297280482e-05, + "loss": 0.9930283427238464, + "step": 3158 + }, + { + "epoch": 0.5837096234019702, + "grad_norm": 0.08788027614355087, + "learning_rate": 1.8614682427866246e-05, + "loss": 0.8771606087684631, + "step": 3159 + }, + { + "epoch": 0.583894400110866, + "grad_norm": 0.08429764211177826, + "learning_rate": 1.8613669215614605e-05, + "loss": 0.7415722012519836, + "step": 3160 + }, + { + "epoch": 0.5840791768197618, + "grad_norm": 0.07797253876924515, + "learning_rate": 1.8612655660565877e-05, + "loss": 0.6256560683250427, + "step": 3161 + }, + { + "epoch": 0.5842639535286577, + "grad_norm": 0.0795632153749466, + "learning_rate": 1.8611641762760398e-05, + "loss": 0.7124666571617126, + "step": 3162 + }, + { + "epoch": 0.5844487302375535, + "grad_norm": 0.07458134740591049, + "learning_rate": 1.861062752223852e-05, + "loss": 0.6469964981079102, + "step": 3163 + }, + { + "epoch": 0.5846335069464494, + "grad_norm": 0.0760737806558609, + "learning_rate": 1.860961293904061e-05, + "loss": 0.7084463238716125, + "step": 3164 + }, + { + "epoch": 0.5848182836553453, + "grad_norm": 0.09203182905912399, + "learning_rate": 1.8608598013207034e-05, + "loss": 0.74711674451828, + "step": 3165 + }, + { + "epoch": 0.5850030603642411, + "grad_norm": 0.06944387406110764, + "learning_rate": 1.8607582744778193e-05, + "loss": 0.522801399230957, + "step": 3166 + }, + { + "epoch": 0.5851878370731369, + "grad_norm": 0.060882631689310074, + "learning_rate": 1.860656713379449e-05, + "loss": 0.5053161382675171, + "step": 3167 + }, + { + "epoch": 0.5853726137820328, + "grad_norm": 0.0728553980588913, + "learning_rate": 1.860555118029634e-05, + "loss": 0.5969119668006897, + "step": 3168 + }, + { + "epoch": 0.5855573904909286, + "grad_norm": 0.07058120518922806, + "learning_rate": 1.8604534884324173e-05, + "loss": 0.596505343914032, + "step": 3169 + }, + { + "epoch": 0.5857421671998244, + "grad_norm": 0.08228509873151779, + "learning_rate": 1.8603518245918444e-05, + "loss": 0.7489085793495178, + "step": 3170 + }, + { + "epoch": 0.5859269439087204, + "grad_norm": 0.08472972363233566, + "learning_rate": 1.8602501265119604e-05, + "loss": 0.7827440500259399, + "step": 3171 + }, + { + "epoch": 0.5861117206176162, + "grad_norm": 0.06148391589522362, + "learning_rate": 1.8601483941968127e-05, + "loss": 0.48352840542793274, + "step": 3172 + }, + { + "epoch": 0.586296497326512, + "grad_norm": 0.07577600330114365, + "learning_rate": 1.8600466276504496e-05, + "loss": 0.6786936521530151, + "step": 3173 + }, + { + "epoch": 0.5864812740354078, + "grad_norm": 0.07122032344341278, + "learning_rate": 1.859944826876922e-05, + "loss": 0.6640927791595459, + "step": 3174 + }, + { + "epoch": 0.5866660507443037, + "grad_norm": 0.05961944907903671, + "learning_rate": 1.8598429918802802e-05, + "loss": 0.5764791965484619, + "step": 3175 + }, + { + "epoch": 0.5868508274531995, + "grad_norm": 0.0938916951417923, + "learning_rate": 1.859741122664578e-05, + "loss": 0.8614349961280823, + "step": 3176 + }, + { + "epoch": 0.5870356041620953, + "grad_norm": 0.06286361813545227, + "learning_rate": 1.8596392192338687e-05, + "loss": 0.6520477533340454, + "step": 3177 + }, + { + "epoch": 0.5872203808709913, + "grad_norm": 0.07921268045902252, + "learning_rate": 1.8595372815922076e-05, + "loss": 0.6980974674224854, + "step": 3178 + }, + { + "epoch": 0.5874051575798871, + "grad_norm": 0.07156159728765488, + "learning_rate": 1.859435309743652e-05, + "loss": 0.6495121121406555, + "step": 3179 + }, + { + "epoch": 0.5875899342887829, + "grad_norm": 0.06057432293891907, + "learning_rate": 1.8593333036922604e-05, + "loss": 0.5264384150505066, + "step": 3180 + }, + { + "epoch": 0.5877747109976788, + "grad_norm": 0.05280206352472305, + "learning_rate": 1.8592312634420912e-05, + "loss": 0.3305424451828003, + "step": 3181 + }, + { + "epoch": 0.5879594877065746, + "grad_norm": 0.0661168247461319, + "learning_rate": 1.859129188997206e-05, + "loss": 0.5634755492210388, + "step": 3182 + }, + { + "epoch": 0.5881442644154704, + "grad_norm": 0.06081791967153549, + "learning_rate": 1.8590270803616673e-05, + "loss": 0.6199816465377808, + "step": 3183 + }, + { + "epoch": 0.5883290411243662, + "grad_norm": 0.07526887953281403, + "learning_rate": 1.8589249375395382e-05, + "loss": 0.6815241575241089, + "step": 3184 + }, + { + "epoch": 0.5885138178332621, + "grad_norm": 0.07210526615381241, + "learning_rate": 1.8588227605348836e-05, + "loss": 0.5609758496284485, + "step": 3185 + }, + { + "epoch": 0.588698594542158, + "grad_norm": 0.07922544330358505, + "learning_rate": 1.8587205493517703e-05, + "loss": 0.7402794361114502, + "step": 3186 + }, + { + "epoch": 0.5888833712510538, + "grad_norm": 0.08084230870008469, + "learning_rate": 1.8586183039942654e-05, + "loss": 0.8662818670272827, + "step": 3187 + }, + { + "epoch": 0.5890681479599497, + "grad_norm": 0.06553030014038086, + "learning_rate": 1.8585160244664386e-05, + "loss": 0.5434334874153137, + "step": 3188 + }, + { + "epoch": 0.5892529246688455, + "grad_norm": 0.07362144440412521, + "learning_rate": 1.85841371077236e-05, + "loss": 0.73642897605896, + "step": 3189 + }, + { + "epoch": 0.5894377013777413, + "grad_norm": 0.07301516830921173, + "learning_rate": 1.858311362916101e-05, + "loss": 0.618234395980835, + "step": 3190 + }, + { + "epoch": 0.5896224780866371, + "grad_norm": 0.07383144646883011, + "learning_rate": 1.8582089809017352e-05, + "loss": 0.6209704875946045, + "step": 3191 + }, + { + "epoch": 0.589807254795533, + "grad_norm": 0.07683329284191132, + "learning_rate": 1.8581065647333368e-05, + "loss": 0.6593388319015503, + "step": 3192 + }, + { + "epoch": 0.5899920315044289, + "grad_norm": 0.07412169873714447, + "learning_rate": 1.8580041144149822e-05, + "loss": 0.5662711262702942, + "step": 3193 + }, + { + "epoch": 0.5901768082133247, + "grad_norm": 0.07404091209173203, + "learning_rate": 1.8579016299507482e-05, + "loss": 0.6805081367492676, + "step": 3194 + }, + { + "epoch": 0.5903615849222206, + "grad_norm": 0.08029306679964066, + "learning_rate": 1.857799111344713e-05, + "loss": 0.6811212301254272, + "step": 3195 + }, + { + "epoch": 0.5905463616311164, + "grad_norm": 0.07096394896507263, + "learning_rate": 1.857696558600957e-05, + "loss": 0.684027373790741, + "step": 3196 + }, + { + "epoch": 0.5907311383400122, + "grad_norm": 0.07632946223020554, + "learning_rate": 1.8575939717235614e-05, + "loss": 0.7099227905273438, + "step": 3197 + }, + { + "epoch": 0.5909159150489081, + "grad_norm": 0.06812963634729385, + "learning_rate": 1.857491350716609e-05, + "loss": 0.7205207943916321, + "step": 3198 + }, + { + "epoch": 0.5911006917578039, + "grad_norm": 0.07323703914880753, + "learning_rate": 1.857388695584183e-05, + "loss": 0.6930529475212097, + "step": 3199 + }, + { + "epoch": 0.5912854684666998, + "grad_norm": 0.0693032518029213, + "learning_rate": 1.85728600633037e-05, + "loss": 0.508209228515625, + "step": 3200 + }, + { + "epoch": 0.5914702451755957, + "grad_norm": 0.07588290423154831, + "learning_rate": 1.8571832829592557e-05, + "loss": 0.6380006074905396, + "step": 3201 + }, + { + "epoch": 0.5916550218844915, + "grad_norm": 0.06168055534362793, + "learning_rate": 1.8570805254749288e-05, + "loss": 0.5089638829231262, + "step": 3202 + }, + { + "epoch": 0.5918397985933873, + "grad_norm": 0.07789887487888336, + "learning_rate": 1.856977733881478e-05, + "loss": 0.6378219127655029, + "step": 3203 + }, + { + "epoch": 0.5920245753022831, + "grad_norm": 0.054984163492918015, + "learning_rate": 1.856874908182995e-05, + "loss": 0.3924255073070526, + "step": 3204 + }, + { + "epoch": 0.592209352011179, + "grad_norm": 0.08939534425735474, + "learning_rate": 1.856772048383571e-05, + "loss": 0.583304762840271, + "step": 3205 + }, + { + "epoch": 0.5923941287200748, + "grad_norm": 0.09006770700216293, + "learning_rate": 1.8566691544873003e-05, + "loss": 0.8556082844734192, + "step": 3206 + }, + { + "epoch": 0.5925789054289706, + "grad_norm": 0.07439000904560089, + "learning_rate": 1.8565662264982772e-05, + "loss": 0.5220317244529724, + "step": 3207 + }, + { + "epoch": 0.5927636821378666, + "grad_norm": 0.08255641162395477, + "learning_rate": 1.8564632644205984e-05, + "loss": 0.72211092710495, + "step": 3208 + }, + { + "epoch": 0.5929484588467624, + "grad_norm": 0.07791288942098618, + "learning_rate": 1.856360268258361e-05, + "loss": 0.6217178106307983, + "step": 3209 + }, + { + "epoch": 0.5931332355556582, + "grad_norm": 0.0743233859539032, + "learning_rate": 1.856257238015664e-05, + "loss": 0.6467982530593872, + "step": 3210 + }, + { + "epoch": 0.593318012264554, + "grad_norm": 0.08191141486167908, + "learning_rate": 1.8561541736966085e-05, + "loss": 0.8423718214035034, + "step": 3211 + }, + { + "epoch": 0.5935027889734499, + "grad_norm": 0.06875808537006378, + "learning_rate": 1.8560510753052948e-05, + "loss": 0.6221856474876404, + "step": 3212 + }, + { + "epoch": 0.5936875656823457, + "grad_norm": 0.07226449251174927, + "learning_rate": 1.8559479428458267e-05, + "loss": 0.5470890402793884, + "step": 3213 + }, + { + "epoch": 0.5938723423912415, + "grad_norm": 0.06766360253095627, + "learning_rate": 1.8558447763223083e-05, + "loss": 0.6161670684814453, + "step": 3214 + }, + { + "epoch": 0.5940571191001375, + "grad_norm": 0.08009753376245499, + "learning_rate": 1.8557415757388456e-05, + "loss": 0.6970452070236206, + "step": 3215 + }, + { + "epoch": 0.5942418958090333, + "grad_norm": 0.06258440017700195, + "learning_rate": 1.8556383410995454e-05, + "loss": 0.45787256956100464, + "step": 3216 + }, + { + "epoch": 0.5944266725179291, + "grad_norm": 0.06200527027249336, + "learning_rate": 1.855535072408516e-05, + "loss": 0.5589002370834351, + "step": 3217 + }, + { + "epoch": 0.594611449226825, + "grad_norm": 0.06871971487998962, + "learning_rate": 1.8554317696698676e-05, + "loss": 0.4686943590641022, + "step": 3218 + }, + { + "epoch": 0.5947962259357208, + "grad_norm": 0.07597262412309647, + "learning_rate": 1.855328432887711e-05, + "loss": 0.6524820923805237, + "step": 3219 + }, + { + "epoch": 0.5949810026446166, + "grad_norm": 0.07778976857662201, + "learning_rate": 1.8552250620661585e-05, + "loss": 0.5683805346488953, + "step": 3220 + }, + { + "epoch": 0.5951657793535124, + "grad_norm": 0.07574175298213959, + "learning_rate": 1.8551216572093246e-05, + "loss": 0.6568016409873962, + "step": 3221 + }, + { + "epoch": 0.5953505560624084, + "grad_norm": 0.09157819300889969, + "learning_rate": 1.8550182183213238e-05, + "loss": 0.6208380460739136, + "step": 3222 + }, + { + "epoch": 0.5955353327713042, + "grad_norm": 0.07280399650335312, + "learning_rate": 1.8549147454062728e-05, + "loss": 0.6524849534034729, + "step": 3223 + }, + { + "epoch": 0.5957201094802, + "grad_norm": 0.0947246253490448, + "learning_rate": 1.85481123846829e-05, + "loss": 0.7721500396728516, + "step": 3224 + }, + { + "epoch": 0.5959048861890959, + "grad_norm": 0.0865185558795929, + "learning_rate": 1.854707697511494e-05, + "loss": 0.7139161825180054, + "step": 3225 + }, + { + "epoch": 0.5960896628979917, + "grad_norm": 0.06716441363096237, + "learning_rate": 1.854604122540006e-05, + "loss": 0.5239250659942627, + "step": 3226 + }, + { + "epoch": 0.5962744396068875, + "grad_norm": 0.059036269783973694, + "learning_rate": 1.854500513557947e-05, + "loss": 0.47485873103141785, + "step": 3227 + }, + { + "epoch": 0.5964592163157834, + "grad_norm": 0.07232226431369781, + "learning_rate": 1.8543968705694414e-05, + "loss": 0.6586685180664062, + "step": 3228 + }, + { + "epoch": 0.5966439930246792, + "grad_norm": 0.07339149713516235, + "learning_rate": 1.8542931935786133e-05, + "loss": 0.7041042447090149, + "step": 3229 + }, + { + "epoch": 0.5968287697335751, + "grad_norm": 0.08636587113142014, + "learning_rate": 1.854189482589589e-05, + "loss": 0.6246604323387146, + "step": 3230 + }, + { + "epoch": 0.597013546442471, + "grad_norm": 0.08113526552915573, + "learning_rate": 1.8540857376064956e-05, + "loss": 0.7897917628288269, + "step": 3231 + }, + { + "epoch": 0.5971983231513668, + "grad_norm": 0.06901397556066513, + "learning_rate": 1.8539819586334617e-05, + "loss": 0.5459996461868286, + "step": 3232 + }, + { + "epoch": 0.5973830998602626, + "grad_norm": 0.07561596482992172, + "learning_rate": 1.8538781456746183e-05, + "loss": 0.5742215514183044, + "step": 3233 + }, + { + "epoch": 0.5975678765691584, + "grad_norm": 0.07877890765666962, + "learning_rate": 1.8537742987340955e-05, + "loss": 0.6900753974914551, + "step": 3234 + }, + { + "epoch": 0.5977526532780543, + "grad_norm": 0.09065587818622589, + "learning_rate": 1.853670417816027e-05, + "loss": 0.7630480527877808, + "step": 3235 + }, + { + "epoch": 0.5979374299869501, + "grad_norm": 0.08781592547893524, + "learning_rate": 1.8535665029245463e-05, + "loss": 0.7213505506515503, + "step": 3236 + }, + { + "epoch": 0.598122206695846, + "grad_norm": 0.07441502809524536, + "learning_rate": 1.8534625540637897e-05, + "loss": 0.537497878074646, + "step": 3237 + }, + { + "epoch": 0.5983069834047419, + "grad_norm": 0.08751673996448517, + "learning_rate": 1.853358571237893e-05, + "loss": 0.8879984021186829, + "step": 3238 + }, + { + "epoch": 0.5984917601136377, + "grad_norm": 0.08641214668750763, + "learning_rate": 1.8532545544509955e-05, + "loss": 0.772212028503418, + "step": 3239 + }, + { + "epoch": 0.5986765368225335, + "grad_norm": 0.06951478868722916, + "learning_rate": 1.8531505037072363e-05, + "loss": 0.6568793654441833, + "step": 3240 + }, + { + "epoch": 0.5988613135314294, + "grad_norm": 0.08141907304525375, + "learning_rate": 1.853046419010756e-05, + "loss": 0.6164892911911011, + "step": 3241 + }, + { + "epoch": 0.5990460902403252, + "grad_norm": 0.05399297922849655, + "learning_rate": 1.852942300365697e-05, + "loss": 0.43579041957855225, + "step": 3242 + }, + { + "epoch": 0.599230866949221, + "grad_norm": 0.0674586072564125, + "learning_rate": 1.852838147776203e-05, + "loss": 0.6669760942459106, + "step": 3243 + }, + { + "epoch": 0.599415643658117, + "grad_norm": 0.05470862612128258, + "learning_rate": 1.8527339612464192e-05, + "loss": 0.37778860330581665, + "step": 3244 + }, + { + "epoch": 0.5996004203670128, + "grad_norm": 0.07815508544445038, + "learning_rate": 1.8526297407804915e-05, + "loss": 0.6272417306900024, + "step": 3245 + }, + { + "epoch": 0.5997851970759086, + "grad_norm": 0.0828259065747261, + "learning_rate": 1.852525486382567e-05, + "loss": 0.8422073721885681, + "step": 3246 + }, + { + "epoch": 0.5999699737848044, + "grad_norm": 0.07688503712415695, + "learning_rate": 1.852421198056796e-05, + "loss": 0.6805994510650635, + "step": 3247 + }, + { + "epoch": 0.6001547504937003, + "grad_norm": 0.0888686254620552, + "learning_rate": 1.8523168758073283e-05, + "loss": 0.7215426564216614, + "step": 3248 + }, + { + "epoch": 0.6003395272025961, + "grad_norm": 0.06879184395074844, + "learning_rate": 1.8522125196383154e-05, + "loss": 0.6762117147445679, + "step": 3249 + }, + { + "epoch": 0.6005243039114919, + "grad_norm": 0.07216864079236984, + "learning_rate": 1.8521081295539102e-05, + "loss": 0.6251771450042725, + "step": 3250 + }, + { + "epoch": 0.6007090806203877, + "grad_norm": 0.07336558401584625, + "learning_rate": 1.8520037055582675e-05, + "loss": 0.6005572080612183, + "step": 3251 + }, + { + "epoch": 0.6008938573292837, + "grad_norm": 0.08599632233381271, + "learning_rate": 1.851899247655543e-05, + "loss": 0.7398759126663208, + "step": 3252 + }, + { + "epoch": 0.6010786340381795, + "grad_norm": 0.057038020342588425, + "learning_rate": 1.8517947558498936e-05, + "loss": 0.44321659207344055, + "step": 3253 + }, + { + "epoch": 0.6012634107470753, + "grad_norm": 0.06850314885377884, + "learning_rate": 1.8516902301454775e-05, + "loss": 0.5966536402702332, + "step": 3254 + }, + { + "epoch": 0.6014481874559712, + "grad_norm": 0.0940929725766182, + "learning_rate": 1.8515856705464553e-05, + "loss": 0.8563060164451599, + "step": 3255 + }, + { + "epoch": 0.601632964164867, + "grad_norm": 0.07415281236171722, + "learning_rate": 1.8514810770569872e-05, + "loss": 0.7482355237007141, + "step": 3256 + }, + { + "epoch": 0.6018177408737628, + "grad_norm": 0.09791791439056396, + "learning_rate": 1.8513764496812366e-05, + "loss": 0.8284050822257996, + "step": 3257 + }, + { + "epoch": 0.6020025175826587, + "grad_norm": 0.05737827345728874, + "learning_rate": 1.851271788423367e-05, + "loss": 0.43601882457733154, + "step": 3258 + }, + { + "epoch": 0.6021872942915546, + "grad_norm": 0.07074494659900665, + "learning_rate": 1.8511670932875432e-05, + "loss": 0.6547046899795532, + "step": 3259 + }, + { + "epoch": 0.6023720710004504, + "grad_norm": 0.08046936243772507, + "learning_rate": 1.8510623642779322e-05, + "loss": 0.5685117840766907, + "step": 3260 + }, + { + "epoch": 0.6025568477093463, + "grad_norm": 0.0642126202583313, + "learning_rate": 1.8509576013987015e-05, + "loss": 0.5987184643745422, + "step": 3261 + }, + { + "epoch": 0.6027416244182421, + "grad_norm": 0.06426668912172318, + "learning_rate": 1.850852804654021e-05, + "loss": 0.5323985815048218, + "step": 3262 + }, + { + "epoch": 0.6029264011271379, + "grad_norm": 0.07552376389503479, + "learning_rate": 1.8507479740480608e-05, + "loss": 0.6078667044639587, + "step": 3263 + }, + { + "epoch": 0.6031111778360337, + "grad_norm": 0.07132647186517715, + "learning_rate": 1.8506431095849927e-05, + "loss": 0.5994269251823425, + "step": 3264 + }, + { + "epoch": 0.6032959545449296, + "grad_norm": 0.07693766802549362, + "learning_rate": 1.85053821126899e-05, + "loss": 0.6154434680938721, + "step": 3265 + }, + { + "epoch": 0.6034807312538255, + "grad_norm": 0.0722254142165184, + "learning_rate": 1.8504332791042276e-05, + "loss": 0.586520791053772, + "step": 3266 + }, + { + "epoch": 0.6036655079627213, + "grad_norm": 0.0783991739153862, + "learning_rate": 1.8503283130948813e-05, + "loss": 0.5686954259872437, + "step": 3267 + }, + { + "epoch": 0.6038502846716172, + "grad_norm": 0.08890886604785919, + "learning_rate": 1.8502233132451285e-05, + "loss": 0.6486931443214417, + "step": 3268 + }, + { + "epoch": 0.604035061380513, + "grad_norm": 0.05799055099487305, + "learning_rate": 1.850118279559148e-05, + "loss": 0.5202708840370178, + "step": 3269 + }, + { + "epoch": 0.6042198380894088, + "grad_norm": 0.07490106672048569, + "learning_rate": 1.8500132120411195e-05, + "loss": 0.7785816788673401, + "step": 3270 + }, + { + "epoch": 0.6044046147983047, + "grad_norm": 0.07757486402988434, + "learning_rate": 1.8499081106952247e-05, + "loss": 0.8517777323722839, + "step": 3271 + }, + { + "epoch": 0.6045893915072005, + "grad_norm": 0.06630745530128479, + "learning_rate": 1.849802975525646e-05, + "loss": 0.5189116597175598, + "step": 3272 + }, + { + "epoch": 0.6047741682160963, + "grad_norm": 0.0747363269329071, + "learning_rate": 1.8496978065365677e-05, + "loss": 0.7185724973678589, + "step": 3273 + }, + { + "epoch": 0.6049589449249922, + "grad_norm": 0.05487100034952164, + "learning_rate": 1.8495926037321747e-05, + "loss": 0.45234984159469604, + "step": 3274 + }, + { + "epoch": 0.6051437216338881, + "grad_norm": 0.09173326194286346, + "learning_rate": 1.8494873671166543e-05, + "loss": 0.8588842749595642, + "step": 3275 + }, + { + "epoch": 0.6053284983427839, + "grad_norm": 0.0814746618270874, + "learning_rate": 1.8493820966941944e-05, + "loss": 0.619792103767395, + "step": 3276 + }, + { + "epoch": 0.6055132750516797, + "grad_norm": 0.06092033535242081, + "learning_rate": 1.8492767924689846e-05, + "loss": 0.4326985478401184, + "step": 3277 + }, + { + "epoch": 0.6056980517605756, + "grad_norm": 0.08345521986484528, + "learning_rate": 1.8491714544452154e-05, + "loss": 0.7388136386871338, + "step": 3278 + }, + { + "epoch": 0.6058828284694714, + "grad_norm": 0.06334060430526733, + "learning_rate": 1.849066082627079e-05, + "loss": 0.5493499040603638, + "step": 3279 + }, + { + "epoch": 0.6060676051783672, + "grad_norm": 0.08863652497529984, + "learning_rate": 1.8489606770187685e-05, + "loss": 0.847687840461731, + "step": 3280 + }, + { + "epoch": 0.6062523818872632, + "grad_norm": 0.05811255797743797, + "learning_rate": 1.8488552376244798e-05, + "loss": 0.48114174604415894, + "step": 3281 + }, + { + "epoch": 0.606437158596159, + "grad_norm": 0.07179747521877289, + "learning_rate": 1.848749764448408e-05, + "loss": 0.6397152543067932, + "step": 3282 + }, + { + "epoch": 0.6066219353050548, + "grad_norm": 0.08426441997289658, + "learning_rate": 1.848644257494751e-05, + "loss": 0.5927404165267944, + "step": 3283 + }, + { + "epoch": 0.6068067120139506, + "grad_norm": 0.06691189855337143, + "learning_rate": 1.848538716767708e-05, + "loss": 0.47045934200286865, + "step": 3284 + }, + { + "epoch": 0.6069914887228465, + "grad_norm": 0.056987274438142776, + "learning_rate": 1.8484331422714784e-05, + "loss": 0.4347154200077057, + "step": 3285 + }, + { + "epoch": 0.6071762654317423, + "grad_norm": 0.07422585040330887, + "learning_rate": 1.848327534010264e-05, + "loss": 0.6322421431541443, + "step": 3286 + }, + { + "epoch": 0.6073610421406381, + "grad_norm": 0.06570249050855637, + "learning_rate": 1.848221891988268e-05, + "loss": 0.5806671977043152, + "step": 3287 + }, + { + "epoch": 0.6075458188495341, + "grad_norm": 0.08185900747776031, + "learning_rate": 1.8481162162096944e-05, + "loss": 0.602039098739624, + "step": 3288 + }, + { + "epoch": 0.6077305955584299, + "grad_norm": 0.07493102550506592, + "learning_rate": 1.848010506678749e-05, + "loss": 0.5471720695495605, + "step": 3289 + }, + { + "epoch": 0.6079153722673257, + "grad_norm": 0.06624553352594376, + "learning_rate": 1.8479047633996384e-05, + "loss": 0.5783137083053589, + "step": 3290 + }, + { + "epoch": 0.6081001489762216, + "grad_norm": 0.08775525540113449, + "learning_rate": 1.8477989863765712e-05, + "loss": 0.6186946630477905, + "step": 3291 + }, + { + "epoch": 0.6082849256851174, + "grad_norm": 0.08846092969179153, + "learning_rate": 1.8476931756137565e-05, + "loss": 0.7376492619514465, + "step": 3292 + }, + { + "epoch": 0.6084697023940132, + "grad_norm": 0.08089148998260498, + "learning_rate": 1.8475873311154053e-05, + "loss": 0.5945156812667847, + "step": 3293 + }, + { + "epoch": 0.608654479102909, + "grad_norm": 0.08092156797647476, + "learning_rate": 1.8474814528857306e-05, + "loss": 0.7160748839378357, + "step": 3294 + }, + { + "epoch": 0.6088392558118049, + "grad_norm": 0.07545629888772964, + "learning_rate": 1.847375540928945e-05, + "loss": 0.7555568814277649, + "step": 3295 + }, + { + "epoch": 0.6090240325207008, + "grad_norm": 0.07335377484560013, + "learning_rate": 1.8472695952492642e-05, + "loss": 0.7228835225105286, + "step": 3296 + }, + { + "epoch": 0.6092088092295966, + "grad_norm": 0.0645759105682373, + "learning_rate": 1.8471636158509043e-05, + "loss": 0.5339785814285278, + "step": 3297 + }, + { + "epoch": 0.6093935859384925, + "grad_norm": 0.07800009101629257, + "learning_rate": 1.8470576027380828e-05, + "loss": 0.7866642475128174, + "step": 3298 + }, + { + "epoch": 0.6095783626473883, + "grad_norm": 0.06919465214014053, + "learning_rate": 1.846951555915019e-05, + "loss": 0.6962285041809082, + "step": 3299 + }, + { + "epoch": 0.6097631393562841, + "grad_norm": 0.08536528050899506, + "learning_rate": 1.8468454753859332e-05, + "loss": 0.7147235870361328, + "step": 3300 + }, + { + "epoch": 0.60994791606518, + "grad_norm": 0.08068986982107162, + "learning_rate": 1.8467393611550462e-05, + "loss": 0.6971014738082886, + "step": 3301 + }, + { + "epoch": 0.6101326927740758, + "grad_norm": 0.06266585737466812, + "learning_rate": 1.8466332132265825e-05, + "loss": 0.5209909677505493, + "step": 3302 + }, + { + "epoch": 0.6103174694829717, + "grad_norm": 0.07673177868127823, + "learning_rate": 1.8465270316047653e-05, + "loss": 0.6880748271942139, + "step": 3303 + }, + { + "epoch": 0.6105022461918675, + "grad_norm": 0.070456363260746, + "learning_rate": 1.846420816293821e-05, + "loss": 0.6609374284744263, + "step": 3304 + }, + { + "epoch": 0.6106870229007634, + "grad_norm": 0.07725219428539276, + "learning_rate": 1.8463145672979758e-05, + "loss": 0.7507308721542358, + "step": 3305 + }, + { + "epoch": 0.6108717996096592, + "grad_norm": 0.06939958781003952, + "learning_rate": 1.846208284621459e-05, + "loss": 0.6096147894859314, + "step": 3306 + }, + { + "epoch": 0.611056576318555, + "grad_norm": 0.0722557008266449, + "learning_rate": 1.8461019682684998e-05, + "loss": 0.8005504012107849, + "step": 3307 + }, + { + "epoch": 0.6112413530274509, + "grad_norm": 0.06575772166252136, + "learning_rate": 1.8459956182433295e-05, + "loss": 0.47339561581611633, + "step": 3308 + }, + { + "epoch": 0.6114261297363467, + "grad_norm": 0.08038409054279327, + "learning_rate": 1.8458892345501804e-05, + "loss": 0.650078535079956, + "step": 3309 + }, + { + "epoch": 0.6116109064452426, + "grad_norm": 0.0885750949382782, + "learning_rate": 1.845782817193286e-05, + "loss": 0.8320760726928711, + "step": 3310 + }, + { + "epoch": 0.6117956831541385, + "grad_norm": 0.07795794308185577, + "learning_rate": 1.8456763661768815e-05, + "loss": 0.6257549524307251, + "step": 3311 + }, + { + "epoch": 0.6119804598630343, + "grad_norm": 0.07041628658771515, + "learning_rate": 1.8455698815052037e-05, + "loss": 0.580536425113678, + "step": 3312 + }, + { + "epoch": 0.6121652365719301, + "grad_norm": 0.06542914360761642, + "learning_rate": 1.84546336318249e-05, + "loss": 0.44415077567100525, + "step": 3313 + }, + { + "epoch": 0.6123500132808259, + "grad_norm": 0.06586247682571411, + "learning_rate": 1.8453568112129793e-05, + "loss": 0.5182660222053528, + "step": 3314 + }, + { + "epoch": 0.6125347899897218, + "grad_norm": 0.06454501301050186, + "learning_rate": 1.8452502256009127e-05, + "loss": 0.5770164132118225, + "step": 3315 + }, + { + "epoch": 0.6127195666986176, + "grad_norm": 0.0784081220626831, + "learning_rate": 1.8451436063505312e-05, + "loss": 0.7196560502052307, + "step": 3316 + }, + { + "epoch": 0.6129043434075134, + "grad_norm": 0.06240353360772133, + "learning_rate": 1.8450369534660787e-05, + "loss": 0.4739285111427307, + "step": 3317 + }, + { + "epoch": 0.6130891201164094, + "grad_norm": 0.06942515820264816, + "learning_rate": 1.8449302669517988e-05, + "loss": 0.6828460097312927, + "step": 3318 + }, + { + "epoch": 0.6132738968253052, + "grad_norm": 0.07242225855588913, + "learning_rate": 1.844823546811938e-05, + "loss": 0.5765320658683777, + "step": 3319 + }, + { + "epoch": 0.613458673534201, + "grad_norm": 0.07123544812202454, + "learning_rate": 1.844716793050743e-05, + "loss": 0.6075055599212646, + "step": 3320 + }, + { + "epoch": 0.6136434502430969, + "grad_norm": 0.0873766541481018, + "learning_rate": 1.8446100056724624e-05, + "loss": 0.7996965050697327, + "step": 3321 + }, + { + "epoch": 0.6138282269519927, + "grad_norm": 0.06624772399663925, + "learning_rate": 1.8445031846813463e-05, + "loss": 0.6351407766342163, + "step": 3322 + }, + { + "epoch": 0.6140130036608885, + "grad_norm": 0.0864158496260643, + "learning_rate": 1.8443963300816454e-05, + "loss": 0.9147331714630127, + "step": 3323 + }, + { + "epoch": 0.6141977803697843, + "grad_norm": 0.07096041738986969, + "learning_rate": 1.844289441877612e-05, + "loss": 0.5557135939598083, + "step": 3324 + }, + { + "epoch": 0.6143825570786803, + "grad_norm": 0.068775475025177, + "learning_rate": 1.844182520073501e-05, + "loss": 0.4887027144432068, + "step": 3325 + }, + { + "epoch": 0.6145673337875761, + "grad_norm": 0.05247717723250389, + "learning_rate": 1.844075564673566e-05, + "loss": 0.31910794973373413, + "step": 3326 + }, + { + "epoch": 0.6147521104964719, + "grad_norm": 0.07341188192367554, + "learning_rate": 1.8439685756820646e-05, + "loss": 0.7932683229446411, + "step": 3327 + }, + { + "epoch": 0.6149368872053678, + "grad_norm": 0.06132347509264946, + "learning_rate": 1.8438615531032545e-05, + "loss": 0.5565425157546997, + "step": 3328 + }, + { + "epoch": 0.6151216639142636, + "grad_norm": 0.06424925476312637, + "learning_rate": 1.8437544969413946e-05, + "loss": 0.46804794669151306, + "step": 3329 + }, + { + "epoch": 0.6153064406231594, + "grad_norm": 0.07150422781705856, + "learning_rate": 1.8436474072007454e-05, + "loss": 0.6623454093933105, + "step": 3330 + }, + { + "epoch": 0.6154912173320553, + "grad_norm": 0.0803765058517456, + "learning_rate": 1.843540283885569e-05, + "loss": 0.7229027152061462, + "step": 3331 + }, + { + "epoch": 0.6156759940409512, + "grad_norm": 0.08057615906000137, + "learning_rate": 1.843433127000128e-05, + "loss": 0.6208106279373169, + "step": 3332 + }, + { + "epoch": 0.615860770749847, + "grad_norm": 0.08613825589418411, + "learning_rate": 1.8433259365486876e-05, + "loss": 0.7484444379806519, + "step": 3333 + }, + { + "epoch": 0.6160455474587428, + "grad_norm": 0.07063552737236023, + "learning_rate": 1.8432187125355137e-05, + "loss": 0.5618511438369751, + "step": 3334 + }, + { + "epoch": 0.6162303241676387, + "grad_norm": 0.07601696252822876, + "learning_rate": 1.8431114549648728e-05, + "loss": 0.5722567439079285, + "step": 3335 + }, + { + "epoch": 0.6164151008765345, + "grad_norm": 0.08163177222013474, + "learning_rate": 1.8430041638410335e-05, + "loss": 0.7786068320274353, + "step": 3336 + }, + { + "epoch": 0.6165998775854303, + "grad_norm": 0.08218467980623245, + "learning_rate": 1.8428968391682663e-05, + "loss": 0.7029726505279541, + "step": 3337 + }, + { + "epoch": 0.6167846542943262, + "grad_norm": 0.06208677962422371, + "learning_rate": 1.842789480950842e-05, + "loss": 0.652988851070404, + "step": 3338 + }, + { + "epoch": 0.616969431003222, + "grad_norm": 0.06685537099838257, + "learning_rate": 1.8426820891930328e-05, + "loss": 0.6263839602470398, + "step": 3339 + }, + { + "epoch": 0.6171542077121179, + "grad_norm": 0.07050909101963043, + "learning_rate": 1.842574663899113e-05, + "loss": 0.6059039235115051, + "step": 3340 + }, + { + "epoch": 0.6173389844210138, + "grad_norm": 0.04347433149814606, + "learning_rate": 1.8424672050733577e-05, + "loss": 0.4000491201877594, + "step": 3341 + }, + { + "epoch": 0.6175237611299096, + "grad_norm": 0.07061401009559631, + "learning_rate": 1.842359712720043e-05, + "loss": 0.5760173201560974, + "step": 3342 + }, + { + "epoch": 0.6177085378388054, + "grad_norm": 0.09141604602336884, + "learning_rate": 1.8422521868434477e-05, + "loss": 0.8013163805007935, + "step": 3343 + }, + { + "epoch": 0.6178933145477012, + "grad_norm": 0.07007431983947754, + "learning_rate": 1.8421446274478504e-05, + "loss": 0.5493162870407104, + "step": 3344 + }, + { + "epoch": 0.6180780912565971, + "grad_norm": 0.057326290756464005, + "learning_rate": 1.8420370345375315e-05, + "loss": 0.41068577766418457, + "step": 3345 + }, + { + "epoch": 0.6182628679654929, + "grad_norm": 0.06700431555509567, + "learning_rate": 1.841929408116773e-05, + "loss": 0.5786662101745605, + "step": 3346 + }, + { + "epoch": 0.6184476446743888, + "grad_norm": 0.07303698360919952, + "learning_rate": 1.8418217481898578e-05, + "loss": 0.632289707660675, + "step": 3347 + }, + { + "epoch": 0.6186324213832847, + "grad_norm": 0.06894835084676743, + "learning_rate": 1.841714054761071e-05, + "loss": 0.5914403796195984, + "step": 3348 + }, + { + "epoch": 0.6188171980921805, + "grad_norm": 0.061302755028009415, + "learning_rate": 1.8416063278346983e-05, + "loss": 0.5831184387207031, + "step": 3349 + }, + { + "epoch": 0.6190019748010763, + "grad_norm": 0.0628557801246643, + "learning_rate": 1.8414985674150268e-05, + "loss": 0.53914874792099, + "step": 3350 + }, + { + "epoch": 0.6191867515099722, + "grad_norm": 0.07403040677309036, + "learning_rate": 1.841390773506345e-05, + "loss": 0.5408547520637512, + "step": 3351 + }, + { + "epoch": 0.619371528218868, + "grad_norm": 0.09489922225475311, + "learning_rate": 1.841282946112943e-05, + "loss": 0.6683562994003296, + "step": 3352 + }, + { + "epoch": 0.6195563049277638, + "grad_norm": 0.06954994052648544, + "learning_rate": 1.8411750852391114e-05, + "loss": 0.6364865303039551, + "step": 3353 + }, + { + "epoch": 0.6197410816366598, + "grad_norm": 0.07777140289545059, + "learning_rate": 1.8410671908891432e-05, + "loss": 0.7604506015777588, + "step": 3354 + }, + { + "epoch": 0.6199258583455556, + "grad_norm": 0.07304697483778, + "learning_rate": 1.840959263067332e-05, + "loss": 0.4944457709789276, + "step": 3355 + }, + { + "epoch": 0.6201106350544514, + "grad_norm": 0.06757856905460358, + "learning_rate": 1.8408513017779737e-05, + "loss": 0.6350497007369995, + "step": 3356 + }, + { + "epoch": 0.6202954117633472, + "grad_norm": 0.08216935396194458, + "learning_rate": 1.8407433070253637e-05, + "loss": 0.6694963574409485, + "step": 3357 + }, + { + "epoch": 0.6204801884722431, + "grad_norm": 0.07288269698619843, + "learning_rate": 1.840635278813801e-05, + "loss": 0.7217207551002502, + "step": 3358 + }, + { + "epoch": 0.6206649651811389, + "grad_norm": 0.0807751789689064, + "learning_rate": 1.8405272171475838e-05, + "loss": 0.6694827675819397, + "step": 3359 + }, + { + "epoch": 0.6208497418900347, + "grad_norm": 0.06977913528680801, + "learning_rate": 1.840419122031013e-05, + "loss": 0.6728543639183044, + "step": 3360 + }, + { + "epoch": 0.6210345185989306, + "grad_norm": 0.06334500014781952, + "learning_rate": 1.8403109934683908e-05, + "loss": 0.5174497365951538, + "step": 3361 + }, + { + "epoch": 0.6212192953078265, + "grad_norm": 0.08478124439716339, + "learning_rate": 1.8402028314640198e-05, + "loss": 0.7299423813819885, + "step": 3362 + }, + { + "epoch": 0.6214040720167223, + "grad_norm": 0.0655829980969429, + "learning_rate": 1.8400946360222046e-05, + "loss": 0.5044342875480652, + "step": 3363 + }, + { + "epoch": 0.6215888487256181, + "grad_norm": 0.061931535601615906, + "learning_rate": 1.8399864071472516e-05, + "loss": 0.548366367816925, + "step": 3364 + }, + { + "epoch": 0.621773625434514, + "grad_norm": 0.0731566846370697, + "learning_rate": 1.8398781448434674e-05, + "loss": 0.6606848239898682, + "step": 3365 + }, + { + "epoch": 0.6219584021434098, + "grad_norm": 0.0684320405125618, + "learning_rate": 1.8397698491151607e-05, + "loss": 0.5689799189567566, + "step": 3366 + }, + { + "epoch": 0.6221431788523056, + "grad_norm": 0.07679462432861328, + "learning_rate": 1.839661519966641e-05, + "loss": 0.6740244030952454, + "step": 3367 + }, + { + "epoch": 0.6223279555612015, + "grad_norm": 0.0783647820353508, + "learning_rate": 1.8395531574022202e-05, + "loss": 0.796058177947998, + "step": 3368 + }, + { + "epoch": 0.6225127322700974, + "grad_norm": 0.06929682195186615, + "learning_rate": 1.8394447614262103e-05, + "loss": 0.6750960350036621, + "step": 3369 + }, + { + "epoch": 0.6226975089789932, + "grad_norm": 0.07672516256570816, + "learning_rate": 1.839336332042925e-05, + "loss": 0.7037633657455444, + "step": 3370 + }, + { + "epoch": 0.6228822856878891, + "grad_norm": 0.07461292296648026, + "learning_rate": 1.83922786925668e-05, + "loss": 0.5920368432998657, + "step": 3371 + }, + { + "epoch": 0.6230670623967849, + "grad_norm": 0.061919037252664566, + "learning_rate": 1.839119373071791e-05, + "loss": 0.4925020933151245, + "step": 3372 + }, + { + "epoch": 0.6232518391056807, + "grad_norm": 0.08758971095085144, + "learning_rate": 1.8390108434925764e-05, + "loss": 0.7388421893119812, + "step": 3373 + }, + { + "epoch": 0.6234366158145765, + "grad_norm": 0.07638192921876907, + "learning_rate": 1.8389022805233548e-05, + "loss": 0.7971750497817993, + "step": 3374 + }, + { + "epoch": 0.6236213925234724, + "grad_norm": 0.1041233167052269, + "learning_rate": 1.838793684168448e-05, + "loss": 0.8845223784446716, + "step": 3375 + }, + { + "epoch": 0.6238061692323683, + "grad_norm": 0.07788344472646713, + "learning_rate": 1.8386850544321758e-05, + "loss": 0.7362569570541382, + "step": 3376 + }, + { + "epoch": 0.6239909459412641, + "grad_norm": 0.07352737337350845, + "learning_rate": 1.8385763913188624e-05, + "loss": 0.6514081358909607, + "step": 3377 + }, + { + "epoch": 0.62417572265016, + "grad_norm": 0.07312724739313126, + "learning_rate": 1.8384676948328328e-05, + "loss": 0.5283224582672119, + "step": 3378 + }, + { + "epoch": 0.6243604993590558, + "grad_norm": 0.06424093246459961, + "learning_rate": 1.838358964978412e-05, + "loss": 0.5747008919715881, + "step": 3379 + }, + { + "epoch": 0.6245452760679516, + "grad_norm": 0.07459218800067902, + "learning_rate": 1.8382502017599272e-05, + "loss": 0.6848477721214294, + "step": 3380 + }, + { + "epoch": 0.6247300527768475, + "grad_norm": 0.07399237900972366, + "learning_rate": 1.8381414051817066e-05, + "loss": 0.541366457939148, + "step": 3381 + }, + { + "epoch": 0.6249148294857433, + "grad_norm": 0.07620833069086075, + "learning_rate": 1.8380325752480807e-05, + "loss": 0.638131320476532, + "step": 3382 + }, + { + "epoch": 0.6250996061946391, + "grad_norm": 0.06079495698213577, + "learning_rate": 1.8379237119633798e-05, + "loss": 0.6371907591819763, + "step": 3383 + }, + { + "epoch": 0.625284382903535, + "grad_norm": 0.06467308104038239, + "learning_rate": 1.837814815331937e-05, + "loss": 0.5650978088378906, + "step": 3384 + }, + { + "epoch": 0.6254691596124309, + "grad_norm": 0.06969483196735382, + "learning_rate": 1.8377058853580857e-05, + "loss": 0.6680585741996765, + "step": 3385 + }, + { + "epoch": 0.6256539363213267, + "grad_norm": 0.08084537833929062, + "learning_rate": 1.837596922046161e-05, + "loss": 0.7017515301704407, + "step": 3386 + }, + { + "epoch": 0.6258387130302225, + "grad_norm": 0.07957891374826431, + "learning_rate": 1.8374879254004993e-05, + "loss": 0.7071402072906494, + "step": 3387 + }, + { + "epoch": 0.6260234897391184, + "grad_norm": 0.07972588390111923, + "learning_rate": 1.837378895425438e-05, + "loss": 0.8170965909957886, + "step": 3388 + }, + { + "epoch": 0.6262082664480142, + "grad_norm": 0.09192267060279846, + "learning_rate": 1.837269832125317e-05, + "loss": 0.7305018901824951, + "step": 3389 + }, + { + "epoch": 0.62639304315691, + "grad_norm": 0.07038337737321854, + "learning_rate": 1.8371607355044757e-05, + "loss": 0.5975012183189392, + "step": 3390 + }, + { + "epoch": 0.626577819865806, + "grad_norm": 0.07235181331634521, + "learning_rate": 1.8370516055672565e-05, + "loss": 0.6369036436080933, + "step": 3391 + }, + { + "epoch": 0.6267625965747018, + "grad_norm": 0.07539573311805725, + "learning_rate": 1.8369424423180025e-05, + "loss": 0.6021792888641357, + "step": 3392 + }, + { + "epoch": 0.6269473732835976, + "grad_norm": 0.07311061024665833, + "learning_rate": 1.8368332457610572e-05, + "loss": 0.6630080938339233, + "step": 3393 + }, + { + "epoch": 0.6271321499924934, + "grad_norm": 0.06958476454019547, + "learning_rate": 1.8367240159007673e-05, + "loss": 0.6373473405838013, + "step": 3394 + }, + { + "epoch": 0.6273169267013893, + "grad_norm": 0.0566987618803978, + "learning_rate": 1.836614752741479e-05, + "loss": 0.5827599763870239, + "step": 3395 + }, + { + "epoch": 0.6275017034102851, + "grad_norm": 0.08414741605520248, + "learning_rate": 1.8365054562875412e-05, + "loss": 0.7651265263557434, + "step": 3396 + }, + { + "epoch": 0.6276864801191809, + "grad_norm": 0.05634079501032829, + "learning_rate": 1.8363961265433033e-05, + "loss": 0.4115261733531952, + "step": 3397 + }, + { + "epoch": 0.6278712568280769, + "grad_norm": 0.08532488346099854, + "learning_rate": 1.8362867635131162e-05, + "loss": 0.7812259793281555, + "step": 3398 + }, + { + "epoch": 0.6280560335369727, + "grad_norm": 0.07976091653108597, + "learning_rate": 1.836177367201332e-05, + "loss": 0.7045861482620239, + "step": 3399 + }, + { + "epoch": 0.6282408102458685, + "grad_norm": 0.05625972896814346, + "learning_rate": 1.836067937612305e-05, + "loss": 0.5176336765289307, + "step": 3400 + }, + { + "epoch": 0.6284255869547644, + "grad_norm": 0.0938640907406807, + "learning_rate": 1.8359584747503902e-05, + "loss": 0.7844260334968567, + "step": 3401 + }, + { + "epoch": 0.6286103636636602, + "grad_norm": 0.06254132837057114, + "learning_rate": 1.835848978619943e-05, + "loss": 0.6245369911193848, + "step": 3402 + }, + { + "epoch": 0.628795140372556, + "grad_norm": 0.07394769787788391, + "learning_rate": 1.8357394492253216e-05, + "loss": 0.5009530782699585, + "step": 3403 + }, + { + "epoch": 0.6289799170814518, + "grad_norm": 0.06485763937234879, + "learning_rate": 1.835629886570885e-05, + "loss": 0.5574130415916443, + "step": 3404 + }, + { + "epoch": 0.6291646937903477, + "grad_norm": 0.07892514020204544, + "learning_rate": 1.8355202906609927e-05, + "loss": 0.6365463137626648, + "step": 3405 + }, + { + "epoch": 0.6293494704992436, + "grad_norm": 0.10166321694850922, + "learning_rate": 1.8354106615000073e-05, + "loss": 0.9062671065330505, + "step": 3406 + }, + { + "epoch": 0.6295342472081394, + "grad_norm": 0.07256297767162323, + "learning_rate": 1.8353009990922913e-05, + "loss": 0.7056639194488525, + "step": 3407 + }, + { + "epoch": 0.6297190239170353, + "grad_norm": 0.06842894852161407, + "learning_rate": 1.8351913034422083e-05, + "loss": 0.45249462127685547, + "step": 3408 + }, + { + "epoch": 0.6299038006259311, + "grad_norm": 0.08753480762243271, + "learning_rate": 1.835081574554125e-05, + "loss": 0.7865512371063232, + "step": 3409 + }, + { + "epoch": 0.6300885773348269, + "grad_norm": 0.07850634306669235, + "learning_rate": 1.8349718124324075e-05, + "loss": 0.580086350440979, + "step": 3410 + }, + { + "epoch": 0.6302733540437228, + "grad_norm": 0.10032707452774048, + "learning_rate": 1.8348620170814244e-05, + "loss": 0.7837745547294617, + "step": 3411 + }, + { + "epoch": 0.6304581307526186, + "grad_norm": 0.08185004442930222, + "learning_rate": 1.8347521885055447e-05, + "loss": 0.7753044366836548, + "step": 3412 + }, + { + "epoch": 0.6306429074615145, + "grad_norm": 0.06751414388418198, + "learning_rate": 1.834642326709139e-05, + "loss": 0.540576159954071, + "step": 3413 + }, + { + "epoch": 0.6308276841704104, + "grad_norm": 0.08356916904449463, + "learning_rate": 1.8345324316965808e-05, + "loss": 0.6384175419807434, + "step": 3414 + }, + { + "epoch": 0.6310124608793062, + "grad_norm": 0.07579503953456879, + "learning_rate": 1.8344225034722423e-05, + "loss": 0.5331666469573975, + "step": 3415 + }, + { + "epoch": 0.631197237588202, + "grad_norm": 0.0715034231543541, + "learning_rate": 1.834312542040499e-05, + "loss": 0.6281111836433411, + "step": 3416 + }, + { + "epoch": 0.6313820142970978, + "grad_norm": 0.07418831437826157, + "learning_rate": 1.8342025474057263e-05, + "loss": 0.739043653011322, + "step": 3417 + }, + { + "epoch": 0.6315667910059937, + "grad_norm": 0.07007978111505508, + "learning_rate": 1.8340925195723023e-05, + "loss": 0.5546165704727173, + "step": 3418 + }, + { + "epoch": 0.6317515677148895, + "grad_norm": 0.07733705639839172, + "learning_rate": 1.833982458544606e-05, + "loss": 0.6315229535102844, + "step": 3419 + }, + { + "epoch": 0.6319363444237854, + "grad_norm": 0.09028880298137665, + "learning_rate": 1.8338723643270163e-05, + "loss": 0.8068675398826599, + "step": 3420 + }, + { + "epoch": 0.6321211211326813, + "grad_norm": 0.0775882750749588, + "learning_rate": 1.833762236923916e-05, + "loss": 0.598249077796936, + "step": 3421 + }, + { + "epoch": 0.6323058978415771, + "grad_norm": 0.07598394900560379, + "learning_rate": 1.8336520763396868e-05, + "loss": 0.7596868276596069, + "step": 3422 + }, + { + "epoch": 0.6324906745504729, + "grad_norm": 0.07083048671483994, + "learning_rate": 1.833541882578713e-05, + "loss": 0.6752363443374634, + "step": 3423 + }, + { + "epoch": 0.6326754512593687, + "grad_norm": 0.07214643061161041, + "learning_rate": 1.8334316556453808e-05, + "loss": 0.5167519450187683, + "step": 3424 + }, + { + "epoch": 0.6328602279682646, + "grad_norm": 0.0798087939620018, + "learning_rate": 1.8333213955440755e-05, + "loss": 0.5881116390228271, + "step": 3425 + }, + { + "epoch": 0.6330450046771604, + "grad_norm": 0.0739937275648117, + "learning_rate": 1.833211102279186e-05, + "loss": 0.6433027386665344, + "step": 3426 + }, + { + "epoch": 0.6332297813860562, + "grad_norm": 0.06404921412467957, + "learning_rate": 1.8331007758551015e-05, + "loss": 0.4750000834465027, + "step": 3427 + }, + { + "epoch": 0.6334145580949522, + "grad_norm": 0.08710794895887375, + "learning_rate": 1.8329904162762124e-05, + "loss": 0.7732343673706055, + "step": 3428 + }, + { + "epoch": 0.633599334803848, + "grad_norm": 0.08195815980434418, + "learning_rate": 1.8328800235469108e-05, + "loss": 0.5903754830360413, + "step": 3429 + }, + { + "epoch": 0.6337841115127438, + "grad_norm": 0.07433349639177322, + "learning_rate": 1.83276959767159e-05, + "loss": 0.42970842123031616, + "step": 3430 + }, + { + "epoch": 0.6339688882216397, + "grad_norm": 0.060573991388082504, + "learning_rate": 1.8326591386546447e-05, + "loss": 0.4808928370475769, + "step": 3431 + }, + { + "epoch": 0.6341536649305355, + "grad_norm": 0.08820538222789764, + "learning_rate": 1.8325486465004707e-05, + "loss": 0.7748810648918152, + "step": 3432 + }, + { + "epoch": 0.6343384416394313, + "grad_norm": 0.07573293894529343, + "learning_rate": 1.8324381212134653e-05, + "loss": 0.7678648233413696, + "step": 3433 + }, + { + "epoch": 0.6345232183483271, + "grad_norm": 0.10143709182739258, + "learning_rate": 1.8323275627980272e-05, + "loss": 0.751928448677063, + "step": 3434 + }, + { + "epoch": 0.6347079950572231, + "grad_norm": 0.10220063477754593, + "learning_rate": 1.832216971258556e-05, + "loss": 0.9340986013412476, + "step": 3435 + }, + { + "epoch": 0.6348927717661189, + "grad_norm": 0.07440445572137833, + "learning_rate": 1.8321063465994527e-05, + "loss": 0.7372763752937317, + "step": 3436 + }, + { + "epoch": 0.6350775484750147, + "grad_norm": 0.07867652922868729, + "learning_rate": 1.8319956888251207e-05, + "loss": 0.7990411520004272, + "step": 3437 + }, + { + "epoch": 0.6352623251839106, + "grad_norm": 0.07615192979574203, + "learning_rate": 1.831884997939963e-05, + "loss": 0.7793571949005127, + "step": 3438 + }, + { + "epoch": 0.6354471018928064, + "grad_norm": 0.06848933547735214, + "learning_rate": 1.831774273948385e-05, + "loss": 0.5564258694648743, + "step": 3439 + }, + { + "epoch": 0.6356318786017022, + "grad_norm": 0.06217201426625252, + "learning_rate": 1.831663516854793e-05, + "loss": 0.52495938539505, + "step": 3440 + }, + { + "epoch": 0.6358166553105981, + "grad_norm": 0.08522749692201614, + "learning_rate": 1.8315527266635955e-05, + "loss": 0.60796719789505, + "step": 3441 + }, + { + "epoch": 0.636001432019494, + "grad_norm": 0.09203702211380005, + "learning_rate": 1.8314419033792007e-05, + "loss": 0.5910808444023132, + "step": 3442 + }, + { + "epoch": 0.6361862087283898, + "grad_norm": 0.07956217974424362, + "learning_rate": 1.8313310470060192e-05, + "loss": 0.7164194583892822, + "step": 3443 + }, + { + "epoch": 0.6363709854372857, + "grad_norm": 0.08320367336273193, + "learning_rate": 1.8312201575484633e-05, + "loss": 0.8383685350418091, + "step": 3444 + }, + { + "epoch": 0.6365557621461815, + "grad_norm": 0.053374577313661575, + "learning_rate": 1.8311092350109457e-05, + "loss": 0.3978824019432068, + "step": 3445 + }, + { + "epoch": 0.6367405388550773, + "grad_norm": 0.05999819189310074, + "learning_rate": 1.8309982793978808e-05, + "loss": 0.5727517604827881, + "step": 3446 + }, + { + "epoch": 0.6369253155639731, + "grad_norm": 0.059646982699632645, + "learning_rate": 1.8308872907136837e-05, + "loss": 0.45642557740211487, + "step": 3447 + }, + { + "epoch": 0.637110092272869, + "grad_norm": 0.07357610017061234, + "learning_rate": 1.8307762689627724e-05, + "loss": 0.7836448550224304, + "step": 3448 + }, + { + "epoch": 0.6372948689817648, + "grad_norm": 0.06309615075588226, + "learning_rate": 1.8306652141495645e-05, + "loss": 0.5483134984970093, + "step": 3449 + }, + { + "epoch": 0.6374796456906607, + "grad_norm": 0.07365509867668152, + "learning_rate": 1.83055412627848e-05, + "loss": 0.5442796349525452, + "step": 3450 + }, + { + "epoch": 0.6376644223995566, + "grad_norm": 0.07378187030553818, + "learning_rate": 1.83044300535394e-05, + "loss": 0.7219253182411194, + "step": 3451 + }, + { + "epoch": 0.6378491991084524, + "grad_norm": 0.08257278054952621, + "learning_rate": 1.8303318513803664e-05, + "loss": 0.8525739908218384, + "step": 3452 + }, + { + "epoch": 0.6380339758173482, + "grad_norm": 0.08498632162809372, + "learning_rate": 1.8302206643621826e-05, + "loss": 0.7980212569236755, + "step": 3453 + }, + { + "epoch": 0.638218752526244, + "grad_norm": 0.07984080910682678, + "learning_rate": 1.830109444303814e-05, + "loss": 0.575787365436554, + "step": 3454 + }, + { + "epoch": 0.6384035292351399, + "grad_norm": 0.07604240626096725, + "learning_rate": 1.8299981912096867e-05, + "loss": 0.6391733288764954, + "step": 3455 + }, + { + "epoch": 0.6385883059440357, + "grad_norm": 0.07938699424266815, + "learning_rate": 1.829886905084228e-05, + "loss": 0.7856255769729614, + "step": 3456 + }, + { + "epoch": 0.6387730826529316, + "grad_norm": 0.0769173875451088, + "learning_rate": 1.8297755859318665e-05, + "loss": 0.7210874557495117, + "step": 3457 + }, + { + "epoch": 0.6389578593618275, + "grad_norm": 0.07991595566272736, + "learning_rate": 1.8296642337570333e-05, + "loss": 0.7814546823501587, + "step": 3458 + }, + { + "epoch": 0.6391426360707233, + "grad_norm": 0.08006802946329117, + "learning_rate": 1.8295528485641588e-05, + "loss": 0.7504193186759949, + "step": 3459 + }, + { + "epoch": 0.6393274127796191, + "grad_norm": 0.07210196554660797, + "learning_rate": 1.8294414303576768e-05, + "loss": 0.45494532585144043, + "step": 3460 + }, + { + "epoch": 0.639512189488515, + "grad_norm": 0.06325925886631012, + "learning_rate": 1.8293299791420203e-05, + "loss": 0.5189319849014282, + "step": 3461 + }, + { + "epoch": 0.6396969661974108, + "grad_norm": 0.0802246481180191, + "learning_rate": 1.8292184949216255e-05, + "loss": 0.658757746219635, + "step": 3462 + }, + { + "epoch": 0.6398817429063066, + "grad_norm": 0.07019998133182526, + "learning_rate": 1.8291069777009293e-05, + "loss": 0.5831966996192932, + "step": 3463 + }, + { + "epoch": 0.6400665196152026, + "grad_norm": 0.07150223106145859, + "learning_rate": 1.828995427484369e-05, + "loss": 0.629213273525238, + "step": 3464 + }, + { + "epoch": 0.6402512963240984, + "grad_norm": 0.06425396353006363, + "learning_rate": 1.8288838442763838e-05, + "loss": 0.6516048908233643, + "step": 3465 + }, + { + "epoch": 0.6404360730329942, + "grad_norm": 0.07360067218542099, + "learning_rate": 1.8287722280814154e-05, + "loss": 0.7233392596244812, + "step": 3466 + }, + { + "epoch": 0.64062084974189, + "grad_norm": 0.0641668289899826, + "learning_rate": 1.828660578903905e-05, + "loss": 0.5412912964820862, + "step": 3467 + }, + { + "epoch": 0.6408056264507859, + "grad_norm": 0.0622696615755558, + "learning_rate": 1.8285488967482964e-05, + "loss": 0.48052069544792175, + "step": 3468 + }, + { + "epoch": 0.6409904031596817, + "grad_norm": 0.06292513012886047, + "learning_rate": 1.8284371816190338e-05, + "loss": 0.619753897190094, + "step": 3469 + }, + { + "epoch": 0.6411751798685775, + "grad_norm": 0.06942099332809448, + "learning_rate": 1.828325433520563e-05, + "loss": 0.5545070767402649, + "step": 3470 + }, + { + "epoch": 0.6413599565774734, + "grad_norm": 0.0720704197883606, + "learning_rate": 1.8282136524573316e-05, + "loss": 0.7403892278671265, + "step": 3471 + }, + { + "epoch": 0.6415447332863693, + "grad_norm": 0.08801698684692383, + "learning_rate": 1.8281018384337882e-05, + "loss": 0.7320387363433838, + "step": 3472 + }, + { + "epoch": 0.6417295099952651, + "grad_norm": 0.06612563133239746, + "learning_rate": 1.827989991454382e-05, + "loss": 0.50302654504776, + "step": 3473 + }, + { + "epoch": 0.641914286704161, + "grad_norm": 0.07397046685218811, + "learning_rate": 1.8278781115235648e-05, + "loss": 0.7002925276756287, + "step": 3474 + }, + { + "epoch": 0.6420990634130568, + "grad_norm": 0.08516622334718704, + "learning_rate": 1.827766198645789e-05, + "loss": 0.6786304712295532, + "step": 3475 + }, + { + "epoch": 0.6422838401219526, + "grad_norm": 0.058225326240062714, + "learning_rate": 1.8276542528255078e-05, + "loss": 0.4930121600627899, + "step": 3476 + }, + { + "epoch": 0.6424686168308484, + "grad_norm": 0.08818955719470978, + "learning_rate": 1.827542274067177e-05, + "loss": 0.8350800275802612, + "step": 3477 + }, + { + "epoch": 0.6426533935397443, + "grad_norm": 0.0774618610739708, + "learning_rate": 1.827430262375253e-05, + "loss": 0.6766074895858765, + "step": 3478 + }, + { + "epoch": 0.6428381702486402, + "grad_norm": 0.06310693174600601, + "learning_rate": 1.8273182177541928e-05, + "loss": 0.5774590969085693, + "step": 3479 + }, + { + "epoch": 0.643022946957536, + "grad_norm": 0.06140054389834404, + "learning_rate": 1.827206140208456e-05, + "loss": 0.48896872997283936, + "step": 3480 + }, + { + "epoch": 0.6432077236664319, + "grad_norm": 0.06771141290664673, + "learning_rate": 1.827094029742503e-05, + "loss": 0.6462276577949524, + "step": 3481 + }, + { + "epoch": 0.6433925003753277, + "grad_norm": 0.07365892082452774, + "learning_rate": 1.826981886360795e-05, + "loss": 0.7363516688346863, + "step": 3482 + }, + { + "epoch": 0.6435772770842235, + "grad_norm": 0.07005984336137772, + "learning_rate": 1.8268697100677955e-05, + "loss": 0.5591853857040405, + "step": 3483 + }, + { + "epoch": 0.6437620537931193, + "grad_norm": 0.07851269841194153, + "learning_rate": 1.8267575008679685e-05, + "loss": 0.7781826257705688, + "step": 3484 + }, + { + "epoch": 0.6439468305020152, + "grad_norm": 0.0728900283575058, + "learning_rate": 1.8266452587657792e-05, + "loss": 0.7133674621582031, + "step": 3485 + }, + { + "epoch": 0.6441316072109111, + "grad_norm": 0.08376836031675339, + "learning_rate": 1.8265329837656952e-05, + "loss": 0.7407172918319702, + "step": 3486 + }, + { + "epoch": 0.644316383919807, + "grad_norm": 0.07879316061735153, + "learning_rate": 1.8264206758721845e-05, + "loss": 0.7643840909004211, + "step": 3487 + }, + { + "epoch": 0.6445011606287028, + "grad_norm": 0.0975750982761383, + "learning_rate": 1.8263083350897156e-05, + "loss": 0.7902787923812866, + "step": 3488 + }, + { + "epoch": 0.6446859373375986, + "grad_norm": 0.07233584672212601, + "learning_rate": 1.826195961422761e-05, + "loss": 0.6836885809898376, + "step": 3489 + }, + { + "epoch": 0.6448707140464944, + "grad_norm": 0.06888895481824875, + "learning_rate": 1.8260835548757917e-05, + "loss": 0.6279575824737549, + "step": 3490 + }, + { + "epoch": 0.6450554907553903, + "grad_norm": 0.07138030976057053, + "learning_rate": 1.8259711154532814e-05, + "loss": 0.63683021068573, + "step": 3491 + }, + { + "epoch": 0.6452402674642861, + "grad_norm": 0.06463315337896347, + "learning_rate": 1.8258586431597046e-05, + "loss": 0.6189367771148682, + "step": 3492 + }, + { + "epoch": 0.6454250441731819, + "grad_norm": 0.0695798248052597, + "learning_rate": 1.8257461379995378e-05, + "loss": 0.6790233850479126, + "step": 3493 + }, + { + "epoch": 0.6456098208820779, + "grad_norm": 0.07280732691287994, + "learning_rate": 1.825633599977258e-05, + "loss": 0.5111994743347168, + "step": 3494 + }, + { + "epoch": 0.6457945975909737, + "grad_norm": 0.08698681741952896, + "learning_rate": 1.825521029097344e-05, + "loss": 0.8289183378219604, + "step": 3495 + }, + { + "epoch": 0.6459793742998695, + "grad_norm": 0.06710066646337509, + "learning_rate": 1.825408425364276e-05, + "loss": 0.5631644129753113, + "step": 3496 + }, + { + "epoch": 0.6461641510087653, + "grad_norm": 0.07575134932994843, + "learning_rate": 1.825295788782535e-05, + "loss": 0.8791454434394836, + "step": 3497 + }, + { + "epoch": 0.6463489277176612, + "grad_norm": 0.07547102123498917, + "learning_rate": 1.8251831193566038e-05, + "loss": 0.5951558947563171, + "step": 3498 + }, + { + "epoch": 0.646533704426557, + "grad_norm": 0.062191519886255264, + "learning_rate": 1.8250704170909655e-05, + "loss": 0.6984429359436035, + "step": 3499 + }, + { + "epoch": 0.6467184811354528, + "grad_norm": 0.07333795726299286, + "learning_rate": 1.8249576819901062e-05, + "loss": 0.6207255125045776, + "step": 3500 + }, + { + "epoch": 0.6467184811354528, + "eval_loss": 0.6716294884681702, + "eval_runtime": 157.9746, + "eval_samples_per_second": 115.392, + "eval_steps_per_second": 14.426, + "step": 3500 + }, + { + "epoch": 0.6469032578443488, + "grad_norm": 0.07832157611846924, + "learning_rate": 1.824844914058512e-05, + "loss": 0.6614238619804382, + "step": 3501 + }, + { + "epoch": 0.6470880345532446, + "grad_norm": 0.06644383817911148, + "learning_rate": 1.8247321133006715e-05, + "loss": 0.5758317708969116, + "step": 3502 + }, + { + "epoch": 0.6472728112621404, + "grad_norm": 0.06412899494171143, + "learning_rate": 1.8246192797210723e-05, + "loss": 0.47316062450408936, + "step": 3503 + }, + { + "epoch": 0.6474575879710363, + "grad_norm": 0.08234144747257233, + "learning_rate": 1.8245064133242064e-05, + "loss": 0.7362156510353088, + "step": 3504 + }, + { + "epoch": 0.6476423646799321, + "grad_norm": 0.07396001368761063, + "learning_rate": 1.8243935141145642e-05, + "loss": 0.7157362103462219, + "step": 3505 + }, + { + "epoch": 0.6478271413888279, + "grad_norm": 0.08159304410219193, + "learning_rate": 1.8242805820966397e-05, + "loss": 0.7625038027763367, + "step": 3506 + }, + { + "epoch": 0.6480119180977237, + "grad_norm": 0.07692290842533112, + "learning_rate": 1.824167617274927e-05, + "loss": 0.6218598484992981, + "step": 3507 + }, + { + "epoch": 0.6481966948066197, + "grad_norm": 0.07371748983860016, + "learning_rate": 1.824054619653921e-05, + "loss": 0.6727126836776733, + "step": 3508 + }, + { + "epoch": 0.6483814715155155, + "grad_norm": 0.07481326907873154, + "learning_rate": 1.82394158923812e-05, + "loss": 0.6585962772369385, + "step": 3509 + }, + { + "epoch": 0.6485662482244113, + "grad_norm": 0.07488329708576202, + "learning_rate": 1.823828526032021e-05, + "loss": 0.8188884258270264, + "step": 3510 + }, + { + "epoch": 0.6487510249333072, + "grad_norm": 0.07071026414632797, + "learning_rate": 1.823715430040124e-05, + "loss": 0.6719712018966675, + "step": 3511 + }, + { + "epoch": 0.648935801642203, + "grad_norm": 0.050728604197502136, + "learning_rate": 1.8236023012669305e-05, + "loss": 0.4805065393447876, + "step": 3512 + }, + { + "epoch": 0.6491205783510988, + "grad_norm": 0.06074635684490204, + "learning_rate": 1.8234891397169415e-05, + "loss": 0.4907507598400116, + "step": 3513 + }, + { + "epoch": 0.6493053550599946, + "grad_norm": 0.0737895593047142, + "learning_rate": 1.823375945394662e-05, + "loss": 0.6974319815635681, + "step": 3514 + }, + { + "epoch": 0.6494901317688905, + "grad_norm": 0.06137435510754585, + "learning_rate": 1.8232627183045954e-05, + "loss": 0.5445990562438965, + "step": 3515 + }, + { + "epoch": 0.6496749084777864, + "grad_norm": 0.06566373258829117, + "learning_rate": 1.8231494584512477e-05, + "loss": 0.5697711110115051, + "step": 3516 + }, + { + "epoch": 0.6498596851866822, + "grad_norm": 0.0688183605670929, + "learning_rate": 1.8230361658391277e-05, + "loss": 0.634209930896759, + "step": 3517 + }, + { + "epoch": 0.6500444618955781, + "grad_norm": 0.07534511387348175, + "learning_rate": 1.8229228404727428e-05, + "loss": 0.7855115532875061, + "step": 3518 + }, + { + "epoch": 0.6502292386044739, + "grad_norm": 0.07258817553520203, + "learning_rate": 1.8228094823566034e-05, + "loss": 0.6711122989654541, + "step": 3519 + }, + { + "epoch": 0.6504140153133697, + "grad_norm": 0.08547286689281464, + "learning_rate": 1.822696091495221e-05, + "loss": 0.76134192943573, + "step": 3520 + }, + { + "epoch": 0.6505987920222656, + "grad_norm": 0.062105804681777954, + "learning_rate": 1.8225826678931082e-05, + "loss": 0.5121784210205078, + "step": 3521 + }, + { + "epoch": 0.6507835687311614, + "grad_norm": 0.07176516205072403, + "learning_rate": 1.8224692115547786e-05, + "loss": 0.5259842276573181, + "step": 3522 + }, + { + "epoch": 0.6509683454400573, + "grad_norm": 0.06844273209571838, + "learning_rate": 1.8223557224847476e-05, + "loss": 0.5787487030029297, + "step": 3523 + }, + { + "epoch": 0.6511531221489532, + "grad_norm": 0.10131745785474777, + "learning_rate": 1.8222422006875316e-05, + "loss": 0.9592319130897522, + "step": 3524 + }, + { + "epoch": 0.651337898857849, + "grad_norm": 0.06959455460309982, + "learning_rate": 1.8221286461676487e-05, + "loss": 0.5962274670600891, + "step": 3525 + }, + { + "epoch": 0.6515226755667448, + "grad_norm": 0.0840156152844429, + "learning_rate": 1.8220150589296172e-05, + "loss": 0.7271184921264648, + "step": 3526 + }, + { + "epoch": 0.6517074522756406, + "grad_norm": 0.0845293253660202, + "learning_rate": 1.8219014389779586e-05, + "loss": 0.7290505766868591, + "step": 3527 + }, + { + "epoch": 0.6518922289845365, + "grad_norm": 0.08639811724424362, + "learning_rate": 1.821787786317194e-05, + "loss": 0.8814318776130676, + "step": 3528 + }, + { + "epoch": 0.6520770056934323, + "grad_norm": 0.07506068795919418, + "learning_rate": 1.8216741009518465e-05, + "loss": 0.6959407329559326, + "step": 3529 + }, + { + "epoch": 0.6522617824023282, + "grad_norm": 0.06726741790771484, + "learning_rate": 1.8215603828864406e-05, + "loss": 0.5602008104324341, + "step": 3530 + }, + { + "epoch": 0.6524465591112241, + "grad_norm": 0.0724782720208168, + "learning_rate": 1.8214466321255016e-05, + "loss": 0.6988791823387146, + "step": 3531 + }, + { + "epoch": 0.6526313358201199, + "grad_norm": 0.08787646889686584, + "learning_rate": 1.8213328486735568e-05, + "loss": 0.7606102228164673, + "step": 3532 + }, + { + "epoch": 0.6528161125290157, + "grad_norm": 0.06051532179117203, + "learning_rate": 1.8212190325351343e-05, + "loss": 0.5349277257919312, + "step": 3533 + }, + { + "epoch": 0.6530008892379116, + "grad_norm": 0.06168433651328087, + "learning_rate": 1.8211051837147638e-05, + "loss": 0.49732333421707153, + "step": 3534 + }, + { + "epoch": 0.6531856659468074, + "grad_norm": 0.0682239979505539, + "learning_rate": 1.8209913022169755e-05, + "loss": 0.555517315864563, + "step": 3535 + }, + { + "epoch": 0.6533704426557032, + "grad_norm": 0.07696058601140976, + "learning_rate": 1.8208773880463017e-05, + "loss": 0.7068920731544495, + "step": 3536 + }, + { + "epoch": 0.653555219364599, + "grad_norm": 0.08367285877466202, + "learning_rate": 1.8207634412072765e-05, + "loss": 0.799506664276123, + "step": 3537 + }, + { + "epoch": 0.653739996073495, + "grad_norm": 0.08666908740997314, + "learning_rate": 1.8206494617044338e-05, + "loss": 0.8445583581924438, + "step": 3538 + }, + { + "epoch": 0.6539247727823908, + "grad_norm": 0.0719844400882721, + "learning_rate": 1.8205354495423104e-05, + "loss": 0.5322660803794861, + "step": 3539 + }, + { + "epoch": 0.6541095494912866, + "grad_norm": 0.08823683857917786, + "learning_rate": 1.8204214047254433e-05, + "loss": 0.745097279548645, + "step": 3540 + }, + { + "epoch": 0.6542943262001825, + "grad_norm": 0.06618287414312363, + "learning_rate": 1.8203073272583705e-05, + "loss": 0.6083573698997498, + "step": 3541 + }, + { + "epoch": 0.6544791029090783, + "grad_norm": 0.0668790340423584, + "learning_rate": 1.8201932171456328e-05, + "loss": 0.6096829771995544, + "step": 3542 + }, + { + "epoch": 0.6546638796179741, + "grad_norm": 0.06935631483793259, + "learning_rate": 1.8200790743917714e-05, + "loss": 0.5925930738449097, + "step": 3543 + }, + { + "epoch": 0.65484865632687, + "grad_norm": 0.058166343718767166, + "learning_rate": 1.819964899001328e-05, + "loss": 0.46284744143486023, + "step": 3544 + }, + { + "epoch": 0.6550334330357659, + "grad_norm": 0.08119804412126541, + "learning_rate": 1.8198506909788475e-05, + "loss": 0.7212343811988831, + "step": 3545 + }, + { + "epoch": 0.6552182097446617, + "grad_norm": 0.08805336058139801, + "learning_rate": 1.819736450328874e-05, + "loss": 0.7497941255569458, + "step": 3546 + }, + { + "epoch": 0.6554029864535575, + "grad_norm": 0.07108543813228607, + "learning_rate": 1.8196221770559548e-05, + "loss": 0.5629989504814148, + "step": 3547 + }, + { + "epoch": 0.6555877631624534, + "grad_norm": 0.06211220473051071, + "learning_rate": 1.8195078711646367e-05, + "loss": 0.6136374473571777, + "step": 3548 + }, + { + "epoch": 0.6557725398713492, + "grad_norm": 0.07083010673522949, + "learning_rate": 1.81939353265947e-05, + "loss": 0.5266101360321045, + "step": 3549 + }, + { + "epoch": 0.655957316580245, + "grad_norm": 0.070891834795475, + "learning_rate": 1.8192791615450035e-05, + "loss": 0.6135351061820984, + "step": 3550 + }, + { + "epoch": 0.6561420932891409, + "grad_norm": 0.06508482247591019, + "learning_rate": 1.81916475782579e-05, + "loss": 0.4885174334049225, + "step": 3551 + }, + { + "epoch": 0.6563268699980368, + "grad_norm": 0.07081446051597595, + "learning_rate": 1.819050321506382e-05, + "loss": 0.48294541239738464, + "step": 3552 + }, + { + "epoch": 0.6565116467069326, + "grad_norm": 0.08830460160970688, + "learning_rate": 1.8189358525913335e-05, + "loss": 0.6990798115730286, + "step": 3553 + }, + { + "epoch": 0.6566964234158285, + "grad_norm": 0.07055427134037018, + "learning_rate": 1.8188213510852003e-05, + "loss": 0.5924009084701538, + "step": 3554 + }, + { + "epoch": 0.6568812001247243, + "grad_norm": 0.06699106097221375, + "learning_rate": 1.8187068169925387e-05, + "loss": 0.5916507840156555, + "step": 3555 + }, + { + "epoch": 0.6570659768336201, + "grad_norm": 0.07054045796394348, + "learning_rate": 1.8185922503179077e-05, + "loss": 0.48589131236076355, + "step": 3556 + }, + { + "epoch": 0.6572507535425159, + "grad_norm": 0.07716887444257736, + "learning_rate": 1.818477651065866e-05, + "loss": 0.6579416990280151, + "step": 3557 + }, + { + "epoch": 0.6574355302514118, + "grad_norm": 0.0649254322052002, + "learning_rate": 1.8183630192409746e-05, + "loss": 0.5554823279380798, + "step": 3558 + }, + { + "epoch": 0.6576203069603076, + "grad_norm": 0.07145379483699799, + "learning_rate": 1.818248354847795e-05, + "loss": 0.4613144099712372, + "step": 3559 + }, + { + "epoch": 0.6578050836692035, + "grad_norm": 0.07647396624088287, + "learning_rate": 1.8181336578908913e-05, + "loss": 0.6117957830429077, + "step": 3560 + }, + { + "epoch": 0.6579898603780994, + "grad_norm": 0.078497014939785, + "learning_rate": 1.8180189283748274e-05, + "loss": 0.756879448890686, + "step": 3561 + }, + { + "epoch": 0.6581746370869952, + "grad_norm": 0.07156045734882355, + "learning_rate": 1.8179041663041693e-05, + "loss": 0.583175778388977, + "step": 3562 + }, + { + "epoch": 0.658359413795891, + "grad_norm": 0.054694127291440964, + "learning_rate": 1.8177893716834844e-05, + "loss": 0.4489407241344452, + "step": 3563 + }, + { + "epoch": 0.6585441905047869, + "grad_norm": 0.05911286920309067, + "learning_rate": 1.817674544517341e-05, + "loss": 0.5961577892303467, + "step": 3564 + }, + { + "epoch": 0.6587289672136827, + "grad_norm": 0.08541987836360931, + "learning_rate": 1.817559684810309e-05, + "loss": 0.7918174266815186, + "step": 3565 + }, + { + "epoch": 0.6589137439225785, + "grad_norm": 0.07582125067710876, + "learning_rate": 1.8174447925669594e-05, + "loss": 0.5886412858963013, + "step": 3566 + }, + { + "epoch": 0.6590985206314744, + "grad_norm": 0.09455190598964691, + "learning_rate": 1.8173298677918644e-05, + "loss": 0.7425788640975952, + "step": 3567 + }, + { + "epoch": 0.6592832973403703, + "grad_norm": 0.07785138487815857, + "learning_rate": 1.8172149104895976e-05, + "loss": 0.711449146270752, + "step": 3568 + }, + { + "epoch": 0.6594680740492661, + "grad_norm": 0.06602492183446884, + "learning_rate": 1.817099920664734e-05, + "loss": 0.5020294189453125, + "step": 3569 + }, + { + "epoch": 0.6596528507581619, + "grad_norm": 0.07863244414329529, + "learning_rate": 1.8169848983218506e-05, + "loss": 0.6526080369949341, + "step": 3570 + }, + { + "epoch": 0.6598376274670578, + "grad_norm": 0.08201786875724792, + "learning_rate": 1.8168698434655237e-05, + "loss": 0.6860029697418213, + "step": 3571 + }, + { + "epoch": 0.6600224041759536, + "grad_norm": 0.06559526920318604, + "learning_rate": 1.8167547561003328e-05, + "loss": 0.6369378566741943, + "step": 3572 + }, + { + "epoch": 0.6602071808848494, + "grad_norm": 0.07632806897163391, + "learning_rate": 1.8166396362308584e-05, + "loss": 0.6945319175720215, + "step": 3573 + }, + { + "epoch": 0.6603919575937454, + "grad_norm": 0.08810561150312424, + "learning_rate": 1.8165244838616808e-05, + "loss": 0.8644512891769409, + "step": 3574 + }, + { + "epoch": 0.6605767343026412, + "grad_norm": 0.06652560085058212, + "learning_rate": 1.8164092989973832e-05, + "loss": 0.626679539680481, + "step": 3575 + }, + { + "epoch": 0.660761511011537, + "grad_norm": 0.06613638997077942, + "learning_rate": 1.81629408164255e-05, + "loss": 0.522729754447937, + "step": 3576 + }, + { + "epoch": 0.6609462877204328, + "grad_norm": 0.07767696678638458, + "learning_rate": 1.8161788318017663e-05, + "loss": 0.5926226377487183, + "step": 3577 + }, + { + "epoch": 0.6611310644293287, + "grad_norm": 0.07696923613548279, + "learning_rate": 1.8160635494796186e-05, + "loss": 0.6262413263320923, + "step": 3578 + }, + { + "epoch": 0.6613158411382245, + "grad_norm": 0.07074544578790665, + "learning_rate": 1.8159482346806946e-05, + "loss": 0.5292245149612427, + "step": 3579 + }, + { + "epoch": 0.6615006178471203, + "grad_norm": 0.06287173181772232, + "learning_rate": 1.8158328874095835e-05, + "loss": 0.5215235352516174, + "step": 3580 + }, + { + "epoch": 0.6616853945560162, + "grad_norm": 0.065033458173275, + "learning_rate": 1.8157175076708765e-05, + "loss": 0.5890915393829346, + "step": 3581 + }, + { + "epoch": 0.6618701712649121, + "grad_norm": 0.07116048783063889, + "learning_rate": 1.8156020954691643e-05, + "loss": 0.6286876201629639, + "step": 3582 + }, + { + "epoch": 0.6620549479738079, + "grad_norm": 0.08613475412130356, + "learning_rate": 1.8154866508090404e-05, + "loss": 0.6727132201194763, + "step": 3583 + }, + { + "epoch": 0.6622397246827038, + "grad_norm": 0.0765560194849968, + "learning_rate": 1.8153711736950992e-05, + "loss": 0.7270196080207825, + "step": 3584 + }, + { + "epoch": 0.6624245013915996, + "grad_norm": 0.08965720236301422, + "learning_rate": 1.8152556641319364e-05, + "loss": 0.8607513904571533, + "step": 3585 + }, + { + "epoch": 0.6626092781004954, + "grad_norm": 0.0839729979634285, + "learning_rate": 1.8151401221241482e-05, + "loss": 0.6847149729728699, + "step": 3586 + }, + { + "epoch": 0.6627940548093912, + "grad_norm": 0.07854394614696503, + "learning_rate": 1.815024547676334e-05, + "loss": 0.7056261301040649, + "step": 3587 + }, + { + "epoch": 0.6629788315182871, + "grad_norm": 0.06578505784273148, + "learning_rate": 1.8149089407930924e-05, + "loss": 0.4915512502193451, + "step": 3588 + }, + { + "epoch": 0.663163608227183, + "grad_norm": 0.07030507922172546, + "learning_rate": 1.8147933014790245e-05, + "loss": 0.5319059491157532, + "step": 3589 + }, + { + "epoch": 0.6633483849360788, + "grad_norm": 0.062074411660432816, + "learning_rate": 1.8146776297387327e-05, + "loss": 0.4846246540546417, + "step": 3590 + }, + { + "epoch": 0.6635331616449747, + "grad_norm": 0.06728599220514297, + "learning_rate": 1.8145619255768195e-05, + "loss": 0.6230316758155823, + "step": 3591 + }, + { + "epoch": 0.6637179383538705, + "grad_norm": 0.08350211381912231, + "learning_rate": 1.8144461889978902e-05, + "loss": 0.7138779163360596, + "step": 3592 + }, + { + "epoch": 0.6639027150627663, + "grad_norm": 0.07312264293432236, + "learning_rate": 1.814330420006551e-05, + "loss": 0.5450552701950073, + "step": 3593 + }, + { + "epoch": 0.6640874917716622, + "grad_norm": 0.09821341931819916, + "learning_rate": 1.8142146186074087e-05, + "loss": 0.5662215948104858, + "step": 3594 + }, + { + "epoch": 0.664272268480558, + "grad_norm": 0.06497794389724731, + "learning_rate": 1.8140987848050717e-05, + "loss": 0.43087509274482727, + "step": 3595 + }, + { + "epoch": 0.6644570451894539, + "grad_norm": 0.09049307554960251, + "learning_rate": 1.81398291860415e-05, + "loss": 0.6527272462844849, + "step": 3596 + }, + { + "epoch": 0.6646418218983497, + "grad_norm": 0.07319524884223938, + "learning_rate": 1.813867020009255e-05, + "loss": 0.6607534885406494, + "step": 3597 + }, + { + "epoch": 0.6648265986072456, + "grad_norm": 0.10025575011968613, + "learning_rate": 1.813751089024999e-05, + "loss": 0.7384710311889648, + "step": 3598 + }, + { + "epoch": 0.6650113753161414, + "grad_norm": 0.07169393450021744, + "learning_rate": 1.813635125655995e-05, + "loss": 0.5975751280784607, + "step": 3599 + }, + { + "epoch": 0.6651961520250372, + "grad_norm": 0.08455440402030945, + "learning_rate": 1.813519129906859e-05, + "loss": 0.5890780687332153, + "step": 3600 + }, + { + "epoch": 0.6653809287339331, + "grad_norm": 0.07142335176467896, + "learning_rate": 1.813403101782207e-05, + "loss": 0.6866121292114258, + "step": 3601 + }, + { + "epoch": 0.6655657054428289, + "grad_norm": 0.06693186610937119, + "learning_rate": 1.8132870412866557e-05, + "loss": 0.5880172848701477, + "step": 3602 + }, + { + "epoch": 0.6657504821517247, + "grad_norm": 0.06023497134447098, + "learning_rate": 1.8131709484248254e-05, + "loss": 0.5216284394264221, + "step": 3603 + }, + { + "epoch": 0.6659352588606207, + "grad_norm": 0.05582762137055397, + "learning_rate": 1.813054823201335e-05, + "loss": 0.47629982233047485, + "step": 3604 + }, + { + "epoch": 0.6661200355695165, + "grad_norm": 0.05890011042356491, + "learning_rate": 1.812938665620806e-05, + "loss": 0.5389571785926819, + "step": 3605 + }, + { + "epoch": 0.6663048122784123, + "grad_norm": 0.08104158937931061, + "learning_rate": 1.8128224756878622e-05, + "loss": 0.7427095174789429, + "step": 3606 + }, + { + "epoch": 0.6664895889873081, + "grad_norm": 0.060655295848846436, + "learning_rate": 1.8127062534071265e-05, + "loss": 0.47118300199508667, + "step": 3607 + }, + { + "epoch": 0.666674365696204, + "grad_norm": 0.07759436219930649, + "learning_rate": 1.8125899987832245e-05, + "loss": 0.5543125867843628, + "step": 3608 + }, + { + "epoch": 0.6668591424050998, + "grad_norm": 0.07310891151428223, + "learning_rate": 1.812473711820783e-05, + "loss": 0.5512562990188599, + "step": 3609 + }, + { + "epoch": 0.6670439191139956, + "grad_norm": 0.07100726664066315, + "learning_rate": 1.8123573925244293e-05, + "loss": 0.5451439023017883, + "step": 3610 + }, + { + "epoch": 0.6672286958228916, + "grad_norm": 0.0782528966665268, + "learning_rate": 1.8122410408987933e-05, + "loss": 0.6551583409309387, + "step": 3611 + }, + { + "epoch": 0.6674134725317874, + "grad_norm": 0.0719519704580307, + "learning_rate": 1.812124656948505e-05, + "loss": 0.6590461134910583, + "step": 3612 + }, + { + "epoch": 0.6675982492406832, + "grad_norm": 0.06585057824850082, + "learning_rate": 1.812008240678196e-05, + "loss": 0.5170865058898926, + "step": 3613 + }, + { + "epoch": 0.6677830259495791, + "grad_norm": 0.08960982412099838, + "learning_rate": 1.8118917920924995e-05, + "loss": 0.9026049375534058, + "step": 3614 + }, + { + "epoch": 0.6679678026584749, + "grad_norm": 0.07508343458175659, + "learning_rate": 1.8117753111960496e-05, + "loss": 0.5850327014923096, + "step": 3615 + }, + { + "epoch": 0.6681525793673707, + "grad_norm": 0.06610523909330368, + "learning_rate": 1.8116587979934825e-05, + "loss": 0.4828251004219055, + "step": 3616 + }, + { + "epoch": 0.6683373560762665, + "grad_norm": 0.08065943419933319, + "learning_rate": 1.811542252489434e-05, + "loss": 0.6314260959625244, + "step": 3617 + }, + { + "epoch": 0.6685221327851625, + "grad_norm": 0.08027535676956177, + "learning_rate": 1.8114256746885433e-05, + "loss": 0.7746886610984802, + "step": 3618 + }, + { + "epoch": 0.6687069094940583, + "grad_norm": 0.05435701459646225, + "learning_rate": 1.8113090645954492e-05, + "loss": 0.4653548002243042, + "step": 3619 + }, + { + "epoch": 0.6688916862029541, + "grad_norm": 0.07187805324792862, + "learning_rate": 1.8111924222147927e-05, + "loss": 0.588527500629425, + "step": 3620 + }, + { + "epoch": 0.66907646291185, + "grad_norm": 0.07427569478750229, + "learning_rate": 1.811075747551216e-05, + "loss": 0.710505485534668, + "step": 3621 + }, + { + "epoch": 0.6692612396207458, + "grad_norm": 0.09005344659090042, + "learning_rate": 1.8109590406093612e-05, + "loss": 0.834923505783081, + "step": 3622 + }, + { + "epoch": 0.6694460163296416, + "grad_norm": 0.06310055404901505, + "learning_rate": 1.8108423013938744e-05, + "loss": 0.521003246307373, + "step": 3623 + }, + { + "epoch": 0.6696307930385375, + "grad_norm": 0.06574016809463501, + "learning_rate": 1.8107255299094007e-05, + "loss": 0.6228640079498291, + "step": 3624 + }, + { + "epoch": 0.6698155697474333, + "grad_norm": 0.0749313235282898, + "learning_rate": 1.8106087261605872e-05, + "loss": 0.3983880579471588, + "step": 3625 + }, + { + "epoch": 0.6700003464563292, + "grad_norm": 0.08887924253940582, + "learning_rate": 1.8104918901520828e-05, + "loss": 0.6355776786804199, + "step": 3626 + }, + { + "epoch": 0.670185123165225, + "grad_norm": 0.06345250457525253, + "learning_rate": 1.8103750218885366e-05, + "loss": 0.49787241220474243, + "step": 3627 + }, + { + "epoch": 0.6703698998741209, + "grad_norm": 0.07932516932487488, + "learning_rate": 1.8102581213745996e-05, + "loss": 0.8111638426780701, + "step": 3628 + }, + { + "epoch": 0.6705546765830167, + "grad_norm": 0.05282360315322876, + "learning_rate": 1.810141188614925e-05, + "loss": 0.46341559290885925, + "step": 3629 + }, + { + "epoch": 0.6707394532919125, + "grad_norm": 0.0626249611377716, + "learning_rate": 1.810024223614165e-05, + "loss": 0.6896385550498962, + "step": 3630 + }, + { + "epoch": 0.6709242300008084, + "grad_norm": 0.07346781343221664, + "learning_rate": 1.8099072263769754e-05, + "loss": 0.5050565004348755, + "step": 3631 + }, + { + "epoch": 0.6711090067097042, + "grad_norm": 0.07342267781496048, + "learning_rate": 1.8097901969080123e-05, + "loss": 0.6833550930023193, + "step": 3632 + }, + { + "epoch": 0.6712937834186001, + "grad_norm": 0.07485052198171616, + "learning_rate": 1.809673135211933e-05, + "loss": 0.6823423504829407, + "step": 3633 + }, + { + "epoch": 0.671478560127496, + "grad_norm": 0.06795880943536758, + "learning_rate": 1.8095560412933956e-05, + "loss": 0.5953877568244934, + "step": 3634 + }, + { + "epoch": 0.6716633368363918, + "grad_norm": 0.07932394742965698, + "learning_rate": 1.8094389151570607e-05, + "loss": 0.7074558734893799, + "step": 3635 + }, + { + "epoch": 0.6718481135452876, + "grad_norm": 0.08381252735853195, + "learning_rate": 1.8093217568075895e-05, + "loss": 0.601979672908783, + "step": 3636 + }, + { + "epoch": 0.6720328902541834, + "grad_norm": 0.07244662940502167, + "learning_rate": 1.809204566249644e-05, + "loss": 0.6082628965377808, + "step": 3637 + }, + { + "epoch": 0.6722176669630793, + "grad_norm": 0.08388855308294296, + "learning_rate": 1.8090873434878888e-05, + "loss": 0.7804757952690125, + "step": 3638 + }, + { + "epoch": 0.6724024436719751, + "grad_norm": 0.06491474062204361, + "learning_rate": 1.808970088526989e-05, + "loss": 0.5486597418785095, + "step": 3639 + }, + { + "epoch": 0.672587220380871, + "grad_norm": 0.07179899513721466, + "learning_rate": 1.80885280137161e-05, + "loss": 0.6362982988357544, + "step": 3640 + }, + { + "epoch": 0.6727719970897669, + "grad_norm": 0.07112115621566772, + "learning_rate": 1.8087354820264202e-05, + "loss": 0.6815124750137329, + "step": 3641 + }, + { + "epoch": 0.6729567737986627, + "grad_norm": 0.07267649471759796, + "learning_rate": 1.8086181304960885e-05, + "loss": 0.6394959688186646, + "step": 3642 + }, + { + "epoch": 0.6731415505075585, + "grad_norm": 0.06543809920549393, + "learning_rate": 1.808500746785285e-05, + "loss": 0.53403639793396, + "step": 3643 + }, + { + "epoch": 0.6733263272164544, + "grad_norm": 0.06312116980552673, + "learning_rate": 1.8083833308986816e-05, + "loss": 0.5903558135032654, + "step": 3644 + }, + { + "epoch": 0.6735111039253502, + "grad_norm": 0.06180790811777115, + "learning_rate": 1.8082658828409502e-05, + "loss": 0.5831218361854553, + "step": 3645 + }, + { + "epoch": 0.673695880634246, + "grad_norm": 0.07454147934913635, + "learning_rate": 1.808148402616766e-05, + "loss": 0.56765216588974, + "step": 3646 + }, + { + "epoch": 0.6738806573431418, + "grad_norm": 0.07599983364343643, + "learning_rate": 1.808030890230803e-05, + "loss": 0.6603242754936218, + "step": 3647 + }, + { + "epoch": 0.6740654340520378, + "grad_norm": 0.0702044740319252, + "learning_rate": 1.8079133456877393e-05, + "loss": 0.541609525680542, + "step": 3648 + }, + { + "epoch": 0.6742502107609336, + "grad_norm": 0.06919199228286743, + "learning_rate": 1.8077957689922516e-05, + "loss": 0.6654222011566162, + "step": 3649 + }, + { + "epoch": 0.6744349874698294, + "grad_norm": 0.06848164647817612, + "learning_rate": 1.8076781601490196e-05, + "loss": 0.6674426198005676, + "step": 3650 + }, + { + "epoch": 0.6746197641787253, + "grad_norm": 0.08180391043424606, + "learning_rate": 1.8075605191627242e-05, + "loss": 0.7538283467292786, + "step": 3651 + }, + { + "epoch": 0.6748045408876211, + "grad_norm": 0.0738702192902565, + "learning_rate": 1.8074428460380463e-05, + "loss": 0.6896567940711975, + "step": 3652 + }, + { + "epoch": 0.6749893175965169, + "grad_norm": 0.07367371767759323, + "learning_rate": 1.8073251407796692e-05, + "loss": 0.5425678491592407, + "step": 3653 + }, + { + "epoch": 0.6751740943054128, + "grad_norm": 0.0782298818230629, + "learning_rate": 1.8072074033922773e-05, + "loss": 0.6600009202957153, + "step": 3654 + }, + { + "epoch": 0.6753588710143087, + "grad_norm": 0.06548859924077988, + "learning_rate": 1.8070896338805565e-05, + "loss": 0.7054764032363892, + "step": 3655 + }, + { + "epoch": 0.6755436477232045, + "grad_norm": 0.08852683752775192, + "learning_rate": 1.8069718322491928e-05, + "loss": 0.7335495948791504, + "step": 3656 + }, + { + "epoch": 0.6757284244321003, + "grad_norm": 0.07670899480581284, + "learning_rate": 1.8068539985028755e-05, + "loss": 0.5692101716995239, + "step": 3657 + }, + { + "epoch": 0.6759132011409962, + "grad_norm": 0.07531186193227768, + "learning_rate": 1.806736132646293e-05, + "loss": 0.594825029373169, + "step": 3658 + }, + { + "epoch": 0.676097977849892, + "grad_norm": 0.09214551001787186, + "learning_rate": 1.8066182346841365e-05, + "loss": 0.9483715891838074, + "step": 3659 + }, + { + "epoch": 0.6762827545587878, + "grad_norm": 0.06421562284231186, + "learning_rate": 1.8065003046210976e-05, + "loss": 0.5851206183433533, + "step": 3660 + }, + { + "epoch": 0.6764675312676837, + "grad_norm": 0.1005164384841919, + "learning_rate": 1.8063823424618698e-05, + "loss": 0.8281410932540894, + "step": 3661 + }, + { + "epoch": 0.6766523079765796, + "grad_norm": 0.06132997199892998, + "learning_rate": 1.806264348211148e-05, + "loss": 0.4958787262439728, + "step": 3662 + }, + { + "epoch": 0.6768370846854754, + "grad_norm": 0.07368472963571548, + "learning_rate": 1.8061463218736272e-05, + "loss": 0.7197514176368713, + "step": 3663 + }, + { + "epoch": 0.6770218613943713, + "grad_norm": 0.06913574039936066, + "learning_rate": 1.8060282634540053e-05, + "loss": 0.6327351331710815, + "step": 3664 + }, + { + "epoch": 0.6772066381032671, + "grad_norm": 0.07094834744930267, + "learning_rate": 1.80591017295698e-05, + "loss": 0.7247785925865173, + "step": 3665 + }, + { + "epoch": 0.6773914148121629, + "grad_norm": 0.07524259388446808, + "learning_rate": 1.8057920503872514e-05, + "loss": 0.7688266634941101, + "step": 3666 + }, + { + "epoch": 0.6775761915210587, + "grad_norm": 0.060354214161634445, + "learning_rate": 1.80567389574952e-05, + "loss": 0.5938658714294434, + "step": 3667 + }, + { + "epoch": 0.6777609682299546, + "grad_norm": 0.06695031374692917, + "learning_rate": 1.805555709048488e-05, + "loss": 0.6668074131011963, + "step": 3668 + }, + { + "epoch": 0.6779457449388504, + "grad_norm": 0.06782311946153641, + "learning_rate": 1.8054374902888594e-05, + "loss": 0.5017516613006592, + "step": 3669 + }, + { + "epoch": 0.6781305216477463, + "grad_norm": 0.08580289781093597, + "learning_rate": 1.8053192394753383e-05, + "loss": 0.814161479473114, + "step": 3670 + }, + { + "epoch": 0.6783152983566422, + "grad_norm": 0.07032842934131622, + "learning_rate": 1.8052009566126312e-05, + "loss": 0.6677312254905701, + "step": 3671 + }, + { + "epoch": 0.678500075065538, + "grad_norm": 0.07815011590719223, + "learning_rate": 1.805082641705445e-05, + "loss": 0.6751715540885925, + "step": 3672 + }, + { + "epoch": 0.6786848517744338, + "grad_norm": 0.06954637169837952, + "learning_rate": 1.8049642947584885e-05, + "loss": 0.5944041609764099, + "step": 3673 + }, + { + "epoch": 0.6788696284833297, + "grad_norm": 0.06862206757068634, + "learning_rate": 1.8048459157764714e-05, + "loss": 0.5541815757751465, + "step": 3674 + }, + { + "epoch": 0.6790544051922255, + "grad_norm": 0.06570684164762497, + "learning_rate": 1.804727504764105e-05, + "loss": 0.5496829152107239, + "step": 3675 + }, + { + "epoch": 0.6792391819011213, + "grad_norm": 0.0733305811882019, + "learning_rate": 1.804609061726102e-05, + "loss": 0.604638397693634, + "step": 3676 + }, + { + "epoch": 0.6794239586100173, + "grad_norm": 0.06596026569604874, + "learning_rate": 1.804490586667175e-05, + "loss": 0.5498355031013489, + "step": 3677 + }, + { + "epoch": 0.6796087353189131, + "grad_norm": 0.060254111886024475, + "learning_rate": 1.8043720795920397e-05, + "loss": 0.489950031042099, + "step": 3678 + }, + { + "epoch": 0.6797935120278089, + "grad_norm": 0.07105099409818649, + "learning_rate": 1.8042535405054125e-05, + "loss": 0.5928880572319031, + "step": 3679 + }, + { + "epoch": 0.6799782887367047, + "grad_norm": 0.08792119473218918, + "learning_rate": 1.8041349694120102e-05, + "loss": 0.628506064414978, + "step": 3680 + }, + { + "epoch": 0.6801630654456006, + "grad_norm": 0.08043865859508514, + "learning_rate": 1.8040163663165523e-05, + "loss": 0.7550156712532043, + "step": 3681 + }, + { + "epoch": 0.6803478421544964, + "grad_norm": 0.07714878022670746, + "learning_rate": 1.8038977312237583e-05, + "loss": 0.6829538941383362, + "step": 3682 + }, + { + "epoch": 0.6805326188633922, + "grad_norm": 0.06187025457620621, + "learning_rate": 1.8037790641383493e-05, + "loss": 0.4977126717567444, + "step": 3683 + }, + { + "epoch": 0.6807173955722882, + "grad_norm": 0.11160174012184143, + "learning_rate": 1.8036603650650487e-05, + "loss": 0.634476363658905, + "step": 3684 + }, + { + "epoch": 0.680902172281184, + "grad_norm": 0.07590920478105545, + "learning_rate": 1.80354163400858e-05, + "loss": 0.6065258383750916, + "step": 3685 + }, + { + "epoch": 0.6810869489900798, + "grad_norm": 0.08354957401752472, + "learning_rate": 1.803422870973668e-05, + "loss": 0.7981310486793518, + "step": 3686 + }, + { + "epoch": 0.6812717256989756, + "grad_norm": 0.07451831549406052, + "learning_rate": 1.803304075965039e-05, + "loss": 0.5937463641166687, + "step": 3687 + }, + { + "epoch": 0.6814565024078715, + "grad_norm": 0.07504851371049881, + "learning_rate": 1.8031852489874215e-05, + "loss": 0.6576966643333435, + "step": 3688 + }, + { + "epoch": 0.6816412791167673, + "grad_norm": 0.08937250822782516, + "learning_rate": 1.803066390045544e-05, + "loss": 1.0074913501739502, + "step": 3689 + }, + { + "epoch": 0.6818260558256631, + "grad_norm": 0.07768749445676804, + "learning_rate": 1.802947499144136e-05, + "loss": 0.7405135035514832, + "step": 3690 + }, + { + "epoch": 0.682010832534559, + "grad_norm": 0.0934264287352562, + "learning_rate": 1.8028285762879303e-05, + "loss": 0.7602882981300354, + "step": 3691 + }, + { + "epoch": 0.6821956092434549, + "grad_norm": 0.0645613968372345, + "learning_rate": 1.802709621481659e-05, + "loss": 0.6215939521789551, + "step": 3692 + }, + { + "epoch": 0.6823803859523507, + "grad_norm": 0.07587705552577972, + "learning_rate": 1.8025906347300557e-05, + "loss": 0.7235476970672607, + "step": 3693 + }, + { + "epoch": 0.6825651626612466, + "grad_norm": 0.06425310671329498, + "learning_rate": 1.802471616037856e-05, + "loss": 0.548570454120636, + "step": 3694 + }, + { + "epoch": 0.6827499393701424, + "grad_norm": 0.07246150821447372, + "learning_rate": 1.8023525654097967e-05, + "loss": 0.6220743656158447, + "step": 3695 + }, + { + "epoch": 0.6829347160790382, + "grad_norm": 0.06483404338359833, + "learning_rate": 1.802233482850616e-05, + "loss": 0.6329941153526306, + "step": 3696 + }, + { + "epoch": 0.683119492787934, + "grad_norm": 0.07063949108123779, + "learning_rate": 1.8021143683650524e-05, + "loss": 0.6304616332054138, + "step": 3697 + }, + { + "epoch": 0.6833042694968299, + "grad_norm": 0.06189752742648125, + "learning_rate": 1.8019952219578464e-05, + "loss": 0.49720051884651184, + "step": 3698 + }, + { + "epoch": 0.6834890462057258, + "grad_norm": 0.0959625318646431, + "learning_rate": 1.8018760436337396e-05, + "loss": 0.9130963087081909, + "step": 3699 + }, + { + "epoch": 0.6836738229146216, + "grad_norm": 0.06473139673471451, + "learning_rate": 1.8017568333974748e-05, + "loss": 0.5368582010269165, + "step": 3700 + }, + { + "epoch": 0.6838585996235175, + "grad_norm": 0.0729895830154419, + "learning_rate": 1.8016375912537963e-05, + "loss": 0.6586579084396362, + "step": 3701 + }, + { + "epoch": 0.6840433763324133, + "grad_norm": 0.06475422531366348, + "learning_rate": 1.8015183172074503e-05, + "loss": 0.4308708608150482, + "step": 3702 + }, + { + "epoch": 0.6842281530413091, + "grad_norm": 0.0831836611032486, + "learning_rate": 1.8013990112631824e-05, + "loss": 0.738739550113678, + "step": 3703 + }, + { + "epoch": 0.684412929750205, + "grad_norm": 0.06140682101249695, + "learning_rate": 1.8012796734257412e-05, + "loss": 0.4796154499053955, + "step": 3704 + }, + { + "epoch": 0.6845977064591008, + "grad_norm": 0.06863638758659363, + "learning_rate": 1.8011603036998762e-05, + "loss": 0.5414847135543823, + "step": 3705 + }, + { + "epoch": 0.6847824831679967, + "grad_norm": 0.08135831356048584, + "learning_rate": 1.801040902090337e-05, + "loss": 0.8075755834579468, + "step": 3706 + }, + { + "epoch": 0.6849672598768926, + "grad_norm": 0.05040891095995903, + "learning_rate": 1.800921468601877e-05, + "loss": 0.45555415749549866, + "step": 3707 + }, + { + "epoch": 0.6851520365857884, + "grad_norm": 0.0659366175532341, + "learning_rate": 1.8008020032392474e-05, + "loss": 0.5273883938789368, + "step": 3708 + }, + { + "epoch": 0.6853368132946842, + "grad_norm": 0.06233956292271614, + "learning_rate": 1.8006825060072038e-05, + "loss": 0.6255277991294861, + "step": 3709 + }, + { + "epoch": 0.68552159000358, + "grad_norm": 0.08703937381505966, + "learning_rate": 1.8005629769105013e-05, + "loss": 0.851870596408844, + "step": 3710 + }, + { + "epoch": 0.6857063667124759, + "grad_norm": 0.06627961993217468, + "learning_rate": 1.8004434159538974e-05, + "loss": 0.512995183467865, + "step": 3711 + }, + { + "epoch": 0.6858911434213717, + "grad_norm": 0.0790930762887001, + "learning_rate": 1.8003238231421495e-05, + "loss": 0.5680521726608276, + "step": 3712 + }, + { + "epoch": 0.6860759201302675, + "grad_norm": 0.07907992601394653, + "learning_rate": 1.8002041984800173e-05, + "loss": 0.6068723201751709, + "step": 3713 + }, + { + "epoch": 0.6862606968391635, + "grad_norm": 0.08863968402147293, + "learning_rate": 1.8000845419722615e-05, + "loss": 0.7572266459465027, + "step": 3714 + }, + { + "epoch": 0.6864454735480593, + "grad_norm": 0.06970055401325226, + "learning_rate": 1.799964853623644e-05, + "loss": 0.5449360609054565, + "step": 3715 + }, + { + "epoch": 0.6866302502569551, + "grad_norm": 0.08415934443473816, + "learning_rate": 1.7998451334389285e-05, + "loss": 0.6751998662948608, + "step": 3716 + }, + { + "epoch": 0.686815026965851, + "grad_norm": 0.060137297958135605, + "learning_rate": 1.7997253814228787e-05, + "loss": 0.5908098220825195, + "step": 3717 + }, + { + "epoch": 0.6869998036747468, + "grad_norm": 0.058346912264823914, + "learning_rate": 1.7996055975802608e-05, + "loss": 0.4068335294723511, + "step": 3718 + }, + { + "epoch": 0.6871845803836426, + "grad_norm": 0.07939445227384567, + "learning_rate": 1.7994857819158416e-05, + "loss": 0.6595229506492615, + "step": 3719 + }, + { + "epoch": 0.6873693570925384, + "grad_norm": 0.086093969643116, + "learning_rate": 1.7993659344343902e-05, + "loss": 0.664191484451294, + "step": 3720 + }, + { + "epoch": 0.6875541338014344, + "grad_norm": 0.0652756467461586, + "learning_rate": 1.799246055140675e-05, + "loss": 0.5476946234703064, + "step": 3721 + }, + { + "epoch": 0.6877389105103302, + "grad_norm": 0.06591864675283432, + "learning_rate": 1.7991261440394674e-05, + "loss": 0.5816909074783325, + "step": 3722 + }, + { + "epoch": 0.687923687219226, + "grad_norm": 0.08605477958917618, + "learning_rate": 1.7990062011355393e-05, + "loss": 0.7622525691986084, + "step": 3723 + }, + { + "epoch": 0.6881084639281219, + "grad_norm": 0.07969419658184052, + "learning_rate": 1.7988862264336644e-05, + "loss": 0.7628303170204163, + "step": 3724 + }, + { + "epoch": 0.6882932406370177, + "grad_norm": 0.06664358079433441, + "learning_rate": 1.798766219938617e-05, + "loss": 0.6266488432884216, + "step": 3725 + }, + { + "epoch": 0.6884780173459135, + "grad_norm": 0.11209172755479813, + "learning_rate": 1.798646181655173e-05, + "loss": 0.9541575908660889, + "step": 3726 + }, + { + "epoch": 0.6886627940548093, + "grad_norm": 0.0730072557926178, + "learning_rate": 1.7985261115881096e-05, + "loss": 0.6682151556015015, + "step": 3727 + }, + { + "epoch": 0.6888475707637053, + "grad_norm": 0.07961619645357132, + "learning_rate": 1.7984060097422054e-05, + "loss": 0.7633053660392761, + "step": 3728 + }, + { + "epoch": 0.6890323474726011, + "grad_norm": 0.06646641343832016, + "learning_rate": 1.7982858761222396e-05, + "loss": 0.5505644679069519, + "step": 3729 + }, + { + "epoch": 0.6892171241814969, + "grad_norm": 0.07400480657815933, + "learning_rate": 1.7981657107329933e-05, + "loss": 0.6487265229225159, + "step": 3730 + }, + { + "epoch": 0.6894019008903928, + "grad_norm": 0.07567115128040314, + "learning_rate": 1.7980455135792495e-05, + "loss": 0.6150903105735779, + "step": 3731 + }, + { + "epoch": 0.6895866775992886, + "grad_norm": 0.08317635953426361, + "learning_rate": 1.7979252846657906e-05, + "loss": 0.7369841933250427, + "step": 3732 + }, + { + "epoch": 0.6897714543081844, + "grad_norm": 0.07989586144685745, + "learning_rate": 1.797805023997402e-05, + "loss": 0.6162236928939819, + "step": 3733 + }, + { + "epoch": 0.6899562310170803, + "grad_norm": 0.08052266389131546, + "learning_rate": 1.797684731578869e-05, + "loss": 0.66728276014328, + "step": 3734 + }, + { + "epoch": 0.6901410077259761, + "grad_norm": 0.06295419484376907, + "learning_rate": 1.7975644074149798e-05, + "loss": 0.6573705077171326, + "step": 3735 + }, + { + "epoch": 0.690325784434872, + "grad_norm": 0.06569328904151917, + "learning_rate": 1.7974440515105223e-05, + "loss": 0.4756871461868286, + "step": 3736 + }, + { + "epoch": 0.6905105611437679, + "grad_norm": 0.08963353931903839, + "learning_rate": 1.7973236638702864e-05, + "loss": 0.764071524143219, + "step": 3737 + }, + { + "epoch": 0.6906953378526637, + "grad_norm": 0.08409163355827332, + "learning_rate": 1.7972032444990633e-05, + "loss": 0.7622500061988831, + "step": 3738 + }, + { + "epoch": 0.6908801145615595, + "grad_norm": 0.07594644278287888, + "learning_rate": 1.797082793401645e-05, + "loss": 0.568515956401825, + "step": 3739 + }, + { + "epoch": 0.6910648912704553, + "grad_norm": 0.08169730007648468, + "learning_rate": 1.7969623105828254e-05, + "loss": 0.6786748766899109, + "step": 3740 + }, + { + "epoch": 0.6912496679793512, + "grad_norm": 0.07881565392017365, + "learning_rate": 1.7968417960473992e-05, + "loss": 0.6388014554977417, + "step": 3741 + }, + { + "epoch": 0.691434444688247, + "grad_norm": 0.06085721775889397, + "learning_rate": 1.7967212498001623e-05, + "loss": 0.5034889578819275, + "step": 3742 + }, + { + "epoch": 0.6916192213971429, + "grad_norm": 0.07765382528305054, + "learning_rate": 1.7966006718459126e-05, + "loss": 0.4946385622024536, + "step": 3743 + }, + { + "epoch": 0.6918039981060388, + "grad_norm": 0.07568485289812088, + "learning_rate": 1.796480062189448e-05, + "loss": 0.525029182434082, + "step": 3744 + }, + { + "epoch": 0.6919887748149346, + "grad_norm": 0.05768405646085739, + "learning_rate": 1.7963594208355694e-05, + "loss": 0.4105791449546814, + "step": 3745 + }, + { + "epoch": 0.6921735515238304, + "grad_norm": 0.08348099887371063, + "learning_rate": 1.7962387477890768e-05, + "loss": 0.876397967338562, + "step": 3746 + }, + { + "epoch": 0.6923583282327263, + "grad_norm": 0.06559912860393524, + "learning_rate": 1.7961180430547737e-05, + "loss": 0.5905696153640747, + "step": 3747 + }, + { + "epoch": 0.6925431049416221, + "grad_norm": 0.07305724173784256, + "learning_rate": 1.7959973066374627e-05, + "loss": 0.6378846168518066, + "step": 3748 + }, + { + "epoch": 0.6927278816505179, + "grad_norm": 0.06761205941438675, + "learning_rate": 1.7958765385419492e-05, + "loss": 0.5716357827186584, + "step": 3749 + }, + { + "epoch": 0.6929126583594138, + "grad_norm": 0.07637447118759155, + "learning_rate": 1.7957557387730397e-05, + "loss": 0.6930877566337585, + "step": 3750 + }, + { + "epoch": 0.6930974350683097, + "grad_norm": 0.08084310591220856, + "learning_rate": 1.7956349073355415e-05, + "loss": 0.6851915121078491, + "step": 3751 + }, + { + "epoch": 0.6932822117772055, + "grad_norm": 0.07316136360168457, + "learning_rate": 1.7955140442342628e-05, + "loss": 0.6507387757301331, + "step": 3752 + }, + { + "epoch": 0.6934669884861013, + "grad_norm": 0.06407404690980911, + "learning_rate": 1.7953931494740143e-05, + "loss": 0.5251136422157288, + "step": 3753 + }, + { + "epoch": 0.6936517651949972, + "grad_norm": 0.06200653687119484, + "learning_rate": 1.7952722230596072e-05, + "loss": 0.5305708050727844, + "step": 3754 + }, + { + "epoch": 0.693836541903893, + "grad_norm": 0.08334333449602127, + "learning_rate": 1.795151264995853e-05, + "loss": 0.6771195530891418, + "step": 3755 + }, + { + "epoch": 0.6940213186127888, + "grad_norm": 0.09637106955051422, + "learning_rate": 1.795030275287567e-05, + "loss": 0.7789075374603271, + "step": 3756 + }, + { + "epoch": 0.6942060953216846, + "grad_norm": 0.07496035844087601, + "learning_rate": 1.7949092539395624e-05, + "loss": 0.7156970500946045, + "step": 3757 + }, + { + "epoch": 0.6943908720305806, + "grad_norm": 0.07671833038330078, + "learning_rate": 1.7947882009566572e-05, + "loss": 0.8442854285240173, + "step": 3758 + }, + { + "epoch": 0.6945756487394764, + "grad_norm": 0.0862608402967453, + "learning_rate": 1.794667116343668e-05, + "loss": 0.624031662940979, + "step": 3759 + }, + { + "epoch": 0.6947604254483722, + "grad_norm": 0.06570187211036682, + "learning_rate": 1.7945460001054136e-05, + "loss": 0.4962296485900879, + "step": 3760 + }, + { + "epoch": 0.6949452021572681, + "grad_norm": 0.06686298549175262, + "learning_rate": 1.7944248522467145e-05, + "loss": 0.5098669528961182, + "step": 3761 + }, + { + "epoch": 0.6951299788661639, + "grad_norm": 0.0717516541481018, + "learning_rate": 1.7943036727723914e-05, + "loss": 0.6537037491798401, + "step": 3762 + }, + { + "epoch": 0.6953147555750597, + "grad_norm": 0.08012343943119049, + "learning_rate": 1.7941824616872673e-05, + "loss": 0.49236705899238586, + "step": 3763 + }, + { + "epoch": 0.6954995322839556, + "grad_norm": 0.06467476487159729, + "learning_rate": 1.794061218996166e-05, + "loss": 0.5756182670593262, + "step": 3764 + }, + { + "epoch": 0.6956843089928515, + "grad_norm": 0.09816405922174454, + "learning_rate": 1.7939399447039124e-05, + "loss": 0.7457982301712036, + "step": 3765 + }, + { + "epoch": 0.6958690857017473, + "grad_norm": 0.08083008974790573, + "learning_rate": 1.7938186388153328e-05, + "loss": 0.7266609072685242, + "step": 3766 + }, + { + "epoch": 0.6960538624106432, + "grad_norm": 0.056356221437454224, + "learning_rate": 1.793697301335255e-05, + "loss": 0.3506295084953308, + "step": 3767 + }, + { + "epoch": 0.696238639119539, + "grad_norm": 0.07170756161212921, + "learning_rate": 1.793575932268508e-05, + "loss": 0.7279176115989685, + "step": 3768 + }, + { + "epoch": 0.6964234158284348, + "grad_norm": 0.0925443023443222, + "learning_rate": 1.793454531619921e-05, + "loss": 0.8251651525497437, + "step": 3769 + }, + { + "epoch": 0.6966081925373306, + "grad_norm": 0.09287851303815842, + "learning_rate": 1.793333099394327e-05, + "loss": 0.7615050673484802, + "step": 3770 + }, + { + "epoch": 0.6967929692462265, + "grad_norm": 0.07109637558460236, + "learning_rate": 1.7932116355965573e-05, + "loss": 0.7140378355979919, + "step": 3771 + }, + { + "epoch": 0.6969777459551224, + "grad_norm": 0.0803835317492485, + "learning_rate": 1.7930901402314457e-05, + "loss": 0.6896501779556274, + "step": 3772 + }, + { + "epoch": 0.6971625226640182, + "grad_norm": 0.08121559768915176, + "learning_rate": 1.792968613303828e-05, + "loss": 0.7581483721733093, + "step": 3773 + }, + { + "epoch": 0.6973472993729141, + "grad_norm": 0.07262784987688065, + "learning_rate": 1.7928470548185406e-05, + "loss": 0.5780869722366333, + "step": 3774 + }, + { + "epoch": 0.6975320760818099, + "grad_norm": 0.06558236479759216, + "learning_rate": 1.792725464780421e-05, + "loss": 0.6090644598007202, + "step": 3775 + }, + { + "epoch": 0.6977168527907057, + "grad_norm": 0.08067404478788376, + "learning_rate": 1.7926038431943077e-05, + "loss": 0.6883424520492554, + "step": 3776 + }, + { + "epoch": 0.6979016294996016, + "grad_norm": 0.0694260448217392, + "learning_rate": 1.7924821900650413e-05, + "loss": 0.5911904573440552, + "step": 3777 + }, + { + "epoch": 0.6980864062084974, + "grad_norm": 0.07945213466882706, + "learning_rate": 1.792360505397463e-05, + "loss": 0.7563620209693909, + "step": 3778 + }, + { + "epoch": 0.6982711829173932, + "grad_norm": 0.0901646539568901, + "learning_rate": 1.7922387891964156e-05, + "loss": 0.8546429872512817, + "step": 3779 + }, + { + "epoch": 0.6984559596262891, + "grad_norm": 0.06837425380945206, + "learning_rate": 1.7921170414667434e-05, + "loss": 0.5491940379142761, + "step": 3780 + }, + { + "epoch": 0.698640736335185, + "grad_norm": 0.05254460498690605, + "learning_rate": 1.7919952622132906e-05, + "loss": 0.41425883769989014, + "step": 3781 + }, + { + "epoch": 0.6988255130440808, + "grad_norm": 0.09623497724533081, + "learning_rate": 1.7918734514409043e-05, + "loss": 0.7828701734542847, + "step": 3782 + }, + { + "epoch": 0.6990102897529766, + "grad_norm": 0.07763856649398804, + "learning_rate": 1.7917516091544322e-05, + "loss": 0.6677383780479431, + "step": 3783 + }, + { + "epoch": 0.6991950664618725, + "grad_norm": 0.08122856169939041, + "learning_rate": 1.791629735358723e-05, + "loss": 0.6720624566078186, + "step": 3784 + }, + { + "epoch": 0.6993798431707683, + "grad_norm": 0.08240723609924316, + "learning_rate": 1.7915078300586274e-05, + "loss": 0.6644288301467896, + "step": 3785 + }, + { + "epoch": 0.6995646198796641, + "grad_norm": 0.0688764750957489, + "learning_rate": 1.791385893258996e-05, + "loss": 0.576237142086029, + "step": 3786 + }, + { + "epoch": 0.6997493965885601, + "grad_norm": 0.07055231928825378, + "learning_rate": 1.7912639249646822e-05, + "loss": 0.5224719643592834, + "step": 3787 + }, + { + "epoch": 0.6999341732974559, + "grad_norm": 0.057666126638650894, + "learning_rate": 1.79114192518054e-05, + "loss": 0.47353747487068176, + "step": 3788 + }, + { + "epoch": 0.7001189500063517, + "grad_norm": 0.08372517675161362, + "learning_rate": 1.791019893911424e-05, + "loss": 0.7721982598304749, + "step": 3789 + }, + { + "epoch": 0.7003037267152475, + "grad_norm": 0.06702134013175964, + "learning_rate": 1.790897831162191e-05, + "loss": 0.7016957998275757, + "step": 3790 + }, + { + "epoch": 0.7004885034241434, + "grad_norm": 0.06706986576318741, + "learning_rate": 1.7907757369376984e-05, + "loss": 0.6234015226364136, + "step": 3791 + }, + { + "epoch": 0.7006732801330392, + "grad_norm": 0.05316302180290222, + "learning_rate": 1.7906536112428063e-05, + "loss": 0.4383869171142578, + "step": 3792 + }, + { + "epoch": 0.700858056841935, + "grad_norm": 0.07911352813243866, + "learning_rate": 1.7905314540823738e-05, + "loss": 0.6943994164466858, + "step": 3793 + }, + { + "epoch": 0.701042833550831, + "grad_norm": 0.08364924788475037, + "learning_rate": 1.7904092654612623e-05, + "loss": 0.8795351982116699, + "step": 3794 + }, + { + "epoch": 0.7012276102597268, + "grad_norm": 0.08151282370090485, + "learning_rate": 1.7902870453843352e-05, + "loss": 0.8242526054382324, + "step": 3795 + }, + { + "epoch": 0.7014123869686226, + "grad_norm": 0.05963482707738876, + "learning_rate": 1.790164793856456e-05, + "loss": 0.4913819134235382, + "step": 3796 + }, + { + "epoch": 0.7015971636775185, + "grad_norm": 0.07553902268409729, + "learning_rate": 1.7900425108824907e-05, + "loss": 0.5874858498573303, + "step": 3797 + }, + { + "epoch": 0.7017819403864143, + "grad_norm": 0.06418804824352264, + "learning_rate": 1.7899201964673046e-05, + "loss": 0.6610527634620667, + "step": 3798 + }, + { + "epoch": 0.7019667170953101, + "grad_norm": 0.05755390599370003, + "learning_rate": 1.7897978506157663e-05, + "loss": 0.4823980927467346, + "step": 3799 + }, + { + "epoch": 0.7021514938042059, + "grad_norm": 0.05311084911227226, + "learning_rate": 1.7896754733327443e-05, + "loss": 0.44125896692276, + "step": 3800 + }, + { + "epoch": 0.7023362705131018, + "grad_norm": 0.07113431394100189, + "learning_rate": 1.7895530646231092e-05, + "loss": 0.733215868473053, + "step": 3801 + }, + { + "epoch": 0.7025210472219977, + "grad_norm": 0.0652877688407898, + "learning_rate": 1.7894306244917322e-05, + "loss": 0.6344561576843262, + "step": 3802 + }, + { + "epoch": 0.7027058239308935, + "grad_norm": 0.051007334142923355, + "learning_rate": 1.7893081529434862e-05, + "loss": 0.4225222170352936, + "step": 3803 + }, + { + "epoch": 0.7028906006397894, + "grad_norm": 0.07267943769693375, + "learning_rate": 1.7891856499832455e-05, + "loss": 0.6625338196754456, + "step": 3804 + }, + { + "epoch": 0.7030753773486852, + "grad_norm": 0.07777641713619232, + "learning_rate": 1.789063115615884e-05, + "loss": 0.5510960221290588, + "step": 3805 + }, + { + "epoch": 0.703260154057581, + "grad_norm": 0.06831711530685425, + "learning_rate": 1.78894054984628e-05, + "loss": 0.6913149356842041, + "step": 3806 + }, + { + "epoch": 0.7034449307664769, + "grad_norm": 0.05947549268603325, + "learning_rate": 1.7888179526793102e-05, + "loss": 0.5916891694068909, + "step": 3807 + }, + { + "epoch": 0.7036297074753727, + "grad_norm": 0.06820245832204819, + "learning_rate": 1.788695324119854e-05, + "loss": 0.6104221940040588, + "step": 3808 + }, + { + "epoch": 0.7038144841842686, + "grad_norm": 0.08294255286455154, + "learning_rate": 1.788572664172791e-05, + "loss": 0.6323229670524597, + "step": 3809 + }, + { + "epoch": 0.7039992608931644, + "grad_norm": 0.0709289014339447, + "learning_rate": 1.7884499728430034e-05, + "loss": 0.6614767909049988, + "step": 3810 + }, + { + "epoch": 0.7041840376020603, + "grad_norm": 0.07165282219648361, + "learning_rate": 1.788327250135374e-05, + "loss": 0.5701707601547241, + "step": 3811 + }, + { + "epoch": 0.7043688143109561, + "grad_norm": 0.07202035188674927, + "learning_rate": 1.7882044960547854e-05, + "loss": 0.5524270534515381, + "step": 3812 + }, + { + "epoch": 0.7045535910198519, + "grad_norm": 0.06056154891848564, + "learning_rate": 1.7880817106061244e-05, + "loss": 0.6596307158470154, + "step": 3813 + }, + { + "epoch": 0.7047383677287478, + "grad_norm": 0.06145206466317177, + "learning_rate": 1.7879588937942765e-05, + "loss": 0.549569845199585, + "step": 3814 + }, + { + "epoch": 0.7049231444376436, + "grad_norm": 0.07081493735313416, + "learning_rate": 1.7878360456241302e-05, + "loss": 0.5861347913742065, + "step": 3815 + }, + { + "epoch": 0.7051079211465395, + "grad_norm": 0.06296757608652115, + "learning_rate": 1.7877131661005745e-05, + "loss": 0.5720245838165283, + "step": 3816 + }, + { + "epoch": 0.7052926978554354, + "grad_norm": 0.07370515912771225, + "learning_rate": 1.787590255228499e-05, + "loss": 0.6370769143104553, + "step": 3817 + }, + { + "epoch": 0.7054774745643312, + "grad_norm": 0.08321458101272583, + "learning_rate": 1.787467313012795e-05, + "loss": 0.7091321349143982, + "step": 3818 + }, + { + "epoch": 0.705662251273227, + "grad_norm": 0.06314190477132797, + "learning_rate": 1.7873443394583558e-05, + "loss": 0.6903059482574463, + "step": 3819 + }, + { + "epoch": 0.7058470279821228, + "grad_norm": 0.06438859552145004, + "learning_rate": 1.787221334570075e-05, + "loss": 0.47248029708862305, + "step": 3820 + }, + { + "epoch": 0.7060318046910187, + "grad_norm": 0.07569018006324768, + "learning_rate": 1.787098298352848e-05, + "loss": 0.684784471988678, + "step": 3821 + }, + { + "epoch": 0.7062165813999145, + "grad_norm": 0.06873763352632523, + "learning_rate": 1.7869752308115717e-05, + "loss": 0.5052890777587891, + "step": 3822 + }, + { + "epoch": 0.7064013581088104, + "grad_norm": 0.07325165718793869, + "learning_rate": 1.786852131951143e-05, + "loss": 0.6105488538742065, + "step": 3823 + }, + { + "epoch": 0.7065861348177063, + "grad_norm": 0.07279790937900543, + "learning_rate": 1.7867290017764612e-05, + "loss": 0.6297613978385925, + "step": 3824 + }, + { + "epoch": 0.7067709115266021, + "grad_norm": 0.06293085962533951, + "learning_rate": 1.7866058402924266e-05, + "loss": 0.4462578594684601, + "step": 3825 + }, + { + "epoch": 0.7069556882354979, + "grad_norm": 0.09141212701797485, + "learning_rate": 1.7864826475039404e-05, + "loss": 0.7812224626541138, + "step": 3826 + }, + { + "epoch": 0.7071404649443938, + "grad_norm": 0.0632830262184143, + "learning_rate": 1.7863594234159056e-05, + "loss": 0.5227277278900146, + "step": 3827 + }, + { + "epoch": 0.7073252416532896, + "grad_norm": 0.07047037035226822, + "learning_rate": 1.786236168033226e-05, + "loss": 0.590502142906189, + "step": 3828 + }, + { + "epoch": 0.7075100183621854, + "grad_norm": 0.07778255641460419, + "learning_rate": 1.7861128813608066e-05, + "loss": 0.7174902558326721, + "step": 3829 + }, + { + "epoch": 0.7076947950710812, + "grad_norm": 0.0773974359035492, + "learning_rate": 1.7859895634035536e-05, + "loss": 0.5292126536369324, + "step": 3830 + }, + { + "epoch": 0.7078795717799772, + "grad_norm": 0.06591079384088516, + "learning_rate": 1.7858662141663755e-05, + "loss": 0.651407778263092, + "step": 3831 + }, + { + "epoch": 0.708064348488873, + "grad_norm": 0.06398523598909378, + "learning_rate": 1.7857428336541805e-05, + "loss": 0.5415223836898804, + "step": 3832 + }, + { + "epoch": 0.7082491251977688, + "grad_norm": 0.0722912922501564, + "learning_rate": 1.7856194218718788e-05, + "loss": 0.6524251103401184, + "step": 3833 + }, + { + "epoch": 0.7084339019066647, + "grad_norm": 0.06724940985441208, + "learning_rate": 1.7854959788243825e-05, + "loss": 0.5232310891151428, + "step": 3834 + }, + { + "epoch": 0.7086186786155605, + "grad_norm": 0.08662715554237366, + "learning_rate": 1.7853725045166036e-05, + "loss": 0.7855296730995178, + "step": 3835 + }, + { + "epoch": 0.7088034553244563, + "grad_norm": 0.06664146482944489, + "learning_rate": 1.785248998953456e-05, + "loss": 0.47468939423561096, + "step": 3836 + }, + { + "epoch": 0.7089882320333522, + "grad_norm": 0.08050867915153503, + "learning_rate": 1.785125462139855e-05, + "loss": 0.8734326362609863, + "step": 3837 + }, + { + "epoch": 0.7091730087422481, + "grad_norm": 0.07380545884370804, + "learning_rate": 1.785001894080717e-05, + "loss": 0.6531058549880981, + "step": 3838 + }, + { + "epoch": 0.7093577854511439, + "grad_norm": 0.07150714844465256, + "learning_rate": 1.7848782947809595e-05, + "loss": 0.6065245270729065, + "step": 3839 + }, + { + "epoch": 0.7095425621600397, + "grad_norm": 0.06883050501346588, + "learning_rate": 1.7847546642455016e-05, + "loss": 0.6219596266746521, + "step": 3840 + }, + { + "epoch": 0.7097273388689356, + "grad_norm": 0.0754944309592247, + "learning_rate": 1.7846310024792634e-05, + "loss": 0.6314144134521484, + "step": 3841 + }, + { + "epoch": 0.7099121155778314, + "grad_norm": 0.07237895578145981, + "learning_rate": 1.7845073094871653e-05, + "loss": 0.6415191888809204, + "step": 3842 + }, + { + "epoch": 0.7100968922867272, + "grad_norm": 0.07527688145637512, + "learning_rate": 1.7843835852741315e-05, + "loss": 0.6790621280670166, + "step": 3843 + }, + { + "epoch": 0.7102816689956231, + "grad_norm": 0.06775790452957153, + "learning_rate": 1.7842598298450845e-05, + "loss": 0.6450338363647461, + "step": 3844 + }, + { + "epoch": 0.710466445704519, + "grad_norm": 0.0949234738945961, + "learning_rate": 1.7841360432049503e-05, + "loss": 0.7092158198356628, + "step": 3845 + }, + { + "epoch": 0.7106512224134148, + "grad_norm": 0.06313542276620865, + "learning_rate": 1.7840122253586546e-05, + "loss": 0.502371609210968, + "step": 3846 + }, + { + "epoch": 0.7108359991223107, + "grad_norm": 0.07874614000320435, + "learning_rate": 1.7838883763111254e-05, + "loss": 0.6510919332504272, + "step": 3847 + }, + { + "epoch": 0.7110207758312065, + "grad_norm": 0.07000324130058289, + "learning_rate": 1.783764496067291e-05, + "loss": 0.6148480176925659, + "step": 3848 + }, + { + "epoch": 0.7112055525401023, + "grad_norm": 0.07263125479221344, + "learning_rate": 1.783640584632082e-05, + "loss": 0.6642104983329773, + "step": 3849 + }, + { + "epoch": 0.7113903292489981, + "grad_norm": 0.08290033042430878, + "learning_rate": 1.783516642010429e-05, + "loss": 0.6861293315887451, + "step": 3850 + }, + { + "epoch": 0.711575105957894, + "grad_norm": 0.0689179077744484, + "learning_rate": 1.7833926682072657e-05, + "loss": 0.5201602578163147, + "step": 3851 + }, + { + "epoch": 0.7117598826667898, + "grad_norm": 0.08319676667451859, + "learning_rate": 1.7832686632275246e-05, + "loss": 0.682870090007782, + "step": 3852 + }, + { + "epoch": 0.7119446593756857, + "grad_norm": 0.06869909167289734, + "learning_rate": 1.7831446270761416e-05, + "loss": 0.6011759042739868, + "step": 3853 + }, + { + "epoch": 0.7121294360845816, + "grad_norm": 0.0635833740234375, + "learning_rate": 1.7830205597580522e-05, + "loss": 0.5728490352630615, + "step": 3854 + }, + { + "epoch": 0.7123142127934774, + "grad_norm": 0.07619897276163101, + "learning_rate": 1.7828964612781943e-05, + "loss": 0.5542641282081604, + "step": 3855 + }, + { + "epoch": 0.7124989895023732, + "grad_norm": 0.06108187139034271, + "learning_rate": 1.7827723316415068e-05, + "loss": 0.4776553511619568, + "step": 3856 + }, + { + "epoch": 0.712683766211269, + "grad_norm": 0.09078365564346313, + "learning_rate": 1.7826481708529292e-05, + "loss": 0.8141420483589172, + "step": 3857 + }, + { + "epoch": 0.7128685429201649, + "grad_norm": 0.06589607894420624, + "learning_rate": 1.782523978917403e-05, + "loss": 0.5259532332420349, + "step": 3858 + }, + { + "epoch": 0.7130533196290607, + "grad_norm": 0.07421430945396423, + "learning_rate": 1.782399755839871e-05, + "loss": 0.6141277551651001, + "step": 3859 + }, + { + "epoch": 0.7132380963379567, + "grad_norm": 0.07042445987462997, + "learning_rate": 1.7822755016252765e-05, + "loss": 0.637400209903717, + "step": 3860 + }, + { + "epoch": 0.7134228730468525, + "grad_norm": 0.07997411489486694, + "learning_rate": 1.7821512162785643e-05, + "loss": 0.6571568846702576, + "step": 3861 + }, + { + "epoch": 0.7136076497557483, + "grad_norm": 0.08841746300458908, + "learning_rate": 1.7820268998046808e-05, + "loss": 0.7182778716087341, + "step": 3862 + }, + { + "epoch": 0.7137924264646441, + "grad_norm": 0.06398257613182068, + "learning_rate": 1.7819025522085733e-05, + "loss": 0.5413829684257507, + "step": 3863 + }, + { + "epoch": 0.71397720317354, + "grad_norm": 0.07014451175928116, + "learning_rate": 1.7817781734951903e-05, + "loss": 0.5261696577072144, + "step": 3864 + }, + { + "epoch": 0.7141619798824358, + "grad_norm": 0.0701596662402153, + "learning_rate": 1.781653763669482e-05, + "loss": 0.675118625164032, + "step": 3865 + }, + { + "epoch": 0.7143467565913316, + "grad_norm": 0.07133375108242035, + "learning_rate": 1.7815293227363995e-05, + "loss": 0.5645657777786255, + "step": 3866 + }, + { + "epoch": 0.7145315333002276, + "grad_norm": 0.06540185958147049, + "learning_rate": 1.781404850700895e-05, + "loss": 0.6554360389709473, + "step": 3867 + }, + { + "epoch": 0.7147163100091234, + "grad_norm": 0.09399189800024033, + "learning_rate": 1.7812803475679224e-05, + "loss": 0.8199816942214966, + "step": 3868 + }, + { + "epoch": 0.7149010867180192, + "grad_norm": 0.06917788833379745, + "learning_rate": 1.7811558133424358e-05, + "loss": 0.5742474794387817, + "step": 3869 + }, + { + "epoch": 0.715085863426915, + "grad_norm": 0.0679846853017807, + "learning_rate": 1.781031248029392e-05, + "loss": 0.5272837281227112, + "step": 3870 + }, + { + "epoch": 0.7152706401358109, + "grad_norm": 0.09259763360023499, + "learning_rate": 1.780906651633748e-05, + "loss": 0.7504703402519226, + "step": 3871 + }, + { + "epoch": 0.7154554168447067, + "grad_norm": 0.06919008493423462, + "learning_rate": 1.7807820241604626e-05, + "loss": 0.6918947100639343, + "step": 3872 + }, + { + "epoch": 0.7156401935536025, + "grad_norm": 0.053969644010066986, + "learning_rate": 1.780657365614495e-05, + "loss": 0.41444283723831177, + "step": 3873 + }, + { + "epoch": 0.7158249702624984, + "grad_norm": 0.06332223862409592, + "learning_rate": 1.780532676000807e-05, + "loss": 0.5879743695259094, + "step": 3874 + }, + { + "epoch": 0.7160097469713943, + "grad_norm": 0.0728738009929657, + "learning_rate": 1.7804079553243602e-05, + "loss": 0.6047773361206055, + "step": 3875 + }, + { + "epoch": 0.7161945236802901, + "grad_norm": 0.06611176580190659, + "learning_rate": 1.7802832035901186e-05, + "loss": 0.5424304604530334, + "step": 3876 + }, + { + "epoch": 0.716379300389186, + "grad_norm": 0.0637843981385231, + "learning_rate": 1.7801584208030464e-05, + "loss": 0.5460506677627563, + "step": 3877 + }, + { + "epoch": 0.7165640770980818, + "grad_norm": 0.06435133516788483, + "learning_rate": 1.78003360696811e-05, + "loss": 0.5191080570220947, + "step": 3878 + }, + { + "epoch": 0.7167488538069776, + "grad_norm": 0.0822996124625206, + "learning_rate": 1.7799087620902765e-05, + "loss": 0.6032326221466064, + "step": 3879 + }, + { + "epoch": 0.7169336305158734, + "grad_norm": 0.07257374376058578, + "learning_rate": 1.779783886174514e-05, + "loss": 0.7256016731262207, + "step": 3880 + }, + { + "epoch": 0.7171184072247693, + "grad_norm": 0.070966936647892, + "learning_rate": 1.7796589792257927e-05, + "loss": 0.5935529470443726, + "step": 3881 + }, + { + "epoch": 0.7173031839336652, + "grad_norm": 0.07597895711660385, + "learning_rate": 1.7795340412490834e-05, + "loss": 0.6139712333679199, + "step": 3882 + }, + { + "epoch": 0.717487960642561, + "grad_norm": 0.06419572979211807, + "learning_rate": 1.779409072249358e-05, + "loss": 0.5486287474632263, + "step": 3883 + }, + { + "epoch": 0.7176727373514569, + "grad_norm": 0.08788354694843292, + "learning_rate": 1.7792840722315897e-05, + "loss": 0.8828362226486206, + "step": 3884 + }, + { + "epoch": 0.7178575140603527, + "grad_norm": 0.06075332686305046, + "learning_rate": 1.779159041200754e-05, + "loss": 0.43247318267822266, + "step": 3885 + }, + { + "epoch": 0.7180422907692485, + "grad_norm": 0.08744195848703384, + "learning_rate": 1.7790339791618258e-05, + "loss": 0.5396788716316223, + "step": 3886 + }, + { + "epoch": 0.7182270674781444, + "grad_norm": 0.07079479843378067, + "learning_rate": 1.7789088861197824e-05, + "loss": 0.5855565667152405, + "step": 3887 + }, + { + "epoch": 0.7184118441870402, + "grad_norm": 0.09693542867898941, + "learning_rate": 1.778783762079602e-05, + "loss": 0.7175348401069641, + "step": 3888 + }, + { + "epoch": 0.7185966208959361, + "grad_norm": 0.07204362750053406, + "learning_rate": 1.778658607046265e-05, + "loss": 0.6128134727478027, + "step": 3889 + }, + { + "epoch": 0.718781397604832, + "grad_norm": 0.06511104851961136, + "learning_rate": 1.778533421024751e-05, + "loss": 0.6019546389579773, + "step": 3890 + }, + { + "epoch": 0.7189661743137278, + "grad_norm": 0.07765532284975052, + "learning_rate": 1.778408204020043e-05, + "loss": 0.6474810838699341, + "step": 3891 + }, + { + "epoch": 0.7191509510226236, + "grad_norm": 0.09051033109426498, + "learning_rate": 1.778282956037124e-05, + "loss": 0.7019118070602417, + "step": 3892 + }, + { + "epoch": 0.7193357277315194, + "grad_norm": 0.07164207100868225, + "learning_rate": 1.7781576770809774e-05, + "loss": 0.5620560646057129, + "step": 3893 + }, + { + "epoch": 0.7195205044404153, + "grad_norm": 0.0695822462439537, + "learning_rate": 1.7780323671565904e-05, + "loss": 0.6145302653312683, + "step": 3894 + }, + { + "epoch": 0.7197052811493111, + "grad_norm": 0.10266381502151489, + "learning_rate": 1.7779070262689493e-05, + "loss": 0.8040466904640198, + "step": 3895 + }, + { + "epoch": 0.7198900578582069, + "grad_norm": 0.06830579787492752, + "learning_rate": 1.777781654423042e-05, + "loss": 0.5737794637680054, + "step": 3896 + }, + { + "epoch": 0.7200748345671029, + "grad_norm": 0.08501392602920532, + "learning_rate": 1.7776562516238586e-05, + "loss": 0.6017941832542419, + "step": 3897 + }, + { + "epoch": 0.7202596112759987, + "grad_norm": 0.08942998200654984, + "learning_rate": 1.7775308178763892e-05, + "loss": 0.8681851029396057, + "step": 3898 + }, + { + "epoch": 0.7204443879848945, + "grad_norm": 0.07187824696302414, + "learning_rate": 1.7774053531856258e-05, + "loss": 0.6001152396202087, + "step": 3899 + }, + { + "epoch": 0.7206291646937903, + "grad_norm": 0.07265845686197281, + "learning_rate": 1.7772798575565618e-05, + "loss": 0.6501057147979736, + "step": 3900 + }, + { + "epoch": 0.7208139414026862, + "grad_norm": 0.06733600050210953, + "learning_rate": 1.777154330994191e-05, + "loss": 0.5438884496688843, + "step": 3901 + }, + { + "epoch": 0.720998718111582, + "grad_norm": 0.07749243080615997, + "learning_rate": 1.7770287735035093e-05, + "loss": 0.6194239258766174, + "step": 3902 + }, + { + "epoch": 0.7211834948204778, + "grad_norm": 0.07621680945158005, + "learning_rate": 1.7769031850895133e-05, + "loss": 0.5735338926315308, + "step": 3903 + }, + { + "epoch": 0.7213682715293738, + "grad_norm": 0.06601224094629288, + "learning_rate": 1.7767775657572014e-05, + "loss": 0.5219985842704773, + "step": 3904 + }, + { + "epoch": 0.7215530482382696, + "grad_norm": 0.08114080131053925, + "learning_rate": 1.7766519155115726e-05, + "loss": 0.8340771794319153, + "step": 3905 + }, + { + "epoch": 0.7217378249471654, + "grad_norm": 0.07835365831851959, + "learning_rate": 1.776526234357627e-05, + "loss": 0.5177896618843079, + "step": 3906 + }, + { + "epoch": 0.7219226016560613, + "grad_norm": 0.06266766041517258, + "learning_rate": 1.7764005223003668e-05, + "loss": 0.4608587324619293, + "step": 3907 + }, + { + "epoch": 0.7221073783649571, + "grad_norm": 0.06603166460990906, + "learning_rate": 1.7762747793447953e-05, + "loss": 0.6084400415420532, + "step": 3908 + }, + { + "epoch": 0.7222921550738529, + "grad_norm": 0.07372553646564484, + "learning_rate": 1.7761490054959162e-05, + "loss": 0.6600576043128967, + "step": 3909 + }, + { + "epoch": 0.7224769317827487, + "grad_norm": 0.058667220175266266, + "learning_rate": 1.7760232007587346e-05, + "loss": 0.43612024188041687, + "step": 3910 + }, + { + "epoch": 0.7226617084916447, + "grad_norm": 0.07343069463968277, + "learning_rate": 1.7758973651382573e-05, + "loss": 0.6958191394805908, + "step": 3911 + }, + { + "epoch": 0.7228464852005405, + "grad_norm": 0.0658382996916771, + "learning_rate": 1.775771498639493e-05, + "loss": 0.6587331295013428, + "step": 3912 + }, + { + "epoch": 0.7230312619094363, + "grad_norm": 0.05375619977712631, + "learning_rate": 1.7756456012674494e-05, + "loss": 0.44945162534713745, + "step": 3913 + }, + { + "epoch": 0.7232160386183322, + "grad_norm": 0.07939761877059937, + "learning_rate": 1.775519673027138e-05, + "loss": 0.6397998929023743, + "step": 3914 + }, + { + "epoch": 0.723400815327228, + "grad_norm": 0.06388827413320541, + "learning_rate": 1.77539371392357e-05, + "loss": 0.4501069486141205, + "step": 3915 + }, + { + "epoch": 0.7235855920361238, + "grad_norm": 0.08833901584148407, + "learning_rate": 1.7752677239617578e-05, + "loss": 0.7420762777328491, + "step": 3916 + }, + { + "epoch": 0.7237703687450197, + "grad_norm": 0.06442126631736755, + "learning_rate": 1.7751417031467156e-05, + "loss": 0.5256044268608093, + "step": 3917 + }, + { + "epoch": 0.7239551454539155, + "grad_norm": 0.06660003215074539, + "learning_rate": 1.775015651483459e-05, + "loss": 0.5712388157844543, + "step": 3918 + }, + { + "epoch": 0.7241399221628114, + "grad_norm": 0.07039026916027069, + "learning_rate": 1.774889568977004e-05, + "loss": 0.7009496092796326, + "step": 3919 + }, + { + "epoch": 0.7243246988717073, + "grad_norm": 0.06107284873723984, + "learning_rate": 1.7747634556323687e-05, + "loss": 0.6473613977432251, + "step": 3920 + }, + { + "epoch": 0.7245094755806031, + "grad_norm": 0.11032213270664215, + "learning_rate": 1.7746373114545715e-05, + "loss": 0.5946314930915833, + "step": 3921 + }, + { + "epoch": 0.7246942522894989, + "grad_norm": 0.07178107649087906, + "learning_rate": 1.7745111364486328e-05, + "loss": 0.6469153165817261, + "step": 3922 + }, + { + "epoch": 0.7248790289983947, + "grad_norm": 0.06041441857814789, + "learning_rate": 1.7743849306195744e-05, + "loss": 0.4419972598552704, + "step": 3923 + }, + { + "epoch": 0.7250638057072906, + "grad_norm": 0.08421284705400467, + "learning_rate": 1.7742586939724183e-05, + "loss": 0.6808865070343018, + "step": 3924 + }, + { + "epoch": 0.7252485824161864, + "grad_norm": 0.07748471945524216, + "learning_rate": 1.7741324265121883e-05, + "loss": 0.6303725242614746, + "step": 3925 + }, + { + "epoch": 0.7254333591250823, + "grad_norm": 0.09201602637767792, + "learning_rate": 1.7740061282439097e-05, + "loss": 0.7040421366691589, + "step": 3926 + }, + { + "epoch": 0.7256181358339782, + "grad_norm": 0.07980838418006897, + "learning_rate": 1.7738797991726092e-05, + "loss": 0.6566523313522339, + "step": 3927 + }, + { + "epoch": 0.725802912542874, + "grad_norm": 0.06741447001695633, + "learning_rate": 1.7737534393033134e-05, + "loss": 0.6093820929527283, + "step": 3928 + }, + { + "epoch": 0.7259876892517698, + "grad_norm": 0.05772562697529793, + "learning_rate": 1.773627048641052e-05, + "loss": 0.5092942118644714, + "step": 3929 + }, + { + "epoch": 0.7261724659606656, + "grad_norm": 0.08732646703720093, + "learning_rate": 1.773500627190854e-05, + "loss": 0.721347451210022, + "step": 3930 + }, + { + "epoch": 0.7263572426695615, + "grad_norm": 0.07607407867908478, + "learning_rate": 1.7733741749577512e-05, + "loss": 0.6305980682373047, + "step": 3931 + }, + { + "epoch": 0.7265420193784573, + "grad_norm": 0.08443769067525864, + "learning_rate": 1.7732476919467757e-05, + "loss": 0.8407239317893982, + "step": 3932 + }, + { + "epoch": 0.7267267960873532, + "grad_norm": 0.07998590916395187, + "learning_rate": 1.773121178162961e-05, + "loss": 0.5579816102981567, + "step": 3933 + }, + { + "epoch": 0.7269115727962491, + "grad_norm": 0.06412210315465927, + "learning_rate": 1.7729946336113428e-05, + "loss": 0.5187691450119019, + "step": 3934 + }, + { + "epoch": 0.7270963495051449, + "grad_norm": 0.06768874824047089, + "learning_rate": 1.7728680582969562e-05, + "loss": 0.5259274840354919, + "step": 3935 + }, + { + "epoch": 0.7272811262140407, + "grad_norm": 0.07999744266271591, + "learning_rate": 1.7727414522248386e-05, + "loss": 0.61149001121521, + "step": 3936 + }, + { + "epoch": 0.7274659029229366, + "grad_norm": 0.05589217320084572, + "learning_rate": 1.7726148154000294e-05, + "loss": 0.45100581645965576, + "step": 3937 + }, + { + "epoch": 0.7276506796318324, + "grad_norm": 0.069762222468853, + "learning_rate": 1.772488147827567e-05, + "loss": 0.48234400153160095, + "step": 3938 + }, + { + "epoch": 0.7278354563407282, + "grad_norm": 0.07306203246116638, + "learning_rate": 1.772361449512494e-05, + "loss": 0.4789789319038391, + "step": 3939 + }, + { + "epoch": 0.728020233049624, + "grad_norm": 0.07036879658699036, + "learning_rate": 1.772234720459851e-05, + "loss": 0.7625179886817932, + "step": 3940 + }, + { + "epoch": 0.72820500975852, + "grad_norm": 0.08836708217859268, + "learning_rate": 1.772107960674683e-05, + "loss": 0.7655839323997498, + "step": 3941 + }, + { + "epoch": 0.7283897864674158, + "grad_norm": 0.05918063595890999, + "learning_rate": 1.7719811701620327e-05, + "loss": 0.49179041385650635, + "step": 3942 + }, + { + "epoch": 0.7285745631763116, + "grad_norm": 0.08613593131303787, + "learning_rate": 1.7718543489269477e-05, + "loss": 0.6651601791381836, + "step": 3943 + }, + { + "epoch": 0.7287593398852075, + "grad_norm": 0.06794585287570953, + "learning_rate": 1.771727496974474e-05, + "loss": 0.6362688541412354, + "step": 3944 + }, + { + "epoch": 0.7289441165941033, + "grad_norm": 0.08600226044654846, + "learning_rate": 1.771600614309661e-05, + "loss": 0.763226330280304, + "step": 3945 + }, + { + "epoch": 0.7291288933029991, + "grad_norm": 0.08081506937742233, + "learning_rate": 1.771473700937557e-05, + "loss": 0.6456172466278076, + "step": 3946 + }, + { + "epoch": 0.729313670011895, + "grad_norm": 0.05206015706062317, + "learning_rate": 1.7713467568632136e-05, + "loss": 0.47310560941696167, + "step": 3947 + }, + { + "epoch": 0.7294984467207909, + "grad_norm": 0.07662763446569443, + "learning_rate": 1.7712197820916826e-05, + "loss": 0.7199443578720093, + "step": 3948 + }, + { + "epoch": 0.7296832234296867, + "grad_norm": 0.08438249677419662, + "learning_rate": 1.7710927766280167e-05, + "loss": 0.5828445553779602, + "step": 3949 + }, + { + "epoch": 0.7298680001385826, + "grad_norm": 0.05719861388206482, + "learning_rate": 1.7709657404772712e-05, + "loss": 0.48395925760269165, + "step": 3950 + }, + { + "epoch": 0.7300527768474784, + "grad_norm": 0.0705437883734703, + "learning_rate": 1.770838673644501e-05, + "loss": 0.5369189977645874, + "step": 3951 + }, + { + "epoch": 0.7302375535563742, + "grad_norm": 0.07795518636703491, + "learning_rate": 1.7707115761347633e-05, + "loss": 0.6306718587875366, + "step": 3952 + }, + { + "epoch": 0.73042233026527, + "grad_norm": 0.06175463646650314, + "learning_rate": 1.7705844479531162e-05, + "loss": 0.5458230972290039, + "step": 3953 + }, + { + "epoch": 0.7306071069741659, + "grad_norm": 0.0780644491314888, + "learning_rate": 1.770457289104619e-05, + "loss": 0.6508029699325562, + "step": 3954 + }, + { + "epoch": 0.7307918836830618, + "grad_norm": 0.07498148083686829, + "learning_rate": 1.770330099594332e-05, + "loss": 0.5971976518630981, + "step": 3955 + }, + { + "epoch": 0.7309766603919576, + "grad_norm": 0.06846655160188675, + "learning_rate": 1.7702028794273167e-05, + "loss": 0.6009979248046875, + "step": 3956 + }, + { + "epoch": 0.7311614371008535, + "grad_norm": 0.05756732448935509, + "learning_rate": 1.7700756286086372e-05, + "loss": 0.5455886125564575, + "step": 3957 + }, + { + "epoch": 0.7313462138097493, + "grad_norm": 0.08418041467666626, + "learning_rate": 1.7699483471433564e-05, + "loss": 0.73096764087677, + "step": 3958 + }, + { + "epoch": 0.7315309905186451, + "grad_norm": 0.07023555040359497, + "learning_rate": 1.7698210350365404e-05, + "loss": 0.5592522025108337, + "step": 3959 + }, + { + "epoch": 0.731715767227541, + "grad_norm": 0.0518750362098217, + "learning_rate": 1.7696936922932556e-05, + "loss": 0.4030129611492157, + "step": 3960 + }, + { + "epoch": 0.7319005439364368, + "grad_norm": 0.0892823114991188, + "learning_rate": 1.7695663189185703e-05, + "loss": 0.691656231880188, + "step": 3961 + }, + { + "epoch": 0.7320853206453326, + "grad_norm": 0.0701933354139328, + "learning_rate": 1.7694389149175527e-05, + "loss": 0.5611509680747986, + "step": 3962 + }, + { + "epoch": 0.7322700973542285, + "grad_norm": 0.07196095585823059, + "learning_rate": 1.7693114802952736e-05, + "loss": 0.5962368249893188, + "step": 3963 + }, + { + "epoch": 0.7324548740631244, + "grad_norm": 0.06125257909297943, + "learning_rate": 1.7691840150568046e-05, + "loss": 0.5681639909744263, + "step": 3964 + }, + { + "epoch": 0.7326396507720202, + "grad_norm": 0.06818356364965439, + "learning_rate": 1.7690565192072182e-05, + "loss": 0.5990875959396362, + "step": 3965 + }, + { + "epoch": 0.732824427480916, + "grad_norm": 0.06957458704710007, + "learning_rate": 1.7689289927515883e-05, + "loss": 0.6563817858695984, + "step": 3966 + }, + { + "epoch": 0.7330092041898119, + "grad_norm": 0.07935319095849991, + "learning_rate": 1.76880143569499e-05, + "loss": 0.7299106121063232, + "step": 3967 + }, + { + "epoch": 0.7331939808987077, + "grad_norm": 0.09141634404659271, + "learning_rate": 1.7686738480425004e-05, + "loss": 0.8262278437614441, + "step": 3968 + }, + { + "epoch": 0.7333787576076035, + "grad_norm": 0.06430824100971222, + "learning_rate": 1.7685462297991966e-05, + "loss": 0.4728889763355255, + "step": 3969 + }, + { + "epoch": 0.7335635343164995, + "grad_norm": 0.06557998061180115, + "learning_rate": 1.7684185809701567e-05, + "loss": 0.563641369342804, + "step": 3970 + }, + { + "epoch": 0.7337483110253953, + "grad_norm": 0.0910504013299942, + "learning_rate": 1.7682909015604615e-05, + "loss": 0.9313309192657471, + "step": 3971 + }, + { + "epoch": 0.7339330877342911, + "grad_norm": 0.08911722898483276, + "learning_rate": 1.7681631915751922e-05, + "loss": 0.6804410815238953, + "step": 3972 + }, + { + "epoch": 0.7341178644431869, + "grad_norm": 0.07896842062473297, + "learning_rate": 1.7680354510194312e-05, + "loss": 0.6099547743797302, + "step": 3973 + }, + { + "epoch": 0.7343026411520828, + "grad_norm": 0.06189596280455589, + "learning_rate": 1.767907679898262e-05, + "loss": 0.47814786434173584, + "step": 3974 + }, + { + "epoch": 0.7344874178609786, + "grad_norm": 0.06864321231842041, + "learning_rate": 1.76777987821677e-05, + "loss": 0.6335147619247437, + "step": 3975 + }, + { + "epoch": 0.7346721945698744, + "grad_norm": 0.0641375333070755, + "learning_rate": 1.7676520459800404e-05, + "loss": 0.5671116709709167, + "step": 3976 + }, + { + "epoch": 0.7348569712787704, + "grad_norm": 0.07548077404499054, + "learning_rate": 1.7675241831931612e-05, + "loss": 0.6503629684448242, + "step": 3977 + }, + { + "epoch": 0.7350417479876662, + "grad_norm": 0.07392218708992004, + "learning_rate": 1.7673962898612212e-05, + "loss": 0.5626558065414429, + "step": 3978 + }, + { + "epoch": 0.735226524696562, + "grad_norm": 0.08736756443977356, + "learning_rate": 1.7672683659893094e-05, + "loss": 0.6278281807899475, + "step": 3979 + }, + { + "epoch": 0.7354113014054579, + "grad_norm": 0.064041368663311, + "learning_rate": 1.767140411582517e-05, + "loss": 0.5311237573623657, + "step": 3980 + }, + { + "epoch": 0.7355960781143537, + "grad_norm": 0.07592851668596268, + "learning_rate": 1.767012426645936e-05, + "loss": 0.6173804402351379, + "step": 3981 + }, + { + "epoch": 0.7357808548232495, + "grad_norm": 0.07629850506782532, + "learning_rate": 1.7668844111846607e-05, + "loss": 0.603466808795929, + "step": 3982 + }, + { + "epoch": 0.7359656315321453, + "grad_norm": 0.07655462622642517, + "learning_rate": 1.766756365203785e-05, + "loss": 0.6817041039466858, + "step": 3983 + }, + { + "epoch": 0.7361504082410412, + "grad_norm": 0.07560434937477112, + "learning_rate": 1.7666282887084048e-05, + "loss": 0.6514676213264465, + "step": 3984 + }, + { + "epoch": 0.7363351849499371, + "grad_norm": 0.06375480443239212, + "learning_rate": 1.766500181703617e-05, + "loss": 0.460000604391098, + "step": 3985 + }, + { + "epoch": 0.7365199616588329, + "grad_norm": 0.07798736542463303, + "learning_rate": 1.7663720441945203e-05, + "loss": 0.5900716781616211, + "step": 3986 + }, + { + "epoch": 0.7367047383677288, + "grad_norm": 0.05762708559632301, + "learning_rate": 1.7662438761862137e-05, + "loss": 0.48273253440856934, + "step": 3987 + }, + { + "epoch": 0.7368895150766246, + "grad_norm": 0.06853564083576202, + "learning_rate": 1.766115677683798e-05, + "loss": 0.5477281212806702, + "step": 3988 + }, + { + "epoch": 0.7370742917855204, + "grad_norm": 0.0676281601190567, + "learning_rate": 1.7659874486923753e-05, + "loss": 0.682117223739624, + "step": 3989 + }, + { + "epoch": 0.7372590684944162, + "grad_norm": 0.0906582772731781, + "learning_rate": 1.7658591892170485e-05, + "loss": 0.7386360168457031, + "step": 3990 + }, + { + "epoch": 0.7374438452033121, + "grad_norm": 0.08889324963092804, + "learning_rate": 1.7657308992629227e-05, + "loss": 0.6794676780700684, + "step": 3991 + }, + { + "epoch": 0.737628621912208, + "grad_norm": 0.056633397936820984, + "learning_rate": 1.765602578835102e-05, + "loss": 0.4404573440551758, + "step": 3992 + }, + { + "epoch": 0.7378133986211038, + "grad_norm": 0.08712149411439896, + "learning_rate": 1.765474227938694e-05, + "loss": 0.6856634616851807, + "step": 3993 + }, + { + "epoch": 0.7379981753299997, + "grad_norm": 0.05891774967312813, + "learning_rate": 1.765345846578807e-05, + "loss": 0.39748650789260864, + "step": 3994 + }, + { + "epoch": 0.7381829520388955, + "grad_norm": 0.09329812973737717, + "learning_rate": 1.7652174347605495e-05, + "loss": 0.8040505647659302, + "step": 3995 + }, + { + "epoch": 0.7383677287477913, + "grad_norm": 0.07120615243911743, + "learning_rate": 1.7650889924890322e-05, + "loss": 0.5254811644554138, + "step": 3996 + }, + { + "epoch": 0.7385525054566872, + "grad_norm": 0.07867088913917542, + "learning_rate": 1.7649605197693666e-05, + "loss": 0.6115449666976929, + "step": 3997 + }, + { + "epoch": 0.738737282165583, + "grad_norm": 0.08392847329378128, + "learning_rate": 1.7648320166066657e-05, + "loss": 0.7248279452323914, + "step": 3998 + }, + { + "epoch": 0.7389220588744789, + "grad_norm": 0.07307955622673035, + "learning_rate": 1.764703483006043e-05, + "loss": 0.5999466776847839, + "step": 3999 + }, + { + "epoch": 0.7391068355833748, + "grad_norm": 0.08082327246665955, + "learning_rate": 1.7645749189726148e-05, + "loss": 0.6654618978500366, + "step": 4000 + }, + { + "epoch": 0.7391068355833748, + "eval_loss": 0.6549291610717773, + "eval_runtime": 158.519, + "eval_samples_per_second": 114.996, + "eval_steps_per_second": 14.377, + "step": 4000 + }, + { + "epoch": 0.7392916122922706, + "grad_norm": 0.07467007637023926, + "learning_rate": 1.7644463245114966e-05, + "loss": 0.48813697695732117, + "step": 4001 + }, + { + "epoch": 0.7394763890011664, + "grad_norm": 0.06314804404973984, + "learning_rate": 1.764317699627806e-05, + "loss": 0.5541012287139893, + "step": 4002 + }, + { + "epoch": 0.7396611657100622, + "grad_norm": 0.06194116547703743, + "learning_rate": 1.7641890443266626e-05, + "loss": 0.611510157585144, + "step": 4003 + }, + { + "epoch": 0.7398459424189581, + "grad_norm": 0.07432377338409424, + "learning_rate": 1.7640603586131858e-05, + "loss": 0.5475557446479797, + "step": 4004 + }, + { + "epoch": 0.7400307191278539, + "grad_norm": 0.08566770702600479, + "learning_rate": 1.7639316424924974e-05, + "loss": 0.7035894393920898, + "step": 4005 + }, + { + "epoch": 0.7402154958367497, + "grad_norm": 0.0692681223154068, + "learning_rate": 1.7638028959697195e-05, + "loss": 0.6889891028404236, + "step": 4006 + }, + { + "epoch": 0.7404002725456457, + "grad_norm": 0.06803371757268906, + "learning_rate": 1.7636741190499762e-05, + "loss": 0.5378096103668213, + "step": 4007 + }, + { + "epoch": 0.7405850492545415, + "grad_norm": 0.06590144336223602, + "learning_rate": 1.763545311738392e-05, + "loss": 0.6045305728912354, + "step": 4008 + }, + { + "epoch": 0.7407698259634373, + "grad_norm": 0.07703419029712677, + "learning_rate": 1.763416474040093e-05, + "loss": 0.6117264032363892, + "step": 4009 + }, + { + "epoch": 0.7409546026723332, + "grad_norm": 0.0843396931886673, + "learning_rate": 1.7632876059602073e-05, + "loss": 0.8733839988708496, + "step": 4010 + }, + { + "epoch": 0.741139379381229, + "grad_norm": 0.07758200168609619, + "learning_rate": 1.7631587075038625e-05, + "loss": 0.620842695236206, + "step": 4011 + }, + { + "epoch": 0.7413241560901248, + "grad_norm": 0.07516031712293625, + "learning_rate": 1.763029778676189e-05, + "loss": 0.6462552547454834, + "step": 4012 + }, + { + "epoch": 0.7415089327990206, + "grad_norm": 0.06614000350236893, + "learning_rate": 1.762900819482317e-05, + "loss": 0.5343265533447266, + "step": 4013 + }, + { + "epoch": 0.7416937095079166, + "grad_norm": 0.0890808179974556, + "learning_rate": 1.7627718299273796e-05, + "loss": 0.7041956186294556, + "step": 4014 + }, + { + "epoch": 0.7418784862168124, + "grad_norm": 0.06308693438768387, + "learning_rate": 1.76264281001651e-05, + "loss": 0.44210219383239746, + "step": 4015 + }, + { + "epoch": 0.7420632629257082, + "grad_norm": 0.06638413667678833, + "learning_rate": 1.762513759754842e-05, + "loss": 0.5798364877700806, + "step": 4016 + }, + { + "epoch": 0.7422480396346041, + "grad_norm": 0.06280551105737686, + "learning_rate": 1.7623846791475126e-05, + "loss": 0.5701326727867126, + "step": 4017 + }, + { + "epoch": 0.7424328163434999, + "grad_norm": 0.07918789237737656, + "learning_rate": 1.7622555681996577e-05, + "loss": 0.6177583336830139, + "step": 4018 + }, + { + "epoch": 0.7426175930523957, + "grad_norm": 0.06910663098096848, + "learning_rate": 1.762126426916416e-05, + "loss": 0.5345649123191833, + "step": 4019 + }, + { + "epoch": 0.7428023697612915, + "grad_norm": 0.0932488888502121, + "learning_rate": 1.761997255302927e-05, + "loss": 0.6094528436660767, + "step": 4020 + }, + { + "epoch": 0.7429871464701875, + "grad_norm": 0.0931335985660553, + "learning_rate": 1.7618680533643316e-05, + "loss": 0.6541128754615784, + "step": 4021 + }, + { + "epoch": 0.7431719231790833, + "grad_norm": 0.071692556142807, + "learning_rate": 1.7617388211057706e-05, + "loss": 0.5665780305862427, + "step": 4022 + }, + { + "epoch": 0.7433566998879791, + "grad_norm": 0.07735349237918854, + "learning_rate": 1.7616095585323882e-05, + "loss": 0.6871585249900818, + "step": 4023 + }, + { + "epoch": 0.743541476596875, + "grad_norm": 0.08922318369150162, + "learning_rate": 1.7614802656493277e-05, + "loss": 0.7715819478034973, + "step": 4024 + }, + { + "epoch": 0.7437262533057708, + "grad_norm": 0.06489979475736618, + "learning_rate": 1.7613509424617353e-05, + "loss": 0.568026065826416, + "step": 4025 + }, + { + "epoch": 0.7439110300146666, + "grad_norm": 0.0566529706120491, + "learning_rate": 1.7612215889747574e-05, + "loss": 0.36088061332702637, + "step": 4026 + }, + { + "epoch": 0.7440958067235625, + "grad_norm": 0.0665200874209404, + "learning_rate": 1.7610922051935416e-05, + "loss": 0.6098876595497131, + "step": 4027 + }, + { + "epoch": 0.7442805834324583, + "grad_norm": 0.07974401116371155, + "learning_rate": 1.760962791123237e-05, + "loss": 0.6779783964157104, + "step": 4028 + }, + { + "epoch": 0.7444653601413542, + "grad_norm": 0.062424369156360626, + "learning_rate": 1.7608333467689946e-05, + "loss": 0.5455675721168518, + "step": 4029 + }, + { + "epoch": 0.74465013685025, + "grad_norm": 0.07580641657114029, + "learning_rate": 1.7607038721359648e-05, + "loss": 0.6508584022521973, + "step": 4030 + }, + { + "epoch": 0.7448349135591459, + "grad_norm": 0.06872642040252686, + "learning_rate": 1.7605743672293016e-05, + "loss": 0.6221672296524048, + "step": 4031 + }, + { + "epoch": 0.7450196902680417, + "grad_norm": 0.07672706991434097, + "learning_rate": 1.7604448320541575e-05, + "loss": 0.6213904023170471, + "step": 4032 + }, + { + "epoch": 0.7452044669769375, + "grad_norm": 0.08280938118696213, + "learning_rate": 1.760315266615688e-05, + "loss": 0.7548631429672241, + "step": 4033 + }, + { + "epoch": 0.7453892436858334, + "grad_norm": 0.06836125999689102, + "learning_rate": 1.76018567091905e-05, + "loss": 0.5210031270980835, + "step": 4034 + }, + { + "epoch": 0.7455740203947292, + "grad_norm": 0.06920409947633743, + "learning_rate": 1.7600560449694006e-05, + "loss": 0.6825928688049316, + "step": 4035 + }, + { + "epoch": 0.7457587971036251, + "grad_norm": 0.05258931964635849, + "learning_rate": 1.7599263887718984e-05, + "loss": 0.44466057419776917, + "step": 4036 + }, + { + "epoch": 0.745943573812521, + "grad_norm": 0.0675012394785881, + "learning_rate": 1.7597967023317035e-05, + "loss": 0.5741130113601685, + "step": 4037 + }, + { + "epoch": 0.7461283505214168, + "grad_norm": 0.07422135025262833, + "learning_rate": 1.759666985653977e-05, + "loss": 0.7687113285064697, + "step": 4038 + }, + { + "epoch": 0.7463131272303126, + "grad_norm": 0.06264545023441315, + "learning_rate": 1.759537238743881e-05, + "loss": 0.5124937891960144, + "step": 4039 + }, + { + "epoch": 0.7464979039392085, + "grad_norm": 0.04960038885474205, + "learning_rate": 1.7594074616065793e-05, + "loss": 0.45736101269721985, + "step": 4040 + }, + { + "epoch": 0.7466826806481043, + "grad_norm": 0.08585156500339508, + "learning_rate": 1.7592776542472364e-05, + "loss": 0.8170877695083618, + "step": 4041 + }, + { + "epoch": 0.7468674573570001, + "grad_norm": 0.0781988576054573, + "learning_rate": 1.7591478166710184e-05, + "loss": 0.6889027953147888, + "step": 4042 + }, + { + "epoch": 0.747052234065896, + "grad_norm": 0.07258368283510208, + "learning_rate": 1.759017948883093e-05, + "loss": 0.5686144232749939, + "step": 4043 + }, + { + "epoch": 0.7472370107747919, + "grad_norm": 0.09401769191026688, + "learning_rate": 1.758888050888627e-05, + "loss": 0.6536602973937988, + "step": 4044 + }, + { + "epoch": 0.7474217874836877, + "grad_norm": 0.07565975189208984, + "learning_rate": 1.758758122692791e-05, + "loss": 0.5427644848823547, + "step": 4045 + }, + { + "epoch": 0.7476065641925835, + "grad_norm": 0.09071590006351471, + "learning_rate": 1.7586281643007558e-05, + "loss": 0.8327360153198242, + "step": 4046 + }, + { + "epoch": 0.7477913409014794, + "grad_norm": 0.05182671546936035, + "learning_rate": 1.7584981757176927e-05, + "loss": 0.44840091466903687, + "step": 4047 + }, + { + "epoch": 0.7479761176103752, + "grad_norm": 0.05731528252363205, + "learning_rate": 1.758368156948776e-05, + "loss": 0.5093470811843872, + "step": 4048 + }, + { + "epoch": 0.748160894319271, + "grad_norm": 0.07756191492080688, + "learning_rate": 1.7582381079991787e-05, + "loss": 0.6823296546936035, + "step": 4049 + }, + { + "epoch": 0.7483456710281668, + "grad_norm": 0.08770789951086044, + "learning_rate": 1.758108028874077e-05, + "loss": 0.684930145740509, + "step": 4050 + }, + { + "epoch": 0.7485304477370628, + "grad_norm": 0.07347483932971954, + "learning_rate": 1.7579779195786475e-05, + "loss": 0.5228387713432312, + "step": 4051 + }, + { + "epoch": 0.7487152244459586, + "grad_norm": 0.08817595988512039, + "learning_rate": 1.7578477801180684e-05, + "loss": 0.6824162006378174, + "step": 4052 + }, + { + "epoch": 0.7489000011548544, + "grad_norm": 0.07642996311187744, + "learning_rate": 1.7577176104975188e-05, + "loss": 0.5606251358985901, + "step": 4053 + }, + { + "epoch": 0.7490847778637503, + "grad_norm": 0.06220955774188042, + "learning_rate": 1.7575874107221785e-05, + "loss": 0.5040315389633179, + "step": 4054 + }, + { + "epoch": 0.7492695545726461, + "grad_norm": 0.08808313310146332, + "learning_rate": 1.7574571807972297e-05, + "loss": 1.009043574333191, + "step": 4055 + }, + { + "epoch": 0.7494543312815419, + "grad_norm": 0.06635929644107819, + "learning_rate": 1.7573269207278546e-05, + "loss": 0.4607446789741516, + "step": 4056 + }, + { + "epoch": 0.7496391079904378, + "grad_norm": 0.08293266594409943, + "learning_rate": 1.757196630519238e-05, + "loss": 0.723304808139801, + "step": 4057 + }, + { + "epoch": 0.7498238846993337, + "grad_norm": 0.06487040966749191, + "learning_rate": 1.7570663101765638e-05, + "loss": 0.5701680183410645, + "step": 4058 + }, + { + "epoch": 0.7500086614082295, + "grad_norm": 0.06392481178045273, + "learning_rate": 1.7569359597050193e-05, + "loss": 0.5060821175575256, + "step": 4059 + }, + { + "epoch": 0.7501934381171254, + "grad_norm": 0.0927731916308403, + "learning_rate": 1.756805579109792e-05, + "loss": 0.7945534586906433, + "step": 4060 + }, + { + "epoch": 0.7503782148260212, + "grad_norm": 0.075643390417099, + "learning_rate": 1.75667516839607e-05, + "loss": 0.6051799654960632, + "step": 4061 + }, + { + "epoch": 0.750562991534917, + "grad_norm": 0.06552645564079285, + "learning_rate": 1.756544727569044e-05, + "loss": 0.519436240196228, + "step": 4062 + }, + { + "epoch": 0.7507477682438128, + "grad_norm": 0.053866442292928696, + "learning_rate": 1.756414256633904e-05, + "loss": 0.44880855083465576, + "step": 4063 + }, + { + "epoch": 0.7509325449527087, + "grad_norm": 0.06403449922800064, + "learning_rate": 1.756283755595844e-05, + "loss": 0.5271477103233337, + "step": 4064 + }, + { + "epoch": 0.7511173216616046, + "grad_norm": 0.07670261710882187, + "learning_rate": 1.7561532244600562e-05, + "loss": 0.7232034802436829, + "step": 4065 + }, + { + "epoch": 0.7513020983705004, + "grad_norm": 0.07809510827064514, + "learning_rate": 1.7560226632317355e-05, + "loss": 0.6339353919029236, + "step": 4066 + }, + { + "epoch": 0.7514868750793963, + "grad_norm": 0.07474415004253387, + "learning_rate": 1.7558920719160788e-05, + "loss": 0.515523374080658, + "step": 4067 + }, + { + "epoch": 0.7516716517882921, + "grad_norm": 0.07664012908935547, + "learning_rate": 1.755761450518282e-05, + "loss": 0.6550431847572327, + "step": 4068 + }, + { + "epoch": 0.7518564284971879, + "grad_norm": 0.07359275966882706, + "learning_rate": 1.7556307990435445e-05, + "loss": 0.6118465662002563, + "step": 4069 + }, + { + "epoch": 0.7520412052060838, + "grad_norm": 0.048818353563547134, + "learning_rate": 1.7555001174970647e-05, + "loss": 0.3596686124801636, + "step": 4070 + }, + { + "epoch": 0.7522259819149796, + "grad_norm": 0.07568147033452988, + "learning_rate": 1.755369405884044e-05, + "loss": 0.5742887258529663, + "step": 4071 + }, + { + "epoch": 0.7524107586238754, + "grad_norm": 0.0754503533244133, + "learning_rate": 1.7552386642096842e-05, + "loss": 0.7414309978485107, + "step": 4072 + }, + { + "epoch": 0.7525955353327713, + "grad_norm": 0.07180456072092056, + "learning_rate": 1.755107892479188e-05, + "loss": 0.6643378138542175, + "step": 4073 + }, + { + "epoch": 0.7527803120416672, + "grad_norm": 0.06921889632940292, + "learning_rate": 1.7549770906977612e-05, + "loss": 0.6333237886428833, + "step": 4074 + }, + { + "epoch": 0.752965088750563, + "grad_norm": 0.072540782392025, + "learning_rate": 1.7548462588706075e-05, + "loss": 0.5915318727493286, + "step": 4075 + }, + { + "epoch": 0.7531498654594588, + "grad_norm": 0.07446306198835373, + "learning_rate": 1.7547153970029343e-05, + "loss": 0.5665271878242493, + "step": 4076 + }, + { + "epoch": 0.7533346421683547, + "grad_norm": 0.07440496236085892, + "learning_rate": 1.7545845050999495e-05, + "loss": 0.8101900219917297, + "step": 4077 + }, + { + "epoch": 0.7535194188772505, + "grad_norm": 0.0785202607512474, + "learning_rate": 1.7544535831668624e-05, + "loss": 0.6516543626785278, + "step": 4078 + }, + { + "epoch": 0.7537041955861463, + "grad_norm": 0.07105366140604019, + "learning_rate": 1.7543226312088828e-05, + "loss": 0.5027989745140076, + "step": 4079 + }, + { + "epoch": 0.7538889722950423, + "grad_norm": 0.06337833404541016, + "learning_rate": 1.7541916492312225e-05, + "loss": 0.5374016761779785, + "step": 4080 + }, + { + "epoch": 0.7540737490039381, + "grad_norm": 0.0845162644982338, + "learning_rate": 1.7540606372390946e-05, + "loss": 0.5916616916656494, + "step": 4081 + }, + { + "epoch": 0.7542585257128339, + "grad_norm": 0.08202160894870758, + "learning_rate": 1.7539295952377117e-05, + "loss": 0.5300063490867615, + "step": 4082 + }, + { + "epoch": 0.7544433024217297, + "grad_norm": 0.06920167058706284, + "learning_rate": 1.7537985232322902e-05, + "loss": 0.5863104462623596, + "step": 4083 + }, + { + "epoch": 0.7546280791306256, + "grad_norm": 0.061373304575681686, + "learning_rate": 1.7536674212280456e-05, + "loss": 0.42102378606796265, + "step": 4084 + }, + { + "epoch": 0.7548128558395214, + "grad_norm": 0.08496609330177307, + "learning_rate": 1.7535362892301953e-05, + "loss": 0.6914458870887756, + "step": 4085 + }, + { + "epoch": 0.7549976325484172, + "grad_norm": 0.08487819880247116, + "learning_rate": 1.753405127243959e-05, + "loss": 0.6755183935165405, + "step": 4086 + }, + { + "epoch": 0.7551824092573132, + "grad_norm": 0.08387638628482819, + "learning_rate": 1.7532739352745552e-05, + "loss": 0.7153843641281128, + "step": 4087 + }, + { + "epoch": 0.755367185966209, + "grad_norm": 0.07073529064655304, + "learning_rate": 1.7531427133272056e-05, + "loss": 0.44453296065330505, + "step": 4088 + }, + { + "epoch": 0.7555519626751048, + "grad_norm": 0.08027850091457367, + "learning_rate": 1.753011461407132e-05, + "loss": 0.7097273468971252, + "step": 4089 + }, + { + "epoch": 0.7557367393840007, + "grad_norm": 0.06549596786499023, + "learning_rate": 1.752880179519558e-05, + "loss": 0.5342280864715576, + "step": 4090 + }, + { + "epoch": 0.7559215160928965, + "grad_norm": 0.09357139468193054, + "learning_rate": 1.752748867669709e-05, + "loss": 0.6749565005302429, + "step": 4091 + }, + { + "epoch": 0.7561062928017923, + "grad_norm": 0.09218194335699081, + "learning_rate": 1.7526175258628097e-05, + "loss": 0.6712514162063599, + "step": 4092 + }, + { + "epoch": 0.7562910695106881, + "grad_norm": 0.11231732368469238, + "learning_rate": 1.7524861541040878e-05, + "loss": 0.705974817276001, + "step": 4093 + }, + { + "epoch": 0.756475846219584, + "grad_norm": 0.06249980628490448, + "learning_rate": 1.7523547523987708e-05, + "loss": 0.514185905456543, + "step": 4094 + }, + { + "epoch": 0.7566606229284799, + "grad_norm": 0.08503606915473938, + "learning_rate": 1.7522233207520887e-05, + "loss": 0.790868878364563, + "step": 4095 + }, + { + "epoch": 0.7568453996373757, + "grad_norm": 0.08546585589647293, + "learning_rate": 1.7520918591692713e-05, + "loss": 0.6644178032875061, + "step": 4096 + }, + { + "epoch": 0.7570301763462716, + "grad_norm": 0.07207859307527542, + "learning_rate": 1.7519603676555517e-05, + "loss": 0.5520370006561279, + "step": 4097 + }, + { + "epoch": 0.7572149530551674, + "grad_norm": 0.07846345752477646, + "learning_rate": 1.751828846216162e-05, + "loss": 0.6240181922912598, + "step": 4098 + }, + { + "epoch": 0.7573997297640632, + "grad_norm": 0.07634284347295761, + "learning_rate": 1.751697294856336e-05, + "loss": 0.6089432239532471, + "step": 4099 + }, + { + "epoch": 0.757584506472959, + "grad_norm": 0.06646178662776947, + "learning_rate": 1.7515657135813095e-05, + "loss": 0.5072463154792786, + "step": 4100 + }, + { + "epoch": 0.7577692831818549, + "grad_norm": 0.05174202099442482, + "learning_rate": 1.7514341023963187e-05, + "loss": 0.38996005058288574, + "step": 4101 + }, + { + "epoch": 0.7579540598907508, + "grad_norm": 0.0655844584107399, + "learning_rate": 1.7513024613066017e-05, + "loss": 0.5721313953399658, + "step": 4102 + }, + { + "epoch": 0.7581388365996466, + "grad_norm": 0.07744917273521423, + "learning_rate": 1.7511707903173975e-05, + "loss": 0.6617624163627625, + "step": 4103 + }, + { + "epoch": 0.7583236133085425, + "grad_norm": 0.07809010148048401, + "learning_rate": 1.7510390894339463e-05, + "loss": 0.6835892200469971, + "step": 4104 + }, + { + "epoch": 0.7585083900174383, + "grad_norm": 0.07202986627817154, + "learning_rate": 1.7509073586614884e-05, + "loss": 0.7185051441192627, + "step": 4105 + }, + { + "epoch": 0.7586931667263341, + "grad_norm": 0.06993846595287323, + "learning_rate": 1.750775598005267e-05, + "loss": 0.536699652671814, + "step": 4106 + }, + { + "epoch": 0.75887794343523, + "grad_norm": 0.08079773187637329, + "learning_rate": 1.750643807470526e-05, + "loss": 0.5461199879646301, + "step": 4107 + }, + { + "epoch": 0.7590627201441258, + "grad_norm": 0.0769638791680336, + "learning_rate": 1.7505119870625097e-05, + "loss": 0.6999402642250061, + "step": 4108 + }, + { + "epoch": 0.7592474968530217, + "grad_norm": 0.07775267958641052, + "learning_rate": 1.7503801367864643e-05, + "loss": 0.6964604258537292, + "step": 4109 + }, + { + "epoch": 0.7594322735619176, + "grad_norm": 0.0595572330057621, + "learning_rate": 1.750248256647637e-05, + "loss": 0.4696602523326874, + "step": 4110 + }, + { + "epoch": 0.7596170502708134, + "grad_norm": 0.06406479328870773, + "learning_rate": 1.7501163466512764e-05, + "loss": 0.4890046715736389, + "step": 4111 + }, + { + "epoch": 0.7598018269797092, + "grad_norm": 0.06237300857901573, + "learning_rate": 1.7499844068026322e-05, + "loss": 0.4907143712043762, + "step": 4112 + }, + { + "epoch": 0.759986603688605, + "grad_norm": 0.05173421651124954, + "learning_rate": 1.749852437106955e-05, + "loss": 0.45887884497642517, + "step": 4113 + }, + { + "epoch": 0.7601713803975009, + "grad_norm": 0.07699117809534073, + "learning_rate": 1.749720437569497e-05, + "loss": 0.659019410610199, + "step": 4114 + }, + { + "epoch": 0.7603561571063967, + "grad_norm": 0.05027296394109726, + "learning_rate": 1.7495884081955106e-05, + "loss": 0.3970877528190613, + "step": 4115 + }, + { + "epoch": 0.7605409338152925, + "grad_norm": 0.06710248440504074, + "learning_rate": 1.749456348990251e-05, + "loss": 0.596959114074707, + "step": 4116 + }, + { + "epoch": 0.7607257105241885, + "grad_norm": 0.06048336625099182, + "learning_rate": 1.7493242599589733e-05, + "loss": 0.4490605592727661, + "step": 4117 + }, + { + "epoch": 0.7609104872330843, + "grad_norm": 0.07077085226774216, + "learning_rate": 1.7491921411069347e-05, + "loss": 0.6158462762832642, + "step": 4118 + }, + { + "epoch": 0.7610952639419801, + "grad_norm": 0.08598323911428452, + "learning_rate": 1.7490599924393925e-05, + "loss": 0.826266884803772, + "step": 4119 + }, + { + "epoch": 0.761280040650876, + "grad_norm": 0.07506779581308365, + "learning_rate": 1.7489278139616063e-05, + "loss": 0.6603870391845703, + "step": 4120 + }, + { + "epoch": 0.7614648173597718, + "grad_norm": 0.07160505652427673, + "learning_rate": 1.748795605678836e-05, + "loss": 0.6138166189193726, + "step": 4121 + }, + { + "epoch": 0.7616495940686676, + "grad_norm": 0.08130978792905807, + "learning_rate": 1.7486633675963432e-05, + "loss": 0.7139620184898376, + "step": 4122 + }, + { + "epoch": 0.7618343707775634, + "grad_norm": 0.07921002805233002, + "learning_rate": 1.748531099719391e-05, + "loss": 0.7038363814353943, + "step": 4123 + }, + { + "epoch": 0.7620191474864594, + "grad_norm": 0.06836456805467606, + "learning_rate": 1.748398802053243e-05, + "loss": 0.5928983092308044, + "step": 4124 + }, + { + "epoch": 0.7622039241953552, + "grad_norm": 0.07586997747421265, + "learning_rate": 1.7482664746031637e-05, + "loss": 0.6390272974967957, + "step": 4125 + }, + { + "epoch": 0.762388700904251, + "grad_norm": 0.0682540088891983, + "learning_rate": 1.7481341173744198e-05, + "loss": 0.6863719820976257, + "step": 4126 + }, + { + "epoch": 0.7625734776131469, + "grad_norm": 0.07922250777482986, + "learning_rate": 1.7480017303722788e-05, + "loss": 0.5649644136428833, + "step": 4127 + }, + { + "epoch": 0.7627582543220427, + "grad_norm": 0.08836416155099869, + "learning_rate": 1.747869313602009e-05, + "loss": 0.6617914438247681, + "step": 4128 + }, + { + "epoch": 0.7629430310309385, + "grad_norm": 0.06390947103500366, + "learning_rate": 1.74773686706888e-05, + "loss": 0.584636390209198, + "step": 4129 + }, + { + "epoch": 0.7631278077398344, + "grad_norm": 0.0760967954993248, + "learning_rate": 1.7476043907781636e-05, + "loss": 0.6412030458450317, + "step": 4130 + }, + { + "epoch": 0.7633125844487303, + "grad_norm": 0.05150337144732475, + "learning_rate": 1.747471884735131e-05, + "loss": 0.3768945038318634, + "step": 4131 + }, + { + "epoch": 0.7634973611576261, + "grad_norm": 0.07066693156957626, + "learning_rate": 1.7473393489450564e-05, + "loss": 0.6143984794616699, + "step": 4132 + }, + { + "epoch": 0.763682137866522, + "grad_norm": 0.0610533282160759, + "learning_rate": 1.7472067834132135e-05, + "loss": 0.45592719316482544, + "step": 4133 + }, + { + "epoch": 0.7638669145754178, + "grad_norm": 0.08170929551124573, + "learning_rate": 1.7470741881448784e-05, + "loss": 0.6536384224891663, + "step": 4134 + }, + { + "epoch": 0.7640516912843136, + "grad_norm": 0.071434386074543, + "learning_rate": 1.746941563145328e-05, + "loss": 0.6187744140625, + "step": 4135 + }, + { + "epoch": 0.7642364679932094, + "grad_norm": 0.1042235866189003, + "learning_rate": 1.74680890841984e-05, + "loss": 0.7490211129188538, + "step": 4136 + }, + { + "epoch": 0.7644212447021053, + "grad_norm": 0.0673421323299408, + "learning_rate": 1.7466762239736944e-05, + "loss": 0.5491911172866821, + "step": 4137 + }, + { + "epoch": 0.7646060214110011, + "grad_norm": 0.07412095367908478, + "learning_rate": 1.746543509812171e-05, + "loss": 0.6123514175415039, + "step": 4138 + }, + { + "epoch": 0.764790798119897, + "grad_norm": 0.08058208227157593, + "learning_rate": 1.746410765940551e-05, + "loss": 0.8483732342720032, + "step": 4139 + }, + { + "epoch": 0.7649755748287929, + "grad_norm": 0.07618124783039093, + "learning_rate": 1.7462779923641183e-05, + "loss": 0.6350252032279968, + "step": 4140 + }, + { + "epoch": 0.7651603515376887, + "grad_norm": 0.0847897008061409, + "learning_rate": 1.746145189088156e-05, + "loss": 0.9960081577301025, + "step": 4141 + }, + { + "epoch": 0.7653451282465845, + "grad_norm": 0.06631160527467728, + "learning_rate": 1.7460123561179496e-05, + "loss": 0.6314607262611389, + "step": 4142 + }, + { + "epoch": 0.7655299049554803, + "grad_norm": 0.07614582031965256, + "learning_rate": 1.7458794934587856e-05, + "loss": 0.6961138248443604, + "step": 4143 + }, + { + "epoch": 0.7657146816643762, + "grad_norm": 0.07594437897205353, + "learning_rate": 1.745746601115951e-05, + "loss": 0.5030292868614197, + "step": 4144 + }, + { + "epoch": 0.765899458373272, + "grad_norm": 0.08059264719486237, + "learning_rate": 1.7456136790947347e-05, + "loss": 0.6420890092849731, + "step": 4145 + }, + { + "epoch": 0.7660842350821679, + "grad_norm": 0.11633842438459396, + "learning_rate": 1.7454807274004273e-05, + "loss": 0.8226794004440308, + "step": 4146 + }, + { + "epoch": 0.7662690117910638, + "grad_norm": 0.05782187730073929, + "learning_rate": 1.745347746038319e-05, + "loss": 0.5326021313667297, + "step": 4147 + }, + { + "epoch": 0.7664537884999596, + "grad_norm": 0.07117144018411636, + "learning_rate": 1.7452147350137024e-05, + "loss": 0.6022680401802063, + "step": 4148 + }, + { + "epoch": 0.7666385652088554, + "grad_norm": 0.0824783444404602, + "learning_rate": 1.7450816943318705e-05, + "loss": 0.7285429835319519, + "step": 4149 + }, + { + "epoch": 0.7668233419177513, + "grad_norm": 0.07009123265743256, + "learning_rate": 1.7449486239981186e-05, + "loss": 0.5529493093490601, + "step": 4150 + }, + { + "epoch": 0.7670081186266471, + "grad_norm": 0.06966505944728851, + "learning_rate": 1.744815524017742e-05, + "loss": 0.6751921772956848, + "step": 4151 + }, + { + "epoch": 0.7671928953355429, + "grad_norm": 0.06169646605849266, + "learning_rate": 1.7446823943960374e-05, + "loss": 0.5230487585067749, + "step": 4152 + }, + { + "epoch": 0.7673776720444389, + "grad_norm": 0.06246807053685188, + "learning_rate": 1.744549235138304e-05, + "loss": 0.4462629556655884, + "step": 4153 + }, + { + "epoch": 0.7675624487533347, + "grad_norm": 0.09959576278924942, + "learning_rate": 1.74441604624984e-05, + "loss": 0.9976585507392883, + "step": 4154 + }, + { + "epoch": 0.7677472254622305, + "grad_norm": 0.09119909256696701, + "learning_rate": 1.7442828277359463e-05, + "loss": 0.7003543376922607, + "step": 4155 + }, + { + "epoch": 0.7679320021711263, + "grad_norm": 0.08542408049106598, + "learning_rate": 1.7441495796019245e-05, + "loss": 0.7313602566719055, + "step": 4156 + }, + { + "epoch": 0.7681167788800222, + "grad_norm": 0.06858178228139877, + "learning_rate": 1.744016301853078e-05, + "loss": 0.5413244962692261, + "step": 4157 + }, + { + "epoch": 0.768301555588918, + "grad_norm": 0.05746445804834366, + "learning_rate": 1.74388299449471e-05, + "loss": 0.4712941646575928, + "step": 4158 + }, + { + "epoch": 0.7684863322978138, + "grad_norm": 0.07297435402870178, + "learning_rate": 1.7437496575321264e-05, + "loss": 0.7246826887130737, + "step": 4159 + }, + { + "epoch": 0.7686711090067097, + "grad_norm": 0.08480487018823624, + "learning_rate": 1.7436162909706335e-05, + "loss": 0.9000516533851624, + "step": 4160 + }, + { + "epoch": 0.7688558857156056, + "grad_norm": 0.07197446376085281, + "learning_rate": 1.743482894815538e-05, + "loss": 0.6599001288414001, + "step": 4161 + }, + { + "epoch": 0.7690406624245014, + "grad_norm": 0.10154495388269424, + "learning_rate": 1.74334946907215e-05, + "loss": 0.8901887536048889, + "step": 4162 + }, + { + "epoch": 0.7692254391333972, + "grad_norm": 0.0685698539018631, + "learning_rate": 1.7432160137457787e-05, + "loss": 0.6849889159202576, + "step": 4163 + }, + { + "epoch": 0.7694102158422931, + "grad_norm": 0.07454440742731094, + "learning_rate": 1.743082528841735e-05, + "loss": 0.6368915438652039, + "step": 4164 + }, + { + "epoch": 0.7695949925511889, + "grad_norm": 0.08280269056558609, + "learning_rate": 1.7429490143653317e-05, + "loss": 0.765697181224823, + "step": 4165 + }, + { + "epoch": 0.7697797692600847, + "grad_norm": 0.08227710425853729, + "learning_rate": 1.742815470321882e-05, + "loss": 0.7411954998970032, + "step": 4166 + }, + { + "epoch": 0.7699645459689806, + "grad_norm": 0.07122933119535446, + "learning_rate": 1.7426818967167003e-05, + "loss": 0.595898449420929, + "step": 4167 + }, + { + "epoch": 0.7701493226778765, + "grad_norm": 0.08117000758647919, + "learning_rate": 1.742548293555103e-05, + "loss": 0.7033901810646057, + "step": 4168 + }, + { + "epoch": 0.7703340993867723, + "grad_norm": 0.06495106965303421, + "learning_rate": 1.7424146608424065e-05, + "loss": 0.5481119751930237, + "step": 4169 + }, + { + "epoch": 0.7705188760956682, + "grad_norm": 0.06746747344732285, + "learning_rate": 1.7422809985839292e-05, + "loss": 0.49438005685806274, + "step": 4170 + }, + { + "epoch": 0.770703652804564, + "grad_norm": 0.08098278194665909, + "learning_rate": 1.7421473067849906e-05, + "loss": 0.6253182291984558, + "step": 4171 + }, + { + "epoch": 0.7708884295134598, + "grad_norm": 0.07808305323123932, + "learning_rate": 1.742013585450911e-05, + "loss": 0.6477851867675781, + "step": 4172 + }, + { + "epoch": 0.7710732062223556, + "grad_norm": 0.08123036473989487, + "learning_rate": 1.741879834587012e-05, + "loss": 0.6954832673072815, + "step": 4173 + }, + { + "epoch": 0.7712579829312515, + "grad_norm": 0.08123773336410522, + "learning_rate": 1.741746054198617e-05, + "loss": 0.8441729545593262, + "step": 4174 + }, + { + "epoch": 0.7714427596401474, + "grad_norm": 0.05783558264374733, + "learning_rate": 1.7416122442910493e-05, + "loss": 0.44951534271240234, + "step": 4175 + }, + { + "epoch": 0.7716275363490432, + "grad_norm": 0.07396768778562546, + "learning_rate": 1.741478404869635e-05, + "loss": 0.6257613301277161, + "step": 4176 + }, + { + "epoch": 0.7718123130579391, + "grad_norm": 0.05979736149311066, + "learning_rate": 1.7413445359396996e-05, + "loss": 0.5418450832366943, + "step": 4177 + }, + { + "epoch": 0.7719970897668349, + "grad_norm": 0.07243388146162033, + "learning_rate": 1.741210637506571e-05, + "loss": 0.4934229254722595, + "step": 4178 + }, + { + "epoch": 0.7721818664757307, + "grad_norm": 0.07115595042705536, + "learning_rate": 1.741076709575578e-05, + "loss": 0.572291910648346, + "step": 4179 + }, + { + "epoch": 0.7723666431846266, + "grad_norm": 0.0746389627456665, + "learning_rate": 1.7409427521520507e-05, + "loss": 0.49469316005706787, + "step": 4180 + }, + { + "epoch": 0.7725514198935224, + "grad_norm": 0.09076672792434692, + "learning_rate": 1.7408087652413197e-05, + "loss": 0.7113850116729736, + "step": 4181 + }, + { + "epoch": 0.7727361966024182, + "grad_norm": 0.08029329776763916, + "learning_rate": 1.7406747488487176e-05, + "loss": 0.7337382435798645, + "step": 4182 + }, + { + "epoch": 0.7729209733113142, + "grad_norm": 0.0713610053062439, + "learning_rate": 1.740540702979578e-05, + "loss": 0.6376339793205261, + "step": 4183 + }, + { + "epoch": 0.77310575002021, + "grad_norm": 0.07699505239725113, + "learning_rate": 1.740406627639235e-05, + "loss": 0.714060366153717, + "step": 4184 + }, + { + "epoch": 0.7732905267291058, + "grad_norm": 0.054786115884780884, + "learning_rate": 1.7402725228330247e-05, + "loss": 0.4561084508895874, + "step": 4185 + }, + { + "epoch": 0.7734753034380016, + "grad_norm": 0.08547017723321915, + "learning_rate": 1.7401383885662843e-05, + "loss": 0.7607282400131226, + "step": 4186 + }, + { + "epoch": 0.7736600801468975, + "grad_norm": 0.055155374109745026, + "learning_rate": 1.7400042248443513e-05, + "loss": 0.4208586513996124, + "step": 4187 + }, + { + "epoch": 0.7738448568557933, + "grad_norm": 0.08057721704244614, + "learning_rate": 1.7398700316725653e-05, + "loss": 0.7284572124481201, + "step": 4188 + }, + { + "epoch": 0.7740296335646891, + "grad_norm": 0.07119718194007874, + "learning_rate": 1.739735809056267e-05, + "loss": 0.5601897835731506, + "step": 4189 + }, + { + "epoch": 0.7742144102735851, + "grad_norm": 0.0902874544262886, + "learning_rate": 1.7396015570007978e-05, + "loss": 0.7009224891662598, + "step": 4190 + }, + { + "epoch": 0.7743991869824809, + "grad_norm": 0.07109608501195908, + "learning_rate": 1.7394672755115003e-05, + "loss": 0.6656509637832642, + "step": 4191 + }, + { + "epoch": 0.7745839636913767, + "grad_norm": 0.07586213946342468, + "learning_rate": 1.739332964593719e-05, + "loss": 0.6752843260765076, + "step": 4192 + }, + { + "epoch": 0.7747687404002725, + "grad_norm": 0.0882461667060852, + "learning_rate": 1.739198624252799e-05, + "loss": 0.7218917608261108, + "step": 4193 + }, + { + "epoch": 0.7749535171091684, + "grad_norm": 0.07135339826345444, + "learning_rate": 1.739064254494086e-05, + "loss": 0.7736853361129761, + "step": 4194 + }, + { + "epoch": 0.7751382938180642, + "grad_norm": 0.07494081556797028, + "learning_rate": 1.738929855322928e-05, + "loss": 0.7577793002128601, + "step": 4195 + }, + { + "epoch": 0.77532307052696, + "grad_norm": 0.0696171447634697, + "learning_rate": 1.7387954267446737e-05, + "loss": 0.5817857980728149, + "step": 4196 + }, + { + "epoch": 0.775507847235856, + "grad_norm": 0.059537775814533234, + "learning_rate": 1.7386609687646726e-05, + "loss": 0.5929745435714722, + "step": 4197 + }, + { + "epoch": 0.7756926239447518, + "grad_norm": 0.06773867458105087, + "learning_rate": 1.738526481388276e-05, + "loss": 0.5150438547134399, + "step": 4198 + }, + { + "epoch": 0.7758774006536476, + "grad_norm": 0.07512631267309189, + "learning_rate": 1.7383919646208364e-05, + "loss": 0.6949543356895447, + "step": 4199 + }, + { + "epoch": 0.7760621773625435, + "grad_norm": 0.0708075538277626, + "learning_rate": 1.7382574184677063e-05, + "loss": 0.5188945531845093, + "step": 4200 + }, + { + "epoch": 0.7762469540714393, + "grad_norm": 0.06188984587788582, + "learning_rate": 1.7381228429342406e-05, + "loss": 0.5236637592315674, + "step": 4201 + }, + { + "epoch": 0.7764317307803351, + "grad_norm": 0.06864194571971893, + "learning_rate": 1.7379882380257952e-05, + "loss": 0.5545336008071899, + "step": 4202 + }, + { + "epoch": 0.776616507489231, + "grad_norm": 0.08061398565769196, + "learning_rate": 1.7378536037477266e-05, + "loss": 0.6850088238716125, + "step": 4203 + }, + { + "epoch": 0.7768012841981268, + "grad_norm": 0.09009570628404617, + "learning_rate": 1.7377189401053933e-05, + "loss": 0.6856690049171448, + "step": 4204 + }, + { + "epoch": 0.7769860609070227, + "grad_norm": 0.07767023891210556, + "learning_rate": 1.7375842471041543e-05, + "loss": 0.7038486003875732, + "step": 4205 + }, + { + "epoch": 0.7771708376159185, + "grad_norm": 0.07596709579229355, + "learning_rate": 1.7374495247493694e-05, + "loss": 0.7392452359199524, + "step": 4206 + }, + { + "epoch": 0.7773556143248144, + "grad_norm": 0.06344443559646606, + "learning_rate": 1.737314773046401e-05, + "loss": 0.4634053409099579, + "step": 4207 + }, + { + "epoch": 0.7775403910337102, + "grad_norm": 0.06472158432006836, + "learning_rate": 1.737179992000611e-05, + "loss": 0.5714576244354248, + "step": 4208 + }, + { + "epoch": 0.777725167742606, + "grad_norm": 0.07439829409122467, + "learning_rate": 1.737045181617364e-05, + "loss": 0.6813909411430359, + "step": 4209 + }, + { + "epoch": 0.7779099444515019, + "grad_norm": 0.08288738131523132, + "learning_rate": 1.7369103419020244e-05, + "loss": 0.6286230683326721, + "step": 4210 + }, + { + "epoch": 0.7780947211603977, + "grad_norm": 0.08833064138889313, + "learning_rate": 1.7367754728599592e-05, + "loss": 0.7476876378059387, + "step": 4211 + }, + { + "epoch": 0.7782794978692936, + "grad_norm": 0.07594143599271774, + "learning_rate": 1.736640574496535e-05, + "loss": 0.6591677665710449, + "step": 4212 + }, + { + "epoch": 0.7784642745781895, + "grad_norm": 0.08570119738578796, + "learning_rate": 1.7365056468171204e-05, + "loss": 0.6744270920753479, + "step": 4213 + }, + { + "epoch": 0.7786490512870853, + "grad_norm": 0.08634412288665771, + "learning_rate": 1.7363706898270852e-05, + "loss": 0.6466952562332153, + "step": 4214 + }, + { + "epoch": 0.7788338279959811, + "grad_norm": 0.07181413471698761, + "learning_rate": 1.736235703531801e-05, + "loss": 0.6132568120956421, + "step": 4215 + }, + { + "epoch": 0.7790186047048769, + "grad_norm": 0.072494275867939, + "learning_rate": 1.7361006879366385e-05, + "loss": 0.6853365302085876, + "step": 4216 + }, + { + "epoch": 0.7792033814137728, + "grad_norm": 0.07777240872383118, + "learning_rate": 1.7359656430469722e-05, + "loss": 0.5434789657592773, + "step": 4217 + }, + { + "epoch": 0.7793881581226686, + "grad_norm": 0.07259754091501236, + "learning_rate": 1.7358305688681754e-05, + "loss": 0.6910667419433594, + "step": 4218 + }, + { + "epoch": 0.7795729348315645, + "grad_norm": 0.06869952380657196, + "learning_rate": 1.735695465405624e-05, + "loss": 0.643589973449707, + "step": 4219 + }, + { + "epoch": 0.7797577115404604, + "grad_norm": 0.06629443913698196, + "learning_rate": 1.7355603326646952e-05, + "loss": 0.557184100151062, + "step": 4220 + }, + { + "epoch": 0.7799424882493562, + "grad_norm": 0.0654207244515419, + "learning_rate": 1.7354251706507657e-05, + "loss": 0.6412302255630493, + "step": 4221 + }, + { + "epoch": 0.780127264958252, + "grad_norm": 0.08161873370409012, + "learning_rate": 1.735289979369216e-05, + "loss": 0.7459937334060669, + "step": 4222 + }, + { + "epoch": 0.7803120416671478, + "grad_norm": 0.07101137936115265, + "learning_rate": 1.7351547588254255e-05, + "loss": 0.5612960457801819, + "step": 4223 + }, + { + "epoch": 0.7804968183760437, + "grad_norm": 0.07463975250720978, + "learning_rate": 1.7350195090247754e-05, + "loss": 0.6693721413612366, + "step": 4224 + }, + { + "epoch": 0.7806815950849395, + "grad_norm": 0.08101318031549454, + "learning_rate": 1.734884229972648e-05, + "loss": 0.6169843077659607, + "step": 4225 + }, + { + "epoch": 0.7808663717938353, + "grad_norm": 0.08010120689868927, + "learning_rate": 1.734748921674428e-05, + "loss": 0.6786614656448364, + "step": 4226 + }, + { + "epoch": 0.7810511485027313, + "grad_norm": 0.06889232993125916, + "learning_rate": 1.7346135841354993e-05, + "loss": 0.5372576117515564, + "step": 4227 + }, + { + "epoch": 0.7812359252116271, + "grad_norm": 0.07085257768630981, + "learning_rate": 1.7344782173612485e-05, + "loss": 0.5229369401931763, + "step": 4228 + }, + { + "epoch": 0.7814207019205229, + "grad_norm": 0.08060770481824875, + "learning_rate": 1.7343428213570624e-05, + "loss": 0.7314550280570984, + "step": 4229 + }, + { + "epoch": 0.7816054786294188, + "grad_norm": 0.08209926635026932, + "learning_rate": 1.7342073961283293e-05, + "loss": 0.7790261507034302, + "step": 4230 + }, + { + "epoch": 0.7817902553383146, + "grad_norm": 0.08321072161197662, + "learning_rate": 1.7340719416804395e-05, + "loss": 0.7635213136672974, + "step": 4231 + }, + { + "epoch": 0.7819750320472104, + "grad_norm": 0.08907254040241241, + "learning_rate": 1.7339364580187825e-05, + "loss": 0.7458104491233826, + "step": 4232 + }, + { + "epoch": 0.7821598087561062, + "grad_norm": 0.0697903037071228, + "learning_rate": 1.733800945148751e-05, + "loss": 0.6363630890846252, + "step": 4233 + }, + { + "epoch": 0.7823445854650022, + "grad_norm": 0.06094703823328018, + "learning_rate": 1.7336654030757373e-05, + "loss": 0.5128779411315918, + "step": 4234 + }, + { + "epoch": 0.782529362173898, + "grad_norm": 0.07650782912969589, + "learning_rate": 1.7335298318051362e-05, + "loss": 0.6506317853927612, + "step": 4235 + }, + { + "epoch": 0.7827141388827938, + "grad_norm": 0.057419292628765106, + "learning_rate": 1.7333942313423426e-05, + "loss": 0.3510390520095825, + "step": 4236 + }, + { + "epoch": 0.7828989155916897, + "grad_norm": 0.06217406317591667, + "learning_rate": 1.733258601692753e-05, + "loss": 0.4840368330478668, + "step": 4237 + }, + { + "epoch": 0.7830836923005855, + "grad_norm": 0.08508466929197311, + "learning_rate": 1.7331229428617652e-05, + "loss": 0.7261524200439453, + "step": 4238 + }, + { + "epoch": 0.7832684690094813, + "grad_norm": 0.06267179548740387, + "learning_rate": 1.7329872548547778e-05, + "loss": 0.591058075428009, + "step": 4239 + }, + { + "epoch": 0.7834532457183772, + "grad_norm": 0.06621445715427399, + "learning_rate": 1.732851537677191e-05, + "loss": 0.5970615744590759, + "step": 4240 + }, + { + "epoch": 0.7836380224272731, + "grad_norm": 0.05005696043372154, + "learning_rate": 1.7327157913344058e-05, + "loss": 0.3451262414455414, + "step": 4241 + }, + { + "epoch": 0.7838227991361689, + "grad_norm": 0.0736042931675911, + "learning_rate": 1.7325800158318243e-05, + "loss": 0.6042583584785461, + "step": 4242 + }, + { + "epoch": 0.7840075758450648, + "grad_norm": 0.05432210862636566, + "learning_rate": 1.7324442111748506e-05, + "loss": 0.38885262608528137, + "step": 4243 + }, + { + "epoch": 0.7841923525539606, + "grad_norm": 0.08645527064800262, + "learning_rate": 1.7323083773688883e-05, + "loss": 0.771323561668396, + "step": 4244 + }, + { + "epoch": 0.7843771292628564, + "grad_norm": 0.05179852247238159, + "learning_rate": 1.732172514419344e-05, + "loss": 0.4171346127986908, + "step": 4245 + }, + { + "epoch": 0.7845619059717522, + "grad_norm": 0.07013101130723953, + "learning_rate": 1.732036622331624e-05, + "loss": 0.6128926873207092, + "step": 4246 + }, + { + "epoch": 0.7847466826806481, + "grad_norm": 0.06737525761127472, + "learning_rate": 1.7319007011111372e-05, + "loss": 0.7678019404411316, + "step": 4247 + }, + { + "epoch": 0.7849314593895439, + "grad_norm": 0.05731862410902977, + "learning_rate": 1.7317647507632917e-05, + "loss": 0.4066280424594879, + "step": 4248 + }, + { + "epoch": 0.7851162360984398, + "grad_norm": 0.048538047820329666, + "learning_rate": 1.7316287712934987e-05, + "loss": 0.4312340021133423, + "step": 4249 + }, + { + "epoch": 0.7853010128073357, + "grad_norm": 0.06966068595647812, + "learning_rate": 1.7314927627071697e-05, + "loss": 0.5768951773643494, + "step": 4250 + }, + { + "epoch": 0.7854857895162315, + "grad_norm": 0.07969526946544647, + "learning_rate": 1.7313567250097173e-05, + "loss": 0.6721000671386719, + "step": 4251 + }, + { + "epoch": 0.7856705662251273, + "grad_norm": 0.06059734523296356, + "learning_rate": 1.7312206582065557e-05, + "loss": 0.5634155869483948, + "step": 4252 + }, + { + "epoch": 0.7858553429340231, + "grad_norm": 0.07579533755779266, + "learning_rate": 1.7310845623030988e-05, + "loss": 0.6615477204322815, + "step": 4253 + }, + { + "epoch": 0.786040119642919, + "grad_norm": 0.08253277838230133, + "learning_rate": 1.7309484373047642e-05, + "loss": 0.7589923739433289, + "step": 4254 + }, + { + "epoch": 0.7862248963518148, + "grad_norm": 0.07874087989330292, + "learning_rate": 1.7308122832169685e-05, + "loss": 0.58607417345047, + "step": 4255 + }, + { + "epoch": 0.7864096730607107, + "grad_norm": 0.06461337208747864, + "learning_rate": 1.7306761000451304e-05, + "loss": 0.49482282996177673, + "step": 4256 + }, + { + "epoch": 0.7865944497696066, + "grad_norm": 0.07468587160110474, + "learning_rate": 1.7305398877946692e-05, + "loss": 0.5755637288093567, + "step": 4257 + }, + { + "epoch": 0.7867792264785024, + "grad_norm": 0.06519705057144165, + "learning_rate": 1.7304036464710065e-05, + "loss": 0.5765430927276611, + "step": 4258 + }, + { + "epoch": 0.7869640031873982, + "grad_norm": 0.07528979331254959, + "learning_rate": 1.7302673760795638e-05, + "loss": 0.6647313833236694, + "step": 4259 + }, + { + "epoch": 0.7871487798962941, + "grad_norm": 0.06806044280529022, + "learning_rate": 1.7301310766257636e-05, + "loss": 0.6607141494750977, + "step": 4260 + }, + { + "epoch": 0.7873335566051899, + "grad_norm": 0.06762052327394485, + "learning_rate": 1.7299947481150315e-05, + "loss": 0.5975756049156189, + "step": 4261 + }, + { + "epoch": 0.7875183333140857, + "grad_norm": 0.062473200261592865, + "learning_rate": 1.729858390552792e-05, + "loss": 0.5687182545661926, + "step": 4262 + }, + { + "epoch": 0.7877031100229817, + "grad_norm": 0.06199536472558975, + "learning_rate": 1.7297220039444717e-05, + "loss": 0.4761790335178375, + "step": 4263 + }, + { + "epoch": 0.7878878867318775, + "grad_norm": 0.09022694826126099, + "learning_rate": 1.7295855882954993e-05, + "loss": 0.7738853693008423, + "step": 4264 + }, + { + "epoch": 0.7880726634407733, + "grad_norm": 0.10750069469213486, + "learning_rate": 1.7294491436113026e-05, + "loss": 0.875851035118103, + "step": 4265 + }, + { + "epoch": 0.7882574401496691, + "grad_norm": 0.07180424779653549, + "learning_rate": 1.7293126698973123e-05, + "loss": 0.6548527479171753, + "step": 4266 + }, + { + "epoch": 0.788442216858565, + "grad_norm": 0.06557504087686539, + "learning_rate": 1.7291761671589594e-05, + "loss": 0.588817298412323, + "step": 4267 + }, + { + "epoch": 0.7886269935674608, + "grad_norm": 0.09686272591352463, + "learning_rate": 1.7290396354016762e-05, + "loss": 0.9058108925819397, + "step": 4268 + }, + { + "epoch": 0.7888117702763566, + "grad_norm": 0.06769488006830215, + "learning_rate": 1.7289030746308965e-05, + "loss": 0.6059497594833374, + "step": 4269 + }, + { + "epoch": 0.7889965469852525, + "grad_norm": 0.07028723508119583, + "learning_rate": 1.7287664848520553e-05, + "loss": 0.562899649143219, + "step": 4270 + }, + { + "epoch": 0.7891813236941484, + "grad_norm": 0.07883322983980179, + "learning_rate": 1.7286298660705877e-05, + "loss": 0.6428919434547424, + "step": 4271 + }, + { + "epoch": 0.7893661004030442, + "grad_norm": 0.06248297542333603, + "learning_rate": 1.7284932182919308e-05, + "loss": 0.5069523453712463, + "step": 4272 + }, + { + "epoch": 0.78955087711194, + "grad_norm": 0.07720385491847992, + "learning_rate": 1.728356541521523e-05, + "loss": 0.6113148927688599, + "step": 4273 + }, + { + "epoch": 0.7897356538208359, + "grad_norm": 0.08412636816501617, + "learning_rate": 1.728219835764804e-05, + "loss": 0.6969552636146545, + "step": 4274 + }, + { + "epoch": 0.7899204305297317, + "grad_norm": 0.06477232277393341, + "learning_rate": 1.7280831010272135e-05, + "loss": 0.5412000417709351, + "step": 4275 + }, + { + "epoch": 0.7901052072386275, + "grad_norm": 0.07632836699485779, + "learning_rate": 1.7279463373141935e-05, + "loss": 0.5482943058013916, + "step": 4276 + }, + { + "epoch": 0.7902899839475234, + "grad_norm": 0.0716867446899414, + "learning_rate": 1.7278095446311868e-05, + "loss": 0.5588719844818115, + "step": 4277 + }, + { + "epoch": 0.7904747606564193, + "grad_norm": 0.09096336364746094, + "learning_rate": 1.7276727229836374e-05, + "loss": 0.7104210257530212, + "step": 4278 + }, + { + "epoch": 0.7906595373653151, + "grad_norm": 0.07804781198501587, + "learning_rate": 1.72753587237699e-05, + "loss": 0.717307448387146, + "step": 4279 + }, + { + "epoch": 0.790844314074211, + "grad_norm": 0.056012026965618134, + "learning_rate": 1.7273989928166907e-05, + "loss": 0.5214253664016724, + "step": 4280 + }, + { + "epoch": 0.7910290907831068, + "grad_norm": 0.07080499827861786, + "learning_rate": 1.7272620843081877e-05, + "loss": 0.5698209404945374, + "step": 4281 + }, + { + "epoch": 0.7912138674920026, + "grad_norm": 0.0885167047381401, + "learning_rate": 1.727125146856929e-05, + "loss": 0.729579508304596, + "step": 4282 + }, + { + "epoch": 0.7913986442008984, + "grad_norm": 0.06951869279146194, + "learning_rate": 1.7269881804683645e-05, + "loss": 0.4667820334434509, + "step": 4283 + }, + { + "epoch": 0.7915834209097943, + "grad_norm": 0.06312818080186844, + "learning_rate": 1.7268511851479446e-05, + "loss": 0.5823342204093933, + "step": 4284 + }, + { + "epoch": 0.7917681976186902, + "grad_norm": 0.057776253670454025, + "learning_rate": 1.7267141609011215e-05, + "loss": 0.5697093605995178, + "step": 4285 + }, + { + "epoch": 0.791952974327586, + "grad_norm": 0.08048953860998154, + "learning_rate": 1.7265771077333485e-05, + "loss": 0.7026470899581909, + "step": 4286 + }, + { + "epoch": 0.7921377510364819, + "grad_norm": 0.06322105973958969, + "learning_rate": 1.72644002565008e-05, + "loss": 0.5684886574745178, + "step": 4287 + }, + { + "epoch": 0.7923225277453777, + "grad_norm": 0.07667342573404312, + "learning_rate": 1.7263029146567708e-05, + "loss": 0.6792047619819641, + "step": 4288 + }, + { + "epoch": 0.7925073044542735, + "grad_norm": 0.06281965970993042, + "learning_rate": 1.7261657747588782e-05, + "loss": 0.49382346868515015, + "step": 4289 + }, + { + "epoch": 0.7926920811631694, + "grad_norm": 0.05980212613940239, + "learning_rate": 1.7260286059618597e-05, + "loss": 0.5538467764854431, + "step": 4290 + }, + { + "epoch": 0.7928768578720652, + "grad_norm": 0.0731351226568222, + "learning_rate": 1.725891408271174e-05, + "loss": 0.4768342673778534, + "step": 4291 + }, + { + "epoch": 0.793061634580961, + "grad_norm": 0.05807039886713028, + "learning_rate": 1.725754181692281e-05, + "loss": 0.43910741806030273, + "step": 4292 + }, + { + "epoch": 0.793246411289857, + "grad_norm": 0.0727703794836998, + "learning_rate": 1.7256169262306427e-05, + "loss": 0.6913385391235352, + "step": 4293 + }, + { + "epoch": 0.7934311879987528, + "grad_norm": 0.0621478408575058, + "learning_rate": 1.725479641891721e-05, + "loss": 0.5831829309463501, + "step": 4294 + }, + { + "epoch": 0.7936159647076486, + "grad_norm": 0.09093275666236877, + "learning_rate": 1.7253423286809784e-05, + "loss": 0.7480353713035583, + "step": 4295 + }, + { + "epoch": 0.7938007414165444, + "grad_norm": 0.07408110052347183, + "learning_rate": 1.7252049866038812e-05, + "loss": 0.7685124278068542, + "step": 4296 + }, + { + "epoch": 0.7939855181254403, + "grad_norm": 0.08359529078006744, + "learning_rate": 1.7250676156658942e-05, + "loss": 0.8137569427490234, + "step": 4297 + }, + { + "epoch": 0.7941702948343361, + "grad_norm": 0.07526369392871857, + "learning_rate": 1.7249302158724843e-05, + "loss": 0.6307947039604187, + "step": 4298 + }, + { + "epoch": 0.7943550715432319, + "grad_norm": 0.08648904412984848, + "learning_rate": 1.72479278722912e-05, + "loss": 0.6008473038673401, + "step": 4299 + }, + { + "epoch": 0.7945398482521279, + "grad_norm": 0.07652433216571808, + "learning_rate": 1.7246553297412705e-05, + "loss": 0.6652103662490845, + "step": 4300 + }, + { + "epoch": 0.7947246249610237, + "grad_norm": 0.06447356194257736, + "learning_rate": 1.7245178434144063e-05, + "loss": 0.5354271531105042, + "step": 4301 + }, + { + "epoch": 0.7949094016699195, + "grad_norm": 0.08572579175233841, + "learning_rate": 1.724380328253998e-05, + "loss": 0.5951279997825623, + "step": 4302 + }, + { + "epoch": 0.7950941783788154, + "grad_norm": 0.06310103088617325, + "learning_rate": 1.7242427842655193e-05, + "loss": 0.49456357955932617, + "step": 4303 + }, + { + "epoch": 0.7952789550877112, + "grad_norm": 0.07046617567539215, + "learning_rate": 1.7241052114544434e-05, + "loss": 0.6634379625320435, + "step": 4304 + }, + { + "epoch": 0.795463731796607, + "grad_norm": 0.06549013406038284, + "learning_rate": 1.7239676098262457e-05, + "loss": 0.5416505336761475, + "step": 4305 + }, + { + "epoch": 0.7956485085055028, + "grad_norm": 0.06831902265548706, + "learning_rate": 1.7238299793864023e-05, + "loss": 0.5576737523078918, + "step": 4306 + }, + { + "epoch": 0.7958332852143988, + "grad_norm": 0.06174025312066078, + "learning_rate": 1.72369232014039e-05, + "loss": 0.5236870050430298, + "step": 4307 + }, + { + "epoch": 0.7960180619232946, + "grad_norm": 0.0703766718506813, + "learning_rate": 1.7235546320936874e-05, + "loss": 0.5725675821304321, + "step": 4308 + }, + { + "epoch": 0.7962028386321904, + "grad_norm": 0.06023186445236206, + "learning_rate": 1.7234169152517742e-05, + "loss": 0.4951670169830322, + "step": 4309 + }, + { + "epoch": 0.7963876153410863, + "grad_norm": 0.06425395607948303, + "learning_rate": 1.7232791696201313e-05, + "loss": 0.5032260417938232, + "step": 4310 + }, + { + "epoch": 0.7965723920499821, + "grad_norm": 0.06461165845394135, + "learning_rate": 1.72314139520424e-05, + "loss": 0.4951946437358856, + "step": 4311 + }, + { + "epoch": 0.7967571687588779, + "grad_norm": 0.07528678327798843, + "learning_rate": 1.723003592009584e-05, + "loss": 0.6738261580467224, + "step": 4312 + }, + { + "epoch": 0.7969419454677737, + "grad_norm": 0.08563785254955292, + "learning_rate": 1.7228657600416468e-05, + "loss": 0.8475391864776611, + "step": 4313 + }, + { + "epoch": 0.7971267221766696, + "grad_norm": 0.07527659833431244, + "learning_rate": 1.722727899305914e-05, + "loss": 0.571058988571167, + "step": 4314 + }, + { + "epoch": 0.7973114988855655, + "grad_norm": 0.06086825951933861, + "learning_rate": 1.7225900098078718e-05, + "loss": 0.5177538990974426, + "step": 4315 + }, + { + "epoch": 0.7974962755944613, + "grad_norm": 0.09399010986089706, + "learning_rate": 1.722452091553008e-05, + "loss": 0.6054516434669495, + "step": 4316 + }, + { + "epoch": 0.7976810523033572, + "grad_norm": 0.07653100788593292, + "learning_rate": 1.7223141445468112e-05, + "loss": 0.5676854252815247, + "step": 4317 + }, + { + "epoch": 0.797865829012253, + "grad_norm": 0.06869052350521088, + "learning_rate": 1.7221761687947713e-05, + "loss": 0.6330968141555786, + "step": 4318 + }, + { + "epoch": 0.7980506057211488, + "grad_norm": 0.0903267040848732, + "learning_rate": 1.7220381643023795e-05, + "loss": 0.8275489807128906, + "step": 4319 + }, + { + "epoch": 0.7982353824300447, + "grad_norm": 0.06249494478106499, + "learning_rate": 1.721900131075127e-05, + "loss": 0.5433062314987183, + "step": 4320 + }, + { + "epoch": 0.7984201591389405, + "grad_norm": 0.0794859305024147, + "learning_rate": 1.7217620691185083e-05, + "loss": 0.5860161781311035, + "step": 4321 + }, + { + "epoch": 0.7986049358478364, + "grad_norm": 0.05575048550963402, + "learning_rate": 1.7216239784380176e-05, + "loss": 0.47690248489379883, + "step": 4322 + }, + { + "epoch": 0.7987897125567323, + "grad_norm": 0.07652156054973602, + "learning_rate": 1.72148585903915e-05, + "loss": 0.6837499737739563, + "step": 4323 + }, + { + "epoch": 0.7989744892656281, + "grad_norm": 0.06289152801036835, + "learning_rate": 1.7213477109274024e-05, + "loss": 0.5508039593696594, + "step": 4324 + }, + { + "epoch": 0.7991592659745239, + "grad_norm": 0.0741892084479332, + "learning_rate": 1.721209534108273e-05, + "loss": 0.6964645385742188, + "step": 4325 + }, + { + "epoch": 0.7993440426834197, + "grad_norm": 0.07051388919353485, + "learning_rate": 1.72107132858726e-05, + "loss": 0.6231642961502075, + "step": 4326 + }, + { + "epoch": 0.7995288193923156, + "grad_norm": 0.07337725907564163, + "learning_rate": 1.7209330943698644e-05, + "loss": 0.5993421673774719, + "step": 4327 + }, + { + "epoch": 0.7997135961012114, + "grad_norm": 0.07059329003095627, + "learning_rate": 1.720794831461587e-05, + "loss": 0.6256791949272156, + "step": 4328 + }, + { + "epoch": 0.7998983728101073, + "grad_norm": 0.07519291341304779, + "learning_rate": 1.7206565398679306e-05, + "loss": 0.5259360074996948, + "step": 4329 + }, + { + "epoch": 0.8000831495190032, + "grad_norm": 0.06302302330732346, + "learning_rate": 1.7205182195943983e-05, + "loss": 0.4448622763156891, + "step": 4330 + }, + { + "epoch": 0.800267926227899, + "grad_norm": 0.08149128407239914, + "learning_rate": 1.720379870646495e-05, + "loss": 0.47039729356765747, + "step": 4331 + }, + { + "epoch": 0.8004527029367948, + "grad_norm": 0.0848081186413765, + "learning_rate": 1.720241493029727e-05, + "loss": 0.7555239200592041, + "step": 4332 + }, + { + "epoch": 0.8006374796456907, + "grad_norm": 0.05285144969820976, + "learning_rate": 1.7201030867496005e-05, + "loss": 0.41368257999420166, + "step": 4333 + }, + { + "epoch": 0.8008222563545865, + "grad_norm": 0.07442737370729446, + "learning_rate": 1.7199646518116243e-05, + "loss": 0.7890750765800476, + "step": 4334 + }, + { + "epoch": 0.8010070330634823, + "grad_norm": 0.06920800358057022, + "learning_rate": 1.7198261882213073e-05, + "loss": 0.5713163018226624, + "step": 4335 + }, + { + "epoch": 0.8011918097723781, + "grad_norm": 0.07392875850200653, + "learning_rate": 1.7196876959841607e-05, + "loss": 0.5265414118766785, + "step": 4336 + }, + { + "epoch": 0.8013765864812741, + "grad_norm": 0.07111220806837082, + "learning_rate": 1.719549175105695e-05, + "loss": 0.6268044710159302, + "step": 4337 + }, + { + "epoch": 0.8015613631901699, + "grad_norm": 0.06094978004693985, + "learning_rate": 1.719410625591423e-05, + "loss": 0.5070362091064453, + "step": 4338 + }, + { + "epoch": 0.8017461398990657, + "grad_norm": 0.06945740431547165, + "learning_rate": 1.7192720474468592e-05, + "loss": 0.47703972458839417, + "step": 4339 + }, + { + "epoch": 0.8019309166079616, + "grad_norm": 0.08109176903963089, + "learning_rate": 1.719133440677518e-05, + "loss": 1.0706208944320679, + "step": 4340 + }, + { + "epoch": 0.8021156933168574, + "grad_norm": 0.0658700242638588, + "learning_rate": 1.7189948052889155e-05, + "loss": 0.5788075923919678, + "step": 4341 + }, + { + "epoch": 0.8023004700257532, + "grad_norm": 0.08159137517213821, + "learning_rate": 1.71885614128657e-05, + "loss": 0.742228627204895, + "step": 4342 + }, + { + "epoch": 0.802485246734649, + "grad_norm": 0.08924289792776108, + "learning_rate": 1.7187174486759985e-05, + "loss": 0.7699382305145264, + "step": 4343 + }, + { + "epoch": 0.802670023443545, + "grad_norm": 0.07061409950256348, + "learning_rate": 1.7185787274627213e-05, + "loss": 0.47217515110969543, + "step": 4344 + }, + { + "epoch": 0.8028548001524408, + "grad_norm": 0.07241703569889069, + "learning_rate": 1.7184399776522586e-05, + "loss": 0.6893797516822815, + "step": 4345 + }, + { + "epoch": 0.8030395768613366, + "grad_norm": 0.05688874423503876, + "learning_rate": 1.718301199250133e-05, + "loss": 0.43385496735572815, + "step": 4346 + }, + { + "epoch": 0.8032243535702325, + "grad_norm": 0.0645657479763031, + "learning_rate": 1.7181623922618665e-05, + "loss": 0.6052931547164917, + "step": 4347 + }, + { + "epoch": 0.8034091302791283, + "grad_norm": 0.07200708985328674, + "learning_rate": 1.7180235566929835e-05, + "loss": 0.6070054769515991, + "step": 4348 + }, + { + "epoch": 0.8035939069880241, + "grad_norm": 0.08863953500986099, + "learning_rate": 1.71788469254901e-05, + "loss": 0.8176451325416565, + "step": 4349 + }, + { + "epoch": 0.80377868369692, + "grad_norm": 0.07187061756849289, + "learning_rate": 1.717745799835471e-05, + "loss": 0.6840246319770813, + "step": 4350 + }, + { + "epoch": 0.8039634604058159, + "grad_norm": 0.06401456892490387, + "learning_rate": 1.7176068785578954e-05, + "loss": 0.42584607005119324, + "step": 4351 + }, + { + "epoch": 0.8041482371147117, + "grad_norm": 0.068192258477211, + "learning_rate": 1.7174679287218108e-05, + "loss": 0.5894313454627991, + "step": 4352 + }, + { + "epoch": 0.8043330138236076, + "grad_norm": 0.07762222737073898, + "learning_rate": 1.7173289503327472e-05, + "loss": 0.608502209186554, + "step": 4353 + }, + { + "epoch": 0.8045177905325034, + "grad_norm": 0.07506255060434341, + "learning_rate": 1.7171899433962356e-05, + "loss": 0.6749979257583618, + "step": 4354 + }, + { + "epoch": 0.8047025672413992, + "grad_norm": 0.07582836598157883, + "learning_rate": 1.7170509079178084e-05, + "loss": 0.7523781061172485, + "step": 4355 + }, + { + "epoch": 0.804887343950295, + "grad_norm": 0.06109226122498512, + "learning_rate": 1.716911843902998e-05, + "loss": 0.591269314289093, + "step": 4356 + }, + { + "epoch": 0.8050721206591909, + "grad_norm": 0.0685863196849823, + "learning_rate": 1.7167727513573395e-05, + "loss": 0.5930875539779663, + "step": 4357 + }, + { + "epoch": 0.8052568973680867, + "grad_norm": 0.09155690670013428, + "learning_rate": 1.716633630286368e-05, + "loss": 0.8301301598548889, + "step": 4358 + }, + { + "epoch": 0.8054416740769826, + "grad_norm": 0.06751079857349396, + "learning_rate": 1.71649448069562e-05, + "loss": 0.47847267985343933, + "step": 4359 + }, + { + "epoch": 0.8056264507858785, + "grad_norm": 0.06919397413730621, + "learning_rate": 1.716355302590633e-05, + "loss": 0.547621488571167, + "step": 4360 + }, + { + "epoch": 0.8058112274947743, + "grad_norm": 0.08696835488080978, + "learning_rate": 1.7162160959769462e-05, + "loss": 0.7215811610221863, + "step": 4361 + }, + { + "epoch": 0.8059960042036701, + "grad_norm": 0.07177529484033585, + "learning_rate": 1.7160768608601e-05, + "loss": 0.5491442680358887, + "step": 4362 + }, + { + "epoch": 0.806180780912566, + "grad_norm": 0.07802259176969528, + "learning_rate": 1.7159375972456343e-05, + "loss": 0.8298656344413757, + "step": 4363 + }, + { + "epoch": 0.8063655576214618, + "grad_norm": 0.0680144727230072, + "learning_rate": 1.7157983051390926e-05, + "loss": 0.5937074422836304, + "step": 4364 + }, + { + "epoch": 0.8065503343303576, + "grad_norm": 0.08034884929656982, + "learning_rate": 1.7156589845460177e-05, + "loss": 0.6708976030349731, + "step": 4365 + }, + { + "epoch": 0.8067351110392535, + "grad_norm": 0.0692923367023468, + "learning_rate": 1.7155196354719543e-05, + "loss": 0.4966511130332947, + "step": 4366 + }, + { + "epoch": 0.8069198877481494, + "grad_norm": 0.06150248274207115, + "learning_rate": 1.715380257922448e-05, + "loss": 0.5387994647026062, + "step": 4367 + }, + { + "epoch": 0.8071046644570452, + "grad_norm": 0.06653249263763428, + "learning_rate": 1.7152408519030457e-05, + "loss": 0.49013054370880127, + "step": 4368 + }, + { + "epoch": 0.807289441165941, + "grad_norm": 0.06323239952325821, + "learning_rate": 1.715101417419295e-05, + "loss": 0.4928307831287384, + "step": 4369 + }, + { + "epoch": 0.8074742178748369, + "grad_norm": 0.07590825110673904, + "learning_rate": 1.7149619544767452e-05, + "loss": 0.7191093564033508, + "step": 4370 + }, + { + "epoch": 0.8076589945837327, + "grad_norm": 0.07575014978647232, + "learning_rate": 1.7148224630809463e-05, + "loss": 0.6842823624610901, + "step": 4371 + }, + { + "epoch": 0.8078437712926285, + "grad_norm": 0.058443885296583176, + "learning_rate": 1.71468294323745e-05, + "loss": 0.43093398213386536, + "step": 4372 + }, + { + "epoch": 0.8080285480015245, + "grad_norm": 0.06890040636062622, + "learning_rate": 1.7145433949518083e-05, + "loss": 0.7078091502189636, + "step": 4373 + }, + { + "epoch": 0.8082133247104203, + "grad_norm": 0.056887783110141754, + "learning_rate": 1.7144038182295752e-05, + "loss": 0.39555490016937256, + "step": 4374 + }, + { + "epoch": 0.8083981014193161, + "grad_norm": 0.08662290126085281, + "learning_rate": 1.7142642130763048e-05, + "loss": 0.7448327541351318, + "step": 4375 + }, + { + "epoch": 0.808582878128212, + "grad_norm": 0.07885993272066116, + "learning_rate": 1.714124579497554e-05, + "loss": 0.6442118287086487, + "step": 4376 + }, + { + "epoch": 0.8087676548371078, + "grad_norm": 0.06210273504257202, + "learning_rate": 1.7139849174988786e-05, + "loss": 0.5477867126464844, + "step": 4377 + }, + { + "epoch": 0.8089524315460036, + "grad_norm": 0.06308577209711075, + "learning_rate": 1.7138452270858376e-05, + "loss": 0.5083187222480774, + "step": 4378 + }, + { + "epoch": 0.8091372082548994, + "grad_norm": 0.0746660828590393, + "learning_rate": 1.7137055082639898e-05, + "loss": 0.5498301982879639, + "step": 4379 + }, + { + "epoch": 0.8093219849637953, + "grad_norm": 0.08194633573293686, + "learning_rate": 1.7135657610388955e-05, + "loss": 0.7917114496231079, + "step": 4380 + }, + { + "epoch": 0.8095067616726912, + "grad_norm": 0.0716506838798523, + "learning_rate": 1.7134259854161164e-05, + "loss": 0.5995321869850159, + "step": 4381 + }, + { + "epoch": 0.809691538381587, + "grad_norm": 0.06110651046037674, + "learning_rate": 1.7132861814012154e-05, + "loss": 0.5288779735565186, + "step": 4382 + }, + { + "epoch": 0.8098763150904829, + "grad_norm": 0.07181107252836227, + "learning_rate": 1.7131463489997558e-05, + "loss": 0.6976112127304077, + "step": 4383 + }, + { + "epoch": 0.8100610917993787, + "grad_norm": 0.07864098995923996, + "learning_rate": 1.7130064882173026e-05, + "loss": 0.6208000779151917, + "step": 4384 + }, + { + "epoch": 0.8102458685082745, + "grad_norm": 0.07953057438135147, + "learning_rate": 1.712866599059422e-05, + "loss": 0.7169456481933594, + "step": 4385 + }, + { + "epoch": 0.8104306452171703, + "grad_norm": 0.0593542642891407, + "learning_rate": 1.712726681531681e-05, + "loss": 0.4753279685974121, + "step": 4386 + }, + { + "epoch": 0.8106154219260662, + "grad_norm": 0.07157573103904724, + "learning_rate": 1.7125867356396476e-05, + "loss": 0.5962673425674438, + "step": 4387 + }, + { + "epoch": 0.8108001986349621, + "grad_norm": 0.08001889288425446, + "learning_rate": 1.712446761388892e-05, + "loss": 0.6433578729629517, + "step": 4388 + }, + { + "epoch": 0.8109849753438579, + "grad_norm": 0.07539349794387817, + "learning_rate": 1.712306758784984e-05, + "loss": 0.6595147848129272, + "step": 4389 + }, + { + "epoch": 0.8111697520527538, + "grad_norm": 0.0677788108587265, + "learning_rate": 1.7121667278334954e-05, + "loss": 0.5246864557266235, + "step": 4390 + }, + { + "epoch": 0.8113545287616496, + "grad_norm": 0.07340111583471298, + "learning_rate": 1.7120266685399992e-05, + "loss": 0.6696658730506897, + "step": 4391 + }, + { + "epoch": 0.8115393054705454, + "grad_norm": 0.08444202691316605, + "learning_rate": 1.7118865809100695e-05, + "loss": 0.649530291557312, + "step": 4392 + }, + { + "epoch": 0.8117240821794413, + "grad_norm": 0.06394477933645248, + "learning_rate": 1.7117464649492807e-05, + "loss": 0.5001380443572998, + "step": 4393 + }, + { + "epoch": 0.8119088588883371, + "grad_norm": 0.058860085904598236, + "learning_rate": 1.7116063206632098e-05, + "loss": 0.4476807415485382, + "step": 4394 + }, + { + "epoch": 0.812093635597233, + "grad_norm": 0.06653957813978195, + "learning_rate": 1.711466148057433e-05, + "loss": 0.6362738609313965, + "step": 4395 + }, + { + "epoch": 0.8122784123061288, + "grad_norm": 0.08320585638284683, + "learning_rate": 1.7113259471375304e-05, + "loss": 0.829651951789856, + "step": 4396 + }, + { + "epoch": 0.8124631890150247, + "grad_norm": 0.05259089916944504, + "learning_rate": 1.71118571790908e-05, + "loss": 0.46883735060691833, + "step": 4397 + }, + { + "epoch": 0.8126479657239205, + "grad_norm": 0.07715387642383575, + "learning_rate": 1.7110454603776627e-05, + "loss": 0.6277435421943665, + "step": 4398 + }, + { + "epoch": 0.8128327424328163, + "grad_norm": 0.0638882964849472, + "learning_rate": 1.7109051745488613e-05, + "loss": 0.4864892363548279, + "step": 4399 + }, + { + "epoch": 0.8130175191417122, + "grad_norm": 0.07390380650758743, + "learning_rate": 1.7107648604282574e-05, + "loss": 0.5733029842376709, + "step": 4400 + }, + { + "epoch": 0.813202295850608, + "grad_norm": 0.07391858845949173, + "learning_rate": 1.7106245180214363e-05, + "loss": 0.6999667882919312, + "step": 4401 + }, + { + "epoch": 0.8133870725595038, + "grad_norm": 0.06986992061138153, + "learning_rate": 1.7104841473339827e-05, + "loss": 0.5703665614128113, + "step": 4402 + }, + { + "epoch": 0.8135718492683998, + "grad_norm": 0.0572233721613884, + "learning_rate": 1.710343748371482e-05, + "loss": 0.4385322332382202, + "step": 4403 + }, + { + "epoch": 0.8137566259772956, + "grad_norm": 0.07142923772335052, + "learning_rate": 1.7102033211395236e-05, + "loss": 0.6870672106742859, + "step": 4404 + }, + { + "epoch": 0.8139414026861914, + "grad_norm": 0.07860967516899109, + "learning_rate": 1.7100628656436944e-05, + "loss": 0.7508385181427002, + "step": 4405 + }, + { + "epoch": 0.8141261793950872, + "grad_norm": 0.084261454641819, + "learning_rate": 1.7099223818895848e-05, + "loss": 0.5924925804138184, + "step": 4406 + }, + { + "epoch": 0.8143109561039831, + "grad_norm": 0.05773601308465004, + "learning_rate": 1.7097818698827853e-05, + "loss": 0.5639837980270386, + "step": 4407 + }, + { + "epoch": 0.8144957328128789, + "grad_norm": 0.06550680100917816, + "learning_rate": 1.709641329628888e-05, + "loss": 0.5971671342849731, + "step": 4408 + }, + { + "epoch": 0.8146805095217747, + "grad_norm": 0.058647219091653824, + "learning_rate": 1.709500761133486e-05, + "loss": 0.5427902936935425, + "step": 4409 + }, + { + "epoch": 0.8148652862306707, + "grad_norm": 0.0689467191696167, + "learning_rate": 1.7093601644021736e-05, + "loss": 0.7049939036369324, + "step": 4410 + }, + { + "epoch": 0.8150500629395665, + "grad_norm": 0.08845806121826172, + "learning_rate": 1.7092195394405457e-05, + "loss": 0.7434052228927612, + "step": 4411 + }, + { + "epoch": 0.8152348396484623, + "grad_norm": 0.06892676651477814, + "learning_rate": 1.7090788862541995e-05, + "loss": 0.5471463203430176, + "step": 4412 + }, + { + "epoch": 0.8154196163573582, + "grad_norm": 0.053055547177791595, + "learning_rate": 1.7089382048487314e-05, + "loss": 0.4262996315956116, + "step": 4413 + }, + { + "epoch": 0.815604393066254, + "grad_norm": 0.07068893313407898, + "learning_rate": 1.708797495229741e-05, + "loss": 0.6667650938034058, + "step": 4414 + }, + { + "epoch": 0.8157891697751498, + "grad_norm": 0.0889437347650528, + "learning_rate": 1.7086567574028282e-05, + "loss": 0.7486896514892578, + "step": 4415 + }, + { + "epoch": 0.8159739464840456, + "grad_norm": 0.06847400963306427, + "learning_rate": 1.708515991373593e-05, + "loss": 0.5367305874824524, + "step": 4416 + }, + { + "epoch": 0.8161587231929416, + "grad_norm": 0.08452947437763214, + "learning_rate": 1.708375197147638e-05, + "loss": 0.827254593372345, + "step": 4417 + }, + { + "epoch": 0.8163434999018374, + "grad_norm": 0.06215844675898552, + "learning_rate": 1.7082343747305668e-05, + "loss": 0.5081216096878052, + "step": 4418 + }, + { + "epoch": 0.8165282766107332, + "grad_norm": 0.08681292086839676, + "learning_rate": 1.7080935241279832e-05, + "loss": 0.8006658554077148, + "step": 4419 + }, + { + "epoch": 0.8167130533196291, + "grad_norm": 0.05513704940676689, + "learning_rate": 1.7079526453454923e-05, + "loss": 0.55133455991745, + "step": 4420 + }, + { + "epoch": 0.8168978300285249, + "grad_norm": 0.08391734212636948, + "learning_rate": 1.7078117383887016e-05, + "loss": 0.7561026811599731, + "step": 4421 + }, + { + "epoch": 0.8170826067374207, + "grad_norm": 0.06949673593044281, + "learning_rate": 1.7076708032632175e-05, + "loss": 0.5683756470680237, + "step": 4422 + }, + { + "epoch": 0.8172673834463166, + "grad_norm": 0.06849777698516846, + "learning_rate": 1.70752983997465e-05, + "loss": 0.5504254102706909, + "step": 4423 + }, + { + "epoch": 0.8174521601552124, + "grad_norm": 0.08198640495538712, + "learning_rate": 1.7073888485286083e-05, + "loss": 0.7536360025405884, + "step": 4424 + }, + { + "epoch": 0.8176369368641083, + "grad_norm": 0.07069296389818192, + "learning_rate": 1.7072478289307037e-05, + "loss": 0.607262372970581, + "step": 4425 + }, + { + "epoch": 0.8178217135730041, + "grad_norm": 0.08360360562801361, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.7798020243644714, + "step": 4426 + }, + { + "epoch": 0.8180064902819, + "grad_norm": 0.077246755361557, + "learning_rate": 1.7069657053017543e-05, + "loss": 0.7054858207702637, + "step": 4427 + }, + { + "epoch": 0.8181912669907958, + "grad_norm": 0.0620933473110199, + "learning_rate": 1.706824601281938e-05, + "loss": 0.45810335874557495, + "step": 4428 + }, + { + "epoch": 0.8183760436996916, + "grad_norm": 0.06633087247610092, + "learning_rate": 1.7066834691327133e-05, + "loss": 0.6133204698562622, + "step": 4429 + }, + { + "epoch": 0.8185608204085875, + "grad_norm": 0.06926491856575012, + "learning_rate": 1.706542308859698e-05, + "loss": 0.4294852018356323, + "step": 4430 + }, + { + "epoch": 0.8187455971174833, + "grad_norm": 0.06534317135810852, + "learning_rate": 1.7064011204685093e-05, + "loss": 0.5917895436286926, + "step": 4431 + }, + { + "epoch": 0.8189303738263792, + "grad_norm": 0.06398814916610718, + "learning_rate": 1.7062599039647656e-05, + "loss": 0.5340343713760376, + "step": 4432 + }, + { + "epoch": 0.8191151505352751, + "grad_norm": 0.06289204210042953, + "learning_rate": 1.7061186593540876e-05, + "loss": 0.5285554528236389, + "step": 4433 + }, + { + "epoch": 0.8192999272441709, + "grad_norm": 0.0706745833158493, + "learning_rate": 1.705977386642096e-05, + "loss": 0.6111737489700317, + "step": 4434 + }, + { + "epoch": 0.8194847039530667, + "grad_norm": 0.07645706087350845, + "learning_rate": 1.705836085834413e-05, + "loss": 0.7045507431030273, + "step": 4435 + }, + { + "epoch": 0.8196694806619625, + "grad_norm": 0.08695833384990692, + "learning_rate": 1.7056947569366624e-05, + "loss": 0.614539384841919, + "step": 4436 + }, + { + "epoch": 0.8198542573708584, + "grad_norm": 0.07618943601846695, + "learning_rate": 1.7055533999544682e-05, + "loss": 0.6795693635940552, + "step": 4437 + }, + { + "epoch": 0.8200390340797542, + "grad_norm": 0.09049010276794434, + "learning_rate": 1.705412014893456e-05, + "loss": 0.7961665391921997, + "step": 4438 + }, + { + "epoch": 0.8202238107886501, + "grad_norm": 0.06798869371414185, + "learning_rate": 1.705270601759253e-05, + "loss": 0.6422942280769348, + "step": 4439 + }, + { + "epoch": 0.820408587497546, + "grad_norm": 0.08138899505138397, + "learning_rate": 1.705129160557486e-05, + "loss": 0.6776303052902222, + "step": 4440 + }, + { + "epoch": 0.8205933642064418, + "grad_norm": 0.07137871533632278, + "learning_rate": 1.7049876912937845e-05, + "loss": 0.5417454838752747, + "step": 4441 + }, + { + "epoch": 0.8207781409153376, + "grad_norm": 0.08167102932929993, + "learning_rate": 1.7048461939737787e-05, + "loss": 0.7033447027206421, + "step": 4442 + }, + { + "epoch": 0.8209629176242335, + "grad_norm": 0.0888083353638649, + "learning_rate": 1.7047046686030995e-05, + "loss": 0.6298259496688843, + "step": 4443 + }, + { + "epoch": 0.8211476943331293, + "grad_norm": 0.07381979376077652, + "learning_rate": 1.7045631151873793e-05, + "loss": 0.6119664907455444, + "step": 4444 + }, + { + "epoch": 0.8213324710420251, + "grad_norm": 0.08775873482227325, + "learning_rate": 1.7044215337322512e-05, + "loss": 0.697877824306488, + "step": 4445 + }, + { + "epoch": 0.8215172477509209, + "grad_norm": 0.08506224304437637, + "learning_rate": 1.70427992424335e-05, + "loss": 0.7121521234512329, + "step": 4446 + }, + { + "epoch": 0.8217020244598169, + "grad_norm": 0.06776197999715805, + "learning_rate": 1.7041382867263116e-05, + "loss": 0.587286114692688, + "step": 4447 + }, + { + "epoch": 0.8218868011687127, + "grad_norm": 0.05220887064933777, + "learning_rate": 1.703996621186772e-05, + "loss": 0.5562371015548706, + "step": 4448 + }, + { + "epoch": 0.8220715778776085, + "grad_norm": 0.07543464750051498, + "learning_rate": 1.7038549276303693e-05, + "loss": 0.7780980467796326, + "step": 4449 + }, + { + "epoch": 0.8222563545865044, + "grad_norm": 0.07507284730672836, + "learning_rate": 1.7037132060627428e-05, + "loss": 0.4695199728012085, + "step": 4450 + }, + { + "epoch": 0.8224411312954002, + "grad_norm": 0.04930936545133591, + "learning_rate": 1.7035714564895324e-05, + "loss": 0.4211044907569885, + "step": 4451 + }, + { + "epoch": 0.822625908004296, + "grad_norm": 0.06219407171010971, + "learning_rate": 1.7034296789163788e-05, + "loss": 0.4526977241039276, + "step": 4452 + }, + { + "epoch": 0.8228106847131919, + "grad_norm": 0.09974870830774307, + "learning_rate": 1.7032878733489252e-05, + "loss": 0.8299565315246582, + "step": 4453 + }, + { + "epoch": 0.8229954614220878, + "grad_norm": 0.0561041384935379, + "learning_rate": 1.7031460397928142e-05, + "loss": 0.427130788564682, + "step": 4454 + }, + { + "epoch": 0.8231802381309836, + "grad_norm": 0.0706462636590004, + "learning_rate": 1.703004178253691e-05, + "loss": 0.654176652431488, + "step": 4455 + }, + { + "epoch": 0.8233650148398794, + "grad_norm": 0.07966897636651993, + "learning_rate": 1.7028622887372005e-05, + "loss": 0.5644944310188293, + "step": 4456 + }, + { + "epoch": 0.8235497915487753, + "grad_norm": 0.061665527522563934, + "learning_rate": 1.7027203712489902e-05, + "loss": 0.5428031086921692, + "step": 4457 + }, + { + "epoch": 0.8237345682576711, + "grad_norm": 0.05557411164045334, + "learning_rate": 1.7025784257947077e-05, + "loss": 0.3972078561782837, + "step": 4458 + }, + { + "epoch": 0.8239193449665669, + "grad_norm": 0.0654132142663002, + "learning_rate": 1.7024364523800015e-05, + "loss": 0.6011388897895813, + "step": 4459 + }, + { + "epoch": 0.8241041216754628, + "grad_norm": 0.07930979877710342, + "learning_rate": 1.7022944510105226e-05, + "loss": 0.6692883968353271, + "step": 4460 + }, + { + "epoch": 0.8242888983843587, + "grad_norm": 0.0695558562874794, + "learning_rate": 1.7021524216919217e-05, + "loss": 0.6444147825241089, + "step": 4461 + }, + { + "epoch": 0.8244736750932545, + "grad_norm": 0.08044788986444473, + "learning_rate": 1.702010364429851e-05, + "loss": 0.6611325740814209, + "step": 4462 + }, + { + "epoch": 0.8246584518021504, + "grad_norm": 0.06428727507591248, + "learning_rate": 1.7018682792299644e-05, + "loss": 0.5038701891899109, + "step": 4463 + }, + { + "epoch": 0.8248432285110462, + "grad_norm": 0.08757028728723526, + "learning_rate": 1.701726166097916e-05, + "loss": 0.7276875972747803, + "step": 4464 + }, + { + "epoch": 0.825028005219942, + "grad_norm": 0.07670968770980835, + "learning_rate": 1.7015840250393615e-05, + "loss": 0.6314493417739868, + "step": 4465 + }, + { + "epoch": 0.8252127819288378, + "grad_norm": 0.08930184692144394, + "learning_rate": 1.7014418560599578e-05, + "loss": 0.6691626310348511, + "step": 4466 + }, + { + "epoch": 0.8253975586377337, + "grad_norm": 0.08691312372684479, + "learning_rate": 1.7012996591653625e-05, + "loss": 0.8360946774482727, + "step": 4467 + }, + { + "epoch": 0.8255823353466295, + "grad_norm": 0.0800599530339241, + "learning_rate": 1.7011574343612353e-05, + "loss": 0.754214346408844, + "step": 4468 + }, + { + "epoch": 0.8257671120555254, + "grad_norm": 0.0611540786921978, + "learning_rate": 1.7010151816532356e-05, + "loss": 0.5543545484542847, + "step": 4469 + }, + { + "epoch": 0.8259518887644213, + "grad_norm": 0.06635784357786179, + "learning_rate": 1.700872901047025e-05, + "loss": 0.5271308422088623, + "step": 4470 + }, + { + "epoch": 0.8261366654733171, + "grad_norm": 0.061269983649253845, + "learning_rate": 1.7007305925482658e-05, + "loss": 0.4760308563709259, + "step": 4471 + }, + { + "epoch": 0.8263214421822129, + "grad_norm": 0.06325298547744751, + "learning_rate": 1.700588256162621e-05, + "loss": 0.5194886326789856, + "step": 4472 + }, + { + "epoch": 0.8265062188911088, + "grad_norm": 0.0797019973397255, + "learning_rate": 1.7004458918957555e-05, + "loss": 0.5213291645050049, + "step": 4473 + }, + { + "epoch": 0.8266909956000046, + "grad_norm": 0.06251664459705353, + "learning_rate": 1.700303499753335e-05, + "loss": 0.4912989139556885, + "step": 4474 + }, + { + "epoch": 0.8268757723089004, + "grad_norm": 0.06516954302787781, + "learning_rate": 1.7001610797410265e-05, + "loss": 0.4361167252063751, + "step": 4475 + }, + { + "epoch": 0.8270605490177964, + "grad_norm": 0.07414435595273972, + "learning_rate": 1.700018631864497e-05, + "loss": 0.615641176700592, + "step": 4476 + }, + { + "epoch": 0.8272453257266922, + "grad_norm": 0.09901707619428635, + "learning_rate": 1.6998761561294162e-05, + "loss": 0.8456782102584839, + "step": 4477 + }, + { + "epoch": 0.827430102435588, + "grad_norm": 0.060959070920944214, + "learning_rate": 1.6997336525414543e-05, + "loss": 0.5491607189178467, + "step": 4478 + }, + { + "epoch": 0.8276148791444838, + "grad_norm": 0.061658356338739395, + "learning_rate": 1.699591121106282e-05, + "loss": 0.43196821212768555, + "step": 4479 + }, + { + "epoch": 0.8277996558533797, + "grad_norm": 0.07034077495336533, + "learning_rate": 1.699448561829572e-05, + "loss": 0.6002528667449951, + "step": 4480 + }, + { + "epoch": 0.8279844325622755, + "grad_norm": 0.07465361058712006, + "learning_rate": 1.6993059747169975e-05, + "loss": 0.6691159605979919, + "step": 4481 + }, + { + "epoch": 0.8281692092711713, + "grad_norm": 0.0629972591996193, + "learning_rate": 1.6991633597742327e-05, + "loss": 0.6098475456237793, + "step": 4482 + }, + { + "epoch": 0.8283539859800673, + "grad_norm": 0.08248724043369293, + "learning_rate": 1.6990207170069536e-05, + "loss": 0.7085081934928894, + "step": 4483 + }, + { + "epoch": 0.8285387626889631, + "grad_norm": 0.073788121342659, + "learning_rate": 1.698878046420837e-05, + "loss": 0.5459364652633667, + "step": 4484 + }, + { + "epoch": 0.8287235393978589, + "grad_norm": 0.07505763322114944, + "learning_rate": 1.6987353480215613e-05, + "loss": 0.7524222135543823, + "step": 4485 + }, + { + "epoch": 0.8289083161067548, + "grad_norm": 0.07289927452802658, + "learning_rate": 1.698592621814804e-05, + "loss": 0.683181643486023, + "step": 4486 + }, + { + "epoch": 0.8290930928156506, + "grad_norm": 0.052059952169656754, + "learning_rate": 1.6984498678062462e-05, + "loss": 0.4803551137447357, + "step": 4487 + }, + { + "epoch": 0.8292778695245464, + "grad_norm": 0.09347348660230637, + "learning_rate": 1.698307086001569e-05, + "loss": 0.7751423716545105, + "step": 4488 + }, + { + "epoch": 0.8294626462334422, + "grad_norm": 0.05178924649953842, + "learning_rate": 1.6981642764064544e-05, + "loss": 0.4005620777606964, + "step": 4489 + }, + { + "epoch": 0.8296474229423381, + "grad_norm": 0.0625031441450119, + "learning_rate": 1.698021439026586e-05, + "loss": 0.472150593996048, + "step": 4490 + }, + { + "epoch": 0.829832199651234, + "grad_norm": 0.0583854503929615, + "learning_rate": 1.697878573867648e-05, + "loss": 0.43358004093170166, + "step": 4491 + }, + { + "epoch": 0.8300169763601298, + "grad_norm": 0.05779613181948662, + "learning_rate": 1.6977356809353263e-05, + "loss": 0.5184218883514404, + "step": 4492 + }, + { + "epoch": 0.8302017530690257, + "grad_norm": 0.07386235892772675, + "learning_rate": 1.697592760235307e-05, + "loss": 0.5868077874183655, + "step": 4493 + }, + { + "epoch": 0.8303865297779215, + "grad_norm": 0.06986936181783676, + "learning_rate": 1.6974498117732788e-05, + "loss": 0.49657654762268066, + "step": 4494 + }, + { + "epoch": 0.8305713064868173, + "grad_norm": 0.07015872746706009, + "learning_rate": 1.69730683555493e-05, + "loss": 0.6315212249755859, + "step": 4495 + }, + { + "epoch": 0.8307560831957131, + "grad_norm": 0.06291679292917252, + "learning_rate": 1.6971638315859507e-05, + "loss": 0.4786362648010254, + "step": 4496 + }, + { + "epoch": 0.830940859904609, + "grad_norm": 0.07277002930641174, + "learning_rate": 1.697020799872032e-05, + "loss": 0.6787610650062561, + "step": 4497 + }, + { + "epoch": 0.8311256366135049, + "grad_norm": 0.08893369883298874, + "learning_rate": 1.6968777404188662e-05, + "loss": 0.5605788230895996, + "step": 4498 + }, + { + "epoch": 0.8313104133224007, + "grad_norm": 0.06755659729242325, + "learning_rate": 1.6967346532321466e-05, + "loss": 0.5297644138336182, + "step": 4499 + }, + { + "epoch": 0.8314951900312966, + "grad_norm": 0.05591427907347679, + "learning_rate": 1.6965915383175676e-05, + "loss": 0.3980846703052521, + "step": 4500 + }, + { + "epoch": 0.8314951900312966, + "eval_loss": 0.6395584940910339, + "eval_runtime": 158.8241, + "eval_samples_per_second": 114.775, + "eval_steps_per_second": 14.349, + "step": 4500 + }, + { + "epoch": 0.8316799667401924, + "grad_norm": 0.06723398715257645, + "learning_rate": 1.6964483956808248e-05, + "loss": 0.537386953830719, + "step": 4501 + }, + { + "epoch": 0.8318647434490882, + "grad_norm": 0.058650754392147064, + "learning_rate": 1.6963052253276145e-05, + "loss": 0.5143908262252808, + "step": 4502 + }, + { + "epoch": 0.8320495201579841, + "grad_norm": 0.09709373861551285, + "learning_rate": 1.6961620272636346e-05, + "loss": 0.8258609771728516, + "step": 4503 + }, + { + "epoch": 0.8322342968668799, + "grad_norm": 0.0741024762392044, + "learning_rate": 1.6960188014945847e-05, + "loss": 0.6264018416404724, + "step": 4504 + }, + { + "epoch": 0.8324190735757758, + "grad_norm": 0.06891115009784698, + "learning_rate": 1.6958755480261638e-05, + "loss": 0.6287631988525391, + "step": 4505 + }, + { + "epoch": 0.8326038502846717, + "grad_norm": 0.06442379951477051, + "learning_rate": 1.695732266864073e-05, + "loss": 0.5368323922157288, + "step": 4506 + }, + { + "epoch": 0.8327886269935675, + "grad_norm": 0.08387748897075653, + "learning_rate": 1.6955889580140145e-05, + "loss": 0.8382667899131775, + "step": 4507 + }, + { + "epoch": 0.8329734037024633, + "grad_norm": 0.07572843134403229, + "learning_rate": 1.6954456214816918e-05, + "loss": 0.6271460652351379, + "step": 4508 + }, + { + "epoch": 0.8331581804113591, + "grad_norm": 0.06325176358222961, + "learning_rate": 1.6953022572728095e-05, + "loss": 0.6080461740493774, + "step": 4509 + }, + { + "epoch": 0.833342957120255, + "grad_norm": 0.07202797383069992, + "learning_rate": 1.6951588653930722e-05, + "loss": 0.5612244009971619, + "step": 4510 + }, + { + "epoch": 0.8335277338291508, + "grad_norm": 0.07825497537851334, + "learning_rate": 1.6950154458481875e-05, + "loss": 0.615082859992981, + "step": 4511 + }, + { + "epoch": 0.8337125105380466, + "grad_norm": 0.09370280802249908, + "learning_rate": 1.694871998643862e-05, + "loss": 0.7465894222259521, + "step": 4512 + }, + { + "epoch": 0.8338972872469426, + "grad_norm": 0.07400539517402649, + "learning_rate": 1.694728523785805e-05, + "loss": 0.6992408037185669, + "step": 4513 + }, + { + "epoch": 0.8340820639558384, + "grad_norm": 0.05352245643734932, + "learning_rate": 1.6945850212797265e-05, + "loss": 0.4538097381591797, + "step": 4514 + }, + { + "epoch": 0.8342668406647342, + "grad_norm": 0.057059090584516525, + "learning_rate": 1.6944414911313368e-05, + "loss": 0.4463070333003998, + "step": 4515 + }, + { + "epoch": 0.83445161737363, + "grad_norm": 0.06638690829277039, + "learning_rate": 1.694297933346349e-05, + "loss": 0.4994492530822754, + "step": 4516 + }, + { + "epoch": 0.8346363940825259, + "grad_norm": 0.054007966071367264, + "learning_rate": 1.6941543479304748e-05, + "loss": 0.3897664248943329, + "step": 4517 + }, + { + "epoch": 0.8348211707914217, + "grad_norm": 0.07031978666782379, + "learning_rate": 1.69401073488943e-05, + "loss": 0.541957437992096, + "step": 4518 + }, + { + "epoch": 0.8350059475003175, + "grad_norm": 0.06689532101154327, + "learning_rate": 1.6938670942289292e-05, + "loss": 0.6915385723114014, + "step": 4519 + }, + { + "epoch": 0.8351907242092135, + "grad_norm": 0.0767417848110199, + "learning_rate": 1.6937234259546888e-05, + "loss": 0.614142119884491, + "step": 4520 + }, + { + "epoch": 0.8353755009181093, + "grad_norm": 0.06671950221061707, + "learning_rate": 1.693579730072426e-05, + "loss": 0.5127571225166321, + "step": 4521 + }, + { + "epoch": 0.8355602776270051, + "grad_norm": 0.07585155218839645, + "learning_rate": 1.6934360065878603e-05, + "loss": 0.5769263505935669, + "step": 4522 + }, + { + "epoch": 0.835745054335901, + "grad_norm": 0.07272087782621384, + "learning_rate": 1.693292255506711e-05, + "loss": 0.5349190831184387, + "step": 4523 + }, + { + "epoch": 0.8359298310447968, + "grad_norm": 0.062750905752182, + "learning_rate": 1.693148476834699e-05, + "loss": 0.47064346075057983, + "step": 4524 + }, + { + "epoch": 0.8361146077536926, + "grad_norm": 0.0642617717385292, + "learning_rate": 1.693004670577546e-05, + "loss": 0.5044402480125427, + "step": 4525 + }, + { + "epoch": 0.8362993844625884, + "grad_norm": 0.0815463587641716, + "learning_rate": 1.692860836740975e-05, + "loss": 0.7980261445045471, + "step": 4526 + }, + { + "epoch": 0.8364841611714844, + "grad_norm": 0.06477741152048111, + "learning_rate": 1.692716975330711e-05, + "loss": 0.6120722889900208, + "step": 4527 + }, + { + "epoch": 0.8366689378803802, + "grad_norm": 0.0836218073964119, + "learning_rate": 1.6925730863524782e-05, + "loss": 0.720973789691925, + "step": 4528 + }, + { + "epoch": 0.836853714589276, + "grad_norm": 0.07507210969924927, + "learning_rate": 1.6924291698120034e-05, + "loss": 0.5757606625556946, + "step": 4529 + }, + { + "epoch": 0.8370384912981719, + "grad_norm": 0.07866434752941132, + "learning_rate": 1.6922852257150142e-05, + "loss": 0.579961359500885, + "step": 4530 + }, + { + "epoch": 0.8372232680070677, + "grad_norm": 0.07555273920297623, + "learning_rate": 1.6921412540672385e-05, + "loss": 0.636628270149231, + "step": 4531 + }, + { + "epoch": 0.8374080447159635, + "grad_norm": 0.054431378841400146, + "learning_rate": 1.6919972548744068e-05, + "loss": 0.42866355180740356, + "step": 4532 + }, + { + "epoch": 0.8375928214248594, + "grad_norm": 0.07677029073238373, + "learning_rate": 1.691853228142249e-05, + "loss": 0.655987024307251, + "step": 4533 + }, + { + "epoch": 0.8377775981337552, + "grad_norm": 0.08130481839179993, + "learning_rate": 1.6917091738764972e-05, + "loss": 0.7719659805297852, + "step": 4534 + }, + { + "epoch": 0.8379623748426511, + "grad_norm": 0.06564033031463623, + "learning_rate": 1.6915650920828848e-05, + "loss": 0.6413950324058533, + "step": 4535 + }, + { + "epoch": 0.838147151551547, + "grad_norm": 0.07372447848320007, + "learning_rate": 1.691420982767145e-05, + "loss": 0.5301737189292908, + "step": 4536 + }, + { + "epoch": 0.8383319282604428, + "grad_norm": 0.0624161921441555, + "learning_rate": 1.6912768459350132e-05, + "loss": 0.5359398722648621, + "step": 4537 + }, + { + "epoch": 0.8385167049693386, + "grad_norm": 0.07300139218568802, + "learning_rate": 1.6911326815922257e-05, + "loss": 0.6078569293022156, + "step": 4538 + }, + { + "epoch": 0.8387014816782344, + "grad_norm": 0.07421409338712692, + "learning_rate": 1.6909884897445202e-05, + "loss": 0.5118612051010132, + "step": 4539 + }, + { + "epoch": 0.8388862583871303, + "grad_norm": 0.06582600623369217, + "learning_rate": 1.690844270397634e-05, + "loss": 0.5924390554428101, + "step": 4540 + }, + { + "epoch": 0.8390710350960261, + "grad_norm": 0.07796216011047363, + "learning_rate": 1.6907000235573076e-05, + "loss": 0.5968838930130005, + "step": 4541 + }, + { + "epoch": 0.839255811804922, + "grad_norm": 0.06646906584501266, + "learning_rate": 1.690555749229281e-05, + "loss": 0.5918703079223633, + "step": 4542 + }, + { + "epoch": 0.8394405885138179, + "grad_norm": 0.07779476046562195, + "learning_rate": 1.6904114474192962e-05, + "loss": 0.7303890585899353, + "step": 4543 + }, + { + "epoch": 0.8396253652227137, + "grad_norm": 0.07487162947654724, + "learning_rate": 1.6902671181330957e-05, + "loss": 0.6921830177307129, + "step": 4544 + }, + { + "epoch": 0.8398101419316095, + "grad_norm": 0.05664646998047829, + "learning_rate": 1.6901227613764235e-05, + "loss": 0.42495113611221313, + "step": 4545 + }, + { + "epoch": 0.8399949186405054, + "grad_norm": 0.06526501476764679, + "learning_rate": 1.6899783771550247e-05, + "loss": 0.504043459892273, + "step": 4546 + }, + { + "epoch": 0.8401796953494012, + "grad_norm": 0.05720466375350952, + "learning_rate": 1.689833965474645e-05, + "loss": 0.4433921277523041, + "step": 4547 + }, + { + "epoch": 0.840364472058297, + "grad_norm": 0.07096187770366669, + "learning_rate": 1.6896895263410313e-05, + "loss": 0.5995004773139954, + "step": 4548 + }, + { + "epoch": 0.840549248767193, + "grad_norm": 0.0845320001244545, + "learning_rate": 1.6895450597599326e-05, + "loss": 0.764687716960907, + "step": 4549 + }, + { + "epoch": 0.8407340254760888, + "grad_norm": 0.06377948075532913, + "learning_rate": 1.689400565737098e-05, + "loss": 0.488741010427475, + "step": 4550 + }, + { + "epoch": 0.8409188021849846, + "grad_norm": 0.0828799456357956, + "learning_rate": 1.6892560442782775e-05, + "loss": 0.6057949662208557, + "step": 4551 + }, + { + "epoch": 0.8411035788938804, + "grad_norm": 0.08121705800294876, + "learning_rate": 1.689111495389223e-05, + "loss": 0.7909074425697327, + "step": 4552 + }, + { + "epoch": 0.8412883556027763, + "grad_norm": 0.07321442663669586, + "learning_rate": 1.688966919075687e-05, + "loss": 0.6617448329925537, + "step": 4553 + }, + { + "epoch": 0.8414731323116721, + "grad_norm": 0.07743881642818451, + "learning_rate": 1.6888223153434226e-05, + "loss": 0.5645763278007507, + "step": 4554 + }, + { + "epoch": 0.8416579090205679, + "grad_norm": 0.07393790036439896, + "learning_rate": 1.6886776841981856e-05, + "loss": 0.6077877283096313, + "step": 4555 + }, + { + "epoch": 0.8418426857294637, + "grad_norm": 0.06216173246502876, + "learning_rate": 1.6885330256457312e-05, + "loss": 0.5201695561408997, + "step": 4556 + }, + { + "epoch": 0.8420274624383597, + "grad_norm": 0.06612975895404816, + "learning_rate": 1.6883883396918165e-05, + "loss": 0.541587233543396, + "step": 4557 + }, + { + "epoch": 0.8422122391472555, + "grad_norm": 0.07147186994552612, + "learning_rate": 1.6882436263421996e-05, + "loss": 0.5515293478965759, + "step": 4558 + }, + { + "epoch": 0.8423970158561513, + "grad_norm": 0.07946402579545975, + "learning_rate": 1.6880988856026394e-05, + "loss": 0.7292967438697815, + "step": 4559 + }, + { + "epoch": 0.8425817925650472, + "grad_norm": 0.07769119739532471, + "learning_rate": 1.687954117478897e-05, + "loss": 0.6431043148040771, + "step": 4560 + }, + { + "epoch": 0.842766569273943, + "grad_norm": 0.06636208295822144, + "learning_rate": 1.6878093219767324e-05, + "loss": 0.4926970303058624, + "step": 4561 + }, + { + "epoch": 0.8429513459828388, + "grad_norm": 0.0701172798871994, + "learning_rate": 1.6876644991019086e-05, + "loss": 0.6440094709396362, + "step": 4562 + }, + { + "epoch": 0.8431361226917347, + "grad_norm": 0.08018402010202408, + "learning_rate": 1.6875196488601895e-05, + "loss": 0.6849448084831238, + "step": 4563 + }, + { + "epoch": 0.8433208994006306, + "grad_norm": 0.07953750342130661, + "learning_rate": 1.6873747712573395e-05, + "loss": 0.6987465023994446, + "step": 4564 + }, + { + "epoch": 0.8435056761095264, + "grad_norm": 0.08021809160709381, + "learning_rate": 1.6872298662991237e-05, + "loss": 0.7143018841743469, + "step": 4565 + }, + { + "epoch": 0.8436904528184223, + "grad_norm": 0.05858346447348595, + "learning_rate": 1.6870849339913097e-05, + "loss": 0.4057566523551941, + "step": 4566 + }, + { + "epoch": 0.8438752295273181, + "grad_norm": 0.05692509189248085, + "learning_rate": 1.686939974339665e-05, + "loss": 0.4452887773513794, + "step": 4567 + }, + { + "epoch": 0.8440600062362139, + "grad_norm": 0.09390457719564438, + "learning_rate": 1.6867949873499578e-05, + "loss": 0.7715519666671753, + "step": 4568 + }, + { + "epoch": 0.8442447829451097, + "grad_norm": 0.08112477511167526, + "learning_rate": 1.6866499730279592e-05, + "loss": 0.7168828845024109, + "step": 4569 + }, + { + "epoch": 0.8444295596540056, + "grad_norm": 0.07794813811779022, + "learning_rate": 1.68650493137944e-05, + "loss": 0.7800693511962891, + "step": 4570 + }, + { + "epoch": 0.8446143363629015, + "grad_norm": 0.06772364675998688, + "learning_rate": 1.6863598624101723e-05, + "loss": 0.5149499773979187, + "step": 4571 + }, + { + "epoch": 0.8447991130717973, + "grad_norm": 0.08500882238149643, + "learning_rate": 1.6862147661259297e-05, + "loss": 0.7767760157585144, + "step": 4572 + }, + { + "epoch": 0.8449838897806932, + "grad_norm": 0.0779474526643753, + "learning_rate": 1.6860696425324857e-05, + "loss": 0.6483212113380432, + "step": 4573 + }, + { + "epoch": 0.845168666489589, + "grad_norm": 0.08208338171243668, + "learning_rate": 1.6859244916356167e-05, + "loss": 0.6114625930786133, + "step": 4574 + }, + { + "epoch": 0.8453534431984848, + "grad_norm": 0.052994124591350555, + "learning_rate": 1.6857793134410987e-05, + "loss": 0.4159746468067169, + "step": 4575 + }, + { + "epoch": 0.8455382199073807, + "grad_norm": 0.06645326316356659, + "learning_rate": 1.68563410795471e-05, + "loss": 0.3910166621208191, + "step": 4576 + }, + { + "epoch": 0.8457229966162765, + "grad_norm": 0.07535551488399506, + "learning_rate": 1.6854888751822284e-05, + "loss": 0.698360025882721, + "step": 4577 + }, + { + "epoch": 0.8459077733251723, + "grad_norm": 0.06745228171348572, + "learning_rate": 1.6853436151294347e-05, + "loss": 0.45721209049224854, + "step": 4578 + }, + { + "epoch": 0.8460925500340682, + "grad_norm": 0.06511545181274414, + "learning_rate": 1.685198327802109e-05, + "loss": 0.4750843942165375, + "step": 4579 + }, + { + "epoch": 0.8462773267429641, + "grad_norm": 0.06708203256130219, + "learning_rate": 1.6850530132060334e-05, + "loss": 0.5141110420227051, + "step": 4580 + }, + { + "epoch": 0.8464621034518599, + "grad_norm": 0.07462667673826218, + "learning_rate": 1.6849076713469914e-05, + "loss": 0.6454291343688965, + "step": 4581 + }, + { + "epoch": 0.8466468801607557, + "grad_norm": 0.09495879709720612, + "learning_rate": 1.6847623022307664e-05, + "loss": 0.9474629759788513, + "step": 4582 + }, + { + "epoch": 0.8468316568696516, + "grad_norm": 0.06724563241004944, + "learning_rate": 1.6846169058631448e-05, + "loss": 0.6943251490592957, + "step": 4583 + }, + { + "epoch": 0.8470164335785474, + "grad_norm": 0.06822342425584793, + "learning_rate": 1.684471482249912e-05, + "loss": 0.46047502756118774, + "step": 4584 + }, + { + "epoch": 0.8472012102874432, + "grad_norm": 0.07819680124521255, + "learning_rate": 1.6843260313968553e-05, + "loss": 0.6768338680267334, + "step": 4585 + }, + { + "epoch": 0.8473859869963392, + "grad_norm": 0.07070040702819824, + "learning_rate": 1.6841805533097637e-05, + "loss": 0.5495981574058533, + "step": 4586 + }, + { + "epoch": 0.847570763705235, + "grad_norm": 0.0862039178609848, + "learning_rate": 1.684035047994427e-05, + "loss": 0.6363370418548584, + "step": 4587 + }, + { + "epoch": 0.8477555404141308, + "grad_norm": 0.077043317258358, + "learning_rate": 1.683889515456635e-05, + "loss": 0.7591915130615234, + "step": 4588 + }, + { + "epoch": 0.8479403171230266, + "grad_norm": 0.09326723963022232, + "learning_rate": 1.68374395570218e-05, + "loss": 0.7203951478004456, + "step": 4589 + }, + { + "epoch": 0.8481250938319225, + "grad_norm": 0.06428168714046478, + "learning_rate": 1.6835983687368547e-05, + "loss": 0.5105397701263428, + "step": 4590 + }, + { + "epoch": 0.8483098705408183, + "grad_norm": 0.06252838671207428, + "learning_rate": 1.683452754566453e-05, + "loss": 0.6851661205291748, + "step": 4591 + }, + { + "epoch": 0.8484946472497141, + "grad_norm": 0.06116794794797897, + "learning_rate": 1.68330711319677e-05, + "loss": 0.4769090712070465, + "step": 4592 + }, + { + "epoch": 0.8486794239586101, + "grad_norm": 0.07975073158740997, + "learning_rate": 1.6831614446336017e-05, + "loss": 0.756597101688385, + "step": 4593 + }, + { + "epoch": 0.8488642006675059, + "grad_norm": 0.07053950428962708, + "learning_rate": 1.6830157488827453e-05, + "loss": 0.7950809597969055, + "step": 4594 + }, + { + "epoch": 0.8490489773764017, + "grad_norm": 0.049777209758758545, + "learning_rate": 1.682870025949999e-05, + "loss": 0.38451868295669556, + "step": 4595 + }, + { + "epoch": 0.8492337540852976, + "grad_norm": 0.07076207548379898, + "learning_rate": 1.682724275841162e-05, + "loss": 0.6204366683959961, + "step": 4596 + }, + { + "epoch": 0.8494185307941934, + "grad_norm": 0.06023591011762619, + "learning_rate": 1.6825784985620348e-05, + "loss": 0.4931405484676361, + "step": 4597 + }, + { + "epoch": 0.8496033075030892, + "grad_norm": 0.06567323207855225, + "learning_rate": 1.6824326941184192e-05, + "loss": 0.6039255857467651, + "step": 4598 + }, + { + "epoch": 0.849788084211985, + "grad_norm": 0.06637229770421982, + "learning_rate": 1.6822868625161168e-05, + "loss": 0.5879940390586853, + "step": 4599 + }, + { + "epoch": 0.8499728609208809, + "grad_norm": 0.06800197809934616, + "learning_rate": 1.6821410037609322e-05, + "loss": 0.5560672283172607, + "step": 4600 + }, + { + "epoch": 0.8501576376297768, + "grad_norm": 0.056423820555210114, + "learning_rate": 1.6819951178586696e-05, + "loss": 0.44926315546035767, + "step": 4601 + }, + { + "epoch": 0.8503424143386726, + "grad_norm": 0.06409399956464767, + "learning_rate": 1.6818492048151353e-05, + "loss": 0.5209833979606628, + "step": 4602 + }, + { + "epoch": 0.8505271910475685, + "grad_norm": 0.0760723277926445, + "learning_rate": 1.681703264636136e-05, + "loss": 0.5303493738174438, + "step": 4603 + }, + { + "epoch": 0.8507119677564643, + "grad_norm": 0.0445675253868103, + "learning_rate": 1.681557297327479e-05, + "loss": 0.3546523153781891, + "step": 4604 + }, + { + "epoch": 0.8508967444653601, + "grad_norm": 0.07749450206756592, + "learning_rate": 1.6814113028949744e-05, + "loss": 0.6340559124946594, + "step": 4605 + }, + { + "epoch": 0.851081521174256, + "grad_norm": 0.08493492752313614, + "learning_rate": 1.6812652813444318e-05, + "loss": 0.6882403492927551, + "step": 4606 + }, + { + "epoch": 0.8512662978831518, + "grad_norm": 0.0660935565829277, + "learning_rate": 1.6811192326816618e-05, + "loss": 0.5860942602157593, + "step": 4607 + }, + { + "epoch": 0.8514510745920477, + "grad_norm": 0.07247716933488846, + "learning_rate": 1.6809731569124782e-05, + "loss": 0.5857391953468323, + "step": 4608 + }, + { + "epoch": 0.8516358513009435, + "grad_norm": 0.06126458942890167, + "learning_rate": 1.6808270540426927e-05, + "loss": 0.48983433842658997, + "step": 4609 + }, + { + "epoch": 0.8518206280098394, + "grad_norm": 0.09214968234300613, + "learning_rate": 1.6806809240781205e-05, + "loss": 1.0490683317184448, + "step": 4610 + }, + { + "epoch": 0.8520054047187352, + "grad_norm": 0.06320153176784515, + "learning_rate": 1.6805347670245775e-05, + "loss": 0.6951673626899719, + "step": 4611 + }, + { + "epoch": 0.852190181427631, + "grad_norm": 0.08076881617307663, + "learning_rate": 1.6803885828878798e-05, + "loss": 0.7401670217514038, + "step": 4612 + }, + { + "epoch": 0.8523749581365269, + "grad_norm": 0.054451070725917816, + "learning_rate": 1.680242371673845e-05, + "loss": 0.5073938369750977, + "step": 4613 + }, + { + "epoch": 0.8525597348454227, + "grad_norm": 0.06178588420152664, + "learning_rate": 1.680096133388292e-05, + "loss": 0.44078755378723145, + "step": 4614 + }, + { + "epoch": 0.8527445115543186, + "grad_norm": 0.06964290887117386, + "learning_rate": 1.6799498680370408e-05, + "loss": 0.455206960439682, + "step": 4615 + }, + { + "epoch": 0.8529292882632145, + "grad_norm": 0.08222243934869766, + "learning_rate": 1.679803575625912e-05, + "loss": 0.6792607307434082, + "step": 4616 + }, + { + "epoch": 0.8531140649721103, + "grad_norm": 0.08779963850975037, + "learning_rate": 1.6796572561607277e-05, + "loss": 0.7054410576820374, + "step": 4617 + }, + { + "epoch": 0.8532988416810061, + "grad_norm": 0.06393636018037796, + "learning_rate": 1.679510909647311e-05, + "loss": 0.558661699295044, + "step": 4618 + }, + { + "epoch": 0.8534836183899019, + "grad_norm": 0.07679730653762817, + "learning_rate": 1.679364536091486e-05, + "loss": 0.6361806988716125, + "step": 4619 + }, + { + "epoch": 0.8536683950987978, + "grad_norm": 0.052144911140203476, + "learning_rate": 1.6792181354990783e-05, + "loss": 0.3589189350605011, + "step": 4620 + }, + { + "epoch": 0.8538531718076936, + "grad_norm": 0.0488000325858593, + "learning_rate": 1.6790717078759134e-05, + "loss": 0.3850957155227661, + "step": 4621 + }, + { + "epoch": 0.8540379485165894, + "grad_norm": 0.08071233332157135, + "learning_rate": 1.678925253227819e-05, + "loss": 0.5997284650802612, + "step": 4622 + }, + { + "epoch": 0.8542227252254854, + "grad_norm": 0.07301712781190872, + "learning_rate": 1.678778771560624e-05, + "loss": 0.6283912062644958, + "step": 4623 + }, + { + "epoch": 0.8544075019343812, + "grad_norm": 0.07521724700927734, + "learning_rate": 1.6786322628801572e-05, + "loss": 0.676716685295105, + "step": 4624 + }, + { + "epoch": 0.854592278643277, + "grad_norm": 0.08184777945280075, + "learning_rate": 1.6784857271922497e-05, + "loss": 0.8131962418556213, + "step": 4625 + }, + { + "epoch": 0.8547770553521729, + "grad_norm": 0.0697786808013916, + "learning_rate": 1.678339164502733e-05, + "loss": 0.6574063301086426, + "step": 4626 + }, + { + "epoch": 0.8549618320610687, + "grad_norm": 0.05795733258128166, + "learning_rate": 1.6781925748174398e-05, + "loss": 0.43882185220718384, + "step": 4627 + }, + { + "epoch": 0.8551466087699645, + "grad_norm": 0.06942173093557358, + "learning_rate": 1.6780459581422037e-05, + "loss": 0.5867627859115601, + "step": 4628 + }, + { + "epoch": 0.8553313854788603, + "grad_norm": 0.05828528851270676, + "learning_rate": 1.6778993144828598e-05, + "loss": 0.42724406719207764, + "step": 4629 + }, + { + "epoch": 0.8555161621877563, + "grad_norm": 0.0868777185678482, + "learning_rate": 1.6777526438452444e-05, + "loss": 0.7190034985542297, + "step": 4630 + }, + { + "epoch": 0.8557009388966521, + "grad_norm": 0.07899882644414902, + "learning_rate": 1.6776059462351936e-05, + "loss": 0.6514811515808105, + "step": 4631 + }, + { + "epoch": 0.8558857156055479, + "grad_norm": 0.08145968616008759, + "learning_rate": 1.6774592216585466e-05, + "loss": 0.7642940282821655, + "step": 4632 + }, + { + "epoch": 0.8560704923144438, + "grad_norm": 0.06042362004518509, + "learning_rate": 1.6773124701211417e-05, + "loss": 0.44519340991973877, + "step": 4633 + }, + { + "epoch": 0.8562552690233396, + "grad_norm": 0.07294254004955292, + "learning_rate": 1.6771656916288198e-05, + "loss": 0.5128921866416931, + "step": 4634 + }, + { + "epoch": 0.8564400457322354, + "grad_norm": 0.06889355182647705, + "learning_rate": 1.6770188861874223e-05, + "loss": 0.5176600217819214, + "step": 4635 + }, + { + "epoch": 0.8566248224411313, + "grad_norm": 0.05914265289902687, + "learning_rate": 1.6768720538027907e-05, + "loss": 0.5071340799331665, + "step": 4636 + }, + { + "epoch": 0.8568095991500272, + "grad_norm": 0.08248062431812286, + "learning_rate": 1.6767251944807692e-05, + "loss": 0.6715055704116821, + "step": 4637 + }, + { + "epoch": 0.856994375858923, + "grad_norm": 0.07607916742563248, + "learning_rate": 1.676578308227202e-05, + "loss": 0.6736531853675842, + "step": 4638 + }, + { + "epoch": 0.8571791525678188, + "grad_norm": 0.09606055170297623, + "learning_rate": 1.676431395047935e-05, + "loss": 0.9217409491539001, + "step": 4639 + }, + { + "epoch": 0.8573639292767147, + "grad_norm": 0.09148918092250824, + "learning_rate": 1.6762844549488147e-05, + "loss": 0.7480746507644653, + "step": 4640 + }, + { + "epoch": 0.8575487059856105, + "grad_norm": 0.0659724771976471, + "learning_rate": 1.676137487935689e-05, + "loss": 0.6606292128562927, + "step": 4641 + }, + { + "epoch": 0.8577334826945063, + "grad_norm": 0.07820889353752136, + "learning_rate": 1.6759904940144067e-05, + "loss": 0.6565632224082947, + "step": 4642 + }, + { + "epoch": 0.8579182594034022, + "grad_norm": 0.07718625664710999, + "learning_rate": 1.6758434731908178e-05, + "loss": 0.7136979699134827, + "step": 4643 + }, + { + "epoch": 0.858103036112298, + "grad_norm": 0.07513435930013657, + "learning_rate": 1.6756964254707727e-05, + "loss": 0.7594067454338074, + "step": 4644 + }, + { + "epoch": 0.8582878128211939, + "grad_norm": 0.05214472487568855, + "learning_rate": 1.6755493508601238e-05, + "loss": 0.44269320368766785, + "step": 4645 + }, + { + "epoch": 0.8584725895300898, + "grad_norm": 0.0848061591386795, + "learning_rate": 1.6754022493647247e-05, + "loss": 0.6620956659317017, + "step": 4646 + }, + { + "epoch": 0.8586573662389856, + "grad_norm": 0.08480852842330933, + "learning_rate": 1.6752551209904287e-05, + "loss": 0.646623969078064, + "step": 4647 + }, + { + "epoch": 0.8588421429478814, + "grad_norm": 0.06943809986114502, + "learning_rate": 1.675107965743092e-05, + "loss": 0.45650023221969604, + "step": 4648 + }, + { + "epoch": 0.8590269196567772, + "grad_norm": 0.07890260219573975, + "learning_rate": 1.67496078362857e-05, + "loss": 0.5708764791488647, + "step": 4649 + }, + { + "epoch": 0.8592116963656731, + "grad_norm": 0.07639990001916885, + "learning_rate": 1.6748135746527205e-05, + "loss": 0.5387254953384399, + "step": 4650 + }, + { + "epoch": 0.8593964730745689, + "grad_norm": 0.0655929297208786, + "learning_rate": 1.674666338821402e-05, + "loss": 0.5580186247825623, + "step": 4651 + }, + { + "epoch": 0.8595812497834648, + "grad_norm": 0.08575616776943207, + "learning_rate": 1.6745190761404743e-05, + "loss": 0.6784237623214722, + "step": 4652 + }, + { + "epoch": 0.8597660264923607, + "grad_norm": 0.06969567388296127, + "learning_rate": 1.6743717866157972e-05, + "loss": 0.5400366187095642, + "step": 4653 + }, + { + "epoch": 0.8599508032012565, + "grad_norm": 0.04362644627690315, + "learning_rate": 1.6742244702532332e-05, + "loss": 0.4012106657028198, + "step": 4654 + }, + { + "epoch": 0.8601355799101523, + "grad_norm": 0.06540507078170776, + "learning_rate": 1.6740771270586445e-05, + "loss": 0.4933326542377472, + "step": 4655 + }, + { + "epoch": 0.8603203566190482, + "grad_norm": 0.08104443550109863, + "learning_rate": 1.673929757037895e-05, + "loss": 0.8567174673080444, + "step": 4656 + }, + { + "epoch": 0.860505133327944, + "grad_norm": 0.06365179270505905, + "learning_rate": 1.6737823601968495e-05, + "loss": 0.6120196580886841, + "step": 4657 + }, + { + "epoch": 0.8606899100368398, + "grad_norm": 0.06998001039028168, + "learning_rate": 1.6736349365413742e-05, + "loss": 0.7329786419868469, + "step": 4658 + }, + { + "epoch": 0.8608746867457358, + "grad_norm": 0.05753127485513687, + "learning_rate": 1.673487486077336e-05, + "loss": 0.5385212898254395, + "step": 4659 + }, + { + "epoch": 0.8610594634546316, + "grad_norm": 0.07382532209157944, + "learning_rate": 1.673340008810603e-05, + "loss": 0.5739226937294006, + "step": 4660 + }, + { + "epoch": 0.8612442401635274, + "grad_norm": 0.06449062377214432, + "learning_rate": 1.673192504747044e-05, + "loss": 0.5096633434295654, + "step": 4661 + }, + { + "epoch": 0.8614290168724232, + "grad_norm": 0.07269871234893799, + "learning_rate": 1.6730449738925298e-05, + "loss": 0.6623801589012146, + "step": 4662 + }, + { + "epoch": 0.8616137935813191, + "grad_norm": 0.08698683232069016, + "learning_rate": 1.6728974162529313e-05, + "loss": 0.7171550393104553, + "step": 4663 + }, + { + "epoch": 0.8617985702902149, + "grad_norm": 0.07263854891061783, + "learning_rate": 1.6727498318341206e-05, + "loss": 0.6417308449745178, + "step": 4664 + }, + { + "epoch": 0.8619833469991107, + "grad_norm": 0.04888635128736496, + "learning_rate": 1.6726022206419716e-05, + "loss": 0.33261534571647644, + "step": 4665 + }, + { + "epoch": 0.8621681237080066, + "grad_norm": 0.09420102089643478, + "learning_rate": 1.6724545826823583e-05, + "loss": 0.8491489291191101, + "step": 4666 + }, + { + "epoch": 0.8623529004169025, + "grad_norm": 0.07827732712030411, + "learning_rate": 1.6723069179611566e-05, + "loss": 0.6525130867958069, + "step": 4667 + }, + { + "epoch": 0.8625376771257983, + "grad_norm": 0.07107626646757126, + "learning_rate": 1.672159226484243e-05, + "loss": 0.506168782711029, + "step": 4668 + }, + { + "epoch": 0.8627224538346941, + "grad_norm": 0.062264494597911835, + "learning_rate": 1.672011508257495e-05, + "loss": 0.4457719624042511, + "step": 4669 + }, + { + "epoch": 0.86290723054359, + "grad_norm": 0.06501355767250061, + "learning_rate": 1.6718637632867914e-05, + "loss": 0.539233386516571, + "step": 4670 + }, + { + "epoch": 0.8630920072524858, + "grad_norm": 0.07183745503425598, + "learning_rate": 1.6717159915780118e-05, + "loss": 0.5258473753929138, + "step": 4671 + }, + { + "epoch": 0.8632767839613816, + "grad_norm": 0.07959087193012238, + "learning_rate": 1.6715681931370376e-05, + "loss": 0.6688862442970276, + "step": 4672 + }, + { + "epoch": 0.8634615606702775, + "grad_norm": 0.05958564579486847, + "learning_rate": 1.6714203679697504e-05, + "loss": 0.4352157711982727, + "step": 4673 + }, + { + "epoch": 0.8636463373791734, + "grad_norm": 0.07748246192932129, + "learning_rate": 1.671272516082033e-05, + "loss": 0.6799011826515198, + "step": 4674 + }, + { + "epoch": 0.8638311140880692, + "grad_norm": 0.09600286185741425, + "learning_rate": 1.6711246374797696e-05, + "loss": 0.7128432393074036, + "step": 4675 + }, + { + "epoch": 0.8640158907969651, + "grad_norm": 0.06360357999801636, + "learning_rate": 1.6709767321688453e-05, + "loss": 0.614168643951416, + "step": 4676 + }, + { + "epoch": 0.8642006675058609, + "grad_norm": 0.07200276106595993, + "learning_rate": 1.6708288001551464e-05, + "loss": 0.44147413969039917, + "step": 4677 + }, + { + "epoch": 0.8643854442147567, + "grad_norm": 0.06741948425769806, + "learning_rate": 1.67068084144456e-05, + "loss": 0.4448484480381012, + "step": 4678 + }, + { + "epoch": 0.8645702209236525, + "grad_norm": 0.061067428439855576, + "learning_rate": 1.670532856042974e-05, + "loss": 0.43858760595321655, + "step": 4679 + }, + { + "epoch": 0.8647549976325484, + "grad_norm": 0.0807904303073883, + "learning_rate": 1.6703848439562787e-05, + "loss": 0.8016616106033325, + "step": 4680 + }, + { + "epoch": 0.8649397743414443, + "grad_norm": 0.07437105476856232, + "learning_rate": 1.6702368051903638e-05, + "loss": 0.43584221601486206, + "step": 4681 + }, + { + "epoch": 0.8651245510503401, + "grad_norm": 0.07812274992465973, + "learning_rate": 1.6700887397511206e-05, + "loss": 0.7267005443572998, + "step": 4682 + }, + { + "epoch": 0.865309327759236, + "grad_norm": 0.08380448073148727, + "learning_rate": 1.6699406476444426e-05, + "loss": 0.7394942045211792, + "step": 4683 + }, + { + "epoch": 0.8654941044681318, + "grad_norm": 0.06715326011180878, + "learning_rate": 1.6697925288762226e-05, + "loss": 0.5929086804389954, + "step": 4684 + }, + { + "epoch": 0.8656788811770276, + "grad_norm": 0.06208386272192001, + "learning_rate": 1.6696443834523554e-05, + "loss": 0.4253726601600647, + "step": 4685 + }, + { + "epoch": 0.8658636578859235, + "grad_norm": 0.07226450741291046, + "learning_rate": 1.6694962113787365e-05, + "loss": 0.6204879283905029, + "step": 4686 + }, + { + "epoch": 0.8660484345948193, + "grad_norm": 0.05629701539874077, + "learning_rate": 1.6693480126612636e-05, + "loss": 0.4896463453769684, + "step": 4687 + }, + { + "epoch": 0.8662332113037151, + "grad_norm": 0.06906317919492722, + "learning_rate": 1.6691997873058333e-05, + "loss": 0.5383235216140747, + "step": 4688 + }, + { + "epoch": 0.866417988012611, + "grad_norm": 0.06155070289969444, + "learning_rate": 1.6690515353183455e-05, + "loss": 0.5179824233055115, + "step": 4689 + }, + { + "epoch": 0.8666027647215069, + "grad_norm": 0.07300128042697906, + "learning_rate": 1.6689032567046996e-05, + "loss": 0.586868166923523, + "step": 4690 + }, + { + "epoch": 0.8667875414304027, + "grad_norm": 0.07631676644086838, + "learning_rate": 1.668754951470797e-05, + "loss": 0.5804420113563538, + "step": 4691 + }, + { + "epoch": 0.8669723181392985, + "grad_norm": 0.05737726390361786, + "learning_rate": 1.6686066196225394e-05, + "loss": 0.5011819005012512, + "step": 4692 + }, + { + "epoch": 0.8671570948481944, + "grad_norm": 0.08360470086336136, + "learning_rate": 1.66845826116583e-05, + "loss": 0.6318942308425903, + "step": 4693 + }, + { + "epoch": 0.8673418715570902, + "grad_norm": 0.060124389827251434, + "learning_rate": 1.6683098761065734e-05, + "loss": 0.5247859358787537, + "step": 4694 + }, + { + "epoch": 0.867526648265986, + "grad_norm": 0.0769641250371933, + "learning_rate": 1.6681614644506747e-05, + "loss": 0.5290091037750244, + "step": 4695 + }, + { + "epoch": 0.867711424974882, + "grad_norm": 0.0742589458823204, + "learning_rate": 1.6680130262040398e-05, + "loss": 0.7358421087265015, + "step": 4696 + }, + { + "epoch": 0.8678962016837778, + "grad_norm": 0.0717698410153389, + "learning_rate": 1.667864561372577e-05, + "loss": 0.5507757663726807, + "step": 4697 + }, + { + "epoch": 0.8680809783926736, + "grad_norm": 0.060020409524440765, + "learning_rate": 1.6677160699621934e-05, + "loss": 0.5884896516799927, + "step": 4698 + }, + { + "epoch": 0.8682657551015694, + "grad_norm": 0.08041785657405853, + "learning_rate": 1.6675675519787997e-05, + "loss": 0.6492130756378174, + "step": 4699 + }, + { + "epoch": 0.8684505318104653, + "grad_norm": 0.07929142564535141, + "learning_rate": 1.6674190074283058e-05, + "loss": 0.638306736946106, + "step": 4700 + }, + { + "epoch": 0.8686353085193611, + "grad_norm": 0.06563692539930344, + "learning_rate": 1.6672704363166238e-05, + "loss": 0.5677976608276367, + "step": 4701 + }, + { + "epoch": 0.8688200852282569, + "grad_norm": 0.07268399000167847, + "learning_rate": 1.6671218386496655e-05, + "loss": 0.6142613887786865, + "step": 4702 + }, + { + "epoch": 0.8690048619371529, + "grad_norm": 0.059030793607234955, + "learning_rate": 1.6669732144333454e-05, + "loss": 0.49718761444091797, + "step": 4703 + }, + { + "epoch": 0.8691896386460487, + "grad_norm": 0.0930427759885788, + "learning_rate": 1.6668245636735782e-05, + "loss": 0.765467643737793, + "step": 4704 + }, + { + "epoch": 0.8693744153549445, + "grad_norm": 0.07027864456176758, + "learning_rate": 1.6666758863762796e-05, + "loss": 0.468212366104126, + "step": 4705 + }, + { + "epoch": 0.8695591920638404, + "grad_norm": 0.07914797216653824, + "learning_rate": 1.6665271825473663e-05, + "loss": 0.5796051025390625, + "step": 4706 + }, + { + "epoch": 0.8697439687727362, + "grad_norm": 0.07291863113641739, + "learning_rate": 1.6663784521927568e-05, + "loss": 0.5344331860542297, + "step": 4707 + }, + { + "epoch": 0.869928745481632, + "grad_norm": 0.05999236926436424, + "learning_rate": 1.6662296953183693e-05, + "loss": 0.45997464656829834, + "step": 4708 + }, + { + "epoch": 0.8701135221905278, + "grad_norm": 0.0643937885761261, + "learning_rate": 1.6660809119301246e-05, + "loss": 0.5652839541435242, + "step": 4709 + }, + { + "epoch": 0.8702982988994237, + "grad_norm": 0.06123213842511177, + "learning_rate": 1.665932102033943e-05, + "loss": 0.5423359274864197, + "step": 4710 + }, + { + "epoch": 0.8704830756083196, + "grad_norm": 0.06579851359128952, + "learning_rate": 1.6657832656357475e-05, + "loss": 0.565425455570221, + "step": 4711 + }, + { + "epoch": 0.8706678523172154, + "grad_norm": 0.07380993664264679, + "learning_rate": 1.665634402741461e-05, + "loss": 0.5821062922477722, + "step": 4712 + }, + { + "epoch": 0.8708526290261113, + "grad_norm": 0.07134454697370529, + "learning_rate": 1.665485513357008e-05, + "loss": 0.5998745560646057, + "step": 4713 + }, + { + "epoch": 0.8710374057350071, + "grad_norm": 0.05814999341964722, + "learning_rate": 1.6653365974883134e-05, + "loss": 0.4527260363101959, + "step": 4714 + }, + { + "epoch": 0.8712221824439029, + "grad_norm": 0.09124371409416199, + "learning_rate": 1.6651876551413038e-05, + "loss": 0.9598178863525391, + "step": 4715 + }, + { + "epoch": 0.8714069591527988, + "grad_norm": 0.05583808571100235, + "learning_rate": 1.6650386863219067e-05, + "loss": 0.44631731510162354, + "step": 4716 + }, + { + "epoch": 0.8715917358616946, + "grad_norm": 0.06759850680828094, + "learning_rate": 1.6648896910360503e-05, + "loss": 0.5839464664459229, + "step": 4717 + }, + { + "epoch": 0.8717765125705905, + "grad_norm": 0.057879723608493805, + "learning_rate": 1.6647406692896646e-05, + "loss": 0.5076382756233215, + "step": 4718 + }, + { + "epoch": 0.8719612892794864, + "grad_norm": 0.07822850346565247, + "learning_rate": 1.66459162108868e-05, + "loss": 0.5981899499893188, + "step": 4719 + }, + { + "epoch": 0.8721460659883822, + "grad_norm": 0.05845734477043152, + "learning_rate": 1.6644425464390277e-05, + "loss": 0.473381370306015, + "step": 4720 + }, + { + "epoch": 0.872330842697278, + "grad_norm": 0.06890823692083359, + "learning_rate": 1.6642934453466413e-05, + "loss": 0.5807965993881226, + "step": 4721 + }, + { + "epoch": 0.8725156194061738, + "grad_norm": 0.07697651535272598, + "learning_rate": 1.6641443178174536e-05, + "loss": 0.6806358695030212, + "step": 4722 + }, + { + "epoch": 0.8727003961150697, + "grad_norm": 0.0753675252199173, + "learning_rate": 1.6639951638574005e-05, + "loss": 0.5100016593933105, + "step": 4723 + }, + { + "epoch": 0.8728851728239655, + "grad_norm": 0.06294082850217819, + "learning_rate": 1.663845983472417e-05, + "loss": 0.5712581872940063, + "step": 4724 + }, + { + "epoch": 0.8730699495328614, + "grad_norm": 0.057558026164770126, + "learning_rate": 1.66369677666844e-05, + "loss": 0.4887886643409729, + "step": 4725 + }, + { + "epoch": 0.8732547262417573, + "grad_norm": 0.05099833011627197, + "learning_rate": 1.663547543451408e-05, + "loss": 0.49933555722236633, + "step": 4726 + }, + { + "epoch": 0.8734395029506531, + "grad_norm": 0.09851501882076263, + "learning_rate": 1.6633982838272598e-05, + "loss": 0.7819404006004333, + "step": 4727 + }, + { + "epoch": 0.8736242796595489, + "grad_norm": 0.05875126272439957, + "learning_rate": 1.6632489978019352e-05, + "loss": 0.39106565713882446, + "step": 4728 + }, + { + "epoch": 0.8738090563684447, + "grad_norm": 0.07598012685775757, + "learning_rate": 1.6630996853813757e-05, + "loss": 0.6143175959587097, + "step": 4729 + }, + { + "epoch": 0.8739938330773406, + "grad_norm": 0.05999317392706871, + "learning_rate": 1.662950346571523e-05, + "loss": 0.5516825914382935, + "step": 4730 + }, + { + "epoch": 0.8741786097862364, + "grad_norm": 0.08445960283279419, + "learning_rate": 1.6628009813783213e-05, + "loss": 0.6581443548202515, + "step": 4731 + }, + { + "epoch": 0.8743633864951322, + "grad_norm": 0.07585146278142929, + "learning_rate": 1.6626515898077137e-05, + "loss": 0.6074404716491699, + "step": 4732 + }, + { + "epoch": 0.8745481632040282, + "grad_norm": 0.06283219903707504, + "learning_rate": 1.662502171865646e-05, + "loss": 0.6039258241653442, + "step": 4733 + }, + { + "epoch": 0.874732939912924, + "grad_norm": 0.05872878059744835, + "learning_rate": 1.6623527275580643e-05, + "loss": 0.4644799530506134, + "step": 4734 + }, + { + "epoch": 0.8749177166218198, + "grad_norm": 0.055183276534080505, + "learning_rate": 1.662203256890917e-05, + "loss": 0.512610137462616, + "step": 4735 + }, + { + "epoch": 0.8751024933307157, + "grad_norm": 0.06261883676052094, + "learning_rate": 1.662053759870151e-05, + "loss": 0.580086350440979, + "step": 4736 + }, + { + "epoch": 0.8752872700396115, + "grad_norm": 0.07283858954906464, + "learning_rate": 1.6619042365017173e-05, + "loss": 0.6381665468215942, + "step": 4737 + }, + { + "epoch": 0.8754720467485073, + "grad_norm": 0.07218164205551147, + "learning_rate": 1.6617546867915654e-05, + "loss": 0.5770898461341858, + "step": 4738 + }, + { + "epoch": 0.8756568234574031, + "grad_norm": 0.085800901055336, + "learning_rate": 1.6616051107456478e-05, + "loss": 0.7564215064048767, + "step": 4739 + }, + { + "epoch": 0.8758416001662991, + "grad_norm": 0.0664244145154953, + "learning_rate": 1.6614555083699163e-05, + "loss": 0.593537449836731, + "step": 4740 + }, + { + "epoch": 0.8760263768751949, + "grad_norm": 0.08380355685949326, + "learning_rate": 1.661305879670325e-05, + "loss": 0.7730197310447693, + "step": 4741 + }, + { + "epoch": 0.8762111535840907, + "grad_norm": 0.06761428713798523, + "learning_rate": 1.6611562246528287e-05, + "loss": 0.5476041436195374, + "step": 4742 + }, + { + "epoch": 0.8763959302929866, + "grad_norm": 0.09186141937971115, + "learning_rate": 1.6610065433233832e-05, + "loss": 0.7884858250617981, + "step": 4743 + }, + { + "epoch": 0.8765807070018824, + "grad_norm": 0.060746416449546814, + "learning_rate": 1.6608568356879454e-05, + "loss": 0.44659048318862915, + "step": 4744 + }, + { + "epoch": 0.8767654837107782, + "grad_norm": 0.07710873335599899, + "learning_rate": 1.660707101752473e-05, + "loss": 0.7485287189483643, + "step": 4745 + }, + { + "epoch": 0.876950260419674, + "grad_norm": 0.07253643125295639, + "learning_rate": 1.660557341522925e-05, + "loss": 0.5462217330932617, + "step": 4746 + }, + { + "epoch": 0.87713503712857, + "grad_norm": 0.07171177864074707, + "learning_rate": 1.6604075550052616e-05, + "loss": 0.6452522873878479, + "step": 4747 + }, + { + "epoch": 0.8773198138374658, + "grad_norm": 0.08357395231723785, + "learning_rate": 1.6602577422054433e-05, + "loss": 0.7898638844490051, + "step": 4748 + }, + { + "epoch": 0.8775045905463617, + "grad_norm": 0.07633797824382782, + "learning_rate": 1.660107903129433e-05, + "loss": 0.5493875741958618, + "step": 4749 + }, + { + "epoch": 0.8776893672552575, + "grad_norm": 0.07141376286745071, + "learning_rate": 1.659958037783193e-05, + "loss": 0.6093866229057312, + "step": 4750 + }, + { + "epoch": 0.8778741439641533, + "grad_norm": 0.09596671909093857, + "learning_rate": 1.659808146172688e-05, + "loss": 0.8044968843460083, + "step": 4751 + }, + { + "epoch": 0.8780589206730491, + "grad_norm": 0.06962507218122482, + "learning_rate": 1.6596582283038828e-05, + "loss": 0.4918615221977234, + "step": 4752 + }, + { + "epoch": 0.878243697381945, + "grad_norm": 0.07077303528785706, + "learning_rate": 1.6595082841827442e-05, + "loss": 0.5758873224258423, + "step": 4753 + }, + { + "epoch": 0.8784284740908408, + "grad_norm": 0.063609778881073, + "learning_rate": 1.659358313815239e-05, + "loss": 0.6233231425285339, + "step": 4754 + }, + { + "epoch": 0.8786132507997367, + "grad_norm": 0.07234170287847519, + "learning_rate": 1.659208317207336e-05, + "loss": 0.6821828484535217, + "step": 4755 + }, + { + "epoch": 0.8787980275086326, + "grad_norm": 0.08028619736433029, + "learning_rate": 1.6590582943650046e-05, + "loss": 0.840358555316925, + "step": 4756 + }, + { + "epoch": 0.8789828042175284, + "grad_norm": 0.05547550693154335, + "learning_rate": 1.658908245294215e-05, + "loss": 0.4089423418045044, + "step": 4757 + }, + { + "epoch": 0.8791675809264242, + "grad_norm": 0.08615767955780029, + "learning_rate": 1.658758170000938e-05, + "loss": 0.6610065698623657, + "step": 4758 + }, + { + "epoch": 0.87935235763532, + "grad_norm": 0.05322204530239105, + "learning_rate": 1.658608068491147e-05, + "loss": 0.39673668146133423, + "step": 4759 + }, + { + "epoch": 0.8795371343442159, + "grad_norm": 0.07618802785873413, + "learning_rate": 1.658457940770816e-05, + "loss": 0.5760771632194519, + "step": 4760 + }, + { + "epoch": 0.8797219110531117, + "grad_norm": 0.06550726294517517, + "learning_rate": 1.6583077868459185e-05, + "loss": 0.518328845500946, + "step": 4761 + }, + { + "epoch": 0.8799066877620076, + "grad_norm": 0.06240643188357353, + "learning_rate": 1.658157606722431e-05, + "loss": 0.43588632345199585, + "step": 4762 + }, + { + "epoch": 0.8800914644709035, + "grad_norm": 0.07559328526258469, + "learning_rate": 1.6580074004063295e-05, + "loss": 0.6650227308273315, + "step": 4763 + }, + { + "epoch": 0.8802762411797993, + "grad_norm": 0.08168413490056992, + "learning_rate": 1.6578571679035924e-05, + "loss": 0.6146327257156372, + "step": 4764 + }, + { + "epoch": 0.8804610178886951, + "grad_norm": 0.05272490531206131, + "learning_rate": 1.6577069092201982e-05, + "loss": 0.4760764241218567, + "step": 4765 + }, + { + "epoch": 0.880645794597591, + "grad_norm": 0.07334236055612564, + "learning_rate": 1.6575566243621263e-05, + "loss": 0.6713932156562805, + "step": 4766 + }, + { + "epoch": 0.8808305713064868, + "grad_norm": 0.08298858255147934, + "learning_rate": 1.657406313335358e-05, + "loss": 0.6796658635139465, + "step": 4767 + }, + { + "epoch": 0.8810153480153826, + "grad_norm": 0.09175878763198853, + "learning_rate": 1.657255976145876e-05, + "loss": 0.6366848349571228, + "step": 4768 + }, + { + "epoch": 0.8812001247242786, + "grad_norm": 0.0788515955209732, + "learning_rate": 1.657105612799662e-05, + "loss": 0.6692134737968445, + "step": 4769 + }, + { + "epoch": 0.8813849014331744, + "grad_norm": 0.04891791194677353, + "learning_rate": 1.6569552233027e-05, + "loss": 0.3151491582393646, + "step": 4770 + }, + { + "epoch": 0.8815696781420702, + "grad_norm": 0.06327511370182037, + "learning_rate": 1.656804807660976e-05, + "loss": 0.6637892127037048, + "step": 4771 + }, + { + "epoch": 0.881754454850966, + "grad_norm": 0.08450198918581009, + "learning_rate": 1.6566543658804753e-05, + "loss": 0.7439243197441101, + "step": 4772 + }, + { + "epoch": 0.8819392315598619, + "grad_norm": 0.08025246858596802, + "learning_rate": 1.656503897967185e-05, + "loss": 0.6441263556480408, + "step": 4773 + }, + { + "epoch": 0.8821240082687577, + "grad_norm": 0.07658755034208298, + "learning_rate": 1.656353403927094e-05, + "loss": 0.6907283067703247, + "step": 4774 + }, + { + "epoch": 0.8823087849776535, + "grad_norm": 0.07724691182374954, + "learning_rate": 1.6562028837661905e-05, + "loss": 0.6994632482528687, + "step": 4775 + }, + { + "epoch": 0.8824935616865495, + "grad_norm": 0.07235722243785858, + "learning_rate": 1.6560523374904658e-05, + "loss": 0.5693969130516052, + "step": 4776 + }, + { + "epoch": 0.8826783383954453, + "grad_norm": 0.07386350631713867, + "learning_rate": 1.65590176510591e-05, + "loss": 0.5384517312049866, + "step": 4777 + }, + { + "epoch": 0.8828631151043411, + "grad_norm": 0.06712142378091812, + "learning_rate": 1.6557511666185164e-05, + "loss": 0.5699175000190735, + "step": 4778 + }, + { + "epoch": 0.883047891813237, + "grad_norm": 0.07460469007492065, + "learning_rate": 1.655600542034278e-05, + "loss": 0.523343563079834, + "step": 4779 + }, + { + "epoch": 0.8832326685221328, + "grad_norm": 0.06637348234653473, + "learning_rate": 1.6554498913591888e-05, + "loss": 0.4786106050014496, + "step": 4780 + }, + { + "epoch": 0.8834174452310286, + "grad_norm": 0.06271538138389587, + "learning_rate": 1.6552992145992444e-05, + "loss": 0.492175817489624, + "step": 4781 + }, + { + "epoch": 0.8836022219399244, + "grad_norm": 0.07379522174596786, + "learning_rate": 1.655148511760442e-05, + "loss": 0.5145571231842041, + "step": 4782 + }, + { + "epoch": 0.8837869986488203, + "grad_norm": 0.07079476863145828, + "learning_rate": 1.6549977828487784e-05, + "loss": 0.5578653216362, + "step": 4783 + }, + { + "epoch": 0.8839717753577162, + "grad_norm": 0.05883871391415596, + "learning_rate": 1.654847027870252e-05, + "loss": 0.36676284670829773, + "step": 4784 + }, + { + "epoch": 0.884156552066612, + "grad_norm": 0.07372432202100754, + "learning_rate": 1.6546962468308628e-05, + "loss": 0.6349548101425171, + "step": 4785 + }, + { + "epoch": 0.8843413287755079, + "grad_norm": 0.08021949231624603, + "learning_rate": 1.6545454397366114e-05, + "loss": 0.5997925996780396, + "step": 4786 + }, + { + "epoch": 0.8845261054844037, + "grad_norm": 0.08539916574954987, + "learning_rate": 1.654394606593499e-05, + "loss": 0.7283767461776733, + "step": 4787 + }, + { + "epoch": 0.8847108821932995, + "grad_norm": 0.06747384369373322, + "learning_rate": 1.6542437474075288e-05, + "loss": 0.5858517289161682, + "step": 4788 + }, + { + "epoch": 0.8848956589021953, + "grad_norm": 0.08050844818353653, + "learning_rate": 1.6540928621847042e-05, + "loss": 0.6546807885169983, + "step": 4789 + }, + { + "epoch": 0.8850804356110912, + "grad_norm": 0.05787456035614014, + "learning_rate": 1.65394195093103e-05, + "loss": 0.4163222908973694, + "step": 4790 + }, + { + "epoch": 0.8852652123199871, + "grad_norm": 0.05363563075661659, + "learning_rate": 1.6537910136525123e-05, + "loss": 0.4085903465747833, + "step": 4791 + }, + { + "epoch": 0.8854499890288829, + "grad_norm": 0.08327876031398773, + "learning_rate": 1.6536400503551576e-05, + "loss": 0.5789997577667236, + "step": 4792 + }, + { + "epoch": 0.8856347657377788, + "grad_norm": 0.06537973135709763, + "learning_rate": 1.653489061044974e-05, + "loss": 0.6246554851531982, + "step": 4793 + }, + { + "epoch": 0.8858195424466746, + "grad_norm": 0.062478817999362946, + "learning_rate": 1.6533380457279704e-05, + "loss": 0.4444185793399811, + "step": 4794 + }, + { + "epoch": 0.8860043191555704, + "grad_norm": 0.06802353262901306, + "learning_rate": 1.6531870044101565e-05, + "loss": 0.6060287952423096, + "step": 4795 + }, + { + "epoch": 0.8861890958644663, + "grad_norm": 0.06890678405761719, + "learning_rate": 1.653035937097543e-05, + "loss": 0.48558810353279114, + "step": 4796 + }, + { + "epoch": 0.8863738725733621, + "grad_norm": 0.04655135050415993, + "learning_rate": 1.6528848437961426e-05, + "loss": 0.38830509781837463, + "step": 4797 + }, + { + "epoch": 0.886558649282258, + "grad_norm": 0.06424148380756378, + "learning_rate": 1.6527337245119678e-05, + "loss": 0.4444373548030853, + "step": 4798 + }, + { + "epoch": 0.8867434259911539, + "grad_norm": 0.07430419325828552, + "learning_rate": 1.6525825792510333e-05, + "loss": 0.7238464951515198, + "step": 4799 + }, + { + "epoch": 0.8869282027000497, + "grad_norm": 0.0749935656785965, + "learning_rate": 1.6524314080193534e-05, + "loss": 0.7441866993904114, + "step": 4800 + }, + { + "epoch": 0.8871129794089455, + "grad_norm": 0.08086378127336502, + "learning_rate": 1.652280210822945e-05, + "loss": 0.7391872406005859, + "step": 4801 + }, + { + "epoch": 0.8872977561178413, + "grad_norm": 0.07954630255699158, + "learning_rate": 1.6521289876678247e-05, + "loss": 0.6579791903495789, + "step": 4802 + }, + { + "epoch": 0.8874825328267372, + "grad_norm": 0.07959762960672379, + "learning_rate": 1.6519777385600112e-05, + "loss": 0.7004535794258118, + "step": 4803 + }, + { + "epoch": 0.887667309535633, + "grad_norm": 0.05923865735530853, + "learning_rate": 1.6518264635055233e-05, + "loss": 0.3998711109161377, + "step": 4804 + }, + { + "epoch": 0.8878520862445288, + "grad_norm": 0.08902712911367416, + "learning_rate": 1.6516751625103817e-05, + "loss": 0.6480748057365417, + "step": 4805 + }, + { + "epoch": 0.8880368629534248, + "grad_norm": 0.06671420484781265, + "learning_rate": 1.651523835580607e-05, + "loss": 0.6713802218437195, + "step": 4806 + }, + { + "epoch": 0.8882216396623206, + "grad_norm": 0.0831875428557396, + "learning_rate": 1.6513724827222225e-05, + "loss": 0.6743994355201721, + "step": 4807 + }, + { + "epoch": 0.8884064163712164, + "grad_norm": 0.07316502928733826, + "learning_rate": 1.651221103941251e-05, + "loss": 0.6043171882629395, + "step": 4808 + }, + { + "epoch": 0.8885911930801123, + "grad_norm": 0.05832867696881294, + "learning_rate": 1.6510696992437164e-05, + "loss": 0.5134963989257812, + "step": 4809 + }, + { + "epoch": 0.8887759697890081, + "grad_norm": 0.07833995670080185, + "learning_rate": 1.6509182686356454e-05, + "loss": 0.5890991687774658, + "step": 4810 + }, + { + "epoch": 0.8889607464979039, + "grad_norm": 0.07378154247999191, + "learning_rate": 1.6507668121230632e-05, + "loss": 0.40639033913612366, + "step": 4811 + }, + { + "epoch": 0.8891455232067997, + "grad_norm": 0.06623880565166473, + "learning_rate": 1.6506153297119984e-05, + "loss": 0.5028772354125977, + "step": 4812 + }, + { + "epoch": 0.8893302999156957, + "grad_norm": 0.07434017211198807, + "learning_rate": 1.6504638214084784e-05, + "loss": 0.7079054117202759, + "step": 4813 + }, + { + "epoch": 0.8895150766245915, + "grad_norm": 0.07623261958360672, + "learning_rate": 1.6503122872185338e-05, + "loss": 0.5926937460899353, + "step": 4814 + }, + { + "epoch": 0.8896998533334873, + "grad_norm": 0.06846163421869278, + "learning_rate": 1.6501607271481944e-05, + "loss": 0.5976287126541138, + "step": 4815 + }, + { + "epoch": 0.8898846300423832, + "grad_norm": 0.0537530779838562, + "learning_rate": 1.6500091412034925e-05, + "loss": 0.40996718406677246, + "step": 4816 + }, + { + "epoch": 0.890069406751279, + "grad_norm": 0.07969720661640167, + "learning_rate": 1.6498575293904603e-05, + "loss": 0.6000237464904785, + "step": 4817 + }, + { + "epoch": 0.8902541834601748, + "grad_norm": 0.07043500989675522, + "learning_rate": 1.6497058917151314e-05, + "loss": 0.5773909091949463, + "step": 4818 + }, + { + "epoch": 0.8904389601690706, + "grad_norm": 0.07048541307449341, + "learning_rate": 1.6495542281835407e-05, + "loss": 0.705425500869751, + "step": 4819 + }, + { + "epoch": 0.8906237368779666, + "grad_norm": 0.06701841205358505, + "learning_rate": 1.649402538801724e-05, + "loss": 0.722366213798523, + "step": 4820 + }, + { + "epoch": 0.8908085135868624, + "grad_norm": 0.08606120198965073, + "learning_rate": 1.6492508235757184e-05, + "loss": 0.7456164956092834, + "step": 4821 + }, + { + "epoch": 0.8909932902957582, + "grad_norm": 0.05852803960442543, + "learning_rate": 1.649099082511561e-05, + "loss": 0.4330172836780548, + "step": 4822 + }, + { + "epoch": 0.8911780670046541, + "grad_norm": 0.06518279016017914, + "learning_rate": 1.6489473156152904e-05, + "loss": 0.5213345885276794, + "step": 4823 + }, + { + "epoch": 0.8913628437135499, + "grad_norm": 0.0874481275677681, + "learning_rate": 1.6487955228929474e-05, + "loss": 0.7169750332832336, + "step": 4824 + }, + { + "epoch": 0.8915476204224457, + "grad_norm": 0.060117240995168686, + "learning_rate": 1.648643704350572e-05, + "loss": 0.5598580241203308, + "step": 4825 + }, + { + "epoch": 0.8917323971313416, + "grad_norm": 0.06978029757738113, + "learning_rate": 1.648491859994207e-05, + "loss": 0.648662805557251, + "step": 4826 + }, + { + "epoch": 0.8919171738402374, + "grad_norm": 0.07007380574941635, + "learning_rate": 1.6483399898298945e-05, + "loss": 0.7349458336830139, + "step": 4827 + }, + { + "epoch": 0.8921019505491333, + "grad_norm": 0.06880953907966614, + "learning_rate": 1.648188093863679e-05, + "loss": 0.6454513072967529, + "step": 4828 + }, + { + "epoch": 0.8922867272580292, + "grad_norm": 0.05998198315501213, + "learning_rate": 1.6480361721016053e-05, + "loss": 0.3911832571029663, + "step": 4829 + }, + { + "epoch": 0.892471503966925, + "grad_norm": 0.08584605157375336, + "learning_rate": 1.6478842245497193e-05, + "loss": 0.6219820380210876, + "step": 4830 + }, + { + "epoch": 0.8926562806758208, + "grad_norm": 0.06927721202373505, + "learning_rate": 1.6477322512140683e-05, + "loss": 0.5584146976470947, + "step": 4831 + }, + { + "epoch": 0.8928410573847166, + "grad_norm": 0.06739287823438644, + "learning_rate": 1.6475802521007e-05, + "loss": 0.6995735764503479, + "step": 4832 + }, + { + "epoch": 0.8930258340936125, + "grad_norm": 0.08070404082536697, + "learning_rate": 1.647428227215664e-05, + "loss": 0.6666145324707031, + "step": 4833 + }, + { + "epoch": 0.8932106108025083, + "grad_norm": 0.05215371027588844, + "learning_rate": 1.6472761765650095e-05, + "loss": 0.40434175729751587, + "step": 4834 + }, + { + "epoch": 0.8933953875114042, + "grad_norm": 0.085506372153759, + "learning_rate": 1.6471241001547886e-05, + "loss": 0.7858412265777588, + "step": 4835 + }, + { + "epoch": 0.8935801642203001, + "grad_norm": 0.08535362035036087, + "learning_rate": 1.6469719979910534e-05, + "loss": 0.5532266497612, + "step": 4836 + }, + { + "epoch": 0.8937649409291959, + "grad_norm": 0.07354865223169327, + "learning_rate": 1.646819870079856e-05, + "loss": 0.6461263298988342, + "step": 4837 + }, + { + "epoch": 0.8939497176380917, + "grad_norm": 0.06490924954414368, + "learning_rate": 1.6466677164272523e-05, + "loss": 0.48238641023635864, + "step": 4838 + }, + { + "epoch": 0.8941344943469876, + "grad_norm": 0.0884164422750473, + "learning_rate": 1.646515537039296e-05, + "loss": 0.7125632166862488, + "step": 4839 + }, + { + "epoch": 0.8943192710558834, + "grad_norm": 0.06169293075799942, + "learning_rate": 1.6463633319220443e-05, + "loss": 0.39989128708839417, + "step": 4840 + }, + { + "epoch": 0.8945040477647792, + "grad_norm": 0.08390737324953079, + "learning_rate": 1.6462111010815543e-05, + "loss": 0.6275712251663208, + "step": 4841 + }, + { + "epoch": 0.8946888244736751, + "grad_norm": 0.05772528424859047, + "learning_rate": 1.646058844523884e-05, + "loss": 0.4604227542877197, + "step": 4842 + }, + { + "epoch": 0.894873601182571, + "grad_norm": 0.047833140939474106, + "learning_rate": 1.6459065622550928e-05, + "loss": 0.36416640877723694, + "step": 4843 + }, + { + "epoch": 0.8950583778914668, + "grad_norm": 0.07665587961673737, + "learning_rate": 1.6457542542812415e-05, + "loss": 0.5700147747993469, + "step": 4844 + }, + { + "epoch": 0.8952431546003626, + "grad_norm": 0.06834596395492554, + "learning_rate": 1.645601920608391e-05, + "loss": 0.5849366188049316, + "step": 4845 + }, + { + "epoch": 0.8954279313092585, + "grad_norm": 0.05772769823670387, + "learning_rate": 1.6454495612426044e-05, + "loss": 0.481160968542099, + "step": 4846 + }, + { + "epoch": 0.8956127080181543, + "grad_norm": 0.08508264273405075, + "learning_rate": 1.6452971761899438e-05, + "loss": 0.6891061067581177, + "step": 4847 + }, + { + "epoch": 0.8957974847270501, + "grad_norm": 0.07678437978029251, + "learning_rate": 1.645144765456475e-05, + "loss": 0.6270875930786133, + "step": 4848 + }, + { + "epoch": 0.895982261435946, + "grad_norm": 0.07674133032560349, + "learning_rate": 1.6449923290482627e-05, + "loss": 0.5793139338493347, + "step": 4849 + }, + { + "epoch": 0.8961670381448419, + "grad_norm": 0.06758452206850052, + "learning_rate": 1.644839866971374e-05, + "loss": 0.5169587135314941, + "step": 4850 + }, + { + "epoch": 0.8963518148537377, + "grad_norm": 0.08784767240285873, + "learning_rate": 1.6446873792318755e-05, + "loss": 0.6400648951530457, + "step": 4851 + }, + { + "epoch": 0.8965365915626335, + "grad_norm": 0.0625050738453865, + "learning_rate": 1.6445348658358365e-05, + "loss": 0.539431095123291, + "step": 4852 + }, + { + "epoch": 0.8967213682715294, + "grad_norm": 0.07906383275985718, + "learning_rate": 1.6443823267893265e-05, + "loss": 0.5679528713226318, + "step": 4853 + }, + { + "epoch": 0.8969061449804252, + "grad_norm": 0.07062255591154099, + "learning_rate": 1.644229762098416e-05, + "loss": 0.5080562829971313, + "step": 4854 + }, + { + "epoch": 0.897090921689321, + "grad_norm": 0.09191711992025375, + "learning_rate": 1.6440771717691762e-05, + "loss": 0.8223711848258972, + "step": 4855 + }, + { + "epoch": 0.8972756983982169, + "grad_norm": 0.09057801216840744, + "learning_rate": 1.64392455580768e-05, + "loss": 0.7925399541854858, + "step": 4856 + }, + { + "epoch": 0.8974604751071128, + "grad_norm": 0.0694667398929596, + "learning_rate": 1.6437719142200012e-05, + "loss": 0.46083754301071167, + "step": 4857 + }, + { + "epoch": 0.8976452518160086, + "grad_norm": 0.057980529963970184, + "learning_rate": 1.6436192470122142e-05, + "loss": 0.5017035603523254, + "step": 4858 + }, + { + "epoch": 0.8978300285249045, + "grad_norm": 0.07309112697839737, + "learning_rate": 1.643466554190395e-05, + "loss": 0.6440054178237915, + "step": 4859 + }, + { + "epoch": 0.8980148052338003, + "grad_norm": 0.09001088887453079, + "learning_rate": 1.6433138357606198e-05, + "loss": 0.7718321681022644, + "step": 4860 + }, + { + "epoch": 0.8981995819426961, + "grad_norm": 0.0937967598438263, + "learning_rate": 1.643161091728967e-05, + "loss": 0.7171733379364014, + "step": 4861 + }, + { + "epoch": 0.8983843586515919, + "grad_norm": 0.09350735694169998, + "learning_rate": 1.6430083221015145e-05, + "loss": 0.7647481560707092, + "step": 4862 + }, + { + "epoch": 0.8985691353604878, + "grad_norm": 0.0811251699924469, + "learning_rate": 1.642855526884343e-05, + "loss": 0.7368043065071106, + "step": 4863 + }, + { + "epoch": 0.8987539120693837, + "grad_norm": 0.056616462767124176, + "learning_rate": 1.6427027060835323e-05, + "loss": 0.46778184175491333, + "step": 4864 + }, + { + "epoch": 0.8989386887782795, + "grad_norm": 0.08717775344848633, + "learning_rate": 1.642549859705165e-05, + "loss": 0.8994236588478088, + "step": 4865 + }, + { + "epoch": 0.8991234654871754, + "grad_norm": 0.07994919270277023, + "learning_rate": 1.642396987755323e-05, + "loss": 0.5550265908241272, + "step": 4866 + }, + { + "epoch": 0.8993082421960712, + "grad_norm": 0.08052093535661697, + "learning_rate": 1.6422440902400913e-05, + "loss": 0.7112287878990173, + "step": 4867 + }, + { + "epoch": 0.899493018904967, + "grad_norm": 0.06995108723640442, + "learning_rate": 1.6420911671655542e-05, + "loss": 0.6041479110717773, + "step": 4868 + }, + { + "epoch": 0.8996777956138629, + "grad_norm": 0.07692621648311615, + "learning_rate": 1.641938218537797e-05, + "loss": 0.6449621915817261, + "step": 4869 + }, + { + "epoch": 0.8998625723227587, + "grad_norm": 0.05623577535152435, + "learning_rate": 1.6417852443629074e-05, + "loss": 0.4509305953979492, + "step": 4870 + }, + { + "epoch": 0.9000473490316545, + "grad_norm": 0.06790454685688019, + "learning_rate": 1.641632244646973e-05, + "loss": 0.6181674003601074, + "step": 4871 + }, + { + "epoch": 0.9002321257405504, + "grad_norm": 0.06258635967969894, + "learning_rate": 1.6414792193960823e-05, + "loss": 0.50604248046875, + "step": 4872 + }, + { + "epoch": 0.9004169024494463, + "grad_norm": 0.0780041292309761, + "learning_rate": 1.6413261686163258e-05, + "loss": 0.6751472353935242, + "step": 4873 + }, + { + "epoch": 0.9006016791583421, + "grad_norm": 0.08517557382583618, + "learning_rate": 1.6411730923137942e-05, + "loss": 0.656969428062439, + "step": 4874 + }, + { + "epoch": 0.9007864558672379, + "grad_norm": 0.07794827222824097, + "learning_rate": 1.6410199904945798e-05, + "loss": 0.6910105347633362, + "step": 4875 + }, + { + "epoch": 0.9009712325761338, + "grad_norm": 0.06862415373325348, + "learning_rate": 1.640866863164775e-05, + "loss": 0.5876045227050781, + "step": 4876 + }, + { + "epoch": 0.9011560092850296, + "grad_norm": 0.06553607434034348, + "learning_rate": 1.640713710330474e-05, + "loss": 0.5301469564437866, + "step": 4877 + }, + { + "epoch": 0.9013407859939254, + "grad_norm": 0.06396906822919846, + "learning_rate": 1.640560531997772e-05, + "loss": 0.43047258257865906, + "step": 4878 + }, + { + "epoch": 0.9015255627028214, + "grad_norm": 0.06935402005910873, + "learning_rate": 1.6404073281727648e-05, + "loss": 0.6457479596138, + "step": 4879 + }, + { + "epoch": 0.9017103394117172, + "grad_norm": 0.07602575421333313, + "learning_rate": 1.6402540988615494e-05, + "loss": 0.5612639784812927, + "step": 4880 + }, + { + "epoch": 0.901895116120613, + "grad_norm": 0.06645981222391129, + "learning_rate": 1.6401008440702243e-05, + "loss": 0.575152575969696, + "step": 4881 + }, + { + "epoch": 0.9020798928295088, + "grad_norm": 0.07158118486404419, + "learning_rate": 1.639947563804888e-05, + "loss": 0.39184918999671936, + "step": 4882 + }, + { + "epoch": 0.9022646695384047, + "grad_norm": 0.07773970067501068, + "learning_rate": 1.639794258071641e-05, + "loss": 0.6216570138931274, + "step": 4883 + }, + { + "epoch": 0.9024494462473005, + "grad_norm": 0.07514741271734238, + "learning_rate": 1.6396409268765837e-05, + "loss": 0.5021771788597107, + "step": 4884 + }, + { + "epoch": 0.9026342229561963, + "grad_norm": 0.07661321014165878, + "learning_rate": 1.639487570225819e-05, + "loss": 0.6844589710235596, + "step": 4885 + }, + { + "epoch": 0.9028189996650923, + "grad_norm": 0.0743803083896637, + "learning_rate": 1.6393341881254498e-05, + "loss": 0.655817449092865, + "step": 4886 + }, + { + "epoch": 0.9030037763739881, + "grad_norm": 0.05988422408699989, + "learning_rate": 1.63918078058158e-05, + "loss": 0.565988302230835, + "step": 4887 + }, + { + "epoch": 0.9031885530828839, + "grad_norm": 0.06683322787284851, + "learning_rate": 1.6390273476003152e-05, + "loss": 0.5435384511947632, + "step": 4888 + }, + { + "epoch": 0.9033733297917798, + "grad_norm": 0.07705941051244736, + "learning_rate": 1.6388738891877607e-05, + "loss": 0.5629130005836487, + "step": 4889 + }, + { + "epoch": 0.9035581065006756, + "grad_norm": 0.09960421919822693, + "learning_rate": 1.6387204053500246e-05, + "loss": 0.8571576476097107, + "step": 4890 + }, + { + "epoch": 0.9037428832095714, + "grad_norm": 0.06615423411130905, + "learning_rate": 1.6385668960932143e-05, + "loss": 0.6547509431838989, + "step": 4891 + }, + { + "epoch": 0.9039276599184672, + "grad_norm": 0.0818847268819809, + "learning_rate": 1.63841336142344e-05, + "loss": 0.724168062210083, + "step": 4892 + }, + { + "epoch": 0.9041124366273631, + "grad_norm": 0.0793035700917244, + "learning_rate": 1.6382598013468104e-05, + "loss": 0.7631268501281738, + "step": 4893 + }, + { + "epoch": 0.904297213336259, + "grad_norm": 0.06299310177564621, + "learning_rate": 1.638106215869438e-05, + "loss": 0.39549270272254944, + "step": 4894 + }, + { + "epoch": 0.9044819900451548, + "grad_norm": 0.06919150054454803, + "learning_rate": 1.6379526049974347e-05, + "loss": 0.5981887578964233, + "step": 4895 + }, + { + "epoch": 0.9046667667540507, + "grad_norm": 0.07601941376924515, + "learning_rate": 1.6377989687369135e-05, + "loss": 0.5894408822059631, + "step": 4896 + }, + { + "epoch": 0.9048515434629465, + "grad_norm": 0.0833205133676529, + "learning_rate": 1.637645307093989e-05, + "loss": 0.6436570286750793, + "step": 4897 + }, + { + "epoch": 0.9050363201718423, + "grad_norm": 0.06943897157907486, + "learning_rate": 1.637491620074776e-05, + "loss": 0.6639769673347473, + "step": 4898 + }, + { + "epoch": 0.9052210968807382, + "grad_norm": 0.05080414563417435, + "learning_rate": 1.637337907685391e-05, + "loss": 0.3694339096546173, + "step": 4899 + }, + { + "epoch": 0.905405873589634, + "grad_norm": 0.07731123268604279, + "learning_rate": 1.637184169931951e-05, + "loss": 0.6865700483322144, + "step": 4900 + }, + { + "epoch": 0.9055906502985299, + "grad_norm": 0.07063229382038116, + "learning_rate": 1.6370304068205748e-05, + "loss": 0.7400882244110107, + "step": 4901 + }, + { + "epoch": 0.9057754270074257, + "grad_norm": 0.06370572000741959, + "learning_rate": 1.6368766183573814e-05, + "loss": 0.500558614730835, + "step": 4902 + }, + { + "epoch": 0.9059602037163216, + "grad_norm": 0.06759117543697357, + "learning_rate": 1.636722804548491e-05, + "loss": 0.5578844547271729, + "step": 4903 + }, + { + "epoch": 0.9061449804252174, + "grad_norm": 0.07463599741458893, + "learning_rate": 1.636568965400025e-05, + "loss": 0.5760760307312012, + "step": 4904 + }, + { + "epoch": 0.9063297571341132, + "grad_norm": 0.06752346456050873, + "learning_rate": 1.636415100918106e-05, + "loss": 0.6381351947784424, + "step": 4905 + }, + { + "epoch": 0.9065145338430091, + "grad_norm": 0.06896194815635681, + "learning_rate": 1.636261211108857e-05, + "loss": 0.6848575472831726, + "step": 4906 + }, + { + "epoch": 0.9066993105519049, + "grad_norm": 0.07377118617296219, + "learning_rate": 1.636107295978402e-05, + "loss": 0.599323570728302, + "step": 4907 + }, + { + "epoch": 0.9068840872608008, + "grad_norm": 0.06920889765024185, + "learning_rate": 1.635953355532867e-05, + "loss": 0.5673415660858154, + "step": 4908 + }, + { + "epoch": 0.9070688639696967, + "grad_norm": 0.06042078137397766, + "learning_rate": 1.6357993897783783e-05, + "loss": 0.43269699811935425, + "step": 4909 + }, + { + "epoch": 0.9072536406785925, + "grad_norm": 0.07586175203323364, + "learning_rate": 1.635645398721063e-05, + "loss": 0.6240195631980896, + "step": 4910 + }, + { + "epoch": 0.9074384173874883, + "grad_norm": 0.08614122122526169, + "learning_rate": 1.635491382367049e-05, + "loss": 0.5619620084762573, + "step": 4911 + }, + { + "epoch": 0.9076231940963841, + "grad_norm": 0.06778106093406677, + "learning_rate": 1.635337340722467e-05, + "loss": 0.5462279319763184, + "step": 4912 + }, + { + "epoch": 0.90780797080528, + "grad_norm": 0.07216772437095642, + "learning_rate": 1.635183273793446e-05, + "loss": 0.5313920378684998, + "step": 4913 + }, + { + "epoch": 0.9079927475141758, + "grad_norm": 0.05886697396636009, + "learning_rate": 1.635029181586118e-05, + "loss": 0.46407654881477356, + "step": 4914 + }, + { + "epoch": 0.9081775242230716, + "grad_norm": 0.06765791028738022, + "learning_rate": 1.6348750641066154e-05, + "loss": 0.6518081426620483, + "step": 4915 + }, + { + "epoch": 0.9083623009319676, + "grad_norm": 0.06862279027700424, + "learning_rate": 1.6347209213610718e-05, + "loss": 0.5767778158187866, + "step": 4916 + }, + { + "epoch": 0.9085470776408634, + "grad_norm": 0.058729130774736404, + "learning_rate": 1.6345667533556206e-05, + "loss": 0.4166557490825653, + "step": 4917 + }, + { + "epoch": 0.9087318543497592, + "grad_norm": 0.061408836394548416, + "learning_rate": 1.6344125600963984e-05, + "loss": 0.4724981188774109, + "step": 4918 + }, + { + "epoch": 0.9089166310586551, + "grad_norm": 0.06421653181314468, + "learning_rate": 1.6342583415895412e-05, + "loss": 0.4762861728668213, + "step": 4919 + }, + { + "epoch": 0.9091014077675509, + "grad_norm": 0.08160565793514252, + "learning_rate": 1.6341040978411865e-05, + "loss": 0.8044451475143433, + "step": 4920 + }, + { + "epoch": 0.9092861844764467, + "grad_norm": 0.07565393298864365, + "learning_rate": 1.633949828857472e-05, + "loss": 0.7070301175117493, + "step": 4921 + }, + { + "epoch": 0.9094709611853425, + "grad_norm": 0.07401212304830551, + "learning_rate": 1.633795534644538e-05, + "loss": 0.7780914902687073, + "step": 4922 + }, + { + "epoch": 0.9096557378942385, + "grad_norm": 0.0788111537694931, + "learning_rate": 1.6336412152085248e-05, + "loss": 0.7249211072921753, + "step": 4923 + }, + { + "epoch": 0.9098405146031343, + "grad_norm": 0.04927157983183861, + "learning_rate": 1.6334868705555732e-05, + "loss": 0.35182616114616394, + "step": 4924 + }, + { + "epoch": 0.9100252913120301, + "grad_norm": 0.07681036740541458, + "learning_rate": 1.6333325006918267e-05, + "loss": 0.7379781603813171, + "step": 4925 + }, + { + "epoch": 0.910210068020926, + "grad_norm": 0.07329613715410233, + "learning_rate": 1.6331781056234277e-05, + "loss": 0.5301315188407898, + "step": 4926 + }, + { + "epoch": 0.9103948447298218, + "grad_norm": 0.07360527664422989, + "learning_rate": 1.633023685356521e-05, + "loss": 0.5996437668800354, + "step": 4927 + }, + { + "epoch": 0.9105796214387176, + "grad_norm": 0.06598999351263046, + "learning_rate": 1.632869239897252e-05, + "loss": 0.4719834327697754, + "step": 4928 + }, + { + "epoch": 0.9107643981476135, + "grad_norm": 0.0765930563211441, + "learning_rate": 1.6327147692517675e-05, + "loss": 0.565828800201416, + "step": 4929 + }, + { + "epoch": 0.9109491748565094, + "grad_norm": 0.07155296951532364, + "learning_rate": 1.6325602734262146e-05, + "loss": 0.7482982873916626, + "step": 4930 + }, + { + "epoch": 0.9111339515654052, + "grad_norm": 0.06430218368768692, + "learning_rate": 1.6324057524267418e-05, + "loss": 0.4136260449886322, + "step": 4931 + }, + { + "epoch": 0.911318728274301, + "grad_norm": 0.07828143984079361, + "learning_rate": 1.6322512062594987e-05, + "loss": 0.7757020592689514, + "step": 4932 + }, + { + "epoch": 0.9115035049831969, + "grad_norm": 0.08137954771518707, + "learning_rate": 1.6320966349306357e-05, + "loss": 0.7074471116065979, + "step": 4933 + }, + { + "epoch": 0.9116882816920927, + "grad_norm": 0.05411933362483978, + "learning_rate": 1.631942038446304e-05, + "loss": 0.39557674527168274, + "step": 4934 + }, + { + "epoch": 0.9118730584009885, + "grad_norm": 0.08513291925191879, + "learning_rate": 1.6317874168126567e-05, + "loss": 0.5569822788238525, + "step": 4935 + }, + { + "epoch": 0.9120578351098844, + "grad_norm": 0.08622615039348602, + "learning_rate": 1.6316327700358464e-05, + "loss": 0.5976544618606567, + "step": 4936 + }, + { + "epoch": 0.9122426118187802, + "grad_norm": 0.0760151743888855, + "learning_rate": 1.631478098122028e-05, + "loss": 0.6827889084815979, + "step": 4937 + }, + { + "epoch": 0.9124273885276761, + "grad_norm": 0.07461119443178177, + "learning_rate": 1.6313234010773573e-05, + "loss": 0.647698700428009, + "step": 4938 + }, + { + "epoch": 0.912612165236572, + "grad_norm": 0.0759672299027443, + "learning_rate": 1.63116867890799e-05, + "loss": 0.5611090064048767, + "step": 4939 + }, + { + "epoch": 0.9127969419454678, + "grad_norm": 0.08994041383266449, + "learning_rate": 1.631013931620084e-05, + "loss": 0.6958615779876709, + "step": 4940 + }, + { + "epoch": 0.9129817186543636, + "grad_norm": 0.062235210090875626, + "learning_rate": 1.6308591592197976e-05, + "loss": 0.5414263010025024, + "step": 4941 + }, + { + "epoch": 0.9131664953632594, + "grad_norm": 0.06273581087589264, + "learning_rate": 1.6307043617132907e-05, + "loss": 0.5574995279312134, + "step": 4942 + }, + { + "epoch": 0.9133512720721553, + "grad_norm": 0.08604917675256729, + "learning_rate": 1.6305495391067232e-05, + "loss": 0.7597737312316895, + "step": 4943 + }, + { + "epoch": 0.9135360487810511, + "grad_norm": 0.09719632565975189, + "learning_rate": 1.630394691406257e-05, + "loss": 0.9429509043693542, + "step": 4944 + }, + { + "epoch": 0.913720825489947, + "grad_norm": 0.06911340355873108, + "learning_rate": 1.6302398186180538e-05, + "loss": 0.47666671872138977, + "step": 4945 + }, + { + "epoch": 0.9139056021988429, + "grad_norm": 0.07012543827295303, + "learning_rate": 1.6300849207482783e-05, + "loss": 0.4357103705406189, + "step": 4946 + }, + { + "epoch": 0.9140903789077387, + "grad_norm": 0.06667513400316238, + "learning_rate": 1.629929997803094e-05, + "loss": 0.44440895318984985, + "step": 4947 + }, + { + "epoch": 0.9142751556166345, + "grad_norm": 0.07876349240541458, + "learning_rate": 1.6297750497886663e-05, + "loss": 0.6954589486122131, + "step": 4948 + }, + { + "epoch": 0.9144599323255304, + "grad_norm": 0.08191389590501785, + "learning_rate": 1.629620076711162e-05, + "loss": 0.6731683611869812, + "step": 4949 + }, + { + "epoch": 0.9146447090344262, + "grad_norm": 0.07889698445796967, + "learning_rate": 1.629465078576749e-05, + "loss": 0.6995881199836731, + "step": 4950 + }, + { + "epoch": 0.914829485743322, + "grad_norm": 0.0822673887014389, + "learning_rate": 1.6293100553915947e-05, + "loss": 0.6512246131896973, + "step": 4951 + }, + { + "epoch": 0.915014262452218, + "grad_norm": 0.06273180991411209, + "learning_rate": 1.629155007161869e-05, + "loss": 0.4994935989379883, + "step": 4952 + }, + { + "epoch": 0.9151990391611138, + "grad_norm": 0.055114369839429855, + "learning_rate": 1.6289999338937427e-05, + "loss": 0.37916818261146545, + "step": 4953 + }, + { + "epoch": 0.9153838158700096, + "grad_norm": 0.0721181109547615, + "learning_rate": 1.628844835593387e-05, + "loss": 0.6371509432792664, + "step": 4954 + }, + { + "epoch": 0.9155685925789054, + "grad_norm": 0.06248628720641136, + "learning_rate": 1.6286897122669737e-05, + "loss": 0.502285897731781, + "step": 4955 + }, + { + "epoch": 0.9157533692878013, + "grad_norm": 0.07796809077262878, + "learning_rate": 1.628534563920677e-05, + "loss": 0.7872824668884277, + "step": 4956 + }, + { + "epoch": 0.9159381459966971, + "grad_norm": 0.06692524999380112, + "learning_rate": 1.6283793905606715e-05, + "loss": 0.7130103707313538, + "step": 4957 + }, + { + "epoch": 0.9161229227055929, + "grad_norm": 0.07191796600818634, + "learning_rate": 1.6282241921931317e-05, + "loss": 0.708375096321106, + "step": 4958 + }, + { + "epoch": 0.9163076994144888, + "grad_norm": 0.07857691496610641, + "learning_rate": 1.6280689688242345e-05, + "loss": 0.6777384281158447, + "step": 4959 + }, + { + "epoch": 0.9164924761233847, + "grad_norm": 0.0750550627708435, + "learning_rate": 1.6279137204601577e-05, + "loss": 0.5284432768821716, + "step": 4960 + }, + { + "epoch": 0.9166772528322805, + "grad_norm": 0.06321000307798386, + "learning_rate": 1.627758447107079e-05, + "loss": 0.4843653738498688, + "step": 4961 + }, + { + "epoch": 0.9168620295411763, + "grad_norm": 0.07654401659965515, + "learning_rate": 1.6276031487711782e-05, + "loss": 0.6463335156440735, + "step": 4962 + }, + { + "epoch": 0.9170468062500722, + "grad_norm": 0.074707992374897, + "learning_rate": 1.627447825458636e-05, + "loss": 0.6347569227218628, + "step": 4963 + }, + { + "epoch": 0.917231582958968, + "grad_norm": 0.06458912789821625, + "learning_rate": 1.627292477175633e-05, + "loss": 0.5805029273033142, + "step": 4964 + }, + { + "epoch": 0.9174163596678638, + "grad_norm": 0.07568664848804474, + "learning_rate": 1.6271371039283517e-05, + "loss": 0.6437733173370361, + "step": 4965 + }, + { + "epoch": 0.9176011363767597, + "grad_norm": 0.05623985081911087, + "learning_rate": 1.6269817057229762e-05, + "loss": 0.41585254669189453, + "step": 4966 + }, + { + "epoch": 0.9177859130856556, + "grad_norm": 0.08120466023683548, + "learning_rate": 1.6268262825656903e-05, + "loss": 0.6529135704040527, + "step": 4967 + }, + { + "epoch": 0.9179706897945514, + "grad_norm": 0.08207594603300095, + "learning_rate": 1.6266708344626793e-05, + "loss": 0.7336923480033875, + "step": 4968 + }, + { + "epoch": 0.9181554665034473, + "grad_norm": 0.09322047978639603, + "learning_rate": 1.6265153614201296e-05, + "loss": 0.6285800337791443, + "step": 4969 + }, + { + "epoch": 0.9183402432123431, + "grad_norm": 0.08115655928850174, + "learning_rate": 1.6263598634442286e-05, + "loss": 0.7460980415344238, + "step": 4970 + }, + { + "epoch": 0.9185250199212389, + "grad_norm": 0.09479428082704544, + "learning_rate": 1.6262043405411648e-05, + "loss": 0.784386396408081, + "step": 4971 + }, + { + "epoch": 0.9187097966301347, + "grad_norm": 0.07809218764305115, + "learning_rate": 1.6260487927171276e-05, + "loss": 0.7730811834335327, + "step": 4972 + }, + { + "epoch": 0.9188945733390306, + "grad_norm": 0.07673768699169159, + "learning_rate": 1.625893219978307e-05, + "loss": 0.5420278906822205, + "step": 4973 + }, + { + "epoch": 0.9190793500479265, + "grad_norm": 0.08296892046928406, + "learning_rate": 1.625737622330894e-05, + "loss": 0.7337756752967834, + "step": 4974 + }, + { + "epoch": 0.9192641267568223, + "grad_norm": 0.07037664204835892, + "learning_rate": 1.6255819997810815e-05, + "loss": 0.545322835445404, + "step": 4975 + }, + { + "epoch": 0.9194489034657182, + "grad_norm": 0.07493076473474503, + "learning_rate": 1.625426352335063e-05, + "loss": 0.70954829454422, + "step": 4976 + }, + { + "epoch": 0.919633680174614, + "grad_norm": 0.07487804442644119, + "learning_rate": 1.625270679999032e-05, + "loss": 0.574744462966919, + "step": 4977 + }, + { + "epoch": 0.9198184568835098, + "grad_norm": 0.0659298375248909, + "learning_rate": 1.6251149827791843e-05, + "loss": 0.3916098475456238, + "step": 4978 + }, + { + "epoch": 0.9200032335924057, + "grad_norm": 0.07134214043617249, + "learning_rate": 1.624959260681716e-05, + "loss": 0.5763881206512451, + "step": 4979 + }, + { + "epoch": 0.9201880103013015, + "grad_norm": 0.06730277836322784, + "learning_rate": 1.6248035137128244e-05, + "loss": 0.5897141695022583, + "step": 4980 + }, + { + "epoch": 0.9203727870101973, + "grad_norm": 0.06790062040090561, + "learning_rate": 1.6246477418787077e-05, + "loss": 0.566528856754303, + "step": 4981 + }, + { + "epoch": 0.9205575637190933, + "grad_norm": 0.07552764564752579, + "learning_rate": 1.624491945185565e-05, + "loss": 0.5551050305366516, + "step": 4982 + }, + { + "epoch": 0.9207423404279891, + "grad_norm": 0.050118498504161835, + "learning_rate": 1.624336123639597e-05, + "loss": 0.3442586064338684, + "step": 4983 + }, + { + "epoch": 0.9209271171368849, + "grad_norm": 0.09067343175411224, + "learning_rate": 1.6241802772470043e-05, + "loss": 0.6247709393501282, + "step": 4984 + }, + { + "epoch": 0.9211118938457807, + "grad_norm": 0.08580661565065384, + "learning_rate": 1.6240244060139896e-05, + "loss": 0.7336589694023132, + "step": 4985 + }, + { + "epoch": 0.9212966705546766, + "grad_norm": 0.06558381766080856, + "learning_rate": 1.6238685099467557e-05, + "loss": 0.5565704107284546, + "step": 4986 + }, + { + "epoch": 0.9214814472635724, + "grad_norm": 0.07769566029310226, + "learning_rate": 1.6237125890515068e-05, + "loss": 0.5454164743423462, + "step": 4987 + }, + { + "epoch": 0.9216662239724682, + "grad_norm": 0.06431884318590164, + "learning_rate": 1.6235566433344483e-05, + "loss": 0.47945302724838257, + "step": 4988 + }, + { + "epoch": 0.9218510006813642, + "grad_norm": 0.0770520567893982, + "learning_rate": 1.6234006728017863e-05, + "loss": 0.6441652178764343, + "step": 4989 + }, + { + "epoch": 0.92203577739026, + "grad_norm": 0.07232240587472916, + "learning_rate": 1.6232446774597278e-05, + "loss": 0.6215723156929016, + "step": 4990 + }, + { + "epoch": 0.9222205540991558, + "grad_norm": 0.0755547434091568, + "learning_rate": 1.6230886573144812e-05, + "loss": 0.6493498086929321, + "step": 4991 + }, + { + "epoch": 0.9224053308080516, + "grad_norm": 0.06353598833084106, + "learning_rate": 1.6229326123722554e-05, + "loss": 0.47383472323417664, + "step": 4992 + }, + { + "epoch": 0.9225901075169475, + "grad_norm": 0.06302771717309952, + "learning_rate": 1.6227765426392603e-05, + "loss": 0.5499935746192932, + "step": 4993 + }, + { + "epoch": 0.9227748842258433, + "grad_norm": 0.0657678171992302, + "learning_rate": 1.6226204481217074e-05, + "loss": 0.4634450078010559, + "step": 4994 + }, + { + "epoch": 0.9229596609347391, + "grad_norm": 0.07355940341949463, + "learning_rate": 1.622464328825809e-05, + "loss": 0.4975418746471405, + "step": 4995 + }, + { + "epoch": 0.9231444376436351, + "grad_norm": 0.05299646034836769, + "learning_rate": 1.622308184757777e-05, + "loss": 0.4634086787700653, + "step": 4996 + }, + { + "epoch": 0.9233292143525309, + "grad_norm": 0.05826892331242561, + "learning_rate": 1.6221520159238266e-05, + "loss": 0.5520720481872559, + "step": 4997 + }, + { + "epoch": 0.9235139910614267, + "grad_norm": 0.08895806223154068, + "learning_rate": 1.6219958223301723e-05, + "loss": 0.710663378238678, + "step": 4998 + }, + { + "epoch": 0.9236987677703226, + "grad_norm": 0.05497613176703453, + "learning_rate": 1.6218396039830304e-05, + "loss": 0.3812207877635956, + "step": 4999 + }, + { + "epoch": 0.9238835444792184, + "grad_norm": 0.06913777440786362, + "learning_rate": 1.6216833608886175e-05, + "loss": 0.5585352778434753, + "step": 5000 + }, + { + "epoch": 0.9238835444792184, + "eval_loss": 0.6308066844940186, + "eval_runtime": 157.2644, + "eval_samples_per_second": 115.913, + "eval_steps_per_second": 14.492, + "step": 5000 + }, + { + "epoch": 0.9240683211881142, + "grad_norm": 0.06755195558071136, + "learning_rate": 1.621527093053152e-05, + "loss": 0.5701950192451477, + "step": 5001 + }, + { + "epoch": 0.92425309789701, + "grad_norm": 0.08724575489759445, + "learning_rate": 1.6213708004828527e-05, + "loss": 0.7150865793228149, + "step": 5002 + }, + { + "epoch": 0.9244378746059059, + "grad_norm": 0.08624764531850815, + "learning_rate": 1.6212144831839396e-05, + "loss": 0.6906049847602844, + "step": 5003 + }, + { + "epoch": 0.9246226513148018, + "grad_norm": 0.06757311522960663, + "learning_rate": 1.6210581411626335e-05, + "loss": 0.5546698570251465, + "step": 5004 + }, + { + "epoch": 0.9248074280236976, + "grad_norm": 0.0782892256975174, + "learning_rate": 1.6209017744251564e-05, + "loss": 0.8180869221687317, + "step": 5005 + }, + { + "epoch": 0.9249922047325935, + "grad_norm": 0.06884290277957916, + "learning_rate": 1.6207453829777312e-05, + "loss": 0.6495415568351746, + "step": 5006 + }, + { + "epoch": 0.9251769814414893, + "grad_norm": 0.06144087016582489, + "learning_rate": 1.620588966826582e-05, + "loss": 0.5278459787368774, + "step": 5007 + }, + { + "epoch": 0.9253617581503851, + "grad_norm": 0.07224945724010468, + "learning_rate": 1.6204325259779335e-05, + "loss": 0.6224374771118164, + "step": 5008 + }, + { + "epoch": 0.925546534859281, + "grad_norm": 0.08506694436073303, + "learning_rate": 1.6202760604380116e-05, + "loss": 0.8050345182418823, + "step": 5009 + }, + { + "epoch": 0.9257313115681768, + "grad_norm": 0.08770319819450378, + "learning_rate": 1.620119570213043e-05, + "loss": 0.7414484620094299, + "step": 5010 + }, + { + "epoch": 0.9259160882770727, + "grad_norm": 0.09567960351705551, + "learning_rate": 1.6199630553092557e-05, + "loss": 0.6663733720779419, + "step": 5011 + }, + { + "epoch": 0.9261008649859686, + "grad_norm": 0.06947190314531326, + "learning_rate": 1.6198065157328785e-05, + "loss": 0.5598324537277222, + "step": 5012 + }, + { + "epoch": 0.9262856416948644, + "grad_norm": 0.04812987893819809, + "learning_rate": 1.6196499514901405e-05, + "loss": 0.37595170736312866, + "step": 5013 + }, + { + "epoch": 0.9264704184037602, + "grad_norm": 0.07149752974510193, + "learning_rate": 1.6194933625872736e-05, + "loss": 0.6893596649169922, + "step": 5014 + }, + { + "epoch": 0.926655195112656, + "grad_norm": 0.06001589819788933, + "learning_rate": 1.619336749030509e-05, + "loss": 0.3789342939853668, + "step": 5015 + }, + { + "epoch": 0.9268399718215519, + "grad_norm": 0.07331600040197372, + "learning_rate": 1.619180110826079e-05, + "loss": 0.7051414251327515, + "step": 5016 + }, + { + "epoch": 0.9270247485304477, + "grad_norm": 0.06992633640766144, + "learning_rate": 1.619023447980218e-05, + "loss": 0.5805511474609375, + "step": 5017 + }, + { + "epoch": 0.9272095252393436, + "grad_norm": 0.08719083666801453, + "learning_rate": 1.6188667604991608e-05, + "loss": 0.7659269571304321, + "step": 5018 + }, + { + "epoch": 0.9273943019482395, + "grad_norm": 0.04258688911795616, + "learning_rate": 1.6187100483891423e-05, + "loss": 0.2700992822647095, + "step": 5019 + }, + { + "epoch": 0.9275790786571353, + "grad_norm": 0.0832752212882042, + "learning_rate": 1.6185533116563998e-05, + "loss": 0.7233130931854248, + "step": 5020 + }, + { + "epoch": 0.9277638553660311, + "grad_norm": 0.06890590488910675, + "learning_rate": 1.6183965503071706e-05, + "loss": 0.5845312476158142, + "step": 5021 + }, + { + "epoch": 0.927948632074927, + "grad_norm": 0.062875896692276, + "learning_rate": 1.6182397643476935e-05, + "loss": 0.529586672782898, + "step": 5022 + }, + { + "epoch": 0.9281334087838228, + "grad_norm": 0.05886458605527878, + "learning_rate": 1.6180829537842078e-05, + "loss": 0.4220605492591858, + "step": 5023 + }, + { + "epoch": 0.9283181854927186, + "grad_norm": 0.0718119889497757, + "learning_rate": 1.6179261186229544e-05, + "loss": 0.6670949459075928, + "step": 5024 + }, + { + "epoch": 0.9285029622016144, + "grad_norm": 0.07199763506650925, + "learning_rate": 1.6177692588701746e-05, + "loss": 0.5530394911766052, + "step": 5025 + }, + { + "epoch": 0.9286877389105104, + "grad_norm": 0.0965399518609047, + "learning_rate": 1.6176123745321114e-05, + "loss": 0.8364000916481018, + "step": 5026 + }, + { + "epoch": 0.9288725156194062, + "grad_norm": 0.05655921995639801, + "learning_rate": 1.6174554656150078e-05, + "loss": 0.4526393711566925, + "step": 5027 + }, + { + "epoch": 0.929057292328302, + "grad_norm": 0.07491301745176315, + "learning_rate": 1.6172985321251084e-05, + "loss": 0.5258397459983826, + "step": 5028 + }, + { + "epoch": 0.9292420690371979, + "grad_norm": 0.08294566720724106, + "learning_rate": 1.6171415740686585e-05, + "loss": 0.5293903350830078, + "step": 5029 + }, + { + "epoch": 0.9294268457460937, + "grad_norm": 0.08152367919683456, + "learning_rate": 1.616984591451905e-05, + "loss": 0.7470456957817078, + "step": 5030 + }, + { + "epoch": 0.9296116224549895, + "grad_norm": 0.06945068389177322, + "learning_rate": 1.6168275842810946e-05, + "loss": 0.48202794790267944, + "step": 5031 + }, + { + "epoch": 0.9297963991638853, + "grad_norm": 0.06779973953962326, + "learning_rate": 1.616670552562477e-05, + "loss": 0.552852988243103, + "step": 5032 + }, + { + "epoch": 0.9299811758727813, + "grad_norm": 0.06413020193576813, + "learning_rate": 1.6165134963023e-05, + "loss": 0.6048864722251892, + "step": 5033 + }, + { + "epoch": 0.9301659525816771, + "grad_norm": 0.09240268170833588, + "learning_rate": 1.6163564155068148e-05, + "loss": 0.9151281714439392, + "step": 5034 + }, + { + "epoch": 0.9303507292905729, + "grad_norm": 0.08496936410665512, + "learning_rate": 1.6161993101822728e-05, + "loss": 0.7461206316947937, + "step": 5035 + }, + { + "epoch": 0.9305355059994688, + "grad_norm": 0.06811504065990448, + "learning_rate": 1.616042180334926e-05, + "loss": 0.6777445077896118, + "step": 5036 + }, + { + "epoch": 0.9307202827083646, + "grad_norm": 0.07164547592401505, + "learning_rate": 1.6158850259710278e-05, + "loss": 0.4562653601169586, + "step": 5037 + }, + { + "epoch": 0.9309050594172604, + "grad_norm": 0.07087064534425735, + "learning_rate": 1.615727847096832e-05, + "loss": 0.5231823921203613, + "step": 5038 + }, + { + "epoch": 0.9310898361261563, + "grad_norm": 0.07362006604671478, + "learning_rate": 1.615570643718595e-05, + "loss": 0.5995796918869019, + "step": 5039 + }, + { + "epoch": 0.9312746128350522, + "grad_norm": 0.08397943526506424, + "learning_rate": 1.6154134158425717e-05, + "loss": 0.735202431678772, + "step": 5040 + }, + { + "epoch": 0.931459389543948, + "grad_norm": 0.09518817067146301, + "learning_rate": 1.6152561634750202e-05, + "loss": 0.7966129183769226, + "step": 5041 + }, + { + "epoch": 0.9316441662528439, + "grad_norm": 0.07340359687805176, + "learning_rate": 1.6150988866221983e-05, + "loss": 0.5222837924957275, + "step": 5042 + }, + { + "epoch": 0.9318289429617397, + "grad_norm": 0.07132686674594879, + "learning_rate": 1.6149415852903647e-05, + "loss": 0.589817225933075, + "step": 5043 + }, + { + "epoch": 0.9320137196706355, + "grad_norm": 0.07675815373659134, + "learning_rate": 1.61478425948578e-05, + "loss": 0.6767191290855408, + "step": 5044 + }, + { + "epoch": 0.9321984963795313, + "grad_norm": 0.05632895603775978, + "learning_rate": 1.6146269092147054e-05, + "loss": 0.4868313670158386, + "step": 5045 + }, + { + "epoch": 0.9323832730884272, + "grad_norm": 0.08111368119716644, + "learning_rate": 1.6144695344834026e-05, + "loss": 0.6972765922546387, + "step": 5046 + }, + { + "epoch": 0.932568049797323, + "grad_norm": 0.09418383985757828, + "learning_rate": 1.614312135298135e-05, + "loss": 0.786720871925354, + "step": 5047 + }, + { + "epoch": 0.9327528265062189, + "grad_norm": 0.07354355603456497, + "learning_rate": 1.6141547116651663e-05, + "loss": 0.9190953969955444, + "step": 5048 + }, + { + "epoch": 0.9329376032151148, + "grad_norm": 0.06468519568443298, + "learning_rate": 1.613997263590761e-05, + "loss": 0.6413533091545105, + "step": 5049 + }, + { + "epoch": 0.9331223799240106, + "grad_norm": 0.06637117266654968, + "learning_rate": 1.613839791081186e-05, + "loss": 0.5141046047210693, + "step": 5050 + }, + { + "epoch": 0.9333071566329064, + "grad_norm": 0.08465000241994858, + "learning_rate": 1.6136822941427076e-05, + "loss": 0.7368367910385132, + "step": 5051 + }, + { + "epoch": 0.9334919333418022, + "grad_norm": 0.05120861530303955, + "learning_rate": 1.6135247727815943e-05, + "loss": 0.5021374225616455, + "step": 5052 + }, + { + "epoch": 0.9336767100506981, + "grad_norm": 0.06843441724777222, + "learning_rate": 1.6133672270041142e-05, + "loss": 0.47936877608299255, + "step": 5053 + }, + { + "epoch": 0.9338614867595939, + "grad_norm": 0.07375901937484741, + "learning_rate": 1.613209656816537e-05, + "loss": 0.6750283241271973, + "step": 5054 + }, + { + "epoch": 0.9340462634684898, + "grad_norm": 0.057786088436841965, + "learning_rate": 1.6130520622251347e-05, + "loss": 0.5178053379058838, + "step": 5055 + }, + { + "epoch": 0.9342310401773857, + "grad_norm": 0.04772485792636871, + "learning_rate": 1.612894443236178e-05, + "loss": 0.38083186745643616, + "step": 5056 + }, + { + "epoch": 0.9344158168862815, + "grad_norm": 0.08076415956020355, + "learning_rate": 1.6127367998559397e-05, + "loss": 0.6789405345916748, + "step": 5057 + }, + { + "epoch": 0.9346005935951773, + "grad_norm": 0.06267094612121582, + "learning_rate": 1.612579132090694e-05, + "loss": 0.6396245360374451, + "step": 5058 + }, + { + "epoch": 0.9347853703040732, + "grad_norm": 0.06518642604351044, + "learning_rate": 1.6124214399467154e-05, + "loss": 0.5198378562927246, + "step": 5059 + }, + { + "epoch": 0.934970147012969, + "grad_norm": 0.07412681728601456, + "learning_rate": 1.61226372343028e-05, + "loss": 0.6395267248153687, + "step": 5060 + }, + { + "epoch": 0.9351549237218648, + "grad_norm": 0.07623349875211716, + "learning_rate": 1.612105982547663e-05, + "loss": 0.6735665202140808, + "step": 5061 + }, + { + "epoch": 0.9353397004307608, + "grad_norm": 0.08154106885194778, + "learning_rate": 1.6119482173051434e-05, + "loss": 0.6256721019744873, + "step": 5062 + }, + { + "epoch": 0.9355244771396566, + "grad_norm": 0.06194007769227028, + "learning_rate": 1.6117904277089994e-05, + "loss": 0.6189951300621033, + "step": 5063 + }, + { + "epoch": 0.9357092538485524, + "grad_norm": 0.05380154773592949, + "learning_rate": 1.61163261376551e-05, + "loss": 0.3931328058242798, + "step": 5064 + }, + { + "epoch": 0.9358940305574482, + "grad_norm": 0.11030847579240799, + "learning_rate": 1.6114747754809564e-05, + "loss": 0.8911649584770203, + "step": 5065 + }, + { + "epoch": 0.9360788072663441, + "grad_norm": 0.06107322871685028, + "learning_rate": 1.61131691286162e-05, + "loss": 0.5231435894966125, + "step": 5066 + }, + { + "epoch": 0.9362635839752399, + "grad_norm": 0.05898779630661011, + "learning_rate": 1.6111590259137827e-05, + "loss": 0.478934109210968, + "step": 5067 + }, + { + "epoch": 0.9364483606841357, + "grad_norm": 0.06787285208702087, + "learning_rate": 1.6110011146437282e-05, + "loss": 0.5066587924957275, + "step": 5068 + }, + { + "epoch": 0.9366331373930316, + "grad_norm": 0.08234155178070068, + "learning_rate": 1.6108431790577413e-05, + "loss": 0.6809791922569275, + "step": 5069 + }, + { + "epoch": 0.9368179141019275, + "grad_norm": 0.07514581084251404, + "learning_rate": 1.6106852191621067e-05, + "loss": 0.7342582941055298, + "step": 5070 + }, + { + "epoch": 0.9370026908108233, + "grad_norm": 0.06335484981536865, + "learning_rate": 1.6105272349631107e-05, + "loss": 0.5712248682975769, + "step": 5071 + }, + { + "epoch": 0.9371874675197192, + "grad_norm": 0.0602981261909008, + "learning_rate": 1.6103692264670414e-05, + "loss": 0.5489089488983154, + "step": 5072 + }, + { + "epoch": 0.937372244228615, + "grad_norm": 0.08747103810310364, + "learning_rate": 1.6102111936801865e-05, + "loss": 0.6118226647377014, + "step": 5073 + }, + { + "epoch": 0.9375570209375108, + "grad_norm": 0.08617452532052994, + "learning_rate": 1.610053136608835e-05, + "loss": 0.6262293457984924, + "step": 5074 + }, + { + "epoch": 0.9377417976464066, + "grad_norm": 0.07134665548801422, + "learning_rate": 1.6098950552592768e-05, + "loss": 0.4300174117088318, + "step": 5075 + }, + { + "epoch": 0.9379265743553025, + "grad_norm": 0.0613059476017952, + "learning_rate": 1.609736949637804e-05, + "loss": 0.5369535088539124, + "step": 5076 + }, + { + "epoch": 0.9381113510641984, + "grad_norm": 0.06910678744316101, + "learning_rate": 1.609578819750708e-05, + "loss": 0.5396366119384766, + "step": 5077 + }, + { + "epoch": 0.9382961277730942, + "grad_norm": 0.07462180405855179, + "learning_rate": 1.6094206656042822e-05, + "loss": 0.6210793852806091, + "step": 5078 + }, + { + "epoch": 0.9384809044819901, + "grad_norm": 0.07486458867788315, + "learning_rate": 1.6092624872048207e-05, + "loss": 0.5217027068138123, + "step": 5079 + }, + { + "epoch": 0.9386656811908859, + "grad_norm": 0.08017706125974655, + "learning_rate": 1.609104284558618e-05, + "loss": 0.7342339754104614, + "step": 5080 + }, + { + "epoch": 0.9388504578997817, + "grad_norm": 0.08369428664445877, + "learning_rate": 1.608946057671971e-05, + "loss": 0.7358474731445312, + "step": 5081 + }, + { + "epoch": 0.9390352346086775, + "grad_norm": 0.11979498714208603, + "learning_rate": 1.6087878065511756e-05, + "loss": 0.5417165160179138, + "step": 5082 + }, + { + "epoch": 0.9392200113175734, + "grad_norm": 0.06309329718351364, + "learning_rate": 1.6086295312025303e-05, + "loss": 0.546745777130127, + "step": 5083 + }, + { + "epoch": 0.9394047880264693, + "grad_norm": 0.0681857243180275, + "learning_rate": 1.6084712316323338e-05, + "loss": 0.5794031023979187, + "step": 5084 + }, + { + "epoch": 0.9395895647353651, + "grad_norm": 0.07119515538215637, + "learning_rate": 1.608312907846886e-05, + "loss": 0.5442270636558533, + "step": 5085 + }, + { + "epoch": 0.939774341444261, + "grad_norm": 0.05828424543142319, + "learning_rate": 1.6081545598524873e-05, + "loss": 0.3991624414920807, + "step": 5086 + }, + { + "epoch": 0.9399591181531568, + "grad_norm": 0.0791083499789238, + "learning_rate": 1.6079961876554402e-05, + "loss": 0.7310355305671692, + "step": 5087 + }, + { + "epoch": 0.9401438948620526, + "grad_norm": 0.07687226682901382, + "learning_rate": 1.6078377912620466e-05, + "loss": 0.6118609309196472, + "step": 5088 + }, + { + "epoch": 0.9403286715709485, + "grad_norm": 0.06193707883358002, + "learning_rate": 1.607679370678611e-05, + "loss": 0.4281879663467407, + "step": 5089 + }, + { + "epoch": 0.9405134482798443, + "grad_norm": 0.08414844423532486, + "learning_rate": 1.6075209259114375e-05, + "loss": 0.693047285079956, + "step": 5090 + }, + { + "epoch": 0.9406982249887401, + "grad_norm": 0.06623263657093048, + "learning_rate": 1.607362456966832e-05, + "loss": 0.5509737133979797, + "step": 5091 + }, + { + "epoch": 0.9408830016976361, + "grad_norm": 0.07156947255134583, + "learning_rate": 1.6072039638511004e-05, + "loss": 0.6185341477394104, + "step": 5092 + }, + { + "epoch": 0.9410677784065319, + "grad_norm": 0.07968199253082275, + "learning_rate": 1.6070454465705513e-05, + "loss": 0.5912548899650574, + "step": 5093 + }, + { + "epoch": 0.9412525551154277, + "grad_norm": 0.06254375725984573, + "learning_rate": 1.6068869051314923e-05, + "loss": 0.5338126420974731, + "step": 5094 + }, + { + "epoch": 0.9414373318243235, + "grad_norm": 0.07212621718645096, + "learning_rate": 1.606728339540233e-05, + "loss": 0.6360775232315063, + "step": 5095 + }, + { + "epoch": 0.9416221085332194, + "grad_norm": 0.05787743255496025, + "learning_rate": 1.6065697498030842e-05, + "loss": 0.4220399558544159, + "step": 5096 + }, + { + "epoch": 0.9418068852421152, + "grad_norm": 0.06521794199943542, + "learning_rate": 1.606411135926357e-05, + "loss": 0.5160315036773682, + "step": 5097 + }, + { + "epoch": 0.941991661951011, + "grad_norm": 0.08517172932624817, + "learning_rate": 1.6062524979163643e-05, + "loss": 0.705226719379425, + "step": 5098 + }, + { + "epoch": 0.942176438659907, + "grad_norm": 0.09427408128976822, + "learning_rate": 1.6060938357794182e-05, + "loss": 0.6183182001113892, + "step": 5099 + }, + { + "epoch": 0.9423612153688028, + "grad_norm": 0.07305286824703217, + "learning_rate": 1.605935149521834e-05, + "loss": 0.5855145454406738, + "step": 5100 + }, + { + "epoch": 0.9425459920776986, + "grad_norm": 0.07183791697025299, + "learning_rate": 1.6057764391499267e-05, + "loss": 0.6883285045623779, + "step": 5101 + }, + { + "epoch": 0.9427307687865945, + "grad_norm": 0.08010408282279968, + "learning_rate": 1.6056177046700122e-05, + "loss": 0.6888131499290466, + "step": 5102 + }, + { + "epoch": 0.9429155454954903, + "grad_norm": 0.06611838191747665, + "learning_rate": 1.6054589460884078e-05, + "loss": 0.5510590672492981, + "step": 5103 + }, + { + "epoch": 0.9431003222043861, + "grad_norm": 0.08526670187711716, + "learning_rate": 1.6053001634114316e-05, + "loss": 0.6659255623817444, + "step": 5104 + }, + { + "epoch": 0.9432850989132819, + "grad_norm": 0.08515694737434387, + "learning_rate": 1.6051413566454025e-05, + "loss": 0.8655163645744324, + "step": 5105 + }, + { + "epoch": 0.9434698756221779, + "grad_norm": 0.0811673104763031, + "learning_rate": 1.6049825257966407e-05, + "loss": 0.7180903553962708, + "step": 5106 + }, + { + "epoch": 0.9436546523310737, + "grad_norm": 0.07173803448677063, + "learning_rate": 1.6048236708714674e-05, + "loss": 0.7913423776626587, + "step": 5107 + }, + { + "epoch": 0.9438394290399695, + "grad_norm": 0.05145305395126343, + "learning_rate": 1.604664791876204e-05, + "loss": 0.42830294370651245, + "step": 5108 + }, + { + "epoch": 0.9440242057488654, + "grad_norm": 0.07102995365858078, + "learning_rate": 1.6045058888171737e-05, + "loss": 0.5304608941078186, + "step": 5109 + }, + { + "epoch": 0.9442089824577612, + "grad_norm": 0.07459255307912827, + "learning_rate": 1.6043469617007e-05, + "loss": 0.6177938580513, + "step": 5110 + }, + { + "epoch": 0.944393759166657, + "grad_norm": 0.06430578976869583, + "learning_rate": 1.6041880105331083e-05, + "loss": 0.40000367164611816, + "step": 5111 + }, + { + "epoch": 0.9445785358755528, + "grad_norm": 0.054752789437770844, + "learning_rate": 1.604029035320724e-05, + "loss": 0.4391731917858124, + "step": 5112 + }, + { + "epoch": 0.9447633125844487, + "grad_norm": 0.06837918609380722, + "learning_rate": 1.603870036069874e-05, + "loss": 0.5762681365013123, + "step": 5113 + }, + { + "epoch": 0.9449480892933446, + "grad_norm": 0.07113198935985565, + "learning_rate": 1.603711012786886e-05, + "loss": 0.5563413500785828, + "step": 5114 + }, + { + "epoch": 0.9451328660022404, + "grad_norm": 0.07704070955514908, + "learning_rate": 1.6035519654780878e-05, + "loss": 0.7374782562255859, + "step": 5115 + }, + { + "epoch": 0.9453176427111363, + "grad_norm": 0.08055262267589569, + "learning_rate": 1.60339289414981e-05, + "loss": 0.6849924325942993, + "step": 5116 + }, + { + "epoch": 0.9455024194200321, + "grad_norm": 0.06334561854600906, + "learning_rate": 1.6032337988083828e-05, + "loss": 0.3979511857032776, + "step": 5117 + }, + { + "epoch": 0.9456871961289279, + "grad_norm": 0.057456646114587784, + "learning_rate": 1.603074679460138e-05, + "loss": 0.45759090781211853, + "step": 5118 + }, + { + "epoch": 0.9458719728378238, + "grad_norm": 0.057219650596380234, + "learning_rate": 1.6029155361114068e-05, + "loss": 0.4451177418231964, + "step": 5119 + }, + { + "epoch": 0.9460567495467196, + "grad_norm": 0.08612550050020218, + "learning_rate": 1.6027563687685244e-05, + "loss": 0.6650874614715576, + "step": 5120 + }, + { + "epoch": 0.9462415262556155, + "grad_norm": 0.0630384311079979, + "learning_rate": 1.6025971774378238e-05, + "loss": 0.46417829394340515, + "step": 5121 + }, + { + "epoch": 0.9464263029645114, + "grad_norm": 0.09216060489416122, + "learning_rate": 1.6024379621256415e-05, + "loss": 0.7036070227622986, + "step": 5122 + }, + { + "epoch": 0.9466110796734072, + "grad_norm": 0.08361869305372238, + "learning_rate": 1.6022787228383125e-05, + "loss": 0.7870634198188782, + "step": 5123 + }, + { + "epoch": 0.946795856382303, + "grad_norm": 0.06421814113855362, + "learning_rate": 1.6021194595821747e-05, + "loss": 0.4977126717567444, + "step": 5124 + }, + { + "epoch": 0.9469806330911988, + "grad_norm": 0.07801469415426254, + "learning_rate": 1.6019601723635664e-05, + "loss": 0.6157658100128174, + "step": 5125 + }, + { + "epoch": 0.9471654098000947, + "grad_norm": 0.06259127706289291, + "learning_rate": 1.6018008611888263e-05, + "loss": 0.5550975799560547, + "step": 5126 + }, + { + "epoch": 0.9473501865089905, + "grad_norm": 0.06924456357955933, + "learning_rate": 1.6016415260642947e-05, + "loss": 0.5265330672264099, + "step": 5127 + }, + { + "epoch": 0.9475349632178864, + "grad_norm": 0.0692691057920456, + "learning_rate": 1.601482166996313e-05, + "loss": 0.5791558027267456, + "step": 5128 + }, + { + "epoch": 0.9477197399267823, + "grad_norm": 0.08452518284320831, + "learning_rate": 1.6013227839912224e-05, + "loss": 0.7562264800071716, + "step": 5129 + }, + { + "epoch": 0.9479045166356781, + "grad_norm": 0.07634246349334717, + "learning_rate": 1.6011633770553666e-05, + "loss": 0.7103491425514221, + "step": 5130 + }, + { + "epoch": 0.9480892933445739, + "grad_norm": 0.08022932708263397, + "learning_rate": 1.6010039461950893e-05, + "loss": 0.5910216569900513, + "step": 5131 + }, + { + "epoch": 0.9482740700534698, + "grad_norm": 0.0745372623205185, + "learning_rate": 1.600844491416735e-05, + "loss": 0.5447332859039307, + "step": 5132 + }, + { + "epoch": 0.9484588467623656, + "grad_norm": 0.07081407308578491, + "learning_rate": 1.6006850127266498e-05, + "loss": 0.4733290374279022, + "step": 5133 + }, + { + "epoch": 0.9486436234712614, + "grad_norm": 0.06008889898657799, + "learning_rate": 1.6005255101311803e-05, + "loss": 0.4657738208770752, + "step": 5134 + }, + { + "epoch": 0.9488284001801572, + "grad_norm": 0.06958460807800293, + "learning_rate": 1.6003659836366744e-05, + "loss": 0.5668250322341919, + "step": 5135 + }, + { + "epoch": 0.9490131768890532, + "grad_norm": 0.05939478427171707, + "learning_rate": 1.6002064332494806e-05, + "loss": 0.5383512377738953, + "step": 5136 + }, + { + "epoch": 0.949197953597949, + "grad_norm": 0.0792398750782013, + "learning_rate": 1.6000468589759486e-05, + "loss": 0.5559828877449036, + "step": 5137 + }, + { + "epoch": 0.9493827303068448, + "grad_norm": 0.07488033920526505, + "learning_rate": 1.599887260822429e-05, + "loss": 0.5714380145072937, + "step": 5138 + }, + { + "epoch": 0.9495675070157407, + "grad_norm": 0.08198749274015427, + "learning_rate": 1.5997276387952733e-05, + "loss": 0.6661936640739441, + "step": 5139 + }, + { + "epoch": 0.9497522837246365, + "grad_norm": 0.06090731918811798, + "learning_rate": 1.5995679929008338e-05, + "loss": 0.5148775577545166, + "step": 5140 + }, + { + "epoch": 0.9499370604335323, + "grad_norm": 0.0652225986123085, + "learning_rate": 1.599408323145464e-05, + "loss": 0.4911291301250458, + "step": 5141 + }, + { + "epoch": 0.9501218371424281, + "grad_norm": 0.07245299220085144, + "learning_rate": 1.599248629535518e-05, + "loss": 0.5561530590057373, + "step": 5142 + }, + { + "epoch": 0.9503066138513241, + "grad_norm": 0.06982336938381195, + "learning_rate": 1.5990889120773515e-05, + "loss": 0.5131959915161133, + "step": 5143 + }, + { + "epoch": 0.9504913905602199, + "grad_norm": 0.059342216700315475, + "learning_rate": 1.5989291707773204e-05, + "loss": 0.5701693296432495, + "step": 5144 + }, + { + "epoch": 0.9506761672691157, + "grad_norm": 0.0620679147541523, + "learning_rate": 1.5987694056417825e-05, + "loss": 0.46893739700317383, + "step": 5145 + }, + { + "epoch": 0.9508609439780116, + "grad_norm": 0.08065487444400787, + "learning_rate": 1.5986096166770953e-05, + "loss": 0.610145628452301, + "step": 5146 + }, + { + "epoch": 0.9510457206869074, + "grad_norm": 0.0846034586429596, + "learning_rate": 1.5984498038896184e-05, + "loss": 0.7594650983810425, + "step": 5147 + }, + { + "epoch": 0.9512304973958032, + "grad_norm": 0.05852271243929863, + "learning_rate": 1.5982899672857115e-05, + "loss": 0.4893558621406555, + "step": 5148 + }, + { + "epoch": 0.9514152741046991, + "grad_norm": 0.06891465187072754, + "learning_rate": 1.598130106871736e-05, + "loss": 0.6074330806732178, + "step": 5149 + }, + { + "epoch": 0.951600050813595, + "grad_norm": 0.05314657464623451, + "learning_rate": 1.5979702226540528e-05, + "loss": 0.4676016569137573, + "step": 5150 + }, + { + "epoch": 0.9517848275224908, + "grad_norm": 0.0809001624584198, + "learning_rate": 1.597810314639026e-05, + "loss": 0.5725110173225403, + "step": 5151 + }, + { + "epoch": 0.9519696042313867, + "grad_norm": 0.0798337310552597, + "learning_rate": 1.5976503828330192e-05, + "loss": 0.5859540700912476, + "step": 5152 + }, + { + "epoch": 0.9521543809402825, + "grad_norm": 0.07935647666454315, + "learning_rate": 1.597490427242397e-05, + "loss": 0.6600157022476196, + "step": 5153 + }, + { + "epoch": 0.9523391576491783, + "grad_norm": 0.06016004458069801, + "learning_rate": 1.5973304478735245e-05, + "loss": 0.42700162529945374, + "step": 5154 + }, + { + "epoch": 0.9525239343580741, + "grad_norm": 0.08889743685722351, + "learning_rate": 1.5971704447327697e-05, + "loss": 0.7476317882537842, + "step": 5155 + }, + { + "epoch": 0.95270871106697, + "grad_norm": 0.06792720407247543, + "learning_rate": 1.5970104178264988e-05, + "loss": 0.5295884013175964, + "step": 5156 + }, + { + "epoch": 0.9528934877758658, + "grad_norm": 0.05554249882698059, + "learning_rate": 1.5968503671610814e-05, + "loss": 0.3461635708808899, + "step": 5157 + }, + { + "epoch": 0.9530782644847617, + "grad_norm": 0.05836229398846626, + "learning_rate": 1.596690292742887e-05, + "loss": 0.49273398518562317, + "step": 5158 + }, + { + "epoch": 0.9532630411936576, + "grad_norm": 0.06659424304962158, + "learning_rate": 1.5965301945782854e-05, + "loss": 0.5845066905021667, + "step": 5159 + }, + { + "epoch": 0.9534478179025534, + "grad_norm": 0.07461929321289062, + "learning_rate": 1.5963700726736485e-05, + "loss": 0.7000290155410767, + "step": 5160 + }, + { + "epoch": 0.9536325946114492, + "grad_norm": 0.08027936518192291, + "learning_rate": 1.5962099270353484e-05, + "loss": 0.734795331954956, + "step": 5161 + }, + { + "epoch": 0.953817371320345, + "grad_norm": 0.05215161293745041, + "learning_rate": 1.5960497576697584e-05, + "loss": 0.36611029505729675, + "step": 5162 + }, + { + "epoch": 0.9540021480292409, + "grad_norm": 0.06992398202419281, + "learning_rate": 1.5958895645832533e-05, + "loss": 0.5141623020172119, + "step": 5163 + }, + { + "epoch": 0.9541869247381367, + "grad_norm": 0.06411082297563553, + "learning_rate": 1.5957293477822075e-05, + "loss": 0.5430804491043091, + "step": 5164 + }, + { + "epoch": 0.9543717014470326, + "grad_norm": 0.0489242747426033, + "learning_rate": 1.595569107272997e-05, + "loss": 0.3583165407180786, + "step": 5165 + }, + { + "epoch": 0.9545564781559285, + "grad_norm": 0.08241157233715057, + "learning_rate": 1.5954088430620004e-05, + "loss": 0.6570907831192017, + "step": 5166 + }, + { + "epoch": 0.9547412548648243, + "grad_norm": 0.07930099219083786, + "learning_rate": 1.5952485551555938e-05, + "loss": 0.6064406037330627, + "step": 5167 + }, + { + "epoch": 0.9549260315737201, + "grad_norm": 0.07888708263635635, + "learning_rate": 1.595088243560157e-05, + "loss": 0.6349717378616333, + "step": 5168 + }, + { + "epoch": 0.955110808282616, + "grad_norm": 0.0772731751203537, + "learning_rate": 1.5949279082820702e-05, + "loss": 0.7229316234588623, + "step": 5169 + }, + { + "epoch": 0.9552955849915118, + "grad_norm": 0.08178115636110306, + "learning_rate": 1.594767549327714e-05, + "loss": 0.6186074614524841, + "step": 5170 + }, + { + "epoch": 0.9554803617004076, + "grad_norm": 0.07245965301990509, + "learning_rate": 1.5946071667034702e-05, + "loss": 0.5710280537605286, + "step": 5171 + }, + { + "epoch": 0.9556651384093036, + "grad_norm": 0.06430346518754959, + "learning_rate": 1.5944467604157213e-05, + "loss": 0.6199550032615662, + "step": 5172 + }, + { + "epoch": 0.9558499151181994, + "grad_norm": 0.07227133214473724, + "learning_rate": 1.594286330470851e-05, + "loss": 0.5163151025772095, + "step": 5173 + }, + { + "epoch": 0.9560346918270952, + "grad_norm": 0.05971883237361908, + "learning_rate": 1.594125876875244e-05, + "loss": 0.5595969557762146, + "step": 5174 + }, + { + "epoch": 0.956219468535991, + "grad_norm": 0.07065737247467041, + "learning_rate": 1.593965399635286e-05, + "loss": 0.48677173256874084, + "step": 5175 + }, + { + "epoch": 0.9564042452448869, + "grad_norm": 0.0731082633137703, + "learning_rate": 1.5938048987573633e-05, + "loss": 0.7291607856750488, + "step": 5176 + }, + { + "epoch": 0.9565890219537827, + "grad_norm": 0.08345812559127808, + "learning_rate": 1.5936443742478632e-05, + "loss": 0.5648295283317566, + "step": 5177 + }, + { + "epoch": 0.9567737986626785, + "grad_norm": 0.07417561113834381, + "learning_rate": 1.593483826113175e-05, + "loss": 0.7409591674804688, + "step": 5178 + }, + { + "epoch": 0.9569585753715744, + "grad_norm": 0.06974013149738312, + "learning_rate": 1.5933232543596868e-05, + "loss": 0.6175902485847473, + "step": 5179 + }, + { + "epoch": 0.9571433520804703, + "grad_norm": 0.07038947194814682, + "learning_rate": 1.5931626589937895e-05, + "loss": 0.5422641634941101, + "step": 5180 + }, + { + "epoch": 0.9573281287893661, + "grad_norm": 0.06995180249214172, + "learning_rate": 1.5930020400218737e-05, + "loss": 0.5708144903182983, + "step": 5181 + }, + { + "epoch": 0.957512905498262, + "grad_norm": 0.05506231263279915, + "learning_rate": 1.5928413974503326e-05, + "loss": 0.4746338725090027, + "step": 5182 + }, + { + "epoch": 0.9576976822071578, + "grad_norm": 0.05807039141654968, + "learning_rate": 1.5926807312855584e-05, + "loss": 0.3851551115512848, + "step": 5183 + }, + { + "epoch": 0.9578824589160536, + "grad_norm": 0.07046632468700409, + "learning_rate": 1.5925200415339453e-05, + "loss": 0.5573902130126953, + "step": 5184 + }, + { + "epoch": 0.9580672356249494, + "grad_norm": 0.07509570568799973, + "learning_rate": 1.592359328201888e-05, + "loss": 0.563309371471405, + "step": 5185 + }, + { + "epoch": 0.9582520123338453, + "grad_norm": 0.0638134554028511, + "learning_rate": 1.592198591295783e-05, + "loss": 0.5711766481399536, + "step": 5186 + }, + { + "epoch": 0.9584367890427412, + "grad_norm": 0.06136501207947731, + "learning_rate": 1.592037830822027e-05, + "loss": 0.4811912775039673, + "step": 5187 + }, + { + "epoch": 0.958621565751637, + "grad_norm": 0.0601169727742672, + "learning_rate": 1.5918770467870174e-05, + "loss": 0.40823274850845337, + "step": 5188 + }, + { + "epoch": 0.9588063424605329, + "grad_norm": 0.08154735714197159, + "learning_rate": 1.5917162391971534e-05, + "loss": 0.5557390451431274, + "step": 5189 + }, + { + "epoch": 0.9589911191694287, + "grad_norm": 0.07775022834539413, + "learning_rate": 1.591555408058834e-05, + "loss": 0.5641697645187378, + "step": 5190 + }, + { + "epoch": 0.9591758958783245, + "grad_norm": 0.08029249310493469, + "learning_rate": 1.5913945533784598e-05, + "loss": 0.6794726252555847, + "step": 5191 + }, + { + "epoch": 0.9593606725872204, + "grad_norm": 0.07469724863767624, + "learning_rate": 1.591233675162433e-05, + "loss": 0.5435570478439331, + "step": 5192 + }, + { + "epoch": 0.9595454492961162, + "grad_norm": 0.07713647931814194, + "learning_rate": 1.5910727734171554e-05, + "loss": 0.6686422228813171, + "step": 5193 + }, + { + "epoch": 0.9597302260050121, + "grad_norm": 0.07689940184354782, + "learning_rate": 1.5909118481490308e-05, + "loss": 0.5657721757888794, + "step": 5194 + }, + { + "epoch": 0.959915002713908, + "grad_norm": 0.06951393187046051, + "learning_rate": 1.5907508993644635e-05, + "loss": 0.4880034923553467, + "step": 5195 + }, + { + "epoch": 0.9600997794228038, + "grad_norm": 0.07189091295003891, + "learning_rate": 1.5905899270698588e-05, + "loss": 0.5114649534225464, + "step": 5196 + }, + { + "epoch": 0.9602845561316996, + "grad_norm": 0.05564050376415253, + "learning_rate": 1.5904289312716222e-05, + "loss": 0.5179625153541565, + "step": 5197 + }, + { + "epoch": 0.9604693328405954, + "grad_norm": 0.07559002190828323, + "learning_rate": 1.5902679119761618e-05, + "loss": 0.7051935195922852, + "step": 5198 + }, + { + "epoch": 0.9606541095494913, + "grad_norm": 0.0721646100282669, + "learning_rate": 1.5901068691898848e-05, + "loss": 0.5809262990951538, + "step": 5199 + }, + { + "epoch": 0.9608388862583871, + "grad_norm": 0.07101470977067947, + "learning_rate": 1.589945802919201e-05, + "loss": 0.49693232774734497, + "step": 5200 + }, + { + "epoch": 0.9610236629672829, + "grad_norm": 0.09033049643039703, + "learning_rate": 1.5897847131705194e-05, + "loss": 0.6932334899902344, + "step": 5201 + }, + { + "epoch": 0.9612084396761789, + "grad_norm": 0.06097559258341789, + "learning_rate": 1.589623599950252e-05, + "loss": 0.38020753860473633, + "step": 5202 + }, + { + "epoch": 0.9613932163850747, + "grad_norm": 0.05993859842419624, + "learning_rate": 1.58946246326481e-05, + "loss": 0.405231773853302, + "step": 5203 + }, + { + "epoch": 0.9615779930939705, + "grad_norm": 0.07280710339546204, + "learning_rate": 1.589301303120606e-05, + "loss": 0.6655395030975342, + "step": 5204 + }, + { + "epoch": 0.9617627698028663, + "grad_norm": 0.0598413310945034, + "learning_rate": 1.5891401195240533e-05, + "loss": 0.563666045665741, + "step": 5205 + }, + { + "epoch": 0.9619475465117622, + "grad_norm": 0.08729203790426254, + "learning_rate": 1.5889789124815676e-05, + "loss": 0.5672181844711304, + "step": 5206 + }, + { + "epoch": 0.962132323220658, + "grad_norm": 0.05892879143357277, + "learning_rate": 1.588817681999564e-05, + "loss": 0.5308200716972351, + "step": 5207 + }, + { + "epoch": 0.9623170999295538, + "grad_norm": 0.07471846789121628, + "learning_rate": 1.5886564280844585e-05, + "loss": 0.7177116274833679, + "step": 5208 + }, + { + "epoch": 0.9625018766384498, + "grad_norm": 0.07476504147052765, + "learning_rate": 1.5884951507426692e-05, + "loss": 0.6094974875450134, + "step": 5209 + }, + { + "epoch": 0.9626866533473456, + "grad_norm": 0.06729447841644287, + "learning_rate": 1.5883338499806137e-05, + "loss": 0.5416808724403381, + "step": 5210 + }, + { + "epoch": 0.9628714300562414, + "grad_norm": 0.07437737286090851, + "learning_rate": 1.5881725258047116e-05, + "loss": 0.5953792333602905, + "step": 5211 + }, + { + "epoch": 0.9630562067651373, + "grad_norm": 0.06470108032226562, + "learning_rate": 1.5880111782213833e-05, + "loss": 0.5616847276687622, + "step": 5212 + }, + { + "epoch": 0.9632409834740331, + "grad_norm": 0.08143790066242218, + "learning_rate": 1.5878498072370497e-05, + "loss": 0.5888870358467102, + "step": 5213 + }, + { + "epoch": 0.9634257601829289, + "grad_norm": 0.07331033051013947, + "learning_rate": 1.587688412858133e-05, + "loss": 0.7387831211090088, + "step": 5214 + }, + { + "epoch": 0.9636105368918247, + "grad_norm": 0.06353524327278137, + "learning_rate": 1.587526995091056e-05, + "loss": 0.675960123538971, + "step": 5215 + }, + { + "epoch": 0.9637953136007207, + "grad_norm": 0.07013055682182312, + "learning_rate": 1.5873655539422426e-05, + "loss": 0.5157957673072815, + "step": 5216 + }, + { + "epoch": 0.9639800903096165, + "grad_norm": 0.0558655709028244, + "learning_rate": 1.5872040894181182e-05, + "loss": 0.45955491065979004, + "step": 5217 + }, + { + "epoch": 0.9641648670185123, + "grad_norm": 0.07099532335996628, + "learning_rate": 1.5870426015251076e-05, + "loss": 0.5657529234886169, + "step": 5218 + }, + { + "epoch": 0.9643496437274082, + "grad_norm": 0.061591215431690216, + "learning_rate": 1.5868810902696385e-05, + "loss": 0.4769912362098694, + "step": 5219 + }, + { + "epoch": 0.964534420436304, + "grad_norm": 0.08955328166484833, + "learning_rate": 1.586719555658138e-05, + "loss": 0.7824416160583496, + "step": 5220 + }, + { + "epoch": 0.9647191971451998, + "grad_norm": 0.0744243934750557, + "learning_rate": 1.5865579976970348e-05, + "loss": 0.49541524052619934, + "step": 5221 + }, + { + "epoch": 0.9649039738540957, + "grad_norm": 0.06643404066562653, + "learning_rate": 1.5863964163927585e-05, + "loss": 0.4621345102787018, + "step": 5222 + }, + { + "epoch": 0.9650887505629915, + "grad_norm": 0.05877881869673729, + "learning_rate": 1.5862348117517395e-05, + "loss": 0.48775753378868103, + "step": 5223 + }, + { + "epoch": 0.9652735272718874, + "grad_norm": 0.07754340022802353, + "learning_rate": 1.586073183780409e-05, + "loss": 0.5634762644767761, + "step": 5224 + }, + { + "epoch": 0.9654583039807833, + "grad_norm": 0.058851584792137146, + "learning_rate": 1.5859115324851992e-05, + "loss": 0.5044225454330444, + "step": 5225 + }, + { + "epoch": 0.9656430806896791, + "grad_norm": 0.06915190070867538, + "learning_rate": 1.5857498578725435e-05, + "loss": 0.5596942901611328, + "step": 5226 + }, + { + "epoch": 0.9658278573985749, + "grad_norm": 0.06534717231988907, + "learning_rate": 1.5855881599488764e-05, + "loss": 0.6266942620277405, + "step": 5227 + }, + { + "epoch": 0.9660126341074707, + "grad_norm": 0.1051904484629631, + "learning_rate": 1.5854264387206322e-05, + "loss": 0.6451123356819153, + "step": 5228 + }, + { + "epoch": 0.9661974108163666, + "grad_norm": 0.07486904412508011, + "learning_rate": 1.5852646941942474e-05, + "loss": 0.5404165387153625, + "step": 5229 + }, + { + "epoch": 0.9663821875252624, + "grad_norm": 0.06894651800394058, + "learning_rate": 1.5851029263761588e-05, + "loss": 0.6602181196212769, + "step": 5230 + }, + { + "epoch": 0.9665669642341583, + "grad_norm": 0.07317795604467392, + "learning_rate": 1.5849411352728044e-05, + "loss": 0.5347000360488892, + "step": 5231 + }, + { + "epoch": 0.9667517409430542, + "grad_norm": 0.07264052331447601, + "learning_rate": 1.5847793208906228e-05, + "loss": 0.6339982748031616, + "step": 5232 + }, + { + "epoch": 0.96693651765195, + "grad_norm": 0.06145188957452774, + "learning_rate": 1.5846174832360537e-05, + "loss": 0.4953806698322296, + "step": 5233 + }, + { + "epoch": 0.9671212943608458, + "grad_norm": 0.08297760039567947, + "learning_rate": 1.5844556223155377e-05, + "loss": 0.5764387249946594, + "step": 5234 + }, + { + "epoch": 0.9673060710697416, + "grad_norm": 0.06188672408461571, + "learning_rate": 1.5842937381355166e-05, + "loss": 0.5386495590209961, + "step": 5235 + }, + { + "epoch": 0.9674908477786375, + "grad_norm": 0.06407991051673889, + "learning_rate": 1.584131830702433e-05, + "loss": 0.6187621355056763, + "step": 5236 + }, + { + "epoch": 0.9676756244875333, + "grad_norm": 0.07902231812477112, + "learning_rate": 1.58396990002273e-05, + "loss": 0.5892176032066345, + "step": 5237 + }, + { + "epoch": 0.9678604011964292, + "grad_norm": 0.07633795589208603, + "learning_rate": 1.5838079461028516e-05, + "loss": 0.5621336102485657, + "step": 5238 + }, + { + "epoch": 0.9680451779053251, + "grad_norm": 0.06466647982597351, + "learning_rate": 1.5836459689492437e-05, + "loss": 0.5642315745353699, + "step": 5239 + }, + { + "epoch": 0.9682299546142209, + "grad_norm": 0.06404679268598557, + "learning_rate": 1.583483968568352e-05, + "loss": 0.5862606167793274, + "step": 5240 + }, + { + "epoch": 0.9684147313231167, + "grad_norm": 0.08490888774394989, + "learning_rate": 1.5833219449666242e-05, + "loss": 0.6298123002052307, + "step": 5241 + }, + { + "epoch": 0.9685995080320126, + "grad_norm": 0.06971051543951035, + "learning_rate": 1.5831598981505076e-05, + "loss": 0.7906245589256287, + "step": 5242 + }, + { + "epoch": 0.9687842847409084, + "grad_norm": 0.05880312621593475, + "learning_rate": 1.5829978281264516e-05, + "loss": 0.5124143362045288, + "step": 5243 + }, + { + "epoch": 0.9689690614498042, + "grad_norm": 0.07851839065551758, + "learning_rate": 1.5828357349009065e-05, + "loss": 0.7467482686042786, + "step": 5244 + }, + { + "epoch": 0.9691538381587, + "grad_norm": 0.09485996514558792, + "learning_rate": 1.5826736184803223e-05, + "loss": 0.8176977038383484, + "step": 5245 + }, + { + "epoch": 0.969338614867596, + "grad_norm": 0.08205217123031616, + "learning_rate": 1.5825114788711507e-05, + "loss": 0.6752728223800659, + "step": 5246 + }, + { + "epoch": 0.9695233915764918, + "grad_norm": 0.07848254591226578, + "learning_rate": 1.582349316079845e-05, + "loss": 0.6415618658065796, + "step": 5247 + }, + { + "epoch": 0.9697081682853876, + "grad_norm": 0.0754413902759552, + "learning_rate": 1.5821871301128587e-05, + "loss": 0.7687599658966064, + "step": 5248 + }, + { + "epoch": 0.9698929449942835, + "grad_norm": 0.060738738626241684, + "learning_rate": 1.5820249209766455e-05, + "loss": 0.4032150208950043, + "step": 5249 + }, + { + "epoch": 0.9700777217031793, + "grad_norm": 0.06582488119602203, + "learning_rate": 1.5818626886776617e-05, + "loss": 0.5358696579933167, + "step": 5250 + }, + { + "epoch": 0.9702624984120751, + "grad_norm": 0.07538236677646637, + "learning_rate": 1.5817004332223634e-05, + "loss": 0.6872379183769226, + "step": 5251 + }, + { + "epoch": 0.970447275120971, + "grad_norm": 0.0801084116101265, + "learning_rate": 1.5815381546172075e-05, + "loss": 0.6306943893432617, + "step": 5252 + }, + { + "epoch": 0.9706320518298669, + "grad_norm": 0.06021309643983841, + "learning_rate": 1.5813758528686523e-05, + "loss": 0.4739924967288971, + "step": 5253 + }, + { + "epoch": 0.9708168285387627, + "grad_norm": 0.05175168812274933, + "learning_rate": 1.5812135279831574e-05, + "loss": 0.43031033873558044, + "step": 5254 + }, + { + "epoch": 0.9710016052476586, + "grad_norm": 0.07822158187627792, + "learning_rate": 1.5810511799671826e-05, + "loss": 0.46244823932647705, + "step": 5255 + }, + { + "epoch": 0.9711863819565544, + "grad_norm": 0.08453062176704407, + "learning_rate": 1.5808888088271883e-05, + "loss": 0.6953146457672119, + "step": 5256 + }, + { + "epoch": 0.9713711586654502, + "grad_norm": 0.07755032181739807, + "learning_rate": 1.580726414569637e-05, + "loss": 0.7229933738708496, + "step": 5257 + }, + { + "epoch": 0.971555935374346, + "grad_norm": 0.07005933672189713, + "learning_rate": 1.5805639972009914e-05, + "loss": 0.5575450658798218, + "step": 5258 + }, + { + "epoch": 0.9717407120832419, + "grad_norm": 0.06289315223693848, + "learning_rate": 1.580401556727715e-05, + "loss": 0.4901737570762634, + "step": 5259 + }, + { + "epoch": 0.9719254887921378, + "grad_norm": 0.07762367278337479, + "learning_rate": 1.580239093156272e-05, + "loss": 0.7002114653587341, + "step": 5260 + }, + { + "epoch": 0.9721102655010336, + "grad_norm": 0.08419755101203918, + "learning_rate": 1.580076606493129e-05, + "loss": 0.6662076115608215, + "step": 5261 + }, + { + "epoch": 0.9722950422099295, + "grad_norm": 0.0712367370724678, + "learning_rate": 1.5799140967447516e-05, + "loss": 0.5575383305549622, + "step": 5262 + }, + { + "epoch": 0.9724798189188253, + "grad_norm": 0.05275033041834831, + "learning_rate": 1.5797515639176077e-05, + "loss": 0.4794505536556244, + "step": 5263 + }, + { + "epoch": 0.9726645956277211, + "grad_norm": 0.08217772841453552, + "learning_rate": 1.579589008018165e-05, + "loss": 0.8188614845275879, + "step": 5264 + }, + { + "epoch": 0.972849372336617, + "grad_norm": 0.07426926493644714, + "learning_rate": 1.5794264290528937e-05, + "loss": 0.6695041060447693, + "step": 5265 + }, + { + "epoch": 0.9730341490455128, + "grad_norm": 0.07670366019010544, + "learning_rate": 1.5792638270282626e-05, + "loss": 0.6676644682884216, + "step": 5266 + }, + { + "epoch": 0.9732189257544086, + "grad_norm": 0.07113322615623474, + "learning_rate": 1.5791012019507438e-05, + "loss": 0.590463399887085, + "step": 5267 + }, + { + "epoch": 0.9734037024633045, + "grad_norm": 0.0710613802075386, + "learning_rate": 1.578938553826809e-05, + "loss": 0.6342708468437195, + "step": 5268 + }, + { + "epoch": 0.9735884791722004, + "grad_norm": 0.06318343430757523, + "learning_rate": 1.578775882662931e-05, + "loss": 0.5334287285804749, + "step": 5269 + }, + { + "epoch": 0.9737732558810962, + "grad_norm": 0.06320284307003021, + "learning_rate": 1.5786131884655836e-05, + "loss": 0.6215629577636719, + "step": 5270 + }, + { + "epoch": 0.973958032589992, + "grad_norm": 0.08957011252641678, + "learning_rate": 1.5784504712412414e-05, + "loss": 0.670143723487854, + "step": 5271 + }, + { + "epoch": 0.9741428092988879, + "grad_norm": 0.05370910465717316, + "learning_rate": 1.57828773099638e-05, + "loss": 0.5103188753128052, + "step": 5272 + }, + { + "epoch": 0.9743275860077837, + "grad_norm": 0.05635027587413788, + "learning_rate": 1.5781249677374767e-05, + "loss": 0.43409958481788635, + "step": 5273 + }, + { + "epoch": 0.9745123627166795, + "grad_norm": 0.07601001858711243, + "learning_rate": 1.577962181471008e-05, + "loss": 0.7073262333869934, + "step": 5274 + }, + { + "epoch": 0.9746971394255755, + "grad_norm": 0.06489431858062744, + "learning_rate": 1.577799372203453e-05, + "loss": 0.3917011320590973, + "step": 5275 + }, + { + "epoch": 0.9748819161344713, + "grad_norm": 0.06366218626499176, + "learning_rate": 1.5776365399412905e-05, + "loss": 0.5069113969802856, + "step": 5276 + }, + { + "epoch": 0.9750666928433671, + "grad_norm": 0.07721502333879471, + "learning_rate": 1.5774736846910007e-05, + "loss": 0.6327058672904968, + "step": 5277 + }, + { + "epoch": 0.9752514695522629, + "grad_norm": 0.05550169572234154, + "learning_rate": 1.5773108064590655e-05, + "loss": 0.4321601688861847, + "step": 5278 + }, + { + "epoch": 0.9754362462611588, + "grad_norm": 0.0736430287361145, + "learning_rate": 1.577147905251966e-05, + "loss": 0.5526038408279419, + "step": 5279 + }, + { + "epoch": 0.9756210229700546, + "grad_norm": 0.08940877765417099, + "learning_rate": 1.5769849810761855e-05, + "loss": 0.6926903128623962, + "step": 5280 + }, + { + "epoch": 0.9758057996789504, + "grad_norm": 0.07702791690826416, + "learning_rate": 1.5768220339382077e-05, + "loss": 0.5691621899604797, + "step": 5281 + }, + { + "epoch": 0.9759905763878464, + "grad_norm": 0.055418889969587326, + "learning_rate": 1.576659063844518e-05, + "loss": 0.48040205240249634, + "step": 5282 + }, + { + "epoch": 0.9761753530967422, + "grad_norm": 0.05237689986824989, + "learning_rate": 1.5764960708016016e-05, + "loss": 0.4535737633705139, + "step": 5283 + }, + { + "epoch": 0.976360129805638, + "grad_norm": 0.05730956047773361, + "learning_rate": 1.5763330548159453e-05, + "loss": 0.4882669150829315, + "step": 5284 + }, + { + "epoch": 0.9765449065145339, + "grad_norm": 0.0691274106502533, + "learning_rate": 1.5761700158940364e-05, + "loss": 0.6178051829338074, + "step": 5285 + }, + { + "epoch": 0.9767296832234297, + "grad_norm": 0.08765674382448196, + "learning_rate": 1.5760069540423636e-05, + "loss": 0.8575605154037476, + "step": 5286 + }, + { + "epoch": 0.9769144599323255, + "grad_norm": 0.06924710422754288, + "learning_rate": 1.5758438692674158e-05, + "loss": 0.6817244291305542, + "step": 5287 + }, + { + "epoch": 0.9770992366412213, + "grad_norm": 0.0700920894742012, + "learning_rate": 1.5756807615756836e-05, + "loss": 0.7932645678520203, + "step": 5288 + }, + { + "epoch": 0.9772840133501172, + "grad_norm": 0.05774247646331787, + "learning_rate": 1.5755176309736586e-05, + "loss": 0.49710604548454285, + "step": 5289 + }, + { + "epoch": 0.9774687900590131, + "grad_norm": 0.056986741721630096, + "learning_rate": 1.575354477467832e-05, + "loss": 0.45592036843299866, + "step": 5290 + }, + { + "epoch": 0.9776535667679089, + "grad_norm": 0.059322137385606766, + "learning_rate": 1.5751913010646977e-05, + "loss": 0.47350993752479553, + "step": 5291 + }, + { + "epoch": 0.9778383434768048, + "grad_norm": 0.07256398350000381, + "learning_rate": 1.575028101770749e-05, + "loss": 0.5942186713218689, + "step": 5292 + }, + { + "epoch": 0.9780231201857006, + "grad_norm": 0.05885408818721771, + "learning_rate": 1.5748648795924807e-05, + "loss": 0.40352359414100647, + "step": 5293 + }, + { + "epoch": 0.9782078968945964, + "grad_norm": 0.06275617331266403, + "learning_rate": 1.5747016345363885e-05, + "loss": 0.38128185272216797, + "step": 5294 + }, + { + "epoch": 0.9783926736034922, + "grad_norm": 0.06964803487062454, + "learning_rate": 1.5745383666089698e-05, + "loss": 0.5841737389564514, + "step": 5295 + }, + { + "epoch": 0.9785774503123881, + "grad_norm": 0.06731443107128143, + "learning_rate": 1.5743750758167214e-05, + "loss": 0.6095026731491089, + "step": 5296 + }, + { + "epoch": 0.978762227021284, + "grad_norm": 0.0793696939945221, + "learning_rate": 1.574211762166142e-05, + "loss": 0.6659227609634399, + "step": 5297 + }, + { + "epoch": 0.9789470037301798, + "grad_norm": 0.07954433560371399, + "learning_rate": 1.574048425663731e-05, + "loss": 0.5637004971504211, + "step": 5298 + }, + { + "epoch": 0.9791317804390757, + "grad_norm": 0.07504207640886307, + "learning_rate": 1.5738850663159885e-05, + "loss": 0.7107203006744385, + "step": 5299 + }, + { + "epoch": 0.9793165571479715, + "grad_norm": 0.06294222921133041, + "learning_rate": 1.5737216841294156e-05, + "loss": 0.5561524033546448, + "step": 5300 + }, + { + "epoch": 0.9795013338568673, + "grad_norm": 0.06990914791822433, + "learning_rate": 1.5735582791105147e-05, + "loss": 0.5332736968994141, + "step": 5301 + }, + { + "epoch": 0.9796861105657632, + "grad_norm": 0.06765109300613403, + "learning_rate": 1.5733948512657892e-05, + "loss": 0.7284939885139465, + "step": 5302 + }, + { + "epoch": 0.979870887274659, + "grad_norm": 0.08294203877449036, + "learning_rate": 1.573231400601742e-05, + "loss": 0.7408113479614258, + "step": 5303 + }, + { + "epoch": 0.9800556639835549, + "grad_norm": 0.07409501075744629, + "learning_rate": 1.5730679271248787e-05, + "loss": 0.5788695812225342, + "step": 5304 + }, + { + "epoch": 0.9802404406924508, + "grad_norm": 0.06979045271873474, + "learning_rate": 1.572904430841705e-05, + "loss": 0.4978390038013458, + "step": 5305 + }, + { + "epoch": 0.9804252174013466, + "grad_norm": 0.09059244394302368, + "learning_rate": 1.5727409117587268e-05, + "loss": 0.7129696011543274, + "step": 5306 + }, + { + "epoch": 0.9806099941102424, + "grad_norm": 0.07869086414575577, + "learning_rate": 1.5725773698824527e-05, + "loss": 0.6792417764663696, + "step": 5307 + }, + { + "epoch": 0.9807947708191382, + "grad_norm": 0.06648946553468704, + "learning_rate": 1.5724138052193904e-05, + "loss": 0.5888637900352478, + "step": 5308 + }, + { + "epoch": 0.9809795475280341, + "grad_norm": 0.071448914706707, + "learning_rate": 1.5722502177760495e-05, + "loss": 0.6279944181442261, + "step": 5309 + }, + { + "epoch": 0.9811643242369299, + "grad_norm": 0.09720901399850845, + "learning_rate": 1.5720866075589404e-05, + "loss": 0.9879162907600403, + "step": 5310 + }, + { + "epoch": 0.9813491009458257, + "grad_norm": 0.06968852877616882, + "learning_rate": 1.571922974574574e-05, + "loss": 0.5217567086219788, + "step": 5311 + }, + { + "epoch": 0.9815338776547217, + "grad_norm": 0.0745844841003418, + "learning_rate": 1.5717593188294626e-05, + "loss": 0.5189414024353027, + "step": 5312 + }, + { + "epoch": 0.9817186543636175, + "grad_norm": 0.0729883685708046, + "learning_rate": 1.571595640330119e-05, + "loss": 0.5511730909347534, + "step": 5313 + }, + { + "epoch": 0.9819034310725133, + "grad_norm": 0.06353732198476791, + "learning_rate": 1.5714319390830575e-05, + "loss": 0.5453438758850098, + "step": 5314 + }, + { + "epoch": 0.9820882077814092, + "grad_norm": 0.06904557347297668, + "learning_rate": 1.5712682150947926e-05, + "loss": 0.5570688247680664, + "step": 5315 + }, + { + "epoch": 0.982272984490305, + "grad_norm": 0.07444656640291214, + "learning_rate": 1.5711044683718398e-05, + "loss": 0.7203077673912048, + "step": 5316 + }, + { + "epoch": 0.9824577611992008, + "grad_norm": 0.07032838463783264, + "learning_rate": 1.570940698920716e-05, + "loss": 0.5388743877410889, + "step": 5317 + }, + { + "epoch": 0.9826425379080966, + "grad_norm": 0.06798477470874786, + "learning_rate": 1.5707769067479382e-05, + "loss": 0.5358221530914307, + "step": 5318 + }, + { + "epoch": 0.9828273146169926, + "grad_norm": 0.0741899311542511, + "learning_rate": 1.570613091860026e-05, + "loss": 0.5113516449928284, + "step": 5319 + }, + { + "epoch": 0.9830120913258884, + "grad_norm": 0.07862939685583115, + "learning_rate": 1.5704492542634974e-05, + "loss": 0.7012460827827454, + "step": 5320 + }, + { + "epoch": 0.9831968680347842, + "grad_norm": 0.07793296128511429, + "learning_rate": 1.5702853939648736e-05, + "loss": 0.6566935777664185, + "step": 5321 + }, + { + "epoch": 0.9833816447436801, + "grad_norm": 0.0800136998295784, + "learning_rate": 1.5701215109706748e-05, + "loss": 0.6474668979644775, + "step": 5322 + }, + { + "epoch": 0.9835664214525759, + "grad_norm": 0.0727175772190094, + "learning_rate": 1.569957605287424e-05, + "loss": 0.5037731528282166, + "step": 5323 + }, + { + "epoch": 0.9837511981614717, + "grad_norm": 0.07204470783472061, + "learning_rate": 1.5697936769216436e-05, + "loss": 0.4809809625148773, + "step": 5324 + }, + { + "epoch": 0.9839359748703675, + "grad_norm": 0.09261243045330048, + "learning_rate": 1.5696297258798573e-05, + "loss": 0.6194671988487244, + "step": 5325 + }, + { + "epoch": 0.9841207515792635, + "grad_norm": 0.07927238941192627, + "learning_rate": 1.5694657521685905e-05, + "loss": 0.4942941963672638, + "step": 5326 + }, + { + "epoch": 0.9843055282881593, + "grad_norm": 0.06556130945682526, + "learning_rate": 1.569301755794368e-05, + "loss": 0.45880836248397827, + "step": 5327 + }, + { + "epoch": 0.9844903049970551, + "grad_norm": 0.07203684747219086, + "learning_rate": 1.5691377367637165e-05, + "loss": 0.5204325914382935, + "step": 5328 + }, + { + "epoch": 0.984675081705951, + "grad_norm": 0.07471609115600586, + "learning_rate": 1.5689736950831643e-05, + "loss": 0.5105763673782349, + "step": 5329 + }, + { + "epoch": 0.9848598584148468, + "grad_norm": 0.09363534301519394, + "learning_rate": 1.5688096307592387e-05, + "loss": 0.7678961753845215, + "step": 5330 + }, + { + "epoch": 0.9850446351237426, + "grad_norm": 0.05547983944416046, + "learning_rate": 1.5686455437984694e-05, + "loss": 0.42751187086105347, + "step": 5331 + }, + { + "epoch": 0.9852294118326385, + "grad_norm": 0.078705795109272, + "learning_rate": 1.5684814342073864e-05, + "loss": 0.4591256380081177, + "step": 5332 + }, + { + "epoch": 0.9854141885415343, + "grad_norm": 0.058858368545770645, + "learning_rate": 1.5683173019925212e-05, + "loss": 0.430483877658844, + "step": 5333 + }, + { + "epoch": 0.9855989652504302, + "grad_norm": 0.06559912860393524, + "learning_rate": 1.5681531471604056e-05, + "loss": 0.477867066860199, + "step": 5334 + }, + { + "epoch": 0.985783741959326, + "grad_norm": 0.07676411420106888, + "learning_rate": 1.5679889697175718e-05, + "loss": 0.5628607869148254, + "step": 5335 + }, + { + "epoch": 0.9859685186682219, + "grad_norm": 0.06487436592578888, + "learning_rate": 1.5678247696705538e-05, + "loss": 0.5461073517799377, + "step": 5336 + }, + { + "epoch": 0.9861532953771177, + "grad_norm": 0.07578388601541519, + "learning_rate": 1.567660547025887e-05, + "loss": 0.6844896078109741, + "step": 5337 + }, + { + "epoch": 0.9863380720860135, + "grad_norm": 0.07774979621171951, + "learning_rate": 1.5674963017901062e-05, + "loss": 0.6670076251029968, + "step": 5338 + }, + { + "epoch": 0.9865228487949094, + "grad_norm": 0.06051953136920929, + "learning_rate": 1.5673320339697484e-05, + "loss": 0.47586143016815186, + "step": 5339 + }, + { + "epoch": 0.9867076255038052, + "grad_norm": 0.0646534264087677, + "learning_rate": 1.5671677435713502e-05, + "loss": 0.4309314787387848, + "step": 5340 + }, + { + "epoch": 0.9868924022127011, + "grad_norm": 0.07801027595996857, + "learning_rate": 1.56700343060145e-05, + "loss": 0.5589619874954224, + "step": 5341 + }, + { + "epoch": 0.987077178921597, + "grad_norm": 0.08354455232620239, + "learning_rate": 1.566839095066588e-05, + "loss": 0.621575117111206, + "step": 5342 + }, + { + "epoch": 0.9872619556304928, + "grad_norm": 0.061820484697818756, + "learning_rate": 1.5666747369733028e-05, + "loss": 0.48609113693237305, + "step": 5343 + }, + { + "epoch": 0.9874467323393886, + "grad_norm": 0.07441884279251099, + "learning_rate": 1.5665103563281364e-05, + "loss": 0.6886221766471863, + "step": 5344 + }, + { + "epoch": 0.9876315090482845, + "grad_norm": 0.06706920266151428, + "learning_rate": 1.56634595313763e-05, + "loss": 0.4719564914703369, + "step": 5345 + }, + { + "epoch": 0.9878162857571803, + "grad_norm": 0.061861515045166016, + "learning_rate": 1.5661815274083264e-05, + "loss": 0.5134391188621521, + "step": 5346 + }, + { + "epoch": 0.9880010624660761, + "grad_norm": 0.08870477229356766, + "learning_rate": 1.5660170791467692e-05, + "loss": 0.7590252757072449, + "step": 5347 + }, + { + "epoch": 0.988185839174972, + "grad_norm": 0.08870309591293335, + "learning_rate": 1.5658526083595034e-05, + "loss": 0.8938247561454773, + "step": 5348 + }, + { + "epoch": 0.9883706158838679, + "grad_norm": 0.07829594612121582, + "learning_rate": 1.5656881150530742e-05, + "loss": 0.6156164407730103, + "step": 5349 + }, + { + "epoch": 0.9885553925927637, + "grad_norm": 0.06284963339567184, + "learning_rate": 1.5655235992340275e-05, + "loss": 0.5012479424476624, + "step": 5350 + }, + { + "epoch": 0.9887401693016595, + "grad_norm": 0.06273221224546432, + "learning_rate": 1.5653590609089112e-05, + "loss": 0.4839748442173004, + "step": 5351 + }, + { + "epoch": 0.9889249460105554, + "grad_norm": 0.09538552910089493, + "learning_rate": 1.5651945000842728e-05, + "loss": 0.8040647506713867, + "step": 5352 + }, + { + "epoch": 0.9891097227194512, + "grad_norm": 0.06506232172250748, + "learning_rate": 1.5650299167666617e-05, + "loss": 0.5908097624778748, + "step": 5353 + }, + { + "epoch": 0.989294499428347, + "grad_norm": 0.0688038244843483, + "learning_rate": 1.5648653109626277e-05, + "loss": 0.6032392978668213, + "step": 5354 + }, + { + "epoch": 0.9894792761372428, + "grad_norm": 0.08789508044719696, + "learning_rate": 1.564700682678721e-05, + "loss": 0.6738143563270569, + "step": 5355 + }, + { + "epoch": 0.9896640528461388, + "grad_norm": 0.07509738206863403, + "learning_rate": 1.5645360319214946e-05, + "loss": 0.5347864627838135, + "step": 5356 + }, + { + "epoch": 0.9898488295550346, + "grad_norm": 0.07636866718530655, + "learning_rate": 1.5643713586975e-05, + "loss": 0.6815749406814575, + "step": 5357 + }, + { + "epoch": 0.9900336062639304, + "grad_norm": 0.06707502901554108, + "learning_rate": 1.564206663013291e-05, + "loss": 0.7252213954925537, + "step": 5358 + }, + { + "epoch": 0.9902183829728263, + "grad_norm": 0.05714981257915497, + "learning_rate": 1.564041944875422e-05, + "loss": 0.4083198606967926, + "step": 5359 + }, + { + "epoch": 0.9904031596817221, + "grad_norm": 0.06341986358165741, + "learning_rate": 1.5638772042904486e-05, + "loss": 0.577389121055603, + "step": 5360 + }, + { + "epoch": 0.9905879363906179, + "grad_norm": 0.07032588869333267, + "learning_rate": 1.5637124412649263e-05, + "loss": 0.5840871930122375, + "step": 5361 + }, + { + "epoch": 0.9907727130995138, + "grad_norm": 0.09448905289173126, + "learning_rate": 1.5635476558054122e-05, + "loss": 0.7708903551101685, + "step": 5362 + }, + { + "epoch": 0.9909574898084097, + "grad_norm": 0.051332227885723114, + "learning_rate": 1.563382847918465e-05, + "loss": 0.42049360275268555, + "step": 5363 + }, + { + "epoch": 0.9911422665173055, + "grad_norm": 0.07908229529857635, + "learning_rate": 1.5632180176106428e-05, + "loss": 0.6879507303237915, + "step": 5364 + }, + { + "epoch": 0.9913270432262014, + "grad_norm": 0.08213875442743301, + "learning_rate": 1.5630531648885056e-05, + "loss": 0.7663908004760742, + "step": 5365 + }, + { + "epoch": 0.9915118199350972, + "grad_norm": 0.07541374117136002, + "learning_rate": 1.5628882897586143e-05, + "loss": 0.5930274724960327, + "step": 5366 + }, + { + "epoch": 0.991696596643993, + "grad_norm": 0.07516644895076752, + "learning_rate": 1.56272339222753e-05, + "loss": 0.7060964107513428, + "step": 5367 + }, + { + "epoch": 0.9918813733528888, + "grad_norm": 0.07756774127483368, + "learning_rate": 1.5625584723018147e-05, + "loss": 0.5873556733131409, + "step": 5368 + }, + { + "epoch": 0.9920661500617847, + "grad_norm": 0.08249982446432114, + "learning_rate": 1.5623935299880323e-05, + "loss": 0.7442416548728943, + "step": 5369 + }, + { + "epoch": 0.9922509267706806, + "grad_norm": 0.0567890889942646, + "learning_rate": 1.5622285652927477e-05, + "loss": 0.4587095379829407, + "step": 5370 + }, + { + "epoch": 0.9924357034795764, + "grad_norm": 0.08920449018478394, + "learning_rate": 1.5620635782225247e-05, + "loss": 0.7942427396774292, + "step": 5371 + }, + { + "epoch": 0.9926204801884723, + "grad_norm": 0.07286550104618073, + "learning_rate": 1.5618985687839298e-05, + "loss": 0.6068935394287109, + "step": 5372 + }, + { + "epoch": 0.9928052568973681, + "grad_norm": 0.08293316513299942, + "learning_rate": 1.5617335369835296e-05, + "loss": 0.740690290927887, + "step": 5373 + }, + { + "epoch": 0.9929900336062639, + "grad_norm": 0.06495033204555511, + "learning_rate": 1.5615684828278923e-05, + "loss": 0.5658296942710876, + "step": 5374 + }, + { + "epoch": 0.9931748103151598, + "grad_norm": 0.0681936964392662, + "learning_rate": 1.5614034063235864e-05, + "loss": 0.4906209707260132, + "step": 5375 + }, + { + "epoch": 0.9933595870240556, + "grad_norm": 0.07649872452020645, + "learning_rate": 1.561238307477181e-05, + "loss": 0.6106581687927246, + "step": 5376 + }, + { + "epoch": 0.9935443637329514, + "grad_norm": 0.06747819483280182, + "learning_rate": 1.5610731862952468e-05, + "loss": 0.5255357027053833, + "step": 5377 + }, + { + "epoch": 0.9937291404418473, + "grad_norm": 0.05354423075914383, + "learning_rate": 1.5609080427843556e-05, + "loss": 0.3053976595401764, + "step": 5378 + }, + { + "epoch": 0.9939139171507432, + "grad_norm": 0.0726679265499115, + "learning_rate": 1.560742876951079e-05, + "loss": 0.598291277885437, + "step": 5379 + }, + { + "epoch": 0.994098693859639, + "grad_norm": 0.06871083378791809, + "learning_rate": 1.5605776888019902e-05, + "loss": 0.5416322350502014, + "step": 5380 + }, + { + "epoch": 0.9942834705685348, + "grad_norm": 0.062419094145298004, + "learning_rate": 1.560412478343663e-05, + "loss": 0.4946483075618744, + "step": 5381 + }, + { + "epoch": 0.9944682472774307, + "grad_norm": 0.0625448226928711, + "learning_rate": 1.560247245582673e-05, + "loss": 0.4920055866241455, + "step": 5382 + }, + { + "epoch": 0.9946530239863265, + "grad_norm": 0.0630967766046524, + "learning_rate": 1.560081990525595e-05, + "loss": 0.5786566138267517, + "step": 5383 + }, + { + "epoch": 0.9948378006952223, + "grad_norm": 0.06180526688694954, + "learning_rate": 1.559916713179006e-05, + "loss": 0.43185970187187195, + "step": 5384 + }, + { + "epoch": 0.9950225774041183, + "grad_norm": 0.07326313853263855, + "learning_rate": 1.559751413549484e-05, + "loss": 0.6359105110168457, + "step": 5385 + }, + { + "epoch": 0.9952073541130141, + "grad_norm": 0.06609123200178146, + "learning_rate": 1.5595860916436064e-05, + "loss": 0.4288696348667145, + "step": 5386 + }, + { + "epoch": 0.9953921308219099, + "grad_norm": 0.07263244688510895, + "learning_rate": 1.5594207474679533e-05, + "loss": 0.5994312167167664, + "step": 5387 + }, + { + "epoch": 0.9955769075308057, + "grad_norm": 0.08110374957323074, + "learning_rate": 1.5592553810291045e-05, + "loss": 0.5752834677696228, + "step": 5388 + }, + { + "epoch": 0.9957616842397016, + "grad_norm": 0.07594095915555954, + "learning_rate": 1.5590899923336417e-05, + "loss": 0.6776776909828186, + "step": 5389 + }, + { + "epoch": 0.9959464609485974, + "grad_norm": 0.08966058492660522, + "learning_rate": 1.558924581388146e-05, + "loss": 0.6651446223258972, + "step": 5390 + }, + { + "epoch": 0.9961312376574932, + "grad_norm": 0.06374116241931915, + "learning_rate": 1.5587591481992008e-05, + "loss": 0.4789484143257141, + "step": 5391 + }, + { + "epoch": 0.9963160143663892, + "grad_norm": 0.0633915439248085, + "learning_rate": 1.5585936927733897e-05, + "loss": 0.5667264461517334, + "step": 5392 + }, + { + "epoch": 0.996500791075285, + "grad_norm": 0.05590339004993439, + "learning_rate": 1.558428215117297e-05, + "loss": 0.4168759882450104, + "step": 5393 + }, + { + "epoch": 0.9966855677841808, + "grad_norm": 0.08429696410894394, + "learning_rate": 1.5582627152375084e-05, + "loss": 0.6339855790138245, + "step": 5394 + }, + { + "epoch": 0.9968703444930767, + "grad_norm": 0.05786977708339691, + "learning_rate": 1.5580971931406105e-05, + "loss": 0.46310970187187195, + "step": 5395 + }, + { + "epoch": 0.9970551212019725, + "grad_norm": 0.0657820925116539, + "learning_rate": 1.55793164883319e-05, + "loss": 0.4870114028453827, + "step": 5396 + }, + { + "epoch": 0.9972398979108683, + "grad_norm": 0.0635991096496582, + "learning_rate": 1.557766082321836e-05, + "loss": 0.4639195501804352, + "step": 5397 + }, + { + "epoch": 0.9974246746197641, + "grad_norm": 0.0636812224984169, + "learning_rate": 1.5576004936131366e-05, + "loss": 0.5039251446723938, + "step": 5398 + }, + { + "epoch": 0.99760945132866, + "grad_norm": 0.07310467213392258, + "learning_rate": 1.5574348827136823e-05, + "loss": 0.5871421694755554, + "step": 5399 + }, + { + "epoch": 0.9977942280375559, + "grad_norm": 0.05908821150660515, + "learning_rate": 1.557269249630063e-05, + "loss": 0.5073022246360779, + "step": 5400 + }, + { + "epoch": 0.9979790047464517, + "grad_norm": 0.06619273126125336, + "learning_rate": 1.557103594368872e-05, + "loss": 0.5582687854766846, + "step": 5401 + }, + { + "epoch": 0.9981637814553476, + "grad_norm": 0.09524130076169968, + "learning_rate": 1.5569379169367005e-05, + "loss": 0.9023342132568359, + "step": 5402 + }, + { + "epoch": 0.9983485581642434, + "grad_norm": 0.06774596124887466, + "learning_rate": 1.556772217340142e-05, + "loss": 0.5595493912696838, + "step": 5403 + }, + { + "epoch": 0.9985333348731392, + "grad_norm": 0.07581521570682526, + "learning_rate": 1.5566064955857916e-05, + "loss": 0.6437134146690369, + "step": 5404 + }, + { + "epoch": 0.998718111582035, + "grad_norm": 0.08497077226638794, + "learning_rate": 1.5564407516802437e-05, + "loss": 0.72376549243927, + "step": 5405 + }, + { + "epoch": 0.9989028882909309, + "grad_norm": 0.07422885298728943, + "learning_rate": 1.556274985630095e-05, + "loss": 0.7248567342758179, + "step": 5406 + }, + { + "epoch": 0.9990876649998268, + "grad_norm": 0.08695416897535324, + "learning_rate": 1.5561091974419423e-05, + "loss": 0.7146385312080383, + "step": 5407 + }, + { + "epoch": 0.9992724417087226, + "grad_norm": 0.08638768643140793, + "learning_rate": 1.555943387122383e-05, + "loss": 0.8375434279441833, + "step": 5408 + }, + { + "epoch": 0.9994572184176185, + "grad_norm": 0.068270742893219, + "learning_rate": 1.555777554678017e-05, + "loss": 0.6461607813835144, + "step": 5409 + }, + { + "epoch": 0.9996419951265143, + "grad_norm": 0.07097753137350082, + "learning_rate": 1.5556117001154426e-05, + "loss": 0.5239364504814148, + "step": 5410 + }, + { + "epoch": 0.9998267718354101, + "grad_norm": 0.055535804480314255, + "learning_rate": 1.5554458234412607e-05, + "loss": 0.44697487354278564, + "step": 5411 + }, + { + "epoch": 1.0, + "grad_norm": 0.07980701327323914, + "learning_rate": 1.555279924662073e-05, + "loss": 0.6596300601959229, + "step": 5412 + }, + { + "epoch": 1.0001847767088958, + "grad_norm": 0.06499882787466049, + "learning_rate": 1.5551140037844816e-05, + "loss": 0.5656397938728333, + "step": 5413 + }, + { + "epoch": 1.0003695534177917, + "grad_norm": 0.0532810240983963, + "learning_rate": 1.5549480608150892e-05, + "loss": 0.4539932906627655, + "step": 5414 + }, + { + "epoch": 1.0005543301266875, + "grad_norm": 0.07145704329013824, + "learning_rate": 1.5547820957605006e-05, + "loss": 0.6365221738815308, + "step": 5415 + }, + { + "epoch": 1.0007391068355833, + "grad_norm": 0.06368197500705719, + "learning_rate": 1.5546161086273203e-05, + "loss": 0.5140100717544556, + "step": 5416 + }, + { + "epoch": 1.0009238835444791, + "grad_norm": 0.0678260400891304, + "learning_rate": 1.5544500994221537e-05, + "loss": 0.6245336532592773, + "step": 5417 + }, + { + "epoch": 1.001108660253375, + "grad_norm": 0.07416384667158127, + "learning_rate": 1.554284068151608e-05, + "loss": 0.5578438639640808, + "step": 5418 + }, + { + "epoch": 1.0012934369622708, + "grad_norm": 0.07240698486566544, + "learning_rate": 1.5541180148222907e-05, + "loss": 0.46354150772094727, + "step": 5419 + }, + { + "epoch": 1.0014782136711669, + "grad_norm": 0.07387363910675049, + "learning_rate": 1.5539519394408096e-05, + "loss": 0.513384997844696, + "step": 5420 + }, + { + "epoch": 1.0016629903800627, + "grad_norm": 0.060762811452150345, + "learning_rate": 1.5537858420137748e-05, + "loss": 0.4742019474506378, + "step": 5421 + }, + { + "epoch": 1.0018477670889585, + "grad_norm": 0.0828094333410263, + "learning_rate": 1.553619722547796e-05, + "loss": 0.6738830208778381, + "step": 5422 + }, + { + "epoch": 1.0020325437978543, + "grad_norm": 0.06672517955303192, + "learning_rate": 1.553453581049484e-05, + "loss": 0.37967976927757263, + "step": 5423 + }, + { + "epoch": 1.0022173205067502, + "grad_norm": 0.07504408061504364, + "learning_rate": 1.5532874175254512e-05, + "loss": 0.48869621753692627, + "step": 5424 + }, + { + "epoch": 1.002402097215646, + "grad_norm": 0.07129882276058197, + "learning_rate": 1.5531212319823104e-05, + "loss": 0.5685672163963318, + "step": 5425 + }, + { + "epoch": 1.0025868739245418, + "grad_norm": 0.06440310180187225, + "learning_rate": 1.5529550244266746e-05, + "loss": 0.46098026633262634, + "step": 5426 + }, + { + "epoch": 1.0027716506334377, + "grad_norm": 0.07999856770038605, + "learning_rate": 1.5527887948651594e-05, + "loss": 0.6286073923110962, + "step": 5427 + }, + { + "epoch": 1.0029564273423335, + "grad_norm": 0.06721231341362, + "learning_rate": 1.5526225433043787e-05, + "loss": 0.45072394609451294, + "step": 5428 + }, + { + "epoch": 1.0031412040512293, + "grad_norm": 0.09263379871845245, + "learning_rate": 1.5524562697509507e-05, + "loss": 0.5145958065986633, + "step": 5429 + }, + { + "epoch": 1.0033259807601251, + "grad_norm": 0.08839530497789383, + "learning_rate": 1.552289974211491e-05, + "loss": 0.6489437818527222, + "step": 5430 + }, + { + "epoch": 1.003510757469021, + "grad_norm": 0.08482103794813156, + "learning_rate": 1.5521236566926187e-05, + "loss": 0.5917055606842041, + "step": 5431 + }, + { + "epoch": 1.0036955341779168, + "grad_norm": 0.06561548262834549, + "learning_rate": 1.5519573172009517e-05, + "loss": 0.45737460255622864, + "step": 5432 + }, + { + "epoch": 1.0038803108868126, + "grad_norm": 0.07934774458408356, + "learning_rate": 1.5517909557431106e-05, + "loss": 0.50255286693573, + "step": 5433 + }, + { + "epoch": 1.0040650875957087, + "grad_norm": 0.08711782097816467, + "learning_rate": 1.5516245723257156e-05, + "loss": 0.7246228456497192, + "step": 5434 + }, + { + "epoch": 1.0042498643046045, + "grad_norm": 0.08274222910404205, + "learning_rate": 1.5514581669553887e-05, + "loss": 0.6145168542861938, + "step": 5435 + }, + { + "epoch": 1.0044346410135003, + "grad_norm": 0.0734354630112648, + "learning_rate": 1.5512917396387518e-05, + "loss": 0.6290450692176819, + "step": 5436 + }, + { + "epoch": 1.0046194177223962, + "grad_norm": 0.0731857642531395, + "learning_rate": 1.5511252903824284e-05, + "loss": 0.5591952204704285, + "step": 5437 + }, + { + "epoch": 1.004804194431292, + "grad_norm": 0.0669926181435585, + "learning_rate": 1.550958819193043e-05, + "loss": 0.4976261556148529, + "step": 5438 + }, + { + "epoch": 1.0049889711401878, + "grad_norm": 0.06242423877120018, + "learning_rate": 1.5507923260772206e-05, + "loss": 0.4081506133079529, + "step": 5439 + }, + { + "epoch": 1.0051737478490836, + "grad_norm": 0.06533505767583847, + "learning_rate": 1.550625811041586e-05, + "loss": 0.5145694613456726, + "step": 5440 + }, + { + "epoch": 1.0053585245579795, + "grad_norm": 0.09347716718912125, + "learning_rate": 1.5504592740927673e-05, + "loss": 0.7778919339179993, + "step": 5441 + }, + { + "epoch": 1.0055433012668753, + "grad_norm": 0.060112446546554565, + "learning_rate": 1.5502927152373913e-05, + "loss": 0.4014849364757538, + "step": 5442 + }, + { + "epoch": 1.0057280779757711, + "grad_norm": 0.06632987409830093, + "learning_rate": 1.5501261344820875e-05, + "loss": 0.5001177191734314, + "step": 5443 + }, + { + "epoch": 1.005912854684667, + "grad_norm": 0.06259676814079285, + "learning_rate": 1.5499595318334842e-05, + "loss": 0.502464771270752, + "step": 5444 + }, + { + "epoch": 1.0060976313935628, + "grad_norm": 0.06666150689125061, + "learning_rate": 1.5497929072982123e-05, + "loss": 0.4545712471008301, + "step": 5445 + }, + { + "epoch": 1.0062824081024586, + "grad_norm": 0.07456757128238678, + "learning_rate": 1.5496262608829026e-05, + "loss": 0.6057341694831848, + "step": 5446 + }, + { + "epoch": 1.0064671848113544, + "grad_norm": 0.0757986307144165, + "learning_rate": 1.5494595925941875e-05, + "loss": 0.6850146055221558, + "step": 5447 + }, + { + "epoch": 1.0066519615202503, + "grad_norm": 0.07049146294593811, + "learning_rate": 1.5492929024386995e-05, + "loss": 0.5972450971603394, + "step": 5448 + }, + { + "epoch": 1.0068367382291463, + "grad_norm": 0.06518596410751343, + "learning_rate": 1.549126190423073e-05, + "loss": 0.5264145135879517, + "step": 5449 + }, + { + "epoch": 1.0070215149380422, + "grad_norm": 0.08410429954528809, + "learning_rate": 1.5489594565539415e-05, + "loss": 0.8311405181884766, + "step": 5450 + }, + { + "epoch": 1.007206291646938, + "grad_norm": 0.08313155174255371, + "learning_rate": 1.5487927008379407e-05, + "loss": 0.6686275601387024, + "step": 5451 + }, + { + "epoch": 1.0073910683558338, + "grad_norm": 0.07137402147054672, + "learning_rate": 1.548625923281708e-05, + "loss": 0.5926514267921448, + "step": 5452 + }, + { + "epoch": 1.0075758450647296, + "grad_norm": 0.06430884450674057, + "learning_rate": 1.5484591238918802e-05, + "loss": 0.3721638023853302, + "step": 5453 + }, + { + "epoch": 1.0077606217736255, + "grad_norm": 0.07286176085472107, + "learning_rate": 1.548292302675095e-05, + "loss": 0.6848993301391602, + "step": 5454 + }, + { + "epoch": 1.0079453984825213, + "grad_norm": 0.0833728238940239, + "learning_rate": 1.5481254596379914e-05, + "loss": 0.6791229248046875, + "step": 5455 + }, + { + "epoch": 1.0081301751914171, + "grad_norm": 0.07102257013320923, + "learning_rate": 1.5479585947872093e-05, + "loss": 0.41884228587150574, + "step": 5456 + }, + { + "epoch": 1.008314951900313, + "grad_norm": 0.07091695815324783, + "learning_rate": 1.5477917081293895e-05, + "loss": 0.5533478260040283, + "step": 5457 + }, + { + "epoch": 1.0084997286092088, + "grad_norm": 0.04651034250855446, + "learning_rate": 1.5476247996711737e-05, + "loss": 0.3375348448753357, + "step": 5458 + }, + { + "epoch": 1.0086845053181046, + "grad_norm": 0.0720076709985733, + "learning_rate": 1.547457869419204e-05, + "loss": 0.5965259671211243, + "step": 5459 + }, + { + "epoch": 1.0088692820270004, + "grad_norm": 0.05651504918932915, + "learning_rate": 1.547290917380124e-05, + "loss": 0.3244526982307434, + "step": 5460 + }, + { + "epoch": 1.0090540587358963, + "grad_norm": 0.06690341979265213, + "learning_rate": 1.5471239435605777e-05, + "loss": 0.455815851688385, + "step": 5461 + }, + { + "epoch": 1.009238835444792, + "grad_norm": 0.0782356858253479, + "learning_rate": 1.5469569479672102e-05, + "loss": 0.6343777775764465, + "step": 5462 + }, + { + "epoch": 1.009423612153688, + "grad_norm": 0.06311261653900146, + "learning_rate": 1.5467899306066674e-05, + "loss": 0.44077301025390625, + "step": 5463 + }, + { + "epoch": 1.009608388862584, + "grad_norm": 0.08928355574607849, + "learning_rate": 1.5466228914855957e-05, + "loss": 0.6142847537994385, + "step": 5464 + }, + { + "epoch": 1.0097931655714798, + "grad_norm": 0.07347680628299713, + "learning_rate": 1.5464558306106438e-05, + "loss": 0.5666141510009766, + "step": 5465 + }, + { + "epoch": 1.0099779422803756, + "grad_norm": 0.08045663684606552, + "learning_rate": 1.546288747988459e-05, + "loss": 0.6125535368919373, + "step": 5466 + }, + { + "epoch": 1.0101627189892715, + "grad_norm": 0.07437922060489655, + "learning_rate": 1.546121643625691e-05, + "loss": 0.6528797745704651, + "step": 5467 + }, + { + "epoch": 1.0103474956981673, + "grad_norm": 0.058625247329473495, + "learning_rate": 1.5459545175289904e-05, + "loss": 0.4213486909866333, + "step": 5468 + }, + { + "epoch": 1.0105322724070631, + "grad_norm": 0.07732836157083511, + "learning_rate": 1.5457873697050083e-05, + "loss": 0.6027098298072815, + "step": 5469 + }, + { + "epoch": 1.010717049115959, + "grad_norm": 0.07866568118333817, + "learning_rate": 1.545620200160396e-05, + "loss": 0.6570467948913574, + "step": 5470 + }, + { + "epoch": 1.0109018258248548, + "grad_norm": 0.07446414977312088, + "learning_rate": 1.5454530089018068e-05, + "loss": 0.5937471389770508, + "step": 5471 + }, + { + "epoch": 1.0110866025337506, + "grad_norm": 0.06321976333856583, + "learning_rate": 1.545285795935895e-05, + "loss": 0.43261998891830444, + "step": 5472 + }, + { + "epoch": 1.0112713792426464, + "grad_norm": 0.0733419880270958, + "learning_rate": 1.545118561269314e-05, + "loss": 0.5204454064369202, + "step": 5473 + }, + { + "epoch": 1.0114561559515423, + "grad_norm": 0.07306050509214401, + "learning_rate": 1.5449513049087196e-05, + "loss": 0.5122585296630859, + "step": 5474 + }, + { + "epoch": 1.011640932660438, + "grad_norm": 0.07231169193983078, + "learning_rate": 1.5447840268607684e-05, + "loss": 0.5543947219848633, + "step": 5475 + }, + { + "epoch": 1.011825709369334, + "grad_norm": 0.06687439233064651, + "learning_rate": 1.544616727132117e-05, + "loss": 0.5254085063934326, + "step": 5476 + }, + { + "epoch": 1.0120104860782297, + "grad_norm": 0.06679672747850418, + "learning_rate": 1.544449405729424e-05, + "loss": 0.5370311737060547, + "step": 5477 + }, + { + "epoch": 1.0121952627871256, + "grad_norm": 0.06086615473031998, + "learning_rate": 1.544282062659348e-05, + "loss": 0.5779776573181152, + "step": 5478 + }, + { + "epoch": 1.0123800394960216, + "grad_norm": 0.07743128389120102, + "learning_rate": 1.544114697928549e-05, + "loss": 0.601859450340271, + "step": 5479 + }, + { + "epoch": 1.0125648162049175, + "grad_norm": 0.0775611400604248, + "learning_rate": 1.5439473115436872e-05, + "loss": 0.5330616235733032, + "step": 5480 + }, + { + "epoch": 1.0127495929138133, + "grad_norm": 0.07540713995695114, + "learning_rate": 1.543779903511424e-05, + "loss": 0.5143282413482666, + "step": 5481 + }, + { + "epoch": 1.012934369622709, + "grad_norm": 0.06653149425983429, + "learning_rate": 1.543612473838422e-05, + "loss": 0.5389485955238342, + "step": 5482 + }, + { + "epoch": 1.013119146331605, + "grad_norm": 0.06639883667230606, + "learning_rate": 1.5434450225313443e-05, + "loss": 0.486784428358078, + "step": 5483 + }, + { + "epoch": 1.0133039230405008, + "grad_norm": 0.07527770102024078, + "learning_rate": 1.5432775495968552e-05, + "loss": 0.5490717887878418, + "step": 5484 + }, + { + "epoch": 1.0134886997493966, + "grad_norm": 0.09229796379804611, + "learning_rate": 1.5431100550416187e-05, + "loss": 0.7034298777580261, + "step": 5485 + }, + { + "epoch": 1.0136734764582924, + "grad_norm": 0.0757904052734375, + "learning_rate": 1.5429425388723017e-05, + "loss": 0.5530351996421814, + "step": 5486 + }, + { + "epoch": 1.0138582531671883, + "grad_norm": 0.08032525330781937, + "learning_rate": 1.5427750010955702e-05, + "loss": 0.5245693325996399, + "step": 5487 + }, + { + "epoch": 1.014043029876084, + "grad_norm": 0.07433024793863297, + "learning_rate": 1.5426074417180918e-05, + "loss": 0.5217931270599365, + "step": 5488 + }, + { + "epoch": 1.01422780658498, + "grad_norm": 0.07630334794521332, + "learning_rate": 1.5424398607465344e-05, + "loss": 0.46245628595352173, + "step": 5489 + }, + { + "epoch": 1.0144125832938757, + "grad_norm": 0.06917870044708252, + "learning_rate": 1.542272258187568e-05, + "loss": 0.5663442015647888, + "step": 5490 + }, + { + "epoch": 1.0145973600027716, + "grad_norm": 0.06446558982133865, + "learning_rate": 1.5421046340478625e-05, + "loss": 0.49829304218292236, + "step": 5491 + }, + { + "epoch": 1.0147821367116674, + "grad_norm": 0.06780035048723221, + "learning_rate": 1.541936988334088e-05, + "loss": 0.5756086707115173, + "step": 5492 + }, + { + "epoch": 1.0149669134205634, + "grad_norm": 0.09589719772338867, + "learning_rate": 1.5417693210529172e-05, + "loss": 0.7299249768257141, + "step": 5493 + }, + { + "epoch": 1.0151516901294593, + "grad_norm": 0.07468991726636887, + "learning_rate": 1.5416016322110224e-05, + "loss": 0.37915417551994324, + "step": 5494 + }, + { + "epoch": 1.015336466838355, + "grad_norm": 0.08760546892881393, + "learning_rate": 1.5414339218150773e-05, + "loss": 0.661727786064148, + "step": 5495 + }, + { + "epoch": 1.015521243547251, + "grad_norm": 0.07419498264789581, + "learning_rate": 1.541266189871756e-05, + "loss": 0.5948960185050964, + "step": 5496 + }, + { + "epoch": 1.0157060202561468, + "grad_norm": 0.0739092081785202, + "learning_rate": 1.5410984363877336e-05, + "loss": 0.5626744031906128, + "step": 5497 + }, + { + "epoch": 1.0158907969650426, + "grad_norm": 0.07224955409765244, + "learning_rate": 1.5409306613696863e-05, + "loss": 0.576987087726593, + "step": 5498 + }, + { + "epoch": 1.0160755736739384, + "grad_norm": 0.0674276202917099, + "learning_rate": 1.5407628648242908e-05, + "loss": 0.5401109457015991, + "step": 5499 + }, + { + "epoch": 1.0162603503828342, + "grad_norm": 0.06937623769044876, + "learning_rate": 1.5405950467582253e-05, + "loss": 0.5262582302093506, + "step": 5500 + }, + { + "epoch": 1.0162603503828342, + "eval_loss": 0.6191006898880005, + "eval_runtime": 158.2235, + "eval_samples_per_second": 115.21, + "eval_steps_per_second": 14.404, + "step": 5500 + }, + { + "epoch": 1.01644512709173, + "grad_norm": 0.078186996281147, + "learning_rate": 1.5404272071781683e-05, + "loss": 0.605469822883606, + "step": 5501 + }, + { + "epoch": 1.016629903800626, + "grad_norm": 0.06780393421649933, + "learning_rate": 1.5402593460907992e-05, + "loss": 0.4818827211856842, + "step": 5502 + }, + { + "epoch": 1.0168146805095217, + "grad_norm": 0.07865656167268753, + "learning_rate": 1.5400914635027985e-05, + "loss": 0.5803611874580383, + "step": 5503 + }, + { + "epoch": 1.0169994572184176, + "grad_norm": 0.05911247432231903, + "learning_rate": 1.5399235594208472e-05, + "loss": 0.38554736971855164, + "step": 5504 + }, + { + "epoch": 1.0171842339273134, + "grad_norm": 0.07047073543071747, + "learning_rate": 1.5397556338516273e-05, + "loss": 0.5108228325843811, + "step": 5505 + }, + { + "epoch": 1.0173690106362092, + "grad_norm": 0.0866054967045784, + "learning_rate": 1.539587686801822e-05, + "loss": 0.597832202911377, + "step": 5506 + }, + { + "epoch": 1.017553787345105, + "grad_norm": 0.08333642035722733, + "learning_rate": 1.539419718278115e-05, + "loss": 0.550136387348175, + "step": 5507 + }, + { + "epoch": 1.017738564054001, + "grad_norm": 0.09204985201358795, + "learning_rate": 1.5392517282871906e-05, + "loss": 0.6344136595726013, + "step": 5508 + }, + { + "epoch": 1.017923340762897, + "grad_norm": 0.05923591926693916, + "learning_rate": 1.5390837168357346e-05, + "loss": 0.4173552393913269, + "step": 5509 + }, + { + "epoch": 1.0181081174717928, + "grad_norm": 0.08533397316932678, + "learning_rate": 1.5389156839304332e-05, + "loss": 0.6466034054756165, + "step": 5510 + }, + { + "epoch": 1.0182928941806886, + "grad_norm": 0.07171865552663803, + "learning_rate": 1.5387476295779737e-05, + "loss": 0.6014915108680725, + "step": 5511 + }, + { + "epoch": 1.0184776708895844, + "grad_norm": 0.07948136329650879, + "learning_rate": 1.5385795537850442e-05, + "loss": 0.588539183139801, + "step": 5512 + }, + { + "epoch": 1.0186624475984802, + "grad_norm": 0.07882174849510193, + "learning_rate": 1.5384114565583332e-05, + "loss": 0.47921741008758545, + "step": 5513 + }, + { + "epoch": 1.018847224307376, + "grad_norm": 0.07933034002780914, + "learning_rate": 1.538243337904531e-05, + "loss": 0.5907740592956543, + "step": 5514 + }, + { + "epoch": 1.019032001016272, + "grad_norm": 0.06324376910924911, + "learning_rate": 1.538075197830328e-05, + "loss": 0.5618665218353271, + "step": 5515 + }, + { + "epoch": 1.0192167777251677, + "grad_norm": 0.06595669686794281, + "learning_rate": 1.5379070363424153e-05, + "loss": 0.4812150299549103, + "step": 5516 + }, + { + "epoch": 1.0194015544340636, + "grad_norm": 0.05870814248919487, + "learning_rate": 1.5377388534474852e-05, + "loss": 0.40500393509864807, + "step": 5517 + }, + { + "epoch": 1.0195863311429594, + "grad_norm": 0.07318040728569031, + "learning_rate": 1.5375706491522312e-05, + "loss": 0.4814295172691345, + "step": 5518 + }, + { + "epoch": 1.0197711078518552, + "grad_norm": 0.07311618328094482, + "learning_rate": 1.5374024234633474e-05, + "loss": 0.6199733018875122, + "step": 5519 + }, + { + "epoch": 1.019955884560751, + "grad_norm": 0.07848178595304489, + "learning_rate": 1.5372341763875285e-05, + "loss": 0.6515681147575378, + "step": 5520 + }, + { + "epoch": 1.0201406612696469, + "grad_norm": 0.08523409068584442, + "learning_rate": 1.53706590793147e-05, + "loss": 0.5154955983161926, + "step": 5521 + }, + { + "epoch": 1.020325437978543, + "grad_norm": 0.07961570471525192, + "learning_rate": 1.536897618101869e-05, + "loss": 0.5723915100097656, + "step": 5522 + }, + { + "epoch": 1.0205102146874387, + "grad_norm": 0.06371118128299713, + "learning_rate": 1.5367293069054217e-05, + "loss": 0.49540960788726807, + "step": 5523 + }, + { + "epoch": 1.0206949913963346, + "grad_norm": 0.06904740631580353, + "learning_rate": 1.536560974348828e-05, + "loss": 0.5264773368835449, + "step": 5524 + }, + { + "epoch": 1.0208797681052304, + "grad_norm": 0.055011942982673645, + "learning_rate": 1.536392620438786e-05, + "loss": 0.430140882730484, + "step": 5525 + }, + { + "epoch": 1.0210645448141262, + "grad_norm": 0.07497008144855499, + "learning_rate": 1.536224245181996e-05, + "loss": 0.5237014889717102, + "step": 5526 + }, + { + "epoch": 1.021249321523022, + "grad_norm": 0.07027363777160645, + "learning_rate": 1.536055848585158e-05, + "loss": 0.5692754983901978, + "step": 5527 + }, + { + "epoch": 1.0214340982319179, + "grad_norm": 0.11043912917375565, + "learning_rate": 1.535887430654975e-05, + "loss": 0.46968314051628113, + "step": 5528 + }, + { + "epoch": 1.0216188749408137, + "grad_norm": 0.08006355911493301, + "learning_rate": 1.535718991398149e-05, + "loss": 0.49496397376060486, + "step": 5529 + }, + { + "epoch": 1.0218036516497095, + "grad_norm": 0.06432589888572693, + "learning_rate": 1.535550530821382e-05, + "loss": 0.3639981746673584, + "step": 5530 + }, + { + "epoch": 1.0219884283586054, + "grad_norm": 0.10602528601884842, + "learning_rate": 1.5353820489313807e-05, + "loss": 0.7828198075294495, + "step": 5531 + }, + { + "epoch": 1.0221732050675012, + "grad_norm": 0.07818809896707535, + "learning_rate": 1.5352135457348488e-05, + "loss": 0.6441103219985962, + "step": 5532 + }, + { + "epoch": 1.022357981776397, + "grad_norm": 0.08831508457660675, + "learning_rate": 1.5350450212384914e-05, + "loss": 0.5698842406272888, + "step": 5533 + }, + { + "epoch": 1.0225427584852929, + "grad_norm": 0.06406545639038086, + "learning_rate": 1.5348764754490165e-05, + "loss": 0.37343931198120117, + "step": 5534 + }, + { + "epoch": 1.0227275351941887, + "grad_norm": 0.08259700238704681, + "learning_rate": 1.5347079083731314e-05, + "loss": 0.5247676968574524, + "step": 5535 + }, + { + "epoch": 1.0229123119030845, + "grad_norm": 0.08037258684635162, + "learning_rate": 1.5345393200175442e-05, + "loss": 0.6214337348937988, + "step": 5536 + }, + { + "epoch": 1.0230970886119806, + "grad_norm": 0.0930820181965828, + "learning_rate": 1.5343707103889647e-05, + "loss": 0.6681873202323914, + "step": 5537 + }, + { + "epoch": 1.0232818653208764, + "grad_norm": 0.08174339681863785, + "learning_rate": 1.5342020794941025e-05, + "loss": 0.6081960797309875, + "step": 5538 + }, + { + "epoch": 1.0234666420297722, + "grad_norm": 0.0695425271987915, + "learning_rate": 1.5340334273396695e-05, + "loss": 0.4818345606327057, + "step": 5539 + }, + { + "epoch": 1.023651418738668, + "grad_norm": 0.06923358887434006, + "learning_rate": 1.5338647539323758e-05, + "loss": 0.43072810769081116, + "step": 5540 + }, + { + "epoch": 1.0238361954475639, + "grad_norm": 0.09394175559282303, + "learning_rate": 1.5336960592789357e-05, + "loss": 0.6028846502304077, + "step": 5541 + }, + { + "epoch": 1.0240209721564597, + "grad_norm": 0.0752132385969162, + "learning_rate": 1.533527343386062e-05, + "loss": 0.4695470333099365, + "step": 5542 + }, + { + "epoch": 1.0242057488653555, + "grad_norm": 0.07551165670156479, + "learning_rate": 1.5333586062604696e-05, + "loss": 0.5507363080978394, + "step": 5543 + }, + { + "epoch": 1.0243905255742514, + "grad_norm": 0.07440776377916336, + "learning_rate": 1.5331898479088732e-05, + "loss": 0.45515015721321106, + "step": 5544 + }, + { + "epoch": 1.0245753022831472, + "grad_norm": 0.08244110643863678, + "learning_rate": 1.533021068337989e-05, + "loss": 0.6030721664428711, + "step": 5545 + }, + { + "epoch": 1.024760078992043, + "grad_norm": 0.07014291733503342, + "learning_rate": 1.5328522675545334e-05, + "loss": 0.5677472949028015, + "step": 5546 + }, + { + "epoch": 1.0249448557009389, + "grad_norm": 0.07949929684400558, + "learning_rate": 1.532683445565225e-05, + "loss": 0.6872313618659973, + "step": 5547 + }, + { + "epoch": 1.0251296324098347, + "grad_norm": 0.07746239751577377, + "learning_rate": 1.532514602376782e-05, + "loss": 0.6091598868370056, + "step": 5548 + }, + { + "epoch": 1.0253144091187305, + "grad_norm": 0.0781155452132225, + "learning_rate": 1.532345737995924e-05, + "loss": 0.5775952339172363, + "step": 5549 + }, + { + "epoch": 1.0254991858276263, + "grad_norm": 0.07638484984636307, + "learning_rate": 1.532176852429371e-05, + "loss": 0.4975985288619995, + "step": 5550 + }, + { + "epoch": 1.0256839625365222, + "grad_norm": 0.0655810683965683, + "learning_rate": 1.5320079456838443e-05, + "loss": 0.4833434522151947, + "step": 5551 + }, + { + "epoch": 1.0258687392454182, + "grad_norm": 0.07090182602405548, + "learning_rate": 1.531839017766066e-05, + "loss": 0.4814104437828064, + "step": 5552 + }, + { + "epoch": 1.026053515954314, + "grad_norm": 0.08101111650466919, + "learning_rate": 1.5316700686827584e-05, + "loss": 0.5643125772476196, + "step": 5553 + }, + { + "epoch": 1.0262382926632099, + "grad_norm": 0.07077205181121826, + "learning_rate": 1.5315010984406454e-05, + "loss": 0.4897610545158386, + "step": 5554 + }, + { + "epoch": 1.0264230693721057, + "grad_norm": 0.0607805959880352, + "learning_rate": 1.5313321070464517e-05, + "loss": 0.4320688843727112, + "step": 5555 + }, + { + "epoch": 1.0266078460810015, + "grad_norm": 0.08007220178842545, + "learning_rate": 1.531163094506902e-05, + "loss": 0.6080632209777832, + "step": 5556 + }, + { + "epoch": 1.0267926227898974, + "grad_norm": 0.0696863904595375, + "learning_rate": 1.5309940608287234e-05, + "loss": 0.47654882073402405, + "step": 5557 + }, + { + "epoch": 1.0269773994987932, + "grad_norm": 0.08483091741800308, + "learning_rate": 1.5308250060186428e-05, + "loss": 0.7323111295700073, + "step": 5558 + }, + { + "epoch": 1.027162176207689, + "grad_norm": 0.07609230279922485, + "learning_rate": 1.530655930083387e-05, + "loss": 0.5510598421096802, + "step": 5559 + }, + { + "epoch": 1.0273469529165848, + "grad_norm": 0.05846942961215973, + "learning_rate": 1.5304868330296854e-05, + "loss": 0.4501841068267822, + "step": 5560 + }, + { + "epoch": 1.0275317296254807, + "grad_norm": 0.0615713931620121, + "learning_rate": 1.530317714864268e-05, + "loss": 0.3922766149044037, + "step": 5561 + }, + { + "epoch": 1.0277165063343765, + "grad_norm": 0.08426828682422638, + "learning_rate": 1.5301485755938648e-05, + "loss": 0.803465723991394, + "step": 5562 + }, + { + "epoch": 1.0279012830432723, + "grad_norm": 0.08208931237459183, + "learning_rate": 1.5299794152252064e-05, + "loss": 0.6569002866744995, + "step": 5563 + }, + { + "epoch": 1.0280860597521682, + "grad_norm": 0.0686066746711731, + "learning_rate": 1.5298102337650254e-05, + "loss": 0.4910159707069397, + "step": 5564 + }, + { + "epoch": 1.028270836461064, + "grad_norm": 0.08395460247993469, + "learning_rate": 1.529641031220055e-05, + "loss": 0.610248327255249, + "step": 5565 + }, + { + "epoch": 1.0284556131699598, + "grad_norm": 0.05675387382507324, + "learning_rate": 1.5294718075970284e-05, + "loss": 0.40480339527130127, + "step": 5566 + }, + { + "epoch": 1.0286403898788559, + "grad_norm": 0.06499865651130676, + "learning_rate": 1.5293025629026805e-05, + "loss": 0.47910645604133606, + "step": 5567 + }, + { + "epoch": 1.0288251665877517, + "grad_norm": 0.0667455643415451, + "learning_rate": 1.5291332971437464e-05, + "loss": 0.35491806268692017, + "step": 5568 + }, + { + "epoch": 1.0290099432966475, + "grad_norm": 0.0756828561425209, + "learning_rate": 1.5289640103269626e-05, + "loss": 0.4958469867706299, + "step": 5569 + }, + { + "epoch": 1.0291947200055434, + "grad_norm": 0.06689758598804474, + "learning_rate": 1.5287947024590662e-05, + "loss": 0.4663046896457672, + "step": 5570 + }, + { + "epoch": 1.0293794967144392, + "grad_norm": 0.08043645322322845, + "learning_rate": 1.528625373546795e-05, + "loss": 0.5557774305343628, + "step": 5571 + }, + { + "epoch": 1.029564273423335, + "grad_norm": 0.07677663862705231, + "learning_rate": 1.5284560235968874e-05, + "loss": 0.6274809837341309, + "step": 5572 + }, + { + "epoch": 1.0297490501322308, + "grad_norm": 0.07894661277532578, + "learning_rate": 1.5282866526160837e-05, + "loss": 0.6272726655006409, + "step": 5573 + }, + { + "epoch": 1.0299338268411267, + "grad_norm": 0.07600554823875427, + "learning_rate": 1.528117260611124e-05, + "loss": 0.547213077545166, + "step": 5574 + }, + { + "epoch": 1.0301186035500225, + "grad_norm": 0.07473323494195938, + "learning_rate": 1.5279478475887496e-05, + "loss": 0.4859544634819031, + "step": 5575 + }, + { + "epoch": 1.0303033802589183, + "grad_norm": 0.06868378818035126, + "learning_rate": 1.527778413555703e-05, + "loss": 0.5427464246749878, + "step": 5576 + }, + { + "epoch": 1.0304881569678142, + "grad_norm": 0.08214934915304184, + "learning_rate": 1.5276089585187258e-05, + "loss": 0.6326417326927185, + "step": 5577 + }, + { + "epoch": 1.03067293367671, + "grad_norm": 0.06721127033233643, + "learning_rate": 1.5274394824845635e-05, + "loss": 0.4642300307750702, + "step": 5578 + }, + { + "epoch": 1.0308577103856058, + "grad_norm": 0.07905059307813644, + "learning_rate": 1.52726998545996e-05, + "loss": 0.5799643993377686, + "step": 5579 + }, + { + "epoch": 1.0310424870945016, + "grad_norm": 0.08044886589050293, + "learning_rate": 1.5271004674516603e-05, + "loss": 0.521228015422821, + "step": 5580 + }, + { + "epoch": 1.0312272638033977, + "grad_norm": 0.06395208090543747, + "learning_rate": 1.5269309284664112e-05, + "loss": 0.5170220136642456, + "step": 5581 + }, + { + "epoch": 1.0314120405122935, + "grad_norm": 0.0650828406214714, + "learning_rate": 1.5267613685109597e-05, + "loss": 0.5077491998672485, + "step": 5582 + }, + { + "epoch": 1.0315968172211893, + "grad_norm": 0.05765007808804512, + "learning_rate": 1.5265917875920537e-05, + "loss": 0.39931830763816833, + "step": 5583 + }, + { + "epoch": 1.0317815939300852, + "grad_norm": 0.07269348949193954, + "learning_rate": 1.526422185716442e-05, + "loss": 0.4732840955257416, + "step": 5584 + }, + { + "epoch": 1.031966370638981, + "grad_norm": 0.07275344431400299, + "learning_rate": 1.5262525628908743e-05, + "loss": 0.4784200191497803, + "step": 5585 + }, + { + "epoch": 1.0321511473478768, + "grad_norm": 0.07855315506458282, + "learning_rate": 1.5260829191221012e-05, + "loss": 0.5925523638725281, + "step": 5586 + }, + { + "epoch": 1.0323359240567727, + "grad_norm": 0.06200972571969032, + "learning_rate": 1.525913254416874e-05, + "loss": 0.4778120517730713, + "step": 5587 + }, + { + "epoch": 1.0325207007656685, + "grad_norm": 0.08109522610902786, + "learning_rate": 1.5257435687819442e-05, + "loss": 0.5588696002960205, + "step": 5588 + }, + { + "epoch": 1.0327054774745643, + "grad_norm": 0.06934543699026108, + "learning_rate": 1.5255738622240653e-05, + "loss": 0.5938635468482971, + "step": 5589 + }, + { + "epoch": 1.0328902541834601, + "grad_norm": 0.07456963509321213, + "learning_rate": 1.5254041347499912e-05, + "loss": 0.6001103520393372, + "step": 5590 + }, + { + "epoch": 1.033075030892356, + "grad_norm": 0.07474758476018906, + "learning_rate": 1.525234386366476e-05, + "loss": 0.5206001996994019, + "step": 5591 + }, + { + "epoch": 1.0332598076012518, + "grad_norm": 0.07479168474674225, + "learning_rate": 1.5250646170802759e-05, + "loss": 0.5422087907791138, + "step": 5592 + }, + { + "epoch": 1.0334445843101476, + "grad_norm": 0.07698295265436172, + "learning_rate": 1.5248948268981462e-05, + "loss": 0.6600850224494934, + "step": 5593 + }, + { + "epoch": 1.0336293610190435, + "grad_norm": 0.09354109317064285, + "learning_rate": 1.5247250158268452e-05, + "loss": 0.6611118912696838, + "step": 5594 + }, + { + "epoch": 1.0338141377279393, + "grad_norm": 0.08147210627794266, + "learning_rate": 1.5245551838731299e-05, + "loss": 0.5379778146743774, + "step": 5595 + }, + { + "epoch": 1.0339989144368353, + "grad_norm": 0.06687918305397034, + "learning_rate": 1.5243853310437593e-05, + "loss": 0.6023465991020203, + "step": 5596 + }, + { + "epoch": 1.0341836911457312, + "grad_norm": 0.09458979964256287, + "learning_rate": 1.5242154573454934e-05, + "loss": 0.7716991901397705, + "step": 5597 + }, + { + "epoch": 1.034368467854627, + "grad_norm": 0.0748145803809166, + "learning_rate": 1.524045562785092e-05, + "loss": 0.5194568634033203, + "step": 5598 + }, + { + "epoch": 1.0345532445635228, + "grad_norm": 0.08319809287786484, + "learning_rate": 1.5238756473693167e-05, + "loss": 0.6805424094200134, + "step": 5599 + }, + { + "epoch": 1.0347380212724187, + "grad_norm": 0.06190166249871254, + "learning_rate": 1.5237057111049303e-05, + "loss": 0.5145440101623535, + "step": 5600 + }, + { + "epoch": 1.0349227979813145, + "grad_norm": 0.08432719111442566, + "learning_rate": 1.5235357539986945e-05, + "loss": 0.6638540029525757, + "step": 5601 + }, + { + "epoch": 1.0351075746902103, + "grad_norm": 0.07394157350063324, + "learning_rate": 1.5233657760573737e-05, + "loss": 0.6293306946754456, + "step": 5602 + }, + { + "epoch": 1.0352923513991061, + "grad_norm": 0.08452186733484268, + "learning_rate": 1.5231957772877323e-05, + "loss": 0.6883015632629395, + "step": 5603 + }, + { + "epoch": 1.035477128108002, + "grad_norm": 0.0699499100446701, + "learning_rate": 1.5230257576965363e-05, + "loss": 0.5550675392150879, + "step": 5604 + }, + { + "epoch": 1.0356619048168978, + "grad_norm": 0.05052657797932625, + "learning_rate": 1.5228557172905509e-05, + "loss": 0.3413355350494385, + "step": 5605 + }, + { + "epoch": 1.0358466815257936, + "grad_norm": 0.08890295773744583, + "learning_rate": 1.522685656076544e-05, + "loss": 0.6631185412406921, + "step": 5606 + }, + { + "epoch": 1.0360314582346895, + "grad_norm": 0.06723684817552567, + "learning_rate": 1.5225155740612834e-05, + "loss": 0.44392311573028564, + "step": 5607 + }, + { + "epoch": 1.0362162349435853, + "grad_norm": 0.07482519745826721, + "learning_rate": 1.5223454712515376e-05, + "loss": 0.6936173439025879, + "step": 5608 + }, + { + "epoch": 1.036401011652481, + "grad_norm": 0.06911630183458328, + "learning_rate": 1.5221753476540762e-05, + "loss": 0.4736194610595703, + "step": 5609 + }, + { + "epoch": 1.0365857883613772, + "grad_norm": 0.07511420547962189, + "learning_rate": 1.5220052032756698e-05, + "loss": 0.583503007888794, + "step": 5610 + }, + { + "epoch": 1.036770565070273, + "grad_norm": 0.07482054084539413, + "learning_rate": 1.5218350381230895e-05, + "loss": 0.5486778020858765, + "step": 5611 + }, + { + "epoch": 1.0369553417791688, + "grad_norm": 0.08281565457582474, + "learning_rate": 1.521664852203107e-05, + "loss": 0.5580335855484009, + "step": 5612 + }, + { + "epoch": 1.0371401184880646, + "grad_norm": 0.06626997888088226, + "learning_rate": 1.5214946455224955e-05, + "loss": 0.5215938091278076, + "step": 5613 + }, + { + "epoch": 1.0373248951969605, + "grad_norm": 0.07665207982063293, + "learning_rate": 1.5213244180880287e-05, + "loss": 0.5431420803070068, + "step": 5614 + }, + { + "epoch": 1.0375096719058563, + "grad_norm": 0.06467811018228531, + "learning_rate": 1.5211541699064811e-05, + "loss": 0.3931715786457062, + "step": 5615 + }, + { + "epoch": 1.0376944486147521, + "grad_norm": 0.06558360904455185, + "learning_rate": 1.5209839009846282e-05, + "loss": 0.42572009563446045, + "step": 5616 + }, + { + "epoch": 1.037879225323648, + "grad_norm": 0.058751028031110764, + "learning_rate": 1.5208136113292457e-05, + "loss": 0.4192984104156494, + "step": 5617 + }, + { + "epoch": 1.0380640020325438, + "grad_norm": 0.07840841263532639, + "learning_rate": 1.5206433009471112e-05, + "loss": 0.5992063879966736, + "step": 5618 + }, + { + "epoch": 1.0382487787414396, + "grad_norm": 0.0799863412976265, + "learning_rate": 1.5204729698450015e-05, + "loss": 0.5317435264587402, + "step": 5619 + }, + { + "epoch": 1.0384335554503354, + "grad_norm": 0.09214732050895691, + "learning_rate": 1.520302618029697e-05, + "loss": 0.5794035792350769, + "step": 5620 + }, + { + "epoch": 1.0386183321592313, + "grad_norm": 0.06093394011259079, + "learning_rate": 1.5201322455079757e-05, + "loss": 0.5225287675857544, + "step": 5621 + }, + { + "epoch": 1.038803108868127, + "grad_norm": 0.0768059566617012, + "learning_rate": 1.519961852286618e-05, + "loss": 0.4805833697319031, + "step": 5622 + }, + { + "epoch": 1.038987885577023, + "grad_norm": 0.0913882628083229, + "learning_rate": 1.5197914383724052e-05, + "loss": 0.7455976009368896, + "step": 5623 + }, + { + "epoch": 1.0391726622859188, + "grad_norm": 0.08133476972579956, + "learning_rate": 1.51962100377212e-05, + "loss": 0.6164051294326782, + "step": 5624 + }, + { + "epoch": 1.0393574389948148, + "grad_norm": 0.06969386339187622, + "learning_rate": 1.5194505484925444e-05, + "loss": 0.5532299280166626, + "step": 5625 + }, + { + "epoch": 1.0395422157037106, + "grad_norm": 0.0681932345032692, + "learning_rate": 1.5192800725404618e-05, + "loss": 0.4927964508533478, + "step": 5626 + }, + { + "epoch": 1.0397269924126065, + "grad_norm": 0.07505299150943756, + "learning_rate": 1.519109575922657e-05, + "loss": 0.6235383152961731, + "step": 5627 + }, + { + "epoch": 1.0399117691215023, + "grad_norm": 0.06280749291181564, + "learning_rate": 1.5189390586459155e-05, + "loss": 0.4233841001987457, + "step": 5628 + }, + { + "epoch": 1.0400965458303981, + "grad_norm": 0.06342928856611252, + "learning_rate": 1.5187685207170226e-05, + "loss": 0.48731809854507446, + "step": 5629 + }, + { + "epoch": 1.040281322539294, + "grad_norm": 0.06959110498428345, + "learning_rate": 1.5185979621427657e-05, + "loss": 0.5357281565666199, + "step": 5630 + }, + { + "epoch": 1.0404660992481898, + "grad_norm": 0.07965416461229324, + "learning_rate": 1.5184273829299327e-05, + "loss": 0.6262179613113403, + "step": 5631 + }, + { + "epoch": 1.0406508759570856, + "grad_norm": 0.06511030346155167, + "learning_rate": 1.5182567830853114e-05, + "loss": 0.37972134351730347, + "step": 5632 + }, + { + "epoch": 1.0408356526659814, + "grad_norm": 0.10105421394109726, + "learning_rate": 1.5180861626156915e-05, + "loss": 0.6640194654464722, + "step": 5633 + }, + { + "epoch": 1.0410204293748773, + "grad_norm": 0.07888025045394897, + "learning_rate": 1.5179155215278637e-05, + "loss": 0.5475636124610901, + "step": 5634 + }, + { + "epoch": 1.041205206083773, + "grad_norm": 0.07195180654525757, + "learning_rate": 1.5177448598286182e-05, + "loss": 0.5586546063423157, + "step": 5635 + }, + { + "epoch": 1.041389982792669, + "grad_norm": 0.07774486392736435, + "learning_rate": 1.517574177524747e-05, + "loss": 0.5166386961936951, + "step": 5636 + }, + { + "epoch": 1.0415747595015648, + "grad_norm": 0.06525541096925735, + "learning_rate": 1.517403474623043e-05, + "loss": 0.46400272846221924, + "step": 5637 + }, + { + "epoch": 1.0417595362104606, + "grad_norm": 0.06237497553229332, + "learning_rate": 1.5172327511302996e-05, + "loss": 0.4679761528968811, + "step": 5638 + }, + { + "epoch": 1.0419443129193564, + "grad_norm": 0.08574408292770386, + "learning_rate": 1.5170620070533104e-05, + "loss": 0.5731038451194763, + "step": 5639 + }, + { + "epoch": 1.0421290896282525, + "grad_norm": 0.0831538513302803, + "learning_rate": 1.5168912423988716e-05, + "loss": 0.5945683717727661, + "step": 5640 + }, + { + "epoch": 1.0423138663371483, + "grad_norm": 0.06644801050424576, + "learning_rate": 1.5167204571737782e-05, + "loss": 0.5209828019142151, + "step": 5641 + }, + { + "epoch": 1.0424986430460441, + "grad_norm": 0.07284197211265564, + "learning_rate": 1.516549651384827e-05, + "loss": 0.5813314318656921, + "step": 5642 + }, + { + "epoch": 1.04268341975494, + "grad_norm": 0.07253412902355194, + "learning_rate": 1.5163788250388161e-05, + "loss": 0.6203429698944092, + "step": 5643 + }, + { + "epoch": 1.0428681964638358, + "grad_norm": 0.06365064531564713, + "learning_rate": 1.5162079781425434e-05, + "loss": 0.4796571731567383, + "step": 5644 + }, + { + "epoch": 1.0430529731727316, + "grad_norm": 0.06871246546506882, + "learning_rate": 1.5160371107028082e-05, + "loss": 0.4531722366809845, + "step": 5645 + }, + { + "epoch": 1.0432377498816274, + "grad_norm": 0.08286961913108826, + "learning_rate": 1.5158662227264102e-05, + "loss": 0.6958336234092712, + "step": 5646 + }, + { + "epoch": 1.0434225265905233, + "grad_norm": 0.05920685827732086, + "learning_rate": 1.515695314220151e-05, + "loss": 0.46679872274398804, + "step": 5647 + }, + { + "epoch": 1.043607303299419, + "grad_norm": 0.08717560023069382, + "learning_rate": 1.5155243851908314e-05, + "loss": 0.6550828814506531, + "step": 5648 + }, + { + "epoch": 1.043792080008315, + "grad_norm": 0.06699342280626297, + "learning_rate": 1.515353435645254e-05, + "loss": 0.5816624164581299, + "step": 5649 + }, + { + "epoch": 1.0439768567172107, + "grad_norm": 0.07312697917222977, + "learning_rate": 1.5151824655902223e-05, + "loss": 0.656792938709259, + "step": 5650 + }, + { + "epoch": 1.0441616334261066, + "grad_norm": 0.07946156710386276, + "learning_rate": 1.5150114750325404e-05, + "loss": 0.5273401737213135, + "step": 5651 + }, + { + "epoch": 1.0443464101350024, + "grad_norm": 0.08179052174091339, + "learning_rate": 1.514840463979013e-05, + "loss": 0.568706214427948, + "step": 5652 + }, + { + "epoch": 1.0445311868438982, + "grad_norm": 0.08967617899179459, + "learning_rate": 1.5146694324364454e-05, + "loss": 0.6211278438568115, + "step": 5653 + }, + { + "epoch": 1.044715963552794, + "grad_norm": 0.07301918417215347, + "learning_rate": 1.5144983804116453e-05, + "loss": 0.541821300983429, + "step": 5654 + }, + { + "epoch": 1.04490074026169, + "grad_norm": 0.07512032240629196, + "learning_rate": 1.5143273079114189e-05, + "loss": 0.38737353682518005, + "step": 5655 + }, + { + "epoch": 1.045085516970586, + "grad_norm": 0.07602082192897797, + "learning_rate": 1.5141562149425748e-05, + "loss": 0.5600385665893555, + "step": 5656 + }, + { + "epoch": 1.0452702936794818, + "grad_norm": 0.05643437057733536, + "learning_rate": 1.5139851015119223e-05, + "loss": 0.4082464575767517, + "step": 5657 + }, + { + "epoch": 1.0454550703883776, + "grad_norm": 0.06733223795890808, + "learning_rate": 1.5138139676262706e-05, + "loss": 0.5769407749176025, + "step": 5658 + }, + { + "epoch": 1.0456398470972734, + "grad_norm": 0.0798359215259552, + "learning_rate": 1.5136428132924304e-05, + "loss": 0.500033438205719, + "step": 5659 + }, + { + "epoch": 1.0458246238061693, + "grad_norm": 0.06644061207771301, + "learning_rate": 1.513471638517213e-05, + "loss": 0.39873623847961426, + "step": 5660 + }, + { + "epoch": 1.046009400515065, + "grad_norm": 0.06640005856752396, + "learning_rate": 1.5133004433074314e-05, + "loss": 0.44805052876472473, + "step": 5661 + }, + { + "epoch": 1.046194177223961, + "grad_norm": 0.08006201684474945, + "learning_rate": 1.5131292276698977e-05, + "loss": 0.6086596846580505, + "step": 5662 + }, + { + "epoch": 1.0463789539328567, + "grad_norm": 0.08362042158842087, + "learning_rate": 1.5129579916114262e-05, + "loss": 0.664318323135376, + "step": 5663 + }, + { + "epoch": 1.0465637306417526, + "grad_norm": 0.0611673966050148, + "learning_rate": 1.5127867351388314e-05, + "loss": 0.45818454027175903, + "step": 5664 + }, + { + "epoch": 1.0467485073506484, + "grad_norm": 0.07986735552549362, + "learning_rate": 1.5126154582589287e-05, + "loss": 0.6387860178947449, + "step": 5665 + }, + { + "epoch": 1.0469332840595442, + "grad_norm": 0.08133723586797714, + "learning_rate": 1.5124441609785347e-05, + "loss": 0.5613438487052917, + "step": 5666 + }, + { + "epoch": 1.04711806076844, + "grad_norm": 0.06914244592189789, + "learning_rate": 1.5122728433044664e-05, + "loss": 0.5158078670501709, + "step": 5667 + }, + { + "epoch": 1.0473028374773359, + "grad_norm": 0.08781937509775162, + "learning_rate": 1.5121015052435418e-05, + "loss": 0.6469088792800903, + "step": 5668 + }, + { + "epoch": 1.047487614186232, + "grad_norm": 0.06933942437171936, + "learning_rate": 1.511930146802579e-05, + "loss": 0.4561266303062439, + "step": 5669 + }, + { + "epoch": 1.0476723908951278, + "grad_norm": 0.08494089543819427, + "learning_rate": 1.5117587679883982e-05, + "loss": 0.630203902721405, + "step": 5670 + }, + { + "epoch": 1.0478571676040236, + "grad_norm": 0.060044411569833755, + "learning_rate": 1.5115873688078197e-05, + "loss": 0.427950382232666, + "step": 5671 + }, + { + "epoch": 1.0480419443129194, + "grad_norm": 0.07982151210308075, + "learning_rate": 1.511415949267664e-05, + "loss": 0.5333434343338013, + "step": 5672 + }, + { + "epoch": 1.0482267210218152, + "grad_norm": 0.08310159295797348, + "learning_rate": 1.511244509374754e-05, + "loss": 0.5588505268096924, + "step": 5673 + }, + { + "epoch": 1.048411497730711, + "grad_norm": 0.07040046900510788, + "learning_rate": 1.511073049135912e-05, + "loss": 0.46905678510665894, + "step": 5674 + }, + { + "epoch": 1.048596274439607, + "grad_norm": 0.0707654356956482, + "learning_rate": 1.5109015685579613e-05, + "loss": 0.5168400406837463, + "step": 5675 + }, + { + "epoch": 1.0487810511485027, + "grad_norm": 0.07354991137981415, + "learning_rate": 1.5107300676477268e-05, + "loss": 0.5358753800392151, + "step": 5676 + }, + { + "epoch": 1.0489658278573986, + "grad_norm": 0.06685210019350052, + "learning_rate": 1.5105585464120333e-05, + "loss": 0.587394654750824, + "step": 5677 + }, + { + "epoch": 1.0491506045662944, + "grad_norm": 0.09235299378633499, + "learning_rate": 1.5103870048577071e-05, + "loss": 0.6667936444282532, + "step": 5678 + }, + { + "epoch": 1.0493353812751902, + "grad_norm": 0.0625777542591095, + "learning_rate": 1.510215442991575e-05, + "loss": 0.5211875438690186, + "step": 5679 + }, + { + "epoch": 1.049520157984086, + "grad_norm": 0.0792139396071434, + "learning_rate": 1.5100438608204645e-05, + "loss": 0.5901344418525696, + "step": 5680 + }, + { + "epoch": 1.0497049346929819, + "grad_norm": 0.07802049815654755, + "learning_rate": 1.5098722583512038e-05, + "loss": 0.6435686349868774, + "step": 5681 + }, + { + "epoch": 1.0498897114018777, + "grad_norm": 0.07689131051301956, + "learning_rate": 1.5097006355906225e-05, + "loss": 0.5567794442176819, + "step": 5682 + }, + { + "epoch": 1.0500744881107735, + "grad_norm": 0.07338476926088333, + "learning_rate": 1.5095289925455507e-05, + "loss": 0.542125940322876, + "step": 5683 + }, + { + "epoch": 1.0502592648196696, + "grad_norm": 0.06186862662434578, + "learning_rate": 1.509357329222819e-05, + "loss": 0.46599534153938293, + "step": 5684 + }, + { + "epoch": 1.0504440415285654, + "grad_norm": 0.08224906027317047, + "learning_rate": 1.5091856456292591e-05, + "loss": 0.6175563931465149, + "step": 5685 + }, + { + "epoch": 1.0506288182374612, + "grad_norm": 0.08578823506832123, + "learning_rate": 1.5090139417717039e-05, + "loss": 0.6648751497268677, + "step": 5686 + }, + { + "epoch": 1.050813594946357, + "grad_norm": 0.06645061075687408, + "learning_rate": 1.5088422176569859e-05, + "loss": 0.5244321823120117, + "step": 5687 + }, + { + "epoch": 1.050998371655253, + "grad_norm": 0.0692540779709816, + "learning_rate": 1.50867047329194e-05, + "loss": 0.4675929546356201, + "step": 5688 + }, + { + "epoch": 1.0511831483641487, + "grad_norm": 0.0704752653837204, + "learning_rate": 1.5084987086834003e-05, + "loss": 0.5662005543708801, + "step": 5689 + }, + { + "epoch": 1.0513679250730446, + "grad_norm": 0.06947939097881317, + "learning_rate": 1.5083269238382028e-05, + "loss": 0.460791677236557, + "step": 5690 + }, + { + "epoch": 1.0515527017819404, + "grad_norm": 0.06659407913684845, + "learning_rate": 1.5081551187631844e-05, + "loss": 0.49134254455566406, + "step": 5691 + }, + { + "epoch": 1.0517374784908362, + "grad_norm": 0.0819624662399292, + "learning_rate": 1.507983293465182e-05, + "loss": 0.5807049870491028, + "step": 5692 + }, + { + "epoch": 1.051922255199732, + "grad_norm": 0.07566314190626144, + "learning_rate": 1.507811447951034e-05, + "loss": 0.5705682635307312, + "step": 5693 + }, + { + "epoch": 1.0521070319086279, + "grad_norm": 0.07033289223909378, + "learning_rate": 1.5076395822275787e-05, + "loss": 0.49281132221221924, + "step": 5694 + }, + { + "epoch": 1.0522918086175237, + "grad_norm": 0.09457696974277496, + "learning_rate": 1.5074676963016563e-05, + "loss": 0.6702513098716736, + "step": 5695 + }, + { + "epoch": 1.0524765853264195, + "grad_norm": 0.08114906400442123, + "learning_rate": 1.5072957901801075e-05, + "loss": 0.6118483543395996, + "step": 5696 + }, + { + "epoch": 1.0526613620353154, + "grad_norm": 0.07982999831438065, + "learning_rate": 1.5071238638697731e-05, + "loss": 0.562346339225769, + "step": 5697 + }, + { + "epoch": 1.0528461387442114, + "grad_norm": 0.07520321756601334, + "learning_rate": 1.5069519173774958e-05, + "loss": 0.4821903109550476, + "step": 5698 + }, + { + "epoch": 1.0530309154531072, + "grad_norm": 0.07969825714826584, + "learning_rate": 1.5067799507101182e-05, + "loss": 0.5231217741966248, + "step": 5699 + }, + { + "epoch": 1.053215692162003, + "grad_norm": 0.08513433486223221, + "learning_rate": 1.5066079638744839e-05, + "loss": 0.6409650444984436, + "step": 5700 + }, + { + "epoch": 1.0534004688708989, + "grad_norm": 0.06774573773145676, + "learning_rate": 1.5064359568774376e-05, + "loss": 0.44569161534309387, + "step": 5701 + }, + { + "epoch": 1.0535852455797947, + "grad_norm": 0.06494417041540146, + "learning_rate": 1.5062639297258246e-05, + "loss": 0.4762156307697296, + "step": 5702 + }, + { + "epoch": 1.0537700222886905, + "grad_norm": 0.07973779737949371, + "learning_rate": 1.5060918824264916e-05, + "loss": 0.5730336308479309, + "step": 5703 + }, + { + "epoch": 1.0539547989975864, + "grad_norm": 0.05715023726224899, + "learning_rate": 1.5059198149862843e-05, + "loss": 0.47526082396507263, + "step": 5704 + }, + { + "epoch": 1.0541395757064822, + "grad_norm": 0.0791873037815094, + "learning_rate": 1.5057477274120516e-05, + "loss": 0.556162416934967, + "step": 5705 + }, + { + "epoch": 1.054324352415378, + "grad_norm": 0.056715238839387894, + "learning_rate": 1.5055756197106417e-05, + "loss": 0.42971712350845337, + "step": 5706 + }, + { + "epoch": 1.0545091291242739, + "grad_norm": 0.06378652155399323, + "learning_rate": 1.5054034918889037e-05, + "loss": 0.4626498222351074, + "step": 5707 + }, + { + "epoch": 1.0546939058331697, + "grad_norm": 0.07502374798059464, + "learning_rate": 1.505231343953688e-05, + "loss": 0.531059741973877, + "step": 5708 + }, + { + "epoch": 1.0548786825420655, + "grad_norm": 0.09165331721305847, + "learning_rate": 1.5050591759118454e-05, + "loss": 0.7677619457244873, + "step": 5709 + }, + { + "epoch": 1.0550634592509613, + "grad_norm": 0.06765854358673096, + "learning_rate": 1.5048869877702278e-05, + "loss": 0.5271489024162292, + "step": 5710 + }, + { + "epoch": 1.0552482359598572, + "grad_norm": 0.0819627195596695, + "learning_rate": 1.5047147795356877e-05, + "loss": 0.5126342177391052, + "step": 5711 + }, + { + "epoch": 1.055433012668753, + "grad_norm": 0.06692739576101303, + "learning_rate": 1.5045425512150784e-05, + "loss": 0.5844963192939758, + "step": 5712 + }, + { + "epoch": 1.055617789377649, + "grad_norm": 0.0615801103413105, + "learning_rate": 1.504370302815254e-05, + "loss": 0.4775800108909607, + "step": 5713 + }, + { + "epoch": 1.0558025660865449, + "grad_norm": 0.06606190651655197, + "learning_rate": 1.5041980343430696e-05, + "loss": 0.4907645583152771, + "step": 5714 + }, + { + "epoch": 1.0559873427954407, + "grad_norm": 0.060940444469451904, + "learning_rate": 1.5040257458053806e-05, + "loss": 0.37098509073257446, + "step": 5715 + }, + { + "epoch": 1.0561721195043365, + "grad_norm": 0.07180286198854446, + "learning_rate": 1.5038534372090443e-05, + "loss": 0.5962932705879211, + "step": 5716 + }, + { + "epoch": 1.0563568962132324, + "grad_norm": 0.07350271940231323, + "learning_rate": 1.5036811085609176e-05, + "loss": 0.5578521490097046, + "step": 5717 + }, + { + "epoch": 1.0565416729221282, + "grad_norm": 0.06898215413093567, + "learning_rate": 1.5035087598678581e-05, + "loss": 0.6065282821655273, + "step": 5718 + }, + { + "epoch": 1.056726449631024, + "grad_norm": 0.08027186244726181, + "learning_rate": 1.5033363911367254e-05, + "loss": 0.49961212277412415, + "step": 5719 + }, + { + "epoch": 1.0569112263399199, + "grad_norm": 0.06551721692085266, + "learning_rate": 1.5031640023743792e-05, + "loss": 0.3818071186542511, + "step": 5720 + }, + { + "epoch": 1.0570960030488157, + "grad_norm": 0.06600042432546616, + "learning_rate": 1.5029915935876797e-05, + "loss": 0.4476735293865204, + "step": 5721 + }, + { + "epoch": 1.0572807797577115, + "grad_norm": 0.05965065583586693, + "learning_rate": 1.5028191647834884e-05, + "loss": 0.5809292197227478, + "step": 5722 + }, + { + "epoch": 1.0574655564666073, + "grad_norm": 0.06527971476316452, + "learning_rate": 1.502646715968668e-05, + "loss": 0.4536020755767822, + "step": 5723 + }, + { + "epoch": 1.0576503331755032, + "grad_norm": 0.07186955213546753, + "learning_rate": 1.5024742471500804e-05, + "loss": 0.5548267960548401, + "step": 5724 + }, + { + "epoch": 1.057835109884399, + "grad_norm": 0.07215177267789841, + "learning_rate": 1.5023017583345895e-05, + "loss": 0.49939098954200745, + "step": 5725 + }, + { + "epoch": 1.0580198865932948, + "grad_norm": 0.06784142553806305, + "learning_rate": 1.5021292495290608e-05, + "loss": 0.4960416555404663, + "step": 5726 + }, + { + "epoch": 1.0582046633021909, + "grad_norm": 0.05334806442260742, + "learning_rate": 1.5019567207403587e-05, + "loss": 0.46388018131256104, + "step": 5727 + }, + { + "epoch": 1.0583894400110867, + "grad_norm": 0.08525721728801727, + "learning_rate": 1.5017841719753495e-05, + "loss": 0.5141664147377014, + "step": 5728 + }, + { + "epoch": 1.0585742167199825, + "grad_norm": 0.07427357137203217, + "learning_rate": 1.5016116032409e-05, + "loss": 0.48649123311042786, + "step": 5729 + }, + { + "epoch": 1.0587589934288784, + "grad_norm": 0.07980693876743317, + "learning_rate": 1.5014390145438782e-05, + "loss": 0.59165358543396, + "step": 5730 + }, + { + "epoch": 1.0589437701377742, + "grad_norm": 0.07934518903493881, + "learning_rate": 1.5012664058911522e-05, + "loss": 0.5416390299797058, + "step": 5731 + }, + { + "epoch": 1.05912854684667, + "grad_norm": 0.08083886653184891, + "learning_rate": 1.5010937772895918e-05, + "loss": 0.45603734254837036, + "step": 5732 + }, + { + "epoch": 1.0593133235555658, + "grad_norm": 0.09596753865480423, + "learning_rate": 1.5009211287460668e-05, + "loss": 0.6592952609062195, + "step": 5733 + }, + { + "epoch": 1.0594981002644617, + "grad_norm": 0.07743064314126968, + "learning_rate": 1.500748460267448e-05, + "loss": 0.502553403377533, + "step": 5734 + }, + { + "epoch": 1.0596828769733575, + "grad_norm": 0.07169241458177567, + "learning_rate": 1.5005757718606066e-05, + "loss": 0.558634877204895, + "step": 5735 + }, + { + "epoch": 1.0598676536822533, + "grad_norm": 0.07375902682542801, + "learning_rate": 1.5004030635324163e-05, + "loss": 0.45072486996650696, + "step": 5736 + }, + { + "epoch": 1.0600524303911492, + "grad_norm": 0.0858403667807579, + "learning_rate": 1.5002303352897494e-05, + "loss": 0.6073910593986511, + "step": 5737 + }, + { + "epoch": 1.060237207100045, + "grad_norm": 0.06966253370046616, + "learning_rate": 1.50005758713948e-05, + "loss": 0.5068526268005371, + "step": 5738 + }, + { + "epoch": 1.0604219838089408, + "grad_norm": 0.0676216408610344, + "learning_rate": 1.4998848190884832e-05, + "loss": 0.37021389603614807, + "step": 5739 + }, + { + "epoch": 1.0606067605178366, + "grad_norm": 0.07576752454042435, + "learning_rate": 1.4997120311436346e-05, + "loss": 0.5430153608322144, + "step": 5740 + }, + { + "epoch": 1.0607915372267325, + "grad_norm": 0.07603286951780319, + "learning_rate": 1.4995392233118104e-05, + "loss": 0.5482485294342041, + "step": 5741 + }, + { + "epoch": 1.0609763139356283, + "grad_norm": 0.08099351823329926, + "learning_rate": 1.4993663955998883e-05, + "loss": 0.5044888854026794, + "step": 5742 + }, + { + "epoch": 1.0611610906445244, + "grad_norm": 0.062198203057050705, + "learning_rate": 1.4991935480147457e-05, + "loss": 0.4543401300907135, + "step": 5743 + }, + { + "epoch": 1.0613458673534202, + "grad_norm": 0.07020148634910583, + "learning_rate": 1.4990206805632618e-05, + "loss": 0.49687787890434265, + "step": 5744 + }, + { + "epoch": 1.061530644062316, + "grad_norm": 0.08795180171728134, + "learning_rate": 1.4988477932523157e-05, + "loss": 0.6046985387802124, + "step": 5745 + }, + { + "epoch": 1.0617154207712118, + "grad_norm": 0.07090011239051819, + "learning_rate": 1.4986748860887885e-05, + "loss": 0.3979244828224182, + "step": 5746 + }, + { + "epoch": 1.0619001974801077, + "grad_norm": 0.07279148697853088, + "learning_rate": 1.4985019590795611e-05, + "loss": 0.6921658515930176, + "step": 5747 + }, + { + "epoch": 1.0620849741890035, + "grad_norm": 0.06524580717086792, + "learning_rate": 1.4983290122315151e-05, + "loss": 0.51961350440979, + "step": 5748 + }, + { + "epoch": 1.0622697508978993, + "grad_norm": 0.07926417142152786, + "learning_rate": 1.4981560455515337e-05, + "loss": 0.6424283385276794, + "step": 5749 + }, + { + "epoch": 1.0624545276067952, + "grad_norm": 0.05497293919324875, + "learning_rate": 1.4979830590465e-05, + "loss": 0.3074781596660614, + "step": 5750 + }, + { + "epoch": 1.062639304315691, + "grad_norm": 0.09386415034532547, + "learning_rate": 1.4978100527232985e-05, + "loss": 0.6136388182640076, + "step": 5751 + }, + { + "epoch": 1.0628240810245868, + "grad_norm": 0.06570550799369812, + "learning_rate": 1.4976370265888142e-05, + "loss": 0.5354533791542053, + "step": 5752 + }, + { + "epoch": 1.0630088577334826, + "grad_norm": 0.07388859987258911, + "learning_rate": 1.4974639806499336e-05, + "loss": 0.4849873185157776, + "step": 5753 + }, + { + "epoch": 1.0631936344423785, + "grad_norm": 0.09081213921308517, + "learning_rate": 1.4972909149135429e-05, + "loss": 0.5858061909675598, + "step": 5754 + }, + { + "epoch": 1.0633784111512743, + "grad_norm": 0.06316659599542618, + "learning_rate": 1.4971178293865292e-05, + "loss": 0.4371189773082733, + "step": 5755 + }, + { + "epoch": 1.0635631878601701, + "grad_norm": 0.0723111629486084, + "learning_rate": 1.4969447240757812e-05, + "loss": 0.4745716154575348, + "step": 5756 + }, + { + "epoch": 1.0637479645690662, + "grad_norm": 0.05701501667499542, + "learning_rate": 1.4967715989881884e-05, + "loss": 0.4331725239753723, + "step": 5757 + }, + { + "epoch": 1.063932741277962, + "grad_norm": 0.05743462219834328, + "learning_rate": 1.4965984541306398e-05, + "loss": 0.31881242990493774, + "step": 5758 + }, + { + "epoch": 1.0641175179868578, + "grad_norm": 0.05291770398616791, + "learning_rate": 1.4964252895100265e-05, + "loss": 0.3698563873767853, + "step": 5759 + }, + { + "epoch": 1.0643022946957537, + "grad_norm": 0.06304258853197098, + "learning_rate": 1.4962521051332397e-05, + "loss": 0.39415884017944336, + "step": 5760 + }, + { + "epoch": 1.0644870714046495, + "grad_norm": 0.054569315165281296, + "learning_rate": 1.4960789010071717e-05, + "loss": 0.3881533443927765, + "step": 5761 + }, + { + "epoch": 1.0646718481135453, + "grad_norm": 0.06850560009479523, + "learning_rate": 1.4959056771387156e-05, + "loss": 0.44263342022895813, + "step": 5762 + }, + { + "epoch": 1.0648566248224411, + "grad_norm": 0.08048789948225021, + "learning_rate": 1.495732433534765e-05, + "loss": 0.5522597432136536, + "step": 5763 + }, + { + "epoch": 1.065041401531337, + "grad_norm": 0.0781223401427269, + "learning_rate": 1.4955591702022145e-05, + "loss": 0.6733675003051758, + "step": 5764 + }, + { + "epoch": 1.0652261782402328, + "grad_norm": 0.07135794311761856, + "learning_rate": 1.4953858871479595e-05, + "loss": 0.5192707777023315, + "step": 5765 + }, + { + "epoch": 1.0654109549491286, + "grad_norm": 0.05873025581240654, + "learning_rate": 1.4952125843788955e-05, + "loss": 0.36700373888015747, + "step": 5766 + }, + { + "epoch": 1.0655957316580245, + "grad_norm": 0.07478354871273041, + "learning_rate": 1.4950392619019208e-05, + "loss": 0.5370163321495056, + "step": 5767 + }, + { + "epoch": 1.0657805083669203, + "grad_norm": 0.08937849849462509, + "learning_rate": 1.4948659197239317e-05, + "loss": 0.6988489031791687, + "step": 5768 + }, + { + "epoch": 1.0659652850758161, + "grad_norm": 0.08429345488548279, + "learning_rate": 1.4946925578518273e-05, + "loss": 0.636594295501709, + "step": 5769 + }, + { + "epoch": 1.066150061784712, + "grad_norm": 0.07610718160867691, + "learning_rate": 1.4945191762925068e-05, + "loss": 0.5621874928474426, + "step": 5770 + }, + { + "epoch": 1.0663348384936078, + "grad_norm": 0.10598469525575638, + "learning_rate": 1.4943457750528706e-05, + "loss": 0.685886025428772, + "step": 5771 + }, + { + "epoch": 1.0665196152025038, + "grad_norm": 0.07759395241737366, + "learning_rate": 1.4941723541398185e-05, + "loss": 0.4770432710647583, + "step": 5772 + }, + { + "epoch": 1.0667043919113997, + "grad_norm": 0.077302485704422, + "learning_rate": 1.4939989135602532e-05, + "loss": 0.50245600938797, + "step": 5773 + }, + { + "epoch": 1.0668891686202955, + "grad_norm": 0.06550062447786331, + "learning_rate": 1.493825453321077e-05, + "loss": 0.49462637305259705, + "step": 5774 + }, + { + "epoch": 1.0670739453291913, + "grad_norm": 0.08421865105628967, + "learning_rate": 1.493651973429192e-05, + "loss": 0.5801661014556885, + "step": 5775 + }, + { + "epoch": 1.0672587220380871, + "grad_norm": 0.08015292137861252, + "learning_rate": 1.4934784738915034e-05, + "loss": 0.5254921913146973, + "step": 5776 + }, + { + "epoch": 1.067443498746983, + "grad_norm": 0.05905531346797943, + "learning_rate": 1.4933049547149155e-05, + "loss": 0.38395801186561584, + "step": 5777 + }, + { + "epoch": 1.0676282754558788, + "grad_norm": 0.06965994834899902, + "learning_rate": 1.4931314159063333e-05, + "loss": 0.46900445222854614, + "step": 5778 + }, + { + "epoch": 1.0678130521647746, + "grad_norm": 0.07749634981155396, + "learning_rate": 1.4929578574726637e-05, + "loss": 0.5108754634857178, + "step": 5779 + }, + { + "epoch": 1.0679978288736705, + "grad_norm": 0.07539774477481842, + "learning_rate": 1.4927842794208138e-05, + "loss": 0.5456463694572449, + "step": 5780 + }, + { + "epoch": 1.0681826055825663, + "grad_norm": 0.08375140279531479, + "learning_rate": 1.4926106817576913e-05, + "loss": 0.592858076095581, + "step": 5781 + }, + { + "epoch": 1.068367382291462, + "grad_norm": 0.07365249842405319, + "learning_rate": 1.4924370644902048e-05, + "loss": 0.5774492025375366, + "step": 5782 + }, + { + "epoch": 1.068552159000358, + "grad_norm": 0.08658870309591293, + "learning_rate": 1.4922634276252636e-05, + "loss": 0.5666930079460144, + "step": 5783 + }, + { + "epoch": 1.0687369357092538, + "grad_norm": 0.06969824433326721, + "learning_rate": 1.4920897711697784e-05, + "loss": 0.4051949679851532, + "step": 5784 + }, + { + "epoch": 1.0689217124181496, + "grad_norm": 0.094402015209198, + "learning_rate": 1.4919160951306598e-05, + "loss": 0.6602669358253479, + "step": 5785 + }, + { + "epoch": 1.0691064891270456, + "grad_norm": 0.06941288709640503, + "learning_rate": 1.4917423995148193e-05, + "loss": 0.5111122131347656, + "step": 5786 + }, + { + "epoch": 1.0692912658359415, + "grad_norm": 0.06397631764411926, + "learning_rate": 1.4915686843291701e-05, + "loss": 0.421415239572525, + "step": 5787 + }, + { + "epoch": 1.0694760425448373, + "grad_norm": 0.09350269287824631, + "learning_rate": 1.491394949580625e-05, + "loss": 0.7124272584915161, + "step": 5788 + }, + { + "epoch": 1.0696608192537331, + "grad_norm": 0.07877876609563828, + "learning_rate": 1.4912211952760985e-05, + "loss": 0.5305052399635315, + "step": 5789 + }, + { + "epoch": 1.069845595962629, + "grad_norm": 0.06914859265089035, + "learning_rate": 1.491047421422505e-05, + "loss": 0.47308892011642456, + "step": 5790 + }, + { + "epoch": 1.0700303726715248, + "grad_norm": 0.07343301177024841, + "learning_rate": 1.4908736280267604e-05, + "loss": 0.5048878788948059, + "step": 5791 + }, + { + "epoch": 1.0702151493804206, + "grad_norm": 0.07299642264842987, + "learning_rate": 1.4906998150957815e-05, + "loss": 0.43826934695243835, + "step": 5792 + }, + { + "epoch": 1.0703999260893164, + "grad_norm": 0.07881126552820206, + "learning_rate": 1.4905259826364846e-05, + "loss": 0.49007317423820496, + "step": 5793 + }, + { + "epoch": 1.0705847027982123, + "grad_norm": 0.06472620368003845, + "learning_rate": 1.490352130655789e-05, + "loss": 0.4609697461128235, + "step": 5794 + }, + { + "epoch": 1.070769479507108, + "grad_norm": 0.07513286918401718, + "learning_rate": 1.490178259160612e-05, + "loss": 0.6415743827819824, + "step": 5795 + }, + { + "epoch": 1.070954256216004, + "grad_norm": 0.08034192025661469, + "learning_rate": 1.4900043681578741e-05, + "loss": 0.6771365404129028, + "step": 5796 + }, + { + "epoch": 1.0711390329248998, + "grad_norm": 0.08471996337175369, + "learning_rate": 1.4898304576544955e-05, + "loss": 0.5288458466529846, + "step": 5797 + }, + { + "epoch": 1.0713238096337956, + "grad_norm": 0.08926571905612946, + "learning_rate": 1.489656527657397e-05, + "loss": 0.7090543508529663, + "step": 5798 + }, + { + "epoch": 1.0715085863426914, + "grad_norm": 0.06450363248586655, + "learning_rate": 1.489482578173501e-05, + "loss": 0.49964165687561035, + "step": 5799 + }, + { + "epoch": 1.0716933630515872, + "grad_norm": 0.07753396779298782, + "learning_rate": 1.4893086092097292e-05, + "loss": 0.5874074101448059, + "step": 5800 + }, + { + "epoch": 1.0718781397604833, + "grad_norm": 0.06905125826597214, + "learning_rate": 1.489134620773006e-05, + "loss": 0.5268652439117432, + "step": 5801 + }, + { + "epoch": 1.0720629164693791, + "grad_norm": 0.08922120928764343, + "learning_rate": 1.4889606128702552e-05, + "loss": 0.6464628577232361, + "step": 5802 + }, + { + "epoch": 1.072247693178275, + "grad_norm": 0.07564452290534973, + "learning_rate": 1.4887865855084015e-05, + "loss": 0.5073250532150269, + "step": 5803 + }, + { + "epoch": 1.0724324698871708, + "grad_norm": 0.07366842031478882, + "learning_rate": 1.4886125386943713e-05, + "loss": 0.6223083734512329, + "step": 5804 + }, + { + "epoch": 1.0726172465960666, + "grad_norm": 0.06662733852863312, + "learning_rate": 1.4884384724350907e-05, + "loss": 0.47135940194129944, + "step": 5805 + }, + { + "epoch": 1.0728020233049624, + "grad_norm": 0.07442791759967804, + "learning_rate": 1.4882643867374868e-05, + "loss": 0.46855729818344116, + "step": 5806 + }, + { + "epoch": 1.0729868000138583, + "grad_norm": 0.06059639900922775, + "learning_rate": 1.488090281608488e-05, + "loss": 0.40140438079833984, + "step": 5807 + }, + { + "epoch": 1.073171576722754, + "grad_norm": 0.08974947035312653, + "learning_rate": 1.4879161570550227e-05, + "loss": 0.6785848736763, + "step": 5808 + }, + { + "epoch": 1.07335635343165, + "grad_norm": 0.08336343616247177, + "learning_rate": 1.4877420130840214e-05, + "loss": 0.539770781993866, + "step": 5809 + }, + { + "epoch": 1.0735411301405458, + "grad_norm": 0.07375794649124146, + "learning_rate": 1.4875678497024134e-05, + "loss": 0.5181423425674438, + "step": 5810 + }, + { + "epoch": 1.0737259068494416, + "grad_norm": 0.08090443909168243, + "learning_rate": 1.4873936669171307e-05, + "loss": 0.6372614502906799, + "step": 5811 + }, + { + "epoch": 1.0739106835583374, + "grad_norm": 0.08555450290441513, + "learning_rate": 1.4872194647351049e-05, + "loss": 0.540556788444519, + "step": 5812 + }, + { + "epoch": 1.0740954602672332, + "grad_norm": 0.06083214282989502, + "learning_rate": 1.4870452431632689e-05, + "loss": 0.3508705198764801, + "step": 5813 + }, + { + "epoch": 1.074280236976129, + "grad_norm": 0.062058694660663605, + "learning_rate": 1.4868710022085555e-05, + "loss": 0.4283808767795563, + "step": 5814 + }, + { + "epoch": 1.0744650136850251, + "grad_norm": 0.04995314031839371, + "learning_rate": 1.4866967418778996e-05, + "loss": 0.33063793182373047, + "step": 5815 + }, + { + "epoch": 1.074649790393921, + "grad_norm": 0.07562576979398727, + "learning_rate": 1.4865224621782364e-05, + "loss": 0.5480437278747559, + "step": 5816 + }, + { + "epoch": 1.0748345671028168, + "grad_norm": 0.06574447453022003, + "learning_rate": 1.486348163116501e-05, + "loss": 0.3196644186973572, + "step": 5817 + }, + { + "epoch": 1.0750193438117126, + "grad_norm": 0.06903432309627533, + "learning_rate": 1.4861738446996304e-05, + "loss": 0.5272139310836792, + "step": 5818 + }, + { + "epoch": 1.0752041205206084, + "grad_norm": 0.06662653386592865, + "learning_rate": 1.4859995069345618e-05, + "loss": 0.48803701996803284, + "step": 5819 + }, + { + "epoch": 1.0753888972295043, + "grad_norm": 0.0712452158331871, + "learning_rate": 1.4858251498282333e-05, + "loss": 0.46093958616256714, + "step": 5820 + }, + { + "epoch": 1.0755736739384, + "grad_norm": 0.07670149952173233, + "learning_rate": 1.4856507733875837e-05, + "loss": 0.5727660655975342, + "step": 5821 + }, + { + "epoch": 1.075758450647296, + "grad_norm": 0.08891289681196213, + "learning_rate": 1.485476377619553e-05, + "loss": 0.6572068929672241, + "step": 5822 + }, + { + "epoch": 1.0759432273561917, + "grad_norm": 0.06130025535821915, + "learning_rate": 1.4853019625310813e-05, + "loss": 0.4660205543041229, + "step": 5823 + }, + { + "epoch": 1.0761280040650876, + "grad_norm": 0.06196140870451927, + "learning_rate": 1.4851275281291095e-05, + "loss": 0.44341546297073364, + "step": 5824 + }, + { + "epoch": 1.0763127807739834, + "grad_norm": 0.08439138531684875, + "learning_rate": 1.48495307442058e-05, + "loss": 0.5173313021659851, + "step": 5825 + }, + { + "epoch": 1.0764975574828792, + "grad_norm": 0.06659369170665741, + "learning_rate": 1.4847786014124354e-05, + "loss": 0.4271799325942993, + "step": 5826 + }, + { + "epoch": 1.076682334191775, + "grad_norm": 0.06720487028360367, + "learning_rate": 1.4846041091116192e-05, + "loss": 0.47105199098587036, + "step": 5827 + }, + { + "epoch": 1.0768671109006709, + "grad_norm": 0.09249649196863174, + "learning_rate": 1.4844295975250755e-05, + "loss": 0.6819266080856323, + "step": 5828 + }, + { + "epoch": 1.0770518876095667, + "grad_norm": 0.08595030009746552, + "learning_rate": 1.4842550666597495e-05, + "loss": 0.5509486794471741, + "step": 5829 + }, + { + "epoch": 1.0772366643184625, + "grad_norm": 0.06970569491386414, + "learning_rate": 1.484080516522587e-05, + "loss": 0.39800581336021423, + "step": 5830 + }, + { + "epoch": 1.0774214410273586, + "grad_norm": 0.07932216674089432, + "learning_rate": 1.483905947120534e-05, + "loss": 0.540603518486023, + "step": 5831 + }, + { + "epoch": 1.0776062177362544, + "grad_norm": 0.07762263715267181, + "learning_rate": 1.4837313584605386e-05, + "loss": 0.656118631362915, + "step": 5832 + }, + { + "epoch": 1.0777909944451503, + "grad_norm": 0.05933019518852234, + "learning_rate": 1.4835567505495486e-05, + "loss": 0.3998440206050873, + "step": 5833 + }, + { + "epoch": 1.077975771154046, + "grad_norm": 0.06421727687120438, + "learning_rate": 1.4833821233945122e-05, + "loss": 0.3742504119873047, + "step": 5834 + }, + { + "epoch": 1.078160547862942, + "grad_norm": 0.0962388664484024, + "learning_rate": 1.48320747700238e-05, + "loss": 0.8221604228019714, + "step": 5835 + }, + { + "epoch": 1.0783453245718377, + "grad_norm": 0.07165448367595673, + "learning_rate": 1.4830328113801022e-05, + "loss": 0.4744814336299896, + "step": 5836 + }, + { + "epoch": 1.0785301012807336, + "grad_norm": 0.08229894191026688, + "learning_rate": 1.4828581265346295e-05, + "loss": 0.602943480014801, + "step": 5837 + }, + { + "epoch": 1.0787148779896294, + "grad_norm": 0.06584686785936356, + "learning_rate": 1.482683422472914e-05, + "loss": 0.5143553614616394, + "step": 5838 + }, + { + "epoch": 1.0788996546985252, + "grad_norm": 0.0741025060415268, + "learning_rate": 1.4825086992019087e-05, + "loss": 0.45243039727211, + "step": 5839 + }, + { + "epoch": 1.079084431407421, + "grad_norm": 0.08383125066757202, + "learning_rate": 1.4823339567285664e-05, + "loss": 0.651415228843689, + "step": 5840 + }, + { + "epoch": 1.0792692081163169, + "grad_norm": 0.06560477614402771, + "learning_rate": 1.4821591950598415e-05, + "loss": 0.5027257204055786, + "step": 5841 + }, + { + "epoch": 1.0794539848252127, + "grad_norm": 0.06961048394441605, + "learning_rate": 1.4819844142026895e-05, + "loss": 0.4787517488002777, + "step": 5842 + }, + { + "epoch": 1.0796387615341085, + "grad_norm": 0.0803142711520195, + "learning_rate": 1.4818096141640658e-05, + "loss": 0.6678456664085388, + "step": 5843 + }, + { + "epoch": 1.0798235382430044, + "grad_norm": 0.06719299405813217, + "learning_rate": 1.4816347949509264e-05, + "loss": 0.5077908039093018, + "step": 5844 + }, + { + "epoch": 1.0800083149519004, + "grad_norm": 0.07734595239162445, + "learning_rate": 1.4814599565702295e-05, + "loss": 0.6761911511421204, + "step": 5845 + }, + { + "epoch": 1.0801930916607962, + "grad_norm": 0.07329533249139786, + "learning_rate": 1.4812850990289324e-05, + "loss": 0.5296775698661804, + "step": 5846 + }, + { + "epoch": 1.080377868369692, + "grad_norm": 0.06282027065753937, + "learning_rate": 1.4811102223339942e-05, + "loss": 0.5500996708869934, + "step": 5847 + }, + { + "epoch": 1.080562645078588, + "grad_norm": 0.06660337001085281, + "learning_rate": 1.4809353264923741e-05, + "loss": 0.3677659332752228, + "step": 5848 + }, + { + "epoch": 1.0807474217874837, + "grad_norm": 0.08197630196809769, + "learning_rate": 1.480760411511033e-05, + "loss": 0.5850522518157959, + "step": 5849 + }, + { + "epoch": 1.0809321984963796, + "grad_norm": 0.08148553967475891, + "learning_rate": 1.4805854773969314e-05, + "loss": 0.556190013885498, + "step": 5850 + }, + { + "epoch": 1.0811169752052754, + "grad_norm": 0.089113749563694, + "learning_rate": 1.4804105241570312e-05, + "loss": 0.6048898696899414, + "step": 5851 + }, + { + "epoch": 1.0813017519141712, + "grad_norm": 0.06875333935022354, + "learning_rate": 1.4802355517982956e-05, + "loss": 0.5353031754493713, + "step": 5852 + }, + { + "epoch": 1.081486528623067, + "grad_norm": 0.06257513910531998, + "learning_rate": 1.4800605603276873e-05, + "loss": 0.5051626563072205, + "step": 5853 + }, + { + "epoch": 1.0816713053319629, + "grad_norm": 0.07647594064474106, + "learning_rate": 1.4798855497521705e-05, + "loss": 0.4592037796974182, + "step": 5854 + }, + { + "epoch": 1.0818560820408587, + "grad_norm": 0.07302510738372803, + "learning_rate": 1.4797105200787102e-05, + "loss": 0.4558219611644745, + "step": 5855 + }, + { + "epoch": 1.0820408587497545, + "grad_norm": 0.07816193252801895, + "learning_rate": 1.479535471314272e-05, + "loss": 0.47629034519195557, + "step": 5856 + }, + { + "epoch": 1.0822256354586504, + "grad_norm": 0.06925367563962936, + "learning_rate": 1.4793604034658224e-05, + "loss": 0.4966387450695038, + "step": 5857 + }, + { + "epoch": 1.0824104121675462, + "grad_norm": 0.0733964666724205, + "learning_rate": 1.4791853165403284e-05, + "loss": 0.4893820881843567, + "step": 5858 + }, + { + "epoch": 1.082595188876442, + "grad_norm": 0.06309165805578232, + "learning_rate": 1.4790102105447582e-05, + "loss": 0.4359748661518097, + "step": 5859 + }, + { + "epoch": 1.082779965585338, + "grad_norm": 0.08443053811788559, + "learning_rate": 1.4788350854860803e-05, + "loss": 0.5850485563278198, + "step": 5860 + }, + { + "epoch": 1.082964742294234, + "grad_norm": 0.067757748067379, + "learning_rate": 1.4786599413712634e-05, + "loss": 0.4967274069786072, + "step": 5861 + }, + { + "epoch": 1.0831495190031297, + "grad_norm": 0.06535761803388596, + "learning_rate": 1.478484778207279e-05, + "loss": 0.546383261680603, + "step": 5862 + }, + { + "epoch": 1.0833342957120256, + "grad_norm": 0.07297582179307938, + "learning_rate": 1.4783095960010973e-05, + "loss": 0.6665569543838501, + "step": 5863 + }, + { + "epoch": 1.0835190724209214, + "grad_norm": 0.08054815232753754, + "learning_rate": 1.4781343947596903e-05, + "loss": 0.5554509162902832, + "step": 5864 + }, + { + "epoch": 1.0837038491298172, + "grad_norm": 0.06834076344966888, + "learning_rate": 1.4779591744900298e-05, + "loss": 0.47249260544776917, + "step": 5865 + }, + { + "epoch": 1.083888625838713, + "grad_norm": 0.07324875891208649, + "learning_rate": 1.4777839351990898e-05, + "loss": 0.546453058719635, + "step": 5866 + }, + { + "epoch": 1.0840734025476089, + "grad_norm": 0.07995382696390152, + "learning_rate": 1.4776086768938438e-05, + "loss": 0.5270389318466187, + "step": 5867 + }, + { + "epoch": 1.0842581792565047, + "grad_norm": 0.054281000047922134, + "learning_rate": 1.4774333995812669e-05, + "loss": 0.3481077551841736, + "step": 5868 + }, + { + "epoch": 1.0844429559654005, + "grad_norm": 0.0845366045832634, + "learning_rate": 1.4772581032683343e-05, + "loss": 0.6981854438781738, + "step": 5869 + }, + { + "epoch": 1.0846277326742964, + "grad_norm": 0.07454873621463776, + "learning_rate": 1.4770827879620227e-05, + "loss": 0.5273903608322144, + "step": 5870 + }, + { + "epoch": 1.0848125093831922, + "grad_norm": 0.0768999382853508, + "learning_rate": 1.4769074536693082e-05, + "loss": 0.500775933265686, + "step": 5871 + }, + { + "epoch": 1.084997286092088, + "grad_norm": 0.06956993043422699, + "learning_rate": 1.4767321003971693e-05, + "loss": 0.3707350492477417, + "step": 5872 + }, + { + "epoch": 1.0851820628009838, + "grad_norm": 0.074930340051651, + "learning_rate": 1.4765567281525843e-05, + "loss": 0.5382987856864929, + "step": 5873 + }, + { + "epoch": 1.0853668395098799, + "grad_norm": 0.07988627254962921, + "learning_rate": 1.4763813369425325e-05, + "loss": 0.6682331562042236, + "step": 5874 + }, + { + "epoch": 1.0855516162187757, + "grad_norm": 0.07061360031366348, + "learning_rate": 1.4762059267739939e-05, + "loss": 0.40944498777389526, + "step": 5875 + }, + { + "epoch": 1.0857363929276715, + "grad_norm": 0.07396155595779419, + "learning_rate": 1.4760304976539492e-05, + "loss": 0.47943374514579773, + "step": 5876 + }, + { + "epoch": 1.0859211696365674, + "grad_norm": 0.08270414173603058, + "learning_rate": 1.47585504958938e-05, + "loss": 0.4946344494819641, + "step": 5877 + }, + { + "epoch": 1.0861059463454632, + "grad_norm": 0.08222021907567978, + "learning_rate": 1.4756795825872687e-05, + "loss": 0.5393399596214294, + "step": 5878 + }, + { + "epoch": 1.086290723054359, + "grad_norm": 0.07117902487516403, + "learning_rate": 1.4755040966545982e-05, + "loss": 0.5953274369239807, + "step": 5879 + }, + { + "epoch": 1.0864754997632549, + "grad_norm": 0.06577019393444061, + "learning_rate": 1.4753285917983522e-05, + "loss": 0.39420297741889954, + "step": 5880 + }, + { + "epoch": 1.0866602764721507, + "grad_norm": 0.06357182562351227, + "learning_rate": 1.4751530680255155e-05, + "loss": 0.38492023944854736, + "step": 5881 + }, + { + "epoch": 1.0868450531810465, + "grad_norm": 0.06568669527769089, + "learning_rate": 1.4749775253430732e-05, + "loss": 0.47780489921569824, + "step": 5882 + }, + { + "epoch": 1.0870298298899423, + "grad_norm": 0.05697185918688774, + "learning_rate": 1.4748019637580116e-05, + "loss": 0.4504018723964691, + "step": 5883 + }, + { + "epoch": 1.0872146065988382, + "grad_norm": 0.06915974617004395, + "learning_rate": 1.4746263832773168e-05, + "loss": 0.5515559315681458, + "step": 5884 + }, + { + "epoch": 1.087399383307734, + "grad_norm": 0.06538233160972595, + "learning_rate": 1.4744507839079772e-05, + "loss": 0.572848379611969, + "step": 5885 + }, + { + "epoch": 1.0875841600166298, + "grad_norm": 0.07176683843135834, + "learning_rate": 1.4742751656569806e-05, + "loss": 0.4935082495212555, + "step": 5886 + }, + { + "epoch": 1.0877689367255257, + "grad_norm": 0.07255774736404419, + "learning_rate": 1.4740995285313165e-05, + "loss": 0.5060724020004272, + "step": 5887 + }, + { + "epoch": 1.0879537134344215, + "grad_norm": 0.058834806084632874, + "learning_rate": 1.4739238725379743e-05, + "loss": 0.5066661238670349, + "step": 5888 + }, + { + "epoch": 1.0881384901433175, + "grad_norm": 0.07082164287567139, + "learning_rate": 1.4737481976839448e-05, + "loss": 0.4143075942993164, + "step": 5889 + }, + { + "epoch": 1.0883232668522134, + "grad_norm": 0.09433870762586594, + "learning_rate": 1.473572503976219e-05, + "loss": 0.7209493517875671, + "step": 5890 + }, + { + "epoch": 1.0885080435611092, + "grad_norm": 0.06608996540307999, + "learning_rate": 1.4733967914217893e-05, + "loss": 0.502673864364624, + "step": 5891 + }, + { + "epoch": 1.088692820270005, + "grad_norm": 0.07874344289302826, + "learning_rate": 1.4732210600276481e-05, + "loss": 0.5536506175994873, + "step": 5892 + }, + { + "epoch": 1.0888775969789009, + "grad_norm": 0.07617444545030594, + "learning_rate": 1.4730453098007896e-05, + "loss": 0.5813974738121033, + "step": 5893 + }, + { + "epoch": 1.0890623736877967, + "grad_norm": 0.05752871558070183, + "learning_rate": 1.4728695407482074e-05, + "loss": 0.33988383412361145, + "step": 5894 + }, + { + "epoch": 1.0892471503966925, + "grad_norm": 0.08659808337688446, + "learning_rate": 1.4726937528768971e-05, + "loss": 0.6173219680786133, + "step": 5895 + }, + { + "epoch": 1.0894319271055883, + "grad_norm": 0.06591004878282547, + "learning_rate": 1.4725179461938544e-05, + "loss": 0.515119731426239, + "step": 5896 + }, + { + "epoch": 1.0896167038144842, + "grad_norm": 0.08528603613376617, + "learning_rate": 1.4723421207060756e-05, + "loss": 0.6740525960922241, + "step": 5897 + }, + { + "epoch": 1.08980148052338, + "grad_norm": 0.0652061402797699, + "learning_rate": 1.4721662764205583e-05, + "loss": 0.483233243227005, + "step": 5898 + }, + { + "epoch": 1.0899862572322758, + "grad_norm": 0.08202153444290161, + "learning_rate": 1.4719904133443e-05, + "loss": 0.5837781429290771, + "step": 5899 + }, + { + "epoch": 1.0901710339411717, + "grad_norm": 0.09289800375699997, + "learning_rate": 1.4718145314843004e-05, + "loss": 0.6462467908859253, + "step": 5900 + }, + { + "epoch": 1.0903558106500675, + "grad_norm": 0.07023860514163971, + "learning_rate": 1.4716386308475583e-05, + "loss": 0.5342769622802734, + "step": 5901 + }, + { + "epoch": 1.0905405873589633, + "grad_norm": 0.07424052804708481, + "learning_rate": 1.4714627114410744e-05, + "loss": 0.6305635571479797, + "step": 5902 + }, + { + "epoch": 1.0907253640678594, + "grad_norm": 0.09783070534467697, + "learning_rate": 1.4712867732718496e-05, + "loss": 0.7436492443084717, + "step": 5903 + }, + { + "epoch": 1.0909101407767552, + "grad_norm": 0.09632259607315063, + "learning_rate": 1.4711108163468857e-05, + "loss": 0.7273596525192261, + "step": 5904 + }, + { + "epoch": 1.091094917485651, + "grad_norm": 0.06209570914506912, + "learning_rate": 1.4709348406731851e-05, + "loss": 0.4110969305038452, + "step": 5905 + }, + { + "epoch": 1.0912796941945468, + "grad_norm": 0.06332927197217941, + "learning_rate": 1.4707588462577513e-05, + "loss": 0.42797258496284485, + "step": 5906 + }, + { + "epoch": 1.0914644709034427, + "grad_norm": 0.09194277226924896, + "learning_rate": 1.4705828331075883e-05, + "loss": 0.7485794425010681, + "step": 5907 + }, + { + "epoch": 1.0916492476123385, + "grad_norm": 0.07481253892183304, + "learning_rate": 1.4704068012297009e-05, + "loss": 0.4699377417564392, + "step": 5908 + }, + { + "epoch": 1.0918340243212343, + "grad_norm": 0.0622544102370739, + "learning_rate": 1.4702307506310943e-05, + "loss": 0.45635709166526794, + "step": 5909 + }, + { + "epoch": 1.0920188010301302, + "grad_norm": 0.07138415426015854, + "learning_rate": 1.4700546813187749e-05, + "loss": 0.5593393445014954, + "step": 5910 + }, + { + "epoch": 1.092203577739026, + "grad_norm": 0.0714128315448761, + "learning_rate": 1.4698785932997499e-05, + "loss": 0.5067830681800842, + "step": 5911 + }, + { + "epoch": 1.0923883544479218, + "grad_norm": 0.07260000705718994, + "learning_rate": 1.4697024865810269e-05, + "loss": 0.5159979462623596, + "step": 5912 + }, + { + "epoch": 1.0925731311568176, + "grad_norm": 0.07511389255523682, + "learning_rate": 1.4695263611696146e-05, + "loss": 0.5461058020591736, + "step": 5913 + }, + { + "epoch": 1.0927579078657135, + "grad_norm": 0.06673587113618851, + "learning_rate": 1.469350217072522e-05, + "loss": 0.38038143515586853, + "step": 5914 + }, + { + "epoch": 1.0929426845746093, + "grad_norm": 0.08167888224124908, + "learning_rate": 1.4691740542967594e-05, + "loss": 0.5731920599937439, + "step": 5915 + }, + { + "epoch": 1.0931274612835051, + "grad_norm": 0.07429999113082886, + "learning_rate": 1.4689978728493368e-05, + "loss": 0.5720574259757996, + "step": 5916 + }, + { + "epoch": 1.093312237992401, + "grad_norm": 0.0745207667350769, + "learning_rate": 1.4688216727372664e-05, + "loss": 0.47233352065086365, + "step": 5917 + }, + { + "epoch": 1.0934970147012968, + "grad_norm": 0.08932074159383774, + "learning_rate": 1.4686454539675606e-05, + "loss": 0.6862395405769348, + "step": 5918 + }, + { + "epoch": 1.0936817914101928, + "grad_norm": 0.07582224160432816, + "learning_rate": 1.4684692165472316e-05, + "loss": 0.6259335875511169, + "step": 5919 + }, + { + "epoch": 1.0938665681190887, + "grad_norm": 0.07972133904695511, + "learning_rate": 1.468292960483293e-05, + "loss": 0.4001395106315613, + "step": 5920 + }, + { + "epoch": 1.0940513448279845, + "grad_norm": 0.06810474395751953, + "learning_rate": 1.4681166857827603e-05, + "loss": 0.567063570022583, + "step": 5921 + }, + { + "epoch": 1.0942361215368803, + "grad_norm": 0.06280171126127243, + "learning_rate": 1.4679403924526479e-05, + "loss": 0.41762790083885193, + "step": 5922 + }, + { + "epoch": 1.0944208982457762, + "grad_norm": 0.06112830713391304, + "learning_rate": 1.4677640804999716e-05, + "loss": 0.462650328874588, + "step": 5923 + }, + { + "epoch": 1.094605674954672, + "grad_norm": 0.0655970349907875, + "learning_rate": 1.4675877499317486e-05, + "loss": 0.39639896154403687, + "step": 5924 + }, + { + "epoch": 1.0947904516635678, + "grad_norm": 0.08832792192697525, + "learning_rate": 1.467411400754996e-05, + "loss": 0.6320725679397583, + "step": 5925 + }, + { + "epoch": 1.0949752283724636, + "grad_norm": 0.06304946541786194, + "learning_rate": 1.467235032976732e-05, + "loss": 0.44484469294548035, + "step": 5926 + }, + { + "epoch": 1.0951600050813595, + "grad_norm": 0.07686931639909744, + "learning_rate": 1.4670586466039753e-05, + "loss": 0.5905748009681702, + "step": 5927 + }, + { + "epoch": 1.0953447817902553, + "grad_norm": 0.08200360089540482, + "learning_rate": 1.4668822416437461e-05, + "loss": 0.5519489645957947, + "step": 5928 + }, + { + "epoch": 1.0955295584991511, + "grad_norm": 0.0726717934012413, + "learning_rate": 1.4667058181030642e-05, + "loss": 0.594283938407898, + "step": 5929 + }, + { + "epoch": 1.095714335208047, + "grad_norm": 0.07586503028869629, + "learning_rate": 1.4665293759889506e-05, + "loss": 0.5935530662536621, + "step": 5930 + }, + { + "epoch": 1.0958991119169428, + "grad_norm": 0.06125348433852196, + "learning_rate": 1.4663529153084275e-05, + "loss": 0.4546845853328705, + "step": 5931 + }, + { + "epoch": 1.0960838886258386, + "grad_norm": 0.060897890478372574, + "learning_rate": 1.4661764360685178e-05, + "loss": 0.4350041151046753, + "step": 5932 + }, + { + "epoch": 1.0962686653347347, + "grad_norm": 0.059175021946430206, + "learning_rate": 1.465999938276244e-05, + "loss": 0.4221407175064087, + "step": 5933 + }, + { + "epoch": 1.0964534420436305, + "grad_norm": 0.06250432133674622, + "learning_rate": 1.4658234219386307e-05, + "loss": 0.46823054552078247, + "step": 5934 + }, + { + "epoch": 1.0966382187525263, + "grad_norm": 0.06827010214328766, + "learning_rate": 1.4656468870627028e-05, + "loss": 0.4594465494155884, + "step": 5935 + }, + { + "epoch": 1.0968229954614221, + "grad_norm": 0.08258457481861115, + "learning_rate": 1.4654703336554852e-05, + "loss": 0.6088943481445312, + "step": 5936 + }, + { + "epoch": 1.097007772170318, + "grad_norm": 0.07347897440195084, + "learning_rate": 1.4652937617240049e-05, + "loss": 0.529721736907959, + "step": 5937 + }, + { + "epoch": 1.0971925488792138, + "grad_norm": 0.06577084958553314, + "learning_rate": 1.4651171712752886e-05, + "loss": 0.4842659533023834, + "step": 5938 + }, + { + "epoch": 1.0973773255881096, + "grad_norm": 0.06812813133001328, + "learning_rate": 1.464940562316364e-05, + "loss": 0.5418099164962769, + "step": 5939 + }, + { + "epoch": 1.0975621022970055, + "grad_norm": 0.06750774383544922, + "learning_rate": 1.4647639348542593e-05, + "loss": 0.43857085704803467, + "step": 5940 + }, + { + "epoch": 1.0977468790059013, + "grad_norm": 0.0675678625702858, + "learning_rate": 1.4645872888960045e-05, + "loss": 0.39154133200645447, + "step": 5941 + }, + { + "epoch": 1.0979316557147971, + "grad_norm": 0.0663144662976265, + "learning_rate": 1.4644106244486291e-05, + "loss": 0.5225595831871033, + "step": 5942 + }, + { + "epoch": 1.098116432423693, + "grad_norm": 0.062324948608875275, + "learning_rate": 1.4642339415191636e-05, + "loss": 0.496875137090683, + "step": 5943 + }, + { + "epoch": 1.0983012091325888, + "grad_norm": 0.07069569081068039, + "learning_rate": 1.4640572401146396e-05, + "loss": 0.513957679271698, + "step": 5944 + }, + { + "epoch": 1.0984859858414846, + "grad_norm": 0.08459258824586868, + "learning_rate": 1.4638805202420896e-05, + "loss": 0.5632613897323608, + "step": 5945 + }, + { + "epoch": 1.0986707625503804, + "grad_norm": 0.0628662258386612, + "learning_rate": 1.4637037819085458e-05, + "loss": 0.48443713784217834, + "step": 5946 + }, + { + "epoch": 1.0988555392592763, + "grad_norm": 0.06784513592720032, + "learning_rate": 1.4635270251210423e-05, + "loss": 0.47913438081741333, + "step": 5947 + }, + { + "epoch": 1.0990403159681723, + "grad_norm": 0.0707409605383873, + "learning_rate": 1.4633502498866136e-05, + "loss": 0.4980694353580475, + "step": 5948 + }, + { + "epoch": 1.0992250926770681, + "grad_norm": 0.05797749012708664, + "learning_rate": 1.4631734562122945e-05, + "loss": 0.36766472458839417, + "step": 5949 + }, + { + "epoch": 1.099409869385964, + "grad_norm": 0.095884308218956, + "learning_rate": 1.4629966441051208e-05, + "loss": 0.6991642713546753, + "step": 5950 + }, + { + "epoch": 1.0995946460948598, + "grad_norm": 0.06615202128887177, + "learning_rate": 1.4628198135721295e-05, + "loss": 0.5221999883651733, + "step": 5951 + }, + { + "epoch": 1.0997794228037556, + "grad_norm": 0.05633965879678726, + "learning_rate": 1.4626429646203575e-05, + "loss": 0.4555867314338684, + "step": 5952 + }, + { + "epoch": 1.0999641995126515, + "grad_norm": 0.07581228017807007, + "learning_rate": 1.4624660972568427e-05, + "loss": 0.5978538990020752, + "step": 5953 + }, + { + "epoch": 1.1001489762215473, + "grad_norm": 0.08281077444553375, + "learning_rate": 1.4622892114886243e-05, + "loss": 0.618635892868042, + "step": 5954 + }, + { + "epoch": 1.100333752930443, + "grad_norm": 0.0922408252954483, + "learning_rate": 1.4621123073227414e-05, + "loss": 0.5132061243057251, + "step": 5955 + }, + { + "epoch": 1.100518529639339, + "grad_norm": 0.0687292069196701, + "learning_rate": 1.4619353847662346e-05, + "loss": 0.43602538108825684, + "step": 5956 + }, + { + "epoch": 1.1007033063482348, + "grad_norm": 0.061973683536052704, + "learning_rate": 1.4617584438261445e-05, + "loss": 0.3978612720966339, + "step": 5957 + }, + { + "epoch": 1.1008880830571306, + "grad_norm": 0.07644131034612656, + "learning_rate": 1.4615814845095134e-05, + "loss": 0.558164656162262, + "step": 5958 + }, + { + "epoch": 1.1010728597660264, + "grad_norm": 0.0646175965666771, + "learning_rate": 1.461404506823383e-05, + "loss": 0.5489871501922607, + "step": 5959 + }, + { + "epoch": 1.1012576364749223, + "grad_norm": 0.08470311760902405, + "learning_rate": 1.4612275107747968e-05, + "loss": 0.5196349024772644, + "step": 5960 + }, + { + "epoch": 1.101442413183818, + "grad_norm": 0.07912751287221909, + "learning_rate": 1.4610504963707988e-05, + "loss": 0.659913957118988, + "step": 5961 + }, + { + "epoch": 1.1016271898927141, + "grad_norm": 0.07972804456949234, + "learning_rate": 1.4608734636184333e-05, + "loss": 0.5457559823989868, + "step": 5962 + }, + { + "epoch": 1.10181196660161, + "grad_norm": 0.0745624229311943, + "learning_rate": 1.4606964125247461e-05, + "loss": 0.46467500925064087, + "step": 5963 + }, + { + "epoch": 1.1019967433105058, + "grad_norm": 0.07970108091831207, + "learning_rate": 1.4605193430967827e-05, + "loss": 0.7072663903236389, + "step": 5964 + }, + { + "epoch": 1.1021815200194016, + "grad_norm": 0.10347652435302734, + "learning_rate": 1.4603422553415905e-05, + "loss": 0.759666919708252, + "step": 5965 + }, + { + "epoch": 1.1023662967282974, + "grad_norm": 0.06851720064878464, + "learning_rate": 1.4601651492662166e-05, + "loss": 0.420702189207077, + "step": 5966 + }, + { + "epoch": 1.1025510734371933, + "grad_norm": 0.051265593618154526, + "learning_rate": 1.4599880248777094e-05, + "loss": 0.3847259283065796, + "step": 5967 + }, + { + "epoch": 1.102735850146089, + "grad_norm": 0.08739349246025085, + "learning_rate": 1.4598108821831181e-05, + "loss": 0.5255281329154968, + "step": 5968 + }, + { + "epoch": 1.102920626854985, + "grad_norm": 0.08335155993700027, + "learning_rate": 1.4596337211894922e-05, + "loss": 0.529498279094696, + "step": 5969 + }, + { + "epoch": 1.1031054035638808, + "grad_norm": 0.08187707513570786, + "learning_rate": 1.4594565419038822e-05, + "loss": 0.5741780996322632, + "step": 5970 + }, + { + "epoch": 1.1032901802727766, + "grad_norm": 0.08028721064329147, + "learning_rate": 1.459279344333339e-05, + "loss": 0.5540239810943604, + "step": 5971 + }, + { + "epoch": 1.1034749569816724, + "grad_norm": 0.07067148387432098, + "learning_rate": 1.4591021284849152e-05, + "loss": 0.43878358602523804, + "step": 5972 + }, + { + "epoch": 1.1036597336905682, + "grad_norm": 0.06293197721242905, + "learning_rate": 1.4589248943656629e-05, + "loss": 0.3931485414505005, + "step": 5973 + }, + { + "epoch": 1.103844510399464, + "grad_norm": 0.07255510240793228, + "learning_rate": 1.4587476419826354e-05, + "loss": 0.5277048349380493, + "step": 5974 + }, + { + "epoch": 1.10402928710836, + "grad_norm": 0.06751251220703125, + "learning_rate": 1.4585703713428873e-05, + "loss": 0.5238932371139526, + "step": 5975 + }, + { + "epoch": 1.1042140638172557, + "grad_norm": 0.06556138396263123, + "learning_rate": 1.4583930824534729e-05, + "loss": 0.41263192892074585, + "step": 5976 + }, + { + "epoch": 1.1043988405261518, + "grad_norm": 0.07805372029542923, + "learning_rate": 1.4582157753214482e-05, + "loss": 0.6438567638397217, + "step": 5977 + }, + { + "epoch": 1.1045836172350476, + "grad_norm": 0.07314425706863403, + "learning_rate": 1.4580384499538688e-05, + "loss": 0.49132171273231506, + "step": 5978 + }, + { + "epoch": 1.1047683939439434, + "grad_norm": 0.08974375575780869, + "learning_rate": 1.4578611063577925e-05, + "loss": 0.7477900981903076, + "step": 5979 + }, + { + "epoch": 1.1049531706528393, + "grad_norm": 0.08745142817497253, + "learning_rate": 1.4576837445402765e-05, + "loss": 0.5474734902381897, + "step": 5980 + }, + { + "epoch": 1.105137947361735, + "grad_norm": 0.08209695667028427, + "learning_rate": 1.4575063645083792e-05, + "loss": 0.5888841152191162, + "step": 5981 + }, + { + "epoch": 1.105322724070631, + "grad_norm": 0.0720118060708046, + "learning_rate": 1.4573289662691601e-05, + "loss": 0.5148651599884033, + "step": 5982 + }, + { + "epoch": 1.1055075007795268, + "grad_norm": 0.07805000245571136, + "learning_rate": 1.457151549829679e-05, + "loss": 0.6746365427970886, + "step": 5983 + }, + { + "epoch": 1.1056922774884226, + "grad_norm": 0.08017796277999878, + "learning_rate": 1.4569741151969963e-05, + "loss": 0.5076448321342468, + "step": 5984 + }, + { + "epoch": 1.1058770541973184, + "grad_norm": 0.08330678194761276, + "learning_rate": 1.4567966623781736e-05, + "loss": 0.5435523986816406, + "step": 5985 + }, + { + "epoch": 1.1060618309062142, + "grad_norm": 0.07538160681724548, + "learning_rate": 1.4566191913802728e-05, + "loss": 0.5347604155540466, + "step": 5986 + }, + { + "epoch": 1.10624660761511, + "grad_norm": 0.06279928982257843, + "learning_rate": 1.456441702210357e-05, + "loss": 0.38746389746665955, + "step": 5987 + }, + { + "epoch": 1.106431384324006, + "grad_norm": 0.09230431169271469, + "learning_rate": 1.4562641948754891e-05, + "loss": 0.6264443397521973, + "step": 5988 + }, + { + "epoch": 1.1066161610329017, + "grad_norm": 0.065264031291008, + "learning_rate": 1.456086669382734e-05, + "loss": 0.5556307435035706, + "step": 5989 + }, + { + "epoch": 1.1068009377417976, + "grad_norm": 0.07666502147912979, + "learning_rate": 1.4559091257391562e-05, + "loss": 0.4763059914112091, + "step": 5990 + }, + { + "epoch": 1.1069857144506936, + "grad_norm": 0.07860376685857773, + "learning_rate": 1.4557315639518216e-05, + "loss": 0.5905876159667969, + "step": 5991 + }, + { + "epoch": 1.1071704911595894, + "grad_norm": 0.07574237138032913, + "learning_rate": 1.4555539840277968e-05, + "loss": 0.5085563063621521, + "step": 5992 + }, + { + "epoch": 1.1073552678684853, + "grad_norm": 0.06739070266485214, + "learning_rate": 1.4553763859741484e-05, + "loss": 0.5284940004348755, + "step": 5993 + }, + { + "epoch": 1.107540044577381, + "grad_norm": 0.06255804002285004, + "learning_rate": 1.4551987697979447e-05, + "loss": 0.3962157964706421, + "step": 5994 + }, + { + "epoch": 1.107724821286277, + "grad_norm": 0.07420317083597183, + "learning_rate": 1.4550211355062537e-05, + "loss": 0.6526479125022888, + "step": 5995 + }, + { + "epoch": 1.1079095979951727, + "grad_norm": 0.08326204866170883, + "learning_rate": 1.4548434831061456e-05, + "loss": 0.5966176390647888, + "step": 5996 + }, + { + "epoch": 1.1080943747040686, + "grad_norm": 0.06730221211910248, + "learning_rate": 1.4546658126046898e-05, + "loss": 0.5358225703239441, + "step": 5997 + }, + { + "epoch": 1.1082791514129644, + "grad_norm": 0.06927043199539185, + "learning_rate": 1.4544881240089568e-05, + "loss": 0.5677108764648438, + "step": 5998 + }, + { + "epoch": 1.1084639281218602, + "grad_norm": 0.05860039219260216, + "learning_rate": 1.4543104173260187e-05, + "loss": 0.4075140058994293, + "step": 5999 + }, + { + "epoch": 1.108648704830756, + "grad_norm": 0.08386365324258804, + "learning_rate": 1.454132692562947e-05, + "loss": 0.5389184951782227, + "step": 6000 + }, + { + "epoch": 1.108648704830756, + "eval_loss": 0.6118724346160889, + "eval_runtime": 159.1496, + "eval_samples_per_second": 114.54, + "eval_steps_per_second": 14.32, + "step": 6000 + }, + { + "epoch": 1.1088334815396519, + "grad_norm": 0.08243799954652786, + "learning_rate": 1.4539549497268155e-05, + "loss": 0.534057080745697, + "step": 6001 + }, + { + "epoch": 1.1090182582485477, + "grad_norm": 0.07323864102363586, + "learning_rate": 1.4537771888246967e-05, + "loss": 0.526421070098877, + "step": 6002 + }, + { + "epoch": 1.1092030349574435, + "grad_norm": 0.07760884612798691, + "learning_rate": 1.4535994098636656e-05, + "loss": 0.5368732810020447, + "step": 6003 + }, + { + "epoch": 1.1093878116663394, + "grad_norm": 0.08383920788764954, + "learning_rate": 1.4534216128507974e-05, + "loss": 0.7088597416877747, + "step": 6004 + }, + { + "epoch": 1.1095725883752352, + "grad_norm": 0.0841090977191925, + "learning_rate": 1.4532437977931672e-05, + "loss": 0.6554641127586365, + "step": 6005 + }, + { + "epoch": 1.109757365084131, + "grad_norm": 0.09048908203840256, + "learning_rate": 1.453065964697852e-05, + "loss": 0.6777673363685608, + "step": 6006 + }, + { + "epoch": 1.109942141793027, + "grad_norm": 0.07771904021501541, + "learning_rate": 1.452888113571929e-05, + "loss": 0.5503280758857727, + "step": 6007 + }, + { + "epoch": 1.110126918501923, + "grad_norm": 0.06885528564453125, + "learning_rate": 1.452710244422476e-05, + "loss": 0.43942975997924805, + "step": 6008 + }, + { + "epoch": 1.1103116952108187, + "grad_norm": 0.07003390043973923, + "learning_rate": 1.4525323572565713e-05, + "loss": 0.48695823550224304, + "step": 6009 + }, + { + "epoch": 1.1104964719197146, + "grad_norm": 0.08813440054655075, + "learning_rate": 1.4523544520812949e-05, + "loss": 0.7120080590248108, + "step": 6010 + }, + { + "epoch": 1.1106812486286104, + "grad_norm": 0.0674251914024353, + "learning_rate": 1.4521765289037264e-05, + "loss": 0.48009634017944336, + "step": 6011 + }, + { + "epoch": 1.1108660253375062, + "grad_norm": 0.06363063305616379, + "learning_rate": 1.4519985877309468e-05, + "loss": 0.4351237714290619, + "step": 6012 + }, + { + "epoch": 1.111050802046402, + "grad_norm": 0.06190628930926323, + "learning_rate": 1.4518206285700373e-05, + "loss": 0.43532848358154297, + "step": 6013 + }, + { + "epoch": 1.1112355787552979, + "grad_norm": 0.06464696675539017, + "learning_rate": 1.4516426514280806e-05, + "loss": 0.3579815924167633, + "step": 6014 + }, + { + "epoch": 1.1114203554641937, + "grad_norm": 0.06103749945759773, + "learning_rate": 1.4514646563121592e-05, + "loss": 0.37997567653656006, + "step": 6015 + }, + { + "epoch": 1.1116051321730895, + "grad_norm": 0.07939308881759644, + "learning_rate": 1.4512866432293571e-05, + "loss": 0.5923547148704529, + "step": 6016 + }, + { + "epoch": 1.1117899088819854, + "grad_norm": 0.07335177063941956, + "learning_rate": 1.4511086121867584e-05, + "loss": 0.5131193399429321, + "step": 6017 + }, + { + "epoch": 1.1119746855908812, + "grad_norm": 0.0859297588467598, + "learning_rate": 1.4509305631914485e-05, + "loss": 0.6909281611442566, + "step": 6018 + }, + { + "epoch": 1.112159462299777, + "grad_norm": 0.08923984318971634, + "learning_rate": 1.4507524962505129e-05, + "loss": 0.6461963653564453, + "step": 6019 + }, + { + "epoch": 1.112344239008673, + "grad_norm": 0.044790174812078476, + "learning_rate": 1.4505744113710378e-05, + "loss": 0.25615447759628296, + "step": 6020 + }, + { + "epoch": 1.112529015717569, + "grad_norm": 0.06457506865262985, + "learning_rate": 1.4503963085601112e-05, + "loss": 0.368161141872406, + "step": 6021 + }, + { + "epoch": 1.1127137924264647, + "grad_norm": 0.07680926471948624, + "learning_rate": 1.4502181878248203e-05, + "loss": 0.48869234323501587, + "step": 6022 + }, + { + "epoch": 1.1128985691353606, + "grad_norm": 0.05992692708969116, + "learning_rate": 1.4500400491722542e-05, + "loss": 0.3915933072566986, + "step": 6023 + }, + { + "epoch": 1.1130833458442564, + "grad_norm": 0.08236189186573029, + "learning_rate": 1.4498618926095023e-05, + "loss": 0.7077838182449341, + "step": 6024 + }, + { + "epoch": 1.1132681225531522, + "grad_norm": 0.07403172552585602, + "learning_rate": 1.4496837181436545e-05, + "loss": 0.5123920440673828, + "step": 6025 + }, + { + "epoch": 1.113452899262048, + "grad_norm": 0.08864319324493408, + "learning_rate": 1.4495055257818011e-05, + "loss": 0.9052874445915222, + "step": 6026 + }, + { + "epoch": 1.1136376759709439, + "grad_norm": 0.0638665109872818, + "learning_rate": 1.4493273155310349e-05, + "loss": 0.5355097055435181, + "step": 6027 + }, + { + "epoch": 1.1138224526798397, + "grad_norm": 0.07314230501651764, + "learning_rate": 1.4491490873984468e-05, + "loss": 0.5114896893501282, + "step": 6028 + }, + { + "epoch": 1.1140072293887355, + "grad_norm": 0.06396160274744034, + "learning_rate": 1.4489708413911303e-05, + "loss": 0.5015410780906677, + "step": 6029 + }, + { + "epoch": 1.1141920060976314, + "grad_norm": 0.07526998221874237, + "learning_rate": 1.4487925775161789e-05, + "loss": 0.6208897829055786, + "step": 6030 + }, + { + "epoch": 1.1143767828065272, + "grad_norm": 0.06430046260356903, + "learning_rate": 1.4486142957806873e-05, + "loss": 0.5269572734832764, + "step": 6031 + }, + { + "epoch": 1.114561559515423, + "grad_norm": 0.06466152518987656, + "learning_rate": 1.4484359961917497e-05, + "loss": 0.4159741997718811, + "step": 6032 + }, + { + "epoch": 1.1147463362243188, + "grad_norm": 0.06660066545009613, + "learning_rate": 1.4482576787564628e-05, + "loss": 0.5687733292579651, + "step": 6033 + }, + { + "epoch": 1.1149311129332147, + "grad_norm": 0.07294777780771255, + "learning_rate": 1.448079343481923e-05, + "loss": 0.5561234951019287, + "step": 6034 + }, + { + "epoch": 1.1151158896421105, + "grad_norm": 0.07686596363782883, + "learning_rate": 1.4479009903752268e-05, + "loss": 0.5526153445243835, + "step": 6035 + }, + { + "epoch": 1.1153006663510066, + "grad_norm": 0.0635218620300293, + "learning_rate": 1.4477226194434724e-05, + "loss": 0.48672422766685486, + "step": 6036 + }, + { + "epoch": 1.1154854430599024, + "grad_norm": 0.06493838876485825, + "learning_rate": 1.4475442306937586e-05, + "loss": 0.4204823076725006, + "step": 6037 + }, + { + "epoch": 1.1156702197687982, + "grad_norm": 0.0830598846077919, + "learning_rate": 1.447365824133185e-05, + "loss": 0.6063317060470581, + "step": 6038 + }, + { + "epoch": 1.115854996477694, + "grad_norm": 0.07490938156843185, + "learning_rate": 1.4471873997688506e-05, + "loss": 0.5406398177146912, + "step": 6039 + }, + { + "epoch": 1.1160397731865899, + "grad_norm": 0.07630017399787903, + "learning_rate": 1.447008957607857e-05, + "loss": 0.5382445454597473, + "step": 6040 + }, + { + "epoch": 1.1162245498954857, + "grad_norm": 0.06891347467899323, + "learning_rate": 1.4468304976573056e-05, + "loss": 0.4742557406425476, + "step": 6041 + }, + { + "epoch": 1.1164093266043815, + "grad_norm": 0.06572108715772629, + "learning_rate": 1.4466520199242982e-05, + "loss": 0.499061644077301, + "step": 6042 + }, + { + "epoch": 1.1165941033132774, + "grad_norm": 0.09027126431465149, + "learning_rate": 1.4464735244159376e-05, + "loss": 0.6984020471572876, + "step": 6043 + }, + { + "epoch": 1.1167788800221732, + "grad_norm": 0.061515845358371735, + "learning_rate": 1.446295011139328e-05, + "loss": 0.36859866976737976, + "step": 6044 + }, + { + "epoch": 1.116963656731069, + "grad_norm": 0.06884491443634033, + "learning_rate": 1.446116480101573e-05, + "loss": 0.42635002732276917, + "step": 6045 + }, + { + "epoch": 1.1171484334399648, + "grad_norm": 0.07418891042470932, + "learning_rate": 1.4459379313097777e-05, + "loss": 0.6263567805290222, + "step": 6046 + }, + { + "epoch": 1.1173332101488607, + "grad_norm": 0.06352093070745468, + "learning_rate": 1.445759364771048e-05, + "loss": 0.5206384062767029, + "step": 6047 + }, + { + "epoch": 1.1175179868577565, + "grad_norm": 0.08683977276086807, + "learning_rate": 1.4455807804924902e-05, + "loss": 0.7265470623970032, + "step": 6048 + }, + { + "epoch": 1.1177027635666523, + "grad_norm": 0.06020265817642212, + "learning_rate": 1.4454021784812113e-05, + "loss": 0.3667547404766083, + "step": 6049 + }, + { + "epoch": 1.1178875402755484, + "grad_norm": 0.06311333179473877, + "learning_rate": 1.4452235587443193e-05, + "loss": 0.3707239329814911, + "step": 6050 + }, + { + "epoch": 1.1180723169844442, + "grad_norm": 0.06688230484724045, + "learning_rate": 1.4450449212889226e-05, + "loss": 0.5023731589317322, + "step": 6051 + }, + { + "epoch": 1.11825709369334, + "grad_norm": 0.06343791633844376, + "learning_rate": 1.4448662661221302e-05, + "loss": 0.433843195438385, + "step": 6052 + }, + { + "epoch": 1.1184418704022359, + "grad_norm": 0.0687842145562172, + "learning_rate": 1.4446875932510522e-05, + "loss": 0.47582367062568665, + "step": 6053 + }, + { + "epoch": 1.1186266471111317, + "grad_norm": 0.09454551339149475, + "learning_rate": 1.4445089026827997e-05, + "loss": 0.6751272678375244, + "step": 6054 + }, + { + "epoch": 1.1188114238200275, + "grad_norm": 0.07350743561983109, + "learning_rate": 1.4443301944244832e-05, + "loss": 0.5118169784545898, + "step": 6055 + }, + { + "epoch": 1.1189962005289233, + "grad_norm": 0.07297584414482117, + "learning_rate": 1.4441514684832147e-05, + "loss": 0.5605136156082153, + "step": 6056 + }, + { + "epoch": 1.1191809772378192, + "grad_norm": 0.07259882241487503, + "learning_rate": 1.443972724866108e-05, + "loss": 0.3932836055755615, + "step": 6057 + }, + { + "epoch": 1.119365753946715, + "grad_norm": 0.076852947473526, + "learning_rate": 1.4437939635802759e-05, + "loss": 0.5650920271873474, + "step": 6058 + }, + { + "epoch": 1.1195505306556108, + "grad_norm": 0.06762345135211945, + "learning_rate": 1.4436151846328321e-05, + "loss": 0.5524746179580688, + "step": 6059 + }, + { + "epoch": 1.1197353073645067, + "grad_norm": 0.06991656869649887, + "learning_rate": 1.4434363880308917e-05, + "loss": 0.5342642664909363, + "step": 6060 + }, + { + "epoch": 1.1199200840734025, + "grad_norm": 0.07431400567293167, + "learning_rate": 1.4432575737815709e-05, + "loss": 0.5111920833587646, + "step": 6061 + }, + { + "epoch": 1.1201048607822983, + "grad_norm": 0.06559804826974869, + "learning_rate": 1.4430787418919851e-05, + "loss": 0.4708617031574249, + "step": 6062 + }, + { + "epoch": 1.1202896374911941, + "grad_norm": 0.08211726695299149, + "learning_rate": 1.4428998923692517e-05, + "loss": 0.5830162763595581, + "step": 6063 + }, + { + "epoch": 1.12047441420009, + "grad_norm": 0.06784723699092865, + "learning_rate": 1.4427210252204882e-05, + "loss": 0.6211737990379333, + "step": 6064 + }, + { + "epoch": 1.120659190908986, + "grad_norm": 0.07016149163246155, + "learning_rate": 1.4425421404528133e-05, + "loss": 0.5034101605415344, + "step": 6065 + }, + { + "epoch": 1.1208439676178819, + "grad_norm": 0.06789090484380722, + "learning_rate": 1.4423632380733452e-05, + "loss": 0.5031054019927979, + "step": 6066 + }, + { + "epoch": 1.1210287443267777, + "grad_norm": 0.07210063934326172, + "learning_rate": 1.4421843180892045e-05, + "loss": 0.5558570027351379, + "step": 6067 + }, + { + "epoch": 1.1212135210356735, + "grad_norm": 0.10158746689558029, + "learning_rate": 1.4420053805075113e-05, + "loss": 0.7106173038482666, + "step": 6068 + }, + { + "epoch": 1.1213982977445693, + "grad_norm": 0.05270679295063019, + "learning_rate": 1.4418264253353869e-05, + "loss": 0.4389966130256653, + "step": 6069 + }, + { + "epoch": 1.1215830744534652, + "grad_norm": 0.06519922614097595, + "learning_rate": 1.441647452579953e-05, + "loss": 0.42029258608818054, + "step": 6070 + }, + { + "epoch": 1.121767851162361, + "grad_norm": 0.06086088716983795, + "learning_rate": 1.4414684622483321e-05, + "loss": 0.4866493046283722, + "step": 6071 + }, + { + "epoch": 1.1219526278712568, + "grad_norm": 0.07039511948823929, + "learning_rate": 1.4412894543476479e-05, + "loss": 0.47862088680267334, + "step": 6072 + }, + { + "epoch": 1.1221374045801527, + "grad_norm": 0.09172705560922623, + "learning_rate": 1.4411104288850237e-05, + "loss": 0.6564626693725586, + "step": 6073 + }, + { + "epoch": 1.1223221812890485, + "grad_norm": 0.06046932563185692, + "learning_rate": 1.4409313858675847e-05, + "loss": 0.3803384006023407, + "step": 6074 + }, + { + "epoch": 1.1225069579979443, + "grad_norm": 0.09191624075174332, + "learning_rate": 1.440752325302456e-05, + "loss": 0.6493774652481079, + "step": 6075 + }, + { + "epoch": 1.1226917347068401, + "grad_norm": 0.0672389566898346, + "learning_rate": 1.4405732471967637e-05, + "loss": 0.43903395533561707, + "step": 6076 + }, + { + "epoch": 1.122876511415736, + "grad_norm": 0.0659782737493515, + "learning_rate": 1.4403941515576344e-05, + "loss": 0.49504348635673523, + "step": 6077 + }, + { + "epoch": 1.1230612881246318, + "grad_norm": 0.08274340629577637, + "learning_rate": 1.440215038392196e-05, + "loss": 0.5620540380477905, + "step": 6078 + }, + { + "epoch": 1.1232460648335278, + "grad_norm": 0.06952735781669617, + "learning_rate": 1.4400359077075758e-05, + "loss": 0.5920997858047485, + "step": 6079 + }, + { + "epoch": 1.1234308415424237, + "grad_norm": 0.06475818902254105, + "learning_rate": 1.4398567595109034e-05, + "loss": 0.4705714285373688, + "step": 6080 + }, + { + "epoch": 1.1236156182513195, + "grad_norm": 0.0710345134139061, + "learning_rate": 1.4396775938093084e-05, + "loss": 0.47257503867149353, + "step": 6081 + }, + { + "epoch": 1.1238003949602153, + "grad_norm": 0.07058537006378174, + "learning_rate": 1.4394984106099206e-05, + "loss": 0.5050140619277954, + "step": 6082 + }, + { + "epoch": 1.1239851716691112, + "grad_norm": 0.06465966254472733, + "learning_rate": 1.4393192099198711e-05, + "loss": 0.4918656349182129, + "step": 6083 + }, + { + "epoch": 1.124169948378007, + "grad_norm": 0.07355429232120514, + "learning_rate": 1.4391399917462913e-05, + "loss": 0.5089967250823975, + "step": 6084 + }, + { + "epoch": 1.1243547250869028, + "grad_norm": 0.07652002573013306, + "learning_rate": 1.4389607560963139e-05, + "loss": 0.47434723377227783, + "step": 6085 + }, + { + "epoch": 1.1245395017957986, + "grad_norm": 0.08014384657144547, + "learning_rate": 1.4387815029770715e-05, + "loss": 0.6197258234024048, + "step": 6086 + }, + { + "epoch": 1.1247242785046945, + "grad_norm": 0.06608502566814423, + "learning_rate": 1.4386022323956983e-05, + "loss": 0.5037209987640381, + "step": 6087 + }, + { + "epoch": 1.1249090552135903, + "grad_norm": 0.07929182052612305, + "learning_rate": 1.4384229443593285e-05, + "loss": 0.6429815888404846, + "step": 6088 + }, + { + "epoch": 1.1250938319224861, + "grad_norm": 0.06564852595329285, + "learning_rate": 1.4382436388750968e-05, + "loss": 0.4675427973270416, + "step": 6089 + }, + { + "epoch": 1.125278608631382, + "grad_norm": 0.06928091496229172, + "learning_rate": 1.4380643159501398e-05, + "loss": 0.46712902188301086, + "step": 6090 + }, + { + "epoch": 1.1254633853402778, + "grad_norm": 0.08031101524829865, + "learning_rate": 1.4378849755915934e-05, + "loss": 0.6043239235877991, + "step": 6091 + }, + { + "epoch": 1.1256481620491736, + "grad_norm": 0.09885122627019882, + "learning_rate": 1.4377056178065947e-05, + "loss": 0.6168032288551331, + "step": 6092 + }, + { + "epoch": 1.1258329387580694, + "grad_norm": 0.060262907296419144, + "learning_rate": 1.4375262426022821e-05, + "loss": 0.39597856998443604, + "step": 6093 + }, + { + "epoch": 1.1260177154669653, + "grad_norm": 0.05512017384171486, + "learning_rate": 1.4373468499857937e-05, + "loss": 0.3198016285896301, + "step": 6094 + }, + { + "epoch": 1.1262024921758613, + "grad_norm": 0.08341045677661896, + "learning_rate": 1.4371674399642693e-05, + "loss": 0.5664653778076172, + "step": 6095 + }, + { + "epoch": 1.1263872688847572, + "grad_norm": 0.07183565199375153, + "learning_rate": 1.4369880125448481e-05, + "loss": 0.47869887948036194, + "step": 6096 + }, + { + "epoch": 1.126572045593653, + "grad_norm": 0.07197265326976776, + "learning_rate": 1.4368085677346713e-05, + "loss": 0.42871785163879395, + "step": 6097 + }, + { + "epoch": 1.1267568223025488, + "grad_norm": 0.08379499614238739, + "learning_rate": 1.4366291055408801e-05, + "loss": 0.5623905062675476, + "step": 6098 + }, + { + "epoch": 1.1269415990114446, + "grad_norm": 0.08065547049045563, + "learning_rate": 1.4364496259706165e-05, + "loss": 0.5996270179748535, + "step": 6099 + }, + { + "epoch": 1.1271263757203405, + "grad_norm": 0.07119474560022354, + "learning_rate": 1.4362701290310234e-05, + "loss": 0.47350260615348816, + "step": 6100 + }, + { + "epoch": 1.1273111524292363, + "grad_norm": 0.06404874473810196, + "learning_rate": 1.436090614729244e-05, + "loss": 0.475182443857193, + "step": 6101 + }, + { + "epoch": 1.1274959291381321, + "grad_norm": 0.06995225697755814, + "learning_rate": 1.4359110830724222e-05, + "loss": 0.5138711929321289, + "step": 6102 + }, + { + "epoch": 1.127680705847028, + "grad_norm": 0.06132664531469345, + "learning_rate": 1.4357315340677036e-05, + "loss": 0.43427854776382446, + "step": 6103 + }, + { + "epoch": 1.1278654825559238, + "grad_norm": 0.08065839856863022, + "learning_rate": 1.4355519677222329e-05, + "loss": 0.48715531826019287, + "step": 6104 + }, + { + "epoch": 1.1280502592648196, + "grad_norm": 0.08922401815652847, + "learning_rate": 1.4353723840431568e-05, + "loss": 0.6146382689476013, + "step": 6105 + }, + { + "epoch": 1.1282350359737154, + "grad_norm": 0.08260243386030197, + "learning_rate": 1.4351927830376215e-05, + "loss": 0.6335489749908447, + "step": 6106 + }, + { + "epoch": 1.1284198126826113, + "grad_norm": 0.07442322373390198, + "learning_rate": 1.4350131647127754e-05, + "loss": 0.49106425046920776, + "step": 6107 + }, + { + "epoch": 1.1286045893915073, + "grad_norm": 0.05707094073295593, + "learning_rate": 1.434833529075766e-05, + "loss": 0.4567444920539856, + "step": 6108 + }, + { + "epoch": 1.1287893661004031, + "grad_norm": 0.09413350373506546, + "learning_rate": 1.4346538761337428e-05, + "loss": 0.6615262627601624, + "step": 6109 + }, + { + "epoch": 1.128974142809299, + "grad_norm": 0.07104384899139404, + "learning_rate": 1.434474205893855e-05, + "loss": 0.47242528200149536, + "step": 6110 + }, + { + "epoch": 1.1291589195181948, + "grad_norm": 0.06719780713319778, + "learning_rate": 1.434294518363253e-05, + "loss": 0.49545174837112427, + "step": 6111 + }, + { + "epoch": 1.1293436962270906, + "grad_norm": 0.06883067637681961, + "learning_rate": 1.434114813549088e-05, + "loss": 0.40031710267066956, + "step": 6112 + }, + { + "epoch": 1.1295284729359865, + "grad_norm": 0.07639193534851074, + "learning_rate": 1.433935091458512e-05, + "loss": 0.6572412252426147, + "step": 6113 + }, + { + "epoch": 1.1297132496448823, + "grad_norm": 0.08633438497781754, + "learning_rate": 1.4337553520986767e-05, + "loss": 0.55412757396698, + "step": 6114 + }, + { + "epoch": 1.1298980263537781, + "grad_norm": 0.05746854469180107, + "learning_rate": 1.4335755954767352e-05, + "loss": 0.36753028631210327, + "step": 6115 + }, + { + "epoch": 1.130082803062674, + "grad_norm": 0.06715678423643112, + "learning_rate": 1.4333958215998416e-05, + "loss": 0.31338319182395935, + "step": 6116 + }, + { + "epoch": 1.1302675797715698, + "grad_norm": 0.06040837615728378, + "learning_rate": 1.4332160304751503e-05, + "loss": 0.4689605236053467, + "step": 6117 + }, + { + "epoch": 1.1304523564804656, + "grad_norm": 0.0614374615252018, + "learning_rate": 1.4330362221098164e-05, + "loss": 0.43147027492523193, + "step": 6118 + }, + { + "epoch": 1.1306371331893614, + "grad_norm": 0.08722779154777527, + "learning_rate": 1.4328563965109954e-05, + "loss": 0.8032390475273132, + "step": 6119 + }, + { + "epoch": 1.1308219098982573, + "grad_norm": 0.0980684831738472, + "learning_rate": 1.4326765536858444e-05, + "loss": 0.6666969060897827, + "step": 6120 + }, + { + "epoch": 1.131006686607153, + "grad_norm": 0.09519989043474197, + "learning_rate": 1.4324966936415199e-05, + "loss": 0.7287571430206299, + "step": 6121 + }, + { + "epoch": 1.131191463316049, + "grad_norm": 0.06062401831150055, + "learning_rate": 1.4323168163851801e-05, + "loss": 0.4222762882709503, + "step": 6122 + }, + { + "epoch": 1.1313762400249447, + "grad_norm": 0.07143185287714005, + "learning_rate": 1.432136921923984e-05, + "loss": 0.49105358123779297, + "step": 6123 + }, + { + "epoch": 1.1315610167338406, + "grad_norm": 0.050204817205667496, + "learning_rate": 1.4319570102650902e-05, + "loss": 0.33912599086761475, + "step": 6124 + }, + { + "epoch": 1.1317457934427366, + "grad_norm": 0.06106993183493614, + "learning_rate": 1.4317770814156586e-05, + "loss": 0.38527730107307434, + "step": 6125 + }, + { + "epoch": 1.1319305701516325, + "grad_norm": 0.06932901591062546, + "learning_rate": 1.4315971353828502e-05, + "loss": 0.5505931973457336, + "step": 6126 + }, + { + "epoch": 1.1321153468605283, + "grad_norm": 0.08598202466964722, + "learning_rate": 1.4314171721738262e-05, + "loss": 0.7473421096801758, + "step": 6127 + }, + { + "epoch": 1.132300123569424, + "grad_norm": 0.06986215710639954, + "learning_rate": 1.4312371917957482e-05, + "loss": 0.48098546266555786, + "step": 6128 + }, + { + "epoch": 1.13248490027832, + "grad_norm": 0.08303413540124893, + "learning_rate": 1.4310571942557791e-05, + "loss": 0.6047471165657043, + "step": 6129 + }, + { + "epoch": 1.1326696769872158, + "grad_norm": 0.08076757192611694, + "learning_rate": 1.4308771795610826e-05, + "loss": 0.5525906085968018, + "step": 6130 + }, + { + "epoch": 1.1328544536961116, + "grad_norm": 0.0855245515704155, + "learning_rate": 1.4306971477188223e-05, + "loss": 0.7699131965637207, + "step": 6131 + }, + { + "epoch": 1.1330392304050074, + "grad_norm": 0.07134143263101578, + "learning_rate": 1.4305170987361625e-05, + "loss": 0.4692050814628601, + "step": 6132 + }, + { + "epoch": 1.1332240071139033, + "grad_norm": 0.08296187222003937, + "learning_rate": 1.4303370326202697e-05, + "loss": 0.5955659747123718, + "step": 6133 + }, + { + "epoch": 1.133408783822799, + "grad_norm": 0.0706106424331665, + "learning_rate": 1.4301569493783094e-05, + "loss": 0.5117326974868774, + "step": 6134 + }, + { + "epoch": 1.133593560531695, + "grad_norm": 0.08032231777906418, + "learning_rate": 1.429976849017448e-05, + "loss": 0.6375417709350586, + "step": 6135 + }, + { + "epoch": 1.1337783372405907, + "grad_norm": 0.06780054420232773, + "learning_rate": 1.4297967315448531e-05, + "loss": 0.4244823157787323, + "step": 6136 + }, + { + "epoch": 1.1339631139494868, + "grad_norm": 0.07901187241077423, + "learning_rate": 1.4296165969676934e-05, + "loss": 0.7523353695869446, + "step": 6137 + }, + { + "epoch": 1.1341478906583826, + "grad_norm": 0.07902489602565765, + "learning_rate": 1.4294364452931368e-05, + "loss": 0.6589572429656982, + "step": 6138 + }, + { + "epoch": 1.1343326673672784, + "grad_norm": 0.08424142748117447, + "learning_rate": 1.4292562765283533e-05, + "loss": 0.6515619158744812, + "step": 6139 + }, + { + "epoch": 1.1345174440761743, + "grad_norm": 0.07620836049318314, + "learning_rate": 1.4290760906805133e-05, + "loss": 0.583553671836853, + "step": 6140 + }, + { + "epoch": 1.13470222078507, + "grad_norm": 0.06322798877954483, + "learning_rate": 1.4288958877567872e-05, + "loss": 0.5742557048797607, + "step": 6141 + }, + { + "epoch": 1.134886997493966, + "grad_norm": 0.07936611771583557, + "learning_rate": 1.4287156677643462e-05, + "loss": 0.5712964534759521, + "step": 6142 + }, + { + "epoch": 1.1350717742028618, + "grad_norm": 0.07948508113622665, + "learning_rate": 1.4285354307103631e-05, + "loss": 0.5610211491584778, + "step": 6143 + }, + { + "epoch": 1.1352565509117576, + "grad_norm": 0.08358441293239594, + "learning_rate": 1.4283551766020107e-05, + "loss": 0.5952147841453552, + "step": 6144 + }, + { + "epoch": 1.1354413276206534, + "grad_norm": 0.06404483318328857, + "learning_rate": 1.4281749054464625e-05, + "loss": 0.42181798815727234, + "step": 6145 + }, + { + "epoch": 1.1356261043295492, + "grad_norm": 0.05818679928779602, + "learning_rate": 1.4279946172508923e-05, + "loss": 0.4475404620170593, + "step": 6146 + }, + { + "epoch": 1.135810881038445, + "grad_norm": 0.07719261199235916, + "learning_rate": 1.4278143120224757e-05, + "loss": 0.7213553786277771, + "step": 6147 + }, + { + "epoch": 1.135995657747341, + "grad_norm": 0.060571376234292984, + "learning_rate": 1.4276339897683877e-05, + "loss": 0.36024120450019836, + "step": 6148 + }, + { + "epoch": 1.1361804344562367, + "grad_norm": 0.06773599237203598, + "learning_rate": 1.4274536504958048e-05, + "loss": 0.44352224469184875, + "step": 6149 + }, + { + "epoch": 1.1363652111651326, + "grad_norm": 0.07060518860816956, + "learning_rate": 1.4272732942119044e-05, + "loss": 0.5673344731330872, + "step": 6150 + }, + { + "epoch": 1.1365499878740284, + "grad_norm": 0.08268341422080994, + "learning_rate": 1.4270929209238632e-05, + "loss": 0.5837506651878357, + "step": 6151 + }, + { + "epoch": 1.1367347645829242, + "grad_norm": 0.11173179000616074, + "learning_rate": 1.4269125306388599e-05, + "loss": 0.8468867540359497, + "step": 6152 + }, + { + "epoch": 1.13691954129182, + "grad_norm": 0.06694246083498001, + "learning_rate": 1.4267321233640736e-05, + "loss": 0.40071406960487366, + "step": 6153 + }, + { + "epoch": 1.137104318000716, + "grad_norm": 0.05182839557528496, + "learning_rate": 1.426551699106684e-05, + "loss": 0.36508703231811523, + "step": 6154 + }, + { + "epoch": 1.137289094709612, + "grad_norm": 0.1034051924943924, + "learning_rate": 1.4263712578738714e-05, + "loss": 0.7002763152122498, + "step": 6155 + }, + { + "epoch": 1.1374738714185078, + "grad_norm": 0.06058888137340546, + "learning_rate": 1.4261907996728164e-05, + "loss": 0.45546698570251465, + "step": 6156 + }, + { + "epoch": 1.1376586481274036, + "grad_norm": 0.08832358568906784, + "learning_rate": 1.426010324510701e-05, + "loss": 0.5462371706962585, + "step": 6157 + }, + { + "epoch": 1.1378434248362994, + "grad_norm": 0.06523042917251587, + "learning_rate": 1.4258298323947078e-05, + "loss": 0.46396517753601074, + "step": 6158 + }, + { + "epoch": 1.1380282015451952, + "grad_norm": 0.08396708220243454, + "learning_rate": 1.425649323332019e-05, + "loss": 0.6781487464904785, + "step": 6159 + }, + { + "epoch": 1.138212978254091, + "grad_norm": 0.07320381700992584, + "learning_rate": 1.425468797329819e-05, + "loss": 0.4183204174041748, + "step": 6160 + }, + { + "epoch": 1.138397754962987, + "grad_norm": 0.062182389199733734, + "learning_rate": 1.4252882543952923e-05, + "loss": 0.4624471962451935, + "step": 6161 + }, + { + "epoch": 1.1385825316718827, + "grad_norm": 0.06833712011575699, + "learning_rate": 1.4251076945356233e-05, + "loss": 0.4128556251525879, + "step": 6162 + }, + { + "epoch": 1.1387673083807786, + "grad_norm": 0.08125222474336624, + "learning_rate": 1.4249271177579985e-05, + "loss": 0.6006513833999634, + "step": 6163 + }, + { + "epoch": 1.1389520850896744, + "grad_norm": 0.07562055438756943, + "learning_rate": 1.4247465240696035e-05, + "loss": 0.6590495109558105, + "step": 6164 + }, + { + "epoch": 1.1391368617985702, + "grad_norm": 0.0725562646985054, + "learning_rate": 1.4245659134776255e-05, + "loss": 0.5063749551773071, + "step": 6165 + }, + { + "epoch": 1.139321638507466, + "grad_norm": 0.06678872555494308, + "learning_rate": 1.4243852859892527e-05, + "loss": 0.5019094347953796, + "step": 6166 + }, + { + "epoch": 1.139506415216362, + "grad_norm": 0.0874992311000824, + "learning_rate": 1.4242046416116732e-05, + "loss": 0.6056196689605713, + "step": 6167 + }, + { + "epoch": 1.139691191925258, + "grad_norm": 0.06671545654535294, + "learning_rate": 1.4240239803520761e-05, + "loss": 0.5596376061439514, + "step": 6168 + }, + { + "epoch": 1.1398759686341537, + "grad_norm": 0.06795782595872879, + "learning_rate": 1.4238433022176513e-05, + "loss": 0.4091525375843048, + "step": 6169 + }, + { + "epoch": 1.1400607453430496, + "grad_norm": 0.08453847467899323, + "learning_rate": 1.423662607215589e-05, + "loss": 0.5426561236381531, + "step": 6170 + }, + { + "epoch": 1.1402455220519454, + "grad_norm": 0.0707712396979332, + "learning_rate": 1.4234818953530805e-05, + "loss": 0.4745196998119354, + "step": 6171 + }, + { + "epoch": 1.1404302987608412, + "grad_norm": 0.06672391295433044, + "learning_rate": 1.4233011666373174e-05, + "loss": 0.4232742488384247, + "step": 6172 + }, + { + "epoch": 1.140615075469737, + "grad_norm": 0.06453859061002731, + "learning_rate": 1.423120421075492e-05, + "loss": 0.45850545167922974, + "step": 6173 + }, + { + "epoch": 1.140799852178633, + "grad_norm": 0.09031231701374054, + "learning_rate": 1.4229396586747978e-05, + "loss": 0.6560296416282654, + "step": 6174 + }, + { + "epoch": 1.1409846288875287, + "grad_norm": 0.07331298291683197, + "learning_rate": 1.4227588794424284e-05, + "loss": 0.5255630612373352, + "step": 6175 + }, + { + "epoch": 1.1411694055964245, + "grad_norm": 0.0803334191441536, + "learning_rate": 1.4225780833855782e-05, + "loss": 0.6760626435279846, + "step": 6176 + }, + { + "epoch": 1.1413541823053204, + "grad_norm": 0.09699446707963943, + "learning_rate": 1.4223972705114427e-05, + "loss": 0.5984539985656738, + "step": 6177 + }, + { + "epoch": 1.1415389590142162, + "grad_norm": 0.0784691572189331, + "learning_rate": 1.4222164408272168e-05, + "loss": 0.5706794857978821, + "step": 6178 + }, + { + "epoch": 1.141723735723112, + "grad_norm": 0.069001205265522, + "learning_rate": 1.4220355943400979e-05, + "loss": 0.48837417364120483, + "step": 6179 + }, + { + "epoch": 1.1419085124320079, + "grad_norm": 0.07547164708375931, + "learning_rate": 1.4218547310572826e-05, + "loss": 0.45486870408058167, + "step": 6180 + }, + { + "epoch": 1.1420932891409037, + "grad_norm": 0.07717430591583252, + "learning_rate": 1.4216738509859688e-05, + "loss": 0.5167067646980286, + "step": 6181 + }, + { + "epoch": 1.1422780658497995, + "grad_norm": 0.0627954751253128, + "learning_rate": 1.4214929541333548e-05, + "loss": 0.3714037239551544, + "step": 6182 + }, + { + "epoch": 1.1424628425586956, + "grad_norm": 0.07511921972036362, + "learning_rate": 1.4213120405066401e-05, + "loss": 0.5616947412490845, + "step": 6183 + }, + { + "epoch": 1.1426476192675914, + "grad_norm": 0.07544286549091339, + "learning_rate": 1.4211311101130246e-05, + "loss": 0.5095857381820679, + "step": 6184 + }, + { + "epoch": 1.1428323959764872, + "grad_norm": 0.0724918395280838, + "learning_rate": 1.420950162959708e-05, + "loss": 0.4429417848587036, + "step": 6185 + }, + { + "epoch": 1.143017172685383, + "grad_norm": 0.05164062976837158, + "learning_rate": 1.4207691990538919e-05, + "loss": 0.3569068908691406, + "step": 6186 + }, + { + "epoch": 1.1432019493942789, + "grad_norm": 0.05280061066150665, + "learning_rate": 1.4205882184027784e-05, + "loss": 0.3826059401035309, + "step": 6187 + }, + { + "epoch": 1.1433867261031747, + "grad_norm": 0.07177378982305527, + "learning_rate": 1.4204072210135693e-05, + "loss": 0.5403726100921631, + "step": 6188 + }, + { + "epoch": 1.1435715028120705, + "grad_norm": 0.06955300271511078, + "learning_rate": 1.4202262068934684e-05, + "loss": 0.451949805021286, + "step": 6189 + }, + { + "epoch": 1.1437562795209664, + "grad_norm": 0.07127396017313004, + "learning_rate": 1.420045176049679e-05, + "loss": 0.42342081665992737, + "step": 6190 + }, + { + "epoch": 1.1439410562298622, + "grad_norm": 0.06536135077476501, + "learning_rate": 1.4198641284894059e-05, + "loss": 0.44511792063713074, + "step": 6191 + }, + { + "epoch": 1.144125832938758, + "grad_norm": 0.07281245291233063, + "learning_rate": 1.419683064219854e-05, + "loss": 0.4214109182357788, + "step": 6192 + }, + { + "epoch": 1.1443106096476539, + "grad_norm": 0.08532760292291641, + "learning_rate": 1.419501983248229e-05, + "loss": 0.7385908961296082, + "step": 6193 + }, + { + "epoch": 1.1444953863565497, + "grad_norm": 0.06349904835224152, + "learning_rate": 1.419320885581738e-05, + "loss": 0.4616239666938782, + "step": 6194 + }, + { + "epoch": 1.1446801630654455, + "grad_norm": 0.10051140189170837, + "learning_rate": 1.4191397712275871e-05, + "loss": 0.7320863008499146, + "step": 6195 + }, + { + "epoch": 1.1448649397743416, + "grad_norm": 0.09361196309328079, + "learning_rate": 1.418958640192985e-05, + "loss": 0.6548205018043518, + "step": 6196 + }, + { + "epoch": 1.1450497164832374, + "grad_norm": 0.06741033494472504, + "learning_rate": 1.4187774924851394e-05, + "loss": 0.5115160346031189, + "step": 6197 + }, + { + "epoch": 1.1452344931921332, + "grad_norm": 0.07313167303800583, + "learning_rate": 1.41859632811126e-05, + "loss": 0.500661313533783, + "step": 6198 + }, + { + "epoch": 1.145419269901029, + "grad_norm": 0.08618351817131042, + "learning_rate": 1.4184151470785565e-05, + "loss": 0.6208575367927551, + "step": 6199 + }, + { + "epoch": 1.1456040466099249, + "grad_norm": 0.052863236516714096, + "learning_rate": 1.4182339493942389e-05, + "loss": 0.4039441645145416, + "step": 6200 + }, + { + "epoch": 1.1457888233188207, + "grad_norm": 0.07417035847902298, + "learning_rate": 1.418052735065519e-05, + "loss": 0.41991057991981506, + "step": 6201 + }, + { + "epoch": 1.1459736000277165, + "grad_norm": 0.0847744345664978, + "learning_rate": 1.4178715040996078e-05, + "loss": 0.49262678623199463, + "step": 6202 + }, + { + "epoch": 1.1461583767366124, + "grad_norm": 0.06870493292808533, + "learning_rate": 1.4176902565037184e-05, + "loss": 0.5651652216911316, + "step": 6203 + }, + { + "epoch": 1.1463431534455082, + "grad_norm": 0.0765766054391861, + "learning_rate": 1.4175089922850633e-05, + "loss": 0.5696491003036499, + "step": 6204 + }, + { + "epoch": 1.146527930154404, + "grad_norm": 0.07318069785833359, + "learning_rate": 1.4173277114508565e-05, + "loss": 0.49148693680763245, + "step": 6205 + }, + { + "epoch": 1.1467127068632998, + "grad_norm": 0.06859167665243149, + "learning_rate": 1.4171464140083127e-05, + "loss": 0.514062762260437, + "step": 6206 + }, + { + "epoch": 1.1468974835721957, + "grad_norm": 0.06501016020774841, + "learning_rate": 1.4169650999646466e-05, + "loss": 0.7300497889518738, + "step": 6207 + }, + { + "epoch": 1.1470822602810915, + "grad_norm": 0.07674582302570343, + "learning_rate": 1.416783769327074e-05, + "loss": 0.47462305426597595, + "step": 6208 + }, + { + "epoch": 1.1472670369899873, + "grad_norm": 0.08116015046834946, + "learning_rate": 1.4166024221028111e-05, + "loss": 0.6110057234764099, + "step": 6209 + }, + { + "epoch": 1.1474518136988832, + "grad_norm": 0.06770819425582886, + "learning_rate": 1.4164210582990756e-05, + "loss": 0.5061403512954712, + "step": 6210 + }, + { + "epoch": 1.147636590407779, + "grad_norm": 0.06339634209871292, + "learning_rate": 1.4162396779230844e-05, + "loss": 0.4284840226173401, + "step": 6211 + }, + { + "epoch": 1.147821367116675, + "grad_norm": 0.0632321760058403, + "learning_rate": 1.4160582809820566e-05, + "loss": 0.3946978449821472, + "step": 6212 + }, + { + "epoch": 1.1480061438255709, + "grad_norm": 0.06572254002094269, + "learning_rate": 1.4158768674832108e-05, + "loss": 0.5568139553070068, + "step": 6213 + }, + { + "epoch": 1.1481909205344667, + "grad_norm": 0.09508955478668213, + "learning_rate": 1.4156954374337669e-05, + "loss": 0.7293902635574341, + "step": 6214 + }, + { + "epoch": 1.1483756972433625, + "grad_norm": 0.08176790177822113, + "learning_rate": 1.4155139908409447e-05, + "loss": 0.5837011933326721, + "step": 6215 + }, + { + "epoch": 1.1485604739522584, + "grad_norm": 0.07929205894470215, + "learning_rate": 1.415332527711966e-05, + "loss": 0.612383246421814, + "step": 6216 + }, + { + "epoch": 1.1487452506611542, + "grad_norm": 0.06473682820796967, + "learning_rate": 1.415151048054052e-05, + "loss": 0.6698353886604309, + "step": 6217 + }, + { + "epoch": 1.14893002737005, + "grad_norm": 0.08424325287342072, + "learning_rate": 1.414969551874425e-05, + "loss": 0.6709126830101013, + "step": 6218 + }, + { + "epoch": 1.1491148040789458, + "grad_norm": 0.0723126232624054, + "learning_rate": 1.4147880391803087e-05, + "loss": 0.6147063970565796, + "step": 6219 + }, + { + "epoch": 1.1492995807878417, + "grad_norm": 0.07604363560676575, + "learning_rate": 1.4146065099789257e-05, + "loss": 0.5668408870697021, + "step": 6220 + }, + { + "epoch": 1.1494843574967375, + "grad_norm": 0.06804443150758743, + "learning_rate": 1.4144249642775006e-05, + "loss": 0.503356397151947, + "step": 6221 + }, + { + "epoch": 1.1496691342056333, + "grad_norm": 0.08500366657972336, + "learning_rate": 1.4142434020832587e-05, + "loss": 0.5444658398628235, + "step": 6222 + }, + { + "epoch": 1.1498539109145292, + "grad_norm": 0.06647278368473053, + "learning_rate": 1.4140618234034254e-05, + "loss": 0.4239024221897125, + "step": 6223 + }, + { + "epoch": 1.150038687623425, + "grad_norm": 0.06973589956760406, + "learning_rate": 1.4138802282452269e-05, + "loss": 0.48607513308525085, + "step": 6224 + }, + { + "epoch": 1.150223464332321, + "grad_norm": 0.08751551061868668, + "learning_rate": 1.4136986166158901e-05, + "loss": 0.7162054777145386, + "step": 6225 + }, + { + "epoch": 1.1504082410412169, + "grad_norm": 0.07949352264404297, + "learning_rate": 1.4135169885226427e-05, + "loss": 0.563558042049408, + "step": 6226 + }, + { + "epoch": 1.1505930177501127, + "grad_norm": 0.07515808939933777, + "learning_rate": 1.4133353439727128e-05, + "loss": 0.5348332524299622, + "step": 6227 + }, + { + "epoch": 1.1507777944590085, + "grad_norm": 0.07966458052396774, + "learning_rate": 1.4131536829733294e-05, + "loss": 0.6537339091300964, + "step": 6228 + }, + { + "epoch": 1.1509625711679043, + "grad_norm": 0.06919048726558685, + "learning_rate": 1.4129720055317224e-05, + "loss": 0.4522291421890259, + "step": 6229 + }, + { + "epoch": 1.1511473478768002, + "grad_norm": 0.06940469890832901, + "learning_rate": 1.4127903116551214e-05, + "loss": 0.5043797492980957, + "step": 6230 + }, + { + "epoch": 1.151332124585696, + "grad_norm": 0.0794387236237526, + "learning_rate": 1.412608601350757e-05, + "loss": 0.5534088015556335, + "step": 6231 + }, + { + "epoch": 1.1515169012945918, + "grad_norm": 0.07416708767414093, + "learning_rate": 1.4124268746258616e-05, + "loss": 0.3723811209201813, + "step": 6232 + }, + { + "epoch": 1.1517016780034877, + "grad_norm": 0.08943319320678711, + "learning_rate": 1.412245131487667e-05, + "loss": 0.6742863059043884, + "step": 6233 + }, + { + "epoch": 1.1518864547123835, + "grad_norm": 0.10318424552679062, + "learning_rate": 1.4120633719434058e-05, + "loss": 0.7732130289077759, + "step": 6234 + }, + { + "epoch": 1.1520712314212793, + "grad_norm": 0.07700960338115692, + "learning_rate": 1.4118815960003114e-05, + "loss": 0.5543152689933777, + "step": 6235 + }, + { + "epoch": 1.1522560081301751, + "grad_norm": 0.06386572122573853, + "learning_rate": 1.4116998036656183e-05, + "loss": 0.44846782088279724, + "step": 6236 + }, + { + "epoch": 1.152440784839071, + "grad_norm": 0.084290511906147, + "learning_rate": 1.4115179949465611e-05, + "loss": 0.6337743401527405, + "step": 6237 + }, + { + "epoch": 1.1526255615479668, + "grad_norm": 0.07609273493289948, + "learning_rate": 1.4113361698503747e-05, + "loss": 0.4199293255805969, + "step": 6238 + }, + { + "epoch": 1.1528103382568626, + "grad_norm": 0.06647109985351562, + "learning_rate": 1.4111543283842961e-05, + "loss": 0.42023780941963196, + "step": 6239 + }, + { + "epoch": 1.1529951149657585, + "grad_norm": 0.07131869345903397, + "learning_rate": 1.4109724705555616e-05, + "loss": 0.5998886823654175, + "step": 6240 + }, + { + "epoch": 1.1531798916746543, + "grad_norm": 0.06336542963981628, + "learning_rate": 1.4107905963714082e-05, + "loss": 0.4682580530643463, + "step": 6241 + }, + { + "epoch": 1.1533646683835503, + "grad_norm": 0.08245175331830978, + "learning_rate": 1.4106087058390745e-05, + "loss": 0.5720283389091492, + "step": 6242 + }, + { + "epoch": 1.1535494450924462, + "grad_norm": 0.09518399089574814, + "learning_rate": 1.4104267989657991e-05, + "loss": 0.7204375267028809, + "step": 6243 + }, + { + "epoch": 1.153734221801342, + "grad_norm": 0.07756468653678894, + "learning_rate": 1.4102448757588208e-05, + "loss": 0.4689267873764038, + "step": 6244 + }, + { + "epoch": 1.1539189985102378, + "grad_norm": 0.07330365478992462, + "learning_rate": 1.4100629362253799e-05, + "loss": 0.48148608207702637, + "step": 6245 + }, + { + "epoch": 1.1541037752191337, + "grad_norm": 0.08221703767776489, + "learning_rate": 1.4098809803727176e-05, + "loss": 0.7633028030395508, + "step": 6246 + }, + { + "epoch": 1.1542885519280295, + "grad_norm": 0.08841904997825623, + "learning_rate": 1.4096990082080742e-05, + "loss": 0.6865911483764648, + "step": 6247 + }, + { + "epoch": 1.1544733286369253, + "grad_norm": 0.07374248653650284, + "learning_rate": 1.4095170197386918e-05, + "loss": 0.5459367632865906, + "step": 6248 + }, + { + "epoch": 1.1546581053458211, + "grad_norm": 0.06705913692712784, + "learning_rate": 1.409335014971814e-05, + "loss": 0.3521028161048889, + "step": 6249 + }, + { + "epoch": 1.154842882054717, + "grad_norm": 0.08836095035076141, + "learning_rate": 1.4091529939146828e-05, + "loss": 0.6287204623222351, + "step": 6250 + }, + { + "epoch": 1.1550276587636128, + "grad_norm": 0.0838085412979126, + "learning_rate": 1.4089709565745423e-05, + "loss": 0.6495293378829956, + "step": 6251 + }, + { + "epoch": 1.1552124354725086, + "grad_norm": 0.07038464397192001, + "learning_rate": 1.4087889029586374e-05, + "loss": 0.46939921379089355, + "step": 6252 + }, + { + "epoch": 1.1553972121814045, + "grad_norm": 0.061671674251556396, + "learning_rate": 1.4086068330742135e-05, + "loss": 0.4827491044998169, + "step": 6253 + }, + { + "epoch": 1.1555819888903003, + "grad_norm": 0.0781252309679985, + "learning_rate": 1.4084247469285155e-05, + "loss": 0.546686589717865, + "step": 6254 + }, + { + "epoch": 1.1557667655991963, + "grad_norm": 0.11145616322755814, + "learning_rate": 1.4082426445287904e-05, + "loss": 0.6957457661628723, + "step": 6255 + }, + { + "epoch": 1.1559515423080922, + "grad_norm": 0.059735823422670364, + "learning_rate": 1.4080605258822857e-05, + "loss": 0.39741113781929016, + "step": 6256 + }, + { + "epoch": 1.156136319016988, + "grad_norm": 0.06381309777498245, + "learning_rate": 1.4078783909962484e-05, + "loss": 0.4249134957790375, + "step": 6257 + }, + { + "epoch": 1.1563210957258838, + "grad_norm": 0.06865990906953812, + "learning_rate": 1.407696239877927e-05, + "loss": 0.5038773417472839, + "step": 6258 + }, + { + "epoch": 1.1565058724347796, + "grad_norm": 0.07598444819450378, + "learning_rate": 1.4075140725345713e-05, + "loss": 0.5336639285087585, + "step": 6259 + }, + { + "epoch": 1.1566906491436755, + "grad_norm": 0.091720350086689, + "learning_rate": 1.4073318889734303e-05, + "loss": 0.7236504554748535, + "step": 6260 + }, + { + "epoch": 1.1568754258525713, + "grad_norm": 0.07371455430984497, + "learning_rate": 1.4071496892017544e-05, + "loss": 0.5036258697509766, + "step": 6261 + }, + { + "epoch": 1.1570602025614671, + "grad_norm": 0.10541260242462158, + "learning_rate": 1.4069674732267946e-05, + "loss": 0.7018181085586548, + "step": 6262 + }, + { + "epoch": 1.157244979270363, + "grad_norm": 0.06498979777097702, + "learning_rate": 1.4067852410558027e-05, + "loss": 0.5900062322616577, + "step": 6263 + }, + { + "epoch": 1.1574297559792588, + "grad_norm": 0.07714717835187912, + "learning_rate": 1.4066029926960308e-05, + "loss": 0.5157914161682129, + "step": 6264 + }, + { + "epoch": 1.1576145326881546, + "grad_norm": 0.059344302862882614, + "learning_rate": 1.406420728154732e-05, + "loss": 0.35241222381591797, + "step": 6265 + }, + { + "epoch": 1.1577993093970504, + "grad_norm": 0.07745634019374847, + "learning_rate": 1.4062384474391597e-05, + "loss": 0.5979710817337036, + "step": 6266 + }, + { + "epoch": 1.1579840861059463, + "grad_norm": 0.0804426372051239, + "learning_rate": 1.4060561505565683e-05, + "loss": 0.5755396485328674, + "step": 6267 + }, + { + "epoch": 1.158168862814842, + "grad_norm": 0.08442779630422592, + "learning_rate": 1.405873837514212e-05, + "loss": 0.7087805271148682, + "step": 6268 + }, + { + "epoch": 1.158353639523738, + "grad_norm": 0.05821115896105766, + "learning_rate": 1.4056915083193472e-05, + "loss": 0.37225276231765747, + "step": 6269 + }, + { + "epoch": 1.1585384162326338, + "grad_norm": 0.07283907383680344, + "learning_rate": 1.4055091629792297e-05, + "loss": 0.5235207080841064, + "step": 6270 + }, + { + "epoch": 1.1587231929415298, + "grad_norm": 0.063742995262146, + "learning_rate": 1.4053268015011159e-05, + "loss": 0.40237030386924744, + "step": 6271 + }, + { + "epoch": 1.1589079696504256, + "grad_norm": 0.07434480637311935, + "learning_rate": 1.4051444238922635e-05, + "loss": 0.5207359194755554, + "step": 6272 + }, + { + "epoch": 1.1590927463593215, + "grad_norm": 0.0759037435054779, + "learning_rate": 1.404962030159931e-05, + "loss": 0.4360727071762085, + "step": 6273 + }, + { + "epoch": 1.1592775230682173, + "grad_norm": 0.07971856743097305, + "learning_rate": 1.4047796203113761e-05, + "loss": 0.580155074596405, + "step": 6274 + }, + { + "epoch": 1.1594622997771131, + "grad_norm": 0.0639769434928894, + "learning_rate": 1.404597194353859e-05, + "loss": 0.4158743619918823, + "step": 6275 + }, + { + "epoch": 1.159647076486009, + "grad_norm": 0.07871117442846298, + "learning_rate": 1.4044147522946393e-05, + "loss": 0.46466371417045593, + "step": 6276 + }, + { + "epoch": 1.1598318531949048, + "grad_norm": 0.05663604661822319, + "learning_rate": 1.4042322941409778e-05, + "loss": 0.5010232329368591, + "step": 6277 + }, + { + "epoch": 1.1600166299038006, + "grad_norm": 0.07029540091753006, + "learning_rate": 1.4040498199001358e-05, + "loss": 0.45440173149108887, + "step": 6278 + }, + { + "epoch": 1.1602014066126964, + "grad_norm": 0.08714763820171356, + "learning_rate": 1.4038673295793747e-05, + "loss": 0.5474016666412354, + "step": 6279 + }, + { + "epoch": 1.1603861833215923, + "grad_norm": 0.06908667832612991, + "learning_rate": 1.403684823185958e-05, + "loss": 0.45617908239364624, + "step": 6280 + }, + { + "epoch": 1.160570960030488, + "grad_norm": 0.06304468214511871, + "learning_rate": 1.4035023007271478e-05, + "loss": 0.37683427333831787, + "step": 6281 + }, + { + "epoch": 1.160755736739384, + "grad_norm": 0.06093023344874382, + "learning_rate": 1.4033197622102084e-05, + "loss": 0.40980395674705505, + "step": 6282 + }, + { + "epoch": 1.1609405134482798, + "grad_norm": 0.056043967604637146, + "learning_rate": 1.4031372076424045e-05, + "loss": 0.38577473163604736, + "step": 6283 + }, + { + "epoch": 1.1611252901571758, + "grad_norm": 0.06836773455142975, + "learning_rate": 1.402954637031001e-05, + "loss": 0.506973922252655, + "step": 6284 + }, + { + "epoch": 1.1613100668660716, + "grad_norm": 0.07191284000873566, + "learning_rate": 1.4027720503832637e-05, + "loss": 0.47932422161102295, + "step": 6285 + }, + { + "epoch": 1.1614948435749675, + "grad_norm": 0.07273761183023453, + "learning_rate": 1.4025894477064586e-05, + "loss": 0.5129173398017883, + "step": 6286 + }, + { + "epoch": 1.1616796202838633, + "grad_norm": 0.07798660546541214, + "learning_rate": 1.4024068290078531e-05, + "loss": 0.6354030966758728, + "step": 6287 + }, + { + "epoch": 1.1618643969927591, + "grad_norm": 0.08020185679197311, + "learning_rate": 1.402224194294715e-05, + "loss": 0.5300725698471069, + "step": 6288 + }, + { + "epoch": 1.162049173701655, + "grad_norm": 0.08034148812294006, + "learning_rate": 1.4020415435743121e-05, + "loss": 0.5370227098464966, + "step": 6289 + }, + { + "epoch": 1.1622339504105508, + "grad_norm": 0.09765902161598206, + "learning_rate": 1.4018588768539141e-05, + "loss": 0.7135058641433716, + "step": 6290 + }, + { + "epoch": 1.1624187271194466, + "grad_norm": 0.06447859853506088, + "learning_rate": 1.4016761941407896e-05, + "loss": 0.45083102583885193, + "step": 6291 + }, + { + "epoch": 1.1626035038283424, + "grad_norm": 0.08970855921506882, + "learning_rate": 1.4014934954422093e-05, + "loss": 0.6802244782447815, + "step": 6292 + }, + { + "epoch": 1.1627882805372383, + "grad_norm": 0.0892602801322937, + "learning_rate": 1.401310780765444e-05, + "loss": 0.607311487197876, + "step": 6293 + }, + { + "epoch": 1.162973057246134, + "grad_norm": 0.07422207295894623, + "learning_rate": 1.4011280501177651e-05, + "loss": 0.47422242164611816, + "step": 6294 + }, + { + "epoch": 1.16315783395503, + "grad_norm": 0.07564323395490646, + "learning_rate": 1.400945303506445e-05, + "loss": 0.5515903234481812, + "step": 6295 + }, + { + "epoch": 1.1633426106639257, + "grad_norm": 0.06369898468255997, + "learning_rate": 1.400762540938756e-05, + "loss": 0.4745848774909973, + "step": 6296 + }, + { + "epoch": 1.1635273873728216, + "grad_norm": 0.07312898337841034, + "learning_rate": 1.4005797624219718e-05, + "loss": 0.5416665077209473, + "step": 6297 + }, + { + "epoch": 1.1637121640817174, + "grad_norm": 0.08416175097227097, + "learning_rate": 1.4003969679633664e-05, + "loss": 0.5158531665802002, + "step": 6298 + }, + { + "epoch": 1.1638969407906132, + "grad_norm": 0.082229383289814, + "learning_rate": 1.4002141575702141e-05, + "loss": 0.5584162473678589, + "step": 6299 + }, + { + "epoch": 1.1640817174995093, + "grad_norm": 0.06435306370258331, + "learning_rate": 1.400031331249791e-05, + "loss": 0.4342013895511627, + "step": 6300 + }, + { + "epoch": 1.1642664942084051, + "grad_norm": 0.06409449130296707, + "learning_rate": 1.3998484890093718e-05, + "loss": 0.540926992893219, + "step": 6301 + }, + { + "epoch": 1.164451270917301, + "grad_norm": 0.07323973625898361, + "learning_rate": 1.3996656308562341e-05, + "loss": 0.45245036482810974, + "step": 6302 + }, + { + "epoch": 1.1646360476261968, + "grad_norm": 0.06927787512540817, + "learning_rate": 1.3994827567976543e-05, + "loss": 0.4733739197254181, + "step": 6303 + }, + { + "epoch": 1.1648208243350926, + "grad_norm": 0.07471513748168945, + "learning_rate": 1.3992998668409107e-05, + "loss": 0.5356478095054626, + "step": 6304 + }, + { + "epoch": 1.1650056010439884, + "grad_norm": 0.07582755386829376, + "learning_rate": 1.3991169609932822e-05, + "loss": 0.5152899026870728, + "step": 6305 + }, + { + "epoch": 1.1651903777528843, + "grad_norm": 0.08348732441663742, + "learning_rate": 1.3989340392620467e-05, + "loss": 0.6595236659049988, + "step": 6306 + }, + { + "epoch": 1.16537515446178, + "grad_norm": 0.08826853334903717, + "learning_rate": 1.3987511016544848e-05, + "loss": 0.748777449131012, + "step": 6307 + }, + { + "epoch": 1.165559931170676, + "grad_norm": 0.09934278577566147, + "learning_rate": 1.3985681481778766e-05, + "loss": 0.6863504648208618, + "step": 6308 + }, + { + "epoch": 1.1657447078795717, + "grad_norm": 0.06777570396661758, + "learning_rate": 1.3983851788395029e-05, + "loss": 0.4134644865989685, + "step": 6309 + }, + { + "epoch": 1.1659294845884676, + "grad_norm": 0.07621151953935623, + "learning_rate": 1.3982021936466457e-05, + "loss": 0.5733403563499451, + "step": 6310 + }, + { + "epoch": 1.1661142612973634, + "grad_norm": 0.08632997423410416, + "learning_rate": 1.3980191926065869e-05, + "loss": 0.5422164797782898, + "step": 6311 + }, + { + "epoch": 1.1662990380062592, + "grad_norm": 0.09219911694526672, + "learning_rate": 1.3978361757266094e-05, + "loss": 0.6320534944534302, + "step": 6312 + }, + { + "epoch": 1.1664838147151553, + "grad_norm": 0.07941587269306183, + "learning_rate": 1.3976531430139969e-05, + "loss": 0.5548836588859558, + "step": 6313 + }, + { + "epoch": 1.166668591424051, + "grad_norm": 0.056338176131248474, + "learning_rate": 1.3974700944760331e-05, + "loss": 0.34804630279541016, + "step": 6314 + }, + { + "epoch": 1.166853368132947, + "grad_norm": 0.07848487794399261, + "learning_rate": 1.3972870301200035e-05, + "loss": 0.6259524822235107, + "step": 6315 + }, + { + "epoch": 1.1670381448418428, + "grad_norm": 0.061018411070108414, + "learning_rate": 1.3971039499531926e-05, + "loss": 0.44650569558143616, + "step": 6316 + }, + { + "epoch": 1.1672229215507386, + "grad_norm": 0.07319525629281998, + "learning_rate": 1.3969208539828873e-05, + "loss": 0.4377747178077698, + "step": 6317 + }, + { + "epoch": 1.1674076982596344, + "grad_norm": 0.08733868598937988, + "learning_rate": 1.3967377422163736e-05, + "loss": 0.745111346244812, + "step": 6318 + }, + { + "epoch": 1.1675924749685302, + "grad_norm": 0.05967577174305916, + "learning_rate": 1.3965546146609392e-05, + "loss": 0.4343259632587433, + "step": 6319 + }, + { + "epoch": 1.167777251677426, + "grad_norm": 0.07628301531076431, + "learning_rate": 1.3963714713238716e-05, + "loss": 0.6616719961166382, + "step": 6320 + }, + { + "epoch": 1.167962028386322, + "grad_norm": 0.05657539516687393, + "learning_rate": 1.3961883122124595e-05, + "loss": 0.47218891978263855, + "step": 6321 + }, + { + "epoch": 1.1681468050952177, + "grad_norm": 0.10010165721178055, + "learning_rate": 1.3960051373339922e-05, + "loss": 0.695989191532135, + "step": 6322 + }, + { + "epoch": 1.1683315818041136, + "grad_norm": 0.09093029797077179, + "learning_rate": 1.3958219466957595e-05, + "loss": 0.6357280611991882, + "step": 6323 + }, + { + "epoch": 1.1685163585130094, + "grad_norm": 0.07471542060375214, + "learning_rate": 1.3956387403050513e-05, + "loss": 0.4901159405708313, + "step": 6324 + }, + { + "epoch": 1.1687011352219052, + "grad_norm": 0.08277177810668945, + "learning_rate": 1.3954555181691593e-05, + "loss": 0.7276491522789001, + "step": 6325 + }, + { + "epoch": 1.168885911930801, + "grad_norm": 0.07894638180732727, + "learning_rate": 1.3952722802953749e-05, + "loss": 0.49660179018974304, + "step": 6326 + }, + { + "epoch": 1.1690706886396969, + "grad_norm": 0.07644003629684448, + "learning_rate": 1.3950890266909902e-05, + "loss": 0.497342050075531, + "step": 6327 + }, + { + "epoch": 1.1692554653485927, + "grad_norm": 0.06979218870401382, + "learning_rate": 1.3949057573632984e-05, + "loss": 0.5278323292732239, + "step": 6328 + }, + { + "epoch": 1.1694402420574885, + "grad_norm": 0.08275625109672546, + "learning_rate": 1.394722472319593e-05, + "loss": 0.6355860829353333, + "step": 6329 + }, + { + "epoch": 1.1696250187663846, + "grad_norm": 0.08698844164609909, + "learning_rate": 1.3945391715671684e-05, + "loss": 0.6144800186157227, + "step": 6330 + }, + { + "epoch": 1.1698097954752804, + "grad_norm": 0.05441611260175705, + "learning_rate": 1.3943558551133186e-05, + "loss": 0.35613131523132324, + "step": 6331 + }, + { + "epoch": 1.1699945721841762, + "grad_norm": 0.07020799815654755, + "learning_rate": 1.39417252296534e-05, + "loss": 0.4802166521549225, + "step": 6332 + }, + { + "epoch": 1.170179348893072, + "grad_norm": 0.05347013846039772, + "learning_rate": 1.3939891751305279e-05, + "loss": 0.4263867139816284, + "step": 6333 + }, + { + "epoch": 1.170364125601968, + "grad_norm": 0.07271816581487656, + "learning_rate": 1.3938058116161791e-05, + "loss": 0.5226719379425049, + "step": 6334 + }, + { + "epoch": 1.1705489023108637, + "grad_norm": 0.06094420701265335, + "learning_rate": 1.3936224324295918e-05, + "loss": 0.37615513801574707, + "step": 6335 + }, + { + "epoch": 1.1707336790197596, + "grad_norm": 0.07450886070728302, + "learning_rate": 1.3934390375780627e-05, + "loss": 0.5362197756767273, + "step": 6336 + }, + { + "epoch": 1.1709184557286554, + "grad_norm": 0.04974592104554176, + "learning_rate": 1.3932556270688907e-05, + "loss": 0.3268347680568695, + "step": 6337 + }, + { + "epoch": 1.1711032324375512, + "grad_norm": 0.07232057303190231, + "learning_rate": 1.3930722009093751e-05, + "loss": 0.4910028576850891, + "step": 6338 + }, + { + "epoch": 1.171288009146447, + "grad_norm": 0.09514714032411575, + "learning_rate": 1.3928887591068158e-05, + "loss": 0.594700276851654, + "step": 6339 + }, + { + "epoch": 1.1714727858553429, + "grad_norm": 0.09708668291568756, + "learning_rate": 1.3927053016685132e-05, + "loss": 0.6765315532684326, + "step": 6340 + }, + { + "epoch": 1.1716575625642387, + "grad_norm": 0.06906653195619583, + "learning_rate": 1.3925218286017679e-05, + "loss": 0.4455786943435669, + "step": 6341 + }, + { + "epoch": 1.1718423392731345, + "grad_norm": 0.0670214593410492, + "learning_rate": 1.3923383399138821e-05, + "loss": 0.33591046929359436, + "step": 6342 + }, + { + "epoch": 1.1720271159820306, + "grad_norm": 0.04764154553413391, + "learning_rate": 1.3921548356121577e-05, + "loss": 0.36550554633140564, + "step": 6343 + }, + { + "epoch": 1.1722118926909264, + "grad_norm": 0.06641551852226257, + "learning_rate": 1.3919713157038977e-05, + "loss": 0.4700031876564026, + "step": 6344 + }, + { + "epoch": 1.1723966693998222, + "grad_norm": 0.0555669404566288, + "learning_rate": 1.3917877801964059e-05, + "loss": 0.39579448103904724, + "step": 6345 + }, + { + "epoch": 1.172581446108718, + "grad_norm": 0.07697714865207672, + "learning_rate": 1.3916042290969863e-05, + "loss": 0.5403766632080078, + "step": 6346 + }, + { + "epoch": 1.172766222817614, + "grad_norm": 0.0931277722120285, + "learning_rate": 1.391420662412943e-05, + "loss": 0.6140669584274292, + "step": 6347 + }, + { + "epoch": 1.1729509995265097, + "grad_norm": 0.07469156384468079, + "learning_rate": 1.3912370801515821e-05, + "loss": 0.47297799587249756, + "step": 6348 + }, + { + "epoch": 1.1731357762354055, + "grad_norm": 0.0785590186715126, + "learning_rate": 1.39105348232021e-05, + "loss": 0.6158168911933899, + "step": 6349 + }, + { + "epoch": 1.1733205529443014, + "grad_norm": 0.07559805363416672, + "learning_rate": 1.3908698689261322e-05, + "loss": 0.6002750396728516, + "step": 6350 + }, + { + "epoch": 1.1735053296531972, + "grad_norm": 0.08132782578468323, + "learning_rate": 1.3906862399766566e-05, + "loss": 0.5227487683296204, + "step": 6351 + }, + { + "epoch": 1.173690106362093, + "grad_norm": 0.08013526350259781, + "learning_rate": 1.390502595479091e-05, + "loss": 0.5677516460418701, + "step": 6352 + }, + { + "epoch": 1.1738748830709889, + "grad_norm": 0.059739675372838974, + "learning_rate": 1.3903189354407438e-05, + "loss": 0.49403083324432373, + "step": 6353 + }, + { + "epoch": 1.1740596597798847, + "grad_norm": 0.07766123861074448, + "learning_rate": 1.3901352598689239e-05, + "loss": 0.5616611242294312, + "step": 6354 + }, + { + "epoch": 1.1742444364887805, + "grad_norm": 0.08889677375555038, + "learning_rate": 1.3899515687709415e-05, + "loss": 0.6060358285903931, + "step": 6355 + }, + { + "epoch": 1.1744292131976763, + "grad_norm": 0.08227483183145523, + "learning_rate": 1.3897678621541068e-05, + "loss": 0.5569694638252258, + "step": 6356 + }, + { + "epoch": 1.1746139899065722, + "grad_norm": 0.062437236309051514, + "learning_rate": 1.3895841400257302e-05, + "loss": 0.4091362953186035, + "step": 6357 + }, + { + "epoch": 1.174798766615468, + "grad_norm": 0.06542489677667618, + "learning_rate": 1.3894004023931241e-05, + "loss": 0.3848593235015869, + "step": 6358 + }, + { + "epoch": 1.174983543324364, + "grad_norm": 0.07575369626283646, + "learning_rate": 1.3892166492636001e-05, + "loss": 0.6310125589370728, + "step": 6359 + }, + { + "epoch": 1.1751683200332599, + "grad_norm": 0.0725938156247139, + "learning_rate": 1.389032880644471e-05, + "loss": 0.41989418864250183, + "step": 6360 + }, + { + "epoch": 1.1753530967421557, + "grad_norm": 0.05446556583046913, + "learning_rate": 1.3888490965430505e-05, + "loss": 0.31907030940055847, + "step": 6361 + }, + { + "epoch": 1.1755378734510515, + "grad_norm": 0.08280963450670242, + "learning_rate": 1.3886652969666525e-05, + "loss": 0.5975019335746765, + "step": 6362 + }, + { + "epoch": 1.1757226501599474, + "grad_norm": 0.09536924213171005, + "learning_rate": 1.3884814819225917e-05, + "loss": 0.7690222263336182, + "step": 6363 + }, + { + "epoch": 1.1759074268688432, + "grad_norm": 0.06217603012919426, + "learning_rate": 1.3882976514181832e-05, + "loss": 0.41438350081443787, + "step": 6364 + }, + { + "epoch": 1.176092203577739, + "grad_norm": 0.06440955400466919, + "learning_rate": 1.3881138054607432e-05, + "loss": 0.37238502502441406, + "step": 6365 + }, + { + "epoch": 1.1762769802866349, + "grad_norm": 0.09802161902189255, + "learning_rate": 1.387929944057588e-05, + "loss": 0.5374606847763062, + "step": 6366 + }, + { + "epoch": 1.1764617569955307, + "grad_norm": 0.06957017630338669, + "learning_rate": 1.3877460672160345e-05, + "loss": 0.4594106376171112, + "step": 6367 + }, + { + "epoch": 1.1766465337044265, + "grad_norm": 0.07976226508617401, + "learning_rate": 1.3875621749434007e-05, + "loss": 0.45554158091545105, + "step": 6368 + }, + { + "epoch": 1.1768313104133223, + "grad_norm": 0.08345699310302734, + "learning_rate": 1.3873782672470051e-05, + "loss": 0.6796742677688599, + "step": 6369 + }, + { + "epoch": 1.1770160871222182, + "grad_norm": 0.07811243087053299, + "learning_rate": 1.387194344134166e-05, + "loss": 0.4833604395389557, + "step": 6370 + }, + { + "epoch": 1.177200863831114, + "grad_norm": 0.08850207924842834, + "learning_rate": 1.3870104056122035e-05, + "loss": 0.657397985458374, + "step": 6371 + }, + { + "epoch": 1.17738564054001, + "grad_norm": 0.10178535431623459, + "learning_rate": 1.386826451688438e-05, + "loss": 0.6059274077415466, + "step": 6372 + }, + { + "epoch": 1.1775704172489059, + "grad_norm": 0.06804938614368439, + "learning_rate": 1.3866424823701895e-05, + "loss": 0.5423354506492615, + "step": 6373 + }, + { + "epoch": 1.1777551939578017, + "grad_norm": 0.06546375900506973, + "learning_rate": 1.38645849766478e-05, + "loss": 0.4411933422088623, + "step": 6374 + }, + { + "epoch": 1.1779399706666975, + "grad_norm": 0.12013743817806244, + "learning_rate": 1.3862744975795315e-05, + "loss": 0.8030775189399719, + "step": 6375 + }, + { + "epoch": 1.1781247473755934, + "grad_norm": 0.06903454661369324, + "learning_rate": 1.3860904821217664e-05, + "loss": 0.4790995717048645, + "step": 6376 + }, + { + "epoch": 1.1783095240844892, + "grad_norm": 0.09132442623376846, + "learning_rate": 1.385906451298808e-05, + "loss": 0.5514641404151917, + "step": 6377 + }, + { + "epoch": 1.178494300793385, + "grad_norm": 0.06411786377429962, + "learning_rate": 1.3857224051179803e-05, + "loss": 0.45666056871414185, + "step": 6378 + }, + { + "epoch": 1.1786790775022808, + "grad_norm": 0.07586948573589325, + "learning_rate": 1.3855383435866076e-05, + "loss": 0.5549495220184326, + "step": 6379 + }, + { + "epoch": 1.1788638542111767, + "grad_norm": 0.06605006754398346, + "learning_rate": 1.3853542667120148e-05, + "loss": 0.51011723279953, + "step": 6380 + }, + { + "epoch": 1.1790486309200725, + "grad_norm": 0.08502263575792313, + "learning_rate": 1.385170174501528e-05, + "loss": 0.7431524395942688, + "step": 6381 + }, + { + "epoch": 1.1792334076289683, + "grad_norm": 0.06498820334672928, + "learning_rate": 1.3849860669624736e-05, + "loss": 0.4389677047729492, + "step": 6382 + }, + { + "epoch": 1.1794181843378642, + "grad_norm": 0.07702399045228958, + "learning_rate": 1.3848019441021775e-05, + "loss": 0.5601319074630737, + "step": 6383 + }, + { + "epoch": 1.17960296104676, + "grad_norm": 0.08520006388425827, + "learning_rate": 1.3846178059279685e-05, + "loss": 0.7258798480033875, + "step": 6384 + }, + { + "epoch": 1.1797877377556558, + "grad_norm": 0.0709204450249672, + "learning_rate": 1.3844336524471738e-05, + "loss": 0.451447069644928, + "step": 6385 + }, + { + "epoch": 1.1799725144645516, + "grad_norm": 0.06795123964548111, + "learning_rate": 1.3842494836671227e-05, + "loss": 0.4453830122947693, + "step": 6386 + }, + { + "epoch": 1.1801572911734475, + "grad_norm": 0.067714162170887, + "learning_rate": 1.3840652995951443e-05, + "loss": 0.4020233452320099, + "step": 6387 + }, + { + "epoch": 1.1803420678823435, + "grad_norm": 0.0798187404870987, + "learning_rate": 1.3838811002385684e-05, + "loss": 0.5517411828041077, + "step": 6388 + }, + { + "epoch": 1.1805268445912394, + "grad_norm": 0.08496396243572235, + "learning_rate": 1.3836968856047259e-05, + "loss": 0.5635435581207275, + "step": 6389 + }, + { + "epoch": 1.1807116213001352, + "grad_norm": 0.07240577042102814, + "learning_rate": 1.3835126557009474e-05, + "loss": 0.4667511284351349, + "step": 6390 + }, + { + "epoch": 1.180896398009031, + "grad_norm": 0.06981519609689713, + "learning_rate": 1.3833284105345657e-05, + "loss": 0.5124683380126953, + "step": 6391 + }, + { + "epoch": 1.1810811747179268, + "grad_norm": 0.06850245594978333, + "learning_rate": 1.3831441501129122e-05, + "loss": 0.4211080074310303, + "step": 6392 + }, + { + "epoch": 1.1812659514268227, + "grad_norm": 0.09302947670221329, + "learning_rate": 1.3829598744433202e-05, + "loss": 0.6746656894683838, + "step": 6393 + }, + { + "epoch": 1.1814507281357185, + "grad_norm": 0.07104253768920898, + "learning_rate": 1.3827755835331233e-05, + "loss": 0.4650229811668396, + "step": 6394 + }, + { + "epoch": 1.1816355048446143, + "grad_norm": 0.07435964047908783, + "learning_rate": 1.3825912773896557e-05, + "loss": 0.47008681297302246, + "step": 6395 + }, + { + "epoch": 1.1818202815535102, + "grad_norm": 0.09043249487876892, + "learning_rate": 1.3824069560202525e-05, + "loss": 0.5703387260437012, + "step": 6396 + }, + { + "epoch": 1.182005058262406, + "grad_norm": 0.06266016513109207, + "learning_rate": 1.3822226194322486e-05, + "loss": 0.500885546207428, + "step": 6397 + }, + { + "epoch": 1.1821898349713018, + "grad_norm": 0.07461611926555634, + "learning_rate": 1.3820382676329803e-05, + "loss": 0.5794101357460022, + "step": 6398 + }, + { + "epoch": 1.1823746116801976, + "grad_norm": 0.05493578687310219, + "learning_rate": 1.3818539006297842e-05, + "loss": 0.3821215331554413, + "step": 6399 + }, + { + "epoch": 1.1825593883890935, + "grad_norm": 0.06709956377744675, + "learning_rate": 1.3816695184299976e-05, + "loss": 0.45987358689308167, + "step": 6400 + }, + { + "epoch": 1.1827441650979895, + "grad_norm": 0.06329990178346634, + "learning_rate": 1.381485121040958e-05, + "loss": 0.44406333565711975, + "step": 6401 + }, + { + "epoch": 1.1829289418068853, + "grad_norm": 0.07872890681028366, + "learning_rate": 1.3813007084700043e-05, + "loss": 0.5449680089950562, + "step": 6402 + }, + { + "epoch": 1.1831137185157812, + "grad_norm": 0.07600017637014389, + "learning_rate": 1.3811162807244754e-05, + "loss": 0.5501166582107544, + "step": 6403 + }, + { + "epoch": 1.183298495224677, + "grad_norm": 0.09407585114240646, + "learning_rate": 1.380931837811711e-05, + "loss": 0.7043866515159607, + "step": 6404 + }, + { + "epoch": 1.1834832719335728, + "grad_norm": 0.06222414970397949, + "learning_rate": 1.3807473797390509e-05, + "loss": 0.35799136757850647, + "step": 6405 + }, + { + "epoch": 1.1836680486424687, + "grad_norm": 0.06174860894680023, + "learning_rate": 1.3805629065138365e-05, + "loss": 0.510539174079895, + "step": 6406 + }, + { + "epoch": 1.1838528253513645, + "grad_norm": 0.07312752306461334, + "learning_rate": 1.380378418143409e-05, + "loss": 0.5282796621322632, + "step": 6407 + }, + { + "epoch": 1.1840376020602603, + "grad_norm": 0.08893660455942154, + "learning_rate": 1.3801939146351107e-05, + "loss": 0.5515533685684204, + "step": 6408 + }, + { + "epoch": 1.1842223787691561, + "grad_norm": 0.0776859000325203, + "learning_rate": 1.3800093959962837e-05, + "loss": 0.6523489952087402, + "step": 6409 + }, + { + "epoch": 1.184407155478052, + "grad_norm": 0.055580753833055496, + "learning_rate": 1.3798248622342719e-05, + "loss": 0.38817736506462097, + "step": 6410 + }, + { + "epoch": 1.1845919321869478, + "grad_norm": 0.09317143261432648, + "learning_rate": 1.3796403133564187e-05, + "loss": 0.6648978590965271, + "step": 6411 + }, + { + "epoch": 1.1847767088958436, + "grad_norm": 0.07249646633863449, + "learning_rate": 1.379455749370069e-05, + "loss": 0.4550534784793854, + "step": 6412 + }, + { + "epoch": 1.1849614856047395, + "grad_norm": 0.08754712343215942, + "learning_rate": 1.3792711702825674e-05, + "loss": 0.7931678295135498, + "step": 6413 + }, + { + "epoch": 1.1851462623136353, + "grad_norm": 0.09442269057035446, + "learning_rate": 1.3790865761012599e-05, + "loss": 0.6443036198616028, + "step": 6414 + }, + { + "epoch": 1.1853310390225311, + "grad_norm": 0.06372959166765213, + "learning_rate": 1.3789019668334928e-05, + "loss": 0.4782651960849762, + "step": 6415 + }, + { + "epoch": 1.185515815731427, + "grad_norm": 0.06049873307347298, + "learning_rate": 1.3787173424866128e-05, + "loss": 0.4206707179546356, + "step": 6416 + }, + { + "epoch": 1.1857005924403228, + "grad_norm": 0.06669747084379196, + "learning_rate": 1.3785327030679674e-05, + "loss": 0.47748395800590515, + "step": 6417 + }, + { + "epoch": 1.1858853691492188, + "grad_norm": 0.07769818603992462, + "learning_rate": 1.3783480485849049e-05, + "loss": 0.44561567902565, + "step": 6418 + }, + { + "epoch": 1.1860701458581147, + "grad_norm": 0.07723916321992874, + "learning_rate": 1.3781633790447733e-05, + "loss": 0.44810453057289124, + "step": 6419 + }, + { + "epoch": 1.1862549225670105, + "grad_norm": 0.09011591970920563, + "learning_rate": 1.3779786944549224e-05, + "loss": 0.5518773198127747, + "step": 6420 + }, + { + "epoch": 1.1864396992759063, + "grad_norm": 0.06609180569648743, + "learning_rate": 1.3777939948227024e-05, + "loss": 0.48049649596214294, + "step": 6421 + }, + { + "epoch": 1.1866244759848021, + "grad_norm": 0.08514054864645004, + "learning_rate": 1.377609280155463e-05, + "loss": 0.6176885366439819, + "step": 6422 + }, + { + "epoch": 1.186809252693698, + "grad_norm": 0.08028165996074677, + "learning_rate": 1.377424550460556e-05, + "loss": 0.5609257221221924, + "step": 6423 + }, + { + "epoch": 1.1869940294025938, + "grad_norm": 0.08063492923974991, + "learning_rate": 1.3772398057453325e-05, + "loss": 0.6729696989059448, + "step": 6424 + }, + { + "epoch": 1.1871788061114896, + "grad_norm": 0.06459198147058487, + "learning_rate": 1.377055046017145e-05, + "loss": 0.540610134601593, + "step": 6425 + }, + { + "epoch": 1.1873635828203855, + "grad_norm": 0.07490245252847672, + "learning_rate": 1.3768702712833461e-05, + "loss": 0.5625375509262085, + "step": 6426 + }, + { + "epoch": 1.1875483595292813, + "grad_norm": 0.07173041999340057, + "learning_rate": 1.3766854815512897e-05, + "loss": 0.4706256091594696, + "step": 6427 + }, + { + "epoch": 1.1877331362381771, + "grad_norm": 0.0744413509964943, + "learning_rate": 1.3765006768283297e-05, + "loss": 0.5037400126457214, + "step": 6428 + }, + { + "epoch": 1.187917912947073, + "grad_norm": 0.08372724056243896, + "learning_rate": 1.3763158571218205e-05, + "loss": 0.6533783674240112, + "step": 6429 + }, + { + "epoch": 1.1881026896559688, + "grad_norm": 0.07452093809843063, + "learning_rate": 1.3761310224391176e-05, + "loss": 0.45203134417533875, + "step": 6430 + }, + { + "epoch": 1.1882874663648648, + "grad_norm": 0.06394612044095993, + "learning_rate": 1.3759461727875768e-05, + "loss": 0.3614550232887268, + "step": 6431 + }, + { + "epoch": 1.1884722430737606, + "grad_norm": 0.06935583800077438, + "learning_rate": 1.3757613081745546e-05, + "loss": 0.4750102758407593, + "step": 6432 + }, + { + "epoch": 1.1886570197826565, + "grad_norm": 0.07817787677049637, + "learning_rate": 1.3755764286074076e-05, + "loss": 0.5745018124580383, + "step": 6433 + }, + { + "epoch": 1.1888417964915523, + "grad_norm": 0.0575583279132843, + "learning_rate": 1.375391534093494e-05, + "loss": 0.3608076274394989, + "step": 6434 + }, + { + "epoch": 1.1890265732004481, + "grad_norm": 0.06657610833644867, + "learning_rate": 1.375206624640172e-05, + "loss": 0.5197997689247131, + "step": 6435 + }, + { + "epoch": 1.189211349909344, + "grad_norm": 0.05878225713968277, + "learning_rate": 1.3750217002547998e-05, + "loss": 0.44958725571632385, + "step": 6436 + }, + { + "epoch": 1.1893961266182398, + "grad_norm": 0.086529441177845, + "learning_rate": 1.3748367609447375e-05, + "loss": 0.5028765201568604, + "step": 6437 + }, + { + "epoch": 1.1895809033271356, + "grad_norm": 0.07351250946521759, + "learning_rate": 1.3746518067173449e-05, + "loss": 0.5214920043945312, + "step": 6438 + }, + { + "epoch": 1.1897656800360314, + "grad_norm": 0.06650044023990631, + "learning_rate": 1.3744668375799823e-05, + "loss": 0.4056544899940491, + "step": 6439 + }, + { + "epoch": 1.1899504567449273, + "grad_norm": 0.0780612975358963, + "learning_rate": 1.3742818535400111e-05, + "loss": 0.5858096480369568, + "step": 6440 + }, + { + "epoch": 1.190135233453823, + "grad_norm": 0.09381809085607529, + "learning_rate": 1.3740968546047935e-05, + "loss": 0.7084423899650574, + "step": 6441 + }, + { + "epoch": 1.190320010162719, + "grad_norm": 0.08907200396060944, + "learning_rate": 1.3739118407816912e-05, + "loss": 0.6315073370933533, + "step": 6442 + }, + { + "epoch": 1.1905047868716148, + "grad_norm": 0.07788009196519852, + "learning_rate": 1.3737268120780671e-05, + "loss": 0.5950636863708496, + "step": 6443 + }, + { + "epoch": 1.1906895635805106, + "grad_norm": 0.08506182581186295, + "learning_rate": 1.3735417685012857e-05, + "loss": 0.6485891342163086, + "step": 6444 + }, + { + "epoch": 1.1908743402894064, + "grad_norm": 0.06447650492191315, + "learning_rate": 1.3733567100587104e-05, + "loss": 0.4180641174316406, + "step": 6445 + }, + { + "epoch": 1.1910591169983022, + "grad_norm": 0.08556390553712845, + "learning_rate": 1.3731716367577059e-05, + "loss": 0.5860201716423035, + "step": 6446 + }, + { + "epoch": 1.1912438937071983, + "grad_norm": 0.07690877467393875, + "learning_rate": 1.372986548605638e-05, + "loss": 0.5545225143432617, + "step": 6447 + }, + { + "epoch": 1.1914286704160941, + "grad_norm": 0.08372151106595993, + "learning_rate": 1.3728014456098724e-05, + "loss": 0.5921500325202942, + "step": 6448 + }, + { + "epoch": 1.19161344712499, + "grad_norm": 0.06034170463681221, + "learning_rate": 1.3726163277777755e-05, + "loss": 0.5056084990501404, + "step": 6449 + }, + { + "epoch": 1.1917982238338858, + "grad_norm": 0.06315089762210846, + "learning_rate": 1.3724311951167144e-05, + "loss": 0.5258921384811401, + "step": 6450 + }, + { + "epoch": 1.1919830005427816, + "grad_norm": 0.07752560079097748, + "learning_rate": 1.372246047634057e-05, + "loss": 0.5884163975715637, + "step": 6451 + }, + { + "epoch": 1.1921677772516774, + "grad_norm": 0.08313777297735214, + "learning_rate": 1.3720608853371719e-05, + "loss": 0.5612613558769226, + "step": 6452 + }, + { + "epoch": 1.1923525539605733, + "grad_norm": 0.08458501100540161, + "learning_rate": 1.3718757082334268e-05, + "loss": 0.640018880367279, + "step": 6453 + }, + { + "epoch": 1.192537330669469, + "grad_norm": 0.06649594753980637, + "learning_rate": 1.3716905163301928e-05, + "loss": 0.4684979319572449, + "step": 6454 + }, + { + "epoch": 1.192722107378365, + "grad_norm": 0.07032130658626556, + "learning_rate": 1.3715053096348387e-05, + "loss": 0.47029954195022583, + "step": 6455 + }, + { + "epoch": 1.1929068840872608, + "grad_norm": 0.07311615347862244, + "learning_rate": 1.3713200881547357e-05, + "loss": 0.43086791038513184, + "step": 6456 + }, + { + "epoch": 1.1930916607961566, + "grad_norm": 0.07677126675844193, + "learning_rate": 1.3711348518972547e-05, + "loss": 0.542203962802887, + "step": 6457 + }, + { + "epoch": 1.1932764375050524, + "grad_norm": 0.06928981095552444, + "learning_rate": 1.370949600869768e-05, + "loss": 0.48314666748046875, + "step": 6458 + }, + { + "epoch": 1.1934612142139482, + "grad_norm": 0.08782751858234406, + "learning_rate": 1.3707643350796476e-05, + "loss": 0.6548619270324707, + "step": 6459 + }, + { + "epoch": 1.1936459909228443, + "grad_norm": 0.0846259817481041, + "learning_rate": 1.3705790545342664e-05, + "loss": 0.5844980478286743, + "step": 6460 + }, + { + "epoch": 1.1938307676317401, + "grad_norm": 0.07081461697816849, + "learning_rate": 1.3703937592409985e-05, + "loss": 0.544660210609436, + "step": 6461 + }, + { + "epoch": 1.194015544340636, + "grad_norm": 0.07676222175359726, + "learning_rate": 1.3702084492072181e-05, + "loss": 0.5665822625160217, + "step": 6462 + }, + { + "epoch": 1.1942003210495318, + "grad_norm": 0.07171925157308578, + "learning_rate": 1.3700231244402988e-05, + "loss": 0.564756453037262, + "step": 6463 + }, + { + "epoch": 1.1943850977584276, + "grad_norm": 0.06867315620183945, + "learning_rate": 1.3698377849476176e-05, + "loss": 0.4594693183898926, + "step": 6464 + }, + { + "epoch": 1.1945698744673234, + "grad_norm": 0.0693492516875267, + "learning_rate": 1.3696524307365496e-05, + "loss": 0.5229794979095459, + "step": 6465 + }, + { + "epoch": 1.1947546511762193, + "grad_norm": 0.08138994872570038, + "learning_rate": 1.3694670618144708e-05, + "loss": 0.7047170400619507, + "step": 6466 + }, + { + "epoch": 1.194939427885115, + "grad_norm": 0.07865667343139648, + "learning_rate": 1.369281678188759e-05, + "loss": 0.5625156164169312, + "step": 6467 + }, + { + "epoch": 1.195124204594011, + "grad_norm": 0.06311140954494476, + "learning_rate": 1.3690962798667921e-05, + "loss": 0.45640701055526733, + "step": 6468 + }, + { + "epoch": 1.1953089813029067, + "grad_norm": 0.0694119855761528, + "learning_rate": 1.3689108668559476e-05, + "loss": 0.4804447293281555, + "step": 6469 + }, + { + "epoch": 1.1954937580118026, + "grad_norm": 0.08370231091976166, + "learning_rate": 1.3687254391636052e-05, + "loss": 0.4979509115219116, + "step": 6470 + }, + { + "epoch": 1.1956785347206984, + "grad_norm": 0.07539594173431396, + "learning_rate": 1.3685399967971436e-05, + "loss": 0.5329857468605042, + "step": 6471 + }, + { + "epoch": 1.1958633114295942, + "grad_norm": 0.06413303315639496, + "learning_rate": 1.3683545397639433e-05, + "loss": 0.3933248519897461, + "step": 6472 + }, + { + "epoch": 1.19604808813849, + "grad_norm": 0.08125191926956177, + "learning_rate": 1.3681690680713846e-05, + "loss": 0.47538602352142334, + "step": 6473 + }, + { + "epoch": 1.196232864847386, + "grad_norm": 0.0841967985033989, + "learning_rate": 1.367983581726849e-05, + "loss": 0.5319108366966248, + "step": 6474 + }, + { + "epoch": 1.1964176415562817, + "grad_norm": 0.07825710624456406, + "learning_rate": 1.3677980807377181e-05, + "loss": 0.5691261887550354, + "step": 6475 + }, + { + "epoch": 1.1966024182651778, + "grad_norm": 0.06753269582986832, + "learning_rate": 1.3676125651113741e-05, + "loss": 0.48817798495292664, + "step": 6476 + }, + { + "epoch": 1.1967871949740736, + "grad_norm": 0.07528755813837051, + "learning_rate": 1.3674270348552001e-05, + "loss": 0.5107649564743042, + "step": 6477 + }, + { + "epoch": 1.1969719716829694, + "grad_norm": 0.07238613069057465, + "learning_rate": 1.36724148997658e-05, + "loss": 0.5391866564750671, + "step": 6478 + }, + { + "epoch": 1.1971567483918653, + "grad_norm": 0.061458688229322433, + "learning_rate": 1.3670559304828972e-05, + "loss": 0.4981677532196045, + "step": 6479 + }, + { + "epoch": 1.197341525100761, + "grad_norm": 0.08438178896903992, + "learning_rate": 1.366870356381537e-05, + "loss": 0.6701396703720093, + "step": 6480 + }, + { + "epoch": 1.197526301809657, + "grad_norm": 0.06598415970802307, + "learning_rate": 1.3666847676798842e-05, + "loss": 0.5153096914291382, + "step": 6481 + }, + { + "epoch": 1.1977110785185527, + "grad_norm": 0.0857049971818924, + "learning_rate": 1.3664991643853251e-05, + "loss": 0.5699812173843384, + "step": 6482 + }, + { + "epoch": 1.1978958552274486, + "grad_norm": 0.05555878207087517, + "learning_rate": 1.3663135465052457e-05, + "loss": 0.45247071981430054, + "step": 6483 + }, + { + "epoch": 1.1980806319363444, + "grad_norm": 0.10718713700771332, + "learning_rate": 1.3661279140470331e-05, + "loss": 0.6991795897483826, + "step": 6484 + }, + { + "epoch": 1.1982654086452402, + "grad_norm": 0.08743496984243393, + "learning_rate": 1.3659422670180754e-05, + "loss": 0.4683980345726013, + "step": 6485 + }, + { + "epoch": 1.198450185354136, + "grad_norm": 0.06674148142337799, + "learning_rate": 1.3657566054257598e-05, + "loss": 0.4011494517326355, + "step": 6486 + }, + { + "epoch": 1.1986349620630319, + "grad_norm": 0.06291612982749939, + "learning_rate": 1.365570929277476e-05, + "loss": 0.41058915853500366, + "step": 6487 + }, + { + "epoch": 1.1988197387719277, + "grad_norm": 0.08688245713710785, + "learning_rate": 1.3653852385806128e-05, + "loss": 0.5843929648399353, + "step": 6488 + }, + { + "epoch": 1.1990045154808238, + "grad_norm": 0.07869403064250946, + "learning_rate": 1.36519953334256e-05, + "loss": 0.5506920218467712, + "step": 6489 + }, + { + "epoch": 1.1991892921897196, + "grad_norm": 0.06990286707878113, + "learning_rate": 1.365013813570709e-05, + "loss": 0.4719213545322418, + "step": 6490 + }, + { + "epoch": 1.1993740688986154, + "grad_norm": 0.07826852798461914, + "learning_rate": 1.3648280792724496e-05, + "loss": 0.5762194991111755, + "step": 6491 + }, + { + "epoch": 1.1995588456075112, + "grad_norm": 0.058163248002529144, + "learning_rate": 1.3646423304551743e-05, + "loss": 0.38399115204811096, + "step": 6492 + }, + { + "epoch": 1.199743622316407, + "grad_norm": 0.08161134272813797, + "learning_rate": 1.364456567126275e-05, + "loss": 0.7120457887649536, + "step": 6493 + }, + { + "epoch": 1.199928399025303, + "grad_norm": 0.07929819822311401, + "learning_rate": 1.3642707892931447e-05, + "loss": 0.5719782710075378, + "step": 6494 + }, + { + "epoch": 1.2001131757341987, + "grad_norm": 0.08201904594898224, + "learning_rate": 1.3640849969631765e-05, + "loss": 0.5495536923408508, + "step": 6495 + }, + { + "epoch": 1.2002979524430946, + "grad_norm": 0.07408961653709412, + "learning_rate": 1.3638991901437644e-05, + "loss": 0.43150046467781067, + "step": 6496 + }, + { + "epoch": 1.2004827291519904, + "grad_norm": 0.08079312741756439, + "learning_rate": 1.3637133688423032e-05, + "loss": 0.5508987903594971, + "step": 6497 + }, + { + "epoch": 1.2006675058608862, + "grad_norm": 0.0907614529132843, + "learning_rate": 1.3635275330661877e-05, + "loss": 0.8658058643341064, + "step": 6498 + }, + { + "epoch": 1.200852282569782, + "grad_norm": 0.08234362304210663, + "learning_rate": 1.3633416828228136e-05, + "loss": 0.5761150121688843, + "step": 6499 + }, + { + "epoch": 1.2010370592786779, + "grad_norm": 0.06498465687036514, + "learning_rate": 1.3631558181195779e-05, + "loss": 0.4183274805545807, + "step": 6500 + }, + { + "epoch": 1.2010370592786779, + "eval_loss": 0.6012307405471802, + "eval_runtime": 157.8572, + "eval_samples_per_second": 115.478, + "eval_steps_per_second": 14.437, + "step": 6500 + }, + { + "epoch": 1.2012218359875737, + "grad_norm": 0.05880355462431908, + "learning_rate": 1.3629699389638762e-05, + "loss": 0.41759729385375977, + "step": 6501 + }, + { + "epoch": 1.2014066126964695, + "grad_norm": 0.09420034289360046, + "learning_rate": 1.3627840453631068e-05, + "loss": 0.5877329707145691, + "step": 6502 + }, + { + "epoch": 1.2015913894053654, + "grad_norm": 0.05163704231381416, + "learning_rate": 1.362598137324667e-05, + "loss": 0.3486137390136719, + "step": 6503 + }, + { + "epoch": 1.2017761661142612, + "grad_norm": 0.08603861182928085, + "learning_rate": 1.3624122148559563e-05, + "loss": 0.5939251780509949, + "step": 6504 + }, + { + "epoch": 1.201960942823157, + "grad_norm": 0.07779145240783691, + "learning_rate": 1.362226277964373e-05, + "loss": 0.5520979762077332, + "step": 6505 + }, + { + "epoch": 1.202145719532053, + "grad_norm": 0.07623668015003204, + "learning_rate": 1.362040326657317e-05, + "loss": 0.6112082004547119, + "step": 6506 + }, + { + "epoch": 1.202330496240949, + "grad_norm": 0.06870964169502258, + "learning_rate": 1.361854360942189e-05, + "loss": 0.4941728115081787, + "step": 6507 + }, + { + "epoch": 1.2025152729498447, + "grad_norm": 0.08155199885368347, + "learning_rate": 1.3616683808263893e-05, + "loss": 0.6787418723106384, + "step": 6508 + }, + { + "epoch": 1.2027000496587406, + "grad_norm": 0.06495072692632675, + "learning_rate": 1.3614823863173194e-05, + "loss": 0.474793404340744, + "step": 6509 + }, + { + "epoch": 1.2028848263676364, + "grad_norm": 0.07195496559143066, + "learning_rate": 1.361296377422382e-05, + "loss": 0.5775393843650818, + "step": 6510 + }, + { + "epoch": 1.2030696030765322, + "grad_norm": 0.08239276707172394, + "learning_rate": 1.3611103541489787e-05, + "loss": 0.47624650597572327, + "step": 6511 + }, + { + "epoch": 1.203254379785428, + "grad_norm": 0.060199178755283356, + "learning_rate": 1.3609243165045131e-05, + "loss": 0.39695146679878235, + "step": 6512 + }, + { + "epoch": 1.2034391564943239, + "grad_norm": 0.0860724076628685, + "learning_rate": 1.3607382644963888e-05, + "loss": 0.5685034990310669, + "step": 6513 + }, + { + "epoch": 1.2036239332032197, + "grad_norm": 0.07247007638216019, + "learning_rate": 1.3605521981320107e-05, + "loss": 0.4510326683521271, + "step": 6514 + }, + { + "epoch": 1.2038087099121155, + "grad_norm": 0.10170851647853851, + "learning_rate": 1.3603661174187828e-05, + "loss": 0.8736907243728638, + "step": 6515 + }, + { + "epoch": 1.2039934866210114, + "grad_norm": 0.07688196003437042, + "learning_rate": 1.3601800223641107e-05, + "loss": 0.5343433022499084, + "step": 6516 + }, + { + "epoch": 1.2041782633299072, + "grad_norm": 0.0692083016037941, + "learning_rate": 1.3599939129754008e-05, + "loss": 0.44495779275894165, + "step": 6517 + }, + { + "epoch": 1.204363040038803, + "grad_norm": 0.10107952356338501, + "learning_rate": 1.3598077892600592e-05, + "loss": 0.720582902431488, + "step": 6518 + }, + { + "epoch": 1.204547816747699, + "grad_norm": 0.06074807047843933, + "learning_rate": 1.3596216512254934e-05, + "loss": 0.46574512124061584, + "step": 6519 + }, + { + "epoch": 1.204732593456595, + "grad_norm": 0.06732956320047379, + "learning_rate": 1.3594354988791111e-05, + "loss": 0.5194476246833801, + "step": 6520 + }, + { + "epoch": 1.2049173701654907, + "grad_norm": 0.07385317981243134, + "learning_rate": 1.3592493322283207e-05, + "loss": 0.5754207968711853, + "step": 6521 + }, + { + "epoch": 1.2051021468743865, + "grad_norm": 0.08071117848157883, + "learning_rate": 1.3590631512805303e-05, + "loss": 0.5599603652954102, + "step": 6522 + }, + { + "epoch": 1.2052869235832824, + "grad_norm": 0.0781562328338623, + "learning_rate": 1.35887695604315e-05, + "loss": 0.48199522495269775, + "step": 6523 + }, + { + "epoch": 1.2054717002921782, + "grad_norm": 0.044577743858098984, + "learning_rate": 1.3586907465235898e-05, + "loss": 0.2852509319782257, + "step": 6524 + }, + { + "epoch": 1.205656477001074, + "grad_norm": 0.08220332860946655, + "learning_rate": 1.3585045227292598e-05, + "loss": 0.5416831374168396, + "step": 6525 + }, + { + "epoch": 1.2058412537099699, + "grad_norm": 0.07076594978570938, + "learning_rate": 1.3583182846675716e-05, + "loss": 0.5500925779342651, + "step": 6526 + }, + { + "epoch": 1.2060260304188657, + "grad_norm": 0.07169757783412933, + "learning_rate": 1.3581320323459368e-05, + "loss": 0.45242324471473694, + "step": 6527 + }, + { + "epoch": 1.2062108071277615, + "grad_norm": 0.06065558269619942, + "learning_rate": 1.3579457657717673e-05, + "loss": 0.41949501633644104, + "step": 6528 + }, + { + "epoch": 1.2063955838366573, + "grad_norm": 0.07661020010709763, + "learning_rate": 1.3577594849524765e-05, + "loss": 0.4991658627986908, + "step": 6529 + }, + { + "epoch": 1.2065803605455532, + "grad_norm": 0.08028572052717209, + "learning_rate": 1.3575731898954774e-05, + "loss": 0.5469301342964172, + "step": 6530 + }, + { + "epoch": 1.206765137254449, + "grad_norm": 0.07623415440320969, + "learning_rate": 1.357386880608184e-05, + "loss": 0.6443749666213989, + "step": 6531 + }, + { + "epoch": 1.2069499139633448, + "grad_norm": 0.07754608243703842, + "learning_rate": 1.3572005570980109e-05, + "loss": 0.5259369015693665, + "step": 6532 + }, + { + "epoch": 1.2071346906722407, + "grad_norm": 0.06529566645622253, + "learning_rate": 1.357014219372373e-05, + "loss": 0.4533982574939728, + "step": 6533 + }, + { + "epoch": 1.2073194673811365, + "grad_norm": 0.051948487758636475, + "learning_rate": 1.3568278674386863e-05, + "loss": 0.3756308853626251, + "step": 6534 + }, + { + "epoch": 1.2075042440900325, + "grad_norm": 0.06701385974884033, + "learning_rate": 1.3566415013043667e-05, + "loss": 0.4647313952445984, + "step": 6535 + }, + { + "epoch": 1.2076890207989284, + "grad_norm": 0.05771351978182793, + "learning_rate": 1.3564551209768312e-05, + "loss": 0.4375614821910858, + "step": 6536 + }, + { + "epoch": 1.2078737975078242, + "grad_norm": 0.07962189614772797, + "learning_rate": 1.3562687264634972e-05, + "loss": 0.5191725492477417, + "step": 6537 + }, + { + "epoch": 1.20805857421672, + "grad_norm": 0.08615492284297943, + "learning_rate": 1.3560823177717826e-05, + "loss": 0.651888906955719, + "step": 6538 + }, + { + "epoch": 1.2082433509256159, + "grad_norm": 0.07054128497838974, + "learning_rate": 1.3558958949091055e-05, + "loss": 0.48627346754074097, + "step": 6539 + }, + { + "epoch": 1.2084281276345117, + "grad_norm": 0.07507564127445221, + "learning_rate": 1.3557094578828853e-05, + "loss": 0.6051132082939148, + "step": 6540 + }, + { + "epoch": 1.2086129043434075, + "grad_norm": 0.07555394619703293, + "learning_rate": 1.3555230067005418e-05, + "loss": 0.6303532123565674, + "step": 6541 + }, + { + "epoch": 1.2087976810523033, + "grad_norm": 0.06445108354091644, + "learning_rate": 1.3553365413694946e-05, + "loss": 0.4361160099506378, + "step": 6542 + }, + { + "epoch": 1.2089824577611992, + "grad_norm": 0.058598194271326065, + "learning_rate": 1.355150061897165e-05, + "loss": 0.43637320399284363, + "step": 6543 + }, + { + "epoch": 1.209167234470095, + "grad_norm": 0.08800679445266724, + "learning_rate": 1.3549635682909738e-05, + "loss": 0.6950803995132446, + "step": 6544 + }, + { + "epoch": 1.2093520111789908, + "grad_norm": 0.07335197180509567, + "learning_rate": 1.3547770605583433e-05, + "loss": 0.5119472742080688, + "step": 6545 + }, + { + "epoch": 1.2095367878878867, + "grad_norm": 0.07864607125520706, + "learning_rate": 1.3545905387066956e-05, + "loss": 0.6012009382247925, + "step": 6546 + }, + { + "epoch": 1.2097215645967825, + "grad_norm": 0.07670775055885315, + "learning_rate": 1.3544040027434542e-05, + "loss": 0.5006359815597534, + "step": 6547 + }, + { + "epoch": 1.2099063413056785, + "grad_norm": 0.05005637928843498, + "learning_rate": 1.3542174526760421e-05, + "loss": 0.30397164821624756, + "step": 6548 + }, + { + "epoch": 1.2100911180145744, + "grad_norm": 0.06848768889904022, + "learning_rate": 1.3540308885118832e-05, + "loss": 0.36815324425697327, + "step": 6549 + }, + { + "epoch": 1.2102758947234702, + "grad_norm": 0.07376129180192947, + "learning_rate": 1.353844310258403e-05, + "loss": 0.5528689622879028, + "step": 6550 + }, + { + "epoch": 1.210460671432366, + "grad_norm": 0.07270894199609756, + "learning_rate": 1.3536577179230261e-05, + "loss": 0.5203938484191895, + "step": 6551 + }, + { + "epoch": 1.2106454481412618, + "grad_norm": 0.09340828657150269, + "learning_rate": 1.3534711115131784e-05, + "loss": 0.8169743418693542, + "step": 6552 + }, + { + "epoch": 1.2108302248501577, + "grad_norm": 0.08001939207315445, + "learning_rate": 1.3532844910362865e-05, + "loss": 0.5689393877983093, + "step": 6553 + }, + { + "epoch": 1.2110150015590535, + "grad_norm": 0.07906338572502136, + "learning_rate": 1.3530978564997774e-05, + "loss": 0.47733554244041443, + "step": 6554 + }, + { + "epoch": 1.2111997782679493, + "grad_norm": 0.0676964595913887, + "learning_rate": 1.3529112079110778e-05, + "loss": 0.4821913242340088, + "step": 6555 + }, + { + "epoch": 1.2113845549768452, + "grad_norm": 0.06866760551929474, + "learning_rate": 1.3527245452776163e-05, + "loss": 0.5118091702461243, + "step": 6556 + }, + { + "epoch": 1.211569331685741, + "grad_norm": 0.07368195801973343, + "learning_rate": 1.3525378686068218e-05, + "loss": 0.36613550782203674, + "step": 6557 + }, + { + "epoch": 1.2117541083946368, + "grad_norm": 0.09328657388687134, + "learning_rate": 1.3523511779061228e-05, + "loss": 0.5485423803329468, + "step": 6558 + }, + { + "epoch": 1.2119388851035326, + "grad_norm": 0.09039495885372162, + "learning_rate": 1.3521644731829493e-05, + "loss": 0.6272793412208557, + "step": 6559 + }, + { + "epoch": 1.2121236618124285, + "grad_norm": 0.08554688841104507, + "learning_rate": 1.3519777544447316e-05, + "loss": 0.625359296798706, + "step": 6560 + }, + { + "epoch": 1.2123084385213243, + "grad_norm": 0.09835842251777649, + "learning_rate": 1.3517910216989008e-05, + "loss": 0.7249084115028381, + "step": 6561 + }, + { + "epoch": 1.2124932152302201, + "grad_norm": 0.07324441522359848, + "learning_rate": 1.3516042749528874e-05, + "loss": 0.47122693061828613, + "step": 6562 + }, + { + "epoch": 1.212677991939116, + "grad_norm": 0.07746856659650803, + "learning_rate": 1.3514175142141241e-05, + "loss": 0.5781249403953552, + "step": 6563 + }, + { + "epoch": 1.212862768648012, + "grad_norm": 0.08498020470142365, + "learning_rate": 1.3512307394900433e-05, + "loss": 0.493082731962204, + "step": 6564 + }, + { + "epoch": 1.2130475453569078, + "grad_norm": 0.06842965632677078, + "learning_rate": 1.3510439507880778e-05, + "loss": 0.5504364371299744, + "step": 6565 + }, + { + "epoch": 1.2132323220658037, + "grad_norm": 0.09442943334579468, + "learning_rate": 1.3508571481156612e-05, + "loss": 0.6746259927749634, + "step": 6566 + }, + { + "epoch": 1.2134170987746995, + "grad_norm": 0.08421729505062103, + "learning_rate": 1.350670331480228e-05, + "loss": 0.6475216746330261, + "step": 6567 + }, + { + "epoch": 1.2136018754835953, + "grad_norm": 0.07111159712076187, + "learning_rate": 1.350483500889213e-05, + "loss": 0.43587779998779297, + "step": 6568 + }, + { + "epoch": 1.2137866521924912, + "grad_norm": 0.06216439977288246, + "learning_rate": 1.3502966563500504e-05, + "loss": 0.512531042098999, + "step": 6569 + }, + { + "epoch": 1.213971428901387, + "grad_norm": 0.05999573692679405, + "learning_rate": 1.3501097978701773e-05, + "loss": 0.4109059274196625, + "step": 6570 + }, + { + "epoch": 1.2141562056102828, + "grad_norm": 0.06874170154333115, + "learning_rate": 1.3499229254570298e-05, + "loss": 0.43019795417785645, + "step": 6571 + }, + { + "epoch": 1.2143409823191786, + "grad_norm": 0.08103114366531372, + "learning_rate": 1.3497360391180443e-05, + "loss": 0.5568566918373108, + "step": 6572 + }, + { + "epoch": 1.2145257590280745, + "grad_norm": 0.0737990066409111, + "learning_rate": 1.3495491388606587e-05, + "loss": 0.4694644808769226, + "step": 6573 + }, + { + "epoch": 1.2147105357369703, + "grad_norm": 0.07458315044641495, + "learning_rate": 1.349362224692311e-05, + "loss": 0.6392929553985596, + "step": 6574 + }, + { + "epoch": 1.2148953124458661, + "grad_norm": 0.0786605253815651, + "learning_rate": 1.3491752966204397e-05, + "loss": 0.7331372499465942, + "step": 6575 + }, + { + "epoch": 1.215080089154762, + "grad_norm": 0.08692830055952072, + "learning_rate": 1.348988354652484e-05, + "loss": 0.6495953798294067, + "step": 6576 + }, + { + "epoch": 1.215264865863658, + "grad_norm": 0.07171203196048737, + "learning_rate": 1.3488013987958839e-05, + "loss": 0.5462040305137634, + "step": 6577 + }, + { + "epoch": 1.2154496425725538, + "grad_norm": 0.07235080003738403, + "learning_rate": 1.3486144290580793e-05, + "loss": 0.5880376696586609, + "step": 6578 + }, + { + "epoch": 1.2156344192814497, + "grad_norm": 0.06372851878404617, + "learning_rate": 1.3484274454465109e-05, + "loss": 0.4510330855846405, + "step": 6579 + }, + { + "epoch": 1.2158191959903455, + "grad_norm": 0.054737433791160583, + "learning_rate": 1.3482404479686204e-05, + "loss": 0.34662091732025146, + "step": 6580 + }, + { + "epoch": 1.2160039726992413, + "grad_norm": 0.08225202560424805, + "learning_rate": 1.3480534366318496e-05, + "loss": 0.615791380405426, + "step": 6581 + }, + { + "epoch": 1.2161887494081371, + "grad_norm": 0.0670294538140297, + "learning_rate": 1.3478664114436408e-05, + "loss": 0.3681284487247467, + "step": 6582 + }, + { + "epoch": 1.216373526117033, + "grad_norm": 0.06846636533737183, + "learning_rate": 1.3476793724114372e-05, + "loss": 0.48841947317123413, + "step": 6583 + }, + { + "epoch": 1.2165583028259288, + "grad_norm": 0.10067915171384811, + "learning_rate": 1.3474923195426825e-05, + "loss": 0.672841489315033, + "step": 6584 + }, + { + "epoch": 1.2167430795348246, + "grad_norm": 0.090632364153862, + "learning_rate": 1.3473052528448203e-05, + "loss": 0.6621450185775757, + "step": 6585 + }, + { + "epoch": 1.2169278562437205, + "grad_norm": 0.0676613301038742, + "learning_rate": 1.347118172325296e-05, + "loss": 0.5203623175621033, + "step": 6586 + }, + { + "epoch": 1.2171126329526163, + "grad_norm": 0.07406887412071228, + "learning_rate": 1.3469310779915543e-05, + "loss": 0.6254865527153015, + "step": 6587 + }, + { + "epoch": 1.2172974096615121, + "grad_norm": 0.07997486740350723, + "learning_rate": 1.346743969851041e-05, + "loss": 0.6252841353416443, + "step": 6588 + }, + { + "epoch": 1.217482186370408, + "grad_norm": 0.06023580953478813, + "learning_rate": 1.3465568479112026e-05, + "loss": 0.40626582503318787, + "step": 6589 + }, + { + "epoch": 1.2176669630793038, + "grad_norm": 0.08407054096460342, + "learning_rate": 1.3463697121794859e-05, + "loss": 0.7044686079025269, + "step": 6590 + }, + { + "epoch": 1.2178517397881996, + "grad_norm": 0.0657910481095314, + "learning_rate": 1.3461825626633384e-05, + "loss": 0.3766320049762726, + "step": 6591 + }, + { + "epoch": 1.2180365164970954, + "grad_norm": 0.05869833379983902, + "learning_rate": 1.3459953993702077e-05, + "loss": 0.37285542488098145, + "step": 6592 + }, + { + "epoch": 1.2182212932059913, + "grad_norm": 0.08053995668888092, + "learning_rate": 1.3458082223075425e-05, + "loss": 0.5446112751960754, + "step": 6593 + }, + { + "epoch": 1.2184060699148873, + "grad_norm": 0.07651842385530472, + "learning_rate": 1.3456210314827924e-05, + "loss": 0.6633077263832092, + "step": 6594 + }, + { + "epoch": 1.2185908466237831, + "grad_norm": 0.08434221893548965, + "learning_rate": 1.3454338269034064e-05, + "loss": 0.47503310441970825, + "step": 6595 + }, + { + "epoch": 1.218775623332679, + "grad_norm": 0.08381889015436172, + "learning_rate": 1.3452466085768348e-05, + "loss": 0.6799910068511963, + "step": 6596 + }, + { + "epoch": 1.2189604000415748, + "grad_norm": 0.08820167928934097, + "learning_rate": 1.3450593765105282e-05, + "loss": 0.5599125027656555, + "step": 6597 + }, + { + "epoch": 1.2191451767504706, + "grad_norm": 0.0874655693769455, + "learning_rate": 1.3448721307119379e-05, + "loss": 0.6755706071853638, + "step": 6598 + }, + { + "epoch": 1.2193299534593665, + "grad_norm": 0.05301399528980255, + "learning_rate": 1.344684871188516e-05, + "loss": 0.36308756470680237, + "step": 6599 + }, + { + "epoch": 1.2195147301682623, + "grad_norm": 0.07892420142889023, + "learning_rate": 1.3444975979477146e-05, + "loss": 0.5817967057228088, + "step": 6600 + }, + { + "epoch": 1.2196995068771581, + "grad_norm": 0.07648614048957825, + "learning_rate": 1.3443103109969866e-05, + "loss": 0.5099329352378845, + "step": 6601 + }, + { + "epoch": 1.219884283586054, + "grad_norm": 0.0788240060210228, + "learning_rate": 1.3441230103437852e-05, + "loss": 0.6097611784934998, + "step": 6602 + }, + { + "epoch": 1.2200690602949498, + "grad_norm": 0.06284268945455551, + "learning_rate": 1.343935695995565e-05, + "loss": 0.43738633394241333, + "step": 6603 + }, + { + "epoch": 1.2202538370038456, + "grad_norm": 0.07347282022237778, + "learning_rate": 1.3437483679597798e-05, + "loss": 0.5626323819160461, + "step": 6604 + }, + { + "epoch": 1.2204386137127414, + "grad_norm": 0.09133567661046982, + "learning_rate": 1.343561026243885e-05, + "loss": 0.6883614659309387, + "step": 6605 + }, + { + "epoch": 1.2206233904216375, + "grad_norm": 0.06906570494174957, + "learning_rate": 1.3433736708553364e-05, + "loss": 0.48239877820014954, + "step": 6606 + }, + { + "epoch": 1.2208081671305333, + "grad_norm": 0.0667714849114418, + "learning_rate": 1.3431863018015898e-05, + "loss": 0.46174854040145874, + "step": 6607 + }, + { + "epoch": 1.2209929438394291, + "grad_norm": 0.08062037825584412, + "learning_rate": 1.3429989190901021e-05, + "loss": 0.5113742351531982, + "step": 6608 + }, + { + "epoch": 1.221177720548325, + "grad_norm": 0.08745943754911423, + "learning_rate": 1.3428115227283308e-05, + "loss": 0.6630105972290039, + "step": 6609 + }, + { + "epoch": 1.2213624972572208, + "grad_norm": 0.06435438990592957, + "learning_rate": 1.3426241127237331e-05, + "loss": 0.4193195104598999, + "step": 6610 + }, + { + "epoch": 1.2215472739661166, + "grad_norm": 0.06775399297475815, + "learning_rate": 1.342436689083768e-05, + "loss": 0.49177947640419006, + "step": 6611 + }, + { + "epoch": 1.2217320506750124, + "grad_norm": 0.07548322528600693, + "learning_rate": 1.3422492518158936e-05, + "loss": 0.5043904781341553, + "step": 6612 + }, + { + "epoch": 1.2219168273839083, + "grad_norm": 0.0687546357512474, + "learning_rate": 1.3420618009275701e-05, + "loss": 0.4975576400756836, + "step": 6613 + }, + { + "epoch": 1.222101604092804, + "grad_norm": 0.06727460771799088, + "learning_rate": 1.3418743364262567e-05, + "loss": 0.4292713403701782, + "step": 6614 + }, + { + "epoch": 1.2222863808017, + "grad_norm": 0.08708701282739639, + "learning_rate": 1.3416868583194145e-05, + "loss": 0.6526595950126648, + "step": 6615 + }, + { + "epoch": 1.2224711575105958, + "grad_norm": 0.07395205646753311, + "learning_rate": 1.3414993666145043e-05, + "loss": 0.6405277848243713, + "step": 6616 + }, + { + "epoch": 1.2226559342194916, + "grad_norm": 0.05583598464727402, + "learning_rate": 1.341311861318988e-05, + "loss": 0.411292165517807, + "step": 6617 + }, + { + "epoch": 1.2228407109283874, + "grad_norm": 0.07856931537389755, + "learning_rate": 1.341124342440327e-05, + "loss": 0.5631988048553467, + "step": 6618 + }, + { + "epoch": 1.2230254876372832, + "grad_norm": 0.06796279549598694, + "learning_rate": 1.3409368099859848e-05, + "loss": 0.39263972640037537, + "step": 6619 + }, + { + "epoch": 1.223210264346179, + "grad_norm": 0.08865714818239212, + "learning_rate": 1.3407492639634243e-05, + "loss": 0.6106154918670654, + "step": 6620 + }, + { + "epoch": 1.223395041055075, + "grad_norm": 0.07416286319494247, + "learning_rate": 1.3405617043801087e-05, + "loss": 0.5677984356880188, + "step": 6621 + }, + { + "epoch": 1.2235798177639707, + "grad_norm": 0.06661788374185562, + "learning_rate": 1.340374131243503e-05, + "loss": 0.5082527995109558, + "step": 6622 + }, + { + "epoch": 1.2237645944728668, + "grad_norm": 0.09100113809108734, + "learning_rate": 1.3401865445610717e-05, + "loss": 0.6676788926124573, + "step": 6623 + }, + { + "epoch": 1.2239493711817626, + "grad_norm": 0.0856882631778717, + "learning_rate": 1.3399989443402804e-05, + "loss": 0.5832250714302063, + "step": 6624 + }, + { + "epoch": 1.2241341478906584, + "grad_norm": 0.0818934291601181, + "learning_rate": 1.3398113305885944e-05, + "loss": 0.6237608790397644, + "step": 6625 + }, + { + "epoch": 1.2243189245995543, + "grad_norm": 0.07755490392446518, + "learning_rate": 1.3396237033134811e-05, + "loss": 0.4891819655895233, + "step": 6626 + }, + { + "epoch": 1.22450370130845, + "grad_norm": 0.08020766079425812, + "learning_rate": 1.3394360625224067e-05, + "loss": 0.561857283115387, + "step": 6627 + }, + { + "epoch": 1.224688478017346, + "grad_norm": 0.07287803292274475, + "learning_rate": 1.3392484082228387e-05, + "loss": 0.5870558023452759, + "step": 6628 + }, + { + "epoch": 1.2248732547262418, + "grad_norm": 0.06975536793470383, + "learning_rate": 1.339060740422246e-05, + "loss": 0.5530955791473389, + "step": 6629 + }, + { + "epoch": 1.2250580314351376, + "grad_norm": 0.09488081187009811, + "learning_rate": 1.3388730591280966e-05, + "loss": 0.6036531925201416, + "step": 6630 + }, + { + "epoch": 1.2252428081440334, + "grad_norm": 0.0829424187541008, + "learning_rate": 1.3386853643478592e-05, + "loss": 0.595618486404419, + "step": 6631 + }, + { + "epoch": 1.2254275848529292, + "grad_norm": 0.09147574752569199, + "learning_rate": 1.338497656089004e-05, + "loss": 0.6551826000213623, + "step": 6632 + }, + { + "epoch": 1.225612361561825, + "grad_norm": 0.0640762522816658, + "learning_rate": 1.3383099343590014e-05, + "loss": 0.46834108233451843, + "step": 6633 + }, + { + "epoch": 1.225797138270721, + "grad_norm": 0.07283955067396164, + "learning_rate": 1.3381221991653215e-05, + "loss": 0.5464327335357666, + "step": 6634 + }, + { + "epoch": 1.2259819149796167, + "grad_norm": 0.06744116544723511, + "learning_rate": 1.3379344505154359e-05, + "loss": 0.4603147804737091, + "step": 6635 + }, + { + "epoch": 1.2261666916885128, + "grad_norm": 0.08637522161006927, + "learning_rate": 1.3377466884168168e-05, + "loss": 0.6327239871025085, + "step": 6636 + }, + { + "epoch": 1.2263514683974086, + "grad_norm": 0.07645534723997116, + "learning_rate": 1.3375589128769362e-05, + "loss": 0.620918333530426, + "step": 6637 + }, + { + "epoch": 1.2265362451063044, + "grad_norm": 0.06302107870578766, + "learning_rate": 1.3373711239032664e-05, + "loss": 0.38208499550819397, + "step": 6638 + }, + { + "epoch": 1.2267210218152003, + "grad_norm": 0.059298522770404816, + "learning_rate": 1.3371833215032819e-05, + "loss": 0.3346162438392639, + "step": 6639 + }, + { + "epoch": 1.226905798524096, + "grad_norm": 0.06452131271362305, + "learning_rate": 1.3369955056844562e-05, + "loss": 0.535065233707428, + "step": 6640 + }, + { + "epoch": 1.227090575232992, + "grad_norm": 0.07480758428573608, + "learning_rate": 1.3368076764542632e-05, + "loss": 0.6556054949760437, + "step": 6641 + }, + { + "epoch": 1.2272753519418877, + "grad_norm": 0.060900650918483734, + "learning_rate": 1.3366198338201786e-05, + "loss": 0.3786194622516632, + "step": 6642 + }, + { + "epoch": 1.2274601286507836, + "grad_norm": 0.06550586223602295, + "learning_rate": 1.3364319777896779e-05, + "loss": 0.5116629600524902, + "step": 6643 + }, + { + "epoch": 1.2276449053596794, + "grad_norm": 0.07762665301561356, + "learning_rate": 1.3362441083702366e-05, + "loss": 0.5098814964294434, + "step": 6644 + }, + { + "epoch": 1.2278296820685752, + "grad_norm": 0.0742892399430275, + "learning_rate": 1.336056225569332e-05, + "loss": 0.4967721700668335, + "step": 6645 + }, + { + "epoch": 1.228014458777471, + "grad_norm": 0.07690393924713135, + "learning_rate": 1.3358683293944414e-05, + "loss": 0.5976964235305786, + "step": 6646 + }, + { + "epoch": 1.228199235486367, + "grad_norm": 0.08312725275754929, + "learning_rate": 1.335680419853042e-05, + "loss": 0.6433517932891846, + "step": 6647 + }, + { + "epoch": 1.2283840121952627, + "grad_norm": 0.07433654367923737, + "learning_rate": 1.3354924969526116e-05, + "loss": 0.4797278940677643, + "step": 6648 + }, + { + "epoch": 1.2285687889041585, + "grad_norm": 0.06998168677091599, + "learning_rate": 1.33530456070063e-05, + "loss": 0.46945229172706604, + "step": 6649 + }, + { + "epoch": 1.2287535656130544, + "grad_norm": 0.06637603789567947, + "learning_rate": 1.3351166111045757e-05, + "loss": 0.42683297395706177, + "step": 6650 + }, + { + "epoch": 1.2289383423219502, + "grad_norm": 0.07931186258792877, + "learning_rate": 1.3349286481719283e-05, + "loss": 0.6133133769035339, + "step": 6651 + }, + { + "epoch": 1.2291231190308463, + "grad_norm": 0.07115180045366287, + "learning_rate": 1.334740671910169e-05, + "loss": 0.6747742891311646, + "step": 6652 + }, + { + "epoch": 1.229307895739742, + "grad_norm": 0.07112821936607361, + "learning_rate": 1.3345526823267782e-05, + "loss": 0.4979085326194763, + "step": 6653 + }, + { + "epoch": 1.229492672448638, + "grad_norm": 0.06507711112499237, + "learning_rate": 1.3343646794292373e-05, + "loss": 0.3852187693119049, + "step": 6654 + }, + { + "epoch": 1.2296774491575337, + "grad_norm": 0.07518497854471207, + "learning_rate": 1.3341766632250281e-05, + "loss": 0.47556841373443604, + "step": 6655 + }, + { + "epoch": 1.2298622258664296, + "grad_norm": 0.07489053905010223, + "learning_rate": 1.3339886337216336e-05, + "loss": 0.4678479731082916, + "step": 6656 + }, + { + "epoch": 1.2300470025753254, + "grad_norm": 0.07171334326267242, + "learning_rate": 1.3338005909265363e-05, + "loss": 0.46340975165367126, + "step": 6657 + }, + { + "epoch": 1.2302317792842212, + "grad_norm": 0.0713566243648529, + "learning_rate": 1.3336125348472193e-05, + "loss": 0.49299490451812744, + "step": 6658 + }, + { + "epoch": 1.230416555993117, + "grad_norm": 0.09151481091976166, + "learning_rate": 1.3334244654911677e-05, + "loss": 0.601913332939148, + "step": 6659 + }, + { + "epoch": 1.2306013327020129, + "grad_norm": 0.07149849832057953, + "learning_rate": 1.3332363828658655e-05, + "loss": 0.44596779346466064, + "step": 6660 + }, + { + "epoch": 1.2307861094109087, + "grad_norm": 0.10163896530866623, + "learning_rate": 1.3330482869787975e-05, + "loss": 0.7691639065742493, + "step": 6661 + }, + { + "epoch": 1.2309708861198045, + "grad_norm": 0.08098061382770538, + "learning_rate": 1.3328601778374497e-05, + "loss": 0.5822969675064087, + "step": 6662 + }, + { + "epoch": 1.2311556628287004, + "grad_norm": 0.08098798990249634, + "learning_rate": 1.3326720554493084e-05, + "loss": 0.6189523935317993, + "step": 6663 + }, + { + "epoch": 1.2313404395375962, + "grad_norm": 0.06337269395589828, + "learning_rate": 1.33248391982186e-05, + "loss": 0.32708972692489624, + "step": 6664 + }, + { + "epoch": 1.2315252162464922, + "grad_norm": 0.0693390890955925, + "learning_rate": 1.3322957709625916e-05, + "loss": 0.4504762887954712, + "step": 6665 + }, + { + "epoch": 1.231709992955388, + "grad_norm": 0.08594589680433273, + "learning_rate": 1.3321076088789915e-05, + "loss": 0.601592481136322, + "step": 6666 + }, + { + "epoch": 1.231894769664284, + "grad_norm": 0.10171791166067123, + "learning_rate": 1.3319194335785475e-05, + "loss": 0.7838782668113708, + "step": 6667 + }, + { + "epoch": 1.2320795463731797, + "grad_norm": 0.06850502640008926, + "learning_rate": 1.3317312450687485e-05, + "loss": 0.4453665614128113, + "step": 6668 + }, + { + "epoch": 1.2322643230820756, + "grad_norm": 0.0664345771074295, + "learning_rate": 1.3315430433570834e-05, + "loss": 0.4064602553844452, + "step": 6669 + }, + { + "epoch": 1.2324490997909714, + "grad_norm": 0.07736603915691376, + "learning_rate": 1.3313548284510432e-05, + "loss": 0.5926414728164673, + "step": 6670 + }, + { + "epoch": 1.2326338764998672, + "grad_norm": 0.05535926669836044, + "learning_rate": 1.3311666003581168e-05, + "loss": 0.4185660481452942, + "step": 6671 + }, + { + "epoch": 1.232818653208763, + "grad_norm": 0.06340336799621582, + "learning_rate": 1.330978359085796e-05, + "loss": 0.4234006702899933, + "step": 6672 + }, + { + "epoch": 1.2330034299176589, + "grad_norm": 0.07930430769920349, + "learning_rate": 1.3307901046415723e-05, + "loss": 0.5866430401802063, + "step": 6673 + }, + { + "epoch": 1.2331882066265547, + "grad_norm": 0.06652922928333282, + "learning_rate": 1.330601837032937e-05, + "loss": 0.4335622191429138, + "step": 6674 + }, + { + "epoch": 1.2333729833354505, + "grad_norm": 0.07018651813268661, + "learning_rate": 1.330413556267383e-05, + "loss": 0.5484957098960876, + "step": 6675 + }, + { + "epoch": 1.2335577600443464, + "grad_norm": 0.0835627093911171, + "learning_rate": 1.3302252623524032e-05, + "loss": 0.6306493878364563, + "step": 6676 + }, + { + "epoch": 1.2337425367532422, + "grad_norm": 0.07812164723873138, + "learning_rate": 1.3300369552954913e-05, + "loss": 0.5238651633262634, + "step": 6677 + }, + { + "epoch": 1.233927313462138, + "grad_norm": 0.07560842484235764, + "learning_rate": 1.3298486351041409e-05, + "loss": 0.5358222126960754, + "step": 6678 + }, + { + "epoch": 1.2341120901710338, + "grad_norm": 0.07144767791032791, + "learning_rate": 1.3296603017858467e-05, + "loss": 0.5098992586135864, + "step": 6679 + }, + { + "epoch": 1.2342968668799297, + "grad_norm": 0.0746634379029274, + "learning_rate": 1.3294719553481042e-05, + "loss": 0.4734437167644501, + "step": 6680 + }, + { + "epoch": 1.2344816435888255, + "grad_norm": 0.0832318589091301, + "learning_rate": 1.3292835957984082e-05, + "loss": 0.592925488948822, + "step": 6681 + }, + { + "epoch": 1.2346664202977216, + "grad_norm": 0.08652772754430771, + "learning_rate": 1.3290952231442555e-05, + "loss": 0.6108662486076355, + "step": 6682 + }, + { + "epoch": 1.2348511970066174, + "grad_norm": 0.06642487645149231, + "learning_rate": 1.3289068373931426e-05, + "loss": 0.4984731376171112, + "step": 6683 + }, + { + "epoch": 1.2350359737155132, + "grad_norm": 0.09597799926996231, + "learning_rate": 1.3287184385525667e-05, + "loss": 0.5345010757446289, + "step": 6684 + }, + { + "epoch": 1.235220750424409, + "grad_norm": 0.08704934269189835, + "learning_rate": 1.3285300266300252e-05, + "loss": 0.6655499935150146, + "step": 6685 + }, + { + "epoch": 1.2354055271333049, + "grad_norm": 0.08564785122871399, + "learning_rate": 1.3283416016330164e-05, + "loss": 0.601367175579071, + "step": 6686 + }, + { + "epoch": 1.2355903038422007, + "grad_norm": 0.07977317273616791, + "learning_rate": 1.3281531635690391e-05, + "loss": 0.43817946314811707, + "step": 6687 + }, + { + "epoch": 1.2357750805510965, + "grad_norm": 0.08475592732429504, + "learning_rate": 1.3279647124455927e-05, + "loss": 0.5711531043052673, + "step": 6688 + }, + { + "epoch": 1.2359598572599924, + "grad_norm": 0.09372538328170776, + "learning_rate": 1.3277762482701769e-05, + "loss": 0.5565690398216248, + "step": 6689 + }, + { + "epoch": 1.2361446339688882, + "grad_norm": 0.08273839950561523, + "learning_rate": 1.3275877710502918e-05, + "loss": 0.6361427307128906, + "step": 6690 + }, + { + "epoch": 1.236329410677784, + "grad_norm": 0.06452780216932297, + "learning_rate": 1.327399280793438e-05, + "loss": 0.3863506019115448, + "step": 6691 + }, + { + "epoch": 1.2365141873866798, + "grad_norm": 0.07726220041513443, + "learning_rate": 1.3272107775071176e-05, + "loss": 0.6117289662361145, + "step": 6692 + }, + { + "epoch": 1.2366989640955757, + "grad_norm": 0.06845694035291672, + "learning_rate": 1.3270222611988318e-05, + "loss": 0.4468269944190979, + "step": 6693 + }, + { + "epoch": 1.2368837408044717, + "grad_norm": 0.09642685204744339, + "learning_rate": 1.3268337318760832e-05, + "loss": 0.6524372100830078, + "step": 6694 + }, + { + "epoch": 1.2370685175133675, + "grad_norm": 0.09860479086637497, + "learning_rate": 1.3266451895463743e-05, + "loss": 0.6463438272476196, + "step": 6695 + }, + { + "epoch": 1.2372532942222634, + "grad_norm": 0.07172819972038269, + "learning_rate": 1.326456634217209e-05, + "loss": 0.5616058707237244, + "step": 6696 + }, + { + "epoch": 1.2374380709311592, + "grad_norm": 0.08177749067544937, + "learning_rate": 1.326268065896091e-05, + "loss": 0.4797169864177704, + "step": 6697 + }, + { + "epoch": 1.237622847640055, + "grad_norm": 0.06497815996408463, + "learning_rate": 1.3260794845905249e-05, + "loss": 0.45328235626220703, + "step": 6698 + }, + { + "epoch": 1.2378076243489509, + "grad_norm": 0.07354207336902618, + "learning_rate": 1.3258908903080151e-05, + "loss": 0.531097948551178, + "step": 6699 + }, + { + "epoch": 1.2379924010578467, + "grad_norm": 0.06290023028850555, + "learning_rate": 1.325702283056068e-05, + "loss": 0.4945487380027771, + "step": 6700 + }, + { + "epoch": 1.2381771777667425, + "grad_norm": 0.07945331186056137, + "learning_rate": 1.3255136628421885e-05, + "loss": 0.511954128742218, + "step": 6701 + }, + { + "epoch": 1.2383619544756383, + "grad_norm": 0.06767923384904861, + "learning_rate": 1.325325029673884e-05, + "loss": 0.440019428730011, + "step": 6702 + }, + { + "epoch": 1.2385467311845342, + "grad_norm": 0.06857583671808243, + "learning_rate": 1.3251363835586609e-05, + "loss": 0.49880003929138184, + "step": 6703 + }, + { + "epoch": 1.23873150789343, + "grad_norm": 0.08058851212263107, + "learning_rate": 1.324947724504027e-05, + "loss": 0.5239635109901428, + "step": 6704 + }, + { + "epoch": 1.2389162846023258, + "grad_norm": 0.0902724415063858, + "learning_rate": 1.3247590525174902e-05, + "loss": 0.6282240748405457, + "step": 6705 + }, + { + "epoch": 1.2391010613112217, + "grad_norm": 0.07073131948709488, + "learning_rate": 1.3245703676065594e-05, + "loss": 0.4652053713798523, + "step": 6706 + }, + { + "epoch": 1.2392858380201175, + "grad_norm": 0.08671863377094269, + "learning_rate": 1.3243816697787433e-05, + "loss": 0.6942138671875, + "step": 6707 + }, + { + "epoch": 1.2394706147290133, + "grad_norm": 0.07620774209499359, + "learning_rate": 1.3241929590415514e-05, + "loss": 0.5614061951637268, + "step": 6708 + }, + { + "epoch": 1.2396553914379091, + "grad_norm": 0.06715484708547592, + "learning_rate": 1.3240042354024944e-05, + "loss": 0.4390954077243805, + "step": 6709 + }, + { + "epoch": 1.239840168146805, + "grad_norm": 0.05757160112261772, + "learning_rate": 1.3238154988690821e-05, + "loss": 0.41786855459213257, + "step": 6710 + }, + { + "epoch": 1.240024944855701, + "grad_norm": 0.07303710281848907, + "learning_rate": 1.3236267494488262e-05, + "loss": 0.5968295335769653, + "step": 6711 + }, + { + "epoch": 1.2402097215645969, + "grad_norm": 0.09075836092233658, + "learning_rate": 1.3234379871492381e-05, + "loss": 0.704807698726654, + "step": 6712 + }, + { + "epoch": 1.2403944982734927, + "grad_norm": 0.06804577261209488, + "learning_rate": 1.3232492119778301e-05, + "loss": 0.4883617162704468, + "step": 6713 + }, + { + "epoch": 1.2405792749823885, + "grad_norm": 0.0695258155465126, + "learning_rate": 1.3230604239421148e-05, + "loss": 0.5278023481369019, + "step": 6714 + }, + { + "epoch": 1.2407640516912843, + "grad_norm": 0.07082250714302063, + "learning_rate": 1.3228716230496055e-05, + "loss": 0.4770531952381134, + "step": 6715 + }, + { + "epoch": 1.2409488284001802, + "grad_norm": 0.0864938423037529, + "learning_rate": 1.3226828093078157e-05, + "loss": 0.6933788657188416, + "step": 6716 + }, + { + "epoch": 1.241133605109076, + "grad_norm": 0.08195040374994278, + "learning_rate": 1.3224939827242596e-05, + "loss": 0.5969827175140381, + "step": 6717 + }, + { + "epoch": 1.2413183818179718, + "grad_norm": 0.07748837023973465, + "learning_rate": 1.3223051433064515e-05, + "loss": 0.6048961281776428, + "step": 6718 + }, + { + "epoch": 1.2415031585268677, + "grad_norm": 0.07402081042528152, + "learning_rate": 1.3221162910619076e-05, + "loss": 0.42801129817962646, + "step": 6719 + }, + { + "epoch": 1.2416879352357635, + "grad_norm": 0.08601423352956772, + "learning_rate": 1.321927425998143e-05, + "loss": 0.6223377585411072, + "step": 6720 + }, + { + "epoch": 1.2418727119446593, + "grad_norm": 0.0846116915345192, + "learning_rate": 1.3217385481226736e-05, + "loss": 0.5239998698234558, + "step": 6721 + }, + { + "epoch": 1.2420574886535551, + "grad_norm": 0.07802564650774002, + "learning_rate": 1.321549657443017e-05, + "loss": 0.4778412878513336, + "step": 6722 + }, + { + "epoch": 1.242242265362451, + "grad_norm": 0.09074050933122635, + "learning_rate": 1.3213607539666899e-05, + "loss": 0.6743155717849731, + "step": 6723 + }, + { + "epoch": 1.242427042071347, + "grad_norm": 0.06890838593244553, + "learning_rate": 1.3211718377012103e-05, + "loss": 0.4325670003890991, + "step": 6724 + }, + { + "epoch": 1.2426118187802428, + "grad_norm": 0.08446649461984634, + "learning_rate": 1.3209829086540964e-05, + "loss": 0.6355634331703186, + "step": 6725 + }, + { + "epoch": 1.2427965954891387, + "grad_norm": 0.07344458997249603, + "learning_rate": 1.3207939668328671e-05, + "loss": 0.5197588801383972, + "step": 6726 + }, + { + "epoch": 1.2429813721980345, + "grad_norm": 0.06373180449008942, + "learning_rate": 1.320605012245041e-05, + "loss": 0.44659337401390076, + "step": 6727 + }, + { + "epoch": 1.2431661489069303, + "grad_norm": 0.07958760857582092, + "learning_rate": 1.320416044898139e-05, + "loss": 0.6337906718254089, + "step": 6728 + }, + { + "epoch": 1.2433509256158262, + "grad_norm": 0.08500469475984573, + "learning_rate": 1.3202270647996807e-05, + "loss": 0.6289973258972168, + "step": 6729 + }, + { + "epoch": 1.243535702324722, + "grad_norm": 0.08085852116346359, + "learning_rate": 1.3200380719571868e-05, + "loss": 0.5270951986312866, + "step": 6730 + }, + { + "epoch": 1.2437204790336178, + "grad_norm": 0.08276652544736862, + "learning_rate": 1.319849066378179e-05, + "loss": 0.7250403165817261, + "step": 6731 + }, + { + "epoch": 1.2439052557425136, + "grad_norm": 0.08534059673547745, + "learning_rate": 1.3196600480701796e-05, + "loss": 0.5166981220245361, + "step": 6732 + }, + { + "epoch": 1.2440900324514095, + "grad_norm": 0.07980574667453766, + "learning_rate": 1.31947101704071e-05, + "loss": 0.5468252301216125, + "step": 6733 + }, + { + "epoch": 1.2442748091603053, + "grad_norm": 0.06491481512784958, + "learning_rate": 1.3192819732972931e-05, + "loss": 0.48686864972114563, + "step": 6734 + }, + { + "epoch": 1.2444595858692011, + "grad_norm": 0.0737755075097084, + "learning_rate": 1.3190929168474528e-05, + "loss": 0.5102857947349548, + "step": 6735 + }, + { + "epoch": 1.244644362578097, + "grad_norm": 0.07817957550287247, + "learning_rate": 1.318903847698713e-05, + "loss": 0.4815283715724945, + "step": 6736 + }, + { + "epoch": 1.2448291392869928, + "grad_norm": 0.06640022993087769, + "learning_rate": 1.3187147658585975e-05, + "loss": 0.47119250893592834, + "step": 6737 + }, + { + "epoch": 1.2450139159958886, + "grad_norm": 0.08190456032752991, + "learning_rate": 1.3185256713346315e-05, + "loss": 0.5948231220245361, + "step": 6738 + }, + { + "epoch": 1.2451986927047844, + "grad_norm": 0.06790990382432938, + "learning_rate": 1.3183365641343404e-05, + "loss": 0.42641395330429077, + "step": 6739 + }, + { + "epoch": 1.2453834694136805, + "grad_norm": 0.09158918261528015, + "learning_rate": 1.3181474442652498e-05, + "loss": 0.5970664024353027, + "step": 6740 + }, + { + "epoch": 1.2455682461225763, + "grad_norm": 0.07614406943321228, + "learning_rate": 1.3179583117348865e-05, + "loss": 0.5861613154411316, + "step": 6741 + }, + { + "epoch": 1.2457530228314722, + "grad_norm": 0.08723059296607971, + "learning_rate": 1.317769166550777e-05, + "loss": 0.6448711156845093, + "step": 6742 + }, + { + "epoch": 1.245937799540368, + "grad_norm": 0.08985978364944458, + "learning_rate": 1.3175800087204488e-05, + "loss": 0.5434716939926147, + "step": 6743 + }, + { + "epoch": 1.2461225762492638, + "grad_norm": 0.08016160130500793, + "learning_rate": 1.3173908382514298e-05, + "loss": 0.49246692657470703, + "step": 6744 + }, + { + "epoch": 1.2463073529581596, + "grad_norm": 0.07651840895414352, + "learning_rate": 1.3172016551512487e-05, + "loss": 0.5844117403030396, + "step": 6745 + }, + { + "epoch": 1.2464921296670555, + "grad_norm": 0.08612750470638275, + "learning_rate": 1.317012459427434e-05, + "loss": 0.49526458978652954, + "step": 6746 + }, + { + "epoch": 1.2466769063759513, + "grad_norm": 0.06630276143550873, + "learning_rate": 1.3168232510875152e-05, + "loss": 0.4218505024909973, + "step": 6747 + }, + { + "epoch": 1.2468616830848471, + "grad_norm": 0.06982891261577606, + "learning_rate": 1.3166340301390222e-05, + "loss": 0.43704402446746826, + "step": 6748 + }, + { + "epoch": 1.247046459793743, + "grad_norm": 0.10117150843143463, + "learning_rate": 1.3164447965894856e-05, + "loss": 0.618638277053833, + "step": 6749 + }, + { + "epoch": 1.2472312365026388, + "grad_norm": 0.07406169176101685, + "learning_rate": 1.3162555504464358e-05, + "loss": 0.47164854407310486, + "step": 6750 + }, + { + "epoch": 1.2474160132115346, + "grad_norm": 0.10184917598962784, + "learning_rate": 1.3160662917174045e-05, + "loss": 0.6744089722633362, + "step": 6751 + }, + { + "epoch": 1.2476007899204304, + "grad_norm": 0.06890112161636353, + "learning_rate": 1.3158770204099241e-05, + "loss": 0.4289524257183075, + "step": 6752 + }, + { + "epoch": 1.2477855666293265, + "grad_norm": 0.06529653072357178, + "learning_rate": 1.3156877365315264e-05, + "loss": 0.39639273285865784, + "step": 6753 + }, + { + "epoch": 1.2479703433382223, + "grad_norm": 0.0774889588356018, + "learning_rate": 1.315498440089744e-05, + "loss": 0.641357958316803, + "step": 6754 + }, + { + "epoch": 1.2481551200471181, + "grad_norm": 0.05716458335518837, + "learning_rate": 1.315309131092111e-05, + "loss": 0.3876572549343109, + "step": 6755 + }, + { + "epoch": 1.248339896756014, + "grad_norm": 0.0856190174818039, + "learning_rate": 1.3151198095461614e-05, + "loss": 0.6331546306610107, + "step": 6756 + }, + { + "epoch": 1.2485246734649098, + "grad_norm": 0.07629573345184326, + "learning_rate": 1.3149304754594287e-05, + "loss": 0.6202632784843445, + "step": 6757 + }, + { + "epoch": 1.2487094501738056, + "grad_norm": 0.09377996623516083, + "learning_rate": 1.3147411288394487e-05, + "loss": 0.733933687210083, + "step": 6758 + }, + { + "epoch": 1.2488942268827015, + "grad_norm": 0.07805003225803375, + "learning_rate": 1.3145517696937567e-05, + "loss": 0.5285007953643799, + "step": 6759 + }, + { + "epoch": 1.2490790035915973, + "grad_norm": 0.07465283572673798, + "learning_rate": 1.3143623980298879e-05, + "loss": 0.5317094326019287, + "step": 6760 + }, + { + "epoch": 1.2492637803004931, + "grad_norm": 0.08603695034980774, + "learning_rate": 1.3141730138553792e-05, + "loss": 0.7554119825363159, + "step": 6761 + }, + { + "epoch": 1.249448557009389, + "grad_norm": 0.06203283742070198, + "learning_rate": 1.3139836171777678e-05, + "loss": 0.5262657403945923, + "step": 6762 + }, + { + "epoch": 1.2496333337182848, + "grad_norm": 0.09563897550106049, + "learning_rate": 1.3137942080045906e-05, + "loss": 0.6859795451164246, + "step": 6763 + }, + { + "epoch": 1.2498181104271806, + "grad_norm": 0.07398170232772827, + "learning_rate": 1.3136047863433854e-05, + "loss": 0.5132628083229065, + "step": 6764 + }, + { + "epoch": 1.2500028871360764, + "grad_norm": 0.08611463010311127, + "learning_rate": 1.3134153522016912e-05, + "loss": 0.6254287958145142, + "step": 6765 + }, + { + "epoch": 1.2501876638449723, + "grad_norm": 0.08766376227140427, + "learning_rate": 1.3132259055870467e-05, + "loss": 0.5664530992507935, + "step": 6766 + }, + { + "epoch": 1.250372440553868, + "grad_norm": 0.08339742571115494, + "learning_rate": 1.3130364465069906e-05, + "loss": 0.5552389025688171, + "step": 6767 + }, + { + "epoch": 1.250557217262764, + "grad_norm": 0.0711752325296402, + "learning_rate": 1.3128469749690635e-05, + "loss": 0.5233983993530273, + "step": 6768 + }, + { + "epoch": 1.2507419939716597, + "grad_norm": 0.07766459137201309, + "learning_rate": 1.312657490980806e-05, + "loss": 0.6995660066604614, + "step": 6769 + }, + { + "epoch": 1.2509267706805556, + "grad_norm": 0.06316307187080383, + "learning_rate": 1.312467994549758e-05, + "loss": 0.4747420847415924, + "step": 6770 + }, + { + "epoch": 1.2511115473894516, + "grad_norm": 0.0716712549328804, + "learning_rate": 1.3122784856834615e-05, + "loss": 0.5163635611534119, + "step": 6771 + }, + { + "epoch": 1.2512963240983475, + "grad_norm": 0.07563406974077225, + "learning_rate": 1.3120889643894584e-05, + "loss": 0.5496046543121338, + "step": 6772 + }, + { + "epoch": 1.2514811008072433, + "grad_norm": 0.09463442116975784, + "learning_rate": 1.311899430675291e-05, + "loss": 0.625213086605072, + "step": 6773 + }, + { + "epoch": 1.2516658775161391, + "grad_norm": 0.06193559616804123, + "learning_rate": 1.3117098845485016e-05, + "loss": 0.404417484998703, + "step": 6774 + }, + { + "epoch": 1.251850654225035, + "grad_norm": 0.06744953989982605, + "learning_rate": 1.3115203260166345e-05, + "loss": 0.5050620436668396, + "step": 6775 + }, + { + "epoch": 1.2520354309339308, + "grad_norm": 0.06575006991624832, + "learning_rate": 1.3113307550872327e-05, + "loss": 0.4802807569503784, + "step": 6776 + }, + { + "epoch": 1.2522202076428266, + "grad_norm": 0.06637375056743622, + "learning_rate": 1.311141171767841e-05, + "loss": 0.45567330718040466, + "step": 6777 + }, + { + "epoch": 1.2524049843517224, + "grad_norm": 0.07214214652776718, + "learning_rate": 1.310951576066004e-05, + "loss": 0.5020220279693604, + "step": 6778 + }, + { + "epoch": 1.2525897610606183, + "grad_norm": 0.06326988339424133, + "learning_rate": 1.3107619679892676e-05, + "loss": 0.40720850229263306, + "step": 6779 + }, + { + "epoch": 1.252774537769514, + "grad_norm": 0.09027061611413956, + "learning_rate": 1.3105723475451765e-05, + "loss": 0.5622882843017578, + "step": 6780 + }, + { + "epoch": 1.25295931447841, + "grad_norm": 0.07883419841527939, + "learning_rate": 1.3103827147412781e-05, + "loss": 0.600059986114502, + "step": 6781 + }, + { + "epoch": 1.253144091187306, + "grad_norm": 0.07642436027526855, + "learning_rate": 1.3101930695851186e-05, + "loss": 0.4871520698070526, + "step": 6782 + }, + { + "epoch": 1.2533288678962018, + "grad_norm": 0.07144514471292496, + "learning_rate": 1.3100034120842453e-05, + "loss": 0.3252977132797241, + "step": 6783 + }, + { + "epoch": 1.2535136446050976, + "grad_norm": 0.07730413973331451, + "learning_rate": 1.309813742246206e-05, + "loss": 0.5172085762023926, + "step": 6784 + }, + { + "epoch": 1.2536984213139934, + "grad_norm": 0.0767231211066246, + "learning_rate": 1.309624060078549e-05, + "loss": 0.551487147808075, + "step": 6785 + }, + { + "epoch": 1.2538831980228893, + "grad_norm": 0.05568498373031616, + "learning_rate": 1.3094343655888233e-05, + "loss": 0.3281753957271576, + "step": 6786 + }, + { + "epoch": 1.254067974731785, + "grad_norm": 0.08305204659700394, + "learning_rate": 1.309244658784578e-05, + "loss": 0.6095369458198547, + "step": 6787 + }, + { + "epoch": 1.254252751440681, + "grad_norm": 0.06953762471675873, + "learning_rate": 1.3090549396733626e-05, + "loss": 0.5702585577964783, + "step": 6788 + }, + { + "epoch": 1.2544375281495768, + "grad_norm": 0.06709403544664383, + "learning_rate": 1.3088652082627276e-05, + "loss": 0.5290798544883728, + "step": 6789 + }, + { + "epoch": 1.2546223048584726, + "grad_norm": 0.0815216526389122, + "learning_rate": 1.3086754645602235e-05, + "loss": 0.5775642395019531, + "step": 6790 + }, + { + "epoch": 1.2548070815673684, + "grad_norm": 0.07735057175159454, + "learning_rate": 1.308485708573402e-05, + "loss": 0.5133861899375916, + "step": 6791 + }, + { + "epoch": 1.2549918582762642, + "grad_norm": 0.07463232427835464, + "learning_rate": 1.3082959403098139e-05, + "loss": 0.4989258050918579, + "step": 6792 + }, + { + "epoch": 1.25517663498516, + "grad_norm": 0.07572955638170242, + "learning_rate": 1.3081061597770124e-05, + "loss": 0.5668498277664185, + "step": 6793 + }, + { + "epoch": 1.255361411694056, + "grad_norm": 0.0815187320113182, + "learning_rate": 1.3079163669825495e-05, + "loss": 0.5903770327568054, + "step": 6794 + }, + { + "epoch": 1.2555461884029517, + "grad_norm": 0.08574850112199783, + "learning_rate": 1.3077265619339783e-05, + "loss": 0.7418274283409119, + "step": 6795 + }, + { + "epoch": 1.2557309651118476, + "grad_norm": 0.068257175385952, + "learning_rate": 1.307536744638853e-05, + "loss": 0.47835734486579895, + "step": 6796 + }, + { + "epoch": 1.2559157418207434, + "grad_norm": 0.0685720145702362, + "learning_rate": 1.3073469151047272e-05, + "loss": 0.415475994348526, + "step": 6797 + }, + { + "epoch": 1.2561005185296392, + "grad_norm": 0.06705527752637863, + "learning_rate": 1.3071570733391558e-05, + "loss": 0.5454516410827637, + "step": 6798 + }, + { + "epoch": 1.256285295238535, + "grad_norm": 0.06844533234834671, + "learning_rate": 1.3069672193496938e-05, + "loss": 0.4502028822898865, + "step": 6799 + }, + { + "epoch": 1.256470071947431, + "grad_norm": 0.06797377020120621, + "learning_rate": 1.306777353143897e-05, + "loss": 0.4507002532482147, + "step": 6800 + }, + { + "epoch": 1.256654848656327, + "grad_norm": 0.07801464200019836, + "learning_rate": 1.3065874747293212e-05, + "loss": 0.490688294172287, + "step": 6801 + }, + { + "epoch": 1.2568396253652228, + "grad_norm": 0.09060056507587433, + "learning_rate": 1.3063975841135232e-05, + "loss": 0.6962713003158569, + "step": 6802 + }, + { + "epoch": 1.2570244020741186, + "grad_norm": 0.09092527627944946, + "learning_rate": 1.3062076813040601e-05, + "loss": 0.617733359336853, + "step": 6803 + }, + { + "epoch": 1.2572091787830144, + "grad_norm": 0.0821225643157959, + "learning_rate": 1.306017766308489e-05, + "loss": 0.6366010904312134, + "step": 6804 + }, + { + "epoch": 1.2573939554919102, + "grad_norm": 0.0752747431397438, + "learning_rate": 1.3058278391343682e-05, + "loss": 0.6357575058937073, + "step": 6805 + }, + { + "epoch": 1.257578732200806, + "grad_norm": 0.06302738189697266, + "learning_rate": 1.3056378997892565e-05, + "loss": 0.43746256828308105, + "step": 6806 + }, + { + "epoch": 1.257763508909702, + "grad_norm": 0.07448502629995346, + "learning_rate": 1.3054479482807122e-05, + "loss": 0.5271299481391907, + "step": 6807 + }, + { + "epoch": 1.2579482856185977, + "grad_norm": 0.07659098505973816, + "learning_rate": 1.3052579846162957e-05, + "loss": 0.580751359462738, + "step": 6808 + }, + { + "epoch": 1.2581330623274936, + "grad_norm": 0.08074422180652618, + "learning_rate": 1.3050680088035658e-05, + "loss": 0.4916374087333679, + "step": 6809 + }, + { + "epoch": 1.2583178390363894, + "grad_norm": 0.05657816305756569, + "learning_rate": 1.304878020850084e-05, + "loss": 0.40220433473587036, + "step": 6810 + }, + { + "epoch": 1.2585026157452854, + "grad_norm": 0.0831356793642044, + "learning_rate": 1.3046880207634109e-05, + "loss": 0.6043918132781982, + "step": 6811 + }, + { + "epoch": 1.2586873924541813, + "grad_norm": 0.09046194702386856, + "learning_rate": 1.3044980085511076e-05, + "loss": 0.6078789830207825, + "step": 6812 + }, + { + "epoch": 1.258872169163077, + "grad_norm": 0.06054454669356346, + "learning_rate": 1.3043079842207363e-05, + "loss": 0.40452149510383606, + "step": 6813 + }, + { + "epoch": 1.259056945871973, + "grad_norm": 0.06690578907728195, + "learning_rate": 1.3041179477798593e-05, + "loss": 0.5067480206489563, + "step": 6814 + }, + { + "epoch": 1.2592417225808687, + "grad_norm": 0.079596608877182, + "learning_rate": 1.3039278992360393e-05, + "loss": 0.5522646903991699, + "step": 6815 + }, + { + "epoch": 1.2594264992897646, + "grad_norm": 0.0601852685213089, + "learning_rate": 1.30373783859684e-05, + "loss": 0.41679537296295166, + "step": 6816 + }, + { + "epoch": 1.2596112759986604, + "grad_norm": 0.0934712216258049, + "learning_rate": 1.3035477658698247e-05, + "loss": 0.6759485602378845, + "step": 6817 + }, + { + "epoch": 1.2597960527075562, + "grad_norm": 0.09387373924255371, + "learning_rate": 1.3033576810625583e-05, + "loss": 0.7017277479171753, + "step": 6818 + }, + { + "epoch": 1.259980829416452, + "grad_norm": 0.10304391384124756, + "learning_rate": 1.3031675841826052e-05, + "loss": 0.7227799892425537, + "step": 6819 + }, + { + "epoch": 1.260165606125348, + "grad_norm": 0.051855817437171936, + "learning_rate": 1.3029774752375307e-05, + "loss": 0.32178065180778503, + "step": 6820 + }, + { + "epoch": 1.2603503828342437, + "grad_norm": 0.08134342730045319, + "learning_rate": 1.3027873542349005e-05, + "loss": 0.5557973980903625, + "step": 6821 + }, + { + "epoch": 1.2605351595431395, + "grad_norm": 0.07952242344617844, + "learning_rate": 1.302597221182281e-05, + "loss": 0.6073988080024719, + "step": 6822 + }, + { + "epoch": 1.2607199362520354, + "grad_norm": 0.0611259751021862, + "learning_rate": 1.3024070760872389e-05, + "loss": 0.42082422971725464, + "step": 6823 + }, + { + "epoch": 1.2609047129609312, + "grad_norm": 0.07557714730501175, + "learning_rate": 1.3022169189573411e-05, + "loss": 0.5456272959709167, + "step": 6824 + }, + { + "epoch": 1.261089489669827, + "grad_norm": 0.08217813819646835, + "learning_rate": 1.3020267498001555e-05, + "loss": 0.536934494972229, + "step": 6825 + }, + { + "epoch": 1.2612742663787229, + "grad_norm": 0.06710366904735565, + "learning_rate": 1.3018365686232502e-05, + "loss": 0.5903006792068481, + "step": 6826 + }, + { + "epoch": 1.2614590430876187, + "grad_norm": 0.06867175549268723, + "learning_rate": 1.3016463754341936e-05, + "loss": 0.45960867404937744, + "step": 6827 + }, + { + "epoch": 1.2616438197965145, + "grad_norm": 0.09854653477668762, + "learning_rate": 1.3014561702405552e-05, + "loss": 0.62856525182724, + "step": 6828 + }, + { + "epoch": 1.2618285965054106, + "grad_norm": 0.08738111704587936, + "learning_rate": 1.3012659530499043e-05, + "loss": 0.5031391382217407, + "step": 6829 + }, + { + "epoch": 1.2620133732143064, + "grad_norm": 0.06079642102122307, + "learning_rate": 1.3010757238698108e-05, + "loss": 0.38979560136795044, + "step": 6830 + }, + { + "epoch": 1.2621981499232022, + "grad_norm": 0.06816396117210388, + "learning_rate": 1.3008854827078458e-05, + "loss": 0.4951815605163574, + "step": 6831 + }, + { + "epoch": 1.262382926632098, + "grad_norm": 0.07417619228363037, + "learning_rate": 1.3006952295715802e-05, + "loss": 0.4765762686729431, + "step": 6832 + }, + { + "epoch": 1.2625677033409939, + "grad_norm": 0.06951801478862762, + "learning_rate": 1.3005049644685847e-05, + "loss": 0.4217744469642639, + "step": 6833 + }, + { + "epoch": 1.2627524800498897, + "grad_norm": 0.08772267401218414, + "learning_rate": 1.3003146874064317e-05, + "loss": 0.637980043888092, + "step": 6834 + }, + { + "epoch": 1.2629372567587855, + "grad_norm": 0.07551013678312302, + "learning_rate": 1.300124398392694e-05, + "loss": 0.529155433177948, + "step": 6835 + }, + { + "epoch": 1.2631220334676814, + "grad_norm": 0.08979663252830505, + "learning_rate": 1.2999340974349442e-05, + "loss": 0.6354886889457703, + "step": 6836 + }, + { + "epoch": 1.2633068101765772, + "grad_norm": 0.06971348822116852, + "learning_rate": 1.2997437845407555e-05, + "loss": 0.5855482816696167, + "step": 6837 + }, + { + "epoch": 1.263491586885473, + "grad_norm": 0.07374582439661026, + "learning_rate": 1.2995534597177023e-05, + "loss": 0.4582844376564026, + "step": 6838 + }, + { + "epoch": 1.2636763635943689, + "grad_norm": 0.09206511825323105, + "learning_rate": 1.2993631229733584e-05, + "loss": 0.6397450566291809, + "step": 6839 + }, + { + "epoch": 1.263861140303265, + "grad_norm": 0.0784001424908638, + "learning_rate": 1.2991727743152983e-05, + "loss": 0.5534389019012451, + "step": 6840 + }, + { + "epoch": 1.2640459170121607, + "grad_norm": 0.0904441550374031, + "learning_rate": 1.2989824137510984e-05, + "loss": 0.8288828134536743, + "step": 6841 + }, + { + "epoch": 1.2642306937210566, + "grad_norm": 0.09138955175876617, + "learning_rate": 1.2987920412883336e-05, + "loss": 0.6156802177429199, + "step": 6842 + }, + { + "epoch": 1.2644154704299524, + "grad_norm": 0.09530110657215118, + "learning_rate": 1.2986016569345806e-05, + "loss": 0.6880958080291748, + "step": 6843 + }, + { + "epoch": 1.2646002471388482, + "grad_norm": 0.08604827523231506, + "learning_rate": 1.2984112606974155e-05, + "loss": 0.6318317651748657, + "step": 6844 + }, + { + "epoch": 1.264785023847744, + "grad_norm": 0.07958484441041946, + "learning_rate": 1.298220852584416e-05, + "loss": 0.4509182870388031, + "step": 6845 + }, + { + "epoch": 1.2649698005566399, + "grad_norm": 0.07328151166439056, + "learning_rate": 1.2980304326031593e-05, + "loss": 0.5359677672386169, + "step": 6846 + }, + { + "epoch": 1.2651545772655357, + "grad_norm": 0.06363991647958755, + "learning_rate": 1.2978400007612242e-05, + "loss": 0.46096542477607727, + "step": 6847 + }, + { + "epoch": 1.2653393539744315, + "grad_norm": 0.08119732141494751, + "learning_rate": 1.2976495570661888e-05, + "loss": 0.49483710527420044, + "step": 6848 + }, + { + "epoch": 1.2655241306833274, + "grad_norm": 0.05809759348630905, + "learning_rate": 1.2974591015256324e-05, + "loss": 0.41567057371139526, + "step": 6849 + }, + { + "epoch": 1.2657089073922232, + "grad_norm": 0.059295497834682465, + "learning_rate": 1.2972686341471338e-05, + "loss": 0.419766366481781, + "step": 6850 + }, + { + "epoch": 1.265893684101119, + "grad_norm": 0.07198849320411682, + "learning_rate": 1.2970781549382743e-05, + "loss": 0.46864405274391174, + "step": 6851 + }, + { + "epoch": 1.2660784608100148, + "grad_norm": 0.06495899707078934, + "learning_rate": 1.2968876639066335e-05, + "loss": 0.36471015214920044, + "step": 6852 + }, + { + "epoch": 1.2662632375189107, + "grad_norm": 0.07884179800748825, + "learning_rate": 1.2966971610597922e-05, + "loss": 0.4284929037094116, + "step": 6853 + }, + { + "epoch": 1.2664480142278065, + "grad_norm": 0.06695716083049774, + "learning_rate": 1.2965066464053323e-05, + "loss": 0.42951127886772156, + "step": 6854 + }, + { + "epoch": 1.2666327909367023, + "grad_norm": 0.10632362216711044, + "learning_rate": 1.2963161199508356e-05, + "loss": 0.6579766273498535, + "step": 6855 + }, + { + "epoch": 1.2668175676455982, + "grad_norm": 0.05297665670514107, + "learning_rate": 1.2961255817038842e-05, + "loss": 0.3616389334201813, + "step": 6856 + }, + { + "epoch": 1.267002344354494, + "grad_norm": 0.08395391702651978, + "learning_rate": 1.2959350316720613e-05, + "loss": 0.6917855143547058, + "step": 6857 + }, + { + "epoch": 1.26718712106339, + "grad_norm": 0.09088056534528732, + "learning_rate": 1.29574446986295e-05, + "loss": 0.6455844640731812, + "step": 6858 + }, + { + "epoch": 1.2673718977722859, + "grad_norm": 0.10937219858169556, + "learning_rate": 1.295553896284134e-05, + "loss": 0.7937737703323364, + "step": 6859 + }, + { + "epoch": 1.2675566744811817, + "grad_norm": 0.09938866645097733, + "learning_rate": 1.2953633109431975e-05, + "loss": 0.7208584547042847, + "step": 6860 + }, + { + "epoch": 1.2677414511900775, + "grad_norm": 0.06810204684734344, + "learning_rate": 1.2951727138477255e-05, + "loss": 0.3876679539680481, + "step": 6861 + }, + { + "epoch": 1.2679262278989734, + "grad_norm": 0.07531758397817612, + "learning_rate": 1.294982105005303e-05, + "loss": 0.5154255628585815, + "step": 6862 + }, + { + "epoch": 1.2681110046078692, + "grad_norm": 0.08114465326070786, + "learning_rate": 1.2947914844235154e-05, + "loss": 0.4871000349521637, + "step": 6863 + }, + { + "epoch": 1.268295781316765, + "grad_norm": 0.09290967136621475, + "learning_rate": 1.2946008521099488e-05, + "loss": 0.678450882434845, + "step": 6864 + }, + { + "epoch": 1.2684805580256608, + "grad_norm": 0.05580367147922516, + "learning_rate": 1.2944102080721905e-05, + "loss": 0.36511459946632385, + "step": 6865 + }, + { + "epoch": 1.2686653347345567, + "grad_norm": 0.0826374813914299, + "learning_rate": 1.2942195523178268e-05, + "loss": 0.6721389293670654, + "step": 6866 + }, + { + "epoch": 1.2688501114434525, + "grad_norm": 0.08435200899839401, + "learning_rate": 1.2940288848544451e-05, + "loss": 0.5716503262519836, + "step": 6867 + }, + { + "epoch": 1.2690348881523483, + "grad_norm": 0.08023706823587418, + "learning_rate": 1.2938382056896342e-05, + "loss": 0.5046827793121338, + "step": 6868 + }, + { + "epoch": 1.2692196648612444, + "grad_norm": 0.06476599723100662, + "learning_rate": 1.293647514830982e-05, + "loss": 0.47997936606407166, + "step": 6869 + }, + { + "epoch": 1.2694044415701402, + "grad_norm": 0.06006301939487457, + "learning_rate": 1.2934568122860766e-05, + "loss": 0.44025835394859314, + "step": 6870 + }, + { + "epoch": 1.269589218279036, + "grad_norm": 0.0865289717912674, + "learning_rate": 1.293266098062509e-05, + "loss": 0.6664592623710632, + "step": 6871 + }, + { + "epoch": 1.2697739949879319, + "grad_norm": 0.0914430171251297, + "learning_rate": 1.2930753721678681e-05, + "loss": 0.6752035021781921, + "step": 6872 + }, + { + "epoch": 1.2699587716968277, + "grad_norm": 0.07003988325595856, + "learning_rate": 1.2928846346097442e-05, + "loss": 0.5116729140281677, + "step": 6873 + }, + { + "epoch": 1.2701435484057235, + "grad_norm": 0.0638568103313446, + "learning_rate": 1.2926938853957278e-05, + "loss": 0.38800185918807983, + "step": 6874 + }, + { + "epoch": 1.2703283251146193, + "grad_norm": 0.0729961097240448, + "learning_rate": 1.2925031245334112e-05, + "loss": 0.38727983832359314, + "step": 6875 + }, + { + "epoch": 1.2705131018235152, + "grad_norm": 0.08079040795564651, + "learning_rate": 1.2923123520303848e-05, + "loss": 0.5495513677597046, + "step": 6876 + }, + { + "epoch": 1.270697878532411, + "grad_norm": 0.06676933914422989, + "learning_rate": 1.2921215678942413e-05, + "loss": 0.30596923828125, + "step": 6877 + }, + { + "epoch": 1.2708826552413068, + "grad_norm": 0.0731934905052185, + "learning_rate": 1.2919307721325737e-05, + "loss": 0.504347562789917, + "step": 6878 + }, + { + "epoch": 1.2710674319502027, + "grad_norm": 0.08271405100822449, + "learning_rate": 1.2917399647529747e-05, + "loss": 0.6242300868034363, + "step": 6879 + }, + { + "epoch": 1.2712522086590985, + "grad_norm": 0.08044064044952393, + "learning_rate": 1.2915491457630376e-05, + "loss": 0.6209437847137451, + "step": 6880 + }, + { + "epoch": 1.2714369853679943, + "grad_norm": 0.06818465143442154, + "learning_rate": 1.2913583151703567e-05, + "loss": 0.5452882051467896, + "step": 6881 + }, + { + "epoch": 1.2716217620768901, + "grad_norm": 0.06986252218484879, + "learning_rate": 1.2911674729825264e-05, + "loss": 0.4770204722881317, + "step": 6882 + }, + { + "epoch": 1.271806538785786, + "grad_norm": 0.07381994277238846, + "learning_rate": 1.2909766192071416e-05, + "loss": 0.49883171916007996, + "step": 6883 + }, + { + "epoch": 1.2719913154946818, + "grad_norm": 0.09339702874422073, + "learning_rate": 1.2907857538517976e-05, + "loss": 0.7252719402313232, + "step": 6884 + }, + { + "epoch": 1.2721760922035776, + "grad_norm": 0.08802812546491623, + "learning_rate": 1.2905948769240905e-05, + "loss": 0.5160189867019653, + "step": 6885 + }, + { + "epoch": 1.2723608689124735, + "grad_norm": 0.11236675083637238, + "learning_rate": 1.2904039884316163e-05, + "loss": 0.7686521410942078, + "step": 6886 + }, + { + "epoch": 1.2725456456213693, + "grad_norm": 0.07604110985994339, + "learning_rate": 1.2902130883819724e-05, + "loss": 0.5760014653205872, + "step": 6887 + }, + { + "epoch": 1.2727304223302653, + "grad_norm": 0.08620353788137436, + "learning_rate": 1.290022176782755e-05, + "loss": 0.5739667415618896, + "step": 6888 + }, + { + "epoch": 1.2729151990391612, + "grad_norm": 0.061291273683309555, + "learning_rate": 1.2898312536415628e-05, + "loss": 0.4189784824848175, + "step": 6889 + }, + { + "epoch": 1.273099975748057, + "grad_norm": 0.0792623907327652, + "learning_rate": 1.2896403189659929e-05, + "loss": 0.5867186188697815, + "step": 6890 + }, + { + "epoch": 1.2732847524569528, + "grad_norm": 0.06291884183883667, + "learning_rate": 1.2894493727636448e-05, + "loss": 0.3795328140258789, + "step": 6891 + }, + { + "epoch": 1.2734695291658487, + "grad_norm": 0.0688159242272377, + "learning_rate": 1.2892584150421175e-05, + "loss": 0.39740902185440063, + "step": 6892 + }, + { + "epoch": 1.2736543058747445, + "grad_norm": 0.08040191978216171, + "learning_rate": 1.2890674458090098e-05, + "loss": 0.6047346591949463, + "step": 6893 + }, + { + "epoch": 1.2738390825836403, + "grad_norm": 0.0698571503162384, + "learning_rate": 1.288876465071922e-05, + "loss": 0.4499545097351074, + "step": 6894 + }, + { + "epoch": 1.2740238592925361, + "grad_norm": 0.07487684488296509, + "learning_rate": 1.2886854728384552e-05, + "loss": 0.565583348274231, + "step": 6895 + }, + { + "epoch": 1.274208636001432, + "grad_norm": 0.06601119041442871, + "learning_rate": 1.2884944691162096e-05, + "loss": 0.44833654165267944, + "step": 6896 + }, + { + "epoch": 1.2743934127103278, + "grad_norm": 0.08068963885307312, + "learning_rate": 1.2883034539127865e-05, + "loss": 0.6059198975563049, + "step": 6897 + }, + { + "epoch": 1.2745781894192236, + "grad_norm": 0.0770842581987381, + "learning_rate": 1.2881124272357881e-05, + "loss": 0.44424009323120117, + "step": 6898 + }, + { + "epoch": 1.2747629661281197, + "grad_norm": 0.0816042497754097, + "learning_rate": 1.2879213890928166e-05, + "loss": 0.5663117170333862, + "step": 6899 + }, + { + "epoch": 1.2749477428370155, + "grad_norm": 0.05652960389852524, + "learning_rate": 1.2877303394914744e-05, + "loss": 0.47078806161880493, + "step": 6900 + }, + { + "epoch": 1.2751325195459113, + "grad_norm": 0.08161277323961258, + "learning_rate": 1.2875392784393648e-05, + "loss": 0.5621728301048279, + "step": 6901 + }, + { + "epoch": 1.2753172962548072, + "grad_norm": 0.08607976138591766, + "learning_rate": 1.2873482059440915e-05, + "loss": 0.7239657640457153, + "step": 6902 + }, + { + "epoch": 1.275502072963703, + "grad_norm": 0.0837235152721405, + "learning_rate": 1.2871571220132589e-05, + "loss": 0.5190312266349792, + "step": 6903 + }, + { + "epoch": 1.2756868496725988, + "grad_norm": 0.09978122264146805, + "learning_rate": 1.2869660266544713e-05, + "loss": 0.555137574672699, + "step": 6904 + }, + { + "epoch": 1.2758716263814947, + "grad_norm": 0.06831305474042892, + "learning_rate": 1.2867749198753333e-05, + "loss": 0.41242703795433044, + "step": 6905 + }, + { + "epoch": 1.2760564030903905, + "grad_norm": 0.07195712625980377, + "learning_rate": 1.2865838016834506e-05, + "loss": 0.557858407497406, + "step": 6906 + }, + { + "epoch": 1.2762411797992863, + "grad_norm": 0.08062034100294113, + "learning_rate": 1.2863926720864295e-05, + "loss": 0.45327404141426086, + "step": 6907 + }, + { + "epoch": 1.2764259565081821, + "grad_norm": 0.07121732085943222, + "learning_rate": 1.2862015310918759e-05, + "loss": 0.5176864862442017, + "step": 6908 + }, + { + "epoch": 1.276610733217078, + "grad_norm": 0.06773148477077484, + "learning_rate": 1.2860103787073969e-05, + "loss": 0.5205627083778381, + "step": 6909 + }, + { + "epoch": 1.2767955099259738, + "grad_norm": 0.05887208133935928, + "learning_rate": 1.2858192149405997e-05, + "loss": 0.3680979013442993, + "step": 6910 + }, + { + "epoch": 1.2769802866348696, + "grad_norm": 0.08620485663414001, + "learning_rate": 1.2856280397990917e-05, + "loss": 0.6715238690376282, + "step": 6911 + }, + { + "epoch": 1.2771650633437654, + "grad_norm": 0.058001063764095306, + "learning_rate": 1.2854368532904815e-05, + "loss": 0.3583117723464966, + "step": 6912 + }, + { + "epoch": 1.2773498400526613, + "grad_norm": 0.09623677283525467, + "learning_rate": 1.2852456554223775e-05, + "loss": 0.5942349433898926, + "step": 6913 + }, + { + "epoch": 1.277534616761557, + "grad_norm": 0.06850364059209824, + "learning_rate": 1.2850544462023891e-05, + "loss": 0.428526371717453, + "step": 6914 + }, + { + "epoch": 1.277719393470453, + "grad_norm": 0.06925079971551895, + "learning_rate": 1.284863225638125e-05, + "loss": 0.4980008006095886, + "step": 6915 + }, + { + "epoch": 1.2779041701793488, + "grad_norm": 0.05816519632935524, + "learning_rate": 1.2846719937371961e-05, + "loss": 0.3704940676689148, + "step": 6916 + }, + { + "epoch": 1.2780889468882448, + "grad_norm": 0.06680703908205032, + "learning_rate": 1.2844807505072125e-05, + "loss": 0.43866413831710815, + "step": 6917 + }, + { + "epoch": 1.2782737235971406, + "grad_norm": 0.058567631989717484, + "learning_rate": 1.284289495955785e-05, + "loss": 0.37732750177383423, + "step": 6918 + }, + { + "epoch": 1.2784585003060365, + "grad_norm": 0.07167914509773254, + "learning_rate": 1.2840982300905246e-05, + "loss": 0.44376444816589355, + "step": 6919 + }, + { + "epoch": 1.2786432770149323, + "grad_norm": 0.08945298194885254, + "learning_rate": 1.2839069529190441e-05, + "loss": 0.5941975712776184, + "step": 6920 + }, + { + "epoch": 1.2788280537238281, + "grad_norm": 0.08231666684150696, + "learning_rate": 1.283715664448955e-05, + "loss": 0.4978278875350952, + "step": 6921 + }, + { + "epoch": 1.279012830432724, + "grad_norm": 0.08088124543428421, + "learning_rate": 1.2835243646878699e-05, + "loss": 0.6616016030311584, + "step": 6922 + }, + { + "epoch": 1.2791976071416198, + "grad_norm": 0.07822462916374207, + "learning_rate": 1.283333053643402e-05, + "loss": 0.5347031950950623, + "step": 6923 + }, + { + "epoch": 1.2793823838505156, + "grad_norm": 0.05929892882704735, + "learning_rate": 1.2831417313231653e-05, + "loss": 0.38059520721435547, + "step": 6924 + }, + { + "epoch": 1.2795671605594114, + "grad_norm": 0.09595592319965363, + "learning_rate": 1.2829503977347734e-05, + "loss": 0.5843982696533203, + "step": 6925 + }, + { + "epoch": 1.2797519372683073, + "grad_norm": 0.08812601119279861, + "learning_rate": 1.2827590528858409e-05, + "loss": 0.5967447757720947, + "step": 6926 + }, + { + "epoch": 1.279936713977203, + "grad_norm": 0.06501595675945282, + "learning_rate": 1.2825676967839828e-05, + "loss": 0.4090256989002228, + "step": 6927 + }, + { + "epoch": 1.2801214906860992, + "grad_norm": 0.059129539877176285, + "learning_rate": 1.2823763294368145e-05, + "loss": 0.4044893980026245, + "step": 6928 + }, + { + "epoch": 1.280306267394995, + "grad_norm": 0.09271105378866196, + "learning_rate": 1.2821849508519513e-05, + "loss": 0.6913366317749023, + "step": 6929 + }, + { + "epoch": 1.2804910441038908, + "grad_norm": 0.07255643606185913, + "learning_rate": 1.2819935610370102e-05, + "loss": 0.4509837031364441, + "step": 6930 + }, + { + "epoch": 1.2806758208127866, + "grad_norm": 0.0755506157875061, + "learning_rate": 1.2818021599996079e-05, + "loss": 0.5539512038230896, + "step": 6931 + }, + { + "epoch": 1.2808605975216825, + "grad_norm": 0.051255643367767334, + "learning_rate": 1.2816107477473607e-05, + "loss": 0.3722156882286072, + "step": 6932 + }, + { + "epoch": 1.2810453742305783, + "grad_norm": 0.06447292864322662, + "learning_rate": 1.281419324287887e-05, + "loss": 0.46363601088523865, + "step": 6933 + }, + { + "epoch": 1.2812301509394741, + "grad_norm": 0.09020563215017319, + "learning_rate": 1.2812278896288048e-05, + "loss": 0.692448616027832, + "step": 6934 + }, + { + "epoch": 1.28141492764837, + "grad_norm": 0.06983469426631927, + "learning_rate": 1.2810364437777324e-05, + "loss": 0.40606072545051575, + "step": 6935 + }, + { + "epoch": 1.2815997043572658, + "grad_norm": 0.0692342072725296, + "learning_rate": 1.2808449867422885e-05, + "loss": 0.5313026905059814, + "step": 6936 + }, + { + "epoch": 1.2817844810661616, + "grad_norm": 0.06339572370052338, + "learning_rate": 1.2806535185300931e-05, + "loss": 0.4123586118221283, + "step": 6937 + }, + { + "epoch": 1.2819692577750574, + "grad_norm": 0.08292673528194427, + "learning_rate": 1.2804620391487658e-05, + "loss": 0.4507513642311096, + "step": 6938 + }, + { + "epoch": 1.2821540344839533, + "grad_norm": 0.05868062749505043, + "learning_rate": 1.2802705486059264e-05, + "loss": 0.4681969881057739, + "step": 6939 + }, + { + "epoch": 1.282338811192849, + "grad_norm": 0.07713848352432251, + "learning_rate": 1.2800790469091964e-05, + "loss": 0.5836921334266663, + "step": 6940 + }, + { + "epoch": 1.282523587901745, + "grad_norm": 0.06986226886510849, + "learning_rate": 1.2798875340661964e-05, + "loss": 0.5518940687179565, + "step": 6941 + }, + { + "epoch": 1.2827083646106407, + "grad_norm": 0.08023731410503387, + "learning_rate": 1.2796960100845483e-05, + "loss": 0.5069800615310669, + "step": 6942 + }, + { + "epoch": 1.2828931413195366, + "grad_norm": 0.08042874932289124, + "learning_rate": 1.2795044749718737e-05, + "loss": 0.5868576169013977, + "step": 6943 + }, + { + "epoch": 1.2830779180284324, + "grad_norm": 0.07451844215393066, + "learning_rate": 1.2793129287357959e-05, + "loss": 0.5001909732818604, + "step": 6944 + }, + { + "epoch": 1.2832626947373282, + "grad_norm": 0.05197039991617203, + "learning_rate": 1.2791213713839374e-05, + "loss": 0.3045160472393036, + "step": 6945 + }, + { + "epoch": 1.2834474714462243, + "grad_norm": 0.07311443239450455, + "learning_rate": 1.2789298029239212e-05, + "loss": 0.5387001633644104, + "step": 6946 + }, + { + "epoch": 1.2836322481551201, + "grad_norm": 0.07509905844926834, + "learning_rate": 1.2787382233633718e-05, + "loss": 0.5643014311790466, + "step": 6947 + }, + { + "epoch": 1.283817024864016, + "grad_norm": 0.060981862246990204, + "learning_rate": 1.2785466327099132e-05, + "loss": 0.4061810076236725, + "step": 6948 + }, + { + "epoch": 1.2840018015729118, + "grad_norm": 0.07428246736526489, + "learning_rate": 1.2783550309711696e-05, + "loss": 0.6092492341995239, + "step": 6949 + }, + { + "epoch": 1.2841865782818076, + "grad_norm": 0.08736100047826767, + "learning_rate": 1.2781634181547671e-05, + "loss": 0.6303153038024902, + "step": 6950 + }, + { + "epoch": 1.2843713549907034, + "grad_norm": 0.07647480070590973, + "learning_rate": 1.277971794268331e-05, + "loss": 0.6002855896949768, + "step": 6951 + }, + { + "epoch": 1.2845561316995993, + "grad_norm": 0.06566685438156128, + "learning_rate": 1.2777801593194865e-05, + "loss": 0.432452529668808, + "step": 6952 + }, + { + "epoch": 1.284740908408495, + "grad_norm": 0.05242609977722168, + "learning_rate": 1.2775885133158612e-05, + "loss": 0.35408815741539, + "step": 6953 + }, + { + "epoch": 1.284925685117391, + "grad_norm": 0.06851579248905182, + "learning_rate": 1.2773968562650816e-05, + "loss": 0.45086902379989624, + "step": 6954 + }, + { + "epoch": 1.2851104618262867, + "grad_norm": 0.05527650564908981, + "learning_rate": 1.277205188174775e-05, + "loss": 0.3405957520008087, + "step": 6955 + }, + { + "epoch": 1.2852952385351826, + "grad_norm": 0.06148513779044151, + "learning_rate": 1.2770135090525683e-05, + "loss": 0.44460922479629517, + "step": 6956 + }, + { + "epoch": 1.2854800152440786, + "grad_norm": 0.06990844756364822, + "learning_rate": 1.2768218189060915e-05, + "loss": 0.4537244737148285, + "step": 6957 + }, + { + "epoch": 1.2856647919529745, + "grad_norm": 0.07241354137659073, + "learning_rate": 1.2766301177429722e-05, + "loss": 0.6087204813957214, + "step": 6958 + }, + { + "epoch": 1.2858495686618703, + "grad_norm": 0.06654617190361023, + "learning_rate": 1.2764384055708394e-05, + "loss": 0.44286319613456726, + "step": 6959 + }, + { + "epoch": 1.286034345370766, + "grad_norm": 0.056730449199676514, + "learning_rate": 1.2762466823973231e-05, + "loss": 0.4290338456630707, + "step": 6960 + }, + { + "epoch": 1.286219122079662, + "grad_norm": 0.07880526781082153, + "learning_rate": 1.2760549482300535e-05, + "loss": 0.5071108341217041, + "step": 6961 + }, + { + "epoch": 1.2864038987885578, + "grad_norm": 0.06961959600448608, + "learning_rate": 1.2758632030766603e-05, + "loss": 0.5442366003990173, + "step": 6962 + }, + { + "epoch": 1.2865886754974536, + "grad_norm": 0.06874290853738785, + "learning_rate": 1.2756714469447744e-05, + "loss": 0.4268724024295807, + "step": 6963 + }, + { + "epoch": 1.2867734522063494, + "grad_norm": 0.08130981028079987, + "learning_rate": 1.2754796798420279e-05, + "loss": 0.5325379967689514, + "step": 6964 + }, + { + "epoch": 1.2869582289152453, + "grad_norm": 0.05133212357759476, + "learning_rate": 1.2752879017760516e-05, + "loss": 0.34395477175712585, + "step": 6965 + }, + { + "epoch": 1.287143005624141, + "grad_norm": 0.0714641585946083, + "learning_rate": 1.2750961127544782e-05, + "loss": 0.5277564525604248, + "step": 6966 + }, + { + "epoch": 1.287327782333037, + "grad_norm": 0.067436084151268, + "learning_rate": 1.27490431278494e-05, + "loss": 0.38900041580200195, + "step": 6967 + }, + { + "epoch": 1.2875125590419327, + "grad_norm": 0.07422590255737305, + "learning_rate": 1.2747125018750708e-05, + "loss": 0.45272085070610046, + "step": 6968 + }, + { + "epoch": 1.2876973357508286, + "grad_norm": 0.07180160284042358, + "learning_rate": 1.2745206800325029e-05, + "loss": 0.5089162588119507, + "step": 6969 + }, + { + "epoch": 1.2878821124597244, + "grad_norm": 0.0845794528722763, + "learning_rate": 1.2743288472648709e-05, + "loss": 0.626602292060852, + "step": 6970 + }, + { + "epoch": 1.2880668891686202, + "grad_norm": 0.07468029856681824, + "learning_rate": 1.2741370035798093e-05, + "loss": 0.490026593208313, + "step": 6971 + }, + { + "epoch": 1.288251665877516, + "grad_norm": 0.07268189638853073, + "learning_rate": 1.2739451489849524e-05, + "loss": 0.45294222235679626, + "step": 6972 + }, + { + "epoch": 1.2884364425864119, + "grad_norm": 0.07588563859462738, + "learning_rate": 1.2737532834879356e-05, + "loss": 0.5010835528373718, + "step": 6973 + }, + { + "epoch": 1.2886212192953077, + "grad_norm": 0.08507157117128372, + "learning_rate": 1.2735614070963948e-05, + "loss": 0.5612196922302246, + "step": 6974 + }, + { + "epoch": 1.2888059960042035, + "grad_norm": 0.0746978223323822, + "learning_rate": 1.273369519817966e-05, + "loss": 0.5309698581695557, + "step": 6975 + }, + { + "epoch": 1.2889907727130996, + "grad_norm": 0.07962056994438171, + "learning_rate": 1.2731776216602849e-05, + "loss": 0.5472279787063599, + "step": 6976 + }, + { + "epoch": 1.2891755494219954, + "grad_norm": 0.06945842504501343, + "learning_rate": 1.2729857126309898e-05, + "loss": 0.4632667303085327, + "step": 6977 + }, + { + "epoch": 1.2893603261308912, + "grad_norm": 0.06587348133325577, + "learning_rate": 1.2727937927377172e-05, + "loss": 0.3979934751987457, + "step": 6978 + }, + { + "epoch": 1.289545102839787, + "grad_norm": 0.06981249898672104, + "learning_rate": 1.272601861988105e-05, + "loss": 0.5129185318946838, + "step": 6979 + }, + { + "epoch": 1.289729879548683, + "grad_norm": 0.06999025493860245, + "learning_rate": 1.2724099203897915e-05, + "loss": 0.4692162573337555, + "step": 6980 + }, + { + "epoch": 1.2899146562575787, + "grad_norm": 0.08427304029464722, + "learning_rate": 1.2722179679504156e-05, + "loss": 0.5699877738952637, + "step": 6981 + }, + { + "epoch": 1.2900994329664746, + "grad_norm": 0.08691181987524033, + "learning_rate": 1.2720260046776161e-05, + "loss": 0.5394098162651062, + "step": 6982 + }, + { + "epoch": 1.2902842096753704, + "grad_norm": 0.08646968007087708, + "learning_rate": 1.2718340305790326e-05, + "loss": 0.6010709404945374, + "step": 6983 + }, + { + "epoch": 1.2904689863842662, + "grad_norm": 0.07945973426103592, + "learning_rate": 1.2716420456623055e-05, + "loss": 0.6645624041557312, + "step": 6984 + }, + { + "epoch": 1.290653763093162, + "grad_norm": 0.07346010953187943, + "learning_rate": 1.2714500499350746e-05, + "loss": 0.46226420998573303, + "step": 6985 + }, + { + "epoch": 1.2908385398020579, + "grad_norm": 0.08596549928188324, + "learning_rate": 1.271258043404981e-05, + "loss": 0.7146180272102356, + "step": 6986 + }, + { + "epoch": 1.291023316510954, + "grad_norm": 0.07501712441444397, + "learning_rate": 1.271066026079666e-05, + "loss": 0.4995262622833252, + "step": 6987 + }, + { + "epoch": 1.2912080932198498, + "grad_norm": 0.061273232102394104, + "learning_rate": 1.2708739979667713e-05, + "loss": 0.3552984595298767, + "step": 6988 + }, + { + "epoch": 1.2913928699287456, + "grad_norm": 0.09160473197698593, + "learning_rate": 1.2706819590739385e-05, + "loss": 0.6788548827171326, + "step": 6989 + }, + { + "epoch": 1.2915776466376414, + "grad_norm": 0.08938539773225784, + "learning_rate": 1.2704899094088108e-05, + "loss": 0.5572099089622498, + "step": 6990 + }, + { + "epoch": 1.2917624233465372, + "grad_norm": 0.07936986535787582, + "learning_rate": 1.2702978489790312e-05, + "loss": 0.6609634160995483, + "step": 6991 + }, + { + "epoch": 1.291947200055433, + "grad_norm": 0.07230542600154877, + "learning_rate": 1.2701057777922428e-05, + "loss": 0.5651715397834778, + "step": 6992 + }, + { + "epoch": 1.292131976764329, + "grad_norm": 0.06436077505350113, + "learning_rate": 1.2699136958560893e-05, + "loss": 0.3970441520214081, + "step": 6993 + }, + { + "epoch": 1.2923167534732247, + "grad_norm": 0.06204066798090935, + "learning_rate": 1.2697216031782151e-05, + "loss": 0.49848824739456177, + "step": 6994 + }, + { + "epoch": 1.2925015301821206, + "grad_norm": 0.0783504843711853, + "learning_rate": 1.269529499766265e-05, + "loss": 0.5778496265411377, + "step": 6995 + }, + { + "epoch": 1.2926863068910164, + "grad_norm": 0.06484290212392807, + "learning_rate": 1.2693373856278843e-05, + "loss": 0.4126026928424835, + "step": 6996 + }, + { + "epoch": 1.2928710835999122, + "grad_norm": 0.08233322203159332, + "learning_rate": 1.2691452607707182e-05, + "loss": 0.6408974528312683, + "step": 6997 + }, + { + "epoch": 1.293055860308808, + "grad_norm": 0.07067559659481049, + "learning_rate": 1.2689531252024127e-05, + "loss": 0.44265973567962646, + "step": 6998 + }, + { + "epoch": 1.2932406370177039, + "grad_norm": 0.08672047406435013, + "learning_rate": 1.2687609789306144e-05, + "loss": 0.7062324285507202, + "step": 6999 + }, + { + "epoch": 1.2934254137265997, + "grad_norm": 0.0679650530219078, + "learning_rate": 1.2685688219629697e-05, + "loss": 0.5295279622077942, + "step": 7000 + }, + { + "epoch": 1.2934254137265997, + "eval_loss": 0.5948106646537781, + "eval_runtime": 243.2682, + "eval_samples_per_second": 74.934, + "eval_steps_per_second": 9.368, + "step": 7000 + }, + { + "epoch": 1.2936101904354955, + "grad_norm": 0.07759220898151398, + "learning_rate": 1.2683766543071263e-05, + "loss": 0.570826530456543, + "step": 7001 + }, + { + "epoch": 1.2937949671443914, + "grad_norm": 0.07804378122091293, + "learning_rate": 1.2681844759707316e-05, + "loss": 0.47277188301086426, + "step": 7002 + }, + { + "epoch": 1.2939797438532872, + "grad_norm": 0.07858088612556458, + "learning_rate": 1.2679922869614341e-05, + "loss": 0.5211628079414368, + "step": 7003 + }, + { + "epoch": 1.294164520562183, + "grad_norm": 0.06452016532421112, + "learning_rate": 1.2678000872868817e-05, + "loss": 0.4292004108428955, + "step": 7004 + }, + { + "epoch": 1.294349297271079, + "grad_norm": 0.08996476233005524, + "learning_rate": 1.2676078769547238e-05, + "loss": 0.6236442923545837, + "step": 7005 + }, + { + "epoch": 1.2945340739799749, + "grad_norm": 0.0958971306681633, + "learning_rate": 1.2674156559726096e-05, + "loss": 0.6066073775291443, + "step": 7006 + }, + { + "epoch": 1.2947188506888707, + "grad_norm": 0.06967156380414963, + "learning_rate": 1.2672234243481889e-05, + "loss": 0.4723014533519745, + "step": 7007 + }, + { + "epoch": 1.2949036273977665, + "grad_norm": 0.09893563389778137, + "learning_rate": 1.2670311820891122e-05, + "loss": 0.7446179389953613, + "step": 7008 + }, + { + "epoch": 1.2950884041066624, + "grad_norm": 0.10600033402442932, + "learning_rate": 1.2668389292030296e-05, + "loss": 0.7669276595115662, + "step": 7009 + }, + { + "epoch": 1.2952731808155582, + "grad_norm": 0.06642530858516693, + "learning_rate": 1.2666466656975927e-05, + "loss": 0.41287514567375183, + "step": 7010 + }, + { + "epoch": 1.295457957524454, + "grad_norm": 0.09201806038618088, + "learning_rate": 1.2664543915804524e-05, + "loss": 0.4747542142868042, + "step": 7011 + }, + { + "epoch": 1.2956427342333499, + "grad_norm": 0.07357966899871826, + "learning_rate": 1.2662621068592608e-05, + "loss": 0.5911757946014404, + "step": 7012 + }, + { + "epoch": 1.2958275109422457, + "grad_norm": 0.08051083981990814, + "learning_rate": 1.266069811541671e-05, + "loss": 0.6123976111412048, + "step": 7013 + }, + { + "epoch": 1.2960122876511415, + "grad_norm": 0.07193154096603394, + "learning_rate": 1.2658775056353347e-05, + "loss": 0.5236746072769165, + "step": 7014 + }, + { + "epoch": 1.2961970643600373, + "grad_norm": 0.06868410855531693, + "learning_rate": 1.2656851891479055e-05, + "loss": 0.467146635055542, + "step": 7015 + }, + { + "epoch": 1.2963818410689334, + "grad_norm": 0.06204485893249512, + "learning_rate": 1.2654928620870373e-05, + "loss": 0.4814089238643646, + "step": 7016 + }, + { + "epoch": 1.2965666177778292, + "grad_norm": 0.07624665647745132, + "learning_rate": 1.2653005244603836e-05, + "loss": 0.5406903624534607, + "step": 7017 + }, + { + "epoch": 1.296751394486725, + "grad_norm": 0.07900479435920715, + "learning_rate": 1.2651081762755991e-05, + "loss": 0.6114708185195923, + "step": 7018 + }, + { + "epoch": 1.2969361711956209, + "grad_norm": 0.06851568073034286, + "learning_rate": 1.2649158175403384e-05, + "loss": 0.41163602471351624, + "step": 7019 + }, + { + "epoch": 1.2971209479045167, + "grad_norm": 0.08029831945896149, + "learning_rate": 1.2647234482622573e-05, + "loss": 0.5897607207298279, + "step": 7020 + }, + { + "epoch": 1.2973057246134125, + "grad_norm": 0.05459596589207649, + "learning_rate": 1.2645310684490108e-05, + "loss": 0.37470948696136475, + "step": 7021 + }, + { + "epoch": 1.2974905013223084, + "grad_norm": 0.09061688184738159, + "learning_rate": 1.2643386781082555e-05, + "loss": 0.5633164048194885, + "step": 7022 + }, + { + "epoch": 1.2976752780312042, + "grad_norm": 0.08323931694030762, + "learning_rate": 1.264146277247648e-05, + "loss": 0.5104799866676331, + "step": 7023 + }, + { + "epoch": 1.2978600547401, + "grad_norm": 0.07911505550146103, + "learning_rate": 1.2639538658748449e-05, + "loss": 0.6013384461402893, + "step": 7024 + }, + { + "epoch": 1.2980448314489959, + "grad_norm": 0.09018902480602264, + "learning_rate": 1.263761443997504e-05, + "loss": 0.5095791220664978, + "step": 7025 + }, + { + "epoch": 1.2982296081578917, + "grad_norm": 0.07577616721391678, + "learning_rate": 1.2635690116232827e-05, + "loss": 0.587360143661499, + "step": 7026 + }, + { + "epoch": 1.2984143848667875, + "grad_norm": 0.07888596504926682, + "learning_rate": 1.2633765687598394e-05, + "loss": 0.6399943232536316, + "step": 7027 + }, + { + "epoch": 1.2985991615756833, + "grad_norm": 0.09185665100812912, + "learning_rate": 1.2631841154148323e-05, + "loss": 0.7375509142875671, + "step": 7028 + }, + { + "epoch": 1.2987839382845792, + "grad_norm": 0.07888969779014587, + "learning_rate": 1.2629916515959211e-05, + "loss": 0.4613966643810272, + "step": 7029 + }, + { + "epoch": 1.298968714993475, + "grad_norm": 0.07805892080068588, + "learning_rate": 1.2627991773107651e-05, + "loss": 0.482734352350235, + "step": 7030 + }, + { + "epoch": 1.2991534917023708, + "grad_norm": 0.05919409543275833, + "learning_rate": 1.2626066925670237e-05, + "loss": 0.35460159182548523, + "step": 7031 + }, + { + "epoch": 1.2993382684112667, + "grad_norm": 0.07844141125679016, + "learning_rate": 1.2624141973723576e-05, + "loss": 0.5155153274536133, + "step": 7032 + }, + { + "epoch": 1.2995230451201625, + "grad_norm": 0.0717112123966217, + "learning_rate": 1.2622216917344276e-05, + "loss": 0.38670143485069275, + "step": 7033 + }, + { + "epoch": 1.2997078218290585, + "grad_norm": 0.07971569150686264, + "learning_rate": 1.2620291756608948e-05, + "loss": 0.5672218799591064, + "step": 7034 + }, + { + "epoch": 1.2998925985379544, + "grad_norm": 0.07753638923168182, + "learning_rate": 1.26183664915942e-05, + "loss": 0.5556024312973022, + "step": 7035 + }, + { + "epoch": 1.3000773752468502, + "grad_norm": 0.06535354256629944, + "learning_rate": 1.2616441122376664e-05, + "loss": 0.519101083278656, + "step": 7036 + }, + { + "epoch": 1.300262151955746, + "grad_norm": 0.09425181895494461, + "learning_rate": 1.2614515649032955e-05, + "loss": 0.7004117369651794, + "step": 7037 + }, + { + "epoch": 1.3004469286646418, + "grad_norm": 0.08313268423080444, + "learning_rate": 1.2612590071639702e-05, + "loss": 0.5423683524131775, + "step": 7038 + }, + { + "epoch": 1.3006317053735377, + "grad_norm": 0.08191510289907455, + "learning_rate": 1.2610664390273537e-05, + "loss": 0.6264122128486633, + "step": 7039 + }, + { + "epoch": 1.3008164820824335, + "grad_norm": 0.0811074897646904, + "learning_rate": 1.26087386050111e-05, + "loss": 0.6357519030570984, + "step": 7040 + }, + { + "epoch": 1.3010012587913293, + "grad_norm": 0.07255978882312775, + "learning_rate": 1.2606812715929024e-05, + "loss": 0.43697232007980347, + "step": 7041 + }, + { + "epoch": 1.3011860355002252, + "grad_norm": 0.07301337271928787, + "learning_rate": 1.260488672310396e-05, + "loss": 0.4118881821632385, + "step": 7042 + }, + { + "epoch": 1.301370812209121, + "grad_norm": 0.09116517752408981, + "learning_rate": 1.2602960626612555e-05, + "loss": 0.7510353922843933, + "step": 7043 + }, + { + "epoch": 1.3015555889180168, + "grad_norm": 0.08381814509630203, + "learning_rate": 1.260103442653146e-05, + "loss": 0.47766539454460144, + "step": 7044 + }, + { + "epoch": 1.3017403656269129, + "grad_norm": 0.08243940770626068, + "learning_rate": 1.259910812293733e-05, + "loss": 0.7016247510910034, + "step": 7045 + }, + { + "epoch": 1.3019251423358087, + "grad_norm": 0.06611216813325882, + "learning_rate": 1.259718171590683e-05, + "loss": 0.4857097566127777, + "step": 7046 + }, + { + "epoch": 1.3021099190447045, + "grad_norm": 0.07947178184986115, + "learning_rate": 1.2595255205516625e-05, + "loss": 0.558712899684906, + "step": 7047 + }, + { + "epoch": 1.3022946957536004, + "grad_norm": 0.09471435844898224, + "learning_rate": 1.259332859184338e-05, + "loss": 0.7513187527656555, + "step": 7048 + }, + { + "epoch": 1.3024794724624962, + "grad_norm": 0.06267374753952026, + "learning_rate": 1.2591401874963771e-05, + "loss": 0.45277971029281616, + "step": 7049 + }, + { + "epoch": 1.302664249171392, + "grad_norm": 0.06743251532316208, + "learning_rate": 1.2589475054954476e-05, + "loss": 0.4124307334423065, + "step": 7050 + }, + { + "epoch": 1.3028490258802878, + "grad_norm": 0.086782306432724, + "learning_rate": 1.2587548131892175e-05, + "loss": 0.6340530514717102, + "step": 7051 + }, + { + "epoch": 1.3030338025891837, + "grad_norm": 0.08213392645120621, + "learning_rate": 1.2585621105853551e-05, + "loss": 0.558947741985321, + "step": 7052 + }, + { + "epoch": 1.3032185792980795, + "grad_norm": 0.0744403526186943, + "learning_rate": 1.2583693976915301e-05, + "loss": 0.5660160779953003, + "step": 7053 + }, + { + "epoch": 1.3034033560069753, + "grad_norm": 0.07753294706344604, + "learning_rate": 1.2581766745154114e-05, + "loss": 0.5865622758865356, + "step": 7054 + }, + { + "epoch": 1.3035881327158712, + "grad_norm": 0.054875295609235764, + "learning_rate": 1.2579839410646682e-05, + "loss": 0.40499284863471985, + "step": 7055 + }, + { + "epoch": 1.303772909424767, + "grad_norm": 0.07428351789712906, + "learning_rate": 1.2577911973469717e-05, + "loss": 0.6527612805366516, + "step": 7056 + }, + { + "epoch": 1.3039576861336628, + "grad_norm": 0.09750670939683914, + "learning_rate": 1.2575984433699921e-05, + "loss": 0.6454939246177673, + "step": 7057 + }, + { + "epoch": 1.3041424628425586, + "grad_norm": 0.06648306548595428, + "learning_rate": 1.2574056791414003e-05, + "loss": 0.46796974539756775, + "step": 7058 + }, + { + "epoch": 1.3043272395514545, + "grad_norm": 0.08090738207101822, + "learning_rate": 1.2572129046688675e-05, + "loss": 0.5984477400779724, + "step": 7059 + }, + { + "epoch": 1.3045120162603503, + "grad_norm": 0.06808750331401825, + "learning_rate": 1.2570201199600663e-05, + "loss": 0.42916426062583923, + "step": 7060 + }, + { + "epoch": 1.3046967929692461, + "grad_norm": 0.08575625717639923, + "learning_rate": 1.2568273250226681e-05, + "loss": 0.6604623198509216, + "step": 7061 + }, + { + "epoch": 1.304881569678142, + "grad_norm": 0.08025676012039185, + "learning_rate": 1.256634519864346e-05, + "loss": 0.7682700753211975, + "step": 7062 + }, + { + "epoch": 1.3050663463870378, + "grad_norm": 0.06096980720758438, + "learning_rate": 1.2564417044927728e-05, + "loss": 0.41423553228378296, + "step": 7063 + }, + { + "epoch": 1.3052511230959338, + "grad_norm": 0.06407662481069565, + "learning_rate": 1.2562488789156224e-05, + "loss": 0.43208426237106323, + "step": 7064 + }, + { + "epoch": 1.3054358998048297, + "grad_norm": 0.06232353299856186, + "learning_rate": 1.2560560431405678e-05, + "loss": 0.3925876319408417, + "step": 7065 + }, + { + "epoch": 1.3056206765137255, + "grad_norm": 0.07849103957414627, + "learning_rate": 1.2558631971752842e-05, + "loss": 0.7047256231307983, + "step": 7066 + }, + { + "epoch": 1.3058054532226213, + "grad_norm": 0.09758805483579636, + "learning_rate": 1.2556703410274458e-05, + "loss": 0.6756837964057922, + "step": 7067 + }, + { + "epoch": 1.3059902299315171, + "grad_norm": 0.08068440109491348, + "learning_rate": 1.2554774747047275e-05, + "loss": 0.654943585395813, + "step": 7068 + }, + { + "epoch": 1.306175006640413, + "grad_norm": 0.08406716585159302, + "learning_rate": 1.2552845982148049e-05, + "loss": 0.6230036616325378, + "step": 7069 + }, + { + "epoch": 1.3063597833493088, + "grad_norm": 0.06222580000758171, + "learning_rate": 1.2550917115653545e-05, + "loss": 0.4030933976173401, + "step": 7070 + }, + { + "epoch": 1.3065445600582046, + "grad_norm": 0.08229056000709534, + "learning_rate": 1.2548988147640518e-05, + "loss": 0.5132369995117188, + "step": 7071 + }, + { + "epoch": 1.3067293367671005, + "grad_norm": 0.08437987416982651, + "learning_rate": 1.2547059078185735e-05, + "loss": 0.4772125482559204, + "step": 7072 + }, + { + "epoch": 1.3069141134759963, + "grad_norm": 0.062136292457580566, + "learning_rate": 1.2545129907365973e-05, + "loss": 0.4129381775856018, + "step": 7073 + }, + { + "epoch": 1.3070988901848921, + "grad_norm": 0.05574364960193634, + "learning_rate": 1.2543200635258002e-05, + "loss": 0.38210901618003845, + "step": 7074 + }, + { + "epoch": 1.3072836668937882, + "grad_norm": 0.06634816527366638, + "learning_rate": 1.2541271261938603e-05, + "loss": 0.4745114743709564, + "step": 7075 + }, + { + "epoch": 1.307468443602684, + "grad_norm": 0.06524790078401566, + "learning_rate": 1.2539341787484555e-05, + "loss": 0.46080282330513, + "step": 7076 + }, + { + "epoch": 1.3076532203115798, + "grad_norm": 0.06377245485782623, + "learning_rate": 1.2537412211972652e-05, + "loss": 0.44798263907432556, + "step": 7077 + }, + { + "epoch": 1.3078379970204757, + "grad_norm": 0.07370581477880478, + "learning_rate": 1.253548253547968e-05, + "loss": 0.46238815784454346, + "step": 7078 + }, + { + "epoch": 1.3080227737293715, + "grad_norm": 0.06885351985692978, + "learning_rate": 1.2533552758082435e-05, + "loss": 0.4642230272293091, + "step": 7079 + }, + { + "epoch": 1.3082075504382673, + "grad_norm": 0.08454688638448715, + "learning_rate": 1.253162287985772e-05, + "loss": 0.6509603261947632, + "step": 7080 + }, + { + "epoch": 1.3083923271471631, + "grad_norm": 0.0632704496383667, + "learning_rate": 1.2529692900882331e-05, + "loss": 0.43099644780158997, + "step": 7081 + }, + { + "epoch": 1.308577103856059, + "grad_norm": 0.08630534261465073, + "learning_rate": 1.2527762821233083e-05, + "loss": 0.7119595408439636, + "step": 7082 + }, + { + "epoch": 1.3087618805649548, + "grad_norm": 0.07517912983894348, + "learning_rate": 1.252583264098678e-05, + "loss": 0.47247129678726196, + "step": 7083 + }, + { + "epoch": 1.3089466572738506, + "grad_norm": 0.07358289510011673, + "learning_rate": 1.2523902360220242e-05, + "loss": 0.5356598496437073, + "step": 7084 + }, + { + "epoch": 1.3091314339827465, + "grad_norm": 0.07740814238786697, + "learning_rate": 1.2521971979010283e-05, + "loss": 0.5637345314025879, + "step": 7085 + }, + { + "epoch": 1.3093162106916423, + "grad_norm": 0.07030104100704193, + "learning_rate": 1.2520041497433733e-05, + "loss": 0.5204507112503052, + "step": 7086 + }, + { + "epoch": 1.309500987400538, + "grad_norm": 0.08233232796192169, + "learning_rate": 1.2518110915567413e-05, + "loss": 0.5168277621269226, + "step": 7087 + }, + { + "epoch": 1.309685764109434, + "grad_norm": 0.0762810930609703, + "learning_rate": 1.2516180233488158e-05, + "loss": 0.5626890063285828, + "step": 7088 + }, + { + "epoch": 1.3098705408183298, + "grad_norm": 0.08059023320674896, + "learning_rate": 1.2514249451272802e-05, + "loss": 0.65793377161026, + "step": 7089 + }, + { + "epoch": 1.3100553175272256, + "grad_norm": 0.06713052093982697, + "learning_rate": 1.2512318568998185e-05, + "loss": 0.5770498514175415, + "step": 7090 + }, + { + "epoch": 1.3102400942361214, + "grad_norm": 0.059266820549964905, + "learning_rate": 1.2510387586741146e-05, + "loss": 0.503598690032959, + "step": 7091 + }, + { + "epoch": 1.3104248709450173, + "grad_norm": 0.07634708285331726, + "learning_rate": 1.2508456504578538e-05, + "loss": 0.5503666996955872, + "step": 7092 + }, + { + "epoch": 1.3106096476539133, + "grad_norm": 0.05500860884785652, + "learning_rate": 1.2506525322587207e-05, + "loss": 0.3018694818019867, + "step": 7093 + }, + { + "epoch": 1.3107944243628091, + "grad_norm": 0.0848320797085762, + "learning_rate": 1.250459404084401e-05, + "loss": 0.5146724581718445, + "step": 7094 + }, + { + "epoch": 1.310979201071705, + "grad_norm": 0.06465902179479599, + "learning_rate": 1.2502662659425808e-05, + "loss": 0.3964228928089142, + "step": 7095 + }, + { + "epoch": 1.3111639777806008, + "grad_norm": 0.08403968065977097, + "learning_rate": 1.250073117840946e-05, + "loss": 0.5929785966873169, + "step": 7096 + }, + { + "epoch": 1.3113487544894966, + "grad_norm": 0.06558596342802048, + "learning_rate": 1.2498799597871836e-05, + "loss": 0.3913648724555969, + "step": 7097 + }, + { + "epoch": 1.3115335311983924, + "grad_norm": 0.07862965017557144, + "learning_rate": 1.2496867917889805e-05, + "loss": 0.5325332283973694, + "step": 7098 + }, + { + "epoch": 1.3117183079072883, + "grad_norm": 0.06948400288820267, + "learning_rate": 1.2494936138540246e-05, + "loss": 0.34856298565864563, + "step": 7099 + }, + { + "epoch": 1.311903084616184, + "grad_norm": 0.06603899598121643, + "learning_rate": 1.249300425990003e-05, + "loss": 0.3758459985256195, + "step": 7100 + }, + { + "epoch": 1.31208786132508, + "grad_norm": 0.08364688605070114, + "learning_rate": 1.2491072282046044e-05, + "loss": 0.6271767616271973, + "step": 7101 + }, + { + "epoch": 1.3122726380339758, + "grad_norm": 0.08045737445354462, + "learning_rate": 1.2489140205055177e-05, + "loss": 0.541724443435669, + "step": 7102 + }, + { + "epoch": 1.3124574147428716, + "grad_norm": 0.05574335902929306, + "learning_rate": 1.2487208029004315e-05, + "loss": 0.30128908157348633, + "step": 7103 + }, + { + "epoch": 1.3126421914517676, + "grad_norm": 0.07106778025627136, + "learning_rate": 1.2485275753970358e-05, + "loss": 0.5022606253623962, + "step": 7104 + }, + { + "epoch": 1.3128269681606635, + "grad_norm": 0.07627978175878525, + "learning_rate": 1.2483343380030199e-05, + "loss": 0.6493616700172424, + "step": 7105 + }, + { + "epoch": 1.3130117448695593, + "grad_norm": 0.08818231523036957, + "learning_rate": 1.2481410907260745e-05, + "loss": 0.6240003705024719, + "step": 7106 + }, + { + "epoch": 1.3131965215784551, + "grad_norm": 0.08052614331245422, + "learning_rate": 1.24794783357389e-05, + "loss": 0.5767407417297363, + "step": 7107 + }, + { + "epoch": 1.313381298287351, + "grad_norm": 0.08588764816522598, + "learning_rate": 1.2477545665541573e-05, + "loss": 0.6063689589500427, + "step": 7108 + }, + { + "epoch": 1.3135660749962468, + "grad_norm": 0.05303497985005379, + "learning_rate": 1.2475612896745681e-05, + "loss": 0.32240840792655945, + "step": 7109 + }, + { + "epoch": 1.3137508517051426, + "grad_norm": 0.07196793705224991, + "learning_rate": 1.247368002942814e-05, + "loss": 0.4601721465587616, + "step": 7110 + }, + { + "epoch": 1.3139356284140384, + "grad_norm": 0.08057314157485962, + "learning_rate": 1.2471747063665871e-05, + "loss": 0.6230965852737427, + "step": 7111 + }, + { + "epoch": 1.3141204051229343, + "grad_norm": 0.08216112852096558, + "learning_rate": 1.2469813999535804e-05, + "loss": 0.5606250762939453, + "step": 7112 + }, + { + "epoch": 1.31430518183183, + "grad_norm": 0.06786283105611801, + "learning_rate": 1.2467880837114867e-05, + "loss": 0.35154303908348083, + "step": 7113 + }, + { + "epoch": 1.314489958540726, + "grad_norm": 0.06885476410388947, + "learning_rate": 1.2465947576479996e-05, + "loss": 0.504109263420105, + "step": 7114 + }, + { + "epoch": 1.3146747352496218, + "grad_norm": 0.07210491597652435, + "learning_rate": 1.2464014217708123e-05, + "loss": 0.4459759294986725, + "step": 7115 + }, + { + "epoch": 1.3148595119585176, + "grad_norm": 0.08211330324411392, + "learning_rate": 1.2462080760876196e-05, + "loss": 0.602215051651001, + "step": 7116 + }, + { + "epoch": 1.3150442886674134, + "grad_norm": 0.07126942276954651, + "learning_rate": 1.2460147206061156e-05, + "loss": 0.4325295090675354, + "step": 7117 + }, + { + "epoch": 1.3152290653763092, + "grad_norm": 0.07953279465436935, + "learning_rate": 1.245821355333995e-05, + "loss": 0.5518671870231628, + "step": 7118 + }, + { + "epoch": 1.315413842085205, + "grad_norm": 0.08978675305843353, + "learning_rate": 1.2456279802789542e-05, + "loss": 0.58292555809021, + "step": 7119 + }, + { + "epoch": 1.315598618794101, + "grad_norm": 0.08348673582077026, + "learning_rate": 1.2454345954486878e-05, + "loss": 0.5909175276756287, + "step": 7120 + }, + { + "epoch": 1.3157833955029967, + "grad_norm": 0.08878988772630692, + "learning_rate": 1.2452412008508924e-05, + "loss": 0.6513380408287048, + "step": 7121 + }, + { + "epoch": 1.3159681722118928, + "grad_norm": 0.06557399779558182, + "learning_rate": 1.2450477964932648e-05, + "loss": 0.4970392882823944, + "step": 7122 + }, + { + "epoch": 1.3161529489207886, + "grad_norm": 0.08615729212760925, + "learning_rate": 1.2448543823835016e-05, + "loss": 0.7483137845993042, + "step": 7123 + }, + { + "epoch": 1.3163377256296844, + "grad_norm": 0.0829634815454483, + "learning_rate": 1.2446609585292997e-05, + "loss": 0.6574590802192688, + "step": 7124 + }, + { + "epoch": 1.3165225023385803, + "grad_norm": 0.07868000119924545, + "learning_rate": 1.244467524938357e-05, + "loss": 0.5516237020492554, + "step": 7125 + }, + { + "epoch": 1.316707279047476, + "grad_norm": 0.07573801279067993, + "learning_rate": 1.244274081618372e-05, + "loss": 0.5354952216148376, + "step": 7126 + }, + { + "epoch": 1.316892055756372, + "grad_norm": 0.06890484690666199, + "learning_rate": 1.2440806285770427e-05, + "loss": 0.5610262155532837, + "step": 7127 + }, + { + "epoch": 1.3170768324652677, + "grad_norm": 0.08226225525140762, + "learning_rate": 1.2438871658220677e-05, + "loss": 0.5836856961250305, + "step": 7128 + }, + { + "epoch": 1.3172616091741636, + "grad_norm": 0.09046713262796402, + "learning_rate": 1.2436936933611467e-05, + "loss": 0.5806461572647095, + "step": 7129 + }, + { + "epoch": 1.3174463858830594, + "grad_norm": 0.08166642487049103, + "learning_rate": 1.2435002112019791e-05, + "loss": 0.48745235800743103, + "step": 7130 + }, + { + "epoch": 1.3176311625919552, + "grad_norm": 0.06724494695663452, + "learning_rate": 1.243306719352265e-05, + "loss": 0.38586583733558655, + "step": 7131 + }, + { + "epoch": 1.317815939300851, + "grad_norm": 0.08288092166185379, + "learning_rate": 1.2431132178197048e-05, + "loss": 0.5428813099861145, + "step": 7132 + }, + { + "epoch": 1.318000716009747, + "grad_norm": 0.07065463811159134, + "learning_rate": 1.2429197066119991e-05, + "loss": 0.4732227027416229, + "step": 7133 + }, + { + "epoch": 1.318185492718643, + "grad_norm": 0.0860670879483223, + "learning_rate": 1.242726185736849e-05, + "loss": 0.7225802540779114, + "step": 7134 + }, + { + "epoch": 1.3183702694275388, + "grad_norm": 0.10098420083522797, + "learning_rate": 1.2425326552019558e-05, + "loss": 0.735201358795166, + "step": 7135 + }, + { + "epoch": 1.3185550461364346, + "grad_norm": 0.06383946537971497, + "learning_rate": 1.2423391150150223e-05, + "loss": 0.4172554016113281, + "step": 7136 + }, + { + "epoch": 1.3187398228453304, + "grad_norm": 0.07630904018878937, + "learning_rate": 1.2421455651837498e-05, + "loss": 0.5034219622612, + "step": 7137 + }, + { + "epoch": 1.3189245995542263, + "grad_norm": 0.0586567223072052, + "learning_rate": 1.2419520057158413e-05, + "loss": 0.3733353912830353, + "step": 7138 + }, + { + "epoch": 1.319109376263122, + "grad_norm": 0.0758417621254921, + "learning_rate": 1.2417584366190003e-05, + "loss": 0.5236166715621948, + "step": 7139 + }, + { + "epoch": 1.319294152972018, + "grad_norm": 0.09308257699012756, + "learning_rate": 1.2415648579009298e-05, + "loss": 0.7132977247238159, + "step": 7140 + }, + { + "epoch": 1.3194789296809137, + "grad_norm": 0.06762132048606873, + "learning_rate": 1.2413712695693334e-05, + "loss": 0.4622902274131775, + "step": 7141 + }, + { + "epoch": 1.3196637063898096, + "grad_norm": 0.07987305521965027, + "learning_rate": 1.241177671631916e-05, + "loss": 0.531873345375061, + "step": 7142 + }, + { + "epoch": 1.3198484830987054, + "grad_norm": 0.07383070886135101, + "learning_rate": 1.2409840640963818e-05, + "loss": 0.46143603324890137, + "step": 7143 + }, + { + "epoch": 1.3200332598076012, + "grad_norm": 0.08451993018388748, + "learning_rate": 1.2407904469704355e-05, + "loss": 0.4642788767814636, + "step": 7144 + }, + { + "epoch": 1.320218036516497, + "grad_norm": 0.07359858602285385, + "learning_rate": 1.2405968202617828e-05, + "loss": 0.4435389041900635, + "step": 7145 + }, + { + "epoch": 1.3204028132253929, + "grad_norm": 0.08350193500518799, + "learning_rate": 1.2404031839781297e-05, + "loss": 0.5778533816337585, + "step": 7146 + }, + { + "epoch": 1.3205875899342887, + "grad_norm": 0.09533848613500595, + "learning_rate": 1.2402095381271817e-05, + "loss": 0.7368243336677551, + "step": 7147 + }, + { + "epoch": 1.3207723666431845, + "grad_norm": 0.08333290368318558, + "learning_rate": 1.2400158827166456e-05, + "loss": 0.4749222695827484, + "step": 7148 + }, + { + "epoch": 1.3209571433520804, + "grad_norm": 0.07425980269908905, + "learning_rate": 1.2398222177542284e-05, + "loss": 0.4216611087322235, + "step": 7149 + }, + { + "epoch": 1.3211419200609762, + "grad_norm": 0.07611807435750961, + "learning_rate": 1.2396285432476374e-05, + "loss": 0.472459077835083, + "step": 7150 + }, + { + "epoch": 1.321326696769872, + "grad_norm": 0.07201111316680908, + "learning_rate": 1.2394348592045797e-05, + "loss": 0.37915852665901184, + "step": 7151 + }, + { + "epoch": 1.321511473478768, + "grad_norm": 0.0910440981388092, + "learning_rate": 1.2392411656327638e-05, + "loss": 0.6684134602546692, + "step": 7152 + }, + { + "epoch": 1.321696250187664, + "grad_norm": 0.08137714117765427, + "learning_rate": 1.2390474625398982e-05, + "loss": 0.5427489876747131, + "step": 7153 + }, + { + "epoch": 1.3218810268965597, + "grad_norm": 0.07152608782052994, + "learning_rate": 1.2388537499336915e-05, + "loss": 0.49878379702568054, + "step": 7154 + }, + { + "epoch": 1.3220658036054556, + "grad_norm": 0.07559207826852798, + "learning_rate": 1.2386600278218527e-05, + "loss": 0.5141193866729736, + "step": 7155 + }, + { + "epoch": 1.3222505803143514, + "grad_norm": 0.06035945564508438, + "learning_rate": 1.2384662962120914e-05, + "loss": 0.34767431020736694, + "step": 7156 + }, + { + "epoch": 1.3224353570232472, + "grad_norm": 0.08908118307590485, + "learning_rate": 1.2382725551121175e-05, + "loss": 0.5584812164306641, + "step": 7157 + }, + { + "epoch": 1.322620133732143, + "grad_norm": 0.0691198781132698, + "learning_rate": 1.2380788045296414e-05, + "loss": 0.36187484860420227, + "step": 7158 + }, + { + "epoch": 1.3228049104410389, + "grad_norm": 0.07853727042675018, + "learning_rate": 1.237885044472374e-05, + "loss": 0.4802594780921936, + "step": 7159 + }, + { + "epoch": 1.3229896871499347, + "grad_norm": 0.08320342749357224, + "learning_rate": 1.237691274948026e-05, + "loss": 0.5174792408943176, + "step": 7160 + }, + { + "epoch": 1.3231744638588305, + "grad_norm": 0.07984607666730881, + "learning_rate": 1.2374974959643087e-05, + "loss": 0.65461665391922, + "step": 7161 + }, + { + "epoch": 1.3233592405677264, + "grad_norm": 0.09089548885822296, + "learning_rate": 1.2373037075289343e-05, + "loss": 0.6292493343353271, + "step": 7162 + }, + { + "epoch": 1.3235440172766224, + "grad_norm": 0.08720018714666367, + "learning_rate": 1.2371099096496146e-05, + "loss": 0.6128824353218079, + "step": 7163 + }, + { + "epoch": 1.3237287939855182, + "grad_norm": 0.07171821594238281, + "learning_rate": 1.2369161023340623e-05, + "loss": 0.5503191351890564, + "step": 7164 + }, + { + "epoch": 1.323913570694414, + "grad_norm": 0.08616384863853455, + "learning_rate": 1.2367222855899906e-05, + "loss": 0.6394333243370056, + "step": 7165 + }, + { + "epoch": 1.32409834740331, + "grad_norm": 0.0766521617770195, + "learning_rate": 1.2365284594251124e-05, + "loss": 0.4956338703632355, + "step": 7166 + }, + { + "epoch": 1.3242831241122057, + "grad_norm": 0.0780312716960907, + "learning_rate": 1.2363346238471415e-05, + "loss": 0.48946505784988403, + "step": 7167 + }, + { + "epoch": 1.3244679008211016, + "grad_norm": 0.07764456421136856, + "learning_rate": 1.2361407788637917e-05, + "loss": 0.5881205797195435, + "step": 7168 + }, + { + "epoch": 1.3246526775299974, + "grad_norm": 0.07844637334346771, + "learning_rate": 1.2359469244827781e-05, + "loss": 0.5487515926361084, + "step": 7169 + }, + { + "epoch": 1.3248374542388932, + "grad_norm": 0.08375510573387146, + "learning_rate": 1.2357530607118151e-05, + "loss": 0.5522754788398743, + "step": 7170 + }, + { + "epoch": 1.325022230947789, + "grad_norm": 0.0740007609128952, + "learning_rate": 1.2355591875586175e-05, + "loss": 0.46284234523773193, + "step": 7171 + }, + { + "epoch": 1.3252070076566849, + "grad_norm": 0.08384130150079727, + "learning_rate": 1.2353653050309013e-05, + "loss": 0.48505058884620667, + "step": 7172 + }, + { + "epoch": 1.3253917843655807, + "grad_norm": 0.07120472937822342, + "learning_rate": 1.2351714131363828e-05, + "loss": 0.6011337637901306, + "step": 7173 + }, + { + "epoch": 1.3255765610744765, + "grad_norm": 0.06549742817878723, + "learning_rate": 1.2349775118827772e-05, + "loss": 0.43408912420272827, + "step": 7174 + }, + { + "epoch": 1.3257613377833724, + "grad_norm": 0.07499620318412781, + "learning_rate": 1.2347836012778021e-05, + "loss": 0.4794289767742157, + "step": 7175 + }, + { + "epoch": 1.3259461144922682, + "grad_norm": 0.06897576153278351, + "learning_rate": 1.2345896813291743e-05, + "loss": 0.4830062687397003, + "step": 7176 + }, + { + "epoch": 1.326130891201164, + "grad_norm": 0.062312051653862, + "learning_rate": 1.2343957520446106e-05, + "loss": 0.3550172746181488, + "step": 7177 + }, + { + "epoch": 1.3263156679100598, + "grad_norm": 0.05161038041114807, + "learning_rate": 1.2342018134318296e-05, + "loss": 0.33226051926612854, + "step": 7178 + }, + { + "epoch": 1.3265004446189557, + "grad_norm": 0.08036305010318756, + "learning_rate": 1.2340078654985495e-05, + "loss": 0.5838134288787842, + "step": 7179 + }, + { + "epoch": 1.3266852213278515, + "grad_norm": 0.05553985759615898, + "learning_rate": 1.2338139082524883e-05, + "loss": 0.3583305776119232, + "step": 7180 + }, + { + "epoch": 1.3268699980367475, + "grad_norm": 0.0874638631939888, + "learning_rate": 1.2336199417013649e-05, + "loss": 0.624904990196228, + "step": 7181 + }, + { + "epoch": 1.3270547747456434, + "grad_norm": 0.10083182156085968, + "learning_rate": 1.2334259658528985e-05, + "loss": 0.6989080905914307, + "step": 7182 + }, + { + "epoch": 1.3272395514545392, + "grad_norm": 0.0915534570813179, + "learning_rate": 1.2332319807148094e-05, + "loss": 0.7326936721801758, + "step": 7183 + }, + { + "epoch": 1.327424328163435, + "grad_norm": 0.06375181674957275, + "learning_rate": 1.2330379862948167e-05, + "loss": 0.47836050391197205, + "step": 7184 + }, + { + "epoch": 1.3276091048723309, + "grad_norm": 0.06903169304132462, + "learning_rate": 1.2328439826006415e-05, + "loss": 0.5004631876945496, + "step": 7185 + }, + { + "epoch": 1.3277938815812267, + "grad_norm": 0.07927877455949783, + "learning_rate": 1.2326499696400042e-05, + "loss": 0.6567851901054382, + "step": 7186 + }, + { + "epoch": 1.3279786582901225, + "grad_norm": 0.08236057311296463, + "learning_rate": 1.2324559474206261e-05, + "loss": 0.549164354801178, + "step": 7187 + }, + { + "epoch": 1.3281634349990183, + "grad_norm": 0.1732921451330185, + "learning_rate": 1.2322619159502287e-05, + "loss": 0.6361644864082336, + "step": 7188 + }, + { + "epoch": 1.3283482117079142, + "grad_norm": 0.08105883747339249, + "learning_rate": 1.2320678752365333e-05, + "loss": 0.5528790950775146, + "step": 7189 + }, + { + "epoch": 1.32853298841681, + "grad_norm": 0.09747636318206787, + "learning_rate": 1.2318738252872628e-05, + "loss": 0.7589352130889893, + "step": 7190 + }, + { + "epoch": 1.3287177651257058, + "grad_norm": 0.0843818187713623, + "learning_rate": 1.2316797661101394e-05, + "loss": 0.6057540774345398, + "step": 7191 + }, + { + "epoch": 1.3289025418346019, + "grad_norm": 0.07819300144910812, + "learning_rate": 1.2314856977128859e-05, + "loss": 0.440432608127594, + "step": 7192 + }, + { + "epoch": 1.3290873185434977, + "grad_norm": 0.0857505276799202, + "learning_rate": 1.2312916201032263e-05, + "loss": 0.5792015194892883, + "step": 7193 + }, + { + "epoch": 1.3292720952523935, + "grad_norm": 0.07800312340259552, + "learning_rate": 1.2310975332888837e-05, + "loss": 0.4814336597919464, + "step": 7194 + }, + { + "epoch": 1.3294568719612894, + "grad_norm": 0.05761153995990753, + "learning_rate": 1.230903437277582e-05, + "loss": 0.3713530898094177, + "step": 7195 + }, + { + "epoch": 1.3296416486701852, + "grad_norm": 0.0807412788271904, + "learning_rate": 1.2307093320770463e-05, + "loss": 0.5634688138961792, + "step": 7196 + }, + { + "epoch": 1.329826425379081, + "grad_norm": 0.0661948099732399, + "learning_rate": 1.2305152176950008e-05, + "loss": 0.5103600025177002, + "step": 7197 + }, + { + "epoch": 1.3300112020879769, + "grad_norm": 0.08528309315443039, + "learning_rate": 1.2303210941391708e-05, + "loss": 0.6124686598777771, + "step": 7198 + }, + { + "epoch": 1.3301959787968727, + "grad_norm": 0.07087121158838272, + "learning_rate": 1.230126961417282e-05, + "loss": 0.6103671789169312, + "step": 7199 + }, + { + "epoch": 1.3303807555057685, + "grad_norm": 0.07148048281669617, + "learning_rate": 1.22993281953706e-05, + "loss": 0.47916725277900696, + "step": 7200 + }, + { + "epoch": 1.3305655322146643, + "grad_norm": 0.06045059859752655, + "learning_rate": 1.229738668506231e-05, + "loss": 0.3602880537509918, + "step": 7201 + }, + { + "epoch": 1.3307503089235602, + "grad_norm": 0.08225797116756439, + "learning_rate": 1.2295445083325217e-05, + "loss": 0.5574508309364319, + "step": 7202 + }, + { + "epoch": 1.330935085632456, + "grad_norm": 0.0703793317079544, + "learning_rate": 1.2293503390236595e-05, + "loss": 0.46736153960227966, + "step": 7203 + }, + { + "epoch": 1.3311198623413518, + "grad_norm": 0.06916027516126633, + "learning_rate": 1.229156160587371e-05, + "loss": 0.5249672532081604, + "step": 7204 + }, + { + "epoch": 1.3313046390502477, + "grad_norm": 0.06406823545694351, + "learning_rate": 1.2289619730313847e-05, + "loss": 0.49783429503440857, + "step": 7205 + }, + { + "epoch": 1.3314894157591435, + "grad_norm": 0.07925626635551453, + "learning_rate": 1.2287677763634278e-05, + "loss": 0.594709038734436, + "step": 7206 + }, + { + "epoch": 1.3316741924680393, + "grad_norm": 0.08192488551139832, + "learning_rate": 1.228573570591229e-05, + "loss": 0.540943443775177, + "step": 7207 + }, + { + "epoch": 1.3318589691769351, + "grad_norm": 0.06959114968776703, + "learning_rate": 1.2283793557225176e-05, + "loss": 0.42747682332992554, + "step": 7208 + }, + { + "epoch": 1.332043745885831, + "grad_norm": 0.062199659645557404, + "learning_rate": 1.228185131765022e-05, + "loss": 0.44615626335144043, + "step": 7209 + }, + { + "epoch": 1.332228522594727, + "grad_norm": 0.06088728830218315, + "learning_rate": 1.2279908987264725e-05, + "loss": 0.3549799919128418, + "step": 7210 + }, + { + "epoch": 1.3324132993036228, + "grad_norm": 0.0674210712313652, + "learning_rate": 1.227796656614598e-05, + "loss": 0.38973909616470337, + "step": 7211 + }, + { + "epoch": 1.3325980760125187, + "grad_norm": 0.055883023887872696, + "learning_rate": 1.2276024054371299e-05, + "loss": 0.3431173861026764, + "step": 7212 + }, + { + "epoch": 1.3327828527214145, + "grad_norm": 0.07490044087171555, + "learning_rate": 1.2274081452017975e-05, + "loss": 0.550331175327301, + "step": 7213 + }, + { + "epoch": 1.3329676294303103, + "grad_norm": 0.0761045292019844, + "learning_rate": 1.2272138759163326e-05, + "loss": 0.5015029311180115, + "step": 7214 + }, + { + "epoch": 1.3331524061392062, + "grad_norm": 0.06385071575641632, + "learning_rate": 1.2270195975884664e-05, + "loss": 0.43876397609710693, + "step": 7215 + }, + { + "epoch": 1.333337182848102, + "grad_norm": 0.07857915759086609, + "learning_rate": 1.2268253102259302e-05, + "loss": 0.5978013873100281, + "step": 7216 + }, + { + "epoch": 1.3335219595569978, + "grad_norm": 0.06934911012649536, + "learning_rate": 1.2266310138364565e-05, + "loss": 0.4774762988090515, + "step": 7217 + }, + { + "epoch": 1.3337067362658936, + "grad_norm": 0.07244524359703064, + "learning_rate": 1.2264367084277778e-05, + "loss": 0.4618462324142456, + "step": 7218 + }, + { + "epoch": 1.3338915129747895, + "grad_norm": 0.09606807678937912, + "learning_rate": 1.226242394007626e-05, + "loss": 0.6047474145889282, + "step": 7219 + }, + { + "epoch": 1.3340762896836853, + "grad_norm": 0.08191752433776855, + "learning_rate": 1.226048070583735e-05, + "loss": 0.5253011584281921, + "step": 7220 + }, + { + "epoch": 1.3342610663925814, + "grad_norm": 0.08500118553638458, + "learning_rate": 1.225853738163838e-05, + "loss": 0.5972819328308105, + "step": 7221 + }, + { + "epoch": 1.3344458431014772, + "grad_norm": 0.08752238750457764, + "learning_rate": 1.2256593967556689e-05, + "loss": 0.5581551790237427, + "step": 7222 + }, + { + "epoch": 1.334630619810373, + "grad_norm": 0.0636662021279335, + "learning_rate": 1.2254650463669614e-05, + "loss": 0.4321964681148529, + "step": 7223 + }, + { + "epoch": 1.3348153965192688, + "grad_norm": 0.07094656676054001, + "learning_rate": 1.225270687005451e-05, + "loss": 0.4596783518791199, + "step": 7224 + }, + { + "epoch": 1.3350001732281647, + "grad_norm": 0.07906907796859741, + "learning_rate": 1.2250763186788717e-05, + "loss": 0.6005190014839172, + "step": 7225 + }, + { + "epoch": 1.3351849499370605, + "grad_norm": 0.06554142385721207, + "learning_rate": 1.2248819413949591e-05, + "loss": 0.4663126468658447, + "step": 7226 + }, + { + "epoch": 1.3353697266459563, + "grad_norm": 0.06856255233287811, + "learning_rate": 1.2246875551614487e-05, + "loss": 0.5557185411453247, + "step": 7227 + }, + { + "epoch": 1.3355545033548522, + "grad_norm": 0.06476157903671265, + "learning_rate": 1.224493159986077e-05, + "loss": 0.5419876575469971, + "step": 7228 + }, + { + "epoch": 1.335739280063748, + "grad_norm": 0.07515493035316467, + "learning_rate": 1.2242987558765798e-05, + "loss": 0.5306958556175232, + "step": 7229 + }, + { + "epoch": 1.3359240567726438, + "grad_norm": 0.07471494376659393, + "learning_rate": 1.2241043428406936e-05, + "loss": 0.4942961037158966, + "step": 7230 + }, + { + "epoch": 1.3361088334815396, + "grad_norm": 0.06926632672548294, + "learning_rate": 1.2239099208861555e-05, + "loss": 0.4492073655128479, + "step": 7231 + }, + { + "epoch": 1.3362936101904355, + "grad_norm": 0.07723340392112732, + "learning_rate": 1.2237154900207036e-05, + "loss": 0.576379120349884, + "step": 7232 + }, + { + "epoch": 1.3364783868993313, + "grad_norm": 0.08155592530965805, + "learning_rate": 1.2235210502520746e-05, + "loss": 0.688007116317749, + "step": 7233 + }, + { + "epoch": 1.3366631636082271, + "grad_norm": 0.07088128477334976, + "learning_rate": 1.2233266015880074e-05, + "loss": 0.48353421688079834, + "step": 7234 + }, + { + "epoch": 1.336847940317123, + "grad_norm": 0.07835712283849716, + "learning_rate": 1.2231321440362402e-05, + "loss": 0.6117390990257263, + "step": 7235 + }, + { + "epoch": 1.3370327170260188, + "grad_norm": 0.08962835371494293, + "learning_rate": 1.2229376776045116e-05, + "loss": 0.5430634617805481, + "step": 7236 + }, + { + "epoch": 1.3372174937349146, + "grad_norm": 0.08353981375694275, + "learning_rate": 1.2227432023005608e-05, + "loss": 0.5032727122306824, + "step": 7237 + }, + { + "epoch": 1.3374022704438104, + "grad_norm": 0.06118007004261017, + "learning_rate": 1.2225487181321278e-05, + "loss": 0.458617627620697, + "step": 7238 + }, + { + "epoch": 1.3375870471527063, + "grad_norm": 0.08352229744195938, + "learning_rate": 1.222354225106952e-05, + "loss": 0.6291279196739197, + "step": 7239 + }, + { + "epoch": 1.3377718238616023, + "grad_norm": 0.08776495605707169, + "learning_rate": 1.2221597232327736e-05, + "loss": 0.617847204208374, + "step": 7240 + }, + { + "epoch": 1.3379566005704981, + "grad_norm": 0.08763931691646576, + "learning_rate": 1.221965212517333e-05, + "loss": 0.6111961603164673, + "step": 7241 + }, + { + "epoch": 1.338141377279394, + "grad_norm": 0.0853385329246521, + "learning_rate": 1.221770692968372e-05, + "loss": 0.626089334487915, + "step": 7242 + }, + { + "epoch": 1.3383261539882898, + "grad_norm": 0.059536758810281754, + "learning_rate": 1.2215761645936307e-05, + "loss": 0.3988431990146637, + "step": 7243 + }, + { + "epoch": 1.3385109306971856, + "grad_norm": 0.05607954412698746, + "learning_rate": 1.2213816274008515e-05, + "loss": 0.33263882994651794, + "step": 7244 + }, + { + "epoch": 1.3386957074060815, + "grad_norm": 0.06977252662181854, + "learning_rate": 1.221187081397776e-05, + "loss": 0.4728195071220398, + "step": 7245 + }, + { + "epoch": 1.3388804841149773, + "grad_norm": 0.08638498932123184, + "learning_rate": 1.2209925265921469e-05, + "loss": 0.5551614761352539, + "step": 7246 + }, + { + "epoch": 1.3390652608238731, + "grad_norm": 0.07965132594108582, + "learning_rate": 1.2207979629917061e-05, + "loss": 0.6850483417510986, + "step": 7247 + }, + { + "epoch": 1.339250037532769, + "grad_norm": 0.11833593249320984, + "learning_rate": 1.2206033906041979e-05, + "loss": 0.7881156802177429, + "step": 7248 + }, + { + "epoch": 1.3394348142416648, + "grad_norm": 0.08182307332754135, + "learning_rate": 1.2204088094373647e-05, + "loss": 0.518099844455719, + "step": 7249 + }, + { + "epoch": 1.3396195909505606, + "grad_norm": 0.09841877222061157, + "learning_rate": 1.22021421949895e-05, + "loss": 0.5551977157592773, + "step": 7250 + }, + { + "epoch": 1.3398043676594567, + "grad_norm": 0.07546274363994598, + "learning_rate": 1.2200196207966988e-05, + "loss": 0.5618906021118164, + "step": 7251 + }, + { + "epoch": 1.3399891443683525, + "grad_norm": 0.09216009825468063, + "learning_rate": 1.2198250133383552e-05, + "loss": 0.7444961071014404, + "step": 7252 + }, + { + "epoch": 1.3401739210772483, + "grad_norm": 0.06232066452503204, + "learning_rate": 1.2196303971316632e-05, + "loss": 0.3908202350139618, + "step": 7253 + }, + { + "epoch": 1.3403586977861441, + "grad_norm": 0.07654942572116852, + "learning_rate": 1.2194357721843689e-05, + "loss": 0.5528795123100281, + "step": 7254 + }, + { + "epoch": 1.34054347449504, + "grad_norm": 0.07393674552440643, + "learning_rate": 1.2192411385042176e-05, + "loss": 0.45518913865089417, + "step": 7255 + }, + { + "epoch": 1.3407282512039358, + "grad_norm": 0.07973166555166245, + "learning_rate": 1.2190464960989546e-05, + "loss": 0.5710092186927795, + "step": 7256 + }, + { + "epoch": 1.3409130279128316, + "grad_norm": 0.09130148589611053, + "learning_rate": 1.2188518449763263e-05, + "loss": 0.59661865234375, + "step": 7257 + }, + { + "epoch": 1.3410978046217275, + "grad_norm": 0.07427915185689926, + "learning_rate": 1.2186571851440793e-05, + "loss": 0.4620944857597351, + "step": 7258 + }, + { + "epoch": 1.3412825813306233, + "grad_norm": 0.08230415731668472, + "learning_rate": 1.2184625166099609e-05, + "loss": 0.5412794947624207, + "step": 7259 + }, + { + "epoch": 1.341467358039519, + "grad_norm": 0.08303827047348022, + "learning_rate": 1.2182678393817168e-05, + "loss": 0.5233322978019714, + "step": 7260 + }, + { + "epoch": 1.341652134748415, + "grad_norm": 0.07536231726408005, + "learning_rate": 1.2180731534670964e-05, + "loss": 0.5114122033119202, + "step": 7261 + }, + { + "epoch": 1.3418369114573108, + "grad_norm": 0.07248736172914505, + "learning_rate": 1.2178784588738468e-05, + "loss": 0.5382394194602966, + "step": 7262 + }, + { + "epoch": 1.3420216881662066, + "grad_norm": 0.07889589667320251, + "learning_rate": 1.2176837556097158e-05, + "loss": 0.5099925994873047, + "step": 7263 + }, + { + "epoch": 1.3422064648751024, + "grad_norm": 0.05722340941429138, + "learning_rate": 1.2174890436824525e-05, + "loss": 0.3894376754760742, + "step": 7264 + }, + { + "epoch": 1.3423912415839983, + "grad_norm": 0.04962385445833206, + "learning_rate": 1.2172943230998058e-05, + "loss": 0.3173448145389557, + "step": 7265 + }, + { + "epoch": 1.342576018292894, + "grad_norm": 0.07014395296573639, + "learning_rate": 1.217099593869525e-05, + "loss": 0.4455048441886902, + "step": 7266 + }, + { + "epoch": 1.34276079500179, + "grad_norm": 0.07519932091236115, + "learning_rate": 1.2169048559993591e-05, + "loss": 0.5243011713027954, + "step": 7267 + }, + { + "epoch": 1.3429455717106857, + "grad_norm": 0.07758233696222305, + "learning_rate": 1.2167101094970588e-05, + "loss": 0.5891153216362, + "step": 7268 + }, + { + "epoch": 1.3431303484195818, + "grad_norm": 0.08783500641584396, + "learning_rate": 1.2165153543703744e-05, + "loss": 0.6760488152503967, + "step": 7269 + }, + { + "epoch": 1.3433151251284776, + "grad_norm": 0.05743502825498581, + "learning_rate": 1.2163205906270558e-05, + "loss": 0.45588070154190063, + "step": 7270 + }, + { + "epoch": 1.3434999018373734, + "grad_norm": 0.07715628296136856, + "learning_rate": 1.2161258182748548e-05, + "loss": 0.5763320922851562, + "step": 7271 + }, + { + "epoch": 1.3436846785462693, + "grad_norm": 0.0772564709186554, + "learning_rate": 1.2159310373215223e-05, + "loss": 0.6143149733543396, + "step": 7272 + }, + { + "epoch": 1.343869455255165, + "grad_norm": 0.07961688190698624, + "learning_rate": 1.21573624777481e-05, + "loss": 0.49570122361183167, + "step": 7273 + }, + { + "epoch": 1.344054231964061, + "grad_norm": 0.06768622994422913, + "learning_rate": 1.21554144964247e-05, + "loss": 0.4177214503288269, + "step": 7274 + }, + { + "epoch": 1.3442390086729568, + "grad_norm": 0.07863906025886536, + "learning_rate": 1.215346642932255e-05, + "loss": 0.5487481355667114, + "step": 7275 + }, + { + "epoch": 1.3444237853818526, + "grad_norm": 0.07579664140939713, + "learning_rate": 1.215151827651917e-05, + "loss": 0.7003390789031982, + "step": 7276 + }, + { + "epoch": 1.3446085620907484, + "grad_norm": 0.07688242942094803, + "learning_rate": 1.214957003809209e-05, + "loss": 0.48274096846580505, + "step": 7277 + }, + { + "epoch": 1.3447933387996442, + "grad_norm": 0.09185263514518738, + "learning_rate": 1.2147621714118856e-05, + "loss": 0.7358651161193848, + "step": 7278 + }, + { + "epoch": 1.34497811550854, + "grad_norm": 0.07346237450838089, + "learning_rate": 1.2145673304676995e-05, + "loss": 0.5424431562423706, + "step": 7279 + }, + { + "epoch": 1.3451628922174361, + "grad_norm": 0.08200640231370926, + "learning_rate": 1.2143724809844046e-05, + "loss": 0.6213791370391846, + "step": 7280 + }, + { + "epoch": 1.345347668926332, + "grad_norm": 0.07538006454706192, + "learning_rate": 1.2141776229697557e-05, + "loss": 0.4409875273704529, + "step": 7281 + }, + { + "epoch": 1.3455324456352278, + "grad_norm": 0.07102343440055847, + "learning_rate": 1.2139827564315077e-05, + "loss": 0.5566208958625793, + "step": 7282 + }, + { + "epoch": 1.3457172223441236, + "grad_norm": 0.07195903360843658, + "learning_rate": 1.213787881377415e-05, + "loss": 0.5001834034919739, + "step": 7283 + }, + { + "epoch": 1.3459019990530194, + "grad_norm": 0.0780748501420021, + "learning_rate": 1.2135929978152339e-05, + "loss": 0.48966068029403687, + "step": 7284 + }, + { + "epoch": 1.3460867757619153, + "grad_norm": 0.06550266593694687, + "learning_rate": 1.2133981057527197e-05, + "loss": 0.5034133195877075, + "step": 7285 + }, + { + "epoch": 1.346271552470811, + "grad_norm": 0.09186110645532608, + "learning_rate": 1.2132032051976285e-05, + "loss": 0.5809505581855774, + "step": 7286 + }, + { + "epoch": 1.346456329179707, + "grad_norm": 0.0906052365899086, + "learning_rate": 1.2130082961577167e-05, + "loss": 0.6895391345024109, + "step": 7287 + }, + { + "epoch": 1.3466411058886028, + "grad_norm": 0.07443145662546158, + "learning_rate": 1.2128133786407413e-05, + "loss": 0.538640022277832, + "step": 7288 + }, + { + "epoch": 1.3468258825974986, + "grad_norm": 0.057054303586483, + "learning_rate": 1.2126184526544591e-05, + "loss": 0.402135968208313, + "step": 7289 + }, + { + "epoch": 1.3470106593063944, + "grad_norm": 0.08881109952926636, + "learning_rate": 1.2124235182066275e-05, + "loss": 0.6084185838699341, + "step": 7290 + }, + { + "epoch": 1.3471954360152902, + "grad_norm": 0.06212284043431282, + "learning_rate": 1.2122285753050047e-05, + "loss": 0.3448404371738434, + "step": 7291 + }, + { + "epoch": 1.347380212724186, + "grad_norm": 0.0877557173371315, + "learning_rate": 1.2120336239573484e-05, + "loss": 0.6045507192611694, + "step": 7292 + }, + { + "epoch": 1.347564989433082, + "grad_norm": 0.07777206599712372, + "learning_rate": 1.2118386641714174e-05, + "loss": 0.6168590188026428, + "step": 7293 + }, + { + "epoch": 1.3477497661419777, + "grad_norm": 0.08838807791471481, + "learning_rate": 1.2116436959549704e-05, + "loss": 0.607566773891449, + "step": 7294 + }, + { + "epoch": 1.3479345428508736, + "grad_norm": 0.07736007124185562, + "learning_rate": 1.2114487193157663e-05, + "loss": 0.5946328639984131, + "step": 7295 + }, + { + "epoch": 1.3481193195597694, + "grad_norm": 0.07302294671535492, + "learning_rate": 1.2112537342615646e-05, + "loss": 0.45372599363327026, + "step": 7296 + }, + { + "epoch": 1.3483040962686652, + "grad_norm": 0.10016177594661713, + "learning_rate": 1.2110587408001256e-05, + "loss": 0.5879311561584473, + "step": 7297 + }, + { + "epoch": 1.3484888729775613, + "grad_norm": 0.08851506561040878, + "learning_rate": 1.2108637389392087e-05, + "loss": 0.6472713947296143, + "step": 7298 + }, + { + "epoch": 1.348673649686457, + "grad_norm": 0.06454683095216751, + "learning_rate": 1.2106687286865748e-05, + "loss": 0.4215412437915802, + "step": 7299 + }, + { + "epoch": 1.348858426395353, + "grad_norm": 0.07148120552301407, + "learning_rate": 1.2104737100499843e-05, + "loss": 0.6257513761520386, + "step": 7300 + }, + { + "epoch": 1.3490432031042487, + "grad_norm": 0.06810782104730606, + "learning_rate": 1.210278683037199e-05, + "loss": 0.31949469447135925, + "step": 7301 + }, + { + "epoch": 1.3492279798131446, + "grad_norm": 0.07105056196451187, + "learning_rate": 1.2100836476559799e-05, + "loss": 0.4729495942592621, + "step": 7302 + }, + { + "epoch": 1.3494127565220404, + "grad_norm": 0.06415511667728424, + "learning_rate": 1.209888603914089e-05, + "loss": 0.41198405623435974, + "step": 7303 + }, + { + "epoch": 1.3495975332309362, + "grad_norm": 0.05516704544425011, + "learning_rate": 1.2096935518192883e-05, + "loss": 0.35060352087020874, + "step": 7304 + }, + { + "epoch": 1.349782309939832, + "grad_norm": 0.08884477615356445, + "learning_rate": 1.2094984913793399e-05, + "loss": 0.6026931405067444, + "step": 7305 + }, + { + "epoch": 1.3499670866487279, + "grad_norm": 0.05907578393816948, + "learning_rate": 1.2093034226020073e-05, + "loss": 0.4430491328239441, + "step": 7306 + }, + { + "epoch": 1.3501518633576237, + "grad_norm": 0.08960863947868347, + "learning_rate": 1.2091083454950534e-05, + "loss": 0.660787045955658, + "step": 7307 + }, + { + "epoch": 1.3503366400665195, + "grad_norm": 0.08193490654230118, + "learning_rate": 1.2089132600662412e-05, + "loss": 0.62440025806427, + "step": 7308 + }, + { + "epoch": 1.3505214167754156, + "grad_norm": 0.06207743287086487, + "learning_rate": 1.2087181663233354e-05, + "loss": 0.3066102862358093, + "step": 7309 + }, + { + "epoch": 1.3507061934843114, + "grad_norm": 0.07043956220149994, + "learning_rate": 1.208523064274099e-05, + "loss": 0.4655773341655731, + "step": 7310 + }, + { + "epoch": 1.3508909701932073, + "grad_norm": 0.07935937494039536, + "learning_rate": 1.2083279539262976e-05, + "loss": 0.5760067105293274, + "step": 7311 + }, + { + "epoch": 1.351075746902103, + "grad_norm": 0.06078021600842476, + "learning_rate": 1.2081328352876949e-05, + "loss": 0.45882537961006165, + "step": 7312 + }, + { + "epoch": 1.351260523610999, + "grad_norm": 0.08058591932058334, + "learning_rate": 1.2079377083660565e-05, + "loss": 0.5471338629722595, + "step": 7313 + }, + { + "epoch": 1.3514453003198947, + "grad_norm": 0.07964106649160385, + "learning_rate": 1.2077425731691484e-05, + "loss": 0.5816575884819031, + "step": 7314 + }, + { + "epoch": 1.3516300770287906, + "grad_norm": 0.07928101718425751, + "learning_rate": 1.2075474297047353e-05, + "loss": 0.5275821089744568, + "step": 7315 + }, + { + "epoch": 1.3518148537376864, + "grad_norm": 0.07923033088445663, + "learning_rate": 1.207352277980584e-05, + "loss": 0.5912119150161743, + "step": 7316 + }, + { + "epoch": 1.3519996304465822, + "grad_norm": 0.07181008905172348, + "learning_rate": 1.207157118004461e-05, + "loss": 0.4410319924354553, + "step": 7317 + }, + { + "epoch": 1.352184407155478, + "grad_norm": 0.06785266846418381, + "learning_rate": 1.2069619497841327e-05, + "loss": 0.4814789593219757, + "step": 7318 + }, + { + "epoch": 1.3523691838643739, + "grad_norm": 0.0755409523844719, + "learning_rate": 1.2067667733273662e-05, + "loss": 0.5103818774223328, + "step": 7319 + }, + { + "epoch": 1.3525539605732697, + "grad_norm": 0.08887257426977158, + "learning_rate": 1.206571588641929e-05, + "loss": 0.6387781500816345, + "step": 7320 + }, + { + "epoch": 1.3527387372821655, + "grad_norm": 0.0847201719880104, + "learning_rate": 1.206376395735589e-05, + "loss": 0.5143601894378662, + "step": 7321 + }, + { + "epoch": 1.3529235139910614, + "grad_norm": 0.062031347304582596, + "learning_rate": 1.2061811946161137e-05, + "loss": 0.3705389201641083, + "step": 7322 + }, + { + "epoch": 1.3531082906999572, + "grad_norm": 0.07319609075784683, + "learning_rate": 1.2059859852912724e-05, + "loss": 0.546544075012207, + "step": 7323 + }, + { + "epoch": 1.353293067408853, + "grad_norm": 0.08957704901695251, + "learning_rate": 1.205790767768833e-05, + "loss": 0.607306182384491, + "step": 7324 + }, + { + "epoch": 1.3534778441177489, + "grad_norm": 0.08340125530958176, + "learning_rate": 1.2055955420565651e-05, + "loss": 0.5507020950317383, + "step": 7325 + }, + { + "epoch": 1.3536626208266447, + "grad_norm": 0.07087838649749756, + "learning_rate": 1.2054003081622377e-05, + "loss": 0.5279867649078369, + "step": 7326 + }, + { + "epoch": 1.3538473975355405, + "grad_norm": 0.048319268971681595, + "learning_rate": 1.2052050660936208e-05, + "loss": 0.30317071080207825, + "step": 7327 + }, + { + "epoch": 1.3540321742444366, + "grad_norm": 0.08193907141685486, + "learning_rate": 1.2050098158584842e-05, + "loss": 0.6641421318054199, + "step": 7328 + }, + { + "epoch": 1.3542169509533324, + "grad_norm": 0.0873681902885437, + "learning_rate": 1.2048145574645985e-05, + "loss": 0.5591307878494263, + "step": 7329 + }, + { + "epoch": 1.3544017276622282, + "grad_norm": 0.0727614164352417, + "learning_rate": 1.2046192909197339e-05, + "loss": 0.5091972351074219, + "step": 7330 + }, + { + "epoch": 1.354586504371124, + "grad_norm": 0.08346467465162277, + "learning_rate": 1.2044240162316619e-05, + "loss": 0.47075793147087097, + "step": 7331 + }, + { + "epoch": 1.3547712810800199, + "grad_norm": 0.06700126826763153, + "learning_rate": 1.2042287334081532e-05, + "loss": 0.5636169910430908, + "step": 7332 + }, + { + "epoch": 1.3549560577889157, + "grad_norm": 0.05685332417488098, + "learning_rate": 1.2040334424569802e-05, + "loss": 0.34867823123931885, + "step": 7333 + }, + { + "epoch": 1.3551408344978115, + "grad_norm": 0.06659932434558868, + "learning_rate": 1.2038381433859145e-05, + "loss": 0.42646080255508423, + "step": 7334 + }, + { + "epoch": 1.3553256112067074, + "grad_norm": 0.08106103539466858, + "learning_rate": 1.2036428362027288e-05, + "loss": 0.5651917457580566, + "step": 7335 + }, + { + "epoch": 1.3555103879156032, + "grad_norm": 0.08953486382961273, + "learning_rate": 1.2034475209151945e-05, + "loss": 0.6413703560829163, + "step": 7336 + }, + { + "epoch": 1.355695164624499, + "grad_norm": 0.09362461417913437, + "learning_rate": 1.203252197531086e-05, + "loss": 0.7097888588905334, + "step": 7337 + }, + { + "epoch": 1.3558799413333948, + "grad_norm": 0.09100347012281418, + "learning_rate": 1.203056866058176e-05, + "loss": 0.5439901351928711, + "step": 7338 + }, + { + "epoch": 1.356064718042291, + "grad_norm": 0.07210123538970947, + "learning_rate": 1.2028615265042375e-05, + "loss": 0.5213744044303894, + "step": 7339 + }, + { + "epoch": 1.3562494947511867, + "grad_norm": 0.0927305668592453, + "learning_rate": 1.2026661788770453e-05, + "loss": 0.5957270264625549, + "step": 7340 + }, + { + "epoch": 1.3564342714600826, + "grad_norm": 0.06996115297079086, + "learning_rate": 1.2024708231843731e-05, + "loss": 0.4633581042289734, + "step": 7341 + }, + { + "epoch": 1.3566190481689784, + "grad_norm": 0.09780874103307724, + "learning_rate": 1.2022754594339956e-05, + "loss": 0.6641948819160461, + "step": 7342 + }, + { + "epoch": 1.3568038248778742, + "grad_norm": 0.07791966199874878, + "learning_rate": 1.2020800876336877e-05, + "loss": 0.5625864863395691, + "step": 7343 + }, + { + "epoch": 1.35698860158677, + "grad_norm": 0.09549916535615921, + "learning_rate": 1.2018847077912246e-05, + "loss": 0.7063486576080322, + "step": 7344 + }, + { + "epoch": 1.3571733782956659, + "grad_norm": 0.07231148332357407, + "learning_rate": 1.2016893199143818e-05, + "loss": 0.48578956723213196, + "step": 7345 + }, + { + "epoch": 1.3573581550045617, + "grad_norm": 0.0838562548160553, + "learning_rate": 1.2014939240109347e-05, + "loss": 0.5130182504653931, + "step": 7346 + }, + { + "epoch": 1.3575429317134575, + "grad_norm": 0.09551063925027847, + "learning_rate": 1.2012985200886602e-05, + "loss": 0.6210164427757263, + "step": 7347 + }, + { + "epoch": 1.3577277084223534, + "grad_norm": 0.08454853296279907, + "learning_rate": 1.2011031081553344e-05, + "loss": 0.7053744196891785, + "step": 7348 + }, + { + "epoch": 1.3579124851312492, + "grad_norm": 0.07290687412023544, + "learning_rate": 1.2009076882187338e-05, + "loss": 0.5396966338157654, + "step": 7349 + }, + { + "epoch": 1.358097261840145, + "grad_norm": 0.05749298259615898, + "learning_rate": 1.2007122602866357e-05, + "loss": 0.3489977717399597, + "step": 7350 + }, + { + "epoch": 1.3582820385490408, + "grad_norm": 0.06704192608594894, + "learning_rate": 1.200516824366818e-05, + "loss": 0.3745604157447815, + "step": 7351 + }, + { + "epoch": 1.3584668152579367, + "grad_norm": 0.08380575478076935, + "learning_rate": 1.2003213804670578e-05, + "loss": 0.4638144373893738, + "step": 7352 + }, + { + "epoch": 1.3586515919668325, + "grad_norm": 0.07318996638059616, + "learning_rate": 1.2001259285951333e-05, + "loss": 0.500698983669281, + "step": 7353 + }, + { + "epoch": 1.3588363686757283, + "grad_norm": 0.0672593042254448, + "learning_rate": 1.199930468758823e-05, + "loss": 0.45993903279304504, + "step": 7354 + }, + { + "epoch": 1.3590211453846242, + "grad_norm": 0.06959978491067886, + "learning_rate": 1.1997350009659057e-05, + "loss": 0.48005440831184387, + "step": 7355 + }, + { + "epoch": 1.35920592209352, + "grad_norm": 0.06343521922826767, + "learning_rate": 1.1995395252241599e-05, + "loss": 0.38496604561805725, + "step": 7356 + }, + { + "epoch": 1.359390698802416, + "grad_norm": 0.07417965680360794, + "learning_rate": 1.1993440415413655e-05, + "loss": 0.46091029047966003, + "step": 7357 + }, + { + "epoch": 1.3595754755113119, + "grad_norm": 0.06469357013702393, + "learning_rate": 1.1991485499253021e-05, + "loss": 0.46245938539505005, + "step": 7358 + }, + { + "epoch": 1.3597602522202077, + "grad_norm": 0.08116722851991653, + "learning_rate": 1.1989530503837492e-05, + "loss": 0.5595096349716187, + "step": 7359 + }, + { + "epoch": 1.3599450289291035, + "grad_norm": 0.058397118002176285, + "learning_rate": 1.1987575429244873e-05, + "loss": 0.37003904581069946, + "step": 7360 + }, + { + "epoch": 1.3601298056379993, + "grad_norm": 0.0854644626379013, + "learning_rate": 1.1985620275552974e-05, + "loss": 0.6662513613700867, + "step": 7361 + }, + { + "epoch": 1.3603145823468952, + "grad_norm": 0.0821366086602211, + "learning_rate": 1.1983665042839597e-05, + "loss": 0.4664044678211212, + "step": 7362 + }, + { + "epoch": 1.360499359055791, + "grad_norm": 0.06795128434896469, + "learning_rate": 1.1981709731182557e-05, + "loss": 0.5029423832893372, + "step": 7363 + }, + { + "epoch": 1.3606841357646868, + "grad_norm": 0.06815291196107864, + "learning_rate": 1.1979754340659673e-05, + "loss": 0.4743029475212097, + "step": 7364 + }, + { + "epoch": 1.3608689124735827, + "grad_norm": 0.05607020482420921, + "learning_rate": 1.1977798871348758e-05, + "loss": 0.3464440107345581, + "step": 7365 + }, + { + "epoch": 1.3610536891824785, + "grad_norm": 0.07072314620018005, + "learning_rate": 1.1975843323327634e-05, + "loss": 0.5013730525970459, + "step": 7366 + }, + { + "epoch": 1.3612384658913743, + "grad_norm": 0.07554870843887329, + "learning_rate": 1.197388769667413e-05, + "loss": 0.5226201415061951, + "step": 7367 + }, + { + "epoch": 1.3614232426002704, + "grad_norm": 0.07110065221786499, + "learning_rate": 1.197193199146607e-05, + "loss": 0.445374071598053, + "step": 7368 + }, + { + "epoch": 1.3616080193091662, + "grad_norm": 0.0752422958612442, + "learning_rate": 1.1969976207781287e-05, + "loss": 0.49761611223220825, + "step": 7369 + }, + { + "epoch": 1.361792796018062, + "grad_norm": 0.07382792234420776, + "learning_rate": 1.1968020345697613e-05, + "loss": 0.5077685713768005, + "step": 7370 + }, + { + "epoch": 1.3619775727269579, + "grad_norm": 0.06980964541435242, + "learning_rate": 1.1966064405292887e-05, + "loss": 0.44122546911239624, + "step": 7371 + }, + { + "epoch": 1.3621623494358537, + "grad_norm": 0.08900794386863708, + "learning_rate": 1.1964108386644948e-05, + "loss": 0.6478540301322937, + "step": 7372 + }, + { + "epoch": 1.3623471261447495, + "grad_norm": 0.08322156220674515, + "learning_rate": 1.196215228983164e-05, + "loss": 0.645605206489563, + "step": 7373 + }, + { + "epoch": 1.3625319028536453, + "grad_norm": 0.06156010180711746, + "learning_rate": 1.196019611493081e-05, + "loss": 0.4276708960533142, + "step": 7374 + }, + { + "epoch": 1.3627166795625412, + "grad_norm": 0.08435390144586563, + "learning_rate": 1.1958239862020311e-05, + "loss": 0.5966938734054565, + "step": 7375 + }, + { + "epoch": 1.362901456271437, + "grad_norm": 0.05743694305419922, + "learning_rate": 1.1956283531177986e-05, + "loss": 0.36740046739578247, + "step": 7376 + }, + { + "epoch": 1.3630862329803328, + "grad_norm": 0.0730554461479187, + "learning_rate": 1.19543271224817e-05, + "loss": 0.5683557987213135, + "step": 7377 + }, + { + "epoch": 1.3632710096892287, + "grad_norm": 0.055050771683454514, + "learning_rate": 1.1952370636009309e-05, + "loss": 0.3631197512149811, + "step": 7378 + }, + { + "epoch": 1.3634557863981245, + "grad_norm": 0.047246962785720825, + "learning_rate": 1.1950414071838673e-05, + "loss": 0.34128281474113464, + "step": 7379 + }, + { + "epoch": 1.3636405631070203, + "grad_norm": 0.06924453377723694, + "learning_rate": 1.194845743004766e-05, + "loss": 0.38558104634284973, + "step": 7380 + }, + { + "epoch": 1.3638253398159161, + "grad_norm": 0.06759911775588989, + "learning_rate": 1.1946500710714138e-05, + "loss": 0.4166731536388397, + "step": 7381 + }, + { + "epoch": 1.364010116524812, + "grad_norm": 0.08717620372772217, + "learning_rate": 1.1944543913915976e-05, + "loss": 0.6282873153686523, + "step": 7382 + }, + { + "epoch": 1.3641948932337078, + "grad_norm": 0.07526937872171402, + "learning_rate": 1.1942587039731052e-05, + "loss": 0.470471054315567, + "step": 7383 + }, + { + "epoch": 1.3643796699426036, + "grad_norm": 0.09385870397090912, + "learning_rate": 1.1940630088237239e-05, + "loss": 0.6459283232688904, + "step": 7384 + }, + { + "epoch": 1.3645644466514995, + "grad_norm": 0.05255207419395447, + "learning_rate": 1.1938673059512422e-05, + "loss": 0.317013144493103, + "step": 7385 + }, + { + "epoch": 1.3647492233603955, + "grad_norm": 0.07780256122350693, + "learning_rate": 1.1936715953634481e-05, + "loss": 0.5451396107673645, + "step": 7386 + }, + { + "epoch": 1.3649340000692913, + "grad_norm": 0.0779600441455841, + "learning_rate": 1.1934758770681306e-05, + "loss": 0.5497245192527771, + "step": 7387 + }, + { + "epoch": 1.3651187767781872, + "grad_norm": 0.08268772065639496, + "learning_rate": 1.1932801510730785e-05, + "loss": 0.5683483481407166, + "step": 7388 + }, + { + "epoch": 1.365303553487083, + "grad_norm": 0.09884671121835709, + "learning_rate": 1.193084417386081e-05, + "loss": 0.5287616848945618, + "step": 7389 + }, + { + "epoch": 1.3654883301959788, + "grad_norm": 0.08031193912029266, + "learning_rate": 1.1928886760149277e-05, + "loss": 0.49728962779045105, + "step": 7390 + }, + { + "epoch": 1.3656731069048746, + "grad_norm": 0.07396135479211807, + "learning_rate": 1.1926929269674086e-05, + "loss": 0.4677727520465851, + "step": 7391 + }, + { + "epoch": 1.3658578836137705, + "grad_norm": 0.06157707795500755, + "learning_rate": 1.1924971702513137e-05, + "loss": 0.3914041817188263, + "step": 7392 + }, + { + "epoch": 1.3660426603226663, + "grad_norm": 0.0752425342798233, + "learning_rate": 1.1923014058744343e-05, + "loss": 0.5061258673667908, + "step": 7393 + }, + { + "epoch": 1.3662274370315621, + "grad_norm": 0.08719919621944427, + "learning_rate": 1.1921056338445599e-05, + "loss": 0.5727595686912537, + "step": 7394 + }, + { + "epoch": 1.366412213740458, + "grad_norm": 0.08396576344966888, + "learning_rate": 1.1919098541694828e-05, + "loss": 0.6433411836624146, + "step": 7395 + }, + { + "epoch": 1.3665969904493538, + "grad_norm": 0.07268782705068588, + "learning_rate": 1.1917140668569933e-05, + "loss": 0.4731309413909912, + "step": 7396 + }, + { + "epoch": 1.3667817671582498, + "grad_norm": 0.059807468205690384, + "learning_rate": 1.1915182719148841e-05, + "loss": 0.333195298910141, + "step": 7397 + }, + { + "epoch": 1.3669665438671457, + "grad_norm": 0.07601609826087952, + "learning_rate": 1.191322469350947e-05, + "loss": 0.43810439109802246, + "step": 7398 + }, + { + "epoch": 1.3671513205760415, + "grad_norm": 0.061796288937330246, + "learning_rate": 1.191126659172974e-05, + "loss": 0.43337252736091614, + "step": 7399 + }, + { + "epoch": 1.3673360972849373, + "grad_norm": 0.07230713963508606, + "learning_rate": 1.1909308413887579e-05, + "loss": 0.4834917485713959, + "step": 7400 + }, + { + "epoch": 1.3675208739938332, + "grad_norm": 0.08036571741104126, + "learning_rate": 1.1907350160060918e-05, + "loss": 0.4740179777145386, + "step": 7401 + }, + { + "epoch": 1.367705650702729, + "grad_norm": 0.061237361282110214, + "learning_rate": 1.1905391830327685e-05, + "loss": 0.39236539602279663, + "step": 7402 + }, + { + "epoch": 1.3678904274116248, + "grad_norm": 0.09754402190446854, + "learning_rate": 1.1903433424765822e-05, + "loss": 0.6992279291152954, + "step": 7403 + }, + { + "epoch": 1.3680752041205206, + "grad_norm": 0.07044808566570282, + "learning_rate": 1.1901474943453262e-05, + "loss": 0.6163637638092041, + "step": 7404 + }, + { + "epoch": 1.3682599808294165, + "grad_norm": 0.07580169290304184, + "learning_rate": 1.189951638646795e-05, + "loss": 0.5042867660522461, + "step": 7405 + }, + { + "epoch": 1.3684447575383123, + "grad_norm": 0.07228293269872665, + "learning_rate": 1.1897557753887826e-05, + "loss": 0.6257280111312866, + "step": 7406 + }, + { + "epoch": 1.3686295342472081, + "grad_norm": 0.0661831870675087, + "learning_rate": 1.189559904579084e-05, + "loss": 0.4398761987686157, + "step": 7407 + }, + { + "epoch": 1.368814310956104, + "grad_norm": 0.08952990919351578, + "learning_rate": 1.1893640262254946e-05, + "loss": 0.6264430284500122, + "step": 7408 + }, + { + "epoch": 1.3689990876649998, + "grad_norm": 0.0661187618970871, + "learning_rate": 1.189168140335809e-05, + "loss": 0.37947767972946167, + "step": 7409 + }, + { + "epoch": 1.3691838643738956, + "grad_norm": 0.09342033416032791, + "learning_rate": 1.1889722469178235e-05, + "loss": 0.6840120553970337, + "step": 7410 + }, + { + "epoch": 1.3693686410827914, + "grad_norm": 0.06493902951478958, + "learning_rate": 1.1887763459793335e-05, + "loss": 0.4494801163673401, + "step": 7411 + }, + { + "epoch": 1.3695534177916873, + "grad_norm": 0.07095891237258911, + "learning_rate": 1.1885804375281357e-05, + "loss": 0.39049607515335083, + "step": 7412 + }, + { + "epoch": 1.369738194500583, + "grad_norm": 0.08437249809503555, + "learning_rate": 1.1883845215720267e-05, + "loss": 0.5024149417877197, + "step": 7413 + }, + { + "epoch": 1.369922971209479, + "grad_norm": 0.08498772233724594, + "learning_rate": 1.1881885981188029e-05, + "loss": 0.5656739473342896, + "step": 7414 + }, + { + "epoch": 1.3701077479183748, + "grad_norm": 0.07123856991529465, + "learning_rate": 1.1879926671762619e-05, + "loss": 0.4553503692150116, + "step": 7415 + }, + { + "epoch": 1.3702925246272708, + "grad_norm": 0.09772376716136932, + "learning_rate": 1.1877967287522005e-05, + "loss": 0.725156307220459, + "step": 7416 + }, + { + "epoch": 1.3704773013361666, + "grad_norm": 0.07861115038394928, + "learning_rate": 1.1876007828544172e-05, + "loss": 0.5879685878753662, + "step": 7417 + }, + { + "epoch": 1.3706620780450625, + "grad_norm": 0.07092233002185822, + "learning_rate": 1.1874048294907094e-05, + "loss": 0.4736587405204773, + "step": 7418 + }, + { + "epoch": 1.3708468547539583, + "grad_norm": 0.049529027193784714, + "learning_rate": 1.1872088686688758e-05, + "loss": 0.3412547707557678, + "step": 7419 + }, + { + "epoch": 1.3710316314628541, + "grad_norm": 0.06565441191196442, + "learning_rate": 1.187012900396715e-05, + "loss": 0.4283704459667206, + "step": 7420 + }, + { + "epoch": 1.37121640817175, + "grad_norm": 0.06818858534097672, + "learning_rate": 1.1868169246820259e-05, + "loss": 0.5220703482627869, + "step": 7421 + }, + { + "epoch": 1.3714011848806458, + "grad_norm": 0.07601748406887054, + "learning_rate": 1.1866209415326073e-05, + "loss": 0.5260892510414124, + "step": 7422 + }, + { + "epoch": 1.3715859615895416, + "grad_norm": 0.07375529408454895, + "learning_rate": 1.1864249509562595e-05, + "loss": 0.4407345950603485, + "step": 7423 + }, + { + "epoch": 1.3717707382984374, + "grad_norm": 0.06595490127801895, + "learning_rate": 1.186228952960782e-05, + "loss": 0.3844723701477051, + "step": 7424 + }, + { + "epoch": 1.3719555150073333, + "grad_norm": 0.06893607974052429, + "learning_rate": 1.1860329475539745e-05, + "loss": 0.48252663016319275, + "step": 7425 + }, + { + "epoch": 1.372140291716229, + "grad_norm": 0.06890595704317093, + "learning_rate": 1.1858369347436376e-05, + "loss": 0.4432915151119232, + "step": 7426 + }, + { + "epoch": 1.3723250684251251, + "grad_norm": 0.061962999403476715, + "learning_rate": 1.1856409145375724e-05, + "loss": 0.38078954815864563, + "step": 7427 + }, + { + "epoch": 1.372509845134021, + "grad_norm": 0.060137588530778885, + "learning_rate": 1.1854448869435796e-05, + "loss": 0.39205625653266907, + "step": 7428 + }, + { + "epoch": 1.3726946218429168, + "grad_norm": 0.08822615444660187, + "learning_rate": 1.1852488519694601e-05, + "loss": 0.6523511409759521, + "step": 7429 + }, + { + "epoch": 1.3728793985518126, + "grad_norm": 0.07423841953277588, + "learning_rate": 1.1850528096230162e-05, + "loss": 0.5378643274307251, + "step": 7430 + }, + { + "epoch": 1.3730641752607085, + "grad_norm": 0.08107765018939972, + "learning_rate": 1.1848567599120493e-05, + "loss": 0.511623203754425, + "step": 7431 + }, + { + "epoch": 1.3732489519696043, + "grad_norm": 0.0851687341928482, + "learning_rate": 1.1846607028443617e-05, + "loss": 0.5813806056976318, + "step": 7432 + }, + { + "epoch": 1.3734337286785, + "grad_norm": 0.07867684960365295, + "learning_rate": 1.184464638427756e-05, + "loss": 0.7225265502929688, + "step": 7433 + }, + { + "epoch": 1.373618505387396, + "grad_norm": 0.07499439269304276, + "learning_rate": 1.184268566670035e-05, + "loss": 0.4679722785949707, + "step": 7434 + }, + { + "epoch": 1.3738032820962918, + "grad_norm": 0.08448562771081924, + "learning_rate": 1.1840724875790011e-05, + "loss": 0.662882924079895, + "step": 7435 + }, + { + "epoch": 1.3739880588051876, + "grad_norm": 0.05104814097285271, + "learning_rate": 1.1838764011624581e-05, + "loss": 0.320466011762619, + "step": 7436 + }, + { + "epoch": 1.3741728355140834, + "grad_norm": 0.07123497128486633, + "learning_rate": 1.1836803074282099e-05, + "loss": 0.5260931253433228, + "step": 7437 + }, + { + "epoch": 1.3743576122229793, + "grad_norm": 0.07810332626104355, + "learning_rate": 1.18348420638406e-05, + "loss": 0.5477177500724792, + "step": 7438 + }, + { + "epoch": 1.374542388931875, + "grad_norm": 0.07277029752731323, + "learning_rate": 1.1832880980378126e-05, + "loss": 0.5091302394866943, + "step": 7439 + }, + { + "epoch": 1.374727165640771, + "grad_norm": 0.06402041763067245, + "learning_rate": 1.1830919823972727e-05, + "loss": 0.3213406205177307, + "step": 7440 + }, + { + "epoch": 1.3749119423496667, + "grad_norm": 0.06970551609992981, + "learning_rate": 1.1828958594702444e-05, + "loss": 0.4358813464641571, + "step": 7441 + }, + { + "epoch": 1.3750967190585626, + "grad_norm": 0.0727848932147026, + "learning_rate": 1.1826997292645328e-05, + "loss": 0.4688945412635803, + "step": 7442 + }, + { + "epoch": 1.3752814957674584, + "grad_norm": 0.0911780372262001, + "learning_rate": 1.1825035917879442e-05, + "loss": 0.7580387592315674, + "step": 7443 + }, + { + "epoch": 1.3754662724763542, + "grad_norm": 0.08887773007154465, + "learning_rate": 1.1823074470482835e-05, + "loss": 0.5618323087692261, + "step": 7444 + }, + { + "epoch": 1.3756510491852503, + "grad_norm": 0.06911130249500275, + "learning_rate": 1.1821112950533564e-05, + "loss": 0.36385273933410645, + "step": 7445 + }, + { + "epoch": 1.375835825894146, + "grad_norm": 0.07710594683885574, + "learning_rate": 1.1819151358109697e-05, + "loss": 0.5176555514335632, + "step": 7446 + }, + { + "epoch": 1.376020602603042, + "grad_norm": 0.07961034774780273, + "learning_rate": 1.1817189693289299e-05, + "loss": 0.48502466082572937, + "step": 7447 + }, + { + "epoch": 1.3762053793119378, + "grad_norm": 0.08120684325695038, + "learning_rate": 1.1815227956150434e-05, + "loss": 0.5393785834312439, + "step": 7448 + }, + { + "epoch": 1.3763901560208336, + "grad_norm": 0.09085174649953842, + "learning_rate": 1.1813266146771178e-05, + "loss": 0.6135912537574768, + "step": 7449 + }, + { + "epoch": 1.3765749327297294, + "grad_norm": 0.07598740607500076, + "learning_rate": 1.1811304265229601e-05, + "loss": 0.5072231292724609, + "step": 7450 + }, + { + "epoch": 1.3767597094386252, + "grad_norm": 0.09174876660108566, + "learning_rate": 1.1809342311603784e-05, + "loss": 0.6139599084854126, + "step": 7451 + }, + { + "epoch": 1.376944486147521, + "grad_norm": 0.07367060333490372, + "learning_rate": 1.1807380285971796e-05, + "loss": 0.4490894675254822, + "step": 7452 + }, + { + "epoch": 1.377129262856417, + "grad_norm": 0.07609044760465622, + "learning_rate": 1.1805418188411735e-05, + "loss": 0.5755985379219055, + "step": 7453 + }, + { + "epoch": 1.3773140395653127, + "grad_norm": 0.06809990108013153, + "learning_rate": 1.1803456019001678e-05, + "loss": 0.4108802378177643, + "step": 7454 + }, + { + "epoch": 1.3774988162742086, + "grad_norm": 0.07754744589328766, + "learning_rate": 1.180149377781971e-05, + "loss": 0.5679728388786316, + "step": 7455 + }, + { + "epoch": 1.3776835929831046, + "grad_norm": 0.07412713766098022, + "learning_rate": 1.1799531464943926e-05, + "loss": 0.5606153011322021, + "step": 7456 + }, + { + "epoch": 1.3778683696920004, + "grad_norm": 0.05983395874500275, + "learning_rate": 1.1797569080452423e-05, + "loss": 0.4484902620315552, + "step": 7457 + }, + { + "epoch": 1.3780531464008963, + "grad_norm": 0.06855115294456482, + "learning_rate": 1.179560662442329e-05, + "loss": 0.3876204192638397, + "step": 7458 + }, + { + "epoch": 1.378237923109792, + "grad_norm": 0.08249981701374054, + "learning_rate": 1.1793644096934634e-05, + "loss": 0.5886198878288269, + "step": 7459 + }, + { + "epoch": 1.378422699818688, + "grad_norm": 0.09763695299625397, + "learning_rate": 1.1791681498064554e-05, + "loss": 0.5948026776313782, + "step": 7460 + }, + { + "epoch": 1.3786074765275838, + "grad_norm": 0.06549713760614395, + "learning_rate": 1.1789718827891157e-05, + "loss": 0.3910665214061737, + "step": 7461 + }, + { + "epoch": 1.3787922532364796, + "grad_norm": 0.07282456755638123, + "learning_rate": 1.1787756086492546e-05, + "loss": 0.5207515358924866, + "step": 7462 + }, + { + "epoch": 1.3789770299453754, + "grad_norm": 0.08371993154287338, + "learning_rate": 1.178579327394684e-05, + "loss": 0.507247269153595, + "step": 7463 + }, + { + "epoch": 1.3791618066542712, + "grad_norm": 0.0790616124868393, + "learning_rate": 1.178383039033215e-05, + "loss": 0.5724804401397705, + "step": 7464 + }, + { + "epoch": 1.379346583363167, + "grad_norm": 0.07022110372781754, + "learning_rate": 1.1781867435726587e-05, + "loss": 0.4332163631916046, + "step": 7465 + }, + { + "epoch": 1.379531360072063, + "grad_norm": 0.07322531193494797, + "learning_rate": 1.1779904410208276e-05, + "loss": 0.48209553956985474, + "step": 7466 + }, + { + "epoch": 1.3797161367809587, + "grad_norm": 0.07616209983825684, + "learning_rate": 1.177794131385534e-05, + "loss": 0.5288639068603516, + "step": 7467 + }, + { + "epoch": 1.3799009134898546, + "grad_norm": 0.08086004108190536, + "learning_rate": 1.1775978146745899e-05, + "loss": 0.47018125653266907, + "step": 7468 + }, + { + "epoch": 1.3800856901987504, + "grad_norm": 0.08034881204366684, + "learning_rate": 1.1774014908958085e-05, + "loss": 0.48584097623825073, + "step": 7469 + }, + { + "epoch": 1.3802704669076462, + "grad_norm": 0.07475937157869339, + "learning_rate": 1.1772051600570032e-05, + "loss": 0.5507193207740784, + "step": 7470 + }, + { + "epoch": 1.380455243616542, + "grad_norm": 0.0697658360004425, + "learning_rate": 1.1770088221659865e-05, + "loss": 0.57442706823349, + "step": 7471 + }, + { + "epoch": 1.3806400203254379, + "grad_norm": 0.10161164402961731, + "learning_rate": 1.1768124772305724e-05, + "loss": 0.8037537336349487, + "step": 7472 + }, + { + "epoch": 1.3808247970343337, + "grad_norm": 0.07438277453184128, + "learning_rate": 1.1766161252585751e-05, + "loss": 0.45627665519714355, + "step": 7473 + }, + { + "epoch": 1.3810095737432297, + "grad_norm": 0.07278723269701004, + "learning_rate": 1.1764197662578087e-05, + "loss": 0.543790340423584, + "step": 7474 + }, + { + "epoch": 1.3811943504521256, + "grad_norm": 0.08033698797225952, + "learning_rate": 1.1762234002360873e-05, + "loss": 0.5407695174217224, + "step": 7475 + }, + { + "epoch": 1.3813791271610214, + "grad_norm": 0.08345130831003189, + "learning_rate": 1.176027027201226e-05, + "loss": 0.6017276644706726, + "step": 7476 + }, + { + "epoch": 1.3815639038699172, + "grad_norm": 0.06852320581674576, + "learning_rate": 1.1758306471610397e-05, + "loss": 0.4384315609931946, + "step": 7477 + }, + { + "epoch": 1.381748680578813, + "grad_norm": 0.04410892724990845, + "learning_rate": 1.1756342601233437e-05, + "loss": 0.27874159812927246, + "step": 7478 + }, + { + "epoch": 1.3819334572877089, + "grad_norm": 0.08524870127439499, + "learning_rate": 1.1754378660959536e-05, + "loss": 0.6035138964653015, + "step": 7479 + }, + { + "epoch": 1.3821182339966047, + "grad_norm": 0.06554487347602844, + "learning_rate": 1.1752414650866855e-05, + "loss": 0.45440593361854553, + "step": 7480 + }, + { + "epoch": 1.3823030107055005, + "grad_norm": 0.06884905695915222, + "learning_rate": 1.1750450571033553e-05, + "loss": 0.4647334814071655, + "step": 7481 + }, + { + "epoch": 1.3824877874143964, + "grad_norm": 0.07676929980516434, + "learning_rate": 1.1748486421537794e-05, + "loss": 0.5768230557441711, + "step": 7482 + }, + { + "epoch": 1.3826725641232922, + "grad_norm": 0.08470456302165985, + "learning_rate": 1.1746522202457746e-05, + "loss": 0.5833501815795898, + "step": 7483 + }, + { + "epoch": 1.382857340832188, + "grad_norm": 0.09254126250743866, + "learning_rate": 1.1744557913871579e-05, + "loss": 0.4988571107387543, + "step": 7484 + }, + { + "epoch": 1.383042117541084, + "grad_norm": 0.08187223970890045, + "learning_rate": 1.1742593555857465e-05, + "loss": 0.6211193203926086, + "step": 7485 + }, + { + "epoch": 1.38322689424998, + "grad_norm": 0.07119544595479965, + "learning_rate": 1.1740629128493577e-05, + "loss": 0.41468870639801025, + "step": 7486 + }, + { + "epoch": 1.3834116709588757, + "grad_norm": 0.0553731806576252, + "learning_rate": 1.17386646318581e-05, + "loss": 0.32613423466682434, + "step": 7487 + }, + { + "epoch": 1.3835964476677716, + "grad_norm": 0.07978975772857666, + "learning_rate": 1.1736700066029206e-05, + "loss": 0.496436208486557, + "step": 7488 + }, + { + "epoch": 1.3837812243766674, + "grad_norm": 0.07208065688610077, + "learning_rate": 1.1734735431085084e-05, + "loss": 0.5078883767127991, + "step": 7489 + }, + { + "epoch": 1.3839660010855632, + "grad_norm": 0.06337263435125351, + "learning_rate": 1.1732770727103919e-05, + "loss": 0.3940962255001068, + "step": 7490 + }, + { + "epoch": 1.384150777794459, + "grad_norm": 0.07468237727880478, + "learning_rate": 1.1730805954163902e-05, + "loss": 0.5392362475395203, + "step": 7491 + }, + { + "epoch": 1.3843355545033549, + "grad_norm": 0.0637771487236023, + "learning_rate": 1.172884111234322e-05, + "loss": 0.3994047939777374, + "step": 7492 + }, + { + "epoch": 1.3845203312122507, + "grad_norm": 0.10853901505470276, + "learning_rate": 1.1726876201720074e-05, + "loss": 0.8128345012664795, + "step": 7493 + }, + { + "epoch": 1.3847051079211465, + "grad_norm": 0.09867019951343536, + "learning_rate": 1.1724911222372658e-05, + "loss": 0.7452707290649414, + "step": 7494 + }, + { + "epoch": 1.3848898846300424, + "grad_norm": 0.08588841557502747, + "learning_rate": 1.1722946174379168e-05, + "loss": 0.6157535910606384, + "step": 7495 + }, + { + "epoch": 1.3850746613389382, + "grad_norm": 0.08550074696540833, + "learning_rate": 1.1720981057817813e-05, + "loss": 0.7322298288345337, + "step": 7496 + }, + { + "epoch": 1.385259438047834, + "grad_norm": 0.07837128639221191, + "learning_rate": 1.1719015872766798e-05, + "loss": 0.6208504438400269, + "step": 7497 + }, + { + "epoch": 1.3854442147567299, + "grad_norm": 0.08587179332971573, + "learning_rate": 1.1717050619304324e-05, + "loss": 0.5662384629249573, + "step": 7498 + }, + { + "epoch": 1.3856289914656257, + "grad_norm": 0.06542603671550751, + "learning_rate": 1.1715085297508613e-05, + "loss": 0.40593597292900085, + "step": 7499 + }, + { + "epoch": 1.3858137681745215, + "grad_norm": 0.06688559800386429, + "learning_rate": 1.1713119907457869e-05, + "loss": 0.4476969540119171, + "step": 7500 + }, + { + "epoch": 1.3858137681745215, + "eval_loss": 0.5861050486564636, + "eval_runtime": 256.6455, + "eval_samples_per_second": 71.028, + "eval_steps_per_second": 8.88, + "step": 7500 + }, + { + "epoch": 1.3859985448834173, + "grad_norm": 0.07808694988489151, + "learning_rate": 1.1711154449230315e-05, + "loss": 0.5939205884933472, + "step": 7501 + }, + { + "epoch": 1.3861833215923132, + "grad_norm": 0.09479350596666336, + "learning_rate": 1.1709188922904167e-05, + "loss": 0.6449679732322693, + "step": 7502 + }, + { + "epoch": 1.386368098301209, + "grad_norm": 0.06300199031829834, + "learning_rate": 1.1707223328557644e-05, + "loss": 0.46869057416915894, + "step": 7503 + }, + { + "epoch": 1.386552875010105, + "grad_norm": 0.05616572126746178, + "learning_rate": 1.170525766626898e-05, + "loss": 0.365093857049942, + "step": 7504 + }, + { + "epoch": 1.3867376517190009, + "grad_norm": 0.0733765959739685, + "learning_rate": 1.170329193611639e-05, + "loss": 0.45358970761299133, + "step": 7505 + }, + { + "epoch": 1.3869224284278967, + "grad_norm": 0.06689228862524033, + "learning_rate": 1.1701326138178113e-05, + "loss": 0.5470128059387207, + "step": 7506 + }, + { + "epoch": 1.3871072051367925, + "grad_norm": 0.0790744200348854, + "learning_rate": 1.1699360272532376e-05, + "loss": 0.6203442811965942, + "step": 7507 + }, + { + "epoch": 1.3872919818456884, + "grad_norm": 0.08600825816392899, + "learning_rate": 1.1697394339257417e-05, + "loss": 0.5041205883026123, + "step": 7508 + }, + { + "epoch": 1.3874767585545842, + "grad_norm": 0.07286131381988525, + "learning_rate": 1.1695428338431479e-05, + "loss": 0.5549904704093933, + "step": 7509 + }, + { + "epoch": 1.38766153526348, + "grad_norm": 0.07373219728469849, + "learning_rate": 1.1693462270132792e-05, + "loss": 0.42410120368003845, + "step": 7510 + }, + { + "epoch": 1.3878463119723758, + "grad_norm": 0.06848616898059845, + "learning_rate": 1.1691496134439606e-05, + "loss": 0.4917903244495392, + "step": 7511 + }, + { + "epoch": 1.3880310886812717, + "grad_norm": 0.06176409870386124, + "learning_rate": 1.1689529931430166e-05, + "loss": 0.4779815673828125, + "step": 7512 + }, + { + "epoch": 1.3882158653901675, + "grad_norm": 0.07737766951322556, + "learning_rate": 1.1687563661182724e-05, + "loss": 0.4695681929588318, + "step": 7513 + }, + { + "epoch": 1.3884006420990633, + "grad_norm": 0.08544651418924332, + "learning_rate": 1.1685597323775522e-05, + "loss": 0.8225537538528442, + "step": 7514 + }, + { + "epoch": 1.3885854188079594, + "grad_norm": 0.07991264760494232, + "learning_rate": 1.1683630919286824e-05, + "loss": 0.5340745449066162, + "step": 7515 + }, + { + "epoch": 1.3887701955168552, + "grad_norm": 0.07726863771677017, + "learning_rate": 1.1681664447794883e-05, + "loss": 0.456493079662323, + "step": 7516 + }, + { + "epoch": 1.388954972225751, + "grad_norm": 0.08772004395723343, + "learning_rate": 1.1679697909377955e-05, + "loss": 0.5773293375968933, + "step": 7517 + }, + { + "epoch": 1.3891397489346469, + "grad_norm": 0.06700170040130615, + "learning_rate": 1.1677731304114306e-05, + "loss": 0.4972071945667267, + "step": 7518 + }, + { + "epoch": 1.3893245256435427, + "grad_norm": 0.05684570223093033, + "learning_rate": 1.1675764632082203e-05, + "loss": 0.4496079087257385, + "step": 7519 + }, + { + "epoch": 1.3895093023524385, + "grad_norm": 0.0730443224310875, + "learning_rate": 1.1673797893359908e-05, + "loss": 0.5882956385612488, + "step": 7520 + }, + { + "epoch": 1.3896940790613344, + "grad_norm": 0.07208561897277832, + "learning_rate": 1.1671831088025695e-05, + "loss": 0.4346911311149597, + "step": 7521 + }, + { + "epoch": 1.3898788557702302, + "grad_norm": 0.059102918952703476, + "learning_rate": 1.1669864216157834e-05, + "loss": 0.4118361175060272, + "step": 7522 + }, + { + "epoch": 1.390063632479126, + "grad_norm": 0.08321856707334518, + "learning_rate": 1.1667897277834603e-05, + "loss": 0.5240601897239685, + "step": 7523 + }, + { + "epoch": 1.3902484091880218, + "grad_norm": 0.07270022481679916, + "learning_rate": 1.1665930273134276e-05, + "loss": 0.5131801962852478, + "step": 7524 + }, + { + "epoch": 1.3904331858969177, + "grad_norm": 0.06680957973003387, + "learning_rate": 1.1663963202135137e-05, + "loss": 0.44497692584991455, + "step": 7525 + }, + { + "epoch": 1.3906179626058135, + "grad_norm": 0.06281045824289322, + "learning_rate": 1.166199606491547e-05, + "loss": 0.44331836700439453, + "step": 7526 + }, + { + "epoch": 1.3908027393147093, + "grad_norm": 0.07803647220134735, + "learning_rate": 1.1660028861553559e-05, + "loss": 0.5607433319091797, + "step": 7527 + }, + { + "epoch": 1.3909875160236052, + "grad_norm": 0.07634811103343964, + "learning_rate": 1.165806159212769e-05, + "loss": 0.5576267242431641, + "step": 7528 + }, + { + "epoch": 1.391172292732501, + "grad_norm": 0.09128347784280777, + "learning_rate": 1.1656094256716161e-05, + "loss": 0.7086127400398254, + "step": 7529 + }, + { + "epoch": 1.3913570694413968, + "grad_norm": 0.07963167876005173, + "learning_rate": 1.165412685539726e-05, + "loss": 0.615551233291626, + "step": 7530 + }, + { + "epoch": 1.3915418461502926, + "grad_norm": 0.07789136469364166, + "learning_rate": 1.1652159388249287e-05, + "loss": 0.5044615864753723, + "step": 7531 + }, + { + "epoch": 1.3917266228591885, + "grad_norm": 0.06654134392738342, + "learning_rate": 1.1650191855350537e-05, + "loss": 0.4450850486755371, + "step": 7532 + }, + { + "epoch": 1.3919113995680845, + "grad_norm": 0.08933846652507782, + "learning_rate": 1.1648224256779314e-05, + "loss": 0.5686116218566895, + "step": 7533 + }, + { + "epoch": 1.3920961762769803, + "grad_norm": 0.0678136795759201, + "learning_rate": 1.1646256592613923e-05, + "loss": 0.414152592420578, + "step": 7534 + }, + { + "epoch": 1.3922809529858762, + "grad_norm": 0.07052744925022125, + "learning_rate": 1.1644288862932669e-05, + "loss": 0.45875686407089233, + "step": 7535 + }, + { + "epoch": 1.392465729694772, + "grad_norm": 0.07333561033010483, + "learning_rate": 1.1642321067813864e-05, + "loss": 0.5426931977272034, + "step": 7536 + }, + { + "epoch": 1.3926505064036678, + "grad_norm": 0.08019225299358368, + "learning_rate": 1.1640353207335818e-05, + "loss": 0.5834104418754578, + "step": 7537 + }, + { + "epoch": 1.3928352831125637, + "grad_norm": 0.05879241228103638, + "learning_rate": 1.1638385281576844e-05, + "loss": 0.33529654145240784, + "step": 7538 + }, + { + "epoch": 1.3930200598214595, + "grad_norm": 0.058719415217638016, + "learning_rate": 1.1636417290615267e-05, + "loss": 0.3612607717514038, + "step": 7539 + }, + { + "epoch": 1.3932048365303553, + "grad_norm": 0.05941032990813255, + "learning_rate": 1.1634449234529399e-05, + "loss": 0.3875240385532379, + "step": 7540 + }, + { + "epoch": 1.3933896132392511, + "grad_norm": 0.07174340635538101, + "learning_rate": 1.1632481113397565e-05, + "loss": 0.4807130992412567, + "step": 7541 + }, + { + "epoch": 1.393574389948147, + "grad_norm": 0.07699482887983322, + "learning_rate": 1.1630512927298087e-05, + "loss": 0.49087509512901306, + "step": 7542 + }, + { + "epoch": 1.3937591666570428, + "grad_norm": 0.07787071168422699, + "learning_rate": 1.1628544676309302e-05, + "loss": 0.499368280172348, + "step": 7543 + }, + { + "epoch": 1.3939439433659389, + "grad_norm": 0.06933631747961044, + "learning_rate": 1.1626576360509529e-05, + "loss": 0.5152738094329834, + "step": 7544 + }, + { + "epoch": 1.3941287200748347, + "grad_norm": 0.07767070084810257, + "learning_rate": 1.1624607979977106e-05, + "loss": 0.5594071745872498, + "step": 7545 + }, + { + "epoch": 1.3943134967837305, + "grad_norm": 0.06435758620500565, + "learning_rate": 1.162263953479037e-05, + "loss": 0.39479848742485046, + "step": 7546 + }, + { + "epoch": 1.3944982734926263, + "grad_norm": 0.07449889183044434, + "learning_rate": 1.162067102502766e-05, + "loss": 0.3971273601055145, + "step": 7547 + }, + { + "epoch": 1.3946830502015222, + "grad_norm": 0.060775335878133774, + "learning_rate": 1.1618702450767306e-05, + "loss": 0.3650251030921936, + "step": 7548 + }, + { + "epoch": 1.394867826910418, + "grad_norm": 0.0665210410952568, + "learning_rate": 1.1616733812087663e-05, + "loss": 0.37720543146133423, + "step": 7549 + }, + { + "epoch": 1.3950526036193138, + "grad_norm": 0.091295525431633, + "learning_rate": 1.1614765109067075e-05, + "loss": 0.7030790448188782, + "step": 7550 + }, + { + "epoch": 1.3952373803282097, + "grad_norm": 0.07124901562929153, + "learning_rate": 1.1612796341783883e-05, + "loss": 0.46749329566955566, + "step": 7551 + }, + { + "epoch": 1.3954221570371055, + "grad_norm": 0.07841958850622177, + "learning_rate": 1.1610827510316442e-05, + "loss": 0.5060739517211914, + "step": 7552 + }, + { + "epoch": 1.3956069337460013, + "grad_norm": 0.07065027207136154, + "learning_rate": 1.160885861474311e-05, + "loss": 0.4663480818271637, + "step": 7553 + }, + { + "epoch": 1.3957917104548971, + "grad_norm": 0.05885373055934906, + "learning_rate": 1.1606889655142236e-05, + "loss": 0.468976765871048, + "step": 7554 + }, + { + "epoch": 1.395976487163793, + "grad_norm": 0.08445914834737778, + "learning_rate": 1.160492063159218e-05, + "loss": 0.6371353268623352, + "step": 7555 + }, + { + "epoch": 1.3961612638726888, + "grad_norm": 0.08427069336175919, + "learning_rate": 1.1602951544171307e-05, + "loss": 0.6276706457138062, + "step": 7556 + }, + { + "epoch": 1.3963460405815846, + "grad_norm": 0.07150643318891525, + "learning_rate": 1.1600982392957978e-05, + "loss": 0.4658666253089905, + "step": 7557 + }, + { + "epoch": 1.3965308172904805, + "grad_norm": 0.08658826351165771, + "learning_rate": 1.1599013178030553e-05, + "loss": 0.6273196339607239, + "step": 7558 + }, + { + "epoch": 1.3967155939993763, + "grad_norm": 0.07841559499502182, + "learning_rate": 1.1597043899467412e-05, + "loss": 0.5224420428276062, + "step": 7559 + }, + { + "epoch": 1.396900370708272, + "grad_norm": 0.0666312649846077, + "learning_rate": 1.159507455734692e-05, + "loss": 0.40652960538864136, + "step": 7560 + }, + { + "epoch": 1.397085147417168, + "grad_norm": 0.0653591901063919, + "learning_rate": 1.1593105151747448e-05, + "loss": 0.3719636797904968, + "step": 7561 + }, + { + "epoch": 1.397269924126064, + "grad_norm": 0.07236526161432266, + "learning_rate": 1.1591135682747374e-05, + "loss": 0.5152609944343567, + "step": 7562 + }, + { + "epoch": 1.3974547008349598, + "grad_norm": 0.07473872601985931, + "learning_rate": 1.1589166150425082e-05, + "loss": 0.5459132790565491, + "step": 7563 + }, + { + "epoch": 1.3976394775438556, + "grad_norm": 0.0817592591047287, + "learning_rate": 1.1587196554858946e-05, + "loss": 0.5027362704277039, + "step": 7564 + }, + { + "epoch": 1.3978242542527515, + "grad_norm": 0.08243642747402191, + "learning_rate": 1.1585226896127353e-05, + "loss": 0.5912965536117554, + "step": 7565 + }, + { + "epoch": 1.3980090309616473, + "grad_norm": 0.09193118661642075, + "learning_rate": 1.1583257174308693e-05, + "loss": 0.6074056029319763, + "step": 7566 + }, + { + "epoch": 1.3981938076705431, + "grad_norm": 0.07539702951908112, + "learning_rate": 1.1581287389481348e-05, + "loss": 0.5826414823532104, + "step": 7567 + }, + { + "epoch": 1.398378584379439, + "grad_norm": 0.07248812168836594, + "learning_rate": 1.1579317541723709e-05, + "loss": 0.4714256227016449, + "step": 7568 + }, + { + "epoch": 1.3985633610883348, + "grad_norm": 0.059154167771339417, + "learning_rate": 1.1577347631114178e-05, + "loss": 0.45838063955307007, + "step": 7569 + }, + { + "epoch": 1.3987481377972306, + "grad_norm": 0.08536428958177567, + "learning_rate": 1.1575377657731144e-05, + "loss": 0.6104124784469604, + "step": 7570 + }, + { + "epoch": 1.3989329145061264, + "grad_norm": 0.08255096524953842, + "learning_rate": 1.1573407621653007e-05, + "loss": 0.48993632197380066, + "step": 7571 + }, + { + "epoch": 1.3991176912150223, + "grad_norm": 0.08401396870613098, + "learning_rate": 1.157143752295817e-05, + "loss": 0.5046585202217102, + "step": 7572 + }, + { + "epoch": 1.3993024679239183, + "grad_norm": 0.1049598827958107, + "learning_rate": 1.1569467361725037e-05, + "loss": 0.7219524383544922, + "step": 7573 + }, + { + "epoch": 1.3994872446328142, + "grad_norm": 0.08518721163272858, + "learning_rate": 1.1567497138032014e-05, + "loss": 0.693490207195282, + "step": 7574 + }, + { + "epoch": 1.39967202134171, + "grad_norm": 0.07102842628955841, + "learning_rate": 1.1565526851957504e-05, + "loss": 0.4655759930610657, + "step": 7575 + }, + { + "epoch": 1.3998567980506058, + "grad_norm": 0.09537466615438461, + "learning_rate": 1.1563556503579929e-05, + "loss": 0.7377628087997437, + "step": 7576 + }, + { + "epoch": 1.4000415747595016, + "grad_norm": 0.06323997676372528, + "learning_rate": 1.1561586092977697e-05, + "loss": 0.43483224511146545, + "step": 7577 + }, + { + "epoch": 1.4002263514683975, + "grad_norm": 0.06556528061628342, + "learning_rate": 1.1559615620229216e-05, + "loss": 0.4366847276687622, + "step": 7578 + }, + { + "epoch": 1.4004111281772933, + "grad_norm": 0.07493555545806885, + "learning_rate": 1.1557645085412921e-05, + "loss": 0.436001181602478, + "step": 7579 + }, + { + "epoch": 1.4005959048861891, + "grad_norm": 0.07397256046533585, + "learning_rate": 1.1555674488607224e-05, + "loss": 0.5384519696235657, + "step": 7580 + }, + { + "epoch": 1.400780681595085, + "grad_norm": 0.09817645698785782, + "learning_rate": 1.1553703829890546e-05, + "loss": 0.7104623317718506, + "step": 7581 + }, + { + "epoch": 1.4009654583039808, + "grad_norm": 0.07672224938869476, + "learning_rate": 1.1551733109341318e-05, + "loss": 0.4858318269252777, + "step": 7582 + }, + { + "epoch": 1.4011502350128766, + "grad_norm": 0.08330681920051575, + "learning_rate": 1.1549762327037968e-05, + "loss": 0.6663315296173096, + "step": 7583 + }, + { + "epoch": 1.4013350117217724, + "grad_norm": 0.08952096104621887, + "learning_rate": 1.1547791483058926e-05, + "loss": 0.6832013130187988, + "step": 7584 + }, + { + "epoch": 1.4015197884306683, + "grad_norm": 0.06818807870149612, + "learning_rate": 1.1545820577482623e-05, + "loss": 0.47754937410354614, + "step": 7585 + }, + { + "epoch": 1.401704565139564, + "grad_norm": 0.06141260638833046, + "learning_rate": 1.1543849610387499e-05, + "loss": 0.3404228985309601, + "step": 7586 + }, + { + "epoch": 1.40188934184846, + "grad_norm": 0.08560726791620255, + "learning_rate": 1.1541878581851994e-05, + "loss": 0.5630397796630859, + "step": 7587 + }, + { + "epoch": 1.4020741185573558, + "grad_norm": 0.07210613787174225, + "learning_rate": 1.1539907491954539e-05, + "loss": 0.5454407334327698, + "step": 7588 + }, + { + "epoch": 1.4022588952662516, + "grad_norm": 0.06783408671617508, + "learning_rate": 1.1537936340773586e-05, + "loss": 0.46421727538108826, + "step": 7589 + }, + { + "epoch": 1.4024436719751474, + "grad_norm": 0.07097148150205612, + "learning_rate": 1.1535965128387578e-05, + "loss": 0.5893171429634094, + "step": 7590 + }, + { + "epoch": 1.4026284486840432, + "grad_norm": 0.07714993506669998, + "learning_rate": 1.1533993854874964e-05, + "loss": 0.5079788565635681, + "step": 7591 + }, + { + "epoch": 1.4028132253929393, + "grad_norm": 0.1183595284819603, + "learning_rate": 1.1532022520314192e-05, + "loss": 0.6799882054328918, + "step": 7592 + }, + { + "epoch": 1.4029980021018351, + "grad_norm": 0.07514689117670059, + "learning_rate": 1.153005112478372e-05, + "loss": 0.48914864659309387, + "step": 7593 + }, + { + "epoch": 1.403182778810731, + "grad_norm": 0.07823141664266586, + "learning_rate": 1.1528079668361997e-05, + "loss": 0.5553666353225708, + "step": 7594 + }, + { + "epoch": 1.4033675555196268, + "grad_norm": 0.0717146173119545, + "learning_rate": 1.1526108151127488e-05, + "loss": 0.5046097636222839, + "step": 7595 + }, + { + "epoch": 1.4035523322285226, + "grad_norm": 0.07187351584434509, + "learning_rate": 1.1524136573158646e-05, + "loss": 0.4185374081134796, + "step": 7596 + }, + { + "epoch": 1.4037371089374184, + "grad_norm": 0.07105645537376404, + "learning_rate": 1.1522164934533939e-05, + "loss": 0.4948467016220093, + "step": 7597 + }, + { + "epoch": 1.4039218856463143, + "grad_norm": 0.06178764998912811, + "learning_rate": 1.1520193235331827e-05, + "loss": 0.4289746582508087, + "step": 7598 + }, + { + "epoch": 1.40410666235521, + "grad_norm": 0.0821404680609703, + "learning_rate": 1.151822147563078e-05, + "loss": 0.4482502341270447, + "step": 7599 + }, + { + "epoch": 1.404291439064106, + "grad_norm": 0.05330733582377434, + "learning_rate": 1.1516249655509271e-05, + "loss": 0.30158668756484985, + "step": 7600 + }, + { + "epoch": 1.4044762157730017, + "grad_norm": 0.0885644257068634, + "learning_rate": 1.1514277775045768e-05, + "loss": 0.456144779920578, + "step": 7601 + }, + { + "epoch": 1.4046609924818976, + "grad_norm": 0.08371008932590485, + "learning_rate": 1.1512305834318746e-05, + "loss": 0.5358234643936157, + "step": 7602 + }, + { + "epoch": 1.4048457691907936, + "grad_norm": 0.06220470368862152, + "learning_rate": 1.1510333833406687e-05, + "loss": 0.37693867087364197, + "step": 7603 + }, + { + "epoch": 1.4050305458996895, + "grad_norm": 0.08973923325538635, + "learning_rate": 1.1508361772388064e-05, + "loss": 0.626574695110321, + "step": 7604 + }, + { + "epoch": 1.4052153226085853, + "grad_norm": 0.07953702658414841, + "learning_rate": 1.150638965134136e-05, + "loss": 0.5673231482505798, + "step": 7605 + }, + { + "epoch": 1.405400099317481, + "grad_norm": 0.06657926738262177, + "learning_rate": 1.1504417470345064e-05, + "loss": 0.48152756690979004, + "step": 7606 + }, + { + "epoch": 1.405584876026377, + "grad_norm": 0.09543479979038239, + "learning_rate": 1.150244522947766e-05, + "loss": 0.5430436134338379, + "step": 7607 + }, + { + "epoch": 1.4057696527352728, + "grad_norm": 0.08441811054944992, + "learning_rate": 1.1500472928817632e-05, + "loss": 0.5210660696029663, + "step": 7608 + }, + { + "epoch": 1.4059544294441686, + "grad_norm": 0.05825930833816528, + "learning_rate": 1.1498500568443477e-05, + "loss": 0.3573313057422638, + "step": 7609 + }, + { + "epoch": 1.4061392061530644, + "grad_norm": 0.07727733254432678, + "learning_rate": 1.149652814843369e-05, + "loss": 0.585524320602417, + "step": 7610 + }, + { + "epoch": 1.4063239828619603, + "grad_norm": 0.07217800617218018, + "learning_rate": 1.1494555668866762e-05, + "loss": 0.4551432728767395, + "step": 7611 + }, + { + "epoch": 1.406508759570856, + "grad_norm": 0.06366957724094391, + "learning_rate": 1.1492583129821198e-05, + "loss": 0.3803600072860718, + "step": 7612 + }, + { + "epoch": 1.406693536279752, + "grad_norm": 0.07053103297948837, + "learning_rate": 1.1490610531375493e-05, + "loss": 0.406423419713974, + "step": 7613 + }, + { + "epoch": 1.4068783129886477, + "grad_norm": 0.08406764268875122, + "learning_rate": 1.148863787360815e-05, + "loss": 0.5798128247261047, + "step": 7614 + }, + { + "epoch": 1.4070630896975436, + "grad_norm": 0.07802645862102509, + "learning_rate": 1.148666515659768e-05, + "loss": 0.49858537316322327, + "step": 7615 + }, + { + "epoch": 1.4072478664064394, + "grad_norm": 0.07138711959123611, + "learning_rate": 1.1484692380422587e-05, + "loss": 0.4957258999347687, + "step": 7616 + }, + { + "epoch": 1.4074326431153352, + "grad_norm": 0.06466956436634064, + "learning_rate": 1.1482719545161382e-05, + "loss": 0.4023810923099518, + "step": 7617 + }, + { + "epoch": 1.407617419824231, + "grad_norm": 0.08849228173494339, + "learning_rate": 1.1480746650892578e-05, + "loss": 0.5417582392692566, + "step": 7618 + }, + { + "epoch": 1.4078021965331269, + "grad_norm": 0.1008807122707367, + "learning_rate": 1.1478773697694691e-05, + "loss": 0.7184629440307617, + "step": 7619 + }, + { + "epoch": 1.4079869732420227, + "grad_norm": 0.09179085493087769, + "learning_rate": 1.1476800685646237e-05, + "loss": 0.678741455078125, + "step": 7620 + }, + { + "epoch": 1.4081717499509188, + "grad_norm": 0.08802378922700882, + "learning_rate": 1.1474827614825734e-05, + "loss": 0.5173779129981995, + "step": 7621 + }, + { + "epoch": 1.4083565266598146, + "grad_norm": 0.10323859006166458, + "learning_rate": 1.147285448531171e-05, + "loss": 0.5072680711746216, + "step": 7622 + }, + { + "epoch": 1.4085413033687104, + "grad_norm": 0.07147370278835297, + "learning_rate": 1.147088129718268e-05, + "loss": 0.4114370346069336, + "step": 7623 + }, + { + "epoch": 1.4087260800776062, + "grad_norm": 0.08556059002876282, + "learning_rate": 1.146890805051718e-05, + "loss": 0.6060706973075867, + "step": 7624 + }, + { + "epoch": 1.408910856786502, + "grad_norm": 0.06883016228675842, + "learning_rate": 1.1466934745393737e-05, + "loss": 0.41225066781044006, + "step": 7625 + }, + { + "epoch": 1.409095633495398, + "grad_norm": 0.09106704592704773, + "learning_rate": 1.1464961381890875e-05, + "loss": 0.6388334631919861, + "step": 7626 + }, + { + "epoch": 1.4092804102042937, + "grad_norm": 0.0875760018825531, + "learning_rate": 1.1462987960087139e-05, + "loss": 0.7768730521202087, + "step": 7627 + }, + { + "epoch": 1.4094651869131896, + "grad_norm": 0.06366395950317383, + "learning_rate": 1.1461014480061057e-05, + "loss": 0.4491613507270813, + "step": 7628 + }, + { + "epoch": 1.4096499636220854, + "grad_norm": 0.08622222393751144, + "learning_rate": 1.1459040941891169e-05, + "loss": 0.6296977400779724, + "step": 7629 + }, + { + "epoch": 1.4098347403309812, + "grad_norm": 0.06523562967777252, + "learning_rate": 1.1457067345656016e-05, + "loss": 0.4500395953655243, + "step": 7630 + }, + { + "epoch": 1.410019517039877, + "grad_norm": 0.08612232655286789, + "learning_rate": 1.145509369143414e-05, + "loss": 0.5739388465881348, + "step": 7631 + }, + { + "epoch": 1.410204293748773, + "grad_norm": 0.08802293241024017, + "learning_rate": 1.145311997930409e-05, + "loss": 0.6413158178329468, + "step": 7632 + }, + { + "epoch": 1.410389070457669, + "grad_norm": 0.06629212200641632, + "learning_rate": 1.145114620934441e-05, + "loss": 0.5219778418540955, + "step": 7633 + }, + { + "epoch": 1.4105738471665648, + "grad_norm": 0.07021202892065048, + "learning_rate": 1.1449172381633651e-05, + "loss": 0.5135833621025085, + "step": 7634 + }, + { + "epoch": 1.4107586238754606, + "grad_norm": 0.05778151750564575, + "learning_rate": 1.1447198496250367e-05, + "loss": 0.4304315149784088, + "step": 7635 + }, + { + "epoch": 1.4109434005843564, + "grad_norm": 0.06428337097167969, + "learning_rate": 1.1445224553273111e-05, + "loss": 0.454351544380188, + "step": 7636 + }, + { + "epoch": 1.4111281772932522, + "grad_norm": 0.07603947818279266, + "learning_rate": 1.1443250552780435e-05, + "loss": 0.4918436110019684, + "step": 7637 + }, + { + "epoch": 1.411312954002148, + "grad_norm": 0.07855551689863205, + "learning_rate": 1.1441276494850904e-05, + "loss": 0.5450300574302673, + "step": 7638 + }, + { + "epoch": 1.411497730711044, + "grad_norm": 0.06913916021585464, + "learning_rate": 1.143930237956308e-05, + "loss": 0.48682063817977905, + "step": 7639 + }, + { + "epoch": 1.4116825074199397, + "grad_norm": 0.0858033075928688, + "learning_rate": 1.1437328206995521e-05, + "loss": 0.457550972700119, + "step": 7640 + }, + { + "epoch": 1.4118672841288356, + "grad_norm": 0.05459976568818092, + "learning_rate": 1.1435353977226797e-05, + "loss": 0.3086320757865906, + "step": 7641 + }, + { + "epoch": 1.4120520608377314, + "grad_norm": 0.05813084542751312, + "learning_rate": 1.1433379690335478e-05, + "loss": 0.3826307952404022, + "step": 7642 + }, + { + "epoch": 1.4122368375466272, + "grad_norm": 0.08360633254051208, + "learning_rate": 1.1431405346400128e-05, + "loss": 0.64500892162323, + "step": 7643 + }, + { + "epoch": 1.412421614255523, + "grad_norm": 0.07159120589494705, + "learning_rate": 1.1429430945499324e-05, + "loss": 0.39974844455718994, + "step": 7644 + }, + { + "epoch": 1.4126063909644189, + "grad_norm": 0.07743488997220993, + "learning_rate": 1.1427456487711644e-05, + "loss": 0.6075916290283203, + "step": 7645 + }, + { + "epoch": 1.4127911676733147, + "grad_norm": 0.07663056254386902, + "learning_rate": 1.1425481973115659e-05, + "loss": 0.5331258177757263, + "step": 7646 + }, + { + "epoch": 1.4129759443822105, + "grad_norm": 0.07387567311525345, + "learning_rate": 1.142350740178995e-05, + "loss": 0.4058663845062256, + "step": 7647 + }, + { + "epoch": 1.4131607210911064, + "grad_norm": 0.08707687258720398, + "learning_rate": 1.1421532773813105e-05, + "loss": 0.6303785443305969, + "step": 7648 + }, + { + "epoch": 1.4133454978000022, + "grad_norm": 0.05880158394575119, + "learning_rate": 1.14195580892637e-05, + "loss": 0.4565977454185486, + "step": 7649 + }, + { + "epoch": 1.4135302745088982, + "grad_norm": 0.0803513377904892, + "learning_rate": 1.1417583348220322e-05, + "loss": 0.6228235960006714, + "step": 7650 + }, + { + "epoch": 1.413715051217794, + "grad_norm": 0.07094324380159378, + "learning_rate": 1.1415608550761563e-05, + "loss": 0.4482516944408417, + "step": 7651 + }, + { + "epoch": 1.41389982792669, + "grad_norm": 0.06476152688264847, + "learning_rate": 1.1413633696966016e-05, + "loss": 0.4385446012020111, + "step": 7652 + }, + { + "epoch": 1.4140846046355857, + "grad_norm": 0.07475713640451431, + "learning_rate": 1.141165878691227e-05, + "loss": 0.5316185355186462, + "step": 7653 + }, + { + "epoch": 1.4142693813444815, + "grad_norm": 0.0754825696349144, + "learning_rate": 1.1409683820678913e-05, + "loss": 0.5440897345542908, + "step": 7654 + }, + { + "epoch": 1.4144541580533774, + "grad_norm": 0.07446648925542831, + "learning_rate": 1.140770879834456e-05, + "loss": 0.47146567702293396, + "step": 7655 + }, + { + "epoch": 1.4146389347622732, + "grad_norm": 0.06318148970603943, + "learning_rate": 1.1405733719987797e-05, + "loss": 0.41523852944374084, + "step": 7656 + }, + { + "epoch": 1.414823711471169, + "grad_norm": 0.07789860665798187, + "learning_rate": 1.1403758585687226e-05, + "loss": 0.45916494727134705, + "step": 7657 + }, + { + "epoch": 1.4150084881800649, + "grad_norm": 0.07964061200618744, + "learning_rate": 1.1401783395521462e-05, + "loss": 0.5693597793579102, + "step": 7658 + }, + { + "epoch": 1.4151932648889607, + "grad_norm": 0.0904047042131424, + "learning_rate": 1.1399808149569102e-05, + "loss": 0.6129499077796936, + "step": 7659 + }, + { + "epoch": 1.4153780415978565, + "grad_norm": 0.06054529547691345, + "learning_rate": 1.1397832847908756e-05, + "loss": 0.35986170172691345, + "step": 7660 + }, + { + "epoch": 1.4155628183067526, + "grad_norm": 0.06948984414339066, + "learning_rate": 1.1395857490619035e-05, + "loss": 0.5107991099357605, + "step": 7661 + }, + { + "epoch": 1.4157475950156484, + "grad_norm": 0.07513797283172607, + "learning_rate": 1.1393882077778555e-05, + "loss": 0.5044052004814148, + "step": 7662 + }, + { + "epoch": 1.4159323717245442, + "grad_norm": 0.07386617362499237, + "learning_rate": 1.139190660946593e-05, + "loss": 0.5760729908943176, + "step": 7663 + }, + { + "epoch": 1.41611714843344, + "grad_norm": 0.08695702254772186, + "learning_rate": 1.1389931085759774e-05, + "loss": 0.6008087992668152, + "step": 7664 + }, + { + "epoch": 1.4163019251423359, + "grad_norm": 0.08835457265377045, + "learning_rate": 1.1387955506738711e-05, + "loss": 0.6213218569755554, + "step": 7665 + }, + { + "epoch": 1.4164867018512317, + "grad_norm": 0.06474512815475464, + "learning_rate": 1.1385979872481363e-05, + "loss": 0.3683367371559143, + "step": 7666 + }, + { + "epoch": 1.4166714785601275, + "grad_norm": 0.0808560773730278, + "learning_rate": 1.1384004183066347e-05, + "loss": 0.5479312539100647, + "step": 7667 + }, + { + "epoch": 1.4168562552690234, + "grad_norm": 0.061818208545446396, + "learning_rate": 1.1382028438572297e-05, + "loss": 0.39471688866615295, + "step": 7668 + }, + { + "epoch": 1.4170410319779192, + "grad_norm": 0.07842092216014862, + "learning_rate": 1.1380052639077841e-05, + "loss": 0.5562286972999573, + "step": 7669 + }, + { + "epoch": 1.417225808686815, + "grad_norm": 0.05551144480705261, + "learning_rate": 1.1378076784661606e-05, + "loss": 0.34070223569869995, + "step": 7670 + }, + { + "epoch": 1.4174105853957109, + "grad_norm": 0.07446765899658203, + "learning_rate": 1.1376100875402225e-05, + "loss": 0.5623782277107239, + "step": 7671 + }, + { + "epoch": 1.4175953621046067, + "grad_norm": 0.06815563142299652, + "learning_rate": 1.1374124911378338e-05, + "loss": 0.46452295780181885, + "step": 7672 + }, + { + "epoch": 1.4177801388135025, + "grad_norm": 0.06641937047243118, + "learning_rate": 1.1372148892668577e-05, + "loss": 0.475123792886734, + "step": 7673 + }, + { + "epoch": 1.4179649155223983, + "grad_norm": 0.09159291535615921, + "learning_rate": 1.1370172819351582e-05, + "loss": 0.7458155155181885, + "step": 7674 + }, + { + "epoch": 1.4181496922312942, + "grad_norm": 0.0605030432343483, + "learning_rate": 1.1368196691505995e-05, + "loss": 0.4790557622909546, + "step": 7675 + }, + { + "epoch": 1.41833446894019, + "grad_norm": 0.08951747417449951, + "learning_rate": 1.1366220509210464e-05, + "loss": 0.5695453882217407, + "step": 7676 + }, + { + "epoch": 1.4185192456490858, + "grad_norm": 0.06571762263774872, + "learning_rate": 1.1364244272543627e-05, + "loss": 0.44038400053977966, + "step": 7677 + }, + { + "epoch": 1.4187040223579817, + "grad_norm": 0.08783255517482758, + "learning_rate": 1.1362267981584137e-05, + "loss": 0.5953701734542847, + "step": 7678 + }, + { + "epoch": 1.4188887990668775, + "grad_norm": 0.07799126952886581, + "learning_rate": 1.1360291636410645e-05, + "loss": 0.4716167151927948, + "step": 7679 + }, + { + "epoch": 1.4190735757757735, + "grad_norm": 0.07493774592876434, + "learning_rate": 1.1358315237101798e-05, + "loss": 0.4993197023868561, + "step": 7680 + }, + { + "epoch": 1.4192583524846694, + "grad_norm": 0.06788554042577744, + "learning_rate": 1.1356338783736256e-05, + "loss": 0.4875277578830719, + "step": 7681 + }, + { + "epoch": 1.4194431291935652, + "grad_norm": 0.06015022099018097, + "learning_rate": 1.1354362276392677e-05, + "loss": 0.34643423557281494, + "step": 7682 + }, + { + "epoch": 1.419627905902461, + "grad_norm": 0.09699197113513947, + "learning_rate": 1.1352385715149711e-05, + "loss": 0.5874773859977722, + "step": 7683 + }, + { + "epoch": 1.4198126826113568, + "grad_norm": 0.08294727653265, + "learning_rate": 1.135040910008603e-05, + "loss": 0.6303514242172241, + "step": 7684 + }, + { + "epoch": 1.4199974593202527, + "grad_norm": 0.07292766124010086, + "learning_rate": 1.134843243128029e-05, + "loss": 0.5303201079368591, + "step": 7685 + }, + { + "epoch": 1.4201822360291485, + "grad_norm": 0.07239757478237152, + "learning_rate": 1.1346455708811157e-05, + "loss": 0.6103678941726685, + "step": 7686 + }, + { + "epoch": 1.4203670127380443, + "grad_norm": 0.08574247360229492, + "learning_rate": 1.13444789327573e-05, + "loss": 0.547768235206604, + "step": 7687 + }, + { + "epoch": 1.4205517894469402, + "grad_norm": 0.07449698448181152, + "learning_rate": 1.1342502103197386e-05, + "loss": 0.4052199721336365, + "step": 7688 + }, + { + "epoch": 1.420736566155836, + "grad_norm": 0.08636648952960968, + "learning_rate": 1.1340525220210092e-05, + "loss": 0.6392164826393127, + "step": 7689 + }, + { + "epoch": 1.4209213428647318, + "grad_norm": 0.07430004328489304, + "learning_rate": 1.1338548283874085e-05, + "loss": 0.5255374312400818, + "step": 7690 + }, + { + "epoch": 1.4211061195736279, + "grad_norm": 0.06150273606181145, + "learning_rate": 1.1336571294268045e-05, + "loss": 0.3642953634262085, + "step": 7691 + }, + { + "epoch": 1.4212908962825237, + "grad_norm": 0.06689255684614182, + "learning_rate": 1.133459425147065e-05, + "loss": 0.4309183359146118, + "step": 7692 + }, + { + "epoch": 1.4214756729914195, + "grad_norm": 0.06311143189668655, + "learning_rate": 1.1332617155560578e-05, + "loss": 0.3785903751850128, + "step": 7693 + }, + { + "epoch": 1.4216604497003154, + "grad_norm": 0.06366381794214249, + "learning_rate": 1.1330640006616514e-05, + "loss": 0.42182084918022156, + "step": 7694 + }, + { + "epoch": 1.4218452264092112, + "grad_norm": 0.073325976729393, + "learning_rate": 1.132866280471714e-05, + "loss": 0.4598470628261566, + "step": 7695 + }, + { + "epoch": 1.422030003118107, + "grad_norm": 0.08542947471141815, + "learning_rate": 1.1326685549941144e-05, + "loss": 0.503866970539093, + "step": 7696 + }, + { + "epoch": 1.4222147798270028, + "grad_norm": 0.08375982195138931, + "learning_rate": 1.1324708242367211e-05, + "loss": 0.5992326140403748, + "step": 7697 + }, + { + "epoch": 1.4223995565358987, + "grad_norm": 0.06164781376719475, + "learning_rate": 1.1322730882074036e-05, + "loss": 0.4079485833644867, + "step": 7698 + }, + { + "epoch": 1.4225843332447945, + "grad_norm": 0.05721287056803703, + "learning_rate": 1.132075346914031e-05, + "loss": 0.33973169326782227, + "step": 7699 + }, + { + "epoch": 1.4227691099536903, + "grad_norm": 0.06620140373706818, + "learning_rate": 1.1318776003644729e-05, + "loss": 0.4641711115837097, + "step": 7700 + }, + { + "epoch": 1.4229538866625862, + "grad_norm": 0.08204913884401321, + "learning_rate": 1.1316798485665989e-05, + "loss": 0.4957462251186371, + "step": 7701 + }, + { + "epoch": 1.423138663371482, + "grad_norm": 0.07838209718465805, + "learning_rate": 1.131482091528279e-05, + "loss": 0.6666757464408875, + "step": 7702 + }, + { + "epoch": 1.4233234400803778, + "grad_norm": 0.08237612992525101, + "learning_rate": 1.1312843292573827e-05, + "loss": 0.568879246711731, + "step": 7703 + }, + { + "epoch": 1.4235082167892736, + "grad_norm": 0.0746271014213562, + "learning_rate": 1.1310865617617814e-05, + "loss": 0.47434210777282715, + "step": 7704 + }, + { + "epoch": 1.4236929934981695, + "grad_norm": 0.07584428042173386, + "learning_rate": 1.1308887890493448e-05, + "loss": 0.45992404222488403, + "step": 7705 + }, + { + "epoch": 1.4238777702070653, + "grad_norm": 0.06510897725820541, + "learning_rate": 1.130691011127944e-05, + "loss": 0.3984183967113495, + "step": 7706 + }, + { + "epoch": 1.4240625469159611, + "grad_norm": 0.08189408481121063, + "learning_rate": 1.1304932280054497e-05, + "loss": 0.5754963755607605, + "step": 7707 + }, + { + "epoch": 1.424247323624857, + "grad_norm": 0.0759274810552597, + "learning_rate": 1.1302954396897333e-05, + "loss": 0.6165765523910522, + "step": 7708 + }, + { + "epoch": 1.424432100333753, + "grad_norm": 0.09124520421028137, + "learning_rate": 1.130097646188666e-05, + "loss": 0.6912825703620911, + "step": 7709 + }, + { + "epoch": 1.4246168770426488, + "grad_norm": 0.07441210001707077, + "learning_rate": 1.1298998475101193e-05, + "loss": 0.5612799525260925, + "step": 7710 + }, + { + "epoch": 1.4248016537515447, + "grad_norm": 0.05193053185939789, + "learning_rate": 1.1297020436619652e-05, + "loss": 0.31158825755119324, + "step": 7711 + }, + { + "epoch": 1.4249864304604405, + "grad_norm": 0.08454303443431854, + "learning_rate": 1.1295042346520755e-05, + "loss": 0.5870208740234375, + "step": 7712 + }, + { + "epoch": 1.4251712071693363, + "grad_norm": 0.08547506481409073, + "learning_rate": 1.1293064204883225e-05, + "loss": 0.5300642848014832, + "step": 7713 + }, + { + "epoch": 1.4253559838782321, + "grad_norm": 0.06898822635412216, + "learning_rate": 1.1291086011785785e-05, + "loss": 0.41161859035491943, + "step": 7714 + }, + { + "epoch": 1.425540760587128, + "grad_norm": 0.06064549833536148, + "learning_rate": 1.128910776730716e-05, + "loss": 0.4096584916114807, + "step": 7715 + }, + { + "epoch": 1.4257255372960238, + "grad_norm": 0.09677895158529282, + "learning_rate": 1.1287129471526081e-05, + "loss": 0.6471278667449951, + "step": 7716 + }, + { + "epoch": 1.4259103140049196, + "grad_norm": 0.07851700484752655, + "learning_rate": 1.1285151124521274e-05, + "loss": 0.5760736465454102, + "step": 7717 + }, + { + "epoch": 1.4260950907138155, + "grad_norm": 0.0756153017282486, + "learning_rate": 1.1283172726371473e-05, + "loss": 0.5310165286064148, + "step": 7718 + }, + { + "epoch": 1.4262798674227113, + "grad_norm": 0.0864722952246666, + "learning_rate": 1.1281194277155414e-05, + "loss": 0.5797637104988098, + "step": 7719 + }, + { + "epoch": 1.4264646441316073, + "grad_norm": 0.06263674050569534, + "learning_rate": 1.1279215776951828e-05, + "loss": 0.49331384897232056, + "step": 7720 + }, + { + "epoch": 1.4266494208405032, + "grad_norm": 0.08064206689596176, + "learning_rate": 1.1277237225839459e-05, + "loss": 0.5718737840652466, + "step": 7721 + }, + { + "epoch": 1.426834197549399, + "grad_norm": 0.04651773348450661, + "learning_rate": 1.1275258623897042e-05, + "loss": 0.33713024854660034, + "step": 7722 + }, + { + "epoch": 1.4270189742582948, + "grad_norm": 0.07942657172679901, + "learning_rate": 1.1273279971203324e-05, + "loss": 0.48411282896995544, + "step": 7723 + }, + { + "epoch": 1.4272037509671907, + "grad_norm": 0.059395305812358856, + "learning_rate": 1.1271301267837045e-05, + "loss": 0.3102441728115082, + "step": 7724 + }, + { + "epoch": 1.4273885276760865, + "grad_norm": 0.07383601367473602, + "learning_rate": 1.1269322513876955e-05, + "loss": 0.5118647813796997, + "step": 7725 + }, + { + "epoch": 1.4275733043849823, + "grad_norm": 0.06309115141630173, + "learning_rate": 1.1267343709401798e-05, + "loss": 0.4012720584869385, + "step": 7726 + }, + { + "epoch": 1.4277580810938781, + "grad_norm": 0.073763407766819, + "learning_rate": 1.1265364854490326e-05, + "loss": 0.4894561171531677, + "step": 7727 + }, + { + "epoch": 1.427942857802774, + "grad_norm": 0.08136720210313797, + "learning_rate": 1.1263385949221294e-05, + "loss": 0.6185980439186096, + "step": 7728 + }, + { + "epoch": 1.4281276345116698, + "grad_norm": 0.07367828488349915, + "learning_rate": 1.1261406993673451e-05, + "loss": 0.4043392837047577, + "step": 7729 + }, + { + "epoch": 1.4283124112205656, + "grad_norm": 0.07976184040307999, + "learning_rate": 1.1259427987925558e-05, + "loss": 0.6872898936271667, + "step": 7730 + }, + { + "epoch": 1.4284971879294615, + "grad_norm": 0.06573823094367981, + "learning_rate": 1.1257448932056373e-05, + "loss": 0.43768876791000366, + "step": 7731 + }, + { + "epoch": 1.4286819646383573, + "grad_norm": 0.059378981590270996, + "learning_rate": 1.125546982614465e-05, + "loss": 0.36808469891548157, + "step": 7732 + }, + { + "epoch": 1.428866741347253, + "grad_norm": 0.08485148102045059, + "learning_rate": 1.1253490670269158e-05, + "loss": 0.4989584982395172, + "step": 7733 + }, + { + "epoch": 1.429051518056149, + "grad_norm": 0.07902685552835464, + "learning_rate": 1.125151146450866e-05, + "loss": 0.562540590763092, + "step": 7734 + }, + { + "epoch": 1.4292362947650448, + "grad_norm": 0.07742798328399658, + "learning_rate": 1.1249532208941922e-05, + "loss": 0.47228848934173584, + "step": 7735 + }, + { + "epoch": 1.4294210714739406, + "grad_norm": 0.08142773061990738, + "learning_rate": 1.1247552903647709e-05, + "loss": 0.5640918612480164, + "step": 7736 + }, + { + "epoch": 1.4296058481828364, + "grad_norm": 0.07114843279123306, + "learning_rate": 1.1245573548704793e-05, + "loss": 0.4797847867012024, + "step": 7737 + }, + { + "epoch": 1.4297906248917325, + "grad_norm": 0.07043295353651047, + "learning_rate": 1.1243594144191949e-05, + "loss": 0.45127490162849426, + "step": 7738 + }, + { + "epoch": 1.4299754016006283, + "grad_norm": 0.08176163583993912, + "learning_rate": 1.1241614690187947e-05, + "loss": 0.565257728099823, + "step": 7739 + }, + { + "epoch": 1.4301601783095241, + "grad_norm": 0.06472037732601166, + "learning_rate": 1.1239635186771565e-05, + "loss": 0.3978027403354645, + "step": 7740 + }, + { + "epoch": 1.43034495501842, + "grad_norm": 0.06999760866165161, + "learning_rate": 1.1237655634021582e-05, + "loss": 0.4746716022491455, + "step": 7741 + }, + { + "epoch": 1.4305297317273158, + "grad_norm": 0.05453413724899292, + "learning_rate": 1.1235676032016777e-05, + "loss": 0.32790878415107727, + "step": 7742 + }, + { + "epoch": 1.4307145084362116, + "grad_norm": 0.06192592531442642, + "learning_rate": 1.123369638083593e-05, + "loss": 0.38390231132507324, + "step": 7743 + }, + { + "epoch": 1.4308992851451074, + "grad_norm": 0.07822002470493317, + "learning_rate": 1.1231716680557829e-05, + "loss": 0.46052321791648865, + "step": 7744 + }, + { + "epoch": 1.4310840618540033, + "grad_norm": 0.06971774250268936, + "learning_rate": 1.1229736931261258e-05, + "loss": 0.4210222065448761, + "step": 7745 + }, + { + "epoch": 1.431268838562899, + "grad_norm": 0.07256211340427399, + "learning_rate": 1.1227757133025002e-05, + "loss": 0.558273434638977, + "step": 7746 + }, + { + "epoch": 1.431453615271795, + "grad_norm": 0.05814126878976822, + "learning_rate": 1.1225777285927854e-05, + "loss": 0.34946370124816895, + "step": 7747 + }, + { + "epoch": 1.4316383919806908, + "grad_norm": 0.06424988806247711, + "learning_rate": 1.1223797390048607e-05, + "loss": 0.4597241282463074, + "step": 7748 + }, + { + "epoch": 1.4318231686895868, + "grad_norm": 0.08351857215166092, + "learning_rate": 1.122181744546605e-05, + "loss": 0.5707598924636841, + "step": 7749 + }, + { + "epoch": 1.4320079453984826, + "grad_norm": 0.06891462951898575, + "learning_rate": 1.1219837452258982e-05, + "loss": 0.49332642555236816, + "step": 7750 + }, + { + "epoch": 1.4321927221073785, + "grad_norm": 0.080210842192173, + "learning_rate": 1.12178574105062e-05, + "loss": 0.530530571937561, + "step": 7751 + }, + { + "epoch": 1.4323774988162743, + "grad_norm": 0.07507038861513138, + "learning_rate": 1.1215877320286506e-05, + "loss": 0.5159768462181091, + "step": 7752 + }, + { + "epoch": 1.4325622755251701, + "grad_norm": 0.06869399547576904, + "learning_rate": 1.1213897181678692e-05, + "loss": 0.4092599153518677, + "step": 7753 + }, + { + "epoch": 1.432747052234066, + "grad_norm": 0.059218842536211014, + "learning_rate": 1.1211916994761574e-05, + "loss": 0.4262658953666687, + "step": 7754 + }, + { + "epoch": 1.4329318289429618, + "grad_norm": 0.07015969604253769, + "learning_rate": 1.120993675961395e-05, + "loss": 0.4874521493911743, + "step": 7755 + }, + { + "epoch": 1.4331166056518576, + "grad_norm": 0.08915772289037704, + "learning_rate": 1.1207956476314625e-05, + "loss": 0.6265583038330078, + "step": 7756 + }, + { + "epoch": 1.4333013823607534, + "grad_norm": 0.08509238064289093, + "learning_rate": 1.1205976144942415e-05, + "loss": 0.5946600437164307, + "step": 7757 + }, + { + "epoch": 1.4334861590696493, + "grad_norm": 0.06658284366130829, + "learning_rate": 1.1203995765576128e-05, + "loss": 0.4934423565864563, + "step": 7758 + }, + { + "epoch": 1.433670935778545, + "grad_norm": 0.08315644413232803, + "learning_rate": 1.1202015338294574e-05, + "loss": 0.6432234644889832, + "step": 7759 + }, + { + "epoch": 1.433855712487441, + "grad_norm": 0.09150480479001999, + "learning_rate": 1.120003486317657e-05, + "loss": 0.7058236002922058, + "step": 7760 + }, + { + "epoch": 1.4340404891963368, + "grad_norm": 0.08542300760746002, + "learning_rate": 1.1198054340300934e-05, + "loss": 0.5598074197769165, + "step": 7761 + }, + { + "epoch": 1.4342252659052326, + "grad_norm": 0.07881592959165573, + "learning_rate": 1.1196073769746485e-05, + "loss": 0.49808305501937866, + "step": 7762 + }, + { + "epoch": 1.4344100426141284, + "grad_norm": 0.07317841798067093, + "learning_rate": 1.1194093151592037e-05, + "loss": 0.476825475692749, + "step": 7763 + }, + { + "epoch": 1.4345948193230242, + "grad_norm": 0.08663074672222137, + "learning_rate": 1.1192112485916422e-05, + "loss": 0.6702808141708374, + "step": 7764 + }, + { + "epoch": 1.43477959603192, + "grad_norm": 0.06999069452285767, + "learning_rate": 1.1190131772798461e-05, + "loss": 0.42619284987449646, + "step": 7765 + }, + { + "epoch": 1.434964372740816, + "grad_norm": 0.06526099145412445, + "learning_rate": 1.1188151012316974e-05, + "loss": 0.46033430099487305, + "step": 7766 + }, + { + "epoch": 1.4351491494497117, + "grad_norm": 0.05214836075901985, + "learning_rate": 1.1186170204550796e-05, + "loss": 0.31312480568885803, + "step": 7767 + }, + { + "epoch": 1.4353339261586078, + "grad_norm": 0.06493684649467468, + "learning_rate": 1.1184189349578756e-05, + "loss": 0.4746323525905609, + "step": 7768 + }, + { + "epoch": 1.4355187028675036, + "grad_norm": 0.0669315978884697, + "learning_rate": 1.1182208447479682e-05, + "loss": 0.485589861869812, + "step": 7769 + }, + { + "epoch": 1.4357034795763994, + "grad_norm": 0.08709082007408142, + "learning_rate": 1.1180227498332413e-05, + "loss": 0.6753013730049133, + "step": 7770 + }, + { + "epoch": 1.4358882562852953, + "grad_norm": 0.07773923128843307, + "learning_rate": 1.1178246502215782e-05, + "loss": 0.5319858193397522, + "step": 7771 + }, + { + "epoch": 1.436073032994191, + "grad_norm": 0.07116381824016571, + "learning_rate": 1.1176265459208629e-05, + "loss": 0.4657229781150818, + "step": 7772 + }, + { + "epoch": 1.436257809703087, + "grad_norm": 0.06802497059106827, + "learning_rate": 1.1174284369389783e-05, + "loss": 0.4718885123729706, + "step": 7773 + }, + { + "epoch": 1.4364425864119827, + "grad_norm": 0.08443068712949753, + "learning_rate": 1.11723032328381e-05, + "loss": 0.573927104473114, + "step": 7774 + }, + { + "epoch": 1.4366273631208786, + "grad_norm": 0.06734126806259155, + "learning_rate": 1.1170322049632415e-05, + "loss": 0.38770437240600586, + "step": 7775 + }, + { + "epoch": 1.4368121398297744, + "grad_norm": 0.08613882213830948, + "learning_rate": 1.116834081985157e-05, + "loss": 0.5945224761962891, + "step": 7776 + }, + { + "epoch": 1.4369969165386702, + "grad_norm": 0.07484793663024902, + "learning_rate": 1.1166359543574417e-05, + "loss": 0.5455222725868225, + "step": 7777 + }, + { + "epoch": 1.437181693247566, + "grad_norm": 0.07464735954999924, + "learning_rate": 1.1164378220879805e-05, + "loss": 0.513776957988739, + "step": 7778 + }, + { + "epoch": 1.4373664699564621, + "grad_norm": 0.08073262125253677, + "learning_rate": 1.1162396851846582e-05, + "loss": 0.7173177003860474, + "step": 7779 + }, + { + "epoch": 1.437551246665358, + "grad_norm": 0.07891233265399933, + "learning_rate": 1.11604154365536e-05, + "loss": 0.6460133790969849, + "step": 7780 + }, + { + "epoch": 1.4377360233742538, + "grad_norm": 0.06489276140928268, + "learning_rate": 1.1158433975079716e-05, + "loss": 0.4140462279319763, + "step": 7781 + }, + { + "epoch": 1.4379208000831496, + "grad_norm": 0.0625050961971283, + "learning_rate": 1.1156452467503785e-05, + "loss": 0.36107245087623596, + "step": 7782 + }, + { + "epoch": 1.4381055767920454, + "grad_norm": 0.06572655588388443, + "learning_rate": 1.1154470913904663e-05, + "loss": 0.3627634346485138, + "step": 7783 + }, + { + "epoch": 1.4382903535009413, + "grad_norm": 0.06454043090343475, + "learning_rate": 1.1152489314361208e-05, + "loss": 0.38726598024368286, + "step": 7784 + }, + { + "epoch": 1.438475130209837, + "grad_norm": 0.08084996044635773, + "learning_rate": 1.1150507668952287e-05, + "loss": 0.6202616691589355, + "step": 7785 + }, + { + "epoch": 1.438659906918733, + "grad_norm": 0.06921318173408508, + "learning_rate": 1.1148525977756757e-05, + "loss": 0.6285109519958496, + "step": 7786 + }, + { + "epoch": 1.4388446836276287, + "grad_norm": 0.08460685610771179, + "learning_rate": 1.1146544240853488e-05, + "loss": 0.546532928943634, + "step": 7787 + }, + { + "epoch": 1.4390294603365246, + "grad_norm": 0.0826251357793808, + "learning_rate": 1.1144562458321346e-05, + "loss": 0.5390278100967407, + "step": 7788 + }, + { + "epoch": 1.4392142370454204, + "grad_norm": 0.09981584548950195, + "learning_rate": 1.1142580630239197e-05, + "loss": 0.5163625478744507, + "step": 7789 + }, + { + "epoch": 1.4393990137543162, + "grad_norm": 0.08871352672576904, + "learning_rate": 1.1140598756685917e-05, + "loss": 0.5099503397941589, + "step": 7790 + }, + { + "epoch": 1.439583790463212, + "grad_norm": 0.06853802502155304, + "learning_rate": 1.1138616837740373e-05, + "loss": 0.34996840357780457, + "step": 7791 + }, + { + "epoch": 1.4397685671721079, + "grad_norm": 0.07858351618051529, + "learning_rate": 1.1136634873481442e-05, + "loss": 0.5394749045372009, + "step": 7792 + }, + { + "epoch": 1.4399533438810037, + "grad_norm": 0.07858268171548843, + "learning_rate": 1.1134652863987996e-05, + "loss": 0.5309223532676697, + "step": 7793 + }, + { + "epoch": 1.4401381205898995, + "grad_norm": 0.06469916552305222, + "learning_rate": 1.1132670809338916e-05, + "loss": 0.42184239625930786, + "step": 7794 + }, + { + "epoch": 1.4403228972987954, + "grad_norm": 0.09995020925998688, + "learning_rate": 1.1130688709613087e-05, + "loss": 0.58648282289505, + "step": 7795 + }, + { + "epoch": 1.4405076740076912, + "grad_norm": 0.0675228163599968, + "learning_rate": 1.112870656488938e-05, + "loss": 0.41711270809173584, + "step": 7796 + }, + { + "epoch": 1.4406924507165872, + "grad_norm": 0.06286881119012833, + "learning_rate": 1.1126724375246685e-05, + "loss": 0.47400546073913574, + "step": 7797 + }, + { + "epoch": 1.440877227425483, + "grad_norm": 0.07993468642234802, + "learning_rate": 1.1124742140763884e-05, + "loss": 0.5307193994522095, + "step": 7798 + }, + { + "epoch": 1.441062004134379, + "grad_norm": 0.06807641685009003, + "learning_rate": 1.1122759861519864e-05, + "loss": 0.5481207370758057, + "step": 7799 + }, + { + "epoch": 1.4412467808432747, + "grad_norm": 0.07621538639068604, + "learning_rate": 1.1120777537593516e-05, + "loss": 0.4112272262573242, + "step": 7800 + }, + { + "epoch": 1.4414315575521706, + "grad_norm": 0.06728537380695343, + "learning_rate": 1.1118795169063728e-05, + "loss": 0.45030948519706726, + "step": 7801 + }, + { + "epoch": 1.4416163342610664, + "grad_norm": 0.08467714488506317, + "learning_rate": 1.1116812756009394e-05, + "loss": 0.5709100365638733, + "step": 7802 + }, + { + "epoch": 1.4418011109699622, + "grad_norm": 0.07138433307409286, + "learning_rate": 1.1114830298509403e-05, + "loss": 0.5521631240844727, + "step": 7803 + }, + { + "epoch": 1.441985887678858, + "grad_norm": 0.08049609512090683, + "learning_rate": 1.1112847796642654e-05, + "loss": 0.632592499256134, + "step": 7804 + }, + { + "epoch": 1.4421706643877539, + "grad_norm": 0.07343754172325134, + "learning_rate": 1.1110865250488047e-05, + "loss": 0.4223109483718872, + "step": 7805 + }, + { + "epoch": 1.4423554410966497, + "grad_norm": 0.0751941129565239, + "learning_rate": 1.1108882660124479e-05, + "loss": 0.5047143697738647, + "step": 7806 + }, + { + "epoch": 1.4425402178055455, + "grad_norm": 0.07815185189247131, + "learning_rate": 1.110690002563085e-05, + "loss": 0.4017481207847595, + "step": 7807 + }, + { + "epoch": 1.4427249945144416, + "grad_norm": 0.07059190422296524, + "learning_rate": 1.1104917347086059e-05, + "loss": 0.44762492179870605, + "step": 7808 + }, + { + "epoch": 1.4429097712233374, + "grad_norm": 0.08881764858961105, + "learning_rate": 1.1102934624569017e-05, + "loss": 0.7014904618263245, + "step": 7809 + }, + { + "epoch": 1.4430945479322332, + "grad_norm": 0.07527472078800201, + "learning_rate": 1.1100951858158629e-05, + "loss": 0.4444360136985779, + "step": 7810 + }, + { + "epoch": 1.443279324641129, + "grad_norm": 0.08891136944293976, + "learning_rate": 1.1098969047933798e-05, + "loss": 0.6279330849647522, + "step": 7811 + }, + { + "epoch": 1.443464101350025, + "grad_norm": 0.08965428918600082, + "learning_rate": 1.109698619397344e-05, + "loss": 0.5877953767776489, + "step": 7812 + }, + { + "epoch": 1.4436488780589207, + "grad_norm": 0.06243619695305824, + "learning_rate": 1.1095003296356463e-05, + "loss": 0.4526597857475281, + "step": 7813 + }, + { + "epoch": 1.4438336547678166, + "grad_norm": 0.07425308972597122, + "learning_rate": 1.109302035516178e-05, + "loss": 0.4679838716983795, + "step": 7814 + }, + { + "epoch": 1.4440184314767124, + "grad_norm": 0.06625751405954361, + "learning_rate": 1.1091037370468307e-05, + "loss": 0.49603068828582764, + "step": 7815 + }, + { + "epoch": 1.4442032081856082, + "grad_norm": 0.07407105714082718, + "learning_rate": 1.1089054342354962e-05, + "loss": 0.4844321608543396, + "step": 7816 + }, + { + "epoch": 1.444387984894504, + "grad_norm": 0.08134344965219498, + "learning_rate": 1.108707127090066e-05, + "loss": 0.6283643245697021, + "step": 7817 + }, + { + "epoch": 1.4445727616033999, + "grad_norm": 0.08917857706546783, + "learning_rate": 1.1085088156184321e-05, + "loss": 0.686138927936554, + "step": 7818 + }, + { + "epoch": 1.4447575383122957, + "grad_norm": 0.06348041445016861, + "learning_rate": 1.1083104998284868e-05, + "loss": 0.39687955379486084, + "step": 7819 + }, + { + "epoch": 1.4449423150211915, + "grad_norm": 0.0743764117360115, + "learning_rate": 1.1081121797281227e-05, + "loss": 0.4445897042751312, + "step": 7820 + }, + { + "epoch": 1.4451270917300874, + "grad_norm": 0.06222608685493469, + "learning_rate": 1.1079138553252318e-05, + "loss": 0.4394078552722931, + "step": 7821 + }, + { + "epoch": 1.4453118684389832, + "grad_norm": 0.0755404457449913, + "learning_rate": 1.1077155266277074e-05, + "loss": 0.6143812537193298, + "step": 7822 + }, + { + "epoch": 1.445496645147879, + "grad_norm": 0.07101725041866302, + "learning_rate": 1.1075171936434416e-05, + "loss": 0.47755134105682373, + "step": 7823 + }, + { + "epoch": 1.4456814218567748, + "grad_norm": 0.07291561365127563, + "learning_rate": 1.1073188563803283e-05, + "loss": 0.4806116223335266, + "step": 7824 + }, + { + "epoch": 1.4458661985656707, + "grad_norm": 0.10162265598773956, + "learning_rate": 1.1071205148462602e-05, + "loss": 0.696403980255127, + "step": 7825 + }, + { + "epoch": 1.4460509752745667, + "grad_norm": 0.07536455243825912, + "learning_rate": 1.1069221690491306e-05, + "loss": 0.539941132068634, + "step": 7826 + }, + { + "epoch": 1.4462357519834625, + "grad_norm": 0.060847558081150055, + "learning_rate": 1.1067238189968331e-05, + "loss": 0.338792085647583, + "step": 7827 + }, + { + "epoch": 1.4464205286923584, + "grad_norm": 0.08490575104951859, + "learning_rate": 1.1065254646972618e-05, + "loss": 0.7636604905128479, + "step": 7828 + }, + { + "epoch": 1.4466053054012542, + "grad_norm": 0.08725559711456299, + "learning_rate": 1.10632710615831e-05, + "loss": 0.6760256886482239, + "step": 7829 + }, + { + "epoch": 1.44679008211015, + "grad_norm": 0.06068837270140648, + "learning_rate": 1.1061287433878722e-05, + "loss": 0.4545172154903412, + "step": 7830 + }, + { + "epoch": 1.4469748588190459, + "grad_norm": 0.06264138966798782, + "learning_rate": 1.1059303763938426e-05, + "loss": 0.3595879077911377, + "step": 7831 + }, + { + "epoch": 1.4471596355279417, + "grad_norm": 0.09896326810121536, + "learning_rate": 1.1057320051841152e-05, + "loss": 0.5653212070465088, + "step": 7832 + }, + { + "epoch": 1.4473444122368375, + "grad_norm": 0.07048387080430984, + "learning_rate": 1.1055336297665849e-05, + "loss": 0.4967333972454071, + "step": 7833 + }, + { + "epoch": 1.4475291889457333, + "grad_norm": 0.08791311085224152, + "learning_rate": 1.1053352501491464e-05, + "loss": 0.5783764719963074, + "step": 7834 + }, + { + "epoch": 1.4477139656546292, + "grad_norm": 0.07617121189832687, + "learning_rate": 1.1051368663396943e-05, + "loss": 0.4915910065174103, + "step": 7835 + }, + { + "epoch": 1.447898742363525, + "grad_norm": 0.06257335841655731, + "learning_rate": 1.1049384783461237e-05, + "loss": 0.4560215473175049, + "step": 7836 + }, + { + "epoch": 1.448083519072421, + "grad_norm": 0.07558248937129974, + "learning_rate": 1.1047400861763303e-05, + "loss": 0.5505630970001221, + "step": 7837 + }, + { + "epoch": 1.4482682957813169, + "grad_norm": 0.0665748342871666, + "learning_rate": 1.1045416898382088e-05, + "loss": 0.47031649947166443, + "step": 7838 + }, + { + "epoch": 1.4484530724902127, + "grad_norm": 0.0672403872013092, + "learning_rate": 1.1043432893396554e-05, + "loss": 0.44376689195632935, + "step": 7839 + }, + { + "epoch": 1.4486378491991085, + "grad_norm": 0.06862741708755493, + "learning_rate": 1.1041448846885654e-05, + "loss": 0.4826169013977051, + "step": 7840 + }, + { + "epoch": 1.4488226259080044, + "grad_norm": 0.08931844681501389, + "learning_rate": 1.1039464758928351e-05, + "loss": 0.6299209594726562, + "step": 7841 + }, + { + "epoch": 1.4490074026169002, + "grad_norm": 0.06782756000757217, + "learning_rate": 1.1037480629603599e-05, + "loss": 0.3971423804759979, + "step": 7842 + }, + { + "epoch": 1.449192179325796, + "grad_norm": 0.07321605831384659, + "learning_rate": 1.1035496458990365e-05, + "loss": 0.5970366597175598, + "step": 7843 + }, + { + "epoch": 1.4493769560346919, + "grad_norm": 0.07662840187549591, + "learning_rate": 1.1033512247167612e-05, + "loss": 0.5170060992240906, + "step": 7844 + }, + { + "epoch": 1.4495617327435877, + "grad_norm": 0.08513103425502777, + "learning_rate": 1.1031527994214303e-05, + "loss": 0.6284051537513733, + "step": 7845 + }, + { + "epoch": 1.4497465094524835, + "grad_norm": 0.0876043513417244, + "learning_rate": 1.102954370020941e-05, + "loss": 0.658281683921814, + "step": 7846 + }, + { + "epoch": 1.4499312861613793, + "grad_norm": 0.07371240109205246, + "learning_rate": 1.10275593652319e-05, + "loss": 0.5149878859519958, + "step": 7847 + }, + { + "epoch": 1.4501160628702752, + "grad_norm": 0.08449136465787888, + "learning_rate": 1.102557498936074e-05, + "loss": 0.5654085874557495, + "step": 7848 + }, + { + "epoch": 1.450300839579171, + "grad_norm": 0.08550971746444702, + "learning_rate": 1.10235905726749e-05, + "loss": 0.5113701820373535, + "step": 7849 + }, + { + "epoch": 1.4504856162880668, + "grad_norm": 0.07894014567136765, + "learning_rate": 1.1021606115253362e-05, + "loss": 0.5105152726173401, + "step": 7850 + }, + { + "epoch": 1.4506703929969627, + "grad_norm": 0.06562106311321259, + "learning_rate": 1.1019621617175098e-05, + "loss": 0.48219433426856995, + "step": 7851 + }, + { + "epoch": 1.4508551697058585, + "grad_norm": 0.06805983185768127, + "learning_rate": 1.101763707851908e-05, + "loss": 0.3967460095882416, + "step": 7852 + }, + { + "epoch": 1.4510399464147543, + "grad_norm": 0.08153540641069412, + "learning_rate": 1.1015652499364294e-05, + "loss": 0.5121418833732605, + "step": 7853 + }, + { + "epoch": 1.4512247231236501, + "grad_norm": 0.0606747642159462, + "learning_rate": 1.1013667879789713e-05, + "loss": 0.3817494511604309, + "step": 7854 + }, + { + "epoch": 1.4514094998325462, + "grad_norm": 0.07964327931404114, + "learning_rate": 1.1011683219874324e-05, + "loss": 0.6836763024330139, + "step": 7855 + }, + { + "epoch": 1.451594276541442, + "grad_norm": 0.07234257459640503, + "learning_rate": 1.1009698519697106e-05, + "loss": 0.5018043518066406, + "step": 7856 + }, + { + "epoch": 1.4517790532503378, + "grad_norm": 0.06623294204473495, + "learning_rate": 1.1007713779337046e-05, + "loss": 0.5385190844535828, + "step": 7857 + }, + { + "epoch": 1.4519638299592337, + "grad_norm": 0.0786714255809784, + "learning_rate": 1.1005728998873132e-05, + "loss": 0.5198732614517212, + "step": 7858 + }, + { + "epoch": 1.4521486066681295, + "grad_norm": 0.08908972889184952, + "learning_rate": 1.1003744178384347e-05, + "loss": 0.6613372564315796, + "step": 7859 + }, + { + "epoch": 1.4523333833770253, + "grad_norm": 0.07145047187805176, + "learning_rate": 1.1001759317949687e-05, + "loss": 0.509187638759613, + "step": 7860 + }, + { + "epoch": 1.4525181600859212, + "grad_norm": 0.08758586645126343, + "learning_rate": 1.0999774417648141e-05, + "loss": 0.5257436633110046, + "step": 7861 + }, + { + "epoch": 1.452702936794817, + "grad_norm": 0.08765916526317596, + "learning_rate": 1.09977894775587e-05, + "loss": 0.6729578375816345, + "step": 7862 + }, + { + "epoch": 1.4528877135037128, + "grad_norm": 0.08089493960142136, + "learning_rate": 1.0995804497760358e-05, + "loss": 0.6460222005844116, + "step": 7863 + }, + { + "epoch": 1.4530724902126086, + "grad_norm": 0.07192704826593399, + "learning_rate": 1.0993819478332114e-05, + "loss": 0.46706151962280273, + "step": 7864 + }, + { + "epoch": 1.4532572669215045, + "grad_norm": 0.07399758696556091, + "learning_rate": 1.0991834419352963e-05, + "loss": 0.4869329333305359, + "step": 7865 + }, + { + "epoch": 1.4534420436304005, + "grad_norm": 0.09239742904901505, + "learning_rate": 1.0989849320901905e-05, + "loss": 0.6180732250213623, + "step": 7866 + }, + { + "epoch": 1.4536268203392964, + "grad_norm": 0.07837023586034775, + "learning_rate": 1.0987864183057943e-05, + "loss": 0.41628366708755493, + "step": 7867 + }, + { + "epoch": 1.4538115970481922, + "grad_norm": 0.07429380714893341, + "learning_rate": 1.0985879005900078e-05, + "loss": 0.5442554354667664, + "step": 7868 + }, + { + "epoch": 1.453996373757088, + "grad_norm": 0.08155282586812973, + "learning_rate": 1.0983893789507307e-05, + "loss": 0.5126806497573853, + "step": 7869 + }, + { + "epoch": 1.4541811504659838, + "grad_norm": 0.0866590216755867, + "learning_rate": 1.0981908533958646e-05, + "loss": 0.517244815826416, + "step": 7870 + }, + { + "epoch": 1.4543659271748797, + "grad_norm": 0.07580895721912384, + "learning_rate": 1.0979923239333099e-05, + "loss": 0.4432229995727539, + "step": 7871 + }, + { + "epoch": 1.4545507038837755, + "grad_norm": 0.08365512639284134, + "learning_rate": 1.0977937905709667e-05, + "loss": 0.5921037197113037, + "step": 7872 + }, + { + "epoch": 1.4547354805926713, + "grad_norm": 0.08475431054830551, + "learning_rate": 1.0975952533167369e-05, + "loss": 0.5824610590934753, + "step": 7873 + }, + { + "epoch": 1.4549202573015672, + "grad_norm": 0.070068359375, + "learning_rate": 1.0973967121785216e-05, + "loss": 0.48140326142311096, + "step": 7874 + }, + { + "epoch": 1.455105034010463, + "grad_norm": 0.08091516047716141, + "learning_rate": 1.0971981671642216e-05, + "loss": 0.5462980270385742, + "step": 7875 + }, + { + "epoch": 1.4552898107193588, + "grad_norm": 0.09640698879957199, + "learning_rate": 1.0969996182817387e-05, + "loss": 0.5758302807807922, + "step": 7876 + }, + { + "epoch": 1.4554745874282546, + "grad_norm": 0.06986507028341293, + "learning_rate": 1.0968010655389745e-05, + "loss": 0.49991628527641296, + "step": 7877 + }, + { + "epoch": 1.4556593641371505, + "grad_norm": 0.06571005284786224, + "learning_rate": 1.0966025089438309e-05, + "loss": 0.4069470167160034, + "step": 7878 + }, + { + "epoch": 1.4558441408460463, + "grad_norm": 0.0801084116101265, + "learning_rate": 1.0964039485042091e-05, + "loss": 0.663557767868042, + "step": 7879 + }, + { + "epoch": 1.4560289175549421, + "grad_norm": 0.08647681772708893, + "learning_rate": 1.0962053842280123e-05, + "loss": 0.6868690848350525, + "step": 7880 + }, + { + "epoch": 1.456213694263838, + "grad_norm": 0.06825637072324753, + "learning_rate": 1.0960068161231422e-05, + "loss": 0.537468433380127, + "step": 7881 + }, + { + "epoch": 1.4563984709727338, + "grad_norm": 0.08584998548030853, + "learning_rate": 1.0958082441975009e-05, + "loss": 0.5897773504257202, + "step": 7882 + }, + { + "epoch": 1.4565832476816296, + "grad_norm": 0.08932146430015564, + "learning_rate": 1.0956096684589911e-05, + "loss": 0.6655458211898804, + "step": 7883 + }, + { + "epoch": 1.4567680243905254, + "grad_norm": 0.07608090341091156, + "learning_rate": 1.095411088915516e-05, + "loss": 0.5363531708717346, + "step": 7884 + }, + { + "epoch": 1.4569528010994215, + "grad_norm": 0.07527932524681091, + "learning_rate": 1.0952125055749779e-05, + "loss": 0.5014275908470154, + "step": 7885 + }, + { + "epoch": 1.4571375778083173, + "grad_norm": 0.06645578145980835, + "learning_rate": 1.0950139184452799e-05, + "loss": 0.46748968958854675, + "step": 7886 + }, + { + "epoch": 1.4573223545172131, + "grad_norm": 0.07744214683771133, + "learning_rate": 1.0948153275343255e-05, + "loss": 0.6590765714645386, + "step": 7887 + }, + { + "epoch": 1.457507131226109, + "grad_norm": 0.061628442257642746, + "learning_rate": 1.0946167328500175e-05, + "loss": 0.36661460995674133, + "step": 7888 + }, + { + "epoch": 1.4576919079350048, + "grad_norm": 0.08004427701234818, + "learning_rate": 1.0944181344002596e-05, + "loss": 0.5551319718360901, + "step": 7889 + }, + { + "epoch": 1.4578766846439006, + "grad_norm": 0.06822110712528229, + "learning_rate": 1.094219532192955e-05, + "loss": 0.4428638517856598, + "step": 7890 + }, + { + "epoch": 1.4580614613527965, + "grad_norm": 0.08248184621334076, + "learning_rate": 1.0940209262360082e-05, + "loss": 0.5554961562156677, + "step": 7891 + }, + { + "epoch": 1.4582462380616923, + "grad_norm": 0.09095754474401474, + "learning_rate": 1.0938223165373225e-05, + "loss": 0.4856225848197937, + "step": 7892 + }, + { + "epoch": 1.4584310147705881, + "grad_norm": 0.0733831375837326, + "learning_rate": 1.0936237031048023e-05, + "loss": 0.5460132956504822, + "step": 7893 + }, + { + "epoch": 1.458615791479484, + "grad_norm": 0.0810202956199646, + "learning_rate": 1.0934250859463516e-05, + "loss": 0.5689954161643982, + "step": 7894 + }, + { + "epoch": 1.4588005681883798, + "grad_norm": 0.09141723066568375, + "learning_rate": 1.0932264650698745e-05, + "loss": 0.5393565893173218, + "step": 7895 + }, + { + "epoch": 1.4589853448972758, + "grad_norm": 0.08879061788320541, + "learning_rate": 1.093027840483276e-05, + "loss": 0.4498291015625, + "step": 7896 + }, + { + "epoch": 1.4591701216061717, + "grad_norm": 0.06332932412624359, + "learning_rate": 1.0928292121944606e-05, + "loss": 0.3498190939426422, + "step": 7897 + }, + { + "epoch": 1.4593548983150675, + "grad_norm": 0.07837338000535965, + "learning_rate": 1.092630580211333e-05, + "loss": 0.5701397657394409, + "step": 7898 + }, + { + "epoch": 1.4595396750239633, + "grad_norm": 0.08210621774196625, + "learning_rate": 1.0924319445417978e-05, + "loss": 0.5923003554344177, + "step": 7899 + }, + { + "epoch": 1.4597244517328591, + "grad_norm": 0.09843314439058304, + "learning_rate": 1.0922333051937603e-05, + "loss": 0.6211321353912354, + "step": 7900 + }, + { + "epoch": 1.459909228441755, + "grad_norm": 0.07027072459459305, + "learning_rate": 1.0920346621751264e-05, + "loss": 0.3532789945602417, + "step": 7901 + }, + { + "epoch": 1.4600940051506508, + "grad_norm": 0.07323717325925827, + "learning_rate": 1.0918360154938004e-05, + "loss": 0.4459230303764343, + "step": 7902 + }, + { + "epoch": 1.4602787818595466, + "grad_norm": 0.0616673082113266, + "learning_rate": 1.0916373651576883e-05, + "loss": 0.43505892157554626, + "step": 7903 + }, + { + "epoch": 1.4604635585684425, + "grad_norm": 0.0709221363067627, + "learning_rate": 1.091438711174696e-05, + "loss": 0.5405328869819641, + "step": 7904 + }, + { + "epoch": 1.4606483352773383, + "grad_norm": 0.07395005226135254, + "learning_rate": 1.091240053552729e-05, + "loss": 0.3639757037162781, + "step": 7905 + }, + { + "epoch": 1.4608331119862341, + "grad_norm": 0.0811300203204155, + "learning_rate": 1.0910413922996934e-05, + "loss": 0.42535293102264404, + "step": 7906 + }, + { + "epoch": 1.46101788869513, + "grad_norm": 0.05116962641477585, + "learning_rate": 1.090842727423495e-05, + "loss": 0.2789819538593292, + "step": 7907 + }, + { + "epoch": 1.4612026654040258, + "grad_norm": 0.07930802553892136, + "learning_rate": 1.0906440589320404e-05, + "loss": 0.5680393576622009, + "step": 7908 + }, + { + "epoch": 1.4613874421129216, + "grad_norm": 0.09925345331430435, + "learning_rate": 1.0904453868332358e-05, + "loss": 0.705038845539093, + "step": 7909 + }, + { + "epoch": 1.4615722188218174, + "grad_norm": 0.0764572024345398, + "learning_rate": 1.0902467111349876e-05, + "loss": 0.4529610574245453, + "step": 7910 + }, + { + "epoch": 1.4617569955307133, + "grad_norm": 0.086216039955616, + "learning_rate": 1.0900480318452032e-05, + "loss": 0.5941925644874573, + "step": 7911 + }, + { + "epoch": 1.461941772239609, + "grad_norm": 0.06900719553232193, + "learning_rate": 1.0898493489717884e-05, + "loss": 0.40601980686187744, + "step": 7912 + }, + { + "epoch": 1.462126548948505, + "grad_norm": 0.08022689074277878, + "learning_rate": 1.0896506625226505e-05, + "loss": 0.604983389377594, + "step": 7913 + }, + { + "epoch": 1.462311325657401, + "grad_norm": 0.0808817595243454, + "learning_rate": 1.0894519725056971e-05, + "loss": 0.5770231485366821, + "step": 7914 + }, + { + "epoch": 1.4624961023662968, + "grad_norm": 0.06129438057541847, + "learning_rate": 1.0892532789288347e-05, + "loss": 0.356117308139801, + "step": 7915 + }, + { + "epoch": 1.4626808790751926, + "grad_norm": 0.09225615859031677, + "learning_rate": 1.0890545817999714e-05, + "loss": 0.6368810534477234, + "step": 7916 + }, + { + "epoch": 1.4628656557840884, + "grad_norm": 0.07198144495487213, + "learning_rate": 1.088855881127014e-05, + "loss": 0.4250560998916626, + "step": 7917 + }, + { + "epoch": 1.4630504324929843, + "grad_norm": 0.07320264726877213, + "learning_rate": 1.088657176917871e-05, + "loss": 0.5142825245857239, + "step": 7918 + }, + { + "epoch": 1.46323520920188, + "grad_norm": 0.0757870227098465, + "learning_rate": 1.0884584691804492e-05, + "loss": 0.49360334873199463, + "step": 7919 + }, + { + "epoch": 1.463419985910776, + "grad_norm": 0.09541133046150208, + "learning_rate": 1.0882597579226574e-05, + "loss": 0.717289388179779, + "step": 7920 + }, + { + "epoch": 1.4636047626196718, + "grad_norm": 0.07429690659046173, + "learning_rate": 1.0880610431524033e-05, + "loss": 0.4696374535560608, + "step": 7921 + }, + { + "epoch": 1.4637895393285676, + "grad_norm": 0.051724813878536224, + "learning_rate": 1.087862324877595e-05, + "loss": 0.3552211821079254, + "step": 7922 + }, + { + "epoch": 1.4639743160374634, + "grad_norm": 0.0671362653374672, + "learning_rate": 1.0876636031061412e-05, + "loss": 0.4889882206916809, + "step": 7923 + }, + { + "epoch": 1.4641590927463592, + "grad_norm": 0.07439544796943665, + "learning_rate": 1.0874648778459502e-05, + "loss": 0.5556880831718445, + "step": 7924 + }, + { + "epoch": 1.4643438694552553, + "grad_norm": 0.0624934583902359, + "learning_rate": 1.0872661491049308e-05, + "loss": 0.3543209135532379, + "step": 7925 + }, + { + "epoch": 1.4645286461641511, + "grad_norm": 0.0852447897195816, + "learning_rate": 1.0870674168909918e-05, + "loss": 0.49739766120910645, + "step": 7926 + }, + { + "epoch": 1.464713422873047, + "grad_norm": 0.08161558210849762, + "learning_rate": 1.0868686812120417e-05, + "loss": 0.6055195927619934, + "step": 7927 + }, + { + "epoch": 1.4648981995819428, + "grad_norm": 0.09370871633291245, + "learning_rate": 1.0866699420759901e-05, + "loss": 0.6243895292282104, + "step": 7928 + }, + { + "epoch": 1.4650829762908386, + "grad_norm": 0.08233344554901123, + "learning_rate": 1.0864711994907457e-05, + "loss": 0.544588029384613, + "step": 7929 + }, + { + "epoch": 1.4652677529997344, + "grad_norm": 0.082923524081707, + "learning_rate": 1.0862724534642186e-05, + "loss": 0.584108293056488, + "step": 7930 + }, + { + "epoch": 1.4654525297086303, + "grad_norm": 0.11480110138654709, + "learning_rate": 1.0860737040043175e-05, + "loss": 0.793763279914856, + "step": 7931 + }, + { + "epoch": 1.465637306417526, + "grad_norm": 0.08902088552713394, + "learning_rate": 1.0858749511189519e-05, + "loss": 0.6228944659233093, + "step": 7932 + }, + { + "epoch": 1.465822083126422, + "grad_norm": 0.08220183104276657, + "learning_rate": 1.0856761948160323e-05, + "loss": 0.5731696486473083, + "step": 7933 + }, + { + "epoch": 1.4660068598353178, + "grad_norm": 0.07819283753633499, + "learning_rate": 1.0854774351034682e-05, + "loss": 0.5738114714622498, + "step": 7934 + }, + { + "epoch": 1.4661916365442136, + "grad_norm": 0.07486468553543091, + "learning_rate": 1.0852786719891695e-05, + "loss": 0.557564914226532, + "step": 7935 + }, + { + "epoch": 1.4663764132531094, + "grad_norm": 0.06882079690694809, + "learning_rate": 1.0850799054810465e-05, + "loss": 0.4335728585720062, + "step": 7936 + }, + { + "epoch": 1.4665611899620052, + "grad_norm": 0.07702423632144928, + "learning_rate": 1.0848811355870097e-05, + "loss": 0.47517985105514526, + "step": 7937 + }, + { + "epoch": 1.466745966670901, + "grad_norm": 0.07775147259235382, + "learning_rate": 1.0846823623149687e-05, + "loss": 0.5218677520751953, + "step": 7938 + }, + { + "epoch": 1.466930743379797, + "grad_norm": 0.08731850981712341, + "learning_rate": 1.0844835856728348e-05, + "loss": 0.6244857311248779, + "step": 7939 + }, + { + "epoch": 1.4671155200886927, + "grad_norm": 0.09213139861822128, + "learning_rate": 1.0842848056685188e-05, + "loss": 0.553689181804657, + "step": 7940 + }, + { + "epoch": 1.4673002967975886, + "grad_norm": 0.07810238003730774, + "learning_rate": 1.084086022309931e-05, + "loss": 0.46642959117889404, + "step": 7941 + }, + { + "epoch": 1.4674850735064844, + "grad_norm": 0.07946629077196121, + "learning_rate": 1.0838872356049826e-05, + "loss": 0.4211277365684509, + "step": 7942 + }, + { + "epoch": 1.4676698502153804, + "grad_norm": 0.07928035408258438, + "learning_rate": 1.0836884455615848e-05, + "loss": 0.5612075924873352, + "step": 7943 + }, + { + "epoch": 1.4678546269242763, + "grad_norm": 0.08762326091527939, + "learning_rate": 1.0834896521876485e-05, + "loss": 0.5942943096160889, + "step": 7944 + }, + { + "epoch": 1.468039403633172, + "grad_norm": 0.09318449348211288, + "learning_rate": 1.0832908554910853e-05, + "loss": 0.7384404540061951, + "step": 7945 + }, + { + "epoch": 1.468224180342068, + "grad_norm": 0.06970518082380295, + "learning_rate": 1.0830920554798067e-05, + "loss": 0.4766594469547272, + "step": 7946 + }, + { + "epoch": 1.4684089570509637, + "grad_norm": 0.07656311243772507, + "learning_rate": 1.0828932521617244e-05, + "loss": 0.4877740740776062, + "step": 7947 + }, + { + "epoch": 1.4685937337598596, + "grad_norm": 0.0660754069685936, + "learning_rate": 1.0826944455447498e-05, + "loss": 0.48413288593292236, + "step": 7948 + }, + { + "epoch": 1.4687785104687554, + "grad_norm": 0.09300076216459274, + "learning_rate": 1.082495635636795e-05, + "loss": 0.6690374612808228, + "step": 7949 + }, + { + "epoch": 1.4689632871776512, + "grad_norm": 0.07548022270202637, + "learning_rate": 1.082296822445772e-05, + "loss": 0.5127116441726685, + "step": 7950 + }, + { + "epoch": 1.469148063886547, + "grad_norm": 0.10325155407190323, + "learning_rate": 1.0820980059795929e-05, + "loss": 0.699739933013916, + "step": 7951 + }, + { + "epoch": 1.469332840595443, + "grad_norm": 0.08389373123645782, + "learning_rate": 1.0818991862461701e-05, + "loss": 0.41915738582611084, + "step": 7952 + }, + { + "epoch": 1.4695176173043387, + "grad_norm": 0.07074711471796036, + "learning_rate": 1.081700363253416e-05, + "loss": 0.3849329948425293, + "step": 7953 + }, + { + "epoch": 1.4697023940132348, + "grad_norm": 0.08760944753885269, + "learning_rate": 1.081501537009243e-05, + "loss": 0.5913623571395874, + "step": 7954 + }, + { + "epoch": 1.4698871707221306, + "grad_norm": 0.08129290491342545, + "learning_rate": 1.0813027075215635e-05, + "loss": 0.6076329350471497, + "step": 7955 + }, + { + "epoch": 1.4700719474310264, + "grad_norm": 0.09393065422773361, + "learning_rate": 1.081103874798291e-05, + "loss": 0.7325844168663025, + "step": 7956 + }, + { + "epoch": 1.4702567241399223, + "grad_norm": 0.07572071999311447, + "learning_rate": 1.0809050388473382e-05, + "loss": 0.5757875442504883, + "step": 7957 + }, + { + "epoch": 1.470441500848818, + "grad_norm": 0.08286506682634354, + "learning_rate": 1.0807061996766174e-05, + "loss": 0.49454647302627563, + "step": 7958 + }, + { + "epoch": 1.470626277557714, + "grad_norm": 0.0690111443400383, + "learning_rate": 1.0805073572940425e-05, + "loss": 0.5452752113342285, + "step": 7959 + }, + { + "epoch": 1.4708110542666097, + "grad_norm": 0.0932869166135788, + "learning_rate": 1.080308511707527e-05, + "loss": 0.6137073040008545, + "step": 7960 + }, + { + "epoch": 1.4709958309755056, + "grad_norm": 0.0877099260687828, + "learning_rate": 1.0801096629249836e-05, + "loss": 0.5865559577941895, + "step": 7961 + }, + { + "epoch": 1.4711806076844014, + "grad_norm": 0.07646284252405167, + "learning_rate": 1.079910810954326e-05, + "loss": 0.5363489985466003, + "step": 7962 + }, + { + "epoch": 1.4713653843932972, + "grad_norm": 0.09914388507604599, + "learning_rate": 1.079711955803469e-05, + "loss": 0.6129844784736633, + "step": 7963 + }, + { + "epoch": 1.471550161102193, + "grad_norm": 0.08948146551847458, + "learning_rate": 1.0795130974803252e-05, + "loss": 0.5178477764129639, + "step": 7964 + }, + { + "epoch": 1.4717349378110889, + "grad_norm": 0.08743748068809509, + "learning_rate": 1.0793142359928084e-05, + "loss": 0.5934823751449585, + "step": 7965 + }, + { + "epoch": 1.4719197145199847, + "grad_norm": 0.05671244114637375, + "learning_rate": 1.0791153713488336e-05, + "loss": 0.29773834347724915, + "step": 7966 + }, + { + "epoch": 1.4721044912288805, + "grad_norm": 0.0830715000629425, + "learning_rate": 1.0789165035563145e-05, + "loss": 0.615138053894043, + "step": 7967 + }, + { + "epoch": 1.4722892679377764, + "grad_norm": 0.06543489545583725, + "learning_rate": 1.0787176326231651e-05, + "loss": 0.407809853553772, + "step": 7968 + }, + { + "epoch": 1.4724740446466722, + "grad_norm": 0.08552611619234085, + "learning_rate": 1.0785187585573007e-05, + "loss": 0.6576621532440186, + "step": 7969 + }, + { + "epoch": 1.472658821355568, + "grad_norm": 0.061488475650548935, + "learning_rate": 1.0783198813666354e-05, + "loss": 0.3980618715286255, + "step": 7970 + }, + { + "epoch": 1.4728435980644639, + "grad_norm": 0.07736705988645554, + "learning_rate": 1.0781210010590834e-05, + "loss": 0.5964474678039551, + "step": 7971 + }, + { + "epoch": 1.4730283747733597, + "grad_norm": 0.06732498109340668, + "learning_rate": 1.07792211764256e-05, + "loss": 0.3793677091598511, + "step": 7972 + }, + { + "epoch": 1.4732131514822557, + "grad_norm": 0.0850326418876648, + "learning_rate": 1.0777232311249805e-05, + "loss": 0.3835803270339966, + "step": 7973 + }, + { + "epoch": 1.4733979281911516, + "grad_norm": 0.07928745448589325, + "learning_rate": 1.0775243415142595e-05, + "loss": 0.559565544128418, + "step": 7974 + }, + { + "epoch": 1.4735827049000474, + "grad_norm": 0.0691276267170906, + "learning_rate": 1.0773254488183117e-05, + "loss": 0.37865975499153137, + "step": 7975 + }, + { + "epoch": 1.4737674816089432, + "grad_norm": 0.09156709909439087, + "learning_rate": 1.0771265530450537e-05, + "loss": 0.6320781111717224, + "step": 7976 + }, + { + "epoch": 1.473952258317839, + "grad_norm": 0.07213588804006577, + "learning_rate": 1.0769276542024e-05, + "loss": 0.6258044838905334, + "step": 7977 + }, + { + "epoch": 1.4741370350267349, + "grad_norm": 0.07551666349172592, + "learning_rate": 1.0767287522982662e-05, + "loss": 0.5296053290367126, + "step": 7978 + }, + { + "epoch": 1.4743218117356307, + "grad_norm": 0.08219839632511139, + "learning_rate": 1.0765298473405679e-05, + "loss": 0.5902359485626221, + "step": 7979 + }, + { + "epoch": 1.4745065884445265, + "grad_norm": 0.06733104586601257, + "learning_rate": 1.0763309393372215e-05, + "loss": 0.38392987847328186, + "step": 7980 + }, + { + "epoch": 1.4746913651534224, + "grad_norm": 0.08572923392057419, + "learning_rate": 1.076132028296142e-05, + "loss": 0.5022323131561279, + "step": 7981 + }, + { + "epoch": 1.4748761418623182, + "grad_norm": 0.06923422962427139, + "learning_rate": 1.0759331142252463e-05, + "loss": 0.4584689438343048, + "step": 7982 + }, + { + "epoch": 1.475060918571214, + "grad_norm": 0.09197697043418884, + "learning_rate": 1.0757341971324504e-05, + "loss": 0.6473321914672852, + "step": 7983 + }, + { + "epoch": 1.47524569528011, + "grad_norm": 0.06988525390625, + "learning_rate": 1.0755352770256704e-05, + "loss": 0.439445436000824, + "step": 7984 + }, + { + "epoch": 1.475430471989006, + "grad_norm": 0.05474436655640602, + "learning_rate": 1.0753363539128222e-05, + "loss": 0.2108364701271057, + "step": 7985 + }, + { + "epoch": 1.4756152486979017, + "grad_norm": 0.04617056995630264, + "learning_rate": 1.0751374278018232e-05, + "loss": 0.2944631576538086, + "step": 7986 + }, + { + "epoch": 1.4758000254067976, + "grad_norm": 0.08965945243835449, + "learning_rate": 1.0749384987005896e-05, + "loss": 0.4396054744720459, + "step": 7987 + }, + { + "epoch": 1.4759848021156934, + "grad_norm": 0.09829042851924896, + "learning_rate": 1.0747395666170382e-05, + "loss": 0.6436299085617065, + "step": 7988 + }, + { + "epoch": 1.4761695788245892, + "grad_norm": 0.09775824844837189, + "learning_rate": 1.0745406315590856e-05, + "loss": 0.626186728477478, + "step": 7989 + }, + { + "epoch": 1.476354355533485, + "grad_norm": 0.08036770671606064, + "learning_rate": 1.0743416935346496e-05, + "loss": 0.5929142236709595, + "step": 7990 + }, + { + "epoch": 1.4765391322423809, + "grad_norm": 0.07372663170099258, + "learning_rate": 1.0741427525516463e-05, + "loss": 0.5616413950920105, + "step": 7991 + }, + { + "epoch": 1.4767239089512767, + "grad_norm": 0.07719265669584274, + "learning_rate": 1.0739438086179934e-05, + "loss": 0.6793864369392395, + "step": 7992 + }, + { + "epoch": 1.4769086856601725, + "grad_norm": 0.09435825049877167, + "learning_rate": 1.0737448617416086e-05, + "loss": 0.6805324554443359, + "step": 7993 + }, + { + "epoch": 1.4770934623690684, + "grad_norm": 0.07104142010211945, + "learning_rate": 1.0735459119304093e-05, + "loss": 0.5474250912666321, + "step": 7994 + }, + { + "epoch": 1.4772782390779642, + "grad_norm": 0.08389271795749664, + "learning_rate": 1.0733469591923122e-05, + "loss": 0.5377687215805054, + "step": 7995 + }, + { + "epoch": 1.47746301578686, + "grad_norm": 0.0856410413980484, + "learning_rate": 1.0731480035352356e-05, + "loss": 0.47562843561172485, + "step": 7996 + }, + { + "epoch": 1.4776477924957558, + "grad_norm": 0.06180229038000107, + "learning_rate": 1.0729490449670976e-05, + "loss": 0.3866513669490814, + "step": 7997 + }, + { + "epoch": 1.4778325692046517, + "grad_norm": 0.06881602108478546, + "learning_rate": 1.0727500834958157e-05, + "loss": 0.43669071793556213, + "step": 7998 + }, + { + "epoch": 1.4780173459135475, + "grad_norm": 0.0712369903922081, + "learning_rate": 1.0725511191293082e-05, + "loss": 0.39811253547668457, + "step": 7999 + }, + { + "epoch": 1.4782021226224433, + "grad_norm": 0.10602513700723648, + "learning_rate": 1.0723521518754931e-05, + "loss": 0.7491739988327026, + "step": 8000 + }, + { + "epoch": 1.4782021226224433, + "eval_loss": 0.579645574092865, + "eval_runtime": 288.6369, + "eval_samples_per_second": 63.155, + "eval_steps_per_second": 7.896, + "step": 8000 + }, + { + "epoch": 1.4783868993313392, + "grad_norm": 0.08168964087963104, + "learning_rate": 1.0721531817422885e-05, + "loss": 0.6392906308174133, + "step": 8001 + }, + { + "epoch": 1.4785716760402352, + "grad_norm": 0.0581355020403862, + "learning_rate": 1.0719542087376134e-05, + "loss": 0.4122292995452881, + "step": 8002 + }, + { + "epoch": 1.478756452749131, + "grad_norm": 0.06892077624797821, + "learning_rate": 1.0717552328693855e-05, + "loss": 0.4517199695110321, + "step": 8003 + }, + { + "epoch": 1.4789412294580269, + "grad_norm": 0.05392163619399071, + "learning_rate": 1.0715562541455243e-05, + "loss": 0.3663863241672516, + "step": 8004 + }, + { + "epoch": 1.4791260061669227, + "grad_norm": 0.07395106554031372, + "learning_rate": 1.0713572725739476e-05, + "loss": 0.5130017995834351, + "step": 8005 + }, + { + "epoch": 1.4793107828758185, + "grad_norm": 0.08342242240905762, + "learning_rate": 1.0711582881625746e-05, + "loss": 0.6381667256355286, + "step": 8006 + }, + { + "epoch": 1.4794955595847143, + "grad_norm": 0.068038709461689, + "learning_rate": 1.070959300919325e-05, + "loss": 0.391664057970047, + "step": 8007 + }, + { + "epoch": 1.4796803362936102, + "grad_norm": 0.08663652092218399, + "learning_rate": 1.0707603108521165e-05, + "loss": 0.6319605112075806, + "step": 8008 + }, + { + "epoch": 1.479865113002506, + "grad_norm": 0.08702877908945084, + "learning_rate": 1.0705613179688694e-05, + "loss": 0.7374585866928101, + "step": 8009 + }, + { + "epoch": 1.4800498897114018, + "grad_norm": 0.09197530895471573, + "learning_rate": 1.0703623222775028e-05, + "loss": 0.589191734790802, + "step": 8010 + }, + { + "epoch": 1.4802346664202977, + "grad_norm": 0.08512798696756363, + "learning_rate": 1.0701633237859355e-05, + "loss": 0.5790627598762512, + "step": 8011 + }, + { + "epoch": 1.4804194431291935, + "grad_norm": 0.06841887533664703, + "learning_rate": 1.0699643225020876e-05, + "loss": 0.47389841079711914, + "step": 8012 + }, + { + "epoch": 1.4806042198380895, + "grad_norm": 0.0750150978565216, + "learning_rate": 1.0697653184338785e-05, + "loss": 0.5332591533660889, + "step": 8013 + }, + { + "epoch": 1.4807889965469854, + "grad_norm": 0.06956504285335541, + "learning_rate": 1.0695663115892282e-05, + "loss": 0.4910212755203247, + "step": 8014 + }, + { + "epoch": 1.4809737732558812, + "grad_norm": 0.0774892121553421, + "learning_rate": 1.0693673019760562e-05, + "loss": 0.46022897958755493, + "step": 8015 + }, + { + "epoch": 1.481158549964777, + "grad_norm": 0.07349559664726257, + "learning_rate": 1.0691682896022824e-05, + "loss": 0.5611245632171631, + "step": 8016 + }, + { + "epoch": 1.4813433266736729, + "grad_norm": 0.09003234654664993, + "learning_rate": 1.0689692744758276e-05, + "loss": 0.5233736038208008, + "step": 8017 + }, + { + "epoch": 1.4815281033825687, + "grad_norm": 0.05826477333903313, + "learning_rate": 1.068770256604611e-05, + "loss": 0.36907002329826355, + "step": 8018 + }, + { + "epoch": 1.4817128800914645, + "grad_norm": 0.07595459371805191, + "learning_rate": 1.0685712359965534e-05, + "loss": 0.5646535754203796, + "step": 8019 + }, + { + "epoch": 1.4818976568003603, + "grad_norm": 0.06655114889144897, + "learning_rate": 1.0683722126595753e-05, + "loss": 0.4589296877384186, + "step": 8020 + }, + { + "epoch": 1.4820824335092562, + "grad_norm": 0.08873427659273148, + "learning_rate": 1.0681731866015968e-05, + "loss": 0.77529376745224, + "step": 8021 + }, + { + "epoch": 1.482267210218152, + "grad_norm": 0.1050659716129303, + "learning_rate": 1.067974157830539e-05, + "loss": 0.714748203754425, + "step": 8022 + }, + { + "epoch": 1.4824519869270478, + "grad_norm": 0.10710117220878601, + "learning_rate": 1.0677751263543221e-05, + "loss": 0.7326914668083191, + "step": 8023 + }, + { + "epoch": 1.4826367636359437, + "grad_norm": 0.07048782706260681, + "learning_rate": 1.0675760921808673e-05, + "loss": 0.46614688634872437, + "step": 8024 + }, + { + "epoch": 1.4828215403448395, + "grad_norm": 0.07618443667888641, + "learning_rate": 1.0673770553180957e-05, + "loss": 0.45787060260772705, + "step": 8025 + }, + { + "epoch": 1.4830063170537353, + "grad_norm": 0.0792887732386589, + "learning_rate": 1.0671780157739282e-05, + "loss": 0.4927024245262146, + "step": 8026 + }, + { + "epoch": 1.4831910937626311, + "grad_norm": 0.10205483436584473, + "learning_rate": 1.0669789735562855e-05, + "loss": 0.6825256943702698, + "step": 8027 + }, + { + "epoch": 1.483375870471527, + "grad_norm": 0.08562511950731277, + "learning_rate": 1.066779928673089e-05, + "loss": 0.5680001974105835, + "step": 8028 + }, + { + "epoch": 1.4835606471804228, + "grad_norm": 0.07572870701551437, + "learning_rate": 1.0665808811322608e-05, + "loss": 0.5484418272972107, + "step": 8029 + }, + { + "epoch": 1.4837454238893186, + "grad_norm": 0.05491387099027634, + "learning_rate": 1.0663818309417216e-05, + "loss": 0.39442920684814453, + "step": 8030 + }, + { + "epoch": 1.4839302005982147, + "grad_norm": 0.101420558989048, + "learning_rate": 1.066182778109393e-05, + "loss": 0.5014135837554932, + "step": 8031 + }, + { + "epoch": 1.4841149773071105, + "grad_norm": 0.07395175844430923, + "learning_rate": 1.0659837226431973e-05, + "loss": 0.5414116978645325, + "step": 8032 + }, + { + "epoch": 1.4842997540160063, + "grad_norm": 0.07887198030948639, + "learning_rate": 1.0657846645510557e-05, + "loss": 0.5636579394340515, + "step": 8033 + }, + { + "epoch": 1.4844845307249022, + "grad_norm": 0.073245108127594, + "learning_rate": 1.06558560384089e-05, + "loss": 0.5003830790519714, + "step": 8034 + }, + { + "epoch": 1.484669307433798, + "grad_norm": 0.08296787738800049, + "learning_rate": 1.0653865405206227e-05, + "loss": 0.4997517168521881, + "step": 8035 + }, + { + "epoch": 1.4848540841426938, + "grad_norm": 0.07751402258872986, + "learning_rate": 1.0651874745981758e-05, + "loss": 0.5620325207710266, + "step": 8036 + }, + { + "epoch": 1.4850388608515896, + "grad_norm": 0.060117579996585846, + "learning_rate": 1.0649884060814713e-05, + "loss": 0.41544926166534424, + "step": 8037 + }, + { + "epoch": 1.4852236375604855, + "grad_norm": 0.08661402761936188, + "learning_rate": 1.0647893349784313e-05, + "loss": 0.5965865254402161, + "step": 8038 + }, + { + "epoch": 1.4854084142693813, + "grad_norm": 0.09608050435781479, + "learning_rate": 1.0645902612969788e-05, + "loss": 0.7004936337471008, + "step": 8039 + }, + { + "epoch": 1.4855931909782771, + "grad_norm": 0.07092927396297455, + "learning_rate": 1.0643911850450358e-05, + "loss": 0.5260869860649109, + "step": 8040 + }, + { + "epoch": 1.485777967687173, + "grad_norm": 0.07654356956481934, + "learning_rate": 1.064192106230525e-05, + "loss": 0.43520107865333557, + "step": 8041 + }, + { + "epoch": 1.485962744396069, + "grad_norm": 0.08207488805055618, + "learning_rate": 1.0639930248613694e-05, + "loss": 0.49081167578697205, + "step": 8042 + }, + { + "epoch": 1.4861475211049648, + "grad_norm": 0.07600727677345276, + "learning_rate": 1.0637939409454916e-05, + "loss": 0.588967502117157, + "step": 8043 + }, + { + "epoch": 1.4863322978138607, + "grad_norm": 0.08325869590044022, + "learning_rate": 1.063594854490814e-05, + "loss": 0.6097344160079956, + "step": 8044 + }, + { + "epoch": 1.4865170745227565, + "grad_norm": 0.06231686845421791, + "learning_rate": 1.0633957655052609e-05, + "loss": 0.4860547184944153, + "step": 8045 + }, + { + "epoch": 1.4867018512316523, + "grad_norm": 0.0686325877904892, + "learning_rate": 1.0631966739967545e-05, + "loss": 0.3996231257915497, + "step": 8046 + }, + { + "epoch": 1.4868866279405482, + "grad_norm": 0.06175853684544563, + "learning_rate": 1.062997579973218e-05, + "loss": 0.5008290410041809, + "step": 8047 + }, + { + "epoch": 1.487071404649444, + "grad_norm": 0.06243591755628586, + "learning_rate": 1.0627984834425748e-05, + "loss": 0.3662768602371216, + "step": 8048 + }, + { + "epoch": 1.4872561813583398, + "grad_norm": 0.08133753389120102, + "learning_rate": 1.062599384412749e-05, + "loss": 0.6068007349967957, + "step": 8049 + }, + { + "epoch": 1.4874409580672356, + "grad_norm": 0.07411174476146698, + "learning_rate": 1.0624002828916631e-05, + "loss": 0.48948463797569275, + "step": 8050 + }, + { + "epoch": 1.4876257347761315, + "grad_norm": 0.08100121468305588, + "learning_rate": 1.062201178887241e-05, + "loss": 0.5255101323127747, + "step": 8051 + }, + { + "epoch": 1.4878105114850273, + "grad_norm": 0.07053257524967194, + "learning_rate": 1.062002072407407e-05, + "loss": 0.5262172818183899, + "step": 8052 + }, + { + "epoch": 1.4879952881939231, + "grad_norm": 0.08912979066371918, + "learning_rate": 1.0618029634600843e-05, + "loss": 0.7172182202339172, + "step": 8053 + }, + { + "epoch": 1.488180064902819, + "grad_norm": 0.08581690490245819, + "learning_rate": 1.0616038520531969e-05, + "loss": 0.4855179190635681, + "step": 8054 + }, + { + "epoch": 1.4883648416117148, + "grad_norm": 0.06741776317358017, + "learning_rate": 1.061404738194669e-05, + "loss": 0.44996926188468933, + "step": 8055 + }, + { + "epoch": 1.4885496183206106, + "grad_norm": 0.07213829457759857, + "learning_rate": 1.061205621892425e-05, + "loss": 0.460223913192749, + "step": 8056 + }, + { + "epoch": 1.4887343950295064, + "grad_norm": 0.07456837594509125, + "learning_rate": 1.0610065031543881e-05, + "loss": 0.511305034160614, + "step": 8057 + }, + { + "epoch": 1.4889191717384023, + "grad_norm": 0.07812952995300293, + "learning_rate": 1.0608073819884837e-05, + "loss": 0.42069342732429504, + "step": 8058 + }, + { + "epoch": 1.489103948447298, + "grad_norm": 0.09203128516674042, + "learning_rate": 1.0606082584026357e-05, + "loss": 0.5720989108085632, + "step": 8059 + }, + { + "epoch": 1.489288725156194, + "grad_norm": 0.057817284017801285, + "learning_rate": 1.0604091324047683e-05, + "loss": 0.37874454259872437, + "step": 8060 + }, + { + "epoch": 1.48947350186509, + "grad_norm": 0.08087790012359619, + "learning_rate": 1.0602100040028068e-05, + "loss": 0.6484622359275818, + "step": 8061 + }, + { + "epoch": 1.4896582785739858, + "grad_norm": 0.08493216335773468, + "learning_rate": 1.0600108732046751e-05, + "loss": 0.5885729193687439, + "step": 8062 + }, + { + "epoch": 1.4898430552828816, + "grad_norm": 0.0692172572016716, + "learning_rate": 1.059811740018299e-05, + "loss": 0.46173328161239624, + "step": 8063 + }, + { + "epoch": 1.4900278319917775, + "grad_norm": 0.06761965155601501, + "learning_rate": 1.0596126044516021e-05, + "loss": 0.45651736855506897, + "step": 8064 + }, + { + "epoch": 1.4902126087006733, + "grad_norm": 0.10760365426540375, + "learning_rate": 1.0594134665125106e-05, + "loss": 0.7754571437835693, + "step": 8065 + }, + { + "epoch": 1.4903973854095691, + "grad_norm": 0.050112102180719376, + "learning_rate": 1.059214326208949e-05, + "loss": 0.30902236700057983, + "step": 8066 + }, + { + "epoch": 1.490582162118465, + "grad_norm": 0.06518737971782684, + "learning_rate": 1.059015183548842e-05, + "loss": 0.4446237087249756, + "step": 8067 + }, + { + "epoch": 1.4907669388273608, + "grad_norm": 0.0689021572470665, + "learning_rate": 1.0588160385401157e-05, + "loss": 0.421636700630188, + "step": 8068 + }, + { + "epoch": 1.4909517155362566, + "grad_norm": 0.07490003854036331, + "learning_rate": 1.0586168911906951e-05, + "loss": 0.4064212143421173, + "step": 8069 + }, + { + "epoch": 1.4911364922451524, + "grad_norm": 0.09233926236629486, + "learning_rate": 1.0584177415085053e-05, + "loss": 0.5941533446311951, + "step": 8070 + }, + { + "epoch": 1.4913212689540483, + "grad_norm": 0.10283198207616806, + "learning_rate": 1.0582185895014723e-05, + "loss": 0.5642294883728027, + "step": 8071 + }, + { + "epoch": 1.4915060456629443, + "grad_norm": 0.08836923539638519, + "learning_rate": 1.0580194351775217e-05, + "loss": 0.7438347935676575, + "step": 8072 + }, + { + "epoch": 1.4916908223718401, + "grad_norm": 0.0638003796339035, + "learning_rate": 1.0578202785445792e-05, + "loss": 0.4012036621570587, + "step": 8073 + }, + { + "epoch": 1.491875599080736, + "grad_norm": 0.09235439449548721, + "learning_rate": 1.05762111961057e-05, + "loss": 0.5444309711456299, + "step": 8074 + }, + { + "epoch": 1.4920603757896318, + "grad_norm": 0.09472963213920593, + "learning_rate": 1.0574219583834211e-05, + "loss": 0.6380682587623596, + "step": 8075 + }, + { + "epoch": 1.4922451524985276, + "grad_norm": 0.0787310004234314, + "learning_rate": 1.0572227948710578e-05, + "loss": 0.5088316202163696, + "step": 8076 + }, + { + "epoch": 1.4924299292074235, + "grad_norm": 0.0733528658747673, + "learning_rate": 1.057023629081406e-05, + "loss": 0.40022554993629456, + "step": 8077 + }, + { + "epoch": 1.4926147059163193, + "grad_norm": 0.0707663968205452, + "learning_rate": 1.0568244610223921e-05, + "loss": 0.4129393696784973, + "step": 8078 + }, + { + "epoch": 1.4927994826252151, + "grad_norm": 0.07245533168315887, + "learning_rate": 1.056625290701943e-05, + "loss": 0.48481282591819763, + "step": 8079 + }, + { + "epoch": 1.492984259334111, + "grad_norm": 0.08001293241977692, + "learning_rate": 1.056426118127984e-05, + "loss": 0.5082979798316956, + "step": 8080 + }, + { + "epoch": 1.4931690360430068, + "grad_norm": 0.05666988343000412, + "learning_rate": 1.056226943308442e-05, + "loss": 0.3453954756259918, + "step": 8081 + }, + { + "epoch": 1.4933538127519026, + "grad_norm": 0.06098370626568794, + "learning_rate": 1.0560277662512439e-05, + "loss": 0.4135182201862335, + "step": 8082 + }, + { + "epoch": 1.4935385894607984, + "grad_norm": 0.08647754788398743, + "learning_rate": 1.055828586964316e-05, + "loss": 0.564022421836853, + "step": 8083 + }, + { + "epoch": 1.4937233661696943, + "grad_norm": 0.07890351861715317, + "learning_rate": 1.0556294054555847e-05, + "loss": 0.6017817854881287, + "step": 8084 + }, + { + "epoch": 1.49390814287859, + "grad_norm": 0.07599826902151108, + "learning_rate": 1.0554302217329773e-05, + "loss": 0.5751614570617676, + "step": 8085 + }, + { + "epoch": 1.494092919587486, + "grad_norm": 0.10049251466989517, + "learning_rate": 1.0552310358044204e-05, + "loss": 0.554628312587738, + "step": 8086 + }, + { + "epoch": 1.4942776962963817, + "grad_norm": 0.06830247491598129, + "learning_rate": 1.0550318476778412e-05, + "loss": 0.5074917078018188, + "step": 8087 + }, + { + "epoch": 1.4944624730052776, + "grad_norm": 0.07232210040092468, + "learning_rate": 1.0548326573611662e-05, + "loss": 0.4642643928527832, + "step": 8088 + }, + { + "epoch": 1.4946472497141734, + "grad_norm": 0.0779031440615654, + "learning_rate": 1.0546334648623235e-05, + "loss": 0.6109477281570435, + "step": 8089 + }, + { + "epoch": 1.4948320264230694, + "grad_norm": 0.08378469198942184, + "learning_rate": 1.0544342701892396e-05, + "loss": 0.5547432899475098, + "step": 8090 + }, + { + "epoch": 1.4950168031319653, + "grad_norm": 0.07529163360595703, + "learning_rate": 1.0542350733498424e-05, + "loss": 0.5577362775802612, + "step": 8091 + }, + { + "epoch": 1.495201579840861, + "grad_norm": 0.08380243182182312, + "learning_rate": 1.0540358743520585e-05, + "loss": 0.5255307555198669, + "step": 8092 + }, + { + "epoch": 1.495386356549757, + "grad_norm": 0.08864055573940277, + "learning_rate": 1.0538366732038161e-05, + "loss": 0.7255281805992126, + "step": 8093 + }, + { + "epoch": 1.4955711332586528, + "grad_norm": 0.09007778763771057, + "learning_rate": 1.0536374699130422e-05, + "loss": 0.6297560930252075, + "step": 8094 + }, + { + "epoch": 1.4957559099675486, + "grad_norm": 0.07595361769199371, + "learning_rate": 1.053438264487665e-05, + "loss": 0.5389895439147949, + "step": 8095 + }, + { + "epoch": 1.4959406866764444, + "grad_norm": 0.07773599028587341, + "learning_rate": 1.0532390569356123e-05, + "loss": 0.489621639251709, + "step": 8096 + }, + { + "epoch": 1.4961254633853402, + "grad_norm": 0.07256493717432022, + "learning_rate": 1.0530398472648116e-05, + "loss": 0.4765538275241852, + "step": 8097 + }, + { + "epoch": 1.496310240094236, + "grad_norm": 0.06853928416967392, + "learning_rate": 1.0528406354831909e-05, + "loss": 0.5104422569274902, + "step": 8098 + }, + { + "epoch": 1.496495016803132, + "grad_norm": 0.0818312019109726, + "learning_rate": 1.0526414215986783e-05, + "loss": 0.4994242787361145, + "step": 8099 + }, + { + "epoch": 1.4966797935120277, + "grad_norm": 0.0743444487452507, + "learning_rate": 1.0524422056192014e-05, + "loss": 0.5327197909355164, + "step": 8100 + }, + { + "epoch": 1.4968645702209238, + "grad_norm": 0.06243869662284851, + "learning_rate": 1.0522429875526892e-05, + "loss": 0.3634476661682129, + "step": 8101 + }, + { + "epoch": 1.4970493469298196, + "grad_norm": 0.09468526393175125, + "learning_rate": 1.0520437674070694e-05, + "loss": 0.6794441342353821, + "step": 8102 + }, + { + "epoch": 1.4972341236387154, + "grad_norm": 0.06301013380289078, + "learning_rate": 1.0518445451902706e-05, + "loss": 0.5078780055046082, + "step": 8103 + }, + { + "epoch": 1.4974189003476113, + "grad_norm": 0.07378236949443817, + "learning_rate": 1.0516453209102209e-05, + "loss": 0.535196840763092, + "step": 8104 + }, + { + "epoch": 1.497603677056507, + "grad_norm": 0.09048765897750854, + "learning_rate": 1.051446094574849e-05, + "loss": 0.6304789185523987, + "step": 8105 + }, + { + "epoch": 1.497788453765403, + "grad_norm": 0.08033865690231323, + "learning_rate": 1.0512468661920836e-05, + "loss": 0.553955078125, + "step": 8106 + }, + { + "epoch": 1.4979732304742988, + "grad_norm": 0.060053229331970215, + "learning_rate": 1.0510476357698534e-05, + "loss": 0.40581458806991577, + "step": 8107 + }, + { + "epoch": 1.4981580071831946, + "grad_norm": 0.07334432750940323, + "learning_rate": 1.0508484033160868e-05, + "loss": 0.5493158102035522, + "step": 8108 + }, + { + "epoch": 1.4983427838920904, + "grad_norm": 0.08855029195547104, + "learning_rate": 1.0506491688387128e-05, + "loss": 0.565083920955658, + "step": 8109 + }, + { + "epoch": 1.4985275606009862, + "grad_norm": 0.08073693513870239, + "learning_rate": 1.0504499323456603e-05, + "loss": 0.567347526550293, + "step": 8110 + }, + { + "epoch": 1.498712337309882, + "grad_norm": 0.07002782821655273, + "learning_rate": 1.0502506938448586e-05, + "loss": 0.4169497489929199, + "step": 8111 + }, + { + "epoch": 1.498897114018778, + "grad_norm": 0.05655227601528168, + "learning_rate": 1.0500514533442364e-05, + "loss": 0.41766270995140076, + "step": 8112 + }, + { + "epoch": 1.4990818907276737, + "grad_norm": 0.09233249723911285, + "learning_rate": 1.0498522108517231e-05, + "loss": 0.607703685760498, + "step": 8113 + }, + { + "epoch": 1.4992666674365696, + "grad_norm": 0.0869932696223259, + "learning_rate": 1.0496529663752473e-05, + "loss": 0.5394681096076965, + "step": 8114 + }, + { + "epoch": 1.4994514441454654, + "grad_norm": 0.0950428918004036, + "learning_rate": 1.0494537199227393e-05, + "loss": 0.5990388989448547, + "step": 8115 + }, + { + "epoch": 1.4996362208543612, + "grad_norm": 0.07539371401071548, + "learning_rate": 1.0492544715021275e-05, + "loss": 0.4656071364879608, + "step": 8116 + }, + { + "epoch": 1.499820997563257, + "grad_norm": 0.0667133778333664, + "learning_rate": 1.0490552211213421e-05, + "loss": 0.39385464787483215, + "step": 8117 + }, + { + "epoch": 1.5000057742721529, + "grad_norm": 0.07178043574094772, + "learning_rate": 1.0488559687883125e-05, + "loss": 0.44700050354003906, + "step": 8118 + }, + { + "epoch": 1.5001905509810487, + "grad_norm": 0.07774877548217773, + "learning_rate": 1.0486567145109678e-05, + "loss": 0.575194239616394, + "step": 8119 + }, + { + "epoch": 1.5003753276899445, + "grad_norm": 0.054962847381830215, + "learning_rate": 1.0484574582972383e-05, + "loss": 0.32182666659355164, + "step": 8120 + }, + { + "epoch": 1.5005601043988406, + "grad_norm": 0.08489862084388733, + "learning_rate": 1.0482582001550537e-05, + "loss": 0.5646397471427917, + "step": 8121 + }, + { + "epoch": 1.5007448811077364, + "grad_norm": 0.059859659522771835, + "learning_rate": 1.0480589400923436e-05, + "loss": 0.367511510848999, + "step": 8122 + }, + { + "epoch": 1.5009296578166322, + "grad_norm": 0.07323503494262695, + "learning_rate": 1.047859678117038e-05, + "loss": 0.4996204078197479, + "step": 8123 + }, + { + "epoch": 1.501114434525528, + "grad_norm": 0.06804613769054413, + "learning_rate": 1.047660414237067e-05, + "loss": 0.38346338272094727, + "step": 8124 + }, + { + "epoch": 1.501299211234424, + "grad_norm": 0.07917284965515137, + "learning_rate": 1.0474611484603607e-05, + "loss": 0.6195372343063354, + "step": 8125 + }, + { + "epoch": 1.5014839879433197, + "grad_norm": 0.0733339861035347, + "learning_rate": 1.0472618807948488e-05, + "loss": 0.6930992603302002, + "step": 8126 + }, + { + "epoch": 1.5016687646522155, + "grad_norm": 0.0960981622338295, + "learning_rate": 1.0470626112484622e-05, + "loss": 0.6226276159286499, + "step": 8127 + }, + { + "epoch": 1.5018535413611114, + "grad_norm": 0.07643352448940277, + "learning_rate": 1.0468633398291313e-05, + "loss": 0.6525744199752808, + "step": 8128 + }, + { + "epoch": 1.5020383180700074, + "grad_norm": 0.09541403502225876, + "learning_rate": 1.0466640665447854e-05, + "loss": 0.6560640335083008, + "step": 8129 + }, + { + "epoch": 1.5022230947789033, + "grad_norm": 0.06832639873027802, + "learning_rate": 1.0464647914033558e-05, + "loss": 0.35440102219581604, + "step": 8130 + }, + { + "epoch": 1.502407871487799, + "grad_norm": 0.08282726258039474, + "learning_rate": 1.0462655144127734e-05, + "loss": 0.45437178015708923, + "step": 8131 + }, + { + "epoch": 1.502592648196695, + "grad_norm": 0.06447652727365494, + "learning_rate": 1.0460662355809678e-05, + "loss": 0.3525720238685608, + "step": 8132 + }, + { + "epoch": 1.5027774249055907, + "grad_norm": 0.06565721333026886, + "learning_rate": 1.0458669549158703e-05, + "loss": 0.4773813486099243, + "step": 8133 + }, + { + "epoch": 1.5029622016144866, + "grad_norm": 0.07156088203191757, + "learning_rate": 1.0456676724254114e-05, + "loss": 0.5528655648231506, + "step": 8134 + }, + { + "epoch": 1.5031469783233824, + "grad_norm": 0.07021511346101761, + "learning_rate": 1.0454683881175221e-05, + "loss": 0.4314422011375427, + "step": 8135 + }, + { + "epoch": 1.5033317550322782, + "grad_norm": 0.09521748125553131, + "learning_rate": 1.0452691020001329e-05, + "loss": 0.7305403351783752, + "step": 8136 + }, + { + "epoch": 1.503516531741174, + "grad_norm": 0.06114820018410683, + "learning_rate": 1.0450698140811753e-05, + "loss": 0.40088844299316406, + "step": 8137 + }, + { + "epoch": 1.5037013084500699, + "grad_norm": 0.07667379826307297, + "learning_rate": 1.0448705243685801e-05, + "loss": 0.5117903351783752, + "step": 8138 + }, + { + "epoch": 1.5038860851589657, + "grad_norm": 0.058828432112932205, + "learning_rate": 1.0446712328702784e-05, + "loss": 0.38778555393218994, + "step": 8139 + }, + { + "epoch": 1.5040708618678615, + "grad_norm": 0.08744698017835617, + "learning_rate": 1.0444719395942013e-05, + "loss": 0.6395158767700195, + "step": 8140 + }, + { + "epoch": 1.5042556385767574, + "grad_norm": 0.07198171317577362, + "learning_rate": 1.0442726445482805e-05, + "loss": 0.46748894453048706, + "step": 8141 + }, + { + "epoch": 1.5044404152856532, + "grad_norm": 0.07280024141073227, + "learning_rate": 1.0440733477404468e-05, + "loss": 0.4413139820098877, + "step": 8142 + }, + { + "epoch": 1.504625191994549, + "grad_norm": 0.07300464808940887, + "learning_rate": 1.0438740491786316e-05, + "loss": 0.5154467225074768, + "step": 8143 + }, + { + "epoch": 1.5048099687034449, + "grad_norm": 0.06128469109535217, + "learning_rate": 1.0436747488707666e-05, + "loss": 0.40261396765708923, + "step": 8144 + }, + { + "epoch": 1.5049947454123407, + "grad_norm": 0.08224528282880783, + "learning_rate": 1.0434754468247833e-05, + "loss": 0.47346919775009155, + "step": 8145 + }, + { + "epoch": 1.5051795221212365, + "grad_norm": 0.06956780701875687, + "learning_rate": 1.043276143048613e-05, + "loss": 0.4050731360912323, + "step": 8146 + }, + { + "epoch": 1.5053642988301323, + "grad_norm": 0.06966309994459152, + "learning_rate": 1.0430768375501877e-05, + "loss": 0.5770171880722046, + "step": 8147 + }, + { + "epoch": 1.5055490755390282, + "grad_norm": 0.07297446578741074, + "learning_rate": 1.0428775303374392e-05, + "loss": 0.4828217327594757, + "step": 8148 + }, + { + "epoch": 1.505733852247924, + "grad_norm": 0.06459491699934006, + "learning_rate": 1.0426782214182991e-05, + "loss": 0.3506518006324768, + "step": 8149 + }, + { + "epoch": 1.5059186289568198, + "grad_norm": 0.10174031555652618, + "learning_rate": 1.042478910800699e-05, + "loss": 0.6605497002601624, + "step": 8150 + }, + { + "epoch": 1.5061034056657159, + "grad_norm": 0.0809532031416893, + "learning_rate": 1.0422795984925712e-05, + "loss": 0.5446367859840393, + "step": 8151 + }, + { + "epoch": 1.5062881823746117, + "grad_norm": 0.06470794975757599, + "learning_rate": 1.0420802845018483e-05, + "loss": 0.41524538397789, + "step": 8152 + }, + { + "epoch": 1.5064729590835075, + "grad_norm": 0.07831034809350967, + "learning_rate": 1.041880968836461e-05, + "loss": 0.5452517867088318, + "step": 8153 + }, + { + "epoch": 1.5066577357924034, + "grad_norm": 0.07205667346715927, + "learning_rate": 1.0416816515043424e-05, + "loss": 0.5258175134658813, + "step": 8154 + }, + { + "epoch": 1.5068425125012992, + "grad_norm": 0.08268946409225464, + "learning_rate": 1.0414823325134248e-05, + "loss": 0.5627372860908508, + "step": 8155 + }, + { + "epoch": 1.507027289210195, + "grad_norm": 0.09006335586309433, + "learning_rate": 1.0412830118716396e-05, + "loss": 0.5282909870147705, + "step": 8156 + }, + { + "epoch": 1.5072120659190908, + "grad_norm": 0.06793666630983353, + "learning_rate": 1.0410836895869198e-05, + "loss": 0.4173637926578522, + "step": 8157 + }, + { + "epoch": 1.507396842627987, + "grad_norm": 0.0935467779636383, + "learning_rate": 1.0408843656671981e-05, + "loss": 0.6189107894897461, + "step": 8158 + }, + { + "epoch": 1.5075816193368827, + "grad_norm": 0.07903085649013519, + "learning_rate": 1.0406850401204062e-05, + "loss": 0.49702292680740356, + "step": 8159 + }, + { + "epoch": 1.5077663960457786, + "grad_norm": 0.08739448338747025, + "learning_rate": 1.040485712954477e-05, + "loss": 0.5867978930473328, + "step": 8160 + }, + { + "epoch": 1.5079511727546744, + "grad_norm": 0.09268154948949814, + "learning_rate": 1.0402863841773432e-05, + "loss": 0.648639440536499, + "step": 8161 + }, + { + "epoch": 1.5081359494635702, + "grad_norm": 0.07363422214984894, + "learning_rate": 1.0400870537969375e-05, + "loss": 0.5425917506217957, + "step": 8162 + }, + { + "epoch": 1.508320726172466, + "grad_norm": 0.07788573205471039, + "learning_rate": 1.039887721821192e-05, + "loss": 0.6117817163467407, + "step": 8163 + }, + { + "epoch": 1.5085055028813619, + "grad_norm": 0.07192922383546829, + "learning_rate": 1.0396883882580401e-05, + "loss": 0.5482012033462524, + "step": 8164 + }, + { + "epoch": 1.5086902795902577, + "grad_norm": 0.079349584877491, + "learning_rate": 1.039489053115415e-05, + "loss": 0.47276076674461365, + "step": 8165 + }, + { + "epoch": 1.5088750562991535, + "grad_norm": 0.06868570297956467, + "learning_rate": 1.0392897164012487e-05, + "loss": 0.5130391716957092, + "step": 8166 + }, + { + "epoch": 1.5090598330080494, + "grad_norm": 0.061139173805713654, + "learning_rate": 1.0390903781234748e-05, + "loss": 0.3642079830169678, + "step": 8167 + }, + { + "epoch": 1.5092446097169452, + "grad_norm": 0.0688120424747467, + "learning_rate": 1.0388910382900258e-05, + "loss": 0.3949092924594879, + "step": 8168 + }, + { + "epoch": 1.509429386425841, + "grad_norm": 0.07270647585391998, + "learning_rate": 1.0386916969088356e-05, + "loss": 0.4588416516780853, + "step": 8169 + }, + { + "epoch": 1.5096141631347368, + "grad_norm": 0.06708833575248718, + "learning_rate": 1.0384923539878366e-05, + "loss": 0.514162003993988, + "step": 8170 + }, + { + "epoch": 1.5097989398436327, + "grad_norm": 0.08766133338212967, + "learning_rate": 1.0382930095349625e-05, + "loss": 0.5897177457809448, + "step": 8171 + }, + { + "epoch": 1.5099837165525285, + "grad_norm": 0.08636727929115295, + "learning_rate": 1.0380936635581464e-05, + "loss": 0.6378280520439148, + "step": 8172 + }, + { + "epoch": 1.5101684932614243, + "grad_norm": 0.09457448869943619, + "learning_rate": 1.0378943160653216e-05, + "loss": 0.6356536746025085, + "step": 8173 + }, + { + "epoch": 1.5103532699703202, + "grad_norm": 0.07422550022602081, + "learning_rate": 1.0376949670644216e-05, + "loss": 0.50555419921875, + "step": 8174 + }, + { + "epoch": 1.510538046679216, + "grad_norm": 0.09057539701461792, + "learning_rate": 1.03749561656338e-05, + "loss": 0.650535523891449, + "step": 8175 + }, + { + "epoch": 1.5107228233881118, + "grad_norm": 0.0948851928114891, + "learning_rate": 1.0372962645701301e-05, + "loss": 0.6718326210975647, + "step": 8176 + }, + { + "epoch": 1.5109076000970076, + "grad_norm": 0.07239285856485367, + "learning_rate": 1.0370969110926052e-05, + "loss": 0.4960486888885498, + "step": 8177 + }, + { + "epoch": 1.5110923768059035, + "grad_norm": 0.0766497477889061, + "learning_rate": 1.0368975561387398e-05, + "loss": 0.552095890045166, + "step": 8178 + }, + { + "epoch": 1.5112771535147993, + "grad_norm": 0.06806263327598572, + "learning_rate": 1.036698199716467e-05, + "loss": 0.5912288427352905, + "step": 8179 + }, + { + "epoch": 1.5114619302236953, + "grad_norm": 0.0985720306634903, + "learning_rate": 1.0364988418337205e-05, + "loss": 0.6411997079849243, + "step": 8180 + }, + { + "epoch": 1.5116467069325912, + "grad_norm": 0.0719585195183754, + "learning_rate": 1.0362994824984343e-05, + "loss": 0.5217729806900024, + "step": 8181 + }, + { + "epoch": 1.511831483641487, + "grad_norm": 0.0763324648141861, + "learning_rate": 1.0361001217185425e-05, + "loss": 0.47890594601631165, + "step": 8182 + }, + { + "epoch": 1.5120162603503828, + "grad_norm": 0.05779948830604553, + "learning_rate": 1.0359007595019786e-05, + "loss": 0.3642115890979767, + "step": 8183 + }, + { + "epoch": 1.5122010370592787, + "grad_norm": 0.08898462355136871, + "learning_rate": 1.0357013958566766e-05, + "loss": 0.5947683453559875, + "step": 8184 + }, + { + "epoch": 1.5123858137681745, + "grad_norm": 0.06462650001049042, + "learning_rate": 1.035502030790571e-05, + "loss": 0.5002122521400452, + "step": 8185 + }, + { + "epoch": 1.5125705904770703, + "grad_norm": 0.06829912215471268, + "learning_rate": 1.0353026643115955e-05, + "loss": 0.4085816442966461, + "step": 8186 + }, + { + "epoch": 1.5127553671859664, + "grad_norm": 0.0703347772359848, + "learning_rate": 1.0351032964276846e-05, + "loss": 0.5114795565605164, + "step": 8187 + }, + { + "epoch": 1.5129401438948622, + "grad_norm": 0.05297807231545448, + "learning_rate": 1.0349039271467722e-05, + "loss": 0.3110232651233673, + "step": 8188 + }, + { + "epoch": 1.513124920603758, + "grad_norm": 0.07348600029945374, + "learning_rate": 1.0347045564767928e-05, + "loss": 0.5948768258094788, + "step": 8189 + }, + { + "epoch": 1.5133096973126539, + "grad_norm": 0.07313638925552368, + "learning_rate": 1.0345051844256806e-05, + "loss": 0.5299927592277527, + "step": 8190 + }, + { + "epoch": 1.5134944740215497, + "grad_norm": 0.0786438137292862, + "learning_rate": 1.0343058110013699e-05, + "loss": 0.5249634385108948, + "step": 8191 + }, + { + "epoch": 1.5136792507304455, + "grad_norm": 0.07947122305631638, + "learning_rate": 1.0341064362117954e-05, + "loss": 0.49638667702674866, + "step": 8192 + }, + { + "epoch": 1.5138640274393413, + "grad_norm": 0.0993708148598671, + "learning_rate": 1.0339070600648914e-05, + "loss": 0.6481083035469055, + "step": 8193 + }, + { + "epoch": 1.5140488041482372, + "grad_norm": 0.07443512976169586, + "learning_rate": 1.0337076825685924e-05, + "loss": 0.4163435995578766, + "step": 8194 + }, + { + "epoch": 1.514233580857133, + "grad_norm": 0.09199873358011246, + "learning_rate": 1.033508303730833e-05, + "loss": 0.5561436414718628, + "step": 8195 + }, + { + "epoch": 1.5144183575660288, + "grad_norm": 0.09095046669244766, + "learning_rate": 1.0333089235595481e-05, + "loss": 0.759151041507721, + "step": 8196 + }, + { + "epoch": 1.5146031342749247, + "grad_norm": 0.09198595583438873, + "learning_rate": 1.0331095420626724e-05, + "loss": 0.5493916273117065, + "step": 8197 + }, + { + "epoch": 1.5147879109838205, + "grad_norm": 0.07630749046802521, + "learning_rate": 1.0329101592481403e-05, + "loss": 0.5057935118675232, + "step": 8198 + }, + { + "epoch": 1.5149726876927163, + "grad_norm": 0.06232639402151108, + "learning_rate": 1.032710775123887e-05, + "loss": 0.4678415358066559, + "step": 8199 + }, + { + "epoch": 1.5151574644016121, + "grad_norm": 0.05888809263706207, + "learning_rate": 1.032511389697847e-05, + "loss": 0.3224062919616699, + "step": 8200 + }, + { + "epoch": 1.515342241110508, + "grad_norm": 0.06751586496829987, + "learning_rate": 1.0323120029779555e-05, + "loss": 0.47412821650505066, + "step": 8201 + }, + { + "epoch": 1.5155270178194038, + "grad_norm": 0.06171039491891861, + "learning_rate": 1.0321126149721472e-05, + "loss": 0.3479996621608734, + "step": 8202 + }, + { + "epoch": 1.5157117945282996, + "grad_norm": 0.06947099417448044, + "learning_rate": 1.0319132256883575e-05, + "loss": 0.43905627727508545, + "step": 8203 + }, + { + "epoch": 1.5158965712371955, + "grad_norm": 0.07921262085437775, + "learning_rate": 1.0317138351345211e-05, + "loss": 0.6788212656974792, + "step": 8204 + }, + { + "epoch": 1.5160813479460913, + "grad_norm": 0.06468327343463898, + "learning_rate": 1.0315144433185735e-05, + "loss": 0.37811005115509033, + "step": 8205 + }, + { + "epoch": 1.5162661246549871, + "grad_norm": 0.07263485342264175, + "learning_rate": 1.0313150502484494e-05, + "loss": 0.3825418949127197, + "step": 8206 + }, + { + "epoch": 1.516450901363883, + "grad_norm": 0.08249958604574203, + "learning_rate": 1.0311156559320844e-05, + "loss": 0.5926398038864136, + "step": 8207 + }, + { + "epoch": 1.5166356780727788, + "grad_norm": 0.0563255250453949, + "learning_rate": 1.0309162603774137e-05, + "loss": 0.36614182591438293, + "step": 8208 + }, + { + "epoch": 1.5168204547816748, + "grad_norm": 0.08559220284223557, + "learning_rate": 1.0307168635923725e-05, + "loss": 0.6239089369773865, + "step": 8209 + }, + { + "epoch": 1.5170052314905706, + "grad_norm": 0.06880374252796173, + "learning_rate": 1.0305174655848964e-05, + "loss": 0.39663127064704895, + "step": 8210 + }, + { + "epoch": 1.5171900081994665, + "grad_norm": 0.0709710642695427, + "learning_rate": 1.0303180663629201e-05, + "loss": 0.47326236963272095, + "step": 8211 + }, + { + "epoch": 1.5173747849083623, + "grad_norm": 0.07974464446306229, + "learning_rate": 1.0301186659343803e-05, + "loss": 0.6245824098587036, + "step": 8212 + }, + { + "epoch": 1.5175595616172581, + "grad_norm": 0.07741749286651611, + "learning_rate": 1.0299192643072116e-05, + "loss": 0.41176801919937134, + "step": 8213 + }, + { + "epoch": 1.517744338326154, + "grad_norm": 0.06590697914361954, + "learning_rate": 1.0297198614893498e-05, + "loss": 0.4024606943130493, + "step": 8214 + }, + { + "epoch": 1.5179291150350498, + "grad_norm": 0.07015514373779297, + "learning_rate": 1.0295204574887303e-05, + "loss": 0.5053236484527588, + "step": 8215 + }, + { + "epoch": 1.5181138917439456, + "grad_norm": 0.061859481036663055, + "learning_rate": 1.0293210523132889e-05, + "loss": 0.4355672001838684, + "step": 8216 + }, + { + "epoch": 1.5182986684528417, + "grad_norm": 0.08922475576400757, + "learning_rate": 1.0291216459709617e-05, + "loss": 0.5943321585655212, + "step": 8217 + }, + { + "epoch": 1.5184834451617375, + "grad_norm": 0.07734528183937073, + "learning_rate": 1.0289222384696838e-05, + "loss": 0.47709402441978455, + "step": 8218 + }, + { + "epoch": 1.5186682218706333, + "grad_norm": 0.07556109875440598, + "learning_rate": 1.0287228298173914e-05, + "loss": 0.6052689552307129, + "step": 8219 + }, + { + "epoch": 1.5188529985795292, + "grad_norm": 0.07528205215930939, + "learning_rate": 1.0285234200220202e-05, + "loss": 0.4916656017303467, + "step": 8220 + }, + { + "epoch": 1.519037775288425, + "grad_norm": 0.08439075946807861, + "learning_rate": 1.0283240090915063e-05, + "loss": 0.5587414503097534, + "step": 8221 + }, + { + "epoch": 1.5192225519973208, + "grad_norm": 0.07659289240837097, + "learning_rate": 1.0281245970337851e-05, + "loss": 0.4934830963611603, + "step": 8222 + }, + { + "epoch": 1.5194073287062166, + "grad_norm": 0.06956089287996292, + "learning_rate": 1.0279251838567931e-05, + "loss": 0.3786989152431488, + "step": 8223 + }, + { + "epoch": 1.5195921054151125, + "grad_norm": 0.07814884185791016, + "learning_rate": 1.0277257695684663e-05, + "loss": 0.4549594223499298, + "step": 8224 + }, + { + "epoch": 1.5197768821240083, + "grad_norm": 0.07259442657232285, + "learning_rate": 1.0275263541767405e-05, + "loss": 0.407787024974823, + "step": 8225 + }, + { + "epoch": 1.5199616588329041, + "grad_norm": 0.06543047726154327, + "learning_rate": 1.0273269376895518e-05, + "loss": 0.4028457701206207, + "step": 8226 + }, + { + "epoch": 1.5201464355418, + "grad_norm": 0.09713669121265411, + "learning_rate": 1.0271275201148368e-05, + "loss": 0.7962900996208191, + "step": 8227 + }, + { + "epoch": 1.5203312122506958, + "grad_norm": 0.08467891812324524, + "learning_rate": 1.0269281014605311e-05, + "loss": 0.6027758717536926, + "step": 8228 + }, + { + "epoch": 1.5205159889595916, + "grad_norm": 0.07185697555541992, + "learning_rate": 1.0267286817345714e-05, + "loss": 0.43385347723960876, + "step": 8229 + }, + { + "epoch": 1.5207007656684874, + "grad_norm": 0.09062127768993378, + "learning_rate": 1.0265292609448936e-05, + "loss": 0.684395968914032, + "step": 8230 + }, + { + "epoch": 1.5208855423773833, + "grad_norm": 0.0966351181268692, + "learning_rate": 1.0263298390994342e-05, + "loss": 0.7614896297454834, + "step": 8231 + }, + { + "epoch": 1.521070319086279, + "grad_norm": 0.05065008997917175, + "learning_rate": 1.0261304162061296e-05, + "loss": 0.3162464201450348, + "step": 8232 + }, + { + "epoch": 1.521255095795175, + "grad_norm": 0.06607464700937271, + "learning_rate": 1.0259309922729161e-05, + "loss": 0.4301515817642212, + "step": 8233 + }, + { + "epoch": 1.5214398725040708, + "grad_norm": 0.08169597387313843, + "learning_rate": 1.0257315673077307e-05, + "loss": 0.615820050239563, + "step": 8234 + }, + { + "epoch": 1.5216246492129666, + "grad_norm": 0.0687340497970581, + "learning_rate": 1.0255321413185091e-05, + "loss": 0.4618292450904846, + "step": 8235 + }, + { + "epoch": 1.5218094259218624, + "grad_norm": 0.09001611918210983, + "learning_rate": 1.025332714313188e-05, + "loss": 0.6406571269035339, + "step": 8236 + }, + { + "epoch": 1.5219942026307582, + "grad_norm": 0.10997528582811356, + "learning_rate": 1.0251332862997044e-05, + "loss": 0.749646008014679, + "step": 8237 + }, + { + "epoch": 1.522178979339654, + "grad_norm": 0.07221218198537827, + "learning_rate": 1.0249338572859945e-05, + "loss": 0.47017595171928406, + "step": 8238 + }, + { + "epoch": 1.5223637560485501, + "grad_norm": 0.09576410800218582, + "learning_rate": 1.024734427279995e-05, + "loss": 0.6915577054023743, + "step": 8239 + }, + { + "epoch": 1.522548532757446, + "grad_norm": 0.06706764549016953, + "learning_rate": 1.024534996289643e-05, + "loss": 0.3903615474700928, + "step": 8240 + }, + { + "epoch": 1.5227333094663418, + "grad_norm": 0.09002497047185898, + "learning_rate": 1.0243355643228747e-05, + "loss": 0.589722752571106, + "step": 8241 + }, + { + "epoch": 1.5229180861752376, + "grad_norm": 0.05878360942006111, + "learning_rate": 1.024136131387627e-05, + "loss": 0.3750290870666504, + "step": 8242 + }, + { + "epoch": 1.5231028628841334, + "grad_norm": 0.08013518899679184, + "learning_rate": 1.0239366974918367e-05, + "loss": 0.5195515155792236, + "step": 8243 + }, + { + "epoch": 1.5232876395930293, + "grad_norm": 0.07429202646017075, + "learning_rate": 1.023737262643441e-05, + "loss": 0.4740116596221924, + "step": 8244 + }, + { + "epoch": 1.523472416301925, + "grad_norm": 0.07139978557825089, + "learning_rate": 1.0235378268503764e-05, + "loss": 0.4189651310443878, + "step": 8245 + }, + { + "epoch": 1.5236571930108211, + "grad_norm": 0.06468570977449417, + "learning_rate": 1.0233383901205798e-05, + "loss": 0.443634569644928, + "step": 8246 + }, + { + "epoch": 1.523841969719717, + "grad_norm": 0.0754278302192688, + "learning_rate": 1.0231389524619886e-05, + "loss": 0.532339334487915, + "step": 8247 + }, + { + "epoch": 1.5240267464286128, + "grad_norm": 0.05140746012330055, + "learning_rate": 1.0229395138825394e-05, + "loss": 0.33748894929885864, + "step": 8248 + }, + { + "epoch": 1.5242115231375086, + "grad_norm": 0.0828530490398407, + "learning_rate": 1.0227400743901692e-05, + "loss": 0.5280756950378418, + "step": 8249 + }, + { + "epoch": 1.5243962998464045, + "grad_norm": 0.0824611708521843, + "learning_rate": 1.022540633992815e-05, + "loss": 0.5182260274887085, + "step": 8250 + }, + { + "epoch": 1.5245810765553003, + "grad_norm": 0.07414865493774414, + "learning_rate": 1.0223411926984146e-05, + "loss": 0.4884706437587738, + "step": 8251 + }, + { + "epoch": 1.5247658532641961, + "grad_norm": 0.09248776733875275, + "learning_rate": 1.022141750514904e-05, + "loss": 0.7126184701919556, + "step": 8252 + }, + { + "epoch": 1.524950629973092, + "grad_norm": 0.062182433903217316, + "learning_rate": 1.0219423074502213e-05, + "loss": 0.3596520721912384, + "step": 8253 + }, + { + "epoch": 1.5251354066819878, + "grad_norm": 0.08619441092014313, + "learning_rate": 1.0217428635123037e-05, + "loss": 0.6424943208694458, + "step": 8254 + }, + { + "epoch": 1.5253201833908836, + "grad_norm": 0.0970609188079834, + "learning_rate": 1.021543418709088e-05, + "loss": 0.6262872815132141, + "step": 8255 + }, + { + "epoch": 1.5255049600997794, + "grad_norm": 0.07370869815349579, + "learning_rate": 1.0213439730485111e-05, + "loss": 0.569189190864563, + "step": 8256 + }, + { + "epoch": 1.5256897368086753, + "grad_norm": 0.06459003686904907, + "learning_rate": 1.0211445265385114e-05, + "loss": 0.478630930185318, + "step": 8257 + }, + { + "epoch": 1.525874513517571, + "grad_norm": 0.0974598228931427, + "learning_rate": 1.0209450791870256e-05, + "loss": 0.6201545000076294, + "step": 8258 + }, + { + "epoch": 1.526059290226467, + "grad_norm": 0.0489286407828331, + "learning_rate": 1.0207456310019911e-05, + "loss": 0.2776273787021637, + "step": 8259 + }, + { + "epoch": 1.5262440669353627, + "grad_norm": 0.07265926152467728, + "learning_rate": 1.0205461819913454e-05, + "loss": 0.5331336259841919, + "step": 8260 + }, + { + "epoch": 1.5264288436442586, + "grad_norm": 0.047001227736473083, + "learning_rate": 1.0203467321630263e-05, + "loss": 0.26337340474128723, + "step": 8261 + }, + { + "epoch": 1.5266136203531544, + "grad_norm": 0.07281414419412613, + "learning_rate": 1.0201472815249705e-05, + "loss": 0.5219126343727112, + "step": 8262 + }, + { + "epoch": 1.5267983970620502, + "grad_norm": 0.08007118850946426, + "learning_rate": 1.0199478300851157e-05, + "loss": 0.6058944463729858, + "step": 8263 + }, + { + "epoch": 1.526983173770946, + "grad_norm": 0.05615640804171562, + "learning_rate": 1.0197483778514003e-05, + "loss": 0.3353560268878937, + "step": 8264 + }, + { + "epoch": 1.5271679504798419, + "grad_norm": 0.10725299268960953, + "learning_rate": 1.019548924831761e-05, + "loss": 0.7764474153518677, + "step": 8265 + }, + { + "epoch": 1.5273527271887377, + "grad_norm": 0.08144401758909225, + "learning_rate": 1.0193494710341354e-05, + "loss": 0.4821716547012329, + "step": 8266 + }, + { + "epoch": 1.5275375038976335, + "grad_norm": 0.07186413556337357, + "learning_rate": 1.0191500164664617e-05, + "loss": 0.4231451749801636, + "step": 8267 + }, + { + "epoch": 1.5277222806065296, + "grad_norm": 0.07108518481254578, + "learning_rate": 1.0189505611366772e-05, + "loss": 0.5909963846206665, + "step": 8268 + }, + { + "epoch": 1.5279070573154254, + "grad_norm": 0.08292071521282196, + "learning_rate": 1.0187511050527195e-05, + "loss": 0.5322451591491699, + "step": 8269 + }, + { + "epoch": 1.5280918340243212, + "grad_norm": 0.08781729638576508, + "learning_rate": 1.0185516482225264e-05, + "loss": 0.5437963008880615, + "step": 8270 + }, + { + "epoch": 1.528276610733217, + "grad_norm": 0.06114795804023743, + "learning_rate": 1.0183521906540362e-05, + "loss": 0.33090609312057495, + "step": 8271 + }, + { + "epoch": 1.528461387442113, + "grad_norm": 0.09468130022287369, + "learning_rate": 1.0181527323551859e-05, + "loss": 0.7327249646186829, + "step": 8272 + }, + { + "epoch": 1.5286461641510087, + "grad_norm": 0.07862450927495956, + "learning_rate": 1.0179532733339134e-05, + "loss": 0.6347699761390686, + "step": 8273 + }, + { + "epoch": 1.5288309408599046, + "grad_norm": 0.07262321561574936, + "learning_rate": 1.0177538135981573e-05, + "loss": 0.5719349980354309, + "step": 8274 + }, + { + "epoch": 1.5290157175688006, + "grad_norm": 0.0708065778017044, + "learning_rate": 1.0175543531558549e-05, + "loss": 0.48646262288093567, + "step": 8275 + }, + { + "epoch": 1.5292004942776964, + "grad_norm": 0.073647640645504, + "learning_rate": 1.0173548920149436e-05, + "loss": 0.42778047919273376, + "step": 8276 + }, + { + "epoch": 1.5293852709865923, + "grad_norm": 0.06990744173526764, + "learning_rate": 1.0171554301833626e-05, + "loss": 0.5315378904342651, + "step": 8277 + }, + { + "epoch": 1.529570047695488, + "grad_norm": 0.06842117756605148, + "learning_rate": 1.0169559676690491e-05, + "loss": 0.4153575301170349, + "step": 8278 + }, + { + "epoch": 1.529754824404384, + "grad_norm": 0.0740189403295517, + "learning_rate": 1.0167565044799405e-05, + "loss": 0.5298333764076233, + "step": 8279 + }, + { + "epoch": 1.5299396011132798, + "grad_norm": 0.08885012567043304, + "learning_rate": 1.016557040623976e-05, + "loss": 0.5935700535774231, + "step": 8280 + }, + { + "epoch": 1.5301243778221756, + "grad_norm": 0.0665549486875534, + "learning_rate": 1.016357576109093e-05, + "loss": 0.35256704688072205, + "step": 8281 + }, + { + "epoch": 1.5303091545310714, + "grad_norm": 0.08737396448850632, + "learning_rate": 1.0161581109432295e-05, + "loss": 0.6718435287475586, + "step": 8282 + }, + { + "epoch": 1.5304939312399672, + "grad_norm": 0.0853419080376625, + "learning_rate": 1.0159586451343236e-05, + "loss": 0.7269653081893921, + "step": 8283 + }, + { + "epoch": 1.530678707948863, + "grad_norm": 0.09545714408159256, + "learning_rate": 1.0157591786903138e-05, + "loss": 0.6577144861221313, + "step": 8284 + }, + { + "epoch": 1.530863484657759, + "grad_norm": 0.08123432099819183, + "learning_rate": 1.0155597116191382e-05, + "loss": 0.5123420357704163, + "step": 8285 + }, + { + "epoch": 1.5310482613666547, + "grad_norm": 0.07155416905879974, + "learning_rate": 1.0153602439287344e-05, + "loss": 0.58453768491745, + "step": 8286 + }, + { + "epoch": 1.5312330380755506, + "grad_norm": 0.06902708858251572, + "learning_rate": 1.015160775627041e-05, + "loss": 0.549127459526062, + "step": 8287 + }, + { + "epoch": 1.5314178147844464, + "grad_norm": 0.08658330887556076, + "learning_rate": 1.0149613067219963e-05, + "loss": 0.5340646505355835, + "step": 8288 + }, + { + "epoch": 1.5316025914933422, + "grad_norm": 0.06111351028084755, + "learning_rate": 1.0147618372215381e-05, + "loss": 0.3370615243911743, + "step": 8289 + }, + { + "epoch": 1.531787368202238, + "grad_norm": 0.06116218864917755, + "learning_rate": 1.0145623671336053e-05, + "loss": 0.305520623922348, + "step": 8290 + }, + { + "epoch": 1.5319721449111339, + "grad_norm": 0.08590128272771835, + "learning_rate": 1.0143628964661358e-05, + "loss": 0.58865886926651, + "step": 8291 + }, + { + "epoch": 1.5321569216200297, + "grad_norm": 0.09435473382472992, + "learning_rate": 1.0141634252270678e-05, + "loss": 0.5893629193305969, + "step": 8292 + }, + { + "epoch": 1.5323416983289255, + "grad_norm": 0.09070255607366562, + "learning_rate": 1.0139639534243397e-05, + "loss": 0.5695667862892151, + "step": 8293 + }, + { + "epoch": 1.5325264750378214, + "grad_norm": 0.0686533972620964, + "learning_rate": 1.0137644810658904e-05, + "loss": 0.4923887550830841, + "step": 8294 + }, + { + "epoch": 1.5327112517467172, + "grad_norm": 0.0996929332613945, + "learning_rate": 1.0135650081596574e-05, + "loss": 0.646103024482727, + "step": 8295 + }, + { + "epoch": 1.532896028455613, + "grad_norm": 0.07553873211145401, + "learning_rate": 1.0133655347135797e-05, + "loss": 0.4094032943248749, + "step": 8296 + }, + { + "epoch": 1.533080805164509, + "grad_norm": 0.0861702561378479, + "learning_rate": 1.0131660607355956e-05, + "loss": 0.7193480730056763, + "step": 8297 + }, + { + "epoch": 1.533265581873405, + "grad_norm": 0.07377294450998306, + "learning_rate": 1.0129665862336434e-05, + "loss": 0.41736268997192383, + "step": 8298 + }, + { + "epoch": 1.5334503585823007, + "grad_norm": 0.0883931890130043, + "learning_rate": 1.0127671112156614e-05, + "loss": 0.7603240609169006, + "step": 8299 + }, + { + "epoch": 1.5336351352911965, + "grad_norm": 0.0921977087855339, + "learning_rate": 1.0125676356895884e-05, + "loss": 0.67726731300354, + "step": 8300 + }, + { + "epoch": 1.5338199120000924, + "grad_norm": 0.09125876426696777, + "learning_rate": 1.012368159663363e-05, + "loss": 0.6322854161262512, + "step": 8301 + }, + { + "epoch": 1.5340046887089882, + "grad_norm": 0.08249793201684952, + "learning_rate": 1.0121686831449235e-05, + "loss": 0.590074360370636, + "step": 8302 + }, + { + "epoch": 1.534189465417884, + "grad_norm": 0.07465846091508865, + "learning_rate": 1.0119692061422086e-05, + "loss": 0.41635817289352417, + "step": 8303 + }, + { + "epoch": 1.53437424212678, + "grad_norm": 0.09888509660959244, + "learning_rate": 1.0117697286631565e-05, + "loss": 0.5953425168991089, + "step": 8304 + }, + { + "epoch": 1.534559018835676, + "grad_norm": 0.07236199080944061, + "learning_rate": 1.0115702507157061e-05, + "loss": 0.5066321492195129, + "step": 8305 + }, + { + "epoch": 1.5347437955445717, + "grad_norm": 0.09026050567626953, + "learning_rate": 1.011370772307796e-05, + "loss": 0.6313549280166626, + "step": 8306 + }, + { + "epoch": 1.5349285722534676, + "grad_norm": 0.06680828332901001, + "learning_rate": 1.0111712934473645e-05, + "loss": 0.5088815689086914, + "step": 8307 + }, + { + "epoch": 1.5351133489623634, + "grad_norm": 0.07624626904726028, + "learning_rate": 1.0109718141423508e-05, + "loss": 0.5378090739250183, + "step": 8308 + }, + { + "epoch": 1.5352981256712592, + "grad_norm": 0.06227405369281769, + "learning_rate": 1.010772334400693e-05, + "loss": 0.316373735666275, + "step": 8309 + }, + { + "epoch": 1.535482902380155, + "grad_norm": 0.08028050512075424, + "learning_rate": 1.0105728542303299e-05, + "loss": 0.5072022676467896, + "step": 8310 + }, + { + "epoch": 1.5356676790890509, + "grad_norm": 0.08129177987575531, + "learning_rate": 1.0103733736392006e-05, + "loss": 0.5504554510116577, + "step": 8311 + }, + { + "epoch": 1.5358524557979467, + "grad_norm": 0.06616882979869843, + "learning_rate": 1.0101738926352432e-05, + "loss": 0.5026891231536865, + "step": 8312 + }, + { + "epoch": 1.5360372325068425, + "grad_norm": 0.08874485641717911, + "learning_rate": 1.009974411226397e-05, + "loss": 0.6369149684906006, + "step": 8313 + }, + { + "epoch": 1.5362220092157384, + "grad_norm": 0.0581573061645031, + "learning_rate": 1.0097749294206e-05, + "loss": 0.3060384690761566, + "step": 8314 + }, + { + "epoch": 1.5364067859246342, + "grad_norm": 0.07999205589294434, + "learning_rate": 1.0095754472257919e-05, + "loss": 0.5771450996398926, + "step": 8315 + }, + { + "epoch": 1.53659156263353, + "grad_norm": 0.07648079842329025, + "learning_rate": 1.0093759646499106e-05, + "loss": 0.6316032409667969, + "step": 8316 + }, + { + "epoch": 1.5367763393424259, + "grad_norm": 0.09479255229234695, + "learning_rate": 1.0091764817008953e-05, + "loss": 0.5895540118217468, + "step": 8317 + }, + { + "epoch": 1.5369611160513217, + "grad_norm": 0.0748986080288887, + "learning_rate": 1.0089769983866849e-05, + "loss": 0.4719810485839844, + "step": 8318 + }, + { + "epoch": 1.5371458927602175, + "grad_norm": 0.08592166006565094, + "learning_rate": 1.008777514715218e-05, + "loss": 0.6072490811347961, + "step": 8319 + }, + { + "epoch": 1.5373306694691133, + "grad_norm": 0.06951703131198883, + "learning_rate": 1.0085780306944335e-05, + "loss": 0.49982306361198425, + "step": 8320 + }, + { + "epoch": 1.5375154461780092, + "grad_norm": 0.09663759917020798, + "learning_rate": 1.00837854633227e-05, + "loss": 0.6411300301551819, + "step": 8321 + }, + { + "epoch": 1.537700222886905, + "grad_norm": 0.05903920531272888, + "learning_rate": 1.0081790616366665e-05, + "loss": 0.4022294878959656, + "step": 8322 + }, + { + "epoch": 1.5378849995958008, + "grad_norm": 0.08403241634368896, + "learning_rate": 1.0079795766155622e-05, + "loss": 0.5444654226303101, + "step": 8323 + }, + { + "epoch": 1.5380697763046967, + "grad_norm": 0.08010748028755188, + "learning_rate": 1.0077800912768955e-05, + "loss": 0.4678199887275696, + "step": 8324 + }, + { + "epoch": 1.5382545530135925, + "grad_norm": 0.09412200003862381, + "learning_rate": 1.007580605628606e-05, + "loss": 0.5801590085029602, + "step": 8325 + }, + { + "epoch": 1.5384393297224885, + "grad_norm": 0.07934234291315079, + "learning_rate": 1.0073811196786316e-05, + "loss": 0.4976900517940521, + "step": 8326 + }, + { + "epoch": 1.5386241064313844, + "grad_norm": 0.07337716966867447, + "learning_rate": 1.007181633434912e-05, + "loss": 0.576170027256012, + "step": 8327 + }, + { + "epoch": 1.5388088831402802, + "grad_norm": 0.0709516704082489, + "learning_rate": 1.0069821469053858e-05, + "loss": 0.6091797351837158, + "step": 8328 + }, + { + "epoch": 1.538993659849176, + "grad_norm": 0.07814224809408188, + "learning_rate": 1.0067826600979917e-05, + "loss": 0.6235194206237793, + "step": 8329 + }, + { + "epoch": 1.5391784365580718, + "grad_norm": 0.06715002655982971, + "learning_rate": 1.0065831730206695e-05, + "loss": 0.4767167568206787, + "step": 8330 + }, + { + "epoch": 1.5393632132669677, + "grad_norm": 0.09266090393066406, + "learning_rate": 1.0063836856813571e-05, + "loss": 0.5710633993148804, + "step": 8331 + }, + { + "epoch": 1.5395479899758635, + "grad_norm": 0.06542815268039703, + "learning_rate": 1.0061841980879941e-05, + "loss": 0.4574289917945862, + "step": 8332 + }, + { + "epoch": 1.5397327666847593, + "grad_norm": 0.06793387979269028, + "learning_rate": 1.0059847102485196e-05, + "loss": 0.4224914312362671, + "step": 8333 + }, + { + "epoch": 1.5399175433936554, + "grad_norm": 0.07768009603023529, + "learning_rate": 1.0057852221708722e-05, + "loss": 0.4772679805755615, + "step": 8334 + }, + { + "epoch": 1.5401023201025512, + "grad_norm": 0.08414895832538605, + "learning_rate": 1.005585733862991e-05, + "loss": 0.5030648112297058, + "step": 8335 + }, + { + "epoch": 1.540287096811447, + "grad_norm": 0.07794249802827835, + "learning_rate": 1.0053862453328152e-05, + "loss": 0.434922456741333, + "step": 8336 + }, + { + "epoch": 1.5404718735203429, + "grad_norm": 0.09836164861917496, + "learning_rate": 1.0051867565882838e-05, + "loss": 0.8413593173027039, + "step": 8337 + }, + { + "epoch": 1.5406566502292387, + "grad_norm": 0.08989561349153519, + "learning_rate": 1.0049872676373354e-05, + "loss": 0.6303113698959351, + "step": 8338 + }, + { + "epoch": 1.5408414269381345, + "grad_norm": 0.07046696543693542, + "learning_rate": 1.0047877784879094e-05, + "loss": 0.4028065502643585, + "step": 8339 + }, + { + "epoch": 1.5410262036470304, + "grad_norm": 0.08524170517921448, + "learning_rate": 1.004588289147945e-05, + "loss": 0.569879949092865, + "step": 8340 + }, + { + "epoch": 1.5412109803559262, + "grad_norm": 0.0819799154996872, + "learning_rate": 1.004388799625381e-05, + "loss": 0.6062819957733154, + "step": 8341 + }, + { + "epoch": 1.541395757064822, + "grad_norm": 0.06758811324834824, + "learning_rate": 1.0041893099281564e-05, + "loss": 0.5327195525169373, + "step": 8342 + }, + { + "epoch": 1.5415805337737178, + "grad_norm": 0.057109855115413666, + "learning_rate": 1.0039898200642105e-05, + "loss": 0.38353052735328674, + "step": 8343 + }, + { + "epoch": 1.5417653104826137, + "grad_norm": 0.07154912501573563, + "learning_rate": 1.0037903300414821e-05, + "loss": 0.37415507435798645, + "step": 8344 + }, + { + "epoch": 1.5419500871915095, + "grad_norm": 0.055001989006996155, + "learning_rate": 1.0035908398679101e-05, + "loss": 0.27712109684944153, + "step": 8345 + }, + { + "epoch": 1.5421348639004053, + "grad_norm": 0.07522137463092804, + "learning_rate": 1.0033913495514346e-05, + "loss": 0.5713220238685608, + "step": 8346 + }, + { + "epoch": 1.5423196406093012, + "grad_norm": 0.07752378284931183, + "learning_rate": 1.0031918590999938e-05, + "loss": 0.49891120195388794, + "step": 8347 + }, + { + "epoch": 1.542504417318197, + "grad_norm": 0.0699925497174263, + "learning_rate": 1.0029923685215268e-05, + "loss": 0.461686372756958, + "step": 8348 + }, + { + "epoch": 1.5426891940270928, + "grad_norm": 0.07942511141300201, + "learning_rate": 1.0027928778239729e-05, + "loss": 0.5484244227409363, + "step": 8349 + }, + { + "epoch": 1.5428739707359886, + "grad_norm": 0.06200651451945305, + "learning_rate": 1.0025933870152714e-05, + "loss": 0.3867597281932831, + "step": 8350 + }, + { + "epoch": 1.5430587474448845, + "grad_norm": 0.0850205346941948, + "learning_rate": 1.0023938961033612e-05, + "loss": 0.6178774833679199, + "step": 8351 + }, + { + "epoch": 1.5432435241537803, + "grad_norm": 0.06608953326940536, + "learning_rate": 1.0021944050961809e-05, + "loss": 0.4367190897464752, + "step": 8352 + }, + { + "epoch": 1.5434283008626761, + "grad_norm": 0.06947171688079834, + "learning_rate": 1.0019949140016707e-05, + "loss": 0.4598299264907837, + "step": 8353 + }, + { + "epoch": 1.543613077571572, + "grad_norm": 0.08985291421413422, + "learning_rate": 1.0017954228277694e-05, + "loss": 0.7923848628997803, + "step": 8354 + }, + { + "epoch": 1.5437978542804678, + "grad_norm": 0.07726897299289703, + "learning_rate": 1.001595931582415e-05, + "loss": 0.42478588223457336, + "step": 8355 + }, + { + "epoch": 1.5439826309893638, + "grad_norm": 0.07254164665937424, + "learning_rate": 1.0013964402735482e-05, + "loss": 0.5146327018737793, + "step": 8356 + }, + { + "epoch": 1.5441674076982597, + "grad_norm": 0.09229913353919983, + "learning_rate": 1.0011969489091073e-05, + "loss": 0.6449254751205444, + "step": 8357 + }, + { + "epoch": 1.5443521844071555, + "grad_norm": 0.0819351077079773, + "learning_rate": 1.0009974574970316e-05, + "loss": 0.634602427482605, + "step": 8358 + }, + { + "epoch": 1.5445369611160513, + "grad_norm": 0.09047498553991318, + "learning_rate": 1.0007979660452601e-05, + "loss": 0.67640221118927, + "step": 8359 + }, + { + "epoch": 1.5447217378249471, + "grad_norm": 0.06298615038394928, + "learning_rate": 1.0005984745617321e-05, + "loss": 0.39689722657203674, + "step": 8360 + }, + { + "epoch": 1.544906514533843, + "grad_norm": 0.08086885511875153, + "learning_rate": 1.0003989830543868e-05, + "loss": 0.3936644196510315, + "step": 8361 + }, + { + "epoch": 1.5450912912427388, + "grad_norm": 0.08219406753778458, + "learning_rate": 1.0001994915311628e-05, + "loss": 0.6133519411087036, + "step": 8362 + }, + { + "epoch": 1.5452760679516349, + "grad_norm": 0.08173570781946182, + "learning_rate": 1e-05, + "loss": 0.5591699481010437, + "step": 8363 + }, + { + "epoch": 1.5454608446605307, + "grad_norm": 0.07632359862327576, + "learning_rate": 9.998005084688372e-06, + "loss": 0.5126186609268188, + "step": 8364 + }, + { + "epoch": 1.5456456213694265, + "grad_norm": 0.07817716896533966, + "learning_rate": 9.996010169456137e-06, + "loss": 0.5563318133354187, + "step": 8365 + }, + { + "epoch": 1.5458303980783223, + "grad_norm": 0.060379207134246826, + "learning_rate": 9.994015254382682e-06, + "loss": 0.36338597536087036, + "step": 8366 + }, + { + "epoch": 1.5460151747872182, + "grad_norm": 0.06967508792877197, + "learning_rate": 9.9920203395474e-06, + "loss": 0.4792593717575073, + "step": 8367 + }, + { + "epoch": 1.546199951496114, + "grad_norm": 0.08439387381076813, + "learning_rate": 9.990025425029689e-06, + "loss": 0.5181248784065247, + "step": 8368 + }, + { + "epoch": 1.5463847282050098, + "grad_norm": 0.0615164078772068, + "learning_rate": 9.988030510908929e-06, + "loss": 0.43316343426704407, + "step": 8369 + }, + { + "epoch": 1.5465695049139057, + "grad_norm": 0.07169511914253235, + "learning_rate": 9.98603559726452e-06, + "loss": 0.4729943871498108, + "step": 8370 + }, + { + "epoch": 1.5467542816228015, + "grad_norm": 0.09175395220518112, + "learning_rate": 9.984040684175853e-06, + "loss": 0.6354399919509888, + "step": 8371 + }, + { + "epoch": 1.5469390583316973, + "grad_norm": 0.0714784562587738, + "learning_rate": 9.982045771722311e-06, + "loss": 0.4669618010520935, + "step": 8372 + }, + { + "epoch": 1.5471238350405931, + "grad_norm": 0.0871136412024498, + "learning_rate": 9.980050859983296e-06, + "loss": 0.5754742622375488, + "step": 8373 + }, + { + "epoch": 1.547308611749489, + "grad_norm": 0.09413383156061172, + "learning_rate": 9.978055949038193e-06, + "loss": 0.7470415830612183, + "step": 8374 + }, + { + "epoch": 1.5474933884583848, + "grad_norm": 0.07848978042602539, + "learning_rate": 9.976061038966391e-06, + "loss": 0.5846307277679443, + "step": 8375 + }, + { + "epoch": 1.5476781651672806, + "grad_norm": 0.07685250043869019, + "learning_rate": 9.974066129847291e-06, + "loss": 0.5765482187271118, + "step": 8376 + }, + { + "epoch": 1.5478629418761765, + "grad_norm": 0.06022125110030174, + "learning_rate": 9.972071221760274e-06, + "loss": 0.367461621761322, + "step": 8377 + }, + { + "epoch": 1.5480477185850723, + "grad_norm": 0.09071533381938934, + "learning_rate": 9.970076314784735e-06, + "loss": 0.6036496758460999, + "step": 8378 + }, + { + "epoch": 1.5482324952939681, + "grad_norm": 0.06718786805868149, + "learning_rate": 9.968081409000067e-06, + "loss": 0.4123038649559021, + "step": 8379 + }, + { + "epoch": 1.548417272002864, + "grad_norm": 0.08263219892978668, + "learning_rate": 9.966086504485657e-06, + "loss": 0.5069796442985535, + "step": 8380 + }, + { + "epoch": 1.5486020487117598, + "grad_norm": 0.09405094385147095, + "learning_rate": 9.964091601320897e-06, + "loss": 0.6533642411231995, + "step": 8381 + }, + { + "epoch": 1.5487868254206556, + "grad_norm": 0.0682285875082016, + "learning_rate": 9.962096699585184e-06, + "loss": 0.45217806100845337, + "step": 8382 + }, + { + "epoch": 1.5489716021295514, + "grad_norm": 0.08995096385478973, + "learning_rate": 9.960101799357899e-06, + "loss": 0.6177809834480286, + "step": 8383 + }, + { + "epoch": 1.5491563788384473, + "grad_norm": 0.09581541270017624, + "learning_rate": 9.958106900718438e-06, + "loss": 0.6463266015052795, + "step": 8384 + }, + { + "epoch": 1.5493411555473433, + "grad_norm": 0.09332716464996338, + "learning_rate": 9.956112003746194e-06, + "loss": 0.6386651396751404, + "step": 8385 + }, + { + "epoch": 1.5495259322562391, + "grad_norm": 0.08254636079072952, + "learning_rate": 9.954117108520552e-06, + "loss": 0.5312403440475464, + "step": 8386 + }, + { + "epoch": 1.549710708965135, + "grad_norm": 0.10466068983078003, + "learning_rate": 9.952122215120906e-06, + "loss": 0.5892578959465027, + "step": 8387 + }, + { + "epoch": 1.5498954856740308, + "grad_norm": 0.08869829773902893, + "learning_rate": 9.950127323626648e-06, + "loss": 0.5440428853034973, + "step": 8388 + }, + { + "epoch": 1.5500802623829266, + "grad_norm": 0.0550822913646698, + "learning_rate": 9.948132434117165e-06, + "loss": 0.29246920347213745, + "step": 8389 + }, + { + "epoch": 1.5502650390918224, + "grad_norm": 0.07488343119621277, + "learning_rate": 9.946137546671853e-06, + "loss": 0.5593857169151306, + "step": 8390 + }, + { + "epoch": 1.5504498158007183, + "grad_norm": 0.07491283863782883, + "learning_rate": 9.944142661370091e-06, + "loss": 0.4983839690685272, + "step": 8391 + }, + { + "epoch": 1.5506345925096143, + "grad_norm": 0.08424947410821915, + "learning_rate": 9.94214777829128e-06, + "loss": 0.5868728160858154, + "step": 8392 + }, + { + "epoch": 1.5508193692185102, + "grad_norm": 0.08198340982198715, + "learning_rate": 9.940152897514809e-06, + "loss": 0.4823242723941803, + "step": 8393 + }, + { + "epoch": 1.551004145927406, + "grad_norm": 0.08373264223337173, + "learning_rate": 9.93815801912006e-06, + "loss": 0.5581148862838745, + "step": 8394 + }, + { + "epoch": 1.5511889226363018, + "grad_norm": 0.06925924867391586, + "learning_rate": 9.936163143186429e-06, + "loss": 0.5806288123130798, + "step": 8395 + }, + { + "epoch": 1.5513736993451976, + "grad_norm": 0.06480623781681061, + "learning_rate": 9.93416826979331e-06, + "loss": 0.4784233272075653, + "step": 8396 + }, + { + "epoch": 1.5515584760540935, + "grad_norm": 0.07471255213022232, + "learning_rate": 9.932173399020085e-06, + "loss": 0.6061058640480042, + "step": 8397 + }, + { + "epoch": 1.5517432527629893, + "grad_norm": 0.06772679090499878, + "learning_rate": 9.930178530946145e-06, + "loss": 0.4416591227054596, + "step": 8398 + }, + { + "epoch": 1.5519280294718851, + "grad_norm": 0.07152847200632095, + "learning_rate": 9.928183665650885e-06, + "loss": 0.38082125782966614, + "step": 8399 + }, + { + "epoch": 1.552112806180781, + "grad_norm": 0.06756367534399033, + "learning_rate": 9.926188803213687e-06, + "loss": 0.43275314569473267, + "step": 8400 + }, + { + "epoch": 1.5522975828896768, + "grad_norm": 0.07801774889230728, + "learning_rate": 9.924193943713943e-06, + "loss": 0.5323340892791748, + "step": 8401 + }, + { + "epoch": 1.5524823595985726, + "grad_norm": 0.06771323084831238, + "learning_rate": 9.922199087231046e-06, + "loss": 0.481885701417923, + "step": 8402 + }, + { + "epoch": 1.5526671363074684, + "grad_norm": 0.06427159905433655, + "learning_rate": 9.92020423384438e-06, + "loss": 0.46014758944511414, + "step": 8403 + }, + { + "epoch": 1.5528519130163643, + "grad_norm": 0.07718753814697266, + "learning_rate": 9.918209383633337e-06, + "loss": 0.6056550145149231, + "step": 8404 + }, + { + "epoch": 1.55303668972526, + "grad_norm": 0.06274012476205826, + "learning_rate": 9.916214536677304e-06, + "loss": 0.4475315511226654, + "step": 8405 + }, + { + "epoch": 1.553221466434156, + "grad_norm": 0.0780898854136467, + "learning_rate": 9.914219693055669e-06, + "loss": 0.5073195695877075, + "step": 8406 + }, + { + "epoch": 1.5534062431430518, + "grad_norm": 0.07583803683519363, + "learning_rate": 9.912224852847825e-06, + "loss": 0.5635548233985901, + "step": 8407 + }, + { + "epoch": 1.5535910198519476, + "grad_norm": 0.06344740837812424, + "learning_rate": 9.910230016133153e-06, + "loss": 0.379596471786499, + "step": 8408 + }, + { + "epoch": 1.5537757965608434, + "grad_norm": 0.06110159680247307, + "learning_rate": 9.908235182991047e-06, + "loss": 0.3941616714000702, + "step": 8409 + }, + { + "epoch": 1.5539605732697392, + "grad_norm": 0.07348445057868958, + "learning_rate": 9.906240353500899e-06, + "loss": 0.472446471452713, + "step": 8410 + }, + { + "epoch": 1.554145349978635, + "grad_norm": 0.08863788843154907, + "learning_rate": 9.904245527742083e-06, + "loss": 0.5516306161880493, + "step": 8411 + }, + { + "epoch": 1.554330126687531, + "grad_norm": 0.10298895090818405, + "learning_rate": 9.902250705794e-06, + "loss": 0.5250436067581177, + "step": 8412 + }, + { + "epoch": 1.5545149033964267, + "grad_norm": 0.07118190824985504, + "learning_rate": 9.900255887736036e-06, + "loss": 0.47028887271881104, + "step": 8413 + }, + { + "epoch": 1.5546996801053228, + "grad_norm": 0.08338896185159683, + "learning_rate": 9.89826107364757e-06, + "loss": 0.4545728862285614, + "step": 8414 + }, + { + "epoch": 1.5548844568142186, + "grad_norm": 0.09131994098424911, + "learning_rate": 9.896266263607996e-06, + "loss": 0.5806403160095215, + "step": 8415 + }, + { + "epoch": 1.5550692335231144, + "grad_norm": 0.07578743249177933, + "learning_rate": 9.894271457696703e-06, + "loss": 0.5110961198806763, + "step": 8416 + }, + { + "epoch": 1.5552540102320103, + "grad_norm": 0.12886503338813782, + "learning_rate": 9.892276655993073e-06, + "loss": 0.5968006253242493, + "step": 8417 + }, + { + "epoch": 1.555438786940906, + "grad_norm": 0.07970307767391205, + "learning_rate": 9.890281858576494e-06, + "loss": 0.5507674217224121, + "step": 8418 + }, + { + "epoch": 1.555623563649802, + "grad_norm": 0.07072608172893524, + "learning_rate": 9.888287065526358e-06, + "loss": 0.4310251772403717, + "step": 8419 + }, + { + "epoch": 1.5558083403586978, + "grad_norm": 0.061415500938892365, + "learning_rate": 9.886292276922044e-06, + "loss": 0.36818867921829224, + "step": 8420 + }, + { + "epoch": 1.5559931170675936, + "grad_norm": 0.090281181037426, + "learning_rate": 9.884297492842944e-06, + "loss": 0.49106040596961975, + "step": 8421 + }, + { + "epoch": 1.5561778937764896, + "grad_norm": 0.06825218349695206, + "learning_rate": 9.882302713368438e-06, + "loss": 0.5302210450172424, + "step": 8422 + }, + { + "epoch": 1.5563626704853855, + "grad_norm": 0.07292143255472183, + "learning_rate": 9.880307938577917e-06, + "loss": 0.4451478123664856, + "step": 8423 + }, + { + "epoch": 1.5565474471942813, + "grad_norm": 0.07285430282354355, + "learning_rate": 9.878313168550768e-06, + "loss": 0.4047282338142395, + "step": 8424 + }, + { + "epoch": 1.5567322239031771, + "grad_norm": 0.07194212079048157, + "learning_rate": 9.876318403366371e-06, + "loss": 0.37185072898864746, + "step": 8425 + }, + { + "epoch": 1.556917000612073, + "grad_norm": 0.06978459656238556, + "learning_rate": 9.874323643104116e-06, + "loss": 0.46612977981567383, + "step": 8426 + }, + { + "epoch": 1.5571017773209688, + "grad_norm": 0.076767697930336, + "learning_rate": 9.872328887843391e-06, + "loss": 0.49881860613822937, + "step": 8427 + }, + { + "epoch": 1.5572865540298646, + "grad_norm": 0.06601028889417648, + "learning_rate": 9.87033413766357e-06, + "loss": 0.4231835901737213, + "step": 8428 + }, + { + "epoch": 1.5574713307387604, + "grad_norm": 0.07401569187641144, + "learning_rate": 9.868339392644046e-06, + "loss": 0.5855440497398376, + "step": 8429 + }, + { + "epoch": 1.5576561074476563, + "grad_norm": 0.0813252180814743, + "learning_rate": 9.866344652864208e-06, + "loss": 0.593075692653656, + "step": 8430 + }, + { + "epoch": 1.557840884156552, + "grad_norm": 0.07243362069129944, + "learning_rate": 9.864349918403427e-06, + "loss": 0.5803065896034241, + "step": 8431 + }, + { + "epoch": 1.558025660865448, + "grad_norm": 0.11118949949741364, + "learning_rate": 9.862355189341097e-06, + "loss": 0.6703124642372131, + "step": 8432 + }, + { + "epoch": 1.5582104375743437, + "grad_norm": 0.07939611375331879, + "learning_rate": 9.860360465756606e-06, + "loss": 0.6099793314933777, + "step": 8433 + }, + { + "epoch": 1.5583952142832396, + "grad_norm": 0.08516814559698105, + "learning_rate": 9.858365747729325e-06, + "loss": 0.6696739196777344, + "step": 8434 + }, + { + "epoch": 1.5585799909921354, + "grad_norm": 0.07504752278327942, + "learning_rate": 9.856371035338641e-06, + "loss": 0.5141589045524597, + "step": 8435 + }, + { + "epoch": 1.5587647677010312, + "grad_norm": 0.07196090370416641, + "learning_rate": 9.85437632866395e-06, + "loss": 0.540097177028656, + "step": 8436 + }, + { + "epoch": 1.558949544409927, + "grad_norm": 0.06923947483301163, + "learning_rate": 9.85238162778462e-06, + "loss": 0.40927737951278687, + "step": 8437 + }, + { + "epoch": 1.5591343211188229, + "grad_norm": 0.07277125865221024, + "learning_rate": 9.850386932780042e-06, + "loss": 0.45931127667427063, + "step": 8438 + }, + { + "epoch": 1.5593190978277187, + "grad_norm": 0.09042638540267944, + "learning_rate": 9.848392243729594e-06, + "loss": 0.6475035548210144, + "step": 8439 + }, + { + "epoch": 1.5595038745366145, + "grad_norm": 0.07516621053218842, + "learning_rate": 9.846397560712658e-06, + "loss": 0.47815361618995667, + "step": 8440 + }, + { + "epoch": 1.5596886512455104, + "grad_norm": 0.06490602344274521, + "learning_rate": 9.844402883808623e-06, + "loss": 0.40921348333358765, + "step": 8441 + }, + { + "epoch": 1.5598734279544062, + "grad_norm": 0.08869045972824097, + "learning_rate": 9.842408213096863e-06, + "loss": 0.6063772439956665, + "step": 8442 + }, + { + "epoch": 1.560058204663302, + "grad_norm": 0.08136522024869919, + "learning_rate": 9.840413548656764e-06, + "loss": 0.5187150239944458, + "step": 8443 + }, + { + "epoch": 1.560242981372198, + "grad_norm": 0.07281823456287384, + "learning_rate": 9.83841889056771e-06, + "loss": 0.580685019493103, + "step": 8444 + }, + { + "epoch": 1.560427758081094, + "grad_norm": 0.06910470873117447, + "learning_rate": 9.836424238909073e-06, + "loss": 0.4532964825630188, + "step": 8445 + }, + { + "epoch": 1.5606125347899897, + "grad_norm": 0.06782284379005432, + "learning_rate": 9.834429593760241e-06, + "loss": 0.47977548837661743, + "step": 8446 + }, + { + "epoch": 1.5607973114988856, + "grad_norm": 0.0937873125076294, + "learning_rate": 9.832434955200597e-06, + "loss": 0.6324499249458313, + "step": 8447 + }, + { + "epoch": 1.5609820882077814, + "grad_norm": 0.08188667893409729, + "learning_rate": 9.830440323309514e-06, + "loss": 0.5060011148452759, + "step": 8448 + }, + { + "epoch": 1.5611668649166772, + "grad_norm": 0.09644730389118195, + "learning_rate": 9.828445698166375e-06, + "loss": 0.6682599186897278, + "step": 8449 + }, + { + "epoch": 1.561351641625573, + "grad_norm": 0.0819346234202385, + "learning_rate": 9.826451079850566e-06, + "loss": 0.48520559072494507, + "step": 8450 + }, + { + "epoch": 1.561536418334469, + "grad_norm": 0.07754965871572495, + "learning_rate": 9.824456468441455e-06, + "loss": 0.3911689817905426, + "step": 8451 + }, + { + "epoch": 1.561721195043365, + "grad_norm": 0.07915280759334564, + "learning_rate": 9.822461864018427e-06, + "loss": 0.5869355797767639, + "step": 8452 + }, + { + "epoch": 1.5619059717522608, + "grad_norm": 0.09538932144641876, + "learning_rate": 9.820467266660868e-06, + "loss": 0.7775084376335144, + "step": 8453 + }, + { + "epoch": 1.5620907484611566, + "grad_norm": 0.06357314437627792, + "learning_rate": 9.818472676448144e-06, + "loss": 0.42435356974601746, + "step": 8454 + }, + { + "epoch": 1.5622755251700524, + "grad_norm": 0.06316125392913818, + "learning_rate": 9.816478093459643e-06, + "loss": 0.3821038007736206, + "step": 8455 + }, + { + "epoch": 1.5624603018789482, + "grad_norm": 0.07353564351797104, + "learning_rate": 9.814483517774738e-06, + "loss": 0.5012965202331543, + "step": 8456 + }, + { + "epoch": 1.562645078587844, + "grad_norm": 0.07389863580465317, + "learning_rate": 9.812488949472809e-06, + "loss": 0.4186929762363434, + "step": 8457 + }, + { + "epoch": 1.56282985529674, + "grad_norm": 0.06745057553052902, + "learning_rate": 9.810494388633233e-06, + "loss": 0.44096508622169495, + "step": 8458 + }, + { + "epoch": 1.5630146320056357, + "grad_norm": 0.07269903272390366, + "learning_rate": 9.808499835335387e-06, + "loss": 0.5246237516403198, + "step": 8459 + }, + { + "epoch": 1.5631994087145316, + "grad_norm": 0.08972185850143433, + "learning_rate": 9.806505289658648e-06, + "loss": 0.5000263452529907, + "step": 8460 + }, + { + "epoch": 1.5633841854234274, + "grad_norm": 0.1067720502614975, + "learning_rate": 9.804510751682394e-06, + "loss": 0.7220219373703003, + "step": 8461 + }, + { + "epoch": 1.5635689621323232, + "grad_norm": 0.06829439848661423, + "learning_rate": 9.802516221486e-06, + "loss": 0.3715435266494751, + "step": 8462 + }, + { + "epoch": 1.563753738841219, + "grad_norm": 0.09388057887554169, + "learning_rate": 9.800521699148843e-06, + "loss": 0.63585364818573, + "step": 8463 + }, + { + "epoch": 1.5639385155501149, + "grad_norm": 0.07907140254974365, + "learning_rate": 9.7985271847503e-06, + "loss": 0.5614437460899353, + "step": 8464 + }, + { + "epoch": 1.5641232922590107, + "grad_norm": 0.07540346682071686, + "learning_rate": 9.796532678369742e-06, + "loss": 0.5172854661941528, + "step": 8465 + }, + { + "epoch": 1.5643080689679065, + "grad_norm": 0.05784687027335167, + "learning_rate": 9.794538180086546e-06, + "loss": 0.3279561698436737, + "step": 8466 + }, + { + "epoch": 1.5644928456768024, + "grad_norm": 0.08259300887584686, + "learning_rate": 9.79254368998009e-06, + "loss": 0.6335062384605408, + "step": 8467 + }, + { + "epoch": 1.5646776223856982, + "grad_norm": 0.08102997392416, + "learning_rate": 9.790549208129745e-06, + "loss": 0.45309337973594666, + "step": 8468 + }, + { + "epoch": 1.564862399094594, + "grad_norm": 0.09200865030288696, + "learning_rate": 9.788554734614891e-06, + "loss": 0.6316246390342712, + "step": 8469 + }, + { + "epoch": 1.5650471758034898, + "grad_norm": 0.0958419144153595, + "learning_rate": 9.78656026951489e-06, + "loss": 0.7225764393806458, + "step": 8470 + }, + { + "epoch": 1.5652319525123857, + "grad_norm": 0.06633394211530685, + "learning_rate": 9.784565812909124e-06, + "loss": 0.5134775042533875, + "step": 8471 + }, + { + "epoch": 1.5654167292212815, + "grad_norm": 0.08685419708490372, + "learning_rate": 9.78257136487697e-06, + "loss": 0.46518373489379883, + "step": 8472 + }, + { + "epoch": 1.5656015059301776, + "grad_norm": 0.06596294045448303, + "learning_rate": 9.780576925497789e-06, + "loss": 0.3888777196407318, + "step": 8473 + }, + { + "epoch": 1.5657862826390734, + "grad_norm": 0.07480762898921967, + "learning_rate": 9.778582494850962e-06, + "loss": 0.4374832808971405, + "step": 8474 + }, + { + "epoch": 1.5659710593479692, + "grad_norm": 0.07130126655101776, + "learning_rate": 9.77658807301586e-06, + "loss": 0.4217027425765991, + "step": 8475 + }, + { + "epoch": 1.566155836056865, + "grad_norm": 0.07441869378089905, + "learning_rate": 9.774593660071853e-06, + "loss": 0.5987619161605835, + "step": 8476 + }, + { + "epoch": 1.5663406127657609, + "grad_norm": 0.08373252302408218, + "learning_rate": 9.772599256098312e-06, + "loss": 0.6301749348640442, + "step": 8477 + }, + { + "epoch": 1.5665253894746567, + "grad_norm": 0.06343624740839005, + "learning_rate": 9.770604861174611e-06, + "loss": 0.3775671422481537, + "step": 8478 + }, + { + "epoch": 1.5667101661835525, + "grad_norm": 0.06141326576471329, + "learning_rate": 9.768610475380117e-06, + "loss": 0.3434855341911316, + "step": 8479 + }, + { + "epoch": 1.5668949428924486, + "grad_norm": 0.08183632045984268, + "learning_rate": 9.766616098794202e-06, + "loss": 0.48533132672309875, + "step": 8480 + }, + { + "epoch": 1.5670797196013444, + "grad_norm": 0.0791330561041832, + "learning_rate": 9.764621731496239e-06, + "loss": 0.4955023229122162, + "step": 8481 + }, + { + "epoch": 1.5672644963102402, + "grad_norm": 0.07928652316331863, + "learning_rate": 9.762627373565591e-06, + "loss": 0.49078071117401123, + "step": 8482 + }, + { + "epoch": 1.567449273019136, + "grad_norm": 0.0764070600271225, + "learning_rate": 9.760633025081633e-06, + "loss": 0.5474598407745361, + "step": 8483 + }, + { + "epoch": 1.5676340497280319, + "grad_norm": 0.08049654215574265, + "learning_rate": 9.758638686123732e-06, + "loss": 0.5398207902908325, + "step": 8484 + }, + { + "epoch": 1.5678188264369277, + "grad_norm": 0.061328765004873276, + "learning_rate": 9.756644356771256e-06, + "loss": 0.3806958794593811, + "step": 8485 + }, + { + "epoch": 1.5680036031458235, + "grad_norm": 0.08577550202608109, + "learning_rate": 9.754650037103577e-06, + "loss": 0.6493745446205139, + "step": 8486 + }, + { + "epoch": 1.5681883798547194, + "grad_norm": 0.07504302263259888, + "learning_rate": 9.752655727200051e-06, + "loss": 0.5318300127983093, + "step": 8487 + }, + { + "epoch": 1.5683731565636152, + "grad_norm": 0.08011891692876816, + "learning_rate": 9.750661427140057e-06, + "loss": 0.5619838237762451, + "step": 8488 + }, + { + "epoch": 1.568557933272511, + "grad_norm": 0.06651551276445389, + "learning_rate": 9.748667137002961e-06, + "loss": 0.4472563564777374, + "step": 8489 + }, + { + "epoch": 1.5687427099814069, + "grad_norm": 0.06522566080093384, + "learning_rate": 9.746672856868124e-06, + "loss": 0.3749229311943054, + "step": 8490 + }, + { + "epoch": 1.5689274866903027, + "grad_norm": 0.06759969890117645, + "learning_rate": 9.74467858681491e-06, + "loss": 0.45194461941719055, + "step": 8491 + }, + { + "epoch": 1.5691122633991985, + "grad_norm": 0.08250371366739273, + "learning_rate": 9.742684326922698e-06, + "loss": 0.5971841216087341, + "step": 8492 + }, + { + "epoch": 1.5692970401080943, + "grad_norm": 0.06295836716890335, + "learning_rate": 9.74069007727084e-06, + "loss": 0.5253193974494934, + "step": 8493 + }, + { + "epoch": 1.5694818168169902, + "grad_norm": 0.07202901691198349, + "learning_rate": 9.738695837938707e-06, + "loss": 0.3833845555782318, + "step": 8494 + }, + { + "epoch": 1.569666593525886, + "grad_norm": 0.06376456469297409, + "learning_rate": 9.736701609005661e-06, + "loss": 0.42644819617271423, + "step": 8495 + }, + { + "epoch": 1.5698513702347818, + "grad_norm": 0.09234484285116196, + "learning_rate": 9.734707390551069e-06, + "loss": 0.6424220204353333, + "step": 8496 + }, + { + "epoch": 1.5700361469436777, + "grad_norm": 0.08507819473743439, + "learning_rate": 9.73271318265429e-06, + "loss": 0.5291893482208252, + "step": 8497 + }, + { + "epoch": 1.5702209236525735, + "grad_norm": 0.07942351698875427, + "learning_rate": 9.730718985394692e-06, + "loss": 0.4431830942630768, + "step": 8498 + }, + { + "epoch": 1.5704057003614693, + "grad_norm": 0.06535894423723221, + "learning_rate": 9.728724798851636e-06, + "loss": 0.3336215615272522, + "step": 8499 + }, + { + "epoch": 1.5705904770703651, + "grad_norm": 0.061320461332798004, + "learning_rate": 9.726730623104482e-06, + "loss": 0.36527639627456665, + "step": 8500 + }, + { + "epoch": 1.5705904770703651, + "eval_loss": 0.574783444404602, + "eval_runtime": 277.0199, + "eval_samples_per_second": 65.804, + "eval_steps_per_second": 8.227, + "step": 8500 + }, + { + "epoch": 1.570775253779261, + "grad_norm": 0.095549575984478, + "learning_rate": 9.724736458232597e-06, + "loss": 0.6905973553657532, + "step": 8501 + }, + { + "epoch": 1.570960030488157, + "grad_norm": 0.08161711692810059, + "learning_rate": 9.722742304315339e-06, + "loss": 0.49852216243743896, + "step": 8502 + }, + { + "epoch": 1.5711448071970529, + "grad_norm": 0.08168318122625351, + "learning_rate": 9.72074816143207e-06, + "loss": 0.5491087436676025, + "step": 8503 + }, + { + "epoch": 1.5713295839059487, + "grad_norm": 0.061127275228500366, + "learning_rate": 9.71875402966215e-06, + "loss": 0.401092529296875, + "step": 8504 + }, + { + "epoch": 1.5715143606148445, + "grad_norm": 0.08369404822587967, + "learning_rate": 9.716759909084939e-06, + "loss": 0.5485149025917053, + "step": 8505 + }, + { + "epoch": 1.5716991373237403, + "grad_norm": 0.09047067910432816, + "learning_rate": 9.714765799779803e-06, + "loss": 0.6157129406929016, + "step": 8506 + }, + { + "epoch": 1.5718839140326362, + "grad_norm": 0.07468681037425995, + "learning_rate": 9.712771701826088e-06, + "loss": 0.4496452212333679, + "step": 8507 + }, + { + "epoch": 1.572068690741532, + "grad_norm": 0.06309043616056442, + "learning_rate": 9.710777615303163e-06, + "loss": 0.33331549167633057, + "step": 8508 + }, + { + "epoch": 1.5722534674504278, + "grad_norm": 0.08015831559896469, + "learning_rate": 9.708783540290388e-06, + "loss": 0.5962845683097839, + "step": 8509 + }, + { + "epoch": 1.5724382441593239, + "grad_norm": 0.06803075969219208, + "learning_rate": 9.706789476867115e-06, + "loss": 0.5292885303497314, + "step": 8510 + }, + { + "epoch": 1.5726230208682197, + "grad_norm": 0.08438073098659515, + "learning_rate": 9.704795425112699e-06, + "loss": 0.5161520838737488, + "step": 8511 + }, + { + "epoch": 1.5728077975771155, + "grad_norm": 0.0854136198759079, + "learning_rate": 9.702801385106508e-06, + "loss": 0.5688244700431824, + "step": 8512 + }, + { + "epoch": 1.5729925742860114, + "grad_norm": 0.0847952589392662, + "learning_rate": 9.700807356927888e-06, + "loss": 0.4528445601463318, + "step": 8513 + }, + { + "epoch": 1.5731773509949072, + "grad_norm": 0.06744641810655594, + "learning_rate": 9.698813340656199e-06, + "loss": 0.3817410171031952, + "step": 8514 + }, + { + "epoch": 1.573362127703803, + "grad_norm": 0.08746075630187988, + "learning_rate": 9.6968193363708e-06, + "loss": 0.5919474363327026, + "step": 8515 + }, + { + "epoch": 1.5735469044126988, + "grad_norm": 0.08473276346921921, + "learning_rate": 9.694825344151039e-06, + "loss": 0.6052566766738892, + "step": 8516 + }, + { + "epoch": 1.5737316811215947, + "grad_norm": 0.07219505310058594, + "learning_rate": 9.692831364076277e-06, + "loss": 0.40844255685806274, + "step": 8517 + }, + { + "epoch": 1.5739164578304905, + "grad_norm": 0.0649571493268013, + "learning_rate": 9.690837396225867e-06, + "loss": 0.5000103712081909, + "step": 8518 + }, + { + "epoch": 1.5741012345393863, + "grad_norm": 0.061946313828229904, + "learning_rate": 9.688843440679157e-06, + "loss": 0.39478957653045654, + "step": 8519 + }, + { + "epoch": 1.5742860112482822, + "grad_norm": 0.07370550185441971, + "learning_rate": 9.686849497515509e-06, + "loss": 0.4202343225479126, + "step": 8520 + }, + { + "epoch": 1.574470787957178, + "grad_norm": 0.07990618795156479, + "learning_rate": 9.684855566814268e-06, + "loss": 0.594482421875, + "step": 8521 + }, + { + "epoch": 1.5746555646660738, + "grad_norm": 0.07135336846113205, + "learning_rate": 9.68286164865479e-06, + "loss": 0.387400358915329, + "step": 8522 + }, + { + "epoch": 1.5748403413749696, + "grad_norm": 0.06443370878696442, + "learning_rate": 9.680867743116428e-06, + "loss": 0.3974674642086029, + "step": 8523 + }, + { + "epoch": 1.5750251180838655, + "grad_norm": 0.07257106900215149, + "learning_rate": 9.67887385027853e-06, + "loss": 0.3580196797847748, + "step": 8524 + }, + { + "epoch": 1.5752098947927613, + "grad_norm": 0.08325569331645966, + "learning_rate": 9.676879970220447e-06, + "loss": 0.5409526228904724, + "step": 8525 + }, + { + "epoch": 1.5753946715016571, + "grad_norm": 0.06914255023002625, + "learning_rate": 9.674886103021535e-06, + "loss": 0.4544118642807007, + "step": 8526 + }, + { + "epoch": 1.575579448210553, + "grad_norm": 0.08010513335466385, + "learning_rate": 9.672892248761134e-06, + "loss": 0.5300863981246948, + "step": 8527 + }, + { + "epoch": 1.5757642249194488, + "grad_norm": 0.0959707498550415, + "learning_rate": 9.670898407518598e-06, + "loss": 0.5134875178337097, + "step": 8528 + }, + { + "epoch": 1.5759490016283446, + "grad_norm": 0.07733378559350967, + "learning_rate": 9.668904579373281e-06, + "loss": 0.42300963401794434, + "step": 8529 + }, + { + "epoch": 1.5761337783372404, + "grad_norm": 0.06547591835260391, + "learning_rate": 9.666910764404522e-06, + "loss": 0.3899762034416199, + "step": 8530 + }, + { + "epoch": 1.5763185550461363, + "grad_norm": 0.0907437801361084, + "learning_rate": 9.66491696269167e-06, + "loss": 0.6079534888267517, + "step": 8531 + }, + { + "epoch": 1.5765033317550323, + "grad_norm": 0.08068032562732697, + "learning_rate": 9.662923174314081e-06, + "loss": 0.5906936526298523, + "step": 8532 + }, + { + "epoch": 1.5766881084639282, + "grad_norm": 0.07635143399238586, + "learning_rate": 9.66092939935109e-06, + "loss": 0.47133001685142517, + "step": 8533 + }, + { + "epoch": 1.576872885172824, + "grad_norm": 0.08321890980005264, + "learning_rate": 9.658935637882051e-06, + "loss": 0.43831631541252136, + "step": 8534 + }, + { + "epoch": 1.5770576618817198, + "grad_norm": 0.08589746803045273, + "learning_rate": 9.656941889986304e-06, + "loss": 0.5517308712005615, + "step": 8535 + }, + { + "epoch": 1.5772424385906156, + "grad_norm": 0.08684679120779037, + "learning_rate": 9.654948155743197e-06, + "loss": 0.5462250113487244, + "step": 8536 + }, + { + "epoch": 1.5774272152995115, + "grad_norm": 0.07754339277744293, + "learning_rate": 9.652954435232076e-06, + "loss": 0.47365671396255493, + "step": 8537 + }, + { + "epoch": 1.5776119920084073, + "grad_norm": 0.0754380151629448, + "learning_rate": 9.650960728532281e-06, + "loss": 0.5568907856941223, + "step": 8538 + }, + { + "epoch": 1.5777967687173033, + "grad_norm": 0.0809510350227356, + "learning_rate": 9.648967035723155e-06, + "loss": 0.6057025790214539, + "step": 8539 + }, + { + "epoch": 1.5779815454261992, + "grad_norm": 0.06684159487485886, + "learning_rate": 9.646973356884048e-06, + "loss": 0.45625466108322144, + "step": 8540 + }, + { + "epoch": 1.578166322135095, + "grad_norm": 0.08466535061597824, + "learning_rate": 9.644979692094291e-06, + "loss": 0.5078467726707458, + "step": 8541 + }, + { + "epoch": 1.5783510988439908, + "grad_norm": 0.07972890883684158, + "learning_rate": 9.642986041433234e-06, + "loss": 0.534195065498352, + "step": 8542 + }, + { + "epoch": 1.5785358755528867, + "grad_norm": 0.06992193311452866, + "learning_rate": 9.64099240498022e-06, + "loss": 0.3801504671573639, + "step": 8543 + }, + { + "epoch": 1.5787206522617825, + "grad_norm": 0.09189796447753906, + "learning_rate": 9.638998782814578e-06, + "loss": 0.8108822107315063, + "step": 8544 + }, + { + "epoch": 1.5789054289706783, + "grad_norm": 0.08371283113956451, + "learning_rate": 9.637005175015658e-06, + "loss": 0.606974184513092, + "step": 8545 + }, + { + "epoch": 1.5790902056795741, + "grad_norm": 0.08414312452077866, + "learning_rate": 9.6350115816628e-06, + "loss": 0.5521185398101807, + "step": 8546 + }, + { + "epoch": 1.57927498238847, + "grad_norm": 0.0676870197057724, + "learning_rate": 9.633018002835331e-06, + "loss": 0.45219096541404724, + "step": 8547 + }, + { + "epoch": 1.5794597590973658, + "grad_norm": 0.07192827016115189, + "learning_rate": 9.631024438612602e-06, + "loss": 0.5052711963653564, + "step": 8548 + }, + { + "epoch": 1.5796445358062616, + "grad_norm": 0.05874146893620491, + "learning_rate": 9.62903088907395e-06, + "loss": 0.4122426211833954, + "step": 8549 + }, + { + "epoch": 1.5798293125151575, + "grad_norm": 0.07684757560491562, + "learning_rate": 9.627037354298702e-06, + "loss": 0.4595337212085724, + "step": 8550 + }, + { + "epoch": 1.5800140892240533, + "grad_norm": 0.08987244218587875, + "learning_rate": 9.625043834366204e-06, + "loss": 0.7718538641929626, + "step": 8551 + }, + { + "epoch": 1.5801988659329491, + "grad_norm": 0.054467007517814636, + "learning_rate": 9.623050329355786e-06, + "loss": 0.3730161488056183, + "step": 8552 + }, + { + "epoch": 1.580383642641845, + "grad_norm": 0.09580789506435394, + "learning_rate": 9.621056839346785e-06, + "loss": 0.6520174741744995, + "step": 8553 + }, + { + "epoch": 1.5805684193507408, + "grad_norm": 0.07195496559143066, + "learning_rate": 9.619063364418539e-06, + "loss": 0.5536537170410156, + "step": 8554 + }, + { + "epoch": 1.5807531960596366, + "grad_norm": 0.07392167299985886, + "learning_rate": 9.617069904650378e-06, + "loss": 0.5084531903266907, + "step": 8555 + }, + { + "epoch": 1.5809379727685324, + "grad_norm": 0.07539352029561996, + "learning_rate": 9.615076460121636e-06, + "loss": 0.5134398341178894, + "step": 8556 + }, + { + "epoch": 1.5811227494774283, + "grad_norm": 0.0947013720870018, + "learning_rate": 9.613083030911647e-06, + "loss": 0.6003947854042053, + "step": 8557 + }, + { + "epoch": 1.581307526186324, + "grad_norm": 0.08073306828737259, + "learning_rate": 9.611089617099743e-06, + "loss": 0.5335020422935486, + "step": 8558 + }, + { + "epoch": 1.58149230289522, + "grad_norm": 0.06273633986711502, + "learning_rate": 9.609096218765254e-06, + "loss": 0.4417981207370758, + "step": 8559 + }, + { + "epoch": 1.5816770796041157, + "grad_norm": 0.08009282499551773, + "learning_rate": 9.607102835987516e-06, + "loss": 0.6260141134262085, + "step": 8560 + }, + { + "epoch": 1.5818618563130118, + "grad_norm": 0.07559967786073685, + "learning_rate": 9.605109468845854e-06, + "loss": 0.43849584460258484, + "step": 8561 + }, + { + "epoch": 1.5820466330219076, + "grad_norm": 0.0853399932384491, + "learning_rate": 9.603116117419597e-06, + "loss": 0.4622129797935486, + "step": 8562 + }, + { + "epoch": 1.5822314097308035, + "grad_norm": 0.05605161562561989, + "learning_rate": 9.601122781788082e-06, + "loss": 0.31195148825645447, + "step": 8563 + }, + { + "epoch": 1.5824161864396993, + "grad_norm": 0.05759282410144806, + "learning_rate": 9.599129462030628e-06, + "loss": 0.4043887257575989, + "step": 8564 + }, + { + "epoch": 1.582600963148595, + "grad_norm": 0.08678488433361053, + "learning_rate": 9.59713615822657e-06, + "loss": 0.5891516804695129, + "step": 8565 + }, + { + "epoch": 1.582785739857491, + "grad_norm": 0.06864611059427261, + "learning_rate": 9.595142870455233e-06, + "loss": 0.48909685015678406, + "step": 8566 + }, + { + "epoch": 1.5829705165663868, + "grad_norm": 0.063519187271595, + "learning_rate": 9.59314959879594e-06, + "loss": 0.42133191227912903, + "step": 8567 + }, + { + "epoch": 1.5831552932752828, + "grad_norm": 0.07372412085533142, + "learning_rate": 9.591156343328026e-06, + "loss": 0.49911460280418396, + "step": 8568 + }, + { + "epoch": 1.5833400699841786, + "grad_norm": 0.08856049925088882, + "learning_rate": 9.589163104130804e-06, + "loss": 0.5521577596664429, + "step": 8569 + }, + { + "epoch": 1.5835248466930745, + "grad_norm": 0.1006108745932579, + "learning_rate": 9.587169881283606e-06, + "loss": 0.6770339012145996, + "step": 8570 + }, + { + "epoch": 1.5837096234019703, + "grad_norm": 0.07106732577085495, + "learning_rate": 9.58517667486576e-06, + "loss": 0.49186861515045166, + "step": 8571 + }, + { + "epoch": 1.5838944001108661, + "grad_norm": 0.06963815540075302, + "learning_rate": 9.583183484956578e-06, + "loss": 0.4227297008037567, + "step": 8572 + }, + { + "epoch": 1.584079176819762, + "grad_norm": 0.11049696803092957, + "learning_rate": 9.581190311635392e-06, + "loss": 0.7336570024490356, + "step": 8573 + }, + { + "epoch": 1.5842639535286578, + "grad_norm": 0.060407184064388275, + "learning_rate": 9.579197154981523e-06, + "loss": 0.4391293525695801, + "step": 8574 + }, + { + "epoch": 1.5844487302375536, + "grad_norm": 0.06626936793327332, + "learning_rate": 9.57720401507429e-06, + "loss": 0.4679132401943207, + "step": 8575 + }, + { + "epoch": 1.5846335069464494, + "grad_norm": 0.0940513014793396, + "learning_rate": 9.575210891993012e-06, + "loss": 0.6576391458511353, + "step": 8576 + }, + { + "epoch": 1.5848182836553453, + "grad_norm": 0.08608409017324448, + "learning_rate": 9.573217785817014e-06, + "loss": 0.7329196929931641, + "step": 8577 + }, + { + "epoch": 1.585003060364241, + "grad_norm": 0.05827013775706291, + "learning_rate": 9.571224696625612e-06, + "loss": 0.3933447301387787, + "step": 8578 + }, + { + "epoch": 1.585187837073137, + "grad_norm": 0.07204660028219223, + "learning_rate": 9.569231624498125e-06, + "loss": 0.47026368975639343, + "step": 8579 + }, + { + "epoch": 1.5853726137820328, + "grad_norm": 0.07245006412267685, + "learning_rate": 9.567238569513872e-06, + "loss": 0.4619942605495453, + "step": 8580 + }, + { + "epoch": 1.5855573904909286, + "grad_norm": 0.08662433177232742, + "learning_rate": 9.56524553175217e-06, + "loss": 0.6254631280899048, + "step": 8581 + }, + { + "epoch": 1.5857421671998244, + "grad_norm": 0.07138058543205261, + "learning_rate": 9.563252511292335e-06, + "loss": 0.5061289072036743, + "step": 8582 + }, + { + "epoch": 1.5859269439087202, + "grad_norm": 0.0875978097319603, + "learning_rate": 9.561259508213687e-06, + "loss": 0.6061347723007202, + "step": 8583 + }, + { + "epoch": 1.586111720617616, + "grad_norm": 0.07914415001869202, + "learning_rate": 9.559266522595534e-06, + "loss": 0.5068250894546509, + "step": 8584 + }, + { + "epoch": 1.586296497326512, + "grad_norm": 0.07915471494197845, + "learning_rate": 9.5572735545172e-06, + "loss": 0.5223965048789978, + "step": 8585 + }, + { + "epoch": 1.5864812740354077, + "grad_norm": 0.0746086835861206, + "learning_rate": 9.555280604057989e-06, + "loss": 0.4711228311061859, + "step": 8586 + }, + { + "epoch": 1.5866660507443036, + "grad_norm": 0.07825423032045364, + "learning_rate": 9.553287671297216e-06, + "loss": 0.5604469776153564, + "step": 8587 + }, + { + "epoch": 1.5868508274531994, + "grad_norm": 0.08418682962656021, + "learning_rate": 9.551294756314202e-06, + "loss": 0.6489458680152893, + "step": 8588 + }, + { + "epoch": 1.5870356041620952, + "grad_norm": 0.0813329666852951, + "learning_rate": 9.54930185918825e-06, + "loss": 0.5974125266075134, + "step": 8589 + }, + { + "epoch": 1.5872203808709913, + "grad_norm": 0.08046593517065048, + "learning_rate": 9.547308979998673e-06, + "loss": 0.5678706169128418, + "step": 8590 + }, + { + "epoch": 1.587405157579887, + "grad_norm": 0.06909194588661194, + "learning_rate": 9.545316118824784e-06, + "loss": 0.46457499265670776, + "step": 8591 + }, + { + "epoch": 1.587589934288783, + "grad_norm": 0.09298303723335266, + "learning_rate": 9.543323275745891e-06, + "loss": 0.6481351852416992, + "step": 8592 + }, + { + "epoch": 1.5877747109976788, + "grad_norm": 0.09103282541036606, + "learning_rate": 9.5413304508413e-06, + "loss": 0.5711163282394409, + "step": 8593 + }, + { + "epoch": 1.5879594877065746, + "grad_norm": 0.08446116000413895, + "learning_rate": 9.539337644190327e-06, + "loss": 0.6023228168487549, + "step": 8594 + }, + { + "epoch": 1.5881442644154704, + "grad_norm": 0.07669594138860703, + "learning_rate": 9.537344855872271e-06, + "loss": 0.5727745890617371, + "step": 8595 + }, + { + "epoch": 1.5883290411243662, + "grad_norm": 0.05845790356397629, + "learning_rate": 9.535352085966442e-06, + "loss": 0.3232874274253845, + "step": 8596 + }, + { + "epoch": 1.588513817833262, + "grad_norm": 0.06394659727811813, + "learning_rate": 9.533359334552148e-06, + "loss": 0.42282405495643616, + "step": 8597 + }, + { + "epoch": 1.5886985945421581, + "grad_norm": 0.08589275926351547, + "learning_rate": 9.53136660170869e-06, + "loss": 0.5387320518493652, + "step": 8598 + }, + { + "epoch": 1.588883371251054, + "grad_norm": 0.058079712092876434, + "learning_rate": 9.52937388751538e-06, + "loss": 0.3864925801753998, + "step": 8599 + }, + { + "epoch": 1.5890681479599498, + "grad_norm": 0.0662282183766365, + "learning_rate": 9.527381192051513e-06, + "loss": 0.3943120539188385, + "step": 8600 + }, + { + "epoch": 1.5892529246688456, + "grad_norm": 0.07138784229755402, + "learning_rate": 9.525388515396395e-06, + "loss": 0.546523928642273, + "step": 8601 + }, + { + "epoch": 1.5894377013777414, + "grad_norm": 0.0852469652891159, + "learning_rate": 9.523395857629335e-06, + "loss": 0.587976336479187, + "step": 8602 + }, + { + "epoch": 1.5896224780866373, + "grad_norm": 0.08037744462490082, + "learning_rate": 9.521403218829622e-06, + "loss": 0.5629453659057617, + "step": 8603 + }, + { + "epoch": 1.589807254795533, + "grad_norm": 0.09018945693969727, + "learning_rate": 9.519410599076566e-06, + "loss": 0.602557897567749, + "step": 8604 + }, + { + "epoch": 1.589992031504429, + "grad_norm": 0.08428955078125, + "learning_rate": 9.517417998449468e-06, + "loss": 0.4888676106929779, + "step": 8605 + }, + { + "epoch": 1.5901768082133247, + "grad_norm": 0.07636924088001251, + "learning_rate": 9.515425417027619e-06, + "loss": 0.4932645261287689, + "step": 8606 + }, + { + "epoch": 1.5903615849222206, + "grad_norm": 0.06713202595710754, + "learning_rate": 9.513432854890322e-06, + "loss": 0.4275995194911957, + "step": 8607 + }, + { + "epoch": 1.5905463616311164, + "grad_norm": 0.07278622686862946, + "learning_rate": 9.51144031211688e-06, + "loss": 0.45914706587791443, + "step": 8608 + }, + { + "epoch": 1.5907311383400122, + "grad_norm": 0.07968050241470337, + "learning_rate": 9.509447788786582e-06, + "loss": 0.5035563111305237, + "step": 8609 + }, + { + "epoch": 1.590915915048908, + "grad_norm": 0.09256451576948166, + "learning_rate": 9.507455284978728e-06, + "loss": 0.6316693425178528, + "step": 8610 + }, + { + "epoch": 1.5911006917578039, + "grad_norm": 0.07208509743213654, + "learning_rate": 9.505462800772612e-06, + "loss": 0.46981462836265564, + "step": 8611 + }, + { + "epoch": 1.5912854684666997, + "grad_norm": 0.09786707162857056, + "learning_rate": 9.503470336247529e-06, + "loss": 0.6726077198982239, + "step": 8612 + }, + { + "epoch": 1.5914702451755955, + "grad_norm": 0.053754352033138275, + "learning_rate": 9.501477891482774e-06, + "loss": 0.3265490233898163, + "step": 8613 + }, + { + "epoch": 1.5916550218844914, + "grad_norm": 0.09386641532182693, + "learning_rate": 9.49948546655764e-06, + "loss": 0.5951492786407471, + "step": 8614 + }, + { + "epoch": 1.5918397985933872, + "grad_norm": 0.0635455846786499, + "learning_rate": 9.497493061551415e-06, + "loss": 0.3247123062610626, + "step": 8615 + }, + { + "epoch": 1.592024575302283, + "grad_norm": 0.09346818923950195, + "learning_rate": 9.495500676543398e-06, + "loss": 0.7429558634757996, + "step": 8616 + }, + { + "epoch": 1.5922093520111789, + "grad_norm": 0.06018964573740959, + "learning_rate": 9.493508311612874e-06, + "loss": 0.3677816092967987, + "step": 8617 + }, + { + "epoch": 1.5923941287200747, + "grad_norm": 0.06864192336797714, + "learning_rate": 9.491515966839134e-06, + "loss": 0.45008015632629395, + "step": 8618 + }, + { + "epoch": 1.5925789054289705, + "grad_norm": 0.06789940595626831, + "learning_rate": 9.48952364230147e-06, + "loss": 0.43501466512680054, + "step": 8619 + }, + { + "epoch": 1.5927636821378666, + "grad_norm": 0.08567140996456146, + "learning_rate": 9.487531338079166e-06, + "loss": 0.5239092707633972, + "step": 8620 + }, + { + "epoch": 1.5929484588467624, + "grad_norm": 0.09160125255584717, + "learning_rate": 9.48553905425151e-06, + "loss": 0.6530731320381165, + "step": 8621 + }, + { + "epoch": 1.5931332355556582, + "grad_norm": 0.07891631871461868, + "learning_rate": 9.483546790897796e-06, + "loss": 0.5528213977813721, + "step": 8622 + }, + { + "epoch": 1.593318012264554, + "grad_norm": 0.056103698909282684, + "learning_rate": 9.481554548097297e-06, + "loss": 0.2905942499637604, + "step": 8623 + }, + { + "epoch": 1.5935027889734499, + "grad_norm": 0.08046253770589828, + "learning_rate": 9.479562325929307e-06, + "loss": 0.49072909355163574, + "step": 8624 + }, + { + "epoch": 1.5936875656823457, + "grad_norm": 0.07400275021791458, + "learning_rate": 9.477570124473113e-06, + "loss": 0.511428952217102, + "step": 8625 + }, + { + "epoch": 1.5938723423912415, + "grad_norm": 0.06470328569412231, + "learning_rate": 9.47557794380799e-06, + "loss": 0.48424461483955383, + "step": 8626 + }, + { + "epoch": 1.5940571191001376, + "grad_norm": 0.0823003426194191, + "learning_rate": 9.473585784013219e-06, + "loss": 0.4720412790775299, + "step": 8627 + }, + { + "epoch": 1.5942418958090334, + "grad_norm": 0.0525951124727726, + "learning_rate": 9.471593645168096e-06, + "loss": 0.3639041483402252, + "step": 8628 + }, + { + "epoch": 1.5944266725179292, + "grad_norm": 0.05389215424656868, + "learning_rate": 9.469601527351887e-06, + "loss": 0.31378528475761414, + "step": 8629 + }, + { + "epoch": 1.594611449226825, + "grad_norm": 0.06935250759124756, + "learning_rate": 9.467609430643877e-06, + "loss": 0.5664317011833191, + "step": 8630 + }, + { + "epoch": 1.594796225935721, + "grad_norm": 0.07487247884273529, + "learning_rate": 9.465617355123352e-06, + "loss": 0.48654481768608093, + "step": 8631 + }, + { + "epoch": 1.5949810026446167, + "grad_norm": 0.07327792793512344, + "learning_rate": 9.46362530086958e-06, + "loss": 0.6170271039009094, + "step": 8632 + }, + { + "epoch": 1.5951657793535126, + "grad_norm": 0.08417121320962906, + "learning_rate": 9.461633267961844e-06, + "loss": 0.6322805285453796, + "step": 8633 + }, + { + "epoch": 1.5953505560624084, + "grad_norm": 0.08535663783550262, + "learning_rate": 9.459641256479419e-06, + "loss": 0.4844543933868408, + "step": 8634 + }, + { + "epoch": 1.5955353327713042, + "grad_norm": 0.07370703667402267, + "learning_rate": 9.45764926650158e-06, + "loss": 0.45001500844955444, + "step": 8635 + }, + { + "epoch": 1.5957201094802, + "grad_norm": 0.05562310293316841, + "learning_rate": 9.455657298107607e-06, + "loss": 0.3464704751968384, + "step": 8636 + }, + { + "epoch": 1.5959048861890959, + "grad_norm": 0.06573774665594101, + "learning_rate": 9.453665351376768e-06, + "loss": 0.4138845205307007, + "step": 8637 + }, + { + "epoch": 1.5960896628979917, + "grad_norm": 0.08080170303583145, + "learning_rate": 9.451673426388336e-06, + "loss": 0.49387991428375244, + "step": 8638 + }, + { + "epoch": 1.5962744396068875, + "grad_norm": 0.09069648385047913, + "learning_rate": 9.449681523221593e-06, + "loss": 0.6068739891052246, + "step": 8639 + }, + { + "epoch": 1.5964592163157834, + "grad_norm": 0.08178447932004929, + "learning_rate": 9.447689641955799e-06, + "loss": 0.581017792224884, + "step": 8640 + }, + { + "epoch": 1.5966439930246792, + "grad_norm": 0.0756615549325943, + "learning_rate": 9.445697782670229e-06, + "loss": 0.4431816637516022, + "step": 8641 + }, + { + "epoch": 1.596828769733575, + "grad_norm": 0.09431053698062897, + "learning_rate": 9.443705945444158e-06, + "loss": 0.7131768465042114, + "step": 8642 + }, + { + "epoch": 1.5970135464424708, + "grad_norm": 0.07183665037155151, + "learning_rate": 9.441714130356842e-06, + "loss": 0.41819319128990173, + "step": 8643 + }, + { + "epoch": 1.5971983231513667, + "grad_norm": 0.059549588710069656, + "learning_rate": 9.439722337487561e-06, + "loss": 0.40329742431640625, + "step": 8644 + }, + { + "epoch": 1.5973830998602625, + "grad_norm": 0.07357224822044373, + "learning_rate": 9.437730566915582e-06, + "loss": 0.49433422088623047, + "step": 8645 + }, + { + "epoch": 1.5975678765691583, + "grad_norm": 0.06763920187950134, + "learning_rate": 9.435738818720164e-06, + "loss": 0.37012627720832825, + "step": 8646 + }, + { + "epoch": 1.5977526532780542, + "grad_norm": 0.07158965617418289, + "learning_rate": 9.433747092980571e-06, + "loss": 0.49394795298576355, + "step": 8647 + }, + { + "epoch": 1.59793742998695, + "grad_norm": 0.08695575594902039, + "learning_rate": 9.43175538977608e-06, + "loss": 0.5455334782600403, + "step": 8648 + }, + { + "epoch": 1.598122206695846, + "grad_norm": 0.06903868168592453, + "learning_rate": 9.429763709185943e-06, + "loss": 0.4207424819469452, + "step": 8649 + }, + { + "epoch": 1.5983069834047419, + "grad_norm": 0.07839375734329224, + "learning_rate": 9.427772051289427e-06, + "loss": 0.5606071949005127, + "step": 8650 + }, + { + "epoch": 1.5984917601136377, + "grad_norm": 0.07867002487182617, + "learning_rate": 9.425780416165794e-06, + "loss": 0.4959498345851898, + "step": 8651 + }, + { + "epoch": 1.5986765368225335, + "grad_norm": 0.08320920169353485, + "learning_rate": 9.423788803894301e-06, + "loss": 0.5476346015930176, + "step": 8652 + }, + { + "epoch": 1.5988613135314294, + "grad_norm": 0.07733888924121857, + "learning_rate": 9.421797214554213e-06, + "loss": 0.4884255826473236, + "step": 8653 + }, + { + "epoch": 1.5990460902403252, + "grad_norm": 0.0582268163561821, + "learning_rate": 9.419805648224785e-06, + "loss": 0.3183627426624298, + "step": 8654 + }, + { + "epoch": 1.599230866949221, + "grad_norm": 0.06752382963895798, + "learning_rate": 9.417814104985278e-06, + "loss": 0.3429985046386719, + "step": 8655 + }, + { + "epoch": 1.599415643658117, + "grad_norm": 0.06709057837724686, + "learning_rate": 9.41582258491495e-06, + "loss": 0.4029124677181244, + "step": 8656 + }, + { + "epoch": 1.5996004203670129, + "grad_norm": 0.06716939806938171, + "learning_rate": 9.413831088093052e-06, + "loss": 0.47434157133102417, + "step": 8657 + }, + { + "epoch": 1.5997851970759087, + "grad_norm": 0.0751735270023346, + "learning_rate": 9.411839614598845e-06, + "loss": 0.4409109055995941, + "step": 8658 + }, + { + "epoch": 1.5999699737848045, + "grad_norm": 0.08015467971563339, + "learning_rate": 9.409848164511583e-06, + "loss": 0.43245774507522583, + "step": 8659 + }, + { + "epoch": 1.6001547504937004, + "grad_norm": 0.059320174157619476, + "learning_rate": 9.407856737910514e-06, + "loss": 0.3780234754085541, + "step": 8660 + }, + { + "epoch": 1.6003395272025962, + "grad_norm": 0.080257847905159, + "learning_rate": 9.405865334874896e-06, + "loss": 0.5547727346420288, + "step": 8661 + }, + { + "epoch": 1.600524303911492, + "grad_norm": 0.07040159404277802, + "learning_rate": 9.403873955483982e-06, + "loss": 0.4538348615169525, + "step": 8662 + }, + { + "epoch": 1.6007090806203879, + "grad_norm": 0.04806624725461006, + "learning_rate": 9.401882599817013e-06, + "loss": 0.33367177844047546, + "step": 8663 + }, + { + "epoch": 1.6008938573292837, + "grad_norm": 0.08615703880786896, + "learning_rate": 9.399891267953252e-06, + "loss": 0.6941844820976257, + "step": 8664 + }, + { + "epoch": 1.6010786340381795, + "grad_norm": 0.07073012739419937, + "learning_rate": 9.397899959971937e-06, + "loss": 0.4684998393058777, + "step": 8665 + }, + { + "epoch": 1.6012634107470753, + "grad_norm": 0.07388255000114441, + "learning_rate": 9.395908675952319e-06, + "loss": 0.45641809701919556, + "step": 8666 + }, + { + "epoch": 1.6014481874559712, + "grad_norm": 0.08160973340272903, + "learning_rate": 9.393917415973648e-06, + "loss": 0.5580446720123291, + "step": 8667 + }, + { + "epoch": 1.601632964164867, + "grad_norm": 0.07806427776813507, + "learning_rate": 9.391926180115168e-06, + "loss": 0.5362488627433777, + "step": 8668 + }, + { + "epoch": 1.6018177408737628, + "grad_norm": 0.07841908931732178, + "learning_rate": 9.38993496845612e-06, + "loss": 0.5302690267562866, + "step": 8669 + }, + { + "epoch": 1.6020025175826587, + "grad_norm": 0.08175771683454514, + "learning_rate": 9.387943781075755e-06, + "loss": 0.4767942428588867, + "step": 8670 + }, + { + "epoch": 1.6021872942915545, + "grad_norm": 0.07378681004047394, + "learning_rate": 9.385952618053313e-06, + "loss": 0.5257818698883057, + "step": 8671 + }, + { + "epoch": 1.6023720710004503, + "grad_norm": 0.10777638107538223, + "learning_rate": 9.383961479468031e-06, + "loss": 0.654915988445282, + "step": 8672 + }, + { + "epoch": 1.6025568477093461, + "grad_norm": 0.09170585870742798, + "learning_rate": 9.381970365399162e-06, + "loss": 0.6202282905578613, + "step": 8673 + }, + { + "epoch": 1.602741624418242, + "grad_norm": 0.07236034423112869, + "learning_rate": 9.379979275925934e-06, + "loss": 0.5273255109786987, + "step": 8674 + }, + { + "epoch": 1.6029264011271378, + "grad_norm": 0.06548592448234558, + "learning_rate": 9.377988211127591e-06, + "loss": 0.4312899112701416, + "step": 8675 + }, + { + "epoch": 1.6031111778360336, + "grad_norm": 0.07652442157268524, + "learning_rate": 9.375997171083372e-06, + "loss": 0.5723881125450134, + "step": 8676 + }, + { + "epoch": 1.6032959545449295, + "grad_norm": 0.08384647965431213, + "learning_rate": 9.374006155872514e-06, + "loss": 0.5194376111030579, + "step": 8677 + }, + { + "epoch": 1.6034807312538255, + "grad_norm": 0.08439996093511581, + "learning_rate": 9.37201516557425e-06, + "loss": 0.5405910015106201, + "step": 8678 + }, + { + "epoch": 1.6036655079627213, + "grad_norm": 0.05453351140022278, + "learning_rate": 9.370024200267822e-06, + "loss": 0.3144639730453491, + "step": 8679 + }, + { + "epoch": 1.6038502846716172, + "grad_norm": 0.08673521876335144, + "learning_rate": 9.368033260032458e-06, + "loss": 0.5571047067642212, + "step": 8680 + }, + { + "epoch": 1.604035061380513, + "grad_norm": 0.0772957131266594, + "learning_rate": 9.366042344947396e-06, + "loss": 0.41941192746162415, + "step": 8681 + }, + { + "epoch": 1.6042198380894088, + "grad_norm": 0.07876481115818024, + "learning_rate": 9.364051455091861e-06, + "loss": 0.5334144234657288, + "step": 8682 + }, + { + "epoch": 1.6044046147983047, + "grad_norm": 0.0865866169333458, + "learning_rate": 9.362060590545086e-06, + "loss": 0.5806391835212708, + "step": 8683 + }, + { + "epoch": 1.6045893915072005, + "grad_norm": 0.06845936924219131, + "learning_rate": 9.360069751386311e-06, + "loss": 0.4660915732383728, + "step": 8684 + }, + { + "epoch": 1.6047741682160963, + "grad_norm": 0.08829370141029358, + "learning_rate": 9.358078937694754e-06, + "loss": 0.5103304386138916, + "step": 8685 + }, + { + "epoch": 1.6049589449249924, + "grad_norm": 0.06883946061134338, + "learning_rate": 9.356088149549644e-06, + "loss": 0.44140616059303284, + "step": 8686 + }, + { + "epoch": 1.6051437216338882, + "grad_norm": 0.0737924799323082, + "learning_rate": 9.354097387030217e-06, + "loss": 0.44015246629714966, + "step": 8687 + }, + { + "epoch": 1.605328498342784, + "grad_norm": 0.08518895506858826, + "learning_rate": 9.35210665021569e-06, + "loss": 0.5649626851081848, + "step": 8688 + }, + { + "epoch": 1.6055132750516798, + "grad_norm": 0.06098601967096329, + "learning_rate": 9.35011593918529e-06, + "loss": 0.3328516483306885, + "step": 8689 + }, + { + "epoch": 1.6056980517605757, + "grad_norm": 0.06565692275762558, + "learning_rate": 9.348125254018245e-06, + "loss": 0.5171113610267639, + "step": 8690 + }, + { + "epoch": 1.6058828284694715, + "grad_norm": 0.08036236464977264, + "learning_rate": 9.346134594793774e-06, + "loss": 0.44804441928863525, + "step": 8691 + }, + { + "epoch": 1.6060676051783673, + "grad_norm": 0.06878243386745453, + "learning_rate": 9.3441439615911e-06, + "loss": 0.3826935589313507, + "step": 8692 + }, + { + "epoch": 1.6062523818872632, + "grad_norm": 0.05000479891896248, + "learning_rate": 9.342153354489448e-06, + "loss": 0.2692321240901947, + "step": 8693 + }, + { + "epoch": 1.606437158596159, + "grad_norm": 0.08034251630306244, + "learning_rate": 9.34016277356803e-06, + "loss": 0.5494377017021179, + "step": 8694 + }, + { + "epoch": 1.6066219353050548, + "grad_norm": 0.06501448899507523, + "learning_rate": 9.33817221890607e-06, + "loss": 0.40996047854423523, + "step": 8695 + }, + { + "epoch": 1.6068067120139506, + "grad_norm": 0.06616383045911789, + "learning_rate": 9.336181690582787e-06, + "loss": 0.5146843791007996, + "step": 8696 + }, + { + "epoch": 1.6069914887228465, + "grad_norm": 0.06285757571458817, + "learning_rate": 9.334191188677394e-06, + "loss": 0.4098890423774719, + "step": 8697 + }, + { + "epoch": 1.6071762654317423, + "grad_norm": 0.06963429600000381, + "learning_rate": 9.332200713269113e-06, + "loss": 0.47420984506607056, + "step": 8698 + }, + { + "epoch": 1.6073610421406381, + "grad_norm": 0.09927757829427719, + "learning_rate": 9.330210264437149e-06, + "loss": 0.5142176747322083, + "step": 8699 + }, + { + "epoch": 1.607545818849534, + "grad_norm": 0.0826658084988594, + "learning_rate": 9.328219842260721e-06, + "loss": 0.42444026470184326, + "step": 8700 + }, + { + "epoch": 1.6077305955584298, + "grad_norm": 0.07246047258377075, + "learning_rate": 9.326229446819048e-06, + "loss": 0.3660123944282532, + "step": 8701 + }, + { + "epoch": 1.6079153722673256, + "grad_norm": 0.10228099673986435, + "learning_rate": 9.324239078191329e-06, + "loss": 0.6338186860084534, + "step": 8702 + }, + { + "epoch": 1.6081001489762214, + "grad_norm": 0.06835248321294785, + "learning_rate": 9.322248736456779e-06, + "loss": 0.40147051215171814, + "step": 8703 + }, + { + "epoch": 1.6082849256851173, + "grad_norm": 0.06488882750272751, + "learning_rate": 9.320258421694615e-06, + "loss": 0.4587598145008087, + "step": 8704 + }, + { + "epoch": 1.608469702394013, + "grad_norm": 0.07219914346933365, + "learning_rate": 9.318268133984035e-06, + "loss": 0.5292624831199646, + "step": 8705 + }, + { + "epoch": 1.608654479102909, + "grad_norm": 0.09015645831823349, + "learning_rate": 9.316277873404249e-06, + "loss": 0.49059614539146423, + "step": 8706 + }, + { + "epoch": 1.6088392558118048, + "grad_norm": 0.12085429579019547, + "learning_rate": 9.31428764003447e-06, + "loss": 0.7310295104980469, + "step": 8707 + }, + { + "epoch": 1.6090240325207008, + "grad_norm": 0.05734136700630188, + "learning_rate": 9.312297433953894e-06, + "loss": 0.28329697251319885, + "step": 8708 + }, + { + "epoch": 1.6092088092295966, + "grad_norm": 0.07954109460115433, + "learning_rate": 9.310307255241729e-06, + "loss": 0.5011992454528809, + "step": 8709 + }, + { + "epoch": 1.6093935859384925, + "grad_norm": 0.055546123534440994, + "learning_rate": 9.308317103977177e-06, + "loss": 0.35209885239601135, + "step": 8710 + }, + { + "epoch": 1.6095783626473883, + "grad_norm": 0.07863243669271469, + "learning_rate": 9.306326980239441e-06, + "loss": 0.44597235321998596, + "step": 8711 + }, + { + "epoch": 1.6097631393562841, + "grad_norm": 0.05761402100324631, + "learning_rate": 9.30433688410772e-06, + "loss": 0.3015648424625397, + "step": 8712 + }, + { + "epoch": 1.60994791606518, + "grad_norm": 0.07875753194093704, + "learning_rate": 9.302346815661217e-06, + "loss": 0.451083242893219, + "step": 8713 + }, + { + "epoch": 1.6101326927740758, + "grad_norm": 0.06157178059220314, + "learning_rate": 9.300356774979125e-06, + "loss": 0.35838833451271057, + "step": 8714 + }, + { + "epoch": 1.6103174694829718, + "grad_norm": 0.06755080819129944, + "learning_rate": 9.298366762140648e-06, + "loss": 0.4344403147697449, + "step": 8715 + }, + { + "epoch": 1.6105022461918677, + "grad_norm": 0.07680190354585648, + "learning_rate": 9.296376777224977e-06, + "loss": 0.4962613880634308, + "step": 8716 + }, + { + "epoch": 1.6106870229007635, + "grad_norm": 0.06090494990348816, + "learning_rate": 9.294386820311306e-06, + "loss": 0.3367466926574707, + "step": 8717 + }, + { + "epoch": 1.6108717996096593, + "grad_norm": 0.09674331545829773, + "learning_rate": 9.292396891478838e-06, + "loss": 0.6231864094734192, + "step": 8718 + }, + { + "epoch": 1.6110565763185551, + "grad_norm": 0.07336708903312683, + "learning_rate": 9.290406990806754e-06, + "loss": 0.44256263971328735, + "step": 8719 + }, + { + "epoch": 1.611241353027451, + "grad_norm": 0.0809880793094635, + "learning_rate": 9.288417118374253e-06, + "loss": 0.5246638059616089, + "step": 8720 + }, + { + "epoch": 1.6114261297363468, + "grad_norm": 0.0665491595864296, + "learning_rate": 9.28642727426053e-06, + "loss": 0.48506948351860046, + "step": 8721 + }, + { + "epoch": 1.6116109064452426, + "grad_norm": 0.0818161740899086, + "learning_rate": 9.284437458544762e-06, + "loss": 0.49265772104263306, + "step": 8722 + }, + { + "epoch": 1.6117956831541385, + "grad_norm": 0.07088849693536758, + "learning_rate": 9.282447671306145e-06, + "loss": 0.574463963508606, + "step": 8723 + }, + { + "epoch": 1.6119804598630343, + "grad_norm": 0.07852593809366226, + "learning_rate": 9.280457912623873e-06, + "loss": 0.6700880527496338, + "step": 8724 + }, + { + "epoch": 1.6121652365719301, + "grad_norm": 0.07950948923826218, + "learning_rate": 9.278468182577118e-06, + "loss": 0.46207764744758606, + "step": 8725 + }, + { + "epoch": 1.612350013280826, + "grad_norm": 0.061707984656095505, + "learning_rate": 9.27647848124507e-06, + "loss": 0.4924660921096802, + "step": 8726 + }, + { + "epoch": 1.6125347899897218, + "grad_norm": 0.0966852679848671, + "learning_rate": 9.274488808706923e-06, + "loss": 0.7439047694206238, + "step": 8727 + }, + { + "epoch": 1.6127195666986176, + "grad_norm": 0.06226487085223198, + "learning_rate": 9.272499165041846e-06, + "loss": 0.4389248490333557, + "step": 8728 + }, + { + "epoch": 1.6129043434075134, + "grad_norm": 0.0721212774515152, + "learning_rate": 9.270509550329027e-06, + "loss": 0.45488476753234863, + "step": 8729 + }, + { + "epoch": 1.6130891201164093, + "grad_norm": 0.07964397221803665, + "learning_rate": 9.268519964647646e-06, + "loss": 0.566719651222229, + "step": 8730 + }, + { + "epoch": 1.613273896825305, + "grad_norm": 0.08104722201824188, + "learning_rate": 9.266530408076881e-06, + "loss": 0.5494105219841003, + "step": 8731 + }, + { + "epoch": 1.613458673534201, + "grad_norm": 0.07702388614416122, + "learning_rate": 9.264540880695914e-06, + "loss": 0.5685914158821106, + "step": 8732 + }, + { + "epoch": 1.6136434502430967, + "grad_norm": 0.07252875715494156, + "learning_rate": 9.262551382583916e-06, + "loss": 0.3761477470397949, + "step": 8733 + }, + { + "epoch": 1.6138282269519926, + "grad_norm": 0.06461136043071747, + "learning_rate": 9.260561913820066e-06, + "loss": 0.41747915744781494, + "step": 8734 + }, + { + "epoch": 1.6140130036608884, + "grad_norm": 0.07709618657827377, + "learning_rate": 9.25857247448354e-06, + "loss": 0.4577575623989105, + "step": 8735 + }, + { + "epoch": 1.6141977803697842, + "grad_norm": 0.07607993483543396, + "learning_rate": 9.256583064653509e-06, + "loss": 0.4133278429508209, + "step": 8736 + }, + { + "epoch": 1.6143825570786803, + "grad_norm": 0.08119706809520721, + "learning_rate": 9.254593684409144e-06, + "loss": 0.5895794034004211, + "step": 8737 + }, + { + "epoch": 1.614567333787576, + "grad_norm": 0.0602099783718586, + "learning_rate": 9.252604333829624e-06, + "loss": 0.37203460931777954, + "step": 8738 + }, + { + "epoch": 1.614752110496472, + "grad_norm": 0.08186354488134384, + "learning_rate": 9.250615012994106e-06, + "loss": 0.5753328800201416, + "step": 8739 + }, + { + "epoch": 1.6149368872053678, + "grad_norm": 0.06444091349840164, + "learning_rate": 9.24862572198177e-06, + "loss": 0.3414130210876465, + "step": 8740 + }, + { + "epoch": 1.6151216639142636, + "grad_norm": 0.06698300689458847, + "learning_rate": 9.246636460871781e-06, + "loss": 0.45006752014160156, + "step": 8741 + }, + { + "epoch": 1.6153064406231594, + "grad_norm": 0.058944471180438995, + "learning_rate": 9.244647229743299e-06, + "loss": 0.37072205543518066, + "step": 8742 + }, + { + "epoch": 1.6154912173320553, + "grad_norm": 0.07849530130624771, + "learning_rate": 9.242658028675498e-06, + "loss": 0.4830027222633362, + "step": 8743 + }, + { + "epoch": 1.6156759940409513, + "grad_norm": 0.08128982037305832, + "learning_rate": 9.24066885774754e-06, + "loss": 0.4712119400501251, + "step": 8744 + }, + { + "epoch": 1.6158607707498471, + "grad_norm": 0.07775290310382843, + "learning_rate": 9.238679717038582e-06, + "loss": 0.534960925579071, + "step": 8745 + }, + { + "epoch": 1.616045547458743, + "grad_norm": 0.08364227414131165, + "learning_rate": 9.236690606627792e-06, + "loss": 0.5130738019943237, + "step": 8746 + }, + { + "epoch": 1.6162303241676388, + "grad_norm": 0.06736582517623901, + "learning_rate": 9.234701526594325e-06, + "loss": 0.4213935136795044, + "step": 8747 + }, + { + "epoch": 1.6164151008765346, + "grad_norm": 0.05899931117892265, + "learning_rate": 9.232712477017343e-06, + "loss": 0.4013676047325134, + "step": 8748 + }, + { + "epoch": 1.6165998775854304, + "grad_norm": 0.09669655561447144, + "learning_rate": 9.230723457976006e-06, + "loss": 0.6942662596702576, + "step": 8749 + }, + { + "epoch": 1.6167846542943263, + "grad_norm": 0.07031256705522537, + "learning_rate": 9.228734469549467e-06, + "loss": 0.5576603412628174, + "step": 8750 + }, + { + "epoch": 1.616969431003222, + "grad_norm": 0.08005089312791824, + "learning_rate": 9.226745511816883e-06, + "loss": 0.4651269316673279, + "step": 8751 + }, + { + "epoch": 1.617154207712118, + "grad_norm": 0.09014077484607697, + "learning_rate": 9.22475658485741e-06, + "loss": 0.7220168113708496, + "step": 8752 + }, + { + "epoch": 1.6173389844210138, + "grad_norm": 0.0727510005235672, + "learning_rate": 9.222767688750196e-06, + "loss": 0.5171751379966736, + "step": 8753 + }, + { + "epoch": 1.6175237611299096, + "grad_norm": 0.0730699971318245, + "learning_rate": 9.220778823574398e-06, + "loss": 0.5313491225242615, + "step": 8754 + }, + { + "epoch": 1.6177085378388054, + "grad_norm": 0.07589662820100784, + "learning_rate": 9.218789989409167e-06, + "loss": 0.5469446778297424, + "step": 8755 + }, + { + "epoch": 1.6178933145477012, + "grad_norm": 0.07687672972679138, + "learning_rate": 9.21680118633365e-06, + "loss": 0.46767929196357727, + "step": 8756 + }, + { + "epoch": 1.618078091256597, + "grad_norm": 0.06446769088506699, + "learning_rate": 9.214812414426993e-06, + "loss": 0.39684683084487915, + "step": 8757 + }, + { + "epoch": 1.618262867965493, + "grad_norm": 0.07347891479730606, + "learning_rate": 9.21282367376835e-06, + "loss": 0.4656224250793457, + "step": 8758 + }, + { + "epoch": 1.6184476446743887, + "grad_norm": 0.08664803206920624, + "learning_rate": 9.210834964436857e-06, + "loss": 0.54388028383255, + "step": 8759 + }, + { + "epoch": 1.6186324213832846, + "grad_norm": 0.08450117707252502, + "learning_rate": 9.208846286511664e-06, + "loss": 0.560814619064331, + "step": 8760 + }, + { + "epoch": 1.6188171980921804, + "grad_norm": 0.0759967714548111, + "learning_rate": 9.20685764007192e-06, + "loss": 0.44394931197166443, + "step": 8761 + }, + { + "epoch": 1.6190019748010762, + "grad_norm": 0.0643659308552742, + "learning_rate": 9.204869025196753e-06, + "loss": 0.4153701663017273, + "step": 8762 + }, + { + "epoch": 1.619186751509972, + "grad_norm": 0.0887654572725296, + "learning_rate": 9.202880441965317e-06, + "loss": 0.5017815828323364, + "step": 8763 + }, + { + "epoch": 1.6193715282188679, + "grad_norm": 0.06874753534793854, + "learning_rate": 9.20089189045674e-06, + "loss": 0.48350366950035095, + "step": 8764 + }, + { + "epoch": 1.6195563049277637, + "grad_norm": 0.06923694163560867, + "learning_rate": 9.198903370750167e-06, + "loss": 0.43473052978515625, + "step": 8765 + }, + { + "epoch": 1.6197410816366598, + "grad_norm": 0.07469451427459717, + "learning_rate": 9.196914882924737e-06, + "loss": 0.4775337874889374, + "step": 8766 + }, + { + "epoch": 1.6199258583455556, + "grad_norm": 0.10432615131139755, + "learning_rate": 9.194926427059579e-06, + "loss": 0.7286350131034851, + "step": 8767 + }, + { + "epoch": 1.6201106350544514, + "grad_norm": 0.06797898560762405, + "learning_rate": 9.192938003233828e-06, + "loss": 0.4760822355747223, + "step": 8768 + }, + { + "epoch": 1.6202954117633472, + "grad_norm": 0.08036093413829803, + "learning_rate": 9.190949611526625e-06, + "loss": 0.4900142252445221, + "step": 8769 + }, + { + "epoch": 1.620480188472243, + "grad_norm": 0.07063406705856323, + "learning_rate": 9.188961252017094e-06, + "loss": 0.4609625041484833, + "step": 8770 + }, + { + "epoch": 1.620664965181139, + "grad_norm": 0.08164394646883011, + "learning_rate": 9.186972924784365e-06, + "loss": 0.617030143737793, + "step": 8771 + }, + { + "epoch": 1.6208497418900347, + "grad_norm": 0.08667688816785812, + "learning_rate": 9.184984629907575e-06, + "loss": 0.6391412615776062, + "step": 8772 + }, + { + "epoch": 1.6210345185989306, + "grad_norm": 0.07805630564689636, + "learning_rate": 9.182996367465843e-06, + "loss": 0.47581109404563904, + "step": 8773 + }, + { + "epoch": 1.6212192953078266, + "grad_norm": 0.07001031190156937, + "learning_rate": 9.1810081375383e-06, + "loss": 0.4111134707927704, + "step": 8774 + }, + { + "epoch": 1.6214040720167224, + "grad_norm": 0.09256626665592194, + "learning_rate": 9.179019940204073e-06, + "loss": 0.5408514738082886, + "step": 8775 + }, + { + "epoch": 1.6215888487256183, + "grad_norm": 0.09123337268829346, + "learning_rate": 9.177031775542282e-06, + "loss": 0.6441941857337952, + "step": 8776 + }, + { + "epoch": 1.621773625434514, + "grad_norm": 0.07255522906780243, + "learning_rate": 9.175043643632051e-06, + "loss": 0.5214483737945557, + "step": 8777 + }, + { + "epoch": 1.62195840214341, + "grad_norm": 0.09604248404502869, + "learning_rate": 9.173055544552505e-06, + "loss": 0.725089967250824, + "step": 8778 + }, + { + "epoch": 1.6221431788523057, + "grad_norm": 0.08073451370000839, + "learning_rate": 9.171067478382757e-06, + "loss": 0.4169258773326874, + "step": 8779 + }, + { + "epoch": 1.6223279555612016, + "grad_norm": 0.0782463401556015, + "learning_rate": 9.169079445201938e-06, + "loss": 0.5479577779769897, + "step": 8780 + }, + { + "epoch": 1.6225127322700974, + "grad_norm": 0.10017386823892593, + "learning_rate": 9.16709144508915e-06, + "loss": 0.6875959634780884, + "step": 8781 + }, + { + "epoch": 1.6226975089789932, + "grad_norm": 0.09250274300575256, + "learning_rate": 9.165103478123515e-06, + "loss": 0.5965291857719421, + "step": 8782 + }, + { + "epoch": 1.622882285687889, + "grad_norm": 0.08742630481719971, + "learning_rate": 9.163115544384157e-06, + "loss": 0.4917841851711273, + "step": 8783 + }, + { + "epoch": 1.6230670623967849, + "grad_norm": 0.07041438668966293, + "learning_rate": 9.161127643950178e-06, + "loss": 0.5060359835624695, + "step": 8784 + }, + { + "epoch": 1.6232518391056807, + "grad_norm": 0.07074175029993057, + "learning_rate": 9.159139776900691e-06, + "loss": 0.48050737380981445, + "step": 8785 + }, + { + "epoch": 1.6234366158145765, + "grad_norm": 0.06361597776412964, + "learning_rate": 9.157151943314817e-06, + "loss": 0.42081218957901, + "step": 8786 + }, + { + "epoch": 1.6236213925234724, + "grad_norm": 0.06792426109313965, + "learning_rate": 9.155164143271654e-06, + "loss": 0.4286332130432129, + "step": 8787 + }, + { + "epoch": 1.6238061692323682, + "grad_norm": 0.07698680460453033, + "learning_rate": 9.153176376850315e-06, + "loss": 0.49443984031677246, + "step": 8788 + }, + { + "epoch": 1.623990945941264, + "grad_norm": 0.08953724801540375, + "learning_rate": 9.15118864412991e-06, + "loss": 0.6965150237083435, + "step": 8789 + }, + { + "epoch": 1.6241757226501599, + "grad_norm": 0.0866585448384285, + "learning_rate": 9.149200945189536e-06, + "loss": 0.5574845671653748, + "step": 8790 + }, + { + "epoch": 1.6243604993590557, + "grad_norm": 0.09219162911176682, + "learning_rate": 9.147213280108307e-06, + "loss": 0.8432692289352417, + "step": 8791 + }, + { + "epoch": 1.6245452760679515, + "grad_norm": 0.08029278367757797, + "learning_rate": 9.145225648965321e-06, + "loss": 0.6044814586639404, + "step": 8792 + }, + { + "epoch": 1.6247300527768473, + "grad_norm": 0.07284297794103622, + "learning_rate": 9.143238051839678e-06, + "loss": 0.5288881659507751, + "step": 8793 + }, + { + "epoch": 1.6249148294857432, + "grad_norm": 0.08836342394351959, + "learning_rate": 9.141250488810481e-06, + "loss": 0.6295874118804932, + "step": 8794 + }, + { + "epoch": 1.625099606194639, + "grad_norm": 0.0754440426826477, + "learning_rate": 9.139262959956829e-06, + "loss": 0.5248824954032898, + "step": 8795 + }, + { + "epoch": 1.625284382903535, + "grad_norm": 0.0965680330991745, + "learning_rate": 9.137275465357817e-06, + "loss": 0.6318686008453369, + "step": 8796 + }, + { + "epoch": 1.6254691596124309, + "grad_norm": 0.0794186145067215, + "learning_rate": 9.135288005092546e-06, + "loss": 0.435815691947937, + "step": 8797 + }, + { + "epoch": 1.6256539363213267, + "grad_norm": 0.09976905584335327, + "learning_rate": 9.1333005792401e-06, + "loss": 0.6863988041877747, + "step": 8798 + }, + { + "epoch": 1.6258387130302225, + "grad_norm": 0.07866614311933517, + "learning_rate": 9.131313187879584e-06, + "loss": 0.5332317352294922, + "step": 8799 + }, + { + "epoch": 1.6260234897391184, + "grad_norm": 0.07597986608743668, + "learning_rate": 9.129325831090087e-06, + "loss": 0.5171675682067871, + "step": 8800 + }, + { + "epoch": 1.6262082664480142, + "grad_norm": 0.07138339430093765, + "learning_rate": 9.127338508950696e-06, + "loss": 0.3930073082447052, + "step": 8801 + }, + { + "epoch": 1.62639304315691, + "grad_norm": 0.08232542872428894, + "learning_rate": 9.125351221540498e-06, + "loss": 0.5828226804733276, + "step": 8802 + }, + { + "epoch": 1.626577819865806, + "grad_norm": 0.0767393633723259, + "learning_rate": 9.123363968938592e-06, + "loss": 0.4624963700771332, + "step": 8803 + }, + { + "epoch": 1.626762596574702, + "grad_norm": 0.05044902116060257, + "learning_rate": 9.121376751224054e-06, + "loss": 0.28808170557022095, + "step": 8804 + }, + { + "epoch": 1.6269473732835977, + "grad_norm": 0.06592875719070435, + "learning_rate": 9.119389568475972e-06, + "loss": 0.39005059003829956, + "step": 8805 + }, + { + "epoch": 1.6271321499924936, + "grad_norm": 0.08422244340181351, + "learning_rate": 9.117402420773431e-06, + "loss": 0.5206887722015381, + "step": 8806 + }, + { + "epoch": 1.6273169267013894, + "grad_norm": 0.07252081483602524, + "learning_rate": 9.115415308195511e-06, + "loss": 0.45373600721359253, + "step": 8807 + }, + { + "epoch": 1.6275017034102852, + "grad_norm": 0.06747157871723175, + "learning_rate": 9.113428230821296e-06, + "loss": 0.46685582399368286, + "step": 8808 + }, + { + "epoch": 1.627686480119181, + "grad_norm": 0.08551542460918427, + "learning_rate": 9.111441188729863e-06, + "loss": 0.5203744769096375, + "step": 8809 + }, + { + "epoch": 1.6278712568280769, + "grad_norm": 0.0564129538834095, + "learning_rate": 9.10945418200029e-06, + "loss": 0.35253602266311646, + "step": 8810 + }, + { + "epoch": 1.6280560335369727, + "grad_norm": 0.06635194271802902, + "learning_rate": 9.107467210711655e-06, + "loss": 0.4317592978477478, + "step": 8811 + }, + { + "epoch": 1.6282408102458685, + "grad_norm": 0.06984547525644302, + "learning_rate": 9.105480274943032e-06, + "loss": 0.5081874132156372, + "step": 8812 + }, + { + "epoch": 1.6284255869547644, + "grad_norm": 0.07593455910682678, + "learning_rate": 9.103493374773496e-06, + "loss": 0.4170948266983032, + "step": 8813 + }, + { + "epoch": 1.6286103636636602, + "grad_norm": 0.07477694749832153, + "learning_rate": 9.10150651028212e-06, + "loss": 0.5138434767723083, + "step": 8814 + }, + { + "epoch": 1.628795140372556, + "grad_norm": 0.09288772940635681, + "learning_rate": 9.099519681547973e-06, + "loss": 0.6115694642066956, + "step": 8815 + }, + { + "epoch": 1.6289799170814518, + "grad_norm": 0.05931880325078964, + "learning_rate": 9.097532888650124e-06, + "loss": 0.4423096776008606, + "step": 8816 + }, + { + "epoch": 1.6291646937903477, + "grad_norm": 0.06403397023677826, + "learning_rate": 9.095546131667647e-06, + "loss": 0.33560627698898315, + "step": 8817 + }, + { + "epoch": 1.6293494704992435, + "grad_norm": 0.07780005782842636, + "learning_rate": 9.093559410679598e-06, + "loss": 0.4819314181804657, + "step": 8818 + }, + { + "epoch": 1.6295342472081393, + "grad_norm": 0.08891147375106812, + "learning_rate": 9.09157272576505e-06, + "loss": 0.5211207270622253, + "step": 8819 + }, + { + "epoch": 1.6297190239170352, + "grad_norm": 0.05928242579102516, + "learning_rate": 9.089586077003073e-06, + "loss": 0.44235554337501526, + "step": 8820 + }, + { + "epoch": 1.629903800625931, + "grad_norm": 0.06842076033353806, + "learning_rate": 9.087599464472714e-06, + "loss": 0.42068982124328613, + "step": 8821 + }, + { + "epoch": 1.6300885773348268, + "grad_norm": 0.060262531042099, + "learning_rate": 9.085612888253041e-06, + "loss": 0.3564003109931946, + "step": 8822 + }, + { + "epoch": 1.6302733540437226, + "grad_norm": 0.07870537787675858, + "learning_rate": 9.08362634842312e-06, + "loss": 0.5124083757400513, + "step": 8823 + }, + { + "epoch": 1.6304581307526185, + "grad_norm": 0.08522040396928787, + "learning_rate": 9.081639845062e-06, + "loss": 0.621161937713623, + "step": 8824 + }, + { + "epoch": 1.6306429074615145, + "grad_norm": 0.059914588928222656, + "learning_rate": 9.07965337824874e-06, + "loss": 0.39740699529647827, + "step": 8825 + }, + { + "epoch": 1.6308276841704104, + "grad_norm": 0.08846762031316757, + "learning_rate": 9.077666948062399e-06, + "loss": 0.5534099340438843, + "step": 8826 + }, + { + "epoch": 1.6310124608793062, + "grad_norm": 0.07665707916021347, + "learning_rate": 9.075680554582024e-06, + "loss": 0.501498818397522, + "step": 8827 + }, + { + "epoch": 1.631197237588202, + "grad_norm": 0.07973427325487137, + "learning_rate": 9.073694197886676e-06, + "loss": 0.4741021692752838, + "step": 8828 + }, + { + "epoch": 1.6313820142970978, + "grad_norm": 0.07348043471574783, + "learning_rate": 9.071707878055398e-06, + "loss": 0.48767468333244324, + "step": 8829 + }, + { + "epoch": 1.6315667910059937, + "grad_norm": 0.0816214382648468, + "learning_rate": 9.069721595167241e-06, + "loss": 0.48633819818496704, + "step": 8830 + }, + { + "epoch": 1.6317515677148895, + "grad_norm": 0.055964767932891846, + "learning_rate": 9.067735349301258e-06, + "loss": 0.33711522817611694, + "step": 8831 + }, + { + "epoch": 1.6319363444237855, + "grad_norm": 0.08837044984102249, + "learning_rate": 9.065749140536487e-06, + "loss": 0.6438451409339905, + "step": 8832 + }, + { + "epoch": 1.6321211211326814, + "grad_norm": 0.09076151251792908, + "learning_rate": 9.063762968951978e-06, + "loss": 0.5306956171989441, + "step": 8833 + }, + { + "epoch": 1.6323058978415772, + "grad_norm": 0.07748226076364517, + "learning_rate": 9.061776834626777e-06, + "loss": 0.5394060015678406, + "step": 8834 + }, + { + "epoch": 1.632490674550473, + "grad_norm": 0.06562750786542892, + "learning_rate": 9.05979073763992e-06, + "loss": 0.4323406517505646, + "step": 8835 + }, + { + "epoch": 1.6326754512593689, + "grad_norm": 0.06664278358221054, + "learning_rate": 9.05780467807045e-06, + "loss": 0.4158181846141815, + "step": 8836 + }, + { + "epoch": 1.6328602279682647, + "grad_norm": 0.06045094504952431, + "learning_rate": 9.05581865599741e-06, + "loss": 0.3473166525363922, + "step": 8837 + }, + { + "epoch": 1.6330450046771605, + "grad_norm": 0.07034634053707123, + "learning_rate": 9.053832671499828e-06, + "loss": 0.38992130756378174, + "step": 8838 + }, + { + "epoch": 1.6332297813860563, + "grad_norm": 0.061902426183223724, + "learning_rate": 9.051846724656747e-06, + "loss": 0.3445275127887726, + "step": 8839 + }, + { + "epoch": 1.6334145580949522, + "grad_norm": 0.09002410620450974, + "learning_rate": 9.049860815547205e-06, + "loss": 0.5587263107299805, + "step": 8840 + }, + { + "epoch": 1.633599334803848, + "grad_norm": 0.08259917795658112, + "learning_rate": 9.047874944250225e-06, + "loss": 0.5446577668190002, + "step": 8841 + }, + { + "epoch": 1.6337841115127438, + "grad_norm": 0.07812831550836563, + "learning_rate": 9.04588911084484e-06, + "loss": 0.4547961950302124, + "step": 8842 + }, + { + "epoch": 1.6339688882216397, + "grad_norm": 0.07952714711427689, + "learning_rate": 9.043903315410092e-06, + "loss": 0.5320074558258057, + "step": 8843 + }, + { + "epoch": 1.6341536649305355, + "grad_norm": 0.08245772123336792, + "learning_rate": 9.041917558024994e-06, + "loss": 0.4858451187610626, + "step": 8844 + }, + { + "epoch": 1.6343384416394313, + "grad_norm": 0.07378239929676056, + "learning_rate": 9.039931838768583e-06, + "loss": 0.5240111351013184, + "step": 8845 + }, + { + "epoch": 1.6345232183483271, + "grad_norm": 0.06492896378040314, + "learning_rate": 9.03794615771988e-06, + "loss": 0.3219175636768341, + "step": 8846 + }, + { + "epoch": 1.634707995057223, + "grad_norm": 0.08643704652786255, + "learning_rate": 9.03596051495791e-06, + "loss": 0.5498473048210144, + "step": 8847 + }, + { + "epoch": 1.6348927717661188, + "grad_norm": 0.07704591006040573, + "learning_rate": 9.033974910561696e-06, + "loss": 0.5322506427764893, + "step": 8848 + }, + { + "epoch": 1.6350775484750146, + "grad_norm": 0.08054648339748383, + "learning_rate": 9.031989344610258e-06, + "loss": 0.5502832531929016, + "step": 8849 + }, + { + "epoch": 1.6352623251839105, + "grad_norm": 0.08075813949108124, + "learning_rate": 9.030003817182615e-06, + "loss": 0.5375747680664062, + "step": 8850 + }, + { + "epoch": 1.6354471018928063, + "grad_norm": 0.06502457708120346, + "learning_rate": 9.028018328357787e-06, + "loss": 0.36693182587623596, + "step": 8851 + }, + { + "epoch": 1.6356318786017021, + "grad_norm": 0.08187992125749588, + "learning_rate": 9.026032878214787e-06, + "loss": 0.5035943984985352, + "step": 8852 + }, + { + "epoch": 1.635816655310598, + "grad_norm": 0.08244334906339645, + "learning_rate": 9.024047466832631e-06, + "loss": 0.5270260572433472, + "step": 8853 + }, + { + "epoch": 1.636001432019494, + "grad_norm": 0.08298245817422867, + "learning_rate": 9.022062094290334e-06, + "loss": 0.5583405494689941, + "step": 8854 + }, + { + "epoch": 1.6361862087283898, + "grad_norm": 0.06815101951360703, + "learning_rate": 9.020076760666904e-06, + "loss": 0.47471892833709717, + "step": 8855 + }, + { + "epoch": 1.6363709854372857, + "grad_norm": 0.08780615776777267, + "learning_rate": 9.018091466041354e-06, + "loss": 0.6067984104156494, + "step": 8856 + }, + { + "epoch": 1.6365557621461815, + "grad_norm": 0.0770939365029335, + "learning_rate": 9.016106210492696e-06, + "loss": 0.6024122834205627, + "step": 8857 + }, + { + "epoch": 1.6367405388550773, + "grad_norm": 0.06842092424631119, + "learning_rate": 9.014120994099926e-06, + "loss": 0.46925783157348633, + "step": 8858 + }, + { + "epoch": 1.6369253155639731, + "grad_norm": 0.06141021475195885, + "learning_rate": 9.012135816942058e-06, + "loss": 0.3543952405452728, + "step": 8859 + }, + { + "epoch": 1.637110092272869, + "grad_norm": 0.06749419867992401, + "learning_rate": 9.010150679098097e-06, + "loss": 0.4042138159275055, + "step": 8860 + }, + { + "epoch": 1.6372948689817648, + "grad_norm": 0.06526118516921997, + "learning_rate": 9.008165580647039e-06, + "loss": 0.42056259512901306, + "step": 8861 + }, + { + "epoch": 1.6374796456906608, + "grad_norm": 0.06441036611795425, + "learning_rate": 9.00618052166789e-06, + "loss": 0.41664764285087585, + "step": 8862 + }, + { + "epoch": 1.6376644223995567, + "grad_norm": 0.07588757574558258, + "learning_rate": 9.004195502239645e-06, + "loss": 0.422836035490036, + "step": 8863 + }, + { + "epoch": 1.6378491991084525, + "grad_norm": 0.07412891089916229, + "learning_rate": 9.002210522441303e-06, + "loss": 0.6133260130882263, + "step": 8864 + }, + { + "epoch": 1.6380339758173483, + "grad_norm": 0.07033253461122513, + "learning_rate": 9.000225582351864e-06, + "loss": 0.4490993916988373, + "step": 8865 + }, + { + "epoch": 1.6382187525262442, + "grad_norm": 0.08125453442335129, + "learning_rate": 8.998240682050315e-06, + "loss": 0.5367612838745117, + "step": 8866 + }, + { + "epoch": 1.63840352923514, + "grad_norm": 0.09599972516298294, + "learning_rate": 8.996255821615654e-06, + "loss": 0.6475223302841187, + "step": 8867 + }, + { + "epoch": 1.6385883059440358, + "grad_norm": 0.07406838983297348, + "learning_rate": 8.994271001126873e-06, + "loss": 0.4529930055141449, + "step": 8868 + }, + { + "epoch": 1.6387730826529316, + "grad_norm": 0.08143652975559235, + "learning_rate": 8.992286220662956e-06, + "loss": 0.5256994366645813, + "step": 8869 + }, + { + "epoch": 1.6389578593618275, + "grad_norm": 0.07117968052625656, + "learning_rate": 8.990301480302896e-06, + "loss": 0.4198594093322754, + "step": 8870 + }, + { + "epoch": 1.6391426360707233, + "grad_norm": 0.062106333673000336, + "learning_rate": 8.98831678012568e-06, + "loss": 0.3492191731929779, + "step": 8871 + }, + { + "epoch": 1.6393274127796191, + "grad_norm": 0.056991275399923325, + "learning_rate": 8.986332120210289e-06, + "loss": 0.40724286437034607, + "step": 8872 + }, + { + "epoch": 1.639512189488515, + "grad_norm": 0.07889767736196518, + "learning_rate": 8.984347500635708e-06, + "loss": 0.5376442074775696, + "step": 8873 + }, + { + "epoch": 1.6396969661974108, + "grad_norm": 0.07714872062206268, + "learning_rate": 8.982362921480921e-06, + "loss": 0.5542723536491394, + "step": 8874 + }, + { + "epoch": 1.6398817429063066, + "grad_norm": 0.07410655170679092, + "learning_rate": 8.980378382824904e-06, + "loss": 0.4115622341632843, + "step": 8875 + }, + { + "epoch": 1.6400665196152024, + "grad_norm": 0.055506497621536255, + "learning_rate": 8.978393884746641e-06, + "loss": 0.3900960683822632, + "step": 8876 + }, + { + "epoch": 1.6402512963240983, + "grad_norm": 0.06810830533504486, + "learning_rate": 8.976409427325102e-06, + "loss": 0.41855576634407043, + "step": 8877 + }, + { + "epoch": 1.640436073032994, + "grad_norm": 0.07559581100940704, + "learning_rate": 8.974425010639262e-06, + "loss": 0.5396881103515625, + "step": 8878 + }, + { + "epoch": 1.64062084974189, + "grad_norm": 0.094178207218647, + "learning_rate": 8.972440634768105e-06, + "loss": 0.640783429145813, + "step": 8879 + }, + { + "epoch": 1.6408056264507858, + "grad_norm": 0.08872411400079727, + "learning_rate": 8.970456299790592e-06, + "loss": 0.5301050543785095, + "step": 8880 + }, + { + "epoch": 1.6409904031596816, + "grad_norm": 0.09028414636850357, + "learning_rate": 8.968472005785698e-06, + "loss": 0.5819946527481079, + "step": 8881 + }, + { + "epoch": 1.6411751798685774, + "grad_norm": 0.07763354480266571, + "learning_rate": 8.966487752832393e-06, + "loss": 0.5989773273468018, + "step": 8882 + }, + { + "epoch": 1.6413599565774732, + "grad_norm": 0.06810028851032257, + "learning_rate": 8.964503541009639e-06, + "loss": 0.5160920023918152, + "step": 8883 + }, + { + "epoch": 1.6415447332863693, + "grad_norm": 0.0847913920879364, + "learning_rate": 8.962519370396403e-06, + "loss": 0.5464459657669067, + "step": 8884 + }, + { + "epoch": 1.6417295099952651, + "grad_norm": 0.07933083921670914, + "learning_rate": 8.960535241071654e-06, + "loss": 0.5264043211936951, + "step": 8885 + }, + { + "epoch": 1.641914286704161, + "grad_norm": 0.06766915321350098, + "learning_rate": 8.958551153114348e-06, + "loss": 0.449647456407547, + "step": 8886 + }, + { + "epoch": 1.6420990634130568, + "grad_norm": 0.07304760068655014, + "learning_rate": 8.956567106603448e-06, + "loss": 0.4253140687942505, + "step": 8887 + }, + { + "epoch": 1.6422838401219526, + "grad_norm": 0.07899277657270432, + "learning_rate": 8.954583101617915e-06, + "loss": 0.4085846245288849, + "step": 8888 + }, + { + "epoch": 1.6424686168308484, + "grad_norm": 0.08426927775144577, + "learning_rate": 8.9525991382367e-06, + "loss": 0.5908872485160828, + "step": 8889 + }, + { + "epoch": 1.6426533935397443, + "grad_norm": 0.08341715484857559, + "learning_rate": 8.950615216538765e-06, + "loss": 0.5195132493972778, + "step": 8890 + }, + { + "epoch": 1.6428381702486403, + "grad_norm": 0.08649944514036179, + "learning_rate": 8.948631336603062e-06, + "loss": 0.5062446594238281, + "step": 8891 + }, + { + "epoch": 1.6430229469575361, + "grad_norm": 0.06944700330495834, + "learning_rate": 8.946647498508541e-06, + "loss": 0.3602409362792969, + "step": 8892 + }, + { + "epoch": 1.643207723666432, + "grad_norm": 0.0651615634560585, + "learning_rate": 8.944663702334158e-06, + "loss": 0.3831489682197571, + "step": 8893 + }, + { + "epoch": 1.6433925003753278, + "grad_norm": 0.06624651700258255, + "learning_rate": 8.94267994815885e-06, + "loss": 0.423088401556015, + "step": 8894 + }, + { + "epoch": 1.6435772770842236, + "grad_norm": 0.0795547142624855, + "learning_rate": 8.940696236061575e-06, + "loss": 0.5180891752243042, + "step": 8895 + }, + { + "epoch": 1.6437620537931195, + "grad_norm": 0.06199095770716667, + "learning_rate": 8.938712566121281e-06, + "loss": 0.3613620400428772, + "step": 8896 + }, + { + "epoch": 1.6439468305020153, + "grad_norm": 0.06290160119533539, + "learning_rate": 8.936728938416901e-06, + "loss": 0.48867276310920715, + "step": 8897 + }, + { + "epoch": 1.6441316072109111, + "grad_norm": 0.09374285489320755, + "learning_rate": 8.934745353027384e-06, + "loss": 0.6991412043571472, + "step": 8898 + }, + { + "epoch": 1.644316383919807, + "grad_norm": 0.06297103315591812, + "learning_rate": 8.932761810031672e-06, + "loss": 0.3790844976902008, + "step": 8899 + }, + { + "epoch": 1.6445011606287028, + "grad_norm": 0.08258585631847382, + "learning_rate": 8.930778309508698e-06, + "loss": 0.5071451663970947, + "step": 8900 + }, + { + "epoch": 1.6446859373375986, + "grad_norm": 0.07014893740415573, + "learning_rate": 8.928794851537401e-06, + "loss": 0.5357246994972229, + "step": 8901 + }, + { + "epoch": 1.6448707140464944, + "grad_norm": 0.05861230567097664, + "learning_rate": 8.92681143619672e-06, + "loss": 0.3450230360031128, + "step": 8902 + }, + { + "epoch": 1.6450554907553903, + "grad_norm": 0.08785264939069748, + "learning_rate": 8.924828063565585e-06, + "loss": 0.7066122889518738, + "step": 8903 + }, + { + "epoch": 1.645240267464286, + "grad_norm": 0.08250744640827179, + "learning_rate": 8.92284473372293e-06, + "loss": 0.6925334930419922, + "step": 8904 + }, + { + "epoch": 1.645425044173182, + "grad_norm": 0.06766656786203384, + "learning_rate": 8.920861446747685e-06, + "loss": 0.2983948886394501, + "step": 8905 + }, + { + "epoch": 1.6456098208820777, + "grad_norm": 0.07855965942144394, + "learning_rate": 8.918878202718778e-06, + "loss": 0.4852737486362457, + "step": 8906 + }, + { + "epoch": 1.6457945975909736, + "grad_norm": 0.08639141917228699, + "learning_rate": 8.916895001715134e-06, + "loss": 0.614033043384552, + "step": 8907 + }, + { + "epoch": 1.6459793742998694, + "grad_norm": 0.07663006335496902, + "learning_rate": 8.914911843815682e-06, + "loss": 0.43437522649765015, + "step": 8908 + }, + { + "epoch": 1.6461641510087652, + "grad_norm": 0.07699207961559296, + "learning_rate": 8.912928729099344e-06, + "loss": 0.5516226887702942, + "step": 8909 + }, + { + "epoch": 1.646348927717661, + "grad_norm": 0.07173573970794678, + "learning_rate": 8.910945657645043e-06, + "loss": 0.4079380929470062, + "step": 8910 + }, + { + "epoch": 1.6465337044265569, + "grad_norm": 0.08935403823852539, + "learning_rate": 8.908962629531695e-06, + "loss": 0.5903975963592529, + "step": 8911 + }, + { + "epoch": 1.6467184811354527, + "grad_norm": 0.07332302629947662, + "learning_rate": 8.906979644838221e-06, + "loss": 0.4380277097225189, + "step": 8912 + }, + { + "epoch": 1.6469032578443488, + "grad_norm": 0.0666826069355011, + "learning_rate": 8.904996703643542e-06, + "loss": 0.33464354276657104, + "step": 8913 + }, + { + "epoch": 1.6470880345532446, + "grad_norm": 0.06597012281417847, + "learning_rate": 8.903013806026561e-06, + "loss": 0.500480055809021, + "step": 8914 + }, + { + "epoch": 1.6472728112621404, + "grad_norm": 0.1073843464255333, + "learning_rate": 8.901030952066202e-06, + "loss": 0.6993005871772766, + "step": 8915 + }, + { + "epoch": 1.6474575879710363, + "grad_norm": 0.07667973637580872, + "learning_rate": 8.899048141841376e-06, + "loss": 0.5299189686775208, + "step": 8916 + }, + { + "epoch": 1.647642364679932, + "grad_norm": 0.07186425477266312, + "learning_rate": 8.897065375430987e-06, + "loss": 0.41190212965011597, + "step": 8917 + }, + { + "epoch": 1.647827141388828, + "grad_norm": 0.06618180125951767, + "learning_rate": 8.895082652913943e-06, + "loss": 0.37877678871154785, + "step": 8918 + }, + { + "epoch": 1.6480119180977237, + "grad_norm": 0.07953804731369019, + "learning_rate": 8.893099974369157e-06, + "loss": 0.5013386011123657, + "step": 8919 + }, + { + "epoch": 1.6481966948066198, + "grad_norm": 0.07290409505367279, + "learning_rate": 8.891117339875526e-06, + "loss": 0.49663516879081726, + "step": 8920 + }, + { + "epoch": 1.6483814715155156, + "grad_norm": 0.09176815301179886, + "learning_rate": 8.889134749511956e-06, + "loss": 0.631584107875824, + "step": 8921 + }, + { + "epoch": 1.6485662482244114, + "grad_norm": 0.0840664729475975, + "learning_rate": 8.88715220335735e-06, + "loss": 0.5021892189979553, + "step": 8922 + }, + { + "epoch": 1.6487510249333073, + "grad_norm": 0.08249734342098236, + "learning_rate": 8.8851697014906e-06, + "loss": 0.5705278515815735, + "step": 8923 + }, + { + "epoch": 1.648935801642203, + "grad_norm": 0.06764619797468185, + "learning_rate": 8.88318724399061e-06, + "loss": 0.392068475484848, + "step": 8924 + }, + { + "epoch": 1.649120578351099, + "grad_norm": 0.09124281257390976, + "learning_rate": 8.881204830936275e-06, + "loss": 0.6002790927886963, + "step": 8925 + }, + { + "epoch": 1.6493053550599948, + "grad_norm": 0.06591031700372696, + "learning_rate": 8.879222462406485e-06, + "loss": 0.4309958517551422, + "step": 8926 + }, + { + "epoch": 1.6494901317688906, + "grad_norm": 0.0704890787601471, + "learning_rate": 8.877240138480139e-06, + "loss": 0.42929908633232117, + "step": 8927 + }, + { + "epoch": 1.6496749084777864, + "grad_norm": 0.060363683849573135, + "learning_rate": 8.875257859236119e-06, + "loss": 0.4333549737930298, + "step": 8928 + }, + { + "epoch": 1.6498596851866822, + "grad_norm": 0.08272654563188553, + "learning_rate": 8.873275624753316e-06, + "loss": 0.49419546127319336, + "step": 8929 + }, + { + "epoch": 1.650044461895578, + "grad_norm": 0.07461053133010864, + "learning_rate": 8.871293435110623e-06, + "loss": 0.5192112922668457, + "step": 8930 + }, + { + "epoch": 1.650229238604474, + "grad_norm": 0.0803760215640068, + "learning_rate": 8.869311290386916e-06, + "loss": 0.49166378378868103, + "step": 8931 + }, + { + "epoch": 1.6504140153133697, + "grad_norm": 0.06983582675457001, + "learning_rate": 8.867329190661082e-06, + "loss": 0.4300093352794647, + "step": 8932 + }, + { + "epoch": 1.6505987920222656, + "grad_norm": 0.05998978391289711, + "learning_rate": 8.865347136012009e-06, + "loss": 0.41076162457466125, + "step": 8933 + }, + { + "epoch": 1.6507835687311614, + "grad_norm": 0.07758557796478271, + "learning_rate": 8.863365126518562e-06, + "loss": 0.488598495721817, + "step": 8934 + }, + { + "epoch": 1.6509683454400572, + "grad_norm": 0.06825264543294907, + "learning_rate": 8.861383162259628e-06, + "loss": 0.43793922662734985, + "step": 8935 + }, + { + "epoch": 1.651153122148953, + "grad_norm": 0.07090115547180176, + "learning_rate": 8.859401243314088e-06, + "loss": 0.4335429072380066, + "step": 8936 + }, + { + "epoch": 1.6513378988578489, + "grad_norm": 0.07850585877895355, + "learning_rate": 8.857419369760806e-06, + "loss": 0.47743913531303406, + "step": 8937 + }, + { + "epoch": 1.6515226755667447, + "grad_norm": 0.07450441271066666, + "learning_rate": 8.855437541678655e-06, + "loss": 0.4250149130821228, + "step": 8938 + }, + { + "epoch": 1.6517074522756405, + "grad_norm": 0.08737654983997345, + "learning_rate": 8.853455759146516e-06, + "loss": 0.6291829347610474, + "step": 8939 + }, + { + "epoch": 1.6518922289845364, + "grad_norm": 0.058018364012241364, + "learning_rate": 8.851474022243247e-06, + "loss": 0.3619850277900696, + "step": 8940 + }, + { + "epoch": 1.6520770056934322, + "grad_norm": 0.07415791600942612, + "learning_rate": 8.849492331047718e-06, + "loss": 0.5303621888160706, + "step": 8941 + }, + { + "epoch": 1.6522617824023282, + "grad_norm": 0.06933600455522537, + "learning_rate": 8.847510685638797e-06, + "loss": 0.4177916646003723, + "step": 8942 + }, + { + "epoch": 1.652446559111224, + "grad_norm": 0.08505621552467346, + "learning_rate": 8.845529086095342e-06, + "loss": 0.5294852256774902, + "step": 8943 + }, + { + "epoch": 1.65263133582012, + "grad_norm": 0.10572239756584167, + "learning_rate": 8.84354753249622e-06, + "loss": 0.7183526158332825, + "step": 8944 + }, + { + "epoch": 1.6528161125290157, + "grad_norm": 0.06564278900623322, + "learning_rate": 8.841566024920286e-06, + "loss": 0.34837156534194946, + "step": 8945 + }, + { + "epoch": 1.6530008892379116, + "grad_norm": 0.09466851502656937, + "learning_rate": 8.8395845634464e-06, + "loss": 0.5224567651748657, + "step": 8946 + }, + { + "epoch": 1.6531856659468074, + "grad_norm": 0.08879232406616211, + "learning_rate": 8.837603148153421e-06, + "loss": 0.6343720555305481, + "step": 8947 + }, + { + "epoch": 1.6533704426557032, + "grad_norm": 0.0756058394908905, + "learning_rate": 8.835621779120197e-06, + "loss": 0.48447737097740173, + "step": 8948 + }, + { + "epoch": 1.653555219364599, + "grad_norm": 0.0774824246764183, + "learning_rate": 8.833640456425583e-06, + "loss": 0.5902532339096069, + "step": 8949 + }, + { + "epoch": 1.653739996073495, + "grad_norm": 0.07572329789400101, + "learning_rate": 8.831659180148433e-06, + "loss": 0.5073521137237549, + "step": 8950 + }, + { + "epoch": 1.653924772782391, + "grad_norm": 0.0775173082947731, + "learning_rate": 8.829677950367589e-06, + "loss": 0.4409608244895935, + "step": 8951 + }, + { + "epoch": 1.6541095494912867, + "grad_norm": 0.09347783029079437, + "learning_rate": 8.827696767161902e-06, + "loss": 0.5821473598480225, + "step": 8952 + }, + { + "epoch": 1.6542943262001826, + "grad_norm": 0.08198609203100204, + "learning_rate": 8.82571563061022e-06, + "loss": 0.5458663702011108, + "step": 8953 + }, + { + "epoch": 1.6544791029090784, + "grad_norm": 0.08065678924322128, + "learning_rate": 8.823734540791375e-06, + "loss": 0.451998233795166, + "step": 8954 + }, + { + "epoch": 1.6546638796179742, + "grad_norm": 0.08799226582050323, + "learning_rate": 8.821753497784218e-06, + "loss": 0.5489275455474854, + "step": 8955 + }, + { + "epoch": 1.65484865632687, + "grad_norm": 0.07855572551488876, + "learning_rate": 8.81977250166759e-06, + "loss": 0.5080621838569641, + "step": 8956 + }, + { + "epoch": 1.6550334330357659, + "grad_norm": 0.07618331909179688, + "learning_rate": 8.817791552520319e-06, + "loss": 0.7386259436607361, + "step": 8957 + }, + { + "epoch": 1.6552182097446617, + "grad_norm": 0.07988817989826202, + "learning_rate": 8.815810650421249e-06, + "loss": 0.5634803175926208, + "step": 8958 + }, + { + "epoch": 1.6554029864535575, + "grad_norm": 0.0562705397605896, + "learning_rate": 8.813829795449206e-06, + "loss": 0.3609471917152405, + "step": 8959 + }, + { + "epoch": 1.6555877631624534, + "grad_norm": 0.09556792676448822, + "learning_rate": 8.811848987683028e-06, + "loss": 0.6133294701576233, + "step": 8960 + }, + { + "epoch": 1.6557725398713492, + "grad_norm": 0.09546475857496262, + "learning_rate": 8.809868227201546e-06, + "loss": 0.7730337977409363, + "step": 8961 + }, + { + "epoch": 1.655957316580245, + "grad_norm": 0.06718320399522781, + "learning_rate": 8.807887514083581e-06, + "loss": 0.34548744559288025, + "step": 8962 + }, + { + "epoch": 1.6561420932891409, + "grad_norm": 0.07118339091539383, + "learning_rate": 8.805906848407964e-06, + "loss": 0.45795369148254395, + "step": 8963 + }, + { + "epoch": 1.6563268699980367, + "grad_norm": 0.06916230171918869, + "learning_rate": 8.80392623025352e-06, + "loss": 0.45204415917396545, + "step": 8964 + }, + { + "epoch": 1.6565116467069325, + "grad_norm": 0.07920881360769272, + "learning_rate": 8.801945659699067e-06, + "loss": 0.43370485305786133, + "step": 8965 + }, + { + "epoch": 1.6566964234158283, + "grad_norm": 0.08380249887704849, + "learning_rate": 8.799965136823432e-06, + "loss": 0.5766981244087219, + "step": 8966 + }, + { + "epoch": 1.6568812001247242, + "grad_norm": 0.09630127996206284, + "learning_rate": 8.79798466170543e-06, + "loss": 0.7253932952880859, + "step": 8967 + }, + { + "epoch": 1.65706597683362, + "grad_norm": 0.07304397970438004, + "learning_rate": 8.796004234423876e-06, + "loss": 0.5263710021972656, + "step": 8968 + }, + { + "epoch": 1.6572507535425158, + "grad_norm": 0.0875425785779953, + "learning_rate": 8.794023855057587e-06, + "loss": 0.5486422777175903, + "step": 8969 + }, + { + "epoch": 1.6574355302514117, + "grad_norm": 0.05754028633236885, + "learning_rate": 8.792043523685376e-06, + "loss": 0.35505831241607666, + "step": 8970 + }, + { + "epoch": 1.6576203069603075, + "grad_norm": 0.09025000035762787, + "learning_rate": 8.790063240386053e-06, + "loss": 0.7122763395309448, + "step": 8971 + }, + { + "epoch": 1.6578050836692035, + "grad_norm": 0.08361772447824478, + "learning_rate": 8.788083005238428e-06, + "loss": 0.5144809484481812, + "step": 8972 + }, + { + "epoch": 1.6579898603780994, + "grad_norm": 0.07230621576309204, + "learning_rate": 8.786102818321311e-06, + "loss": 0.47324246168136597, + "step": 8973 + }, + { + "epoch": 1.6581746370869952, + "grad_norm": 0.07307907938957214, + "learning_rate": 8.784122679713497e-06, + "loss": 0.4546387493610382, + "step": 8974 + }, + { + "epoch": 1.658359413795891, + "grad_norm": 0.07837999612092972, + "learning_rate": 8.782142589493805e-06, + "loss": 0.5022808313369751, + "step": 8975 + }, + { + "epoch": 1.6585441905047869, + "grad_norm": 0.09872845560312271, + "learning_rate": 8.780162547741022e-06, + "loss": 0.5782057046890259, + "step": 8976 + }, + { + "epoch": 1.6587289672136827, + "grad_norm": 0.09069226682186127, + "learning_rate": 8.778182554533952e-06, + "loss": 0.6260485053062439, + "step": 8977 + }, + { + "epoch": 1.6589137439225785, + "grad_norm": 0.08357825875282288, + "learning_rate": 8.776202609951398e-06, + "loss": 0.6425731778144836, + "step": 8978 + }, + { + "epoch": 1.6590985206314746, + "grad_norm": 0.07038763910531998, + "learning_rate": 8.77422271407215e-06, + "loss": 0.48733213543891907, + "step": 8979 + }, + { + "epoch": 1.6592832973403704, + "grad_norm": 0.0719500184059143, + "learning_rate": 8.772242866975e-06, + "loss": 0.44783756136894226, + "step": 8980 + }, + { + "epoch": 1.6594680740492662, + "grad_norm": 0.09439882636070251, + "learning_rate": 8.770263068738747e-06, + "loss": 0.595038652420044, + "step": 8981 + }, + { + "epoch": 1.659652850758162, + "grad_norm": 0.06088631972670555, + "learning_rate": 8.768283319442173e-06, + "loss": 0.4202379882335663, + "step": 8982 + }, + { + "epoch": 1.6598376274670579, + "grad_norm": 0.09072203934192657, + "learning_rate": 8.76630361916407e-06, + "loss": 0.5772908926010132, + "step": 8983 + }, + { + "epoch": 1.6600224041759537, + "grad_norm": 0.0732671394944191, + "learning_rate": 8.764323967983226e-06, + "loss": 0.4386714696884155, + "step": 8984 + }, + { + "epoch": 1.6602071808848495, + "grad_norm": 0.08566321432590485, + "learning_rate": 8.76234436597842e-06, + "loss": 0.5181201696395874, + "step": 8985 + }, + { + "epoch": 1.6603919575937454, + "grad_norm": 0.07344254106283188, + "learning_rate": 8.760364813228436e-06, + "loss": 0.5022342205047607, + "step": 8986 + }, + { + "epoch": 1.6605767343026412, + "grad_norm": 0.08056008815765381, + "learning_rate": 8.758385309812055e-06, + "loss": 0.6142107248306274, + "step": 8987 + }, + { + "epoch": 1.660761511011537, + "grad_norm": 0.08112746477127075, + "learning_rate": 8.756405855808053e-06, + "loss": 0.5439735651016235, + "step": 8988 + }, + { + "epoch": 1.6609462877204328, + "grad_norm": 0.08961106836795807, + "learning_rate": 8.754426451295207e-06, + "loss": 0.6286073327064514, + "step": 8989 + }, + { + "epoch": 1.6611310644293287, + "grad_norm": 0.07029113173484802, + "learning_rate": 8.752447096352295e-06, + "loss": 0.36603888869285583, + "step": 8990 + }, + { + "epoch": 1.6613158411382245, + "grad_norm": 0.10147112607955933, + "learning_rate": 8.750467791058081e-06, + "loss": 0.743262767791748, + "step": 8991 + }, + { + "epoch": 1.6615006178471203, + "grad_norm": 0.06966464221477509, + "learning_rate": 8.748488535491346e-06, + "loss": 0.48661547899246216, + "step": 8992 + }, + { + "epoch": 1.6616853945560162, + "grad_norm": 0.06567555665969849, + "learning_rate": 8.746509329730846e-06, + "loss": 0.43307769298553467, + "step": 8993 + }, + { + "epoch": 1.661870171264912, + "grad_norm": 0.08893869817256927, + "learning_rate": 8.74453017385535e-06, + "loss": 0.4946390688419342, + "step": 8994 + }, + { + "epoch": 1.6620549479738078, + "grad_norm": 0.07407506555318832, + "learning_rate": 8.742551067943632e-06, + "loss": 0.5503557324409485, + "step": 8995 + }, + { + "epoch": 1.6622397246827036, + "grad_norm": 0.10296279937028885, + "learning_rate": 8.740572012074445e-06, + "loss": 0.7889887094497681, + "step": 8996 + }, + { + "epoch": 1.6624245013915995, + "grad_norm": 0.08157563209533691, + "learning_rate": 8.73859300632655e-06, + "loss": 0.5192692875862122, + "step": 8997 + }, + { + "epoch": 1.6626092781004953, + "grad_norm": 0.08204268664121628, + "learning_rate": 8.73661405077871e-06, + "loss": 0.5423938035964966, + "step": 8998 + }, + { + "epoch": 1.6627940548093911, + "grad_norm": 0.07997702807188034, + "learning_rate": 8.734635145509676e-06, + "loss": 0.4689732789993286, + "step": 8999 + }, + { + "epoch": 1.662978831518287, + "grad_norm": 0.0857803151011467, + "learning_rate": 8.732656290598205e-06, + "loss": 0.3920707404613495, + "step": 9000 + }, + { + "epoch": 1.662978831518287, + "eval_loss": 0.5690487623214722, + "eval_runtime": 172.3601, + "eval_samples_per_second": 105.761, + "eval_steps_per_second": 13.222, + "step": 9000 + }, + { + "epoch": 1.663163608227183, + "grad_norm": 0.08445625007152557, + "learning_rate": 8.730677486123048e-06, + "loss": 0.4855717718601227, + "step": 9001 + }, + { + "epoch": 1.6633483849360788, + "grad_norm": 0.08252844959497452, + "learning_rate": 8.728698732162956e-06, + "loss": 0.5647783875465393, + "step": 9002 + }, + { + "epoch": 1.6635331616449747, + "grad_norm": 0.08127132058143616, + "learning_rate": 8.726720028796678e-06, + "loss": 0.5227984189987183, + "step": 9003 + }, + { + "epoch": 1.6637179383538705, + "grad_norm": 0.0665494054555893, + "learning_rate": 8.72474137610296e-06, + "loss": 0.41443580389022827, + "step": 9004 + }, + { + "epoch": 1.6639027150627663, + "grad_norm": 0.05049487203359604, + "learning_rate": 8.722762774160543e-06, + "loss": 0.3106989562511444, + "step": 9005 + }, + { + "epoch": 1.6640874917716622, + "grad_norm": 0.07193901389837265, + "learning_rate": 8.720784223048175e-06, + "loss": 0.43464213609695435, + "step": 9006 + }, + { + "epoch": 1.664272268480558, + "grad_norm": 0.09168123453855515, + "learning_rate": 8.71880572284459e-06, + "loss": 0.6453408002853394, + "step": 9007 + }, + { + "epoch": 1.664457045189454, + "grad_norm": 0.08098369836807251, + "learning_rate": 8.716827273628528e-06, + "loss": 0.5169015526771545, + "step": 9008 + }, + { + "epoch": 1.6646418218983499, + "grad_norm": 0.08499372750520706, + "learning_rate": 8.714848875478732e-06, + "loss": 0.5493677258491516, + "step": 9009 + }, + { + "epoch": 1.6648265986072457, + "grad_norm": 0.06951579451560974, + "learning_rate": 8.712870528473922e-06, + "loss": 0.46776798367500305, + "step": 9010 + }, + { + "epoch": 1.6650113753161415, + "grad_norm": 0.10221091657876968, + "learning_rate": 8.71089223269284e-06, + "loss": 0.7205289602279663, + "step": 9011 + }, + { + "epoch": 1.6651961520250373, + "grad_norm": 0.07168225944042206, + "learning_rate": 8.70891398821422e-06, + "loss": 0.44184955954551697, + "step": 9012 + }, + { + "epoch": 1.6653809287339332, + "grad_norm": 0.06919576227664948, + "learning_rate": 8.706935795116779e-06, + "loss": 0.47983217239379883, + "step": 9013 + }, + { + "epoch": 1.665565705442829, + "grad_norm": 0.08107378333806992, + "learning_rate": 8.704957653479245e-06, + "loss": 0.5134053230285645, + "step": 9014 + }, + { + "epoch": 1.6657504821517248, + "grad_norm": 0.06921125948429108, + "learning_rate": 8.702979563380352e-06, + "loss": 0.37715357542037964, + "step": 9015 + }, + { + "epoch": 1.6659352588606207, + "grad_norm": 0.0681595578789711, + "learning_rate": 8.70100152489881e-06, + "loss": 0.4451044797897339, + "step": 9016 + }, + { + "epoch": 1.6661200355695165, + "grad_norm": 0.07533681392669678, + "learning_rate": 8.69902353811334e-06, + "loss": 0.5605936646461487, + "step": 9017 + }, + { + "epoch": 1.6663048122784123, + "grad_norm": 0.08548444509506226, + "learning_rate": 8.697045603102673e-06, + "loss": 0.5757061839103699, + "step": 9018 + }, + { + "epoch": 1.6664895889873081, + "grad_norm": 0.0741836205124855, + "learning_rate": 8.695067719945505e-06, + "loss": 0.49671831727027893, + "step": 9019 + }, + { + "epoch": 1.666674365696204, + "grad_norm": 0.06897836923599243, + "learning_rate": 8.693089888720563e-06, + "loss": 0.39301538467407227, + "step": 9020 + }, + { + "epoch": 1.6668591424050998, + "grad_norm": 0.07714451104402542, + "learning_rate": 8.691112109506556e-06, + "loss": 0.4285025894641876, + "step": 9021 + }, + { + "epoch": 1.6670439191139956, + "grad_norm": 0.06746784597635269, + "learning_rate": 8.68913438238219e-06, + "loss": 0.37637728452682495, + "step": 9022 + }, + { + "epoch": 1.6672286958228915, + "grad_norm": 0.07533597201108932, + "learning_rate": 8.687156707426175e-06, + "loss": 0.5479862093925476, + "step": 9023 + }, + { + "epoch": 1.6674134725317873, + "grad_norm": 0.10356532037258148, + "learning_rate": 8.685179084717215e-06, + "loss": 0.754237949848175, + "step": 9024 + }, + { + "epoch": 1.6675982492406831, + "grad_norm": 0.08410041034221649, + "learning_rate": 8.683201514334013e-06, + "loss": 0.5808772444725037, + "step": 9025 + }, + { + "epoch": 1.667783025949579, + "grad_norm": 0.08972388505935669, + "learning_rate": 8.681223996355275e-06, + "loss": 0.6561397910118103, + "step": 9026 + }, + { + "epoch": 1.6679678026584748, + "grad_norm": 0.08572930097579956, + "learning_rate": 8.679246530859693e-06, + "loss": 0.7714314460754395, + "step": 9027 + }, + { + "epoch": 1.6681525793673706, + "grad_norm": 0.07372952252626419, + "learning_rate": 8.677269117925964e-06, + "loss": 0.45184555649757385, + "step": 9028 + }, + { + "epoch": 1.6683373560762664, + "grad_norm": 0.059556107968091965, + "learning_rate": 8.675291757632794e-06, + "loss": 0.32072627544403076, + "step": 9029 + }, + { + "epoch": 1.6685221327851625, + "grad_norm": 0.0691826194524765, + "learning_rate": 8.67331445005886e-06, + "loss": 0.42070019245147705, + "step": 9030 + }, + { + "epoch": 1.6687069094940583, + "grad_norm": 0.0846749022603035, + "learning_rate": 8.671337195282862e-06, + "loss": 0.5196872353553772, + "step": 9031 + }, + { + "epoch": 1.6688916862029541, + "grad_norm": 0.0776742622256279, + "learning_rate": 8.669359993383491e-06, + "loss": 0.6171072721481323, + "step": 9032 + }, + { + "epoch": 1.66907646291185, + "grad_norm": 0.09349127113819122, + "learning_rate": 8.667382844439424e-06, + "loss": 0.709235668182373, + "step": 9033 + }, + { + "epoch": 1.6692612396207458, + "grad_norm": 0.0716007873415947, + "learning_rate": 8.66540574852935e-06, + "loss": 0.45914164185523987, + "step": 9034 + }, + { + "epoch": 1.6694460163296416, + "grad_norm": 0.05829813703894615, + "learning_rate": 8.663428705731957e-06, + "loss": 0.36543506383895874, + "step": 9035 + }, + { + "epoch": 1.6696307930385375, + "grad_norm": 0.061826031655073166, + "learning_rate": 8.661451716125917e-06, + "loss": 0.35469627380371094, + "step": 9036 + }, + { + "epoch": 1.6698155697474333, + "grad_norm": 0.07189781963825226, + "learning_rate": 8.65947477978991e-06, + "loss": 0.38797852396965027, + "step": 9037 + }, + { + "epoch": 1.6700003464563293, + "grad_norm": 0.07448961585760117, + "learning_rate": 8.657497896802616e-06, + "loss": 0.49736249446868896, + "step": 9038 + }, + { + "epoch": 1.6701851231652252, + "grad_norm": 0.0732865110039711, + "learning_rate": 8.655521067242703e-06, + "loss": 0.41947612166404724, + "step": 9039 + }, + { + "epoch": 1.670369899874121, + "grad_norm": 0.08115722984075546, + "learning_rate": 8.653544291188846e-06, + "loss": 0.5365369319915771, + "step": 9040 + }, + { + "epoch": 1.6705546765830168, + "grad_norm": 0.07986792922019958, + "learning_rate": 8.651567568719713e-06, + "loss": 0.4790862500667572, + "step": 9041 + }, + { + "epoch": 1.6707394532919126, + "grad_norm": 0.08238638937473297, + "learning_rate": 8.649590899913972e-06, + "loss": 0.5070688128471375, + "step": 9042 + }, + { + "epoch": 1.6709242300008085, + "grad_norm": 0.08489910513162613, + "learning_rate": 8.64761428485029e-06, + "loss": 0.5985916256904602, + "step": 9043 + }, + { + "epoch": 1.6711090067097043, + "grad_norm": 0.08574743568897247, + "learning_rate": 8.645637723607326e-06, + "loss": 0.45888751745224, + "step": 9044 + }, + { + "epoch": 1.6712937834186001, + "grad_norm": 0.0878172218799591, + "learning_rate": 8.643661216263744e-06, + "loss": 0.5856870412826538, + "step": 9045 + }, + { + "epoch": 1.671478560127496, + "grad_norm": 0.07768607884645462, + "learning_rate": 8.641684762898203e-06, + "loss": 0.5130709409713745, + "step": 9046 + }, + { + "epoch": 1.6716633368363918, + "grad_norm": 0.07102810591459274, + "learning_rate": 8.639708363589358e-06, + "loss": 0.49262791872024536, + "step": 9047 + }, + { + "epoch": 1.6718481135452876, + "grad_norm": 0.0737321674823761, + "learning_rate": 8.637732018415865e-06, + "loss": 0.5094331502914429, + "step": 9048 + }, + { + "epoch": 1.6720328902541834, + "grad_norm": 0.070593923330307, + "learning_rate": 8.63575572745638e-06, + "loss": 0.45176151394844055, + "step": 9049 + }, + { + "epoch": 1.6722176669630793, + "grad_norm": 0.08042187243700027, + "learning_rate": 8.63377949078954e-06, + "loss": 0.4876258671283722, + "step": 9050 + }, + { + "epoch": 1.672402443671975, + "grad_norm": 0.06371616572141647, + "learning_rate": 8.631803308494005e-06, + "loss": 0.39658114314079285, + "step": 9051 + }, + { + "epoch": 1.672587220380871, + "grad_norm": 0.07558796554803848, + "learning_rate": 8.629827180648423e-06, + "loss": 0.5124465227127075, + "step": 9052 + }, + { + "epoch": 1.6727719970897668, + "grad_norm": 0.08541289716959, + "learning_rate": 8.627851107331426e-06, + "loss": 0.4782090485095978, + "step": 9053 + }, + { + "epoch": 1.6729567737986626, + "grad_norm": 0.0727166086435318, + "learning_rate": 8.625875088621662e-06, + "loss": 0.5052157640457153, + "step": 9054 + }, + { + "epoch": 1.6731415505075584, + "grad_norm": 0.09100605547428131, + "learning_rate": 8.623899124597777e-06, + "loss": 0.48215600848197937, + "step": 9055 + }, + { + "epoch": 1.6733263272164542, + "grad_norm": 0.10287399590015411, + "learning_rate": 8.621923215338397e-06, + "loss": 0.6642863154411316, + "step": 9056 + }, + { + "epoch": 1.67351110392535, + "grad_norm": 0.08276277035474777, + "learning_rate": 8.619947360922162e-06, + "loss": 0.5098517537117004, + "step": 9057 + }, + { + "epoch": 1.673695880634246, + "grad_norm": 0.0931604877114296, + "learning_rate": 8.617971561427705e-06, + "loss": 0.5521177053451538, + "step": 9058 + }, + { + "epoch": 1.6738806573431417, + "grad_norm": 0.0651213601231575, + "learning_rate": 8.615995816933655e-06, + "loss": 0.3899434506893158, + "step": 9059 + }, + { + "epoch": 1.6740654340520378, + "grad_norm": 0.07249141484498978, + "learning_rate": 8.614020127518642e-06, + "loss": 0.4754531979560852, + "step": 9060 + }, + { + "epoch": 1.6742502107609336, + "grad_norm": 0.10817329585552216, + "learning_rate": 8.61204449326129e-06, + "loss": 0.7253162264823914, + "step": 9061 + }, + { + "epoch": 1.6744349874698294, + "grad_norm": 0.09194529801607132, + "learning_rate": 8.610068914240227e-06, + "loss": 0.6241350769996643, + "step": 9062 + }, + { + "epoch": 1.6746197641787253, + "grad_norm": 0.09278565645217896, + "learning_rate": 8.608093390534074e-06, + "loss": 0.6508000493049622, + "step": 9063 + }, + { + "epoch": 1.674804540887621, + "grad_norm": 0.08632796257734299, + "learning_rate": 8.606117922221447e-06, + "loss": 0.5592050552368164, + "step": 9064 + }, + { + "epoch": 1.674989317596517, + "grad_norm": 0.07858537137508392, + "learning_rate": 8.604142509380967e-06, + "loss": 0.6045482754707336, + "step": 9065 + }, + { + "epoch": 1.6751740943054128, + "grad_norm": 0.07718654721975327, + "learning_rate": 8.602167152091247e-06, + "loss": 0.4939536154270172, + "step": 9066 + }, + { + "epoch": 1.6753588710143088, + "grad_norm": 0.0820227712392807, + "learning_rate": 8.600191850430901e-06, + "loss": 0.4896371364593506, + "step": 9067 + }, + { + "epoch": 1.6755436477232046, + "grad_norm": 0.08000557869672775, + "learning_rate": 8.59821660447854e-06, + "loss": 0.48228392004966736, + "step": 9068 + }, + { + "epoch": 1.6757284244321005, + "grad_norm": 0.07113906741142273, + "learning_rate": 8.596241414312776e-06, + "loss": 0.5333797931671143, + "step": 9069 + }, + { + "epoch": 1.6759132011409963, + "grad_norm": 0.08210809528827667, + "learning_rate": 8.594266280012206e-06, + "loss": 0.6667137145996094, + "step": 9070 + }, + { + "epoch": 1.6760979778498921, + "grad_norm": 0.07185039669275284, + "learning_rate": 8.592291201655446e-06, + "loss": 0.5223159193992615, + "step": 9071 + }, + { + "epoch": 1.676282754558788, + "grad_norm": 0.06653635203838348, + "learning_rate": 8.590316179321088e-06, + "loss": 0.38789844512939453, + "step": 9072 + }, + { + "epoch": 1.6764675312676838, + "grad_norm": 0.06960821896791458, + "learning_rate": 8.588341213087734e-06, + "loss": 0.5253861546516418, + "step": 9073 + }, + { + "epoch": 1.6766523079765796, + "grad_norm": 0.05516147240996361, + "learning_rate": 8.586366303033989e-06, + "loss": 0.3092251121997833, + "step": 9074 + }, + { + "epoch": 1.6768370846854754, + "grad_norm": 0.10345713794231415, + "learning_rate": 8.584391449238439e-06, + "loss": 0.6506808996200562, + "step": 9075 + }, + { + "epoch": 1.6770218613943713, + "grad_norm": 0.06929323077201843, + "learning_rate": 8.58241665177968e-06, + "loss": 0.4382984936237335, + "step": 9076 + }, + { + "epoch": 1.677206638103267, + "grad_norm": 0.0668279156088829, + "learning_rate": 8.580441910736305e-06, + "loss": 0.4805983901023865, + "step": 9077 + }, + { + "epoch": 1.677391414812163, + "grad_norm": 0.079178586602211, + "learning_rate": 8.5784672261869e-06, + "loss": 0.5188418626785278, + "step": 9078 + }, + { + "epoch": 1.6775761915210587, + "grad_norm": 0.07111608237028122, + "learning_rate": 8.57649259821005e-06, + "loss": 0.46800345182418823, + "step": 9079 + }, + { + "epoch": 1.6777609682299546, + "grad_norm": 0.07276073843240738, + "learning_rate": 8.574518026884345e-06, + "loss": 0.36915266513824463, + "step": 9080 + }, + { + "epoch": 1.6779457449388504, + "grad_norm": 0.07534977793693542, + "learning_rate": 8.57254351228836e-06, + "loss": 0.4622310400009155, + "step": 9081 + }, + { + "epoch": 1.6781305216477462, + "grad_norm": 0.06693067401647568, + "learning_rate": 8.570569054500676e-06, + "loss": 0.4623390734195709, + "step": 9082 + }, + { + "epoch": 1.678315298356642, + "grad_norm": 0.07008402049541473, + "learning_rate": 8.568594653599875e-06, + "loss": 0.3596046268939972, + "step": 9083 + }, + { + "epoch": 1.6785000750655379, + "grad_norm": 0.06442760676145554, + "learning_rate": 8.566620309664525e-06, + "loss": 0.4336586594581604, + "step": 9084 + }, + { + "epoch": 1.6786848517744337, + "grad_norm": 0.0926290825009346, + "learning_rate": 8.564646022773204e-06, + "loss": 0.6721182465553284, + "step": 9085 + }, + { + "epoch": 1.6788696284833295, + "grad_norm": 0.08583901077508926, + "learning_rate": 8.562671793004482e-06, + "loss": 0.4880591630935669, + "step": 9086 + }, + { + "epoch": 1.6790544051922254, + "grad_norm": 0.08112744241952896, + "learning_rate": 8.560697620436924e-06, + "loss": 0.559378445148468, + "step": 9087 + }, + { + "epoch": 1.6792391819011212, + "grad_norm": 0.06255873292684555, + "learning_rate": 8.5587235051491e-06, + "loss": 0.4209061861038208, + "step": 9088 + }, + { + "epoch": 1.6794239586100173, + "grad_norm": 0.07908415049314499, + "learning_rate": 8.55674944721957e-06, + "loss": 0.48501768708229065, + "step": 9089 + }, + { + "epoch": 1.679608735318913, + "grad_norm": 0.09763891249895096, + "learning_rate": 8.554775446726892e-06, + "loss": 0.6048413515090942, + "step": 9090 + }, + { + "epoch": 1.679793512027809, + "grad_norm": 0.08248091489076614, + "learning_rate": 8.552801503749638e-06, + "loss": 0.5712785720825195, + "step": 9091 + }, + { + "epoch": 1.6799782887367047, + "grad_norm": 0.09981803596019745, + "learning_rate": 8.550827618366352e-06, + "loss": 0.5898700952529907, + "step": 9092 + }, + { + "epoch": 1.6801630654456006, + "grad_norm": 0.07063844799995422, + "learning_rate": 8.54885379065559e-06, + "loss": 0.4967755675315857, + "step": 9093 + }, + { + "epoch": 1.6803478421544964, + "grad_norm": 0.05967190861701965, + "learning_rate": 8.546880020695913e-06, + "loss": 0.481548935174942, + "step": 9094 + }, + { + "epoch": 1.6805326188633922, + "grad_norm": 0.07140645384788513, + "learning_rate": 8.544906308565861e-06, + "loss": 0.5011531710624695, + "step": 9095 + }, + { + "epoch": 1.6807173955722883, + "grad_norm": 0.055560152977705, + "learning_rate": 8.542932654343987e-06, + "loss": 0.3030966520309448, + "step": 9096 + }, + { + "epoch": 1.680902172281184, + "grad_norm": 0.0700981393456459, + "learning_rate": 8.540959058108835e-06, + "loss": 0.47643208503723145, + "step": 9097 + }, + { + "epoch": 1.68108694899008, + "grad_norm": 0.0505668930709362, + "learning_rate": 8.538985519938947e-06, + "loss": 0.30652886629104614, + "step": 9098 + }, + { + "epoch": 1.6812717256989758, + "grad_norm": 0.07859101891517639, + "learning_rate": 8.537012039912864e-06, + "loss": 0.450094074010849, + "step": 9099 + }, + { + "epoch": 1.6814565024078716, + "grad_norm": 0.06869371980428696, + "learning_rate": 8.535038618109126e-06, + "loss": 0.36125314235687256, + "step": 9100 + }, + { + "epoch": 1.6816412791167674, + "grad_norm": 0.08045898377895355, + "learning_rate": 8.533065254606266e-06, + "loss": 0.5351243615150452, + "step": 9101 + }, + { + "epoch": 1.6818260558256632, + "grad_norm": 0.0728968009352684, + "learning_rate": 8.53109194948282e-06, + "loss": 0.4623984098434448, + "step": 9102 + }, + { + "epoch": 1.682010832534559, + "grad_norm": 0.06779941916465759, + "learning_rate": 8.529118702817321e-06, + "loss": 0.47451716661453247, + "step": 9103 + }, + { + "epoch": 1.682195609243455, + "grad_norm": 0.06614118814468384, + "learning_rate": 8.527145514688293e-06, + "loss": 0.4593086242675781, + "step": 9104 + }, + { + "epoch": 1.6823803859523507, + "grad_norm": 0.06442172080278397, + "learning_rate": 8.525172385174271e-06, + "loss": 0.31958237290382385, + "step": 9105 + }, + { + "epoch": 1.6825651626612466, + "grad_norm": 0.08403867483139038, + "learning_rate": 8.523199314353767e-06, + "loss": 0.5540264844894409, + "step": 9106 + }, + { + "epoch": 1.6827499393701424, + "grad_norm": 0.08076582103967667, + "learning_rate": 8.52122630230531e-06, + "loss": 0.6045275330543518, + "step": 9107 + }, + { + "epoch": 1.6829347160790382, + "grad_norm": 0.0629241019487381, + "learning_rate": 8.519253349107426e-06, + "loss": 0.43549901247024536, + "step": 9108 + }, + { + "epoch": 1.683119492787934, + "grad_norm": 0.06979610025882721, + "learning_rate": 8.51728045483862e-06, + "loss": 0.46166253089904785, + "step": 9109 + }, + { + "epoch": 1.6833042694968299, + "grad_norm": 0.09503830224275589, + "learning_rate": 8.515307619577415e-06, + "loss": 0.5249154567718506, + "step": 9110 + }, + { + "epoch": 1.6834890462057257, + "grad_norm": 0.10128334164619446, + "learning_rate": 8.513334843402325e-06, + "loss": 0.6709297299385071, + "step": 9111 + }, + { + "epoch": 1.6836738229146215, + "grad_norm": 0.08861662447452545, + "learning_rate": 8.511362126391853e-06, + "loss": 0.5401449203491211, + "step": 9112 + }, + { + "epoch": 1.6838585996235174, + "grad_norm": 0.06866878271102905, + "learning_rate": 8.509389468624509e-06, + "loss": 0.50966477394104, + "step": 9113 + }, + { + "epoch": 1.6840433763324132, + "grad_norm": 0.06207570061087608, + "learning_rate": 8.507416870178807e-06, + "loss": 0.4012282192707062, + "step": 9114 + }, + { + "epoch": 1.684228153041309, + "grad_norm": 0.0821419358253479, + "learning_rate": 8.50544433113324e-06, + "loss": 0.47681760787963867, + "step": 9115 + }, + { + "epoch": 1.6844129297502048, + "grad_norm": 0.0714946985244751, + "learning_rate": 8.503471851566313e-06, + "loss": 0.5043104887008667, + "step": 9116 + }, + { + "epoch": 1.6845977064591007, + "grad_norm": 0.0466858446598053, + "learning_rate": 8.501499431556526e-06, + "loss": 0.30423155426979065, + "step": 9117 + }, + { + "epoch": 1.6847824831679967, + "grad_norm": 0.07633737474679947, + "learning_rate": 8.499527071182371e-06, + "loss": 0.5213798880577087, + "step": 9118 + }, + { + "epoch": 1.6849672598768926, + "grad_norm": 0.07348579913377762, + "learning_rate": 8.497554770522346e-06, + "loss": 0.43600961565971375, + "step": 9119 + }, + { + "epoch": 1.6851520365857884, + "grad_norm": 0.07686792314052582, + "learning_rate": 8.49558252965494e-06, + "loss": 0.4652104377746582, + "step": 9120 + }, + { + "epoch": 1.6853368132946842, + "grad_norm": 0.07115445286035538, + "learning_rate": 8.493610348658641e-06, + "loss": 0.44471052289009094, + "step": 9121 + }, + { + "epoch": 1.68552159000358, + "grad_norm": 0.08396821469068527, + "learning_rate": 8.49163822761194e-06, + "loss": 0.5738164782524109, + "step": 9122 + }, + { + "epoch": 1.6857063667124759, + "grad_norm": 0.07715192437171936, + "learning_rate": 8.489666166593317e-06, + "loss": 0.4296110272407532, + "step": 9123 + }, + { + "epoch": 1.6858911434213717, + "grad_norm": 0.09075894951820374, + "learning_rate": 8.487694165681254e-06, + "loss": 0.6163529753684998, + "step": 9124 + }, + { + "epoch": 1.6860759201302675, + "grad_norm": 0.08199284225702286, + "learning_rate": 8.485722224954237e-06, + "loss": 0.5180081129074097, + "step": 9125 + }, + { + "epoch": 1.6862606968391636, + "grad_norm": 0.06685285270214081, + "learning_rate": 8.48375034449073e-06, + "loss": 0.36368563771247864, + "step": 9126 + }, + { + "epoch": 1.6864454735480594, + "grad_norm": 0.0648450031876564, + "learning_rate": 8.48177852436922e-06, + "loss": 0.34765690565109253, + "step": 9127 + }, + { + "epoch": 1.6866302502569552, + "grad_norm": 0.06646620482206345, + "learning_rate": 8.479806764668178e-06, + "loss": 0.4463668763637543, + "step": 9128 + }, + { + "epoch": 1.686815026965851, + "grad_norm": 0.06785228103399277, + "learning_rate": 8.477835065466065e-06, + "loss": 0.42032456398010254, + "step": 9129 + }, + { + "epoch": 1.686999803674747, + "grad_norm": 0.08265707641839981, + "learning_rate": 8.475863426841356e-06, + "loss": 0.6559526920318604, + "step": 9130 + }, + { + "epoch": 1.6871845803836427, + "grad_norm": 0.0722656324505806, + "learning_rate": 8.473891848872517e-06, + "loss": 0.4738600254058838, + "step": 9131 + }, + { + "epoch": 1.6873693570925385, + "grad_norm": 0.07115411013364792, + "learning_rate": 8.471920331638004e-06, + "loss": 0.3999943137168884, + "step": 9132 + }, + { + "epoch": 1.6875541338014344, + "grad_norm": 0.06939396262168884, + "learning_rate": 8.469948875216281e-06, + "loss": 0.42091649770736694, + "step": 9133 + }, + { + "epoch": 1.6877389105103302, + "grad_norm": 0.07250919193029404, + "learning_rate": 8.46797747968581e-06, + "loss": 0.4605046808719635, + "step": 9134 + }, + { + "epoch": 1.687923687219226, + "grad_norm": 0.07542398571968079, + "learning_rate": 8.466006145125038e-06, + "loss": 0.42565053701400757, + "step": 9135 + }, + { + "epoch": 1.6881084639281219, + "grad_norm": 0.08695194125175476, + "learning_rate": 8.464034871612426e-06, + "loss": 0.6231099367141724, + "step": 9136 + }, + { + "epoch": 1.6882932406370177, + "grad_norm": 0.08473557233810425, + "learning_rate": 8.462063659226419e-06, + "loss": 0.5846316814422607, + "step": 9137 + }, + { + "epoch": 1.6884780173459135, + "grad_norm": 0.07632940262556076, + "learning_rate": 8.460092508045465e-06, + "loss": 0.4694315493106842, + "step": 9138 + }, + { + "epoch": 1.6886627940548093, + "grad_norm": 0.07590238004922867, + "learning_rate": 8.458121418148013e-06, + "loss": 0.48024001717567444, + "step": 9139 + }, + { + "epoch": 1.6888475707637052, + "grad_norm": 0.07789980620145798, + "learning_rate": 8.456150389612503e-06, + "loss": 0.5048647522926331, + "step": 9140 + }, + { + "epoch": 1.689032347472601, + "grad_norm": 0.08727554976940155, + "learning_rate": 8.454179422517378e-06, + "loss": 0.5586506724357605, + "step": 9141 + }, + { + "epoch": 1.6892171241814968, + "grad_norm": 0.06976776570081711, + "learning_rate": 8.452208516941079e-06, + "loss": 0.3690648078918457, + "step": 9142 + }, + { + "epoch": 1.6894019008903927, + "grad_norm": 0.07651933282613754, + "learning_rate": 8.450237672962034e-06, + "loss": 0.580231785774231, + "step": 9143 + }, + { + "epoch": 1.6895866775992885, + "grad_norm": 0.0674920603632927, + "learning_rate": 8.448266890658683e-06, + "loss": 0.368477463722229, + "step": 9144 + }, + { + "epoch": 1.6897714543081843, + "grad_norm": 0.06983933597803116, + "learning_rate": 8.446296170109456e-06, + "loss": 0.4283688962459564, + "step": 9145 + }, + { + "epoch": 1.6899562310170801, + "grad_norm": 0.08610700815916061, + "learning_rate": 8.444325511392779e-06, + "loss": 0.5489993095397949, + "step": 9146 + }, + { + "epoch": 1.690141007725976, + "grad_norm": 0.08241380751132965, + "learning_rate": 8.442354914587079e-06, + "loss": 0.5156794190406799, + "step": 9147 + }, + { + "epoch": 1.690325784434872, + "grad_norm": 0.09464959055185318, + "learning_rate": 8.440384379770785e-06, + "loss": 0.7287381887435913, + "step": 9148 + }, + { + "epoch": 1.6905105611437679, + "grad_norm": 0.10081733018159866, + "learning_rate": 8.438413907022307e-06, + "loss": 0.624424397945404, + "step": 9149 + }, + { + "epoch": 1.6906953378526637, + "grad_norm": 0.07631852477788925, + "learning_rate": 8.436443496420071e-06, + "loss": 0.5279861092567444, + "step": 9150 + }, + { + "epoch": 1.6908801145615595, + "grad_norm": 0.07864688336849213, + "learning_rate": 8.434473148042497e-06, + "loss": 0.5660857558250427, + "step": 9151 + }, + { + "epoch": 1.6910648912704553, + "grad_norm": 0.06365436315536499, + "learning_rate": 8.432502861967991e-06, + "loss": 0.40238404273986816, + "step": 9152 + }, + { + "epoch": 1.6912496679793512, + "grad_norm": 0.06593908369541168, + "learning_rate": 8.430532638274966e-06, + "loss": 0.4057481586933136, + "step": 9153 + }, + { + "epoch": 1.691434444688247, + "grad_norm": 0.0649256780743599, + "learning_rate": 8.428562477041833e-06, + "loss": 0.3830692768096924, + "step": 9154 + }, + { + "epoch": 1.691619221397143, + "grad_norm": 0.07413194328546524, + "learning_rate": 8.426592378346995e-06, + "loss": 0.5539273023605347, + "step": 9155 + }, + { + "epoch": 1.6918039981060389, + "grad_norm": 0.07905510812997818, + "learning_rate": 8.42462234226886e-06, + "loss": 0.6951673626899719, + "step": 9156 + }, + { + "epoch": 1.6919887748149347, + "grad_norm": 0.08952132612466812, + "learning_rate": 8.422652368885825e-06, + "loss": 0.5128817558288574, + "step": 9157 + }, + { + "epoch": 1.6921735515238305, + "grad_norm": 0.07787581533193588, + "learning_rate": 8.420682458276291e-06, + "loss": 0.43147289752960205, + "step": 9158 + }, + { + "epoch": 1.6923583282327264, + "grad_norm": 0.07305676490068436, + "learning_rate": 8.418712610518657e-06, + "loss": 0.4102160334587097, + "step": 9159 + }, + { + "epoch": 1.6925431049416222, + "grad_norm": 0.06288999319076538, + "learning_rate": 8.41674282569131e-06, + "loss": 0.4454600214958191, + "step": 9160 + }, + { + "epoch": 1.692727881650518, + "grad_norm": 0.07740910351276398, + "learning_rate": 8.414773103872647e-06, + "loss": 0.5403621196746826, + "step": 9161 + }, + { + "epoch": 1.6929126583594138, + "grad_norm": 0.08491584658622742, + "learning_rate": 8.412803445141055e-06, + "loss": 0.5626220703125, + "step": 9162 + }, + { + "epoch": 1.6930974350683097, + "grad_norm": 0.07782825082540512, + "learning_rate": 8.410833849574921e-06, + "loss": 0.5752198696136475, + "step": 9163 + }, + { + "epoch": 1.6932822117772055, + "grad_norm": 0.07523659616708755, + "learning_rate": 8.408864317252626e-06, + "loss": 0.5045156478881836, + "step": 9164 + }, + { + "epoch": 1.6934669884861013, + "grad_norm": 0.0657854899764061, + "learning_rate": 8.406894848252555e-06, + "loss": 0.4462105333805084, + "step": 9165 + }, + { + "epoch": 1.6936517651949972, + "grad_norm": 0.06609152257442474, + "learning_rate": 8.404925442653084e-06, + "loss": 0.4460630714893341, + "step": 9166 + }, + { + "epoch": 1.693836541903893, + "grad_norm": 0.06646375358104706, + "learning_rate": 8.40295610053259e-06, + "loss": 0.3748570382595062, + "step": 9167 + }, + { + "epoch": 1.6940213186127888, + "grad_norm": 0.08455774188041687, + "learning_rate": 8.400986821969449e-06, + "loss": 0.5032517910003662, + "step": 9168 + }, + { + "epoch": 1.6942060953216846, + "grad_norm": 0.06838744878768921, + "learning_rate": 8.399017607042025e-06, + "loss": 0.42218753695487976, + "step": 9169 + }, + { + "epoch": 1.6943908720305805, + "grad_norm": 0.09815676510334015, + "learning_rate": 8.397048455828698e-06, + "loss": 0.6644664406776428, + "step": 9170 + }, + { + "epoch": 1.6945756487394763, + "grad_norm": 0.06904610246419907, + "learning_rate": 8.395079368407822e-06, + "loss": 0.4369781017303467, + "step": 9171 + }, + { + "epoch": 1.6947604254483721, + "grad_norm": 0.07216240465641022, + "learning_rate": 8.393110344857767e-06, + "loss": 0.5592622756958008, + "step": 9172 + }, + { + "epoch": 1.694945202157268, + "grad_norm": 0.08995957672595978, + "learning_rate": 8.391141385256894e-06, + "loss": 0.5972262024879456, + "step": 9173 + }, + { + "epoch": 1.6951299788661638, + "grad_norm": 0.06630527228116989, + "learning_rate": 8.38917248968356e-06, + "loss": 0.34516778588294983, + "step": 9174 + }, + { + "epoch": 1.6953147555750596, + "grad_norm": 0.07681643217802048, + "learning_rate": 8.38720365821612e-06, + "loss": 0.4759213924407959, + "step": 9175 + }, + { + "epoch": 1.6954995322839554, + "grad_norm": 0.08156821131706238, + "learning_rate": 8.38523489093293e-06, + "loss": 0.42252761125564575, + "step": 9176 + }, + { + "epoch": 1.6956843089928515, + "grad_norm": 0.11992143839597702, + "learning_rate": 8.383266187912338e-06, + "loss": 0.8242896795272827, + "step": 9177 + }, + { + "epoch": 1.6958690857017473, + "grad_norm": 0.07057183980941772, + "learning_rate": 8.381297549232696e-06, + "loss": 0.4482508897781372, + "step": 9178 + }, + { + "epoch": 1.6960538624106432, + "grad_norm": 0.06995099782943726, + "learning_rate": 8.379328974972347e-06, + "loss": 0.3609004616737366, + "step": 9179 + }, + { + "epoch": 1.696238639119539, + "grad_norm": 0.0819055363535881, + "learning_rate": 8.377360465209632e-06, + "loss": 0.5192528367042542, + "step": 9180 + }, + { + "epoch": 1.6964234158284348, + "grad_norm": 0.06376343965530396, + "learning_rate": 8.375392020022894e-06, + "loss": 0.3223073482513428, + "step": 9181 + }, + { + "epoch": 1.6966081925373306, + "grad_norm": 0.09627628326416016, + "learning_rate": 8.373423639490474e-06, + "loss": 0.6265350580215454, + "step": 9182 + }, + { + "epoch": 1.6967929692462265, + "grad_norm": 0.08493668586015701, + "learning_rate": 8.371455323690701e-06, + "loss": 0.6294567584991455, + "step": 9183 + }, + { + "epoch": 1.6969777459551225, + "grad_norm": 0.07084629684686661, + "learning_rate": 8.369487072701911e-06, + "loss": 0.4158245623111725, + "step": 9184 + }, + { + "epoch": 1.6971625226640183, + "grad_norm": 0.1010499894618988, + "learning_rate": 8.367518886602439e-06, + "loss": 0.6740158200263977, + "step": 9185 + }, + { + "epoch": 1.6973472993729142, + "grad_norm": 0.08781524747610092, + "learning_rate": 8.365550765470603e-06, + "loss": 0.5869169235229492, + "step": 9186 + }, + { + "epoch": 1.69753207608181, + "grad_norm": 0.08365757763385773, + "learning_rate": 8.363582709384738e-06, + "loss": 0.5274476408958435, + "step": 9187 + }, + { + "epoch": 1.6977168527907058, + "grad_norm": 0.0663229301571846, + "learning_rate": 8.361614718423157e-06, + "loss": 0.3971540629863739, + "step": 9188 + }, + { + "epoch": 1.6979016294996017, + "grad_norm": 0.0671120434999466, + "learning_rate": 8.359646792664182e-06, + "loss": 0.5282134413719177, + "step": 9189 + }, + { + "epoch": 1.6980864062084975, + "grad_norm": 0.08804799616336823, + "learning_rate": 8.357678932186141e-06, + "loss": 0.5921247005462646, + "step": 9190 + }, + { + "epoch": 1.6982711829173933, + "grad_norm": 0.08442988246679306, + "learning_rate": 8.355711137067334e-06, + "loss": 0.6660032272338867, + "step": 9191 + }, + { + "epoch": 1.6984559596262891, + "grad_norm": 0.08538249880075455, + "learning_rate": 8.35374340738608e-06, + "loss": 0.49038660526275635, + "step": 9192 + }, + { + "epoch": 1.698640736335185, + "grad_norm": 0.06832028180360794, + "learning_rate": 8.351775743220691e-06, + "loss": 0.4274335205554962, + "step": 9193 + }, + { + "epoch": 1.6988255130440808, + "grad_norm": 0.07933923602104187, + "learning_rate": 8.349808144649468e-06, + "loss": 0.517008364200592, + "step": 9194 + }, + { + "epoch": 1.6990102897529766, + "grad_norm": 0.06587305665016174, + "learning_rate": 8.347840611750718e-06, + "loss": 0.4429994821548462, + "step": 9195 + }, + { + "epoch": 1.6991950664618725, + "grad_norm": 0.0778161957859993, + "learning_rate": 8.345873144602743e-06, + "loss": 0.4726182818412781, + "step": 9196 + }, + { + "epoch": 1.6993798431707683, + "grad_norm": 0.07775843143463135, + "learning_rate": 8.343905743283842e-06, + "loss": 0.5512153506278992, + "step": 9197 + }, + { + "epoch": 1.6995646198796641, + "grad_norm": 0.08814883232116699, + "learning_rate": 8.34193840787231e-06, + "loss": 0.5667763352394104, + "step": 9198 + }, + { + "epoch": 1.69974939658856, + "grad_norm": 0.09019384533166885, + "learning_rate": 8.339971138446445e-06, + "loss": 0.5292229056358337, + "step": 9199 + }, + { + "epoch": 1.6999341732974558, + "grad_norm": 0.06382526457309723, + "learning_rate": 8.338003935084531e-06, + "loss": 0.40799641609191895, + "step": 9200 + }, + { + "epoch": 1.7001189500063516, + "grad_norm": 0.08602697402238846, + "learning_rate": 8.336036797864866e-06, + "loss": 0.6295297145843506, + "step": 9201 + }, + { + "epoch": 1.7003037267152474, + "grad_norm": 0.059395682066679, + "learning_rate": 8.334069726865727e-06, + "loss": 0.3257017433643341, + "step": 9202 + }, + { + "epoch": 1.7004885034241433, + "grad_norm": 0.0641443058848381, + "learning_rate": 8.3321027221654e-06, + "loss": 0.3689802885055542, + "step": 9203 + }, + { + "epoch": 1.700673280133039, + "grad_norm": 0.07442375272512436, + "learning_rate": 8.330135783842171e-06, + "loss": 0.4756847321987152, + "step": 9204 + }, + { + "epoch": 1.700858056841935, + "grad_norm": 0.09624522924423218, + "learning_rate": 8.328168911974308e-06, + "loss": 0.684943437576294, + "step": 9205 + }, + { + "epoch": 1.701042833550831, + "grad_norm": 0.06564196199178696, + "learning_rate": 8.326202106640093e-06, + "loss": 0.4028245806694031, + "step": 9206 + }, + { + "epoch": 1.7012276102597268, + "grad_norm": 0.07290355116128922, + "learning_rate": 8.324235367917802e-06, + "loss": 0.4737251400947571, + "step": 9207 + }, + { + "epoch": 1.7014123869686226, + "grad_norm": 0.09687570482492447, + "learning_rate": 8.322268695885697e-06, + "loss": 0.6028020977973938, + "step": 9208 + }, + { + "epoch": 1.7015971636775185, + "grad_norm": 0.06902943551540375, + "learning_rate": 8.320302090622045e-06, + "loss": 0.40670400857925415, + "step": 9209 + }, + { + "epoch": 1.7017819403864143, + "grad_norm": 0.07954740524291992, + "learning_rate": 8.318335552205124e-06, + "loss": 0.5146956443786621, + "step": 9210 + }, + { + "epoch": 1.70196671709531, + "grad_norm": 0.07294806092977524, + "learning_rate": 8.31636908071318e-06, + "loss": 0.4044404923915863, + "step": 9211 + }, + { + "epoch": 1.702151493804206, + "grad_norm": 0.0694422796368599, + "learning_rate": 8.31440267622448e-06, + "loss": 0.48178523778915405, + "step": 9212 + }, + { + "epoch": 1.7023362705131018, + "grad_norm": 0.08079212158918381, + "learning_rate": 8.312436338817282e-06, + "loss": 0.4246774911880493, + "step": 9213 + }, + { + "epoch": 1.7025210472219978, + "grad_norm": 0.08284983783960342, + "learning_rate": 8.310470068569835e-06, + "loss": 0.6177600026130676, + "step": 9214 + }, + { + "epoch": 1.7027058239308936, + "grad_norm": 0.08616185933351517, + "learning_rate": 8.308503865560395e-06, + "loss": 0.5661207437515259, + "step": 9215 + }, + { + "epoch": 1.7028906006397895, + "grad_norm": 0.06548822671175003, + "learning_rate": 8.306537729867212e-06, + "loss": 0.5266295671463013, + "step": 9216 + }, + { + "epoch": 1.7030753773486853, + "grad_norm": 0.05171108990907669, + "learning_rate": 8.304571661568526e-06, + "loss": 0.3543473482131958, + "step": 9217 + }, + { + "epoch": 1.7032601540575811, + "grad_norm": 0.07920978218317032, + "learning_rate": 8.302605660742585e-06, + "loss": 0.5950316786766052, + "step": 9218 + }, + { + "epoch": 1.703444930766477, + "grad_norm": 0.07471512258052826, + "learning_rate": 8.300639727467626e-06, + "loss": 0.4885013699531555, + "step": 9219 + }, + { + "epoch": 1.7036297074753728, + "grad_norm": 0.08602702617645264, + "learning_rate": 8.29867386182189e-06, + "loss": 0.6973574757575989, + "step": 9220 + }, + { + "epoch": 1.7038144841842686, + "grad_norm": 0.07249139994382858, + "learning_rate": 8.296708063883614e-06, + "loss": 0.46478256583213806, + "step": 9221 + }, + { + "epoch": 1.7039992608931644, + "grad_norm": 0.09790276736021042, + "learning_rate": 8.294742333731026e-06, + "loss": 0.6673256158828735, + "step": 9222 + }, + { + "epoch": 1.7041840376020603, + "grad_norm": 0.08677719533443451, + "learning_rate": 8.292776671442356e-06, + "loss": 0.6140323281288147, + "step": 9223 + }, + { + "epoch": 1.704368814310956, + "grad_norm": 0.07667604833841324, + "learning_rate": 8.29081107709584e-06, + "loss": 0.4562872648239136, + "step": 9224 + }, + { + "epoch": 1.704553591019852, + "grad_norm": 0.09175780415534973, + "learning_rate": 8.288845550769688e-06, + "loss": 0.5294307470321655, + "step": 9225 + }, + { + "epoch": 1.7047383677287478, + "grad_norm": 0.07180283963680267, + "learning_rate": 8.286880092542131e-06, + "loss": 0.4086364209651947, + "step": 9226 + }, + { + "epoch": 1.7049231444376436, + "grad_norm": 0.06642324477434158, + "learning_rate": 8.284914702491392e-06, + "loss": 0.33012962341308594, + "step": 9227 + }, + { + "epoch": 1.7051079211465394, + "grad_norm": 0.0625172108411789, + "learning_rate": 8.282949380695679e-06, + "loss": 0.3410041332244873, + "step": 9228 + }, + { + "epoch": 1.7052926978554352, + "grad_norm": 0.09009237587451935, + "learning_rate": 8.280984127233204e-06, + "loss": 0.5375388264656067, + "step": 9229 + }, + { + "epoch": 1.705477474564331, + "grad_norm": 0.08445901423692703, + "learning_rate": 8.27901894218219e-06, + "loss": 0.42762553691864014, + "step": 9230 + }, + { + "epoch": 1.705662251273227, + "grad_norm": 0.0737801045179367, + "learning_rate": 8.277053825620836e-06, + "loss": 0.4096711575984955, + "step": 9231 + }, + { + "epoch": 1.7058470279821227, + "grad_norm": 0.08339770138263702, + "learning_rate": 8.275088777627346e-06, + "loss": 0.44599124789237976, + "step": 9232 + }, + { + "epoch": 1.7060318046910186, + "grad_norm": 0.06168259680271149, + "learning_rate": 8.27312379827993e-06, + "loss": 0.43915095925331116, + "step": 9233 + }, + { + "epoch": 1.7062165813999144, + "grad_norm": 0.06330515444278717, + "learning_rate": 8.271158887656781e-06, + "loss": 0.4240843951702118, + "step": 9234 + }, + { + "epoch": 1.7064013581088104, + "grad_norm": 0.07995455712080002, + "learning_rate": 8.269194045836103e-06, + "loss": 0.5356221199035645, + "step": 9235 + }, + { + "epoch": 1.7065861348177063, + "grad_norm": 0.07991659641265869, + "learning_rate": 8.267229272896083e-06, + "loss": 0.47236770391464233, + "step": 9236 + }, + { + "epoch": 1.706770911526602, + "grad_norm": 0.07582999765872955, + "learning_rate": 8.265264568914917e-06, + "loss": 0.43814337253570557, + "step": 9237 + }, + { + "epoch": 1.706955688235498, + "grad_norm": 0.08043278008699417, + "learning_rate": 8.263299933970798e-06, + "loss": 0.4860098659992218, + "step": 9238 + }, + { + "epoch": 1.7071404649443938, + "grad_norm": 0.0776016041636467, + "learning_rate": 8.261335368141904e-06, + "loss": 0.419701486825943, + "step": 9239 + }, + { + "epoch": 1.7073252416532896, + "grad_norm": 0.08165136724710464, + "learning_rate": 8.259370871506423e-06, + "loss": 0.5630422830581665, + "step": 9240 + }, + { + "epoch": 1.7075100183621854, + "grad_norm": 0.09332025796175003, + "learning_rate": 8.257406444142539e-06, + "loss": 0.548369824886322, + "step": 9241 + }, + { + "epoch": 1.7076947950710812, + "grad_norm": 0.06935670971870422, + "learning_rate": 8.255442086128423e-06, + "loss": 0.3939349949359894, + "step": 9242 + }, + { + "epoch": 1.7078795717799773, + "grad_norm": 0.06551147997379303, + "learning_rate": 8.253477797542256e-06, + "loss": 0.41584062576293945, + "step": 9243 + }, + { + "epoch": 1.7080643484888731, + "grad_norm": 0.07558514177799225, + "learning_rate": 8.251513578462211e-06, + "loss": 0.4720218777656555, + "step": 9244 + }, + { + "epoch": 1.708249125197769, + "grad_norm": 0.08111558854579926, + "learning_rate": 8.249549428966448e-06, + "loss": 0.5089553594589233, + "step": 9245 + }, + { + "epoch": 1.7084339019066648, + "grad_norm": 0.08173785358667374, + "learning_rate": 8.247585349133145e-06, + "loss": 0.5933139324188232, + "step": 9246 + }, + { + "epoch": 1.7086186786155606, + "grad_norm": 0.08025600761175156, + "learning_rate": 8.245621339040467e-06, + "loss": 0.4453078806400299, + "step": 9247 + }, + { + "epoch": 1.7088034553244564, + "grad_norm": 0.09482229501008987, + "learning_rate": 8.243657398766565e-06, + "loss": 0.5188925862312317, + "step": 9248 + }, + { + "epoch": 1.7089882320333523, + "grad_norm": 0.10804635286331177, + "learning_rate": 8.241693528389603e-06, + "loss": 0.682178258895874, + "step": 9249 + }, + { + "epoch": 1.709173008742248, + "grad_norm": 0.09234584122896194, + "learning_rate": 8.239729727987745e-06, + "loss": 0.5333462953567505, + "step": 9250 + }, + { + "epoch": 1.709357785451144, + "grad_norm": 0.0879039317369461, + "learning_rate": 8.23776599763913e-06, + "loss": 0.49046385288238525, + "step": 9251 + }, + { + "epoch": 1.7095425621600397, + "grad_norm": 0.08815449476242065, + "learning_rate": 8.23580233742192e-06, + "loss": 0.5991628766059875, + "step": 9252 + }, + { + "epoch": 1.7097273388689356, + "grad_norm": 0.07602617889642715, + "learning_rate": 8.23383874741425e-06, + "loss": 0.48946478962898254, + "step": 9253 + }, + { + "epoch": 1.7099121155778314, + "grad_norm": 0.07960982620716095, + "learning_rate": 8.231875227694277e-06, + "loss": 0.4935239255428314, + "step": 9254 + }, + { + "epoch": 1.7100968922867272, + "grad_norm": 0.07796081900596619, + "learning_rate": 8.22991177834014e-06, + "loss": 0.5628326535224915, + "step": 9255 + }, + { + "epoch": 1.710281668995623, + "grad_norm": 0.08486931771039963, + "learning_rate": 8.227948399429973e-06, + "loss": 0.5364896655082703, + "step": 9256 + }, + { + "epoch": 1.710466445704519, + "grad_norm": 0.0651976615190506, + "learning_rate": 8.225985091041914e-06, + "loss": 0.41608723998069763, + "step": 9257 + }, + { + "epoch": 1.7106512224134147, + "grad_norm": 0.09099406003952026, + "learning_rate": 8.224021853254103e-06, + "loss": 0.5208678841590881, + "step": 9258 + }, + { + "epoch": 1.7108359991223105, + "grad_norm": 0.06338699907064438, + "learning_rate": 8.222058686144664e-06, + "loss": 0.36156728863716125, + "step": 9259 + }, + { + "epoch": 1.7110207758312064, + "grad_norm": 0.08680493384599686, + "learning_rate": 8.220095589791725e-06, + "loss": 0.5534539818763733, + "step": 9260 + }, + { + "epoch": 1.7112055525401022, + "grad_norm": 0.07716375589370728, + "learning_rate": 8.218132564273415e-06, + "loss": 0.564572811126709, + "step": 9261 + }, + { + "epoch": 1.711390329248998, + "grad_norm": 0.06359605491161346, + "learning_rate": 8.216169609667854e-06, + "loss": 0.4242609739303589, + "step": 9262 + }, + { + "epoch": 1.7115751059578939, + "grad_norm": 0.08185631781816483, + "learning_rate": 8.21420672605316e-06, + "loss": 0.48531660437583923, + "step": 9263 + }, + { + "epoch": 1.7117598826667897, + "grad_norm": 0.06824252009391785, + "learning_rate": 8.212243913507456e-06, + "loss": 0.49250736832618713, + "step": 9264 + }, + { + "epoch": 1.7119446593756857, + "grad_norm": 0.06700660288333893, + "learning_rate": 8.210281172108844e-06, + "loss": 0.414394348859787, + "step": 9265 + }, + { + "epoch": 1.7121294360845816, + "grad_norm": 0.085497185587883, + "learning_rate": 8.208318501935451e-06, + "loss": 0.6617801189422607, + "step": 9266 + }, + { + "epoch": 1.7123142127934774, + "grad_norm": 0.07121191173791885, + "learning_rate": 8.20635590306537e-06, + "loss": 0.4714220464229584, + "step": 9267 + }, + { + "epoch": 1.7124989895023732, + "grad_norm": 0.0877823531627655, + "learning_rate": 8.204393375576713e-06, + "loss": 0.5986184477806091, + "step": 9268 + }, + { + "epoch": 1.712683766211269, + "grad_norm": 0.07633073627948761, + "learning_rate": 8.202430919547584e-06, + "loss": 0.48237112164497375, + "step": 9269 + }, + { + "epoch": 1.7128685429201649, + "grad_norm": 0.0636589303612709, + "learning_rate": 8.200468535056076e-06, + "loss": 0.42352762818336487, + "step": 9270 + }, + { + "epoch": 1.7130533196290607, + "grad_norm": 0.08576313406229019, + "learning_rate": 8.198506222180294e-06, + "loss": 0.6194055676460266, + "step": 9271 + }, + { + "epoch": 1.7132380963379568, + "grad_norm": 0.07260756194591522, + "learning_rate": 8.196543980998328e-06, + "loss": 0.4961593747138977, + "step": 9272 + }, + { + "epoch": 1.7134228730468526, + "grad_norm": 0.07460932433605194, + "learning_rate": 8.194581811588268e-06, + "loss": 0.6499295234680176, + "step": 9273 + }, + { + "epoch": 1.7136076497557484, + "grad_norm": 0.07087811827659607, + "learning_rate": 8.192619714028202e-06, + "loss": 0.5215992331504822, + "step": 9274 + }, + { + "epoch": 1.7137924264646442, + "grad_norm": 0.06718888133764267, + "learning_rate": 8.190657688396223e-06, + "loss": 0.36170563101768494, + "step": 9275 + }, + { + "epoch": 1.71397720317354, + "grad_norm": 0.09156475961208344, + "learning_rate": 8.1886957347704e-06, + "loss": 0.5560495853424072, + "step": 9276 + }, + { + "epoch": 1.714161979882436, + "grad_norm": 0.07081842422485352, + "learning_rate": 8.186733853228823e-06, + "loss": 0.47347983717918396, + "step": 9277 + }, + { + "epoch": 1.7143467565913317, + "grad_norm": 0.06800918281078339, + "learning_rate": 8.184772043849568e-06, + "loss": 0.418511301279068, + "step": 9278 + }, + { + "epoch": 1.7145315333002276, + "grad_norm": 0.08380264043807983, + "learning_rate": 8.182810306710703e-06, + "loss": 0.5594571828842163, + "step": 9279 + }, + { + "epoch": 1.7147163100091234, + "grad_norm": 0.104112409055233, + "learning_rate": 8.180848641890301e-06, + "loss": 0.7429915070533752, + "step": 9280 + }, + { + "epoch": 1.7149010867180192, + "grad_norm": 0.08604252338409424, + "learning_rate": 8.178887049466438e-06, + "loss": 0.5117092132568359, + "step": 9281 + }, + { + "epoch": 1.715085863426915, + "grad_norm": 0.07407425343990326, + "learning_rate": 8.176925529517168e-06, + "loss": 0.4131082594394684, + "step": 9282 + }, + { + "epoch": 1.7152706401358109, + "grad_norm": 0.09201198071241379, + "learning_rate": 8.174964082120563e-06, + "loss": 0.4691229462623596, + "step": 9283 + }, + { + "epoch": 1.7154554168447067, + "grad_norm": 0.07233555614948273, + "learning_rate": 8.173002707354673e-06, + "loss": 0.47029054164886475, + "step": 9284 + }, + { + "epoch": 1.7156401935536025, + "grad_norm": 0.08242167532444, + "learning_rate": 8.171041405297558e-06, + "loss": 0.4827267527580261, + "step": 9285 + }, + { + "epoch": 1.7158249702624984, + "grad_norm": 0.10752084106206894, + "learning_rate": 8.16908017602728e-06, + "loss": 0.7165725827217102, + "step": 9286 + }, + { + "epoch": 1.7160097469713942, + "grad_norm": 0.06484485417604446, + "learning_rate": 8.167119019621878e-06, + "loss": 0.3245079517364502, + "step": 9287 + }, + { + "epoch": 1.71619452368029, + "grad_norm": 0.07973135262727737, + "learning_rate": 8.165157936159404e-06, + "loss": 0.5481160879135132, + "step": 9288 + }, + { + "epoch": 1.7163793003891858, + "grad_norm": 0.08740073442459106, + "learning_rate": 8.163196925717906e-06, + "loss": 0.46844935417175293, + "step": 9289 + }, + { + "epoch": 1.7165640770980817, + "grad_norm": 0.07873176783323288, + "learning_rate": 8.16123598837542e-06, + "loss": 0.551338791847229, + "step": 9290 + }, + { + "epoch": 1.7167488538069775, + "grad_norm": 0.06615818291902542, + "learning_rate": 8.159275124209992e-06, + "loss": 0.3472258448600769, + "step": 9291 + }, + { + "epoch": 1.7169336305158733, + "grad_norm": 0.06066036969423294, + "learning_rate": 8.157314333299656e-06, + "loss": 0.376941978931427, + "step": 9292 + }, + { + "epoch": 1.7171184072247692, + "grad_norm": 0.06927074491977692, + "learning_rate": 8.155353615722442e-06, + "loss": 0.4731561541557312, + "step": 9293 + }, + { + "epoch": 1.7173031839336652, + "grad_norm": 0.0620710514485836, + "learning_rate": 8.153392971556384e-06, + "loss": 0.4183216392993927, + "step": 9294 + }, + { + "epoch": 1.717487960642561, + "grad_norm": 0.08235717564821243, + "learning_rate": 8.151432400879508e-06, + "loss": 0.6418178677558899, + "step": 9295 + }, + { + "epoch": 1.7176727373514569, + "grad_norm": 0.0678219273686409, + "learning_rate": 8.14947190376984e-06, + "loss": 0.43770119547843933, + "step": 9296 + }, + { + "epoch": 1.7178575140603527, + "grad_norm": 0.04814592003822327, + "learning_rate": 8.147511480305399e-06, + "loss": 0.2407323718070984, + "step": 9297 + }, + { + "epoch": 1.7180422907692485, + "grad_norm": 0.09880015254020691, + "learning_rate": 8.14555113056421e-06, + "loss": 0.5883893966674805, + "step": 9298 + }, + { + "epoch": 1.7182270674781444, + "grad_norm": 0.08084731549024582, + "learning_rate": 8.143590854624279e-06, + "loss": 0.5978436470031738, + "step": 9299 + }, + { + "epoch": 1.7184118441870402, + "grad_norm": 0.07525096833705902, + "learning_rate": 8.141630652563627e-06, + "loss": 0.5851767063140869, + "step": 9300 + }, + { + "epoch": 1.7185966208959362, + "grad_norm": 0.08009577542543411, + "learning_rate": 8.139670524460259e-06, + "loss": 0.661676824092865, + "step": 9301 + }, + { + "epoch": 1.718781397604832, + "grad_norm": 0.06729946285486221, + "learning_rate": 8.137710470392182e-06, + "loss": 0.41329067945480347, + "step": 9302 + }, + { + "epoch": 1.718966174313728, + "grad_norm": 0.06843673437833786, + "learning_rate": 8.135750490437409e-06, + "loss": 0.41551798582077026, + "step": 9303 + }, + { + "epoch": 1.7191509510226237, + "grad_norm": 0.08220253139734268, + "learning_rate": 8.133790584673929e-06, + "loss": 0.5747065544128418, + "step": 9304 + }, + { + "epoch": 1.7193357277315195, + "grad_norm": 0.1003865972161293, + "learning_rate": 8.131830753179743e-06, + "loss": 0.6246880292892456, + "step": 9305 + }, + { + "epoch": 1.7195205044404154, + "grad_norm": 0.09194488078355789, + "learning_rate": 8.129870996032854e-06, + "loss": 0.5484771728515625, + "step": 9306 + }, + { + "epoch": 1.7197052811493112, + "grad_norm": 0.08729182183742523, + "learning_rate": 8.127911313311244e-06, + "loss": 0.5819669961929321, + "step": 9307 + }, + { + "epoch": 1.719890057858207, + "grad_norm": 0.07935697585344315, + "learning_rate": 8.125951705092908e-06, + "loss": 0.6015247106552124, + "step": 9308 + }, + { + "epoch": 1.7200748345671029, + "grad_norm": 0.0668381005525589, + "learning_rate": 8.123992171455832e-06, + "loss": 0.4277127981185913, + "step": 9309 + }, + { + "epoch": 1.7202596112759987, + "grad_norm": 0.09297236800193787, + "learning_rate": 8.122032712477996e-06, + "loss": 0.6343827247619629, + "step": 9310 + }, + { + "epoch": 1.7204443879848945, + "grad_norm": 0.08086070418357849, + "learning_rate": 8.120073328237383e-06, + "loss": 0.6234132647514343, + "step": 9311 + }, + { + "epoch": 1.7206291646937903, + "grad_norm": 0.07798231393098831, + "learning_rate": 8.118114018811973e-06, + "loss": 0.5367053151130676, + "step": 9312 + }, + { + "epoch": 1.7208139414026862, + "grad_norm": 0.08083062618970871, + "learning_rate": 8.116154784279735e-06, + "loss": 0.7250441908836365, + "step": 9313 + }, + { + "epoch": 1.720998718111582, + "grad_norm": 0.07413654029369354, + "learning_rate": 8.114195624718643e-06, + "loss": 0.48422250151634216, + "step": 9314 + }, + { + "epoch": 1.7211834948204778, + "grad_norm": 0.07940345257520676, + "learning_rate": 8.112236540206667e-06, + "loss": 0.5368062853813171, + "step": 9315 + }, + { + "epoch": 1.7213682715293737, + "grad_norm": 0.07169221341609955, + "learning_rate": 8.110277530821768e-06, + "loss": 0.501593828201294, + "step": 9316 + }, + { + "epoch": 1.7215530482382695, + "grad_norm": 0.07917898893356323, + "learning_rate": 8.108318596641913e-06, + "loss": 0.5751444697380066, + "step": 9317 + }, + { + "epoch": 1.7217378249471653, + "grad_norm": 0.0815306231379509, + "learning_rate": 8.106359737745057e-06, + "loss": 0.5254802703857422, + "step": 9318 + }, + { + "epoch": 1.7219226016560611, + "grad_norm": 0.10219857096672058, + "learning_rate": 8.104400954209161e-06, + "loss": 0.6476911306381226, + "step": 9319 + }, + { + "epoch": 1.722107378364957, + "grad_norm": 0.06414046138525009, + "learning_rate": 8.10244224611218e-06, + "loss": 0.39360371232032776, + "step": 9320 + }, + { + "epoch": 1.7222921550738528, + "grad_norm": 0.07243400812149048, + "learning_rate": 8.100483613532052e-06, + "loss": 0.584331750869751, + "step": 9321 + }, + { + "epoch": 1.7224769317827486, + "grad_norm": 0.07788241654634476, + "learning_rate": 8.09852505654674e-06, + "loss": 0.49636155366897583, + "step": 9322 + }, + { + "epoch": 1.7226617084916447, + "grad_norm": 0.07601357251405716, + "learning_rate": 8.096566575234183e-06, + "loss": 0.5032370090484619, + "step": 9323 + }, + { + "epoch": 1.7228464852005405, + "grad_norm": 0.07387128472328186, + "learning_rate": 8.094608169672318e-06, + "loss": 0.6311846971511841, + "step": 9324 + }, + { + "epoch": 1.7230312619094363, + "grad_norm": 0.0795421153306961, + "learning_rate": 8.092649839939084e-06, + "loss": 0.5139166712760925, + "step": 9325 + }, + { + "epoch": 1.7232160386183322, + "grad_norm": 0.08422742038965225, + "learning_rate": 8.090691586112424e-06, + "loss": 0.4988109767436981, + "step": 9326 + }, + { + "epoch": 1.723400815327228, + "grad_norm": 0.05840161815285683, + "learning_rate": 8.088733408270265e-06, + "loss": 0.3771735429763794, + "step": 9327 + }, + { + "epoch": 1.7235855920361238, + "grad_norm": 0.050449687987565994, + "learning_rate": 8.086775306490532e-06, + "loss": 0.39605778455734253, + "step": 9328 + }, + { + "epoch": 1.7237703687450197, + "grad_norm": 0.07807933539152145, + "learning_rate": 8.084817280851162e-06, + "loss": 0.4537414312362671, + "step": 9329 + }, + { + "epoch": 1.7239551454539155, + "grad_norm": 0.07585584372282028, + "learning_rate": 8.082859331430068e-06, + "loss": 0.48708009719848633, + "step": 9330 + }, + { + "epoch": 1.7241399221628115, + "grad_norm": 0.07332560420036316, + "learning_rate": 8.08090145830518e-06, + "loss": 0.4790911376476288, + "step": 9331 + }, + { + "epoch": 1.7243246988717074, + "grad_norm": 0.07301469892263412, + "learning_rate": 8.078943661554403e-06, + "loss": 0.4665886461734772, + "step": 9332 + }, + { + "epoch": 1.7245094755806032, + "grad_norm": 0.05806805193424225, + "learning_rate": 8.076985941255662e-06, + "loss": 0.3560118079185486, + "step": 9333 + }, + { + "epoch": 1.724694252289499, + "grad_norm": 0.07970564812421799, + "learning_rate": 8.075028297486865e-06, + "loss": 0.5235270261764526, + "step": 9334 + }, + { + "epoch": 1.7248790289983948, + "grad_norm": 0.07301004976034164, + "learning_rate": 8.073070730325917e-06, + "loss": 0.4996209740638733, + "step": 9335 + }, + { + "epoch": 1.7250638057072907, + "grad_norm": 0.06933730840682983, + "learning_rate": 8.071113239850725e-06, + "loss": 0.43340930342674255, + "step": 9336 + }, + { + "epoch": 1.7252485824161865, + "grad_norm": 0.06640269607305527, + "learning_rate": 8.069155826139195e-06, + "loss": 0.37686920166015625, + "step": 9337 + }, + { + "epoch": 1.7254333591250823, + "grad_norm": 0.08653712272644043, + "learning_rate": 8.067198489269218e-06, + "loss": 0.6015225648880005, + "step": 9338 + }, + { + "epoch": 1.7256181358339782, + "grad_norm": 0.06931693851947784, + "learning_rate": 8.065241229318696e-06, + "loss": 0.3466190695762634, + "step": 9339 + }, + { + "epoch": 1.725802912542874, + "grad_norm": 0.08971694856882095, + "learning_rate": 8.063284046365522e-06, + "loss": 0.5727711915969849, + "step": 9340 + }, + { + "epoch": 1.7259876892517698, + "grad_norm": 0.08386840671300888, + "learning_rate": 8.06132694048758e-06, + "loss": 0.6206101179122925, + "step": 9341 + }, + { + "epoch": 1.7261724659606656, + "grad_norm": 0.06562499701976776, + "learning_rate": 8.059369911762761e-06, + "loss": 0.42439258098602295, + "step": 9342 + }, + { + "epoch": 1.7263572426695615, + "grad_norm": 0.07445552945137024, + "learning_rate": 8.057412960268951e-06, + "loss": 0.5168936848640442, + "step": 9343 + }, + { + "epoch": 1.7265420193784573, + "grad_norm": 0.07908100634813309, + "learning_rate": 8.055456086084025e-06, + "loss": 0.647506833076477, + "step": 9344 + }, + { + "epoch": 1.7267267960873531, + "grad_norm": 0.07324788719415665, + "learning_rate": 8.053499289285862e-06, + "loss": 0.5335649847984314, + "step": 9345 + }, + { + "epoch": 1.726911572796249, + "grad_norm": 0.0806252658367157, + "learning_rate": 8.051542569952343e-06, + "loss": 0.4795989692211151, + "step": 9346 + }, + { + "epoch": 1.7270963495051448, + "grad_norm": 0.10036157071590424, + "learning_rate": 8.04958592816133e-06, + "loss": 0.6414546370506287, + "step": 9347 + }, + { + "epoch": 1.7272811262140406, + "grad_norm": 0.09356236457824707, + "learning_rate": 8.047629363990696e-06, + "loss": 0.6074750423431396, + "step": 9348 + }, + { + "epoch": 1.7274659029229364, + "grad_norm": 0.058355070650577545, + "learning_rate": 8.045672877518303e-06, + "loss": 0.40787652134895325, + "step": 9349 + }, + { + "epoch": 1.7276506796318323, + "grad_norm": 0.07903466373682022, + "learning_rate": 8.043716468822016e-06, + "loss": 0.438971608877182, + "step": 9350 + }, + { + "epoch": 1.727835456340728, + "grad_norm": 0.0694471225142479, + "learning_rate": 8.041760137979696e-06, + "loss": 0.365113765001297, + "step": 9351 + }, + { + "epoch": 1.728020233049624, + "grad_norm": 0.08171175420284271, + "learning_rate": 8.039803885069193e-06, + "loss": 0.5658257603645325, + "step": 9352 + }, + { + "epoch": 1.72820500975852, + "grad_norm": 0.08628968894481659, + "learning_rate": 8.037847710168362e-06, + "loss": 0.44679346680641174, + "step": 9353 + }, + { + "epoch": 1.7283897864674158, + "grad_norm": 0.08869720995426178, + "learning_rate": 8.035891613355055e-06, + "loss": 0.569314181804657, + "step": 9354 + }, + { + "epoch": 1.7285745631763116, + "grad_norm": 0.0922289565205574, + "learning_rate": 8.033935594707116e-06, + "loss": 0.6781488656997681, + "step": 9355 + }, + { + "epoch": 1.7287593398852075, + "grad_norm": 0.07456088811159134, + "learning_rate": 8.031979654302389e-06, + "loss": 0.5413568019866943, + "step": 9356 + }, + { + "epoch": 1.7289441165941033, + "grad_norm": 0.09318801015615463, + "learning_rate": 8.030023792218717e-06, + "loss": 0.722627580165863, + "step": 9357 + }, + { + "epoch": 1.7291288933029991, + "grad_norm": 0.07570246607065201, + "learning_rate": 8.028068008533931e-06, + "loss": 0.41319751739501953, + "step": 9358 + }, + { + "epoch": 1.729313670011895, + "grad_norm": 0.07300356030464172, + "learning_rate": 8.026112303325872e-06, + "loss": 0.427320659160614, + "step": 9359 + }, + { + "epoch": 1.729498446720791, + "grad_norm": 0.09192816913127899, + "learning_rate": 8.02415667667237e-06, + "loss": 0.48963406682014465, + "step": 9360 + }, + { + "epoch": 1.7296832234296868, + "grad_norm": 0.07422766089439392, + "learning_rate": 8.022201128651244e-06, + "loss": 0.41928577423095703, + "step": 9361 + }, + { + "epoch": 1.7298680001385827, + "grad_norm": 0.09352274239063263, + "learning_rate": 8.020245659340329e-06, + "loss": 0.5814811587333679, + "step": 9362 + }, + { + "epoch": 1.7300527768474785, + "grad_norm": 0.05729234963655472, + "learning_rate": 8.018290268817446e-06, + "loss": 0.3619372844696045, + "step": 9363 + }, + { + "epoch": 1.7302375535563743, + "grad_norm": 0.07728464156389236, + "learning_rate": 8.016334957160405e-06, + "loss": 0.5245939493179321, + "step": 9364 + }, + { + "epoch": 1.7304223302652701, + "grad_norm": 0.06916660815477371, + "learning_rate": 8.01437972444703e-06, + "loss": 0.5502699613571167, + "step": 9365 + }, + { + "epoch": 1.730607106974166, + "grad_norm": 0.08366513252258301, + "learning_rate": 8.012424570755129e-06, + "loss": 0.5558500289916992, + "step": 9366 + }, + { + "epoch": 1.7307918836830618, + "grad_norm": 0.08778365701436996, + "learning_rate": 8.01046949616251e-06, + "loss": 0.588188886642456, + "step": 9367 + }, + { + "epoch": 1.7309766603919576, + "grad_norm": 0.06705878674983978, + "learning_rate": 8.008514500746984e-06, + "loss": 0.47494634985923767, + "step": 9368 + }, + { + "epoch": 1.7311614371008535, + "grad_norm": 0.08472789078950882, + "learning_rate": 8.006559584586346e-06, + "loss": 0.6099481582641602, + "step": 9369 + }, + { + "epoch": 1.7313462138097493, + "grad_norm": 0.08809303492307663, + "learning_rate": 8.004604747758403e-06, + "loss": 0.5671088695526123, + "step": 9370 + }, + { + "epoch": 1.7315309905186451, + "grad_norm": 0.06466368585824966, + "learning_rate": 8.002649990340947e-06, + "loss": 0.3539324104785919, + "step": 9371 + }, + { + "epoch": 1.731715767227541, + "grad_norm": 0.06473246961832047, + "learning_rate": 8.000695312411773e-06, + "loss": 0.389993280172348, + "step": 9372 + }, + { + "epoch": 1.7319005439364368, + "grad_norm": 0.09119462966918945, + "learning_rate": 7.998740714048669e-06, + "loss": 0.5501751899719238, + "step": 9373 + }, + { + "epoch": 1.7320853206453326, + "grad_norm": 0.08342806994915009, + "learning_rate": 7.996786195329426e-06, + "loss": 0.504432737827301, + "step": 9374 + }, + { + "epoch": 1.7322700973542284, + "grad_norm": 0.07765016704797745, + "learning_rate": 7.994831756331822e-06, + "loss": 0.42915263772010803, + "step": 9375 + }, + { + "epoch": 1.7324548740631243, + "grad_norm": 0.09550423920154572, + "learning_rate": 7.992877397133643e-06, + "loss": 0.6175699234008789, + "step": 9376 + }, + { + "epoch": 1.73263965077202, + "grad_norm": 0.09117421507835388, + "learning_rate": 7.990923117812665e-06, + "loss": 0.604438841342926, + "step": 9377 + }, + { + "epoch": 1.732824427480916, + "grad_norm": 0.06244116649031639, + "learning_rate": 7.98896891844666e-06, + "loss": 0.3006291389465332, + "step": 9378 + }, + { + "epoch": 1.7330092041898117, + "grad_norm": 0.09400566667318344, + "learning_rate": 7.987014799113398e-06, + "loss": 0.6906332969665527, + "step": 9379 + }, + { + "epoch": 1.7331939808987076, + "grad_norm": 0.07890813797712326, + "learning_rate": 7.985060759890656e-06, + "loss": 0.6461838483810425, + "step": 9380 + }, + { + "epoch": 1.7333787576076034, + "grad_norm": 0.08000336587429047, + "learning_rate": 7.983106800856183e-06, + "loss": 0.6111680865287781, + "step": 9381 + }, + { + "epoch": 1.7335635343164995, + "grad_norm": 0.07113444805145264, + "learning_rate": 7.981152922087759e-06, + "loss": 0.48228105902671814, + "step": 9382 + }, + { + "epoch": 1.7337483110253953, + "grad_norm": 0.07770003378391266, + "learning_rate": 7.979199123663126e-06, + "loss": 0.47291597723960876, + "step": 9383 + }, + { + "epoch": 1.7339330877342911, + "grad_norm": 0.08316867053508759, + "learning_rate": 7.977245405660045e-06, + "loss": 0.5423946976661682, + "step": 9384 + }, + { + "epoch": 1.734117864443187, + "grad_norm": 0.06833017617464066, + "learning_rate": 7.975291768156272e-06, + "loss": 0.4052598178386688, + "step": 9385 + }, + { + "epoch": 1.7343026411520828, + "grad_norm": 0.09429217129945755, + "learning_rate": 7.97333821122955e-06, + "loss": 0.6468340754508972, + "step": 9386 + }, + { + "epoch": 1.7344874178609786, + "grad_norm": 0.09072468429803848, + "learning_rate": 7.971384734957626e-06, + "loss": 0.5985592603683472, + "step": 9387 + }, + { + "epoch": 1.7346721945698744, + "grad_norm": 0.06963697820901871, + "learning_rate": 7.969431339418245e-06, + "loss": 0.3786318600177765, + "step": 9388 + }, + { + "epoch": 1.7348569712787705, + "grad_norm": 0.08657761663198471, + "learning_rate": 7.967478024689143e-06, + "loss": 0.5320836305618286, + "step": 9389 + }, + { + "epoch": 1.7350417479876663, + "grad_norm": 0.08098902553319931, + "learning_rate": 7.965524790848055e-06, + "loss": 0.6015971302986145, + "step": 9390 + }, + { + "epoch": 1.7352265246965621, + "grad_norm": 0.07428756356239319, + "learning_rate": 7.963571637972717e-06, + "loss": 0.49490147829055786, + "step": 9391 + }, + { + "epoch": 1.735411301405458, + "grad_norm": 0.08095613121986389, + "learning_rate": 7.961618566140856e-06, + "loss": 0.525471031665802, + "step": 9392 + }, + { + "epoch": 1.7355960781143538, + "grad_norm": 0.06748519837856293, + "learning_rate": 7.959665575430198e-06, + "loss": 0.4298991858959198, + "step": 9393 + }, + { + "epoch": 1.7357808548232496, + "grad_norm": 0.08516919612884521, + "learning_rate": 7.95771266591847e-06, + "loss": 0.5407508611679077, + "step": 9394 + }, + { + "epoch": 1.7359656315321454, + "grad_norm": 0.07032306492328644, + "learning_rate": 7.955759837683386e-06, + "loss": 0.3875845968723297, + "step": 9395 + }, + { + "epoch": 1.7361504082410413, + "grad_norm": 0.07609395682811737, + "learning_rate": 7.953807090802663e-06, + "loss": 0.5288292765617371, + "step": 9396 + }, + { + "epoch": 1.736335184949937, + "grad_norm": 0.10851506143808365, + "learning_rate": 7.95185442535402e-06, + "loss": 0.7212783098220825, + "step": 9397 + }, + { + "epoch": 1.736519961658833, + "grad_norm": 0.07989294826984406, + "learning_rate": 7.94990184141516e-06, + "loss": 0.48487424850463867, + "step": 9398 + }, + { + "epoch": 1.7367047383677288, + "grad_norm": 0.0886591300368309, + "learning_rate": 7.947949339063797e-06, + "loss": 0.6012457609176636, + "step": 9399 + }, + { + "epoch": 1.7368895150766246, + "grad_norm": 0.07172589749097824, + "learning_rate": 7.945996918377627e-06, + "loss": 0.5031811594963074, + "step": 9400 + }, + { + "epoch": 1.7370742917855204, + "grad_norm": 0.0771087184548378, + "learning_rate": 7.94404457943435e-06, + "loss": 0.4250201880931854, + "step": 9401 + }, + { + "epoch": 1.7372590684944162, + "grad_norm": 0.06980002671480179, + "learning_rate": 7.942092322311674e-06, + "loss": 0.47895023226737976, + "step": 9402 + }, + { + "epoch": 1.737443845203312, + "grad_norm": 0.08409155905246735, + "learning_rate": 7.940140147087281e-06, + "loss": 0.5779047012329102, + "step": 9403 + }, + { + "epoch": 1.737628621912208, + "grad_norm": 0.08319824188947678, + "learning_rate": 7.938188053838863e-06, + "loss": 0.585312008857727, + "step": 9404 + }, + { + "epoch": 1.7378133986211037, + "grad_norm": 0.06984545290470123, + "learning_rate": 7.936236042644116e-06, + "loss": 0.3505295515060425, + "step": 9405 + }, + { + "epoch": 1.7379981753299996, + "grad_norm": 0.09396980702877045, + "learning_rate": 7.934284113580715e-06, + "loss": 0.664936900138855, + "step": 9406 + }, + { + "epoch": 1.7381829520388954, + "grad_norm": 0.07843194901943207, + "learning_rate": 7.932332266726341e-06, + "loss": 0.4490804374217987, + "step": 9407 + }, + { + "epoch": 1.7383677287477912, + "grad_norm": 0.08212018013000488, + "learning_rate": 7.930380502158678e-06, + "loss": 0.492334246635437, + "step": 9408 + }, + { + "epoch": 1.738552505456687, + "grad_norm": 0.07664764672517776, + "learning_rate": 7.928428819955395e-06, + "loss": 0.44178497791290283, + "step": 9409 + }, + { + "epoch": 1.7387372821655829, + "grad_norm": 0.07601568102836609, + "learning_rate": 7.92647722019416e-06, + "loss": 0.5334704518318176, + "step": 9410 + }, + { + "epoch": 1.738922058874479, + "grad_norm": 0.06846748292446136, + "learning_rate": 7.924525702952648e-06, + "loss": 0.4268120527267456, + "step": 9411 + }, + { + "epoch": 1.7391068355833748, + "grad_norm": 0.07301660627126694, + "learning_rate": 7.92257426830852e-06, + "loss": 0.43639346957206726, + "step": 9412 + }, + { + "epoch": 1.7392916122922706, + "grad_norm": 0.09251867234706879, + "learning_rate": 7.920622916339436e-06, + "loss": 0.6594666838645935, + "step": 9413 + }, + { + "epoch": 1.7394763890011664, + "grad_norm": 0.06635914742946625, + "learning_rate": 7.918671647123055e-06, + "loss": 0.4128287732601166, + "step": 9414 + }, + { + "epoch": 1.7396611657100622, + "grad_norm": 0.07639806717634201, + "learning_rate": 7.916720460737029e-06, + "loss": 0.46493440866470337, + "step": 9415 + }, + { + "epoch": 1.739845942418958, + "grad_norm": 0.061943717300891876, + "learning_rate": 7.914769357259015e-06, + "loss": 0.3976179361343384, + "step": 9416 + }, + { + "epoch": 1.740030719127854, + "grad_norm": 0.07549803704023361, + "learning_rate": 7.91281833676665e-06, + "loss": 0.5620946884155273, + "step": 9417 + }, + { + "epoch": 1.7402154958367497, + "grad_norm": 0.0732521340250969, + "learning_rate": 7.910867399337587e-06, + "loss": 0.3955199122428894, + "step": 9418 + }, + { + "epoch": 1.7404002725456458, + "grad_norm": 0.09458731859922409, + "learning_rate": 7.908916545049473e-06, + "loss": 0.5515162944793701, + "step": 9419 + }, + { + "epoch": 1.7405850492545416, + "grad_norm": 0.08107059448957443, + "learning_rate": 7.906965773979932e-06, + "loss": 0.4785090684890747, + "step": 9420 + }, + { + "epoch": 1.7407698259634374, + "grad_norm": 0.07096207141876221, + "learning_rate": 7.905015086206601e-06, + "loss": 0.5611074566841125, + "step": 9421 + }, + { + "epoch": 1.7409546026723333, + "grad_norm": 0.07923568785190582, + "learning_rate": 7.903064481807123e-06, + "loss": 0.631668210029602, + "step": 9422 + }, + { + "epoch": 1.741139379381229, + "grad_norm": 0.09152689576148987, + "learning_rate": 7.901113960859115e-06, + "loss": 0.6958367824554443, + "step": 9423 + }, + { + "epoch": 1.741324156090125, + "grad_norm": 0.07087397575378418, + "learning_rate": 7.899163523440201e-06, + "loss": 0.3661838173866272, + "step": 9424 + }, + { + "epoch": 1.7415089327990207, + "grad_norm": 0.07663542032241821, + "learning_rate": 7.897213169628012e-06, + "loss": 0.5703363418579102, + "step": 9425 + }, + { + "epoch": 1.7416937095079166, + "grad_norm": 0.05955135449767113, + "learning_rate": 7.895262899500158e-06, + "loss": 0.3244597911834717, + "step": 9426 + }, + { + "epoch": 1.7418784862168124, + "grad_norm": 0.07609820365905762, + "learning_rate": 7.893312713134256e-06, + "loss": 0.5215273499488831, + "step": 9427 + }, + { + "epoch": 1.7420632629257082, + "grad_norm": 0.06051214411854744, + "learning_rate": 7.891362610607918e-06, + "loss": 0.3890979588031769, + "step": 9428 + }, + { + "epoch": 1.742248039634604, + "grad_norm": 0.07205689698457718, + "learning_rate": 7.889412591998749e-06, + "loss": 0.4643065631389618, + "step": 9429 + }, + { + "epoch": 1.7424328163435, + "grad_norm": 0.09298235923051834, + "learning_rate": 7.887462657384357e-06, + "loss": 0.5200021266937256, + "step": 9430 + }, + { + "epoch": 1.7426175930523957, + "grad_norm": 0.07720060646533966, + "learning_rate": 7.88551280684234e-06, + "loss": 0.49047455191612244, + "step": 9431 + }, + { + "epoch": 1.7428023697612915, + "grad_norm": 0.09403630346059799, + "learning_rate": 7.8835630404503e-06, + "loss": 0.6260966658592224, + "step": 9432 + }, + { + "epoch": 1.7429871464701874, + "grad_norm": 0.07295014709234238, + "learning_rate": 7.88161335828583e-06, + "loss": 0.4665113389492035, + "step": 9433 + }, + { + "epoch": 1.7431719231790832, + "grad_norm": 0.08470214158296585, + "learning_rate": 7.879663760426517e-06, + "loss": 0.5773360729217529, + "step": 9434 + }, + { + "epoch": 1.743356699887979, + "grad_norm": 0.08234825730323792, + "learning_rate": 7.877714246949954e-06, + "loss": 0.558610737323761, + "step": 9435 + }, + { + "epoch": 1.7435414765968749, + "grad_norm": 0.07873562723398209, + "learning_rate": 7.87576481793373e-06, + "loss": 0.5046735405921936, + "step": 9436 + }, + { + "epoch": 1.7437262533057707, + "grad_norm": 0.05728263407945633, + "learning_rate": 7.873815473455414e-06, + "loss": 0.2989661693572998, + "step": 9437 + }, + { + "epoch": 1.7439110300146665, + "grad_norm": 0.07303888350725174, + "learning_rate": 7.871866213592589e-06, + "loss": 0.4734708070755005, + "step": 9438 + }, + { + "epoch": 1.7440958067235623, + "grad_norm": 0.08110443502664566, + "learning_rate": 7.869917038422838e-06, + "loss": 0.5412731766700745, + "step": 9439 + }, + { + "epoch": 1.7442805834324582, + "grad_norm": 0.08457206934690475, + "learning_rate": 7.867967948023716e-06, + "loss": 0.59527987241745, + "step": 9440 + }, + { + "epoch": 1.7444653601413542, + "grad_norm": 0.09340102970600128, + "learning_rate": 7.866018942472803e-06, + "loss": 0.5167881846427917, + "step": 9441 + }, + { + "epoch": 1.74465013685025, + "grad_norm": 0.06627646088600159, + "learning_rate": 7.864070021847664e-06, + "loss": 0.39817744493484497, + "step": 9442 + }, + { + "epoch": 1.7448349135591459, + "grad_norm": 0.08489912748336792, + "learning_rate": 7.862121186225851e-06, + "loss": 0.5790280103683472, + "step": 9443 + }, + { + "epoch": 1.7450196902680417, + "grad_norm": 0.06530047208070755, + "learning_rate": 7.860172435684923e-06, + "loss": 0.39563748240470886, + "step": 9444 + }, + { + "epoch": 1.7452044669769375, + "grad_norm": 0.0687323808670044, + "learning_rate": 7.858223770302447e-06, + "loss": 0.5383394956588745, + "step": 9445 + }, + { + "epoch": 1.7453892436858334, + "grad_norm": 0.10695014894008636, + "learning_rate": 7.856275190155957e-06, + "loss": 0.6556435823440552, + "step": 9446 + }, + { + "epoch": 1.7455740203947292, + "grad_norm": 0.08394554257392883, + "learning_rate": 7.854326695323012e-06, + "loss": 0.6104413270950317, + "step": 9447 + }, + { + "epoch": 1.7457587971036252, + "grad_norm": 0.06306581944227219, + "learning_rate": 7.852378285881148e-06, + "loss": 0.4853476881980896, + "step": 9448 + }, + { + "epoch": 1.745943573812521, + "grad_norm": 0.08481727540493011, + "learning_rate": 7.850429961907908e-06, + "loss": 0.5893101096153259, + "step": 9449 + }, + { + "epoch": 1.746128350521417, + "grad_norm": 0.08066444098949432, + "learning_rate": 7.848481723480835e-06, + "loss": 0.516244113445282, + "step": 9450 + }, + { + "epoch": 1.7463131272303127, + "grad_norm": 0.07358620315790176, + "learning_rate": 7.846533570677454e-06, + "loss": 0.4533986449241638, + "step": 9451 + }, + { + "epoch": 1.7464979039392086, + "grad_norm": 0.06455602496862411, + "learning_rate": 7.8445855035753e-06, + "loss": 0.504241943359375, + "step": 9452 + }, + { + "epoch": 1.7466826806481044, + "grad_norm": 0.08291526883840561, + "learning_rate": 7.842637522251902e-06, + "loss": 0.5514028668403625, + "step": 9453 + }, + { + "epoch": 1.7468674573570002, + "grad_norm": 0.07108502089977264, + "learning_rate": 7.84068962678478e-06, + "loss": 0.37282323837280273, + "step": 9454 + }, + { + "epoch": 1.747052234065896, + "grad_norm": 0.08243813365697861, + "learning_rate": 7.838741817251454e-06, + "loss": 0.5866271257400513, + "step": 9455 + }, + { + "epoch": 1.7472370107747919, + "grad_norm": 0.06616461277008057, + "learning_rate": 7.836794093729447e-06, + "loss": 0.4198001027107239, + "step": 9456 + }, + { + "epoch": 1.7474217874836877, + "grad_norm": 0.07621096819639206, + "learning_rate": 7.834846456296258e-06, + "loss": 0.419243723154068, + "step": 9457 + }, + { + "epoch": 1.7476065641925835, + "grad_norm": 0.08235196769237518, + "learning_rate": 7.832898905029412e-06, + "loss": 0.5241071581840515, + "step": 9458 + }, + { + "epoch": 1.7477913409014794, + "grad_norm": 0.09907345473766327, + "learning_rate": 7.830951440006412e-06, + "loss": 0.6666129231452942, + "step": 9459 + }, + { + "epoch": 1.7479761176103752, + "grad_norm": 0.0671384260058403, + "learning_rate": 7.829004061304753e-06, + "loss": 0.3617722988128662, + "step": 9460 + }, + { + "epoch": 1.748160894319271, + "grad_norm": 0.07696773111820221, + "learning_rate": 7.827056769001942e-06, + "loss": 0.4717048406600952, + "step": 9461 + }, + { + "epoch": 1.7483456710281668, + "grad_norm": 0.07589234411716461, + "learning_rate": 7.825109563175478e-06, + "loss": 0.5012123584747314, + "step": 9462 + }, + { + "epoch": 1.7485304477370627, + "grad_norm": 0.08382602035999298, + "learning_rate": 7.823162443902845e-06, + "loss": 0.5069903135299683, + "step": 9463 + }, + { + "epoch": 1.7487152244459585, + "grad_norm": 0.08438605815172195, + "learning_rate": 7.821215411261537e-06, + "loss": 0.6001019477844238, + "step": 9464 + }, + { + "epoch": 1.7489000011548543, + "grad_norm": 0.07784610241651535, + "learning_rate": 7.819268465329038e-06, + "loss": 0.494361013174057, + "step": 9465 + }, + { + "epoch": 1.7490847778637502, + "grad_norm": 0.0861455500125885, + "learning_rate": 7.81732160618283e-06, + "loss": 0.7007192969322205, + "step": 9466 + }, + { + "epoch": 1.749269554572646, + "grad_norm": 0.07989050447940826, + "learning_rate": 7.815374833900398e-06, + "loss": 0.46829497814178467, + "step": 9467 + }, + { + "epoch": 1.7494543312815418, + "grad_norm": 0.09458218514919281, + "learning_rate": 7.813428148559208e-06, + "loss": 0.5205943584442139, + "step": 9468 + }, + { + "epoch": 1.7496391079904376, + "grad_norm": 0.06660239398479462, + "learning_rate": 7.811481550236739e-06, + "loss": 0.41754552721977234, + "step": 9469 + }, + { + "epoch": 1.7498238846993337, + "grad_norm": 0.07794316858053207, + "learning_rate": 7.80953503901046e-06, + "loss": 0.46037155389785767, + "step": 9470 + }, + { + "epoch": 1.7500086614082295, + "grad_norm": 0.07195150852203369, + "learning_rate": 7.807588614957829e-06, + "loss": 0.4594876766204834, + "step": 9471 + }, + { + "epoch": 1.7501934381171254, + "grad_norm": 0.08619675040245056, + "learning_rate": 7.805642278156313e-06, + "loss": 0.527175784111023, + "step": 9472 + }, + { + "epoch": 1.7503782148260212, + "grad_norm": 0.08233191072940826, + "learning_rate": 7.80369602868337e-06, + "loss": 0.6458790898323059, + "step": 9473 + }, + { + "epoch": 1.750562991534917, + "grad_norm": 0.07300320267677307, + "learning_rate": 7.801749866616453e-06, + "loss": 0.39369773864746094, + "step": 9474 + }, + { + "epoch": 1.7507477682438128, + "grad_norm": 0.09571012854576111, + "learning_rate": 7.799803792033014e-06, + "loss": 0.5348305106163025, + "step": 9475 + }, + { + "epoch": 1.7509325449527087, + "grad_norm": 0.08784550428390503, + "learning_rate": 7.797857805010502e-06, + "loss": 0.5967904329299927, + "step": 9476 + }, + { + "epoch": 1.7511173216616047, + "grad_norm": 0.0804838165640831, + "learning_rate": 7.795911905626356e-06, + "loss": 0.4976516366004944, + "step": 9477 + }, + { + "epoch": 1.7513020983705005, + "grad_norm": 0.08038236200809479, + "learning_rate": 7.793966093958028e-06, + "loss": 0.5232406854629517, + "step": 9478 + }, + { + "epoch": 1.7514868750793964, + "grad_norm": 0.08380588889122009, + "learning_rate": 7.79202037008294e-06, + "loss": 0.552866518497467, + "step": 9479 + }, + { + "epoch": 1.7516716517882922, + "grad_norm": 0.07746408134698868, + "learning_rate": 7.790074734078533e-06, + "loss": 0.41058093309402466, + "step": 9480 + }, + { + "epoch": 1.751856428497188, + "grad_norm": 0.0629132091999054, + "learning_rate": 7.788129186022244e-06, + "loss": 0.42107585072517395, + "step": 9481 + }, + { + "epoch": 1.7520412052060839, + "grad_norm": 0.061843644827604294, + "learning_rate": 7.78618372599149e-06, + "loss": 0.34514370560646057, + "step": 9482 + }, + { + "epoch": 1.7522259819149797, + "grad_norm": 0.09421967715024948, + "learning_rate": 7.784238354063697e-06, + "loss": 0.5947861075401306, + "step": 9483 + }, + { + "epoch": 1.7524107586238755, + "grad_norm": 0.06692761182785034, + "learning_rate": 7.782293070316287e-06, + "loss": 0.2929871380329132, + "step": 9484 + }, + { + "epoch": 1.7525955353327713, + "grad_norm": 0.07493630051612854, + "learning_rate": 7.780347874826672e-06, + "loss": 0.47335997223854065, + "step": 9485 + }, + { + "epoch": 1.7527803120416672, + "grad_norm": 0.08465097099542618, + "learning_rate": 7.778402767672268e-06, + "loss": 0.5007327795028687, + "step": 9486 + }, + { + "epoch": 1.752965088750563, + "grad_norm": 0.0741303339600563, + "learning_rate": 7.776457748930486e-06, + "loss": 0.41544151306152344, + "step": 9487 + }, + { + "epoch": 1.7531498654594588, + "grad_norm": 0.05960865691304207, + "learning_rate": 7.774512818678724e-06, + "loss": 0.3089883625507355, + "step": 9488 + }, + { + "epoch": 1.7533346421683547, + "grad_norm": 0.08837155252695084, + "learning_rate": 7.772567976994392e-06, + "loss": 0.5421566367149353, + "step": 9489 + }, + { + "epoch": 1.7535194188772505, + "grad_norm": 0.08413968235254288, + "learning_rate": 7.770623223954887e-06, + "loss": 0.6096692681312561, + "step": 9490 + }, + { + "epoch": 1.7537041955861463, + "grad_norm": 0.06976504623889923, + "learning_rate": 7.768678559637601e-06, + "loss": 0.4283706545829773, + "step": 9491 + }, + { + "epoch": 1.7538889722950421, + "grad_norm": 0.07454965263605118, + "learning_rate": 7.766733984119927e-06, + "loss": 0.5498755574226379, + "step": 9492 + }, + { + "epoch": 1.754073749003938, + "grad_norm": 0.07341553270816803, + "learning_rate": 7.764789497479256e-06, + "loss": 0.45700550079345703, + "step": 9493 + }, + { + "epoch": 1.7542585257128338, + "grad_norm": 0.07001147419214249, + "learning_rate": 7.762845099792968e-06, + "loss": 0.4273150861263275, + "step": 9494 + }, + { + "epoch": 1.7544433024217296, + "grad_norm": 0.05497468262910843, + "learning_rate": 7.76090079113845e-06, + "loss": 0.3325614035129547, + "step": 9495 + }, + { + "epoch": 1.7546280791306255, + "grad_norm": 0.05593828856945038, + "learning_rate": 7.758956571593069e-06, + "loss": 0.3491348326206207, + "step": 9496 + }, + { + "epoch": 1.7548128558395213, + "grad_norm": 0.10387798398733139, + "learning_rate": 7.757012441234206e-06, + "loss": 0.5724706053733826, + "step": 9497 + }, + { + "epoch": 1.7549976325484171, + "grad_norm": 0.07001633197069168, + "learning_rate": 7.755068400139236e-06, + "loss": 0.3945654034614563, + "step": 9498 + }, + { + "epoch": 1.7551824092573132, + "grad_norm": 0.09581859409809113, + "learning_rate": 7.753124448385514e-06, + "loss": 0.7347464561462402, + "step": 9499 + }, + { + "epoch": 1.755367185966209, + "grad_norm": 0.0977296233177185, + "learning_rate": 7.751180586050409e-06, + "loss": 0.734951376914978, + "step": 9500 + }, + { + "epoch": 1.755367185966209, + "eval_loss": 0.5634395480155945, + "eval_runtime": 156.2077, + "eval_samples_per_second": 116.697, + "eval_steps_per_second": 14.59, + "step": 9500 + }, + { + "epoch": 1.7555519626751048, + "grad_norm": 0.07938997447490692, + "learning_rate": 7.749236813211288e-06, + "loss": 0.47607746720314026, + "step": 9501 + }, + { + "epoch": 1.7557367393840007, + "grad_norm": 0.0752115324139595, + "learning_rate": 7.747293129945495e-06, + "loss": 0.7275061011314392, + "step": 9502 + }, + { + "epoch": 1.7559215160928965, + "grad_norm": 0.06737703084945679, + "learning_rate": 7.745349536330387e-06, + "loss": 0.4250943958759308, + "step": 9503 + }, + { + "epoch": 1.7561062928017923, + "grad_norm": 0.07570453733205795, + "learning_rate": 7.743406032443318e-06, + "loss": 0.5231233835220337, + "step": 9504 + }, + { + "epoch": 1.7562910695106881, + "grad_norm": 0.07378605753183365, + "learning_rate": 7.741462618361624e-06, + "loss": 0.4782612919807434, + "step": 9505 + }, + { + "epoch": 1.756475846219584, + "grad_norm": 0.07631854712963104, + "learning_rate": 7.739519294162652e-06, + "loss": 0.4818604588508606, + "step": 9506 + }, + { + "epoch": 1.75666062292848, + "grad_norm": 0.08436261117458344, + "learning_rate": 7.737576059923742e-06, + "loss": 0.5321990251541138, + "step": 9507 + }, + { + "epoch": 1.7568453996373758, + "grad_norm": 0.07916104048490524, + "learning_rate": 7.735632915722227e-06, + "loss": 0.5069300532341003, + "step": 9508 + }, + { + "epoch": 1.7570301763462717, + "grad_norm": 0.08644711226224899, + "learning_rate": 7.733689861635435e-06, + "loss": 0.511889636516571, + "step": 9509 + }, + { + "epoch": 1.7572149530551675, + "grad_norm": 0.08409576117992401, + "learning_rate": 7.7317468977407e-06, + "loss": 0.4757402241230011, + "step": 9510 + }, + { + "epoch": 1.7573997297640633, + "grad_norm": 0.0835278183221817, + "learning_rate": 7.729804024115339e-06, + "loss": 0.5135257244110107, + "step": 9511 + }, + { + "epoch": 1.7575845064729592, + "grad_norm": 0.08325167745351791, + "learning_rate": 7.727861240836679e-06, + "loss": 0.5767073035240173, + "step": 9512 + }, + { + "epoch": 1.757769283181855, + "grad_norm": 0.06520769000053406, + "learning_rate": 7.725918547982027e-06, + "loss": 0.3839736878871918, + "step": 9513 + }, + { + "epoch": 1.7579540598907508, + "grad_norm": 0.07241534441709518, + "learning_rate": 7.723975945628706e-06, + "loss": 0.45690375566482544, + "step": 9514 + }, + { + "epoch": 1.7581388365996466, + "grad_norm": 0.09301277995109558, + "learning_rate": 7.722033433854023e-06, + "loss": 0.6211903691291809, + "step": 9515 + }, + { + "epoch": 1.7583236133085425, + "grad_norm": 0.05278944596648216, + "learning_rate": 7.720091012735277e-06, + "loss": 0.34502747654914856, + "step": 9516 + }, + { + "epoch": 1.7585083900174383, + "grad_norm": 0.07892879843711853, + "learning_rate": 7.71814868234978e-06, + "loss": 0.4249326288700104, + "step": 9517 + }, + { + "epoch": 1.7586931667263341, + "grad_norm": 0.09665438532829285, + "learning_rate": 7.71620644277483e-06, + "loss": 0.5845499634742737, + "step": 9518 + }, + { + "epoch": 1.75887794343523, + "grad_norm": 0.06883665919303894, + "learning_rate": 7.714264294087711e-06, + "loss": 0.513221025466919, + "step": 9519 + }, + { + "epoch": 1.7590627201441258, + "grad_norm": 0.0666922777891159, + "learning_rate": 7.712322236365724e-06, + "loss": 0.5049949884414673, + "step": 9520 + }, + { + "epoch": 1.7592474968530216, + "grad_norm": 0.07511871308088303, + "learning_rate": 7.71038026968616e-06, + "loss": 0.4871228337287903, + "step": 9521 + }, + { + "epoch": 1.7594322735619174, + "grad_norm": 0.06378930807113647, + "learning_rate": 7.708438394126292e-06, + "loss": 0.3740116059780121, + "step": 9522 + }, + { + "epoch": 1.7596170502708133, + "grad_norm": 0.0661415383219719, + "learning_rate": 7.706496609763407e-06, + "loss": 0.49298548698425293, + "step": 9523 + }, + { + "epoch": 1.759801826979709, + "grad_norm": 0.07151772826910019, + "learning_rate": 7.704554916674785e-06, + "loss": 0.4438452422618866, + "step": 9524 + }, + { + "epoch": 1.759986603688605, + "grad_norm": 0.06835362315177917, + "learning_rate": 7.702613314937692e-06, + "loss": 0.48267531394958496, + "step": 9525 + }, + { + "epoch": 1.7601713803975008, + "grad_norm": 0.07833728194236755, + "learning_rate": 7.700671804629402e-06, + "loss": 0.4843129515647888, + "step": 9526 + }, + { + "epoch": 1.7603561571063966, + "grad_norm": 0.08553629368543625, + "learning_rate": 7.698730385827184e-06, + "loss": 0.5773013234138489, + "step": 9527 + }, + { + "epoch": 1.7605409338152924, + "grad_norm": 0.11041746288537979, + "learning_rate": 7.696789058608294e-06, + "loss": 0.79600989818573, + "step": 9528 + }, + { + "epoch": 1.7607257105241885, + "grad_norm": 0.09957388788461685, + "learning_rate": 7.694847823049995e-06, + "loss": 0.6443513035774231, + "step": 9529 + }, + { + "epoch": 1.7609104872330843, + "grad_norm": 0.07423588633537292, + "learning_rate": 7.692906679229539e-06, + "loss": 0.4746418297290802, + "step": 9530 + }, + { + "epoch": 1.7610952639419801, + "grad_norm": 0.09016186743974686, + "learning_rate": 7.690965627224181e-06, + "loss": 0.45739561319351196, + "step": 9531 + }, + { + "epoch": 1.761280040650876, + "grad_norm": 0.07074403762817383, + "learning_rate": 7.689024667111167e-06, + "loss": 0.5079753994941711, + "step": 9532 + }, + { + "epoch": 1.7614648173597718, + "grad_norm": 0.07867482304573059, + "learning_rate": 7.687083798967739e-06, + "loss": 0.5321707129478455, + "step": 9533 + }, + { + "epoch": 1.7616495940686676, + "grad_norm": 0.08339225500822067, + "learning_rate": 7.68514302287114e-06, + "loss": 0.6382849812507629, + "step": 9534 + }, + { + "epoch": 1.7618343707775634, + "grad_norm": 0.06861494481563568, + "learning_rate": 7.68320233889861e-06, + "loss": 0.5510586500167847, + "step": 9535 + }, + { + "epoch": 1.7620191474864595, + "grad_norm": 0.08419942855834961, + "learning_rate": 7.681261747127375e-06, + "loss": 0.5666383504867554, + "step": 9536 + }, + { + "epoch": 1.7622039241953553, + "grad_norm": 0.07097092270851135, + "learning_rate": 7.679321247634667e-06, + "loss": 0.47546514868736267, + "step": 9537 + }, + { + "epoch": 1.7623887009042511, + "grad_norm": 0.0674498975276947, + "learning_rate": 7.67738084049772e-06, + "loss": 0.4514087736606598, + "step": 9538 + }, + { + "epoch": 1.762573477613147, + "grad_norm": 0.08018751442432404, + "learning_rate": 7.67544052579374e-06, + "loss": 0.4930320382118225, + "step": 9539 + }, + { + "epoch": 1.7627582543220428, + "grad_norm": 0.05618758499622345, + "learning_rate": 7.673500303599956e-06, + "loss": 0.3329872786998749, + "step": 9540 + }, + { + "epoch": 1.7629430310309386, + "grad_norm": 0.10689683258533478, + "learning_rate": 7.671560173993588e-06, + "loss": 0.677473783493042, + "step": 9541 + }, + { + "epoch": 1.7631278077398345, + "grad_norm": 0.08824285119771957, + "learning_rate": 7.669620137051835e-06, + "loss": 0.6107396483421326, + "step": 9542 + }, + { + "epoch": 1.7633125844487303, + "grad_norm": 0.08850574493408203, + "learning_rate": 7.667680192851912e-06, + "loss": 0.5533414483070374, + "step": 9543 + }, + { + "epoch": 1.7634973611576261, + "grad_norm": 0.07688694447278976, + "learning_rate": 7.665740341471017e-06, + "loss": 0.46593785285949707, + "step": 9544 + }, + { + "epoch": 1.763682137866522, + "grad_norm": 0.0715436115860939, + "learning_rate": 7.663800582986356e-06, + "loss": 0.3948075771331787, + "step": 9545 + }, + { + "epoch": 1.7638669145754178, + "grad_norm": 0.057816267013549805, + "learning_rate": 7.661860917475124e-06, + "loss": 0.368903785943985, + "step": 9546 + }, + { + "epoch": 1.7640516912843136, + "grad_norm": 0.08684322983026505, + "learning_rate": 7.659921345014509e-06, + "loss": 0.5822094082832336, + "step": 9547 + }, + { + "epoch": 1.7642364679932094, + "grad_norm": 0.09293843060731888, + "learning_rate": 7.657981865681704e-06, + "loss": 0.6093835234642029, + "step": 9548 + }, + { + "epoch": 1.7644212447021053, + "grad_norm": 0.07811232656240463, + "learning_rate": 7.656042479553896e-06, + "loss": 0.463060587644577, + "step": 9549 + }, + { + "epoch": 1.764606021411001, + "grad_norm": 0.07818163931369781, + "learning_rate": 7.654103186708262e-06, + "loss": 0.5459082126617432, + "step": 9550 + }, + { + "epoch": 1.764790798119897, + "grad_norm": 0.07132180780172348, + "learning_rate": 7.65216398722198e-06, + "loss": 0.4241693615913391, + "step": 9551 + }, + { + "epoch": 1.7649755748287927, + "grad_norm": 0.07287544012069702, + "learning_rate": 7.65022488117223e-06, + "loss": 0.49057701230049133, + "step": 9552 + }, + { + "epoch": 1.7651603515376886, + "grad_norm": 0.06782901287078857, + "learning_rate": 7.648285868636177e-06, + "loss": 0.4755297303199768, + "step": 9553 + }, + { + "epoch": 1.7653451282465844, + "grad_norm": 0.06399807333946228, + "learning_rate": 7.646346949690987e-06, + "loss": 0.35742703080177307, + "step": 9554 + }, + { + "epoch": 1.7655299049554802, + "grad_norm": 0.07754683494567871, + "learning_rate": 7.644408124413828e-06, + "loss": 0.6166073083877563, + "step": 9555 + }, + { + "epoch": 1.765714681664376, + "grad_norm": 0.0765303298830986, + "learning_rate": 7.642469392881852e-06, + "loss": 0.4563341736793518, + "step": 9556 + }, + { + "epoch": 1.765899458373272, + "grad_norm": 0.07518597692251205, + "learning_rate": 7.64053075517222e-06, + "loss": 0.49914613366127014, + "step": 9557 + }, + { + "epoch": 1.766084235082168, + "grad_norm": 0.07142817974090576, + "learning_rate": 7.638592211362086e-06, + "loss": 0.45610418915748596, + "step": 9558 + }, + { + "epoch": 1.7662690117910638, + "grad_norm": 0.07195606827735901, + "learning_rate": 7.63665376152859e-06, + "loss": 0.4759080410003662, + "step": 9559 + }, + { + "epoch": 1.7664537884999596, + "grad_norm": 0.07048597931861877, + "learning_rate": 7.634715405748881e-06, + "loss": 0.4257037341594696, + "step": 9560 + }, + { + "epoch": 1.7666385652088554, + "grad_norm": 0.08749891072511673, + "learning_rate": 7.632777144100099e-06, + "loss": 0.46232447028160095, + "step": 9561 + }, + { + "epoch": 1.7668233419177513, + "grad_norm": 0.06787905097007751, + "learning_rate": 7.630838976659379e-06, + "loss": 0.371409147977829, + "step": 9562 + }, + { + "epoch": 1.767008118626647, + "grad_norm": 0.07177285850048065, + "learning_rate": 7.628900903503858e-06, + "loss": 0.414307177066803, + "step": 9563 + }, + { + "epoch": 1.767192895335543, + "grad_norm": 0.0932188481092453, + "learning_rate": 7.62696292471066e-06, + "loss": 0.6285747289657593, + "step": 9564 + }, + { + "epoch": 1.767377672044439, + "grad_norm": 0.08144089579582214, + "learning_rate": 7.625025040356915e-06, + "loss": 0.4960605800151825, + "step": 9565 + }, + { + "epoch": 1.7675624487533348, + "grad_norm": 0.07047521322965622, + "learning_rate": 7.623087250519744e-06, + "loss": 0.35761797428131104, + "step": 9566 + }, + { + "epoch": 1.7677472254622306, + "grad_norm": 0.08805403113365173, + "learning_rate": 7.621149555276262e-06, + "loss": 0.5082883834838867, + "step": 9567 + }, + { + "epoch": 1.7679320021711264, + "grad_norm": 0.09301161766052246, + "learning_rate": 7.619211954703586e-06, + "loss": 0.6269007325172424, + "step": 9568 + }, + { + "epoch": 1.7681167788800223, + "grad_norm": 0.08129319548606873, + "learning_rate": 7.6172744488788276e-06, + "loss": 0.5256569981575012, + "step": 9569 + }, + { + "epoch": 1.768301555588918, + "grad_norm": 0.05803351476788521, + "learning_rate": 7.615337037879089e-06, + "loss": 0.37849074602127075, + "step": 9570 + }, + { + "epoch": 1.768486332297814, + "grad_norm": 0.08406578749418259, + "learning_rate": 7.613399721781476e-06, + "loss": 0.5408613681793213, + "step": 9571 + }, + { + "epoch": 1.7686711090067098, + "grad_norm": 0.0749550610780716, + "learning_rate": 7.6114625006630885e-06, + "loss": 0.5040973424911499, + "step": 9572 + }, + { + "epoch": 1.7688558857156056, + "grad_norm": 0.06905291229486465, + "learning_rate": 7.609525374601019e-06, + "loss": 0.462920606136322, + "step": 9573 + }, + { + "epoch": 1.7690406624245014, + "grad_norm": 0.09600643068552017, + "learning_rate": 7.607588343672361e-06, + "loss": 0.7481486201286316, + "step": 9574 + }, + { + "epoch": 1.7692254391333972, + "grad_norm": 0.1076044961810112, + "learning_rate": 7.605651407954207e-06, + "loss": 0.713120698928833, + "step": 9575 + }, + { + "epoch": 1.769410215842293, + "grad_norm": 0.08154827356338501, + "learning_rate": 7.603714567523629e-06, + "loss": 0.5379562973976135, + "step": 9576 + }, + { + "epoch": 1.769594992551189, + "grad_norm": 0.07339855283498764, + "learning_rate": 7.60177782245772e-06, + "loss": 0.4116850197315216, + "step": 9577 + }, + { + "epoch": 1.7697797692600847, + "grad_norm": 0.07978306710720062, + "learning_rate": 7.599841172833548e-06, + "loss": 0.49824514985084534, + "step": 9578 + }, + { + "epoch": 1.7699645459689806, + "grad_norm": 0.07998265326023102, + "learning_rate": 7.597904618728187e-06, + "loss": 0.5987973809242249, + "step": 9579 + }, + { + "epoch": 1.7701493226778764, + "grad_norm": 0.07898522913455963, + "learning_rate": 7.5959681602187085e-06, + "loss": 0.47599172592163086, + "step": 9580 + }, + { + "epoch": 1.7703340993867722, + "grad_norm": 0.08431115746498108, + "learning_rate": 7.594031797382174e-06, + "loss": 0.5221173763275146, + "step": 9581 + }, + { + "epoch": 1.770518876095668, + "grad_norm": 0.07711915671825409, + "learning_rate": 7.592095530295648e-06, + "loss": 0.5558249950408936, + "step": 9582 + }, + { + "epoch": 1.7707036528045639, + "grad_norm": 0.09012097120285034, + "learning_rate": 7.590159359036188e-06, + "loss": 0.7073286771774292, + "step": 9583 + }, + { + "epoch": 1.7708884295134597, + "grad_norm": 0.07047846913337708, + "learning_rate": 7.588223283680844e-06, + "loss": 0.5091392993927002, + "step": 9584 + }, + { + "epoch": 1.7710732062223555, + "grad_norm": 0.07714288681745529, + "learning_rate": 7.586287304306667e-06, + "loss": 0.5538867115974426, + "step": 9585 + }, + { + "epoch": 1.7712579829312514, + "grad_norm": 0.0795782133936882, + "learning_rate": 7.584351420990707e-06, + "loss": 0.4915255606174469, + "step": 9586 + }, + { + "epoch": 1.7714427596401474, + "grad_norm": 0.08537991344928741, + "learning_rate": 7.58241563381e-06, + "loss": 0.48552417755126953, + "step": 9587 + }, + { + "epoch": 1.7716275363490432, + "grad_norm": 0.10023138672113419, + "learning_rate": 7.5804799428415865e-06, + "loss": 0.6737853288650513, + "step": 9588 + }, + { + "epoch": 1.771812313057939, + "grad_norm": 0.07969482243061066, + "learning_rate": 7.578544348162504e-06, + "loss": 0.5007420182228088, + "step": 9589 + }, + { + "epoch": 1.771997089766835, + "grad_norm": 0.09847646951675415, + "learning_rate": 7.5766088498497805e-06, + "loss": 0.6564161777496338, + "step": 9590 + }, + { + "epoch": 1.7721818664757307, + "grad_norm": 0.07532694935798645, + "learning_rate": 7.574673447980441e-06, + "loss": 0.5126252174377441, + "step": 9591 + }, + { + "epoch": 1.7723666431846266, + "grad_norm": 0.09312979876995087, + "learning_rate": 7.572738142631513e-06, + "loss": 0.6322164535522461, + "step": 9592 + }, + { + "epoch": 1.7725514198935224, + "grad_norm": 0.06892203539609909, + "learning_rate": 7.5708029338800104e-06, + "loss": 0.3911009132862091, + "step": 9593 + }, + { + "epoch": 1.7727361966024182, + "grad_norm": 0.07569395750761032, + "learning_rate": 7.5688678218029564e-06, + "loss": 0.4439738094806671, + "step": 9594 + }, + { + "epoch": 1.7729209733113143, + "grad_norm": 0.07597517222166061, + "learning_rate": 7.5669328064773515e-06, + "loss": 0.43144547939300537, + "step": 9595 + }, + { + "epoch": 1.77310575002021, + "grad_norm": 0.0796041339635849, + "learning_rate": 7.564997887980208e-06, + "loss": 0.5145467519760132, + "step": 9596 + }, + { + "epoch": 1.773290526729106, + "grad_norm": 0.06158768758177757, + "learning_rate": 7.563063066388537e-06, + "loss": 0.4237648546695709, + "step": 9597 + }, + { + "epoch": 1.7734753034380017, + "grad_norm": 0.06502994149923325, + "learning_rate": 7.561128341779327e-06, + "loss": 0.41174840927124023, + "step": 9598 + }, + { + "epoch": 1.7736600801468976, + "grad_norm": 0.069393090903759, + "learning_rate": 7.5591937142295775e-06, + "loss": 0.4664877951145172, + "step": 9599 + }, + { + "epoch": 1.7738448568557934, + "grad_norm": 0.06445951014757156, + "learning_rate": 7.557259183816286e-06, + "loss": 0.45197027921676636, + "step": 9600 + }, + { + "epoch": 1.7740296335646892, + "grad_norm": 0.09038061648607254, + "learning_rate": 7.555324750616433e-06, + "loss": 0.5763864517211914, + "step": 9601 + }, + { + "epoch": 1.774214410273585, + "grad_norm": 0.0962783694267273, + "learning_rate": 7.553390414707007e-06, + "loss": 0.7027230262756348, + "step": 9602 + }, + { + "epoch": 1.774399186982481, + "grad_norm": 0.08621051907539368, + "learning_rate": 7.551456176164989e-06, + "loss": 0.5542473793029785, + "step": 9603 + }, + { + "epoch": 1.7745839636913767, + "grad_norm": 0.07505802810192108, + "learning_rate": 7.549522035067355e-06, + "loss": 0.4667530953884125, + "step": 9604 + }, + { + "epoch": 1.7747687404002725, + "grad_norm": 0.09039128571748734, + "learning_rate": 7.5475879914910755e-06, + "loss": 0.6583998799324036, + "step": 9605 + }, + { + "epoch": 1.7749535171091684, + "grad_norm": 0.07745156437158585, + "learning_rate": 7.545654045513125e-06, + "loss": 0.5156211853027344, + "step": 9606 + }, + { + "epoch": 1.7751382938180642, + "grad_norm": 0.08875171840190887, + "learning_rate": 7.543720197210461e-06, + "loss": 0.5398255586624146, + "step": 9607 + }, + { + "epoch": 1.77532307052696, + "grad_norm": 0.08882952481508255, + "learning_rate": 7.541786446660051e-06, + "loss": 0.5149835348129272, + "step": 9608 + }, + { + "epoch": 1.7755078472358559, + "grad_norm": 0.06775467097759247, + "learning_rate": 7.5398527939388485e-06, + "loss": 0.45543473958969116, + "step": 9609 + }, + { + "epoch": 1.7756926239447517, + "grad_norm": 0.07815680652856827, + "learning_rate": 7.537919239123808e-06, + "loss": 0.46514320373535156, + "step": 9610 + }, + { + "epoch": 1.7758774006536475, + "grad_norm": 0.07627175003290176, + "learning_rate": 7.5359857822918814e-06, + "loss": 0.46797317266464233, + "step": 9611 + }, + { + "epoch": 1.7760621773625433, + "grad_norm": 0.07827355712652206, + "learning_rate": 7.534052423520007e-06, + "loss": 0.5280380249023438, + "step": 9612 + }, + { + "epoch": 1.7762469540714392, + "grad_norm": 0.07887908071279526, + "learning_rate": 7.5321191628851335e-06, + "loss": 0.5931577682495117, + "step": 9613 + }, + { + "epoch": 1.776431730780335, + "grad_norm": 0.067986860871315, + "learning_rate": 7.530186000464199e-06, + "loss": 0.45303210616111755, + "step": 9614 + }, + { + "epoch": 1.7766165074892308, + "grad_norm": 0.0722842738032341, + "learning_rate": 7.5282529363341316e-06, + "loss": 0.4417860507965088, + "step": 9615 + }, + { + "epoch": 1.7768012841981267, + "grad_norm": 0.0893688052892685, + "learning_rate": 7.526319970571861e-06, + "loss": 0.5766925811767578, + "step": 9616 + }, + { + "epoch": 1.7769860609070227, + "grad_norm": 0.08695358783006668, + "learning_rate": 7.524387103254325e-06, + "loss": 0.5963427424430847, + "step": 9617 + }, + { + "epoch": 1.7771708376159185, + "grad_norm": 0.0718984454870224, + "learning_rate": 7.522454334458431e-06, + "loss": 0.5042772889137268, + "step": 9618 + }, + { + "epoch": 1.7773556143248144, + "grad_norm": 0.08929789811372757, + "learning_rate": 7.520521664261103e-06, + "loss": 0.4746643602848053, + "step": 9619 + }, + { + "epoch": 1.7775403910337102, + "grad_norm": 0.09637338668107986, + "learning_rate": 7.518589092739259e-06, + "loss": 0.6487293243408203, + "step": 9620 + }, + { + "epoch": 1.777725167742606, + "grad_norm": 0.08786317706108093, + "learning_rate": 7.516656619969802e-06, + "loss": 0.43691861629486084, + "step": 9621 + }, + { + "epoch": 1.7779099444515019, + "grad_norm": 0.0733262300491333, + "learning_rate": 7.514724246029643e-06, + "loss": 0.45732972025871277, + "step": 9622 + }, + { + "epoch": 1.7780947211603977, + "grad_norm": 0.05736264958977699, + "learning_rate": 7.512791970995686e-06, + "loss": 0.35856419801712036, + "step": 9623 + }, + { + "epoch": 1.7782794978692937, + "grad_norm": 0.08865299820899963, + "learning_rate": 7.510859794944825e-06, + "loss": 0.5417463183403015, + "step": 9624 + }, + { + "epoch": 1.7784642745781896, + "grad_norm": 0.07106424868106842, + "learning_rate": 7.508927717953959e-06, + "loss": 0.4163905680179596, + "step": 9625 + }, + { + "epoch": 1.7786490512870854, + "grad_norm": 0.07050973176956177, + "learning_rate": 7.506995740099974e-06, + "loss": 0.376446932554245, + "step": 9626 + }, + { + "epoch": 1.7788338279959812, + "grad_norm": 0.07406870275735855, + "learning_rate": 7.505063861459758e-06, + "loss": 0.5682767033576965, + "step": 9627 + }, + { + "epoch": 1.779018604704877, + "grad_norm": 0.08051006495952606, + "learning_rate": 7.503132082110197e-06, + "loss": 0.5184383392333984, + "step": 9628 + }, + { + "epoch": 1.7792033814137729, + "grad_norm": 0.07349243015050888, + "learning_rate": 7.501200402128166e-06, + "loss": 0.4858967661857605, + "step": 9629 + }, + { + "epoch": 1.7793881581226687, + "grad_norm": 0.08572787046432495, + "learning_rate": 7.499268821590541e-06, + "loss": 0.5259782075881958, + "step": 9630 + }, + { + "epoch": 1.7795729348315645, + "grad_norm": 0.08060348778963089, + "learning_rate": 7.497337340574197e-06, + "loss": 0.6207387447357178, + "step": 9631 + }, + { + "epoch": 1.7797577115404604, + "grad_norm": 0.0894874706864357, + "learning_rate": 7.495405959155992e-06, + "loss": 0.6868194937705994, + "step": 9632 + }, + { + "epoch": 1.7799424882493562, + "grad_norm": 0.06732596457004547, + "learning_rate": 7.493474677412795e-06, + "loss": 0.4337766766548157, + "step": 9633 + }, + { + "epoch": 1.780127264958252, + "grad_norm": 0.0839940533041954, + "learning_rate": 7.491543495421468e-06, + "loss": 0.5468871593475342, + "step": 9634 + }, + { + "epoch": 1.7803120416671478, + "grad_norm": 0.07228980213403702, + "learning_rate": 7.489612413258858e-06, + "loss": 0.4850339889526367, + "step": 9635 + }, + { + "epoch": 1.7804968183760437, + "grad_norm": 0.08797910809516907, + "learning_rate": 7.4876814310018164e-06, + "loss": 0.5503541827201843, + "step": 9636 + }, + { + "epoch": 1.7806815950849395, + "grad_norm": 0.06826333701610565, + "learning_rate": 7.485750548727202e-06, + "loss": 0.4150521457195282, + "step": 9637 + }, + { + "epoch": 1.7808663717938353, + "grad_norm": 0.05818479508161545, + "learning_rate": 7.483819766511845e-06, + "loss": 0.3105936050415039, + "step": 9638 + }, + { + "epoch": 1.7810511485027312, + "grad_norm": 0.07861250638961792, + "learning_rate": 7.481889084432588e-06, + "loss": 0.5360723733901978, + "step": 9639 + }, + { + "epoch": 1.781235925211627, + "grad_norm": 0.08440329134464264, + "learning_rate": 7.479958502566271e-06, + "loss": 0.5286691188812256, + "step": 9640 + }, + { + "epoch": 1.7814207019205228, + "grad_norm": 0.0839398205280304, + "learning_rate": 7.47802802098972e-06, + "loss": 0.6164692640304565, + "step": 9641 + }, + { + "epoch": 1.7816054786294186, + "grad_norm": 0.07791807502508163, + "learning_rate": 7.476097639779763e-06, + "loss": 0.4462954103946686, + "step": 9642 + }, + { + "epoch": 1.7817902553383145, + "grad_norm": 0.09706820547580719, + "learning_rate": 7.474167359013223e-06, + "loss": 0.7078897356987, + "step": 9643 + }, + { + "epoch": 1.7819750320472103, + "grad_norm": 0.08299513906240463, + "learning_rate": 7.47223717876692e-06, + "loss": 0.6251681447029114, + "step": 9644 + }, + { + "epoch": 1.7821598087561061, + "grad_norm": 0.06504345685243607, + "learning_rate": 7.4703070991176706e-06, + "loss": 0.3207865357398987, + "step": 9645 + }, + { + "epoch": 1.7823445854650022, + "grad_norm": 0.07922597974538803, + "learning_rate": 7.468377120142282e-06, + "loss": 0.5469554662704468, + "step": 9646 + }, + { + "epoch": 1.782529362173898, + "grad_norm": 0.07841379940509796, + "learning_rate": 7.4664472419175645e-06, + "loss": 0.4845714569091797, + "step": 9647 + }, + { + "epoch": 1.7827141388827938, + "grad_norm": 0.061545539647340775, + "learning_rate": 7.464517464520322e-06, + "loss": 0.4248470664024353, + "step": 9648 + }, + { + "epoch": 1.7828989155916897, + "grad_norm": 0.088067926466465, + "learning_rate": 7.46258778802735e-06, + "loss": 0.6738609075546265, + "step": 9649 + }, + { + "epoch": 1.7830836923005855, + "grad_norm": 0.0731796994805336, + "learning_rate": 7.460658212515445e-06, + "loss": 0.4305770695209503, + "step": 9650 + }, + { + "epoch": 1.7832684690094813, + "grad_norm": 0.07577754557132721, + "learning_rate": 7.458728738061402e-06, + "loss": 0.5598363876342773, + "step": 9651 + }, + { + "epoch": 1.7834532457183772, + "grad_norm": 0.08362109959125519, + "learning_rate": 7.456799364742e-06, + "loss": 0.514435887336731, + "step": 9652 + }, + { + "epoch": 1.7836380224272732, + "grad_norm": 0.07791508734226227, + "learning_rate": 7.454870092634028e-06, + "loss": 0.48999202251434326, + "step": 9653 + }, + { + "epoch": 1.783822799136169, + "grad_norm": 0.07102424651384354, + "learning_rate": 7.452940921814268e-06, + "loss": 0.46250784397125244, + "step": 9654 + }, + { + "epoch": 1.7840075758450649, + "grad_norm": 0.07442230731248856, + "learning_rate": 7.451011852359486e-06, + "loss": 0.5393667817115784, + "step": 9655 + }, + { + "epoch": 1.7841923525539607, + "grad_norm": 0.0678488239645958, + "learning_rate": 7.449082884346455e-06, + "loss": 0.48200443387031555, + "step": 9656 + }, + { + "epoch": 1.7843771292628565, + "grad_norm": 0.0755557268857956, + "learning_rate": 7.447154017851952e-06, + "loss": 0.534457266330719, + "step": 9657 + }, + { + "epoch": 1.7845619059717523, + "grad_norm": 0.08040129393339157, + "learning_rate": 7.4452252529527266e-06, + "loss": 0.4577656090259552, + "step": 9658 + }, + { + "epoch": 1.7847466826806482, + "grad_norm": 0.08037807047367096, + "learning_rate": 7.443296589725546e-06, + "loss": 0.5453135371208191, + "step": 9659 + }, + { + "epoch": 1.784931459389544, + "grad_norm": 0.0812598168849945, + "learning_rate": 7.44136802824716e-06, + "loss": 0.6238100528717041, + "step": 9660 + }, + { + "epoch": 1.7851162360984398, + "grad_norm": 0.07747691124677658, + "learning_rate": 7.439439568594322e-06, + "loss": 0.4280628561973572, + "step": 9661 + }, + { + "epoch": 1.7853010128073357, + "grad_norm": 0.09302282333374023, + "learning_rate": 7.4375112108437805e-06, + "loss": 0.6061352491378784, + "step": 9662 + }, + { + "epoch": 1.7854857895162315, + "grad_norm": 0.09619981795549393, + "learning_rate": 7.435582955072274e-06, + "loss": 0.6168282628059387, + "step": 9663 + }, + { + "epoch": 1.7856705662251273, + "grad_norm": 0.1117103174328804, + "learning_rate": 7.433654801356543e-06, + "loss": 0.7298385500907898, + "step": 9664 + }, + { + "epoch": 1.7858553429340231, + "grad_norm": 0.07348813861608505, + "learning_rate": 7.431726749773322e-06, + "loss": 0.4037703275680542, + "step": 9665 + }, + { + "epoch": 1.786040119642919, + "grad_norm": 0.09630374610424042, + "learning_rate": 7.429798800399339e-06, + "loss": 0.5390593409538269, + "step": 9666 + }, + { + "epoch": 1.7862248963518148, + "grad_norm": 0.08244305104017258, + "learning_rate": 7.427870953311325e-06, + "loss": 0.5543193221092224, + "step": 9667 + }, + { + "epoch": 1.7864096730607106, + "grad_norm": 0.06571978330612183, + "learning_rate": 7.425943208586001e-06, + "loss": 0.40200668573379517, + "step": 9668 + }, + { + "epoch": 1.7865944497696065, + "grad_norm": 0.10474611818790436, + "learning_rate": 7.424015566300082e-06, + "loss": 0.597493052482605, + "step": 9669 + }, + { + "epoch": 1.7867792264785023, + "grad_norm": 0.07248591631650925, + "learning_rate": 7.422088026530283e-06, + "loss": 0.45960649847984314, + "step": 9670 + }, + { + "epoch": 1.7869640031873981, + "grad_norm": 0.08366648852825165, + "learning_rate": 7.420160589353321e-06, + "loss": 0.5246530771255493, + "step": 9671 + }, + { + "epoch": 1.787148779896294, + "grad_norm": 0.06871534883975983, + "learning_rate": 7.41823325484589e-06, + "loss": 0.4420692026615143, + "step": 9672 + }, + { + "epoch": 1.7873335566051898, + "grad_norm": 0.06331802904605865, + "learning_rate": 7.416306023084704e-06, + "loss": 0.3460390865802765, + "step": 9673 + }, + { + "epoch": 1.7875183333140856, + "grad_norm": 0.09252341091632843, + "learning_rate": 7.41437889414645e-06, + "loss": 0.5817192792892456, + "step": 9674 + }, + { + "epoch": 1.7877031100229817, + "grad_norm": 0.09684668481349945, + "learning_rate": 7.412451868107828e-06, + "loss": 0.5826550722122192, + "step": 9675 + }, + { + "epoch": 1.7878878867318775, + "grad_norm": 0.08396687358617783, + "learning_rate": 7.410524945045528e-06, + "loss": 0.6268729567527771, + "step": 9676 + }, + { + "epoch": 1.7880726634407733, + "grad_norm": 0.049349989742040634, + "learning_rate": 7.408598125036231e-06, + "loss": 0.2656620442867279, + "step": 9677 + }, + { + "epoch": 1.7882574401496691, + "grad_norm": 0.07575535774230957, + "learning_rate": 7.4066714081566225e-06, + "loss": 0.409138560295105, + "step": 9678 + }, + { + "epoch": 1.788442216858565, + "grad_norm": 0.08824644237756729, + "learning_rate": 7.404744794483378e-06, + "loss": 0.5588775873184204, + "step": 9679 + }, + { + "epoch": 1.7886269935674608, + "grad_norm": 0.06662043184041977, + "learning_rate": 7.4028182840931714e-06, + "loss": 0.3633524477481842, + "step": 9680 + }, + { + "epoch": 1.7888117702763566, + "grad_norm": 0.09043443202972412, + "learning_rate": 7.400891877062672e-06, + "loss": 0.623502254486084, + "step": 9681 + }, + { + "epoch": 1.7889965469852525, + "grad_norm": 0.07488062232732773, + "learning_rate": 7.398965573468544e-06, + "loss": 0.4273262619972229, + "step": 9682 + }, + { + "epoch": 1.7891813236941485, + "grad_norm": 0.07975243777036667, + "learning_rate": 7.397039373387449e-06, + "loss": 0.47635743021965027, + "step": 9683 + }, + { + "epoch": 1.7893661004030443, + "grad_norm": 0.08129294961690903, + "learning_rate": 7.395113276896042e-06, + "loss": 0.5076517462730408, + "step": 9684 + }, + { + "epoch": 1.7895508771119402, + "grad_norm": 0.07039433717727661, + "learning_rate": 7.393187284070979e-06, + "loss": 0.45592862367630005, + "step": 9685 + }, + { + "epoch": 1.789735653820836, + "grad_norm": 0.07881192862987518, + "learning_rate": 7.391261394988904e-06, + "loss": 0.4758132994174957, + "step": 9686 + }, + { + "epoch": 1.7899204305297318, + "grad_norm": 0.0708584189414978, + "learning_rate": 7.389335609726464e-06, + "loss": 0.4376681447029114, + "step": 9687 + }, + { + "epoch": 1.7901052072386276, + "grad_norm": 0.08312217891216278, + "learning_rate": 7.387409928360302e-06, + "loss": 0.5108171105384827, + "step": 9688 + }, + { + "epoch": 1.7902899839475235, + "grad_norm": 0.06353427469730377, + "learning_rate": 7.385484350967048e-06, + "loss": 0.3415788412094116, + "step": 9689 + }, + { + "epoch": 1.7904747606564193, + "grad_norm": 0.08577631413936615, + "learning_rate": 7.383558877623342e-06, + "loss": 0.5608941316604614, + "step": 9690 + }, + { + "epoch": 1.7906595373653151, + "grad_norm": 0.07521551102399826, + "learning_rate": 7.381633508405802e-06, + "loss": 0.4245568513870239, + "step": 9691 + }, + { + "epoch": 1.790844314074211, + "grad_norm": 0.09310653805732727, + "learning_rate": 7.379708243391055e-06, + "loss": 0.602554440498352, + "step": 9692 + }, + { + "epoch": 1.7910290907831068, + "grad_norm": 0.09033379703760147, + "learning_rate": 7.377783082655727e-06, + "loss": 0.5871620178222656, + "step": 9693 + }, + { + "epoch": 1.7912138674920026, + "grad_norm": 0.06903165578842163, + "learning_rate": 7.375858026276426e-06, + "loss": 0.3480731248855591, + "step": 9694 + }, + { + "epoch": 1.7913986442008984, + "grad_norm": 0.1019633412361145, + "learning_rate": 7.373933074329765e-06, + "loss": 0.6741067171096802, + "step": 9695 + }, + { + "epoch": 1.7915834209097943, + "grad_norm": 0.07089679688215256, + "learning_rate": 7.372008226892354e-06, + "loss": 0.40935018658638, + "step": 9696 + }, + { + "epoch": 1.79176819761869, + "grad_norm": 0.07195250689983368, + "learning_rate": 7.370083484040792e-06, + "loss": 0.3602537214756012, + "step": 9697 + }, + { + "epoch": 1.791952974327586, + "grad_norm": 0.06774020940065384, + "learning_rate": 7.368158845851679e-06, + "loss": 0.35686254501342773, + "step": 9698 + }, + { + "epoch": 1.7921377510364818, + "grad_norm": 0.0797189399600029, + "learning_rate": 7.366234312401611e-06, + "loss": 0.5029522776603699, + "step": 9699 + }, + { + "epoch": 1.7923225277453776, + "grad_norm": 0.07402968406677246, + "learning_rate": 7.364309883767177e-06, + "loss": 0.36364853382110596, + "step": 9700 + }, + { + "epoch": 1.7925073044542734, + "grad_norm": 0.06966808438301086, + "learning_rate": 7.362385560024963e-06, + "loss": 0.3557095229625702, + "step": 9701 + }, + { + "epoch": 1.7926920811631692, + "grad_norm": 0.08454054594039917, + "learning_rate": 7.360461341251552e-06, + "loss": 0.46431073546409607, + "step": 9702 + }, + { + "epoch": 1.792876857872065, + "grad_norm": 0.07569453120231628, + "learning_rate": 7.358537227523521e-06, + "loss": 0.4756021201610565, + "step": 9703 + }, + { + "epoch": 1.793061634580961, + "grad_norm": 0.09158246964216232, + "learning_rate": 7.356613218917445e-06, + "loss": 0.4759051501750946, + "step": 9704 + }, + { + "epoch": 1.793246411289857, + "grad_norm": 0.06996876746416092, + "learning_rate": 7.354689315509894e-06, + "loss": 0.40846434235572815, + "step": 9705 + }, + { + "epoch": 1.7934311879987528, + "grad_norm": 0.09398932754993439, + "learning_rate": 7.3527655173774306e-06, + "loss": 0.5916178822517395, + "step": 9706 + }, + { + "epoch": 1.7936159647076486, + "grad_norm": 0.08205067366361618, + "learning_rate": 7.350841824596622e-06, + "loss": 0.4599655568599701, + "step": 9707 + }, + { + "epoch": 1.7938007414165444, + "grad_norm": 0.0769825279712677, + "learning_rate": 7.3489182372440124e-06, + "loss": 0.37565580010414124, + "step": 9708 + }, + { + "epoch": 1.7939855181254403, + "grad_norm": 0.0755021944642067, + "learning_rate": 7.3469947553961665e-06, + "loss": 0.44360727071762085, + "step": 9709 + }, + { + "epoch": 1.794170294834336, + "grad_norm": 0.12141609936952591, + "learning_rate": 7.345071379129632e-06, + "loss": 0.6779170036315918, + "step": 9710 + }, + { + "epoch": 1.794355071543232, + "grad_norm": 0.06998870521783829, + "learning_rate": 7.343148108520948e-06, + "loss": 0.5503925681114197, + "step": 9711 + }, + { + "epoch": 1.794539848252128, + "grad_norm": 0.07839400321245193, + "learning_rate": 7.341224943646654e-06, + "loss": 0.616855263710022, + "step": 9712 + }, + { + "epoch": 1.7947246249610238, + "grad_norm": 0.07722536474466324, + "learning_rate": 7.3393018845832955e-06, + "loss": 0.5017062425613403, + "step": 9713 + }, + { + "epoch": 1.7949094016699196, + "grad_norm": 0.06142596900463104, + "learning_rate": 7.3373789314073925e-06, + "loss": 0.35877326130867004, + "step": 9714 + }, + { + "epoch": 1.7950941783788155, + "grad_norm": 0.0714077427983284, + "learning_rate": 7.335456084195479e-06, + "loss": 0.3872145414352417, + "step": 9715 + }, + { + "epoch": 1.7952789550877113, + "grad_norm": 0.0684652104973793, + "learning_rate": 7.33353334302408e-06, + "loss": 0.40973180532455444, + "step": 9716 + }, + { + "epoch": 1.7954637317966071, + "grad_norm": 0.07915206253528595, + "learning_rate": 7.331610707969707e-06, + "loss": 0.5760145783424377, + "step": 9717 + }, + { + "epoch": 1.795648508505503, + "grad_norm": 0.08069485425949097, + "learning_rate": 7.329688179108882e-06, + "loss": 0.5191196799278259, + "step": 9718 + }, + { + "epoch": 1.7958332852143988, + "grad_norm": 0.06689658761024475, + "learning_rate": 7.327765756518113e-06, + "loss": 0.37917643785476685, + "step": 9719 + }, + { + "epoch": 1.7960180619232946, + "grad_norm": 0.06615526229143143, + "learning_rate": 7.325843440273905e-06, + "loss": 0.3649718761444092, + "step": 9720 + }, + { + "epoch": 1.7962028386321904, + "grad_norm": 0.07950661331415176, + "learning_rate": 7.323921230452764e-06, + "loss": 0.4523574113845825, + "step": 9721 + }, + { + "epoch": 1.7963876153410863, + "grad_norm": 0.0933559387922287, + "learning_rate": 7.321999127131185e-06, + "loss": 0.5563049912452698, + "step": 9722 + }, + { + "epoch": 1.796572392049982, + "grad_norm": 0.0657040923833847, + "learning_rate": 7.320077130385661e-06, + "loss": 0.4413251578807831, + "step": 9723 + }, + { + "epoch": 1.796757168758878, + "grad_norm": 0.086649589240551, + "learning_rate": 7.318155240292686e-06, + "loss": 0.5430909991264343, + "step": 9724 + }, + { + "epoch": 1.7969419454677737, + "grad_norm": 0.0868072658777237, + "learning_rate": 7.316233456928738e-06, + "loss": 0.4656325876712799, + "step": 9725 + }, + { + "epoch": 1.7971267221766696, + "grad_norm": 0.09229880571365356, + "learning_rate": 7.3143117803703046e-06, + "loss": 0.5382356643676758, + "step": 9726 + }, + { + "epoch": 1.7973114988855654, + "grad_norm": 0.08575570583343506, + "learning_rate": 7.312390210693863e-06, + "loss": 0.5168148875236511, + "step": 9727 + }, + { + "epoch": 1.7974962755944612, + "grad_norm": 0.08345237374305725, + "learning_rate": 7.310468747975875e-06, + "loss": 0.5733045339584351, + "step": 9728 + }, + { + "epoch": 1.797681052303357, + "grad_norm": 0.07230813056230545, + "learning_rate": 7.30854739229282e-06, + "loss": 0.3298667371273041, + "step": 9729 + }, + { + "epoch": 1.797865829012253, + "grad_norm": 0.11275004595518112, + "learning_rate": 7.306626143721161e-06, + "loss": 0.6309958100318909, + "step": 9730 + }, + { + "epoch": 1.7980506057211487, + "grad_norm": 0.07427927106618881, + "learning_rate": 7.304705002337351e-06, + "loss": 0.45528972148895264, + "step": 9731 + }, + { + "epoch": 1.7982353824300445, + "grad_norm": 0.07842516154050827, + "learning_rate": 7.3027839682178485e-06, + "loss": 0.5899828672409058, + "step": 9732 + }, + { + "epoch": 1.7984201591389404, + "grad_norm": 0.06140856072306633, + "learning_rate": 7.300863041439113e-06, + "loss": 0.42100775241851807, + "step": 9733 + }, + { + "epoch": 1.7986049358478364, + "grad_norm": 0.09284207969903946, + "learning_rate": 7.298942222077576e-06, + "loss": 0.5992365479469299, + "step": 9734 + }, + { + "epoch": 1.7987897125567323, + "grad_norm": 0.074347585439682, + "learning_rate": 7.297021510209689e-06, + "loss": 0.3827507197856903, + "step": 9735 + }, + { + "epoch": 1.798974489265628, + "grad_norm": 0.08603885769844055, + "learning_rate": 7.295100905911894e-06, + "loss": 0.5358006954193115, + "step": 9736 + }, + { + "epoch": 1.799159265974524, + "grad_norm": 0.07486552000045776, + "learning_rate": 7.293180409260617e-06, + "loss": 0.6258729100227356, + "step": 9737 + }, + { + "epoch": 1.7993440426834197, + "grad_norm": 0.09046154469251633, + "learning_rate": 7.291260020332294e-06, + "loss": 0.5852406620979309, + "step": 9738 + }, + { + "epoch": 1.7995288193923156, + "grad_norm": 0.07911182194948196, + "learning_rate": 7.289339739203344e-06, + "loss": 0.4991699755191803, + "step": 9739 + }, + { + "epoch": 1.7997135961012114, + "grad_norm": 0.0798080712556839, + "learning_rate": 7.287419565950193e-06, + "loss": 0.4510963559150696, + "step": 9740 + }, + { + "epoch": 1.7998983728101074, + "grad_norm": 0.07900462299585342, + "learning_rate": 7.285499500649258e-06, + "loss": 0.5157014727592468, + "step": 9741 + }, + { + "epoch": 1.8000831495190033, + "grad_norm": 0.06722088903188705, + "learning_rate": 7.283579543376948e-06, + "loss": 0.3551257848739624, + "step": 9742 + }, + { + "epoch": 1.800267926227899, + "grad_norm": 0.06953731924295425, + "learning_rate": 7.281659694209674e-06, + "loss": 0.44554603099823, + "step": 9743 + }, + { + "epoch": 1.800452702936795, + "grad_norm": 0.07420971989631653, + "learning_rate": 7.279739953223841e-06, + "loss": 0.46750375628471375, + "step": 9744 + }, + { + "epoch": 1.8006374796456908, + "grad_norm": 0.06312204152345657, + "learning_rate": 7.277820320495846e-06, + "loss": 0.33511245250701904, + "step": 9745 + }, + { + "epoch": 1.8008222563545866, + "grad_norm": 0.06883435696363449, + "learning_rate": 7.275900796102087e-06, + "loss": 0.4391895532608032, + "step": 9746 + }, + { + "epoch": 1.8010070330634824, + "grad_norm": 0.06549646705389023, + "learning_rate": 7.2739813801189556e-06, + "loss": 0.3687029778957367, + "step": 9747 + }, + { + "epoch": 1.8011918097723782, + "grad_norm": 0.0845990851521492, + "learning_rate": 7.272062072622831e-06, + "loss": 0.5504123568534851, + "step": 9748 + }, + { + "epoch": 1.801376586481274, + "grad_norm": 0.07709339261054993, + "learning_rate": 7.270142873690103e-06, + "loss": 0.4302341938018799, + "step": 9749 + }, + { + "epoch": 1.80156136319017, + "grad_norm": 0.09314499795436859, + "learning_rate": 7.268223783397152e-06, + "loss": 0.5776973962783813, + "step": 9750 + }, + { + "epoch": 1.8017461398990657, + "grad_norm": 0.07090350985527039, + "learning_rate": 7.266304801820346e-06, + "loss": 0.41678741574287415, + "step": 9751 + }, + { + "epoch": 1.8019309166079616, + "grad_norm": 0.054357532411813736, + "learning_rate": 7.264385929036052e-06, + "loss": 0.30052199959754944, + "step": 9752 + }, + { + "epoch": 1.8021156933168574, + "grad_norm": 0.09315387904644012, + "learning_rate": 7.262467165120646e-06, + "loss": 0.5855849981307983, + "step": 9753 + }, + { + "epoch": 1.8023004700257532, + "grad_norm": 0.06688593327999115, + "learning_rate": 7.260548510150478e-06, + "loss": 0.4450649619102478, + "step": 9754 + }, + { + "epoch": 1.802485246734649, + "grad_norm": 0.07729198038578033, + "learning_rate": 7.258629964201911e-06, + "loss": 0.5191230177879333, + "step": 9755 + }, + { + "epoch": 1.8026700234435449, + "grad_norm": 0.08478415757417679, + "learning_rate": 7.256711527351292e-06, + "loss": 0.5620251893997192, + "step": 9756 + }, + { + "epoch": 1.8028548001524407, + "grad_norm": 0.06856848299503326, + "learning_rate": 7.2547931996749734e-06, + "loss": 0.382502019405365, + "step": 9757 + }, + { + "epoch": 1.8030395768613365, + "grad_norm": 0.09224586188793182, + "learning_rate": 7.252874981249297e-06, + "loss": 0.5560954809188843, + "step": 9758 + }, + { + "epoch": 1.8032243535702324, + "grad_norm": 0.07440272718667984, + "learning_rate": 7.250956872150601e-06, + "loss": 0.4763925075531006, + "step": 9759 + }, + { + "epoch": 1.8034091302791282, + "grad_norm": 0.06670232862234116, + "learning_rate": 7.24903887245522e-06, + "loss": 0.5068976879119873, + "step": 9760 + }, + { + "epoch": 1.803593906988024, + "grad_norm": 0.08348406851291656, + "learning_rate": 7.247120982239487e-06, + "loss": 0.5030762553215027, + "step": 9761 + }, + { + "epoch": 1.8037786836969198, + "grad_norm": 0.05656994879245758, + "learning_rate": 7.245203201579724e-06, + "loss": 0.277743935585022, + "step": 9762 + }, + { + "epoch": 1.803963460405816, + "grad_norm": 0.05856098234653473, + "learning_rate": 7.243285530552256e-06, + "loss": 0.35996246337890625, + "step": 9763 + }, + { + "epoch": 1.8041482371147117, + "grad_norm": 0.05998080223798752, + "learning_rate": 7.241367969233402e-06, + "loss": 0.3923867344856262, + "step": 9764 + }, + { + "epoch": 1.8043330138236076, + "grad_norm": 0.054737962782382965, + "learning_rate": 7.239450517699468e-06, + "loss": 0.3953229486942291, + "step": 9765 + }, + { + "epoch": 1.8045177905325034, + "grad_norm": 0.08780429512262344, + "learning_rate": 7.237533176026768e-06, + "loss": 0.6455278992652893, + "step": 9766 + }, + { + "epoch": 1.8047025672413992, + "grad_norm": 0.09323868900537491, + "learning_rate": 7.235615944291609e-06, + "loss": 0.6022985577583313, + "step": 9767 + }, + { + "epoch": 1.804887343950295, + "grad_norm": 0.08070196956396103, + "learning_rate": 7.233698822570279e-06, + "loss": 0.49842768907546997, + "step": 9768 + }, + { + "epoch": 1.8050721206591909, + "grad_norm": 0.0877484530210495, + "learning_rate": 7.231781810939085e-06, + "loss": 0.585066556930542, + "step": 9769 + }, + { + "epoch": 1.8052568973680867, + "grad_norm": 0.08354081213474274, + "learning_rate": 7.229864909474318e-06, + "loss": 0.6162436008453369, + "step": 9770 + }, + { + "epoch": 1.8054416740769827, + "grad_norm": 0.07809589803218842, + "learning_rate": 7.227948118252255e-06, + "loss": 0.5466450452804565, + "step": 9771 + }, + { + "epoch": 1.8056264507858786, + "grad_norm": 0.07099008560180664, + "learning_rate": 7.2260314373491905e-06, + "loss": 0.5045560002326965, + "step": 9772 + }, + { + "epoch": 1.8058112274947744, + "grad_norm": 0.0817057341337204, + "learning_rate": 7.224114866841392e-06, + "loss": 0.4768495261669159, + "step": 9773 + }, + { + "epoch": 1.8059960042036702, + "grad_norm": 0.04109758511185646, + "learning_rate": 7.222198406805137e-06, + "loss": 0.26770099997520447, + "step": 9774 + }, + { + "epoch": 1.806180780912566, + "grad_norm": 0.07153109461069107, + "learning_rate": 7.220282057316697e-06, + "loss": 0.41642487049102783, + "step": 9775 + }, + { + "epoch": 1.806365557621462, + "grad_norm": 0.09041387587785721, + "learning_rate": 7.2183658184523305e-06, + "loss": 0.47185105085372925, + "step": 9776 + }, + { + "epoch": 1.8065503343303577, + "grad_norm": 0.07475770264863968, + "learning_rate": 7.216449690288304e-06, + "loss": 0.47243043780326843, + "step": 9777 + }, + { + "epoch": 1.8067351110392535, + "grad_norm": 0.08335085958242416, + "learning_rate": 7.214533672900873e-06, + "loss": 0.5192751884460449, + "step": 9778 + }, + { + "epoch": 1.8069198877481494, + "grad_norm": 0.08981821686029434, + "learning_rate": 7.2126177663662855e-06, + "loss": 0.5628516674041748, + "step": 9779 + }, + { + "epoch": 1.8071046644570452, + "grad_norm": 0.06832242757081985, + "learning_rate": 7.210701970760789e-06, + "loss": 0.41456109285354614, + "step": 9780 + }, + { + "epoch": 1.807289441165941, + "grad_norm": 0.06621191650629044, + "learning_rate": 7.20878628616063e-06, + "loss": 0.484127402305603, + "step": 9781 + }, + { + "epoch": 1.8074742178748369, + "grad_norm": 0.07209111005067825, + "learning_rate": 7.2068707126420425e-06, + "loss": 0.5045483708381653, + "step": 9782 + }, + { + "epoch": 1.8076589945837327, + "grad_norm": 0.05990150570869446, + "learning_rate": 7.204955250281263e-06, + "loss": 0.44331952929496765, + "step": 9783 + }, + { + "epoch": 1.8078437712926285, + "grad_norm": 0.07342742383480072, + "learning_rate": 7.20303989915452e-06, + "loss": 0.4118276536464691, + "step": 9784 + }, + { + "epoch": 1.8080285480015243, + "grad_norm": 0.0759073868393898, + "learning_rate": 7.201124659338038e-06, + "loss": 0.45692363381385803, + "step": 9785 + }, + { + "epoch": 1.8082133247104202, + "grad_norm": 0.0632883831858635, + "learning_rate": 7.199209530908038e-06, + "loss": 0.3301604986190796, + "step": 9786 + }, + { + "epoch": 1.808398101419316, + "grad_norm": 0.06672390550374985, + "learning_rate": 7.197294513940739e-06, + "loss": 0.4440694749355316, + "step": 9787 + }, + { + "epoch": 1.8085828781282118, + "grad_norm": 0.07054869830608368, + "learning_rate": 7.195379608512344e-06, + "loss": 0.5055124759674072, + "step": 9788 + }, + { + "epoch": 1.8087676548371077, + "grad_norm": 0.08731034398078918, + "learning_rate": 7.193464814699073e-06, + "loss": 0.5469434261322021, + "step": 9789 + }, + { + "epoch": 1.8089524315460035, + "grad_norm": 0.06051642820239067, + "learning_rate": 7.191550132577116e-06, + "loss": 0.4700313210487366, + "step": 9790 + }, + { + "epoch": 1.8091372082548993, + "grad_norm": 0.07084860652685165, + "learning_rate": 7.189635562222676e-06, + "loss": 0.4844103455543518, + "step": 9791 + }, + { + "epoch": 1.8093219849637951, + "grad_norm": 0.0787714347243309, + "learning_rate": 7.1877211037119556e-06, + "loss": 0.5192242860794067, + "step": 9792 + }, + { + "epoch": 1.8095067616726912, + "grad_norm": 0.07096560299396515, + "learning_rate": 7.185806757121132e-06, + "loss": 0.47444817423820496, + "step": 9793 + }, + { + "epoch": 1.809691538381587, + "grad_norm": 0.08192567527294159, + "learning_rate": 7.183892522526394e-06, + "loss": 0.5131165981292725, + "step": 9794 + }, + { + "epoch": 1.8098763150904829, + "grad_norm": 0.06697290390729904, + "learning_rate": 7.1819784000039264e-06, + "loss": 0.45709118247032166, + "step": 9795 + }, + { + "epoch": 1.8100610917993787, + "grad_norm": 0.07221035659313202, + "learning_rate": 7.180064389629899e-06, + "loss": 0.4476308226585388, + "step": 9796 + }, + { + "epoch": 1.8102458685082745, + "grad_norm": 0.09427494555711746, + "learning_rate": 7.178150491480488e-06, + "loss": 0.6278370022773743, + "step": 9797 + }, + { + "epoch": 1.8104306452171703, + "grad_norm": 0.06774226576089859, + "learning_rate": 7.176236705631861e-06, + "loss": 0.36737802624702454, + "step": 9798 + }, + { + "epoch": 1.8106154219260662, + "grad_norm": 0.06325653940439224, + "learning_rate": 7.174323032160175e-06, + "loss": 0.42155921459198, + "step": 9799 + }, + { + "epoch": 1.8108001986349622, + "grad_norm": 0.06160309165716171, + "learning_rate": 7.172409471141593e-06, + "loss": 0.3376319706439972, + "step": 9800 + }, + { + "epoch": 1.810984975343858, + "grad_norm": 0.0902712419629097, + "learning_rate": 7.170496022652269e-06, + "loss": 0.6608295440673828, + "step": 9801 + }, + { + "epoch": 1.8111697520527539, + "grad_norm": 0.07389810681343079, + "learning_rate": 7.168582686768348e-06, + "loss": 0.41427645087242126, + "step": 9802 + }, + { + "epoch": 1.8113545287616497, + "grad_norm": 0.08707796782255173, + "learning_rate": 7.1666694635659826e-06, + "loss": 0.7434542179107666, + "step": 9803 + }, + { + "epoch": 1.8115393054705455, + "grad_norm": 0.07786441594362259, + "learning_rate": 7.164756353121303e-06, + "loss": 0.5525217652320862, + "step": 9804 + }, + { + "epoch": 1.8117240821794414, + "grad_norm": 0.0723809227347374, + "learning_rate": 7.162843355510452e-06, + "loss": 0.39500388503074646, + "step": 9805 + }, + { + "epoch": 1.8119088588883372, + "grad_norm": 0.08595466613769531, + "learning_rate": 7.160930470809563e-06, + "loss": 0.5993435382843018, + "step": 9806 + }, + { + "epoch": 1.812093635597233, + "grad_norm": 0.07043426483869553, + "learning_rate": 7.1590176990947545e-06, + "loss": 0.38586297631263733, + "step": 9807 + }, + { + "epoch": 1.8122784123061288, + "grad_norm": 0.08636261522769928, + "learning_rate": 7.157105040442151e-06, + "loss": 0.4882827401161194, + "step": 9808 + }, + { + "epoch": 1.8124631890150247, + "grad_norm": 0.08507783710956573, + "learning_rate": 7.1551924949278795e-06, + "loss": 0.509990930557251, + "step": 9809 + }, + { + "epoch": 1.8126479657239205, + "grad_norm": 0.0886579379439354, + "learning_rate": 7.153280062628043e-06, + "loss": 0.6217797994613647, + "step": 9810 + }, + { + "epoch": 1.8128327424328163, + "grad_norm": 0.0848434716463089, + "learning_rate": 7.15136774361875e-06, + "loss": 0.7512550950050354, + "step": 9811 + }, + { + "epoch": 1.8130175191417122, + "grad_norm": 0.09012161195278168, + "learning_rate": 7.1494555379761156e-06, + "loss": 0.49214449524879456, + "step": 9812 + }, + { + "epoch": 1.813202295850608, + "grad_norm": 0.06810518354177475, + "learning_rate": 7.147543445776228e-06, + "loss": 0.40887418389320374, + "step": 9813 + }, + { + "epoch": 1.8133870725595038, + "grad_norm": 0.0676533579826355, + "learning_rate": 7.145631467095188e-06, + "loss": 0.33318957686424255, + "step": 9814 + }, + { + "epoch": 1.8135718492683996, + "grad_norm": 0.0729999765753746, + "learning_rate": 7.1437196020090875e-06, + "loss": 0.5460203886032104, + "step": 9815 + }, + { + "epoch": 1.8137566259772955, + "grad_norm": 0.09123071283102036, + "learning_rate": 7.141807850594007e-06, + "loss": 0.7088720798492432, + "step": 9816 + }, + { + "epoch": 1.8139414026861913, + "grad_norm": 0.06890129297971725, + "learning_rate": 7.139896212926033e-06, + "loss": 0.4780724048614502, + "step": 9817 + }, + { + "epoch": 1.8141261793950871, + "grad_norm": 0.07324981689453125, + "learning_rate": 7.137984689081243e-06, + "loss": 0.5160731673240662, + "step": 9818 + }, + { + "epoch": 1.814310956103983, + "grad_norm": 0.07690378278493881, + "learning_rate": 7.136073279135707e-06, + "loss": 0.44267570972442627, + "step": 9819 + }, + { + "epoch": 1.8144957328128788, + "grad_norm": 0.0685325637459755, + "learning_rate": 7.134161983165498e-06, + "loss": 0.5272607803344727, + "step": 9820 + }, + { + "epoch": 1.8146805095217746, + "grad_norm": 0.06789597123861313, + "learning_rate": 7.132250801246672e-06, + "loss": 0.492597758769989, + "step": 9821 + }, + { + "epoch": 1.8148652862306707, + "grad_norm": 0.0947544276714325, + "learning_rate": 7.130339733455291e-06, + "loss": 0.6372851729393005, + "step": 9822 + }, + { + "epoch": 1.8150500629395665, + "grad_norm": 0.06775210797786713, + "learning_rate": 7.1284287798674165e-06, + "loss": 0.39347755908966064, + "step": 9823 + }, + { + "epoch": 1.8152348396484623, + "grad_norm": 0.07759533077478409, + "learning_rate": 7.1265179405590855e-06, + "loss": 0.5344981551170349, + "step": 9824 + }, + { + "epoch": 1.8154196163573582, + "grad_norm": 0.0798647329211235, + "learning_rate": 7.1246072156063536e-06, + "loss": 0.4983478784561157, + "step": 9825 + }, + { + "epoch": 1.815604393066254, + "grad_norm": 0.06834018975496292, + "learning_rate": 7.122696605085262e-06, + "loss": 0.434520423412323, + "step": 9826 + }, + { + "epoch": 1.8157891697751498, + "grad_norm": 0.06779895722866058, + "learning_rate": 7.120786109071838e-06, + "loss": 0.5057935118675232, + "step": 9827 + }, + { + "epoch": 1.8159739464840456, + "grad_norm": 0.0671994760632515, + "learning_rate": 7.118875727642121e-06, + "loss": 0.4207999110221863, + "step": 9828 + }, + { + "epoch": 1.8161587231929417, + "grad_norm": 0.08160896599292755, + "learning_rate": 7.1169654608721384e-06, + "loss": 0.5233028531074524, + "step": 9829 + }, + { + "epoch": 1.8163434999018375, + "grad_norm": 0.07913441210985184, + "learning_rate": 7.115055308837908e-06, + "loss": 0.5464879274368286, + "step": 9830 + }, + { + "epoch": 1.8165282766107334, + "grad_norm": 0.07577405124902725, + "learning_rate": 7.113145271615449e-06, + "loss": 0.4584244191646576, + "step": 9831 + }, + { + "epoch": 1.8167130533196292, + "grad_norm": 0.07947442680597305, + "learning_rate": 7.111235349280782e-06, + "loss": 0.4469531774520874, + "step": 9832 + }, + { + "epoch": 1.816897830028525, + "grad_norm": 0.08003919571638107, + "learning_rate": 7.109325541909906e-06, + "loss": 0.5821112990379333, + "step": 9833 + }, + { + "epoch": 1.8170826067374208, + "grad_norm": 0.08050795644521713, + "learning_rate": 7.107415849578829e-06, + "loss": 0.399353951215744, + "step": 9834 + }, + { + "epoch": 1.8172673834463167, + "grad_norm": 0.07931918650865555, + "learning_rate": 7.105506272363555e-06, + "loss": 0.4955158531665802, + "step": 9835 + }, + { + "epoch": 1.8174521601552125, + "grad_norm": 0.08377605676651001, + "learning_rate": 7.103596810340072e-06, + "loss": 0.5487820506095886, + "step": 9836 + }, + { + "epoch": 1.8176369368641083, + "grad_norm": 0.07905194163322449, + "learning_rate": 7.101687463584378e-06, + "loss": 0.5085796117782593, + "step": 9837 + }, + { + "epoch": 1.8178217135730041, + "grad_norm": 0.09129060804843903, + "learning_rate": 7.099778232172452e-06, + "loss": 0.6609033346176147, + "step": 9838 + }, + { + "epoch": 1.8180064902819, + "grad_norm": 0.0898594856262207, + "learning_rate": 7.0978691161802796e-06, + "loss": 0.6358002424240112, + "step": 9839 + }, + { + "epoch": 1.8181912669907958, + "grad_norm": 0.09380216896533966, + "learning_rate": 7.095960115683837e-06, + "loss": 0.589912474155426, + "step": 9840 + }, + { + "epoch": 1.8183760436996916, + "grad_norm": 0.06584444642066956, + "learning_rate": 7.0940512307590956e-06, + "loss": 0.44311562180519104, + "step": 9841 + }, + { + "epoch": 1.8185608204085875, + "grad_norm": 0.065439872443676, + "learning_rate": 7.0921424614820244e-06, + "loss": 0.4536382853984833, + "step": 9842 + }, + { + "epoch": 1.8187455971174833, + "grad_norm": 0.08510088175535202, + "learning_rate": 7.090233807928589e-06, + "loss": 0.612328827381134, + "step": 9843 + }, + { + "epoch": 1.8189303738263791, + "grad_norm": 0.07813709229230881, + "learning_rate": 7.088325270174739e-06, + "loss": 0.5523490309715271, + "step": 9844 + }, + { + "epoch": 1.819115150535275, + "grad_norm": 0.057043276727199554, + "learning_rate": 7.086416848296435e-06, + "loss": 0.4329480230808258, + "step": 9845 + }, + { + "epoch": 1.8192999272441708, + "grad_norm": 0.08415151387453079, + "learning_rate": 7.0845085423696295e-06, + "loss": 0.5207234621047974, + "step": 9846 + }, + { + "epoch": 1.8194847039530666, + "grad_norm": 0.06661385297775269, + "learning_rate": 7.082600352470256e-06, + "loss": 0.35886847972869873, + "step": 9847 + }, + { + "epoch": 1.8196694806619624, + "grad_norm": 0.08949161320924759, + "learning_rate": 7.080692278674264e-06, + "loss": 0.5949727296829224, + "step": 9848 + }, + { + "epoch": 1.8198542573708583, + "grad_norm": 0.07575736194849014, + "learning_rate": 7.078784321057589e-06, + "loss": 0.5442951321601868, + "step": 9849 + }, + { + "epoch": 1.820039034079754, + "grad_norm": 0.05346906557679176, + "learning_rate": 7.076876479696155e-06, + "loss": 0.32708707451820374, + "step": 9850 + }, + { + "epoch": 1.8202238107886501, + "grad_norm": 0.08726691454648972, + "learning_rate": 7.07496875466589e-06, + "loss": 0.5074048638343811, + "step": 9851 + }, + { + "epoch": 1.820408587497546, + "grad_norm": 0.08543438464403152, + "learning_rate": 7.073061146042723e-06, + "loss": 0.5461840629577637, + "step": 9852 + }, + { + "epoch": 1.8205933642064418, + "grad_norm": 0.06283988803625107, + "learning_rate": 7.071153653902562e-06, + "loss": 0.4173544943332672, + "step": 9853 + }, + { + "epoch": 1.8207781409153376, + "grad_norm": 0.06358476728200912, + "learning_rate": 7.069246278321325e-06, + "loss": 0.3695000112056732, + "step": 9854 + }, + { + "epoch": 1.8209629176242335, + "grad_norm": 0.08632160723209381, + "learning_rate": 7.067339019374912e-06, + "loss": 0.5667553544044495, + "step": 9855 + }, + { + "epoch": 1.8211476943331293, + "grad_norm": 0.06680776923894882, + "learning_rate": 7.065431877139232e-06, + "loss": 0.36568430066108704, + "step": 9856 + }, + { + "epoch": 1.8213324710420251, + "grad_norm": 0.08722712099552155, + "learning_rate": 7.063524851690187e-06, + "loss": 0.5414561629295349, + "step": 9857 + }, + { + "epoch": 1.821517247750921, + "grad_norm": 0.04953658580780029, + "learning_rate": 7.061617943103661e-06, + "loss": 0.3062533140182495, + "step": 9858 + }, + { + "epoch": 1.821702024459817, + "grad_norm": 0.06840323656797409, + "learning_rate": 7.0597111514555486e-06, + "loss": 0.3991606831550598, + "step": 9859 + }, + { + "epoch": 1.8218868011687128, + "grad_norm": 0.08398078382015228, + "learning_rate": 7.057804476821736e-06, + "loss": 0.5020613670349121, + "step": 9860 + }, + { + "epoch": 1.8220715778776087, + "grad_norm": 0.0917510837316513, + "learning_rate": 7.055897919278097e-06, + "loss": 0.6168114542961121, + "step": 9861 + }, + { + "epoch": 1.8222563545865045, + "grad_norm": 0.08755534142255783, + "learning_rate": 7.053991478900511e-06, + "loss": 0.6405090689659119, + "step": 9862 + }, + { + "epoch": 1.8224411312954003, + "grad_norm": 0.07648077607154846, + "learning_rate": 7.05208515576485e-06, + "loss": 0.5264204144477844, + "step": 9863 + }, + { + "epoch": 1.8226259080042961, + "grad_norm": 0.10150912404060364, + "learning_rate": 7.050178949946973e-06, + "loss": 0.5225452780723572, + "step": 9864 + }, + { + "epoch": 1.822810684713192, + "grad_norm": 0.10042621940374374, + "learning_rate": 7.048272861522746e-06, + "loss": 0.6024169921875, + "step": 9865 + }, + { + "epoch": 1.8229954614220878, + "grad_norm": 0.07107646018266678, + "learning_rate": 7.046366890568028e-06, + "loss": 0.445407509803772, + "step": 9866 + }, + { + "epoch": 1.8231802381309836, + "grad_norm": 0.0642337054014206, + "learning_rate": 7.044461037158661e-06, + "loss": 0.2864264249801636, + "step": 9867 + }, + { + "epoch": 1.8233650148398794, + "grad_norm": 0.08605214953422546, + "learning_rate": 7.042555301370504e-06, + "loss": 0.5610020160675049, + "step": 9868 + }, + { + "epoch": 1.8235497915487753, + "grad_norm": 0.06902395188808441, + "learning_rate": 7.04064968327939e-06, + "loss": 0.41148707270622253, + "step": 9869 + }, + { + "epoch": 1.823734568257671, + "grad_norm": 0.08039598912000656, + "learning_rate": 7.038744182961159e-06, + "loss": 0.4248596131801605, + "step": 9870 + }, + { + "epoch": 1.823919344966567, + "grad_norm": 0.059146229177713394, + "learning_rate": 7.036838800491648e-06, + "loss": 0.4018034040927887, + "step": 9871 + }, + { + "epoch": 1.8241041216754628, + "grad_norm": 0.08252823352813721, + "learning_rate": 7.03493353594668e-06, + "loss": 0.46294206380844116, + "step": 9872 + }, + { + "epoch": 1.8242888983843586, + "grad_norm": 0.07533363997936249, + "learning_rate": 7.03302838940208e-06, + "loss": 0.5676658153533936, + "step": 9873 + }, + { + "epoch": 1.8244736750932544, + "grad_norm": 0.06901130080223083, + "learning_rate": 7.03112336093367e-06, + "loss": 0.44045159220695496, + "step": 9874 + }, + { + "epoch": 1.8246584518021502, + "grad_norm": 0.05934334546327591, + "learning_rate": 7.02921845061726e-06, + "loss": 0.3102717697620392, + "step": 9875 + }, + { + "epoch": 1.824843228511046, + "grad_norm": 0.08287785947322845, + "learning_rate": 7.02731365852866e-06, + "loss": 0.517486035823822, + "step": 9876 + }, + { + "epoch": 1.825028005219942, + "grad_norm": 0.08595351129770279, + "learning_rate": 7.02540898474368e-06, + "loss": 0.5475971698760986, + "step": 9877 + }, + { + "epoch": 1.8252127819288377, + "grad_norm": 0.0753556564450264, + "learning_rate": 7.023504429338114e-06, + "loss": 0.4455927610397339, + "step": 9878 + }, + { + "epoch": 1.8253975586377336, + "grad_norm": 0.06898584961891174, + "learning_rate": 7.021599992387759e-06, + "loss": 0.5426642894744873, + "step": 9879 + }, + { + "epoch": 1.8255823353466294, + "grad_norm": 0.08508019149303436, + "learning_rate": 7.0196956739684074e-06, + "loss": 0.5580354928970337, + "step": 9880 + }, + { + "epoch": 1.8257671120555254, + "grad_norm": 0.07500471919775009, + "learning_rate": 7.0177914741558415e-06, + "loss": 0.41870805621147156, + "step": 9881 + }, + { + "epoch": 1.8259518887644213, + "grad_norm": 0.08029566705226898, + "learning_rate": 7.015887393025847e-06, + "loss": 0.49106553196907043, + "step": 9882 + }, + { + "epoch": 1.826136665473317, + "grad_norm": 0.07958827912807465, + "learning_rate": 7.013983430654199e-06, + "loss": 0.3987799286842346, + "step": 9883 + }, + { + "epoch": 1.826321442182213, + "grad_norm": 0.05812789127230644, + "learning_rate": 7.012079587116666e-06, + "loss": 0.3668726086616516, + "step": 9884 + }, + { + "epoch": 1.8265062188911088, + "grad_norm": 0.09355711191892624, + "learning_rate": 7.010175862489022e-06, + "loss": 0.4632348418235779, + "step": 9885 + }, + { + "epoch": 1.8266909956000046, + "grad_norm": 0.08564065396785736, + "learning_rate": 7.00827225684702e-06, + "loss": 0.48215222358703613, + "step": 9886 + }, + { + "epoch": 1.8268757723089004, + "grad_norm": 0.0790952816605568, + "learning_rate": 7.006368770266421e-06, + "loss": 0.5680748224258423, + "step": 9887 + }, + { + "epoch": 1.8270605490177965, + "grad_norm": 0.08761174976825714, + "learning_rate": 7.004465402822984e-06, + "loss": 0.5640802979469299, + "step": 9888 + }, + { + "epoch": 1.8272453257266923, + "grad_norm": 0.0727882906794548, + "learning_rate": 7.002562154592449e-06, + "loss": 0.43162351846694946, + "step": 9889 + }, + { + "epoch": 1.8274301024355881, + "grad_norm": 0.09398949891328812, + "learning_rate": 7.0006590256505625e-06, + "loss": 0.5356913805007935, + "step": 9890 + }, + { + "epoch": 1.827614879144484, + "grad_norm": 0.08576202392578125, + "learning_rate": 6.998756016073065e-06, + "loss": 0.507265031337738, + "step": 9891 + }, + { + "epoch": 1.8277996558533798, + "grad_norm": 0.09658868610858917, + "learning_rate": 6.996853125935685e-06, + "loss": 0.5329380631446838, + "step": 9892 + }, + { + "epoch": 1.8279844325622756, + "grad_norm": 0.055821459740400314, + "learning_rate": 6.9949503553141564e-06, + "loss": 0.27001625299453735, + "step": 9893 + }, + { + "epoch": 1.8281692092711714, + "grad_norm": 0.061614297330379486, + "learning_rate": 6.993047704284204e-06, + "loss": 0.29331445693969727, + "step": 9894 + }, + { + "epoch": 1.8283539859800673, + "grad_norm": 0.09182018041610718, + "learning_rate": 6.991145172921543e-06, + "loss": 0.5280823707580566, + "step": 9895 + }, + { + "epoch": 1.828538762688963, + "grad_norm": 0.0736556202173233, + "learning_rate": 6.9892427613018905e-06, + "loss": 0.4377639889717102, + "step": 9896 + }, + { + "epoch": 1.828723539397859, + "grad_norm": 0.08335860818624496, + "learning_rate": 6.987340469500959e-06, + "loss": 0.540762722492218, + "step": 9897 + }, + { + "epoch": 1.8289083161067548, + "grad_norm": 0.0884045958518982, + "learning_rate": 6.985438297594449e-06, + "loss": 0.6356663107872009, + "step": 9898 + }, + { + "epoch": 1.8290930928156506, + "grad_norm": 0.08124842494726181, + "learning_rate": 6.983536245658064e-06, + "loss": 0.5272491574287415, + "step": 9899 + }, + { + "epoch": 1.8292778695245464, + "grad_norm": 0.06727912276983261, + "learning_rate": 6.981634313767501e-06, + "loss": 0.49043992161750793, + "step": 9900 + }, + { + "epoch": 1.8294626462334422, + "grad_norm": 0.0834495797753334, + "learning_rate": 6.979732501998447e-06, + "loss": 0.4925161600112915, + "step": 9901 + }, + { + "epoch": 1.829647422942338, + "grad_norm": 0.08709096163511276, + "learning_rate": 6.9778308104265955e-06, + "loss": 0.5015747547149658, + "step": 9902 + }, + { + "epoch": 1.829832199651234, + "grad_norm": 0.0766579806804657, + "learning_rate": 6.975929239127614e-06, + "loss": 0.5223663449287415, + "step": 9903 + }, + { + "epoch": 1.8300169763601297, + "grad_norm": 0.06918003410100937, + "learning_rate": 6.974027788177191e-06, + "loss": 0.45638301968574524, + "step": 9904 + }, + { + "epoch": 1.8302017530690255, + "grad_norm": 0.072813019156456, + "learning_rate": 6.972126457650999e-06, + "loss": 0.459013968706131, + "step": 9905 + }, + { + "epoch": 1.8303865297779214, + "grad_norm": 0.09804744273424149, + "learning_rate": 6.970225247624698e-06, + "loss": 0.7139219641685486, + "step": 9906 + }, + { + "epoch": 1.8305713064868172, + "grad_norm": 0.0948563814163208, + "learning_rate": 6.968324158173949e-06, + "loss": 0.663381040096283, + "step": 9907 + }, + { + "epoch": 1.830756083195713, + "grad_norm": 0.09061747044324875, + "learning_rate": 6.966423189374422e-06, + "loss": 0.6212316155433655, + "step": 9908 + }, + { + "epoch": 1.8309408599046089, + "grad_norm": 0.06985548883676529, + "learning_rate": 6.964522341301756e-06, + "loss": 0.5033018589019775, + "step": 9909 + }, + { + "epoch": 1.831125636613505, + "grad_norm": 0.05785486102104187, + "learning_rate": 6.9626216140316035e-06, + "loss": 0.31822776794433594, + "step": 9910 + }, + { + "epoch": 1.8313104133224007, + "grad_norm": 0.07244248688220978, + "learning_rate": 6.9607210076396104e-06, + "loss": 0.46352434158325195, + "step": 9911 + }, + { + "epoch": 1.8314951900312966, + "grad_norm": 0.07493920624256134, + "learning_rate": 6.958820522201411e-06, + "loss": 0.5265142321586609, + "step": 9912 + }, + { + "epoch": 1.8316799667401924, + "grad_norm": 0.08248679339885712, + "learning_rate": 6.9569201577926395e-06, + "loss": 0.5041503310203552, + "step": 9913 + }, + { + "epoch": 1.8318647434490882, + "grad_norm": 0.08329528570175171, + "learning_rate": 6.955019914488927e-06, + "loss": 0.573448657989502, + "step": 9914 + }, + { + "epoch": 1.832049520157984, + "grad_norm": 0.09237170219421387, + "learning_rate": 6.953119792365895e-06, + "loss": 0.5240632891654968, + "step": 9915 + }, + { + "epoch": 1.8322342968668799, + "grad_norm": 0.09821862727403641, + "learning_rate": 6.951219791499161e-06, + "loss": 0.6255685091018677, + "step": 9916 + }, + { + "epoch": 1.832419073575776, + "grad_norm": 0.08152344822883606, + "learning_rate": 6.949319911964343e-06, + "loss": 0.6066064238548279, + "step": 9917 + }, + { + "epoch": 1.8326038502846718, + "grad_norm": 0.08967302739620209, + "learning_rate": 6.947420153837047e-06, + "loss": 0.5588473677635193, + "step": 9918 + }, + { + "epoch": 1.8327886269935676, + "grad_norm": 0.06246509402990341, + "learning_rate": 6.945520517192881e-06, + "loss": 0.3397400379180908, + "step": 9919 + }, + { + "epoch": 1.8329734037024634, + "grad_norm": 0.07850545644760132, + "learning_rate": 6.943621002107439e-06, + "loss": 0.4385947585105896, + "step": 9920 + }, + { + "epoch": 1.8331581804113593, + "grad_norm": 0.08367501199245453, + "learning_rate": 6.941721608656319e-06, + "loss": 0.47891995310783386, + "step": 9921 + }, + { + "epoch": 1.833342957120255, + "grad_norm": 0.07400240004062653, + "learning_rate": 6.9398223369151155e-06, + "loss": 0.41493046283721924, + "step": 9922 + }, + { + "epoch": 1.833527733829151, + "grad_norm": 0.085567906498909, + "learning_rate": 6.937923186959402e-06, + "loss": 0.5399067401885986, + "step": 9923 + }, + { + "epoch": 1.8337125105380467, + "grad_norm": 0.0904676541686058, + "learning_rate": 6.936024158864769e-06, + "loss": 0.7474747896194458, + "step": 9924 + }, + { + "epoch": 1.8338972872469426, + "grad_norm": 0.06639987975358963, + "learning_rate": 6.934125252706791e-06, + "loss": 0.33274802565574646, + "step": 9925 + }, + { + "epoch": 1.8340820639558384, + "grad_norm": 0.09406183660030365, + "learning_rate": 6.932226468561034e-06, + "loss": 0.66783607006073, + "step": 9926 + }, + { + "epoch": 1.8342668406647342, + "grad_norm": 0.08418642729520798, + "learning_rate": 6.930327806503061e-06, + "loss": 0.47889599204063416, + "step": 9927 + }, + { + "epoch": 1.83445161737363, + "grad_norm": 0.0629533976316452, + "learning_rate": 6.928429266608446e-06, + "loss": 0.3412812352180481, + "step": 9928 + }, + { + "epoch": 1.8346363940825259, + "grad_norm": 0.0737699642777443, + "learning_rate": 6.926530848952731e-06, + "loss": 0.37405097484588623, + "step": 9929 + }, + { + "epoch": 1.8348211707914217, + "grad_norm": 0.07190654426813126, + "learning_rate": 6.924632553611474e-06, + "loss": 0.4059646427631378, + "step": 9930 + }, + { + "epoch": 1.8350059475003175, + "grad_norm": 0.07069795578718185, + "learning_rate": 6.922734380660221e-06, + "loss": 0.4506323039531708, + "step": 9931 + }, + { + "epoch": 1.8351907242092134, + "grad_norm": 0.07418803125619888, + "learning_rate": 6.920836330174509e-06, + "loss": 0.3802109956741333, + "step": 9932 + }, + { + "epoch": 1.8353755009181092, + "grad_norm": 0.08620359003543854, + "learning_rate": 6.918938402229882e-06, + "loss": 0.4469900131225586, + "step": 9933 + }, + { + "epoch": 1.835560277627005, + "grad_norm": 0.07176492363214493, + "learning_rate": 6.9170405969018626e-06, + "loss": 0.475315660238266, + "step": 9934 + }, + { + "epoch": 1.8357450543359008, + "grad_norm": 0.09536311775445938, + "learning_rate": 6.915142914265984e-06, + "loss": 0.7110531330108643, + "step": 9935 + }, + { + "epoch": 1.8359298310447967, + "grad_norm": 0.07972738891839981, + "learning_rate": 6.913245354397768e-06, + "loss": 0.5264150500297546, + "step": 9936 + }, + { + "epoch": 1.8361146077536925, + "grad_norm": 0.09280319511890411, + "learning_rate": 6.911347917372726e-06, + "loss": 0.5452582836151123, + "step": 9937 + }, + { + "epoch": 1.8362993844625883, + "grad_norm": 0.06958615034818649, + "learning_rate": 6.909450603266376e-06, + "loss": 0.476435124874115, + "step": 9938 + }, + { + "epoch": 1.8364841611714844, + "grad_norm": 0.08039085566997528, + "learning_rate": 6.907553412154223e-06, + "loss": 0.5513376593589783, + "step": 9939 + }, + { + "epoch": 1.8366689378803802, + "grad_norm": 0.0806492269039154, + "learning_rate": 6.905656344111768e-06, + "loss": 0.48370277881622314, + "step": 9940 + }, + { + "epoch": 1.836853714589276, + "grad_norm": 0.07488995045423508, + "learning_rate": 6.903759399214509e-06, + "loss": 0.41942453384399414, + "step": 9941 + }, + { + "epoch": 1.8370384912981719, + "grad_norm": 0.08002443611621857, + "learning_rate": 6.901862577537945e-06, + "loss": 0.4895157217979431, + "step": 9942 + }, + { + "epoch": 1.8372232680070677, + "grad_norm": 0.09190994501113892, + "learning_rate": 6.899965879157549e-06, + "loss": 0.590259850025177, + "step": 9943 + }, + { + "epoch": 1.8374080447159635, + "grad_norm": 0.09339113533496857, + "learning_rate": 6.898069304148816e-06, + "loss": 0.5751887559890747, + "step": 9944 + }, + { + "epoch": 1.8375928214248594, + "grad_norm": 0.08202441781759262, + "learning_rate": 6.896172852587224e-06, + "loss": 0.607812225818634, + "step": 9945 + }, + { + "epoch": 1.8377775981337552, + "grad_norm": 0.05778725445270538, + "learning_rate": 6.8942765245482355e-06, + "loss": 0.36201390624046326, + "step": 9946 + }, + { + "epoch": 1.8379623748426512, + "grad_norm": 0.05295009911060333, + "learning_rate": 6.892380320107326e-06, + "loss": 0.2442961186170578, + "step": 9947 + }, + { + "epoch": 1.838147151551547, + "grad_norm": 0.0946890339255333, + "learning_rate": 6.8904842393399605e-06, + "loss": 0.6545706987380981, + "step": 9948 + }, + { + "epoch": 1.838331928260443, + "grad_norm": 0.06986173987388611, + "learning_rate": 6.888588282321591e-06, + "loss": 0.4297598898410797, + "step": 9949 + }, + { + "epoch": 1.8385167049693387, + "grad_norm": 0.06517749279737473, + "learning_rate": 6.886692449127676e-06, + "loss": 0.382990300655365, + "step": 9950 + }, + { + "epoch": 1.8387014816782346, + "grad_norm": 0.08499684184789658, + "learning_rate": 6.884796739833659e-06, + "loss": 0.512717068195343, + "step": 9951 + }, + { + "epoch": 1.8388862583871304, + "grad_norm": 0.08333538472652435, + "learning_rate": 6.8829011545149845e-06, + "loss": 0.5726484060287476, + "step": 9952 + }, + { + "epoch": 1.8390710350960262, + "grad_norm": 0.07835826277732849, + "learning_rate": 6.881005693247096e-06, + "loss": 0.49940311908721924, + "step": 9953 + }, + { + "epoch": 1.839255811804922, + "grad_norm": 0.07669610530138016, + "learning_rate": 6.87911035610542e-06, + "loss": 0.4895862638950348, + "step": 9954 + }, + { + "epoch": 1.8394405885138179, + "grad_norm": 0.07560370862483978, + "learning_rate": 6.877215143165387e-06, + "loss": 0.5197358131408691, + "step": 9955 + }, + { + "epoch": 1.8396253652227137, + "grad_norm": 0.09632917493581772, + "learning_rate": 6.875320054502424e-06, + "loss": 0.6280316114425659, + "step": 9956 + }, + { + "epoch": 1.8398101419316095, + "grad_norm": 0.08223257213830948, + "learning_rate": 6.873425090191944e-06, + "loss": 0.5331270098686218, + "step": 9957 + }, + { + "epoch": 1.8399949186405054, + "grad_norm": 0.07054473459720612, + "learning_rate": 6.871530250309364e-06, + "loss": 0.46475088596343994, + "step": 9958 + }, + { + "epoch": 1.8401796953494012, + "grad_norm": 0.08357888460159302, + "learning_rate": 6.8696355349300945e-06, + "loss": 0.4834239184856415, + "step": 9959 + }, + { + "epoch": 1.840364472058297, + "grad_norm": 0.06820651888847351, + "learning_rate": 6.867740944129535e-06, + "loss": 0.45789211988449097, + "step": 9960 + }, + { + "epoch": 1.8405492487671928, + "grad_norm": 0.08674849569797516, + "learning_rate": 6.865846477983087e-06, + "loss": 0.5982378125190735, + "step": 9961 + }, + { + "epoch": 1.8407340254760887, + "grad_norm": 0.060728199779987335, + "learning_rate": 6.863952136566147e-06, + "loss": 0.34441497921943665, + "step": 9962 + }, + { + "epoch": 1.8409188021849845, + "grad_norm": 0.0902811661362648, + "learning_rate": 6.862057919954095e-06, + "loss": 0.5118451714515686, + "step": 9963 + }, + { + "epoch": 1.8411035788938803, + "grad_norm": 0.08659269660711288, + "learning_rate": 6.860163828222323e-06, + "loss": 0.7537838220596313, + "step": 9964 + }, + { + "epoch": 1.8412883556027762, + "grad_norm": 0.08275507390499115, + "learning_rate": 6.858269861446209e-06, + "loss": 0.5685074329376221, + "step": 9965 + }, + { + "epoch": 1.841473132311672, + "grad_norm": 0.06703290343284607, + "learning_rate": 6.856376019701124e-06, + "loss": 0.3911975622177124, + "step": 9966 + }, + { + "epoch": 1.8416579090205678, + "grad_norm": 0.07106104493141174, + "learning_rate": 6.85448230306244e-06, + "loss": 0.4847944378852844, + "step": 9967 + }, + { + "epoch": 1.8418426857294636, + "grad_norm": 0.06882786750793457, + "learning_rate": 6.8525887116055155e-06, + "loss": 0.4596867561340332, + "step": 9968 + }, + { + "epoch": 1.8420274624383597, + "grad_norm": 0.07429055124521255, + "learning_rate": 6.850695245405714e-06, + "loss": 0.5174936056137085, + "step": 9969 + }, + { + "epoch": 1.8422122391472555, + "grad_norm": 0.08973759412765503, + "learning_rate": 6.848801904538392e-06, + "loss": 0.6093193888664246, + "step": 9970 + }, + { + "epoch": 1.8423970158561513, + "grad_norm": 0.06997554004192352, + "learning_rate": 6.846908689078892e-06, + "loss": 0.39199551939964294, + "step": 9971 + }, + { + "epoch": 1.8425817925650472, + "grad_norm": 0.06855449080467224, + "learning_rate": 6.845015599102561e-06, + "loss": 0.37640291452407837, + "step": 9972 + }, + { + "epoch": 1.842766569273943, + "grad_norm": 0.08905099332332611, + "learning_rate": 6.843122634684743e-06, + "loss": 0.59391188621521, + "step": 9973 + }, + { + "epoch": 1.8429513459828388, + "grad_norm": 0.09431247413158417, + "learning_rate": 6.841229795900762e-06, + "loss": 0.6495637893676758, + "step": 9974 + }, + { + "epoch": 1.8431361226917347, + "grad_norm": 0.07599987089633942, + "learning_rate": 6.839337082825954e-06, + "loss": 0.5637093782424927, + "step": 9975 + }, + { + "epoch": 1.8433208994006307, + "grad_norm": 0.07558701187372208, + "learning_rate": 6.837444495535646e-06, + "loss": 0.4508266746997833, + "step": 9976 + }, + { + "epoch": 1.8435056761095265, + "grad_norm": 0.08141207695007324, + "learning_rate": 6.835552034105147e-06, + "loss": 0.5105732083320618, + "step": 9977 + }, + { + "epoch": 1.8436904528184224, + "grad_norm": 0.07613011449575424, + "learning_rate": 6.8336596986097795e-06, + "loss": 0.4700372815132141, + "step": 9978 + }, + { + "epoch": 1.8438752295273182, + "grad_norm": 0.09200368821620941, + "learning_rate": 6.83176748912485e-06, + "loss": 0.5477043390274048, + "step": 9979 + }, + { + "epoch": 1.844060006236214, + "grad_norm": 0.07809238880872726, + "learning_rate": 6.829875405725661e-06, + "loss": 0.461892694234848, + "step": 9980 + }, + { + "epoch": 1.8442447829451099, + "grad_norm": 0.09412237256765366, + "learning_rate": 6.827983448487514e-06, + "loss": 0.5485309362411499, + "step": 9981 + }, + { + "epoch": 1.8444295596540057, + "grad_norm": 0.0677417665719986, + "learning_rate": 6.826091617485704e-06, + "loss": 0.41071319580078125, + "step": 9982 + }, + { + "epoch": 1.8446143363629015, + "grad_norm": 0.09405647963285446, + "learning_rate": 6.8241999127955125e-06, + "loss": 0.6819890141487122, + "step": 9983 + }, + { + "epoch": 1.8447991130717973, + "grad_norm": 0.06098010018467903, + "learning_rate": 6.822308334492234e-06, + "loss": 0.33615392446517944, + "step": 9984 + }, + { + "epoch": 1.8449838897806932, + "grad_norm": 0.0719384029507637, + "learning_rate": 6.82041688265114e-06, + "loss": 0.3856649398803711, + "step": 9985 + }, + { + "epoch": 1.845168666489589, + "grad_norm": 0.07117690145969391, + "learning_rate": 6.818525557347504e-06, + "loss": 0.42119550704956055, + "step": 9986 + }, + { + "epoch": 1.8453534431984848, + "grad_norm": 0.07510238140821457, + "learning_rate": 6.816634358656601e-06, + "loss": 0.4040794372558594, + "step": 9987 + }, + { + "epoch": 1.8455382199073807, + "grad_norm": 0.06964851915836334, + "learning_rate": 6.814743286653689e-06, + "loss": 0.43530791997909546, + "step": 9988 + }, + { + "epoch": 1.8457229966162765, + "grad_norm": 0.09477655589580536, + "learning_rate": 6.8128523414140266e-06, + "loss": 0.7339559197425842, + "step": 9989 + }, + { + "epoch": 1.8459077733251723, + "grad_norm": 0.06167411431670189, + "learning_rate": 6.810961523012875e-06, + "loss": 0.3423909544944763, + "step": 9990 + }, + { + "epoch": 1.8460925500340681, + "grad_norm": 0.08428163081407547, + "learning_rate": 6.8090708315254725e-06, + "loss": 0.5697319507598877, + "step": 9991 + }, + { + "epoch": 1.846277326742964, + "grad_norm": 0.10079807043075562, + "learning_rate": 6.807180267027069e-06, + "loss": 0.6265227794647217, + "step": 9992 + }, + { + "epoch": 1.8464621034518598, + "grad_norm": 0.08388925343751907, + "learning_rate": 6.8052898295929046e-06, + "loss": 0.4699854552745819, + "step": 9993 + }, + { + "epoch": 1.8466468801607556, + "grad_norm": 0.06862840056419373, + "learning_rate": 6.8033995192982085e-06, + "loss": 0.4226209223270416, + "step": 9994 + }, + { + "epoch": 1.8468316568696515, + "grad_norm": 0.0832512304186821, + "learning_rate": 6.801509336218208e-06, + "loss": 0.5740237236022949, + "step": 9995 + }, + { + "epoch": 1.8470164335785473, + "grad_norm": 0.07339507341384888, + "learning_rate": 6.799619280428133e-06, + "loss": 0.3959071934223175, + "step": 9996 + }, + { + "epoch": 1.847201210287443, + "grad_norm": 0.08081066608428955, + "learning_rate": 6.797729352003196e-06, + "loss": 0.5652762055397034, + "step": 9997 + }, + { + "epoch": 1.8473859869963392, + "grad_norm": 0.07260546088218689, + "learning_rate": 6.795839551018616e-06, + "loss": 0.45198237895965576, + "step": 9998 + }, + { + "epoch": 1.847570763705235, + "grad_norm": 0.07437772303819656, + "learning_rate": 6.79394987754959e-06, + "loss": 0.41886457800865173, + "step": 9999 + }, + { + "epoch": 1.8477555404141308, + "grad_norm": 0.07442960888147354, + "learning_rate": 6.792060331671333e-06, + "loss": 0.4797389507293701, + "step": 10000 + }, + { + "epoch": 1.8477555404141308, + "eval_loss": 0.5581634044647217, + "eval_runtime": 156.285, + "eval_samples_per_second": 116.64, + "eval_steps_per_second": 14.582, + "step": 10000 + }, + { + "epoch": 1.8479403171230266, + "grad_norm": 0.06269600987434387, + "learning_rate": 6.79017091345904e-06, + "loss": 0.37403443455696106, + "step": 10001 + }, + { + "epoch": 1.8481250938319225, + "grad_norm": 0.06929280608892441, + "learning_rate": 6.7882816229879e-06, + "loss": 0.46427372097969055, + "step": 10002 + }, + { + "epoch": 1.8483098705408183, + "grad_norm": 0.08431359380483627, + "learning_rate": 6.7863924603331e-06, + "loss": 0.6038509607315063, + "step": 10003 + }, + { + "epoch": 1.8484946472497141, + "grad_norm": 0.11570043861865997, + "learning_rate": 6.784503425569833e-06, + "loss": 0.7939857244491577, + "step": 10004 + }, + { + "epoch": 1.8486794239586102, + "grad_norm": 0.07394532114267349, + "learning_rate": 6.782614518773265e-06, + "loss": 0.4864119291305542, + "step": 10005 + }, + { + "epoch": 1.848864200667506, + "grad_norm": 0.06036113202571869, + "learning_rate": 6.7807257400185745e-06, + "loss": 0.29472512006759644, + "step": 10006 + }, + { + "epoch": 1.8490489773764018, + "grad_norm": 0.08272892236709595, + "learning_rate": 6.778837089380927e-06, + "loss": 0.4968661069869995, + "step": 10007 + }, + { + "epoch": 1.8492337540852977, + "grad_norm": 0.07526808977127075, + "learning_rate": 6.7769485669354865e-06, + "loss": 0.46775147318840027, + "step": 10008 + }, + { + "epoch": 1.8494185307941935, + "grad_norm": 0.0638841912150383, + "learning_rate": 6.775060172757408e-06, + "loss": 0.36736905574798584, + "step": 10009 + }, + { + "epoch": 1.8496033075030893, + "grad_norm": 0.07938042283058167, + "learning_rate": 6.773171906921847e-06, + "loss": 0.4630228579044342, + "step": 10010 + }, + { + "epoch": 1.8497880842119852, + "grad_norm": 0.06860628724098206, + "learning_rate": 6.771283769503948e-06, + "loss": 0.42078661918640137, + "step": 10011 + }, + { + "epoch": 1.849972860920881, + "grad_norm": 0.07782910764217377, + "learning_rate": 6.769395760578852e-06, + "loss": 0.5826637744903564, + "step": 10012 + }, + { + "epoch": 1.8501576376297768, + "grad_norm": 0.07134412974119186, + "learning_rate": 6.7675078802217e-06, + "loss": 0.45725923776626587, + "step": 10013 + }, + { + "epoch": 1.8503424143386726, + "grad_norm": 0.09386952221393585, + "learning_rate": 6.7656201285076195e-06, + "loss": 0.583639919757843, + "step": 10014 + }, + { + "epoch": 1.8505271910475685, + "grad_norm": 0.07963576167821884, + "learning_rate": 6.763732505511741e-06, + "loss": 0.5077404975891113, + "step": 10015 + }, + { + "epoch": 1.8507119677564643, + "grad_norm": 0.05521143600344658, + "learning_rate": 6.761845011309181e-06, + "loss": 0.3559388518333435, + "step": 10016 + }, + { + "epoch": 1.8508967444653601, + "grad_norm": 0.07247708737850189, + "learning_rate": 6.7599576459750595e-06, + "loss": 0.4644903838634491, + "step": 10017 + }, + { + "epoch": 1.851081521174256, + "grad_norm": 0.07846806943416595, + "learning_rate": 6.7580704095844894e-06, + "loss": 0.588983416557312, + "step": 10018 + }, + { + "epoch": 1.8512662978831518, + "grad_norm": 0.08637133240699768, + "learning_rate": 6.75618330221257e-06, + "loss": 0.5109580159187317, + "step": 10019 + }, + { + "epoch": 1.8514510745920476, + "grad_norm": 0.08097885549068451, + "learning_rate": 6.754296323934408e-06, + "loss": 0.47517940402030945, + "step": 10020 + }, + { + "epoch": 1.8516358513009434, + "grad_norm": 0.04997225105762482, + "learning_rate": 6.752409474825101e-06, + "loss": 0.26674655079841614, + "step": 10021 + }, + { + "epoch": 1.8518206280098393, + "grad_norm": 0.057102106511592865, + "learning_rate": 6.750522754959734e-06, + "loss": 0.27486395835876465, + "step": 10022 + }, + { + "epoch": 1.852005404718735, + "grad_norm": 0.08661918342113495, + "learning_rate": 6.748636164413392e-06, + "loss": 0.5088651776313782, + "step": 10023 + }, + { + "epoch": 1.852190181427631, + "grad_norm": 0.0762132778763771, + "learning_rate": 6.746749703261165e-06, + "loss": 0.4059827923774719, + "step": 10024 + }, + { + "epoch": 1.8523749581365268, + "grad_norm": 0.08523812144994736, + "learning_rate": 6.7448633715781176e-06, + "loss": 0.48798370361328125, + "step": 10025 + }, + { + "epoch": 1.8525597348454226, + "grad_norm": 0.075056292116642, + "learning_rate": 6.742977169439324e-06, + "loss": 0.4803304076194763, + "step": 10026 + }, + { + "epoch": 1.8527445115543186, + "grad_norm": 0.09188424795866013, + "learning_rate": 6.74109109691985e-06, + "loss": 0.5603108406066895, + "step": 10027 + }, + { + "epoch": 1.8529292882632145, + "grad_norm": 0.06817316263914108, + "learning_rate": 6.739205154094755e-06, + "loss": 0.40446001291275024, + "step": 10028 + }, + { + "epoch": 1.8531140649721103, + "grad_norm": 0.08375546336174011, + "learning_rate": 6.73731934103909e-06, + "loss": 0.5641688704490662, + "step": 10029 + }, + { + "epoch": 1.8532988416810061, + "grad_norm": 0.057858940213918686, + "learning_rate": 6.735433657827912e-06, + "loss": 0.3124054968357086, + "step": 10030 + }, + { + "epoch": 1.853483618389902, + "grad_norm": 0.08280590176582336, + "learning_rate": 6.733548104536258e-06, + "loss": 0.4873959422111511, + "step": 10031 + }, + { + "epoch": 1.8536683950987978, + "grad_norm": 0.07169628143310547, + "learning_rate": 6.731662681239172e-06, + "loss": 0.388360857963562, + "step": 10032 + }, + { + "epoch": 1.8538531718076936, + "grad_norm": 0.07017473876476288, + "learning_rate": 6.729777388011685e-06, + "loss": 0.47163650393486023, + "step": 10033 + }, + { + "epoch": 1.8540379485165894, + "grad_norm": 0.08672241121530533, + "learning_rate": 6.727892224928825e-06, + "loss": 0.44719213247299194, + "step": 10034 + }, + { + "epoch": 1.8542227252254855, + "grad_norm": 0.09848632663488388, + "learning_rate": 6.72600719206562e-06, + "loss": 0.6737756729125977, + "step": 10035 + }, + { + "epoch": 1.8544075019343813, + "grad_norm": 0.08251882344484329, + "learning_rate": 6.724122289497083e-06, + "loss": 0.5112337470054626, + "step": 10036 + }, + { + "epoch": 1.8545922786432771, + "grad_norm": 0.07042793184518814, + "learning_rate": 6.722237517298232e-06, + "loss": 0.4941219985485077, + "step": 10037 + }, + { + "epoch": 1.854777055352173, + "grad_norm": 0.09096898138523102, + "learning_rate": 6.720352875544076e-06, + "loss": 0.4680633842945099, + "step": 10038 + }, + { + "epoch": 1.8549618320610688, + "grad_norm": 0.08553384989500046, + "learning_rate": 6.718468364309609e-06, + "loss": 0.620337724685669, + "step": 10039 + }, + { + "epoch": 1.8551466087699646, + "grad_norm": 0.06293967366218567, + "learning_rate": 6.7165839836698364e-06, + "loss": 0.3222028911113739, + "step": 10040 + }, + { + "epoch": 1.8553313854788605, + "grad_norm": 0.06140477955341339, + "learning_rate": 6.714699733699752e-06, + "loss": 0.4061179459095001, + "step": 10041 + }, + { + "epoch": 1.8555161621877563, + "grad_norm": 0.06999810039997101, + "learning_rate": 6.712815614474337e-06, + "loss": 0.5046561360359192, + "step": 10042 + }, + { + "epoch": 1.855700938896652, + "grad_norm": 0.08148129284381866, + "learning_rate": 6.710931626068573e-06, + "loss": 0.516724169254303, + "step": 10043 + }, + { + "epoch": 1.855885715605548, + "grad_norm": 0.0839998796582222, + "learning_rate": 6.709047768557449e-06, + "loss": 0.42863452434539795, + "step": 10044 + }, + { + "epoch": 1.8560704923144438, + "grad_norm": 0.08073319494724274, + "learning_rate": 6.707164042015921e-06, + "loss": 0.47118139266967773, + "step": 10045 + }, + { + "epoch": 1.8562552690233396, + "grad_norm": 0.07812882214784622, + "learning_rate": 6.705280446518962e-06, + "loss": 0.4859619736671448, + "step": 10046 + }, + { + "epoch": 1.8564400457322354, + "grad_norm": 0.0773615762591362, + "learning_rate": 6.703396982141537e-06, + "loss": 0.39667513966560364, + "step": 10047 + }, + { + "epoch": 1.8566248224411313, + "grad_norm": 0.07865162938833237, + "learning_rate": 6.701513648958595e-06, + "loss": 0.56267911195755, + "step": 10048 + }, + { + "epoch": 1.856809599150027, + "grad_norm": 0.0917169600725174, + "learning_rate": 6.699630447045092e-06, + "loss": 0.6247175335884094, + "step": 10049 + }, + { + "epoch": 1.856994375858923, + "grad_norm": 0.07467745244503021, + "learning_rate": 6.697747376475972e-06, + "loss": 0.36883577704429626, + "step": 10050 + }, + { + "epoch": 1.8571791525678187, + "grad_norm": 0.071692556142807, + "learning_rate": 6.695864437326171e-06, + "loss": 0.4360310435295105, + "step": 10051 + }, + { + "epoch": 1.8573639292767146, + "grad_norm": 0.08954844623804092, + "learning_rate": 6.693981629670634e-06, + "loss": 0.544607400894165, + "step": 10052 + }, + { + "epoch": 1.8575487059856104, + "grad_norm": 0.06607211381196976, + "learning_rate": 6.6920989535842805e-06, + "loss": 0.41852515935897827, + "step": 10053 + }, + { + "epoch": 1.8577334826945062, + "grad_norm": 0.09537258744239807, + "learning_rate": 6.690216409142041e-06, + "loss": 0.6173677444458008, + "step": 10054 + }, + { + "epoch": 1.857918259403402, + "grad_norm": 0.0744006410241127, + "learning_rate": 6.688333996418834e-06, + "loss": 0.4436899721622467, + "step": 10055 + }, + { + "epoch": 1.8581030361122979, + "grad_norm": 0.10246053338050842, + "learning_rate": 6.686451715489573e-06, + "loss": 0.6799331903457642, + "step": 10056 + }, + { + "epoch": 1.858287812821194, + "grad_norm": 0.0734836682677269, + "learning_rate": 6.684569566429164e-06, + "loss": 0.5199905633926392, + "step": 10057 + }, + { + "epoch": 1.8584725895300898, + "grad_norm": 0.07924776524305344, + "learning_rate": 6.682687549312521e-06, + "loss": 0.4931090176105499, + "step": 10058 + }, + { + "epoch": 1.8586573662389856, + "grad_norm": 0.060191184282302856, + "learning_rate": 6.680805664214527e-06, + "loss": 0.4219018518924713, + "step": 10059 + }, + { + "epoch": 1.8588421429478814, + "grad_norm": 0.08619184046983719, + "learning_rate": 6.678923911210086e-06, + "loss": 0.6257535815238953, + "step": 10060 + }, + { + "epoch": 1.8590269196567772, + "grad_norm": 0.080179862678051, + "learning_rate": 6.677042290374086e-06, + "loss": 0.5102938413619995, + "step": 10061 + }, + { + "epoch": 1.859211696365673, + "grad_norm": 0.0652344822883606, + "learning_rate": 6.675160801781404e-06, + "loss": 0.4639420211315155, + "step": 10062 + }, + { + "epoch": 1.859396473074569, + "grad_norm": 0.08999873697757721, + "learning_rate": 6.673279445506917e-06, + "loss": 0.7283390164375305, + "step": 10063 + }, + { + "epoch": 1.859581249783465, + "grad_norm": 0.07395680248737335, + "learning_rate": 6.671398221625507e-06, + "loss": 0.46372368931770325, + "step": 10064 + }, + { + "epoch": 1.8597660264923608, + "grad_norm": 0.09114869683980942, + "learning_rate": 6.669517130212029e-06, + "loss": 0.5683853626251221, + "step": 10065 + }, + { + "epoch": 1.8599508032012566, + "grad_norm": 0.07969726622104645, + "learning_rate": 6.667636171341352e-06, + "loss": 0.5142582654953003, + "step": 10066 + }, + { + "epoch": 1.8601355799101524, + "grad_norm": 0.09282614290714264, + "learning_rate": 6.665755345088328e-06, + "loss": 0.5591207146644592, + "step": 10067 + }, + { + "epoch": 1.8603203566190483, + "grad_norm": 0.06714857369661331, + "learning_rate": 6.6638746515278086e-06, + "loss": 0.39201489090919495, + "step": 10068 + }, + { + "epoch": 1.860505133327944, + "grad_norm": 0.055099327117204666, + "learning_rate": 6.661994090734642e-06, + "loss": 0.3117598593235016, + "step": 10069 + }, + { + "epoch": 1.86068991003684, + "grad_norm": 0.09940553456544876, + "learning_rate": 6.660113662783667e-06, + "loss": 0.611347496509552, + "step": 10070 + }, + { + "epoch": 1.8608746867457358, + "grad_norm": 0.08205731213092804, + "learning_rate": 6.658233367749719e-06, + "loss": 0.47489482164382935, + "step": 10071 + }, + { + "epoch": 1.8610594634546316, + "grad_norm": 0.06458830088376999, + "learning_rate": 6.65635320570763e-06, + "loss": 0.40867653489112854, + "step": 10072 + }, + { + "epoch": 1.8612442401635274, + "grad_norm": 0.07972311973571777, + "learning_rate": 6.654473176732219e-06, + "loss": 0.5350630879402161, + "step": 10073 + }, + { + "epoch": 1.8614290168724232, + "grad_norm": 0.08452945202589035, + "learning_rate": 6.65259328089831e-06, + "loss": 0.599851667881012, + "step": 10074 + }, + { + "epoch": 1.861613793581319, + "grad_norm": 0.07066328078508377, + "learning_rate": 6.650713518280718e-06, + "loss": 0.44157370924949646, + "step": 10075 + }, + { + "epoch": 1.861798570290215, + "grad_norm": 0.08250157535076141, + "learning_rate": 6.648833888954247e-06, + "loss": 0.5383029580116272, + "step": 10076 + }, + { + "epoch": 1.8619833469991107, + "grad_norm": 0.08089201897382736, + "learning_rate": 6.646954392993703e-06, + "loss": 0.5467923283576965, + "step": 10077 + }, + { + "epoch": 1.8621681237080066, + "grad_norm": 0.07823770493268967, + "learning_rate": 6.645075030473887e-06, + "loss": 0.5012009143829346, + "step": 10078 + }, + { + "epoch": 1.8623529004169024, + "grad_norm": 0.06355928629636765, + "learning_rate": 6.643195801469584e-06, + "loss": 0.30434858798980713, + "step": 10079 + }, + { + "epoch": 1.8625376771257982, + "grad_norm": 0.08099062740802765, + "learning_rate": 6.6413167060555904e-06, + "loss": 0.5842652320861816, + "step": 10080 + }, + { + "epoch": 1.862722453834694, + "grad_norm": 0.06439043581485748, + "learning_rate": 6.63943774430668e-06, + "loss": 0.41050440073013306, + "step": 10081 + }, + { + "epoch": 1.8629072305435899, + "grad_norm": 0.08004370331764221, + "learning_rate": 6.637558916297635e-06, + "loss": 0.5602760314941406, + "step": 10082 + }, + { + "epoch": 1.8630920072524857, + "grad_norm": 0.07466733455657959, + "learning_rate": 6.6356802221032265e-06, + "loss": 0.4141661524772644, + "step": 10083 + }, + { + "epoch": 1.8632767839613815, + "grad_norm": 0.07983822375535965, + "learning_rate": 6.633801661798218e-06, + "loss": 0.523760199546814, + "step": 10084 + }, + { + "epoch": 1.8634615606702774, + "grad_norm": 0.10091492533683777, + "learning_rate": 6.631923235457371e-06, + "loss": 0.3581887185573578, + "step": 10085 + }, + { + "epoch": 1.8636463373791734, + "grad_norm": 0.0627380982041359, + "learning_rate": 6.630044943155445e-06, + "loss": 0.33766254782676697, + "step": 10086 + }, + { + "epoch": 1.8638311140880692, + "grad_norm": 0.0785946249961853, + "learning_rate": 6.628166784967185e-06, + "loss": 0.584014356136322, + "step": 10087 + }, + { + "epoch": 1.864015890796965, + "grad_norm": 0.09274392575025558, + "learning_rate": 6.6262887609673365e-06, + "loss": 0.5695738792419434, + "step": 10088 + }, + { + "epoch": 1.8642006675058609, + "grad_norm": 0.07663106918334961, + "learning_rate": 6.6244108712306435e-06, + "loss": 0.4880254566669464, + "step": 10089 + }, + { + "epoch": 1.8643854442147567, + "grad_norm": 0.07401253283023834, + "learning_rate": 6.622533115831834e-06, + "loss": 0.4619203805923462, + "step": 10090 + }, + { + "epoch": 1.8645702209236525, + "grad_norm": 0.08102946728467941, + "learning_rate": 6.62065549484564e-06, + "loss": 0.4943026900291443, + "step": 10091 + }, + { + "epoch": 1.8647549976325484, + "grad_norm": 0.07213003933429718, + "learning_rate": 6.618778008346787e-06, + "loss": 0.41184893250465393, + "step": 10092 + }, + { + "epoch": 1.8649397743414444, + "grad_norm": 0.0901002511382103, + "learning_rate": 6.616900656409989e-06, + "loss": 0.4627062976360321, + "step": 10093 + }, + { + "epoch": 1.8651245510503403, + "grad_norm": 0.06586375832557678, + "learning_rate": 6.61502343910996e-06, + "loss": 0.4276363253593445, + "step": 10094 + }, + { + "epoch": 1.865309327759236, + "grad_norm": 0.07709570974111557, + "learning_rate": 6.613146356521412e-06, + "loss": 0.46082285046577454, + "step": 10095 + }, + { + "epoch": 1.865494104468132, + "grad_norm": 0.07655750960111618, + "learning_rate": 6.6112694087190375e-06, + "loss": 0.4414830803871155, + "step": 10096 + }, + { + "epoch": 1.8656788811770277, + "grad_norm": 0.06628584116697311, + "learning_rate": 6.609392595777544e-06, + "loss": 0.4861391484737396, + "step": 10097 + }, + { + "epoch": 1.8658636578859236, + "grad_norm": 0.11271800100803375, + "learning_rate": 6.607515917771614e-06, + "loss": 0.5347905158996582, + "step": 10098 + }, + { + "epoch": 1.8660484345948194, + "grad_norm": 0.09717021882534027, + "learning_rate": 6.605639374775934e-06, + "loss": 0.5996444225311279, + "step": 10099 + }, + { + "epoch": 1.8662332113037152, + "grad_norm": 0.0778956487774849, + "learning_rate": 6.603762966865195e-06, + "loss": 0.4967779219150543, + "step": 10100 + }, + { + "epoch": 1.866417988012611, + "grad_norm": 0.0822027325630188, + "learning_rate": 6.601886694114058e-06, + "loss": 0.49735188484191895, + "step": 10101 + }, + { + "epoch": 1.8666027647215069, + "grad_norm": 0.08405333012342453, + "learning_rate": 6.600010556597198e-06, + "loss": 0.4059186577796936, + "step": 10102 + }, + { + "epoch": 1.8667875414304027, + "grad_norm": 0.077104352414608, + "learning_rate": 6.598134554389287e-06, + "loss": 0.45331335067749023, + "step": 10103 + }, + { + "epoch": 1.8669723181392985, + "grad_norm": 0.10179364681243896, + "learning_rate": 6.596258687564974e-06, + "loss": 0.6316981315612793, + "step": 10104 + }, + { + "epoch": 1.8671570948481944, + "grad_norm": 0.08048411458730698, + "learning_rate": 6.594382956198915e-06, + "loss": 0.5279122591018677, + "step": 10105 + }, + { + "epoch": 1.8673418715570902, + "grad_norm": 0.08929668366909027, + "learning_rate": 6.592507360365763e-06, + "loss": 0.49193674325942993, + "step": 10106 + }, + { + "epoch": 1.867526648265986, + "grad_norm": 0.06527028977870941, + "learning_rate": 6.5906319001401545e-06, + "loss": 0.40781906247138977, + "step": 10107 + }, + { + "epoch": 1.8677114249748819, + "grad_norm": 0.06002240255475044, + "learning_rate": 6.5887565755967305e-06, + "loss": 0.4073244333267212, + "step": 10108 + }, + { + "epoch": 1.8678962016837777, + "grad_norm": 0.0968843549489975, + "learning_rate": 6.586881386810125e-06, + "loss": 0.584854245185852, + "step": 10109 + }, + { + "epoch": 1.8680809783926735, + "grad_norm": 0.07757356762886047, + "learning_rate": 6.585006333854957e-06, + "loss": 0.49007049202919006, + "step": 10110 + }, + { + "epoch": 1.8682657551015693, + "grad_norm": 0.0810554251074791, + "learning_rate": 6.5831314168058554e-06, + "loss": 0.5097789764404297, + "step": 10111 + }, + { + "epoch": 1.8684505318104652, + "grad_norm": 0.08307379484176636, + "learning_rate": 6.581256635737435e-06, + "loss": 0.5431102514266968, + "step": 10112 + }, + { + "epoch": 1.868635308519361, + "grad_norm": 0.08331498503684998, + "learning_rate": 6.579381990724303e-06, + "loss": 0.5673648118972778, + "step": 10113 + }, + { + "epoch": 1.8688200852282568, + "grad_norm": 0.086639903485775, + "learning_rate": 6.577507481841069e-06, + "loss": 0.5178836584091187, + "step": 10114 + }, + { + "epoch": 1.8690048619371529, + "grad_norm": 0.06253974884748459, + "learning_rate": 6.575633109162324e-06, + "loss": 0.37117743492126465, + "step": 10115 + }, + { + "epoch": 1.8691896386460487, + "grad_norm": 0.07451470196247101, + "learning_rate": 6.57375887276267e-06, + "loss": 0.5134936571121216, + "step": 10116 + }, + { + "epoch": 1.8693744153549445, + "grad_norm": 0.06519269198179245, + "learning_rate": 6.5718847727166965e-06, + "loss": 0.40584704279899597, + "step": 10117 + }, + { + "epoch": 1.8695591920638404, + "grad_norm": 0.05505971610546112, + "learning_rate": 6.570010809098981e-06, + "loss": 0.23253515362739563, + "step": 10118 + }, + { + "epoch": 1.8697439687727362, + "grad_norm": 0.07418741285800934, + "learning_rate": 6.568136981984102e-06, + "loss": 0.46305274963378906, + "step": 10119 + }, + { + "epoch": 1.869928745481632, + "grad_norm": 0.07298068702220917, + "learning_rate": 6.5662632914466405e-06, + "loss": 0.38585028052330017, + "step": 10120 + }, + { + "epoch": 1.8701135221905278, + "grad_norm": 0.06870625913143158, + "learning_rate": 6.564389737561153e-06, + "loss": 0.4443548619747162, + "step": 10121 + }, + { + "epoch": 1.8702982988994237, + "grad_norm": 0.07343483716249466, + "learning_rate": 6.562516320402204e-06, + "loss": 0.43571585416793823, + "step": 10122 + }, + { + "epoch": 1.8704830756083197, + "grad_norm": 0.09324276447296143, + "learning_rate": 6.5606430400443555e-06, + "loss": 0.5833722352981567, + "step": 10123 + }, + { + "epoch": 1.8706678523172156, + "grad_norm": 0.08678468316793442, + "learning_rate": 6.558769896562149e-06, + "loss": 0.6048173308372498, + "step": 10124 + }, + { + "epoch": 1.8708526290261114, + "grad_norm": 0.07251591235399246, + "learning_rate": 6.556896890030137e-06, + "loss": 0.4826517105102539, + "step": 10125 + }, + { + "epoch": 1.8710374057350072, + "grad_norm": 0.06635832786560059, + "learning_rate": 6.555024020522858e-06, + "loss": 0.4049977660179138, + "step": 10126 + }, + { + "epoch": 1.871222182443903, + "grad_norm": 0.0675082802772522, + "learning_rate": 6.55315128811484e-06, + "loss": 0.40836289525032043, + "step": 10127 + }, + { + "epoch": 1.8714069591527989, + "grad_norm": 0.07173550873994827, + "learning_rate": 6.551278692880621e-06, + "loss": 0.45877552032470703, + "step": 10128 + }, + { + "epoch": 1.8715917358616947, + "grad_norm": 0.08252470940351486, + "learning_rate": 6.54940623489472e-06, + "loss": 0.5686274766921997, + "step": 10129 + }, + { + "epoch": 1.8717765125705905, + "grad_norm": 0.06638361513614655, + "learning_rate": 6.547533914231654e-06, + "loss": 0.3055427670478821, + "step": 10130 + }, + { + "epoch": 1.8719612892794864, + "grad_norm": 0.09710148721933365, + "learning_rate": 6.54566173096594e-06, + "loss": 0.6220736503601074, + "step": 10131 + }, + { + "epoch": 1.8721460659883822, + "grad_norm": 0.08042354881763458, + "learning_rate": 6.543789685172077e-06, + "loss": 0.5249215364456177, + "step": 10132 + }, + { + "epoch": 1.872330842697278, + "grad_norm": 0.0686904564499855, + "learning_rate": 6.541917776924574e-06, + "loss": 0.38781800866127014, + "step": 10133 + }, + { + "epoch": 1.8725156194061738, + "grad_norm": 0.06881073862314224, + "learning_rate": 6.540046006297928e-06, + "loss": 0.4447661340236664, + "step": 10134 + }, + { + "epoch": 1.8727003961150697, + "grad_norm": 0.06407603621482849, + "learning_rate": 6.538174373366619e-06, + "loss": 0.32195812463760376, + "step": 10135 + }, + { + "epoch": 1.8728851728239655, + "grad_norm": 0.07901185750961304, + "learning_rate": 6.536302878205143e-06, + "loss": 0.4765346348285675, + "step": 10136 + }, + { + "epoch": 1.8730699495328613, + "grad_norm": 0.08572155982255936, + "learning_rate": 6.534431520887978e-06, + "loss": 0.5873922109603882, + "step": 10137 + }, + { + "epoch": 1.8732547262417572, + "grad_norm": 0.09743129462003708, + "learning_rate": 6.532560301489594e-06, + "loss": 0.5956275463104248, + "step": 10138 + }, + { + "epoch": 1.873439502950653, + "grad_norm": 0.07195150852203369, + "learning_rate": 6.530689220084459e-06, + "loss": 0.4158497452735901, + "step": 10139 + }, + { + "epoch": 1.8736242796595488, + "grad_norm": 0.06035377085208893, + "learning_rate": 6.528818276747044e-06, + "loss": 0.3526591956615448, + "step": 10140 + }, + { + "epoch": 1.8738090563684446, + "grad_norm": 0.09304926544427872, + "learning_rate": 6.526947471551799e-06, + "loss": 0.563854992389679, + "step": 10141 + }, + { + "epoch": 1.8739938330773405, + "grad_norm": 0.10085073858499527, + "learning_rate": 6.525076804573176e-06, + "loss": 0.7046477794647217, + "step": 10142 + }, + { + "epoch": 1.8741786097862363, + "grad_norm": 0.055469196289777756, + "learning_rate": 6.523206275885632e-06, + "loss": 0.26885268092155457, + "step": 10143 + }, + { + "epoch": 1.8743633864951321, + "grad_norm": 0.07149965316057205, + "learning_rate": 6.521335885563595e-06, + "loss": 0.5555588006973267, + "step": 10144 + }, + { + "epoch": 1.8745481632040282, + "grad_norm": 0.07556617259979248, + "learning_rate": 6.5194656336815085e-06, + "loss": 0.5391772985458374, + "step": 10145 + }, + { + "epoch": 1.874732939912924, + "grad_norm": 0.09786257892847061, + "learning_rate": 6.517595520313799e-06, + "loss": 0.6267918944358826, + "step": 10146 + }, + { + "epoch": 1.8749177166218198, + "grad_norm": 0.07329027354717255, + "learning_rate": 6.515725545534894e-06, + "loss": 0.5676301717758179, + "step": 10147 + }, + { + "epoch": 1.8751024933307157, + "grad_norm": 0.09947676956653595, + "learning_rate": 6.513855709419212e-06, + "loss": 0.6026296019554138, + "step": 10148 + }, + { + "epoch": 1.8752872700396115, + "grad_norm": 0.07943307608366013, + "learning_rate": 6.511986012041163e-06, + "loss": 0.4566938281059265, + "step": 10149 + }, + { + "epoch": 1.8754720467485073, + "grad_norm": 0.10105638206005096, + "learning_rate": 6.510116453475159e-06, + "loss": 0.7011048197746277, + "step": 10150 + }, + { + "epoch": 1.8756568234574031, + "grad_norm": 0.08896586298942566, + "learning_rate": 6.508247033795605e-06, + "loss": 0.5455494523048401, + "step": 10151 + }, + { + "epoch": 1.8758416001662992, + "grad_norm": 0.0694127157330513, + "learning_rate": 6.506377753076891e-06, + "loss": 0.3472730219364166, + "step": 10152 + }, + { + "epoch": 1.876026376875195, + "grad_norm": 0.0655612051486969, + "learning_rate": 6.504508611393414e-06, + "loss": 0.3974023163318634, + "step": 10153 + }, + { + "epoch": 1.8762111535840909, + "grad_norm": 0.08311789482831955, + "learning_rate": 6.502639608819561e-06, + "loss": 0.6139525175094604, + "step": 10154 + }, + { + "epoch": 1.8763959302929867, + "grad_norm": 0.10273124277591705, + "learning_rate": 6.5007707454297055e-06, + "loss": 0.5326289534568787, + "step": 10155 + }, + { + "epoch": 1.8765807070018825, + "grad_norm": 0.08947394788265228, + "learning_rate": 6.498902021298227e-06, + "loss": 0.5882757902145386, + "step": 10156 + }, + { + "epoch": 1.8767654837107783, + "grad_norm": 0.06457432359457016, + "learning_rate": 6.497033436499498e-06, + "loss": 0.38947319984436035, + "step": 10157 + }, + { + "epoch": 1.8769502604196742, + "grad_norm": 0.05972028151154518, + "learning_rate": 6.495164991107874e-06, + "loss": 0.3036060035228729, + "step": 10158 + }, + { + "epoch": 1.87713503712857, + "grad_norm": 0.06575573980808258, + "learning_rate": 6.493296685197719e-06, + "loss": 0.41468435525894165, + "step": 10159 + }, + { + "epoch": 1.8773198138374658, + "grad_norm": 0.07296153903007507, + "learning_rate": 6.49142851884339e-06, + "loss": 0.4320918917655945, + "step": 10160 + }, + { + "epoch": 1.8775045905463617, + "grad_norm": 0.07599961757659912, + "learning_rate": 6.489560492119225e-06, + "loss": 0.4930938482284546, + "step": 10161 + }, + { + "epoch": 1.8776893672552575, + "grad_norm": 0.08170460909605026, + "learning_rate": 6.487692605099571e-06, + "loss": 0.5137930512428284, + "step": 10162 + }, + { + "epoch": 1.8778741439641533, + "grad_norm": 0.06708872318267822, + "learning_rate": 6.485824857858762e-06, + "loss": 0.4063275158405304, + "step": 10163 + }, + { + "epoch": 1.8780589206730491, + "grad_norm": 0.08488880097866058, + "learning_rate": 6.483957250471128e-06, + "loss": 0.47834888100624084, + "step": 10164 + }, + { + "epoch": 1.878243697381945, + "grad_norm": 0.07035119831562042, + "learning_rate": 6.482089783010997e-06, + "loss": 0.45612436532974243, + "step": 10165 + }, + { + "epoch": 1.8784284740908408, + "grad_norm": 0.07598753273487091, + "learning_rate": 6.480222455552685e-06, + "loss": 0.4267653226852417, + "step": 10166 + }, + { + "epoch": 1.8786132507997366, + "grad_norm": 0.07850490510463715, + "learning_rate": 6.478355268170507e-06, + "loss": 0.5753515362739563, + "step": 10167 + }, + { + "epoch": 1.8787980275086325, + "grad_norm": 0.06916812807321548, + "learning_rate": 6.476488220938775e-06, + "loss": 0.43277326226234436, + "step": 10168 + }, + { + "epoch": 1.8789828042175283, + "grad_norm": 0.08787582814693451, + "learning_rate": 6.474621313931784e-06, + "loss": 0.4945637881755829, + "step": 10169 + }, + { + "epoch": 1.879167580926424, + "grad_norm": 0.095709890127182, + "learning_rate": 6.4727545472238366e-06, + "loss": 0.5912142395973206, + "step": 10170 + }, + { + "epoch": 1.87935235763532, + "grad_norm": 0.07700788974761963, + "learning_rate": 6.470887920889224e-06, + "loss": 0.3847402334213257, + "step": 10171 + }, + { + "epoch": 1.8795371343442158, + "grad_norm": 0.09229899942874908, + "learning_rate": 6.4690214350022296e-06, + "loss": 0.546176552772522, + "step": 10172 + }, + { + "epoch": 1.8797219110531116, + "grad_norm": 0.06881638616323471, + "learning_rate": 6.4671550896371345e-06, + "loss": 0.43623825907707214, + "step": 10173 + }, + { + "epoch": 1.8799066877620076, + "grad_norm": 0.06641674786806107, + "learning_rate": 6.4652888848682194e-06, + "loss": 0.5173807144165039, + "step": 10174 + }, + { + "epoch": 1.8800914644709035, + "grad_norm": 0.07400946319103241, + "learning_rate": 6.46342282076974e-06, + "loss": 0.4365104138851166, + "step": 10175 + }, + { + "epoch": 1.8802762411797993, + "grad_norm": 0.07894344627857208, + "learning_rate": 6.461556897415972e-06, + "loss": 0.496677041053772, + "step": 10176 + }, + { + "epoch": 1.8804610178886951, + "grad_norm": 0.08894534409046173, + "learning_rate": 6.459691114881172e-06, + "loss": 0.6015393137931824, + "step": 10177 + }, + { + "epoch": 1.880645794597591, + "grad_norm": 0.08057379722595215, + "learning_rate": 6.457825473239583e-06, + "loss": 0.4932987689971924, + "step": 10178 + }, + { + "epoch": 1.8808305713064868, + "grad_norm": 0.08173685520887375, + "learning_rate": 6.4559599725654645e-06, + "loss": 0.5451905727386475, + "step": 10179 + }, + { + "epoch": 1.8810153480153826, + "grad_norm": 0.07516109943389893, + "learning_rate": 6.454094612933046e-06, + "loss": 0.37563127279281616, + "step": 10180 + }, + { + "epoch": 1.8812001247242787, + "grad_norm": 0.054435789585113525, + "learning_rate": 6.4522293944165695e-06, + "loss": 0.39257878065109253, + "step": 10181 + }, + { + "epoch": 1.8813849014331745, + "grad_norm": 0.0689535066485405, + "learning_rate": 6.450364317090265e-06, + "loss": 0.45522540807724, + "step": 10182 + }, + { + "epoch": 1.8815696781420703, + "grad_norm": 0.10657844692468643, + "learning_rate": 6.448499381028355e-06, + "loss": 0.7042019963264465, + "step": 10183 + }, + { + "epoch": 1.8817544548509662, + "grad_norm": 0.09257829934358597, + "learning_rate": 6.4466345863050565e-06, + "loss": 0.5799952745437622, + "step": 10184 + }, + { + "epoch": 1.881939231559862, + "grad_norm": 0.06281529366970062, + "learning_rate": 6.444769932994586e-06, + "loss": 0.3686409294605255, + "step": 10185 + }, + { + "epoch": 1.8821240082687578, + "grad_norm": 0.07586891204118729, + "learning_rate": 6.44290542117115e-06, + "loss": 0.4478495121002197, + "step": 10186 + }, + { + "epoch": 1.8823087849776536, + "grad_norm": 0.08726614713668823, + "learning_rate": 6.441041050908947e-06, + "loss": 0.5721866488456726, + "step": 10187 + }, + { + "epoch": 1.8824935616865495, + "grad_norm": 0.07208137959241867, + "learning_rate": 6.439176822282178e-06, + "loss": 0.49009835720062256, + "step": 10188 + }, + { + "epoch": 1.8826783383954453, + "grad_norm": 0.0700579583644867, + "learning_rate": 6.43731273536503e-06, + "loss": 0.4596255123615265, + "step": 10189 + }, + { + "epoch": 1.8828631151043411, + "grad_norm": 0.07105761021375656, + "learning_rate": 6.4354487902316885e-06, + "loss": 0.4763326644897461, + "step": 10190 + }, + { + "epoch": 1.883047891813237, + "grad_norm": 0.05622998997569084, + "learning_rate": 6.433584986956335e-06, + "loss": 0.31442946195602417, + "step": 10191 + }, + { + "epoch": 1.8832326685221328, + "grad_norm": 0.07619105279445648, + "learning_rate": 6.431721325613138e-06, + "loss": 0.47837021946907043, + "step": 10192 + }, + { + "epoch": 1.8834174452310286, + "grad_norm": 0.073966383934021, + "learning_rate": 6.4298578062762705e-06, + "loss": 0.4140937924385071, + "step": 10193 + }, + { + "epoch": 1.8836022219399244, + "grad_norm": 0.08703131228685379, + "learning_rate": 6.427994429019894e-06, + "loss": 0.4924027919769287, + "step": 10194 + }, + { + "epoch": 1.8837869986488203, + "grad_norm": 0.07850147038698196, + "learning_rate": 6.426131193918162e-06, + "loss": 0.3722976744174957, + "step": 10195 + }, + { + "epoch": 1.883971775357716, + "grad_norm": 0.06796495616436005, + "learning_rate": 6.424268101045231e-06, + "loss": 0.4188387095928192, + "step": 10196 + }, + { + "epoch": 1.884156552066612, + "grad_norm": 0.08321154117584229, + "learning_rate": 6.42240515047524e-06, + "loss": 0.47672274708747864, + "step": 10197 + }, + { + "epoch": 1.8843413287755078, + "grad_norm": 0.0876714438199997, + "learning_rate": 6.4205423422823265e-06, + "loss": 0.5653049349784851, + "step": 10198 + }, + { + "epoch": 1.8845261054844036, + "grad_norm": 0.08870537579059601, + "learning_rate": 6.418679676540635e-06, + "loss": 0.5262870192527771, + "step": 10199 + }, + { + "epoch": 1.8847108821932994, + "grad_norm": 0.0855855643749237, + "learning_rate": 6.4168171533242865e-06, + "loss": 0.4589315950870514, + "step": 10200 + }, + { + "epoch": 1.8848956589021952, + "grad_norm": 0.06956803053617477, + "learning_rate": 6.414954772707403e-06, + "loss": 0.44253531098365784, + "step": 10201 + }, + { + "epoch": 1.885080435611091, + "grad_norm": 0.08228602260351181, + "learning_rate": 6.4130925347641074e-06, + "loss": 0.5241605043411255, + "step": 10202 + }, + { + "epoch": 1.8852652123199871, + "grad_norm": 0.0796494111418724, + "learning_rate": 6.411230439568504e-06, + "loss": 0.4705255627632141, + "step": 10203 + }, + { + "epoch": 1.885449989028883, + "grad_norm": 0.08287597447633743, + "learning_rate": 6.4093684871947e-06, + "loss": 0.5432465076446533, + "step": 10204 + }, + { + "epoch": 1.8856347657377788, + "grad_norm": 0.08077621459960938, + "learning_rate": 6.4075066777167996e-06, + "loss": 0.5836336016654968, + "step": 10205 + }, + { + "epoch": 1.8858195424466746, + "grad_norm": 0.08096127957105637, + "learning_rate": 6.405645011208892e-06, + "loss": 0.4534474313259125, + "step": 10206 + }, + { + "epoch": 1.8860043191555704, + "grad_norm": 0.05567679926753044, + "learning_rate": 6.4037834877450675e-06, + "loss": 0.37423694133758545, + "step": 10207 + }, + { + "epoch": 1.8861890958644663, + "grad_norm": 0.061636678874492645, + "learning_rate": 6.401922107399411e-06, + "loss": 0.3531928062438965, + "step": 10208 + }, + { + "epoch": 1.886373872573362, + "grad_norm": 0.07669080793857574, + "learning_rate": 6.400060870245996e-06, + "loss": 0.40901196002960205, + "step": 10209 + }, + { + "epoch": 1.8865586492822581, + "grad_norm": 0.0789622887969017, + "learning_rate": 6.398199776358899e-06, + "loss": 0.47715774178504944, + "step": 10210 + }, + { + "epoch": 1.886743425991154, + "grad_norm": 0.08765920251607895, + "learning_rate": 6.3963388258121765e-06, + "loss": 0.46810436248779297, + "step": 10211 + }, + { + "epoch": 1.8869282027000498, + "grad_norm": 0.07397231459617615, + "learning_rate": 6.3944780186798964e-06, + "loss": 0.5212484002113342, + "step": 10212 + }, + { + "epoch": 1.8871129794089456, + "grad_norm": 0.08069170266389847, + "learning_rate": 6.392617355036115e-06, + "loss": 0.5141767263412476, + "step": 10213 + }, + { + "epoch": 1.8872977561178415, + "grad_norm": 0.09006567299365997, + "learning_rate": 6.390756834954871e-06, + "loss": 0.47084227204322815, + "step": 10214 + }, + { + "epoch": 1.8874825328267373, + "grad_norm": 0.07339008897542953, + "learning_rate": 6.388896458510214e-06, + "loss": 0.4445154368877411, + "step": 10215 + }, + { + "epoch": 1.887667309535633, + "grad_norm": 0.09226617962121964, + "learning_rate": 6.387036225776187e-06, + "loss": 0.7287762761116028, + "step": 10216 + }, + { + "epoch": 1.887852086244529, + "grad_norm": 0.07907861471176147, + "learning_rate": 6.385176136826808e-06, + "loss": 0.46330586075782776, + "step": 10217 + }, + { + "epoch": 1.8880368629534248, + "grad_norm": 0.07914993166923523, + "learning_rate": 6.383316191736108e-06, + "loss": 0.5281094312667847, + "step": 10218 + }, + { + "epoch": 1.8882216396623206, + "grad_norm": 0.08094493299722672, + "learning_rate": 6.381456390578115e-06, + "loss": 0.4696466326713562, + "step": 10219 + }, + { + "epoch": 1.8884064163712164, + "grad_norm": 0.09042327105998993, + "learning_rate": 6.3795967334268315e-06, + "loss": 0.5085193514823914, + "step": 10220 + }, + { + "epoch": 1.8885911930801123, + "grad_norm": 0.06535433977842331, + "learning_rate": 6.377737220356273e-06, + "loss": 0.36800816655158997, + "step": 10221 + }, + { + "epoch": 1.888775969789008, + "grad_norm": 0.08334439992904663, + "learning_rate": 6.3758778514404415e-06, + "loss": 0.4639260470867157, + "step": 10222 + }, + { + "epoch": 1.888960746497904, + "grad_norm": 0.06565045565366745, + "learning_rate": 6.374018626753331e-06, + "loss": 0.37057387828826904, + "step": 10223 + }, + { + "epoch": 1.8891455232067997, + "grad_norm": 0.06614066660404205, + "learning_rate": 6.372159546368935e-06, + "loss": 0.3984827995300293, + "step": 10224 + }, + { + "epoch": 1.8893302999156956, + "grad_norm": 0.06254734098911285, + "learning_rate": 6.370300610361242e-06, + "loss": 0.3221375048160553, + "step": 10225 + }, + { + "epoch": 1.8895150766245914, + "grad_norm": 0.09444409608840942, + "learning_rate": 6.368441818804225e-06, + "loss": 0.633273184299469, + "step": 10226 + }, + { + "epoch": 1.8896998533334872, + "grad_norm": 0.07186122983694077, + "learning_rate": 6.366583171771865e-06, + "loss": 0.5475074052810669, + "step": 10227 + }, + { + "epoch": 1.889884630042383, + "grad_norm": 0.07813169807195663, + "learning_rate": 6.364724669338125e-06, + "loss": 0.43461698293685913, + "step": 10228 + }, + { + "epoch": 1.8900694067512789, + "grad_norm": 0.08743274956941605, + "learning_rate": 6.362866311576971e-06, + "loss": 0.5691038966178894, + "step": 10229 + }, + { + "epoch": 1.8902541834601747, + "grad_norm": 0.07686718553304672, + "learning_rate": 6.361008098562361e-06, + "loss": 0.45107215642929077, + "step": 10230 + }, + { + "epoch": 1.8904389601690705, + "grad_norm": 0.0921458750963211, + "learning_rate": 6.3591500303682385e-06, + "loss": 0.518035888671875, + "step": 10231 + }, + { + "epoch": 1.8906237368779666, + "grad_norm": 0.06643166393041611, + "learning_rate": 6.357292107068556e-06, + "loss": 0.46066051721572876, + "step": 10232 + }, + { + "epoch": 1.8908085135868624, + "grad_norm": 0.08394398540258408, + "learning_rate": 6.355434328737255e-06, + "loss": 0.5157032608985901, + "step": 10233 + }, + { + "epoch": 1.8909932902957582, + "grad_norm": 0.09607004374265671, + "learning_rate": 6.3535766954482595e-06, + "loss": 0.6931172609329224, + "step": 10234 + }, + { + "epoch": 1.891178067004654, + "grad_norm": 0.09028512239456177, + "learning_rate": 6.3517192072755055e-06, + "loss": 0.560213565826416, + "step": 10235 + }, + { + "epoch": 1.89136284371355, + "grad_norm": 0.08541875332593918, + "learning_rate": 6.349861864292916e-06, + "loss": 0.6247984170913696, + "step": 10236 + }, + { + "epoch": 1.8915476204224457, + "grad_norm": 0.0664595291018486, + "learning_rate": 6.348004666574401e-06, + "loss": 0.4602713882923126, + "step": 10237 + }, + { + "epoch": 1.8917323971313416, + "grad_norm": 0.07790371775627136, + "learning_rate": 6.346147614193874e-06, + "loss": 0.4913193881511688, + "step": 10238 + }, + { + "epoch": 1.8919171738402374, + "grad_norm": 0.07915055751800537, + "learning_rate": 6.3442907072252445e-06, + "loss": 0.49132734537124634, + "step": 10239 + }, + { + "epoch": 1.8921019505491334, + "grad_norm": 0.05303800106048584, + "learning_rate": 6.342433945742405e-06, + "loss": 0.3594009280204773, + "step": 10240 + }, + { + "epoch": 1.8922867272580293, + "grad_norm": 0.06748857349157333, + "learning_rate": 6.34057732981925e-06, + "loss": 0.40470314025878906, + "step": 10241 + }, + { + "epoch": 1.892471503966925, + "grad_norm": 0.07024706900119781, + "learning_rate": 6.338720859529672e-06, + "loss": 0.43911558389663696, + "step": 10242 + }, + { + "epoch": 1.892656280675821, + "grad_norm": 0.08209729939699173, + "learning_rate": 6.336864534947547e-06, + "loss": 0.5497640371322632, + "step": 10243 + }, + { + "epoch": 1.8928410573847168, + "grad_norm": 0.07350103557109833, + "learning_rate": 6.335008356146755e-06, + "loss": 0.45002931356430054, + "step": 10244 + }, + { + "epoch": 1.8930258340936126, + "grad_norm": 0.06537279486656189, + "learning_rate": 6.333152323201161e-06, + "loss": 0.4208161532878876, + "step": 10245 + }, + { + "epoch": 1.8932106108025084, + "grad_norm": 0.08805809915065765, + "learning_rate": 6.331296436184633e-06, + "loss": 0.6379576325416565, + "step": 10246 + }, + { + "epoch": 1.8933953875114042, + "grad_norm": 0.07174934446811676, + "learning_rate": 6.329440695171029e-06, + "loss": 0.44198155403137207, + "step": 10247 + }, + { + "epoch": 1.8935801642203, + "grad_norm": 0.09321418404579163, + "learning_rate": 6.327585100234204e-06, + "loss": 0.5571639537811279, + "step": 10248 + }, + { + "epoch": 1.893764940929196, + "grad_norm": 0.08630082756280899, + "learning_rate": 6.325729651447999e-06, + "loss": 0.5520722270011902, + "step": 10249 + }, + { + "epoch": 1.8939497176380917, + "grad_norm": 0.07743130624294281, + "learning_rate": 6.323874348886261e-06, + "loss": 0.4844270348548889, + "step": 10250 + }, + { + "epoch": 1.8941344943469876, + "grad_norm": 0.0724787712097168, + "learning_rate": 6.3220191926228216e-06, + "loss": 0.38706961274147034, + "step": 10251 + }, + { + "epoch": 1.8943192710558834, + "grad_norm": 0.07613388448953629, + "learning_rate": 6.320164182731512e-06, + "loss": 0.5574577450752258, + "step": 10252 + }, + { + "epoch": 1.8945040477647792, + "grad_norm": 0.07308190315961838, + "learning_rate": 6.318309319286158e-06, + "loss": 0.472690612077713, + "step": 10253 + }, + { + "epoch": 1.894688824473675, + "grad_norm": 0.07388962805271149, + "learning_rate": 6.316454602360569e-06, + "loss": 0.3549666106700897, + "step": 10254 + }, + { + "epoch": 1.8948736011825709, + "grad_norm": 0.07017778605222702, + "learning_rate": 6.314600032028564e-06, + "loss": 0.405681848526001, + "step": 10255 + }, + { + "epoch": 1.8950583778914667, + "grad_norm": 0.09609831124544144, + "learning_rate": 6.312745608363952e-06, + "loss": 0.5824297666549683, + "step": 10256 + }, + { + "epoch": 1.8952431546003625, + "grad_norm": 0.10361137241125107, + "learning_rate": 6.310891331440525e-06, + "loss": 0.6679677367210388, + "step": 10257 + }, + { + "epoch": 1.8954279313092584, + "grad_norm": 0.07523815333843231, + "learning_rate": 6.30903720133208e-06, + "loss": 0.3741815984249115, + "step": 10258 + }, + { + "epoch": 1.8956127080181542, + "grad_norm": 0.07877101749181747, + "learning_rate": 6.307183218112412e-06, + "loss": 0.43796366453170776, + "step": 10259 + }, + { + "epoch": 1.89579748472705, + "grad_norm": 0.07526076585054398, + "learning_rate": 6.305329381855294e-06, + "loss": 0.5309394001960754, + "step": 10260 + }, + { + "epoch": 1.8959822614359458, + "grad_norm": 0.08595466613769531, + "learning_rate": 6.303475692634511e-06, + "loss": 0.5667848587036133, + "step": 10261 + }, + { + "epoch": 1.8961670381448419, + "grad_norm": 0.07923302054405212, + "learning_rate": 6.301622150523827e-06, + "loss": 0.5488939881324768, + "step": 10262 + }, + { + "epoch": 1.8963518148537377, + "grad_norm": 0.11377062648534775, + "learning_rate": 6.299768755597011e-06, + "loss": 0.6961948275566101, + "step": 10263 + }, + { + "epoch": 1.8965365915626335, + "grad_norm": 0.08949556201696396, + "learning_rate": 6.297915507927825e-06, + "loss": 0.5056279897689819, + "step": 10264 + }, + { + "epoch": 1.8967213682715294, + "grad_norm": 0.059668708592653275, + "learning_rate": 6.296062407590017e-06, + "loss": 0.32332471013069153, + "step": 10265 + }, + { + "epoch": 1.8969061449804252, + "grad_norm": 0.07532314211130142, + "learning_rate": 6.294209454657336e-06, + "loss": 0.4130173325538635, + "step": 10266 + }, + { + "epoch": 1.897090921689321, + "grad_norm": 0.07130767405033112, + "learning_rate": 6.292356649203528e-06, + "loss": 0.3886146545410156, + "step": 10267 + }, + { + "epoch": 1.8972756983982169, + "grad_norm": 0.07246781140565872, + "learning_rate": 6.290503991302324e-06, + "loss": 0.4462714195251465, + "step": 10268 + }, + { + "epoch": 1.897460475107113, + "grad_norm": 0.0651962012052536, + "learning_rate": 6.288651481027453e-06, + "loss": 0.5322108268737793, + "step": 10269 + }, + { + "epoch": 1.8976452518160087, + "grad_norm": 0.08073778450489044, + "learning_rate": 6.286799118452647e-06, + "loss": 0.534866452217102, + "step": 10270 + }, + { + "epoch": 1.8978300285249046, + "grad_norm": 0.06440076977014542, + "learning_rate": 6.284946903651614e-06, + "loss": 0.4517211616039276, + "step": 10271 + }, + { + "epoch": 1.8980148052338004, + "grad_norm": 0.07085192203521729, + "learning_rate": 6.283094836698074e-06, + "loss": 0.40669822692871094, + "step": 10272 + }, + { + "epoch": 1.8981995819426962, + "grad_norm": 0.09401829540729523, + "learning_rate": 6.281242917665733e-06, + "loss": 0.5307532548904419, + "step": 10273 + }, + { + "epoch": 1.898384358651592, + "grad_norm": 0.09388338774442673, + "learning_rate": 6.279391146628284e-06, + "loss": 0.6148204207420349, + "step": 10274 + }, + { + "epoch": 1.8985691353604879, + "grad_norm": 0.08041196316480637, + "learning_rate": 6.277539523659433e-06, + "loss": 0.5400888919830322, + "step": 10275 + }, + { + "epoch": 1.8987539120693837, + "grad_norm": 0.06962060928344727, + "learning_rate": 6.27568804883286e-06, + "loss": 0.30403396487236023, + "step": 10276 + }, + { + "epoch": 1.8989386887782795, + "grad_norm": 0.09105116128921509, + "learning_rate": 6.273836722222249e-06, + "loss": 0.526177167892456, + "step": 10277 + }, + { + "epoch": 1.8991234654871754, + "grad_norm": 0.08550825715065002, + "learning_rate": 6.271985543901281e-06, + "loss": 0.45866915583610535, + "step": 10278 + }, + { + "epoch": 1.8993082421960712, + "grad_norm": 0.07376360893249512, + "learning_rate": 6.270134513943624e-06, + "loss": 0.46657779812812805, + "step": 10279 + }, + { + "epoch": 1.899493018904967, + "grad_norm": 0.08404447138309479, + "learning_rate": 6.268283632422943e-06, + "loss": 0.5317788124084473, + "step": 10280 + }, + { + "epoch": 1.8996777956138629, + "grad_norm": 0.07737813889980316, + "learning_rate": 6.266432899412901e-06, + "loss": 0.43988361954689026, + "step": 10281 + }, + { + "epoch": 1.8998625723227587, + "grad_norm": 0.06922601908445358, + "learning_rate": 6.264582314987147e-06, + "loss": 0.3500733971595764, + "step": 10282 + }, + { + "epoch": 1.9000473490316545, + "grad_norm": 0.07693938165903091, + "learning_rate": 6.262731879219329e-06, + "loss": 0.508966863155365, + "step": 10283 + }, + { + "epoch": 1.9002321257405503, + "grad_norm": 0.0762089341878891, + "learning_rate": 6.2608815921830936e-06, + "loss": 0.5069466829299927, + "step": 10284 + }, + { + "epoch": 1.9004169024494462, + "grad_norm": 0.06816435605287552, + "learning_rate": 6.2590314539520695e-06, + "loss": 0.4386304020881653, + "step": 10285 + }, + { + "epoch": 1.900601679158342, + "grad_norm": 0.07959213852882385, + "learning_rate": 6.25718146459989e-06, + "loss": 0.47748059034347534, + "step": 10286 + }, + { + "epoch": 1.9007864558672378, + "grad_norm": 0.07564561069011688, + "learning_rate": 6.2553316242001806e-06, + "loss": 0.4964078366756439, + "step": 10287 + }, + { + "epoch": 1.9009712325761337, + "grad_norm": 0.07462283223867416, + "learning_rate": 6.253481932826554e-06, + "loss": 0.5468541383743286, + "step": 10288 + }, + { + "epoch": 1.9011560092850295, + "grad_norm": 0.0738021656870842, + "learning_rate": 6.251632390552626e-06, + "loss": 0.46736302971839905, + "step": 10289 + }, + { + "epoch": 1.9013407859939253, + "grad_norm": 0.06180545315146446, + "learning_rate": 6.249782997452003e-06, + "loss": 0.41027358174324036, + "step": 10290 + }, + { + "epoch": 1.9015255627028214, + "grad_norm": 0.0797877311706543, + "learning_rate": 6.247933753598282e-06, + "loss": 0.5366336107254028, + "step": 10291 + }, + { + "epoch": 1.9017103394117172, + "grad_norm": 0.07238733768463135, + "learning_rate": 6.246084659065064e-06, + "loss": 0.49080348014831543, + "step": 10292 + }, + { + "epoch": 1.901895116120613, + "grad_norm": 0.07169201970100403, + "learning_rate": 6.244235713925926e-06, + "loss": 0.47226986289024353, + "step": 10293 + }, + { + "epoch": 1.9020798928295088, + "grad_norm": 0.08983547240495682, + "learning_rate": 6.242386918254456e-06, + "loss": 0.4665200710296631, + "step": 10294 + }, + { + "epoch": 1.9022646695384047, + "grad_norm": 0.07715783268213272, + "learning_rate": 6.240538272124236e-06, + "loss": 0.5719449520111084, + "step": 10295 + }, + { + "epoch": 1.9024494462473005, + "grad_norm": 0.07715067267417908, + "learning_rate": 6.238689775608827e-06, + "loss": 0.5054954886436462, + "step": 10296 + }, + { + "epoch": 1.9026342229561963, + "grad_norm": 0.08391452580690384, + "learning_rate": 6.236841428781797e-06, + "loss": 0.4786243736743927, + "step": 10297 + }, + { + "epoch": 1.9028189996650924, + "grad_norm": 0.08295056223869324, + "learning_rate": 6.234993231716707e-06, + "loss": 0.6718369126319885, + "step": 10298 + }, + { + "epoch": 1.9030037763739882, + "grad_norm": 0.07840042561292648, + "learning_rate": 6.233145184487106e-06, + "loss": 0.5167830586433411, + "step": 10299 + }, + { + "epoch": 1.903188553082884, + "grad_norm": 0.09705166518688202, + "learning_rate": 6.23129728716654e-06, + "loss": 0.5749402046203613, + "step": 10300 + }, + { + "epoch": 1.9033733297917799, + "grad_norm": 0.07300394028425217, + "learning_rate": 6.2294495398285535e-06, + "loss": 0.4947923719882965, + "step": 10301 + }, + { + "epoch": 1.9035581065006757, + "grad_norm": 0.08533252030611038, + "learning_rate": 6.227601942546678e-06, + "loss": 0.5547330379486084, + "step": 10302 + }, + { + "epoch": 1.9037428832095715, + "grad_norm": 0.05910911411046982, + "learning_rate": 6.2257544953944425e-06, + "loss": 0.36130478978157043, + "step": 10303 + }, + { + "epoch": 1.9039276599184674, + "grad_norm": 0.047769539058208466, + "learning_rate": 6.2239071984453715e-06, + "loss": 0.35184383392333984, + "step": 10304 + }, + { + "epoch": 1.9041124366273632, + "grad_norm": 0.08566884696483612, + "learning_rate": 6.222060051772978e-06, + "loss": 0.5417880415916443, + "step": 10305 + }, + { + "epoch": 1.904297213336259, + "grad_norm": 0.07999825477600098, + "learning_rate": 6.2202130554507755e-06, + "loss": 0.5098516941070557, + "step": 10306 + }, + { + "epoch": 1.9044819900451548, + "grad_norm": 0.06672988831996918, + "learning_rate": 6.21836620955227e-06, + "loss": 0.45859119296073914, + "step": 10307 + }, + { + "epoch": 1.9046667667540507, + "grad_norm": 0.08266816288232803, + "learning_rate": 6.216519514150956e-06, + "loss": 0.5112531185150146, + "step": 10308 + }, + { + "epoch": 1.9048515434629465, + "grad_norm": 0.07994687557220459, + "learning_rate": 6.214672969320332e-06, + "loss": 0.48196014761924744, + "step": 10309 + }, + { + "epoch": 1.9050363201718423, + "grad_norm": 0.08007802069187164, + "learning_rate": 6.212826575133875e-06, + "loss": 0.43633612990379333, + "step": 10310 + }, + { + "epoch": 1.9052210968807382, + "grad_norm": 0.06801573932170868, + "learning_rate": 6.210980331665074e-06, + "loss": 0.3640526235103607, + "step": 10311 + }, + { + "epoch": 1.905405873589634, + "grad_norm": 0.10344184935092926, + "learning_rate": 6.209134238987405e-06, + "loss": 0.7233929634094238, + "step": 10312 + }, + { + "epoch": 1.9055906502985298, + "grad_norm": 0.08550361543893814, + "learning_rate": 6.20728829717433e-06, + "loss": 0.5223844647407532, + "step": 10313 + }, + { + "epoch": 1.9057754270074256, + "grad_norm": 0.10022129863500595, + "learning_rate": 6.205442506299313e-06, + "loss": 0.8361809253692627, + "step": 10314 + }, + { + "epoch": 1.9059602037163215, + "grad_norm": 0.05882253497838974, + "learning_rate": 6.203596866435817e-06, + "loss": 0.32034730911254883, + "step": 10315 + }, + { + "epoch": 1.9061449804252173, + "grad_norm": 0.0817984938621521, + "learning_rate": 6.2017513776572855e-06, + "loss": 0.5322924256324768, + "step": 10316 + }, + { + "epoch": 1.9063297571341131, + "grad_norm": 0.07522819936275482, + "learning_rate": 6.1999060400371666e-06, + "loss": 0.4112682342529297, + "step": 10317 + }, + { + "epoch": 1.906514533843009, + "grad_norm": 0.09052328765392303, + "learning_rate": 6.1980608536488994e-06, + "loss": 0.5937305688858032, + "step": 10318 + }, + { + "epoch": 1.9066993105519048, + "grad_norm": 0.10432654619216919, + "learning_rate": 6.196215818565914e-06, + "loss": 0.532381534576416, + "step": 10319 + }, + { + "epoch": 1.9068840872608008, + "grad_norm": 0.10870999097824097, + "learning_rate": 6.194370934861638e-06, + "loss": 0.7528077960014343, + "step": 10320 + }, + { + "epoch": 1.9070688639696967, + "grad_norm": 0.06587886810302734, + "learning_rate": 6.192526202609495e-06, + "loss": 0.3334970474243164, + "step": 10321 + }, + { + "epoch": 1.9072536406785925, + "grad_norm": 0.08507116138935089, + "learning_rate": 6.190681621882895e-06, + "loss": 0.43544384837150574, + "step": 10322 + }, + { + "epoch": 1.9074384173874883, + "grad_norm": 0.05950252711772919, + "learning_rate": 6.188837192755248e-06, + "loss": 0.35488152503967285, + "step": 10323 + }, + { + "epoch": 1.9076231940963841, + "grad_norm": 0.07645530998706818, + "learning_rate": 6.186992915299959e-06, + "loss": 0.4979608952999115, + "step": 10324 + }, + { + "epoch": 1.90780797080528, + "grad_norm": 0.11194054037332535, + "learning_rate": 6.18514878959042e-06, + "loss": 0.6795698404312134, + "step": 10325 + }, + { + "epoch": 1.9079927475141758, + "grad_norm": 0.08848891407251358, + "learning_rate": 6.183304815700029e-06, + "loss": 0.5858112573623657, + "step": 10326 + }, + { + "epoch": 1.9081775242230716, + "grad_norm": 0.08644504100084305, + "learning_rate": 6.181460993702161e-06, + "loss": 0.5262491703033447, + "step": 10327 + }, + { + "epoch": 1.9083623009319677, + "grad_norm": 0.07708645612001419, + "learning_rate": 6.1796173236702e-06, + "loss": 0.5004913806915283, + "step": 10328 + }, + { + "epoch": 1.9085470776408635, + "grad_norm": 0.0642990693449974, + "learning_rate": 6.177773805677521e-06, + "loss": 0.37086158990859985, + "step": 10329 + }, + { + "epoch": 1.9087318543497593, + "grad_norm": 0.06983623653650284, + "learning_rate": 6.1759304397974786e-06, + "loss": 0.4389687776565552, + "step": 10330 + }, + { + "epoch": 1.9089166310586552, + "grad_norm": 0.07059077173471451, + "learning_rate": 6.174087226103444e-06, + "loss": 0.42408299446105957, + "step": 10331 + }, + { + "epoch": 1.909101407767551, + "grad_norm": 0.04802374541759491, + "learning_rate": 6.172244164668773e-06, + "loss": 0.25084447860717773, + "step": 10332 + }, + { + "epoch": 1.9092861844764468, + "grad_norm": 0.0758056640625, + "learning_rate": 6.1704012555668025e-06, + "loss": 0.5087399482727051, + "step": 10333 + }, + { + "epoch": 1.9094709611853427, + "grad_norm": 0.10258832573890686, + "learning_rate": 6.16855849887088e-06, + "loss": 0.7423362731933594, + "step": 10334 + }, + { + "epoch": 1.9096557378942385, + "grad_norm": 0.06941121816635132, + "learning_rate": 6.166715894654348e-06, + "loss": 0.48833563923835754, + "step": 10335 + }, + { + "epoch": 1.9098405146031343, + "grad_norm": 0.08097316324710846, + "learning_rate": 6.164873442990526e-06, + "loss": 0.4520004093647003, + "step": 10336 + }, + { + "epoch": 1.9100252913120301, + "grad_norm": 0.08406997472047806, + "learning_rate": 6.1630311439527445e-06, + "loss": 0.5130544900894165, + "step": 10337 + }, + { + "epoch": 1.910210068020926, + "grad_norm": 0.07970693707466125, + "learning_rate": 6.161188997614319e-06, + "loss": 0.53461754322052, + "step": 10338 + }, + { + "epoch": 1.9103948447298218, + "grad_norm": 0.09099024534225464, + "learning_rate": 6.159347004048561e-06, + "loss": 0.5752238631248474, + "step": 10339 + }, + { + "epoch": 1.9105796214387176, + "grad_norm": 0.055456504225730896, + "learning_rate": 6.157505163328776e-06, + "loss": 0.3347260653972626, + "step": 10340 + }, + { + "epoch": 1.9107643981476135, + "grad_norm": 0.08471523225307465, + "learning_rate": 6.155663475528264e-06, + "loss": 0.57076096534729, + "step": 10341 + }, + { + "epoch": 1.9109491748565093, + "grad_norm": 0.05895467475056648, + "learning_rate": 6.153821940720317e-06, + "loss": 0.3902541995048523, + "step": 10342 + }, + { + "epoch": 1.911133951565405, + "grad_norm": 0.07855869084596634, + "learning_rate": 6.151980558978227e-06, + "loss": 0.5438547730445862, + "step": 10343 + }, + { + "epoch": 1.911318728274301, + "grad_norm": 0.08026958256959915, + "learning_rate": 6.1501393303752686e-06, + "loss": 0.565049409866333, + "step": 10344 + }, + { + "epoch": 1.9115035049831968, + "grad_norm": 0.08474355936050415, + "learning_rate": 6.148298254984721e-06, + "loss": 0.507989764213562, + "step": 10345 + }, + { + "epoch": 1.9116882816920926, + "grad_norm": 0.09837860614061356, + "learning_rate": 6.146457332879854e-06, + "loss": 0.6434282064437866, + "step": 10346 + }, + { + "epoch": 1.9118730584009884, + "grad_norm": 0.10607185959815979, + "learning_rate": 6.144616564133927e-06, + "loss": 0.7941292524337769, + "step": 10347 + }, + { + "epoch": 1.9120578351098843, + "grad_norm": 0.07790163159370422, + "learning_rate": 6.142775948820198e-06, + "loss": 0.5895043611526489, + "step": 10348 + }, + { + "epoch": 1.91224261181878, + "grad_norm": 0.06479763239622116, + "learning_rate": 6.140935487011924e-06, + "loss": 0.3058730363845825, + "step": 10349 + }, + { + "epoch": 1.9124273885276761, + "grad_norm": 0.07747827470302582, + "learning_rate": 6.139095178782337e-06, + "loss": 0.5066514015197754, + "step": 10350 + }, + { + "epoch": 1.912612165236572, + "grad_norm": 0.0633983463048935, + "learning_rate": 6.1372550242046855e-06, + "loss": 0.39069458842277527, + "step": 10351 + }, + { + "epoch": 1.9127969419454678, + "grad_norm": 0.09156794100999832, + "learning_rate": 6.135415023352203e-06, + "loss": 0.5516767501831055, + "step": 10352 + }, + { + "epoch": 1.9129817186543636, + "grad_norm": 0.07638484984636307, + "learning_rate": 6.133575176298108e-06, + "loss": 0.4340505003929138, + "step": 10353 + }, + { + "epoch": 1.9131664953632594, + "grad_norm": 0.07331151515245438, + "learning_rate": 6.131735483115622e-06, + "loss": 0.4559667408466339, + "step": 10354 + }, + { + "epoch": 1.9133512720721553, + "grad_norm": 0.1106058731675148, + "learning_rate": 6.129895943877967e-06, + "loss": 0.7445495128631592, + "step": 10355 + }, + { + "epoch": 1.913536048781051, + "grad_norm": 0.08542877435684204, + "learning_rate": 6.128056558658342e-06, + "loss": 0.47054705023765564, + "step": 10356 + }, + { + "epoch": 1.9137208254899472, + "grad_norm": 0.07826549559831619, + "learning_rate": 6.126217327529955e-06, + "loss": 0.51534104347229, + "step": 10357 + }, + { + "epoch": 1.913905602198843, + "grad_norm": 0.06726085394620895, + "learning_rate": 6.124378250565996e-06, + "loss": 0.4334796667098999, + "step": 10358 + }, + { + "epoch": 1.9140903789077388, + "grad_norm": 0.07645734399557114, + "learning_rate": 6.122539327839657e-06, + "loss": 0.48238635063171387, + "step": 10359 + }, + { + "epoch": 1.9142751556166346, + "grad_norm": 0.07240963727235794, + "learning_rate": 6.120700559424124e-06, + "loss": 0.4427392780780792, + "step": 10360 + }, + { + "epoch": 1.9144599323255305, + "grad_norm": 0.07713354378938675, + "learning_rate": 6.1188619453925704e-06, + "loss": 0.38899877667427063, + "step": 10361 + }, + { + "epoch": 1.9146447090344263, + "grad_norm": 0.0955655425786972, + "learning_rate": 6.117023485818169e-06, + "loss": 0.581228494644165, + "step": 10362 + }, + { + "epoch": 1.9148294857433221, + "grad_norm": 0.08136298507452011, + "learning_rate": 6.115185180774086e-06, + "loss": 0.5755365490913391, + "step": 10363 + }, + { + "epoch": 1.915014262452218, + "grad_norm": 0.09747554361820221, + "learning_rate": 6.113347030333476e-06, + "loss": 0.5765631198883057, + "step": 10364 + }, + { + "epoch": 1.9151990391611138, + "grad_norm": 0.06636013835668564, + "learning_rate": 6.111509034569496e-06, + "loss": 0.4715322256088257, + "step": 10365 + }, + { + "epoch": 1.9153838158700096, + "grad_norm": 0.10040026158094406, + "learning_rate": 6.109671193555292e-06, + "loss": 0.5345032811164856, + "step": 10366 + }, + { + "epoch": 1.9155685925789054, + "grad_norm": 0.0752132385969162, + "learning_rate": 6.107833507364001e-06, + "loss": 0.50083988904953, + "step": 10367 + }, + { + "epoch": 1.9157533692878013, + "grad_norm": 0.08617395907640457, + "learning_rate": 6.105995976068762e-06, + "loss": 0.5246359705924988, + "step": 10368 + }, + { + "epoch": 1.915938145996697, + "grad_norm": 0.07722268253564835, + "learning_rate": 6.104158599742701e-06, + "loss": 0.5290027260780334, + "step": 10369 + }, + { + "epoch": 1.916122922705593, + "grad_norm": 0.07662174850702286, + "learning_rate": 6.102321378458935e-06, + "loss": 0.4751085937023163, + "step": 10370 + }, + { + "epoch": 1.9163076994144888, + "grad_norm": 0.09481582790613174, + "learning_rate": 6.100484312290584e-06, + "loss": 0.5759310722351074, + "step": 10371 + }, + { + "epoch": 1.9164924761233846, + "grad_norm": 0.06988898664712906, + "learning_rate": 6.098647401310764e-06, + "loss": 0.40731358528137207, + "step": 10372 + }, + { + "epoch": 1.9166772528322804, + "grad_norm": 0.0789935514330864, + "learning_rate": 6.096810645592566e-06, + "loss": 0.45298251509666443, + "step": 10373 + }, + { + "epoch": 1.9168620295411762, + "grad_norm": 0.08178699761629105, + "learning_rate": 6.094974045209094e-06, + "loss": 0.6943897604942322, + "step": 10374 + }, + { + "epoch": 1.917046806250072, + "grad_norm": 0.05865257978439331, + "learning_rate": 6.093137600233438e-06, + "loss": 0.43970754742622375, + "step": 10375 + }, + { + "epoch": 1.917231582958968, + "grad_norm": 0.07162103056907654, + "learning_rate": 6.091301310738682e-06, + "loss": 0.3854061961174011, + "step": 10376 + }, + { + "epoch": 1.9174163596678637, + "grad_norm": 0.07564530521631241, + "learning_rate": 6.0894651767979065e-06, + "loss": 0.4976777732372284, + "step": 10377 + }, + { + "epoch": 1.9176011363767596, + "grad_norm": 0.06435713917016983, + "learning_rate": 6.0876291984841795e-06, + "loss": 0.3928331434726715, + "step": 10378 + }, + { + "epoch": 1.9177859130856556, + "grad_norm": 0.07462138682603836, + "learning_rate": 6.085793375870571e-06, + "loss": 0.4585098922252655, + "step": 10379 + }, + { + "epoch": 1.9179706897945514, + "grad_norm": 0.08818219602108002, + "learning_rate": 6.083957709030143e-06, + "loss": 0.5107340812683105, + "step": 10380 + }, + { + "epoch": 1.9181554665034473, + "grad_norm": 0.08969642221927643, + "learning_rate": 6.082122198035944e-06, + "loss": 0.39215609431266785, + "step": 10381 + }, + { + "epoch": 1.918340243212343, + "grad_norm": 0.0698242038488388, + "learning_rate": 6.080286842961023e-06, + "loss": 0.5694100260734558, + "step": 10382 + }, + { + "epoch": 1.918525019921239, + "grad_norm": 0.0948592871427536, + "learning_rate": 6.078451643878424e-06, + "loss": 0.5535092949867249, + "step": 10383 + }, + { + "epoch": 1.9187097966301347, + "grad_norm": 0.08598551899194717, + "learning_rate": 6.076616600861181e-06, + "loss": 0.4676663875579834, + "step": 10384 + }, + { + "epoch": 1.9188945733390306, + "grad_norm": 0.09912725538015366, + "learning_rate": 6.074781713982322e-06, + "loss": 0.6111690402030945, + "step": 10385 + }, + { + "epoch": 1.9190793500479266, + "grad_norm": 0.06452207267284393, + "learning_rate": 6.0729469833148716e-06, + "loss": 0.3722819685935974, + "step": 10386 + }, + { + "epoch": 1.9192641267568225, + "grad_norm": 0.06180416792631149, + "learning_rate": 6.071112408931843e-06, + "loss": 0.3394249975681305, + "step": 10387 + }, + { + "epoch": 1.9194489034657183, + "grad_norm": 0.09019298851490021, + "learning_rate": 6.0692779909062495e-06, + "loss": 0.6170430183410645, + "step": 10388 + }, + { + "epoch": 1.919633680174614, + "grad_norm": 0.08395922929048538, + "learning_rate": 6.067443729311098e-06, + "loss": 0.5684940218925476, + "step": 10389 + }, + { + "epoch": 1.91981845688351, + "grad_norm": 0.08177103102207184, + "learning_rate": 6.065609624219375e-06, + "loss": 0.5323196053504944, + "step": 10390 + }, + { + "epoch": 1.9200032335924058, + "grad_norm": 0.08991898596286774, + "learning_rate": 6.063775675704088e-06, + "loss": 0.6166906356811523, + "step": 10391 + }, + { + "epoch": 1.9201880103013016, + "grad_norm": 0.07399672269821167, + "learning_rate": 6.061941883838209e-06, + "loss": 0.5431609749794006, + "step": 10392 + }, + { + "epoch": 1.9203727870101974, + "grad_norm": 0.08427660912275314, + "learning_rate": 6.060108248694723e-06, + "loss": 0.441610723733902, + "step": 10393 + }, + { + "epoch": 1.9205575637190933, + "grad_norm": 0.09697717428207397, + "learning_rate": 6.058274770346604e-06, + "loss": 0.5912427306175232, + "step": 10394 + }, + { + "epoch": 1.920742340427989, + "grad_norm": 0.07520875334739685, + "learning_rate": 6.056441448866817e-06, + "loss": 0.5347209572792053, + "step": 10395 + }, + { + "epoch": 1.920927117136885, + "grad_norm": 0.07598564773797989, + "learning_rate": 6.0546082843283206e-06, + "loss": 0.5276317000389099, + "step": 10396 + }, + { + "epoch": 1.9211118938457807, + "grad_norm": 0.07036890089511871, + "learning_rate": 6.052775276804073e-06, + "loss": 0.35731253027915955, + "step": 10397 + }, + { + "epoch": 1.9212966705546766, + "grad_norm": 0.08115974813699722, + "learning_rate": 6.050942426367017e-06, + "loss": 0.5783150792121887, + "step": 10398 + }, + { + "epoch": 1.9214814472635724, + "grad_norm": 0.07887168973684311, + "learning_rate": 6.0491097330901e-06, + "loss": 0.5009549856185913, + "step": 10399 + }, + { + "epoch": 1.9216662239724682, + "grad_norm": 0.08980367332696915, + "learning_rate": 6.0472771970462555e-06, + "loss": 0.5049043297767639, + "step": 10400 + }, + { + "epoch": 1.921851000681364, + "grad_norm": 0.08237417787313461, + "learning_rate": 6.04544481830841e-06, + "loss": 0.5043971538543701, + "step": 10401 + }, + { + "epoch": 1.9220357773902599, + "grad_norm": 0.07935957610607147, + "learning_rate": 6.043612596949489e-06, + "loss": 0.4859786033630371, + "step": 10402 + }, + { + "epoch": 1.9222205540991557, + "grad_norm": 0.07649687677621841, + "learning_rate": 6.041780533042409e-06, + "loss": 0.5247918963432312, + "step": 10403 + }, + { + "epoch": 1.9224053308080515, + "grad_norm": 0.08480358868837357, + "learning_rate": 6.03994862666008e-06, + "loss": 0.5415157079696655, + "step": 10404 + }, + { + "epoch": 1.9225901075169474, + "grad_norm": 0.06939677149057388, + "learning_rate": 6.038116877875409e-06, + "loss": 0.4288090765476227, + "step": 10405 + }, + { + "epoch": 1.9227748842258432, + "grad_norm": 0.06798798590898514, + "learning_rate": 6.0362852867612864e-06, + "loss": 0.35469070076942444, + "step": 10406 + }, + { + "epoch": 1.922959660934739, + "grad_norm": 0.07632050663232803, + "learning_rate": 6.034453853390609e-06, + "loss": 0.40644606947898865, + "step": 10407 + }, + { + "epoch": 1.923144437643635, + "grad_norm": 0.07538872212171555, + "learning_rate": 6.032622577836268e-06, + "loss": 0.4695570468902588, + "step": 10408 + }, + { + "epoch": 1.923329214352531, + "grad_norm": 0.08649013936519623, + "learning_rate": 6.03079146017113e-06, + "loss": 0.5374628901481628, + "step": 10409 + }, + { + "epoch": 1.9235139910614267, + "grad_norm": 0.05631586164236069, + "learning_rate": 6.028960500468073e-06, + "loss": 0.33060532808303833, + "step": 10410 + }, + { + "epoch": 1.9236987677703226, + "grad_norm": 0.0786207765340805, + "learning_rate": 6.0271296987999695e-06, + "loss": 0.5497831702232361, + "step": 10411 + }, + { + "epoch": 1.9238835444792184, + "grad_norm": 0.09200528264045715, + "learning_rate": 6.025299055239671e-06, + "loss": 0.6450908780097961, + "step": 10412 + }, + { + "epoch": 1.9240683211881142, + "grad_norm": 0.07723037153482437, + "learning_rate": 6.023468569860034e-06, + "loss": 0.49986669421195984, + "step": 10413 + }, + { + "epoch": 1.92425309789701, + "grad_norm": 0.08802322298288345, + "learning_rate": 6.02163824273391e-06, + "loss": 0.4276263415813446, + "step": 10414 + }, + { + "epoch": 1.9244378746059059, + "grad_norm": 0.08626127243041992, + "learning_rate": 6.0198080739341345e-06, + "loss": 0.4679073095321655, + "step": 10415 + }, + { + "epoch": 1.924622651314802, + "grad_norm": 0.09432826936244965, + "learning_rate": 6.0179780635335464e-06, + "loss": 0.5690611004829407, + "step": 10416 + }, + { + "epoch": 1.9248074280236978, + "grad_norm": 0.07629045099020004, + "learning_rate": 6.016148211604974e-06, + "loss": 0.5528715252876282, + "step": 10417 + }, + { + "epoch": 1.9249922047325936, + "grad_norm": 0.06245577335357666, + "learning_rate": 6.014318518221237e-06, + "loss": 0.4268755316734314, + "step": 10418 + }, + { + "epoch": 1.9251769814414894, + "grad_norm": 0.10878819972276688, + "learning_rate": 6.012488983455154e-06, + "loss": 0.5934674739837646, + "step": 10419 + }, + { + "epoch": 1.9253617581503852, + "grad_norm": 0.07463783025741577, + "learning_rate": 6.0106596073795356e-06, + "loss": 0.41747212409973145, + "step": 10420 + }, + { + "epoch": 1.925546534859281, + "grad_norm": 0.09554737061262131, + "learning_rate": 6.008830390067182e-06, + "loss": 0.503079891204834, + "step": 10421 + }, + { + "epoch": 1.925731311568177, + "grad_norm": 0.06815576553344727, + "learning_rate": 6.007001331590894e-06, + "loss": 0.4974030554294586, + "step": 10422 + }, + { + "epoch": 1.9259160882770727, + "grad_norm": 0.061820026487112045, + "learning_rate": 6.005172432023458e-06, + "loss": 0.31762582063674927, + "step": 10423 + }, + { + "epoch": 1.9261008649859686, + "grad_norm": 0.0639713779091835, + "learning_rate": 6.003343691437662e-06, + "loss": 0.3796601891517639, + "step": 10424 + }, + { + "epoch": 1.9262856416948644, + "grad_norm": 0.06414221227169037, + "learning_rate": 6.001515109906286e-06, + "loss": 0.32676807045936584, + "step": 10425 + }, + { + "epoch": 1.9264704184037602, + "grad_norm": 0.08235526829957962, + "learning_rate": 5.999686687502096e-06, + "loss": 0.4994039237499237, + "step": 10426 + }, + { + "epoch": 1.926655195112656, + "grad_norm": 0.0586988627910614, + "learning_rate": 5.997858424297859e-06, + "loss": 0.38159024715423584, + "step": 10427 + }, + { + "epoch": 1.9268399718215519, + "grad_norm": 0.09036070853471756, + "learning_rate": 5.996030320366341e-06, + "loss": 0.6151449680328369, + "step": 10428 + }, + { + "epoch": 1.9270247485304477, + "grad_norm": 0.08748619258403778, + "learning_rate": 5.994202375780285e-06, + "loss": 0.585174560546875, + "step": 10429 + }, + { + "epoch": 1.9272095252393435, + "grad_norm": 0.10161363333463669, + "learning_rate": 5.9923745906124395e-06, + "loss": 0.5904964208602905, + "step": 10430 + }, + { + "epoch": 1.9273943019482394, + "grad_norm": 0.07477103918790817, + "learning_rate": 5.990546964935554e-06, + "loss": 0.4831435978412628, + "step": 10431 + }, + { + "epoch": 1.9275790786571352, + "grad_norm": 0.07682488858699799, + "learning_rate": 5.9887194988223506e-06, + "loss": 0.4539066255092621, + "step": 10432 + }, + { + "epoch": 1.927763855366031, + "grad_norm": 0.0927143543958664, + "learning_rate": 5.9868921923455605e-06, + "loss": 0.4629073441028595, + "step": 10433 + }, + { + "epoch": 1.9279486320749268, + "grad_norm": 0.0746363177895546, + "learning_rate": 5.985065045577912e-06, + "loss": 0.3715229630470276, + "step": 10434 + }, + { + "epoch": 1.9281334087838227, + "grad_norm": 0.07532990723848343, + "learning_rate": 5.983238058592107e-06, + "loss": 0.5250852108001709, + "step": 10435 + }, + { + "epoch": 1.9283181854927185, + "grad_norm": 0.09127747267484665, + "learning_rate": 5.981411231460863e-06, + "loss": 0.6466876268386841, + "step": 10436 + }, + { + "epoch": 1.9285029622016143, + "grad_norm": 0.10759834200143814, + "learning_rate": 5.9795845642568795e-06, + "loss": 0.7927917242050171, + "step": 10437 + }, + { + "epoch": 1.9286877389105104, + "grad_norm": 0.07551736384630203, + "learning_rate": 5.977758057052852e-06, + "loss": 0.3971814513206482, + "step": 10438 + }, + { + "epoch": 1.9288725156194062, + "grad_norm": 0.08489684015512466, + "learning_rate": 5.975931709921471e-06, + "loss": 0.5904057621955872, + "step": 10439 + }, + { + "epoch": 1.929057292328302, + "grad_norm": 0.07266104966402054, + "learning_rate": 5.974105522935416e-06, + "loss": 0.5374613404273987, + "step": 10440 + }, + { + "epoch": 1.9292420690371979, + "grad_norm": 0.07506699115037918, + "learning_rate": 5.972279496167366e-06, + "loss": 0.47188636660575867, + "step": 10441 + }, + { + "epoch": 1.9294268457460937, + "grad_norm": 0.06014501675963402, + "learning_rate": 5.970453629689993e-06, + "loss": 0.3415280878543854, + "step": 10442 + }, + { + "epoch": 1.9296116224549895, + "grad_norm": 0.09418486803770065, + "learning_rate": 5.968627923575956e-06, + "loss": 0.6142709851264954, + "step": 10443 + }, + { + "epoch": 1.9297963991638853, + "grad_norm": 0.06263016909360886, + "learning_rate": 5.966802377897916e-06, + "loss": 0.35985496640205383, + "step": 10444 + }, + { + "epoch": 1.9299811758727814, + "grad_norm": 0.09261230379343033, + "learning_rate": 5.964976992728527e-06, + "loss": 0.5534440875053406, + "step": 10445 + }, + { + "epoch": 1.9301659525816772, + "grad_norm": 0.08280779421329498, + "learning_rate": 5.963151768140424e-06, + "loss": 0.38883811235427856, + "step": 10446 + }, + { + "epoch": 1.930350729290573, + "grad_norm": 0.07502814382314682, + "learning_rate": 5.961326704206252e-06, + "loss": 0.46978461742401123, + "step": 10447 + }, + { + "epoch": 1.9305355059994689, + "grad_norm": 0.08471072465181351, + "learning_rate": 5.959501800998646e-06, + "loss": 0.6152291893959045, + "step": 10448 + }, + { + "epoch": 1.9307202827083647, + "grad_norm": 0.06850223988294601, + "learning_rate": 5.9576770585902246e-06, + "loss": 0.42707252502441406, + "step": 10449 + }, + { + "epoch": 1.9309050594172605, + "grad_norm": 0.05500276759266853, + "learning_rate": 5.955852477053606e-06, + "loss": 0.37856414914131165, + "step": 10450 + }, + { + "epoch": 1.9310898361261564, + "grad_norm": 0.09683556854724884, + "learning_rate": 5.954028056461413e-06, + "loss": 0.6295728087425232, + "step": 10451 + }, + { + "epoch": 1.9312746128350522, + "grad_norm": 0.07046552002429962, + "learning_rate": 5.95220379688624e-06, + "loss": 0.502396821975708, + "step": 10452 + }, + { + "epoch": 1.931459389543948, + "grad_norm": 0.08507654815912247, + "learning_rate": 5.950379698400691e-06, + "loss": 0.5647777915000916, + "step": 10453 + }, + { + "epoch": 1.9316441662528439, + "grad_norm": 0.08452076464891434, + "learning_rate": 5.9485557610773655e-06, + "loss": 0.5014138221740723, + "step": 10454 + }, + { + "epoch": 1.9318289429617397, + "grad_norm": 0.07229094952344894, + "learning_rate": 5.9467319849888425e-06, + "loss": 0.5160284638404846, + "step": 10455 + }, + { + "epoch": 1.9320137196706355, + "grad_norm": 0.07167952507734299, + "learning_rate": 5.944908370207707e-06, + "loss": 0.48818081617355347, + "step": 10456 + }, + { + "epoch": 1.9321984963795313, + "grad_norm": 0.08753294497728348, + "learning_rate": 5.943084916806529e-06, + "loss": 0.6104143261909485, + "step": 10457 + }, + { + "epoch": 1.9323832730884272, + "grad_norm": 0.07173937559127808, + "learning_rate": 5.94126162485788e-06, + "loss": 0.4641045033931732, + "step": 10458 + }, + { + "epoch": 1.932568049797323, + "grad_norm": 0.06941964477300644, + "learning_rate": 5.9394384944343216e-06, + "loss": 0.3881646990776062, + "step": 10459 + }, + { + "epoch": 1.9327528265062188, + "grad_norm": 0.08919605612754822, + "learning_rate": 5.937615525608406e-06, + "loss": 0.5695154070854187, + "step": 10460 + }, + { + "epoch": 1.9329376032151147, + "grad_norm": 0.06986981630325317, + "learning_rate": 5.935792718452682e-06, + "loss": 0.44560667872428894, + "step": 10461 + }, + { + "epoch": 1.9331223799240105, + "grad_norm": 0.0688788965344429, + "learning_rate": 5.933970073039694e-06, + "loss": 0.3941502273082733, + "step": 10462 + }, + { + "epoch": 1.9333071566329063, + "grad_norm": 0.06806276738643646, + "learning_rate": 5.932147589441976e-06, + "loss": 0.42018571496009827, + "step": 10463 + }, + { + "epoch": 1.9334919333418021, + "grad_norm": 0.06949281692504883, + "learning_rate": 5.930325267732056e-06, + "loss": 0.40737178921699524, + "step": 10464 + }, + { + "epoch": 1.933676710050698, + "grad_norm": 0.06800030916929245, + "learning_rate": 5.928503107982462e-06, + "loss": 0.5011507868766785, + "step": 10465 + }, + { + "epoch": 1.9338614867595938, + "grad_norm": 0.06391538679599762, + "learning_rate": 5.9266811102657e-06, + "loss": 0.35602039098739624, + "step": 10466 + }, + { + "epoch": 1.9340462634684898, + "grad_norm": 0.07929027080535889, + "learning_rate": 5.924859274654289e-06, + "loss": 0.45571425557136536, + "step": 10467 + }, + { + "epoch": 1.9342310401773857, + "grad_norm": 0.08948726207017899, + "learning_rate": 5.923037601220731e-06, + "loss": 0.5991467237472534, + "step": 10468 + }, + { + "epoch": 1.9344158168862815, + "grad_norm": 0.09263361245393753, + "learning_rate": 5.92121609003752e-06, + "loss": 0.4827702045440674, + "step": 10469 + }, + { + "epoch": 1.9346005935951773, + "grad_norm": 0.06380041688680649, + "learning_rate": 5.919394741177149e-06, + "loss": 0.41103777289390564, + "step": 10470 + }, + { + "epoch": 1.9347853703040732, + "grad_norm": 0.08030567318201065, + "learning_rate": 5.9175735547120975e-06, + "loss": 0.5373403429985046, + "step": 10471 + }, + { + "epoch": 1.934970147012969, + "grad_norm": 0.0762866884469986, + "learning_rate": 5.915752530714848e-06, + "loss": 0.4612323045730591, + "step": 10472 + }, + { + "epoch": 1.9351549237218648, + "grad_norm": 0.0838063657283783, + "learning_rate": 5.9139316692578705e-06, + "loss": 0.472083181142807, + "step": 10473 + }, + { + "epoch": 1.9353397004307609, + "grad_norm": 0.09463243186473846, + "learning_rate": 5.912110970413627e-06, + "loss": 0.6421471238136292, + "step": 10474 + }, + { + "epoch": 1.9355244771396567, + "grad_norm": 0.07890670001506805, + "learning_rate": 5.910290434254579e-06, + "loss": 0.4910679757595062, + "step": 10475 + }, + { + "epoch": 1.9357092538485525, + "grad_norm": 0.07321932166814804, + "learning_rate": 5.908470060853178e-06, + "loss": 0.317918062210083, + "step": 10476 + }, + { + "epoch": 1.9358940305574484, + "grad_norm": 0.08434881269931793, + "learning_rate": 5.906649850281865e-06, + "loss": 0.5010132789611816, + "step": 10477 + }, + { + "epoch": 1.9360788072663442, + "grad_norm": 0.06752678006887436, + "learning_rate": 5.904829802613081e-06, + "loss": 0.379227876663208, + "step": 10478 + }, + { + "epoch": 1.93626358397524, + "grad_norm": 0.08105649054050446, + "learning_rate": 5.903009917919262e-06, + "loss": 0.5523325204849243, + "step": 10479 + }, + { + "epoch": 1.9364483606841358, + "grad_norm": 0.07709749788045883, + "learning_rate": 5.9011901962728276e-06, + "loss": 0.49589478969573975, + "step": 10480 + }, + { + "epoch": 1.9366331373930317, + "grad_norm": 0.07420652359724045, + "learning_rate": 5.8993706377462e-06, + "loss": 0.4581543505191803, + "step": 10481 + }, + { + "epoch": 1.9368179141019275, + "grad_norm": 0.07852959632873535, + "learning_rate": 5.897551242411794e-06, + "loss": 0.42142826318740845, + "step": 10482 + }, + { + "epoch": 1.9370026908108233, + "grad_norm": 0.08900757133960724, + "learning_rate": 5.8957320103420124e-06, + "loss": 0.5019552707672119, + "step": 10483 + }, + { + "epoch": 1.9371874675197192, + "grad_norm": 0.07102083414793015, + "learning_rate": 5.893912941609255e-06, + "loss": 0.42338263988494873, + "step": 10484 + }, + { + "epoch": 1.937372244228615, + "grad_norm": 0.08349662274122238, + "learning_rate": 5.892094036285922e-06, + "loss": 0.4304684102535248, + "step": 10485 + }, + { + "epoch": 1.9375570209375108, + "grad_norm": 0.08405330032110214, + "learning_rate": 5.890275294444386e-06, + "loss": 0.4575079083442688, + "step": 10486 + }, + { + "epoch": 1.9377417976464066, + "grad_norm": 0.06834299117326736, + "learning_rate": 5.888456716157043e-06, + "loss": 0.35891595482826233, + "step": 10487 + }, + { + "epoch": 1.9379265743553025, + "grad_norm": 0.08467060327529907, + "learning_rate": 5.886638301496255e-06, + "loss": 0.5952319502830505, + "step": 10488 + }, + { + "epoch": 1.9381113510641983, + "grad_norm": 0.08719463646411896, + "learning_rate": 5.884820050534392e-06, + "loss": 0.5042622685432434, + "step": 10489 + }, + { + "epoch": 1.9382961277730941, + "grad_norm": 0.07109535485506058, + "learning_rate": 5.8830019633438215e-06, + "loss": 0.4510539472103119, + "step": 10490 + }, + { + "epoch": 1.93848090448199, + "grad_norm": 0.09614437073469162, + "learning_rate": 5.881184039996889e-06, + "loss": 0.6957901120185852, + "step": 10491 + }, + { + "epoch": 1.9386656811908858, + "grad_norm": 0.06927401572465897, + "learning_rate": 5.8793662805659455e-06, + "loss": 0.4295322597026825, + "step": 10492 + }, + { + "epoch": 1.9388504578997816, + "grad_norm": 0.09393969178199768, + "learning_rate": 5.877548685123334e-06, + "loss": 0.5344082117080688, + "step": 10493 + }, + { + "epoch": 1.9390352346086774, + "grad_norm": 0.09615222364664078, + "learning_rate": 5.875731253741386e-06, + "loss": 0.550508975982666, + "step": 10494 + }, + { + "epoch": 1.9392200113175733, + "grad_norm": 0.09041385352611542, + "learning_rate": 5.87391398649243e-06, + "loss": 0.5991291999816895, + "step": 10495 + }, + { + "epoch": 1.9394047880264693, + "grad_norm": 0.09496104717254639, + "learning_rate": 5.87209688344879e-06, + "loss": 0.673936128616333, + "step": 10496 + }, + { + "epoch": 1.9395895647353651, + "grad_norm": 0.07641255855560303, + "learning_rate": 5.870279944682779e-06, + "loss": 0.5525445938110352, + "step": 10497 + }, + { + "epoch": 1.939774341444261, + "grad_norm": 0.09686005860567093, + "learning_rate": 5.868463170266705e-06, + "loss": 0.47172901034355164, + "step": 10498 + }, + { + "epoch": 1.9399591181531568, + "grad_norm": 0.06705188751220703, + "learning_rate": 5.866646560272873e-06, + "loss": 0.4666908085346222, + "step": 10499 + }, + { + "epoch": 1.9401438948620526, + "grad_norm": 0.0796026661992073, + "learning_rate": 5.864830114773574e-06, + "loss": 0.4845815896987915, + "step": 10500 + }, + { + "epoch": 1.9401438948620526, + "eval_loss": 0.5543113946914673, + "eval_runtime": 155.6506, + "eval_samples_per_second": 117.115, + "eval_steps_per_second": 14.642, + "step": 10500 + }, + { + "epoch": 1.9403286715709485, + "grad_norm": 0.08364847302436829, + "learning_rate": 5.8630138338411005e-06, + "loss": 0.5841268301010132, + "step": 10501 + }, + { + "epoch": 1.9405134482798443, + "grad_norm": 0.06963703781366348, + "learning_rate": 5.8611977175477355e-06, + "loss": 0.37664470076560974, + "step": 10502 + }, + { + "epoch": 1.9406982249887401, + "grad_norm": 0.08510028570890427, + "learning_rate": 5.859381765965748e-06, + "loss": 0.5224068760871887, + "step": 10503 + }, + { + "epoch": 1.9408830016976362, + "grad_norm": 0.06919050216674805, + "learning_rate": 5.857565979167419e-06, + "loss": 0.49947017431259155, + "step": 10504 + }, + { + "epoch": 1.941067778406532, + "grad_norm": 0.07207190990447998, + "learning_rate": 5.855750357224998e-06, + "loss": 0.4688243567943573, + "step": 10505 + }, + { + "epoch": 1.9412525551154278, + "grad_norm": 0.1118570864200592, + "learning_rate": 5.853934900210746e-06, + "loss": 0.5335554480552673, + "step": 10506 + }, + { + "epoch": 1.9414373318243237, + "grad_norm": 0.07921619713306427, + "learning_rate": 5.85211960819692e-06, + "loss": 0.47763168811798096, + "step": 10507 + }, + { + "epoch": 1.9416221085332195, + "grad_norm": 0.08585667610168457, + "learning_rate": 5.850304481255751e-06, + "loss": 0.669448733329773, + "step": 10508 + }, + { + "epoch": 1.9418068852421153, + "grad_norm": 0.09483261406421661, + "learning_rate": 5.8484895194594796e-06, + "loss": 0.6821432709693909, + "step": 10509 + }, + { + "epoch": 1.9419916619510111, + "grad_norm": 0.07414834201335907, + "learning_rate": 5.846674722880343e-06, + "loss": 0.4566863179206848, + "step": 10510 + }, + { + "epoch": 1.942176438659907, + "grad_norm": 0.07801329344511032, + "learning_rate": 5.8448600915905555e-06, + "loss": 0.48371413350105286, + "step": 10511 + }, + { + "epoch": 1.9423612153688028, + "grad_norm": 0.09932101517915726, + "learning_rate": 5.8430456256623345e-06, + "loss": 0.5337772965431213, + "step": 10512 + }, + { + "epoch": 1.9425459920776986, + "grad_norm": 0.08487209677696228, + "learning_rate": 5.841231325167896e-06, + "loss": 0.5186351537704468, + "step": 10513 + }, + { + "epoch": 1.9427307687865945, + "grad_norm": 0.07193045318126678, + "learning_rate": 5.839417190179437e-06, + "loss": 0.4164048433303833, + "step": 10514 + }, + { + "epoch": 1.9429155454954903, + "grad_norm": 0.07649257779121399, + "learning_rate": 5.837603220769157e-06, + "loss": 0.5069774389266968, + "step": 10515 + }, + { + "epoch": 1.943100322204386, + "grad_norm": 0.07070234417915344, + "learning_rate": 5.83578941700925e-06, + "loss": 0.34997639060020447, + "step": 10516 + }, + { + "epoch": 1.943285098913282, + "grad_norm": 0.0739535391330719, + "learning_rate": 5.833975778971888e-06, + "loss": 0.41146987676620483, + "step": 10517 + }, + { + "epoch": 1.9434698756221778, + "grad_norm": 0.08722543716430664, + "learning_rate": 5.832162306729261e-06, + "loss": 0.5551208257675171, + "step": 10518 + }, + { + "epoch": 1.9436546523310736, + "grad_norm": 0.08653347194194794, + "learning_rate": 5.830349000353537e-06, + "loss": 0.5518236756324768, + "step": 10519 + }, + { + "epoch": 1.9438394290399694, + "grad_norm": 0.0890008807182312, + "learning_rate": 5.828535859916875e-06, + "loss": 0.6357223987579346, + "step": 10520 + }, + { + "epoch": 1.9440242057488653, + "grad_norm": 0.07691646367311478, + "learning_rate": 5.8267228854914396e-06, + "loss": 0.4500856101512909, + "step": 10521 + }, + { + "epoch": 1.944208982457761, + "grad_norm": 0.06855422258377075, + "learning_rate": 5.824910077149372e-06, + "loss": 0.31975024938583374, + "step": 10522 + }, + { + "epoch": 1.944393759166657, + "grad_norm": 0.10461317747831345, + "learning_rate": 5.8230974349628215e-06, + "loss": 0.6291202902793884, + "step": 10523 + }, + { + "epoch": 1.9445785358755527, + "grad_norm": 0.08101405203342438, + "learning_rate": 5.821284959003923e-06, + "loss": 0.5928642749786377, + "step": 10524 + }, + { + "epoch": 1.9447633125844486, + "grad_norm": 0.06952399015426636, + "learning_rate": 5.819472649344813e-06, + "loss": 0.49268776178359985, + "step": 10525 + }, + { + "epoch": 1.9449480892933446, + "grad_norm": 0.0785820409655571, + "learning_rate": 5.81766050605761e-06, + "loss": 0.4932481348514557, + "step": 10526 + }, + { + "epoch": 1.9451328660022404, + "grad_norm": 0.0837739109992981, + "learning_rate": 5.815848529214439e-06, + "loss": 0.5106244087219238, + "step": 10527 + }, + { + "epoch": 1.9453176427111363, + "grad_norm": 0.08298195898532867, + "learning_rate": 5.814036718887401e-06, + "loss": 0.6543368101119995, + "step": 10528 + }, + { + "epoch": 1.945502419420032, + "grad_norm": 0.0707169771194458, + "learning_rate": 5.812225075148607e-06, + "loss": 0.46865272521972656, + "step": 10529 + }, + { + "epoch": 1.945687196128928, + "grad_norm": 0.08626644313335419, + "learning_rate": 5.810413598070153e-06, + "loss": 0.4652383327484131, + "step": 10530 + }, + { + "epoch": 1.9458719728378238, + "grad_norm": 0.07402166724205017, + "learning_rate": 5.80860228772413e-06, + "loss": 0.4375515878200531, + "step": 10531 + }, + { + "epoch": 1.9460567495467196, + "grad_norm": 0.0773448720574379, + "learning_rate": 5.806791144182622e-06, + "loss": 0.49111664295196533, + "step": 10532 + }, + { + "epoch": 1.9462415262556156, + "grad_norm": 0.06117716431617737, + "learning_rate": 5.804980167517712e-06, + "loss": 0.4226382076740265, + "step": 10533 + }, + { + "epoch": 1.9464263029645115, + "grad_norm": 0.068551205098629, + "learning_rate": 5.803169357801463e-06, + "loss": 0.45732957124710083, + "step": 10534 + }, + { + "epoch": 1.9466110796734073, + "grad_norm": 0.07132992148399353, + "learning_rate": 5.801358715105947e-06, + "loss": 0.5618545413017273, + "step": 10535 + }, + { + "epoch": 1.9467958563823031, + "grad_norm": 0.06258437782526016, + "learning_rate": 5.799548239503214e-06, + "loss": 0.30846071243286133, + "step": 10536 + }, + { + "epoch": 1.946980633091199, + "grad_norm": 0.08688078820705414, + "learning_rate": 5.797737931065316e-06, + "loss": 0.6651712656021118, + "step": 10537 + }, + { + "epoch": 1.9471654098000948, + "grad_norm": 0.08604100346565247, + "learning_rate": 5.79592778986431e-06, + "loss": 0.5880390405654907, + "step": 10538 + }, + { + "epoch": 1.9473501865089906, + "grad_norm": 0.08469399809837341, + "learning_rate": 5.79411781597222e-06, + "loss": 0.544089674949646, + "step": 10539 + }, + { + "epoch": 1.9475349632178864, + "grad_norm": 0.08143501728773117, + "learning_rate": 5.7923080094610825e-06, + "loss": 0.4275970160961151, + "step": 10540 + }, + { + "epoch": 1.9477197399267823, + "grad_norm": 0.06817419826984406, + "learning_rate": 5.7904983704029265e-06, + "loss": 0.4366365075111389, + "step": 10541 + }, + { + "epoch": 1.947904516635678, + "grad_norm": 0.06847912818193436, + "learning_rate": 5.788688898869761e-06, + "loss": 0.3891089856624603, + "step": 10542 + }, + { + "epoch": 1.948089293344574, + "grad_norm": 0.060372162610292435, + "learning_rate": 5.786879594933601e-06, + "loss": 0.30533790588378906, + "step": 10543 + }, + { + "epoch": 1.9482740700534698, + "grad_norm": 0.07052935659885406, + "learning_rate": 5.785070458666453e-06, + "loss": 0.4420267641544342, + "step": 10544 + }, + { + "epoch": 1.9484588467623656, + "grad_norm": 0.0873042568564415, + "learning_rate": 5.783261490140315e-06, + "loss": 0.5908474326133728, + "step": 10545 + }, + { + "epoch": 1.9486436234712614, + "grad_norm": 0.0672554299235344, + "learning_rate": 5.781452689427176e-06, + "loss": 0.30557793378829956, + "step": 10546 + }, + { + "epoch": 1.9488284001801572, + "grad_norm": 0.07209660857915878, + "learning_rate": 5.779644056599025e-06, + "loss": 0.4309651255607605, + "step": 10547 + }, + { + "epoch": 1.949013176889053, + "grad_norm": 0.07991938292980194, + "learning_rate": 5.777835591727834e-06, + "loss": 0.46604594588279724, + "step": 10548 + }, + { + "epoch": 1.949197953597949, + "grad_norm": 0.08834175020456314, + "learning_rate": 5.7760272948855776e-06, + "loss": 0.6423034071922302, + "step": 10549 + }, + { + "epoch": 1.9493827303068447, + "grad_norm": 0.09046275168657303, + "learning_rate": 5.774219166144218e-06, + "loss": 0.5118681192398071, + "step": 10550 + }, + { + "epoch": 1.9495675070157406, + "grad_norm": 0.07558931410312653, + "learning_rate": 5.772411205575716e-06, + "loss": 0.40838319063186646, + "step": 10551 + }, + { + "epoch": 1.9497522837246364, + "grad_norm": 0.08017290383577347, + "learning_rate": 5.770603413252025e-06, + "loss": 0.554165244102478, + "step": 10552 + }, + { + "epoch": 1.9499370604335322, + "grad_norm": 0.08638852834701538, + "learning_rate": 5.768795789245083e-06, + "loss": 0.5371276140213013, + "step": 10553 + }, + { + "epoch": 1.950121837142428, + "grad_norm": 0.09567637741565704, + "learning_rate": 5.76698833362683e-06, + "loss": 0.6154451370239258, + "step": 10554 + }, + { + "epoch": 1.950306613851324, + "grad_norm": 0.07529415935277939, + "learning_rate": 5.7651810464692016e-06, + "loss": 0.41964831948280334, + "step": 10555 + }, + { + "epoch": 1.95049139056022, + "grad_norm": 0.09110301733016968, + "learning_rate": 5.7633739278441155e-06, + "loss": 0.6057930588722229, + "step": 10556 + }, + { + "epoch": 1.9506761672691157, + "grad_norm": 0.07765660434961319, + "learning_rate": 5.761566977823487e-06, + "loss": 0.41644635796546936, + "step": 10557 + }, + { + "epoch": 1.9508609439780116, + "grad_norm": 0.08114904910326004, + "learning_rate": 5.759760196479242e-06, + "loss": 0.49219024181365967, + "step": 10558 + }, + { + "epoch": 1.9510457206869074, + "grad_norm": 0.06297268718481064, + "learning_rate": 5.757953583883271e-06, + "loss": 0.3770653307437897, + "step": 10559 + }, + { + "epoch": 1.9512304973958032, + "grad_norm": 0.06261013448238373, + "learning_rate": 5.756147140107475e-06, + "loss": 0.38064223527908325, + "step": 10560 + }, + { + "epoch": 1.951415274104699, + "grad_norm": 0.08856848627328873, + "learning_rate": 5.7543408652237484e-06, + "loss": 0.6331533789634705, + "step": 10561 + }, + { + "epoch": 1.951600050813595, + "grad_norm": 0.07619134336709976, + "learning_rate": 5.7525347593039704e-06, + "loss": 0.5557543635368347, + "step": 10562 + }, + { + "epoch": 1.951784827522491, + "grad_norm": 0.06637249141931534, + "learning_rate": 5.7507288224200195e-06, + "loss": 0.39819884300231934, + "step": 10563 + }, + { + "epoch": 1.9519696042313868, + "grad_norm": 0.09656571596860886, + "learning_rate": 5.748923054643767e-06, + "loss": 0.7308982014656067, + "step": 10564 + }, + { + "epoch": 1.9521543809402826, + "grad_norm": 0.10094495117664337, + "learning_rate": 5.7471174560470775e-06, + "loss": 0.5335283875465393, + "step": 10565 + }, + { + "epoch": 1.9523391576491784, + "grad_norm": 0.06061745807528496, + "learning_rate": 5.745312026701808e-06, + "loss": 0.388337105512619, + "step": 10566 + }, + { + "epoch": 1.9525239343580743, + "grad_norm": 0.07064970582723618, + "learning_rate": 5.743506766679812e-06, + "loss": 0.4752836227416992, + "step": 10567 + }, + { + "epoch": 1.95270871106697, + "grad_norm": 0.07706630975008011, + "learning_rate": 5.741701676052926e-06, + "loss": 0.45462024211883545, + "step": 10568 + }, + { + "epoch": 1.952893487775866, + "grad_norm": 0.0834769457578659, + "learning_rate": 5.739896754892995e-06, + "loss": 0.46920305490493774, + "step": 10569 + }, + { + "epoch": 1.9530782644847617, + "grad_norm": 0.08264000713825226, + "learning_rate": 5.738092003271837e-06, + "loss": 0.5234748125076294, + "step": 10570 + }, + { + "epoch": 1.9532630411936576, + "grad_norm": 0.1118183583021164, + "learning_rate": 5.736287421261287e-06, + "loss": 0.8297079205513, + "step": 10571 + }, + { + "epoch": 1.9534478179025534, + "grad_norm": 0.07726228982210159, + "learning_rate": 5.734483008933163e-06, + "loss": 0.5071477890014648, + "step": 10572 + }, + { + "epoch": 1.9536325946114492, + "grad_norm": 0.05474299192428589, + "learning_rate": 5.732678766359265e-06, + "loss": 0.2890155613422394, + "step": 10573 + }, + { + "epoch": 1.953817371320345, + "grad_norm": 0.0789993479847908, + "learning_rate": 5.730874693611402e-06, + "loss": 0.4219309687614441, + "step": 10574 + }, + { + "epoch": 1.9540021480292409, + "grad_norm": 0.09682317823171616, + "learning_rate": 5.729070790761374e-06, + "loss": 0.4167204797267914, + "step": 10575 + }, + { + "epoch": 1.9541869247381367, + "grad_norm": 0.06620560586452484, + "learning_rate": 5.727267057880963e-06, + "loss": 0.36565524339675903, + "step": 10576 + }, + { + "epoch": 1.9543717014470325, + "grad_norm": 0.06693736463785172, + "learning_rate": 5.72546349504195e-06, + "loss": 0.30560731887817383, + "step": 10577 + }, + { + "epoch": 1.9545564781559284, + "grad_norm": 0.06256041675806046, + "learning_rate": 5.723660102316126e-06, + "loss": 0.354993999004364, + "step": 10578 + }, + { + "epoch": 1.9547412548648242, + "grad_norm": 0.07756361365318298, + "learning_rate": 5.7218568797752445e-06, + "loss": 0.587867796421051, + "step": 10579 + }, + { + "epoch": 1.95492603157372, + "grad_norm": 0.09032636880874634, + "learning_rate": 5.7200538274910775e-06, + "loss": 0.4667337238788605, + "step": 10580 + }, + { + "epoch": 1.9551108082826159, + "grad_norm": 0.08582337945699692, + "learning_rate": 5.718250945535382e-06, + "loss": 0.4862954616546631, + "step": 10581 + }, + { + "epoch": 1.9552955849915117, + "grad_norm": 0.07556741684675217, + "learning_rate": 5.716448233979897e-06, + "loss": 0.4102937877178192, + "step": 10582 + }, + { + "epoch": 1.9554803617004075, + "grad_norm": 0.09605452418327332, + "learning_rate": 5.714645692896372e-06, + "loss": 0.5979748964309692, + "step": 10583 + }, + { + "epoch": 1.9556651384093036, + "grad_norm": 0.068499855697155, + "learning_rate": 5.712843322356541e-06, + "loss": 0.384988933801651, + "step": 10584 + }, + { + "epoch": 1.9558499151181994, + "grad_norm": 0.07488887757062912, + "learning_rate": 5.711041122432132e-06, + "loss": 0.4321574568748474, + "step": 10585 + }, + { + "epoch": 1.9560346918270952, + "grad_norm": 0.08099797368049622, + "learning_rate": 5.709239093194872e-06, + "loss": 0.5244421362876892, + "step": 10586 + }, + { + "epoch": 1.956219468535991, + "grad_norm": 0.07003786414861679, + "learning_rate": 5.7074372347164695e-06, + "loss": 0.3788778781890869, + "step": 10587 + }, + { + "epoch": 1.9564042452448869, + "grad_norm": 0.0769960880279541, + "learning_rate": 5.7056355470686354e-06, + "loss": 0.4467634856700897, + "step": 10588 + }, + { + "epoch": 1.9565890219537827, + "grad_norm": 0.08135896921157837, + "learning_rate": 5.703834030323074e-06, + "loss": 0.46010300517082214, + "step": 10589 + }, + { + "epoch": 1.9567737986626785, + "grad_norm": 0.08489905297756195, + "learning_rate": 5.7020326845514695e-06, + "loss": 0.5769398212432861, + "step": 10590 + }, + { + "epoch": 1.9569585753715744, + "grad_norm": 0.0838000625371933, + "learning_rate": 5.7002315098255225e-06, + "loss": 0.5246551632881165, + "step": 10591 + }, + { + "epoch": 1.9571433520804704, + "grad_norm": 0.08578898012638092, + "learning_rate": 5.698430506216912e-06, + "loss": 0.4350026249885559, + "step": 10592 + }, + { + "epoch": 1.9573281287893662, + "grad_norm": 0.0876152515411377, + "learning_rate": 5.696629673797305e-06, + "loss": 0.6735652089118958, + "step": 10593 + }, + { + "epoch": 1.957512905498262, + "grad_norm": 0.0778566524386406, + "learning_rate": 5.694829012638374e-06, + "loss": 0.49129819869995117, + "step": 10594 + }, + { + "epoch": 1.957697682207158, + "grad_norm": 0.07125280797481537, + "learning_rate": 5.693028522811783e-06, + "loss": 0.38843223452568054, + "step": 10595 + }, + { + "epoch": 1.9578824589160537, + "grad_norm": 0.08513450622558594, + "learning_rate": 5.691228204389179e-06, + "loss": 0.44833922386169434, + "step": 10596 + }, + { + "epoch": 1.9580672356249496, + "grad_norm": 0.10721521079540253, + "learning_rate": 5.689428057442208e-06, + "loss": 0.6927924156188965, + "step": 10597 + }, + { + "epoch": 1.9582520123338454, + "grad_norm": 0.07172559946775436, + "learning_rate": 5.687628082042522e-06, + "loss": 0.42140018939971924, + "step": 10598 + }, + { + "epoch": 1.9584367890427412, + "grad_norm": 0.04447796568274498, + "learning_rate": 5.685828278261743e-06, + "loss": 0.27840760350227356, + "step": 10599 + }, + { + "epoch": 1.958621565751637, + "grad_norm": 0.06525732576847076, + "learning_rate": 5.684028646171505e-06, + "loss": 0.3571588695049286, + "step": 10600 + }, + { + "epoch": 1.9588063424605329, + "grad_norm": 0.06040734425187111, + "learning_rate": 5.682229185843418e-06, + "loss": 0.3269086182117462, + "step": 10601 + }, + { + "epoch": 1.9589911191694287, + "grad_norm": 0.08246826380491257, + "learning_rate": 5.680429897349102e-06, + "loss": 0.48208680748939514, + "step": 10602 + }, + { + "epoch": 1.9591758958783245, + "grad_norm": 0.10077651590108871, + "learning_rate": 5.6786307807601625e-06, + "loss": 0.7280152440071106, + "step": 10603 + }, + { + "epoch": 1.9593606725872204, + "grad_norm": 0.07238568365573883, + "learning_rate": 5.676831836148198e-06, + "loss": 0.42356643080711365, + "step": 10604 + }, + { + "epoch": 1.9595454492961162, + "grad_norm": 0.07383681833744049, + "learning_rate": 5.675033063584801e-06, + "loss": 0.49863195419311523, + "step": 10605 + }, + { + "epoch": 1.959730226005012, + "grad_norm": 0.07692846655845642, + "learning_rate": 5.6732344631415616e-06, + "loss": 0.4908929765224457, + "step": 10606 + }, + { + "epoch": 1.9599150027139078, + "grad_norm": 0.08402621746063232, + "learning_rate": 5.6714360348900475e-06, + "loss": 0.4820367693901062, + "step": 10607 + }, + { + "epoch": 1.9600997794228037, + "grad_norm": 0.05579538643360138, + "learning_rate": 5.66963777890184e-06, + "loss": 0.29534614086151123, + "step": 10608 + }, + { + "epoch": 1.9602845561316995, + "grad_norm": 0.07454511523246765, + "learning_rate": 5.667839695248498e-06, + "loss": 0.36852237582206726, + "step": 10609 + }, + { + "epoch": 1.9604693328405953, + "grad_norm": 0.07275497168302536, + "learning_rate": 5.666041784001584e-06, + "loss": 0.49770715832710266, + "step": 10610 + }, + { + "epoch": 1.9606541095494912, + "grad_norm": 0.07775789499282837, + "learning_rate": 5.664244045232647e-06, + "loss": 0.49964237213134766, + "step": 10611 + }, + { + "epoch": 1.960838886258387, + "grad_norm": 0.0660817101597786, + "learning_rate": 5.662446479013238e-06, + "loss": 0.4232540726661682, + "step": 10612 + }, + { + "epoch": 1.9610236629672828, + "grad_norm": 0.07350712269544601, + "learning_rate": 5.660649085414884e-06, + "loss": 0.41187983751296997, + "step": 10613 + }, + { + "epoch": 1.9612084396761789, + "grad_norm": 0.06443696469068527, + "learning_rate": 5.658851864509119e-06, + "loss": 0.3623831272125244, + "step": 10614 + }, + { + "epoch": 1.9613932163850747, + "grad_norm": 0.07406435161828995, + "learning_rate": 5.657054816367473e-06, + "loss": 0.5087496638298035, + "step": 10615 + }, + { + "epoch": 1.9615779930939705, + "grad_norm": 0.08812040090560913, + "learning_rate": 5.655257941061454e-06, + "loss": 0.6455977559089661, + "step": 10616 + }, + { + "epoch": 1.9617627698028663, + "grad_norm": 0.07852617651224136, + "learning_rate": 5.653461238662577e-06, + "loss": 0.5253956913948059, + "step": 10617 + }, + { + "epoch": 1.9619475465117622, + "grad_norm": 0.08296500146389008, + "learning_rate": 5.6516647092423414e-06, + "loss": 0.49200141429901123, + "step": 10618 + }, + { + "epoch": 1.962132323220658, + "grad_norm": 0.07371774315834045, + "learning_rate": 5.6498683528722486e-06, + "loss": 0.5164257287979126, + "step": 10619 + }, + { + "epoch": 1.9623170999295538, + "grad_norm": 0.07330730557441711, + "learning_rate": 5.6480721696237884e-06, + "loss": 0.3856649696826935, + "step": 10620 + }, + { + "epoch": 1.9625018766384499, + "grad_norm": 0.08983331173658371, + "learning_rate": 5.646276159568437e-06, + "loss": 0.5152300000190735, + "step": 10621 + }, + { + "epoch": 1.9626866533473457, + "grad_norm": 0.07849666476249695, + "learning_rate": 5.644480322777673e-06, + "loss": 0.5518277883529663, + "step": 10622 + }, + { + "epoch": 1.9628714300562415, + "grad_norm": 0.07726949453353882, + "learning_rate": 5.642684659322966e-06, + "loss": 0.4965454936027527, + "step": 10623 + }, + { + "epoch": 1.9630562067651374, + "grad_norm": 0.10425007343292236, + "learning_rate": 5.640889169275776e-06, + "loss": 0.647617757320404, + "step": 10624 + }, + { + "epoch": 1.9632409834740332, + "grad_norm": 0.08119388669729233, + "learning_rate": 5.63909385270756e-06, + "loss": 0.5539153814315796, + "step": 10625 + }, + { + "epoch": 1.963425760182929, + "grad_norm": 0.07241155952215195, + "learning_rate": 5.63729870968977e-06, + "loss": 0.4532361924648285, + "step": 10626 + }, + { + "epoch": 1.9636105368918249, + "grad_norm": 0.08779602497816086, + "learning_rate": 5.6355037402938375e-06, + "loss": 0.5239005088806152, + "step": 10627 + }, + { + "epoch": 1.9637953136007207, + "grad_norm": 0.07113391160964966, + "learning_rate": 5.6337089445912e-06, + "loss": 0.4499783217906952, + "step": 10628 + }, + { + "epoch": 1.9639800903096165, + "grad_norm": 0.09953673183917999, + "learning_rate": 5.631914322653289e-06, + "loss": 0.6576701402664185, + "step": 10629 + }, + { + "epoch": 1.9641648670185123, + "grad_norm": 0.07178323715925217, + "learning_rate": 5.6301198745515205e-06, + "loss": 0.487251877784729, + "step": 10630 + }, + { + "epoch": 1.9643496437274082, + "grad_norm": 0.0825979933142662, + "learning_rate": 5.6283256003573095e-06, + "loss": 0.5146287083625793, + "step": 10631 + }, + { + "epoch": 1.964534420436304, + "grad_norm": 0.07387817651033401, + "learning_rate": 5.626531500142065e-06, + "loss": 0.4900865852832794, + "step": 10632 + }, + { + "epoch": 1.9647191971451998, + "grad_norm": 0.07697905600070953, + "learning_rate": 5.624737573977182e-06, + "loss": 0.4235023260116577, + "step": 10633 + }, + { + "epoch": 1.9649039738540957, + "grad_norm": 0.09078847616910934, + "learning_rate": 5.622943821934058e-06, + "loss": 0.47874563932418823, + "step": 10634 + }, + { + "epoch": 1.9650887505629915, + "grad_norm": 0.07096932083368301, + "learning_rate": 5.621150244084072e-06, + "loss": 0.33702322840690613, + "step": 10635 + }, + { + "epoch": 1.9652735272718873, + "grad_norm": 0.07716767489910126, + "learning_rate": 5.619356840498607e-06, + "loss": 0.45271533727645874, + "step": 10636 + }, + { + "epoch": 1.9654583039807831, + "grad_norm": 0.08461139351129532, + "learning_rate": 5.617563611249034e-06, + "loss": 0.6253509521484375, + "step": 10637 + }, + { + "epoch": 1.965643080689679, + "grad_norm": 0.08762659132480621, + "learning_rate": 5.615770556406719e-06, + "loss": 0.7164840698242188, + "step": 10638 + }, + { + "epoch": 1.9658278573985748, + "grad_norm": 0.09939246624708176, + "learning_rate": 5.613977676043019e-06, + "loss": 0.6778596639633179, + "step": 10639 + }, + { + "epoch": 1.9660126341074706, + "grad_norm": 0.0584709607064724, + "learning_rate": 5.612184970229288e-06, + "loss": 0.3437689542770386, + "step": 10640 + }, + { + "epoch": 1.9661974108163665, + "grad_norm": 0.08932600915431976, + "learning_rate": 5.610392439036866e-06, + "loss": 0.4983118772506714, + "step": 10641 + }, + { + "epoch": 1.9663821875252623, + "grad_norm": 0.06830000877380371, + "learning_rate": 5.60860008253709e-06, + "loss": 0.3114332854747772, + "step": 10642 + }, + { + "epoch": 1.9665669642341583, + "grad_norm": 0.08273927122354507, + "learning_rate": 5.606807900801292e-06, + "loss": 0.4588139057159424, + "step": 10643 + }, + { + "epoch": 1.9667517409430542, + "grad_norm": 0.08656501770019531, + "learning_rate": 5.605015893900796e-06, + "loss": 0.4701785147190094, + "step": 10644 + }, + { + "epoch": 1.96693651765195, + "grad_norm": 0.07206618040800095, + "learning_rate": 5.6032240619069156e-06, + "loss": 0.45341089367866516, + "step": 10645 + }, + { + "epoch": 1.9671212943608458, + "grad_norm": 0.08199794590473175, + "learning_rate": 5.601432404890967e-06, + "loss": 0.44720548391342163, + "step": 10646 + }, + { + "epoch": 1.9673060710697416, + "grad_norm": 0.10419245809316635, + "learning_rate": 5.599640922924243e-06, + "loss": 0.6424961090087891, + "step": 10647 + }, + { + "epoch": 1.9674908477786375, + "grad_norm": 0.09122408926486969, + "learning_rate": 5.5978496160780435e-06, + "loss": 0.5698374509811401, + "step": 10648 + }, + { + "epoch": 1.9676756244875333, + "grad_norm": 0.08791826665401459, + "learning_rate": 5.5960584844236565e-06, + "loss": 0.6349215507507324, + "step": 10649 + }, + { + "epoch": 1.9678604011964294, + "grad_norm": 0.07481888681650162, + "learning_rate": 5.594267528032364e-06, + "loss": 0.39072588086128235, + "step": 10650 + }, + { + "epoch": 1.9680451779053252, + "grad_norm": 0.0983092337846756, + "learning_rate": 5.5924767469754435e-06, + "loss": 0.4008905291557312, + "step": 10651 + }, + { + "epoch": 1.968229954614221, + "grad_norm": 0.08656061440706253, + "learning_rate": 5.590686141324155e-06, + "loss": 0.3958432078361511, + "step": 10652 + }, + { + "epoch": 1.9684147313231168, + "grad_norm": 0.0860547348856926, + "learning_rate": 5.588895711149764e-06, + "loss": 0.6702467203140259, + "step": 10653 + }, + { + "epoch": 1.9685995080320127, + "grad_norm": 0.09186636656522751, + "learning_rate": 5.587105456523527e-06, + "loss": 0.6488978266716003, + "step": 10654 + }, + { + "epoch": 1.9687842847409085, + "grad_norm": 0.08431357890367508, + "learning_rate": 5.585315377516682e-06, + "loss": 0.5484136343002319, + "step": 10655 + }, + { + "epoch": 1.9689690614498043, + "grad_norm": 0.06792140752077103, + "learning_rate": 5.583525474200473e-06, + "loss": 0.35278281569480896, + "step": 10656 + }, + { + "epoch": 1.9691538381587002, + "grad_norm": 0.07110414654016495, + "learning_rate": 5.581735746646134e-06, + "loss": 0.4158727824687958, + "step": 10657 + }, + { + "epoch": 1.969338614867596, + "grad_norm": 0.08212693780660629, + "learning_rate": 5.579946194924888e-06, + "loss": 0.5473571419715881, + "step": 10658 + }, + { + "epoch": 1.9695233915764918, + "grad_norm": 0.07613690942525864, + "learning_rate": 5.5781568191079564e-06, + "loss": 0.5071023106575012, + "step": 10659 + }, + { + "epoch": 1.9697081682853876, + "grad_norm": 0.06780392676591873, + "learning_rate": 5.576367619266552e-06, + "loss": 0.3396739661693573, + "step": 10660 + }, + { + "epoch": 1.9698929449942835, + "grad_norm": 0.06602289527654648, + "learning_rate": 5.574578595471873e-06, + "loss": 0.37511688470840454, + "step": 10661 + }, + { + "epoch": 1.9700777217031793, + "grad_norm": 0.08096983283758163, + "learning_rate": 5.5727897477951196e-06, + "loss": 0.48158252239227295, + "step": 10662 + }, + { + "epoch": 1.9702624984120751, + "grad_norm": 0.10609427094459534, + "learning_rate": 5.5710010763074854e-06, + "loss": 0.5912395119667053, + "step": 10663 + }, + { + "epoch": 1.970447275120971, + "grad_norm": 0.07183204591274261, + "learning_rate": 5.56921258108015e-06, + "loss": 0.435452401638031, + "step": 10664 + }, + { + "epoch": 1.9706320518298668, + "grad_norm": 0.06339947134256363, + "learning_rate": 5.567424262184292e-06, + "loss": 0.4167206585407257, + "step": 10665 + }, + { + "epoch": 1.9708168285387626, + "grad_norm": 0.07174595445394516, + "learning_rate": 5.565636119691085e-06, + "loss": 0.4783751368522644, + "step": 10666 + }, + { + "epoch": 1.9710016052476584, + "grad_norm": 0.05023857578635216, + "learning_rate": 5.563848153671682e-06, + "loss": 0.25515496730804443, + "step": 10667 + }, + { + "epoch": 1.9711863819565543, + "grad_norm": 0.09054163098335266, + "learning_rate": 5.562060364197249e-06, + "loss": 0.597802460193634, + "step": 10668 + }, + { + "epoch": 1.97137115866545, + "grad_norm": 0.09694191068410873, + "learning_rate": 5.56027275133892e-06, + "loss": 0.4279719889163971, + "step": 10669 + }, + { + "epoch": 1.971555935374346, + "grad_norm": 0.0726257711648941, + "learning_rate": 5.558485315167849e-06, + "loss": 0.38296180963516235, + "step": 10670 + }, + { + "epoch": 1.9717407120832418, + "grad_norm": 0.06664053350687027, + "learning_rate": 5.556698055755173e-06, + "loss": 0.3466009497642517, + "step": 10671 + }, + { + "epoch": 1.9719254887921378, + "grad_norm": 0.07835600525140762, + "learning_rate": 5.554910973172008e-06, + "loss": 0.4621722400188446, + "step": 10672 + }, + { + "epoch": 1.9721102655010336, + "grad_norm": 0.09859440475702286, + "learning_rate": 5.5531240674894796e-06, + "loss": 0.5644648671150208, + "step": 10673 + }, + { + "epoch": 1.9722950422099295, + "grad_norm": 0.08445590734481812, + "learning_rate": 5.551337338778703e-06, + "loss": 0.45129987597465515, + "step": 10674 + }, + { + "epoch": 1.9724798189188253, + "grad_norm": 0.10042787343263626, + "learning_rate": 5.54955078711078e-06, + "loss": 0.5884333848953247, + "step": 10675 + }, + { + "epoch": 1.9726645956277211, + "grad_norm": 0.06397289037704468, + "learning_rate": 5.547764412556811e-06, + "loss": 0.39160019159317017, + "step": 10676 + }, + { + "epoch": 1.972849372336617, + "grad_norm": 0.07438860833644867, + "learning_rate": 5.545978215187889e-06, + "loss": 0.4273079037666321, + "step": 10677 + }, + { + "epoch": 1.9730341490455128, + "grad_norm": 0.09187070280313492, + "learning_rate": 5.5441921950751e-06, + "loss": 0.6627777814865112, + "step": 10678 + }, + { + "epoch": 1.9732189257544086, + "grad_norm": 0.0796092301607132, + "learning_rate": 5.542406352289521e-06, + "loss": 0.3833546042442322, + "step": 10679 + }, + { + "epoch": 1.9734037024633047, + "grad_norm": 0.07409553974866867, + "learning_rate": 5.540620686902227e-06, + "loss": 0.40941688418388367, + "step": 10680 + }, + { + "epoch": 1.9735884791722005, + "grad_norm": 0.07712230086326599, + "learning_rate": 5.5388351989842745e-06, + "loss": 0.49133092164993286, + "step": 10681 + }, + { + "epoch": 1.9737732558810963, + "grad_norm": 0.08963990211486816, + "learning_rate": 5.537049888606724e-06, + "loss": 0.4761016368865967, + "step": 10682 + }, + { + "epoch": 1.9739580325899921, + "grad_norm": 0.07985302060842514, + "learning_rate": 5.535264755840624e-06, + "loss": 0.5194723606109619, + "step": 10683 + }, + { + "epoch": 1.974142809298888, + "grad_norm": 0.06895793974399567, + "learning_rate": 5.5334798007570205e-06, + "loss": 0.3801591396331787, + "step": 10684 + }, + { + "epoch": 1.9743275860077838, + "grad_norm": 0.07750911265611649, + "learning_rate": 5.531695023426949e-06, + "loss": 0.48247331380844116, + "step": 10685 + }, + { + "epoch": 1.9745123627166796, + "grad_norm": 0.07938727736473083, + "learning_rate": 5.529910423921432e-06, + "loss": 0.55830979347229, + "step": 10686 + }, + { + "epoch": 1.9746971394255755, + "grad_norm": 0.09892601519823074, + "learning_rate": 5.528126002311496e-06, + "loss": 0.6771171689033508, + "step": 10687 + }, + { + "epoch": 1.9748819161344713, + "grad_norm": 0.06546211242675781, + "learning_rate": 5.526341758668158e-06, + "loss": 0.37425461411476135, + "step": 10688 + }, + { + "epoch": 1.975066692843367, + "grad_norm": 0.0693388432264328, + "learning_rate": 5.524557693062414e-06, + "loss": 0.41841715574264526, + "step": 10689 + }, + { + "epoch": 1.975251469552263, + "grad_norm": 0.08304619044065475, + "learning_rate": 5.5227738055652755e-06, + "loss": 0.5554640889167786, + "step": 10690 + }, + { + "epoch": 1.9754362462611588, + "grad_norm": 0.06779686361551285, + "learning_rate": 5.520990096247736e-06, + "loss": 0.48768579959869385, + "step": 10691 + }, + { + "epoch": 1.9756210229700546, + "grad_norm": 0.09251510351896286, + "learning_rate": 5.519206565180775e-06, + "loss": 0.6752281785011292, + "step": 10692 + }, + { + "epoch": 1.9758057996789504, + "grad_norm": 0.08035050332546234, + "learning_rate": 5.517423212435372e-06, + "loss": 0.5057024955749512, + "step": 10693 + }, + { + "epoch": 1.9759905763878463, + "grad_norm": 0.07251998037099838, + "learning_rate": 5.515640038082506e-06, + "loss": 0.46995460987091064, + "step": 10694 + }, + { + "epoch": 1.976175353096742, + "grad_norm": 0.06584444642066956, + "learning_rate": 5.5138570421931325e-06, + "loss": 0.3751562237739563, + "step": 10695 + }, + { + "epoch": 1.976360129805638, + "grad_norm": 0.09301778674125671, + "learning_rate": 5.5120742248382134e-06, + "loss": 0.626192033290863, + "step": 10696 + }, + { + "epoch": 1.9765449065145337, + "grad_norm": 0.07395187765359879, + "learning_rate": 5.5102915860887e-06, + "loss": 0.5168426632881165, + "step": 10697 + }, + { + "epoch": 1.9767296832234296, + "grad_norm": 0.08475355803966522, + "learning_rate": 5.508509126015535e-06, + "loss": 0.5751456022262573, + "step": 10698 + }, + { + "epoch": 1.9769144599323254, + "grad_norm": 0.08303186297416687, + "learning_rate": 5.506726844689658e-06, + "loss": 0.5156502723693848, + "step": 10699 + }, + { + "epoch": 1.9770992366412212, + "grad_norm": 0.06387243419885635, + "learning_rate": 5.5049447421819904e-06, + "loss": 0.533256471157074, + "step": 10700 + }, + { + "epoch": 1.977284013350117, + "grad_norm": 0.08207794278860092, + "learning_rate": 5.503162818563459e-06, + "loss": 0.5266383290290833, + "step": 10701 + }, + { + "epoch": 1.977468790059013, + "grad_norm": 0.09505506604909897, + "learning_rate": 5.50138107390498e-06, + "loss": 0.690740168094635, + "step": 10702 + }, + { + "epoch": 1.977653566767909, + "grad_norm": 0.07784173637628555, + "learning_rate": 5.4995995082774585e-06, + "loss": 0.513063907623291, + "step": 10703 + }, + { + "epoch": 1.9778383434768048, + "grad_norm": 0.08312927931547165, + "learning_rate": 5.497818121751797e-06, + "loss": 0.5379572510719299, + "step": 10704 + }, + { + "epoch": 1.9780231201857006, + "grad_norm": 0.062268663197755814, + "learning_rate": 5.4960369143988935e-06, + "loss": 0.38305482268333435, + "step": 10705 + }, + { + "epoch": 1.9782078968945964, + "grad_norm": 0.0820695236325264, + "learning_rate": 5.4942558862896255e-06, + "loss": 0.4770805537700653, + "step": 10706 + }, + { + "epoch": 1.9783926736034922, + "grad_norm": 0.07518643140792847, + "learning_rate": 5.492475037494875e-06, + "loss": 0.5276569724082947, + "step": 10707 + }, + { + "epoch": 1.978577450312388, + "grad_norm": 0.07038936764001846, + "learning_rate": 5.49069436808552e-06, + "loss": 0.48559287190437317, + "step": 10708 + }, + { + "epoch": 1.9787622270212841, + "grad_norm": 0.08067972213029861, + "learning_rate": 5.488913878132416e-06, + "loss": 0.5168792605400085, + "step": 10709 + }, + { + "epoch": 1.97894700373018, + "grad_norm": 0.07291867583990097, + "learning_rate": 5.487133567706429e-06, + "loss": 0.4612545967102051, + "step": 10710 + }, + { + "epoch": 1.9791317804390758, + "grad_norm": 0.0964045450091362, + "learning_rate": 5.48535343687841e-06, + "loss": 0.6943390369415283, + "step": 10711 + }, + { + "epoch": 1.9793165571479716, + "grad_norm": 0.06853216141462326, + "learning_rate": 5.483573485719196e-06, + "loss": 0.439113974571228, + "step": 10712 + }, + { + "epoch": 1.9795013338568674, + "grad_norm": 0.07594563066959381, + "learning_rate": 5.481793714299628e-06, + "loss": 0.49463391304016113, + "step": 10713 + }, + { + "epoch": 1.9796861105657633, + "grad_norm": 0.06695324182510376, + "learning_rate": 5.480014122690538e-06, + "loss": 0.4390440285205841, + "step": 10714 + }, + { + "epoch": 1.979870887274659, + "grad_norm": 0.07316230237483978, + "learning_rate": 5.47823471096274e-06, + "loss": 0.510161280632019, + "step": 10715 + }, + { + "epoch": 1.980055663983555, + "grad_norm": 0.05946960672736168, + "learning_rate": 5.476455479187055e-06, + "loss": 0.3409574031829834, + "step": 10716 + }, + { + "epoch": 1.9802404406924508, + "grad_norm": 0.07861226797103882, + "learning_rate": 5.474676427434289e-06, + "loss": 0.4796425700187683, + "step": 10717 + }, + { + "epoch": 1.9804252174013466, + "grad_norm": 0.0815582200884819, + "learning_rate": 5.472897555775243e-06, + "loss": 0.6053028106689453, + "step": 10718 + }, + { + "epoch": 1.9806099941102424, + "grad_norm": 0.06498534232378006, + "learning_rate": 5.471118864280716e-06, + "loss": 0.3518756031990051, + "step": 10719 + }, + { + "epoch": 1.9807947708191382, + "grad_norm": 0.07267723232507706, + "learning_rate": 5.469340353021484e-06, + "loss": 0.38878706097602844, + "step": 10720 + }, + { + "epoch": 1.980979547528034, + "grad_norm": 0.07959043234586716, + "learning_rate": 5.4675620220683315e-06, + "loss": 0.4758075177669525, + "step": 10721 + }, + { + "epoch": 1.98116432423693, + "grad_norm": 0.08470583707094193, + "learning_rate": 5.4657838714920295e-06, + "loss": 0.5368279814720154, + "step": 10722 + }, + { + "epoch": 1.9813491009458257, + "grad_norm": 0.07101590186357498, + "learning_rate": 5.464005901363345e-06, + "loss": 0.4188085198402405, + "step": 10723 + }, + { + "epoch": 1.9815338776547216, + "grad_norm": 0.06812476366758347, + "learning_rate": 5.462228111753034e-06, + "loss": 0.39685487747192383, + "step": 10724 + }, + { + "epoch": 1.9817186543636174, + "grad_norm": 0.0752476379275322, + "learning_rate": 5.460450502731851e-06, + "loss": 0.4410916268825531, + "step": 10725 + }, + { + "epoch": 1.9819034310725132, + "grad_norm": 0.06209181249141693, + "learning_rate": 5.4586730743705315e-06, + "loss": 0.400260329246521, + "step": 10726 + }, + { + "epoch": 1.982088207781409, + "grad_norm": 0.07415378093719482, + "learning_rate": 5.4568958267398165e-06, + "loss": 0.4660295844078064, + "step": 10727 + }, + { + "epoch": 1.9822729844903049, + "grad_norm": 0.07002268731594086, + "learning_rate": 5.455118759910437e-06, + "loss": 0.46611306071281433, + "step": 10728 + }, + { + "epoch": 1.9824577611992007, + "grad_norm": 0.08125322312116623, + "learning_rate": 5.453341873953104e-06, + "loss": 0.4385994076728821, + "step": 10729 + }, + { + "epoch": 1.9826425379080965, + "grad_norm": 0.07736670225858688, + "learning_rate": 5.451565168938544e-06, + "loss": 0.4350145161151886, + "step": 10730 + }, + { + "epoch": 1.9828273146169926, + "grad_norm": 0.08490642160177231, + "learning_rate": 5.449788644937464e-06, + "loss": 0.4928675889968872, + "step": 10731 + }, + { + "epoch": 1.9830120913258884, + "grad_norm": 0.06990993022918701, + "learning_rate": 5.448012302020556e-06, + "loss": 0.36245250701904297, + "step": 10732 + }, + { + "epoch": 1.9831968680347842, + "grad_norm": 0.0806194469332695, + "learning_rate": 5.44623614025852e-06, + "loss": 0.5019152164459229, + "step": 10733 + }, + { + "epoch": 1.98338164474368, + "grad_norm": 0.09441230446100235, + "learning_rate": 5.444460159722037e-06, + "loss": 0.6116966009140015, + "step": 10734 + }, + { + "epoch": 1.983566421452576, + "grad_norm": 0.07513176649808884, + "learning_rate": 5.442684360481787e-06, + "loss": 0.4708969295024872, + "step": 10735 + }, + { + "epoch": 1.9837511981614717, + "grad_norm": 0.08305010944604874, + "learning_rate": 5.4409087426084395e-06, + "loss": 0.5403691530227661, + "step": 10736 + }, + { + "epoch": 1.9839359748703675, + "grad_norm": 0.06144353374838829, + "learning_rate": 5.439133306172661e-06, + "loss": 0.38456642627716064, + "step": 10737 + }, + { + "epoch": 1.9841207515792636, + "grad_norm": 0.06547252833843231, + "learning_rate": 5.4373580512451095e-06, + "loss": 0.3211238980293274, + "step": 10738 + }, + { + "epoch": 1.9843055282881594, + "grad_norm": 0.09167303889989853, + "learning_rate": 5.435582977896435e-06, + "loss": 0.6328478455543518, + "step": 10739 + }, + { + "epoch": 1.9844903049970553, + "grad_norm": 0.09654346853494644, + "learning_rate": 5.433808086197274e-06, + "loss": 0.5210638642311096, + "step": 10740 + }, + { + "epoch": 1.984675081705951, + "grad_norm": 0.08878304809331894, + "learning_rate": 5.432033376218267e-06, + "loss": 0.5295599699020386, + "step": 10741 + }, + { + "epoch": 1.984859858414847, + "grad_norm": 0.06716418266296387, + "learning_rate": 5.4302588480300385e-06, + "loss": 0.4261035621166229, + "step": 10742 + }, + { + "epoch": 1.9850446351237427, + "grad_norm": 0.07184049487113953, + "learning_rate": 5.428484501703212e-06, + "loss": 0.49542099237442017, + "step": 10743 + }, + { + "epoch": 1.9852294118326386, + "grad_norm": 0.07388246059417725, + "learning_rate": 5.4267103373083985e-06, + "loss": 0.480552077293396, + "step": 10744 + }, + { + "epoch": 1.9854141885415344, + "grad_norm": 0.08299454301595688, + "learning_rate": 5.424936354916212e-06, + "loss": 0.43938887119293213, + "step": 10745 + }, + { + "epoch": 1.9855989652504302, + "grad_norm": 0.09323057532310486, + "learning_rate": 5.423162554597239e-06, + "loss": 0.5631217360496521, + "step": 10746 + }, + { + "epoch": 1.985783741959326, + "grad_norm": 0.07826078683137894, + "learning_rate": 5.421388936422082e-06, + "loss": 0.47334596514701843, + "step": 10747 + }, + { + "epoch": 1.9859685186682219, + "grad_norm": 0.0843670666217804, + "learning_rate": 5.419615500461316e-06, + "loss": 0.6462908387184143, + "step": 10748 + }, + { + "epoch": 1.9861532953771177, + "grad_norm": 0.09763434529304504, + "learning_rate": 5.41784224678552e-06, + "loss": 0.5311712026596069, + "step": 10749 + }, + { + "epoch": 1.9863380720860135, + "grad_norm": 0.06457974761724472, + "learning_rate": 5.416069175465274e-06, + "loss": 0.3622543513774872, + "step": 10750 + }, + { + "epoch": 1.9865228487949094, + "grad_norm": 0.06540407985448837, + "learning_rate": 5.41429628657113e-06, + "loss": 0.4064207673072815, + "step": 10751 + }, + { + "epoch": 1.9867076255038052, + "grad_norm": 0.09722238779067993, + "learning_rate": 5.412523580173647e-06, + "loss": 0.6162816286087036, + "step": 10752 + }, + { + "epoch": 1.986892402212701, + "grad_norm": 0.0980430468916893, + "learning_rate": 5.410751056343376e-06, + "loss": 0.6720651984214783, + "step": 10753 + }, + { + "epoch": 1.9870771789215969, + "grad_norm": 0.10663004219532013, + "learning_rate": 5.4089787151508525e-06, + "loss": 0.7572103142738342, + "step": 10754 + }, + { + "epoch": 1.9872619556304927, + "grad_norm": 0.08628568798303604, + "learning_rate": 5.407206556666612e-06, + "loss": 0.5272665619850159, + "step": 10755 + }, + { + "epoch": 1.9874467323393885, + "grad_norm": 0.07148962467908859, + "learning_rate": 5.405434580961182e-06, + "loss": 0.433988094329834, + "step": 10756 + }, + { + "epoch": 1.9876315090482843, + "grad_norm": 0.07028964906930923, + "learning_rate": 5.403662788105081e-06, + "loss": 0.44745177030563354, + "step": 10757 + }, + { + "epoch": 1.9878162857571802, + "grad_norm": 0.0810186043381691, + "learning_rate": 5.401891178168821e-06, + "loss": 0.5359044075012207, + "step": 10758 + }, + { + "epoch": 1.988001062466076, + "grad_norm": 0.09937306493520737, + "learning_rate": 5.40011975122291e-06, + "loss": 0.7215362191200256, + "step": 10759 + }, + { + "epoch": 1.988185839174972, + "grad_norm": 0.08598115295171738, + "learning_rate": 5.398348507337839e-06, + "loss": 0.5261132121086121, + "step": 10760 + }, + { + "epoch": 1.9883706158838679, + "grad_norm": 0.08206876367330551, + "learning_rate": 5.3965774465840985e-06, + "loss": 0.4480670094490051, + "step": 10761 + }, + { + "epoch": 1.9885553925927637, + "grad_norm": 0.0705202966928482, + "learning_rate": 5.394806569032174e-06, + "loss": 0.3446158170700073, + "step": 10762 + }, + { + "epoch": 1.9887401693016595, + "grad_norm": 0.08046627044677734, + "learning_rate": 5.3930358747525415e-06, + "loss": 0.47605058550834656, + "step": 10763 + }, + { + "epoch": 1.9889249460105554, + "grad_norm": 0.08263932168483734, + "learning_rate": 5.39126536381567e-06, + "loss": 0.45277732610702515, + "step": 10764 + }, + { + "epoch": 1.9891097227194512, + "grad_norm": 0.07077358663082123, + "learning_rate": 5.389495036292016e-06, + "loss": 0.48961758613586426, + "step": 10765 + }, + { + "epoch": 1.989294499428347, + "grad_norm": 0.06984628736972809, + "learning_rate": 5.387724892252034e-06, + "loss": 0.3571658432483673, + "step": 10766 + }, + { + "epoch": 1.9894792761372428, + "grad_norm": 0.0704636201262474, + "learning_rate": 5.385954931766175e-06, + "loss": 0.43523016571998596, + "step": 10767 + }, + { + "epoch": 1.989664052846139, + "grad_norm": 0.08782478421926498, + "learning_rate": 5.384185154904872e-06, + "loss": 0.6328921914100647, + "step": 10768 + }, + { + "epoch": 1.9898488295550347, + "grad_norm": 0.0787673145532608, + "learning_rate": 5.382415561738555e-06, + "loss": 0.4771063029766083, + "step": 10769 + }, + { + "epoch": 1.9900336062639306, + "grad_norm": 0.06567374616861343, + "learning_rate": 5.380646152337657e-06, + "loss": 0.39580053091049194, + "step": 10770 + }, + { + "epoch": 1.9902183829728264, + "grad_norm": 0.08275172114372253, + "learning_rate": 5.378876926772588e-06, + "loss": 0.5587243437767029, + "step": 10771 + }, + { + "epoch": 1.9904031596817222, + "grad_norm": 0.08253266662359238, + "learning_rate": 5.377107885113759e-06, + "loss": 0.5211490392684937, + "step": 10772 + }, + { + "epoch": 1.990587936390618, + "grad_norm": 0.08791507035493851, + "learning_rate": 5.375339027431579e-06, + "loss": 0.5906206369400024, + "step": 10773 + }, + { + "epoch": 1.9907727130995139, + "grad_norm": 0.06841687858104706, + "learning_rate": 5.373570353796431e-06, + "loss": 0.37706512212753296, + "step": 10774 + }, + { + "epoch": 1.9909574898084097, + "grad_norm": 0.062276240438222885, + "learning_rate": 5.371801864278709e-06, + "loss": 0.4059755504131317, + "step": 10775 + }, + { + "epoch": 1.9911422665173055, + "grad_norm": 0.0774695873260498, + "learning_rate": 5.370033558948793e-06, + "loss": 0.5846366286277771, + "step": 10776 + }, + { + "epoch": 1.9913270432262014, + "grad_norm": 0.0987345427274704, + "learning_rate": 5.368265437877056e-06, + "loss": 0.6710880398750305, + "step": 10777 + }, + { + "epoch": 1.9915118199350972, + "grad_norm": 0.06954395771026611, + "learning_rate": 5.366497501133865e-06, + "loss": 0.36767855286598206, + "step": 10778 + }, + { + "epoch": 1.991696596643993, + "grad_norm": 0.08661821484565735, + "learning_rate": 5.364729748789579e-06, + "loss": 0.6453872323036194, + "step": 10779 + }, + { + "epoch": 1.9918813733528888, + "grad_norm": 0.07232673466205597, + "learning_rate": 5.362962180914545e-06, + "loss": 0.37234097719192505, + "step": 10780 + }, + { + "epoch": 1.9920661500617847, + "grad_norm": 0.06527242809534073, + "learning_rate": 5.361194797579108e-06, + "loss": 0.45118600130081177, + "step": 10781 + }, + { + "epoch": 1.9922509267706805, + "grad_norm": 0.09439390897750854, + "learning_rate": 5.3594275988536045e-06, + "loss": 0.6040094494819641, + "step": 10782 + }, + { + "epoch": 1.9924357034795763, + "grad_norm": 0.061494261026382446, + "learning_rate": 5.357660584808364e-06, + "loss": 0.348712295293808, + "step": 10783 + }, + { + "epoch": 1.9926204801884722, + "grad_norm": 0.07200989127159119, + "learning_rate": 5.355893755513714e-06, + "loss": 0.38869237899780273, + "step": 10784 + }, + { + "epoch": 1.992805256897368, + "grad_norm": 0.08273415267467499, + "learning_rate": 5.354127111039957e-06, + "loss": 0.6209052801132202, + "step": 10785 + }, + { + "epoch": 1.9929900336062638, + "grad_norm": 0.07028911262750626, + "learning_rate": 5.3523606514574066e-06, + "loss": 0.47335246205329895, + "step": 10786 + }, + { + "epoch": 1.9931748103151596, + "grad_norm": 0.0690479502081871, + "learning_rate": 5.350594376836366e-06, + "loss": 0.4047301411628723, + "step": 10787 + }, + { + "epoch": 1.9933595870240555, + "grad_norm": 0.0809905007481575, + "learning_rate": 5.348828287247119e-06, + "loss": 0.5023267269134521, + "step": 10788 + }, + { + "epoch": 1.9935443637329513, + "grad_norm": 0.07330295443534851, + "learning_rate": 5.347062382759951e-06, + "loss": 0.4426124691963196, + "step": 10789 + }, + { + "epoch": 1.9937291404418473, + "grad_norm": 0.07849891483783722, + "learning_rate": 5.3452966634451494e-06, + "loss": 0.46866852045059204, + "step": 10790 + }, + { + "epoch": 1.9939139171507432, + "grad_norm": 0.0910225436091423, + "learning_rate": 5.343531129372976e-06, + "loss": 0.6327340006828308, + "step": 10791 + }, + { + "epoch": 1.994098693859639, + "grad_norm": 0.09518351405858994, + "learning_rate": 5.341765780613695e-06, + "loss": 0.5966469645500183, + "step": 10792 + }, + { + "epoch": 1.9942834705685348, + "grad_norm": 0.08670774847269058, + "learning_rate": 5.340000617237564e-06, + "loss": 0.6537419557571411, + "step": 10793 + }, + { + "epoch": 1.9944682472774307, + "grad_norm": 0.07579492032527924, + "learning_rate": 5.338235639314827e-06, + "loss": 0.5172703862190247, + "step": 10794 + }, + { + "epoch": 1.9946530239863265, + "grad_norm": 0.06951619684696198, + "learning_rate": 5.3364708469157265e-06, + "loss": 0.27607613801956177, + "step": 10795 + }, + { + "epoch": 1.9948378006952223, + "grad_norm": 0.07942194491624832, + "learning_rate": 5.334706240110497e-06, + "loss": 0.47532564401626587, + "step": 10796 + }, + { + "epoch": 1.9950225774041184, + "grad_norm": 0.07299544662237167, + "learning_rate": 5.3329418189693615e-06, + "loss": 0.4442344009876251, + "step": 10797 + }, + { + "epoch": 1.9952073541130142, + "grad_norm": 0.082549549639225, + "learning_rate": 5.3311775835625455e-06, + "loss": 0.45624038577079773, + "step": 10798 + }, + { + "epoch": 1.99539213082191, + "grad_norm": 0.08699746429920197, + "learning_rate": 5.329413533960251e-06, + "loss": 0.5316396951675415, + "step": 10799 + }, + { + "epoch": 1.9955769075308059, + "grad_norm": 0.08323982357978821, + "learning_rate": 5.327649670232684e-06, + "loss": 0.48319756984710693, + "step": 10800 + }, + { + "epoch": 1.9957616842397017, + "grad_norm": 0.0729801207780838, + "learning_rate": 5.325885992450043e-06, + "loss": 0.4878920614719391, + "step": 10801 + }, + { + "epoch": 1.9959464609485975, + "grad_norm": 0.07294317334890366, + "learning_rate": 5.324122500682516e-06, + "loss": 0.42991456389427185, + "step": 10802 + }, + { + "epoch": 1.9961312376574933, + "grad_norm": 0.0744498074054718, + "learning_rate": 5.322359195000284e-06, + "loss": 0.43306389451026917, + "step": 10803 + }, + { + "epoch": 1.9963160143663892, + "grad_norm": 0.07690717279911041, + "learning_rate": 5.320596075473527e-06, + "loss": 0.43909937143325806, + "step": 10804 + }, + { + "epoch": 1.996500791075285, + "grad_norm": 0.10090182721614838, + "learning_rate": 5.318833142172402e-06, + "loss": 0.6324017643928528, + "step": 10805 + }, + { + "epoch": 1.9966855677841808, + "grad_norm": 0.07912295311689377, + "learning_rate": 5.31707039516707e-06, + "loss": 0.48258641362190247, + "step": 10806 + }, + { + "epoch": 1.9968703444930767, + "grad_norm": 0.0740799680352211, + "learning_rate": 5.315307834527692e-06, + "loss": 0.4834136664867401, + "step": 10807 + }, + { + "epoch": 1.9970551212019725, + "grad_norm": 0.0663553848862648, + "learning_rate": 5.313545460324401e-06, + "loss": 0.4489022195339203, + "step": 10808 + }, + { + "epoch": 1.9972398979108683, + "grad_norm": 0.07279397547245026, + "learning_rate": 5.311783272627333e-06, + "loss": 0.40262991189956665, + "step": 10809 + }, + { + "epoch": 1.9974246746197641, + "grad_norm": 0.07630464434623718, + "learning_rate": 5.310021271506634e-06, + "loss": 0.44888296723365784, + "step": 10810 + }, + { + "epoch": 1.99760945132866, + "grad_norm": 0.07023772597312927, + "learning_rate": 5.3082594570324094e-06, + "loss": 0.4138171970844269, + "step": 10811 + }, + { + "epoch": 1.9977942280375558, + "grad_norm": 0.0694822445511818, + "learning_rate": 5.306497829274785e-06, + "loss": 0.38712769746780396, + "step": 10812 + }, + { + "epoch": 1.9979790047464516, + "grad_norm": 0.06312718242406845, + "learning_rate": 5.3047363883038575e-06, + "loss": 0.3996821939945221, + "step": 10813 + }, + { + "epoch": 1.9981637814553475, + "grad_norm": 0.08352584391832352, + "learning_rate": 5.302975134189734e-06, + "loss": 0.46897953748703003, + "step": 10814 + }, + { + "epoch": 1.9983485581642433, + "grad_norm": 0.07533421367406845, + "learning_rate": 5.3012140670025035e-06, + "loss": 0.3818869888782501, + "step": 10815 + }, + { + "epoch": 1.9985333348731391, + "grad_norm": 0.07463730871677399, + "learning_rate": 5.299453186812253e-06, + "loss": 0.5138566493988037, + "step": 10816 + }, + { + "epoch": 1.998718111582035, + "grad_norm": 0.08611175417900085, + "learning_rate": 5.2976924936890595e-06, + "loss": 0.5639271140098572, + "step": 10817 + }, + { + "epoch": 1.9989028882909308, + "grad_norm": 0.08599690347909927, + "learning_rate": 5.295931987702998e-06, + "loss": 0.43813201785087585, + "step": 10818 + }, + { + "epoch": 1.9990876649998268, + "grad_norm": 0.06455601006746292, + "learning_rate": 5.294171668924121e-06, + "loss": 0.38193124532699585, + "step": 10819 + }, + { + "epoch": 1.9992724417087226, + "grad_norm": 0.0643867626786232, + "learning_rate": 5.292411537422489e-06, + "loss": 0.32054176926612854, + "step": 10820 + }, + { + "epoch": 1.9994572184176185, + "grad_norm": 0.09307566285133362, + "learning_rate": 5.29065159326815e-06, + "loss": 0.5503920912742615, + "step": 10821 + }, + { + "epoch": 1.9996419951265143, + "grad_norm": 0.09054537862539291, + "learning_rate": 5.288891836531145e-06, + "loss": 0.4443438947200775, + "step": 10822 + }, + { + "epoch": 1.9998267718354101, + "grad_norm": 0.07253723591566086, + "learning_rate": 5.287132267281504e-06, + "loss": 0.3412075340747833, + "step": 10823 + }, + { + "epoch": 2.0, + "grad_norm": 0.10073523968458176, + "learning_rate": 5.28537288558926e-06, + "loss": 0.5651444792747498, + "step": 10824 + }, + { + "epoch": 2.000184776708896, + "grad_norm": 0.06520046293735504, + "learning_rate": 5.283613691524419e-06, + "loss": 0.45347392559051514, + "step": 10825 + }, + { + "epoch": 2.0003695534177917, + "grad_norm": 0.07755034416913986, + "learning_rate": 5.281854685156998e-06, + "loss": 0.44497644901275635, + "step": 10826 + }, + { + "epoch": 2.0005543301266875, + "grad_norm": 0.08444507420063019, + "learning_rate": 5.280095866557003e-06, + "loss": 0.5389115214347839, + "step": 10827 + }, + { + "epoch": 2.0007391068355833, + "grad_norm": 0.08076513558626175, + "learning_rate": 5.278337235794422e-06, + "loss": 0.4192272126674652, + "step": 10828 + }, + { + "epoch": 2.000923883544479, + "grad_norm": 0.08427664637565613, + "learning_rate": 5.2765787929392475e-06, + "loss": 0.5673325061798096, + "step": 10829 + }, + { + "epoch": 2.001108660253375, + "grad_norm": 0.07211994379758835, + "learning_rate": 5.2748205380614595e-06, + "loss": 0.4486318528652191, + "step": 10830 + }, + { + "epoch": 2.001293436962271, + "grad_norm": 0.06385567784309387, + "learning_rate": 5.273062471231029e-06, + "loss": 0.43328577280044556, + "step": 10831 + }, + { + "epoch": 2.0014782136711666, + "grad_norm": 0.04617303982377052, + "learning_rate": 5.27130459251793e-06, + "loss": 0.25220465660095215, + "step": 10832 + }, + { + "epoch": 2.0016629903800625, + "grad_norm": 0.07331185042858124, + "learning_rate": 5.269546901992108e-06, + "loss": 0.3785974085330963, + "step": 10833 + }, + { + "epoch": 2.0018477670889583, + "grad_norm": 0.0816309005022049, + "learning_rate": 5.267789399723522e-06, + "loss": 0.5629788637161255, + "step": 10834 + }, + { + "epoch": 2.002032543797854, + "grad_norm": 0.07381216436624527, + "learning_rate": 5.2660320857821116e-06, + "loss": 0.4770171344280243, + "step": 10835 + }, + { + "epoch": 2.00221732050675, + "grad_norm": 0.0860087126493454, + "learning_rate": 5.264274960237812e-06, + "loss": 0.6274966597557068, + "step": 10836 + }, + { + "epoch": 2.0024020972156458, + "grad_norm": 0.06753948330879211, + "learning_rate": 5.262518023160554e-06, + "loss": 0.35875818133354187, + "step": 10837 + }, + { + "epoch": 2.0025868739245416, + "grad_norm": 0.07698596268892288, + "learning_rate": 5.260761274620261e-06, + "loss": 0.49586576223373413, + "step": 10838 + }, + { + "epoch": 2.002771650633438, + "grad_norm": 0.09860380738973618, + "learning_rate": 5.259004714686839e-06, + "loss": 0.5277653932571411, + "step": 10839 + }, + { + "epoch": 2.0029564273423337, + "grad_norm": 0.07372115552425385, + "learning_rate": 5.2572483434301944e-06, + "loss": 0.4336004853248596, + "step": 10840 + }, + { + "epoch": 2.0031412040512295, + "grad_norm": 0.06894727051258087, + "learning_rate": 5.2554921609202296e-06, + "loss": 0.3536815047264099, + "step": 10841 + }, + { + "epoch": 2.0033259807601254, + "grad_norm": 0.07026606053113937, + "learning_rate": 5.253736167226833e-06, + "loss": 0.468704491853714, + "step": 10842 + }, + { + "epoch": 2.003510757469021, + "grad_norm": 0.07699372619390488, + "learning_rate": 5.2519803624198865e-06, + "loss": 0.448326051235199, + "step": 10843 + }, + { + "epoch": 2.003695534177917, + "grad_norm": 0.08106248825788498, + "learning_rate": 5.250224746569271e-06, + "loss": 0.4945138692855835, + "step": 10844 + }, + { + "epoch": 2.003880310886813, + "grad_norm": 0.08255085349082947, + "learning_rate": 5.248469319744848e-06, + "loss": 0.5569407939910889, + "step": 10845 + }, + { + "epoch": 2.0040650875957087, + "grad_norm": 0.06983159482479095, + "learning_rate": 5.246714082016483e-06, + "loss": 0.3689742088317871, + "step": 10846 + }, + { + "epoch": 2.0042498643046045, + "grad_norm": 0.0922977477312088, + "learning_rate": 5.244959033454022e-06, + "loss": 0.49106916785240173, + "step": 10847 + }, + { + "epoch": 2.0044346410135003, + "grad_norm": 0.05711909383535385, + "learning_rate": 5.2432041741273134e-06, + "loss": 0.3055451512336731, + "step": 10848 + }, + { + "epoch": 2.004619417722396, + "grad_norm": 0.07882208377122879, + "learning_rate": 5.241449504106202e-06, + "loss": 0.4588870704174042, + "step": 10849 + }, + { + "epoch": 2.004804194431292, + "grad_norm": 0.0810166671872139, + "learning_rate": 5.23969502346051e-06, + "loss": 0.5236678123474121, + "step": 10850 + }, + { + "epoch": 2.004988971140188, + "grad_norm": 0.06356259435415268, + "learning_rate": 5.237940732260063e-06, + "loss": 0.346655011177063, + "step": 10851 + }, + { + "epoch": 2.0051737478490836, + "grad_norm": 0.09319958090782166, + "learning_rate": 5.23618663057468e-06, + "loss": 0.46527671813964844, + "step": 10852 + }, + { + "epoch": 2.0053585245579795, + "grad_norm": 0.07804631441831589, + "learning_rate": 5.23443271847416e-06, + "loss": 0.37992700934410095, + "step": 10853 + }, + { + "epoch": 2.0055433012668753, + "grad_norm": 0.08168787509202957, + "learning_rate": 5.232678996028311e-06, + "loss": 0.46410107612609863, + "step": 10854 + }, + { + "epoch": 2.005728077975771, + "grad_norm": 0.06901519745588303, + "learning_rate": 5.230925463306921e-06, + "loss": 0.3564493656158447, + "step": 10855 + }, + { + "epoch": 2.005912854684667, + "grad_norm": 0.07116210460662842, + "learning_rate": 5.229172120379778e-06, + "loss": 0.3951399624347687, + "step": 10856 + }, + { + "epoch": 2.006097631393563, + "grad_norm": 0.05574139207601547, + "learning_rate": 5.2274189673166565e-06, + "loss": 0.2359476536512375, + "step": 10857 + }, + { + "epoch": 2.0062824081024586, + "grad_norm": 0.08123526722192764, + "learning_rate": 5.225666004187334e-06, + "loss": 0.3718389868736267, + "step": 10858 + }, + { + "epoch": 2.0064671848113544, + "grad_norm": 0.06732258200645447, + "learning_rate": 5.2239132310615635e-06, + "loss": 0.4056186378002167, + "step": 10859 + }, + { + "epoch": 2.0066519615202503, + "grad_norm": 0.09004206210374832, + "learning_rate": 5.222160648009105e-06, + "loss": 0.48015791177749634, + "step": 10860 + }, + { + "epoch": 2.006836738229146, + "grad_norm": 0.07492027431726456, + "learning_rate": 5.2204082550997026e-06, + "loss": 0.38669294118881226, + "step": 10861 + }, + { + "epoch": 2.007021514938042, + "grad_norm": 0.07910634577274323, + "learning_rate": 5.2186560524030995e-06, + "loss": 0.5493848919868469, + "step": 10862 + }, + { + "epoch": 2.0072062916469378, + "grad_norm": 0.06139914691448212, + "learning_rate": 5.21690403998903e-06, + "loss": 0.35784342885017395, + "step": 10863 + }, + { + "epoch": 2.0073910683558336, + "grad_norm": 0.06231553852558136, + "learning_rate": 5.215152217927213e-06, + "loss": 0.2554203271865845, + "step": 10864 + }, + { + "epoch": 2.0075758450647294, + "grad_norm": 0.07526741176843643, + "learning_rate": 5.213400586287366e-06, + "loss": 0.41515955328941345, + "step": 10865 + }, + { + "epoch": 2.0077606217736252, + "grad_norm": 0.07477451860904694, + "learning_rate": 5.211649145139205e-06, + "loss": 0.36199766397476196, + "step": 10866 + }, + { + "epoch": 2.007945398482521, + "grad_norm": 0.10428471118211746, + "learning_rate": 5.209897894552422e-06, + "loss": 0.588308572769165, + "step": 10867 + }, + { + "epoch": 2.0081301751914173, + "grad_norm": 0.06379430741071701, + "learning_rate": 5.208146834596715e-06, + "loss": 0.3397676944732666, + "step": 10868 + }, + { + "epoch": 2.008314951900313, + "grad_norm": 0.07628770172595978, + "learning_rate": 5.206395965341778e-06, + "loss": 0.3742277920246124, + "step": 10869 + }, + { + "epoch": 2.008499728609209, + "grad_norm": 0.08512669056653976, + "learning_rate": 5.2046452868572815e-06, + "loss": 0.42715978622436523, + "step": 10870 + }, + { + "epoch": 2.008684505318105, + "grad_norm": 0.07170064002275467, + "learning_rate": 5.2028947992129e-06, + "loss": 0.3647734522819519, + "step": 10871 + }, + { + "epoch": 2.0088692820270007, + "grad_norm": 0.0802338495850563, + "learning_rate": 5.2011445024783e-06, + "loss": 0.43790730834007263, + "step": 10872 + }, + { + "epoch": 2.0090540587358965, + "grad_norm": 0.09971331059932709, + "learning_rate": 5.199394396723132e-06, + "loss": 0.4873550534248352, + "step": 10873 + }, + { + "epoch": 2.0092388354447923, + "grad_norm": 0.08621958643198013, + "learning_rate": 5.197644482017048e-06, + "loss": 0.461483359336853, + "step": 10874 + }, + { + "epoch": 2.009423612153688, + "grad_norm": 0.0824543908238411, + "learning_rate": 5.195894758429689e-06, + "loss": 0.4282684326171875, + "step": 10875 + }, + { + "epoch": 2.009608388862584, + "grad_norm": 0.07176830619573593, + "learning_rate": 5.194145226030688e-06, + "loss": 0.3931328058242798, + "step": 10876 + }, + { + "epoch": 2.00979316557148, + "grad_norm": 0.07633424550294876, + "learning_rate": 5.192395884889676e-06, + "loss": 0.45451030135154724, + "step": 10877 + }, + { + "epoch": 2.0099779422803756, + "grad_norm": 0.08542587608098984, + "learning_rate": 5.190646735076262e-06, + "loss": 0.4537266492843628, + "step": 10878 + }, + { + "epoch": 2.0101627189892715, + "grad_norm": 0.09187118709087372, + "learning_rate": 5.188897776660062e-06, + "loss": 0.5085985064506531, + "step": 10879 + }, + { + "epoch": 2.0103474956981673, + "grad_norm": 0.08106537163257599, + "learning_rate": 5.187149009710681e-06, + "loss": 0.5278698205947876, + "step": 10880 + }, + { + "epoch": 2.010532272407063, + "grad_norm": 0.06416518241167068, + "learning_rate": 5.185400434297707e-06, + "loss": 0.31291911005973816, + "step": 10881 + }, + { + "epoch": 2.010717049115959, + "grad_norm": 0.05878720059990883, + "learning_rate": 5.183652050490735e-06, + "loss": 0.2681567370891571, + "step": 10882 + }, + { + "epoch": 2.0109018258248548, + "grad_norm": 0.07457895576953888, + "learning_rate": 5.181903858359346e-06, + "loss": 0.3523574769496918, + "step": 10883 + }, + { + "epoch": 2.0110866025337506, + "grad_norm": 0.0683598443865776, + "learning_rate": 5.180155857973106e-06, + "loss": 0.3450174629688263, + "step": 10884 + }, + { + "epoch": 2.0112713792426464, + "grad_norm": 0.07570520788431168, + "learning_rate": 5.178408049401584e-06, + "loss": 0.4106927216053009, + "step": 10885 + }, + { + "epoch": 2.0114561559515423, + "grad_norm": 0.05980301275849342, + "learning_rate": 5.176660432714342e-06, + "loss": 0.28846660256385803, + "step": 10886 + }, + { + "epoch": 2.011640932660438, + "grad_norm": 0.08202092349529266, + "learning_rate": 5.174913007980919e-06, + "loss": 0.4278833866119385, + "step": 10887 + }, + { + "epoch": 2.011825709369334, + "grad_norm": 0.07988191395998001, + "learning_rate": 5.173165775270859e-06, + "loss": 0.4565815031528473, + "step": 10888 + }, + { + "epoch": 2.0120104860782297, + "grad_norm": 0.07050027698278427, + "learning_rate": 5.171418734653707e-06, + "loss": 0.3265901207923889, + "step": 10889 + }, + { + "epoch": 2.0121952627871256, + "grad_norm": 0.09780313819646835, + "learning_rate": 5.16967188619898e-06, + "loss": 0.6623883843421936, + "step": 10890 + }, + { + "epoch": 2.0123800394960214, + "grad_norm": 0.07266128063201904, + "learning_rate": 5.167925229976199e-06, + "loss": 0.33542200922966003, + "step": 10891 + }, + { + "epoch": 2.0125648162049172, + "grad_norm": 0.06911231577396393, + "learning_rate": 5.16617876605488e-06, + "loss": 0.28636428713798523, + "step": 10892 + }, + { + "epoch": 2.012749592913813, + "grad_norm": 0.09343921393156052, + "learning_rate": 5.164432494504519e-06, + "loss": 0.5033407211303711, + "step": 10893 + }, + { + "epoch": 2.012934369622709, + "grad_norm": 0.0805625319480896, + "learning_rate": 5.1626864153946175e-06, + "loss": 0.4543337821960449, + "step": 10894 + }, + { + "epoch": 2.0131191463316047, + "grad_norm": 0.07634482532739639, + "learning_rate": 5.160940528794661e-06, + "loss": 0.48865413665771484, + "step": 10895 + }, + { + "epoch": 2.0133039230405005, + "grad_norm": 0.11683768779039383, + "learning_rate": 5.159194834774132e-06, + "loss": 0.420229971408844, + "step": 10896 + }, + { + "epoch": 2.0134886997493964, + "grad_norm": 0.07559465616941452, + "learning_rate": 5.1574493334025084e-06, + "loss": 0.4204034209251404, + "step": 10897 + }, + { + "epoch": 2.0136734764582926, + "grad_norm": 0.08906613290309906, + "learning_rate": 5.155704024749249e-06, + "loss": 0.49783578515052795, + "step": 10898 + }, + { + "epoch": 2.0138582531671885, + "grad_norm": 0.08135172724723816, + "learning_rate": 5.153958908883811e-06, + "loss": 0.427848219871521, + "step": 10899 + }, + { + "epoch": 2.0140430298760843, + "grad_norm": 0.08237715810537338, + "learning_rate": 5.1522139858756514e-06, + "loss": 0.4681159257888794, + "step": 10900 + }, + { + "epoch": 2.01422780658498, + "grad_norm": 0.05532315745949745, + "learning_rate": 5.150469255794199e-06, + "loss": 0.25912758708000183, + "step": 10901 + }, + { + "epoch": 2.014412583293876, + "grad_norm": 0.07079538702964783, + "learning_rate": 5.148724718708904e-06, + "loss": 0.4080043435096741, + "step": 10902 + }, + { + "epoch": 2.014597360002772, + "grad_norm": 0.06199576333165169, + "learning_rate": 5.146980374689192e-06, + "loss": 0.3021320700645447, + "step": 10903 + }, + { + "epoch": 2.0147821367116676, + "grad_norm": 0.0703669860959053, + "learning_rate": 5.145236223804473e-06, + "loss": 0.4637066721916199, + "step": 10904 + }, + { + "epoch": 2.0149669134205634, + "grad_norm": 0.08532287925481796, + "learning_rate": 5.143492266124164e-06, + "loss": 0.5112899541854858, + "step": 10905 + }, + { + "epoch": 2.0151516901294593, + "grad_norm": 0.0936102494597435, + "learning_rate": 5.1417485017176714e-06, + "loss": 0.47748786211013794, + "step": 10906 + }, + { + "epoch": 2.015336466838355, + "grad_norm": 0.08761214464902878, + "learning_rate": 5.140004930654385e-06, + "loss": 0.6001975536346436, + "step": 10907 + }, + { + "epoch": 2.015521243547251, + "grad_norm": 0.09377985447645187, + "learning_rate": 5.138261553003696e-06, + "loss": 0.5890383124351501, + "step": 10908 + }, + { + "epoch": 2.0157060202561468, + "grad_norm": 0.08416535705327988, + "learning_rate": 5.136518368834993e-06, + "loss": 0.47278961539268494, + "step": 10909 + }, + { + "epoch": 2.0158907969650426, + "grad_norm": 0.06976797431707382, + "learning_rate": 5.134775378217638e-06, + "loss": 0.3145328164100647, + "step": 10910 + }, + { + "epoch": 2.0160755736739384, + "grad_norm": 0.09272748231887817, + "learning_rate": 5.133032581221007e-06, + "loss": 0.6585069298744202, + "step": 10911 + }, + { + "epoch": 2.0162603503828342, + "grad_norm": 0.07258772850036621, + "learning_rate": 5.131289977914449e-06, + "loss": 0.3980956971645355, + "step": 10912 + }, + { + "epoch": 2.01644512709173, + "grad_norm": 0.07333315908908844, + "learning_rate": 5.129547568367317e-06, + "loss": 0.3659195303916931, + "step": 10913 + }, + { + "epoch": 2.016629903800626, + "grad_norm": 0.06620004773139954, + "learning_rate": 5.127805352648954e-06, + "loss": 0.3466736376285553, + "step": 10914 + }, + { + "epoch": 2.0168146805095217, + "grad_norm": 0.08554978668689728, + "learning_rate": 5.126063330828694e-06, + "loss": 0.41909193992614746, + "step": 10915 + }, + { + "epoch": 2.0169994572184176, + "grad_norm": 0.08530129492282867, + "learning_rate": 5.124321502975866e-06, + "loss": 0.4803772270679474, + "step": 10916 + }, + { + "epoch": 2.0171842339273134, + "grad_norm": 0.08911938220262527, + "learning_rate": 5.1225798691597915e-06, + "loss": 0.45174241065979004, + "step": 10917 + }, + { + "epoch": 2.017369010636209, + "grad_norm": 0.06977102160453796, + "learning_rate": 5.120838429449775e-06, + "loss": 0.340168297290802, + "step": 10918 + }, + { + "epoch": 2.017553787345105, + "grad_norm": 0.0680844634771347, + "learning_rate": 5.119097183915124e-06, + "loss": 0.34571701288223267, + "step": 10919 + }, + { + "epoch": 2.017738564054001, + "grad_norm": 0.0721874088048935, + "learning_rate": 5.117356132625138e-06, + "loss": 0.4670308232307434, + "step": 10920 + }, + { + "epoch": 2.0179233407628967, + "grad_norm": 0.0811346173286438, + "learning_rate": 5.115615275649095e-06, + "loss": 0.38460540771484375, + "step": 10921 + }, + { + "epoch": 2.0181081174717925, + "grad_norm": 0.08082343637943268, + "learning_rate": 5.113874613056287e-06, + "loss": 0.43244844675064087, + "step": 10922 + }, + { + "epoch": 2.0182928941806884, + "grad_norm": 0.06001897156238556, + "learning_rate": 5.112134144915986e-06, + "loss": 0.2741605341434479, + "step": 10923 + }, + { + "epoch": 2.018477670889584, + "grad_norm": 0.05844532698392868, + "learning_rate": 5.11039387129745e-06, + "loss": 0.36012426018714905, + "step": 10924 + }, + { + "epoch": 2.01866244759848, + "grad_norm": 0.1003597304224968, + "learning_rate": 5.108653792269941e-06, + "loss": 0.5373878479003906, + "step": 10925 + }, + { + "epoch": 2.018847224307376, + "grad_norm": 0.07224072515964508, + "learning_rate": 5.106913907902711e-06, + "loss": 0.3647426664829254, + "step": 10926 + }, + { + "epoch": 2.019032001016272, + "grad_norm": 0.06438612192869186, + "learning_rate": 5.105174218264995e-06, + "loss": 0.3541163206100464, + "step": 10927 + }, + { + "epoch": 2.019216777725168, + "grad_norm": 0.07255525141954422, + "learning_rate": 5.103434723426032e-06, + "loss": 0.4254012703895569, + "step": 10928 + }, + { + "epoch": 2.0194015544340638, + "grad_norm": 0.09289377927780151, + "learning_rate": 5.101695423455046e-06, + "loss": 0.39545321464538574, + "step": 10929 + }, + { + "epoch": 2.0195863311429596, + "grad_norm": 0.08766859769821167, + "learning_rate": 5.09995631842126e-06, + "loss": 0.4713236689567566, + "step": 10930 + }, + { + "epoch": 2.0197711078518554, + "grad_norm": 0.08483327925205231, + "learning_rate": 5.098217408393884e-06, + "loss": 0.4761367738246918, + "step": 10931 + }, + { + "epoch": 2.0199558845607513, + "grad_norm": 0.06505721807479858, + "learning_rate": 5.096478693442117e-06, + "loss": 0.3717211186885834, + "step": 10932 + }, + { + "epoch": 2.020140661269647, + "grad_norm": 0.09126225113868713, + "learning_rate": 5.094740173635156e-06, + "loss": 0.43229779601097107, + "step": 10933 + }, + { + "epoch": 2.020325437978543, + "grad_norm": 0.07946305721998215, + "learning_rate": 5.0930018490421895e-06, + "loss": 0.4669344127178192, + "step": 10934 + }, + { + "epoch": 2.0205102146874387, + "grad_norm": 0.09561219811439514, + "learning_rate": 5.091263719732398e-06, + "loss": 0.4787537753582001, + "step": 10935 + }, + { + "epoch": 2.0206949913963346, + "grad_norm": 0.09669752418994904, + "learning_rate": 5.089525785774951e-06, + "loss": 0.5886106491088867, + "step": 10936 + }, + { + "epoch": 2.0208797681052304, + "grad_norm": 0.10397924482822418, + "learning_rate": 5.087788047239021e-06, + "loss": 0.5654873251914978, + "step": 10937 + }, + { + "epoch": 2.0210645448141262, + "grad_norm": 0.06124212220311165, + "learning_rate": 5.086050504193753e-06, + "loss": 0.4133400321006775, + "step": 10938 + }, + { + "epoch": 2.021249321523022, + "grad_norm": 0.08833230286836624, + "learning_rate": 5.084313156708303e-06, + "loss": 0.5162288546562195, + "step": 10939 + }, + { + "epoch": 2.021434098231918, + "grad_norm": 0.09045036882162094, + "learning_rate": 5.082576004851808e-06, + "loss": 0.5982534885406494, + "step": 10940 + }, + { + "epoch": 2.0216188749408137, + "grad_norm": 0.0780353844165802, + "learning_rate": 5.080839048693405e-06, + "loss": 0.519133985042572, + "step": 10941 + }, + { + "epoch": 2.0218036516497095, + "grad_norm": 0.0687236413359642, + "learning_rate": 5.07910228830222e-06, + "loss": 0.3705589771270752, + "step": 10942 + }, + { + "epoch": 2.0219884283586054, + "grad_norm": 0.07354672998189926, + "learning_rate": 5.077365723747366e-06, + "loss": 0.41299891471862793, + "step": 10943 + }, + { + "epoch": 2.022173205067501, + "grad_norm": 0.08132113516330719, + "learning_rate": 5.075629355097955e-06, + "loss": 0.4075068533420563, + "step": 10944 + }, + { + "epoch": 2.022357981776397, + "grad_norm": 0.06150183826684952, + "learning_rate": 5.073893182423093e-06, + "loss": 0.37654855847358704, + "step": 10945 + }, + { + "epoch": 2.022542758485293, + "grad_norm": 0.10539187490940094, + "learning_rate": 5.072157205791866e-06, + "loss": 0.4971143901348114, + "step": 10946 + }, + { + "epoch": 2.0227275351941887, + "grad_norm": 0.05700913444161415, + "learning_rate": 5.070421425273366e-06, + "loss": 0.3304547071456909, + "step": 10947 + }, + { + "epoch": 2.0229123119030845, + "grad_norm": 0.08524139970541, + "learning_rate": 5.06868584093667e-06, + "loss": 0.5176447629928589, + "step": 10948 + }, + { + "epoch": 2.0230970886119803, + "grad_norm": 0.08728422969579697, + "learning_rate": 5.06695045285085e-06, + "loss": 0.408867746591568, + "step": 10949 + }, + { + "epoch": 2.023281865320876, + "grad_norm": 0.09003953635692596, + "learning_rate": 5.065215261084968e-06, + "loss": 0.4363194704055786, + "step": 10950 + }, + { + "epoch": 2.023466642029772, + "grad_norm": 0.0999642014503479, + "learning_rate": 5.063480265708083e-06, + "loss": 0.5393664240837097, + "step": 10951 + }, + { + "epoch": 2.023651418738668, + "grad_norm": 0.06618688255548477, + "learning_rate": 5.061745466789236e-06, + "loss": 0.3658475875854492, + "step": 10952 + }, + { + "epoch": 2.0238361954475637, + "grad_norm": 0.08020365238189697, + "learning_rate": 5.060010864397469e-06, + "loss": 0.5106897354125977, + "step": 10953 + }, + { + "epoch": 2.0240209721564595, + "grad_norm": 0.07106049358844757, + "learning_rate": 5.058276458601814e-06, + "loss": 0.30423349142074585, + "step": 10954 + }, + { + "epoch": 2.0242057488653553, + "grad_norm": 0.0817643254995346, + "learning_rate": 5.056542249471297e-06, + "loss": 0.4568421542644501, + "step": 10955 + }, + { + "epoch": 2.024390525574251, + "grad_norm": 0.06630393117666245, + "learning_rate": 5.054808237074931e-06, + "loss": 0.4356433153152466, + "step": 10956 + }, + { + "epoch": 2.0245753022831474, + "grad_norm": 0.10030622035264969, + "learning_rate": 5.05307442148173e-06, + "loss": 0.5095507502555847, + "step": 10957 + }, + { + "epoch": 2.0247600789920432, + "grad_norm": 0.07793160527944565, + "learning_rate": 5.051340802760686e-06, + "loss": 0.35566869378089905, + "step": 10958 + }, + { + "epoch": 2.024944855700939, + "grad_norm": 0.07248266786336899, + "learning_rate": 5.049607380980799e-06, + "loss": 0.3697780668735504, + "step": 10959 + }, + { + "epoch": 2.025129632409835, + "grad_norm": 0.09467242658138275, + "learning_rate": 5.047874156211044e-06, + "loss": 0.44589683413505554, + "step": 10960 + }, + { + "epoch": 2.0253144091187307, + "grad_norm": 0.0702122300863266, + "learning_rate": 5.046141128520408e-06, + "loss": 0.35430559515953064, + "step": 10961 + }, + { + "epoch": 2.0254991858276266, + "grad_norm": 0.07696325331926346, + "learning_rate": 5.04440829797786e-06, + "loss": 0.3792334794998169, + "step": 10962 + }, + { + "epoch": 2.0256839625365224, + "grad_norm": 0.07569151371717453, + "learning_rate": 5.042675664652353e-06, + "loss": 0.3286278247833252, + "step": 10963 + }, + { + "epoch": 2.025868739245418, + "grad_norm": 0.0927334651350975, + "learning_rate": 5.040943228612845e-06, + "loss": 0.4318874478340149, + "step": 10964 + }, + { + "epoch": 2.026053515954314, + "grad_norm": 0.08600287139415741, + "learning_rate": 5.039210989928287e-06, + "loss": 0.48451122641563416, + "step": 10965 + }, + { + "epoch": 2.02623829266321, + "grad_norm": 0.07625958323478699, + "learning_rate": 5.037478948667607e-06, + "loss": 0.3884626626968384, + "step": 10966 + }, + { + "epoch": 2.0264230693721057, + "grad_norm": 0.061320796608924866, + "learning_rate": 5.035747104899738e-06, + "loss": 0.36891666054725647, + "step": 10967 + }, + { + "epoch": 2.0266078460810015, + "grad_norm": 0.08422596752643585, + "learning_rate": 5.034015458693604e-06, + "loss": 0.43773600459098816, + "step": 10968 + }, + { + "epoch": 2.0267926227898974, + "grad_norm": 0.11182351410388947, + "learning_rate": 5.032284010118118e-06, + "loss": 0.604387640953064, + "step": 10969 + }, + { + "epoch": 2.026977399498793, + "grad_norm": 0.0635233074426651, + "learning_rate": 5.030552759242186e-06, + "loss": 0.28664737939834595, + "step": 10970 + }, + { + "epoch": 2.027162176207689, + "grad_norm": 0.09212157130241394, + "learning_rate": 5.028821706134712e-06, + "loss": 0.4764632284641266, + "step": 10971 + }, + { + "epoch": 2.027346952916585, + "grad_norm": 0.08039910346269608, + "learning_rate": 5.027090850864577e-06, + "loss": 0.4236772358417511, + "step": 10972 + }, + { + "epoch": 2.0275317296254807, + "grad_norm": 0.06990626454353333, + "learning_rate": 5.025360193500667e-06, + "loss": 0.381586492061615, + "step": 10973 + }, + { + "epoch": 2.0277165063343765, + "grad_norm": 0.06147979572415352, + "learning_rate": 5.023629734111858e-06, + "loss": 0.34560880064964294, + "step": 10974 + }, + { + "epoch": 2.0279012830432723, + "grad_norm": 0.06412357836961746, + "learning_rate": 5.021899472767015e-06, + "loss": 0.36367711424827576, + "step": 10975 + }, + { + "epoch": 2.028086059752168, + "grad_norm": 0.07729597389698029, + "learning_rate": 5.020169409535005e-06, + "loss": 0.34711959958076477, + "step": 10976 + }, + { + "epoch": 2.028270836461064, + "grad_norm": 0.0709250196814537, + "learning_rate": 5.0184395444846676e-06, + "loss": 0.360468327999115, + "step": 10977 + }, + { + "epoch": 2.02845561316996, + "grad_norm": 0.05430329591035843, + "learning_rate": 5.0167098776848515e-06, + "loss": 0.24139876663684845, + "step": 10978 + }, + { + "epoch": 2.0286403898788556, + "grad_norm": 0.08792615681886673, + "learning_rate": 5.014980409204395e-06, + "loss": 0.48062777519226074, + "step": 10979 + }, + { + "epoch": 2.0288251665877515, + "grad_norm": 0.10243596881628036, + "learning_rate": 5.013251139112114e-06, + "loss": 0.6778828501701355, + "step": 10980 + }, + { + "epoch": 2.0290099432966473, + "grad_norm": 0.07227854430675507, + "learning_rate": 5.0115220674768405e-06, + "loss": 0.3224528133869171, + "step": 10981 + }, + { + "epoch": 2.029194720005543, + "grad_norm": 0.08872295916080475, + "learning_rate": 5.009793194367385e-06, + "loss": 0.51368647813797, + "step": 10982 + }, + { + "epoch": 2.029379496714439, + "grad_norm": 0.07591360807418823, + "learning_rate": 5.008064519852545e-06, + "loss": 0.3432996869087219, + "step": 10983 + }, + { + "epoch": 2.029564273423335, + "grad_norm": 0.061702460050582886, + "learning_rate": 5.006336044001119e-06, + "loss": 0.30575910210609436, + "step": 10984 + }, + { + "epoch": 2.0297490501322306, + "grad_norm": 0.07337259501218796, + "learning_rate": 5.004607766881899e-06, + "loss": 0.44703561067581177, + "step": 10985 + }, + { + "epoch": 2.029933826841127, + "grad_norm": 0.09474797546863556, + "learning_rate": 5.002879688563658e-06, + "loss": 0.6129400134086609, + "step": 10986 + }, + { + "epoch": 2.0301186035500227, + "grad_norm": 0.07356349378824234, + "learning_rate": 5.0011518091151716e-06, + "loss": 0.4627012014389038, + "step": 10987 + }, + { + "epoch": 2.0303033802589185, + "grad_norm": 0.05867588892579079, + "learning_rate": 4.999424128605203e-06, + "loss": 0.2929174304008484, + "step": 10988 + }, + { + "epoch": 2.0304881569678144, + "grad_norm": 0.07791939377784729, + "learning_rate": 4.997696647102509e-06, + "loss": 0.40584731101989746, + "step": 10989 + }, + { + "epoch": 2.03067293367671, + "grad_norm": 0.06808242946863174, + "learning_rate": 4.995969364675839e-06, + "loss": 0.4458301067352295, + "step": 10990 + }, + { + "epoch": 2.030857710385606, + "grad_norm": 0.08416265994310379, + "learning_rate": 4.994242281393936e-06, + "loss": 0.43289077281951904, + "step": 10991 + }, + { + "epoch": 2.031042487094502, + "grad_norm": 0.08570495992898941, + "learning_rate": 4.992515397325526e-06, + "loss": 0.5073711276054382, + "step": 10992 + }, + { + "epoch": 2.0312272638033977, + "grad_norm": 0.09325053542852402, + "learning_rate": 4.990788712539336e-06, + "loss": 0.5359335541725159, + "step": 10993 + }, + { + "epoch": 2.0314120405122935, + "grad_norm": 0.05882570520043373, + "learning_rate": 4.989062227104083e-06, + "loss": 0.29013773798942566, + "step": 10994 + }, + { + "epoch": 2.0315968172211893, + "grad_norm": 0.07409077882766724, + "learning_rate": 4.987335941088478e-06, + "loss": 0.42141684889793396, + "step": 10995 + }, + { + "epoch": 2.031781593930085, + "grad_norm": 0.06654199212789536, + "learning_rate": 4.985609854561223e-06, + "loss": 0.37594857811927795, + "step": 10996 + }, + { + "epoch": 2.031966370638981, + "grad_norm": 0.06049758940935135, + "learning_rate": 4.9838839675910035e-06, + "loss": 0.31000638008117676, + "step": 10997 + }, + { + "epoch": 2.032151147347877, + "grad_norm": 0.09586911648511887, + "learning_rate": 4.982158280246508e-06, + "loss": 0.5141433477401733, + "step": 10998 + }, + { + "epoch": 2.0323359240567727, + "grad_norm": 0.07909748703241348, + "learning_rate": 4.980432792596419e-06, + "loss": 0.4322912096977234, + "step": 10999 + }, + { + "epoch": 2.0325207007656685, + "grad_norm": 0.06540367752313614, + "learning_rate": 4.978707504709394e-06, + "loss": 0.3917093276977539, + "step": 11000 + }, + { + "epoch": 2.0325207007656685, + "eval_loss": 0.5544933080673218, + "eval_runtime": 156.4967, + "eval_samples_per_second": 116.482, + "eval_steps_per_second": 14.563, + "step": 11000 + }, + { + "epoch": 2.0327054774745643, + "grad_norm": 0.09441975504159927, + "learning_rate": 4.976982416654102e-06, + "loss": 0.5133340358734131, + "step": 11001 + }, + { + "epoch": 2.03289025418346, + "grad_norm": 0.054924800992012024, + "learning_rate": 4.975257528499201e-06, + "loss": 0.2618792951107025, + "step": 11002 + }, + { + "epoch": 2.033075030892356, + "grad_norm": 0.07110217213630676, + "learning_rate": 4.973532840313325e-06, + "loss": 0.447940468788147, + "step": 11003 + }, + { + "epoch": 2.033259807601252, + "grad_norm": 0.08391235768795013, + "learning_rate": 4.971808352165116e-06, + "loss": 0.38242557644844055, + "step": 11004 + }, + { + "epoch": 2.0334445843101476, + "grad_norm": 0.08753757923841476, + "learning_rate": 4.970084064123208e-06, + "loss": 0.5258985757827759, + "step": 11005 + }, + { + "epoch": 2.0336293610190435, + "grad_norm": 0.07218511402606964, + "learning_rate": 4.968359976256213e-06, + "loss": 0.4401981234550476, + "step": 11006 + }, + { + "epoch": 2.0338141377279393, + "grad_norm": 0.08419043570756912, + "learning_rate": 4.966636088632749e-06, + "loss": 0.4633215665817261, + "step": 11007 + }, + { + "epoch": 2.033998914436835, + "grad_norm": 0.10938230901956558, + "learning_rate": 4.964912401321421e-06, + "loss": 0.5555753111839294, + "step": 11008 + }, + { + "epoch": 2.034183691145731, + "grad_norm": 0.0766386166214943, + "learning_rate": 4.963188914390827e-06, + "loss": 0.3662005066871643, + "step": 11009 + }, + { + "epoch": 2.0343684678546268, + "grad_norm": 0.08304692059755325, + "learning_rate": 4.961465627909561e-06, + "loss": 0.48914024233818054, + "step": 11010 + }, + { + "epoch": 2.0345532445635226, + "grad_norm": 0.07511365413665771, + "learning_rate": 4.959742541946195e-06, + "loss": 0.4251895546913147, + "step": 11011 + }, + { + "epoch": 2.0347380212724184, + "grad_norm": 0.09212878346443176, + "learning_rate": 4.958019656569306e-06, + "loss": 0.4835797846317291, + "step": 11012 + }, + { + "epoch": 2.0349227979813143, + "grad_norm": 0.07405047863721848, + "learning_rate": 4.956296971847462e-06, + "loss": 0.38855189085006714, + "step": 11013 + }, + { + "epoch": 2.03510757469021, + "grad_norm": 0.07276478409767151, + "learning_rate": 4.954574487849218e-06, + "loss": 0.3583826422691345, + "step": 11014 + }, + { + "epoch": 2.0352923513991064, + "grad_norm": 0.05918056517839432, + "learning_rate": 4.952852204643124e-06, + "loss": 0.32278576493263245, + "step": 11015 + }, + { + "epoch": 2.035477128108002, + "grad_norm": 0.08996355533599854, + "learning_rate": 4.951130122297725e-06, + "loss": 0.5059264898300171, + "step": 11016 + }, + { + "epoch": 2.035661904816898, + "grad_norm": 0.07419190555810928, + "learning_rate": 4.949408240881548e-06, + "loss": 0.3496807813644409, + "step": 11017 + }, + { + "epoch": 2.035846681525794, + "grad_norm": 0.08473897725343704, + "learning_rate": 4.947686560463122e-06, + "loss": 0.42983487248420715, + "step": 11018 + }, + { + "epoch": 2.0360314582346897, + "grad_norm": 0.08575071394443512, + "learning_rate": 4.945965081110967e-06, + "loss": 0.4554291367530823, + "step": 11019 + }, + { + "epoch": 2.0362162349435855, + "grad_norm": 0.08277926594018936, + "learning_rate": 4.944243802893584e-06, + "loss": 0.4762677550315857, + "step": 11020 + }, + { + "epoch": 2.0364010116524813, + "grad_norm": 0.07830332964658737, + "learning_rate": 4.942522725879483e-06, + "loss": 0.4043978750705719, + "step": 11021 + }, + { + "epoch": 2.036585788361377, + "grad_norm": 0.08838173002004623, + "learning_rate": 4.940801850137158e-06, + "loss": 0.4758176803588867, + "step": 11022 + }, + { + "epoch": 2.036770565070273, + "grad_norm": 0.06347142904996872, + "learning_rate": 4.939081175735087e-06, + "loss": 0.31101086735725403, + "step": 11023 + }, + { + "epoch": 2.036955341779169, + "grad_norm": 0.07915815711021423, + "learning_rate": 4.937360702741757e-06, + "loss": 0.45772865414619446, + "step": 11024 + }, + { + "epoch": 2.0371401184880646, + "grad_norm": 0.07311692088842392, + "learning_rate": 4.935640431225628e-06, + "loss": 0.3552285134792328, + "step": 11025 + }, + { + "epoch": 2.0373248951969605, + "grad_norm": 0.0813852846622467, + "learning_rate": 4.933920361255164e-06, + "loss": 0.42151376605033875, + "step": 11026 + }, + { + "epoch": 2.0375096719058563, + "grad_norm": 0.06344735622406006, + "learning_rate": 4.932200492898822e-06, + "loss": 0.3813314437866211, + "step": 11027 + }, + { + "epoch": 2.037694448614752, + "grad_norm": 0.06767319142818451, + "learning_rate": 4.930480826225043e-06, + "loss": 0.350860595703125, + "step": 11028 + }, + { + "epoch": 2.037879225323648, + "grad_norm": 0.07548850029706955, + "learning_rate": 4.928761361302269e-06, + "loss": 0.4137412905693054, + "step": 11029 + }, + { + "epoch": 2.038064002032544, + "grad_norm": 0.08888956904411316, + "learning_rate": 4.9270420981989295e-06, + "loss": 0.4689168930053711, + "step": 11030 + }, + { + "epoch": 2.0382487787414396, + "grad_norm": 0.080434650182724, + "learning_rate": 4.925323036983439e-06, + "loss": 0.4242910146713257, + "step": 11031 + }, + { + "epoch": 2.0384335554503354, + "grad_norm": 0.07344117015600204, + "learning_rate": 4.923604177724216e-06, + "loss": 0.39162227511405945, + "step": 11032 + }, + { + "epoch": 2.0386183321592313, + "grad_norm": 0.11162707954645157, + "learning_rate": 4.921885520489664e-06, + "loss": 0.6516607403755188, + "step": 11033 + }, + { + "epoch": 2.038803108868127, + "grad_norm": 0.09089545905590057, + "learning_rate": 4.9201670653481816e-06, + "loss": 0.5949812531471252, + "step": 11034 + }, + { + "epoch": 2.038987885577023, + "grad_norm": 0.09269700199365616, + "learning_rate": 4.918448812368156e-06, + "loss": 0.4980557858943939, + "step": 11035 + }, + { + "epoch": 2.0391726622859188, + "grad_norm": 0.05755534768104553, + "learning_rate": 4.916730761617975e-06, + "loss": 0.2817052900791168, + "step": 11036 + }, + { + "epoch": 2.0393574389948146, + "grad_norm": 0.0832810029387474, + "learning_rate": 4.915012913166001e-06, + "loss": 0.4219299554824829, + "step": 11037 + }, + { + "epoch": 2.0395422157037104, + "grad_norm": 0.08774325251579285, + "learning_rate": 4.913295267080604e-06, + "loss": 0.5183811783790588, + "step": 11038 + }, + { + "epoch": 2.0397269924126062, + "grad_norm": 0.09232005476951599, + "learning_rate": 4.911577823430146e-06, + "loss": 0.39076298475265503, + "step": 11039 + }, + { + "epoch": 2.039911769121502, + "grad_norm": 0.06912195682525635, + "learning_rate": 4.909860582282964e-06, + "loss": 0.36591362953186035, + "step": 11040 + }, + { + "epoch": 2.040096545830398, + "grad_norm": 0.07138343155384064, + "learning_rate": 4.908143543707412e-06, + "loss": 0.3453134298324585, + "step": 11041 + }, + { + "epoch": 2.0402813225392937, + "grad_norm": 0.06172487884759903, + "learning_rate": 4.906426707771813e-06, + "loss": 0.2931046783924103, + "step": 11042 + }, + { + "epoch": 2.0404660992481896, + "grad_norm": 0.08806567639112473, + "learning_rate": 4.904710074544495e-06, + "loss": 0.3510777950286865, + "step": 11043 + }, + { + "epoch": 2.040650875957086, + "grad_norm": 0.06607240438461304, + "learning_rate": 4.90299364409378e-06, + "loss": 0.39713090658187866, + "step": 11044 + }, + { + "epoch": 2.0408356526659817, + "grad_norm": 0.07734241336584091, + "learning_rate": 4.901277416487967e-06, + "loss": 0.37471258640289307, + "step": 11045 + }, + { + "epoch": 2.0410204293748775, + "grad_norm": 0.08430340886116028, + "learning_rate": 4.89956139179536e-06, + "loss": 0.46838080883026123, + "step": 11046 + }, + { + "epoch": 2.0412052060837733, + "grad_norm": 0.048094622790813446, + "learning_rate": 4.897845570084253e-06, + "loss": 0.22031986713409424, + "step": 11047 + }, + { + "epoch": 2.041389982792669, + "grad_norm": 0.09382618963718414, + "learning_rate": 4.896129951422931e-06, + "loss": 0.44207218289375305, + "step": 11048 + }, + { + "epoch": 2.041574759501565, + "grad_norm": 0.09609103947877884, + "learning_rate": 4.894414535879668e-06, + "loss": 0.693742573261261, + "step": 11049 + }, + { + "epoch": 2.041759536210461, + "grad_norm": 0.08293814957141876, + "learning_rate": 4.892699323522736e-06, + "loss": 0.41994351148605347, + "step": 11050 + }, + { + "epoch": 2.0419443129193566, + "grad_norm": 0.06315720081329346, + "learning_rate": 4.890984314420389e-06, + "loss": 0.30417191982269287, + "step": 11051 + }, + { + "epoch": 2.0421290896282525, + "grad_norm": 0.0869455635547638, + "learning_rate": 4.889269508640884e-06, + "loss": 0.48857417702674866, + "step": 11052 + }, + { + "epoch": 2.0423138663371483, + "grad_norm": 0.06293217092752457, + "learning_rate": 4.887554906252461e-06, + "loss": 0.3305082321166992, + "step": 11053 + }, + { + "epoch": 2.042498643046044, + "grad_norm": 0.0635986402630806, + "learning_rate": 4.885840507323359e-06, + "loss": 0.2795847952365875, + "step": 11054 + }, + { + "epoch": 2.04268341975494, + "grad_norm": 0.061714962124824524, + "learning_rate": 4.884126311921804e-06, + "loss": 0.3719215393066406, + "step": 11055 + }, + { + "epoch": 2.0428681964638358, + "grad_norm": 0.0834885984659195, + "learning_rate": 4.8824123201160205e-06, + "loss": 0.4365217983722687, + "step": 11056 + }, + { + "epoch": 2.0430529731727316, + "grad_norm": 0.07816538214683533, + "learning_rate": 4.880698531974212e-06, + "loss": 0.40753647685050964, + "step": 11057 + }, + { + "epoch": 2.0432377498816274, + "grad_norm": 0.0627506822347641, + "learning_rate": 4.878984947564589e-06, + "loss": 0.338919073343277, + "step": 11058 + }, + { + "epoch": 2.0434225265905233, + "grad_norm": 0.05984465032815933, + "learning_rate": 4.877271566955339e-06, + "loss": 0.3647313117980957, + "step": 11059 + }, + { + "epoch": 2.043607303299419, + "grad_norm": 0.0724530890583992, + "learning_rate": 4.875558390214652e-06, + "loss": 0.42080751061439514, + "step": 11060 + }, + { + "epoch": 2.043792080008315, + "grad_norm": 0.0803777351975441, + "learning_rate": 4.873845417410714e-06, + "loss": 0.5013444423675537, + "step": 11061 + }, + { + "epoch": 2.0439768567172107, + "grad_norm": 0.10302601754665375, + "learning_rate": 4.872132648611689e-06, + "loss": 0.6300573348999023, + "step": 11062 + }, + { + "epoch": 2.0441616334261066, + "grad_norm": 0.062125064432621, + "learning_rate": 4.870420083885741e-06, + "loss": 0.3633728623390198, + "step": 11063 + }, + { + "epoch": 2.0443464101350024, + "grad_norm": 0.07537313550710678, + "learning_rate": 4.8687077233010286e-06, + "loss": 0.36421942710876465, + "step": 11064 + }, + { + "epoch": 2.0445311868438982, + "grad_norm": 0.07662282884120941, + "learning_rate": 4.8669955669256905e-06, + "loss": 0.3631492257118225, + "step": 11065 + }, + { + "epoch": 2.044715963552794, + "grad_norm": 0.06461525708436966, + "learning_rate": 4.865283614827871e-06, + "loss": 0.3085329830646515, + "step": 11066 + }, + { + "epoch": 2.04490074026169, + "grad_norm": 0.08214090019464493, + "learning_rate": 4.863571867075699e-06, + "loss": 0.4510284960269928, + "step": 11067 + }, + { + "epoch": 2.0450855169705857, + "grad_norm": 0.05756901204586029, + "learning_rate": 4.861860323737297e-06, + "loss": 0.3061563968658447, + "step": 11068 + }, + { + "epoch": 2.0452702936794815, + "grad_norm": 0.08127584308385849, + "learning_rate": 4.860148984880778e-06, + "loss": 0.5320109128952026, + "step": 11069 + }, + { + "epoch": 2.0454550703883774, + "grad_norm": 0.07574288547039032, + "learning_rate": 4.858437850574253e-06, + "loss": 0.39699089527130127, + "step": 11070 + }, + { + "epoch": 2.045639847097273, + "grad_norm": 0.1033301055431366, + "learning_rate": 4.8567269208858125e-06, + "loss": 0.5131538510322571, + "step": 11071 + }, + { + "epoch": 2.045824623806169, + "grad_norm": 0.09955132752656937, + "learning_rate": 4.85501619588355e-06, + "loss": 0.5548917055130005, + "step": 11072 + }, + { + "epoch": 2.046009400515065, + "grad_norm": 0.05739162489771843, + "learning_rate": 4.853305675635544e-06, + "loss": 0.23811882734298706, + "step": 11073 + }, + { + "epoch": 2.046194177223961, + "grad_norm": 0.06883546710014343, + "learning_rate": 4.851595360209872e-06, + "loss": 0.26252683997154236, + "step": 11074 + }, + { + "epoch": 2.046378953932857, + "grad_norm": 0.07779659330844879, + "learning_rate": 4.849885249674601e-06, + "loss": 0.4272381067276001, + "step": 11075 + }, + { + "epoch": 2.046563730641753, + "grad_norm": 0.07720985263586044, + "learning_rate": 4.84817534409778e-06, + "loss": 0.48315441608428955, + "step": 11076 + }, + { + "epoch": 2.0467485073506486, + "grad_norm": 0.0919061005115509, + "learning_rate": 4.846465643547462e-06, + "loss": 0.5336605906486511, + "step": 11077 + }, + { + "epoch": 2.0469332840595444, + "grad_norm": 0.08197201788425446, + "learning_rate": 4.8447561480916925e-06, + "loss": 0.45011091232299805, + "step": 11078 + }, + { + "epoch": 2.0471180607684403, + "grad_norm": 0.0737820491194725, + "learning_rate": 4.8430468577984955e-06, + "loss": 0.4351140558719635, + "step": 11079 + }, + { + "epoch": 2.047302837477336, + "grad_norm": 0.08648515492677689, + "learning_rate": 4.841337772735897e-06, + "loss": 0.46810683608055115, + "step": 11080 + }, + { + "epoch": 2.047487614186232, + "grad_norm": 0.0811564177274704, + "learning_rate": 4.839628892971922e-06, + "loss": 0.37373584508895874, + "step": 11081 + }, + { + "epoch": 2.0476723908951278, + "grad_norm": 0.08425736427307129, + "learning_rate": 4.837920218574569e-06, + "loss": 0.4559929668903351, + "step": 11082 + }, + { + "epoch": 2.0478571676040236, + "grad_norm": 0.09902206808328629, + "learning_rate": 4.8362117496118395e-06, + "loss": 0.4782213270664215, + "step": 11083 + }, + { + "epoch": 2.0480419443129194, + "grad_norm": 0.08472497761249542, + "learning_rate": 4.8345034861517334e-06, + "loss": 0.4149915874004364, + "step": 11084 + }, + { + "epoch": 2.0482267210218152, + "grad_norm": 0.07617567479610443, + "learning_rate": 4.832795428262222e-06, + "loss": 0.3213249146938324, + "step": 11085 + }, + { + "epoch": 2.048411497730711, + "grad_norm": 0.09814009070396423, + "learning_rate": 4.8310875760112884e-06, + "loss": 0.4558155834674835, + "step": 11086 + }, + { + "epoch": 2.048596274439607, + "grad_norm": 0.10031770914793015, + "learning_rate": 4.829379929466897e-06, + "loss": 0.5350594520568848, + "step": 11087 + }, + { + "epoch": 2.0487810511485027, + "grad_norm": 0.0721861869096756, + "learning_rate": 4.827672488697007e-06, + "loss": 0.37536805868148804, + "step": 11088 + }, + { + "epoch": 2.0489658278573986, + "grad_norm": 0.09258493781089783, + "learning_rate": 4.825965253769574e-06, + "loss": 0.43440642952919006, + "step": 11089 + }, + { + "epoch": 2.0491506045662944, + "grad_norm": 0.08985716104507446, + "learning_rate": 4.824258224752533e-06, + "loss": 0.5369082093238831, + "step": 11090 + }, + { + "epoch": 2.04933538127519, + "grad_norm": 0.08058978617191315, + "learning_rate": 4.8225514017138205e-06, + "loss": 0.39088350534439087, + "step": 11091 + }, + { + "epoch": 2.049520157984086, + "grad_norm": 0.07473517954349518, + "learning_rate": 4.820844784721366e-06, + "loss": 0.312513530254364, + "step": 11092 + }, + { + "epoch": 2.049704934692982, + "grad_norm": 0.0834210067987442, + "learning_rate": 4.819138373843084e-06, + "loss": 0.44025424122810364, + "step": 11093 + }, + { + "epoch": 2.0498897114018777, + "grad_norm": 0.08357541263103485, + "learning_rate": 4.8174321691468865e-06, + "loss": 0.36911502480506897, + "step": 11094 + }, + { + "epoch": 2.0500744881107735, + "grad_norm": 0.08149900287389755, + "learning_rate": 4.8157261707006785e-06, + "loss": 0.47982460260391235, + "step": 11095 + }, + { + "epoch": 2.0502592648196694, + "grad_norm": 0.0683201402425766, + "learning_rate": 4.814020378572345e-06, + "loss": 0.32079848647117615, + "step": 11096 + }, + { + "epoch": 2.050444041528565, + "grad_norm": 0.08017799258232117, + "learning_rate": 4.812314792829776e-06, + "loss": 0.3812326490879059, + "step": 11097 + }, + { + "epoch": 2.050628818237461, + "grad_norm": 0.08644194155931473, + "learning_rate": 4.810609413540852e-06, + "loss": 0.47296470403671265, + "step": 11098 + }, + { + "epoch": 2.050813594946357, + "grad_norm": 0.07601633667945862, + "learning_rate": 4.808904240773433e-06, + "loss": 0.34887439012527466, + "step": 11099 + }, + { + "epoch": 2.0509983716552527, + "grad_norm": 0.08265700191259384, + "learning_rate": 4.807199274595382e-06, + "loss": 0.39728039503097534, + "step": 11100 + }, + { + "epoch": 2.0511831483641485, + "grad_norm": 0.07100403308868408, + "learning_rate": 4.805494515074561e-06, + "loss": 0.4029456675052643, + "step": 11101 + }, + { + "epoch": 2.0513679250730443, + "grad_norm": 0.10822330415248871, + "learning_rate": 4.803789962278802e-06, + "loss": 0.6121471524238586, + "step": 11102 + }, + { + "epoch": 2.0515527017819406, + "grad_norm": 0.07543343305587769, + "learning_rate": 4.802085616275947e-06, + "loss": 0.34128338098526, + "step": 11103 + }, + { + "epoch": 2.0517374784908364, + "grad_norm": 0.08622822910547256, + "learning_rate": 4.8003814771338256e-06, + "loss": 0.47281673550605774, + "step": 11104 + }, + { + "epoch": 2.0519222551997323, + "grad_norm": 0.13187064230442047, + "learning_rate": 4.79867754492025e-06, + "loss": 0.8056926727294922, + "step": 11105 + }, + { + "epoch": 2.052107031908628, + "grad_norm": 0.1005135178565979, + "learning_rate": 4.796973819703035e-06, + "loss": 0.5747947096824646, + "step": 11106 + }, + { + "epoch": 2.052291808617524, + "grad_norm": 0.09979305416345596, + "learning_rate": 4.795270301549984e-06, + "loss": 0.4201939105987549, + "step": 11107 + }, + { + "epoch": 2.0524765853264197, + "grad_norm": 0.08658093214035034, + "learning_rate": 4.793566990528891e-06, + "loss": 0.5614806413650513, + "step": 11108 + }, + { + "epoch": 2.0526613620353156, + "grad_norm": 0.08579076081514359, + "learning_rate": 4.791863886707547e-06, + "loss": 0.3762155771255493, + "step": 11109 + }, + { + "epoch": 2.0528461387442114, + "grad_norm": 0.11106253415346146, + "learning_rate": 4.7901609901537226e-06, + "loss": 0.5277875065803528, + "step": 11110 + }, + { + "epoch": 2.0530309154531072, + "grad_norm": 0.08382830023765564, + "learning_rate": 4.788458300935191e-06, + "loss": 0.48897239565849304, + "step": 11111 + }, + { + "epoch": 2.053215692162003, + "grad_norm": 0.08559130132198334, + "learning_rate": 4.786755819119715e-06, + "loss": 0.48729780316352844, + "step": 11112 + }, + { + "epoch": 2.053400468870899, + "grad_norm": 0.08569462597370148, + "learning_rate": 4.7850535447750455e-06, + "loss": 0.48792409896850586, + "step": 11113 + }, + { + "epoch": 2.0535852455797947, + "grad_norm": 0.11856204271316528, + "learning_rate": 4.783351477968932e-06, + "loss": 0.6652650237083435, + "step": 11114 + }, + { + "epoch": 2.0537700222886905, + "grad_norm": 0.08174629509449005, + "learning_rate": 4.7816496187691105e-06, + "loss": 0.5053659081459045, + "step": 11115 + }, + { + "epoch": 2.0539547989975864, + "grad_norm": 0.10635677725076675, + "learning_rate": 4.779947967243305e-06, + "loss": 0.5447197556495667, + "step": 11116 + }, + { + "epoch": 2.054139575706482, + "grad_norm": 0.07048063725233078, + "learning_rate": 4.778246523459239e-06, + "loss": 0.4420374631881714, + "step": 11117 + }, + { + "epoch": 2.054324352415378, + "grad_norm": 0.0684606060385704, + "learning_rate": 4.776545287484629e-06, + "loss": 0.3754832148551941, + "step": 11118 + }, + { + "epoch": 2.054509129124274, + "grad_norm": 0.07237427681684494, + "learning_rate": 4.774844259387169e-06, + "loss": 0.4265793561935425, + "step": 11119 + }, + { + "epoch": 2.0546939058331697, + "grad_norm": 0.08699125796556473, + "learning_rate": 4.773143439234558e-06, + "loss": 0.45075085759162903, + "step": 11120 + }, + { + "epoch": 2.0548786825420655, + "grad_norm": 0.08007710427045822, + "learning_rate": 4.771442827094493e-06, + "loss": 0.4470076262950897, + "step": 11121 + }, + { + "epoch": 2.0550634592509613, + "grad_norm": 0.07942335307598114, + "learning_rate": 4.76974242303464e-06, + "loss": 0.45654499530792236, + "step": 11122 + }, + { + "epoch": 2.055248235959857, + "grad_norm": 0.08444330096244812, + "learning_rate": 4.76804222712268e-06, + "loss": 0.4239978790283203, + "step": 11123 + }, + { + "epoch": 2.055433012668753, + "grad_norm": 0.08909330517053604, + "learning_rate": 4.766342239426267e-06, + "loss": 0.47700098156929016, + "step": 11124 + }, + { + "epoch": 2.055617789377649, + "grad_norm": 0.08406031131744385, + "learning_rate": 4.764642460013058e-06, + "loss": 0.40069150924682617, + "step": 11125 + }, + { + "epoch": 2.0558025660865447, + "grad_norm": 0.09387758374214172, + "learning_rate": 4.762942888950702e-06, + "loss": 0.5573445558547974, + "step": 11126 + }, + { + "epoch": 2.0559873427954405, + "grad_norm": 0.09293120354413986, + "learning_rate": 4.761243526306831e-06, + "loss": 0.5203923583030701, + "step": 11127 + }, + { + "epoch": 2.0561721195043363, + "grad_norm": 0.09802858531475067, + "learning_rate": 4.75954437214908e-06, + "loss": 0.5696846842765808, + "step": 11128 + }, + { + "epoch": 2.056356896213232, + "grad_norm": 0.07466862350702286, + "learning_rate": 4.7578454265450715e-06, + "loss": 0.41942253708839417, + "step": 11129 + }, + { + "epoch": 2.056541672922128, + "grad_norm": 0.09071548283100128, + "learning_rate": 4.75614668956241e-06, + "loss": 0.4312906265258789, + "step": 11130 + }, + { + "epoch": 2.056726449631024, + "grad_norm": 0.059735409915447235, + "learning_rate": 4.7544481612687045e-06, + "loss": 0.3316957652568817, + "step": 11131 + }, + { + "epoch": 2.0569112263399196, + "grad_norm": 0.08869393914937973, + "learning_rate": 4.75274984173155e-06, + "loss": 0.44609615206718445, + "step": 11132 + }, + { + "epoch": 2.057096003048816, + "grad_norm": 0.08205130696296692, + "learning_rate": 4.751051731018537e-06, + "loss": 0.4383608400821686, + "step": 11133 + }, + { + "epoch": 2.0572807797577117, + "grad_norm": 0.07823719084262848, + "learning_rate": 4.749353829197242e-06, + "loss": 0.4239680767059326, + "step": 11134 + }, + { + "epoch": 2.0574655564666076, + "grad_norm": 0.10449334979057312, + "learning_rate": 4.747656136335242e-06, + "loss": 0.5839539766311646, + "step": 11135 + }, + { + "epoch": 2.0576503331755034, + "grad_norm": 0.09385858476161957, + "learning_rate": 4.745958652500091e-06, + "loss": 0.713076651096344, + "step": 11136 + }, + { + "epoch": 2.057835109884399, + "grad_norm": 0.08174755424261093, + "learning_rate": 4.744261377759352e-06, + "loss": 0.4505770206451416, + "step": 11137 + }, + { + "epoch": 2.058019886593295, + "grad_norm": 0.07801038026809692, + "learning_rate": 4.742564312180562e-06, + "loss": 0.3908689618110657, + "step": 11138 + }, + { + "epoch": 2.058204663302191, + "grad_norm": 0.07908947765827179, + "learning_rate": 4.7408674558312654e-06, + "loss": 0.48987993597984314, + "step": 11139 + }, + { + "epoch": 2.0583894400110867, + "grad_norm": 0.10476920753717422, + "learning_rate": 4.73917080877899e-06, + "loss": 0.5112239122390747, + "step": 11140 + }, + { + "epoch": 2.0585742167199825, + "grad_norm": 0.07934331148862839, + "learning_rate": 4.737474371091257e-06, + "loss": 0.41086068749427795, + "step": 11141 + }, + { + "epoch": 2.0587589934288784, + "grad_norm": 0.07572036236524582, + "learning_rate": 4.735778142835581e-06, + "loss": 0.4580512046813965, + "step": 11142 + }, + { + "epoch": 2.058943770137774, + "grad_norm": 0.1033131554722786, + "learning_rate": 4.734082124079468e-06, + "loss": 0.6239036321640015, + "step": 11143 + }, + { + "epoch": 2.05912854684667, + "grad_norm": 0.061327748000621796, + "learning_rate": 4.732386314890408e-06, + "loss": 0.2922775149345398, + "step": 11144 + }, + { + "epoch": 2.059313323555566, + "grad_norm": 0.07396014779806137, + "learning_rate": 4.7306907153358915e-06, + "loss": 0.33885297179222107, + "step": 11145 + }, + { + "epoch": 2.0594981002644617, + "grad_norm": 0.07981745153665543, + "learning_rate": 4.7289953254834e-06, + "loss": 0.41183799505233765, + "step": 11146 + }, + { + "epoch": 2.0596828769733575, + "grad_norm": 0.09812553226947784, + "learning_rate": 4.727300145400403e-06, + "loss": 0.508681058883667, + "step": 11147 + }, + { + "epoch": 2.0598676536822533, + "grad_norm": 0.07962342351675034, + "learning_rate": 4.725605175154365e-06, + "loss": 0.5423054099082947, + "step": 11148 + }, + { + "epoch": 2.060052430391149, + "grad_norm": 0.07644487917423248, + "learning_rate": 4.723910414812742e-06, + "loss": 0.3208584189414978, + "step": 11149 + }, + { + "epoch": 2.060237207100045, + "grad_norm": 0.09190234541893005, + "learning_rate": 4.7222158644429764e-06, + "loss": 0.6065411567687988, + "step": 11150 + }, + { + "epoch": 2.060421983808941, + "grad_norm": 0.08584508299827576, + "learning_rate": 4.720521524112504e-06, + "loss": 0.4849585294723511, + "step": 11151 + }, + { + "epoch": 2.0606067605178366, + "grad_norm": 0.08093219250440598, + "learning_rate": 4.7188273938887605e-06, + "loss": 0.4830683469772339, + "step": 11152 + }, + { + "epoch": 2.0607915372267325, + "grad_norm": 0.10254708677530289, + "learning_rate": 4.717133473839163e-06, + "loss": 0.6699663400650024, + "step": 11153 + }, + { + "epoch": 2.0609763139356283, + "grad_norm": 0.09748372435569763, + "learning_rate": 4.715439764031129e-06, + "loss": 0.5278805494308472, + "step": 11154 + }, + { + "epoch": 2.061161090644524, + "grad_norm": 0.09834560006856918, + "learning_rate": 4.713746264532056e-06, + "loss": 0.4821929931640625, + "step": 11155 + }, + { + "epoch": 2.06134586735342, + "grad_norm": 0.10058598965406418, + "learning_rate": 4.712052975409342e-06, + "loss": 0.5771047472953796, + "step": 11156 + }, + { + "epoch": 2.061530644062316, + "grad_norm": 0.07111495733261108, + "learning_rate": 4.710359896730379e-06, + "loss": 0.3995524048805237, + "step": 11157 + }, + { + "epoch": 2.0617154207712116, + "grad_norm": 0.09514980763196945, + "learning_rate": 4.7086670285625406e-06, + "loss": 0.47377482056617737, + "step": 11158 + }, + { + "epoch": 2.0619001974801074, + "grad_norm": 0.07909617573022842, + "learning_rate": 4.7069743709731985e-06, + "loss": 0.41966110467910767, + "step": 11159 + }, + { + "epoch": 2.0620849741890033, + "grad_norm": 0.08692745119333267, + "learning_rate": 4.705281924029718e-06, + "loss": 0.45984774827957153, + "step": 11160 + }, + { + "epoch": 2.062269750897899, + "grad_norm": 0.070327527821064, + "learning_rate": 4.7035896877994514e-06, + "loss": 0.3887335956096649, + "step": 11161 + }, + { + "epoch": 2.0624545276067954, + "grad_norm": 0.07713624089956284, + "learning_rate": 4.701897662349745e-06, + "loss": 0.37153637409210205, + "step": 11162 + }, + { + "epoch": 2.062639304315691, + "grad_norm": 0.06490417569875717, + "learning_rate": 4.7002058477479395e-06, + "loss": 0.35753506422042847, + "step": 11163 + }, + { + "epoch": 2.062824081024587, + "grad_norm": 0.06854233890771866, + "learning_rate": 4.698514244061357e-06, + "loss": 0.37862610816955566, + "step": 11164 + }, + { + "epoch": 2.063008857733483, + "grad_norm": 0.07123073190450668, + "learning_rate": 4.696822851357321e-06, + "loss": 0.34985968470573425, + "step": 11165 + }, + { + "epoch": 2.0631936344423787, + "grad_norm": 0.06531429290771484, + "learning_rate": 4.695131669703145e-06, + "loss": 0.37143802642822266, + "step": 11166 + }, + { + "epoch": 2.0633784111512745, + "grad_norm": 0.08906910568475723, + "learning_rate": 4.6934406991661304e-06, + "loss": 0.38228845596313477, + "step": 11167 + }, + { + "epoch": 2.0635631878601703, + "grad_norm": 0.06802390515804291, + "learning_rate": 4.691749939813575e-06, + "loss": 0.2796257436275482, + "step": 11168 + }, + { + "epoch": 2.063747964569066, + "grad_norm": 0.09548873454332352, + "learning_rate": 4.690059391712767e-06, + "loss": 0.5309671759605408, + "step": 11169 + }, + { + "epoch": 2.063932741277962, + "grad_norm": 0.07820142060518265, + "learning_rate": 4.68836905493098e-06, + "loss": 0.4591372311115265, + "step": 11170 + }, + { + "epoch": 2.064117517986858, + "grad_norm": 0.08913306146860123, + "learning_rate": 4.68667892953549e-06, + "loss": 0.6002703905105591, + "step": 11171 + }, + { + "epoch": 2.0643022946957537, + "grad_norm": 0.10198698937892914, + "learning_rate": 4.684989015593547e-06, + "loss": 0.6035742163658142, + "step": 11172 + }, + { + "epoch": 2.0644870714046495, + "grad_norm": 0.08727562427520752, + "learning_rate": 4.683299313172418e-06, + "loss": 0.406217098236084, + "step": 11173 + }, + { + "epoch": 2.0646718481135453, + "grad_norm": 0.10919637978076935, + "learning_rate": 4.681609822339346e-06, + "loss": 0.501842200756073, + "step": 11174 + }, + { + "epoch": 2.064856624822441, + "grad_norm": 0.09159188717603683, + "learning_rate": 4.679920543161559e-06, + "loss": 0.5147289633750916, + "step": 11175 + }, + { + "epoch": 2.065041401531337, + "grad_norm": 0.07231157273054123, + "learning_rate": 4.678231475706291e-06, + "loss": 0.3952037990093231, + "step": 11176 + }, + { + "epoch": 2.065226178240233, + "grad_norm": 0.06625118851661682, + "learning_rate": 4.6765426200407635e-06, + "loss": 0.22810131311416626, + "step": 11177 + }, + { + "epoch": 2.0654109549491286, + "grad_norm": 0.08682150393724442, + "learning_rate": 4.674853976232182e-06, + "loss": 0.473644495010376, + "step": 11178 + }, + { + "epoch": 2.0655957316580245, + "grad_norm": 0.10142802447080612, + "learning_rate": 4.673165544347748e-06, + "loss": 0.5257569551467896, + "step": 11179 + }, + { + "epoch": 2.0657805083669203, + "grad_norm": 0.06687464565038681, + "learning_rate": 4.6714773244546665e-06, + "loss": 0.3753644824028015, + "step": 11180 + }, + { + "epoch": 2.065965285075816, + "grad_norm": 0.06486748158931732, + "learning_rate": 4.669789316620113e-06, + "loss": 0.32193541526794434, + "step": 11181 + }, + { + "epoch": 2.066150061784712, + "grad_norm": 0.08548235893249512, + "learning_rate": 4.66810152091127e-06, + "loss": 0.47026437520980835, + "step": 11182 + }, + { + "epoch": 2.0663348384936078, + "grad_norm": 0.11571351438760757, + "learning_rate": 4.666413937395308e-06, + "loss": 0.5890333652496338, + "step": 11183 + }, + { + "epoch": 2.0665196152025036, + "grad_norm": 0.10707179456949234, + "learning_rate": 4.6647265661393806e-06, + "loss": 0.5810878276824951, + "step": 11184 + }, + { + "epoch": 2.0667043919113994, + "grad_norm": 0.08522901684045792, + "learning_rate": 4.663039407210645e-06, + "loss": 0.42667514085769653, + "step": 11185 + }, + { + "epoch": 2.0668891686202953, + "grad_norm": 0.09314467012882233, + "learning_rate": 4.661352460676243e-06, + "loss": 0.4527299702167511, + "step": 11186 + }, + { + "epoch": 2.067073945329191, + "grad_norm": 0.07868710905313492, + "learning_rate": 4.659665726603309e-06, + "loss": 0.42437943816185, + "step": 11187 + }, + { + "epoch": 2.067258722038087, + "grad_norm": 0.07289140671491623, + "learning_rate": 4.657979205058978e-06, + "loss": 0.44353288412094116, + "step": 11188 + }, + { + "epoch": 2.0674434987469827, + "grad_norm": 0.09959884732961655, + "learning_rate": 4.656292896110356e-06, + "loss": 0.5056084990501404, + "step": 11189 + }, + { + "epoch": 2.0676282754558786, + "grad_norm": 0.11057322472333908, + "learning_rate": 4.6546067998245595e-06, + "loss": 0.6372722387313843, + "step": 11190 + }, + { + "epoch": 2.067813052164775, + "grad_norm": 0.09406181424856186, + "learning_rate": 4.6529209162686916e-06, + "loss": 0.5327972769737244, + "step": 11191 + }, + { + "epoch": 2.0679978288736707, + "grad_norm": 0.09712541103363037, + "learning_rate": 4.651235245509835e-06, + "loss": 0.5264273285865784, + "step": 11192 + }, + { + "epoch": 2.0681826055825665, + "grad_norm": 0.07588005065917969, + "learning_rate": 4.649549787615086e-06, + "loss": 0.4727795124053955, + "step": 11193 + }, + { + "epoch": 2.0683673822914623, + "grad_norm": 0.07036327570676804, + "learning_rate": 4.647864542651519e-06, + "loss": 0.31954243779182434, + "step": 11194 + }, + { + "epoch": 2.068552159000358, + "grad_norm": 0.07767093181610107, + "learning_rate": 4.646179510686195e-06, + "loss": 0.364122211933136, + "step": 11195 + }, + { + "epoch": 2.068736935709254, + "grad_norm": 0.08550487458705902, + "learning_rate": 4.644494691786177e-06, + "loss": 0.5416126847267151, + "step": 11196 + }, + { + "epoch": 2.06892171241815, + "grad_norm": 0.08919231593608856, + "learning_rate": 4.642810086018518e-06, + "loss": 0.512860894203186, + "step": 11197 + }, + { + "epoch": 2.0691064891270456, + "grad_norm": 0.08846841007471085, + "learning_rate": 4.641125693450253e-06, + "loss": 0.4849124550819397, + "step": 11198 + }, + { + "epoch": 2.0692912658359415, + "grad_norm": 0.07089854776859283, + "learning_rate": 4.6394415141484176e-06, + "loss": 0.3652840256690979, + "step": 11199 + }, + { + "epoch": 2.0694760425448373, + "grad_norm": 0.06689655035734177, + "learning_rate": 4.637757548180045e-06, + "loss": 0.37974056601524353, + "step": 11200 + }, + { + "epoch": 2.069660819253733, + "grad_norm": 0.05894790217280388, + "learning_rate": 4.6360737956121425e-06, + "loss": 0.3253614604473114, + "step": 11201 + }, + { + "epoch": 2.069845595962629, + "grad_norm": 0.1028907522559166, + "learning_rate": 4.634390256511725e-06, + "loss": 0.5811859369277954, + "step": 11202 + }, + { + "epoch": 2.070030372671525, + "grad_norm": 0.08756034821271896, + "learning_rate": 4.632706930945784e-06, + "loss": 0.49308815598487854, + "step": 11203 + }, + { + "epoch": 2.0702151493804206, + "grad_norm": 0.07366007566452026, + "learning_rate": 4.6310238189813165e-06, + "loss": 0.43656429648399353, + "step": 11204 + }, + { + "epoch": 2.0703999260893164, + "grad_norm": 0.09568152576684952, + "learning_rate": 4.629340920685302e-06, + "loss": 0.5442222356796265, + "step": 11205 + }, + { + "epoch": 2.0705847027982123, + "grad_norm": 0.08735237270593643, + "learning_rate": 4.627658236124717e-06, + "loss": 0.5923407673835754, + "step": 11206 + }, + { + "epoch": 2.070769479507108, + "grad_norm": 0.06391552090644836, + "learning_rate": 4.625975765366527e-06, + "loss": 0.3378419280052185, + "step": 11207 + }, + { + "epoch": 2.070954256216004, + "grad_norm": 0.0819556936621666, + "learning_rate": 4.624293508477691e-06, + "loss": 0.3597787022590637, + "step": 11208 + }, + { + "epoch": 2.0711390329248998, + "grad_norm": 0.10062183439731598, + "learning_rate": 4.622611465525152e-06, + "loss": 0.5810568332672119, + "step": 11209 + }, + { + "epoch": 2.0713238096337956, + "grad_norm": 0.0777035653591156, + "learning_rate": 4.620929636575852e-06, + "loss": 0.38935208320617676, + "step": 11210 + }, + { + "epoch": 2.0715085863426914, + "grad_norm": 0.09280351549386978, + "learning_rate": 4.619248021696728e-06, + "loss": 0.5045894980430603, + "step": 11211 + }, + { + "epoch": 2.0716933630515872, + "grad_norm": 0.0894412100315094, + "learning_rate": 4.617566620954691e-06, + "loss": 0.39157426357269287, + "step": 11212 + }, + { + "epoch": 2.071878139760483, + "grad_norm": 0.056693993508815765, + "learning_rate": 4.615885434416667e-06, + "loss": 0.3080008625984192, + "step": 11213 + }, + { + "epoch": 2.072062916469379, + "grad_norm": 0.07515745609998703, + "learning_rate": 4.614204462149561e-06, + "loss": 0.5135540962219238, + "step": 11214 + }, + { + "epoch": 2.0722476931782747, + "grad_norm": 0.09220051020383835, + "learning_rate": 4.612523704220264e-06, + "loss": 0.4865436851978302, + "step": 11215 + }, + { + "epoch": 2.0724324698871706, + "grad_norm": 0.10826101899147034, + "learning_rate": 4.610843160695668e-06, + "loss": 0.5282402634620667, + "step": 11216 + }, + { + "epoch": 2.0726172465960664, + "grad_norm": 0.08101743459701538, + "learning_rate": 4.6091628316426585e-06, + "loss": 0.39346298575401306, + "step": 11217 + }, + { + "epoch": 2.072802023304962, + "grad_norm": 0.07107812911272049, + "learning_rate": 4.607482717128098e-06, + "loss": 0.4143473207950592, + "step": 11218 + }, + { + "epoch": 2.072986800013858, + "grad_norm": 0.07952581346035004, + "learning_rate": 4.605802817218855e-06, + "loss": 0.439717561006546, + "step": 11219 + }, + { + "epoch": 2.0731715767227543, + "grad_norm": 0.06694263964891434, + "learning_rate": 4.604123131981782e-06, + "loss": 0.3764767050743103, + "step": 11220 + }, + { + "epoch": 2.07335635343165, + "grad_norm": 0.05754847824573517, + "learning_rate": 4.602443661483729e-06, + "loss": 0.32524630427360535, + "step": 11221 + }, + { + "epoch": 2.073541130140546, + "grad_norm": 0.07798845320940018, + "learning_rate": 4.600764405791533e-06, + "loss": 0.39575040340423584, + "step": 11222 + }, + { + "epoch": 2.073725906849442, + "grad_norm": 0.06815237551927567, + "learning_rate": 4.59908536497202e-06, + "loss": 0.37505558133125305, + "step": 11223 + }, + { + "epoch": 2.0739106835583376, + "grad_norm": 0.08333446830511093, + "learning_rate": 4.597406539092011e-06, + "loss": 0.41966623067855835, + "step": 11224 + }, + { + "epoch": 2.0740954602672335, + "grad_norm": 0.08505209535360336, + "learning_rate": 4.595727928218319e-06, + "loss": 0.4656205475330353, + "step": 11225 + }, + { + "epoch": 2.0742802369761293, + "grad_norm": 0.08556347340345383, + "learning_rate": 4.594049532417748e-06, + "loss": 0.3957214951515198, + "step": 11226 + }, + { + "epoch": 2.074465013685025, + "grad_norm": 0.06823302060365677, + "learning_rate": 4.592371351757093e-06, + "loss": 0.3491239845752716, + "step": 11227 + }, + { + "epoch": 2.074649790393921, + "grad_norm": 0.07873357087373734, + "learning_rate": 4.590693386303143e-06, + "loss": 0.45640286803245544, + "step": 11228 + }, + { + "epoch": 2.0748345671028168, + "grad_norm": 0.09663490951061249, + "learning_rate": 4.589015636122669e-06, + "loss": 0.5493663549423218, + "step": 11229 + }, + { + "epoch": 2.0750193438117126, + "grad_norm": 0.06493667513132095, + "learning_rate": 4.587338101282443e-06, + "loss": 0.41553476452827454, + "step": 11230 + }, + { + "epoch": 2.0752041205206084, + "grad_norm": 0.08128707110881805, + "learning_rate": 4.585660781849233e-06, + "loss": 0.45945361256599426, + "step": 11231 + }, + { + "epoch": 2.0753888972295043, + "grad_norm": 0.05496186390519142, + "learning_rate": 4.583983677889775e-06, + "loss": 0.30561724305152893, + "step": 11232 + }, + { + "epoch": 2.0755736739384, + "grad_norm": 0.05655686557292938, + "learning_rate": 4.582306789470826e-06, + "loss": 0.341400146484375, + "step": 11233 + }, + { + "epoch": 2.075758450647296, + "grad_norm": 0.083131805062294, + "learning_rate": 4.5806301166591214e-06, + "loss": 0.6002212762832642, + "step": 11234 + }, + { + "epoch": 2.0759432273561917, + "grad_norm": 0.07641672343015671, + "learning_rate": 4.578953659521379e-06, + "loss": 0.5141034722328186, + "step": 11235 + }, + { + "epoch": 2.0761280040650876, + "grad_norm": 0.07359210401773453, + "learning_rate": 4.577277418124324e-06, + "loss": 0.40362802147865295, + "step": 11236 + }, + { + "epoch": 2.0763127807739834, + "grad_norm": 0.09323938935995102, + "learning_rate": 4.575601392534659e-06, + "loss": 0.5022433400154114, + "step": 11237 + }, + { + "epoch": 2.0764975574828792, + "grad_norm": 0.06990652531385422, + "learning_rate": 4.573925582819088e-06, + "loss": 0.4661812484264374, + "step": 11238 + }, + { + "epoch": 2.076682334191775, + "grad_norm": 0.09031682461500168, + "learning_rate": 4.5722499890443015e-06, + "loss": 0.46780532598495483, + "step": 11239 + }, + { + "epoch": 2.076867110900671, + "grad_norm": 0.06735450774431229, + "learning_rate": 4.570574611276986e-06, + "loss": 0.34275004267692566, + "step": 11240 + }, + { + "epoch": 2.0770518876095667, + "grad_norm": 0.07152236998081207, + "learning_rate": 4.568899449583814e-06, + "loss": 0.3821204602718353, + "step": 11241 + }, + { + "epoch": 2.0772366643184625, + "grad_norm": 0.0831417590379715, + "learning_rate": 4.567224504031455e-06, + "loss": 0.47654974460601807, + "step": 11242 + }, + { + "epoch": 2.0774214410273584, + "grad_norm": 0.08847978711128235, + "learning_rate": 4.565549774686561e-06, + "loss": 0.4934597611427307, + "step": 11243 + }, + { + "epoch": 2.077606217736254, + "grad_norm": 0.09545759856700897, + "learning_rate": 4.563875261615782e-06, + "loss": 0.4834637939929962, + "step": 11244 + }, + { + "epoch": 2.07779099444515, + "grad_norm": 0.08540671318769455, + "learning_rate": 4.5622009648857625e-06, + "loss": 0.4346141815185547, + "step": 11245 + }, + { + "epoch": 2.077975771154046, + "grad_norm": 0.07556750625371933, + "learning_rate": 4.56052688456313e-06, + "loss": 0.38500696420669556, + "step": 11246 + }, + { + "epoch": 2.0781605478629417, + "grad_norm": 0.08311685919761658, + "learning_rate": 4.558853020714511e-06, + "loss": 0.5165578126907349, + "step": 11247 + }, + { + "epoch": 2.0783453245718375, + "grad_norm": 0.09188653528690338, + "learning_rate": 4.557179373406521e-06, + "loss": 0.5210410356521606, + "step": 11248 + }, + { + "epoch": 2.078530101280734, + "grad_norm": 0.07977807521820068, + "learning_rate": 4.555505942705761e-06, + "loss": 0.43148764967918396, + "step": 11249 + }, + { + "epoch": 2.0787148779896296, + "grad_norm": 0.09601611644029617, + "learning_rate": 4.5538327286788305e-06, + "loss": 0.5566371083259583, + "step": 11250 + }, + { + "epoch": 2.0788996546985254, + "grad_norm": 0.08546191453933716, + "learning_rate": 4.552159731392323e-06, + "loss": 0.5322090983390808, + "step": 11251 + }, + { + "epoch": 2.0790844314074213, + "grad_norm": 0.07977309823036194, + "learning_rate": 4.550486950912806e-06, + "loss": 0.3500477373600006, + "step": 11252 + }, + { + "epoch": 2.079269208116317, + "grad_norm": 0.07240080088376999, + "learning_rate": 4.548814387306866e-06, + "loss": 0.36769571900367737, + "step": 11253 + }, + { + "epoch": 2.079453984825213, + "grad_norm": 0.06968306005001068, + "learning_rate": 4.547142040641055e-06, + "loss": 0.3258252739906311, + "step": 11254 + }, + { + "epoch": 2.0796387615341088, + "grad_norm": 0.060997962951660156, + "learning_rate": 4.545469910981931e-06, + "loss": 0.34021857380867004, + "step": 11255 + }, + { + "epoch": 2.0798235382430046, + "grad_norm": 0.06318764388561249, + "learning_rate": 4.543797998396045e-06, + "loss": 0.3376246690750122, + "step": 11256 + }, + { + "epoch": 2.0800083149519004, + "grad_norm": 0.06657318770885468, + "learning_rate": 4.542126302949922e-06, + "loss": 0.2888146638870239, + "step": 11257 + }, + { + "epoch": 2.0801930916607962, + "grad_norm": 0.09087678045034409, + "learning_rate": 4.540454824710099e-06, + "loss": 0.4406352639198303, + "step": 11258 + }, + { + "epoch": 2.080377868369692, + "grad_norm": 0.07760107517242432, + "learning_rate": 4.538783563743091e-06, + "loss": 0.35341158509254456, + "step": 11259 + }, + { + "epoch": 2.080562645078588, + "grad_norm": 0.06082098186016083, + "learning_rate": 4.537112520115413e-06, + "loss": 0.30027127265930176, + "step": 11260 + }, + { + "epoch": 2.0807474217874837, + "grad_norm": 0.06878552585840225, + "learning_rate": 4.535441693893565e-06, + "loss": 0.36671727895736694, + "step": 11261 + }, + { + "epoch": 2.0809321984963796, + "grad_norm": 0.07365518808364868, + "learning_rate": 4.5337710851440445e-06, + "loss": 0.3336552381515503, + "step": 11262 + }, + { + "epoch": 2.0811169752052754, + "grad_norm": 0.08688491582870483, + "learning_rate": 4.53210069393333e-06, + "loss": 0.48835572600364685, + "step": 11263 + }, + { + "epoch": 2.081301751914171, + "grad_norm": 0.09472530335187912, + "learning_rate": 4.5304305203279005e-06, + "loss": 0.48959553241729736, + "step": 11264 + }, + { + "epoch": 2.081486528623067, + "grad_norm": 0.0924774780869484, + "learning_rate": 4.528760564394225e-06, + "loss": 0.46644827723503113, + "step": 11265 + }, + { + "epoch": 2.081671305331963, + "grad_norm": 0.07498573511838913, + "learning_rate": 4.527090826198761e-06, + "loss": 0.3633112609386444, + "step": 11266 + }, + { + "epoch": 2.0818560820408587, + "grad_norm": 0.10801418870687485, + "learning_rate": 4.525421305807963e-06, + "loss": 0.48465314507484436, + "step": 11267 + }, + { + "epoch": 2.0820408587497545, + "grad_norm": 0.08142693340778351, + "learning_rate": 4.523752003288267e-06, + "loss": 0.49605080485343933, + "step": 11268 + }, + { + "epoch": 2.0822256354586504, + "grad_norm": 0.08006380498409271, + "learning_rate": 4.5220829187061065e-06, + "loss": 0.45951616764068604, + "step": 11269 + }, + { + "epoch": 2.082410412167546, + "grad_norm": 0.09330236911773682, + "learning_rate": 4.520414052127913e-06, + "loss": 0.48875072598457336, + "step": 11270 + }, + { + "epoch": 2.082595188876442, + "grad_norm": 0.09738299995660782, + "learning_rate": 4.518745403620088e-06, + "loss": 0.5769286751747131, + "step": 11271 + }, + { + "epoch": 2.082779965585338, + "grad_norm": 0.08468339592218399, + "learning_rate": 4.517076973249052e-06, + "loss": 0.5612066388130188, + "step": 11272 + }, + { + "epoch": 2.0829647422942337, + "grad_norm": 0.08240416646003723, + "learning_rate": 4.5154087610812016e-06, + "loss": 0.513605535030365, + "step": 11273 + }, + { + "epoch": 2.0831495190031295, + "grad_norm": 0.06436806917190552, + "learning_rate": 4.51374076718292e-06, + "loss": 0.3433886170387268, + "step": 11274 + }, + { + "epoch": 2.0833342957120253, + "grad_norm": 0.08961392194032669, + "learning_rate": 4.512072991620592e-06, + "loss": 0.5268140435218811, + "step": 11275 + }, + { + "epoch": 2.083519072420921, + "grad_norm": 0.1014905646443367, + "learning_rate": 4.510405434460592e-06, + "loss": 0.5389054417610168, + "step": 11276 + }, + { + "epoch": 2.083703849129817, + "grad_norm": 0.07252752780914307, + "learning_rate": 4.508738095769278e-06, + "loss": 0.3551664650440216, + "step": 11277 + }, + { + "epoch": 2.083888625838713, + "grad_norm": 0.07098570466041565, + "learning_rate": 4.507070975613009e-06, + "loss": 0.3488197922706604, + "step": 11278 + }, + { + "epoch": 2.084073402547609, + "grad_norm": 0.06824567168951035, + "learning_rate": 4.505404074058127e-06, + "loss": 0.3708655834197998, + "step": 11279 + }, + { + "epoch": 2.084258179256505, + "grad_norm": 0.08788527548313141, + "learning_rate": 4.503737391170975e-06, + "loss": 0.481997549533844, + "step": 11280 + }, + { + "epoch": 2.0844429559654007, + "grad_norm": 0.07096780091524124, + "learning_rate": 4.502070927017879e-06, + "loss": 0.4012865424156189, + "step": 11281 + }, + { + "epoch": 2.0846277326742966, + "grad_norm": 0.10750937461853027, + "learning_rate": 4.500404681665161e-06, + "loss": 0.5957963466644287, + "step": 11282 + }, + { + "epoch": 2.0848125093831924, + "grad_norm": 0.08033590018749237, + "learning_rate": 4.498738655179129e-06, + "loss": 0.3472810685634613, + "step": 11283 + }, + { + "epoch": 2.0849972860920882, + "grad_norm": 0.06028683856129646, + "learning_rate": 4.497072847626087e-06, + "loss": 0.3033576011657715, + "step": 11284 + }, + { + "epoch": 2.085182062800984, + "grad_norm": 0.09236586093902588, + "learning_rate": 4.4954072590723285e-06, + "loss": 0.4759647846221924, + "step": 11285 + }, + { + "epoch": 2.08536683950988, + "grad_norm": 0.09992455691099167, + "learning_rate": 4.49374188958414e-06, + "loss": 0.5862817764282227, + "step": 11286 + }, + { + "epoch": 2.0855516162187757, + "grad_norm": 0.09145543724298477, + "learning_rate": 4.492076739227802e-06, + "loss": 0.5127483606338501, + "step": 11287 + }, + { + "epoch": 2.0857363929276715, + "grad_norm": 0.07276683300733566, + "learning_rate": 4.490411808069573e-06, + "loss": 0.3518741726875305, + "step": 11288 + }, + { + "epoch": 2.0859211696365674, + "grad_norm": 0.09228905290365219, + "learning_rate": 4.488747096175717e-06, + "loss": 0.568116307258606, + "step": 11289 + }, + { + "epoch": 2.086105946345463, + "grad_norm": 0.08240782469511032, + "learning_rate": 4.487082603612487e-06, + "loss": 0.3835344910621643, + "step": 11290 + }, + { + "epoch": 2.086290723054359, + "grad_norm": 0.09818808734416962, + "learning_rate": 4.485418330446114e-06, + "loss": 0.4284479022026062, + "step": 11291 + }, + { + "epoch": 2.086475499763255, + "grad_norm": 0.07579676806926727, + "learning_rate": 4.4837542767428436e-06, + "loss": 0.3927929103374481, + "step": 11292 + }, + { + "epoch": 2.0866602764721507, + "grad_norm": 0.07215126603841782, + "learning_rate": 4.482090442568898e-06, + "loss": 0.4338514506816864, + "step": 11293 + }, + { + "epoch": 2.0868450531810465, + "grad_norm": 0.07936978340148926, + "learning_rate": 4.480426827990486e-06, + "loss": 0.42621442675590515, + "step": 11294 + }, + { + "epoch": 2.0870298298899423, + "grad_norm": 0.08208302408456802, + "learning_rate": 4.478763433073817e-06, + "loss": 0.42807069420814514, + "step": 11295 + }, + { + "epoch": 2.087214606598838, + "grad_norm": 0.10644751042127609, + "learning_rate": 4.477100257885094e-06, + "loss": 0.6804676055908203, + "step": 11296 + }, + { + "epoch": 2.087399383307734, + "grad_norm": 0.08382293581962585, + "learning_rate": 4.475437302490498e-06, + "loss": 0.4962066411972046, + "step": 11297 + }, + { + "epoch": 2.08758416001663, + "grad_norm": 0.09304467588663101, + "learning_rate": 4.473774566956213e-06, + "loss": 0.46378782391548157, + "step": 11298 + }, + { + "epoch": 2.0877689367255257, + "grad_norm": 0.10059910267591476, + "learning_rate": 4.472112051348411e-06, + "loss": 0.5793818235397339, + "step": 11299 + }, + { + "epoch": 2.0879537134344215, + "grad_norm": 0.06869319081306458, + "learning_rate": 4.470449755733255e-06, + "loss": 0.29588621854782104, + "step": 11300 + }, + { + "epoch": 2.0881384901433173, + "grad_norm": 0.0781109407544136, + "learning_rate": 4.4687876801769025e-06, + "loss": 0.4064006507396698, + "step": 11301 + }, + { + "epoch": 2.088323266852213, + "grad_norm": 0.08586934953927994, + "learning_rate": 4.467125824745492e-06, + "loss": 0.5395097136497498, + "step": 11302 + }, + { + "epoch": 2.088508043561109, + "grad_norm": 0.07244936376810074, + "learning_rate": 4.465464189505163e-06, + "loss": 0.3408164978027344, + "step": 11303 + }, + { + "epoch": 2.088692820270005, + "grad_norm": 0.07981320470571518, + "learning_rate": 4.463802774522044e-06, + "loss": 0.3848876357078552, + "step": 11304 + }, + { + "epoch": 2.0888775969789006, + "grad_norm": 0.07874210178852081, + "learning_rate": 4.462141579862254e-06, + "loss": 0.46407175064086914, + "step": 11305 + }, + { + "epoch": 2.0890623736877965, + "grad_norm": 0.06232302635908127, + "learning_rate": 4.460480605591904e-06, + "loss": 0.2524704933166504, + "step": 11306 + }, + { + "epoch": 2.0892471503966923, + "grad_norm": 0.08226414024829865, + "learning_rate": 4.458819851777097e-06, + "loss": 0.35842904448509216, + "step": 11307 + }, + { + "epoch": 2.089431927105588, + "grad_norm": 0.0843888595700264, + "learning_rate": 4.457159318483922e-06, + "loss": 0.44134512543678284, + "step": 11308 + }, + { + "epoch": 2.0896167038144844, + "grad_norm": 0.07801353186368942, + "learning_rate": 4.455499005778464e-06, + "loss": 0.3788571357727051, + "step": 11309 + }, + { + "epoch": 2.08980148052338, + "grad_norm": 0.07737331092357635, + "learning_rate": 4.453838913726803e-06, + "loss": 0.40529578924179077, + "step": 11310 + }, + { + "epoch": 2.089986257232276, + "grad_norm": 0.0766071155667305, + "learning_rate": 4.452179042394994e-06, + "loss": 0.4201054871082306, + "step": 11311 + }, + { + "epoch": 2.090171033941172, + "grad_norm": 0.07338808476924896, + "learning_rate": 4.450519391849106e-06, + "loss": 0.3696323037147522, + "step": 11312 + }, + { + "epoch": 2.0903558106500677, + "grad_norm": 0.0812407061457634, + "learning_rate": 4.4488599621551876e-06, + "loss": 0.5269801020622253, + "step": 11313 + }, + { + "epoch": 2.0905405873589635, + "grad_norm": 0.07736600190401077, + "learning_rate": 4.447200753379273e-06, + "loss": 0.4681062400341034, + "step": 11314 + }, + { + "epoch": 2.0907253640678594, + "grad_norm": 0.07834678888320923, + "learning_rate": 4.445541765587394e-06, + "loss": 0.40067198872566223, + "step": 11315 + }, + { + "epoch": 2.090910140776755, + "grad_norm": 0.0707080066204071, + "learning_rate": 4.44388299884558e-06, + "loss": 0.38395214080810547, + "step": 11316 + }, + { + "epoch": 2.091094917485651, + "grad_norm": 0.10281843692064285, + "learning_rate": 4.442224453219836e-06, + "loss": 0.5071035027503967, + "step": 11317 + }, + { + "epoch": 2.091279694194547, + "grad_norm": 0.1025250256061554, + "learning_rate": 4.44056612877617e-06, + "loss": 0.5465942025184631, + "step": 11318 + }, + { + "epoch": 2.0914644709034427, + "grad_norm": 0.09008805453777313, + "learning_rate": 4.438908025580578e-06, + "loss": 0.5358907580375671, + "step": 11319 + }, + { + "epoch": 2.0916492476123385, + "grad_norm": 0.06519326567649841, + "learning_rate": 4.43725014369905e-06, + "loss": 0.3326597511768341, + "step": 11320 + }, + { + "epoch": 2.0918340243212343, + "grad_norm": 0.09936525672674179, + "learning_rate": 4.4355924831975665e-06, + "loss": 0.4908013343811035, + "step": 11321 + }, + { + "epoch": 2.09201880103013, + "grad_norm": 0.08073471486568451, + "learning_rate": 4.433935044142088e-06, + "loss": 0.3885650932788849, + "step": 11322 + }, + { + "epoch": 2.092203577739026, + "grad_norm": 0.09077001363039017, + "learning_rate": 4.432277826598582e-06, + "loss": 0.4323769807815552, + "step": 11323 + }, + { + "epoch": 2.092388354447922, + "grad_norm": 0.11247825622558594, + "learning_rate": 4.430620830632999e-06, + "loss": 0.6296273469924927, + "step": 11324 + }, + { + "epoch": 2.0925731311568176, + "grad_norm": 0.08233305811882019, + "learning_rate": 4.428964056311282e-06, + "loss": 0.4658873975276947, + "step": 11325 + }, + { + "epoch": 2.0927579078657135, + "grad_norm": 0.07252024859189987, + "learning_rate": 4.427307503699368e-06, + "loss": 0.41115713119506836, + "step": 11326 + }, + { + "epoch": 2.0929426845746093, + "grad_norm": 0.10351111739873886, + "learning_rate": 4.425651172863181e-06, + "loss": 0.35353049635887146, + "step": 11327 + }, + { + "epoch": 2.093127461283505, + "grad_norm": 0.08197803050279617, + "learning_rate": 4.423995063868637e-06, + "loss": 0.38247957825660706, + "step": 11328 + }, + { + "epoch": 2.093312237992401, + "grad_norm": 0.09849874675273895, + "learning_rate": 4.422339176781643e-06, + "loss": 0.4812338054180145, + "step": 11329 + }, + { + "epoch": 2.093497014701297, + "grad_norm": 0.06511776149272919, + "learning_rate": 4.420683511668102e-06, + "loss": 0.29902955889701843, + "step": 11330 + }, + { + "epoch": 2.0936817914101926, + "grad_norm": 0.05650252476334572, + "learning_rate": 4.419028068593896e-06, + "loss": 0.23297971487045288, + "step": 11331 + }, + { + "epoch": 2.0938665681190884, + "grad_norm": 0.09680482000112534, + "learning_rate": 4.417372847624915e-06, + "loss": 0.5279828906059265, + "step": 11332 + }, + { + "epoch": 2.0940513448279843, + "grad_norm": 0.05814513936638832, + "learning_rate": 4.415717848827034e-06, + "loss": 0.26471182703971863, + "step": 11333 + }, + { + "epoch": 2.09423612153688, + "grad_norm": 0.07361941784620285, + "learning_rate": 4.414063072266107e-06, + "loss": 0.3733961880207062, + "step": 11334 + }, + { + "epoch": 2.094420898245776, + "grad_norm": 0.07856351882219315, + "learning_rate": 4.412408518007998e-06, + "loss": 0.44502511620521545, + "step": 11335 + }, + { + "epoch": 2.0946056749546718, + "grad_norm": 0.09728807955980301, + "learning_rate": 4.410754186118543e-06, + "loss": 0.5463005304336548, + "step": 11336 + }, + { + "epoch": 2.0947904516635676, + "grad_norm": 0.08082041144371033, + "learning_rate": 4.409100076663587e-06, + "loss": 0.44808241724967957, + "step": 11337 + }, + { + "epoch": 2.094975228372464, + "grad_norm": 0.0760640949010849, + "learning_rate": 4.407446189708955e-06, + "loss": 0.31200912594795227, + "step": 11338 + }, + { + "epoch": 2.0951600050813597, + "grad_norm": 0.10331191122531891, + "learning_rate": 4.405792525320469e-06, + "loss": 0.565214991569519, + "step": 11339 + }, + { + "epoch": 2.0953447817902555, + "grad_norm": 0.07570210099220276, + "learning_rate": 4.404139083563937e-06, + "loss": 0.4376543164253235, + "step": 11340 + }, + { + "epoch": 2.0955295584991513, + "grad_norm": 0.07459602504968643, + "learning_rate": 4.402485864505167e-06, + "loss": 0.3753867447376251, + "step": 11341 + }, + { + "epoch": 2.095714335208047, + "grad_norm": 0.09447022527456284, + "learning_rate": 4.4008328682099436e-06, + "loss": 0.4516494572162628, + "step": 11342 + }, + { + "epoch": 2.095899111916943, + "grad_norm": 0.07076124846935272, + "learning_rate": 4.399180094744053e-06, + "loss": 0.45155102014541626, + "step": 11343 + }, + { + "epoch": 2.096083888625839, + "grad_norm": 0.08209472894668579, + "learning_rate": 4.397527544173273e-06, + "loss": 0.43991395831108093, + "step": 11344 + }, + { + "epoch": 2.0962686653347347, + "grad_norm": 0.10229241102933884, + "learning_rate": 4.39587521656337e-06, + "loss": 0.5501560568809509, + "step": 11345 + }, + { + "epoch": 2.0964534420436305, + "grad_norm": 0.08802369236946106, + "learning_rate": 4.394223111980099e-06, + "loss": 0.4686582088470459, + "step": 11346 + }, + { + "epoch": 2.0966382187525263, + "grad_norm": 0.08246442675590515, + "learning_rate": 4.392571230489214e-06, + "loss": 0.4176312983036041, + "step": 11347 + }, + { + "epoch": 2.096822995461422, + "grad_norm": 0.07992678135633469, + "learning_rate": 4.390919572156447e-06, + "loss": 0.4794798791408539, + "step": 11348 + }, + { + "epoch": 2.097007772170318, + "grad_norm": 0.09510378539562225, + "learning_rate": 4.389268137047535e-06, + "loss": 0.6322532892227173, + "step": 11349 + }, + { + "epoch": 2.097192548879214, + "grad_norm": 0.10204022377729416, + "learning_rate": 4.387616925228195e-06, + "loss": 0.5359669923782349, + "step": 11350 + }, + { + "epoch": 2.0973773255881096, + "grad_norm": 0.07107967138290405, + "learning_rate": 4.385965936764138e-06, + "loss": 0.34690505266189575, + "step": 11351 + }, + { + "epoch": 2.0975621022970055, + "grad_norm": 0.07835035026073456, + "learning_rate": 4.384315171721081e-06, + "loss": 0.4288128614425659, + "step": 11352 + }, + { + "epoch": 2.0977468790059013, + "grad_norm": 0.08641502261161804, + "learning_rate": 4.382664630164707e-06, + "loss": 0.4066121578216553, + "step": 11353 + }, + { + "epoch": 2.097931655714797, + "grad_norm": 0.11038441210985184, + "learning_rate": 4.381014312160706e-06, + "loss": 0.6246651411056519, + "step": 11354 + }, + { + "epoch": 2.098116432423693, + "grad_norm": 0.07031238079071045, + "learning_rate": 4.37936421777476e-06, + "loss": 0.3774273991584778, + "step": 11355 + }, + { + "epoch": 2.0983012091325888, + "grad_norm": 0.09349191188812256, + "learning_rate": 4.377714347072529e-06, + "loss": 0.4103989005088806, + "step": 11356 + }, + { + "epoch": 2.0984859858414846, + "grad_norm": 0.07701905071735382, + "learning_rate": 4.376064700119678e-06, + "loss": 0.47127047181129456, + "step": 11357 + }, + { + "epoch": 2.0986707625503804, + "grad_norm": 0.09151667356491089, + "learning_rate": 4.374415276981856e-06, + "loss": 0.5917862057685852, + "step": 11358 + }, + { + "epoch": 2.0988555392592763, + "grad_norm": 0.09142620861530304, + "learning_rate": 4.372766077724706e-06, + "loss": 0.5134718418121338, + "step": 11359 + }, + { + "epoch": 2.099040315968172, + "grad_norm": 0.0978093072772026, + "learning_rate": 4.371117102413861e-06, + "loss": 0.4793660044670105, + "step": 11360 + }, + { + "epoch": 2.099225092677068, + "grad_norm": 0.07756274193525314, + "learning_rate": 4.369468351114949e-06, + "loss": 0.4334990382194519, + "step": 11361 + }, + { + "epoch": 2.0994098693859637, + "grad_norm": 0.0866687074303627, + "learning_rate": 4.367819823893575e-06, + "loss": 0.6182705163955688, + "step": 11362 + }, + { + "epoch": 2.0995946460948596, + "grad_norm": 0.0961247980594635, + "learning_rate": 4.366171520815353e-06, + "loss": 0.5028599500656128, + "step": 11363 + }, + { + "epoch": 2.0997794228037554, + "grad_norm": 0.08079350739717484, + "learning_rate": 4.364523441945878e-06, + "loss": 0.41719940304756165, + "step": 11364 + }, + { + "epoch": 2.0999641995126512, + "grad_norm": 0.074763722717762, + "learning_rate": 4.36287558735074e-06, + "loss": 0.40286093950271606, + "step": 11365 + }, + { + "epoch": 2.100148976221547, + "grad_norm": 0.08648432046175003, + "learning_rate": 4.361227957095519e-06, + "loss": 0.5090119242668152, + "step": 11366 + }, + { + "epoch": 2.1003337529304433, + "grad_norm": 0.07295078039169312, + "learning_rate": 4.359580551245782e-06, + "loss": 0.35367390513420105, + "step": 11367 + }, + { + "epoch": 2.100518529639339, + "grad_norm": 0.10807738453149796, + "learning_rate": 4.357933369867092e-06, + "loss": 0.5633283257484436, + "step": 11368 + }, + { + "epoch": 2.100703306348235, + "grad_norm": 0.08042987436056137, + "learning_rate": 4.356286413025006e-06, + "loss": 0.43587690591812134, + "step": 11369 + }, + { + "epoch": 2.100888083057131, + "grad_norm": 0.07895466685295105, + "learning_rate": 4.354639680785059e-06, + "loss": 0.46001380681991577, + "step": 11370 + }, + { + "epoch": 2.1010728597660266, + "grad_norm": 0.08027620613574982, + "learning_rate": 4.3529931732127884e-06, + "loss": 0.41186389327049255, + "step": 11371 + }, + { + "epoch": 2.1012576364749225, + "grad_norm": 0.06099643185734749, + "learning_rate": 4.3513468903737285e-06, + "loss": 0.2797749936580658, + "step": 11372 + }, + { + "epoch": 2.1014424131838183, + "grad_norm": 0.08282571285963058, + "learning_rate": 4.349700832333387e-06, + "loss": 0.46589988470077515, + "step": 11373 + }, + { + "epoch": 2.101627189892714, + "grad_norm": 0.08938560634851456, + "learning_rate": 4.348054999157274e-06, + "loss": 0.37464770674705505, + "step": 11374 + }, + { + "epoch": 2.10181196660161, + "grad_norm": 0.06825102865695953, + "learning_rate": 4.346409390910894e-06, + "loss": 0.2708218991756439, + "step": 11375 + }, + { + "epoch": 2.101996743310506, + "grad_norm": 0.06989553570747375, + "learning_rate": 4.3447640076597284e-06, + "loss": 0.45829659700393677, + "step": 11376 + }, + { + "epoch": 2.1021815200194016, + "grad_norm": 0.07006486505270004, + "learning_rate": 4.343118849469262e-06, + "loss": 0.3486316204071045, + "step": 11377 + }, + { + "epoch": 2.1023662967282974, + "grad_norm": 0.08205582946538925, + "learning_rate": 4.341473916404968e-06, + "loss": 0.4590587615966797, + "step": 11378 + }, + { + "epoch": 2.1025510734371933, + "grad_norm": 0.08832045644521713, + "learning_rate": 4.339829208532309e-06, + "loss": 0.4677901566028595, + "step": 11379 + }, + { + "epoch": 2.102735850146089, + "grad_norm": 0.10020288825035095, + "learning_rate": 4.3381847259167385e-06, + "loss": 0.6569520831108093, + "step": 11380 + }, + { + "epoch": 2.102920626854985, + "grad_norm": 0.1064857766032219, + "learning_rate": 4.336540468623706e-06, + "loss": 0.5721414685249329, + "step": 11381 + }, + { + "epoch": 2.1031054035638808, + "grad_norm": 0.08472268283367157, + "learning_rate": 4.3348964367186405e-06, + "loss": 0.4144759774208069, + "step": 11382 + }, + { + "epoch": 2.1032901802727766, + "grad_norm": 0.08525696396827698, + "learning_rate": 4.333252630266973e-06, + "loss": 0.4409826397895813, + "step": 11383 + }, + { + "epoch": 2.1034749569816724, + "grad_norm": 0.06475947797298431, + "learning_rate": 4.331609049334123e-06, + "loss": 0.22162888944149017, + "step": 11384 + }, + { + "epoch": 2.1036597336905682, + "grad_norm": 0.07369149476289749, + "learning_rate": 4.3299656939854974e-06, + "loss": 0.3384570777416229, + "step": 11385 + }, + { + "epoch": 2.103844510399464, + "grad_norm": 0.07061440497636795, + "learning_rate": 4.328322564286501e-06, + "loss": 0.386101633310318, + "step": 11386 + }, + { + "epoch": 2.10402928710836, + "grad_norm": 0.0843283012509346, + "learning_rate": 4.3266796603025194e-06, + "loss": 0.4622347354888916, + "step": 11387 + }, + { + "epoch": 2.1042140638172557, + "grad_norm": 0.0684337466955185, + "learning_rate": 4.325036982098938e-06, + "loss": 0.40077027678489685, + "step": 11388 + }, + { + "epoch": 2.1043988405261516, + "grad_norm": 0.09900067746639252, + "learning_rate": 4.323394529741134e-06, + "loss": 0.5515775084495544, + "step": 11389 + }, + { + "epoch": 2.1045836172350474, + "grad_norm": 0.09377042949199677, + "learning_rate": 4.321752303294463e-06, + "loss": 0.43071335554122925, + "step": 11390 + }, + { + "epoch": 2.104768393943943, + "grad_norm": 0.09065300226211548, + "learning_rate": 4.320110302824283e-06, + "loss": 0.3990553319454193, + "step": 11391 + }, + { + "epoch": 2.104953170652839, + "grad_norm": 0.09074597805738449, + "learning_rate": 4.318468528395949e-06, + "loss": 0.48185208439826965, + "step": 11392 + }, + { + "epoch": 2.105137947361735, + "grad_norm": 0.07206054031848907, + "learning_rate": 4.3168269800747896e-06, + "loss": 0.4884740710258484, + "step": 11393 + }, + { + "epoch": 2.1053227240706307, + "grad_norm": 0.08428829163312912, + "learning_rate": 4.315185657926135e-06, + "loss": 0.40511807799339294, + "step": 11394 + }, + { + "epoch": 2.1055075007795265, + "grad_norm": 0.10548090189695358, + "learning_rate": 4.31354456201531e-06, + "loss": 0.6051211357116699, + "step": 11395 + }, + { + "epoch": 2.105692277488423, + "grad_norm": 0.0688234344124794, + "learning_rate": 4.311903692407617e-06, + "loss": 0.3551594614982605, + "step": 11396 + }, + { + "epoch": 2.1058770541973186, + "grad_norm": 0.09894660115242004, + "learning_rate": 4.3102630491683615e-06, + "loss": 0.49841412901878357, + "step": 11397 + }, + { + "epoch": 2.1060618309062145, + "grad_norm": 0.08894941210746765, + "learning_rate": 4.308622632362836e-06, + "loss": 0.5526653528213501, + "step": 11398 + }, + { + "epoch": 2.1062466076151103, + "grad_norm": 0.08873556554317474, + "learning_rate": 4.3069824420563235e-06, + "loss": 0.4127436876296997, + "step": 11399 + }, + { + "epoch": 2.106431384324006, + "grad_norm": 0.07422003149986267, + "learning_rate": 4.305342478314102e-06, + "loss": 0.5035715699195862, + "step": 11400 + }, + { + "epoch": 2.106616161032902, + "grad_norm": 0.07391268759965897, + "learning_rate": 4.303702741201431e-06, + "loss": 0.44224563241004944, + "step": 11401 + }, + { + "epoch": 2.1068009377417978, + "grad_norm": 0.0754476934671402, + "learning_rate": 4.302063230783568e-06, + "loss": 0.421756386756897, + "step": 11402 + }, + { + "epoch": 2.1069857144506936, + "grad_norm": 0.07438241690397263, + "learning_rate": 4.3004239471257625e-06, + "loss": 0.4200321137905121, + "step": 11403 + }, + { + "epoch": 2.1071704911595894, + "grad_norm": 0.0940864086151123, + "learning_rate": 4.2987848902932516e-06, + "loss": 0.4505451023578644, + "step": 11404 + }, + { + "epoch": 2.1073552678684853, + "grad_norm": 0.0756436213850975, + "learning_rate": 4.297146060351266e-06, + "loss": 0.2705419361591339, + "step": 11405 + }, + { + "epoch": 2.107540044577381, + "grad_norm": 0.10930308699607849, + "learning_rate": 4.295507457365029e-06, + "loss": 0.4992304742336273, + "step": 11406 + }, + { + "epoch": 2.107724821286277, + "grad_norm": 0.0841393992304802, + "learning_rate": 4.293869081399744e-06, + "loss": 0.4608651101589203, + "step": 11407 + }, + { + "epoch": 2.1079095979951727, + "grad_norm": 0.07042815536260605, + "learning_rate": 4.292230932520618e-06, + "loss": 0.4713101089000702, + "step": 11408 + }, + { + "epoch": 2.1080943747040686, + "grad_norm": 0.08705121278762817, + "learning_rate": 4.2905930107928465e-06, + "loss": 0.4724443554878235, + "step": 11409 + }, + { + "epoch": 2.1082791514129644, + "grad_norm": 0.06809006631374359, + "learning_rate": 4.288955316281608e-06, + "loss": 0.35661178827285767, + "step": 11410 + }, + { + "epoch": 2.1084639281218602, + "grad_norm": 0.06043905392289162, + "learning_rate": 4.287317849052075e-06, + "loss": 0.3042537569999695, + "step": 11411 + }, + { + "epoch": 2.108648704830756, + "grad_norm": 0.09773826599121094, + "learning_rate": 4.285680609169428e-06, + "loss": 0.5207033157348633, + "step": 11412 + }, + { + "epoch": 2.108833481539652, + "grad_norm": 0.08604561537504196, + "learning_rate": 4.28404359669881e-06, + "loss": 0.5575354695320129, + "step": 11413 + }, + { + "epoch": 2.1090182582485477, + "grad_norm": 0.06699854880571365, + "learning_rate": 4.282406811705379e-06, + "loss": 0.40921831130981445, + "step": 11414 + }, + { + "epoch": 2.1092030349574435, + "grad_norm": 0.07288289070129395, + "learning_rate": 4.280770254254264e-06, + "loss": 0.40244776010513306, + "step": 11415 + }, + { + "epoch": 2.1093878116663394, + "grad_norm": 0.052294157445430756, + "learning_rate": 4.2791339244106e-06, + "loss": 0.2464476376771927, + "step": 11416 + }, + { + "epoch": 2.109572588375235, + "grad_norm": 0.074588343501091, + "learning_rate": 4.277497822239507e-06, + "loss": 0.36213380098342896, + "step": 11417 + }, + { + "epoch": 2.109757365084131, + "grad_norm": 0.07086427509784698, + "learning_rate": 4.275861947806098e-06, + "loss": 0.4147190749645233, + "step": 11418 + }, + { + "epoch": 2.109942141793027, + "grad_norm": 0.08113404363393784, + "learning_rate": 4.274226301175475e-06, + "loss": 0.4108676314353943, + "step": 11419 + }, + { + "epoch": 2.1101269185019227, + "grad_norm": 0.0712827816605568, + "learning_rate": 4.272590882412735e-06, + "loss": 0.39405569434165955, + "step": 11420 + }, + { + "epoch": 2.1103116952108185, + "grad_norm": 0.0880788117647171, + "learning_rate": 4.270955691582955e-06, + "loss": 0.47541195154190063, + "step": 11421 + }, + { + "epoch": 2.1104964719197143, + "grad_norm": 0.08298428356647491, + "learning_rate": 4.2693207287512155e-06, + "loss": 0.4658651053905487, + "step": 11422 + }, + { + "epoch": 2.11068124862861, + "grad_norm": 0.08433230221271515, + "learning_rate": 4.267685993982581e-06, + "loss": 0.5950572490692139, + "step": 11423 + }, + { + "epoch": 2.110866025337506, + "grad_norm": 0.07129106670618057, + "learning_rate": 4.266051487342111e-06, + "loss": 0.3497973382472992, + "step": 11424 + }, + { + "epoch": 2.1110508020464023, + "grad_norm": 0.08530896157026291, + "learning_rate": 4.264417208894851e-06, + "loss": 0.4574190378189087, + "step": 11425 + }, + { + "epoch": 2.111235578755298, + "grad_norm": 0.08308255672454834, + "learning_rate": 4.262783158705846e-06, + "loss": 0.40405362844467163, + "step": 11426 + }, + { + "epoch": 2.111420355464194, + "grad_norm": 0.07241486757993698, + "learning_rate": 4.2611493368401194e-06, + "loss": 0.2930608093738556, + "step": 11427 + }, + { + "epoch": 2.1116051321730898, + "grad_norm": 0.09505489468574524, + "learning_rate": 4.259515743362694e-06, + "loss": 0.5223978757858276, + "step": 11428 + }, + { + "epoch": 2.1117899088819856, + "grad_norm": 0.06957757472991943, + "learning_rate": 4.257882378338586e-06, + "loss": 0.38343876600265503, + "step": 11429 + }, + { + "epoch": 2.1119746855908814, + "grad_norm": 0.08739648014307022, + "learning_rate": 4.25624924183279e-06, + "loss": 0.47917744517326355, + "step": 11430 + }, + { + "epoch": 2.1121594622997772, + "grad_norm": 0.08945481479167938, + "learning_rate": 4.254616333910305e-06, + "loss": 0.5491586923599243, + "step": 11431 + }, + { + "epoch": 2.112344239008673, + "grad_norm": 0.08414768427610397, + "learning_rate": 4.252983654636115e-06, + "loss": 0.45746487379074097, + "step": 11432 + }, + { + "epoch": 2.112529015717569, + "grad_norm": 0.08652835339307785, + "learning_rate": 4.2513512040751954e-06, + "loss": 0.506133496761322, + "step": 11433 + }, + { + "epoch": 2.1127137924264647, + "grad_norm": 0.09679882228374481, + "learning_rate": 4.249718982292517e-06, + "loss": 0.5231440663337708, + "step": 11434 + }, + { + "epoch": 2.1128985691353606, + "grad_norm": 0.10607045888900757, + "learning_rate": 4.248086989353027e-06, + "loss": 0.46721115708351135, + "step": 11435 + }, + { + "epoch": 2.1130833458442564, + "grad_norm": 0.09369184076786041, + "learning_rate": 4.2464552253216815e-06, + "loss": 0.4600922465324402, + "step": 11436 + }, + { + "epoch": 2.113268122553152, + "grad_norm": 0.09084629267454147, + "learning_rate": 4.244823690263417e-06, + "loss": 0.4539335072040558, + "step": 11437 + }, + { + "epoch": 2.113452899262048, + "grad_norm": 0.09121471643447876, + "learning_rate": 4.2431923842431635e-06, + "loss": 0.4551478326320648, + "step": 11438 + }, + { + "epoch": 2.113637675970944, + "grad_norm": 0.09681767970323563, + "learning_rate": 4.241561307325842e-06, + "loss": 0.5657509565353394, + "step": 11439 + }, + { + "epoch": 2.1138224526798397, + "grad_norm": 0.07663063704967499, + "learning_rate": 4.239930459576369e-06, + "loss": 0.41186854243278503, + "step": 11440 + }, + { + "epoch": 2.1140072293887355, + "grad_norm": 0.09556585550308228, + "learning_rate": 4.23829984105964e-06, + "loss": 0.5589721202850342, + "step": 11441 + }, + { + "epoch": 2.1141920060976314, + "grad_norm": 0.10073244571685791, + "learning_rate": 4.23666945184055e-06, + "loss": 0.5649580359458923, + "step": 11442 + }, + { + "epoch": 2.114376782806527, + "grad_norm": 0.09522378444671631, + "learning_rate": 4.235039291983984e-06, + "loss": 0.4742183983325958, + "step": 11443 + }, + { + "epoch": 2.114561559515423, + "grad_norm": 0.10242415219545364, + "learning_rate": 4.23340936155482e-06, + "loss": 0.549902617931366, + "step": 11444 + }, + { + "epoch": 2.114746336224319, + "grad_norm": 0.08314938098192215, + "learning_rate": 4.231779660617921e-06, + "loss": 0.478944331407547, + "step": 11445 + }, + { + "epoch": 2.1149311129332147, + "grad_norm": 0.07818952202796936, + "learning_rate": 4.230150189238148e-06, + "loss": 0.4710225760936737, + "step": 11446 + }, + { + "epoch": 2.1151158896421105, + "grad_norm": 0.09127730876207352, + "learning_rate": 4.228520947480343e-06, + "loss": 0.4920573830604553, + "step": 11447 + }, + { + "epoch": 2.1153006663510063, + "grad_norm": 0.07601220905780792, + "learning_rate": 4.226891935409352e-06, + "loss": 0.3525579273700714, + "step": 11448 + }, + { + "epoch": 2.115485443059902, + "grad_norm": 0.10441645234823227, + "learning_rate": 4.225263153089996e-06, + "loss": 0.5868218541145325, + "step": 11449 + }, + { + "epoch": 2.115670219768798, + "grad_norm": 0.09655965119600296, + "learning_rate": 4.223634600587099e-06, + "loss": 0.49087628722190857, + "step": 11450 + }, + { + "epoch": 2.115854996477694, + "grad_norm": 0.09375781565904617, + "learning_rate": 4.222006277965474e-06, + "loss": 0.4645474851131439, + "step": 11451 + }, + { + "epoch": 2.1160397731865896, + "grad_norm": 0.07621949911117554, + "learning_rate": 4.220378185289921e-06, + "loss": 0.34230300784111023, + "step": 11452 + }, + { + "epoch": 2.1162245498954855, + "grad_norm": 0.07253853231668472, + "learning_rate": 4.218750322625235e-06, + "loss": 0.3808154761791229, + "step": 11453 + }, + { + "epoch": 2.1164093266043817, + "grad_norm": 0.08523720502853394, + "learning_rate": 4.217122690036202e-06, + "loss": 0.44787776470184326, + "step": 11454 + }, + { + "epoch": 2.1165941033132776, + "grad_norm": 0.09397739917039871, + "learning_rate": 4.21549528758759e-06, + "loss": 0.46045711636543274, + "step": 11455 + }, + { + "epoch": 2.1167788800221734, + "grad_norm": 0.10516354441642761, + "learning_rate": 4.213868115344168e-06, + "loss": 0.6142834424972534, + "step": 11456 + }, + { + "epoch": 2.1169636567310692, + "grad_norm": 0.07183492183685303, + "learning_rate": 4.212241173370693e-06, + "loss": 0.36418893933296204, + "step": 11457 + }, + { + "epoch": 2.117148433439965, + "grad_norm": 0.10707825422286987, + "learning_rate": 4.2106144617319125e-06, + "loss": 0.5130747556686401, + "step": 11458 + }, + { + "epoch": 2.117333210148861, + "grad_norm": 0.09258676320314407, + "learning_rate": 4.208987980492562e-06, + "loss": 0.5682313442230225, + "step": 11459 + }, + { + "epoch": 2.1175179868577567, + "grad_norm": 0.10493189841508865, + "learning_rate": 4.207361729717378e-06, + "loss": 0.6336565613746643, + "step": 11460 + }, + { + "epoch": 2.1177027635666525, + "grad_norm": 0.09559211134910583, + "learning_rate": 4.205735709471069e-06, + "loss": 0.4078772962093353, + "step": 11461 + }, + { + "epoch": 2.1178875402755484, + "grad_norm": 0.08572104573249817, + "learning_rate": 4.204109919818351e-06, + "loss": 0.48710963129997253, + "step": 11462 + }, + { + "epoch": 2.118072316984444, + "grad_norm": 0.06969450414180756, + "learning_rate": 4.202484360823926e-06, + "loss": 0.3506666421890259, + "step": 11463 + }, + { + "epoch": 2.11825709369334, + "grad_norm": 0.08649852126836777, + "learning_rate": 4.200859032552484e-06, + "loss": 0.38269883394241333, + "step": 11464 + }, + { + "epoch": 2.118441870402236, + "grad_norm": 0.08273562788963318, + "learning_rate": 4.199233935068714e-06, + "loss": 0.4608445167541504, + "step": 11465 + }, + { + "epoch": 2.1186266471111317, + "grad_norm": 0.07988139986991882, + "learning_rate": 4.197609068437281e-06, + "loss": 0.445500910282135, + "step": 11466 + }, + { + "epoch": 2.1188114238200275, + "grad_norm": 0.08333956450223923, + "learning_rate": 4.195984432722855e-06, + "loss": 0.43747177720069885, + "step": 11467 + }, + { + "epoch": 2.1189962005289233, + "grad_norm": 0.07530811429023743, + "learning_rate": 4.194360027990092e-06, + "loss": 0.38888633251190186, + "step": 11468 + }, + { + "epoch": 2.119180977237819, + "grad_norm": 0.08294999599456787, + "learning_rate": 4.192735854303634e-06, + "loss": 0.36828699707984924, + "step": 11469 + }, + { + "epoch": 2.119365753946715, + "grad_norm": 0.08670689165592194, + "learning_rate": 4.19111191172812e-06, + "loss": 0.45033150911331177, + "step": 11470 + }, + { + "epoch": 2.119550530655611, + "grad_norm": 0.10300054401159286, + "learning_rate": 4.189488200328178e-06, + "loss": 0.5954097509384155, + "step": 11471 + }, + { + "epoch": 2.1197353073645067, + "grad_norm": 0.08131957799196243, + "learning_rate": 4.187864720168427e-06, + "loss": 0.45312342047691345, + "step": 11472 + }, + { + "epoch": 2.1199200840734025, + "grad_norm": 0.09454979747533798, + "learning_rate": 4.186241471313476e-06, + "loss": 0.4882694482803345, + "step": 11473 + }, + { + "epoch": 2.1201048607822983, + "grad_norm": 0.09436237812042236, + "learning_rate": 4.184618453827929e-06, + "loss": 0.5106803178787231, + "step": 11474 + }, + { + "epoch": 2.120289637491194, + "grad_norm": 0.08325103670358658, + "learning_rate": 4.18299566777637e-06, + "loss": 0.47879454493522644, + "step": 11475 + }, + { + "epoch": 2.12047441420009, + "grad_norm": 0.10389212518930435, + "learning_rate": 4.181373113223385e-06, + "loss": 0.5199220180511475, + "step": 11476 + }, + { + "epoch": 2.120659190908986, + "grad_norm": 0.08224156498908997, + "learning_rate": 4.179750790233545e-06, + "loss": 0.5292742848396301, + "step": 11477 + }, + { + "epoch": 2.1208439676178816, + "grad_norm": 0.08779798448085785, + "learning_rate": 4.178128698871415e-06, + "loss": 0.49805256724357605, + "step": 11478 + }, + { + "epoch": 2.1210287443267775, + "grad_norm": 0.10667131841182709, + "learning_rate": 4.176506839201553e-06, + "loss": 0.6199412941932678, + "step": 11479 + }, + { + "epoch": 2.1212135210356733, + "grad_norm": 0.08206725120544434, + "learning_rate": 4.1748852112884955e-06, + "loss": 0.4308219850063324, + "step": 11480 + }, + { + "epoch": 2.121398297744569, + "grad_norm": 0.08746568858623505, + "learning_rate": 4.173263815196781e-06, + "loss": 0.4784696698188782, + "step": 11481 + }, + { + "epoch": 2.121583074453465, + "grad_norm": 0.09873547405004501, + "learning_rate": 4.171642650990942e-06, + "loss": 0.48659324645996094, + "step": 11482 + }, + { + "epoch": 2.1217678511623608, + "grad_norm": 0.0774245634675026, + "learning_rate": 4.170021718735482e-06, + "loss": 0.41697925329208374, + "step": 11483 + }, + { + "epoch": 2.1219526278712566, + "grad_norm": 0.07203105092048645, + "learning_rate": 4.168401018494923e-06, + "loss": 0.3487274944782257, + "step": 11484 + }, + { + "epoch": 2.122137404580153, + "grad_norm": 0.07573983818292618, + "learning_rate": 4.166780550333761e-06, + "loss": 0.3803150951862335, + "step": 11485 + }, + { + "epoch": 2.1223221812890487, + "grad_norm": 0.08086445927619934, + "learning_rate": 4.165160314316481e-06, + "loss": 0.42372170090675354, + "step": 11486 + }, + { + "epoch": 2.1225069579979445, + "grad_norm": 0.07548331469297409, + "learning_rate": 4.163540310507566e-06, + "loss": 0.4192907512187958, + "step": 11487 + }, + { + "epoch": 2.1226917347068404, + "grad_norm": 0.07853441685438156, + "learning_rate": 4.161920538971489e-06, + "loss": 0.3423905074596405, + "step": 11488 + }, + { + "epoch": 2.122876511415736, + "grad_norm": 0.0724734291434288, + "learning_rate": 4.160300999772706e-06, + "loss": 0.3461209535598755, + "step": 11489 + }, + { + "epoch": 2.123061288124632, + "grad_norm": 0.09682875126600266, + "learning_rate": 4.158681692975673e-06, + "loss": 0.5491484999656677, + "step": 11490 + }, + { + "epoch": 2.123246064833528, + "grad_norm": 0.08606716990470886, + "learning_rate": 4.1570626186448344e-06, + "loss": 0.4194796681404114, + "step": 11491 + }, + { + "epoch": 2.1234308415424237, + "grad_norm": 0.08166229724884033, + "learning_rate": 4.155443776844624e-06, + "loss": 0.4911487400531769, + "step": 11492 + }, + { + "epoch": 2.1236156182513195, + "grad_norm": 0.08884919434785843, + "learning_rate": 4.1538251676394636e-06, + "loss": 0.4438813030719757, + "step": 11493 + }, + { + "epoch": 2.1238003949602153, + "grad_norm": 0.07676146924495697, + "learning_rate": 4.152206791093777e-06, + "loss": 0.37591931223869324, + "step": 11494 + }, + { + "epoch": 2.123985171669111, + "grad_norm": 0.0794263407588005, + "learning_rate": 4.15058864727196e-06, + "loss": 0.3733331561088562, + "step": 11495 + }, + { + "epoch": 2.124169948378007, + "grad_norm": 0.07943850010633469, + "learning_rate": 4.1489707362384145e-06, + "loss": 0.3863421380519867, + "step": 11496 + }, + { + "epoch": 2.124354725086903, + "grad_norm": 0.0789160281419754, + "learning_rate": 4.147353058057528e-06, + "loss": 0.39705219864845276, + "step": 11497 + }, + { + "epoch": 2.1245395017957986, + "grad_norm": 0.07900829613208771, + "learning_rate": 4.14573561279368e-06, + "loss": 0.3503285348415375, + "step": 11498 + }, + { + "epoch": 2.1247242785046945, + "grad_norm": 0.08270596712827682, + "learning_rate": 4.1441184005112425e-06, + "loss": 0.447653204202652, + "step": 11499 + }, + { + "epoch": 2.1249090552135903, + "grad_norm": 0.06858129799365997, + "learning_rate": 4.142501421274567e-06, + "loss": 0.3982352316379547, + "step": 11500 + }, + { + "epoch": 2.1249090552135903, + "eval_loss": 0.5545409321784973, + "eval_runtime": 158.3669, + "eval_samples_per_second": 115.106, + "eval_steps_per_second": 14.391, + "step": 11500 + }, + { + "epoch": 2.125093831922486, + "grad_norm": 0.10040252655744553, + "learning_rate": 4.140884675148011e-06, + "loss": 0.6206842660903931, + "step": 11501 + }, + { + "epoch": 2.125278608631382, + "grad_norm": 0.1062774583697319, + "learning_rate": 4.139268162195916e-06, + "loss": 0.5820229649543762, + "step": 11502 + }, + { + "epoch": 2.125463385340278, + "grad_norm": 0.06915470212697983, + "learning_rate": 4.137651882482607e-06, + "loss": 0.39213237166404724, + "step": 11503 + }, + { + "epoch": 2.1256481620491736, + "grad_norm": 0.07784624397754669, + "learning_rate": 4.136035836072414e-06, + "loss": 0.37819647789001465, + "step": 11504 + }, + { + "epoch": 2.1258329387580694, + "grad_norm": 0.10554467141628265, + "learning_rate": 4.134420023029654e-06, + "loss": 0.6587731838226318, + "step": 11505 + }, + { + "epoch": 2.1260177154669653, + "grad_norm": 0.09531303495168686, + "learning_rate": 4.132804443418621e-06, + "loss": 0.4118732511997223, + "step": 11506 + }, + { + "epoch": 2.126202492175861, + "grad_norm": 0.07894685864448547, + "learning_rate": 4.131189097303615e-06, + "loss": 0.39554134011268616, + "step": 11507 + }, + { + "epoch": 2.126387268884757, + "grad_norm": 0.08437313884496689, + "learning_rate": 4.129573984748927e-06, + "loss": 0.38657623529434204, + "step": 11508 + }, + { + "epoch": 2.1265720455936528, + "grad_norm": 0.09306003898382187, + "learning_rate": 4.127959105818823e-06, + "loss": 0.4309487044811249, + "step": 11509 + }, + { + "epoch": 2.1267568223025486, + "grad_norm": 0.08010219037532806, + "learning_rate": 4.126344460577573e-06, + "loss": 0.4455183446407318, + "step": 11510 + }, + { + "epoch": 2.1269415990114444, + "grad_norm": 0.07695150375366211, + "learning_rate": 4.124730049089443e-06, + "loss": 0.40218260884284973, + "step": 11511 + }, + { + "epoch": 2.1271263757203402, + "grad_norm": 0.09618864953517914, + "learning_rate": 4.123115871418672e-06, + "loss": 0.4410863518714905, + "step": 11512 + }, + { + "epoch": 2.127311152429236, + "grad_norm": 0.08329358696937561, + "learning_rate": 4.1215019276295074e-06, + "loss": 0.3536233603954315, + "step": 11513 + }, + { + "epoch": 2.1274959291381323, + "grad_norm": 0.07927120476961136, + "learning_rate": 4.119888217786171e-06, + "loss": 0.33560553193092346, + "step": 11514 + }, + { + "epoch": 2.127680705847028, + "grad_norm": 0.08909047394990921, + "learning_rate": 4.1182747419528864e-06, + "loss": 0.41112515330314636, + "step": 11515 + }, + { + "epoch": 2.127865482555924, + "grad_norm": 0.08944455534219742, + "learning_rate": 4.116661500193867e-06, + "loss": 0.44694074988365173, + "step": 11516 + }, + { + "epoch": 2.12805025926482, + "grad_norm": 0.09210187941789627, + "learning_rate": 4.115048492573311e-06, + "loss": 0.520761251449585, + "step": 11517 + }, + { + "epoch": 2.1282350359737157, + "grad_norm": 0.09095914661884308, + "learning_rate": 4.113435719155415e-06, + "loss": 0.5401807427406311, + "step": 11518 + }, + { + "epoch": 2.1284198126826115, + "grad_norm": 0.06786547601222992, + "learning_rate": 4.111823180004364e-06, + "loss": 0.3559070825576782, + "step": 11519 + }, + { + "epoch": 2.1286045893915073, + "grad_norm": 0.08700638264417648, + "learning_rate": 4.110210875184325e-06, + "loss": 0.4307456910610199, + "step": 11520 + }, + { + "epoch": 2.128789366100403, + "grad_norm": 0.09621629863977432, + "learning_rate": 4.108598804759466e-06, + "loss": 0.5875341296195984, + "step": 11521 + }, + { + "epoch": 2.128974142809299, + "grad_norm": 0.09203772991895676, + "learning_rate": 4.106986968793947e-06, + "loss": 0.45047008991241455, + "step": 11522 + }, + { + "epoch": 2.129158919518195, + "grad_norm": 0.08629295974969864, + "learning_rate": 4.105375367351902e-06, + "loss": 0.376220166683197, + "step": 11523 + }, + { + "epoch": 2.1293436962270906, + "grad_norm": 0.07493583858013153, + "learning_rate": 4.10376400049748e-06, + "loss": 0.3935466706752777, + "step": 11524 + }, + { + "epoch": 2.1295284729359865, + "grad_norm": 0.09711001068353653, + "learning_rate": 4.1021528682948064e-06, + "loss": 0.7404162287712097, + "step": 11525 + }, + { + "epoch": 2.1297132496448823, + "grad_norm": 0.07738396525382996, + "learning_rate": 4.100541970807993e-06, + "loss": 0.3708794414997101, + "step": 11526 + }, + { + "epoch": 2.129898026353778, + "grad_norm": 0.06657982617616653, + "learning_rate": 4.098931308101153e-06, + "loss": 0.33644160628318787, + "step": 11527 + }, + { + "epoch": 2.130082803062674, + "grad_norm": 0.07026130706071854, + "learning_rate": 4.097320880238388e-06, + "loss": 0.46239909529685974, + "step": 11528 + }, + { + "epoch": 2.1302675797715698, + "grad_norm": 0.10004347562789917, + "learning_rate": 4.095710687283781e-06, + "loss": 0.5297632217407227, + "step": 11529 + }, + { + "epoch": 2.1304523564804656, + "grad_norm": 0.09007824957370758, + "learning_rate": 4.0941007293014166e-06, + "loss": 0.43050703406333923, + "step": 11530 + }, + { + "epoch": 2.1306371331893614, + "grad_norm": 0.09030665457248688, + "learning_rate": 4.092491006355367e-06, + "loss": 0.4323001801967621, + "step": 11531 + }, + { + "epoch": 2.1308219098982573, + "grad_norm": 0.06192596256732941, + "learning_rate": 4.090881518509692e-06, + "loss": 0.32067015767097473, + "step": 11532 + }, + { + "epoch": 2.131006686607153, + "grad_norm": 0.08256769180297852, + "learning_rate": 4.089272265828449e-06, + "loss": 0.46855875849723816, + "step": 11533 + }, + { + "epoch": 2.131191463316049, + "grad_norm": 0.10042224824428558, + "learning_rate": 4.087663248375674e-06, + "loss": 0.5218668580055237, + "step": 11534 + }, + { + "epoch": 2.1313762400249447, + "grad_norm": 0.08853939175605774, + "learning_rate": 4.086054466215404e-06, + "loss": 0.4142692983150482, + "step": 11535 + }, + { + "epoch": 2.1315610167338406, + "grad_norm": 0.080595463514328, + "learning_rate": 4.084445919411664e-06, + "loss": 0.41706037521362305, + "step": 11536 + }, + { + "epoch": 2.1317457934427364, + "grad_norm": 0.08282291889190674, + "learning_rate": 4.082837608028471e-06, + "loss": 0.42229169607162476, + "step": 11537 + }, + { + "epoch": 2.1319305701516322, + "grad_norm": 0.08266448974609375, + "learning_rate": 4.081229532129826e-06, + "loss": 0.48666149377822876, + "step": 11538 + }, + { + "epoch": 2.132115346860528, + "grad_norm": 0.08011366426944733, + "learning_rate": 4.0796216917797335e-06, + "loss": 0.36033910512924194, + "step": 11539 + }, + { + "epoch": 2.132300123569424, + "grad_norm": 0.11487693339586258, + "learning_rate": 4.07801408704217e-06, + "loss": 0.6611344218254089, + "step": 11540 + }, + { + "epoch": 2.1324849002783197, + "grad_norm": 0.09254970401525497, + "learning_rate": 4.076406717981121e-06, + "loss": 0.4138562083244324, + "step": 11541 + }, + { + "epoch": 2.1326696769872155, + "grad_norm": 0.06875742226839066, + "learning_rate": 4.074799584660552e-06, + "loss": 0.2918391823768616, + "step": 11542 + }, + { + "epoch": 2.132854453696112, + "grad_norm": 0.08584243059158325, + "learning_rate": 4.073192687144418e-06, + "loss": 0.5013090372085571, + "step": 11543 + }, + { + "epoch": 2.1330392304050076, + "grad_norm": 0.09431134909391403, + "learning_rate": 4.071586025496679e-06, + "loss": 0.6037891507148743, + "step": 11544 + }, + { + "epoch": 2.1332240071139035, + "grad_norm": 0.10009553283452988, + "learning_rate": 4.0699795997812644e-06, + "loss": 0.46707063913345337, + "step": 11545 + }, + { + "epoch": 2.1334087838227993, + "grad_norm": 0.07532192021608353, + "learning_rate": 4.0683734100621085e-06, + "loss": 0.3813382387161255, + "step": 11546 + }, + { + "epoch": 2.133593560531695, + "grad_norm": 0.0831272080540657, + "learning_rate": 4.066767456403137e-06, + "loss": 0.49544063210487366, + "step": 11547 + }, + { + "epoch": 2.133778337240591, + "grad_norm": 0.07652248442173004, + "learning_rate": 4.065161738868255e-06, + "loss": 0.4752427041530609, + "step": 11548 + }, + { + "epoch": 2.133963113949487, + "grad_norm": 0.08891411125659943, + "learning_rate": 4.063556257521369e-06, + "loss": 0.4780322015285492, + "step": 11549 + }, + { + "epoch": 2.1341478906583826, + "grad_norm": 0.08017689734697342, + "learning_rate": 4.0619510124263684e-06, + "loss": 0.3103610873222351, + "step": 11550 + }, + { + "epoch": 2.1343326673672784, + "grad_norm": 0.06672009825706482, + "learning_rate": 4.060346003647142e-06, + "loss": 0.29543837904930115, + "step": 11551 + }, + { + "epoch": 2.1345174440761743, + "grad_norm": 0.0958172008395195, + "learning_rate": 4.0587412312475614e-06, + "loss": 0.5142347812652588, + "step": 11552 + }, + { + "epoch": 2.13470222078507, + "grad_norm": 0.07005161046981812, + "learning_rate": 4.057136695291495e-06, + "loss": 0.43864113092422485, + "step": 11553 + }, + { + "epoch": 2.134886997493966, + "grad_norm": 0.09195329993963242, + "learning_rate": 4.055532395842791e-06, + "loss": 0.4625697731971741, + "step": 11554 + }, + { + "epoch": 2.1350717742028618, + "grad_norm": 0.0720549076795578, + "learning_rate": 4.053928332965303e-06, + "loss": 0.35446739196777344, + "step": 11555 + }, + { + "epoch": 2.1352565509117576, + "grad_norm": 0.0694388598203659, + "learning_rate": 4.052324506722861e-06, + "loss": 0.3407813012599945, + "step": 11556 + }, + { + "epoch": 2.1354413276206534, + "grad_norm": 0.04996746778488159, + "learning_rate": 4.050720917179297e-06, + "loss": 0.23797312378883362, + "step": 11557 + }, + { + "epoch": 2.1356261043295492, + "grad_norm": 0.09368681162595749, + "learning_rate": 4.049117564398428e-06, + "loss": 0.4741019010543823, + "step": 11558 + }, + { + "epoch": 2.135810881038445, + "grad_norm": 0.08431044965982437, + "learning_rate": 4.047514448444065e-06, + "loss": 0.44846999645233154, + "step": 11559 + }, + { + "epoch": 2.135995657747341, + "grad_norm": 0.10597667098045349, + "learning_rate": 4.0459115693800015e-06, + "loss": 0.5060697197914124, + "step": 11560 + }, + { + "epoch": 2.1361804344562367, + "grad_norm": 0.08452227711677551, + "learning_rate": 4.044308927270032e-06, + "loss": 0.47105276584625244, + "step": 11561 + }, + { + "epoch": 2.1363652111651326, + "grad_norm": 0.08384208381175995, + "learning_rate": 4.042706522177932e-06, + "loss": 0.5375339984893799, + "step": 11562 + }, + { + "epoch": 2.1365499878740284, + "grad_norm": 0.10141726583242416, + "learning_rate": 4.04110435416747e-06, + "loss": 0.690080463886261, + "step": 11563 + }, + { + "epoch": 2.136734764582924, + "grad_norm": 0.07506753504276276, + "learning_rate": 4.039502423302418e-06, + "loss": 0.3889322876930237, + "step": 11564 + }, + { + "epoch": 2.13691954129182, + "grad_norm": 0.08942927420139313, + "learning_rate": 4.0379007296465186e-06, + "loss": 0.48192328214645386, + "step": 11565 + }, + { + "epoch": 2.137104318000716, + "grad_norm": 0.08105836808681488, + "learning_rate": 4.036299273263518e-06, + "loss": 0.40929368138313293, + "step": 11566 + }, + { + "epoch": 2.1372890947096117, + "grad_norm": 0.10397261381149292, + "learning_rate": 4.034698054217151e-06, + "loss": 0.5696961283683777, + "step": 11567 + }, + { + "epoch": 2.1374738714185075, + "grad_norm": 0.10482069104909897, + "learning_rate": 4.033097072571135e-06, + "loss": 0.5859604477882385, + "step": 11568 + }, + { + "epoch": 2.1376586481274034, + "grad_norm": 0.08016854524612427, + "learning_rate": 4.031496328389188e-06, + "loss": 0.42372801899909973, + "step": 11569 + }, + { + "epoch": 2.137843424836299, + "grad_norm": 0.0814351961016655, + "learning_rate": 4.029895821735013e-06, + "loss": 0.408677339553833, + "step": 11570 + }, + { + "epoch": 2.138028201545195, + "grad_norm": 0.08894795179367065, + "learning_rate": 4.028295552672307e-06, + "loss": 0.4432206153869629, + "step": 11571 + }, + { + "epoch": 2.1382129782540913, + "grad_norm": 0.09582308679819107, + "learning_rate": 4.0266955212647555e-06, + "loss": 0.5286830067634583, + "step": 11572 + }, + { + "epoch": 2.138397754962987, + "grad_norm": 0.07396747916936874, + "learning_rate": 4.025095727576036e-06, + "loss": 0.3959920406341553, + "step": 11573 + }, + { + "epoch": 2.138582531671883, + "grad_norm": 0.08140245079994202, + "learning_rate": 4.023496171669811e-06, + "loss": 0.3716605603694916, + "step": 11574 + }, + { + "epoch": 2.1387673083807788, + "grad_norm": 0.09383906424045563, + "learning_rate": 4.02189685360974e-06, + "loss": 0.5051342844963074, + "step": 11575 + }, + { + "epoch": 2.1389520850896746, + "grad_norm": 0.11029773950576782, + "learning_rate": 4.020297773459472e-06, + "loss": 0.6979007720947266, + "step": 11576 + }, + { + "epoch": 2.1391368617985704, + "grad_norm": 0.10944915562868118, + "learning_rate": 4.0186989312826445e-06, + "loss": 0.5362704396247864, + "step": 11577 + }, + { + "epoch": 2.1393216385074663, + "grad_norm": 0.07990576326847076, + "learning_rate": 4.017100327142889e-06, + "loss": 0.4585151970386505, + "step": 11578 + }, + { + "epoch": 2.139506415216362, + "grad_norm": 0.07353705167770386, + "learning_rate": 4.015501961103818e-06, + "loss": 0.3769177496433258, + "step": 11579 + }, + { + "epoch": 2.139691191925258, + "grad_norm": 0.09751986712217331, + "learning_rate": 4.013903833229048e-06, + "loss": 0.48764896392822266, + "step": 11580 + }, + { + "epoch": 2.1398759686341537, + "grad_norm": 0.10674029588699341, + "learning_rate": 4.01230594358218e-06, + "loss": 0.48834967613220215, + "step": 11581 + }, + { + "epoch": 2.1400607453430496, + "grad_norm": 0.09530480206012726, + "learning_rate": 4.010708292226798e-06, + "loss": 0.5856447815895081, + "step": 11582 + }, + { + "epoch": 2.1402455220519454, + "grad_norm": 0.07518292963504791, + "learning_rate": 4.009110879226485e-06, + "loss": 0.343195378780365, + "step": 11583 + }, + { + "epoch": 2.1404302987608412, + "grad_norm": 0.10637890547513962, + "learning_rate": 4.007513704644823e-06, + "loss": 0.48493310809135437, + "step": 11584 + }, + { + "epoch": 2.140615075469737, + "grad_norm": 0.10789762437343597, + "learning_rate": 4.005916768545365e-06, + "loss": 0.5495380759239197, + "step": 11585 + }, + { + "epoch": 2.140799852178633, + "grad_norm": 0.08924262225627899, + "learning_rate": 4.004320070991666e-06, + "loss": 0.5379125475883484, + "step": 11586 + }, + { + "epoch": 2.1409846288875287, + "grad_norm": 0.06434209644794464, + "learning_rate": 4.002723612047272e-06, + "loss": 0.3162035048007965, + "step": 11587 + }, + { + "epoch": 2.1411694055964245, + "grad_norm": 0.08127405494451523, + "learning_rate": 4.001127391775713e-06, + "loss": 0.393638551235199, + "step": 11588 + }, + { + "epoch": 2.1413541823053204, + "grad_norm": 0.09264098852872849, + "learning_rate": 3.999531410240516e-06, + "loss": 0.3852292001247406, + "step": 11589 + }, + { + "epoch": 2.141538959014216, + "grad_norm": 0.08991791307926178, + "learning_rate": 3.997935667505195e-06, + "loss": 0.4149450957775116, + "step": 11590 + }, + { + "epoch": 2.141723735723112, + "grad_norm": 0.08795911073684692, + "learning_rate": 3.996340163633257e-06, + "loss": 0.4311430752277374, + "step": 11591 + }, + { + "epoch": 2.141908512432008, + "grad_norm": 0.08005280047655106, + "learning_rate": 3.994744898688197e-06, + "loss": 0.40000590682029724, + "step": 11592 + }, + { + "epoch": 2.1420932891409037, + "grad_norm": 0.0808529183268547, + "learning_rate": 3.993149872733506e-06, + "loss": 0.47213149070739746, + "step": 11593 + }, + { + "epoch": 2.1422780658497995, + "grad_norm": 0.09862861782312393, + "learning_rate": 3.991555085832653e-06, + "loss": 0.6171346306800842, + "step": 11594 + }, + { + "epoch": 2.1424628425586953, + "grad_norm": 0.11860477179288864, + "learning_rate": 3.98996053804911e-06, + "loss": 0.6687604188919067, + "step": 11595 + }, + { + "epoch": 2.142647619267591, + "grad_norm": 0.09474646300077438, + "learning_rate": 3.988366229446335e-06, + "loss": 0.45556727051734924, + "step": 11596 + }, + { + "epoch": 2.142832395976487, + "grad_norm": 0.09376123547554016, + "learning_rate": 3.986772160087775e-06, + "loss": 0.5133540034294128, + "step": 11597 + }, + { + "epoch": 2.143017172685383, + "grad_norm": 0.09000354260206223, + "learning_rate": 3.985178330036874e-06, + "loss": 0.48997581005096436, + "step": 11598 + }, + { + "epoch": 2.1432019493942787, + "grad_norm": 0.07603364437818527, + "learning_rate": 3.983584739357055e-06, + "loss": 0.324707955121994, + "step": 11599 + }, + { + "epoch": 2.1433867261031745, + "grad_norm": 0.08514192700386047, + "learning_rate": 3.981991388111739e-06, + "loss": 0.4303866922855377, + "step": 11600 + }, + { + "epoch": 2.1435715028120708, + "grad_norm": 0.11421073228120804, + "learning_rate": 3.980398276364342e-06, + "loss": 0.5772789716720581, + "step": 11601 + }, + { + "epoch": 2.1437562795209666, + "grad_norm": 0.08341315388679504, + "learning_rate": 3.9788054041782535e-06, + "loss": 0.4874880611896515, + "step": 11602 + }, + { + "epoch": 2.1439410562298624, + "grad_norm": 0.07558920979499817, + "learning_rate": 3.977212771616875e-06, + "loss": 0.3493494689464569, + "step": 11603 + }, + { + "epoch": 2.1441258329387582, + "grad_norm": 0.06613948196172714, + "learning_rate": 3.97562037874359e-06, + "loss": 0.3161487281322479, + "step": 11604 + }, + { + "epoch": 2.144310609647654, + "grad_norm": 0.09245660156011581, + "learning_rate": 3.974028225621762e-06, + "loss": 0.45485496520996094, + "step": 11605 + }, + { + "epoch": 2.14449538635655, + "grad_norm": 0.06644047796726227, + "learning_rate": 3.972436312314758e-06, + "loss": 0.3771281838417053, + "step": 11606 + }, + { + "epoch": 2.1446801630654457, + "grad_norm": 0.06990991532802582, + "learning_rate": 3.9708446388859335e-06, + "loss": 0.3933688998222351, + "step": 11607 + }, + { + "epoch": 2.1448649397743416, + "grad_norm": 0.06094184145331383, + "learning_rate": 3.969253205398626e-06, + "loss": 0.26781320571899414, + "step": 11608 + }, + { + "epoch": 2.1450497164832374, + "grad_norm": 0.10649731755256653, + "learning_rate": 3.967662011916174e-06, + "loss": 0.5235942006111145, + "step": 11609 + }, + { + "epoch": 2.145234493192133, + "grad_norm": 0.09553772211074829, + "learning_rate": 3.966071058501902e-06, + "loss": 0.4927021265029907, + "step": 11610 + }, + { + "epoch": 2.145419269901029, + "grad_norm": 0.07307995110750198, + "learning_rate": 3.964480345219122e-06, + "loss": 0.39524558186531067, + "step": 11611 + }, + { + "epoch": 2.145604046609925, + "grad_norm": 0.07455287873744965, + "learning_rate": 3.962889872131147e-06, + "loss": 0.35658198595046997, + "step": 11612 + }, + { + "epoch": 2.1457888233188207, + "grad_norm": 0.07614074647426605, + "learning_rate": 3.961299639301262e-06, + "loss": 0.33883386850357056, + "step": 11613 + }, + { + "epoch": 2.1459736000277165, + "grad_norm": 0.08309691399335861, + "learning_rate": 3.959709646792761e-06, + "loss": 0.39179253578186035, + "step": 11614 + }, + { + "epoch": 2.1461583767366124, + "grad_norm": 0.12092384696006775, + "learning_rate": 3.958119894668917e-06, + "loss": 0.6273046135902405, + "step": 11615 + }, + { + "epoch": 2.146343153445508, + "grad_norm": 0.10526594519615173, + "learning_rate": 3.956530382992999e-06, + "loss": 0.5224920511245728, + "step": 11616 + }, + { + "epoch": 2.146527930154404, + "grad_norm": 0.07732649892568588, + "learning_rate": 3.954941111828263e-06, + "loss": 0.38463592529296875, + "step": 11617 + }, + { + "epoch": 2.1467127068633, + "grad_norm": 0.07383439689874649, + "learning_rate": 3.953352081237963e-06, + "loss": 0.4427803158760071, + "step": 11618 + }, + { + "epoch": 2.1468974835721957, + "grad_norm": 0.06280277669429779, + "learning_rate": 3.951763291285329e-06, + "loss": 0.26544296741485596, + "step": 11619 + }, + { + "epoch": 2.1470822602810915, + "grad_norm": 0.05331779643893242, + "learning_rate": 3.950174742033593e-06, + "loss": 0.2512390911579132, + "step": 11620 + }, + { + "epoch": 2.1472670369899873, + "grad_norm": 0.06967869400978088, + "learning_rate": 3.948586433545979e-06, + "loss": 0.39704135060310364, + "step": 11621 + }, + { + "epoch": 2.147451813698883, + "grad_norm": 0.07695640623569489, + "learning_rate": 3.946998365885685e-06, + "loss": 0.46992355585098267, + "step": 11622 + }, + { + "epoch": 2.147636590407779, + "grad_norm": 0.0801285058259964, + "learning_rate": 3.945410539115921e-06, + "loss": 0.4090726971626282, + "step": 11623 + }, + { + "epoch": 2.147821367116675, + "grad_norm": 0.07338245213031769, + "learning_rate": 3.943822953299881e-06, + "loss": 0.39517590403556824, + "step": 11624 + }, + { + "epoch": 2.1480061438255706, + "grad_norm": 0.09227310866117477, + "learning_rate": 3.942235608500735e-06, + "loss": 0.5156955122947693, + "step": 11625 + }, + { + "epoch": 2.1481909205344665, + "grad_norm": 0.08540918678045273, + "learning_rate": 3.940648504781664e-06, + "loss": 0.3832940459251404, + "step": 11626 + }, + { + "epoch": 2.1483756972433623, + "grad_norm": 0.09108877927064896, + "learning_rate": 3.93906164220582e-06, + "loss": 0.44129088521003723, + "step": 11627 + }, + { + "epoch": 2.148560473952258, + "grad_norm": 0.0894840881228447, + "learning_rate": 3.9374750208363625e-06, + "loss": 0.49256956577301025, + "step": 11628 + }, + { + "epoch": 2.148745250661154, + "grad_norm": 0.07960314303636551, + "learning_rate": 3.93588864073643e-06, + "loss": 0.4049118161201477, + "step": 11629 + }, + { + "epoch": 2.1489300273700502, + "grad_norm": 0.06465893238782883, + "learning_rate": 3.934302501969159e-06, + "loss": 0.28798285126686096, + "step": 11630 + }, + { + "epoch": 2.1491148040789456, + "grad_norm": 0.09121961891651154, + "learning_rate": 3.932716604597671e-06, + "loss": 0.47333887219429016, + "step": 11631 + }, + { + "epoch": 2.149299580787842, + "grad_norm": 0.0883626863360405, + "learning_rate": 3.931130948685083e-06, + "loss": 0.5514802932739258, + "step": 11632 + }, + { + "epoch": 2.1494843574967377, + "grad_norm": 0.08931796997785568, + "learning_rate": 3.9295455342944935e-06, + "loss": 0.40167751908302307, + "step": 11633 + }, + { + "epoch": 2.1496691342056335, + "grad_norm": 0.06782089173793793, + "learning_rate": 3.927960361489e-06, + "loss": 0.39248910546302795, + "step": 11634 + }, + { + "epoch": 2.1498539109145294, + "grad_norm": 0.06785066425800323, + "learning_rate": 3.926375430331685e-06, + "loss": 0.3341401517391205, + "step": 11635 + }, + { + "epoch": 2.150038687623425, + "grad_norm": 0.0803646445274353, + "learning_rate": 3.924790740885628e-06, + "loss": 0.43350958824157715, + "step": 11636 + }, + { + "epoch": 2.150223464332321, + "grad_norm": 0.08781901746988297, + "learning_rate": 3.923206293213892e-06, + "loss": 0.38169121742248535, + "step": 11637 + }, + { + "epoch": 2.150408241041217, + "grad_norm": 0.07651541382074356, + "learning_rate": 3.921622087379536e-06, + "loss": 0.4201239347457886, + "step": 11638 + }, + { + "epoch": 2.1505930177501127, + "grad_norm": 0.0754622220993042, + "learning_rate": 3.920038123445602e-06, + "loss": 0.3791052997112274, + "step": 11639 + }, + { + "epoch": 2.1507777944590085, + "grad_norm": 0.09134471416473389, + "learning_rate": 3.9184544014751295e-06, + "loss": 0.5088651180267334, + "step": 11640 + }, + { + "epoch": 2.1509625711679043, + "grad_norm": 0.08660312741994858, + "learning_rate": 3.916870921531148e-06, + "loss": 0.5049603581428528, + "step": 11641 + }, + { + "epoch": 2.1511473478768, + "grad_norm": 0.07500050216913223, + "learning_rate": 3.915287683676664e-06, + "loss": 0.32820960879325867, + "step": 11642 + }, + { + "epoch": 2.151332124585696, + "grad_norm": 0.09916878491640091, + "learning_rate": 3.913704687974701e-06, + "loss": 0.5198667049407959, + "step": 11643 + }, + { + "epoch": 2.151516901294592, + "grad_norm": 0.08865980803966522, + "learning_rate": 3.912121934488246e-06, + "loss": 0.5391780734062195, + "step": 11644 + }, + { + "epoch": 2.1517016780034877, + "grad_norm": 0.07601450383663177, + "learning_rate": 3.910539423280293e-06, + "loss": 0.470345675945282, + "step": 11645 + }, + { + "epoch": 2.1518864547123835, + "grad_norm": 0.08250866830348969, + "learning_rate": 3.908957154413823e-06, + "loss": 0.38118302822113037, + "step": 11646 + }, + { + "epoch": 2.1520712314212793, + "grad_norm": 0.09184881299734116, + "learning_rate": 3.907375127951797e-06, + "loss": 0.5007253289222717, + "step": 11647 + }, + { + "epoch": 2.152256008130175, + "grad_norm": 0.07770222425460815, + "learning_rate": 3.90579334395718e-06, + "loss": 0.39987003803253174, + "step": 11648 + }, + { + "epoch": 2.152440784839071, + "grad_norm": 0.09182540327310562, + "learning_rate": 3.904211802492922e-06, + "loss": 0.48013049364089966, + "step": 11649 + }, + { + "epoch": 2.152625561547967, + "grad_norm": 0.08273793756961823, + "learning_rate": 3.902630503621963e-06, + "loss": 0.5183853507041931, + "step": 11650 + }, + { + "epoch": 2.1528103382568626, + "grad_norm": 0.09289605170488358, + "learning_rate": 3.901049447407234e-06, + "loss": 0.39718329906463623, + "step": 11651 + }, + { + "epoch": 2.1529951149657585, + "grad_norm": 0.06910242140293121, + "learning_rate": 3.899468633911658e-06, + "loss": 0.3946877717971802, + "step": 11652 + }, + { + "epoch": 2.1531798916746543, + "grad_norm": 0.08967945724725723, + "learning_rate": 3.897888063198142e-06, + "loss": 0.4960063695907593, + "step": 11653 + }, + { + "epoch": 2.15336466838355, + "grad_norm": 0.07799212634563446, + "learning_rate": 3.896307735329588e-06, + "loss": 0.4674167037010193, + "step": 11654 + }, + { + "epoch": 2.153549445092446, + "grad_norm": 0.08065201342105865, + "learning_rate": 3.8947276503688925e-06, + "loss": 0.3883388936519623, + "step": 11655 + }, + { + "epoch": 2.1537342218013418, + "grad_norm": 0.055760860443115234, + "learning_rate": 3.893147808378935e-06, + "loss": 0.21677128970623016, + "step": 11656 + }, + { + "epoch": 2.1539189985102376, + "grad_norm": 0.09226616472005844, + "learning_rate": 3.8915682094225885e-06, + "loss": 0.4941445589065552, + "step": 11657 + }, + { + "epoch": 2.1541037752191334, + "grad_norm": 0.07622168213129044, + "learning_rate": 3.88998885356272e-06, + "loss": 0.4406668543815613, + "step": 11658 + }, + { + "epoch": 2.1542885519280297, + "grad_norm": 0.08101237565279007, + "learning_rate": 3.8884097408621754e-06, + "loss": 0.47072938084602356, + "step": 11659 + }, + { + "epoch": 2.154473328636925, + "grad_norm": 0.08489350974559784, + "learning_rate": 3.886830871383806e-06, + "loss": 0.3665739595890045, + "step": 11660 + }, + { + "epoch": 2.1546581053458214, + "grad_norm": 0.12182774394750595, + "learning_rate": 3.8852522451904395e-06, + "loss": 0.5753546357154846, + "step": 11661 + }, + { + "epoch": 2.154842882054717, + "grad_norm": 0.10022709518671036, + "learning_rate": 3.8836738623449e-06, + "loss": 0.5788068771362305, + "step": 11662 + }, + { + "epoch": 2.155027658763613, + "grad_norm": 0.08397048711776733, + "learning_rate": 3.882095722910011e-06, + "loss": 0.43908387422561646, + "step": 11663 + }, + { + "epoch": 2.155212435472509, + "grad_norm": 0.09467579424381256, + "learning_rate": 3.880517826948569e-06, + "loss": 0.4748491048812866, + "step": 11664 + }, + { + "epoch": 2.1553972121814047, + "grad_norm": 0.08129502087831497, + "learning_rate": 3.878940174523371e-06, + "loss": 0.5089216828346252, + "step": 11665 + }, + { + "epoch": 2.1555819888903005, + "grad_norm": 0.08315569162368774, + "learning_rate": 3.877362765697209e-06, + "loss": 0.3821069300174713, + "step": 11666 + }, + { + "epoch": 2.1557667655991963, + "grad_norm": 0.08684141933917999, + "learning_rate": 3.875785600532849e-06, + "loss": 0.3733517527580261, + "step": 11667 + }, + { + "epoch": 2.155951542308092, + "grad_norm": 0.09421560913324356, + "learning_rate": 3.874208679093063e-06, + "loss": 0.5246985554695129, + "step": 11668 + }, + { + "epoch": 2.156136319016988, + "grad_norm": 0.06547679752111435, + "learning_rate": 3.872632001440604e-06, + "loss": 0.33231088519096375, + "step": 11669 + }, + { + "epoch": 2.156321095725884, + "grad_norm": 0.08077463507652283, + "learning_rate": 3.871055567638224e-06, + "loss": 0.4162328839302063, + "step": 11670 + }, + { + "epoch": 2.1565058724347796, + "grad_norm": 0.08313507586717606, + "learning_rate": 3.869479377748655e-06, + "loss": 0.41471588611602783, + "step": 11671 + }, + { + "epoch": 2.1566906491436755, + "grad_norm": 0.06435898691415787, + "learning_rate": 3.867903431834632e-06, + "loss": 0.30664384365081787, + "step": 11672 + }, + { + "epoch": 2.1568754258525713, + "grad_norm": 0.08906294405460358, + "learning_rate": 3.866327729958863e-06, + "loss": 0.4741312563419342, + "step": 11673 + }, + { + "epoch": 2.157060202561467, + "grad_norm": 0.10594379901885986, + "learning_rate": 3.864752272184065e-06, + "loss": 0.7573657631874084, + "step": 11674 + }, + { + "epoch": 2.157244979270363, + "grad_norm": 0.07337291538715363, + "learning_rate": 3.863177058572925e-06, + "loss": 0.3911682963371277, + "step": 11675 + }, + { + "epoch": 2.157429755979259, + "grad_norm": 0.08629732578992844, + "learning_rate": 3.86160208918814e-06, + "loss": 0.44535213708877563, + "step": 11676 + }, + { + "epoch": 2.1576145326881546, + "grad_norm": 0.06811978667974472, + "learning_rate": 3.860027364092393e-06, + "loss": 0.37970200181007385, + "step": 11677 + }, + { + "epoch": 2.1577993093970504, + "grad_norm": 0.09466637670993805, + "learning_rate": 3.858452883348342e-06, + "loss": 0.42708879709243774, + "step": 11678 + }, + { + "epoch": 2.1579840861059463, + "grad_norm": 0.07300035655498505, + "learning_rate": 3.856878647018654e-06, + "loss": 0.46120768785476685, + "step": 11679 + }, + { + "epoch": 2.158168862814842, + "grad_norm": 0.09889834374189377, + "learning_rate": 3.855304655165978e-06, + "loss": 0.5782213807106018, + "step": 11680 + }, + { + "epoch": 2.158353639523738, + "grad_norm": 0.08899401128292084, + "learning_rate": 3.853730907852949e-06, + "loss": 0.4296332001686096, + "step": 11681 + }, + { + "epoch": 2.1585384162326338, + "grad_norm": 0.07711257040500641, + "learning_rate": 3.852157405142199e-06, + "loss": 0.49526482820510864, + "step": 11682 + }, + { + "epoch": 2.1587231929415296, + "grad_norm": 0.08905673772096634, + "learning_rate": 3.850584147096355e-06, + "loss": 0.456241637468338, + "step": 11683 + }, + { + "epoch": 2.1589079696504254, + "grad_norm": 0.10414526611566544, + "learning_rate": 3.849011133778021e-06, + "loss": 0.5894927382469177, + "step": 11684 + }, + { + "epoch": 2.1590927463593212, + "grad_norm": 0.08418159186840057, + "learning_rate": 3.847438365249799e-06, + "loss": 0.40886250138282776, + "step": 11685 + }, + { + "epoch": 2.159277523068217, + "grad_norm": 0.06604897230863571, + "learning_rate": 3.845865841574286e-06, + "loss": 0.3613528907299042, + "step": 11686 + }, + { + "epoch": 2.159462299777113, + "grad_norm": 0.098621666431427, + "learning_rate": 3.8442935628140545e-06, + "loss": 0.5726948380470276, + "step": 11687 + }, + { + "epoch": 2.1596470764860087, + "grad_norm": 0.07698316872119904, + "learning_rate": 3.84272152903168e-06, + "loss": 0.4723285138607025, + "step": 11688 + }, + { + "epoch": 2.1598318531949046, + "grad_norm": 0.0858742967247963, + "learning_rate": 3.841149740289725e-06, + "loss": 0.4319852292537689, + "step": 11689 + }, + { + "epoch": 2.160016629903801, + "grad_norm": 0.08265368640422821, + "learning_rate": 3.839578196650742e-06, + "loss": 0.4376170337200165, + "step": 11690 + }, + { + "epoch": 2.1602014066126967, + "grad_norm": 0.08331847935914993, + "learning_rate": 3.838006898177277e-06, + "loss": 0.5049540400505066, + "step": 11691 + }, + { + "epoch": 2.1603861833215925, + "grad_norm": 0.08013642579317093, + "learning_rate": 3.836435844931855e-06, + "loss": 0.40282782912254333, + "step": 11692 + }, + { + "epoch": 2.1605709600304883, + "grad_norm": 0.08243634551763535, + "learning_rate": 3.834865036977003e-06, + "loss": 0.5801693797111511, + "step": 11693 + }, + { + "epoch": 2.160755736739384, + "grad_norm": 0.08161672204732895, + "learning_rate": 3.833294474375234e-06, + "loss": 0.40056663751602173, + "step": 11694 + }, + { + "epoch": 2.16094051344828, + "grad_norm": 0.08250513672828674, + "learning_rate": 3.831724157189053e-06, + "loss": 0.433946430683136, + "step": 11695 + }, + { + "epoch": 2.161125290157176, + "grad_norm": 0.08036414533853531, + "learning_rate": 3.830154085480952e-06, + "loss": 0.4758602976799011, + "step": 11696 + }, + { + "epoch": 2.1613100668660716, + "grad_norm": 0.07142708450555801, + "learning_rate": 3.828584259313418e-06, + "loss": 0.34024667739868164, + "step": 11697 + }, + { + "epoch": 2.1614948435749675, + "grad_norm": 0.11011051386594772, + "learning_rate": 3.82701467874892e-06, + "loss": 0.5866760611534119, + "step": 11698 + }, + { + "epoch": 2.1616796202838633, + "grad_norm": 0.08226508647203445, + "learning_rate": 3.825445343849925e-06, + "loss": 0.5511084794998169, + "step": 11699 + }, + { + "epoch": 2.161864396992759, + "grad_norm": 0.0740421712398529, + "learning_rate": 3.823876254678891e-06, + "loss": 0.3521818220615387, + "step": 11700 + }, + { + "epoch": 2.162049173701655, + "grad_norm": 0.058426644653081894, + "learning_rate": 3.822307411298256e-06, + "loss": 0.21237623691558838, + "step": 11701 + }, + { + "epoch": 2.1622339504105508, + "grad_norm": 0.08686983585357666, + "learning_rate": 3.820738813770455e-06, + "loss": 0.4280414283275604, + "step": 11702 + }, + { + "epoch": 2.1624187271194466, + "grad_norm": 0.06894486397504807, + "learning_rate": 3.819170462157924e-06, + "loss": 0.35992392897605896, + "step": 11703 + }, + { + "epoch": 2.1626035038283424, + "grad_norm": 0.11185557395219803, + "learning_rate": 3.8176023565230676e-06, + "loss": 0.5863062739372253, + "step": 11704 + }, + { + "epoch": 2.1627882805372383, + "grad_norm": 0.08624433726072311, + "learning_rate": 3.816034496928295e-06, + "loss": 0.44079217314720154, + "step": 11705 + }, + { + "epoch": 2.162973057246134, + "grad_norm": 0.09721717238426208, + "learning_rate": 3.8144668834360067e-06, + "loss": 0.502701461315155, + "step": 11706 + }, + { + "epoch": 2.16315783395503, + "grad_norm": 0.06353076547384262, + "learning_rate": 3.812899516108579e-06, + "loss": 0.32220011949539185, + "step": 11707 + }, + { + "epoch": 2.1633426106639257, + "grad_norm": 0.0796390026807785, + "learning_rate": 3.8113323950083947e-06, + "loss": 0.4178166687488556, + "step": 11708 + }, + { + "epoch": 2.1635273873728216, + "grad_norm": 0.0978512093424797, + "learning_rate": 3.80976552019782e-06, + "loss": 0.5056663751602173, + "step": 11709 + }, + { + "epoch": 2.1637121640817174, + "grad_norm": 0.07964329421520233, + "learning_rate": 3.80819889173921e-06, + "loss": 0.3986744284629822, + "step": 11710 + }, + { + "epoch": 2.1638969407906132, + "grad_norm": 0.08560479432344437, + "learning_rate": 3.8066325096949153e-06, + "loss": 0.3445097804069519, + "step": 11711 + }, + { + "epoch": 2.164081717499509, + "grad_norm": 0.0772845521569252, + "learning_rate": 3.8050663741272675e-06, + "loss": 0.3878020644187927, + "step": 11712 + }, + { + "epoch": 2.164266494208405, + "grad_norm": 0.07131746411323547, + "learning_rate": 3.803500485098597e-06, + "loss": 0.3322165012359619, + "step": 11713 + }, + { + "epoch": 2.1644512709173007, + "grad_norm": 0.07706581801176071, + "learning_rate": 3.8019348426712198e-06, + "loss": 0.41919857263565063, + "step": 11714 + }, + { + "epoch": 2.1646360476261965, + "grad_norm": 0.08948921412229538, + "learning_rate": 3.8003694469074446e-06, + "loss": 0.411518394947052, + "step": 11715 + }, + { + "epoch": 2.1648208243350924, + "grad_norm": 0.07101071625947952, + "learning_rate": 3.7988042978695706e-06, + "loss": 0.3982822597026825, + "step": 11716 + }, + { + "epoch": 2.165005601043988, + "grad_norm": 0.09112266451120377, + "learning_rate": 3.797239395619887e-06, + "loss": 0.41766148805618286, + "step": 11717 + }, + { + "epoch": 2.165190377752884, + "grad_norm": 0.07062102854251862, + "learning_rate": 3.7956747402206663e-06, + "loss": 0.38626226782798767, + "step": 11718 + }, + { + "epoch": 2.1653751544617803, + "grad_norm": 0.09081759303808212, + "learning_rate": 3.79411033173418e-06, + "loss": 0.40110763907432556, + "step": 11719 + }, + { + "epoch": 2.165559931170676, + "grad_norm": 0.09068002551794052, + "learning_rate": 3.7925461702226897e-06, + "loss": 0.4494416117668152, + "step": 11720 + }, + { + "epoch": 2.165744707879572, + "grad_norm": 0.06869544833898544, + "learning_rate": 3.7909822557484378e-06, + "loss": 0.3673214912414551, + "step": 11721 + }, + { + "epoch": 2.165929484588468, + "grad_norm": 0.07023914158344269, + "learning_rate": 3.7894185883736633e-06, + "loss": 0.4389057755470276, + "step": 11722 + }, + { + "epoch": 2.1661142612973636, + "grad_norm": 0.0772065743803978, + "learning_rate": 3.7878551681606057e-06, + "loss": 0.4547201097011566, + "step": 11723 + }, + { + "epoch": 2.1662990380062594, + "grad_norm": 0.08422631770372391, + "learning_rate": 3.7862919951714737e-06, + "loss": 0.3660799562931061, + "step": 11724 + }, + { + "epoch": 2.1664838147151553, + "grad_norm": 0.08606277406215668, + "learning_rate": 3.7847290694684836e-06, + "loss": 0.4506904184818268, + "step": 11725 + }, + { + "epoch": 2.166668591424051, + "grad_norm": 0.07380979508161545, + "learning_rate": 3.7831663911138283e-06, + "loss": 0.30309197306632996, + "step": 11726 + }, + { + "epoch": 2.166853368132947, + "grad_norm": 0.08433002978563309, + "learning_rate": 3.7816039601696996e-06, + "loss": 0.6037353873252869, + "step": 11727 + }, + { + "epoch": 2.1670381448418428, + "grad_norm": 0.0858033299446106, + "learning_rate": 3.78004177669828e-06, + "loss": 0.30435726046562195, + "step": 11728 + }, + { + "epoch": 2.1672229215507386, + "grad_norm": 0.10412168502807617, + "learning_rate": 3.7784798407617364e-06, + "loss": 0.5917178988456726, + "step": 11729 + }, + { + "epoch": 2.1674076982596344, + "grad_norm": 0.08749578148126602, + "learning_rate": 3.776918152422231e-06, + "loss": 0.49174991250038147, + "step": 11730 + }, + { + "epoch": 2.1675924749685302, + "grad_norm": 0.07869294285774231, + "learning_rate": 3.7753567117419175e-06, + "loss": 0.2797946631908417, + "step": 11731 + }, + { + "epoch": 2.167777251677426, + "grad_norm": 0.10534682124853134, + "learning_rate": 3.773795518782929e-06, + "loss": 0.5504571795463562, + "step": 11732 + }, + { + "epoch": 2.167962028386322, + "grad_norm": 0.08306889235973358, + "learning_rate": 3.7722345736073984e-06, + "loss": 0.44501793384552, + "step": 11733 + }, + { + "epoch": 2.1681468050952177, + "grad_norm": 0.09396319091320038, + "learning_rate": 3.7706738762774485e-06, + "loss": 0.43054285645484924, + "step": 11734 + }, + { + "epoch": 2.1683315818041136, + "grad_norm": 0.062297649681568146, + "learning_rate": 3.7691134268551897e-06, + "loss": 0.2745853364467621, + "step": 11735 + }, + { + "epoch": 2.1685163585130094, + "grad_norm": 0.0928124189376831, + "learning_rate": 3.7675532254027216e-06, + "loss": 0.5709888935089111, + "step": 11736 + }, + { + "epoch": 2.168701135221905, + "grad_norm": 0.08623574674129486, + "learning_rate": 3.76599327198214e-06, + "loss": 0.5272578597068787, + "step": 11737 + }, + { + "epoch": 2.168885911930801, + "grad_norm": 0.0718969851732254, + "learning_rate": 3.76443356665552e-06, + "loss": 0.3616103529930115, + "step": 11738 + }, + { + "epoch": 2.169070688639697, + "grad_norm": 0.0819055438041687, + "learning_rate": 3.7628741094849374e-06, + "loss": 0.46330684423446655, + "step": 11739 + }, + { + "epoch": 2.1692554653485927, + "grad_norm": 0.09148211777210236, + "learning_rate": 3.761314900532449e-06, + "loss": 0.5191062688827515, + "step": 11740 + }, + { + "epoch": 2.1694402420574885, + "grad_norm": 0.0735086053609848, + "learning_rate": 3.7597559398601102e-06, + "loss": 0.38592714071273804, + "step": 11741 + }, + { + "epoch": 2.1696250187663844, + "grad_norm": 0.07115492224693298, + "learning_rate": 3.7581972275299606e-06, + "loss": 0.3410407304763794, + "step": 11742 + }, + { + "epoch": 2.16980979547528, + "grad_norm": 0.09364961087703705, + "learning_rate": 3.7566387636040334e-06, + "loss": 0.5007708668708801, + "step": 11743 + }, + { + "epoch": 2.169994572184176, + "grad_norm": 0.07597985863685608, + "learning_rate": 3.755080548144351e-06, + "loss": 0.49047887325286865, + "step": 11744 + }, + { + "epoch": 2.170179348893072, + "grad_norm": 0.0871448889374733, + "learning_rate": 3.7535225812129274e-06, + "loss": 0.37562286853790283, + "step": 11745 + }, + { + "epoch": 2.1703641256019677, + "grad_norm": 0.0897744670510292, + "learning_rate": 3.7519648628717596e-06, + "loss": 0.47237977385520935, + "step": 11746 + }, + { + "epoch": 2.1705489023108635, + "grad_norm": 0.11996883153915405, + "learning_rate": 3.7504073931828424e-06, + "loss": 0.6920106410980225, + "step": 11747 + }, + { + "epoch": 2.1707336790197598, + "grad_norm": 0.09214407950639725, + "learning_rate": 3.7488501722081582e-06, + "loss": 0.5268396735191345, + "step": 11748 + }, + { + "epoch": 2.1709184557286556, + "grad_norm": 0.08599527925252914, + "learning_rate": 3.7472932000096807e-06, + "loss": 0.39189955592155457, + "step": 11749 + }, + { + "epoch": 2.1711032324375514, + "grad_norm": 0.09038657695055008, + "learning_rate": 3.7457364766493708e-06, + "loss": 0.43005287647247314, + "step": 11750 + }, + { + "epoch": 2.1712880091464473, + "grad_norm": 0.07823736220598221, + "learning_rate": 3.7441800021891863e-06, + "loss": 0.4083983898162842, + "step": 11751 + }, + { + "epoch": 2.171472785855343, + "grad_norm": 0.0693674385547638, + "learning_rate": 3.742623776691061e-06, + "loss": 0.42666032910346985, + "step": 11752 + }, + { + "epoch": 2.171657562564239, + "grad_norm": 0.07568097859621048, + "learning_rate": 3.741067800216934e-06, + "loss": 0.39862126111984253, + "step": 11753 + }, + { + "epoch": 2.1718423392731347, + "grad_norm": 0.08159180730581284, + "learning_rate": 3.739512072828726e-06, + "loss": 0.4888143539428711, + "step": 11754 + }, + { + "epoch": 2.1720271159820306, + "grad_norm": 0.07871854305267334, + "learning_rate": 3.737956594588351e-06, + "loss": 0.3868490755558014, + "step": 11755 + }, + { + "epoch": 2.1722118926909264, + "grad_norm": 0.10375802218914032, + "learning_rate": 3.736401365557716e-06, + "loss": 0.6778278350830078, + "step": 11756 + }, + { + "epoch": 2.1723966693998222, + "grad_norm": 0.08540346473455429, + "learning_rate": 3.734846385798707e-06, + "loss": 0.4792468845844269, + "step": 11757 + }, + { + "epoch": 2.172581446108718, + "grad_norm": 0.07805228233337402, + "learning_rate": 3.73329165537321e-06, + "loss": 0.3809247314929962, + "step": 11758 + }, + { + "epoch": 2.172766222817614, + "grad_norm": 0.08918479830026627, + "learning_rate": 3.731737174343103e-06, + "loss": 0.44978439807891846, + "step": 11759 + }, + { + "epoch": 2.1729509995265097, + "grad_norm": 0.07604454457759857, + "learning_rate": 3.730182942770243e-06, + "loss": 0.3504337668418884, + "step": 11760 + }, + { + "epoch": 2.1731357762354055, + "grad_norm": 0.07437156140804291, + "learning_rate": 3.728628960716485e-06, + "loss": 0.3848017454147339, + "step": 11761 + }, + { + "epoch": 2.1733205529443014, + "grad_norm": 0.08974701166152954, + "learning_rate": 3.727075228243674e-06, + "loss": 0.4856947064399719, + "step": 11762 + }, + { + "epoch": 2.173505329653197, + "grad_norm": 0.09555413573980331, + "learning_rate": 3.7255217454136428e-06, + "loss": 0.4570215046405792, + "step": 11763 + }, + { + "epoch": 2.173690106362093, + "grad_norm": 0.06414473801851273, + "learning_rate": 3.7239685122882173e-06, + "loss": 0.37356042861938477, + "step": 11764 + }, + { + "epoch": 2.173874883070989, + "grad_norm": 0.0668707937002182, + "learning_rate": 3.722415528929212e-06, + "loss": 0.35844603180885315, + "step": 11765 + }, + { + "epoch": 2.1740596597798847, + "grad_norm": 0.0816434994339943, + "learning_rate": 3.7208627953984257e-06, + "loss": 0.4278120696544647, + "step": 11766 + }, + { + "epoch": 2.1742444364887805, + "grad_norm": 0.09556222707033157, + "learning_rate": 3.7193103117576557e-06, + "loss": 0.4568500220775604, + "step": 11767 + }, + { + "epoch": 2.1744292131976763, + "grad_norm": 0.09306345134973526, + "learning_rate": 3.7177580780686838e-06, + "loss": 0.4913712441921234, + "step": 11768 + }, + { + "epoch": 2.174613989906572, + "grad_norm": 0.07091156393289566, + "learning_rate": 3.7162060943932875e-06, + "loss": 0.3560081720352173, + "step": 11769 + }, + { + "epoch": 2.174798766615468, + "grad_norm": 0.07228139787912369, + "learning_rate": 3.7146543607932284e-06, + "loss": 0.3397913873195648, + "step": 11770 + }, + { + "epoch": 2.174983543324364, + "grad_norm": 0.057565197348594666, + "learning_rate": 3.7131028773302656e-06, + "loss": 0.3120599687099457, + "step": 11771 + }, + { + "epoch": 2.1751683200332597, + "grad_norm": 0.07905125617980957, + "learning_rate": 3.7115516440661347e-06, + "loss": 0.4089307487010956, + "step": 11772 + }, + { + "epoch": 2.1753530967421555, + "grad_norm": 0.08861220628023148, + "learning_rate": 3.710000661062578e-06, + "loss": 0.4595871567726135, + "step": 11773 + }, + { + "epoch": 2.1755378734510513, + "grad_norm": 0.07304118573665619, + "learning_rate": 3.7084499283813103e-06, + "loss": 0.30586081743240356, + "step": 11774 + }, + { + "epoch": 2.175722650159947, + "grad_norm": 0.0830596312880516, + "learning_rate": 3.706899446084055e-06, + "loss": 0.3427889943122864, + "step": 11775 + }, + { + "epoch": 2.175907426868843, + "grad_norm": 0.09748268127441406, + "learning_rate": 3.7053492142325156e-06, + "loss": 0.5185943841934204, + "step": 11776 + }, + { + "epoch": 2.1760922035777392, + "grad_norm": 0.08044993877410889, + "learning_rate": 3.703799232888381e-06, + "loss": 0.38030368089675903, + "step": 11777 + }, + { + "epoch": 2.176276980286635, + "grad_norm": 0.09990067034959793, + "learning_rate": 3.7022495021133378e-06, + "loss": 0.5028130412101746, + "step": 11778 + }, + { + "epoch": 2.176461756995531, + "grad_norm": 0.09113533794879913, + "learning_rate": 3.700700021969066e-06, + "loss": 0.3763606548309326, + "step": 11779 + }, + { + "epoch": 2.1766465337044267, + "grad_norm": 0.0950152724981308, + "learning_rate": 3.699150792517221e-06, + "loss": 0.5273303985595703, + "step": 11780 + }, + { + "epoch": 2.1768313104133226, + "grad_norm": 0.0905051901936531, + "learning_rate": 3.6976018138194625e-06, + "loss": 0.6240034699440002, + "step": 11781 + }, + { + "epoch": 2.1770160871222184, + "grad_norm": 0.11675798892974854, + "learning_rate": 3.6960530859374334e-06, + "loss": 0.7055141925811768, + "step": 11782 + }, + { + "epoch": 2.177200863831114, + "grad_norm": 0.08601131290197372, + "learning_rate": 3.6945046089327698e-06, + "loss": 0.47330668568611145, + "step": 11783 + }, + { + "epoch": 2.17738564054001, + "grad_norm": 0.06150782108306885, + "learning_rate": 3.6929563828670945e-06, + "loss": 0.27529773116111755, + "step": 11784 + }, + { + "epoch": 2.177570417248906, + "grad_norm": 0.07958129793405533, + "learning_rate": 3.6914084078020263e-06, + "loss": 0.42535436153411865, + "step": 11785 + }, + { + "epoch": 2.1777551939578017, + "grad_norm": 0.07603038847446442, + "learning_rate": 3.6898606837991635e-06, + "loss": 0.35221901535987854, + "step": 11786 + }, + { + "epoch": 2.1779399706666975, + "grad_norm": 0.06935341656208038, + "learning_rate": 3.6883132109201037e-06, + "loss": 0.34105798602104187, + "step": 11787 + }, + { + "epoch": 2.1781247473755934, + "grad_norm": 0.07835391908884048, + "learning_rate": 3.6867659892264307e-06, + "loss": 0.44978412985801697, + "step": 11788 + }, + { + "epoch": 2.178309524084489, + "grad_norm": 0.08281917124986649, + "learning_rate": 3.685219018779721e-06, + "loss": 0.37326085567474365, + "step": 11789 + }, + { + "epoch": 2.178494300793385, + "grad_norm": 0.08823174983263016, + "learning_rate": 3.683672299641541e-06, + "loss": 0.45342573523521423, + "step": 11790 + }, + { + "epoch": 2.178679077502281, + "grad_norm": 0.06711402535438538, + "learning_rate": 3.6821258318734376e-06, + "loss": 0.2581331431865692, + "step": 11791 + }, + { + "epoch": 2.1788638542111767, + "grad_norm": 0.10414480417966843, + "learning_rate": 3.680579615536961e-06, + "loss": 0.6712921857833862, + "step": 11792 + }, + { + "epoch": 2.1790486309200725, + "grad_norm": 0.078679159283638, + "learning_rate": 3.6790336506936473e-06, + "loss": 0.4711715281009674, + "step": 11793 + }, + { + "epoch": 2.1792334076289683, + "grad_norm": 0.0918063074350357, + "learning_rate": 3.677487937405013e-06, + "loss": 0.4823344647884369, + "step": 11794 + }, + { + "epoch": 2.179418184337864, + "grad_norm": 0.08719798922538757, + "learning_rate": 3.6759424757325813e-06, + "loss": 0.43217337131500244, + "step": 11795 + }, + { + "epoch": 2.17960296104676, + "grad_norm": 0.08216572552919388, + "learning_rate": 3.674397265737857e-06, + "loss": 0.4107217490673065, + "step": 11796 + }, + { + "epoch": 2.179787737755656, + "grad_norm": 0.10715235769748688, + "learning_rate": 3.6728523074823276e-06, + "loss": 0.5502179265022278, + "step": 11797 + }, + { + "epoch": 2.1799725144645516, + "grad_norm": 0.06882361322641373, + "learning_rate": 3.6713076010274806e-06, + "loss": 0.37534210085868835, + "step": 11798 + }, + { + "epoch": 2.1801572911734475, + "grad_norm": 0.08649523556232452, + "learning_rate": 3.669763146434795e-06, + "loss": 0.4610140323638916, + "step": 11799 + }, + { + "epoch": 2.1803420678823433, + "grad_norm": 0.07134760916233063, + "learning_rate": 3.668218943765729e-06, + "loss": 0.28405073285102844, + "step": 11800 + }, + { + "epoch": 2.180526844591239, + "grad_norm": 0.07951401174068451, + "learning_rate": 3.666674993081738e-06, + "loss": 0.44334912300109863, + "step": 11801 + }, + { + "epoch": 2.180711621300135, + "grad_norm": 0.07624167203903198, + "learning_rate": 3.6651312944442684e-06, + "loss": 0.4165308475494385, + "step": 11802 + }, + { + "epoch": 2.180896398009031, + "grad_norm": 0.07992348074913025, + "learning_rate": 3.6635878479147545e-06, + "loss": 0.38696640729904175, + "step": 11803 + }, + { + "epoch": 2.1810811747179266, + "grad_norm": 0.09303223341703415, + "learning_rate": 3.6620446535546227e-06, + "loss": 0.48980575799942017, + "step": 11804 + }, + { + "epoch": 2.1812659514268224, + "grad_norm": 0.08045366406440735, + "learning_rate": 3.6605017114252816e-06, + "loss": 0.3471381664276123, + "step": 11805 + }, + { + "epoch": 2.1814507281357187, + "grad_norm": 0.08809412270784378, + "learning_rate": 3.65895902158814e-06, + "loss": 0.42216527462005615, + "step": 11806 + }, + { + "epoch": 2.181635504844614, + "grad_norm": 0.07181458920240402, + "learning_rate": 3.6574165841045894e-06, + "loss": 0.30205219984054565, + "step": 11807 + }, + { + "epoch": 2.1818202815535104, + "grad_norm": 0.0813143402338028, + "learning_rate": 3.655874399036016e-06, + "loss": 0.49499523639678955, + "step": 11808 + }, + { + "epoch": 2.182005058262406, + "grad_norm": 0.08100121468305588, + "learning_rate": 3.6543324664437916e-06, + "loss": 0.37999892234802246, + "step": 11809 + }, + { + "epoch": 2.182189834971302, + "grad_norm": 0.0968104675412178, + "learning_rate": 3.6527907863892877e-06, + "loss": 0.42118382453918457, + "step": 11810 + }, + { + "epoch": 2.182374611680198, + "grad_norm": 0.10659095644950867, + "learning_rate": 3.6512493589338483e-06, + "loss": 0.5945422649383545, + "step": 11811 + }, + { + "epoch": 2.1825593883890937, + "grad_norm": 0.07730945199728012, + "learning_rate": 3.6497081841388215e-06, + "loss": 0.42148011922836304, + "step": 11812 + }, + { + "epoch": 2.1827441650979895, + "grad_norm": 0.1045936793088913, + "learning_rate": 3.6481672620655452e-06, + "loss": 0.5791264772415161, + "step": 11813 + }, + { + "epoch": 2.1829289418068853, + "grad_norm": 0.08466159552335739, + "learning_rate": 3.646626592775332e-06, + "loss": 0.4451292157173157, + "step": 11814 + }, + { + "epoch": 2.183113718515781, + "grad_norm": 0.07520155608654022, + "learning_rate": 3.6450861763295076e-06, + "loss": 0.3622979521751404, + "step": 11815 + }, + { + "epoch": 2.183298495224677, + "grad_norm": 0.0929901972413063, + "learning_rate": 3.643546012789374e-06, + "loss": 0.5606027841567993, + "step": 11816 + }, + { + "epoch": 2.183483271933573, + "grad_norm": 0.09161688387393951, + "learning_rate": 3.642006102216219e-06, + "loss": 0.44947391748428345, + "step": 11817 + }, + { + "epoch": 2.1836680486424687, + "grad_norm": 0.09159534424543381, + "learning_rate": 3.64046644467133e-06, + "loss": 0.5428000092506409, + "step": 11818 + }, + { + "epoch": 2.1838528253513645, + "grad_norm": 0.08401782065629959, + "learning_rate": 3.638927040215984e-06, + "loss": 0.5569350123405457, + "step": 11819 + }, + { + "epoch": 2.1840376020602603, + "grad_norm": 0.07696730643510818, + "learning_rate": 3.6373878889114356e-06, + "loss": 0.4635745882987976, + "step": 11820 + }, + { + "epoch": 2.184222378769156, + "grad_norm": 0.08972986042499542, + "learning_rate": 3.635848990818944e-06, + "loss": 0.5002453327178955, + "step": 11821 + }, + { + "epoch": 2.184407155478052, + "grad_norm": 0.08567506819963455, + "learning_rate": 3.634310345999752e-06, + "loss": 0.46873846650123596, + "step": 11822 + }, + { + "epoch": 2.184591932186948, + "grad_norm": 0.09065090119838715, + "learning_rate": 3.632771954515092e-06, + "loss": 0.5605395436286926, + "step": 11823 + }, + { + "epoch": 2.1847767088958436, + "grad_norm": 0.07702086865901947, + "learning_rate": 3.6312338164261917e-06, + "loss": 0.38053658604621887, + "step": 11824 + }, + { + "epoch": 2.1849614856047395, + "grad_norm": 0.09548892825841904, + "learning_rate": 3.629695931794257e-06, + "loss": 0.568294882774353, + "step": 11825 + }, + { + "epoch": 2.1851462623136353, + "grad_norm": 0.09351617842912674, + "learning_rate": 3.6281583006804933e-06, + "loss": 0.5914403200149536, + "step": 11826 + }, + { + "epoch": 2.185331039022531, + "grad_norm": 0.09066955745220184, + "learning_rate": 3.626620923146095e-06, + "loss": 0.44789034128189087, + "step": 11827 + }, + { + "epoch": 2.185515815731427, + "grad_norm": 0.09279196709394455, + "learning_rate": 3.6250837992522435e-06, + "loss": 0.5210738182067871, + "step": 11828 + }, + { + "epoch": 2.1857005924403228, + "grad_norm": 0.06666386127471924, + "learning_rate": 3.6235469290601122e-06, + "loss": 0.2853530943393707, + "step": 11829 + }, + { + "epoch": 2.1858853691492186, + "grad_norm": 0.07821212708950043, + "learning_rate": 3.622010312630867e-06, + "loss": 0.3277926445007324, + "step": 11830 + }, + { + "epoch": 2.1860701458581144, + "grad_norm": 0.09027982503175735, + "learning_rate": 3.6204739500256546e-06, + "loss": 0.4053875207901001, + "step": 11831 + }, + { + "epoch": 2.1862549225670103, + "grad_norm": 0.08265797793865204, + "learning_rate": 3.61893784130562e-06, + "loss": 0.4514121413230896, + "step": 11832 + }, + { + "epoch": 2.186439699275906, + "grad_norm": 0.09165399521589279, + "learning_rate": 3.6174019865318987e-06, + "loss": 0.5018165111541748, + "step": 11833 + }, + { + "epoch": 2.186624475984802, + "grad_norm": 0.07767356932163239, + "learning_rate": 3.615866385765603e-06, + "loss": 0.43387266993522644, + "step": 11834 + }, + { + "epoch": 2.186809252693698, + "grad_norm": 0.08318489789962769, + "learning_rate": 3.6143310390678544e-06, + "loss": 0.45476630330085754, + "step": 11835 + }, + { + "epoch": 2.1869940294025936, + "grad_norm": 0.08616919815540314, + "learning_rate": 3.6127959464997565e-06, + "loss": 0.39537912607192993, + "step": 11836 + }, + { + "epoch": 2.18717880611149, + "grad_norm": 0.09167175740003586, + "learning_rate": 3.6112611081223937e-06, + "loss": 0.38645097613334656, + "step": 11837 + }, + { + "epoch": 2.1873635828203857, + "grad_norm": 0.08732689172029495, + "learning_rate": 3.6097265239968537e-06, + "loss": 0.46446800231933594, + "step": 11838 + }, + { + "epoch": 2.1875483595292815, + "grad_norm": 0.07376278191804886, + "learning_rate": 3.6081921941842024e-06, + "loss": 0.3687284588813782, + "step": 11839 + }, + { + "epoch": 2.1877331362381773, + "grad_norm": 0.08316045999526978, + "learning_rate": 3.6066581187455042e-06, + "loss": 0.39745235443115234, + "step": 11840 + }, + { + "epoch": 2.187917912947073, + "grad_norm": 0.09907536953687668, + "learning_rate": 3.605124297741811e-06, + "loss": 0.5286823511123657, + "step": 11841 + }, + { + "epoch": 2.188102689655969, + "grad_norm": 0.07469500601291656, + "learning_rate": 3.603590731234163e-06, + "loss": 0.36436185240745544, + "step": 11842 + }, + { + "epoch": 2.188287466364865, + "grad_norm": 0.0899069607257843, + "learning_rate": 3.6020574192835934e-06, + "loss": 0.371680349111557, + "step": 11843 + }, + { + "epoch": 2.1884722430737606, + "grad_norm": 0.0726056843996048, + "learning_rate": 3.6005243619511242e-06, + "loss": 0.37702593207359314, + "step": 11844 + }, + { + "epoch": 2.1886570197826565, + "grad_norm": 0.08643928915262222, + "learning_rate": 3.598991559297761e-06, + "loss": 0.45487257838249207, + "step": 11845 + }, + { + "epoch": 2.1888417964915523, + "grad_norm": 0.08755392581224442, + "learning_rate": 3.5974590113845076e-06, + "loss": 0.3754900395870209, + "step": 11846 + }, + { + "epoch": 2.189026573200448, + "grad_norm": 0.10274093598127365, + "learning_rate": 3.5959267182723544e-06, + "loss": 0.6082805395126343, + "step": 11847 + }, + { + "epoch": 2.189211349909344, + "grad_norm": 0.0784970372915268, + "learning_rate": 3.5943946800222816e-06, + "loss": 0.4100046455860138, + "step": 11848 + }, + { + "epoch": 2.18939612661824, + "grad_norm": 0.05849752947688103, + "learning_rate": 3.5928628966952608e-06, + "loss": 0.28361162543296814, + "step": 11849 + }, + { + "epoch": 2.1895809033271356, + "grad_norm": 0.07341820001602173, + "learning_rate": 3.5913313683522544e-06, + "loss": 0.3717271685600281, + "step": 11850 + }, + { + "epoch": 2.1897656800360314, + "grad_norm": 0.09280548244714737, + "learning_rate": 3.5898000950542067e-06, + "loss": 0.39923617243766785, + "step": 11851 + }, + { + "epoch": 2.1899504567449273, + "grad_norm": 0.10625611990690231, + "learning_rate": 3.58826907686206e-06, + "loss": 0.5870501399040222, + "step": 11852 + }, + { + "epoch": 2.190135233453823, + "grad_norm": 0.10157416760921478, + "learning_rate": 3.586738313836746e-06, + "loss": 0.5157890319824219, + "step": 11853 + }, + { + "epoch": 2.190320010162719, + "grad_norm": 0.08418985456228256, + "learning_rate": 3.585207806039178e-06, + "loss": 0.4399016201496124, + "step": 11854 + }, + { + "epoch": 2.1905047868716148, + "grad_norm": 0.08287651836872101, + "learning_rate": 3.583677553530276e-06, + "loss": 0.4425356388092041, + "step": 11855 + }, + { + "epoch": 2.1906895635805106, + "grad_norm": 0.08783163130283356, + "learning_rate": 3.5821475563709294e-06, + "loss": 0.4846430718898773, + "step": 11856 + }, + { + "epoch": 2.1908743402894064, + "grad_norm": 0.10092713683843613, + "learning_rate": 3.5806178146220315e-06, + "loss": 0.6803297400474548, + "step": 11857 + }, + { + "epoch": 2.1910591169983022, + "grad_norm": 0.09470190852880478, + "learning_rate": 3.5790883283444643e-06, + "loss": 0.5392593741416931, + "step": 11858 + }, + { + "epoch": 2.191243893707198, + "grad_norm": 0.08710703253746033, + "learning_rate": 3.5775590975990903e-06, + "loss": 0.548549234867096, + "step": 11859 + }, + { + "epoch": 2.191428670416094, + "grad_norm": 0.07717004418373108, + "learning_rate": 3.576030122446771e-06, + "loss": 0.4283442497253418, + "step": 11860 + }, + { + "epoch": 2.1916134471249897, + "grad_norm": 0.09224745631217957, + "learning_rate": 3.574501402948354e-06, + "loss": 0.44451847672462463, + "step": 11861 + }, + { + "epoch": 2.1917982238338856, + "grad_norm": 0.09668068587779999, + "learning_rate": 3.572972939164678e-06, + "loss": 0.4325692653656006, + "step": 11862 + }, + { + "epoch": 2.1919830005427814, + "grad_norm": 0.07948118448257446, + "learning_rate": 3.5714447311565727e-06, + "loss": 0.37542101740837097, + "step": 11863 + }, + { + "epoch": 2.192167777251677, + "grad_norm": 0.1118590459227562, + "learning_rate": 3.5699167789848575e-06, + "loss": 0.49960219860076904, + "step": 11864 + }, + { + "epoch": 2.192352553960573, + "grad_norm": 0.0815550684928894, + "learning_rate": 3.568389082710334e-06, + "loss": 0.4175896942615509, + "step": 11865 + }, + { + "epoch": 2.1925373306694693, + "grad_norm": 0.08199605345726013, + "learning_rate": 3.566861642393803e-06, + "loss": 0.3794676661491394, + "step": 11866 + }, + { + "epoch": 2.192722107378365, + "grad_norm": 0.06854138523340225, + "learning_rate": 3.5653344580960525e-06, + "loss": 0.3001802861690521, + "step": 11867 + }, + { + "epoch": 2.192906884087261, + "grad_norm": 0.0924617350101471, + "learning_rate": 3.5638075298778584e-06, + "loss": 0.4806053936481476, + "step": 11868 + }, + { + "epoch": 2.193091660796157, + "grad_norm": 0.0795179083943367, + "learning_rate": 3.5622808577999922e-06, + "loss": 0.45049938559532166, + "step": 11869 + }, + { + "epoch": 2.1932764375050526, + "grad_norm": 0.07543812692165375, + "learning_rate": 3.5607544419232033e-06, + "loss": 0.39572301506996155, + "step": 11870 + }, + { + "epoch": 2.1934612142139485, + "grad_norm": 0.06583981215953827, + "learning_rate": 3.5592282823082413e-06, + "loss": 0.368479460477829, + "step": 11871 + }, + { + "epoch": 2.1936459909228443, + "grad_norm": 0.10285375267267227, + "learning_rate": 3.5577023790158472e-06, + "loss": 0.7166904807090759, + "step": 11872 + }, + { + "epoch": 2.19383076763174, + "grad_norm": 0.08366060256958008, + "learning_rate": 3.5561767321067387e-06, + "loss": 0.4107072055339813, + "step": 11873 + }, + { + "epoch": 2.194015544340636, + "grad_norm": 0.08970008790493011, + "learning_rate": 3.554651341641634e-06, + "loss": 0.45388728380203247, + "step": 11874 + }, + { + "epoch": 2.1942003210495318, + "grad_norm": 0.08710724115371704, + "learning_rate": 3.553126207681247e-06, + "loss": 0.49304187297821045, + "step": 11875 + }, + { + "epoch": 2.1943850977584276, + "grad_norm": 0.07061879336833954, + "learning_rate": 3.551601330286264e-06, + "loss": 0.36744633316993713, + "step": 11876 + }, + { + "epoch": 2.1945698744673234, + "grad_norm": 0.0940442904829979, + "learning_rate": 3.550076709517374e-06, + "loss": 0.4634474813938141, + "step": 11877 + }, + { + "epoch": 2.1947546511762193, + "grad_norm": 0.08538489043712616, + "learning_rate": 3.5485523454352543e-06, + "loss": 0.44748833775520325, + "step": 11878 + }, + { + "epoch": 2.194939427885115, + "grad_norm": 0.1014489233493805, + "learning_rate": 3.547028238100564e-06, + "loss": 0.5843960642814636, + "step": 11879 + }, + { + "epoch": 2.195124204594011, + "grad_norm": 0.10388089716434479, + "learning_rate": 3.5455043875739615e-06, + "loss": 0.4797080457210541, + "step": 11880 + }, + { + "epoch": 2.1953089813029067, + "grad_norm": 0.08865340054035187, + "learning_rate": 3.5439807939160907e-06, + "loss": 0.4965810179710388, + "step": 11881 + }, + { + "epoch": 2.1954937580118026, + "grad_norm": 0.07938023656606674, + "learning_rate": 3.5424574571875857e-06, + "loss": 0.41988202929496765, + "step": 11882 + }, + { + "epoch": 2.1956785347206984, + "grad_norm": 0.08724203705787659, + "learning_rate": 3.5409343774490714e-06, + "loss": 0.5557771921157837, + "step": 11883 + }, + { + "epoch": 2.1958633114295942, + "grad_norm": 0.08031976968050003, + "learning_rate": 3.539411554761164e-06, + "loss": 0.4043499231338501, + "step": 11884 + }, + { + "epoch": 2.19604808813849, + "grad_norm": 0.08532105386257172, + "learning_rate": 3.5378889891844616e-06, + "loss": 0.6659044027328491, + "step": 11885 + }, + { + "epoch": 2.196232864847386, + "grad_norm": 0.06489470601081848, + "learning_rate": 3.5363666807795595e-06, + "loss": 0.3123665452003479, + "step": 11886 + }, + { + "epoch": 2.1964176415562817, + "grad_norm": 0.10015974193811417, + "learning_rate": 3.5348446296070414e-06, + "loss": 0.4849621653556824, + "step": 11887 + }, + { + "epoch": 2.1966024182651775, + "grad_norm": 0.07387948781251907, + "learning_rate": 3.5333228357274794e-06, + "loss": 0.36443015933036804, + "step": 11888 + }, + { + "epoch": 2.1967871949740734, + "grad_norm": 0.08133751153945923, + "learning_rate": 3.5318012992014418e-06, + "loss": 0.4533928632736206, + "step": 11889 + }, + { + "epoch": 2.196971971682969, + "grad_norm": 0.08863720297813416, + "learning_rate": 3.5302800200894715e-06, + "loss": 0.40780341625213623, + "step": 11890 + }, + { + "epoch": 2.197156748391865, + "grad_norm": 0.08395319432020187, + "learning_rate": 3.5287589984521154e-06, + "loss": 0.48462802171707153, + "step": 11891 + }, + { + "epoch": 2.197341525100761, + "grad_norm": 0.09309794008731842, + "learning_rate": 3.527238234349909e-06, + "loss": 0.4617069363594055, + "step": 11892 + }, + { + "epoch": 2.1975263018096567, + "grad_norm": 0.09020397067070007, + "learning_rate": 3.5257177278433674e-06, + "loss": 0.45796695351600647, + "step": 11893 + }, + { + "epoch": 2.1977110785185525, + "grad_norm": 0.07047610729932785, + "learning_rate": 3.524197478993e-06, + "loss": 0.3196723461151123, + "step": 11894 + }, + { + "epoch": 2.197895855227449, + "grad_norm": 0.08661914616823196, + "learning_rate": 3.5226774878593208e-06, + "loss": 0.4376320242881775, + "step": 11895 + }, + { + "epoch": 2.1980806319363446, + "grad_norm": 0.08178701251745224, + "learning_rate": 3.5211577545028086e-06, + "loss": 0.42635318636894226, + "step": 11896 + }, + { + "epoch": 2.1982654086452404, + "grad_norm": 0.07078079879283905, + "learning_rate": 3.5196382789839477e-06, + "loss": 0.402277410030365, + "step": 11897 + }, + { + "epoch": 2.1984501853541363, + "grad_norm": 0.07917147874832153, + "learning_rate": 3.518119061363213e-06, + "loss": 0.4432550072669983, + "step": 11898 + }, + { + "epoch": 2.198634962063032, + "grad_norm": 0.08097616583108902, + "learning_rate": 3.5166001017010563e-06, + "loss": 0.4853762090206146, + "step": 11899 + }, + { + "epoch": 2.198819738771928, + "grad_norm": 0.08459150791168213, + "learning_rate": 3.5150814000579327e-06, + "loss": 0.45333394408226013, + "step": 11900 + }, + { + "epoch": 2.1990045154808238, + "grad_norm": 0.09444347023963928, + "learning_rate": 3.5135629564942797e-06, + "loss": 0.46410757303237915, + "step": 11901 + }, + { + "epoch": 2.1991892921897196, + "grad_norm": 0.08272453397512436, + "learning_rate": 3.5120447710705285e-06, + "loss": 0.33521467447280884, + "step": 11902 + }, + { + "epoch": 2.1993740688986154, + "grad_norm": 0.09044880419969559, + "learning_rate": 3.5105268438470996e-06, + "loss": 0.4096111059188843, + "step": 11903 + }, + { + "epoch": 2.1995588456075112, + "grad_norm": 0.11137299984693527, + "learning_rate": 3.5090091748843967e-06, + "loss": 0.5373837351799011, + "step": 11904 + }, + { + "epoch": 2.199743622316407, + "grad_norm": 0.09911485016345978, + "learning_rate": 3.5074917642428207e-06, + "loss": 0.5808022618293762, + "step": 11905 + }, + { + "epoch": 2.199928399025303, + "grad_norm": 0.10136883705854416, + "learning_rate": 3.5059746119827597e-06, + "loss": 0.4865693151950836, + "step": 11906 + }, + { + "epoch": 2.2001131757341987, + "grad_norm": 0.1080540269613266, + "learning_rate": 3.5044577181645923e-06, + "loss": 0.48078617453575134, + "step": 11907 + }, + { + "epoch": 2.2002979524430946, + "grad_norm": 0.07321763783693314, + "learning_rate": 3.502941082848685e-06, + "loss": 0.3743250370025635, + "step": 11908 + }, + { + "epoch": 2.2004827291519904, + "grad_norm": 0.08438288420438766, + "learning_rate": 3.501424706095401e-06, + "loss": 0.4794260561466217, + "step": 11909 + }, + { + "epoch": 2.200667505860886, + "grad_norm": 0.08540996164083481, + "learning_rate": 3.4999085879650772e-06, + "loss": 0.4936371147632599, + "step": 11910 + }, + { + "epoch": 2.200852282569782, + "grad_norm": 0.08107694983482361, + "learning_rate": 3.4983927285180565e-06, + "loss": 0.35202932357788086, + "step": 11911 + }, + { + "epoch": 2.201037059278678, + "grad_norm": 0.0972508043050766, + "learning_rate": 3.4968771278146675e-06, + "loss": 0.4687209725379944, + "step": 11912 + }, + { + "epoch": 2.2012218359875737, + "grad_norm": 0.0832279622554779, + "learning_rate": 3.495361785915219e-06, + "loss": 0.4170093238353729, + "step": 11913 + }, + { + "epoch": 2.2014066126964695, + "grad_norm": 0.0792597085237503, + "learning_rate": 3.4938467028800182e-06, + "loss": 0.2837228775024414, + "step": 11914 + }, + { + "epoch": 2.2015913894053654, + "grad_norm": 0.08879082649946213, + "learning_rate": 3.4923318787693704e-06, + "loss": 0.440814733505249, + "step": 11915 + }, + { + "epoch": 2.201776166114261, + "grad_norm": 0.08689576387405396, + "learning_rate": 3.4908173136435508e-06, + "loss": 0.3709763288497925, + "step": 11916 + }, + { + "epoch": 2.201960942823157, + "grad_norm": 0.06214141845703125, + "learning_rate": 3.4893030075628367e-06, + "loss": 0.2839714586734772, + "step": 11917 + }, + { + "epoch": 2.202145719532053, + "grad_norm": 0.0947304293513298, + "learning_rate": 3.487788960587497e-06, + "loss": 0.43734437227249146, + "step": 11918 + }, + { + "epoch": 2.2023304962409487, + "grad_norm": 0.07839353382587433, + "learning_rate": 3.48627517277778e-06, + "loss": 0.33791032433509827, + "step": 11919 + }, + { + "epoch": 2.2025152729498445, + "grad_norm": 0.09421993046998978, + "learning_rate": 3.4847616441939314e-06, + "loss": 0.4476447105407715, + "step": 11920 + }, + { + "epoch": 2.2027000496587403, + "grad_norm": 0.08122393488883972, + "learning_rate": 3.4832483748961866e-06, + "loss": 0.36605972051620483, + "step": 11921 + }, + { + "epoch": 2.202884826367636, + "grad_norm": 0.08852571249008179, + "learning_rate": 3.481735364944767e-06, + "loss": 0.39648857712745667, + "step": 11922 + }, + { + "epoch": 2.203069603076532, + "grad_norm": 0.07064167410135269, + "learning_rate": 3.4802226143998917e-06, + "loss": 0.2931559085845947, + "step": 11923 + }, + { + "epoch": 2.2032543797854283, + "grad_norm": 0.08540599793195724, + "learning_rate": 3.4787101233217546e-06, + "loss": 0.42261940240859985, + "step": 11924 + }, + { + "epoch": 2.203439156494324, + "grad_norm": 0.11070524901151657, + "learning_rate": 3.477197891770552e-06, + "loss": 0.5631526112556458, + "step": 11925 + }, + { + "epoch": 2.20362393320322, + "grad_norm": 0.11584214866161346, + "learning_rate": 3.475685919806465e-06, + "loss": 0.6900765895843506, + "step": 11926 + }, + { + "epoch": 2.2038087099121157, + "grad_norm": 0.09533005952835083, + "learning_rate": 3.474174207489668e-06, + "loss": 0.42649832367897034, + "step": 11927 + }, + { + "epoch": 2.2039934866210116, + "grad_norm": 0.0895998403429985, + "learning_rate": 3.4726627548803205e-06, + "loss": 0.4245060384273529, + "step": 11928 + }, + { + "epoch": 2.2041782633299074, + "grad_norm": 0.082939513027668, + "learning_rate": 3.471151562038577e-06, + "loss": 0.4422062039375305, + "step": 11929 + }, + { + "epoch": 2.2043630400388032, + "grad_norm": 0.0672890916466713, + "learning_rate": 3.469640629024572e-06, + "loss": 0.30697351694107056, + "step": 11930 + }, + { + "epoch": 2.204547816747699, + "grad_norm": 0.09478650987148285, + "learning_rate": 3.468129955898439e-06, + "loss": 0.4834575653076172, + "step": 11931 + }, + { + "epoch": 2.204732593456595, + "grad_norm": 0.07390895485877991, + "learning_rate": 3.466619542720302e-06, + "loss": 0.36322087049484253, + "step": 11932 + }, + { + "epoch": 2.2049173701654907, + "grad_norm": 0.08637066185474396, + "learning_rate": 3.46510938955026e-06, + "loss": 0.49460700154304504, + "step": 11933 + }, + { + "epoch": 2.2051021468743865, + "grad_norm": 0.09042581170797348, + "learning_rate": 3.4635994964484252e-06, + "loss": 0.40781348943710327, + "step": 11934 + }, + { + "epoch": 2.2052869235832824, + "grad_norm": 0.09745407849550247, + "learning_rate": 3.462089863474878e-06, + "loss": 0.4389975666999817, + "step": 11935 + }, + { + "epoch": 2.205471700292178, + "grad_norm": 0.08554673939943314, + "learning_rate": 3.4605804906897e-06, + "loss": 0.4696071147918701, + "step": 11936 + }, + { + "epoch": 2.205656477001074, + "grad_norm": 0.060610331594944, + "learning_rate": 3.4590713781529616e-06, + "loss": 0.2960943877696991, + "step": 11937 + }, + { + "epoch": 2.20584125370997, + "grad_norm": 0.07446163147687912, + "learning_rate": 3.4575625259247157e-06, + "loss": 0.393887996673584, + "step": 11938 + }, + { + "epoch": 2.2060260304188657, + "grad_norm": 0.07066913694143295, + "learning_rate": 3.456053934065012e-06, + "loss": 0.3027123212814331, + "step": 11939 + }, + { + "epoch": 2.2062108071277615, + "grad_norm": 0.07689270377159119, + "learning_rate": 3.4545456026338896e-06, + "loss": 0.3906818628311157, + "step": 11940 + }, + { + "epoch": 2.2063955838366573, + "grad_norm": 0.05763699486851692, + "learning_rate": 3.4530375316913734e-06, + "loss": 0.27099940180778503, + "step": 11941 + }, + { + "epoch": 2.206580360545553, + "grad_norm": 0.0828971266746521, + "learning_rate": 3.451529721297481e-06, + "loss": 0.44155043363571167, + "step": 11942 + }, + { + "epoch": 2.206765137254449, + "grad_norm": 0.0719708651304245, + "learning_rate": 3.450022171512221e-06, + "loss": 0.35504668951034546, + "step": 11943 + }, + { + "epoch": 2.206949913963345, + "grad_norm": 0.10324752330780029, + "learning_rate": 3.4485148823955827e-06, + "loss": 0.4461381435394287, + "step": 11944 + }, + { + "epoch": 2.2071346906722407, + "grad_norm": 0.09716669470071793, + "learning_rate": 3.4470078540075556e-06, + "loss": 0.5391970872879028, + "step": 11945 + }, + { + "epoch": 2.2073194673811365, + "grad_norm": 0.07661837339401245, + "learning_rate": 3.445501086408114e-06, + "loss": 0.38608694076538086, + "step": 11946 + }, + { + "epoch": 2.2075042440900323, + "grad_norm": 0.08236774057149887, + "learning_rate": 3.443994579657223e-06, + "loss": 0.48675239086151123, + "step": 11947 + }, + { + "epoch": 2.207689020798928, + "grad_norm": 0.10291637480258942, + "learning_rate": 3.4424883338148364e-06, + "loss": 0.5458536148071289, + "step": 11948 + }, + { + "epoch": 2.207873797507824, + "grad_norm": 0.07039511948823929, + "learning_rate": 3.440982348940902e-06, + "loss": 0.3777443766593933, + "step": 11949 + }, + { + "epoch": 2.20805857421672, + "grad_norm": 0.08171594887971878, + "learning_rate": 3.439476625095346e-06, + "loss": 0.4108811914920807, + "step": 11950 + }, + { + "epoch": 2.2082433509256156, + "grad_norm": 0.09483866393566132, + "learning_rate": 3.4379711623380984e-06, + "loss": 0.41626477241516113, + "step": 11951 + }, + { + "epoch": 2.2084281276345115, + "grad_norm": 0.0926767885684967, + "learning_rate": 3.436465960729065e-06, + "loss": 0.37870514392852783, + "step": 11952 + }, + { + "epoch": 2.2086129043434077, + "grad_norm": 0.08968275785446167, + "learning_rate": 3.4349610203281492e-06, + "loss": 0.4915672540664673, + "step": 11953 + }, + { + "epoch": 2.2087976810523036, + "grad_norm": 0.08231180906295776, + "learning_rate": 3.4334563411952514e-06, + "loss": 0.39932894706726074, + "step": 11954 + }, + { + "epoch": 2.2089824577611994, + "grad_norm": 0.08135201781988144, + "learning_rate": 3.4319519233902443e-06, + "loss": 0.34979644417762756, + "step": 11955 + }, + { + "epoch": 2.209167234470095, + "grad_norm": 0.07436149567365646, + "learning_rate": 3.4304477669730008e-06, + "loss": 0.35989686846733093, + "step": 11956 + }, + { + "epoch": 2.209352011178991, + "grad_norm": 0.09048670530319214, + "learning_rate": 3.4289438720033875e-06, + "loss": 0.4885326027870178, + "step": 11957 + }, + { + "epoch": 2.209536787887887, + "grad_norm": 0.13973698019981384, + "learning_rate": 3.4274402385412452e-06, + "loss": 0.7130243182182312, + "step": 11958 + }, + { + "epoch": 2.2097215645967827, + "grad_norm": 0.07120823860168457, + "learning_rate": 3.425936866646419e-06, + "loss": 0.32339027523994446, + "step": 11959 + }, + { + "epoch": 2.2099063413056785, + "grad_norm": 0.08559058606624603, + "learning_rate": 3.424433756378738e-06, + "loss": 0.3834274709224701, + "step": 11960 + }, + { + "epoch": 2.2100911180145744, + "grad_norm": 0.0591914989054203, + "learning_rate": 3.422930907798021e-06, + "loss": 0.3140208125114441, + "step": 11961 + }, + { + "epoch": 2.21027589472347, + "grad_norm": 0.06373035162687302, + "learning_rate": 3.4214283209640774e-06, + "loss": 0.33546945452690125, + "step": 11962 + }, + { + "epoch": 2.210460671432366, + "grad_norm": 0.08530183136463165, + "learning_rate": 3.4199259959367084e-06, + "loss": 0.42840614914894104, + "step": 11963 + }, + { + "epoch": 2.210645448141262, + "grad_norm": 0.09318653494119644, + "learning_rate": 3.418423932775694e-06, + "loss": 0.5658191442489624, + "step": 11964 + }, + { + "epoch": 2.2108302248501577, + "grad_norm": 0.09994968771934509, + "learning_rate": 3.4169221315408163e-06, + "loss": 0.5752363204956055, + "step": 11965 + }, + { + "epoch": 2.2110150015590535, + "grad_norm": 0.10247165709733963, + "learning_rate": 3.4154205922918428e-06, + "loss": 0.44143274426460266, + "step": 11966 + }, + { + "epoch": 2.2111997782679493, + "grad_norm": 0.0819198489189148, + "learning_rate": 3.4139193150885284e-06, + "loss": 0.41120263934135437, + "step": 11967 + }, + { + "epoch": 2.211384554976845, + "grad_norm": 0.09771253913640976, + "learning_rate": 3.412418299990623e-06, + "loss": 0.47400107979774475, + "step": 11968 + }, + { + "epoch": 2.211569331685741, + "grad_norm": 0.11114433407783508, + "learning_rate": 3.410917547057857e-06, + "loss": 0.6460572481155396, + "step": 11969 + }, + { + "epoch": 2.211754108394637, + "grad_norm": 0.09645184874534607, + "learning_rate": 3.4094170563499575e-06, + "loss": 0.485895037651062, + "step": 11970 + }, + { + "epoch": 2.2119388851035326, + "grad_norm": 0.08106567710638046, + "learning_rate": 3.407916827926644e-06, + "loss": 0.37728479504585266, + "step": 11971 + }, + { + "epoch": 2.2121236618124285, + "grad_norm": 0.07832597196102142, + "learning_rate": 3.4064168618476125e-06, + "loss": 0.36626359820365906, + "step": 11972 + }, + { + "epoch": 2.2123084385213243, + "grad_norm": 0.07612678408622742, + "learning_rate": 3.4049171581725584e-06, + "loss": 0.3591512441635132, + "step": 11973 + }, + { + "epoch": 2.21249321523022, + "grad_norm": 0.09806806594133377, + "learning_rate": 3.403417716961174e-06, + "loss": 0.5178738832473755, + "step": 11974 + }, + { + "epoch": 2.212677991939116, + "grad_norm": 0.07646838575601578, + "learning_rate": 3.4019185382731233e-06, + "loss": 0.31318169832229614, + "step": 11975 + }, + { + "epoch": 2.212862768648012, + "grad_norm": 0.0919872522354126, + "learning_rate": 3.400419622168073e-06, + "loss": 0.43961310386657715, + "step": 11976 + }, + { + "epoch": 2.2130475453569076, + "grad_norm": 0.08175146579742432, + "learning_rate": 3.3989209687056767e-06, + "loss": 0.38150280714035034, + "step": 11977 + }, + { + "epoch": 2.2132323220658034, + "grad_norm": 0.0916038379073143, + "learning_rate": 3.3974225779455703e-06, + "loss": 0.4013831913471222, + "step": 11978 + }, + { + "epoch": 2.2134170987746993, + "grad_norm": 0.08610547333955765, + "learning_rate": 3.3959244499473886e-06, + "loss": 0.4272667467594147, + "step": 11979 + }, + { + "epoch": 2.213601875483595, + "grad_norm": 0.08782272040843964, + "learning_rate": 3.3944265847707525e-06, + "loss": 0.5055347084999084, + "step": 11980 + }, + { + "epoch": 2.213786652192491, + "grad_norm": 0.06641381978988647, + "learning_rate": 3.392928982475272e-06, + "loss": 0.30324873328208923, + "step": 11981 + }, + { + "epoch": 2.213971428901387, + "grad_norm": 0.11009874194860458, + "learning_rate": 3.3914316431205476e-06, + "loss": 0.6258784532546997, + "step": 11982 + }, + { + "epoch": 2.2141562056102826, + "grad_norm": 0.08467631787061691, + "learning_rate": 3.389934566766171e-06, + "loss": 0.4102543294429779, + "step": 11983 + }, + { + "epoch": 2.214340982319179, + "grad_norm": 0.06726336479187012, + "learning_rate": 3.388437753471715e-06, + "loss": 0.2789519429206848, + "step": 11984 + }, + { + "epoch": 2.2145257590280747, + "grad_norm": 0.08241001516580582, + "learning_rate": 3.3869412032967552e-06, + "loss": 0.46530020236968994, + "step": 11985 + }, + { + "epoch": 2.2147105357369705, + "grad_norm": 0.06900808215141296, + "learning_rate": 3.3854449163008395e-06, + "loss": 0.3533879816532135, + "step": 11986 + }, + { + "epoch": 2.2148953124458663, + "grad_norm": 0.08180130273103714, + "learning_rate": 3.3839488925435248e-06, + "loss": 0.4307593107223511, + "step": 11987 + }, + { + "epoch": 2.215080089154762, + "grad_norm": 0.08925405889749527, + "learning_rate": 3.382453132084349e-06, + "loss": 0.41310012340545654, + "step": 11988 + }, + { + "epoch": 2.215264865863658, + "grad_norm": 0.1149209663271904, + "learning_rate": 3.380957634982831e-06, + "loss": 0.49383172392845154, + "step": 11989 + }, + { + "epoch": 2.215449642572554, + "grad_norm": 0.08416301012039185, + "learning_rate": 3.3794624012984913e-06, + "loss": 0.4587768316268921, + "step": 11990 + }, + { + "epoch": 2.2156344192814497, + "grad_norm": 0.08828668296337128, + "learning_rate": 3.3779674310908373e-06, + "loss": 0.4280339479446411, + "step": 11991 + }, + { + "epoch": 2.2158191959903455, + "grad_norm": 0.08794192224740982, + "learning_rate": 3.3764727244193596e-06, + "loss": 0.40835249423980713, + "step": 11992 + }, + { + "epoch": 2.2160039726992413, + "grad_norm": 0.08347532898187637, + "learning_rate": 3.3749782813435415e-06, + "loss": 0.41212543845176697, + "step": 11993 + }, + { + "epoch": 2.216188749408137, + "grad_norm": 0.11080392450094223, + "learning_rate": 3.373484101922867e-06, + "loss": 0.6224543452262878, + "step": 11994 + }, + { + "epoch": 2.216373526117033, + "grad_norm": 0.06352946907281876, + "learning_rate": 3.3719901862167903e-06, + "loss": 0.31230124831199646, + "step": 11995 + }, + { + "epoch": 2.216558302825929, + "grad_norm": 0.07902231812477112, + "learning_rate": 3.3704965342847683e-06, + "loss": 0.34405481815338135, + "step": 11996 + }, + { + "epoch": 2.2167430795348246, + "grad_norm": 0.07277875393629074, + "learning_rate": 3.369003146186246e-06, + "loss": 0.2731490731239319, + "step": 11997 + }, + { + "epoch": 2.2169278562437205, + "grad_norm": 0.07820237427949905, + "learning_rate": 3.36751002198065e-06, + "loss": 0.34150072932243347, + "step": 11998 + }, + { + "epoch": 2.2171126329526163, + "grad_norm": 0.08240468800067902, + "learning_rate": 3.366017161727404e-06, + "loss": 0.42970985174179077, + "step": 11999 + }, + { + "epoch": 2.217297409661512, + "grad_norm": 0.08826398104429245, + "learning_rate": 3.3645245654859206e-06, + "loss": 0.3769395053386688, + "step": 12000 + }, + { + "epoch": 2.217297409661512, + "eval_loss": 0.5515779852867126, + "eval_runtime": 155.3367, + "eval_samples_per_second": 117.352, + "eval_steps_per_second": 14.671, + "step": 12000 + }, + { + "epoch": 2.217482186370408, + "grad_norm": 0.08796269446611404, + "learning_rate": 3.3630322333155996e-06, + "loss": 0.49599650502204895, + "step": 12001 + }, + { + "epoch": 2.2176669630793038, + "grad_norm": 0.10347124934196472, + "learning_rate": 3.3615401652758353e-06, + "loss": 0.49439701437950134, + "step": 12002 + }, + { + "epoch": 2.2178517397881996, + "grad_norm": 0.1028018519282341, + "learning_rate": 3.3600483614259983e-06, + "loss": 0.4486571252346039, + "step": 12003 + }, + { + "epoch": 2.2180365164970954, + "grad_norm": 0.10097562521696091, + "learning_rate": 3.358556821825464e-06, + "loss": 0.5498074293136597, + "step": 12004 + }, + { + "epoch": 2.2182212932059913, + "grad_norm": 0.07746328413486481, + "learning_rate": 3.357065546533592e-06, + "loss": 0.3791002929210663, + "step": 12005 + }, + { + "epoch": 2.218406069914887, + "grad_norm": 0.08741196244955063, + "learning_rate": 3.3555745356097224e-06, + "loss": 0.41606611013412476, + "step": 12006 + }, + { + "epoch": 2.218590846623783, + "grad_norm": 0.09901390224695206, + "learning_rate": 3.3540837891132027e-06, + "loss": 0.5478041768074036, + "step": 12007 + }, + { + "epoch": 2.2187756233326787, + "grad_norm": 0.0802740678191185, + "learning_rate": 3.3525933071033578e-06, + "loss": 0.3526931405067444, + "step": 12008 + }, + { + "epoch": 2.2189604000415746, + "grad_norm": 0.09909913688898087, + "learning_rate": 3.3511030896394994e-06, + "loss": 0.5463982224464417, + "step": 12009 + }, + { + "epoch": 2.2191451767504704, + "grad_norm": 0.07805711776018143, + "learning_rate": 3.349613136780936e-06, + "loss": 0.4081910252571106, + "step": 12010 + }, + { + "epoch": 2.2193299534593667, + "grad_norm": 0.07684759795665741, + "learning_rate": 3.3481234485869673e-06, + "loss": 0.4387986361980438, + "step": 12011 + }, + { + "epoch": 2.219514730168262, + "grad_norm": 0.07478204369544983, + "learning_rate": 3.3466340251168706e-06, + "loss": 0.38535648584365845, + "step": 12012 + }, + { + "epoch": 2.2196995068771583, + "grad_norm": 0.06635311245918274, + "learning_rate": 3.3451448664299203e-06, + "loss": 0.2572517395019531, + "step": 12013 + }, + { + "epoch": 2.219884283586054, + "grad_norm": 0.08421587198972702, + "learning_rate": 3.343655972585391e-06, + "loss": 0.5203030109405518, + "step": 12014 + }, + { + "epoch": 2.22006906029495, + "grad_norm": 0.10073671489953995, + "learning_rate": 3.3421673436425263e-06, + "loss": 0.5088030695915222, + "step": 12015 + }, + { + "epoch": 2.220253837003846, + "grad_norm": 0.10075697302818298, + "learning_rate": 3.340678979660573e-06, + "loss": 0.4893205463886261, + "step": 12016 + }, + { + "epoch": 2.2204386137127416, + "grad_norm": 0.07593869417905807, + "learning_rate": 3.3391908806987604e-06, + "loss": 0.3319643437862396, + "step": 12017 + }, + { + "epoch": 2.2206233904216375, + "grad_norm": 0.11687902361154556, + "learning_rate": 3.3377030468163107e-06, + "loss": 0.5747287273406982, + "step": 12018 + }, + { + "epoch": 2.2208081671305333, + "grad_norm": 0.08582670241594315, + "learning_rate": 3.3362154780724378e-06, + "loss": 0.369323194026947, + "step": 12019 + }, + { + "epoch": 2.220992943839429, + "grad_norm": 0.0650225281715393, + "learning_rate": 3.3347281745263394e-06, + "loss": 0.256856769323349, + "step": 12020 + }, + { + "epoch": 2.221177720548325, + "grad_norm": 0.08185169100761414, + "learning_rate": 3.3332411362372063e-06, + "loss": 0.4835938811302185, + "step": 12021 + }, + { + "epoch": 2.221362497257221, + "grad_norm": 0.07414474338293076, + "learning_rate": 3.3317543632642215e-06, + "loss": 0.4327687919139862, + "step": 12022 + }, + { + "epoch": 2.2215472739661166, + "grad_norm": 0.08091580122709274, + "learning_rate": 3.330267855666548e-06, + "loss": 0.428693562746048, + "step": 12023 + }, + { + "epoch": 2.2217320506750124, + "grad_norm": 0.10111634433269501, + "learning_rate": 3.3287816135033467e-06, + "loss": 0.4685934782028198, + "step": 12024 + }, + { + "epoch": 2.2219168273839083, + "grad_norm": 0.06814808398485184, + "learning_rate": 3.327295636833766e-06, + "loss": 0.3026934862136841, + "step": 12025 + }, + { + "epoch": 2.222101604092804, + "grad_norm": 0.10446780920028687, + "learning_rate": 3.325809925716943e-06, + "loss": 0.6697503924369812, + "step": 12026 + }, + { + "epoch": 2.2222863808017, + "grad_norm": 0.12640899419784546, + "learning_rate": 3.3243244802120034e-06, + "loss": 0.6471139788627625, + "step": 12027 + }, + { + "epoch": 2.2224711575105958, + "grad_norm": 0.09218905866146088, + "learning_rate": 3.322839300378068e-06, + "loss": 0.4591101109981537, + "step": 12028 + }, + { + "epoch": 2.2226559342194916, + "grad_norm": 0.11036652326583862, + "learning_rate": 3.321354386274235e-06, + "loss": 0.6440871357917786, + "step": 12029 + }, + { + "epoch": 2.2228407109283874, + "grad_norm": 0.10076822340488434, + "learning_rate": 3.3198697379596023e-06, + "loss": 0.553157389163971, + "step": 12030 + }, + { + "epoch": 2.2230254876372832, + "grad_norm": 0.08576725423336029, + "learning_rate": 3.3183853554932576e-06, + "loss": 0.45056310296058655, + "step": 12031 + }, + { + "epoch": 2.223210264346179, + "grad_norm": 0.10493794828653336, + "learning_rate": 3.316901238934268e-06, + "loss": 0.550243079662323, + "step": 12032 + }, + { + "epoch": 2.223395041055075, + "grad_norm": 0.11206745356321335, + "learning_rate": 3.3154173883417016e-06, + "loss": 0.6716387867927551, + "step": 12033 + }, + { + "epoch": 2.2235798177639707, + "grad_norm": 0.08630875498056412, + "learning_rate": 3.3139338037746083e-06, + "loss": 0.38989031314849854, + "step": 12034 + }, + { + "epoch": 2.2237645944728666, + "grad_norm": 0.1089872494339943, + "learning_rate": 3.3124504852920323e-06, + "loss": 0.5918889045715332, + "step": 12035 + }, + { + "epoch": 2.2239493711817624, + "grad_norm": 0.09236101806163788, + "learning_rate": 3.3109674329530084e-06, + "loss": 0.4525047838687897, + "step": 12036 + }, + { + "epoch": 2.224134147890658, + "grad_norm": 0.06594641506671906, + "learning_rate": 3.3094846468165497e-06, + "loss": 0.33796876668930054, + "step": 12037 + }, + { + "epoch": 2.224318924599554, + "grad_norm": 0.07526916265487671, + "learning_rate": 3.3080021269416696e-06, + "loss": 0.36459073424339294, + "step": 12038 + }, + { + "epoch": 2.22450370130845, + "grad_norm": 0.10719329863786697, + "learning_rate": 3.306519873387368e-06, + "loss": 0.5638750791549683, + "step": 12039 + }, + { + "epoch": 2.224688478017346, + "grad_norm": 0.05023134872317314, + "learning_rate": 3.3050378862126355e-06, + "loss": 0.2469005435705185, + "step": 12040 + }, + { + "epoch": 2.2248732547262415, + "grad_norm": 0.10357912629842758, + "learning_rate": 3.303556165476448e-06, + "loss": 0.49883297085762024, + "step": 12041 + }, + { + "epoch": 2.225058031435138, + "grad_norm": 0.09542550891637802, + "learning_rate": 3.302074711237778e-06, + "loss": 0.502481997013092, + "step": 12042 + }, + { + "epoch": 2.2252428081440336, + "grad_norm": 0.08671344816684723, + "learning_rate": 3.3005935235555762e-06, + "loss": 0.3964404761791229, + "step": 12043 + }, + { + "epoch": 2.2254275848529295, + "grad_norm": 0.11842737346887589, + "learning_rate": 3.299112602488793e-06, + "loss": 0.6013658046722412, + "step": 12044 + }, + { + "epoch": 2.2256123615618253, + "grad_norm": 0.0860036090016365, + "learning_rate": 3.297631948096364e-06, + "loss": 0.4228939414024353, + "step": 12045 + }, + { + "epoch": 2.225797138270721, + "grad_norm": 0.08696401119232178, + "learning_rate": 3.296151560437214e-06, + "loss": 0.47006645798683167, + "step": 12046 + }, + { + "epoch": 2.225981914979617, + "grad_norm": 0.0872708186507225, + "learning_rate": 3.2946714395702584e-06, + "loss": 0.40168893337249756, + "step": 12047 + }, + { + "epoch": 2.2261666916885128, + "grad_norm": 0.0834670215845108, + "learning_rate": 3.2931915855544038e-06, + "loss": 0.44790807366371155, + "step": 12048 + }, + { + "epoch": 2.2263514683974086, + "grad_norm": 0.07953152805566788, + "learning_rate": 3.291711998448539e-06, + "loss": 0.4631030559539795, + "step": 12049 + }, + { + "epoch": 2.2265362451063044, + "grad_norm": 0.09107434004545212, + "learning_rate": 3.2902326783115514e-06, + "loss": 0.5299221277236938, + "step": 12050 + }, + { + "epoch": 2.2267210218152003, + "grad_norm": 0.08279658854007721, + "learning_rate": 3.2887536252023087e-06, + "loss": 0.41639232635498047, + "step": 12051 + }, + { + "epoch": 2.226905798524096, + "grad_norm": 0.08936301618814468, + "learning_rate": 3.2872748391796736e-06, + "loss": 0.45119351148605347, + "step": 12052 + }, + { + "epoch": 2.227090575232992, + "grad_norm": 0.07799794524908066, + "learning_rate": 3.2857963203025e-06, + "loss": 0.39014753699302673, + "step": 12053 + }, + { + "epoch": 2.2272753519418877, + "grad_norm": 0.08831122517585754, + "learning_rate": 3.2843180686296262e-06, + "loss": 0.4471457302570343, + "step": 12054 + }, + { + "epoch": 2.2274601286507836, + "grad_norm": 0.09028404951095581, + "learning_rate": 3.282840084219883e-06, + "loss": 0.45349228382110596, + "step": 12055 + }, + { + "epoch": 2.2276449053596794, + "grad_norm": 0.08866817504167557, + "learning_rate": 3.2813623671320914e-06, + "loss": 0.43158385157585144, + "step": 12056 + }, + { + "epoch": 2.2278296820685752, + "grad_norm": 0.08073228597640991, + "learning_rate": 3.2798849174250546e-06, + "loss": 0.40182361006736755, + "step": 12057 + }, + { + "epoch": 2.228014458777471, + "grad_norm": 0.09359989315271378, + "learning_rate": 3.278407735157574e-06, + "loss": 0.4055033028125763, + "step": 12058 + }, + { + "epoch": 2.228199235486367, + "grad_norm": 0.07239627838134766, + "learning_rate": 3.2769308203884365e-06, + "loss": 0.43219462037086487, + "step": 12059 + }, + { + "epoch": 2.2283840121952627, + "grad_norm": 0.11576760560274124, + "learning_rate": 3.275454173176418e-06, + "loss": 0.5686132907867432, + "step": 12060 + }, + { + "epoch": 2.2285687889041585, + "grad_norm": 0.10355863720178604, + "learning_rate": 3.273977793580285e-06, + "loss": 0.5342440009117126, + "step": 12061 + }, + { + "epoch": 2.2287535656130544, + "grad_norm": 0.10653654485940933, + "learning_rate": 3.2725016816587973e-06, + "loss": 0.5543602705001831, + "step": 12062 + }, + { + "epoch": 2.22893834232195, + "grad_norm": 0.12046940624713898, + "learning_rate": 3.2710258374706904e-06, + "loss": 0.6986100077629089, + "step": 12063 + }, + { + "epoch": 2.229123119030846, + "grad_norm": 0.09040313959121704, + "learning_rate": 3.269550261074703e-06, + "loss": 0.37894928455352783, + "step": 12064 + }, + { + "epoch": 2.229307895739742, + "grad_norm": 0.06946759670972824, + "learning_rate": 3.26807495252956e-06, + "loss": 0.34007248282432556, + "step": 12065 + }, + { + "epoch": 2.2294926724486377, + "grad_norm": 0.07851733267307281, + "learning_rate": 3.266599911893971e-06, + "loss": 0.3887780010700226, + "step": 12066 + }, + { + "epoch": 2.2296774491575335, + "grad_norm": 0.08522792905569077, + "learning_rate": 3.2651251392266424e-06, + "loss": 0.43342339992523193, + "step": 12067 + }, + { + "epoch": 2.2298622258664293, + "grad_norm": 0.0925016775727272, + "learning_rate": 3.2636506345862595e-06, + "loss": 0.4961267113685608, + "step": 12068 + }, + { + "epoch": 2.230047002575325, + "grad_norm": 0.0975349023938179, + "learning_rate": 3.262176398031506e-06, + "loss": 0.4908147156238556, + "step": 12069 + }, + { + "epoch": 2.230231779284221, + "grad_norm": 0.09131171554327011, + "learning_rate": 3.2607024296210553e-06, + "loss": 0.4999105632305145, + "step": 12070 + }, + { + "epoch": 2.2304165559931173, + "grad_norm": 0.10944220423698425, + "learning_rate": 3.2592287294135604e-06, + "loss": 0.5443828701972961, + "step": 12071 + }, + { + "epoch": 2.230601332702013, + "grad_norm": 0.08160214871168137, + "learning_rate": 3.2577552974676718e-06, + "loss": 0.38871800899505615, + "step": 12072 + }, + { + "epoch": 2.230786109410909, + "grad_norm": 0.073506660759449, + "learning_rate": 3.2562821338420303e-06, + "loss": 0.37624433636665344, + "step": 12073 + }, + { + "epoch": 2.2309708861198048, + "grad_norm": 0.07640836387872696, + "learning_rate": 3.25480923859526e-06, + "loss": 0.3682493567466736, + "step": 12074 + }, + { + "epoch": 2.2311556628287006, + "grad_norm": 0.06404562294483185, + "learning_rate": 3.25333661178598e-06, + "loss": 0.3405209183692932, + "step": 12075 + }, + { + "epoch": 2.2313404395375964, + "grad_norm": 0.09222139418125153, + "learning_rate": 3.2518642534727985e-06, + "loss": 0.5926950573921204, + "step": 12076 + }, + { + "epoch": 2.2315252162464922, + "grad_norm": 0.0780331939458847, + "learning_rate": 3.250392163714303e-06, + "loss": 0.4412083029747009, + "step": 12077 + }, + { + "epoch": 2.231709992955388, + "grad_norm": 0.07865004241466522, + "learning_rate": 3.248920342569084e-06, + "loss": 0.43166717886924744, + "step": 12078 + }, + { + "epoch": 2.231894769664284, + "grad_norm": 0.07837144285440445, + "learning_rate": 3.247448790095713e-06, + "loss": 0.4415147304534912, + "step": 12079 + }, + { + "epoch": 2.2320795463731797, + "grad_norm": 0.07582159340381622, + "learning_rate": 3.2459775063527543e-06, + "loss": 0.4302468001842499, + "step": 12080 + }, + { + "epoch": 2.2322643230820756, + "grad_norm": 0.0823829248547554, + "learning_rate": 3.2445064913987644e-06, + "loss": 0.48967957496643066, + "step": 12081 + }, + { + "epoch": 2.2324490997909714, + "grad_norm": 0.07873893529176712, + "learning_rate": 3.243035745292277e-06, + "loss": 0.3432171642780304, + "step": 12082 + }, + { + "epoch": 2.232633876499867, + "grad_norm": 0.1042979285120964, + "learning_rate": 3.2415652680918262e-06, + "loss": 0.5159332752227783, + "step": 12083 + }, + { + "epoch": 2.232818653208763, + "grad_norm": 0.08104976266622543, + "learning_rate": 3.240095059855938e-06, + "loss": 0.42197322845458984, + "step": 12084 + }, + { + "epoch": 2.233003429917659, + "grad_norm": 0.09248243272304535, + "learning_rate": 3.238625120643111e-06, + "loss": 0.526531994342804, + "step": 12085 + }, + { + "epoch": 2.2331882066265547, + "grad_norm": 0.0943179726600647, + "learning_rate": 3.237155450511852e-06, + "loss": 0.5523107647895813, + "step": 12086 + }, + { + "epoch": 2.2333729833354505, + "grad_norm": 0.08297137916088104, + "learning_rate": 3.235686049520652e-06, + "loss": 0.4356424808502197, + "step": 12087 + }, + { + "epoch": 2.2335577600443464, + "grad_norm": 0.06990028917789459, + "learning_rate": 3.2342169177279826e-06, + "loss": 0.3431797921657562, + "step": 12088 + }, + { + "epoch": 2.233742536753242, + "grad_norm": 0.09813868999481201, + "learning_rate": 3.2327480551923107e-06, + "loss": 0.4996069371700287, + "step": 12089 + }, + { + "epoch": 2.233927313462138, + "grad_norm": 0.08070428669452667, + "learning_rate": 3.2312794619720976e-06, + "loss": 0.3879484236240387, + "step": 12090 + }, + { + "epoch": 2.234112090171034, + "grad_norm": 0.0751897394657135, + "learning_rate": 3.229811138125782e-06, + "loss": 0.4100872576236725, + "step": 12091 + }, + { + "epoch": 2.2342968668799297, + "grad_norm": 0.07286939024925232, + "learning_rate": 3.2283430837118035e-06, + "loss": 0.32202818989753723, + "step": 12092 + }, + { + "epoch": 2.2344816435888255, + "grad_norm": 0.08649271726608276, + "learning_rate": 3.2268752987885834e-06, + "loss": 0.5581140518188477, + "step": 12093 + }, + { + "epoch": 2.2346664202977213, + "grad_norm": 0.09588231891393661, + "learning_rate": 3.225407783414536e-06, + "loss": 0.4919707775115967, + "step": 12094 + }, + { + "epoch": 2.234851197006617, + "grad_norm": 0.10235357284545898, + "learning_rate": 3.2239405376480638e-06, + "loss": 0.4288097620010376, + "step": 12095 + }, + { + "epoch": 2.235035973715513, + "grad_norm": 0.07489422708749771, + "learning_rate": 3.2224735615475612e-06, + "loss": 0.3515622913837433, + "step": 12096 + }, + { + "epoch": 2.235220750424409, + "grad_norm": 0.08253826200962067, + "learning_rate": 3.2210068551714045e-06, + "loss": 0.4348796010017395, + "step": 12097 + }, + { + "epoch": 2.2354055271333046, + "grad_norm": 0.07540115714073181, + "learning_rate": 3.2195404185779654e-06, + "loss": 0.3401135504245758, + "step": 12098 + }, + { + "epoch": 2.2355903038422005, + "grad_norm": 0.06745466589927673, + "learning_rate": 3.2180742518256047e-06, + "loss": 0.3664204180240631, + "step": 12099 + }, + { + "epoch": 2.2357750805510967, + "grad_norm": 0.07638771086931229, + "learning_rate": 3.216608354972671e-06, + "loss": 0.43941521644592285, + "step": 12100 + }, + { + "epoch": 2.2359598572599926, + "grad_norm": 0.08894451707601547, + "learning_rate": 3.215142728077505e-06, + "loss": 0.46996885538101196, + "step": 12101 + }, + { + "epoch": 2.2361446339688884, + "grad_norm": 0.07854200154542923, + "learning_rate": 3.2136773711984293e-06, + "loss": 0.36215487122535706, + "step": 12102 + }, + { + "epoch": 2.2363294106777842, + "grad_norm": 0.09711235761642456, + "learning_rate": 3.212212284393761e-06, + "loss": 0.41373053193092346, + "step": 12103 + }, + { + "epoch": 2.23651418738668, + "grad_norm": 0.07770132273435593, + "learning_rate": 3.210747467721812e-06, + "loss": 0.3723510503768921, + "step": 12104 + }, + { + "epoch": 2.236698964095576, + "grad_norm": 0.09046914428472519, + "learning_rate": 3.2092829212408662e-06, + "loss": 0.4817093312740326, + "step": 12105 + }, + { + "epoch": 2.2368837408044717, + "grad_norm": 0.060229867696762085, + "learning_rate": 3.2078186450092176e-06, + "loss": 0.23560619354248047, + "step": 12106 + }, + { + "epoch": 2.2370685175133675, + "grad_norm": 0.09669230878353119, + "learning_rate": 3.2063546390851397e-06, + "loss": 0.4499640464782715, + "step": 12107 + }, + { + "epoch": 2.2372532942222634, + "grad_norm": 0.08333880454301834, + "learning_rate": 3.2048909035268906e-06, + "loss": 0.4146597385406494, + "step": 12108 + }, + { + "epoch": 2.237438070931159, + "grad_norm": 0.0770716592669487, + "learning_rate": 3.2034274383927233e-06, + "loss": 0.39594510197639465, + "step": 12109 + }, + { + "epoch": 2.237622847640055, + "grad_norm": 0.07659077644348145, + "learning_rate": 3.2019642437408836e-06, + "loss": 0.32030820846557617, + "step": 12110 + }, + { + "epoch": 2.237807624348951, + "grad_norm": 0.09327710419893265, + "learning_rate": 3.2005013196295953e-06, + "loss": 0.47533729672431946, + "step": 12111 + }, + { + "epoch": 2.2379924010578467, + "grad_norm": 0.08465027809143066, + "learning_rate": 3.1990386661170825e-06, + "loss": 0.40699127316474915, + "step": 12112 + }, + { + "epoch": 2.2381771777667425, + "grad_norm": 0.0673907920718193, + "learning_rate": 3.197576283261553e-06, + "loss": 0.30016353726387024, + "step": 12113 + }, + { + "epoch": 2.2383619544756383, + "grad_norm": 0.08417432755231857, + "learning_rate": 3.196114171121205e-06, + "loss": 0.42807674407958984, + "step": 12114 + }, + { + "epoch": 2.238546731184534, + "grad_norm": 0.08571884036064148, + "learning_rate": 3.1946523297542298e-06, + "loss": 0.4084596633911133, + "step": 12115 + }, + { + "epoch": 2.23873150789343, + "grad_norm": 0.068547323346138, + "learning_rate": 3.1931907592187973e-06, + "loss": 0.2529278099536896, + "step": 12116 + }, + { + "epoch": 2.238916284602326, + "grad_norm": 0.06548000872135162, + "learning_rate": 3.1917294595730763e-06, + "loss": 0.27962929010391235, + "step": 12117 + }, + { + "epoch": 2.2391010613112217, + "grad_norm": 0.10712318867444992, + "learning_rate": 3.190268430875223e-06, + "loss": 0.48425424098968506, + "step": 12118 + }, + { + "epoch": 2.2392858380201175, + "grad_norm": 0.10012485086917877, + "learning_rate": 3.188807673183382e-06, + "loss": 0.4494583308696747, + "step": 12119 + }, + { + "epoch": 2.2394706147290133, + "grad_norm": 0.09713777899742126, + "learning_rate": 3.1873471865556848e-06, + "loss": 0.544177770614624, + "step": 12120 + }, + { + "epoch": 2.239655391437909, + "grad_norm": 0.09713928401470184, + "learning_rate": 3.1858869710502593e-06, + "loss": 0.5080793499946594, + "step": 12121 + }, + { + "epoch": 2.239840168146805, + "grad_norm": 0.10769485682249069, + "learning_rate": 3.184427026725211e-06, + "loss": 0.5964470505714417, + "step": 12122 + }, + { + "epoch": 2.240024944855701, + "grad_norm": 0.08020690083503723, + "learning_rate": 3.182967353638643e-06, + "loss": 0.3854624032974243, + "step": 12123 + }, + { + "epoch": 2.2402097215645966, + "grad_norm": 0.10620839148759842, + "learning_rate": 3.1815079518486505e-06, + "loss": 0.6212494373321533, + "step": 12124 + }, + { + "epoch": 2.2403944982734925, + "grad_norm": 0.06393173336982727, + "learning_rate": 3.1800488214133017e-06, + "loss": 0.31893590092658997, + "step": 12125 + }, + { + "epoch": 2.2405792749823883, + "grad_norm": 0.08972685784101486, + "learning_rate": 3.1785899623906767e-06, + "loss": 0.397165983915329, + "step": 12126 + }, + { + "epoch": 2.240764051691284, + "grad_norm": 0.0904398113489151, + "learning_rate": 3.1771313748388334e-06, + "loss": 0.4341369867324829, + "step": 12127 + }, + { + "epoch": 2.24094882840018, + "grad_norm": 0.06866602599620819, + "learning_rate": 3.1756730588158124e-06, + "loss": 0.2974797487258911, + "step": 12128 + }, + { + "epoch": 2.241133605109076, + "grad_norm": 0.10474536567926407, + "learning_rate": 3.1742150143796525e-06, + "loss": 0.5112119317054749, + "step": 12129 + }, + { + "epoch": 2.241318381817972, + "grad_norm": 0.08430393785238266, + "learning_rate": 3.1727572415883835e-06, + "loss": 0.4515208899974823, + "step": 12130 + }, + { + "epoch": 2.241503158526868, + "grad_norm": 0.08922908455133438, + "learning_rate": 3.1712997405000124e-06, + "loss": 0.4198884963989258, + "step": 12131 + }, + { + "epoch": 2.2416879352357637, + "grad_norm": 0.0893121287226677, + "learning_rate": 3.1698425111725485e-06, + "loss": 0.45976704359054565, + "step": 12132 + }, + { + "epoch": 2.2418727119446595, + "grad_norm": 0.07319130748510361, + "learning_rate": 3.168385553663983e-06, + "loss": 0.4156619608402252, + "step": 12133 + }, + { + "epoch": 2.2420574886535554, + "grad_norm": 0.09958360344171524, + "learning_rate": 3.1669288680322997e-06, + "loss": 0.6084612011909485, + "step": 12134 + }, + { + "epoch": 2.242242265362451, + "grad_norm": 0.08181202411651611, + "learning_rate": 3.165472454335472e-06, + "loss": 0.4880278408527374, + "step": 12135 + }, + { + "epoch": 2.242427042071347, + "grad_norm": 0.07543037086725235, + "learning_rate": 3.164016312631456e-06, + "loss": 0.43885689973831177, + "step": 12136 + }, + { + "epoch": 2.242611818780243, + "grad_norm": 0.08452077209949493, + "learning_rate": 3.162560442978203e-06, + "loss": 0.37461787462234497, + "step": 12137 + }, + { + "epoch": 2.2427965954891387, + "grad_norm": 0.08403962850570679, + "learning_rate": 3.1611048454336523e-06, + "loss": 0.3464776873588562, + "step": 12138 + }, + { + "epoch": 2.2429813721980345, + "grad_norm": 0.06161332130432129, + "learning_rate": 3.159649520055733e-06, + "loss": 0.3631473779678345, + "step": 12139 + }, + { + "epoch": 2.2431661489069303, + "grad_norm": 0.06809493154287338, + "learning_rate": 3.158194466902362e-06, + "loss": 0.3071410059928894, + "step": 12140 + }, + { + "epoch": 2.243350925615826, + "grad_norm": 0.08933389186859131, + "learning_rate": 3.1567396860314503e-06, + "loss": 0.4868786633014679, + "step": 12141 + }, + { + "epoch": 2.243535702324722, + "grad_norm": 0.12151765823364258, + "learning_rate": 3.1552851775008853e-06, + "loss": 0.6342805027961731, + "step": 12142 + }, + { + "epoch": 2.243720479033618, + "grad_norm": 0.0988689661026001, + "learning_rate": 3.153830941368555e-06, + "loss": 0.4726806879043579, + "step": 12143 + }, + { + "epoch": 2.2439052557425136, + "grad_norm": 0.0977170541882515, + "learning_rate": 3.1523769776923384e-06, + "loss": 0.46017003059387207, + "step": 12144 + }, + { + "epoch": 2.2440900324514095, + "grad_norm": 0.07871796190738678, + "learning_rate": 3.1509232865300886e-06, + "loss": 0.343954861164093, + "step": 12145 + }, + { + "epoch": 2.2442748091603053, + "grad_norm": 0.09883001446723938, + "learning_rate": 3.1494698679396697e-06, + "loss": 0.7568049430847168, + "step": 12146 + }, + { + "epoch": 2.244459585869201, + "grad_norm": 0.08142291754484177, + "learning_rate": 3.1480167219789136e-06, + "loss": 0.4528813362121582, + "step": 12147 + }, + { + "epoch": 2.244644362578097, + "grad_norm": 0.07283273339271545, + "learning_rate": 3.146563848705656e-06, + "loss": 0.3055526316165924, + "step": 12148 + }, + { + "epoch": 2.244829139286993, + "grad_norm": 0.10215907543897629, + "learning_rate": 3.1451112481777193e-06, + "loss": 0.5181620121002197, + "step": 12149 + }, + { + "epoch": 2.2450139159958886, + "grad_norm": 0.0920044481754303, + "learning_rate": 3.1436589204529044e-06, + "loss": 0.5287151336669922, + "step": 12150 + }, + { + "epoch": 2.2451986927047844, + "grad_norm": 0.1108456403017044, + "learning_rate": 3.1422068655890136e-06, + "loss": 0.5236777663230896, + "step": 12151 + }, + { + "epoch": 2.2453834694136803, + "grad_norm": 0.08828677237033844, + "learning_rate": 3.140755083643835e-06, + "loss": 0.49629533290863037, + "step": 12152 + }, + { + "epoch": 2.245568246122576, + "grad_norm": 0.08154132962226868, + "learning_rate": 3.1393035746751443e-06, + "loss": 0.41256043314933777, + "step": 12153 + }, + { + "epoch": 2.245753022831472, + "grad_norm": 0.06214584410190582, + "learning_rate": 3.1378523387407068e-06, + "loss": 0.3293093144893646, + "step": 12154 + }, + { + "epoch": 2.2459377995403678, + "grad_norm": 0.0842163935303688, + "learning_rate": 3.1364013758982803e-06, + "loss": 0.4421621859073639, + "step": 12155 + }, + { + "epoch": 2.2461225762492636, + "grad_norm": 0.08379825204610825, + "learning_rate": 3.134950686205602e-06, + "loss": 0.41820988059043884, + "step": 12156 + }, + { + "epoch": 2.2463073529581594, + "grad_norm": 0.11791109293699265, + "learning_rate": 3.1335002697204085e-06, + "loss": 0.8276405930519104, + "step": 12157 + }, + { + "epoch": 2.2464921296670557, + "grad_norm": 0.07951841503381729, + "learning_rate": 3.132050126500422e-06, + "loss": 0.3736032545566559, + "step": 12158 + }, + { + "epoch": 2.246676906375951, + "grad_norm": 0.0968988686800003, + "learning_rate": 3.1306002566033545e-06, + "loss": 0.40144771337509155, + "step": 12159 + }, + { + "epoch": 2.2468616830848473, + "grad_norm": 0.12055794149637222, + "learning_rate": 3.1291506600869037e-06, + "loss": 0.5013450384140015, + "step": 12160 + }, + { + "epoch": 2.247046459793743, + "grad_norm": 0.08677809685468674, + "learning_rate": 3.127701337008764e-06, + "loss": 0.41387808322906494, + "step": 12161 + }, + { + "epoch": 2.247231236502639, + "grad_norm": 0.07982303202152252, + "learning_rate": 3.1262522874266076e-06, + "loss": 0.40415987372398376, + "step": 12162 + }, + { + "epoch": 2.247416013211535, + "grad_norm": 0.10855205357074738, + "learning_rate": 3.124803511398108e-06, + "loss": 0.506013810634613, + "step": 12163 + }, + { + "epoch": 2.2476007899204307, + "grad_norm": 0.0775640606880188, + "learning_rate": 3.123355008980916e-06, + "loss": 0.41776102781295776, + "step": 12164 + }, + { + "epoch": 2.2477855666293265, + "grad_norm": 0.09691471606492996, + "learning_rate": 3.1219067802326763e-06, + "loss": 0.6090372800827026, + "step": 12165 + }, + { + "epoch": 2.2479703433382223, + "grad_norm": 0.08877398073673248, + "learning_rate": 3.1204588252110358e-06, + "loss": 0.49977755546569824, + "step": 12166 + }, + { + "epoch": 2.248155120047118, + "grad_norm": 0.11311852186918259, + "learning_rate": 3.119011143973606e-06, + "loss": 0.6844763159751892, + "step": 12167 + }, + { + "epoch": 2.248339896756014, + "grad_norm": 0.09220191091299057, + "learning_rate": 3.1175637365780053e-06, + "loss": 0.36833304166793823, + "step": 12168 + }, + { + "epoch": 2.24852467346491, + "grad_norm": 0.10116321593523026, + "learning_rate": 3.116116603081839e-06, + "loss": 0.5867173671722412, + "step": 12169 + }, + { + "epoch": 2.2487094501738056, + "grad_norm": 0.06711015850305557, + "learning_rate": 3.114669743542692e-06, + "loss": 0.3416115343570709, + "step": 12170 + }, + { + "epoch": 2.2488942268827015, + "grad_norm": 0.08700167387723923, + "learning_rate": 3.113223158018148e-06, + "loss": 0.41278156638145447, + "step": 12171 + }, + { + "epoch": 2.2490790035915973, + "grad_norm": 0.07968169450759888, + "learning_rate": 3.111776846565776e-06, + "loss": 0.35030606389045715, + "step": 12172 + }, + { + "epoch": 2.249263780300493, + "grad_norm": 0.07849443703889847, + "learning_rate": 3.110330809243134e-06, + "loss": 0.3592144250869751, + "step": 12173 + }, + { + "epoch": 2.249448557009389, + "grad_norm": 0.0856466069817543, + "learning_rate": 3.1088850461077724e-06, + "loss": 0.4331666827201843, + "step": 12174 + }, + { + "epoch": 2.2496333337182848, + "grad_norm": 0.08074059337377548, + "learning_rate": 3.1074395572172287e-06, + "loss": 0.4187830984592438, + "step": 12175 + }, + { + "epoch": 2.2498181104271806, + "grad_norm": 0.08852949738502502, + "learning_rate": 3.1059943426290228e-06, + "loss": 0.3407609760761261, + "step": 12176 + }, + { + "epoch": 2.2500028871360764, + "grad_norm": 0.08934972435235977, + "learning_rate": 3.104549402400675e-06, + "loss": 0.4727935791015625, + "step": 12177 + }, + { + "epoch": 2.2501876638449723, + "grad_norm": 0.0799507200717926, + "learning_rate": 3.103104736589687e-06, + "loss": 0.45224735140800476, + "step": 12178 + }, + { + "epoch": 2.250372440553868, + "grad_norm": 0.08885331451892853, + "learning_rate": 3.1016603452535533e-06, + "loss": 0.4831817150115967, + "step": 12179 + }, + { + "epoch": 2.250557217262764, + "grad_norm": 0.08326054364442825, + "learning_rate": 3.1002162284497584e-06, + "loss": 0.42966553568840027, + "step": 12180 + }, + { + "epoch": 2.2507419939716597, + "grad_norm": 0.08480405062437057, + "learning_rate": 3.0987723862357677e-06, + "loss": 0.33884021639823914, + "step": 12181 + }, + { + "epoch": 2.2509267706805556, + "grad_norm": 0.06686267256736755, + "learning_rate": 3.097328818669045e-06, + "loss": 0.3047025799751282, + "step": 12182 + }, + { + "epoch": 2.2511115473894514, + "grad_norm": 0.10958808660507202, + "learning_rate": 3.095885525807043e-06, + "loss": 0.5943275094032288, + "step": 12183 + }, + { + "epoch": 2.2512963240983472, + "grad_norm": 0.07362347841262817, + "learning_rate": 3.094442507707194e-06, + "loss": 0.3816607594490051, + "step": 12184 + }, + { + "epoch": 2.251481100807243, + "grad_norm": 0.08515738695859909, + "learning_rate": 3.092999764426925e-06, + "loss": 0.4283255636692047, + "step": 12185 + }, + { + "epoch": 2.251665877516139, + "grad_norm": 0.08227543532848358, + "learning_rate": 3.0915572960236617e-06, + "loss": 0.3923949599266052, + "step": 12186 + }, + { + "epoch": 2.251850654225035, + "grad_norm": 0.07537674158811569, + "learning_rate": 3.0901151025548026e-06, + "loss": 0.3772265911102295, + "step": 12187 + }, + { + "epoch": 2.2520354309339305, + "grad_norm": 0.08898736536502838, + "learning_rate": 3.0886731840777427e-06, + "loss": 0.42761051654815674, + "step": 12188 + }, + { + "epoch": 2.252220207642827, + "grad_norm": 0.06679991632699966, + "learning_rate": 3.087231540649872e-06, + "loss": 0.31185150146484375, + "step": 12189 + }, + { + "epoch": 2.2524049843517226, + "grad_norm": 0.09417211264371872, + "learning_rate": 3.0857901723285544e-06, + "loss": 0.5708805918693542, + "step": 12190 + }, + { + "epoch": 2.2525897610606185, + "grad_norm": 0.07637672126293182, + "learning_rate": 3.0843490791711562e-06, + "loss": 0.34745824337005615, + "step": 12191 + }, + { + "epoch": 2.2527745377695143, + "grad_norm": 0.0839376151561737, + "learning_rate": 3.082908261235029e-06, + "loss": 0.4435441195964813, + "step": 12192 + }, + { + "epoch": 2.25295931447841, + "grad_norm": 0.10060539841651917, + "learning_rate": 3.081467718577512e-06, + "loss": 0.6545190811157227, + "step": 12193 + }, + { + "epoch": 2.253144091187306, + "grad_norm": 0.07129131257534027, + "learning_rate": 3.0800274512559334e-06, + "loss": 0.414524108171463, + "step": 12194 + }, + { + "epoch": 2.253328867896202, + "grad_norm": 0.10510852932929993, + "learning_rate": 3.0785874593276167e-06, + "loss": 0.6114096641540527, + "step": 12195 + }, + { + "epoch": 2.2535136446050976, + "grad_norm": 0.08640439063310623, + "learning_rate": 3.077147742849862e-06, + "loss": 0.41606390476226807, + "step": 12196 + }, + { + "epoch": 2.2536984213139934, + "grad_norm": 0.09171494096517563, + "learning_rate": 3.0757083018799673e-06, + "loss": 0.51341313123703, + "step": 12197 + }, + { + "epoch": 2.2538831980228893, + "grad_norm": 0.08453557640314102, + "learning_rate": 3.0742691364752196e-06, + "loss": 0.43702369928359985, + "step": 12198 + }, + { + "epoch": 2.254067974731785, + "grad_norm": 0.0747491717338562, + "learning_rate": 3.0728302466928914e-06, + "loss": 0.37024620175361633, + "step": 12199 + }, + { + "epoch": 2.254252751440681, + "grad_norm": 0.0880352258682251, + "learning_rate": 3.0713916325902516e-06, + "loss": 0.44032952189445496, + "step": 12200 + }, + { + "epoch": 2.2544375281495768, + "grad_norm": 0.08446033298969269, + "learning_rate": 3.0699532942245446e-06, + "loss": 0.5010290741920471, + "step": 12201 + }, + { + "epoch": 2.2546223048584726, + "grad_norm": 0.07818498462438583, + "learning_rate": 3.0685152316530143e-06, + "loss": 0.44643253087997437, + "step": 12202 + }, + { + "epoch": 2.2548070815673684, + "grad_norm": 0.0621052086353302, + "learning_rate": 3.0670774449328956e-06, + "loss": 0.34929224848747253, + "step": 12203 + }, + { + "epoch": 2.2549918582762642, + "grad_norm": 0.09159861505031586, + "learning_rate": 3.0656399341214016e-06, + "loss": 0.49612244963645935, + "step": 12204 + }, + { + "epoch": 2.25517663498516, + "grad_norm": 0.07836683839559555, + "learning_rate": 3.064202699275739e-06, + "loss": 0.47169002890586853, + "step": 12205 + }, + { + "epoch": 2.255361411694056, + "grad_norm": 0.08987939357757568, + "learning_rate": 3.0627657404531164e-06, + "loss": 0.4715505838394165, + "step": 12206 + }, + { + "epoch": 2.2555461884029517, + "grad_norm": 0.08097187429666519, + "learning_rate": 3.061329057710711e-06, + "loss": 0.3911837637424469, + "step": 12207 + }, + { + "epoch": 2.2557309651118476, + "grad_norm": 0.0881069153547287, + "learning_rate": 3.0598926511057002e-06, + "loss": 0.4455852806568146, + "step": 12208 + }, + { + "epoch": 2.2559157418207434, + "grad_norm": 0.10333002358675003, + "learning_rate": 3.0584565206952534e-06, + "loss": 0.4860212802886963, + "step": 12209 + }, + { + "epoch": 2.256100518529639, + "grad_norm": 0.08561402559280396, + "learning_rate": 3.0570206665365152e-06, + "loss": 0.45404475927352905, + "step": 12210 + }, + { + "epoch": 2.256285295238535, + "grad_norm": 0.06861907243728638, + "learning_rate": 3.0555850886866334e-06, + "loss": 0.33333149552345276, + "step": 12211 + }, + { + "epoch": 2.256470071947431, + "grad_norm": 0.11791132390499115, + "learning_rate": 3.054149787202738e-06, + "loss": 0.5899015069007874, + "step": 12212 + }, + { + "epoch": 2.2566548486563267, + "grad_norm": 0.08489428460597992, + "learning_rate": 3.0527147621419504e-06, + "loss": 0.4829174280166626, + "step": 12213 + }, + { + "epoch": 2.2568396253652225, + "grad_norm": 0.08123718947172165, + "learning_rate": 3.051280013561384e-06, + "loss": 0.4060458838939667, + "step": 12214 + }, + { + "epoch": 2.2570244020741184, + "grad_norm": 0.08335306495428085, + "learning_rate": 3.0498455415181296e-06, + "loss": 0.3981687128543854, + "step": 12215 + }, + { + "epoch": 2.2572091787830146, + "grad_norm": 0.08260929584503174, + "learning_rate": 3.0484113460692786e-06, + "loss": 0.4201548993587494, + "step": 12216 + }, + { + "epoch": 2.25739395549191, + "grad_norm": 0.08963587880134583, + "learning_rate": 3.0469774272719075e-06, + "loss": 0.4701205790042877, + "step": 12217 + }, + { + "epoch": 2.2575787322008063, + "grad_norm": 0.09164369851350784, + "learning_rate": 3.0455437851830805e-06, + "loss": 0.4588594436645508, + "step": 12218 + }, + { + "epoch": 2.257763508909702, + "grad_norm": 0.07610947638750076, + "learning_rate": 3.044110419859855e-06, + "loss": 0.346407026052475, + "step": 12219 + }, + { + "epoch": 2.257948285618598, + "grad_norm": 0.0824890211224556, + "learning_rate": 3.042677331359274e-06, + "loss": 0.4116358160972595, + "step": 12220 + }, + { + "epoch": 2.258133062327494, + "grad_norm": 0.08806995302438736, + "learning_rate": 3.0412445197383667e-06, + "loss": 0.4943826198577881, + "step": 12221 + }, + { + "epoch": 2.2583178390363896, + "grad_norm": 0.10130472481250763, + "learning_rate": 3.0398119850541553e-06, + "loss": 0.5649289488792419, + "step": 12222 + }, + { + "epoch": 2.2585026157452854, + "grad_norm": 0.08071459829807281, + "learning_rate": 3.0383797273636552e-06, + "loss": 0.5119844675064087, + "step": 12223 + }, + { + "epoch": 2.2586873924541813, + "grad_norm": 0.08857184648513794, + "learning_rate": 3.0369477467238586e-06, + "loss": 0.3872298300266266, + "step": 12224 + }, + { + "epoch": 2.258872169163077, + "grad_norm": 0.0861668512225151, + "learning_rate": 3.035516043191753e-06, + "loss": 0.4301668405532837, + "step": 12225 + }, + { + "epoch": 2.259056945871973, + "grad_norm": 0.10000453144311905, + "learning_rate": 3.0340846168243265e-06, + "loss": 0.45657336711883545, + "step": 12226 + }, + { + "epoch": 2.2592417225808687, + "grad_norm": 0.0979422852396965, + "learning_rate": 3.0326534676785357e-06, + "loss": 0.4080390930175781, + "step": 12227 + }, + { + "epoch": 2.2594264992897646, + "grad_norm": 0.0736035481095314, + "learning_rate": 3.031222595811343e-06, + "loss": 0.3957144320011139, + "step": 12228 + }, + { + "epoch": 2.2596112759986604, + "grad_norm": 0.09222523123025894, + "learning_rate": 3.0297920012796842e-06, + "loss": 0.46320977807044983, + "step": 12229 + }, + { + "epoch": 2.2597960527075562, + "grad_norm": 0.09962626546621323, + "learning_rate": 3.0283616841404974e-06, + "loss": 0.47465965151786804, + "step": 12230 + }, + { + "epoch": 2.259980829416452, + "grad_norm": 0.07642363756895065, + "learning_rate": 3.0269316444507035e-06, + "loss": 0.4165627360343933, + "step": 12231 + }, + { + "epoch": 2.260165606125348, + "grad_norm": 0.08377820253372192, + "learning_rate": 3.0255018822672143e-06, + "loss": 0.4770684242248535, + "step": 12232 + }, + { + "epoch": 2.2603503828342437, + "grad_norm": 0.08431167155504227, + "learning_rate": 3.02407239764693e-06, + "loss": 0.4955669641494751, + "step": 12233 + }, + { + "epoch": 2.2605351595431395, + "grad_norm": 0.07196210324764252, + "learning_rate": 3.0226431906467425e-06, + "loss": 0.4227936863899231, + "step": 12234 + }, + { + "epoch": 2.2607199362520354, + "grad_norm": 0.08174673467874527, + "learning_rate": 3.021214261323524e-06, + "loss": 0.4703042507171631, + "step": 12235 + }, + { + "epoch": 2.260904712960931, + "grad_norm": 0.08144989609718323, + "learning_rate": 3.019785609734144e-06, + "loss": 0.3058887720108032, + "step": 12236 + }, + { + "epoch": 2.261089489669827, + "grad_norm": 0.09629102796316147, + "learning_rate": 3.0183572359354574e-06, + "loss": 0.5303314328193665, + "step": 12237 + }, + { + "epoch": 2.261274266378723, + "grad_norm": 0.08101619780063629, + "learning_rate": 3.0169291399843105e-06, + "loss": 0.41484183073043823, + "step": 12238 + }, + { + "epoch": 2.2614590430876187, + "grad_norm": 0.09562167525291443, + "learning_rate": 3.0155013219375374e-06, + "loss": 0.5148574709892273, + "step": 12239 + }, + { + "epoch": 2.2616438197965145, + "grad_norm": 0.08201883733272552, + "learning_rate": 3.0140737818519616e-06, + "loss": 0.41835635900497437, + "step": 12240 + }, + { + "epoch": 2.2618285965054103, + "grad_norm": 0.07495265454053879, + "learning_rate": 3.012646519784391e-06, + "loss": 0.36787310242652893, + "step": 12241 + }, + { + "epoch": 2.262013373214306, + "grad_norm": 0.08998357504606247, + "learning_rate": 3.0112195357916284e-06, + "loss": 0.4299069941043854, + "step": 12242 + }, + { + "epoch": 2.262198149923202, + "grad_norm": 0.08980880677700043, + "learning_rate": 3.0097928299304666e-06, + "loss": 0.3831421136856079, + "step": 12243 + }, + { + "epoch": 2.262382926632098, + "grad_norm": 0.09747115522623062, + "learning_rate": 3.0083664022576773e-06, + "loss": 0.4732903242111206, + "step": 12244 + }, + { + "epoch": 2.262567703340994, + "grad_norm": 0.07659371942281723, + "learning_rate": 3.0069402528300307e-06, + "loss": 0.3989792466163635, + "step": 12245 + }, + { + "epoch": 2.2627524800498895, + "grad_norm": 0.08523440361022949, + "learning_rate": 3.0055143817042844e-06, + "loss": 0.3412840962409973, + "step": 12246 + }, + { + "epoch": 2.2629372567587858, + "grad_norm": 0.08923565596342087, + "learning_rate": 3.0040887889371816e-06, + "loss": 0.35347479581832886, + "step": 12247 + }, + { + "epoch": 2.263122033467681, + "grad_norm": 0.10447020083665848, + "learning_rate": 3.002663474585461e-06, + "loss": 0.5423638224601746, + "step": 12248 + }, + { + "epoch": 2.2633068101765774, + "grad_norm": 0.06621216982603073, + "learning_rate": 3.001238438705839e-06, + "loss": 0.362056165933609, + "step": 12249 + }, + { + "epoch": 2.2634915868854733, + "grad_norm": 0.05140472948551178, + "learning_rate": 2.9998136813550318e-06, + "loss": 0.21704205870628357, + "step": 12250 + }, + { + "epoch": 2.263676363594369, + "grad_norm": 0.07546143233776093, + "learning_rate": 2.9983892025897386e-06, + "loss": 0.48610544204711914, + "step": 12251 + }, + { + "epoch": 2.263861140303265, + "grad_norm": 0.1048959270119667, + "learning_rate": 2.9969650024666497e-06, + "loss": 0.6081486940383911, + "step": 12252 + }, + { + "epoch": 2.2640459170121607, + "grad_norm": 0.061856064945459366, + "learning_rate": 2.9955410810424446e-06, + "loss": 0.29667016863822937, + "step": 12253 + }, + { + "epoch": 2.2642306937210566, + "grad_norm": 0.07674991339445114, + "learning_rate": 2.9941174383737937e-06, + "loss": 0.3966175615787506, + "step": 12254 + }, + { + "epoch": 2.2644154704299524, + "grad_norm": 0.10296843945980072, + "learning_rate": 2.992694074517346e-06, + "loss": 0.46330904960632324, + "step": 12255 + }, + { + "epoch": 2.264600247138848, + "grad_norm": 0.09111112356185913, + "learning_rate": 2.991270989529752e-06, + "loss": 0.4320758283138275, + "step": 12256 + }, + { + "epoch": 2.264785023847744, + "grad_norm": 0.10260162502527237, + "learning_rate": 2.9898481834676453e-06, + "loss": 0.5434527397155762, + "step": 12257 + }, + { + "epoch": 2.26496980055664, + "grad_norm": 0.08933594822883606, + "learning_rate": 2.988425656387648e-06, + "loss": 0.49509263038635254, + "step": 12258 + }, + { + "epoch": 2.2651545772655357, + "grad_norm": 0.07403186708688736, + "learning_rate": 2.987003408346374e-06, + "loss": 0.3863668441772461, + "step": 12259 + }, + { + "epoch": 2.2653393539744315, + "grad_norm": 0.07996796071529388, + "learning_rate": 2.9855814394004255e-06, + "loss": 0.32834455370903015, + "step": 12260 + }, + { + "epoch": 2.2655241306833274, + "grad_norm": 0.09327889233827591, + "learning_rate": 2.984159749606388e-06, + "loss": 0.4397789239883423, + "step": 12261 + }, + { + "epoch": 2.265708907392223, + "grad_norm": 0.08188837766647339, + "learning_rate": 2.9827383390208464e-06, + "loss": 0.3994868993759155, + "step": 12262 + }, + { + "epoch": 2.265893684101119, + "grad_norm": 0.09514510631561279, + "learning_rate": 2.9813172077003603e-06, + "loss": 0.5370783805847168, + "step": 12263 + }, + { + "epoch": 2.266078460810015, + "grad_norm": 0.08097981661558151, + "learning_rate": 2.9798963557014895e-06, + "loss": 0.36683669686317444, + "step": 12264 + }, + { + "epoch": 2.2662632375189107, + "grad_norm": 0.08686467260122299, + "learning_rate": 2.9784757830807852e-06, + "loss": 0.44896113872528076, + "step": 12265 + }, + { + "epoch": 2.2664480142278065, + "grad_norm": 0.09172335267066956, + "learning_rate": 2.9770554898947747e-06, + "loss": 0.5055941343307495, + "step": 12266 + }, + { + "epoch": 2.2666327909367023, + "grad_norm": 0.1009598821401596, + "learning_rate": 2.975635476199984e-06, + "loss": 0.6283546090126038, + "step": 12267 + }, + { + "epoch": 2.266817567645598, + "grad_norm": 0.06228647753596306, + "learning_rate": 2.9742157420529273e-06, + "loss": 0.2549760341644287, + "step": 12268 + }, + { + "epoch": 2.267002344354494, + "grad_norm": 0.07727950811386108, + "learning_rate": 2.9727962875101e-06, + "loss": 0.37321093678474426, + "step": 12269 + }, + { + "epoch": 2.26718712106339, + "grad_norm": 0.09002166986465454, + "learning_rate": 2.9713771126279958e-06, + "loss": 0.4421631097793579, + "step": 12270 + }, + { + "epoch": 2.2673718977722856, + "grad_norm": 0.10053392499685287, + "learning_rate": 2.9699582174630927e-06, + "loss": 0.534861147403717, + "step": 12271 + }, + { + "epoch": 2.2675566744811815, + "grad_norm": 0.06048179417848587, + "learning_rate": 2.9685396020718584e-06, + "loss": 0.26993635296821594, + "step": 12272 + }, + { + "epoch": 2.2677414511900773, + "grad_norm": 0.07431792467832565, + "learning_rate": 2.9671212665107496e-06, + "loss": 0.35286685824394226, + "step": 12273 + }, + { + "epoch": 2.2679262278989736, + "grad_norm": 0.08689253777265549, + "learning_rate": 2.9657032108362136e-06, + "loss": 0.5525953769683838, + "step": 12274 + }, + { + "epoch": 2.268111004607869, + "grad_norm": 0.0873987227678299, + "learning_rate": 2.96428543510468e-06, + "loss": 0.4629843831062317, + "step": 12275 + }, + { + "epoch": 2.2682957813167652, + "grad_norm": 0.08665183931589127, + "learning_rate": 2.9628679393725766e-06, + "loss": 0.4415779113769531, + "step": 12276 + }, + { + "epoch": 2.2684805580256606, + "grad_norm": 0.08151692152023315, + "learning_rate": 2.9614507236963077e-06, + "loss": 0.4622322618961334, + "step": 12277 + }, + { + "epoch": 2.268665334734557, + "grad_norm": 0.07684565335512161, + "learning_rate": 2.9600337881322805e-06, + "loss": 0.39885133504867554, + "step": 12278 + }, + { + "epoch": 2.2688501114434527, + "grad_norm": 0.09301837533712387, + "learning_rate": 2.958617132736887e-06, + "loss": 0.5050402879714966, + "step": 12279 + }, + { + "epoch": 2.2690348881523486, + "grad_norm": 0.09714837372303009, + "learning_rate": 2.9572007575665006e-06, + "loss": 0.5582828521728516, + "step": 12280 + }, + { + "epoch": 2.2692196648612444, + "grad_norm": 0.08942749351263046, + "learning_rate": 2.9557846626774876e-06, + "loss": 0.46698012948036194, + "step": 12281 + }, + { + "epoch": 2.26940444157014, + "grad_norm": 0.09786448627710342, + "learning_rate": 2.954368848126211e-06, + "loss": 0.5502012372016907, + "step": 12282 + }, + { + "epoch": 2.269589218279036, + "grad_norm": 0.06812259554862976, + "learning_rate": 2.952953313969008e-06, + "loss": 0.35303428769111633, + "step": 12283 + }, + { + "epoch": 2.269773994987932, + "grad_norm": 0.0862889289855957, + "learning_rate": 2.951538060262212e-06, + "loss": 0.4027027487754822, + "step": 12284 + }, + { + "epoch": 2.2699587716968277, + "grad_norm": 0.08643152564764023, + "learning_rate": 2.9501230870621565e-06, + "loss": 0.48595723509788513, + "step": 12285 + }, + { + "epoch": 2.2701435484057235, + "grad_norm": 0.06484793871641159, + "learning_rate": 2.9487083944251428e-06, + "loss": 0.2770717442035675, + "step": 12286 + }, + { + "epoch": 2.2703283251146193, + "grad_norm": 0.09154028445482254, + "learning_rate": 2.9472939824074742e-06, + "loss": 0.41690942645072937, + "step": 12287 + }, + { + "epoch": 2.270513101823515, + "grad_norm": 0.09700242429971695, + "learning_rate": 2.945879851065443e-06, + "loss": 0.473203182220459, + "step": 12288 + }, + { + "epoch": 2.270697878532411, + "grad_norm": 0.08067073673009872, + "learning_rate": 2.9444660004553207e-06, + "loss": 0.3523070514202118, + "step": 12289 + }, + { + "epoch": 2.270882655241307, + "grad_norm": 0.0762988030910492, + "learning_rate": 2.9430524306333785e-06, + "loss": 0.40847325325012207, + "step": 12290 + }, + { + "epoch": 2.2710674319502027, + "grad_norm": 0.06584256887435913, + "learning_rate": 2.94163914165587e-06, + "loss": 0.33420929312705994, + "step": 12291 + }, + { + "epoch": 2.2712522086590985, + "grad_norm": 0.09168466925621033, + "learning_rate": 2.9402261335790415e-06, + "loss": 0.4268166720867157, + "step": 12292 + }, + { + "epoch": 2.2714369853679943, + "grad_norm": 0.07886332273483276, + "learning_rate": 2.938813406459129e-06, + "loss": 0.42885634303092957, + "step": 12293 + }, + { + "epoch": 2.27162176207689, + "grad_norm": 0.09260807931423187, + "learning_rate": 2.937400960352348e-06, + "loss": 0.5658440589904785, + "step": 12294 + }, + { + "epoch": 2.271806538785786, + "grad_norm": 0.09772022813558578, + "learning_rate": 2.9359887953149125e-06, + "loss": 0.5705576539039612, + "step": 12295 + }, + { + "epoch": 2.271991315494682, + "grad_norm": 0.11060616374015808, + "learning_rate": 2.9345769114030264e-06, + "loss": 0.6149977445602417, + "step": 12296 + }, + { + "epoch": 2.2721760922035776, + "grad_norm": 0.0723041296005249, + "learning_rate": 2.9331653086728674e-06, + "loss": 0.3808667063713074, + "step": 12297 + }, + { + "epoch": 2.2723608689124735, + "grad_norm": 0.06351529806852341, + "learning_rate": 2.9317539871806235e-06, + "loss": 0.3369500935077667, + "step": 12298 + }, + { + "epoch": 2.2725456456213693, + "grad_norm": 0.05478255823254585, + "learning_rate": 2.9303429469824594e-06, + "loss": 0.24042272567749023, + "step": 12299 + }, + { + "epoch": 2.272730422330265, + "grad_norm": 0.06112119182944298, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.2693672180175781, + "step": 12300 + }, + { + "epoch": 2.272915199039161, + "grad_norm": 0.07038292288780212, + "learning_rate": 2.9275217106929675e-06, + "loss": 0.3221263587474823, + "step": 12301 + }, + { + "epoch": 2.273099975748057, + "grad_norm": 0.0761604756116867, + "learning_rate": 2.926111514713923e-06, + "loss": 0.35919904708862305, + "step": 12302 + }, + { + "epoch": 2.273284752456953, + "grad_norm": 0.12052042782306671, + "learning_rate": 2.9247016002535043e-06, + "loss": 0.5472878813743591, + "step": 12303 + }, + { + "epoch": 2.2734695291658484, + "grad_norm": 0.08945787698030472, + "learning_rate": 2.923291967367823e-06, + "loss": 0.43166667222976685, + "step": 12304 + }, + { + "epoch": 2.2736543058747447, + "grad_norm": 0.11389131098985672, + "learning_rate": 2.921882616112988e-06, + "loss": 0.569865882396698, + "step": 12305 + }, + { + "epoch": 2.27383908258364, + "grad_norm": 0.10434228926897049, + "learning_rate": 2.9204735465450773e-06, + "loss": 0.5434978008270264, + "step": 12306 + }, + { + "epoch": 2.2740238592925364, + "grad_norm": 0.09066736698150635, + "learning_rate": 2.9190647587201703e-06, + "loss": 0.5771661996841431, + "step": 12307 + }, + { + "epoch": 2.274208636001432, + "grad_norm": 0.08334506303071976, + "learning_rate": 2.917656252694335e-06, + "loss": 0.41939690709114075, + "step": 12308 + }, + { + "epoch": 2.274393412710328, + "grad_norm": 0.05808553472161293, + "learning_rate": 2.9162480285236204e-06, + "loss": 0.30752813816070557, + "step": 12309 + }, + { + "epoch": 2.274578189419224, + "grad_norm": 0.08325260877609253, + "learning_rate": 2.9148400862640726e-06, + "loss": 0.40775370597839355, + "step": 12310 + }, + { + "epoch": 2.2747629661281197, + "grad_norm": 0.07577984780073166, + "learning_rate": 2.913432425971722e-06, + "loss": 0.2847796380519867, + "step": 12311 + }, + { + "epoch": 2.2749477428370155, + "grad_norm": 0.10682272911071777, + "learning_rate": 2.9120250477025903e-06, + "loss": 0.4378647804260254, + "step": 12312 + }, + { + "epoch": 2.2751325195459113, + "grad_norm": 0.09837109595537186, + "learning_rate": 2.910617951512689e-06, + "loss": 0.47766849398612976, + "step": 12313 + }, + { + "epoch": 2.275317296254807, + "grad_norm": 0.08506254851818085, + "learning_rate": 2.9092111374580103e-06, + "loss": 0.47322845458984375, + "step": 12314 + }, + { + "epoch": 2.275502072963703, + "grad_norm": 0.09625948965549469, + "learning_rate": 2.9078046055945443e-06, + "loss": 0.4225115180015564, + "step": 12315 + }, + { + "epoch": 2.275686849672599, + "grad_norm": 0.07625984400510788, + "learning_rate": 2.906398355978269e-06, + "loss": 0.3932497203350067, + "step": 12316 + }, + { + "epoch": 2.2758716263814947, + "grad_norm": 0.07267304509878159, + "learning_rate": 2.90499238866514e-06, + "loss": 0.33147087693214417, + "step": 12317 + }, + { + "epoch": 2.2760564030903905, + "grad_norm": 0.0772964283823967, + "learning_rate": 2.90358670371112e-06, + "loss": 0.49541783332824707, + "step": 12318 + }, + { + "epoch": 2.2762411797992863, + "grad_norm": 0.07828882336616516, + "learning_rate": 2.90218130117215e-06, + "loss": 0.45402592420578003, + "step": 12319 + }, + { + "epoch": 2.276425956508182, + "grad_norm": 0.07979245483875275, + "learning_rate": 2.9007761811041555e-06, + "loss": 0.3754137456417084, + "step": 12320 + }, + { + "epoch": 2.276610733217078, + "grad_norm": 0.09181807935237885, + "learning_rate": 2.8993713435630576e-06, + "loss": 0.5156495571136475, + "step": 12321 + }, + { + "epoch": 2.276795509925974, + "grad_norm": 0.0858747586607933, + "learning_rate": 2.897966788604769e-06, + "loss": 0.46253150701522827, + "step": 12322 + }, + { + "epoch": 2.2769802866348696, + "grad_norm": 0.09898536652326584, + "learning_rate": 2.8965625162851794e-06, + "loss": 0.43217700719833374, + "step": 12323 + }, + { + "epoch": 2.2771650633437654, + "grad_norm": 0.08097942918539047, + "learning_rate": 2.8951585266601757e-06, + "loss": 0.3872702121734619, + "step": 12324 + }, + { + "epoch": 2.2773498400526613, + "grad_norm": 0.07538451254367828, + "learning_rate": 2.893754819785639e-06, + "loss": 0.39061447978019714, + "step": 12325 + }, + { + "epoch": 2.277534616761557, + "grad_norm": 0.07708559930324554, + "learning_rate": 2.8923513957174263e-06, + "loss": 0.4144393503665924, + "step": 12326 + }, + { + "epoch": 2.277719393470453, + "grad_norm": 0.08210837841033936, + "learning_rate": 2.8909482545113932e-06, + "loss": 0.43052053451538086, + "step": 12327 + }, + { + "epoch": 2.2779041701793488, + "grad_norm": 0.12307770550251007, + "learning_rate": 2.8895453962233757e-06, + "loss": 0.6036669015884399, + "step": 12328 + }, + { + "epoch": 2.2780889468882446, + "grad_norm": 0.07950335741043091, + "learning_rate": 2.8881428209092054e-06, + "loss": 0.42403754591941833, + "step": 12329 + }, + { + "epoch": 2.2782737235971404, + "grad_norm": 0.0881686806678772, + "learning_rate": 2.8867405286247007e-06, + "loss": 0.3945855498313904, + "step": 12330 + }, + { + "epoch": 2.2784585003060362, + "grad_norm": 0.08936318010091782, + "learning_rate": 2.8853385194256677e-06, + "loss": 0.41373908519744873, + "step": 12331 + }, + { + "epoch": 2.278643277014932, + "grad_norm": 0.0715850442647934, + "learning_rate": 2.883936793367904e-06, + "loss": 0.2821645140647888, + "step": 12332 + }, + { + "epoch": 2.278828053723828, + "grad_norm": 0.07524916529655457, + "learning_rate": 2.8825353505071953e-06, + "loss": 0.4662242531776428, + "step": 12333 + }, + { + "epoch": 2.279012830432724, + "grad_norm": 0.08967549353837967, + "learning_rate": 2.8811341908993084e-06, + "loss": 0.4699711501598358, + "step": 12334 + }, + { + "epoch": 2.2791976071416196, + "grad_norm": 0.07801796495914459, + "learning_rate": 2.8797333146000086e-06, + "loss": 0.33761677145957947, + "step": 12335 + }, + { + "epoch": 2.279382383850516, + "grad_norm": 0.09331436455249786, + "learning_rate": 2.87833272166505e-06, + "loss": 0.49292394518852234, + "step": 12336 + }, + { + "epoch": 2.2795671605594117, + "grad_norm": 0.10073967278003693, + "learning_rate": 2.8769324121501618e-06, + "loss": 0.4239262342453003, + "step": 12337 + }, + { + "epoch": 2.2797519372683075, + "grad_norm": 0.09610048681497574, + "learning_rate": 2.875532386111082e-06, + "loss": 0.5705044865608215, + "step": 12338 + }, + { + "epoch": 2.2799367139772033, + "grad_norm": 0.10764279961585999, + "learning_rate": 2.8741326436035255e-06, + "loss": 0.5637108087539673, + "step": 12339 + }, + { + "epoch": 2.280121490686099, + "grad_norm": 0.09255742281675339, + "learning_rate": 2.872733184683194e-06, + "loss": 0.5211510062217712, + "step": 12340 + }, + { + "epoch": 2.280306267394995, + "grad_norm": 0.09281174838542938, + "learning_rate": 2.871334009405785e-06, + "loss": 0.4678809642791748, + "step": 12341 + }, + { + "epoch": 2.280491044103891, + "grad_norm": 0.07860644906759262, + "learning_rate": 2.8699351178269787e-06, + "loss": 0.4409528076648712, + "step": 12342 + }, + { + "epoch": 2.2806758208127866, + "grad_norm": 0.1086873859167099, + "learning_rate": 2.868536510002445e-06, + "loss": 0.5125857591629028, + "step": 12343 + }, + { + "epoch": 2.2808605975216825, + "grad_norm": 0.07654060423374176, + "learning_rate": 2.8671381859878488e-06, + "loss": 0.4426422417163849, + "step": 12344 + }, + { + "epoch": 2.2810453742305783, + "grad_norm": 0.09781482815742493, + "learning_rate": 2.865740145838837e-06, + "loss": 0.43132483959198, + "step": 12345 + }, + { + "epoch": 2.281230150939474, + "grad_norm": 0.07767101377248764, + "learning_rate": 2.8643423896110455e-06, + "loss": 0.35789021849632263, + "step": 12346 + }, + { + "epoch": 2.28141492764837, + "grad_norm": 0.10004222393035889, + "learning_rate": 2.8629449173601067e-06, + "loss": 0.5255815982818604, + "step": 12347 + }, + { + "epoch": 2.281599704357266, + "grad_norm": 0.09296823292970657, + "learning_rate": 2.8615477291416284e-06, + "loss": 0.4425857961177826, + "step": 12348 + }, + { + "epoch": 2.2817844810661616, + "grad_norm": 0.08270562440156937, + "learning_rate": 2.8601508250112164e-06, + "loss": 0.40904340147972107, + "step": 12349 + }, + { + "epoch": 2.2819692577750574, + "grad_norm": 0.0664658322930336, + "learning_rate": 2.858754205024463e-06, + "loss": 0.34156301617622375, + "step": 12350 + }, + { + "epoch": 2.2821540344839533, + "grad_norm": 0.07145830988883972, + "learning_rate": 2.857357869236952e-06, + "loss": 0.28238388895988464, + "step": 12351 + }, + { + "epoch": 2.282338811192849, + "grad_norm": 0.07514777034521103, + "learning_rate": 2.8559618177042504e-06, + "loss": 0.37296679615974426, + "step": 12352 + }, + { + "epoch": 2.282523587901745, + "grad_norm": 0.07478883117437363, + "learning_rate": 2.8545660504819207e-06, + "loss": 0.3149990439414978, + "step": 12353 + }, + { + "epoch": 2.2827083646106407, + "grad_norm": 0.09237714856863022, + "learning_rate": 2.853170567625504e-06, + "loss": 0.43616247177124023, + "step": 12354 + }, + { + "epoch": 2.2828931413195366, + "grad_norm": 0.07515031844377518, + "learning_rate": 2.851775369190539e-06, + "loss": 0.3258705139160156, + "step": 12355 + }, + { + "epoch": 2.2830779180284324, + "grad_norm": 0.08433238416910172, + "learning_rate": 2.8503804552325497e-06, + "loss": 0.4098306894302368, + "step": 12356 + }, + { + "epoch": 2.2832626947373282, + "grad_norm": 0.0852990448474884, + "learning_rate": 2.848985825807051e-06, + "loss": 0.4909660220146179, + "step": 12357 + }, + { + "epoch": 2.283447471446224, + "grad_norm": 0.09817817807197571, + "learning_rate": 2.847591480969547e-06, + "loss": 0.47760963439941406, + "step": 12358 + }, + { + "epoch": 2.28363224815512, + "grad_norm": 0.10420354455709457, + "learning_rate": 2.8461974207755217e-06, + "loss": 0.7738246917724609, + "step": 12359 + }, + { + "epoch": 2.2838170248640157, + "grad_norm": 0.10038287192583084, + "learning_rate": 2.844803645280457e-06, + "loss": 0.44567567110061646, + "step": 12360 + }, + { + "epoch": 2.2840018015729115, + "grad_norm": 0.07754106819629669, + "learning_rate": 2.843410154539825e-06, + "loss": 0.34756210446357727, + "step": 12361 + }, + { + "epoch": 2.2841865782818074, + "grad_norm": 0.09777885675430298, + "learning_rate": 2.8420169486090765e-06, + "loss": 0.5114613175392151, + "step": 12362 + }, + { + "epoch": 2.2843713549907037, + "grad_norm": 0.0712917372584343, + "learning_rate": 2.840624027543658e-06, + "loss": 0.342816025018692, + "step": 12363 + }, + { + "epoch": 2.284556131699599, + "grad_norm": 0.09564562886953354, + "learning_rate": 2.8392313913990054e-06, + "loss": 0.4747394323348999, + "step": 12364 + }, + { + "epoch": 2.2847409084084953, + "grad_norm": 0.08557190746068954, + "learning_rate": 2.837839040230539e-06, + "loss": 0.42063960433006287, + "step": 12365 + }, + { + "epoch": 2.284925685117391, + "grad_norm": 0.09702543914318085, + "learning_rate": 2.8364469740936717e-06, + "loss": 0.4699907898902893, + "step": 12366 + }, + { + "epoch": 2.285110461826287, + "grad_norm": 0.0841965600848198, + "learning_rate": 2.8350551930438066e-06, + "loss": 0.3842927813529968, + "step": 12367 + }, + { + "epoch": 2.285295238535183, + "grad_norm": 0.07941141724586487, + "learning_rate": 2.8336636971363253e-06, + "loss": 0.44426429271698, + "step": 12368 + }, + { + "epoch": 2.2854800152440786, + "grad_norm": 0.09702118486166, + "learning_rate": 2.832272486426608e-06, + "loss": 0.5666193962097168, + "step": 12369 + }, + { + "epoch": 2.2856647919529745, + "grad_norm": 0.07746150344610214, + "learning_rate": 2.8308815609700203e-06, + "loss": 0.34402111172676086, + "step": 12370 + }, + { + "epoch": 2.2858495686618703, + "grad_norm": 0.07326214760541916, + "learning_rate": 2.829490920821918e-06, + "loss": 0.40299466252326965, + "step": 12371 + }, + { + "epoch": 2.286034345370766, + "grad_norm": 0.08180015534162521, + "learning_rate": 2.828100566037643e-06, + "loss": 0.4826923906803131, + "step": 12372 + }, + { + "epoch": 2.286219122079662, + "grad_norm": 0.07712136209011078, + "learning_rate": 2.826710496672531e-06, + "loss": 0.3627420663833618, + "step": 12373 + }, + { + "epoch": 2.2864038987885578, + "grad_norm": 0.0706978365778923, + "learning_rate": 2.825320712781895e-06, + "loss": 0.3895527124404907, + "step": 12374 + }, + { + "epoch": 2.2865886754974536, + "grad_norm": 0.07732373476028442, + "learning_rate": 2.8239312144210517e-06, + "loss": 0.3908056616783142, + "step": 12375 + }, + { + "epoch": 2.2867734522063494, + "grad_norm": 0.08550095558166504, + "learning_rate": 2.8225420016452886e-06, + "loss": 0.437029093503952, + "step": 12376 + }, + { + "epoch": 2.2869582289152453, + "grad_norm": 0.10298527777194977, + "learning_rate": 2.8211530745099016e-06, + "loss": 0.5659658312797546, + "step": 12377 + }, + { + "epoch": 2.287143005624141, + "grad_norm": 0.08626532554626465, + "learning_rate": 2.819764433070166e-06, + "loss": 0.46370428800582886, + "step": 12378 + }, + { + "epoch": 2.287327782333037, + "grad_norm": 0.090863436460495, + "learning_rate": 2.8183760773813384e-06, + "loss": 0.5512515902519226, + "step": 12379 + }, + { + "epoch": 2.2875125590419327, + "grad_norm": 0.07174748927354813, + "learning_rate": 2.8169880074986742e-06, + "loss": 0.3715180456638336, + "step": 12380 + }, + { + "epoch": 2.2876973357508286, + "grad_norm": 0.0870097428560257, + "learning_rate": 2.815600223477418e-06, + "loss": 0.4142930805683136, + "step": 12381 + }, + { + "epoch": 2.2878821124597244, + "grad_norm": 0.062451448291540146, + "learning_rate": 2.8142127253727923e-06, + "loss": 0.3340838849544525, + "step": 12382 + }, + { + "epoch": 2.28806688916862, + "grad_norm": 0.0948941633105278, + "learning_rate": 2.8128255132400196e-06, + "loss": 0.4817712604999542, + "step": 12383 + }, + { + "epoch": 2.288251665877516, + "grad_norm": 0.07591880857944489, + "learning_rate": 2.8114385871343053e-06, + "loss": 0.30848807096481323, + "step": 12384 + }, + { + "epoch": 2.288436442586412, + "grad_norm": 0.07042495161294937, + "learning_rate": 2.8100519471108447e-06, + "loss": 0.3212406039237976, + "step": 12385 + }, + { + "epoch": 2.2886212192953077, + "grad_norm": 0.0942525789141655, + "learning_rate": 2.808665593224822e-06, + "loss": 0.48285847902297974, + "step": 12386 + }, + { + "epoch": 2.2888059960042035, + "grad_norm": 0.06566546857357025, + "learning_rate": 2.807279525531413e-06, + "loss": 0.3156110346317291, + "step": 12387 + }, + { + "epoch": 2.2889907727130994, + "grad_norm": 0.10510123521089554, + "learning_rate": 2.805893744085774e-06, + "loss": 0.4791485369205475, + "step": 12388 + }, + { + "epoch": 2.289175549421995, + "grad_norm": 0.08050920069217682, + "learning_rate": 2.8045082489430554e-06, + "loss": 0.4345119893550873, + "step": 12389 + }, + { + "epoch": 2.289360326130891, + "grad_norm": 0.09542886167764664, + "learning_rate": 2.8031230401583965e-06, + "loss": 0.5287306308746338, + "step": 12390 + }, + { + "epoch": 2.289545102839787, + "grad_norm": 0.08587858080863953, + "learning_rate": 2.8017381177869253e-06, + "loss": 0.4475300908088684, + "step": 12391 + }, + { + "epoch": 2.289729879548683, + "grad_norm": 0.0851026400923729, + "learning_rate": 2.8003534818837586e-06, + "loss": 0.39573603868484497, + "step": 12392 + }, + { + "epoch": 2.2899146562575785, + "grad_norm": 0.0926097109913826, + "learning_rate": 2.798969132503997e-06, + "loss": 0.4512878656387329, + "step": 12393 + }, + { + "epoch": 2.290099432966475, + "grad_norm": 0.08771153539419174, + "learning_rate": 2.797585069702733e-06, + "loss": 0.4938456118106842, + "step": 12394 + }, + { + "epoch": 2.2902842096753706, + "grad_norm": 0.08554097265005112, + "learning_rate": 2.7962012935350537e-06, + "loss": 0.3957395851612091, + "step": 12395 + }, + { + "epoch": 2.2904689863842664, + "grad_norm": 0.10183302313089371, + "learning_rate": 2.794817804056019e-06, + "loss": 0.5511239767074585, + "step": 12396 + }, + { + "epoch": 2.2906537630931623, + "grad_norm": 0.10341359674930573, + "learning_rate": 2.793434601320697e-06, + "loss": 0.5638948082923889, + "step": 12397 + }, + { + "epoch": 2.290838539802058, + "grad_norm": 0.09127336740493774, + "learning_rate": 2.792051685384134e-06, + "loss": 0.43918925523757935, + "step": 12398 + }, + { + "epoch": 2.291023316510954, + "grad_norm": 0.08343155682086945, + "learning_rate": 2.7906690563013593e-06, + "loss": 0.38254082202911377, + "step": 12399 + }, + { + "epoch": 2.2912080932198498, + "grad_norm": 0.08328774571418762, + "learning_rate": 2.789286714127402e-06, + "loss": 0.402026504278183, + "step": 12400 + }, + { + "epoch": 2.2913928699287456, + "grad_norm": 0.09894675016403198, + "learning_rate": 2.7879046589172776e-06, + "loss": 0.3728368878364563, + "step": 12401 + }, + { + "epoch": 2.2915776466376414, + "grad_norm": 0.08401940017938614, + "learning_rate": 2.7865228907259802e-06, + "loss": 0.46870890259742737, + "step": 12402 + }, + { + "epoch": 2.2917624233465372, + "grad_norm": 0.09789912402629852, + "learning_rate": 2.785141409608504e-06, + "loss": 0.5191943049430847, + "step": 12403 + }, + { + "epoch": 2.291947200055433, + "grad_norm": 0.06944061815738678, + "learning_rate": 2.7837602156198262e-06, + "loss": 0.35012003779411316, + "step": 12404 + }, + { + "epoch": 2.292131976764329, + "grad_norm": 0.07652708142995834, + "learning_rate": 2.7823793088149166e-06, + "loss": 0.3387015759944916, + "step": 12405 + }, + { + "epoch": 2.2923167534732247, + "grad_norm": 0.10045712441205978, + "learning_rate": 2.7809986892487316e-06, + "loss": 0.5079493522644043, + "step": 12406 + }, + { + "epoch": 2.2925015301821206, + "grad_norm": 0.07915651798248291, + "learning_rate": 2.7796183569762103e-06, + "loss": 0.32102376222610474, + "step": 12407 + }, + { + "epoch": 2.2926863068910164, + "grad_norm": 0.11177308857440948, + "learning_rate": 2.7782383120522895e-06, + "loss": 0.5183802843093872, + "step": 12408 + }, + { + "epoch": 2.292871083599912, + "grad_norm": 0.09553904831409454, + "learning_rate": 2.7768585545318895e-06, + "loss": 0.40244248509407043, + "step": 12409 + }, + { + "epoch": 2.293055860308808, + "grad_norm": 0.09308487921953201, + "learning_rate": 2.775479084469921e-06, + "loss": 0.4925037622451782, + "step": 12410 + }, + { + "epoch": 2.293240637017704, + "grad_norm": 0.07482548803091049, + "learning_rate": 2.7740999019212824e-06, + "loss": 0.30733537673950195, + "step": 12411 + }, + { + "epoch": 2.2934254137265997, + "grad_norm": 0.0775015726685524, + "learning_rate": 2.772721006940863e-06, + "loss": 0.37895214557647705, + "step": 12412 + }, + { + "epoch": 2.2936101904354955, + "grad_norm": 0.09507368505001068, + "learning_rate": 2.7713423995835343e-06, + "loss": 0.41809919476509094, + "step": 12413 + }, + { + "epoch": 2.2937949671443914, + "grad_norm": 0.10335507243871689, + "learning_rate": 2.7699640799041615e-06, + "loss": 0.5223547220230103, + "step": 12414 + }, + { + "epoch": 2.293979743853287, + "grad_norm": 0.06919983774423599, + "learning_rate": 2.768586047957602e-06, + "loss": 0.35734066367149353, + "step": 12415 + }, + { + "epoch": 2.294164520562183, + "grad_norm": 0.09655167907476425, + "learning_rate": 2.7672083037986874e-06, + "loss": 0.4475431442260742, + "step": 12416 + }, + { + "epoch": 2.294349297271079, + "grad_norm": 0.07486025243997574, + "learning_rate": 2.765830847482257e-06, + "loss": 0.39141911268234253, + "step": 12417 + }, + { + "epoch": 2.2945340739799747, + "grad_norm": 0.07163551449775696, + "learning_rate": 2.7644536790631283e-06, + "loss": 0.4052906930446625, + "step": 12418 + }, + { + "epoch": 2.2947188506888705, + "grad_norm": 0.09198658913373947, + "learning_rate": 2.763076798596104e-06, + "loss": 0.4742256700992584, + "step": 12419 + }, + { + "epoch": 2.2949036273977663, + "grad_norm": 0.09546126425266266, + "learning_rate": 2.761700206135981e-06, + "loss": 0.38848382234573364, + "step": 12420 + }, + { + "epoch": 2.2950884041066626, + "grad_norm": 0.09016796946525574, + "learning_rate": 2.7603239017375483e-06, + "loss": 0.49152642488479614, + "step": 12421 + }, + { + "epoch": 2.295273180815558, + "grad_norm": 0.07008977234363556, + "learning_rate": 2.7589478854555694e-06, + "loss": 0.4098982512950897, + "step": 12422 + }, + { + "epoch": 2.2954579575244543, + "grad_norm": 0.1004912257194519, + "learning_rate": 2.757572157344812e-06, + "loss": 0.45596155524253845, + "step": 12423 + }, + { + "epoch": 2.29564273423335, + "grad_norm": 0.08788701146841049, + "learning_rate": 2.7561967174600234e-06, + "loss": 0.4334774613380432, + "step": 12424 + }, + { + "epoch": 2.295827510942246, + "grad_norm": 0.07405531406402588, + "learning_rate": 2.7548215658559417e-06, + "loss": 0.3822348117828369, + "step": 12425 + }, + { + "epoch": 2.2960122876511417, + "grad_norm": 0.10018901526927948, + "learning_rate": 2.753446702587299e-06, + "loss": 0.5195327997207642, + "step": 12426 + }, + { + "epoch": 2.2961970643600376, + "grad_norm": 0.08698148280382156, + "learning_rate": 2.7520721277088023e-06, + "loss": 0.5598820447921753, + "step": 12427 + }, + { + "epoch": 2.2963818410689334, + "grad_norm": 0.0688018724322319, + "learning_rate": 2.7506978412751585e-06, + "loss": 0.35568612813949585, + "step": 12428 + }, + { + "epoch": 2.2965666177778292, + "grad_norm": 0.08764747530221939, + "learning_rate": 2.7493238433410606e-06, + "loss": 0.5102495551109314, + "step": 12429 + }, + { + "epoch": 2.296751394486725, + "grad_norm": 0.09153677523136139, + "learning_rate": 2.74795013396119e-06, + "loss": 0.4793400764465332, + "step": 12430 + }, + { + "epoch": 2.296936171195621, + "grad_norm": 0.09142836928367615, + "learning_rate": 2.7465767131902154e-06, + "loss": 0.48736387491226196, + "step": 12431 + }, + { + "epoch": 2.2971209479045167, + "grad_norm": 0.06931941211223602, + "learning_rate": 2.7452035810827972e-06, + "loss": 0.32798847556114197, + "step": 12432 + }, + { + "epoch": 2.2973057246134125, + "grad_norm": 0.10837685316801071, + "learning_rate": 2.743830737693576e-06, + "loss": 0.5421178340911865, + "step": 12433 + }, + { + "epoch": 2.2974905013223084, + "grad_norm": 0.08300384879112244, + "learning_rate": 2.742458183077189e-06, + "loss": 0.3388262093067169, + "step": 12434 + }, + { + "epoch": 2.297675278031204, + "grad_norm": 0.08761202543973923, + "learning_rate": 2.7410859172882643e-06, + "loss": 0.4011119306087494, + "step": 12435 + }, + { + "epoch": 2.2978600547401, + "grad_norm": 0.08077117800712585, + "learning_rate": 2.7397139403814045e-06, + "loss": 0.4460929036140442, + "step": 12436 + }, + { + "epoch": 2.298044831448996, + "grad_norm": 0.07666885852813721, + "learning_rate": 2.7383422524112168e-06, + "loss": 0.3937883973121643, + "step": 12437 + }, + { + "epoch": 2.2982296081578917, + "grad_norm": 0.09943380206823349, + "learning_rate": 2.7369708534322924e-06, + "loss": 0.559493899345398, + "step": 12438 + }, + { + "epoch": 2.2984143848667875, + "grad_norm": 0.06779221445322037, + "learning_rate": 2.735599743499202e-06, + "loss": 0.3402836322784424, + "step": 12439 + }, + { + "epoch": 2.2985991615756833, + "grad_norm": 0.07608848065137863, + "learning_rate": 2.7342289226665185e-06, + "loss": 0.33676713705062866, + "step": 12440 + }, + { + "epoch": 2.298783938284579, + "grad_norm": 0.07917264848947525, + "learning_rate": 2.7328583909887875e-06, + "loss": 0.48675215244293213, + "step": 12441 + }, + { + "epoch": 2.298968714993475, + "grad_norm": 0.09452951699495316, + "learning_rate": 2.731488148520557e-06, + "loss": 0.4705582857131958, + "step": 12442 + }, + { + "epoch": 2.299153491702371, + "grad_norm": 0.09254277497529984, + "learning_rate": 2.730118195316358e-06, + "loss": 0.6044603586196899, + "step": 12443 + }, + { + "epoch": 2.2993382684112667, + "grad_norm": 0.08278968185186386, + "learning_rate": 2.7287485314307105e-06, + "loss": 0.41316351294517517, + "step": 12444 + }, + { + "epoch": 2.2995230451201625, + "grad_norm": 0.09281335026025772, + "learning_rate": 2.727379156918123e-06, + "loss": 0.4842070937156677, + "step": 12445 + }, + { + "epoch": 2.2997078218290583, + "grad_norm": 0.08693793416023254, + "learning_rate": 2.7260100718330938e-06, + "loss": 0.5524295568466187, + "step": 12446 + }, + { + "epoch": 2.299892598537954, + "grad_norm": 0.08974786847829819, + "learning_rate": 2.7246412762301045e-06, + "loss": 0.5332388877868652, + "step": 12447 + }, + { + "epoch": 2.30007737524685, + "grad_norm": 0.05453294888138771, + "learning_rate": 2.7232727701636306e-06, + "loss": 0.21315710246562958, + "step": 12448 + }, + { + "epoch": 2.300262151955746, + "grad_norm": 0.08957856148481369, + "learning_rate": 2.721904553688134e-06, + "loss": 0.43898043036460876, + "step": 12449 + }, + { + "epoch": 2.300446928664642, + "grad_norm": 0.06817204505205154, + "learning_rate": 2.7205366268580657e-06, + "loss": 0.30610817670822144, + "step": 12450 + }, + { + "epoch": 2.3006317053735375, + "grad_norm": 0.08877559751272202, + "learning_rate": 2.7191689897278662e-06, + "loss": 0.40955445170402527, + "step": 12451 + }, + { + "epoch": 2.3008164820824337, + "grad_norm": 0.08863110840320587, + "learning_rate": 2.7178016423519637e-06, + "loss": 0.5220192074775696, + "step": 12452 + }, + { + "epoch": 2.301001258791329, + "grad_norm": 0.07328446954488754, + "learning_rate": 2.7164345847847706e-06, + "loss": 0.3678598999977112, + "step": 12453 + }, + { + "epoch": 2.3011860355002254, + "grad_norm": 0.08586084097623825, + "learning_rate": 2.7150678170806944e-06, + "loss": 0.43170756101608276, + "step": 12454 + }, + { + "epoch": 2.301370812209121, + "grad_norm": 0.08982164412736893, + "learning_rate": 2.713701339294129e-06, + "loss": 0.40715205669403076, + "step": 12455 + }, + { + "epoch": 2.301555588918017, + "grad_norm": 0.09180708974599838, + "learning_rate": 2.7123351514794494e-06, + "loss": 0.4255968928337097, + "step": 12456 + }, + { + "epoch": 2.301740365626913, + "grad_norm": 0.0955817773938179, + "learning_rate": 2.710969253691036e-06, + "loss": 0.4432634711265564, + "step": 12457 + }, + { + "epoch": 2.3019251423358087, + "grad_norm": 0.06264004111289978, + "learning_rate": 2.7096036459832387e-06, + "loss": 0.25520288944244385, + "step": 12458 + }, + { + "epoch": 2.3021099190447045, + "grad_norm": 0.06978282332420349, + "learning_rate": 2.7082383284104085e-06, + "loss": 0.29496538639068604, + "step": 12459 + }, + { + "epoch": 2.3022946957536004, + "grad_norm": 0.07816436141729355, + "learning_rate": 2.706873301026882e-06, + "loss": 0.32827895879745483, + "step": 12460 + }, + { + "epoch": 2.302479472462496, + "grad_norm": 0.08574339002370834, + "learning_rate": 2.705508563886978e-06, + "loss": 0.47787851095199585, + "step": 12461 + }, + { + "epoch": 2.302664249171392, + "grad_norm": 0.08717440068721771, + "learning_rate": 2.704144117045012e-06, + "loss": 0.4790334105491638, + "step": 12462 + }, + { + "epoch": 2.302849025880288, + "grad_norm": 0.08738885074853897, + "learning_rate": 2.7027799605552842e-06, + "loss": 0.4241340458393097, + "step": 12463 + }, + { + "epoch": 2.3030338025891837, + "grad_norm": 0.06934608519077301, + "learning_rate": 2.7014160944720835e-06, + "loss": 0.34214988350868225, + "step": 12464 + }, + { + "epoch": 2.3032185792980795, + "grad_norm": 0.10133513063192368, + "learning_rate": 2.7000525188496885e-06, + "loss": 0.5286313891410828, + "step": 12465 + }, + { + "epoch": 2.3034033560069753, + "grad_norm": 0.07713404297828674, + "learning_rate": 2.6986892337423675e-06, + "loss": 0.4190782606601715, + "step": 12466 + }, + { + "epoch": 2.303588132715871, + "grad_norm": 0.11312388628721237, + "learning_rate": 2.6973262392043687e-06, + "loss": 0.5498267412185669, + "step": 12467 + }, + { + "epoch": 2.303772909424767, + "grad_norm": 0.07396145910024643, + "learning_rate": 2.695963535289938e-06, + "loss": 0.3859500288963318, + "step": 12468 + }, + { + "epoch": 2.303957686133663, + "grad_norm": 0.07330184429883957, + "learning_rate": 2.6946011220533085e-06, + "loss": 0.44282686710357666, + "step": 12469 + }, + { + "epoch": 2.3041424628425586, + "grad_norm": 0.10176707804203033, + "learning_rate": 2.6932389995486986e-06, + "loss": 0.49235883355140686, + "step": 12470 + }, + { + "epoch": 2.3043272395514545, + "grad_norm": 0.09177861362695694, + "learning_rate": 2.691877167830319e-06, + "loss": 0.5612393021583557, + "step": 12471 + }, + { + "epoch": 2.3045120162603503, + "grad_norm": 0.06859984993934631, + "learning_rate": 2.6905156269523603e-06, + "loss": 0.3102670907974243, + "step": 12472 + }, + { + "epoch": 2.304696792969246, + "grad_norm": 0.10172141343355179, + "learning_rate": 2.689154376969012e-06, + "loss": 0.6309396028518677, + "step": 12473 + }, + { + "epoch": 2.304881569678142, + "grad_norm": 0.07310739904642105, + "learning_rate": 2.68779341793445e-06, + "loss": 0.3750215172767639, + "step": 12474 + }, + { + "epoch": 2.305066346387038, + "grad_norm": 0.0803554430603981, + "learning_rate": 2.68643274990283e-06, + "loss": 0.43689945340156555, + "step": 12475 + }, + { + "epoch": 2.3052511230959336, + "grad_norm": 0.08919933438301086, + "learning_rate": 2.685072372928301e-06, + "loss": 0.5057060122489929, + "step": 12476 + }, + { + "epoch": 2.3054358998048294, + "grad_norm": 0.08986406028270721, + "learning_rate": 2.6837122870650136e-06, + "loss": 0.4250834286212921, + "step": 12477 + }, + { + "epoch": 2.3056206765137253, + "grad_norm": 0.08082538843154907, + "learning_rate": 2.682352492367084e-06, + "loss": 0.4560541808605194, + "step": 12478 + }, + { + "epoch": 2.3058054532226215, + "grad_norm": 0.08965017646551132, + "learning_rate": 2.680992988888631e-06, + "loss": 0.4921261668205261, + "step": 12479 + }, + { + "epoch": 2.305990229931517, + "grad_norm": 0.09341119229793549, + "learning_rate": 2.679633776683762e-06, + "loss": 0.5224530100822449, + "step": 12480 + }, + { + "epoch": 2.306175006640413, + "grad_norm": 0.08903618156909943, + "learning_rate": 2.678274855806564e-06, + "loss": 0.503605306148529, + "step": 12481 + }, + { + "epoch": 2.3063597833493086, + "grad_norm": 0.08572878688573837, + "learning_rate": 2.6769162263111194e-06, + "loss": 0.5046517252922058, + "step": 12482 + }, + { + "epoch": 2.306544560058205, + "grad_norm": 0.08222978562116623, + "learning_rate": 2.6755578882514976e-06, + "loss": 0.49034225940704346, + "step": 12483 + }, + { + "epoch": 2.3067293367671007, + "grad_norm": 0.07876164466142654, + "learning_rate": 2.6741998416817572e-06, + "loss": 0.42356806993484497, + "step": 12484 + }, + { + "epoch": 2.3069141134759965, + "grad_norm": 0.08781693875789642, + "learning_rate": 2.6728420866559424e-06, + "loss": 0.47269538044929504, + "step": 12485 + }, + { + "epoch": 2.3070988901848923, + "grad_norm": 0.0931205004453659, + "learning_rate": 2.6714846232280932e-06, + "loss": 0.49855610728263855, + "step": 12486 + }, + { + "epoch": 2.307283666893788, + "grad_norm": 0.08059027045965195, + "learning_rate": 2.6701274514522248e-06, + "loss": 0.490103542804718, + "step": 12487 + }, + { + "epoch": 2.307468443602684, + "grad_norm": 0.089565210044384, + "learning_rate": 2.668770571382351e-06, + "loss": 0.43446487188339233, + "step": 12488 + }, + { + "epoch": 2.30765322031158, + "grad_norm": 0.08159112930297852, + "learning_rate": 2.6674139830724722e-06, + "loss": 0.46210017800331116, + "step": 12489 + }, + { + "epoch": 2.3078379970204757, + "grad_norm": 0.06302274018526077, + "learning_rate": 2.6660576865765764e-06, + "loss": 0.3557790517807007, + "step": 12490 + }, + { + "epoch": 2.3080227737293715, + "grad_norm": 0.06396881490945816, + "learning_rate": 2.6647016819486427e-06, + "loss": 0.28712761402130127, + "step": 12491 + }, + { + "epoch": 2.3082075504382673, + "grad_norm": 0.09645667672157288, + "learning_rate": 2.66334596924263e-06, + "loss": 0.5571883916854858, + "step": 12492 + }, + { + "epoch": 2.308392327147163, + "grad_norm": 0.07313007116317749, + "learning_rate": 2.661990548512493e-06, + "loss": 0.3786899149417877, + "step": 12493 + }, + { + "epoch": 2.308577103856059, + "grad_norm": 0.07516108453273773, + "learning_rate": 2.6606354198121786e-06, + "loss": 0.388254851102829, + "step": 12494 + }, + { + "epoch": 2.308761880564955, + "grad_norm": 0.0844891294836998, + "learning_rate": 2.6592805831956105e-06, + "loss": 0.466629296541214, + "step": 12495 + }, + { + "epoch": 2.3089466572738506, + "grad_norm": 0.09410285204648972, + "learning_rate": 2.657926038716704e-06, + "loss": 0.6287585496902466, + "step": 12496 + }, + { + "epoch": 2.3091314339827465, + "grad_norm": 0.09713611751794815, + "learning_rate": 2.6565717864293784e-06, + "loss": 0.5529349446296692, + "step": 12497 + }, + { + "epoch": 2.3093162106916423, + "grad_norm": 0.09099281579256058, + "learning_rate": 2.6552178263875172e-06, + "loss": 0.4954475462436676, + "step": 12498 + }, + { + "epoch": 2.309500987400538, + "grad_norm": 0.07115762680768967, + "learning_rate": 2.6538641586450075e-06, + "loss": 0.3478897213935852, + "step": 12499 + }, + { + "epoch": 2.309685764109434, + "grad_norm": 0.08878853917121887, + "learning_rate": 2.652510783255725e-06, + "loss": 0.5340008735656738, + "step": 12500 + }, + { + "epoch": 2.309685764109434, + "eval_loss": 0.5491987466812134, + "eval_runtime": 156.6238, + "eval_samples_per_second": 116.387, + "eval_steps_per_second": 14.551, + "step": 12500 + } + ], + "logging_steps": 1, + "max_steps": 16236, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.554647767097955e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}