| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0710377267927425, |
| "eval_steps": 500, |
| "global_step": 8000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003839877123932034, |
| "grad_norm": 9.855803343729887, |
| "learning_rate": 1.9984639016897083e-05, |
| "loss": 0.9511, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.007679754247864068, |
| "grad_norm": 7.900613749182144, |
| "learning_rate": 1.9969278033794163e-05, |
| "loss": 0.1502, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.011519631371796103, |
| "grad_norm": 0.22927913857697063, |
| "learning_rate": 1.9953917050691244e-05, |
| "loss": 0.0234, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.015359508495728137, |
| "grad_norm": 0.08089452288889179, |
| "learning_rate": 1.993855606758833e-05, |
| "loss": 0.0068, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01919938561966017, |
| "grad_norm": 0.1111623671137423, |
| "learning_rate": 1.992319508448541e-05, |
| "loss": 0.0042, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.023039262743592206, |
| "grad_norm": 0.0743738067650186, |
| "learning_rate": 1.990783410138249e-05, |
| "loss": 0.0027, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02687913986752424, |
| "grad_norm": 0.08445026035259467, |
| "learning_rate": 1.989247311827957e-05, |
| "loss": 0.0023, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.030719016991456273, |
| "grad_norm": 0.032381204224608405, |
| "learning_rate": 1.9877112135176652e-05, |
| "loss": 0.0019, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03455889411538831, |
| "grad_norm": 0.034738835887687565, |
| "learning_rate": 1.9861751152073733e-05, |
| "loss": 0.0015, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03839877123932034, |
| "grad_norm": 0.03125979639961295, |
| "learning_rate": 1.9846390168970814e-05, |
| "loss": 0.0012, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.042238648363252376, |
| "grad_norm": 0.027922819061575684, |
| "learning_rate": 1.98310291858679e-05, |
| "loss": 0.001, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04607852548718441, |
| "grad_norm": 0.027107384881600128, |
| "learning_rate": 1.981566820276498e-05, |
| "loss": 0.0009, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04991840261111644, |
| "grad_norm": 0.030852661769757015, |
| "learning_rate": 1.980030721966206e-05, |
| "loss": 0.0008, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05375827973504848, |
| "grad_norm": 0.017018191812887797, |
| "learning_rate": 1.978494623655914e-05, |
| "loss": 0.0007, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05759815685898051, |
| "grad_norm": 0.02039007906173804, |
| "learning_rate": 1.9769585253456222e-05, |
| "loss": 0.0007, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.061438033982912546, |
| "grad_norm": 0.01751117077975313, |
| "learning_rate": 1.9754224270353303e-05, |
| "loss": 0.0007, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06527791110684458, |
| "grad_norm": 0.025527484156853922, |
| "learning_rate": 1.9738863287250384e-05, |
| "loss": 0.0007, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06911778823077662, |
| "grad_norm": 0.010356304510994301, |
| "learning_rate": 1.9723502304147465e-05, |
| "loss": 0.0006, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07295766535470866, |
| "grad_norm": 0.019221562602418918, |
| "learning_rate": 1.970814132104455e-05, |
| "loss": 0.0006, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07679754247864068, |
| "grad_norm": 0.019408746838376397, |
| "learning_rate": 1.969278033794163e-05, |
| "loss": 0.0006, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08063741960257272, |
| "grad_norm": 0.012272661989691892, |
| "learning_rate": 1.967741935483871e-05, |
| "loss": 0.0006, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08447729672650475, |
| "grad_norm": 0.01575335759164804, |
| "learning_rate": 1.9662058371735792e-05, |
| "loss": 0.0006, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08831717385043679, |
| "grad_norm": 0.02337266868962172, |
| "learning_rate": 1.9646697388632873e-05, |
| "loss": 0.0006, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.09215705097436883, |
| "grad_norm": 0.02044371550228021, |
| "learning_rate": 1.9631336405529954e-05, |
| "loss": 0.0005, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.09599692809830085, |
| "grad_norm": 0.013592727470100728, |
| "learning_rate": 1.9615975422427035e-05, |
| "loss": 0.0005, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09983680522223289, |
| "grad_norm": 0.01722960420346088, |
| "learning_rate": 1.960061443932412e-05, |
| "loss": 0.0006, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.10367668234616492, |
| "grad_norm": 0.010831279896438627, |
| "learning_rate": 1.95852534562212e-05, |
| "loss": 0.0005, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.10751655947009696, |
| "grad_norm": 0.04396421107808547, |
| "learning_rate": 1.956989247311828e-05, |
| "loss": 0.0007, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.111356436594029, |
| "grad_norm": 0.009538347218684752, |
| "learning_rate": 1.9554531490015362e-05, |
| "loss": 0.0007, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11519631371796102, |
| "grad_norm": 0.019732833272054093, |
| "learning_rate": 1.9539170506912443e-05, |
| "loss": 0.0006, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11903619084189306, |
| "grad_norm": 0.01102864765215555, |
| "learning_rate": 1.9523809523809524e-05, |
| "loss": 0.0006, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.12287606796582509, |
| "grad_norm": 0.022789866556699984, |
| "learning_rate": 1.9508448540706605e-05, |
| "loss": 0.0008, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.12671594508975711, |
| "grad_norm": 0.026179745030787457, |
| "learning_rate": 1.949308755760369e-05, |
| "loss": 0.0007, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.13055582221368917, |
| "grad_norm": 0.006770076864326156, |
| "learning_rate": 1.947772657450077e-05, |
| "loss": 0.0006, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1343956993376212, |
| "grad_norm": 0.012631828755799612, |
| "learning_rate": 1.946236559139785e-05, |
| "loss": 0.0006, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.13823557646155324, |
| "grad_norm": 0.012963546321523804, |
| "learning_rate": 1.9447004608294932e-05, |
| "loss": 0.0005, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.14207545358548526, |
| "grad_norm": 0.024135972419695974, |
| "learning_rate": 1.9431643625192013e-05, |
| "loss": 0.0007, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1459153307094173, |
| "grad_norm": 0.023687976253774837, |
| "learning_rate": 1.9416282642089094e-05, |
| "loss": 0.0007, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.14975520783334934, |
| "grad_norm": 0.027951604107350918, |
| "learning_rate": 1.9400921658986175e-05, |
| "loss": 0.0007, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.15359508495728136, |
| "grad_norm": 0.010865274419369686, |
| "learning_rate": 1.9385560675883256e-05, |
| "loss": 0.0006, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1574349620812134, |
| "grad_norm": 0.010649058165805126, |
| "learning_rate": 1.937019969278034e-05, |
| "loss": 0.0006, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.16127483920514543, |
| "grad_norm": 0.017247417697752888, |
| "learning_rate": 1.935483870967742e-05, |
| "loss": 0.0005, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.16511471632907748, |
| "grad_norm": 0.009598794250166713, |
| "learning_rate": 1.9339477726574502e-05, |
| "loss": 0.0005, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1689545934530095, |
| "grad_norm": 0.013190891135776709, |
| "learning_rate": 1.9324116743471583e-05, |
| "loss": 0.0005, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.17279447057694153, |
| "grad_norm": 0.015198390723045437, |
| "learning_rate": 1.9308755760368664e-05, |
| "loss": 0.0005, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.17663434770087358, |
| "grad_norm": 0.00888530246811667, |
| "learning_rate": 1.9293394777265745e-05, |
| "loss": 0.0005, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1804742248248056, |
| "grad_norm": 0.0072193681049761505, |
| "learning_rate": 1.9278033794162825e-05, |
| "loss": 0.0005, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.18431410194873765, |
| "grad_norm": 0.015423575939404187, |
| "learning_rate": 1.926267281105991e-05, |
| "loss": 0.0004, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.18815397907266967, |
| "grad_norm": 0.005888900378170728, |
| "learning_rate": 1.924731182795699e-05, |
| "loss": 0.0004, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1919938561966017, |
| "grad_norm": 0.009370771214962732, |
| "learning_rate": 1.923195084485407e-05, |
| "loss": 0.0005, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19583373332053375, |
| "grad_norm": 0.008880476477762786, |
| "learning_rate": 1.9216589861751153e-05, |
| "loss": 0.0004, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.19967361044446577, |
| "grad_norm": 0.014710261381765207, |
| "learning_rate": 1.9201228878648233e-05, |
| "loss": 0.0004, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.20351348756839782, |
| "grad_norm": 0.010564538973197759, |
| "learning_rate": 1.9185867895545314e-05, |
| "loss": 0.0005, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.20735336469232984, |
| "grad_norm": 0.011022024037436684, |
| "learning_rate": 1.91705069124424e-05, |
| "loss": 0.0005, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.21119324181626187, |
| "grad_norm": 0.010224510974710398, |
| "learning_rate": 1.915514592933948e-05, |
| "loss": 0.0005, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.21503311894019392, |
| "grad_norm": 0.018187207031321574, |
| "learning_rate": 1.913978494623656e-05, |
| "loss": 0.0004, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.21887299606412594, |
| "grad_norm": 0.01408592901908285, |
| "learning_rate": 1.912442396313364e-05, |
| "loss": 0.0004, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.222712873188058, |
| "grad_norm": 0.00579354434664508, |
| "learning_rate": 1.9109062980030722e-05, |
| "loss": 0.0004, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.22655275031199001, |
| "grad_norm": 0.007179585527455935, |
| "learning_rate": 1.9093701996927803e-05, |
| "loss": 0.0004, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.23039262743592204, |
| "grad_norm": 0.01087115590194149, |
| "learning_rate": 1.9078341013824884e-05, |
| "loss": 0.0004, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2342325045598541, |
| "grad_norm": 0.013488801384563282, |
| "learning_rate": 1.906298003072197e-05, |
| "loss": 0.0004, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2380723816837861, |
| "grad_norm": 0.014270453003895688, |
| "learning_rate": 1.904761904761905e-05, |
| "loss": 0.0004, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.24191225880771816, |
| "grad_norm": 0.005608293770659859, |
| "learning_rate": 1.903225806451613e-05, |
| "loss": 0.0004, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.24575213593165018, |
| "grad_norm": 0.01942922314775013, |
| "learning_rate": 1.901689708141321e-05, |
| "loss": 0.0004, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.24959201305558223, |
| "grad_norm": 0.008807942106723612, |
| "learning_rate": 1.9001536098310292e-05, |
| "loss": 0.0004, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.25343189017951423, |
| "grad_norm": 0.009642123808480296, |
| "learning_rate": 1.8986175115207373e-05, |
| "loss": 0.0004, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2572717673034463, |
| "grad_norm": 0.010839972501351821, |
| "learning_rate": 1.8970814132104458e-05, |
| "loss": 0.0004, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.26111164442737833, |
| "grad_norm": 0.007016830642797472, |
| "learning_rate": 1.895545314900154e-05, |
| "loss": 0.0004, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.26495152155131035, |
| "grad_norm": 0.01036596596633793, |
| "learning_rate": 1.894009216589862e-05, |
| "loss": 0.0004, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.2687913986752424, |
| "grad_norm": 0.00699112176439698, |
| "learning_rate": 1.89247311827957e-05, |
| "loss": 0.0004, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2726312757991744, |
| "grad_norm": 0.023143846781726766, |
| "learning_rate": 1.890937019969278e-05, |
| "loss": 0.0004, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.2764711529231065, |
| "grad_norm": 0.014371836855906761, |
| "learning_rate": 1.8894009216589862e-05, |
| "loss": 0.0004, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.2803110300470385, |
| "grad_norm": 0.013134481657118259, |
| "learning_rate": 1.8878648233486943e-05, |
| "loss": 0.0004, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2841509071709705, |
| "grad_norm": 0.00936736410572265, |
| "learning_rate": 1.8863287250384027e-05, |
| "loss": 0.0004, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.28799078429490255, |
| "grad_norm": 0.0177202255192513, |
| "learning_rate": 1.8847926267281108e-05, |
| "loss": 0.0004, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2918306614188346, |
| "grad_norm": 0.006114501253180761, |
| "learning_rate": 1.883256528417819e-05, |
| "loss": 0.0004, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.29567053854276665, |
| "grad_norm": 0.008265452601599499, |
| "learning_rate": 1.881720430107527e-05, |
| "loss": 0.0004, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.29951041566669867, |
| "grad_norm": 0.014907543135145678, |
| "learning_rate": 1.880184331797235e-05, |
| "loss": 0.0004, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3033502927906307, |
| "grad_norm": 0.014797049223610434, |
| "learning_rate": 1.8786482334869432e-05, |
| "loss": 0.0004, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3071901699145627, |
| "grad_norm": 0.004652330628741432, |
| "learning_rate": 1.8771121351766516e-05, |
| "loss": 0.0003, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3110300470384948, |
| "grad_norm": 0.005493451170013414, |
| "learning_rate": 1.8755760368663597e-05, |
| "loss": 0.0004, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3148699241624268, |
| "grad_norm": 0.007045732848967435, |
| "learning_rate": 1.8740399385560678e-05, |
| "loss": 0.0003, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.31870980128635884, |
| "grad_norm": 0.018179892197985704, |
| "learning_rate": 1.872503840245776e-05, |
| "loss": 0.0004, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.32254967841029086, |
| "grad_norm": 0.005668747866614938, |
| "learning_rate": 1.870967741935484e-05, |
| "loss": 0.0003, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.3263895555342229, |
| "grad_norm": 0.005624631016307953, |
| "learning_rate": 1.869431643625192e-05, |
| "loss": 0.0003, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.33022943265815496, |
| "grad_norm": 0.012701139148209117, |
| "learning_rate": 1.8678955453149005e-05, |
| "loss": 0.0004, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.334069309782087, |
| "grad_norm": 0.020568594933285278, |
| "learning_rate": 1.8663594470046086e-05, |
| "loss": 0.0004, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.337909186906019, |
| "grad_norm": 0.016752438047633097, |
| "learning_rate": 1.8648233486943167e-05, |
| "loss": 0.0003, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.34174906402995103, |
| "grad_norm": 0.022040592531457844, |
| "learning_rate": 1.8632872503840248e-05, |
| "loss": 0.0003, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.34558894115388306, |
| "grad_norm": 0.005501761305071796, |
| "learning_rate": 1.861751152073733e-05, |
| "loss": 0.0003, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.34942881827781513, |
| "grad_norm": 0.00913565126513445, |
| "learning_rate": 1.860215053763441e-05, |
| "loss": 0.0003, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.35326869540174716, |
| "grad_norm": 0.00643352891432492, |
| "learning_rate": 1.858678955453149e-05, |
| "loss": 0.0004, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3571085725256792, |
| "grad_norm": 0.005932277310972733, |
| "learning_rate": 1.8571428571428575e-05, |
| "loss": 0.0003, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.3609484496496112, |
| "grad_norm": 0.00854566416057147, |
| "learning_rate": 1.8556067588325656e-05, |
| "loss": 0.0004, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.3647883267735432, |
| "grad_norm": 0.004198303186077754, |
| "learning_rate": 1.8540706605222737e-05, |
| "loss": 0.0003, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3686282038974753, |
| "grad_norm": 0.006013969871660999, |
| "learning_rate": 1.8525345622119818e-05, |
| "loss": 0.0003, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.3724680810214073, |
| "grad_norm": 0.00821940173101188, |
| "learning_rate": 1.85099846390169e-05, |
| "loss": 0.0003, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.37630795814533935, |
| "grad_norm": 0.01706199510535657, |
| "learning_rate": 1.849462365591398e-05, |
| "loss": 0.0003, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.3801478352692714, |
| "grad_norm": 0.012195179945468527, |
| "learning_rate": 1.8479262672811064e-05, |
| "loss": 0.0003, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.3839877123932034, |
| "grad_norm": 0.00807499700581235, |
| "learning_rate": 1.8463901689708145e-05, |
| "loss": 0.0003, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3878275895171355, |
| "grad_norm": 0.010945346397012758, |
| "learning_rate": 1.8448540706605226e-05, |
| "loss": 0.0003, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.3916674666410675, |
| "grad_norm": 0.00880396626822876, |
| "learning_rate": 1.8433179723502307e-05, |
| "loss": 0.0003, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.3955073437649995, |
| "grad_norm": 0.014399672052520717, |
| "learning_rate": 1.8417818740399388e-05, |
| "loss": 0.0003, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.39934722088893154, |
| "grad_norm": 0.006831959770996685, |
| "learning_rate": 1.840245775729647e-05, |
| "loss": 0.0003, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.40318709801286357, |
| "grad_norm": 18.516333985308375, |
| "learning_rate": 1.838709677419355e-05, |
| "loss": 0.0478, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.40702697513679564, |
| "grad_norm": 0.6477183946866085, |
| "learning_rate": 1.837173579109063e-05, |
| "loss": 0.1816, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.41086685226072767, |
| "grad_norm": 2.6861506025108475, |
| "learning_rate": 1.8356374807987715e-05, |
| "loss": 0.0536, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4147067293846597, |
| "grad_norm": 0.4416236285189527, |
| "learning_rate": 1.8341013824884796e-05, |
| "loss": 0.0249, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.4185466065085917, |
| "grad_norm": 0.9786805141459802, |
| "learning_rate": 1.8325652841781877e-05, |
| "loss": 0.0101, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.42238648363252373, |
| "grad_norm": 1.627598109506058, |
| "learning_rate": 1.8310291858678958e-05, |
| "loss": 0.0108, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4262263607564558, |
| "grad_norm": 2.2472146744387635, |
| "learning_rate": 1.829493087557604e-05, |
| "loss": 0.0101, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.43006623788038784, |
| "grad_norm": 0.5695485826329719, |
| "learning_rate": 1.827956989247312e-05, |
| "loss": 0.0043, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.43390611500431986, |
| "grad_norm": 1.530827959431516, |
| "learning_rate": 1.82642089093702e-05, |
| "loss": 0.013, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.4377459921282519, |
| "grad_norm": 0.029509683112095193, |
| "learning_rate": 1.8248847926267285e-05, |
| "loss": 0.0113, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.4415858692521839, |
| "grad_norm": 0.01781992132649757, |
| "learning_rate": 1.8233486943164366e-05, |
| "loss": 0.0007, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.445425746376116, |
| "grad_norm": 0.010151888479514436, |
| "learning_rate": 1.8218125960061447e-05, |
| "loss": 0.0005, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.449265623500048, |
| "grad_norm": 0.014420471837514583, |
| "learning_rate": 1.8202764976958527e-05, |
| "loss": 0.0005, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.45310550062398003, |
| "grad_norm": 0.010072124184727966, |
| "learning_rate": 1.818740399385561e-05, |
| "loss": 0.0004, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.45694537774791205, |
| "grad_norm": 0.007661769308843087, |
| "learning_rate": 1.817204301075269e-05, |
| "loss": 0.0004, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.4607852548718441, |
| "grad_norm": 0.010038812848366137, |
| "learning_rate": 1.815668202764977e-05, |
| "loss": 0.0004, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.46462513199577615, |
| "grad_norm": 0.011258850363850582, |
| "learning_rate": 1.8141321044546855e-05, |
| "loss": 0.0004, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.4684650091197082, |
| "grad_norm": 0.016055405689836853, |
| "learning_rate": 1.8125960061443936e-05, |
| "loss": 0.0004, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.4723048862436402, |
| "grad_norm": 0.007967416713376401, |
| "learning_rate": 1.8110599078341016e-05, |
| "loss": 0.0004, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.4761447633675722, |
| "grad_norm": 0.008025613224993348, |
| "learning_rate": 1.8095238095238097e-05, |
| "loss": 0.0003, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.47998464049150424, |
| "grad_norm": 0.004966250706848606, |
| "learning_rate": 1.8079877112135178e-05, |
| "loss": 0.0004, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4838245176154363, |
| "grad_norm": 0.007419454029578721, |
| "learning_rate": 1.806451612903226e-05, |
| "loss": 0.0004, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.48766439473936835, |
| "grad_norm": 0.007372896744459003, |
| "learning_rate": 1.804915514592934e-05, |
| "loss": 0.0004, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.49150427186330037, |
| "grad_norm": 0.007260032706837447, |
| "learning_rate": 1.803379416282642e-05, |
| "loss": 0.0004, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4953441489872324, |
| "grad_norm": 0.006797112689312068, |
| "learning_rate": 1.8018433179723505e-05, |
| "loss": 0.0003, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.49918402611116447, |
| "grad_norm": 0.006596862668254978, |
| "learning_rate": 1.8003072196620586e-05, |
| "loss": 0.0003, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5030239032350965, |
| "grad_norm": 0.007442569811616661, |
| "learning_rate": 1.7987711213517667e-05, |
| "loss": 0.0004, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5068637803590285, |
| "grad_norm": 0.011659097052332864, |
| "learning_rate": 1.7972350230414748e-05, |
| "loss": 0.0003, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.5107036574829605, |
| "grad_norm": 0.003189461384393768, |
| "learning_rate": 1.795698924731183e-05, |
| "loss": 0.0003, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.5145435346068926, |
| "grad_norm": 0.006218121023821658, |
| "learning_rate": 1.794162826420891e-05, |
| "loss": 0.0003, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.5183834117308246, |
| "grad_norm": 0.004661385155418944, |
| "learning_rate": 1.792626728110599e-05, |
| "loss": 0.0004, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.5222232888547567, |
| "grad_norm": 0.007451036130599556, |
| "learning_rate": 1.7910906298003075e-05, |
| "loss": 0.0003, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.5260631659786886, |
| "grad_norm": 0.005739057587058598, |
| "learning_rate": 1.7895545314900156e-05, |
| "loss": 0.0003, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.5299030431026207, |
| "grad_norm": 0.008293201974170215, |
| "learning_rate": 1.7880184331797237e-05, |
| "loss": 0.0004, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.5337429202265528, |
| "grad_norm": 0.011616342167072335, |
| "learning_rate": 1.7864823348694318e-05, |
| "loss": 0.0003, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5375827973504848, |
| "grad_norm": 0.011567680895725766, |
| "learning_rate": 1.78494623655914e-05, |
| "loss": 0.0004, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5414226744744168, |
| "grad_norm": 0.0057168290679564795, |
| "learning_rate": 1.783410138248848e-05, |
| "loss": 0.0003, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.5452625515983488, |
| "grad_norm": 0.010165783676708838, |
| "learning_rate": 1.781874039938556e-05, |
| "loss": 0.0003, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.5491024287222809, |
| "grad_norm": 0.0045309573507459015, |
| "learning_rate": 1.7803379416282645e-05, |
| "loss": 0.0003, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.552942305846213, |
| "grad_norm": 0.00811076581038844, |
| "learning_rate": 1.7788018433179726e-05, |
| "loss": 0.0003, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5567821829701449, |
| "grad_norm": 0.009995480779616097, |
| "learning_rate": 1.7772657450076807e-05, |
| "loss": 0.0003, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.560622060094077, |
| "grad_norm": 0.006925240596184182, |
| "learning_rate": 1.7757296466973888e-05, |
| "loss": 0.0003, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.564461937218009, |
| "grad_norm": 0.013412407169843198, |
| "learning_rate": 1.774193548387097e-05, |
| "loss": 0.0003, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.568301814341941, |
| "grad_norm": 0.012787736722349891, |
| "learning_rate": 1.772657450076805e-05, |
| "loss": 0.0003, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5721416914658731, |
| "grad_norm": 0.007058357804663414, |
| "learning_rate": 1.771121351766513e-05, |
| "loss": 0.0003, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.5759815685898051, |
| "grad_norm": 0.007736272349706681, |
| "learning_rate": 1.7695852534562215e-05, |
| "loss": 0.0003, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5798214457137372, |
| "grad_norm": 0.0033192017596056908, |
| "learning_rate": 1.7680491551459296e-05, |
| "loss": 0.0003, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5836613228376692, |
| "grad_norm": 0.014268997426681756, |
| "learning_rate": 1.7665130568356377e-05, |
| "loss": 0.0003, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5875011999616012, |
| "grad_norm": 0.007258373149860229, |
| "learning_rate": 1.7649769585253458e-05, |
| "loss": 0.0003, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5913410770855333, |
| "grad_norm": 0.005355993128279297, |
| "learning_rate": 1.763440860215054e-05, |
| "loss": 0.0003, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5951809542094653, |
| "grad_norm": 0.006539831015011762, |
| "learning_rate": 1.761904761904762e-05, |
| "loss": 0.0003, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5990208313333973, |
| "grad_norm": 0.0029413603917756745, |
| "learning_rate": 1.76036866359447e-05, |
| "loss": 0.0003, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.6028607084573294, |
| "grad_norm": 0.005311044247403118, |
| "learning_rate": 1.758832565284178e-05, |
| "loss": 0.0003, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.6067005855812614, |
| "grad_norm": 0.008810927875552908, |
| "learning_rate": 1.7572964669738866e-05, |
| "loss": 0.0003, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.6105404627051935, |
| "grad_norm": 0.019081216576469953, |
| "learning_rate": 1.7557603686635947e-05, |
| "loss": 0.0003, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.6143803398291254, |
| "grad_norm": 0.018890578715268194, |
| "learning_rate": 1.7542242703533028e-05, |
| "loss": 0.0003, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6182202169530575, |
| "grad_norm": 0.006788409840537928, |
| "learning_rate": 1.752688172043011e-05, |
| "loss": 0.0003, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.6220600940769896, |
| "grad_norm": 0.007667765007600492, |
| "learning_rate": 1.751152073732719e-05, |
| "loss": 0.0003, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.6258999712009216, |
| "grad_norm": 0.0042895580282391686, |
| "learning_rate": 1.749615975422427e-05, |
| "loss": 0.0003, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.6297398483248536, |
| "grad_norm": 0.0046351980587696125, |
| "learning_rate": 1.748079877112135e-05, |
| "loss": 0.0003, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.6335797254487856, |
| "grad_norm": 0.0033174467847263173, |
| "learning_rate": 1.7465437788018436e-05, |
| "loss": 0.0003, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.6374196025727177, |
| "grad_norm": 0.005803214350891364, |
| "learning_rate": 1.7450076804915517e-05, |
| "loss": 0.0003, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.6412594796966498, |
| "grad_norm": 0.010355179051111019, |
| "learning_rate": 1.7434715821812597e-05, |
| "loss": 0.0003, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.6450993568205817, |
| "grad_norm": 0.00695229076668098, |
| "learning_rate": 1.741935483870968e-05, |
| "loss": 0.0003, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6489392339445138, |
| "grad_norm": 0.0026433167192979326, |
| "learning_rate": 1.740399385560676e-05, |
| "loss": 0.0003, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.6527791110684458, |
| "grad_norm": 0.004001528867502428, |
| "learning_rate": 1.738863287250384e-05, |
| "loss": 0.0003, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6566189881923779, |
| "grad_norm": 0.005032030468208995, |
| "learning_rate": 1.737327188940092e-05, |
| "loss": 0.0003, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.6604588653163099, |
| "grad_norm": 0.012342312681179527, |
| "learning_rate": 1.7357910906298005e-05, |
| "loss": 0.0003, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6642987424402419, |
| "grad_norm": 0.006477437534892976, |
| "learning_rate": 1.7342549923195086e-05, |
| "loss": 0.0003, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.668138619564174, |
| "grad_norm": 0.003880319040550072, |
| "learning_rate": 1.7327188940092167e-05, |
| "loss": 0.0003, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.6719784966881059, |
| "grad_norm": 0.007668035468060641, |
| "learning_rate": 1.7311827956989248e-05, |
| "loss": 0.0003, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.675818373812038, |
| "grad_norm": 0.011038361878502203, |
| "learning_rate": 1.729646697388633e-05, |
| "loss": 0.0003, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6796582509359701, |
| "grad_norm": 0.004974769727903427, |
| "learning_rate": 1.728110599078341e-05, |
| "loss": 0.0003, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.6834981280599021, |
| "grad_norm": 0.004325284236550939, |
| "learning_rate": 1.726574500768049e-05, |
| "loss": 0.0003, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.6873380051838341, |
| "grad_norm": 0.0042080867657549705, |
| "learning_rate": 1.7250384024577572e-05, |
| "loss": 0.0003, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.6911778823077661, |
| "grad_norm": 0.0039328487679914535, |
| "learning_rate": 1.7235023041474656e-05, |
| "loss": 0.0003, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6950177594316982, |
| "grad_norm": 0.004207050570838415, |
| "learning_rate": 1.7219662058371737e-05, |
| "loss": 0.0003, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.6988576365556303, |
| "grad_norm": 0.0036413526215648487, |
| "learning_rate": 1.7204301075268818e-05, |
| "loss": 0.0003, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.7026975136795622, |
| "grad_norm": 0.004178601636330483, |
| "learning_rate": 1.71889400921659e-05, |
| "loss": 0.0003, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.7065373908034943, |
| "grad_norm": 0.00973331298779335, |
| "learning_rate": 1.717357910906298e-05, |
| "loss": 0.0003, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.7103772679274263, |
| "grad_norm": 0.004286254481889245, |
| "learning_rate": 1.715821812596006e-05, |
| "loss": 0.0003, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.7142171450513584, |
| "grad_norm": 0.005281447209048475, |
| "learning_rate": 1.7142857142857142e-05, |
| "loss": 0.0003, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.7180570221752904, |
| "grad_norm": 0.011520628884629904, |
| "learning_rate": 1.7127496159754226e-05, |
| "loss": 0.0003, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.7218968992992224, |
| "grad_norm": 0.015096661910423118, |
| "learning_rate": 1.7112135176651307e-05, |
| "loss": 0.0003, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.7257367764231545, |
| "grad_norm": 0.004269384954031992, |
| "learning_rate": 1.7096774193548388e-05, |
| "loss": 0.0003, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.7295766535470865, |
| "grad_norm": 0.007284302523487442, |
| "learning_rate": 1.708141321044547e-05, |
| "loss": 0.0003, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7334165306710185, |
| "grad_norm": 0.013816212104358527, |
| "learning_rate": 1.706605222734255e-05, |
| "loss": 0.0003, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.7372564077949506, |
| "grad_norm": 0.006389002588565134, |
| "learning_rate": 1.705069124423963e-05, |
| "loss": 0.0003, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.7410962849188826, |
| "grad_norm": 0.009485308445395068, |
| "learning_rate": 1.7035330261136712e-05, |
| "loss": 0.0003, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.7449361620428147, |
| "grad_norm": 0.004423329749614452, |
| "learning_rate": 1.7019969278033796e-05, |
| "loss": 0.0003, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.7487760391667466, |
| "grad_norm": 0.007527583045286338, |
| "learning_rate": 1.7004608294930877e-05, |
| "loss": 0.0003, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7526159162906787, |
| "grad_norm": 0.014586231809369528, |
| "learning_rate": 1.6989247311827958e-05, |
| "loss": 0.0003, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7564557934146108, |
| "grad_norm": 0.006651075913511302, |
| "learning_rate": 1.697388632872504e-05, |
| "loss": 0.0003, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.7602956705385427, |
| "grad_norm": 0.009325021217663211, |
| "learning_rate": 1.695852534562212e-05, |
| "loss": 0.0003, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.7641355476624748, |
| "grad_norm": 0.00448309467347562, |
| "learning_rate": 1.69431643625192e-05, |
| "loss": 0.0003, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.7679754247864068, |
| "grad_norm": 0.00862440090599278, |
| "learning_rate": 1.6927803379416285e-05, |
| "loss": 0.0003, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7718153019103389, |
| "grad_norm": 0.003564225910536377, |
| "learning_rate": 1.6912442396313366e-05, |
| "loss": 0.0003, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.775655179034271, |
| "grad_norm": 0.009884322290648858, |
| "learning_rate": 1.6897081413210447e-05, |
| "loss": 0.0003, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.7794950561582029, |
| "grad_norm": 0.0052686365175910795, |
| "learning_rate": 1.6881720430107528e-05, |
| "loss": 0.0003, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.783334933282135, |
| "grad_norm": 0.006169173971857438, |
| "learning_rate": 1.686635944700461e-05, |
| "loss": 0.0003, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.787174810406067, |
| "grad_norm": 0.006194757569521478, |
| "learning_rate": 1.685099846390169e-05, |
| "loss": 0.0003, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.791014687529999, |
| "grad_norm": 0.00415409003374665, |
| "learning_rate": 1.683563748079877e-05, |
| "loss": 0.0003, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.7948545646539311, |
| "grad_norm": 0.01310714973576285, |
| "learning_rate": 1.6820276497695855e-05, |
| "loss": 0.0003, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.7986944417778631, |
| "grad_norm": 0.0046992213585696965, |
| "learning_rate": 1.6804915514592936e-05, |
| "loss": 0.0003, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.8025343189017952, |
| "grad_norm": 0.006291272173141966, |
| "learning_rate": 1.6789554531490017e-05, |
| "loss": 0.0003, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.8063741960257271, |
| "grad_norm": 0.007546577147358044, |
| "learning_rate": 1.6774193548387098e-05, |
| "loss": 0.0003, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.8102140731496592, |
| "grad_norm": 0.005443900744121676, |
| "learning_rate": 1.675883256528418e-05, |
| "loss": 0.0003, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.8140539502735913, |
| "grad_norm": 0.0019010839135315712, |
| "learning_rate": 1.674347158218126e-05, |
| "loss": 0.0003, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.8178938273975233, |
| "grad_norm": 0.009951343848549921, |
| "learning_rate": 1.6728110599078344e-05, |
| "loss": 0.0003, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.8217337045214553, |
| "grad_norm": 0.003896513453508667, |
| "learning_rate": 1.6712749615975425e-05, |
| "loss": 0.0003, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.8255735816453873, |
| "grad_norm": 0.014173651446143855, |
| "learning_rate": 1.6697388632872506e-05, |
| "loss": 0.0003, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.8294134587693194, |
| "grad_norm": 0.003677873891818207, |
| "learning_rate": 1.6682027649769587e-05, |
| "loss": 0.0003, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.8332533358932515, |
| "grad_norm": 0.007358001701120111, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.0003, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.8370932130171834, |
| "grad_norm": 0.014586891892807584, |
| "learning_rate": 1.665130568356375e-05, |
| "loss": 0.0003, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.8409330901411155, |
| "grad_norm": 0.011685080400631487, |
| "learning_rate": 1.663594470046083e-05, |
| "loss": 0.0003, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.8447729672650475, |
| "grad_norm": 0.006355858658307203, |
| "learning_rate": 1.6620583717357914e-05, |
| "loss": 0.0003, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8486128443889795, |
| "grad_norm": 0.004711497431600735, |
| "learning_rate": 1.6605222734254995e-05, |
| "loss": 0.0003, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.8524527215129116, |
| "grad_norm": 0.00975542945138729, |
| "learning_rate": 1.6589861751152075e-05, |
| "loss": 0.0003, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.8562925986368436, |
| "grad_norm": 0.009147508665138223, |
| "learning_rate": 1.6574500768049156e-05, |
| "loss": 0.0003, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.8601324757607757, |
| "grad_norm": 0.004608721023888835, |
| "learning_rate": 1.6559139784946237e-05, |
| "loss": 0.0003, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.8639723528847076, |
| "grad_norm": 0.007377051739331657, |
| "learning_rate": 1.6543778801843318e-05, |
| "loss": 0.0003, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.8678122300086397, |
| "grad_norm": 0.012946767220354212, |
| "learning_rate": 1.6528417818740403e-05, |
| "loss": 0.0003, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.8716521071325718, |
| "grad_norm": 0.014416613075616386, |
| "learning_rate": 1.6513056835637483e-05, |
| "loss": 0.0003, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.8754919842565038, |
| "grad_norm": 0.006287700917447439, |
| "learning_rate": 1.6497695852534564e-05, |
| "loss": 0.0003, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.8793318613804358, |
| "grad_norm": 0.0137632558355742, |
| "learning_rate": 1.6482334869431645e-05, |
| "loss": 0.0003, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.8831717385043678, |
| "grad_norm": 0.004387114997812537, |
| "learning_rate": 1.6466973886328726e-05, |
| "loss": 0.0003, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.8870116156282999, |
| "grad_norm": 0.006112696865963161, |
| "learning_rate": 1.6451612903225807e-05, |
| "loss": 0.0003, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.890851492752232, |
| "grad_norm": 0.00335985624587701, |
| "learning_rate": 1.643625192012289e-05, |
| "loss": 0.0003, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.8946913698761639, |
| "grad_norm": 0.012311531291747248, |
| "learning_rate": 1.6420890937019972e-05, |
| "loss": 0.0003, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.898531247000096, |
| "grad_norm": 0.0056121939234220625, |
| "learning_rate": 1.6405529953917053e-05, |
| "loss": 0.0003, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.902371124124028, |
| "grad_norm": 0.008461725308188539, |
| "learning_rate": 1.6390168970814134e-05, |
| "loss": 0.0003, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.9062110012479601, |
| "grad_norm": 0.007674455481245043, |
| "learning_rate": 1.6374807987711215e-05, |
| "loss": 0.0003, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.9100508783718921, |
| "grad_norm": 0.015158413045124617, |
| "learning_rate": 1.6359447004608296e-05, |
| "loss": 0.0003, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.9138907554958241, |
| "grad_norm": 0.012286030614540876, |
| "learning_rate": 1.6344086021505377e-05, |
| "loss": 0.0003, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.9177306326197562, |
| "grad_norm": 0.0030696065567895983, |
| "learning_rate": 1.632872503840246e-05, |
| "loss": 0.0003, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.9215705097436881, |
| "grad_norm": 0.004565605709323647, |
| "learning_rate": 1.6313364055299542e-05, |
| "loss": 0.0003, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.9254103868676202, |
| "grad_norm": 0.006838334269203355, |
| "learning_rate": 1.6298003072196623e-05, |
| "loss": 0.0003, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.9292502639915523, |
| "grad_norm": 0.00990773655397776, |
| "learning_rate": 1.6282642089093704e-05, |
| "loss": 0.0003, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.9330901411154843, |
| "grad_norm": 0.013734696643942659, |
| "learning_rate": 1.6267281105990785e-05, |
| "loss": 0.0003, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.9369300182394164, |
| "grad_norm": 0.004412383577588846, |
| "learning_rate": 1.6251920122887866e-05, |
| "loss": 0.0003, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.9407698953633483, |
| "grad_norm": 0.00711020501768463, |
| "learning_rate": 1.6236559139784947e-05, |
| "loss": 0.0003, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.9446097724872804, |
| "grad_norm": 0.004814816584197371, |
| "learning_rate": 1.622119815668203e-05, |
| "loss": 0.0003, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.9484496496112125, |
| "grad_norm": 0.007067507479770793, |
| "learning_rate": 1.6205837173579112e-05, |
| "loss": 0.0003, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.9522895267351444, |
| "grad_norm": 0.0019971454498982043, |
| "learning_rate": 1.6190476190476193e-05, |
| "loss": 0.0003, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.9561294038590765, |
| "grad_norm": 0.003950118900464544, |
| "learning_rate": 1.6175115207373274e-05, |
| "loss": 0.0003, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.9599692809830085, |
| "grad_norm": 0.007758837909723049, |
| "learning_rate": 1.6159754224270355e-05, |
| "loss": 0.0003, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9638091581069406, |
| "grad_norm": 0.0033188489546766026, |
| "learning_rate": 1.6144393241167436e-05, |
| "loss": 0.0003, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.9676490352308726, |
| "grad_norm": 0.005559097491387871, |
| "learning_rate": 1.6129032258064517e-05, |
| "loss": 0.0003, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.9714889123548046, |
| "grad_norm": 0.0038490165506594267, |
| "learning_rate": 1.61136712749616e-05, |
| "loss": 0.0003, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.9753287894787367, |
| "grad_norm": 0.00511457830944101, |
| "learning_rate": 1.6098310291858682e-05, |
| "loss": 0.0003, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.9791686666026688, |
| "grad_norm": 0.0030355393802215156, |
| "learning_rate": 1.6082949308755763e-05, |
| "loss": 0.0003, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.9830085437266007, |
| "grad_norm": 0.004994307766841962, |
| "learning_rate": 1.6067588325652844e-05, |
| "loss": 0.0003, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.9868484208505328, |
| "grad_norm": 0.00586917516793509, |
| "learning_rate": 1.6052227342549925e-05, |
| "loss": 0.0003, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.9906882979744648, |
| "grad_norm": 0.003295655448503593, |
| "learning_rate": 1.6036866359447006e-05, |
| "loss": 0.0003, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.9945281750983969, |
| "grad_norm": 0.0017248350640545309, |
| "learning_rate": 1.6021505376344087e-05, |
| "loss": 0.0003, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.9983680522223289, |
| "grad_norm": 0.008360356694852496, |
| "learning_rate": 1.600614439324117e-05, |
| "loss": 0.0003, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.001919938561966, |
| "grad_norm": 0.00941746675646077, |
| "learning_rate": 1.5990783410138252e-05, |
| "loss": 0.0003, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.005759815685898, |
| "grad_norm": 0.007001645607480019, |
| "learning_rate": 1.5975422427035333e-05, |
| "loss": 0.0003, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.00959969280983, |
| "grad_norm": 0.00872263934893287, |
| "learning_rate": 1.5960061443932414e-05, |
| "loss": 0.0003, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.0134395699337622, |
| "grad_norm": 0.006998252185455207, |
| "learning_rate": 1.5944700460829495e-05, |
| "loss": 0.0003, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.0172794470576942, |
| "grad_norm": 0.008293218599741276, |
| "learning_rate": 1.5929339477726576e-05, |
| "loss": 0.0003, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.021119324181626, |
| "grad_norm": 0.006889505177967215, |
| "learning_rate": 1.5913978494623657e-05, |
| "loss": 0.0003, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.0249592013055582, |
| "grad_norm": 0.010069071017105902, |
| "learning_rate": 1.589861751152074e-05, |
| "loss": 0.0003, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.0287990784294903, |
| "grad_norm": 0.024634152994068834, |
| "learning_rate": 1.5883256528417822e-05, |
| "loss": 0.0003, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.0326389555534223, |
| "grad_norm": 0.010662609432049419, |
| "learning_rate": 1.5867895545314903e-05, |
| "loss": 0.0003, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.0364788326773544, |
| "grad_norm": 0.013283658088640498, |
| "learning_rate": 1.5852534562211984e-05, |
| "loss": 0.0003, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.0403187098012863, |
| "grad_norm": 0.004408580576716252, |
| "learning_rate": 1.5837173579109065e-05, |
| "loss": 0.0003, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.0441585869252183, |
| "grad_norm": 0.005069354965439019, |
| "learning_rate": 1.5821812596006145e-05, |
| "loss": 0.0003, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.0479984640491504, |
| "grad_norm": 0.005316965623453909, |
| "learning_rate": 1.5806451612903226e-05, |
| "loss": 0.0003, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.0518383411730825, |
| "grad_norm": 0.004614759009371165, |
| "learning_rate": 1.5791090629800307e-05, |
| "loss": 0.0003, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.0556782182970146, |
| "grad_norm": 0.006520159764607735, |
| "learning_rate": 1.577572964669739e-05, |
| "loss": 0.0003, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.0595180954209464, |
| "grad_norm": 0.007212891832854262, |
| "learning_rate": 1.5760368663594473e-05, |
| "loss": 0.0003, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.0633579725448785, |
| "grad_norm": 0.007329478767553912, |
| "learning_rate": 1.5745007680491553e-05, |
| "loss": 0.0003, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.0671978496688106, |
| "grad_norm": 0.012902138519829355, |
| "learning_rate": 1.5729646697388634e-05, |
| "loss": 0.0003, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.0710377267927427, |
| "grad_norm": 0.008115156348023293, |
| "learning_rate": 1.5714285714285715e-05, |
| "loss": 0.0003, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.0748776039166748, |
| "grad_norm": 0.0031222148539581814, |
| "learning_rate": 1.5698924731182796e-05, |
| "loss": 0.0003, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.0787174810406066, |
| "grad_norm": 0.00358892329366379, |
| "learning_rate": 1.5683563748079877e-05, |
| "loss": 0.0003, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.0825573581645387, |
| "grad_norm": 0.003731764641158579, |
| "learning_rate": 1.566820276497696e-05, |
| "loss": 0.0003, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.0863972352884708, |
| "grad_norm": 0.009651710392543447, |
| "learning_rate": 1.5652841781874042e-05, |
| "loss": 0.0003, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.0902371124124028, |
| "grad_norm": 0.007272437262278606, |
| "learning_rate": 1.5637480798771123e-05, |
| "loss": 0.0003, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.094076989536335, |
| "grad_norm": 0.0038138336241257302, |
| "learning_rate": 1.5622119815668204e-05, |
| "loss": 0.0003, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.0979168666602668, |
| "grad_norm": 0.007645435686600415, |
| "learning_rate": 1.5606758832565285e-05, |
| "loss": 0.0003, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.1017567437841989, |
| "grad_norm": 0.004228910730827151, |
| "learning_rate": 1.5591397849462366e-05, |
| "loss": 0.0003, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.105596620908131, |
| "grad_norm": 0.007428720623939754, |
| "learning_rate": 1.5576036866359447e-05, |
| "loss": 0.0003, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.109436498032063, |
| "grad_norm": 0.0109342599815278, |
| "learning_rate": 1.556067588325653e-05, |
| "loss": 0.0003, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.113276375155995, |
| "grad_norm": 0.004880866544074755, |
| "learning_rate": 1.5545314900153612e-05, |
| "loss": 0.0003, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.117116252279927, |
| "grad_norm": 0.013738617802960406, |
| "learning_rate": 1.5529953917050693e-05, |
| "loss": 0.0003, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.120956129403859, |
| "grad_norm": 0.00498835427587417, |
| "learning_rate": 1.5514592933947774e-05, |
| "loss": 0.0003, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.124796006527791, |
| "grad_norm": 0.0030319982820064474, |
| "learning_rate": 1.5499231950844855e-05, |
| "loss": 0.0003, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.1286358836517232, |
| "grad_norm": 0.01067113935847921, |
| "learning_rate": 1.5483870967741936e-05, |
| "loss": 0.0003, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.1324757607756553, |
| "grad_norm": 0.003620520899235841, |
| "learning_rate": 1.5468509984639017e-05, |
| "loss": 0.0003, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.1363156378995871, |
| "grad_norm": 0.004795888699506633, |
| "learning_rate": 1.5453149001536098e-05, |
| "loss": 0.0003, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.1401555150235192, |
| "grad_norm": 0.006942279632235508, |
| "learning_rate": 1.5437788018433182e-05, |
| "loss": 0.0003, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.1439953921474513, |
| "grad_norm": 0.002130066736956189, |
| "learning_rate": 1.5422427035330263e-05, |
| "loss": 0.0003, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.1478352692713834, |
| "grad_norm": 0.005306111233629307, |
| "learning_rate": 1.5407066052227344e-05, |
| "loss": 0.0003, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.1516751463953154, |
| "grad_norm": 0.007715185339433146, |
| "learning_rate": 1.5391705069124425e-05, |
| "loss": 0.0003, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.1555150235192473, |
| "grad_norm": 0.005655458788113601, |
| "learning_rate": 1.5376344086021506e-05, |
| "loss": 0.0003, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.1593549006431794, |
| "grad_norm": 0.008468884041379838, |
| "learning_rate": 1.5360983102918587e-05, |
| "loss": 0.0003, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.1631947777671114, |
| "grad_norm": 0.015885667831357166, |
| "learning_rate": 1.5345622119815668e-05, |
| "loss": 0.0003, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.1670346548910435, |
| "grad_norm": 0.003501130092621392, |
| "learning_rate": 1.5330261136712752e-05, |
| "loss": 0.0003, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.1708745320149756, |
| "grad_norm": 0.007702536520407232, |
| "learning_rate": 1.5314900153609833e-05, |
| "loss": 0.0003, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.1747144091389075, |
| "grad_norm": 0.00397165276711294, |
| "learning_rate": 1.5299539170506914e-05, |
| "loss": 0.0003, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.1785542862628395, |
| "grad_norm": 0.005806224235699376, |
| "learning_rate": 1.5284178187403995e-05, |
| "loss": 0.0003, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.1823941633867716, |
| "grad_norm": 0.014031967789109468, |
| "learning_rate": 1.5268817204301076e-05, |
| "loss": 0.0003, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.1862340405107037, |
| "grad_norm": 0.011597083317184202, |
| "learning_rate": 1.5253456221198157e-05, |
| "loss": 0.0003, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.1900739176346358, |
| "grad_norm": 0.003137805117200966, |
| "learning_rate": 1.523809523809524e-05, |
| "loss": 0.0003, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.1939137947585676, |
| "grad_norm": 0.008571751181818212, |
| "learning_rate": 1.5222734254992322e-05, |
| "loss": 0.0003, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.1977536718824997, |
| "grad_norm": 0.010354799239461948, |
| "learning_rate": 1.5207373271889403e-05, |
| "loss": 0.0003, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.2015935490064318, |
| "grad_norm": 0.006888806938098952, |
| "learning_rate": 1.5192012288786484e-05, |
| "loss": 0.0003, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.2054334261303639, |
| "grad_norm": 0.00560528529757068, |
| "learning_rate": 1.5176651305683565e-05, |
| "loss": 0.0003, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.209273303254296, |
| "grad_norm": 0.004803989045845827, |
| "learning_rate": 1.5161290322580646e-05, |
| "loss": 0.0003, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.2131131803782278, |
| "grad_norm": 0.010138889246585629, |
| "learning_rate": 1.5145929339477728e-05, |
| "loss": 0.0003, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.2169530575021599, |
| "grad_norm": 0.010337939960763999, |
| "learning_rate": 1.5130568356374809e-05, |
| "loss": 0.0003, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.220792934626092, |
| "grad_norm": 0.0034014308570009524, |
| "learning_rate": 1.511520737327189e-05, |
| "loss": 0.0003, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.224632811750024, |
| "grad_norm": 0.0029402516168536455, |
| "learning_rate": 1.5099846390168973e-05, |
| "loss": 0.0003, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.228472688873956, |
| "grad_norm": 0.0077860538241216895, |
| "learning_rate": 1.5084485407066054e-05, |
| "loss": 0.0003, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.232312565997888, |
| "grad_norm": 0.005558974463946693, |
| "learning_rate": 1.5069124423963135e-05, |
| "loss": 0.0003, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.23615244312182, |
| "grad_norm": 0.010803718895790473, |
| "learning_rate": 1.5053763440860215e-05, |
| "loss": 0.0003, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.2399923202457521, |
| "grad_norm": 0.007530607391625148, |
| "learning_rate": 1.5038402457757298e-05, |
| "loss": 0.0003, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.2438321973696842, |
| "grad_norm": 0.0024515971935964494, |
| "learning_rate": 1.5023041474654379e-05, |
| "loss": 0.0003, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.2476720744936163, |
| "grad_norm": 0.004793434169621187, |
| "learning_rate": 1.500768049155146e-05, |
| "loss": 0.0003, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.2515119516175481, |
| "grad_norm": 0.005198731469624748, |
| "learning_rate": 1.4992319508448543e-05, |
| "loss": 0.0003, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.2553518287414802, |
| "grad_norm": 0.002181117809025224, |
| "learning_rate": 1.4976958525345623e-05, |
| "loss": 0.0003, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.2591917058654123, |
| "grad_norm": 0.0037625493242586724, |
| "learning_rate": 1.4961597542242704e-05, |
| "loss": 0.0003, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.2630315829893444, |
| "grad_norm": 0.005316485072585287, |
| "learning_rate": 1.4946236559139787e-05, |
| "loss": 0.0003, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.2668714601132764, |
| "grad_norm": 0.004784634700102955, |
| "learning_rate": 1.4930875576036868e-05, |
| "loss": 0.0003, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.2707113372372083, |
| "grad_norm": 0.012696100545567314, |
| "learning_rate": 1.4915514592933949e-05, |
| "loss": 0.0003, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.2745512143611404, |
| "grad_norm": 0.005311522663075818, |
| "learning_rate": 1.490015360983103e-05, |
| "loss": 0.0003, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.2783910914850725, |
| "grad_norm": 0.0028746469015276917, |
| "learning_rate": 1.4884792626728112e-05, |
| "loss": 0.0003, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.2822309686090045, |
| "grad_norm": 0.010467141532211954, |
| "learning_rate": 1.4869431643625193e-05, |
| "loss": 0.0003, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.2860708457329366, |
| "grad_norm": 0.004927112231184059, |
| "learning_rate": 1.4854070660522274e-05, |
| "loss": 0.0003, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.2899107228568685, |
| "grad_norm": 0.004240757320008939, |
| "learning_rate": 1.4838709677419357e-05, |
| "loss": 0.0003, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.2937505999808006, |
| "grad_norm": 0.004593149961249671, |
| "learning_rate": 1.4823348694316438e-05, |
| "loss": 0.0003, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.2975904771047326, |
| "grad_norm": 0.012379522683153942, |
| "learning_rate": 1.4807987711213519e-05, |
| "loss": 0.0003, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.3014303542286647, |
| "grad_norm": 0.0030617662082256646, |
| "learning_rate": 1.47926267281106e-05, |
| "loss": 0.0003, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.3052702313525968, |
| "grad_norm": 0.012477621815569138, |
| "learning_rate": 1.477726574500768e-05, |
| "loss": 0.0003, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.3091101084765286, |
| "grad_norm": 0.008973089780140532, |
| "learning_rate": 1.4761904761904763e-05, |
| "loss": 0.0003, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.3129499856004607, |
| "grad_norm": 0.006012620992408176, |
| "learning_rate": 1.4746543778801846e-05, |
| "loss": 0.0003, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.3167898627243928, |
| "grad_norm": 0.002978034783135485, |
| "learning_rate": 1.4731182795698927e-05, |
| "loss": 0.0003, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.3206297398483249, |
| "grad_norm": 0.0076361528432857696, |
| "learning_rate": 1.4715821812596008e-05, |
| "loss": 0.0003, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.324469616972257, |
| "grad_norm": 0.01162698278703534, |
| "learning_rate": 1.4700460829493089e-05, |
| "loss": 0.0003, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.3283094940961888, |
| "grad_norm": 0.0023240753476107792, |
| "learning_rate": 1.468509984639017e-05, |
| "loss": 0.0003, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.332149371220121, |
| "grad_norm": 0.005329365238908933, |
| "learning_rate": 1.466973886328725e-05, |
| "loss": 0.0003, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.335989248344053, |
| "grad_norm": 0.007711907461069916, |
| "learning_rate": 1.4654377880184335e-05, |
| "loss": 0.0003, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.339829125467985, |
| "grad_norm": 0.0051703315905598365, |
| "learning_rate": 1.4639016897081416e-05, |
| "loss": 0.0003, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.3436690025919171, |
| "grad_norm": 0.00656849551678766, |
| "learning_rate": 1.4623655913978497e-05, |
| "loss": 0.0003, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.347508879715849, |
| "grad_norm": 0.004684960157051687, |
| "learning_rate": 1.4608294930875578e-05, |
| "loss": 0.0003, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.351348756839781, |
| "grad_norm": 0.003653453570474896, |
| "learning_rate": 1.4592933947772658e-05, |
| "loss": 0.0003, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.3551886339637131, |
| "grad_norm": 0.0036918985360549997, |
| "learning_rate": 1.457757296466974e-05, |
| "loss": 0.0003, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.3590285110876452, |
| "grad_norm": 0.005265072866815289, |
| "learning_rate": 1.456221198156682e-05, |
| "loss": 0.0003, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.3628683882115773, |
| "grad_norm": 0.002918853603638478, |
| "learning_rate": 1.4546850998463905e-05, |
| "loss": 0.0003, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.3667082653355092, |
| "grad_norm": 0.00935186960220143, |
| "learning_rate": 1.4531490015360986e-05, |
| "loss": 0.0003, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.3705481424594412, |
| "grad_norm": 0.0047564377664160475, |
| "learning_rate": 1.4516129032258066e-05, |
| "loss": 0.0003, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.3743880195833733, |
| "grad_norm": 0.0027827569792821744, |
| "learning_rate": 1.4500768049155147e-05, |
| "loss": 0.0003, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.3782278967073054, |
| "grad_norm": 0.007874618569613728, |
| "learning_rate": 1.4485407066052228e-05, |
| "loss": 0.0003, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.3820677738312375, |
| "grad_norm": 0.0021217629192740463, |
| "learning_rate": 1.447004608294931e-05, |
| "loss": 0.0003, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.3859076509551693, |
| "grad_norm": 0.0015950623575258483, |
| "learning_rate": 1.445468509984639e-05, |
| "loss": 0.0003, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.3897475280791014, |
| "grad_norm": 0.014526354269413554, |
| "learning_rate": 1.4439324116743471e-05, |
| "loss": 0.0003, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.3935874052030335, |
| "grad_norm": 0.004710032072376596, |
| "learning_rate": 1.4423963133640555e-05, |
| "loss": 0.0003, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.3974272823269656, |
| "grad_norm": 0.008945201526158983, |
| "learning_rate": 1.4408602150537636e-05, |
| "loss": 0.0003, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.4012671594508976, |
| "grad_norm": 0.004277811812491989, |
| "learning_rate": 1.4393241167434717e-05, |
| "loss": 0.0003, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.4051070365748295, |
| "grad_norm": 0.0039422467987323675, |
| "learning_rate": 1.4377880184331798e-05, |
| "loss": 0.0003, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.4089469136987616, |
| "grad_norm": 0.00676608035228549, |
| "learning_rate": 1.4362519201228879e-05, |
| "loss": 0.0003, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.4127867908226936, |
| "grad_norm": 0.010133852268400016, |
| "learning_rate": 1.434715821812596e-05, |
| "loss": 0.0003, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.4166266679466257, |
| "grad_norm": 0.009268588463915765, |
| "learning_rate": 1.4331797235023041e-05, |
| "loss": 0.0003, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.4204665450705578, |
| "grad_norm": 0.01313301243339411, |
| "learning_rate": 1.4316436251920125e-05, |
| "loss": 0.0003, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.4243064221944897, |
| "grad_norm": 0.0037448179676893684, |
| "learning_rate": 1.4301075268817206e-05, |
| "loss": 0.0003, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.4281462993184217, |
| "grad_norm": 0.007552592646915242, |
| "learning_rate": 1.4285714285714287e-05, |
| "loss": 0.0003, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.4319861764423538, |
| "grad_norm": 0.0020607608386333867, |
| "learning_rate": 1.4270353302611368e-05, |
| "loss": 0.0003, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.435826053566286, |
| "grad_norm": 0.0017099532229802538, |
| "learning_rate": 1.4254992319508449e-05, |
| "loss": 0.0003, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.439665930690218, |
| "grad_norm": 0.009438446281080615, |
| "learning_rate": 1.423963133640553e-05, |
| "loss": 0.0003, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.4435058078141498, |
| "grad_norm": 0.0031712205983693118, |
| "learning_rate": 1.422427035330261e-05, |
| "loss": 0.0003, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.447345684938082, |
| "grad_norm": 0.004189847354145225, |
| "learning_rate": 1.4208909370199695e-05, |
| "loss": 0.0003, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.451185562062014, |
| "grad_norm": 0.01235381752195261, |
| "learning_rate": 1.4193548387096776e-05, |
| "loss": 0.0003, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.455025439185946, |
| "grad_norm": 0.008228828470936082, |
| "learning_rate": 1.4178187403993857e-05, |
| "loss": 0.0003, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.4588653163098781, |
| "grad_norm": 0.00803929836492854, |
| "learning_rate": 1.4162826420890938e-05, |
| "loss": 0.0003, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.46270519343381, |
| "grad_norm": 0.007786110195975117, |
| "learning_rate": 1.4147465437788019e-05, |
| "loss": 0.0003, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.466545070557742, |
| "grad_norm": 0.001228617041920703, |
| "learning_rate": 1.41321044546851e-05, |
| "loss": 0.0003, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.4703849476816742, |
| "grad_norm": 0.0038876544517334236, |
| "learning_rate": 1.4116743471582182e-05, |
| "loss": 0.0003, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.4742248248056062, |
| "grad_norm": 0.00866982390635113, |
| "learning_rate": 1.4101382488479263e-05, |
| "loss": 0.0003, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.4780647019295383, |
| "grad_norm": 0.011102877135429783, |
| "learning_rate": 1.4086021505376346e-05, |
| "loss": 0.0003, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.4819045790534702, |
| "grad_norm": 0.01047300225237876, |
| "learning_rate": 1.4070660522273427e-05, |
| "loss": 0.0003, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.4857444561774023, |
| "grad_norm": 0.006851930231571451, |
| "learning_rate": 1.4055299539170508e-05, |
| "loss": 0.0003, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.4895843333013343, |
| "grad_norm": 0.004987839668683684, |
| "learning_rate": 1.4039938556067589e-05, |
| "loss": 0.0003, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.4934242104252664, |
| "grad_norm": 0.0034966043588402418, |
| "learning_rate": 1.4024577572964671e-05, |
| "loss": 0.0003, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.4972640875491985, |
| "grad_norm": 0.002801267441148025, |
| "learning_rate": 1.4009216589861752e-05, |
| "loss": 0.0003, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.5011039646731303, |
| "grad_norm": 0.0018703310178060316, |
| "learning_rate": 1.3993855606758833e-05, |
| "loss": 0.0003, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.5049438417970626, |
| "grad_norm": 0.0015330340455295792, |
| "learning_rate": 1.3978494623655916e-05, |
| "loss": 0.0003, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.5087837189209945, |
| "grad_norm": 0.006896242096430408, |
| "learning_rate": 1.3963133640552997e-05, |
| "loss": 0.0003, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.5126235960449266, |
| "grad_norm": 0.0030363392744381756, |
| "learning_rate": 1.3947772657450078e-05, |
| "loss": 0.0003, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.5164634731688587, |
| "grad_norm": 0.0036790867879865252, |
| "learning_rate": 1.3932411674347159e-05, |
| "loss": 0.0003, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.5203033502927905, |
| "grad_norm": 0.003933748182425131, |
| "learning_rate": 1.3917050691244241e-05, |
| "loss": 0.0003, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.5241432274167228, |
| "grad_norm": 0.004219499765943358, |
| "learning_rate": 1.3901689708141322e-05, |
| "loss": 0.0003, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.5279831045406547, |
| "grad_norm": 0.007300405744499423, |
| "learning_rate": 1.3886328725038403e-05, |
| "loss": 0.0003, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.5318229816645867, |
| "grad_norm": 0.004617157024817587, |
| "learning_rate": 1.3870967741935486e-05, |
| "loss": 0.0003, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.5356628587885188, |
| "grad_norm": 0.010017965659017577, |
| "learning_rate": 1.3855606758832567e-05, |
| "loss": 0.0003, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.5395027359124507, |
| "grad_norm": 0.006735678653952309, |
| "learning_rate": 1.3840245775729648e-05, |
| "loss": 0.0003, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.543342613036383, |
| "grad_norm": 0.0050467679764191345, |
| "learning_rate": 1.382488479262673e-05, |
| "loss": 0.0003, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.5471824901603148, |
| "grad_norm": 0.0068324972480196195, |
| "learning_rate": 1.3809523809523811e-05, |
| "loss": 0.0003, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.551022367284247, |
| "grad_norm": 0.0114684792397768, |
| "learning_rate": 1.3794162826420892e-05, |
| "loss": 0.0003, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.554862244408179, |
| "grad_norm": 0.005883994452757438, |
| "learning_rate": 1.3778801843317973e-05, |
| "loss": 0.0003, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.5587021215321109, |
| "grad_norm": 0.01105835794681893, |
| "learning_rate": 1.3763440860215056e-05, |
| "loss": 0.0003, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.5625419986560432, |
| "grad_norm": 0.005406277635716297, |
| "learning_rate": 1.3748079877112136e-05, |
| "loss": 0.0003, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.566381875779975, |
| "grad_norm": 0.003846164185768484, |
| "learning_rate": 1.3732718894009217e-05, |
| "loss": 0.0003, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.570221752903907, |
| "grad_norm": 0.017545096795138392, |
| "learning_rate": 1.37173579109063e-05, |
| "loss": 0.0003, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.5740616300278392, |
| "grad_norm": 0.001157703963834084, |
| "learning_rate": 1.3701996927803381e-05, |
| "loss": 0.0003, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.577901507151771, |
| "grad_norm": 0.0047739921811557765, |
| "learning_rate": 1.3686635944700462e-05, |
| "loss": 0.0003, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.5817413842757033, |
| "grad_norm": 0.005347890941721552, |
| "learning_rate": 1.3671274961597543e-05, |
| "loss": 0.0003, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.5855812613996352, |
| "grad_norm": 0.0057929878311273305, |
| "learning_rate": 1.3655913978494624e-05, |
| "loss": 0.0003, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.5894211385235673, |
| "grad_norm": 0.0025138054493328834, |
| "learning_rate": 1.3640552995391706e-05, |
| "loss": 0.0003, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.5932610156474993, |
| "grad_norm": 0.005021480665595978, |
| "learning_rate": 1.3625192012288789e-05, |
| "loss": 0.0003, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.5971008927714312, |
| "grad_norm": 0.00485782939696147, |
| "learning_rate": 1.360983102918587e-05, |
| "loss": 0.0003, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.6009407698953635, |
| "grad_norm": 0.005107522993390195, |
| "learning_rate": 1.359447004608295e-05, |
| "loss": 0.0003, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.6047806470192953, |
| "grad_norm": 0.004581168496164048, |
| "learning_rate": 1.3579109062980032e-05, |
| "loss": 0.0003, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.6086205241432274, |
| "grad_norm": 0.0034042153583185666, |
| "learning_rate": 1.3563748079877113e-05, |
| "loss": 0.0003, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.6124604012671595, |
| "grad_norm": 0.004002545971611289, |
| "learning_rate": 1.3548387096774194e-05, |
| "loss": 0.0003, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.6163002783910914, |
| "grad_norm": 0.01005471207244953, |
| "learning_rate": 1.3533026113671278e-05, |
| "loss": 0.0003, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.6201401555150237, |
| "grad_norm": 0.007276858107586832, |
| "learning_rate": 1.3517665130568359e-05, |
| "loss": 0.0003, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.6239800326389555, |
| "grad_norm": 0.015261156983378876, |
| "learning_rate": 1.350230414746544e-05, |
| "loss": 0.0003, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.6278199097628876, |
| "grad_norm": 0.001529811428658314, |
| "learning_rate": 1.348694316436252e-05, |
| "loss": 0.0003, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.6316597868868197, |
| "grad_norm": 0.01176486699064342, |
| "learning_rate": 1.3471582181259602e-05, |
| "loss": 0.0003, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.6354996640107515, |
| "grad_norm": 0.005155818155633164, |
| "learning_rate": 1.3456221198156683e-05, |
| "loss": 0.0003, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.6393395411346838, |
| "grad_norm": 0.003646302493009192, |
| "learning_rate": 1.3440860215053763e-05, |
| "loss": 0.0003, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.6431794182586157, |
| "grad_norm": 0.013145592318521696, |
| "learning_rate": 1.3425499231950848e-05, |
| "loss": 0.0003, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.6470192953825478, |
| "grad_norm": 0.00808033295372671, |
| "learning_rate": 1.3410138248847929e-05, |
| "loss": 0.0003, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.6508591725064798, |
| "grad_norm": 0.004814145910232119, |
| "learning_rate": 1.339477726574501e-05, |
| "loss": 0.0003, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.6546990496304117, |
| "grad_norm": 0.0036057449146750355, |
| "learning_rate": 1.337941628264209e-05, |
| "loss": 0.0003, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.658538926754344, |
| "grad_norm": 0.0047424187952164075, |
| "learning_rate": 1.3364055299539171e-05, |
| "loss": 0.0003, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.6623788038782759, |
| "grad_norm": 0.006050475433915184, |
| "learning_rate": 1.3348694316436252e-05, |
| "loss": 0.0003, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.666218681002208, |
| "grad_norm": 0.0036218140340270734, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.0003, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.67005855812614, |
| "grad_norm": 0.006466212744722768, |
| "learning_rate": 1.3317972350230414e-05, |
| "loss": 0.0003, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.6738984352500719, |
| "grad_norm": 0.00888961973510433, |
| "learning_rate": 1.3302611367127499e-05, |
| "loss": 0.0003, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.6777383123740042, |
| "grad_norm": 0.003812962996861351, |
| "learning_rate": 1.328725038402458e-05, |
| "loss": 0.0003, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.681578189497936, |
| "grad_norm": 0.0022713408875346015, |
| "learning_rate": 1.327188940092166e-05, |
| "loss": 0.0003, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.685418066621868, |
| "grad_norm": 0.006949027274056569, |
| "learning_rate": 1.3256528417818741e-05, |
| "loss": 0.0003, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.6892579437458002, |
| "grad_norm": 0.002962901611754243, |
| "learning_rate": 1.3241167434715822e-05, |
| "loss": 0.0003, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.693097820869732, |
| "grad_norm": 0.003682820073862977, |
| "learning_rate": 1.3225806451612903e-05, |
| "loss": 0.0003, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.6969376979936643, |
| "grad_norm": 0.006163319026317314, |
| "learning_rate": 1.3210445468509984e-05, |
| "loss": 0.0003, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.7007775751175962, |
| "grad_norm": 0.012379479151877055, |
| "learning_rate": 1.3195084485407068e-05, |
| "loss": 0.0003, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.7046174522415283, |
| "grad_norm": 0.010094598031404283, |
| "learning_rate": 1.317972350230415e-05, |
| "loss": 0.0003, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.7084573293654604, |
| "grad_norm": 0.006009523374733768, |
| "learning_rate": 1.316436251920123e-05, |
| "loss": 0.0003, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.7122972064893922, |
| "grad_norm": 0.006067418145337392, |
| "learning_rate": 1.3149001536098311e-05, |
| "loss": 0.0003, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.7161370836133245, |
| "grad_norm": 0.0027024918930963412, |
| "learning_rate": 1.3133640552995392e-05, |
| "loss": 0.0003, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.7199769607372564, |
| "grad_norm": 0.0021877878803625056, |
| "learning_rate": 1.3118279569892473e-05, |
| "loss": 0.0003, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.7238168378611884, |
| "grad_norm": 0.007268918222066073, |
| "learning_rate": 1.3102918586789554e-05, |
| "loss": 0.0003, |
| "step": 4490 |
| }, |
| { |
| "epoch": 1.7276567149851205, |
| "grad_norm": 0.006001244259273792, |
| "learning_rate": 1.3087557603686638e-05, |
| "loss": 0.0003, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.7314965921090524, |
| "grad_norm": 0.00978511362310727, |
| "learning_rate": 1.307219662058372e-05, |
| "loss": 0.0003, |
| "step": 4510 |
| }, |
| { |
| "epoch": 1.7353364692329847, |
| "grad_norm": 0.0061450285375501765, |
| "learning_rate": 1.30568356374808e-05, |
| "loss": 0.0003, |
| "step": 4520 |
| }, |
| { |
| "epoch": 1.7391763463569165, |
| "grad_norm": 0.009230442874108066, |
| "learning_rate": 1.3041474654377881e-05, |
| "loss": 0.0003, |
| "step": 4530 |
| }, |
| { |
| "epoch": 1.7430162234808486, |
| "grad_norm": 0.00791075409422416, |
| "learning_rate": 1.3026113671274962e-05, |
| "loss": 0.0003, |
| "step": 4540 |
| }, |
| { |
| "epoch": 1.7468561006047807, |
| "grad_norm": 0.011115169586698243, |
| "learning_rate": 1.3010752688172043e-05, |
| "loss": 0.0003, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.7506959777287125, |
| "grad_norm": 0.009573241931257201, |
| "learning_rate": 1.2995391705069126e-05, |
| "loss": 0.0003, |
| "step": 4560 |
| }, |
| { |
| "epoch": 1.7545358548526448, |
| "grad_norm": 0.004473006067490304, |
| "learning_rate": 1.2980030721966206e-05, |
| "loss": 0.0003, |
| "step": 4570 |
| }, |
| { |
| "epoch": 1.7583757319765767, |
| "grad_norm": 0.012766183527931843, |
| "learning_rate": 1.2964669738863289e-05, |
| "loss": 0.0003, |
| "step": 4580 |
| }, |
| { |
| "epoch": 1.7622156091005088, |
| "grad_norm": 0.0009086621286683109, |
| "learning_rate": 1.294930875576037e-05, |
| "loss": 0.0003, |
| "step": 4590 |
| }, |
| { |
| "epoch": 1.7660554862244409, |
| "grad_norm": 0.002772310397620897, |
| "learning_rate": 1.2933947772657451e-05, |
| "loss": 0.0003, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.7698953633483727, |
| "grad_norm": 0.00251248063627462, |
| "learning_rate": 1.2918586789554532e-05, |
| "loss": 0.0003, |
| "step": 4610 |
| }, |
| { |
| "epoch": 1.773735240472305, |
| "grad_norm": 0.005139480616844398, |
| "learning_rate": 1.2903225806451613e-05, |
| "loss": 0.0003, |
| "step": 4620 |
| }, |
| { |
| "epoch": 1.7775751175962369, |
| "grad_norm": 0.005373139082891636, |
| "learning_rate": 1.2887864823348695e-05, |
| "loss": 0.0003, |
| "step": 4630 |
| }, |
| { |
| "epoch": 1.781414994720169, |
| "grad_norm": 0.0061935858766664015, |
| "learning_rate": 1.2872503840245776e-05, |
| "loss": 0.0003, |
| "step": 4640 |
| }, |
| { |
| "epoch": 1.785254871844101, |
| "grad_norm": 0.0021300277356281886, |
| "learning_rate": 1.2857142857142859e-05, |
| "loss": 0.0003, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.7890947489680329, |
| "grad_norm": 0.002853522173265363, |
| "learning_rate": 1.284178187403994e-05, |
| "loss": 0.0003, |
| "step": 4660 |
| }, |
| { |
| "epoch": 1.7929346260919652, |
| "grad_norm": 0.005011976697348055, |
| "learning_rate": 1.282642089093702e-05, |
| "loss": 0.0003, |
| "step": 4670 |
| }, |
| { |
| "epoch": 1.796774503215897, |
| "grad_norm": 0.009681647398622931, |
| "learning_rate": 1.2811059907834102e-05, |
| "loss": 0.0003, |
| "step": 4680 |
| }, |
| { |
| "epoch": 1.8006143803398291, |
| "grad_norm": 0.004303781195289312, |
| "learning_rate": 1.2795698924731184e-05, |
| "loss": 0.0003, |
| "step": 4690 |
| }, |
| { |
| "epoch": 1.8044542574637612, |
| "grad_norm": 0.007494764921984889, |
| "learning_rate": 1.2780337941628265e-05, |
| "loss": 0.0003, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.808294134587693, |
| "grad_norm": 0.007339386856967861, |
| "learning_rate": 1.2764976958525346e-05, |
| "loss": 0.0003, |
| "step": 4710 |
| }, |
| { |
| "epoch": 1.8121340117116254, |
| "grad_norm": 0.00291836055735903, |
| "learning_rate": 1.2749615975422429e-05, |
| "loss": 0.0003, |
| "step": 4720 |
| }, |
| { |
| "epoch": 1.8159738888355572, |
| "grad_norm": 0.0029509058375776632, |
| "learning_rate": 1.273425499231951e-05, |
| "loss": 0.0003, |
| "step": 4730 |
| }, |
| { |
| "epoch": 1.8198137659594893, |
| "grad_norm": 0.0032136174047263694, |
| "learning_rate": 1.271889400921659e-05, |
| "loss": 0.0003, |
| "step": 4740 |
| }, |
| { |
| "epoch": 1.8236536430834214, |
| "grad_norm": 0.006459170016331068, |
| "learning_rate": 1.2703533026113673e-05, |
| "loss": 0.0003, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.8274935202073532, |
| "grad_norm": 0.004128532101702144, |
| "learning_rate": 1.2688172043010754e-05, |
| "loss": 0.0003, |
| "step": 4760 |
| }, |
| { |
| "epoch": 1.8313333973312855, |
| "grad_norm": 0.007831239303432262, |
| "learning_rate": 1.2672811059907835e-05, |
| "loss": 0.0003, |
| "step": 4770 |
| }, |
| { |
| "epoch": 1.8351732744552174, |
| "grad_norm": 0.006446091317683293, |
| "learning_rate": 1.2657450076804916e-05, |
| "loss": 0.0003, |
| "step": 4780 |
| }, |
| { |
| "epoch": 1.8390131515791495, |
| "grad_norm": 0.005149611980043962, |
| "learning_rate": 1.2642089093701997e-05, |
| "loss": 0.0003, |
| "step": 4790 |
| }, |
| { |
| "epoch": 1.8428530287030815, |
| "grad_norm": 0.00870763552581347, |
| "learning_rate": 1.262672811059908e-05, |
| "loss": 0.0003, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.8466929058270134, |
| "grad_norm": 0.00174497439177369, |
| "learning_rate": 1.261136712749616e-05, |
| "loss": 0.0003, |
| "step": 4810 |
| }, |
| { |
| "epoch": 1.8505327829509457, |
| "grad_norm": 0.004544066116015543, |
| "learning_rate": 1.2596006144393243e-05, |
| "loss": 0.0003, |
| "step": 4820 |
| }, |
| { |
| "epoch": 1.8543726600748776, |
| "grad_norm": 0.009197541337159318, |
| "learning_rate": 1.2580645161290324e-05, |
| "loss": 0.0003, |
| "step": 4830 |
| }, |
| { |
| "epoch": 1.8582125371988096, |
| "grad_norm": 0.005155440888093791, |
| "learning_rate": 1.2565284178187405e-05, |
| "loss": 0.0003, |
| "step": 4840 |
| }, |
| { |
| "epoch": 1.8620524143227417, |
| "grad_norm": 0.0033030466348720976, |
| "learning_rate": 1.2549923195084486e-05, |
| "loss": 0.0003, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.8658922914466736, |
| "grad_norm": 0.002903647020793528, |
| "learning_rate": 1.2534562211981567e-05, |
| "loss": 0.0003, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.8697321685706059, |
| "grad_norm": 0.0037452166656142243, |
| "learning_rate": 1.251920122887865e-05, |
| "loss": 0.0003, |
| "step": 4870 |
| }, |
| { |
| "epoch": 1.8735720456945377, |
| "grad_norm": 0.005423455715151995, |
| "learning_rate": 1.2503840245775732e-05, |
| "loss": 0.0003, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.8774119228184698, |
| "grad_norm": 0.004529789228413102, |
| "learning_rate": 1.2488479262672813e-05, |
| "loss": 0.0003, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.8812517999424019, |
| "grad_norm": 0.005454742890452505, |
| "learning_rate": 1.2473118279569894e-05, |
| "loss": 0.0003, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.8850916770663337, |
| "grad_norm": 0.003634537048057344, |
| "learning_rate": 1.2457757296466975e-05, |
| "loss": 0.0003, |
| "step": 4910 |
| }, |
| { |
| "epoch": 1.888931554190266, |
| "grad_norm": 0.005577515598529365, |
| "learning_rate": 1.2442396313364056e-05, |
| "loss": 0.0002, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.892771431314198, |
| "grad_norm": 0.003272095460140622, |
| "learning_rate": 1.2427035330261137e-05, |
| "loss": 0.0003, |
| "step": 4930 |
| }, |
| { |
| "epoch": 1.89661130843813, |
| "grad_norm": 0.002842547869737635, |
| "learning_rate": 1.2411674347158221e-05, |
| "loss": 0.0003, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.900451185562062, |
| "grad_norm": 0.0030756210619306856, |
| "learning_rate": 1.2396313364055302e-05, |
| "loss": 0.0003, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.904291062685994, |
| "grad_norm": 0.003876374581736294, |
| "learning_rate": 1.2380952380952383e-05, |
| "loss": 0.0003, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.9081309398099262, |
| "grad_norm": 0.0036495611616124695, |
| "learning_rate": 1.2365591397849464e-05, |
| "loss": 0.0003, |
| "step": 4970 |
| }, |
| { |
| "epoch": 1.911970816933858, |
| "grad_norm": 0.0022680105975665986, |
| "learning_rate": 1.2350230414746545e-05, |
| "loss": 0.0003, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.9158106940577901, |
| "grad_norm": 0.007439434020570674, |
| "learning_rate": 1.2334869431643626e-05, |
| "loss": 0.0003, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.9196505711817222, |
| "grad_norm": 0.004474265624120062, |
| "learning_rate": 1.2319508448540707e-05, |
| "loss": 0.0003, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.923490448305654, |
| "grad_norm": 0.009088460064078376, |
| "learning_rate": 1.2304147465437787e-05, |
| "loss": 0.0003, |
| "step": 5010 |
| }, |
| { |
| "epoch": 1.9273303254295864, |
| "grad_norm": 0.003261287645656727, |
| "learning_rate": 1.2288786482334872e-05, |
| "loss": 0.0003, |
| "step": 5020 |
| }, |
| { |
| "epoch": 1.9311702025535182, |
| "grad_norm": 0.003381354646964584, |
| "learning_rate": 1.2273425499231953e-05, |
| "loss": 0.0003, |
| "step": 5030 |
| }, |
| { |
| "epoch": 1.9350100796774503, |
| "grad_norm": 0.011785456037052021, |
| "learning_rate": 1.2258064516129034e-05, |
| "loss": 0.0003, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.9388499568013824, |
| "grad_norm": 0.003962236601696711, |
| "learning_rate": 1.2242703533026115e-05, |
| "loss": 0.0003, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.9426898339253142, |
| "grad_norm": 0.009479934166440634, |
| "learning_rate": 1.2227342549923195e-05, |
| "loss": 0.0003, |
| "step": 5060 |
| }, |
| { |
| "epoch": 1.9465297110492465, |
| "grad_norm": 0.0017963941477855995, |
| "learning_rate": 1.2211981566820276e-05, |
| "loss": 0.0003, |
| "step": 5070 |
| }, |
| { |
| "epoch": 1.9503695881731784, |
| "grad_norm": 0.003122530310710232, |
| "learning_rate": 1.2196620583717357e-05, |
| "loss": 0.0003, |
| "step": 5080 |
| }, |
| { |
| "epoch": 1.9542094652971105, |
| "grad_norm": 0.007807269576409189, |
| "learning_rate": 1.2181259600614442e-05, |
| "loss": 0.0003, |
| "step": 5090 |
| }, |
| { |
| "epoch": 1.9580493424210426, |
| "grad_norm": 0.010712992154630164, |
| "learning_rate": 1.2165898617511523e-05, |
| "loss": 0.0003, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.9618892195449744, |
| "grad_norm": 0.007059579575096798, |
| "learning_rate": 1.2150537634408604e-05, |
| "loss": 0.0003, |
| "step": 5110 |
| }, |
| { |
| "epoch": 1.9657290966689067, |
| "grad_norm": 0.006965891986586176, |
| "learning_rate": 1.2135176651305684e-05, |
| "loss": 0.0003, |
| "step": 5120 |
| }, |
| { |
| "epoch": 1.9695689737928386, |
| "grad_norm": 0.0032147842412904005, |
| "learning_rate": 1.2119815668202765e-05, |
| "loss": 0.0003, |
| "step": 5130 |
| }, |
| { |
| "epoch": 1.9734088509167707, |
| "grad_norm": 0.002816251401671389, |
| "learning_rate": 1.2104454685099846e-05, |
| "loss": 0.0003, |
| "step": 5140 |
| }, |
| { |
| "epoch": 1.9772487280407027, |
| "grad_norm": 0.00406117662486731, |
| "learning_rate": 1.2089093701996927e-05, |
| "loss": 0.0003, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.9810886051646346, |
| "grad_norm": 0.005187669887429079, |
| "learning_rate": 1.2073732718894012e-05, |
| "loss": 0.0003, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.9849284822885669, |
| "grad_norm": 0.006087920999209694, |
| "learning_rate": 1.2058371735791092e-05, |
| "loss": 0.0003, |
| "step": 5170 |
| }, |
| { |
| "epoch": 1.9887683594124987, |
| "grad_norm": 0.004575814464986424, |
| "learning_rate": 1.2043010752688173e-05, |
| "loss": 0.0003, |
| "step": 5180 |
| }, |
| { |
| "epoch": 1.9926082365364308, |
| "grad_norm": 0.014370083560726126, |
| "learning_rate": 1.2027649769585254e-05, |
| "loss": 0.0003, |
| "step": 5190 |
| }, |
| { |
| "epoch": 1.996448113660363, |
| "grad_norm": 0.005014106197288017, |
| "learning_rate": 1.2012288786482335e-05, |
| "loss": 0.0003, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.007128867284239335, |
| "learning_rate": 1.1996927803379416e-05, |
| "loss": 0.0002, |
| "step": 5210 |
| }, |
| { |
| "epoch": 2.003839877123932, |
| "grad_norm": 0.0035767749443771112, |
| "learning_rate": 1.1981566820276497e-05, |
| "loss": 0.0003, |
| "step": 5220 |
| }, |
| { |
| "epoch": 2.007679754247864, |
| "grad_norm": 0.010409746160305624, |
| "learning_rate": 1.1966205837173581e-05, |
| "loss": 0.0003, |
| "step": 5230 |
| }, |
| { |
| "epoch": 2.011519631371796, |
| "grad_norm": 0.006663355286188172, |
| "learning_rate": 1.1950844854070662e-05, |
| "loss": 0.0003, |
| "step": 5240 |
| }, |
| { |
| "epoch": 2.0153595084957283, |
| "grad_norm": 0.007056337672257391, |
| "learning_rate": 1.1935483870967743e-05, |
| "loss": 0.0003, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.01919938561966, |
| "grad_norm": 0.007042233672731786, |
| "learning_rate": 1.1920122887864824e-05, |
| "loss": 0.0002, |
| "step": 5260 |
| }, |
| { |
| "epoch": 2.023039262743592, |
| "grad_norm": 0.0034724836039492263, |
| "learning_rate": 1.1904761904761905e-05, |
| "loss": 0.0002, |
| "step": 5270 |
| }, |
| { |
| "epoch": 2.0268791398675243, |
| "grad_norm": 0.00582311493886213, |
| "learning_rate": 1.1889400921658986e-05, |
| "loss": 0.0003, |
| "step": 5280 |
| }, |
| { |
| "epoch": 2.030719016991456, |
| "grad_norm": 0.008442699615605335, |
| "learning_rate": 1.1874039938556069e-05, |
| "loss": 0.0003, |
| "step": 5290 |
| }, |
| { |
| "epoch": 2.0345588941153885, |
| "grad_norm": 0.007067754727575771, |
| "learning_rate": 1.185867895545315e-05, |
| "loss": 0.0003, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.0383987712393203, |
| "grad_norm": 0.011473488398813741, |
| "learning_rate": 1.1843317972350232e-05, |
| "loss": 0.0003, |
| "step": 5310 |
| }, |
| { |
| "epoch": 2.042238648363252, |
| "grad_norm": 0.0059529034647220706, |
| "learning_rate": 1.1827956989247313e-05, |
| "loss": 0.0003, |
| "step": 5320 |
| }, |
| { |
| "epoch": 2.0460785254871845, |
| "grad_norm": 0.005899117884748007, |
| "learning_rate": 1.1812596006144394e-05, |
| "loss": 0.0003, |
| "step": 5330 |
| }, |
| { |
| "epoch": 2.0499184026111164, |
| "grad_norm": 0.011298726784506993, |
| "learning_rate": 1.1797235023041475e-05, |
| "loss": 0.0003, |
| "step": 5340 |
| }, |
| { |
| "epoch": 2.0537582797350487, |
| "grad_norm": 0.005795088286120419, |
| "learning_rate": 1.1781874039938556e-05, |
| "loss": 0.0003, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.0575981568589805, |
| "grad_norm": 0.0036837813367058586, |
| "learning_rate": 1.1766513056835639e-05, |
| "loss": 0.0003, |
| "step": 5360 |
| }, |
| { |
| "epoch": 2.0614380339829124, |
| "grad_norm": 0.006214527860410358, |
| "learning_rate": 1.175115207373272e-05, |
| "loss": 0.0003, |
| "step": 5370 |
| }, |
| { |
| "epoch": 2.0652779111068447, |
| "grad_norm": 0.005963634039163199, |
| "learning_rate": 1.1735791090629802e-05, |
| "loss": 0.0003, |
| "step": 5380 |
| }, |
| { |
| "epoch": 2.0691177882307765, |
| "grad_norm": 0.009711371016533323, |
| "learning_rate": 1.1720430107526883e-05, |
| "loss": 0.0003, |
| "step": 5390 |
| }, |
| { |
| "epoch": 2.072957665354709, |
| "grad_norm": 0.004479690428922436, |
| "learning_rate": 1.1705069124423964e-05, |
| "loss": 0.0003, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.0767975424786407, |
| "grad_norm": 0.004339498596198677, |
| "learning_rate": 1.1689708141321045e-05, |
| "loss": 0.0003, |
| "step": 5410 |
| }, |
| { |
| "epoch": 2.0806374196025725, |
| "grad_norm": 0.005524750165344554, |
| "learning_rate": 1.1674347158218127e-05, |
| "loss": 0.0003, |
| "step": 5420 |
| }, |
| { |
| "epoch": 2.084477296726505, |
| "grad_norm": 0.0068414029393528645, |
| "learning_rate": 1.1658986175115208e-05, |
| "loss": 0.0003, |
| "step": 5430 |
| }, |
| { |
| "epoch": 2.0883171738504367, |
| "grad_norm": 0.0029993939116700393, |
| "learning_rate": 1.164362519201229e-05, |
| "loss": 0.0003, |
| "step": 5440 |
| }, |
| { |
| "epoch": 2.092157050974369, |
| "grad_norm": 0.0065445333490569925, |
| "learning_rate": 1.1628264208909372e-05, |
| "loss": 0.0003, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.095996928098301, |
| "grad_norm": 0.004157809191374774, |
| "learning_rate": 1.1612903225806453e-05, |
| "loss": 0.0003, |
| "step": 5460 |
| }, |
| { |
| "epoch": 2.0998368052222327, |
| "grad_norm": 0.01036397645054627, |
| "learning_rate": 1.1597542242703534e-05, |
| "loss": 0.0003, |
| "step": 5470 |
| }, |
| { |
| "epoch": 2.103676682346165, |
| "grad_norm": 0.004619789710041621, |
| "learning_rate": 1.1582181259600616e-05, |
| "loss": 0.0003, |
| "step": 5480 |
| }, |
| { |
| "epoch": 2.107516559470097, |
| "grad_norm": 0.0031701670908054584, |
| "learning_rate": 1.1566820276497697e-05, |
| "loss": 0.0003, |
| "step": 5490 |
| }, |
| { |
| "epoch": 2.111356436594029, |
| "grad_norm": 0.0049090365155047685, |
| "learning_rate": 1.1551459293394778e-05, |
| "loss": 0.0003, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.115196313717961, |
| "grad_norm": 0.006228538445781431, |
| "learning_rate": 1.153609831029186e-05, |
| "loss": 0.0003, |
| "step": 5510 |
| }, |
| { |
| "epoch": 2.119036190841893, |
| "grad_norm": 0.003439458661613514, |
| "learning_rate": 1.152073732718894e-05, |
| "loss": 0.0003, |
| "step": 5520 |
| }, |
| { |
| "epoch": 2.122876067965825, |
| "grad_norm": 0.0019012366528865447, |
| "learning_rate": 1.1505376344086023e-05, |
| "loss": 0.0003, |
| "step": 5530 |
| }, |
| { |
| "epoch": 2.126715945089757, |
| "grad_norm": 0.003757506993975558, |
| "learning_rate": 1.1490015360983104e-05, |
| "loss": 0.0003, |
| "step": 5540 |
| }, |
| { |
| "epoch": 2.1305558222136893, |
| "grad_norm": 0.007477619429838125, |
| "learning_rate": 1.1474654377880186e-05, |
| "loss": 0.0003, |
| "step": 5550 |
| }, |
| { |
| "epoch": 2.134395699337621, |
| "grad_norm": 0.0031273099272674763, |
| "learning_rate": 1.1459293394777267e-05, |
| "loss": 0.0003, |
| "step": 5560 |
| }, |
| { |
| "epoch": 2.138235576461553, |
| "grad_norm": 0.00735341646000325, |
| "learning_rate": 1.1443932411674348e-05, |
| "loss": 0.0003, |
| "step": 5570 |
| }, |
| { |
| "epoch": 2.1420754535854853, |
| "grad_norm": 0.00804142143071962, |
| "learning_rate": 1.1428571428571429e-05, |
| "loss": 0.0003, |
| "step": 5580 |
| }, |
| { |
| "epoch": 2.145915330709417, |
| "grad_norm": 0.004355210176544316, |
| "learning_rate": 1.141321044546851e-05, |
| "loss": 0.0003, |
| "step": 5590 |
| }, |
| { |
| "epoch": 2.1497552078333495, |
| "grad_norm": 0.004213055601660093, |
| "learning_rate": 1.1397849462365593e-05, |
| "loss": 0.0003, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.1535950849572814, |
| "grad_norm": 0.0052307406743254785, |
| "learning_rate": 1.1382488479262675e-05, |
| "loss": 0.0003, |
| "step": 5610 |
| }, |
| { |
| "epoch": 2.157434962081213, |
| "grad_norm": 0.004583788106873781, |
| "learning_rate": 1.1367127496159756e-05, |
| "loss": 0.0003, |
| "step": 5620 |
| }, |
| { |
| "epoch": 2.1612748392051455, |
| "grad_norm": 0.0019315254969018546, |
| "learning_rate": 1.1351766513056837e-05, |
| "loss": 0.0003, |
| "step": 5630 |
| }, |
| { |
| "epoch": 2.1651147163290774, |
| "grad_norm": 0.0037899992818163615, |
| "learning_rate": 1.1336405529953918e-05, |
| "loss": 0.0003, |
| "step": 5640 |
| }, |
| { |
| "epoch": 2.1689545934530097, |
| "grad_norm": 0.006764631940574645, |
| "learning_rate": 1.1321044546850999e-05, |
| "loss": 0.0003, |
| "step": 5650 |
| }, |
| { |
| "epoch": 2.1727944705769415, |
| "grad_norm": 0.009761685741017033, |
| "learning_rate": 1.130568356374808e-05, |
| "loss": 0.0003, |
| "step": 5660 |
| }, |
| { |
| "epoch": 2.1766343477008734, |
| "grad_norm": 0.0020006686679916847, |
| "learning_rate": 1.1290322580645164e-05, |
| "loss": 0.0003, |
| "step": 5670 |
| }, |
| { |
| "epoch": 2.1804742248248057, |
| "grad_norm": 0.0037488861983966605, |
| "learning_rate": 1.1274961597542245e-05, |
| "loss": 0.0003, |
| "step": 5680 |
| }, |
| { |
| "epoch": 2.1843141019487375, |
| "grad_norm": 0.00537849863699846, |
| "learning_rate": 1.1259600614439326e-05, |
| "loss": 0.0003, |
| "step": 5690 |
| }, |
| { |
| "epoch": 2.18815397907267, |
| "grad_norm": 0.010248964745953612, |
| "learning_rate": 1.1244239631336407e-05, |
| "loss": 0.0003, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.1919938561966017, |
| "grad_norm": 0.005824548989521622, |
| "learning_rate": 1.1228878648233488e-05, |
| "loss": 0.0003, |
| "step": 5710 |
| }, |
| { |
| "epoch": 2.1958337333205336, |
| "grad_norm": 0.004325296871628092, |
| "learning_rate": 1.1213517665130569e-05, |
| "loss": 0.0003, |
| "step": 5720 |
| }, |
| { |
| "epoch": 2.199673610444466, |
| "grad_norm": 0.0035972982963026214, |
| "learning_rate": 1.119815668202765e-05, |
| "loss": 0.0003, |
| "step": 5730 |
| }, |
| { |
| "epoch": 2.2035134875683977, |
| "grad_norm": 0.004846244232237887, |
| "learning_rate": 1.118279569892473e-05, |
| "loss": 0.0003, |
| "step": 5740 |
| }, |
| { |
| "epoch": 2.20735336469233, |
| "grad_norm": 0.008745922011732979, |
| "learning_rate": 1.1167434715821815e-05, |
| "loss": 0.0003, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.211193241816262, |
| "grad_norm": 0.00778907419188414, |
| "learning_rate": 1.1152073732718896e-05, |
| "loss": 0.0003, |
| "step": 5760 |
| }, |
| { |
| "epoch": 2.2150331189401937, |
| "grad_norm": 0.0043654759196176075, |
| "learning_rate": 1.1136712749615977e-05, |
| "loss": 0.0003, |
| "step": 5770 |
| }, |
| { |
| "epoch": 2.218872996064126, |
| "grad_norm": 0.005005097526262015, |
| "learning_rate": 1.1121351766513058e-05, |
| "loss": 0.0003, |
| "step": 5780 |
| }, |
| { |
| "epoch": 2.222712873188058, |
| "grad_norm": 0.005100545621721535, |
| "learning_rate": 1.1105990783410139e-05, |
| "loss": 0.0003, |
| "step": 5790 |
| }, |
| { |
| "epoch": 2.22655275031199, |
| "grad_norm": 0.005711204873227365, |
| "learning_rate": 1.109062980030722e-05, |
| "loss": 0.0003, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.230392627435922, |
| "grad_norm": 0.005985518729144408, |
| "learning_rate": 1.10752688172043e-05, |
| "loss": 0.0003, |
| "step": 5810 |
| }, |
| { |
| "epoch": 2.234232504559854, |
| "grad_norm": 0.009325255194251696, |
| "learning_rate": 1.1059907834101385e-05, |
| "loss": 0.0003, |
| "step": 5820 |
| }, |
| { |
| "epoch": 2.238072381683786, |
| "grad_norm": 0.0009671272882854296, |
| "learning_rate": 1.1044546850998466e-05, |
| "loss": 0.0003, |
| "step": 5830 |
| }, |
| { |
| "epoch": 2.241912258807718, |
| "grad_norm": 0.007433837280776717, |
| "learning_rate": 1.1029185867895547e-05, |
| "loss": 0.0003, |
| "step": 5840 |
| }, |
| { |
| "epoch": 2.2457521359316504, |
| "grad_norm": 0.0069928744941826155, |
| "learning_rate": 1.1013824884792628e-05, |
| "loss": 0.0003, |
| "step": 5850 |
| }, |
| { |
| "epoch": 2.249592013055582, |
| "grad_norm": 0.004247699109310477, |
| "learning_rate": 1.0998463901689708e-05, |
| "loss": 0.0003, |
| "step": 5860 |
| }, |
| { |
| "epoch": 2.253431890179514, |
| "grad_norm": 0.004112472440297516, |
| "learning_rate": 1.098310291858679e-05, |
| "loss": 0.0003, |
| "step": 5870 |
| }, |
| { |
| "epoch": 2.2572717673034464, |
| "grad_norm": 0.003591482285585461, |
| "learning_rate": 1.096774193548387e-05, |
| "loss": 0.0003, |
| "step": 5880 |
| }, |
| { |
| "epoch": 2.261111644427378, |
| "grad_norm": 0.0021551424115954687, |
| "learning_rate": 1.0952380952380955e-05, |
| "loss": 0.0003, |
| "step": 5890 |
| }, |
| { |
| "epoch": 2.2649515215513105, |
| "grad_norm": 0.0037350002650392232, |
| "learning_rate": 1.0937019969278036e-05, |
| "loss": 0.0003, |
| "step": 5900 |
| }, |
| { |
| "epoch": 2.2687913986752424, |
| "grad_norm": 0.004979865757746785, |
| "learning_rate": 1.0921658986175116e-05, |
| "loss": 0.0003, |
| "step": 5910 |
| }, |
| { |
| "epoch": 2.2726312757991742, |
| "grad_norm": 0.005203006741838927, |
| "learning_rate": 1.0906298003072197e-05, |
| "loss": 0.0003, |
| "step": 5920 |
| }, |
| { |
| "epoch": 2.2764711529231065, |
| "grad_norm": 0.006728064653510132, |
| "learning_rate": 1.0890937019969278e-05, |
| "loss": 0.0003, |
| "step": 5930 |
| }, |
| { |
| "epoch": 2.2803110300470384, |
| "grad_norm": 0.009359939665881964, |
| "learning_rate": 1.087557603686636e-05, |
| "loss": 0.0003, |
| "step": 5940 |
| }, |
| { |
| "epoch": 2.2841509071709707, |
| "grad_norm": 0.0035717146182484686, |
| "learning_rate": 1.086021505376344e-05, |
| "loss": 0.0003, |
| "step": 5950 |
| }, |
| { |
| "epoch": 2.2879907842949025, |
| "grad_norm": 0.00761174029980443, |
| "learning_rate": 1.0844854070660523e-05, |
| "loss": 0.0003, |
| "step": 5960 |
| }, |
| { |
| "epoch": 2.2918306614188344, |
| "grad_norm": 0.005512789873618017, |
| "learning_rate": 1.0829493087557605e-05, |
| "loss": 0.0003, |
| "step": 5970 |
| }, |
| { |
| "epoch": 2.2956705385427667, |
| "grad_norm": 0.003572666545585101, |
| "learning_rate": 1.0814132104454686e-05, |
| "loss": 0.0003, |
| "step": 5980 |
| }, |
| { |
| "epoch": 2.2995104156666986, |
| "grad_norm": 0.0047703107744972405, |
| "learning_rate": 1.0798771121351767e-05, |
| "loss": 0.0003, |
| "step": 5990 |
| }, |
| { |
| "epoch": 2.303350292790631, |
| "grad_norm": 0.0034190136655308364, |
| "learning_rate": 1.0783410138248848e-05, |
| "loss": 0.0003, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.3071901699145627, |
| "grad_norm": 0.0036676135087216543, |
| "learning_rate": 1.0768049155145929e-05, |
| "loss": 0.0003, |
| "step": 6010 |
| }, |
| { |
| "epoch": 2.3110300470384946, |
| "grad_norm": 0.005769656112064071, |
| "learning_rate": 1.0752688172043012e-05, |
| "loss": 0.0003, |
| "step": 6020 |
| }, |
| { |
| "epoch": 2.314869924162427, |
| "grad_norm": 0.004110269843427993, |
| "learning_rate": 1.0737327188940093e-05, |
| "loss": 0.0003, |
| "step": 6030 |
| }, |
| { |
| "epoch": 2.3187098012863587, |
| "grad_norm": 0.0023331666885418414, |
| "learning_rate": 1.0721966205837175e-05, |
| "loss": 0.0003, |
| "step": 6040 |
| }, |
| { |
| "epoch": 2.322549678410291, |
| "grad_norm": 0.0027550349293288595, |
| "learning_rate": 1.0706605222734256e-05, |
| "loss": 0.0003, |
| "step": 6050 |
| }, |
| { |
| "epoch": 2.326389555534223, |
| "grad_norm": 0.00739949517331183, |
| "learning_rate": 1.0691244239631337e-05, |
| "loss": 0.0003, |
| "step": 6060 |
| }, |
| { |
| "epoch": 2.3302294326581547, |
| "grad_norm": 0.007948349024988999, |
| "learning_rate": 1.0675883256528418e-05, |
| "loss": 0.0003, |
| "step": 6070 |
| }, |
| { |
| "epoch": 2.334069309782087, |
| "grad_norm": 0.0068282243290130416, |
| "learning_rate": 1.0660522273425499e-05, |
| "loss": 0.0003, |
| "step": 6080 |
| }, |
| { |
| "epoch": 2.337909186906019, |
| "grad_norm": 0.0027681416973556576, |
| "learning_rate": 1.0645161290322582e-05, |
| "loss": 0.0003, |
| "step": 6090 |
| }, |
| { |
| "epoch": 2.341749064029951, |
| "grad_norm": 0.0048135041265458395, |
| "learning_rate": 1.0629800307219663e-05, |
| "loss": 0.0003, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.345588941153883, |
| "grad_norm": 0.003988841136950386, |
| "learning_rate": 1.0614439324116745e-05, |
| "loss": 0.0003, |
| "step": 6110 |
| }, |
| { |
| "epoch": 2.349428818277815, |
| "grad_norm": 0.001427852368200693, |
| "learning_rate": 1.0599078341013826e-05, |
| "loss": 0.0003, |
| "step": 6120 |
| }, |
| { |
| "epoch": 2.353268695401747, |
| "grad_norm": 0.0028685965319891656, |
| "learning_rate": 1.0583717357910907e-05, |
| "loss": 0.0003, |
| "step": 6130 |
| }, |
| { |
| "epoch": 2.357108572525679, |
| "grad_norm": 0.004228704639871503, |
| "learning_rate": 1.0568356374807988e-05, |
| "loss": 0.0003, |
| "step": 6140 |
| }, |
| { |
| "epoch": 2.3609484496496114, |
| "grad_norm": 0.004544577992861476, |
| "learning_rate": 1.055299539170507e-05, |
| "loss": 0.0003, |
| "step": 6150 |
| }, |
| { |
| "epoch": 2.3647883267735432, |
| "grad_norm": 0.0014514795044217516, |
| "learning_rate": 1.0537634408602151e-05, |
| "loss": 0.0003, |
| "step": 6160 |
| }, |
| { |
| "epoch": 2.368628203897475, |
| "grad_norm": 0.005123018146892371, |
| "learning_rate": 1.0522273425499232e-05, |
| "loss": 0.0003, |
| "step": 6170 |
| }, |
| { |
| "epoch": 2.3724680810214074, |
| "grad_norm": 0.007113409507414172, |
| "learning_rate": 1.0506912442396313e-05, |
| "loss": 0.0003, |
| "step": 6180 |
| }, |
| { |
| "epoch": 2.3763079581453392, |
| "grad_norm": 0.0051352903847912985, |
| "learning_rate": 1.0491551459293396e-05, |
| "loss": 0.0003, |
| "step": 6190 |
| }, |
| { |
| "epoch": 2.3801478352692715, |
| "grad_norm": 0.002104107790233039, |
| "learning_rate": 1.0476190476190477e-05, |
| "loss": 0.0003, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.3839877123932034, |
| "grad_norm": 0.0025435299650544215, |
| "learning_rate": 1.046082949308756e-05, |
| "loss": 0.0003, |
| "step": 6210 |
| }, |
| { |
| "epoch": 2.3878275895171353, |
| "grad_norm": 0.0012255752995362287, |
| "learning_rate": 1.044546850998464e-05, |
| "loss": 0.0003, |
| "step": 6220 |
| }, |
| { |
| "epoch": 2.3916674666410676, |
| "grad_norm": 0.011675318963073902, |
| "learning_rate": 1.0430107526881721e-05, |
| "loss": 0.0003, |
| "step": 6230 |
| }, |
| { |
| "epoch": 2.3955073437649994, |
| "grad_norm": 0.002461069919600081, |
| "learning_rate": 1.0414746543778802e-05, |
| "loss": 0.0003, |
| "step": 6240 |
| }, |
| { |
| "epoch": 2.3993472208889317, |
| "grad_norm": 0.0034531583429282644, |
| "learning_rate": 1.0399385560675883e-05, |
| "loss": 0.0003, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.4031870980128636, |
| "grad_norm": 0.004785216112107441, |
| "learning_rate": 1.0384024577572966e-05, |
| "loss": 0.0003, |
| "step": 6260 |
| }, |
| { |
| "epoch": 2.4070269751367954, |
| "grad_norm": 0.005178536980072039, |
| "learning_rate": 1.0368663594470047e-05, |
| "loss": 0.0003, |
| "step": 6270 |
| }, |
| { |
| "epoch": 2.4108668522607277, |
| "grad_norm": 0.007894702946313798, |
| "learning_rate": 1.035330261136713e-05, |
| "loss": 0.0003, |
| "step": 6280 |
| }, |
| { |
| "epoch": 2.4147067293846596, |
| "grad_norm": 0.006680693675035738, |
| "learning_rate": 1.033794162826421e-05, |
| "loss": 0.0003, |
| "step": 6290 |
| }, |
| { |
| "epoch": 2.418546606508592, |
| "grad_norm": 0.0012495646198747838, |
| "learning_rate": 1.0322580645161291e-05, |
| "loss": 0.0003, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.4223864836325237, |
| "grad_norm": 0.0019525091355373828, |
| "learning_rate": 1.0307219662058372e-05, |
| "loss": 0.0003, |
| "step": 6310 |
| }, |
| { |
| "epoch": 2.4262263607564556, |
| "grad_norm": 0.010231795338312226, |
| "learning_rate": 1.0291858678955453e-05, |
| "loss": 0.0003, |
| "step": 6320 |
| }, |
| { |
| "epoch": 2.430066237880388, |
| "grad_norm": 0.0030330611729114856, |
| "learning_rate": 1.0276497695852536e-05, |
| "loss": 0.0002, |
| "step": 6330 |
| }, |
| { |
| "epoch": 2.4339061150043197, |
| "grad_norm": 0.004059549924408756, |
| "learning_rate": 1.0261136712749618e-05, |
| "loss": 0.0003, |
| "step": 6340 |
| }, |
| { |
| "epoch": 2.437745992128252, |
| "grad_norm": 0.0019612251556256, |
| "learning_rate": 1.02457757296467e-05, |
| "loss": 0.0003, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.441585869252184, |
| "grad_norm": 0.015437204765968675, |
| "learning_rate": 1.023041474654378e-05, |
| "loss": 0.0003, |
| "step": 6360 |
| }, |
| { |
| "epoch": 2.4454257463761158, |
| "grad_norm": 0.012071394006097472, |
| "learning_rate": 1.0215053763440861e-05, |
| "loss": 0.0003, |
| "step": 6370 |
| }, |
| { |
| "epoch": 2.449265623500048, |
| "grad_norm": 0.0036098717816734674, |
| "learning_rate": 1.0199692780337942e-05, |
| "loss": 0.0003, |
| "step": 6380 |
| }, |
| { |
| "epoch": 2.45310550062398, |
| "grad_norm": 0.0019974234019019756, |
| "learning_rate": 1.0184331797235023e-05, |
| "loss": 0.0003, |
| "step": 6390 |
| }, |
| { |
| "epoch": 2.456945377747912, |
| "grad_norm": 0.0020862007756023856, |
| "learning_rate": 1.0168970814132104e-05, |
| "loss": 0.0003, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.460785254871844, |
| "grad_norm": 0.003547053858291849, |
| "learning_rate": 1.0153609831029188e-05, |
| "loss": 0.0003, |
| "step": 6410 |
| }, |
| { |
| "epoch": 2.464625131995776, |
| "grad_norm": 0.007416597783167198, |
| "learning_rate": 1.0138248847926269e-05, |
| "loss": 0.0003, |
| "step": 6420 |
| }, |
| { |
| "epoch": 2.4684650091197082, |
| "grad_norm": 0.013003680388147372, |
| "learning_rate": 1.012288786482335e-05, |
| "loss": 0.0003, |
| "step": 6430 |
| }, |
| { |
| "epoch": 2.47230488624364, |
| "grad_norm": 0.008074660660103197, |
| "learning_rate": 1.0107526881720431e-05, |
| "loss": 0.0003, |
| "step": 6440 |
| }, |
| { |
| "epoch": 2.4761447633675724, |
| "grad_norm": 0.009777230368946704, |
| "learning_rate": 1.0092165898617512e-05, |
| "loss": 0.0003, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.4799846404915042, |
| "grad_norm": 0.004447494802395034, |
| "learning_rate": 1.0076804915514593e-05, |
| "loss": 0.0003, |
| "step": 6460 |
| }, |
| { |
| "epoch": 2.483824517615436, |
| "grad_norm": 0.007205042771433959, |
| "learning_rate": 1.0061443932411674e-05, |
| "loss": 0.0003, |
| "step": 6470 |
| }, |
| { |
| "epoch": 2.4876643947393684, |
| "grad_norm": 0.004129432165666555, |
| "learning_rate": 1.0046082949308758e-05, |
| "loss": 0.0003, |
| "step": 6480 |
| }, |
| { |
| "epoch": 2.4915042718633003, |
| "grad_norm": 0.003293907536270406, |
| "learning_rate": 1.0030721966205839e-05, |
| "loss": 0.0003, |
| "step": 6490 |
| }, |
| { |
| "epoch": 2.4953441489872326, |
| "grad_norm": 0.003480440018564576, |
| "learning_rate": 1.001536098310292e-05, |
| "loss": 0.0003, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.4991840261111644, |
| "grad_norm": 0.00941404682853377, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 6510 |
| }, |
| { |
| "epoch": 2.5030239032350963, |
| "grad_norm": 0.0027230276406464524, |
| "learning_rate": 9.984639016897082e-06, |
| "loss": 0.0003, |
| "step": 6520 |
| }, |
| { |
| "epoch": 2.5068637803590286, |
| "grad_norm": 0.005538952293239004, |
| "learning_rate": 9.969278033794164e-06, |
| "loss": 0.0003, |
| "step": 6530 |
| }, |
| { |
| "epoch": 2.5107036574829604, |
| "grad_norm": 0.0050333328452282, |
| "learning_rate": 9.953917050691245e-06, |
| "loss": 0.0003, |
| "step": 6540 |
| }, |
| { |
| "epoch": 2.5145435346068927, |
| "grad_norm": 0.00522155205199295, |
| "learning_rate": 9.938556067588326e-06, |
| "loss": 0.0002, |
| "step": 6550 |
| }, |
| { |
| "epoch": 2.5183834117308246, |
| "grad_norm": 0.007188536096954456, |
| "learning_rate": 9.923195084485407e-06, |
| "loss": 0.0003, |
| "step": 6560 |
| }, |
| { |
| "epoch": 2.5222232888547564, |
| "grad_norm": 0.003184906537752492, |
| "learning_rate": 9.90783410138249e-06, |
| "loss": 0.0002, |
| "step": 6570 |
| }, |
| { |
| "epoch": 2.5260631659786887, |
| "grad_norm": 0.007133938771763716, |
| "learning_rate": 9.89247311827957e-06, |
| "loss": 0.0002, |
| "step": 6580 |
| }, |
| { |
| "epoch": 2.5299030431026206, |
| "grad_norm": 0.002331216788213924, |
| "learning_rate": 9.877112135176652e-06, |
| "loss": 0.0003, |
| "step": 6590 |
| }, |
| { |
| "epoch": 2.533742920226553, |
| "grad_norm": 0.00479250433136117, |
| "learning_rate": 9.861751152073733e-06, |
| "loss": 0.0003, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.5375827973504848, |
| "grad_norm": 0.005137631489738605, |
| "learning_rate": 9.846390168970815e-06, |
| "loss": 0.0002, |
| "step": 6610 |
| }, |
| { |
| "epoch": 2.5414226744744166, |
| "grad_norm": 0.0018644753603249751, |
| "learning_rate": 9.831029185867896e-06, |
| "loss": 0.0002, |
| "step": 6620 |
| }, |
| { |
| "epoch": 2.545262551598349, |
| "grad_norm": 0.00606148048013519, |
| "learning_rate": 9.815668202764977e-06, |
| "loss": 0.0003, |
| "step": 6630 |
| }, |
| { |
| "epoch": 2.5491024287222808, |
| "grad_norm": 0.0046942947495266455, |
| "learning_rate": 9.80030721966206e-06, |
| "loss": 0.0003, |
| "step": 6640 |
| }, |
| { |
| "epoch": 2.552942305846213, |
| "grad_norm": 0.0033581298827081688, |
| "learning_rate": 9.78494623655914e-06, |
| "loss": 0.0003, |
| "step": 6650 |
| }, |
| { |
| "epoch": 2.556782182970145, |
| "grad_norm": 0.0050898256061523055, |
| "learning_rate": 9.769585253456221e-06, |
| "loss": 0.0003, |
| "step": 6660 |
| }, |
| { |
| "epoch": 2.560622060094077, |
| "grad_norm": 0.007276030994283694, |
| "learning_rate": 9.754224270353302e-06, |
| "loss": 0.0003, |
| "step": 6670 |
| }, |
| { |
| "epoch": 2.564461937218009, |
| "grad_norm": 0.008102589186392525, |
| "learning_rate": 9.738863287250385e-06, |
| "loss": 0.0003, |
| "step": 6680 |
| }, |
| { |
| "epoch": 2.568301814341941, |
| "grad_norm": 0.007025327596243143, |
| "learning_rate": 9.723502304147466e-06, |
| "loss": 0.0003, |
| "step": 6690 |
| }, |
| { |
| "epoch": 2.5721416914658732, |
| "grad_norm": 0.005845460452608034, |
| "learning_rate": 9.708141321044547e-06, |
| "loss": 0.0003, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.575981568589805, |
| "grad_norm": 0.012489139495375251, |
| "learning_rate": 9.692780337941628e-06, |
| "loss": 0.0003, |
| "step": 6710 |
| }, |
| { |
| "epoch": 2.579821445713737, |
| "grad_norm": 0.012227770449223498, |
| "learning_rate": 9.67741935483871e-06, |
| "loss": 0.0003, |
| "step": 6720 |
| }, |
| { |
| "epoch": 2.5836613228376692, |
| "grad_norm": 0.005069955164515195, |
| "learning_rate": 9.662058371735791e-06, |
| "loss": 0.0003, |
| "step": 6730 |
| }, |
| { |
| "epoch": 2.587501199961601, |
| "grad_norm": 0.0061167037017676555, |
| "learning_rate": 9.646697388632872e-06, |
| "loss": 0.0002, |
| "step": 6740 |
| }, |
| { |
| "epoch": 2.5913410770855334, |
| "grad_norm": 0.08374847767752854, |
| "learning_rate": 9.631336405529955e-06, |
| "loss": 0.0004, |
| "step": 6750 |
| }, |
| { |
| "epoch": 2.5951809542094653, |
| "grad_norm": 0.8655212802945428, |
| "learning_rate": 9.615975422427036e-06, |
| "loss": 0.014, |
| "step": 6760 |
| }, |
| { |
| "epoch": 2.599020831333397, |
| "grad_norm": 0.5910483392483017, |
| "learning_rate": 9.600614439324117e-06, |
| "loss": 0.0096, |
| "step": 6770 |
| }, |
| { |
| "epoch": 2.6028607084573294, |
| "grad_norm": 0.013833647799367984, |
| "learning_rate": 9.5852534562212e-06, |
| "loss": 0.0017, |
| "step": 6780 |
| }, |
| { |
| "epoch": 2.6067005855812613, |
| "grad_norm": 0.006258343437812146, |
| "learning_rate": 9.56989247311828e-06, |
| "loss": 0.0003, |
| "step": 6790 |
| }, |
| { |
| "epoch": 2.6105404627051936, |
| "grad_norm": 0.007017002859118159, |
| "learning_rate": 9.554531490015361e-06, |
| "loss": 0.0003, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.6143803398291254, |
| "grad_norm": 0.003369751095391674, |
| "learning_rate": 9.539170506912442e-06, |
| "loss": 0.0003, |
| "step": 6810 |
| }, |
| { |
| "epoch": 2.6182202169530573, |
| "grad_norm": 0.003526571263237409, |
| "learning_rate": 9.523809523809525e-06, |
| "loss": 0.0003, |
| "step": 6820 |
| }, |
| { |
| "epoch": 2.6220600940769896, |
| "grad_norm": 0.014811925020528588, |
| "learning_rate": 9.508448540706606e-06, |
| "loss": 0.0003, |
| "step": 6830 |
| }, |
| { |
| "epoch": 2.6258999712009214, |
| "grad_norm": 0.0031378309452129815, |
| "learning_rate": 9.493087557603687e-06, |
| "loss": 0.0003, |
| "step": 6840 |
| }, |
| { |
| "epoch": 2.6297398483248537, |
| "grad_norm": 0.003306012394640935, |
| "learning_rate": 9.47772657450077e-06, |
| "loss": 0.0003, |
| "step": 6850 |
| }, |
| { |
| "epoch": 2.6335797254487856, |
| "grad_norm": 0.00579074066003059, |
| "learning_rate": 9.46236559139785e-06, |
| "loss": 0.0003, |
| "step": 6860 |
| }, |
| { |
| "epoch": 2.6374196025727175, |
| "grad_norm": 0.006190406938703898, |
| "learning_rate": 9.447004608294931e-06, |
| "loss": 0.0003, |
| "step": 6870 |
| }, |
| { |
| "epoch": 2.6412594796966498, |
| "grad_norm": 0.0019373298398581817, |
| "learning_rate": 9.431643625192014e-06, |
| "loss": 0.0003, |
| "step": 6880 |
| }, |
| { |
| "epoch": 2.6450993568205816, |
| "grad_norm": 0.003767593626643142, |
| "learning_rate": 9.416282642089095e-06, |
| "loss": 0.0003, |
| "step": 6890 |
| }, |
| { |
| "epoch": 2.648939233944514, |
| "grad_norm": 0.0021313401606799036, |
| "learning_rate": 9.400921658986176e-06, |
| "loss": 0.0003, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.6527791110684458, |
| "grad_norm": 0.0073770172122847415, |
| "learning_rate": 9.385560675883258e-06, |
| "loss": 0.0003, |
| "step": 6910 |
| }, |
| { |
| "epoch": 2.6566189881923776, |
| "grad_norm": 0.0067510318233110125, |
| "learning_rate": 9.370199692780339e-06, |
| "loss": 0.0003, |
| "step": 6920 |
| }, |
| { |
| "epoch": 2.66045886531631, |
| "grad_norm": 0.005588267928353358, |
| "learning_rate": 9.35483870967742e-06, |
| "loss": 0.0003, |
| "step": 6930 |
| }, |
| { |
| "epoch": 2.664298742440242, |
| "grad_norm": 0.006170795151990667, |
| "learning_rate": 9.339477726574503e-06, |
| "loss": 0.0003, |
| "step": 6940 |
| }, |
| { |
| "epoch": 2.668138619564174, |
| "grad_norm": 0.0012556712204226065, |
| "learning_rate": 9.324116743471584e-06, |
| "loss": 0.0003, |
| "step": 6950 |
| }, |
| { |
| "epoch": 2.671978496688106, |
| "grad_norm": 0.006374796263066127, |
| "learning_rate": 9.308755760368664e-06, |
| "loss": 0.0003, |
| "step": 6960 |
| }, |
| { |
| "epoch": 2.675818373812038, |
| "grad_norm": 0.003408128999224669, |
| "learning_rate": 9.293394777265745e-06, |
| "loss": 0.0003, |
| "step": 6970 |
| }, |
| { |
| "epoch": 2.67965825093597, |
| "grad_norm": 0.004088069338470728, |
| "learning_rate": 9.278033794162828e-06, |
| "loss": 0.0003, |
| "step": 6980 |
| }, |
| { |
| "epoch": 2.683498128059902, |
| "grad_norm": 0.0039123405460172464, |
| "learning_rate": 9.262672811059909e-06, |
| "loss": 0.0003, |
| "step": 6990 |
| }, |
| { |
| "epoch": 2.6873380051838343, |
| "grad_norm": 0.0039015005051876795, |
| "learning_rate": 9.24731182795699e-06, |
| "loss": 0.0003, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.691177882307766, |
| "grad_norm": 0.00822944757312854, |
| "learning_rate": 9.231950844854072e-06, |
| "loss": 0.0003, |
| "step": 7010 |
| }, |
| { |
| "epoch": 2.695017759431698, |
| "grad_norm": 0.009264902761011475, |
| "learning_rate": 9.216589861751153e-06, |
| "loss": 0.0003, |
| "step": 7020 |
| }, |
| { |
| "epoch": 2.6988576365556303, |
| "grad_norm": 0.0027568421905536863, |
| "learning_rate": 9.201228878648234e-06, |
| "loss": 0.0003, |
| "step": 7030 |
| }, |
| { |
| "epoch": 2.702697513679562, |
| "grad_norm": 0.007150407401258832, |
| "learning_rate": 9.185867895545315e-06, |
| "loss": 0.0003, |
| "step": 7040 |
| }, |
| { |
| "epoch": 2.7065373908034944, |
| "grad_norm": 0.0037931552321473815, |
| "learning_rate": 9.170506912442398e-06, |
| "loss": 0.0003, |
| "step": 7050 |
| }, |
| { |
| "epoch": 2.7103772679274263, |
| "grad_norm": 0.0035618752848147433, |
| "learning_rate": 9.155145929339479e-06, |
| "loss": 0.0003, |
| "step": 7060 |
| }, |
| { |
| "epoch": 2.714217145051358, |
| "grad_norm": 0.005406421463186957, |
| "learning_rate": 9.13978494623656e-06, |
| "loss": 0.0003, |
| "step": 7070 |
| }, |
| { |
| "epoch": 2.7180570221752904, |
| "grad_norm": 0.006311758799823545, |
| "learning_rate": 9.124423963133642e-06, |
| "loss": 0.0002, |
| "step": 7080 |
| }, |
| { |
| "epoch": 2.7218968992992223, |
| "grad_norm": 0.00455306940271519, |
| "learning_rate": 9.109062980030723e-06, |
| "loss": 0.0003, |
| "step": 7090 |
| }, |
| { |
| "epoch": 2.7257367764231546, |
| "grad_norm": 0.009213759005302373, |
| "learning_rate": 9.093701996927804e-06, |
| "loss": 0.0003, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.7295766535470865, |
| "grad_norm": 0.001955875427936466, |
| "learning_rate": 9.078341013824885e-06, |
| "loss": 0.0003, |
| "step": 7110 |
| }, |
| { |
| "epoch": 2.7334165306710183, |
| "grad_norm": 0.002645510266753951, |
| "learning_rate": 9.062980030721968e-06, |
| "loss": 0.0003, |
| "step": 7120 |
| }, |
| { |
| "epoch": 2.7372564077949506, |
| "grad_norm": 0.0015492690289872618, |
| "learning_rate": 9.047619047619049e-06, |
| "loss": 0.0003, |
| "step": 7130 |
| }, |
| { |
| "epoch": 2.7410962849188825, |
| "grad_norm": 0.004407452957932437, |
| "learning_rate": 9.03225806451613e-06, |
| "loss": 0.0003, |
| "step": 7140 |
| }, |
| { |
| "epoch": 2.7449361620428148, |
| "grad_norm": 0.003584874788768233, |
| "learning_rate": 9.01689708141321e-06, |
| "loss": 0.0003, |
| "step": 7150 |
| }, |
| { |
| "epoch": 2.7487760391667466, |
| "grad_norm": 0.003426299476431039, |
| "learning_rate": 9.001536098310293e-06, |
| "loss": 0.0002, |
| "step": 7160 |
| }, |
| { |
| "epoch": 2.7526159162906785, |
| "grad_norm": 0.003706527902281191, |
| "learning_rate": 8.986175115207374e-06, |
| "loss": 0.0003, |
| "step": 7170 |
| }, |
| { |
| "epoch": 2.7564557934146108, |
| "grad_norm": 0.005686639850889155, |
| "learning_rate": 8.970814132104455e-06, |
| "loss": 0.0003, |
| "step": 7180 |
| }, |
| { |
| "epoch": 2.7602956705385426, |
| "grad_norm": 0.0052320038014579216, |
| "learning_rate": 8.955453149001538e-06, |
| "loss": 0.0002, |
| "step": 7190 |
| }, |
| { |
| "epoch": 2.764135547662475, |
| "grad_norm": 0.0035293581965504047, |
| "learning_rate": 8.940092165898619e-06, |
| "loss": 0.0003, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.767975424786407, |
| "grad_norm": 0.0030584138772079433, |
| "learning_rate": 8.9247311827957e-06, |
| "loss": 0.0003, |
| "step": 7210 |
| }, |
| { |
| "epoch": 2.7718153019103386, |
| "grad_norm": 0.001802338947140362, |
| "learning_rate": 8.90937019969278e-06, |
| "loss": 0.0003, |
| "step": 7220 |
| }, |
| { |
| "epoch": 2.775655179034271, |
| "grad_norm": 0.003941656074171531, |
| "learning_rate": 8.894009216589863e-06, |
| "loss": 0.0003, |
| "step": 7230 |
| }, |
| { |
| "epoch": 2.779495056158203, |
| "grad_norm": 0.007971016203195725, |
| "learning_rate": 8.878648233486944e-06, |
| "loss": 0.0003, |
| "step": 7240 |
| }, |
| { |
| "epoch": 2.783334933282135, |
| "grad_norm": 0.0030065159813745896, |
| "learning_rate": 8.863287250384025e-06, |
| "loss": 0.0003, |
| "step": 7250 |
| }, |
| { |
| "epoch": 2.787174810406067, |
| "grad_norm": 0.012438848822631438, |
| "learning_rate": 8.847926267281107e-06, |
| "loss": 0.0003, |
| "step": 7260 |
| }, |
| { |
| "epoch": 2.791014687529999, |
| "grad_norm": 0.004555632702125586, |
| "learning_rate": 8.832565284178188e-06, |
| "loss": 0.0003, |
| "step": 7270 |
| }, |
| { |
| "epoch": 2.794854564653931, |
| "grad_norm": 0.009624526970252073, |
| "learning_rate": 8.81720430107527e-06, |
| "loss": 0.0003, |
| "step": 7280 |
| }, |
| { |
| "epoch": 2.798694441777863, |
| "grad_norm": 0.006646400675058001, |
| "learning_rate": 8.80184331797235e-06, |
| "loss": 0.0003, |
| "step": 7290 |
| }, |
| { |
| "epoch": 2.8025343189017953, |
| "grad_norm": 0.00547555841349304, |
| "learning_rate": 8.786482334869433e-06, |
| "loss": 0.0003, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.806374196025727, |
| "grad_norm": 0.006365409097436495, |
| "learning_rate": 8.771121351766514e-06, |
| "loss": 0.0003, |
| "step": 7310 |
| }, |
| { |
| "epoch": 2.810214073149659, |
| "grad_norm": 0.004289823731985147, |
| "learning_rate": 8.755760368663595e-06, |
| "loss": 0.0003, |
| "step": 7320 |
| }, |
| { |
| "epoch": 2.8140539502735913, |
| "grad_norm": 0.0009114031462774198, |
| "learning_rate": 8.740399385560676e-06, |
| "loss": 0.0003, |
| "step": 7330 |
| }, |
| { |
| "epoch": 2.817893827397523, |
| "grad_norm": 0.0040925566094191235, |
| "learning_rate": 8.725038402457758e-06, |
| "loss": 0.0003, |
| "step": 7340 |
| }, |
| { |
| "epoch": 2.8217337045214554, |
| "grad_norm": 0.004769056547562217, |
| "learning_rate": 8.70967741935484e-06, |
| "loss": 0.0003, |
| "step": 7350 |
| }, |
| { |
| "epoch": 2.8255735816453873, |
| "grad_norm": 0.00403037096763482, |
| "learning_rate": 8.69431643625192e-06, |
| "loss": 0.0003, |
| "step": 7360 |
| }, |
| { |
| "epoch": 2.829413458769319, |
| "grad_norm": 0.0018637193862899913, |
| "learning_rate": 8.678955453149003e-06, |
| "loss": 0.0003, |
| "step": 7370 |
| }, |
| { |
| "epoch": 2.8332533358932515, |
| "grad_norm": 0.003596057132391645, |
| "learning_rate": 8.663594470046084e-06, |
| "loss": 0.0002, |
| "step": 7380 |
| }, |
| { |
| "epoch": 2.8370932130171833, |
| "grad_norm": 0.003687354810004547, |
| "learning_rate": 8.648233486943165e-06, |
| "loss": 0.0003, |
| "step": 7390 |
| }, |
| { |
| "epoch": 2.8409330901411156, |
| "grad_norm": 0.005906169853780166, |
| "learning_rate": 8.632872503840246e-06, |
| "loss": 0.0003, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.8447729672650475, |
| "grad_norm": 0.008785446912324073, |
| "learning_rate": 8.617511520737328e-06, |
| "loss": 0.0003, |
| "step": 7410 |
| }, |
| { |
| "epoch": 2.8486128443889793, |
| "grad_norm": 0.006765639595362392, |
| "learning_rate": 8.602150537634409e-06, |
| "loss": 0.0003, |
| "step": 7420 |
| }, |
| { |
| "epoch": 2.8524527215129116, |
| "grad_norm": 0.0034460169241499507, |
| "learning_rate": 8.58678955453149e-06, |
| "loss": 0.0003, |
| "step": 7430 |
| }, |
| { |
| "epoch": 2.8562925986368435, |
| "grad_norm": 0.0029012348201393513, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 0.0003, |
| "step": 7440 |
| }, |
| { |
| "epoch": 2.860132475760776, |
| "grad_norm": 0.009176699580615625, |
| "learning_rate": 8.556067588325654e-06, |
| "loss": 0.0003, |
| "step": 7450 |
| }, |
| { |
| "epoch": 2.8639723528847076, |
| "grad_norm": 0.007162151782757863, |
| "learning_rate": 8.540706605222734e-06, |
| "loss": 0.0002, |
| "step": 7460 |
| }, |
| { |
| "epoch": 2.8678122300086395, |
| "grad_norm": 0.011405641448979225, |
| "learning_rate": 8.525345622119815e-06, |
| "loss": 0.0002, |
| "step": 7470 |
| }, |
| { |
| "epoch": 2.871652107132572, |
| "grad_norm": 0.002892641787357458, |
| "learning_rate": 8.509984639016898e-06, |
| "loss": 0.0003, |
| "step": 7480 |
| }, |
| { |
| "epoch": 2.8754919842565037, |
| "grad_norm": 0.0045100170495708195, |
| "learning_rate": 8.494623655913979e-06, |
| "loss": 0.0003, |
| "step": 7490 |
| }, |
| { |
| "epoch": 2.879331861380436, |
| "grad_norm": 0.0042421348190372936, |
| "learning_rate": 8.47926267281106e-06, |
| "loss": 0.0002, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.883171738504368, |
| "grad_norm": 0.007795804545941071, |
| "learning_rate": 8.463901689708142e-06, |
| "loss": 0.0002, |
| "step": 7510 |
| }, |
| { |
| "epoch": 2.8870116156282997, |
| "grad_norm": 0.008298948469422989, |
| "learning_rate": 8.448540706605223e-06, |
| "loss": 0.0003, |
| "step": 7520 |
| }, |
| { |
| "epoch": 2.890851492752232, |
| "grad_norm": 0.0031254716702214095, |
| "learning_rate": 8.433179723502304e-06, |
| "loss": 0.0003, |
| "step": 7530 |
| }, |
| { |
| "epoch": 2.894691369876164, |
| "grad_norm": 0.005192654900334731, |
| "learning_rate": 8.417818740399385e-06, |
| "loss": 0.0003, |
| "step": 7540 |
| }, |
| { |
| "epoch": 2.898531247000096, |
| "grad_norm": 0.01235662584667344, |
| "learning_rate": 8.402457757296468e-06, |
| "loss": 0.0003, |
| "step": 7550 |
| }, |
| { |
| "epoch": 2.902371124124028, |
| "grad_norm": 0.00496363081609215, |
| "learning_rate": 8.387096774193549e-06, |
| "loss": 0.0003, |
| "step": 7560 |
| }, |
| { |
| "epoch": 2.90621100124796, |
| "grad_norm": 0.0036361713152800973, |
| "learning_rate": 8.37173579109063e-06, |
| "loss": 0.0003, |
| "step": 7570 |
| }, |
| { |
| "epoch": 2.910050878371892, |
| "grad_norm": 0.001736163807789765, |
| "learning_rate": 8.356374807987712e-06, |
| "loss": 0.0003, |
| "step": 7580 |
| }, |
| { |
| "epoch": 2.913890755495824, |
| "grad_norm": 0.0037050159683279602, |
| "learning_rate": 8.341013824884793e-06, |
| "loss": 0.0003, |
| "step": 7590 |
| }, |
| { |
| "epoch": 2.9177306326197563, |
| "grad_norm": 0.0039102919819924975, |
| "learning_rate": 8.325652841781874e-06, |
| "loss": 0.0002, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.921570509743688, |
| "grad_norm": 0.004742691161045984, |
| "learning_rate": 8.310291858678957e-06, |
| "loss": 0.0003, |
| "step": 7610 |
| }, |
| { |
| "epoch": 2.92541038686762, |
| "grad_norm": 0.005839965213671288, |
| "learning_rate": 8.294930875576038e-06, |
| "loss": 0.0003, |
| "step": 7620 |
| }, |
| { |
| "epoch": 2.9292502639915523, |
| "grad_norm": 0.005078116114432955, |
| "learning_rate": 8.279569892473119e-06, |
| "loss": 0.0003, |
| "step": 7630 |
| }, |
| { |
| "epoch": 2.933090141115484, |
| "grad_norm": 0.0047744269458432275, |
| "learning_rate": 8.264208909370201e-06, |
| "loss": 0.0003, |
| "step": 7640 |
| }, |
| { |
| "epoch": 2.9369300182394165, |
| "grad_norm": 0.003503766564796104, |
| "learning_rate": 8.248847926267282e-06, |
| "loss": 0.0003, |
| "step": 7650 |
| }, |
| { |
| "epoch": 2.9407698953633483, |
| "grad_norm": 0.002919305506009367, |
| "learning_rate": 8.233486943164363e-06, |
| "loss": 0.0003, |
| "step": 7660 |
| }, |
| { |
| "epoch": 2.94460977248728, |
| "grad_norm": 0.005234438137723877, |
| "learning_rate": 8.218125960061446e-06, |
| "loss": 0.0002, |
| "step": 7670 |
| }, |
| { |
| "epoch": 2.9484496496112125, |
| "grad_norm": 0.0058797401671725905, |
| "learning_rate": 8.202764976958527e-06, |
| "loss": 0.0003, |
| "step": 7680 |
| }, |
| { |
| "epoch": 2.9522895267351443, |
| "grad_norm": 0.003378937083777895, |
| "learning_rate": 8.187403993855608e-06, |
| "loss": 0.0002, |
| "step": 7690 |
| }, |
| { |
| "epoch": 2.9561294038590766, |
| "grad_norm": 0.0027598025310823916, |
| "learning_rate": 8.172043010752689e-06, |
| "loss": 0.0003, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.9599692809830085, |
| "grad_norm": 0.006302224478544003, |
| "learning_rate": 8.156682027649771e-06, |
| "loss": 0.0003, |
| "step": 7710 |
| }, |
| { |
| "epoch": 2.9638091581069403, |
| "grad_norm": 0.005721010444274242, |
| "learning_rate": 8.141321044546852e-06, |
| "loss": 0.0003, |
| "step": 7720 |
| }, |
| { |
| "epoch": 2.9676490352308726, |
| "grad_norm": 0.00895258523648075, |
| "learning_rate": 8.125960061443933e-06, |
| "loss": 0.0003, |
| "step": 7730 |
| }, |
| { |
| "epoch": 2.9714889123548045, |
| "grad_norm": 0.006250897732901797, |
| "learning_rate": 8.110599078341016e-06, |
| "loss": 0.0003, |
| "step": 7740 |
| }, |
| { |
| "epoch": 2.975328789478737, |
| "grad_norm": 0.004710767186934721, |
| "learning_rate": 8.095238095238097e-06, |
| "loss": 0.0003, |
| "step": 7750 |
| }, |
| { |
| "epoch": 2.9791686666026687, |
| "grad_norm": 0.004749110615282267, |
| "learning_rate": 8.079877112135177e-06, |
| "loss": 0.0003, |
| "step": 7760 |
| }, |
| { |
| "epoch": 2.9830085437266005, |
| "grad_norm": 0.0024146312389459854, |
| "learning_rate": 8.064516129032258e-06, |
| "loss": 0.0003, |
| "step": 7770 |
| }, |
| { |
| "epoch": 2.986848420850533, |
| "grad_norm": 0.0017491812102154464, |
| "learning_rate": 8.049155145929341e-06, |
| "loss": 0.0002, |
| "step": 7780 |
| }, |
| { |
| "epoch": 2.9906882979744647, |
| "grad_norm": 0.008039358375107466, |
| "learning_rate": 8.033794162826422e-06, |
| "loss": 0.0003, |
| "step": 7790 |
| }, |
| { |
| "epoch": 2.994528175098397, |
| "grad_norm": 0.003445401481616659, |
| "learning_rate": 8.018433179723503e-06, |
| "loss": 0.0003, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.998368052222329, |
| "grad_norm": 0.004124288268774834, |
| "learning_rate": 8.003072196620585e-06, |
| "loss": 0.0003, |
| "step": 7810 |
| }, |
| { |
| "epoch": 3.001919938561966, |
| "grad_norm": 0.002674977868985105, |
| "learning_rate": 7.987711213517666e-06, |
| "loss": 0.0002, |
| "step": 7820 |
| }, |
| { |
| "epoch": 3.0057598156858982, |
| "grad_norm": 0.0050648363913657094, |
| "learning_rate": 7.972350230414747e-06, |
| "loss": 0.0003, |
| "step": 7830 |
| }, |
| { |
| "epoch": 3.00959969280983, |
| "grad_norm": 0.0007924830296134951, |
| "learning_rate": 7.956989247311828e-06, |
| "loss": 0.0003, |
| "step": 7840 |
| }, |
| { |
| "epoch": 3.013439569933762, |
| "grad_norm": 0.001913560970676822, |
| "learning_rate": 7.941628264208911e-06, |
| "loss": 0.0003, |
| "step": 7850 |
| }, |
| { |
| "epoch": 3.0172794470576942, |
| "grad_norm": 0.0031764216786887415, |
| "learning_rate": 7.926267281105992e-06, |
| "loss": 0.0002, |
| "step": 7860 |
| }, |
| { |
| "epoch": 3.021119324181626, |
| "grad_norm": 0.00509301739025399, |
| "learning_rate": 7.910906298003073e-06, |
| "loss": 0.0003, |
| "step": 7870 |
| }, |
| { |
| "epoch": 3.0249592013055584, |
| "grad_norm": 0.004796585886393999, |
| "learning_rate": 7.895545314900154e-06, |
| "loss": 0.0003, |
| "step": 7880 |
| }, |
| { |
| "epoch": 3.0287990784294903, |
| "grad_norm": 0.006878984874609061, |
| "learning_rate": 7.880184331797236e-06, |
| "loss": 0.0003, |
| "step": 7890 |
| }, |
| { |
| "epoch": 3.032638955553422, |
| "grad_norm": 0.0028825313800368113, |
| "learning_rate": 7.864823348694317e-06, |
| "loss": 0.0003, |
| "step": 7900 |
| }, |
| { |
| "epoch": 3.0364788326773544, |
| "grad_norm": 0.008888757116371175, |
| "learning_rate": 7.849462365591398e-06, |
| "loss": 0.0002, |
| "step": 7910 |
| }, |
| { |
| "epoch": 3.0403187098012863, |
| "grad_norm": 0.011189685195352432, |
| "learning_rate": 7.83410138248848e-06, |
| "loss": 0.0003, |
| "step": 7920 |
| }, |
| { |
| "epoch": 3.0441585869252186, |
| "grad_norm": 0.004445925224066104, |
| "learning_rate": 7.818740399385562e-06, |
| "loss": 0.0003, |
| "step": 7930 |
| }, |
| { |
| "epoch": 3.0479984640491504, |
| "grad_norm": 0.005046704426390252, |
| "learning_rate": 7.803379416282643e-06, |
| "loss": 0.0002, |
| "step": 7940 |
| }, |
| { |
| "epoch": 3.0518383411730823, |
| "grad_norm": 0.00325039934600695, |
| "learning_rate": 7.788018433179724e-06, |
| "loss": 0.0002, |
| "step": 7950 |
| }, |
| { |
| "epoch": 3.0556782182970146, |
| "grad_norm": 0.009122195080615519, |
| "learning_rate": 7.772657450076806e-06, |
| "loss": 0.0002, |
| "step": 7960 |
| }, |
| { |
| "epoch": 3.0595180954209464, |
| "grad_norm": 0.0035357373556863343, |
| "learning_rate": 7.757296466973887e-06, |
| "loss": 0.0002, |
| "step": 7970 |
| }, |
| { |
| "epoch": 3.0633579725448787, |
| "grad_norm": 0.0030570656872309174, |
| "learning_rate": 7.741935483870968e-06, |
| "loss": 0.0003, |
| "step": 7980 |
| }, |
| { |
| "epoch": 3.0671978496688106, |
| "grad_norm": 0.0021996646452019718, |
| "learning_rate": 7.726574500768049e-06, |
| "loss": 0.0003, |
| "step": 7990 |
| }, |
| { |
| "epoch": 3.0710377267927425, |
| "grad_norm": 0.0018882278920401504, |
| "learning_rate": 7.711213517665132e-06, |
| "loss": 0.0003, |
| "step": 8000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 13020, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.7534171666710528e+16, |
| "train_batch_size": 6, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|