| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 19532, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0005119803399549457, | |
| "grad_norm": 1.4939812421798706, | |
| "learning_rate": 7.679705099324186e-05, | |
| "loss": 8.8823, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0010239606799098914, | |
| "grad_norm": 1.7099491357803345, | |
| "learning_rate": 0.00015359410198648372, | |
| "loss": 8.4098, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0015359410198648373, | |
| "grad_norm": 1.341354489326477, | |
| "learning_rate": 0.00023039115297972558, | |
| "loss": 7.809, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0020479213598197828, | |
| "grad_norm": 1.2555238008499146, | |
| "learning_rate": 0.00030718820397296744, | |
| "loss": 7.2814, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0025599016997747285, | |
| "grad_norm": 1.1430288553237915, | |
| "learning_rate": 0.0003839852549662093, | |
| "loss": 6.8009, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0030718820397296746, | |
| "grad_norm": 0.7848866581916809, | |
| "learning_rate": 0.00046078230595945115, | |
| "loss": 6.4164, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0035838623796846203, | |
| "grad_norm": 1.1270220279693604, | |
| "learning_rate": 0.000537579356952693, | |
| "loss": 6.1553, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0040958427196395655, | |
| "grad_norm": 0.5496548414230347, | |
| "learning_rate": 0.0006143764079459349, | |
| "loss": 5.9572, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.004607823059594511, | |
| "grad_norm": 0.8258134126663208, | |
| "learning_rate": 0.0006911734589391768, | |
| "loss": 5.7536, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.005119803399549457, | |
| "grad_norm": 0.544425368309021, | |
| "learning_rate": 0.0007679705099324186, | |
| "loss": 5.6043, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.005631783739504403, | |
| "grad_norm": 1.129820466041565, | |
| "learning_rate": 0.0008447675609256605, | |
| "loss": 5.3984, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.006143764079459349, | |
| "grad_norm": 1.6234118938446045, | |
| "learning_rate": 0.0009215646119189023, | |
| "loss": 5.2392, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.006655744419414295, | |
| "grad_norm": 0.7183708548545837, | |
| "learning_rate": 0.0009983616629121441, | |
| "loss": 5.107, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0071677247593692405, | |
| "grad_norm": 1.0296348333358765, | |
| "learning_rate": 0.001075158713905386, | |
| "loss": 4.9662, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.007679705099324186, | |
| "grad_norm": 1.6978133916854858, | |
| "learning_rate": 0.001151955764898628, | |
| "loss": 4.8161, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.008191685439279131, | |
| "grad_norm": 0.8946409821510315, | |
| "learning_rate": 0.0012287528158918697, | |
| "loss": 4.7119, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.008703665779234078, | |
| "grad_norm": 1.0135765075683594, | |
| "learning_rate": 0.0013055498668851117, | |
| "loss": 4.6082, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.009215646119189022, | |
| "grad_norm": 0.8236331343650818, | |
| "learning_rate": 0.0013823469178783536, | |
| "loss": 4.496, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.009727626459143969, | |
| "grad_norm": 1.161008596420288, | |
| "learning_rate": 0.0014591439688715956, | |
| "loss": 4.4071, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.010239606799098914, | |
| "grad_norm": 1.3253235816955566, | |
| "learning_rate": 0.0015, | |
| "loss": 4.3286, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.01075158713905386, | |
| "grad_norm": 1.6026867628097534, | |
| "learning_rate": 0.0015, | |
| "loss": 4.2374, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.011263567479008805, | |
| "grad_norm": 1.0043503046035767, | |
| "learning_rate": 0.0015, | |
| "loss": 4.1526, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.011775547818963752, | |
| "grad_norm": 0.963283121585846, | |
| "learning_rate": 0.0015, | |
| "loss": 4.0709, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.012287528158918698, | |
| "grad_norm": 0.8025517463684082, | |
| "learning_rate": 0.0015, | |
| "loss": 3.9997, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.012799508498873643, | |
| "grad_norm": 0.7000623345375061, | |
| "learning_rate": 0.0015, | |
| "loss": 3.91, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.01331148883882859, | |
| "grad_norm": 0.8964600563049316, | |
| "learning_rate": 0.0015, | |
| "loss": 3.8844, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.013823469178783535, | |
| "grad_norm": 0.7321097254753113, | |
| "learning_rate": 0.0015, | |
| "loss": 3.8324, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.014335449518738481, | |
| "grad_norm": 0.8242825269699097, | |
| "learning_rate": 0.0015, | |
| "loss": 3.7653, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.014847429858693426, | |
| "grad_norm": 1.045832633972168, | |
| "learning_rate": 0.0015, | |
| "loss": 3.7115, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.015359410198648372, | |
| "grad_norm": 1.0511783361434937, | |
| "learning_rate": 0.0015, | |
| "loss": 3.6754, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.015871390538603317, | |
| "grad_norm": 0.79283607006073, | |
| "learning_rate": 0.0015, | |
| "loss": 3.615, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.016383370878558262, | |
| "grad_norm": 0.7592840194702148, | |
| "learning_rate": 0.0015, | |
| "loss": 3.5692, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.01689535121851321, | |
| "grad_norm": 0.6317871809005737, | |
| "learning_rate": 0.0015, | |
| "loss": 3.5581, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.017407331558468155, | |
| "grad_norm": 0.8634727597236633, | |
| "learning_rate": 0.0015, | |
| "loss": 3.5035, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.0179193118984231, | |
| "grad_norm": 0.9801504611968994, | |
| "learning_rate": 0.0015, | |
| "loss": 3.4543, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.018431292238378045, | |
| "grad_norm": 0.9941282868385315, | |
| "learning_rate": 0.0015, | |
| "loss": 3.4323, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.018943272578332993, | |
| "grad_norm": 1.1075271368026733, | |
| "learning_rate": 0.0015, | |
| "loss": 3.3992, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.019455252918287938, | |
| "grad_norm": 0.9263769388198853, | |
| "learning_rate": 0.0015, | |
| "loss": 3.3484, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.019967233258242883, | |
| "grad_norm": 0.6879151463508606, | |
| "learning_rate": 0.0015, | |
| "loss": 3.3255, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.020479213598197828, | |
| "grad_norm": 1.0170198678970337, | |
| "learning_rate": 0.0015, | |
| "loss": 3.2744, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.020991193938152776, | |
| "grad_norm": 0.9534377455711365, | |
| "learning_rate": 0.0015, | |
| "loss": 3.2513, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.02150317427810772, | |
| "grad_norm": 1.1487725973129272, | |
| "learning_rate": 0.0015, | |
| "loss": 3.2043, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.022015154618062666, | |
| "grad_norm": 0.8081286549568176, | |
| "learning_rate": 0.0015, | |
| "loss": 3.1891, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.02252713495801761, | |
| "grad_norm": 0.8324559926986694, | |
| "learning_rate": 0.0015, | |
| "loss": 3.1025, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.02303911529797256, | |
| "grad_norm": 0.9536003470420837, | |
| "learning_rate": 0.0015, | |
| "loss": 3.1029, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.023551095637927504, | |
| "grad_norm": 1.3307809829711914, | |
| "learning_rate": 0.0015, | |
| "loss": 3.0508, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.02406307597788245, | |
| "grad_norm": 1.237606167793274, | |
| "learning_rate": 0.0015, | |
| "loss": 3.0528, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.024575056317837397, | |
| "grad_norm": 0.9293427467346191, | |
| "learning_rate": 0.0015, | |
| "loss": 2.9933, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.02508703665779234, | |
| "grad_norm": 0.8388038873672485, | |
| "learning_rate": 0.0015, | |
| "loss": 2.9593, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.025599016997747286, | |
| "grad_norm": 0.7568584084510803, | |
| "learning_rate": 0.0015, | |
| "loss": 2.9442, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.02611099733770223, | |
| "grad_norm": 0.7443001866340637, | |
| "learning_rate": 0.0015, | |
| "loss": 2.9138, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.02662297767765718, | |
| "grad_norm": 0.9567376375198364, | |
| "learning_rate": 0.0015, | |
| "loss": 2.8952, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.027134958017612124, | |
| "grad_norm": 0.7521085143089294, | |
| "learning_rate": 0.0015, | |
| "loss": 2.8719, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.02764693835756707, | |
| "grad_norm": 1.0200743675231934, | |
| "learning_rate": 0.0015, | |
| "loss": 2.8533, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.028158918697522014, | |
| "grad_norm": 0.8097197413444519, | |
| "learning_rate": 0.0015, | |
| "loss": 2.8476, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.028670899037476962, | |
| "grad_norm": 0.7335869669914246, | |
| "learning_rate": 0.0015, | |
| "loss": 2.7611, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.029182879377431907, | |
| "grad_norm": 0.7385020852088928, | |
| "learning_rate": 0.0015, | |
| "loss": 2.7824, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.029694859717386852, | |
| "grad_norm": 0.8730366826057434, | |
| "learning_rate": 0.0015, | |
| "loss": 2.7236, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.030206840057341797, | |
| "grad_norm": 0.8042418360710144, | |
| "learning_rate": 0.0015, | |
| "loss": 2.7331, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.030718820397296745, | |
| "grad_norm": 0.7750236392021179, | |
| "learning_rate": 0.0015, | |
| "loss": 2.6946, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.03123080073725169, | |
| "grad_norm": 1.130753755569458, | |
| "learning_rate": 0.0015, | |
| "loss": 2.7127, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.031742781077206635, | |
| "grad_norm": 0.7699748277664185, | |
| "learning_rate": 0.0015, | |
| "loss": 2.665, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.03225476141716158, | |
| "grad_norm": 0.7676917314529419, | |
| "learning_rate": 0.0015, | |
| "loss": 2.6516, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.032766741757116524, | |
| "grad_norm": 0.9566435217857361, | |
| "learning_rate": 0.0015, | |
| "loss": 2.6311, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.03327872209707147, | |
| "grad_norm": 0.9319092631340027, | |
| "learning_rate": 0.0015, | |
| "loss": 2.6062, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.03379070243702642, | |
| "grad_norm": 0.7314916849136353, | |
| "learning_rate": 0.0015, | |
| "loss": 2.5822, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.03430268277698136, | |
| "grad_norm": 0.765346109867096, | |
| "learning_rate": 0.0015, | |
| "loss": 2.587, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.03481466311693631, | |
| "grad_norm": 0.8714979887008667, | |
| "learning_rate": 0.0015, | |
| "loss": 2.5479, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.03532664345689126, | |
| "grad_norm": 0.7182953357696533, | |
| "learning_rate": 0.0015, | |
| "loss": 2.5388, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.0358386237968462, | |
| "grad_norm": 0.71555095911026, | |
| "learning_rate": 0.0015, | |
| "loss": 2.5196, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.03635060413680115, | |
| "grad_norm": 0.6901549696922302, | |
| "learning_rate": 0.0015, | |
| "loss": 2.4948, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.03686258447675609, | |
| "grad_norm": 0.7073848247528076, | |
| "learning_rate": 0.0015, | |
| "loss": 2.4814, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.03737456481671104, | |
| "grad_norm": 0.6590971350669861, | |
| "learning_rate": 0.0015, | |
| "loss": 2.4799, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.037886545156665986, | |
| "grad_norm": 0.6124588251113892, | |
| "learning_rate": 0.0015, | |
| "loss": 2.4529, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.03839852549662093, | |
| "grad_norm": 0.7170097231864929, | |
| "learning_rate": 0.0015, | |
| "loss": 2.4397, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.038910505836575876, | |
| "grad_norm": 0.7509459853172302, | |
| "learning_rate": 0.0015, | |
| "loss": 2.433, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.039422486176530824, | |
| "grad_norm": 0.8185219168663025, | |
| "learning_rate": 0.0015, | |
| "loss": 2.4364, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.039934466516485766, | |
| "grad_norm": 0.6452121734619141, | |
| "learning_rate": 0.0015, | |
| "loss": 2.4375, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.040446446856440714, | |
| "grad_norm": 0.7798700928688049, | |
| "learning_rate": 0.0015, | |
| "loss": 2.4082, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.040958427196395655, | |
| "grad_norm": 0.905072808265686, | |
| "learning_rate": 0.0015, | |
| "loss": 2.3811, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.041470407536350604, | |
| "grad_norm": 0.7047348618507385, | |
| "learning_rate": 0.0015, | |
| "loss": 2.3955, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.04198238787630555, | |
| "grad_norm": 0.6472852230072021, | |
| "learning_rate": 0.0015, | |
| "loss": 2.3776, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.04249436821626049, | |
| "grad_norm": 0.729308545589447, | |
| "learning_rate": 0.0015, | |
| "loss": 2.3465, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.04300634855621544, | |
| "grad_norm": 0.8292624950408936, | |
| "learning_rate": 0.0015, | |
| "loss": 2.3578, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.04351832889617039, | |
| "grad_norm": 0.6298139691352844, | |
| "learning_rate": 0.0015, | |
| "loss": 2.3349, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.04403030923612533, | |
| "grad_norm": 0.647214949131012, | |
| "learning_rate": 0.0015, | |
| "loss": 2.299, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.04454228957608028, | |
| "grad_norm": 0.7034851312637329, | |
| "learning_rate": 0.0015, | |
| "loss": 2.2927, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.04505426991603522, | |
| "grad_norm": 0.6373961567878723, | |
| "learning_rate": 0.0015, | |
| "loss": 2.2776, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.04556625025599017, | |
| "grad_norm": 0.8384701609611511, | |
| "learning_rate": 0.0015, | |
| "loss": 2.2948, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.04607823059594512, | |
| "grad_norm": 0.7856025695800781, | |
| "learning_rate": 0.0015, | |
| "loss": 2.3034, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.04659021093590006, | |
| "grad_norm": 0.6041284799575806, | |
| "learning_rate": 0.0015, | |
| "loss": 2.2773, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.04710219127585501, | |
| "grad_norm": 0.8801588416099548, | |
| "learning_rate": 0.0015, | |
| "loss": 2.2706, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.047614171615809955, | |
| "grad_norm": 0.7567424178123474, | |
| "learning_rate": 0.0015, | |
| "loss": 2.2754, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.0481261519557649, | |
| "grad_norm": 0.6421610713005066, | |
| "learning_rate": 0.0015, | |
| "loss": 2.2514, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.048638132295719845, | |
| "grad_norm": 0.7311142683029175, | |
| "learning_rate": 0.0015, | |
| "loss": 2.2005, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.04915011263567479, | |
| "grad_norm": 0.7399065494537354, | |
| "learning_rate": 0.0015, | |
| "loss": 2.2038, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.049662092975629735, | |
| "grad_norm": 0.708454430103302, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1758, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.05017407331558468, | |
| "grad_norm": 0.6199438571929932, | |
| "learning_rate": 0.0015, | |
| "loss": 2.227, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.050686053655539624, | |
| "grad_norm": 0.6159200668334961, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1547, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.05119803399549457, | |
| "grad_norm": 0.6560512781143188, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1787, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.05171001433544952, | |
| "grad_norm": 0.6151387691497803, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1776, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.05222199467540446, | |
| "grad_norm": 0.6162774562835693, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1604, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.05273397501535941, | |
| "grad_norm": 0.6564657092094421, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1837, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.05324595535531436, | |
| "grad_norm": 0.5790508985519409, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1561, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.0537579356952693, | |
| "grad_norm": 0.6484589576721191, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1676, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.05426991603522425, | |
| "grad_norm": 0.6969457268714905, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1462, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.05478189637517919, | |
| "grad_norm": 0.7145557403564453, | |
| "learning_rate": 0.0015, | |
| "loss": 2.13, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.05529387671513414, | |
| "grad_norm": 0.6353093981742859, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1197, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.055805857055089086, | |
| "grad_norm": 0.5896279811859131, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1177, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.05631783739504403, | |
| "grad_norm": 0.6247608661651611, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1123, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.056829817734998976, | |
| "grad_norm": 0.6024080514907837, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0949, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.057341798074953924, | |
| "grad_norm": 0.7400630116462708, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0915, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.057853778414908866, | |
| "grad_norm": 0.6276081800460815, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0916, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.058365758754863814, | |
| "grad_norm": 0.7214579582214355, | |
| "learning_rate": 0.0015, | |
| "loss": 2.1027, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.05887773909481876, | |
| "grad_norm": 0.7833266258239746, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0884, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.059389719434773704, | |
| "grad_norm": 0.7453588247299194, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0764, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.05990169977472865, | |
| "grad_norm": 0.5965461134910583, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0941, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.06041368011468359, | |
| "grad_norm": 0.6565614938735962, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0396, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.06092566045463854, | |
| "grad_norm": 0.670816957950592, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0629, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.06143764079459349, | |
| "grad_norm": 0.6220470666885376, | |
| "learning_rate": 0.0015, | |
| "loss": 2.064, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.06194962113454843, | |
| "grad_norm": 0.5919376015663147, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0385, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.06246160147450338, | |
| "grad_norm": 0.6242793202400208, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0487, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.06297358181445832, | |
| "grad_norm": 0.5903810262680054, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0348, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.06348556215441327, | |
| "grad_norm": 0.6573896408081055, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0186, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.06399754249436822, | |
| "grad_norm": 0.6017488241195679, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0126, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.06450952283432317, | |
| "grad_norm": 0.533431351184845, | |
| "learning_rate": 0.0015, | |
| "loss": 2.026, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.06502150317427811, | |
| "grad_norm": 0.5461450815200806, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9961, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.06553348351423305, | |
| "grad_norm": 0.5579766035079956, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0064, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.066045463854188, | |
| "grad_norm": 0.5514289736747742, | |
| "learning_rate": 0.0015, | |
| "loss": 2.0079, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.06655744419414295, | |
| "grad_norm": 0.5938010215759277, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9811, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.0670694245340979, | |
| "grad_norm": 0.703124463558197, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9634, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.06758140487405284, | |
| "grad_norm": 0.545432448387146, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9927, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.06809338521400778, | |
| "grad_norm": 0.5673125386238098, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9911, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.06860536555396272, | |
| "grad_norm": 0.5682245492935181, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9733, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.06911734589391767, | |
| "grad_norm": 0.5960274934768677, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9733, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.06962932623387262, | |
| "grad_norm": 0.6102215051651001, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9559, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.07014130657382757, | |
| "grad_norm": 0.5990728735923767, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9463, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.07065328691378252, | |
| "grad_norm": 0.6161502003669739, | |
| "learning_rate": 0.0015, | |
| "loss": 1.978, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.07116526725373745, | |
| "grad_norm": 0.5682898759841919, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9558, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.0716772475936924, | |
| "grad_norm": 0.5973048210144043, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9376, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.07218922793364735, | |
| "grad_norm": 0.5553535223007202, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9468, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.0727012082736023, | |
| "grad_norm": 0.5181711912155151, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9188, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.07321318861355725, | |
| "grad_norm": 0.6532855033874512, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9069, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.07372516895351218, | |
| "grad_norm": 0.531043291091919, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9319, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.07423714929346713, | |
| "grad_norm": 0.5700235962867737, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8891, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.07474912963342208, | |
| "grad_norm": 0.523414134979248, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9165, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.07526110997337702, | |
| "grad_norm": 0.5649904608726501, | |
| "learning_rate": 0.0015, | |
| "loss": 1.905, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.07577309031333197, | |
| "grad_norm": 0.5912672877311707, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9162, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.07628507065328691, | |
| "grad_norm": 0.5597636699676514, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9158, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.07679705099324186, | |
| "grad_norm": 0.553896963596344, | |
| "learning_rate": 0.0015, | |
| "loss": 1.871, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.0773090313331968, | |
| "grad_norm": 0.5018342137336731, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9119, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.07782101167315175, | |
| "grad_norm": 0.5367796421051025, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8706, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.0783329920131067, | |
| "grad_norm": 0.5023203492164612, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8808, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.07884497235306165, | |
| "grad_norm": 0.5962059497833252, | |
| "learning_rate": 0.0015, | |
| "loss": 1.9022, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.07935695269301658, | |
| "grad_norm": 0.5200186967849731, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8728, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.07986893303297153, | |
| "grad_norm": 0.5361810922622681, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8462, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.08038091337292648, | |
| "grad_norm": 0.5771626830101013, | |
| "learning_rate": 0.0015, | |
| "loss": 1.873, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.08089289371288143, | |
| "grad_norm": 0.5451227426528931, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8693, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.08140487405283638, | |
| "grad_norm": 0.5574854016304016, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8615, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.08191685439279131, | |
| "grad_norm": 0.574317455291748, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8424, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.08242883473274626, | |
| "grad_norm": 0.545906662940979, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8572, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.08294081507270121, | |
| "grad_norm": 0.5127050280570984, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8391, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.08345279541265616, | |
| "grad_norm": 0.5646129250526428, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8316, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.0839647757526111, | |
| "grad_norm": 0.5549367666244507, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8371, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.08447675609256605, | |
| "grad_norm": 0.5479699373245239, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8378, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.08498873643252099, | |
| "grad_norm": 0.5359328985214233, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8372, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.08550071677247593, | |
| "grad_norm": 0.5599870085716248, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8499, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.08601269711243088, | |
| "grad_norm": 0.5272551774978638, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8381, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.08652467745238583, | |
| "grad_norm": 0.534377932548523, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8124, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.08703665779234078, | |
| "grad_norm": 0.6432906985282898, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8354, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.08754863813229571, | |
| "grad_norm": 0.5227901935577393, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8091, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.08806061847225066, | |
| "grad_norm": 0.48951131105422974, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7854, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.08857259881220561, | |
| "grad_norm": 0.5127034783363342, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8208, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.08908457915216056, | |
| "grad_norm": 0.5147260427474976, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8289, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.08959655949211551, | |
| "grad_norm": 0.536268413066864, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7894, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.09010853983207044, | |
| "grad_norm": 0.537369966506958, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7985, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.09062052017202539, | |
| "grad_norm": 0.5217599868774414, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8196, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.09113250051198034, | |
| "grad_norm": 0.47711503505706787, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7931, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.09164448085193529, | |
| "grad_norm": 0.5544558763504028, | |
| "learning_rate": 0.0015, | |
| "loss": 1.8201, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.09215646119189023, | |
| "grad_norm": 0.5024393200874329, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7974, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.09266844153184518, | |
| "grad_norm": 0.5126355290412903, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7874, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.09318042187180012, | |
| "grad_norm": 0.5882781744003296, | |
| "learning_rate": 0.0015, | |
| "loss": 1.791, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.09369240221175507, | |
| "grad_norm": 0.508765697479248, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7819, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.09420438255171001, | |
| "grad_norm": 0.5449949502944946, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7838, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.09471636289166496, | |
| "grad_norm": 0.4996667802333832, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7618, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.09522834323161991, | |
| "grad_norm": 0.5014889240264893, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7752, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.09574032357157485, | |
| "grad_norm": 0.5011769533157349, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7768, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.0962523039115298, | |
| "grad_norm": 0.49963292479515076, | |
| "learning_rate": 0.0015, | |
| "loss": 1.778, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.09676428425148474, | |
| "grad_norm": 0.46659213304519653, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7668, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.09727626459143969, | |
| "grad_norm": 0.5140760540962219, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7448, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.09778824493139464, | |
| "grad_norm": 0.49709445238113403, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7573, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.09830022527134959, | |
| "grad_norm": 0.464329332113266, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7435, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.09881220561130452, | |
| "grad_norm": 0.4815766215324402, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7533, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.09932418595125947, | |
| "grad_norm": 0.4601441025733948, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7339, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.09983616629121442, | |
| "grad_norm": 0.46905994415283203, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7421, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.10034814663116937, | |
| "grad_norm": 0.4927903413772583, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7259, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.10086012697112431, | |
| "grad_norm": 0.4930973947048187, | |
| "learning_rate": 0.0015, | |
| "loss": 1.735, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.10137210731107925, | |
| "grad_norm": 0.4698399305343628, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7478, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.1018840876510342, | |
| "grad_norm": 0.5083284974098206, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7491, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.10239606799098915, | |
| "grad_norm": 0.4888325035572052, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7261, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.1029080483309441, | |
| "grad_norm": 0.524994432926178, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7221, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.10342002867089904, | |
| "grad_norm": 0.49820294976234436, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7279, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.10393200901085399, | |
| "grad_norm": 0.49288976192474365, | |
| "learning_rate": 0.0015, | |
| "loss": 1.746, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.10444398935080892, | |
| "grad_norm": 0.4776252806186676, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7384, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.10495596969076387, | |
| "grad_norm": 0.46143004298210144, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7037, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.10546795003071882, | |
| "grad_norm": 0.4855809211730957, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7052, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.10597993037067377, | |
| "grad_norm": 0.491964727640152, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7275, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.10649191071062872, | |
| "grad_norm": 0.5072810053825378, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7262, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.10700389105058365, | |
| "grad_norm": 0.5020768642425537, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7106, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.1075158713905386, | |
| "grad_norm": 0.4881630837917328, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7411, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.10802785173049355, | |
| "grad_norm": 0.5104793906211853, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7053, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.1085398320704485, | |
| "grad_norm": 0.4574519991874695, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7219, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.10905181241040345, | |
| "grad_norm": 0.4427832365036011, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6966, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.10956379275035838, | |
| "grad_norm": 0.46723929047584534, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7106, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.11007577309031333, | |
| "grad_norm": 0.4710049629211426, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.11058775343026828, | |
| "grad_norm": 0.46849745512008667, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7071, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.11109973377022322, | |
| "grad_norm": 0.4712335765361786, | |
| "learning_rate": 0.0015, | |
| "loss": 1.685, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.11161171411017817, | |
| "grad_norm": 0.45318537950515747, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6996, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.11212369445013312, | |
| "grad_norm": 0.4772440791130066, | |
| "learning_rate": 0.0015, | |
| "loss": 1.705, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.11263567479008806, | |
| "grad_norm": 0.4854085147380829, | |
| "learning_rate": 0.0015, | |
| "loss": 1.691, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.113147655130043, | |
| "grad_norm": 0.4931398928165436, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6979, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.11365963546999795, | |
| "grad_norm": 0.4212550222873688, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6792, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.1141716158099529, | |
| "grad_norm": 0.4916476905345917, | |
| "learning_rate": 0.0015, | |
| "loss": 1.682, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.11468359614990785, | |
| "grad_norm": 0.44974076747894287, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6734, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.11519557648986278, | |
| "grad_norm": 0.4464137554168701, | |
| "learning_rate": 0.0015, | |
| "loss": 1.7032, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.11570755682981773, | |
| "grad_norm": 0.4473714530467987, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6868, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.11621953716977268, | |
| "grad_norm": 0.4802720844745636, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6805, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.11673151750972763, | |
| "grad_norm": 0.45060625672340393, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6716, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.11724349784968258, | |
| "grad_norm": 0.47407498955726624, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6569, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.11775547818963752, | |
| "grad_norm": 0.45615556836128235, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6682, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.11826745852959246, | |
| "grad_norm": 0.4670998156070709, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6785, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.11877943886954741, | |
| "grad_norm": 0.45432570576667786, | |
| "learning_rate": 0.0015, | |
| "loss": 1.674, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.11929141920950236, | |
| "grad_norm": 0.44804081320762634, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6619, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.1198033995494573, | |
| "grad_norm": 0.4523905813694, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6652, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.12031537988941225, | |
| "grad_norm": 0.4514728784561157, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6652, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.12082736022936719, | |
| "grad_norm": 0.41209134459495544, | |
| "learning_rate": 0.0015, | |
| "loss": 1.658, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.12133934056932213, | |
| "grad_norm": 0.4219752252101898, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6379, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.12185132090927708, | |
| "grad_norm": 0.47252357006073, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6636, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.12236330124923203, | |
| "grad_norm": 0.4292849004268646, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6528, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.12287528158918698, | |
| "grad_norm": 0.4734489917755127, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6297, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.12338726192914191, | |
| "grad_norm": 0.48543623089790344, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6404, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.12389924226909686, | |
| "grad_norm": 0.4184911549091339, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6315, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.12441122260905181, | |
| "grad_norm": 0.42600351572036743, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6502, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.12492320294900676, | |
| "grad_norm": 0.4201619029045105, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6372, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.1254351832889617, | |
| "grad_norm": 0.4165250360965729, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6334, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.12594716362891664, | |
| "grad_norm": 0.4470268487930298, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6359, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.1264591439688716, | |
| "grad_norm": 0.4310542941093445, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6439, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.12697112430882654, | |
| "grad_norm": 0.4297926425933838, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6222, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.1274831046487815, | |
| "grad_norm": 0.45335137844085693, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6559, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.12799508498873644, | |
| "grad_norm": 0.4176558256149292, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6561, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.12850706532869138, | |
| "grad_norm": 0.4358290433883667, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6241, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.12901904566864633, | |
| "grad_norm": 0.44109201431274414, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6022, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.12953102600860128, | |
| "grad_norm": 0.44387978315353394, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6335, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.13004300634855623, | |
| "grad_norm": 0.434861421585083, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6377, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.13055498668851115, | |
| "grad_norm": 0.419826865196228, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6238, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.1310669670284661, | |
| "grad_norm": 0.471110463142395, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6383, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.13157894736842105, | |
| "grad_norm": 0.44935643672943115, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6006, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.132090927708376, | |
| "grad_norm": 0.4497852027416229, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6115, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.13260290804833094, | |
| "grad_norm": 0.45850351452827454, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6194, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.1331148883882859, | |
| "grad_norm": 0.40869665145874023, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6159, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.13362686872824084, | |
| "grad_norm": 0.4347962737083435, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6254, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.1341388490681958, | |
| "grad_norm": 0.4899897873401642, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6296, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.13465082940815074, | |
| "grad_norm": 0.44309839606285095, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6179, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.13516280974810568, | |
| "grad_norm": 0.3890606164932251, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6044, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.13567479008806063, | |
| "grad_norm": 0.42358025908470154, | |
| "learning_rate": 0.0015, | |
| "loss": 1.619, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.13618677042801555, | |
| "grad_norm": 0.42111581563949585, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6127, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.1366987507679705, | |
| "grad_norm": 0.4441932141780853, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6224, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.13721073110792545, | |
| "grad_norm": 0.4351959228515625, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5957, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.1377227114478804, | |
| "grad_norm": 0.43544304370880127, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5925, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.13823469178783535, | |
| "grad_norm": 0.4298728406429291, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5893, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.1387466721277903, | |
| "grad_norm": 0.4463229477405548, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5881, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.13925865246774524, | |
| "grad_norm": 0.43847158551216125, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5982, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.1397706328077002, | |
| "grad_norm": 0.44918614625930786, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6095, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.14028261314765514, | |
| "grad_norm": 0.45398586988449097, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5985, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.1407945934876101, | |
| "grad_norm": 0.41213494539260864, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6153, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.14130657382756504, | |
| "grad_norm": 0.41266897320747375, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5919, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.14181855416751996, | |
| "grad_norm": 0.42942896485328674, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5793, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.1423305345074749, | |
| "grad_norm": 0.4180223047733307, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5938, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.14284251484742985, | |
| "grad_norm": 0.4204559922218323, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5927, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.1433544951873848, | |
| "grad_norm": 0.43727442622184753, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6018, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.14386647552733975, | |
| "grad_norm": 0.4330785870552063, | |
| "learning_rate": 0.0015, | |
| "loss": 1.6004, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.1443784558672947, | |
| "grad_norm": 0.415101021528244, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5708, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.14489043620724965, | |
| "grad_norm": 0.41477903723716736, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5747, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.1454024165472046, | |
| "grad_norm": 0.4343889653682709, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5958, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.14591439688715954, | |
| "grad_norm": 0.4018150866031647, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5589, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.1464263772271145, | |
| "grad_norm": 0.4799724817276001, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5745, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.1469383575670694, | |
| "grad_norm": 0.42355528473854065, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5928, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.14745033790702436, | |
| "grad_norm": 0.40638747811317444, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5623, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.1479623182469793, | |
| "grad_norm": 0.39846664667129517, | |
| "learning_rate": 0.0015, | |
| "loss": 1.577, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.14847429858693426, | |
| "grad_norm": 0.4010321795940399, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5821, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.1489862789268892, | |
| "grad_norm": 0.42778313159942627, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5623, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.14949825926684415, | |
| "grad_norm": 0.39266425371170044, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5821, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.1500102396067991, | |
| "grad_norm": 0.40784794092178345, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5664, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.15052221994675405, | |
| "grad_norm": 0.43437501788139343, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5658, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.151034200286709, | |
| "grad_norm": 0.4373057186603546, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5591, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.15154618062666395, | |
| "grad_norm": 0.40370023250579834, | |
| "learning_rate": 0.0015, | |
| "loss": 1.555, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.1520581609666189, | |
| "grad_norm": 0.4626748263835907, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5808, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.15257014130657381, | |
| "grad_norm": 0.4095107614994049, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5705, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.15308212164652876, | |
| "grad_norm": 0.4343841075897217, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5738, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.1535941019864837, | |
| "grad_norm": 0.42325645685195923, | |
| "learning_rate": 0.0015, | |
| "loss": 1.567, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.15410608232643866, | |
| "grad_norm": 0.39237692952156067, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5748, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.1546180626663936, | |
| "grad_norm": 0.39682793617248535, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5711, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.15513004300634856, | |
| "grad_norm": 0.4060477614402771, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5623, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.1556420233463035, | |
| "grad_norm": 0.4088119864463806, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5532, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.15615400368625845, | |
| "grad_norm": 0.39976736903190613, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5436, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.1566659840262134, | |
| "grad_norm": 0.42855167388916016, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5577, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.15717796436616835, | |
| "grad_norm": 0.4451335072517395, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5375, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.1576899447061233, | |
| "grad_norm": 0.3867264688014984, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5418, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.15820192504607822, | |
| "grad_norm": 0.4165036976337433, | |
| "learning_rate": 0.0015, | |
| "loss": 1.564, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.15871390538603317, | |
| "grad_norm": 0.3978787958621979, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5408, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.15922588572598811, | |
| "grad_norm": 0.37848272919654846, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5477, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.15973786606594306, | |
| "grad_norm": 0.4218755066394806, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5533, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.160249846405898, | |
| "grad_norm": 0.38090386986732483, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5453, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.16076182674585296, | |
| "grad_norm": 0.39693617820739746, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5633, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.1612738070858079, | |
| "grad_norm": 0.3855767250061035, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5381, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.16178578742576286, | |
| "grad_norm": 0.3672980070114136, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5458, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.1622977677657178, | |
| "grad_norm": 0.3810063302516937, | |
| "learning_rate": 0.0015, | |
| "loss": 1.559, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.16280974810567275, | |
| "grad_norm": 0.4658653140068054, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5274, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.1633217284456277, | |
| "grad_norm": 0.40785935521125793, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5279, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.16383370878558262, | |
| "grad_norm": 0.40147677063941956, | |
| "learning_rate": 0.0015, | |
| "loss": 1.542, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.16434568912553757, | |
| "grad_norm": 0.39116302132606506, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5148, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.16485766946549252, | |
| "grad_norm": 0.3875216245651245, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5289, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.16536964980544747, | |
| "grad_norm": 0.4106022119522095, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5358, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.16588163014540241, | |
| "grad_norm": 0.393637090921402, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5334, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.16639361048535736, | |
| "grad_norm": 0.3800962269306183, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5364, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.1669055908253123, | |
| "grad_norm": 0.3848235011100769, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5411, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.16741757116526726, | |
| "grad_norm": 0.38832154870033264, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5373, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.1679295515052222, | |
| "grad_norm": 0.43623119592666626, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5558, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.16844153184517716, | |
| "grad_norm": 0.3507107198238373, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5365, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.1689535121851321, | |
| "grad_norm": 0.38700392842292786, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5383, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.16946549252508702, | |
| "grad_norm": 0.38841623067855835, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5399, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.16997747286504197, | |
| "grad_norm": 0.39128798246383667, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5271, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.17048945320499692, | |
| "grad_norm": 0.38994646072387695, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5317, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.17100143354495187, | |
| "grad_norm": 0.37731438875198364, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5251, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.17151341388490682, | |
| "grad_norm": 0.4156712293624878, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5221, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.17202539422486177, | |
| "grad_norm": 0.38232874870300293, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5196, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.17253737456481671, | |
| "grad_norm": 0.3940838575363159, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5213, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.17304935490477166, | |
| "grad_norm": 0.4050334393978119, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5159, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.1735613352447266, | |
| "grad_norm": 0.3736588954925537, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5157, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.17407331558468156, | |
| "grad_norm": 0.40355414152145386, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5446, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.1745852959246365, | |
| "grad_norm": 0.37198445200920105, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5322, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.17509727626459143, | |
| "grad_norm": 0.35825085639953613, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5136, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.17560925660454638, | |
| "grad_norm": 0.4174591302871704, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5092, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.17612123694450132, | |
| "grad_norm": 0.38272011280059814, | |
| "learning_rate": 0.0015, | |
| "loss": 1.515, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.17663321728445627, | |
| "grad_norm": 0.4088602364063263, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5089, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.17714519762441122, | |
| "grad_norm": 0.37706780433654785, | |
| "learning_rate": 0.0015, | |
| "loss": 1.513, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.17765717796436617, | |
| "grad_norm": 0.3772091865539551, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5096, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.17816915830432112, | |
| "grad_norm": 0.3540133535861969, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5099, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.17868113864427607, | |
| "grad_norm": 0.36549830436706543, | |
| "learning_rate": 0.0015, | |
| "loss": 1.511, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.17919311898423101, | |
| "grad_norm": 0.39273905754089355, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5005, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.17970509932418596, | |
| "grad_norm": 0.35500046610832214, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4962, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.18021707966414088, | |
| "grad_norm": 0.39818084239959717, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4951, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.18072906000409583, | |
| "grad_norm": 0.3649390637874603, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5038, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.18124104034405078, | |
| "grad_norm": 0.376000314950943, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4945, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.18175302068400573, | |
| "grad_norm": 0.3638756573200226, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5012, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.18226500102396068, | |
| "grad_norm": 0.3695107400417328, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5261, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.18277698136391562, | |
| "grad_norm": 0.424125999212265, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5245, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.18328896170387057, | |
| "grad_norm": 0.3683246374130249, | |
| "learning_rate": 0.0015, | |
| "loss": 1.507, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.18380094204382552, | |
| "grad_norm": 0.3763924241065979, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4671, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.18431292238378047, | |
| "grad_norm": 0.3692323565483093, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5182, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.18482490272373542, | |
| "grad_norm": 0.37030673027038574, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5037, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.18533688306369037, | |
| "grad_norm": 0.3666503429412842, | |
| "learning_rate": 0.0015, | |
| "loss": 1.499, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.1858488634036453, | |
| "grad_norm": 0.3609069287776947, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5052, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.18636084374360024, | |
| "grad_norm": 0.3748449683189392, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4596, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.18687282408355518, | |
| "grad_norm": 0.4080664813518524, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5051, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.18738480442351013, | |
| "grad_norm": 0.3743340075016022, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4658, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.18789678476346508, | |
| "grad_norm": 0.36924538016319275, | |
| "learning_rate": 0.0015, | |
| "loss": 1.474, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.18840876510342003, | |
| "grad_norm": 0.3834936022758484, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4952, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.18892074544337498, | |
| "grad_norm": 0.3493509590625763, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4765, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.18943272578332992, | |
| "grad_norm": 0.3550162613391876, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4928, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.18994470612328487, | |
| "grad_norm": 0.3747323155403137, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4872, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.19045668646323982, | |
| "grad_norm": 0.3649948835372925, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5015, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.19096866680319477, | |
| "grad_norm": 0.37357765436172485, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4828, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.1914806471431497, | |
| "grad_norm": 0.36136525869369507, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5063, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.19199262748310464, | |
| "grad_norm": 0.35555464029312134, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4797, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.1925046078230596, | |
| "grad_norm": 0.3460323214530945, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4913, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.19301658816301454, | |
| "grad_norm": 0.35079696774482727, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4714, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.19352856850296948, | |
| "grad_norm": 0.3562418818473816, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4816, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.19404054884292443, | |
| "grad_norm": 0.3714292049407959, | |
| "learning_rate": 0.0015, | |
| "loss": 1.496, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.19455252918287938, | |
| "grad_norm": 0.37646958231925964, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4814, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.19506450952283433, | |
| "grad_norm": 0.37127116322517395, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4902, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.19557648986278928, | |
| "grad_norm": 0.3644818961620331, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4811, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.19608847020274423, | |
| "grad_norm": 0.38677945733070374, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5001, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.19660045054269917, | |
| "grad_norm": 0.379823237657547, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4665, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.1971124308826541, | |
| "grad_norm": 0.37844884395599365, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4783, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.19762441122260904, | |
| "grad_norm": 0.36030471324920654, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4883, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.198136391562564, | |
| "grad_norm": 0.3515039384365082, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4614, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.19864837190251894, | |
| "grad_norm": 0.3469856381416321, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4669, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.1991603522424739, | |
| "grad_norm": 0.3526422381401062, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4568, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.19967233258242884, | |
| "grad_norm": 0.34970229864120483, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4467, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.20018431292238378, | |
| "grad_norm": 0.35208991169929504, | |
| "learning_rate": 0.0015, | |
| "loss": 1.5057, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.20069629326233873, | |
| "grad_norm": 0.35446539521217346, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4677, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.20120827360229368, | |
| "grad_norm": 0.32680749893188477, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4577, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.20172025394224863, | |
| "grad_norm": 0.3479768931865692, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4679, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.20223223428220358, | |
| "grad_norm": 0.3349073529243469, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4497, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.2027442146221585, | |
| "grad_norm": 0.35016781091690063, | |
| "learning_rate": 0.0015, | |
| "loss": 1.449, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.20325619496211345, | |
| "grad_norm": 0.349086195230484, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4751, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.2037681753020684, | |
| "grad_norm": 0.36575040221214294, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4653, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.20428015564202334, | |
| "grad_norm": 0.34002363681793213, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4826, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.2047921359819783, | |
| "grad_norm": 0.36541834473609924, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4485, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.20530411632193324, | |
| "grad_norm": 0.3874847888946533, | |
| "learning_rate": 0.0015, | |
| "loss": 1.478, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.2058160966618882, | |
| "grad_norm": 0.36418798565864563, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4629, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.20632807700184314, | |
| "grad_norm": 0.34188389778137207, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4784, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.20684005734179808, | |
| "grad_norm": 0.35976287722587585, | |
| "learning_rate": 0.0015, | |
| "loss": 1.458, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.20735203768175303, | |
| "grad_norm": 0.37284791469573975, | |
| "learning_rate": 0.0015, | |
| "loss": 1.471, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.20786401802170798, | |
| "grad_norm": 0.3462198078632355, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4748, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.2083759983616629, | |
| "grad_norm": 0.3988822102546692, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4576, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.20888797870161785, | |
| "grad_norm": 0.361892431974411, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4516, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.2093999590415728, | |
| "grad_norm": 0.3648587763309479, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4537, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.20991193938152775, | |
| "grad_norm": 0.35592299699783325, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4346, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.2104239197214827, | |
| "grad_norm": 0.3457651138305664, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4455, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.21093590006143764, | |
| "grad_norm": 0.3580280542373657, | |
| "learning_rate": 0.0015, | |
| "loss": 1.452, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.2114478804013926, | |
| "grad_norm": 0.3704809844493866, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4655, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.21195986074134754, | |
| "grad_norm": 0.37433552742004395, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4526, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.2124718410813025, | |
| "grad_norm": 0.35324522852897644, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4651, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.21298382142125744, | |
| "grad_norm": 0.34257858991622925, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4454, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.21349580176121236, | |
| "grad_norm": 0.34159529209136963, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4561, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.2140077821011673, | |
| "grad_norm": 0.3691791296005249, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4496, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.21451976244112225, | |
| "grad_norm": 0.3290902078151703, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4477, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.2150317427810772, | |
| "grad_norm": 0.35127583146095276, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4389, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.21554372312103215, | |
| "grad_norm": 0.3416004776954651, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4569, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.2160557034609871, | |
| "grad_norm": 0.33589133620262146, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4536, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.21656768380094205, | |
| "grad_norm": 0.3249707818031311, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4421, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.217079664140897, | |
| "grad_norm": 0.3269306719303131, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4644, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.21759164448085194, | |
| "grad_norm": 0.34012100100517273, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4419, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.2181036248208069, | |
| "grad_norm": 0.3248611390590668, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4321, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.21861560516076184, | |
| "grad_norm": 0.33508434891700745, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4547, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.21912758550071676, | |
| "grad_norm": 0.3807787597179413, | |
| "learning_rate": 0.0015, | |
| "loss": 1.441, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.2196395658406717, | |
| "grad_norm": 0.34403491020202637, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4309, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.22015154618062666, | |
| "grad_norm": 0.339507520198822, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4408, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.2206635265205816, | |
| "grad_norm": 0.34783267974853516, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4362, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.22117550686053655, | |
| "grad_norm": 0.3477760851383209, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4743, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.2216874872004915, | |
| "grad_norm": 0.33150288462638855, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4338, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.22219946754044645, | |
| "grad_norm": 0.3353327810764313, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4389, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.2227114478804014, | |
| "grad_norm": 0.35436680912971497, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4221, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.22322342822035635, | |
| "grad_norm": 0.35052821040153503, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4463, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.2237354085603113, | |
| "grad_norm": 0.3383365273475647, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4438, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.22424738890026624, | |
| "grad_norm": 0.33028966188430786, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4365, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.22475936924022116, | |
| "grad_norm": 0.3439690172672272, | |
| "learning_rate": 0.0015, | |
| "loss": 1.434, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.2252713495801761, | |
| "grad_norm": 0.3257237374782562, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4268, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.22578332992013106, | |
| "grad_norm": 0.34487271308898926, | |
| "learning_rate": 0.0015, | |
| "loss": 1.419, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.226295310260086, | |
| "grad_norm": 0.3513702154159546, | |
| "learning_rate": 0.0015, | |
| "loss": 1.416, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.22680729060004096, | |
| "grad_norm": 0.32178881764411926, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4267, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.2273192709399959, | |
| "grad_norm": 0.32011663913726807, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4269, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.22783125127995085, | |
| "grad_norm": 0.3356774151325226, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4253, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.2283432316199058, | |
| "grad_norm": 0.33938485383987427, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4137, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.22885521195986075, | |
| "grad_norm": 0.3313305675983429, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4178, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.2293671922998157, | |
| "grad_norm": 0.31967252492904663, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4421, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.22987917263977065, | |
| "grad_norm": 0.3485276401042938, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4202, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.23039115297972557, | |
| "grad_norm": 0.3465486764907837, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4364, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.23090313331968051, | |
| "grad_norm": 0.3443972170352936, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4326, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.23141511365963546, | |
| "grad_norm": 0.33160969614982605, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4147, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.2319270939995904, | |
| "grad_norm": 0.3427571952342987, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4316, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.23243907433954536, | |
| "grad_norm": 0.3282462954521179, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3933, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.2329510546795003, | |
| "grad_norm": 0.3840288519859314, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4206, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.23346303501945526, | |
| "grad_norm": 0.34188082814216614, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4286, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.2339750153594102, | |
| "grad_norm": 0.32480111718177795, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4191, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.23448699569936515, | |
| "grad_norm": 0.3416594862937927, | |
| "learning_rate": 0.0015, | |
| "loss": 1.432, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.2349989760393201, | |
| "grad_norm": 0.32898756861686707, | |
| "learning_rate": 0.0015, | |
| "loss": 1.414, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.23551095637927505, | |
| "grad_norm": 0.3290642499923706, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4272, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.23602293671922997, | |
| "grad_norm": 0.333150178194046, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4254, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.23653491705918492, | |
| "grad_norm": 0.30599096417427063, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4255, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.23704689739913987, | |
| "grad_norm": 0.34288567304611206, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4027, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.23755887773909481, | |
| "grad_norm": 0.36715662479400635, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4155, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.23807085807904976, | |
| "grad_norm": 0.32257118821144104, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4178, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.2385828384190047, | |
| "grad_norm": 0.3298852741718292, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4149, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.23909481875895966, | |
| "grad_norm": 0.32268422842025757, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4384, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.2396067990989146, | |
| "grad_norm": 0.33715546131134033, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4014, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.24011877943886956, | |
| "grad_norm": 0.3131064772605896, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4163, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.2406307597788245, | |
| "grad_norm": 0.3470405042171478, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4186, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.24114274011877943, | |
| "grad_norm": 0.35475459694862366, | |
| "learning_rate": 0.0015, | |
| "loss": 1.417, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.24165472045873437, | |
| "grad_norm": 0.3337201178073883, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4271, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.24216670079868932, | |
| "grad_norm": 0.3554363548755646, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4182, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.24267868113864427, | |
| "grad_norm": 0.32346460223197937, | |
| "learning_rate": 0.0015, | |
| "loss": 1.421, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.24319066147859922, | |
| "grad_norm": 0.3117121756076813, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4278, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.24370264181855417, | |
| "grad_norm": 0.3506932556629181, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3881, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.24421462215850911, | |
| "grad_norm": 0.3424610495567322, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4236, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.24472660249846406, | |
| "grad_norm": 0.3284012973308563, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4147, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.245238582838419, | |
| "grad_norm": 0.3341637849807739, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4109, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.24575056317837396, | |
| "grad_norm": 0.32382500171661377, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4063, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.2462625435183289, | |
| "grad_norm": 0.3269002437591553, | |
| "learning_rate": 0.0015, | |
| "loss": 1.42, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.24677452385828383, | |
| "grad_norm": 0.33705347776412964, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4108, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.24728650419823878, | |
| "grad_norm": 0.32141435146331787, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4012, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.24779848453819373, | |
| "grad_norm": 0.32620713114738464, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3946, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.24831046487814867, | |
| "grad_norm": 0.3150465488433838, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4239, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.24882244521810362, | |
| "grad_norm": 0.3141099214553833, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4248, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.24933442555805857, | |
| "grad_norm": 0.31802797317504883, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3965, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.24984640589801352, | |
| "grad_norm": 0.31748947501182556, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4222, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.25035838623796847, | |
| "grad_norm": 0.30938032269477844, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4001, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.2508703665779234, | |
| "grad_norm": 0.3129180371761322, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3958, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.25138234691787836, | |
| "grad_norm": 0.31602999567985535, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4114, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.2518943272578333, | |
| "grad_norm": 0.3049462139606476, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3868, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.25240630759778826, | |
| "grad_norm": 0.3103995621204376, | |
| "learning_rate": 0.0015, | |
| "loss": 1.401, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.2529182879377432, | |
| "grad_norm": 0.30271056294441223, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4046, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.25343026827769816, | |
| "grad_norm": 0.32372725009918213, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3719, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.2539422486176531, | |
| "grad_norm": 0.3129730224609375, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3797, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.25445422895760805, | |
| "grad_norm": 0.3240148425102234, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4134, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.254966209297563, | |
| "grad_norm": 0.30317404866218567, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3894, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.2554781896375179, | |
| "grad_norm": 0.33288583159446716, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4132, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.25599016997747287, | |
| "grad_norm": 0.3233846127986908, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3762, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.2565021503174278, | |
| "grad_norm": 0.30729755759239197, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3975, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.25701413065738277, | |
| "grad_norm": 0.3006018400192261, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4047, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.2575261109973377, | |
| "grad_norm": 0.3207467794418335, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4084, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.25803809133729266, | |
| "grad_norm": 0.3039129674434662, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4209, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.2585500716772476, | |
| "grad_norm": 0.29750290513038635, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4156, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.25906205201720256, | |
| "grad_norm": 0.314507395029068, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3685, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.2595740323571575, | |
| "grad_norm": 0.3176608681678772, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3701, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.26008601269711246, | |
| "grad_norm": 0.3273438513278961, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3841, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.2605979930370674, | |
| "grad_norm": 0.3173183798789978, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3732, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.2611099733770223, | |
| "grad_norm": 0.33317986130714417, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3815, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.2616219537169773, | |
| "grad_norm": 0.3045515716075897, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4042, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.2621339340569322, | |
| "grad_norm": 0.3056975305080414, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4156, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.26264591439688717, | |
| "grad_norm": 0.3231489956378937, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4076, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.2631578947368421, | |
| "grad_norm": 0.3215503990650177, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3712, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.26366987507679707, | |
| "grad_norm": 0.30379393696784973, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3648, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.264181855416752, | |
| "grad_norm": 0.2987072765827179, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3859, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.26469383575670696, | |
| "grad_norm": 0.3293174207210541, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3974, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.2652058160966619, | |
| "grad_norm": 0.34920957684516907, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3868, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.26571779643661686, | |
| "grad_norm": 0.3054308295249939, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3838, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.2662297767765718, | |
| "grad_norm": 0.3131832182407379, | |
| "learning_rate": 0.0015, | |
| "loss": 1.377, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.2667417571165267, | |
| "grad_norm": 0.30868205428123474, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3999, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.2672537374564817, | |
| "grad_norm": 0.3193263113498688, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3789, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.2677657177964366, | |
| "grad_norm": 0.3142963945865631, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3993, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.2682776981363916, | |
| "grad_norm": 0.3012097179889679, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3959, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.2687896784763465, | |
| "grad_norm": 0.30580368638038635, | |
| "learning_rate": 0.0015, | |
| "loss": 1.4106, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.26930165881630147, | |
| "grad_norm": 0.2862599790096283, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3873, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.2698136391562564, | |
| "grad_norm": 0.3221125602722168, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3997, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.27032561949621137, | |
| "grad_norm": 0.29167062044143677, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3707, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.2708375998361663, | |
| "grad_norm": 0.3372457027435303, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3767, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.27134958017612126, | |
| "grad_norm": 0.308940589427948, | |
| "learning_rate": 0.0015, | |
| "loss": 1.377, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.2718615605160762, | |
| "grad_norm": 0.2946240305900574, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3811, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.2723735408560311, | |
| "grad_norm": 0.30118903517723083, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3991, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.2728855211959861, | |
| "grad_norm": 0.3128001093864441, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3806, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.273397501535941, | |
| "grad_norm": 0.3355924189090729, | |
| "learning_rate": 0.0015, | |
| "loss": 1.378, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.273909481875896, | |
| "grad_norm": 0.29809674620628357, | |
| "learning_rate": 0.0015, | |
| "loss": 1.365, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.2744214622158509, | |
| "grad_norm": 0.2897878885269165, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3796, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.2749334425558059, | |
| "grad_norm": 0.33131879568099976, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3789, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.2754454228957608, | |
| "grad_norm": 0.3270549476146698, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3877, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.27595740323571577, | |
| "grad_norm": 0.3001706898212433, | |
| "learning_rate": 0.0015, | |
| "loss": 1.376, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.2764693835756707, | |
| "grad_norm": 0.3149849772453308, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3815, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.27698136391562567, | |
| "grad_norm": 0.28992435336112976, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3731, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.2774933442555806, | |
| "grad_norm": 0.295311838388443, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3958, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.2780053245955355, | |
| "grad_norm": 0.2988681495189667, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3946, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.2785173049354905, | |
| "grad_norm": 0.3085227608680725, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3776, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.2790292852754454, | |
| "grad_norm": 0.30014750361442566, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3772, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.2795412656154004, | |
| "grad_norm": 0.3058876693248749, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3637, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.2800532459553553, | |
| "grad_norm": 0.2952674925327301, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3888, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.2805652262953103, | |
| "grad_norm": 0.3016969561576843, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3874, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.2810772066352652, | |
| "grad_norm": 0.30375874042510986, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3652, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.2815891869752202, | |
| "grad_norm": 0.29380300641059875, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3768, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.2821011673151751, | |
| "grad_norm": 0.2994033992290497, | |
| "learning_rate": 0.0015, | |
| "loss": 1.376, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.28261314765513007, | |
| "grad_norm": 0.3174065053462982, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3873, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.283125127995085, | |
| "grad_norm": 0.3069535791873932, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3636, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.2836371083350399, | |
| "grad_norm": 0.2826645076274872, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3567, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.2841490886749949, | |
| "grad_norm": 0.295926034450531, | |
| "learning_rate": 0.0015, | |
| "loss": 1.361, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.2846610690149498, | |
| "grad_norm": 0.29257112741470337, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3699, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.2851730493549048, | |
| "grad_norm": 0.28169023990631104, | |
| "learning_rate": 0.0015, | |
| "loss": 1.353, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.2856850296948597, | |
| "grad_norm": 0.31054553389549255, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3955, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.2861970100348147, | |
| "grad_norm": 0.28373947739601135, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3843, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.2867089903747696, | |
| "grad_norm": 0.29920247197151184, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3588, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.2872209707147246, | |
| "grad_norm": 0.2981637120246887, | |
| "learning_rate": 0.0015, | |
| "loss": 1.376, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.2877329510546795, | |
| "grad_norm": 0.269811749458313, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3733, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.2882449313946345, | |
| "grad_norm": 0.28365617990493774, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3376, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.2887569117345894, | |
| "grad_norm": 0.2953552305698395, | |
| "learning_rate": 0.0015, | |
| "loss": 1.367, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.2892688920745443, | |
| "grad_norm": 0.2910911440849304, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3708, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.2897808724144993, | |
| "grad_norm": 0.2998880445957184, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3917, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.2902928527544542, | |
| "grad_norm": 0.3000008165836334, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3597, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.2908048330944092, | |
| "grad_norm": 0.3019564747810364, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3641, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.2913168134343641, | |
| "grad_norm": 0.28087547421455383, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3427, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.2918287937743191, | |
| "grad_norm": 0.32179591059684753, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3576, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.292340774114274, | |
| "grad_norm": 0.30196836590766907, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3866, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.292852754454229, | |
| "grad_norm": 0.29928138852119446, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3711, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.2933647347941839, | |
| "grad_norm": 0.30917906761169434, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3481, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.2938767151341388, | |
| "grad_norm": 0.32579630613327026, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3713, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.2943886954740938, | |
| "grad_norm": 0.3042047321796417, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3758, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.2949006758140487, | |
| "grad_norm": 0.2910909354686737, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3675, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.2954126561540037, | |
| "grad_norm": 0.29718905687332153, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3576, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.2959246364939586, | |
| "grad_norm": 0.28392040729522705, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3779, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.2964366168339136, | |
| "grad_norm": 0.2852902114391327, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3709, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.2969485971738685, | |
| "grad_norm": 0.29683250188827515, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3757, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.2974605775138235, | |
| "grad_norm": 0.2882269620895386, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3706, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.2979725578537784, | |
| "grad_norm": 0.3086804449558258, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3506, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.2984845381937334, | |
| "grad_norm": 0.2780090868473053, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3565, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.2989965185336883, | |
| "grad_norm": 0.30415329337120056, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3593, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.2995084988736432, | |
| "grad_norm": 0.2865590751171112, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3873, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.3000204792135982, | |
| "grad_norm": 0.2798267900943756, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3439, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.3005324595535531, | |
| "grad_norm": 0.29937195777893066, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3483, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.3010444398935081, | |
| "grad_norm": 0.27708205580711365, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3207, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.301556420233463, | |
| "grad_norm": 0.2955605983734131, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3524, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.302068400573418, | |
| "grad_norm": 0.3226946294307709, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3545, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.3025803809133729, | |
| "grad_norm": 0.2925417721271515, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3435, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.3030923612533279, | |
| "grad_norm": 0.3087621331214905, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3275, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.3036043415932828, | |
| "grad_norm": 0.2996879518032074, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3514, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.3041163219332378, | |
| "grad_norm": 0.3085525333881378, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3539, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.3046283022731927, | |
| "grad_norm": 0.28985559940338135, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3661, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.30514028261314763, | |
| "grad_norm": 0.2889237701892853, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3622, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.3056522629531026, | |
| "grad_norm": 0.3278009593486786, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3438, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.3061642432930575, | |
| "grad_norm": 0.2967126965522766, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3752, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.3066762236330125, | |
| "grad_norm": 0.2810833752155304, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3673, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.3071882039729674, | |
| "grad_norm": 0.2842026650905609, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3315, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.3077001843129224, | |
| "grad_norm": 0.2904771864414215, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3551, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.3082121646528773, | |
| "grad_norm": 0.2798822224140167, | |
| "learning_rate": 0.0015, | |
| "loss": 1.374, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.3087241449928323, | |
| "grad_norm": 0.2831931412220001, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3449, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.3092361253327872, | |
| "grad_norm": 0.27797648310661316, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3427, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.3097481056727422, | |
| "grad_norm": 0.2972757816314697, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3498, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.3102600860126971, | |
| "grad_norm": 0.2661411166191101, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3391, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.31077206635265203, | |
| "grad_norm": 0.2736954689025879, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3637, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.311284046692607, | |
| "grad_norm": 0.27739083766937256, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3432, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.31179602703256193, | |
| "grad_norm": 0.275734543800354, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3523, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.3123080073725169, | |
| "grad_norm": 0.29389500617980957, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3566, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.3128199877124718, | |
| "grad_norm": 0.3517824113368988, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3401, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.3133319680524268, | |
| "grad_norm": 0.2847048342227936, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3345, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.3138439483923817, | |
| "grad_norm": 0.2781658470630646, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3165, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.3143559287323367, | |
| "grad_norm": 0.27928218245506287, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3419, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.3148679090722916, | |
| "grad_norm": 0.29375484585762024, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3424, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.3153798894122466, | |
| "grad_norm": 0.2773997187614441, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3153, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.3158918697522015, | |
| "grad_norm": 0.2810317277908325, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3633, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.31640385009215644, | |
| "grad_norm": 0.2810805141925812, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3388, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.3169158304321114, | |
| "grad_norm": 0.27900010347366333, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3494, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.31742781077206633, | |
| "grad_norm": 0.2763247787952423, | |
| "learning_rate": 0.0015, | |
| "loss": 1.347, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.3179397911120213, | |
| "grad_norm": 0.27593132853507996, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3286, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.31845177145197623, | |
| "grad_norm": 0.2928100526332855, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3485, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.3189637517919312, | |
| "grad_norm": 0.2809889316558838, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3318, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.3194757321318861, | |
| "grad_norm": 0.2984907329082489, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3474, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.3199877124718411, | |
| "grad_norm": 0.2861260771751404, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3308, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.320499692811796, | |
| "grad_norm": 0.30209678411483765, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3438, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.321011673151751, | |
| "grad_norm": 0.27839919924736023, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3606, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.3215236534917059, | |
| "grad_norm": 0.27120068669319153, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3291, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.32203563383166084, | |
| "grad_norm": 0.2891988158226013, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3483, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.3225476141716158, | |
| "grad_norm": 0.3099561929702759, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3538, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.32305959451157074, | |
| "grad_norm": 0.28136762976646423, | |
| "learning_rate": 0.0015, | |
| "loss": 1.344, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.3235715748515257, | |
| "grad_norm": 0.27209803462028503, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3395, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.32408355519148063, | |
| "grad_norm": 0.2847345173358917, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3278, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.3245955355314356, | |
| "grad_norm": 0.29409244656562805, | |
| "learning_rate": 0.0015, | |
| "loss": 1.352, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.32510751587139053, | |
| "grad_norm": 0.26782944798469543, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3211, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.3256194962113455, | |
| "grad_norm": 0.27680841088294983, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3168, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.3261314765513004, | |
| "grad_norm": 0.28913265466690063, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3412, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.3266434568912554, | |
| "grad_norm": 0.2598094046115875, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3235, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.3271554372312103, | |
| "grad_norm": 0.2622967064380646, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3353, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.32766741757116524, | |
| "grad_norm": 0.2802422046661377, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3278, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.3281793979111202, | |
| "grad_norm": 0.2863336503505707, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3421, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.32869137825107514, | |
| "grad_norm": 0.28782033920288086, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3395, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.3292033585910301, | |
| "grad_norm": 0.2650611698627472, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3461, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.32971533893098504, | |
| "grad_norm": 0.28210777044296265, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3452, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.33022731927094, | |
| "grad_norm": 0.29541024565696716, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3304, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.33073929961089493, | |
| "grad_norm": 0.27473190426826477, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3277, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.3312512799508499, | |
| "grad_norm": 0.2899293005466461, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3193, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.33176326029080483, | |
| "grad_norm": 0.2961236834526062, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3252, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.3322752406307598, | |
| "grad_norm": 0.2859441637992859, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3327, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.3327872209707147, | |
| "grad_norm": 0.26721256971359253, | |
| "learning_rate": 0.0015, | |
| "loss": 1.344, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.33329920131066965, | |
| "grad_norm": 0.27258962392807007, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3291, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.3338111816506246, | |
| "grad_norm": 0.2868225872516632, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3542, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.33432316199057954, | |
| "grad_norm": 0.27058276534080505, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3428, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.3348351423305345, | |
| "grad_norm": 0.2648937404155731, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3345, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.33534712267048944, | |
| "grad_norm": 0.2588028609752655, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3163, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.3358591030104444, | |
| "grad_norm": 0.2773786783218384, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3353, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.33637108335039934, | |
| "grad_norm": 0.2635444402694702, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3073, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.3368830636903543, | |
| "grad_norm": 0.28633764386177063, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3085, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.33739504403030923, | |
| "grad_norm": 0.29486966133117676, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3316, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.3379070243702642, | |
| "grad_norm": 0.2629407048225403, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3319, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.33841900471021913, | |
| "grad_norm": 0.2779609262943268, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3043, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.33893098505017405, | |
| "grad_norm": 0.2911774218082428, | |
| "learning_rate": 0.0015, | |
| "loss": 1.361, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.339442965390129, | |
| "grad_norm": 0.26540687680244446, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3095, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.33995494573008395, | |
| "grad_norm": 0.27710777521133423, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3173, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.3404669260700389, | |
| "grad_norm": 0.2614011764526367, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3178, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.34097890640999384, | |
| "grad_norm": 0.2797437906265259, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3287, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.3414908867499488, | |
| "grad_norm": 0.28846311569213867, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3222, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.34200286708990374, | |
| "grad_norm": 0.2507641911506653, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3297, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.3425148474298587, | |
| "grad_norm": 0.277458518743515, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3092, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.34302682776981364, | |
| "grad_norm": 0.28139162063598633, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3509, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.3435388081097686, | |
| "grad_norm": 0.26460030674934387, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3357, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.34405078844972353, | |
| "grad_norm": 0.2602977752685547, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3375, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.34456276878967845, | |
| "grad_norm": 0.3062650263309479, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3225, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.34507474912963343, | |
| "grad_norm": 0.27152612805366516, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3326, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.34558672946958835, | |
| "grad_norm": 0.2585943341255188, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3275, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.3460987098095433, | |
| "grad_norm": 0.2826108932495117, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3143, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.34661069014949825, | |
| "grad_norm": 0.2719128131866455, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3136, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.3471226704894532, | |
| "grad_norm": 0.2605542540550232, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3207, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.34763465082940814, | |
| "grad_norm": 0.26649779081344604, | |
| "learning_rate": 0.0015, | |
| "loss": 1.304, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.3481466311693631, | |
| "grad_norm": 0.28349971771240234, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3176, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.34865861150931804, | |
| "grad_norm": 0.27145761251449585, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3294, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.349170591849273, | |
| "grad_norm": 0.26513341069221497, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3299, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.34968257218922794, | |
| "grad_norm": 0.2701232135295868, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3028, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.35019455252918286, | |
| "grad_norm": 0.27336186170578003, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3253, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.35070653286913783, | |
| "grad_norm": 0.26006847620010376, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3097, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.35121851320909275, | |
| "grad_norm": 0.2867346405982971, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3489, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.35173049354904773, | |
| "grad_norm": 0.2665490210056305, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3029, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.35224247388900265, | |
| "grad_norm": 0.26250341534614563, | |
| "learning_rate": 0.0015, | |
| "loss": 1.324, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.3527544542289576, | |
| "grad_norm": 0.27404358983039856, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3222, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.35326643456891255, | |
| "grad_norm": 0.271932989358902, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3068, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.3537784149088675, | |
| "grad_norm": 0.25479060411453247, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3143, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.35429039524882244, | |
| "grad_norm": 0.2571351230144501, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2886, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.35480237558877736, | |
| "grad_norm": 0.2612917125225067, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3199, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.35531435592873234, | |
| "grad_norm": 0.2573522925376892, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3143, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.35582633626868726, | |
| "grad_norm": 0.2598212659358978, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3039, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.35633831660864224, | |
| "grad_norm": 0.2575034201145172, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3095, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.35685029694859716, | |
| "grad_norm": 0.2559545636177063, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2971, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.35736227728855213, | |
| "grad_norm": 0.26087066531181335, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3023, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.35787425762850705, | |
| "grad_norm": 0.2606737017631531, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3098, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.35838623796846203, | |
| "grad_norm": 0.27495986223220825, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3249, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.35889821830841695, | |
| "grad_norm": 0.25473734736442566, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3253, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 0.3594101986483719, | |
| "grad_norm": 0.2764824330806732, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3101, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.35992217898832685, | |
| "grad_norm": 0.27935823798179626, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3268, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 0.36043415932828177, | |
| "grad_norm": 0.26057881116867065, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2999, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.36094613966823674, | |
| "grad_norm": 0.27014756202697754, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3083, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.36145812000819166, | |
| "grad_norm": 0.26150983572006226, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3059, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.36197010034814664, | |
| "grad_norm": 0.2634667158126831, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3325, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 0.36248208068810156, | |
| "grad_norm": 0.2591879665851593, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3004, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.36299406102805654, | |
| "grad_norm": 0.27941566705703735, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3216, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 0.36350604136801146, | |
| "grad_norm": 0.2634701430797577, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3043, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.36401802170796643, | |
| "grad_norm": 0.2601988613605499, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3128, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.36453000204792135, | |
| "grad_norm": 0.2701079249382019, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2908, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.36504198238787633, | |
| "grad_norm": 0.2694578170776367, | |
| "learning_rate": 0.0015, | |
| "loss": 1.303, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 0.36555396272783125, | |
| "grad_norm": 0.2465587705373764, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3177, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.36606594306778617, | |
| "grad_norm": 0.26136472821235657, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3112, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.36657792340774115, | |
| "grad_norm": 0.2548895478248596, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3114, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.36708990374769607, | |
| "grad_norm": 0.2586556673049927, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3076, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.36760188408765104, | |
| "grad_norm": 0.25887277722358704, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3217, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.36811386442760596, | |
| "grad_norm": 0.2628803253173828, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3012, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 0.36862584476756094, | |
| "grad_norm": 0.2630269527435303, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3187, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.36913782510751586, | |
| "grad_norm": 0.2589748501777649, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2885, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 0.36964980544747084, | |
| "grad_norm": 0.262361615896225, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2962, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.37016178578742576, | |
| "grad_norm": 0.24950037896633148, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3026, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.37067376612738073, | |
| "grad_norm": 0.2537461817264557, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2971, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.37118574646733565, | |
| "grad_norm": 0.25920331478118896, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2951, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.3716977268072906, | |
| "grad_norm": 0.2526357173919678, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2989, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.37220970714724555, | |
| "grad_norm": 0.28876397013664246, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3063, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 0.37272168748720047, | |
| "grad_norm": 0.27300864458084106, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2954, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.37323366782715545, | |
| "grad_norm": 0.26332223415374756, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3329, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.37374564816711037, | |
| "grad_norm": 0.26332515478134155, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2908, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.37425762850706534, | |
| "grad_norm": 0.2604503631591797, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3002, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 0.37476960884702026, | |
| "grad_norm": 0.25917840003967285, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2983, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.37528158918697524, | |
| "grad_norm": 0.26824817061424255, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3183, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 0.37579356952693016, | |
| "grad_norm": 0.2575696110725403, | |
| "learning_rate": 0.0015, | |
| "loss": 1.318, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.37630554986688514, | |
| "grad_norm": 0.2578194737434387, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2833, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.37681753020684006, | |
| "grad_norm": 0.2768312096595764, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2948, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.377329510546795, | |
| "grad_norm": 0.2382088154554367, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 0.37784149088674995, | |
| "grad_norm": 0.2637539803981781, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2792, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.3783534712267049, | |
| "grad_norm": 0.2832081615924835, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3097, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 0.37886545156665985, | |
| "grad_norm": 0.2672945261001587, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2989, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.37937743190661477, | |
| "grad_norm": 0.24696801602840424, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3174, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.37988941224656975, | |
| "grad_norm": 0.2638930082321167, | |
| "learning_rate": 0.0015, | |
| "loss": 1.295, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.38040139258652467, | |
| "grad_norm": 0.2714937925338745, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2917, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 0.38091337292647964, | |
| "grad_norm": 0.2469353824853897, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2919, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.38142535326643456, | |
| "grad_norm": 0.25035470724105835, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2896, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.38193733360638954, | |
| "grad_norm": 0.26178446412086487, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2891, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.38244931394634446, | |
| "grad_norm": 0.26942870020866394, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2723, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.3829612942862994, | |
| "grad_norm": 0.26943838596343994, | |
| "learning_rate": 0.0015, | |
| "loss": 1.284, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.38347327462625436, | |
| "grad_norm": 0.25865715742111206, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3063, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 0.3839852549662093, | |
| "grad_norm": 0.27455562353134155, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2988, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.38449723530616425, | |
| "grad_norm": 0.2636263370513916, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2739, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 0.3850092156461192, | |
| "grad_norm": 0.26559826731681824, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2958, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.38552119598607415, | |
| "grad_norm": 0.2592698335647583, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2981, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.38603317632602907, | |
| "grad_norm": 0.25872740149497986, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3005, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.38654515666598405, | |
| "grad_norm": 0.26369425654411316, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3021, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.38705713700593897, | |
| "grad_norm": 0.25757378339767456, | |
| "learning_rate": 0.0015, | |
| "loss": 1.302, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.38756911734589394, | |
| "grad_norm": 0.27320241928100586, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2802, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 0.38808109768584886, | |
| "grad_norm": 0.2795805335044861, | |
| "learning_rate": 0.0015, | |
| "loss": 1.295, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.3885930780258038, | |
| "grad_norm": 0.26023516058921814, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2889, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.38910505836575876, | |
| "grad_norm": 0.2582970857620239, | |
| "learning_rate": 0.0015, | |
| "loss": 1.302, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.3896170387057137, | |
| "grad_norm": 0.2473934441804886, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3023, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 0.39012901904566866, | |
| "grad_norm": 0.2547856271266937, | |
| "learning_rate": 0.0015, | |
| "loss": 1.29, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.3906409993856236, | |
| "grad_norm": 0.26764586567878723, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2905, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 0.39115297972557855, | |
| "grad_norm": 0.2481442391872406, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3164, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.3916649600655335, | |
| "grad_norm": 0.25532233715057373, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2958, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.39217694040548845, | |
| "grad_norm": 0.24001578986644745, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2827, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.39268892074544337, | |
| "grad_norm": 0.2489776611328125, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2742, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 0.39320090108539835, | |
| "grad_norm": 0.23535743355751038, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2855, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.39371288142535327, | |
| "grad_norm": 0.25811052322387695, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2971, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 0.3942248617653082, | |
| "grad_norm": 0.24241647124290466, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2968, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.39473684210526316, | |
| "grad_norm": 0.25648635625839233, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2916, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 0.3952488224452181, | |
| "grad_norm": 0.2703993618488312, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2909, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.39576080278517306, | |
| "grad_norm": 0.2558510899543762, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2913, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 0.396272783125128, | |
| "grad_norm": 0.2394089698791504, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2968, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.39678476346508296, | |
| "grad_norm": 0.2338177114725113, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2894, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.3972967438050379, | |
| "grad_norm": 0.25422418117523193, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2958, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.39780872414499285, | |
| "grad_norm": 0.2437313348054886, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2878, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 0.3983207044849478, | |
| "grad_norm": 0.26623979210853577, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2915, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.39883268482490275, | |
| "grad_norm": 0.24698524177074432, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2949, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 0.39934466516485767, | |
| "grad_norm": 0.23496921360492706, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3069, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.3998566455048126, | |
| "grad_norm": 0.2393864393234253, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2913, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 0.40036862584476757, | |
| "grad_norm": 0.24716414511203766, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2829, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 0.4008806061847225, | |
| "grad_norm": 0.24985013902187347, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2773, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 0.40139258652467746, | |
| "grad_norm": 0.24895814061164856, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2889, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 0.4019045668646324, | |
| "grad_norm": 0.2497827261686325, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2747, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.40241654720458736, | |
| "grad_norm": 0.23879243433475494, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3071, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 0.4029285275445423, | |
| "grad_norm": 0.24402157962322235, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2924, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 0.40344050788449726, | |
| "grad_norm": 0.24736930429935455, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2643, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 0.4039524882244522, | |
| "grad_norm": 0.2525321841239929, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3014, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 0.40446446856440715, | |
| "grad_norm": 0.2575211226940155, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2625, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.4049764489043621, | |
| "grad_norm": 0.24405083060264587, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2834, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 0.405488429244317, | |
| "grad_norm": 0.28250402212142944, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2814, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.40600040958427197, | |
| "grad_norm": 0.2795003056526184, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3154, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 0.4065123899242269, | |
| "grad_norm": 0.24883300065994263, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2887, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 0.40702437026418187, | |
| "grad_norm": 0.2502342164516449, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3033, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.4075363506041368, | |
| "grad_norm": 0.24973638355731964, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2947, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 0.40804833094409176, | |
| "grad_norm": 0.24371185898780823, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2908, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 0.4085603112840467, | |
| "grad_norm": 0.24570930004119873, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2879, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 0.40907229162400166, | |
| "grad_norm": 0.23717066645622253, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2928, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 0.4095842719639566, | |
| "grad_norm": 0.24726137518882751, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2915, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.41009625230391156, | |
| "grad_norm": 0.2352866679430008, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2817, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 0.4106082326438665, | |
| "grad_norm": 0.251365065574646, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2979, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 0.4111202129838214, | |
| "grad_norm": 0.22410385310649872, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2749, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 0.4116321933237764, | |
| "grad_norm": 0.25029605627059937, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2862, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 0.4121441736637313, | |
| "grad_norm": 0.25629550218582153, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2749, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.41265615400368627, | |
| "grad_norm": 0.23836827278137207, | |
| "learning_rate": 0.0015, | |
| "loss": 1.28, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 0.4131681343436412, | |
| "grad_norm": 0.23752672970294952, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2916, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 0.41368011468359617, | |
| "grad_norm": 0.26047077775001526, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2718, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 0.4141920950235511, | |
| "grad_norm": 0.24297983944416046, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2961, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 0.41470407536350606, | |
| "grad_norm": 0.24528458714485168, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2591, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.415216055703461, | |
| "grad_norm": 0.24459367990493774, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2754, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 0.41572803604341596, | |
| "grad_norm": 0.24630287289619446, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2864, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 0.4162400163833709, | |
| "grad_norm": 0.2514908015727997, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2847, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 0.4167519967233258, | |
| "grad_norm": 0.227911576628685, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2798, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 0.4172639770632808, | |
| "grad_norm": 0.2512179911136627, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2817, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.4177759574032357, | |
| "grad_norm": 0.24971604347229004, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2856, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 0.4182879377431907, | |
| "grad_norm": 0.24980546534061432, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2932, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 0.4187999180831456, | |
| "grad_norm": 0.2510388493537903, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2849, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 0.41931189842310057, | |
| "grad_norm": 0.23916485905647278, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2787, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 0.4198238787630555, | |
| "grad_norm": 0.2525003254413605, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2856, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.42033585910301047, | |
| "grad_norm": 0.25865113735198975, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2473, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 0.4208478394429654, | |
| "grad_norm": 0.24689891934394836, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2663, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 0.4213598197829203, | |
| "grad_norm": 0.2257513701915741, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2576, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 0.4218718001228753, | |
| "grad_norm": 0.2339119166135788, | |
| "learning_rate": 0.0015, | |
| "loss": 1.3053, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 0.4223837804628302, | |
| "grad_norm": 0.2590661942958832, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2698, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.4228957608027852, | |
| "grad_norm": 0.2483995407819748, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2728, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 0.4234077411427401, | |
| "grad_norm": 0.23534591495990753, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2867, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 0.4239197214826951, | |
| "grad_norm": 0.22678501904010773, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2775, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 0.42443170182265, | |
| "grad_norm": 0.2298179715871811, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2866, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 0.424943682162605, | |
| "grad_norm": 0.2495158165693283, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2762, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.4254556625025599, | |
| "grad_norm": 0.22808024287223816, | |
| "learning_rate": 0.0015, | |
| "loss": 1.269, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 0.42596764284251487, | |
| "grad_norm": 0.24249188601970673, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2881, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 0.4264796231824698, | |
| "grad_norm": 0.2539406418800354, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2618, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 0.4269916035224247, | |
| "grad_norm": 0.2367791384458542, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2762, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 0.4275035838623797, | |
| "grad_norm": 0.2301592379808426, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2724, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.4280155642023346, | |
| "grad_norm": 0.24136430025100708, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2629, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 0.4285275445422896, | |
| "grad_norm": 0.23719066381454468, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2624, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 0.4290395248822445, | |
| "grad_norm": 0.2514694631099701, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2686, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 0.4295515052221995, | |
| "grad_norm": 0.24186182022094727, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2823, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 0.4300634855621544, | |
| "grad_norm": 0.23494115471839905, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2534, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.4305754659021094, | |
| "grad_norm": 0.2518327534198761, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2913, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 0.4310874462420643, | |
| "grad_norm": 0.23622803390026093, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2652, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 0.4315994265820193, | |
| "grad_norm": 0.22990188002586365, | |
| "learning_rate": 0.0015, | |
| "loss": 1.277, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 0.4321114069219742, | |
| "grad_norm": 0.23679761588573456, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2839, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 0.4326233872619291, | |
| "grad_norm": 0.25512683391571045, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2818, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.4331353676018841, | |
| "grad_norm": 0.24284730851650238, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2882, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 0.433647347941839, | |
| "grad_norm": 0.24152646958827972, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2727, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 0.434159328281794, | |
| "grad_norm": 0.24133774638175964, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2743, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 0.4346713086217489, | |
| "grad_norm": 0.23270800709724426, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2651, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 0.4351832889617039, | |
| "grad_norm": 0.2446971833705902, | |
| "learning_rate": 0.0015, | |
| "loss": 1.268, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.4356952693016588, | |
| "grad_norm": 0.23358875513076782, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2774, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 0.4362072496416138, | |
| "grad_norm": 0.22265927493572235, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2602, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 0.4367192299815687, | |
| "grad_norm": 0.22781646251678467, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2724, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 0.4372312103215237, | |
| "grad_norm": 0.23868761956691742, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2581, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 0.4377431906614786, | |
| "grad_norm": 0.2235594540834427, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2741, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.4382551710014335, | |
| "grad_norm": 0.2419920712709427, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2765, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 0.4387671513413885, | |
| "grad_norm": 0.27400338649749756, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2635, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 0.4392791316813434, | |
| "grad_norm": 0.23386618494987488, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2806, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 0.4397911120212984, | |
| "grad_norm": 0.24642907083034515, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2739, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 0.4403030923612533, | |
| "grad_norm": 0.2347201406955719, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2581, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.4408150727012083, | |
| "grad_norm": 0.22591201961040497, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2882, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 0.4413270530411632, | |
| "grad_norm": 0.2508542537689209, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2699, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 0.4418390333811182, | |
| "grad_norm": 0.2366652637720108, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2522, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 0.4423510137210731, | |
| "grad_norm": 0.22938509285449982, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2676, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 0.4428629940610281, | |
| "grad_norm": 0.22820281982421875, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2712, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.443374974400983, | |
| "grad_norm": 0.22258944809436798, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2721, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 0.4438869547409379, | |
| "grad_norm": 0.23942533135414124, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2659, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 0.4443989350808929, | |
| "grad_norm": 0.23312713205814362, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2755, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 0.4449109154208478, | |
| "grad_norm": 0.2283553183078766, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2537, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 0.4454228957608028, | |
| "grad_norm": 0.23631595075130463, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2487, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.4459348761007577, | |
| "grad_norm": 0.2447190135717392, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2529, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 0.4464468564407127, | |
| "grad_norm": 0.24584966897964478, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2738, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 0.4469588367806676, | |
| "grad_norm": 0.2374550849199295, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2791, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 0.4474708171206226, | |
| "grad_norm": 0.240436390042305, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2518, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 0.4479827974605775, | |
| "grad_norm": 0.23341523110866547, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2688, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.4484947778005325, | |
| "grad_norm": 0.24230003356933594, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2379, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 0.4490067581404874, | |
| "grad_norm": 0.2401583343744278, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2699, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 0.4495187384804423, | |
| "grad_norm": 0.22647708654403687, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2656, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 0.4500307188203973, | |
| "grad_norm": 0.24045558273792267, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2531, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 0.4505426991603522, | |
| "grad_norm": 0.2597295045852661, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2568, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.4510546795003072, | |
| "grad_norm": 0.22485364973545074, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2478, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 0.4515666598402621, | |
| "grad_norm": 0.23133698105812073, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2688, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 0.4520786401802171, | |
| "grad_norm": 0.22866465151309967, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2516, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 0.452590620520172, | |
| "grad_norm": 0.2258300632238388, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2571, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 0.453102600860127, | |
| "grad_norm": 0.23454922437667847, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2413, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.4536145812000819, | |
| "grad_norm": 0.22673968970775604, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2504, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 0.4541265615400369, | |
| "grad_norm": 0.24363909661769867, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2511, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 0.4546385418799918, | |
| "grad_norm": 0.25056564807891846, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2423, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 0.45515052221994673, | |
| "grad_norm": 0.2318125218153, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2753, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 0.4556625025599017, | |
| "grad_norm": 0.22525230050086975, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2389, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.4561744828998566, | |
| "grad_norm": 0.23389683663845062, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2457, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 0.4566864632398116, | |
| "grad_norm": 0.23282834887504578, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2628, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 0.4571984435797665, | |
| "grad_norm": 0.24000655114650726, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2637, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 0.4577104239197215, | |
| "grad_norm": 0.22707650065422058, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2651, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 0.4582224042596764, | |
| "grad_norm": 0.24544113874435425, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2597, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.4587343845996314, | |
| "grad_norm": 0.2471536099910736, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2583, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 0.4592463649395863, | |
| "grad_norm": 0.2399998903274536, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2587, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 0.4597583452795413, | |
| "grad_norm": 0.239053875207901, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2604, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 0.4602703256194962, | |
| "grad_norm": 0.23578478395938873, | |
| "learning_rate": 0.0015, | |
| "loss": 1.251, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 0.46078230595945113, | |
| "grad_norm": 0.22768492996692657, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2584, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.4612942862994061, | |
| "grad_norm": 0.2407897710800171, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2551, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 0.46180626663936103, | |
| "grad_norm": 0.24113765358924866, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2686, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 0.462318246979316, | |
| "grad_norm": 0.23086939752101898, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2521, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 0.4628302273192709, | |
| "grad_norm": 0.2428579032421112, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2539, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 0.4633422076592259, | |
| "grad_norm": 0.23166462779045105, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2452, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.4638541879991808, | |
| "grad_norm": 0.23648124933242798, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2522, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 0.4643661683391358, | |
| "grad_norm": 0.23984448611736298, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2556, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 0.4648781486790907, | |
| "grad_norm": 0.22623547911643982, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2496, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 0.4653901290190457, | |
| "grad_norm": 0.23154547810554504, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2688, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 0.4659021093590006, | |
| "grad_norm": 0.24457304179668427, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2457, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.46641408969895554, | |
| "grad_norm": 0.22743169963359833, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2533, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 0.4669260700389105, | |
| "grad_norm": 0.23356840014457703, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2529, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 0.46743805037886543, | |
| "grad_norm": 0.23355025053024292, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2595, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 0.4679500307188204, | |
| "grad_norm": 0.21895302832126617, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2613, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 0.46846201105877533, | |
| "grad_norm": 0.23437921702861786, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2631, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.4689739913987303, | |
| "grad_norm": 0.22628231346607208, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2634, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 0.4694859717386852, | |
| "grad_norm": 0.2286689728498459, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2412, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 0.4699979520786402, | |
| "grad_norm": 0.21830707788467407, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2714, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 0.4705099324185951, | |
| "grad_norm": 0.2502080500125885, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2419, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 0.4710219127585501, | |
| "grad_norm": 0.21958868205547333, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2406, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.471533893098505, | |
| "grad_norm": 0.22988547384738922, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2802, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 0.47204587343845994, | |
| "grad_norm": 0.22131182253360748, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2496, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 0.4725578537784149, | |
| "grad_norm": 0.24254952371120453, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2702, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 0.47306983411836984, | |
| "grad_norm": 0.22780196368694305, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2452, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 0.4735818144583248, | |
| "grad_norm": 0.22993087768554688, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2475, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.47409379479827973, | |
| "grad_norm": 0.21792259812355042, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2532, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 0.4746057751382347, | |
| "grad_norm": 0.22392146289348602, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2451, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 0.47511775547818963, | |
| "grad_norm": 0.24879144132137299, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2492, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 0.4756297358181446, | |
| "grad_norm": 0.21757066249847412, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2508, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 0.4761417161580995, | |
| "grad_norm": 0.23313356935977936, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2532, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.4766536964980545, | |
| "grad_norm": 0.25208523869514465, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2286, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 0.4771656768380094, | |
| "grad_norm": 0.2262171059846878, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2398, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 0.47767765717796434, | |
| "grad_norm": 0.2252594530582428, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2525, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 0.4781896375179193, | |
| "grad_norm": 0.2281142771244049, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2453, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 0.47870161785787424, | |
| "grad_norm": 0.22341011464595795, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2628, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.4792135981978292, | |
| "grad_norm": 0.22117526829242706, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2597, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 0.47972557853778414, | |
| "grad_norm": 0.2359929233789444, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2504, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 0.4802375588777391, | |
| "grad_norm": 0.2348971962928772, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2352, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 0.48074953921769403, | |
| "grad_norm": 0.23461927473545074, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2383, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 0.481261519557649, | |
| "grad_norm": 0.2463158220052719, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2329, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.48177349989760393, | |
| "grad_norm": 0.240493506193161, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2614, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 0.48228548023755885, | |
| "grad_norm": 0.22357292473316193, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2553, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 0.4827974605775138, | |
| "grad_norm": 0.2223501205444336, | |
| "learning_rate": 0.0015, | |
| "loss": 1.245, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 0.48330944091746875, | |
| "grad_norm": 0.2278713434934616, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2544, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 0.4838214212574237, | |
| "grad_norm": 0.23052051663398743, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2614, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.48433340159737864, | |
| "grad_norm": 0.22685429453849792, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2613, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 0.4848453819373336, | |
| "grad_norm": 0.22306014597415924, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2289, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 0.48535736227728854, | |
| "grad_norm": 0.22385765612125397, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2452, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 0.4858693426172435, | |
| "grad_norm": 0.22245322167873383, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2541, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 0.48638132295719844, | |
| "grad_norm": 0.2279806137084961, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2557, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.4868933032971534, | |
| "grad_norm": 0.2449760138988495, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2358, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 0.48740528363710833, | |
| "grad_norm": 0.22621648013591766, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2466, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 0.48791726397706325, | |
| "grad_norm": 0.22223225235939026, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2522, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 0.48842924431701823, | |
| "grad_norm": 0.23512163758277893, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2542, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 0.48894122465697315, | |
| "grad_norm": 0.21729685366153717, | |
| "learning_rate": 0.0015, | |
| "loss": 1.224, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.4894532049969281, | |
| "grad_norm": 0.22177568078041077, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2624, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 0.48996518533688305, | |
| "grad_norm": 0.22674211859703064, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2191, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 0.490477165676838, | |
| "grad_norm": 0.25243934988975525, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2327, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 0.49098914601679294, | |
| "grad_norm": 0.22206014394760132, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2369, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 0.4915011263567479, | |
| "grad_norm": 0.21915268898010254, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2475, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.49201310669670284, | |
| "grad_norm": 0.219084694981575, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2469, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 0.4925250870366578, | |
| "grad_norm": 0.21210044622421265, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2385, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 0.49303706737661274, | |
| "grad_norm": 0.22252093255519867, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2652, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 0.49354904771656766, | |
| "grad_norm": 0.2407660186290741, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2436, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 0.49406102805652263, | |
| "grad_norm": 0.22691743075847626, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2254, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.49457300839647755, | |
| "grad_norm": 0.23666201531887054, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2297, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 0.49508498873643253, | |
| "grad_norm": 0.21549946069717407, | |
| "learning_rate": 0.0015, | |
| "loss": 1.238, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 0.49559696907638745, | |
| "grad_norm": 0.22083760797977448, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2531, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 0.4961089494163424, | |
| "grad_norm": 0.23391181230545044, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1973, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 0.49662092975629735, | |
| "grad_norm": 0.21990463137626648, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2357, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.4971329100962523, | |
| "grad_norm": 0.22842243313789368, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2566, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 0.49764489043620724, | |
| "grad_norm": 0.2154964953660965, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2489, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 0.4981568707761622, | |
| "grad_norm": 0.23381535708904266, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2379, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 0.49866885111611714, | |
| "grad_norm": 0.23405200242996216, | |
| "learning_rate": 0.0015, | |
| "loss": 1.251, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 0.49918083145607206, | |
| "grad_norm": 0.24905334413051605, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2247, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.49969281179602704, | |
| "grad_norm": 0.22687901556491852, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2362, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 0.500204792135982, | |
| "grad_norm": 0.21950958669185638, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2304, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 0.5007167724759369, | |
| "grad_norm": 0.24343635141849518, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2313, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 0.5012287528158919, | |
| "grad_norm": 0.2238016575574875, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2504, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 0.5017407331558468, | |
| "grad_norm": 0.22162608802318573, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2242, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.5022527134958018, | |
| "grad_norm": 0.2090781331062317, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2214, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 0.5027646938357567, | |
| "grad_norm": 0.23861265182495117, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2554, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 0.5032766741757116, | |
| "grad_norm": 0.24569468200206757, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2525, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 0.5037886545156666, | |
| "grad_norm": 0.22713309526443481, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2513, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 0.5043006348556216, | |
| "grad_norm": 0.22980822622776031, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2493, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.5048126151955765, | |
| "grad_norm": 0.23609554767608643, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2366, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 0.5053245955355314, | |
| "grad_norm": 0.2115827053785324, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2558, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 0.5058365758754864, | |
| "grad_norm": 0.20506598055362701, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2421, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 0.5063485562154413, | |
| "grad_norm": 0.21842671930789948, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2328, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 0.5068605365553963, | |
| "grad_norm": 0.2390349954366684, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2494, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.5073725168953512, | |
| "grad_norm": 0.21842844784259796, | |
| "learning_rate": 0.0015, | |
| "loss": 1.243, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 0.5078844972353062, | |
| "grad_norm": 0.21210695803165436, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2438, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 0.5083964775752611, | |
| "grad_norm": 0.21826642751693726, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2402, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 0.5089084579152161, | |
| "grad_norm": 0.21249307692050934, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2168, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 0.509420438255171, | |
| "grad_norm": 0.22593854367733002, | |
| "learning_rate": 0.0015, | |
| "loss": 1.222, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.509932418595126, | |
| "grad_norm": 0.22972868382930756, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2577, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 0.5104443989350809, | |
| "grad_norm": 0.21808108687400818, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2301, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 0.5109563792750358, | |
| "grad_norm": 0.21525093913078308, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2412, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 0.5114683596149908, | |
| "grad_norm": 0.22222475707530975, | |
| "learning_rate": 0.0015, | |
| "loss": 1.237, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 0.5119803399549457, | |
| "grad_norm": 0.23491185903549194, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2436, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.5124923202949007, | |
| "grad_norm": 0.23327389359474182, | |
| "learning_rate": 0.0015, | |
| "loss": 1.223, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 0.5130043006348556, | |
| "grad_norm": 0.21225926280021667, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2215, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 0.5135162809748106, | |
| "grad_norm": 0.21181495487689972, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2297, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 0.5140282613147655, | |
| "grad_norm": 0.21177121996879578, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2228, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 0.5145402416547205, | |
| "grad_norm": 0.22206859290599823, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2579, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.5150522219946754, | |
| "grad_norm": 0.21502964198589325, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2298, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 0.5155642023346303, | |
| "grad_norm": 0.22302408516407013, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2226, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 0.5160761826745853, | |
| "grad_norm": 0.21490171551704407, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2554, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 0.5165881630145402, | |
| "grad_norm": 0.22137999534606934, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2189, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 0.5171001433544952, | |
| "grad_norm": 0.21363165974617004, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2533, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.5176121236944501, | |
| "grad_norm": 0.23033399879932404, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2406, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 0.5181241040344051, | |
| "grad_norm": 0.22692923247814178, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2294, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 0.51863608437436, | |
| "grad_norm": 0.23053601384162903, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2351, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 0.519148064714315, | |
| "grad_norm": 0.21180744469165802, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2518, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 0.5196600450542699, | |
| "grad_norm": 0.2388363927602768, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2188, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 0.5201720253942249, | |
| "grad_norm": 0.22531351447105408, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2242, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 0.5206840057341798, | |
| "grad_norm": 0.2166026532649994, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2122, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 0.5211959860741348, | |
| "grad_norm": 0.23231609165668488, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2078, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 0.5217079664140897, | |
| "grad_norm": 0.2189248949289322, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2392, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 0.5222199467540446, | |
| "grad_norm": 0.21036341786384583, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2325, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.5227319270939996, | |
| "grad_norm": 0.21162335574626923, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2348, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 0.5232439074339545, | |
| "grad_norm": 0.21558861434459686, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2343, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 0.5237558877739095, | |
| "grad_norm": 0.22100234031677246, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2373, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 0.5242678681138644, | |
| "grad_norm": 0.225110724568367, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2368, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 0.5247798484538194, | |
| "grad_norm": 0.21674303710460663, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2365, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.5252918287937743, | |
| "grad_norm": 0.23076364398002625, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2202, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 0.5258038091337293, | |
| "grad_norm": 0.23180685937404633, | |
| "learning_rate": 0.0015, | |
| "loss": 1.234, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 0.21580268442630768, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2372, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 0.5268277698136391, | |
| "grad_norm": 0.2099384069442749, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2118, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 0.5273397501535941, | |
| "grad_norm": 0.23586790263652802, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2482, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.527851730493549, | |
| "grad_norm": 0.2149907946586609, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2469, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 0.528363710833504, | |
| "grad_norm": 0.21271546185016632, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2325, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 0.5288756911734589, | |
| "grad_norm": 0.20998185873031616, | |
| "learning_rate": 0.0015, | |
| "loss": 1.247, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 0.5293876715134139, | |
| "grad_norm": 0.23234112560749054, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2395, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 0.5298996518533688, | |
| "grad_norm": 0.2261328250169754, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2244, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.5304116321933238, | |
| "grad_norm": 0.2102995663881302, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2307, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 0.5309236125332787, | |
| "grad_norm": 0.21107365190982819, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2195, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 0.5314355928732337, | |
| "grad_norm": 0.2249820977449417, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2499, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 0.5319475732131886, | |
| "grad_norm": 0.2142641544342041, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2329, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 0.5324595535531436, | |
| "grad_norm": 0.2172004133462906, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2098, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.5329715338930985, | |
| "grad_norm": 0.19984416663646698, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2135, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 0.5334835142330534, | |
| "grad_norm": 0.22618216276168823, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2173, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 0.5339954945730084, | |
| "grad_norm": 0.22356146574020386, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2423, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 0.5345074749129634, | |
| "grad_norm": 0.2300511598587036, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2308, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 0.5350194552529183, | |
| "grad_norm": 0.22442519664764404, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2435, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 0.5355314355928732, | |
| "grad_norm": 0.21556325256824493, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2499, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 0.5360434159328282, | |
| "grad_norm": 0.21608006954193115, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2367, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 0.5365553962727831, | |
| "grad_norm": 0.22256320714950562, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2325, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 0.5370673766127381, | |
| "grad_norm": 0.22661398351192474, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2253, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 0.537579356952693, | |
| "grad_norm": 0.21327906847000122, | |
| "learning_rate": 0.0015, | |
| "loss": 1.215, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.5380913372926479, | |
| "grad_norm": 0.21695594489574432, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2372, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 0.5386033176326029, | |
| "grad_norm": 0.20584948360919952, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2491, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 0.5391152979725579, | |
| "grad_norm": 0.2212359756231308, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2415, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 0.5396272783125128, | |
| "grad_norm": 0.2696838974952698, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2254, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 0.5401392586524677, | |
| "grad_norm": 0.21417804062366486, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2307, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 0.5406512389924227, | |
| "grad_norm": 0.2126997709274292, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2134, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 0.5411632193323777, | |
| "grad_norm": 0.21690891683101654, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2136, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 0.5416751996723326, | |
| "grad_norm": 0.21153941750526428, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2157, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 0.5421871800122875, | |
| "grad_norm": 0.21089473366737366, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2272, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 0.5426991603522425, | |
| "grad_norm": 0.2564721703529358, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2026, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.5432111406921974, | |
| "grad_norm": 0.2235645204782486, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2373, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 0.5437231210321524, | |
| "grad_norm": 0.21624423563480377, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2208, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 0.5442351013721073, | |
| "grad_norm": 0.22423268854618073, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2246, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 0.5447470817120622, | |
| "grad_norm": 0.20781590044498444, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2197, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 0.5452590620520172, | |
| "grad_norm": 0.21837033331394196, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2195, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.5457710423919722, | |
| "grad_norm": 0.23481489717960358, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2221, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 0.5462830227319271, | |
| "grad_norm": 0.20522017776966095, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2119, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 0.546795003071882, | |
| "grad_norm": 0.24082933366298676, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2115, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 0.547306983411837, | |
| "grad_norm": 0.21289277076721191, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2386, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 0.547818963751792, | |
| "grad_norm": 0.21003836393356323, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2107, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.5483309440917469, | |
| "grad_norm": 0.21242666244506836, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2429, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 0.5488429244317018, | |
| "grad_norm": 0.2271721065044403, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2314, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 0.5493549047716567, | |
| "grad_norm": 0.21104945242404938, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2342, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 0.5498668851116117, | |
| "grad_norm": 0.2085346132516861, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2271, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 0.5503788654515667, | |
| "grad_norm": 0.22231942415237427, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2306, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.5508908457915216, | |
| "grad_norm": 0.21245570480823517, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2258, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 0.5514028261314765, | |
| "grad_norm": 0.19826675951480865, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2163, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 0.5519148064714315, | |
| "grad_norm": 0.22163072228431702, | |
| "learning_rate": 0.0015, | |
| "loss": 1.229, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 0.5524267868113865, | |
| "grad_norm": 0.21903766691684723, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2139, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 0.5529387671513414, | |
| "grad_norm": 0.2075222283601761, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2129, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.5534507474912963, | |
| "grad_norm": 0.21938522160053253, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2232, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 0.5539627278312513, | |
| "grad_norm": 0.21770595014095306, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2465, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 0.5544747081712063, | |
| "grad_norm": 0.20712700486183167, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2183, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 0.5549866885111612, | |
| "grad_norm": 0.22477000951766968, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2186, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 0.5554986688511161, | |
| "grad_norm": 0.21939463913440704, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2355, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.556010649191071, | |
| "grad_norm": 0.2524956464767456, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2092, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 0.556522629531026, | |
| "grad_norm": 0.2115110456943512, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2137, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 0.557034609870981, | |
| "grad_norm": 0.20509475469589233, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2234, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 0.5575465902109359, | |
| "grad_norm": 0.21247826516628265, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2234, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 0.5580585705508908, | |
| "grad_norm": 0.21064293384552002, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2289, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.5585705508908458, | |
| "grad_norm": 0.21902692317962646, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2085, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 0.5590825312308008, | |
| "grad_norm": 0.21347709000110626, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2151, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 0.5595945115707557, | |
| "grad_norm": 0.20034797489643097, | |
| "learning_rate": 0.0015, | |
| "loss": 1.218, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 0.5601064919107106, | |
| "grad_norm": 0.20223546028137207, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2176, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 0.5606184722506655, | |
| "grad_norm": 0.23771893978118896, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2297, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.5611304525906206, | |
| "grad_norm": 0.24617038667201996, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2331, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 0.5616424329305755, | |
| "grad_norm": 0.2169172167778015, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2319, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 0.5621544132705304, | |
| "grad_norm": 0.21281367540359497, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2205, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 0.5626663936104853, | |
| "grad_norm": 0.21705804765224457, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2138, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 0.5631783739504403, | |
| "grad_norm": 0.19822140038013458, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2339, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.5636903542903953, | |
| "grad_norm": 0.20427508652210236, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2195, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 0.5642023346303502, | |
| "grad_norm": 0.2140669971704483, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1975, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 0.5647143149703051, | |
| "grad_norm": 0.20858561992645264, | |
| "learning_rate": 0.0015, | |
| "loss": 1.208, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 0.5652262953102601, | |
| "grad_norm": 0.21723324060440063, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2193, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 0.5657382756502151, | |
| "grad_norm": 0.21611307561397552, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2199, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 0.56625025599017, | |
| "grad_norm": 0.21373584866523743, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2065, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 0.5667622363301249, | |
| "grad_norm": 0.2058737874031067, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2019, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 0.5672742166700798, | |
| "grad_norm": 0.22086186707019806, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2108, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 0.5677861970100349, | |
| "grad_norm": 0.21599149703979492, | |
| "learning_rate": 0.0015, | |
| "loss": 1.209, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 0.5682981773499898, | |
| "grad_norm": 0.22241829335689545, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2054, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.5688101576899447, | |
| "grad_norm": 0.19618919491767883, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2293, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 0.5693221380298996, | |
| "grad_norm": 0.19986511766910553, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1945, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 0.5698341183698546, | |
| "grad_norm": 0.20131878554821014, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2082, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 0.5703460987098096, | |
| "grad_norm": 0.20655354857444763, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2111, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 0.5708580790497645, | |
| "grad_norm": 0.2156609296798706, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2288, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 0.5713700593897194, | |
| "grad_norm": 0.20367379486560822, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2229, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 0.5718820397296743, | |
| "grad_norm": 0.20256848633289337, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2236, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 0.5723940200696294, | |
| "grad_norm": 0.20862998068332672, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2153, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 0.5729060004095843, | |
| "grad_norm": 0.21000482141971588, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2164, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 0.5734179807495392, | |
| "grad_norm": 0.21778449416160583, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2221, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.5739299610894941, | |
| "grad_norm": 0.20954222977161407, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2257, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 0.5744419414294492, | |
| "grad_norm": 0.21105293929576874, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2218, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 0.5749539217694041, | |
| "grad_norm": 0.2167726457118988, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2193, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 0.575465902109359, | |
| "grad_norm": 0.20207858085632324, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2243, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 0.5759778824493139, | |
| "grad_norm": 0.21475255489349365, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2222, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.576489862789269, | |
| "grad_norm": 0.22506240010261536, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2255, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 0.5770018431292239, | |
| "grad_norm": 0.23033161461353302, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2287, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 0.5775138234691788, | |
| "grad_norm": 0.20455433428287506, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2141, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 0.5780258038091337, | |
| "grad_norm": 0.22457818686962128, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2329, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 0.5785377841490886, | |
| "grad_norm": 0.2011692076921463, | |
| "learning_rate": 0.0015, | |
| "loss": 1.213, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.5790497644890437, | |
| "grad_norm": 0.20488318800926208, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2224, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 0.5795617448289986, | |
| "grad_norm": 0.22065885365009308, | |
| "learning_rate": 0.0015, | |
| "loss": 1.231, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 0.5800737251689535, | |
| "grad_norm": 0.20532485842704773, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2051, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 0.5805857055089084, | |
| "grad_norm": 0.20642031729221344, | |
| "learning_rate": 0.0015, | |
| "loss": 1.215, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 0.5810976858488635, | |
| "grad_norm": 0.20660312473773956, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2191, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 0.5816096661888184, | |
| "grad_norm": 0.21046073734760284, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2142, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 0.5821216465287733, | |
| "grad_norm": 0.21846343576908112, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2205, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 0.5826336268687282, | |
| "grad_norm": 0.20589517056941986, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2057, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 0.5831456072086831, | |
| "grad_norm": 0.20691034197807312, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2064, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 0.5836575875486382, | |
| "grad_norm": 0.21649305522441864, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2032, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.5841695678885931, | |
| "grad_norm": 0.2329801321029663, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2196, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 0.584681548228548, | |
| "grad_norm": 0.23256272077560425, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2124, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 0.5851935285685029, | |
| "grad_norm": 0.2036832720041275, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2098, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 0.585705508908458, | |
| "grad_norm": 0.21199576556682587, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2266, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 0.5862174892484129, | |
| "grad_norm": 0.2015303373336792, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1916, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 0.5867294695883678, | |
| "grad_norm": 0.2176617681980133, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1888, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 0.5872414499283227, | |
| "grad_norm": 0.21515142917633057, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2096, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 0.5877534302682776, | |
| "grad_norm": 0.21731404960155487, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2077, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 0.5882654106082327, | |
| "grad_norm": 0.20664644241333008, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2027, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 0.5887773909481876, | |
| "grad_norm": 0.20170624554157257, | |
| "learning_rate": 0.0015, | |
| "loss": 1.233, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.5892893712881425, | |
| "grad_norm": 0.2092912346124649, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2004, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 0.5898013516280974, | |
| "grad_norm": 0.204396590590477, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2052, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 0.5903133319680525, | |
| "grad_norm": 0.2075720578432083, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2042, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 0.5908253123080074, | |
| "grad_norm": 0.19743815064430237, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1974, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 0.5913372926479623, | |
| "grad_norm": 0.19972637295722961, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2021, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.5918492729879172, | |
| "grad_norm": 0.20364214479923248, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2149, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 0.5923612533278723, | |
| "grad_norm": 0.20440620183944702, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1855, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 0.5928732336678272, | |
| "grad_norm": 0.21338412165641785, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2022, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 0.5933852140077821, | |
| "grad_norm": 0.2067076861858368, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2109, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 0.593897194347737, | |
| "grad_norm": 0.20598556101322174, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2132, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.5944091746876919, | |
| "grad_norm": 0.21331733465194702, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2021, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 0.594921155027647, | |
| "grad_norm": 0.23132279515266418, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1954, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 0.5954331353676019, | |
| "grad_norm": 0.2226603478193283, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2055, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 0.5959451157075568, | |
| "grad_norm": 0.19999723136425018, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1961, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 0.5964570960475117, | |
| "grad_norm": 0.19226787984371185, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2056, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 0.5969690763874668, | |
| "grad_norm": 0.20891976356506348, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2023, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 0.5974810567274217, | |
| "grad_norm": 0.19218876957893372, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2027, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 0.5979930370673766, | |
| "grad_norm": 0.20928075909614563, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2176, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 0.5985050174073315, | |
| "grad_norm": 0.204718217253685, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2014, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 0.5990169977472865, | |
| "grad_norm": 0.22869887948036194, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1888, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.5995289780872415, | |
| "grad_norm": 0.19692908227443695, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2161, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 0.6000409584271964, | |
| "grad_norm": 0.2099919617176056, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1968, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 0.6005529387671513, | |
| "grad_norm": 0.20044675469398499, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2071, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 0.6010649191071062, | |
| "grad_norm": 0.20645897090435028, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2142, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 0.6015768994470613, | |
| "grad_norm": 0.20446518063545227, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1907, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.6020888797870162, | |
| "grad_norm": 0.19793803989887238, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2237, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 0.6026008601269711, | |
| "grad_norm": 0.23807552456855774, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2072, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 0.603112840466926, | |
| "grad_norm": 0.20290285348892212, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2048, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 0.6036248208068811, | |
| "grad_norm": 0.21725532412528992, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1961, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 0.604136801146836, | |
| "grad_norm": 0.20467454195022583, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2301, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.6046487814867909, | |
| "grad_norm": 0.20618268847465515, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2026, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 0.6051607618267458, | |
| "grad_norm": 0.2097761183977127, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1992, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 0.6056727421667008, | |
| "grad_norm": 0.21861404180526733, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2047, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 0.6061847225066558, | |
| "grad_norm": 0.2066473513841629, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2022, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 0.6066967028466107, | |
| "grad_norm": 0.203571155667305, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1729, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.6072086831865656, | |
| "grad_norm": 0.20523090660572052, | |
| "learning_rate": 0.0015, | |
| "loss": 1.222, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 0.6077206635265205, | |
| "grad_norm": 0.2021731734275818, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1983, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 0.6082326438664756, | |
| "grad_norm": 0.20643019676208496, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2147, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 0.6087446242064305, | |
| "grad_norm": 0.21817174553871155, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1988, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 0.6092566045463854, | |
| "grad_norm": 0.21849657595157623, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1908, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.6097685848863403, | |
| "grad_norm": 0.21117383241653442, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2318, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 0.6102805652262953, | |
| "grad_norm": 0.2120293378829956, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2071, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 0.6107925455662503, | |
| "grad_norm": 0.20229868590831757, | |
| "learning_rate": 0.0015, | |
| "loss": 1.191, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 0.6113045259062052, | |
| "grad_norm": 0.19626636803150177, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2172, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 0.6118165062461601, | |
| "grad_norm": 0.21968694031238556, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1901, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 0.612328486586115, | |
| "grad_norm": 0.22982917726039886, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2023, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 0.6128404669260701, | |
| "grad_norm": 0.20328094065189362, | |
| "learning_rate": 0.0015, | |
| "loss": 1.193, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 0.613352447266025, | |
| "grad_norm": 0.20781250298023224, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1871, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 0.6138644276059799, | |
| "grad_norm": 0.1945171356201172, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1954, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 0.6143764079459348, | |
| "grad_norm": 0.2018270492553711, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1848, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.6148883882858899, | |
| "grad_norm": 0.20180918276309967, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2081, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 0.6154003686258448, | |
| "grad_norm": 0.20221208035945892, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2076, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 0.6159123489657997, | |
| "grad_norm": 0.2013401836156845, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2211, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 0.6164243293057546, | |
| "grad_norm": 0.20016033947467804, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2037, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 0.6169363096457096, | |
| "grad_norm": 0.20722372829914093, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2052, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 0.6174482899856646, | |
| "grad_norm": 0.21285022795200348, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2066, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 0.6179602703256195, | |
| "grad_norm": 0.21281997859477997, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1955, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 0.6184722506655744, | |
| "grad_norm": 0.19675594568252563, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2088, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 0.6189842310055294, | |
| "grad_norm": 0.21459296345710754, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2255, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 0.6194962113454844, | |
| "grad_norm": 0.20511606335639954, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.6200081916854393, | |
| "grad_norm": 0.20228254795074463, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1906, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 0.6205201720253942, | |
| "grad_norm": 0.1966087371110916, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1771, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 0.6210321523653491, | |
| "grad_norm": 0.2050897479057312, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1931, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 0.6215441327053041, | |
| "grad_norm": 0.20761296153068542, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1796, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 0.6220561130452591, | |
| "grad_norm": 0.19282642006874084, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2022, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.622568093385214, | |
| "grad_norm": 0.2018144577741623, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2151, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 0.6230800737251689, | |
| "grad_norm": 0.19583159685134888, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2027, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 0.6235920540651239, | |
| "grad_norm": 0.22334228456020355, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2158, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 0.6241040344050789, | |
| "grad_norm": 0.2306404560804367, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1856, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 0.6246160147450338, | |
| "grad_norm": 0.21355292201042175, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1723, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.6251279950849887, | |
| "grad_norm": 0.19845044612884521, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2052, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 0.6256399754249437, | |
| "grad_norm": 0.2062026709318161, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2093, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 0.6261519557648987, | |
| "grad_norm": 0.20521892607212067, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1888, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 0.6266639361048536, | |
| "grad_norm": 0.20746907591819763, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2038, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 0.6271759164448085, | |
| "grad_norm": 0.19719459116458893, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1995, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.6276878967847634, | |
| "grad_norm": 0.20681564509868622, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2157, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 0.6281998771247184, | |
| "grad_norm": 0.20236019790172577, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1859, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 0.6287118574646734, | |
| "grad_norm": 0.22654055058956146, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1961, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 0.6292238378046283, | |
| "grad_norm": 0.1928294599056244, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1932, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 0.6297358181445832, | |
| "grad_norm": 0.21249711513519287, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2018, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.6302477984845382, | |
| "grad_norm": 0.19809094071388245, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1806, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 0.6307597788244932, | |
| "grad_norm": 0.1965721845626831, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1956, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 0.6312717591644481, | |
| "grad_norm": 0.20646794140338898, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1907, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 0.631783739504403, | |
| "grad_norm": 0.19848330318927765, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2049, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 0.632295719844358, | |
| "grad_norm": 0.19884952902793884, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1886, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 0.6328077001843129, | |
| "grad_norm": 0.21490252017974854, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2033, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 0.6333196805242679, | |
| "grad_norm": 0.21076445281505585, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1725, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 0.6338316608642228, | |
| "grad_norm": 0.20743723213672638, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2118, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 0.6343436412041777, | |
| "grad_norm": 0.2091572880744934, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2058, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 0.6348556215441327, | |
| "grad_norm": 0.19593819975852966, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1789, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.6353676018840877, | |
| "grad_norm": 0.21120460331439972, | |
| "learning_rate": 0.0015, | |
| "loss": 1.199, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 0.6358795822240426, | |
| "grad_norm": 0.19703616201877594, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2062, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 0.6363915625639975, | |
| "grad_norm": 0.2228432148694992, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2046, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 0.6369035429039525, | |
| "grad_norm": 0.19556592404842377, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1958, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 0.6374155232439075, | |
| "grad_norm": 0.2118174135684967, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2158, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.6379275035838624, | |
| "grad_norm": 0.19802866876125336, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1889, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 0.6384394839238173, | |
| "grad_norm": 0.2045314460992813, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2052, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 0.6389514642637723, | |
| "grad_norm": 0.20061345398426056, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1859, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 0.6394634446037272, | |
| "grad_norm": 0.19872547686100006, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2002, | |
| "step": 12490 | |
| }, | |
| { | |
| "epoch": 0.6399754249436822, | |
| "grad_norm": 0.2001519650220871, | |
| "learning_rate": 0.0015, | |
| "loss": 1.192, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.6404874052836371, | |
| "grad_norm": 0.20049947500228882, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1919, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 0.640999385623592, | |
| "grad_norm": 0.20143716037273407, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1821, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 0.641511365963547, | |
| "grad_norm": 0.19347570836544037, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2135, | |
| "step": 12530 | |
| }, | |
| { | |
| "epoch": 0.642023346303502, | |
| "grad_norm": 0.19492658972740173, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1891, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 0.6425353266434569, | |
| "grad_norm": 0.19527223706245422, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2102, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 0.6430473069834118, | |
| "grad_norm": 0.1927892118692398, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1714, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 0.6435592873233668, | |
| "grad_norm": 0.2009015530347824, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2035, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 0.6440712676633217, | |
| "grad_norm": 0.21776844561100006, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1777, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 0.6445832480032767, | |
| "grad_norm": 0.19154374301433563, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1906, | |
| "step": 12590 | |
| }, | |
| { | |
| "epoch": 0.6450952283432316, | |
| "grad_norm": 0.19381144642829895, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1778, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.6456072086831866, | |
| "grad_norm": 0.19017955660820007, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1967, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 0.6461191890231415, | |
| "grad_norm": 0.21785299479961395, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2088, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 0.6466311693630965, | |
| "grad_norm": 0.2039538025856018, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1663, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 0.6471431497030514, | |
| "grad_norm": 0.19732427597045898, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1913, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 0.6476551300430063, | |
| "grad_norm": 0.1911800503730774, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2052, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 0.6481671103829613, | |
| "grad_norm": 0.19413244724273682, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1804, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 0.6486790907229162, | |
| "grad_norm": 0.1838771104812622, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1911, | |
| "step": 12670 | |
| }, | |
| { | |
| "epoch": 0.6491910710628712, | |
| "grad_norm": 0.1838536560535431, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1991, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 0.6497030514028261, | |
| "grad_norm": 0.20453278720378876, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1992, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 0.6502150317427811, | |
| "grad_norm": 0.21677398681640625, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1811, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.650727012082736, | |
| "grad_norm": 0.19484928250312805, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1924, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 0.651238992422691, | |
| "grad_norm": 0.1887393295764923, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1978, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 0.6517509727626459, | |
| "grad_norm": 0.19239051640033722, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2051, | |
| "step": 12730 | |
| }, | |
| { | |
| "epoch": 0.6522629531026009, | |
| "grad_norm": 0.20435065031051636, | |
| "learning_rate": 0.0015, | |
| "loss": 1.153, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 0.6527749334425558, | |
| "grad_norm": 0.2020270824432373, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2096, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.6532869137825108, | |
| "grad_norm": 0.21720841526985168, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1776, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 0.6537988941224657, | |
| "grad_norm": 0.19210828840732574, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1894, | |
| "step": 12770 | |
| }, | |
| { | |
| "epoch": 0.6543108744624206, | |
| "grad_norm": 0.19044719636440277, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1894, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 0.6548228548023756, | |
| "grad_norm": 0.20893365144729614, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1916, | |
| "step": 12790 | |
| }, | |
| { | |
| "epoch": 0.6553348351423305, | |
| "grad_norm": 0.20288752019405365, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2018, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.6558468154822855, | |
| "grad_norm": 0.1970445066690445, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1728, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 0.6563587958222404, | |
| "grad_norm": 0.19928324222564697, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1959, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 0.6568707761621954, | |
| "grad_norm": 0.1929846554994583, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1885, | |
| "step": 12830 | |
| }, | |
| { | |
| "epoch": 0.6573827565021503, | |
| "grad_norm": 0.20633605122566223, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2145, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 0.6578947368421053, | |
| "grad_norm": 0.19971442222595215, | |
| "learning_rate": 0.0015, | |
| "loss": 1.188, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 0.6584067171820602, | |
| "grad_norm": 0.18677356839179993, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1943, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 0.6589186975220152, | |
| "grad_norm": 0.1940857172012329, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1921, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 0.6594306778619701, | |
| "grad_norm": 0.20788009464740753, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1922, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 0.659942658201925, | |
| "grad_norm": 0.20371931791305542, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1963, | |
| "step": 12890 | |
| }, | |
| { | |
| "epoch": 0.66045463854188, | |
| "grad_norm": 0.19461549818515778, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1639, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.6609666188818349, | |
| "grad_norm": 0.19904249906539917, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1708, | |
| "step": 12910 | |
| }, | |
| { | |
| "epoch": 0.6614785992217899, | |
| "grad_norm": 0.2062397003173828, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1937, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 0.6619905795617448, | |
| "grad_norm": 0.20642533898353577, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1929, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 0.6625025599016998, | |
| "grad_norm": 0.19433195888996124, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1886, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 0.6630145402416547, | |
| "grad_norm": 0.1951138973236084, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1847, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 0.6635265205816097, | |
| "grad_norm": 0.19220565259456635, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1847, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 0.6640385009215646, | |
| "grad_norm": 0.1887965053319931, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1791, | |
| "step": 12970 | |
| }, | |
| { | |
| "epoch": 0.6645504812615196, | |
| "grad_norm": 0.18562547862529755, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1677, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 0.6650624616014745, | |
| "grad_norm": 0.1826203167438507, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1796, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 0.6655744419414295, | |
| "grad_norm": 0.18740873038768768, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1797, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.6660864222813844, | |
| "grad_norm": 0.1979881227016449, | |
| "learning_rate": 0.0015, | |
| "loss": 1.198, | |
| "step": 13010 | |
| }, | |
| { | |
| "epoch": 0.6665984026213393, | |
| "grad_norm": 0.20608335733413696, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1926, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 0.6671103829612943, | |
| "grad_norm": 0.21441541612148285, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2049, | |
| "step": 13030 | |
| }, | |
| { | |
| "epoch": 0.6676223633012492, | |
| "grad_norm": 0.22678618133068085, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1917, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 0.6681343436412042, | |
| "grad_norm": 0.19718590378761292, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1968, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 0.6686463239811591, | |
| "grad_norm": 0.19607524573802948, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1721, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 0.6691583043211141, | |
| "grad_norm": 0.19298435747623444, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1979, | |
| "step": 13070 | |
| }, | |
| { | |
| "epoch": 0.669670284661069, | |
| "grad_norm": 0.19610482454299927, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1919, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 0.670182265001024, | |
| "grad_norm": 0.19872240722179413, | |
| "learning_rate": 0.0015, | |
| "loss": 1.183, | |
| "step": 13090 | |
| }, | |
| { | |
| "epoch": 0.6706942453409789, | |
| "grad_norm": 0.1863928586244583, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1868, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.6712062256809338, | |
| "grad_norm": 0.19495519995689392, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2084, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 0.6717182060208888, | |
| "grad_norm": 0.19348977506160736, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1981, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 0.6722301863608438, | |
| "grad_norm": 0.19418825209140778, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2081, | |
| "step": 13130 | |
| }, | |
| { | |
| "epoch": 0.6727421667007987, | |
| "grad_norm": 0.19263537228107452, | |
| "learning_rate": 0.0015, | |
| "loss": 1.181, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 0.6732541470407536, | |
| "grad_norm": 0.19272197782993317, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1908, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 0.6737661273807086, | |
| "grad_norm": 0.19103066623210907, | |
| "learning_rate": 0.0015, | |
| "loss": 1.164, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 0.6742781077206635, | |
| "grad_norm": 0.19996246695518494, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1951, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 0.6747900880606185, | |
| "grad_norm": 0.2288653403520584, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2188, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 0.6753020684005734, | |
| "grad_norm": 0.1978132575750351, | |
| "learning_rate": 0.0015, | |
| "loss": 1.177, | |
| "step": 13190 | |
| }, | |
| { | |
| "epoch": 0.6758140487405284, | |
| "grad_norm": 0.2042623907327652, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1833, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.6763260290804833, | |
| "grad_norm": 0.1838945895433426, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1638, | |
| "step": 13210 | |
| }, | |
| { | |
| "epoch": 0.6768380094204383, | |
| "grad_norm": 0.18537567555904388, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1879, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 0.6773499897603932, | |
| "grad_norm": 0.19888518750667572, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1648, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 0.6778619701003481, | |
| "grad_norm": 0.20373912155628204, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2043, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 0.6783739504403031, | |
| "grad_norm": 0.19218416512012482, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1553, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 0.678885930780258, | |
| "grad_norm": 0.1989835649728775, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1679, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 0.679397911120213, | |
| "grad_norm": 0.20067016780376434, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1827, | |
| "step": 13270 | |
| }, | |
| { | |
| "epoch": 0.6799098914601679, | |
| "grad_norm": 0.19568151235580444, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1839, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 0.6804218718001229, | |
| "grad_norm": 0.2029784619808197, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1787, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 0.6809338521400778, | |
| "grad_norm": 0.19807346165180206, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1763, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.6814458324800328, | |
| "grad_norm": 0.1898653358221054, | |
| "learning_rate": 0.0015, | |
| "loss": 1.2075, | |
| "step": 13310 | |
| }, | |
| { | |
| "epoch": 0.6819578128199877, | |
| "grad_norm": 0.2038862705230713, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1773, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 0.6824697931599426, | |
| "grad_norm": 0.18675602972507477, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1888, | |
| "step": 13330 | |
| }, | |
| { | |
| "epoch": 0.6829817734998976, | |
| "grad_norm": 0.20663636922836304, | |
| "learning_rate": 0.0015, | |
| "loss": 1.169, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 0.6834937538398526, | |
| "grad_norm": 0.1998421996831894, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1725, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 0.6840057341798075, | |
| "grad_norm": 0.20095355808734894, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1727, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 0.6845177145197624, | |
| "grad_norm": 0.19053997099399567, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1759, | |
| "step": 13370 | |
| }, | |
| { | |
| "epoch": 0.6850296948597174, | |
| "grad_norm": 0.20177049934864044, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1845, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 0.6855416751996724, | |
| "grad_norm": 0.19868339598178864, | |
| "learning_rate": 0.0015, | |
| "loss": 1.178, | |
| "step": 13390 | |
| }, | |
| { | |
| "epoch": 0.6860536555396273, | |
| "grad_norm": 0.1922164112329483, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1536, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.6865656358795822, | |
| "grad_norm": 0.2025415003299713, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1849, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 0.6870776162195372, | |
| "grad_norm": 0.19813013076782227, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1803, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 0.6875895965594921, | |
| "grad_norm": 0.18536531925201416, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1686, | |
| "step": 13430 | |
| }, | |
| { | |
| "epoch": 0.6881015768994471, | |
| "grad_norm": 0.1998080015182495, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1949, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 0.688613557239402, | |
| "grad_norm": 0.1955641508102417, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1758, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 0.6891255375793569, | |
| "grad_norm": 0.19140900671482086, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1675, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 0.6896375179193119, | |
| "grad_norm": 0.20261794328689575, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1802, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 0.6901494982592669, | |
| "grad_norm": 0.19682539999485016, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1798, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 0.6906614785992218, | |
| "grad_norm": 0.2020127922296524, | |
| "learning_rate": 0.0015, | |
| "loss": 1.172, | |
| "step": 13490 | |
| }, | |
| { | |
| "epoch": 0.6911734589391767, | |
| "grad_norm": 0.19824573397636414, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1888, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.6916854392791317, | |
| "grad_norm": 0.20089636743068695, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1865, | |
| "step": 13510 | |
| }, | |
| { | |
| "epoch": 0.6921974196190867, | |
| "grad_norm": 0.1954367458820343, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1734, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 0.6927093999590416, | |
| "grad_norm": 0.1989155411720276, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1676, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 0.6932213802989965, | |
| "grad_norm": 0.20354506373405457, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1638, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 0.6937333606389514, | |
| "grad_norm": 0.18505001068115234, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1623, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 0.6942453409789064, | |
| "grad_norm": 0.19758115708827972, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1715, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 0.6947573213188614, | |
| "grad_norm": 0.19761599600315094, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1892, | |
| "step": 13570 | |
| }, | |
| { | |
| "epoch": 0.6952693016588163, | |
| "grad_norm": 0.2028966248035431, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1779, | |
| "step": 13580 | |
| }, | |
| { | |
| "epoch": 0.6957812819987712, | |
| "grad_norm": 0.1852991133928299, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1756, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 0.6962932623387262, | |
| "grad_norm": 0.18972176313400269, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1583, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.6968052426786812, | |
| "grad_norm": 0.18746834993362427, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1758, | |
| "step": 13610 | |
| }, | |
| { | |
| "epoch": 0.6973172230186361, | |
| "grad_norm": 0.1831192672252655, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1904, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 0.697829203358591, | |
| "grad_norm": 0.21230356395244598, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1673, | |
| "step": 13630 | |
| }, | |
| { | |
| "epoch": 0.698341183698546, | |
| "grad_norm": 0.2109021544456482, | |
| "learning_rate": 0.0015, | |
| "loss": 1.176, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 0.698853164038501, | |
| "grad_norm": 0.18572686612606049, | |
| "learning_rate": 0.0015, | |
| "loss": 1.195, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 0.6993651443784559, | |
| "grad_norm": 0.19169217348098755, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1865, | |
| "step": 13660 | |
| }, | |
| { | |
| "epoch": 0.6998771247184108, | |
| "grad_norm": 0.18918085098266602, | |
| "learning_rate": 0.0015, | |
| "loss": 1.1788, | |
| "step": 13670 | |
| }, | |
| { | |
| "epoch": 0.7003891050583657, | |
| "grad_norm": 0.19315798580646515, | |
| "learning_rate": 0.0014955269451601939, | |
| "loss": 1.1739, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 0.7009010853983207, | |
| "grad_norm": 0.18943412601947784, | |
| "learning_rate": 0.0014896616625957439, | |
| "loss": 1.1649, | |
| "step": 13690 | |
| }, | |
| { | |
| "epoch": 0.7014130657382757, | |
| "grad_norm": 0.19846367835998535, | |
| "learning_rate": 0.001483819382986655, | |
| "loss": 1.1883, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.7019250460782306, | |
| "grad_norm": 0.19269226491451263, | |
| "learning_rate": 0.001478000016118014, | |
| "loss": 1.1775, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 0.7024370264181855, | |
| "grad_norm": 0.19260330498218536, | |
| "learning_rate": 0.0014722034721287212, | |
| "loss": 1.169, | |
| "step": 13720 | |
| }, | |
| { | |
| "epoch": 0.7029490067581405, | |
| "grad_norm": 0.19868920743465424, | |
| "learning_rate": 0.0014664296615101004, | |
| "loss": 1.1671, | |
| "step": 13730 | |
| }, | |
| { | |
| "epoch": 0.7034609870980955, | |
| "grad_norm": 0.1958989053964615, | |
| "learning_rate": 0.0014606784951045186, | |
| "loss": 1.2049, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 0.7039729674380504, | |
| "grad_norm": 0.194174125790596, | |
| "learning_rate": 0.0014549498841040086, | |
| "loss": 1.1703, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 0.7044849477780053, | |
| "grad_norm": 0.19567228853702545, | |
| "learning_rate": 0.0014492437400488976, | |
| "loss": 1.1649, | |
| "step": 13760 | |
| }, | |
| { | |
| "epoch": 0.7049969281179602, | |
| "grad_norm": 0.191901296377182, | |
| "learning_rate": 0.0014435599748264416, | |
| "loss": 1.169, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 0.7055089084579153, | |
| "grad_norm": 0.1933002918958664, | |
| "learning_rate": 0.0014378985006694644, | |
| "loss": 1.1873, | |
| "step": 13780 | |
| }, | |
| { | |
| "epoch": 0.7060208887978702, | |
| "grad_norm": 0.20665253698825836, | |
| "learning_rate": 0.0014322592301550022, | |
| "loss": 1.1773, | |
| "step": 13790 | |
| }, | |
| { | |
| "epoch": 0.7065328691378251, | |
| "grad_norm": 0.19543762505054474, | |
| "learning_rate": 0.0014266420762029542, | |
| "loss": 1.1738, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.70704484947778, | |
| "grad_norm": 0.186002716422081, | |
| "learning_rate": 0.0014210469520747377, | |
| "loss": 1.1783, | |
| "step": 13810 | |
| }, | |
| { | |
| "epoch": 0.707556829817735, | |
| "grad_norm": 0.1872335523366928, | |
| "learning_rate": 0.0014154737713719476, | |
| "loss": 1.1918, | |
| "step": 13820 | |
| }, | |
| { | |
| "epoch": 0.70806881015769, | |
| "grad_norm": 0.1909414827823639, | |
| "learning_rate": 0.0014099224480350252, | |
| "loss": 1.1587, | |
| "step": 13830 | |
| }, | |
| { | |
| "epoch": 0.7085807904976449, | |
| "grad_norm": 0.1957162618637085, | |
| "learning_rate": 0.0014043928963419256, | |
| "loss": 1.1783, | |
| "step": 13840 | |
| }, | |
| { | |
| "epoch": 0.7090927708375998, | |
| "grad_norm": 0.1931842565536499, | |
| "learning_rate": 0.0013988850309067965, | |
| "loss": 1.1749, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 0.7096047511775547, | |
| "grad_norm": 0.2018897980451584, | |
| "learning_rate": 0.0013933987666786593, | |
| "loss": 1.1457, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 0.7101167315175098, | |
| "grad_norm": 0.1824326366186142, | |
| "learning_rate": 0.0013879340189400947, | |
| "loss": 1.1861, | |
| "step": 13870 | |
| }, | |
| { | |
| "epoch": 0.7106287118574647, | |
| "grad_norm": 0.19200804829597473, | |
| "learning_rate": 0.0013824907033059355, | |
| "loss": 1.1669, | |
| "step": 13880 | |
| }, | |
| { | |
| "epoch": 0.7111406921974196, | |
| "grad_norm": 0.18873439729213715, | |
| "learning_rate": 0.001377068735721964, | |
| "loss": 1.1555, | |
| "step": 13890 | |
| }, | |
| { | |
| "epoch": 0.7116526725373745, | |
| "grad_norm": 0.19836601614952087, | |
| "learning_rate": 0.0013716680324636122, | |
| "loss": 1.1536, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.7121646528773296, | |
| "grad_norm": 0.2006756067276001, | |
| "learning_rate": 0.001366288510134671, | |
| "loss": 1.1595, | |
| "step": 13910 | |
| }, | |
| { | |
| "epoch": 0.7126766332172845, | |
| "grad_norm": 0.18679478764533997, | |
| "learning_rate": 0.0013609300856660014, | |
| "loss": 1.1762, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 0.7131886135572394, | |
| "grad_norm": 0.19826917350292206, | |
| "learning_rate": 0.001355592676314251, | |
| "loss": 1.1752, | |
| "step": 13930 | |
| }, | |
| { | |
| "epoch": 0.7137005938971943, | |
| "grad_norm": 0.18885891139507294, | |
| "learning_rate": 0.0013502761996605787, | |
| "loss": 1.1731, | |
| "step": 13940 | |
| }, | |
| { | |
| "epoch": 0.7142125742371493, | |
| "grad_norm": 0.1888403594493866, | |
| "learning_rate": 0.0013449805736093791, | |
| "loss": 1.1536, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 0.7147245545771043, | |
| "grad_norm": 0.20078985393047333, | |
| "learning_rate": 0.0013397057163870173, | |
| "loss": 1.1545, | |
| "step": 13960 | |
| }, | |
| { | |
| "epoch": 0.7152365349170592, | |
| "grad_norm": 0.19156110286712646, | |
| "learning_rate": 0.001334451546540564, | |
| "loss": 1.148, | |
| "step": 13970 | |
| }, | |
| { | |
| "epoch": 0.7157485152570141, | |
| "grad_norm": 0.19765546917915344, | |
| "learning_rate": 0.0013292179829365398, | |
| "loss": 1.1776, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 0.716260495596969, | |
| "grad_norm": 0.1948610097169876, | |
| "learning_rate": 0.001324004944759661, | |
| "loss": 1.1597, | |
| "step": 13990 | |
| }, | |
| { | |
| "epoch": 0.7167724759369241, | |
| "grad_norm": 0.1816781461238861, | |
| "learning_rate": 0.0013188123515115915, | |
| "loss": 1.1484, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.717284456276879, | |
| "grad_norm": 0.2072591632604599, | |
| "learning_rate": 0.0013136401230097012, | |
| "loss": 1.1678, | |
| "step": 14010 | |
| }, | |
| { | |
| "epoch": 0.7177964366168339, | |
| "grad_norm": 0.19381676614284515, | |
| "learning_rate": 0.0013084881793858267, | |
| "loss": 1.1714, | |
| "step": 14020 | |
| }, | |
| { | |
| "epoch": 0.7183084169567888, | |
| "grad_norm": 0.178278848528862, | |
| "learning_rate": 0.0013033564410850373, | |
| "loss": 1.162, | |
| "step": 14030 | |
| }, | |
| { | |
| "epoch": 0.7188203972967439, | |
| "grad_norm": 0.18733732402324677, | |
| "learning_rate": 0.001298244828864409, | |
| "loss": 1.1565, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 0.7193323776366988, | |
| "grad_norm": 0.18614625930786133, | |
| "learning_rate": 0.0012931532637917983, | |
| "loss": 1.1678, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 0.7198443579766537, | |
| "grad_norm": 0.17618735134601593, | |
| "learning_rate": 0.0012880816672446245, | |
| "loss": 1.1723, | |
| "step": 14060 | |
| }, | |
| { | |
| "epoch": 0.7203563383166086, | |
| "grad_norm": 0.17765553295612335, | |
| "learning_rate": 0.0012830299609086558, | |
| "loss": 1.1511, | |
| "step": 14070 | |
| }, | |
| { | |
| "epoch": 0.7208683186565635, | |
| "grad_norm": 0.19092194736003876, | |
| "learning_rate": 0.0012779980667767994, | |
| "loss": 1.1679, | |
| "step": 14080 | |
| }, | |
| { | |
| "epoch": 0.7213802989965186, | |
| "grad_norm": 0.18768686056137085, | |
| "learning_rate": 0.0012729859071478975, | |
| "loss": 1.1668, | |
| "step": 14090 | |
| }, | |
| { | |
| "epoch": 0.7218922793364735, | |
| "grad_norm": 0.18770349025726318, | |
| "learning_rate": 0.0012679934046255271, | |
| "loss": 1.1749, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.7224042596764284, | |
| "grad_norm": 0.1935562640428543, | |
| "learning_rate": 0.0012630204821168047, | |
| "loss": 1.1535, | |
| "step": 14110 | |
| }, | |
| { | |
| "epoch": 0.7229162400163833, | |
| "grad_norm": 0.17887477576732635, | |
| "learning_rate": 0.0012580670628311967, | |
| "loss": 1.1541, | |
| "step": 14120 | |
| }, | |
| { | |
| "epoch": 0.7234282203563384, | |
| "grad_norm": 0.18734948337078094, | |
| "learning_rate": 0.0012531330702793323, | |
| "loss": 1.1669, | |
| "step": 14130 | |
| }, | |
| { | |
| "epoch": 0.7239402006962933, | |
| "grad_norm": 0.17879174649715424, | |
| "learning_rate": 0.0012482184282718238, | |
| "loss": 1.1905, | |
| "step": 14140 | |
| }, | |
| { | |
| "epoch": 0.7244521810362482, | |
| "grad_norm": 0.1950501948595047, | |
| "learning_rate": 0.0012433230609180889, | |
| "loss": 1.1446, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 0.7249641613762031, | |
| "grad_norm": 0.1801559329032898, | |
| "learning_rate": 0.0012384468926251798, | |
| "loss": 1.1367, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 0.7254761417161582, | |
| "grad_norm": 0.17999699711799622, | |
| "learning_rate": 0.0012335898480966146, | |
| "loss": 1.1402, | |
| "step": 14170 | |
| }, | |
| { | |
| "epoch": 0.7259881220561131, | |
| "grad_norm": 0.18279437720775604, | |
| "learning_rate": 0.0012287518523312166, | |
| "loss": 1.1597, | |
| "step": 14180 | |
| }, | |
| { | |
| "epoch": 0.726500102396068, | |
| "grad_norm": 0.19126516580581665, | |
| "learning_rate": 0.001223932830621954, | |
| "loss": 1.1604, | |
| "step": 14190 | |
| }, | |
| { | |
| "epoch": 0.7270120827360229, | |
| "grad_norm": 0.18581058084964752, | |
| "learning_rate": 0.0012191327085547877, | |
| "loss": 1.1532, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.7275240630759778, | |
| "grad_norm": 0.20243413746356964, | |
| "learning_rate": 0.0012143514120075223, | |
| "loss": 1.1495, | |
| "step": 14210 | |
| }, | |
| { | |
| "epoch": 0.7280360434159329, | |
| "grad_norm": 0.19404320418834686, | |
| "learning_rate": 0.0012095888671486597, | |
| "loss": 1.1567, | |
| "step": 14220 | |
| }, | |
| { | |
| "epoch": 0.7285480237558878, | |
| "grad_norm": 0.18503792583942413, | |
| "learning_rate": 0.0012048450004362614, | |
| "loss": 1.128, | |
| "step": 14230 | |
| }, | |
| { | |
| "epoch": 0.7290600040958427, | |
| "grad_norm": 0.19073212146759033, | |
| "learning_rate": 0.0012001197386168117, | |
| "loss": 1.1458, | |
| "step": 14240 | |
| }, | |
| { | |
| "epoch": 0.7295719844357976, | |
| "grad_norm": 0.2037813812494278, | |
| "learning_rate": 0.0011954130087240865, | |
| "loss": 1.1741, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.7300839647757527, | |
| "grad_norm": 0.18591246008872986, | |
| "learning_rate": 0.0011907247380780264, | |
| "loss": 1.1458, | |
| "step": 14260 | |
| }, | |
| { | |
| "epoch": 0.7305959451157076, | |
| "grad_norm": 0.18210938572883606, | |
| "learning_rate": 0.0011860548542836156, | |
| "loss": 1.1695, | |
| "step": 14270 | |
| }, | |
| { | |
| "epoch": 0.7311079254556625, | |
| "grad_norm": 0.18794593214988708, | |
| "learning_rate": 0.0011814032852297623, | |
| "loss": 1.1458, | |
| "step": 14280 | |
| }, | |
| { | |
| "epoch": 0.7316199057956174, | |
| "grad_norm": 0.1834757775068283, | |
| "learning_rate": 0.001176769959088186, | |
| "loss": 1.1485, | |
| "step": 14290 | |
| }, | |
| { | |
| "epoch": 0.7321318861355723, | |
| "grad_norm": 0.1770770400762558, | |
| "learning_rate": 0.0011721548043123092, | |
| "loss": 1.1473, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.7326438664755274, | |
| "grad_norm": 0.19540582597255707, | |
| "learning_rate": 0.0011675577496361507, | |
| "loss": 1.14, | |
| "step": 14310 | |
| }, | |
| { | |
| "epoch": 0.7331558468154823, | |
| "grad_norm": 0.18834899365901947, | |
| "learning_rate": 0.0011629787240732272, | |
| "loss": 1.1326, | |
| "step": 14320 | |
| }, | |
| { | |
| "epoch": 0.7336678271554372, | |
| "grad_norm": 0.18618904054164886, | |
| "learning_rate": 0.0011584176569154553, | |
| "loss": 1.1388, | |
| "step": 14330 | |
| }, | |
| { | |
| "epoch": 0.7341798074953921, | |
| "grad_norm": 0.1807902604341507, | |
| "learning_rate": 0.0011538744777320608, | |
| "loss": 1.1448, | |
| "step": 14340 | |
| }, | |
| { | |
| "epoch": 0.7346917878353472, | |
| "grad_norm": 0.18239812552928925, | |
| "learning_rate": 0.0011493491163684908, | |
| "loss": 1.1355, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 0.7352037681753021, | |
| "grad_norm": 0.18156401813030243, | |
| "learning_rate": 0.0011448415029453305, | |
| "loss": 1.1309, | |
| "step": 14360 | |
| }, | |
| { | |
| "epoch": 0.735715748515257, | |
| "grad_norm": 0.1813691258430481, | |
| "learning_rate": 0.0011403515678572234, | |
| "loss": 1.134, | |
| "step": 14370 | |
| }, | |
| { | |
| "epoch": 0.7362277288552119, | |
| "grad_norm": 0.18241450190544128, | |
| "learning_rate": 0.0011358792417717981, | |
| "loss": 1.1378, | |
| "step": 14380 | |
| }, | |
| { | |
| "epoch": 0.736739709195167, | |
| "grad_norm": 0.18394464254379272, | |
| "learning_rate": 0.001131424455628596, | |
| "loss": 1.1497, | |
| "step": 14390 | |
| }, | |
| { | |
| "epoch": 0.7372516895351219, | |
| "grad_norm": 0.18612609803676605, | |
| "learning_rate": 0.0011269871406380059, | |
| "loss": 1.1669, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.7377636698750768, | |
| "grad_norm": 0.18373136222362518, | |
| "learning_rate": 0.001122567228280201, | |
| "loss": 1.1453, | |
| "step": 14410 | |
| }, | |
| { | |
| "epoch": 0.7382756502150317, | |
| "grad_norm": 0.193937748670578, | |
| "learning_rate": 0.001118164650304082, | |
| "loss": 1.1357, | |
| "step": 14420 | |
| }, | |
| { | |
| "epoch": 0.7387876305549866, | |
| "grad_norm": 0.18261444568634033, | |
| "learning_rate": 0.0011137793387262216, | |
| "loss": 1.169, | |
| "step": 14430 | |
| }, | |
| { | |
| "epoch": 0.7392996108949417, | |
| "grad_norm": 0.19592134654521942, | |
| "learning_rate": 0.0011094112258298167, | |
| "loss": 1.1518, | |
| "step": 14440 | |
| }, | |
| { | |
| "epoch": 0.7398115912348966, | |
| "grad_norm": 0.17495043575763702, | |
| "learning_rate": 0.0011050602441636402, | |
| "loss": 1.1481, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 0.7403235715748515, | |
| "grad_norm": 0.18108507990837097, | |
| "learning_rate": 0.001100726326541002, | |
| "loss": 1.1327, | |
| "step": 14460 | |
| }, | |
| { | |
| "epoch": 0.7408355519148064, | |
| "grad_norm": 0.1797986775636673, | |
| "learning_rate": 0.00109640940603871, | |
| "loss": 1.1394, | |
| "step": 14470 | |
| }, | |
| { | |
| "epoch": 0.7413475322547615, | |
| "grad_norm": 0.18484458327293396, | |
| "learning_rate": 0.001092109415996037, | |
| "loss": 1.1188, | |
| "step": 14480 | |
| }, | |
| { | |
| "epoch": 0.7418595125947164, | |
| "grad_norm": 0.1784062534570694, | |
| "learning_rate": 0.0010878262900136915, | |
| "loss": 1.125, | |
| "step": 14490 | |
| }, | |
| { | |
| "epoch": 0.7423714929346713, | |
| "grad_norm": 0.1869814693927765, | |
| "learning_rate": 0.0010835599619527924, | |
| "loss": 1.1417, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.7428834732746262, | |
| "grad_norm": 0.18346761167049408, | |
| "learning_rate": 0.0010793103659338475, | |
| "loss": 1.1182, | |
| "step": 14510 | |
| }, | |
| { | |
| "epoch": 0.7433954536145811, | |
| "grad_norm": 0.188985213637352, | |
| "learning_rate": 0.0010750774363357356, | |
| "loss": 1.1412, | |
| "step": 14520 | |
| }, | |
| { | |
| "epoch": 0.7439074339545362, | |
| "grad_norm": 0.1802164912223816, | |
| "learning_rate": 0.0010708611077946955, | |
| "loss": 1.1338, | |
| "step": 14530 | |
| }, | |
| { | |
| "epoch": 0.7444194142944911, | |
| "grad_norm": 0.17940784990787506, | |
| "learning_rate": 0.0010666613152033133, | |
| "loss": 1.1477, | |
| "step": 14540 | |
| }, | |
| { | |
| "epoch": 0.744931394634446, | |
| "grad_norm": 0.19481126964092255, | |
| "learning_rate": 0.00106247799370952, | |
| "loss": 1.1306, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 0.7454433749744009, | |
| "grad_norm": 0.17663590610027313, | |
| "learning_rate": 0.0010583110787155889, | |
| "loss": 1.1395, | |
| "step": 14560 | |
| }, | |
| { | |
| "epoch": 0.745955355314356, | |
| "grad_norm": 0.18392081558704376, | |
| "learning_rate": 0.001054160505877137, | |
| "loss": 1.1339, | |
| "step": 14570 | |
| }, | |
| { | |
| "epoch": 0.7464673356543109, | |
| "grad_norm": 0.1872582733631134, | |
| "learning_rate": 0.0010500262111021333, | |
| "loss": 1.1271, | |
| "step": 14580 | |
| }, | |
| { | |
| "epoch": 0.7469793159942658, | |
| "grad_norm": 0.18514196574687958, | |
| "learning_rate": 0.0010459081305499078, | |
| "loss": 1.1561, | |
| "step": 14590 | |
| }, | |
| { | |
| "epoch": 0.7474912963342207, | |
| "grad_norm": 0.18902930617332458, | |
| "learning_rate": 0.0010418062006301674, | |
| "loss": 1.1402, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.7480032766741758, | |
| "grad_norm": 0.1824546903371811, | |
| "learning_rate": 0.0010377203580020109, | |
| "loss": 1.1439, | |
| "step": 14610 | |
| }, | |
| { | |
| "epoch": 0.7485152570141307, | |
| "grad_norm": 0.1803770363330841, | |
| "learning_rate": 0.001033650539572954, | |
| "loss": 1.1313, | |
| "step": 14620 | |
| }, | |
| { | |
| "epoch": 0.7490272373540856, | |
| "grad_norm": 0.19267936050891876, | |
| "learning_rate": 0.0010295966824979534, | |
| "loss": 1.1082, | |
| "step": 14630 | |
| }, | |
| { | |
| "epoch": 0.7495392176940405, | |
| "grad_norm": 0.19047097861766815, | |
| "learning_rate": 0.0010255587241784366, | |
| "loss": 1.122, | |
| "step": 14640 | |
| }, | |
| { | |
| "epoch": 0.7500511980339954, | |
| "grad_norm": 0.1689426302909851, | |
| "learning_rate": 0.0010215366022613358, | |
| "loss": 1.1172, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 0.7505631783739505, | |
| "grad_norm": 0.18644796311855316, | |
| "learning_rate": 0.0010175302546381246, | |
| "loss": 1.146, | |
| "step": 14660 | |
| }, | |
| { | |
| "epoch": 0.7510751587139054, | |
| "grad_norm": 0.18672852218151093, | |
| "learning_rate": 0.0010135396194438586, | |
| "loss": 1.1386, | |
| "step": 14670 | |
| }, | |
| { | |
| "epoch": 0.7515871390538603, | |
| "grad_norm": 0.19166767597198486, | |
| "learning_rate": 0.0010095646350562206, | |
| "loss": 1.1365, | |
| "step": 14680 | |
| }, | |
| { | |
| "epoch": 0.7520991193938152, | |
| "grad_norm": 0.18109376728534698, | |
| "learning_rate": 0.0010056052400945696, | |
| "loss": 1.113, | |
| "step": 14690 | |
| }, | |
| { | |
| "epoch": 0.7526110997337703, | |
| "grad_norm": 0.17950654029846191, | |
| "learning_rate": 0.0010016613734189915, | |
| "loss": 1.1474, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.7531230800737252, | |
| "grad_norm": 0.184305801987648, | |
| "learning_rate": 0.0009977329741293565, | |
| "loss": 1.1199, | |
| "step": 14710 | |
| }, | |
| { | |
| "epoch": 0.7536350604136801, | |
| "grad_norm": 0.18768514692783356, | |
| "learning_rate": 0.0009938199815643773, | |
| "loss": 1.1451, | |
| "step": 14720 | |
| }, | |
| { | |
| "epoch": 0.754147040753635, | |
| "grad_norm": 0.17981773614883423, | |
| "learning_rate": 0.0009899223353006738, | |
| "loss": 1.1423, | |
| "step": 14730 | |
| }, | |
| { | |
| "epoch": 0.75465902109359, | |
| "grad_norm": 0.17722870409488678, | |
| "learning_rate": 0.0009860399751518388, | |
| "loss": 1.1208, | |
| "step": 14740 | |
| }, | |
| { | |
| "epoch": 0.755171001433545, | |
| "grad_norm": 0.18367789685726166, | |
| "learning_rate": 0.0009821728411675095, | |
| "loss": 1.148, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 0.7556829817734999, | |
| "grad_norm": 0.18441089987754822, | |
| "learning_rate": 0.0009783208736324418, | |
| "loss": 1.1112, | |
| "step": 14760 | |
| }, | |
| { | |
| "epoch": 0.7561949621134548, | |
| "grad_norm": 0.1897488385438919, | |
| "learning_rate": 0.000974484013065587, | |
| "loss": 1.1231, | |
| "step": 14770 | |
| }, | |
| { | |
| "epoch": 0.7567069424534097, | |
| "grad_norm": 0.18716907501220703, | |
| "learning_rate": 0.0009706622002191746, | |
| "loss": 1.1018, | |
| "step": 14780 | |
| }, | |
| { | |
| "epoch": 0.7572189227933648, | |
| "grad_norm": 0.18121209740638733, | |
| "learning_rate": 0.0009668553760777972, | |
| "loss": 1.1225, | |
| "step": 14790 | |
| }, | |
| { | |
| "epoch": 0.7577309031333197, | |
| "grad_norm": 0.19911837577819824, | |
| "learning_rate": 0.0009630634818574985, | |
| "loss": 1.1266, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.7582428834732746, | |
| "grad_norm": 0.169275164604187, | |
| "learning_rate": 0.0009592864590048661, | |
| "loss": 1.1152, | |
| "step": 14810 | |
| }, | |
| { | |
| "epoch": 0.7587548638132295, | |
| "grad_norm": 0.1855994015932083, | |
| "learning_rate": 0.0009555242491961278, | |
| "loss": 1.1318, | |
| "step": 14820 | |
| }, | |
| { | |
| "epoch": 0.7592668441531846, | |
| "grad_norm": 0.17527516186237335, | |
| "learning_rate": 0.0009517767943362495, | |
| "loss": 1.0988, | |
| "step": 14830 | |
| }, | |
| { | |
| "epoch": 0.7597788244931395, | |
| "grad_norm": 0.18066614866256714, | |
| "learning_rate": 0.0009480440365580401, | |
| "loss": 1.1097, | |
| "step": 14840 | |
| }, | |
| { | |
| "epoch": 0.7602908048330944, | |
| "grad_norm": 0.17801222205162048, | |
| "learning_rate": 0.000944325918221256, | |
| "loss": 1.1196, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 0.7608027851730493, | |
| "grad_norm": 0.19464291632175446, | |
| "learning_rate": 0.0009406223819117125, | |
| "loss": 1.1319, | |
| "step": 14860 | |
| }, | |
| { | |
| "epoch": 0.7613147655130043, | |
| "grad_norm": 0.1878882348537445, | |
| "learning_rate": 0.0009369333704403964, | |
| "loss": 1.13, | |
| "step": 14870 | |
| }, | |
| { | |
| "epoch": 0.7618267458529593, | |
| "grad_norm": 0.17626269161701202, | |
| "learning_rate": 0.0009332588268425832, | |
| "loss": 1.1181, | |
| "step": 14880 | |
| }, | |
| { | |
| "epoch": 0.7623387261929142, | |
| "grad_norm": 0.1895529329776764, | |
| "learning_rate": 0.0009295986943769574, | |
| "loss": 1.1333, | |
| "step": 14890 | |
| }, | |
| { | |
| "epoch": 0.7628507065328691, | |
| "grad_norm": 0.1784052848815918, | |
| "learning_rate": 0.0009259529165247364, | |
| "loss": 1.1242, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.763362686872824, | |
| "grad_norm": 0.17965124547481537, | |
| "learning_rate": 0.0009223214369887976, | |
| "loss": 1.1258, | |
| "step": 14910 | |
| }, | |
| { | |
| "epoch": 0.7638746672127791, | |
| "grad_norm": 0.17978616058826447, | |
| "learning_rate": 0.0009187041996928093, | |
| "loss": 1.1125, | |
| "step": 14920 | |
| }, | |
| { | |
| "epoch": 0.764386647552734, | |
| "grad_norm": 0.18885265290737152, | |
| "learning_rate": 0.0009151011487803643, | |
| "loss": 1.1061, | |
| "step": 14930 | |
| }, | |
| { | |
| "epoch": 0.7648986278926889, | |
| "grad_norm": 0.18489712476730347, | |
| "learning_rate": 0.0009115122286141184, | |
| "loss": 1.127, | |
| "step": 14940 | |
| }, | |
| { | |
| "epoch": 0.7654106082326438, | |
| "grad_norm": 0.17437365651130676, | |
| "learning_rate": 0.0009079373837749296, | |
| "loss": 1.1148, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 0.7659225885725988, | |
| "grad_norm": 0.18147113919258118, | |
| "learning_rate": 0.0009043765590610044, | |
| "loss": 1.1014, | |
| "step": 14960 | |
| }, | |
| { | |
| "epoch": 0.7664345689125538, | |
| "grad_norm": 0.17263419926166534, | |
| "learning_rate": 0.0009008296994870436, | |
| "loss": 1.1118, | |
| "step": 14970 | |
| }, | |
| { | |
| "epoch": 0.7669465492525087, | |
| "grad_norm": 0.17921820282936096, | |
| "learning_rate": 0.000897296750283394, | |
| "loss": 1.1245, | |
| "step": 14980 | |
| }, | |
| { | |
| "epoch": 0.7674585295924636, | |
| "grad_norm": 0.17663663625717163, | |
| "learning_rate": 0.0008937776568952028, | |
| "loss": 1.1078, | |
| "step": 14990 | |
| }, | |
| { | |
| "epoch": 0.7679705099324186, | |
| "grad_norm": 0.17961500585079193, | |
| "learning_rate": 0.0008902723649815751, | |
| "loss": 1.0977, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.7684824902723736, | |
| "grad_norm": 0.18368123471736908, | |
| "learning_rate": 0.0008867808204147341, | |
| "loss": 1.103, | |
| "step": 15010 | |
| }, | |
| { | |
| "epoch": 0.7689944706123285, | |
| "grad_norm": 0.18269400298595428, | |
| "learning_rate": 0.0008833029692791867, | |
| "loss": 1.108, | |
| "step": 15020 | |
| }, | |
| { | |
| "epoch": 0.7695064509522834, | |
| "grad_norm": 0.1727774292230606, | |
| "learning_rate": 0.0008798387578708893, | |
| "loss": 1.1033, | |
| "step": 15030 | |
| }, | |
| { | |
| "epoch": 0.7700184312922383, | |
| "grad_norm": 0.18222136795520782, | |
| "learning_rate": 0.0008763881326964195, | |
| "loss": 1.1089, | |
| "step": 15040 | |
| }, | |
| { | |
| "epoch": 0.7705304116321933, | |
| "grad_norm": 0.1899970918893814, | |
| "learning_rate": 0.0008729510404721502, | |
| "loss": 1.1039, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 0.7710423919721483, | |
| "grad_norm": 0.18128469586372375, | |
| "learning_rate": 0.0008695274281234262, | |
| "loss": 1.1078, | |
| "step": 15060 | |
| }, | |
| { | |
| "epoch": 0.7715543723121032, | |
| "grad_norm": 0.18401475250720978, | |
| "learning_rate": 0.0008661172427837451, | |
| "loss": 1.1023, | |
| "step": 15070 | |
| }, | |
| { | |
| "epoch": 0.7720663526520581, | |
| "grad_norm": 0.18456844985485077, | |
| "learning_rate": 0.0008627204317939403, | |
| "loss": 1.1187, | |
| "step": 15080 | |
| }, | |
| { | |
| "epoch": 0.7725783329920131, | |
| "grad_norm": 0.18838796019554138, | |
| "learning_rate": 0.0008593369427013692, | |
| "loss": 1.0908, | |
| "step": 15090 | |
| }, | |
| { | |
| "epoch": 0.7730903133319681, | |
| "grad_norm": 0.18515382707118988, | |
| "learning_rate": 0.0008559667232591014, | |
| "loss": 1.1099, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.773602293671923, | |
| "grad_norm": 0.18746817111968994, | |
| "learning_rate": 0.0008526097214251135, | |
| "loss": 1.1073, | |
| "step": 15110 | |
| }, | |
| { | |
| "epoch": 0.7741142740118779, | |
| "grad_norm": 0.18683654069900513, | |
| "learning_rate": 0.0008492658853614846, | |
| "loss": 1.1195, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 0.7746262543518329, | |
| "grad_norm": 0.17560458183288574, | |
| "learning_rate": 0.0008459351634335962, | |
| "loss": 1.0919, | |
| "step": 15130 | |
| }, | |
| { | |
| "epoch": 0.7751382346917879, | |
| "grad_norm": 0.17539164423942566, | |
| "learning_rate": 0.0008426175042093346, | |
| "loss": 1.1082, | |
| "step": 15140 | |
| }, | |
| { | |
| "epoch": 0.7756502150317428, | |
| "grad_norm": 0.17442087829113007, | |
| "learning_rate": 0.0008393128564582973, | |
| "loss": 1.1077, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 0.7761621953716977, | |
| "grad_norm": 0.17610372602939606, | |
| "learning_rate": 0.0008360211691510009, | |
| "loss": 1.0976, | |
| "step": 15160 | |
| }, | |
| { | |
| "epoch": 0.7766741757116526, | |
| "grad_norm": 0.18700052797794342, | |
| "learning_rate": 0.0008327423914580938, | |
| "loss": 1.1116, | |
| "step": 15170 | |
| }, | |
| { | |
| "epoch": 0.7771861560516076, | |
| "grad_norm": 0.18908992409706116, | |
| "learning_rate": 0.0008294764727495717, | |
| "loss": 1.1266, | |
| "step": 15180 | |
| }, | |
| { | |
| "epoch": 0.7776981363915626, | |
| "grad_norm": 0.17554494738578796, | |
| "learning_rate": 0.0008262233625939947, | |
| "loss": 1.1228, | |
| "step": 15190 | |
| }, | |
| { | |
| "epoch": 0.7782101167315175, | |
| "grad_norm": 0.1848273128271103, | |
| "learning_rate": 0.0008229830107577095, | |
| "loss": 1.1032, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.7787220970714724, | |
| "grad_norm": 0.1751490831375122, | |
| "learning_rate": 0.0008197553672040732, | |
| "loss": 1.1022, | |
| "step": 15210 | |
| }, | |
| { | |
| "epoch": 0.7792340774114274, | |
| "grad_norm": 0.19107986986637115, | |
| "learning_rate": 0.0008165403820926805, | |
| "loss": 1.1107, | |
| "step": 15220 | |
| }, | |
| { | |
| "epoch": 0.7797460577513824, | |
| "grad_norm": 0.17038871347904205, | |
| "learning_rate": 0.000813338005778595, | |
| "loss": 1.0906, | |
| "step": 15230 | |
| }, | |
| { | |
| "epoch": 0.7802580380913373, | |
| "grad_norm": 0.17573246359825134, | |
| "learning_rate": 0.0008101481888115815, | |
| "loss": 1.1185, | |
| "step": 15240 | |
| }, | |
| { | |
| "epoch": 0.7807700184312922, | |
| "grad_norm": 0.18138054013252258, | |
| "learning_rate": 0.000806970881935343, | |
| "loss": 1.1068, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 0.7812819987712472, | |
| "grad_norm": 0.18504558503627777, | |
| "learning_rate": 0.00080380603608676, | |
| "loss": 1.1187, | |
| "step": 15260 | |
| }, | |
| { | |
| "epoch": 0.7817939791112021, | |
| "grad_norm": 0.1914263665676117, | |
| "learning_rate": 0.0008006536023951326, | |
| "loss": 1.1028, | |
| "step": 15270 | |
| }, | |
| { | |
| "epoch": 0.7823059594511571, | |
| "grad_norm": 0.17930828034877777, | |
| "learning_rate": 0.0007975135321814267, | |
| "loss": 1.12, | |
| "step": 15280 | |
| }, | |
| { | |
| "epoch": 0.782817939791112, | |
| "grad_norm": 0.18710237741470337, | |
| "learning_rate": 0.0007943857769575209, | |
| "loss": 1.0943, | |
| "step": 15290 | |
| }, | |
| { | |
| "epoch": 0.783329920131067, | |
| "grad_norm": 0.18522420525550842, | |
| "learning_rate": 0.0007912702884254589, | |
| "loss": 1.1125, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.7838419004710219, | |
| "grad_norm": 0.17634257674217224, | |
| "learning_rate": 0.0007881670184767039, | |
| "loss": 1.0855, | |
| "step": 15310 | |
| }, | |
| { | |
| "epoch": 0.7843538808109769, | |
| "grad_norm": 0.1925361305475235, | |
| "learning_rate": 0.0007850759191913941, | |
| "loss": 1.0957, | |
| "step": 15320 | |
| }, | |
| { | |
| "epoch": 0.7848658611509318, | |
| "grad_norm": 0.18163706362247467, | |
| "learning_rate": 0.0007819969428376047, | |
| "loss": 1.0994, | |
| "step": 15330 | |
| }, | |
| { | |
| "epoch": 0.7853778414908867, | |
| "grad_norm": 0.1802321821451187, | |
| "learning_rate": 0.0007789300418706098, | |
| "loss": 1.1043, | |
| "step": 15340 | |
| }, | |
| { | |
| "epoch": 0.7858898218308417, | |
| "grad_norm": 0.20434251427650452, | |
| "learning_rate": 0.0007758751689321484, | |
| "loss": 1.0943, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 0.7864018021707967, | |
| "grad_norm": 0.1818198412656784, | |
| "learning_rate": 0.0007728322768496924, | |
| "loss": 1.0916, | |
| "step": 15360 | |
| }, | |
| { | |
| "epoch": 0.7869137825107516, | |
| "grad_norm": 0.18060991168022156, | |
| "learning_rate": 0.0007698013186357197, | |
| "loss": 1.1122, | |
| "step": 15370 | |
| }, | |
| { | |
| "epoch": 0.7874257628507065, | |
| "grad_norm": 0.18546059727668762, | |
| "learning_rate": 0.0007667822474869874, | |
| "loss": 1.1075, | |
| "step": 15380 | |
| }, | |
| { | |
| "epoch": 0.7879377431906615, | |
| "grad_norm": 0.18823228776454926, | |
| "learning_rate": 0.0007637750167838097, | |
| "loss": 1.1197, | |
| "step": 15390 | |
| }, | |
| { | |
| "epoch": 0.7884497235306164, | |
| "grad_norm": 0.17590127885341644, | |
| "learning_rate": 0.0007607795800893374, | |
| "loss": 1.0865, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.7889617038705714, | |
| "grad_norm": 0.18602034449577332, | |
| "learning_rate": 0.000757795891148842, | |
| "loss": 1.1, | |
| "step": 15410 | |
| }, | |
| { | |
| "epoch": 0.7894736842105263, | |
| "grad_norm": 0.19357922673225403, | |
| "learning_rate": 0.0007548239038889995, | |
| "loss": 1.1015, | |
| "step": 15420 | |
| }, | |
| { | |
| "epoch": 0.7899856645504812, | |
| "grad_norm": 0.17590965330600739, | |
| "learning_rate": 0.000751863572417181, | |
| "loss": 1.1113, | |
| "step": 15430 | |
| }, | |
| { | |
| "epoch": 0.7904976448904362, | |
| "grad_norm": 0.1751716434955597, | |
| "learning_rate": 0.0007489148510207429, | |
| "loss": 1.0898, | |
| "step": 15440 | |
| }, | |
| { | |
| "epoch": 0.7910096252303912, | |
| "grad_norm": 0.17589299380779266, | |
| "learning_rate": 0.000745977694166321, | |
| "loss": 1.0931, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 0.7915216055703461, | |
| "grad_norm": 0.17544785141944885, | |
| "learning_rate": 0.0007430520564991282, | |
| "loss": 1.0914, | |
| "step": 15460 | |
| }, | |
| { | |
| "epoch": 0.792033585910301, | |
| "grad_norm": 0.18367989361286163, | |
| "learning_rate": 0.0007401378928422531, | |
| "loss": 1.1043, | |
| "step": 15470 | |
| }, | |
| { | |
| "epoch": 0.792545566250256, | |
| "grad_norm": 0.17736022174358368, | |
| "learning_rate": 0.0007372351581959634, | |
| "loss": 1.1252, | |
| "step": 15480 | |
| }, | |
| { | |
| "epoch": 0.7930575465902109, | |
| "grad_norm": 0.18722687661647797, | |
| "learning_rate": 0.0007343438077370098, | |
| "loss": 1.095, | |
| "step": 15490 | |
| }, | |
| { | |
| "epoch": 0.7935695269301659, | |
| "grad_norm": 0.1756405234336853, | |
| "learning_rate": 0.0007314637968179351, | |
| "loss": 1.1017, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.7940815072701208, | |
| "grad_norm": 0.17875617742538452, | |
| "learning_rate": 0.0007285950809663841, | |
| "loss": 1.0979, | |
| "step": 15510 | |
| }, | |
| { | |
| "epoch": 0.7945934876100758, | |
| "grad_norm": 0.17093615233898163, | |
| "learning_rate": 0.0007257376158844169, | |
| "loss": 1.0886, | |
| "step": 15520 | |
| }, | |
| { | |
| "epoch": 0.7951054679500307, | |
| "grad_norm": 0.18361063301563263, | |
| "learning_rate": 0.0007228913574478252, | |
| "loss": 1.1089, | |
| "step": 15530 | |
| }, | |
| { | |
| "epoch": 0.7956174482899857, | |
| "grad_norm": 0.1857183277606964, | |
| "learning_rate": 0.0007200562617054503, | |
| "loss": 1.0806, | |
| "step": 15540 | |
| }, | |
| { | |
| "epoch": 0.7961294286299406, | |
| "grad_norm": 0.1974077820777893, | |
| "learning_rate": 0.0007172322848785056, | |
| "loss": 1.088, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 0.7966414089698955, | |
| "grad_norm": 0.173116534948349, | |
| "learning_rate": 0.0007144193833598987, | |
| "loss": 1.0921, | |
| "step": 15560 | |
| }, | |
| { | |
| "epoch": 0.7971533893098505, | |
| "grad_norm": 0.17753879725933075, | |
| "learning_rate": 0.0007116175137135599, | |
| "loss": 1.0846, | |
| "step": 15570 | |
| }, | |
| { | |
| "epoch": 0.7976653696498055, | |
| "grad_norm": 0.1796150505542755, | |
| "learning_rate": 0.0007088266326737707, | |
| "loss": 1.0816, | |
| "step": 15580 | |
| }, | |
| { | |
| "epoch": 0.7981773499897604, | |
| "grad_norm": 0.17271041870117188, | |
| "learning_rate": 0.0007060466971444953, | |
| "loss": 1.0875, | |
| "step": 15590 | |
| }, | |
| { | |
| "epoch": 0.7986893303297153, | |
| "grad_norm": 0.1766566038131714, | |
| "learning_rate": 0.0007032776641987162, | |
| "loss": 1.085, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.7992013106696703, | |
| "grad_norm": 0.17464908957481384, | |
| "learning_rate": 0.0007005194910777697, | |
| "loss": 1.0669, | |
| "step": 15610 | |
| }, | |
| { | |
| "epoch": 0.7997132910096252, | |
| "grad_norm": 0.18235880136489868, | |
| "learning_rate": 0.0006977721351906876, | |
| "loss": 1.0983, | |
| "step": 15620 | |
| }, | |
| { | |
| "epoch": 0.8002252713495802, | |
| "grad_norm": 0.17582911252975464, | |
| "learning_rate": 0.0006950355541135377, | |
| "loss": 1.0748, | |
| "step": 15630 | |
| }, | |
| { | |
| "epoch": 0.8007372516895351, | |
| "grad_norm": 0.18529601395130157, | |
| "learning_rate": 0.0006923097055887701, | |
| "loss": 1.082, | |
| "step": 15640 | |
| }, | |
| { | |
| "epoch": 0.80124923202949, | |
| "grad_norm": 0.18771891295909882, | |
| "learning_rate": 0.000689594547524564, | |
| "loss": 1.0792, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 0.801761212369445, | |
| "grad_norm": 0.18567664921283722, | |
| "learning_rate": 0.0006868900379941773, | |
| "loss": 1.0929, | |
| "step": 15660 | |
| }, | |
| { | |
| "epoch": 0.8022731927094, | |
| "grad_norm": 0.18062008917331696, | |
| "learning_rate": 0.0006841961352353004, | |
| "loss": 1.0952, | |
| "step": 15670 | |
| }, | |
| { | |
| "epoch": 0.8027851730493549, | |
| "grad_norm": 0.17383413016796112, | |
| "learning_rate": 0.0006815127976494104, | |
| "loss": 1.1029, | |
| "step": 15680 | |
| }, | |
| { | |
| "epoch": 0.8032971533893098, | |
| "grad_norm": 0.17971891164779663, | |
| "learning_rate": 0.0006788399838011287, | |
| "loss": 1.1032, | |
| "step": 15690 | |
| }, | |
| { | |
| "epoch": 0.8038091337292648, | |
| "grad_norm": 0.17936407029628754, | |
| "learning_rate": 0.0006761776524175815, | |
| "loss": 1.1001, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.8043211140692197, | |
| "grad_norm": 0.18222102522850037, | |
| "learning_rate": 0.0006735257623877627, | |
| "loss": 1.0872, | |
| "step": 15710 | |
| }, | |
| { | |
| "epoch": 0.8048330944091747, | |
| "grad_norm": 0.18015074729919434, | |
| "learning_rate": 0.0006708842727618985, | |
| "loss": 1.0991, | |
| "step": 15720 | |
| }, | |
| { | |
| "epoch": 0.8053450747491296, | |
| "grad_norm": 0.17375022172927856, | |
| "learning_rate": 0.0006682531427508156, | |
| "loss": 1.0623, | |
| "step": 15730 | |
| }, | |
| { | |
| "epoch": 0.8058570550890846, | |
| "grad_norm": 0.1764671802520752, | |
| "learning_rate": 0.0006656323317253108, | |
| "loss": 1.0984, | |
| "step": 15740 | |
| }, | |
| { | |
| "epoch": 0.8063690354290395, | |
| "grad_norm": 0.1692001074552536, | |
| "learning_rate": 0.0006630217992155241, | |
| "loss": 1.0859, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 0.8068810157689945, | |
| "grad_norm": 0.17819392681121826, | |
| "learning_rate": 0.0006604215049103134, | |
| "loss": 1.0899, | |
| "step": 15760 | |
| }, | |
| { | |
| "epoch": 0.8073929961089494, | |
| "grad_norm": 0.17758633196353912, | |
| "learning_rate": 0.0006578314086566325, | |
| "loss": 1.0826, | |
| "step": 15770 | |
| }, | |
| { | |
| "epoch": 0.8079049764489044, | |
| "grad_norm": 0.17600396275520325, | |
| "learning_rate": 0.0006552514704589104, | |
| "loss": 1.0912, | |
| "step": 15780 | |
| }, | |
| { | |
| "epoch": 0.8084169567888593, | |
| "grad_norm": 0.177523672580719, | |
| "learning_rate": 0.0006526816504784343, | |
| "loss": 1.0814, | |
| "step": 15790 | |
| }, | |
| { | |
| "epoch": 0.8089289371288143, | |
| "grad_norm": 0.17935074865818024, | |
| "learning_rate": 0.0006501219090327343, | |
| "loss": 1.0859, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.8094409174687692, | |
| "grad_norm": 0.18292473256587982, | |
| "learning_rate": 0.0006475722065949703, | |
| "loss": 1.0716, | |
| "step": 15810 | |
| }, | |
| { | |
| "epoch": 0.8099528978087241, | |
| "grad_norm": 0.18235322833061218, | |
| "learning_rate": 0.000645032503793322, | |
| "loss": 1.085, | |
| "step": 15820 | |
| }, | |
| { | |
| "epoch": 0.8104648781486791, | |
| "grad_norm": 0.18412081897258759, | |
| "learning_rate": 0.0006425027614103806, | |
| "loss": 1.0872, | |
| "step": 15830 | |
| }, | |
| { | |
| "epoch": 0.810976858488634, | |
| "grad_norm": 0.17389538884162903, | |
| "learning_rate": 0.0006399829403825436, | |
| "loss": 1.0935, | |
| "step": 15840 | |
| }, | |
| { | |
| "epoch": 0.811488838828589, | |
| "grad_norm": 0.17470002174377441, | |
| "learning_rate": 0.0006374730017994116, | |
| "loss": 1.0603, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 0.8120008191685439, | |
| "grad_norm": 0.17814920842647552, | |
| "learning_rate": 0.0006349729069031867, | |
| "loss": 1.1096, | |
| "step": 15860 | |
| }, | |
| { | |
| "epoch": 0.8125127995084989, | |
| "grad_norm": 0.18193413317203522, | |
| "learning_rate": 0.000632482617088075, | |
| "loss": 1.076, | |
| "step": 15870 | |
| }, | |
| { | |
| "epoch": 0.8130247798484538, | |
| "grad_norm": 0.18022698163986206, | |
| "learning_rate": 0.0006300020938996901, | |
| "loss": 1.0868, | |
| "step": 15880 | |
| }, | |
| { | |
| "epoch": 0.8135367601884088, | |
| "grad_norm": 0.16944915056228638, | |
| "learning_rate": 0.0006275312990344587, | |
| "loss": 1.0857, | |
| "step": 15890 | |
| }, | |
| { | |
| "epoch": 0.8140487405283637, | |
| "grad_norm": 0.17860791087150574, | |
| "learning_rate": 0.0006250701943390303, | |
| "loss": 1.0885, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.8145607208683187, | |
| "grad_norm": 0.169233039021492, | |
| "learning_rate": 0.0006226187418096868, | |
| "loss": 1.0701, | |
| "step": 15910 | |
| }, | |
| { | |
| "epoch": 0.8150727012082736, | |
| "grad_norm": 0.18404126167297363, | |
| "learning_rate": 0.0006201769035917569, | |
| "loss": 1.0862, | |
| "step": 15920 | |
| }, | |
| { | |
| "epoch": 0.8155846815482285, | |
| "grad_norm": 0.1732415407896042, | |
| "learning_rate": 0.0006177446419790303, | |
| "loss": 1.0552, | |
| "step": 15930 | |
| }, | |
| { | |
| "epoch": 0.8160966618881835, | |
| "grad_norm": 0.17680327594280243, | |
| "learning_rate": 0.0006153219194131765, | |
| "loss": 1.0839, | |
| "step": 15940 | |
| }, | |
| { | |
| "epoch": 0.8166086422281384, | |
| "grad_norm": 0.168556347489357, | |
| "learning_rate": 0.000612908698483164, | |
| "loss": 1.0628, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 0.8171206225680934, | |
| "grad_norm": 0.1826118528842926, | |
| "learning_rate": 0.0006105049419246835, | |
| "loss": 1.0855, | |
| "step": 15960 | |
| }, | |
| { | |
| "epoch": 0.8176326029080483, | |
| "grad_norm": 0.17182965576648712, | |
| "learning_rate": 0.0006081106126195717, | |
| "loss": 1.0669, | |
| "step": 15970 | |
| }, | |
| { | |
| "epoch": 0.8181445832480033, | |
| "grad_norm": 0.16935127973556519, | |
| "learning_rate": 0.0006057256735952383, | |
| "loss": 1.083, | |
| "step": 15980 | |
| }, | |
| { | |
| "epoch": 0.8186565635879582, | |
| "grad_norm": 0.17464590072631836, | |
| "learning_rate": 0.0006033500880240954, | |
| "loss": 1.0671, | |
| "step": 15990 | |
| }, | |
| { | |
| "epoch": 0.8191685439279132, | |
| "grad_norm": 0.17747105658054352, | |
| "learning_rate": 0.0006009838192229885, | |
| "loss": 1.0678, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.8196805242678681, | |
| "grad_norm": 0.17449192702770233, | |
| "learning_rate": 0.0005986268306526304, | |
| "loss": 1.0796, | |
| "step": 16010 | |
| }, | |
| { | |
| "epoch": 0.8201925046078231, | |
| "grad_norm": 0.17097654938697815, | |
| "learning_rate": 0.0005962790859170364, | |
| "loss": 1.0778, | |
| "step": 16020 | |
| }, | |
| { | |
| "epoch": 0.820704484947778, | |
| "grad_norm": 0.16904379427433014, | |
| "learning_rate": 0.0005939405487629626, | |
| "loss": 1.0843, | |
| "step": 16030 | |
| }, | |
| { | |
| "epoch": 0.821216465287733, | |
| "grad_norm": 0.17497345805168152, | |
| "learning_rate": 0.0005916111830793466, | |
| "loss": 1.101, | |
| "step": 16040 | |
| }, | |
| { | |
| "epoch": 0.8217284456276879, | |
| "grad_norm": 0.1789994090795517, | |
| "learning_rate": 0.0005892909528967487, | |
| "loss": 1.0845, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 0.8222404259676428, | |
| "grad_norm": 0.1678200364112854, | |
| "learning_rate": 0.0005869798223867978, | |
| "loss": 1.0606, | |
| "step": 16060 | |
| }, | |
| { | |
| "epoch": 0.8227524063075978, | |
| "grad_norm": 0.17383365333080292, | |
| "learning_rate": 0.000584677755861637, | |
| "loss": 1.0674, | |
| "step": 16070 | |
| }, | |
| { | |
| "epoch": 0.8232643866475527, | |
| "grad_norm": 0.17335745692253113, | |
| "learning_rate": 0.0005823847177733732, | |
| "loss": 1.0965, | |
| "step": 16080 | |
| }, | |
| { | |
| "epoch": 0.8237763669875077, | |
| "grad_norm": 0.16967058181762695, | |
| "learning_rate": 0.0005801006727135282, | |
| "loss": 1.0677, | |
| "step": 16090 | |
| }, | |
| { | |
| "epoch": 0.8242883473274626, | |
| "grad_norm": 0.16847650706768036, | |
| "learning_rate": 0.0005778255854124912, | |
| "loss": 1.0791, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.8248003276674176, | |
| "grad_norm": 0.17251423001289368, | |
| "learning_rate": 0.0005755594207389755, | |
| "loss": 1.0806, | |
| "step": 16110 | |
| }, | |
| { | |
| "epoch": 0.8253123080073725, | |
| "grad_norm": 0.17555896937847137, | |
| "learning_rate": 0.0005733021436994743, | |
| "loss": 1.066, | |
| "step": 16120 | |
| }, | |
| { | |
| "epoch": 0.8258242883473275, | |
| "grad_norm": 0.16997992992401123, | |
| "learning_rate": 0.000571053719437722, | |
| "loss": 1.0876, | |
| "step": 16130 | |
| }, | |
| { | |
| "epoch": 0.8263362686872824, | |
| "grad_norm": 0.17845116555690765, | |
| "learning_rate": 0.0005688141132341551, | |
| "loss": 1.085, | |
| "step": 16140 | |
| }, | |
| { | |
| "epoch": 0.8268482490272373, | |
| "grad_norm": 0.1836511194705963, | |
| "learning_rate": 0.0005665832905053756, | |
| "loss": 1.0769, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 0.8273602293671923, | |
| "grad_norm": 0.1753719449043274, | |
| "learning_rate": 0.0005643612168036182, | |
| "loss": 1.0742, | |
| "step": 16160 | |
| }, | |
| { | |
| "epoch": 0.8278722097071473, | |
| "grad_norm": 0.17152993381023407, | |
| "learning_rate": 0.0005621478578162176, | |
| "loss": 1.0761, | |
| "step": 16170 | |
| }, | |
| { | |
| "epoch": 0.8283841900471022, | |
| "grad_norm": 0.18273817002773285, | |
| "learning_rate": 0.0005599431793650786, | |
| "loss": 1.0803, | |
| "step": 16180 | |
| }, | |
| { | |
| "epoch": 0.8288961703870571, | |
| "grad_norm": 0.1865053027868271, | |
| "learning_rate": 0.0005577471474061485, | |
| "loss": 1.0695, | |
| "step": 16190 | |
| }, | |
| { | |
| "epoch": 0.8294081507270121, | |
| "grad_norm": 0.16600672900676727, | |
| "learning_rate": 0.0005555597280288918, | |
| "loss": 1.0844, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.829920131066967, | |
| "grad_norm": 0.1850479394197464, | |
| "learning_rate": 0.0005533808874557656, | |
| "loss": 1.0658, | |
| "step": 16210 | |
| }, | |
| { | |
| "epoch": 0.830432111406922, | |
| "grad_norm": 0.17687514424324036, | |
| "learning_rate": 0.000551210592041699, | |
| "loss": 1.072, | |
| "step": 16220 | |
| }, | |
| { | |
| "epoch": 0.8309440917468769, | |
| "grad_norm": 0.1833869218826294, | |
| "learning_rate": 0.000549048808273573, | |
| "loss": 1.0739, | |
| "step": 16230 | |
| }, | |
| { | |
| "epoch": 0.8314560720868319, | |
| "grad_norm": 0.1750813126564026, | |
| "learning_rate": 0.0005468955027697031, | |
| "loss": 1.0851, | |
| "step": 16240 | |
| }, | |
| { | |
| "epoch": 0.8319680524267868, | |
| "grad_norm": 0.18595030903816223, | |
| "learning_rate": 0.0005447506422793241, | |
| "loss": 1.0615, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 0.8324800327667418, | |
| "grad_norm": 0.1711542159318924, | |
| "learning_rate": 0.0005426141936820762, | |
| "loss": 1.0689, | |
| "step": 16260 | |
| }, | |
| { | |
| "epoch": 0.8329920131066967, | |
| "grad_norm": 0.18596914410591125, | |
| "learning_rate": 0.000540486123987494, | |
| "loss": 1.0574, | |
| "step": 16270 | |
| }, | |
| { | |
| "epoch": 0.8335039934466516, | |
| "grad_norm": 0.17115946114063263, | |
| "learning_rate": 0.0005383664003344964, | |
| "loss": 1.0703, | |
| "step": 16280 | |
| }, | |
| { | |
| "epoch": 0.8340159737866066, | |
| "grad_norm": 0.1802951842546463, | |
| "learning_rate": 0.0005362549899908805, | |
| "loss": 1.074, | |
| "step": 16290 | |
| }, | |
| { | |
| "epoch": 0.8345279541265616, | |
| "grad_norm": 0.18504950404167175, | |
| "learning_rate": 0.0005341518603528143, | |
| "loss": 1.0747, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.8350399344665165, | |
| "grad_norm": 0.17508040368556976, | |
| "learning_rate": 0.000532056978944335, | |
| "loss": 1.0784, | |
| "step": 16310 | |
| }, | |
| { | |
| "epoch": 0.8355519148064714, | |
| "grad_norm": 0.1866855025291443, | |
| "learning_rate": 0.0005299703134168463, | |
| "loss": 1.0799, | |
| "step": 16320 | |
| }, | |
| { | |
| "epoch": 0.8360638951464264, | |
| "grad_norm": 0.16678877174854279, | |
| "learning_rate": 0.0005278918315486196, | |
| "loss": 1.0531, | |
| "step": 16330 | |
| }, | |
| { | |
| "epoch": 0.8365758754863813, | |
| "grad_norm": 0.1872544288635254, | |
| "learning_rate": 0.000525821501244296, | |
| "loss": 1.0768, | |
| "step": 16340 | |
| }, | |
| { | |
| "epoch": 0.8370878558263363, | |
| "grad_norm": 0.17887745797634125, | |
| "learning_rate": 0.0005237592905343908, | |
| "loss": 1.0552, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 0.8375998361662912, | |
| "grad_norm": 0.1764066219329834, | |
| "learning_rate": 0.0005217051675748001, | |
| "loss": 1.0511, | |
| "step": 16360 | |
| }, | |
| { | |
| "epoch": 0.8381118165062461, | |
| "grad_norm": 0.17765092849731445, | |
| "learning_rate": 0.0005196591006463087, | |
| "loss": 1.0645, | |
| "step": 16370 | |
| }, | |
| { | |
| "epoch": 0.8386237968462011, | |
| "grad_norm": 0.17197942733764648, | |
| "learning_rate": 0.0005176210581541006, | |
| "loss": 1.0561, | |
| "step": 16380 | |
| }, | |
| { | |
| "epoch": 0.8391357771861561, | |
| "grad_norm": 0.1778382807970047, | |
| "learning_rate": 0.0005155910086272709, | |
| "loss": 1.0818, | |
| "step": 16390 | |
| }, | |
| { | |
| "epoch": 0.839647757526111, | |
| "grad_norm": 0.1758384257555008, | |
| "learning_rate": 0.00051356892071834, | |
| "loss": 1.0755, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.8401597378660659, | |
| "grad_norm": 0.17765450477600098, | |
| "learning_rate": 0.0005115547632027694, | |
| "loss": 1.0622, | |
| "step": 16410 | |
| }, | |
| { | |
| "epoch": 0.8406717182060209, | |
| "grad_norm": 0.1722906529903412, | |
| "learning_rate": 0.0005095485049784797, | |
| "loss": 1.0562, | |
| "step": 16420 | |
| }, | |
| { | |
| "epoch": 0.8411836985459759, | |
| "grad_norm": 0.18041284382343292, | |
| "learning_rate": 0.0005075501150653699, | |
| "loss": 1.0563, | |
| "step": 16430 | |
| }, | |
| { | |
| "epoch": 0.8416956788859308, | |
| "grad_norm": 0.1721327304840088, | |
| "learning_rate": 0.0005055595626048399, | |
| "loss": 1.0872, | |
| "step": 16440 | |
| }, | |
| { | |
| "epoch": 0.8422076592258857, | |
| "grad_norm": 0.17623233795166016, | |
| "learning_rate": 0.000503576816859313, | |
| "loss": 1.0768, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 0.8427196395658406, | |
| "grad_norm": 0.1824178546667099, | |
| "learning_rate": 0.000501601847211762, | |
| "loss": 1.0773, | |
| "step": 16460 | |
| }, | |
| { | |
| "epoch": 0.8432316199057956, | |
| "grad_norm": 0.17492622137069702, | |
| "learning_rate": 0.0004996346231652357, | |
| "loss": 1.0751, | |
| "step": 16470 | |
| }, | |
| { | |
| "epoch": 0.8437436002457506, | |
| "grad_norm": 0.19331291317939758, | |
| "learning_rate": 0.0004976751143423888, | |
| "loss": 1.0522, | |
| "step": 16480 | |
| }, | |
| { | |
| "epoch": 0.8442555805857055, | |
| "grad_norm": 0.17318172752857208, | |
| "learning_rate": 0.0004957232904850122, | |
| "loss": 1.0611, | |
| "step": 16490 | |
| }, | |
| { | |
| "epoch": 0.8447675609256604, | |
| "grad_norm": 0.18951846659183502, | |
| "learning_rate": 0.0004937791214535661, | |
| "loss": 1.0584, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.8452795412656154, | |
| "grad_norm": 0.17713989317417145, | |
| "learning_rate": 0.0004918425772267145, | |
| "loss": 1.0542, | |
| "step": 16510 | |
| }, | |
| { | |
| "epoch": 0.8457915216055704, | |
| "grad_norm": 0.16759324073791504, | |
| "learning_rate": 0.0004899136279008613, | |
| "loss": 1.0689, | |
| "step": 16520 | |
| }, | |
| { | |
| "epoch": 0.8463035019455253, | |
| "grad_norm": 0.18664461374282837, | |
| "learning_rate": 0.000487992243689689, | |
| "loss": 1.0732, | |
| "step": 16530 | |
| }, | |
| { | |
| "epoch": 0.8468154822854802, | |
| "grad_norm": 0.17348751425743103, | |
| "learning_rate": 0.00048607839492369886, | |
| "loss": 1.0762, | |
| "step": 16540 | |
| }, | |
| { | |
| "epoch": 0.8473274626254352, | |
| "grad_norm": 0.17233343422412872, | |
| "learning_rate": 0.0004841720520497518, | |
| "loss": 1.0579, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 0.8478394429653902, | |
| "grad_norm": 0.18232837319374084, | |
| "learning_rate": 0.0004822731856306133, | |
| "loss": 1.0576, | |
| "step": 16560 | |
| }, | |
| { | |
| "epoch": 0.8483514233053451, | |
| "grad_norm": 0.17330168187618256, | |
| "learning_rate": 0.000480381766344498, | |
| "loss": 1.044, | |
| "step": 16570 | |
| }, | |
| { | |
| "epoch": 0.8488634036453, | |
| "grad_norm": 0.1745171695947647, | |
| "learning_rate": 0.00047849776498461725, | |
| "loss": 1.07, | |
| "step": 16580 | |
| }, | |
| { | |
| "epoch": 0.8493753839852549, | |
| "grad_norm": 0.1749190390110016, | |
| "learning_rate": 0.00047662115245872787, | |
| "loss": 1.0666, | |
| "step": 16590 | |
| }, | |
| { | |
| "epoch": 0.84988736432521, | |
| "grad_norm": 0.17629800736904144, | |
| "learning_rate": 0.0004747518997886834, | |
| "loss": 1.0694, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.8503993446651649, | |
| "grad_norm": 0.17141848802566528, | |
| "learning_rate": 0.00047288997810998585, | |
| "loss": 1.0752, | |
| "step": 16610 | |
| }, | |
| { | |
| "epoch": 0.8509113250051198, | |
| "grad_norm": 0.16317421197891235, | |
| "learning_rate": 0.00047103535867134064, | |
| "loss": 1.0575, | |
| "step": 16620 | |
| }, | |
| { | |
| "epoch": 0.8514233053450747, | |
| "grad_norm": 0.1698952317237854, | |
| "learning_rate": 0.0004691880128342126, | |
| "loss": 1.054, | |
| "step": 16630 | |
| }, | |
| { | |
| "epoch": 0.8519352856850297, | |
| "grad_norm": 0.17862023413181305, | |
| "learning_rate": 0.00046734791207238334, | |
| "loss": 1.0578, | |
| "step": 16640 | |
| }, | |
| { | |
| "epoch": 0.8524472660249847, | |
| "grad_norm": 0.17291221022605896, | |
| "learning_rate": 0.0004655150279715109, | |
| "loss": 1.0614, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 0.8529592463649396, | |
| "grad_norm": 0.18683776259422302, | |
| "learning_rate": 0.0004636893322286915, | |
| "loss": 1.0587, | |
| "step": 16660 | |
| }, | |
| { | |
| "epoch": 0.8534712267048945, | |
| "grad_norm": 0.17157678306102753, | |
| "learning_rate": 0.00046187079665202144, | |
| "loss": 1.0876, | |
| "step": 16670 | |
| }, | |
| { | |
| "epoch": 0.8539832070448494, | |
| "grad_norm": 0.16680538654327393, | |
| "learning_rate": 0.0004600593931601628, | |
| "loss": 1.0608, | |
| "step": 16680 | |
| }, | |
| { | |
| "epoch": 0.8544951873848045, | |
| "grad_norm": 0.17904032766819, | |
| "learning_rate": 0.00045825509378190934, | |
| "loss": 1.0622, | |
| "step": 16690 | |
| }, | |
| { | |
| "epoch": 0.8550071677247594, | |
| "grad_norm": 0.17377473413944244, | |
| "learning_rate": 0.0004564578706557547, | |
| "loss": 1.0761, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.8555191480647143, | |
| "grad_norm": 0.17606638371944427, | |
| "learning_rate": 0.0004546676960294617, | |
| "loss": 1.0627, | |
| "step": 16710 | |
| }, | |
| { | |
| "epoch": 0.8560311284046692, | |
| "grad_norm": 0.1655128300189972, | |
| "learning_rate": 0.0004528845422596346, | |
| "loss": 1.0579, | |
| "step": 16720 | |
| }, | |
| { | |
| "epoch": 0.8565431087446242, | |
| "grad_norm": 0.185993954539299, | |
| "learning_rate": 0.0004511083818112919, | |
| "loss": 1.0604, | |
| "step": 16730 | |
| }, | |
| { | |
| "epoch": 0.8570550890845792, | |
| "grad_norm": 0.18218767642974854, | |
| "learning_rate": 0.00044933918725744066, | |
| "loss": 1.0595, | |
| "step": 16740 | |
| }, | |
| { | |
| "epoch": 0.8575670694245341, | |
| "grad_norm": 0.16947178542613983, | |
| "learning_rate": 0.000447576931278654, | |
| "loss": 1.0494, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 0.858079049764489, | |
| "grad_norm": 0.17753495275974274, | |
| "learning_rate": 0.00044582158666264793, | |
| "loss": 1.0522, | |
| "step": 16760 | |
| }, | |
| { | |
| "epoch": 0.858591030104444, | |
| "grad_norm": 0.1756090372800827, | |
| "learning_rate": 0.0004440731263038627, | |
| "loss": 1.074, | |
| "step": 16770 | |
| }, | |
| { | |
| "epoch": 0.859103010444399, | |
| "grad_norm": 0.18287988007068634, | |
| "learning_rate": 0.00044233152320304276, | |
| "loss": 1.0883, | |
| "step": 16780 | |
| }, | |
| { | |
| "epoch": 0.8596149907843539, | |
| "grad_norm": 0.18234935402870178, | |
| "learning_rate": 0.0004405967504668205, | |
| "loss": 1.0481, | |
| "step": 16790 | |
| }, | |
| { | |
| "epoch": 0.8601269711243088, | |
| "grad_norm": 0.17408689856529236, | |
| "learning_rate": 0.0004388687813073016, | |
| "loss": 1.0672, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.8606389514642637, | |
| "grad_norm": 0.1746188998222351, | |
| "learning_rate": 0.00043714758904165, | |
| "loss": 1.0581, | |
| "step": 16810 | |
| }, | |
| { | |
| "epoch": 0.8611509318042188, | |
| "grad_norm": 0.17414236068725586, | |
| "learning_rate": 0.0004354331470916772, | |
| "loss": 1.0296, | |
| "step": 16820 | |
| }, | |
| { | |
| "epoch": 0.8616629121441737, | |
| "grad_norm": 0.17176198959350586, | |
| "learning_rate": 0.00043372542898343074, | |
| "loss": 1.048, | |
| "step": 16830 | |
| }, | |
| { | |
| "epoch": 0.8621748924841286, | |
| "grad_norm": 0.17366254329681396, | |
| "learning_rate": 0.0004320244083467865, | |
| "loss": 1.0584, | |
| "step": 16840 | |
| }, | |
| { | |
| "epoch": 0.8626868728240835, | |
| "grad_norm": 0.17431634664535522, | |
| "learning_rate": 0.0004303300589150403, | |
| "loss": 1.0747, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 0.8631988531640385, | |
| "grad_norm": 0.17983673512935638, | |
| "learning_rate": 0.0004286423545245033, | |
| "loss": 1.0477, | |
| "step": 16860 | |
| }, | |
| { | |
| "epoch": 0.8637108335039935, | |
| "grad_norm": 0.17973174154758453, | |
| "learning_rate": 0.00042696126911409766, | |
| "loss": 1.0733, | |
| "step": 16870 | |
| }, | |
| { | |
| "epoch": 0.8642228138439484, | |
| "grad_norm": 0.17209124565124512, | |
| "learning_rate": 0.0004252867767249536, | |
| "loss": 1.0553, | |
| "step": 16880 | |
| }, | |
| { | |
| "epoch": 0.8647347941839033, | |
| "grad_norm": 0.17548377811908722, | |
| "learning_rate": 0.0004236188515000098, | |
| "loss": 1.0317, | |
| "step": 16890 | |
| }, | |
| { | |
| "epoch": 0.8652467745238582, | |
| "grad_norm": 0.1856032758951187, | |
| "learning_rate": 0.0004219574676836124, | |
| "loss": 1.0645, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.8657587548638133, | |
| "grad_norm": 0.171828031539917, | |
| "learning_rate": 0.0004203025996211187, | |
| "loss": 1.0468, | |
| "step": 16910 | |
| }, | |
| { | |
| "epoch": 0.8662707352037682, | |
| "grad_norm": 0.1737641543149948, | |
| "learning_rate": 0.00041865422175850074, | |
| "loss": 1.0593, | |
| "step": 16920 | |
| }, | |
| { | |
| "epoch": 0.8667827155437231, | |
| "grad_norm": 0.17497050762176514, | |
| "learning_rate": 0.00041701230864194997, | |
| "loss": 1.0558, | |
| "step": 16930 | |
| }, | |
| { | |
| "epoch": 0.867294695883678, | |
| "grad_norm": 0.1742735356092453, | |
| "learning_rate": 0.00041537683491748515, | |
| "loss": 1.0524, | |
| "step": 16940 | |
| }, | |
| { | |
| "epoch": 0.8678066762236331, | |
| "grad_norm": 0.16955190896987915, | |
| "learning_rate": 0.00041374777533055996, | |
| "loss": 1.0734, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 0.868318656563588, | |
| "grad_norm": 0.17131267488002777, | |
| "learning_rate": 0.00041212510472567404, | |
| "loss": 1.047, | |
| "step": 16960 | |
| }, | |
| { | |
| "epoch": 0.8688306369035429, | |
| "grad_norm": 0.18686212599277496, | |
| "learning_rate": 0.00041050879804598354, | |
| "loss": 1.0628, | |
| "step": 16970 | |
| }, | |
| { | |
| "epoch": 0.8693426172434978, | |
| "grad_norm": 0.18018223345279694, | |
| "learning_rate": 0.0004088988303329146, | |
| "loss": 1.0727, | |
| "step": 16980 | |
| }, | |
| { | |
| "epoch": 0.8698545975834528, | |
| "grad_norm": 0.17378225922584534, | |
| "learning_rate": 0.00040729517672577834, | |
| "loss": 1.0608, | |
| "step": 16990 | |
| }, | |
| { | |
| "epoch": 0.8703665779234078, | |
| "grad_norm": 0.17299434542655945, | |
| "learning_rate": 0.0004056978124613862, | |
| "loss": 1.0572, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.8708785582633627, | |
| "grad_norm": 0.17272843420505524, | |
| "learning_rate": 0.0004041067128736684, | |
| "loss": 1.068, | |
| "step": 17010 | |
| }, | |
| { | |
| "epoch": 0.8713905386033176, | |
| "grad_norm": 0.17482733726501465, | |
| "learning_rate": 0.0004025218533932921, | |
| "loss": 1.0434, | |
| "step": 17020 | |
| }, | |
| { | |
| "epoch": 0.8719025189432725, | |
| "grad_norm": 0.17604181170463562, | |
| "learning_rate": 0.00040094320954728313, | |
| "loss": 1.0473, | |
| "step": 17030 | |
| }, | |
| { | |
| "epoch": 0.8724144992832276, | |
| "grad_norm": 0.17563997209072113, | |
| "learning_rate": 0.000399370756958647, | |
| "loss": 1.0326, | |
| "step": 17040 | |
| }, | |
| { | |
| "epoch": 0.8729264796231825, | |
| "grad_norm": 0.17245963215827942, | |
| "learning_rate": 0.00039780447134599286, | |
| "loss": 1.0473, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 0.8734384599631374, | |
| "grad_norm": 0.1761290282011032, | |
| "learning_rate": 0.00039624432852315933, | |
| "loss": 1.0521, | |
| "step": 17060 | |
| }, | |
| { | |
| "epoch": 0.8739504403030923, | |
| "grad_norm": 0.17559461295604706, | |
| "learning_rate": 0.0003946903043988396, | |
| "loss": 1.0499, | |
| "step": 17070 | |
| }, | |
| { | |
| "epoch": 0.8744624206430474, | |
| "grad_norm": 0.16970165073871613, | |
| "learning_rate": 0.00039314237497621053, | |
| "loss": 1.0653, | |
| "step": 17080 | |
| }, | |
| { | |
| "epoch": 0.8749744009830023, | |
| "grad_norm": 0.1792786717414856, | |
| "learning_rate": 0.00039160051635256165, | |
| "loss": 1.0554, | |
| "step": 17090 | |
| }, | |
| { | |
| "epoch": 0.8754863813229572, | |
| "grad_norm": 0.16863805055618286, | |
| "learning_rate": 0.0003900647047189262, | |
| "loss": 1.0524, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.8759983616629121, | |
| "grad_norm": 0.1794777661561966, | |
| "learning_rate": 0.0003885349163597133, | |
| "loss": 1.0741, | |
| "step": 17110 | |
| }, | |
| { | |
| "epoch": 0.876510342002867, | |
| "grad_norm": 0.1949402540922165, | |
| "learning_rate": 0.0003870111276523419, | |
| "loss": 1.0458, | |
| "step": 17120 | |
| }, | |
| { | |
| "epoch": 0.8770223223428221, | |
| "grad_norm": 0.17837046086788177, | |
| "learning_rate": 0.0003854933150668761, | |
| "loss": 1.0484, | |
| "step": 17130 | |
| }, | |
| { | |
| "epoch": 0.877534302682777, | |
| "grad_norm": 0.16682222485542297, | |
| "learning_rate": 0.00038398145516566133, | |
| "loss": 1.0643, | |
| "step": 17140 | |
| }, | |
| { | |
| "epoch": 0.8780462830227319, | |
| "grad_norm": 0.17241717875003815, | |
| "learning_rate": 0.00038247552460296324, | |
| "loss": 1.0561, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 0.8785582633626868, | |
| "grad_norm": 0.16557161509990692, | |
| "learning_rate": 0.00038097550012460626, | |
| "loss": 1.0614, | |
| "step": 17160 | |
| }, | |
| { | |
| "epoch": 0.8790702437026419, | |
| "grad_norm": 0.17597849667072296, | |
| "learning_rate": 0.00037948135856761536, | |
| "loss": 1.0541, | |
| "step": 17170 | |
| }, | |
| { | |
| "epoch": 0.8795822240425968, | |
| "grad_norm": 0.17368751764297485, | |
| "learning_rate": 0.00037799307685985786, | |
| "loss": 1.0482, | |
| "step": 17180 | |
| }, | |
| { | |
| "epoch": 0.8800942043825517, | |
| "grad_norm": 0.17278683185577393, | |
| "learning_rate": 0.00037651063201968706, | |
| "loss": 1.0493, | |
| "step": 17190 | |
| }, | |
| { | |
| "epoch": 0.8806061847225066, | |
| "grad_norm": 0.17373493313789368, | |
| "learning_rate": 0.00037503400115558816, | |
| "loss": 1.0547, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.8811181650624617, | |
| "grad_norm": 0.1761094480752945, | |
| "learning_rate": 0.0003735631614658236, | |
| "loss": 1.0476, | |
| "step": 17210 | |
| }, | |
| { | |
| "epoch": 0.8816301454024166, | |
| "grad_norm": 0.1749420464038849, | |
| "learning_rate": 0.00037209809023808216, | |
| "loss": 1.0313, | |
| "step": 17220 | |
| }, | |
| { | |
| "epoch": 0.8821421257423715, | |
| "grad_norm": 0.1756523847579956, | |
| "learning_rate": 0.0003706387648491272, | |
| "loss": 1.0551, | |
| "step": 17230 | |
| }, | |
| { | |
| "epoch": 0.8826541060823264, | |
| "grad_norm": 0.1767933964729309, | |
| "learning_rate": 0.0003691851627644478, | |
| "loss": 1.0385, | |
| "step": 17240 | |
| }, | |
| { | |
| "epoch": 0.8831660864222813, | |
| "grad_norm": 0.17991852760314941, | |
| "learning_rate": 0.00036773726153791126, | |
| "loss": 1.0534, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 0.8836780667622364, | |
| "grad_norm": 0.17097926139831543, | |
| "learning_rate": 0.00036629503881141533, | |
| "loss": 1.0424, | |
| "step": 17260 | |
| }, | |
| { | |
| "epoch": 0.8841900471021913, | |
| "grad_norm": 0.1836550533771515, | |
| "learning_rate": 0.00036485847231454427, | |
| "loss": 1.0627, | |
| "step": 17270 | |
| }, | |
| { | |
| "epoch": 0.8847020274421462, | |
| "grad_norm": 0.18745499849319458, | |
| "learning_rate": 0.00036342753986422373, | |
| "loss": 1.0475, | |
| "step": 17280 | |
| }, | |
| { | |
| "epoch": 0.8852140077821011, | |
| "grad_norm": 0.17117556929588318, | |
| "learning_rate": 0.00036200221936437925, | |
| "loss": 1.0457, | |
| "step": 17290 | |
| }, | |
| { | |
| "epoch": 0.8857259881220562, | |
| "grad_norm": 0.17555800080299377, | |
| "learning_rate": 0.0003605824888055944, | |
| "loss": 1.0505, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.8862379684620111, | |
| "grad_norm": 0.17367680370807648, | |
| "learning_rate": 0.00035916832626477105, | |
| "loss": 1.0433, | |
| "step": 17310 | |
| }, | |
| { | |
| "epoch": 0.886749948801966, | |
| "grad_norm": 0.16771985590457916, | |
| "learning_rate": 0.0003577597099047911, | |
| "loss": 1.0405, | |
| "step": 17320 | |
| }, | |
| { | |
| "epoch": 0.8872619291419209, | |
| "grad_norm": 0.17749017477035522, | |
| "learning_rate": 0.00035635661797417894, | |
| "loss": 1.0326, | |
| "step": 17330 | |
| }, | |
| { | |
| "epoch": 0.8877739094818758, | |
| "grad_norm": 0.1756659597158432, | |
| "learning_rate": 0.0003549590288067658, | |
| "loss": 1.0481, | |
| "step": 17340 | |
| }, | |
| { | |
| "epoch": 0.8882858898218309, | |
| "grad_norm": 0.17804957926273346, | |
| "learning_rate": 0.00035356692082135497, | |
| "loss": 1.0348, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 0.8887978701617858, | |
| "grad_norm": 0.17013497650623322, | |
| "learning_rate": 0.000352180272521389, | |
| "loss": 1.0444, | |
| "step": 17360 | |
| }, | |
| { | |
| "epoch": 0.8893098505017407, | |
| "grad_norm": 0.16462627053260803, | |
| "learning_rate": 0.000350799062494617, | |
| "loss": 1.0473, | |
| "step": 17370 | |
| }, | |
| { | |
| "epoch": 0.8898218308416956, | |
| "grad_norm": 0.18292909860610962, | |
| "learning_rate": 0.00034942326941276463, | |
| "loss": 1.0548, | |
| "step": 17380 | |
| }, | |
| { | |
| "epoch": 0.8903338111816507, | |
| "grad_norm": 0.16778182983398438, | |
| "learning_rate": 0.00034805287203120474, | |
| "loss": 1.0486, | |
| "step": 17390 | |
| }, | |
| { | |
| "epoch": 0.8908457915216056, | |
| "grad_norm": 0.17783689498901367, | |
| "learning_rate": 0.0003466878491886288, | |
| "loss": 1.0422, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.8913577718615605, | |
| "grad_norm": 0.17219282686710358, | |
| "learning_rate": 0.0003453281798067208, | |
| "loss": 1.036, | |
| "step": 17410 | |
| }, | |
| { | |
| "epoch": 0.8918697522015154, | |
| "grad_norm": 0.17862632870674133, | |
| "learning_rate": 0.00034397384288983114, | |
| "loss": 1.0441, | |
| "step": 17420 | |
| }, | |
| { | |
| "epoch": 0.8923817325414705, | |
| "grad_norm": 0.17450949549674988, | |
| "learning_rate": 0.00034262481752465293, | |
| "loss": 1.0629, | |
| "step": 17430 | |
| }, | |
| { | |
| "epoch": 0.8928937128814254, | |
| "grad_norm": 0.17378470301628113, | |
| "learning_rate": 0.00034128108287989866, | |
| "loss": 1.0322, | |
| "step": 17440 | |
| }, | |
| { | |
| "epoch": 0.8934056932213803, | |
| "grad_norm": 0.17379970848560333, | |
| "learning_rate": 0.00033994261820597885, | |
| "loss": 1.0553, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 0.8939176735613352, | |
| "grad_norm": 0.17971958220005035, | |
| "learning_rate": 0.00033860940283468143, | |
| "loss": 1.0532, | |
| "step": 17460 | |
| }, | |
| { | |
| "epoch": 0.8944296539012901, | |
| "grad_norm": 0.17435471713542938, | |
| "learning_rate": 0.0003372814161788526, | |
| "loss": 1.0289, | |
| "step": 17470 | |
| }, | |
| { | |
| "epoch": 0.8949416342412452, | |
| "grad_norm": 0.17900234460830688, | |
| "learning_rate": 0.00033595863773207914, | |
| "loss": 1.0407, | |
| "step": 17480 | |
| }, | |
| { | |
| "epoch": 0.8954536145812001, | |
| "grad_norm": 0.1703522503376007, | |
| "learning_rate": 0.00033464104706837144, | |
| "loss": 1.0505, | |
| "step": 17490 | |
| }, | |
| { | |
| "epoch": 0.895965594921155, | |
| "grad_norm": 0.1772749274969101, | |
| "learning_rate": 0.00033332862384184833, | |
| "loss": 1.0504, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.8964775752611099, | |
| "grad_norm": 0.19156505167484283, | |
| "learning_rate": 0.0003320213477864227, | |
| "loss": 1.0537, | |
| "step": 17510 | |
| }, | |
| { | |
| "epoch": 0.896989555601065, | |
| "grad_norm": 0.17889319360256195, | |
| "learning_rate": 0.00033071919871548877, | |
| "loss": 1.0371, | |
| "step": 17520 | |
| }, | |
| { | |
| "epoch": 0.8975015359410199, | |
| "grad_norm": 0.17776621878147125, | |
| "learning_rate": 0.0003294221565216104, | |
| "loss": 1.0498, | |
| "step": 17530 | |
| }, | |
| { | |
| "epoch": 0.8980135162809748, | |
| "grad_norm": 0.1731380671262741, | |
| "learning_rate": 0.0003281302011762101, | |
| "loss": 1.048, | |
| "step": 17540 | |
| }, | |
| { | |
| "epoch": 0.8985254966209297, | |
| "grad_norm": 0.17784886062145233, | |
| "learning_rate": 0.0003268433127292607, | |
| "loss": 1.0477, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 0.8990374769608847, | |
| "grad_norm": 0.17313584685325623, | |
| "learning_rate": 0.00032556147130897615, | |
| "loss": 1.0323, | |
| "step": 17560 | |
| }, | |
| { | |
| "epoch": 0.8995494573008397, | |
| "grad_norm": 0.17907077074050903, | |
| "learning_rate": 0.00032428465712150536, | |
| "loss": 1.0527, | |
| "step": 17570 | |
| }, | |
| { | |
| "epoch": 0.9000614376407946, | |
| "grad_norm": 0.1737951934337616, | |
| "learning_rate": 0.0003230128504506268, | |
| "loss": 1.036, | |
| "step": 17580 | |
| }, | |
| { | |
| "epoch": 0.9005734179807495, | |
| "grad_norm": 0.17653332650661469, | |
| "learning_rate": 0.00032174603165744314, | |
| "loss": 1.0478, | |
| "step": 17590 | |
| }, | |
| { | |
| "epoch": 0.9010853983207044, | |
| "grad_norm": 0.16936801373958588, | |
| "learning_rate": 0.00032048418118007897, | |
| "loss": 1.0452, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.9015973786606595, | |
| "grad_norm": 0.17044688761234283, | |
| "learning_rate": 0.00031922727953337794, | |
| "loss": 1.0433, | |
| "step": 17610 | |
| }, | |
| { | |
| "epoch": 0.9021093590006144, | |
| "grad_norm": 0.16897530853748322, | |
| "learning_rate": 0.0003179753073086024, | |
| "loss": 1.041, | |
| "step": 17620 | |
| }, | |
| { | |
| "epoch": 0.9026213393405693, | |
| "grad_norm": 0.17904484272003174, | |
| "learning_rate": 0.00031672824517313354, | |
| "loss": 1.0562, | |
| "step": 17630 | |
| }, | |
| { | |
| "epoch": 0.9031333196805242, | |
| "grad_norm": 0.1729121208190918, | |
| "learning_rate": 0.0003154860738701725, | |
| "loss": 1.0345, | |
| "step": 17640 | |
| }, | |
| { | |
| "epoch": 0.9036453000204792, | |
| "grad_norm": 0.17275741696357727, | |
| "learning_rate": 0.00031424877421844385, | |
| "loss": 1.0494, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 0.9041572803604342, | |
| "grad_norm": 0.16756050288677216, | |
| "learning_rate": 0.0003130163271118985, | |
| "loss": 1.0305, | |
| "step": 17660 | |
| }, | |
| { | |
| "epoch": 0.9046692607003891, | |
| "grad_norm": 0.17867998778820038, | |
| "learning_rate": 0.00031178871351941924, | |
| "loss": 1.045, | |
| "step": 17670 | |
| }, | |
| { | |
| "epoch": 0.905181241040344, | |
| "grad_norm": 0.17364557087421417, | |
| "learning_rate": 0.00031056591448452663, | |
| "loss": 1.0407, | |
| "step": 17680 | |
| }, | |
| { | |
| "epoch": 0.905693221380299, | |
| "grad_norm": 0.18060193955898285, | |
| "learning_rate": 0.0003093479111250863, | |
| "loss": 1.0404, | |
| "step": 17690 | |
| }, | |
| { | |
| "epoch": 0.906205201720254, | |
| "grad_norm": 0.17321224510669708, | |
| "learning_rate": 0.0003081346846330176, | |
| "loss": 1.0338, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.9067171820602089, | |
| "grad_norm": 0.1827027052640915, | |
| "learning_rate": 0.0003069262162740026, | |
| "loss": 1.0513, | |
| "step": 17710 | |
| }, | |
| { | |
| "epoch": 0.9072291624001638, | |
| "grad_norm": 0.17330406606197357, | |
| "learning_rate": 0.0003057224873871977, | |
| "loss": 1.0537, | |
| "step": 17720 | |
| }, | |
| { | |
| "epoch": 0.9077411427401187, | |
| "grad_norm": 0.1664852797985077, | |
| "learning_rate": 0.00030452347938494435, | |
| "loss": 1.0385, | |
| "step": 17730 | |
| }, | |
| { | |
| "epoch": 0.9082531230800738, | |
| "grad_norm": 0.1791536808013916, | |
| "learning_rate": 0.00030332917375248324, | |
| "loss": 1.0205, | |
| "step": 17740 | |
| }, | |
| { | |
| "epoch": 0.9087651034200287, | |
| "grad_norm": 0.168918177485466, | |
| "learning_rate": 0.0003021395520476674, | |
| "loss": 1.0278, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 0.9092770837599836, | |
| "grad_norm": 0.17502665519714355, | |
| "learning_rate": 0.00030095459590067796, | |
| "loss": 1.0533, | |
| "step": 17760 | |
| }, | |
| { | |
| "epoch": 0.9097890640999385, | |
| "grad_norm": 0.17242580652236938, | |
| "learning_rate": 0.00029977428701374024, | |
| "loss": 1.0465, | |
| "step": 17770 | |
| }, | |
| { | |
| "epoch": 0.9103010444398935, | |
| "grad_norm": 0.16884900629520416, | |
| "learning_rate": 0.0002985986071608414, | |
| "loss": 1.0553, | |
| "step": 17780 | |
| }, | |
| { | |
| "epoch": 0.9108130247798485, | |
| "grad_norm": 0.17999139428138733, | |
| "learning_rate": 0.00029742753818744894, | |
| "loss": 1.052, | |
| "step": 17790 | |
| }, | |
| { | |
| "epoch": 0.9113250051198034, | |
| "grad_norm": 0.19205188751220703, | |
| "learning_rate": 0.0002962610620102301, | |
| "loss": 1.0386, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.9118369854597583, | |
| "grad_norm": 0.17089873552322388, | |
| "learning_rate": 0.00029509916061677314, | |
| "loss": 1.0519, | |
| "step": 17810 | |
| }, | |
| { | |
| "epoch": 0.9123489657997133, | |
| "grad_norm": 0.1669624298810959, | |
| "learning_rate": 0.0002939418160653087, | |
| "loss": 1.045, | |
| "step": 17820 | |
| }, | |
| { | |
| "epoch": 0.9128609461396683, | |
| "grad_norm": 0.1757606416940689, | |
| "learning_rate": 0.000292789010484433, | |
| "loss": 1.0311, | |
| "step": 17830 | |
| }, | |
| { | |
| "epoch": 0.9133729264796232, | |
| "grad_norm": 0.1726016104221344, | |
| "learning_rate": 0.00029164072607283187, | |
| "loss": 1.0302, | |
| "step": 17840 | |
| }, | |
| { | |
| "epoch": 0.9138849068195781, | |
| "grad_norm": 0.17893843352794647, | |
| "learning_rate": 0.0002904969450990057, | |
| "loss": 1.0236, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 0.914396887159533, | |
| "grad_norm": 0.17613349854946136, | |
| "learning_rate": 0.00028935764990099594, | |
| "loss": 1.0467, | |
| "step": 17860 | |
| }, | |
| { | |
| "epoch": 0.914908867499488, | |
| "grad_norm": 0.1762663722038269, | |
| "learning_rate": 0.00028822282288611204, | |
| "loss": 1.0143, | |
| "step": 17870 | |
| }, | |
| { | |
| "epoch": 0.915420847839443, | |
| "grad_norm": 0.17385472357273102, | |
| "learning_rate": 0.00028709244653066, | |
| "loss": 1.0373, | |
| "step": 17880 | |
| }, | |
| { | |
| "epoch": 0.9159328281793979, | |
| "grad_norm": 0.173353374004364, | |
| "learning_rate": 0.0002859665033796716, | |
| "loss": 1.0231, | |
| "step": 17890 | |
| }, | |
| { | |
| "epoch": 0.9164448085193528, | |
| "grad_norm": 0.1739385724067688, | |
| "learning_rate": 0.0002848449760466353, | |
| "loss": 1.0174, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.9169567888593078, | |
| "grad_norm": 0.17758533358573914, | |
| "learning_rate": 0.000283727847213227, | |
| "loss": 1.0271, | |
| "step": 17910 | |
| }, | |
| { | |
| "epoch": 0.9174687691992628, | |
| "grad_norm": 0.17424450814723969, | |
| "learning_rate": 0.00028261509962904325, | |
| "loss": 1.0464, | |
| "step": 17920 | |
| }, | |
| { | |
| "epoch": 0.9179807495392177, | |
| "grad_norm": 0.18018485605716705, | |
| "learning_rate": 0.0002815067161113347, | |
| "loss": 1.0379, | |
| "step": 17930 | |
| }, | |
| { | |
| "epoch": 0.9184927298791726, | |
| "grad_norm": 0.18166567385196686, | |
| "learning_rate": 0.0002804026795447407, | |
| "loss": 1.0364, | |
| "step": 17940 | |
| }, | |
| { | |
| "epoch": 0.9190047102191276, | |
| "grad_norm": 0.17235900461673737, | |
| "learning_rate": 0.00027930297288102513, | |
| "loss": 1.052, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 0.9195166905590826, | |
| "grad_norm": 0.17493902146816254, | |
| "learning_rate": 0.000278207579138813, | |
| "loss": 1.0377, | |
| "step": 17960 | |
| }, | |
| { | |
| "epoch": 0.9200286708990375, | |
| "grad_norm": 0.17957419157028198, | |
| "learning_rate": 0.0002771164814033282, | |
| "loss": 1.0392, | |
| "step": 17970 | |
| }, | |
| { | |
| "epoch": 0.9205406512389924, | |
| "grad_norm": 0.178439199924469, | |
| "learning_rate": 0.00027602966282613264, | |
| "loss": 1.0333, | |
| "step": 17980 | |
| }, | |
| { | |
| "epoch": 0.9210526315789473, | |
| "grad_norm": 0.17528565227985382, | |
| "learning_rate": 0.0002749471066248655, | |
| "loss": 1.035, | |
| "step": 17990 | |
| }, | |
| { | |
| "epoch": 0.9215646119189023, | |
| "grad_norm": 0.18786676228046417, | |
| "learning_rate": 0.0002738687960829849, | |
| "loss": 1.0263, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.9220765922588573, | |
| "grad_norm": 0.18565250933170319, | |
| "learning_rate": 0.00027279471454950873, | |
| "loss": 1.0266, | |
| "step": 18010 | |
| }, | |
| { | |
| "epoch": 0.9225885725988122, | |
| "grad_norm": 0.17576780915260315, | |
| "learning_rate": 0.00027172484543875865, | |
| "loss": 1.0472, | |
| "step": 18020 | |
| }, | |
| { | |
| "epoch": 0.9231005529387671, | |
| "grad_norm": 0.17549046874046326, | |
| "learning_rate": 0.00027065917223010303, | |
| "loss": 1.0357, | |
| "step": 18030 | |
| }, | |
| { | |
| "epoch": 0.9236125332787221, | |
| "grad_norm": 0.17524850368499756, | |
| "learning_rate": 0.00026959767846770227, | |
| "loss": 1.0194, | |
| "step": 18040 | |
| }, | |
| { | |
| "epoch": 0.9241245136186771, | |
| "grad_norm": 0.18681474030017853, | |
| "learning_rate": 0.00026854034776025495, | |
| "loss": 1.0406, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 0.924636493958632, | |
| "grad_norm": 0.1830626130104065, | |
| "learning_rate": 0.000267487163780744, | |
| "loss": 1.0445, | |
| "step": 18060 | |
| }, | |
| { | |
| "epoch": 0.9251484742985869, | |
| "grad_norm": 0.1787140816450119, | |
| "learning_rate": 0.00026643811026618537, | |
| "loss": 1.0365, | |
| "step": 18070 | |
| }, | |
| { | |
| "epoch": 0.9256604546385419, | |
| "grad_norm": 0.1781841665506363, | |
| "learning_rate": 0.00026539317101737637, | |
| "loss": 1.0278, | |
| "step": 18080 | |
| }, | |
| { | |
| "epoch": 0.9261724349784968, | |
| "grad_norm": 0.18114568293094635, | |
| "learning_rate": 0.00026435232989864576, | |
| "loss": 1.0273, | |
| "step": 18090 | |
| }, | |
| { | |
| "epoch": 0.9266844153184518, | |
| "grad_norm": 0.18065612018108368, | |
| "learning_rate": 0.0002633155708376045, | |
| "loss": 1.0435, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.9271963956584067, | |
| "grad_norm": 0.17828424274921417, | |
| "learning_rate": 0.0002622828778248974, | |
| "loss": 1.0103, | |
| "step": 18110 | |
| }, | |
| { | |
| "epoch": 0.9277083759983616, | |
| "grad_norm": 0.17807289958000183, | |
| "learning_rate": 0.0002612542349139565, | |
| "loss": 1.0437, | |
| "step": 18120 | |
| }, | |
| { | |
| "epoch": 0.9282203563383166, | |
| "grad_norm": 0.17496445775032043, | |
| "learning_rate": 0.0002602296262207541, | |
| "loss": 1.0219, | |
| "step": 18130 | |
| }, | |
| { | |
| "epoch": 0.9287323366782716, | |
| "grad_norm": 0.17806415259838104, | |
| "learning_rate": 0.00025920903592355785, | |
| "loss": 1.0256, | |
| "step": 18140 | |
| }, | |
| { | |
| "epoch": 0.9292443170182265, | |
| "grad_norm": 0.17231720685958862, | |
| "learning_rate": 0.00025819244826268654, | |
| "loss": 1.0487, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 0.9297562973581814, | |
| "grad_norm": 0.18158575892448425, | |
| "learning_rate": 0.00025717984754026655, | |
| "loss": 1.0258, | |
| "step": 18160 | |
| }, | |
| { | |
| "epoch": 0.9302682776981364, | |
| "grad_norm": 0.17217537760734558, | |
| "learning_rate": 0.0002561712181199894, | |
| "loss": 1.012, | |
| "step": 18170 | |
| }, | |
| { | |
| "epoch": 0.9307802580380914, | |
| "grad_norm": 0.16844135522842407, | |
| "learning_rate": 0.0002551665444268703, | |
| "loss": 1.0449, | |
| "step": 18180 | |
| }, | |
| { | |
| "epoch": 0.9312922383780463, | |
| "grad_norm": 0.17478111386299133, | |
| "learning_rate": 0.0002541658109470081, | |
| "loss": 1.0357, | |
| "step": 18190 | |
| }, | |
| { | |
| "epoch": 0.9318042187180012, | |
| "grad_norm": 0.17291343212127686, | |
| "learning_rate": 0.00025316900222734496, | |
| "loss": 1.0406, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.9323161990579562, | |
| "grad_norm": 0.17205969989299774, | |
| "learning_rate": 0.00025217610287542845, | |
| "loss": 1.0263, | |
| "step": 18210 | |
| }, | |
| { | |
| "epoch": 0.9328281793979111, | |
| "grad_norm": 0.17579463124275208, | |
| "learning_rate": 0.0002511870975591733, | |
| "loss": 1.0487, | |
| "step": 18220 | |
| }, | |
| { | |
| "epoch": 0.9333401597378661, | |
| "grad_norm": 0.185591459274292, | |
| "learning_rate": 0.00025020197100662507, | |
| "loss": 1.0289, | |
| "step": 18230 | |
| }, | |
| { | |
| "epoch": 0.933852140077821, | |
| "grad_norm": 0.18697933852672577, | |
| "learning_rate": 0.0002492207080057241, | |
| "loss": 1.0445, | |
| "step": 18240 | |
| }, | |
| { | |
| "epoch": 0.934364120417776, | |
| "grad_norm": 0.1702352613210678, | |
| "learning_rate": 0.00024824329340407056, | |
| "loss": 1.017, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 0.9348761007577309, | |
| "grad_norm": 0.17386525869369507, | |
| "learning_rate": 0.0002472697121086907, | |
| "loss": 1.0265, | |
| "step": 18260 | |
| }, | |
| { | |
| "epoch": 0.9353880810976859, | |
| "grad_norm": 0.17194058001041412, | |
| "learning_rate": 0.0002462999490858035, | |
| "loss": 1.0305, | |
| "step": 18270 | |
| }, | |
| { | |
| "epoch": 0.9359000614376408, | |
| "grad_norm": 0.17600733041763306, | |
| "learning_rate": 0.00024533398936058893, | |
| "loss": 1.0161, | |
| "step": 18280 | |
| }, | |
| { | |
| "epoch": 0.9364120417775957, | |
| "grad_norm": 0.17031820118427277, | |
| "learning_rate": 0.0002443718180169563, | |
| "loss": 1.0435, | |
| "step": 18290 | |
| }, | |
| { | |
| "epoch": 0.9369240221175507, | |
| "grad_norm": 0.17277632653713226, | |
| "learning_rate": 0.00024341342019731398, | |
| "loss": 1.0321, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.9374360024575056, | |
| "grad_norm": 0.17314958572387695, | |
| "learning_rate": 0.00024245878110234033, | |
| "loss": 1.0419, | |
| "step": 18310 | |
| }, | |
| { | |
| "epoch": 0.9379479827974606, | |
| "grad_norm": 0.17943693697452545, | |
| "learning_rate": 0.0002415078859907547, | |
| "loss": 1.0455, | |
| "step": 18320 | |
| }, | |
| { | |
| "epoch": 0.9384599631374155, | |
| "grad_norm": 0.17218518257141113, | |
| "learning_rate": 0.00024056072017909026, | |
| "loss": 1.0174, | |
| "step": 18330 | |
| }, | |
| { | |
| "epoch": 0.9389719434773705, | |
| "grad_norm": 0.1672009378671646, | |
| "learning_rate": 0.0002396172690414667, | |
| "loss": 1.0304, | |
| "step": 18340 | |
| }, | |
| { | |
| "epoch": 0.9394839238173254, | |
| "grad_norm": 0.16872192919254303, | |
| "learning_rate": 0.00023867751800936513, | |
| "loss": 1.0334, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 0.9399959041572804, | |
| "grad_norm": 0.17709334194660187, | |
| "learning_rate": 0.0002377414525714023, | |
| "loss": 1.043, | |
| "step": 18360 | |
| }, | |
| { | |
| "epoch": 0.9405078844972353, | |
| "grad_norm": 0.17235656082630157, | |
| "learning_rate": 0.00023680905827310717, | |
| "loss": 1.0296, | |
| "step": 18370 | |
| }, | |
| { | |
| "epoch": 0.9410198648371902, | |
| "grad_norm": 0.17677216231822968, | |
| "learning_rate": 0.0002358803207166974, | |
| "loss": 1.0304, | |
| "step": 18380 | |
| }, | |
| { | |
| "epoch": 0.9415318451771452, | |
| "grad_norm": 0.17921361327171326, | |
| "learning_rate": 0.00023495522556085693, | |
| "loss": 1.0287, | |
| "step": 18390 | |
| }, | |
| { | |
| "epoch": 0.9420438255171002, | |
| "grad_norm": 0.18774552643299103, | |
| "learning_rate": 0.0002340337585205149, | |
| "loss": 1.0303, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.9425558058570551, | |
| "grad_norm": 0.1885557323694229, | |
| "learning_rate": 0.00023311590536662463, | |
| "loss": 1.0225, | |
| "step": 18410 | |
| }, | |
| { | |
| "epoch": 0.94306778619701, | |
| "grad_norm": 0.17091277241706848, | |
| "learning_rate": 0.00023220165192594432, | |
| "loss": 1.0216, | |
| "step": 18420 | |
| }, | |
| { | |
| "epoch": 0.943579766536965, | |
| "grad_norm": 0.17530862987041473, | |
| "learning_rate": 0.00023129098408081777, | |
| "loss": 1.0303, | |
| "step": 18430 | |
| }, | |
| { | |
| "epoch": 0.9440917468769199, | |
| "grad_norm": 0.17937549948692322, | |
| "learning_rate": 0.00023038388776895662, | |
| "loss": 1.0234, | |
| "step": 18440 | |
| }, | |
| { | |
| "epoch": 0.9446037272168749, | |
| "grad_norm": 0.1720314472913742, | |
| "learning_rate": 0.00022948034898322335, | |
| "loss": 1.0304, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 0.9451157075568298, | |
| "grad_norm": 0.1731894463300705, | |
| "learning_rate": 0.00022858035377141452, | |
| "loss": 1.021, | |
| "step": 18460 | |
| }, | |
| { | |
| "epoch": 0.9456276878967848, | |
| "grad_norm": 0.17468558251857758, | |
| "learning_rate": 0.00022768388823604584, | |
| "loss": 1.0224, | |
| "step": 18470 | |
| }, | |
| { | |
| "epoch": 0.9461396682367397, | |
| "grad_norm": 0.17135438323020935, | |
| "learning_rate": 0.00022679093853413717, | |
| "loss": 1.0392, | |
| "step": 18480 | |
| }, | |
| { | |
| "epoch": 0.9466516485766947, | |
| "grad_norm": 0.1784532517194748, | |
| "learning_rate": 0.00022590149087699918, | |
| "loss": 1.0183, | |
| "step": 18490 | |
| }, | |
| { | |
| "epoch": 0.9471636289166496, | |
| "grad_norm": 0.18522332608699799, | |
| "learning_rate": 0.00022501553153001985, | |
| "loss": 1.0361, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.9476756092566045, | |
| "grad_norm": 0.18401268124580383, | |
| "learning_rate": 0.00022413304681245284, | |
| "loss": 1.0329, | |
| "step": 18510 | |
| }, | |
| { | |
| "epoch": 0.9481875895965595, | |
| "grad_norm": 0.16760528087615967, | |
| "learning_rate": 0.00022325402309720624, | |
| "loss": 1.0199, | |
| "step": 18520 | |
| }, | |
| { | |
| "epoch": 0.9486995699365144, | |
| "grad_norm": 0.18120263516902924, | |
| "learning_rate": 0.00022237844681063175, | |
| "loss": 1.0252, | |
| "step": 18530 | |
| }, | |
| { | |
| "epoch": 0.9492115502764694, | |
| "grad_norm": 0.1899506002664566, | |
| "learning_rate": 0.00022150630443231562, | |
| "loss": 1.0064, | |
| "step": 18540 | |
| }, | |
| { | |
| "epoch": 0.9497235306164243, | |
| "grad_norm": 0.1819719672203064, | |
| "learning_rate": 0.00022063758249486932, | |
| "loss": 1.0246, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 0.9502355109563793, | |
| "grad_norm": 0.17660754919052124, | |
| "learning_rate": 0.00021977226758372213, | |
| "loss": 1.0305, | |
| "step": 18560 | |
| }, | |
| { | |
| "epoch": 0.9507474912963342, | |
| "grad_norm": 0.17415086925029755, | |
| "learning_rate": 0.00021891034633691347, | |
| "loss": 1.0369, | |
| "step": 18570 | |
| }, | |
| { | |
| "epoch": 0.9512594716362892, | |
| "grad_norm": 0.17310403287410736, | |
| "learning_rate": 0.00021805180544488684, | |
| "loss": 1.0272, | |
| "step": 18580 | |
| }, | |
| { | |
| "epoch": 0.9517714519762441, | |
| "grad_norm": 0.17484420537948608, | |
| "learning_rate": 0.0002171966316502845, | |
| "loss": 1.028, | |
| "step": 18590 | |
| }, | |
| { | |
| "epoch": 0.952283432316199, | |
| "grad_norm": 0.18543212115764618, | |
| "learning_rate": 0.00021634481174774217, | |
| "loss": 1.0296, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.952795412656154, | |
| "grad_norm": 0.1763850450515747, | |
| "learning_rate": 0.00021549633258368582, | |
| "loss": 1.0307, | |
| "step": 18610 | |
| }, | |
| { | |
| "epoch": 0.953307392996109, | |
| "grad_norm": 0.16824059188365936, | |
| "learning_rate": 0.00021465118105612805, | |
| "loss": 1.0206, | |
| "step": 18620 | |
| }, | |
| { | |
| "epoch": 0.9538193733360639, | |
| "grad_norm": 0.17931176722049713, | |
| "learning_rate": 0.00021380934411446574, | |
| "loss": 1.016, | |
| "step": 18630 | |
| }, | |
| { | |
| "epoch": 0.9543313536760188, | |
| "grad_norm": 0.18147091567516327, | |
| "learning_rate": 0.00021297080875927913, | |
| "loss": 1.0211, | |
| "step": 18640 | |
| }, | |
| { | |
| "epoch": 0.9548433340159738, | |
| "grad_norm": 0.18163631856441498, | |
| "learning_rate": 0.00021213556204213033, | |
| "loss": 1.0263, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 0.9553553143559287, | |
| "grad_norm": 0.17591601610183716, | |
| "learning_rate": 0.00021130359106536384, | |
| "loss": 1.0417, | |
| "step": 18660 | |
| }, | |
| { | |
| "epoch": 0.9558672946958837, | |
| "grad_norm": 0.17677730321884155, | |
| "learning_rate": 0.00021047488298190723, | |
| "loss": 1.0299, | |
| "step": 18670 | |
| }, | |
| { | |
| "epoch": 0.9563792750358386, | |
| "grad_norm": 0.17326125502586365, | |
| "learning_rate": 0.0002096494249950729, | |
| "loss": 1.0268, | |
| "step": 18680 | |
| }, | |
| { | |
| "epoch": 0.9568912553757936, | |
| "grad_norm": 0.1793946474790573, | |
| "learning_rate": 0.00020882720435836026, | |
| "loss": 1.0355, | |
| "step": 18690 | |
| }, | |
| { | |
| "epoch": 0.9574032357157485, | |
| "grad_norm": 0.1703524887561798, | |
| "learning_rate": 0.00020800820837525892, | |
| "loss": 1.005, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.9579152160557035, | |
| "grad_norm": 0.17965586483478546, | |
| "learning_rate": 0.000207192424399053, | |
| "loss": 1.0182, | |
| "step": 18710 | |
| }, | |
| { | |
| "epoch": 0.9584271963956584, | |
| "grad_norm": 0.16650822758674622, | |
| "learning_rate": 0.00020637983983262526, | |
| "loss": 1.0304, | |
| "step": 18720 | |
| }, | |
| { | |
| "epoch": 0.9589391767356134, | |
| "grad_norm": 0.1700984239578247, | |
| "learning_rate": 0.00020557044212826323, | |
| "loss": 1.0103, | |
| "step": 18730 | |
| }, | |
| { | |
| "epoch": 0.9594511570755683, | |
| "grad_norm": 0.18094299733638763, | |
| "learning_rate": 0.0002047642187874647, | |
| "loss": 1.0247, | |
| "step": 18740 | |
| }, | |
| { | |
| "epoch": 0.9599631374155232, | |
| "grad_norm": 0.16972561180591583, | |
| "learning_rate": 0.0002039611573607455, | |
| "loss": 1.0328, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 0.9604751177554782, | |
| "grad_norm": 0.1718764752149582, | |
| "learning_rate": 0.0002031612454474467, | |
| "loss": 1.0015, | |
| "step": 18760 | |
| }, | |
| { | |
| "epoch": 0.9609870980954331, | |
| "grad_norm": 0.17211291193962097, | |
| "learning_rate": 0.00020236447069554324, | |
| "loss": 1.0485, | |
| "step": 18770 | |
| }, | |
| { | |
| "epoch": 0.9614990784353881, | |
| "grad_norm": 0.17325459420681, | |
| "learning_rate": 0.00020157082080145356, | |
| "loss": 1.0122, | |
| "step": 18780 | |
| }, | |
| { | |
| "epoch": 0.962011058775343, | |
| "grad_norm": 0.1677115559577942, | |
| "learning_rate": 0.00020078028350984888, | |
| "loss": 1.0144, | |
| "step": 18790 | |
| }, | |
| { | |
| "epoch": 0.962523039115298, | |
| "grad_norm": 0.17302511632442474, | |
| "learning_rate": 0.00019999284661346487, | |
| "loss": 1.0247, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.9630350194552529, | |
| "grad_norm": 0.1713932901620865, | |
| "learning_rate": 0.00019920849795291223, | |
| "loss": 1.0135, | |
| "step": 18810 | |
| }, | |
| { | |
| "epoch": 0.9635469997952079, | |
| "grad_norm": 0.1779249906539917, | |
| "learning_rate": 0.00019842722541648977, | |
| "loss": 1.0166, | |
| "step": 18820 | |
| }, | |
| { | |
| "epoch": 0.9640589801351628, | |
| "grad_norm": 0.17072229087352753, | |
| "learning_rate": 0.00019764901693999665, | |
| "loss": 1.0214, | |
| "step": 18830 | |
| }, | |
| { | |
| "epoch": 0.9645709604751177, | |
| "grad_norm": 0.17682915925979614, | |
| "learning_rate": 0.00019687386050654655, | |
| "loss": 1.0412, | |
| "step": 18840 | |
| }, | |
| { | |
| "epoch": 0.9650829408150727, | |
| "grad_norm": 0.17209376394748688, | |
| "learning_rate": 0.00019610174414638203, | |
| "loss": 1.0139, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 0.9655949211550277, | |
| "grad_norm": 0.16988667845726013, | |
| "learning_rate": 0.0001953326559366896, | |
| "loss": 1.03, | |
| "step": 18860 | |
| }, | |
| { | |
| "epoch": 0.9661069014949826, | |
| "grad_norm": 0.17056208848953247, | |
| "learning_rate": 0.0001945665840014157, | |
| "loss": 1.0335, | |
| "step": 18870 | |
| }, | |
| { | |
| "epoch": 0.9666188818349375, | |
| "grad_norm": 0.17054276168346405, | |
| "learning_rate": 0.0001938035165110831, | |
| "loss": 1.0281, | |
| "step": 18880 | |
| }, | |
| { | |
| "epoch": 0.9671308621748925, | |
| "grad_norm": 0.17490647733211517, | |
| "learning_rate": 0.00019304344168260865, | |
| "loss": 1.0401, | |
| "step": 18890 | |
| }, | |
| { | |
| "epoch": 0.9676428425148474, | |
| "grad_norm": 0.17823657393455505, | |
| "learning_rate": 0.00019228634777912089, | |
| "loss": 1.0225, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.9681548228548024, | |
| "grad_norm": 0.1651022583246231, | |
| "learning_rate": 0.00019153222310977906, | |
| "loss": 1.0088, | |
| "step": 18910 | |
| }, | |
| { | |
| "epoch": 0.9686668031947573, | |
| "grad_norm": 0.18135780096054077, | |
| "learning_rate": 0.00019078105602959264, | |
| "loss": 1.0289, | |
| "step": 18920 | |
| }, | |
| { | |
| "epoch": 0.9691787835347123, | |
| "grad_norm": 0.17016355693340302, | |
| "learning_rate": 0.00019003283493924117, | |
| "loss": 1.0111, | |
| "step": 18930 | |
| }, | |
| { | |
| "epoch": 0.9696907638746672, | |
| "grad_norm": 0.17754383385181427, | |
| "learning_rate": 0.00018928754828489555, | |
| "loss": 1.0291, | |
| "step": 18940 | |
| }, | |
| { | |
| "epoch": 0.9702027442146222, | |
| "grad_norm": 0.16962246596813202, | |
| "learning_rate": 0.00018854518455803946, | |
| "loss": 1.0228, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 0.9707147245545771, | |
| "grad_norm": 0.17820075154304504, | |
| "learning_rate": 0.00018780573229529142, | |
| "loss": 1.0231, | |
| "step": 18960 | |
| }, | |
| { | |
| "epoch": 0.971226704894532, | |
| "grad_norm": 0.16597416996955872, | |
| "learning_rate": 0.00018706918007822834, | |
| "loss": 1.0327, | |
| "step": 18970 | |
| }, | |
| { | |
| "epoch": 0.971738685234487, | |
| "grad_norm": 0.17721499502658844, | |
| "learning_rate": 0.00018633551653320852, | |
| "loss": 1.0084, | |
| "step": 18980 | |
| }, | |
| { | |
| "epoch": 0.972250665574442, | |
| "grad_norm": 0.17141114175319672, | |
| "learning_rate": 0.0001856047303311967, | |
| "loss": 1.0361, | |
| "step": 18990 | |
| }, | |
| { | |
| "epoch": 0.9727626459143969, | |
| "grad_norm": 0.17473644018173218, | |
| "learning_rate": 0.0001848768101875884, | |
| "loss": 1.0051, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.9732746262543518, | |
| "grad_norm": 0.17746561765670776, | |
| "learning_rate": 0.00018415174486203638, | |
| "loss": 1.0266, | |
| "step": 19010 | |
| }, | |
| { | |
| "epoch": 0.9737866065943068, | |
| "grad_norm": 0.16750702261924744, | |
| "learning_rate": 0.00018342952315827656, | |
| "loss": 1.0282, | |
| "step": 19020 | |
| }, | |
| { | |
| "epoch": 0.9742985869342617, | |
| "grad_norm": 0.1748443841934204, | |
| "learning_rate": 0.00018271013392395522, | |
| "loss": 1.0183, | |
| "step": 19030 | |
| }, | |
| { | |
| "epoch": 0.9748105672742167, | |
| "grad_norm": 0.17715822160243988, | |
| "learning_rate": 0.0001819935660504572, | |
| "loss": 1.0145, | |
| "step": 19040 | |
| }, | |
| { | |
| "epoch": 0.9753225476141716, | |
| "grad_norm": 0.17972363531589508, | |
| "learning_rate": 0.0001812798084727336, | |
| "loss": 1.0069, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 0.9758345279541265, | |
| "grad_norm": 0.17496472597122192, | |
| "learning_rate": 0.00018056885016913175, | |
| "loss": 1.0074, | |
| "step": 19060 | |
| }, | |
| { | |
| "epoch": 0.9763465082940815, | |
| "grad_norm": 0.18323951959609985, | |
| "learning_rate": 0.00017986068016122433, | |
| "loss": 1.0487, | |
| "step": 19070 | |
| }, | |
| { | |
| "epoch": 0.9768584886340365, | |
| "grad_norm": 0.16890741884708405, | |
| "learning_rate": 0.00017915528751364033, | |
| "loss": 1.0153, | |
| "step": 19080 | |
| }, | |
| { | |
| "epoch": 0.9773704689739914, | |
| "grad_norm": 0.17116831243038177, | |
| "learning_rate": 0.0001784526613338959, | |
| "loss": 1.0132, | |
| "step": 19090 | |
| }, | |
| { | |
| "epoch": 0.9778824493139463, | |
| "grad_norm": 0.17036503553390503, | |
| "learning_rate": 0.00017775279077222617, | |
| "loss": 1.0228, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.9783944296539013, | |
| "grad_norm": 0.17859075963497162, | |
| "learning_rate": 0.00017705566502141802, | |
| "loss": 1.0123, | |
| "step": 19110 | |
| }, | |
| { | |
| "epoch": 0.9789064099938563, | |
| "grad_norm": 0.17719532549381256, | |
| "learning_rate": 0.00017636127331664266, | |
| "loss": 1.0385, | |
| "step": 19120 | |
| }, | |
| { | |
| "epoch": 0.9794183903338112, | |
| "grad_norm": 0.17673194408416748, | |
| "learning_rate": 0.00017566960493528995, | |
| "loss": 1.0224, | |
| "step": 19130 | |
| }, | |
| { | |
| "epoch": 0.9799303706737661, | |
| "grad_norm": 0.1806950718164444, | |
| "learning_rate": 0.00017498064919680242, | |
| "loss": 1.0111, | |
| "step": 19140 | |
| }, | |
| { | |
| "epoch": 0.9804423510137211, | |
| "grad_norm": 0.16843082010746002, | |
| "learning_rate": 0.00017429439546251066, | |
| "loss": 1.0059, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 0.980954331353676, | |
| "grad_norm": 0.17275168001651764, | |
| "learning_rate": 0.00017361083313546875, | |
| "loss": 1.037, | |
| "step": 19160 | |
| }, | |
| { | |
| "epoch": 0.981466311693631, | |
| "grad_norm": 0.17250047624111176, | |
| "learning_rate": 0.0001729299516602907, | |
| "loss": 1.0193, | |
| "step": 19170 | |
| }, | |
| { | |
| "epoch": 0.9819782920335859, | |
| "grad_norm": 0.17009197175502777, | |
| "learning_rate": 0.00017225174052298777, | |
| "loss": 1.0412, | |
| "step": 19180 | |
| }, | |
| { | |
| "epoch": 0.9824902723735408, | |
| "grad_norm": 0.16845643520355225, | |
| "learning_rate": 0.0001715761892508056, | |
| "loss": 1.0268, | |
| "step": 19190 | |
| }, | |
| { | |
| "epoch": 0.9830022527134958, | |
| "grad_norm": 0.16763417422771454, | |
| "learning_rate": 0.0001709032874120629, | |
| "loss": 1.0425, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.9835142330534508, | |
| "grad_norm": 0.1747148334980011, | |
| "learning_rate": 0.00017023302461599015, | |
| "loss": 1.0228, | |
| "step": 19210 | |
| }, | |
| { | |
| "epoch": 0.9840262133934057, | |
| "grad_norm": 0.17626087367534637, | |
| "learning_rate": 0.0001695653905125693, | |
| "loss": 1.0142, | |
| "step": 19220 | |
| }, | |
| { | |
| "epoch": 0.9845381937333606, | |
| "grad_norm": 0.17711155116558075, | |
| "learning_rate": 0.00016890037479237377, | |
| "loss": 1.0238, | |
| "step": 19230 | |
| }, | |
| { | |
| "epoch": 0.9850501740733156, | |
| "grad_norm": 0.1858174353837967, | |
| "learning_rate": 0.00016823796718640937, | |
| "loss": 1.033, | |
| "step": 19240 | |
| }, | |
| { | |
| "epoch": 0.9855621544132706, | |
| "grad_norm": 0.1855236142873764, | |
| "learning_rate": 0.0001675781574659558, | |
| "loss": 1.0276, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 0.9860741347532255, | |
| "grad_norm": 0.16916634142398834, | |
| "learning_rate": 0.0001669209354424084, | |
| "loss": 1.0208, | |
| "step": 19260 | |
| }, | |
| { | |
| "epoch": 0.9865861150931804, | |
| "grad_norm": 0.18142545223236084, | |
| "learning_rate": 0.00016626629096712137, | |
| "loss": 1.0302, | |
| "step": 19270 | |
| }, | |
| { | |
| "epoch": 0.9870980954331353, | |
| "grad_norm": 0.16748617589473724, | |
| "learning_rate": 0.00016561421393125036, | |
| "loss": 1.0244, | |
| "step": 19280 | |
| }, | |
| { | |
| "epoch": 0.9876100757730903, | |
| "grad_norm": 0.180519700050354, | |
| "learning_rate": 0.000164964694265597, | |
| "loss": 1.0009, | |
| "step": 19290 | |
| }, | |
| { | |
| "epoch": 0.9881220561130453, | |
| "grad_norm": 0.16856172680854797, | |
| "learning_rate": 0.00016431772194045298, | |
| "loss": 1.009, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.9886340364530002, | |
| "grad_norm": 0.17907920479774475, | |
| "learning_rate": 0.00016367328696544536, | |
| "loss": 1.0182, | |
| "step": 19310 | |
| }, | |
| { | |
| "epoch": 0.9891460167929551, | |
| "grad_norm": 0.18012414872646332, | |
| "learning_rate": 0.00016303137938938238, | |
| "loss": 1.0238, | |
| "step": 19320 | |
| }, | |
| { | |
| "epoch": 0.9896579971329101, | |
| "grad_norm": 0.17940422892570496, | |
| "learning_rate": 0.0001623919893000996, | |
| "loss": 1.035, | |
| "step": 19330 | |
| }, | |
| { | |
| "epoch": 0.9901699774728651, | |
| "grad_norm": 0.17534732818603516, | |
| "learning_rate": 0.00016175510682430694, | |
| "loss": 1.0282, | |
| "step": 19340 | |
| }, | |
| { | |
| "epoch": 0.99068195781282, | |
| "grad_norm": 0.17742076516151428, | |
| "learning_rate": 0.0001611207221274363, | |
| "loss": 1.0308, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 0.9911939381527749, | |
| "grad_norm": 0.174584299325943, | |
| "learning_rate": 0.00016048882541348943, | |
| "loss": 1.0433, | |
| "step": 19360 | |
| }, | |
| { | |
| "epoch": 0.9917059184927299, | |
| "grad_norm": 0.17817029356956482, | |
| "learning_rate": 0.00015985940692488709, | |
| "loss": 1.0088, | |
| "step": 19370 | |
| }, | |
| { | |
| "epoch": 0.9922178988326849, | |
| "grad_norm": 0.1764860898256302, | |
| "learning_rate": 0.00015923245694231792, | |
| "loss": 1.0051, | |
| "step": 19380 | |
| }, | |
| { | |
| "epoch": 0.9927298791726398, | |
| "grad_norm": 0.1679990142583847, | |
| "learning_rate": 0.00015860796578458873, | |
| "loss": 1.0383, | |
| "step": 19390 | |
| }, | |
| { | |
| "epoch": 0.9932418595125947, | |
| "grad_norm": 0.17141203582286835, | |
| "learning_rate": 0.00015798592380847468, | |
| "loss": 1.0367, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.9937538398525496, | |
| "grad_norm": 0.17301303148269653, | |
| "learning_rate": 0.00015736632140857067, | |
| "loss": 1.0227, | |
| "step": 19410 | |
| }, | |
| { | |
| "epoch": 0.9942658201925046, | |
| "grad_norm": 0.17585515975952148, | |
| "learning_rate": 0.00015674914901714278, | |
| "loss": 1.0373, | |
| "step": 19420 | |
| }, | |
| { | |
| "epoch": 0.9947778005324596, | |
| "grad_norm": 0.17036980390548706, | |
| "learning_rate": 0.0001561343971039807, | |
| "loss": 1.0025, | |
| "step": 19430 | |
| }, | |
| { | |
| "epoch": 0.9952897808724145, | |
| "grad_norm": 0.1802191138267517, | |
| "learning_rate": 0.00015552205617625053, | |
| "loss": 1.0378, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 0.9958017612123694, | |
| "grad_norm": 0.17641904950141907, | |
| "learning_rate": 0.000154912116778348, | |
| "loss": 1.0317, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 0.9963137415523244, | |
| "grad_norm": 0.18595443665981293, | |
| "learning_rate": 0.0001543045694917528, | |
| "loss": 1.0081, | |
| "step": 19460 | |
| }, | |
| { | |
| "epoch": 0.9968257218922794, | |
| "grad_norm": 0.17444072663784027, | |
| "learning_rate": 0.0001536994049348828, | |
| "loss": 1.0242, | |
| "step": 19470 | |
| }, | |
| { | |
| "epoch": 0.9973377022322343, | |
| "grad_norm": 0.17894035577774048, | |
| "learning_rate": 0.00015309661376294953, | |
| "loss": 1.0269, | |
| "step": 19480 | |
| }, | |
| { | |
| "epoch": 0.9978496825721892, | |
| "grad_norm": 0.17125560343265533, | |
| "learning_rate": 0.00015249618666781352, | |
| "loss": 1.0189, | |
| "step": 19490 | |
| }, | |
| { | |
| "epoch": 0.9983616629121441, | |
| "grad_norm": 0.1681634485721588, | |
| "learning_rate": 0.0001518981143778408, | |
| "loss": 1.0014, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.9988736432520992, | |
| "grad_norm": 0.17360231280326843, | |
| "learning_rate": 0.0001513023876577597, | |
| "loss": 1.0033, | |
| "step": 19510 | |
| }, | |
| { | |
| "epoch": 0.9993856235920541, | |
| "grad_norm": 0.17242667078971863, | |
| "learning_rate": 0.00015070899730851815, | |
| "loss": 1.0236, | |
| "step": 19520 | |
| }, | |
| { | |
| "epoch": 0.999897603932009, | |
| "grad_norm": 0.16095665097236633, | |
| "learning_rate": 0.0001501179341671418, | |
| "loss": 1.0393, | |
| "step": 19530 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 19532, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.8288897328545792e+17, | |
| "train_batch_size": 512, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |