| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 3125, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0032, |
| "grad_norm": 16.167704755253098, |
| "learning_rate": 1.437699680511182e-07, |
| "loss": 0.6528051853179931, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 15.890120546753822, |
| "learning_rate": 3.0351437699680514e-07, |
| "loss": 0.6462714195251464, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 14.94996510180698, |
| "learning_rate": 4.6325878594249205e-07, |
| "loss": 0.6038930416107178, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 7.595956825837255, |
| "learning_rate": 6.230031948881789e-07, |
| "loss": 0.49077792167663575, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 3.026643067758099, |
| "learning_rate": 7.82747603833866e-07, |
| "loss": 0.3725566864013672, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 1.45050871801394, |
| "learning_rate": 9.424920127795528e-07, |
| "loss": 0.3130798816680908, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0224, |
| "grad_norm": 0.7098603642718405, |
| "learning_rate": 1.1022364217252397e-06, |
| "loss": 0.29621334075927735, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 0.6027577608327673, |
| "learning_rate": 1.2619808306709266e-06, |
| "loss": 0.27455599308013917, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0288, |
| "grad_norm": 0.6521596145147045, |
| "learning_rate": 1.4217252396166134e-06, |
| "loss": 0.2667043447494507, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 0.5069890685833461, |
| "learning_rate": 1.5814696485623005e-06, |
| "loss": 0.26807360649108886, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0352, |
| "grad_norm": 0.5470393023746721, |
| "learning_rate": 1.7412140575079875e-06, |
| "loss": 0.26680865287780764, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0384, |
| "grad_norm": 0.5543553869620175, |
| "learning_rate": 1.9009584664536742e-06, |
| "loss": 0.25434055328369143, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0416, |
| "grad_norm": 0.5420531484574165, |
| "learning_rate": 2.060702875399361e-06, |
| "loss": 0.25767529010772705, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0448, |
| "grad_norm": 0.645702037816744, |
| "learning_rate": 2.220447284345048e-06, |
| "loss": 0.24863953590393068, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 0.6143136416629473, |
| "learning_rate": 2.380191693290735e-06, |
| "loss": 0.24553947448730468, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0512, |
| "grad_norm": 0.5094817219127052, |
| "learning_rate": 2.539936102236422e-06, |
| "loss": 0.2415369987487793, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0544, |
| "grad_norm": 0.6291606522275387, |
| "learning_rate": 2.699680511182109e-06, |
| "loss": 0.24887418746948242, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0576, |
| "grad_norm": 0.6248895072998087, |
| "learning_rate": 2.8594249201277955e-06, |
| "loss": 0.2414403438568115, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0608, |
| "grad_norm": 0.6640745861299296, |
| "learning_rate": 3.0191693290734825e-06, |
| "loss": 0.24553894996643066, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.6136916428260776, |
| "learning_rate": 3.17891373801917e-06, |
| "loss": 0.24655485153198242, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0672, |
| "grad_norm": 0.6572881584027297, |
| "learning_rate": 3.3386581469648564e-06, |
| "loss": 0.2433255910873413, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0704, |
| "grad_norm": 0.6365580690264084, |
| "learning_rate": 3.4984025559105434e-06, |
| "loss": 0.23687341213226318, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0736, |
| "grad_norm": 0.6771736107097397, |
| "learning_rate": 3.6581469648562303e-06, |
| "loss": 0.23829469680786133, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0768, |
| "grad_norm": 0.6990706788858505, |
| "learning_rate": 3.817891373801918e-06, |
| "loss": 0.23471264839172362, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.6029376877872676, |
| "learning_rate": 3.977635782747604e-06, |
| "loss": 0.23215394020080565, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0832, |
| "grad_norm": 0.6082124769869354, |
| "learning_rate": 4.137380191693291e-06, |
| "loss": 0.2326298713684082, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.0864, |
| "grad_norm": 0.7069824323872274, |
| "learning_rate": 4.297124600638978e-06, |
| "loss": 0.23525137901306153, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.0896, |
| "grad_norm": 0.6697633994539672, |
| "learning_rate": 4.456869009584665e-06, |
| "loss": 0.23122966289520264, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0928, |
| "grad_norm": 0.5896144959913211, |
| "learning_rate": 4.616613418530352e-06, |
| "loss": 0.2369994878768921, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.6202443536122002, |
| "learning_rate": 4.776357827476039e-06, |
| "loss": 0.23878774642944336, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0992, |
| "grad_norm": 0.654740818437731, |
| "learning_rate": 4.936102236421725e-06, |
| "loss": 0.22523627281188965, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1024, |
| "grad_norm": 0.5332231058888761, |
| "learning_rate": 4.999943833158769e-06, |
| "loss": 0.22634780406951904, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1056, |
| "grad_norm": 0.5353007164619794, |
| "learning_rate": 4.999600600490783e-06, |
| "loss": 0.23276047706604003, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1088, |
| "grad_norm": 0.53617134295571, |
| "learning_rate": 4.9989453817439345e-06, |
| "loss": 0.22672569751739502, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.5149149938648103, |
| "learning_rate": 4.997978258698942e-06, |
| "loss": 0.22631363868713378, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1152, |
| "grad_norm": 0.5959881018141326, |
| "learning_rate": 4.996699352066659e-06, |
| "loss": 0.22707018852233887, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1184, |
| "grad_norm": 0.6648028246958526, |
| "learning_rate": 4.995108821473014e-06, |
| "loss": 0.22777373790740968, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1216, |
| "grad_norm": 0.6395047869916185, |
| "learning_rate": 4.993206865439084e-06, |
| "loss": 0.22382116317749023, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.1248, |
| "grad_norm": 0.6449783716947614, |
| "learning_rate": 4.990993721356317e-06, |
| "loss": 0.22268824577331542, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.6709421623745665, |
| "learning_rate": 4.988469665456901e-06, |
| "loss": 0.22317943572998047, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1312, |
| "grad_norm": 0.5466948727484514, |
| "learning_rate": 4.985635012779288e-06, |
| "loss": 0.23101482391357422, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1344, |
| "grad_norm": 0.48989327197226856, |
| "learning_rate": 4.98249011712887e-06, |
| "loss": 0.2234072208404541, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1376, |
| "grad_norm": 0.5417400145938138, |
| "learning_rate": 4.979035371033824e-06, |
| "loss": 0.22212049961090088, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1408, |
| "grad_norm": 0.5576422767413268, |
| "learning_rate": 4.975271205696115e-06, |
| "loss": 0.22228083610534669, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.6175584799790863, |
| "learning_rate": 4.971198090937671e-06, |
| "loss": 0.21532373428344725, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1472, |
| "grad_norm": 0.6360712146764758, |
| "learning_rate": 4.966816535141756e-06, |
| "loss": 0.21311187744140625, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1504, |
| "grad_norm": 0.5401953881204377, |
| "learning_rate": 4.9621270851895035e-06, |
| "loss": 0.22237277030944824, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1536, |
| "grad_norm": 0.5988873649948656, |
| "learning_rate": 4.957130326391662e-06, |
| "loss": 0.22391064167022706, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.1568, |
| "grad_norm": 0.5132670412160366, |
| "learning_rate": 4.951826882415544e-06, |
| "loss": 0.2206397533416748, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5935020011592513, |
| "learning_rate": 4.946217415207177e-06, |
| "loss": 0.2148068904876709, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1632, |
| "grad_norm": 0.5324390349507315, |
| "learning_rate": 4.940302624908689e-06, |
| "loss": 0.21909193992614745, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1664, |
| "grad_norm": 0.6082929578051663, |
| "learning_rate": 4.934083249770912e-06, |
| "loss": 0.2133782386779785, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.1696, |
| "grad_norm": 0.6272295187969801, |
| "learning_rate": 4.927560066061251e-06, |
| "loss": 0.2180723190307617, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.1728, |
| "grad_norm": 0.5538741111929965, |
| "learning_rate": 4.920733887966783e-06, |
| "loss": 0.22759020328521729, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.5703593568416581, |
| "learning_rate": 4.913605567492636e-06, |
| "loss": 0.21657073497772217, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.1792, |
| "grad_norm": 0.5873043850881617, |
| "learning_rate": 4.906175994355656e-06, |
| "loss": 0.21824207305908203, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.1824, |
| "grad_norm": 0.7955355117519857, |
| "learning_rate": 4.898446095873345e-06, |
| "loss": 0.2209712028503418, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.1856, |
| "grad_norm": 0.5347403539894492, |
| "learning_rate": 4.890416836848128e-06, |
| "loss": 0.2184591293334961, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.1888, |
| "grad_norm": 0.5464598874722423, |
| "learning_rate": 4.882089219446925e-06, |
| "loss": 0.2130581855773926, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.5871382794412585, |
| "learning_rate": 4.873464283076074e-06, |
| "loss": 0.21770844459533692, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.1952, |
| "grad_norm": 0.5516595084585112, |
| "learning_rate": 4.864543104251587e-06, |
| "loss": 0.21629047393798828, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.1984, |
| "grad_norm": 0.5949100146178041, |
| "learning_rate": 4.855326796464798e-06, |
| "loss": 0.22033746242523194, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2016, |
| "grad_norm": 0.5798876425998256, |
| "learning_rate": 4.8458165100433725e-06, |
| "loss": 0.21477458477020264, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.2048, |
| "grad_norm": 0.563545251458103, |
| "learning_rate": 4.836013432007738e-06, |
| "loss": 0.21490144729614258, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.5256728978801903, |
| "learning_rate": 4.825918785922921e-06, |
| "loss": 0.21858677864074708, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2112, |
| "grad_norm": 0.5062609806869888, |
| "learning_rate": 4.8155338317458315e-06, |
| "loss": 0.21592459678649903, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2144, |
| "grad_norm": 0.555318042395406, |
| "learning_rate": 4.804859865668002e-06, |
| "loss": 0.21323423385620116, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.2176, |
| "grad_norm": 0.6382467151310525, |
| "learning_rate": 4.793898219953804e-06, |
| "loss": 0.2151188373565674, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.2208, |
| "grad_norm": 0.5426280956852546, |
| "learning_rate": 4.782650262774164e-06, |
| "loss": 0.2155141830444336, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.5602841392771764, |
| "learning_rate": 4.7711173980357886e-06, |
| "loss": 0.21242978572845458, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2272, |
| "grad_norm": 0.5837827171492797, |
| "learning_rate": 4.759301065205947e-06, |
| "loss": 0.2129213333129883, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.2304, |
| "grad_norm": 0.5678516858648391, |
| "learning_rate": 4.7472027391328e-06, |
| "loss": 0.21422340869903564, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.2336, |
| "grad_norm": 0.6213695156464779, |
| "learning_rate": 4.734823929861317e-06, |
| "loss": 0.2172607660293579, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2368, |
| "grad_norm": 0.6084105321286742, |
| "learning_rate": 4.722166182444801e-06, |
| "loss": 0.21331138610839845, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5848312022835148, |
| "learning_rate": 4.709231076752045e-06, |
| "loss": 0.21255254745483398, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2432, |
| "grad_norm": 0.5855428740644943, |
| "learning_rate": 4.696020227270142e-06, |
| "loss": 0.21734881401062012, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2464, |
| "grad_norm": 0.5135013968609298, |
| "learning_rate": 4.6825352829029705e-06, |
| "loss": 0.21321442127227783, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.2496, |
| "grad_norm": 0.5938685951597557, |
| "learning_rate": 4.668777926765392e-06, |
| "loss": 0.21113758087158202, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.2528, |
| "grad_norm": 0.6490004462160337, |
| "learning_rate": 4.6547498759731725e-06, |
| "loss": 0.20692987442016603, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.5694207965471786, |
| "learning_rate": 4.6404528814286575e-06, |
| "loss": 0.20959222316741943, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2592, |
| "grad_norm": 0.5648942925010132, |
| "learning_rate": 4.6258887276022425e-06, |
| "loss": 0.21758944988250734, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.2624, |
| "grad_norm": 0.6544068998265237, |
| "learning_rate": 4.611059232309639e-06, |
| "loss": 0.21146907806396484, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.2656, |
| "grad_norm": 0.6680185905090128, |
| "learning_rate": 4.595966246484986e-06, |
| "loss": 0.21348462104797364, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.2688, |
| "grad_norm": 0.4956164506371995, |
| "learning_rate": 4.580611653949829e-06, |
| "loss": 0.21317172050476074, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.6491508776235345, |
| "learning_rate": 4.564997371177992e-06, |
| "loss": 0.2108323574066162, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.2752, |
| "grad_norm": 0.6859739128419746, |
| "learning_rate": 4.54912534705637e-06, |
| "loss": 0.21068863868713378, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.2784, |
| "grad_norm": 0.5876140035889241, |
| "learning_rate": 4.532997562641683e-06, |
| "loss": 0.20738301277160645, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.2816, |
| "grad_norm": 0.5388630641864397, |
| "learning_rate": 4.516616030913214e-06, |
| "loss": 0.2113194465637207, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.2848, |
| "grad_norm": 0.527263546069221, |
| "learning_rate": 4.499982796521556e-06, |
| "loss": 0.20718231201171874, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.6778383199902553, |
| "learning_rate": 4.48309993553341e-06, |
| "loss": 0.20899975299835205, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2912, |
| "grad_norm": 0.6041502046582736, |
| "learning_rate": 4.465969555172468e-06, |
| "loss": 0.20922982692718506, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.2944, |
| "grad_norm": 0.5872507915529911, |
| "learning_rate": 4.448593793556391e-06, |
| "loss": 0.21518073081970215, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.2976, |
| "grad_norm": 0.5414243473578003, |
| "learning_rate": 4.430974819429954e-06, |
| "loss": 0.20869126319885253, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.3008, |
| "grad_norm": 0.4624854855413159, |
| "learning_rate": 4.413114831894344e-06, |
| "loss": 0.20277881622314453, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.5247854876993729, |
| "learning_rate": 4.3950160601326865e-06, |
| "loss": 0.20181698799133302, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3072, |
| "grad_norm": 0.5808078368512252, |
| "learning_rate": 4.376680763131811e-06, |
| "loss": 0.20898809432983398, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.3104, |
| "grad_norm": 0.5805212694083882, |
| "learning_rate": 4.358111229400296e-06, |
| "loss": 0.21212198734283447, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.3136, |
| "grad_norm": 0.5721764020420262, |
| "learning_rate": 4.33930977668283e-06, |
| "loss": 0.21448736190795897, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.3168, |
| "grad_norm": 0.5598008397585128, |
| "learning_rate": 4.320278751670922e-06, |
| "loss": 0.20758256912231446, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5522723710696453, |
| "learning_rate": 4.301020529710009e-06, |
| "loss": 0.20947573184967042, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3232, |
| "grad_norm": 0.5556932215476815, |
| "learning_rate": 4.281537514502962e-06, |
| "loss": 0.2131945848464966, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.3264, |
| "grad_norm": 0.5256326530235461, |
| "learning_rate": 4.261832137810093e-06, |
| "loss": 0.20962438583374024, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.3296, |
| "grad_norm": 0.5141067804644184, |
| "learning_rate": 4.241906859145611e-06, |
| "loss": 0.21035046577453614, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.3328, |
| "grad_norm": 0.509376911595103, |
| "learning_rate": 4.221764165470661e-06, |
| "loss": 0.20757730007171632, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.5632632187185198, |
| "learning_rate": 4.201406570882898e-06, |
| "loss": 0.20691304206848143, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.3392, |
| "grad_norm": 0.5786515758035645, |
| "learning_rate": 4.180836616302704e-06, |
| "loss": 0.20582923889160157, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.3424, |
| "grad_norm": 0.591108109764431, |
| "learning_rate": 4.160056869156041e-06, |
| "loss": 0.2102893590927124, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.3456, |
| "grad_norm": 0.5367428274966828, |
| "learning_rate": 4.139069923053995e-06, |
| "loss": 0.20834057331085204, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.3488, |
| "grad_norm": 0.49962583382458753, |
| "learning_rate": 4.117878397469062e-06, |
| "loss": 0.2114588975906372, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.5580828852277292, |
| "learning_rate": 4.096484937408195e-06, |
| "loss": 0.2029412269592285, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3552, |
| "grad_norm": 0.5671943339841842, |
| "learning_rate": 4.074892213082676e-06, |
| "loss": 0.20308828353881836, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.3584, |
| "grad_norm": 0.5583868175031171, |
| "learning_rate": 4.0531029195748265e-06, |
| "loss": 0.2104210376739502, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.3616, |
| "grad_norm": 0.5452939479895703, |
| "learning_rate": 4.03111977650163e-06, |
| "loss": 0.20968456268310548, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.3648, |
| "grad_norm": 0.6195183591357212, |
| "learning_rate": 4.008945527675281e-06, |
| "loss": 0.20957679748535157, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.6171258889408775, |
| "learning_rate": 3.986582940760717e-06, |
| "loss": 0.1984492540359497, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.3712, |
| "grad_norm": 0.6164010362674036, |
| "learning_rate": 3.9640348069301785e-06, |
| "loss": 0.20632429122924806, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.3744, |
| "grad_norm": 0.5558070727772452, |
| "learning_rate": 3.941303940514826e-06, |
| "loss": 0.20776019096374512, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.3776, |
| "grad_norm": 0.5943916453083408, |
| "learning_rate": 3.918393178653472e-06, |
| "loss": 0.20839078426361085, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.3808, |
| "grad_norm": 0.5018385923371635, |
| "learning_rate": 3.895305380938468e-06, |
| "loss": 0.2044908285140991, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.48660847876218716, |
| "learning_rate": 3.872043429058783e-06, |
| "loss": 0.20328717231750487, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3872, |
| "grad_norm": 0.5586353975354608, |
| "learning_rate": 3.84861022644033e-06, |
| "loss": 0.20572426319122314, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.3904, |
| "grad_norm": 0.5709168788921625, |
| "learning_rate": 3.825008697883574e-06, |
| "loss": 0.21369614601135253, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.3936, |
| "grad_norm": 0.5589246090839964, |
| "learning_rate": 3.8012417891984776e-06, |
| "loss": 0.2072831630706787, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.3968, |
| "grad_norm": 0.5711782327133378, |
| "learning_rate": 3.777312466836819e-06, |
| "loss": 0.20526669025421143, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5656399244912672, |
| "learning_rate": 3.7532237175219378e-06, |
| "loss": 0.20442888736724854, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4032, |
| "grad_norm": 0.5520901024337347, |
| "learning_rate": 3.728978547875948e-06, |
| "loss": 0.2092284679412842, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4064, |
| "grad_norm": 0.553756025103199, |
| "learning_rate": 3.7045799840444712e-06, |
| "loss": 0.20277605056762696, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.4096, |
| "grad_norm": 0.5430187148138641, |
| "learning_rate": 3.6800310713189258e-06, |
| "loss": 0.20491743087768555, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4128, |
| "grad_norm": 0.7620941398223869, |
| "learning_rate": 3.6553348737564328e-06, |
| "loss": 0.2055516481399536, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.5265612798122297, |
| "learning_rate": 3.6304944737973794e-06, |
| "loss": 0.21130599975585937, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4192, |
| "grad_norm": 0.5353794185025008, |
| "learning_rate": 3.6055129718806836e-06, |
| "loss": 0.20504627227783204, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.4224, |
| "grad_norm": 0.5979654766960453, |
| "learning_rate": 3.5803934860568134e-06, |
| "loss": 0.2000981330871582, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.4256, |
| "grad_norm": 0.5915664314356317, |
| "learning_rate": 3.5551391515986163e-06, |
| "loss": 0.20581989288330077, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.4288, |
| "grad_norm": 0.562992516341074, |
| "learning_rate": 3.529753120609982e-06, |
| "loss": 0.20160207748413086, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.7046032478558245, |
| "learning_rate": 3.5042385616324243e-06, |
| "loss": 0.2043483018875122, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.4352, |
| "grad_norm": 0.5184492477363449, |
| "learning_rate": 3.4785986592495934e-06, |
| "loss": 0.20285494327545167, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.4384, |
| "grad_norm": 0.5806380074338086, |
| "learning_rate": 3.452836613689803e-06, |
| "loss": 0.2009434223175049, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.4416, |
| "grad_norm": 0.5204618736945451, |
| "learning_rate": 3.426955640426584e-06, |
| "loss": 0.20416510105133057, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.4448, |
| "grad_norm": 0.5765864502605341, |
| "learning_rate": 3.4009589697773605e-06, |
| "loss": 0.20326631069183348, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.5779970501460372, |
| "learning_rate": 3.3748498465002475e-06, |
| "loss": 0.20073289871215821, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4512, |
| "grad_norm": 0.6393995362823897, |
| "learning_rate": 3.3486315293890693e-06, |
| "loss": 0.20874643325805664, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.4544, |
| "grad_norm": 0.5108762095593324, |
| "learning_rate": 3.3223072908666053e-06, |
| "loss": 0.19835340976715088, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.4576, |
| "grad_norm": 0.6435280387445825, |
| "learning_rate": 3.295880416576153e-06, |
| "loss": 0.20992684364318848, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.4608, |
| "grad_norm": 0.5838753206875198, |
| "learning_rate": 3.269354204971427e-06, |
| "loss": 0.20265870094299315, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.6745984788898958, |
| "learning_rate": 3.242731966904865e-06, |
| "loss": 0.20037527084350587, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.4672, |
| "grad_norm": 0.5358161645108944, |
| "learning_rate": 3.2160170252143913e-06, |
| "loss": 0.20123369693756105, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.4704, |
| "grad_norm": 0.5112361606823973, |
| "learning_rate": 3.1892127143086716e-06, |
| "loss": 0.20752406120300293, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.4736, |
| "grad_norm": 0.6333759965752455, |
| "learning_rate": 3.1623223797509347e-06, |
| "loss": 0.19706425666809083, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.4768, |
| "grad_norm": 0.6206117536462172, |
| "learning_rate": 3.135349377841396e-06, |
| "loss": 0.20125732421875, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5541712474486513, |
| "learning_rate": 3.1082970751983497e-06, |
| "loss": 0.20749812126159667, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4832, |
| "grad_norm": 0.5835934183180771, |
| "learning_rate": 3.0811688483379546e-06, |
| "loss": 0.20475554466247559, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.4864, |
| "grad_norm": 0.5792514427898341, |
| "learning_rate": 3.0539680832528074e-06, |
| "loss": 0.20504088401794435, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.4896, |
| "grad_norm": 0.6358843481166787, |
| "learning_rate": 3.026698174989316e-06, |
| "loss": 0.20325000286102296, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.4928, |
| "grad_norm": 0.5059500889981753, |
| "learning_rate": 2.999362527223952e-06, |
| "loss": 0.2031909465789795, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.5388306821924389, |
| "learning_rate": 2.9719645518384194e-06, |
| "loss": 0.20504312515258788, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.4992, |
| "grad_norm": 0.5939936480408617, |
| "learning_rate": 2.944507668493807e-06, |
| "loss": 0.2084404706954956, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5024, |
| "grad_norm": 0.5687025114161597, |
| "learning_rate": 2.9169953042037623e-06, |
| "loss": 0.20367155075073243, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.5056, |
| "grad_norm": 0.5703613797457775, |
| "learning_rate": 2.889430892906754e-06, |
| "loss": 0.19950419664382935, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.5088, |
| "grad_norm": 0.50147360976836, |
| "learning_rate": 2.861817875037462e-06, |
| "loss": 0.19737675189971923, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.5962810686359508, |
| "learning_rate": 2.8341596970973683e-06, |
| "loss": 0.206866455078125, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5152, |
| "grad_norm": 0.564566320219468, |
| "learning_rate": 2.80645981122458e-06, |
| "loss": 0.2020205020904541, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.5184, |
| "grad_norm": 0.5246372929237232, |
| "learning_rate": 2.7787216747629508e-06, |
| "loss": 0.20939722061157226, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.5216, |
| "grad_norm": 0.5415181940486332, |
| "learning_rate": 2.7509487498305615e-06, |
| "loss": 0.19629446268081666, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.5248, |
| "grad_norm": 0.5627430222118958, |
| "learning_rate": 2.7231445028875924e-06, |
| "loss": 0.20240178108215331, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.5578941065241574, |
| "learning_rate": 2.6953124043036604e-06, |
| "loss": 0.2012562036514282, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.5312, |
| "grad_norm": 0.5487117054063715, |
| "learning_rate": 2.667455927924667e-06, |
| "loss": 0.20127537250518798, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.5344, |
| "grad_norm": 0.571360126804376, |
| "learning_rate": 2.6395785506392164e-06, |
| "loss": 0.1964709758758545, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.5376, |
| "grad_norm": 0.6088527341362128, |
| "learning_rate": 2.6116837519446407e-06, |
| "loss": 0.1997244954109192, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.5408, |
| "grad_norm": 0.5974545138027041, |
| "learning_rate": 2.5837750135127192e-06, |
| "loss": 0.19768773317337035, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.5496714163583045, |
| "learning_rate": 2.555855818755108e-06, |
| "loss": 0.20294923782348634, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5472, |
| "grad_norm": 0.7083231030411815, |
| "learning_rate": 2.5279296523885636e-06, |
| "loss": 0.20083847045898437, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.5504, |
| "grad_norm": 0.5938882026412365, |
| "learning_rate": 2.5e-06, |
| "loss": 0.20156488418579102, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.5536, |
| "grad_norm": 0.5963429209905415, |
| "learning_rate": 2.472070347611437e-06, |
| "loss": 0.19514652490615844, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.5568, |
| "grad_norm": 0.6395947365412442, |
| "learning_rate": 2.444144181244893e-06, |
| "loss": 0.20121583938598633, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.5998001248295249, |
| "learning_rate": 2.416224986487282e-06, |
| "loss": 0.19726226329803467, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.5632, |
| "grad_norm": 0.5593754591530539, |
| "learning_rate": 2.3883162480553605e-06, |
| "loss": 0.19497768878936766, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.5664, |
| "grad_norm": 0.5860785466160793, |
| "learning_rate": 2.3604214493607844e-06, |
| "loss": 0.1996150493621826, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.5696, |
| "grad_norm": 0.5963601131944923, |
| "learning_rate": 2.332544072075333e-06, |
| "loss": 0.20348951816558838, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.5728, |
| "grad_norm": 0.5745583695919886, |
| "learning_rate": 2.30468759569634e-06, |
| "loss": 0.2016512393951416, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.5720738010975994, |
| "learning_rate": 2.276855497112408e-06, |
| "loss": 0.1983588457107544, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5792, |
| "grad_norm": 0.594436652050367, |
| "learning_rate": 2.2490512501694394e-06, |
| "loss": 0.19393882751464844, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.5824, |
| "grad_norm": 0.5547702774883363, |
| "learning_rate": 2.2212783252370496e-06, |
| "loss": 0.19950855970382692, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.5856, |
| "grad_norm": 0.49741997333090354, |
| "learning_rate": 2.1935401887754213e-06, |
| "loss": 0.20486598014831542, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.5888, |
| "grad_norm": 0.6191188389453962, |
| "learning_rate": 2.165840302902632e-06, |
| "loss": 0.1979525566101074, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.613998551941137, |
| "learning_rate": 2.1381821249625383e-06, |
| "loss": 0.2030627727508545, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.5952, |
| "grad_norm": 0.6115410126221079, |
| "learning_rate": 2.1105691070932465e-06, |
| "loss": 0.1951197624206543, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.5984, |
| "grad_norm": 0.5666967026000811, |
| "learning_rate": 2.083004695796238e-06, |
| "loss": 0.1926891803741455, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.6016, |
| "grad_norm": 0.5564168831256036, |
| "learning_rate": 2.055492331506194e-06, |
| "loss": 0.20087857246398927, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.6048, |
| "grad_norm": 0.647003695530594, |
| "learning_rate": 2.0280354481615814e-06, |
| "loss": 0.1991624116897583, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.6020348842840653, |
| "learning_rate": 2.000637472776049e-06, |
| "loss": 0.20029563903808595, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6112, |
| "grad_norm": 0.593460784828495, |
| "learning_rate": 1.973301825010685e-06, |
| "loss": 0.19462828636169432, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.6144, |
| "grad_norm": 0.6796900420369784, |
| "learning_rate": 1.9460319167471934e-06, |
| "loss": 0.20009157657623292, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.6176, |
| "grad_norm": 0.5803908647953272, |
| "learning_rate": 1.9188311516620466e-06, |
| "loss": 0.19473812580108643, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.6208, |
| "grad_norm": 0.5919196787967083, |
| "learning_rate": 1.891702924801651e-06, |
| "loss": 0.20190510749816895, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.6056764566097385, |
| "learning_rate": 1.864650622158604e-06, |
| "loss": 0.2063821792602539, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.6272, |
| "grad_norm": 0.5106064574990916, |
| "learning_rate": 1.8376776202490666e-06, |
| "loss": 0.20139360427856445, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.6304, |
| "grad_norm": 0.5816570517079882, |
| "learning_rate": 1.8107872856913293e-06, |
| "loss": 0.19568054676055907, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.6336, |
| "grad_norm": 0.6100308085295513, |
| "learning_rate": 1.7839829747856096e-06, |
| "loss": 0.19661173820495606, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.6368, |
| "grad_norm": 0.6256775545767371, |
| "learning_rate": 1.7572680330951359e-06, |
| "loss": 0.19576869010925294, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.5979254874380191, |
| "learning_rate": 1.7306457950285747e-06, |
| "loss": 0.19802470207214357, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6432, |
| "grad_norm": 0.6445065470953916, |
| "learning_rate": 1.704119583423848e-06, |
| "loss": 0.19182772636413575, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.6464, |
| "grad_norm": 0.5238518416749739, |
| "learning_rate": 1.677692709133396e-06, |
| "loss": 0.19971816539764403, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.6496, |
| "grad_norm": 0.5902086462380663, |
| "learning_rate": 1.6513684706109311e-06, |
| "loss": 0.20058016777038573, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.6528, |
| "grad_norm": 0.5301315426540266, |
| "learning_rate": 1.6251501534997529e-06, |
| "loss": 0.19816763401031495, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.5702221922649561, |
| "learning_rate": 1.5990410302226405e-06, |
| "loss": 0.19167234897613525, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.6592, |
| "grad_norm": 0.5682142108318351, |
| "learning_rate": 1.5730443595734162e-06, |
| "loss": 0.19806729555130004, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.6624, |
| "grad_norm": 0.6268750721579749, |
| "learning_rate": 1.5471633863101982e-06, |
| "loss": 0.1990320086479187, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.6656, |
| "grad_norm": 0.6501758398050216, |
| "learning_rate": 1.521401340750407e-06, |
| "loss": 0.20063567161560059, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.6688, |
| "grad_norm": 0.5367071332530153, |
| "learning_rate": 1.495761438367577e-06, |
| "loss": 0.2000502109527588, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.6644202151690211, |
| "learning_rate": 1.4702468793900187e-06, |
| "loss": 0.19811663627624512, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.6752, |
| "grad_norm": 0.6020454013039992, |
| "learning_rate": 1.444860848401384e-06, |
| "loss": 0.19873985052108764, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.6784, |
| "grad_norm": 0.5672766014696592, |
| "learning_rate": 1.4196065139431866e-06, |
| "loss": 0.19663108587265016, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.6816, |
| "grad_norm": 0.6668756559032718, |
| "learning_rate": 1.3944870281193178e-06, |
| "loss": 0.19677751064300536, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.6848, |
| "grad_norm": 0.6146850263092741, |
| "learning_rate": 1.3695055262026208e-06, |
| "loss": 0.20252432823181152, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.6023134400750195, |
| "learning_rate": 1.3446651262435679e-06, |
| "loss": 0.19564807415008545, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.6912, |
| "grad_norm": 0.5973758444267007, |
| "learning_rate": 1.3199689286810746e-06, |
| "loss": 0.19767165184020996, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.6944, |
| "grad_norm": 0.604085220565822, |
| "learning_rate": 1.2954200159555294e-06, |
| "loss": 0.19245314598083496, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.6976, |
| "grad_norm": 0.5971658440027723, |
| "learning_rate": 1.2710214521240527e-06, |
| "loss": 0.19593756198883056, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.7008, |
| "grad_norm": 0.6712656742168871, |
| "learning_rate": 1.246776282478063e-06, |
| "loss": 0.19848381280899047, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.5303502593262494, |
| "learning_rate": 1.222687533163181e-06, |
| "loss": 0.19739968776702882, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7072, |
| "grad_norm": 0.6329890536946617, |
| "learning_rate": 1.1987582108015228e-06, |
| "loss": 0.19885218143463135, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.7104, |
| "grad_norm": 0.6175733280769058, |
| "learning_rate": 1.1749913021164255e-06, |
| "loss": 0.20003676414489746, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.7136, |
| "grad_norm": 0.6297338992517326, |
| "learning_rate": 1.1513897735596702e-06, |
| "loss": 0.19420522451400757, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.7168, |
| "grad_norm": 0.5570261846558745, |
| "learning_rate": 1.127956570941218e-06, |
| "loss": 0.19144604206085206, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.7464999016757174, |
| "learning_rate": 1.104694619061533e-06, |
| "loss": 0.20028018951416016, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.7232, |
| "grad_norm": 0.5813509472785208, |
| "learning_rate": 1.0816068213465295e-06, |
| "loss": 0.2022254228591919, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.7264, |
| "grad_norm": 0.5788680063085246, |
| "learning_rate": 1.0586960594851762e-06, |
| "loss": 0.19734264612197877, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.7296, |
| "grad_norm": 0.6879904092074834, |
| "learning_rate": 1.0359651930698217e-06, |
| "loss": 0.19566457271575927, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.7328, |
| "grad_norm": 0.545714278159425, |
| "learning_rate": 1.0134170592392837e-06, |
| "loss": 0.19808268547058105, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.6957466724150051, |
| "learning_rate": 9.910544723247204e-07, |
| "loss": 0.19703471660614014, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7392, |
| "grad_norm": 0.5722555379171206, |
| "learning_rate": 9.688802234983706e-07, |
| "loss": 0.19638856649398803, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.7424, |
| "grad_norm": 0.6657445816108672, |
| "learning_rate": 9.468970804251742e-07, |
| "loss": 0.1994560480117798, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.7456, |
| "grad_norm": 0.6118638240003964, |
| "learning_rate": 9.251077869173244e-07, |
| "loss": 0.19247424602508545, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.7488, |
| "grad_norm": 0.618262759129052, |
| "learning_rate": 9.035150625918054e-07, |
| "loss": 0.19384448528289794, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.5841167908088344, |
| "learning_rate": 8.821216025309395e-07, |
| "loss": 0.19670048952102662, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.7552, |
| "grad_norm": 0.6330443090953268, |
| "learning_rate": 8.609300769460055e-07, |
| "loss": 0.191538667678833, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.7584, |
| "grad_norm": 0.6922248169402944, |
| "learning_rate": 8.399431308439592e-07, |
| "loss": 0.19869886636734008, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.7616, |
| "grad_norm": 0.5821907331028691, |
| "learning_rate": 8.191633836972962e-07, |
| "loss": 0.19837281703948975, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.7648, |
| "grad_norm": 0.5484553164447705, |
| "learning_rate": 7.985934291171024e-07, |
| "loss": 0.19366707801818847, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.6131324978552078, |
| "learning_rate": 7.7823583452934e-07, |
| "loss": 0.19763607978820802, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.7712, |
| "grad_norm": 0.5665386766642198, |
| "learning_rate": 7.58093140854389e-07, |
| "loss": 0.19747262001037597, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.7744, |
| "grad_norm": 0.6702088035794936, |
| "learning_rate": 7.381678621899077e-07, |
| "loss": 0.19848825931549072, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.7776, |
| "grad_norm": 0.6808200224599221, |
| "learning_rate": 7.184624854970379e-07, |
| "loss": 0.19454023838043213, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.7808, |
| "grad_norm": 0.5446840545845119, |
| "learning_rate": 6.989794702899932e-07, |
| "loss": 0.1943270444869995, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.6415010178339859, |
| "learning_rate": 6.797212483290777e-07, |
| "loss": 0.19584910869598388, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.7872, |
| "grad_norm": 0.603526871568268, |
| "learning_rate": 6.60690223317171e-07, |
| "loss": 0.19342836141586303, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.7904, |
| "grad_norm": 0.5817111419419255, |
| "learning_rate": 6.418887705997046e-07, |
| "loss": 0.19574793577194213, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.7936, |
| "grad_norm": 0.7792382444355755, |
| "learning_rate": 6.23319236868189e-07, |
| "loss": 0.1987607717514038, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.7968, |
| "grad_norm": 0.6291788716222239, |
| "learning_rate": 6.049839398673141e-07, |
| "loss": 0.20009655952453614, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.674170182636883, |
| "learning_rate": 5.868851681056567e-07, |
| "loss": 0.2016763210296631, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8032, |
| "grad_norm": 0.5738700746068163, |
| "learning_rate": 5.690251805700467e-07, |
| "loss": 0.19858623743057252, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.8064, |
| "grad_norm": 0.5748267344102337, |
| "learning_rate": 5.514062064436096e-07, |
| "loss": 0.19959205389022827, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.8096, |
| "grad_norm": 0.6464282974919533, |
| "learning_rate": 5.34030444827533e-07, |
| "loss": 0.19621236324310304, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.8128, |
| "grad_norm": 0.6390320405050175, |
| "learning_rate": 5.169000644665895e-07, |
| "loss": 0.19293551445007323, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.5856228193289068, |
| "learning_rate": 5.000172034784442e-07, |
| "loss": 0.1952167272567749, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.8192, |
| "grad_norm": 0.6152721851543074, |
| "learning_rate": 4.833839690867853e-07, |
| "loss": 0.19755464792251587, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.8224, |
| "grad_norm": 0.6792777707129383, |
| "learning_rate": 4.6700243735831705e-07, |
| "loss": 0.1906466007232666, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.8256, |
| "grad_norm": 0.5650779115466599, |
| "learning_rate": 4.508746529436311e-07, |
| "loss": 0.1896218776702881, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.8288, |
| "grad_norm": 0.6068556104605155, |
| "learning_rate": 4.350026288220083e-07, |
| "loss": 0.1972370147705078, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.6087844635927864, |
| "learning_rate": 4.1938834605017133e-07, |
| "loss": 0.19401493072509765, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8352, |
| "grad_norm": 0.594443863161453, |
| "learning_rate": 4.0403375351501515e-07, |
| "loss": 0.19397275447845458, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.8384, |
| "grad_norm": 0.5777613928889838, |
| "learning_rate": 3.88940767690362e-07, |
| "loss": 0.19363962411880492, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.8416, |
| "grad_norm": 0.6122408540819826, |
| "learning_rate": 3.7411127239775774e-07, |
| "loss": 0.19224631786346436, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.8448, |
| "grad_norm": 0.5922115547592817, |
| "learning_rate": 3.595471185713431e-07, |
| "loss": 0.19027912616729736, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.6012010067551694, |
| "learning_rate": 3.4525012402682826e-07, |
| "loss": 0.1921192765235901, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.8512, |
| "grad_norm": 0.6089446682050474, |
| "learning_rate": 3.3122207323460804e-07, |
| "loss": 0.19460537433624267, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.8544, |
| "grad_norm": 0.6314431181993275, |
| "learning_rate": 3.1746471709702963e-07, |
| "loss": 0.19075865745544435, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.8576, |
| "grad_norm": 0.6136529603147252, |
| "learning_rate": 3.039797727298585e-07, |
| "loss": 0.1973212718963623, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.8608, |
| "grad_norm": 0.6278068265217286, |
| "learning_rate": 2.9076892324795546e-07, |
| "loss": 0.19564627408981322, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.6308491327804164, |
| "learning_rate": 2.778338175551995e-07, |
| "loss": 0.19089040756225586, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.8672, |
| "grad_norm": 0.6806226474068601, |
| "learning_rate": 2.6517607013868326e-07, |
| "loss": 0.19906394481658934, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.8704, |
| "grad_norm": 0.6497216896329614, |
| "learning_rate": 2.527972608672002e-07, |
| "loss": 0.19420729875564574, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.8736, |
| "grad_norm": 0.5988037888796804, |
| "learning_rate": 2.40698934794053e-07, |
| "loss": 0.1949334740638733, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.8768, |
| "grad_norm": 0.5825410688543936, |
| "learning_rate": 2.2888260196421237e-07, |
| "loss": 0.19373006820678712, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.5659393573725252, |
| "learning_rate": 2.1734973722583735e-07, |
| "loss": 0.19743962287902833, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.8832, |
| "grad_norm": 0.6810045862821603, |
| "learning_rate": 2.0610178004619564e-07, |
| "loss": 0.18792747259140014, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.8864, |
| "grad_norm": 0.5624807528399969, |
| "learning_rate": 1.9514013433199834e-07, |
| "loss": 0.20065484046936036, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.8896, |
| "grad_norm": 0.5300049949985157, |
| "learning_rate": 1.8446616825416958e-07, |
| "loss": 0.19963890314102173, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.8928, |
| "grad_norm": 0.6417643354263414, |
| "learning_rate": 1.7408121407708007e-07, |
| "loss": 0.19946534633636476, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.6263783317633913, |
| "learning_rate": 1.6398656799226253e-07, |
| "loss": 0.1873138427734375, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.8992, |
| "grad_norm": 0.6642472444356609, |
| "learning_rate": 1.5418348995662773e-07, |
| "loss": 0.1936098575592041, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.9024, |
| "grad_norm": 0.6361104958877116, |
| "learning_rate": 1.4467320353520275e-07, |
| "loss": 0.192909574508667, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.9056, |
| "grad_norm": 0.606401356191172, |
| "learning_rate": 1.3545689574841341e-07, |
| "loss": 0.1932598114013672, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.9088, |
| "grad_norm": 0.6138805257535019, |
| "learning_rate": 1.26535716923927e-07, |
| "loss": 0.19897468090057374, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.6113791423952993, |
| "learning_rate": 1.1791078055307493e-07, |
| "loss": 0.19516528844833375, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.9152, |
| "grad_norm": 0.5897316619244026, |
| "learning_rate": 1.0958316315187289e-07, |
| "loss": 0.1947079300880432, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.9184, |
| "grad_norm": 0.6570448249633108, |
| "learning_rate": 1.0155390412665528e-07, |
| "loss": 0.19286593198776245, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.9216, |
| "grad_norm": 0.6143543897264965, |
| "learning_rate": 9.38240056443443e-08, |
| "loss": 0.18985612392425538, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.9248, |
| "grad_norm": 0.6208574768565508, |
| "learning_rate": 8.639443250736402e-08, |
| "loss": 0.1930636167526245, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.6380337536968056, |
| "learning_rate": 7.926611203321777e-08, |
| "loss": 0.1940324306488037, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9312, |
| "grad_norm": 0.6333119199427104, |
| "learning_rate": 7.243993393874882e-08, |
| "loss": 0.195207679271698, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.9344, |
| "grad_norm": 0.5601684784399228, |
| "learning_rate": 6.591675022908805e-08, |
| "loss": 0.1926344394683838, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.9376, |
| "grad_norm": 0.7001254632467586, |
| "learning_rate": 5.969737509131241e-08, |
| "loss": 0.189910888671875, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.9408, |
| "grad_norm": 0.5707165379372983, |
| "learning_rate": 5.3782584792823334e-08, |
| "loss": 0.1941395878791809, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.637882100753534, |
| "learning_rate": 4.817311758445686e-08, |
| "loss": 0.19586544036865233, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.9472, |
| "grad_norm": 0.58305847153215, |
| "learning_rate": 4.286967360833866e-08, |
| "loss": 0.19621498584747316, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.9504, |
| "grad_norm": 0.6444124781946634, |
| "learning_rate": 3.787291481049754e-08, |
| "loss": 0.19597216844558715, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.9536, |
| "grad_norm": 0.68778482150424, |
| "learning_rate": 3.3183464858244364e-08, |
| "loss": 0.20229551792144776, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.9568, |
| "grad_norm": 0.589065287919965, |
| "learning_rate": 2.8801909062328992e-08, |
| "loss": 0.1879359722137451, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.7200708770444023, |
| "learning_rate": 2.4728794303886248e-08, |
| "loss": 0.18806444406509398, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9632, |
| "grad_norm": 0.6369212243333968, |
| "learning_rate": 2.0964628966175794e-08, |
| "loss": 0.19293060302734374, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.9664, |
| "grad_norm": 0.6150129328436796, |
| "learning_rate": 1.750988287113009e-08, |
| "loss": 0.19189660549163817, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.9696, |
| "grad_norm": 0.5966036549992078, |
| "learning_rate": 1.4364987220713278e-08, |
| "loss": 0.1992994427680969, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.9728, |
| "grad_norm": 0.6785615385564472, |
| "learning_rate": 1.1530334543099763e-08, |
| "loss": 0.19624128341674804, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.626236262460755, |
| "learning_rate": 9.006278643683697e-09, |
| "loss": 0.19942662715911866, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.9792, |
| "grad_norm": 0.71228117768398, |
| "learning_rate": 6.793134560916514e-09, |
| "loss": 0.2007957935333252, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.9824, |
| "grad_norm": 0.5740813965788273, |
| "learning_rate": 4.891178526986451e-09, |
| "loss": 0.19730459451675414, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.9856, |
| "grad_norm": 0.6522249776214731, |
| "learning_rate": 3.3006479333413943e-09, |
| "loss": 0.1995969295501709, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.9888, |
| "grad_norm": 0.6484316026206892, |
| "learning_rate": 2.021741301058422e-09, |
| "loss": 0.19556543827056885, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.6355663767406068, |
| "learning_rate": 1.0546182560652872e-09, |
| "loss": 0.19732578992843627, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.9952, |
| "grad_norm": 0.6169267731488666, |
| "learning_rate": 3.9939950921774607e-10, |
| "loss": 0.1917206883430481, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.9984, |
| "grad_norm": 0.5994063111681457, |
| "learning_rate": 5.616684123160854e-11, |
| "loss": 0.1916499137878418, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 3125, |
| "total_flos": 1201860236279808.0, |
| "train_loss": 0.2128027264213562, |
| "train_runtime": 15463.9635, |
| "train_samples_per_second": 12.933, |
| "train_steps_per_second": 0.202 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 3125, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1201860236279808.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|