| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9990766389658357, |
| "eval_steps": 500, |
| "global_step": 541, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0018467220683287165, |
| "grad_norm": 0.18254348635673523, |
| "learning_rate": 3.636363636363636e-06, |
| "loss": 0.8244, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.009233610341643583, |
| "grad_norm": 0.1816311925649643, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 0.7911, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.018467220683287166, |
| "grad_norm": 0.16621439158916473, |
| "learning_rate": 3.6363636363636364e-05, |
| "loss": 0.7968, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.027700831024930747, |
| "grad_norm": 0.1893552839756012, |
| "learning_rate": 5.4545454545454546e-05, |
| "loss": 0.7942, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03693444136657433, |
| "grad_norm": 0.12671160697937012, |
| "learning_rate": 7.272727272727273e-05, |
| "loss": 0.7531, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.046168051708217916, |
| "grad_norm": 0.12510479986667633, |
| "learning_rate": 9.090909090909092e-05, |
| "loss": 0.7688, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.055401662049861494, |
| "grad_norm": 0.1175699383020401, |
| "learning_rate": 0.00010909090909090909, |
| "loss": 0.7694, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06463527239150507, |
| "grad_norm": 0.11805781722068787, |
| "learning_rate": 0.00012727272727272728, |
| "loss": 0.7348, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07386888273314866, |
| "grad_norm": 0.11772854626178741, |
| "learning_rate": 0.00014545454545454546, |
| "loss": 0.7407, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08310249307479224, |
| "grad_norm": 0.2634792625904083, |
| "learning_rate": 0.00016363636363636366, |
| "loss": 0.7324, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09233610341643583, |
| "grad_norm": 0.1200411394238472, |
| "learning_rate": 0.00018181818181818183, |
| "loss": 0.7535, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10156971375807941, |
| "grad_norm": 0.12060506641864777, |
| "learning_rate": 0.0002, |
| "loss": 0.7487, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.11080332409972299, |
| "grad_norm": 0.12120962888002396, |
| "learning_rate": 0.00019994777247895855, |
| "loss": 0.745, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.12003693444136658, |
| "grad_norm": 0.14212490618228912, |
| "learning_rate": 0.00019979114447011323, |
| "loss": 0.7294, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.12927054478301014, |
| "grad_norm": 0.1235802099108696, |
| "learning_rate": 0.00019953027957931658, |
| "loss": 0.7391, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.13850415512465375, |
| "grad_norm": 0.11884737759828568, |
| "learning_rate": 0.00019916545029310012, |
| "loss": 0.7324, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.14773776546629733, |
| "grad_norm": 0.1151009351015091, |
| "learning_rate": 0.00019869703769404828, |
| "loss": 0.7266, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1569713758079409, |
| "grad_norm": 0.11894387751817703, |
| "learning_rate": 0.00019812553106273847, |
| "loss": 0.7467, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.16620498614958448, |
| "grad_norm": 0.1144394502043724, |
| "learning_rate": 0.00019745152736666302, |
| "loss": 0.7376, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.17543859649122806, |
| "grad_norm": 0.11750061810016632, |
| "learning_rate": 0.0001966757306366662, |
| "loss": 0.7383, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.18467220683287167, |
| "grad_norm": 0.13155049085617065, |
| "learning_rate": 0.0001957989512315489, |
| "loss": 0.7451, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.19390581717451524, |
| "grad_norm": 0.11150489002466202, |
| "learning_rate": 0.00019482210499160765, |
| "loss": 0.7412, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.20313942751615882, |
| "grad_norm": 0.10819390416145325, |
| "learning_rate": 0.0001937462122819935, |
| "loss": 0.7422, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2123730378578024, |
| "grad_norm": 0.11923832446336746, |
| "learning_rate": 0.00019257239692688907, |
| "loss": 0.7377, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.22160664819944598, |
| "grad_norm": 0.11017678678035736, |
| "learning_rate": 0.00019130188503561741, |
| "loss": 0.7351, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.23084025854108955, |
| "grad_norm": 0.11701685935258865, |
| "learning_rate": 0.00018993600372190932, |
| "loss": 0.7282, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.24007386888273316, |
| "grad_norm": 0.11328760534524918, |
| "learning_rate": 0.00018847617971766577, |
| "loss": 0.7269, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.24930747922437674, |
| "grad_norm": 0.11526869982481003, |
| "learning_rate": 0.00018692393788266479, |
| "loss": 0.7083, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.2585410895660203, |
| "grad_norm": 0.1022554561495781, |
| "learning_rate": 0.0001852808996117683, |
| "loss": 0.7118, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2677746999076639, |
| "grad_norm": 0.12440157681703568, |
| "learning_rate": 0.00018354878114129367, |
| "loss": 0.7294, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.2770083102493075, |
| "grad_norm": 0.11588307470083237, |
| "learning_rate": 0.00018172939175631808, |
| "loss": 0.743, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.28624192059095105, |
| "grad_norm": 0.12347640097141266, |
| "learning_rate": 0.0001798246319007893, |
| "loss": 0.7449, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.29547553093259465, |
| "grad_norm": 0.12588274478912354, |
| "learning_rate": 0.00017783649119241602, |
| "loss": 0.761, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3047091412742382, |
| "grad_norm": 0.11528731882572174, |
| "learning_rate": 0.0001757670463444118, |
| "loss": 0.7353, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.3139427516158818, |
| "grad_norm": 0.11055415123701096, |
| "learning_rate": 0.00017361845899626355, |
| "loss": 0.7331, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3231763619575254, |
| "grad_norm": 0.10451104491949081, |
| "learning_rate": 0.00017139297345578994, |
| "loss": 0.7594, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.33240997229916897, |
| "grad_norm": 0.11109859496355057, |
| "learning_rate": 0.0001690929143548488, |
| "loss": 0.7291, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.34164358264081257, |
| "grad_norm": 0.11210189759731293, |
| "learning_rate": 0.00016672068422114196, |
| "loss": 0.7372, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.3508771929824561, |
| "grad_norm": 0.11521945893764496, |
| "learning_rate": 0.00016427876096865394, |
| "loss": 0.7404, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3601108033240997, |
| "grad_norm": 0.11584653705358505, |
| "learning_rate": 0.00016176969530934572, |
| "loss": 0.7465, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.36934441366574333, |
| "grad_norm": 0.11106808483600616, |
| "learning_rate": 0.0001591961080888076, |
| "loss": 0.7235, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3785780240073869, |
| "grad_norm": 0.09438136965036392, |
| "learning_rate": 0.00015656068754865387, |
| "loss": 0.7104, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.3878116343490305, |
| "grad_norm": 0.10474290698766708, |
| "learning_rate": 0.0001538661865185188, |
| "loss": 0.7567, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.39704524469067404, |
| "grad_norm": 0.11921609193086624, |
| "learning_rate": 0.00015111541954058734, |
| "loss": 0.739, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.40627885503231764, |
| "grad_norm": 0.11335526406764984, |
| "learning_rate": 0.00014831125992966385, |
| "loss": 0.734, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.4155124653739612, |
| "grad_norm": 0.11289556324481964, |
| "learning_rate": 0.00014545663677185006, |
| "loss": 0.7448, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4247460757156048, |
| "grad_norm": 0.10999900847673416, |
| "learning_rate": 0.00014255453186496673, |
| "loss": 0.7316, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4339796860572484, |
| "grad_norm": 0.1110299751162529, |
| "learning_rate": 0.0001396079766039157, |
| "loss": 0.7247, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.44321329639889195, |
| "grad_norm": 0.11613520234823227, |
| "learning_rate": 0.0001366200488142348, |
| "loss": 0.7364, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.45244690674053556, |
| "grad_norm": 0.11091934144496918, |
| "learning_rate": 0.00013359386953715421, |
| "loss": 0.7497, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.4616805170821791, |
| "grad_norm": 0.1117897480726242, |
| "learning_rate": 0.00013053259976951133, |
| "loss": 0.7318, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4709141274238227, |
| "grad_norm": 0.10972230136394501, |
| "learning_rate": 0.00012743943716193016, |
| "loss": 0.7362, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.4801477377654663, |
| "grad_norm": 0.1100962832570076, |
| "learning_rate": 0.00012431761267871417, |
| "loss": 0.7246, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.48938134810710987, |
| "grad_norm": 0.11420110613107681, |
| "learning_rate": 0.0001211703872229411, |
| "loss": 0.6997, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.4986149584487535, |
| "grad_norm": 0.11659246683120728, |
| "learning_rate": 0.00011800104823028515, |
| "loss": 0.7216, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5078485687903971, |
| "grad_norm": 0.10777433216571808, |
| "learning_rate": 0.0001148129062351249, |
| "loss": 0.7386, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5170821791320406, |
| "grad_norm": 0.1175241768360138, |
| "learning_rate": 0.00011160929141252303, |
| "loss": 0.7354, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "grad_norm": 0.11116177588701248, |
| "learning_rate": 0.00010839355009969068, |
| "loss": 0.74, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.5355493998153278, |
| "grad_norm": 0.11569847166538239, |
| "learning_rate": 0.00010516904130056946, |
| "loss": 0.7488, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5447830101569714, |
| "grad_norm": 0.12016792595386505, |
| "learning_rate": 0.00010193913317718244, |
| "loss": 0.7238, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.554016620498615, |
| "grad_norm": 0.11518382281064987, |
| "learning_rate": 9.870719953141917e-05, |
| "loss": 0.7223, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5632502308402585, |
| "grad_norm": 0.1142842248082161, |
| "learning_rate": 9.547661628092937e-05, |
| "loss": 0.7409, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.5724838411819021, |
| "grad_norm": 0.10657477378845215, |
| "learning_rate": 9.225075793280692e-05, |
| "loss": 0.7228, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5817174515235457, |
| "grad_norm": 0.10703656822443008, |
| "learning_rate": 8.903299405874684e-05, |
| "loss": 0.7367, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.5909510618651893, |
| "grad_norm": 0.10378415137529373, |
| "learning_rate": 8.582668577535797e-05, |
| "loss": 0.7368, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6001846722068329, |
| "grad_norm": 0.1063636764883995, |
| "learning_rate": 8.263518223330697e-05, |
| "loss": 0.7504, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.6094182825484764, |
| "grad_norm": 0.1067524254322052, |
| "learning_rate": 7.94618171189618e-05, |
| "loss": 0.7114, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.61865189289012, |
| "grad_norm": 0.11942796409130096, |
| "learning_rate": 7.630990517218808e-05, |
| "loss": 0.7372, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.6278855032317636, |
| "grad_norm": 0.11493529379367828, |
| "learning_rate": 7.318273872393625e-05, |
| "loss": 0.7194, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6371191135734072, |
| "grad_norm": 0.10850054770708084, |
| "learning_rate": 7.008358425723585e-05, |
| "loss": 0.7423, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6463527239150508, |
| "grad_norm": 0.11228082329034805, |
| "learning_rate": 6.701567899518924e-05, |
| "loss": 0.7064, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6555863342566943, |
| "grad_norm": 0.10916541516780853, |
| "learning_rate": 6.398222751952899e-05, |
| "loss": 0.7026, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6648199445983379, |
| "grad_norm": 0.11889058351516724, |
| "learning_rate": 6.098639842327052e-05, |
| "loss": 0.7273, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6740535549399815, |
| "grad_norm": 0.11622773110866547, |
| "learning_rate": 5.80313210009571e-05, |
| "loss": 0.7251, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.6832871652816251, |
| "grad_norm": 0.11390994489192963, |
| "learning_rate": 5.5120081979953785e-05, |
| "loss": 0.7425, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6925207756232687, |
| "grad_norm": 0.10942788422107697, |
| "learning_rate": 5.22557222962051e-05, |
| "loss": 0.7278, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.7017543859649122, |
| "grad_norm": 0.11150877922773361, |
| "learning_rate": 4.9441233917824106e-05, |
| "loss": 0.7199, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.7109879963065558, |
| "grad_norm": 0.11406592279672623, |
| "learning_rate": 4.66795567198309e-05, |
| "loss": 0.7635, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.7202216066481995, |
| "grad_norm": 0.10552997142076492, |
| "learning_rate": 4.397357541330476e-05, |
| "loss": 0.74, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7294552169898431, |
| "grad_norm": 0.11307038366794586, |
| "learning_rate": 4.132611653215822e-05, |
| "loss": 0.7155, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.7386888273314867, |
| "grad_norm": 0.11548582464456558, |
| "learning_rate": 3.873994548067972e-05, |
| "loss": 0.735, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7479224376731302, |
| "grad_norm": 0.11258837580680847, |
| "learning_rate": 3.621776364492939e-05, |
| "loss": 0.7438, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.7571560480147738, |
| "grad_norm": 0.11234745383262634, |
| "learning_rate": 3.376220557100523e-05, |
| "loss": 0.7289, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7663896583564174, |
| "grad_norm": 0.11089134961366653, |
| "learning_rate": 3.137583621312665e-05, |
| "loss": 0.7185, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.775623268698061, |
| "grad_norm": 0.1163964495062828, |
| "learning_rate": 2.906114825441072e-05, |
| "loss": 0.7194, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7848568790397045, |
| "grad_norm": 0.12141181528568268, |
| "learning_rate": 2.6820559503138797e-05, |
| "loss": 0.7271, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.7940904893813481, |
| "grad_norm": 0.11405418813228607, |
| "learning_rate": 2.465641036723393e-05, |
| "loss": 0.7176, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.8033240997229917, |
| "grad_norm": 0.1123843789100647, |
| "learning_rate": 2.2570961409586754e-05, |
| "loss": 0.7218, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.8125577100646353, |
| "grad_norm": 0.1090715080499649, |
| "learning_rate": 2.0566390986783646e-05, |
| "loss": 0.7348, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.8217913204062789, |
| "grad_norm": 0.1076655238866806, |
| "learning_rate": 1.864479297370325e-05, |
| "loss": 0.7508, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.8310249307479224, |
| "grad_norm": 0.1181124672293663, |
| "learning_rate": 1.6808174576358848e-05, |
| "loss": 0.7248, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.840258541089566, |
| "grad_norm": 0.11672370880842209, |
| "learning_rate": 1.505845423527027e-05, |
| "loss": 0.7357, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.8494921514312096, |
| "grad_norm": 0.10791666060686111, |
| "learning_rate": 1.339745962155613e-05, |
| "loss": 0.7117, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8587257617728532, |
| "grad_norm": 0.10970946401357651, |
| "learning_rate": 1.18269257278392e-05, |
| "loss": 0.724, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.8679593721144968, |
| "grad_norm": 0.10867480933666229, |
| "learning_rate": 1.0348493055959062e-05, |
| "loss": 0.7184, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8771929824561403, |
| "grad_norm": 0.10204034298658371, |
| "learning_rate": 8.963705903385345e-06, |
| "loss": 0.7395, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.8864265927977839, |
| "grad_norm": 0.10882284492254257, |
| "learning_rate": 7.674010750120964e-06, |
| "loss": 0.727, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8956602031394275, |
| "grad_norm": 0.11304344236850739, |
| "learning_rate": 6.480754747781037e-06, |
| "loss": 0.7039, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.9048938134810711, |
| "grad_norm": 0.11570131033658981, |
| "learning_rate": 5.385184312424974e-06, |
| "loss": 0.7444, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.9141274238227147, |
| "grad_norm": 0.10709495097398758, |
| "learning_rate": 4.3884438226120424e-06, |
| "loss": 0.7134, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.9233610341643582, |
| "grad_norm": 0.11614098399877548, |
| "learning_rate": 3.4915744240403558e-06, |
| "loss": 0.707, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9325946445060018, |
| "grad_norm": 0.11611202359199524, |
| "learning_rate": 2.6955129420176196e-06, |
| "loss": 0.737, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.9418282548476454, |
| "grad_norm": 0.11080285161733627, |
| "learning_rate": 2.0010909028998827e-06, |
| "loss": 0.7359, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.951061865189289, |
| "grad_norm": 0.1167815551161766, |
| "learning_rate": 1.409033665520354e-06, |
| "loss": 0.7131, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.9602954755309326, |
| "grad_norm": 0.11581467092037201, |
| "learning_rate": 9.199596635154683e-07, |
| "loss": 0.7062, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9695290858725761, |
| "grad_norm": 0.1139911562204361, |
| "learning_rate": 5.343797593398536e-07, |
| "loss": 0.7382, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.9787626962142197, |
| "grad_norm": 0.10670532286167145, |
| "learning_rate": 2.5269671064467313e-07, |
| "loss": 0.6965, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.9879963065558633, |
| "grad_norm": 0.10769211500883102, |
| "learning_rate": 7.520474957699586e-08, |
| "loss": 0.7086, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.997229916897507, |
| "grad_norm": 0.10409457236528397, |
| "learning_rate": 2.0892754394208346e-09, |
| "loss": 0.702, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.9990766389658357, |
| "eval_loss": 0.9494999051094055, |
| "eval_runtime": 131.5083, |
| "eval_samples_per_second": 8.783, |
| "eval_steps_per_second": 0.555, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.9990766389658357, |
| "step": 541, |
| "total_flos": 2.2228397020277637e+18, |
| "train_loss": 0.7336494325489742, |
| "train_runtime": 26329.8311, |
| "train_samples_per_second": 3.947, |
| "train_steps_per_second": 0.021 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 541, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 25, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.2228397020277637e+18, |
| "train_batch_size": 24, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|