| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.903111111111111, |
| "eval_steps": 500, |
| "global_step": 1100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.035555555555555556, |
| "grad_norm": 1.6136552095413208, |
| "learning_rate": 1.4084507042253522e-07, |
| "loss": 1.4283, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.07111111111111111, |
| "grad_norm": 2.3250255584716797, |
| "learning_rate": 2.8169014084507043e-07, |
| "loss": 1.4176, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.10666666666666667, |
| "grad_norm": 2.205648422241211, |
| "learning_rate": 4.225352112676056e-07, |
| "loss": 1.3904, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.14222222222222222, |
| "grad_norm": 1.679602861404419, |
| "learning_rate": 5.633802816901409e-07, |
| "loss": 1.3256, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 1.6885226964950562, |
| "learning_rate": 7.04225352112676e-07, |
| "loss": 1.2877, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.21333333333333335, |
| "grad_norm": 1.3719532489776611, |
| "learning_rate": 8.450704225352112e-07, |
| "loss": 1.2335, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.24888888888888888, |
| "grad_norm": 1.6127221584320068, |
| "learning_rate": 9.859154929577465e-07, |
| "loss": 1.1898, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.28444444444444444, |
| "grad_norm": 1.3292348384857178, |
| "learning_rate": 9.998876955784181e-07, |
| "loss": 1.1213, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.1058685779571533, |
| "learning_rate": 9.994995475316987e-07, |
| "loss": 1.104, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 1.0595113039016724, |
| "learning_rate": 9.988343845952696e-07, |
| "loss": 1.059, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.39111111111111113, |
| "grad_norm": 0.9761242270469666, |
| "learning_rate": 9.978925756584284e-07, |
| "loss": 0.9813, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4266666666666667, |
| "grad_norm": 0.8893954157829285, |
| "learning_rate": 9.966746430341582e-07, |
| "loss": 0.9635, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4622222222222222, |
| "grad_norm": 0.8302690982818604, |
| "learning_rate": 9.951812621694608e-07, |
| "loss": 0.9373, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.49777777777777776, |
| "grad_norm": 0.74117112159729, |
| "learning_rate": 9.93413261270763e-07, |
| "loss": 0.9394, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 0.910311758518219, |
| "learning_rate": 9.913716208446065e-07, |
| "loss": 0.9476, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5688888888888889, |
| "grad_norm": 0.9787248373031616, |
| "learning_rate": 9.890574731538739e-07, |
| "loss": 0.9403, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6044444444444445, |
| "grad_norm": 0.6852824091911316, |
| "learning_rate": 9.864721015898523e-07, |
| "loss": 0.9306, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.9083530306816101, |
| "learning_rate": 9.836169399604845e-07, |
| "loss": 0.9356, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6755555555555556, |
| "grad_norm": 0.6284005641937256, |
| "learning_rate": 9.80493571695201e-07, |
| "loss": 0.9154, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 0.8122096061706543, |
| "learning_rate": 9.771037289667726e-07, |
| "loss": 0.8989, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7466666666666667, |
| "grad_norm": 0.6801354885101318, |
| "learning_rate": 9.734492917306754e-07, |
| "loss": 0.9159, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7822222222222223, |
| "grad_norm": 1.5338674783706665, |
| "learning_rate": 9.695322866824947e-07, |
| "loss": 0.8969, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8177777777777778, |
| "grad_norm": 0.9366681575775146, |
| "learning_rate": 9.653548861339508e-07, |
| "loss": 0.9099, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8533333333333334, |
| "grad_norm": 0.8953334093093872, |
| "learning_rate": 9.60919406808168e-07, |
| "loss": 0.8797, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.7514542937278748, |
| "learning_rate": 9.562283085548543e-07, |
| "loss": 0.8666, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9244444444444444, |
| "grad_norm": 0.7203475832939148, |
| "learning_rate": 9.512841929861068e-07, |
| "loss": 0.893, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.9745852947235107, |
| "learning_rate": 9.460898020335964e-07, |
| "loss": 0.8883, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9955555555555555, |
| "grad_norm": 0.9440745711326599, |
| "learning_rate": 9.40648016427934e-07, |
| "loss": 0.869, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.0284444444444445, |
| "grad_norm": 1.0532046556472778, |
| "learning_rate": 9.349618541010616e-07, |
| "loss": 0.7853, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.064, |
| "grad_norm": 0.7366812825202942, |
| "learning_rate": 9.290344685125519e-07, |
| "loss": 0.8485, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0995555555555556, |
| "grad_norm": 0.6317222118377686, |
| "learning_rate": 9.228691469007486e-07, |
| "loss": 0.8323, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.1351111111111112, |
| "grad_norm": 0.4928416907787323, |
| "learning_rate": 9.16469308459712e-07, |
| "loss": 0.881, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.1706666666666667, |
| "grad_norm": 0.8622790575027466, |
| "learning_rate": 9.098385024429874e-07, |
| "loss": 0.8618, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.2062222222222223, |
| "grad_norm": 0.9656073451042175, |
| "learning_rate": 9.029804061952424e-07, |
| "loss": 0.8504, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.2417777777777779, |
| "grad_norm": 0.8012099862098694, |
| "learning_rate": 8.958988231128663e-07, |
| "loss": 0.8289, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.2773333333333334, |
| "grad_norm": 0.831724226474762, |
| "learning_rate": 8.885976805346651e-07, |
| "loss": 0.8313, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.3128888888888888, |
| "grad_norm": 0.9381484389305115, |
| "learning_rate": 8.810810275638182e-07, |
| "loss": 0.8222, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.3484444444444446, |
| "grad_norm": 0.7074716687202454, |
| "learning_rate": 8.733530328223075e-07, |
| "loss": 0.815, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.384, |
| "grad_norm": 0.6802889704704285, |
| "learning_rate": 8.654179821390621e-07, |
| "loss": 0.8485, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.4195555555555557, |
| "grad_norm": 0.6159129738807678, |
| "learning_rate": 8.572802761731031e-07, |
| "loss": 0.8396, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.455111111111111, |
| "grad_norm": 1.0787162780761719, |
| "learning_rate": 8.489444279730045e-07, |
| "loss": 0.8342, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.4906666666666666, |
| "grad_norm": 0.850229024887085, |
| "learning_rate": 8.404150604740248e-07, |
| "loss": 0.8385, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.5262222222222221, |
| "grad_norm": 0.9370916485786438, |
| "learning_rate": 8.316969039342963e-07, |
| "loss": 0.7899, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.561777777777778, |
| "grad_norm": 0.7209655046463013, |
| "learning_rate": 8.22794793311497e-07, |
| "loss": 0.8046, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.5973333333333333, |
| "grad_norm": 0.8257189989089966, |
| "learning_rate": 8.137136655814549e-07, |
| "loss": 0.8178, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.6328888888888888, |
| "grad_norm": 0.8620548248291016, |
| "learning_rate": 8.044585570001769e-07, |
| "loss": 0.807, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.6684444444444444, |
| "grad_norm": 0.8659062385559082, |
| "learning_rate": 7.950346003108166e-07, |
| "loss": 0.8087, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.704, |
| "grad_norm": 0.5293139815330505, |
| "learning_rate": 7.854470218971332e-07, |
| "loss": 0.7872, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.7395555555555555, |
| "grad_norm": 0.5208423733711243, |
| "learning_rate": 7.75701138885018e-07, |
| "loss": 0.8161, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.775111111111111, |
| "grad_norm": 0.7580987811088562, |
| "learning_rate": 7.658023561936966e-07, |
| "loss": 0.8314, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.8106666666666666, |
| "grad_norm": 0.8971360325813293, |
| "learning_rate": 7.557561635382432e-07, |
| "loss": 0.806, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.8462222222222222, |
| "grad_norm": 0.6375018954277039, |
| "learning_rate": 7.455681323850668e-07, |
| "loss": 0.7969, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.8817777777777778, |
| "grad_norm": 1.017171859741211, |
| "learning_rate": 7.352439128620609e-07, |
| "loss": 0.7974, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.9173333333333333, |
| "grad_norm": 0.8392543196678162, |
| "learning_rate": 7.247892306251275e-07, |
| "loss": 0.807, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.952888888888889, |
| "grad_norm": 1.016851782798767, |
| "learning_rate": 7.142098836828161e-07, |
| "loss": 0.8062, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.9884444444444445, |
| "grad_norm": 0.8153456449508667, |
| "learning_rate": 7.035117391808341e-07, |
| "loss": 0.7673, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.021333333333333, |
| "grad_norm": 0.7162724733352661, |
| "learning_rate": 6.927007301482186e-07, |
| "loss": 0.7502, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.056888888888889, |
| "grad_norm": 0.9724966883659363, |
| "learning_rate": 6.817828522069667e-07, |
| "loss": 0.7868, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.0924444444444443, |
| "grad_norm": 1.1692003011703491, |
| "learning_rate": 6.707641602469553e-07, |
| "loss": 0.7739, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.128, |
| "grad_norm": 0.7322782874107361, |
| "learning_rate": 6.596507650679899e-07, |
| "loss": 0.7829, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.1635555555555555, |
| "grad_norm": 0.9158796072006226, |
| "learning_rate": 6.484488299908486e-07, |
| "loss": 0.772, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.1991111111111112, |
| "grad_norm": 0.8015128374099731, |
| "learning_rate": 6.371645674391966e-07, |
| "loss": 0.7806, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.2346666666666666, |
| "grad_norm": 0.7846320271492004, |
| "learning_rate": 6.258042354942707e-07, |
| "loss": 0.775, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.2702222222222224, |
| "grad_norm": 0.8747680187225342, |
| "learning_rate": 6.143741344242423e-07, |
| "loss": 0.7837, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.3057777777777777, |
| "grad_norm": 0.8119185566902161, |
| "learning_rate": 6.028806031901829e-07, |
| "loss": 0.7519, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.3413333333333335, |
| "grad_norm": 0.8647979497909546, |
| "learning_rate": 5.91330015930574e-07, |
| "loss": 0.7715, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.376888888888889, |
| "grad_norm": 0.8015746474266052, |
| "learning_rate": 5.797287784263046e-07, |
| "loss": 0.7829, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.4124444444444446, |
| "grad_norm": 0.715522289276123, |
| "learning_rate": 5.680833245481234e-07, |
| "loss": 0.7719, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.448, |
| "grad_norm": 0.9125120639801025, |
| "learning_rate": 5.564001126885105e-07, |
| "loss": 0.7632, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.4835555555555557, |
| "grad_norm": 0.9937298893928528, |
| "learning_rate": 5.446856221799514e-07, |
| "loss": 0.7511, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.519111111111111, |
| "grad_norm": 0.5765209794044495, |
| "learning_rate": 5.329463497015968e-07, |
| "loss": 0.7581, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.554666666666667, |
| "grad_norm": 0.841436505317688, |
| "learning_rate": 5.211888056763029e-07, |
| "loss": 0.7813, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.590222222222222, |
| "grad_norm": 1.1379077434539795, |
| "learning_rate": 5.094195106600489e-07, |
| "loss": 0.7874, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.6257777777777775, |
| "grad_norm": 0.7455689311027527, |
| "learning_rate": 4.976449917257365e-07, |
| "loss": 0.797, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.6613333333333333, |
| "grad_norm": 0.6947171092033386, |
| "learning_rate": 4.858717788433725e-07, |
| "loss": 0.7531, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.696888888888889, |
| "grad_norm": 0.8182320594787598, |
| "learning_rate": 4.741064012586478e-07, |
| "loss": 0.7659, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.7324444444444445, |
| "grad_norm": 0.8583469390869141, |
| "learning_rate": 4.6235538387191507e-07, |
| "loss": 0.753, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.768, |
| "grad_norm": 0.6977065205574036, |
| "learning_rate": 4.50625243619579e-07, |
| "loss": 0.7786, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.8035555555555556, |
| "grad_norm": 0.8603796362876892, |
| "learning_rate": 4.3892248585990147e-07, |
| "loss": 0.7842, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.8391111111111114, |
| "grad_norm": 0.6347509026527405, |
| "learning_rate": 4.27253600765228e-07, |
| "loss": 0.7808, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.8746666666666667, |
| "grad_norm": 0.6170427203178406, |
| "learning_rate": 4.1562505972263726e-07, |
| "loss": 0.7623, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.910222222222222, |
| "grad_norm": 0.6599701046943665, |
| "learning_rate": 4.0404331174500656e-07, |
| "loss": 0.7692, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.945777777777778, |
| "grad_norm": 0.6815395951271057, |
| "learning_rate": 3.9251477989448795e-07, |
| "loss": 0.8188, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.981333333333333, |
| "grad_norm": 0.5231301784515381, |
| "learning_rate": 3.810458577203749e-07, |
| "loss": 0.7577, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.014222222222222, |
| "grad_norm": 0.6689186692237854, |
| "learning_rate": 3.696429057133358e-07, |
| "loss": 0.715, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.049777777777778, |
| "grad_norm": 0.7008723020553589, |
| "learning_rate": 3.583122477779834e-07, |
| "loss": 0.782, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.0853333333333333, |
| "grad_norm": 0.915671706199646, |
| "learning_rate": 3.470601677257323e-07, |
| "loss": 0.8049, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.120888888888889, |
| "grad_norm": 0.6437973976135254, |
| "learning_rate": 3.3589290578989213e-07, |
| "loss": 0.7404, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.1564444444444444, |
| "grad_norm": 0.6364536285400391, |
| "learning_rate": 3.2481665516492876e-07, |
| "loss": 0.7662, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.192, |
| "grad_norm": 0.7271984219551086, |
| "learning_rate": 3.138375585718125e-07, |
| "loss": 0.7738, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.2275555555555555, |
| "grad_norm": 0.6700648665428162, |
| "learning_rate": 3.0296170485135784e-07, |
| "loss": 0.735, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.2631111111111113, |
| "grad_norm": 0.6754481196403503, |
| "learning_rate": 2.9219512558744486e-07, |
| "loss": 0.7539, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.2986666666666666, |
| "grad_norm": 0.8119938969612122, |
| "learning_rate": 2.815437917619932e-07, |
| "loss": 0.7498, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.3342222222222224, |
| "grad_norm": 0.5352524518966675, |
| "learning_rate": 2.7101361044354696e-07, |
| "loss": 0.7316, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.3697777777777778, |
| "grad_norm": 0.7653639316558838, |
| "learning_rate": 2.6061042151130323e-07, |
| "loss": 0.73, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.405333333333333, |
| "grad_norm": 0.7560474872589111, |
| "learning_rate": 2.5033999441640344e-07, |
| "loss": 0.7561, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.440888888888889, |
| "grad_norm": 0.7517653703689575, |
| "learning_rate": 2.4020802498228334e-07, |
| "loss": 0.7382, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.4764444444444447, |
| "grad_norm": 1.0488708019256592, |
| "learning_rate": 2.3022013224585519e-07, |
| "loss": 0.7805, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.512, |
| "grad_norm": 0.8792369365692139, |
| "learning_rate": 2.203818553412757e-07, |
| "loss": 0.7754, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.5475555555555554, |
| "grad_norm": 0.6874270439147949, |
| "learning_rate": 2.10698650428025e-07, |
| "loss": 0.7465, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.583111111111111, |
| "grad_norm": 0.7939172983169556, |
| "learning_rate": 2.011758876650037e-07, |
| "loss": 0.7451, |
| "step": 1010 |
| }, |
| { |
| "epoch": 3.618666666666667, |
| "grad_norm": 0.7084336876869202, |
| "learning_rate": 1.9181884823232413e-07, |
| "loss": 0.7559, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.6542222222222223, |
| "grad_norm": 0.6327200531959534, |
| "learning_rate": 1.82632721402448e-07, |
| "loss": 0.7191, |
| "step": 1030 |
| }, |
| { |
| "epoch": 3.6897777777777776, |
| "grad_norm": 0.5157420635223389, |
| "learning_rate": 1.7362260166229308e-07, |
| "loss": 0.7336, |
| "step": 1040 |
| }, |
| { |
| "epoch": 3.7253333333333334, |
| "grad_norm": 0.5553033947944641, |
| "learning_rate": 1.6479348588791e-07, |
| "loss": 0.7527, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.7608888888888887, |
| "grad_norm": 0.7045750617980957, |
| "learning_rate": 1.561502705732883e-07, |
| "loss": 0.7352, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.7964444444444445, |
| "grad_norm": 0.70656418800354, |
| "learning_rate": 1.4769774911483686e-07, |
| "loss": 0.7666, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.832, |
| "grad_norm": 0.8279157876968384, |
| "learning_rate": 1.394406091530367e-07, |
| "loss": 0.7362, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.8675555555555556, |
| "grad_norm": 0.7268490195274353, |
| "learning_rate": 1.313834299727488e-07, |
| "loss": 0.7346, |
| "step": 1090 |
| }, |
| { |
| "epoch": 3.903111111111111, |
| "grad_norm": 0.5250927209854126, |
| "learning_rate": 1.2353067996361033e-07, |
| "loss": 0.7359, |
| "step": 1100 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1405, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.602096798242701e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|