{ "best_global_step": 2250, "best_metric": 0.18876151740550995, "best_model_checkpoint": "/kaggle/working/obsidian_critic_qwen35_t4x2_unsloth/runs/obsidian_critic_full_epoch/checkpoint-2250", "epoch": 1.0, "eval_steps": 125, "global_step": 2256, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004434343994235353, "grad_norm": 0.21817488968372345, "last_batch_tokens": 257, "learning_rate": 0.0, "loss": 2.5221590995788574, "lr": 2e-05, "step": 1, "tokens_per_second": 27.955696254559246, "tokens_per_step": 1560.0, "total_tokens_seen": 1560 }, { "epoch": 0.022171719971176763, "grad_norm": 0.49327757954597473, "last_batch_tokens": 229, "learning_rate": 9.990575514806563e-05, "loss": 1.981216119260204, "lr": 9.990142403513012e-05, "step": 50, "tokens_per_second": 76.9470637679645, "tokens_per_step": 1551.7, "total_tokens_seen": 77585 }, { "epoch": 0.04434343994235353, "grad_norm": 0.8501848578453064, "last_batch_tokens": 193, "learning_rate": 9.957034339013742e-05, "loss": 1.1713996887207032, "lr": 9.956116660116155e-05, "step": 100, "tokens_per_second": 86.53475088010235, "tokens_per_step": 1572.7, "total_tokens_seen": 157270 }, { "epoch": 0.05542929992794191, "eval_loss": 0.9524237513542175, "eval_runtime": 104.4026, "eval_samples_per_second": 3.477, "eval_steps_per_second": 1.743, "last_batch_tokens": 172, "lr": 9.930042238269485e-05, "step": 125, "tokens_per_second": 133.98724044386626, "tokens_per_step": 1853.72, "total_tokens_seen": 231715 }, { "epoch": 0.0665151599135303, "grad_norm": 0.892642617225647, "last_batch_tokens": 59, "learning_rate": 9.899364434012273e-05, "loss": 0.8618771362304688, "lr": 9.897966654380171e-05, "step": 150, "tokens_per_second": 78.95926927075263, "tokens_per_step": 1780.0866666666666, "total_tokens_seen": 267013 }, { "epoch": 0.08868687988470705, "grad_norm": 0.8177722692489624, "last_batch_tokens": 275, "learning_rate": 9.817846512306061e-05, "loss": 0.69920166015625, "lr": 9.815975435734603e-05, "step": 200, "tokens_per_second": 83.13681099990983, "tokens_per_step": 1700.74, "total_tokens_seen": 340148 }, { "epoch": 0.11085859985588382, "grad_norm": 1.009503960609436, "last_batch_tokens": 181, "learning_rate": 9.712877368374224e-05, "loss": 0.6449888610839843, "lr": 9.710542102466229e-05, "step": 250, "tokens_per_second": 82.25185579838005, "tokens_per_step": 1656.984, "total_tokens_seen": 414246 }, { "epoch": 0.11085859985588382, "eval_loss": 0.6187728047370911, "eval_runtime": 88.3548, "eval_samples_per_second": 4.108, "eval_steps_per_second": 2.06, "last_batch_tokens": 172, "lr": 9.710542102466229e-05, "step": 250, "tokens_per_second": 363.7051920895653, "tokens_per_step": 1785.536, "total_tokens_seen": 446384 }, { "epoch": 0.1330303198270606, "grad_norm": 0.5571497082710266, "last_batch_tokens": 329, "learning_rate": 9.584967947244769e-05, "loss": 0.5449295806884765, "lr": 9.582179859078793e-05, "step": 300, "tokens_per_second": 81.02823836816424, "tokens_per_step": 1724.5733333333333, "total_tokens_seen": 517372 }, { "epoch": 0.15520203979823735, "grad_norm": 0.7961392998695374, "last_batch_tokens": 165, "learning_rate": 9.434740857432105e-05, "loss": 0.46938041687011717, "lr": 9.431513518232342e-05, "step": 350, "tokens_per_second": 89.52354398651325, "tokens_per_step": 1704.1371428571429, "total_tokens_seen": 596448 }, { "epoch": 0.16628789978382574, "eval_loss": 0.4863806366920471, "eval_runtime": 87.0251, "eval_samples_per_second": 4.171, "eval_steps_per_second": 2.091, "last_batch_tokens": 172, "lr": 9.348041345533653e-05, "step": 375, "tokens_per_second": 135.38091364115044, "tokens_per_step": 1784.712, "total_tokens_seen": 669267 }, { "epoch": 0.1773737597694141, "grad_norm": 0.7586395144462585, "last_batch_tokens": 351, "learning_rate": 9.262927340344295e-05, "loss": 0.4675440216064453, "lr": 9.259276459421655e-05, "step": 400, "tokens_per_second": 81.3096016563381, "tokens_per_step": 1764.9875, "total_tokens_seen": 705995 }, { "epoch": 0.19954547974059086, "grad_norm": 0.7313582897186279, "last_batch_tokens": 369, "learning_rate": 9.070363710911735e-05, "loss": 0.3964078140258789, "lr": 9.066307059197612e-05, "step": 450, "tokens_per_second": 87.86278133239196, "tokens_per_step": 1744.9444444444443, "total_tokens_seen": 785225 }, { "epoch": 0.22171719971176765, "grad_norm": 0.5969849228858948, "last_batch_tokens": 193, "learning_rate": 8.857987286762718e-05, "loss": 0.3672472381591797, "lr": 8.853544610307675e-05, "step": 500, "tokens_per_second": 87.74574317837812, "tokens_per_step": 1729.026, "total_tokens_seen": 864513 }, { "epoch": 0.22171719971176765, "eval_loss": 0.40328726172447205, "eval_runtime": 87.1124, "eval_samples_per_second": 4.167, "eval_steps_per_second": 2.089, "last_batch_tokens": 172, "lr": 8.853544610307675e-05, "step": 500, "tokens_per_second": 368.8907701487212, "tokens_per_step": 1793.302, "total_tokens_seen": 896651 }, { "epoch": 0.2438889196829444, "grad_norm": 0.7751753330230713, "last_batch_tokens": 273, "learning_rate": 8.626831825760946e-05, "loss": 0.3414393615722656, "lr": 8.622024749619364e-05, "step": 550, "tokens_per_second": 82.92877874873523, "tokens_per_step": 1766.3690909090908, "total_tokens_seen": 971503 }, { "epoch": 0.2660606396541212, "grad_norm": 0.7136653065681458, "last_batch_tokens": 305, "learning_rate": 8.378022494113098e-05, "loss": 0.3377827072143555, "lr": 8.372874417081631e-05, "step": 600, "tokens_per_second": 90.40251231127895, "tokens_per_step": 1748.685, "total_tokens_seen": 1049211 }, { "epoch": 0.27714649963970955, "eval_loss": 0.35334891080856323, "eval_runtime": 87.0325, "eval_samples_per_second": 4.171, "eval_steps_per_second": 2.091, "last_batch_tokens": 172, "lr": 8.24206361704162e-05, "step": 625, "tokens_per_second": 135.75737480096265, "tokens_per_step": 1791.824, "total_tokens_seen": 1119890 }, { "epoch": 0.2882323596252979, "grad_norm": 0.7202998399734497, "last_batch_tokens": 211, "learning_rate": 8.112770389539574e-05, "loss": 0.3233934020996094, "lr": 8.107306370261785e-05, "step": 650, "tokens_per_second": 84.5144051400581, "tokens_per_step": 1779.3815384615384, "total_tokens_seen": 1156598 }, { "epoch": 0.3104040795964747, "grad_norm": 0.7681185007095337, "last_batch_tokens": 236, "learning_rate": 7.832366646167268e-05, "loss": 0.3125551414489746, "lr": 7.826613281158841e-05, "step": 700, "tokens_per_second": 84.37944807859942, "tokens_per_step": 1759.6771428571428, "total_tokens_seen": 1231774 }, { "epoch": 0.3325757995676515, "grad_norm": 0.659271776676178, "last_batch_tokens": 939, "learning_rate": 7.538176149839243e-05, "loss": 0.28798053741455076, "lr": 7.532161444027488e-05, "step": 750, "tokens_per_second": 87.73140620694117, "tokens_per_step": 1745.06, "total_tokens_seen": 1308795 }, { "epoch": 0.3325757995676515, "eval_loss": 0.3200623393058777, "eval_runtime": 87.2377, "eval_samples_per_second": 4.161, "eval_steps_per_second": 2.086, "last_batch_tokens": 172, "lr": 7.532161444027488e-05, "step": 750, "tokens_per_second": 368.35941630029333, "tokens_per_step": 1787.9106666666667, "total_tokens_seen": 1340933 }, { "epoch": 0.3547475195388282, "grad_norm": 0.5721789598464966, "last_batch_tokens": 124, "learning_rate": 7.231630894432527e-05, "loss": 0.29953609466552733, "lr": 7.22538412484033e-05, "step": 800, "tokens_per_second": 65.97096831279634, "tokens_per_step": 98.35625, "total_tokens_seen": 78685 }, { "epoch": 0.376919239510005, "grad_norm": 0.4275953471660614, "last_batch_tokens": 266, "learning_rate": 6.914223011522581e-05, "loss": 0.27611801147460935, "lr": 6.907774584760349e-05, "step": 850, "tokens_per_second": 76.59339331072898, "tokens_per_step": 183.97411764705882, "total_tokens_seen": 156378 }, { "epoch": 0.38800509949559336, "eval_loss": 0.28222641348838806, "eval_runtime": 113.424, "eval_samples_per_second": 3.2, "eval_steps_per_second": 1.605, "last_batch_tokens": 172, "lr": 6.745388997609773e-05, "step": 875, "tokens_per_second": 114.49594753151979, "tokens_per_step": 258.8742857142857, "total_tokens_seen": 226515 }, { "epoch": 0.39909095948118173, "grad_norm": 0.5093332529067993, "last_batch_tokens": 209, "learning_rate": 6.587497507323132e-05, "loss": 0.26179553985595705, "lr": 6.580878811582379e-05, "step": 900, "tokens_per_second": 82.29563477689274, "tokens_per_step": 298.55555555555554, "total_tokens_seen": 268700 }, { "epoch": 0.4212626794523585, "grad_norm": 0.3912750482559204, "last_batch_tokens": 103, "learning_rate": 6.253044742254792e-05, "loss": 0.25117488861083986, "lr": 6.246287994523805e-05, "step": 950, "tokens_per_second": 79.79549481684828, "tokens_per_step": 366.02947368421053, "total_tokens_seen": 347728 }, { "epoch": 0.4434343994235353, "grad_norm": 0.4664643406867981, "last_batch_tokens": 203, "learning_rate": 5.9124926897487534e-05, "loss": 0.25925636291503906, "lr": 5.9056307789940357e-05, "step": 1000, "tokens_per_second": 76.53280228762407, "tokens_per_step": 422.387, "total_tokens_seen": 422387 }, { "epoch": 0.4434343994235353, "eval_loss": 0.26276224851608276, "eval_runtime": 95.1275, "eval_samples_per_second": 3.816, "eval_steps_per_second": 1.913, "last_batch_tokens": 172, "lr": 5.9056307789940357e-05, "step": 1000, "tokens_per_second": 337.8095566509732, "tokens_per_step": 454.525, "total_tokens_seen": 454525 }, { "epoch": 0.465606119394712, "grad_norm": 0.7413877248764038, "last_batch_tokens": 252, "learning_rate": 5.56749901196638e-05, "loss": 0.2307398223876953, "lr": 5.5605653390431875e-05, "step": 1050, "tokens_per_second": 85.43713054173512, "tokens_per_step": 512.1695238095238, "total_tokens_seen": 537778 }, { "epoch": 0.4877778393658888, "grad_norm": 0.43335428833961487, "last_batch_tokens": 142, "learning_rate": 5.219742991006728e-05, "loss": 0.24115974426269532, "lr": 5.21277130607795e-05, "step": 1100, "tokens_per_second": 75.7193860694182, "tokens_per_step": 556.2981818181818, "total_tokens_seen": 611928 }, { "epoch": 0.4988636993514772, "eval_loss": 0.24811844527721405, "eval_runtime": 94.9042, "eval_samples_per_second": 3.825, "eval_steps_per_second": 1.918, "last_batch_tokens": 172, "lr": 5.038379808781369e-05, "step": 1125, "tokens_per_second": 123.01878328450903, "tokens_per_step": 607.1377777777777, "total_tokens_seen": 683030 }, { "epoch": 0.5099495593370655, "grad_norm": 0.6529182195663452, "last_batch_tokens": 102, "learning_rate": 4.870917354877421e-05, "loss": 0.22134504318237305, "lr": 4.8639415931321794e-05, "step": 1150, "tokens_per_second": 83.41761800246071, "tokens_per_step": 630.3573913043479, "total_tokens_seen": 724911 }, { "epoch": 0.5321212793082424, "grad_norm": 0.4320646822452545, "last_batch_tokens": 175, "learning_rate": 4.522720038016592e-05, "loss": 0.2152995491027832, "lr": 4.515774154488211e-05, "step": 1200, "tokens_per_second": 82.13691539662977, "tokens_per_step": 672.07, "total_tokens_seen": 806484 }, { "epoch": 0.5542929992794191, "grad_norm": 0.6192132234573364, "last_batch_tokens": 267, "learning_rate": 4.1768459164721196e-05, "loss": 0.20546873092651366, "lr": 4.1699637207595034e-05, "step": 1250, "tokens_per_second": 83.92327455847254, "tokens_per_step": 710.0544, "total_tokens_seen": 887568 }, { "epoch": 0.5542929992794191, "eval_loss": 0.23204679787158966, "eval_runtime": 94.3616, "eval_samples_per_second": 3.847, "eval_steps_per_second": 1.929, "last_batch_tokens": 172, "lr": 4.1699637207595034e-05, "step": 1250, "tokens_per_second": 340.54961477393465, "tokens_per_step": 735.7648, "total_tokens_seen": 919706 }, { "epoch": 0.5764647192505958, "grad_norm": 0.3487900495529175, "last_batch_tokens": 134, "learning_rate": 3.8349785579678194e-05, "loss": 0.21177234649658203, "lr": 3.828193549664752e-05, "step": 1300, "tokens_per_second": 79.01101943117263, "tokens_per_step": 766.2323076923077, "total_tokens_seen": 996102 }, { "epoch": 0.5986364392217727, "grad_norm": 0.42593374848365784, "last_batch_tokens": 942, "learning_rate": 3.498782027013742e-05, "loss": 0.2180424690246582, "lr": 3.492127232647139e-05, "step": 1350, "tokens_per_second": 80.48352941836103, "tokens_per_step": 795.4074074074074, "total_tokens_seen": 1073800 }, { "epoch": 0.609722299207361, "eval_loss": 0.2193347066640854, "eval_runtime": 94.4814, "eval_samples_per_second": 3.842, "eval_steps_per_second": 1.926, "last_batch_tokens": 172, "lr": 3.326745518863976e-05, "step": 1375, "tokens_per_second": 124.66627567382365, "tokens_per_step": 832.9498181818182, "total_tokens_seen": 1145306 }, { "epoch": 0.6208081591929494, "grad_norm": 0.3440966010093689, "last_batch_tokens": 176, "learning_rate": 3.169892784949768e-05, "loss": 0.22419458389282226, "lr": 3.163400597220633e-05, "step": 1400, "tokens_per_second": 84.21467446062582, "tokens_per_step": 847.435, "total_tokens_seen": 1186409 }, { "epoch": 0.6429798791641261, "grad_norm": 0.48472294211387634, "last_batch_tokens": 99, "learning_rate": 2.8499117243496988e-05, "loss": 0.20303966522216796, "lr": 2.843613744459269e-05, "step": 1450, "tokens_per_second": 84.12853596803436, "tokens_per_step": 874.0124137931034, "total_tokens_seen": 1267318 }, { "epoch": 0.665151599135303, "grad_norm": 0.48055633902549744, "last_batch_tokens": 92, "learning_rate": 2.5403963765589118e-05, "loss": 0.18697463989257812, "lr": 2.5343232603874866e-05, "step": 1500, "tokens_per_second": 83.84733093235428, "tokens_per_step": 900.2046666666666, "total_tokens_seen": 1350307 }, { "epoch": 0.665151599135303, "eval_loss": 0.20863106846809387, "eval_runtime": 94.5131, "eval_samples_per_second": 3.841, "eval_steps_per_second": 1.926, "last_batch_tokens": 172, "lr": 2.5343232603874866e-05, "step": 1500, "tokens_per_second": 340.00444794736484, "tokens_per_step": 921.63, "total_tokens_seen": 1382445 }, { "epoch": 0.6873233191064797, "grad_norm": 0.41916459798812866, "last_batch_tokens": 426, "learning_rate": 2.2428533302959837e-05, "loss": 0.201729736328125, "lr": 2.2370346391831737e-05, "step": 1550, "tokens_per_second": 80.49134617228279, "tokens_per_step": 942.8058064516129, "total_tokens_seen": 1461349 }, { "epoch": 0.7094950390776564, "grad_norm": 0.38731154799461365, "last_batch_tokens": 312, "learning_rate": 1.9587308982213076e-05, "loss": 0.18205615997314453, "lr": 1.953194955074038e-05, "step": 1600, "tokens_per_second": 79.4246014683713, "tokens_per_step": 961.505, "total_tokens_seen": 1538408 }, { "epoch": 0.7205808990632449, "eval_loss": 0.20174801349639893, "eval_runtime": 94.2485, "eval_samples_per_second": 3.852, "eval_steps_per_second": 1.931, "last_batch_tokens": 172, "lr": 1.816752961112065e-05, "step": 1625, "tokens_per_second": 120.14447834109774, "tokens_per_step": 988.5981538461539, "total_tokens_seen": 1606472 }, { "epoch": 0.7316667590488332, "grad_norm": 0.42647936940193176, "last_batch_tokens": 168, "learning_rate": 1.6894120671686986e-05, "loss": 0.1889303970336914, "lr": 1.6841858185973775e-05, "step": 1650, "tokens_per_second": 75.64734832954207, "tokens_per_step": 995.8060606060606, "total_tokens_seen": 1643080 }, { "epoch": 0.75383847902001, "grad_norm": 0.41556963324546814, "last_batch_tokens": 169, "learning_rate": 1.4362077663552753e-05, "loss": 0.1900373077392578, "lr": 1.4313166515091864e-05, "step": 1700, "tokens_per_second": 76.28403702273542, "tokens_per_step": 1009.9758823529412, "total_tokens_seen": 1716959 }, { "epoch": 0.7760101989911867, "grad_norm": 0.4044085443019867, "last_batch_tokens": 140, "learning_rate": 1.2003504863370746e-05, "loss": 0.1899305534362793, "lr": 1.1958183130774469e-05, "step": 1750, "tokens_per_second": 84.05214560453553, "tokens_per_step": 1027.8245714285715, "total_tokens_seen": 1798693 }, { "epoch": 0.7760101989911867, "eval_loss": 0.19616812467575073, "eval_runtime": 94.684, "eval_samples_per_second": 3.834, "eval_steps_per_second": 1.922, "last_batch_tokens": 172, "lr": 1.1958183130774469e-05, "step": 1750, "tokens_per_second": 339.39215730410245, "tokens_per_step": 1046.1891428571428, "total_tokens_seen": 1830831 }, { "epoch": 0.7981819189623635, "grad_norm": 0.5659682154655457, "last_batch_tokens": 103, "learning_rate": 9.829882797706336e-06, "loss": 0.1962204933166504, "lr": 9.788371087841237e-06, "step": 1800, "tokens_per_second": 83.61138183187425, "tokens_per_step": 1063.1733333333334, "total_tokens_seen": 1913712 }, { "epoch": 0.8203536389335403, "grad_norm": 0.3827808201313019, "last_batch_tokens": 211, "learning_rate": 7.85179173182246e-06, "loss": 0.17033554077148438, "lr": 7.814292105989308e-06, "step": 1850, "tokens_per_second": 82.77525601918174, "tokens_per_step": 1078.207027027027, "total_tokens_seen": 1994683 }, { "epoch": 0.8314394989191286, "eval_loss": 0.19150112569332123, "eval_runtime": 94.6106, "eval_samples_per_second": 3.837, "eval_steps_per_second": 1.924, "last_batch_tokens": 172, "lr": 6.9036938458111764e-06, "step": 1875, "tokens_per_second": 129.4537055546321, "tokens_per_step": 1103.9941333333334, "total_tokens_seen": 2069989 }, { "epoch": 0.842525358904717, "grad_norm": 0.4506838917732239, "last_batch_tokens": 132, "learning_rate": 6.078860169460415e-06, "loss": 0.18061737060546876, "lr": 6.045555159845828e-06, "step": 1900, "tokens_per_second": 84.8577689646082, "tokens_per_step": 1111.4515789473685, "total_tokens_seen": 2111758 }, { "epoch": 0.8646970788758938, "grad_norm": 0.42664435505867004, "last_batch_tokens": 123, "learning_rate": 4.519717985389665e-06, "loss": 0.18581958770751952, "lr": 4.490769706577352e-06, "step": 1950, "tokens_per_second": 81.16470478657682, "tokens_per_step": 1123.2635897435898, "total_tokens_seen": 2190364 }, { "epoch": 0.8868687988470706, "grad_norm": 0.3934974670410156, "last_batch_tokens": 291, "learning_rate": 3.18195441885778e-06, "loss": 0.17605453491210937, "lr": 3.157503778723847e-06, "step": 2000, "tokens_per_second": 78.4029933600584, "tokens_per_step": 1133.7585, "total_tokens_seen": 2267517 }, { "epoch": 0.8868687988470706, "eval_loss": 0.1900114119052887, "eval_runtime": 94.4943, "eval_samples_per_second": 3.842, "eval_steps_per_second": 1.926, "last_batch_tokens": 172, "lr": 3.157503778723847e-06, "step": 2000, "tokens_per_second": 340.07002840114217, "tokens_per_step": 1149.8275, "total_tokens_seen": 2299655 }, { "epoch": 0.9090405188182473, "grad_norm": 0.44616127014160156, "last_batch_tokens": 151, "learning_rate": 2.072081132410253e-06, "loss": 0.1782122802734375, "lr": 2.0522471462437796e-06, "step": 2050, "tokens_per_second": 81.0157351221381, "tokens_per_step": 1160.878536585366, "total_tokens_seen": 2379801 }, { "epoch": 0.931212238789424, "grad_norm": 0.4230777621269226, "last_batch_tokens": 188, "learning_rate": 1.195500515894149e-06, "loss": 0.17306018829345704, "lr": 1.1803797270814765e-06, "step": 2100, "tokens_per_second": 80.14939686559167, "tokens_per_step": 1170.3680952380953, "total_tokens_seen": 2457773 }, { "epoch": 0.9422980987750125, "eval_loss": 0.18897105753421783, "eval_runtime": 95.0115, "eval_samples_per_second": 3.821, "eval_steps_per_second": 1.916, "last_batch_tokens": 172, "lr": 8.333381642750881e-07, "step": 2125, "tokens_per_second": 120.10909547338339, "tokens_per_step": 1188.5943529411766, "total_tokens_seen": 2525763 }, { "epoch": 0.9533839587606009, "grad_norm": 0.2957008183002472, "last_batch_tokens": 305, "learning_rate": 5.564793899281884e-07, "loss": 0.1782497787475586, "lr": 5.461454000209198e-07, "step": 2150, "tokens_per_second": 83.24645935651418, "tokens_per_step": 1193.8697674418604, "total_tokens_seen": 2566820 }, { "epoch": 0.9755556787317776, "grad_norm": 0.49967435002326965, "last_batch_tokens": 156, "learning_rate": 1.5812823683962197e-07, "loss": 0.19703115463256837, "lr": 1.5263134729363583e-07, "step": 2200, "tokens_per_second": 75.00656410059429, "tokens_per_step": 1199.9336363636364, "total_tokens_seen": 2639854 }, { "epoch": 0.9977273987029543, "grad_norm": 0.26038259267807007, "last_batch_tokens": 322, "learning_rate": 2.386060162717918e-09, "loss": 0.17010717391967772, "lr": 1.7530274921462308e-09, "step": 2250, "tokens_per_second": 78.96076733362268, "tokens_per_step": 1208.1137777777778, "total_tokens_seen": 2718256 }, { "epoch": 0.9977273987029543, "eval_loss": 0.18876151740550995, "eval_runtime": 95.314, "eval_samples_per_second": 3.808, "eval_steps_per_second": 1.909, "last_batch_tokens": 172, "lr": 1.7530274921462308e-09, "step": 2250, "tokens_per_second": 337.1431434660513, "tokens_per_step": 1222.3973333333333, "total_tokens_seen": 2750394 } ], "logging_steps": 50, "max_steps": 2256, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 250, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.666058653049815e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }