| { |
| "best_metric": 94.22774869109946, |
| "best_model_checkpoint": "outputs/bitfit/t5-base/qqp/checkpoint-8600", |
| "epoch": 3.0, |
| "global_step": 34017, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "eval_accuracy": 93.60000000000001, |
| "eval_average_metrics": 92.41643835616439, |
| "eval_f1": 91.23287671232877, |
| "eval_loss": 0.06451932340860367, |
| "eval_runtime": 4.5218, |
| "eval_samples_per_second": 221.15, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_accuracy": 90.2, |
| "eval_average_metrics": 89.15339805825244, |
| "eval_f1": 88.10679611650487, |
| "eval_loss": 0.08826350420713425, |
| "eval_runtime": 4.5364, |
| "eval_samples_per_second": 220.438, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00029559044007408056, |
| "loss": 0.1448, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_accuracy": 93.7, |
| "eval_average_metrics": 92.76439688715953, |
| "eval_f1": 91.82879377431907, |
| "eval_loss": 0.061929114162921906, |
| "eval_runtime": 4.5766, |
| "eval_samples_per_second": 218.503, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_accuracy": 93.30000000000001, |
| "eval_average_metrics": 92.3385456885457, |
| "eval_f1": 91.37709137709138, |
| "eval_loss": 0.0669504851102829, |
| "eval_runtime": 4.5237, |
| "eval_samples_per_second": 221.059, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.0002911808801481612, |
| "loss": 0.0782, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_accuracy": 93.7, |
| "eval_average_metrics": 92.74308996088658, |
| "eval_f1": 91.78617992177314, |
| "eval_loss": 0.06603064388036728, |
| "eval_runtime": 4.727, |
| "eval_samples_per_second": 211.549, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_accuracy": 93.30000000000001, |
| "eval_average_metrics": 92.14125168236878, |
| "eval_f1": 90.98250336473754, |
| "eval_loss": 0.06394415348768234, |
| "eval_runtime": 4.6179, |
| "eval_samples_per_second": 216.548, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.12, |
| "eval_accuracy": 93.30000000000001, |
| "eval_average_metrics": 92.07974079126876, |
| "eval_f1": 90.85948158253751, |
| "eval_loss": 0.059915054589509964, |
| "eval_runtime": 4.4912, |
| "eval_samples_per_second": 222.655, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0002867713202222418, |
| "loss": 0.0773, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_accuracy": 93.0, |
| "eval_average_metrics": 92.0242966751918, |
| "eval_f1": 91.04859335038363, |
| "eval_loss": 0.06896140426397324, |
| "eval_runtime": 4.614, |
| "eval_samples_per_second": 216.732, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_accuracy": 93.60000000000001, |
| "eval_average_metrics": 92.61151832460735, |
| "eval_f1": 91.62303664921467, |
| "eval_loss": 0.06234096363186836, |
| "eval_runtime": 4.6818, |
| "eval_samples_per_second": 213.593, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0002823617602963224, |
| "loss": 0.0746, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.23333333333332, |
| "eval_f1": 92.26666666666667, |
| "eval_loss": 0.0571160614490509, |
| "eval_runtime": 4.7897, |
| "eval_samples_per_second": 208.783, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_accuracy": 93.60000000000001, |
| "eval_average_metrics": 92.54468085106384, |
| "eval_f1": 91.48936170212767, |
| "eval_loss": 0.059621669352054596, |
| "eval_runtime": 4.6087, |
| "eval_samples_per_second": 216.979, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_accuracy": 93.60000000000001, |
| "eval_average_metrics": 92.36786703601109, |
| "eval_f1": 91.13573407202216, |
| "eval_loss": 0.06162749230861664, |
| "eval_runtime": 4.7312, |
| "eval_samples_per_second": 211.361, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00027795220037040303, |
| "loss": 0.0771, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.23, |
| "eval_accuracy": 93.10000000000001, |
| "eval_average_metrics": 91.91912751677853, |
| "eval_f1": 90.73825503355705, |
| "eval_loss": 0.06196223199367523, |
| "eval_runtime": 4.4778, |
| "eval_samples_per_second": 223.322, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_accuracy": 93.7, |
| "eval_average_metrics": 92.72155963302754, |
| "eval_f1": 91.74311926605506, |
| "eval_loss": 0.059104129672050476, |
| "eval_runtime": 4.462, |
| "eval_samples_per_second": 224.113, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0002735426404444836, |
| "loss": 0.0764, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.1633069828722, |
| "eval_f1": 92.2266139657444, |
| "eval_loss": 0.059123676270246506, |
| "eval_runtime": 4.5787, |
| "eval_samples_per_second": 218.4, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.28, |
| "eval_accuracy": 91.5, |
| "eval_average_metrics": 90.49660074165637, |
| "eval_f1": 89.49320148331273, |
| "eval_loss": 0.07883985340595245, |
| "eval_runtime": 4.5148, |
| "eval_samples_per_second": 221.496, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.3, |
| "eval_accuracy": 93.10000000000001, |
| "eval_average_metrics": 92.02837483617301, |
| "eval_f1": 90.956749672346, |
| "eval_loss": 0.06356123834848404, |
| "eval_runtime": 4.5836, |
| "eval_samples_per_second": 218.171, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.0002691330805185642, |
| "loss": 0.0732, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.32, |
| "eval_accuracy": 93.7, |
| "eval_average_metrics": 92.598987854251, |
| "eval_f1": 91.49797570850201, |
| "eval_loss": 0.058661118149757385, |
| "eval_runtime": 4.581, |
| "eval_samples_per_second": 218.292, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.34, |
| "eval_accuracy": 93.10000000000001, |
| "eval_average_metrics": 92.00454545454545, |
| "eval_f1": 90.90909090909089, |
| "eval_loss": 0.06345341354608536, |
| "eval_runtime": 4.6337, |
| "eval_samples_per_second": 215.808, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00026472352059264486, |
| "loss": 0.0745, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.35, |
| "eval_accuracy": 92.7, |
| "eval_average_metrics": 91.72389100126742, |
| "eval_f1": 90.74778200253485, |
| "eval_loss": 0.07112478464841843, |
| "eval_runtime": 4.5897, |
| "eval_samples_per_second": 217.881, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_accuracy": 92.4, |
| "eval_average_metrics": 91.38987341772153, |
| "eval_f1": 90.37974683544304, |
| "eval_loss": 0.06926184892654419, |
| "eval_runtime": 4.5334, |
| "eval_samples_per_second": 220.585, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.2021505376344, |
| "eval_f1": 92.20430107526882, |
| "eval_loss": 0.06072888895869255, |
| "eval_runtime": 4.7545, |
| "eval_samples_per_second": 210.329, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00026031396066672545, |
| "loss": 0.0766, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.41, |
| "eval_accuracy": 93.89999999999999, |
| "eval_average_metrics": 92.8779038718291, |
| "eval_f1": 91.85580774365822, |
| "eval_loss": 0.06117413192987442, |
| "eval_runtime": 4.5554, |
| "eval_samples_per_second": 219.52, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_accuracy": 94.0, |
| "eval_average_metrics": 93.13402061855669, |
| "eval_f1": 92.2680412371134, |
| "eval_loss": 0.060421667993068695, |
| "eval_runtime": 4.5444, |
| "eval_samples_per_second": 220.049, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00025590440074080604, |
| "loss": 0.0729, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.44, |
| "eval_accuracy": 93.0, |
| "eval_average_metrics": 91.95454545454547, |
| "eval_f1": 90.90909090909092, |
| "eval_loss": 0.06169410049915314, |
| "eval_runtime": 4.6688, |
| "eval_samples_per_second": 214.188, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.46, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.3244966442953, |
| "eval_f1": 92.34899328859059, |
| "eval_loss": 0.0613214485347271, |
| "eval_runtime": 4.5049, |
| "eval_samples_per_second": 221.979, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_accuracy": 93.7, |
| "eval_average_metrics": 92.71070959264125, |
| "eval_f1": 91.72141918528251, |
| "eval_loss": 0.06161003187298775, |
| "eval_runtime": 4.5685, |
| "eval_samples_per_second": 218.891, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0002514948408148867, |
| "loss": 0.071, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.49, |
| "eval_accuracy": 93.89999999999999, |
| "eval_average_metrics": 92.95262123197902, |
| "eval_f1": 92.00524246395806, |
| "eval_loss": 0.06118384748697281, |
| "eval_runtime": 4.564, |
| "eval_samples_per_second": 219.107, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_accuracy": 93.0, |
| "eval_average_metrics": 92.0470737913486, |
| "eval_f1": 91.09414758269719, |
| "eval_loss": 0.06946446746587753, |
| "eval_runtime": 4.5787, |
| "eval_samples_per_second": 218.4, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0002470852808889673, |
| "loss": 0.0748, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.53, |
| "eval_accuracy": 94.6, |
| "eval_average_metrics": 93.7095744680851, |
| "eval_f1": 92.81914893617021, |
| "eval_loss": 0.05765092372894287, |
| "eval_runtime": 4.5272, |
| "eval_samples_per_second": 220.889, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.55, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.3244966442953, |
| "eval_f1": 92.34899328859059, |
| "eval_loss": 0.05894589051604271, |
| "eval_runtime": 4.6099, |
| "eval_samples_per_second": 216.924, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.56, |
| "eval_accuracy": 93.7, |
| "eval_average_metrics": 92.598987854251, |
| "eval_f1": 91.49797570850201, |
| "eval_loss": 0.061102479696273804, |
| "eval_runtime": 4.6948, |
| "eval_samples_per_second": 213.001, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00024267572096304786, |
| "loss": 0.074, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.58, |
| "eval_accuracy": 93.0, |
| "eval_average_metrics": 92.03571428571429, |
| "eval_f1": 91.07142857142858, |
| "eval_loss": 0.06452207267284393, |
| "eval_runtime": 4.6106, |
| "eval_samples_per_second": 216.891, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_accuracy": 93.5, |
| "eval_average_metrics": 92.55645161290323, |
| "eval_f1": 91.61290322580645, |
| "eval_loss": 0.05938281863927841, |
| "eval_runtime": 4.5228, |
| "eval_samples_per_second": 221.102, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00023826616103712848, |
| "loss": 0.0738, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_accuracy": 94.6, |
| "eval_average_metrics": 93.77519582245431, |
| "eval_f1": 92.95039164490862, |
| "eval_loss": 0.057858582586050034, |
| "eval_runtime": 4.5704, |
| "eval_samples_per_second": 218.797, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.63, |
| "eval_accuracy": 94.6, |
| "eval_average_metrics": 93.71909814323607, |
| "eval_f1": 92.83819628647215, |
| "eval_loss": 0.05671229586005211, |
| "eval_runtime": 4.4966, |
| "eval_samples_per_second": 222.39, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.65, |
| "eval_accuracy": 94.5, |
| "eval_average_metrics": 93.64580602883355, |
| "eval_f1": 92.7916120576671, |
| "eval_loss": 0.059491805732250214, |
| "eval_runtime": 4.5973, |
| "eval_samples_per_second": 217.521, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00023385660111120907, |
| "loss": 0.0746, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_accuracy": 94.5, |
| "eval_average_metrics": 93.53879892037787, |
| "eval_f1": 92.57759784075573, |
| "eval_loss": 0.057486891746520996, |
| "eval_runtime": 4.6372, |
| "eval_samples_per_second": 215.649, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.69, |
| "eval_accuracy": 94.69999999999999, |
| "eval_average_metrics": 93.81194926568757, |
| "eval_f1": 92.92389853137516, |
| "eval_loss": 0.05628298968076706, |
| "eval_runtime": 4.6937, |
| "eval_samples_per_second": 213.051, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0002294470411852897, |
| "loss": 0.0762, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.71, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.4566844919786, |
| "eval_f1": 92.51336898395722, |
| "eval_loss": 0.05849047377705574, |
| "eval_runtime": 4.6139, |
| "eval_samples_per_second": 216.737, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.72, |
| "eval_accuracy": 94.69999999999999, |
| "eval_average_metrics": 93.86773981603153, |
| "eval_f1": 93.03547963206307, |
| "eval_loss": 0.056792281568050385, |
| "eval_runtime": 4.6916, |
| "eval_samples_per_second": 213.147, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.74, |
| "eval_accuracy": 94.5, |
| "eval_average_metrics": 93.59794156706508, |
| "eval_f1": 92.69588313413014, |
| "eval_loss": 0.05638590082526207, |
| "eval_runtime": 4.6952, |
| "eval_samples_per_second": 212.982, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.0002250374812593703, |
| "loss": 0.0726, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.76, |
| "eval_accuracy": 95.0, |
| "eval_average_metrics": 94.22774869109946, |
| "eval_f1": 93.45549738219894, |
| "eval_loss": 0.055720701813697815, |
| "eval_runtime": 4.5004, |
| "eval_samples_per_second": 222.204, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.78, |
| "eval_accuracy": 94.0, |
| "eval_average_metrics": 93.08355091383812, |
| "eval_f1": 92.16710182767625, |
| "eval_loss": 0.06084197014570236, |
| "eval_runtime": 4.5822, |
| "eval_samples_per_second": 218.238, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0002206279213334509, |
| "loss": 0.0734, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_accuracy": 93.10000000000001, |
| "eval_average_metrics": 92.14386973180078, |
| "eval_f1": 91.18773946360155, |
| "eval_loss": 0.06530317664146423, |
| "eval_runtime": 4.5035, |
| "eval_samples_per_second": 222.05, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_accuracy": 93.8, |
| "eval_average_metrics": 92.76666666666665, |
| "eval_f1": 91.73333333333332, |
| "eval_loss": 0.05946441367268562, |
| "eval_runtime": 4.8109, |
| "eval_samples_per_second": 207.861, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.4566844919786, |
| "eval_f1": 92.51336898395722, |
| "eval_loss": 0.059339020401239395, |
| "eval_runtime": 4.5265, |
| "eval_samples_per_second": 220.922, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00021621836140753152, |
| "loss": 0.0731, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.85, |
| "eval_accuracy": 92.60000000000001, |
| "eval_average_metrics": 91.64005037783375, |
| "eval_f1": 90.6801007556675, |
| "eval_loss": 0.07186109572649002, |
| "eval_runtime": 4.5431, |
| "eval_samples_per_second": 220.114, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.86, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.1633069828722, |
| "eval_f1": 92.2266139657444, |
| "eval_loss": 0.05946135148406029, |
| "eval_runtime": 4.5655, |
| "eval_samples_per_second": 219.036, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0002118088014816121, |
| "loss": 0.0733, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.88, |
| "eval_accuracy": 93.89999999999999, |
| "eval_average_metrics": 92.95262123197902, |
| "eval_f1": 92.00524246395806, |
| "eval_loss": 0.06076710671186447, |
| "eval_runtime": 4.5407, |
| "eval_samples_per_second": 220.229, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.34492656875835, |
| "eval_f1": 92.3898531375167, |
| "eval_loss": 0.055939000099897385, |
| "eval_runtime": 4.4996, |
| "eval_samples_per_second": 222.24, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.92, |
| "eval_accuracy": 94.8, |
| "eval_average_metrics": 93.96084656084656, |
| "eval_f1": 93.12169312169313, |
| "eval_loss": 0.05636580288410187, |
| "eval_runtime": 4.4643, |
| "eval_samples_per_second": 223.998, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00020739924155569272, |
| "loss": 0.0738, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.93, |
| "eval_accuracy": 93.8, |
| "eval_average_metrics": 92.71081081081081, |
| "eval_f1": 91.62162162162161, |
| "eval_loss": 0.059175312519073486, |
| "eval_runtime": 4.7877, |
| "eval_samples_per_second": 208.867, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.95, |
| "eval_accuracy": 93.89999999999999, |
| "eval_average_metrics": 92.91026490066224, |
| "eval_f1": 91.9205298013245, |
| "eval_loss": 0.059644319117069244, |
| "eval_runtime": 4.6781, |
| "eval_samples_per_second": 213.761, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.0002029896816297733, |
| "loss": 0.0752, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.97, |
| "eval_accuracy": 93.60000000000001, |
| "eval_average_metrics": 92.622454308094, |
| "eval_f1": 91.644908616188, |
| "eval_loss": 0.061212606728076935, |
| "eval_runtime": 4.4874, |
| "eval_samples_per_second": 222.845, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.41474442988203, |
| "eval_f1": 92.52948885976409, |
| "eval_loss": 0.059587035328149796, |
| "eval_runtime": 4.5784, |
| "eval_samples_per_second": 218.418, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.01, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.2533462033462, |
| "eval_f1": 92.4066924066924, |
| "eval_loss": 0.060919877141714096, |
| "eval_runtime": 4.5416, |
| "eval_samples_per_second": 220.185, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00019858012170385393, |
| "loss": 0.0716, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.02, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.11141522029372, |
| "eval_f1": 92.12283044058745, |
| "eval_loss": 0.05961688980460167, |
| "eval_runtime": 4.5998, |
| "eval_samples_per_second": 217.402, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.04, |
| "eval_accuracy": 94.0, |
| "eval_average_metrics": 93.0212201591512, |
| "eval_f1": 92.04244031830238, |
| "eval_loss": 0.06122226640582085, |
| "eval_runtime": 4.6213, |
| "eval_samples_per_second": 216.391, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00019417056177793455, |
| "loss": 0.0713, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.06, |
| "eval_accuracy": 94.0, |
| "eval_average_metrics": 92.96774193548387, |
| "eval_f1": 91.93548387096774, |
| "eval_loss": 0.06119931861758232, |
| "eval_runtime": 4.5888, |
| "eval_samples_per_second": 217.92, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.08, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.31419919246298, |
| "eval_f1": 92.32839838492598, |
| "eval_loss": 0.05847727879881859, |
| "eval_runtime": 4.4633, |
| "eval_samples_per_second": 224.05, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.09, |
| "eval_accuracy": 93.4, |
| "eval_average_metrics": 92.39190600522193, |
| "eval_f1": 91.38381201044386, |
| "eval_loss": 0.06247144192457199, |
| "eval_runtime": 4.5667, |
| "eval_samples_per_second": 218.978, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00018976100185201514, |
| "loss": 0.0687, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.11, |
| "eval_accuracy": 93.89999999999999, |
| "eval_average_metrics": 93.04475032010242, |
| "eval_f1": 92.18950064020484, |
| "eval_loss": 0.0635332465171814, |
| "eval_runtime": 4.5944, |
| "eval_samples_per_second": 217.654, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.13, |
| "eval_accuracy": 94.0, |
| "eval_average_metrics": 93.05263157894737, |
| "eval_f1": 92.10526315789474, |
| "eval_loss": 0.06063272804021835, |
| "eval_runtime": 4.5058, |
| "eval_samples_per_second": 221.934, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00018535144192609576, |
| "loss": 0.0711, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.15, |
| "eval_accuracy": 94.0, |
| "eval_average_metrics": 93.12403100775194, |
| "eval_f1": 92.24806201550389, |
| "eval_loss": 0.06045162305235863, |
| "eval_runtime": 4.6598, |
| "eval_samples_per_second": 214.601, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.16, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.50606860158311, |
| "eval_f1": 92.61213720316623, |
| "eval_loss": 0.06117108836770058, |
| "eval_runtime": 4.501, |
| "eval_samples_per_second": 222.172, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.4962962962963, |
| "eval_f1": 92.5925925925926, |
| "eval_loss": 0.05846463143825531, |
| "eval_runtime": 4.4931, |
| "eval_samples_per_second": 222.561, |
| "step": 13400 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00018094188200017637, |
| "loss": 0.0694, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.2, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.06889338731443, |
| "eval_f1": 92.03778677462888, |
| "eval_loss": 0.05917409434914589, |
| "eval_runtime": 4.6082, |
| "eval_samples_per_second": 217.003, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.22, |
| "eval_accuracy": 93.8, |
| "eval_average_metrics": 92.85300261096606, |
| "eval_f1": 91.9060052219321, |
| "eval_loss": 0.06280769407749176, |
| "eval_runtime": 4.5282, |
| "eval_samples_per_second": 220.836, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00017653232207425696, |
| "loss": 0.0741, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.23, |
| "eval_accuracy": 93.60000000000001, |
| "eval_average_metrics": 92.65492227979274, |
| "eval_f1": 91.70984455958549, |
| "eval_loss": 0.06333824247121811, |
| "eval_runtime": 4.4743, |
| "eval_samples_per_second": 223.497, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_accuracy": 93.89999999999999, |
| "eval_average_metrics": 92.94211563731932, |
| "eval_f1": 91.98423127463865, |
| "eval_loss": 0.06064913421869278, |
| "eval_runtime": 4.6765, |
| "eval_samples_per_second": 213.836, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.27, |
| "eval_accuracy": 92.60000000000001, |
| "eval_average_metrics": 91.62828282828283, |
| "eval_f1": 90.65656565656566, |
| "eval_loss": 0.07161322236061096, |
| "eval_runtime": 4.5138, |
| "eval_samples_per_second": 221.545, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00017212276214833758, |
| "loss": 0.0715, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.29, |
| "eval_accuracy": 93.7, |
| "eval_average_metrics": 92.6998023715415, |
| "eval_f1": 91.699604743083, |
| "eval_loss": 0.06242042034864426, |
| "eval_runtime": 4.764, |
| "eval_samples_per_second": 209.909, |
| "step": 14600 |
| }, |
| { |
| "epoch": 1.31, |
| "eval_accuracy": 93.7, |
| "eval_average_metrics": 92.73235294117647, |
| "eval_f1": 91.76470588235294, |
| "eval_loss": 0.0626644566655159, |
| "eval_runtime": 4.4732, |
| "eval_samples_per_second": 223.555, |
| "step": 14800 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.0001677132022224182, |
| "loss": 0.0714, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.32, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.54464751958224, |
| "eval_f1": 92.68929503916449, |
| "eval_loss": 0.05990656465291977, |
| "eval_runtime": 4.5922, |
| "eval_samples_per_second": 217.762, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.34, |
| "eval_accuracy": 94.6, |
| "eval_average_metrics": 93.73799472295514, |
| "eval_f1": 92.87598944591029, |
| "eval_loss": 0.060957495123147964, |
| "eval_runtime": 4.4536, |
| "eval_samples_per_second": 224.537, |
| "step": 15200 |
| }, |
| { |
| "epoch": 1.36, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.51578947368421, |
| "eval_f1": 92.63157894736842, |
| "eval_loss": 0.06167261675000191, |
| "eval_runtime": 4.4865, |
| "eval_samples_per_second": 222.89, |
| "step": 15400 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.0001633036422964988, |
| "loss": 0.0707, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.51578947368421, |
| "eval_f1": 92.63157894736842, |
| "eval_loss": 0.061066027730703354, |
| "eval_runtime": 4.5716, |
| "eval_samples_per_second": 218.74, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.39, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.1633069828722, |
| "eval_f1": 92.2266139657444, |
| "eval_loss": 0.06235107034444809, |
| "eval_runtime": 4.499, |
| "eval_samples_per_second": 222.27, |
| "step": 15800 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00015889408237057938, |
| "loss": 0.0709, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.41, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.47659574468085, |
| "eval_f1": 92.55319148936171, |
| "eval_loss": 0.06194847822189331, |
| "eval_runtime": 4.5231, |
| "eval_samples_per_second": 221.086, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.43, |
| "eval_accuracy": 94.6, |
| "eval_average_metrics": 93.76596858638743, |
| "eval_f1": 92.93193717277488, |
| "eval_loss": 0.05966123938560486, |
| "eval_runtime": 4.637, |
| "eval_samples_per_second": 215.656, |
| "step": 16200 |
| }, |
| { |
| "epoch": 1.45, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.44664879356569, |
| "eval_f1": 92.49329758713138, |
| "eval_loss": 0.06104936823248863, |
| "eval_runtime": 4.5374, |
| "eval_samples_per_second": 220.391, |
| "step": 16400 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00015448452244466002, |
| "loss": 0.0729, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.46, |
| "eval_accuracy": 94.69999999999999, |
| "eval_average_metrics": 93.87686762778506, |
| "eval_f1": 93.05373525557013, |
| "eval_loss": 0.06205834820866585, |
| "eval_runtime": 4.4838, |
| "eval_samples_per_second": 223.023, |
| "step": 16600 |
| }, |
| { |
| "epoch": 1.48, |
| "eval_accuracy": 94.6, |
| "eval_average_metrics": 93.78437500000001, |
| "eval_f1": 92.96875000000001, |
| "eval_loss": 0.06089754402637482, |
| "eval_runtime": 4.5038, |
| "eval_samples_per_second": 222.035, |
| "step": 16800 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.00015007496251874061, |
| "loss": 0.07, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.5, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.36288659793814, |
| "eval_f1": 92.52577319587628, |
| "eval_loss": 0.06112566590309143, |
| "eval_runtime": 4.6026, |
| "eval_samples_per_second": 217.269, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.52, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.48647214854111, |
| "eval_f1": 92.57294429708222, |
| "eval_loss": 0.06089947372674942, |
| "eval_runtime": 4.5389, |
| "eval_samples_per_second": 220.318, |
| "step": 17200 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.24354838709678, |
| "eval_f1": 92.38709677419355, |
| "eval_loss": 0.06110972911119461, |
| "eval_runtime": 4.6859, |
| "eval_samples_per_second": 213.405, |
| "step": 17400 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 0.00014566540259282123, |
| "loss": 0.0669, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.55, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.38513870541613, |
| "eval_f1": 92.47027741083225, |
| "eval_loss": 0.06174538657069206, |
| "eval_runtime": 4.6674, |
| "eval_samples_per_second": 214.254, |
| "step": 17600 |
| }, |
| { |
| "epoch": 1.57, |
| "eval_accuracy": 94.6, |
| "eval_average_metrics": 93.7095744680851, |
| "eval_f1": 92.81914893617021, |
| "eval_loss": 0.059681929647922516, |
| "eval_runtime": 4.6196, |
| "eval_samples_per_second": 216.471, |
| "step": 17800 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 0.00014125584266690182, |
| "loss": 0.07, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.59, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.29422572178477, |
| "eval_f1": 92.38845144356955, |
| "eval_loss": 0.061346184462308884, |
| "eval_runtime": 4.4984, |
| "eval_samples_per_second": 222.301, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.61, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.28421052631577, |
| "eval_f1": 92.36842105263158, |
| "eval_loss": 0.06077203154563904, |
| "eval_runtime": 4.454, |
| "eval_samples_per_second": 224.518, |
| "step": 18200 |
| }, |
| { |
| "epoch": 1.62, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.13233731739707, |
| "eval_f1": 92.16467463479415, |
| "eval_loss": 0.05959217995405197, |
| "eval_runtime": 4.588, |
| "eval_samples_per_second": 217.959, |
| "step": 18400 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 0.00013684628274098244, |
| "loss": 0.069, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.64, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.46666666666665, |
| "eval_f1": 92.53333333333332, |
| "eval_loss": 0.06017257645726204, |
| "eval_runtime": 4.483, |
| "eval_samples_per_second": 223.066, |
| "step": 18600 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_accuracy": 94.5, |
| "eval_average_metrics": 93.58821571238349, |
| "eval_f1": 92.67643142476697, |
| "eval_loss": 0.058851905167102814, |
| "eval_runtime": 4.5048, |
| "eval_samples_per_second": 221.985, |
| "step": 18800 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 0.00013243672281506306, |
| "loss": 0.0713, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.68, |
| "eval_accuracy": 93.89999999999999, |
| "eval_average_metrics": 92.97346805736636, |
| "eval_f1": 92.04693611473273, |
| "eval_loss": 0.06167756766080856, |
| "eval_runtime": 4.4859, |
| "eval_samples_per_second": 222.921, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.69, |
| "eval_accuracy": 93.7, |
| "eval_average_metrics": 92.6998023715415, |
| "eval_f1": 91.699604743083, |
| "eval_loss": 0.06253690272569656, |
| "eval_runtime": 4.582, |
| "eval_samples_per_second": 218.244, |
| "step": 19200 |
| }, |
| { |
| "epoch": 1.71, |
| "eval_accuracy": 93.89999999999999, |
| "eval_average_metrics": 93.00433376455368, |
| "eval_f1": 92.10866752910736, |
| "eval_loss": 0.06255872547626495, |
| "eval_runtime": 4.5188, |
| "eval_samples_per_second": 221.296, |
| "step": 19400 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 0.00012802716288914365, |
| "loss": 0.0699, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.73, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.40492772667542, |
| "eval_f1": 92.50985545335085, |
| "eval_loss": 0.062451381236314774, |
| "eval_runtime": 4.5919, |
| "eval_samples_per_second": 217.773, |
| "step": 19600 |
| }, |
| { |
| "epoch": 1.75, |
| "eval_accuracy": 94.0, |
| "eval_average_metrics": 93.01063829787235, |
| "eval_f1": 92.0212765957447, |
| "eval_loss": 0.06319490820169449, |
| "eval_runtime": 4.591, |
| "eval_samples_per_second": 217.817, |
| "step": 19800 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 0.00012361760296322426, |
| "loss": 0.0698, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.76, |
| "eval_accuracy": 93.5, |
| "eval_average_metrics": 92.51271186440678, |
| "eval_f1": 91.52542372881356, |
| "eval_loss": 0.06364640593528748, |
| "eval_runtime": 4.5171, |
| "eval_samples_per_second": 221.382, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.78, |
| "eval_accuracy": 93.89999999999999, |
| "eval_average_metrics": 92.99409857328145, |
| "eval_f1": 92.0881971465629, |
| "eval_loss": 0.06635148823261261, |
| "eval_runtime": 4.6206, |
| "eval_samples_per_second": 216.422, |
| "step": 20200 |
| }, |
| { |
| "epoch": 1.8, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.11141522029372, |
| "eval_f1": 92.12283044058745, |
| "eval_loss": 0.0606299452483654, |
| "eval_runtime": 4.4605, |
| "eval_samples_per_second": 224.19, |
| "step": 20400 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 0.00011920804303730487, |
| "loss": 0.0703, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.82, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.32395833333332, |
| "eval_f1": 92.44791666666666, |
| "eval_loss": 0.060722097754478455, |
| "eval_runtime": 4.5249, |
| "eval_samples_per_second": 221.001, |
| "step": 20600 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_accuracy": 93.8, |
| "eval_average_metrics": 92.76666666666665, |
| "eval_f1": 91.73333333333332, |
| "eval_loss": 0.05862819775938988, |
| "eval_runtime": 4.5187, |
| "eval_samples_per_second": 221.304, |
| "step": 20800 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 0.00011479848311138547, |
| "loss": 0.0698, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.85, |
| "eval_accuracy": 93.8, |
| "eval_average_metrics": 92.79947089947089, |
| "eval_f1": 91.7989417989418, |
| "eval_loss": 0.06128830835223198, |
| "eval_runtime": 4.521, |
| "eval_samples_per_second": 221.188, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.87, |
| "eval_accuracy": 93.8, |
| "eval_average_metrics": 92.87402597402597, |
| "eval_f1": 91.94805194805194, |
| "eval_loss": 0.06374780088663101, |
| "eval_runtime": 4.4879, |
| "eval_samples_per_second": 222.822, |
| "step": 21200 |
| }, |
| { |
| "epoch": 1.89, |
| "eval_accuracy": 93.89999999999999, |
| "eval_average_metrics": 92.88874833555259, |
| "eval_f1": 91.87749667110519, |
| "eval_loss": 0.06154455617070198, |
| "eval_runtime": 4.6011, |
| "eval_samples_per_second": 217.337, |
| "step": 21400 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 0.00011038892318546609, |
| "loss": 0.0709, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.9, |
| "eval_accuracy": 94.5, |
| "eval_average_metrics": 93.57843791722297, |
| "eval_f1": 92.65687583444593, |
| "eval_loss": 0.060043178498744965, |
| "eval_runtime": 4.46, |
| "eval_samples_per_second": 224.215, |
| "step": 21600 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_accuracy": 93.89999999999999, |
| "eval_average_metrics": 92.93155467720685, |
| "eval_f1": 91.9631093544137, |
| "eval_loss": 0.061132512986660004, |
| "eval_runtime": 4.4987, |
| "eval_samples_per_second": 222.287, |
| "step": 21800 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 0.00010597936325954669, |
| "loss": 0.0695, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.94, |
| "eval_accuracy": 93.5, |
| "eval_average_metrics": 92.56724581724582, |
| "eval_f1": 91.63449163449164, |
| "eval_loss": 0.06395059078931808, |
| "eval_runtime": 4.6548, |
| "eval_samples_per_second": 214.832, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.96, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.29422572178477, |
| "eval_f1": 92.38845144356955, |
| "eval_loss": 0.06141780689358711, |
| "eval_runtime": 4.4836, |
| "eval_samples_per_second": 223.034, |
| "step": 22200 |
| }, |
| { |
| "epoch": 1.98, |
| "eval_accuracy": 94.5, |
| "eval_average_metrics": 93.65522875816993, |
| "eval_f1": 92.81045751633987, |
| "eval_loss": 0.058759015053510666, |
| "eval_runtime": 4.5162, |
| "eval_samples_per_second": 221.426, |
| "step": 22400 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 0.0001015698033336273, |
| "loss": 0.0715, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.99, |
| "eval_accuracy": 93.89999999999999, |
| "eval_average_metrics": 92.97346805736636, |
| "eval_f1": 92.04693611473273, |
| "eval_loss": 0.06228160858154297, |
| "eval_runtime": 4.4726, |
| "eval_samples_per_second": 223.582, |
| "step": 22600 |
| }, |
| { |
| "epoch": 2.01, |
| "eval_accuracy": 94.6, |
| "eval_average_metrics": 93.73799472295514, |
| "eval_f1": 92.87598944591029, |
| "eval_loss": 0.05991463363170624, |
| "eval_runtime": 4.5003, |
| "eval_samples_per_second": 222.206, |
| "step": 22800 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 9.71602434077079e-05, |
| "loss": 0.0682, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.03, |
| "eval_accuracy": 94.0, |
| "eval_average_metrics": 93.1038961038961, |
| "eval_f1": 92.20779220779221, |
| "eval_loss": 0.061682794243097305, |
| "eval_runtime": 4.611, |
| "eval_samples_per_second": 216.874, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.05, |
| "eval_accuracy": 93.5, |
| "eval_average_metrics": 92.55645161290323, |
| "eval_f1": 91.61290322580645, |
| "eval_loss": 0.06373216211795807, |
| "eval_runtime": 4.6044, |
| "eval_samples_per_second": 217.186, |
| "step": 23200 |
| }, |
| { |
| "epoch": 2.06, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.34492656875835, |
| "eval_f1": 92.3898531375167, |
| "eval_loss": 0.05869932472705841, |
| "eval_runtime": 4.4706, |
| "eval_samples_per_second": 223.684, |
| "step": 23400 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 9.27506834817885e-05, |
| "loss": 0.0652, |
| "step": 23500 |
| }, |
| { |
| "epoch": 2.08, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.50606860158311, |
| "eval_f1": 92.61213720316623, |
| "eval_loss": 0.06166525185108185, |
| "eval_runtime": 4.5725, |
| "eval_samples_per_second": 218.699, |
| "step": 23600 |
| }, |
| { |
| "epoch": 2.1, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.33376623376623, |
| "eval_f1": 92.46753246753246, |
| "eval_loss": 0.06055561453104019, |
| "eval_runtime": 4.5492, |
| "eval_samples_per_second": 219.818, |
| "step": 23800 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 8.834112355586911e-05, |
| "loss": 0.0691, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.12, |
| "eval_accuracy": 93.7, |
| "eval_average_metrics": 92.75377113133939, |
| "eval_f1": 91.8075422626788, |
| "eval_loss": 0.06339309364557266, |
| "eval_runtime": 4.6365, |
| "eval_samples_per_second": 215.678, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.13, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.1633069828722, |
| "eval_f1": 92.2266139657444, |
| "eval_loss": 0.06319531798362732, |
| "eval_runtime": 4.5722, |
| "eval_samples_per_second": 218.712, |
| "step": 24200 |
| }, |
| { |
| "epoch": 2.15, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.14271523178809, |
| "eval_f1": 92.18543046357617, |
| "eval_loss": 0.060979247093200684, |
| "eval_runtime": 4.4363, |
| "eval_samples_per_second": 225.412, |
| "step": 24400 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 8.393156362994973e-05, |
| "loss": 0.0679, |
| "step": 24500 |
| }, |
| { |
| "epoch": 2.17, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.38513870541613, |
| "eval_f1": 92.47027741083225, |
| "eval_loss": 0.061841148883104324, |
| "eval_runtime": 4.4945, |
| "eval_samples_per_second": 222.493, |
| "step": 24600 |
| }, |
| { |
| "epoch": 2.19, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.51578947368421, |
| "eval_f1": 92.63157894736842, |
| "eval_loss": 0.06020021066069603, |
| "eval_runtime": 4.6482, |
| "eval_samples_per_second": 215.136, |
| "step": 24800 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 7.952200370403033e-05, |
| "loss": 0.0678, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.2, |
| "eval_accuracy": 94.69999999999999, |
| "eval_average_metrics": 93.86773981603153, |
| "eval_f1": 93.03547963206307, |
| "eval_loss": 0.061626460403203964, |
| "eval_runtime": 4.4847, |
| "eval_samples_per_second": 222.982, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.22, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.35505992010653, |
| "eval_f1": 92.41011984021304, |
| "eval_loss": 0.05932234972715378, |
| "eval_runtime": 4.444, |
| "eval_samples_per_second": 225.02, |
| "step": 25200 |
| }, |
| { |
| "epoch": 2.24, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.35505992010653, |
| "eval_f1": 92.41011984021304, |
| "eval_loss": 0.05860959738492966, |
| "eval_runtime": 4.4729, |
| "eval_samples_per_second": 223.568, |
| "step": 25400 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 7.511244377811093e-05, |
| "loss": 0.0687, |
| "step": 25500 |
| }, |
| { |
| "epoch": 2.26, |
| "eval_accuracy": 94.6, |
| "eval_average_metrics": 93.74736842105261, |
| "eval_f1": 92.89473684210525, |
| "eval_loss": 0.05995591729879379, |
| "eval_runtime": 4.6311, |
| "eval_samples_per_second": 215.933, |
| "step": 25600 |
| }, |
| { |
| "epoch": 2.28, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.51578947368421, |
| "eval_f1": 92.63157894736842, |
| "eval_loss": 0.06067919358611107, |
| "eval_runtime": 4.4705, |
| "eval_samples_per_second": 223.69, |
| "step": 25800 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 7.070288385219154e-05, |
| "loss": 0.0665, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.29, |
| "eval_accuracy": 94.6, |
| "eval_average_metrics": 93.74736842105261, |
| "eval_f1": 92.89473684210525, |
| "eval_loss": 0.06090604141354561, |
| "eval_runtime": 4.4777, |
| "eval_samples_per_second": 223.33, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.31, |
| "eval_accuracy": 94.5, |
| "eval_average_metrics": 93.63633377135348, |
| "eval_f1": 92.77266754270696, |
| "eval_loss": 0.06175965070724487, |
| "eval_runtime": 4.5456, |
| "eval_samples_per_second": 219.993, |
| "step": 26200 |
| }, |
| { |
| "epoch": 2.33, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.1937908496732, |
| "eval_f1": 92.2875816993464, |
| "eval_loss": 0.062108419835567474, |
| "eval_runtime": 4.5414, |
| "eval_samples_per_second": 220.196, |
| "step": 26400 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 6.629332392627216e-05, |
| "loss": 0.0681, |
| "step": 26500 |
| }, |
| { |
| "epoch": 2.35, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.48647214854111, |
| "eval_f1": 92.57294429708222, |
| "eval_loss": 0.060741446912288666, |
| "eval_runtime": 4.4624, |
| "eval_samples_per_second": 224.096, |
| "step": 26600 |
| }, |
| { |
| "epoch": 2.36, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.4962962962963, |
| "eval_f1": 92.5925925925926, |
| "eval_loss": 0.06029416620731354, |
| "eval_runtime": 4.5256, |
| "eval_samples_per_second": 220.966, |
| "step": 26800 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 6.188376400035276e-05, |
| "loss": 0.0667, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.38, |
| "eval_accuracy": 94.69999999999999, |
| "eval_average_metrics": 93.84933949801848, |
| "eval_f1": 92.99867899603699, |
| "eval_loss": 0.059210509061813354, |
| "eval_runtime": 4.8741, |
| "eval_samples_per_second": 205.167, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.4, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.41474442988203, |
| "eval_f1": 92.52948885976409, |
| "eval_loss": 0.0605180561542511, |
| "eval_runtime": 4.5293, |
| "eval_samples_per_second": 220.783, |
| "step": 27200 |
| }, |
| { |
| "epoch": 2.42, |
| "eval_accuracy": 94.5, |
| "eval_average_metrics": 93.64580602883355, |
| "eval_f1": 92.7916120576671, |
| "eval_loss": 0.060811493545770645, |
| "eval_runtime": 4.5424, |
| "eval_samples_per_second": 220.147, |
| "step": 27400 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 5.747420407443336e-05, |
| "loss": 0.0685, |
| "step": 27500 |
| }, |
| { |
| "epoch": 2.43, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.11141522029372, |
| "eval_f1": 92.12283044058745, |
| "eval_loss": 0.05978462100028992, |
| "eval_runtime": 4.4831, |
| "eval_samples_per_second": 223.06, |
| "step": 27600 |
| }, |
| { |
| "epoch": 2.45, |
| "eval_accuracy": 93.8, |
| "eval_average_metrics": 92.87402597402597, |
| "eval_f1": 91.94805194805194, |
| "eval_loss": 0.06267183274030685, |
| "eval_runtime": 4.4576, |
| "eval_samples_per_second": 224.334, |
| "step": 27800 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 5.3064644148513973e-05, |
| "loss": 0.0672, |
| "step": 28000 |
| }, |
| { |
| "epoch": 2.47, |
| "eval_accuracy": 94.0, |
| "eval_average_metrics": 93.06299212598425, |
| "eval_f1": 92.1259842519685, |
| "eval_loss": 0.061355073004961014, |
| "eval_runtime": 4.5194, |
| "eval_samples_per_second": 221.27, |
| "step": 28000 |
| }, |
| { |
| "epoch": 2.49, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.29422572178477, |
| "eval_f1": 92.38845144356955, |
| "eval_loss": 0.06131287291646004, |
| "eval_runtime": 4.5837, |
| "eval_samples_per_second": 218.165, |
| "step": 28200 |
| }, |
| { |
| "epoch": 2.5, |
| "eval_accuracy": 94.0, |
| "eval_average_metrics": 93.04221635883906, |
| "eval_f1": 92.0844327176781, |
| "eval_loss": 0.06105473265051842, |
| "eval_runtime": 4.5101, |
| "eval_samples_per_second": 221.726, |
| "step": 28400 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 4.8655084222594584e-05, |
| "loss": 0.0656, |
| "step": 28500 |
| }, |
| { |
| "epoch": 2.52, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.25384615384615, |
| "eval_f1": 92.3076923076923, |
| "eval_loss": 0.06093791127204895, |
| "eval_runtime": 4.6588, |
| "eval_samples_per_second": 214.647, |
| "step": 28600 |
| }, |
| { |
| "epoch": 2.54, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.40492772667542, |
| "eval_f1": 92.50985545335085, |
| "eval_loss": 0.061501096934080124, |
| "eval_runtime": 4.5262, |
| "eval_samples_per_second": 220.936, |
| "step": 28800 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 4.424552429667519e-05, |
| "loss": 0.067, |
| "step": 29000 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.27414248021108, |
| "eval_f1": 92.34828496042218, |
| "eval_loss": 0.05971948057413101, |
| "eval_runtime": 4.5243, |
| "eval_samples_per_second": 221.027, |
| "step": 29000 |
| }, |
| { |
| "epoch": 2.58, |
| "eval_accuracy": 93.60000000000001, |
| "eval_average_metrics": 92.65492227979274, |
| "eval_f1": 91.70984455958549, |
| "eval_loss": 0.063376285135746, |
| "eval_runtime": 4.6334, |
| "eval_samples_per_second": 215.825, |
| "step": 29200 |
| }, |
| { |
| "epoch": 2.59, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.26402116402116, |
| "eval_f1": 92.32804232804234, |
| "eval_loss": 0.06081530451774597, |
| "eval_runtime": 4.7045, |
| "eval_samples_per_second": 212.561, |
| "step": 29400 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 3.98359643707558e-05, |
| "loss": 0.0675, |
| "step": 29500 |
| }, |
| { |
| "epoch": 2.61, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.18368283093054, |
| "eval_f1": 92.26736566186108, |
| "eval_loss": 0.062262628227472305, |
| "eval_runtime": 4.6273, |
| "eval_samples_per_second": 216.108, |
| "step": 29600 |
| }, |
| { |
| "epoch": 2.63, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.3751655629139, |
| "eval_f1": 92.45033112582782, |
| "eval_loss": 0.06007382273674011, |
| "eval_runtime": 4.5698, |
| "eval_samples_per_second": 218.83, |
| "step": 29800 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 3.54264044448364e-05, |
| "loss": 0.0682, |
| "step": 30000 |
| }, |
| { |
| "epoch": 2.65, |
| "eval_accuracy": 94.1, |
| "eval_average_metrics": 93.1633069828722, |
| "eval_f1": 92.2266139657444, |
| "eval_loss": 0.0607917495071888, |
| "eval_runtime": 4.6423, |
| "eval_samples_per_second": 215.411, |
| "step": 30000 |
| }, |
| { |
| "epoch": 2.66, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.5254593175853, |
| "eval_f1": 92.6509186351706, |
| "eval_loss": 0.06171978637576103, |
| "eval_runtime": 4.4956, |
| "eval_samples_per_second": 222.439, |
| "step": 30200 |
| }, |
| { |
| "epoch": 2.68, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.4566844919786, |
| "eval_f1": 92.51336898395722, |
| "eval_loss": 0.05954898148775101, |
| "eval_runtime": 4.5069, |
| "eval_samples_per_second": 221.881, |
| "step": 30400 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 3.1016844518917006e-05, |
| "loss": 0.0684, |
| "step": 30500 |
| }, |
| { |
| "epoch": 2.7, |
| "eval_accuracy": 94.5, |
| "eval_average_metrics": 93.64580602883355, |
| "eval_f1": 92.7916120576671, |
| "eval_loss": 0.06073066592216492, |
| "eval_runtime": 4.5568, |
| "eval_samples_per_second": 219.452, |
| "step": 30600 |
| }, |
| { |
| "epoch": 2.72, |
| "eval_accuracy": 94.5, |
| "eval_average_metrics": 93.64580602883355, |
| "eval_f1": 92.7916120576671, |
| "eval_loss": 0.06212097778916359, |
| "eval_runtime": 4.4991, |
| "eval_samples_per_second": 222.265, |
| "step": 30800 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 2.6607284592997617e-05, |
| "loss": 0.0644, |
| "step": 31000 |
| }, |
| { |
| "epoch": 2.73, |
| "eval_accuracy": 94.5, |
| "eval_average_metrics": 93.64580602883355, |
| "eval_f1": 92.7916120576671, |
| "eval_loss": 0.061464857310056686, |
| "eval_runtime": 4.6313, |
| "eval_samples_per_second": 215.924, |
| "step": 31000 |
| }, |
| { |
| "epoch": 2.75, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.28421052631577, |
| "eval_f1": 92.36842105263158, |
| "eval_loss": 0.06165764480829239, |
| "eval_runtime": 4.4772, |
| "eval_samples_per_second": 223.356, |
| "step": 31200 |
| }, |
| { |
| "epoch": 2.77, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.27414248021108, |
| "eval_f1": 92.34828496042218, |
| "eval_loss": 0.061222758144140244, |
| "eval_runtime": 4.485, |
| "eval_samples_per_second": 222.965, |
| "step": 31400 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 2.219772466707822e-05, |
| "loss": 0.0656, |
| "step": 31500 |
| }, |
| { |
| "epoch": 2.79, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.40492772667542, |
| "eval_f1": 92.50985545335085, |
| "eval_loss": 0.06175553798675537, |
| "eval_runtime": 4.4473, |
| "eval_samples_per_second": 224.857, |
| "step": 31600 |
| }, |
| { |
| "epoch": 2.8, |
| "eval_accuracy": 94.0, |
| "eval_average_metrics": 93.04221635883906, |
| "eval_f1": 92.0844327176781, |
| "eval_loss": 0.06141304597258568, |
| "eval_runtime": 4.5384, |
| "eval_samples_per_second": 220.341, |
| "step": 31800 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 1.778816474115883e-05, |
| "loss": 0.0682, |
| "step": 32000 |
| }, |
| { |
| "epoch": 2.82, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.39505928853755, |
| "eval_f1": 92.49011857707511, |
| "eval_loss": 0.06122256815433502, |
| "eval_runtime": 4.532, |
| "eval_samples_per_second": 220.652, |
| "step": 32000 |
| }, |
| { |
| "epoch": 2.84, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.5254593175853, |
| "eval_f1": 92.6509186351706, |
| "eval_loss": 0.06179660186171532, |
| "eval_runtime": 4.5432, |
| "eval_samples_per_second": 220.11, |
| "step": 32200 |
| }, |
| { |
| "epoch": 2.86, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.26402116402116, |
| "eval_f1": 92.32804232804234, |
| "eval_loss": 0.060935478657484055, |
| "eval_runtime": 4.5308, |
| "eval_samples_per_second": 220.712, |
| "step": 32400 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 1.3378604815239437e-05, |
| "loss": 0.0628, |
| "step": 32500 |
| }, |
| { |
| "epoch": 2.88, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.51578947368421, |
| "eval_f1": 92.63157894736842, |
| "eval_loss": 0.06167520210146904, |
| "eval_runtime": 4.5363, |
| "eval_samples_per_second": 220.442, |
| "step": 32600 |
| }, |
| { |
| "epoch": 2.89, |
| "eval_accuracy": 94.39999999999999, |
| "eval_average_metrics": 93.51578947368421, |
| "eval_f1": 92.63157894736842, |
| "eval_loss": 0.061225228011608124, |
| "eval_runtime": 4.5208, |
| "eval_samples_per_second": 221.199, |
| "step": 32800 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 8.969044889320046e-06, |
| "loss": 0.0659, |
| "step": 33000 |
| }, |
| { |
| "epoch": 2.91, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.3751655629139, |
| "eval_f1": 92.45033112582782, |
| "eval_loss": 0.06039771810173988, |
| "eval_runtime": 4.5643, |
| "eval_samples_per_second": 219.093, |
| "step": 33000 |
| }, |
| { |
| "epoch": 2.93, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.40492772667542, |
| "eval_f1": 92.50985545335085, |
| "eval_loss": 0.06096240133047104, |
| "eval_runtime": 4.5827, |
| "eval_samples_per_second": 218.214, |
| "step": 33200 |
| }, |
| { |
| "epoch": 2.95, |
| "eval_accuracy": 94.3, |
| "eval_average_metrics": 93.38513870541613, |
| "eval_f1": 92.47027741083225, |
| "eval_loss": 0.060673393309116364, |
| "eval_runtime": 4.9126, |
| "eval_samples_per_second": 203.559, |
| "step": 33400 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 4.559484963400652e-06, |
| "loss": 0.0692, |
| "step": 33500 |
| }, |
| { |
| "epoch": 2.96, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.26402116402116, |
| "eval_f1": 92.32804232804234, |
| "eval_loss": 0.06072871759533882, |
| "eval_runtime": 4.5081, |
| "eval_samples_per_second": 221.824, |
| "step": 33600 |
| }, |
| { |
| "epoch": 2.98, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.26402116402116, |
| "eval_f1": 92.32804232804234, |
| "eval_loss": 0.06088118627667427, |
| "eval_runtime": 4.513, |
| "eval_samples_per_second": 221.581, |
| "step": 33800 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 1.4992503748125936e-07, |
| "loss": 0.0654, |
| "step": 34000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 94.19999999999999, |
| "eval_average_metrics": 93.26402116402116, |
| "eval_f1": 92.32804232804234, |
| "eval_loss": 0.060787323862314224, |
| "eval_runtime": 4.526, |
| "eval_samples_per_second": 220.947, |
| "step": 34000 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 34017, |
| "total_flos": 1.0629344517601075e+17, |
| "train_loss": 0.07169761398949699, |
| "train_runtime": 13428.6442, |
| "train_samples_per_second": 81.061, |
| "train_steps_per_second": 2.533 |
| } |
| ], |
| "max_steps": 34017, |
| "num_train_epochs": 3, |
| "total_flos": 1.0629344517601075e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|