{ "best_metric": 0.8990160346845362, "best_model_checkpoint": "./save_models/qqp/roberta-base_lr1e-05_run0/checkpoint-204670", "epoch": 10.0, "eval_steps": 500, "global_step": 204670, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.0713296962788053e-07, "loss": 0.6914, "step": 500 }, { "epoch": 0.05, "learning_rate": 8.142659392557611e-07, "loss": 0.6479, "step": 1000 }, { "epoch": 0.07, "learning_rate": 1.2213989088836414e-06, "loss": 0.5224, "step": 1500 }, { "epoch": 0.1, "learning_rate": 1.6285318785115221e-06, "loss": 0.4532, "step": 2000 }, { "epoch": 0.12, "learning_rate": 2.0356648481394024e-06, "loss": 0.4179, "step": 2500 }, { "epoch": 0.15, "learning_rate": 2.442797817767283e-06, "loss": 0.4075, "step": 3000 }, { "epoch": 0.17, "learning_rate": 2.8499307873951637e-06, "loss": 0.3777, "step": 3500 }, { "epoch": 0.2, "learning_rate": 3.2570637570230442e-06, "loss": 0.3735, "step": 4000 }, { "epoch": 0.22, "learning_rate": 3.6641967266509243e-06, "loss": 0.3609, "step": 4500 }, { "epoch": 0.24, "learning_rate": 4.071329696278805e-06, "loss": 0.3624, "step": 5000 }, { "epoch": 0.27, "learning_rate": 4.478462665906685e-06, "loss": 0.3574, "step": 5500 }, { "epoch": 0.29, "learning_rate": 4.885595635534566e-06, "loss": 0.3509, "step": 6000 }, { "epoch": 0.32, "learning_rate": 5.292728605162446e-06, "loss": 0.3387, "step": 6500 }, { "epoch": 0.34, "learning_rate": 5.6998615747903275e-06, "loss": 0.335, "step": 7000 }, { "epoch": 0.37, "learning_rate": 6.106994544418208e-06, "loss": 0.3242, "step": 7500 }, { "epoch": 0.39, "learning_rate": 6.5141275140460884e-06, "loss": 0.3317, "step": 8000 }, { "epoch": 0.42, "learning_rate": 6.921260483673968e-06, "loss": 0.3182, "step": 8500 }, { "epoch": 0.44, "learning_rate": 7.3283934533018485e-06, "loss": 0.3197, "step": 9000 }, { "epoch": 0.46, "learning_rate": 7.73552642292973e-06, "loss": 0.3213, "step": 9500 }, { "epoch": 0.49, "learning_rate": 8.14265939255761e-06, "loss": 0.3204, "step": 10000 }, { "epoch": 0.51, "learning_rate": 8.54979236218549e-06, "loss": 0.3271, "step": 10500 }, { "epoch": 0.54, "learning_rate": 8.95692533181337e-06, "loss": 0.3107, "step": 11000 }, { "epoch": 0.56, "learning_rate": 9.364058301441251e-06, "loss": 0.31, "step": 11500 }, { "epoch": 0.59, "learning_rate": 9.771191271069131e-06, "loss": 0.3089, "step": 12000 }, { "epoch": 0.61, "learning_rate": 9.988616812811544e-06, "loss": 0.2948, "step": 12500 }, { "epoch": 0.64, "learning_rate": 9.962627800965753e-06, "loss": 0.3019, "step": 13000 }, { "epoch": 0.66, "learning_rate": 9.936638789119961e-06, "loss": 0.3093, "step": 13500 }, { "epoch": 0.68, "learning_rate": 9.91064977727417e-06, "loss": 0.3032, "step": 14000 }, { "epoch": 0.71, "learning_rate": 9.884660765428378e-06, "loss": 0.3014, "step": 14500 }, { "epoch": 0.73, "learning_rate": 9.858671753582586e-06, "loss": 0.3039, "step": 15000 }, { "epoch": 0.76, "learning_rate": 9.832682741736795e-06, "loss": 0.2994, "step": 15500 }, { "epoch": 0.78, "learning_rate": 9.806693729891003e-06, "loss": 0.2954, "step": 16000 }, { "epoch": 0.81, "learning_rate": 9.78070471804521e-06, "loss": 0.2907, "step": 16500 }, { "epoch": 0.83, "learning_rate": 9.75471570619942e-06, "loss": 0.2887, "step": 17000 }, { "epoch": 0.86, "learning_rate": 9.728726694353629e-06, "loss": 0.299, "step": 17500 }, { "epoch": 0.88, "learning_rate": 9.702737682507836e-06, "loss": 0.2789, "step": 18000 }, { "epoch": 0.9, "learning_rate": 9.676748670662046e-06, "loss": 0.2883, "step": 18500 }, { "epoch": 0.93, "learning_rate": 9.650759658816253e-06, "loss": 0.2873, "step": 19000 }, { "epoch": 0.95, "learning_rate": 9.624770646970461e-06, "loss": 0.2948, "step": 19500 }, { "epoch": 0.98, "learning_rate": 9.59878163512467e-06, "loss": 0.2746, "step": 20000 }, { "epoch": 1.0, "eval_accuracy": 0.8817094956712931, "eval_averaged_scores": 0.8643799788235642, "eval_f1": 0.8470504619758352, "eval_loss": 0.2860792577266693, "eval_runtime": 36.8913, "eval_samples_per_second": 986.277, "eval_steps_per_second": 61.668, "step": 20467 }, { "epoch": 1.0, "learning_rate": 9.572792623278878e-06, "loss": 0.2906, "step": 20500 }, { "epoch": 1.03, "learning_rate": 9.546803611433086e-06, "loss": 0.2525, "step": 21000 }, { "epoch": 1.05, "learning_rate": 9.520814599587295e-06, "loss": 0.2551, "step": 21500 }, { "epoch": 1.07, "learning_rate": 9.494825587741504e-06, "loss": 0.259, "step": 22000 }, { "epoch": 1.1, "learning_rate": 9.468836575895712e-06, "loss": 0.2459, "step": 22500 }, { "epoch": 1.12, "learning_rate": 9.442847564049921e-06, "loss": 0.256, "step": 23000 }, { "epoch": 1.15, "learning_rate": 9.416858552204129e-06, "loss": 0.2573, "step": 23500 }, { "epoch": 1.17, "learning_rate": 9.390869540358337e-06, "loss": 0.2465, "step": 24000 }, { "epoch": 1.2, "learning_rate": 9.364880528512546e-06, "loss": 0.2519, "step": 24500 }, { "epoch": 1.22, "learning_rate": 9.338891516666754e-06, "loss": 0.2457, "step": 25000 }, { "epoch": 1.25, "learning_rate": 9.312902504820961e-06, "loss": 0.2502, "step": 25500 }, { "epoch": 1.27, "learning_rate": 9.28691349297517e-06, "loss": 0.253, "step": 26000 }, { "epoch": 1.29, "learning_rate": 9.26092448112938e-06, "loss": 0.2453, "step": 26500 }, { "epoch": 1.32, "learning_rate": 9.234935469283588e-06, "loss": 0.2576, "step": 27000 }, { "epoch": 1.34, "learning_rate": 9.208946457437797e-06, "loss": 0.2505, "step": 27500 }, { "epoch": 1.37, "learning_rate": 9.182957445592005e-06, "loss": 0.252, "step": 28000 }, { "epoch": 1.39, "learning_rate": 9.156968433746212e-06, "loss": 0.2536, "step": 28500 }, { "epoch": 1.42, "learning_rate": 9.130979421900422e-06, "loss": 0.2369, "step": 29000 }, { "epoch": 1.44, "learning_rate": 9.10499041005463e-06, "loss": 0.2459, "step": 29500 }, { "epoch": 1.47, "learning_rate": 9.079001398208837e-06, "loss": 0.2446, "step": 30000 }, { "epoch": 1.49, "learning_rate": 9.053012386363046e-06, "loss": 0.2434, "step": 30500 }, { "epoch": 1.51, "learning_rate": 9.027023374517256e-06, "loss": 0.2493, "step": 31000 }, { "epoch": 1.54, "learning_rate": 9.001034362671463e-06, "loss": 0.2506, "step": 31500 }, { "epoch": 1.56, "learning_rate": 8.975045350825672e-06, "loss": 0.2421, "step": 32000 }, { "epoch": 1.59, "learning_rate": 8.94905633897988e-06, "loss": 0.2467, "step": 32500 }, { "epoch": 1.61, "learning_rate": 8.923067327134088e-06, "loss": 0.2439, "step": 33000 }, { "epoch": 1.64, "learning_rate": 8.897078315288297e-06, "loss": 0.2423, "step": 33500 }, { "epoch": 1.66, "learning_rate": 8.871089303442505e-06, "loss": 0.25, "step": 34000 }, { "epoch": 1.69, "learning_rate": 8.845100291596712e-06, "loss": 0.2367, "step": 34500 }, { "epoch": 1.71, "learning_rate": 8.819111279750922e-06, "loss": 0.2387, "step": 35000 }, { "epoch": 1.73, "learning_rate": 8.793122267905131e-06, "loss": 0.2455, "step": 35500 }, { "epoch": 1.76, "learning_rate": 8.767133256059339e-06, "loss": 0.2439, "step": 36000 }, { "epoch": 1.78, "learning_rate": 8.741144244213548e-06, "loss": 0.2332, "step": 36500 }, { "epoch": 1.81, "learning_rate": 8.715155232367756e-06, "loss": 0.2446, "step": 37000 }, { "epoch": 1.83, "learning_rate": 8.689166220521963e-06, "loss": 0.2389, "step": 37500 }, { "epoch": 1.86, "learning_rate": 8.663177208676173e-06, "loss": 0.2346, "step": 38000 }, { "epoch": 1.88, "learning_rate": 8.63718819683038e-06, "loss": 0.2305, "step": 38500 }, { "epoch": 1.91, "learning_rate": 8.61119918498459e-06, "loss": 0.2331, "step": 39000 }, { "epoch": 1.93, "learning_rate": 8.585210173138797e-06, "loss": 0.2393, "step": 39500 }, { "epoch": 1.95, "learning_rate": 8.559221161293007e-06, "loss": 0.2371, "step": 40000 }, { "epoch": 1.98, "learning_rate": 8.533232149447214e-06, "loss": 0.2291, "step": 40500 }, { "epoch": 2.0, "eval_accuracy": 0.8982272914662636, "eval_averaged_scores": 0.8808907930119556, "eval_f1": 0.8635542945576477, "eval_loss": 0.27606385946273804, "eval_runtime": 36.4573, "eval_samples_per_second": 998.018, "eval_steps_per_second": 62.402, "step": 40934 }, { "epoch": 2.0, "learning_rate": 8.507243137601424e-06, "loss": 0.2293, "step": 41000 }, { "epoch": 2.03, "learning_rate": 8.481254125755631e-06, "loss": 0.1943, "step": 41500 }, { "epoch": 2.05, "learning_rate": 8.455265113909839e-06, "loss": 0.2058, "step": 42000 }, { "epoch": 2.08, "learning_rate": 8.429276102064048e-06, "loss": 0.2089, "step": 42500 }, { "epoch": 2.1, "learning_rate": 8.403287090218256e-06, "loss": 0.2043, "step": 43000 }, { "epoch": 2.13, "learning_rate": 8.377298078372465e-06, "loss": 0.2005, "step": 43500 }, { "epoch": 2.15, "learning_rate": 8.351309066526673e-06, "loss": 0.2061, "step": 44000 }, { "epoch": 2.17, "learning_rate": 8.325320054680882e-06, "loss": 0.2058, "step": 44500 }, { "epoch": 2.2, "learning_rate": 8.29933104283509e-06, "loss": 0.2114, "step": 45000 }, { "epoch": 2.22, "learning_rate": 8.2733420309893e-06, "loss": 0.2008, "step": 45500 }, { "epoch": 2.25, "learning_rate": 8.247353019143507e-06, "loss": 0.2198, "step": 46000 }, { "epoch": 2.27, "learning_rate": 8.221364007297714e-06, "loss": 0.2153, "step": 46500 }, { "epoch": 2.3, "learning_rate": 8.195374995451924e-06, "loss": 0.2116, "step": 47000 }, { "epoch": 2.32, "learning_rate": 8.169385983606131e-06, "loss": 0.1998, "step": 47500 }, { "epoch": 2.35, "learning_rate": 8.14339697176034e-06, "loss": 0.2122, "step": 48000 }, { "epoch": 2.37, "learning_rate": 8.117407959914548e-06, "loss": 0.2036, "step": 48500 }, { "epoch": 2.39, "learning_rate": 8.091418948068758e-06, "loss": 0.2065, "step": 49000 }, { "epoch": 2.42, "learning_rate": 8.065429936222965e-06, "loss": 0.2038, "step": 49500 }, { "epoch": 2.44, "learning_rate": 8.039440924377175e-06, "loss": 0.207, "step": 50000 }, { "epoch": 2.47, "learning_rate": 8.013451912531382e-06, "loss": 0.2046, "step": 50500 }, { "epoch": 2.49, "learning_rate": 7.98746290068559e-06, "loss": 0.2116, "step": 51000 }, { "epoch": 2.52, "learning_rate": 7.9614738888398e-06, "loss": 0.2072, "step": 51500 }, { "epoch": 2.54, "learning_rate": 7.935484876994007e-06, "loss": 0.2117, "step": 52000 }, { "epoch": 2.57, "learning_rate": 7.909495865148216e-06, "loss": 0.2197, "step": 52500 }, { "epoch": 2.59, "learning_rate": 7.883506853302424e-06, "loss": 0.2037, "step": 53000 }, { "epoch": 2.61, "learning_rate": 7.857517841456633e-06, "loss": 0.207, "step": 53500 }, { "epoch": 2.64, "learning_rate": 7.831528829610841e-06, "loss": 0.219, "step": 54000 }, { "epoch": 2.66, "learning_rate": 7.80553981776505e-06, "loss": 0.2082, "step": 54500 }, { "epoch": 2.69, "learning_rate": 7.779550805919258e-06, "loss": 0.201, "step": 55000 }, { "epoch": 2.71, "learning_rate": 7.753561794073467e-06, "loss": 0.2127, "step": 55500 }, { "epoch": 2.74, "learning_rate": 7.727572782227675e-06, "loss": 0.2165, "step": 56000 }, { "epoch": 2.76, "learning_rate": 7.701583770381883e-06, "loss": 0.203, "step": 56500 }, { "epoch": 2.78, "learning_rate": 7.675594758536092e-06, "loss": 0.2147, "step": 57000 }, { "epoch": 2.81, "learning_rate": 7.6496057466903e-06, "loss": 0.2006, "step": 57500 }, { "epoch": 2.83, "learning_rate": 7.623616734844508e-06, "loss": 0.2063, "step": 58000 }, { "epoch": 2.86, "learning_rate": 7.597627722998717e-06, "loss": 0.2046, "step": 58500 }, { "epoch": 2.88, "learning_rate": 7.571638711152926e-06, "loss": 0.1942, "step": 59000 }, { "epoch": 2.91, "learning_rate": 7.5456496993071335e-06, "loss": 0.1976, "step": 59500 }, { "epoch": 2.93, "learning_rate": 7.519660687461342e-06, "loss": 0.2047, "step": 60000 }, { "epoch": 2.96, "learning_rate": 7.4936716756155505e-06, "loss": 0.2047, "step": 60500 }, { "epoch": 2.98, "learning_rate": 7.467682663769759e-06, "loss": 0.2035, "step": 61000 }, { "epoch": 3.0, "eval_accuracy": 0.9074618661536348, "eval_averaged_scores": 0.8925016186560136, "eval_f1": 0.8775413711583925, "eval_loss": 0.2975204288959503, "eval_runtime": 36.3008, "eval_samples_per_second": 1002.319, "eval_steps_per_second": 62.671, "step": 61401 }, { "epoch": 3.0, "learning_rate": 7.441693651923967e-06, "loss": 0.1955, "step": 61500 }, { "epoch": 3.03, "learning_rate": 7.415704640078175e-06, "loss": 0.1745, "step": 62000 }, { "epoch": 3.05, "learning_rate": 7.389715628232384e-06, "loss": 0.1855, "step": 62500 }, { "epoch": 3.08, "learning_rate": 7.363726616386593e-06, "loss": 0.1743, "step": 63000 }, { "epoch": 3.1, "learning_rate": 7.337737604540801e-06, "loss": 0.1829, "step": 63500 }, { "epoch": 3.13, "learning_rate": 7.311748592695009e-06, "loss": 0.1898, "step": 64000 }, { "epoch": 3.15, "learning_rate": 7.2857595808492175e-06, "loss": 0.1878, "step": 64500 }, { "epoch": 3.18, "learning_rate": 7.259770569003426e-06, "loss": 0.1886, "step": 65000 }, { "epoch": 3.2, "learning_rate": 7.2337815571576345e-06, "loss": 0.1941, "step": 65500 }, { "epoch": 3.22, "learning_rate": 7.207792545311842e-06, "loss": 0.1901, "step": 66000 }, { "epoch": 3.25, "learning_rate": 7.181803533466051e-06, "loss": 0.1813, "step": 66500 }, { "epoch": 3.27, "learning_rate": 7.155814521620259e-06, "loss": 0.1947, "step": 67000 }, { "epoch": 3.3, "learning_rate": 7.1298255097744685e-06, "loss": 0.1787, "step": 67500 }, { "epoch": 3.32, "learning_rate": 7.103836497928677e-06, "loss": 0.1927, "step": 68000 }, { "epoch": 3.35, "learning_rate": 7.077847486082885e-06, "loss": 0.1961, "step": 68500 }, { "epoch": 3.37, "learning_rate": 7.051858474237093e-06, "loss": 0.1785, "step": 69000 }, { "epoch": 3.4, "learning_rate": 7.025869462391302e-06, "loss": 0.1892, "step": 69500 }, { "epoch": 3.42, "learning_rate": 6.99988045054551e-06, "loss": 0.1853, "step": 70000 }, { "epoch": 3.44, "learning_rate": 6.973891438699718e-06, "loss": 0.1892, "step": 70500 }, { "epoch": 3.47, "learning_rate": 6.947902426853926e-06, "loss": 0.1908, "step": 71000 }, { "epoch": 3.49, "learning_rate": 6.921913415008135e-06, "loss": 0.1859, "step": 71500 }, { "epoch": 3.52, "learning_rate": 6.895924403162344e-06, "loss": 0.1855, "step": 72000 }, { "epoch": 3.54, "learning_rate": 6.8699353913165525e-06, "loss": 0.1763, "step": 72500 }, { "epoch": 3.57, "learning_rate": 6.84394637947076e-06, "loss": 0.1886, "step": 73000 }, { "epoch": 3.59, "learning_rate": 6.817957367624969e-06, "loss": 0.1935, "step": 73500 }, { "epoch": 3.62, "learning_rate": 6.791968355779177e-06, "loss": 0.1889, "step": 74000 }, { "epoch": 3.64, "learning_rate": 6.765979343933386e-06, "loss": 0.1837, "step": 74500 }, { "epoch": 3.66, "learning_rate": 6.739990332087593e-06, "loss": 0.1782, "step": 75000 }, { "epoch": 3.69, "learning_rate": 6.714001320241802e-06, "loss": 0.1738, "step": 75500 }, { "epoch": 3.71, "learning_rate": 6.68801230839601e-06, "loss": 0.1833, "step": 76000 }, { "epoch": 3.74, "learning_rate": 6.66202329655022e-06, "loss": 0.1826, "step": 76500 }, { "epoch": 3.76, "learning_rate": 6.636034284704428e-06, "loss": 0.1856, "step": 77000 }, { "epoch": 3.79, "learning_rate": 6.610045272858637e-06, "loss": 0.1843, "step": 77500 }, { "epoch": 3.81, "learning_rate": 6.584056261012844e-06, "loss": 0.1948, "step": 78000 }, { "epoch": 3.84, "learning_rate": 6.558067249167053e-06, "loss": 0.1764, "step": 78500 }, { "epoch": 3.86, "learning_rate": 6.532078237321261e-06, "loss": 0.179, "step": 79000 }, { "epoch": 3.88, "learning_rate": 6.50608922547547e-06, "loss": 0.1906, "step": 79500 }, { "epoch": 3.91, "learning_rate": 6.480100213629677e-06, "loss": 0.1853, "step": 80000 }, { "epoch": 3.93, "learning_rate": 6.454111201783886e-06, "loss": 0.1918, "step": 80500 }, { "epoch": 3.96, "learning_rate": 6.428122189938095e-06, "loss": 0.1769, "step": 81000 }, { "epoch": 3.98, "learning_rate": 6.402133178092304e-06, "loss": 0.1969, "step": 81500 }, { "epoch": 4.0, "eval_accuracy": 0.9094407035866429, "eval_averaged_scores": 0.8949438560203089, "eval_f1": 0.8804470084539749, "eval_loss": 0.3116997480392456, "eval_runtime": 36.2351, "eval_samples_per_second": 1004.137, "eval_steps_per_second": 62.784, "step": 81868 }, { "epoch": 4.01, "learning_rate": 6.376144166246512e-06, "loss": 0.1737, "step": 82000 }, { "epoch": 4.03, "learning_rate": 6.35015515440072e-06, "loss": 0.1467, "step": 82500 }, { "epoch": 4.06, "learning_rate": 6.324166142554928e-06, "loss": 0.1557, "step": 83000 }, { "epoch": 4.08, "learning_rate": 6.298177130709137e-06, "loss": 0.1533, "step": 83500 }, { "epoch": 4.1, "learning_rate": 6.272188118863345e-06, "loss": 0.1573, "step": 84000 }, { "epoch": 4.13, "learning_rate": 6.246199107017553e-06, "loss": 0.1602, "step": 84500 }, { "epoch": 4.15, "learning_rate": 6.220210095171761e-06, "loss": 0.1598, "step": 85000 }, { "epoch": 4.18, "learning_rate": 6.194221083325971e-06, "loss": 0.162, "step": 85500 }, { "epoch": 4.2, "learning_rate": 6.168232071480179e-06, "loss": 0.1668, "step": 86000 }, { "epoch": 4.23, "learning_rate": 6.142243059634388e-06, "loss": 0.1673, "step": 86500 }, { "epoch": 4.25, "learning_rate": 6.116254047788595e-06, "loss": 0.159, "step": 87000 }, { "epoch": 4.28, "learning_rate": 6.090265035942804e-06, "loss": 0.1615, "step": 87500 }, { "epoch": 4.3, "learning_rate": 6.064276024097012e-06, "loss": 0.1742, "step": 88000 }, { "epoch": 4.32, "learning_rate": 6.038287012251221e-06, "loss": 0.1663, "step": 88500 }, { "epoch": 4.35, "learning_rate": 6.0122980004054285e-06, "loss": 0.1699, "step": 89000 }, { "epoch": 4.37, "learning_rate": 5.986308988559637e-06, "loss": 0.1625, "step": 89500 }, { "epoch": 4.4, "learning_rate": 5.9603199767138455e-06, "loss": 0.158, "step": 90000 }, { "epoch": 4.42, "learning_rate": 5.934330964868055e-06, "loss": 0.1636, "step": 90500 }, { "epoch": 4.45, "learning_rate": 5.908341953022263e-06, "loss": 0.1568, "step": 91000 }, { "epoch": 4.47, "learning_rate": 5.882352941176471e-06, "loss": 0.165, "step": 91500 }, { "epoch": 4.5, "learning_rate": 5.856363929330679e-06, "loss": 0.1687, "step": 92000 }, { "epoch": 4.52, "learning_rate": 5.830374917484888e-06, "loss": 0.1595, "step": 92500 }, { "epoch": 4.54, "learning_rate": 5.804385905639096e-06, "loss": 0.1769, "step": 93000 }, { "epoch": 4.57, "learning_rate": 5.778396893793304e-06, "loss": 0.1567, "step": 93500 }, { "epoch": 4.59, "learning_rate": 5.7524078819475125e-06, "loss": 0.1724, "step": 94000 }, { "epoch": 4.62, "learning_rate": 5.726418870101721e-06, "loss": 0.1745, "step": 94500 }, { "epoch": 4.64, "learning_rate": 5.70042985825593e-06, "loss": 0.167, "step": 95000 }, { "epoch": 4.67, "learning_rate": 5.674440846410139e-06, "loss": 0.1632, "step": 95500 }, { "epoch": 4.69, "learning_rate": 5.6484518345643465e-06, "loss": 0.1737, "step": 96000 }, { "epoch": 4.71, "learning_rate": 5.622462822718555e-06, "loss": 0.1625, "step": 96500 }, { "epoch": 4.74, "learning_rate": 5.5964738108727635e-06, "loss": 0.1582, "step": 97000 }, { "epoch": 4.76, "learning_rate": 5.570484799026972e-06, "loss": 0.1691, "step": 97500 }, { "epoch": 4.79, "learning_rate": 5.54449578718118e-06, "loss": 0.1652, "step": 98000 }, { "epoch": 4.81, "learning_rate": 5.518506775335388e-06, "loss": 0.1813, "step": 98500 }, { "epoch": 4.84, "learning_rate": 5.4925177634895966e-06, "loss": 0.1738, "step": 99000 }, { "epoch": 4.86, "learning_rate": 5.466528751643806e-06, "loss": 0.1717, "step": 99500 }, { "epoch": 4.89, "learning_rate": 5.440539739798014e-06, "loss": 0.1623, "step": 100000 }, { "epoch": 4.91, "learning_rate": 5.414550727952223e-06, "loss": 0.1771, "step": 100500 }, { "epoch": 4.93, "learning_rate": 5.3885617161064305e-06, "loss": 0.157, "step": 101000 }, { "epoch": 4.96, "learning_rate": 5.362572704260639e-06, "loss": 0.1654, "step": 101500 }, { "epoch": 4.98, "learning_rate": 5.3365836924148475e-06, "loss": 0.165, "step": 102000 }, { "epoch": 5.0, "eval_accuracy": 0.9078741239521781, "eval_averaged_scores": 0.893031472907116, "eval_f1": 0.8781888218620539, "eval_loss": 0.3768274188041687, "eval_runtime": 36.2704, "eval_samples_per_second": 1003.16, "eval_steps_per_second": 62.723, "step": 102335 }, { "epoch": 5.01, "learning_rate": 5.310594680569056e-06, "loss": 0.1649, "step": 102500 }, { "epoch": 5.03, "learning_rate": 5.284605668723264e-06, "loss": 0.1318, "step": 103000 }, { "epoch": 5.06, "learning_rate": 5.258616656877472e-06, "loss": 0.1436, "step": 103500 }, { "epoch": 5.08, "learning_rate": 5.2326276450316815e-06, "loss": 0.1509, "step": 104000 }, { "epoch": 5.11, "learning_rate": 5.20663863318589e-06, "loss": 0.1363, "step": 104500 }, { "epoch": 5.13, "learning_rate": 5.1806496213400985e-06, "loss": 0.1321, "step": 105000 }, { "epoch": 5.15, "learning_rate": 5.154660609494306e-06, "loss": 0.1412, "step": 105500 }, { "epoch": 5.18, "learning_rate": 5.128671597648515e-06, "loss": 0.1347, "step": 106000 }, { "epoch": 5.2, "learning_rate": 5.102682585802723e-06, "loss": 0.1455, "step": 106500 }, { "epoch": 5.23, "learning_rate": 5.0766935739569316e-06, "loss": 0.1356, "step": 107000 }, { "epoch": 5.25, "learning_rate": 5.050704562111139e-06, "loss": 0.1446, "step": 107500 }, { "epoch": 5.28, "learning_rate": 5.024715550265348e-06, "loss": 0.1403, "step": 108000 }, { "epoch": 5.3, "learning_rate": 4.998726538419556e-06, "loss": 0.1339, "step": 108500 }, { "epoch": 5.33, "learning_rate": 4.972737526573765e-06, "loss": 0.1342, "step": 109000 }, { "epoch": 5.35, "learning_rate": 4.946748514727974e-06, "loss": 0.1281, "step": 109500 }, { "epoch": 5.37, "learning_rate": 4.920759502882182e-06, "loss": 0.1373, "step": 110000 }, { "epoch": 5.4, "learning_rate": 4.89477049103639e-06, "loss": 0.1349, "step": 110500 }, { "epoch": 5.42, "learning_rate": 4.868781479190599e-06, "loss": 0.1572, "step": 111000 }, { "epoch": 5.45, "learning_rate": 4.842792467344807e-06, "loss": 0.1316, "step": 111500 }, { "epoch": 5.47, "learning_rate": 4.816803455499016e-06, "loss": 0.1287, "step": 112000 }, { "epoch": 5.5, "learning_rate": 4.790814443653224e-06, "loss": 0.1416, "step": 112500 }, { "epoch": 5.52, "learning_rate": 4.764825431807433e-06, "loss": 0.1419, "step": 113000 }, { "epoch": 5.55, "learning_rate": 4.73883641996164e-06, "loss": 0.1397, "step": 113500 }, { "epoch": 5.57, "learning_rate": 4.712847408115849e-06, "loss": 0.1466, "step": 114000 }, { "epoch": 5.59, "learning_rate": 4.686858396270057e-06, "loss": 0.143, "step": 114500 }, { "epoch": 5.62, "learning_rate": 4.660869384424266e-06, "loss": 0.1428, "step": 115000 }, { "epoch": 5.64, "learning_rate": 4.634880372578474e-06, "loss": 0.1413, "step": 115500 }, { "epoch": 5.67, "learning_rate": 4.608891360732683e-06, "loss": 0.1479, "step": 116000 }, { "epoch": 5.69, "learning_rate": 4.582902348886891e-06, "loss": 0.1454, "step": 116500 }, { "epoch": 5.72, "learning_rate": 4.5569133370411e-06, "loss": 0.1332, "step": 117000 }, { "epoch": 5.74, "learning_rate": 4.530924325195308e-06, "loss": 0.1473, "step": 117500 }, { "epoch": 5.77, "learning_rate": 4.504935313349516e-06, "loss": 0.1353, "step": 118000 }, { "epoch": 5.79, "learning_rate": 4.478946301503724e-06, "loss": 0.1557, "step": 118500 }, { "epoch": 5.81, "learning_rate": 4.452957289657933e-06, "loss": 0.1452, "step": 119000 }, { "epoch": 5.84, "learning_rate": 4.426968277812141e-06, "loss": 0.1469, "step": 119500 }, { "epoch": 5.86, "learning_rate": 4.40097926596635e-06, "loss": 0.1394, "step": 120000 }, { "epoch": 5.89, "learning_rate": 4.374990254120558e-06, "loss": 0.1525, "step": 120500 }, { "epoch": 5.91, "learning_rate": 4.349001242274767e-06, "loss": 0.1448, "step": 121000 }, { "epoch": 5.94, "learning_rate": 4.323012230428975e-06, "loss": 0.146, "step": 121500 }, { "epoch": 5.96, "learning_rate": 4.297023218583184e-06, "loss": 0.1365, "step": 122000 }, { "epoch": 5.99, "learning_rate": 4.271034206737391e-06, "loss": 0.1467, "step": 122500 }, { "epoch": 6.0, "eval_accuracy": 0.9117768311117218, "eval_averaged_scores": 0.8970713531108301, "eval_f1": 0.8823658751099384, "eval_loss": 0.44876691699028015, "eval_runtime": 36.3498, "eval_samples_per_second": 1000.969, "eval_steps_per_second": 62.586, "step": 122802 }, { "epoch": 6.01, "learning_rate": 4.2450451948916e-06, "loss": 0.1278, "step": 123000 }, { "epoch": 6.03, "learning_rate": 4.219056183045809e-06, "loss": 0.1097, "step": 123500 }, { "epoch": 6.06, "learning_rate": 4.193067171200017e-06, "loss": 0.118, "step": 124000 }, { "epoch": 6.08, "learning_rate": 4.167078159354225e-06, "loss": 0.1108, "step": 124500 }, { "epoch": 6.11, "learning_rate": 4.141089147508434e-06, "loss": 0.1124, "step": 125000 }, { "epoch": 6.13, "learning_rate": 4.115100135662642e-06, "loss": 0.1161, "step": 125500 }, { "epoch": 6.16, "learning_rate": 4.089111123816851e-06, "loss": 0.1159, "step": 126000 }, { "epoch": 6.18, "learning_rate": 4.063122111971059e-06, "loss": 0.1152, "step": 126500 }, { "epoch": 6.21, "learning_rate": 4.037133100125267e-06, "loss": 0.1125, "step": 127000 }, { "epoch": 6.23, "learning_rate": 4.011144088279475e-06, "loss": 0.1126, "step": 127500 }, { "epoch": 6.25, "learning_rate": 3.985155076433685e-06, "loss": 0.1233, "step": 128000 }, { "epoch": 6.28, "learning_rate": 3.959166064587892e-06, "loss": 0.1183, "step": 128500 }, { "epoch": 6.3, "learning_rate": 3.933177052742101e-06, "loss": 0.1207, "step": 129000 }, { "epoch": 6.33, "learning_rate": 3.907188040896309e-06, "loss": 0.1114, "step": 129500 }, { "epoch": 6.35, "learning_rate": 3.881199029050518e-06, "loss": 0.1164, "step": 130000 }, { "epoch": 6.38, "learning_rate": 3.855210017204726e-06, "loss": 0.1179, "step": 130500 }, { "epoch": 6.4, "learning_rate": 3.829221005358935e-06, "loss": 0.1115, "step": 131000 }, { "epoch": 6.42, "learning_rate": 3.803231993513143e-06, "loss": 0.1124, "step": 131500 }, { "epoch": 6.45, "learning_rate": 3.777242981667351e-06, "loss": 0.1228, "step": 132000 }, { "epoch": 6.47, "learning_rate": 3.75125396982156e-06, "loss": 0.1314, "step": 132500 }, { "epoch": 6.5, "learning_rate": 3.7252649579757684e-06, "loss": 0.1076, "step": 133000 }, { "epoch": 6.52, "learning_rate": 3.6992759461299764e-06, "loss": 0.1189, "step": 133500 }, { "epoch": 6.55, "learning_rate": 3.673286934284185e-06, "loss": 0.1234, "step": 134000 }, { "epoch": 6.57, "learning_rate": 3.647297922438393e-06, "loss": 0.1199, "step": 134500 }, { "epoch": 6.6, "learning_rate": 3.621308910592602e-06, "loss": 0.1295, "step": 135000 }, { "epoch": 6.62, "learning_rate": 3.5953198987468104e-06, "loss": 0.1282, "step": 135500 }, { "epoch": 6.64, "learning_rate": 3.5693308869010185e-06, "loss": 0.124, "step": 136000 }, { "epoch": 6.67, "learning_rate": 3.543341875055227e-06, "loss": 0.1292, "step": 136500 }, { "epoch": 6.69, "learning_rate": 3.5173528632094355e-06, "loss": 0.112, "step": 137000 }, { "epoch": 6.72, "learning_rate": 3.491363851363644e-06, "loss": 0.1225, "step": 137500 }, { "epoch": 6.74, "learning_rate": 3.465374839517852e-06, "loss": 0.1262, "step": 138000 }, { "epoch": 6.77, "learning_rate": 3.4393858276720605e-06, "loss": 0.1174, "step": 138500 }, { "epoch": 6.79, "learning_rate": 3.4133968158262686e-06, "loss": 0.1219, "step": 139000 }, { "epoch": 6.82, "learning_rate": 3.3874078039804775e-06, "loss": 0.1172, "step": 139500 }, { "epoch": 6.84, "learning_rate": 3.361418792134686e-06, "loss": 0.126, "step": 140000 }, { "epoch": 6.86, "learning_rate": 3.335429780288894e-06, "loss": 0.1206, "step": 140500 }, { "epoch": 6.89, "learning_rate": 3.3094407684431025e-06, "loss": 0.1216, "step": 141000 }, { "epoch": 6.91, "learning_rate": 3.283451756597311e-06, "loss": 0.1215, "step": 141500 }, { "epoch": 6.94, "learning_rate": 3.2574627447515195e-06, "loss": 0.12, "step": 142000 }, { "epoch": 6.96, "learning_rate": 3.2314737329057276e-06, "loss": 0.1198, "step": 142500 }, { "epoch": 6.99, "learning_rate": 3.205484721059936e-06, "loss": 0.1093, "step": 143000 }, { "epoch": 7.0, "eval_accuracy": 0.9125738628555724, "eval_averaged_scores": 0.8981269870097512, "eval_f1": 0.8836801111639301, "eval_loss": 0.48590707778930664, "eval_runtime": 36.5067, "eval_samples_per_second": 996.667, "eval_steps_per_second": 62.317, "step": 143269 }, { "epoch": 7.01, "learning_rate": 3.179495709214144e-06, "loss": 0.1072, "step": 143500 }, { "epoch": 7.04, "learning_rate": 3.153506697368353e-06, "loss": 0.0934, "step": 144000 }, { "epoch": 7.06, "learning_rate": 3.1275176855225615e-06, "loss": 0.0963, "step": 144500 }, { "epoch": 7.08, "learning_rate": 3.1015286736767696e-06, "loss": 0.0962, "step": 145000 }, { "epoch": 7.11, "learning_rate": 3.075539661830978e-06, "loss": 0.0964, "step": 145500 }, { "epoch": 7.13, "learning_rate": 3.049550649985186e-06, "loss": 0.0983, "step": 146000 }, { "epoch": 7.16, "learning_rate": 3.023561638139395e-06, "loss": 0.0942, "step": 146500 }, { "epoch": 7.18, "learning_rate": 2.9975726262936036e-06, "loss": 0.1016, "step": 147000 }, { "epoch": 7.21, "learning_rate": 2.9715836144478116e-06, "loss": 0.0806, "step": 147500 }, { "epoch": 7.23, "learning_rate": 2.94559460260202e-06, "loss": 0.1037, "step": 148000 }, { "epoch": 7.26, "learning_rate": 2.9196055907562286e-06, "loss": 0.1013, "step": 148500 }, { "epoch": 7.28, "learning_rate": 2.893616578910437e-06, "loss": 0.0929, "step": 149000 }, { "epoch": 7.3, "learning_rate": 2.867627567064645e-06, "loss": 0.1034, "step": 149500 }, { "epoch": 7.33, "learning_rate": 2.8416385552188536e-06, "loss": 0.0955, "step": 150000 }, { "epoch": 7.35, "learning_rate": 2.8156495433730617e-06, "loss": 0.0857, "step": 150500 }, { "epoch": 7.38, "learning_rate": 2.7896605315272706e-06, "loss": 0.106, "step": 151000 }, { "epoch": 7.4, "learning_rate": 2.763671519681479e-06, "loss": 0.0914, "step": 151500 }, { "epoch": 7.43, "learning_rate": 2.737682507835687e-06, "loss": 0.0979, "step": 152000 }, { "epoch": 7.45, "learning_rate": 2.7116934959898957e-06, "loss": 0.0986, "step": 152500 }, { "epoch": 7.48, "learning_rate": 2.685704484144104e-06, "loss": 0.1024, "step": 153000 }, { "epoch": 7.5, "learning_rate": 2.6597154722983127e-06, "loss": 0.0968, "step": 153500 }, { "epoch": 7.52, "learning_rate": 2.6337264604525207e-06, "loss": 0.0964, "step": 154000 }, { "epoch": 7.55, "learning_rate": 2.607737448606729e-06, "loss": 0.1112, "step": 154500 }, { "epoch": 7.57, "learning_rate": 2.5817484367609373e-06, "loss": 0.0961, "step": 155000 }, { "epoch": 7.6, "learning_rate": 2.555759424915146e-06, "loss": 0.0994, "step": 155500 }, { "epoch": 7.62, "learning_rate": 2.5297704130693547e-06, "loss": 0.0928, "step": 156000 }, { "epoch": 7.65, "learning_rate": 2.5037814012235627e-06, "loss": 0.0964, "step": 156500 }, { "epoch": 7.67, "learning_rate": 2.4777923893777712e-06, "loss": 0.0817, "step": 157000 }, { "epoch": 7.7, "learning_rate": 2.4518033775319797e-06, "loss": 0.1125, "step": 157500 }, { "epoch": 7.72, "learning_rate": 2.425814365686188e-06, "loss": 0.0903, "step": 158000 }, { "epoch": 7.74, "learning_rate": 2.3998253538403967e-06, "loss": 0.0947, "step": 158500 }, { "epoch": 7.77, "learning_rate": 2.3738363419946048e-06, "loss": 0.0947, "step": 159000 }, { "epoch": 7.79, "learning_rate": 2.3478473301488133e-06, "loss": 0.0958, "step": 159500 }, { "epoch": 7.82, "learning_rate": 2.3218583183030217e-06, "loss": 0.1071, "step": 160000 }, { "epoch": 7.84, "learning_rate": 2.2958693064572302e-06, "loss": 0.0878, "step": 160500 }, { "epoch": 7.87, "learning_rate": 2.2698802946114383e-06, "loss": 0.0985, "step": 161000 }, { "epoch": 7.89, "learning_rate": 2.243891282765647e-06, "loss": 0.0942, "step": 161500 }, { "epoch": 7.92, "learning_rate": 2.2179022709198553e-06, "loss": 0.1043, "step": 162000 }, { "epoch": 7.94, "learning_rate": 2.1919132590740634e-06, "loss": 0.0908, "step": 162500 }, { "epoch": 7.96, "learning_rate": 2.1659242472282723e-06, "loss": 0.0969, "step": 163000 }, { "epoch": 7.99, "learning_rate": 2.1399352353824803e-06, "loss": 0.1005, "step": 163500 }, { "epoch": 8.0, "eval_accuracy": 0.9120241857908479, "eval_averaged_scores": 0.897542067688103, "eval_f1": 0.8830599495853579, "eval_loss": 0.543989360332489, "eval_runtime": 36.3255, "eval_samples_per_second": 1001.637, "eval_steps_per_second": 62.628, "step": 163736 }, { "epoch": 8.01, "learning_rate": 2.113946223536689e-06, "loss": 0.0745, "step": 164000 }, { "epoch": 8.04, "learning_rate": 2.0879572116908973e-06, "loss": 0.0656, "step": 164500 }, { "epoch": 8.06, "learning_rate": 2.061968199845106e-06, "loss": 0.0775, "step": 165000 }, { "epoch": 8.09, "learning_rate": 2.035979187999314e-06, "loss": 0.0877, "step": 165500 }, { "epoch": 8.11, "learning_rate": 2.0099901761535224e-06, "loss": 0.0785, "step": 166000 }, { "epoch": 8.14, "learning_rate": 1.984001164307731e-06, "loss": 0.07, "step": 166500 }, { "epoch": 8.16, "learning_rate": 1.9580121524619393e-06, "loss": 0.0866, "step": 167000 }, { "epoch": 8.18, "learning_rate": 1.932023140616148e-06, "loss": 0.0714, "step": 167500 }, { "epoch": 8.21, "learning_rate": 1.9060341287703559e-06, "loss": 0.0878, "step": 168000 }, { "epoch": 8.23, "learning_rate": 1.8800451169245646e-06, "loss": 0.083, "step": 168500 }, { "epoch": 8.26, "learning_rate": 1.8540561050787729e-06, "loss": 0.081, "step": 169000 }, { "epoch": 8.28, "learning_rate": 1.8280670932329814e-06, "loss": 0.0726, "step": 169500 }, { "epoch": 8.31, "learning_rate": 1.8020780813871896e-06, "loss": 0.0764, "step": 170000 }, { "epoch": 8.33, "learning_rate": 1.776089069541398e-06, "loss": 0.0739, "step": 170500 }, { "epoch": 8.35, "learning_rate": 1.7501000576956064e-06, "loss": 0.0881, "step": 171000 }, { "epoch": 8.38, "learning_rate": 1.7241110458498147e-06, "loss": 0.0763, "step": 171500 }, { "epoch": 8.4, "learning_rate": 1.6981220340040234e-06, "loss": 0.0754, "step": 172000 }, { "epoch": 8.43, "learning_rate": 1.6721330221582317e-06, "loss": 0.0842, "step": 172500 }, { "epoch": 8.45, "learning_rate": 1.6461440103124402e-06, "loss": 0.0804, "step": 173000 }, { "epoch": 8.48, "learning_rate": 1.6201549984666484e-06, "loss": 0.0871, "step": 173500 }, { "epoch": 8.5, "learning_rate": 1.5941659866208567e-06, "loss": 0.0784, "step": 174000 }, { "epoch": 8.53, "learning_rate": 1.5681769747750652e-06, "loss": 0.0769, "step": 174500 }, { "epoch": 8.55, "learning_rate": 1.5421879629292735e-06, "loss": 0.0786, "step": 175000 }, { "epoch": 8.57, "learning_rate": 1.5161989510834822e-06, "loss": 0.0787, "step": 175500 }, { "epoch": 8.6, "learning_rate": 1.4902099392376905e-06, "loss": 0.0735, "step": 176000 }, { "epoch": 8.62, "learning_rate": 1.464220927391899e-06, "loss": 0.0853, "step": 176500 }, { "epoch": 8.65, "learning_rate": 1.4382319155461072e-06, "loss": 0.0775, "step": 177000 }, { "epoch": 8.67, "learning_rate": 1.4122429037003157e-06, "loss": 0.0794, "step": 177500 }, { "epoch": 8.7, "learning_rate": 1.386253891854524e-06, "loss": 0.0814, "step": 178000 }, { "epoch": 8.72, "learning_rate": 1.3602648800087323e-06, "loss": 0.0792, "step": 178500 }, { "epoch": 8.75, "learning_rate": 1.3342758681629408e-06, "loss": 0.0857, "step": 179000 }, { "epoch": 8.77, "learning_rate": 1.308286856317149e-06, "loss": 0.086, "step": 179500 }, { "epoch": 8.79, "learning_rate": 1.2822978444713577e-06, "loss": 0.0811, "step": 180000 }, { "epoch": 8.82, "learning_rate": 1.256308832625566e-06, "loss": 0.077, "step": 180500 }, { "epoch": 8.84, "learning_rate": 1.2303198207797743e-06, "loss": 0.0813, "step": 181000 }, { "epoch": 8.87, "learning_rate": 1.2043308089339828e-06, "loss": 0.0726, "step": 181500 }, { "epoch": 8.89, "learning_rate": 1.1783417970881913e-06, "loss": 0.0813, "step": 182000 }, { "epoch": 8.92, "learning_rate": 1.1523527852423996e-06, "loss": 0.081, "step": 182500 }, { "epoch": 8.94, "learning_rate": 1.126363773396608e-06, "loss": 0.0774, "step": 183000 }, { "epoch": 8.97, "learning_rate": 1.1003747615508165e-06, "loss": 0.0806, "step": 183500 }, { "epoch": 8.99, "learning_rate": 1.0743857497050248e-06, "loss": 0.0671, "step": 184000 }, { "epoch": 9.0, "eval_accuracy": 0.9127112821217535, "eval_averaged_scores": 0.8981017892633713, "eval_f1": 0.883492296404989, "eval_loss": 0.5674276351928711, "eval_runtime": 36.287, "eval_samples_per_second": 1002.701, "eval_steps_per_second": 62.695, "step": 184203 }, { "epoch": 9.01, "learning_rate": 1.048396737859233e-06, "loss": 0.073, "step": 184500 }, { "epoch": 9.04, "learning_rate": 1.0224077260134416e-06, "loss": 0.0623, "step": 185000 }, { "epoch": 9.06, "learning_rate": 9.9641871416765e-07, "loss": 0.0562, "step": 185500 }, { "epoch": 9.09, "learning_rate": 9.704297023218584e-07, "loss": 0.062, "step": 186000 }, { "epoch": 9.11, "learning_rate": 9.444406904760668e-07, "loss": 0.0753, "step": 186500 }, { "epoch": 9.14, "learning_rate": 9.184516786302752e-07, "loss": 0.0624, "step": 187000 }, { "epoch": 9.16, "learning_rate": 8.924626667844836e-07, "loss": 0.0593, "step": 187500 }, { "epoch": 9.19, "learning_rate": 8.664736549386919e-07, "loss": 0.0758, "step": 188000 }, { "epoch": 9.21, "learning_rate": 8.404846430929004e-07, "loss": 0.0766, "step": 188500 }, { "epoch": 9.23, "learning_rate": 8.144956312471088e-07, "loss": 0.0681, "step": 189000 }, { "epoch": 9.26, "learning_rate": 7.885066194013171e-07, "loss": 0.0778, "step": 189500 }, { "epoch": 9.28, "learning_rate": 7.625176075555256e-07, "loss": 0.0596, "step": 190000 }, { "epoch": 9.31, "learning_rate": 7.36528595709734e-07, "loss": 0.0702, "step": 190500 }, { "epoch": 9.33, "learning_rate": 7.105395838639424e-07, "loss": 0.0658, "step": 191000 }, { "epoch": 9.36, "learning_rate": 6.845505720181508e-07, "loss": 0.0729, "step": 191500 }, { "epoch": 9.38, "learning_rate": 6.585615601723591e-07, "loss": 0.0656, "step": 192000 }, { "epoch": 9.41, "learning_rate": 6.325725483265676e-07, "loss": 0.0592, "step": 192500 }, { "epoch": 9.43, "learning_rate": 6.065835364807759e-07, "loss": 0.0653, "step": 193000 }, { "epoch": 9.45, "learning_rate": 5.805945246349843e-07, "loss": 0.0637, "step": 193500 }, { "epoch": 9.48, "learning_rate": 5.546055127891928e-07, "loss": 0.0597, "step": 194000 }, { "epoch": 9.5, "learning_rate": 5.286165009434012e-07, "loss": 0.074, "step": 194500 }, { "epoch": 9.53, "learning_rate": 5.026274890976096e-07, "loss": 0.0531, "step": 195000 }, { "epoch": 9.55, "learning_rate": 4.7663847725181796e-07, "loss": 0.0644, "step": 195500 }, { "epoch": 9.58, "learning_rate": 4.5064946540602635e-07, "loss": 0.0585, "step": 196000 }, { "epoch": 9.6, "learning_rate": 4.246604535602348e-07, "loss": 0.0624, "step": 196500 }, { "epoch": 9.63, "learning_rate": 3.986714417144431e-07, "loss": 0.0615, "step": 197000 }, { "epoch": 9.65, "learning_rate": 3.7268242986865155e-07, "loss": 0.07, "step": 197500 }, { "epoch": 9.67, "learning_rate": 3.4669341802286e-07, "loss": 0.0612, "step": 198000 }, { "epoch": 9.7, "learning_rate": 3.207044061770684e-07, "loss": 0.0703, "step": 198500 }, { "epoch": 9.72, "learning_rate": 2.9471539433127676e-07, "loss": 0.0748, "step": 199000 }, { "epoch": 9.75, "learning_rate": 2.6872638248548514e-07, "loss": 0.0602, "step": 199500 }, { "epoch": 9.77, "learning_rate": 2.427373706396936e-07, "loss": 0.063, "step": 200000 }, { "epoch": 9.8, "learning_rate": 2.1674835879390194e-07, "loss": 0.0722, "step": 200500 }, { "epoch": 9.82, "learning_rate": 1.9075934694811035e-07, "loss": 0.0528, "step": 201000 }, { "epoch": 9.85, "learning_rate": 1.6477033510231873e-07, "loss": 0.0783, "step": 201500 }, { "epoch": 9.87, "learning_rate": 1.3878132325652717e-07, "loss": 0.0623, "step": 202000 }, { "epoch": 9.89, "learning_rate": 1.1279231141073555e-07, "loss": 0.0745, "step": 202500 }, { "epoch": 9.92, "learning_rate": 8.680329956494395e-08, "loss": 0.0507, "step": 203000 }, { "epoch": 9.94, "learning_rate": 6.081428771915235e-08, "loss": 0.0633, "step": 203500 }, { "epoch": 9.97, "learning_rate": 3.4825275873360744e-08, "loss": 0.0659, "step": 204000 }, { "epoch": 9.99, "learning_rate": 8.836264027569144e-09, "loss": 0.0669, "step": 204500 }, { "epoch": 10.0, "eval_accuracy": 0.913398378452659, "eval_averaged_scores": 0.8990160346845362, "eval_f1": 0.8846336909164134, "eval_loss": 0.611711859703064, "eval_runtime": 36.3229, "eval_samples_per_second": 1001.71, "eval_steps_per_second": 62.633, "step": 204670 }, { "epoch": 10.0, "step": 204670, "total_flos": 1.0022032836134272e+17, "train_loss": 0.16490516577327513, "train_runtime": 10899.0474, "train_samples_per_second": 300.449, "train_steps_per_second": 18.779 } ], "logging_steps": 500, "max_steps": 204670, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.0022032836134272e+17, "trial_name": null, "trial_params": null }