| { |
| "best_metric": 0.9142857142857143, |
| "best_model_checkpoint": "Cvt-finetuned-thyroid/checkpoint-18", |
| "epoch": 149.88888888888889, |
| "global_step": 300, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.89, |
| "eval_accuracy": 0.6857142857142857, |
| "eval_loss": 0.6600533127784729, |
| "eval_runtime": 0.4026, |
| "eval_samples_per_second": 173.882, |
| "eval_steps_per_second": 7.452, |
| "step": 2 |
| }, |
| { |
| "epoch": 1.89, |
| "eval_accuracy": 0.7, |
| "eval_loss": 0.6563708186149597, |
| "eval_runtime": 0.3922, |
| "eval_samples_per_second": 178.495, |
| "eval_steps_per_second": 7.65, |
| "step": 4 |
| }, |
| { |
| "epoch": 2.89, |
| "eval_accuracy": 0.7428571428571429, |
| "eval_loss": 0.6498632431030273, |
| "eval_runtime": 0.3916, |
| "eval_samples_per_second": 178.774, |
| "eval_steps_per_second": 7.662, |
| "step": 6 |
| }, |
| { |
| "epoch": 3.89, |
| "eval_accuracy": 0.7714285714285715, |
| "eval_loss": 0.6404139399528503, |
| "eval_runtime": 0.3913, |
| "eval_samples_per_second": 178.878, |
| "eval_steps_per_second": 7.666, |
| "step": 8 |
| }, |
| { |
| "epoch": 4.89, |
| "eval_accuracy": 0.8, |
| "eval_loss": 0.62859708070755, |
| "eval_runtime": 0.3923, |
| "eval_samples_per_second": 178.448, |
| "eval_steps_per_second": 7.648, |
| "step": 10 |
| }, |
| { |
| "epoch": 5.89, |
| "eval_accuracy": 0.8142857142857143, |
| "eval_loss": 0.6146815419197083, |
| "eval_runtime": 0.3861, |
| "eval_samples_per_second": 181.308, |
| "eval_steps_per_second": 7.77, |
| "step": 12 |
| }, |
| { |
| "epoch": 6.89, |
| "eval_accuracy": 0.8571428571428571, |
| "eval_loss": 0.5986873507499695, |
| "eval_runtime": 0.3853, |
| "eval_samples_per_second": 181.687, |
| "eval_steps_per_second": 7.787, |
| "step": 14 |
| }, |
| { |
| "epoch": 7.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.5808569192886353, |
| "eval_runtime": 0.391, |
| "eval_samples_per_second": 179.013, |
| "eval_steps_per_second": 7.672, |
| "step": 16 |
| }, |
| { |
| "epoch": 8.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.5618674755096436, |
| "eval_runtime": 0.3861, |
| "eval_samples_per_second": 181.318, |
| "eval_steps_per_second": 7.771, |
| "step": 18 |
| }, |
| { |
| "epoch": 9.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.5425492525100708, |
| "eval_runtime": 0.3895, |
| "eval_samples_per_second": 179.718, |
| "eval_steps_per_second": 7.702, |
| "step": 20 |
| }, |
| { |
| "epoch": 10.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.5230604410171509, |
| "eval_runtime": 0.3908, |
| "eval_samples_per_second": 179.109, |
| "eval_steps_per_second": 7.676, |
| "step": 22 |
| }, |
| { |
| "epoch": 11.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.5035112500190735, |
| "eval_runtime": 0.3841, |
| "eval_samples_per_second": 182.262, |
| "eval_steps_per_second": 7.811, |
| "step": 24 |
| }, |
| { |
| "epoch": 12.44, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.6786, |
| "step": 25 |
| }, |
| { |
| "epoch": 12.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.4828525483608246, |
| "eval_runtime": 0.3958, |
| "eval_samples_per_second": 176.835, |
| "eval_steps_per_second": 7.579, |
| "step": 26 |
| }, |
| { |
| "epoch": 13.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.4621497690677643, |
| "eval_runtime": 0.3893, |
| "eval_samples_per_second": 179.815, |
| "eval_steps_per_second": 7.706, |
| "step": 28 |
| }, |
| { |
| "epoch": 14.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.44225531816482544, |
| "eval_runtime": 0.3936, |
| "eval_samples_per_second": 177.866, |
| "eval_steps_per_second": 7.623, |
| "step": 30 |
| }, |
| { |
| "epoch": 15.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.4237878918647766, |
| "eval_runtime": 0.3885, |
| "eval_samples_per_second": 180.201, |
| "eval_steps_per_second": 7.723, |
| "step": 32 |
| }, |
| { |
| "epoch": 16.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.4087783396244049, |
| "eval_runtime": 0.3869, |
| "eval_samples_per_second": 180.93, |
| "eval_steps_per_second": 7.754, |
| "step": 34 |
| }, |
| { |
| "epoch": 17.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.39638686180114746, |
| "eval_runtime": 0.3919, |
| "eval_samples_per_second": 178.599, |
| "eval_steps_per_second": 7.654, |
| "step": 36 |
| }, |
| { |
| "epoch": 18.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3871336281299591, |
| "eval_runtime": 0.3895, |
| "eval_samples_per_second": 179.725, |
| "eval_steps_per_second": 7.703, |
| "step": 38 |
| }, |
| { |
| "epoch": 19.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.37913817167282104, |
| "eval_runtime": 0.3975, |
| "eval_samples_per_second": 176.113, |
| "eval_steps_per_second": 7.548, |
| "step": 40 |
| }, |
| { |
| "epoch": 20.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.37276408076286316, |
| "eval_runtime": 0.3884, |
| "eval_samples_per_second": 180.205, |
| "eval_steps_per_second": 7.723, |
| "step": 42 |
| }, |
| { |
| "epoch": 21.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.36989572644233704, |
| "eval_runtime": 0.3857, |
| "eval_samples_per_second": 181.502, |
| "eval_steps_per_second": 7.779, |
| "step": 44 |
| }, |
| { |
| "epoch": 22.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.36899128556251526, |
| "eval_runtime": 0.3889, |
| "eval_samples_per_second": 180.008, |
| "eval_steps_per_second": 7.715, |
| "step": 46 |
| }, |
| { |
| "epoch": 23.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.36841970682144165, |
| "eval_runtime": 0.3867, |
| "eval_samples_per_second": 181.0, |
| "eval_steps_per_second": 7.757, |
| "step": 48 |
| }, |
| { |
| "epoch": 24.89, |
| "learning_rate": 9.25925925925926e-06, |
| "loss": 0.544, |
| "step": 50 |
| }, |
| { |
| "epoch": 24.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.36657989025115967, |
| "eval_runtime": 0.4003, |
| "eval_samples_per_second": 174.876, |
| "eval_steps_per_second": 7.495, |
| "step": 50 |
| }, |
| { |
| "epoch": 25.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.36433330178260803, |
| "eval_runtime": 0.386, |
| "eval_samples_per_second": 181.342, |
| "eval_steps_per_second": 7.772, |
| "step": 52 |
| }, |
| { |
| "epoch": 26.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3611242175102234, |
| "eval_runtime": 0.3881, |
| "eval_samples_per_second": 180.356, |
| "eval_steps_per_second": 7.73, |
| "step": 54 |
| }, |
| { |
| "epoch": 27.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.356433242559433, |
| "eval_runtime": 0.3885, |
| "eval_samples_per_second": 180.192, |
| "eval_steps_per_second": 7.723, |
| "step": 56 |
| }, |
| { |
| "epoch": 28.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3520072400569916, |
| "eval_runtime": 0.3869, |
| "eval_samples_per_second": 180.937, |
| "eval_steps_per_second": 7.754, |
| "step": 58 |
| }, |
| { |
| "epoch": 29.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3477487862110138, |
| "eval_runtime": 0.39, |
| "eval_samples_per_second": 179.492, |
| "eval_steps_per_second": 7.693, |
| "step": 60 |
| }, |
| { |
| "epoch": 30.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3445895314216614, |
| "eval_runtime": 0.4111, |
| "eval_samples_per_second": 170.276, |
| "eval_steps_per_second": 7.298, |
| "step": 62 |
| }, |
| { |
| "epoch": 31.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3390456736087799, |
| "eval_runtime": 0.3852, |
| "eval_samples_per_second": 181.703, |
| "eval_steps_per_second": 7.787, |
| "step": 64 |
| }, |
| { |
| "epoch": 32.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3347129821777344, |
| "eval_runtime": 0.4506, |
| "eval_samples_per_second": 155.353, |
| "eval_steps_per_second": 6.658, |
| "step": 66 |
| }, |
| { |
| "epoch": 33.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.33145877718925476, |
| "eval_runtime": 0.394, |
| "eval_samples_per_second": 177.648, |
| "eval_steps_per_second": 7.613, |
| "step": 68 |
| }, |
| { |
| "epoch": 34.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3286542296409607, |
| "eval_runtime": 0.4325, |
| "eval_samples_per_second": 161.841, |
| "eval_steps_per_second": 6.936, |
| "step": 70 |
| }, |
| { |
| "epoch": 35.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3254566192626953, |
| "eval_runtime": 0.3901, |
| "eval_samples_per_second": 179.436, |
| "eval_steps_per_second": 7.69, |
| "step": 72 |
| }, |
| { |
| "epoch": 36.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3226519525051117, |
| "eval_runtime": 0.3957, |
| "eval_samples_per_second": 176.889, |
| "eval_steps_per_second": 7.581, |
| "step": 74 |
| }, |
| { |
| "epoch": 37.44, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.4904, |
| "step": 75 |
| }, |
| { |
| "epoch": 37.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.32086578011512756, |
| "eval_runtime": 0.3917, |
| "eval_samples_per_second": 178.712, |
| "eval_steps_per_second": 7.659, |
| "step": 76 |
| }, |
| { |
| "epoch": 38.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.32166367769241333, |
| "eval_runtime": 0.3879, |
| "eval_samples_per_second": 180.476, |
| "eval_steps_per_second": 7.735, |
| "step": 78 |
| }, |
| { |
| "epoch": 39.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3228186070919037, |
| "eval_runtime": 0.3945, |
| "eval_samples_per_second": 177.444, |
| "eval_steps_per_second": 7.605, |
| "step": 80 |
| }, |
| { |
| "epoch": 40.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.32248714566230774, |
| "eval_runtime": 0.3925, |
| "eval_samples_per_second": 178.361, |
| "eval_steps_per_second": 7.644, |
| "step": 82 |
| }, |
| { |
| "epoch": 41.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.32159554958343506, |
| "eval_runtime": 0.3914, |
| "eval_samples_per_second": 178.848, |
| "eval_steps_per_second": 7.665, |
| "step": 84 |
| }, |
| { |
| "epoch": 42.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.31996870040893555, |
| "eval_runtime": 0.3902, |
| "eval_samples_per_second": 179.374, |
| "eval_steps_per_second": 7.687, |
| "step": 86 |
| }, |
| { |
| "epoch": 43.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.31838318705558777, |
| "eval_runtime": 0.3951, |
| "eval_samples_per_second": 177.183, |
| "eval_steps_per_second": 7.594, |
| "step": 88 |
| }, |
| { |
| "epoch": 44.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3160267174243927, |
| "eval_runtime": 0.3892, |
| "eval_samples_per_second": 179.849, |
| "eval_steps_per_second": 7.708, |
| "step": 90 |
| }, |
| { |
| "epoch": 45.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.31339019536972046, |
| "eval_runtime": 0.3914, |
| "eval_samples_per_second": 178.826, |
| "eval_steps_per_second": 7.664, |
| "step": 92 |
| }, |
| { |
| "epoch": 46.89, |
| "eval_accuracy": 0.9142857142857143, |
| "eval_loss": 0.3097696900367737, |
| "eval_runtime": 0.3909, |
| "eval_samples_per_second": 179.096, |
| "eval_steps_per_second": 7.676, |
| "step": 94 |
| }, |
| { |
| "epoch": 47.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.3083226680755615, |
| "eval_runtime": 0.3975, |
| "eval_samples_per_second": 176.094, |
| "eval_steps_per_second": 7.547, |
| "step": 96 |
| }, |
| { |
| "epoch": 48.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.306194543838501, |
| "eval_runtime": 0.39, |
| "eval_samples_per_second": 179.464, |
| "eval_steps_per_second": 7.691, |
| "step": 98 |
| }, |
| { |
| "epoch": 49.89, |
| "learning_rate": 7.4074074074074075e-06, |
| "loss": 0.4468, |
| "step": 100 |
| }, |
| { |
| "epoch": 49.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.3034059703350067, |
| "eval_runtime": 0.3988, |
| "eval_samples_per_second": 175.515, |
| "eval_steps_per_second": 7.522, |
| "step": 100 |
| }, |
| { |
| "epoch": 50.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.3030487298965454, |
| "eval_runtime": 0.3912, |
| "eval_samples_per_second": 178.919, |
| "eval_steps_per_second": 7.668, |
| "step": 102 |
| }, |
| { |
| "epoch": 51.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.3037838339805603, |
| "eval_runtime": 0.4036, |
| "eval_samples_per_second": 173.422, |
| "eval_steps_per_second": 7.432, |
| "step": 104 |
| }, |
| { |
| "epoch": 52.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.30441269278526306, |
| "eval_runtime": 0.3905, |
| "eval_samples_per_second": 179.272, |
| "eval_steps_per_second": 7.683, |
| "step": 106 |
| }, |
| { |
| "epoch": 53.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.30462566018104553, |
| "eval_runtime": 0.4004, |
| "eval_samples_per_second": 174.832, |
| "eval_steps_per_second": 7.493, |
| "step": 108 |
| }, |
| { |
| "epoch": 54.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.30405017733573914, |
| "eval_runtime": 0.3926, |
| "eval_samples_per_second": 178.307, |
| "eval_steps_per_second": 7.642, |
| "step": 110 |
| }, |
| { |
| "epoch": 55.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.3027400076389313, |
| "eval_runtime": 0.3921, |
| "eval_samples_per_second": 178.52, |
| "eval_steps_per_second": 7.651, |
| "step": 112 |
| }, |
| { |
| "epoch": 56.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.3015061914920807, |
| "eval_runtime": 0.3917, |
| "eval_samples_per_second": 178.717, |
| "eval_steps_per_second": 7.659, |
| "step": 114 |
| }, |
| { |
| "epoch": 57.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.298301637172699, |
| "eval_runtime": 0.3901, |
| "eval_samples_per_second": 179.444, |
| "eval_steps_per_second": 7.69, |
| "step": 116 |
| }, |
| { |
| "epoch": 58.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.2965472638607025, |
| "eval_runtime": 0.3941, |
| "eval_samples_per_second": 177.631, |
| "eval_steps_per_second": 7.613, |
| "step": 118 |
| }, |
| { |
| "epoch": 59.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.2955659329891205, |
| "eval_runtime": 0.3929, |
| "eval_samples_per_second": 178.161, |
| "eval_steps_per_second": 7.635, |
| "step": 120 |
| }, |
| { |
| "epoch": 60.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.2953043282032013, |
| "eval_runtime": 0.3961, |
| "eval_samples_per_second": 176.718, |
| "eval_steps_per_second": 7.574, |
| "step": 122 |
| }, |
| { |
| "epoch": 61.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.2960428297519684, |
| "eval_runtime": 0.3894, |
| "eval_samples_per_second": 179.751, |
| "eval_steps_per_second": 7.704, |
| "step": 124 |
| }, |
| { |
| "epoch": 62.44, |
| "learning_rate": 6.481481481481482e-06, |
| "loss": 0.411, |
| "step": 125 |
| }, |
| { |
| "epoch": 62.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.29606738686561584, |
| "eval_runtime": 0.3907, |
| "eval_samples_per_second": 179.177, |
| "eval_steps_per_second": 7.679, |
| "step": 126 |
| }, |
| { |
| "epoch": 63.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.29615068435668945, |
| "eval_runtime": 0.3957, |
| "eval_samples_per_second": 176.884, |
| "eval_steps_per_second": 7.581, |
| "step": 128 |
| }, |
| { |
| "epoch": 64.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.29546239972114563, |
| "eval_runtime": 0.4028, |
| "eval_samples_per_second": 173.791, |
| "eval_steps_per_second": 7.448, |
| "step": 130 |
| }, |
| { |
| "epoch": 65.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.29375123977661133, |
| "eval_runtime": 0.3984, |
| "eval_samples_per_second": 175.721, |
| "eval_steps_per_second": 7.531, |
| "step": 132 |
| }, |
| { |
| "epoch": 66.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.293165922164917, |
| "eval_runtime": 0.3896, |
| "eval_samples_per_second": 179.657, |
| "eval_steps_per_second": 7.7, |
| "step": 134 |
| }, |
| { |
| "epoch": 67.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.2927437126636505, |
| "eval_runtime": 0.3965, |
| "eval_samples_per_second": 176.563, |
| "eval_steps_per_second": 7.567, |
| "step": 136 |
| }, |
| { |
| "epoch": 68.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.2911137044429779, |
| "eval_runtime": 0.3982, |
| "eval_samples_per_second": 175.784, |
| "eval_steps_per_second": 7.534, |
| "step": 138 |
| }, |
| { |
| "epoch": 69.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.29145926237106323, |
| "eval_runtime": 0.3948, |
| "eval_samples_per_second": 177.308, |
| "eval_steps_per_second": 7.599, |
| "step": 140 |
| }, |
| { |
| "epoch": 70.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.291074275970459, |
| "eval_runtime": 0.3933, |
| "eval_samples_per_second": 177.964, |
| "eval_steps_per_second": 7.627, |
| "step": 142 |
| }, |
| { |
| "epoch": 71.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.2896248400211334, |
| "eval_runtime": 0.3962, |
| "eval_samples_per_second": 176.685, |
| "eval_steps_per_second": 7.572, |
| "step": 144 |
| }, |
| { |
| "epoch": 72.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.2880937457084656, |
| "eval_runtime": 0.3944, |
| "eval_samples_per_second": 177.463, |
| "eval_steps_per_second": 7.606, |
| "step": 146 |
| }, |
| { |
| "epoch": 73.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.28765279054641724, |
| "eval_runtime": 0.3966, |
| "eval_samples_per_second": 176.488, |
| "eval_steps_per_second": 7.564, |
| "step": 148 |
| }, |
| { |
| "epoch": 74.89, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.3705, |
| "step": 150 |
| }, |
| { |
| "epoch": 74.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.2876226603984833, |
| "eval_runtime": 0.3888, |
| "eval_samples_per_second": 180.054, |
| "eval_steps_per_second": 7.717, |
| "step": 150 |
| }, |
| { |
| "epoch": 75.89, |
| "eval_accuracy": 0.9, |
| "eval_loss": 0.28748172521591187, |
| "eval_runtime": 0.411, |
| "eval_samples_per_second": 170.337, |
| "eval_steps_per_second": 7.3, |
| "step": 152 |
| }, |
| { |
| "epoch": 76.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2876095473766327, |
| "eval_runtime": 0.4294, |
| "eval_samples_per_second": 163.0, |
| "eval_steps_per_second": 6.986, |
| "step": 154 |
| }, |
| { |
| "epoch": 77.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.28670457005500793, |
| "eval_runtime": 0.3949, |
| "eval_samples_per_second": 177.274, |
| "eval_steps_per_second": 7.597, |
| "step": 156 |
| }, |
| { |
| "epoch": 78.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.28570595383644104, |
| "eval_runtime": 0.3939, |
| "eval_samples_per_second": 177.69, |
| "eval_steps_per_second": 7.615, |
| "step": 158 |
| }, |
| { |
| "epoch": 79.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.28557589650154114, |
| "eval_runtime": 0.3949, |
| "eval_samples_per_second": 177.268, |
| "eval_steps_per_second": 7.597, |
| "step": 160 |
| }, |
| { |
| "epoch": 80.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2853609621524811, |
| "eval_runtime": 0.3898, |
| "eval_samples_per_second": 179.59, |
| "eval_steps_per_second": 7.697, |
| "step": 162 |
| }, |
| { |
| "epoch": 81.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.2859923243522644, |
| "eval_runtime": 0.3917, |
| "eval_samples_per_second": 178.729, |
| "eval_steps_per_second": 7.66, |
| "step": 164 |
| }, |
| { |
| "epoch": 82.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.2853332459926605, |
| "eval_runtime": 0.3898, |
| "eval_samples_per_second": 179.566, |
| "eval_steps_per_second": 7.696, |
| "step": 166 |
| }, |
| { |
| "epoch": 83.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.28681549429893494, |
| "eval_runtime": 0.3903, |
| "eval_samples_per_second": 179.364, |
| "eval_steps_per_second": 7.687, |
| "step": 168 |
| }, |
| { |
| "epoch": 84.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.2884899079799652, |
| "eval_runtime": 0.4004, |
| "eval_samples_per_second": 174.808, |
| "eval_steps_per_second": 7.492, |
| "step": 170 |
| }, |
| { |
| "epoch": 85.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.2893112599849701, |
| "eval_runtime": 0.3932, |
| "eval_samples_per_second": 178.006, |
| "eval_steps_per_second": 7.629, |
| "step": 172 |
| }, |
| { |
| "epoch": 86.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.29076695442199707, |
| "eval_runtime": 0.4006, |
| "eval_samples_per_second": 174.727, |
| "eval_steps_per_second": 7.488, |
| "step": 174 |
| }, |
| { |
| "epoch": 87.44, |
| "learning_rate": 4.62962962962963e-06, |
| "loss": 0.3442, |
| "step": 175 |
| }, |
| { |
| "epoch": 87.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.29055681824684143, |
| "eval_runtime": 0.3948, |
| "eval_samples_per_second": 177.302, |
| "eval_steps_per_second": 7.599, |
| "step": 176 |
| }, |
| { |
| "epoch": 88.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.29001426696777344, |
| "eval_runtime": 0.4034, |
| "eval_samples_per_second": 173.528, |
| "eval_steps_per_second": 7.437, |
| "step": 178 |
| }, |
| { |
| "epoch": 89.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.287276953458786, |
| "eval_runtime": 0.3919, |
| "eval_samples_per_second": 178.62, |
| "eval_steps_per_second": 7.655, |
| "step": 180 |
| }, |
| { |
| "epoch": 90.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.28426897525787354, |
| "eval_runtime": 0.4018, |
| "eval_samples_per_second": 174.231, |
| "eval_steps_per_second": 7.467, |
| "step": 182 |
| }, |
| { |
| "epoch": 91.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.28223827481269836, |
| "eval_runtime": 0.3886, |
| "eval_samples_per_second": 180.111, |
| "eval_steps_per_second": 7.719, |
| "step": 184 |
| }, |
| { |
| "epoch": 92.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.28137001395225525, |
| "eval_runtime": 0.3867, |
| "eval_samples_per_second": 181.016, |
| "eval_steps_per_second": 7.758, |
| "step": 186 |
| }, |
| { |
| "epoch": 93.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.2821663022041321, |
| "eval_runtime": 0.395, |
| "eval_samples_per_second": 177.23, |
| "eval_steps_per_second": 7.596, |
| "step": 188 |
| }, |
| { |
| "epoch": 94.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.2825068235397339, |
| "eval_runtime": 0.3994, |
| "eval_samples_per_second": 175.263, |
| "eval_steps_per_second": 7.511, |
| "step": 190 |
| }, |
| { |
| "epoch": 95.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2831081449985504, |
| "eval_runtime": 0.3997, |
| "eval_samples_per_second": 175.112, |
| "eval_steps_per_second": 7.505, |
| "step": 192 |
| }, |
| { |
| "epoch": 96.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2835954427719116, |
| "eval_runtime": 0.3968, |
| "eval_samples_per_second": 176.408, |
| "eval_steps_per_second": 7.56, |
| "step": 194 |
| }, |
| { |
| "epoch": 97.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.28326863050460815, |
| "eval_runtime": 0.3965, |
| "eval_samples_per_second": 176.561, |
| "eval_steps_per_second": 7.567, |
| "step": 196 |
| }, |
| { |
| "epoch": 98.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.281973272562027, |
| "eval_runtime": 0.3973, |
| "eval_samples_per_second": 176.206, |
| "eval_steps_per_second": 7.552, |
| "step": 198 |
| }, |
| { |
| "epoch": 99.89, |
| "learning_rate": 3.7037037037037037e-06, |
| "loss": 0.3159, |
| "step": 200 |
| }, |
| { |
| "epoch": 99.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.28054317831993103, |
| "eval_runtime": 0.3966, |
| "eval_samples_per_second": 176.5, |
| "eval_steps_per_second": 7.564, |
| "step": 200 |
| }, |
| { |
| "epoch": 100.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.27876371145248413, |
| "eval_runtime": 0.393, |
| "eval_samples_per_second": 178.117, |
| "eval_steps_per_second": 7.634, |
| "step": 202 |
| }, |
| { |
| "epoch": 101.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.27795442938804626, |
| "eval_runtime": 0.388, |
| "eval_samples_per_second": 180.426, |
| "eval_steps_per_second": 7.733, |
| "step": 204 |
| }, |
| { |
| "epoch": 102.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.27797621488571167, |
| "eval_runtime": 0.3898, |
| "eval_samples_per_second": 179.574, |
| "eval_steps_per_second": 7.696, |
| "step": 206 |
| }, |
| { |
| "epoch": 103.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.27778205275535583, |
| "eval_runtime": 0.3946, |
| "eval_samples_per_second": 177.402, |
| "eval_steps_per_second": 7.603, |
| "step": 208 |
| }, |
| { |
| "epoch": 104.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.2783416509628296, |
| "eval_runtime": 0.3956, |
| "eval_samples_per_second": 176.929, |
| "eval_steps_per_second": 7.583, |
| "step": 210 |
| }, |
| { |
| "epoch": 105.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2788624167442322, |
| "eval_runtime": 0.3965, |
| "eval_samples_per_second": 176.537, |
| "eval_steps_per_second": 7.566, |
| "step": 212 |
| }, |
| { |
| "epoch": 106.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27885138988494873, |
| "eval_runtime": 0.401, |
| "eval_samples_per_second": 174.562, |
| "eval_steps_per_second": 7.481, |
| "step": 214 |
| }, |
| { |
| "epoch": 107.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2788878381252289, |
| "eval_runtime": 0.3934, |
| "eval_samples_per_second": 177.953, |
| "eval_steps_per_second": 7.627, |
| "step": 216 |
| }, |
| { |
| "epoch": 108.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27941665053367615, |
| "eval_runtime": 0.3952, |
| "eval_samples_per_second": 177.139, |
| "eval_steps_per_second": 7.592, |
| "step": 218 |
| }, |
| { |
| "epoch": 109.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2792874872684479, |
| "eval_runtime": 0.3992, |
| "eval_samples_per_second": 175.358, |
| "eval_steps_per_second": 7.515, |
| "step": 220 |
| }, |
| { |
| "epoch": 110.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27895817160606384, |
| "eval_runtime": 0.393, |
| "eval_samples_per_second": 178.121, |
| "eval_steps_per_second": 7.634, |
| "step": 222 |
| }, |
| { |
| "epoch": 111.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2787097692489624, |
| "eval_runtime": 0.3941, |
| "eval_samples_per_second": 177.636, |
| "eval_steps_per_second": 7.613, |
| "step": 224 |
| }, |
| { |
| "epoch": 112.44, |
| "learning_rate": 2.7777777777777783e-06, |
| "loss": 0.2982, |
| "step": 225 |
| }, |
| { |
| "epoch": 112.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2774018943309784, |
| "eval_runtime": 0.3953, |
| "eval_samples_per_second": 177.084, |
| "eval_steps_per_second": 7.589, |
| "step": 226 |
| }, |
| { |
| "epoch": 113.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2765597105026245, |
| "eval_runtime": 0.389, |
| "eval_samples_per_second": 179.928, |
| "eval_steps_per_second": 7.711, |
| "step": 228 |
| }, |
| { |
| "epoch": 114.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27541905641555786, |
| "eval_runtime": 0.3931, |
| "eval_samples_per_second": 178.054, |
| "eval_steps_per_second": 7.631, |
| "step": 230 |
| }, |
| { |
| "epoch": 115.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.27446889877319336, |
| "eval_runtime": 0.3931, |
| "eval_samples_per_second": 178.076, |
| "eval_steps_per_second": 7.632, |
| "step": 232 |
| }, |
| { |
| "epoch": 116.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.2735791802406311, |
| "eval_runtime": 0.3922, |
| "eval_samples_per_second": 178.501, |
| "eval_steps_per_second": 7.65, |
| "step": 234 |
| }, |
| { |
| "epoch": 117.89, |
| "eval_accuracy": 0.8714285714285714, |
| "eval_loss": 0.27332428097724915, |
| "eval_runtime": 0.3911, |
| "eval_samples_per_second": 178.974, |
| "eval_steps_per_second": 7.67, |
| "step": 236 |
| }, |
| { |
| "epoch": 118.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2734207212924957, |
| "eval_runtime": 0.396, |
| "eval_samples_per_second": 176.768, |
| "eval_steps_per_second": 7.576, |
| "step": 238 |
| }, |
| { |
| "epoch": 119.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27379804849624634, |
| "eval_runtime": 0.3939, |
| "eval_samples_per_second": 177.731, |
| "eval_steps_per_second": 7.617, |
| "step": 240 |
| }, |
| { |
| "epoch": 120.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2744147777557373, |
| "eval_runtime": 0.3947, |
| "eval_samples_per_second": 177.362, |
| "eval_steps_per_second": 7.601, |
| "step": 242 |
| }, |
| { |
| "epoch": 121.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2748957574367523, |
| "eval_runtime": 0.4048, |
| "eval_samples_per_second": 172.911, |
| "eval_steps_per_second": 7.41, |
| "step": 244 |
| }, |
| { |
| "epoch": 122.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27517834305763245, |
| "eval_runtime": 0.398, |
| "eval_samples_per_second": 175.867, |
| "eval_steps_per_second": 7.537, |
| "step": 246 |
| }, |
| { |
| "epoch": 123.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2754015624523163, |
| "eval_runtime": 0.394, |
| "eval_samples_per_second": 177.676, |
| "eval_steps_per_second": 7.615, |
| "step": 248 |
| }, |
| { |
| "epoch": 124.89, |
| "learning_rate": 1.8518518518518519e-06, |
| "loss": 0.2757, |
| "step": 250 |
| }, |
| { |
| "epoch": 124.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27559223771095276, |
| "eval_runtime": 0.3999, |
| "eval_samples_per_second": 175.064, |
| "eval_steps_per_second": 7.503, |
| "step": 250 |
| }, |
| { |
| "epoch": 125.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27577343583106995, |
| "eval_runtime": 0.3951, |
| "eval_samples_per_second": 177.152, |
| "eval_steps_per_second": 7.592, |
| "step": 252 |
| }, |
| { |
| "epoch": 126.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2756122648715973, |
| "eval_runtime": 0.3965, |
| "eval_samples_per_second": 176.526, |
| "eval_steps_per_second": 7.565, |
| "step": 254 |
| }, |
| { |
| "epoch": 127.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2754615247249603, |
| "eval_runtime": 0.3971, |
| "eval_samples_per_second": 176.293, |
| "eval_steps_per_second": 7.555, |
| "step": 256 |
| }, |
| { |
| "epoch": 128.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27490970492362976, |
| "eval_runtime": 0.3981, |
| "eval_samples_per_second": 175.828, |
| "eval_steps_per_second": 7.535, |
| "step": 258 |
| }, |
| { |
| "epoch": 129.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27434810996055603, |
| "eval_runtime": 0.3988, |
| "eval_samples_per_second": 175.52, |
| "eval_steps_per_second": 7.522, |
| "step": 260 |
| }, |
| { |
| "epoch": 130.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27393919229507446, |
| "eval_runtime": 0.3964, |
| "eval_samples_per_second": 176.607, |
| "eval_steps_per_second": 7.569, |
| "step": 262 |
| }, |
| { |
| "epoch": 131.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2735598385334015, |
| "eval_runtime": 0.3963, |
| "eval_samples_per_second": 176.647, |
| "eval_steps_per_second": 7.571, |
| "step": 264 |
| }, |
| { |
| "epoch": 132.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27345114946365356, |
| "eval_runtime": 0.3943, |
| "eval_samples_per_second": 177.525, |
| "eval_steps_per_second": 7.608, |
| "step": 266 |
| }, |
| { |
| "epoch": 133.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27316632866859436, |
| "eval_runtime": 0.3968, |
| "eval_samples_per_second": 176.395, |
| "eval_steps_per_second": 7.56, |
| "step": 268 |
| }, |
| { |
| "epoch": 134.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2731429636478424, |
| "eval_runtime": 0.3964, |
| "eval_samples_per_second": 176.608, |
| "eval_steps_per_second": 7.569, |
| "step": 270 |
| }, |
| { |
| "epoch": 135.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2727990746498108, |
| "eval_runtime": 0.3965, |
| "eval_samples_per_second": 176.545, |
| "eval_steps_per_second": 7.566, |
| "step": 272 |
| }, |
| { |
| "epoch": 136.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2725311815738678, |
| "eval_runtime": 0.3934, |
| "eval_samples_per_second": 177.937, |
| "eval_steps_per_second": 7.626, |
| "step": 274 |
| }, |
| { |
| "epoch": 137.44, |
| "learning_rate": 9.259259259259259e-07, |
| "loss": 0.2724, |
| "step": 275 |
| }, |
| { |
| "epoch": 137.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27232787013053894, |
| "eval_runtime": 0.397, |
| "eval_samples_per_second": 176.334, |
| "eval_steps_per_second": 7.557, |
| "step": 276 |
| }, |
| { |
| "epoch": 138.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2722608149051666, |
| "eval_runtime": 0.3984, |
| "eval_samples_per_second": 175.719, |
| "eval_steps_per_second": 7.531, |
| "step": 278 |
| }, |
| { |
| "epoch": 139.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2721470594406128, |
| "eval_runtime": 0.3948, |
| "eval_samples_per_second": 177.289, |
| "eval_steps_per_second": 7.598, |
| "step": 280 |
| }, |
| { |
| "epoch": 140.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2720603346824646, |
| "eval_runtime": 0.4008, |
| "eval_samples_per_second": 174.65, |
| "eval_steps_per_second": 7.485, |
| "step": 282 |
| }, |
| { |
| "epoch": 141.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27187877893447876, |
| "eval_runtime": 0.3937, |
| "eval_samples_per_second": 177.785, |
| "eval_steps_per_second": 7.619, |
| "step": 284 |
| }, |
| { |
| "epoch": 142.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27186280488967896, |
| "eval_runtime": 0.3942, |
| "eval_samples_per_second": 177.564, |
| "eval_steps_per_second": 7.61, |
| "step": 286 |
| }, |
| { |
| "epoch": 143.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27189135551452637, |
| "eval_runtime": 0.3939, |
| "eval_samples_per_second": 177.73, |
| "eval_steps_per_second": 7.617, |
| "step": 288 |
| }, |
| { |
| "epoch": 144.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27191561460494995, |
| "eval_runtime": 0.4045, |
| "eval_samples_per_second": 173.067, |
| "eval_steps_per_second": 7.417, |
| "step": 290 |
| }, |
| { |
| "epoch": 145.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2719388008117676, |
| "eval_runtime": 0.3958, |
| "eval_samples_per_second": 176.858, |
| "eval_steps_per_second": 7.58, |
| "step": 292 |
| }, |
| { |
| "epoch": 146.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.2719106674194336, |
| "eval_runtime": 0.3928, |
| "eval_samples_per_second": 178.226, |
| "eval_steps_per_second": 7.638, |
| "step": 294 |
| }, |
| { |
| "epoch": 147.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.271912157535553, |
| "eval_runtime": 0.3948, |
| "eval_samples_per_second": 177.319, |
| "eval_steps_per_second": 7.599, |
| "step": 296 |
| }, |
| { |
| "epoch": 148.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.271916002035141, |
| "eval_runtime": 0.3979, |
| "eval_samples_per_second": 175.916, |
| "eval_steps_per_second": 7.539, |
| "step": 298 |
| }, |
| { |
| "epoch": 149.89, |
| "learning_rate": 0.0, |
| "loss": 0.2614, |
| "step": 300 |
| }, |
| { |
| "epoch": 149.89, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.27192050218582153, |
| "eval_runtime": 0.3918, |
| "eval_samples_per_second": 178.659, |
| "eval_steps_per_second": 7.657, |
| "step": 300 |
| }, |
| { |
| "epoch": 149.89, |
| "step": 300, |
| "total_flos": 1.0435256966870508e+18, |
| "train_loss": 0.39242326736450195, |
| "train_runtime": 677.2441, |
| "train_samples_per_second": 61.352, |
| "train_steps_per_second": 0.443 |
| } |
| ], |
| "max_steps": 300, |
| "num_train_epochs": 150, |
| "total_flos": 1.0435256966870508e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|