| { |
| "best_metric": 0.5172018348623854, |
| "best_model_checkpoint": "outputs/soft_prompt/deberta-v2-xlarge/sst2/checkpoint-3400", |
| "epoch": 3.0, |
| "global_step": 25257, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 2.831559658050537, |
| "eval_runtime": 12.4014, |
| "eval_samples_per_second": 70.315, |
| "eval_steps_per_second": 8.789, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.7252020835876465, |
| "eval_runtime": 12.5411, |
| "eval_samples_per_second": 69.532, |
| "eval_steps_per_second": 8.691, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.0294061052381518, |
| "loss": 2.7746, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 6.318162441253662, |
| "eval_runtime": 12.2524, |
| "eval_samples_per_second": 71.17, |
| "eval_steps_per_second": 8.896, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.9581830501556396, |
| "eval_runtime": 13.2254, |
| "eval_samples_per_second": 65.934, |
| "eval_steps_per_second": 8.242, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0288122104763036, |
| "loss": 2.8668, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.12, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.97942715883255, |
| "eval_runtime": 12.2204, |
| "eval_samples_per_second": 71.356, |
| "eval_steps_per_second": 8.92, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 9.121758460998535, |
| "eval_runtime": 12.6821, |
| "eval_samples_per_second": 68.758, |
| "eval_steps_per_second": 8.595, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 2.1010525226593018, |
| "eval_runtime": 13.3142, |
| "eval_samples_per_second": 65.494, |
| "eval_steps_per_second": 8.187, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0282183157144554, |
| "loss": 3.0595, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 2.240478754043579, |
| "eval_runtime": 11.7539, |
| "eval_samples_per_second": 74.188, |
| "eval_steps_per_second": 9.274, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 2.822434663772583, |
| "eval_runtime": 11.937, |
| "eval_samples_per_second": 73.05, |
| "eval_steps_per_second": 9.131, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.027624420952607195, |
| "loss": 2.7406, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 2.2581450939178467, |
| "eval_runtime": 12.944, |
| "eval_samples_per_second": 67.367, |
| "eval_steps_per_second": 8.421, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 5.0679168701171875, |
| "eval_runtime": 12.8993, |
| "eval_samples_per_second": 67.6, |
| "eval_steps_per_second": 8.45, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.29, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 4.079117774963379, |
| "eval_runtime": 12.6438, |
| "eval_samples_per_second": 68.966, |
| "eval_steps_per_second": 8.621, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.027030526190758998, |
| "loss": 2.341, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.31, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 6.498974800109863, |
| "eval_runtime": 11.7875, |
| "eval_samples_per_second": 73.977, |
| "eval_steps_per_second": 9.247, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.33, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 2.2673330307006836, |
| "eval_runtime": 12.4045, |
| "eval_samples_per_second": 70.297, |
| "eval_steps_per_second": 8.787, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.026436631428910798, |
| "loss": 2.5017, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.36, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 2.775005340576172, |
| "eval_runtime": 12.8935, |
| "eval_samples_per_second": 67.631, |
| "eval_steps_per_second": 8.454, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.38, |
| "eval_accuracy": 0.5126146788990825, |
| "eval_loss": 0.8905919790267944, |
| "eval_runtime": 11.7168, |
| "eval_samples_per_second": 74.423, |
| "eval_steps_per_second": 9.303, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_accuracy": 0.5172018348623854, |
| "eval_loss": 3.1630539894104004, |
| "eval_runtime": 12.3677, |
| "eval_samples_per_second": 70.506, |
| "eval_steps_per_second": 8.813, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.025842736667062594, |
| "loss": 2.587, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.43, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.8831442594528198, |
| "eval_runtime": 12.7491, |
| "eval_samples_per_second": 68.397, |
| "eval_steps_per_second": 8.55, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.45, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.40650475025177, |
| "eval_runtime": 11.8781, |
| "eval_samples_per_second": 73.412, |
| "eval_steps_per_second": 9.177, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.025248841905214398, |
| "loss": 2.2272, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 5.7484235763549805, |
| "eval_runtime": 12.2084, |
| "eval_samples_per_second": 71.426, |
| "eval_steps_per_second": 8.928, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 3.051417589187622, |
| "eval_runtime": 13.0485, |
| "eval_samples_per_second": 66.828, |
| "eval_steps_per_second": 8.353, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.52, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.1735517978668213, |
| "eval_runtime": 12.52, |
| "eval_samples_per_second": 69.648, |
| "eval_steps_per_second": 8.706, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.024654947143366194, |
| "loss": 2.474, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.55, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 3.5813305377960205, |
| "eval_runtime": 12.4532, |
| "eval_samples_per_second": 70.022, |
| "eval_steps_per_second": 8.753, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.57, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 3.0905096530914307, |
| "eval_runtime": 12.1764, |
| "eval_samples_per_second": 71.614, |
| "eval_steps_per_second": 8.952, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.024061052381517994, |
| "loss": 2.6958, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.6613879203796387, |
| "eval_runtime": 13.0709, |
| "eval_samples_per_second": 66.713, |
| "eval_steps_per_second": 8.339, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.9912046194076538, |
| "eval_runtime": 11.0684, |
| "eval_samples_per_second": 78.783, |
| "eval_steps_per_second": 9.848, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 2.708740711212158, |
| "eval_runtime": 12.3098, |
| "eval_samples_per_second": 70.838, |
| "eval_steps_per_second": 8.855, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.023467157619669794, |
| "loss": 2.4571, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.9453651309013367, |
| "eval_runtime": 12.5807, |
| "eval_samples_per_second": 69.313, |
| "eval_steps_per_second": 8.664, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.69, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.9460629820823669, |
| "eval_runtime": 12.3033, |
| "eval_samples_per_second": 70.875, |
| "eval_steps_per_second": 8.859, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.022873262857821593, |
| "loss": 2.4046, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.71, |
| "eval_accuracy": 0.5034403669724771, |
| "eval_loss": 0.7613060474395752, |
| "eval_runtime": 12.922, |
| "eval_samples_per_second": 67.482, |
| "eval_steps_per_second": 8.435, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.74, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 3.473540782928467, |
| "eval_runtime": 12.5608, |
| "eval_samples_per_second": 69.423, |
| "eval_steps_per_second": 8.678, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.76, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.8309389352798462, |
| "eval_runtime": 12.3028, |
| "eval_samples_per_second": 70.878, |
| "eval_steps_per_second": 8.86, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.022279368095973393, |
| "loss": 1.9778, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.78, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 2.345905303955078, |
| "eval_runtime": 12.6993, |
| "eval_samples_per_second": 68.665, |
| "eval_steps_per_second": 8.583, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.992404818534851, |
| "eval_runtime": 11.8953, |
| "eval_samples_per_second": 73.306, |
| "eval_steps_per_second": 9.163, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.02168547333412519, |
| "loss": 1.9132, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_accuracy": 0.48394495412844035, |
| "eval_loss": 0.7653124928474426, |
| "eval_runtime": 12.5926, |
| "eval_samples_per_second": 69.247, |
| "eval_steps_per_second": 8.656, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.86, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.6025058031082153, |
| "eval_runtime": 12.8694, |
| "eval_samples_per_second": 67.757, |
| "eval_steps_per_second": 8.47, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.88, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 2.3198583126068115, |
| "eval_runtime": 12.2526, |
| "eval_samples_per_second": 71.168, |
| "eval_steps_per_second": 8.896, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.021091578572276993, |
| "loss": 2.1041, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.9014425277709961, |
| "eval_runtime": 12.1923, |
| "eval_samples_per_second": 71.52, |
| "eval_steps_per_second": 8.94, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.93, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.1414848566055298, |
| "eval_runtime": 12.2534, |
| "eval_samples_per_second": 71.164, |
| "eval_steps_per_second": 8.896, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.020497683810428793, |
| "loss": 2.2236, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.95, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.9049626588821411, |
| "eval_runtime": 12.5976, |
| "eval_samples_per_second": 69.219, |
| "eval_steps_per_second": 8.652, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.97, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 2.53812575340271, |
| "eval_runtime": 12.192, |
| "eval_samples_per_second": 71.522, |
| "eval_steps_per_second": 8.94, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 3.9090523719787598, |
| "eval_runtime": 12.1436, |
| "eval_samples_per_second": 71.807, |
| "eval_steps_per_second": 8.976, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.01990378904858059, |
| "loss": 1.9257, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.02, |
| "eval_accuracy": 0.44380733944954126, |
| "eval_loss": 1.3826260566711426, |
| "eval_runtime": 12.394, |
| "eval_samples_per_second": 70.357, |
| "eval_steps_per_second": 8.795, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.05, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.7106401920318604, |
| "eval_runtime": 12.254, |
| "eval_samples_per_second": 71.161, |
| "eval_steps_per_second": 8.895, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.019309894286732392, |
| "loss": 1.9533, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.07, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.6487476825714111, |
| "eval_runtime": 13.5392, |
| "eval_samples_per_second": 64.406, |
| "eval_steps_per_second": 8.051, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.09, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.4527719020843506, |
| "eval_runtime": 12.2271, |
| "eval_samples_per_second": 71.317, |
| "eval_steps_per_second": 8.915, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.12, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.1699163913726807, |
| "eval_runtime": 12.1388, |
| "eval_samples_per_second": 71.836, |
| "eval_steps_per_second": 8.979, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.01871599952488419, |
| "loss": 1.7969, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.14, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.155882716178894, |
| "eval_runtime": 12.1707, |
| "eval_samples_per_second": 71.648, |
| "eval_steps_per_second": 8.956, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.16, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.0576764345169067, |
| "eval_runtime": 12.3024, |
| "eval_samples_per_second": 70.88, |
| "eval_steps_per_second": 8.86, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.01812210476303599, |
| "loss": 1.8048, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.1585994958877563, |
| "eval_runtime": 12.8412, |
| "eval_samples_per_second": 67.906, |
| "eval_steps_per_second": 8.488, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.21, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.7068227529525757, |
| "eval_runtime": 12.4262, |
| "eval_samples_per_second": 70.174, |
| "eval_steps_per_second": 8.772, |
| "step": 10200 |
| }, |
| { |
| "epoch": 1.24, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.7638933658599854, |
| "eval_runtime": 12.0016, |
| "eval_samples_per_second": 72.657, |
| "eval_steps_per_second": 9.082, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.01752821000118779, |
| "loss": 2.0729, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.26, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.188310146331787, |
| "eval_runtime": 13.0443, |
| "eval_samples_per_second": 66.849, |
| "eval_steps_per_second": 8.356, |
| "step": 10600 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.6973594427108765, |
| "eval_runtime": 11.5707, |
| "eval_samples_per_second": 75.363, |
| "eval_steps_per_second": 9.42, |
| "step": 10800 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.016934315239339588, |
| "loss": 1.7558, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.31, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.8548436164855957, |
| "eval_runtime": 13.0206, |
| "eval_samples_per_second": 66.971, |
| "eval_steps_per_second": 8.371, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.33, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.7331790924072266, |
| "eval_runtime": 12.107, |
| "eval_samples_per_second": 72.024, |
| "eval_steps_per_second": 9.003, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.35, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.7065454721450806, |
| "eval_runtime": 12.0677, |
| "eval_samples_per_second": 72.259, |
| "eval_steps_per_second": 9.032, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.016340420477491388, |
| "loss": 1.931, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 3.5782742500305176, |
| "eval_runtime": 12.3183, |
| "eval_samples_per_second": 70.789, |
| "eval_steps_per_second": 8.849, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.4, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 2.2677643299102783, |
| "eval_runtime": 11.6938, |
| "eval_samples_per_second": 74.569, |
| "eval_steps_per_second": 9.321, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.015746525715643188, |
| "loss": 1.739, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.43, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.367167353630066, |
| "eval_runtime": 12.6396, |
| "eval_samples_per_second": 68.99, |
| "eval_steps_per_second": 8.624, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.45, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.4737409353256226, |
| "eval_runtime": 12.3657, |
| "eval_samples_per_second": 70.517, |
| "eval_steps_per_second": 8.815, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.47, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.8329254984855652, |
| "eval_runtime": 12.1625, |
| "eval_samples_per_second": 71.696, |
| "eval_steps_per_second": 8.962, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.015152630953794988, |
| "loss": 1.655, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.5, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 2.237131118774414, |
| "eval_runtime": 12.2197, |
| "eval_samples_per_second": 71.36, |
| "eval_steps_per_second": 8.92, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.52, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 3.846944570541382, |
| "eval_runtime": 12.0155, |
| "eval_samples_per_second": 72.573, |
| "eval_steps_per_second": 9.072, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 0.014558736191946788, |
| "loss": 1.7284, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.54, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.403191328048706, |
| "eval_runtime": 12.6645, |
| "eval_samples_per_second": 68.854, |
| "eval_steps_per_second": 8.607, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.57, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.1265949010849, |
| "eval_runtime": 12.3046, |
| "eval_samples_per_second": 70.868, |
| "eval_steps_per_second": 8.859, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.59, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.9130467176437378, |
| "eval_runtime": 12.0511, |
| "eval_samples_per_second": 72.359, |
| "eval_steps_per_second": 9.045, |
| "step": 13400 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 0.013964841430098586, |
| "loss": 1.5742, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.62, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.832761824131012, |
| "eval_runtime": 12.2374, |
| "eval_samples_per_second": 71.257, |
| "eval_steps_per_second": 8.907, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.64, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 3.8501062393188477, |
| "eval_runtime": 12.573, |
| "eval_samples_per_second": 69.355, |
| "eval_steps_per_second": 8.669, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 0.013370946668250385, |
| "loss": 1.7039, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.2914493083953857, |
| "eval_runtime": 12.0326, |
| "eval_samples_per_second": 72.47, |
| "eval_steps_per_second": 9.059, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.69, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.136923909187317, |
| "eval_runtime": 11.7256, |
| "eval_samples_per_second": 74.367, |
| "eval_steps_per_second": 9.296, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.71, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.7574475407600403, |
| "eval_runtime": 12.2584, |
| "eval_samples_per_second": 71.135, |
| "eval_steps_per_second": 8.892, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 0.012777051906402184, |
| "loss": 1.4352, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.73, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.7623356580734253, |
| "eval_runtime": 12.706, |
| "eval_samples_per_second": 68.629, |
| "eval_steps_per_second": 8.579, |
| "step": 14600 |
| }, |
| { |
| "epoch": 1.76, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.6579828262329102, |
| "eval_runtime": 12.16, |
| "eval_samples_per_second": 71.71, |
| "eval_steps_per_second": 8.964, |
| "step": 14800 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 0.012183157144553985, |
| "loss": 1.6328, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.78, |
| "eval_accuracy": 0.481651376146789, |
| "eval_loss": 0.693511426448822, |
| "eval_runtime": 12.7779, |
| "eval_samples_per_second": 68.243, |
| "eval_steps_per_second": 8.53, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.81, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.6989684700965881, |
| "eval_runtime": 12.0393, |
| "eval_samples_per_second": 72.43, |
| "eval_steps_per_second": 9.054, |
| "step": 15200 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.7303033471107483, |
| "eval_runtime": 12.3696, |
| "eval_samples_per_second": 70.496, |
| "eval_steps_per_second": 8.812, |
| "step": 15400 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 0.011589262382705785, |
| "loss": 1.4498, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.85, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.675624132156372, |
| "eval_runtime": 12.6317, |
| "eval_samples_per_second": 69.032, |
| "eval_steps_per_second": 8.629, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.88, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.0083491802215576, |
| "eval_runtime": 12.349, |
| "eval_samples_per_second": 70.613, |
| "eval_steps_per_second": 8.827, |
| "step": 15800 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 0.010995367620857583, |
| "loss": 1.4022, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.9, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.7790195345878601, |
| "eval_runtime": 11.9659, |
| "eval_samples_per_second": 72.874, |
| "eval_steps_per_second": 9.109, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.296112060546875, |
| "eval_runtime": 13.0796, |
| "eval_samples_per_second": 66.669, |
| "eval_steps_per_second": 8.334, |
| "step": 16200 |
| }, |
| { |
| "epoch": 1.95, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.7208316326141357, |
| "eval_runtime": 12.4652, |
| "eval_samples_per_second": 69.955, |
| "eval_steps_per_second": 8.744, |
| "step": 16400 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 0.010401472859009383, |
| "loss": 1.4503, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.8011333346366882, |
| "eval_runtime": 12.77, |
| "eval_samples_per_second": 68.285, |
| "eval_steps_per_second": 8.536, |
| "step": 16600 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.8194194436073303, |
| "eval_runtime": 12.2506, |
| "eval_samples_per_second": 71.18, |
| "eval_steps_per_second": 8.898, |
| "step": 16800 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 0.009807578097161183, |
| "loss": 1.3401, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.02, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.9209619760513306, |
| "eval_runtime": 12.7245, |
| "eval_samples_per_second": 68.529, |
| "eval_steps_per_second": 8.566, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.04, |
| "eval_accuracy": 0.4954128440366973, |
| "eval_loss": 2.172947645187378, |
| "eval_runtime": 13.3907, |
| "eval_samples_per_second": 65.12, |
| "eval_steps_per_second": 8.14, |
| "step": 17200 |
| }, |
| { |
| "epoch": 2.07, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 2.8217873573303223, |
| "eval_runtime": 12.0414, |
| "eval_samples_per_second": 72.417, |
| "eval_steps_per_second": 9.052, |
| "step": 17400 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 0.009213683335312983, |
| "loss": 1.515, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.09, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.8880350589752197, |
| "eval_runtime": 12.7892, |
| "eval_samples_per_second": 68.183, |
| "eval_steps_per_second": 8.523, |
| "step": 17600 |
| }, |
| { |
| "epoch": 2.11, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.8059159517288208, |
| "eval_runtime": 12.8563, |
| "eval_samples_per_second": 67.827, |
| "eval_steps_per_second": 8.478, |
| "step": 17800 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 0.008619788573464782, |
| "loss": 1.2519, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.14, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.1668144464492798, |
| "eval_runtime": 12.0845, |
| "eval_samples_per_second": 72.158, |
| "eval_steps_per_second": 9.02, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.16, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.8440486192703247, |
| "eval_runtime": 11.5453, |
| "eval_samples_per_second": 75.529, |
| "eval_steps_per_second": 9.441, |
| "step": 18200 |
| }, |
| { |
| "epoch": 2.19, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.482177972793579, |
| "eval_runtime": 11.5533, |
| "eval_samples_per_second": 75.476, |
| "eval_steps_per_second": 9.435, |
| "step": 18400 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 0.008025893811616582, |
| "loss": 1.2221, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.21, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.6977333426475525, |
| "eval_runtime": 12.3902, |
| "eval_samples_per_second": 70.378, |
| "eval_steps_per_second": 8.797, |
| "step": 18600 |
| }, |
| { |
| "epoch": 2.23, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.3418121337890625, |
| "eval_runtime": 12.4195, |
| "eval_samples_per_second": 70.212, |
| "eval_steps_per_second": 8.776, |
| "step": 18800 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 0.00743199904976838, |
| "loss": 1.1201, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.26, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.7915144562721252, |
| "eval_runtime": 12.717, |
| "eval_samples_per_second": 68.57, |
| "eval_steps_per_second": 8.571, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.28, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.961918830871582, |
| "eval_runtime": 13.173, |
| "eval_samples_per_second": 66.196, |
| "eval_steps_per_second": 8.275, |
| "step": 19200 |
| }, |
| { |
| "epoch": 2.3, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.220428228378296, |
| "eval_runtime": 12.2958, |
| "eval_samples_per_second": 70.918, |
| "eval_steps_per_second": 8.865, |
| "step": 19400 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 0.006838104287920181, |
| "loss": 1.0869, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.33, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.6541168689727783, |
| "eval_runtime": 12.5954, |
| "eval_samples_per_second": 69.232, |
| "eval_steps_per_second": 8.654, |
| "step": 19600 |
| }, |
| { |
| "epoch": 2.35, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 2.7402684688568115, |
| "eval_runtime": 12.7516, |
| "eval_samples_per_second": 68.383, |
| "eval_steps_per_second": 8.548, |
| "step": 19800 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 0.00624420952607198, |
| "loss": 1.0804, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.38, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.2037415504455566, |
| "eval_runtime": 13.138, |
| "eval_samples_per_second": 66.372, |
| "eval_steps_per_second": 8.297, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.4, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.7337152361869812, |
| "eval_runtime": 11.6201, |
| "eval_samples_per_second": 75.043, |
| "eval_steps_per_second": 9.38, |
| "step": 20200 |
| }, |
| { |
| "epoch": 2.42, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.8853695392608643, |
| "eval_runtime": 13.0127, |
| "eval_samples_per_second": 67.012, |
| "eval_steps_per_second": 8.376, |
| "step": 20400 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 0.005650314764223779, |
| "loss": 1.0025, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.45, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.7113233804702759, |
| "eval_runtime": 12.2168, |
| "eval_samples_per_second": 71.377, |
| "eval_steps_per_second": 8.922, |
| "step": 20600 |
| }, |
| { |
| "epoch": 2.47, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.0583016872406006, |
| "eval_runtime": 12.0017, |
| "eval_samples_per_second": 72.657, |
| "eval_steps_per_second": 9.082, |
| "step": 20800 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 0.005056420002375579, |
| "loss": 0.9856, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.49, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.7113476395606995, |
| "eval_runtime": 12.9697, |
| "eval_samples_per_second": 67.234, |
| "eval_steps_per_second": 8.404, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.52, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.7482680678367615, |
| "eval_runtime": 12.7958, |
| "eval_samples_per_second": 68.147, |
| "eval_steps_per_second": 8.518, |
| "step": 21200 |
| }, |
| { |
| "epoch": 2.54, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.6966097950935364, |
| "eval_runtime": 12.2964, |
| "eval_samples_per_second": 70.915, |
| "eval_steps_per_second": 8.864, |
| "step": 21400 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 0.004462525240527379, |
| "loss": 1.0364, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.57, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.0607110261917114, |
| "eval_runtime": 12.067, |
| "eval_samples_per_second": 72.263, |
| "eval_steps_per_second": 9.033, |
| "step": 21600 |
| }, |
| { |
| "epoch": 2.59, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.1381345987319946, |
| "eval_runtime": 12.2151, |
| "eval_samples_per_second": 71.387, |
| "eval_steps_per_second": 8.923, |
| "step": 21800 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 0.003868630478679178, |
| "loss": 0.9683, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.61, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.7138826847076416, |
| "eval_runtime": 12.7162, |
| "eval_samples_per_second": 68.574, |
| "eval_steps_per_second": 8.572, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.64, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.6361145973205566, |
| "eval_runtime": 11.8698, |
| "eval_samples_per_second": 73.464, |
| "eval_steps_per_second": 9.183, |
| "step": 22200 |
| }, |
| { |
| "epoch": 2.66, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 1.1421782970428467, |
| "eval_runtime": 11.321, |
| "eval_samples_per_second": 77.025, |
| "eval_steps_per_second": 9.628, |
| "step": 22400 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 0.0032747357168309774, |
| "loss": 0.908, |
| "step": 22500 |
| }, |
| { |
| "epoch": 2.68, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.7317955493927002, |
| "eval_runtime": 12.2644, |
| "eval_samples_per_second": 71.1, |
| "eval_steps_per_second": 8.888, |
| "step": 22600 |
| }, |
| { |
| "epoch": 2.71, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.6962340474128723, |
| "eval_runtime": 11.822, |
| "eval_samples_per_second": 73.761, |
| "eval_steps_per_second": 9.22, |
| "step": 22800 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 0.0026808409549827768, |
| "loss": 0.8761, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.73, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.9568504691123962, |
| "eval_runtime": 12.3037, |
| "eval_samples_per_second": 70.873, |
| "eval_steps_per_second": 8.859, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.76, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.9744265675544739, |
| "eval_runtime": 11.9452, |
| "eval_samples_per_second": 73.0, |
| "eval_steps_per_second": 9.125, |
| "step": 23200 |
| }, |
| { |
| "epoch": 2.78, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 1.091610312461853, |
| "eval_runtime": 12.8562, |
| "eval_samples_per_second": 67.827, |
| "eval_steps_per_second": 8.478, |
| "step": 23400 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 0.0020869461931345766, |
| "loss": 0.8209, |
| "step": 23500 |
| }, |
| { |
| "epoch": 2.8, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.8106526136398315, |
| "eval_runtime": 11.7827, |
| "eval_samples_per_second": 74.007, |
| "eval_steps_per_second": 9.251, |
| "step": 23600 |
| }, |
| { |
| "epoch": 2.83, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.7210954427719116, |
| "eval_runtime": 11.3303, |
| "eval_samples_per_second": 76.962, |
| "eval_steps_per_second": 9.62, |
| "step": 23800 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 0.001493051431286376, |
| "loss": 0.8008, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.85, |
| "eval_accuracy": 0.49770642201834864, |
| "eval_loss": 0.6930689811706543, |
| "eval_runtime": 12.4553, |
| "eval_samples_per_second": 70.01, |
| "eval_steps_per_second": 8.751, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.87, |
| "eval_accuracy": 0.4908256880733945, |
| "eval_loss": 0.7184925675392151, |
| "eval_runtime": 12.3113, |
| "eval_samples_per_second": 70.829, |
| "eval_steps_per_second": 8.854, |
| "step": 24200 |
| }, |
| { |
| "epoch": 2.9, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.729166567325592, |
| "eval_runtime": 11.2952, |
| "eval_samples_per_second": 77.201, |
| "eval_steps_per_second": 9.65, |
| "step": 24400 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 0.0008991566694381756, |
| "loss": 0.7738, |
| "step": 24500 |
| }, |
| { |
| "epoch": 2.92, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.7178325057029724, |
| "eval_runtime": 12.4374, |
| "eval_samples_per_second": 70.111, |
| "eval_steps_per_second": 8.764, |
| "step": 24600 |
| }, |
| { |
| "epoch": 2.95, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.6961101293563843, |
| "eval_runtime": 12.5193, |
| "eval_samples_per_second": 69.652, |
| "eval_steps_per_second": 8.707, |
| "step": 24800 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 0.00030526190758997505, |
| "loss": 0.755, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.97, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.7101095914840698, |
| "eval_runtime": 12.5702, |
| "eval_samples_per_second": 69.371, |
| "eval_steps_per_second": 8.671, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.99, |
| "eval_accuracy": 0.5091743119266054, |
| "eval_loss": 0.7049440145492554, |
| "eval_runtime": 11.2857, |
| "eval_samples_per_second": 77.266, |
| "eval_steps_per_second": 9.658, |
| "step": 25200 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 25257, |
| "total_flos": 1.070008779996841e+17, |
| "train_loss": 1.708119561667774, |
| "train_runtime": 7791.0638, |
| "train_samples_per_second": 25.933, |
| "train_steps_per_second": 3.242 |
| } |
| ], |
| "max_steps": 25257, |
| "num_train_epochs": 3, |
| "total_flos": 1.070008779996841e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|