| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.097173828881146, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.020599913279623e-06, |
| "loss": 1.2605, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1.2041199826559246e-05, |
| "loss": 1.1266, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.5563025007672873e-05, |
| "loss": 0.9129, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.806179973983887e-05, |
| "loss": 0.7453, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9999999999999998e-05, |
| "loss": 0.6517, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 2e-05, |
| "loss": 0.5941, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 2e-05, |
| "loss": 0.5568, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 2e-05, |
| "loss": 0.5374, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 2e-05, |
| "loss": 0.5121, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 2e-05, |
| "loss": 0.5023, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 2e-05, |
| "loss": 0.4889, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 2e-05, |
| "loss": 0.4846, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 2e-05, |
| "loss": 0.4739, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2e-05, |
| "loss": 0.4667, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2e-05, |
| "loss": 0.4566, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 2e-05, |
| "loss": 0.4544, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 2e-05, |
| "loss": 0.4409, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 2e-05, |
| "loss": 0.4397, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 2e-05, |
| "loss": 0.4408, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 2e-05, |
| "loss": 0.4376, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 2e-05, |
| "loss": 0.4279, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 2e-05, |
| "loss": 0.4267, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 2e-05, |
| "loss": 0.4209, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 2e-05, |
| "loss": 0.4169, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 2e-05, |
| "loss": 0.4171, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 2e-05, |
| "loss": 0.4162, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2e-05, |
| "loss": 0.4187, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2e-05, |
| "loss": 0.4138, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 2e-05, |
| "loss": 0.4184, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 2e-05, |
| "loss": 0.4118, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 2e-05, |
| "loss": 0.4039, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 2e-05, |
| "loss": 0.404, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 2e-05, |
| "loss": 0.4031, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 2e-05, |
| "loss": 0.3959, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 2e-05, |
| "loss": 0.4046, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 2e-05, |
| "loss": 0.3943, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 2e-05, |
| "loss": 0.3995, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 2e-05, |
| "loss": 0.3925, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 2e-05, |
| "loss": 0.3908, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 2e-05, |
| "loss": 0.3901, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 2e-05, |
| "loss": 0.3848, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 2e-05, |
| "loss": 0.3877, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 2e-05, |
| "loss": 0.3857, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 2e-05, |
| "loss": 0.381, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 2e-05, |
| "loss": 0.3841, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 2e-05, |
| "loss": 0.3895, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 2e-05, |
| "loss": 0.3774, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 2e-05, |
| "loss": 0.3794, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 2e-05, |
| "loss": 0.3761, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2e-05, |
| "loss": 0.376, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 2e-05, |
| "loss": 0.3808, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 2e-05, |
| "loss": 0.3791, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2e-05, |
| "loss": 0.3716, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2e-05, |
| "loss": 0.377, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2e-05, |
| "loss": 0.3684, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 2e-05, |
| "loss": 0.3665, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 2e-05, |
| "loss": 0.3705, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 2e-05, |
| "loss": 0.3719, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 2e-05, |
| "loss": 0.371, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 2e-05, |
| "loss": 0.3686, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 2e-05, |
| "loss": 0.3689, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 2e-05, |
| "loss": 0.3519, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 2e-05, |
| "loss": 0.3601, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 2e-05, |
| "loss": 0.3615, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 2e-05, |
| "loss": 0.3602, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 2e-05, |
| "loss": 0.3618, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 2e-05, |
| "loss": 0.36, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 2e-05, |
| "loss": 0.3538, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 2e-05, |
| "loss": 0.3534, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 2e-05, |
| "loss": 0.3651, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 2e-05, |
| "loss": 0.3543, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 2e-05, |
| "loss": 0.3521, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 2e-05, |
| "loss": 0.3503, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 2e-05, |
| "loss": 0.3443, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 2e-05, |
| "loss": 0.3486, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 2e-05, |
| "loss": 0.348, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 2e-05, |
| "loss": 0.3509, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 2e-05, |
| "loss": 0.3406, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 2e-05, |
| "loss": 0.3476, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 2e-05, |
| "loss": 0.3385, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 2e-05, |
| "loss": 0.3419, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 2e-05, |
| "loss": 0.3449, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 2e-05, |
| "loss": 0.335, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 2e-05, |
| "loss": 0.3395, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 2e-05, |
| "loss": 0.3364, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 2e-05, |
| "loss": 0.3377, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 2e-05, |
| "loss": 0.339, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 2e-05, |
| "loss": 0.3358, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 2e-05, |
| "loss": 0.3402, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 2e-05, |
| "loss": 0.3324, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 2e-05, |
| "loss": 0.3363, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 2e-05, |
| "loss": 0.3329, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 2e-05, |
| "loss": 0.3282, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 2e-05, |
| "loss": 0.3363, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 2e-05, |
| "loss": 0.3334, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 2e-05, |
| "loss": 0.331, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 2e-05, |
| "loss": 0.3297, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 2e-05, |
| "loss": 0.331, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 2e-05, |
| "loss": 0.3277, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 2e-05, |
| "loss": 0.3246, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2e-05, |
| "loss": 0.3269, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2e-05, |
| "loss": 0.3269, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2e-05, |
| "loss": 0.3261, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2e-05, |
| "loss": 0.3288, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2e-05, |
| "loss": 0.3266, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2e-05, |
| "loss": 0.3239, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2e-05, |
| "loss": 0.3247, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2e-05, |
| "loss": 0.3215, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2e-05, |
| "loss": 0.3212, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2e-05, |
| "loss": 0.3195, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2e-05, |
| "loss": 0.3187, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2e-05, |
| "loss": 0.3179, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2e-05, |
| "loss": 0.3159, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2e-05, |
| "loss": 0.3193, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2e-05, |
| "loss": 0.3168, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2e-05, |
| "loss": 0.3164, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2e-05, |
| "loss": 0.3174, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2e-05, |
| "loss": 0.3185, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2e-05, |
| "loss": 0.3173, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2e-05, |
| "loss": 0.3132, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 2e-05, |
| "loss": 0.3116, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2e-05, |
| "loss": 0.3142, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2e-05, |
| "loss": 0.3086, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2e-05, |
| "loss": 0.3154, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2e-05, |
| "loss": 0.3053, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2e-05, |
| "loss": 0.3108, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2e-05, |
| "loss": 0.3081, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2e-05, |
| "loss": 0.3041, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2e-05, |
| "loss": 0.3112, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2e-05, |
| "loss": 0.3077, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2e-05, |
| "loss": 0.3032, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 2e-05, |
| "loss": 0.3068, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 2e-05, |
| "loss": 0.3111, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2e-05, |
| "loss": 0.3061, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2e-05, |
| "loss": 0.3015, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2e-05, |
| "loss": 0.3043, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 2e-05, |
| "loss": 0.2996, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 2e-05, |
| "loss": 0.3047, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 2e-05, |
| "loss": 0.2996, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 2e-05, |
| "loss": 0.3038, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 2e-05, |
| "loss": 0.2977, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 2e-05, |
| "loss": 0.2981, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 2e-05, |
| "loss": 0.3054, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 2e-05, |
| "loss": 0.3002, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2e-05, |
| "loss": 0.3028, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2e-05, |
| "loss": 0.2942, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2e-05, |
| "loss": 0.2966, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 2e-05, |
| "loss": 0.2955, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 2e-05, |
| "loss": 0.3015, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2e-05, |
| "loss": 0.2933, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2e-05, |
| "loss": 0.2935, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2e-05, |
| "loss": 0.2979, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 2e-05, |
| "loss": 0.2951, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 2e-05, |
| "loss": 0.2901, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 2e-05, |
| "loss": 0.2885, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2e-05, |
| "loss": 0.2897, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2e-05, |
| "loss": 0.291, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2e-05, |
| "loss": 0.2919, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2e-05, |
| "loss": 0.2869, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 2e-05, |
| "loss": 0.2873, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 2e-05, |
| "loss": 0.2913, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 2e-05, |
| "loss": 0.2778, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 2e-05, |
| "loss": 0.2778, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 2e-05, |
| "loss": 0.2808, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 2e-05, |
| "loss": 0.2713, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 2e-05, |
| "loss": 0.2719, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 2e-05, |
| "loss": 0.2749, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 2e-05, |
| "loss": 0.269, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 2e-05, |
| "loss": 0.272, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 2e-05, |
| "loss": 0.2662, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 2e-05, |
| "loss": 0.2745, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 2e-05, |
| "loss": 0.2697, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 2e-05, |
| "loss": 0.2725, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 2e-05, |
| "loss": 0.2697, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 2e-05, |
| "loss": 0.2783, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 2e-05, |
| "loss": 0.2699, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 2e-05, |
| "loss": 0.2677, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 2e-05, |
| "loss": 0.2667, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 2e-05, |
| "loss": 0.2746, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 2e-05, |
| "loss": 0.2714, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 2e-05, |
| "loss": 0.2677, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 2e-05, |
| "loss": 0.2656, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 2e-05, |
| "loss": 0.2667, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 2e-05, |
| "loss": 0.266, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 2e-05, |
| "loss": 0.2691, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 2e-05, |
| "loss": 0.2671, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 2e-05, |
| "loss": 0.2679, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 2e-05, |
| "loss": 0.263, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 2e-05, |
| "loss": 0.262, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 2e-05, |
| "loss": 0.2668, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 2e-05, |
| "loss": 0.265, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 2e-05, |
| "loss": 0.2674, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 2e-05, |
| "loss": 0.2614, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 2e-05, |
| "loss": 0.2612, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 2e-05, |
| "loss": 0.2582, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 2e-05, |
| "loss": 0.2662, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 2e-05, |
| "loss": 0.2652, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 2e-05, |
| "loss": 0.2618, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 2e-05, |
| "loss": 0.2647, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 2e-05, |
| "loss": 0.2592, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2e-05, |
| "loss": 0.2633, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2e-05, |
| "loss": 0.2651, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 2e-05, |
| "loss": 0.2573, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 2e-05, |
| "loss": 0.2633, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 2e-05, |
| "loss": 0.2537, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 2e-05, |
| "loss": 0.2625, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 2e-05, |
| "loss": 0.2636, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 2e-05, |
| "loss": 0.2549, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 2e-05, |
| "loss": 0.2596, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 2e-05, |
| "loss": 0.257, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 2e-05, |
| "loss": 0.2605, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 2e-05, |
| "loss": 0.2531, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 2e-05, |
| "loss": 0.2547, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 2e-05, |
| "loss": 0.255, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 2e-05, |
| "loss": 0.2518, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 2e-05, |
| "loss": 0.25, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 2e-05, |
| "loss": 0.2552, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 2e-05, |
| "loss": 0.2532, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 2e-05, |
| "loss": 0.2533, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 2e-05, |
| "loss": 0.2543, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 2e-05, |
| "loss": 0.2508, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 2e-05, |
| "loss": 0.2504, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 2e-05, |
| "loss": 0.2477, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 2e-05, |
| "loss": 0.2535, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 2e-05, |
| "loss": 0.2458, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2e-05, |
| "loss": 0.2494, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2e-05, |
| "loss": 0.2466, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2e-05, |
| "loss": 0.2472, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 2e-05, |
| "loss": 0.2477, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 2e-05, |
| "loss": 0.2501, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 2e-05, |
| "loss": 0.2534, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2e-05, |
| "loss": 0.2443, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2e-05, |
| "loss": 0.2463, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 2e-05, |
| "loss": 0.2426, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2e-05, |
| "loss": 0.246, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2e-05, |
| "loss": 0.2474, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2e-05, |
| "loss": 0.247, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2e-05, |
| "loss": 0.241, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 2e-05, |
| "loss": 0.2498, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 2e-05, |
| "loss": 0.2443, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 2e-05, |
| "loss": 0.2503, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2e-05, |
| "loss": 0.2483, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 2e-05, |
| "loss": 0.2467, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 2e-05, |
| "loss": 0.2473, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2e-05, |
| "loss": 0.2412, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2e-05, |
| "loss": 0.2401, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 2e-05, |
| "loss": 0.2448, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2e-05, |
| "loss": 0.2373, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2e-05, |
| "loss": 0.2425, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 2e-05, |
| "loss": 0.2375, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 2e-05, |
| "loss": 0.2441, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 2e-05, |
| "loss": 0.2383, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2e-05, |
| "loss": 0.2471, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2e-05, |
| "loss": 0.2385, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 2e-05, |
| "loss": 0.2385, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 2e-05, |
| "loss": 0.2411, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 2e-05, |
| "loss": 0.2346, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2e-05, |
| "loss": 0.2373, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2e-05, |
| "loss": 0.2404, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 2e-05, |
| "loss": 0.2403, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 2e-05, |
| "loss": 0.2413, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 2e-05, |
| "loss": 0.2323, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 2e-05, |
| "loss": 0.2376, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 2e-05, |
| "loss": 0.2362, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 2e-05, |
| "loss": 0.2359, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 2e-05, |
| "loss": 0.2376, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 2e-05, |
| "loss": 0.2351, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 2e-05, |
| "loss": 0.2341, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2e-05, |
| "loss": 0.2345, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2e-05, |
| "loss": 0.2354, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 2e-05, |
| "loss": 0.2365, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 2e-05, |
| "loss": 0.2335, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 2e-05, |
| "loss": 0.2342, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2e-05, |
| "loss": 0.2319, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2e-05, |
| "loss": 0.2388, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2e-05, |
| "loss": 0.2362, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2e-05, |
| "loss": 0.2342, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2e-05, |
| "loss": 0.2282, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2e-05, |
| "loss": 0.2354, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2e-05, |
| "loss": 0.2337, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2e-05, |
| "loss": 0.2286, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2e-05, |
| "loss": 0.2323, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2e-05, |
| "loss": 0.2298, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2e-05, |
| "loss": 0.2302, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 2e-05, |
| "loss": 0.2296, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 2e-05, |
| "loss": 0.2329, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 2e-05, |
| "loss": 0.2298, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 2e-05, |
| "loss": 0.228, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2e-05, |
| "loss": 0.2262, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 2e-05, |
| "loss": 0.2296, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 2e-05, |
| "loss": 0.2284, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 2e-05, |
| "loss": 0.2299, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 2e-05, |
| "loss": 0.2276, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 2e-05, |
| "loss": 0.2315, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 2e-05, |
| "loss": 0.2304, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 2e-05, |
| "loss": 0.2253, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 2e-05, |
| "loss": 0.2243, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 2e-05, |
| "loss": 0.2282, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 2e-05, |
| "loss": 0.2277, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 2e-05, |
| "loss": 0.2306, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 2e-05, |
| "loss": 0.23, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 2e-05, |
| "loss": 0.2295, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 2e-05, |
| "loss": 0.2266, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 2e-05, |
| "loss": 0.2191, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 2e-05, |
| "loss": 0.2219, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 2e-05, |
| "loss": 0.2277, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 2e-05, |
| "loss": 0.2237, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 2e-05, |
| "loss": 0.2229, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 2e-05, |
| "loss": 0.2209, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 2e-05, |
| "loss": 0.2229, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 2e-05, |
| "loss": 0.2208, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 2e-05, |
| "loss": 0.2245, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 2e-05, |
| "loss": 0.2245, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 2e-05, |
| "loss": 0.2219, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 2e-05, |
| "loss": 0.2218, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 2e-05, |
| "loss": 0.218, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 2e-05, |
| "loss": 0.2238, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 2e-05, |
| "loss": 0.2176, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 2e-05, |
| "loss": 0.2167, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 2e-05, |
| "loss": 0.2226, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 2e-05, |
| "loss": 0.2182, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 2e-05, |
| "loss": 0.2144, |
| "step": 644 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 2e-05, |
| "loss": 0.2122, |
| "step": 646 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 2e-05, |
| "loss": 0.201, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 2e-05, |
| "loss": 0.2033, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 2e-05, |
| "loss": 0.1941, |
| "step": 652 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 2e-05, |
| "loss": 0.2006, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 2e-05, |
| "loss": 0.1975, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 2e-05, |
| "loss": 0.2006, |
| "step": 658 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 2e-05, |
| "loss": 0.1975, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 2e-05, |
| "loss": 0.1962, |
| "step": 662 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 2e-05, |
| "loss": 0.195, |
| "step": 664 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 2e-05, |
| "loss": 0.1967, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 2e-05, |
| "loss": 0.1918, |
| "step": 668 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 2e-05, |
| "loss": 0.1927, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 2e-05, |
| "loss": 0.1934, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 2e-05, |
| "loss": 0.1963, |
| "step": 674 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 2e-05, |
| "loss": 0.1951, |
| "step": 676 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 2e-05, |
| "loss": 0.1886, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 2e-05, |
| "loss": 0.1939, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 2e-05, |
| "loss": 0.1978, |
| "step": 682 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 2e-05, |
| "loss": 0.1941, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 2e-05, |
| "loss": 0.1955, |
| "step": 686 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 2e-05, |
| "loss": 0.194, |
| "step": 688 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 2e-05, |
| "loss": 0.1971, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 2e-05, |
| "loss": 0.19, |
| "step": 692 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 2e-05, |
| "loss": 0.1887, |
| "step": 694 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 2e-05, |
| "loss": 0.1923, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 2e-05, |
| "loss": 0.1909, |
| "step": 698 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 2e-05, |
| "loss": 0.1932, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 2e-05, |
| "loss": 0.1929, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 2e-05, |
| "loss": 0.1981, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 2e-05, |
| "loss": 0.1923, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 2e-05, |
| "loss": 0.1896, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 2e-05, |
| "loss": 0.1946, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 2e-05, |
| "loss": 0.1913, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 2e-05, |
| "loss": 0.1938, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 2e-05, |
| "loss": 0.1949, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 2e-05, |
| "loss": 0.1935, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 2e-05, |
| "loss": 0.1912, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 2e-05, |
| "loss": 0.1934, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 2e-05, |
| "loss": 0.1953, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 2e-05, |
| "loss": 0.1892, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 2e-05, |
| "loss": 0.1918, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 2e-05, |
| "loss": 0.1923, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 2e-05, |
| "loss": 0.1818, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 2e-05, |
| "loss": 0.1873, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 2e-05, |
| "loss": 0.1898, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 2e-05, |
| "loss": 0.1882, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 2e-05, |
| "loss": 0.1854, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 2e-05, |
| "loss": 0.189, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 2e-05, |
| "loss": 0.1919, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 2e-05, |
| "loss": 0.1902, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 2e-05, |
| "loss": 0.1865, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 2e-05, |
| "loss": 0.1858, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 2e-05, |
| "loss": 0.1833, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 2e-05, |
| "loss": 0.1829, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 2e-05, |
| "loss": 0.1845, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2e-05, |
| "loss": 0.182, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2e-05, |
| "loss": 0.1854, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 2e-05, |
| "loss": 0.1876, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 2e-05, |
| "loss": 0.1868, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 2e-05, |
| "loss": 0.1911, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 2e-05, |
| "loss": 0.1804, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 2e-05, |
| "loss": 0.1791, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 2e-05, |
| "loss": 0.1889, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 2e-05, |
| "loss": 0.1827, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 2e-05, |
| "loss": 0.1854, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 2e-05, |
| "loss": 0.1826, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 2e-05, |
| "loss": 0.1793, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 2e-05, |
| "loss": 0.1859, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 2e-05, |
| "loss": 0.1827, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 2e-05, |
| "loss": 0.1894, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 2e-05, |
| "loss": 0.1878, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 2e-05, |
| "loss": 0.1858, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 2e-05, |
| "loss": 0.1836, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 2e-05, |
| "loss": 0.184, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 2e-05, |
| "loss": 0.1828, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 2e-05, |
| "loss": 0.1804, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 2e-05, |
| "loss": 0.1764, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 2e-05, |
| "loss": 0.181, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 2e-05, |
| "loss": 0.1881, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 2e-05, |
| "loss": 0.1848, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 2e-05, |
| "loss": 0.1799, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 2e-05, |
| "loss": 0.1846, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 2e-05, |
| "loss": 0.1799, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 2e-05, |
| "loss": 0.1805, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 2e-05, |
| "loss": 0.1825, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 2e-05, |
| "loss": 0.1804, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 2e-05, |
| "loss": 0.1805, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 2e-05, |
| "loss": 0.1799, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 2e-05, |
| "loss": 0.1797, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 2e-05, |
| "loss": 0.1778, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 2e-05, |
| "loss": 0.1851, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 2e-05, |
| "loss": 0.1743, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 2e-05, |
| "loss": 0.1766, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 2e-05, |
| "loss": 0.1852, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 2e-05, |
| "loss": 0.1737, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 2e-05, |
| "loss": 0.1758, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 2e-05, |
| "loss": 0.1754, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 2e-05, |
| "loss": 0.1754, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 2e-05, |
| "loss": 0.1757, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 2e-05, |
| "loss": 0.1755, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 2e-05, |
| "loss": 0.1755, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 2e-05, |
| "loss": 0.1754, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 2e-05, |
| "loss": 0.1751, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 2e-05, |
| "loss": 0.1726, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 2e-05, |
| "loss": 0.1727, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 2e-05, |
| "loss": 0.1743, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 2e-05, |
| "loss": 0.1769, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 2e-05, |
| "loss": 0.1833, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 2e-05, |
| "loss": 0.1737, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 2e-05, |
| "loss": 0.1762, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 2e-05, |
| "loss": 0.172, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 2e-05, |
| "loss": 0.1753, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 2e-05, |
| "loss": 0.1739, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 2e-05, |
| "loss": 0.1752, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 2e-05, |
| "loss": 0.1748, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 2e-05, |
| "loss": 0.1813, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 2e-05, |
| "loss": 0.1745, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 2e-05, |
| "loss": 0.1737, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 2e-05, |
| "loss": 0.1769, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 2e-05, |
| "loss": 0.176, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 2e-05, |
| "loss": 0.1721, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 2e-05, |
| "loss": 0.174, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 2e-05, |
| "loss": 0.1713, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 2e-05, |
| "loss": 0.1702, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 2e-05, |
| "loss": 0.1749, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 2e-05, |
| "loss": 0.1737, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 2e-05, |
| "loss": 0.175, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 2e-05, |
| "loss": 0.1686, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 2e-05, |
| "loss": 0.1714, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 2e-05, |
| "loss": 0.1728, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 2e-05, |
| "loss": 0.1686, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 2e-05, |
| "loss": 0.1734, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 2e-05, |
| "loss": 0.1727, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 2e-05, |
| "loss": 0.1728, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 2e-05, |
| "loss": 0.1684, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 2e-05, |
| "loss": 0.17, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 2e-05, |
| "loss": 0.1664, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 2e-05, |
| "loss": 0.1657, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 2e-05, |
| "loss": 0.1695, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 2e-05, |
| "loss": 0.1731, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 2e-05, |
| "loss": 0.1681, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 2e-05, |
| "loss": 0.1683, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 2e-05, |
| "loss": 0.1685, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 2e-05, |
| "loss": 0.168, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 2e-05, |
| "loss": 0.1702, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 2e-05, |
| "loss": 0.1693, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 2e-05, |
| "loss": 0.1717, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 2e-05, |
| "loss": 0.1701, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 2e-05, |
| "loss": 0.1641, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 2e-05, |
| "loss": 0.1703, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 2e-05, |
| "loss": 0.1676, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 2e-05, |
| "loss": 0.1709, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 2e-05, |
| "loss": 0.1755, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 2e-05, |
| "loss": 0.1705, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 2e-05, |
| "loss": 0.1643, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 2e-05, |
| "loss": 0.1663, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 2e-05, |
| "loss": 0.1647, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 2e-05, |
| "loss": 0.1619, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 2e-05, |
| "loss": 0.17, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 2e-05, |
| "loss": 0.165, |
| "step": 966 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 2e-05, |
| "loss": 0.1603, |
| "step": 968 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 2e-05, |
| "loss": 0.1437, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.01, |
| "learning_rate": 2e-05, |
| "loss": 0.1377, |
| "step": 972 |
| }, |
| { |
| "epoch": 3.02, |
| "learning_rate": 2e-05, |
| "loss": 0.1427, |
| "step": 974 |
| }, |
| { |
| "epoch": 3.02, |
| "learning_rate": 2e-05, |
| "loss": 0.148, |
| "step": 976 |
| }, |
| { |
| "epoch": 3.03, |
| "learning_rate": 2e-05, |
| "loss": 0.1383, |
| "step": 978 |
| }, |
| { |
| "epoch": 3.04, |
| "learning_rate": 2e-05, |
| "loss": 0.1441, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.04, |
| "learning_rate": 2e-05, |
| "loss": 0.1416, |
| "step": 982 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 2e-05, |
| "loss": 0.143, |
| "step": 984 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 2e-05, |
| "loss": 0.1388, |
| "step": 986 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 2e-05, |
| "loss": 0.1423, |
| "step": 988 |
| }, |
| { |
| "epoch": 3.07, |
| "learning_rate": 2e-05, |
| "loss": 0.1396, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.07, |
| "learning_rate": 2e-05, |
| "loss": 0.1447, |
| "step": 992 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 2e-05, |
| "loss": 0.1381, |
| "step": 994 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 2e-05, |
| "loss": 0.1402, |
| "step": 996 |
| }, |
| { |
| "epoch": 3.09, |
| "learning_rate": 2e-05, |
| "loss": 0.1392, |
| "step": 998 |
| }, |
| { |
| "epoch": 3.1, |
| "learning_rate": 2e-05, |
| "loss": 0.1361, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1288, |
| "num_train_epochs": 4, |
| "save_steps": 200, |
| "total_flos": 2.2885242503168e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|