Upload folder using huggingface_hub
Browse files- adapter_config.json +5 -5
- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- ref/adapter_config.json +5 -5
- ref/adapter_model.safetensors +1 -1
- rng_state.pth +1 -1
- trainer_state.json +640 -640
adapter_config.json
CHANGED
|
@@ -30,13 +30,13 @@
|
|
| 30 |
"rank_pattern": {},
|
| 31 |
"revision": null,
|
| 32 |
"target_modules": [
|
|
|
|
| 33 |
"gate_proj",
|
| 34 |
-
"k_proj",
|
| 35 |
-
"up_proj",
|
| 36 |
-
"down_proj",
|
| 37 |
"q_proj",
|
| 38 |
-
"
|
| 39 |
-
"
|
|
|
|
|
|
|
| 40 |
],
|
| 41 |
"target_parameters": null,
|
| 42 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 30 |
"rank_pattern": {},
|
| 31 |
"revision": null,
|
| 32 |
"target_modules": [
|
| 33 |
+
"o_proj",
|
| 34 |
"gate_proj",
|
|
|
|
|
|
|
|
|
|
| 35 |
"q_proj",
|
| 36 |
+
"v_proj",
|
| 37 |
+
"down_proj",
|
| 38 |
+
"k_proj",
|
| 39 |
+
"up_proj"
|
| 40 |
],
|
| 41 |
"target_parameters": null,
|
| 42 |
"task_type": "CAUSAL_LM",
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36981856
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7680f780544af3904e7232d54a0ff4bc26477fe15049cb99453cb60e94b2f079
|
| 3 |
size 36981856
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 74194619
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed4c64e15dad7e171d2580c38ba6922a406dd5b02c87cf007ecb20e986582510
|
| 3 |
size 74194619
|
ref/adapter_config.json
CHANGED
|
@@ -30,13 +30,13 @@
|
|
| 30 |
"rank_pattern": {},
|
| 31 |
"revision": null,
|
| 32 |
"target_modules": [
|
|
|
|
| 33 |
"gate_proj",
|
| 34 |
-
"k_proj",
|
| 35 |
-
"up_proj",
|
| 36 |
-
"down_proj",
|
| 37 |
"q_proj",
|
| 38 |
-
"
|
| 39 |
-
"
|
|
|
|
|
|
|
| 40 |
],
|
| 41 |
"target_parameters": null,
|
| 42 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 30 |
"rank_pattern": {},
|
| 31 |
"revision": null,
|
| 32 |
"target_modules": [
|
| 33 |
+
"o_proj",
|
| 34 |
"gate_proj",
|
|
|
|
|
|
|
|
|
|
| 35 |
"q_proj",
|
| 36 |
+
"v_proj",
|
| 37 |
+
"down_proj",
|
| 38 |
+
"k_proj",
|
| 39 |
+
"up_proj"
|
| 40 |
],
|
| 41 |
"target_parameters": null,
|
| 42 |
"task_type": "CAUSAL_LM",
|
ref/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73911112
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d696a715e6ccdd8eb59060020be0bd578738383f6e181a3fe76f362d014a253b
|
| 3 |
size 73911112
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2534e434cd5abbb8f7668d3eab0549db0ef95d6a797a3efa86b712e8e32266a7
|
| 3 |
size 14645
|
trainer_state.json
CHANGED
|
@@ -12,7 +12,7 @@
|
|
| 12 |
{
|
| 13 |
"entropy": 1.6346618384122849,
|
| 14 |
"epoch": 0.050314465408805034,
|
| 15 |
-
"grad_norm": 3.
|
| 16 |
"learning_rate": 5e-05,
|
| 17 |
"logits/chosen": -0.7411658949470903,
|
| 18 |
"logits/rejected": -0.20507352810005436,
|
|
@@ -28,1065 +28,1065 @@
|
|
| 28 |
"step": 1
|
| 29 |
},
|
| 30 |
{
|
| 31 |
-
"entropy": 1.
|
| 32 |
"epoch": 0.10062893081761007,
|
| 33 |
-
"grad_norm": 3.
|
| 34 |
"learning_rate": 4.9166666666666665e-05,
|
| 35 |
-
"logits/chosen": -0.
|
| 36 |
-
"logits/rejected": -0.
|
| 37 |
-
"logps/chosen": -153.
|
| 38 |
-
"logps/rejected": -180.
|
| 39 |
-
"loss": 0.
|
| 40 |
"mean_token_accuracy": 0.583746887743473,
|
| 41 |
"num_tokens": 12841.0,
|
| 42 |
"rewards/accuracies": 1.0,
|
| 43 |
-
"rewards/chosen": 0.
|
| 44 |
-
"rewards/margins": 0.
|
| 45 |
-
"rewards/rejected": -0.
|
| 46 |
"step": 2
|
| 47 |
},
|
| 48 |
{
|
| 49 |
-
"entropy": 1.
|
| 50 |
"epoch": 0.1509433962264151,
|
| 51 |
-
"grad_norm": 2.
|
| 52 |
"learning_rate": 4.8333333333333334e-05,
|
| 53 |
-
"logits/chosen": -1.
|
| 54 |
-
"logits/rejected": -0.
|
| 55 |
-
"logps/chosen": -146.
|
| 56 |
-
"logps/rejected": -187.
|
| 57 |
-
"loss": 0.
|
| 58 |
"mean_token_accuracy": 0.6039908826351166,
|
| 59 |
"num_tokens": 18816.0,
|
| 60 |
"rewards/accuracies": 1.0,
|
| 61 |
-
"rewards/chosen": 0.
|
| 62 |
-
"rewards/margins": 0.
|
| 63 |
-
"rewards/rejected": -0.
|
| 64 |
"step": 3
|
| 65 |
},
|
| 66 |
{
|
| 67 |
-
"entropy": 1.
|
| 68 |
"epoch": 0.20125786163522014,
|
| 69 |
-
"grad_norm": 2.
|
| 70 |
"learning_rate": 4.75e-05,
|
| 71 |
-
"logits/chosen": -0.
|
| 72 |
-
"logits/rejected": -0.
|
| 73 |
-
"logps/chosen": -118.
|
| 74 |
-
"logps/rejected": -
|
| 75 |
-
"loss": 0.
|
| 76 |
"mean_token_accuracy": 0.5912635922431946,
|
| 77 |
"num_tokens": 25522.0,
|
| 78 |
"rewards/accuracies": 1.0,
|
| 79 |
-
"rewards/chosen": 0.
|
| 80 |
-
"rewards/margins": 0.
|
| 81 |
-
"rewards/rejected": -0.
|
| 82 |
"step": 4
|
| 83 |
},
|
| 84 |
{
|
| 85 |
-
"entropy": 1.
|
| 86 |
"epoch": 0.25157232704402516,
|
| 87 |
-
"grad_norm": 1.
|
| 88 |
"learning_rate": 4.666666666666667e-05,
|
| 89 |
-
"logits/chosen": -1.
|
| 90 |
-
"logits/rejected": -0.
|
| 91 |
-
"logps/chosen": -118.
|
| 92 |
-
"logps/rejected": -193.
|
| 93 |
-
"loss": 0.
|
| 94 |
"mean_token_accuracy": 0.5765212289988995,
|
| 95 |
"num_tokens": 32576.0,
|
| 96 |
"rewards/accuracies": 1.0,
|
| 97 |
-
"rewards/chosen": 0.
|
| 98 |
-
"rewards/margins": 1.
|
| 99 |
-
"rewards/rejected": -1.
|
| 100 |
"step": 5
|
| 101 |
},
|
| 102 |
{
|
| 103 |
-
"entropy": 1.
|
| 104 |
"epoch": 0.3018867924528302,
|
| 105 |
-
"grad_norm": 1.
|
| 106 |
"learning_rate": 4.5833333333333334e-05,
|
| 107 |
-
"logits/chosen": -1.
|
| 108 |
-
"logits/rejected": -0.
|
| 109 |
-
"logps/chosen": -120.
|
| 110 |
-
"logps/rejected": -203.
|
| 111 |
-
"loss": 0.
|
| 112 |
"mean_token_accuracy": 0.5985999256372452,
|
| 113 |
"num_tokens": 39703.0,
|
| 114 |
"rewards/accuracies": 1.0,
|
| 115 |
-
"rewards/chosen": 0.
|
| 116 |
-
"rewards/margins": 1.
|
| 117 |
-
"rewards/rejected": -1.
|
| 118 |
"step": 6
|
| 119 |
},
|
| 120 |
{
|
| 121 |
-
"entropy": 1.
|
| 122 |
"epoch": 0.3522012578616352,
|
| 123 |
-
"grad_norm": 1.
|
| 124 |
"learning_rate": 4.5e-05,
|
| 125 |
-
"logits/chosen": -1.
|
| 126 |
-
"logits/rejected": -0.
|
| 127 |
-
"logps/chosen": -108.
|
| 128 |
-
"logps/rejected": -
|
| 129 |
-
"loss": 0.
|
| 130 |
"mean_token_accuracy": 0.6349928826093674,
|
| 131 |
"num_tokens": 47481.0,
|
| 132 |
"rewards/accuracies": 1.0,
|
| 133 |
-
"rewards/chosen": 0.
|
| 134 |
-
"rewards/margins": 1.
|
| 135 |
-
"rewards/rejected": -1.
|
| 136 |
"step": 7
|
| 137 |
},
|
| 138 |
{
|
| 139 |
-
"entropy": 1.
|
| 140 |
"epoch": 0.4025157232704403,
|
| 141 |
-
"grad_norm": 1.
|
| 142 |
"learning_rate": 4.4166666666666665e-05,
|
| 143 |
-
"logits/chosen": -1.
|
| 144 |
-
"logits/rejected": -
|
| 145 |
-
"logps/chosen": -137.
|
| 146 |
-
"logps/rejected": -
|
| 147 |
-
"loss": 0.
|
| 148 |
-
"mean_token_accuracy": 0.
|
| 149 |
"num_tokens": 54551.0,
|
| 150 |
"rewards/accuracies": 1.0,
|
| 151 |
-
"rewards/chosen": 0.
|
| 152 |
-
"rewards/margins": 2.
|
| 153 |
-
"rewards/rejected": -1.
|
| 154 |
"step": 8
|
| 155 |
},
|
| 156 |
{
|
| 157 |
-
"entropy": 1.
|
| 158 |
"epoch": 0.4528301886792453,
|
| 159 |
-
"grad_norm": 0.
|
| 160 |
"learning_rate": 4.3333333333333334e-05,
|
| 161 |
-
"logits/chosen": -1.
|
| 162 |
-
"logits/rejected": -0.
|
| 163 |
-
"logps/chosen": -133.
|
| 164 |
-
"logps/rejected": -
|
| 165 |
-
"loss": 0.
|
| 166 |
-
"mean_token_accuracy": 0.
|
| 167 |
"num_tokens": 61082.0,
|
| 168 |
"rewards/accuracies": 1.0,
|
| 169 |
-
"rewards/chosen": 0.
|
| 170 |
-
"rewards/margins": 3.
|
| 171 |
-
"rewards/rejected": -2.
|
| 172 |
"step": 9
|
| 173 |
},
|
| 174 |
{
|
| 175 |
-
"entropy": 1.
|
| 176 |
"epoch": 0.5031446540880503,
|
| 177 |
-
"grad_norm": 0.
|
| 178 |
"learning_rate": 4.25e-05,
|
| 179 |
-
"logits/chosen": -1.
|
| 180 |
-
"logits/rejected": -1.
|
| 181 |
-
"logps/chosen": -114.
|
| 182 |
-
"logps/rejected": -
|
| 183 |
-
"loss": 0.
|
| 184 |
"mean_token_accuracy": 0.5977272689342499,
|
| 185 |
"num_tokens": 68063.0,
|
| 186 |
"rewards/accuracies": 1.0,
|
| 187 |
-
"rewards/chosen": 0.
|
| 188 |
-
"rewards/margins":
|
| 189 |
-
"rewards/rejected": -3.
|
| 190 |
"step": 10
|
| 191 |
},
|
| 192 |
{
|
| 193 |
-
"entropy": 1.
|
| 194 |
"epoch": 0.5534591194968553,
|
| 195 |
-
"grad_norm": 0.
|
| 196 |
"learning_rate": 4.166666666666667e-05,
|
| 197 |
-
"logits/chosen": -1.
|
| 198 |
-
"logits/rejected": -1.
|
| 199 |
-
"logps/chosen": -
|
| 200 |
-
"logps/rejected": -
|
| 201 |
-
"loss": 0.
|
| 202 |
-
"mean_token_accuracy": 0.
|
| 203 |
"num_tokens": 73558.0,
|
| 204 |
"rewards/accuracies": 1.0,
|
| 205 |
-
"rewards/chosen": 1.
|
| 206 |
-
"rewards/margins": 4.
|
| 207 |
-
"rewards/rejected": -3.
|
| 208 |
"step": 11
|
| 209 |
},
|
| 210 |
{
|
| 211 |
-
"entropy": 1.
|
| 212 |
"epoch": 0.6037735849056604,
|
| 213 |
-
"grad_norm": 0.
|
| 214 |
"learning_rate": 4.0833333333333334e-05,
|
| 215 |
-
"logits/chosen": -1.
|
| 216 |
-
"logits/rejected": -1.
|
| 217 |
-
"logps/chosen": -133.
|
| 218 |
-
"logps/rejected": -
|
| 219 |
-
"loss": 0.
|
| 220 |
-
"mean_token_accuracy": 0.
|
| 221 |
"num_tokens": 79421.0,
|
| 222 |
"rewards/accuracies": 1.0,
|
| 223 |
-
"rewards/chosen": 1.
|
| 224 |
-
"rewards/margins":
|
| 225 |
-
"rewards/rejected": -3.
|
| 226 |
"step": 12
|
| 227 |
},
|
| 228 |
{
|
| 229 |
-
"entropy": 1.
|
| 230 |
"epoch": 0.6540880503144654,
|
| 231 |
-
"grad_norm": 0.
|
| 232 |
"learning_rate": 4e-05,
|
| 233 |
-
"logits/chosen": -1.
|
| 234 |
-
"logits/rejected": -1.
|
| 235 |
-
"logps/chosen": -130.
|
| 236 |
-
"logps/rejected": -
|
| 237 |
-
"loss": 0.
|
| 238 |
-
"mean_token_accuracy": 0.
|
| 239 |
"num_tokens": 86682.0,
|
| 240 |
"rewards/accuracies": 1.0,
|
| 241 |
-
"rewards/chosen": 0.
|
| 242 |
-
"rewards/margins": 5.
|
| 243 |
-
"rewards/rejected": -4.
|
| 244 |
"step": 13
|
| 245 |
},
|
| 246 |
{
|
| 247 |
-
"entropy": 1.
|
| 248 |
"epoch": 0.7044025157232704,
|
| 249 |
-
"grad_norm": 0.
|
| 250 |
"learning_rate": 3.9166666666666665e-05,
|
| 251 |
-
"logits/chosen": -1.
|
| 252 |
-
"logits/rejected": -1.
|
| 253 |
-
"logps/chosen": -134.
|
| 254 |
-
"logps/rejected": -
|
| 255 |
-
"loss": 0.
|
| 256 |
-
"mean_token_accuracy": 0.
|
| 257 |
"num_tokens": 93035.0,
|
| 258 |
"rewards/accuracies": 1.0,
|
| 259 |
-
"rewards/chosen": 0.
|
| 260 |
-
"rewards/margins":
|
| 261 |
-
"rewards/rejected": -4.
|
| 262 |
"step": 14
|
| 263 |
},
|
| 264 |
{
|
| 265 |
-
"entropy": 1.
|
| 266 |
"epoch": 0.7547169811320755,
|
| 267 |
-
"grad_norm": 0.
|
| 268 |
"learning_rate": 3.8333333333333334e-05,
|
| 269 |
-
"logits/chosen": -1.
|
| 270 |
-
"logits/rejected": -1.
|
| 271 |
-
"logps/chosen": -132.
|
| 272 |
-
"logps/rejected": -
|
| 273 |
-
"loss": 0.
|
| 274 |
-
"mean_token_accuracy": 0.
|
| 275 |
"num_tokens": 98572.0,
|
| 276 |
"rewards/accuracies": 1.0,
|
| 277 |
-
"rewards/chosen": 1.
|
| 278 |
-
"rewards/margins": 7.
|
| 279 |
-
"rewards/rejected": -
|
| 280 |
"step": 15
|
| 281 |
},
|
| 282 |
{
|
| 283 |
-
"entropy": 1.
|
| 284 |
"epoch": 0.8050314465408805,
|
| 285 |
-
"grad_norm": 0.
|
| 286 |
"learning_rate": 3.7500000000000003e-05,
|
| 287 |
-
"logits/chosen": -1.
|
| 288 |
-
"logits/rejected": -1.
|
| 289 |
-
"logps/chosen": -139.
|
| 290 |
-
"logps/rejected": -
|
| 291 |
-
"loss": 0.
|
| 292 |
-
"mean_token_accuracy": 0.
|
| 293 |
"num_tokens": 106014.0,
|
| 294 |
"rewards/accuracies": 1.0,
|
| 295 |
-
"rewards/chosen": 0.
|
| 296 |
-
"rewards/margins": 6.
|
| 297 |
-
"rewards/rejected": -5.
|
| 298 |
"step": 16
|
| 299 |
},
|
| 300 |
{
|
| 301 |
-
"entropy": 1.
|
| 302 |
"epoch": 0.8553459119496856,
|
| 303 |
-
"grad_norm": 0.
|
| 304 |
"learning_rate": 3.6666666666666666e-05,
|
| 305 |
-
"logits/chosen": -1.
|
| 306 |
-
"logits/rejected": -1.
|
| 307 |
-
"logps/chosen": -127.
|
| 308 |
-
"logps/rejected": -
|
| 309 |
-
"loss": 0.
|
| 310 |
"mean_token_accuracy": 0.6195648014545441,
|
| 311 |
"num_tokens": 112297.0,
|
| 312 |
"rewards/accuracies": 1.0,
|
| 313 |
-
"rewards/chosen": 0.
|
| 314 |
-
"rewards/margins": 7.
|
| 315 |
-
"rewards/rejected": -6.
|
| 316 |
"step": 17
|
| 317 |
},
|
| 318 |
{
|
| 319 |
-
"entropy": 1.
|
| 320 |
"epoch": 0.9056603773584906,
|
| 321 |
-
"grad_norm": 0.
|
| 322 |
"learning_rate": 3.5833333333333335e-05,
|
| 323 |
-
"logits/chosen": -2.
|
| 324 |
-
"logits/rejected": -1.
|
| 325 |
-
"logps/chosen": -128.
|
| 326 |
-
"logps/rejected": -
|
| 327 |
-
"loss": 0.
|
| 328 |
-
"mean_token_accuracy": 0.
|
| 329 |
"num_tokens": 118259.0,
|
| 330 |
"rewards/accuracies": 1.0,
|
| 331 |
-
"rewards/chosen": 0.
|
| 332 |
-
"rewards/margins": 7.
|
| 333 |
-
"rewards/rejected": -6.
|
| 334 |
"step": 18
|
| 335 |
},
|
| 336 |
{
|
| 337 |
-
"entropy": 1.
|
| 338 |
"epoch": 0.9559748427672956,
|
| 339 |
-
"grad_norm": 0.
|
| 340 |
"learning_rate": 3.5e-05,
|
| 341 |
-
"logits/chosen": -2.
|
| 342 |
-
"logits/rejected": -1.
|
| 343 |
-
"logps/chosen": -
|
| 344 |
-
"logps/rejected": -
|
| 345 |
-
"loss": 0.
|
| 346 |
-
"mean_token_accuracy": 0.
|
| 347 |
"num_tokens": 123618.0,
|
| 348 |
"rewards/accuracies": 1.0,
|
| 349 |
-
"rewards/chosen": 1.
|
| 350 |
-
"rewards/margins": 7.
|
| 351 |
-
"rewards/rejected": -6.
|
| 352 |
"step": 19
|
| 353 |
},
|
| 354 |
{
|
| 355 |
-
"entropy": 1.
|
| 356 |
"epoch": 1.0,
|
| 357 |
-
"grad_norm": 0.
|
| 358 |
"learning_rate": 3.4166666666666666e-05,
|
| 359 |
-
"logits/chosen": -1.
|
| 360 |
-
"logits/rejected": -1.
|
| 361 |
-
"logps/chosen": -140.
|
| 362 |
-
"logps/rejected": -
|
| 363 |
-
"loss": 0.
|
| 364 |
-
"mean_token_accuracy": 0.
|
| 365 |
"num_tokens": 130000.0,
|
| 366 |
"rewards/accuracies": 1.0,
|
| 367 |
-
"rewards/chosen": -0.
|
| 368 |
-
"rewards/margins":
|
| 369 |
-
"rewards/rejected": -7.
|
| 370 |
"step": 20
|
| 371 |
},
|
| 372 |
{
|
| 373 |
-
"entropy": 1.
|
| 374 |
"epoch": 1.050314465408805,
|
| 375 |
-
"grad_norm": 0.
|
| 376 |
"learning_rate": 3.3333333333333335e-05,
|
| 377 |
-
"logits/chosen": -2.
|
| 378 |
-
"logits/rejected": -1.
|
| 379 |
-
"logps/chosen": -132.
|
| 380 |
-
"logps/rejected": -
|
| 381 |
-
"loss": 0.
|
| 382 |
-
"mean_token_accuracy": 0.
|
| 383 |
"num_tokens": 135799.0,
|
| 384 |
"rewards/accuracies": 1.0,
|
| 385 |
-
"rewards/chosen": 1.
|
| 386 |
-
"rewards/margins":
|
| 387 |
-
"rewards/rejected": -
|
| 388 |
"step": 21
|
| 389 |
},
|
| 390 |
{
|
| 391 |
-
"entropy": 1.
|
| 392 |
"epoch": 1.10062893081761,
|
| 393 |
-
"grad_norm": 0.
|
| 394 |
"learning_rate": 3.2500000000000004e-05,
|
| 395 |
-
"logits/chosen": -2.
|
| 396 |
-
"logits/rejected": -1.
|
| 397 |
-
"logps/chosen": -
|
| 398 |
-
"logps/rejected": -
|
| 399 |
-
"loss": 0.
|
| 400 |
-
"mean_token_accuracy": 0.
|
| 401 |
"num_tokens": 142686.0,
|
| 402 |
"rewards/accuracies": 1.0,
|
| 403 |
-
"rewards/chosen": 0.
|
| 404 |
-
"rewards/margins": 8.
|
| 405 |
-
"rewards/rejected": -8.
|
| 406 |
"step": 22
|
| 407 |
},
|
| 408 |
{
|
| 409 |
-
"entropy": 1.
|
| 410 |
"epoch": 1.150943396226415,
|
| 411 |
-
"grad_norm": 0.
|
| 412 |
"learning_rate": 3.1666666666666666e-05,
|
| 413 |
-
"logits/chosen": -2.
|
| 414 |
-
"logits/rejected": -1.
|
| 415 |
-
"logps/chosen": -121.
|
| 416 |
-
"logps/rejected": -
|
| 417 |
-
"loss": 0.
|
| 418 |
-
"mean_token_accuracy": 0.
|
| 419 |
"num_tokens": 148888.0,
|
| 420 |
"rewards/accuracies": 1.0,
|
| 421 |
-
"rewards/chosen": 0.
|
| 422 |
-
"rewards/margins": 8.
|
| 423 |
-
"rewards/rejected": -7.
|
| 424 |
"step": 23
|
| 425 |
},
|
| 426 |
{
|
| 427 |
-
"entropy": 1.
|
| 428 |
"epoch": 1.20125786163522,
|
| 429 |
-
"grad_norm": 0.
|
| 430 |
"learning_rate": 3.0833333333333335e-05,
|
| 431 |
-
"logits/chosen": -2.
|
| 432 |
-
"logits/rejected": -1.
|
| 433 |
-
"logps/chosen": -
|
| 434 |
-
"logps/rejected": -
|
| 435 |
-
"loss": 0.
|
| 436 |
-
"mean_token_accuracy": 0.
|
| 437 |
"num_tokens": 155066.0,
|
| 438 |
"rewards/accuracies": 1.0,
|
| 439 |
-
"rewards/chosen": 0.
|
| 440 |
-
"rewards/margins": 9.
|
| 441 |
-
"rewards/rejected": -8.
|
| 442 |
"step": 24
|
| 443 |
},
|
| 444 |
{
|
| 445 |
-
"entropy": 1.
|
| 446 |
"epoch": 1.251572327044025,
|
| 447 |
-
"grad_norm": 0.
|
| 448 |
"learning_rate": 3e-05,
|
| 449 |
-
"logits/chosen": -2.
|
| 450 |
-
"logits/rejected": -1.
|
| 451 |
-
"logps/chosen": -
|
| 452 |
-
"logps/rejected": -
|
| 453 |
-
"loss": 0.
|
| 454 |
-
"mean_token_accuracy": 0.
|
| 455 |
"num_tokens": 161896.0,
|
| 456 |
"rewards/accuracies": 1.0,
|
| 457 |
-
"rewards/chosen": 0.
|
| 458 |
-
"rewards/margins": 8.
|
| 459 |
-
"rewards/rejected": -
|
| 460 |
"step": 25
|
| 461 |
},
|
| 462 |
{
|
| 463 |
-
"entropy": 1.
|
| 464 |
"epoch": 1.3018867924528301,
|
| 465 |
-
"grad_norm": 0.
|
| 466 |
"learning_rate": 2.916666666666667e-05,
|
| 467 |
-
"logits/chosen": -2.
|
| 468 |
-
"logits/rejected": -1.
|
| 469 |
-
"logps/chosen": -
|
| 470 |
-
"logps/rejected": -
|
| 471 |
-
"loss": 0.
|
| 472 |
-
"mean_token_accuracy": 0.
|
| 473 |
"num_tokens": 168755.0,
|
| 474 |
"rewards/accuracies": 1.0,
|
| 475 |
-
"rewards/chosen": 0.
|
| 476 |
-
"rewards/margins":
|
| 477 |
-
"rewards/rejected": -7.
|
| 478 |
"step": 26
|
| 479 |
},
|
| 480 |
{
|
| 481 |
-
"entropy": 1.
|
| 482 |
"epoch": 1.3522012578616351,
|
| 483 |
-
"grad_norm": 0.
|
| 484 |
"learning_rate": 2.8333333333333335e-05,
|
| 485 |
-
"logits/chosen": -2.
|
| 486 |
-
"logits/rejected": -1.
|
| 487 |
-
"logps/chosen": -
|
| 488 |
-
"logps/rejected": -
|
| 489 |
-
"loss": 0.
|
| 490 |
-
"mean_token_accuracy": 0.
|
| 491 |
"num_tokens": 175381.0,
|
| 492 |
"rewards/accuracies": 1.0,
|
| 493 |
-
"rewards/chosen":
|
| 494 |
-
"rewards/margins": 9.
|
| 495 |
-
"rewards/rejected": -8.
|
| 496 |
"step": 27
|
| 497 |
},
|
| 498 |
{
|
| 499 |
-
"entropy": 1.
|
| 500 |
"epoch": 1.4025157232704402,
|
| 501 |
-
"grad_norm": 0.
|
| 502 |
"learning_rate": 2.7500000000000004e-05,
|
| 503 |
-
"logits/chosen": -2.
|
| 504 |
-
"logits/rejected": -1.
|
| 505 |
-
"logps/chosen": -
|
| 506 |
-
"logps/rejected": -
|
| 507 |
-
"loss": 0.
|
| 508 |
-
"mean_token_accuracy": 0.
|
| 509 |
"num_tokens": 181771.0,
|
| 510 |
"rewards/accuracies": 1.0,
|
| 511 |
-
"rewards/chosen": 0.
|
| 512 |
-
"rewards/margins": 9.
|
| 513 |
-
"rewards/rejected": -8.
|
| 514 |
"step": 28
|
| 515 |
},
|
| 516 |
{
|
| 517 |
-
"entropy": 1.
|
| 518 |
"epoch": 1.4528301886792452,
|
| 519 |
-
"grad_norm": 0.
|
| 520 |
"learning_rate": 2.6666666666666667e-05,
|
| 521 |
-
"logits/chosen": -2.
|
| 522 |
-
"logits/rejected": -
|
| 523 |
-
"logps/chosen": -
|
| 524 |
-
"logps/rejected": -
|
| 525 |
-
"loss": 0.
|
| 526 |
-
"mean_token_accuracy": 0.
|
| 527 |
"num_tokens": 188644.0,
|
| 528 |
"rewards/accuracies": 1.0,
|
| 529 |
-
"rewards/chosen": -0.
|
| 530 |
-
"rewards/margins": 8.
|
| 531 |
-
"rewards/rejected": -8.
|
| 532 |
"step": 29
|
| 533 |
},
|
| 534 |
{
|
| 535 |
-
"entropy": 1.
|
| 536 |
"epoch": 1.5031446540880502,
|
| 537 |
-
"grad_norm": 0.
|
| 538 |
"learning_rate": 2.5833333333333336e-05,
|
| 539 |
-
"logits/chosen": -2.
|
| 540 |
-
"logits/rejected": -1.
|
| 541 |
-
"logps/chosen": -
|
| 542 |
-
"logps/rejected": -
|
| 543 |
-
"loss": 0.
|
| 544 |
-
"mean_token_accuracy": 0.
|
| 545 |
"num_tokens": 196391.0,
|
| 546 |
"rewards/accuracies": 1.0,
|
| 547 |
-
"rewards/chosen": 0.
|
| 548 |
-
"rewards/margins":
|
| 549 |
-
"rewards/rejected": -
|
| 550 |
"step": 30
|
| 551 |
},
|
| 552 |
{
|
| 553 |
-
"entropy": 1.
|
| 554 |
"epoch": 1.5534591194968552,
|
| 555 |
-
"grad_norm": 0.
|
| 556 |
"learning_rate": 2.5e-05,
|
| 557 |
-
"logits/chosen": -2.
|
| 558 |
-
"logits/rejected": -1.
|
| 559 |
-
"logps/chosen": -
|
| 560 |
-
"logps/rejected": -
|
| 561 |
-
"loss": 0.
|
| 562 |
-
"mean_token_accuracy": 0.
|
| 563 |
"num_tokens": 203518.0,
|
| 564 |
"rewards/accuracies": 1.0,
|
| 565 |
-
"rewards/chosen": -0.
|
| 566 |
-
"rewards/margins": 8.
|
| 567 |
-
"rewards/rejected": -
|
| 568 |
"step": 31
|
| 569 |
},
|
| 570 |
{
|
| 571 |
-
"entropy": 1.
|
| 572 |
"epoch": 1.6037735849056602,
|
| 573 |
-
"grad_norm": 0.
|
| 574 |
"learning_rate": 2.4166666666666667e-05,
|
| 575 |
-
"logits/chosen": -2.
|
| 576 |
-
"logits/rejected": -1.
|
| 577 |
-
"logps/chosen": -143.
|
| 578 |
-
"logps/rejected": -
|
| 579 |
-
"loss": 0.
|
| 580 |
-
"mean_token_accuracy": 0.
|
| 581 |
"num_tokens": 209850.0,
|
| 582 |
"rewards/accuracies": 1.0,
|
| 583 |
-
"rewards/chosen": 0.
|
| 584 |
-
"rewards/margins": 10.
|
| 585 |
-
"rewards/rejected": -10.
|
| 586 |
"step": 32
|
| 587 |
},
|
| 588 |
{
|
| 589 |
-
"entropy": 1.
|
| 590 |
"epoch": 1.6540880503144653,
|
| 591 |
-
"grad_norm": 0.
|
| 592 |
"learning_rate": 2.3333333333333336e-05,
|
| 593 |
-
"logits/chosen": -2.
|
| 594 |
-
"logits/rejected": -1.
|
| 595 |
-
"logps/chosen": -125.
|
| 596 |
-
"logps/rejected": -
|
| 597 |
-
"loss": 0.
|
| 598 |
-
"mean_token_accuracy": 0.
|
| 599 |
"num_tokens": 215637.0,
|
| 600 |
"rewards/accuracies": 1.0,
|
| 601 |
-
"rewards/chosen": 0.
|
| 602 |
-
"rewards/margins": 9.
|
| 603 |
-
"rewards/rejected": -8.
|
| 604 |
"step": 33
|
| 605 |
},
|
| 606 |
{
|
| 607 |
-
"entropy": 1.
|
| 608 |
"epoch": 1.7044025157232703,
|
| 609 |
-
"grad_norm": 0.
|
| 610 |
"learning_rate": 2.25e-05,
|
| 611 |
-
"logits/chosen": -2.
|
| 612 |
-
"logits/rejected": -2.
|
| 613 |
-
"logps/chosen": -125.
|
| 614 |
-
"logps/rejected": -
|
| 615 |
-
"loss": 0.
|
| 616 |
-
"mean_token_accuracy": 0.
|
| 617 |
"num_tokens": 221051.0,
|
| 618 |
"rewards/accuracies": 1.0,
|
| 619 |
-
"rewards/chosen": 1.
|
| 620 |
-
"rewards/margins":
|
| 621 |
-
"rewards/rejected": -8.
|
| 622 |
"step": 34
|
| 623 |
},
|
| 624 |
{
|
| 625 |
-
"entropy": 1.
|
| 626 |
"epoch": 1.7547169811320755,
|
| 627 |
-
"grad_norm": 0.
|
| 628 |
"learning_rate": 2.1666666666666667e-05,
|
| 629 |
-
"logits/chosen": -2.
|
| 630 |
-
"logits/rejected": -2.
|
| 631 |
-
"logps/chosen": -131.
|
| 632 |
-
"logps/rejected": -
|
| 633 |
-
"loss": 0.
|
| 634 |
-
"mean_token_accuracy": 0.
|
| 635 |
"num_tokens": 226426.0,
|
| 636 |
"rewards/accuracies": 1.0,
|
| 637 |
-
"rewards/chosen": 0.
|
| 638 |
-
"rewards/margins": 10.
|
| 639 |
-
"rewards/rejected": -9.
|
| 640 |
"step": 35
|
| 641 |
},
|
| 642 |
{
|
| 643 |
-
"entropy": 1.
|
| 644 |
"epoch": 1.8050314465408805,
|
| 645 |
-
"grad_norm": 0.
|
| 646 |
"learning_rate": 2.0833333333333336e-05,
|
| 647 |
-
"logits/chosen": -2.
|
| 648 |
-
"logits/rejected": -1.
|
| 649 |
-
"logps/chosen": -
|
| 650 |
-
"logps/rejected": -
|
| 651 |
-
"loss": 0.
|
| 652 |
-
"mean_token_accuracy": 0.
|
| 653 |
"num_tokens": 233419.0,
|
| 654 |
"rewards/accuracies": 1.0,
|
| 655 |
-
"rewards/chosen": 0.
|
| 656 |
-
"rewards/margins": 9.
|
| 657 |
-
"rewards/rejected": -9.
|
| 658 |
"step": 36
|
| 659 |
},
|
| 660 |
{
|
| 661 |
-
"entropy": 1.
|
| 662 |
"epoch": 1.8553459119496856,
|
| 663 |
-
"grad_norm": 0.
|
| 664 |
"learning_rate": 2e-05,
|
| 665 |
-
"logits/chosen": -2.
|
| 666 |
-
"logits/rejected": -1.
|
| 667 |
-
"logps/chosen": -144.
|
| 668 |
-
"logps/rejected": -
|
| 669 |
-
"loss": 0.
|
| 670 |
-
"mean_token_accuracy": 0.
|
| 671 |
"num_tokens": 240673.0,
|
| 672 |
"rewards/accuracies": 1.0,
|
| 673 |
-
"rewards/chosen": -0.
|
| 674 |
-
"rewards/margins": 10.
|
| 675 |
-
"rewards/rejected": -10.
|
| 676 |
"step": 37
|
| 677 |
},
|
| 678 |
{
|
| 679 |
-
"entropy": 1.
|
| 680 |
"epoch": 1.9056603773584906,
|
| 681 |
-
"grad_norm": 0.
|
| 682 |
"learning_rate": 1.9166666666666667e-05,
|
| 683 |
-
"logits/chosen": -2.
|
| 684 |
-
"logits/rejected": -2.
|
| 685 |
-
"logps/chosen": -
|
| 686 |
-
"logps/rejected": -
|
| 687 |
-
"loss": 0.
|
| 688 |
-
"mean_token_accuracy": 0.
|
| 689 |
"num_tokens": 245974.0,
|
| 690 |
"rewards/accuracies": 1.0,
|
| 691 |
-
"rewards/chosen": 0.
|
| 692 |
-
"rewards/margins": 10.
|
| 693 |
-
"rewards/rejected": -
|
| 694 |
"step": 38
|
| 695 |
},
|
| 696 |
{
|
| 697 |
-
"entropy": 1.
|
| 698 |
"epoch": 1.9559748427672956,
|
| 699 |
-
"grad_norm": 0.
|
| 700 |
"learning_rate": 1.8333333333333333e-05,
|
| 701 |
-
"logits/chosen": -2.
|
| 702 |
-
"logits/rejected": -1.
|
| 703 |
-
"logps/chosen": -
|
| 704 |
-
"logps/rejected": -
|
| 705 |
-
"loss": 0.
|
| 706 |
-
"mean_token_accuracy": 0.
|
| 707 |
"num_tokens": 253184.0,
|
| 708 |
"rewards/accuracies": 1.0,
|
| 709 |
-
"rewards/chosen": -0.
|
| 710 |
-
"rewards/margins": 9.
|
| 711 |
-
"rewards/rejected": -9.
|
| 712 |
"step": 39
|
| 713 |
},
|
| 714 |
{
|
| 715 |
-
"entropy": 1.
|
| 716 |
"epoch": 2.0,
|
| 717 |
-
"grad_norm": 0.
|
| 718 |
"learning_rate": 1.75e-05,
|
| 719 |
-
"logits/chosen": -2.
|
| 720 |
-
"logits/rejected": -1.
|
| 721 |
-
"logps/chosen": -
|
| 722 |
-
"logps/rejected": -
|
| 723 |
-
"loss": 0.
|
| 724 |
"mean_token_accuracy": 0.5644707764898028,
|
| 725 |
"num_tokens": 260000.0,
|
| 726 |
"rewards/accuracies": 1.0,
|
| 727 |
-
"rewards/chosen": -0.
|
| 728 |
-
"rewards/margins":
|
| 729 |
-
"rewards/rejected": -9.
|
| 730 |
"step": 40
|
| 731 |
},
|
| 732 |
{
|
| 733 |
-
"entropy": 1.
|
| 734 |
"epoch": 2.050314465408805,
|
| 735 |
-
"grad_norm": 0.
|
| 736 |
"learning_rate": 1.6666666666666667e-05,
|
| 737 |
-
"logits/chosen": -2.
|
| 738 |
-
"logits/rejected": -1.
|
| 739 |
-
"logps/chosen": -
|
| 740 |
-
"logps/rejected": -
|
| 741 |
-
"loss": 0.
|
| 742 |
"mean_token_accuracy": 0.6062684953212738,
|
| 743 |
"num_tokens": 266060.0,
|
| 744 |
"rewards/accuracies": 1.0,
|
| 745 |
-
"rewards/chosen": 0.
|
| 746 |
-
"rewards/margins": 9.
|
| 747 |
-
"rewards/rejected": -9.
|
| 748 |
"step": 41
|
| 749 |
},
|
| 750 |
{
|
| 751 |
-
"entropy": 1.
|
| 752 |
"epoch": 2.10062893081761,
|
| 753 |
-
"grad_norm": 0.
|
| 754 |
"learning_rate": 1.5833333333333333e-05,
|
| 755 |
-
"logits/chosen": -2.
|
| 756 |
-
"logits/rejected": -2.
|
| 757 |
-
"logps/chosen": -
|
| 758 |
-
"logps/rejected": -
|
| 759 |
-
"loss": 0.
|
| 760 |
-
"mean_token_accuracy": 0.
|
| 761 |
"num_tokens": 272769.0,
|
| 762 |
"rewards/accuracies": 1.0,
|
| 763 |
-
"rewards/chosen": 0.
|
| 764 |
-
"rewards/margins": 9.
|
| 765 |
-
"rewards/rejected": -9.
|
| 766 |
"step": 42
|
| 767 |
},
|
| 768 |
{
|
| 769 |
-
"entropy": 1.
|
| 770 |
"epoch": 2.150943396226415,
|
| 771 |
-
"grad_norm": 0.
|
| 772 |
"learning_rate": 1.5e-05,
|
| 773 |
-
"logits/chosen": -2.
|
| 774 |
-
"logits/rejected": -1.
|
| 775 |
-
"logps/chosen": -140.
|
| 776 |
-
"logps/rejected": -
|
| 777 |
-
"loss": 0.
|
| 778 |
-
"mean_token_accuracy": 0.
|
| 779 |
"num_tokens": 279603.0,
|
| 780 |
"rewards/accuracies": 1.0,
|
| 781 |
-
"rewards/chosen": -0.
|
| 782 |
-
"rewards/margins": 10.
|
| 783 |
-
"rewards/rejected": -10.
|
| 784 |
"step": 43
|
| 785 |
},
|
| 786 |
{
|
| 787 |
-
"entropy": 1.
|
| 788 |
"epoch": 2.20125786163522,
|
| 789 |
-
"grad_norm": 0.
|
| 790 |
"learning_rate": 1.4166666666666668e-05,
|
| 791 |
-
"logits/chosen": -2.
|
| 792 |
-
"logits/rejected": -1.
|
| 793 |
-
"logps/chosen": -
|
| 794 |
-
"logps/rejected": -
|
| 795 |
-
"loss": 0.
|
| 796 |
-
"mean_token_accuracy": 0.
|
| 797 |
"num_tokens": 286436.0,
|
| 798 |
"rewards/accuracies": 1.0,
|
| 799 |
-
"rewards/chosen": 0.
|
| 800 |
-
"rewards/margins": 9.
|
| 801 |
-
"rewards/rejected": -9.
|
| 802 |
"step": 44
|
| 803 |
},
|
| 804 |
{
|
| 805 |
-
"entropy": 1.
|
| 806 |
"epoch": 2.251572327044025,
|
| 807 |
-
"grad_norm": 0.
|
| 808 |
"learning_rate": 1.3333333333333333e-05,
|
| 809 |
-
"logits/chosen": -2.
|
| 810 |
-
"logits/rejected": -1.
|
| 811 |
-
"logps/chosen": -137.
|
| 812 |
-
"logps/rejected": -
|
| 813 |
-
"loss": 0.
|
| 814 |
-
"mean_token_accuracy": 0.
|
| 815 |
"num_tokens": 294223.0,
|
| 816 |
"rewards/accuracies": 1.0,
|
| 817 |
-
"rewards/chosen": -0.
|
| 818 |
-
"rewards/margins": 10.
|
| 819 |
-
"rewards/rejected": -10.
|
| 820 |
"step": 45
|
| 821 |
},
|
| 822 |
{
|
| 823 |
-
"entropy": 1.
|
| 824 |
"epoch": 2.30188679245283,
|
| 825 |
-
"grad_norm": 0.
|
| 826 |
"learning_rate": 1.25e-05,
|
| 827 |
-
"logits/chosen": -2.
|
| 828 |
-
"logits/rejected": -2.
|
| 829 |
-
"logps/chosen": -
|
| 830 |
-
"logps/rejected": -
|
| 831 |
-
"loss": 0.
|
| 832 |
-
"mean_token_accuracy": 0.
|
| 833 |
"num_tokens": 299634.0,
|
| 834 |
"rewards/accuracies": 1.0,
|
| 835 |
-
"rewards/chosen": 1.
|
| 836 |
-
"rewards/margins": 10.
|
| 837 |
-
"rewards/rejected": -9.
|
| 838 |
"step": 46
|
| 839 |
},
|
| 840 |
{
|
| 841 |
-
"entropy": 1.
|
| 842 |
"epoch": 2.352201257861635,
|
| 843 |
-
"grad_norm": 0.
|
| 844 |
"learning_rate": 1.1666666666666668e-05,
|
| 845 |
-
"logits/chosen": -2.
|
| 846 |
-
"logits/rejected": -1.
|
| 847 |
-
"logps/chosen": -
|
| 848 |
-
"logps/rejected": -
|
| 849 |
-
"loss": 0.
|
| 850 |
-
"mean_token_accuracy": 0.
|
| 851 |
"num_tokens": 305920.0,
|
| 852 |
"rewards/accuracies": 1.0,
|
| 853 |
-
"rewards/chosen": 0.
|
| 854 |
-
"rewards/margins": 10.
|
| 855 |
-
"rewards/rejected": -10.
|
| 856 |
"step": 47
|
| 857 |
},
|
| 858 |
{
|
| 859 |
-
"entropy": 1.
|
| 860 |
"epoch": 2.40251572327044,
|
| 861 |
-
"grad_norm": 0.
|
| 862 |
"learning_rate": 1.0833333333333334e-05,
|
| 863 |
-
"logits/chosen": -2.
|
| 864 |
-
"logits/rejected": -1.
|
| 865 |
-
"logps/chosen": -
|
| 866 |
-
"logps/rejected": -
|
| 867 |
-
"loss": 0.
|
| 868 |
-
"mean_token_accuracy": 0.
|
| 869 |
"num_tokens": 313346.0,
|
| 870 |
"rewards/accuracies": 1.0,
|
| 871 |
-
"rewards/chosen": 0.
|
| 872 |
-
"rewards/margins": 8.
|
| 873 |
-
"rewards/rejected": -8.
|
| 874 |
"step": 48
|
| 875 |
},
|
| 876 |
{
|
| 877 |
-
"entropy": 1.
|
| 878 |
"epoch": 2.452830188679245,
|
| 879 |
-
"grad_norm": 0.
|
| 880 |
"learning_rate": 1e-05,
|
| 881 |
-
"logits/chosen": -2.
|
| 882 |
-
"logits/rejected": -1.
|
| 883 |
-
"logps/chosen": -
|
| 884 |
-
"logps/rejected": -
|
| 885 |
-
"loss": 0.
|
| 886 |
-
"mean_token_accuracy": 0.
|
| 887 |
"num_tokens": 320662.0,
|
| 888 |
"rewards/accuracies": 1.0,
|
| 889 |
-
"rewards/chosen": -0.
|
| 890 |
-
"rewards/margins": 9.
|
| 891 |
-
"rewards/rejected": -9.
|
| 892 |
"step": 49
|
| 893 |
},
|
| 894 |
{
|
| 895 |
-
"entropy": 1.
|
| 896 |
"epoch": 2.50314465408805,
|
| 897 |
-
"grad_norm": 0.
|
| 898 |
"learning_rate": 9.166666666666666e-06,
|
| 899 |
-
"logits/chosen": -2.
|
| 900 |
-
"logits/rejected": -2.
|
| 901 |
-
"logps/chosen": -
|
| 902 |
-
"logps/rejected": -
|
| 903 |
-
"loss": 0.
|
| 904 |
-
"mean_token_accuracy": 0.
|
| 905 |
"num_tokens": 326611.0,
|
| 906 |
"rewards/accuracies": 1.0,
|
| 907 |
-
"rewards/chosen": 0.
|
| 908 |
-
"rewards/margins": 9.
|
| 909 |
-
"rewards/rejected": -
|
| 910 |
"step": 50
|
| 911 |
},
|
| 912 |
{
|
| 913 |
-
"entropy": 1.
|
| 914 |
"epoch": 2.5534591194968552,
|
| 915 |
-
"grad_norm": 0.
|
| 916 |
"learning_rate": 8.333333333333334e-06,
|
| 917 |
-
"logits/chosen": -2.
|
| 918 |
-
"logits/rejected": -1.
|
| 919 |
-
"logps/chosen": -116.
|
| 920 |
-
"logps/rejected": -
|
| 921 |
-
"loss": 0.
|
| 922 |
-
"mean_token_accuracy": 0.
|
| 923 |
"num_tokens": 332814.0,
|
| 924 |
"rewards/accuracies": 1.0,
|
| 925 |
-
"rewards/chosen": 0.
|
| 926 |
-
"rewards/margins":
|
| 927 |
-
"rewards/rejected": -9.
|
| 928 |
"step": 51
|
| 929 |
},
|
| 930 |
{
|
| 931 |
-
"entropy": 1.
|
| 932 |
"epoch": 2.6037735849056602,
|
| 933 |
-
"grad_norm": 0.
|
| 934 |
"learning_rate": 7.5e-06,
|
| 935 |
-
"logits/chosen": -2.
|
| 936 |
-
"logits/rejected": -1.
|
| 937 |
-
"logps/chosen": -120.
|
| 938 |
-
"logps/rejected": -
|
| 939 |
-
"loss":
|
| 940 |
-
"mean_token_accuracy": 0.
|
| 941 |
"num_tokens": 339547.0,
|
| 942 |
"rewards/accuracies": 1.0,
|
| 943 |
-
"rewards/chosen": 0.
|
| 944 |
-
"rewards/margins": 10.
|
| 945 |
-
"rewards/rejected": -9.
|
| 946 |
"step": 52
|
| 947 |
},
|
| 948 |
{
|
| 949 |
-
"entropy": 1.
|
| 950 |
"epoch": 2.6540880503144653,
|
| 951 |
-
"grad_norm": 0.
|
| 952 |
"learning_rate": 6.666666666666667e-06,
|
| 953 |
-
"logits/chosen": -2.
|
| 954 |
-
"logits/rejected": -1.
|
| 955 |
-
"logps/chosen": -115.
|
| 956 |
-
"logps/rejected": -
|
| 957 |
-
"loss": 0.
|
| 958 |
-
"mean_token_accuracy": 0.
|
| 959 |
"num_tokens": 346647.0,
|
| 960 |
"rewards/accuracies": 1.0,
|
| 961 |
-
"rewards/chosen": 0.
|
| 962 |
-
"rewards/margins": 9.
|
| 963 |
-
"rewards/rejected": -9.
|
| 964 |
"step": 53
|
| 965 |
},
|
| 966 |
{
|
| 967 |
-
"entropy": 1.
|
| 968 |
"epoch": 2.7044025157232703,
|
| 969 |
-
"grad_norm": 0.
|
| 970 |
"learning_rate": 5.833333333333334e-06,
|
| 971 |
-
"logits/chosen": -2.
|
| 972 |
-
"logits/rejected": -1.
|
| 973 |
-
"logps/chosen": -
|
| 974 |
-
"logps/rejected": -
|
| 975 |
-
"loss": 0.
|
| 976 |
-
"mean_token_accuracy": 0.
|
| 977 |
"num_tokens": 353155.0,
|
| 978 |
"rewards/accuracies": 1.0,
|
| 979 |
-
"rewards/chosen": 0.
|
| 980 |
-
"rewards/margins": 10.
|
| 981 |
-
"rewards/rejected": -10.
|
| 982 |
"step": 54
|
| 983 |
},
|
| 984 |
{
|
| 985 |
-
"entropy": 1.
|
| 986 |
"epoch": 2.7547169811320753,
|
| 987 |
-
"grad_norm": 0.
|
| 988 |
"learning_rate": 5e-06,
|
| 989 |
-
"logits/chosen": -2.
|
| 990 |
-
"logits/rejected": -1.
|
| 991 |
-
"logps/chosen": -
|
| 992 |
-
"logps/rejected": -
|
| 993 |
-
"loss":
|
| 994 |
-
"mean_token_accuracy": 0.
|
| 995 |
"num_tokens": 358538.0,
|
| 996 |
"rewards/accuracies": 1.0,
|
| 997 |
-
"rewards/chosen": 0.
|
| 998 |
-
"rewards/margins": 10.
|
| 999 |
-
"rewards/rejected": -
|
| 1000 |
"step": 55
|
| 1001 |
},
|
| 1002 |
{
|
| 1003 |
-
"entropy": 1.
|
| 1004 |
"epoch": 2.8050314465408803,
|
| 1005 |
-
"grad_norm": 0.
|
| 1006 |
"learning_rate": 4.166666666666667e-06,
|
| 1007 |
-
"logits/chosen": -2.
|
| 1008 |
-
"logits/rejected": -1.
|
| 1009 |
-
"logps/chosen": -121.
|
| 1010 |
-
"logps/rejected": -
|
| 1011 |
-
"loss": 0.
|
| 1012 |
-
"mean_token_accuracy": 0.
|
| 1013 |
"num_tokens": 364730.0,
|
| 1014 |
"rewards/accuracies": 1.0,
|
| 1015 |
-
"rewards/chosen": 0.
|
| 1016 |
-
"rewards/margins": 8.
|
| 1017 |
-
"rewards/rejected": -8.
|
| 1018 |
"step": 56
|
| 1019 |
},
|
| 1020 |
{
|
| 1021 |
-
"entropy": 1.
|
| 1022 |
"epoch": 2.8553459119496853,
|
| 1023 |
-
"grad_norm": 0.
|
| 1024 |
"learning_rate": 3.3333333333333333e-06,
|
| 1025 |
-
"logits/chosen": -2.
|
| 1026 |
-
"logits/rejected": -1.
|
| 1027 |
-
"logps/chosen": -142.
|
| 1028 |
-
"logps/rejected": -
|
| 1029 |
-
"loss": 0.
|
| 1030 |
-
"mean_token_accuracy": 0.
|
| 1031 |
"num_tokens": 371209.0,
|
| 1032 |
"rewards/accuracies": 1.0,
|
| 1033 |
-
"rewards/chosen": 0.
|
| 1034 |
-
"rewards/margins": 9.
|
| 1035 |
-
"rewards/rejected": -
|
| 1036 |
"step": 57
|
| 1037 |
},
|
| 1038 |
{
|
| 1039 |
-
"entropy": 1.
|
| 1040 |
"epoch": 2.9056603773584904,
|
| 1041 |
-
"grad_norm": 0.
|
| 1042 |
"learning_rate": 2.5e-06,
|
| 1043 |
-
"logits/chosen": -2.
|
| 1044 |
-
"logits/rejected": -1.
|
| 1045 |
-
"logps/chosen": -
|
| 1046 |
-
"logps/rejected": -
|
| 1047 |
-
"loss": 0.
|
| 1048 |
-
"mean_token_accuracy": 0.
|
| 1049 |
"num_tokens": 378077.0,
|
| 1050 |
"rewards/accuracies": 1.0,
|
| 1051 |
-
"rewards/chosen": 0.
|
| 1052 |
-
"rewards/margins": 10.
|
| 1053 |
-
"rewards/rejected": -
|
| 1054 |
"step": 58
|
| 1055 |
},
|
| 1056 |
{
|
| 1057 |
-
"entropy": 1.
|
| 1058 |
"epoch": 2.9559748427672954,
|
| 1059 |
-
"grad_norm": 0.
|
| 1060 |
"learning_rate": 1.6666666666666667e-06,
|
| 1061 |
-
"logits/chosen": -2.
|
| 1062 |
-
"logits/rejected": -2.
|
| 1063 |
-
"logps/chosen": -141.
|
| 1064 |
-
"logps/rejected": -
|
| 1065 |
-
"loss":
|
| 1066 |
-
"mean_token_accuracy": 0.
|
| 1067 |
"num_tokens": 384115.0,
|
| 1068 |
"rewards/accuracies": 1.0,
|
| 1069 |
-
"rewards/chosen": 0.
|
| 1070 |
-
"rewards/margins": 10.
|
| 1071 |
-
"rewards/rejected": -9.
|
| 1072 |
"step": 59
|
| 1073 |
},
|
| 1074 |
{
|
| 1075 |
-
"entropy": 1.
|
| 1076 |
"epoch": 3.0,
|
| 1077 |
-
"grad_norm": 0.
|
| 1078 |
"learning_rate": 8.333333333333333e-07,
|
| 1079 |
-
"logits/chosen": -2.
|
| 1080 |
-
"logits/rejected": -1.
|
| 1081 |
-
"logps/chosen": -
|
| 1082 |
-
"logps/rejected": -
|
| 1083 |
-
"loss": 0.
|
| 1084 |
-
"mean_token_accuracy": 0.
|
| 1085 |
"num_tokens": 390000.0,
|
| 1086 |
"rewards/accuracies": 1.0,
|
| 1087 |
-
"rewards/chosen": -0.
|
| 1088 |
-
"rewards/margins": 8.
|
| 1089 |
-
"rewards/rejected": -8.
|
| 1090 |
"step": 60
|
| 1091 |
}
|
| 1092 |
],
|
|
|
|
| 12 |
{
|
| 13 |
"entropy": 1.6346618384122849,
|
| 14 |
"epoch": 0.050314465408805034,
|
| 15 |
+
"grad_norm": 3.734375,
|
| 16 |
"learning_rate": 5e-05,
|
| 17 |
"logits/chosen": -0.7411658949470903,
|
| 18 |
"logits/rejected": -0.20507352810005436,
|
|
|
|
| 28 |
"step": 1
|
| 29 |
},
|
| 30 |
{
|
| 31 |
+
"entropy": 1.4584085196256638,
|
| 32 |
"epoch": 0.10062893081761007,
|
| 33 |
+
"grad_norm": 3.40625,
|
| 34 |
"learning_rate": 4.9166666666666665e-05,
|
| 35 |
+
"logits/chosen": -0.9251237438184291,
|
| 36 |
+
"logits/rejected": -0.42817166651451355,
|
| 37 |
+
"logps/chosen": -153.27350616455078,
|
| 38 |
+
"logps/rejected": -180.06759071350098,
|
| 39 |
+
"loss": 0.5006560683250427,
|
| 40 |
"mean_token_accuracy": 0.583746887743473,
|
| 41 |
"num_tokens": 12841.0,
|
| 42 |
"rewards/accuracies": 1.0,
|
| 43 |
+
"rewards/chosen": 0.23006458766758442,
|
| 44 |
+
"rewards/margins": 0.4357452392578125,
|
| 45 |
+
"rewards/rejected": -0.20568065904080868,
|
| 46 |
"step": 2
|
| 47 |
},
|
| 48 |
{
|
| 49 |
+
"entropy": 1.4456398040056229,
|
| 50 |
"epoch": 0.1509433962264151,
|
| 51 |
+
"grad_norm": 2.609375,
|
| 52 |
"learning_rate": 4.8333333333333334e-05,
|
| 53 |
+
"logits/chosen": -1.0732471831941308,
|
| 54 |
+
"logits/rejected": -0.5970107323243525,
|
| 55 |
+
"logps/chosen": -146.46685123443604,
|
| 56 |
+
"logps/rejected": -187.66933250427246,
|
| 57 |
+
"loss": 0.3808254599571228,
|
| 58 |
"mean_token_accuracy": 0.6039908826351166,
|
| 59 |
"num_tokens": 18816.0,
|
| 60 |
"rewards/accuracies": 1.0,
|
| 61 |
+
"rewards/chosen": 0.34212866611778736,
|
| 62 |
+
"rewards/margins": 0.7833537347614765,
|
| 63 |
+
"rewards/rejected": -0.4412250593304634,
|
| 64 |
"step": 3
|
| 65 |
},
|
| 66 |
{
|
| 67 |
+
"entropy": 1.5161051750183105,
|
| 68 |
"epoch": 0.20125786163522014,
|
| 69 |
+
"grad_norm": 2.15625,
|
| 70 |
"learning_rate": 4.75e-05,
|
| 71 |
+
"logits/chosen": -0.9412079692678976,
|
| 72 |
+
"logits/rejected": -0.4002159728435217,
|
| 73 |
+
"logps/chosen": -118.29372596740723,
|
| 74 |
+
"logps/rejected": -182.18238067626953,
|
| 75 |
+
"loss": 0.34213775396347046,
|
| 76 |
"mean_token_accuracy": 0.5912635922431946,
|
| 77 |
"num_tokens": 25522.0,
|
| 78 |
"rewards/accuracies": 1.0,
|
| 79 |
+
"rewards/chosen": 0.3642648714594543,
|
| 80 |
+
"rewards/margins": 0.954810731112957,
|
| 81 |
+
"rewards/rejected": -0.5905458554625511,
|
| 82 |
"step": 4
|
| 83 |
},
|
| 84 |
{
|
| 85 |
+
"entropy": 1.4330662935972214,
|
| 86 |
"epoch": 0.25157232704402516,
|
| 87 |
+
"grad_norm": 1.7421875,
|
| 88 |
"learning_rate": 4.666666666666667e-05,
|
| 89 |
+
"logits/chosen": -1.2388932583587922,
|
| 90 |
+
"logits/rejected": -0.6178736694127079,
|
| 91 |
+
"logps/chosen": -118.57859134674072,
|
| 92 |
+
"logps/rejected": -193.721284866333,
|
| 93 |
+
"loss": 0.21044230461120605,
|
| 94 |
"mean_token_accuracy": 0.5765212289988995,
|
| 95 |
"num_tokens": 32576.0,
|
| 96 |
"rewards/accuracies": 1.0,
|
| 97 |
+
"rewards/chosen": 0.45518894493579865,
|
| 98 |
+
"rewards/margins": 1.520921140909195,
|
| 99 |
+
"rewards/rejected": -1.065732218325138,
|
| 100 |
"step": 5
|
| 101 |
},
|
| 102 |
{
|
| 103 |
+
"entropy": 1.4573922604322433,
|
| 104 |
"epoch": 0.3018867924528302,
|
| 105 |
+
"grad_norm": 1.484375,
|
| 106 |
"learning_rate": 4.5833333333333334e-05,
|
| 107 |
+
"logits/chosen": -1.2737800530058234,
|
| 108 |
+
"logits/rejected": -0.5034645169102513,
|
| 109 |
+
"logps/chosen": -120.18647003173828,
|
| 110 |
+
"logps/rejected": -203.72421646118164,
|
| 111 |
+
"loss": 0.17699970304965973,
|
| 112 |
"mean_token_accuracy": 0.5985999256372452,
|
| 113 |
"num_tokens": 39703.0,
|
| 114 |
"rewards/accuracies": 1.0,
|
| 115 |
+
"rewards/chosen": 0.41289406828582287,
|
| 116 |
+
"rewards/margins": 1.7254902124404907,
|
| 117 |
+
"rewards/rejected": -1.3125961422920227,
|
| 118 |
"step": 6
|
| 119 |
},
|
| 120 |
{
|
| 121 |
+
"entropy": 1.2998351603746414,
|
| 122 |
"epoch": 0.3522012578616352,
|
| 123 |
+
"grad_norm": 1.546875,
|
| 124 |
"learning_rate": 4.5e-05,
|
| 125 |
+
"logits/chosen": -1.3130657002820167,
|
| 126 |
+
"logits/rejected": -0.6094413558435448,
|
| 127 |
+
"logps/chosen": -108.3181095123291,
|
| 128 |
+
"logps/rejected": -194.44207382202148,
|
| 129 |
+
"loss": 0.18618769943714142,
|
| 130 |
"mean_token_accuracy": 0.6349928826093674,
|
| 131 |
"num_tokens": 47481.0,
|
| 132 |
"rewards/accuracies": 1.0,
|
| 133 |
+
"rewards/chosen": 0.26029996760189533,
|
| 134 |
+
"rewards/margins": 1.7353856414556503,
|
| 135 |
+
"rewards/rejected": -1.4750856757164001,
|
| 136 |
"step": 7
|
| 137 |
},
|
| 138 |
{
|
| 139 |
+
"entropy": 1.354924201965332,
|
| 140 |
"epoch": 0.4025157232704403,
|
| 141 |
+
"grad_norm": 1.015625,
|
| 142 |
"learning_rate": 4.4166666666666665e-05,
|
| 143 |
+
"logits/chosen": -1.437566920334179,
|
| 144 |
+
"logits/rejected": -1.0028015186250743,
|
| 145 |
+
"logps/chosen": -137.25361728668213,
|
| 146 |
+
"logps/rejected": -203.34245681762695,
|
| 147 |
+
"loss": 0.1016073077917099,
|
| 148 |
+
"mean_token_accuracy": 0.6037986874580383,
|
| 149 |
"num_tokens": 54551.0,
|
| 150 |
"rewards/accuracies": 1.0,
|
| 151 |
+
"rewards/chosen": 0.6320773344486952,
|
| 152 |
+
"rewards/margins": 2.5685721784830093,
|
| 153 |
+
"rewards/rejected": -1.9364948570728302,
|
| 154 |
"step": 8
|
| 155 |
},
|
| 156 |
{
|
| 157 |
+
"entropy": 1.3007782101631165,
|
| 158 |
"epoch": 0.4528301886792453,
|
| 159 |
+
"grad_norm": 0.6796875,
|
| 160 |
"learning_rate": 4.3333333333333334e-05,
|
| 161 |
+
"logits/chosen": -1.4337730887270794,
|
| 162 |
+
"logits/rejected": -0.9420388615307833,
|
| 163 |
+
"logps/chosen": -133.88286685943604,
|
| 164 |
+
"logps/rejected": -215.5273036956787,
|
| 165 |
+
"loss": 0.06624128669500351,
|
| 166 |
+
"mean_token_accuracy": 0.6150126904249191,
|
| 167 |
"num_tokens": 61082.0,
|
| 168 |
"rewards/accuracies": 1.0,
|
| 169 |
+
"rewards/chosen": 0.7177156624384224,
|
| 170 |
+
"rewards/margins": 3.2935406416654587,
|
| 171 |
+
"rewards/rejected": -2.5758249759674072,
|
| 172 |
"step": 9
|
| 173 |
},
|
| 174 |
{
|
| 175 |
+
"entropy": 1.3091522455215454,
|
| 176 |
"epoch": 0.5031446540880503,
|
| 177 |
+
"grad_norm": 0.515625,
|
| 178 |
"learning_rate": 4.25e-05,
|
| 179 |
+
"logits/chosen": -1.7237460889408442,
|
| 180 |
+
"logits/rejected": -1.1708205992219858,
|
| 181 |
+
"logps/chosen": -114.79162216186523,
|
| 182 |
+
"logps/rejected": -216.33137321472168,
|
| 183 |
+
"loss": 0.03747009485960007,
|
| 184 |
"mean_token_accuracy": 0.5977272689342499,
|
| 185 |
"num_tokens": 68063.0,
|
| 186 |
"rewards/accuracies": 1.0,
|
| 187 |
+
"rewards/chosen": 0.7877620831131935,
|
| 188 |
+
"rewards/margins": 4.083773195743561,
|
| 189 |
+
"rewards/rejected": -3.296011045575142,
|
| 190 |
"step": 10
|
| 191 |
},
|
| 192 |
{
|
| 193 |
+
"entropy": 1.2404007613658905,
|
| 194 |
"epoch": 0.5534591194968553,
|
| 195 |
+
"grad_norm": 0.259765625,
|
| 196 |
"learning_rate": 4.166666666666667e-05,
|
| 197 |
+
"logits/chosen": -1.655923360923379,
|
| 198 |
+
"logits/rejected": -1.1929389227396723,
|
| 199 |
+
"logps/chosen": -137.04386043548584,
|
| 200 |
+
"logps/rejected": -223.17731285095215,
|
| 201 |
+
"loss": 0.019634969532489777,
|
| 202 |
+
"mean_token_accuracy": 0.615639328956604,
|
| 203 |
"num_tokens": 73558.0,
|
| 204 |
"rewards/accuracies": 1.0,
|
| 205 |
+
"rewards/chosen": 1.1441535080084577,
|
| 206 |
+
"rewards/margins": 4.826931297779083,
|
| 207 |
+
"rewards/rejected": -3.6827778220176697,
|
| 208 |
"step": 11
|
| 209 |
},
|
| 210 |
{
|
| 211 |
+
"entropy": 1.2675090283155441,
|
| 212 |
"epoch": 0.6037735849056604,
|
| 213 |
+
"grad_norm": 0.16796875,
|
| 214 |
"learning_rate": 4.0833333333333334e-05,
|
| 215 |
+
"logits/chosen": -1.738670325722853,
|
| 216 |
+
"logits/rejected": -1.2716987398570991,
|
| 217 |
+
"logps/chosen": -133.57194137573242,
|
| 218 |
+
"logps/rejected": -223.28443908691406,
|
| 219 |
+
"loss": 0.013121644034981728,
|
| 220 |
+
"mean_token_accuracy": 0.6074652224779129,
|
| 221 |
"num_tokens": 79421.0,
|
| 222 |
"rewards/accuracies": 1.0,
|
| 223 |
+
"rewards/chosen": 1.0685334280133247,
|
| 224 |
+
"rewards/margins": 5.048604816198349,
|
| 225 |
+
"rewards/rejected": -3.980071395635605,
|
| 226 |
"step": 12
|
| 227 |
},
|
| 228 |
{
|
| 229 |
+
"entropy": 1.221432313323021,
|
| 230 |
"epoch": 0.6540880503144654,
|
| 231 |
+
"grad_norm": 0.466796875,
|
| 232 |
"learning_rate": 4e-05,
|
| 233 |
+
"logits/chosen": -1.8638311019017777,
|
| 234 |
+
"logits/rejected": -1.3540506586835501,
|
| 235 |
+
"logps/chosen": -130.8041524887085,
|
| 236 |
+
"logps/rejected": -240.23817443847656,
|
| 237 |
+
"loss": 0.029251903295516968,
|
| 238 |
+
"mean_token_accuracy": 0.6208610609173775,
|
| 239 |
"num_tokens": 86682.0,
|
| 240 |
"rewards/accuracies": 1.0,
|
| 241 |
+
"rewards/chosen": 0.4837151520187035,
|
| 242 |
+
"rewards/margins": 5.231908708810806,
|
| 243 |
+
"rewards/rejected": -4.748193636536598,
|
| 244 |
"step": 13
|
| 245 |
},
|
| 246 |
{
|
| 247 |
+
"entropy": 1.179955169558525,
|
| 248 |
"epoch": 0.7044025157232704,
|
| 249 |
+
"grad_norm": 0.263671875,
|
| 250 |
"learning_rate": 3.9166666666666665e-05,
|
| 251 |
+
"logits/chosen": -1.714330251919895,
|
| 252 |
+
"logits/rejected": -1.4322710140173394,
|
| 253 |
+
"logps/chosen": -134.82413864135742,
|
| 254 |
+
"logps/rejected": -221.86817169189453,
|
| 255 |
+
"loss": 0.019698552787303925,
|
| 256 |
+
"mean_token_accuracy": 0.6337382346391678,
|
| 257 |
"num_tokens": 93035.0,
|
| 258 |
"rewards/accuracies": 1.0,
|
| 259 |
+
"rewards/chosen": 0.23185482621192932,
|
| 260 |
+
"rewards/margins": 5.084479838609695,
|
| 261 |
+
"rewards/rejected": -4.852624952793121,
|
| 262 |
"step": 14
|
| 263 |
},
|
| 264 |
{
|
| 265 |
+
"entropy": 1.1727432310581207,
|
| 266 |
"epoch": 0.7547169811320755,
|
| 267 |
+
"grad_norm": 0.046142578125,
|
| 268 |
"learning_rate": 3.8333333333333334e-05,
|
| 269 |
+
"logits/chosen": -1.907136892939249,
|
| 270 |
+
"logits/rejected": -1.514815267146274,
|
| 271 |
+
"logps/chosen": -132.96124839782715,
|
| 272 |
+
"logps/rejected": -249.50230979919434,
|
| 273 |
+
"loss": 0.002145277801901102,
|
| 274 |
+
"mean_token_accuracy": 0.6479089632630348,
|
| 275 |
"num_tokens": 98572.0,
|
| 276 |
"rewards/accuracies": 1.0,
|
| 277 |
+
"rewards/chosen": 1.1812188103795052,
|
| 278 |
+
"rewards/margins": 7.1862099170684814,
|
| 279 |
+
"rewards/rejected": -6.004990994930267,
|
| 280 |
"step": 15
|
| 281 |
},
|
| 282 |
{
|
| 283 |
+
"entropy": 1.2092190980911255,
|
| 284 |
"epoch": 0.8050314465408805,
|
| 285 |
+
"grad_norm": 0.203125,
|
| 286 |
"learning_rate": 3.7500000000000003e-05,
|
| 287 |
+
"logits/chosen": -1.8553556408587935,
|
| 288 |
+
"logits/rejected": -1.4260795074385058,
|
| 289 |
+
"logps/chosen": -139.758376121521,
|
| 290 |
+
"logps/rejected": -248.12162590026855,
|
| 291 |
+
"loss": 0.015183546580374241,
|
| 292 |
+
"mean_token_accuracy": 0.6128971949219704,
|
| 293 |
"num_tokens": 106014.0,
|
| 294 |
"rewards/accuracies": 1.0,
|
| 295 |
+
"rewards/chosen": 0.30653896857984364,
|
| 296 |
+
"rewards/margins": 6.243123233318329,
|
| 297 |
+
"rewards/rejected": -5.9365842044353485,
|
| 298 |
"step": 16
|
| 299 |
},
|
| 300 |
{
|
| 301 |
+
"entropy": 1.2008470296859741,
|
| 302 |
"epoch": 0.8553459119496856,
|
| 303 |
+
"grad_norm": 0.134765625,
|
| 304 |
"learning_rate": 3.6666666666666666e-05,
|
| 305 |
+
"logits/chosen": -1.9781900924423292,
|
| 306 |
+
"logits/rejected": -1.3513694447951352,
|
| 307 |
+
"logps/chosen": -127.7047929763794,
|
| 308 |
+
"logps/rejected": -251.46310424804688,
|
| 309 |
+
"loss": 0.005312850698828697,
|
| 310 |
"mean_token_accuracy": 0.6195648014545441,
|
| 311 |
"num_tokens": 112297.0,
|
| 312 |
"rewards/accuracies": 1.0,
|
| 313 |
+
"rewards/chosen": 0.948453530203551,
|
| 314 |
+
"rewards/margins": 7.2391074895858765,
|
| 315 |
+
"rewards/rejected": -6.290653884410858,
|
| 316 |
"step": 17
|
| 317 |
},
|
| 318 |
{
|
| 319 |
+
"entropy": 1.1938088834285736,
|
| 320 |
"epoch": 0.9056603773584906,
|
| 321 |
+
"grad_norm": 0.0283203125,
|
| 322 |
"learning_rate": 3.5833333333333335e-05,
|
| 323 |
+
"logits/chosen": -2.0970170230360243,
|
| 324 |
+
"logits/rejected": -1.6893045219197895,
|
| 325 |
+
"logps/chosen": -128.71086406707764,
|
| 326 |
+
"logps/rejected": -248.6879997253418,
|
| 327 |
+
"loss": 0.0016415835125371814,
|
| 328 |
+
"mean_token_accuracy": 0.6066833287477493,
|
| 329 |
"num_tokens": 118259.0,
|
| 330 |
"rewards/accuracies": 1.0,
|
| 331 |
+
"rewards/chosen": 0.6370860114693642,
|
| 332 |
+
"rewards/margins": 7.347302317619324,
|
| 333 |
+
"rewards/rejected": -6.710216283798218,
|
| 334 |
"step": 18
|
| 335 |
},
|
| 336 |
{
|
| 337 |
+
"entropy": 1.1347751468420029,
|
| 338 |
"epoch": 0.9559748427672956,
|
| 339 |
+
"grad_norm": 0.021240234375,
|
| 340 |
"learning_rate": 3.5e-05,
|
| 341 |
+
"logits/chosen": -2.174784405399738,
|
| 342 |
+
"logits/rejected": -1.8171304190186441,
|
| 343 |
+
"logps/chosen": -137.3084011077881,
|
| 344 |
+
"logps/rejected": -237.20912742614746,
|
| 345 |
+
"loss": 0.0013000022154301405,
|
| 346 |
+
"mean_token_accuracy": 0.6105500273406506,
|
| 347 |
"num_tokens": 123618.0,
|
| 348 |
"rewards/accuracies": 1.0,
|
| 349 |
+
"rewards/chosen": 1.1855077669024467,
|
| 350 |
+
"rewards/margins": 7.6381800174713135,
|
| 351 |
+
"rewards/rejected": -6.452672183513641,
|
| 352 |
"step": 19
|
| 353 |
},
|
| 354 |
{
|
| 355 |
+
"entropy": 1.2229801756995065,
|
| 356 |
"epoch": 1.0,
|
| 357 |
+
"grad_norm": 0.034912109375,
|
| 358 |
"learning_rate": 3.4166666666666666e-05,
|
| 359 |
+
"logits/chosen": -1.9402861332224905,
|
| 360 |
+
"logits/rejected": -1.478508916635409,
|
| 361 |
+
"logps/chosen": -140.62159075055803,
|
| 362 |
+
"logps/rejected": -258.58056640625,
|
| 363 |
+
"loss": 0.0018078959546983242,
|
| 364 |
+
"mean_token_accuracy": 0.6002635615212577,
|
| 365 |
"num_tokens": 130000.0,
|
| 366 |
"rewards/accuracies": 1.0,
|
| 367 |
+
"rewards/chosen": -0.18390404965196336,
|
| 368 |
+
"rewards/margins": 7.131651401519775,
|
| 369 |
+
"rewards/rejected": -7.31555564062936,
|
| 370 |
"step": 20
|
| 371 |
},
|
| 372 |
{
|
| 373 |
+
"entropy": 1.1338201016187668,
|
| 374 |
"epoch": 1.050314465408805,
|
| 375 |
+
"grad_norm": 0.00341796875,
|
| 376 |
"learning_rate": 3.3333333333333335e-05,
|
| 377 |
+
"logits/chosen": -2.2381289981693855,
|
| 378 |
+
"logits/rejected": -1.7685196120349025,
|
| 379 |
+
"logps/chosen": -132.71670532226562,
|
| 380 |
+
"logps/rejected": -270.32165908813477,
|
| 381 |
+
"loss": 0.00017723625933285803,
|
| 382 |
+
"mean_token_accuracy": 0.640245221555233,
|
| 383 |
"num_tokens": 135799.0,
|
| 384 |
"rewards/accuracies": 1.0,
|
| 385 |
+
"rewards/chosen": 1.0271762115880847,
|
| 386 |
+
"rewards/margins": 9.069988369941711,
|
| 387 |
+
"rewards/rejected": -8.04281210899353,
|
| 388 |
"step": 21
|
| 389 |
},
|
| 390 |
{
|
| 391 |
+
"entropy": 1.184071958065033,
|
| 392 |
"epoch": 1.10062893081761,
|
| 393 |
+
"grad_norm": 0.06201171875,
|
| 394 |
"learning_rate": 3.2500000000000004e-05,
|
| 395 |
+
"logits/chosen": -2.2030959691219643,
|
| 396 |
+
"logits/rejected": -1.549622772314418,
|
| 397 |
+
"logps/chosen": -137.2278184890747,
|
| 398 |
+
"logps/rejected": -278.6941947937012,
|
| 399 |
+
"loss": 0.002708145882934332,
|
| 400 |
+
"mean_token_accuracy": 0.5984649807214737,
|
| 401 |
"num_tokens": 142686.0,
|
| 402 |
"rewards/accuracies": 1.0,
|
| 403 |
+
"rewards/chosen": 0.10265790205448866,
|
| 404 |
+
"rewards/margins": 8.331138014793396,
|
| 405 |
+
"rewards/rejected": -8.228479981422424,
|
| 406 |
"step": 22
|
| 407 |
},
|
| 408 |
{
|
| 409 |
+
"entropy": 1.1163400262594223,
|
| 410 |
"epoch": 1.150943396226415,
|
| 411 |
+
"grad_norm": 0.010498046875,
|
| 412 |
"learning_rate": 3.1666666666666666e-05,
|
| 413 |
+
"logits/chosen": -2.2692944342560915,
|
| 414 |
+
"logits/rejected": -1.9815728330371412,
|
| 415 |
+
"logps/chosen": -121.65661811828613,
|
| 416 |
+
"logps/rejected": -251.65609550476074,
|
| 417 |
+
"loss": 0.0006153068970888853,
|
| 418 |
+
"mean_token_accuracy": 0.5803752839565277,
|
| 419 |
"num_tokens": 148888.0,
|
| 420 |
"rewards/accuracies": 1.0,
|
| 421 |
+
"rewards/chosen": 0.6473507843911648,
|
| 422 |
+
"rewards/margins": 8.257275819778442,
|
| 423 |
+
"rewards/rejected": -7.609925091266632,
|
| 424 |
"step": 23
|
| 425 |
},
|
| 426 |
{
|
| 427 |
+
"entropy": 1.2022811472415924,
|
| 428 |
"epoch": 1.20125786163522,
|
| 429 |
+
"grad_norm": 0.0196533203125,
|
| 430 |
"learning_rate": 3.0833333333333335e-05,
|
| 431 |
+
"logits/chosen": -2.0684997205168574,
|
| 432 |
+
"logits/rejected": -1.5187979793505884,
|
| 433 |
+
"logps/chosen": -142.0221881866455,
|
| 434 |
+
"logps/rejected": -288.0162467956543,
|
| 435 |
+
"loss": 0.0010658408282324672,
|
| 436 |
+
"mean_token_accuracy": 0.6041584983468056,
|
| 437 |
"num_tokens": 155066.0,
|
| 438 |
"rewards/accuracies": 1.0,
|
| 439 |
+
"rewards/chosen": 0.3129555657505989,
|
| 440 |
+
"rewards/margins": 9.287788093090057,
|
| 441 |
+
"rewards/rejected": -8.974832653999329,
|
| 442 |
"step": 24
|
| 443 |
},
|
| 444 |
{
|
| 445 |
+
"entropy": 1.0715540498495102,
|
| 446 |
"epoch": 1.251572327044025,
|
| 447 |
+
"grad_norm": 0.0152587890625,
|
| 448 |
"learning_rate": 3e-05,
|
| 449 |
+
"logits/chosen": -2.253688589577229,
|
| 450 |
+
"logits/rejected": -1.709912522981535,
|
| 451 |
+
"logps/chosen": -124.24794864654541,
|
| 452 |
+
"logps/rejected": -268.31462478637695,
|
| 453 |
+
"loss": 0.0007783680921420455,
|
| 454 |
+
"mean_token_accuracy": 0.6487728357315063,
|
| 455 |
"num_tokens": 161896.0,
|
| 456 |
"rewards/accuracies": 1.0,
|
| 457 |
+
"rewards/chosen": 0.5254653260344639,
|
| 458 |
+
"rewards/margins": 8.636412143707275,
|
| 459 |
+
"rewards/rejected": -8.110946834087372,
|
| 460 |
"step": 25
|
| 461 |
},
|
| 462 |
{
|
| 463 |
+
"entropy": 1.1292345225811005,
|
| 464 |
"epoch": 1.3018867924528301,
|
| 465 |
+
"grad_norm": 0.01611328125,
|
| 466 |
"learning_rate": 2.916666666666667e-05,
|
| 467 |
+
"logits/chosen": -2.03905456801847,
|
| 468 |
+
"logits/rejected": -1.653255032290945,
|
| 469 |
+
"logps/chosen": -121.39779376983643,
|
| 470 |
+
"logps/rejected": -255.01618576049805,
|
| 471 |
+
"loss": 0.0008846853161230683,
|
| 472 |
+
"mean_token_accuracy": 0.622931219637394,
|
| 473 |
"num_tokens": 168755.0,
|
| 474 |
"rewards/accuracies": 1.0,
|
| 475 |
+
"rewards/chosen": 0.2767142332158983,
|
| 476 |
+
"rewards/margins": 8.121429800987244,
|
| 477 |
+
"rewards/rejected": -7.844715654850006,
|
| 478 |
"step": 26
|
| 479 |
},
|
| 480 |
{
|
| 481 |
+
"entropy": 1.107276238501072,
|
| 482 |
"epoch": 1.3522012578616351,
|
| 483 |
+
"grad_norm": 0.0076904296875,
|
| 484 |
"learning_rate": 2.8333333333333335e-05,
|
| 485 |
+
"logits/chosen": -2.286326659906524,
|
| 486 |
+
"logits/rejected": -1.9377510490705079,
|
| 487 |
+
"logps/chosen": -144.30573749542236,
|
| 488 |
+
"logps/rejected": -272.37299728393555,
|
| 489 |
+
"loss": 0.00033011281630024314,
|
| 490 |
+
"mean_token_accuracy": 0.619342528283596,
|
| 491 |
"num_tokens": 175381.0,
|
| 492 |
"rewards/accuracies": 1.0,
|
| 493 |
+
"rewards/chosen": 0.9377570524811745,
|
| 494 |
+
"rewards/margins": 9.561910688877106,
|
| 495 |
+
"rewards/rejected": -8.6241534948349,
|
| 496 |
"step": 27
|
| 497 |
},
|
| 498 |
{
|
| 499 |
+
"entropy": 1.157460778951645,
|
| 500 |
"epoch": 1.4025157232704402,
|
| 501 |
+
"grad_norm": 0.0047607421875,
|
| 502 |
"learning_rate": 2.7500000000000004e-05,
|
| 503 |
+
"logits/chosen": -2.1326494507814266,
|
| 504 |
+
"logits/rejected": -1.8268033175884986,
|
| 505 |
+
"logps/chosen": -125.0985517501831,
|
| 506 |
+
"logps/rejected": -274.5887870788574,
|
| 507 |
+
"loss": 0.0002390609442954883,
|
| 508 |
+
"mean_token_accuracy": 0.620079979300499,
|
| 509 |
"num_tokens": 181771.0,
|
| 510 |
"rewards/accuracies": 1.0,
|
| 511 |
+
"rewards/chosen": 0.8465396109968424,
|
| 512 |
+
"rewards/margins": 9.534583747386932,
|
| 513 |
+
"rewards/rejected": -8.688044130802155,
|
| 514 |
"step": 28
|
| 515 |
},
|
| 516 |
{
|
| 517 |
+
"entropy": 1.1201048269867897,
|
| 518 |
"epoch": 1.4528301886792452,
|
| 519 |
+
"grad_norm": 0.0093994140625,
|
| 520 |
"learning_rate": 2.6666666666666667e-05,
|
| 521 |
+
"logits/chosen": -2.311015681209517,
|
| 522 |
+
"logits/rejected": -2.0115759556684885,
|
| 523 |
+
"logps/chosen": -140.10973072052002,
|
| 524 |
+
"logps/rejected": -260.4351615905762,
|
| 525 |
+
"loss": 0.0005316605675034225,
|
| 526 |
+
"mean_token_accuracy": 0.6181787773966789,
|
| 527 |
"num_tokens": 188644.0,
|
| 528 |
"rewards/accuracies": 1.0,
|
| 529 |
+
"rewards/chosen": -0.1467662937939167,
|
| 530 |
+
"rewards/margins": 8.486627459526062,
|
| 531 |
+
"rewards/rejected": -8.633393704891205,
|
| 532 |
"step": 29
|
| 533 |
},
|
| 534 |
{
|
| 535 |
+
"entropy": 1.1074048355221748,
|
| 536 |
"epoch": 1.5031446540880502,
|
| 537 |
+
"grad_norm": 0.024658203125,
|
| 538 |
"learning_rate": 2.5833333333333336e-05,
|
| 539 |
+
"logits/chosen": -2.2242952052635343,
|
| 540 |
+
"logits/rejected": -1.9084164743872978,
|
| 541 |
+
"logps/chosen": -120.53641414642334,
|
| 542 |
+
"logps/rejected": -248.61384201049805,
|
| 543 |
+
"loss": 0.0015231292927637696,
|
| 544 |
+
"mean_token_accuracy": 0.6136712729930878,
|
| 545 |
"num_tokens": 196391.0,
|
| 546 |
"rewards/accuracies": 1.0,
|
| 547 |
+
"rewards/chosen": -0.0413979523582384,
|
| 548 |
+
"rewards/margins": 8.107804000377655,
|
| 549 |
+
"rewards/rejected": -8.149202108383179,
|
| 550 |
"step": 30
|
| 551 |
},
|
| 552 |
{
|
| 553 |
+
"entropy": 1.0835556164383888,
|
| 554 |
"epoch": 1.5534591194968552,
|
| 555 |
+
"grad_norm": 0.0308837890625,
|
| 556 |
"learning_rate": 2.5e-05,
|
| 557 |
+
"logits/chosen": -2.2358070307390454,
|
| 558 |
+
"logits/rejected": -1.8015257262782527,
|
| 559 |
+
"logps/chosen": -160.49627590179443,
|
| 560 |
+
"logps/rejected": -286.36713790893555,
|
| 561 |
+
"loss": 0.0012202183715999126,
|
| 562 |
+
"mean_token_accuracy": 0.6140667796134949,
|
| 563 |
"num_tokens": 203518.0,
|
| 564 |
"rewards/accuracies": 1.0,
|
| 565 |
+
"rewards/chosen": -0.12220276962034404,
|
| 566 |
+
"rewards/margins": 8.978903114795685,
|
| 567 |
+
"rewards/rejected": -9.10110592842102,
|
| 568 |
"step": 31
|
| 569 |
},
|
| 570 |
{
|
| 571 |
+
"entropy": 1.1327729299664497,
|
| 572 |
"epoch": 1.6037735849056602,
|
| 573 |
+
"grad_norm": 0.0078125,
|
| 574 |
"learning_rate": 2.4166666666666667e-05,
|
| 575 |
+
"logits/chosen": -2.222914372779498,
|
| 576 |
+
"logits/rejected": -1.8115026450741711,
|
| 577 |
+
"logps/chosen": -143.74180603027344,
|
| 578 |
+
"logps/rejected": -296.8906936645508,
|
| 579 |
+
"loss": 0.0003141126944683492,
|
| 580 |
+
"mean_token_accuracy": 0.6098875515162945,
|
| 581 |
"num_tokens": 209850.0,
|
| 582 |
"rewards/accuracies": 1.0,
|
| 583 |
+
"rewards/chosen": 0.1798817589879036,
|
| 584 |
+
"rewards/margins": 10.448372840881348,
|
| 585 |
+
"rewards/rejected": -10.268491089344025,
|
| 586 |
"step": 32
|
| 587 |
},
|
| 588 |
{
|
| 589 |
+
"entropy": 1.125333271920681,
|
| 590 |
"epoch": 1.6540880503144653,
|
| 591 |
+
"grad_norm": 0.017578125,
|
| 592 |
"learning_rate": 2.3333333333333336e-05,
|
| 593 |
+
"logits/chosen": -2.4135272067915143,
|
| 594 |
+
"logits/rejected": -1.7676647261719525,
|
| 595 |
+
"logps/chosen": -125.86479663848877,
|
| 596 |
+
"logps/rejected": -266.06410026550293,
|
| 597 |
+
"loss": 0.0005237428122200072,
|
| 598 |
+
"mean_token_accuracy": 0.6229566335678101,
|
| 599 |
"num_tokens": 215637.0,
|
| 600 |
"rewards/accuracies": 1.0,
|
| 601 |
+
"rewards/chosen": 0.7084813113324344,
|
| 602 |
+
"rewards/margins": 9.689094841480255,
|
| 603 |
+
"rewards/rejected": -8.980613589286804,
|
| 604 |
"step": 33
|
| 605 |
},
|
| 606 |
{
|
| 607 |
+
"entropy": 1.058401882648468,
|
| 608 |
"epoch": 1.7044025157232703,
|
| 609 |
+
"grad_norm": 0.007354736328125,
|
| 610 |
"learning_rate": 2.25e-05,
|
| 611 |
+
"logits/chosen": -2.46378613488542,
|
| 612 |
+
"logits/rejected": -2.1071431291236653,
|
| 613 |
+
"logps/chosen": -125.6760368347168,
|
| 614 |
+
"logps/rejected": -258.0559844970703,
|
| 615 |
+
"loss": 0.00038854233571328223,
|
| 616 |
+
"mean_token_accuracy": 0.6303330659866333,
|
| 617 |
"num_tokens": 221051.0,
|
| 618 |
"rewards/accuracies": 1.0,
|
| 619 |
+
"rewards/chosen": 1.3429398629814386,
|
| 620 |
+
"rewards/margins": 10.128098785877228,
|
| 621 |
+
"rewards/rejected": -8.785158812999725,
|
| 622 |
"step": 34
|
| 623 |
},
|
| 624 |
{
|
| 625 |
+
"entropy": 1.1321996748447418,
|
| 626 |
"epoch": 1.7547169811320755,
|
| 627 |
+
"grad_norm": 0.0030670166015625,
|
| 628 |
"learning_rate": 2.1666666666666667e-05,
|
| 629 |
+
"logits/chosen": -2.3747636222967063,
|
| 630 |
+
"logits/rejected": -2.115993388323722,
|
| 631 |
+
"logps/chosen": -131.67159175872803,
|
| 632 |
+
"logps/rejected": -276.36437797546387,
|
| 633 |
+
"loss": 0.00012030061770929024,
|
| 634 |
+
"mean_token_accuracy": 0.6251056790351868,
|
| 635 |
"num_tokens": 226426.0,
|
| 636 |
"rewards/accuracies": 1.0,
|
| 637 |
+
"rewards/chosen": 0.8489564098417759,
|
| 638 |
+
"rewards/margins": 10.621768474578857,
|
| 639 |
+
"rewards/rejected": -9.772812008857727,
|
| 640 |
"step": 35
|
| 641 |
},
|
| 642 |
{
|
| 643 |
+
"entropy": 1.116398274898529,
|
| 644 |
"epoch": 1.8050314465408805,
|
| 645 |
+
"grad_norm": 0.0181884765625,
|
| 646 |
"learning_rate": 2.0833333333333336e-05,
|
| 647 |
+
"logits/chosen": -2.4568723077785557,
|
| 648 |
+
"logits/rejected": -1.9738618373268513,
|
| 649 |
+
"logps/chosen": -117.32646560668945,
|
| 650 |
+
"logps/rejected": -272.7527599334717,
|
| 651 |
+
"loss": 0.0008142158621922135,
|
| 652 |
+
"mean_token_accuracy": 0.6250453069806099,
|
| 653 |
"num_tokens": 233419.0,
|
| 654 |
"rewards/accuracies": 1.0,
|
| 655 |
+
"rewards/chosen": 0.3417433723807335,
|
| 656 |
+
"rewards/margins": 9.766010344028473,
|
| 657 |
+
"rewards/rejected": -9.424266993999481,
|
| 658 |
"step": 36
|
| 659 |
},
|
| 660 |
{
|
| 661 |
+
"entropy": 1.0965562090277672,
|
| 662 |
"epoch": 1.8553459119496856,
|
| 663 |
+
"grad_norm": 0.0027313232421875,
|
| 664 |
"learning_rate": 2e-05,
|
| 665 |
+
"logits/chosen": -2.3551532120785383,
|
| 666 |
+
"logits/rejected": -1.9812547363037738,
|
| 667 |
+
"logps/chosen": -144.9142713546753,
|
| 668 |
+
"logps/rejected": -296.3636951446533,
|
| 669 |
+
"loss": 0.00013589797890745103,
|
| 670 |
+
"mean_token_accuracy": 0.5917238146066666,
|
| 671 |
"num_tokens": 240673.0,
|
| 672 |
"rewards/accuracies": 1.0,
|
| 673 |
+
"rewards/chosen": -0.10223913192749023,
|
| 674 |
+
"rewards/margins": 10.127011775970459,
|
| 675 |
+
"rewards/rejected": -10.22925090789795,
|
| 676 |
"step": 37
|
| 677 |
},
|
| 678 |
{
|
| 679 |
+
"entropy": 1.088953472673893,
|
| 680 |
"epoch": 1.9056603773584906,
|
| 681 |
+
"grad_norm": 0.0032806396484375,
|
| 682 |
"learning_rate": 1.9166666666666667e-05,
|
| 683 |
+
"logits/chosen": -2.326897647347544,
|
| 684 |
+
"logits/rejected": -2.0923411183004155,
|
| 685 |
+
"logps/chosen": -141.62347412109375,
|
| 686 |
+
"logps/rejected": -285.8790645599365,
|
| 687 |
+
"loss": 0.0001322527095908299,
|
| 688 |
+
"mean_token_accuracy": 0.6075941771268845,
|
| 689 |
"num_tokens": 245974.0,
|
| 690 |
"rewards/accuracies": 1.0,
|
| 691 |
+
"rewards/chosen": 0.7096749469637871,
|
| 692 |
+
"rewards/margins": 10.769983649253845,
|
| 693 |
+
"rewards/rejected": -10.060308814048767,
|
| 694 |
"step": 38
|
| 695 |
},
|
| 696 |
{
|
| 697 |
+
"entropy": 1.1708777844905853,
|
| 698 |
"epoch": 1.9559748427672956,
|
| 699 |
+
"grad_norm": 0.003997802734375,
|
| 700 |
"learning_rate": 1.8333333333333333e-05,
|
| 701 |
+
"logits/chosen": -2.1918049696005726,
|
| 702 |
+
"logits/rejected": -1.7380488443097255,
|
| 703 |
+
"logps/chosen": -127.21480464935303,
|
| 704 |
+
"logps/rejected": -273.00823402404785,
|
| 705 |
+
"loss": 0.00020453613251447678,
|
| 706 |
+
"mean_token_accuracy": 0.6081509068608284,
|
| 707 |
"num_tokens": 253184.0,
|
| 708 |
"rewards/accuracies": 1.0,
|
| 709 |
+
"rewards/chosen": -0.09920912818051875,
|
| 710 |
+
"rewards/margins": 9.214752078056335,
|
| 711 |
+
"rewards/rejected": -9.313961207866669,
|
| 712 |
"step": 39
|
| 713 |
},
|
| 714 |
{
|
| 715 |
+
"entropy": 1.1617241672107153,
|
| 716 |
"epoch": 2.0,
|
| 717 |
+
"grad_norm": 0.0191650390625,
|
| 718 |
"learning_rate": 1.75e-05,
|
| 719 |
+
"logits/chosen": -2.2749118295696342,
|
| 720 |
+
"logits/rejected": -1.7584887991939784,
|
| 721 |
+
"logps/chosen": -154.0042724609375,
|
| 722 |
+
"logps/rejected": -292.14568001883373,
|
| 723 |
+
"loss": 0.0009170390549115837,
|
| 724 |
"mean_token_accuracy": 0.5644707764898028,
|
| 725 |
"num_tokens": 260000.0,
|
| 726 |
"rewards/accuracies": 1.0,
|
| 727 |
+
"rewards/chosen": -0.44687261964593616,
|
| 728 |
+
"rewards/margins": 9.13522618157523,
|
| 729 |
+
"rewards/rejected": -9.582098756517683,
|
| 730 |
"step": 40
|
| 731 |
},
|
| 732 |
{
|
| 733 |
+
"entropy": 1.1219489574432373,
|
| 734 |
"epoch": 2.050314465408805,
|
| 735 |
+
"grad_norm": 0.00396728515625,
|
| 736 |
"learning_rate": 1.6666666666666667e-05,
|
| 737 |
+
"logits/chosen": -2.3933433594531426,
|
| 738 |
+
"logits/rejected": -1.8782996338947386,
|
| 739 |
+
"logps/chosen": -121.1051197052002,
|
| 740 |
+
"logps/rejected": -273.1605224609375,
|
| 741 |
+
"loss": 0.00018580301548354328,
|
| 742 |
"mean_token_accuracy": 0.6062684953212738,
|
| 743 |
"num_tokens": 266060.0,
|
| 744 |
"rewards/accuracies": 1.0,
|
| 745 |
+
"rewards/chosen": 0.5098631188739091,
|
| 746 |
+
"rewards/margins": 9.81256103515625,
|
| 747 |
+
"rewards/rejected": -9.302697837352753,
|
| 748 |
"step": 41
|
| 749 |
},
|
| 750 |
{
|
| 751 |
+
"entropy": 1.142582356929779,
|
| 752 |
"epoch": 2.10062893081761,
|
| 753 |
+
"grad_norm": 0.00506591796875,
|
| 754 |
"learning_rate": 1.5833333333333333e-05,
|
| 755 |
+
"logits/chosen": -2.3932424547453,
|
| 756 |
+
"logits/rejected": -2.034844354918984,
|
| 757 |
+
"logps/chosen": -132.19951725006104,
|
| 758 |
+
"logps/rejected": -273.3875484466553,
|
| 759 |
+
"loss": 0.00027762039098888636,
|
| 760 |
+
"mean_token_accuracy": 0.6254889070987701,
|
| 761 |
"num_tokens": 272769.0,
|
| 762 |
"rewards/accuracies": 1.0,
|
| 763 |
+
"rewards/chosen": 0.027547355741262436,
|
| 764 |
+
"rewards/margins": 9.588396728038788,
|
| 765 |
+
"rewards/rejected": -9.560849368572235,
|
| 766 |
"step": 42
|
| 767 |
},
|
| 768 |
{
|
| 769 |
+
"entropy": 1.0831276252865791,
|
| 770 |
"epoch": 2.150943396226415,
|
| 771 |
+
"grad_norm": 0.0201416015625,
|
| 772 |
"learning_rate": 1.5e-05,
|
| 773 |
+
"logits/chosen": -2.3213323581687466,
|
| 774 |
+
"logits/rejected": -1.9188541617869985,
|
| 775 |
+
"logps/chosen": -140.96678638458252,
|
| 776 |
+
"logps/rejected": -298.01341819763184,
|
| 777 |
+
"loss": 0.0006403317674994469,
|
| 778 |
+
"mean_token_accuracy": 0.6212100088596344,
|
| 779 |
"num_tokens": 279603.0,
|
| 780 |
"rewards/accuracies": 1.0,
|
| 781 |
+
"rewards/chosen": -0.13022289611399174,
|
| 782 |
+
"rewards/margins": 10.482895195484161,
|
| 783 |
+
"rewards/rejected": -10.61311811208725,
|
| 784 |
"step": 43
|
| 785 |
},
|
| 786 |
{
|
| 787 |
+
"entropy": 1.1165131032466888,
|
| 788 |
"epoch": 2.20125786163522,
|
| 789 |
+
"grad_norm": 0.004119873046875,
|
| 790 |
"learning_rate": 1.4166666666666668e-05,
|
| 791 |
+
"logits/chosen": -2.325539811977018,
|
| 792 |
+
"logits/rejected": -1.819132333499079,
|
| 793 |
+
"logps/chosen": -137.1863489151001,
|
| 794 |
+
"logps/rejected": -280.5503120422363,
|
| 795 |
+
"loss": 0.00018718022329267114,
|
| 796 |
+
"mean_token_accuracy": 0.6205575913190842,
|
| 797 |
"num_tokens": 286436.0,
|
| 798 |
"rewards/accuracies": 1.0,
|
| 799 |
+
"rewards/chosen": 0.0954840648919344,
|
| 800 |
+
"rewards/margins": 9.663193047046661,
|
| 801 |
+
"rewards/rejected": -9.567708969116211,
|
| 802 |
"step": 44
|
| 803 |
},
|
| 804 |
{
|
| 805 |
+
"entropy": 1.0798147842288017,
|
| 806 |
"epoch": 2.251572327044025,
|
| 807 |
+
"grad_norm": 0.006195068359375,
|
| 808 |
"learning_rate": 1.3333333333333333e-05,
|
| 809 |
+
"logits/chosen": -2.462720523269614,
|
| 810 |
+
"logits/rejected": -1.9150426029399608,
|
| 811 |
+
"logps/chosen": -137.90959072113037,
|
| 812 |
+
"logps/rejected": -299.62617683410645,
|
| 813 |
+
"loss": 0.00032327763619832695,
|
| 814 |
+
"mean_token_accuracy": 0.6055086851119995,
|
| 815 |
"num_tokens": 294223.0,
|
| 816 |
"rewards/accuracies": 1.0,
|
| 817 |
+
"rewards/chosen": -0.0803464986383915,
|
| 818 |
+
"rewards/margins": 10.386663496494293,
|
| 819 |
+
"rewards/rejected": -10.467009961605072,
|
| 820 |
"step": 45
|
| 821 |
},
|
| 822 |
{
|
| 823 |
+
"entropy": 1.0406965985894203,
|
| 824 |
"epoch": 2.30188679245283,
|
| 825 |
+
"grad_norm": 0.00360107421875,
|
| 826 |
"learning_rate": 1.25e-05,
|
| 827 |
+
"logits/chosen": -2.351859813790029,
|
| 828 |
+
"logits/rejected": -2.079019230872164,
|
| 829 |
+
"logps/chosen": -114.20672798156738,
|
| 830 |
+
"logps/rejected": -270.77587890625,
|
| 831 |
+
"loss": 0.0001596831134520471,
|
| 832 |
+
"mean_token_accuracy": 0.6743896827101707,
|
| 833 |
"num_tokens": 299634.0,
|
| 834 |
"rewards/accuracies": 1.0,
|
| 835 |
+
"rewards/chosen": 1.2796257368754596,
|
| 836 |
+
"rewards/margins": 10.65468156337738,
|
| 837 |
+
"rewards/rejected": -9.375055730342865,
|
| 838 |
"step": 46
|
| 839 |
},
|
| 840 |
{
|
| 841 |
+
"entropy": 1.1319249421358109,
|
| 842 |
"epoch": 2.352201257861635,
|
| 843 |
+
"grad_norm": 0.00323486328125,
|
| 844 |
"learning_rate": 1.1666666666666668e-05,
|
| 845 |
+
"logits/chosen": -2.3348495432588066,
|
| 846 |
+
"logits/rejected": -1.8859447778318736,
|
| 847 |
+
"logps/chosen": -162.34913635253906,
|
| 848 |
+
"logps/rejected": -305.2253665924072,
|
| 849 |
+
"loss": 0.00012244780373293906,
|
| 850 |
+
"mean_token_accuracy": 0.5992361158132553,
|
| 851 |
"num_tokens": 305920.0,
|
| 852 |
"rewards/accuracies": 1.0,
|
| 853 |
+
"rewards/chosen": -0.0735319098457694,
|
| 854 |
+
"rewards/margins": 10.709958910942078,
|
| 855 |
+
"rewards/rejected": -10.783490896224976,
|
| 856 |
"step": 47
|
| 857 |
},
|
| 858 |
{
|
| 859 |
+
"entropy": 1.1012349873781204,
|
| 860 |
"epoch": 2.40251572327044,
|
| 861 |
+
"grad_norm": 0.00555419921875,
|
| 862 |
"learning_rate": 1.0833333333333334e-05,
|
| 863 |
+
"logits/chosen": -2.376434497997167,
|
| 864 |
+
"logits/rejected": -1.9649504169742804,
|
| 865 |
+
"logps/chosen": -134.29817581176758,
|
| 866 |
+
"logps/rejected": -269.4762592315674,
|
| 867 |
+
"loss": 0.00029120981344021857,
|
| 868 |
+
"mean_token_accuracy": 0.615195669233799,
|
| 869 |
"num_tokens": 313346.0,
|
| 870 |
"rewards/accuracies": 1.0,
|
| 871 |
+
"rewards/chosen": 0.04450349509716034,
|
| 872 |
+
"rewards/margins": 8.847583532333374,
|
| 873 |
+
"rewards/rejected": -8.803080201148987,
|
| 874 |
"step": 48
|
| 875 |
},
|
| 876 |
{
|
| 877 |
+
"entropy": 1.0822330936789513,
|
| 878 |
"epoch": 2.452830188679245,
|
| 879 |
+
"grad_norm": 0.004486083984375,
|
| 880 |
"learning_rate": 1e-05,
|
| 881 |
+
"logits/chosen": -2.4181823111443506,
|
| 882 |
+
"logits/rejected": -1.9432117626083543,
|
| 883 |
+
"logps/chosen": -139.22668647766113,
|
| 884 |
+
"logps/rejected": -286.98141860961914,
|
| 885 |
+
"loss": 0.00024342790129594505,
|
| 886 |
+
"mean_token_accuracy": 0.5807452276349068,
|
| 887 |
"num_tokens": 320662.0,
|
| 888 |
"rewards/accuracies": 1.0,
|
| 889 |
+
"rewards/chosen": -0.20274513261392713,
|
| 890 |
+
"rewards/margins": 9.553876638412476,
|
| 891 |
+
"rewards/rejected": -9.756621778011322,
|
| 892 |
"step": 49
|
| 893 |
},
|
| 894 |
{
|
| 895 |
+
"entropy": 1.1255912110209465,
|
| 896 |
"epoch": 2.50314465408805,
|
| 897 |
+
"grad_norm": 0.01043701171875,
|
| 898 |
"learning_rate": 9.166666666666666e-06,
|
| 899 |
+
"logits/chosen": -2.390428515839434,
|
| 900 |
+
"logits/rejected": -2.070867890248319,
|
| 901 |
+
"logps/chosen": -152.57851219177246,
|
| 902 |
+
"logps/rejected": -278.1038990020752,
|
| 903 |
+
"loss": 0.0005284082726575434,
|
| 904 |
+
"mean_token_accuracy": 0.5914618484675884,
|
| 905 |
"num_tokens": 326611.0,
|
| 906 |
"rewards/accuracies": 1.0,
|
| 907 |
+
"rewards/chosen": 0.24946272000670433,
|
| 908 |
+
"rewards/margins": 9.3264040350914,
|
| 909 |
+
"rewards/rejected": -9.076941430568695,
|
| 910 |
"step": 50
|
| 911 |
},
|
| 912 |
{
|
| 913 |
+
"entropy": 1.10995664447546,
|
| 914 |
"epoch": 2.5534591194968552,
|
| 915 |
+
"grad_norm": 0.005950927734375,
|
| 916 |
"learning_rate": 8.333333333333334e-06,
|
| 917 |
+
"logits/chosen": -2.3087954809493545,
|
| 918 |
+
"logits/rejected": -1.9792274410316972,
|
| 919 |
+
"logps/chosen": -116.59206485748291,
|
| 920 |
+
"logps/rejected": -267.238338470459,
|
| 921 |
+
"loss": 0.00025631688185967505,
|
| 922 |
+
"mean_token_accuracy": 0.625831313431263,
|
| 923 |
"num_tokens": 332814.0,
|
| 924 |
"rewards/accuracies": 1.0,
|
| 925 |
+
"rewards/chosen": 0.8696157680824399,
|
| 926 |
+
"rewards/margins": 10.124218106269836,
|
| 927 |
+
"rewards/rejected": -9.254602372646332,
|
| 928 |
"step": 51
|
| 929 |
},
|
| 930 |
{
|
| 931 |
+
"entropy": 1.117017239332199,
|
| 932 |
"epoch": 2.6037735849056602,
|
| 933 |
+
"grad_norm": 0.00152587890625,
|
| 934 |
"learning_rate": 7.5e-06,
|
| 935 |
+
"logits/chosen": -2.4137662915261817,
|
| 936 |
+
"logits/rejected": -1.9523699949115683,
|
| 937 |
+
"logps/chosen": -120.80679893493652,
|
| 938 |
+
"logps/rejected": -280.64561653137207,
|
| 939 |
+
"loss": 7.924844976514578e-05,
|
| 940 |
+
"mean_token_accuracy": 0.6106409505009651,
|
| 941 |
"num_tokens": 339547.0,
|
| 942 |
"rewards/accuracies": 1.0,
|
| 943 |
+
"rewards/chosen": 0.790530975908041,
|
| 944 |
+
"rewards/margins": 10.681683659553528,
|
| 945 |
+
"rewards/rejected": -9.891152799129486,
|
| 946 |
"step": 52
|
| 947 |
},
|
| 948 |
{
|
| 949 |
+
"entropy": 1.1054791137576103,
|
| 950 |
"epoch": 2.6540880503144653,
|
| 951 |
+
"grad_norm": 0.0164794921875,
|
| 952 |
"learning_rate": 6.666666666666667e-06,
|
| 953 |
+
"logits/chosen": -2.273507778205582,
|
| 954 |
+
"logits/rejected": -1.9109036313937824,
|
| 955 |
+
"logps/chosen": -115.94961833953857,
|
| 956 |
+
"logps/rejected": -269.0294952392578,
|
| 957 |
+
"loss": 0.0007658082176931202,
|
| 958 |
+
"mean_token_accuracy": 0.5990233793854713,
|
| 959 |
"num_tokens": 346647.0,
|
| 960 |
"rewards/accuracies": 1.0,
|
| 961 |
+
"rewards/chosen": 0.25752640701830387,
|
| 962 |
+
"rewards/margins": 9.45040088891983,
|
| 963 |
+
"rewards/rejected": -9.192874610424042,
|
| 964 |
"step": 53
|
| 965 |
},
|
| 966 |
{
|
| 967 |
+
"entropy": 1.103651024401188,
|
| 968 |
"epoch": 2.7044025157232703,
|
| 969 |
+
"grad_norm": 0.002593994140625,
|
| 970 |
"learning_rate": 5.833333333333334e-06,
|
| 971 |
+
"logits/chosen": -2.333249501998797,
|
| 972 |
+
"logits/rejected": -1.8058473440003249,
|
| 973 |
+
"logps/chosen": -152.4753885269165,
|
| 974 |
+
"logps/rejected": -301.55738639831543,
|
| 975 |
+
"loss": 0.00010592853504931554,
|
| 976 |
+
"mean_token_accuracy": 0.6078185737133026,
|
| 977 |
"num_tokens": 353155.0,
|
| 978 |
"rewards/accuracies": 1.0,
|
| 979 |
+
"rewards/chosen": 0.3159999940544367,
|
| 980 |
+
"rewards/margins": 10.630276560783386,
|
| 981 |
+
"rewards/rejected": -10.314276397228241,
|
| 982 |
"step": 54
|
| 983 |
},
|
| 984 |
{
|
| 985 |
+
"entropy": 1.160094790160656,
|
| 986 |
"epoch": 2.7547169811320753,
|
| 987 |
+
"grad_norm": 0.0015716552734375,
|
| 988 |
"learning_rate": 5e-06,
|
| 989 |
+
"logits/chosen": -2.3308505618468462,
|
| 990 |
+
"logits/rejected": -1.8965502590056609,
|
| 991 |
+
"logps/chosen": -141.07257843017578,
|
| 992 |
+
"logps/rejected": -296.07042503356934,
|
| 993 |
+
"loss": 6.342934648273513e-05,
|
| 994 |
+
"mean_token_accuracy": 0.6171156838536263,
|
| 995 |
"num_tokens": 358538.0,
|
| 996 |
"rewards/accuracies": 1.0,
|
| 997 |
+
"rewards/chosen": 0.7141566202044487,
|
| 998 |
+
"rewards/margins": 10.787114977836609,
|
| 999 |
+
"rewards/rejected": -10.072958290576935,
|
| 1000 |
"step": 55
|
| 1001 |
},
|
| 1002 |
{
|
| 1003 |
+
"entropy": 1.1058026999235153,
|
| 1004 |
"epoch": 2.8050314465408803,
|
| 1005 |
+
"grad_norm": 0.030517578125,
|
| 1006 |
"learning_rate": 4.166666666666667e-06,
|
| 1007 |
+
"logits/chosen": -2.322696784138965,
|
| 1008 |
+
"logits/rejected": -1.894479697226797,
|
| 1009 |
+
"logps/chosen": -121.85036373138428,
|
| 1010 |
+
"logps/rejected": -254.1634178161621,
|
| 1011 |
+
"loss": 0.00172812445089221,
|
| 1012 |
+
"mean_token_accuracy": 0.6225817948579788,
|
| 1013 |
"num_tokens": 364730.0,
|
| 1014 |
"rewards/accuracies": 1.0,
|
| 1015 |
+
"rewards/chosen": 0.10639754496514797,
|
| 1016 |
+
"rewards/margins": 8.579113006591797,
|
| 1017 |
+
"rewards/rejected": -8.472715437412262,
|
| 1018 |
"step": 56
|
| 1019 |
},
|
| 1020 |
{
|
| 1021 |
+
"entropy": 1.102943792939186,
|
| 1022 |
"epoch": 2.8553459119496853,
|
| 1023 |
+
"grad_norm": 0.00872802734375,
|
| 1024 |
"learning_rate": 3.3333333333333333e-06,
|
| 1025 |
+
"logits/chosen": -2.173708624672683,
|
| 1026 |
+
"logits/rejected": -1.970646926930764,
|
| 1027 |
+
"logps/chosen": -142.89206504821777,
|
| 1028 |
+
"logps/rejected": -268.407958984375,
|
| 1029 |
+
"loss": 0.00041806488297879696,
|
| 1030 |
+
"mean_token_accuracy": 0.5970962047576904,
|
| 1031 |
"num_tokens": 371209.0,
|
| 1032 |
"rewards/accuracies": 1.0,
|
| 1033 |
+
"rewards/chosen": 0.27814904414117336,
|
| 1034 |
+
"rewards/margins": 9.40092819929123,
|
| 1035 |
+
"rewards/rejected": -9.122779071331024,
|
| 1036 |
"step": 57
|
| 1037 |
},
|
| 1038 |
{
|
| 1039 |
+
"entropy": 1.1477283239364624,
|
| 1040 |
"epoch": 2.9056603773584904,
|
| 1041 |
+
"grad_norm": 0.00469970703125,
|
| 1042 |
"learning_rate": 2.5e-06,
|
| 1043 |
+
"logits/chosen": -2.293317013131777,
|
| 1044 |
+
"logits/rejected": -1.8812048808544868,
|
| 1045 |
+
"logps/chosen": -137.3569221496582,
|
| 1046 |
+
"logps/rejected": -291.4598960876465,
|
| 1047 |
+
"loss": 0.00018517834541853517,
|
| 1048 |
+
"mean_token_accuracy": 0.6110436543822289,
|
| 1049 |
"num_tokens": 378077.0,
|
| 1050 |
"rewards/accuracies": 1.0,
|
| 1051 |
+
"rewards/chosen": 0.33568347059190273,
|
| 1052 |
+
"rewards/margins": 10.355388283729553,
|
| 1053 |
+
"rewards/rejected": -10.01970487833023,
|
| 1054 |
"step": 58
|
| 1055 |
},
|
| 1056 |
{
|
| 1057 |
+
"entropy": 1.0955762341618538,
|
| 1058 |
"epoch": 2.9559748427672954,
|
| 1059 |
+
"grad_norm": 0.00193023681640625,
|
| 1060 |
"learning_rate": 1.6666666666666667e-06,
|
| 1061 |
+
"logits/chosen": -2.3105184309249727,
|
| 1062 |
+
"logits/rejected": -2.0821060341980577,
|
| 1063 |
+
"logps/chosen": -141.80660915374756,
|
| 1064 |
+
"logps/rejected": -279.77105140686035,
|
| 1065 |
+
"loss": 8.936067024478689e-05,
|
| 1066 |
+
"mean_token_accuracy": 0.6105019077658653,
|
| 1067 |
"num_tokens": 384115.0,
|
| 1068 |
"rewards/accuracies": 1.0,
|
| 1069 |
+
"rewards/chosen": 0.5417802967131138,
|
| 1070 |
+
"rewards/margins": 10.488943040370941,
|
| 1071 |
+
"rewards/rejected": -9.947162747383118,
|
| 1072 |
"step": 59
|
| 1073 |
},
|
| 1074 |
{
|
| 1075 |
+
"entropy": 1.0900011318070548,
|
| 1076 |
"epoch": 3.0,
|
| 1077 |
+
"grad_norm": 0.024169921875,
|
| 1078 |
"learning_rate": 8.333333333333333e-07,
|
| 1079 |
+
"logits/chosen": -2.332351866206725,
|
| 1080 |
+
"logits/rejected": -1.8031474708962778,
|
| 1081 |
+
"logps/chosen": -138.6800994873047,
|
| 1082 |
+
"logps/rejected": -265.439217703683,
|
| 1083 |
+
"loss": 0.0013465541414916515,
|
| 1084 |
+
"mean_token_accuracy": 0.6026440688541957,
|
| 1085 |
"num_tokens": 390000.0,
|
| 1086 |
"rewards/accuracies": 1.0,
|
| 1087 |
+
"rewards/chosen": -0.2070258174623762,
|
| 1088 |
+
"rewards/margins": 8.572459425245013,
|
| 1089 |
+
"rewards/rejected": -8.77948522567749,
|
| 1090 |
"step": 60
|
| 1091 |
}
|
| 1092 |
],
|