Starred committed on
Training in progress, step 1000, checkpoint
Browse files
- last-checkpoint/adapter_config.json +4 -4
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +92 -6
- last-checkpoint/training_args.bin +1 -1
last-checkpoint/adapter_config.json
CHANGED
|
@@ -33,12 +33,12 @@
|
|
| 33 |
"rank_pattern": {},
|
| 34 |
"revision": null,
|
| 35 |
"target_modules": [
|
| 36 |
-
"gate_proj",
|
| 37 |
-
"k_proj",
|
| 38 |
-
"o_proj",
|
| 39 |
-
"down_proj",
|
| 40 |
"v_proj",
|
| 41 |
"q_proj",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
"up_proj"
|
| 43 |
],
|
| 44 |
"target_parameters": null,
|
|
|
|
| 33 |
"rank_pattern": {},
|
| 34 |
"revision": null,
|
| 35 |
"target_modules": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
"v_proj",
|
| 37 |
"q_proj",
|
| 38 |
+
"gate_proj",
|
| 39 |
+
"down_proj",
|
| 40 |
+
"o_proj",
|
| 41 |
+
"k_proj",
|
| 42 |
"up_proj"
|
| 43 |
],
|
| 44 |
"target_parameters": null,
|
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 84972248
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7f16ebf88a0d95056235f2d36fa0dde45d635dd19fe373f7221414a1374b7f8
|
| 3 |
size 84972248
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 43434405
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:296ded756d0f414f7e445c370d0d925cf67f696f3ea8d13bca350f070ddf214c
|
| 3 |
size 43434405
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14917
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a01d50a1db8acb4ced09f825addc1c2a57ecdfe1a6e741863efa9e5c63d21121
|
| 3 |
size 14917
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14917
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f787fa81c178b3e54f8156ac38c6f8fbdc1bce6b672c879fefb79c4d09a69a02
|
| 3 |
size 14917
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0255635c2d75d2f3bb5934ec2e22bac7b97c15231ed8cbeda2a9e8b220185d9
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "/kaggle/working/obsidian_critic_qwen35_t4x2_unsloth/runs/obsidian_critic_full_epoch/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 125,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -278,6 +278,92 @@
|
|
| 278 |
"tokens_per_second": 368.35941630029333,
|
| 279 |
"tokens_per_step": 1787.9106666666667,
|
| 280 |
"total_tokens_seen": 1340933
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
}
|
| 282 |
],
|
| 283 |
"logging_steps": 50,
|
|
@@ -306,7 +392,7 @@
|
|
| 306 |
"attributes": {}
|
| 307 |
}
|
| 308 |
},
|
| 309 |
-
"total_flos":
|
| 310 |
"train_batch_size": 1,
|
| 311 |
"trial_name": null,
|
| 312 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 1000,
|
| 3 |
+
"best_metric": 0.26276224851608276,
|
| 4 |
+
"best_model_checkpoint": "/kaggle/working/obsidian_critic_qwen35_t4x2_unsloth/runs/obsidian_critic_full_epoch/checkpoint-1000",
|
| 5 |
+
"epoch": 0.4434343994235353,
|
| 6 |
"eval_steps": 125,
|
| 7 |
+
"global_step": 1000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 278 |
"tokens_per_second": 368.35941630029333,
|
| 279 |
"tokens_per_step": 1787.9106666666667,
|
| 280 |
"total_tokens_seen": 1340933
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"epoch": 0.3547475195388282,
|
| 284 |
+
"grad_norm": 0.5721789598464966,
|
| 285 |
+
"last_batch_tokens": 124,
|
| 286 |
+
"learning_rate": 7.231630894432527e-05,
|
| 287 |
+
"loss": 0.29953609466552733,
|
| 288 |
+
"lr": 7.22538412484033e-05,
|
| 289 |
+
"step": 800,
|
| 290 |
+
"tokens_per_second": 65.97096831279634,
|
| 291 |
+
"tokens_per_step": 98.35625,
|
| 292 |
+
"total_tokens_seen": 78685
|
| 293 |
+
},
|
| 294 |
+
{
|
| 295 |
+
"epoch": 0.376919239510005,
|
| 296 |
+
"grad_norm": 0.4275953471660614,
|
| 297 |
+
"last_batch_tokens": 266,
|
| 298 |
+
"learning_rate": 6.914223011522581e-05,
|
| 299 |
+
"loss": 0.27611801147460935,
|
| 300 |
+
"lr": 6.907774584760349e-05,
|
| 301 |
+
"step": 850,
|
| 302 |
+
"tokens_per_second": 76.59339331072898,
|
| 303 |
+
"tokens_per_step": 183.97411764705882,
|
| 304 |
+
"total_tokens_seen": 156378
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"epoch": 0.38800509949559336,
|
| 308 |
+
"eval_loss": 0.28222641348838806,
|
| 309 |
+
"eval_runtime": 113.424,
|
| 310 |
+
"eval_samples_per_second": 3.2,
|
| 311 |
+
"eval_steps_per_second": 1.605,
|
| 312 |
+
"last_batch_tokens": 172,
|
| 313 |
+
"lr": 6.745388997609773e-05,
|
| 314 |
+
"step": 875,
|
| 315 |
+
"tokens_per_second": 114.49594753151979,
|
| 316 |
+
"tokens_per_step": 258.8742857142857,
|
| 317 |
+
"total_tokens_seen": 226515
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.39909095948118173,
|
| 321 |
+
"grad_norm": 0.5093332529067993,
|
| 322 |
+
"last_batch_tokens": 209,
|
| 323 |
+
"learning_rate": 6.587497507323132e-05,
|
| 324 |
+
"loss": 0.26179553985595705,
|
| 325 |
+
"lr": 6.580878811582379e-05,
|
| 326 |
+
"step": 900,
|
| 327 |
+
"tokens_per_second": 82.29563477689274,
|
| 328 |
+
"tokens_per_step": 298.55555555555554,
|
| 329 |
+
"total_tokens_seen": 268700
|
| 330 |
+
},
|
| 331 |
+
{
|
| 332 |
+
"epoch": 0.4212626794523585,
|
| 333 |
+
"grad_norm": 0.3912750482559204,
|
| 334 |
+
"last_batch_tokens": 103,
|
| 335 |
+
"learning_rate": 6.253044742254792e-05,
|
| 336 |
+
"loss": 0.25117488861083986,
|
| 337 |
+
"lr": 6.246287994523805e-05,
|
| 338 |
+
"step": 950,
|
| 339 |
+
"tokens_per_second": 79.79549481684828,
|
| 340 |
+
"tokens_per_step": 366.02947368421053,
|
| 341 |
+
"total_tokens_seen": 347728
|
| 342 |
+
},
|
| 343 |
+
{
|
| 344 |
+
"epoch": 0.4434343994235353,
|
| 345 |
+
"grad_norm": 0.4664643406867981,
|
| 346 |
+
"last_batch_tokens": 203,
|
| 347 |
+
"learning_rate": 5.9124926897487534e-05,
|
| 348 |
+
"loss": 0.25925636291503906,
|
| 349 |
+
"lr": 5.9056307789940357e-05,
|
| 350 |
+
"step": 1000,
|
| 351 |
+
"tokens_per_second": 76.53280228762407,
|
| 352 |
+
"tokens_per_step": 422.387,
|
| 353 |
+
"total_tokens_seen": 422387
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"epoch": 0.4434343994235353,
|
| 357 |
+
"eval_loss": 0.26276224851608276,
|
| 358 |
+
"eval_runtime": 95.1275,
|
| 359 |
+
"eval_samples_per_second": 3.816,
|
| 360 |
+
"eval_steps_per_second": 1.913,
|
| 361 |
+
"last_batch_tokens": 172,
|
| 362 |
+
"lr": 5.9056307789940357e-05,
|
| 363 |
+
"step": 1000,
|
| 364 |
+
"tokens_per_second": 337.8095566509732,
|
| 365 |
+
"tokens_per_step": 454.525,
|
| 366 |
+
"total_tokens_seen": 454525
|
| 367 |
}
|
| 368 |
],
|
| 369 |
"logging_steps": 50,
|
|
|
|
| 392 |
"attributes": {}
|
| 393 |
}
|
| 394 |
},
|
| 395 |
+
"total_flos": 7.329721439433523e+16,
|
| 396 |
"train_batch_size": 1,
|
| 397 |
"trial_name": null,
|
| 398 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5841
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c39bdfd581a6de355d794e4165fae9336ff85f4eeb9590da60305570e0139f34
|
| 3 |
size 5841
|