Spaces:
Runtime error
Runtime error
Ashira Pitchayapakayakul committed on
Commit ·
ec28ba1
1
Parent(s): 4e9d4f7
v18: HUB_ID base-aware + T4 FP8 guard
Browse filesTwo compat fixes for base-model swap:
1. HUB_MODEL_ID was hardcoded by detected SIZE (7B → v1.3-polymath path).
Setting BASE_MODEL=qwen3.5-9b would silently overwrite the v1.3 Qwen2.5
adapter on Hub. Now derives a short base tag and embeds it in the path:
Qwen2.5-Coder-7B-Instruct → axentx/surrogate-1-7B-v1.3-polymath (kept)
Qwen3.5-9B → axentx/surrogate-1-qwen3.5-9b-lora-v1.6
Qwen3.6-27B → axentx/surrogate-1-qwen3.6-27b-lora-v1.6
2. T4 (SM 7.5) cannot run FP8. Pre-quantized FP8 bases (e.g. Qwen3.6-27B-FP8)
crash on load or dequant silently. Trainer now detects SM<9 + 'fp8' in
path and drops the -FP8 suffix to load raw BF16 weights instead.
- bin/kaggle-trainer.sh +43 -9
bin/kaggle-trainer.sh
CHANGED
|
@@ -478,20 +478,54 @@ _user_base = os.environ.get("BASE_MODEL", _auto_base)
|
|
| 478 |
BASE = _BASE_ALIASES.get(_user_base, _user_base) # alias OR full HF path
|
| 479 |
if _user_base != BASE:
|
| 480 |
print(f" resolved BASE_MODEL alias '{_user_base}' β '{BASE}'")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
MAX_SAMPLES = int(os.environ.get("MAX_SAMPLES", "100000"))
|
| 482 |
EPOCHS = float(os.environ.get("EPOCHS", "1"))
|
| 483 |
|
| 484 |
-
# HUB_MODEL_ID auto-suffixes by detected size unless explicitly set.
|
| 485 |
-
#
|
|
|
|
|
|
|
|
|
|
| 486 |
# v1 7B + minimal LoRA (existing baseline, on Hub)
|
| 487 |
-
# v1.1-extended 7B + FULL R1-12 + EXTENDED stack (Kaggle T4×2 →
|
| 488 |
-
# v1.5 14B/32B + winning techniques
|
| 489 |
# v2 72B magnificent run (Civo $250, far future)
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
HUB_ID = os.environ.get("HUB_MODEL_ID", _default_hub)
|
| 496 |
# seq_len auto-shrinks for smaller hardware budget
|
| 497 |
_default_seq = {32.0: 2048, 14.0: 4096, 7.0: 8192}.get(_auto_size, 2048)
|
|
|
|
| 478 |
BASE = _BASE_ALIASES.get(_user_base, _user_base) # alias OR full HF path
|
| 479 |
if _user_base != BASE:
|
| 480 |
print(f" resolved BASE_MODEL alias '{_user_base}' β '{BASE}'")
|
| 481 |
+
|
| 482 |
+
# V18 hardware-vs-base sanity check. T4 (SM 7.5) cannot execute FP8 ops,
|
| 483 |
+
# so any pre-quantized FP8 base will either crash on load or silently
|
| 484 |
+
# dequantize to BF16 with severe perf penalty. GPTQ-Int4 is fine on T4.
|
| 485 |
+
if torch.cuda.is_available():
|
| 486 |
+
_sm = torch.cuda.get_device_capability(0)
|
| 487 |
+
if _sm[0] < 9 and "fp8" in BASE.lower():
|
| 488 |
+
print(f" β FP8 base '{BASE}' on SM {_sm[0]}.{_sm[1]} (T4=7.5, A100=8.0).")
|
| 489 |
+
print(f" FP8 needs Hopper (H100, SM 9.0) or Ada (L40, SM 8.9).")
|
| 490 |
+
print(f" Dropping '-FP8' suffix and loading raw BF16 weights instead.")
|
| 491 |
+
_alt = BASE.replace("-FP8", "").replace("-fp8", "")
|
| 492 |
+
BASE = _alt
|
| 493 |
MAX_SAMPLES = int(os.environ.get("MAX_SAMPLES", "100000"))
|
| 494 |
EPOCHS = float(os.environ.get("EPOCHS", "1"))
|
| 495 |
|
| 496 |
+
# HUB_MODEL_ID auto-suffixes by detected size + base family unless explicitly set.
|
| 497 |
+
# V18 fix (2026-04-30): adapters from different bases CANNOT load on each other
|
| 498 |
+
# (LoRA shapes are arch-locked). Embedding the base family in the hub path
|
| 499 |
+
# prevents Qwen3.5-9B run from overwriting Qwen2.5-Coder-7B v1.3 baseline.
|
| 500 |
+
# Strategy ladder per owner 2026-05-01:
|
| 501 |
# v1 7B + minimal LoRA (existing baseline, on Hub)
|
| 502 |
+
# v1.1-extended 7B + FULL R1-12 + EXTENDED stack (Kaggle T4×2 → VALIDATED)
|
| 503 |
+
# v1.5 14B/32B + winning techniques
|
| 504 |
# v2 72B magnificent run (Civo $250, far future)
|
| 505 |
+
def _base_shortname(hf_path: str) -> str:
|
| 506 |
+
"""Map HF repo path β short tag for hub-id suffixing.
|
| 507 |
+
Examples:
|
| 508 |
+
Qwen/Qwen2.5-Coder-7B-Instruct β qwen2.5-coder-7b
|
| 509 |
+
Qwen/Qwen3.5-9B β qwen3.5-9b
|
| 510 |
+
Qwen/Qwen3.6-27B β qwen3.6-27b
|
| 511 |
+
zai-org/glm-4-9b-chat β glm-4-9b
|
| 512 |
+
"""
|
| 513 |
+
tail = hf_path.split("/", 1)[-1].lower()
|
| 514 |
+
for kill in ("-instruct", "-chat", "-base", "-fp8", "-int4", "-gptq"):
|
| 515 |
+
tail = tail.replace(kill, "")
|
| 516 |
+
return tail.replace("--", "-").strip("-")
|
| 517 |
+
|
| 518 |
+
_base_short = _base_shortname(BASE)
|
| 519 |
+
_default_hub_by_size = {
|
| 520 |
+
32.0: f"axentx/surrogate-1-{_base_short}-lora-v1.6",
|
| 521 |
+
14.0: f"axentx/surrogate-1-{_base_short}-lora-v1.6",
|
| 522 |
+
7.0: f"axentx/surrogate-1-{_base_short}-lora-v1.6",
|
| 523 |
+
}
|
| 524 |
+
_default_hub = _default_hub_by_size.get(_auto_size,
|
| 525 |
+
f"axentx/surrogate-1-{_base_short}-lora-v1.6")
|
| 526 |
+
# Backward-compat: keep existing v1.3-polymath path if base is original Qwen2.5-Coder-7B
|
| 527 |
+
if BASE == "Qwen/Qwen2.5-Coder-7B-Instruct":
|
| 528 |
+
_default_hub = "axentx/surrogate-1-7B-v1.3-polymath"
|
| 529 |
HUB_ID = os.environ.get("HUB_MODEL_ID", _default_hub)
|
| 530 |
# seq_len auto-shrinks for smaller hardware budget
|
| 531 |
_default_seq = {32.0: 2048, 14.0: 4096, 7.0: 8192}.get(_auto_size, 2048)
|