Spaces:
Runtime error
Runtime error
Ashira Pitchayapakayakul committed on
Commit ·
02e2084
1
Parent(s): e3077e1
v18: BASE_MODEL alias resolver — short names just work
Browse files
Before: BASE_MODEL=qwen-coder-7b silently tried to load HF repo
'qwen-coder-7b' and 404'd. Users had to know the full HF path.
After: short alias resolves to full path before passing to from_pretrained.
Any value that is not a known alias (including a full HF repo path; there is
no '/' check) still passes through unchanged, so the fix is non-breaking.
The resolution is logged so it's visible in the kernel run.
- bin/kaggle-trainer.sh +26 -1
bin/kaggle-trainer.sh
CHANGED
|
@@ -424,7 +424,32 @@ def pick_base_for_hardware():
|
|
| 424 |
|
| 425 |
|
| 426 |
_auto_base, _auto_size = pick_base_for_hardware()
|
| 427 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
MAX_SAMPLES = int(os.environ.get("MAX_SAMPLES", "100000"))
|
| 429 |
EPOCHS = float(os.environ.get("EPOCHS", "1"))
|
| 430 |
|
|
|
|
| 424 |
|
| 425 |
|
| 426 |
_auto_base, _auto_size = pick_base_for_hardware()
|
| 427 |
+
_BASE_ALIASES = {
|
| 428 |
+
# short-name → real HF path (matches BASE_SWAP_CANDIDATES dict below;
|
| 429 |
+
# resolved here so BASE_MODEL=qwen-coder-7b just works without users
|
| 430 |
+
# having to type the full HF repo path).
|
| 431 |
+
"qwen-coder-7b": "Qwen/Qwen2.5-Coder-7B-Instruct",
|
| 432 |
+
"qwen-coder-14b": "Qwen/Qwen2.5-Coder-14B-Instruct",
|
| 433 |
+
"qwen-coder-32b": "Qwen/Qwen2.5-Coder-32B-Instruct",
|
| 434 |
+
"granite-4.1-8b": "ibm-granite/granite-4.1-8B-base",
|
| 435 |
+
"olmoe-1b-7b": "allenai/OLMoE-1B-7B-0924-Instruct",
|
| 436 |
+
"qwen3-coder-7b": "Qwen/Qwen3-Coder-7B-Instruct",
|
| 437 |
+
"qwen3-coder-30b": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
|
| 438 |
+
"qwen3-7b-instruct": "Qwen/Qwen3-7B-Instruct",
|
| 439 |
+
"qwen3-8b-instruct": "Qwen/Qwen3-8B-Instruct",
|
| 440 |
+
"glm-4-9b-chat": "zai-org/glm-4-9b-chat",
|
| 441 |
+
"glm-4-9b-chat-1m": "zai-org/glm-4-9b-chat-1m",
|
| 442 |
+
"glm-4.1v-9b-think": "zai-org/GLM-4.1V-9B-Thinking",
|
| 443 |
+
"glm-4.7-flash": "zai-org/GLM-4.7-Flash",
|
| 444 |
+
"glm-4.5-air": "zai-org/GLM-4.5-Air-Base",
|
| 445 |
+
"glm-5": "zai-org/GLM-5",
|
| 446 |
+
"glm-5.1": "zai-org/GLM-5.1",
|
| 447 |
+
"glm-5.1-fp8": "zai-org/GLM-5.1-FP8",
|
| 448 |
+
}
|
| 449 |
+
_user_base = os.environ.get("BASE_MODEL", _auto_base)
|
| 450 |
+
BASE = _BASE_ALIASES.get(_user_base, _user_base) # alias OR full HF path
|
| 451 |
+
if _user_base != BASE:
|
| 452 |
+
print(f" resolved BASE_MODEL alias '{_user_base}' → '{BASE}'")
|
| 453 |
MAX_SAMPLES = int(os.environ.get("MAX_SAMPLES", "100000"))
|
| 454 |
EPOCHS = float(os.environ.get("EPOCHS", "1"))
|
| 455 |
|