Ashira Pitchayapakayakul committed on
Commit
02e2084
·
1 Parent(s): e3077e1

v18: BASE_MODEL alias resolver — short names just work

Browse files

Before: BASE_MODEL=qwen-coder-7b silently tried to load HF repo
'qwen-coder-7b' and 404'd. Users had to know the full HF path.

After: a short alias is resolved to its full HF repo path before being passed
to from_pretrained. Any value that is not a known alias, including a full HF
repo path, passes through unchanged (it is a plain dict lookup; there is no
'/' check), so the fix is non-breaking. The resolution is logged so it's
visible in the kernel run.

Files changed (1) hide show
  1. bin/kaggle-trainer.sh +26 -1
bin/kaggle-trainer.sh CHANGED
@@ -424,7 +424,32 @@ def pick_base_for_hardware():
424
 
425
 
426
  _auto_base, _auto_size = pick_base_for_hardware()
427
- BASE = os.environ.get("BASE_MODEL", _auto_base)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
  MAX_SAMPLES = int(os.environ.get("MAX_SAMPLES", "100000"))
429
  EPOCHS = float(os.environ.get("EPOCHS", "1"))
430
 
 
424
 
425
 
426
  _auto_base, _auto_size = pick_base_for_hardware()
427
+ _BASE_ALIASES = {
428
+ # short-name → real HF path (matches BASE_SWAP_CANDIDATES dict below;
429
+ # resolved here so BASE_MODEL=qwen-coder-7b just works without users
430
+ # having to type the full HF repo path).
431
+ "qwen-coder-7b": "Qwen/Qwen2.5-Coder-7B-Instruct",
432
+ "qwen-coder-14b": "Qwen/Qwen2.5-Coder-14B-Instruct",
433
+ "qwen-coder-32b": "Qwen/Qwen2.5-Coder-32B-Instruct",
434
+ "granite-4.1-8b": "ibm-granite/granite-4.1-8B-base",
435
+ "olmoe-1b-7b": "allenai/OLMoE-1B-7B-0924-Instruct",
436
+ "qwen3-coder-7b": "Qwen/Qwen3-Coder-7B-Instruct",
437
+ "qwen3-coder-30b": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
438
+ "qwen3-7b-instruct": "Qwen/Qwen3-7B-Instruct",
439
+ "qwen3-8b-instruct": "Qwen/Qwen3-8B-Instruct",
440
+ "glm-4-9b-chat": "zai-org/glm-4-9b-chat",
441
+ "glm-4-9b-chat-1m": "zai-org/glm-4-9b-chat-1m",
442
+ "glm-4.1v-9b-think": "zai-org/GLM-4.1V-9B-Thinking",
443
+ "glm-4.7-flash": "zai-org/GLM-4.7-Flash",
444
+ "glm-4.5-air": "zai-org/GLM-4.5-Air-Base",
445
+ "glm-5": "zai-org/GLM-5",
446
+ "glm-5.1": "zai-org/GLM-5.1",
447
+ "glm-5.1-fp8": "zai-org/GLM-5.1-FP8",
448
+ }
449
+ _user_base = os.environ.get("BASE_MODEL", _auto_base)
450
+ BASE = _BASE_ALIASES.get(_user_base, _user_base) # alias OR full HF path
451
+ if _user_base != BASE:
452
+ print(f" resolved BASE_MODEL alias '{_user_base}' → '{BASE}'")
453
  MAX_SAMPLES = int(os.environ.get("MAX_SAMPLES", "100000"))
454
  EPOCHS = float(os.environ.get("EPOCHS", "1"))
455