Spaces:
Runtime error
Runtime error
Ashira Pitchayapakayakul committed on
Commit ·
8aaeb2d
1
Parent(s): ec28ba1
v18: simplify hub naming to surrogate-1-{SIZE}B-v1.5 (owner directive)
Browse files
Owner wants the consistent ladder:
axentx/surrogate-1-7B-v1.3-polymath (existing 7B baseline — kept)
axentx/surrogate-1-9B-v1.5 (Qwen3.5-9B test, V18 stack)
axentx/surrogate-1-27B-v1.5 (Qwen3.6-27B if T4x2 fits)
axentx/surrogate-1-4B-v1.5 (Qwen3.5-4B fast iteration)
axentx/surrogate-1-35B-v1.5 (35B-A3B MoE, needs L40S+)
No '-coder' or '-lora' suffix in the path; size detected via regex on the
base model name (Qwen3.5-9B → '9B', Qwen3.6-35B-A3B → '35B' from leading
total-param number for MoE).
- bin/kaggle-trainer.sh +21 -22
bin/kaggle-trainer.sh
CHANGED
|
@@ -502,28 +502,27 @@ EPOCHS = float(os.environ.get("EPOCHS", "1"))
|
|
| 502 |
# v1.1-extended 7B + FULL R1-12 + EXTENDED stack (Kaggle T4×2 — VALIDATED)
|
| 503 |
# v1.5 14B/32B + winning techniques
|
| 504 |
# v2 72B magnificent run (Civo $250, far future)
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
""
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
_default_hub =
|
| 525 |
-
|
| 526 |
-
# Backward-compat: keep existing v1.3-polymath path if base is original Qwen2.5-Coder-7B
|
| 527 |
if BASE == "Qwen/Qwen2.5-Coder-7B-Instruct":
|
| 528 |
_default_hub = "axentx/surrogate-1-7B-v1.3-polymath"
|
| 529 |
HUB_ID = os.environ.get("HUB_MODEL_ID", _default_hub)
|
|
|
|
| 502 |
# v1.1-extended 7B + FULL R1-12 + EXTENDED stack (Kaggle T4×2 — VALIDATED)
|
| 503 |
# v1.5 14B/32B + winning techniques
|
| 504 |
# v2 72B magnificent run (Civo $250, far future)
|
| 505 |
+
import re as _re_size
|
| 506 |
+
def _detect_base_size(hf_path: str) -> str:
    """Extract the parameter-size tag (e.g. ``7B``, ``27B``, ``1.5B``) from a model name.

    Only the text after the first ``/`` (the repo name without its owner) is
    searched, and the first match wins, so for MoE names the leading
    total-parameter number is used (``Qwen3.6-35B-A3B`` -> ``35B``).

    The ``B`` must end its token: a number followed by a longer word, such as
    the ``4b`` in ``4bit``, is not treated as a size.

    Args:
        hf_path: Model id such as ``Qwen/Qwen3.5-9B``; a bare name with no
            owner prefix is accepted as well.

    Returns:
        The matched tag upper-cased, including the trailing ``B``, or an empty
        string when the name carries no size.
    """
    tail = hf_path.split("/", 1)[-1]
    # The negative lookahead rejects "<digits>b" prefixes of longer words
    # ("4bit", "2base"), which the case-insensitive match would otherwise accept.
    m = _re_size.search(r"(\d+(?:\.\d+)?B)(?![A-Za-z0-9])", tail, _re_size.I)
    return m.group(1).upper() if m else ""
|
| 513 |
+
|
| 514 |
+
# Naming convention (owner directive 2026-05-01):
|
| 515 |
+
# axentx/surrogate-1-{SIZE}B-v{VERSION}[-tag]
|
| 516 |
+
# Examples:
|
| 517 |
+
# Qwen2.5-Coder-7B-Instruct → axentx/surrogate-1-7B-v1.3-polymath (kept; existing baseline)
|
| 518 |
+
# Qwen3.5-9B → axentx/surrogate-1-9B-v1.5
|
| 519 |
+
# Qwen3.6-27B → axentx/surrogate-1-27B-v1.5
|
| 520 |
+
# Qwen3.5-4B → axentx/surrogate-1-4B-v1.5
|
| 521 |
+
# v1.5 = V18 stack (R6 datasets + Phases 78-96 wired). Bump to v1.6+ when
|
| 522 |
+
# specialty DoRA composition or merge recipes finalize.
|
| 523 |
+
_size_tag = _detect_base_size(BASE) or "unknown"
|
| 524 |
+
_default_hub = f"axentx/surrogate-1-{_size_tag}-v1.5"
|
| 525 |
+
# Backward-compat: keep existing v1.3-polymath path for the original Qwen2.5-Coder-7B baseline.
|
|
|
|
| 526 |
if BASE == "Qwen/Qwen2.5-Coder-7B-Instruct":
|
| 527 |
_default_hub = "axentx/surrogate-1-7B-v1.3-polymath"
|
| 528 |
HUB_ID = os.environ.get("HUB_MODEL_ID", _default_hub)
|