Spaces:

axentx
/

surrogate-1

Runtime error

Ashira Pitchayapakayakul committed on 7 days ago

Commit

8aaeb2d

1 Parent(s): ec28ba1

v18: simplify hub naming to surrogate-1-{SIZE}B-v1.5 (owner directive)

Owner wants the consistent ladder:
axentx/surrogate-1-7B-v1.3-polymath (existing 7B baseline — kept)
axentx/surrogate-1-9B-v1.5 (Qwen3.5-9B test, V18 stack)
axentx/surrogate-1-27B-v1.5 (Qwen3.6-27B if T4x2 fits)
axentx/surrogate-1-4B-v1.5 (Qwen3.5-4B fast iteration)
axentx/surrogate-1-35B-v1.5 (35B-A3B MoE, needs L40S+)

No '-coder' or '-lora' suffix in the path; the size is detected via a regex
on the base model name (Qwen3.5-9B → '9B'). For MoE names the leading
total-param number is used (Qwen3.6-35B-A3B → '35B').

Files changed (1) hide show

bin/kaggle-trainer.sh +21 -22

bin/kaggle-trainer.sh CHANGED Viewed

@@ -502,28 +502,27 @@ EPOCHS = float(os.environ.get("EPOCHS", "1"))
 #   v1.1-extended 7B + FULL R1-12 + EXTENDED stack (Kaggle T4×2 — VALIDATED)
 #   v1.5          14B/32B + winning techniques
 #   v2            72B magnificent run (Civo $250, far future)
-def _base_shortname(hf_path: str) -> str:
-    """Map HF repo path → short tag for hub-id suffixing.
-    Examples:
-      Qwen/Qwen2.5-Coder-7B-Instruct → qwen2.5-coder-7b
-      Qwen/Qwen3.5-9B                → qwen3.5-9b
-      Qwen/Qwen3.6-27B               → qwen3.6-27b
-      zai-org/glm-4-9b-chat          → glm-4-9b
-    """
-    tail = hf_path.split("/", 1)[-1].lower()
-    for kill in ("-instruct", "-chat", "-base", "-fp8", "-int4", "-gptq"):
-        tail = tail.replace(kill, "")
-    return tail.replace("--", "-").strip("-")
-_base_short = _base_shortname(BASE)
-_default_hub_by_size = {
-    32.0: f"axentx/surrogate-1-{_base_short}-lora-v1.6",
-    14.0: f"axentx/surrogate-1-{_base_short}-lora-v1.6",
-    7.0:  f"axentx/surrogate-1-{_base_short}-lora-v1.6",
-}
-_default_hub = _default_hub_by_size.get(_auto_size,
-    f"axentx/surrogate-1-{_base_short}-lora-v1.6")
-# Backward-compat: keep existing v1.3-polymath path if base is original Qwen2.5-Coder-7B
 if BASE == "Qwen/Qwen2.5-Coder-7B-Instruct":
     _default_hub = "axentx/surrogate-1-7B-v1.3-polymath"
 HUB_ID = os.environ.get("HUB_MODEL_ID", _default_hub)

 #   v1.1-extended 7B + FULL R1-12 + EXTENDED stack (Kaggle T4×2 — VALIDATED)
 #   v1.5          14B/32B + winning techniques
 #   v2            72B magnificent run (Civo $250, far future)
+import re as _re_size
+def _detect_base_size(hf_path: str) -> str:
+    """Return the upper-cased param-size tag in hf_path's repo name ('7B', '1.5B'), or ''.
+    The first match wins, so MoE names give the leading total (35B-A3B → '35B').
+    The trailing lookahead rejects quantization tags such as '4bit' / '8bit'."""
+    tail = hf_path.split("/", 1)[-1]
+    m = _re_size.search(r"(\d+(?:\.\d+)?B)(?![A-Za-z0-9])", tail, _re_size.I)
+    return m.group(1).upper() if m else ""
+# Naming convention (owner directive 2026-05-01):
+#   axentx/surrogate-1-{SIZE}B-v{VERSION}[-tag]
+# Examples:
+#   Qwen2.5-Coder-7B-Instruct → axentx/surrogate-1-7B-v1.3-polymath  (kept; existing baseline)
+#   Qwen3.5-9B                → axentx/surrogate-1-9B-v1.5
+#   Qwen3.6-27B               → axentx/surrogate-1-27B-v1.5
+#   Qwen3.5-4B                → axentx/surrogate-1-4B-v1.5
+# v1.5 = V18 stack (R6 datasets + Phases 78-96 wired). Bump to v1.6+ when
+# specialty DoRA composition or merge recipes finalize.
+_size_tag = _detect_base_size(BASE) or "unknown"
+_default_hub = f"axentx/surrogate-1-{_size_tag}-v1.5"
+# Backward-compat: keep existing v1.3-polymath path for the original Qwen2.5-Coder-7B baseline.
 if BASE == "Qwen/Qwen2.5-Coder-7B-Instruct":
     _default_hub = "axentx/surrogate-1-7B-v1.3-polymath"
 HUB_ID = os.environ.get("HUB_MODEL_ID", _default_hub)