shank commited on
Commit Β·
5d0b2d4
1
Parent(s): 2005cd2
fix: empty requirements.txt, install training deps at runtime
Browse files- requirements.txt +3 -16
- training/train_grpo.py +22 -16
requirements.txt
CHANGED
|
@@ -1,16 +1,3 @@
|
|
| 1 |
-
#
|
| 2 |
-
#
|
| 3 |
-
#
|
| 4 |
-
# - gradio is injected by HF Spaces automatically — do NOT add it here
|
| 5 |
-
# - wandb is excluded: it conflicts with gradio over click versioning
|
| 6 |
-
# (wandb>=0.18 requires click!=8.0.0,>=7.1 but gradio requires click>=8.1)
|
| 7 |
-
# wandb is initialized at runtime by the training script if available
|
| 8 |
-
|
| 9 |
-
datasets==3.0.2
|
| 10 |
-
transformers==4.46.3
|
| 11 |
-
accelerate==1.0.1
|
| 12 |
-
trl==0.14.0
|
| 13 |
-
peft==0.13.2
|
| 14 |
-
bitsandbytes==0.43.3
|
| 15 |
-
mergekit==0.1.4
|
| 16 |
-
pydantic==2.10.6
|
|
|
|
| 1 |
+
# Training packages are installed at runtime by training/train_grpo.py
|
| 2 |
+
# This file is intentionally minimal to avoid pip dependency conflicts
|
| 3 |
+
# during HF Spaces Docker builds (gradio's pinned deps conflict with ML stack)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training/train_grpo.py
CHANGED
|
@@ -37,27 +37,33 @@ parser.add_argument("--resume", type=str, default=None, help="Path to checkpoint
|
|
| 37 |
parser.add_argument("--max_steps", type=int, default=500)
|
| 38 |
args = parser.parse_args()
|
| 39 |
|
| 40 |
-
# ──
|
| 41 |
-
#
|
| 42 |
-
#
|
| 43 |
-
if
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
"
|
| 47 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
)
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
# ── GPU/training imports (skipped in --test-local mode) ───────────────────────
|
| 51 |
if not args.test_local:
|
| 52 |
-
# wandb is not in requirements.txt (conflicts with gradio over click versioning)
|
| 53 |
-
# Install it at runtime before importing
|
| 54 |
-
try:
|
| 55 |
-
import wandb
|
| 56 |
-
except ImportError:
|
| 57 |
-
os.system(f"{sys.executable} -m pip install -q 'wandb>=0.18.0'")
|
| 58 |
-
import wandb
|
| 59 |
-
|
| 60 |
import torch
|
|
|
|
| 61 |
from datasets import Dataset
|
| 62 |
from transformers import (
|
| 63 |
AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainerCallback
|
|
|
|
| 37 |
parser.add_argument("--max_steps", type=int, default=500)
|
| 38 |
args = parser.parse_args()
|
| 39 |
|
| 40 |
+
# ── Runtime dependency install ─────────────────────────────────────────────────
|
| 41 |
+
# requirements.txt is empty to avoid pip conflicts during HF Spaces Docker build.
|
| 42 |
+
# We install all training deps here, at runtime, after gradio is already up.
|
| 43 |
+
if not args.test_local:
|
| 44 |
+
_TRAIN_DEPS = [
|
| 45 |
+
"wandb==0.18.7",
|
| 46 |
+
"datasets==3.0.2",
|
| 47 |
+
"transformers==4.46.3",
|
| 48 |
+
"accelerate==1.0.1",
|
| 49 |
+
"trl==0.14.0",
|
| 50 |
+
"peft==0.13.2",
|
| 51 |
+
"bitsandbytes==0.43.3",
|
| 52 |
+
"mergekit==0.1.4",
|
| 53 |
+
"pydantic==2.10.6",
|
| 54 |
+
]
|
| 55 |
+
print("Installing training dependencies...", flush=True)
|
| 56 |
+
ret = os.system(
|
| 57 |
+
f"{sys.executable} -m pip install -q --no-cache-dir " + " ".join(f'"{d}"' for d in _TRAIN_DEPS)
|
| 58 |
)
|
| 59 |
+
if ret != 0:
|
| 60 |
+
print("WARNING: pip install returned non-zero exit. Continuing anyway...", flush=True)
|
| 61 |
+
print("Dependencies installed.", flush=True)
|
| 62 |
|
| 63 |
# ── GPU/training imports (skipped in --test-local mode) ───────────────────────
|
| 64 |
if not args.test_local:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
import torch
|
| 66 |
+
import wandb
|
| 67 |
from datasets import Dataset
|
| 68 |
from transformers import (
|
| 69 |
AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainerCallback
|