fix: prepend source dir to sys.path and import forgeenv early so the job survives the uv-run env
Browse files
scripts/jobs/train_repair_agent.py
CHANGED
|
@@ -63,6 +63,10 @@ _sh([
|
|
| 63 |
f"https://USER:{HF_TOKEN}@huggingface.co/{SOURCE_REPO}",
|
| 64 |
str(src_dir),
|
| 65 |
])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
step("1. pip install + verify GPU")
|
| 68 |
_sh([sys.executable, "-m", "pip", "install", "-e", str(src_dir)])
|
|
@@ -87,7 +91,7 @@ if torch.cuda.is_available():
|
|
| 87 |
flush=True,
|
| 88 |
)
|
| 89 |
|
| 90 |
-
step("2. ping live env Space")
|
| 91 |
import requests # noqa: E402
|
| 92 |
|
| 93 |
try:
|
|
@@ -96,6 +100,13 @@ try:
|
|
| 96 |
except Exception as e: # noqa: BLE001
|
| 97 |
print(f"[job] WARN: env ping failed: {e}", flush=True)
|
| 98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
step("3. SFT: load Qwen + LoRA via Unsloth, train on warm-start pairs")
|
| 100 |
from unsloth import FastLanguageModel # noqa: E402
|
| 101 |
|
|
|
|
| 63 |
f"https://USER:{HF_TOKEN}@huggingface.co/{SOURCE_REPO}",
|
| 64 |
str(src_dir),
|
| 65 |
])
|
| 66 |
+
# Belt-and-braces: prepend the source dir to sys.path so `import forgeenv`
|
| 67 |
+
# works even if `pip install -e` doesn't persist inside the uv-managed
|
| 68 |
+
# venv. We still run pip install so that any declared dependencies and setuptools side effects are applied.
|
| 69 |
+
sys.path.insert(0, str(src_dir))
|
| 70 |
|
| 71 |
step("1. pip install + verify GPU")
|
| 72 |
_sh([sys.executable, "-m", "pip", "install", "-e", str(src_dir)])
|
|
|
|
| 91 |
flush=True,
|
| 92 |
)
|
| 93 |
|
| 94 |
+
step("2. ping live env Space + verify forgeenv import")
|
| 95 |
import requests # noqa: E402
|
| 96 |
|
| 97 |
try:
|
|
|
|
| 100 |
except Exception as e: # noqa: BLE001
|
| 101 |
print(f"[job] WARN: env ping failed: {e}", flush=True)
|
| 102 |
|
| 103 |
+
# Fail fast if forgeenv isn't on the path -- much cheaper to discover
|
| 104 |
+
# this here than after 8+ minutes of SFT.
|
| 105 |
+
import forgeenv # noqa: F401, E402
|
| 106 |
+
from forgeenv.training.grpo_repair import run_grpo # noqa: F401, E402
|
| 107 |
+
|
| 108 |
+
print("[job] forgeenv import OK", flush=True)
|
| 109 |
+
|
| 110 |
step("3. SFT: load Qwen + LoRA via Unsloth, train on warm-start pairs")
|
| 111 |
from unsloth import FastLanguageModel # noqa: E402
|
| 112 |
|