# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "unsloth",
#     "trl==0.24.0",
#     "transformers",
#     "datasets",
#     "peft",
#     "accelerate",
#     "bitsandbytes",
#     "wandb",
#     "setuptools",
#     "wheel",
#     "pip",
#     "scipy>=1.10,<2.0",
#     "sympy>=1.12,<2.0",
#     "pydantic>=2.5,<3.0",
#     "numpy>=1.24,<3.0",
#     "openenv-core[core]>=0.2.2",
#     "huggingface_hub>=0.24,<1.0",
#     "matplotlib>=3.7,<4.0",
# ]
# ///
"""PhysiX RLVR single-system training job — damped_spring only.

Identical pipeline to job_train.py (SFT warm-start → GRPO) but scoped
to a single physical system (damped_spring) so the reward signal is
maximally focused and easy to observe as a clean increasing curve.

Deploy with:

    hf jobs uv run job_train_single.py \
        --image unsloth/unsloth:2026.3.8-pt2.9.0-vllm-0.16.0-cu12.8-studio-release \
        --flavor l40sx1 \
        --secrets HF_TOKEN \
        --secrets WANDB_API_KEY \
        -v hf://datasets/Pratyush-01/physix-live-src:/physix-live \
        --timeout 2h
"""
from __future__ import annotations

import os
import shutil
import subprocess
import sys
from pathlib import Path
# The single physical system this job trains on; forwarded as --system-ids
# to both training subprocesses below.
SYSTEM_ID = "damped_spring"

# Hyperparameter profile for the run. All numeric values are kept as strings
# because they are passed verbatim as CLI arguments to subprocesses.
PROFILE: dict = {
    "base_model": "Qwen/Qwen2.5-3B-Instruct",
    "sft_lora_r": "32",
    "grpo_lora_r": "32",
    "sft_lr": "1.5e-5",
    "grpo_lr": "3e-6",
    "sft_epochs": "3",
    "num_steps": "200",
    "num_generations": "4",
    "max_completion": "256",
    # Separate repos so this run never touches the 3-system checkpoints.
    "hub_final_repo": "Pratyush-01/physix-3b-rl-damped",
    "hub_ckpt_repo": "Pratyush-01/physix-3b-rl-damped-ckpt",
    "sft_run_name": "physix-sft-3b-damped",
    "grpo_run_name": "physix-grpo-3b-damped",
}
# ---------------------------------------------------------------------------
# Environment hardening (same as job_train.py)
# ---------------------------------------------------------------------------
| def _harden_env() -> None: | |
| os.environ.setdefault("USER", "physix") | |
| os.environ.setdefault("LOGNAME", "physix") | |
| os.environ.setdefault("HOME", "/tmp/home") | |
| os.environ.setdefault("HF_HOME", "/tmp/hf_cache") | |
| os.environ.setdefault("TORCHINDUCTOR_CACHE_DIR", "/tmp/torchinductor_cache") | |
| os.environ.setdefault("TRITON_CACHE_DIR", "/tmp/triton_cache") | |
| os.environ.setdefault("XDG_CACHE_HOME", "/tmp/xdg-cache") | |
| os.environ.setdefault("WANDB_DIR", "/tmp/wandb") | |
| os.environ.setdefault("WANDB_CACHE_DIR", "/tmp/wandb-cache") | |
| os.environ.setdefault("WANDB_DATA_DIR", "/tmp/wandb-data") | |
| os.environ.setdefault("WANDB_ARTIFACT_DIR", "/tmp/wandb-artifacts") | |
| os.environ.setdefault("WANDB_CONFIG_DIR", "/tmp/wandb-config") | |
| os.environ.setdefault("WANDB_DISABLE_ARTIFACTS", "true") | |
| os.environ.setdefault("WANDB_LOG_MODEL", "false") | |
| os.environ.setdefault("WANDB_PROJECT", "physix-live") | |
| os.environ.setdefault("UNSLOTH_COMPILE_DISABLE", "1") | |
| os.environ.setdefault("TORCH_COMPILE_DISABLE", "1") | |
| os.environ.setdefault("TORCHINDUCTOR_DISABLE", "1") | |
| os.environ.setdefault("TORCHDYNAMO_DISABLE", "1") | |
| os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True") | |
| os.environ.setdefault("TOKENIZERS_PARALLELISM", "false") | |
| os.environ.setdefault("PYTHONUNBUFFERED", "1") | |
| if os.environ.get("HF_TOKEN"): | |
| os.environ.setdefault("HUGGINGFACE_HUB_TOKEN", os.environ["HF_TOKEN"]) | |
| for d in ( | |
| os.environ["HOME"], | |
| os.environ["HF_HOME"], | |
| os.environ["TORCHINDUCTOR_CACHE_DIR"], | |
| os.environ["TRITON_CACHE_DIR"], | |
| os.environ["XDG_CACHE_HOME"], | |
| os.environ["WANDB_DIR"], | |
| os.environ["WANDB_CACHE_DIR"], | |
| os.environ["WANDB_DATA_DIR"], | |
| os.environ["WANDB_ARTIFACT_DIR"], | |
| os.environ["WANDB_CONFIG_DIR"], | |
| ): | |
| Path(d).mkdir(parents=True, exist_ok=True) | |
| def _banner(msg: str) -> None: | |
| line = "=" * 72 | |
| print(f"\n{line}\n {msg}\n{line}", flush=True) | |
| def _run(cmd: list[str], *, env: dict | None = None) -> None: | |
| print(f"$ {' '.join(cmd)}", flush=True) | |
| subprocess.run(cmd, check=True, env=env or os.environ.copy()) | |
| def _require(name: str) -> str: | |
| val = os.environ.get(name) | |
| if not val: | |
| sys.exit(f"FATAL: required secret {name!r} is not set on the job") | |
| return val | |
def _stage_physix_live() -> Path:
    """Copy the read-only /physix-live mount into writable /tmp staging.

    Returns the staged path; aborts with a FATAL message when the dataset
    volume was not mounted on the job.
    """
    mount = Path("/physix-live")
    if not mount.exists():
        sys.exit(
            "FATAL: expected physix-live source mounted at /physix-live. "
            "Pass `-v hf://datasets/<user>/physix-live-src:/physix-live` "
            "when submitting the job."
        )
    staging = Path("/tmp/src/physix-live")
    staging.parent.mkdir(parents=True, exist_ok=True)
    # Re-runs must start from a clean copy.
    if staging.exists():
        shutil.rmtree(staging)
    shutil.copytree(mount, staging)
    return staging
def _install_physix(repo: Path) -> None:
    """Editable-install the staged physix package into this interpreter.

    Tries the fast ``uv pip`` path first; if uv is missing or fails,
    bootstraps pip via ensurepip and retries with plain pip.
    """
    install_args = ["--no-cache-dir", "-e", str(repo), "--no-deps"]
    try:
        _run(["uv", "pip", "install", "--python", sys.executable, *install_args])
    except (subprocess.CalledProcessError, FileNotFoundError) as exc:
        print(f"[install] uv pip path failed ({exc!r}); bootstrapping pip via ensurepip", flush=True)
    else:
        return
    _run([sys.executable, "-m", "ensurepip", "--upgrade"])
    _run([sys.executable, "-m", "pip", "install", *install_args])
def _sanity_check_imports() -> None:
    """Import the whole training stack in a child interpreter.

    Prints the resolved versions and asserts the trl pin, so a broken
    dependency set fails fast before any training starts.
    """
    print("--- Sanity import check ---", flush=True)
    fragments = (
        "import torch, trl, transformers, datasets, wandb, unsloth, physix; ",
        "print(f'torch={torch.__version__} cuda={torch.cuda.is_available()} ",
        "device={torch.cuda.get_device_name(0) if torch.cuda.is_available() else None}'); ",
        "print(f'unsloth={unsloth.__version__} trl={trl.__version__} ",
        "transformers={transformers.__version__} datasets={datasets.__version__}'); ",
        "print(f'physix loaded from {physix.__file__}'); ",
        "assert trl.__version__ == '0.24.0', f'trl must be pinned to 0.24.0, got {trl.__version__}'",
    )
    _run([sys.executable, "-c", "".join(fragments)])
| def _gpu_check() -> None: | |
| print("--- GPU check ---", flush=True) | |
| try: | |
| subprocess.run(["nvidia-smi"], check=True) | |
| except FileNotFoundError: | |
| sys.exit("FATAL: nvidia-smi missing — job hardware is not GPU") | |
# ---------------------------------------------------------------------------
# SFT + GRPO steps, each locked to SYSTEM_ID
# ---------------------------------------------------------------------------
def _run_sft() -> None:
    """Step 1/2: SFT warm-start of the base model on SYSTEM_ID only."""
    p = PROFILE
    _banner(f"Step 1/2: SFT warm-start ({p['base_model']}) — system: {SYSTEM_ID}")
    cmd = [sys.executable, "-m", "physix.training.sft"]
    cmd += ["--model", p["base_model"]]
    cmd += ["--output-dir", "/tmp/physix-sft-damped"]
    cmd += ["--epochs", p["sft_epochs"]]
    cmd += ["--instances-per-system", "32"]
    cmd += ["--system-ids", SYSTEM_ID]
    cmd += ["--lora-r", p["sft_lora_r"]]
    cmd += ["--learning-rate", p["sft_lr"]]
    cmd += ["--wandb-run-name", p["sft_run_name"]]
    cmd += ["--hub-checkpoint-repo-id", p["hub_ckpt_repo"]]
    cmd += ["--seed", "0"]
    _run(cmd)
def _run_grpo() -> None:
    """Step 2/2: GRPO RLVR on SYSTEM_ID, starting from the SFT merge."""
    p = PROFILE
    _banner(f"Step 2/2: GRPO RLVR ({p['num_steps']} steps) — system: {SYSTEM_ID}")
    cmd = [sys.executable, "-m", "physix.training.loop"]
    cmd += ["--model", p["base_model"]]
    cmd += ["--output-dir", "/tmp/physix-grpo-damped"]
    cmd += ["--num-steps", p["num_steps"]]
    cmd += ["--num-generations", p["num_generations"]]
    cmd += ["--max-completion-length", p["max_completion"]]
    cmd += ["--learning-rate", p["grpo_lr"]]
    cmd += ["--instances-per-system", "32"]
    cmd += ["--system-ids", SYSTEM_ID]
    cmd += ["--lora-r", p["grpo_lora_r"]]
    cmd += ["--save-method", "merged_16bit"]
    cmd += ["--push-to-hub"]
    cmd += ["--hub-repo-id", p["hub_final_repo"]]
    cmd += ["--hub-checkpoint-repo-id", p["hub_ckpt_repo"]]
    cmd += ["--wandb-project", "physix-live"]
    cmd += ["--wandb-run-name", p["grpo_run_name"]]
    # Warm-start from the merged SFT checkpoint produced by _run_sft.
    cmd += ["--sft-checkpoint", "/tmp/physix-sft-damped/merged"]
    cmd += ["--seed", "0"]
    _run(cmd)
def main() -> None:
    """Entry point: harden env, validate the job, then run SFT → GRPO."""
    _harden_env()
    header = (
        "PhysiX RLVR single-system job",
        f" system: {SYSTEM_ID}",
        f" model: {PROFILE['base_model']}",
        f" steps: {PROFILE['num_steps']}",
        f" wandb: physix-live / {PROFILE['grpo_run_name']}",
    )
    _banner("\n".join(header))
    # Both secrets are hard requirements; abort before any heavy work.
    for secret in ("HF_TOKEN", "WANDB_API_KEY"):
        _require(secret)
    _gpu_check()
    staged_repo = _stage_physix_live()
    _install_physix(staged_repo)
    _sanity_check_imports()
    _run_sft()
    _run_grpo()
    _banner("DONE")
    summary = (
        f"System trained on → {SYSTEM_ID}",
        f"Final model → https://huggingface.co/{PROFILE['hub_final_repo']}",
        f"Checkpoints → https://huggingface.co/{PROFILE['hub_ckpt_repo']}",
        "W&B project → https://wandb.ai/pratyush01/physix-live",
        "",
    )
    print("\n".join(summary), flush=True)


if __name__ == "__main__":
    main()