# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "unsloth",
#     "trl==0.24.0",
#     "transformers",
#     "datasets",
#     "peft",
#     "accelerate",
#     "bitsandbytes",
#     "wandb",
#     "setuptools",
#     "wheel",
#     "pip",
#     "scipy>=1.10,<2.0",
#     "sympy>=1.12,<2.0",
#     "pydantic>=2.5,<3.0",
#     "numpy>=1.24,<3.0",
#     "openenv-core[core]>=0.2.2",
#     "huggingface_hub>=0.24,<1.0",
#     "matplotlib>=3.7,<4.0",
# ]
# ///
"""PhysiX RLVR single-system training job — damped_spring only.

Identical pipeline to job_train.py (SFT warm-start → GRPO) but scoped to a
single physical system (damped_spring) so the reward signal is maximally
focused and easy to observe as a clean increasing curve.

Deploy with:

    hf jobs uv run job_train_single.py \
        --image unsloth/unsloth:2026.3.8-pt2.9.0-vllm-0.16.0-cu12.8-studio-release \
        --flavor l40sx1 \
        --secrets HF_TOKEN \
        --secrets WANDB_API_KEY \
        -v hf://datasets/Pratyush-01/physix-live-src:/physix-live \
        --timeout 2h
"""
from __future__ import annotations

import os
import shutil
import subprocess
import sys
from pathlib import Path

SYSTEM_ID = "damped_spring"

PROFILE: dict = {
    "base_model": "Qwen/Qwen2.5-3B-Instruct",
    "sft_lora_r": "32",
    "grpo_lora_r": "32",
    "sft_lr": "1.5e-5",
    "grpo_lr": "3e-6",
    "sft_epochs": "3",
    "num_steps": "200",
    "num_generations": "4",
    "max_completion": "256",
    # Separate repos so this run never touches the 3-system checkpoints.
    "hub_final_repo": "Pratyush-01/physix-3b-rl-damped",
    "hub_ckpt_repo": "Pratyush-01/physix-3b-rl-damped-ckpt",
    "sft_run_name": "physix-sft-3b-damped",
    "grpo_run_name": "physix-grpo-3b-damped",
}

# ---------------------------------------------------------------------------
# Environment hardening (same as job_train.py)
# ---------------------------------------------------------------------------


def _harden_env() -> None:
    os.environ.setdefault("USER", "physix")
    os.environ.setdefault("LOGNAME", "physix")
    os.environ.setdefault("HOME", "/tmp/home")
    os.environ.setdefault("HF_HOME", "/tmp/hf_cache")
    os.environ.setdefault("TORCHINDUCTOR_CACHE_DIR", "/tmp/torchinductor_cache")
    os.environ.setdefault("TRITON_CACHE_DIR", "/tmp/triton_cache")
    os.environ.setdefault("XDG_CACHE_HOME", "/tmp/xdg-cache")
    os.environ.setdefault("WANDB_DIR", "/tmp/wandb")
    os.environ.setdefault("WANDB_CACHE_DIR", "/tmp/wandb-cache")
    os.environ.setdefault("WANDB_DATA_DIR", "/tmp/wandb-data")
    os.environ.setdefault("WANDB_ARTIFACT_DIR", "/tmp/wandb-artifacts")
    os.environ.setdefault("WANDB_CONFIG_DIR", "/tmp/wandb-config")
    os.environ.setdefault("WANDB_DISABLE_ARTIFACTS", "true")
    os.environ.setdefault("WANDB_LOG_MODEL", "false")
    os.environ.setdefault("WANDB_PROJECT", "physix-live")
    os.environ.setdefault("UNSLOTH_COMPILE_DISABLE", "1")
    os.environ.setdefault("TORCH_COMPILE_DISABLE", "1")
    os.environ.setdefault("TORCHINDUCTOR_DISABLE", "1")
    os.environ.setdefault("TORCHDYNAMO_DISABLE", "1")
    os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
    os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
    os.environ.setdefault("PYTHONUNBUFFERED", "1")
    if os.environ.get("HF_TOKEN"):
        os.environ.setdefault("HUGGINGFACE_HUB_TOKEN", os.environ["HF_TOKEN"])
    for d in (
        os.environ["HOME"],
        os.environ["HF_HOME"],
        os.environ["TORCHINDUCTOR_CACHE_DIR"],
        os.environ["TRITON_CACHE_DIR"],
        os.environ["XDG_CACHE_HOME"],
        os.environ["WANDB_DIR"],
        os.environ["WANDB_CACHE_DIR"],
        os.environ["WANDB_DATA_DIR"],
        os.environ["WANDB_ARTIFACT_DIR"],
        os.environ["WANDB_CONFIG_DIR"],
    ):
        Path(d).mkdir(parents=True, exist_ok=True)


def _banner(msg: str) -> None:
    line = "=" * 72
{msg}\n{line}", flush=True) def _run(cmd: list[str], *, env: dict | None = None) -> None: print(f"$ {' '.join(cmd)}", flush=True) subprocess.run(cmd, check=True, env=env or os.environ.copy()) def _require(name: str) -> str: val = os.environ.get(name) if not val: sys.exit(f"FATAL: required secret {name!r} is not set on the job") return val def _stage_physix_live() -> Path: src = Path("/physix-live") if not src.exists(): sys.exit( "FATAL: expected physix-live source mounted at /physix-live. " "Pass `-v hf://datasets//physix-live-src:/physix-live` " "when submitting the job." ) dst = Path("/tmp/src/physix-live") if dst.exists(): shutil.rmtree(dst) dst.parent.mkdir(parents=True, exist_ok=True) shutil.copytree(src, dst) return dst def _install_physix(repo: Path) -> None: install_args = ["--no-cache-dir", "-e", str(repo), "--no-deps"] try: _run(["uv", "pip", "install", "--python", sys.executable, *install_args]) return except (subprocess.CalledProcessError, FileNotFoundError) as exc: print(f"[install] uv pip path failed ({exc!r}); bootstrapping pip via ensurepip", flush=True) _run([sys.executable, "-m", "ensurepip", "--upgrade"]) _run([sys.executable, "-m", "pip", "install", *install_args]) def _sanity_check_imports() -> None: print("--- Sanity import check ---", flush=True) code = ( "import torch, trl, transformers, datasets, wandb, unsloth, physix; " "print(f'torch={torch.__version__} cuda={torch.cuda.is_available()} " "device={torch.cuda.get_device_name(0) if torch.cuda.is_available() else None}'); " "print(f'unsloth={unsloth.__version__} trl={trl.__version__} " "transformers={transformers.__version__} datasets={datasets.__version__}'); " "print(f'physix loaded from {physix.__file__}'); " "assert trl.__version__ == '0.24.0', f'trl must be pinned to 0.24.0, got {trl.__version__}'" ) _run([sys.executable, "-c", code]) def _gpu_check() -> None: print("--- GPU check ---", flush=True) try: subprocess.run(["nvidia-smi"], check=True) except FileNotFoundError: sys.exit("FATAL: nvidia-smi missing — job hardware is not GPU") # --------------------------------------------------------------------------- # SFT + GRPO steps, each locked to SYSTEM_ID # --------------------------------------------------------------------------- def _run_sft() -> None: p = PROFILE _banner(f"Step 1/2: SFT warm-start ({p['base_model']}) — system: {SYSTEM_ID}") _run([ sys.executable, "-m", "physix.training.sft", "--model", p["base_model"], "--output-dir", "/tmp/physix-sft-damped", "--epochs", p["sft_epochs"], "--instances-per-system", "32", "--system-ids", SYSTEM_ID, "--lora-r", p["sft_lora_r"], "--learning-rate", p["sft_lr"], "--wandb-run-name", p["sft_run_name"], "--hub-checkpoint-repo-id", p["hub_ckpt_repo"], "--seed", "0", ]) def _run_grpo() -> None: p = PROFILE _banner(f"Step 2/2: GRPO RLVR ({p['num_steps']} steps) — system: {SYSTEM_ID}") _run([ sys.executable, "-m", "physix.training.loop", "--model", p["base_model"], "--output-dir", "/tmp/physix-grpo-damped", "--num-steps", p["num_steps"], "--num-generations", p["num_generations"], "--max-completion-length", p["max_completion"], "--learning-rate", p["grpo_lr"], "--instances-per-system", "32", "--system-ids", SYSTEM_ID, "--lora-r", p["grpo_lora_r"], "--save-method", "merged_16bit", "--push-to-hub", "--hub-repo-id", p["hub_final_repo"], "--hub-checkpoint-repo-id", p["hub_ckpt_repo"], "--wandb-project", "physix-live", "--wandb-run-name", p["grpo_run_name"], "--sft-checkpoint", "/tmp/physix-sft-damped/merged", "--seed", "0", ]) def main() -> None: _harden_env() _banner( 
f"PhysiX RLVR single-system job\n" f" system: {SYSTEM_ID}\n" f" model: {PROFILE['base_model']}\n" f" steps: {PROFILE['num_steps']}\n" f" wandb: physix-live / {PROFILE['grpo_run_name']}" ) _require("HF_TOKEN") _require("WANDB_API_KEY") _gpu_check() repo = _stage_physix_live() _install_physix(repo) _sanity_check_imports() _run_sft() _run_grpo() _banner("DONE") print( f"System trained on → {SYSTEM_ID}\n" f"Final model → https://huggingface.co/{PROFILE['hub_final_repo']}\n" f"Checkpoints → https://huggingface.co/{PROFILE['hub_ckpt_repo']}\n" f"W&B project → https://wandb.ai/pratyush01/physix-live\n", flush=True, ) if __name__ == "__main__": main()