polyguard-openenv / scripts /deploy_training_space.py
TheJackBright's picture
Deploy PolyGuard OpenEnv Space
877add7 verified
#!/usr/bin/env python3
"""Create/update the private Hugging Face Space used for PolyGuard training."""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import shutil
import sys
from huggingface_hub import HfApi
# Project root: the parent of the directory that contains this script
# (``parents[0]`` is the script's own directory, ``parents[1]`` is one level up).
ROOT = Path(__file__).resolve().parents[1]
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the deployment script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, so the existing zero-argument call keeps
            working; passing a list makes the parser usable programmatically.

    Returns:
        Namespace with ``repo_id``, ``artifact_repo_id``, ``model_id``,
        ``hardware``, ``sleep_time`` (int), ``bundle_dir`` and the boolean
        flags ``public``, ``skip_upload`` and ``bundle_only``.
    """
    parser = argparse.ArgumentParser(description="Deploy the PolyGuard remote training Space.")
    parser.add_argument(
        "--repo-id",
        default="TheJackBright/polyguard-openenv-training",
        help="Repo id of the Space to create or update.",
    )
    parser.add_argument(
        "--artifact-repo-id",
        default="TheJackBright/polyguard-openenv-training-artifacts",
        help="Repo id of the private model repo created for training artifacts.",
    )
    parser.add_argument(
        "--model-id",
        default="Qwen/Qwen2.5-0.5B-Instruct",
        help="Model id exposed to the Space as POLYGUARD_MODEL_ID.",
    )
    parser.add_argument(
        "--hardware",
        default="t4-small",
        help="Hardware flavor requested for the Space.",
    )
    parser.add_argument(
        "--sleep-time",
        type=int,
        default=3600,
        help="Sleep time, in seconds, requested for the Space.",
    )
    parser.add_argument(
        "--bundle-dir",
        default="/tmp/polyguard-openenv-training-space",
        help="Directory where the bundle is staged (recreated on every run).",
    )
    parser.add_argument(
        "--public",
        action="store_true",
        help="Create the Space as public instead of private.",
    )
    parser.add_argument(
        "--skip-upload",
        action="store_true",
        help="Create and configure the repos but do not upload the bundle.",
    )
    parser.add_argument(
        "--bundle-only",
        action="store_true",
        help="Build the bundle directory and exit without calling the Hub.",
    )
    return parser.parse_args(argv)
def _ignore(_dir: str, names: list[str]) -> set[str]:
    """Pick the directory entries that must be left out of the bundle.

    Has the ``(directory, names)`` signature of a ``shutil.copytree`` ignore
    callback; the directory argument is not consulted.

    Args:
        _dir: Directory currently being copied (unused).
        names: Entry names found in that directory.

    Returns:
        The subset of ``names`` that is either one of the fixed names below
        or ends with ``.pyc``.
    """
    # Names excluded on an exact match, wherever they occur in the tree.
    exact_matches = frozenset(
        (
            ".git",
            ".venv",
            "__pycache__",
            ".pytest_cache",
            ".mypy_cache",
            ".ruff_cache",
            "outputs",
            "checkpoints",
            "polyguard_rl.egg-info",
            "dist",
            "build",
            "node_modules",
            ".DS_Store",
        )
    )
    skipped: set[str] = set()
    for entry in names:
        if entry.endswith(".pyc") or entry in exact_matches:
            skipped.add(entry)
    return skipped
def build_bundle(bundle_dir: Path) -> None:
    """Stage a fresh copy of the project tree laid out as a Docker Space.

    Steps, in order:
      1. delete ``bundle_dir`` if it already exists;
      2. copy ``ROOT`` into it, filtered through ``_ignore``;
      3. copy ``app/hf_space/Dockerfile`` to the bundle's top level;
      4. move the project's ``README.md`` (if present) to
         ``PROJECT_README.md`` and write a new ``README.md`` carrying the
         Space front matter.

    Args:
        bundle_dir: Directory to (re)create with the bundle contents.

    Raises:
        ValueError: If ``bundle_dir`` resolves to the project root or to one
            of its parent directories.
    """
    # The next step deletes ``bundle_dir`` recursively. If it is the project
    # root or one of its ancestors, that would wipe the very tree we are
    # about to copy, so refuse before touching the filesystem.
    resolved_target = bundle_dir.resolve()
    if resolved_target == ROOT or resolved_target in ROOT.parents:
        raise ValueError(
            f"refusing to use {bundle_dir} as bundle directory: "
            f"it is or contains the project root {ROOT}"
        )

    if bundle_dir.exists():
        shutil.rmtree(bundle_dir)
    shutil.copytree(ROOT, bundle_dir, ignore=_ignore)
    shutil.copy2(ROOT / "app" / "hf_space" / "Dockerfile", bundle_dir / "Dockerfile")

    # Keep the project's own README under another name; the Space needs a
    # README.md that starts with its configuration front matter.
    project_readme = bundle_dir / "PROJECT_README.md"
    if (bundle_dir / "README.md").exists():
        (bundle_dir / "README.md").replace(project_readme)
    (bundle_dir / "README.md").write_text(
        """---
title: PolyGuard HF Training
sdk: docker
app_port: 7860
pinned: false
---
# PolyGuard HF Training
Private Docker Space for running PolyGuard SFT/GRPO training on Hugging Face hardware.
The original project README is included as `PROJECT_README.md`.
""",
        encoding="utf-8",
    )
def main() -> None:
    """Build the bundle, then create, configure and populate the Space.

    The bundle is always built first; with ``--bundle-only`` the function
    prints its location and returns before any Hub call. Otherwise it reads
    ``HF_TOKEN`` from the environment, creates the artifact repo and the
    Space (both with ``exist_ok=True``), sets the Space variables and, when a
    token is available, the ``HF_TOKEN`` secret, uploads the bundle unless
    ``--skip-upload`` was given, requests the hardware, and prints the
    resulting URLs.
    """
    args = parse_args()
    bundle_dir = Path(args.bundle_dir)
    build_bundle(bundle_dir)
    if args.bundle_only:
        print(f"bundle_dir={bundle_dir}")
        return

    hf_token = os.getenv("HF_TOKEN")
    hub = HfApi(token=hf_token)
    account = hub.whoami(token=hf_token)
    username = str(account.get("name") or account.get("fullname") or "")
    if username:
        # Informational only: the target repo is outside the authenticated
        # user's own namespace.
        if not args.repo_id.startswith(f"{username}/"):
            print(f"[deploy_training_space] authenticated as {username}; target={args.repo_id}")

    # One mapping feeds both the create_repo payload and the explicit
    # per-variable calls that follow it.
    settings = {
        "POLYGUARD_MODEL_ID": args.model_id,
        "POLYGUARD_OFFLINE_MODE": "false",
        "POLYGUARD_AUTORUN": "1",
        "POLYGUARD_ARTIFACT_REPO_ID": args.artifact_repo_id,
        "POLYGUARD_SPACE_REPO_ID": args.repo_id,
    }
    space_variables = [{"key": name, "value": value} for name, value in settings.items()]
    space_secrets = None
    if hf_token:
        space_secrets = [{"key": "HF_TOKEN", "value": hf_token}]

    hub.create_repo(repo_id=args.artifact_repo_id, repo_type="model", private=True, exist_ok=True)
    hub.create_repo(
        repo_id=args.repo_id,
        repo_type="space",
        space_sdk="docker",
        private=not args.public,
        exist_ok=True,
        space_hardware=args.hardware,
        space_sleep_time=args.sleep_time,
        space_variables=space_variables,
        space_secrets=space_secrets,
    )

    # Variables and the secret are set again explicitly after create_repo.
    for name, value in settings.items():
        hub.add_space_variable(repo_id=args.repo_id, key=name, value=value)
    if hf_token:
        hub.add_space_secret(repo_id=args.repo_id, key="HF_TOKEN", value=hf_token)

    if not args.skip_upload:
        upload_excludes = [
            ".git/*",
            ".venv/*",
            "**/node_modules/*",
            "outputs/*",
            "checkpoints/*",
            "**/__pycache__/*",
            "*.pyc",
        ]
        hub.upload_folder(
            repo_id=args.repo_id,
            repo_type="space",
            folder_path=str(bundle_dir),
            commit_message="Deploy PolyGuard HF training Space",
            ignore_patterns=upload_excludes,
        )

    # Best effort: a failed hardware request is reported on stderr and does
    # not abort the deployment.
    try:
        hub.request_space_hardware(
            repo_id=args.repo_id,
            hardware=args.hardware,
            sleep_time=args.sleep_time,
        )
    except Exception as exc:  # noqa: BLE001
        print(f"hardware_request_warning={exc}", file=sys.stderr)

    print(f"space_url=https://huggingface.co/spaces/{args.repo_id}")
    print(f"artifact_repo=https://huggingface.co/{args.artifact_repo_id}")
    print(f"bundle_dir={bundle_dir}")
# Run the deployment only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()