File size: 1,244 Bytes
4419350
 
 
 
 
 
 
 
 
 
 
 
 
ad48770
4419350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/usr/bin/env bash
# Run training/train_grpo.ipynb on Hugging Face Jobs from your machine.
#
# Prereqs: `hf auth login`, or export HF_TOKEN. When HF_TOKEN is exported and
# non-empty it is also forwarded to the job with `--secrets HF_TOKEN`.
#
# Environment overrides (default in parentheses):
#   HF_JOB_IMAGE    container image (pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime)
#   HF_JOB_FLAVOR   hardware flavor (l4x1)
#   HF_JOB_TIMEOUT  job timeout (8h)
#   HF_REPO_URL     git repository cloned inside the job
#                   (https://github.com/VaibhavKhandare/viral-posts-env.git)
#   HF_REPO_BRANCH  branch cloned inside the job (main)
#
# Optional — hf skills add (newer CLI only; do not upgrade global hf if you use transformers):
#   uv venv .venv-hf && . .venv-hf/bin/activate && pip install -U 'huggingface_hub>=1.11' typer && hf skills add

set -euo pipefail

readonly IMAGE="${HF_JOB_IMAGE:-pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime}"
readonly FLAVOR="${HF_JOB_FLAVOR:-l4x1}"
readonly TIMEOUT="${HF_JOB_TIMEOUT:-8h}"
readonly REPO_URL="${HF_REPO_URL:-https://github.com/VaibhavKhandare/viral-posts-env.git}"
readonly REPO_BRANCH="${HF_REPO_BRANCH:-main}"

# Script executed inside the job container. It is single-quoted on purpose:
# ${REPO_URL} and ${REPO_BRANCH} must be expanded remotely, from the values
# passed with --env, not by this local shell.
# Steps: install git/curl (plus CA certificates for the HTTPS clone), clone the
# requested branch into /work, install the Jupyter tooling, then execute the
# notebook in place with ExecutePreprocessor.timeout=86400.
readonly REMOTE_SCRIPT='set -euo pipefail
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq && apt-get install -y --no-install-recommends ca-certificates git curl
rm -rf /work && git clone --depth 1 --branch "${REPO_BRANCH}" -- "${REPO_URL}" /work
cd /work
pip install -q --root-user-action=ignore jupyter nbconvert nbclient ipykernel
jupyter nbconvert --to notebook --execute training/train_grpo.ipynb \
  --ExecutePreprocessor.timeout=86400 --inplace'

# Fail early with a readable message instead of a bare exec error.
if ! command -v hf >/dev/null 2>&1; then
  printf 'error: the "hf" CLI was not found on PATH; install huggingface_hub and run "hf auth login"\n' >&2
  exit 127
fi

# Options for `hf jobs run`, kept in an array so every value stays one word.
job_args=(
  --flavor "$FLAVOR"
  --detach
  --timeout "$TIMEOUT"
  --env "REPO_URL=$REPO_URL"
  --env "REPO_BRANCH=$REPO_BRANCH"
)

# Forward the token as a job secret only when the caller exported one; the
# ":-" default keeps `set -u` from aborting when HF_TOKEN is unset.
if [[ -n "${HF_TOKEN:-}" ]]; then
  job_args+=(--secrets HF_TOKEN)
fi

# exec replaces this shell, so the exit status is that of `hf jobs run`.
exec hf jobs run "${job_args[@]}" "$IMAGE" bash -lc "$REMOTE_SCRIPT"