Spaces:
Paused
Paused
Commit ·
4419350
1
Parent(s): b1bd9cc
update
Browse files
- plots/signals_breakdown.png +3 -0
- train-bhai-train +1 -0
- training/hf_run_space_train_job.sh +44 -0
- training/hf_run_train_grpo.sh +30 -0
plots/signals_breakdown.png
ADDED
|
Git LFS Details
|
train-bhai-train
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Subproject commit ce11257dc34d8f8dbb36445cf834f89852539088
|
training/hf_run_space_train_job.sh
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Launch a detached Hugging Face Job that clones the Space repo, executes
# training/train_grpo.ipynb with nbconvert, and uploads selected outputs back
# to the Space under run-output/.
#
# Same environment as your HF Job (Space clone + nbconvert + upload to Space).
# Old UI command was invalid shell (no &&); this version is a proper chain.
#
# Requires: hf auth login (token is sent via --secrets HF_TOKEN from the CLI cache)
# Optional environment overrides:
#   HF_JOB_IMAGE      container image (default pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime)
#   HF_JOB_FLAVOR     job hardware flavor (default l4x1)
#   HF_JOB_TIMEOUT    job timeout (default 8h)
#   HF_SPACE_REPO_ID  Space to clone and upload to (default vaibhavkhandare/train-bhai-train)
#   NB_EXEC_TIMEOUT   value passed to nbconvert's ExecutePreprocessor.timeout (default 3600)

set -euo pipefail

IMAGE="${HF_JOB_IMAGE:-pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime}"
FLAVOR="${HF_JOB_FLAVOR:-l4x1}"
TIMEOUT="${HF_JOB_TIMEOUT:-8h}"
SPACE_REPO="${HF_SPACE_REPO_ID:-vaibhavkhandare/train-bhai-train}"
NB_EXEC_TIMEOUT="${NB_EXEC_TIMEOUT:-3600}"

# Fail early with an accurate message when the CLI itself is missing;
# otherwise the auth probe below would misreport it as a login problem.
if ! command -v hf >/dev/null 2>&1; then
  echo "hf CLI not found on PATH" >&2
  exit 127
fi

# The job needs the local token (forwarded via --secrets HF_TOKEN), so refuse
# to submit when the CLI is not logged in.
if ! hf auth whoami &>/dev/null; then
  echo "Run: hf auth login" >&2
  exit 1
fi

# Script executed inside the job container. The quoted heredoc delimiter keeps
# every ${...} literal here, so HF_TOKEN, SPACE_REPO and NB_EXEC_TIMEOUT are
# expanded remotely from the --secrets/--env values passed below.
# NOTE(review): the clone URL embeds HF_TOKEN, so the token is written to
# /work/.git/config inside the container; consider a git credential helper if
# that is a concern.
REMOTE_SCRIPT=$(cat <<'EOS'
set -euo pipefail
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq && apt-get install -y --no-install-recommends git curl ca-certificates
pip install -q --root-user-action=ignore --upgrade "typing_extensions>=4.15.0" jupyter nbconvert nbclient ipykernel huggingface_hub
rm -rf /work
git clone --depth 1 "https://user:${HF_TOKEN}@huggingface.co/spaces/${SPACE_REPO}" /work
cd /work
jupyter nbconvert --to notebook --execute training/train_grpo.ipynb \
  --output train_grpo.executed.ipynb \
  --ExecutePreprocessor.timeout="${NB_EXEC_TIMEOUT}"
python -c "import os; from huggingface_hub import HfApi; HfApi().upload_folder(folder_path='.', path_in_repo='run-output', repo_id=os.environ['SPACE_REPO'], repo_type='space', allow_patterns=['training/train_grpo.executed.ipynb','plots/**','**/lora-*/**'])"
EOS
)

# Replace this shell with the CLI call; --detach submits the job without
# waiting for it to finish.
exec hf jobs run \
  --flavor "$FLAVOR" \
  --detach \
  --timeout "$TIMEOUT" \
  --secrets HF_TOKEN \
  --env "SPACE_REPO=$SPACE_REPO" \
  --env "NB_EXEC_TIMEOUT=$NB_EXEC_TIMEOUT" \
  "$IMAGE" \
  bash -lc "$REMOTE_SCRIPT"
|
training/hf_run_train_grpo.sh
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Run train_grpo.ipynb on Hugging Face Jobs from your machine.
# Prereqs: hf auth login (or export HF_TOKEN for API + --secrets HF_TOKEN below)
#
# Optional — hf skills add (newer CLI only; do not upgrade global hf if you use transformers):
#   uv venv .venv-hf && . .venv-hf/bin/activate && pip install -U 'huggingface_hub>=1.11' typer && hf skills add
#
# Optional environment overrides:
#   HF_JOB_IMAGE     container image (default pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime)
#   HF_JOB_FLAVOR    job hardware flavor (default l4x1)
#   HF_JOB_TIMEOUT   job timeout (default 8h)
#   HF_REPO_URL      git repository to clone (default https://github.com/VaibhavKhandare/viral-posts-env.git)
#   HF_REPO_BRANCH   branch to clone (default hack1)
#   NB_EXEC_TIMEOUT  value passed to nbconvert's ExecutePreprocessor.timeout (default 86400)

set -euo pipefail

IMAGE="${HF_JOB_IMAGE:-pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime}"
FLAVOR="${HF_JOB_FLAVOR:-l4x1}"
TIMEOUT="${HF_JOB_TIMEOUT:-8h}"
REPO_URL="${HF_REPO_URL:-https://github.com/VaibhavKhandare/viral-posts-env.git}"
REPO_BRANCH="${HF_REPO_BRANCH:-hack1}"
# Previously hard-coded as 86400 in the remote command; the default keeps that value.
NB_EXEC_TIMEOUT="${NB_EXEC_TIMEOUT:-86400}"

# Replace this shell with the CLI call; --detach submits the job without
# waiting for it to finish.
# --secrets HF_TOKEN forwards the local token to the job, as the header
# promises (it was missing from the command before).
# The remote script is single-quoted, so ${REPO_BRANCH}, ${REPO_URL} and
# ${NB_EXEC_TIMEOUT} are expanded inside the container from the --env values.
# ca-certificates is installed explicitly because --no-install-recommends can
# omit it and the clone goes over HTTPS.
exec hf jobs run \
  --flavor "$FLAVOR" \
  --detach \
  --timeout "$TIMEOUT" \
  --secrets HF_TOKEN \
  --env "REPO_URL=$REPO_URL" \
  --env "REPO_BRANCH=$REPO_BRANCH" \
  --env "NB_EXEC_TIMEOUT=$NB_EXEC_TIMEOUT" \
  "$IMAGE" \
  bash -lc 'set -euo pipefail
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq && apt-get install -y --no-install-recommends git curl ca-certificates
rm -rf /work && git clone --depth 1 --branch "${REPO_BRANCH}" "${REPO_URL}" /work
cd /work
pip install -q --root-user-action=ignore jupyter nbconvert nbclient ipykernel
jupyter nbconvert --to notebook --execute training/train_grpo.ipynb \
  --ExecutePreprocessor.timeout="${NB_EXEC_TIMEOUT}" --inplace'
|