vaibhav12332112312 committed on
Commit
4419350
·
1 Parent(s): b1bd9cc
plots/signals_breakdown.png ADDED

Git LFS Details

  • SHA256: 969f5a44266316dc0519a90df9190db758c388d438d16a880a349c12b4f42dd8
  • Pointer size: 130 Bytes
  • Size of remote file: 40.4 kB
train-bhai-train ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit ce11257dc34d8f8dbb36445cf834f89852539088
training/hf_run_space_train_job.sh ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Submit a Hugging Face Job that clones the training Space, executes
# training/train_grpo.ipynb with nbconvert, and uploads the executed notebook,
# plots and LoRA folders back to the Space under run-output/.
#
# Same environment as your HF Job (Space clone + nbconvert + upload to Space).
# The old UI command was invalid shell (no &&); this version is a proper chain.
#
# Requires: hf auth login (token is sent via --secrets HF_TOKEN from the CLI cache)
#
# Optional environment overrides:
#   HF_JOB_IMAGE      container image (default pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime)
#   HF_JOB_FLAVOR     hardware flavor (default l4x1)
#   HF_JOB_TIMEOUT    job timeout (default 8h)
#   HF_SPACE_REPO_ID  Space to clone and upload to (default vaibhavkhandare/train-bhai-train)
#   NB_EXEC_TIMEOUT   value for nbconvert's ExecutePreprocessor.timeout (default 3600)

set -euo pipefail

IMAGE="${HF_JOB_IMAGE:-pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime}"
FLAVOR="${HF_JOB_FLAVOR:-l4x1}"
TIMEOUT="${HF_JOB_TIMEOUT:-8h}"
SPACE_REPO="${HF_SPACE_REPO_ID:-vaibhavkhandare/train-bhai-train}"
NB_EXEC_TIMEOUT="${NB_EXEC_TIMEOUT:-3600}"

# Distinguish "CLI not installed" from "not logged in"; without this check a
# missing binary would be reported as an authentication problem.
if ! command -v hf >/dev/null 2>&1; then
  echo "hf CLI not found on PATH" >&2
  exit 1
fi

if ! hf auth whoami &>/dev/null; then
  echo "Run: hf auth login" >&2
  exit 1
fi

# Script executed inside the job container. The heredoc delimiter is quoted so
# nothing is expanded locally: HF_TOKEN, SPACE_REPO and NB_EXEC_TIMEOUT are
# resolved in the container from --secrets / --env.
REMOTE_SCRIPT=$(cat <<'EOS'
set -euo pipefail
# Fail before the slow apt/pip steps if the job was submitted without these.
: "${HF_TOKEN:?HF_TOKEN secret is not set}"
: "${SPACE_REPO:?SPACE_REPO is not set}"
export HF_TOKEN
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq && apt-get install -y --no-install-recommends git curl ca-certificates
pip install -q --root-user-action=ignore --upgrade "typing_extensions>=4.15.0" jupyter nbconvert nbclient ipykernel huggingface_hub
rm -rf /work
# Authenticate through a one-shot credential helper instead of embedding the
# token in the URL, so it is not written to /work/.git/config and does not show
# up in git's messages. The helper reads HF_TOKEN from the environment.
git -c credential.helper='!f() { test "$1" = get && printf "username=user\npassword=%s\n" "${HF_TOKEN}"; }; f' \
  clone --depth 1 "https://huggingface.co/spaces/${SPACE_REPO}" /work
cd /work
# nbconvert writes the output next to the input notebook, i.e. under training/,
# which is the path the upload pattern below expects.
jupyter nbconvert --to notebook --execute training/train_grpo.ipynb \
  --output train_grpo.executed.ipynb \
  --ExecutePreprocessor.timeout="${NB_EXEC_TIMEOUT}"
python -c "import os; from huggingface_hub import HfApi; HfApi().upload_folder(folder_path='.', path_in_repo='run-output', repo_id=os.environ['SPACE_REPO'], repo_type='space', allow_patterns=['training/train_grpo.executed.ipynb','plots/**','**/lora-*/**'])"
EOS
)

# exec: the hf CLI replaces this shell, so its exit status is the script's.
exec hf jobs run \
  --flavor "$FLAVOR" \
  --detach \
  --timeout "$TIMEOUT" \
  --secrets HF_TOKEN \
  --env "SPACE_REPO=$SPACE_REPO" \
  --env "NB_EXEC_TIMEOUT=$NB_EXEC_TIMEOUT" \
  "$IMAGE" \
  bash -lc "$REMOTE_SCRIPT"
training/hf_run_train_grpo.sh ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Run train_grpo.ipynb on Hugging Face Jobs from your machine: the job clones a
# git repository, installs Jupyter tooling and executes
# training/train_grpo.ipynb in place.
#
# Prereqs: hf auth login (or export HF_TOKEN) so the hf CLI can submit the job.
# NOTE(review): this script does not forward the token into the job (there is
# no --secrets HF_TOKEN flag); add one if the notebook needs Hub access.
#
# Optional — hf skills add (newer CLI only; do not upgrade global hf if you use transformers):
# uv venv .venv-hf && . .venv-hf/bin/activate && pip install -U 'huggingface_hub>=1.11' typer && hf skills add
#
# Optional environment overrides:
#   HF_JOB_IMAGE     container image (default pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime)
#   HF_JOB_FLAVOR    hardware flavor (default l4x1)
#   HF_JOB_TIMEOUT   job timeout (default 8h)
#   HF_REPO_URL      repository to clone (default https://github.com/VaibhavKhandare/viral-posts-env.git)
#   HF_REPO_BRANCH   branch to clone (default hack1)
#   NB_EXEC_TIMEOUT  value for nbconvert's ExecutePreprocessor.timeout (default 86400)

set -euo pipefail

IMAGE="${HF_JOB_IMAGE:-pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime}"
FLAVOR="${HF_JOB_FLAVOR:-l4x1}"
TIMEOUT="${HF_JOB_TIMEOUT:-8h}"
REPO_URL="${HF_REPO_URL:-https://github.com/VaibhavKhandare/viral-posts-env.git}"
REPO_BRANCH="${HF_REPO_BRANCH:-hack1}"
# Previously hard-coded to 86400 in the remote command; same default, now
# overridable.
NB_EXEC_TIMEOUT="${NB_EXEC_TIMEOUT:-86400}"

# Fail early with a clear message instead of a "command not found" from exec.
if ! command -v hf >/dev/null 2>&1; then
  echo "hf CLI not found on PATH" >&2
  exit 1
fi

if ! hf auth whoami &>/dev/null; then
  echo "Run: hf auth login" >&2
  exit 1
fi

# The remote command is single-quoted so REPO_URL, REPO_BRANCH and
# NB_EXEC_TIMEOUT are expanded inside the container from the --env values.
# ca-certificates is installed explicitly: with --no-install-recommends it is
# not pulled in by git/curl, and the HTTPS clone needs CA roots on images that
# do not already ship them.
exec hf jobs run \
  --flavor "$FLAVOR" \
  --detach \
  --timeout "$TIMEOUT" \
  --env "REPO_URL=$REPO_URL" \
  --env "REPO_BRANCH=$REPO_BRANCH" \
  --env "NB_EXEC_TIMEOUT=$NB_EXEC_TIMEOUT" \
  "$IMAGE" \
  bash -lc 'set -euo pipefail
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq && apt-get install -y --no-install-recommends git curl ca-certificates
rm -rf /work && git clone --depth 1 --branch "${REPO_BRANCH}" "${REPO_URL}" /work
cd /work
pip install -q --root-user-action=ignore jupyter nbconvert nbclient ipykernel
jupyter nbconvert --to notebook --execute training/train_grpo.ipynb \
  --ExecutePreprocessor.timeout="${NB_EXEC_TIMEOUT}" --inplace'