Draken1606 committed on
Commit
409ca4d
·
1 Parent(s): 3f2e418
training/UndertriAI_GRPO_Training.ipynb CHANGED
@@ -1,17 +1,4 @@
1
  {
2
- "nbformat": 4,
3
- "nbformat_minor": 0,
4
- "metadata": {
5
- "colab": {
6
- "provenance": [],
7
- "gpuType": "T4"
8
- },
9
- "kernelspec": {
10
- "name": "python3",
11
- "display_name": "Python 3"
12
- },
13
- "accelerator": "GPU"
14
- },
15
  "cells": [
16
  {
17
  "cell_type": "markdown",
@@ -1940,5 +1927,18 @@
1940
  " )\n"
1941
  ]
1942
  }
1943
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
1944
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "cells": [
3
  {
4
  "cell_type": "markdown",
 
1927
  " )\n"
1928
  ]
1929
  }
1930
+ ],
1931
+ "metadata": {
1932
+ "accelerator": "GPU",
1933
+ "colab": {
1934
+ "gpuType": "T4",
1935
+ "provenance": []
1936
+ },
1937
+ "kernelspec": {
1938
+ "display_name": "Python 3",
1939
+ "name": "python3"
1940
+ }
1941
+ },
1942
+ "nbformat": 4,
1943
+ "nbformat_minor": 0
1944
  }
training/run_hf_job.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "unsloth @ git+https://github.com/unslothai/unsloth.git",
5
+ # "unsloth_zoo",
6
+ # "trl>=0.11.0",
7
+ # "peft",
8
+ # "accelerate",
9
+ # "bitsandbytes",
10
+ # "xformers",
11
+ # "torchvision",
12
+ # "sentencepiece",
13
+ # "protobuf",
14
+ # "einops",
15
+ # "hf_transfer",
16
+ # "datasets",
17
+ # "wandb",
18
+ # "matplotlib",
19
+ # ]
20
+ # ///
21
+ """
22
+ UndertriAI — Hugging Face Jobs bootstrap launcher.
23
+
24
+ `hf jobs uv run <script>` only uploads ONE file to the job container, but
25
+ `training/train_grpo.py` needs the rest of the repo (server/, models.py,
26
+ data/episodes/*.jsonl). This launcher:
27
+
28
+ 1. Declares every runtime dependency via PEP 723 so `uv run` installs
29
+ them in one resolver pass (avoids the iterative ``--with`` whack-a-mole
30
+ that hits unsloth_zoo → torchvision → sentencepiece → …).
31
+ 2. Downloads the repo as a GitHub tarball (no system `git` required).
32
+ 3. Invokes ``python training/train_grpo.py`` with the CLI args passed
33
+ through to this script.
34
+
35
+ Canonical HF Jobs command
36
+ ─────────────────────────
37
+ hf jobs uv run --flavor a10g-large --timeout 5h --secrets HF_TOKEN \
38
+ https://raw.githubusercontent.com/Faiz-1606/Undertrial/main/training/run_hf_job.py \
39
+ --curriculum \
40
+ --env_url https://draken1606-undertrial-ai.hf.space \
41
+ --steps 300 --batch_size 1 --grad_accum 8 \
42
+ --output ./output/undertrial_grpo
43
+
44
+ Everything after the script URL is forwarded verbatim to ``train_grpo.py``.
45
+
46
+ Local use is also supported: if this file is executed from inside a clone of
47
+ the repo, it skips the download and runs the sibling ``train_grpo.py``.
48
+
49
+ Overrides (environment variables)
50
+ ─────────────────────────────────
51
+ UNDERTRIAL_REPO_URL default: https://github.com/Faiz-1606/Undertrial
52
+ UNDERTRIAL_REPO_BRANCH default: main
53
+ UNDERTRIAL_WORK_DIR default: /work (where the repo will live)
54
+ UNDERTRIAL_ENV_URL default: https://draken1606-undertrial-ai.hf.space
55
+ (forwarded to train_grpo.py if --env_url absent)
56
+ """
57
+ from __future__ import annotations
58
+
59
+ import io
60
+ import os
61
+ import shutil
62
+ import subprocess
63
+ import sys
64
+ import tarfile
65
+ import tempfile
66
+ import urllib.request
67
+ from pathlib import Path
68
+ from typing import Optional
69
+
70
+
71
+ # ── Config ──────────────────────────────────────────────────────────────────
72
# GitHub "owner/repo" slug of the project this launcher bootstraps.
REPO_OWNER_REPO = "Faiz-1606/Undertrial"

# Each setting below may be overridden through the named environment
# variable; the second argument is the value used when it is unset.
REPO_URL = os.getenv("UNDERTRIAL_REPO_URL", f"https://github.com/{REPO_OWNER_REPO}")
REPO_BRANCH = os.getenv("UNDERTRIAL_REPO_BRANCH", "main")
WORK_DIR = Path(os.getenv("UNDERTRIAL_WORK_DIR", "/work"))
DEFAULT_ENV_URL = os.getenv(
    "UNDERTRIAL_ENV_URL", "https://draken1606-undertrial-ai.hf.space"
)
83
+
84
+
85
+ # ── Helpers ─────────────────────────────────────────────────────────────────
86
+ def _log(msg: str) -> None:
87
+ print(f"[bootstrap] {msg}", flush=True)
88
+
89
+
90
+ def _running_inside_repo() -> Optional[Path]:
91
+ """
92
+ If this file sits inside a cloned Undertrial repo (e.g. run locally),
93
+ return the repo root so we skip the download step.
94
+ """
95
+ here = Path(__file__).resolve().parent.parent
96
+ train_script = here / "training" / "train_grpo.py"
97
+ episodes_dir = here / "data" / "episodes"
98
+ if train_script.exists() and episodes_dir.exists():
99
+ return here
100
+ return None
101
+
102
+
103
def _download_tarball(url: str) -> bytes:
    """
    Fetch ``url`` over HTTP(S) and return the whole response body as bytes.

    The request carries a custom ``User-Agent`` header and uses a 120 second
    timeout. Progress is reported through ``_log`` before and after the
    transfer; the entire payload is held in memory.
    """
    _log(f"downloading {url}")
    request = urllib.request.Request(
        url, headers={"User-Agent": "undertrial-run-hf-job/1.0"}
    )
    with urllib.request.urlopen(request, timeout=120) as response:
        payload = response.read()
    size_mb = len(payload) / 1_000_000
    _log(f"downloaded {size_mb:.1f} MB")
    return payload
113
+
114
+
115
+ def _extract_tarball_to(tarball_bytes: bytes, dest: Path) -> Path:
116
+ """
117
+ Extract a GitHub tar.gz (top-level dir like ``Undertrial-main/``) into
118
+ ``dest``, flattening the top-level wrapper so the repo contents land
119
+ directly inside ``dest``. Returns ``dest``.
120
+ """
121
+ if dest.exists():
122
+ _log(f"removing stale {dest}")
123
+ shutil.rmtree(dest)
124
+ dest.mkdir(parents=True)
125
+
126
+ with tempfile.TemporaryDirectory() as staging_str:
127
+ staging = Path(staging_str)
128
+ with tarfile.open(fileobj=io.BytesIO(tarball_bytes), mode="r:gz") as tar:
129
+ tar.extractall(path=staging)
130
+ top_level = next(staging.iterdir())
131
+ if not top_level.is_dir():
132
+ raise RuntimeError(
133
+ f"Unexpected tarball layout: {top_level} is not a directory"
134
+ )
135
+ for entry in top_level.iterdir():
136
+ shutil.move(str(entry), str(dest / entry.name))
137
+
138
+ return dest
139
+
140
+
141
def _ensure_repo() -> Path:
    """
    Materialise the Undertrial repo on disk and return its root path.

    When the script already lives inside a checkout, that checkout is reused.
    Otherwise the branch ``REPO_BRANCH`` is downloaded as a tarball from
    GitHub's codeload endpoint and unpacked into ``WORK_DIR``.

    The ``owner/repo`` slug is taken from ``REPO_URL`` so that the
    ``UNDERTRIAL_REPO_URL`` override actually selects the repository that is
    downloaded. If ``REPO_URL`` is not a ``github.com/<owner>/<repo>`` URL, a
    warning is logged and the built-in ``REPO_OWNER_REPO`` is used instead.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    local_root = _running_inside_repo()
    if local_root is not None:
        _log(f"reusing local checkout at {local_root}")
        return local_root

    owner_repo = REPO_OWNER_REPO
    parts = urlsplit(REPO_URL)
    slug = parts.path.strip("/")
    if slug.endswith(".git"):
        slug = slug[: -len(".git")]
    slug_pieces = slug.split("/")
    is_github = parts.netloc.lower() in {"github.com", "www.github.com"}
    if is_github and len(slug_pieces) == 2 and all(slug_pieces):
        owner_repo = slug
    else:
        _log(
            f"WARNING: cannot derive a GitHub owner/repo from {REPO_URL!r}; "
            f"falling back to {REPO_OWNER_REPO}"
        )

    tarball_url = (
        f"https://codeload.github.com/{owner_repo}/tar.gz/refs/heads/{REPO_BRANCH}"
    )
    tarball_bytes = _download_tarball(tarball_url)
    _extract_tarball_to(tarball_bytes, WORK_DIR)
    _log(f"repo ready at {WORK_DIR}")
    return WORK_DIR
157
+
158
+
159
+ def _forward_args(extra: list[str], work_root: Path) -> list[str]:
160
+ """
161
+ Inject sensible defaults into the CLI args if the user omitted them.
162
+ Does not override any flag the user supplied.
163
+ """
164
+ args = list(extra)
165
+ if "--episodes_dir" not in args:
166
+ args += ["--episodes_dir", str(work_root / "data" / "episodes")]
167
+ if "--env_url" not in args and "--offline" not in args:
168
+ args += ["--env_url", DEFAULT_ENV_URL]
169
+ return args
170
+
171
+
172
+ # ── Entry point ─────────────────────────────────────────────────────────────
173
def main() -> int:
    """
    Fetch (or reuse) the repo, then run ``training/train_grpo.py`` with the
    command-line arguments given to this script.

    Returns the child process's exit code, or ``1`` when the training script
    cannot be found in the repo.
    """
    repo_root = _ensure_repo()
    script_path = repo_root / "training" / "train_grpo.py"
    if not script_path.exists():
        _log(f"ERROR: {script_path} not found after fetching the repo")
        return 1

    train_args = _forward_args(sys.argv[1:], repo_root)

    # Report the launch configuration before handing over to the child.
    for line in (
        f"cwd = {repo_root}",
        f"python = {sys.executable}",
        f"train script = {script_path}",
        f"args = {train_args}",
        "launching train_grpo.py …",
    ):
        _log(line)

    # Run from the repo root with the same interpreter as this script.
    completed = subprocess.run(
        [sys.executable, str(script_path), *train_args],
        cwd=repo_root,
    )
    exit_code = completed.returncode
    _log(f"train_grpo.py exited with code {exit_code}")
    return exit_code


if __name__ == "__main__":
    sys.exit(main())