feat: added images, new sft notebook, jobs to do grpo
Browse files- .cursorrules +1 -1
- README.md +4 -0
- dashboard/api.py +10 -0
- dashboard/train_results.html +20 -0
- images/sft_loss_curve.png +3 -0
- main.py +6 -0
- scripts/hf_grpo_entry.sh +93 -0
- training/GRPO_HF_RUNBOOK.md +174 -0
- training/notebooks/build_sft_qwen_colab.py +66 -10
- training/notebooks/sft_qwen_colab.ipynb +0 -0
.cursorrules
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
## Training Results Page
|
| 4 |
- Route: `GET /train` → `dashboard/train_results.html` (served from `main.py`)
|
| 5 |
- API: `GET /api/training-status` → reads `results/eval_results.json` when present; exposes plot file flags and `HF_MODEL_REPO` presence
|
| 6 |
-
- Static: `results/`
|
| 7 |
- `results/` is gitignored for data; plot PNGs/HTML can be committed intentionally if the team chooses
|
| 8 |
- The `/train` page uses placeholders when files are missing
|
| 9 |
- Training runs in Colab / local GPU — **not** inside the Space process
|
|
|
|
| 3 |
## Training Results Page
|
| 4 |
- Route: `GET /train` → `dashboard/train_results.html` (served from `main.py`)
|
| 5 |
- API: `GET /api/training-status` → reads `results/eval_results.json` when present; exposes plot file flags and `HF_MODEL_REPO` presence
|
| 6 |
+
- Static: `results/` at `/results/`, `images/` at `/images/` (e.g. committed `images/sft_loss_curve.png`) via `StaticFiles` in `main.py`
|
| 7 |
- `results/` is gitignored for data; plot PNGs/HTML can be committed intentionally if the team chooses
|
| 8 |
- The `/train` page uses placeholders when files are missing
|
| 9 |
- Training runs in Colab / local GPU — **not** inside the Space process
|
README.md
CHANGED
|
@@ -165,6 +165,8 @@ GRPO Fine-tuning — 100 steps, G=4, static JSONL prompts
|
|
| 165 |
sh4shv4t/parlay-grpo-1-5b
|
| 166 |
```
|
| 167 |
|
|
|
|
|
|
|
| 168 |
```text
|
| 169 |
Gemini self-play (generate_data.py)
|
| 170 |
→ 80 quality-filtered episodes across 9 persona×scenario combos
|
|
@@ -184,6 +186,8 @@ The ω warmup is a practical detail worth flagging: at step 0, the base model oc
|
|
| 184 |
|
| 185 |
### Results
|
| 186 |
|
|
|
|
|
|
|
| 187 |

|
| 188 |
|
| 189 |

|
|
|
|
| 165 |
sh4shv4t/parlay-grpo-1-5b
|
| 166 |
```
|
| 167 |
|
| 168 |
+
**Run GRPO on Hugging Face Jobs** (pre-paid credits, data + SFT on the Hub; `scripts/hf_grpo_entry.sh`; template uses **`--timeout 6h`** and **`a100-large`**): see [`training/GRPO_HF_RUNBOOK.md`](training/GRPO_HF_RUNBOOK.md).
|
| 169 |
+
|
| 170 |
```text
|
| 171 |
Gemini self-play (generate_data.py)
|
| 172 |
→ 80 quality-filtered episodes across 9 persona×scenario combos
|
|
|
|
| 186 |
|
| 187 |
### Results
|
| 188 |
|
| 189 |
+

|
| 190 |
+
|
| 191 |

|
| 192 |
|
| 193 |

|
dashboard/api.py
CHANGED
|
@@ -48,6 +48,7 @@ _sessions: dict[str, dict[str, Any]] = {}
|
|
| 48 |
# Opponent backend for /api/game/move: "gemini" (default) or "trained" (HF_MODEL_REPO + Qwen)
|
| 49 |
OPPONENT_MODE: str = "gemini"
|
| 50 |
_RESULTS_DIR = Path("results")
|
|
|
|
| 51 |
_CP_COSTS: dict[TacticalMove, int] = {
|
| 52 |
TacticalMove.ANCHOR_HIGH: 0,
|
| 53 |
TacticalMove.BATNA_REVEAL: 20,
|
|
@@ -345,6 +346,13 @@ def _training_status_payload() -> dict[str, Any]:
|
|
| 345 |
except Exception: # noqa: BLE001
|
| 346 |
has_results = False
|
| 347 |
repo = (os.environ.get("HF_MODEL_REPO") or "").strip() or None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
return {
|
| 349 |
"has_results": has_results,
|
| 350 |
"grpo_mean_reward": grpo,
|
|
@@ -352,10 +360,12 @@ def _training_status_payload() -> dict[str, Any]:
|
|
| 352 |
"random_mean_reward": rnd,
|
| 353 |
"model_on_hub": bool(repo),
|
| 354 |
"model_repo": repo,
|
|
|
|
| 355 |
"plots_available": {
|
| 356 |
"reward_curve": (_RESULTS_DIR / "grpo_reward_curve.png").is_file(),
|
| 357 |
"comparison": (_RESULTS_DIR / "training_curves.png").is_file(),
|
| 358 |
"transcript": (_RESULTS_DIR / "before_after_transcript.html").is_file(),
|
|
|
|
| 359 |
},
|
| 360 |
}
|
| 361 |
|
|
|
|
| 48 |
# Opponent backend for /api/game/move: "gemini" (default) or "trained" (HF_MODEL_REPO + Qwen)
|
| 49 |
OPPONENT_MODE: str = "gemini"
|
| 50 |
_RESULTS_DIR = Path("results")
|
| 51 |
+
_IMAGES_DIR = Path("images")
|
| 52 |
_CP_COSTS: dict[TacticalMove, int] = {
|
| 53 |
TacticalMove.ANCHOR_HIGH: 0,
|
| 54 |
TacticalMove.BATNA_REVEAL: 20,
|
|
|
|
| 346 |
except Exception: # noqa: BLE001
|
| 347 |
has_results = False
|
| 348 |
repo = (os.environ.get("HF_MODEL_REPO") or "").strip() or None
|
| 349 |
+
|
| 350 |
+
sft_loss_path: str | None = None
|
| 351 |
+
if (_IMAGES_DIR / "sft_loss_curve.png").is_file():
|
| 352 |
+
sft_loss_path = "/images/sft_loss_curve.png"
|
| 353 |
+
elif (_RESULTS_DIR / "sft_loss_curve.png").is_file():
|
| 354 |
+
sft_loss_path = "/results/sft_loss_curve.png"
|
| 355 |
+
|
| 356 |
return {
|
| 357 |
"has_results": has_results,
|
| 358 |
"grpo_mean_reward": grpo,
|
|
|
|
| 360 |
"random_mean_reward": rnd,
|
| 361 |
"model_on_hub": bool(repo),
|
| 362 |
"model_repo": repo,
|
| 363 |
+
"sft_loss_url": sft_loss_path,
|
| 364 |
"plots_available": {
|
| 365 |
"reward_curve": (_RESULTS_DIR / "grpo_reward_curve.png").is_file(),
|
| 366 |
"comparison": (_RESULTS_DIR / "training_curves.png").is_file(),
|
| 367 |
"transcript": (_RESULTS_DIR / "before_after_transcript.html").is_file(),
|
| 368 |
+
"sft_loss": sft_loss_path is not None,
|
| 369 |
},
|
| 370 |
}
|
| 371 |
|
dashboard/train_results.html
CHANGED
|
@@ -136,6 +136,12 @@
|
|
| 136 |
</div>
|
| 137 |
</section>
|
| 138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
<section>
|
| 140 |
<h2>GRPO Training Curve</h2>
|
| 141 |
<div id="fig-reward"></div>
|
|
@@ -198,6 +204,19 @@
|
|
| 198 |
d.style.color = x >= 0 ? "var(--emerald)" : "var(--scarlet)";
|
| 199 |
}
|
| 200 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
function rewardSection(pa) {
|
| 202 |
const el = document.getElementById("fig-reward");
|
| 203 |
const cap = document.getElementById("cap-reward");
|
|
@@ -272,6 +291,7 @@
|
|
| 272 |
if (data.has_results) {
|
| 273 |
document.querySelector(".subtitle")?.setAttribute("data-has-eval", "1");
|
| 274 |
}
|
|
|
|
| 275 |
rewardSection(data.plots_available);
|
| 276 |
compareSection(data.plots_available);
|
| 277 |
await transcriptSection(data.plots_available);
|
|
|
|
| 136 |
</div>
|
| 137 |
</section>
|
| 138 |
|
| 139 |
+
<section>
|
| 140 |
+
<h2>SFT cold start — training loss</h2>
|
| 141 |
+
<div id="fig-sft"></div>
|
| 142 |
+
<p class="caption" id="cap-sft" hidden>Supervised fine-tuning loss (Qwen2.5-1.5B + LoRA on Parlay episodes from Colab).</p>
|
| 143 |
+
</section>
|
| 144 |
+
|
| 145 |
<section>
|
| 146 |
<h2>GRPO Training Curve</h2>
|
| 147 |
<div id="fig-reward"></div>
|
|
|
|
| 204 |
d.style.color = x >= 0 ? "var(--emerald)" : "var(--scarlet)";
|
| 205 |
}
|
| 206 |
}
|
| 207 |
+
function sftSection(url) {
|
| 208 |
+
const el = document.getElementById("fig-sft");
|
| 209 |
+
const cap = document.getElementById("cap-sft");
|
| 210 |
+
if (url) {
|
| 211 |
+
el.innerHTML = '<img src="' + url + '" alt="SFT training loss" style="width:100%;border:1px solid var(--gold);border-radius:2px" />';
|
| 212 |
+
cap.hidden = false;
|
| 213 |
+
} else {
|
| 214 |
+
el.innerHTML = '<div class="fig-placeholder">SFT loss curve will appear after running ' +
|
| 215 |
+
'<a href="https://colab.research.google.com/github/sh4shv4t/Parlay/blob/main/training/notebooks/sft_qwen_colab.ipynb" target="_blank" rel="noopener">sft_qwen_colab.ipynb</a> ' +
|
| 216 |
+
'or by adding <code>images/sft_loss_curve.png</code> to the repo.</div>';
|
| 217 |
+
cap.hidden = true;
|
| 218 |
+
}
|
| 219 |
+
}
|
| 220 |
function rewardSection(pa) {
|
| 221 |
const el = document.getElementById("fig-reward");
|
| 222 |
const cap = document.getElementById("cap-reward");
|
|
|
|
| 291 |
if (data.has_results) {
|
| 292 |
document.querySelector(".subtitle")?.setAttribute("data-has-eval", "1");
|
| 293 |
}
|
| 294 |
+
sftSection(data.sft_loss_url);
|
| 295 |
rewardSection(data.plots_available);
|
| 296 |
compareSection(data.plots_available);
|
| 297 |
await transcriptSection(data.plots_available);
|
images/sft_loss_curve.png
ADDED
|
Git LFS Details
|
main.py
CHANGED
|
@@ -78,6 +78,12 @@ try:
|
|
| 78 |
except OSError as exc:
|
| 79 |
logger.warning("Could not mount /results: %s", exc)
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
@app.get("/", include_in_schema=False)
|
| 83 |
async def serve_index() -> FileResponse:
|
|
|
|
| 78 |
except OSError as exc:
|
| 79 |
logger.warning("Could not mount /results: %s", exc)
|
| 80 |
|
| 81 |
+
os.makedirs("images", exist_ok=True)
|
| 82 |
+
try:
|
| 83 |
+
app.mount("/images", StaticFiles(directory="images"), name="images")
|
| 84 |
+
except OSError as exc:
|
| 85 |
+
logger.warning("Could not mount /images: %s", exc)
|
| 86 |
+
|
| 87 |
|
| 88 |
@app.get("/", include_in_schema=False)
|
| 89 |
async def serve_index() -> FileResponse:
|
scripts/hf_grpo_entry.sh
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
# Run from the Parlay repository root (the folder that contains training/ and parlay_env/).
|
| 3 |
+
# Intended for Linux GPU jobs (Hugging Face Jobs, RunPod, etc.).
|
| 4 |
+
#
|
| 5 |
+
# Usage (after: git clone ... && cd Parlay && pip install -r requirements-train.txt):
|
| 6 |
+
# export HF_TOKEN=... # read private assets + push (required if PUSH_TO_HF=1)
|
| 7 |
+
# export GRPO_STEPS=120 GRPO_G=4
|
| 8 |
+
# bash scripts/hf_grpo_entry.sh
|
| 9 |
+
#
|
| 10 |
+
# See training/GRPO_HF_RUNBOOK.md for a full walkthrough.
|
| 11 |
+
set -euo pipefail
|
| 12 |
+
export PYTHONUNBUFFERED=1
|
| 13 |
+
|
| 14 |
+
: "${DATASET_ID:=sh4shv4t/parlay-episodes}"
|
| 15 |
+
: "${EPISODE_FILE:=episodes_v2.jsonl}"
|
| 16 |
+
: "${SFT_MODEL:=sh4shv4t/parlay-sft-1-5b}"
|
| 17 |
+
: "${GRPO_STEPS:=120}"
|
| 18 |
+
: "${GRPO_G:=4}"
|
| 19 |
+
: "${MIN_REWARD:=-50.0}"
|
| 20 |
+
: "${OUTPUT_DIR:=outputs/grpo_run}"
|
| 21 |
+
# Set to 0 to skip push (e.g. smoke test)
|
| 22 |
+
: "${PUSH_TO_HF:=1}"
|
| 23 |
+
# Model repo to upload the GRPO output folder to
|
| 24 |
+
: "${HF_GRPO_REPO:=sh4shv4t/parlay-grpo-1-5b}"
|
| 25 |
+
|
| 26 |
+
if [[ ! -f "training/grpo_train.py" ]]; then
|
| 27 |
+
echo "Run this script from the Parlay repo root (training/grpo_train.py not found). pwd=$(pwd)" >&2
|
| 28 |
+
exit 1
|
| 29 |
+
fi
|
| 30 |
+
|
| 31 |
+
echo "==> Downloading ${EPISODE_FILE} from dataset ${DATASET_ID} ..."
|
| 32 |
+
export DATASET_ID EPISODE_FILE
|
| 33 |
+
JSONL_PATH=$(
|
| 34 |
+
python -c "import os
|
| 35 |
+
from huggingface_hub import hf_hub_download
|
| 36 |
+
print(hf_hub_download(
|
| 37 |
+
repo_id=os.environ['DATASET_ID'],
|
| 38 |
+
filename=os.environ['EPISODE_FILE'],
|
| 39 |
+
repo_type='dataset',
|
| 40 |
+
))"
|
| 41 |
+
)
|
| 42 |
+
echo " JSONL: ${JSONL_PATH}"
|
| 43 |
+
|
| 44 |
+
mkdir -p "$(dirname "$OUTPUT_DIR")"
|
| 45 |
+
OUT_ABS="$(cd "$(dirname "$OUTPUT_DIR")" && pwd)/$(basename "$OUTPUT_DIR")"
|
| 46 |
+
|
| 47 |
+
echo "==> GRPO: SFT=${SFT_MODEL} steps=${GRPO_STEPS} G=${GRPO_G} out=${OUT_ABS}"
|
| 48 |
+
python -m training.grpo_train \
|
| 49 |
+
--model "${SFT_MODEL}" \
|
| 50 |
+
--data "${JSONL_PATH}" \
|
| 51 |
+
--output "${OUT_ABS}" \
|
| 52 |
+
--steps "${GRPO_STEPS}" \
|
| 53 |
+
--g "${GRPO_G}" \
|
| 54 |
+
--min-reward "${MIN_REWARD}"
|
| 55 |
+
|
| 56 |
+
# Bundle Matplotlib curves + TRL log JSON into the model folder so one Hub upload includes visualizations.
|
| 57 |
+
echo "==> Collecting training plots under ${OUT_ABS}/training_plots/ ..."
|
| 58 |
+
TP="${OUT_ABS}/training_plots"
|
| 59 |
+
mkdir -p "${TP}"
|
| 60 |
+
for f in results/grpo_reward_curve.png results/grpo_loss_curve.png; do
|
| 61 |
+
if [[ -f "$f" ]]; then
|
| 62 |
+
cp -f "$f" "${TP}/"
|
| 63 |
+
echo " + ${f}"
|
| 64 |
+
fi
|
| 65 |
+
done
|
| 66 |
+
if [[ -d "${OUT_ABS}/plots" ]]; then
|
| 67 |
+
shopt -s nullglob
|
| 68 |
+
for f in "${OUT_ABS}/plots/"*.png "${OUT_ABS}/plots/"*.json; do
|
| 69 |
+
[[ -e "$f" ]] || continue
|
| 70 |
+
cp -f "$f" "${TP}/"
|
| 71 |
+
echo " + ${f}"
|
| 72 |
+
done
|
| 73 |
+
shopt -u nullglob
|
| 74 |
+
fi
|
| 75 |
+
if [[ ! -f "${TP}/grpo_reward_curve.png" && ! -f "${TP}/grpo_reward.png" ]]; then
|
| 76 |
+
echo " (warning: no reward plot in training_plots — check logs for empty log_history or plot errors)"
|
| 77 |
+
fi
|
| 78 |
+
|
| 79 |
+
if [[ "${PUSH_TO_HF}" == "1" || "${PUSH_TO_HF}" == "true" ]]; then
|
| 80 |
+
if [[ -z "${HF_TOKEN:-}" && -z "${HUGGINGFACE_HUB_TOKEN:-}" ]]; then
|
| 81 |
+
echo "PUSH_TO_HF is set but neither HF_TOKEN nor HUGGINGFACE_HUB_TOKEN is set." >&2
|
| 82 |
+
exit 1
|
| 83 |
+
fi
|
| 84 |
+
# push_to_hub.py reads HF_TOKEN; Jobs often set HUGGINGFACE_HUB_TOKEN
|
| 85 |
+
export HF_TOKEN="${HF_TOKEN:-${HUGGINGFACE_HUB_TOKEN:-}}"
|
| 86 |
+
echo "==> Pushing to https://huggingface.co/${HF_GRPO_REPO} ..."
|
| 87 |
+
export HF_REPO_ID="${HF_GRPO_REPO}"
|
| 88 |
+
python -m training.push_to_hub --model "${OUT_ABS}" --repo "${HF_GRPO_REPO}"
|
| 89 |
+
else
|
| 90 |
+
echo "==> PUSH_TO_HF disabled; model saved at ${OUT_ABS}"
|
| 91 |
+
fi
|
| 92 |
+
|
| 93 |
+
echo "==> Done."
|
training/GRPO_HF_RUNBOOK.md
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Run Parlay GRPO on Hugging Face (Jobs + your credits)
|
| 2 |
+
|
| 3 |
+
## Checklist — what you actually do
|
| 4 |
+
|
| 5 |
+
1. **On your PC:** install the HF CLI → `hf auth login` (token with read + write).
|
| 6 |
+
2. **Money:** confirm [pre-paid credits / billing](https://huggingface.co/settings/billing) ($30 is enough for a modest A100 run if you cap time and steps).
|
| 7 |
+
3. **Repo:** commit and push this repo (including `scripts/hf_grpo_entry.sh`) to GitHub — or set `GITHUB_CLONE` in the job to a URL/branch that has the script.
|
| 8 |
+
4. **Start the job** from your terminal using the **`hf jobs run ... huggingface/trl ...`** command in [§3.1](#31-command-template). This runbook uses **`--timeout 6h`** on **`a100-large`** (see [§3.2](#32-is-6-hours-on-a100-enough)).
|
| 9 |
+
5. **Watch** [huggingface.co/jobs](https://huggingface.co/jobs) until it finishes or errors.
|
| 10 |
+
6. **Artifacts:** the trained adapter lands in `HF_GRPO_REPO` (default `sh4shv4t/parlay-grpo-1-5b`). **Plots** are generated during training and copied into **`training_plots/`** inside that upload (see [Visualizations](#visualizations--curves)).
|
| 11 |
+
7. **Optional — local app / README:** download `grpo_reward_curve.png` / `grpo_loss_curve.png` from the model repo’s `training_plots/` (or from the job’s `results/` folder) and put them in your clone under `results/` so the Space **Training Results** page can show them ([`dashboard/api.py`](../dashboard/api.py) looks for `results/grpo_reward_curve.png`).
|
| 12 |
+
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
This walkthrough uses assets that are **already on the Hub**:
|
| 16 |
+
|
| 17 |
+
| What | Hub ID |
|
| 18 |
+
|------|--------|
|
| 19 |
+
| Episodes JSONL (dataset) | `sh4shv4t/parlay-episodes` (file: `episodes_v2.jsonl`) |
|
| 20 |
+
| SFT LoRA (starting point) | `sh4shv4t/parlay-sft-1-5b` |
|
| 21 |
+
| Optional: upload GRPO output here | `sh4shv4t/parlay-grpo-1-5b` (default in `scripts/hf_grpo_entry.sh`) |
|
| 22 |
+
|
| 23 |
+
`training/grpo_train.py` loads the SFT **adapter** from the Hub (it looks for `adapter_config.json` and fetches the **base** model name from that file, usually `Qwen/Qwen2.5-1.5B-Instruct`).
|
| 24 |
+
|
| 25 |
+
**Requirements:** a Hugging Face account with [pre-paid credits](https://huggingface.co/pricing) and a [fine-grained or classic token](https://huggingface.co/settings/tokens) with **read** (dataset + models) and **write** (if you push the trained adapter to your model repo).
|
| 26 |
+
|
| 27 |
+
---
|
| 28 |
+
|
| 29 |
+
## 0. What you are paying for
|
| 30 |
+
|
| 31 |
+
- [Jobs pricing](https://huggingface.co/docs/hub/jobs-pricing) is **per minute** while the job is **starting** or **running** (rough guide: 1× A100 large ≈ $2.50/hr; **$30** is on the order of **~12 h** on that flavor if you used it nonstop—always check the current table).
|
| 32 |
+
- The default timeout for Jobs is **short**; you must set **`--timeout`**. This guide standardizes on **`6h`** for **1× A100** with `GRPO_STEPS=120` and `GRPO_G=4` (see §3.2). You are still billed **per minute of actual runtime**, not for the full timeout (see current [pricing](https://huggingface.co/docs/hub/jobs-pricing)).
|
| 33 |
+
|
| 34 |
+
---
|
| 35 |
+
|
| 36 |
+
## 1. One-time setup on your laptop
|
| 37 |
+
|
| 38 |
+
1. **Install the HF CLI** (see [Quickstart](https://huggingface.co/docs/hub/jobs-quickstart)):
|
| 39 |
+
- e.g. `curl -LsSf https://hf.co/cli/install.sh | bash` (macOS/Linux) or the Windows installer from the same doc page.
|
| 40 |
+
2. **Log in**:
|
| 41 |
+
- `hf auth login`
|
| 42 |
+
- Paste a token with **read** and **write** to the Hub.
|
| 43 |
+
3. **Confirm credits** at [huggingface.co/settings/billing](https://huggingface.co/settings/billing).
|
| 44 |
+
4. **Open the Jobs UI** to watch runs: [huggingface.co/jobs](https://huggingface.co/jobs) (or your org’s jobs page).
|
| 45 |
+
|
| 46 |
+
---
|
| 47 |
+
|
| 48 |
+
## 2. What the repo provides
|
| 49 |
+
|
| 50 |
+
From the **repository root** after `git clone`:
|
| 51 |
+
|
| 52 |
+
- **`scripts/hf_grpo_entry.sh`**
|
| 53 |
+
- Downloads `episodes_v2.jsonl` from `sh4shv4t/parlay-episodes`
|
| 54 |
+
- Runs `python -m training.grpo_train` with the Hub SFT adapter
|
| 55 |
+
- Optionally runs `python -m training.push_to_hub` to upload the output folder
|
| 56 |
+
|
| 57 |
+
You only need a **GPU Linux** environment with **git**, **Python**, and **pip**; the rest is installed from `requirements-train.txt`.
|
| 58 |
+
|
| 59 |
+
---
|
| 60 |
+
|
| 61 |
+
## 3. Recommended: Hugging Face Job with the TRL image
|
| 62 |
+
|
| 63 |
+
Hugging Face documents a ready image for TRL workflows: `huggingface/trl` (see [Popular images](https://huggingface.co/docs/hub/jobs-popular-images)).
|
| 64 |
+
|
| 65 |
+
### 3.1 Command template
|
| 66 |
+
|
| 67 |
+
Run this from **your** machine; it starts the job in the cloud and streams logs. Adjust `GITHUB_CLONE` if you use a fork (if you work in a mirror such as your MetaHackathon copy, push it to GitHub first or change the URL to wherever it is hosted).
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
# `--secrets HF_TOKEN` passes your Hub token (from `hf auth login`) into the job for downloads + push.
|
| 71 |
+
# Image is the first *positional* argument after flags (same pattern as: hf jobs run ubuntu echo hi).
|
| 72 |
+
# Point GITHUB_CLONE at your fork if needed.
|
| 73 |
+
hf jobs run \
|
| 74 |
+
--flavor a100-large \
|
| 75 |
+
--timeout 6h \
|
| 76 |
+
--secrets HF_TOKEN \
|
| 77 |
+
--env GITHUB_CLONE=https://github.com/sh4shv4t/Parlay.git \
|
| 78 |
+
--env GRPO_STEPS=120 \
|
| 79 |
+
--env GRPO_G=4 \
|
| 80 |
+
--env PUSH_TO_HF=1 \
|
| 81 |
+
--env HF_GRPO_REPO=sh4shv4t/parlay-grpo-1-5b \
|
| 82 |
+
huggingface/trl \
|
| 83 |
+
bash -lc 'set -e; apt-get update -qq && apt-get install -y -qq git; git clone --depth 1 "$GITHUB_CLONE" /work; cd /work; pip install -q -r requirements-train.txt; bash scripts/hf_grpo_entry.sh'
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
**Note:** if your CLI version differs, run `hf jobs run --help`. If the image already has `git`, drop the `apt-get update -qq && apt-get install -y -qq git;` part of the one-line command. The TRL image is listed under [Popular images](https://huggingface.co/docs/hub/jobs-popular-images). For a UV-based job instead, see `hf jobs uv run --image huggingface/trl ...` on the same page (usually a small `train.py`); Parlay’s GRPO needs the **full** repo, so the `git clone` pattern above is the straightforward fit.
|
| 87 |
+
|
| 88 |
+
**Windows:** run this block from **Git Bash** or **WSL** (so `bash -lc '... "$GITHUB_CLONE" ...'` is parsed like Linux). In plain PowerShell, quote escaping is different—open this file in an editor and paste the block there.
|
| 89 |
+
|
| 90 |
+
**Environment variables** you can add with `--env`:
|
| 91 |
+
|
| 92 |
+
| Variable | Default (in `hf_grpo_entry.sh`) | Purpose |
|
| 93 |
+
|----------|---------------------------------|---------|
|
| 94 |
+
| `DATASET_ID` | `sh4shv4t/parlay-episodes` | JSONL source |
|
| 95 |
+
| `EPISODE_FILE` | `episodes_v2.jsonl` | File inside the dataset repo |
|
| 96 |
+
| `SFT_MODEL` | `sh4shv4t/parlay-sft-1-5b` | Hub LoRA to continue from |
|
| 97 |
+
| `GRPO_STEPS` | `120` | Training steps |
|
| 98 |
+
| `GRPO_G` | `4` | Group size (lower if OOM) |
|
| 99 |
+
| `MIN_REWARD` | `-50.0` | Skips very bad train rows |
|
| 100 |
+
| `OUTPUT_DIR` | `outputs/grpo_run` | Local output in the job |
|
| 101 |
+
| `PUSH_TO_HF` | `1` | Set to `0` to skip upload |
|
| 102 |
+
| `HF_GRPO_REPO` | `sh4shv4t/parlay-grpo-1-5b` | Push target |
|
| 103 |
+
|
| 104 |
+
`HF_TOKEN` is provided by **`--secrets HF_TOKEN`** (or the short form your CLI supports) so `huggingface-cli` / `huggingface_hub` can push; it must be allowed to write to `HF_GRPO_REPO`.
|
| 105 |
+
|
| 106 |
+
### 3.2 Is 6 hours on A100 enough?
|
| 107 |
+
|
| 108 |
+
**Usually yes** for the template defaults: **Qwen2.5-1.5B** + SFT LoRA, **`GRPO_STEPS=120`**, **`GRPO_G=4`**. In practice, training often takes **on the order of 1–3 hours** of wall time; **6h** is a **safety cap** that leaves room for Hub downloads, first-time `pip install`, and plotting, so the job is not cut off by the short default timeout. If you **increase** steps into the many hundreds, either raise **`--timeout`** (e.g. to **`8h`**) or **lower** `GRPO_STEPS` / `GRPO_G` to stay inside 6h.
|
| 109 |
+
|
| 110 |
+
### 3.3 If the job OOMs
|
| 111 |
+
|
| 112 |
+
Lower **`GRPO_G`** first (e.g. `2` or `1`), then **`GRPO_STEPS`**. A100 80GB usually runs `G=4` for this 1.5B setup; T4-style VRAM may need `G=2`.
|
| 113 |
+
|
| 114 |
+
### 3.4 If the job times out
|
| 115 |
+
|
| 116 |
+
Increase **`--timeout`**, or run two shorter jobs: first run saves under `OUTPUT_DIR` / Hub; a second could continue from a saved adapter (only if you wire resume in your workflow—the stock `grpo_train` is single-stage; the practical fix is a longer timeout or fewer steps per job and manual continuation by pointing `--model` at the last save).
|
| 117 |
+
|
| 118 |
+
### 3.5 Cheaper hardware
|
| 119 |
+
|
| 120 |
+
Use `--flavor t4-small` (or `l4x1` per [hardware list](https://huggingface.co/docs/hub/jobs-pricing)) and reduce steps + `G` so the job **finishes** within the timeout; you trade wall-clock and quality for cost.
|
| 121 |
+
|
| 122 |
+
### 3.6 After the run
|
| 123 |
+
|
| 124 |
+
- **Model:** [huggingface.co/sh4shv4t/parlay-grpo-1-5b](https://huggingface.co/sh4shv4t/parlay-grpo-1-5b) (if you pushed to that repo).
|
| 125 |
+
- **Curves:** also under that repo in **`training_plots/`** (bundled by `scripts/hf_grpo_entry.sh` before upload).
|
| 126 |
+
|
| 127 |
+
---
|
| 128 |
+
|
| 129 |
+
## Visualizations & curves
|
| 130 |
+
|
| 131 |
+
GRPO already **builds charts in code** (`training/grpo_train.py` → `_save_training_plots`):
|
| 132 |
+
|
| 133 |
+
| Output | Where it is written during training | What it shows |
|
| 134 |
+
|--------|-------------------------------------|----------------|
|
| 135 |
+
| Reward curve | `results/grpo_reward_curve.png` and `<output>/plots/grpo_reward.png` | Mean reward vs step (blue) |
|
| 136 |
+
| Loss curve | `results/grpo_loss_curve.png` and `<output>/plots/grpo_loss.png` | Training loss vs step |
|
| 137 |
+
| Raw TRL logs | `<output>/plots/grpo_log.json` | Full `log_history` for your own plotting |
|
| 138 |
+
|
| 139 |
+
**`scripts/hf_grpo_entry.sh`** copies those into **`<OUTPUT_DIR>/training_plots/`** before `push_to_hub`, so after a successful job the **PNGs + JSON are uploaded to the model repo** alongside the adapter (no extra step).
|
| 140 |
+
|
| 141 |
+
**Training Results page (`/train`):** the API exposes `plots_available.reward_curve` when **`results/grpo_reward_curve.png`** exists in the deployed app’s repo. After you download the PNGs from the Hub (or copy from a job artifact), add them under `results/` in the git repo you deploy to Spaces and redeploy.
|
| 142 |
+
|
| 143 |
+
**Optional extras (not wired in repo by default):**
|
| 144 |
+
|
| 145 |
+
- **W&B / TensorBoard:** set `report_to` in `GRPOConfig` inside `training/grpo_train.py` if you want live dashboards (adds setup and secrets).
|
| 146 |
+
- **Eval bar chart:** after GRPO, run `python -m training.evaluate ...` locally or in a small job to regenerate `results/eval_results.json` and comparison figures (see main README).
|
| 147 |
+
|
| 148 |
+
---
|
| 149 |
+
|
| 150 |
+
## 4. Alternative: `hf jobs uv run` (TRL quickstart style)
|
| 151 |
+
|
| 152 |
+
The [Jobs quickstart](https://huggingface.co/docs/hub/jobs-quickstart) uses `hf jobs uv run` with `--with trl` and a small `train.py`. Parlay’s GRPO needs the **full repo** (for `parlay_env` and reward functions), so the **`git clone` + `huggingface/trl` image** pattern in §3 is usually simpler than inlining the whole project into one file.
|
| 153 |
+
|
| 154 |
+
---
|
| 155 |
+
|
| 156 |
+
## 5. Alternative: Colab (no Jobs)
|
| 157 |
+
|
| 158 |
+
Use `notebooks/parlay_grpo_colab.ipynb`. In the **config** cell, set:
|
| 159 |
+
|
| 160 |
+
```python
|
| 161 |
+
JSONL_VIA_HF = ("sh4shv4t/parlay-episodes", "episodes_v2.jsonl")
|
| 162 |
+
SFT_MODEL_HF = "sh4shv4t/parlay-sft-1-5b"
|
| 163 |
+
# Optional: use Colab Pro+ or a longer runtime; HF Jobs avoids Colab’s usage caps.
|
| 164 |
+
```
|
| 165 |
+
|
| 166 |
+
---
|
| 167 |
+
|
| 168 |
+
## 6. Sanity checks before spending credits
|
| 169 |
+
|
| 170 |
+
- **Non-empty train split:** `grpo_train` only uses JSONL lines with `"split": "train"`. If the script prints `0 remaining for GRPO`, fix the JSONL or filters (`MIN_REWARD`).
|
| 171 |
+
- **Token:** `python -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('sh4shv4t/parlay-episodes','episodes_v2.jsonl', repo_type='dataset'))"` on your machine should print a local path.
|
| 172 |
+
- **Config pre-flight:** `python scripts/check_training_config.py` (read-only) reviews env defaults for SFT/GRPO.
|
| 173 |
+
|
| 174 |
+
If anything in this file drifts (repo names, file names), align with `README.md` and `scripts/push_dataset.py` (`episodes_v2.jsonl`).
|
training/notebooks/build_sft_qwen_colab.py
CHANGED
|
@@ -38,7 +38,11 @@ episodes using **Supervised Fine-Tuning (SFT)** with LoRA via
|
|
| 38 |
3. Formats to ChatML, trains with LoRA (r=16)
|
| 39 |
4. Plots, evaluates before/after, pushes adapter to `sh4shv4t/parlay-negotiator`
|
| 40 |
|
| 41 |
-
**Runtime:** ~25 min on T4 · **Cost:** $0 (free tier).
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
add_md("## Step 1 — Install dependencies")
|
| 44 |
|
|
@@ -52,18 +56,39 @@ subprocess.run([
|
|
| 52 |
subprocess.run([
|
| 53 |
sys.executable, "-m", "pip", "install",
|
| 54 |
"trl>=0.8.6", "peft>=0.10.0", "accelerate>=0.28.0",
|
| 55 |
-
"datasets>=2.18.0", "huggingface-hub>=0.22.0",
|
| 56 |
"bitsandbytes>=0.43.0", "xformers", "--quiet",
|
| 57 |
], check=True)
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
add_code("""import os, json
|
| 63 |
import torch
|
| 64 |
from google.colab import userdata
|
| 65 |
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
print(torch.cuda.get_device_name(0), torch.cuda.get_device_properties(0).total_memory / 1e9, "GB VRAM")
|
| 68 |
print("torch", torch.__version__, "cuda", torch.version.cuda)
|
| 69 |
|
|
@@ -80,10 +105,37 @@ MIN_REWARD_KEEP = 0.25""")
|
|
| 80 |
|
| 81 |
add_md("## Step 3 — Load dataset")
|
| 82 |
|
| 83 |
-
add_code("""
|
| 84 |
import pandas as pd
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
df = raw["train"].to_pandas()
|
| 88 |
print(len(df), "rows", list(df.columns))
|
| 89 |
if "reward" in df.columns:
|
|
@@ -137,7 +189,10 @@ eval_data = Dataset.from_list(formatted[split:])""")
|
|
| 137 |
|
| 138 |
add_md("## Step 5 — Model + LoRA (Unsloth)")
|
| 139 |
|
| 140 |
-
add_code("""
|
|
|
|
|
|
|
|
|
|
| 141 |
from unsloth.chat_templates import get_chat_template
|
| 142 |
|
| 143 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
|
@@ -146,6 +201,7 @@ model, tokenizer = FastLanguageModel.from_pretrained(
|
|
| 146 |
dtype=None,
|
| 147 |
load_in_4bit=True,
|
| 148 |
token=HF_TOKEN,
|
|
|
|
| 149 |
)
|
| 150 |
tokenizer = get_chat_template(tokenizer, chat_template="chatml")
|
| 151 |
model = FastLanguageModel.get_peft_model(
|
|
@@ -187,7 +243,7 @@ args = SFTConfig(
|
|
| 187 |
fp16=not torch.cuda.is_bf16_supported(),
|
| 188 |
bf16=torch.cuda.is_bf16_supported(),
|
| 189 |
logging_steps=5,
|
| 190 |
-
|
| 191 |
save_strategy="epoch",
|
| 192 |
report_to="none",
|
| 193 |
seed=42,
|
|
|
|
| 38 |
3. Formats to ChatML, trains with LoRA (r=16)
|
| 39 |
4. Plots, evaluates before/after, pushes adapter to `sh4shv4t/parlay-negotiator`
|
| 40 |
|
| 41 |
+
**Runtime:** ~25 min on T4 · **Cost:** $0 (free tier).
|
| 42 |
+
|
| 43 |
+
**If `load_dataset` fails on `Feature type 'Json'`:** the install cell upgrades `datasets`;
|
| 44 |
+
if it still fails, use **Runtime → Restart runtime**, then run from Step 2 onward. The
|
| 45 |
+
data cell also falls back to downloading `episodes_v2.jsonl` directly (no `Json` schema).""")
|
| 46 |
|
| 47 |
add_md("## Step 1 — Install dependencies")
|
| 48 |
|
|
|
|
| 56 |
subprocess.run([
|
| 57 |
sys.executable, "-m", "pip", "install",
|
| 58 |
"trl>=0.8.6", "peft>=0.10.0", "accelerate>=0.28.0",
|
|
|
|
| 59 |
"bitsandbytes>=0.43.0", "xformers", "--quiet",
|
| 60 |
], check=True)
|
| 61 |
+
# Hub dataset uses `Json` features — needs datasets 3.x (Colab ships an older build)
|
| 62 |
+
subprocess.run([
|
| 63 |
+
sys.executable, "-m", "pip", "install",
|
| 64 |
+
"-U", "datasets>=3.0.0", "huggingface-hub>=0.23.0", "pyarrow>=14.0.0",
|
| 65 |
+
"--quiet",
|
| 66 |
+
], check=True)
|
| 67 |
+
import os
|
| 68 |
+
os.environ.setdefault("UNSLOTH_DISABLE_STATISTICS", "1")
|
| 69 |
+
print("OK: dependencies (datasets>=3 for Json dtype on Hub)")""")
|
| 70 |
+
|
| 71 |
+
add_md("""## Step 2 — GPU + config
|
| 72 |
|
| 73 |
+
**GPU required** (Unsloth + 4-bit QLoRA will not run on CPU).
|
| 74 |
+
|
| 75 |
+
In Colab: **Runtime** → **Change runtime type** → **Hardware accelerator** = **T4** (or L4 / A100 / V100) → **Save**, then re-run this cell. Free tier: choose **T4** if offered.
|
| 76 |
+
|
| 77 |
+
**Unsloth:** we set `UNSLOTH_DISABLE_STATISTICS=1` below so Colab does not hang 120s on Unsloth’s Hugging Face “statistics” fetch (often mis-reported as “HF is down”).""")
|
| 78 |
|
| 79 |
add_code("""import os, json
|
| 80 |
import torch
|
| 81 |
from google.colab import userdata
|
| 82 |
|
| 83 |
+
# Before any `import unsloth` in later cells — avoids TimeoutError on HF stats ping
|
| 84 |
+
os.environ.setdefault("UNSLOTH_DISABLE_STATISTICS", "1")
|
| 85 |
+
|
| 86 |
+
if not torch.cuda.is_available():
|
| 87 |
+
raise RuntimeError(
|
| 88 |
+
"No GPU visible to PyTorch. Colab: Runtime → Change runtime type → set "
|
| 89 |
+
"Hardware accelerator to GPU (T4, L4, A100, …) → Save, then re-run. "
|
| 90 |
+
"If you are on **CPU** only, this notebook cannot train; connect a GPU session."
|
| 91 |
+
)
|
| 92 |
print(torch.cuda.get_device_name(0), torch.cuda.get_device_properties(0).total_memory / 1e9, "GB VRAM")
|
| 93 |
print("torch", torch.__version__, "cuda", torch.version.cuda)
|
| 94 |
|
|
|
|
| 105 |
|
| 106 |
add_md("## Step 3 — Load dataset")
|
| 107 |
|
| 108 |
+
add_code("""import json as _json
|
| 109 |
import pandas as pd
|
| 110 |
+
from datasets import Dataset, DatasetDict, load_dataset
|
| 111 |
+
from huggingface_hub import hf_hub_download
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def load_parlay_episodes():
|
| 115 |
+
\"\"\"Prefer `load_dataset`; fall back to raw JSONL if Colab's old `datasets` can't parse `Json` features.\"\"\"
|
| 116 |
+
try:
|
| 117 |
+
return load_dataset(DATASET_ID, token=HF_TOKEN)
|
| 118 |
+
except (ValueError, KeyError) as e:
|
| 119 |
+
msg = str(e)
|
| 120 |
+
if "Json" not in msg and "Feature type" not in msg:
|
| 121 |
+
raise
|
| 122 |
+
path = hf_hub_download(
|
| 123 |
+
repo_id=DATASET_ID,
|
| 124 |
+
repo_type="dataset",
|
| 125 |
+
filename="episodes_v2.jsonl",
|
| 126 |
+
token=HF_TOKEN,
|
| 127 |
+
)
|
| 128 |
+
rows = []
|
| 129 |
+
with open(path, encoding="utf-8") as f:
|
| 130 |
+
for line in f:
|
| 131 |
+
line = line.strip()
|
| 132 |
+
if line:
|
| 133 |
+
rows.append(_json.loads(line))
|
| 134 |
+
df = pd.DataFrame(rows)
|
| 135 |
+
return DatasetDict({"train": Dataset.from_pandas(df, preserve_index=False)})
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
raw = load_parlay_episodes()
|
| 139 |
df = raw["train"].to_pandas()
|
| 140 |
print(len(df), "rows", list(df.columns))
|
| 141 |
if "reward" in df.columns:
|
|
|
|
| 189 |
|
| 190 |
add_md("## Step 5 — Model + LoRA (Unsloth)")
|
| 191 |
|
| 192 |
+
add_code("""import os
|
| 193 |
+
os.environ.setdefault("UNSLOTH_DISABLE_STATISTICS", "1")
|
| 194 |
+
|
| 195 |
+
from unsloth import FastLanguageModel
|
| 196 |
from unsloth.chat_templates import get_chat_template
|
| 197 |
|
| 198 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
|
|
|
| 201 |
dtype=None,
|
| 202 |
load_in_4bit=True,
|
| 203 |
token=HF_TOKEN,
|
| 204 |
+
disable_log_stats=True,
|
| 205 |
)
|
| 206 |
tokenizer = get_chat_template(tokenizer, chat_template="chatml")
|
| 207 |
model = FastLanguageModel.get_peft_model(
|
|
|
|
| 243 |
fp16=not torch.cuda.is_bf16_supported(),
|
| 244 |
bf16=torch.cuda.is_bf16_supported(),
|
| 245 |
logging_steps=5,
|
| 246 |
+
eval_strategy="epoch",
|
| 247 |
save_strategy="epoch",
|
| 248 |
report_to="none",
|
| 249 |
seed=42,
|
training/notebooks/sft_qwen_colab.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|