# Parlay/scripts/hf_grpo_entry.sh
# sh4shv4t's picture
# feat: added images, new sft notebook, jobs to do grpo
# 213dee8
#!/usr/bin/env bash
# Run from the Parlay repository root (the folder that contains training/ and parlay_env/).
# Intended for Linux GPU jobs (Hugging Face Jobs, RunPod, etc.).
#
# Usage (after: git clone ... && cd Parlay && pip install -r requirements-train.txt):
# export HF_TOKEN=... # read private assets + push (required if PUSH_TO_HF=1)
# export GRPO_STEPS=120 GRPO_G=4
# bash scripts/hf_grpo_entry.sh
#
# See training/GRPO_HF_RUNBOOK.md for a full walkthrough.
# Strict mode: stop on any failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Ask Python not to buffer stdout/stderr so job logs appear as they are written.
PYTHONUNBUFFERED=1
export PYTHONUNBUFFERED

# ---- Tunables ---------------------------------------------------------------
# Each variable keeps the caller's value when it is already set and non-empty;
# otherwise it falls back to the default on the right.

# Hub dataset repo and the file inside it that holds the episodes.
DATASET_ID="${DATASET_ID:-sh4shv4t/parlay-episodes}"
EPISODE_FILE="${EPISODE_FILE:-episodes_v2.jsonl}"

# Starting checkpoint handed to the trainer as --model.
SFT_MODEL="${SFT_MODEL:-sh4shv4t/parlay-sft-1-5b}"

# Trainer knobs forwarded as --steps, --g and --min-reward respectively.
GRPO_STEPS="${GRPO_STEPS:-120}"
GRPO_G="${GRPO_G:-4}"
MIN_REWARD="${MIN_REWARD:--50.0}"

# Where the trainer writes its output (may be relative to the repo root).
OUTPUT_DIR="${OUTPUT_DIR:-outputs/grpo_run}"

# Set to 0 to skip push (e.g. smoke test)
PUSH_TO_HF="${PUSH_TO_HF:-1}"

# Model repo to upload the GRPO output folder to
HF_GRPO_REPO="${HF_GRPO_REPO:-sh4shv4t/parlay-grpo-1-5b}"
# ---- Preflight --------------------------------------------------------------
# Cheap sanity checks that run before any download or training work, so a
# misconfigured job aborts immediately instead of after the training run.

# 1) The script must be started from the repo root: the trainer is launched
#    further down as `python -m training.grpo_train`, so the training/ package
#    has to be reachable from the current directory.
if [[ ! -f "training/grpo_train.py" ]]; then
  echo "Run this script from the Parlay repo root (training/grpo_train.py not found). pwd=$(pwd)" >&2
  exit 1
fi

# 2) When a push is requested, a Hub token is mandatory. The push step at the
#    end of the script enforces this too, but only once training has already
#    completed; checking here avoids spending a full run on a job that cannot
#    upload its result. Accepts the same two variable names and the same
#    PUSH_TO_HF values ("1" or "true") as the push step.
if [[ "${PUSH_TO_HF:-}" == "1" || "${PUSH_TO_HF:-}" == "true" ]]; then
  if [[ -z "${HF_TOKEN:-}" && -z "${HUGGINGFACE_HUB_TOKEN:-}" ]]; then
    echo "PUSH_TO_HF is set but neither HF_TOKEN nor HUGGINGFACE_HUB_TOKEN is set." >&2
    exit 1
  fi
fi
# ---- Fetch training data ----------------------------------------------------
# Download the episode file from the Hub dataset repo and remember where it
# landed locally. DATASET_ID and EPISODE_FILE are exported so the inline Python
# can read them from os.environ rather than having them spliced into the code.
export DATASET_ID EPISODE_FILE
echo "==> Downloading ${EPISODE_FILE} from dataset ${DATASET_ID} ..."

# Inline helper: prints the value returned by hf_hub_download on stdout, which
# is the only thing the command substitution below captures.
fetch_episodes_py='
import os
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id=os.environ["DATASET_ID"],
    filename=os.environ["EPISODE_FILE"],
    repo_type="dataset",
)
print(local_path)
'

# A failing download makes this assignment fail, which aborts the script when
# errexit is active.
JSONL_PATH="$(python -c "${fetch_episodes_py}")"
echo " JSONL: ${JSONL_PATH}"
# ---- Train ------------------------------------------------------------------
# Ensure the parent of OUTPUT_DIR exists, then build an absolute output path:
# `cd <parent> && pwd` gives the parent's absolute form and the last path
# component of OUTPUT_DIR is appended unchanged.
out_parent="$(dirname "$OUTPUT_DIR")"
mkdir -p "${out_parent}"
OUT_ABS="$(cd "${out_parent}" && pwd)/$(basename "$OUTPUT_DIR")"

echo "==> GRPO: SFT=${SFT_MODEL} steps=${GRPO_STEPS} G=${GRPO_G} out=${OUT_ABS}"

# Trainer arguments, kept in an array so every value is passed as exactly one
# argv entry regardless of its content.
train_args=(
  --model "${SFT_MODEL}"
  --data "${JSONL_PATH}"
  --output "${OUT_ABS}"
  --steps "${GRPO_STEPS}"
  --g "${GRPO_G}"
  --min-reward "${MIN_REWARD}"
)
python -m training.grpo_train "${train_args[@]}"
# ---- Collect plots ----------------------------------------------------------
# Copy plot images and log JSON into <output>/training_plots so that a single
# upload of the output folder also carries the visualizations.
TP="${OUT_ABS}/training_plots"
echo "==> Collecting training plots under ${OUT_ABS}/training_plots/ ..."
mkdir -p "${TP}"

# Curves looked up under results/ relative to the current directory; each one
# is optional and silently skipped when absent.
for curve in results/grpo_reward_curve.png results/grpo_loss_curve.png; do
  [[ -f "${curve}" ]] || continue
  cp -f "${curve}" "${TP}/"
  echo " + ${curve}"
done

# Anything the run left in <output>/plots: all *.png and *.json files.
if [[ -d "${OUT_ABS}/plots" ]]; then
  # nullglob makes a pattern with no matches expand to nothing instead of
  # itself; it is switched off again right after the loop.
  shopt -s nullglob
  for artefact in "${OUT_ABS}/plots/"*.png "${OUT_ABS}/plots/"*.json; do
    [[ -e "${artefact}" ]] || continue
    cp -f "${artefact}" "${TP}/"
    echo " + ${artefact}"
  done
  shopt -u nullglob
fi

# Warn when neither of the two known reward-plot file names was collected.
if ! [[ -f "${TP}/grpo_reward_curve.png" || -f "${TP}/grpo_reward.png" ]]; then
  echo " (warning: no reward plot in training_plots — check logs for empty log_history or plot errors)"
fi
# ---- Publish ----------------------------------------------------------------
# Upload the output folder to the Hub when PUSH_TO_HF is "1" or "true"; any
# other value leaves the model on local disk only.
case "${PUSH_TO_HF}" in
  1 | true)
    # A token must be available under at least one of the two accepted names.
    [[ -n "${HF_TOKEN:-}" || -n "${HUGGINGFACE_HUB_TOKEN:-}" ]] || {
      echo "PUSH_TO_HF is set but neither HF_TOKEN nor HUGGINGFACE_HUB_TOKEN is set." >&2
      exit 1
    }

    # push_to_hub.py reads HF_TOKEN; Jobs often set HUGGINGFACE_HUB_TOKEN.
    # HF_TOKEN wins when both are present.
    HF_TOKEN="${HF_TOKEN:-${HUGGINGFACE_HUB_TOKEN:-}}"
    export HF_TOKEN

    echo "==> Pushing to https://huggingface.co/${HF_GRPO_REPO} ..."

    # Exported in addition to the --repo argument below; presumably read by
    # training.push_to_hub — confirm against that module.
    HF_REPO_ID="${HF_GRPO_REPO}"
    export HF_REPO_ID

    python -m training.push_to_hub --model "${OUT_ABS}" --repo "${HF_GRPO_REPO}"
    ;;
  *)
    echo "==> PUSH_TO_HF disabled; model saved at ${OUT_ABS}"
    ;;
esac

echo "==> Done."