#!/usr/bin/env bash # Run from the Parlay repository root (the folder that contains training/ and parlay_env/). # Intended for Linux GPU jobs (Hugging Face Jobs, RunPod, etc.). # # Usage (after: git clone ... && cd Parlay && pip install -r requirements-train.txt): # export HF_TOKEN=... # read private assets + push (required if PUSH_TO_HF=1) # export GRPO_STEPS=120 GRPO_G=4 # bash scripts/hf_grpo_entry.sh # # See training/GRPO_HF_RUNBOOK.md for a full walkthrough. set -euo pipefail export PYTHONUNBUFFERED=1 : "${DATASET_ID:=sh4shv4t/parlay-episodes}" : "${EPISODE_FILE:=episodes_v2.jsonl}" : "${SFT_MODEL:=sh4shv4t/parlay-sft-1-5b}" : "${GRPO_STEPS:=120}" : "${GRPO_G:=4}" : "${MIN_REWARD:=-50.0}" : "${OUTPUT_DIR:=outputs/grpo_run}" # Set to 0 to skip push (e.g. smoke test) : "${PUSH_TO_HF:=1}" # Model repo to upload the GRPO output folder to : "${HF_GRPO_REPO:=sh4shv4t/parlay-grpo-1-5b}" if [[ ! -f "training/grpo_train.py" ]]; then echo "Run this script from the Parlay repo root (training/grpo_train.py not found). pwd=$(pwd)" >&2 exit 1 fi echo "==> Downloading ${EPISODE_FILE} from dataset ${DATASET_ID} ..." export DATASET_ID EPISODE_FILE JSONL_PATH=$( python -c "import os from huggingface_hub import hf_hub_download print(hf_hub_download( repo_id=os.environ['DATASET_ID'], filename=os.environ['EPISODE_FILE'], repo_type='dataset', ))" ) echo " JSONL: ${JSONL_PATH}" mkdir -p "$(dirname "$OUTPUT_DIR")" OUT_ABS="$(cd "$(dirname "$OUTPUT_DIR")" && pwd)/$(basename "$OUTPUT_DIR")" echo "==> GRPO: SFT=${SFT_MODEL} steps=${GRPO_STEPS} G=${GRPO_G} out=${OUT_ABS}" python -m training.grpo_train \ --model "${SFT_MODEL}" \ --data "${JSONL_PATH}" \ --output "${OUT_ABS}" \ --steps "${GRPO_STEPS}" \ --g "${GRPO_G}" \ --min-reward "${MIN_REWARD}" # Bundle Matplotlib curves + TRL log JSON into the model folder so one Hub upload includes visualizations. echo "==> Collecting training plots under ${OUT_ABS}/training_plots/ ..." TP="${OUT_ABS}/training_plots" mkdir -p "${TP}" for f in results/grpo_reward_curve.png results/grpo_loss_curve.png; do if [[ -f "$f" ]]; then cp -f "$f" "${TP}/" echo " + ${f}" fi done if [[ -d "${OUT_ABS}/plots" ]]; then shopt -s nullglob for f in "${OUT_ABS}/plots/"*.png "${OUT_ABS}/plots/"*.json; do [[ -e "$f" ]] || continue cp -f "$f" "${TP}/" echo " + ${f}" done shopt -u nullglob fi if [[ ! -f "${TP}/grpo_reward_curve.png" && ! -f "${TP}/grpo_reward.png" ]]; then echo " (warning: no reward plot in training_plots — check logs for empty log_history or plot errors)" fi if [[ "${PUSH_TO_HF}" == "1" || "${PUSH_TO_HF}" == "true" ]]; then if [[ -z "${HF_TOKEN:-}" && -z "${HUGGINGFACE_HUB_TOKEN:-}" ]]; then echo "PUSH_TO_HF is set but neither HF_TOKEN nor HUGGINGFACE_HUB_TOKEN is set." >&2 exit 1 fi # push_to_hub.py reads HF_TOKEN; Jobs often set HUGGINGFACE_HUB_TOKEN export HF_TOKEN="${HF_TOKEN:-${HUGGINGFACE_HUB_TOKEN:-}}" echo "==> Pushing to https://huggingface.co/${HF_GRPO_REPO} ..." export HF_REPO_ID="${HF_GRPO_REPO}" python -m training.push_to_hub --model "${OUT_ABS}" --repo "${HF_GRPO_REPO}" else echo "==> PUSH_TO_HF disabled; model saved at ${OUT_ABS}" fi echo "==> Done."