Spaces:

sh4shv4t
/

Parlay

Paused

App Files Files Community

Parlay / scripts /hf_grpo_entry.sh

sh4shv4t

feat: added images, new sft notebook, jobs to do grpo

213dee8 28 days ago

raw

history blame contribute delete

3.26 kB

	#!/usr/bin/env bash
	# Run from the Parlay repository root (the folder that contains training/ and parlay_env/).
	# Intended for Linux GPU jobs (Hugging Face Jobs, RunPod, etc.).
	#
	# Usage (after: git clone ... && cd Parlay && pip install -r requirements-train.txt):
	# export HF_TOKEN=... # read private assets + push (required if PUSH_TO_HF=1)
	# export GRPO_STEPS=120 GRPO_G=4
	# bash scripts/hf_grpo_entry.sh
	#
	# See training/GRPO_HF_RUNBOOK.md for a full walkthrough.
	# Strict mode: stop on a failing command, on use of an unset variable, and on
	# a failure in any stage of a pipeline.
	set -o errexit -o nounset -o pipefail
	# Exported so the python child processes started below see it.
	PYTHONUNBUFFERED=1
	export PYTHONUNBUFFERED

	# Every setting below keeps a non-empty value supplied by the caller's
	# environment and otherwise falls back to the default shown.

	# Hub dataset repo and the file inside it that holds the training episodes.
	DATASET_ID="${DATASET_ID:-sh4shv4t/parlay-episodes}"
	EPISODE_FILE="${EPISODE_FILE:-episodes_v2.jsonl}"
	# Model handed to the trainer as --model.
	SFT_MODEL="${SFT_MODEL:-sh4shv4t/parlay-sft-1-5b}"
	# Forwarded to the trainer as --steps, --g and --min-reward respectively.
	GRPO_STEPS="${GRPO_STEPS:-120}"
	GRPO_G="${GRPO_G:-4}"
	MIN_REWARD="${MIN_REWARD:--50.0}"
	# Where the trainer writes its output (made absolute before use).
	OUTPUT_DIR="${OUTPUT_DIR:-outputs/grpo_run}"
	# Set to 0 to skip push (e.g. smoke test)
	PUSH_TO_HF="${PUSH_TO_HF:-1}"
	# Model repo to upload the GRPO output folder to
	HF_GRPO_REPO="${HF_GRPO_REPO:-sh4shv4t/parlay-grpo-1-5b}"

	# Guard clause: the trainer is started below as `python -m training.grpo_train`,
	# which only resolves when the current directory is the repository root.
	[[ -f "training/grpo_train.py" ]] || {
	  echo "Run this script from the Parlay repo root (training/grpo_train.py not found). pwd=$(pwd)" >&2
	  exit 1
	}

	# Fetch the episode file from the Hub dataset repo and remember its local path
	# in JSONL_PATH (consumed by the training step).
	echo "==> Downloading ${EPISODE_FILE} from dataset ${DATASET_ID} ..."
	# The Python snippet reads both values from the environment, so they must be exported.
	export DATASET_ID EPISODE_FILE
	# The snippet is a single logical line (statements joined with ';') on purpose:
	# a multi-line -c string would carry this script's indentation into Python and
	# fail with IndentationError on the first continuation line.
	JSONL_PATH="$(
	  python -c "import os; from huggingface_hub import hf_hub_download; print(hf_hub_download(repo_id=os.environ['DATASET_ID'], filename=os.environ['EPISODE_FILE'], repo_type='dataset'))"
	)"
	# Whatever the snippet printed on stdout is taken as the path; make sure it
	# really names a file before handing it to the trainer.
	if [[ ! -f "${JSONL_PATH}" ]]; then
	  echo "Download of ${EPISODE_FILE} did not produce a file (got: '${JSONL_PATH}')." >&2
	  exit 1
	fi
	echo " JSONL: ${JSONL_PATH}"

	# Turn OUTPUT_DIR into an absolute path: create its parent, resolve the parent
	# with cd + pwd, then re-attach the final path component.
	out_parent="$(dirname "$OUTPUT_DIR")"
	mkdir -p "${out_parent}"
	OUT_ABS="$(cd "${out_parent}" && pwd)/$(basename "$OUTPUT_DIR")"

	echo "==> GRPO: SFT=${SFT_MODEL} steps=${GRPO_STEPS} G=${GRPO_G} out=${OUT_ABS}"
	# Trainer arguments are collected in an array so each value stays one word.
	grpo_args=(
	  --model "${SFT_MODEL}"
	  --data "${JSONL_PATH}"
	  --output "${OUT_ABS}"
	  --steps "${GRPO_STEPS}"
	  --g "${GRPO_G}"
	  --min-reward "${MIN_REWARD}"
	)
	python -m training.grpo_train "${grpo_args[@]}"

	# Bundle Matplotlib curves + TRL log JSON into the model folder so one Hub upload includes visualizations.
	#######################################
	# Copy plot and log files into one directory.
	# Sources: results/grpo_reward_curve.png and results/grpo_loss_curve.png
	# (relative to the current directory) plus every *.png / *.json directly
	# inside <run dir>/plots, when that directory exists.
	# Arguments:
	#   $1 - run output directory
	#   $2 - destination directory (created if missing)
	# Outputs: one " + <path>" line per copied file; a warning line when neither
	#          grpo_reward_curve.png nor grpo_reward.png ended up in $2.
	# Returns: non-zero if an argument is empty or $2 cannot be created.
	#######################################
	collect_training_plots() {
	  local run_dir="${1:-}" dest_dir="${2:-}" f
	  # An empty path would make the copies land relative to the filesystem root.
	  if [[ -z "${run_dir}" || -z "${dest_dir}" ]]; then
	    echo "collect_training_plots: run directory and destination directory are required" >&2
	    return 1
	  fi
	  mkdir -p "${dest_dir}" || return 1
	  for f in results/grpo_reward_curve.png results/grpo_loss_curve.png; do
	    if [[ -f "$f" ]]; then
	      cp -f "$f" "${dest_dir}/"
	      echo " + ${f}"
	    fi
	  done
	  if [[ -d "${run_dir}/plots" ]]; then
	    # The wildcard sits outside the quotes so the shell expands it. A pattern
	    # with no match stays literal and is skipped by the -e test below.
	    for f in "${run_dir}/plots/"*.png "${run_dir}/plots/"*.json; do
	      [[ -e "$f" ]] || continue
	      cp -f "$f" "${dest_dir}/"
	      echo " + ${f}"
	    done
	  fi
	  if [[ ! -f "${dest_dir}/grpo_reward_curve.png" && ! -f "${dest_dir}/grpo_reward.png" ]]; then
	    echo " (warning: no reward plot in training_plots — check logs for empty log_history or plot errors)"
	  fi
	}

	echo "==> Collecting training plots under ${OUT_ABS}/training_plots/ ..."
	TP="${OUT_ABS}/training_plots"
	collect_training_plots "${OUT_ABS}" "${TP}"

	#######################################
	# Decide whether the push step is switched on.
	# Arguments: $1 - value of PUSH_TO_HF
	# Returns:   0 when $1 is exactly "1" or "true", 1 for anything else.
	#######################################
	push_enabled() {
	  [[ "${1:-}" == "1" || "${1:-}" == "true" ]]
	}

	# Upload the run output to the Hub unless pushing was disabled.
	if push_enabled "${PUSH_TO_HF}"; then
	  # A token is required for the upload; either variable name is accepted.
	  if [[ -z "${HF_TOKEN:-}" && -z "${HUGGINGFACE_HUB_TOKEN:-}" ]]; then
	    echo "PUSH_TO_HF is set but neither HF_TOKEN nor HUGGINGFACE_HUB_TOKEN is set." >&2
	    exit 1
	  fi
	  # push_to_hub.py reads HF_TOKEN; Jobs often set HUGGINGFACE_HUB_TOKEN
	  export HF_TOKEN="${HF_TOKEN:-${HUGGINGFACE_HUB_TOKEN:-}}"
	  echo "==> Pushing to https://huggingface.co/${HF_GRPO_REPO} ..."
	  export HF_REPO_ID="${HF_GRPO_REPO}"
	  python -m training.push_to_hub --model "${OUT_ABS}" --repo "${HF_GRPO_REPO}"
	else
	  echo "==> PUSH_TO_HF disabled; model saved at ${OUT_ABS}"
	fi

	echo "==> Done."