| #!/usr/bin/env bash |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| set -euo pipefail |
| export PYTHONUNBUFFERED=1 |
|
|
| : "${DATASET_ID:=sh4shv4t/parlay-episodes}" |
| : "${EPISODE_FILE:=episodes_v2.jsonl}" |
| : "${SFT_MODEL:=sh4shv4t/parlay-sft-1-5b}" |
| : "${GRPO_STEPS:=120}" |
| : "${GRPO_G:=4}" |
| : "${MIN_REWARD:=-50.0}" |
| : "${OUTPUT_DIR:=outputs/grpo_run}" |
| |
| : "${PUSH_TO_HF:=1}" |
| |
| : "${HF_GRPO_REPO:=sh4shv4t/parlay-grpo-1-5b}" |
|
|
| if [[ ! -f "training/grpo_train.py" ]]; then |
| echo "Run this script from the Parlay repo root (training/grpo_train.py not found). pwd=$(pwd)" >&2 |
| exit 1 |
| fi |
|
|
| echo "==> Downloading ${EPISODE_FILE} from dataset ${DATASET_ID} ..." |
| export DATASET_ID EPISODE_FILE |
| JSONL_PATH=$( |
| python -c "import os |
| from huggingface_hub import hf_hub_download |
| print(hf_hub_download( |
| repo_id=os.environ['DATASET_ID'], |
| filename=os.environ['EPISODE_FILE'], |
| repo_type='dataset', |
| ))" |
| ) |
| echo " JSONL: ${JSONL_PATH}" |
|
|
| mkdir -p "$(dirname "$OUTPUT_DIR")" |
| OUT_ABS="$(cd "$(dirname "$OUTPUT_DIR")" && pwd)/$(basename "$OUTPUT_DIR")" |
|
|
| echo "==> GRPO: SFT=${SFT_MODEL} steps=${GRPO_STEPS} G=${GRPO_G} out=${OUT_ABS}" |
| python -m training.grpo_train \ |
| --model "${SFT_MODEL}" \ |
| --data "${JSONL_PATH}" \ |
| --output "${OUT_ABS}" \ |
| --steps "${GRPO_STEPS}" \ |
| --g "${GRPO_G}" \ |
| --min-reward "${MIN_REWARD}" |
|
|
| |
| echo "==> Collecting training plots under ${OUT_ABS}/training_plots/ ..." |
| TP="${OUT_ABS}/training_plots" |
| mkdir -p "${TP}" |
| for f in results/grpo_reward_curve.png results/grpo_loss_curve.png; do |
| if [[ -f "$f" ]]; then |
| cp -f "$f" "${TP}/" |
| echo " + ${f}" |
| fi |
| done |
| if [[ -d "${OUT_ABS}/plots" ]]; then |
| shopt -s nullglob |
| for f in "${OUT_ABS}/plots/"*.png "${OUT_ABS}/plots/"*.json; do |
| [[ -e "$f" ]] || continue |
| cp -f "$f" "${TP}/" |
| echo " + ${f}" |
| done |
| shopt -u nullglob |
| fi |
| if [[ ! -f "${TP}/grpo_reward_curve.png" && ! -f "${TP}/grpo_reward.png" ]]; then |
| echo " (warning: no reward plot in training_plots — check logs for empty log_history or plot errors)" |
| fi |
|
|
| if [[ "${PUSH_TO_HF}" == "1" || "${PUSH_TO_HF}" == "true" ]]; then |
| if [[ -z "${HF_TOKEN:-}" && -z "${HUGGINGFACE_HUB_TOKEN:-}" ]]; then |
| echo "PUSH_TO_HF is set but neither HF_TOKEN nor HUGGINGFACE_HUB_TOKEN is set." >&2 |
| exit 1 |
| fi |
| |
| export HF_TOKEN="${HF_TOKEN:-${HUGGINGFACE_HUB_TOKEN:-}}" |
| echo "==> Pushing to https://huggingface.co/${HF_GRPO_REPO} ..." |
| export HF_REPO_ID="${HF_GRPO_REPO}" |
| python -m training.push_to_hub --model "${OUT_ABS}" --repo "${HF_GRPO_REPO}" |
| else |
| echo "==> PUSH_TO_HF disabled; model saved at ${OUT_ABS}" |
| fi |
|
|
| echo "==> Done." |
|
|