BioRLHF / scripts /run_grpo_full.sh
jang1563's picture
Phase 3: Fix GRPO learning signal with continuous rewards and multi-reward
7dbf475
#!/bin/bash
# SLURM batch script for the BioGRPO full training run.
# The "#SBATCH" lines below are scheduler directives rather than ordinary
# comments; keep them ahead of the first executable command.
#
# Job name, partition and account the job is submitted under.
#SBATCH --job-name=biogrpo_full
#SBATCH --partition=scu-gpu
#SBATCH --account=cayuga_0003
# Requested resources: one GPU, 96G of memory, 8 CPUs per task, 48:00:00 limit.
#SBATCH --gres=gpu:1
#SBATCH --mem=96G
#SBATCH --cpus-per-task=8
#SBATCH --time=48:00:00
# Job stdout and stderr files; %j is the scheduler's job-ID placeholder.
# NOTE(review): these relative paths are presumably resolved against the
# submission directory and opened before the script body runs, so logs/ likely
# has to exist there before submitting - confirm.
#SBATCH --output=logs/grpo_full_%j.log
#SBATCH --error=logs/grpo_full_%j.err
# ============================================================
# BioGRPO Full Experiment v2
# Verifiers V1-V4, G=16, multi-reward, starting from the SFT checkpoint
# ============================================================

# Scratch root, and the directory the job runs from (every relative path
# used later in this script is resolved against WORKDIR).
SCRATCH="/athena/cayuga_0003/scratch/users/jak4013/otsuka"
WORKDIR="$SCRATCH/training/BioRLHF"

# Job banner: one printf argument per output line.
printf '%s\n' \
  "============================================================" \
  "BioGRPO Full Training" \
  "Job ID: $SLURM_JOB_ID" \
  "Node: $SLURMD_NODENAME" \
  "Working dir: $WORKDIR" \
  "Start time: $(date)" \
  "============================================================"

# Nothing below makes sense outside WORKDIR, so stop if it cannot be entered.
if ! cd "$WORKDIR"; then
  echo "WORKDIR not found: $WORKDIR"
  exit 1
fi

# Created relative to WORKDIR (after the cd above).
mkdir -p logs
# ------------------------------------------------------------
# Software environment: reset modules, load CUDA 12.1, activate conda env.
# ------------------------------------------------------------
module purge
module load cuda/12.1
. /home/fs01/jak4013/miniconda3/miniconda3/etc/profile.d/conda.sh
# Stop here if the environment is missing; otherwise the job only fails later
# with an unrelated "command not found" when the trainer is launched.
conda activate biorlhf || {
  echo "Failed to activate conda environment: biorlhf" >&2
  exit 1
}

# Record the GPU the job sees, for later debugging.
echo ""
echo "GPU Information:"
nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv
echo ""

# NOTE(review): the scheduler normally sets CUDA_VISIBLE_DEVICES itself for
# --gres jobs; forcing 0 overrides that - confirm it is intended.
export CUDA_VISIBLE_DEVICES=0
# Keep model caches and W&B run files under WORKDIR.
# NOTE(review): newer transformers releases reportedly prefer HF_HOME over
# TRANSFORMERS_CACHE - confirm against the installed version.
export TRANSFORMERS_CACHE="${WORKDIR}/cache/transformers"
export HF_HOME="${WORKDIR}/cache/huggingface"
export WANDB_DIR="${WORKDIR}/wandb"
export TOKENIZERS_PARALLELISM=false

# Data paths
export GENELAB_BASE="${SCRATCH}/data/GeneLab_benchmark"
export BIOEVAL_DATA="${SCRATCH}/data/BioEval/data"
export SPACEOMICS_DATA="${SCRATCH}/data/SpaceOmicsBench/v3/evaluation/llm"
export BIOEVAL_ROOT="${SCRATCH}/data/BioEval"

# Quoted so the paths survive word splitting and globbing.
mkdir -p "$TRANSFORMERS_CACHE" "$HF_HOME" "$WANDB_DIR"

# Symlink SFT checkpoint if not already present.
# [ -e ] follows symlinks, so a dangling link also lands in this branch;
# ln -sfn replaces such a stale link instead of failing with "File exists".
sft_checkpoint_src="${SCRATCH}/training/biorlhf/kmp_sft_model_final"
sft_checkpoint_link="${WORKDIR}/kmp_sft_model_final"
if [ ! -e "$sft_checkpoint_link" ]; then
  if ln -sfn "$sft_checkpoint_src" "$sft_checkpoint_link"; then
    echo "Symlinked kmp_sft_model_final"
  else
    # Non-fatal, as in the original flow: the trainer reports a missing
    # checkpoint itself if it needs one.
    echo "WARNING: could not symlink kmp_sft_model_final" >&2
  fi
fi
# ------------------------------------------------------------
# Launch training and report the outcome.
# ------------------------------------------------------------
echo "Starting BioGRPO Full v2 training..."
biorlhf-grpo --config configs/grpo_full_v2.json
# Capture the status immediately: any later command (including the [ ] test
# itself) overwrites $?, which previously made the failure message always
# report "exit code 1".
train_rc=$?

if [ "$train_rc" -eq 0 ]; then
  echo ""
  echo "============================================================"
  echo "BioGRPO Full v2 training completed!"
  echo "Model saved to: ./biogrpo_full_v2_model"
  echo "End time: $(date)"
  echo "============================================================"
else
  echo ""
  echo "============================================================"
  echo "BioGRPO Full training failed with exit code $train_rc"
  echo "Check logs/grpo_full_${SLURM_JOB_ID}.err for details"
  echo "============================================================"
  # Propagate the trainer's real (non-zero) status so job accounting shows it.
  exit "$train_rc"
fi