BioRLHF / scripts /run_grpo_phase4.sh
jang1563's picture
Phase 4: V1-aware calibration verifier, eval tools, cleanup
2145d80
#!/bin/bash
#SBATCH --job-name=biogrpo_phase4
#SBATCH --partition=scu-gpu
#SBATCH --account=cayuga_0003
#SBATCH --gres=gpu:1
#SBATCH --mem=96G
#SBATCH --cpus-per-task=8
#SBATCH --time=48:00:00
#SBATCH --output=logs/grpo_phase4_%j.log
#SBATCH --error=logs/grpo_phase4_%j.err
# ============================================================
# BioGRPO Phase 4: V1-Aware V4 Calibration Fix
# V4 weight=0.45 (dominant), V1-aware confidence targeting
# ============================================================
# Root of the scratch area and the BioRLHF working tree derived from it.
# Both variables are read by later sections of this script.
SCRATCH="/athena/cayuga_0003/scratch/users/jak4013/otsuka"
WORKDIR="${SCRATCH}/training/BioRLHF"

# Job banner: one printf call, one argument per output line.
printf '%s\n' \
  "============================================================" \
  "BioGRPO Phase 4 Training" \
  "Job ID: ${SLURM_JOB_ID}" \
  "Node: ${SLURMD_NODENAME}" \
  "Working dir: ${WORKDIR}" \
  "Start time: $(date)" \
  "============================================================"

# Everything below uses paths relative to WORKDIR, so stop if it is missing.
if ! cd "$WORKDIR"; then
  echo "WORKDIR not found: $WORKDIR"
  exit 1
fi

# NOTE(review): the #SBATCH --output/--error paths also point at logs/; the
# scheduler presumably opens those files before this line runs, so this mkdir
# may only help later submissions made from WORKDIR -- confirm.
mkdir -p logs
# Reset the module environment, then load the CUDA 12.1 module.
module purge
module load cuda/12.1

# Load conda's shell integration and activate the "biorlhf" environment.
# Both steps are checked: if either fails the job stops here with a clear
# message on stderr instead of continuing with the wrong interpreter/PATH and
# failing later in a less obvious way.
conda_init="/home/fs01/jak4013/miniconda3/miniconda3/etc/profile.d/conda.sh"
if [[ ! -r "$conda_init" ]]; then
  echo "conda init script not readable: $conda_init" >&2
  exit 1
fi
# shellcheck disable=SC1090  # path is only resolvable on the cluster
. "$conda_init"
if ! conda activate biorlhf; then
  echo "Failed to activate conda environment: biorlhf" >&2
  exit 1
fi

# Record the visible GPU(s) in the job log for later debugging.
echo ""
echo "GPU Information:"
nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv
echo ""
# Runtime environment for the training process.
# NOTE(review): this overwrites any CUDA_VISIBLE_DEVICES value the scheduler
# may already have exported for the allocated GPU -- confirm that index 0 is
# always the allocated device on this partition.
export CUDA_VISIBLE_DEVICES=0
# Keep model/tokenizer caches and W&B run files under WORKDIR.
export TRANSFORMERS_CACHE="${WORKDIR}/cache/transformers"
export HF_HOME="${WORKDIR}/cache/huggingface"
export WANDB_DIR="${WORKDIR}/wandb"
export TOKENIZERS_PARALLELISM=false

# Data paths
export GENELAB_BASE="${SCRATCH}/data/GeneLab_benchmark"
export BIOEVAL_DATA="${SCRATCH}/data/BioEval/data"
export SPACEOMICS_DATA="${SCRATCH}/data/SpaceOmicsBench/v3/evaluation/llm"
export BIOEVAL_ROOT="${SCRATCH}/data/BioEval"

# Create the cache/W&B directories up front. Expansions are quoted so a path
# containing whitespace or glob characters is passed as a single argument, and
# `--` guards against a value that starts with a dash.
mkdir -p -- "$TRANSFORMERS_CACHE" "$HF_HOME" "$WANDB_DIR"
# Symlink the SFT checkpoint into WORKDIR if nothing exists at that path yet.
#
# `-e` follows symlinks, so a dangling link looks "absent" to it; `-L` is
# checked as well so that case is reported instead of running an `ln -s` that
# fails with "File exists". The success message is printed only when `ln`
# actually succeeded. Problems are warnings, not fatal errors, as before.
sft_link="${WORKDIR}/kmp_sft_model_final"
sft_target="${SCRATCH}/training/biorlhf/kmp_sft_model_final"
if [[ ! -e "$sft_link" && ! -L "$sft_link" ]]; then
  if ln -s -- "$sft_target" "$sft_link"; then
    echo "Symlinked kmp_sft_model_final"
  else
    echo "WARNING: could not symlink ${sft_link} -> ${sft_target}" >&2
  fi
elif [[ ! -e "$sft_link" ]]; then
  # Path is a symlink whose target does not exist.
  echo "WARNING: ${sft_link} is a dangling symlink" >&2
fi
# Run the Phase 4 GRPO training and report the outcome.
echo "Starting BioGRPO Phase 4 training..."
biorlhf-grpo --config configs/grpo_phase4.json
# Capture the trainer's status immediately: `$?` is overwritten by every
# subsequent command, including the test in the `if` below, so reading it
# later would report the test's status rather than the trainer's.
train_rc=$?

if [[ "$train_rc" -eq 0 ]]; then
  echo ""
  echo "============================================================"
  echo "BioGRPO Phase 4 training completed!"
  echo "Model saved to: ./biogrpo_phase4_model"
  echo "End time: $(date)"
  echo "============================================================"
else
  echo ""
  echo "============================================================"
  echo "BioGRPO Phase 4 training failed with exit code ${train_rc}"
  echo "Check logs/grpo_phase4_${SLURM_JOB_ID}.err for details"
  echo "============================================================"
  # Propagate the trainer's own non-zero status so the scheduler's accounting
  # shows the real failure code.
  exit "$train_rc"
fi