#!/bin/bash
# E-GRPO (G2RPO) training script for QwenImage
# Based on finetune_mergestep.sh configuration
#
# Usage: run from anywhere; the script cd's to its parent (source_code) dir.
# Overridable env vars: NNODES, NODE_RANK, MASTER_ADDR, MASTER_PORT.

# Strict mode: exit on error (-e), error on unset vars (-u),
# and fail a pipeline if any stage fails (pipefail).
set -euo pipefail

# GPU 6 is faulty - use only the 6 confirmed working GPUs: 0,1,2,3,4,5
# Force set these values (override any existing env vars)
export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5"
NPROC_PER_NODE=6
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
MASTER_ADDR=${MASTER_ADDR:-localhost}
MASTER_PORT=${MASTER_PORT:-29501}

# NCCL configuration to fix communication hangs
export NCCL_P2P_DISABLE=1     # Disable P2P (peer-to-peer GPU communication)
export NCCL_IB_DISABLE=1      # Disable InfiniBand
export NCCL_SHM_DISABLE=0     # Keep shared memory enabled
export NCCL_SOCKET_IFNAME=lo  # Use localhost interface
export NCCL_DEBUG=WARN        # Show warnings

# Change to source_code directory; fail loudly if the path is wrong.
cd "$(dirname "$0")/.." || { echo "cannot cd to script parent dir" >&2; exit 1; }

echo "=========================================="
echo "E-GRPO Training for QwenImage"
echo "=========================================="
echo "Nodes: $NNODES"
echo "GPUs per node: $NPROC_PER_NODE"
echo "Master addr: $MASTER_ADDR"
echo "Master port: $MASTER_PORT"
echo "=========================================="

# Run training (quoted expansions — SC2086).
# eta/shift/clip settings mirror finetune_mergestep.sh; adjust with care.
python -m torch.distributed.run \
  --nnodes="$NNODES" \
  --nproc_per_node="$NPROC_PER_NODE" \
  --node_rank="$NODE_RANK" \
  --master_addr="$MASTER_ADDR" \
  --master_port="$MASTER_PORT" \
  fastvideo/train_g2rpo_qwenimage_merge.py \
  --pretrained_model_name_or_path ./data/QwenImage \
  --data_json_path ./data/qwenimage_rl_embeddings/videos2caption.json \
  --output_dir ./output/g2rpo_qwenimage \
  --hps_path ./data/hps/HPS_v2.1_compressed.pt \
  --hps_clip_path ./data/hps/open_clip_pytorch_model.bin \
  --h 1024 \
  --w 1024 \
  --sampling_steps 16 \
  --eta 0.7 \
  --shift 3.0 \
  --num_generations 12 \
  --learning_rate 2e-6 \
  --max_train_steps 301 \
  --checkpointing_steps 50 \
  --eta_step_list 0 1 2 3 4 5 6 7 \
  --eta_step_merge_list 1 1 1 2 2 2 3 3 \
  --granular_list 1 \
  --init_same_noise \
  --clip_range 1e-4 \
  --adv_clip_max 5.0 \
  --use_hpsv2

echo "Training completed!"