#!/bin/bash
# Listing metadata (from the file viewer): f81133b, file size 2,010 bytes.
# ============================================================================
# Downstream fine-tuning script for ModernProteinLM
#
# Usage:
# bash run_finetune.sh --task fluorescence
# bash run_finetune.sh --task solubility,fluorescence,stability
# ============================================================================
# Abort on any failing command, on use of an unset variable, and on a failure
# in any stage of a pipeline.
set -euo pipefail
# ----------------------------------------------------------------------------
# CONFIGURATION
# ----------------------------------------------------------------------------
# Each setting keeps a value already present in the environment and falls back
# to the default after ":-" when the variable is unset or empty.
# Pretrained checkpoint
PRETRAIN_DIR="${PRETRAIN_DIR:-./outputs/pretrain/final}"
# Comma-separated task list; a --task/--tasks argument overrides it.
TASKS="${TASKS:-fluorescence,solubility}"
# Fine-tuning
EPOCHS="${EPOCHS:-20}"
BATCH_SIZE="${BATCH_SIZE:-16}"
LR="${LR:-1e-4}"
WARMUP_RATIO="${WARMUP_RATIO:-0.1}"
WEIGHT_DECAY="${WEIGHT_DECAY:-0.01}"
# System
OUTPUT_DIR="${OUTPUT_DIR:-./outputs/finetune}"
NUM_WORKERS="${NUM_WORKERS:-4}"
NUM_GPUS="${NUM_GPUS:-1}"
# Precision (the value 1 adds --use_amp to the trainer command line)
USE_AMP="${USE_AMP:-1}"
# Tracking (the value 1 adds --use_trackio and --trackio_project)
USE_TRACKIO="${USE_TRACKIO:-0}"
TRACKIO_PROJECT="${TRACKIO_PROJECT:-modern-protein-lm}"

#######################################
# Applies command-line arguments on top of the environment defaults, so the
# documented "--task LIST" invocation takes effect instead of being ignored.
# Globals:   TASKS (written)
# Arguments: --task LIST | --tasks LIST | --task=LIST | --tasks=LIST
#            -h | --help   print a usage line and exit 0
#            anything else is reported on stderr and ignored, so invocations
#            that passed extra arguments before keep working
# Returns:   0 on success, 2 when a task option has a missing or empty value
#######################################
parse_args() {
  local value
  while (( $# > 0 )); do
    case "$1" in
      --task | --tasks)
        if (( $# < 2 )); then
          printf 'error: %s requires a comma-separated task list\n' "$1" >&2
          return 2
        fi
        value="$2"
        shift 2
        ;;
      --task=* | --tasks=*)
        value="${1#*=}"
        shift
        ;;
      -h | --help)
        printf 'Usage: %s [--task TASK[,TASK...]]\n' "${0##*/}"
        exit 0
        ;;
      *)
        printf 'warning: ignoring unrecognized argument: %s\n' "$1" >&2
        shift
        continue
        ;;
    esac
    # An empty list would reach the trainer as --tasks ""; reject it here.
    if [[ -z "$value" ]]; then
      printf 'error: task list must not be empty\n' >&2
      return 2
    fi
    TASKS="$value"
  done
  return 0
}

parse_args "$@"
# ----------------------------------------------------------------------------
# LAUNCH
# ----------------------------------------------------------------------------
# Argument vector for the trainer: the script path comes first, followed by
# its options. An array keeps every value a single word even if it contains
# spaces.
PYTHON_ARGS=(train_finetune.py)
# Checkpoint and task selection
PYTHON_ARGS+=(--pretrain_dir "$PRETRAIN_DIR" --tasks "$TASKS")
# Optimisation hyper-parameters
PYTHON_ARGS+=(--epochs "$EPOCHS" --batch_size "$BATCH_SIZE" --lr "$LR")
PYTHON_ARGS+=(--warmup_ratio "$WARMUP_RATIO" --weight_decay "$WEIGHT_DECAY")
# Output location and data-loading workers
PYTHON_ARGS+=(--output_dir "$OUTPUT_DIR" --num_workers "$NUM_WORKERS")

# Optional switches are appended only when their toggle is exactly "1".
case "$USE_AMP" in
  1) PYTHON_ARGS+=(--use_amp) ;;
esac
case "$USE_TRACKIO" in
  1) PYTHON_ARGS+=(--use_trackio --trackio_project "$TRACKIO_PROJECT") ;;
esac
# Choose the launcher. torchrun is used only when more than one GPU is
# requested and the torchrun command is available; every other case runs the
# trainer with plain python.
launcher=(python)
if [[ "$NUM_GPUS" -gt 1 ]]; then
  if command -v torchrun > /dev/null 2>&1; then
    echo "Launching with torchrun on $NUM_GPUS GPUs..."
    launcher=(torchrun --standalone --nnodes=1 --nproc_per_node="$NUM_GPUS")
  else
    # Make the fallback visible instead of silently dropping the multi-GPU
    # request.
    printf 'warning: NUM_GPUS=%s but torchrun was not found on PATH; launching with plain python instead.\n' \
      "$NUM_GPUS" >&2
  fi
fi
# Under "set -e" a failing trainer stops the script here, so the completion
# message below is printed only after a successful run.
"${launcher[@]}" "${PYTHON_ARGS[@]}"
echo "Fine-tuning complete. Results in $OUTPUT_DIR"