File size: 2,010 Bytes
f81133b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/bin/bash
# ============================================================================
# Downstream fine-tuning script for ModernProteinLM
#
# Usage:
#   bash run_finetune.sh --task fluorescence
#   bash run_finetune.sh --task solubility,fluorescence,stability
#   TASKS=stability EPOCHS=5 bash run_finetune.sh
#
# Every setting in the CONFIGURATION section can be overridden through the
# environment variable of the same name. --task/--tasks on the command line
# takes precedence over the TASKS environment variable.
# ============================================================================

# Abort on a failing command, on expansion of an unset variable, and on a
# failure in any stage of a pipeline.
set -euo pipefail

# ----------------------------------------------------------------------------
# CONFIGURATION
# ----------------------------------------------------------------------------

# Pretrained checkpoint
PRETRAIN_DIR="${PRETRAIN_DIR:-./outputs/pretrain/final}"
# Comma-separated list of downstream tasks
TASKS="${TASKS:-fluorescence,solubility}"

# Fine-tuning
EPOCHS="${EPOCHS:-20}"
BATCH_SIZE="${BATCH_SIZE:-16}"
LR="${LR:-1e-4}"
WARMUP_RATIO="${WARMUP_RATIO:-0.1}"
WEIGHT_DECAY="${WEIGHT_DECAY:-0.01}"

# System
OUTPUT_DIR="${OUTPUT_DIR:-./outputs/finetune}"
NUM_WORKERS="${NUM_WORKERS:-4}"
NUM_GPUS="${NUM_GPUS:-1}"

# Precision (1 enables --use_amp)
USE_AMP="${USE_AMP:-1}"

# Tracking (1 enables --use_trackio and --trackio_project)
USE_TRACKIO="${USE_TRACKIO:-0}"
TRACKIO_PROJECT="${TRACKIO_PROJECT:-modern-protein-lm}"

#######################################
# Apply command-line overrides to the configuration.
# Globals:
#   TASKS (written) - replaced by the value given to --task/--tasks
# Arguments:
#   The script's positional parameters. Recognized forms:
#     --task VALUE, --tasks VALUE, --task=VALUE, --tasks=VALUE
#   Any other argument is reported on stderr and ignored, so invocations
#   that passed extra arguments before this parser existed keep running.
# Returns:
#   0 on success, 2 when --task/--tasks is given without a value.
#######################################
parse_cli_args() {
    local value
    while (( $# > 0 )); do
        case "$1" in
            --task|--tasks)
                if (( $# < 2 )) || [[ -z "$2" ]]; then
                    printf 'Error: %s requires a comma-separated task list\n' "$1" >&2
                    return 2
                fi
                TASKS="$2"
                shift 2
                ;;
            --task=*|--tasks=*)
                value="${1#*=}"
                if [[ -z "$value" ]]; then
                    printf 'Error: %s requires a comma-separated task list\n' "${1%%=*}" >&2
                    return 2
                fi
                TASKS="$value"
                shift
                ;;
            *)
                printf 'Warning: ignoring unrecognized argument: %s\n' "$1" >&2
                shift
                ;;
        esac
    done
    return 0
}

parse_cli_args "$@" || exit $?

# ----------------------------------------------------------------------------
# LAUNCH
# ----------------------------------------------------------------------------

# Assemble the argument vector for train_finetune.py. An array is used so
# that every configured value stays a single word when expanded later.
PYTHON_ARGS=(train_finetune.py)
PYTHON_ARGS+=(--pretrain_dir "$PRETRAIN_DIR")
PYTHON_ARGS+=(--tasks "$TASKS")
PYTHON_ARGS+=(--epochs "$EPOCHS")
PYTHON_ARGS+=(--batch_size "$BATCH_SIZE")
PYTHON_ARGS+=(--lr "$LR")
PYTHON_ARGS+=(--warmup_ratio "$WARMUP_RATIO")
PYTHON_ARGS+=(--weight_decay "$WEIGHT_DECAY")
PYTHON_ARGS+=(--output_dir "$OUTPUT_DIR")
PYTHON_ARGS+=(--num_workers "$NUM_WORKERS")

# Optional switches: each is appended only when its toggle is exactly "1".
case "$USE_AMP" in
    1) PYTHON_ARGS+=(--use_amp) ;;
esac

case "$USE_TRACKIO" in
    1)
        PYTHON_ARGS+=(--use_trackio)
        PYTHON_ARGS+=(--trackio_project "$TRACKIO_PROJECT")
        ;;
esac

# Validate the GPU count before using it in arithmetic: a malformed value
# would otherwise make the comparison fail and silently select the
# single-process path.
gpu_count="${NUM_GPUS:-1}"
if ! [[ "$gpu_count" =~ ^[0-9]+$ ]]; then
    printf 'Error: NUM_GPUS must be a non-negative integer, got "%s"\n' "$gpu_count" >&2
    exit 2
fi

# 10# forces base 10 so a value with a leading zero is not parsed as octal.
if (( 10#$gpu_count > 1 )); then
    if command -v torchrun &> /dev/null; then
        echo "Launching with torchrun on $gpu_count GPUs..."
        torchrun --standalone --nnodes=1 --nproc_per_node="$gpu_count" "${PYTHON_ARGS[@]}"
    else
        # Keep the best-effort fallback, but tell the user that the requested
        # multi-GPU launch is not what is actually running.
        printf 'Warning: NUM_GPUS=%s but torchrun was not found on PATH; running a single python process instead.\n' "$gpu_count" >&2
        python "${PYTHON_ARGS[@]}"
    fi
else
    python "${PYTHON_ARGS[@]}"
fi

echo "Fine-tuning complete. Results in $OUTPUT_DIR"