Spaces:
Paused
Paused
File size: 11,220 Bytes
#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────────────
# setup.sh — Medical VQA Environment Setup
# Supports: Vast.ai (CUDA), Google Colab, local macOS (CPU/MPS)
#
# Usage:
#   chmod +x setup.sh && bash setup.sh
#   bash setup.sh --colab       # Google Colab mode (skip git config)
#   bash setup.sh --offline     # Offline mode (no WandB sync)
#   bash setup.sh --skip-nltk   # Skip the NLTK data download
# ─────────────────────────────────────────────────────────────────────────────
set -euo pipefail

# ── Parse flags ──────────────────────────────────────────────────────────────
# Mode switches; each one stays 0 unless the matching flag is passed.
COLAB_MODE=0    # --colab (set here; not read anywhere else in the visible script)
OFFLINE_MODE=0  # --offline
SKIP_NLTK=0     # --skip-nltk

#######################################
# Translate command-line flags into the mode switches above.
# Globals:   COLAB_MODE, OFFLINE_MODE, SKIP_NLTK (written)
# Arguments: the script's arguments ("$@")
# Outputs:   one warning line on stderr per unrecognised argument
# Returns:   0 always; an unknown argument is reported, not fatal
#######################################
parse_flags() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --colab) COLAB_MODE=1 ;;
      --offline) OFFLINE_MODE=1 ;;
      --skip-nltk) SKIP_NLTK=1 ;;
      # A misspelled flag used to be dropped silently, leaving the default
      # mode active without any hint to the user.
      *) printf '[WARN] Ignoring unknown argument: %s\n' "$arg" >&2 ;;
    esac
  done
}

parse_flags "$@"
# ── Colors ───────────────────────────────────────────────────────────────────
# ANSI escape sequences, stored with a literal backslash and expanded by the
# %b conversions in the helpers below.
GREEN='\033[0;32m'; YELLOW='\033[1;33m'; RED='\033[0;31m'; NC='\033[0m'

# Logging helpers. Each joins its arguments with spaces and prints them after a
# coloured tag. Only the colour codes go through %b; the message itself is
# printed with %s so backslashes in it (e.g. in a path) are shown verbatim.

# info: progress message on stdout.
info() { printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$*"; }

# warn: non-fatal problem, written to stderr.
warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*" >&2; }

# error: fatal problem, written to stderr; terminates the script with status 1.
error() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2; exit 1; }
# ── Banner ───────────────────────────────────────────────────────────────────
# Project banner with one blank line above and one below. The quoted delimiter
# keeps the text literal (no expansion).
cat <<'BANNER'

ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
 π₯ Medical VQA β Environment Setup
 Project: DL Final 523H0173 & 523H0178
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ

BANNER
# ── 1. Python version check ──────────────────────────────────────────────────
# Locate an interpreter (python3 preferred, python as fallback) and require
# Python >= 3.10.
# Without the explicit `|| error`, `set -e` would abort here with no message
# at all when neither interpreter is on PATH.
PYTHON=$(command -v python3 || command -v python) \
  || error "No python3 or python interpreter found on PATH"

# Let the interpreter report its own major.minor version. Parsing the output
# of `--version` with `grep -oP` requires GNU grep built with PCRE support,
# which is not present on every platform this script targets.
PY_VER=$("$PYTHON" -c 'import sys; print("%d.%d" % sys.version_info[:2])')
PY_MAJOR=${PY_VER%%.*}
PY_MINOR=${PY_VER#*.}
info "Python $PY_VER tαΊ‘i: $("$PYTHON" -c 'import sys; print(sys.executable)')"

if (( PY_MAJOR < 3 || (PY_MAJOR == 3 && PY_MINOR < 10) )); then
  error "CαΊ§n Python β₯ 3.10 (hiα»n tαΊ‘i: $PY_VER)"
fi
# ── 2. GPU detection ─────────────────────────────────────────────────────────
# Ask PyTorch whether CUDA is usable. If the probe itself fails (its stderr is
# discarded), "False" is used instead.
CUDA_AVAILABLE=$(
  $PYTHON -c "import torch; print(torch.cuda.is_available())" 2>/dev/null \
    || echo "False"
)

if [[ "$CUDA_AVAILABLE" != "True" ]]; then
  warn "KhΓ΄ng phΓ‘t hiα»n CUDA GPU β training sαΊ½ rαΊ₯t chαΊm trΓͺn CPU"
else
  # Device 0 only: its name, and its total memory in bytes divided by 1e9 and
  # rounded to one decimal. Each value has its own placeholder on failure.
  GPU_NAME=$(
    $PYTHON -c "import torch; print(torch.cuda.get_device_name(0))" 2>/dev/null \
      || echo "Unknown"
  )
  VRAM=$(
    $PYTHON -c "import torch; print(round(torch.cuda.get_device_properties(0).total_memory/1e9,1))" 2>/dev/null \
      || echo "?"
  )
  info "GPU: $GPU_NAME | VRAM: ${VRAM}GB"
fi
# ── 3. Install pip packages ──────────────────────────────────────────────────
info "CΓ i ΔαΊ·t dependencies tα»« requirements.txt..."

# requirements.txt must sit in the same directory as this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REQ_FILE="$SCRIPT_DIR/requirements.txt"
[[ -f "$REQ_FILE" ]] || error "KhΓ΄ng tΓ¬m thαΊ₯y $REQ_FILE"

# Upgrade pip itself first.
$PYTHON -m pip install --upgrade pip --quiet

# Install the main requirements quietly; if that fails, run the same install
# again without --quiet so pip's own output is visible.
if ! $PYTHON -m pip install -r "$REQ_FILE" --quiet; then
  warn "CΓ i ΔαΊ·t silent thαΊ₯t bαΊ‘i, thα» vα»i verbose..."
  $PYTHON -m pip install -r "$REQ_FILE"
fi

# Make sure wandb >= 0.16.0 is installed.
$PYTHON -m pip install "wandb>=0.16.0" --quiet

info "β
Dependencies ΔΓ£ cΓ i xong"
# ── 4. NLTK data download ────────────────────────────────────────────────────
# Fetch the NLTK packages unless --skip-nltk was given. A package whose
# download raises only prints a warning; the remaining packages are still tried.
if [[ "$SKIP_NLTK" -eq 0 ]]; then
  info "TαΊ£i NLTK data (punkt, wordnet)..."
  # Quoted heredoc: the Python source reaches the interpreter verbatim, with
  # the indentation its try/for blocks depend on.
  "$PYTHON" - <<'PYEOF'
import nltk
import ssl

# NOTE(review): when ssl._create_unverified_context exists, this replaces the
# default HTTPS context factory with the unverified one, i.e. certificate
# verification is turned off for this interpreter process. Confirm that this
# is still wanted.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

for pkg in ['punkt', 'punkt_tab', 'wordnet', 'averaged_perceptron_tagger', 'stopwords']:
    try:
        nltk.download(pkg, quiet=True)
    except Exception as e:
        print(f' [WARN] NLTK {pkg}: {e}')
print(' NLTK data OK')
PYEOF
fi
# ── 5. Python path configuration ─────────────────────────────────────────────
info "CαΊ₯u hΓ¬nh Python path..."

# Write a .pth file containing the project root into site-packages so that
# Python adds it to sys.path on startup. The first entry of
# site.getsitepackages() is preferred; the per-user site directory is the
# fallback when that call fails.
SITE_PACKAGES=$("$PYTHON" -c "import site; print(site.getsitepackages()[0])" 2>/dev/null \
  || "$PYTHON" -c "import site; print(site.getusersitepackages())")
PTH_FILE="$SITE_PACKAGES/medical_vqa.pth"

# The per-user directory may not exist yet, so create it before writing.
# A failure here is only a warning.
if mkdir -p "$SITE_PACKAGES" && printf '%s\n' "$SCRIPT_DIR" > "$PTH_FILE"; then
  info "β
Path cαΊ₯u hΓ¬nh tαΊ‘i: $PTH_FILE"
else
  warn "KhΓ΄ng thα» ghi vΓ o site-packages, thα» export PYTHONPATH thα»§ cΓ΄ng."
fi

# Also export PYTHONPATH for this process and the commands it starts.
# The previous value is appended only when it is non-empty: a trailing ':'
# would add an empty entry, which Python resolves to the current directory.
export PYTHONPATH="$SCRIPT_DIR${PYTHONPATH:+:$PYTHONPATH}"
info "PYTHONPATH = $PYTHONPATH"
# ── 6. .env file ─────────────────────────────────────────────────────────────
ENV_FILE="$SCRIPT_DIR/.env"
ENV_EXAMPLE="$SCRIPT_DIR/.env.example"

# Seed .env from the template when .env is missing and the template exists.
if [[ ! -f "$ENV_FILE" && -f "$ENV_EXAMPLE" ]]; then
  cp -- "$ENV_EXAMPLE" "$ENV_FILE"
  warn "ΔΓ£ tαΊ‘o .env tα»« .env.example β HΓ£y Δiα»n WANDB_API_KEY!"
fi

if [[ -f "$ENV_FILE" ]]; then
  # Source .env with comment lines and blank lines removed. `set -a` exports
  # every variable assigned while sourcing. The remaining lines are executed
  # as shell code, so the file must be trusted; errors while sourcing are
  # deliberately ignored (best effort).
  # [[:space:]] is used because `\s` is not defined by POSIX regular
  # expressions.
  set -a
  # shellcheck disable=SC1090
  source <(grep -Ev '^[[:space:]]*(#|$)' -- "$ENV_FILE") 2>/dev/null || true
  set +a
  info ".env ΔΓ£ Δược load"
fi
# ── 7. WandB login ───────────────────────────────────────────────────────────
# --offline is checked first, so it wins even when an API key is present.
if [[ "$OFFLINE_MODE" -eq 1 ]]; then
  export WANDB_MODE=offline
  info "WandB: OFFLINE mode (sync sau bαΊ±ng: wandb sync)"
elif [[ -z "${WANDB_API_KEY:-}" ]]; then
  # No key available: explain how to provide one and carry on.
  warn "WANDB_API_KEY chΖ°a Δược set β WandB sαΊ½ bα» bα» qua khi training"
  warn " Set bαΊ±ng: export WANDB_API_KEY=your_key"
  warn " HoαΊ·c Δiα»n vΓ o file .env"
else
  # NOTE(review): the key is passed as a command-line argument here, which
  # makes it visible in the process list while the command runs.
  # stderr of the login command is discarded; only its exit status is used.
  if $PYTHON -m wandb login "$WANDB_API_KEY" --relogin --quiet 2>/dev/null; then
    info "β
WandB logged in (entity: SpringWang08)"
  else
    warn "WandB login thαΊ₯t bαΊ‘i β kiα»m tra WANDB_API_KEY"
  fi
fi
# ── 8. HuggingFace login ─────────────────────────────────────────────────────
# Log in to the HuggingFace Hub when HF_TOKEN is set and non-empty. A failed
# login is only a warning.
if [[ -n "${HF_TOKEN:-}" ]]; then
  # The token is handed to Python through the environment and read with
  # os.environ. Splicing it into the Python source text would break on a token
  # containing a quote, would let the token's content run as code, and would
  # expose it in the process list.
  if HF_TOKEN="$HF_TOKEN" "$PYTHON" -c \
    "import os; from huggingface_hub import login; login(token=os.environ['HF_TOKEN'], add_to_git_credential=False)" \
    2>/dev/null; then
    info "β
HuggingFace logged in"
  else
    warn "HF login thαΊ₯t bαΊ‘i β dataset cΓ΄ng khai vαΊ«n tαΊ£i Δược"
  fi
else
  warn "HF_TOKEN chΖ°a Δược set (khΓ΄ng cαΊ§n nαΊΏu dataset lΓ public)"
fi
# ── 9. Create required directories ───────────────────────────────────────────
info "TαΊ‘o thΖ° mα»₯c dα»± Γ‘n..."

# Directories, relative to the script's own directory, that must exist.
# mkdir -p creates missing parents and accepts existing directories.
project_dirs=(checkpoints logs/history results/charts data scripts)
for subdir in "${project_dirs[@]}"; do
  mkdir -p "$SCRIPT_DIR/$subdir"
done

info "β
ThΖ° mα»₯c sαΊ΅n sΓ ng"
# ── 10. Smoke test imports ───────────────────────────────────────────────────
# Try to import every third-party package listed below. Importable packages
# are reported on one line; if any import raises ImportError the missing ones
# are listed and Python exits with status 1, which aborts this script under
# `set -e`.
info "Kiα»m tra imports..."
"$PYTHON" - <<'PYEOF'
import sys, importlib

ok, fail = [], []
# (module name to import, label used in the report)
checks = [
    ("torch", "PyTorch"),
    ("torchvision", "TorchVision"),
    ("transformers", "Transformers"),
    ("datasets", "HF Datasets"),
    ("peft", "PEFT (LoRA)"),
    ("trl", "TRL (SFT/DPO)"),
    ("wandb", "WandB"),
    ("nltk", "NLTK"),
    ("bert_score", "BERTScore"),
    ("rouge_score", "ROUGE"),
    ("sklearn", "Scikit-learn"),
    ("matplotlib", "Matplotlib"),
    ("yaml", "PyYAML"),
    ("dotenv", "python-dotenv"),
    ("cv2", "OpenCV"),
]
for mod, name in checks:
    try:
        importlib.import_module(mod)
        ok.append(name)
    except ImportError:
        fail.append(name)

# The status glyph and the rest of this literal must stay on one line: a
# line break inside a single-quoted f-string is a SyntaxError.
print(f" ✅ OK ({len(ok)}): {', '.join(ok)}")
if fail:
    print(f" β MISSING ({len(fail)}): {', '.join(fail)}")
    sys.exit(1)
PYEOF
# ── 11. Check src modules ────────────────────────────────────────────────────
# Try to import each project module listed below and report the outcome.
# Unlike the package smoke test, failures here are only printed: the Python
# snippet never calls sys.exit, so the script continues.
info "Kiα»m tra src modules..."
"$PYTHON" - <<'PYEOF'
import sys

checks = [
    "src.models.medical_vqa_model",
    "src.models.transformer_decoder",
    "src.engine.trainer",
    "src.engine.medical_eval",
    "src.data.medical_dataset",
    "src.utils.text_utils",
    "src.utils.translator",
]
ok, fail = [], []
for mod in checks:
    try:
        __import__(mod)
        # Report only the last component of the dotted module path.
        ok.append(mod.split(".")[-1])
    except Exception as e:
        # Any exception counts as a failure; its message goes into the report.
        fail.append(f"{mod.split('.')[-1]} ({e})")

# The status glyph and the rest of this literal must stay on one line: a
# line break inside a single-quoted f-string is a SyntaxError.
print(f" ✅ src OK ({len(ok)}): {', '.join(ok)}")
if fail:
    print(f" β src FAIL ({len(fail)}): {', '.join(fail)}")
PYEOF
# ── Done ─────────────────────────────────────────────────────────────────────
# Closing summary: next-step commands for the five training variants and the
# model comparison script. The quoted delimiter keeps the text literal.
cat <<'SUMMARY'

ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
 β
Setup hoΓ n tαΊ₯t!

 TiαΊΏp theo:
 export WANDB_API_KEY=your_key # nαΊΏu chΖ°a cΓ³
 python train_medical.py --variant A1
 python train_medical.py --variant A2
 python train_medical.py --variant B1
 python train_medical.py --variant B2
 python train_medical.py --variant DPO

 So sΓ‘nh 5 model sau khi train xong:
 python scripts/compare_models.py
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ

SUMMARY
|