# Medical-VQA / setup.sh
# SpringWang08's picture
# Deploy Gradio notebook-style Medical VQA app
# 5551585 verified
#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════════════════
# setup.sh — Medical VQA Environment Setup
# Supports: Vast.ai (CUDA), Google Colab, local macOS (CPU/MPS)
#
# Usage:
#   chmod +x setup.sh && bash setup.sh
#   bash setup.sh --colab      # Google Colab mode (skip git config)
#   bash setup.sh --offline    # Offline mode (no WandB sync)
#   bash setup.sh --skip-nltk  # Skip the NLTK data download
# ═══════════════════════════════════════════════════════════════════════════
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# ── Parse flags ──────────────────────────────────────────────────────────────
# Mode switches; each starts at 0 (off) and is set to 1 by its flag.
COLAB_MODE=0
OFFLINE_MODE=0
SKIP_NLTK=0

#######################################
# Translate command-line flags into the mode switches.
# Globals:   COLAB_MODE, OFFLINE_MODE, SKIP_NLTK (written)
# Arguments: the script's own arguments
# Outputs:   one warning line on stderr per unrecognised argument
# Returns:   0 (unknown arguments are reported, not fatal)
#######################################
_parse_flags() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --colab) COLAB_MODE=1 ;;
      --offline) OFFLINE_MODE=1 ;;
      --skip-nltk) SKIP_NLTK=1 ;;
      # Report typos such as --ofline instead of silently running in the
      # default mode. Plain printf: the log helpers are defined further down.
      *) printf '[WARN] Unknown option ignored: %s\n' "$arg" >&2 ;;
    esac
  done
}

_parse_flags "$@"
# ── Colors ───────────────────────────────────────────────────────────────────
# ANSI colour sequences, stored with a literal backslash; printf's %b expands
# them at print time.
GREEN='\033[0;32m'; YELLOW='\033[1;33m'; RED='\033[0;31m'; NC='\033[0m'

# Log helpers. The message is printed through %s so that backslashes inside it
# (for example in a path) are shown verbatim instead of being interpreted as
# escape sequences, which `echo -e` would do.

# info MESSAGE... — green [INFO] line on stdout.
info() { printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$*"; }

# warn MESSAGE... — yellow [WARN] line on stdout.
warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*"; }

# error MESSAGE... — red [ERROR] line on stderr, then terminate with status 1.
error() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2; exit 1; }
# Opening banner: blank line, rule, two title lines, rule, blank line.
banner_rule="════════════════════════════════════════════════════════════"
printf '%s\n' \
  "" \
  "$banner_rule" \
  " πŸ₯ Medical VQA β€” Environment Setup" \
  " Project: DL Final 523H0173 & 523H0178" \
  "$banner_rule" \
  ""
# ── 1. Python version check ──────────────────────────────────────────────────
# Pick an interpreter, preferring python3 over python. Without the explicit
# handler, `set -e` would end the script here with no message at all.
PYTHON=$(command -v python3 || command -v python) \
  || error "Python interpreter not found (need python3 or python on PATH)"

# Ask the interpreter for its own major.minor instead of scraping `--version`
# with `grep -P`: PCRE mode is a GNU extension that BSD/macOS grep lacks.
PY_VER=$("$PYTHON" -c 'import sys; print("%d.%d" % sys.version_info[:2])')
PY_MAJOR=${PY_VER%%.*}
PY_MINOR=${PY_VER#*.}
info "Python $PY_VER tαΊ‘i: $("$PYTHON" -c 'import sys; print(sys.executable)')"

# Require Python 3.10 or newer.
if (( PY_MAJOR < 3 || (PY_MAJOR == 3 && PY_MINOR < 10) )); then
  error "CαΊ§n Python β‰₯ 3.10 (hiện tαΊ‘i: $PY_VER)"
fi
# ── 2. GPU detection ─────────────────────────────────────────────────────────
# Ask PyTorch whether CUDA is usable. This runs before the dependency install
# in step 3, so torch may not be importable yet; that case is tracked
# separately so it is not reported as "no GPU".
if CUDA_AVAILABLE=$("$PYTHON" -c "import torch; print(torch.cuda.is_available())" 2>/dev/null); then
  TORCH_IMPORTABLE=1
else
  CUDA_AVAILABLE="False"
  TORCH_IMPORTABLE=0
fi

if [[ "$CUDA_AVAILABLE" == "True" ]]; then
  # Device name and total memory (bytes / 1e9, one decimal) of device 0; each
  # query falls back to a placeholder rather than aborting the setup.
  GPU_NAME=$("$PYTHON" -c "import torch; print(torch.cuda.get_device_name(0))" 2>/dev/null || echo "Unknown")
  VRAM=$("$PYTHON" -c "import torch; print(round(torch.cuda.get_device_properties(0).total_memory/1e9,1))" 2>/dev/null || echo "?")
  info "GPU: $GPU_NAME | VRAM: ${VRAM}GB"
elif (( TORCH_IMPORTABLE )); then
  warn "KhΓ΄ng phΓ‘t hiện CUDA GPU β€” training sαΊ½ rαΊ₯t chαΊ­m trΓͺn CPU"
else
  warn "PyTorch is not importable yet — GPU check skipped (dependencies are installed in step 3)"
fi
# ── 3. Install pip packages ──────────────────────────────────────────────────
info "CΓ i Δ‘αΊ·t dependencies tα»« requirements.txt..."

# Directory containing this script; requirements.txt is expected beside it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REQ_FILE="$SCRIPT_DIR/requirements.txt"
if [[ ! -f "$REQ_FILE" ]]; then
  error "KhΓ΄ng tΓ¬m thαΊ₯y $REQ_FILE"
fi

# Upgrade pip first. This is a convenience only, so a failure is reported
# and the install continues with the pip that is already present.
"$PYTHON" -m pip install --upgrade pip --quiet \
  || warn "pip self-upgrade failed — continuing with the installed pip"

# Install the main requirements quietly; on failure repeat without --quiet so
# pip's full output is visible. A second failure aborts the script (set -e).
if ! "$PYTHON" -m pip install -r "$REQ_FILE" --quiet; then
  warn "CΓ i Δ‘αΊ·t silent thαΊ₯t bαΊ‘i, thα»­ vα»›i verbose..."
  "$PYTHON" -m pip install -r "$REQ_FILE"
fi

# wandb: enforce a minimum version (0.16.0 or newer).
"$PYTHON" -m pip install "wandb>=0.16.0" --quiet
info "βœ… Dependencies Δ‘Γ£ cΓ i xong"
# ── 4. NLTK data download ─────────────────────────────────────────────────────
# Skipped when --skip-nltk was given. The Python source is passed through a
# quoted heredoc so the shell never expands anything inside it.
if [[ "$SKIP_NLTK" -eq 0 ]]; then
  info "TαΊ£i NLTK data (punkt, wordnet)..."
  "$PYTHON" - <<'PYEOF'
import ssl

import nltk

# NOTE(review): this switches HTTPS certificate verification off for the whole
# interpreter so the download works on hosts with a broken CA store. It also
# removes protection against tampered downloads; confirm it is still needed.
try:
    _unverified_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _unverified_context

failed = []
for pkg in ['punkt', 'punkt_tab', 'wordnet', 'averaged_perceptron_tagger', 'stopwords']:
    try:
        # nltk.download() reports failure through a False return value, not
        # an exception, so the result has to be checked explicitly.
        if not nltk.download(pkg, quiet=True):
            failed.append(pkg)
            print(f' [WARN] NLTK {pkg}: download failed')
    except Exception as e:
        failed.append(pkg)
        print(f' [WARN] NLTK {pkg}: {e}')

# Best effort: a failed package is reported but does not abort the setup.
if failed:
    print(f' [WARN] NLTK data incomplete: {", ".join(failed)}')
else:
    print(' NLTK data OK')
PYEOF
fi
# ── 5. Python path configuration ─────────────────────────────────────────────
info "CαΊ₯u hΓ¬nh Python path..."

# Write a .pth file so Python adds the project root to sys.path automatically.
# Use the first system site-packages directory, or the per-user one when
# site.getsitepackages() is unavailable.
SITE_PACKAGES=$(
  "$PYTHON" -c "import site; print(site.getsitepackages()[0])" 2>/dev/null \
    || "$PYTHON" -c "import site; print(site.getusersitepackages())"
)
PTH_FILE="$SITE_PACKAGES/medical_vqa.pth"

# Explicit if/else: with `A && B || C`, C would also run whenever B failed.
if printf '%s\n' "$SCRIPT_DIR" > "$PTH_FILE"; then
  info "βœ… Path cαΊ₯u hΓ¬nh tαΊ‘i: $PTH_FILE"
else
  warn "KhΓ΄ng thể ghi vΓ o site-packages, thα»­ export PYTHONPATH thα»§ cΓ΄ng."
fi

# Also export PYTHONPATH for the current session. The ":" separator is added
# only when PYTHONPATH already has a value; a trailing ":" would create an
# empty entry, which Python resolves to the current working directory.
export PYTHONPATH="$SCRIPT_DIR${PYTHONPATH:+:$PYTHONPATH}"
info "PYTHONPATH = $PYTHONPATH"
# ── 6. .env file ─────────────────────────────────────────────────────────────
ENV_FILE="$SCRIPT_DIR/.env"
ENV_EXAMPLE="$SCRIPT_DIR/.env.example"

# First run: seed .env from the template when only the template exists.
if [[ ! -f "$ENV_FILE" && -f "$ENV_EXAMPLE" ]]; then
  cp -- "$ENV_EXAMPLE" "$ENV_FILE"
  warn "Đã tαΊ‘o .env tα»« .env.example β€” HΓ£y Δ‘iền WANDB_API_KEY!"
fi

if [[ -f "$ENV_FILE" ]]; then
  # Source .env with comment lines and blank lines removed. `set -a` exports
  # every variable assigned while it is active, so child processes see them.
  # The filter uses the POSIX class [[:space:]] because `\s` is a grep
  # extension that is not available everywhere.
  # NOTE: the file is executed as shell code; it must come from a trusted source.
  set -a
  # shellcheck disable=SC1090
  if source <(grep -Ev '^[[:space:]]*(#|$)' -- "$ENV_FILE"); then
    set +a
    info ".env Δ‘Γ£ được load"
  else
    # Loading stays best-effort, but a failure is no longer hidden.
    set +a
    warn ".env could not be loaded cleanly — check the syntax of $ENV_FILE"
  fi
fi
# ── 7. WandB login ───────────────────────────────────────────────────────────
# Three cases: --offline forces offline mode; otherwise log in when an API key
# is present; otherwise explain how to provide one.
if [[ "$OFFLINE_MODE" -eq 1 ]]; then
  export WANDB_MODE=offline
  info "WandB: OFFLINE mode (sync sau bαΊ±ng: wandb sync)"
elif [[ -n "${WANDB_API_KEY:-}" ]]; then
  # NOTE(review): the key is passed on the command line, where other local
  # users can read it from the process list while the command runs.
  # NOTE(review): confirm that `wandb login` accepts --quiet; stderr is
  # discarded here, so a rejected option would look like a bad key.
  # Explicit if/else instead of `A && B || C`, which would also run the
  # failure branch if the success message itself failed.
  if "$PYTHON" -m wandb login "$WANDB_API_KEY" --relogin --quiet 2>/dev/null; then
    info "βœ… WandB logged in (entity: SpringWang08)"
  else
    warn "WandB login thαΊ₯t bαΊ‘i β€” kiểm tra WANDB_API_KEY"
  fi
else
  warn "WANDB_API_KEY chΖ°a được set β€” WandB sαΊ½ bα»‹ bỏ qua khi training"
  warn " Set bαΊ±ng: export WANDB_API_KEY=your_key"
  warn " HoαΊ·c Δ‘iền vΓ o file .env"
fi
# ── 8. HuggingFace login ─────────────────────────────────────────────────────
# Log in only when HF_TOKEN is set. The token is passed through the
# environment and read with os.environ; interpolating it into the Python
# source would break on quote characters, allow code injection, and expose
# the token in the process argument list.
if [[ -n "${HF_TOKEN:-}" ]]; then
  if HF_TOKEN="$HF_TOKEN" "$PYTHON" -c '
import os
from huggingface_hub import login
login(token=os.environ["HF_TOKEN"], add_to_git_credential=False)
' 2>/dev/null; then
    info "βœ… HuggingFace logged in"
  else
    warn "HF login thαΊ₯t bαΊ‘i β€” dataset cΓ΄ng khai vαΊ«n tαΊ£i được"
  fi
else
  warn "HF_TOKEN chΖ°a được set (khΓ΄ng cαΊ§n nαΊΏu dataset lΓ  public)"
fi
# ── 9. Create required directories ───────────────────────────────────────────
info "TαΊ‘o thΖ° mα»₯c dα»± Γ‘n..."

# Project sub-directories, relative to the script directory. `mkdir -p` also
# creates missing parents and accepts directories that already exist.
project_dirs=(checkpoints logs/history results/charts data scripts)
for subdir in "${project_dirs[@]}"; do
  mkdir -p "$SCRIPT_DIR/$subdir"
done

info "βœ… ThΖ° mα»₯c sαΊ΅n sΓ ng"
# ── 10. Smoke test import ─────────────────────────────────────────────────────
# Import every third-party package the project relies on and print a summary.
# The interpreter exits with status 1 when anything is unusable, which stops
# the setup because of `set -e`.
info "Kiểm tra imports..."
"$PYTHON" - <<'PYEOF'
import importlib
import sys

# (module name to import, label shown in the report)
checks = [
    ("torch", "PyTorch"),
    ("torchvision", "TorchVision"),
    ("transformers", "Transformers"),
    ("datasets", "HF Datasets"),
    ("peft", "PEFT (LoRA)"),
    ("trl", "TRL (SFT/DPO)"),
    ("wandb", "WandB"),
    ("nltk", "NLTK"),
    ("bert_score", "BERTScore"),
    ("rouge_score", "ROUGE"),
    ("sklearn", "Scikit-learn"),
    ("matplotlib", "Matplotlib"),
    ("yaml", "PyYAML"),
    ("dotenv", "python-dotenv"),
    ("cv2", "OpenCV"),
]

ok, fail = [], []
for mod, name in checks:
    try:
        importlib.import_module(mod)
    except ImportError:
        fail.append(name)
    except Exception as exc:
        # An installed but broken package can raise something other than
        # ImportError (for example OSError for a missing shared library).
        # Report it with the reason instead of crashing on a traceback and
        # skipping the remaining checks.
        fail.append(f"{name} ({type(exc).__name__}: {exc})")
    else:
        ok.append(name)

print(f" βœ… OK ({len(ok)}): {', '.join(ok)}")
if fail:
    print(f" ❌ MISSING ({len(fail)}): {', '.join(fail)}")
    sys.exit(1)
PYEOF
# ── 11. Check src modules ────────────────────────────────────────────────────
# Try to import each project module and print which ones load. Failures are
# reported only; the interpreter still exits with status 0.
info "Kiểm tra src modules..."
$PYTHON - <<'PYEOF'
import importlib

module_names = (
    "src.models.medical_vqa_model",
    "src.models.transformer_decoder",
    "src.engine.trainer",
    "src.engine.medical_eval",
    "src.data.medical_dataset",
    "src.utils.text_utils",
    "src.utils.translator",
)

loaded = []
broken = []
for dotted in module_names:
    # The report shows only the last component of the dotted path.
    short = dotted.rsplit(".", 1)[-1]
    try:
        importlib.import_module(dotted)
    except Exception as e:
        broken.append(f"{short} ({e})")
    else:
        loaded.append(short)

print(f" βœ… src OK ({len(loaded)}): {', '.join(loaded)}")
if broken:
    print(f" ❌ src FAIL ({len(broken)}): {', '.join(broken)}")
PYEOF
# ── Done ─────────────────────────────────────────────────────────────────────
# Closing banner with the suggested next commands. The heredoc delimiter is
# quoted, so the text is printed literally with no shell expansion.
cat <<'DONE_BANNER'

════════════════════════════════════════════════════════════
 βœ… Setup hoΓ n tαΊ₯t!

 TiαΊΏp theo:
 export WANDB_API_KEY=your_key # nαΊΏu chΖ°a cΓ³
 python train_medical.py --variant A1
 python train_medical.py --variant A2
 python train_medical.py --variant B1
 python train_medical.py --variant B2
 python train_medical.py --variant DPO

 So sΓ‘nh 5 model sau khi train xong:
 python scripts/compare_models.py
════════════════════════════════════════════════════════════

DONE_BANNER