#!/usr/bin/env bash
# Riprap GPU-droplet bring-up — vLLM + riprap-models, idempotent.
#
# Designed for a fresh AMD MI300X droplet (DigitalOcean GPU droplet,
# AMD Developer Cloud node, etc.) with nothing more than:
# - Ubuntu 22.04 / 24.04
# - Docker + AMD ROCm GPU drivers (kfd / dri device files)
# - SSH root access
#
# The script SSHes to the droplet, ensures the right images are
# pulled, builds the riprap-models container from this repo, starts
# both services, and runs healthchecks. Re-running on the same
# droplet is idempotent: existing containers are removed and
# recreated cleanly.
#
# Usage:
#   scripts/deploy_droplet.sh <droplet-ip> <bearer-token>
#
# Example:
#   scripts/deploy_droplet.sh 129.212.181.238 "$(cat /tmp/riprap/vllm_token.txt)"
#
# Env knobs (optional, all have sensible defaults):
#   SSH_USER       default "root"
#   SSH_KEY        path to ssh key; default uses ssh-agent
#   VLLM_IMAGE     default "vllm/vllm-openai-rocm:v0.17.1"
#   VLLM_PORT      default 8001 (host) → 8000 (container)
#   MODELS_PORT    default 7860 (host) → 7860 (container)
#   MODEL_REPO     default "ibm-granite/granite-4.1-8b"
#   HF_CACHE_HOST  default "/root/hf-cache" on droplet
#   SKIP_BUILD     "1" to skip building riprap-models image
#                  (assume it's already present on droplet)
#
# Exits non-zero on any step that fails — including the final
# healthcheck — so this is safe to wrap in CI.
set -euo pipefail

# ---- 0. Arguments + configuration -----------------------------------------
# Both positional arguments are mandatory; 64 is the exit status used for a
# usage error.
if [ "$#" -lt 2 ]; then
  echo "Usage: $0 <droplet-ip> <bearer-token>" >&2
  exit 64
fi
DROPLET_IP="$1"
TOKEN="$2"
SSH_USER="${SSH_USER:-root}"

# Options shared by ssh and scp. They live in an array (not a flat string)
# so that an SSH_KEY path containing spaces or glob characters reaches ssh
# as ONE argument instead of being word-split.
SSH_COMMON_OPTS=(-o StrictHostKeyChecking=accept-new)
if [ -n "${SSH_KEY:-}" ]; then
  SSH_COMMON_OPTS=(-i "$SSH_KEY" "${SSH_COMMON_OPTS[@]}")
fi

# Run a command on the droplet: riprap_ssh <remote-command...>
# stdin/stdout/stderr are passed straight through to ssh.
riprap_ssh() {
  ssh "${SSH_COMMON_OPTS[@]}" -o ConnectTimeout=10 \
    "${SSH_USER}@${DROPLET_IP}" "$@"
}

# scp with the same key / host-key options: riprap_scp <scp-args...>
riprap_scp() {
  scp "${SSH_COMMON_OPTS[@]}" "$@"
}

# The rest of the script invokes these as `$SSH ...` / `$SCP ...` (unquoted
# expansion). Each variable therefore holds the NAME of the wrapper function:
# after expansion bash looks the first word up as a command and finds the
# function. No step visible in this file uses $SCP; it is kept alongside SSH.
SSH="riprap_ssh"
SCP="riprap_scp"

VLLM_IMAGE="${VLLM_IMAGE:-vllm/vllm-openai-rocm:v0.17.1}"
VLLM_PORT="${VLLM_PORT:-8001}"
MODELS_PORT="${MODELS_PORT:-7860}"
MODEL_REPO="${MODEL_REPO:-ibm-granite/granite-4.1-8b}"
HF_CACHE_HOST="${HF_CACHE_HOST:-/root/hf-cache}"
SKIP_BUILD="${SKIP_BUILD:-0}"
# Repository root = parent of the directory that contains this script.
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"

echo "==> Riprap droplet bring-up"
echo " droplet ip: $DROPLET_IP"
echo " vllm port: $VLLM_PORT"
echo " models port: $MODELS_PORT"
echo " model repo: $MODEL_REPO"
echo " repo root: $REPO_ROOT"
echo
# ---- 1. Verify SSH + droplet readiness ----------------------------------
# Runs a small preflight script on the droplet. The heredoc delimiter is
# quoted, so nothing in the script is expanded locally. The remote script
# exits 1 (which fails this step) when docker or either AMD GPU device node
# is missing.
echo "==> 1. SSH connectivity + GPU device check"
$SSH bash -s <<'REMOTE'
set -e
command -v docker > /dev/null || {
  echo "[droplet] docker not installed; aborting" >&2
  exit 1
}
for gpu_node in /dev/kfd /dev/dri; do
  if [ ! -e "$gpu_node" ]; then
    echo "[droplet] no AMD GPU device files (/dev/kfd or /dev/dri); aborting" >&2
    exit 1
  fi
done
echo "[droplet] docker + AMD GPU device files present"
docker --version
REMOTE
# ---- 2. Pull vLLM image ---------------------------------------------------
# One SSH round trip: the droplet inspects its local image store and only
# pulls when the inspect fails (i.e. the image is not cached yet).
echo
echo "==> 2. Pull vLLM image (if not cached)"
remote_pull_cmd="docker image inspect $VLLM_IMAGE > /dev/null 2>&1 || docker pull $VLLM_IMAGE"
$SSH "$remote_pull_cmd"
# ---- 3. Sync riprap-models source to droplet -----------------------------
echo
echo "==> 3. Sync riprap-models source"
# Fail locally, before anything on the droplet is touched, when the source
# tree to upload is missing.
if [ ! -d "$REPO_ROOT/services/riprap-models" ]; then
  echo "source directory not found: $REPO_ROOT/services/riprap-models" >&2
  exit 1
fi
$SSH "mkdir -p /workspace/riprap-models /workspace/riprap-build"
# Sync Dockerfile + sources via tar over SSH (rsync may be missing on
# a minimal droplet; tar is part of any Linux base).
# The previous upload is deleted first: tar extraction only adds or
# overwrites files, so a file removed from the repo would otherwise linger
# in the remote build context on every later run.
tar -C "$REPO_ROOT" -cf - services/riprap-models | \
  $SSH "rm -rf /workspace/riprap-build/services/riprap-models && tar -C /workspace/riprap-build -xf -"
# ---- 4. Build riprap-models image ----------------------------------------
# SKIP_BUILD=1 bypasses the build; any other value builds
# riprap-models:latest on the droplet from the uploaded build context.
echo
if [ "$SKIP_BUILD" != "1" ]; then
  echo "==> 4. Build riprap-models image"
  echo " (this takes ~10-20 min on first build; subsequent builds"
  echo " reuse layer cache and are < 1 min)"
  remote_build_cmd="cd /workspace/riprap-build && docker build -t riprap-models:latest -f services/riprap-models/Dockerfile ."
  $SSH "$remote_build_cmd"
else
  echo "==> 4. Skipping image build (SKIP_BUILD=1)"
fi
# ---- 5. Start vLLM container ---------------------------------------------
# Removes any existing "vllm" container and starts a fresh one.
#
# The heredoc below is UNQUOTED: ${...} is expanded locally and the result
# is executed by bash on the droplet. The bearer token is therefore passed
# through printf %q first, so a token containing quotes, "$", backticks or
# whitespace arrives as one literal argument instead of breaking (or being
# evaluated by) the remote shell. The other knobs (HF_CACHE_HOST, VLLM_PORT,
# VLLM_IMAGE, MODEL_REPO) are operator configuration and are inserted
# verbatim. "\\" becomes a single backslash, i.e. a line continuation in the
# remote script.
echo
echo "==> 5. Start vLLM container"
printf -v vllm_token_q '%q' "$TOKEN"
$SSH bash -s <<REMOTE
set -e
docker rm -f vllm > /dev/null 2>&1 || true
mkdir -p ${HF_CACHE_HOST}
docker run -d --name vllm \\
  --device=/dev/kfd --device=/dev/dri --group-add=video \\
  --ipc=host --shm-size=16g \\
  -p ${VLLM_PORT}:8000 \\
  -v ${HF_CACHE_HOST}:/root/.cache/huggingface \\
  -e GLOO_SOCKET_IFNAME=eth0 -e VLLM_HOST_IP=127.0.0.1 \\
  --restart unless-stopped \\
  ${VLLM_IMAGE} \\
  --model ${MODEL_REPO} \\
  --host 0.0.0.0 --port 8000 --api-key ${vllm_token_q} \\
  --max-model-len 8192 --served-model-name granite-4.1-8b
echo "[droplet] vllm container started"
REMOTE
# ---- 6. Start riprap-models container ------------------------------------
# Removes any existing "riprap-models" container and starts a fresh one
# from the riprap-models:latest image.
#
# The heredoc is UNQUOTED, so ${...} is expanded locally and the result is
# executed by bash on the droplet. The bearer token is shell-quoted with
# printf %q so special characters in it cannot break or inject into the
# remote command; MODELS_PORT and HF_CACHE_HOST are inserted verbatim.
echo
echo "==> 6. Start riprap-models container"
printf -v models_token_q '%q' "$TOKEN"
$SSH bash -s <<REMOTE
set -e
docker rm -f riprap-models > /dev/null 2>&1 || true
docker run -d --name riprap-models \\
  --device=/dev/kfd --device=/dev/dri --group-add=video \\
  --ipc=host --shm-size=8g \\
  -p ${MODELS_PORT}:7860 \\
  -v ${HF_CACHE_HOST}:/root/.cache/huggingface \\
  -e RIPRAP_MODELS_API_KEY=${models_token_q} \\
  --restart unless-stopped \\
  riprap-models:latest
echo "[droplet] riprap-models container started"
REMOTE
# ---- 7. Healthchecks -----------------------------------------------------
# Polls both services from the local machine until they answer, or fails
# the script (exit 1) after dumping the tail of the container log.
#
# Optional env knobs (defaults match the previously hard-coded values):
#   VLLM_READY_TIMEOUT    seconds to wait for vLLM          (default 90)
#   MODELS_READY_TIMEOUT  seconds to wait for riprap-models (default 60)

# wait_for_http <timeout-s> <interval-s> <url> [extra curl args...]
# Repeatedly GETs <url> (5s cap per attempt, curl errors silenced) until
# curl succeeds or <timeout-s> seconds have elapsed.
# Outputs: the response body on stdout when the endpoint answered.
# Returns: 0 on success, 1 on timeout.
# Success is reported by the return status rather than by re-checking the
# clock afterwards, so a probe that succeeds just as the deadline passes is
# still treated as success.
wait_for_http() {
  local timeout=$1 interval=$2 url=$3
  shift 3
  local deadline=$((SECONDS + timeout))
  local body
  while (( SECONDS < deadline )); do
    if body=$(curl -sf --max-time 5 "$url" "$@" 2>/dev/null); then
      printf '%s' "$body"
      return 0
    fi
    sleep "$interval"
  done
  return 1
}

echo
echo "==> 7. Healthchecks"

vllm_ready_timeout="${VLLM_READY_TIMEOUT:-90}"
echo " waiting up to ${vllm_ready_timeout}s for vLLM to expose /v1/models..."
# The body is captured in a variable instead of a fixed-name file in /tmp.
if vllm_body=$(wait_for_http "$vllm_ready_timeout" 3 \
  "http://${DROPLET_IP}:${VLLM_PORT}/v1/models" \
  -H "Authorization: Bearer ${TOKEN}"); then
  # Only the first 200 characters of the model list are shown.
  echo " vLLM ready: ${vllm_body:0:200}..."
else
  echo " vLLM did not become ready in ${vllm_ready_timeout}s; tailing container logs:" >&2
  $SSH "docker logs --tail 30 vllm" >&2
  exit 1
fi

models_ready_timeout="${MODELS_READY_TIMEOUT:-60}"
echo " waiting up to ${models_ready_timeout}s for riprap-models /healthz..."
if models_body=$(wait_for_http "$models_ready_timeout" 2 \
  "http://${DROPLET_IP}:${MODELS_PORT}/healthz"); then
  echo " riprap-models ready: ${models_body}"
else
  echo " riprap-models did not become ready in ${models_ready_timeout}s; tailing container logs:" >&2
  $SSH "docker logs --tail 30 riprap-models" >&2
  exit 1
fi
# ---- 8. Summary ----------------------------------------------------------
# Prints the service URLs and the environment variables a client needs.
# NOTE(review): this writes the bearer token to stdout in clear text; when
# the script runs under CI the token ends up in the job log. Mask or drop
# those two lines if that is not acceptable.
cat <<EOF

==> DONE
 vLLM http://${DROPLET_IP}:${VLLM_PORT}/v1/models
 riprap-models http://${DROPLET_IP}:${MODELS_PORT}/healthz

Set these in your local env or HF Space variables:
 RIPRAP_LLM_PRIMARY=vllm
 RIPRAP_LLM_BASE_URL=http://${DROPLET_IP}:${VLLM_PORT}/v1
 RIPRAP_LLM_API_KEY=${TOKEN}
 RIPRAP_ML_BACKEND=remote
 RIPRAP_ML_BASE_URL=http://${DROPLET_IP}:${MODELS_PORT}
 RIPRAP_ML_API_KEY=${TOKEN}
EOF