#!/usr/bin/env bash
#
# Bring up the Riprap inference stack on a remote AMD GPU droplet over SSH.
#
# Steps:
#   1. Verify SSH connectivity, that docker is installed, and that the
#      /dev/kfd and /dev/dri device files exist on the droplet.
#   2. Pull the vLLM image unless the droplet already has it.
#   3. Stream services/riprap-models from this checkout to
#      /workspace/riprap-build on the droplet.
#   4. Build the riprap-models image there (skipped when SKIP_BUILD=1).
#   5. (Re)create the "vllm" container.
#   6. (Re)create the "riprap-models" container.
#   7. Poll both HTTP endpoints from this machine until they answer.
#
# Usage:
#   <this-script> <droplet-ip> <bearer-token>
#
# Environment overrides (default in parentheses):
#   SSH_USER              (root)
#   SSH_KEY               (unset; passed to ssh as -i when set)
#   VLLM_IMAGE            (vllm/vllm-openai-rocm:v0.17.1)
#   VLLM_PORT             (8001)   host port mapped to vLLM's 8000
#   MODELS_PORT           (7860)   host port mapped to riprap-models' 7860
#   MODEL_REPO            (ibm-granite/granite-4.1-8b)
#   HF_CACHE_HOST         (/root/hf-cache)  mounted as the HF cache in both
#                                           containers
#   SKIP_BUILD            (0)      set to 1 to skip step 4
#   VLLM_READY_TIMEOUT    (90)     seconds to wait for vLLM /v1/models
#   MODELS_READY_TIMEOUT  (60)     seconds to wait for riprap-models /healthz
#
# All values are passed to the droplet literally: they are shell-quoted
# before being sent, so they are NOT expanded by the remote shell.
#
# Exit status: 64 on a usage error, 1 when a droplet pre-check or a
# healthcheck fails; any other failing command aborts the script (set -e)
# with that command's status.

set -euo pipefail

#######################################
# Shell-quote a value so it survives being re-parsed by a remote shell.
# Arguments: $1 - raw value
# Outputs:   the quoted value on stdout
#######################################
remote_quote() {
  printf '%q' "$1"
}

#######################################
# Poll an HTTP endpoint with curl until it returns a successful response.
# Arguments: $1 - timeout in seconds
#            $2 - pause between attempts in seconds
#            $3 - file that receives the response body
#            $4.. - extra curl arguments (URL, headers, ...)
# Returns:   0 as soon as one request succeeds, 1 if the timeout elapses.
#######################################
wait_for_http() {
  local timeout=$1 interval=$2 body_file=$3
  shift 3
  local deadline=$((SECONDS + timeout))
  while ((SECONDS < deadline)); do
    # Success is reported via the return status, so a request that succeeds
    # just as the deadline passes still counts as ready.
    if curl -sf --max-time 5 "$@" > "$body_file" 2> /dev/null; then
      return 0
    fi
    sleep "$interval"
  done
  return 1
}

if (($# < 2)); then
  echo "Usage: $0 <droplet-ip> <bearer-token>" >&2
  exit 64
fi

DROPLET_IP="$1"
TOKEN="$2"

# The ssh/scp command lines are arrays so that an SSH_KEY path containing
# spaces or glob characters is passed through as a single argument.
SSH_USER="${SSH_USER:-root}"
SSH_COMMON_OPTS=(-o StrictHostKeyChecking=accept-new)
if [[ -n "${SSH_KEY:-}" ]]; then
  SSH_COMMON_OPTS=(-i "$SSH_KEY" "${SSH_COMMON_OPTS[@]}")
fi
SSH=(ssh "${SSH_COMMON_OPTS[@]}" -o ConnectTimeout=10 "${SSH_USER}@${DROPLET_IP}")
# shellcheck disable=SC2034  # not used below; kept alongside SSH for file copies
SCP=(scp "${SSH_COMMON_OPTS[@]}")

VLLM_IMAGE="${VLLM_IMAGE:-vllm/vllm-openai-rocm:v0.17.1}"
VLLM_PORT="${VLLM_PORT:-8001}"
MODELS_PORT="${MODELS_PORT:-7860}"
MODEL_REPO="${MODEL_REPO:-ibm-granite/granite-4.1-8b}"
HF_CACHE_HOST="${HF_CACHE_HOST:-/root/hf-cache}"
SKIP_BUILD="${SKIP_BUILD:-0}"
VLLM_READY_TIMEOUT="${VLLM_READY_TIMEOUT:-90}"
MODELS_READY_TIMEOUT="${MODELS_READY_TIMEOUT:-60}"

# Everything below that is spliced into remote command text is quoted here
# first; the Q_* variables are therefore used WITHOUT surrounding quotes.
Q_TOKEN="$(remote_quote "$TOKEN")"
Q_VLLM_IMAGE="$(remote_quote "$VLLM_IMAGE")"
Q_VLLM_PORT="$(remote_quote "$VLLM_PORT")"
Q_MODELS_PORT="$(remote_quote "$MODELS_PORT")"
Q_MODEL_REPO="$(remote_quote "$MODEL_REPO")"
Q_HF_CACHE_HOST="$(remote_quote "$HF_CACHE_HOST")"

# The repository root is the parent of the directory holding this script.
REPO_ROOT="$(cd -- "$(dirname -- "$0")/.." && pwd)"

echo "==> Riprap droplet bring-up"
echo " droplet ip: $DROPLET_IP"
echo " vllm port: $VLLM_PORT"
echo " models port: $MODELS_PORT"
echo " model repo: $MODEL_REPO"
echo " repo root: $REPO_ROOT"
echo

# --- 1. Pre-flight checks on the droplet ------------------------------------
# Quoted heredoc delimiter: the script is sent verbatim, nothing expands here.
echo "==> 1. SSH connectivity + GPU device check"
"${SSH[@]}" bash -s << 'REMOTE'
set -e
if ! command -v docker > /dev/null; then
  echo "[droplet] docker not installed; aborting" >&2
  exit 1
fi
if [ ! -e /dev/kfd ] || [ ! -e /dev/dri ]; then
  echo "[droplet] no AMD GPU device files (/dev/kfd or /dev/dri); aborting" >&2
  exit 1
fi
echo "[droplet] docker + AMD GPU device files present"
docker --version
REMOTE

# --- 2. vLLM image ----------------------------------------------------------
echo
echo "==> 2. Pull vLLM image (if not cached)"
"${SSH[@]}" "docker image inspect ${Q_VLLM_IMAGE} > /dev/null 2>&1 || docker pull ${Q_VLLM_IMAGE}"

# --- 3. Source sync ---------------------------------------------------------
echo
echo "==> 3. Sync riprap-models source"
"${SSH[@]}" "mkdir -p /workspace/riprap-models /workspace/riprap-build"

# Stream a tar archive over SSH; pipefail makes a failure on either side fatal.
tar -C "$REPO_ROOT" -cf - services/riprap-models \
  | "${SSH[@]}" "tar -C /workspace/riprap-build -xf -"

# --- 4. Image build ---------------------------------------------------------
if [[ "$SKIP_BUILD" == "1" ]]; then
  echo
  echo "==> 4. Skipping image build (SKIP_BUILD=1)"
else
  echo
  echo "==> 4. Build riprap-models image"
  echo " (this takes ~10-20 min on first build; subsequent builds"
  echo " reuse layer cache and are < 1 min)"
  "${SSH[@]}" "cd /workspace/riprap-build && \
    docker build \
      -t riprap-models:latest \
      -f services/riprap-models/Dockerfile \
      ."
fi

# --- 5. vLLM container ------------------------------------------------------
# Unquoted heredoc delimiter: the Q_* values expand locally, and each "\\"
# becomes a single backslash, i.e. a line continuation in the remote script.
echo
echo "==> 5. Start vLLM container"
"${SSH[@]}" bash -s << REMOTE
set -e
docker rm -f vllm > /dev/null 2>&1 || true
mkdir -p ${Q_HF_CACHE_HOST}
docker run -d --name vllm \\
  --device=/dev/kfd --device=/dev/dri --group-add=video \\
  --ipc=host --shm-size=16g \\
  -p ${Q_VLLM_PORT}:8000 \\
  -v ${Q_HF_CACHE_HOST}:/root/.cache/huggingface \\
  -e GLOO_SOCKET_IFNAME=eth0 -e VLLM_HOST_IP=127.0.0.1 \\
  --restart unless-stopped \\
  ${Q_VLLM_IMAGE} \\
  --model ${Q_MODEL_REPO} \\
  --host 0.0.0.0 --port 8000 --api-key ${Q_TOKEN} \\
  --max-model-len 8192 --served-model-name granite-4.1-8b
echo "[droplet] vllm container started"
REMOTE

# --- 6. riprap-models container ---------------------------------------------
echo
echo "==> 6. Start riprap-models container"
"${SSH[@]}" bash -s << REMOTE
set -e
docker rm -f riprap-models > /dev/null 2>&1 || true
docker run -d --name riprap-models \\
  --device=/dev/kfd --device=/dev/dri --group-add=video \\
  --ipc=host --shm-size=8g \\
  -p ${Q_MODELS_PORT}:7860 \\
  -v ${Q_HF_CACHE_HOST}:/root/.cache/huggingface \\
  -e RIPRAP_MODELS_API_KEY=${Q_TOKEN} \\
  --restart unless-stopped \\
  riprap-models:latest
echo "[droplet] riprap-models container started"
REMOTE

# --- 7. Healthchecks --------------------------------------------------------
# Response bodies go to a private temp dir instead of fixed names in /tmp.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf -- "$TMP_DIR"' EXIT
VLLM_BODY="$TMP_DIR/vllm-models.json"
MODELS_BODY="$TMP_DIR/models-health.json"

echo
echo "==> 7. Healthchecks"
echo " waiting up to ${VLLM_READY_TIMEOUT}s for vLLM to expose /v1/models..."
if wait_for_http "$VLLM_READY_TIMEOUT" 3 "$VLLM_BODY" \
  "http://${DROPLET_IP}:${VLLM_PORT}/v1/models" \
  -H "Authorization: Bearer ${TOKEN}"; then
  echo " vLLM ready: $(head -c 200 "$VLLM_BODY")..."
else
  echo " vLLM did not become ready in ${VLLM_READY_TIMEOUT}s; tailing container logs:" >&2
  # Best effort: a failure to fetch logs must not replace the exit status 1.
  "${SSH[@]}" "docker logs --tail 30 vllm" >&2 || true
  exit 1
fi

echo " waiting up to ${MODELS_READY_TIMEOUT}s for riprap-models /healthz..."
if wait_for_http "$MODELS_READY_TIMEOUT" 2 "$MODELS_BODY" \
  "http://${DROPLET_IP}:${MODELS_PORT}/healthz"; then
  echo " riprap-models ready: $(< "$MODELS_BODY")"
else
  echo " riprap-models did not become ready in ${MODELS_READY_TIMEOUT}s; tailing container logs:" >&2
  # Best effort: a failure to fetch logs must not replace the exit status 1.
  "${SSH[@]}" "docker logs --tail 30 riprap-models" >&2 || true
  exit 1
fi

# --- Summary ----------------------------------------------------------------
# NOTE: this intentionally prints the bearer token so it can be copied into
# the client configuration; avoid running with output captured to shared logs.
echo
echo "==> DONE"
echo " vLLM http://${DROPLET_IP}:${VLLM_PORT}/v1/models"
echo " riprap-models http://${DROPLET_IP}:${MODELS_PORT}/healthz"
echo
echo "Set these in your local env or HF Space variables:"
echo " RIPRAP_LLM_PRIMARY=vllm"
echo " RIPRAP_LLM_BASE_URL=http://${DROPLET_IP}:${VLLM_PORT}/v1"
echo " RIPRAP_LLM_API_KEY=${TOKEN}"
echo " RIPRAP_ML_BACKEND=remote"
echo " RIPRAP_ML_BASE_URL=http://${DROPLET_IP}:${MODELS_PORT}"
echo " RIPRAP_ML_API_KEY=${TOKEN}"
|
|