scripts: add update_hf_env.sh and redeploy.sh -- close runbook gaps
Browse files
update_hf_env.sh: sets the 6 HF Space variables (RIPRAP_LLM_PRIMARY,
RIPRAP_LLM_BASE_URL, RIPRAP_LLM_API_KEY, RIPRAP_ML_BACKEND,
RIPRAP_ML_BASE_URL, RIPRAP_ML_API_KEY) via huggingface_hub Python API
(add_space_variable / restart_space -- huggingface-cli space does not
exist in v0.36.x), then polls /api/backend up to 120 s for HTTP 200.
redeploy.sh: thin orchestrator -- generates a fresh token, calls
deploy_droplet.sh, calls update_hf_env.sh, calls probe_addresses.py,
prints a pass/fail summary. Exit 0 only when all three steps pass.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- scripts/redeploy.sh +118 -0
- scripts/update_hf_env.sh +117 -0
scripts/redeploy.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Full redeploy to an existing AMD MI300X droplet.
#
#   1. Generate a fresh bearer token
#   2. scripts/deploy_droplet.sh <ip> <token>       (bring up vLLM + riprap-models)
#   3. scripts/update_hf_env.sh <ip> <token>        (update HF Space vars + restart)
#   4. .venv/bin/python scripts/probe_addresses.py  (5/5 must pass)
#
# Usage: scripts/redeploy.sh <droplet-ip>
#
# Requires:
#   HF_TOKEN   env var with write access to the HF Space
#   .venv      Python virtual environment with probe_addresses.py deps
#   SSH access to the droplet (ssh-agent or SSH_KEY env var)
#
# Exit codes:
#   0  all three steps passed
#   1  bad usage, missing HF_TOKEN, or missing .venv (nothing was touched)
#   1  deploy_droplet.sh failed   (HF Space NOT touched)
#   1  update_hf_env.sh failed    (droplet is up but HF Space NOT updated)
#   1  probe_addresses.py failed  (deploy + HF update succeeded; not rolled back)
set -euo pipefail

if [[ "$#" -ne 1 ]]; then
  echo "Usage: $0 <droplet-ip>" >&2
  exit 1
fi

IP="$1"

if [[ -z "${HF_TOKEN:-}" ]]; then
  echo "Error: HF_TOKEN env var is required (write access to the HF Space)" >&2
  exit 1
fi

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
START_SECONDS=$SECONDS

# Fail fast: the final probe step needs the venv interpreter. Checking now
# avoids redeploying the droplet and restarting the Space only to discover at
# the very end that the probe cannot be run.
PROBE_PYTHON="${REPO_ROOT}/.venv/bin/python"
if [[ ! -x "$PROBE_PYTHON" ]]; then
  echo "Error: ${PROBE_PYTHON} not found or not executable (create the .venv first)" >&2
  exit 1
fi

# Per-step status shown in the summary; each flips to PASS when its step
# succeeds, so an early exit reports the remaining steps as FAIL.
DEPLOY_STATUS="FAIL"
HF_STATUS="FAIL"
PROBE_STATUS="FAIL"

#######################################
# Print the end-of-run summary (used on every exit path after step 1).
# Globals:   IP, DEPLOY_STATUS, HF_STATUS, PROBE_STATUS, START_SECONDS (read)
# Arguments: $1 - text for the "Token" line; the token itself is never printed
# Outputs:   summary block on stdout
#######################################
print_summary() {
  local token_note=$1
  local elapsed=$(( SECONDS - START_SECONDS ))
  echo
  echo "=== redeploy summary ==="
  echo "Droplet IP : ${IP}"
  echo "Token : ${token_note}"
  echo "Deploy : ${DEPLOY_STATUS}"
  echo "HF Space : ${HF_STATUS}"
  echo "E2E probe : ${PROBE_STATUS}"
  printf "Total time : %dm%02ds\n" $(( elapsed / 60 )) $(( elapsed % 60 ))
}

# ---- 1. Generate a fresh bearer token ------------------------------------
# 24 random bytes encode to exactly 32 base64 chars (no '=' padding). Deleting
# '/' and '+' keeps the token URL-safe, so it can be slightly shorter than 32.
TOKEN=$(openssl rand -base64 24 | tr -d '/+=')
echo "==> Deploying to ${IP} with fresh token..."
echo

# ---- 2. deploy_droplet.sh ------------------------------------------------
if bash "${REPO_ROOT}/scripts/deploy_droplet.sh" "$IP" "$TOKEN"; then
  DEPLOY_STATUS="PASS"
else
  echo "deploy_droplet.sh failed" >&2
  # Print summary before exiting so the caller sees partial state.
  print_summary "(not set — deploy failed before token was registered)"
  exit 1
fi

echo
echo "==> Deploy succeeded. Updating HF Space..."
echo

# ---- 3. update_hf_env.sh -------------------------------------------------
if bash "${REPO_ROOT}/scripts/update_hf_env.sh" "$IP" "$TOKEN"; then
  HF_STATUS="PASS"
else
  echo "update_hf_env.sh failed. HF Space NOT updated." >&2
  print_summary "(regenerated, see HF Space vars)"
  exit 1
fi

echo
echo "==> HF Space updated. Running end-to-end probe..."
echo

# ---- 4. probe_addresses.py -----------------------------------------------
# probe_addresses.py exits 0 only when 5/5 pass (from docs/DROPLET-RUNBOOK.md).
# Capture the exit code with `|| rc=$?` so set -e does not abort before the
# summary is printed.
PROBE_EXIT=0
"$PROBE_PYTHON" "${REPO_ROOT}/scripts/probe_addresses.py" || PROBE_EXIT=$?

if (( PROBE_EXIT == 0 )); then
  PROBE_STATUS="PASS"
fi

# ---- 5. Summary ----------------------------------------------------------
print_summary "(regenerated, see HF Space vars)"

# Exit 1 if probe failed; deploy + HF update already succeeded, not rolling back.
[[ "$PROBE_STATUS" == "PASS" ]]
|
scripts/update_hf_env.sh
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Update HF Space env vars to point at a new droplet, restart the Space,
# and poll until the agent endpoint returns HTTP 200.
#
# Usage: scripts/update_hf_env.sh <droplet-ip> <bearer-token>
#
# Requires:
#   HF_TOKEN env var with write access to the Space
#   huggingface_hub >= 0.36 installed (provides the Python API used below;
#   note: 'huggingface-cli space variables' does not exist in this version)
#
# Space slug: lablab-ai-amd-developer-hackathon/riprap-nyc
# Variables set (from docs/DROPLET-RUNBOOK.md §Required secrets):
#   RIPRAP_LLM_PRIMARY    vllm
#   RIPRAP_LLM_BASE_URL   http://<ip>:8001/v1
#   RIPRAP_LLM_API_KEY    <token>
#   RIPRAP_ML_BACKEND     remote
#   RIPRAP_ML_BASE_URL    http://<ip>:7860
#   RIPRAP_ML_API_KEY     <token>
#
# Exit codes:
#   0  variables set, Space restarted, and /api/backend returned HTTP 200
#   1  bad usage, missing HF_TOKEN, or Space not healthy within 120 s
#   non-zero if a python3 step fails (propagated by set -e)
set -euo pipefail

if [[ "$#" -ne 2 ]]; then
  echo "Usage: $0 <droplet-ip> <bearer-token>" >&2
  exit 1
fi

IP="$1"
TOKEN="$2"

if [[ -z "${HF_TOKEN:-}" ]]; then
  echo "Error: HF_TOKEN env var is required (write access to the Space)" >&2
  exit 1
fi

SPACE_ID="lablab-ai-amd-developer-hackathon/riprap-nyc"
SPACE_URL="https://lablab-ai-amd-developer-hackathon-riprap-nyc.hf.space"
VLLM_PORT=8001
MODELS_PORT=7860

echo "==> Updating HF Space variables"
echo " space: ${SPACE_ID}"
echo " droplet ip: ${IP}"
echo " vLLM port: ${VLLM_PORT}"
echo " models port: ${MODELS_PORT}"
echo

# ---- 1. Set all six Space variables via the huggingface_hub Python API ----
# huggingface-cli space variables does not exist in huggingface_hub 0.36.x;
# add_space_variable is the documented programmatic interface.
#
# Values are handed to Python through the environment and the program text is
# a quoted heredoc, so nothing from the command line is ever spliced into
# Python source (a quote in <ip> or <token> can no longer break or inject code).
DEPLOY_SPACE_ID="$SPACE_ID" \
DEPLOY_DROPLET_IP="$IP" \
DEPLOY_BEARER_TOKEN="$TOKEN" \
DEPLOY_VLLM_PORT="$VLLM_PORT" \
DEPLOY_MODELS_PORT="$MODELS_PORT" \
python3 - <<'PY'
import os
import sys

try:
    from huggingface_hub import HfApi
except ImportError:
    print('Error: huggingface_hub not installed', file=sys.stderr)
    sys.exit(1)

api = HfApi(token=os.environ['HF_TOKEN'])
space_id = os.environ['DEPLOY_SPACE_ID']
ip = os.environ['DEPLOY_DROPLET_IP']
token = os.environ['DEPLOY_BEARER_TOKEN']
vllm_port = int(os.environ['DEPLOY_VLLM_PORT'])
models_port = int(os.environ['DEPLOY_MODELS_PORT'])

# NOTE(review): the two *_API_KEY entries are stored as Space *variables*.
# Hugging Face documents variables as non-sensitive configuration and offers
# add_space_secret for credentials -- confirm whether these should be secrets
# before changing (an existing variable of the same name may need removing).
variables = {
    'RIPRAP_LLM_PRIMARY': 'vllm',
    'RIPRAP_LLM_BASE_URL': f'http://{ip}:{vllm_port}/v1',
    'RIPRAP_LLM_API_KEY': token,
    'RIPRAP_ML_BACKEND': 'remote',
    'RIPRAP_ML_BASE_URL': f'http://{ip}:{models_port}',
    'RIPRAP_ML_API_KEY': token,
}

for key, value in variables.items():
    # Never echo the bearer token to the terminal / CI log.
    display = '<redacted>' if 'KEY' in key else value
    print(f' setting {key} = {display}')
    api.add_space_variable(repo_id=space_id, key=key, value=value)

print('[python] all 6 variables set')
PY
echo

# ---- 2. Restart the Space ------------------------------------------------
echo "==> Restarting HF Space"
DEPLOY_SPACE_ID="$SPACE_ID" python3 - <<'PY'
import os
import sys

try:
    from huggingface_hub import HfApi
except ImportError:
    print('Error: huggingface_hub not installed', file=sys.stderr)
    sys.exit(1)

api = HfApi(token=os.environ['HF_TOKEN'])
rt = api.restart_space(repo_id=os.environ['DEPLOY_SPACE_ID'])
print(f' stage after restart request: {rt.stage}')
PY
echo

# ---- 3. Poll /api/backend until HTTP 200 (max 120 s) ---------------------
# /api/backend is documented in docs/DROPLET-RUNBOOK.md §Destroy checklist
# as the endpoint to verify the Space is serving.
echo "==> Polling ${SPACE_URL}/api/backend (up to 120 s)..."
DEADLINE=$(( SECONDS + 120 ))
HEALTHY=0
while (( SECONDS < DEADLINE )); do
  # On a transport failure curl itself prints "000" via -w and exits non-zero.
  # The fallback assignment sits outside the substitution so the code is not
  # doubled into "000000", and it also covers curl being unavailable.
  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
    --max-time 10 "${SPACE_URL}/api/backend" 2>/dev/null) || HTTP_CODE="000"
  if [[ "$HTTP_CODE" == "200" ]]; then
    HEALTHY=1
    break
  fi
  echo " (${HTTP_CODE}) not ready yet — waiting 10 s..."
  sleep 10
done

if (( HEALTHY != 1 )); then
  echo "HF Space did not become healthy within 120s" >&2
  exit 1
fi

echo
echo "HF Space updated and healthy. IP=${IP}"
|