seriffic Claude Sonnet 4.6 committed on
Commit
2c2dbe8
·
1 Parent(s): ab4f0a6

scripts: add update_hf_env.sh and redeploy.sh -- close runbook gaps

Browse files

update_hf_env.sh: sets the 6 HF Space variables (RIPRAP_LLM_PRIMARY,
RIPRAP_LLM_BASE_URL, RIPRAP_LLM_API_KEY, RIPRAP_ML_BACKEND,
RIPRAP_ML_BASE_URL, RIPRAP_ML_API_KEY) via huggingface_hub Python API
(add_space_variable / restart_space -- huggingface-cli space does not
exist in v0.36.x), then polls /api/backend up to 120 s for HTTP 200.

redeploy.sh: thin orchestrator -- generates a fresh token, calls
deploy_droplet.sh, calls update_hf_env.sh, calls probe_addresses.py,
prints a pass/fail summary. Exit 0 only when all three steps pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. scripts/redeploy.sh +118 -0
  2. scripts/update_hf_env.sh +117 -0
scripts/redeploy.sh ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Full redeploy to an existing AMD MI300X droplet.
#
#   1. Generate a fresh bearer token
#   2. scripts/deploy_droplet.sh <ip> <token>   (bring up vLLM + riprap-models)
#   3. scripts/update_hf_env.sh <ip> <token>    (update HF Space vars + restart)
#   4. .venv/bin/python scripts/probe_addresses.py   (5/5 must pass)
#
# Usage: scripts/redeploy.sh <droplet-ip>
#
# Requires:
#   HF_TOKEN  env var with write access to the HF Space
#   .venv     Python virtual environment with probe_addresses.py deps
#   SSH access to the droplet (ssh-agent or SSH_KEY env var)
#
# Exit codes:
#   0  all three steps passed
#   1  wrong argument count or HF_TOKEN unset (no step was run)
#   1  deploy_droplet.sh failed   (HF Space NOT touched)
#   1  update_hf_env.sh failed    (droplet is up; HF Space may be stale or
#                                  only partially updated)
#   1  probe_addresses.py failed  (deploy + HF update succeeded; not rolled back)
set -euo pipefail

if [[ "$#" -ne 1 ]]; then
  echo "Usage: $0 <droplet-ip>" >&2
  exit 1
fi

IP="$1"

if [[ -z "${HF_TOKEN:-}" ]]; then
  echo "Error: HF_TOKEN env var is required (write access to the HF Space)" >&2
  exit 1
fi

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
START_SECONDS=$SECONDS

# Every step starts as FAIL and is flipped to PASS only once it has succeeded,
# so a summary printed after an early abort shows the later steps as FAIL.
DEPLOY_STATUS="FAIL"
HF_STATUS="FAIL"
PROBE_STATUS="FAIL"

#######################################
# Print the redeploy summary to stdout.
# Globals:   IP, DEPLOY_STATUS, HF_STATUS, PROBE_STATUS, START_SECONDS (read)
# Arguments: $1 - text for the "Token" line; describes where the freshly
#                 generated token ended up (the token itself is never printed)
# Outputs:   summary block on stdout
#######################################
print_summary() {
  local token_note=$1
  local elapsed=$(( SECONDS - START_SECONDS ))
  echo
  echo "=== redeploy summary ==="
  echo "Droplet IP : ${IP}"
  echo "Token : ${token_note}"
  echo "Deploy : ${DEPLOY_STATUS}"
  echo "HF Space : ${HF_STATUS}"
  echo "E2E probe : ${PROBE_STATUS}"
  printf "Total time : %dm%02ds\n" $(( elapsed / 60 )) $(( elapsed % 60 ))
}

# ---- 1. Generate a fresh bearer token ------------------------------------
# 24 random bytes encode to exactly 32 base64 characters with no '=' padding.
# Dropping '/' and '+' keeps the token URL-safe, which also means the result
# can be shorter than 32 characters.
# NOTE(review): the token is handed to the helper scripts as a positional
# argument, so it is visible in the process list while they run.
TOKEN=$(openssl rand -base64 24 | tr -d '/+=')
echo "==> Deploying to ${IP} with fresh token..."
echo

# ---- 2. deploy_droplet.sh ------------------------------------------------
if bash "${REPO_ROOT}/scripts/deploy_droplet.sh" "$IP" "$TOKEN"; then
  DEPLOY_STATUS="PASS"
else
  echo "deploy_droplet.sh failed" >&2
  # Print summary before exiting so the caller sees partial state.
  print_summary "(not set — deploy failed before token was registered)"
  exit 1
fi

echo
echo "==> Deploy succeeded. Updating HF Space..."
echo

# ---- 3. update_hf_env.sh -------------------------------------------------
if bash "${REPO_ROOT}/scripts/update_hf_env.sh" "$IP" "$TOKEN"; then
  HF_STATUS="PASS"
else
  # update_hf_env.sh can fail before, while, or after writing the Space
  # variables (e.g. the health poll timing out), so the Space may hold the
  # old token, a mix, or the new one. Do not claim either extreme.
  echo "update_hf_env.sh failed. HF Space may be stale or only partially updated." >&2
  print_summary "(regenerated; active on droplet, HF Space vars may not match)"
  exit 1
fi

echo
echo "==> HF Space updated. Running end-to-end probe..."
echo

# ---- 4. probe_addresses.py -----------------------------------------------
# probe_addresses.py exits 0 only when 5/5 pass (from docs/DROPLET-RUNBOOK.md).
# Running it as an `if` condition keeps `set -e` from aborting the script on a
# probe failure, so the summary below is always printed.
if "${REPO_ROOT}/.venv/bin/python" "${REPO_ROOT}/scripts/probe_addresses.py"; then
  PROBE_STATUS="PASS"
fi

# ---- 5. Summary ----------------------------------------------------------
print_summary "(regenerated, see HF Space vars)"

# Exit 1 if probe failed; deploy + HF update already succeeded, not rolling back.
if [[ "$PROBE_STATUS" != "PASS" ]]; then
  exit 1
fi
scripts/update_hf_env.sh ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Update HF Space env vars to point at a new droplet, restart the Space,
# and poll until the agent endpoint returns HTTP 200.
#
# Usage: scripts/update_hf_env.sh <droplet-ip> <bearer-token>
#
# Requires:
#   HF_TOKEN env var with write access to the Space (must be exported: the
#     embedded Python reads it from os.environ)
#   huggingface_hub >= 0.36 installed (provides the Python API used below;
#     note: 'huggingface-cli space variables' does not exist in this version)
#
# Space slug: lablab-ai-amd-developer-hackathon/riprap-nyc
# Variables set (from docs/DROPLET-RUNBOOK.md §Required secrets):
#   RIPRAP_LLM_PRIMARY   vllm
#   RIPRAP_LLM_BASE_URL  http://<ip>:8001/v1
#   RIPRAP_LLM_API_KEY   <token>
#   RIPRAP_ML_BACKEND    remote
#   RIPRAP_ML_BASE_URL   http://<ip>:7860
#   RIPRAP_ML_API_KEY    <token>
#
# Exit status: 0 when the Space answers HTTP 200 within the poll window,
# non-zero on bad usage, missing HF_TOKEN, a Python/API failure, or timeout.
set -euo pipefail

if [[ "$#" -ne 2 ]]; then
  echo "Usage: $0 <droplet-ip> <bearer-token>" >&2
  exit 1
fi

IP="$1"
TOKEN="$2"

if [[ -z "${HF_TOKEN:-}" ]]; then
  echo "Error: HF_TOKEN env var is required (write access to the Space)" >&2
  exit 1
fi

SPACE_ID="lablab-ai-amd-developer-hackathon/riprap-nyc"
SPACE_URL="https://lablab-ai-amd-developer-hackathon-riprap-nyc.hf.space"
VLLM_PORT=8001
MODELS_PORT=7860
POLL_TIMEOUT_SECONDS=120
POLL_INTERVAL_SECONDS=10

echo "==> Updating HF Space variables"
echo " space: ${SPACE_ID}"
echo " droplet ip: ${IP}"
echo " vLLM port: ${VLLM_PORT}"
echo " models port: ${MODELS_PORT}"
echo

# ---- 1. Set all six Space variables via the huggingface_hub Python API ----
# huggingface-cli space variables does not exist in huggingface_hub 0.36.x;
# add_space_variable is the documented programmatic interface.
#
# The values are handed to Python through the environment and the program text
# is a quoted heredoc, so nothing from the command line is ever spliced into
# Python source. A quote, backslash, or newline in <droplet-ip>/<bearer-token>
# therefore cannot break or alter the program.
#
# NOTE(review): the two *_API_KEY entries are stored as Space *variables*.
# Per the HF Spaces docs, variables are publicly visible while secrets are
# not; consider add_space_secret for them — confirm against the runbook
# before changing, since an existing variable with the same key may have to
# be deleted first.
UPDATE_HF_SPACE_ID="$SPACE_ID" \
UPDATE_HF_IP="$IP" \
UPDATE_HF_BEARER="$TOKEN" \
UPDATE_HF_VLLM_PORT="$VLLM_PORT" \
UPDATE_HF_MODELS_PORT="$MODELS_PORT" \
python3 - <<'PY'
import os
import sys

try:
    from huggingface_hub import HfApi
except ImportError:
    print('Error: huggingface_hub not installed', file=sys.stderr)
    sys.exit(1)

api = HfApi(token=os.environ['HF_TOKEN'])
space_id = os.environ['UPDATE_HF_SPACE_ID']
ip = os.environ['UPDATE_HF_IP']
token = os.environ['UPDATE_HF_BEARER']
vllm_port = int(os.environ['UPDATE_HF_VLLM_PORT'])
models_port = int(os.environ['UPDATE_HF_MODELS_PORT'])

variables = {
    'RIPRAP_LLM_PRIMARY': 'vllm',
    'RIPRAP_LLM_BASE_URL': f'http://{ip}:{vllm_port}/v1',
    'RIPRAP_LLM_API_KEY': token,
    'RIPRAP_ML_BACKEND': 'remote',
    'RIPRAP_ML_BASE_URL': f'http://{ip}:{models_port}',
    'RIPRAP_ML_API_KEY': token,
}

for key, value in variables.items():
    # Never echo the bearer token to the terminal.
    display = '<redacted>' if 'KEY' in key else value
    print(f' setting {key} = {display}')
    api.add_space_variable(repo_id=space_id, key=key, value=value)

print('[python] all 6 variables set')
PY
echo

# ---- 2. Restart the Space ------------------------------------------------
echo "==> Restarting HF Space"
UPDATE_HF_SPACE_ID="$SPACE_ID" python3 - <<'PY'
import os
from huggingface_hub import HfApi

api = HfApi(token=os.environ['HF_TOKEN'])
rt = api.restart_space(repo_id=os.environ['UPDATE_HF_SPACE_ID'])
print(f' stage after restart request: {rt.stage}')
PY
echo

# ---- 3. Poll /api/backend until HTTP 200 (max 120 s) ---------------------
# /api/backend is documented in docs/DROPLET-RUNBOOK.md §Destroy checklist
# as the endpoint to verify the Space is serving.
echo "==> Polling ${SPACE_URL}/api/backend (up to ${POLL_TIMEOUT_SECONDS} s)..."
DEADLINE=$(( SECONDS + POLL_TIMEOUT_SECONDS ))
HEALTHY=0
while (( SECONDS < DEADLINE )); do
  # curl's --write-out already prints "000" when no HTTP response arrived, and
  # curl then exits non-zero. Only the exit status is swallowed here; echoing
  # a second "000" on failure would make the captured code "000000".
  HTTP_CODE=$(curl -s -o /dev/null -w '%{http_code}' \
    --max-time 10 "${SPACE_URL}/api/backend" 2>/dev/null) || true
  # Covers the case where curl produced no output at all (e.g. not installed).
  HTTP_CODE=${HTTP_CODE:-000}
  if [[ "$HTTP_CODE" == "200" ]]; then
    HEALTHY=1
    break
  fi
  echo " (${HTTP_CODE}) not ready yet — waiting ${POLL_INTERVAL_SECONDS} s..."
  sleep "$POLL_INTERVAL_SECONDS"
done

if [[ "$HEALTHY" -ne 1 ]]; then
  echo "HF Space did not become healthy within ${POLL_TIMEOUT_SECONDS}s" >&2
  exit 1
fi

echo
echo "HF Space updated and healthy. IP=${IP}"