File size: 3,863 Bytes
2c2dbe8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env bash
# Full redeploy to an existing AMD MI300X droplet.
#
#   1. Generate a fresh bearer token
#   2. scripts/deploy_droplet.sh <ip> <token>   (bring up vLLM + riprap-models)
#   3. scripts/update_hf_env.sh <ip> <token>    (update HF Space vars + restart)
#   4. .venv/bin/python scripts/probe_addresses.py  (5/5 must pass)
#
# Usage: scripts/redeploy.sh <droplet-ip>
#
# Requires:
#   HF_TOKEN  env var with write access to the HF Space
#   .venv     Python virtual environment with probe_addresses.py deps
#   openssl   on PATH (used to generate the fresh bearer token)
#   SSH access to the droplet (ssh-agent or SSH_KEY env var)
#
# Exit codes:
#   0  all three steps passed
#   1  deploy_droplet.sh failed (HF Space NOT touched)
#   1  update_hf_env.sh failed (droplet is up but HF Space NOT updated)
#   1  probe_addresses.py failed (deploy + HF update succeeded; not rolled back)
set -euo pipefail

if [[ "$#" -ne 1 ]]; then
    echo "Usage: $0 <droplet-ip>" >&2
    exit 1
fi

readonly IP="$1"

if [[ -z "${HF_TOKEN:-}" ]]; then
    echo "Error: HF_TOKEN env var is required (write access to the HF Space)" >&2
    exit 1
fi

# Resolve the repository root as the parent of the directory holding this
# script. Declared separately from readonly so a failing cd/pwd is not masked.
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
readonly REPO_ROOT
readonly START_SECONDS=$SECONDS

# Every step starts as FAIL and is flipped to PASS only once it has succeeded,
# so an early exit reports the remaining (never-run) steps as FAIL.
DEPLOY_STATUS="FAIL"
HF_STATUS="FAIL"
PROBE_STATUS="FAIL"

#######################################
# Print the end-of-run summary block.
# Globals:   IP, START_SECONDS, DEPLOY_STATUS, HF_STATUS, PROBE_STATUS (read)
# Arguments: $1 - text for the "Token" line (the token value itself is
#                 never printed)
# Outputs:   summary lines on stdout
#######################################
print_summary() {
    local token_note="$1"
    local elapsed=$(( SECONDS - START_SECONDS ))
    echo
    echo "=== redeploy summary ==="
    echo "Droplet IP : ${IP}"
    echo "Token      : ${token_note}"
    echo "Deploy     : ${DEPLOY_STATUS}"
    echo "HF Space   : ${HF_STATUS}"
    echo "E2E probe  : ${PROBE_STATUS}"
    printf "Total time : %dm%02ds\n" "$(( elapsed / 60 ))" "$(( elapsed % 60 ))"
}

# ---- 1. Generate a fresh bearer token ------------------------------------
# openssl rand -base64 24 emits 32 base64 chars (24 bytes, no padding).
# Stripping '/', '+' and '=' keeps the token URL-safe, so the final token can
# be slightly shorter than 32 chars.
TOKEN=$(openssl rand -base64 24 | tr -d '/+=')
if [[ -z "$TOKEN" ]]; then
    echo "Error: failed to generate a bearer token" >&2
    exit 1
fi
readonly TOKEN
echo "==> Deploying to ${IP} with fresh token..."
echo

# ---- 2. deploy_droplet.sh ------------------------------------------------
# NOTE(review): the token is passed on argv to the child scripts and is
# therefore visible in the process list while they run — confirm acceptable.
if bash "${REPO_ROOT}/scripts/deploy_droplet.sh" "$IP" "$TOKEN"; then
    DEPLOY_STATUS="PASS"
else
    echo "deploy_droplet.sh failed" >&2
    # Print summary before exiting so the caller sees partial state.
    print_summary "(not set — deploy failed before token was registered)"
    exit 1
fi

echo
echo "==> Deploy succeeded. Updating HF Space..."
echo

# ---- 3. update_hf_env.sh -------------------------------------------------
if bash "${REPO_ROOT}/scripts/update_hf_env.sh" "$IP" "$TOKEN"; then
    HF_STATUS="PASS"
else
    echo "update_hf_env.sh failed. HF Space NOT updated." >&2
    # The deploy step ran with the new token but the HF Space update failed,
    # so do not point the operator at the HF Space vars for the current token.
    print_summary "(regenerated on droplet; HF Space vars may be stale)"
    exit 1
fi

echo
echo "==> HF Space updated. Running end-to-end probe..."
echo

# ---- 4. probe_addresses.py -----------------------------------------------
# probe_addresses.py exits 0 only when 5/5 pass (from docs/DROPLET-RUNBOOK.md).
# Running it as an `if` condition keeps set -e from aborting on failure, so
# the summary below is always printed.
if "${REPO_ROOT}/.venv/bin/python" "${REPO_ROOT}/scripts/probe_addresses.py"; then
    PROBE_STATUS="PASS"
else
    echo "probe_addresses.py failed" >&2
fi

# ---- 5. Summary ----------------------------------------------------------
print_summary "(regenerated, see HF Space vars)"

# Exit 1 if probe failed; deploy + HF update already succeeded, not rolling back.
[[ "$PROBE_STATUS" == "PASS" ]] || exit 1