File size: 4,229 Bytes
2c2dbe8
 
 
 
 
 
 
 
 
 
 
f9e2ab8
2c2dbe8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9e2ab8
 
 
 
 
 
 
 
 
 
 
 
 
 
2c2dbe8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env bash
# Full redeploy to an existing AMD MI300X droplet.
#
#   1. Generate a fresh bearer token
#   2. scripts/deploy_droplet.sh <ip> <token>   (bring up vLLM + riprap-models)
#   3. scripts/update_hf_env.sh <ip> <token>    (update HF Space vars + restart)
#   4. .venv/bin/python scripts/probe_addresses.py  (5/5 must pass)
#
# Usage: scripts/redeploy.sh <droplet-ip>
#
# Requires:
#   HF auth — either `huggingface-cli login` (preferred) or HF_TOKEN env var
#   .venv     Python virtual environment with probe_addresses.py deps
#   SSH access to the droplet (ssh-agent or SSH_KEY env var)
#
# Exit codes:
#   0  all three steps passed
#   1  deploy_droplet.sh failed (HF Space NOT touched)
#   1  update_hf_env.sh failed (droplet is up but HF Space NOT updated)
#   1  probe_addresses.py failed (deploy + HF update succeeded; not rolled back)
set -euo pipefail

# Exactly one positional argument (the droplet IP) is accepted; any other
# count prints the usage line to stderr and aborts with status 1.
if (( $# != 1 )); then
    printf 'Usage: %s <droplet-ip>\n' "$0" >&2
    exit 1
fi

IP="$1"

# Fail fast on missing Hugging Face credentials, before the long droplet
# build starts. HfApi() picks up either the HF_TOKEN env var or a cached CLI
# login, so a successful whoami() means one of the two is usable.
# Only stdout is discarded; the snippet's stderr hints still reach the caller.
# NOTE(review): this runs python3 from PATH, not .venv/bin/python — confirm
# huggingface_hub is importable there.
python3 -c "
import sys
from huggingface_hub import HfApi
try:
    HfApi().whoami()
except Exception as e:
    print(f'HF auth check failed: {e}', file=sys.stderr)
    print('Run: huggingface-cli login   (or: export HF_TOKEN=...)',
          file=sys.stderr)
    sys.exit(1)
" >/dev/null || exit 1

# Absolute path of the repository root: the parent of the directory that
# holds this script.
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
# Baseline for the "Total time" line of the summary (SECONDS is bash's
# built-in elapsed-seconds counter).
START_SECONDS=$SECONDS

# Every stage is presumed failed until it reports success.
DEPLOY_STATUS="FAIL" HF_STATUS="FAIL" PROBE_STATUS="FAIL"

# ---- 1. Generate a fresh bearer token ------------------------------------
# 24 random bytes encode to 32 base64 characters; '+', '/' and '=' are then
# deleted to keep the token URL-safe (which can leave it shorter than 32).
TOKEN="$(openssl rand -base64 24 | tr -d '+/=')"
printf '==> Deploying to %s with fresh token...\n\n' "${IP}"

# ---- 2. deploy_droplet.sh ------------------------------------------------
# Hand the droplet IP and the new token to deploy_droplet.sh. If it fails,
# report the partial state and stop with status 1 before the HF Space step
# is ever reached.
if ! bash "${REPO_ROOT}/scripts/deploy_droplet.sh" "${IP}" "${TOKEN}"; then
    printf '%s\n' "deploy_droplet.sh failed" >&2
    # Summary goes to stdout so the caller still sees which stages ran.
    ELAPSED=$(( SECONDS - START_SECONDS ))
    printf '%s\n' \
        "" \
        "=== redeploy summary ===" \
        "Droplet IP : ${IP}" \
        "Token      : (not set — deploy failed before token was registered)" \
        "Deploy     : ${DEPLOY_STATUS}" \
        "HF Space   : ${HF_STATUS}" \
        "E2E probe  : ${PROBE_STATUS}"
    printf 'Total time : %dm%02ds\n' "$(( ELAPSED / 60 ))" "$(( ELAPSED % 60 ))"
    exit 1
fi
DEPLOY_STATUS="PASS"

echo
echo "==> Deploy succeeded. Updating HF Space..."
echo

# ---- 3. update_hf_env.sh -------------------------------------------------
# Hand the droplet IP and the new token to update_hf_env.sh. On failure,
# print the partial-state summary and exit 1; the droplet deploy that already
# succeeded is left in place.
if bash "${REPO_ROOT}/scripts/update_hf_env.sh" "$IP" "$TOKEN"; then
    HF_STATUS="PASS"
else
    echo "update_hf_env.sh failed. HF Space NOT updated." >&2
    ELAPSED=$(( SECONDS - START_SECONDS ))
    echo
    echo "=== redeploy summary ==="
    echo "Droplet IP : ${IP}"
    # The deploy step was given the new token and succeeded, but this step
    # did not, so the operator must not be pointed at the HF Space vars: as
    # far as this script can tell they were not updated.
    echo "Token      : (regenerated on droplet only; HF Space vars NOT updated)"
    echo "Deploy     : ${DEPLOY_STATUS}"
    echo "HF Space   : ${HF_STATUS}"
    echo "E2E probe  : ${PROBE_STATUS}"
    printf "Total time : %dm%02ds\n" $(( ELAPSED / 60 )) $(( ELAPSED % 60 ))
    exit 1
fi

printf '\n%s\n\n' "==> HF Space updated. Running end-to-end probe..."

# ---- 4. probe_addresses.py -----------------------------------------------
# probe_addresses.py exits 0 only when 5/5 pass (from docs/DROPLET-RUNBOOK.md).
# Running it as an `if` condition keeps errexit from aborting the script on a
# failed probe, so its exit code can be recorded and the summary still printed.
if "${REPO_ROOT}/.venv/bin/python" "${REPO_ROOT}/scripts/probe_addresses.py"; then
    PROBE_EXIT=0
    PROBE_STATUS="PASS"
else
    PROBE_EXIT=$?
    PROBE_STATUS="FAIL"
fi

# ---- 5. Summary ----------------------------------------------------------
# Reached only when both the deploy and the HF Space update succeeded; the
# probe verdict is the one status that can still be either value here.
ELAPSED=$(( SECONDS - START_SECONDS ))
printf '%s\n' \
    "" \
    "=== redeploy summary ===" \
    "Droplet IP : ${IP}" \
    "Token      : (regenerated, see HF Space vars)" \
    "Deploy     : ${DEPLOY_STATUS}" \
    "HF Space   : ${HF_STATUS}" \
    "E2E probe  : ${PROBE_STATUS}"
printf 'Total time : %dm%02ds\n' "$(( ELAPSED / 60 ))" "$(( ELAPSED % 60 ))"

# The script's exit status is the probe verdict: 0 on PASS, 1 otherwise.
# Deploy + HF update already succeeded at this point and are not rolled back.
[[ "${PROBE_STATUS}" == "PASS" ]]