#!/usr/bin/env bash
# ──────────────────────────────────────────────────────────────────────────
# scripts/deploy_hf.sh — fully-automated deploy to Hugging Face Spaces
# ──────────────────────────────────────────────────────────────────────────
#
# Usage:
# HF_TOKEN="<your-hf-write-token>" ./scripts/deploy_hf.sh <hf-user>/<space-name>
#
# Example (token redacted — get yours at https://huggingface.co/settings/tokens):
# HF_TOKEN="$HF_TOKEN" ./scripts/deploy_hf.sh W1nd5pac/microclimate-x
#
# What it does (no manual steps):
# 1. Ensures huggingface_hub CLI is installed in .venv/
# 2. Authenticates with HF_TOKEN
# 3. Creates the Space (Docker SDK) if it doesn't exist yet
# 4. Uploads the whole repo (server-side LFS handles the 217 MB model)
# 5. Prints the live URL when the build is queued
#
# Skips:
# data/ figures/ tests/ .venv/ .git/ *.sqlite3 __pycache__/
# ──────────────────────────────────────────────────────────────────────────
# Strict mode: abort on any failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# The target Space id (<hf-user>/<space-name>) is the only required argument.
if [[ $# -lt 1 ]]; then
  # Usage errors are diagnostics, so they go to stderr rather than stdout.
  {
    echo "Usage: HF_TOKEN=hf_xxx $0 <hf-user>/<space-name>"
    echo "Example: HF_TOKEN=hf_xxx $0 W1nd5pac/microclimate-x"
  } >&2
  exit 2
fi
REPO_ID="$1"

# Resolve the repository root (the parent of this script's directory) and run
# from there, so the relative .venv/ and models/ paths used later resolve the
# same way regardless of the caller's working directory.
# CDPATH is blanked for this lookup: with CDPATH set, `cd` on a relative path
# may resolve somewhere else and print the directory it chose, which would end
# up inside the captured value. `--` guards against paths starting with '-'.
ROOT="$(CDPATH='' cd -- "$(dirname -- "$0")/.." && pwd)"
cd -- "$ROOT"

# Clean env so other venvs / PYTHONPATH leaks don't break us.
unset PYTHONPATH VIRTUAL_ENV PYTHONHOME
# ── 1. ensure .venv has huggingface_hub ──────────────────────────────────
# Every CLI call in this script goes through the project-local virtualenv.
HF=".venv/bin/hf"
if [[ ! -x "$HF" ]]; then
  # Bootstrap the virtualenv itself when it (or its pip) is absent. Without
  # this, a missing .venv/ made the install below die with a bare
  # "No such file or directory" instead of ensuring the CLI as advertised.
  if [[ ! -x ".venv/bin/pip" ]]; then
    echo "Creating virtualenv in .venv/ ..."
    python3 -m venv .venv
  fi
  echo "βΆ Installing huggingface_hub CLI into .venv/ β¦"
  .venv/bin/pip install -q -U "huggingface_hub[cli,hf_transfer]"
  # Fail here with a clear message rather than at the first CLI call later on
  # (e.g. if the installed package does not ship an `hf` entry point).
  if [[ ! -x "$HF" ]]; then
    echo "ERROR: $HF is still missing after installing huggingface_hub." >&2
    exit 1
  fi
fi

# Speed-boost for the 217 MB model upload.
export HF_HUB_ENABLE_HF_TRANSFER=1
# ── 2. authenticate ──────────────────────────────────────────────────────
# Two supported modes: an HF_TOKEN supplied in the environment, or a login
# that the CLI already has cached (checked with `hf auth whoami`).
if [[ -n "${HF_TOKEN:-}" ]]; then
  # Re-login non-interactively so we use the supplied token (idempotent).
  echo "βΆ Authenticating as the token's owner β¦"
  # The token is still offered on stdin as before, but through a here-string,
  # so no extra `echo` process takes part in a pipeline under `pipefail`.
  # NOTE(review): `--token "$HF_TOKEN"` exposes the secret in this process's
  # argv (visible in `ps`); confirm whether relying on the HF_TOKEN environment
  # variable alone would be acceptable here.
  if ! "$HF" auth login --token "$HF_TOKEN" --add-to-git-credential \
      >/dev/null 2>&1 <<<"$HF_TOKEN"; then
    # Login stays best-effort (the run continues), but is no longer silent.
    echo "WARNING: 'hf auth login' failed; continuing anyway." >&2
  fi
elif ! "$HF" auth whoami >/dev/null 2>&1; then
  # Neither a token nor a cached login: nothing to deploy with.
  {
    echo "β HF_TOKEN env not set and not already logged in."
    echo " Get a Write token at https://huggingface.co/settings/tokens and run:"
    echo " HF_TOKEN=hf_xxx $0 $REPO_ID"
  } >&2
  exit 1
fi

# Report the account in use: the first line of `hf auth whoami`, or "?" when
# the call fails or prints nothing. The output is captured whole and trimmed
# with parameter expansion, so no `head` stage can close the pipe early and
# trip `pipefail`.
if whoami_out="$("$HF" auth whoami 2>/dev/null)" && [[ -n "$whoami_out" ]]; then
  WHOAMI="${whoami_out%%$'\n'*}"
else
  WHOAMI="?"
fi
echo " Logged in as: $WHOAMI"
# ── 3. create the Space if missing (idempotent — 409 means "exists") ─────
echo "βΆ Ensuring Space $REPO_ID exists (Docker SDK) β¦"
# A non-zero exit is tolerated here (`|| true`); the outcome is decided from
# the captured stdout+stderr text below instead.
CREATE_OUTPUT="$("$HF" repos create "$REPO_ID" --repo-type space --space-sdk docker 2>&1 || true)"

# The text is matched via here-strings rather than `echo … | grep -q`: under
# `pipefail`, grep -q exiting at its first match can leave `echo` with a
# SIGPIPE and turn a real match into a failed pipeline. `-E` with a plain `|`
# replaces the non-POSIX `\|` alternation.
if grep -Fq -- "Successfully created" <<<"$CREATE_OUTPUT"; then
  echo " Created fresh Space."
elif grep -Eqi -- "already created|409" <<<"$CREATE_OUTPUT"; then
  echo " Space already exists β will push to it."
else
  # Anything else is unexpected: show the raw CLI output on stderr and stop.
  {
    printf '%s\n' "$CREATE_OUTPUT"
    echo "β Unexpected response from 'hf repos create'. Aborting."
  } >&2
  exit 1
fi
# ── 4. sanity-check the model exists locally ─────────────────────────────
# Path of the trained model, relative to the repository root.
MODEL="models/rf_model.pkl"
if [[ ! -f "$MODEL" ]]; then
  echo "β οΈ $MODEL not found β the Space will fall back to a heuristic predictor." >&2
  # `read` returns non-zero at end of input (closed or empty stdin, e.g. in
  # CI). Under `set -e` that used to end the script with no explanation, so
  # the failure is caught and treated as the default answer "no".
  ans=""
  read -r -p "Continue without the trained model? [y/N] " ans || ans=""
  if [[ ! "$ans" =~ ^[Yy]$ ]]; then
    echo "Aborted: not deploying without $MODEL." >&2
    exit 1
  fi
fi
# ── 5. upload everything ─────────────────────────────────────────────────
echo "βΆ Uploading repo β spaces/$REPO_ID β¦"
echo " (217 MB rf_model.pkl uses HF's server-side Xet/LFS β no local LFS needed)"

# Commit-message prefix shared by both upload passes: a UTC timestamp plus the
# short git revision, or the word "local" when git cannot supply one.
printf -v DEPLOY_MSG 'Deploy %s β %s' \
  "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
  "$(git rev-parse --short HEAD 2>/dev/null || echo local)"

# Pass 1: bulk upload of the working tree. The model is expected to stay out of
# this pass because .gitignore is said to exclude *.pkl (not visible here).
# Each glob below becomes its own `--exclude` argument, in this order.
skip_globs=(
  "data/*"
  "figures/*"
  "tests/*"
  ".venv/*"
  ".local/*"
  ".pytest_cache/*"
  ".ruff_cache/*"
  ".mypy_cache/*"
  "**/__pycache__/*"
  "*.sqlite3"
  "*.sqlite3-*"
  "*.pyc"
  ".DS_Store"
  ".git/*"
  ".github/*"
)

# Positional arguments: target repo id, local path (.), path inside the repo (.).
code_upload_args=(
  "$REPO_ID"
  .
  .
  --repo-type=space
  --commit-message="$DEPLOY_MSG (code)"
)
for skip_glob in "${skip_globs[@]}"; do
  code_upload_args+=(--exclude "$skip_glob")
done

"$HF" upload "${code_upload_args[@]}"
# Pass 2: the 217 MB Random Forest model gets its own upload as a single
# explicit file, which per the original note sidesteps the .gitignore
# filtering of pass 1. Without it the Space falls back to the heuristic
# predictor and the AUC=0.871 claim won't reproduce.
# Skipped entirely when the model file is not present locally.
if [[ -f "$MODEL" ]]; then
  echo "βΆ Uploading models/rf_model.pkl (217 MB) β bypassing .gitignore β¦"
  # Positional arguments: target repo id, local file, destination path in repo.
  model_upload_args=(
    "$REPO_ID"
    "$MODEL"
    "models/rf_model.pkl"
    --repo-type=space
    --commit-message="$DEPLOY_MSG (model)"
  )
  "$HF" upload "${model_upload_args[@]}"
fi
# Closing summary: a blank line, the completion notice, the Space URL (printed
# under both the "Status" and "Live URL" labels), another blank line, and a
# final tip. Emitted as one here-document; only $REPO_ID is expanded.
cat <<EOF

β
Upload complete. Space is rebuilding now.
 Status: https://huggingface.co/spaces/$REPO_ID
 Live URL: https://huggingface.co/spaces/$REPO_ID (β 3-5 min for first build)

Tip: once the green Running badge shows, send the Live URL to your supervisor.
EOF
|