microclimate-x-demo / scripts /deploy_hf.sh
W1nd5pac's picture
Deploy 2026-05-20T07:09:24Z — 11e81c5 (code)
a8358d8 verified
#!/usr/bin/env bash
# ──────────────────────────────────────────────────────────────────────────
# scripts/deploy_hf.sh — fully-automated deploy to Hugging Face Spaces
# ──────────────────────────────────────────────────────────────────────────
#
# Usage:
# HF_TOKEN="<your-hf-write-token>" ./scripts/deploy_hf.sh <hf-user>/<space-name>
#
# Example (token redacted — get yours at https://huggingface.co/settings/tokens):
# HF_TOKEN="$HF_TOKEN" ./scripts/deploy_hf.sh W1nd5pac/microclimate-x
#
# What it does (no manual steps):
# 1. Ensures huggingface_hub CLI is installed in .venv/
# 2. Authenticates with HF_TOKEN
# 3. Creates the Space (Docker SDK) if it doesn't exist yet
# 4. Uploads the whole repo (server-side LFS handles the 217 MB model)
# 5. Prints the live URL when the build is queued
#
# Skips:
# data/ figures/ tests/ .venv/ .git/ *.sqlite3 __pycache__/
# ──────────────────────────────────────────────────────────────────────────
# Strict mode: stop on errors, on unset variables and on failed pipeline stages.
set -euo pipefail

# The target Space id is the one mandatory positional argument.
if (( $# < 1 )); then
  printf '%s\n' \
    "Usage: HF_TOKEN=hf_xxx $0 <hf-user>/<space-name>" \
    "Example: HF_TOKEN=hf_xxx $0 W1nd5pac/microclimate-x"
  exit 2
fi
REPO_ID="$1"

# Work from the repository root, i.e. the parent of the directory that holds
# this script, so every relative path below resolves the same way regardless
# of where the script was invoked from.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"

# Drop Python-related variables inherited from the caller so another active
# venv or a stray PYTHONPATH cannot interfere with the tools run from .venv/.
unset PYTHONPATH VIRTUAL_ENV PYTHONHOME
# ── 1. ensure .venv has huggingface_hub ──────────────────────────────────
# Bootstrap only when the `hf` entry point is missing, so repeat deploys skip
# the install step entirely.
if [[ ! -x ".venv/bin/hf" ]]; then
  # A fresh checkout has no .venv/ yet. Create it here instead of letting the
  # pip call below die with a bare "No such file or directory".
  if [[ ! -x ".venv/bin/pip" ]]; then
    if ! command -v python3 >/dev/null 2>&1; then
      echo "❌ .venv/ is missing and python3 is not on PATH; cannot create it." >&2
      exit 1
    fi
    echo " No usable .venv/ found; creating one with 'python3 -m venv .venv'"
    python3 -m venv .venv
  fi
  echo "β–Ά Installing huggingface_hub CLI into .venv/ …"
  .venv/bin/pip install -q -U "huggingface_hub[cli,hf_transfer]"
  # Fail here, with a clear message, rather than at the first `hf` call later.
  if [[ ! -x ".venv/bin/hf" ]]; then
    echo "❌ huggingface_hub was installed but .venv/bin/hf is still missing." >&2
    exit 1
  fi
fi
# Path to the CLI used by every later step.
HF=".venv/bin/hf"
# Speed-boost for the 217 MB model upload.
# NOTE(review): whether the installed huggingface_hub release still honours
# this variable is not visible from this script; confirm against its docs.
export HF_HUB_ENABLE_HF_TRANSFER=1
# ── 2. authenticate ──────────────────────────────────────────────────────

#######################################
# Print the account reported by `hf auth whoami` (first output line only).
# Globals:   HF (read)
# Outputs:   the identity line on stdout
# Returns:   0 when an identity was reported, 1 otherwise
#######################################
hf_current_user() {
  local out
  out=$("$HF" auth whoami 2>/dev/null) || return 1
  out=${out%%$'\n'*}
  # NOTE(review): some huggingface_hub releases appear to print
  # "Not logged in" and still exit 0, so the exit status alone is not
  # trusted here. Confirm against the installed CLI version.
  [[ -n "$out" && "$out" != *"Not logged in"* ]] || return 1
  printf '%s\n' "$out"
}

if [[ -n "${HF_TOKEN:-}" ]]; then
  # Re-login non-interactively so the supplied token is the one in use.
  # The login stays best-effort on purpose: it may fail for side reasons
  # (for instance the git-credential step), and the identity check below is
  # what decides whether the deploy can continue.
  # NOTE(review): --token puts the secret on the process command line.
  echo "β–Ά Authenticating as the token's owner …"
  printf '%s\n' "$HF_TOKEN" \
    | "$HF" auth login --token "$HF_TOKEN" --add-to-git-credential >/dev/null 2>&1 || true
fi

# Require a real identity before touching the Hub, instead of carrying on as
# "?" and failing later with a less obvious error.
if ! WHOAMI=$(hf_current_user); then
  if [[ -z "${HF_TOKEN:-}" ]]; then
    echo "❌ HF_TOKEN env not set and not already logged in."
    echo " Get a Write token at https://huggingface.co/settings/tokens and run:"
    echo " HF_TOKEN=hf_xxx $0 $REPO_ID"
  else
    echo "❌ Could not authenticate with the supplied HF_TOKEN (invalid, expired or revoked?)." >&2
  fi
  exit 1
fi
echo " Logged in as: $WHOAMI"
# ── 3. create the Space if missing (idempotent — 409 means "exists") ─────
echo "β–Ά Ensuring Space $REPO_ID exists (Docker SDK) …"

# Keep both the output and the exit status. A non-zero status is expected
# when the Space already exists, so it must not trip `set -e`.
create_status=0
CREATE_OUTPUT=$("$HF" repos create "$REPO_ID" --repo-type space --space-sdk docker 2>&1) \
  || create_status=$?

# Here-strings instead of `echo … | grep -q`: with pipefail, grep -q exiting
# early can turn a successful match into a failed pipeline.
if grep -q "Successfully created" <<<"$CREATE_OUTPUT"; then
  echo " Created fresh Space."
elif grep -qi "already created\|409" <<<"$CREATE_OUTPUT"; then
  echo " Space already exists β€” will push to it."
elif (( create_status == 0 )); then
  # The CLI reported success with wording this script does not recognise.
  # Trust the exit status rather than aborting a working deploy.
  echo " Space is ready."
else
  printf '%s\n' "$CREATE_OUTPUT" >&2
  echo "❌ Unexpected response from 'hf repos create'. Aborting." >&2
  exit 1
fi
# ── 4. sanity-check the model exists locally ─────────────────────────────
# Path of the trained model, relative to the repository root.
MODEL="models/rf_model.pkl"
if ! [[ -f "$MODEL" ]]; then
  echo "⚠️ $MODEL not found β€” the Space will fall back to a heuristic predictor."
  # Ask before deploying without the model; only a lone "y" or "Y" continues.
  read -r -p "Continue without the trained model? [y/N] " ans
  case "$ans" in
    y | Y) ;;
    *) exit 1 ;;
  esac
fi
# ── 5. upload everything ─────────────────────────────────────────────────
echo "β–Ά Uploading repo β†’ spaces/$REPO_ID …"
echo " (217 MB rf_model.pkl uses HF's server-side Xet/LFS β€” no local LFS needed)"
# Commit message prefix: UTC timestamp plus the short git revision, or
# "local" when the revision cannot be read.
DEPLOY_MSG="Deploy $(date -u +%Y-%m-%dT%H:%M:%SZ) β€” $(git rev-parse --short HEAD 2>/dev/null || echo local)"

# Pass 1: bulk-upload the code. The model is excluded explicitly rather than
# through .gitignore, so this commit holds code only whatever the CLI does
# with ignore files.
# NOTE(review): the huggingface_hub upload_folder documentation says
# .gitignore is not taken into account. Confirm that nothing sensitive
# (e.g. a local .env) sits in the repo root, or add it to the list below.
code_upload_args=(
  --repo-type=space
  --commit-message="$DEPLOY_MSG (code)"
  --exclude "data/*"
  --exclude "figures/*"
  --exclude "tests/*"
  --exclude ".venv/*"
  --exclude ".local/*"
  --exclude ".pytest_cache/*"
  --exclude ".ruff_cache/*"
  --exclude ".mypy_cache/*"
  --exclude "**/__pycache__/*"
  --exclude "*.sqlite3"
  --exclude "*.sqlite3-*"
  --exclude "*.pyc"
  --exclude ".DS_Store"
  --exclude ".git/*"
  --exclude ".github/*"
  --exclude "$MODEL"
)
# Positional arguments: repo id, local path (repo root), path inside the Space.
"$HF" upload "$REPO_ID" . . "${code_upload_args[@]}"
# Pass 2: push the 217 MB Random Forest model on its own, addressed by an
# explicit single-file path and stored at models/rf_model.pkl in the Space.
# Without this step the Space falls back to the heuristic predictor and the
# AUC=0.871 claim won't reproduce. Skipped when the file is absent locally.
if [[ -f "$MODEL" ]]; then
  echo "β–Ά Uploading models/rf_model.pkl (217 MB) β€” bypassing .gitignore …"
  model_upload_cmd=(
    "$HF" upload
    "$REPO_ID"
    "$MODEL"
    "models/rf_model.pkl"
    --repo-type=space
    --commit-message="$DEPLOY_MSG (model)"
  )
  "${model_upload_cmd[@]}"
fi
# Closing summary: where to watch the build and where the Space will be served.
printf '%s\n' \
  "" \
  "βœ… Upload complete. Space is rebuilding now." \
  " Status: https://huggingface.co/spaces/$REPO_ID" \
  " Live URL: https://huggingface.co/spaces/$REPO_ID (β‰ˆ 3-5 min for first build)" \
  "" \
  "Tip: once the green Running badge shows, send the Live URL to your supervisor."