#!/usr/bin/env bash
# ──────────────────────────────────────────────────────────────────────────
# scripts/deploy_hf.sh — fully-automated deploy to Hugging Face Spaces
# ──────────────────────────────────────────────────────────────────────────
#
# Usage:
#   HF_TOKEN="<write-token>" ./scripts/deploy_hf.sh <user>/<space-name>
#
# Example (token redacted — get yours at https://huggingface.co/settings/tokens):
#   HF_TOKEN="$HF_TOKEN" ./scripts/deploy_hf.sh W1nd5pac/microclimate-x
#
# Environment:
#   HF_TOKEN   Hugging Face Write token. Optional when the hf CLI in .venv/
#              is already logged in.
#
# What it does:
#   1. Ensures huggingface_hub CLI is installed in .venv/
#   2. Authenticates with HF_TOKEN
#   3. Creates the Space (Docker SDK) if it doesn't exist yet
#   4. Uploads the whole repo (server-side LFS handles the 217 MB model)
#   5. Prints the live URL when the build is queued
#
# The only interactive step is a confirmation prompt when the trained model
# file is missing locally.
#
# Skips:
#   data/ figures/ tests/ .venv/ .git/ *.sqlite3 __pycache__/
#
# Exit status:
#   0 on success, 2 on usage error, 1 on any other failure.
# ──────────────────────────────────────────────────────────────────────────
set -euo pipefail

# Print every argument on its own line to stderr, then abort with status 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

if [[ $# -lt 1 ]]; then
  {
    echo "Usage: HF_TOKEN=hf_xxx $0 <user>/<space-name>"
    echo "Example: HF_TOKEN=hf_xxx $0 W1nd5pac/microclimate-x"
  } >&2
  exit 2
fi

REPO_ID="$1"

# Run from the repository root (the parent of this script's directory) so the
# relative paths below (.venv/, models/, ".") resolve regardless of the cwd.
ROOT="$(cd -- "$(dirname -- "$0")/.." && pwd)"
cd -- "$ROOT"

# Clean env so other venvs / PYTHONPATH leaks don't break us.
unset PYTHONPATH VIRTUAL_ENV PYTHONHOME

# ── 1. ensure .venv has huggingface_hub ──────────────────────────────────
HF=".venv/bin/hf"
if [[ ! -x "$HF" ]]; then
  # Fail with a clear hint instead of a bare "No such file or directory".
  [[ -x ".venv/bin/pip" ]] || die \
    "❌ .venv/bin/pip not found in $ROOT." \
    " Create the virtualenv first: python3 -m venv .venv"
  echo "▶ Installing huggingface_hub CLI into .venv/ …"
  .venv/bin/pip install -q -U "huggingface_hub[cli,hf_transfer]"
  [[ -x "$HF" ]] || die "❌ $HF is still missing after installing huggingface_hub."
fi

# Speed-boost for the 217 MB model upload.
export HF_HUB_ENABLE_HF_TRANSFER=1

# ── 2. authenticate ──────────────────────────────────────────────────────
if [[ -n "${HF_TOKEN:-}" ]]; then
  # Make sure child processes see the token even if the stored login fails.
  export HF_TOKEN
  # Re-login non-interactively so we use the supplied token (idempotent).
  echo "▶ Authenticating as the token's owner …"
  # Best effort: storing the credential may fail (e.g. no git credential
  # helper); the whoami check below is what actually gates the deploy.
  if ! printf '%s\n' "$HF_TOKEN" \
    | "$HF" auth login --token "$HF_TOKEN" --add-to-git-credential >/dev/null 2>&1; then
    echo "⚠️ 'hf auth login' failed — continuing with HF_TOKEN from the environment." >&2
  fi
elif ! "$HF" auth whoami >/dev/null 2>&1; then
  die \
    "❌ HF_TOKEN env not set and not already logged in." \
    " Get a Write token at https://huggingface.co/settings/tokens and run:" \
    " HF_TOKEN=hf_xxx $0 $REPO_ID"
fi

# Verify the credentials actually work before touching anything remote.
if ! WHOAMI_OUTPUT=$("$HF" auth whoami 2>/dev/null); then
  die \
    "❌ Could not verify the Hugging Face login ('hf auth whoami' failed)." \
    " Check that HF_TOKEN is a valid Write token and that the Hub is reachable."
fi
WHOAMI="${WHOAMI_OUTPUT%%$'\n'*}" # first line only
echo " Logged in as: ${WHOAMI:-?}"

# ── 3. create the Space if missing (idempotent — 409 means "exists") ─────
echo "▶ Ensuring Space $REPO_ID exists (Docker SDK) …"
# A non-zero exit is expected when the Space already exists, so the output is
# captured and classified below instead of letting `set -e` abort here.
CREATE_OUTPUT=$("$HF" repos create "$REPO_ID" --repo-type space --space-sdk docker 2>&1 || true)
# Here-strings (not `echo | grep -q`) so an early grep exit cannot turn into a
# SIGPIPE failure under pipefail; -E keeps the alternation portable.
if grep -qF -- "Successfully created" <<<"$CREATE_OUTPUT"; then
  echo " Created fresh Space."
elif grep -qiE -- "already created|409" <<<"$CREATE_OUTPUT"; then
  echo " Space already exists — will push to it."
else
  die \
    "$CREATE_OUTPUT" \
    "❌ Unexpected response from 'hf repos create'. Aborting."
fi

# ── 4. sanity-check the model exists locally ─────────────────────────────
MODEL="models/rf_model.pkl"
if [[ ! -f "$MODEL" ]]; then
  echo "⚠️ $MODEL not found — the Space will fall back to a heuristic predictor."
  ans=""
  # EOF / closed stdin (e.g. CI) counts as "no" rather than a silent set -e exit.
  read -r -p "Continue without the trained model? [y/N] " ans || ans=""
  [[ "$ans" =~ ^[Yy]$ ]] || die "Aborted: $MODEL is missing."
fi

# ── 5. upload everything ─────────────────────────────────────────────────
echo "▶ Uploading repo → spaces/$REPO_ID …"
echo " (217 MB rf_model.pkl uses HF's server-side Xet/LFS — no local LFS needed)"

DEPLOY_MSG="Deploy $(date -u +%Y-%m-%dT%H:%M:%SZ) — $(git rev-parse --short HEAD 2>/dev/null || echo local)"

# Paths that must never be pushed to the Space.
EXCLUDE_PATTERNS=(
  "data/*"
  "figures/*"
  "tests/*"
  ".venv/*"
  ".local/*"
  ".pytest_cache/*"
  ".ruff_cache/*"
  ".mypy_cache/*"
  "**/__pycache__/*"
  "*.sqlite3"
  "*.sqlite3-*"
  "*.pyc"
  ".DS_Store"
  ".git/*"
  ".github/*"
)
EXCLUDE_ARGS=()
for pattern in "${EXCLUDE_PATTERNS[@]}"; do
  EXCLUDE_ARGS+=(--exclude "$pattern")
done

# Pass 1: bulk-upload everything except the model (default: respects .gitignore
# which already excludes *.pkl, so the big file won't go in this pass).
"$HF" upload \
  "$REPO_ID" \
  . \
  . \
  --repo-type=space \
  --commit-message="$DEPLOY_MSG (code)" \
  "${EXCLUDE_ARGS[@]}"

# Pass 2: explicitly push the 217 MB Random Forest model. An explicit
# single-file path bypasses .gitignore filtering — without this step the Space
# falls back to the heuristic predictor and the AUC=0.871 claim won't reproduce.
if [[ -f "$MODEL" ]]; then
  echo "▶ Uploading models/rf_model.pkl (217 MB) — bypassing .gitignore …"
  "$HF" upload \
    "$REPO_ID" \
    "$MODEL" \
    "models/rf_model.pkl" \
    --repo-type=space \
    --commit-message="$DEPLOY_MSG (model)"
fi

echo
echo "✅ Upload complete. Space is rebuilding now."
echo " Status: https://huggingface.co/spaces/$REPO_ID"
echo " Live URL: https://huggingface.co/spaces/$REPO_ID (≈ 3-5 min for first build)"
echo
echo "Tip: once the green Running badge shows, send the Live URL to your supervisor."