File size: 5,833 Bytes
a8358d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env bash
# ──────────────────────────────────────────────────────────────────────────
# scripts/deploy_hf.sh — fully-automated deploy to Hugging Face Spaces
# ──────────────────────────────────────────────────────────────────────────
#
# Usage:
#   HF_TOKEN="<your-hf-write-token>" ./scripts/deploy_hf.sh <hf-user>/<space-name>
#
# Example (token redacted — get yours at https://huggingface.co/settings/tokens):
#   HF_TOKEN="$HF_TOKEN" ./scripts/deploy_hf.sh W1nd5pac/microclimate-x
#
# What it does (no manual steps):
#   1. Ensures huggingface_hub CLI is installed in .venv/
#   2. Authenticates with HF_TOKEN
#   3. Creates the Space (Docker SDK) if it doesn't exist yet
#   4. Uploads the whole repo (server-side LFS handles the 217 MB model)
#   5. Prints the live URL when the build is queued
#
# Skips:
#   data/   figures/   tests/   .venv/   .local/   .git/   .github/
#   .pytest_cache/   .ruff_cache/   .mypy_cache/   __pycache__/
#   *.sqlite3   *.sqlite3-*   *.pyc   .DS_Store
# ──────────────────────────────────────────────────────────────────────────
set -euo pipefail

# The target Space id is the one required positional argument; without it,
# print the usage text and exit with status 2.
if (( $# < 1 )); then
  printf '%s\n' \
    "Usage: HF_TOKEN=hf_xxx $0 <hf-user>/<space-name>" \
    "Example: HF_TOKEN=hf_xxx $0 W1nd5pac/microclimate-x"
  exit 2
fi

REPO_ID="$1"

# ROOT is the parent of the directory that holds this script. The script
# changes into it so that every relative path used afterwards (.venv/,
# models/, the upload source ".") is resolved from there.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"

# Drop Python-related variables inherited from the caller so another active
# venv or a stray PYTHONPATH cannot leak into the commands run from .venv/.
unset PYTHONPATH
unset VIRTUAL_ENV
unset PYTHONHOME

# ── 1. ensure .venv has huggingface_hub ──────────────────────────────────
# HF is the path of the Hugging Face CLI that every later step invokes.
HF=".venv/bin/hf"
if [[ ! -x "$HF" ]]; then
  # Bootstrap the virtualenv itself when it is missing; otherwise the pip
  # call below would die with "No such file or directory".
  if [[ ! -x ".venv/bin/pip" ]]; then
    echo "▶ Creating virtualenv in .venv/ …"
    python3 -m venv .venv
  fi
  echo "▶ Installing huggingface_hub CLI into .venv/ …"
  .venv/bin/pip install -q -U "huggingface_hub[cli,hf_transfer]"
  # Fail here with a clear message instead of a confusing "command not
  # found" later if the install did not provide the `hf` entry point.
  if [[ ! -x "$HF" ]]; then
    echo "❌ $HF is still missing after installing huggingface_hub." >&2
    exit 1
  fi
fi

# Speed-boost for the 217 MB model upload.
export HF_HUB_ENABLE_HF_TRANSFER=1

# ── 2. authenticate ──────────────────────────────────────────────────────
# Two ways in: an HF_TOKEN in the environment, or a login that
# `hf auth whoami` already recognises. With neither, stop before touching
# the Hub.
if [[ -z "${HF_TOKEN:-}" ]]; then
  if ! "$HF" auth whoami >/dev/null 2>&1; then
    {
      echo "❌ HF_TOKEN env not set and not already logged in."
      echo "   Get a Write token at https://huggingface.co/settings/tokens and run:"
      echo "     HF_TOKEN=hf_xxx $0 $REPO_ID"
    } >&2
    exit 1
  fi
else
  # Re-login non-interactively so we use the supplied token (idempotent).
  echo "▶ Authenticating as the token's owner …"
  # The CLI's own output stays hidden, but a failed login is no longer
  # swallowed silently: warn and carry on (best-effort, as before).
  # NOTE(review): later `hf` calls presumably still read HF_TOKEN from the
  # environment even if this login step fails — confirm against the CLI docs.
  if ! printf '%s\n' "$HF_TOKEN" \
    | "$HF" auth login --token "$HF_TOKEN" --add-to-git-credential >/dev/null 2>&1; then
    echo "⚠️  'hf auth login' failed — the supplied HF_TOKEN may be invalid or expired." >&2
  fi
fi

# Report which account the following steps act as ("?" when whoami fails).
# The first line is cut with a parameter expansion instead of `| head -1`,
# so `pipefail` cannot turn an early-closing `head` into a spurious failure
# that appends "?" to a valid name.
if whoami_output=$("$HF" auth whoami 2>/dev/null); then
  WHOAMI=${whoami_output%%$'\n'*}
else
  WHOAMI="?"
fi
echo "  Logged in as: $WHOAMI"

# ── 3. create the Space if missing (idempotent — 409 means "exists") ─────
echo "▶ Ensuring Space $REPO_ID exists (Docker SDK) …"
# `|| true`: the command may exit non-zero (e.g. the 409 case), so the
# outcome is decided from the captured stdout+stderr text rather than from
# the exit status.
CREATE_OUTPUT=$("$HF" repos create "$REPO_ID" --repo-type space --space-sdk docker 2>&1 || true)
# Here-strings instead of `echo … | grep -q`: under `pipefail` an early exit
# of `grep -q` can SIGPIPE the writer and fail the pipeline despite a match.
# `grep -E` with `|` replaces the BRE `\|`, which is a non-POSIX extension.
if grep -q "Successfully created" <<<"$CREATE_OUTPUT"; then
  echo "  Created fresh Space."
elif grep -Eqi "already created|409" <<<"$CREATE_OUTPUT"; then
  echo "  Space already exists — will push to it."
else
  # Anything else is unexpected: show the CLI output and stop.
  printf '%s\n' "$CREATE_OUTPUT" >&2
  echo "❌ Unexpected response from 'hf repos create'. Aborting." >&2
  exit 1
fi

# ── 4. sanity-check the model exists locally ─────────────────────────────
MODEL="models/rf_model.pkl"
if [[ ! -f "$MODEL" ]]; then
  echo "⚠️  $MODEL not found — the Space will fall back to a heuristic predictor." >&2
  # `read` returns non-zero on EOF (e.g. stdin is not a terminal). Treat that
  # as "no" so the script aborts with a message instead of dying silently
  # under `set -e`.
  ans=""
  read -r -p "Continue without the trained model? [y/N] " ans || ans=""
  if [[ ! "$ans" =~ ^[Yy]$ ]]; then
    echo "❌ Aborting: deploying without $MODEL was not confirmed." >&2
    exit 1
  fi
fi

# ── 5. upload everything ─────────────────────────────────────────────────
echo "▶ Uploading repo → spaces/$REPO_ID …"
echo "  (217 MB rf_model.pkl uses HF's server-side Xet/LFS — no local LFS needed)"

# Commit message: UTC timestamp plus the short git SHA, or "local" when
# `git rev-parse` fails (e.g. outside a git checkout).
DEPLOY_MSG="Deploy $(date -u +%Y-%m-%dT%H:%M:%SZ) — $(git rev-parse --short HEAD 2>/dev/null || echo local)"

# Paths left out of the bulk upload. The model is listed explicitly so that
# pass 1 skips it no matter how .gitignore is treated.
# NOTE(review): the original relied on `hf upload` honouring .gitignore
# (which excludes *.pkl); that behaviour was not verified — confirm.
EXCLUDE_PATTERNS=(
  "data/*"
  "figures/*"
  "tests/*"
  ".venv/*"
  ".local/*"
  ".pytest_cache/*"
  ".ruff_cache/*"
  ".mypy_cache/*"
  "**/__pycache__/*"
  "*.sqlite3"
  "*.sqlite3-*"
  "*.pyc"
  ".DS_Store"
  ".git/*"
  ".github/*"
  "$MODEL"
)
exclude_args=()
for pattern in "${EXCLUDE_PATTERNS[@]}"; do
  exclude_args+=(--exclude "$pattern")
done

# Pass 1: bulk-upload the repo root (local ".") to the Space root (remote
# "."), minus the excluded paths.
"$HF" upload \
  "$REPO_ID" \
  . \
  . \
  --repo-type=space \
  --commit-message="$DEPLOY_MSG (code)" \
  "${exclude_args[@]}"

# Pass 2: explicitly push the 217 MB Random Forest model as a single file.
# Without this step the Space falls back to the heuristic predictor and the
# AUC=0.871 claim won't reproduce. It is skipped when the model is absent
# locally.
if [[ -f "$MODEL" ]]; then
  echo "▶ Uploading $MODEL (217 MB) — bypassing .gitignore …"
  "$HF" upload \
    "$REPO_ID" \
    "$MODEL" \
    "$MODEL" \
    --repo-type=space \
    --commit-message="$DEPLOY_MSG (model)"
fi

# Final summary: tell the user where to watch the build and find the Space.
# Unquoted heredoc delimiter so $REPO_ID is expanded.
cat <<EOF

✅ Upload complete. Space is rebuilding now.
   Status:    https://huggingface.co/spaces/$REPO_ID
   Live URL:  https://huggingface.co/spaces/$REPO_ID  (≈ 3-5 min for first build)

Tip: once the green Running badge shows, send the Live URL to your supervisor.
EOF