File size: 3,324 Bytes
98a5a8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env bash
# Push to a Hugging Face Space as branch "main" (force).
#
# By default we do **not** push your current HEAD as-is. We build a **new** commit (no local
# branch change) that **drops** anything that would exceed the Hub / your size policy:
#   - any binary file (plain Git binaries are rejected by Hugging Face without Xet)
#   - any file (blob) > MAX_BYTES (default 10 MiB)
#   - any directory *prefix* whose **sum of kept** blob sizes would exceed MAX_BYTES; the
#     whole subtree is dropped
# This is the only way to "silently skip" content that is *already* in your git history: a
# normal `git push` would still send old blobs. See scripts/hf_export_build_commit.py.
# Default stderr prints a one-line omit count; set QUIET=1 to suppress it, VERBOSE=1
# to print each omitted path. MAX_BYTES is honored by the Python step.
#
# Environment:
#   HF_TOKEN         — required; write token
#   SNAPSHOT_COMMIT=1 — `git add -A`, commit (after unstaging single files over MAX_BYTES on
#                    disk, still respects .gitignore), then export+push
#   SKIP_HF_FILTER=1 — push **HEAD** without building the size-filtered export (same as old
#                    "push whole branch"; large blobs in history are still sent)
#   QUIET, VERBOSE, MAX_BYTES, HF_USER, PYTHON
set -euo pipefail

NAMESPACE="${1:-}"
SPACE_NAME="${2:-}"
: "${NAMESPACE:?usage: $0 <namespace> <space_name>}"
: "${SPACE_NAME:?usage: $0 <namespace> <space_name>}"
: "${HF_TOKEN:?set HF_TOKEN to a Hugging Face write token}"

HF_USER="${HF_USER:-$NAMESPACE}"
MAX_BYTES="${MAX_BYTES:-$((10 * 1024 * 1024))}"
PYTHON="${PYTHON:-python3}"
ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" || {
  echo "error: not inside a git repository" >&2
  exit 1
}
cd "$ROOT"

unstage_staged_file_over_max() {
  local f s
  while IFS= read -r f; do
    [ -n "$f" ] || continue
    s=0
    if [ -L "$f" ] || ( [ -e "$f" ] && [ ! -d "$f" ] ); then
      s=$(stat -f%z -- "$f" 2>/dev/null) || s=0
    fi
    if [ -n "$s" ] && [ "$s" -gt "$MAX_BYTES" ] 2>/dev/null; then
      git reset -q HEAD -- "$f" 2>/dev/null || true
    fi
  done < <(git diff --cached --name-only 2>/dev/null)
}

if [[ "${SNAPSHOT_COMMIT:-0}" == "1" ]]; then
  git add -A
  unstage_staged_file_over_max
  if git diff --cached --quiet; then
    echo "SNAPSHOT_COMMIT: nothing to commit" >&2
  else
    git commit -m "chore: snapshot before push to Hugging Face Space"
  fi
fi

if [[ "${SKIP_HF_FILTER:-0}" == "1" ]]; then
  PUBLISH_REF="HEAD"
else
  if ! command -v "$PYTHON" &>/dev/null; then
    echo "error: $PYTHON is required to build the HF size-filtered export" >&2
    exit 1
  fi
  if [[ ! -f "$ROOT/scripts/hf_export_build_commit.py" ]]; then
    echo "error: missing $ROOT/scripts/hf_export_build_commit.py" >&2
    exit 1
  fi
  export MAX_BYTES
  PUBLISH_REF=$("$PYTHON" "$ROOT/scripts/hf_export_build_commit.py")
  if [ "${#PUBLISH_REF}" -ne 40 ]; then
    echo "error: export commit not produced" >&2
    exit 1
  fi
fi

REMOTE="https://${HF_USER}:${HF_TOKEN}@huggingface.co/spaces/${NAMESPACE}/${SPACE_NAME}"
FSTATE="on"
if [[ "${SKIP_HF_FILTER:-0}" == "1" ]]; then
  FSTATE="off (HEAD as-is)"
fi
echo "Pushing ${PUBLISH_REF} (size filter: ${FSTATE}) -> huggingface.co/${NAMESPACE}/${SPACE_NAME} main"
git push --force "$REMOTE" "${PUBLISH_REF}:main"