Spaces:
Running on Zero
feat(spaces): mirror build-time HF cache to runtime-writable tree
Browse files
preload_from_hub populates ~/.cache/huggingface/ as the build user,
leaving it read-only for runtime uid 1000. Lazy hf_hub_download calls
for non-preloaded files (GGUF, camera LoRAs) failed with "Permission
denied". chmod couldn't help — wrong inode owner.
_mirror_preload_hf_cache() walks the preload tree once at bootstrap,
building a parallel ~/hf-cache-rw/ that we own:
- blobs/<sha>: hardlinked (zero-copy, shared inode, instant reads).
Falls back to a symlink if the hardlink fails (e.g. EXDEV, cross-device).
- snapshots/<commit>/<file>: relative symlinks preserved. Targets
resolve within the mirror tree, no extra walking.
- refs/<branch>: byte-copied — HF lib overwrites these on etag check,
hardlinks would fail there.
- everything else: byte-copied (safest default for unknown files).
Sets HF_HOME + HF_HUB_CACHE so HF lib reads/writes through the mirror.
After this:
- preloaded files: instant cache hit (no network)
- new lazy downloads: write to dirs we created → no permission errors
Tested locally on a synthetic preload tree: hardlinks shared inode,
snapshot symlinks resolved correctly, refs/main was overwritable, new
files in both preloaded and entirely-new repos wrote successfully.
- app.py +93 -17
- docs/future_improvements.md +19 -0
|
@@ -63,6 +63,91 @@ def _git_clone(url: str, dst: pathlib.Path, ref: str) -> None:
|
|
| 63 |
subprocess.check_call(["git", "-C", str(dst), "checkout", "-q", "FETCH_HEAD"])
|
| 64 |
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
def _bootstrap() -> None:
|
| 67 |
on_spaces = _on_spaces()
|
| 68 |
# /data requires the paid persistent-storage add-on (separate from Pro).
|
|
@@ -95,24 +180,15 @@ def _bootstrap() -> None:
|
|
| 95 |
sys.path.insert(0, str(comfy_dir))
|
| 96 |
os.environ.setdefault("COMFY_MODELS_DIR", str(comfy_dir / "models"))
|
| 97 |
|
| 98 |
-
#
|
| 99 |
-
#
|
| 100 |
-
#
|
| 101 |
-
#
|
| 102 |
-
#
|
|
|
|
|
|
|
| 103 |
if on_spaces:
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
hf_cache = pathlib.Path.home() / ".cache" / "huggingface"
|
| 107 |
-
if hf_cache.exists():
|
| 108 |
-
try:
|
| 109 |
-
subprocess.run(
|
| 110 |
-
["chmod", "-R", "u+rwX", str(hf_cache)],
|
| 111 |
-
check=False,
|
| 112 |
-
timeout=30,
|
| 113 |
-
)
|
| 114 |
-
except Exception as exc:
|
| 115 |
-
print(f"[bootstrap] hf cache chmod skipped: {exc}", flush=True)
|
| 116 |
|
| 117 |
# Stage placeholder input files so the workflow's hard-referenced loaders
|
| 118 |
# (LoadImage/VHS_Load*) don't error at runtime even when the active mode
|
|
|
|
| 63 |
subprocess.check_call(["git", "-C", str(dst), "checkout", "-q", "FETCH_HEAD"])
|
| 64 |
|
| 65 |
|
| 66 |
+
def _mirror_preload_hf_cache() -> None:
    """Mirror the build-populated HF cache into a writable runtime tree.

    HF Spaces' build pipeline runs `preload_from_hub` as a different user
    than the runtime container, so the populated `~/.cache/huggingface/`
    is read-only for us (uid 1000). Any subsequent `hf_hub_download` call
    that needs to write a NEW file (lazy-loaded LoRAs, GGUF, etc.) fails
    with "Permission denied" because the parent dir isn't writable.

    Fix: build a parallel tree at `~/hf-cache-rw/` that we own, with:
      - dirs: created fresh via mkdir
      - blob files (`blobs/<sha>`): hardlinked (shared inode, no data copy);
        if the hardlink fails for any reason (cross-device, kernel hardlink
        restrictions, ...) an absolute symlink to the source is used instead
      - symlinks (e.g. relative snapshot links): recreated with the same
        target string, so relative targets resolve inside the mirror
      - everything else (`refs/<branch>`, lock files, version markers, ...):
        byte-copied, so the runtime user owns the inode

    HF_HOME / HF_HUB_CACHE are always pointed at the mirror, even when there
    is no build-time cache to mirror. Destination entries that already exist
    are left untouched, so calling this again is harmless. Per-entry failures
    are logged and counted; they never abort the walk.
    """
    import shutil

    home = pathlib.Path.home()
    src_root = home / ".cache" / "huggingface"
    dst_root = home / "hf-cache-rw"
    dst_root.mkdir(parents=True, exist_ok=True)
    os.environ["HF_HOME"] = str(dst_root)
    os.environ["HF_HUB_CACHE"] = str(dst_root / "hub")

    if not src_root.exists():
        return

    counts = {"dirs": 0, "hardlinks": 0, "symlinks": 0, "copies": 0, "errors": 0}

    def _is_blob(rel_path: pathlib.PurePath) -> bool:
        # Blobs live flat inside a `blobs/` directory; only those are shared
        # by inode. Anything else may be rewritten in place, so it is copied.
        return rel_path.parent.name == "blobs"

    def _walk(s: pathlib.Path, d: pathlib.Path) -> None:
        try:
            d.mkdir(parents=True, exist_ok=True)
        except OSError as exc:
            print(f"[bootstrap] mirror mkdir fail {d}: {exc}", flush=True)
            counts["errors"] += 1
            return
        counts["dirs"] += 1

        # Materialise the listing inside a try: an unreadable source dir must
        # be counted as an error, not propagate out of the bootstrap.
        try:
            entries = list(s.iterdir())
        except OSError as exc:
            print(f"[bootstrap] mirror list fail {s}: {exc}", flush=True)
            counts["errors"] += 1
            return

        for entry in entries:
            de = d / entry.name
            try:
                if entry.is_symlink():
                    # lexists() also sees dangling symlinks left by a
                    # previous run, which exists() would miss.
                    if os.path.lexists(de):
                        continue
                    de.symlink_to(os.readlink(entry))
                    counts["symlinks"] += 1
                elif entry.is_dir():
                    _walk(entry, de)
                elif entry.is_file():
                    if os.path.lexists(de):
                        continue
                    if _is_blob(de.relative_to(dst_root)):
                        try:
                            os.link(entry, de)
                            counts["hardlinks"] += 1
                        except OSError:
                            # Cross-device or other — fall back to symlink.
                            de.symlink_to(entry)
                            counts["symlinks"] += 1
                    else:
                        shutil.copy2(entry, de)
                        counts["copies"] += 1
            except OSError as exc:
                print(f"[bootstrap] mirror skip {entry}: {exc}", flush=True)
                counts["errors"] += 1

    _walk(src_root, dst_root)
    print(
        f"[bootstrap] hf cache mirrored to {dst_root}: "
        f"{counts['dirs']} dirs, {counts['hardlinks']} hardlinks, "
        f"{counts['symlinks']} symlinks, {counts['copies']} copies, "
        f"{counts['errors']} errors",
        flush=True,
    )
|
| 149 |
+
|
| 150 |
+
|
| 151 |
def _bootstrap() -> None:
|
| 152 |
on_spaces = _on_spaces()
|
| 153 |
# /data requires the paid persistent-storage add-on (separate from Pro).
|
|
|
|
| 180 |
sys.path.insert(0, str(comfy_dir))
|
| 181 |
os.environ.setdefault("COMFY_MODELS_DIR", str(comfy_dir / "models"))
|
| 182 |
|
| 183 |
+
# Mirror the build-time HF cache (populated by preload_from_hub, owned by
|
| 184 |
+
# build user → read-only for runtime user 1000) into a writable parallel
|
| 185 |
+
# tree under $HOME, then point HF_HUB_CACHE / HF_HOME at it. After this:
|
| 186 |
+
# - preloaded blobs are accessible via hardlink (no data copy, instant reads)
|
| 187 |
+
# - relative snapshot symlinks resolve within the mirror
|
| 188 |
+
# - refs/* are byte-copies so HF lib can overwrite when commits advance
|
| 189 |
+
# - new lazy-downloaded files write to dirs we own → no permission errors
|
| 190 |
if on_spaces:
|
| 191 |
+
_mirror_preload_hf_cache()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
# Stage placeholder input files so the workflow's hard-referenced loaders
|
| 194 |
# (LoadImage/VHS_Load*) don't error at runtime even when the active mode
|
|
@@ -6,6 +6,25 @@ not commitment.
|
|
| 6 |
|
| 7 |
## Spaces / preload
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
### ~~1. Stop preloading models that aren't referenced by any workflow~~ — DONE 2026-05-02
|
| 10 |
|
| 11 |
Audit on 2026-05-02 showed two `Lightricks/LTX-2.3` files in `preload_from_hub`
|
|
|
|
| 6 |
|
| 7 |
## Spaces / preload
|
| 8 |
|
| 9 |
+
### ~~0. Re-enable `preload_from_hub` via runtime cache mirror~~ — DONE 2026-05-02
|
| 10 |
+
|
| 11 |
+
Initial preload deployment failed because HF's build pipeline writes
|
| 12 |
+
`~/.cache/huggingface/` as the build user, leaving it read-only for runtime
|
| 13 |
+
user 1000. Lazy `hf_hub_download` for non-preloaded files (GGUF, camera LoRAs)
|
| 14 |
+
failed with `Permission denied (os error 13)`. `chmod` couldn't help — we
|
| 15 |
+
don't own the inode.
|
| 16 |
+
|
| 17 |
+
Fix landed in `_mirror_preload_hf_cache()`, which `_bootstrap()` calls on Spaces:
|
| 18 |
+
- Walks `~/.cache/huggingface/` to a parallel `~/hf-cache-rw/` we own
|
| 19 |
+
- Hardlinks `blobs/<sha>` files (zero-copy, shared inode, instant reads)
|
| 20 |
+
- Preserves relative snapshot symlinks (resolve within the mirror tree)
|
| 21 |
+
- Byte-copies `refs/<branch>` files (HF lib overwrites these on etag check)
|
| 22 |
+
- Sets `HF_HOME` + `HF_HUB_CACHE` to the mirror so HF lib uses our writable copy
|
| 23 |
+
- Falls back to a symlink if `os.link()` fails (e.g. with EXDEV, cross-device)
|
| 24 |
+
|
| 25 |
+
Result: preloaded files are instantly available (cache hit on first generate),
|
| 26 |
+
non-preloaded files lazy-download into dirs we own (no permission errors).
|
| 27 |
+
|
| 28 |
### ~~1. Stop preloading models that aren't referenced by any workflow~~ — DONE 2026-05-02
|
| 29 |
|
| 30 |
Audit on 2026-05-02 showed two `Lightricks/LTX-2.3` files in `preload_from_hub`
|