Spaces:
Running on Zero
fix(deploy): switch model lookups to HF + fix ZeroGPU build
Browse files
Multiple issues caught in the HF Space build cycle:
1. DiffSynth defaults to ModelScope, not HF. Set DIFFSYNTH_DOWNLOAD_SOURCE=
huggingface in app.py before imports so preload_from_hub + HF cache work.
2. PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1 is the ModelScope slug. On HF
the mirror is alibaba-pai/... — update models.py + README + spec + test.
3. ZeroGPU build appends spaces==0.50.0 to the install line, conflicting with
our spaces==0.30.0 pin. Drop the pin entirely — HF provides the right one.
4. DiffSynth resolves models at ./models/<repo>/, not ~/.cache/huggingface/hub.
Add models.symlink_hf_cache_to_diffsynth_layout() that creates symlinks
from cache snapshots to that layout. Wire into _bootstrap() so the preload
weights are actually findable at runtime (otherwise the Space re-downloads
all 30 GB on first call).
5. Set DIFFSYNTH_MODEL_BASE_PATH=<repo>/models in _bootstrap().
6. Add models/ to .gitignore.
7. _bootstrap() now also runs locally (just the symlink step) so the user's
existing HF cache snapshots get linked into the project's ./models/.
- .gitignore +3 -0
- README.md +1 -1
- app.py +34 -9
- docs/superpowers/specs/2026-05-13-z-image-studio-design.md +3 -3
- models.py +47 -1
- requirements.txt +2 -1
- tests/test_models.py +1 -1
|
@@ -21,6 +21,9 @@ venv/
|
|
| 21 |
!assets/seed_inputs/*.png
|
| 22 |
!assets/seed_inputs/*.jpg
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
# Gradio runtime
|
| 25 |
gradio_cached_examples/
|
| 26 |
output/
|
|
|
|
| 21 |
!assets/seed_inputs/*.png
|
| 22 |
!assets/seed_inputs/*.jpg
|
| 23 |
|
| 24 |
+
# DiffSynth-local model dir (symlinks to HF cache; weights live in ~/.cache/huggingface/hub)
|
| 25 |
+
models/
|
| 26 |
+
|
| 27 |
# Gradio runtime
|
| 28 |
gradio_cached_examples/
|
| 29 |
output/
|
|
@@ -12,7 +12,7 @@ hf_oauth: false
|
|
| 12 |
preload_from_hub:
|
| 13 |
- Tongyi-MAI/Z-Image transformer/diffusion_pytorch_model.safetensors,text_encoder/*.safetensors,vae/diffusion_pytorch_model.safetensors,tokenizer/*
|
| 14 |
- Tongyi-MAI/Z-Image-Turbo transformer/diffusion_pytorch_model.safetensors
|
| 15 |
-
-
|
| 16 |
- lllyasviel/Annotators RealESRGAN_x4plus.pth
|
| 17 |
---
|
| 18 |
|
|
|
|
| 12 |
preload_from_hub:
|
| 13 |
- Tongyi-MAI/Z-Image transformer/diffusion_pytorch_model.safetensors,text_encoder/*.safetensors,vae/diffusion_pytorch_model.safetensors,tokenizer/*
|
| 14 |
- Tongyi-MAI/Z-Image-Turbo transformer/diffusion_pytorch_model.safetensors
|
| 15 |
+
- alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1 Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors
|
| 16 |
- lllyasviel/Annotators RealESRGAN_x4plus.pth
|
| 17 |
---
|
| 18 |
|
|
@@ -10,10 +10,14 @@ import os
|
|
| 10 |
import random
|
| 11 |
from pathlib import Path
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
import gradio as gr
|
| 14 |
|
| 15 |
import backend
|
| 16 |
-
import lora as lora_mod
|
| 17 |
import models
|
| 18 |
import theme
|
| 19 |
import ui
|
|
@@ -21,15 +25,36 @@ import ui
|
|
| 21 |
# ----- HF Spaces bootstrap ---------------------------------------------------
|
| 22 |
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def _bootstrap() -> None:
|
| 25 |
-
"""Mirror the preload_from_hub cache
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
|
| 35 |
_bootstrap()
|
|
|
|
| 10 |
import random
|
| 11 |
from pathlib import Path
|
| 12 |
|
| 13 |
+
# DiffSynth defaults to ModelScope; force HF so preload_from_hub + HF cache work.
|
| 14 |
+
# Must be set before any diffsynth import path is taken (backend imports it lazily).
|
| 15 |
+
os.environ.setdefault("DIFFSYNTH_DOWNLOAD_SOURCE", "huggingface")
|
| 16 |
+
|
| 17 |
import gradio as gr
|
| 18 |
|
| 19 |
import backend
|
| 20 |
+
import lora as lora_mod
|
| 21 |
import models
|
| 22 |
import theme
|
| 23 |
import ui
|
|
|
|
| 25 |
# ----- HF Spaces bootstrap ---------------------------------------------------
|
| 26 |
|
| 27 |
|
| 28 |
+
_REPO_ROOT = Path(__file__).resolve().parent
|
| 29 |
+
_DIFFSYNTH_MODELS_DIR = _REPO_ROOT / "models"
|
| 30 |
+
|
| 31 |
+
|
| 32 |
def _bootstrap() -> None:
    """Prepare the HF cache and expose its snapshots in DiffSynth's layout.

    On Spaces: the preload cache is mirrored into ``~/hf-cache-rw`` (the
    original cache is expected to be read-only, owned by the build user),
    ``HF_HOME`` / ``HF_HUB_CACHE`` are repointed at the mirror, and the
    mirrored snapshots are symlinked into the DiffSynth models directory.

    Locally: the mirror step is skipped; snapshots from the user's existing
    hub cache are symlinked directly.

    The models directory is whatever ``DIFFSYNTH_MODEL_BASE_PATH`` resolves to:
    a value already present in the environment wins, otherwise it defaults to
    ``<repo>/models``. Symlinks are created in that same directory so DiffSynth
    and the links can never point at different places.
    """
    default_hf_home = Path.home() / ".cache" / "huggingface"

    if models.on_spaces():
        src = Path(os.environ.get("HF_HOME", str(default_hf_home)))
        dst = Path.home() / "hf-cache-rw"
        models.mirror_preload_hf_cache(src, dst)
        cache_hub = dst / "hub"
        os.environ["HF_HOME"] = str(dst)
        os.environ["HF_HUB_CACHE"] = str(cache_hub)
    else:
        # huggingface_hub resolves the hub cache as HF_HUB_CACHE, falling back
        # to $HF_HOME/hub; honour HF_HOME so a relocated cache is still found.
        hf_home = Path(os.environ.get("HF_HOME", str(default_hf_home)))
        cache_hub = Path(os.environ.get("HF_HUB_CACHE", str(hf_home / "hub")))

    # setdefault returns the effective value, so a pre-set override is respected
    # both by DiffSynth and by the symlink step below.
    models_dir = Path(
        os.environ.setdefault("DIFFSYNTH_MODEL_BASE_PATH", str(_DIFFSYNTH_MODELS_DIR))
    )
    models_dir.mkdir(parents=True, exist_ok=True)
    models.symlink_hf_cache_to_diffsynth_layout(cache_hub, models_dir)
|
| 58 |
|
| 59 |
|
| 60 |
_bootstrap()
|
|
@@ -69,7 +69,7 @@ All three modes go through one `ZImagePipeline.__call__`. Mode-specific code is
|
|
| 69 |
| --- | --- | --- | --- |
|
| 70 |
| **T2I (Base)** | `Tongyi-MAI/Z-Image` | `pipe(prompt, negative_prompt, cfg_scale=4.0, num_inference_steps=25, sigma_shift=3.0, height, width, seed)` | `image_z_image.json` |
|
| 71 |
| **T2I (Turbo)** | `Tongyi-MAI/Z-Image-Turbo` | `pipe(prompt, cfg_scale=1.0, num_inference_steps=8, sigma_shift=3.0, height, width, seed)` | `image_z_image_turbo.json` |
|
| 72 |
-
| **ControlNet** | Turbo + `
|
| 73 |
| **Upscale** | Turbo + RealESRGAN_x4plus | `RealESRGAN_x4(input) → PIL.resize 0.5 → pipe(prompt="masterpiece, 8k", input_image=upscaled, denoising_strength=0.33, num_inference_steps=5, cfg_scale=1.0, sigma_shift=3.0)` | `utility_z_image_turbo_2k_upscaler.json` |
|
| 74 |
|
| 75 |
**LoRA wiring:** validated `safetensors` file + `gr.Slider(0.0, 1.5, value=0.8)` strength. Applied via DiffSynth's `merge_lora` inside an apply/revert context manager so the cached GPU model returns to a clean state after each request. Safetensors header sniffed before `@spaces.GPU` fires to reject mismatched LoRAs with a clear error (no GPU slot wasted).
|
|
@@ -335,7 +335,7 @@ hf_oauth: false
|
|
| 335 |
preload_from_hub:
|
| 336 |
- Tongyi-MAI/Z-Image transformer/diffusion_pytorch_model.safetensors,text_encoder/*.safetensors,vae/diffusion_pytorch_model.safetensors,tokenizer/*
|
| 337 |
- Tongyi-MAI/Z-Image-Turbo transformer/diffusion_pytorch_model.safetensors
|
| 338 |
-
-
|
| 339 |
- lllyasviel/Annotators RealESRGAN_x4plus.pth
|
| 340 |
---
|
| 341 |
```
|
|
@@ -371,7 +371,7 @@ pipe = ZImagePipeline.from_pretrained(
|
|
| 371 |
ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors", **vram_cfg),
|
| 372 |
# ControlNet — eager preload at boot to avoid first-ControlNet-call wait.
|
| 373 |
# If startup RAM becomes tight on Spaces, move this to a lazy-load on first ControlNet request.
|
| 374 |
-
ModelConfig(model_id="
|
| 375 |
origin_file_pattern="Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors", **vram_cfg),
|
| 376 |
],
|
| 377 |
tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="tokenizer/"),
|
|
|
|
| 69 |
| --- | --- | --- | --- |
|
| 70 |
| **T2I (Base)** | `Tongyi-MAI/Z-Image` | `pipe(prompt, negative_prompt, cfg_scale=4.0, num_inference_steps=25, sigma_shift=3.0, height, width, seed)` | `image_z_image.json` |
|
| 71 |
| **T2I (Turbo)** | `Tongyi-MAI/Z-Image-Turbo` | `pipe(prompt, cfg_scale=1.0, num_inference_steps=8, sigma_shift=3.0, height, width, seed)` | `image_z_image_turbo.json` |
|
| 72 |
+
| **ControlNet** | Turbo + `alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1` | `pipe(prompt, controlnet_inputs=[ControlNetInput(image=preprocessed, scale)], cfg_scale=1.0, num_inference_steps=9, sigma_shift=3.0)` | `image_z_image_turbo_fun_union_controlnet.json` |
|
| 73 |
| **Upscale** | Turbo + RealESRGAN_x4plus | `RealESRGAN_x4(input) → PIL.resize 0.5 → pipe(prompt="masterpiece, 8k", input_image=upscaled, denoising_strength=0.33, num_inference_steps=5, cfg_scale=1.0, sigma_shift=3.0)` | `utility_z_image_turbo_2k_upscaler.json` |
|
| 74 |
|
| 75 |
**LoRA wiring:** validated `safetensors` file + `gr.Slider(0.0, 1.5, value=0.8)` strength. Applied via DiffSynth's `merge_lora` inside an apply/revert context manager so the cached GPU model returns to a clean state after each request. Safetensors header sniffed before `@spaces.GPU` fires to reject mismatched LoRAs with a clear error (no GPU slot wasted).
|
|
|
|
| 335 |
preload_from_hub:
|
| 336 |
- Tongyi-MAI/Z-Image transformer/diffusion_pytorch_model.safetensors,text_encoder/*.safetensors,vae/diffusion_pytorch_model.safetensors,tokenizer/*
|
| 337 |
- Tongyi-MAI/Z-Image-Turbo transformer/diffusion_pytorch_model.safetensors
|
| 338 |
+
- alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1 Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors
|
| 339 |
- lllyasviel/Annotators RealESRGAN_x4plus.pth
|
| 340 |
---
|
| 341 |
```
|
|
|
|
| 371 |
ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors", **vram_cfg),
|
| 372 |
# ControlNet — eager preload at boot to avoid first-ControlNet-call wait.
|
| 373 |
# If startup RAM becomes tight on Spaces, move this to a lazy-load on first ControlNet request.
|
| 374 |
+
ModelConfig(model_id="alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1",
|
| 375 |
origin_file_pattern="Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors", **vram_cfg),
|
| 376 |
],
|
| 377 |
tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="tokenizer/"),
|
|
@@ -77,7 +77,7 @@ MODEL_CONFIGS: tuple[ModelConfig, ...] = (
|
|
| 77 |
ModelConfig("Tongyi-MAI/Z-Image-Turbo", "transformer/*.safetensors", "Z-Image-Turbo transformer (8 steps, cfg=1)"),
|
| 78 |
# ControlNet Union 2.1 (eager preload per spec; can move to lazy if RAM is tight)
|
| 79 |
ModelConfig(
|
| 80 |
-
"
|
| 81 |
"Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors",
|
| 82 |
"ControlNet Union 2.1 — canny/depth/pose",
|
| 83 |
),
|
|
@@ -151,3 +151,49 @@ def mirror_preload_hf_cache(src_root: Path | str, dst_root: Path | str) -> None:
|
|
| 151 |
dst_path.symlink_to(src_path)
|
| 152 |
else:
|
| 153 |
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
ModelConfig("Tongyi-MAI/Z-Image-Turbo", "transformer/*.safetensors", "Z-Image-Turbo transformer (8 steps, cfg=1)"),
|
| 78 |
# ControlNet Union 2.1 (eager preload per spec; can move to lazy if RAM is tight)
|
| 79 |
ModelConfig(
|
| 80 |
+
"alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1",
|
| 81 |
"Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors",
|
| 82 |
"ControlNet Union 2.1 — canny/depth/pose",
|
| 83 |
),
|
|
|
|
| 151 |
dst_path.symlink_to(src_path)
|
| 152 |
else:
|
| 153 |
raise
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def symlink_hf_cache_to_diffsynth_layout(cache_hub: Path | str, dest_root: Path | str) -> list[str]:
|
| 157 |
+
"""For each ``models--<org>--<repo>`` under ``cache_hub``, symlink the latest snapshot
|
| 158 |
+
dir to ``dest_root/<org>/<repo>/`` — the layout DiffSynth's ModelConfig expects.
|
| 159 |
+
|
| 160 |
+
DiffSynth's ``download()`` joins ``local_model_path`` with ``model_id`` and either
|
| 161 |
+
finds matching files (skipping download) or fetches them. Putting symlinks at the
|
| 162 |
+
expected location lets DiffSynth reuse our HF-cache snapshots instead of re-downloading.
|
| 163 |
+
|
| 164 |
+
Returns the list of dest paths created. Idempotent: existing valid symlinks are kept.
|
| 165 |
+
"""
|
| 166 |
+
cache_hub = Path(cache_hub)
|
| 167 |
+
dest_root = Path(dest_root)
|
| 168 |
+
if not cache_hub.is_dir():
|
| 169 |
+
return []
|
| 170 |
+
|
| 171 |
+
created: list[str] = []
|
| 172 |
+
for entry in sorted(cache_hub.iterdir()):
|
| 173 |
+
if not entry.is_dir() or not entry.name.startswith("models--"):
|
| 174 |
+
continue
|
| 175 |
+
# "models--Tongyi-MAI--Z-Image-Turbo" -> ("Tongyi-MAI", "Z-Image-Turbo")
|
| 176 |
+
# Some repos contain "--" in their name; only split off the first segment.
|
| 177 |
+
rest = entry.name[len("models--") :]
|
| 178 |
+
parts = rest.split("--", 1)
|
| 179 |
+
if len(parts) != 2:
|
| 180 |
+
continue
|
| 181 |
+
org, repo = parts
|
| 182 |
+
|
| 183 |
+
snapshots = entry / "snapshots"
|
| 184 |
+
if not snapshots.is_dir():
|
| 185 |
+
continue
|
| 186 |
+
sha_dirs = [d for d in snapshots.iterdir() if d.is_dir()]
|
| 187 |
+
if not sha_dirs:
|
| 188 |
+
continue
|
| 189 |
+
# Newest by mtime — usually the only one for our preload + first-fetch flow.
|
| 190 |
+
sha_dirs.sort(key=lambda d: d.stat().st_mtime, reverse=True)
|
| 191 |
+
snap = sha_dirs[0]
|
| 192 |
+
|
| 193 |
+
link = dest_root / org / repo
|
| 194 |
+
if link.is_symlink() or link.exists():
|
| 195 |
+
continue
|
| 196 |
+
link.parent.mkdir(parents=True, exist_ok=True)
|
| 197 |
+
link.symlink_to(snap)
|
| 198 |
+
created.append(str(link))
|
| 199 |
+
return created
|
|
@@ -1,6 +1,7 @@
|
|
| 1 |
# Core
|
| 2 |
gradio==5.50.0
|
| 3 |
-
spaces
|
|
|
|
| 4 |
diffsynth @ git+https://github.com/modelscope/DiffSynth-Studio.git
|
| 5 |
torch>=2.4
|
| 6 |
safetensors>=0.4.5
|
|
|
|
| 1 |
# Core
|
| 2 |
gradio==5.50.0
|
| 3 |
+
# `spaces` is auto-injected by HF Spaces on ZeroGPU at build time (pin would conflict).
|
| 4 |
+
# Locally, install it ad-hoc if you want to test the @spaces.GPU decorator path.
|
| 5 |
diffsynth @ git+https://github.com/modelscope/DiffSynth-Studio.git
|
| 6 |
torch>=2.4
|
| 7 |
safetensors>=0.4.5
|
|
@@ -25,7 +25,7 @@ def test_model_configs_contains_both_transformers():
|
|
| 25 |
repos = {c.model_id for c in configs}
|
| 26 |
assert "Tongyi-MAI/Z-Image" in repos
|
| 27 |
assert "Tongyi-MAI/Z-Image-Turbo" in repos
|
| 28 |
-
assert "
|
| 29 |
|
| 30 |
|
| 31 |
def test_vram_limit_for_cuda_is_reasonable():
|
|
|
|
| 25 |
repos = {c.model_id for c in configs}
|
| 26 |
assert "Tongyi-MAI/Z-Image" in repos
|
| 27 |
assert "Tongyi-MAI/Z-Image-Turbo" in repos
|
| 28 |
+
assert "alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1" in repos
|
| 29 |
|
| 30 |
|
| 31 |
def test_vram_limit_for_cuda_is_reasonable():
|