techfreakworm committed on
Commit
99302bc
·
unverified ·
1 Parent(s): 213bf15

fix(deploy): switch model lookups to HF + fix ZeroGPU build

Browse files

Multiple issues caught in the HF Space build cycle:

1. DiffSynth defaults to ModelScope, not HF. Set
DIFFSYNTH_DOWNLOAD_SOURCE=huggingface in app.py before imports so
preload_from_hub + HF cache work.

2. PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1 is the ModelScope slug. On HF
the mirror is alibaba-pai/... — update models.py + README + spec + test.

3. ZeroGPU build appends spaces==0.50.0 to the install line, conflicting with
our spaces==0.30.0 pin. Drop the pin entirely — HF provides the right one.

4. DiffSynth resolves models at ./models/<org>/<repo>/, not ~/.cache/huggingface/hub.
Add models.symlink_hf_cache_to_diffsynth_layout() that creates symlinks
from cache snapshots to that layout. Wire into _bootstrap() so the preload
weights are actually findable at runtime (otherwise the Space re-downloads
all 30 GB on first call).

5. Set DIFFSYNTH_MODEL_BASE_PATH=<project root>/models in _bootstrap().

6. Add models/ to .gitignore.

7. _bootstrap() now also runs locally (just the symlink step) so the user's
existing HF cache snapshots get linked into the project's ./models/.

.gitignore CHANGED
@@ -21,6 +21,9 @@ venv/
21
  !assets/seed_inputs/*.png
22
  !assets/seed_inputs/*.jpg
23
 
 
 
 
24
  # Gradio runtime
25
  gradio_cached_examples/
26
  output/
 
21
  !assets/seed_inputs/*.png
22
  !assets/seed_inputs/*.jpg
23
 
24
+ # DiffSynth-local model dir (symlinks to HF cache; weights live in ~/.cache/huggingface/hub)
25
+ models/
26
+
27
  # Gradio runtime
28
  gradio_cached_examples/
29
  output/
README.md CHANGED
@@ -12,7 +12,7 @@ hf_oauth: false
12
  preload_from_hub:
13
  - Tongyi-MAI/Z-Image transformer/diffusion_pytorch_model.safetensors,text_encoder/*.safetensors,vae/diffusion_pytorch_model.safetensors,tokenizer/*
14
  - Tongyi-MAI/Z-Image-Turbo transformer/diffusion_pytorch_model.safetensors
15
- - PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1 Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors
16
  - lllyasviel/Annotators RealESRGAN_x4plus.pth
17
  ---
18
 
 
12
  preload_from_hub:
13
  - Tongyi-MAI/Z-Image transformer/diffusion_pytorch_model.safetensors,text_encoder/*.safetensors,vae/diffusion_pytorch_model.safetensors,tokenizer/*
14
  - Tongyi-MAI/Z-Image-Turbo transformer/diffusion_pytorch_model.safetensors
15
+ - alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1 Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors
16
  - lllyasviel/Annotators RealESRGAN_x4plus.pth
17
  ---
18
 
app.py CHANGED
@@ -10,10 +10,14 @@ import os
10
  import random
11
  from pathlib import Path
12
 
 
 
 
 
13
  import gradio as gr
14
 
15
  import backend
16
- import lora as lora_mod # avoid shadowing the gr.File `lora_path` name
17
  import models
18
  import theme
19
  import ui
@@ -21,15 +25,36 @@ import ui
21
  # ----- HF Spaces bootstrap ---------------------------------------------------
22
 
23
 
 
 
 
 
24
  def _bootstrap() -> None:
25
- """Mirror the preload_from_hub cache once, then point HF env at the mirror."""
26
- if not models.on_spaces():
27
- return
28
- src = Path(os.environ.get("HF_HOME", str(Path.home() / ".cache" / "huggingface")))
29
- dst = Path.home() / "hf-cache-rw"
30
- models.mirror_preload_hf_cache(src, dst)
31
- os.environ["HF_HOME"] = str(dst)
32
- os.environ["HF_HUB_CACHE"] = str(dst / "hub")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
 
35
  _bootstrap()
 
10
  import random
11
  from pathlib import Path
12
 
13
+ # DiffSynth defaults to ModelScope; force HF so preload_from_hub + HF cache work.
14
+ # Must be set before any diffsynth import path is taken (backend imports it lazily).
15
+ os.environ.setdefault("DIFFSYNTH_DOWNLOAD_SOURCE", "huggingface")
16
+
17
  import gradio as gr
18
 
19
  import backend
20
+ import lora as lora_mod
21
  import models
22
  import theme
23
  import ui
 
25
  # ----- HF Spaces bootstrap ---------------------------------------------------
26
 
27
 
28
+ _REPO_ROOT = Path(__file__).resolve().parent
29
+ _DIFFSYNTH_MODELS_DIR = _REPO_ROOT / "models"
30
+
31
+
32
def _bootstrap() -> None:
    """Expose cached Hugging Face snapshots where DiffSynth looks for them.

    DiffSynth resolves weights under ``$DIFFSYNTH_MODEL_BASE_PATH/<org>/<repo>/``
    rather than in the HF hub cache, so every cached snapshot is symlinked into
    that layout; otherwise the pipeline re-downloads the weights on first call.

    On Spaces: the preload cache is read-only (owned by the build user), so it
    is mirrored to ``~/hf-cache-rw`` first, ``HF_HOME`` / ``HF_HUB_CACHE`` are
    repointed at the mirror, and the symlinks are built from the mirror.

    Locally: the mirror step is skipped (we own the dirs); the symlinks are
    built straight from the user's existing HF hub cache.
    """
    hf_home = Path(os.environ.get("HF_HOME", str(Path.home() / ".cache" / "huggingface")))

    if models.on_spaces():
        dst = Path.home() / "hf-cache-rw"
        models.mirror_preload_hf_cache(hf_home, dst)
        os.environ["HF_HOME"] = str(dst)
        os.environ["HF_HUB_CACHE"] = str(dst / "hub")
        cache_hub = dst / "hub"
    else:
        # huggingface_hub defaults HF_HUB_CACHE to "$HF_HOME/hub", so a custom
        # HF_HOME must be honoured when HF_HUB_CACHE itself is not set.
        cache_hub = Path(os.environ.get("HF_HUB_CACHE", str(hf_home / "hub")))

    # setdefault keeps a user-supplied DIFFSYNTH_MODEL_BASE_PATH. Build the
    # symlinks under whichever directory ends up in effect, so DiffSynth and
    # the links always agree on the location.
    models_dir = Path(
        os.environ.setdefault("DIFFSYNTH_MODEL_BASE_PATH", str(_DIFFSYNTH_MODELS_DIR))
    )
    models_dir.mkdir(parents=True, exist_ok=True)
    models.symlink_hf_cache_to_diffsynth_layout(cache_hub, models_dir)
58
 
59
 
60
  _bootstrap()
docs/superpowers/specs/2026-05-13-z-image-studio-design.md CHANGED
@@ -69,7 +69,7 @@ All three modes go through one `ZImagePipeline.__call__`. Mode-specific code is
69
  | --- | --- | --- | --- |
70
  | **T2I (Base)** | `Tongyi-MAI/Z-Image` | `pipe(prompt, negative_prompt, cfg_scale=4.0, num_inference_steps=25, sigma_shift=3.0, height, width, seed)` | `image_z_image.json` |
71
  | **T2I (Turbo)** | `Tongyi-MAI/Z-Image-Turbo` | `pipe(prompt, cfg_scale=1.0, num_inference_steps=8, sigma_shift=3.0, height, width, seed)` | `image_z_image_turbo.json` |
72
- | **ControlNet** | Turbo + `PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1` | `pipe(prompt, controlnet_inputs=[ControlNetInput(image=preprocessed, scale)], cfg_scale=1.0, num_inference_steps=9, sigma_shift=3.0)` | `image_z_image_turbo_fun_union_controlnet.json` |
73
  | **Upscale** | Turbo + RealESRGAN_x4plus | `RealESRGAN_x4(input) → PIL.resize 0.5 → pipe(prompt="masterpiece, 8k", input_image=upscaled, denoising_strength=0.33, num_inference_steps=5, cfg_scale=1.0, sigma_shift=3.0)` | `utility_z_image_turbo_2k_upscaler.json` |
74
 
75
  **LoRA wiring:** validated `safetensors` file + `gr.Slider(0.0, 1.5, value=0.8)` strength. Applied via DiffSynth's `merge_lora` inside an apply/revert context manager so the cached GPU model returns to a clean state after each request. Safetensors header sniffed before `@spaces.GPU` fires to reject mismatched LoRAs with a clear error (no GPU slot wasted).
@@ -335,7 +335,7 @@ hf_oauth: false
335
  preload_from_hub:
336
  - Tongyi-MAI/Z-Image transformer/diffusion_pytorch_model.safetensors,text_encoder/*.safetensors,vae/diffusion_pytorch_model.safetensors,tokenizer/*
337
  - Tongyi-MAI/Z-Image-Turbo transformer/diffusion_pytorch_model.safetensors
338
- - PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1 Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors
339
  - lllyasviel/Annotators RealESRGAN_x4plus.pth
340
  ---
341
  ```
@@ -371,7 +371,7 @@ pipe = ZImagePipeline.from_pretrained(
371
  ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors", **vram_cfg),
372
  # ControlNet — eager preload at boot to avoid first-ControlNet-call wait.
373
  # If startup RAM becomes tight on Spaces, move this to a lazy-load on first ControlNet request.
374
- ModelConfig(model_id="PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1",
375
  origin_file_pattern="Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors", **vram_cfg),
376
  ],
377
  tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="tokenizer/"),
 
69
  | --- | --- | --- | --- |
70
  | **T2I (Base)** | `Tongyi-MAI/Z-Image` | `pipe(prompt, negative_prompt, cfg_scale=4.0, num_inference_steps=25, sigma_shift=3.0, height, width, seed)` | `image_z_image.json` |
71
  | **T2I (Turbo)** | `Tongyi-MAI/Z-Image-Turbo` | `pipe(prompt, cfg_scale=1.0, num_inference_steps=8, sigma_shift=3.0, height, width, seed)` | `image_z_image_turbo.json` |
72
+ | **ControlNet** | Turbo + `alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1` | `pipe(prompt, controlnet_inputs=[ControlNetInput(image=preprocessed, scale)], cfg_scale=1.0, num_inference_steps=9, sigma_shift=3.0)` | `image_z_image_turbo_fun_union_controlnet.json` |
73
  | **Upscale** | Turbo + RealESRGAN_x4plus | `RealESRGAN_x4(input) → PIL.resize 0.5 → pipe(prompt="masterpiece, 8k", input_image=upscaled, denoising_strength=0.33, num_inference_steps=5, cfg_scale=1.0, sigma_shift=3.0)` | `utility_z_image_turbo_2k_upscaler.json` |
74
 
75
  **LoRA wiring:** validated `safetensors` file + `gr.Slider(0.0, 1.5, value=0.8)` strength. Applied via DiffSynth's `merge_lora` inside an apply/revert context manager so the cached GPU model returns to a clean state after each request. Safetensors header sniffed before `@spaces.GPU` fires to reject mismatched LoRAs with a clear error (no GPU slot wasted).
 
335
  preload_from_hub:
336
  - Tongyi-MAI/Z-Image transformer/diffusion_pytorch_model.safetensors,text_encoder/*.safetensors,vae/diffusion_pytorch_model.safetensors,tokenizer/*
337
  - Tongyi-MAI/Z-Image-Turbo transformer/diffusion_pytorch_model.safetensors
338
+ - alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1 Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors
339
  - lllyasviel/Annotators RealESRGAN_x4plus.pth
340
  ---
341
  ```
 
371
  ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors", **vram_cfg),
372
  # ControlNet — eager preload at boot to avoid first-ControlNet-call wait.
373
  # If startup RAM becomes tight on Spaces, move this to a lazy-load on first ControlNet request.
374
+ ModelConfig(model_id="alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1",
375
  origin_file_pattern="Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors", **vram_cfg),
376
  ],
377
  tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="tokenizer/"),
models.py CHANGED
@@ -77,7 +77,7 @@ MODEL_CONFIGS: tuple[ModelConfig, ...] = (
77
  ModelConfig("Tongyi-MAI/Z-Image-Turbo", "transformer/*.safetensors", "Z-Image-Turbo transformer (8 steps, cfg=1)"),
78
  # ControlNet Union 2.1 (eager preload per spec; can move to lazy if RAM is tight)
79
  ModelConfig(
80
- "PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1",
81
  "Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors",
82
  "ControlNet Union 2.1 — canny/depth/pose",
83
  ),
@@ -151,3 +151,49 @@ def mirror_preload_hf_cache(src_root: Path | str, dst_root: Path | str) -> None:
151
  dst_path.symlink_to(src_path)
152
  else:
153
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  ModelConfig("Tongyi-MAI/Z-Image-Turbo", "transformer/*.safetensors", "Z-Image-Turbo transformer (8 steps, cfg=1)"),
78
  # ControlNet Union 2.1 (eager preload per spec; can move to lazy if RAM is tight)
79
  ModelConfig(
80
+ "alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1",
81
  "Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors",
82
  "ControlNet Union 2.1 — canny/depth/pose",
83
  ),
 
151
  dst_path.symlink_to(src_path)
152
  else:
153
  raise
154
+
155
+
156
+ def symlink_hf_cache_to_diffsynth_layout(cache_hub: Path | str, dest_root: Path | str) -> list[str]:
157
+ """For each ``models--<org>--<repo>`` under ``cache_hub``, symlink the latest snapshot
158
+ dir to ``dest_root/<org>/<repo>/`` — the layout DiffSynth's ModelConfig expects.
159
+
160
+ DiffSynth's ``download()`` joins ``local_model_path`` with ``model_id`` and either
161
+ finds matching files (skipping download) or fetches them. Putting symlinks at the
162
+ expected location lets DiffSynth reuse our HF-cache snapshots instead of re-downloading.
163
+
164
+ Returns the list of dest paths created. Idempotent: existing valid symlinks are kept.
165
+ """
166
+ cache_hub = Path(cache_hub)
167
+ dest_root = Path(dest_root)
168
+ if not cache_hub.is_dir():
169
+ return []
170
+
171
+ created: list[str] = []
172
+ for entry in sorted(cache_hub.iterdir()):
173
+ if not entry.is_dir() or not entry.name.startswith("models--"):
174
+ continue
175
+ # "models--Tongyi-MAI--Z-Image-Turbo" -> ("Tongyi-MAI", "Z-Image-Turbo")
176
+ # Some repos contain "--" in their name; only split off the first segment.
177
+ rest = entry.name[len("models--") :]
178
+ parts = rest.split("--", 1)
179
+ if len(parts) != 2:
180
+ continue
181
+ org, repo = parts
182
+
183
+ snapshots = entry / "snapshots"
184
+ if not snapshots.is_dir():
185
+ continue
186
+ sha_dirs = [d for d in snapshots.iterdir() if d.is_dir()]
187
+ if not sha_dirs:
188
+ continue
189
+ # Newest by mtime — usually the only one for our preload + first-fetch flow.
190
+ sha_dirs.sort(key=lambda d: d.stat().st_mtime, reverse=True)
191
+ snap = sha_dirs[0]
192
+
193
+ link = dest_root / org / repo
194
+ if link.is_symlink() or link.exists():
195
+ continue
196
+ link.parent.mkdir(parents=True, exist_ok=True)
197
+ link.symlink_to(snap)
198
+ created.append(str(link))
199
+ return created
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  # Core
2
  gradio==5.50.0
3
- spaces==0.30.0
 
4
  diffsynth @ git+https://github.com/modelscope/DiffSynth-Studio.git
5
  torch>=2.4
6
  safetensors>=0.4.5
 
1
  # Core
2
  gradio==5.50.0
3
+ # `spaces` is auto-injected by HF Spaces on ZeroGPU at build time (pin would conflict).
4
+ # Locally, install it ad-hoc if you want to test the @spaces.GPU decorator path.
5
  diffsynth @ git+https://github.com/modelscope/DiffSynth-Studio.git
6
  torch>=2.4
7
  safetensors>=0.4.5
tests/test_models.py CHANGED
@@ -25,7 +25,7 @@ def test_model_configs_contains_both_transformers():
25
  repos = {c.model_id for c in configs}
26
  assert "Tongyi-MAI/Z-Image" in repos
27
  assert "Tongyi-MAI/Z-Image-Turbo" in repos
28
- assert "PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1" in repos
29
 
30
 
31
  def test_vram_limit_for_cuda_is_reasonable():
 
25
  repos = {c.model_id for c in configs}
26
  assert "Tongyi-MAI/Z-Image" in repos
27
  assert "Tongyi-MAI/Z-Image-Turbo" in repos
28
+ assert "alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1" in repos
29
 
30
 
31
  def test_vram_limit_for_cuda_is_reasonable():