Spaces:

techfreakworm
/

z-image-studio

Running on Zero

App Files Files Community

techfreakworm committed 7 days ago

Commit

99302bc

unverified ·

1 Parent(s): 213bf15

fix(deploy): switch model lookups to HF + fix ZeroGPU build

Browse files

Multiple issues caught in the HF Space build cycle:

1. DiffSynth defaults to ModelScope, not HF. Set
DIFFSYNTH_DOWNLOAD_SOURCE=huggingface in app.py before imports so
preload_from_hub + HF cache work.

2. PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1 is the ModelScope slug. On HF
the mirror is alibaba-pai/... — update models.py + README + spec + test.

3. ZeroGPU build appends spaces==0.50.0 to the install line, conflicting with
our spaces==0.30.0 pin. Drop the pin entirely — HF provides the right one.

4. DiffSynth resolves models at ./models/<org>/<repo>/, not ~/.cache/huggingface/hub.
Add models.symlink_hf_cache_to_diffsynth_layout() that creates symlinks
from cache snapshots to that layout. Wire into _bootstrap() so the preload
weights are actually findable at runtime (otherwise the Space re-downloads
all 30 GB on first call).

5. Set DIFFSYNTH_MODEL_BASE_PATH=<project root>/models in _bootstrap().

6. Add models/ to .gitignore.

7. _bootstrap() now also runs locally (just the symlink step) so the user's
existing HF cache snapshots get linked into the project's ./models/.

Files changed (7) hide show

.gitignore +3 -0
README.md +1 -1
app.py +34 -9
docs/superpowers/specs/2026-05-13-z-image-studio-design.md +3 -3
models.py +47 -1
requirements.txt +2 -1
tests/test_models.py +1 -1

.gitignore CHANGED Viewed

@@ -21,6 +21,9 @@ venv/
 !assets/seed_inputs/*.png
 !assets/seed_inputs/*.jpg
 # Gradio runtime
 gradio_cached_examples/
 output/

 !assets/seed_inputs/*.png
 !assets/seed_inputs/*.jpg
+# DiffSynth-local model dir (symlinks to HF cache; weights live in ~/.cache/huggingface/hub)
+models/
 # Gradio runtime
 gradio_cached_examples/
 output/

README.md CHANGED Viewed

@@ -12,7 +12,7 @@ hf_oauth: false
 preload_from_hub:
   - Tongyi-MAI/Z-Image transformer/diffusion_pytorch_model.safetensors,text_encoder/*.safetensors,vae/diffusion_pytorch_model.safetensors,tokenizer/*
   - Tongyi-MAI/Z-Image-Turbo transformer/diffusion_pytorch_model.safetensors
-  - PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1 Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors
   - lllyasviel/Annotators RealESRGAN_x4plus.pth
 ---

 preload_from_hub:
   - Tongyi-MAI/Z-Image transformer/diffusion_pytorch_model.safetensors,text_encoder/*.safetensors,vae/diffusion_pytorch_model.safetensors,tokenizer/*
   - Tongyi-MAI/Z-Image-Turbo transformer/diffusion_pytorch_model.safetensors
+  - alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1 Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors
   - lllyasviel/Annotators RealESRGAN_x4plus.pth
 ---

app.py CHANGED Viewed

@@ -10,10 +10,14 @@ import os
 import random
 from pathlib import Path
 import gradio as gr
 import backend
-import lora as lora_mod  # avoid shadowing the gr.File `lora_path` name
 import models
 import theme
 import ui
@@ -21,15 +25,36 @@ import ui
 # ----- HF Spaces bootstrap ---------------------------------------------------
 def _bootstrap() -> None:
-    """Mirror the preload_from_hub cache once, then point HF env at the mirror."""
-    if not models.on_spaces():
-        return
-    src = Path(os.environ.get("HF_HOME", str(Path.home() / ".cache" / "huggingface")))
-    dst = Path.home() / "hf-cache-rw"
-    models.mirror_preload_hf_cache(src, dst)
-    os.environ["HF_HOME"] = str(dst)
-    os.environ["HF_HUB_CACHE"] = str(dst / "hub")
 _bootstrap()

 import random
 from pathlib import Path
+# DiffSynth defaults to ModelScope; force HF so preload_from_hub + HF cache work.
+# Must be set before any diffsynth import path is taken (backend imports it lazily).
+os.environ.setdefault("DIFFSYNTH_DOWNLOAD_SOURCE", "huggingface")
 import gradio as gr
 import backend
+import lora as lora_mod
 import models
 import theme
 import ui
 # ----- HF Spaces bootstrap ---------------------------------------------------
+_REPO_ROOT = Path(__file__).resolve().parent
+_DIFFSYNTH_MODELS_DIR = _REPO_ROOT / "models"
 def _bootstrap() -> None:
+    """Mirror the preload_from_hub cache, then symlink snapshots into DiffSynth's
+    expected ``./models/<repo>/`` layout so the pipeline reuses preloaded weights
+    instead of re-downloading on first call.
+    On Spaces: cache is read-only owned by the build user → mirror to ~/hf-cache-rw
+    first, then point HF env there, then symlink into ./models.
+    Locally: skip the mirror (we own the dirs); just symlink from the user's HF
+    cache to ./models so DiffSynth finds the snapshots.
+    """
+    if models.on_spaces():
+        src = Path(os.environ.get("HF_HOME", str(Path.home() / ".cache" / "huggingface")))
+        dst = Path.home() / "hf-cache-rw"
+        models.mirror_preload_hf_cache(src, dst)
+        os.environ["HF_HOME"] = str(dst)
+        os.environ["HF_HUB_CACHE"] = str(dst / "hub")
+        cache_hub = dst / "hub"
+    else:
+        cache_hub = Path(os.environ.get("HF_HUB_CACHE", str(Path.home() / ".cache" / "huggingface" / "hub")))
+    # Point DiffSynth at our project-local models dir + symlink every cached
+    # snapshot so DiffSynth's ModelConfig finds them without re-downloading.
+    os.environ.setdefault("DIFFSYNTH_MODEL_BASE_PATH", str(_DIFFSYNTH_MODELS_DIR))
+    _DIFFSYNTH_MODELS_DIR.mkdir(exist_ok=True)
+    models.symlink_hf_cache_to_diffsynth_layout(cache_hub, _DIFFSYNTH_MODELS_DIR)
 _bootstrap()

docs/superpowers/specs/2026-05-13-z-image-studio-design.md CHANGED Viewed

@@ -69,7 +69,7 @@ All three modes go through one `ZImagePipeline.__call__`. Mode-specific code is
 | --- | --- | --- | --- |
 | **T2I (Base)** | `Tongyi-MAI/Z-Image` | `pipe(prompt, negative_prompt, cfg_scale=4.0, num_inference_steps=25, sigma_shift=3.0, height, width, seed)` | `image_z_image.json` |
 | **T2I (Turbo)** | `Tongyi-MAI/Z-Image-Turbo` | `pipe(prompt, cfg_scale=1.0, num_inference_steps=8, sigma_shift=3.0, height, width, seed)` | `image_z_image_turbo.json` |
-| **ControlNet** | Turbo + `PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1` | `pipe(prompt, controlnet_inputs=[ControlNetInput(image=preprocessed, scale)], cfg_scale=1.0, num_inference_steps=9, sigma_shift=3.0)` | `image_z_image_turbo_fun_union_controlnet.json` |
 | **Upscale** | Turbo + RealESRGAN_x4plus | `RealESRGAN_x4(input) → PIL.resize 0.5 → pipe(prompt="masterpiece, 8k", input_image=upscaled, denoising_strength=0.33, num_inference_steps=5, cfg_scale=1.0, sigma_shift=3.0)` | `utility_z_image_turbo_2k_upscaler.json` |
 **LoRA wiring:** validated `safetensors` file + `gr.Slider(0.0, 1.5, value=0.8)` strength. Applied via DiffSynth's `merge_lora` inside an apply/revert context manager so the cached GPU model returns to a clean state after each request. Safetensors header sniffed before `@spaces.GPU` fires to reject mismatched LoRAs with a clear error (no GPU slot wasted).
@@ -335,7 +335,7 @@ hf_oauth: false
 preload_from_hub:
   - Tongyi-MAI/Z-Image transformer/diffusion_pytorch_model.safetensors,text_encoder/*.safetensors,vae/diffusion_pytorch_model.safetensors,tokenizer/*
   - Tongyi-MAI/Z-Image-Turbo transformer/diffusion_pytorch_model.safetensors
-  - PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1 Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors
   - lllyasviel/Annotators RealESRGAN_x4plus.pth
 ---
 ```
@@ -371,7 +371,7 @@ pipe = ZImagePipeline.from_pretrained(
         ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors", **vram_cfg),
         # ControlNet — eager preload at boot to avoid first-ControlNet-call wait.
         # If startup RAM becomes tight on Spaces, move this to a lazy-load on first ControlNet request.
-        ModelConfig(model_id="PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1",
                     origin_file_pattern="Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors", **vram_cfg),
     ],
     tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="tokenizer/"),

 | --- | --- | --- | --- |
 | **T2I (Base)** | `Tongyi-MAI/Z-Image` | `pipe(prompt, negative_prompt, cfg_scale=4.0, num_inference_steps=25, sigma_shift=3.0, height, width, seed)` | `image_z_image.json` |
 | **T2I (Turbo)** | `Tongyi-MAI/Z-Image-Turbo` | `pipe(prompt, cfg_scale=1.0, num_inference_steps=8, sigma_shift=3.0, height, width, seed)` | `image_z_image_turbo.json` |
+| **ControlNet** | Turbo + `alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1` | `pipe(prompt, controlnet_inputs=[ControlNetInput(image=preprocessed, scale)], cfg_scale=1.0, num_inference_steps=9, sigma_shift=3.0)` | `image_z_image_turbo_fun_union_controlnet.json` |
 | **Upscale** | Turbo + RealESRGAN_x4plus | `RealESRGAN_x4(input) → PIL.resize 0.5 → pipe(prompt="masterpiece, 8k", input_image=upscaled, denoising_strength=0.33, num_inference_steps=5, cfg_scale=1.0, sigma_shift=3.0)` | `utility_z_image_turbo_2k_upscaler.json` |
 **LoRA wiring:** validated `safetensors` file + `gr.Slider(0.0, 1.5, value=0.8)` strength. Applied via DiffSynth's `merge_lora` inside an apply/revert context manager so the cached GPU model returns to a clean state after each request. Safetensors header sniffed before `@spaces.GPU` fires to reject mismatched LoRAs with a clear error (no GPU slot wasted).
 preload_from_hub:
   - Tongyi-MAI/Z-Image transformer/diffusion_pytorch_model.safetensors,text_encoder/*.safetensors,vae/diffusion_pytorch_model.safetensors,tokenizer/*
   - Tongyi-MAI/Z-Image-Turbo transformer/diffusion_pytorch_model.safetensors
+  - alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1 Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors
   - lllyasviel/Annotators RealESRGAN_x4plus.pth
 ---
 ```
         ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors", **vram_cfg),
         # ControlNet — eager preload at boot to avoid first-ControlNet-call wait.
         # If startup RAM becomes tight on Spaces, move this to a lazy-load on first ControlNet request.
+        ModelConfig(model_id="alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1",
                     origin_file_pattern="Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors", **vram_cfg),
     ],
     tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="tokenizer/"),

models.py CHANGED Viewed

@@ -77,7 +77,7 @@ MODEL_CONFIGS: tuple[ModelConfig, ...] = (
     ModelConfig("Tongyi-MAI/Z-Image-Turbo", "transformer/*.safetensors", "Z-Image-Turbo transformer (8 steps, cfg=1)"),
     # ControlNet Union 2.1 (eager preload per spec; can move to lazy if RAM is tight)
     ModelConfig(
-        "PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1",
         "Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors",
         "ControlNet Union 2.1 — canny/depth/pose",
     ),
@@ -151,3 +151,49 @@ def mirror_preload_hf_cache(src_root: Path | str, dst_root: Path | str) -> None:
                     dst_path.symlink_to(src_path)
                 else:
                     raise

     ModelConfig("Tongyi-MAI/Z-Image-Turbo", "transformer/*.safetensors", "Z-Image-Turbo transformer (8 steps, cfg=1)"),
     # ControlNet Union 2.1 (eager preload per spec; can move to lazy if RAM is tight)
     ModelConfig(
+        "alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1",
         "Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.safetensors",
         "ControlNet Union 2.1 — canny/depth/pose",
     ),
                     dst_path.symlink_to(src_path)
                 else:
                     raise
+def symlink_hf_cache_to_diffsynth_layout(cache_hub: Path | str, dest_root: Path | str) -> list[str]:
+    """For each ``models--<org>--<repo>`` under ``cache_hub``, symlink the latest snapshot
+    dir to ``dest_root/<org>/<repo>/`` — the layout DiffSynth's ModelConfig expects.
+    DiffSynth's ``download()`` joins ``local_model_path`` with ``model_id`` and either
+    finds matching files (skipping download) or fetches them. Putting symlinks at the
+    expected location lets DiffSynth reuse our HF-cache snapshots instead of re-downloading.
+    Returns the list of dest paths created. Idempotent: existing valid symlinks are kept.
+    """
+    cache_hub = Path(cache_hub)
+    dest_root = Path(dest_root)
+    if not cache_hub.is_dir():
+        return []
+    created: list[str] = []
+    for entry in sorted(cache_hub.iterdir()):
+        if not entry.is_dir() or not entry.name.startswith("models--"):
+            continue
+        # "models--Tongyi-MAI--Z-Image-Turbo" -> ("Tongyi-MAI", "Z-Image-Turbo")
+        # Some repos contain "--" in their name; only split off the first segment.
+        rest = entry.name[len("models--") :]
+        parts = rest.split("--", 1)
+        if len(parts) != 2:
+            continue
+        org, repo = parts
+        snapshots = entry / "snapshots"
+        if not snapshots.is_dir():
+            continue
+        sha_dirs = [d for d in snapshots.iterdir() if d.is_dir()]
+        if not sha_dirs:
+            continue
+        # Newest by mtime — usually the only one for our preload + first-fetch flow.
+        sha_dirs.sort(key=lambda d: d.stat().st_mtime, reverse=True)
+        snap = sha_dirs[0]
+        link = dest_root / org / repo
+        if link.is_symlink() or link.exists():
+            continue
+        link.parent.mkdir(parents=True, exist_ok=True)
+        link.symlink_to(snap)
+        created.append(str(link))
+    return created

requirements.txt CHANGED Viewed

@@ -1,6 +1,7 @@
 # Core
 gradio==5.50.0
-spaces==0.30.0
 diffsynth @ git+https://github.com/modelscope/DiffSynth-Studio.git
 torch>=2.4
 safetensors>=0.4.5

 # Core
 gradio==5.50.0
+# `spaces` is auto-injected by HF Spaces on ZeroGPU at build time (pin would conflict).
+# Locally, install it ad-hoc if you want to test the @spaces.GPU decorator path.
 diffsynth @ git+https://github.com/modelscope/DiffSynth-Studio.git
 torch>=2.4
 safetensors>=0.4.5

tests/test_models.py CHANGED Viewed

@@ -25,7 +25,7 @@ def test_model_configs_contains_both_transformers():
     repos = {c.model_id for c in configs}
     assert "Tongyi-MAI/Z-Image" in repos
     assert "Tongyi-MAI/Z-Image-Turbo" in repos
-    assert "PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1" in repos
 def test_vram_limit_for_cuda_is_reasonable():

     repos = {c.model_id for c in configs}
     assert "Tongyi-MAI/Z-Image" in repos
     assert "Tongyi-MAI/Z-Image-Turbo" in repos
+    assert "alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.1" in repos
 def test_vram_limit_for_cuda_is_reasonable():