Spaces:

techfreakworm
/

z-image-studio

Running on Zero

App Files Files Community

techfreakworm committed 7 days ago

Commit

0cf8ffc

unverified ·

1 Parent(s): 5ab6428

fix: pool-stashed transformer swap + MPS-safe vram + corrected model-zoo anchor

Browse files

Four fixes — the first three coupled and uncovered by the first real Generate click locally:

1. AttributeError: 'ZImagePipeline' object has no attribute 'model_pool'
DiffSynth's from_pretrained builds a fresh ModelPool, attaches pipe.dit/
pipe.text_encoder/etc. from it, then discards the pool. My _swap_transformer
in modes.py assumed pipe.model_pool persisted. Replace from_pretrained call
with a manual replication that stashes the pool on pipe._zis_pool, then
index into pool.model for the two z_image_dit entries (Base loaded first,
Turbo second per MODEL_CONFIGS order). fetch_model can't distinguish them
since both register under the same name.

2. AttributeError: module 'torch.mps' has no attribute 'mem_get_info'
DiffSynth's AutoWrappedModule.forward gates module load on check_free_vram
(vram/layers.py:195), which calls torch.<device>.mem_get_info — CUDA-only.
The escape hatch is vram_limit=None, which short-circuits the gate. Update
models.vram_limit_for('mps') -> None (was a positive float).

3. Preemptively set PYTORCH_ENABLE_MPS_FALLBACK=1 in app.py so any other
MPS-unsupported op (SDPA variants, certain index ops) falls back to CPU
instead of crashing the request.

4. Corrected the Z-Image Model Zoo anchor — the README heading starts with an
emoji; GitHub drops the emoji from the anchor slug and turns the space after it into a leading '-', so the URL is #-model-zoo.

Tests: 68 passing, ruff clean. Validated locally; HF Space will rebuild on push.

Files changed (10) hide show

app.py +5 -0
backend.py +37 -9
docs/superpowers/plans/2026-05-13-z-image-studio.md +2 -2
docs/superpowers/specs/2026-05-13-z-image-studio-design.md +2 -2
models.py +14 -12
modes.py +16 -2
tests/test_models.py +5 -3
tests/test_modes.py +22 -14
tests/test_ui.py +1 -1
ui.py +1 -1

app.py CHANGED Viewed

@@ -14,6 +14,11 @@ from pathlib import Path
 # Must be set before any diffsynth import path is taken (backend imports it lazily).
 os.environ.setdefault("DIFFSYNTH_DOWNLOAD_SOURCE", "huggingface")
 import gradio as gr
 import backend

 # Must be set before any diffsynth import path is taken (backend imports it lazily).
 os.environ.setdefault("DIFFSYNTH_DOWNLOAD_SOURCE", "huggingface")
+# Apple Silicon: let PyTorch fall back to CPU for the small set of ops MPS doesn't
+# implement (some scaled-dot-product flavors, certain index ops). Without this,
+# DiffSynth crashes mid-pipeline on the first unsupported op rather than degrading.
+os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
 import gradio as gr
 import backend

backend.py CHANGED Viewed

@@ -61,9 +61,18 @@ _GPU = (
 def _build_pipeline() -> Any:
-    """Construct the DiffSynth ZImagePipeline. Imported lazily to keep tests fast."""
     import torch
     from diffsynth.pipelines.z_image import ZImagePipeline
     import models
@@ -81,16 +90,35 @@ def _build_pipeline() -> Any:
             computation_device=device,
         )
-    pipe = ZImagePipeline.from_pretrained(
-        torch_dtype=torch.bfloat16,
-        device=device,
-        model_configs=models.build_diffsynth_configs(vram_cfg=vram_cfg),
-        tokenizer_config=models.build_diffsynth_configs(
-            (models.TOKENIZER_CONFIG,),
-            vram_cfg=None,
-        )[0],
         vram_limit=models.vram_limit_for(device),
     )
     return pipe

 def _build_pipeline() -> Any:
+    """Construct a ZImagePipeline carrying BOTH Base and Turbo transformers.
+    DiffSynth's ``ZImagePipeline.from_pretrained`` builds a fresh ``ModelPool``
+    locally and throws it away after attaching ``pipe.dit`` etc. — so a later
+    transformer swap has nothing to switch between. We replicate the same
+    initialization manually and keep the pool on ``pipe._zis_pool`` so
+    :func:`modes._swap_transformer` can flip ``pipe.dit`` between the two
+    ``z_image_dit`` entries (Base loaded first, Turbo second per MODEL_CONFIGS).
+    """
     import torch
     from diffsynth.pipelines.z_image import ZImagePipeline
+    from transformers import AutoTokenizer
     import models
             computation_device=device,
         )
+    pipe = ZImagePipeline(device=device, torch_dtype=torch.bfloat16)
+    # Load every safetensors listed in MODEL_CONFIGS — both transformers + shared
+    # text encoder + VAE + controlnet — into one pool.
+    pool = pipe.download_and_load_models(
+        models.build_diffsynth_configs(vram_cfg=vram_cfg),
         vram_limit=models.vram_limit_for(device),
     )
+    pipe._zis_pool = pool
+    pipe.text_encoder = pool.fetch_model("z_image_text_encoder")
+    pipe.dit = pool.fetch_model("z_image_dit")  # first match = Base per load order
+    pipe.vae_encoder = pool.fetch_model("flux_vae_encoder")
+    pipe.vae_decoder = pool.fetch_model("flux_vae_decoder")
+    pipe.controlnet = pool.fetch_model("z_image_controlnet")
+    # Optional image encoders that DiffSynth's ZImagePipeline references but
+    # aren't in our preload (Omni / image2lora). fetch_model returns None when
+    # absent — that's the documented "not an error" path.
+    pipe.image_encoder = pool.fetch_model("siglip_vision_model_428m")
+    pipe.siglip2_image_encoder = pool.fetch_model("siglip2_image_encoder")
+    pipe.dinov3_image_encoder = pool.fetch_model("dinov3_image_encoder")
+    pipe.image2lora_style = pool.fetch_model("z_image_image2lora_style")
+    # Tokenizer (Qwen3-4B tokenizer dir under Z-Image)
+    tok_cfg = models.build_diffsynth_configs((models.TOKENIZER_CONFIG,), vram_cfg=None)[0]
+    tok_cfg.download_if_necessary()
+    pipe.tokenizer = AutoTokenizer.from_pretrained(tok_cfg.path)
+    pipe.vram_management_enabled = pipe.check_vram_management_state()
     return pipe

docs/superpowers/plans/2026-05-13-z-image-studio.md CHANGED Viewed

@@ -2892,7 +2892,7 @@ def test_model_selector_html_marks_current_as_on():
 def test_model_selector_html_includes_both_soon_cards_with_github_link():
     out = ui.model_selector_html(current="Turbo")
-    assert out.count("github.com/Tongyi-MAI/Z-Image#model-zoo") == 2
     assert "Edit" in out
     assert "Omni Base" in out
     assert "soon-tag" in out
@@ -2920,7 +2920,7 @@ from __future__ import annotations
 from html import escape
-GITHUB_MODEL_ZOO_URL = "https://github.com/Tongyi-MAI/Z-Image#model-zoo"
 def labeled_label(text: str, info_text: str) -> str:

 def test_model_selector_html_includes_both_soon_cards_with_github_link():
     out = ui.model_selector_html(current="Turbo")
+    assert out.count("github.com/Tongyi-MAI/Z-Image#-model-zoo") == 2
     assert "Edit" in out
     assert "Omni Base" in out
     assert "soon-tag" in out
 from html import escape
+GITHUB_MODEL_ZOO_URL = "https://github.com/Tongyi-MAI/Z-Image#-model-zoo"
 def labeled_label(text: str, info_text: str) -> str:

docs/superpowers/specs/2026-05-13-z-image-studio-design.md CHANGED Viewed

@@ -195,14 +195,14 @@ The T2I tab's Model selector replaces `gr.Radio` with a custom HTML grid because
     <span class="dot"></span><span class="name">Turbo</span>
   </button>
   <a class="zis-model soon"
-     href="https://github.com/Tongyi-MAI/Z-Image#model-zoo"
      target="_blank" rel="noopener noreferrer">
     <span class="dot"></span>
     <span class="name">Edit<span class="ext">↗</span></span>
     <span class="soon-tag">soon</span>
   </a>
   <a class="zis-model soon"
-     href="https://github.com/Tongyi-MAI/Z-Image#model-zoo"
      target="_blank" rel="noopener noreferrer">
     <span class="dot"></span>
     <span class="name">Omni Base<span class="ext">↗</span></span>

     <span class="dot"></span><span class="name">Turbo</span>
   </button>
   <a class="zis-model soon"
+     href="https://github.com/Tongyi-MAI/Z-Image#-model-zoo"
      target="_blank" rel="noopener noreferrer">
     <span class="dot"></span>
     <span class="name">Edit<span class="ext">↗</span></span>
     <span class="soon-tag">soon</span>
   </a>
   <a class="zis-model soon"
+     href="https://github.com/Tongyi-MAI/Z-Image#-model-zoo"
      target="_blank" rel="noopener noreferrer">
     <span class="dot"></span>
     <span class="name">Omni Base<span class="ext">↗</span></span>

models.py CHANGED Viewed

@@ -26,27 +26,29 @@ def auto_device() -> str:
     return "cpu"
-def vram_limit_for(device: str, free_gb: float | None = None) -> float:
     """Conservative VRAM limit (GB) passed to DiffSynth's vram_management.
-    - CUDA: keep ~5% headroom (loaded models + scratch).
-    - MPS: half of unified memory (CPU still needs RAM), capped.
     - CPU: 0.0 (no offload budget).
     """
     if device == "cpu":
         return 0.0
     if free_gb is None:
         import torch
-        if device == "cuda":
-            free_gb = torch.cuda.mem_get_info()[1] / (1024**3)
-        else:  # mps
-            # torch.mps has no mem_get_info on most builds; fall back to a safe constant.
-            free_gb = 24.0
-    if device == "mps":
-        # Use half of unified memory; clamp to 8 GB floor for safety.
-        return max(8.0, free_gb / 2)
-    # cuda
     return max(8.0, free_gb - 4.0)

     return "cpu"
+def vram_limit_for(device: str, free_gb: float | None = None) -> float | None:
     """Conservative VRAM limit (GB) passed to DiffSynth's vram_management.
+    - CUDA: keep a few GB headroom (loaded models + scratch).
+    - MPS: ``None`` — PyTorch's MPS has no ``mem_get_info`` API, and DiffSynth's
+      ``check_free_vram`` raises AttributeError when called on MPS. Returning
+      ``None`` short-circuits the check (``vram/layers.py:195``) so module
+      swapping still works without the gate.
     - CPU: 0.0 (no offload budget).
     """
     if device == "cpu":
         return 0.0
+    if device == "mps":
+        # PyTorch's MPS backend has no ``torch.mps.mem_get_info``. DiffSynth's
+        # ``AutoWrappedModule.check_free_vram`` calls it and raises AttributeError.
+        # Returning None short-circuits the gate at vram/layers.py:195 so we keep
+        # CPU↔MPS module swapping (offload/onload) without the doomed check.
+        return None
+    # cuda
     if free_gb is None:
         import torch
+        free_gb = torch.cuda.mem_get_info()[1] / (1024**3)
     return max(8.0, free_gb - 4.0)

modes.py CHANGED Viewed

@@ -36,9 +36,23 @@ class T2IParams(TypedDict, total=False):
 def _swap_transformer(pipe: Any, model_name: str) -> None:
-    """Swap the active transformer in the pipeline's model pool."""
     variant = "z_image" if model_name == "Base" else "z_image_turbo"
-    pipe.dit = pipe.model_pool.fetch_model("z_image_dit", variant=variant)
     try:
         pipe.dit._zis_variant = variant
     except (AttributeError, RuntimeError):

 def _swap_transformer(pipe: Any, model_name: str) -> None:
+    """Swap the active transformer between Base (index 0) and Turbo (index 1).
+    ``backend._build_pipeline`` loads both transformers into ``pipe._zis_pool``
+    and stores them under the same name ``z_image_dit``. DiffSynth's
+    ``ModelPool.fetch_model`` doesn't expose a variant kwarg — both entries
+    share the same name — so we index into ``pool.model`` directly. MODEL_CONFIGS
+    loads Base first, then Turbo (so index 0 = Base, index 1 = Turbo).
+    No-op if the pool is unavailable (e.g. mocked tests) or only one transformer
+    was loaded.
+    """
     variant = "z_image" if model_name == "Base" else "z_image_turbo"
+    pool = getattr(pipe, "_zis_pool", None)
+    if pool is not None:
+        dits = [m for m, n in zip(pool.model, pool.model_name, strict=False) if n == "z_image_dit"]
+        if len(dits) >= 2:
+            pipe.dit = dits[0 if model_name == "Base" else 1]
     try:
         pipe.dit._zis_variant = variant
     except (AttributeError, RuntimeError):

tests/test_models.py CHANGED Viewed

@@ -33,9 +33,11 @@ def test_vram_limit_for_cuda_is_reasonable():
     assert 60.0 <= limit <= 80.0  # leave headroom
-def test_vram_limit_for_mps_is_unified_memory_aware():
-    limit = models.vram_limit_for("mps", free_gb=24.0)
-    assert 12.0 <= limit <= 22.0  # half of unified, headroom
 def test_vram_limit_for_cpu_is_zero():

     assert 60.0 <= limit <= 80.0  # leave headroom
+def test_vram_limit_for_mps_returns_none():
+    # MPS has no torch.mps.mem_get_info; DiffSynth's check_free_vram crashes
+    # on a numeric limit. None short-circuits the check (vram/layers.py:195).
+    assert models.vram_limit_for("mps", free_gb=24.0) is None
+    assert models.vram_limit_for("mps") is None
 def test_vram_limit_for_cpu_is_zero():

tests/test_modes.py CHANGED Viewed

@@ -68,25 +68,33 @@ def test_t2i_base_passes_negative_prompt_and_cfg4(fake_pipe):
     assert kwargs["num_inference_steps"] == 25
-def test_t2i_swaps_transformer_via_model_pool(fake_pipe):
     modes.call_t2i(
         fake_pipe,
         params=dict(
-            prompt="x",
-            negative_prompt="",
-            model="Base",
-            steps=25,
-            cfg=4.0,
-            width=1024,
-            height=1024,
-            seed=0,
-            lora_path=None,
-            lora_strength=0.0,
         ),
     )
-    fake_pipe.model_pool.fetch_model.assert_called()
-    call = fake_pipe.model_pool.fetch_model.call_args
-    assert call.args[0] == "z_image_dit"
 def test_controlnet_calls_preprocessor_then_pipeline(fake_pipe, monkeypatch):

     assert kwargs["num_inference_steps"] == 25
+def test_t2i_swaps_transformer_via_pool_index(fake_pipe):
+    """Base picks pool.model[0]; Turbo picks pool.model[1] (load-order indexed)."""
+    base_dit = object()
+    turbo_dit = object()
+    # Two z_image_dit entries in load order: Base first, Turbo second.
+    fake_pipe._zis_pool.model = [base_dit, turbo_dit, "vae_decoder_obj"]
+    fake_pipe._zis_pool.model_name = ["z_image_dit", "z_image_dit", "flux_vae_decoder"]
     modes.call_t2i(
         fake_pipe,
         params=dict(
+            prompt="x", negative_prompt="", model="Base",
+            steps=25, cfg=4.0, width=1024, height=1024, seed=0,
+            lora_path=None, lora_strength=0.0,
+        ),
+    )
+    assert fake_pipe.dit is base_dit
+    modes.call_t2i(
+        fake_pipe,
+        params=dict(
+            prompt="x", negative_prompt="", model="Turbo",
+            steps=8, cfg=1.0, width=1024, height=1024, seed=0,
+            lora_path=None, lora_strength=0.0,
         ),
     )
+    assert fake_pipe.dit is turbo_dit
 def test_controlnet_calls_preprocessor_then_pipeline(fake_pipe, monkeypatch):

tests/test_ui.py CHANGED Viewed

@@ -28,7 +28,7 @@ def test_model_selector_html_marks_current_as_on():
 def test_model_selector_html_includes_both_soon_cards_with_github_link():
     out = ui.model_selector_html(current="Turbo")
-    assert out.count("github.com/Tongyi-MAI/Z-Image#model-zoo") == 2
     assert "Edit" in out
     assert "Omni Base" in out
     assert "soon-tag" in out

 def test_model_selector_html_includes_both_soon_cards_with_github_link():
     out = ui.model_selector_html(current="Turbo")
+    assert out.count("github.com/Tongyi-MAI/Z-Image#-model-zoo") == 2
     assert "Edit" in out
     assert "Omni Base" in out
     assert "soon-tag" in out

ui.py CHANGED Viewed

@@ -9,7 +9,7 @@ import gradio as gr
 import preprocessors
 from tooltips import TOOLTIPS
-GITHUB_MODEL_ZOO_URL = "https://github.com/Tongyi-MAI/Z-Image#model-zoo"
 def labeled_label(text: str, info_text: str) -> str:

 import preprocessors
 from tooltips import TOOLTIPS
+GITHUB_MODEL_ZOO_URL = "https://github.com/Tongyi-MAI/Z-Image#-model-zoo"
 def labeled_label(text: str, info_text: str) -> str: