Spaces:

techfreakworm
/

z-image-studio

Running on Zero

App Files Community

techfreakworm committed 8 days ago

Commit

296faa9

unverified ·

1 Parent(s): 6d862f4

fix(upscale): pass post-upscale dims to pipe + LoRA label hints model compat

Browse files

Two coupled UX fixes from live testing:

1. Upscale generation crashed with 'size of tensor a (256) must match size of
tensor b (128) at non-singleton dimension 3' in DiffSynth's add_noise.
RealESRGAN x4 followed by a 0.5x resize yields a net 2x image (1024 -> 2048),
so the VAE-encoded input latents were 256x256, but DiffSynth's noise
initializer fell back to its default 1024 height/width and produced 128x128
noise. call_upscale now passes height=upscaled.size[1],
width=upscaled.size[0], mirroring what call_controlnet already did for the
preprocessed control image.

2. The LoRA toggle didn't say which model's LoRA it accepts. Now reads
'Use a LoRA (compatible with Z-Image-Turbo)' by default. On the T2I tab,
_on_model_change updates the label to 'Use a LoRA (compatible with Z-Image)'
when the user picks Base. ControlNet + Upscale stay Turbo-locked.

Files changed (4) hide show

app.py +8 -6
modes.py +5 -0
tests/test_modes.py +4 -0
ui.py +3 -3

app.py CHANGED Viewed

@@ -93,11 +93,12 @@ def _coerce_lora(lora_path: str | None) -> Path | None:
     return p
-def _on_model_change(model_name: str) -> tuple[int, float]:
-    """When the user picks Base / Turbo in the radio, update steps + cfg defaults."""
     if model_name == "Base":
-        return 25, 4.0
-    return 8, 1.0  # Turbo
 def _preview_cn(image, mode):
@@ -267,11 +268,12 @@ def build_app() -> gr.Blocks:
                     ],
                     outputs=[t["output_image"], t["output_meta"]],
                 )
-                # Radio change → update step / cfg defaults + reveal Base-only fields.
                 t["model"].change(
                     fn=_on_model_change,
                     inputs=[t["model"]],
-                    outputs=[t["steps"], t["cfg"]],
                 )
                 t["model"].change(
                     fn=lambda m: gr.Group(visible=(m == "Base")),

     return p
+def _on_model_change(model_name: str):
+    """When the user picks Base / Turbo in the radio, update steps + cfg defaults
+    and the LoRA-compatibility hint on the toggle label."""
     if model_name == "Base":
+        return 25, 4.0, gr.update(label="Use a LoRA (compatible with Z-Image)")
+    return 8, 1.0, gr.update(label="Use a LoRA (compatible with Z-Image-Turbo)")
 def _preview_cn(image, mode):
                     ],
                     outputs=[t["output_image"], t["output_meta"]],
                 )
+                # Radio change → update step / cfg defaults + LoRA-compatibility hint
+                # on the toggle label + reveal Base-only fields.
                 t["model"].change(
                     fn=_on_model_change,
                     inputs=[t["model"]],
+                    outputs=[t["steps"], t["cfg"], t["lora_enabled"]],
                 )
                 t["model"].change(
                     fn=lambda m: gr.Group(visible=(m == "Base")),

modes.py CHANGED Viewed

@@ -162,6 +162,11 @@ def call_upscale(pipe: Any, params: dict[str, Any]) -> tuple[Image.Image, dict[s
         sigma_shift=3.0,
         input_image=upscaled,
         denoising_strength=float(params.get("refine_denoise", 0.33)),
         seed=int(params.get("seed", 0)),
     )

         sigma_shift=3.0,
         input_image=upscaled,
         denoising_strength=float(params.get("refine_denoise", 0.33)),
+        # Track the upscaled image's dims so the noise initializer builds latents of
+        # the same shape as the VAE-encoded input_image. Otherwise DiffSynth defaults
+        # height/width to 1024 and add_noise crashes on a shape mismatch.
+        height=upscaled.size[1],
+        width=upscaled.size[0],
         seed=int(params.get("seed", 0)),
     )

tests/test_modes.py CHANGED Viewed

@@ -194,6 +194,10 @@ def test_upscale_runs_realesrgan_then_pipeline(fake_pipe, monkeypatch):
     assert kwargs["denoising_strength"] == 0.33
     assert kwargs["num_inference_steps"] == 5
     assert kwargs["cfg_scale"] == 1.0
     assert meta["mode"] == "upscale"

     assert kwargs["denoising_strength"] == 0.33
     assert kwargs["num_inference_steps"] == 5
     assert kwargs["cfg_scale"] == 1.0
+    # height/width must match the post-upscale image, else add_noise blows up on
+    # a shape mismatch between input_latents and noise.
+    assert kwargs["width"] == 1024
+    assert kwargs["height"] == 1024
     assert meta["mode"] == "upscale"

ui.py CHANGED Viewed

@@ -69,7 +69,7 @@ def build_t2i_tab() -> dict[str, gr.components.Component]:
                     info=TOOLTIPS["cfg"],
                 )
-            lora_enabled = gr.Checkbox(label="Use a LoRA", value=False)
             with gr.Group(visible=False) as lora_group:
                 lora_path = gr.File(
                     label="LoRA file",
@@ -174,7 +174,7 @@ def build_controlnet_tab() -> dict[str, gr.components.Component]:
                     info=TOOLTIPS["controlnet_scale"],
                 )
-            lora_enabled = gr.Checkbox(label="Use a LoRA", value=False)
             with gr.Group(visible=False) as lora_group:
                 lora_path = gr.File(
                     label="LoRA file",
@@ -262,7 +262,7 @@ def build_upscale_tab() -> dict[str, gr.components.Component]:
                     info=TOOLTIPS["refine_denoise"],
                 )
-            lora_enabled = gr.Checkbox(label="Use a LoRA", value=False)
             with gr.Group(visible=False) as lora_group:
                 lora_path = gr.File(
                     label="LoRA file",

                     info=TOOLTIPS["cfg"],
                 )
+            lora_enabled = gr.Checkbox(label="Use a LoRA (compatible with Z-Image-Turbo)", value=False)
             with gr.Group(visible=False) as lora_group:
                 lora_path = gr.File(
                     label="LoRA file",
                     info=TOOLTIPS["controlnet_scale"],
                 )
+            lora_enabled = gr.Checkbox(label="Use a LoRA (compatible with Z-Image-Turbo)", value=False)
             with gr.Group(visible=False) as lora_group:
                 lora_path = gr.File(
                     label="LoRA file",
                     info=TOOLTIPS["refine_denoise"],
                 )
+            lora_enabled = gr.Checkbox(label="Use a LoRA (compatible with Z-Image-Turbo)", value=False)
             with gr.Group(visible=False) as lora_group:
                 lora_path = gr.File(
                     label="LoRA file",