Spaces:
Running on Zero
fix(upscale): pass post-upscale dims to pipe + LoRA label hints model compat
Browse files
Two coupled UX fixes from live testing:
1. Upscale generation crashed with 'size of tensor a (256) must match size of
tensor b (128) at non-singleton dimension 3' in DiffSynth's add_noise.
RealESRGAN x4 + 0.5 resize yields a 2x image (1024 -> 2048). The VAE-
encoded input latents were 256x256 but DiffSynth's noise initializer used
the default 1024 height/width, producing 128x128 noise. call_upscale now
passes height=upscaled.size[1], width=upscaled.size[0] — mirrors what
call_controlnet already did for the preprocessed control image.
2. The LoRA toggle didn't say which model's LoRA it accepts. Now reads
'Use a LoRA (compatible with Z-Image-Turbo)' by default. On the T2I tab,
_on_model_change updates the label to 'Use a LoRA (compatible with Z-Image)'
when the user picks Base. ControlNet + Upscale stay Turbo-locked.
|
@@ -93,11 +93,12 @@ def _coerce_lora(lora_path: str | None) -> Path | None:
|
|
| 93 |
return p
|
| 94 |
|
| 95 |
|
| 96 |
-
def _on_model_change(model_name: str) -> tuple[int, float]:
|
| 97 |
-
"""When the user picks Base / Turbo in the radio, update steps + cfg defaults."""
|
|
|
|
| 98 |
if model_name == "Base":
|
| 99 |
-
return 25, 4.0
|
| 100 |
-
return 8, 1.0
|
| 101 |
|
| 102 |
|
| 103 |
def _preview_cn(image, mode):
|
|
@@ -267,11 +268,12 @@ def build_app() -> gr.Blocks:
|
|
| 267 |
],
|
| 268 |
outputs=[t["output_image"], t["output_meta"]],
|
| 269 |
)
|
| 270 |
-
# Radio change → update step / cfg defaults + reveal Base-only fields.
|
|
|
|
| 271 |
t["model"].change(
|
| 272 |
fn=_on_model_change,
|
| 273 |
inputs=[t["model"]],
|
| 274 |
-
outputs=[t["steps"], t["cfg"]],
|
| 275 |
)
|
| 276 |
t["model"].change(
|
| 277 |
fn=lambda m: gr.Group(visible=(m == "Base")),
|
|
|
|
| 93 |
return p
|
| 94 |
|
| 95 |
|
| 96 |
+
def _on_model_change(model_name: str):
|
| 97 |
+
"""When the user picks Base / Turbo in the radio, update steps + cfg defaults
|
| 98 |
+
and the LoRA-compatibility hint on the toggle label."""
|
| 99 |
if model_name == "Base":
|
| 100 |
+
return 25, 4.0, gr.update(label="Use a LoRA (compatible with Z-Image)")
|
| 101 |
+
return 8, 1.0, gr.update(label="Use a LoRA (compatible with Z-Image-Turbo)")
|
| 102 |
|
| 103 |
|
| 104 |
def _preview_cn(image, mode):
|
|
|
|
| 268 |
],
|
| 269 |
outputs=[t["output_image"], t["output_meta"]],
|
| 270 |
)
|
| 271 |
+
# Radio change → update step / cfg defaults + LoRA-compatibility hint
|
| 272 |
+
# on the toggle label + reveal Base-only fields.
|
| 273 |
t["model"].change(
|
| 274 |
fn=_on_model_change,
|
| 275 |
inputs=[t["model"]],
|
| 276 |
+
outputs=[t["steps"], t["cfg"], t["lora_enabled"]],
|
| 277 |
)
|
| 278 |
t["model"].change(
|
| 279 |
fn=lambda m: gr.Group(visible=(m == "Base")),
|
|
@@ -162,6 +162,11 @@ def call_upscale(pipe: Any, params: dict[str, Any]) -> tuple[Image.Image, dict[s
|
|
| 162 |
sigma_shift=3.0,
|
| 163 |
input_image=upscaled,
|
| 164 |
denoising_strength=float(params.get("refine_denoise", 0.33)),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
seed=int(params.get("seed", 0)),
|
| 166 |
)
|
| 167 |
|
|
|
|
| 162 |
sigma_shift=3.0,
|
| 163 |
input_image=upscaled,
|
| 164 |
denoising_strength=float(params.get("refine_denoise", 0.33)),
|
| 165 |
+
# Track the upscaled image's dims so the noise initializer builds latents of
|
| 166 |
+
# the same shape as the VAE-encoded input_image. Otherwise DiffSynth defaults
|
| 167 |
+
# height/width to 1024 and add_noise crashes on a shape mismatch.
|
| 168 |
+
height=upscaled.size[1],
|
| 169 |
+
width=upscaled.size[0],
|
| 170 |
seed=int(params.get("seed", 0)),
|
| 171 |
)
|
| 172 |
|
|
@@ -194,6 +194,10 @@ def test_upscale_runs_realesrgan_then_pipeline(fake_pipe, monkeypatch):
|
|
| 194 |
assert kwargs["denoising_strength"] == 0.33
|
| 195 |
assert kwargs["num_inference_steps"] == 5
|
| 196 |
assert kwargs["cfg_scale"] == 1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
assert meta["mode"] == "upscale"
|
| 198 |
|
| 199 |
|
|
|
|
| 194 |
assert kwargs["denoising_strength"] == 0.33
|
| 195 |
assert kwargs["num_inference_steps"] == 5
|
| 196 |
assert kwargs["cfg_scale"] == 1.0
|
| 197 |
+
# height/width must match the post-upscale image, else add_noise blows up on
|
| 198 |
+
# a shape mismatch between input_latents and noise.
|
| 199 |
+
assert kwargs["width"] == 1024
|
| 200 |
+
assert kwargs["height"] == 1024
|
| 201 |
assert meta["mode"] == "upscale"
|
| 202 |
|
| 203 |
|
|
@@ -69,7 +69,7 @@ def build_t2i_tab() -> dict[str, gr.components.Component]:
|
|
| 69 |
info=TOOLTIPS["cfg"],
|
| 70 |
)
|
| 71 |
|
| 72 |
-
lora_enabled = gr.Checkbox(label="Use a LoRA", value=False)
|
| 73 |
with gr.Group(visible=False) as lora_group:
|
| 74 |
lora_path = gr.File(
|
| 75 |
label="LoRA file",
|
|
@@ -174,7 +174,7 @@ def build_controlnet_tab() -> dict[str, gr.components.Component]:
|
|
| 174 |
info=TOOLTIPS["controlnet_scale"],
|
| 175 |
)
|
| 176 |
|
| 177 |
-
lora_enabled = gr.Checkbox(label="Use a LoRA", value=False)
|
| 178 |
with gr.Group(visible=False) as lora_group:
|
| 179 |
lora_path = gr.File(
|
| 180 |
label="LoRA file",
|
|
@@ -262,7 +262,7 @@ def build_upscale_tab() -> dict[str, gr.components.Component]:
|
|
| 262 |
info=TOOLTIPS["refine_denoise"],
|
| 263 |
)
|
| 264 |
|
| 265 |
-
lora_enabled = gr.Checkbox(label="Use a LoRA", value=False)
|
| 266 |
with gr.Group(visible=False) as lora_group:
|
| 267 |
lora_path = gr.File(
|
| 268 |
label="LoRA file",
|
|
|
|
| 69 |
info=TOOLTIPS["cfg"],
|
| 70 |
)
|
| 71 |
|
| 72 |
+
lora_enabled = gr.Checkbox(label="Use a LoRA (compatible with Z-Image-Turbo)", value=False)
|
| 73 |
with gr.Group(visible=False) as lora_group:
|
| 74 |
lora_path = gr.File(
|
| 75 |
label="LoRA file",
|
|
|
|
| 174 |
info=TOOLTIPS["controlnet_scale"],
|
| 175 |
)
|
| 176 |
|
| 177 |
+
lora_enabled = gr.Checkbox(label="Use a LoRA (compatible with Z-Image-Turbo)", value=False)
|
| 178 |
with gr.Group(visible=False) as lora_group:
|
| 179 |
lora_path = gr.File(
|
| 180 |
label="LoRA file",
|
|
|
|
| 262 |
info=TOOLTIPS["refine_denoise"],
|
| 263 |
)
|
| 264 |
|
| 265 |
+
lora_enabled = gr.Checkbox(label="Use a LoRA (compatible with Z-Image-Turbo)", value=False)
|
| 266 |
with gr.Group(visible=False) as lora_group:
|
| 267 |
lora_path = gr.File(
|
| 268 |
label="LoRA file",
|