techfreakworm committed on
Commit
296faa9
·
unverified ·
1 Parent(s): 6d862f4

fix(upscale): pass post-upscale dims to pipe + LoRA label hints model compat

Browse files

Two coupled UX fixes from live testing:

1. Upscale generation crashed with 'size of tensor a (256) must match size of
tensor b (128) at non-singleton dimension 3' in DiffSynth's add_noise.
RealESRGAN x4 followed by a 0.5 resize yields a net 2x image (1024 -> 2048).
The VAE-encoded input latents were therefore 256x256 (2048 / 8), but
DiffSynth's noise initializer used the default 1024 height/width, producing
128x128 noise (1024 / 8). call_upscale now passes height=upscaled.size[1],
width=upscaled.size[0] (PIL's size is (width, height)) — mirroring what
call_controlnet already did for the preprocessed control image.

2. The LoRA toggle didn't say which model's LoRA it accepts. Now reads
'Use a LoRA (compatible with Z-Image-Turbo)' by default. On the T2I tab,
_on_model_change updates the label to 'Use a LoRA (compatible with Z-Image)'
when the user picks Base. ControlNet + Upscale stay Turbo-locked.

Files changed (4) hide show
  1. app.py +8 -6
  2. modes.py +5 -0
  3. tests/test_modes.py +4 -0
  4. ui.py +3 -3
app.py CHANGED
@@ -93,11 +93,12 @@ def _coerce_lora(lora_path: str | None) -> Path | None:
93
  return p
94
 
95
 
96
- def _on_model_change(model_name: str) -> tuple[int, float]:
97
- """When the user picks Base / Turbo in the radio, update steps + cfg defaults."""
 
98
  if model_name == "Base":
99
- return 25, 4.0
100
- return 8, 1.0 # Turbo
101
 
102
 
103
  def _preview_cn(image, mode):
@@ -267,11 +268,12 @@ def build_app() -> gr.Blocks:
267
  ],
268
  outputs=[t["output_image"], t["output_meta"]],
269
  )
270
- # Radio change → update step / cfg defaults + reveal Base-only fields.
 
271
  t["model"].change(
272
  fn=_on_model_change,
273
  inputs=[t["model"]],
274
- outputs=[t["steps"], t["cfg"]],
275
  )
276
  t["model"].change(
277
  fn=lambda m: gr.Group(visible=(m == "Base")),
 
93
  return p
94
 
95
 
96
+ def _on_model_change(model_name: str):
97
+ """When the user picks Base / Turbo in the radio, update steps + cfg defaults
98
+ and the LoRA-compatibility hint on the toggle label."""
99
  if model_name == "Base":
100
+ return 25, 4.0, gr.update(label="Use a LoRA (compatible with Z-Image)")
101
+ return 8, 1.0, gr.update(label="Use a LoRA (compatible with Z-Image-Turbo)")
102
 
103
 
104
  def _preview_cn(image, mode):
 
268
  ],
269
  outputs=[t["output_image"], t["output_meta"]],
270
  )
271
+ # Radio change → update step / cfg defaults + LoRA-compatibility hint
272
+ # on the toggle label + reveal Base-only fields.
273
  t["model"].change(
274
  fn=_on_model_change,
275
  inputs=[t["model"]],
276
+ outputs=[t["steps"], t["cfg"], t["lora_enabled"]],
277
  )
278
  t["model"].change(
279
  fn=lambda m: gr.Group(visible=(m == "Base")),
modes.py CHANGED
@@ -162,6 +162,11 @@ def call_upscale(pipe: Any, params: dict[str, Any]) -> tuple[Image.Image, dict[s
162
  sigma_shift=3.0,
163
  input_image=upscaled,
164
  denoising_strength=float(params.get("refine_denoise", 0.33)),
 
 
 
 
 
165
  seed=int(params.get("seed", 0)),
166
  )
167
 
 
162
  sigma_shift=3.0,
163
  input_image=upscaled,
164
  denoising_strength=float(params.get("refine_denoise", 0.33)),
165
+ # Track the upscaled image's dims so the noise initializer builds latents of
166
+ # the same shape as the VAE-encoded input_image. Otherwise DiffSynth defaults
167
+ # height/width to 1024 and add_noise crashes on a shape mismatch.
168
+ height=upscaled.size[1],
169
+ width=upscaled.size[0],
170
  seed=int(params.get("seed", 0)),
171
  )
172
 
tests/test_modes.py CHANGED
@@ -194,6 +194,10 @@ def test_upscale_runs_realesrgan_then_pipeline(fake_pipe, monkeypatch):
194
  assert kwargs["denoising_strength"] == 0.33
195
  assert kwargs["num_inference_steps"] == 5
196
  assert kwargs["cfg_scale"] == 1.0
 
 
 
 
197
  assert meta["mode"] == "upscale"
198
 
199
 
 
194
  assert kwargs["denoising_strength"] == 0.33
195
  assert kwargs["num_inference_steps"] == 5
196
  assert kwargs["cfg_scale"] == 1.0
197
+ # height/width must match the post-upscale image, else add_noise blows up on
198
+ # a shape mismatch between input_latents and noise.
199
+ assert kwargs["width"] == 1024
200
+ assert kwargs["height"] == 1024
201
  assert meta["mode"] == "upscale"
202
 
203
 
ui.py CHANGED
@@ -69,7 +69,7 @@ def build_t2i_tab() -> dict[str, gr.components.Component]:
69
  info=TOOLTIPS["cfg"],
70
  )
71
 
72
- lora_enabled = gr.Checkbox(label="Use a LoRA", value=False)
73
  with gr.Group(visible=False) as lora_group:
74
  lora_path = gr.File(
75
  label="LoRA file",
@@ -174,7 +174,7 @@ def build_controlnet_tab() -> dict[str, gr.components.Component]:
174
  info=TOOLTIPS["controlnet_scale"],
175
  )
176
 
177
- lora_enabled = gr.Checkbox(label="Use a LoRA", value=False)
178
  with gr.Group(visible=False) as lora_group:
179
  lora_path = gr.File(
180
  label="LoRA file",
@@ -262,7 +262,7 @@ def build_upscale_tab() -> dict[str, gr.components.Component]:
262
  info=TOOLTIPS["refine_denoise"],
263
  )
264
 
265
- lora_enabled = gr.Checkbox(label="Use a LoRA", value=False)
266
  with gr.Group(visible=False) as lora_group:
267
  lora_path = gr.File(
268
  label="LoRA file",
 
69
  info=TOOLTIPS["cfg"],
70
  )
71
 
72
+ lora_enabled = gr.Checkbox(label="Use a LoRA (compatible with Z-Image-Turbo)", value=False)
73
  with gr.Group(visible=False) as lora_group:
74
  lora_path = gr.File(
75
  label="LoRA file",
 
174
  info=TOOLTIPS["controlnet_scale"],
175
  )
176
 
177
+ lora_enabled = gr.Checkbox(label="Use a LoRA (compatible with Z-Image-Turbo)", value=False)
178
  with gr.Group(visible=False) as lora_group:
179
  lora_path = gr.File(
180
  label="LoRA file",
 
262
  info=TOOLTIPS["refine_denoise"],
263
  )
264
 
265
+ lora_enabled = gr.Checkbox(label="Use a LoRA (compatible with Z-Image-Turbo)", value=False)
266
  with gr.Group(visible=False) as lora_group:
267
  lora_path = gr.File(
268
  label="LoRA file",