apolinario committed on
Commit
1e34c9b
·
1 Parent(s): d1e77f7

Shorter description; add 'Randomize seed' checkbox that writes the picked seed back into the Seed field

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -218,6 +218,7 @@ def _evenly_spaced_capture_steps(total_steps: int, num_captures: int) -> list[in
218
  return sorted({int(round(x)) for x in raw})
219
 
220
 
 
221
  import threading
222
  import queue as _queue
223
 
@@ -229,15 +230,19 @@ def generate(
229
  guidance_scale: float = 5.0,
230
  seed: int = 0,
231
  resolution: int = 512,
 
232
  ):
233
  if not prompt or not prompt.strip():
234
  raise gr.Error("Please enter a prompt.")
235
 
 
 
 
236
  num_inference_steps = int(num_inference_steps)
237
  H = W = int(resolution)
238
 
239
  # initial: show the live preview, hide the final slider
240
- yield gr.update(visible=True, value=None, label="Generating Z-Image…"), gr.update(visible=False, value=None)
241
 
242
  # ---- Run Z-Image in a thread; stream taef1 previews via a queue ----
243
  preview_q: "_queue.Queue" = _queue.Queue()
@@ -285,13 +290,13 @@ def generate(
285
  raw_output = payload
286
  break
287
  label = f"Generating Z-Image — step {step_index + 1}/{num_inference_steps}"
288
- yield gr.update(visible=True, value=payload, label=label), gr.update(visible=False)
289
 
290
  thread.join()
291
  final_latent = extract_latent(pipeline, raw_output, pipe_cfg, H, W)
292
 
293
  # ---- VAE decode of the final clean latent (Z-Image baseline) ----
294
- yield gr.update(visible=True, label="Decoding final Z-Image…"), gr.update(visible=False)
295
  with torch.no_grad():
296
  baseline_01 = decode_with_pipeline_vae(pipeline, final_latent, pipe_cfg)
297
  zimage_img = Image.fromarray(
@@ -306,25 +311,23 @@ def generate(
306
  yield (
307
  gr.update(visible=True, value=pid_img, label=f"Upscaling with PiD — step {k}/{total}"),
308
  gr.update(visible=False),
 
309
  )
310
 
311
  # ---- Done: hide live preview, show the A/B slider ----
312
  yield (
313
  gr.update(visible=False, value=None),
314
  gr.update(visible=True, value=(zimage_img, pid_img)),
 
315
  )
316
 
317
 
318
  DESCRIPTION = """
319
  # 🪄 PiD — Pixel Diffusion Decoder for Z-Image
320
 
321
- Each tile shows what NVIDIA's [PiD](https://github.com/nv-tlabs/PiD) (a 4-step
322
- distilled pixel-space diffusion decoder) reconstructs from Z-Image's denoising
323
- loop at progressive timesteps. The first few tiles come from noisy intermediate
324
- latents (`xt`); the last tile is decoded from the final clean `x₀`.
325
-
326
- PiD upsamples 4× during decode, so a 512² Z-Image latent track becomes a
327
- 2048² super-resolved image.
328
  """
329
 
330
  CSS = " .dark .gradio-container { color: var(--body-text-color);"
@@ -356,11 +359,12 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=CSS) as demo:
356
  with gr.Row():
357
  guidance_scale = gr.Slider(label="Guidance", minimum=1.0, maximum=10.0, step=0.5, value=5.0)
358
  seed = gr.Number(label="Seed", value=0, precision=0)
 
359
 
360
  run.click(
361
  fn=generate,
362
- inputs=[prompt, num_inference_steps, guidance_scale, seed, resolution],
363
- outputs=[live_preview, slider],
364
  )
365
 
366
  if __name__ == "__main__":
 
218
  return sorted({int(round(x)) for x in raw})
219
 
220
 
221
+ import random
222
  import threading
223
  import queue as _queue
224
 
 
230
  guidance_scale: float = 5.0,
231
  seed: int = 0,
232
  resolution: int = 512,
233
+ randomize_seed: bool = False,
234
  ):
235
  if not prompt or not prompt.strip():
236
  raise gr.Error("Please enter a prompt.")
237
 
238
+ if randomize_seed:
239
+ seed = random.randint(0, 2**31 - 1)
240
+ seed = int(seed)
241
  num_inference_steps = int(num_inference_steps)
242
  H = W = int(resolution)
243
 
244
  # initial: show the live preview, hide the final slider
245
+ yield gr.update(visible=True, value=None, label="Generating Z-Image…"), gr.update(visible=False, value=None), gr.update(value=seed)
246
 
247
  # ---- Run Z-Image in a thread; stream taef1 previews via a queue ----
248
  preview_q: "_queue.Queue" = _queue.Queue()
 
290
  raw_output = payload
291
  break
292
  label = f"Generating Z-Image — step {step_index + 1}/{num_inference_steps}"
293
+ yield gr.update(visible=True, value=payload, label=label), gr.update(visible=False), gr.update()
294
 
295
  thread.join()
296
  final_latent = extract_latent(pipeline, raw_output, pipe_cfg, H, W)
297
 
298
  # ---- VAE decode of the final clean latent (Z-Image baseline) ----
299
+ yield gr.update(visible=True, label="Decoding final Z-Image…"), gr.update(visible=False), gr.update()
300
  with torch.no_grad():
301
  baseline_01 = decode_with_pipeline_vae(pipeline, final_latent, pipe_cfg)
302
  zimage_img = Image.fromarray(
 
311
  yield (
312
  gr.update(visible=True, value=pid_img, label=f"Upscaling with PiD — step {k}/{total}"),
313
  gr.update(visible=False),
314
+ gr.update(),
315
  )
316
 
317
  # ---- Done: hide live preview, show the A/B slider ----
318
  yield (
319
  gr.update(visible=False, value=None),
320
  gr.update(visible=True, value=(zimage_img, pid_img)),
321
+ gr.update(),
322
  )
323
 
324
 
325
  DESCRIPTION = """
326
  # 🪄 PiD — Pixel Diffusion Decoder for Z-Image
327
 
328
+ Runs [Z-Image](https://huggingface.co/Tongyi-MAI/Z-Image) (live previews via TAEF1) then
329
+ [PiD](https://github.com/nv-tlabs/PiD)'s 4-step pixel-diffusion decoder for a
330
+ super-resolved result. The slider compares Z-Image's native VAE output to the PiD upscale.
 
 
 
 
331
  """
332
 
333
  CSS = " .dark .gradio-container { color: var(--body-text-color);"
 
359
  with gr.Row():
360
  guidance_scale = gr.Slider(label="Guidance", minimum=1.0, maximum=10.0, step=0.5, value=5.0)
361
  seed = gr.Number(label="Seed", value=0, precision=0)
362
+ randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
363
 
364
  run.click(
365
  fn=generate,
366
+ inputs=[prompt, num_inference_steps, guidance_scale, seed, resolution, randomize_seed],
367
+ outputs=[live_preview, slider, seed],
368
  )
369
 
370
  if __name__ == "__main__":