Spaces:

multimodalart
/

pid

Running on Zero

App Files Community

apolinario committed 1 day ago

Commit

1e34c9b

1 Parent(s): d1e77f7

Shorter description; add 'Randomize seed' checkbox that writes the picked seed back into the Seed field

Browse files

Files changed (1) hide show

app.py +16 -12

app.py CHANGED Viewed

@@ -218,6 +218,7 @@ def _evenly_spaced_capture_steps(total_steps: int, num_captures: int) -> list[in
     return sorted({int(round(x)) for x in raw})
 import threading
 import queue as _queue
@@ -229,15 +230,19 @@ def generate(
     guidance_scale: float = 5.0,
     seed: int = 0,
     resolution: int = 512,
 ):
     if not prompt or not prompt.strip():
         raise gr.Error("Please enter a prompt.")
     num_inference_steps = int(num_inference_steps)
     H = W = int(resolution)
     # initial: show the live preview, hide the final slider
-    yield gr.update(visible=True, value=None, label="Generating Z-Image…"), gr.update(visible=False, value=None)
     # ---- Run Z-Image in a thread; stream taef1 previews via a queue ----
     preview_q: "_queue.Queue" = _queue.Queue()
@@ -285,13 +290,13 @@ def generate(
             raw_output = payload
             break
         label = f"Generating Z-Image — step {step_index + 1}/{num_inference_steps}"
-        yield gr.update(visible=True, value=payload, label=label), gr.update(visible=False)
     thread.join()
     final_latent = extract_latent(pipeline, raw_output, pipe_cfg, H, W)
     # ---- VAE decode of the final clean latent (Z-Image baseline) ----
-    yield gr.update(visible=True, label="Decoding final Z-Image…"), gr.update(visible=False)
     with torch.no_grad():
         baseline_01 = decode_with_pipeline_vae(pipeline, final_latent, pipe_cfg)
     zimage_img = Image.fromarray(
@@ -306,25 +311,23 @@ def generate(
         yield (
             gr.update(visible=True, value=pid_img, label=f"Upscaling with PiD — step {k}/{total}"),
             gr.update(visible=False),
         )
     # ---- Done: hide live preview, show the A/B slider ----
     yield (
         gr.update(visible=False, value=None),
         gr.update(visible=True, value=(zimage_img, pid_img)),
     )
 DESCRIPTION = """
 # 🪄 PiD — Pixel Diffusion Decoder for Z-Image
-Each tile shows what NVIDIA's [PiD](https://github.com/nv-tlabs/PiD) (a 4-step
-distilled pixel-space diffusion decoder) reconstructs from Z-Image's denoising
-loop at progressive timesteps. The first few tiles come from noisy intermediate
-latents (`xt`); the last tile is decoded from the final clean `x₀`.
-PiD upsamples 4× during decode, so a 512² Z-Image latent track becomes a
-2048² super-resolved image.
 """
 CSS = " .dark .gradio-container { color: var(--body-text-color);"
@@ -356,11 +359,12 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=CSS) as demo:
         with gr.Row():
             guidance_scale = gr.Slider(label="Guidance", minimum=1.0, maximum=10.0, step=0.5, value=5.0)
             seed = gr.Number(label="Seed", value=0, precision=0)
     run.click(
         fn=generate,
-        inputs=[prompt, num_inference_steps, guidance_scale, seed, resolution],
-        outputs=[live_preview, slider],
     )
 if __name__ == "__main__":

     return sorted({int(round(x)) for x in raw})
+import random
 import threading
 import queue as _queue
     guidance_scale: float = 5.0,
     seed: int = 0,
     resolution: int = 512,
+    randomize_seed: bool = False,
 ):
     if not prompt or not prompt.strip():
         raise gr.Error("Please enter a prompt.")
+    if randomize_seed:
+        seed = random.randint(0, 2**31 - 1)
+    seed = int(seed)
     num_inference_steps = int(num_inference_steps)
     H = W = int(resolution)
     # initial: show the live preview, hide the final slider
+    yield gr.update(visible=True, value=None, label="Generating Z-Image…"), gr.update(visible=False, value=None), gr.update(value=seed)
     # ---- Run Z-Image in a thread; stream taef1 previews via a queue ----
     preview_q: "_queue.Queue" = _queue.Queue()
             raw_output = payload
             break
         label = f"Generating Z-Image — step {step_index + 1}/{num_inference_steps}"
+        yield gr.update(visible=True, value=payload, label=label), gr.update(visible=False), gr.update()
     thread.join()
     final_latent = extract_latent(pipeline, raw_output, pipe_cfg, H, W)
     # ---- VAE decode of the final clean latent (Z-Image baseline) ----
+    yield gr.update(visible=True, label="Decoding final Z-Image…"), gr.update(visible=False), gr.update()
     with torch.no_grad():
         baseline_01 = decode_with_pipeline_vae(pipeline, final_latent, pipe_cfg)
     zimage_img = Image.fromarray(
         yield (
             gr.update(visible=True, value=pid_img, label=f"Upscaling with PiD — step {k}/{total}"),
             gr.update(visible=False),
+            gr.update(),
         )
     # ---- Done: hide live preview, show the A/B slider ----
     yield (
         gr.update(visible=False, value=None),
         gr.update(visible=True, value=(zimage_img, pid_img)),
+        gr.update(),
     )
 DESCRIPTION = """
 # 🪄 PiD — Pixel Diffusion Decoder for Z-Image
+Runs [Z-Image](https://huggingface.co/Tongyi-MAI/Z-Image) (live previews via TAEF1) then
+[PiD](https://github.com/nv-tlabs/PiD)'s 4-step pixel-diffusion decoder for a 4×
+super-resolved result. The slider compares Z-Image's native VAE output to the PiD upscale.
 """
 CSS = " .dark .gradio-container { color: var(--body-text-color);"
         with gr.Row():
             guidance_scale = gr.Slider(label="Guidance", minimum=1.0, maximum=10.0, step=0.5, value=5.0)
             seed = gr.Number(label="Seed", value=0, precision=0)
+            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
     run.click(
         fn=generate,
+        inputs=[prompt, num_inference_steps, guidance_scale, seed, resolution, randomize_seed],
+        outputs=[live_preview, slider, seed],
     )
 if __name__ == "__main__":