Spaces:

multimodalart
/

pid

Running on Zero

apolinario committed on 1 day ago

Commit

6cd8e25

1 Parent(s): 9014add

Fix _latent_to_pil squeeze dim (T=1, dim=1 not dim=0); stream gallery as each PiD step completes

Files changed (1) hide show

app.py CHANGED Viewed

@@ -114,9 +114,9 @@ print("[pid] ready", flush=True)
 def _latent_to_pil(tensor: torch.Tensor) -> Image.Image:
-    """[C, H, W] in [-1, 1] -> PIL.Image."""
     if tensor.dim() == 4:
-        tensor = tensor.squeeze(0)
     arr = ((tensor.float().clamp(-1, 1) + 1) * 127.5).permute(1, 2, 0).cpu().numpy().astype(np.uint8)
     return Image.fromarray(arr)
@@ -193,7 +193,6 @@ def generate(
     final_latent = extract_latent(pipeline, raw_output, pipe_cfg, H, W)
     progress(0.5, desc="Decoding each captured step with PiD…")
-    outputs: list[tuple[Image.Image, str]] = []
     steps_iter = []
     if xt_cb is not None:
         for K in sorted(xt_cb.captured.keys()):
@@ -202,8 +201,9 @@ def generate(
             xt_latent = extract_latent(pipeline, SimpleNamespace(images=xt_packed), pipe_cfg, H, W)
             steps_iter.append((f"step {K:02d}/{num_inference_steps}", xt_latent, sigma))
     final_sigma = float(pipeline.scheduler.sigmas[-1].item())
-    steps_iter.append((f"final x₀", final_latent, final_sigma))
     total = len(steps_iter)
     for i, (label, latent, sigma) in enumerate(steps_iter):
         progress(0.5 + 0.5 * (i / total), desc=f"PiD decoding {label}")
@@ -211,8 +211,7 @@ def generate(
             baseline_01 = decode_with_pipeline_vae(pipeline, latent, pipe_cfg)
             pid_img = _pid_decode(latent, baseline_01, sigma, prompt)
         outputs.append((pid_img, f"{label}  (σ={sigma:.3f})"))
-    return outputs
 DESCRIPTION = """

 def _latent_to_pil(tensor: torch.Tensor) -> Image.Image:
+    """PiD output is (C, T, H, W) with T=1 for image -> PIL.Image."""
     if tensor.dim() == 4:
+        tensor = tensor.squeeze(1)
     arr = ((tensor.float().clamp(-1, 1) + 1) * 127.5).permute(1, 2, 0).cpu().numpy().astype(np.uint8)
     return Image.fromarray(arr)
     final_latent = extract_latent(pipeline, raw_output, pipe_cfg, H, W)
     progress(0.5, desc="Decoding each captured step with PiD…")
     steps_iter = []
     if xt_cb is not None:
         for K in sorted(xt_cb.captured.keys()):
             xt_latent = extract_latent(pipeline, SimpleNamespace(images=xt_packed), pipe_cfg, H, W)
             steps_iter.append((f"step {K:02d}/{num_inference_steps}", xt_latent, sigma))
     final_sigma = float(pipeline.scheduler.sigmas[-1].item())
+    steps_iter.append(("final x₀", final_latent, final_sigma))
+    outputs: list[tuple[Image.Image, str]] = []
     total = len(steps_iter)
     for i, (label, latent, sigma) in enumerate(steps_iter):
         progress(0.5 + 0.5 * (i / total), desc=f"PiD decoding {label}")
             baseline_01 = decode_with_pipeline_vae(pipeline, latent, pipe_cfg)
             pid_img = _pid_decode(latent, baseline_01, sigma, prompt)
         outputs.append((pid_img, f"{label}  (σ={sigma:.3f})"))
+        yield outputs
 DESCRIPTION = """