Spaces:

multimodalart
/

pid

Running on Zero

apolinario committed 1 day ago

Commit

b0fc9e3

1 Parent(s): bcabac3

Replace transformers' vmap-based mask broadcaster with explicit broadcasting (ZeroGPU __torch_function__ can't fake-alloc inside vmap)

Files changed (1) hide show

app.py CHANGED Viewed

@@ -53,26 +53,29 @@ SR_SCALE = 4
 PID_INFERENCE_STEPS = 4
 print("[pid] loading Z-Image pipeline...", flush=True)
-# transformers 4.57's SDPA causal-mask uses torch.vmap, which clashes with
-# ZeroGPU's __torch_function__ hijack during fake tensor allocation. Force
-# eager attention on the text encoder to skip the vmap codepath.
-from diffusers import ZImagePipeline
-from transformers import Qwen3Model
-_text_encoder = Qwen3Model.from_pretrained(
-    "Tongyi-MAI/Z-Image",
-    subfolder="text_encoder",
-    torch_dtype=DTYPE,
-    attn_implementation="eager",
-)
-pipeline = ZImagePipeline.from_pretrained(
-    "Tongyi-MAI/Z-Image",
-    torch_dtype=DTYPE,
-    text_encoder=_text_encoder,
-)
 pipeline.to("cuda")
-from pid._src.inference.pipeline_registry import get_config as _get_pipe_cfg
-pipe_cfg = _get_pipe_cfg(BACKBONE)
 print("[pid] loading PiD decoder...", flush=True)
 pid_meta = get_pid_checkpoint(BACKBONE, CKPT_TYPE)

 PID_INFERENCE_STEPS = 4
 print("[pid] loading Z-Image pipeline...", flush=True)
+# transformers 4.57's SDPA / eager mask builders both broadcast the mask
+# function over (b, h, q, k) via torch.vmap, which trips ZeroGPU's
+# __torch_function__ hijack when it tries to fake-allocate the indexed
+# tensors. Replace vmap with explicit broadcasting — same result, same speed,
+# no functorch transform context.
+from transformers import masking_utils as _mu
+# Build a stand-in for transformers' `_vmap_for_bhqkv` that relies on plain
+# tensor broadcasting instead of torch.vmap.
+#
+# Args:
+#   mask_function: callable invoked as mask_function(b, h, q, k).
+#   bh_indices: when True, the batch and head index arguments are reshaped
+#     for broadcasting as well; when False they are forwarded untouched and
+#     only q and k are reshaped.
+#
+# Returns:
+#   `wrapped`, a function of (b, h, q, k) that reshapes its index arguments
+#   and returns whatever mask_function returns for them.
+#
+# NOTE(review): the output shape is whatever mask_function's own broadcasting
+# produces. If mask_function ignores one of the indices, that axis presumably
+# stays size 1 rather than being materialised — confirm callers tolerate it.
+def _broadcasting_vmap_for_bhqkv(mask_function, bh_indices: bool = True):
+    def wrapped(b, h, q, k):
+        if bh_indices:
+            # Give each index its own axis (b -> 0, h -> 1, q -> 2, k -> 3) and
+            # singleton axes elsewhere, so the four arguments broadcast
+            # against each other. Assumes all four are 1-D — TODO confirm.
+            return mask_function(
+                b[:, None, None, None],
+                h[None, :, None, None],
+                q[None, None, :, None],
+                k[None, None, None, :],
+            )
+        # 2-D case: b and h pass through unchanged; q becomes a column and k a
+        # row so they broadcast against each other (assumes 1-D q and k).
+        return mask_function(b, h, q[:, None], k[None, :])
+    return wrapped
+_mu._vmap_for_bhqkv = _broadcasting_vmap_for_bhqkv
+pipeline, pipe_cfg = load_pipeline(BACKBONE, dtype=DTYPE)
 pipeline.to("cuda")
 print("[pid] loading PiD decoder...", flush=True)
 pid_meta = get_pid_checkpoint(BACKBONE, CKPT_TYPE)