multimodalart
Broader Citrus dark-mode fix: override container color directly
37b081d
"""Gradio ZeroGPU demo for L2P (Z-Image-6B pixel-space).
Loads the L2P-z-image pipeline once at module scope (ZeroGPU lets us touch
CUDA at import time after `import spaces`). Text encoder and tokenizer come
from Tongyi-MAI/Z-Image-Turbo; the L2P 1k merged DiT comes from zhen-nan/L2P.
"""
from __future__ import annotations
import os
import random
from pathlib import Path
import spaces
import torch
import gradio as gr
from huggingface_hub import hf_hub_download, snapshot_download
from diffsynth.pipelines.z_image_L2P import ZImagePipeline, ModelConfig
# Precision handed to the pipeline loader as `torch_dtype`.
DTYPE = torch.bfloat16
# Device string used when loading the pipeline and as `rand_device` per call.
DEVICE = "cuda"
# Inclusive upper bound for seeds (2**31 - 1); shared by the seed slider and
# the random seed draw.
MAX_SEED = (1 << 31) - 1
# ---------------------------------------------------------------------------
# 1. Download checkpoints.
# ---------------------------------------------------------------------------
# Single checkpoint file from the L2P repository.
l2p_model_path = hf_hub_download(
    filename="model-1k-merge.safetensors",
    repo_id="zhen-nan/L2P",
)

# Only the text-encoder and tokenizer folders are requested from Z-Image-Turbo.
zimage_dir = snapshot_download(
    allow_patterns=["text_encoder/*", "tokenizer/*"],
    repo_id="Tongyi-MAI/Z-Image-Turbo",
)
_zimage_root = Path(zimage_dir)

# Text-encoder weight files matching `model-*.safetensors`, as sorted strings.
text_encoder_paths = sorted(
    map(str, (_zimage_root / "text_encoder").glob("model-*.safetensors"))
)
tokenizer_path = (_zimage_root / "tokenizer").as_posix()
# ---------------------------------------------------------------------------
# 2. Build the pipeline at module load.
# ---------------------------------------------------------------------------
print("[L2P] loading pipeline...")

# One config for the L2P checkpoint, one for the text-encoder weight files.
_model_configs = [
    ModelConfig(path=[l2p_model_path]),
    ModelConfig(path=text_encoder_paths),
]
_tokenizer_config = ModelConfig(path=tokenizer_path)

pipe = ZImagePipeline.from_pretrained(
    torch_dtype=DTYPE,
    device=DEVICE,
    model_configs=_model_configs,
    tokenizer_config=_tokenizer_config,
)

print("[L2P] ready.")
@spaces.GPU(duration=120)
@torch.no_grad()
def generate(
    prompt: str,
    negative_prompt: str = "",
    height: int = 1024,
    width: int = 1024,
    steps: int = 30,
    cfg_scale: float = 2.0,
    seed: int = 0,
    randomize_seed: bool = True,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the module-level pipeline for one prompt.

    Args:
        prompt: Text prompt; surrounding whitespace is stripped before use.
        negative_prompt: Negative prompt; a falsy value is sent as "".
        height: Forwarded to the pipeline after conversion to int.
        width: Forwarded to the pipeline after conversion to int.
        steps: Forwarded as `num_inference_steps` after conversion to int.
        cfg_scale: Forwarded to the pipeline after conversion to float.
        seed: Seed to use when `randomize_seed` is false.
        randomize_seed: When true, `seed` is ignored and a seed is drawn
            uniformly from [0, MAX_SEED].
        progress: Gradio progress tracker created with `track_tqdm=True`;
            it is not referenced inside the body.

    Returns:
        A `(image, seed)` tuple: whatever the pipeline call returns, and the
        integer seed that was passed to it.

    Raises:
        gr.Error: If `prompt` is empty or contains only whitespace.
    """
    text = prompt.strip() if prompt else ""
    if not text:
        raise gr.Error("Please enter a prompt.")

    # Either draw a fresh seed or keep the caller's, then normalise to int
    # (slider values may arrive as floats).
    seed = int(random.randint(0, MAX_SEED) if randomize_seed else seed)

    pipe_kwargs = {
        "prompt": text,
        "negative_prompt": negative_prompt or "",
        "cfg_scale": float(cfg_scale),
        "height": int(height),
        "width": int(width),
        "seed": seed,
        "rand_device": DEVICE,
        "num_inference_steps": int(steps),
    }
    result = pipe(**pipe_kwargs)
    return result, seed
# Custom stylesheet passed to `gr.Blocks(css=...)`: caps the width of the
# `#col-container` column and carries the dark-mode text-color override whose
# rationale is given in the CSS comment inside the string.
CSS = """
#col-container { max-width: 1100px; margin: 0 auto; }
/* Workaround for gradio Citrus theme dark-mode bug.
Root cause: Embed.svelte sets `.gradio-container { color: var(--button-secondary-text-color); }`,
and Citrus's button_secondary_text_color_dark = neutral_900 (near-black) — so any
unstyled descendant text (progress, Examples cells, …) inherits dark-on-dark.
Override the container's inherited color directly — this is the architectural
fix from the upstream PR applied as user CSS. */
.dark .gradio-container { color: var(--body-text-color); }
"""
# Prompts offered in the Examples table; each one becomes a single-column row.
example_prompts = [
    "an origami pig on fire in the middle of a dark room with a pentagram on the floor",
    "Young Chinese woman in red Hanfu, intricate embroidery, impeccable makeup, red floral forehead pattern, elaborate high bun with golden phoenix headdress, holding a round folding fan, soft-lit outdoor night background with silhouetted tiered pagoda, blurred colorful distant lights",
    "A cinematic photograph of a lone astronaut standing on a black volcanic beach at sunset, deep orange sky, crashing waves, dramatic backlight",
    "A vintage botanical illustration of a fictional bioluminescent mushroom forest, ink and watercolor, ornate Latin labels, parchment background",
]

with gr.Blocks(theme=gr.themes.Citrus(), css=CSS, title="L2P - Z-Image 6B Pixel-Space") as demo:
    with gr.Column(elem_id="col-container"):
        # Page header with project links.
        gr.Markdown(
            """
# L2P — Z-Image 6B Pixel-Space
End-to-end **pixel-space** diffusion built on Z-Image-Turbo via
**L2P** (Latent → Pixel transfer). 6B params, 1K resolution, no VAE.
[Project](https://nju-pcalab.github.io/projects/L2P/) · [Paper](https://arxiv.org/abs/2605.12013) · [Code](https://github.com/TencentYoutuResearch/T2I-L2P) · [Weights](https://huggingface.co/zhen-nan/L2P)
"""
        )

        with gr.Row():
            # Left column: prompt, run button and advanced settings.
            with gr.Column(scale=2):
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="an origami pig on fire in the middle of a dark room with a pentagram on the floor",
                    lines=3,
                )
                run_btn = gr.Button(value="Generate", variant="primary")

                with gr.Accordion(label="Advanced", open=False):
                    negative_prompt = gr.Textbox(label="Negative prompt", value="", lines=2)
                    with gr.Row():
                        height = gr.Slider(minimum=256, maximum=1024, value=1024, step=16, label="Height")
                        width = gr.Slider(minimum=256, maximum=1024, value=1024, step=16, label="Width")
                    with gr.Row():
                        steps = gr.Slider(minimum=1, maximum=100, value=30, step=1, label="Steps")
                        cfg = gr.Slider(minimum=0.1, maximum=10.0, value=2.0, step=0.1, label="CFG scale")
                    with gr.Row():
                        seed = gr.Slider(minimum=0, maximum=MAX_SEED, value=42, step=1, label="Seed")
                        randomize = gr.Checkbox(value=True, label="Randomize seed")

            # Right column: generated image and the seed that produced it.
            with gr.Column(scale=3):
                image = gr.Image(label="Output", type="pil", height=640)
                used_seed = gr.Number(label="Seed used", interactive=False)

        # Examples supply only the prompt, so `generate` runs with its own
        # defaults for every other parameter; results are cached lazily.
        gr.Examples(
            examples=[[text] for text in example_prompts],
            inputs=[prompt],
            outputs=[image, used_seed],
            fn=generate,
            cache_examples=True,
            cache_mode="lazy",
        )

    # The button click and pressing Enter in the prompt box run the same
    # handler with the same wiring; registration order is click, then submit.
    generate_inputs = [prompt, negative_prompt, height, width, steps, cfg, seed, randomize]
    generate_outputs = [image, used_seed]
    for register_listener in (run_btn.click, prompt.submit):
        register_listener(
            generate,
            inputs=generate_inputs,
            outputs=generate_outputs,
        )
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()