Spaces:

multimodalart
/

z-image-6b-pixel-space

Running on Zero

multimodalart

Broader Citrus dark-mode fix: override container color directly

37b081d 5 days ago

6.4 kB

	"""Gradio ZeroGPU demo for L2P (Z-Image-6B pixel-space).

	Loads the L2P-z-image pipeline once at module scope (ZeroGPU lets us touch
	CUDA at import time after `import spaces`). Text encoder and tokenizer come
	from Tongyi-MAI/Z-Image-Turbo; the L2P 1k merged DiT comes from zhen-nan/L2P.
	"""

	from __future__ import annotations

	import os
	import random
	from pathlib import Path

	import spaces
	import torch
	import gradio as gr

	from huggingface_hub import hf_hub_download, snapshot_download

	from diffsynth.pipelines.z_image_L2P import ZImagePipeline, ModelConfig


	# Precision and target device used when the pipeline is constructed.
	DTYPE = torch.bfloat16
	DEVICE = "cuda"
	# Upper bound (inclusive) for seeds: the largest signed 32-bit integer.
	MAX_SEED = (1 << 31) - 1

	# ---------------------------------------------------------------------------
	# 1. Download checkpoints.
	# ---------------------------------------------------------------------------
	# Merged 1k L2P DiT weights (single safetensors file).
	l2p_model_path = hf_hub_download(
	    repo_id="zhen-nan/L2P",
	    filename="model-1k-merge.safetensors",
	)
	# Only the text encoder and tokenizer folders are needed from Z-Image-Turbo.
	# Explicit "dir/*" globs are used instead of bare "dir/" so the filter does
	# not depend on the installed huggingface_hub expanding directory patterns.
	zimage_dir = snapshot_download(
	    repo_id="Tongyi-MAI/Z-Image-Turbo",
	    allow_patterns=["text_encoder/*", "tokenizer/*"],
	)

	text_encoder_dir = Path(zimage_dir) / "text_encoder"
	# Sorted so the shard list is in a deterministic (lexicographic) order.
	text_encoder_paths = sorted(
	    str(shard) for shard in text_encoder_dir.glob("model-*.safetensors")
	)
	if not text_encoder_paths:
	    # Fail here with a clear message rather than handing an empty path
	    # list to the pipeline loader.
	    raise FileNotFoundError(
	        f"No 'model-*.safetensors' shards found in {text_encoder_dir}"
	    )
	tokenizer_path = (Path(zimage_dir) / "tokenizer").as_posix()

	# ---------------------------------------------------------------------------
	# 2. Build the pipeline at module load.
	# ---------------------------------------------------------------------------
	# Built once at import time; `pipe` is the module-level object that
	# `generate` calls for every request.
	print("[L2P] loading pipeline...")
	_dit_config = ModelConfig(path=[l2p_model_path])
	_text_encoder_config = ModelConfig(path=text_encoder_paths)
	_tokenizer_config = ModelConfig(path=tokenizer_path)
	pipe = ZImagePipeline.from_pretrained(
	    torch_dtype=DTYPE,
	    device=DEVICE,
	    model_configs=[_dit_config, _text_encoder_config],
	    tokenizer_config=_tokenizer_config,
	)
	print("[L2P] ready.")


	@spaces.GPU(duration=120)
	@torch.no_grad()
	def generate(
	    prompt: str,
	    negative_prompt: str = "",
	    height: int = 1024,
	    width: int = 1024,
	    steps: int = 30,
	    cfg_scale: float = 2.0,
	    seed: int = 0,
	    randomize_seed: bool = True,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Run the module-level pipeline for one prompt.

	    Args:
	        prompt: Text prompt; surrounding whitespace is stripped.
	        negative_prompt: Negative prompt; any falsy value becomes "".
	        height: Output height, cast to int.
	        width: Output width, cast to int.
	        steps: Number of inference steps, cast to int.
	        cfg_scale: CFG scale, cast to float.
	        seed: Seed used when ``randomize_seed`` is false.
	        randomize_seed: When true, a seed is drawn uniformly from
	            ``[0, MAX_SEED]`` and ``seed`` is ignored.
	        progress: Gradio progress object created with ``track_tqdm=True``;
	            it is not referenced in the body.

	    Returns:
	        A ``(image, seed)`` tuple: the pipeline's output and the integer
	        seed that was actually used.

	    Raises:
	        gr.Error: If ``prompt`` is empty or contains only whitespace.
	    """
	    cleaned_prompt = prompt.strip() if prompt else ""
	    if not cleaned_prompt:
	        raise gr.Error("Please enter a prompt.")

	    # UI sliders may deliver floats, so the seed is normalised to int.
	    seed = int(random.randint(0, MAX_SEED) if randomize_seed else seed)

	    pipe_kwargs = {
	        "prompt": cleaned_prompt,
	        "negative_prompt": negative_prompt or "",
	        "cfg_scale": float(cfg_scale),
	        "height": int(height),
	        "width": int(width),
	        "seed": seed,
	        "rand_device": DEVICE,
	        "num_inference_steps": int(steps),
	    }
	    return pipe(**pipe_kwargs), seed


	# Custom stylesheet handed to gr.Blocks(css=...): caps the width of the
	# "#col-container" column and, in dark mode, overrides the text color that
	# descendants inherit from `.gradio-container`.
	CSS = """
	#col-container { max-width: 1100px; margin: 0 auto; }

	/* Workaround for gradio Citrus theme dark-mode bug.
	Root cause: Embed.svelte sets `.gradio-container { color: var(--button-secondary-text-color); }`,
	and Citrus's button_secondary_text_color_dark = neutral_900 (near-black) — so any
	unstyled descendant text (progress, Examples cells, …) inherits dark-on-dark.
	Override the container's inherited color directly — this is the architectural
	fix from the upstream PR applied as user CSS. */
	.dark .gradio-container { color: var(--body-text-color); }
	"""

	# Gradio UI: prompt and advanced controls on the left, output image and the
	# seed that was used on the right, example prompts below, and two triggers
	# (button click, Enter in the prompt box) wired to `generate`.
	_EXAMPLE_PROMPTS = [
	    ["an origami pig on fire in the middle of a dark room with a pentagram on the floor"],
	    ["Young Chinese woman in red Hanfu, intricate embroidery, impeccable makeup, red floral forehead pattern, elaborate high bun with golden phoenix headdress, holding a round folding fan, soft-lit outdoor night background with silhouetted tiered pagoda, blurred colorful distant lights"],
	    ["A cinematic photograph of a lone astronaut standing on a black volcanic beach at sunset, deep orange sky, crashing waves, dramatic backlight"],
	    ["A vintage botanical illustration of a fictional bioluminescent mushroom forest, ink and watercolor, ornate Latin labels, parchment background"],
	]

	with gr.Blocks(theme=gr.themes.Citrus(), css=CSS, title="L2P - Z-Image 6B Pixel-Space") as demo:
	    with gr.Column(elem_id="col-container"):
	        # Header text; the literal is kept exactly as in the original.
	        gr.Markdown(
	            """
	# L2P — Z-Image 6B Pixel-Space
	End-to-end pixel-space diffusion built on Z-Image-Turbo via
	L2P (Latent → Pixel transfer). 6B params, 1K resolution, no VAE.

	[Project](https://nju-pcalab.github.io/projects/L2P/) · [Paper](https://arxiv.org/abs/2605.12013) · [Code](https://github.com/TencentYoutuResearch/T2I-L2P) · [Weights](https://huggingface.co/zhen-nan/L2P)
	"""
	        )

	        with gr.Row():
	            # Left column: prompt, run button, and advanced settings.
	            with gr.Column(scale=2):
	                prompt = gr.Textbox(
	                    label="Prompt",
	                    placeholder="an origami pig on fire in the middle of a dark room with a pentagram on the floor",
	                    lines=3,
	                )
	                run_btn = gr.Button(value="Generate", variant="primary")
	                with gr.Accordion(label="Advanced", open=False):
	                    negative_prompt = gr.Textbox(label="Negative prompt", value="", lines=2)
	                    with gr.Row():
	                        height = gr.Slider(minimum=256, maximum=1024, value=1024, step=16, label="Height")
	                        width = gr.Slider(minimum=256, maximum=1024, value=1024, step=16, label="Width")
	                    with gr.Row():
	                        steps = gr.Slider(minimum=1, maximum=100, value=30, step=1, label="Steps")
	                        cfg = gr.Slider(minimum=0.1, maximum=10.0, value=2.0, step=0.1, label="CFG scale")
	                    with gr.Row():
	                        seed = gr.Slider(minimum=0, maximum=MAX_SEED, value=42, step=1, label="Seed")
	                        randomize = gr.Checkbox(value=True, label="Randomize seed")

	            # Right column: generated image and the seed that produced it.
	            with gr.Column(scale=3):
	                image = gr.Image(label="Output", type="pil", height=640)
	                used_seed = gr.Number(label="Seed used", interactive=False)

	        # Examples supply only the prompt, so `generate` runs with its
	        # default values for every other parameter.
	        gr.Examples(
	            examples=_EXAMPLE_PROMPTS,
	            inputs=[prompt],
	            outputs=[image, used_seed],
	            fn=generate,
	            cache_examples=True,
	            cache_mode="lazy",
	        )

	    # Both triggers use the same inputs and outputs, in the positional order
	    # of `generate`'s parameters.
	    gen_inputs = [prompt, negative_prompt, height, width, steps, cfg, seed, randomize]
	    gen_outputs = [image, used_seed]
	    for trigger in (run_btn.click, prompt.submit):
	        trigger(fn=generate, inputs=gen_inputs, outputs=gen_outputs)

	# Start the Gradio app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()