Z-Image-Turbo_with_image

Sleeping

App Files Files Community

Z-Image-Turbo_with_image / app.py

Mccscs2

Update app.py

a69f969 verified 27 days ago

raw

history blame contribute delete

11.9 kB

	import torch
	import spaces
	import gradio as gr
	from diffusers import (
	DiffusionPipeline,
	FluxImg2ImgPipeline,
	StableDiffusionInstructPix2PixPipeline,
	StableDiffusionXLImg2ImgPipeline,
	)
	from PIL import Image

	print("Loading pipelines...")

	# Text-to-image model, loaded in bfloat16 and placed on the GPU right away.
	pipe_t2i = DiffusionPipeline.from_pretrained(
	    "Tongyi-MAI/Z-Image-Turbo",
	    torch_dtype=torch.bfloat16,
	    low_cpu_mem_usage=False,
	).to("cuda")

	# FLUX.1-schnell image-to-image model (bfloat16).
	pipe_flux = FluxImg2ImgPipeline.from_pretrained(
	    "black-forest-labs/FLUX.1-schnell",
	    torch_dtype=torch.bfloat16,
	).to("cuda")

	# InstructPix2Pix editing model (float16), loaded without a safety checker.
	pipe_ip2p = StableDiffusionInstructPix2PixPipeline.from_pretrained(
	    "timbrooks/instruct-pix2pix",
	    torch_dtype=torch.float16,
	    safety_checker=None,
	).to("cuda")

	print("Pipelines loaded! Portrait models load on first use.")

	# Cache of portrait pipelines that have already been loaded, keyed by the
	# display names used in PORTRAIT_MODELS. Starts empty so nothing is loaded
	# until a portrait model is actually requested.
	portrait_pipes = {}

	# Display name -> loading options for each selectable portrait model:
	#   repo    - Hugging Face repository id handed to from_pretrained
	#   variant - weight variant to request, or None to omit the argument
	#   sd15    - present (True) only on the entry labelled as SD1.5
	PORTRAIT_MODELS = {
	    "SDXL Base (Balanced quality & control)": dict(
	        repo="stabilityai/stable-diffusion-xl-base-1.0",
	        variant="fp16",
	    ),
	    "Juggernaut XL (Photorealistic portraits)": dict(
	        repo="RunDiffusion/Juggernaut-XL-v9",
	        variant=None,
	    ),
	    "DreamShaper XL (Creative portraits)": dict(
	        repo="Lykon/dreamshaper-xl-v2-turbo",
	        variant=None,
	    ),
	    "Realistic Vision XL (Portrait photography)": dict(
	        repo="SG161222/RealVisXL_V4.0",
	        variant="fp16",
	    ),
	    "CyberRealistic XL (Detailed skin & faces)": dict(
	        repo="stablediffusionapi/cyberrealistic-xl",
	        variant=None,
	    ),
	    "DreamShaper (SD1.5, fast portraits)": dict(
	        repo="Lykon/DreamShaper",
	        variant=None,
	        sd15=True,
	    ),
	}

	def get_portrait_pipe(model_name):
	    """Return the img2img pipeline for ``model_name``, loading it on first use.

	    Loaded pipelines are cached in ``portrait_pipes`` so each model is
	    downloaded and moved to the GPU at most once per process.

	    Args:
	        model_name: A key of ``PORTRAIT_MODELS`` (the display name shown in
	            the model selector).

	    Returns:
	        The cached pipeline object for that model.

	    Raises:
	        KeyError: If ``model_name`` is not a key of ``PORTRAIT_MODELS``.
	    """
	    if model_name not in portrait_pipes:
	        print(f"Loading {model_name} for the first time...")
	        cfg = PORTRAIT_MODELS[model_name]
	        kwargs = dict(torch_dtype=torch.float16, use_safetensors=True)
	        # Only pass ``variant`` when the entry names one; None means "use
	        # whatever weights the repository ships by default".
	        if cfg.get("variant"):
	            kwargs["variant"] = cfg["variant"]
	        if cfg.get("sd15"):
	            # Entries flagged ``sd15`` are SD1.5 checkpoints. They do not have
	            # the SDXL component layout, so they need the SD1.5 img2img
	            # pipeline class instead of the SDXL one. Imported here so the
	            # file-level import block does not have to change.
	            from diffusers import StableDiffusionImg2ImgPipeline

	            pipe_cls = StableDiffusionImg2ImgPipeline
	        else:
	            pipe_cls = StableDiffusionXLImg2ImgPipeline
	        pipe = pipe_cls.from_pretrained(cfg["repo"], **kwargs)
	        pipe.to("cuda")
	        portrait_pipes[model_name] = pipe
	        print(f"{model_name} loaded!")
	    return portrait_pipes[model_name]

	def resize_image(image, max_size=1024):
	    """Resize ``image`` so both sides are multiples of 64 and fit ``max_size``.

	    Images larger than ``max_size`` on either side are scaled down, keeping
	    the aspect ratio before rounding; smaller images are never scaled up,
	    only snapped to the nearest multiple of 64.

	    Args:
	        image: Object exposing ``size`` as ``(width, height)`` and a
	            ``resize((width, height))`` method (a PIL image in this app).
	        max_size: Upper bound, in pixels, for the longer side before rounding.

	    Returns:
	        Whatever ``image.resize`` returns for the computed ``(width, height)``.
	    """
	    orig_w, orig_h = image.size
	    # Capping the factor at 1.0 means small images are only snapped to the
	    # 64-pixel grid, never enlarged.
	    scale = min(max_size / orig_w, max_size / orig_h, 1.0)
	    # Rounding to the nearest multiple of 64 yields 0 for any side shorter
	    # than 32 px (tiny uploads, or the short side of a very elongated image
	    # after downscaling); clamp to one 64-pixel cell so the target size is
	    # always valid.
	    new_w = max(64, round(orig_w * scale / 64) * 64)
	    new_h = max(64, round(orig_h * scale / 64) * 64)
	    return image.resize((new_w, new_h))

	@spaces.GPU
	def generate_t2i(prompt, height, width, num_inference_steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
	    """Generate one image from ``prompt`` with the text-to-image pipeline.

	    Args:
	        prompt: Text description of the image.
	        height: Output height; converted with ``int``.
	        width: Output width; converted with ``int``.
	        num_inference_steps: Number of steps; converted with ``int``.
	        seed: Seed to use when ``randomize_seed`` is false.
	        randomize_seed: When true, a fresh random seed replaces ``seed``.
	        progress: Gradio progress tracker (configured to follow tqdm bars).

	    Returns:
	        ``(image, seed)``: the first generated image and the seed that was
	        actually used.
	    """
	    # Draw a new seed on request; otherwise keep the caller's value.
	    seed = torch.randint(0, 2**32 - 1, (1,)).item() if randomize_seed else seed
	    rng = torch.Generator("cuda").manual_seed(int(seed))

	    # The pipeline is called with guidance disabled (guidance_scale=0.0).
	    result = pipe_t2i(
	        prompt=prompt,
	        height=int(height),
	        width=int(width),
	        num_inference_steps=int(num_inference_steps),
	        guidance_scale=0.0,
	        generator=rng,
	    )
	    return result.images[0], seed

	@spaces.GPU
	def generate_i2i(model_choice, input_image, prompt, strength, num_inference_steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
	    """Edit ``input_image`` according to ``prompt`` with the selected model.

	    Args:
	        model_choice: Display name of the model. The FLUX and InstructPix2Pix
	            names are handled directly; any other value is looked up as a
	            portrait model via ``get_portrait_pipe``.
	        input_image: Image to edit; it is passed through ``resize_image``
	            before being handed to the pipeline.
	        prompt: Edit instruction / target description.
	        strength: How strongly to depart from the input image. Used by every
	            model except InstructPix2Pix.
	        num_inference_steps: Requested number of steps. For strength-based
	            models it is raised when necessary so that
	            ``int(steps * strength) >= 1``.
	        seed: Seed to use when ``randomize_seed`` is false.
	        randomize_seed: When true, a fresh random seed replaces ``seed``.
	        progress: Gradio progress tracker (configured to follow tqdm bars).

	    Returns:
	        ``(image, seed)``: the first output image and the seed actually used.

	    Raises:
	        gr.Error: If no image was supplied, or if ``strength`` is outside
	            ``(0, 1]`` for a strength-based model.
	        KeyError: From ``get_portrait_pipe`` when ``model_choice`` is not a
	            known portrait model.
	    """
	    import math

	    if input_image is None:
	        raise gr.Error("Please upload an image first.")

	    uses_strength = model_choice != "InstructPix2Pix (Precise, preserves identity)"
	    strength = float(strength)
	    steps = int(num_inference_steps)
	    if uses_strength:
	        if not 0.0 < strength <= 1.0:
	            raise gr.Error("Strength must be greater than 0 and at most 1.")
	        # Strength-based img2img pipelines only run about
	        # int(steps * strength) denoising steps. The UI allows combinations
	        # such as strength=0.1 with 1 step, which leaves zero steps and makes
	        # the pipeline fail, so raise the step count to the smallest value
	        # that still yields at least one step.
	        steps = max(steps, math.ceil(1.0 / strength))
	        if int(steps * strength) < 1:
	            # Guard against floating-point rounding just below 1.0.
	            steps += 1

	    if randomize_seed:
	        seed = torch.randint(0, 2**32 - 1, (1,)).item()
	    generator = torch.Generator("cuda").manual_seed(int(seed))
	    input_image = resize_image(input_image)

	    if model_choice == "FLUX.1-schnell (Creative, high change)":
	        image = pipe_flux(
	            prompt=prompt,
	            image=input_image,
	            strength=strength,
	            num_inference_steps=steps,
	            generator=generator,
	            width=input_image.width,
	            height=input_image.height,
	        ).images[0]

	    elif model_choice == "InstructPix2Pix (Precise, preserves identity)":
	        # InstructPix2Pix takes no strength; fixed guidance values are used.
	        image = pipe_ip2p(
	            prompt=prompt,
	            image=input_image,
	            num_inference_steps=steps,
	            image_guidance_scale=1.5,
	            guidance_scale=7.5,
	            generator=generator,
	        ).images[0]

	    else:
	        # Portrait models (lazy loaded)
	        pipe = get_portrait_pipe(model_choice)
	        image = pipe(
	            prompt=prompt,
	            image=input_image,
	            strength=strength,
	            num_inference_steps=steps,
	            generator=generator,
	        ).images[0]

	    return image, seed

	# Sample prompts for the text-to-image tab. Each prompt is wrapped in its
	# own one-item list, matching the single-component ``inputs=[t2i_prompt]``
	# that these examples are used with.
	examples_t2i = [
	    [prompt_text]
	    for prompt_text in (
	        "Young Chinese woman in red Hanfu, intricate embroidery, elaborate high bun, golden phoenix headdress",
	        "A majestic dragon soaring through clouds at sunset, scales shimmering with iridescent colors",
	        "Cozy coffee shop interior, warm lighting, rain on windows, plants on shelves, photorealistic",
	        "Astronaut riding a horse on Mars, cinematic lighting, sci-fi concept art, highly detailed",
	        "Portrait of a wise old wizard with a long white beard, holding a glowing crystal staff",
	    )
	]

	# Soft base theme in a yellow/amber palette with the Inter font and large
	# text and corner radii.
	custom_theme = gr.themes.Soft(
	    font=gr.themes.GoogleFont("Inter"),
	    primary_hue="yellow",
	    secondary_hue="amber",
	    neutral_hue="slate",
	    text_size="lg",
	    spacing_size="md",
	    radius_size="lg",
	)
	# Override individual theme variables: primary button fill (normal and
	# hover) and the weight of block titles.
	custom_theme = custom_theme.set(
	    button_primary_background_fill="*primary_500",
	    button_primary_background_fill_hover="*primary_600",
	    block_title_text_weight="600",
	)

	# Choices offered in the image-to-image model selector: the two eagerly
	# loaded models first, then every portrait model in PORTRAIT_MODELS order.
	I2I_MODELS = [
	    "FLUX.1-schnell (Creative, high change)",
	    "InstructPix2Pix (Precise, preserves identity)",
	    *PORTRAIT_MODELS,
	]

	# Build the Gradio UI: a header, a "Text to Image" tab wired to generate_t2i,
	# an "Image to Image" tab wired to generate_i2i, and a credits footer.
	# NOTE(review): this file was captured with its indentation flattened. The
	# statements below are unchanged, but the nesting of the `with` blocks is a
	# reconstruction guided by the blank lines and usual Gradio layout; confirm
	# it against the original app.py before relying on the exact layout.
	with gr.Blocks(fill_height=True) as demo:
	    # Page header. The "header-text" class is the hook used by the custom CSS
	    # passed to demo.launch().
	    gr.Markdown(
	        """
	# 🎨 Z-Image-Turbo
	Ultra-fast AI image generation & editing • Text to Image + Image to Image
	""",
	        elem_classes="header-text"
	    )

	    with gr.Tabs():

	        # ── Tab 1: Text to Image ──────────────────────────────────
	        with gr.Tab("✨ Text to Image"):
	            with gr.Row(equal_height=False):
	                # Input column: prompt, advanced settings, button, examples.
	                with gr.Column(scale=1, min_width=320):
	                    t2i_prompt = gr.Textbox(
	                        label="✨ Your Prompt",
	                        placeholder="Describe the image you want to create...",
	                        lines=5, max_lines=10, autofocus=True,
	                    )
	                    with gr.Accordion("⚙️ Advanced Settings", open=False):
	                        with gr.Row():
	                            t2i_height = gr.Slider(512, 2048, value=1024, step=64, label="Height")
	                            t2i_width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
	                        t2i_steps = gr.Slider(1, 20, value=9, step=1, label="Inference Steps")
	                        with gr.Row():
	                            t2i_randomize = gr.Checkbox(label="🎲 Random Seed", value=True)
	                            t2i_seed = gr.Number(label="Seed", value=42, precision=0, visible=False)
	                        # Show the manual seed field only while "Random Seed"
	                        # is unchecked.
	                        t2i_randomize.change(
	                            lambda r: gr.Number(visible=not r),
	                            inputs=[t2i_randomize], outputs=[t2i_seed]
	                        )
	                    t2i_btn = gr.Button("🚀 Generate Image", variant="primary", size="lg")
	                    gr.Examples(examples=examples_t2i, inputs=[t2i_prompt], label="💡 Try these prompts")

	                # Output column: generated image and the seed that produced it.
	                with gr.Column(scale=1, min_width=320):
	                    t2i_output = gr.Image(label="Generated Image", type="pil", format="png", show_label=False, height=600)
	                    t2i_used_seed = gr.Number(label="🎲 Seed Used", interactive=False)

	            # Clicking the button and submitting the prompt box both run
	            # generate_t2i with the same inputs and outputs.
	            t2i_btn.click(generate_t2i, [t2i_prompt, t2i_height, t2i_width, t2i_steps, t2i_seed, t2i_randomize], [t2i_output, t2i_used_seed])
	            t2i_prompt.submit(generate_t2i, [t2i_prompt, t2i_height, t2i_width, t2i_steps, t2i_seed, t2i_randomize], [t2i_output, t2i_used_seed])

	        # ── Tab 2: Image to Image ─────────────────────────────────
	        with gr.Tab("🖼️ Image to Image"):
	            with gr.Row(equal_height=False):
	                # Input column: model selector, source image, instruction,
	                # advanced settings, button.
	                with gr.Column(scale=1, min_width=320):
	                    model_choice = gr.Radio(
	                        choices=I2I_MODELS,
	                        value="InstructPix2Pix (Precise, preserves identity)",
	                        label="🤖 Model",
	                        info="InstructPix2Pix: best for targeted edits. Portrait models: high quality people & faces. FLUX: creative transformations. Note: portrait models load on first use."
	                    )
	                    i2i_input = gr.Image(label="Upload Image", type="pil")
	                    i2i_prompt = gr.Textbox(
	                        label="✨ Edit Instruction",
	                        placeholder="e.g. 'woman in blue dress, photorealistic portrait' or 'make it a sunset'",
	                        lines=4,
	                    )
	                    with gr.Accordion("⚙️ Advanced Settings", open=False):
	                        i2i_strength = gr.Slider(0.1, 1.0, value=0.65, step=0.05,
	                            label="Strength (all except InstructPix2Pix)",
	                            info="Lower = more faithful to original. Higher = more creative change.")
	                        i2i_steps = gr.Slider(1, 50, value=25, step=1, label="Inference Steps")
	                        with gr.Row():
	                            i2i_randomize = gr.Checkbox(label="🎲 Random Seed", value=True)
	                            i2i_seed = gr.Number(label="Seed", value=42, precision=0, visible=False)
	                        # Show the manual seed field only while "Random Seed"
	                        # is unchecked.
	                        i2i_randomize.change(
	                            lambda r: gr.Number(visible=not r),
	                            inputs=[i2i_randomize], outputs=[i2i_seed]
	                        )
	                    i2i_btn = gr.Button("🚀 Edit Image", variant="primary", size="lg")

	                # Output column: edited image and the seed that produced it.
	                with gr.Column(scale=1, min_width=320):
	                    i2i_output = gr.Image(label="Result", type="pil", format="png", show_label=False, height=600)
	                    i2i_used_seed = gr.Number(label="🎲 Seed Used", interactive=False)

	            # Only the button triggers an edit; the argument order matches
	            # the generate_i2i signature.
	            i2i_btn.click(
	                generate_i2i,
	                [model_choice, i2i_input, i2i_prompt, i2i_strength, i2i_steps, i2i_seed, i2i_randomize],
	                [i2i_output, i2i_used_seed]
	            )

	    # Footer crediting the models behind each tab.
	    gr.Markdown(
	        """
	---
	<div style="text-align: center; opacity: 0.7; font-size: 0.9em;">
	<strong>T2I:</strong> Tongyi-MAI/Z-Image-Turbo •
	<strong>I2I:</strong> FLUX.1-schnell + InstructPix2Pix + Juggernaut XL + DreamShaper XL + RealVisXL + CyberRealistic XL
	</div>
	"""
	    )

	if __name__ == "__main__":
	    # Page-level styling: gradient-filled title and muted subtitle for the
	    # ".header-text" block, a centered max-width container, and a small
	    # lift-and-shadow effect on button hover.
	    page_css = """
	.header-text h1 {
	font-size: 2.5rem !important;
	font-weight: 700 !important;
	background: linear-gradient(135deg, #fbbf24 0%, #f59e0b 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	background-clip: text;
	}
	.header-text p { font-size: 1.1rem !important; color: #64748b !important; }
	.gradio-container { max-width: 1400px !important; margin: 0 auto !important; }
	button { transition: all 0.2s ease !important; }
	button:hover { transform: translateY(-1px); box-shadow: 0 4px 12px rgba(0,0,0,0.15) !important; }
	"""
	    # Start the app with the custom theme and CSS, with the MCP server enabled.
	    demo.launch(theme=custom_theme, css=page_css, mcp_server=True)