Mccscs2's picture
Update app.py
a69f969 verified
import torch
import spaces
import gradio as gr
from diffusers import (
DiffusionPipeline,
FluxImg2ImgPipeline,
StableDiffusionInstructPix2PixPipeline,
StableDiffusionXLImg2ImgPipeline,
)
from PIL import Image
print("Loading pipelines...")

# Text-to-image: Z-Image-Turbo in bfloat16, moved to the GPU right after loading.
pipe_t2i = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=False,
).to("cuda")

# Image-to-image: FLUX.1-schnell in bfloat16.
pipe_flux = FluxImg2ImgPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
).to("cuda")

# Instruction-based editing: InstructPix2Pix in float16, loaded without a
# safety checker (safety_checker=None).
pipe_ip2p = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix",
    torch_dtype=torch.float16,
    safety_checker=None,
).to("cuda")

print("Pipelines loaded! Portrait models load on first use.")
# Portrait models — lazy loaded on first selection to save memory.
# Cache of already-loaded portrait pipelines, keyed by the UI display name.
portrait_pipes = {}

# Display name -> checkpoint settings:
#   repo    : Hugging Face repository id handed to from_pretrained
#   variant : weight variant to request, or None when no variant is requested
#   sd15    : present (True) only on the entry labelled as an SD1.5 checkpoint
PORTRAIT_MODELS = {
    "SDXL Base (Balanced quality & control)": dict(
        repo="stabilityai/stable-diffusion-xl-base-1.0",
        variant="fp16",
    ),
    "Juggernaut XL (Photorealistic portraits)": dict(
        repo="RunDiffusion/Juggernaut-XL-v9",
        variant=None,
    ),
    "DreamShaper XL (Creative portraits)": dict(
        repo="Lykon/dreamshaper-xl-v2-turbo",
        variant=None,
    ),
    "Realistic Vision XL (Portrait photography)": dict(
        repo="SG161222/RealVisXL_V4.0",
        variant="fp16",
    ),
    "CyberRealistic XL (Detailed skin & faces)": dict(
        repo="stablediffusionapi/cyberrealistic-xl",
        variant=None,
    ),
    "DreamShaper (SD1.5, fast portraits)": dict(
        repo="Lykon/DreamShaper",
        variant=None,
        sd15=True,
    ),
}
def get_portrait_pipe(model_name):
    """Return the img2img pipeline for ``model_name``, loading it on first use.

    Loaded pipelines are cached in ``portrait_pipes`` so each checkpoint is
    only loaded and moved to the GPU once.

    Args:
        model_name: A key of ``PORTRAIT_MODELS`` (the UI display name).

    Returns:
        The cached pipeline object for that model.

    Raises:
        KeyError: If ``model_name`` is not a key of ``PORTRAIT_MODELS``.
    """
    if model_name not in portrait_pipes:
        print(f"Loading {model_name} for the first time...")
        cfg = PORTRAIT_MODELS[model_name]
        kwargs = dict(torch_dtype=torch.float16, use_safetensors=True)
        if cfg.get("variant"):
            kwargs["variant"] = cfg["variant"]
        if cfg.get("sd15"):
            # Entries flagged "sd15" are Stable Diffusion 1.5 checkpoints; they
            # must not be loaded with the SDXL pipeline class. Imported here so
            # the fix does not depend on the module-level import list.
            from diffusers import StableDiffusionImg2ImgPipeline

            pipeline_cls = StableDiffusionImg2ImgPipeline
        else:
            pipeline_cls = StableDiffusionXLImg2ImgPipeline
        pipe = pipeline_cls.from_pretrained(cfg["repo"], **kwargs)
        pipe.to("cuda")
        portrait_pipes[model_name] = pipe
        print(f"{model_name} loaded!")
    return portrait_pipes[model_name]
def resize_image(image, max_size=1024):
    """Resize ``image`` so both sides are multiples of 64 and fit ``max_size``.

    Images larger than ``max_size`` on either side are scaled down with their
    aspect ratio preserved; smaller images are never scaled up, only snapped
    to the nearest multiple of 64. Each side is clamped to at least 64 pixels,
    so very small or extremely elongated inputs no longer produce a
    zero-sized (invalid) resize target.

    Args:
        image: Object exposing ``.size`` as ``(width, height)`` and
            ``.resize((width, height))`` (e.g. a PIL image).
        max_size: Upper bound used to compute the downscale factor.

    Returns:
        Whatever ``image.resize`` returns for the computed size.
    """
    orig_w, orig_h = image.size
    # Cap the factor at 1.0: this function only shrinks, it never upscales.
    scale = min(max_size / orig_w, max_size / orig_h, 1.0)

    def _snap(length):
        # Nearest multiple of 64, but never below one 64-pixel step.
        return max(64, round(length * scale / 64) * 64)

    return image.resize((_snap(orig_w), _snap(orig_h)))
@spaces.GPU
def generate_t2i(prompt, height, width, num_inference_steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate one image from a text prompt with the text-to-image pipeline.

    Args:
        prompt: Text description of the image to generate.
        height: Output height in pixels (cast to int).
        width: Output width in pixels (cast to int).
        num_inference_steps: Number of denoising steps (cast to int).
        seed: Seed to use when ``randomize_seed`` is false. May be ``None``
            when the seed field is empty, in which case a random seed is used.
        randomize_seed: When true, ignore ``seed`` and draw a random one.
        progress: Gradio progress tracker (tracks tqdm output).

    Returns:
        A ``(image, seed)`` tuple: the generated image and the integer seed
        that was actually used.
    """
    # An empty seed field is delivered as None; int(None) would raise, so
    # treat it the same as "randomize".
    if randomize_seed or seed is None:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)
    generator = torch.Generator("cuda").manual_seed(seed)
    image = pipe_t2i(
        prompt=prompt,
        height=int(height),
        width=int(width),
        num_inference_steps=int(num_inference_steps),
        guidance_scale=0.0,
        generator=generator,
    ).images[0]
    return image, seed
@spaces.GPU
def generate_i2i(model_choice, input_image, prompt, strength, num_inference_steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Edit an uploaded image with the selected image-to-image model.

    The input is first passed through ``resize_image``. The FLUX and portrait
    branches use ``strength``; the InstructPix2Pix branch does not receive it
    and runs with fixed guidance values instead.

    Args:
        model_choice: Display name of the model to use. Anything other than
            the FLUX or InstructPix2Pix names is resolved through
            ``get_portrait_pipe``.
        input_image: The uploaded image, or ``None`` if nothing was uploaded.
        prompt: Edit instruction / target description.
        strength: Img2img strength (cast to float).
        num_inference_steps: Number of denoising steps (cast to int).
        seed: Seed to use when ``randomize_seed`` is false. May be ``None``
            when the seed field is empty, in which case a random seed is used.
        randomize_seed: When true, ignore ``seed`` and draw a random one.
        progress: Gradio progress tracker (tracks tqdm output).

    Returns:
        A ``(image, seed)`` tuple: the edited image and the integer seed that
        was actually used.

    Raises:
        gr.Error: If no input image was provided.
    """
    if input_image is None:
        raise gr.Error("Please upload an image first.")
    # An empty seed field is delivered as None; int(None) would raise, so
    # treat it the same as "randomize".
    if randomize_seed or seed is None:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)
    generator = torch.Generator("cuda").manual_seed(seed)
    input_image = resize_image(input_image)
    if model_choice == "FLUX.1-schnell (Creative, high change)":
        image = pipe_flux(
            prompt=prompt,
            image=input_image,
            strength=float(strength),
            num_inference_steps=int(num_inference_steps),
            generator=generator,
            width=input_image.width,
            height=input_image.height,
        ).images[0]
    elif model_choice == "InstructPix2Pix (Precise, preserves identity)":
        image = pipe_ip2p(
            prompt=prompt,
            image=input_image,
            num_inference_steps=int(num_inference_steps),
            image_guidance_scale=1.5,
            guidance_scale=7.5,
            generator=generator,
        ).images[0]
    else:
        # Portrait models (lazy loaded)
        pipe = get_portrait_pipe(model_choice)
        image = pipe(
            prompt=prompt,
            image=input_image,
            strength=float(strength),
            num_inference_steps=int(num_inference_steps),
            generator=generator,
        ).images[0]
    return image, seed
# Example prompts for the text-to-image tab. Each row is a one-element list
# because the examples widget is wired to a single input (the prompt box).
examples_t2i = [
    [example_prompt]
    for example_prompt in (
        "Young Chinese woman in red Hanfu, intricate embroidery, elaborate high bun, golden phoenix headdress",
        "A majestic dragon soaring through clouds at sunset, scales shimmering with iridescent colors",
        "Cozy coffee shop interior, warm lighting, rain on windows, plants on shelves, photorealistic",
        "Astronaut riding a horse on Mars, cinematic lighting, sci-fi concept art, highly detailed",
        "Portrait of a wise old wizard with a long white beard, holding a glowing crystal staff",
    )
]
# Base look: the Soft theme with yellow/amber hues, the Inter Google font,
# and large text and corner radii.
custom_theme = gr.themes.Soft(
    primary_hue="yellow",
    secondary_hue="amber",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="md",
    radius_size="lg",
)
# Variable overrides applied on top of the base theme: primary button fill
# (normal and hover) and the block title font weight.
custom_theme = custom_theme.set(
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    block_title_text_weight="600",
)
# Choices for the image-to-image model selector: the two always-loaded models
# first, then every portrait model name in PORTRAIT_MODELS order.
I2I_MODELS = [
    "FLUX.1-schnell (Creative, high change)",
    "InstructPix2Pix (Precise, preserves identity)",
    *PORTRAIT_MODELS,
]
# Top-level UI: a header, two tabs (text-to-image and image-to-image), and a
# footer listing the underlying models. Markdown bodies are kept flush-left so
# their text is unchanged apart from the repaired (previously mis-encoded)
# bullet characters.
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(
        """
# 🎨 Z-Image-Turbo
**Ultra-fast AI image generation & editing** • Text to Image + Image to Image
""",
        elem_classes="header-text"
    )
    with gr.Tabs():
        # ── Tab 1: Text to Image ──────────────────────────────────
        with gr.Tab("✨ Text to Image"):
            with gr.Row(equal_height=False):
                with gr.Column(scale=1, min_width=320):
                    t2i_prompt = gr.Textbox(
                        label="✨ Your Prompt",
                        placeholder="Describe the image you want to create...",
                        lines=5, max_lines=10, autofocus=True,
                    )
                    with gr.Accordion("⚙️ Advanced Settings", open=False):
                        with gr.Row():
                            t2i_height = gr.Slider(512, 2048, value=1024, step=64, label="Height")
                            t2i_width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
                        t2i_steps = gr.Slider(1, 20, value=9, step=1, label="Inference Steps")
                        with gr.Row():
                            t2i_randomize = gr.Checkbox(label="🎲 Random Seed", value=True)
                            t2i_seed = gr.Number(label="Seed", value=42, precision=0, visible=False)
                        # Show the manual seed field only while "Random Seed" is unchecked.
                        t2i_randomize.change(
                            lambda r: gr.Number(visible=not r),
                            inputs=[t2i_randomize], outputs=[t2i_seed]
                        )
                    t2i_btn = gr.Button("🚀 Generate Image", variant="primary", size="lg")
                    gr.Examples(examples=examples_t2i, inputs=[t2i_prompt], label="💡 Try these prompts")
                with gr.Column(scale=1, min_width=320):
                    t2i_output = gr.Image(label="Generated Image", type="pil", format="png", show_label=False, height=600)
                    t2i_used_seed = gr.Number(label="🎲 Seed Used", interactive=False)
            # The button click and pressing Enter in the prompt box trigger the
            # same generation call, so both share one input/output wiring.
            t2i_inputs = [t2i_prompt, t2i_height, t2i_width, t2i_steps, t2i_seed, t2i_randomize]
            t2i_outputs = [t2i_output, t2i_used_seed]
            t2i_btn.click(generate_t2i, t2i_inputs, t2i_outputs)
            t2i_prompt.submit(generate_t2i, t2i_inputs, t2i_outputs)
        # ── Tab 2: Image to Image ─────────────────────────────────
        with gr.Tab("🖼️ Image to Image"):
            with gr.Row(equal_height=False):
                with gr.Column(scale=1, min_width=320):
                    model_choice = gr.Radio(
                        choices=I2I_MODELS,
                        value="InstructPix2Pix (Precise, preserves identity)",
                        label="🤖 Model",
                        info="InstructPix2Pix: best for targeted edits. Portrait models: high quality people & faces. FLUX: creative transformations. Note: portrait models load on first use."
                    )
                    i2i_input = gr.Image(label="Upload Image", type="pil")
                    i2i_prompt = gr.Textbox(
                        label="✨ Edit Instruction",
                        placeholder="e.g. 'woman in blue dress, photorealistic portrait' or 'make it a sunset'",
                        lines=4,
                    )
                    with gr.Accordion("⚙️ Advanced Settings", open=False):
                        i2i_strength = gr.Slider(0.1, 1.0, value=0.65, step=0.05,
                                                 label="Strength (all except InstructPix2Pix)",
                                                 info="Lower = more faithful to original. Higher = more creative change.")
                        i2i_steps = gr.Slider(1, 50, value=25, step=1, label="Inference Steps")
                        with gr.Row():
                            i2i_randomize = gr.Checkbox(label="🎲 Random Seed", value=True)
                            i2i_seed = gr.Number(label="Seed", value=42, precision=0, visible=False)
                        # Show the manual seed field only while "Random Seed" is unchecked.
                        i2i_randomize.change(
                            lambda r: gr.Number(visible=not r),
                            inputs=[i2i_randomize], outputs=[i2i_seed]
                        )
                    i2i_btn = gr.Button("🚀 Edit Image", variant="primary", size="lg")
                with gr.Column(scale=1, min_width=320):
                    i2i_output = gr.Image(label="Result", type="pil", format="png", show_label=False, height=600)
                    i2i_used_seed = gr.Number(label="🎲 Seed Used", interactive=False)
            i2i_btn.click(
                generate_i2i,
                [model_choice, i2i_input, i2i_prompt, i2i_strength, i2i_steps, i2i_seed, i2i_randomize],
                [i2i_output, i2i_used_seed]
            )
    gr.Markdown(
        """
---
<div style="text-align: center; opacity: 0.7; font-size: 0.9em;">
<strong>T2I:</strong> Tongyi-MAI/Z-Image-Turbo •
<strong>I2I:</strong> FLUX.1-schnell + InstructPix2Pix + Juggernaut XL + DreamShaper XL + RealVisXL + CyberRealistic XL
</div>
"""
    )
if __name__ == "__main__":
    # Page-level CSS: gradient-filled header title, muted header subtitle,
    # a centered max-width container, and a small lift effect on button hover.
    page_css = """
.header-text h1 {
font-size: 2.5rem !important;
font-weight: 700 !important;
background: linear-gradient(135deg, #fbbf24 0%, #f59e0b 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
}
.header-text p { font-size: 1.1rem !important; color: #64748b !important; }
.gradio-container { max-width: 1400px !important; margin: 0 auto !important; }
button { transition: all 0.2s ease !important; }
button:hover { transform: translateY(-1px); box-shadow: 0 4px 12px rgba(0,0,0,0.15) !important; }
"""
    # Launch with the custom theme and CSS; mcp_server=True is passed through
    # to launch() unchanged.
    demo.launch(theme=custom_theme, css=page_css, mcp_server=True)