# Z-Image-Turbo_with_image

# Sleeping

# File size: 11,861 Bytes

import torch
import spaces
import gradio as gr
from diffusers import (
    DiffusionPipeline,
    FluxImg2ImgPipeline,
    StableDiffusionInstructPix2PixPipeline,
    StableDiffusionXLImg2ImgPipeline,
)
from PIL import Image

# --- Eagerly loaded pipelines -------------------------------------------
# These three pipelines are created at import time and moved to the "cuda"
# device right away; the generate_* handlers below use them as module globals.
print("Loading pipelines...")

# Text to image pipeline (Z-Image-Turbo, bfloat16 weights)
pipe_t2i = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=False,
)
pipe_t2i.to("cuda")

# FLUX.1-schnell image-to-image pipeline (bfloat16 weights)
pipe_flux = FluxImg2ImgPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
)
pipe_flux.to("cuda")

# InstructPix2Pix instruction-driven editing pipeline (float16 weights);
# the safety checker component is explicitly disabled.
pipe_ip2p = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix",
    torch_dtype=torch.float16,
    safety_checker=None,
)
pipe_ip2p.to("cuda")

print("Pipelines loaded! Portrait models load on first use.")

# Portrait models — lazy loaded on first selection to save memory
# Cache of portrait pipelines that have already been loaded, keyed by the
# same display label used as the key in PORTRAIT_MODELS.
portrait_pipes = {}

# Registry of selectable portrait checkpoints, keyed by the label shown in
# the image-to-image model selector.
#   "repo"    - Hugging Face repo id passed to from_pretrained().
#   "variant" - weight variant to request (e.g. "fp16"); None means no
#               variant argument is passed.
#   "sd15"    - present only on the DreamShaper entry, whose label describes
#               it as an SD1.5 model.
#               NOTE(review): confirm the loader selects a non-SDXL pipeline
#               class for entries carrying this flag.
PORTRAIT_MODELS = {
    "SDXL Base (Balanced quality & control)": {
        "repo": "stabilityai/stable-diffusion-xl-base-1.0",
        "variant": "fp16",
    },
    "Juggernaut XL (Photorealistic portraits)": {
        "repo": "RunDiffusion/Juggernaut-XL-v9",
        "variant": None,
    },
    "DreamShaper XL (Creative portraits)": {
        "repo": "Lykon/dreamshaper-xl-v2-turbo",
        "variant": None,
    },
    "Realistic Vision XL (Portrait photography)": {
        "repo": "SG161222/RealVisXL_V4.0",
        "variant": "fp16",
    },
    "CyberRealistic XL (Detailed skin & faces)": {
        "repo": "stablediffusionapi/cyberrealistic-xl",
        "variant": None,
    },
    "DreamShaper (SD1.5, fast portraits)": {
        "repo": "Lykon/DreamShaper",
        "variant": None,
        "sd15": True,
    },
}

def get_portrait_pipe(model_name):
    """Return the img2img pipeline for ``model_name``, loading it on first use.

    The pipeline is built from the matching ``PORTRAIT_MODELS`` entry, moved
    to the "cuda" device and cached in ``portrait_pipes`` so later calls with
    the same name return the already-loaded object.

    Entries flagged with ``"sd15": True`` are Stable Diffusion 1.5
    checkpoints and are loaded with ``StableDiffusionImg2ImgPipeline``;
    all other entries are loaded with ``StableDiffusionXLImg2ImgPipeline``.

    Args:
        model_name: A key of ``PORTRAIT_MODELS`` (the label shown in the UI).

    Returns:
        The loaded (and cached) diffusers img2img pipeline.

    Raises:
        KeyError: If ``model_name`` is not a key of ``PORTRAIT_MODELS``.
    """
    if model_name not in portrait_pipes:
        print(f"Loading {model_name} for the first time...")
        cfg = PORTRAIT_MODELS[model_name]
        kwargs = dict(torch_dtype=torch.float16, use_safetensors=True)
        if cfg.get("variant"):
            kwargs["variant"] = cfg["variant"]
        if cfg.get("sd15"):
            # SD1.5 checkpoints do not have the SDXL component layout, so
            # they need the SD1.x img2img pipeline class. Imported locally
            # so the file's top-level import block stays untouched.
            from diffusers import StableDiffusionImg2ImgPipeline

            pipe_cls = StableDiffusionImg2ImgPipeline
        else:
            pipe_cls = StableDiffusionXLImg2ImgPipeline
        pipe = pipe_cls.from_pretrained(cfg["repo"], **kwargs)
        pipe.to("cuda")
        portrait_pipes[model_name] = pipe
        print(f"{model_name} loaded!")
    return portrait_pipes[model_name]

def resize_image(image, max_size=1024):
    """Resize ``image`` so both sides are multiples of 64.

    Images larger than ``max_size`` on either side are scaled down so the
    longer side fits ``max_size``; smaller images are never scaled up, only
    snapped to the 64-pixel grid. Because each side is rounded
    independently, the aspect ratio is only approximately preserved.

    Each side is clamped to at least 64 pixels: without the clamp, a side
    shorter than 32 pixels (after scaling) rounds to 0 and the resize
    request becomes invalid.

    Args:
        image: Object exposing ``.size`` as ``(width, height)`` and
            ``.resize((width, height))`` (a PIL image in this app).
        max_size: Upper bound for the longer side before grid snapping.

    Returns:
        Whatever ``image.resize`` returns for the computed size.
    """
    grid = 64
    orig_w, orig_h = image.size
    # Cap the factor at 1.0 so small images are snapped to the grid but not enlarged.
    scale = min(1.0, max_size / orig_w, max_size / orig_h)
    new_w = max(grid, round(orig_w * scale / grid) * grid)
    new_h = max(grid, round(orig_h * scale / grid) * grid)
    return image.resize((new_w, new_h))

@spaces.GPU
def generate_t2i(prompt, height, width, num_inference_steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate one image from ``prompt`` with the Z-Image-Turbo pipeline.

    Args:
        prompt: Text description of the image.
        height: Output height in pixels (converted with ``int``).
        width: Output width in pixels (converted with ``int``).
        num_inference_steps: Number of denoising steps (converted with ``int``).
        seed: Seed to use when ``randomize_seed`` is false. ``None`` (an
            emptied seed field) is treated like a request for a random seed.
        randomize_seed: When true, ``seed`` is ignored and a fresh one is drawn.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        ``(image, seed)`` - the first generated image and the seed that was
        actually used.
    """
    # A missing seed would make int(seed) raise TypeError, so fall back to
    # drawing a random seed instead of failing the request.
    if randomize_seed or seed is None:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    generator = torch.Generator("cuda").manual_seed(int(seed))
    image = pipe_t2i(
        prompt=prompt,
        height=int(height),
        width=int(width),
        num_inference_steps=int(num_inference_steps),
        guidance_scale=0.0,
        generator=generator,
    ).images[0]
    return image, seed

@spaces.GPU
def generate_i2i(model_choice, input_image, prompt, strength, num_inference_steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Edit ``input_image`` according to ``prompt`` with the selected model.

    The input is first passed through ``resize_image``. The FLUX and
    InstructPix2Pix labels dispatch to their preloaded pipelines; any other
    label is treated as a portrait model and resolved (and lazily loaded)
    through ``get_portrait_pipe``.

    Args:
        model_choice: Display label of the model to use.
        input_image: Image to edit; ``None`` is rejected.
        prompt: Edit instruction / target description.
        strength: How far to move away from the input (not passed to
            InstructPix2Pix).
        num_inference_steps: Requested number of denoising steps.
        seed: Seed to use when ``randomize_seed`` is false. ``None`` (an
            emptied seed field) is treated like a request for a random seed.
        randomize_seed: When true, ``seed`` is ignored and a fresh one is drawn.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        ``(image, seed)`` - the edited image and the seed that was actually used.

    Raises:
        gr.Error: If no image was supplied, or if a portrait model is asked
            to run with a steps/strength combination that leaves zero
            denoising steps.
    """
    if input_image is None:
        raise gr.Error("Please upload an image first.")
    # A missing seed would make int(seed) raise TypeError, so fall back to
    # drawing a random seed instead of failing the request.
    if randomize_seed or seed is None:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    generator = torch.Generator("cuda").manual_seed(int(seed))
    input_image = resize_image(input_image)
    steps = int(num_inference_steps)
    strength = float(strength)

    if model_choice == "FLUX.1-schnell (Creative, high change)":
        image = pipe_flux(
            prompt=prompt,
            image=input_image,
            strength=strength,
            num_inference_steps=steps,
            generator=generator,
            width=input_image.width,
            height=input_image.height,
        ).images[0]

    elif model_choice == "InstructPix2Pix (Precise, preserves identity)":
        image = pipe_ip2p(
            prompt=prompt,
            image=input_image,
            num_inference_steps=steps,
            image_guidance_scale=1.5,
            guidance_scale=7.5,
            generator=generator,
        ).images[0]

    else:
        # Portrait models (lazy loaded)
        # The SD/SDXL img2img pipelines run roughly int(steps * strength)
        # denoising steps; when that is 0 they have nothing to run and fail
        # with an opaque error. Reject the combination up front, before
        # paying for a model load.
        if int(steps * strength) < 1:
            raise gr.Error(
                f"Strength {strength:g} with {steps} inference steps leaves no "
                "denoising steps. Increase the strength or the number of steps."
            )
        pipe = get_portrait_pipe(model_choice)
        image = pipe(
            prompt=prompt,
            image=input_image,
            strength=strength,
            num_inference_steps=steps,
            generator=generator,
        ).images[0]

    return image, seed

# Sample prompts offered under the text-to-image prompt box. Each prompt is
# wrapped in its own one-element list, i.e. one row per example with a
# single input value.
examples_t2i = [
    [prompt_text]
    for prompt_text in (
        "Young Chinese woman in red Hanfu, intricate embroidery, elaborate high bun, golden phoenix headdress",
        "A majestic dragon soaring through clouds at sunset, scales shimmering with iridescent colors",
        "Cozy coffee shop interior, warm lighting, rain on windows, plants on shelves, photorealistic",
        "Astronaut riding a horse on Mars, cinematic lighting, sci-fi concept art, highly detailed",
        "Portrait of a wise old wizard with a long white beard, holding a glowing crystal staff",
    )
]

# App theme: Gradio's Soft theme with yellow/amber accent hues on a slate
# neutral palette, the Inter Google font, and large text / rounded corners.
# The .set() call then overrides individual theme variables: the primary
# button fill (normal and hover) and the weight of block titles.
custom_theme = gr.themes.Soft(
    primary_hue="yellow",
    secondary_hue="amber",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="md",
    radius_size="lg"
).set(
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    block_title_text_weight="600",
)

# Labels offered in the image-to-image model selector: the two preloaded
# pipelines first, then every portrait model label in registry order.
I2I_MODELS = [
    "FLUX.1-schnell (Creative, high change)",
    "InstructPix2Pix (Precise, preserves identity)",
    *PORTRAIT_MODELS,
]

# Build the two-tab UI. Component creation order inside each container
# determines the on-screen order, so statements here are order-sensitive.
with gr.Blocks(fill_height=True) as demo:
    # Page header; the "header-text" class is the hook for the custom CSS
    # supplied when the app is launched.
    gr.Markdown(
        """
        # 🎨 Z-Image-Turbo
        **Ultra-fast AI image generation & editing** • Text to Image + Image to Image
        """,
        elem_classes="header-text"
    )

    with gr.Tabs():

        # ── Tab 1: Text to Image ──────────────────────────────────
        with gr.Tab("✨ Text to Image"):
            with gr.Row(equal_height=False):
                # Left column: prompt, advanced settings, generate button, examples.
                with gr.Column(scale=1, min_width=320):
                    t2i_prompt = gr.Textbox(
                        label="✨ Your Prompt",
                        placeholder="Describe the image you want to create...",
                        lines=5, max_lines=10, autofocus=True,
                    )
                    with gr.Accordion("⚙️ Advanced Settings", open=False):
                        with gr.Row():
                            t2i_height = gr.Slider(512, 2048, value=1024, step=64, label="Height")
                            t2i_width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
                        t2i_steps = gr.Slider(1, 20, value=9, step=1, label="Inference Steps")
                        with gr.Row():
                            t2i_randomize = gr.Checkbox(label="🎲 Random Seed", value=True)
                            t2i_seed = gr.Number(label="Seed", value=42, precision=0, visible=False)
                        # Show the manual seed field only while "Random Seed" is unchecked.
                        t2i_randomize.change(
                            lambda r: gr.Number(visible=not r),
                            inputs=[t2i_randomize], outputs=[t2i_seed]
                        )
                    t2i_btn = gr.Button("🚀 Generate Image", variant="primary", size="lg")
                    gr.Examples(examples=examples_t2i, inputs=[t2i_prompt], label="💡 Try these prompts")

                # Right column: generated image and the seed that produced it.
                with gr.Column(scale=1, min_width=320):
                    t2i_output = gr.Image(label="Generated Image", type="pil", format="png", show_label=False, height=600)
                    t2i_used_seed = gr.Number(label="🎲 Seed Used", interactive=False)

            # Both the button click and submitting the prompt box run the same
            # handler with the same inputs and outputs.
            t2i_btn.click(generate_t2i, [t2i_prompt, t2i_height, t2i_width, t2i_steps, t2i_seed, t2i_randomize], [t2i_output, t2i_used_seed])
            t2i_prompt.submit(generate_t2i, [t2i_prompt, t2i_height, t2i_width, t2i_steps, t2i_seed, t2i_randomize], [t2i_output, t2i_used_seed])

        # ── Tab 2: Image to Image ─────────────────────────────────
        with gr.Tab("🖼️ Image to Image"):
            with gr.Row(equal_height=False):
                # Left column: model selector, source image, instruction, settings.
                with gr.Column(scale=1, min_width=320):
                    # The selected label is passed to generate_i2i as model_choice;
                    # InstructPix2Pix is preselected.
                    model_choice = gr.Radio(
                        choices=I2I_MODELS,
                        value="InstructPix2Pix (Precise, preserves identity)",
                        label="🤖 Model",
                        info="InstructPix2Pix: best for targeted edits. Portrait models: high quality people & faces. FLUX: creative transformations. Note: portrait models load on first use."
                    )
                    i2i_input = gr.Image(label="Upload Image", type="pil")
                    i2i_prompt = gr.Textbox(
                        label="✨ Edit Instruction",
                        placeholder="e.g. 'woman in blue dress, photorealistic portrait' or 'make it a sunset'",
                        lines=4,
                    )
                    with gr.Accordion("⚙️ Advanced Settings", open=False):
                        i2i_strength = gr.Slider(0.1, 1.0, value=0.65, step=0.05,
                            label="Strength (all except InstructPix2Pix)",
                            info="Lower = more faithful to original. Higher = more creative change.")
                        i2i_steps = gr.Slider(1, 50, value=25, step=1, label="Inference Steps")
                        with gr.Row():
                            i2i_randomize = gr.Checkbox(label="🎲 Random Seed", value=True)
                            i2i_seed = gr.Number(label="Seed", value=42, precision=0, visible=False)
                        # Show the manual seed field only while "Random Seed" is unchecked.
                        i2i_randomize.change(
                            lambda r: gr.Number(visible=not r),
                            inputs=[i2i_randomize], outputs=[i2i_seed]
                        )
                    i2i_btn = gr.Button("🚀 Edit Image", variant="primary", size="lg")

                # Right column: edited image and the seed that produced it.
                with gr.Column(scale=1, min_width=320):
                    i2i_output = gr.Image(label="Result", type="pil", format="png", show_label=False, height=600)
                    i2i_used_seed = gr.Number(label="🎲 Seed Used", interactive=False)

            # Only the button triggers an edit on this tab (no submit binding
            # on the instruction box).
            i2i_btn.click(
                generate_i2i,
                [model_choice, i2i_input, i2i_prompt, i2i_strength, i2i_steps, i2i_seed, i2i_randomize],
                [i2i_output, i2i_used_seed]
            )

    # Footer crediting the underlying models.
    gr.Markdown(
        """
        ---
        <div style="text-align: center; opacity: 0.7; font-size: 0.9em;">
        <strong>T2I:</strong> Tongyi-MAI/Z-Image-Turbo • 
        <strong>I2I:</strong> FLUX.1-schnell + InstructPix2Pix + Juggernaut XL + DreamShaper XL + RealVisXL + CyberRealistic XL
        </div>
        """
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    # The theme and the custom CSS are supplied here at launch time. The CSS
    # styles the ".header-text" heading (gradient text), caps the container
    # width at 1400px and adds a hover lift to buttons.
    # mcp_server=True additionally asks Gradio to start its MCP server.
    demo.launch(
        theme=custom_theme,
        css="""
        .header-text h1 {
            font-size: 2.5rem !important;
            font-weight: 700 !important;
            background: linear-gradient(135deg, #fbbf24 0%, #f59e0b 100%);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
        }
        .header-text p { font-size: 1.1rem !important; color: #64748b !important; }
        .gradio-container { max-width: 1400px !important; margin: 0 auto !important; }
        button { transition: all 0.2s ease !important; }
        button:hover { transform: translateY(-1px); box-shadow: 0 4px 12px rgba(0,0,0,0.15) !important; }
        """,
        mcp_server=True
    )